File Coverage

libdeflate-one.c

Criterion	Covered	Total	%
statement	1879	3472	54.1
branch	871	2400	36.2
condition			n/a
subroutine			n/a
pod			n/a
total	2750	5872	46.8

line	stmt	bran	code
1			/*
2			Copyright 2016 Eric Biggers
3			Copyright 2024 Google LLC
4
5			Permission is hereby granted, free of charge, to any person
6			obtaining a copy of this software and associated documentation files
7			(the "Software"), to deal in the Software without restriction,
8			including without limitation the rights to use, copy, modify, merge,
9			publish, distribute, sublicense, and/or sell copies of the Software,
10			and to permit persons to whom the Software is furnished to do so,
11			subject to the following conditions:
12
13			The above copyright notice and this permission notice shall be
14			included in all copies or substantial portions of the Software.
15
16			THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17			EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18			MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19			NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
20			BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
21			ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22			CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23			SOFTWARE.
24
25			*/
26			/* /usr/home/ben/projects/gzip-libdeflate/../../software/libdeflate/libdeflate-1.25/lib/adler32.c */
27
28
29			/* #include "lib_common.h" */
30
31
32			#ifndef LIB_LIB_COMMON_H
33			#define LIB_LIB_COMMON_H
34
35			#ifdef LIBDEFLATE_H
36
37			# error "lib_common.h must always be included before libdeflate.h"
38			#endif
39
40			#if defined(LIBDEFLATE_DLL) && (defined(_WIN32) || defined(__CYGWIN__))
41			# define LIBDEFLATE_EXPORT_SYM __declspec(dllexport)
42			#elif defined(__GNUC__)
43			# define LIBDEFLATE_EXPORT_SYM __attribute__((visibility("default")))
44			#else
45			# define LIBDEFLATE_EXPORT_SYM
46			#endif
47
48
49			#if defined(__GNUC__) && defined(__i386__)
50			# define LIBDEFLATE_ALIGN_STACK __attribute__((force_align_arg_pointer))
51			#else
52			# define LIBDEFLATE_ALIGN_STACK
53			#endif
54
55			#define LIBDEFLATEAPI LIBDEFLATE_EXPORT_SYM LIBDEFLATE_ALIGN_STACK
56
57			/* #include "../common_defs.h" */
58
59
60			#ifndef COMMON_DEFS_H
61			#define COMMON_DEFS_H
62
63			/* #include "libdeflate.h" */
64
65
66			#ifndef LIBDEFLATE_H
67			#define LIBDEFLATE_H
68
69			#include <stddef.h>
70			#include <stdint.h>
71
72			#ifdef __cplusplus
73			extern "C" {
74			#endif
75
76			#define LIBDEFLATE_VERSION_MAJOR 1
77			#define LIBDEFLATE_VERSION_MINOR 25
78			#define LIBDEFLATE_VERSION_STRING "1.25"
79
80
81			#ifndef LIBDEFLATEAPI
82			# if defined(LIBDEFLATE_DLL) && (defined(_WIN32) || defined(__CYGWIN__))
83			# define LIBDEFLATEAPI __declspec(dllimport)
84			# else
85			# define LIBDEFLATEAPI
86			# endif
87			#endif
88
89
90
91
92
93			struct libdeflate_compressor;
94			struct libdeflate_options;
95
96
97			LIBDEFLATEAPI struct libdeflate_compressor *
98			libdeflate_alloc_compressor(int compression_level);
99
100
101			LIBDEFLATEAPI struct libdeflate_compressor *
102			libdeflate_alloc_compressor_ex(int compression_level,
103			const struct libdeflate_options *options);
104
105
106			LIBDEFLATEAPI size_t
107			libdeflate_deflate_compress(struct libdeflate_compressor *compressor,
108			const void *in, size_t in_nbytes,
109			void *out, size_t out_nbytes_avail);
110
111
112			LIBDEFLATEAPI size_t
113			libdeflate_deflate_compress_bound(struct libdeflate_compressor *compressor,
114			size_t in_nbytes);
115
116
117			LIBDEFLATEAPI size_t
118			libdeflate_zlib_compress(struct libdeflate_compressor *compressor,
119			const void *in, size_t in_nbytes,
120			void *out, size_t out_nbytes_avail);
121
122
123			LIBDEFLATEAPI size_t
124			libdeflate_zlib_compress_bound(struct libdeflate_compressor *compressor,
125			size_t in_nbytes);
126
127
128			LIBDEFLATEAPI size_t
129			libdeflate_gzip_compress(struct libdeflate_compressor *compressor,
130			const void *in, size_t in_nbytes,
131			void *out, size_t out_nbytes_avail);
132
133
134			LIBDEFLATEAPI size_t
135			libdeflate_gzip_compress_bound(struct libdeflate_compressor *compressor,
136			size_t in_nbytes);
137
138
139			LIBDEFLATEAPI void
140			libdeflate_free_compressor(struct libdeflate_compressor *compressor);
141
142
143
144
145
146			struct libdeflate_decompressor;
147			struct libdeflate_options;
148
149
150			LIBDEFLATEAPI struct libdeflate_decompressor *
151			libdeflate_alloc_decompressor(void);
152
153
154			LIBDEFLATEAPI struct libdeflate_decompressor *
155			libdeflate_alloc_decompressor_ex(const struct libdeflate_options *options);
156
157
158			enum libdeflate_result {
159
160			LIBDEFLATE_SUCCESS = 0,
161
162
163			LIBDEFLATE_BAD_DATA = 1,
164
165
166			LIBDEFLATE_SHORT_OUTPUT = 2,
167
168
169			LIBDEFLATE_INSUFFICIENT_SPACE = 3,
170			};
171
172
173			LIBDEFLATEAPI enum libdeflate_result
174			libdeflate_deflate_decompress(struct libdeflate_decompressor *decompressor,
175			const void *in, size_t in_nbytes,
176			void *out, size_t out_nbytes_avail,
177			size_t *actual_out_nbytes_ret);
178
179
180			LIBDEFLATEAPI enum libdeflate_result
181			libdeflate_deflate_decompress_ex(struct libdeflate_decompressor *decompressor,
182			const void *in, size_t in_nbytes,
183			void *out, size_t out_nbytes_avail,
184			size_t *actual_in_nbytes_ret,
185			size_t *actual_out_nbytes_ret);
186
187
188			LIBDEFLATEAPI enum libdeflate_result
189			libdeflate_zlib_decompress(struct libdeflate_decompressor *decompressor,
190			const void *in, size_t in_nbytes,
191			void *out, size_t out_nbytes_avail,
192			size_t *actual_out_nbytes_ret);
193
194
195			LIBDEFLATEAPI enum libdeflate_result
196			libdeflate_zlib_decompress_ex(struct libdeflate_decompressor *decompressor,
197			const void *in, size_t in_nbytes,
198			void *out, size_t out_nbytes_avail,
199			size_t *actual_in_nbytes_ret,
200			size_t *actual_out_nbytes_ret);
201
202
203			LIBDEFLATEAPI enum libdeflate_result
204			libdeflate_gzip_decompress(struct libdeflate_decompressor *decompressor,
205			const void *in, size_t in_nbytes,
206			void *out, size_t out_nbytes_avail,
207			size_t *actual_out_nbytes_ret);
208
209
210			LIBDEFLATEAPI enum libdeflate_result
211			libdeflate_gzip_decompress_ex(struct libdeflate_decompressor *decompressor,
212			const void *in, size_t in_nbytes,
213			void *out, size_t out_nbytes_avail,
214			size_t *actual_in_nbytes_ret,
215			size_t *actual_out_nbytes_ret);
216
217
218			LIBDEFLATEAPI void
219			libdeflate_free_decompressor(struct libdeflate_decompressor *decompressor);
220
221
222
223
224
225
226			LIBDEFLATEAPI uint32_t
227			libdeflate_adler32(uint32_t adler, const void *buffer, size_t len);
228
229
230
231			LIBDEFLATEAPI uint32_t
232			libdeflate_crc32(uint32_t crc, const void *buffer, size_t len);
233
234
235
236
237
238
239			LIBDEFLATEAPI void
240			libdeflate_set_memory_allocator(void *(*malloc_func)(size_t),
241			void (*free_func)(void *));
242
243
244			struct libdeflate_options {
245
246
247			size_t sizeof_options;
248
249
250			void *(*malloc_func)(size_t);
251			void (*free_func)(void *);
252			};
253
254			#ifdef __cplusplus
255			}
256			#endif
257
258			#endif
259
260
261			#include <stdbool.h>
262			#include <stddef.h>
263			#include <stdint.h>
264			#ifdef _MSC_VER
265			# include <intrin.h>
266			# include <stdlib.h>
267
268
269			# pragma warning(disable : 4146)
270
271			# pragma warning(disable : 4018)
272			# pragma warning(disable : 4244)
273			# pragma warning(disable : 4267)
274			# pragma warning(disable : 4310)
275
276			# pragma warning(disable : 4100)
277			# pragma warning(disable : 4127)
278			# pragma warning(disable : 4189)
279			# pragma warning(disable : 4232)
280			# pragma warning(disable : 4245)
281			# pragma warning(disable : 4295)
282			#endif
283			#ifndef FREESTANDING
284			# include <string.h>
285			#endif
286
287
288
289
290
291
292			#undef ARCH_X86_64
293			#undef ARCH_X86_32
294			#undef ARCH_ARM64
295			#undef ARCH_ARM32
296			#undef ARCH_RISCV
297			#ifdef _MSC_VER
298
299			# if defined(_M_X64) && !defined(_M_ARM64EC)
300			# define ARCH_X86_64
301			# elif defined(_M_IX86)
302			# define ARCH_X86_32
303			# elif defined(_M_ARM64)
304			# define ARCH_ARM64
305			# elif defined(_M_ARM)
306			# define ARCH_ARM32
307			# endif
308			#else
309			# if defined(__x86_64__)
310			# define ARCH_X86_64
311			# elif defined(__i386__)
312			# define ARCH_X86_32
313			# elif defined(__aarch64__)
314			# define ARCH_ARM64
315			# elif defined(__arm__)
316			# define ARCH_ARM32
317			# elif defined(__riscv)
318			# define ARCH_RISCV
319			# endif
320			#endif
321
322
323
324
325
326
327			typedef uint8_t u8;
328			typedef uint16_t u16;
329			typedef uint32_t u32;
330			typedef uint64_t u64;
331			typedef int8_t s8;
332			typedef int16_t s16;
333			typedef int32_t s32;
334			typedef int64_t s64;
335
336
337			#ifdef _MSC_VER
338			# ifdef _WIN64
339			typedef long long ssize_t;
340			# else
341			typedef long ssize_t;
342			# endif
343			#endif
344
345
346			typedef size_t machine_word_t;
347
348
349			#define WORDBYTES ((int)sizeof(machine_word_t))
350
351
352			#define WORDBITS (8 * WORDBYTES)
353
354
355
356
357
358
359			#if defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER)
360			# define GCC_PREREQ(major, minor) \
361			(__GNUC__ > (major) || \
362			(__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)))
363			# if !GCC_PREREQ(4, 9)
364			# error "gcc versions older than 4.9 are no longer supported"
365			# endif
366			#else
367			# define GCC_PREREQ(major, minor) 0
368			#endif
369			#ifdef __clang__
370			# ifdef __apple_build_version__
371			# define CLANG_PREREQ(major, minor, apple_version) \
372			(__apple_build_version__ >= (apple_version))
373			# else
374			# define CLANG_PREREQ(major, minor, apple_version) \
375			(__clang_major__ > (major) || \
376			(__clang_major__ == (major) && __clang_minor__ >= (minor)))
377			# endif
378			# if !CLANG_PREREQ(3, 9, 8000000)
379			# error "clang versions older than 3.9 are no longer supported"
380			# endif
381			#else
382			# define CLANG_PREREQ(major, minor, apple_version) 0
383			#endif
384			#ifdef _MSC_VER
385			# define MSVC_PREREQ(version) (_MSC_VER >= (version))
386			# if !MSVC_PREREQ(1900)
387			# error "MSVC versions older than Visual Studio 2015 are no longer supported"
388			# endif
389			#else
390			# define MSVC_PREREQ(version) 0
391			#endif
392
393
394			#ifndef __has_attribute
395			# define __has_attribute(attribute) 0
396			#endif
397
398
399			#ifndef __has_builtin
400			# define __has_builtin(builtin) 0
401			#endif
402
403
404			#ifdef _MSC_VER
405			# define inline __inline
406			#endif
407
408
409			#if defined(__GNUC__) || __has_attribute(always_inline)
410			# define forceinline inline __attribute__((always_inline))
411			#elif defined(_MSC_VER)
412			# define forceinline __forceinline
413			#else
414			# define forceinline inline
415			#endif
416
417
418			#if defined(__GNUC__) || __has_attribute(unused)
419			# define MAYBE_UNUSED __attribute__((unused))
420			#else
421			# define MAYBE_UNUSED
422			#endif
423
424
425			#if defined(__GNUC__) || __has_attribute(noreturn)
426			# define NORETURN __attribute__((noreturn))
427			#else
428			# define NORETURN
429			#endif
430
431
432			#if !defined(__STDC_VERSION__) || (__STDC_VERSION__ < 201112L)
433			# if defined(__GNUC__) || defined(__clang__)
434			# define restrict __restrict__
435			# else
436			# define restrict
437			# endif
438			#endif
439
440
441			#if defined(__GNUC__) || __has_builtin(__builtin_expect)
442			# define likely(expr) __builtin_expect(!!(expr), 1)
443			#else
444			# define likely(expr) (expr)
445			#endif
446
447
448			#if defined(__GNUC__) || __has_builtin(__builtin_expect)
449			# define unlikely(expr) __builtin_expect(!!(expr), 0)
450			#else
451			# define unlikely(expr) (expr)
452			#endif
453
454
455			#undef prefetchr
456			#if defined(__GNUC__) || __has_builtin(__builtin_prefetch)
457			# define prefetchr(addr) __builtin_prefetch((addr), 0)
458			#elif defined(_MSC_VER)
459			# if defined(ARCH_X86_32) || defined(ARCH_X86_64)
460			# define prefetchr(addr) _mm_prefetch((addr), _MM_HINT_T0)
461			# elif defined(ARCH_ARM64)
462			# define prefetchr(addr) __prefetch2((addr), 0x00 )
463			# elif defined(ARCH_ARM32)
464			# define prefetchr(addr) __prefetch(addr)
465			# endif
466			#endif
467			#ifndef prefetchr
468			# define prefetchr(addr)
469			#endif
470
471
472			#undef prefetchw
473			#if defined(__GNUC__) || __has_builtin(__builtin_prefetch)
474			# define prefetchw(addr) __builtin_prefetch((addr), 1)
475			#elif defined(_MSC_VER)
476			# if defined(ARCH_X86_32) || defined(ARCH_X86_64)
477			# define prefetchw(addr) _m_prefetchw(addr)
478			# elif defined(ARCH_ARM64)
479			# define prefetchw(addr) __prefetch2((addr), 0x10 )
480			# elif defined(ARCH_ARM32)
481			# define prefetchw(addr) __prefetchw(addr)
482			# endif
483			#endif
484			#ifndef prefetchw
485			# define prefetchw(addr)
486			#endif
487
488
489			#undef _aligned_attribute
490			#if defined(__GNUC__) || __has_attribute(aligned)
491			# define _aligned_attribute(n) __attribute__((aligned(n)))
492			#elif defined(_MSC_VER)
493			# define _aligned_attribute(n) __declspec(align(n))
494			#endif
495
496
497			#if defined(__GNUC__) || __has_attribute(target)
498			# define _target_attribute(attrs) __attribute__((target(attrs)))
499			#else
500			# define _target_attribute(attrs)
501			#endif
502
503
504
505
506
507			#define ARRAY_LEN(A) (sizeof(A) / sizeof((A)[0]))
508			#define MIN(a, b) ((a) <= (b) ? (a) : (b))
509			#define MAX(a, b) ((a) >= (b) ? (a) : (b))
510			#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
511			#define STATIC_ASSERT(expr) ((void)sizeof(char[1 - 2 * !(expr)]))
512			#define ALIGN(n, a) (((n) + (a) - 1) & ~((a) - 1))
513			#define ROUND_UP(n, d) ((d) * DIV_ROUND_UP((n), (d)))
514
515
516
517
518
519
520			#if defined(__BYTE_ORDER__)
521			# define CPU_IS_LITTLE_ENDIAN() (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
522			#elif defined(_MSC_VER)
523			# define CPU_IS_LITTLE_ENDIAN() true
524			#else
525			static forceinline bool CPU_IS_LITTLE_ENDIAN(void)
526			{
527			union {
528			u32 w;
529			u8 b;
530			} u;
531
532			u.w = 1;
533			return u.b;
534			}
535			#endif
536
537
538			static forceinline u16 bswap16(u16 v)
539			{
540			#if defined(__GNUC__) || __has_builtin(__builtin_bswap16)
541	24		return __builtin_bswap16(v);
542			#elif defined(_MSC_VER)
543			return _byteswap_ushort(v);
544			#else
545			return (v << 8) | (v >> 8);
546			#endif
547			}
548
549
550			static forceinline u32 bswap32(u32 v)
551			{
552			#if defined(__GNUC__) || __has_builtin(__builtin_bswap32)
553	12		return __builtin_bswap32(v);
554			#elif defined(_MSC_VER)
555			return _byteswap_ulong(v);
556			#else
557			return ((v & 0x000000FF) << 24) |
558			((v & 0x0000FF00) << 8) |
559			((v & 0x00FF0000) >> 8) |
560			((v & 0xFF000000) >> 24);
561			#endif
562			}
563
564
565			static forceinline u64 bswap64(u64 v)
566			{
567			#if defined(__GNUC__) || __has_builtin(__builtin_bswap64)
568			return __builtin_bswap64(v);
569			#elif defined(_MSC_VER)
570			return _byteswap_uint64(v);
571			#else
572			return ((v & 0x00000000000000FF) << 56) |
573			((v & 0x000000000000FF00) << 40) |
574			((v & 0x0000000000FF0000) << 24) |
575			((v & 0x00000000FF000000) << 8) |
576			((v & 0x000000FF00000000) >> 8) |
577			((v & 0x0000FF0000000000) >> 24) |
578			((v & 0x00FF000000000000) >> 40) |
579			((v & 0xFF00000000000000) >> 56);
580			#endif
581			}
582
583			#define le16_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap16(v))
584			#define le32_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap32(v))
585			#define le64_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap64(v))
586			#define be16_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? bswap16(v) : (v))
587			#define be32_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? bswap32(v) : (v))
588			#define be64_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? bswap64(v) : (v))
589
590
591
592
593
594
595			#if (defined(__GNUC__) || defined(__clang__)) && \
596			(defined(ARCH_X86_64) || defined(ARCH_X86_32) || \
597			defined(__ARM_FEATURE_UNALIGNED) || defined(__powerpc64__) || \
598			defined(__riscv_misaligned_fast) || \
599			defined(__wasm__))
600			# define UNALIGNED_ACCESS_IS_FAST 1
601			#elif defined(_MSC_VER)
602			# define UNALIGNED_ACCESS_IS_FAST 1
603			#else
604			# define UNALIGNED_ACCESS_IS_FAST 0
605			#endif
606
607
608
609			#ifdef FREESTANDING
610			# define MEMCOPY __builtin_memcpy
611			#else
612			# define MEMCOPY memcpy
613			#endif
614
615
616
617			#define DEFINE_UNALIGNED_TYPE(type) \
618			static forceinline type \
619			load_##type##_unaligned(const void *p) \
620			{ \
621			type v; \
622			\
623			MEMCOPY(&v, p, sizeof(v)); \
624			return v; \
625			} \
626			\
627			static forceinline void \
628			store_##type##_unaligned(type v, void *p) \
629			{ \
630			MEMCOPY(p, &v, sizeof(v)); \
631			}
632
633	12		DEFINE_UNALIGNED_TYPE(u16)
634	90016		DEFINE_UNALIGNED_TYPE(u32)
635	14708		DEFINE_UNALIGNED_TYPE(u64)
636	8448		DEFINE_UNALIGNED_TYPE(machine_word_t)
637
638			#undef MEMCOPY
639
640			#define load_word_unaligned load_machine_word_t_unaligned
641			#define store_word_unaligned store_machine_word_t_unaligned
642
643
644
645			static forceinline u16
646			get_unaligned_le16(const u8 *p)
647			{
648			if (UNALIGNED_ACCESS_IS_FAST)
649	0		return le16_bswap(load_u16_unaligned(p));
650			else
651			return ((u16)p[1] << 8) | p[0];
652			}
653
654			static forceinline u16
655			get_unaligned_be16(const u8 *p)
656			{
657			if (UNALIGNED_ACCESS_IS_FAST)
658	24		return be16_bswap(load_u16_unaligned(p));
659			else
660			return ((u16)p[0] << 8) | p[1];
661			}
662
663			static forceinline u32
664			get_unaligned_le32(const u8 *p)
665			{
666			if (UNALIGNED_ACCESS_IS_FAST)
667	86219		return le32_bswap(load_u32_unaligned(p));
668			else
669			return ((u32)p[3] << 24) | ((u32)p[2] << 16) |
670			((u32)p[1] << 8) | p[0];
671			}
672
673			static forceinline u32
674			get_unaligned_be32(const u8 *p)
675			{
676			if (UNALIGNED_ACCESS_IS_FAST)
677	24		return be32_bswap(load_u32_unaligned(p));
678			else
679			return ((u32)p[0] << 24) | ((u32)p[1] << 16) |
680			((u32)p[2] << 8) | p[3];
681			}
682
683			static forceinline u64
684			get_unaligned_le64(const u8 *p)
685			{
686			if (UNALIGNED_ACCESS_IS_FAST)
687	11015		return le64_bswap(load_u64_unaligned(p));
688			else
689			return ((u64)p[7] << 56) | ((u64)p[6] << 48) |
690			((u64)p[5] << 40) | ((u64)p[4] << 32) |
691			((u64)p[3] << 24) | ((u64)p[2] << 16) |
692			((u64)p[1] << 8) | p[0];
693			}
694
695			static forceinline machine_word_t
696			get_unaligned_leword(const u8 *p)
697			{
698			STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
699			if (WORDBITS == 32)
700			return get_unaligned_le32(p);
701			else
702	11015		return get_unaligned_le64(p);
703			}
704
705
706
707			static forceinline void
708			put_unaligned_le16(u16 v, u8 *p)
709			{
710			if (UNALIGNED_ACCESS_IS_FAST) {
711	0		store_u16_unaligned(le16_bswap(v), p);
712			} else {
713			p[0] = (u8)(v >> 0);
714			p[1] = (u8)(v >> 8);
715			}
716	0		}
717
718			static forceinline void
719			put_unaligned_be16(u16 v, u8 *p)
720			{
721			if (UNALIGNED_ACCESS_IS_FAST) {
722	24		store_u16_unaligned(be16_bswap(v), p);
723			} else {
724			p[0] = (u8)(v >> 8);
725			p[1] = (u8)(v >> 0);
726			}
727	12		}
728
729			static forceinline void
730			put_unaligned_le32(u32 v, u8 *p)
731			{
732			if (UNALIGNED_ACCESS_IS_FAST) {
733	39		store_u32_unaligned(le32_bswap(v), p);
734			} else {
735			p[0] = (u8)(v >> 0);
736			p[1] = (u8)(v >> 8);
737			p[2] = (u8)(v >> 16);
738			p[3] = (u8)(v >> 24);
739			}
740	39		}
741
742			static forceinline void
743			put_unaligned_be32(u32 v, u8 *p)
744			{
745			if (UNALIGNED_ACCESS_IS_FAST) {
746	12		store_u32_unaligned(be32_bswap(v), p);
747			} else {
748			p[0] = (u8)(v >> 24);
749			p[1] = (u8)(v >> 16);
750			p[2] = (u8)(v >> 8);
751			p[3] = (u8)(v >> 0);
752			}
753	12		}
754
755			static forceinline void
756			put_unaligned_le64(u64 v, u8 *p)
757			{
758			if (UNALIGNED_ACCESS_IS_FAST) {
759	3693		store_u64_unaligned(le64_bswap(v), p);
760			} else {
761			p[0] = (u8)(v >> 0);
762			p[1] = (u8)(v >> 8);
763			p[2] = (u8)(v >> 16);
764			p[3] = (u8)(v >> 24);
765			p[4] = (u8)(v >> 32);
766			p[5] = (u8)(v >> 40);
767			p[6] = (u8)(v >> 48);
768			p[7] = (u8)(v >> 56);
769			}
770	3693		}
771
772			static forceinline void
773			put_unaligned_leword(machine_word_t v, u8 *p)
774			{
775			STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
776			if (WORDBITS == 32)
777			put_unaligned_le32(v, p);
778			else
779			put_unaligned_le64(v, p);
780	3693		}
781
782
783
784
785
786
787
788			static forceinline unsigned
789			bsr32(u32 v)
790			{
791			#if defined(__GNUC__) || __has_builtin(__builtin_clz)
792	12387		return 31 - __builtin_clz(v);
793			#elif defined(_MSC_VER)
794			unsigned long i;
795
796			_BitScanReverse(&i, v);
797			return i;
798			#else
799			unsigned i = 0;
800
801			while ((v >>= 1) != 0)
802			i++;
803			return i;
804			#endif
805			}
806
807			static forceinline unsigned
808			bsr64(u64 v)
809			{
810			#if defined(__GNUC__) || __has_builtin(__builtin_clzll)
811			return 63 - __builtin_clzll(v);
812			#elif defined(_MSC_VER) && defined(_WIN64)
813			unsigned long i;
814
815			_BitScanReverse64(&i, v);
816			return i;
817			#else
818			unsigned i = 0;
819
820			while ((v >>= 1) != 0)
821			i++;
822			return i;
823			#endif
824			}
825
826			static forceinline unsigned
827			bsrw(machine_word_t v)
828			{
829			STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
830			if (WORDBITS == 32)
831			return bsr32(v);
832			else
833			return bsr64(v);
834			}
835
836
837
838			static forceinline unsigned
839			bsf32(u32 v)
840			{
841			#if defined(__GNUC__) || __has_builtin(__builtin_ctz)
842			return __builtin_ctz(v);
843			#elif defined(_MSC_VER)
844			unsigned long i;
845
846			_BitScanForward(&i, v);
847			return i;
848			#else
849			unsigned i = 0;
850
851			for (; (v & 1) == 0; v >>= 1)
852			i++;
853			return i;
854			#endif
855			}
856
857			static forceinline unsigned
858			bsf64(u64 v)
859			{
860			#if defined(__GNUC__) || __has_builtin(__builtin_ctzll)
861	2632		return __builtin_ctzll(v);
862			#elif defined(_MSC_VER) && defined(_WIN64)
863			unsigned long i;
864
865			_BitScanForward64(&i, v);
866			return i;
867			#else
868			unsigned i = 0;
869
870			for (; (v & 1) == 0; v >>= 1)
871			i++;
872			return i;
873			#endif
874			}
875
876			static forceinline unsigned
877			bsfw(machine_word_t v)
878			{
879			STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
880			if (WORDBITS == 32)
881			return bsf32(v);
882			else
883	2632		return bsf64(v);
884			}
885
886
887			#undef rbit32
888			#if (defined(__GNUC__) || defined(__clang__)) && defined(ARCH_ARM32) && \
889			(__ARM_ARCH >= 7 || (__ARM_ARCH == 6 && defined(__ARM_ARCH_6T2__)))
890			static forceinline u32
891			rbit32(u32 v)
892			{
893			__asm__("rbit %0, %1" : "=r" (v) : "r" (v));
894			return v;
895			}
896			#define rbit32 rbit32
897			#elif (defined(__GNUC__) || defined(__clang__)) && defined(ARCH_ARM64)
898			static forceinline u32
899			rbit32(u32 v)
900			{
901			__asm__("rbit %w0, %w1" : "=r" (v) : "r" (v));
902			return v;
903			}
904			#define rbit32 rbit32
905			#endif
906
907			#endif
908
909
910			typedef void *(*malloc_func_t)(size_t);
911			typedef void (*free_func_t)(void *);
912
913			extern malloc_func_t libdeflate_default_malloc_func;
914			extern free_func_t libdeflate_default_free_func;
915
916			void *libdeflate_aligned_malloc(malloc_func_t malloc_func,
917			size_t alignment, size_t size);
918			void libdeflate_aligned_free(free_func_t free_func, void *ptr);
919
920			#ifdef FREESTANDING
921
922			void *memset(void *s, int c, size_t n);
923			#define memset(s, c, n) __builtin_memset((s), (c), (n))
924
925			void *memcpy(void *dest, const void *src, size_t n);
926			#define memcpy(dest, src, n) __builtin_memcpy((dest), (src), (n))
927
928			void *memmove(void *dest, const void *src, size_t n);
929			#define memmove(dest, src, n) __builtin_memmove((dest), (src), (n))
930
931			int memcmp(const void *s1, const void *s2, size_t n);
932			#define memcmp(s1, s2, n) __builtin_memcmp((s1), (s2), (n))
933
934			#undef LIBDEFLATE_ENABLE_ASSERTIONS
935			#else
936			# include <string.h>
937
938			# ifdef __clang_analyzer__
939			# define LIBDEFLATE_ENABLE_ASSERTIONS
940			# endif
941			#endif
942
943
944			#ifdef LIBDEFLATE_ENABLE_ASSERTIONS
945			NORETURN void
946			libdeflate_assertion_failed(const char *expr, const char *file, int line);
947			#define ASSERT(expr) { if (unlikely(!(expr))) \
948			libdeflate_assertion_failed(#expr, __FILE__, __LINE__); }
949			#else
950			#define ASSERT(expr) (void)(expr)
951			#endif
952
953			#define CONCAT_IMPL(a, b) a##b
954			#define CONCAT(a, b) CONCAT_IMPL(a, b)
955			#define ADD_SUFFIX(name) CONCAT(name, SUFFIX)
956
957			#endif
958
959
960
961			#define DIVISOR 65521
962
963
964			#define MAX_CHUNK_LEN 5552
965
966
967			#define ADLER32_CHUNK(s1, s2, p, n) \
968			do { \
969			if (n >= 4) { \
970			u32 s1_sum = 0; \
971			u32 byte_0_sum = 0; \
972			u32 byte_1_sum = 0; \
973			u32 byte_2_sum = 0; \
974			u32 byte_3_sum = 0; \
975			\
976			do { \
977			s1_sum += s1; \
978			s1 += p[0] + p[1] + p[2] + p[3]; \
979			byte_0_sum += p[0]; \
980			byte_1_sum += p[1]; \
981			byte_2_sum += p[2]; \
982			byte_3_sum += p[3]; \
983			p += 4; \
984			n -= 4; \
985			} while (n >= 4); \
986			s2 += (4 * (s1_sum + byte_0_sum)) + (3 * byte_1_sum) + \
987			(2 * byte_2_sum) + byte_3_sum; \
988			} \
989			for (; n; n--, p++) { \
990			s1 += *p; \
991			s2 += s1; \
992			} \
993			s1 %= DIVISOR; \
994			s2 %= DIVISOR; \
995			} while (0)
996
997			static u32 MAYBE_UNUSED
998	0		adler32_generic(u32 adler, const u8 *p, size_t len)
999			{
1000	0		u32 s1 = adler & 0xFFFF;
1001	0		u32 s2 = adler >> 16;
1002
1003	0	0	while (len) {
1004	0		size_t n = MIN(len, MAX_CHUNK_LEN & ~3);
1005
1006	0		len -= n;
1007	0	0	ADLER32_CHUNK(s1, s2, p, n);
		0
		0
1008			}
1009
1010	0		return (s2 << 16) | s1;
1011			}
1012
1013
1014			#undef DEFAULT_IMPL
1015			#undef arch_select_adler32_func
1016			typedef u32 (*adler32_func_t)(u32 adler, const u8 *p, size_t len);
1017			#if defined(ARCH_ARM32) || defined(ARCH_ARM64)
1018			/* # include "arm/adler32_impl.h" */
1019
1020
1021			#ifndef LIB_ARM_ADLER32_IMPL_H
1022			#define LIB_ARM_ADLER32_IMPL_H
1023
1024			/* #include "arm-cpu_features.h" */
1025
1026
1027			#ifndef LIB_ARM_CPU_FEATURES_H
1028			#define LIB_ARM_CPU_FEATURES_H
1029
1030			/* #include "lib_common.h" */
1031
1032
1033			#ifndef LIB_LIB_COMMON_H
1034			#define LIB_LIB_COMMON_H
1035
1036			#ifdef LIBDEFLATE_H
1037
1038			# error "lib_common.h must always be included before libdeflate.h"
1039			#endif
1040
1041			#if defined(LIBDEFLATE_DLL) && (defined(_WIN32) || defined(__CYGWIN__))
1042			# define LIBDEFLATE_EXPORT_SYM __declspec(dllexport)
1043			#elif defined(__GNUC__)
1044			# define LIBDEFLATE_EXPORT_SYM __attribute__((visibility("default")))
1045			#else
1046			# define LIBDEFLATE_EXPORT_SYM
1047			#endif
1048
1049
1050			#if defined(__GNUC__) && defined(__i386__)
1051			# define LIBDEFLATE_ALIGN_STACK __attribute__((force_align_arg_pointer))
1052			#else
1053			# define LIBDEFLATE_ALIGN_STACK
1054			#endif
1055
1056			#define LIBDEFLATEAPI LIBDEFLATE_EXPORT_SYM LIBDEFLATE_ALIGN_STACK
1057
1058			/* #include "../common_defs.h" */
1059
1060
1061			#ifndef COMMON_DEFS_H
1062			#define COMMON_DEFS_H
1063
1064			/* #include "libdeflate.h" */
1065
1066
1067			#ifndef LIBDEFLATE_H
1068			#define LIBDEFLATE_H
1069
1070			#include <stddef.h>
1071			#include <stdint.h>
1072
1073			#ifdef __cplusplus
1074			extern "C" {
1075			#endif
1076
1077			#define LIBDEFLATE_VERSION_MAJOR 1
1078			#define LIBDEFLATE_VERSION_MINOR 25
1079			#define LIBDEFLATE_VERSION_STRING "1.25"
1080
1081
1082			#ifndef LIBDEFLATEAPI
1083			# if defined(LIBDEFLATE_DLL) && (defined(_WIN32) || defined(__CYGWIN__))
1084			# define LIBDEFLATEAPI __declspec(dllimport)
1085			# else
1086			# define LIBDEFLATEAPI
1087			# endif
1088			#endif
1089
1090
1091
1092
1093
1094			struct libdeflate_compressor;
1095			struct libdeflate_options;
1096
1097
1098			LIBDEFLATEAPI struct libdeflate_compressor *
1099			libdeflate_alloc_compressor(int compression_level);
1100
1101
1102			LIBDEFLATEAPI struct libdeflate_compressor *
1103			libdeflate_alloc_compressor_ex(int compression_level,
1104			const struct libdeflate_options *options);
1105
1106
1107			LIBDEFLATEAPI size_t
1108			libdeflate_deflate_compress(struct libdeflate_compressor *compressor,
1109			const void *in, size_t in_nbytes,
1110			void *out, size_t out_nbytes_avail);
1111
1112
1113			LIBDEFLATEAPI size_t
1114			libdeflate_deflate_compress_bound(struct libdeflate_compressor *compressor,
1115			size_t in_nbytes);
1116
1117
1118			LIBDEFLATEAPI size_t
1119			libdeflate_zlib_compress(struct libdeflate_compressor *compressor,
1120			const void *in, size_t in_nbytes,
1121			void *out, size_t out_nbytes_avail);
1122
1123
1124			LIBDEFLATEAPI size_t
1125			libdeflate_zlib_compress_bound(struct libdeflate_compressor *compressor,
1126			size_t in_nbytes);
1127
1128
1129			LIBDEFLATEAPI size_t
1130			libdeflate_gzip_compress(struct libdeflate_compressor *compressor,
1131			const void *in, size_t in_nbytes,
1132			void *out, size_t out_nbytes_avail);
1133
1134
1135			LIBDEFLATEAPI size_t
1136			libdeflate_gzip_compress_bound(struct libdeflate_compressor *compressor,
1137			size_t in_nbytes);
1138
1139
1140			LIBDEFLATEAPI void
1141			libdeflate_free_compressor(struct libdeflate_compressor *compressor);
1142
1143
1144
1145
1146
1147			struct libdeflate_decompressor;
1148			struct libdeflate_options;
1149
1150
1151			LIBDEFLATEAPI struct libdeflate_decompressor *
1152			libdeflate_alloc_decompressor(void);
1153
1154
1155			LIBDEFLATEAPI struct libdeflate_decompressor *
1156			libdeflate_alloc_decompressor_ex(const struct libdeflate_options *options);
1157
1158
1159			enum libdeflate_result {
1160
1161			LIBDEFLATE_SUCCESS = 0,
1162
1163
1164			LIBDEFLATE_BAD_DATA = 1,
1165
1166
1167			LIBDEFLATE_SHORT_OUTPUT = 2,
1168
1169
1170			LIBDEFLATE_INSUFFICIENT_SPACE = 3,
1171			};
1172
1173
1174			LIBDEFLATEAPI enum libdeflate_result
1175			libdeflate_deflate_decompress(struct libdeflate_decompressor *decompressor,
1176			const void *in, size_t in_nbytes,
1177			void *out, size_t out_nbytes_avail,
1178			size_t *actual_out_nbytes_ret);
1179
1180
1181			LIBDEFLATEAPI enum libdeflate_result
1182			libdeflate_deflate_decompress_ex(struct libdeflate_decompressor *decompressor,
1183			const void *in, size_t in_nbytes,
1184			void *out, size_t out_nbytes_avail,
1185			size_t *actual_in_nbytes_ret,
1186			size_t *actual_out_nbytes_ret);
1187
1188
1189			LIBDEFLATEAPI enum libdeflate_result
1190			libdeflate_zlib_decompress(struct libdeflate_decompressor *decompressor,
1191			const void *in, size_t in_nbytes,
1192			void *out, size_t out_nbytes_avail,
1193			size_t *actual_out_nbytes_ret);
1194
1195
1196			LIBDEFLATEAPI enum libdeflate_result
1197			libdeflate_zlib_decompress_ex(struct libdeflate_decompressor *decompressor,
1198			const void *in, size_t in_nbytes,
1199			void *out, size_t out_nbytes_avail,
1200			size_t *actual_in_nbytes_ret,
1201			size_t *actual_out_nbytes_ret);
1202
1203
1204			LIBDEFLATEAPI enum libdeflate_result
1205			libdeflate_gzip_decompress(struct libdeflate_decompressor *decompressor,
1206			const void *in, size_t in_nbytes,
1207			void *out, size_t out_nbytes_avail,
1208			size_t *actual_out_nbytes_ret);
1209			/* Same parameter list as libdeflate_gzip_decompress() plus actual_in_nbytes_ret. */
1210
1211			LIBDEFLATEAPI enum libdeflate_result
1212			libdeflate_gzip_decompress_ex(struct libdeflate_decompressor *decompressor,
1213			const void *in, size_t in_nbytes,
1214			void *out, size_t out_nbytes_avail,
1215			size_t *actual_in_nbytes_ret,
1216			size_t *actual_out_nbytes_ret);
1217			/* Releases a decompressor object. NOTE(review): whether a NULL argument is accepted is not visible here - confirm. */
1218
1219			LIBDEFLATEAPI void
1220			libdeflate_free_decompressor(struct libdeflate_decompressor *decompressor);
1221			/* Adler-32 checksum entry point: takes a 32-bit running value, a buffer pointer and a byte count; returns a 32-bit value. */
1222			/* NOTE(review): presumably the return value is the checksum updated with the buffer contents - confirm. */
1223
1224
1225
1226
1227			LIBDEFLATEAPI uint32_t
1228			libdeflate_adler32(uint32_t adler, const void *buffer, size_t len);
1229			/* CRC-32 checksum entry point with the same shape as libdeflate_adler32(): running value, buffer, length. */
1230
1231
1232			LIBDEFLATEAPI uint32_t
1233			libdeflate_crc32(uint32_t crc, const void *buffer, size_t len);
1234			/* Registers allocation callbacks. NOTE(review): scope (presumably library-wide defaults) is not visible here - confirm. */
1235			/* malloc_func has the shape of malloc(): it takes a size and returns a pointer to the new memory. */
1236			/* free_func has the shape of free(): it takes a pointer and returns nothing. */
1237
1238
1239
1240			LIBDEFLATEAPI void
1241			libdeflate_set_memory_allocator(void *(*malloc_func)(size_t),
1242			void (*free_func)(void *));
1243			/* Options structure accepted by libdeflate_alloc_decompressor_ex(). */
1244
1245			struct libdeflate_options {
1246			/* NOTE(review): presumably the caller sets this to sizeof(struct libdeflate_options) - confirm. */
1247
1248			size_t sizeof_options;
1249			/* Allocator callbacks with the shapes of malloc() and free(). */
1250
1251			void *(*malloc_func)(size_t);
1252			void (*free_func)(void *);
1253			};
1254
1255			#ifdef __cplusplus
1256			}
1257			#endif
1258
1259			#endif
1260			/* Standard headers for bool/true, size_t and the fixed-width integer types used by the code that follows. */
1261
1262			#include <stdbool.h>
1263			#include <stddef.h>
1264			#include <stdint.h>
1265			#ifdef _MSC_VER
1266			# include <intrin.h>
1267			# include <stdlib.h>
1268			/* <intrin.h> supplies the _BitScan*() intrinsics and <stdlib.h> the _byteswap_*() functions used further down. */
1269			/* The pragmas below disable a fixed list of MSVC warnings for the rest of the translation unit. */
1270			# pragma warning(disable : 4146) /* unary minus applied to an unsigned type */
1271
1272			# pragma warning(disable : 4018) /* signed/unsigned mismatch in comparison */
1273			# pragma warning(disable : 4244) /* conversion with possible loss of data */
1274			# pragma warning(disable : 4267) /* conversion from size_t with possible loss of data */
1275			# pragma warning(disable : 4310) /* cast truncates constant value */
1276
1277			# pragma warning(disable : 4100) /* unreferenced formal parameter */
1278			# pragma warning(disable : 4127) /* conditional expression is constant */
1279			# pragma warning(disable : 4189) /* local variable initialized but not referenced */
1280			# pragma warning(disable : 4232) /* nonstandard extension used */
1281			# pragma warning(disable : 4245) /* signed/unsigned mismatch in conversion */
1282			# pragma warning(disable : 4295) /* array too small to include terminating null */
1283			#endif
1284			#ifndef FREESTANDING
1285			# include <string.h>
1286			#endif
1287			/* Target-architecture detection: clears any earlier ARCH_* definitions, then defines at most one of them. */
1288			/* MSVC is recognised through its _M_* macros; all other compilers through the GCC-style __x86_64__ etc. macros. */
1289			/* RISC-V is only detected on the non-MSVC branch. */
1290
1291
1292
1293			#undef ARCH_X86_64
1294			#undef ARCH_X86_32
1295			#undef ARCH_ARM64
1296			#undef ARCH_ARM32
1297			#undef ARCH_RISCV
1298			#ifdef _MSC_VER
1299			/* x86-64 is deliberately not selected when _M_ARM64EC is also defined. */
1300			# if defined(_M_X64) && !defined(_M_ARM64EC)
1301			# define ARCH_X86_64
1302			# elif defined(_M_IX86)
1303			# define ARCH_X86_32
1304			# elif defined(_M_ARM64)
1305			# define ARCH_ARM64
1306			# elif defined(_M_ARM)
1307			# define ARCH_ARM32
1308			# endif
1309			#else
1310			# if defined(__x86_64__)
1311			# define ARCH_X86_64
1312			# elif defined(__i386__)
1313			# define ARCH_X86_32
1314			# elif defined(__aarch64__)
1315			# define ARCH_ARM64
1316			# elif defined(__arm__)
1317			# define ARCH_ARM32
1318			# elif defined(__riscv)
1319			# define ARCH_RISCV
1320			# endif
1321			#endif
1322			/* Short aliases for the <stdint.h> fixed-width types: uN is unsigned N-bit, sN is signed N-bit. */
1323
1324
1325
1326
1327
1328			typedef uint8_t u8;
1329			typedef uint16_t u16;
1330			typedef uint32_t u32;
1331			typedef uint64_t u64;
1332			typedef int8_t s8;
1333			typedef int16_t s16;
1334			typedef int32_t s32;
1335			typedef int64_t s64;
1336			/* Under MSVC only, define ssize_t here: long long on 64-bit Windows (_WIN64), long otherwise. */
1337
1338			#ifdef _MSC_VER
1339			# ifdef _WIN64
1340			typedef long long ssize_t;
1341			# else
1342			typedef long ssize_t;
1343			# endif
1344			#endif
1345			/* machine_word_t is an alias for size_t and is the "word" type used by the *w helper functions further down. */
1346
1347			typedef size_t machine_word_t;
1348			/* Size of a machine word in bytes, as an int. */
1349
1350			#define WORDBYTES ((int)sizeof(machine_word_t))
1351			/* Size of a machine word in bits (eight bits per byte). */
1352
1353			#define WORDBITS (8 * WORDBYTES)
1354			/* GCC_PREREQ(major, minor): true when building with GCC itself (not clang or the Intel compiler) at version major.minor or newer. */
1355			/* On every other compiler it expands to 0. GCC older than 4.9 is rejected with #error. */
1356
1357
1358
1359
1360			#if defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER)
1361			# define GCC_PREREQ(major, minor) \
1362			(__GNUC__ > (major) \|\| \
1363			(__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)))
1364			# if !GCC_PREREQ(4, 9)
1365			# error "gcc versions older than 4.9 are no longer supported"
1366			# endif
1367			#else
1368			# define GCC_PREREQ(major, minor) 0
1369			#endif
1370			#ifdef __clang__ /* CLANG_PREREQ(major, minor, apple_version): clang version test; expands to 0 on other compilers */
1371			# ifdef __apple_build_version__ /* Apple clang: only apple_version is compared; major and minor are ignored */
1372			# define CLANG_PREREQ(major, minor, apple_version) \
1373			(__apple_build_version__ >= (apple_version))
1374			# else
1375			# define CLANG_PREREQ(major, minor, apple_version) \
1376			(__clang_major__ > (major) \|\| \
1377			(__clang_major__ == (major) && __clang_minor__ >= (minor)))
1378			# endif
1379			# if !CLANG_PREREQ(3, 9, 8000000)
1380			# error "clang versions older than 3.9 are no longer supported"
1381			# endif
1382			#else
1383			# define CLANG_PREREQ(major, minor, apple_version) 0
1384			#endif
1385			#ifdef _MSC_VER /* MSVC_PREREQ(version): compares _MSC_VER against version; expands to 0 on other compilers */
1386			# define MSVC_PREREQ(version) (_MSC_VER >= (version))
1387			# if !MSVC_PREREQ(1900)
1388			# error "MSVC versions older than Visual Studio 2015 are no longer supported"
1389			# endif
1390			#else
1391			# define MSVC_PREREQ(version) 0
1392			#endif
1393			/* Fallback so that __has_attribute() can be used in #if on compilers that lack it: every query answers 0. */
1394
1395			#ifndef __has_attribute
1396			# define __has_attribute(attribute) 0
1397			#endif
1398			/* Fallback so that __has_builtin() can be used in #if on compilers that lack it: every query answers 0. */
1399
1400			#ifndef __has_builtin
1401			# define __has_builtin(builtin) 0
1402			#endif
1403			/* Under MSVC, map the inline keyword onto the compiler-specific __inline spelling. */
1404
1405			#ifdef _MSC_VER
1406			# define inline __inline
1407			#endif
1408			/* forceinline: inline plus the strongest inlining request the compiler offers; plain inline when none is known. */
1409
1410			#if defined(__GNUC__) \|\| __has_attribute(always_inline)
1411			# define forceinline inline __attribute__((always_inline))
1412			#elif defined(_MSC_VER)
1413			# define forceinline __forceinline
1414			#else
1415			# define forceinline inline
1416			#endif
1417			/* MAYBE_UNUSED: marks a declaration with the GNU "unused" attribute where available; expands to nothing otherwise. */
1418
1419			#if defined(__GNUC__) \|\| __has_attribute(unused)
1420			# define MAYBE_UNUSED __attribute__((unused))
1421			#else
1422			# define MAYBE_UNUSED
1423			#endif
1424			/* NORETURN: marks a function with the GNU "noreturn" attribute where available; expands to nothing otherwise. */
1425
1426			#if defined(__GNUC__) \|\| __has_attribute(noreturn)
1427			# define NORETURN __attribute__((noreturn))
1428			#else
1429			# define NORETURN
1430			#endif
1431			/* When no C11-or-later __STDC_VERSION__ is reported, supply restrict as __restrict__ on GNU-compatible compilers, or as nothing. */
1432
1433			#if !defined(__STDC_VERSION__) \|\| (__STDC_VERSION__ < 201112L)
1434			# if defined(__GNUC__) \|\| defined(__clang__)
1435			# define restrict __restrict__
1436			# else
1437			# define restrict
1438			# endif
1439			#endif
1440			/* likely(expr): branch hint that expr is usually true; the builtin form normalises the value to 0 or 1 with !!. */
1441
1442			#if defined(__GNUC__) \|\| __has_builtin(__builtin_expect)
1443			# define likely(expr) __builtin_expect(!!(expr), 1)
1444			#else
1445			# define likely(expr) (expr)
1446			#endif
1447			/* unlikely(expr): branch hint that expr is usually false; the builtin form normalises the value to 0 or 1 with !!. */
1448
1449			#if defined(__GNUC__) \|\| __has_builtin(__builtin_expect)
1450			# define unlikely(expr) __builtin_expect(!!(expr), 0)
1451			#else
1452			# define unlikely(expr) (expr)
1453			#endif
1454			/* prefetchr(addr): prefetch hint for memory that is about to be read. */
1455			/* Expands to nothing when no compiler/architecture-specific form below applies. */
1456			#undef prefetchr
1457			#if defined(__GNUC__) \|\| __has_builtin(__builtin_prefetch)
1458			# define prefetchr(addr) __builtin_prefetch((addr), 0)
1459			#elif defined(_MSC_VER)
1460			# if defined(ARCH_X86_32) \|\| defined(ARCH_X86_64)
1461			# define prefetchr(addr) _mm_prefetch((addr), _MM_HINT_T0)
1462			# elif defined(ARCH_ARM64)
1463			# define prefetchr(addr) __prefetch2((addr), 0x00 )
1464			# elif defined(ARCH_ARM32)
1465			# define prefetchr(addr) __prefetch(addr)
1466			# endif
1467			#endif
1468			#ifndef prefetchr
1469			# define prefetchr(addr)
1470			#endif
1471			/* prefetchw(addr): prefetch hint for memory that is about to be written. */
1472			/* Expands to nothing when no compiler/architecture-specific form below applies. */
1473			#undef prefetchw
1474			#if defined(__GNUC__) \|\| __has_builtin(__builtin_prefetch)
1475			# define prefetchw(addr) __builtin_prefetch((addr), 1)
1476			#elif defined(_MSC_VER)
1477			# if defined(ARCH_X86_32) \|\| defined(ARCH_X86_64)
1478			# define prefetchw(addr) _m_prefetchw(addr)
1479			# elif defined(ARCH_ARM64)
1480			# define prefetchw(addr) __prefetch2((addr), 0x10 )
1481			# elif defined(ARCH_ARM32)
1482			# define prefetchw(addr) __prefetchw(addr)
1483			# endif
1484			#endif
1485			#ifndef prefetchw
1486			# define prefetchw(addr)
1487			#endif
1488			/* _aligned_attribute(n): request n-byte alignment for a declaration. Left undefined when neither form is available. */
1489
1490			#undef _aligned_attribute
1491			#if defined(__GNUC__) \|\| __has_attribute(aligned)
1492			# define _aligned_attribute(n) __attribute__((aligned(n)))
1493			#elif defined(_MSC_VER)
1494			# define _aligned_attribute(n) __declspec(align(n))
1495			#endif
1496			/* _target_attribute(attrs): compile one function for the given target features; expands to nothing when unsupported. */
1497
1498			#if defined(__GNUC__) \|\| __has_attribute(target)
1499			# define _target_attribute(attrs) __attribute__((target(attrs)))
1500			#else
1501			# define _target_attribute(attrs)
1502			#endif
1503			/* Small arithmetic helpers. ARRAY_LEN yields the element count and is only meaningful for true arrays, not pointers. */
1504			/* MIN, MAX, DIV_ROUND_UP, ALIGN and ROUND_UP may evaluate their arguments more than once. */
1505			/* STATIC_ASSERT fails compilation by producing a negative array size when expr is false. */
1506			/* ALIGN rounds n up with a bit mask and is therefore only correct when a is a power of two; ROUND_UP works for any d. */
1507
1508			#define ARRAY_LEN(A) (sizeof(A) / sizeof((A)[0]))
1509			#define MIN(a, b) ((a) <= (b) ? (a) : (b))
1510			#define MAX(a, b) ((a) >= (b) ? (a) : (b))
1511			#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
1512			#define STATIC_ASSERT(expr) ((void)sizeof(char[1 - 2 * !(expr)]))
1513			#define ALIGN(n, a) (((n) + (a) - 1) & ~((a) - 1))
1514			#define ROUND_UP(n, d) ((d) * DIV_ROUND_UP((n), (d)))
1515			/* CPU_IS_LITTLE_ENDIAN(): true on little-endian targets. */
1516			/* It is a compile-time constant when __BYTE_ORDER__ is available, and hard-coded to true for MSVC. */
1517			/* Otherwise it is a function that inspects the first byte of a 32-bit 1 at run time. */
1518
1519
1520
1521			#if defined(__BYTE_ORDER__)
1522			# define CPU_IS_LITTLE_ENDIAN() (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
1523			#elif defined(_MSC_VER)
1524			# define CPU_IS_LITTLE_ENDIAN() true
1525			#else
1526			static forceinline bool CPU_IS_LITTLE_ENDIAN(void)
1527			{
1528			union {
1529			u32 w;
1530			u8 b;
1531			} u;
1532
1533			u.w = 1; /* the lowest-addressed byte holds the 1 only on a little-endian machine */
1534			return u.b;
1535			}
1536			#endif
1537			/* Returns v with its two bytes exchanged, using a compiler intrinsic when one is available. */
1538
1539			static forceinline u16 bswap16(u16 v)
1540			{
1541			#if defined(__GNUC__) \|\| __has_builtin(__builtin_bswap16)
1542			return __builtin_bswap16(v);
1543			#elif defined(_MSC_VER)
1544			return _byteswap_ushort(v);
1545			#else
1546			return (v << 8) \| (v >> 8);
1547			#endif
1548			}
1549			/* Returns v with its four bytes in reverse order, using a compiler intrinsic when one is available. */
1550
1551			static forceinline u32 bswap32(u32 v)
1552			{
1553			#if defined(__GNUC__) \|\| __has_builtin(__builtin_bswap32)
1554			return __builtin_bswap32(v);
1555			#elif defined(_MSC_VER)
1556			return _byteswap_ulong(v);
1557			#else
1558			return ((v & 0x000000FF) << 24) \|
1559			((v & 0x0000FF00) << 8) \|
1560			((v & 0x00FF0000) >> 8) \|
1561			((v & 0xFF000000) >> 24);
1562			#endif
1563			}
1564			/* Returns v with its eight bytes in reverse order, using a compiler intrinsic when one is available. */
1565
1566			static forceinline u64 bswap64(u64 v)
1567			{
1568			#if defined(__GNUC__) \|\| __has_builtin(__builtin_bswap64)
1569			return __builtin_bswap64(v);
1570			#elif defined(_MSC_VER)
1571			return _byteswap_uint64(v);
1572			#else
1573			return ((v & 0x00000000000000FF) << 56) \|
1574			((v & 0x000000000000FF00) << 40) \|
1575			((v & 0x0000000000FF0000) << 24) \|
1576			((v & 0x00000000FF000000) << 8) \|
1577			((v & 0x000000FF00000000) >> 8) \|
1578			((v & 0x0000FF0000000000) >> 24) \|
1579			((v & 0x00FF000000000000) >> 40) \|
1580			((v & 0xFF00000000000000) >> 56);
1581			#endif
1582			}
1583			/* leNN_bswap/beNN_bswap convert between CPU byte order and little/big-endian: a swap only when the orders differ. */
1584			#define le16_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap16(v))
1585			#define le32_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap32(v))
1586			#define le64_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap64(v))
1587			#define be16_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? bswap16(v) : (v))
1588			#define be32_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? bswap32(v) : (v))
1589			#define be64_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? bswap64(v) : (v))
1590			/* UNALIGNED_ACCESS_IS_FAST is 1 when word-sized loads and stores at arbitrary addresses are treated as cheap. */
1591			/* That covers the GNU-compatible targets listed below and any MSVC build; it is 0 everywhere else. */
1592			/* The get_unaligned_*() and put_unaligned_*() helpers use it to choose between whole-word and byte-by-byte access. */
1593
1594
1595
1596			#if (defined(__GNUC__) \|\| defined(__clang__)) && \
1597			(defined(ARCH_X86_64) \|\| defined(ARCH_X86_32) \|\| \
1598			defined(__ARM_FEATURE_UNALIGNED) \|\| defined(__powerpc64__) \|\| \
1599			defined(__riscv_misaligned_fast) \|\| \
1600			defined(__wasm__))
1601			# define UNALIGNED_ACCESS_IS_FAST 1
1602			#elif defined(_MSC_VER)
1603			# define UNALIGNED_ACCESS_IS_FAST 1
1604			#else
1605			# define UNALIGNED_ACCESS_IS_FAST 0
1606			#endif
1607			/* MEMCOPY is the copy routine used by the generated accessors: the compiler builtin when FREESTANDING, memcpy otherwise. */
1608			/* It is a temporary helper and is undefined again once the accessors have been generated. */
1609
1610			#ifdef FREESTANDING
1611			# define MEMCOPY __builtin_memcpy
1612			#else
1613			# define MEMCOPY memcpy
1614			#endif
1615			/* DEFINE_UNALIGNED_TYPE(type) generates load_<type>_unaligned(p) and store_<type>_unaligned(v, p). */
1616			/* Both copy sizeof(type) bytes through MEMCOPY, so p needs no particular alignment and no byte swapping is done. */
1617
1618			#define DEFINE_UNALIGNED_TYPE(type) \
1619			static forceinline type \
1620			load_##type##_unaligned(const void *p) \
1621			{ \
1622			type v; \
1623			\
1624			MEMCOPY(&v, p, sizeof(v)); \
1625			return v; \
1626			} \
1627			\
1628			static forceinline void \
1629			store_##type##_unaligned(type v, void *p) \
1630			{ \
1631			MEMCOPY(p, &v, sizeof(v)); \
1632			}
1633
1634			DEFINE_UNALIGNED_TYPE(u16)
1635			DEFINE_UNALIGNED_TYPE(u32)
1636			DEFINE_UNALIGNED_TYPE(u64)
1637			DEFINE_UNALIGNED_TYPE(machine_word_t)
1638
1639			#undef MEMCOPY
1640			/* Shorter names for the machine_word_t accessors generated above. */
1641			#define load_word_unaligned load_machine_word_t_unaligned
1642			#define store_word_unaligned store_machine_word_t_unaligned
1643			/* Reads a 16-bit little-endian value from p, which may have any alignment. */
1644			/* Uses one whole-word load when unaligned access is fast, otherwise assembles the value from single bytes. */
1645
1646			static forceinline u16
1647			get_unaligned_le16(const u8 *p)
1648			{
1649			if (UNALIGNED_ACCESS_IS_FAST)
1650			return le16_bswap(load_u16_unaligned(p));
1651			else
1652			return ((u16)p[1] << 8) \| p[0];
1653			}
1654			/* Reads a 16-bit big-endian value from p, which may have any alignment. */
1655			static forceinline u16
1656			get_unaligned_be16(const u8 *p)
1657			{
1658			if (UNALIGNED_ACCESS_IS_FAST)
1659			return be16_bswap(load_u16_unaligned(p));
1660			else
1661			return ((u16)p[0] << 8) \| p[1];
1662			}
1663			/* Reads a 32-bit little-endian value from p, which may have any alignment. */
1664			static forceinline u32
1665			get_unaligned_le32(const u8 *p)
1666			{
1667			if (UNALIGNED_ACCESS_IS_FAST)
1668			return le32_bswap(load_u32_unaligned(p));
1669			else
1670			return ((u32)p[3] << 24) \| ((u32)p[2] << 16) \|
1671			((u32)p[1] << 8) \| p[0];
1672			}
1673			/* Reads a 32-bit big-endian value from p, which may have any alignment. */
1674			static forceinline u32
1675			get_unaligned_be32(const u8 *p)
1676			{
1677			if (UNALIGNED_ACCESS_IS_FAST)
1678			return be32_bswap(load_u32_unaligned(p));
1679			else
1680			return ((u32)p[0] << 24) \| ((u32)p[1] << 16) \|
1681			((u32)p[2] << 8) \| p[3];
1682			}
1683			/* Reads a 64-bit little-endian value from p, which may have any alignment. */
1684			static forceinline u64
1685			get_unaligned_le64(const u8 *p)
1686			{
1687			if (UNALIGNED_ACCESS_IS_FAST)
1688			return le64_bswap(load_u64_unaligned(p));
1689			else
1690			return ((u64)p[7] << 56) \| ((u64)p[6] << 48) \|
1691			((u64)p[5] << 40) \| ((u64)p[4] << 32) \|
1692			((u64)p[3] << 24) \| ((u64)p[2] << 16) \|
1693			((u64)p[1] << 8) \| p[0];
1694			}
1695			/* Reads one little-endian machine word from p by dispatching on WORDBITS, which must be 32 or 64. */
1696			static forceinline machine_word_t
1697			get_unaligned_leword(const u8 *p)
1698			{
1699			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
1700			if (WORDBITS == 32)
1701			return get_unaligned_le32(p);
1702			else
1703			return get_unaligned_le64(p);
1704			}
1705			/* Writes v to p as a 16-bit little-endian value; p may have any alignment. */
1706			/* Uses one whole-word store when unaligned access is fast, otherwise writes the bytes one at a time. */
1707
1708			static forceinline void
1709			put_unaligned_le16(u16 v, u8 *p)
1710			{
1711			if (UNALIGNED_ACCESS_IS_FAST) {
1712			store_u16_unaligned(le16_bswap(v), p);
1713			} else {
1714			p[0] = (u8)(v >> 0);
1715			p[1] = (u8)(v >> 8);
1716			}
1717			}
1718			/* Writes v to p as a 16-bit big-endian value; p may have any alignment. */
1719			static forceinline void
1720			put_unaligned_be16(u16 v, u8 *p)
1721			{
1722			if (UNALIGNED_ACCESS_IS_FAST) {
1723			store_u16_unaligned(be16_bswap(v), p);
1724			} else {
1725			p[0] = (u8)(v >> 8);
1726			p[1] = (u8)(v >> 0);
1727			}
1728			}
1729			/* Writes v to p as a 32-bit little-endian value; p may have any alignment. */
1730			static forceinline void
1731			put_unaligned_le32(u32 v, u8 *p)
1732			{
1733			if (UNALIGNED_ACCESS_IS_FAST) {
1734			store_u32_unaligned(le32_bswap(v), p);
1735			} else {
1736			p[0] = (u8)(v >> 0);
1737			p[1] = (u8)(v >> 8);
1738			p[2] = (u8)(v >> 16);
1739			p[3] = (u8)(v >> 24);
1740			}
1741			}
1742			/* Writes v to p as a 32-bit big-endian value; p may have any alignment. */
1743			static forceinline void
1744			put_unaligned_be32(u32 v, u8 *p)
1745			{
1746			if (UNALIGNED_ACCESS_IS_FAST) {
1747			store_u32_unaligned(be32_bswap(v), p);
1748			} else {
1749			p[0] = (u8)(v >> 24);
1750			p[1] = (u8)(v >> 16);
1751			p[2] = (u8)(v >> 8);
1752			p[3] = (u8)(v >> 0);
1753			}
1754			}
1755			/* Writes v to p as a 64-bit little-endian value; p may have any alignment. */
1756			static forceinline void
1757			put_unaligned_le64(u64 v, u8 *p)
1758			{
1759			if (UNALIGNED_ACCESS_IS_FAST) {
1760			store_u64_unaligned(le64_bswap(v), p);
1761			} else {
1762			p[0] = (u8)(v >> 0);
1763			p[1] = (u8)(v >> 8);
1764			p[2] = (u8)(v >> 16);
1765			p[3] = (u8)(v >> 24);
1766			p[4] = (u8)(v >> 32);
1767			p[5] = (u8)(v >> 40);
1768			p[6] = (u8)(v >> 48);
1769			p[7] = (u8)(v >> 56);
1770			}
1771			}
1772			/* Writes one little-endian machine word to p by dispatching on WORDBITS, which must be 32 or 64. */
1773			static forceinline void
1774			put_unaligned_leword(machine_word_t v, u8 *p)
1775			{
1776			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
1777			if (WORDBITS == 32)
1778			put_unaligned_le32(v, p);
1779			else
1780			put_unaligned_le64(v, p);
1781			}
1782			/* Bit Scan Reverse: returns the zero-based index of the most significant set bit of v. */
1783			/* v is expected to be nonzero: __builtin_clz(0) is undefined, whereas the portable loop happens to return 0. */
1784
1785
1786
1787
1788
1789			static forceinline unsigned
1790			bsr32(u32 v)
1791			{
1792			#if defined(__GNUC__) \|\| __has_builtin(__builtin_clz)
1793			return 31 - __builtin_clz(v);
1794			#elif defined(_MSC_VER)
1795			unsigned long i;
1796
1797			_BitScanReverse(&i, v);
1798			return i;
1799			#else
1800			unsigned i = 0;
1801
1802			while ((v >>= 1) != 0)
1803			i++;
1804			return i;
1805			#endif
1806			}
1807			/* 64-bit Bit Scan Reverse: index of the most significant set bit of v. The MSVC intrinsic is only used on _WIN64. */
1808			static forceinline unsigned
1809			bsr64(u64 v)
1810			{
1811			#if defined(__GNUC__) \|\| __has_builtin(__builtin_clzll)
1812			return 63 - __builtin_clzll(v);
1813			#elif defined(_MSC_VER) && defined(_WIN64)
1814			unsigned long i;
1815
1816			_BitScanReverse64(&i, v);
1817			return i;
1818			#else
1819			unsigned i = 0;
1820
1821			while ((v >>= 1) != 0)
1822			i++;
1823			return i;
1824			#endif
1825			}
1826			/* Machine-word Bit Scan Reverse: forwards to bsr32() or bsr64() according to WORDBITS. */
1827			static forceinline unsigned
1828			bsrw(machine_word_t v)
1829			{
1830			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
1831			if (WORDBITS == 32)
1832			return bsr32(v);
1833			else
1834			return bsr64(v);
1835			}
1836			/* Bit Scan Forward: returns the zero-based index of the least significant set bit of v. */
1837			/* v must be nonzero: __builtin_ctz(0) is undefined and the portable loop would never terminate. */
1838
1839			static forceinline unsigned
1840			bsf32(u32 v)
1841			{
1842			#if defined(__GNUC__) \|\| __has_builtin(__builtin_ctz)
1843			return __builtin_ctz(v);
1844			#elif defined(_MSC_VER)
1845			unsigned long i;
1846
1847			_BitScanForward(&i, v);
1848			return i;
1849			#else
1850			unsigned i = 0;
1851
1852			for (; (v & 1) == 0; v >>= 1)
1853			i++;
1854			return i;
1855			#endif
1856			}
1857			/* 64-bit Bit Scan Forward: index of the least significant set bit of v; v must be nonzero. */
1858			static forceinline unsigned
1859			bsf64(u64 v)
1860			{
1861			#if defined(__GNUC__) \|\| __has_builtin(__builtin_ctzll)
1862			return __builtin_ctzll(v);
1863			#elif defined(_MSC_VER) && defined(_WIN64)
1864			unsigned long i;
1865
1866			_BitScanForward64(&i, v);
1867			return i;
1868			#else
1869			unsigned i = 0;
1870
1871			for (; (v & 1) == 0; v >>= 1)
1872			i++;
1873			return i;
1874			#endif
1875			}
1876			/* Machine-word Bit Scan Forward: forwards to bsf32() or bsf64() according to WORDBITS. */
1877			static forceinline unsigned
1878			bsfw(machine_word_t v)
1879			{
1880			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
1881			if (WORDBITS == 32)
1882			return bsf32(v);
1883			else
1884			return bsf64(v);
1885			}
1886			/* rbit32(v): runs the ARM "rbit" (reverse bits) instruction on a 32-bit value via inline assembly. */
1887			/* It exists only on GNU-compatible 32-bit ARM (ARMv7+, or ARMv6T2) and on ARM64; rbit32 is also defined as a macro there. */
1888			#undef rbit32
1889			#if (defined(__GNUC__) \|\| defined(__clang__)) && defined(ARCH_ARM32) && \
1890			(__ARM_ARCH >= 7 \|\| (__ARM_ARCH == 6 && defined(__ARM_ARCH_6T2__)))
1891			static forceinline u32
1892			rbit32(u32 v)
1893			{
1894			__asm__("rbit %0, %1" : "=r" (v) : "r" (v));
1895			return v;
1896			}
1897			#define rbit32 rbit32 /* NOTE(review): presumably lets later code test for availability with #ifdef - confirm */
1898			#elif (defined(__GNUC__) \|\| defined(__clang__)) && defined(ARCH_ARM64)
1899			static forceinline u32
1900			rbit32(u32 v)
1901			{
1902			__asm__("rbit %w0, %w1" : "=r" (v) : "r" (v));
1903			return v;
1904			}
1905			#define rbit32 rbit32
1906			#endif
1907
1908			#endif
1909			/* Pointer-to-function types with the shapes of malloc() (size in, pointer out) and free() (pointer in). */
1910
1911			typedef void *(*malloc_func_t)(size_t);
1912			typedef void (*free_func_t)(void *);
1913			/* Default allocator callbacks; their definitions are outside this excerpt. */
1914			extern malloc_func_t libdeflate_default_malloc_func;
1915			extern free_func_t libdeflate_default_free_func;
1916			/* Aligned allocation helpers that work through caller-supplied callbacks; defined outside this excerpt. */
1917			void *libdeflate_aligned_malloc(malloc_func_t malloc_func,
1918			size_t alignment, size_t size);
1919			void libdeflate_aligned_free(free_func_t free_func, void *ptr);
1920
1921			#ifdef FREESTANDING
1922			/* Freestanding build: declare the four mem*() routines with their standard prototypes and route calls to the compiler builtins. */
1923			void *memset(void *s, int c, size_t n);
1924			#define memset(s, c, n) __builtin_memset((s), (c), (n))
1925
1926			void *memcpy(void *dest, const void *src, size_t n);
1927			#define memcpy(dest, src, n) __builtin_memcpy((dest), (src), (n))
1928
1929			void *memmove(void *dest, const void *src, size_t n);
1930			#define memmove(dest, src, n) __builtin_memmove((dest), (src), (n))
1931
1932			int memcmp(const void *s1, const void *s2, size_t n);
1933			#define memcmp(s1, s2, n) __builtin_memcmp((s1), (s2), (n))
1934			/* Assertions are always disabled in freestanding builds. */
1935			#undef LIBDEFLATE_ENABLE_ASSERTIONS
1936			#else
1937			# include <string.h>
1938			/* Hosted build: assertions are switched on automatically when the clang static analyzer is running. */
1939			# ifdef __clang_analyzer__
1940			# define LIBDEFLATE_ENABLE_ASSERTIONS
1941			# endif
1942			#endif
1943			/* ASSERT(expr): with LIBDEFLATE_ENABLE_ASSERTIONS, a false expr calls libdeflate_assertion_failed(), which is declared NORETURN, */
1944			/* passing the expression text, file name and line number. Without it, expr is still evaluated but its value is discarded. */
1945			#ifdef LIBDEFLATE_ENABLE_ASSERTIONS
1946			NORETURN void
1947			libdeflate_assertion_failed(const char *expr, const char *file, int line);
1948			#define ASSERT(expr) { if (unlikely(!(expr))) \
1949			libdeflate_assertion_failed(#expr, __FILE__, __LINE__); }
1950			#else
1951			#define ASSERT(expr) (void)(expr)
1952			#endif
1953			/* Token pasting in two steps so that macro arguments are expanded first; ADD_SUFFIX(name) appends SUFFIX, which is defined elsewhere. */
1954			#define CONCAT_IMPL(a, b) a##b
1955			#define CONCAT(a, b) CONCAT_IMPL(a, b)
1956			#define ADD_SUFFIX(name) CONCAT(name, SUFFIX)
1957
1958			#endif
1959
1960
1961			#if defined(ARCH_ARM32) \|\| defined(ARCH_ARM64)
1962			/* One bit per optional ARM CPU feature, as tested by the HAVE_*(features) macros that follow. */
1963			#define ARM_CPU_FEATURE_NEON (1 << 0)
1964			#define ARM_CPU_FEATURE_PMULL (1 << 1)
1965			/* NOTE(review): PREFER_PMULL reads like a tuning hint rather than a capability bit - confirm with its users. */
1966			#define ARM_CPU_FEATURE_PREFER_PMULL (1 << 2)
1967			#define ARM_CPU_FEATURE_CRC32 (1 << 3)
1968			#define ARM_CPU_FEATURE_SHA3 (1 << 4)
1969			#define ARM_CPU_FEATURE_DOTPROD (1 << 5)
1970			/* get_arm_cpu_features(): returns the ARM_CPU_FEATURE_* bit mask. Always 0 when freestanding or on an OS/compiler not listed below. */
1971			#if !defined(FREESTANDING) && \
1972			(defined(__GNUC__) \|\| defined(__clang__) \|\| defined(_MSC_VER)) && \
1973			(defined(__linux__) \|\| \
1974			(defined(__APPLE__) && defined(ARCH_ARM64)) \|\| \
1975			(defined(_WIN32) && defined(ARCH_ARM64)))
1976			/* NOTE(review): presumably the initializer sets this bit so the cached word is never 0 after detection - confirm. */
1977			# define ARM_CPU_FEATURES_KNOWN (1U << 31)
1978			extern volatile u32 libdeflate_arm_cpu_features;
1979
1980			void libdeflate_init_arm_cpu_features(void);
1981
1982			static inline u32 get_arm_cpu_features(void)
1983			{
1984			if (libdeflate_arm_cpu_features == 0) /* a zero word triggers the initializer before the cached value is read */
1985			libdeflate_init_arm_cpu_features();
1986			return libdeflate_arm_cpu_features;
1987			}
1988			#else
1989			static inline u32 get_arm_cpu_features(void) { return 0; }
1990			#endif
1991			/* HAVE_NEON(features) is constant 1 when the whole build targets NEON (__ARM_NEON, or MSVC on ARM64); otherwise it tests the run-time bit. */
1992			/* HAVE_NEON_NATIVE records that compile-time case; HAVE_NEON_INTRIN says whether <arm_neon.h> intrinsics may be used at all. */
1993			#if defined(__ARM_NEON) \|\| (defined(_MSC_VER) && defined(ARCH_ARM64))
1994			# define HAVE_NEON(features) 1
1995			# define HAVE_NEON_NATIVE 1
1996			#else
1997			# define HAVE_NEON(features) ((features) & ARM_CPU_FEATURE_NEON)
1998			# define HAVE_NEON_NATIVE 0
1999			#endif
2000
2001			#if (defined(__GNUC__) \|\| defined(__clang__) \|\| defined(_MSC_VER)) && \
2002			(HAVE_NEON_NATIVE \|\| (GCC_PREREQ(6, 1) && defined(__ARM_FP)))
2003			# define HAVE_NEON_INTRIN 1
2004			# include <arm_neon.h>
2005			#else
2006			# define HAVE_NEON_INTRIN 0
2007			#endif
2008			/* HAVE_PMULL(features): constant 1 when __ARM_FEATURE_CRYPTO is defined for the build, otherwise tests the run-time bit. */
2009			/* HAVE_PMULL_INTRIN: the intrinsics are only used on little-endian ARM64 with NEON intrinsics and GCC 7.1+, clang or MSVC. */
2010			#ifdef __ARM_FEATURE_CRYPTO
2011			# define HAVE_PMULL(features) 1
2012			#else
2013			# define HAVE_PMULL(features) ((features) & ARM_CPU_FEATURE_PMULL)
2014			#endif
2015			#if defined(ARCH_ARM64) && HAVE_NEON_INTRIN && \
2016			(GCC_PREREQ(7, 1) \|\| defined(__clang__) \|\| defined(_MSC_VER)) && \
2017			CPU_IS_LITTLE_ENDIAN()
2018			# define HAVE_PMULL_INTRIN 1
2019			/* compat_vmull_p64 wraps vmull_p64: under MSVC both operands are first passed through vcreate_p64(). */
2020			# ifdef _MSC_VER
2021			# define compat_vmull_p64(a, b) vmull_p64(vcreate_p64(a), vcreate_p64(b))
2022			# else
2023			# define compat_vmull_p64(a, b) vmull_p64((a), (b))
2024			# endif
2025			#else
2026			# define HAVE_PMULL_INTRIN 0
2027			#endif
2028			/* HAVE_CRC32(features): constant 1 when __ARM_FEATURE_CRC32 is defined for the build, otherwise tests the run-time bit. */
2029			/* HAVE_CRC32_INTRIN: the CRC32 intrinsics are used on ARM64 with GCC, clang or MSVC. */
2030			#ifdef __ARM_FEATURE_CRC32
2031			# define HAVE_CRC32(features) 1
2032			#else
2033			# define HAVE_CRC32(features) ((features) & ARM_CPU_FEATURE_CRC32)
2034			#endif
2035			#if defined(ARCH_ARM64) && \
2036			(defined(__GNUC__) \|\| defined(__clang__) \|\| defined(_MSC_VER))
2037			# define HAVE_CRC32_INTRIN 1
2038			# if defined(__GNUC__) \|\| defined(__clang__)
2039			# include <arm_acle.h>
2040			# endif
2041			/* clang before 16 (Apple build 16000000) without __ARM_FEATURE_CRC32: replace the four __crc32*() intrinsics with inline assembly. */
2042			# if defined(__clang__) && !CLANG_PREREQ(16, 0, 16000000) && \
2043			!defined(__ARM_FEATURE_CRC32)
2044			# undef __crc32b
2045			# define __crc32b(a, b) \
2046			({ uint32_t res; \
2047			__asm__("crc32b %w0, %w1, %w2" \
2048			: "=r" (res) : "r" (a), "r" (b)); \
2049			res; })
2050			# undef __crc32h
2051			# define __crc32h(a, b) \
2052			({ uint32_t res; \
2053			__asm__("crc32h %w0, %w1, %w2" \
2054			: "=r" (res) : "r" (a), "r" (b)); \
2055			res; })
2056			# undef __crc32w
2057			# define __crc32w(a, b) \
2058			({ uint32_t res; \
2059			__asm__("crc32w %w0, %w1, %w2" \
2060			: "=r" (res) : "r" (a), "r" (b)); \
2061			res; })
2062			# undef __crc32d
2063			# define __crc32d(a, b) \
2064			({ uint32_t res; \
2065			__asm__("crc32x %w0, %w1, %2" \
2066			: "=r" (res) : "r" (a), "r" (b)); \
2067			res; })
2068			# pragma clang diagnostic ignored "-Wgnu-statement-expression"
2069			# endif
2070			#else
2071			# define HAVE_CRC32_INTRIN 0
2072			#endif
2073			/* HAVE_SHA3(features): constant 1 when __ARM_FEATURE_SHA3 is defined for the build, otherwise tests the run-time bit. */
2074			/* HAVE_SHA3_INTRIN: needs ARM64, NEON intrinsics and GCC 9.1+ or clang 7.0+ (Apple build 10010463+); MSVC is not listed. */
2075			#ifdef __ARM_FEATURE_SHA3
2076			# define HAVE_SHA3(features) 1
2077			#else
2078			# define HAVE_SHA3(features) ((features) & ARM_CPU_FEATURE_SHA3)
2079			#endif
2080			#if defined(ARCH_ARM64) && HAVE_NEON_INTRIN && \
2081			(GCC_PREREQ(9, 1) \|\| \
2082			CLANG_PREREQ(7, 0, 10010463) )
2083			# define HAVE_SHA3_INTRIN 1
2084			/* clang before 16 without __ARM_FEATURE_SHA3: replace veor3q_u8() with an inline-assembly "eor3" on three 16-byte vectors. */
2085			# if defined(__clang__) && !CLANG_PREREQ(16, 0, 16000000) && \
2086			!defined(__ARM_FEATURE_SHA3)
2087			# undef veor3q_u8
2088			# define veor3q_u8(a, b, c) \
2089			({ uint8x16_t res; \
2090			__asm__("eor3 %0.16b, %1.16b, %2.16b, %3.16b" \
2091			: "=w" (res) : "w" (a), "w" (b), "w" (c)); \
2092			res; })
2093			# pragma clang diagnostic ignored "-Wgnu-statement-expression"
2094			# endif
2095			#else
2096			# define HAVE_SHA3_INTRIN 0
2097			#endif
2098			/* HAVE_DOTPROD(features): constant 1 when __ARM_FEATURE_DOTPROD is defined for the build, otherwise tests the run-time bit. */
2099			/* HAVE_DOTPROD_INTRIN: needs ARM64, NEON intrinsics and GCC 8.1+, clang 7.0+ (Apple build 10010000+) or MSVC. */
2100			#ifdef __ARM_FEATURE_DOTPROD
2101			# define HAVE_DOTPROD(features) 1
2102			#else
2103			# define HAVE_DOTPROD(features) ((features) & ARM_CPU_FEATURE_DOTPROD)
2104			#endif
2105			#if defined(ARCH_ARM64) && HAVE_NEON_INTRIN && \
2106			(GCC_PREREQ(8, 1) \|\| CLANG_PREREQ(7, 0, 10010000) \|\| defined(_MSC_VER))
2107			# define HAVE_DOTPROD_INTRIN 1
2108			/* clang before 16 without __ARM_FEATURE_DOTPROD: replace vdotq_u32() with an inline-assembly "udot" that accumulates into a. */
2109			# if defined(__clang__) && !CLANG_PREREQ(16, 0, 16000000) && \
2110			!defined(__ARM_FEATURE_DOTPROD)
2111			# undef vdotq_u32
2112			# define vdotq_u32(a, b, c) \
2113			({ uint32x4_t res = (a); \
2114			__asm__("udot %0.4s, %1.16b, %2.16b" \
2115			: "+w" (res) : "w" (b), "w" (c)); \
2116			res; })
2117			# pragma clang diagnostic ignored "-Wgnu-statement-expression"
2118			# endif
2119			#else
2120			# define HAVE_DOTPROD_INTRIN 0
2121			#endif
2122
2123			#endif
2124
2125			#endif
2126			/* NEON implementation of the Adler-32 update; built only when NEON intrinsics are usable on a little-endian target. */
2127			/* Takes the running checksum, a byte pointer and a length; returns s2 in the high 16 bits and s1 in the low 16 bits. */
2128			/* DIVISOR, MAX_CHUNK_LEN and ADLER32_CHUNK are defined outside this excerpt. */
2129			#if HAVE_NEON_INTRIN && CPU_IS_LITTLE_ENDIAN()
2130			# define adler32_arm_neon adler32_arm_neon
2131			# if HAVE_NEON_NATIVE
2132			/* NEON is already enabled for the whole build, so no per-function target attribute is needed. */
2133			# define ATTRIBUTES
2134			# elif defined(ARCH_ARM32)
2135			# define ATTRIBUTES _target_attribute("fpu=neon")
2136			# elif defined(__clang__)
2137			# define ATTRIBUTES _target_attribute("simd")
2138			# else
2139			# define ATTRIBUTES _target_attribute("+simd")
2140			# endif
2141			static ATTRIBUTES MAYBE_UNUSED u32
2142			adler32_arm_neon(u32 adler, const u8 *p, size_t len)
2143			{
2144			static const u16 _aligned_attribute(16) mults[64] = {
2145			64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49,
2146			48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33,
2147			32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
2148			16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
2149			};
2150			const uint16x8_t mults_a = vld1q_u16(&mults[0]);
2151			const uint16x8_t mults_b = vld1q_u16(&mults[8]);
2152			const uint16x8_t mults_c = vld1q_u16(&mults[16]);
2153			const uint16x8_t mults_d = vld1q_u16(&mults[24]);
2154			const uint16x8_t mults_e = vld1q_u16(&mults[32]);
2155			const uint16x8_t mults_f = vld1q_u16(&mults[40]);
2156			const uint16x8_t mults_g = vld1q_u16(&mults[48]);
2157			const uint16x8_t mults_h = vld1q_u16(&mults[56]);
2158			u32 s1 = adler & 0xFFFF;
2159			u32 s2 = adler >> 16;
2160			/* For long inputs (len > 32768) that start misaligned, consume single bytes until p is 16-byte aligned, */
2161			/* then reduce both sums modulo DIVISOR. */
2162			if (unlikely(len > 32768 && ((uintptr_t)p & 15))) {
2163			do {
2164			s1 += *p++;
2165			s2 += s1;
2166			len--;
2167			} while ((uintptr_t)p & 15);
2168			s1 %= DIVISOR;
2169			s2 %= DIVISOR;
2170			}
2171
2172			while (len) {
2173			/* Take the next piece: at most MAX_CHUNK_LEN rounded down to a multiple of 64 bytes. */
2174			size_t n = MIN(len, MAX_CHUNK_LEN & ~63);
2175
2176			len -= n;
2177
2178			if (n >= 64) {
2179			uint32x4_t v_s1 = vdupq_n_u32(0);
2180			uint32x4_t v_s2 = vdupq_n_u32(0);
2181			/* Eight vectors of eight 16-bit lanes: one running sum per byte position within a 64-byte block. */
2182			uint16x8_t v_byte_sums_a = vdupq_n_u16(0);
2183			uint16x8_t v_byte_sums_b = vdupq_n_u16(0);
2184			uint16x8_t v_byte_sums_c = vdupq_n_u16(0);
2185			uint16x8_t v_byte_sums_d = vdupq_n_u16(0);
2186			uint16x8_t v_byte_sums_e = vdupq_n_u16(0);
2187			uint16x8_t v_byte_sums_f = vdupq_n_u16(0);
2188			uint16x8_t v_byte_sums_g = vdupq_n_u16(0);
2189			uint16x8_t v_byte_sums_h = vdupq_n_u16(0);
2190			/* Every byte consumed by the vector loop adds the starting s1 to s2 once; account for all of them up front. */
2191			s2 += s1 * (n & ~63);
2192
2193			do {
2194			/* Load the next 64 bytes as four 16-byte vectors. */
2195			const uint8x16_t data_a = vld1q_u8(p + 0);
2196			const uint8x16_t data_b = vld1q_u8(p + 16);
2197			const uint8x16_t data_c = vld1q_u8(p + 32);
2198			const uint8x16_t data_d = vld1q_u8(p + 48);
2199			uint16x8_t tmp;
2200			/* Add the s1 lanes accumulated so far into s2 before this block's bytes are added to s1. */
2201
2202			v_s2 = vaddq_u32(v_s2, v_s1);
2203			/* Widen-add every byte into its per-position sum, and pairwise-accumulate all 64 bytes into tmp for v_s1. */
2204
2205			tmp = vpaddlq_u8(data_a);
2206			v_byte_sums_a = vaddw_u8(v_byte_sums_a,
2207			vget_low_u8(data_a));
2208			v_byte_sums_b = vaddw_u8(v_byte_sums_b,
2209			vget_high_u8(data_a));
2210			tmp = vpadalq_u8(tmp, data_b);
2211			v_byte_sums_c = vaddw_u8(v_byte_sums_c,
2212			vget_low_u8(data_b));
2213			v_byte_sums_d = vaddw_u8(v_byte_sums_d,
2214			vget_high_u8(data_b));
2215			tmp = vpadalq_u8(tmp, data_c);
2216			v_byte_sums_e = vaddw_u8(v_byte_sums_e,
2217			vget_low_u8(data_c));
2218			v_byte_sums_f = vaddw_u8(v_byte_sums_f,
2219			vget_high_u8(data_c));
2220			tmp = vpadalq_u8(tmp, data_d);
2221			v_byte_sums_g = vaddw_u8(v_byte_sums_g,
2222			vget_low_u8(data_d));
2223			v_byte_sums_h = vaddw_u8(v_byte_sums_h,
2224			vget_high_u8(data_d));
2225			v_s1 = vpadalq_u16(v_s1, tmp);
2226
2227			p += 64;
2228			n -= 64;
2229			} while (n >= 64);
2230			/* v_s2 received v_s1 once per 64-byte block, so scale it by 64 (shift left by 6), then add each */
2231			/* per-position byte sum multiplied by its weight from mults (64 down to 1). */
2232			#ifdef ARCH_ARM32
2233			# define umlal2(a, b, c) vmlal_u16((a), vget_high_u16(b), vget_high_u16(c))
2234			#else
2235			# define umlal2 vmlal_high_u16
2236			#endif
2237			v_s2 = vqshlq_n_u32(v_s2, 6);
2238			v_s2 = vmlal_u16(v_s2, vget_low_u16(v_byte_sums_a),
2239			vget_low_u16(mults_a));
2240			v_s2 = umlal2(v_s2, v_byte_sums_a, mults_a);
2241			v_s2 = vmlal_u16(v_s2, vget_low_u16(v_byte_sums_b),
2242			vget_low_u16(mults_b));
2243			v_s2 = umlal2(v_s2, v_byte_sums_b, mults_b);
2244			v_s2 = vmlal_u16(v_s2, vget_low_u16(v_byte_sums_c),
2245			vget_low_u16(mults_c));
2246			v_s2 = umlal2(v_s2, v_byte_sums_c, mults_c);
2247			v_s2 = vmlal_u16(v_s2, vget_low_u16(v_byte_sums_d),
2248			vget_low_u16(mults_d));
2249			v_s2 = umlal2(v_s2, v_byte_sums_d, mults_d);
2250			v_s2 = vmlal_u16(v_s2, vget_low_u16(v_byte_sums_e),
2251			vget_low_u16(mults_e));
2252			v_s2 = umlal2(v_s2, v_byte_sums_e, mults_e);
2253			v_s2 = vmlal_u16(v_s2, vget_low_u16(v_byte_sums_f),
2254			vget_low_u16(mults_f));
2255			v_s2 = umlal2(v_s2, v_byte_sums_f, mults_f);
2256			v_s2 = vmlal_u16(v_s2, vget_low_u16(v_byte_sums_g),
2257			vget_low_u16(mults_g));
2258			v_s2 = umlal2(v_s2, v_byte_sums_g, mults_g);
2259			v_s2 = vmlal_u16(v_s2, vget_low_u16(v_byte_sums_h),
2260			vget_low_u16(mults_h));
2261			v_s2 = umlal2(v_s2, v_byte_sums_h, mults_h);
2262			#undef umlal2
2263			/* Add the four lanes of each vector accumulator into the scalar sums. */
2264
2265			#ifdef ARCH_ARM32
2266			s1 += vgetq_lane_u32(v_s1, 0) + vgetq_lane_u32(v_s1, 1) +
2267			vgetq_lane_u32(v_s1, 2) + vgetq_lane_u32(v_s1, 3);
2268			s2 += vgetq_lane_u32(v_s2, 0) + vgetq_lane_u32(v_s2, 1) +
2269			vgetq_lane_u32(v_s2, 2) + vgetq_lane_u32(v_s2, 3);
2270			#else
2271			s1 += vaddvq_u32(v_s1);
2272			s2 += vaddvq_u32(v_s2);
2273			#endif
2274			}
2275			/* NOTE(review): ADLER32_CHUNK is defined elsewhere; presumably it handles the remaining n bytes and the modulo reduction - confirm. */
2276			ADLER32_CHUNK(s1, s2, p, n);
2277			}
2278			return (s2 << 16) \| s1;
2279			}
2280			#undef ATTRIBUTES
2281			#endif
2282			/* Adler-32 update using the ARM dot-product intrinsic vdotq_u32(); same signature and return layout as adler32_arm_neon(). */
2283			/* Skipped when LIBDEFLATE_ASSEMBLER_DOES_NOT_SUPPORT_DOTPROD is defined. DIVISOR, MAX_CHUNK_LEN and ADLER32_CHUNK are defined elsewhere. */
2284			#if HAVE_DOTPROD_INTRIN && CPU_IS_LITTLE_ENDIAN() && \
2285			!defined(LIBDEFLATE_ASSEMBLER_DOES_NOT_SUPPORT_DOTPROD)
2286			# define adler32_arm_neon_dotprod adler32_arm_neon_dotprod
2287			# ifdef __clang__
2288			# define ATTRIBUTES _target_attribute("dotprod")
2289
2290			# elif GCC_PREREQ(14, 0) \|\| defined(__ARM_FEATURE_JCVT) \
2291			\|\| defined(__ARM_FEATURE_DOTPROD)
2292			# define ATTRIBUTES _target_attribute("+dotprod")
2293			# else
2294			# define ATTRIBUTES _target_attribute("arch=armv8.2-a+dotprod")
2295			# endif
2296			static ATTRIBUTES u32
2297			adler32_arm_neon_dotprod(u32 adler, const u8 *p, size_t len)
2298			{
2299			static const u8 _aligned_attribute(16) mults[64] = {
2300			64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49,
2301			48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33,
2302			32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
2303			16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
2304			};
2305			const uint8x16_t mults_a = vld1q_u8(&mults[0]);
2306			const uint8x16_t mults_b = vld1q_u8(&mults[16]);
2307			const uint8x16_t mults_c = vld1q_u8(&mults[32]);
2308			const uint8x16_t mults_d = vld1q_u8(&mults[48]);
2309			const uint8x16_t ones = vdupq_n_u8(1);
2310			u32 s1 = adler & 0xFFFF;
2311			u32 s2 = adler >> 16;
2312			/* For long inputs (len > 32768) that start misaligned, consume single bytes until p is 16-byte aligned, */
2313			/* then reduce both sums modulo DIVISOR. */
2314			if (unlikely(len > 32768 && ((uintptr_t)p & 15))) {
2315			do {
2316			s1 += *p++;
2317			s2 += s1;
2318			len--;
2319			} while ((uintptr_t)p & 15);
2320			s1 %= DIVISOR;
2321			s2 %= DIVISOR;
2322			}
2323
2324			while (len) {
2325			/* Take the next piece: at most MAX_CHUNK_LEN rounded down to a multiple of 64 bytes. */
2326			size_t n = MIN(len, MAX_CHUNK_LEN & ~63);
2327
2328			len -= n;
2329
2330			if (n >= 64) {
2331			uint32x4_t v_s1_a = vdupq_n_u32(0);
2332			uint32x4_t v_s1_b = vdupq_n_u32(0);
2333			uint32x4_t v_s1_c = vdupq_n_u32(0);
2334			uint32x4_t v_s1_d = vdupq_n_u32(0);
2335			uint32x4_t v_s2_a = vdupq_n_u32(0);
2336			uint32x4_t v_s2_b = vdupq_n_u32(0);
2337			uint32x4_t v_s2_c = vdupq_n_u32(0);
2338			uint32x4_t v_s2_d = vdupq_n_u32(0);
2339			uint32x4_t v_s1_sums_a = vdupq_n_u32(0);
2340			uint32x4_t v_s1_sums_b = vdupq_n_u32(0);
2341			uint32x4_t v_s1_sums_c = vdupq_n_u32(0);
2342			uint32x4_t v_s1_sums_d = vdupq_n_u32(0);
2343			uint32x4_t v_s1;
2344			uint32x4_t v_s2;
2345			uint32x4_t v_s1_sums;
2346			/* Every byte consumed by the vector loop adds the starting s1 to s2 once; account for all of them up front. */
2347			s2 += s1 * (n & ~63);
2348
2349			do {
2350			uint8x16_t data_a = vld1q_u8(p + 0);
2351			uint8x16_t data_b = vld1q_u8(p + 16);
2352			uint8x16_t data_c = vld1q_u8(p + 32);
2353			uint8x16_t data_d = vld1q_u8(p + 48);
2354			/* Per 16-byte vector: record the s1 lanes so far, then dot the bytes with ones (for s1) and with the weights (for s2). */
2355			v_s1_sums_a = vaddq_u32(v_s1_sums_a, v_s1_a);
2356			v_s1_a = vdotq_u32(v_s1_a, data_a, ones);
2357			v_s2_a = vdotq_u32(v_s2_a, data_a, mults_a);
2358
2359			v_s1_sums_b = vaddq_u32(v_s1_sums_b, v_s1_b);
2360			v_s1_b = vdotq_u32(v_s1_b, data_b, ones);
2361			v_s2_b = vdotq_u32(v_s2_b, data_b, mults_b);
2362
2363			v_s1_sums_c = vaddq_u32(v_s1_sums_c, v_s1_c);
2364			v_s1_c = vdotq_u32(v_s1_c, data_c, ones);
2365			v_s2_c = vdotq_u32(v_s2_c, data_c, mults_c);
2366
2367			v_s1_sums_d = vaddq_u32(v_s1_sums_d, v_s1_d);
2368			v_s1_d = vdotq_u32(v_s1_d, data_d, ones);
2369			v_s2_d = vdotq_u32(v_s2_d, data_d, mults_d);
2370
2371			p += 64;
2372			n -= 64;
2373			} while (n >= 64);
2374			/* Merge the four accumulator sets; the recorded s1 sums count once per 64-byte block, hence the shift left by 6. */
2375			v_s1 = vaddq_u32(vaddq_u32(v_s1_a, v_s1_b),
2376			vaddq_u32(v_s1_c, v_s1_d));
2377			v_s2 = vaddq_u32(vaddq_u32(v_s2_a, v_s2_b),
2378			vaddq_u32(v_s2_c, v_s2_d));
2379			v_s1_sums = vaddq_u32(vaddq_u32(v_s1_sums_a,
2380			v_s1_sums_b),
2381			vaddq_u32(v_s1_sums_c,
2382			v_s1_sums_d));
2383			v_s2 = vaddq_u32(v_s2, vqshlq_n_u32(v_s1_sums, 6));
2384			/* Add the four lanes of each vector accumulator into the scalar sums. */
2385			s1 += vaddvq_u32(v_s1);
2386			s2 += vaddvq_u32(v_s2);
2387			}
2388			/* NOTE(review): ADLER32_CHUNK is defined elsewhere; presumably it handles the remaining n bytes and the modulo reduction - confirm. */
2389			ADLER32_CHUNK(s1, s2, p, n);
2390			}
2391			return (s2 << 16) \| s1;
2392			}
2393			#undef ATTRIBUTES
2394			#endif
2395
2396			#if defined(adler32_arm_neon_dotprod) && defined(__ARM_FEATURE_DOTPROD)
2397			#define DEFAULT_IMPL adler32_arm_neon_dotprod
2398			#else
2399			static inline adler32_func_t
2400			arch_select_adler32_func(void)
2401			{
2402			const u32 features MAYBE_UNUSED = get_arm_cpu_features();
2403
2404			#ifdef adler32_arm_neon_dotprod
2405			if (HAVE_NEON(features) && HAVE_DOTPROD(features))
2406			return adler32_arm_neon_dotprod;
2407			#endif
2408			#ifdef adler32_arm_neon
2409			if (HAVE_NEON(features))
2410			return adler32_arm_neon;
2411			#endif
2412			return NULL;
2413			}
2414			#define arch_select_adler32_func arch_select_adler32_func
2415			#endif
2416
2417			#endif
2418
2419			#elif defined(ARCH_X86_32) \|\| defined(ARCH_X86_64)
2420			/* # include "x86/adler32_impl.h" */
2421
2422
2423			#ifndef LIB_X86_ADLER32_IMPL_H
2424			#define LIB_X86_ADLER32_IMPL_H
2425
2426			/* #include "x86-cpu_features.h" */
2427
2428
2429			#ifndef LIB_X86_CPU_FEATURES_H
2430			#define LIB_X86_CPU_FEATURES_H
2431
2432			/* #include "lib_common.h" */
2433
2434
2435			#ifndef LIB_LIB_COMMON_H
2436			#define LIB_LIB_COMMON_H
2437
2438			#ifdef LIBDEFLATE_H
2439
2440			# error "lib_common.h must always be included before libdeflate.h"
2441			#endif
2442
/*
 * LIBDEFLATE_EXPORT_SYM marks a symbol as exported from the library:
 * dllexport for DLL builds on Windows/Cygwin, default ELF visibility on
 * GNU-compatible compilers, and nothing otherwise.
 */
#if defined(LIBDEFLATE_DLL) && (defined(_WIN32) || defined(__CYGWIN__))
#  define LIBDEFLATE_EXPORT_SYM  __declspec(dllexport)
#elif defined(__GNUC__)
#  define LIBDEFLATE_EXPORT_SYM  __attribute__((visibility("default")))
#else
#  define LIBDEFLATE_EXPORT_SYM
#endif
2450
2451
2452			#if defined(__GNUC__) && defined(__i386__)
2453			# define LIBDEFLATE_ALIGN_STACK __attribute__((force_align_arg_pointer))
2454			#else
2455			# define LIBDEFLATE_ALIGN_STACK
2456			#endif
2457
2458			#define LIBDEFLATEAPI LIBDEFLATE_EXPORT_SYM LIBDEFLATE_ALIGN_STACK
2459
2460			/* #include "../common_defs.h" */
2461
2462
2463			#ifndef COMMON_DEFS_H
2464			#define COMMON_DEFS_H
2465
2466			/* #include "libdeflate.h" */
2467
2468
2469			#ifndef LIBDEFLATE_H
2470			#define LIBDEFLATE_H
2471
2472			#include
2473			#include
2474
2475			#ifdef __cplusplus
2476			extern "C" {
2477			#endif
2478
2479			#define LIBDEFLATE_VERSION_MAJOR 1
2480			#define LIBDEFLATE_VERSION_MINOR 25
2481			#define LIBDEFLATE_VERSION_STRING "1.25"
2482
2483
/*
 * Default LIBDEFLATEAPI, used when nothing defined it beforehand:
 * dllimport for DLL builds on Windows/Cygwin, empty otherwise.
 */
#ifndef LIBDEFLATEAPI
#  if defined(LIBDEFLATE_DLL) && (defined(_WIN32) || defined(__CYGWIN__))
#    define LIBDEFLATEAPI	__declspec(dllimport)
#  else
#    define LIBDEFLATEAPI
#  endif
#endif
2491
2492
2493
2494
2495
2496			struct libdeflate_compressor;
2497			struct libdeflate_options;
2498
2499
2500			LIBDEFLATEAPI struct libdeflate_compressor *
2501			libdeflate_alloc_compressor(int compression_level);
2502
2503
2504			LIBDEFLATEAPI struct libdeflate_compressor *
2505			libdeflate_alloc_compressor_ex(int compression_level,
2506			const struct libdeflate_options *options);
2507
2508
2509			LIBDEFLATEAPI size_t
2510			libdeflate_deflate_compress(struct libdeflate_compressor *compressor,
2511			const void *in, size_t in_nbytes,
2512			void *out, size_t out_nbytes_avail);
2513
2514
2515			LIBDEFLATEAPI size_t
2516			libdeflate_deflate_compress_bound(struct libdeflate_compressor *compressor,
2517			size_t in_nbytes);
2518
2519
2520			LIBDEFLATEAPI size_t
2521			libdeflate_zlib_compress(struct libdeflate_compressor *compressor,
2522			const void *in, size_t in_nbytes,
2523			void *out, size_t out_nbytes_avail);
2524
2525
2526			LIBDEFLATEAPI size_t
2527			libdeflate_zlib_compress_bound(struct libdeflate_compressor *compressor,
2528			size_t in_nbytes);
2529
2530
2531			LIBDEFLATEAPI size_t
2532			libdeflate_gzip_compress(struct libdeflate_compressor *compressor,
2533			const void *in, size_t in_nbytes,
2534			void *out, size_t out_nbytes_avail);
2535
2536
2537			LIBDEFLATEAPI size_t
2538			libdeflate_gzip_compress_bound(struct libdeflate_compressor *compressor,
2539			size_t in_nbytes);
2540
2541
2542			LIBDEFLATEAPI void
2543			libdeflate_free_compressor(struct libdeflate_compressor *compressor);
2544
2545
2546
2547
2548
2549			struct libdeflate_decompressor;
2550			struct libdeflate_options;
2551
2552
2553			LIBDEFLATEAPI struct libdeflate_decompressor *
2554			libdeflate_alloc_decompressor(void);
2555
2556
2557			LIBDEFLATEAPI struct libdeflate_decompressor *
2558			libdeflate_alloc_decompressor_ex(const struct libdeflate_options *options);
2559
2560
/*
 * Result code returned by the libdeflate_*_decompress*() functions.
 * The numeric values are fixed explicitly.
 */
enum libdeflate_result {
	LIBDEFLATE_SUCCESS            = 0,	/* no error */
	LIBDEFLATE_BAD_DATA           = 1,	/* presumably: input was invalid */
	LIBDEFLATE_SHORT_OUTPUT       = 2,	/* presumably: fewer bytes produced than requested */
	LIBDEFLATE_INSUFFICIENT_SPACE = 3,	/* presumably: output buffer too small */
};
2574
2575
2576			LIBDEFLATEAPI enum libdeflate_result
2577			libdeflate_deflate_decompress(struct libdeflate_decompressor *decompressor,
2578			const void *in, size_t in_nbytes,
2579			void *out, size_t out_nbytes_avail,
2580			size_t *actual_out_nbytes_ret);
2581
2582
2583			LIBDEFLATEAPI enum libdeflate_result
2584			libdeflate_deflate_decompress_ex(struct libdeflate_decompressor *decompressor,
2585			const void *in, size_t in_nbytes,
2586			void *out, size_t out_nbytes_avail,
2587			size_t *actual_in_nbytes_ret,
2588			size_t *actual_out_nbytes_ret);
2589
2590
2591			LIBDEFLATEAPI enum libdeflate_result
2592			libdeflate_zlib_decompress(struct libdeflate_decompressor *decompressor,
2593			const void *in, size_t in_nbytes,
2594			void *out, size_t out_nbytes_avail,
2595			size_t *actual_out_nbytes_ret);
2596
2597
2598			LIBDEFLATEAPI enum libdeflate_result
2599			libdeflate_zlib_decompress_ex(struct libdeflate_decompressor *decompressor,
2600			const void *in, size_t in_nbytes,
2601			void *out, size_t out_nbytes_avail,
2602			size_t *actual_in_nbytes_ret,
2603			size_t *actual_out_nbytes_ret);
2604
2605
2606			LIBDEFLATEAPI enum libdeflate_result
2607			libdeflate_gzip_decompress(struct libdeflate_decompressor *decompressor,
2608			const void *in, size_t in_nbytes,
2609			void *out, size_t out_nbytes_avail,
2610			size_t *actual_out_nbytes_ret);
2611
2612
2613			LIBDEFLATEAPI enum libdeflate_result
2614			libdeflate_gzip_decompress_ex(struct libdeflate_decompressor *decompressor,
2615			const void *in, size_t in_nbytes,
2616			void *out, size_t out_nbytes_avail,
2617			size_t *actual_in_nbytes_ret,
2618			size_t *actual_out_nbytes_ret);
2619
2620
2621			LIBDEFLATEAPI void
2622			libdeflate_free_decompressor(struct libdeflate_decompressor *decompressor);
2623
2624
2625
2626
2627
2628
2629			LIBDEFLATEAPI uint32_t
2630			libdeflate_adler32(uint32_t adler, const void *buffer, size_t len);
2631
2632
2633
2634			LIBDEFLATEAPI uint32_t
2635			libdeflate_crc32(uint32_t crc, const void *buffer, size_t len);
2636
2637
2638
2639
2640
2641
2642			LIBDEFLATEAPI void
2643			libdeflate_set_memory_allocator(void (malloc_func)(size_t),
2644			void (free_func)(void ));
2645
2646
/*
 * Options passed to the *_ex() allocation functions declared in this header.
 */
struct libdeflate_options {

	/* Size of this structure in bytes, as filled in by the caller. */
	size_t sizeof_options;

	/*
	 * Allocator callbacks: malloc_func takes a size and returns a
	 * pointer; free_func takes a pointer and returns nothing.
	 */
	void *(*malloc_func)(size_t);
	void (*free_func)(void *);
};
2656
2657			#ifdef __cplusplus
2658			}
2659			#endif
2660
2661			#endif
2662
2663
2664			#include
2665			#include
2666			#include
2667			#ifdef _MSC_VER
2668			# include
2669			# include
2670
2671
2672			# pragma warning(disable : 4146)
2673
2674			# pragma warning(disable : 4018)
2675			# pragma warning(disable : 4244)
2676			# pragma warning(disable : 4267)
2677			# pragma warning(disable : 4310)
2678
2679			# pragma warning(disable : 4100)
2680			# pragma warning(disable : 4127)
2681			# pragma warning(disable : 4189)
2682			# pragma warning(disable : 4232)
2683			# pragma warning(disable : 4245)
2684			# pragma warning(disable : 4295)
2685			#endif
2686			#ifndef FREESTANDING
2687			# include
2688			#endif
2689
2690
2691
2692
2693
2694
2695			#undef ARCH_X86_64
2696			#undef ARCH_X86_32
2697			#undef ARCH_ARM64
2698			#undef ARCH_ARM32
2699			#undef ARCH_RISCV
2700			#ifdef _MSC_VER
2701
2702			# if defined(_M_X64) && !defined(_M_ARM64EC)
2703			# define ARCH_X86_64
2704			# elif defined(_M_IX86)
2705			# define ARCH_X86_32
2706			# elif defined(_M_ARM64)
2707			# define ARCH_ARM64
2708			# elif defined(_M_ARM)
2709			# define ARCH_ARM32
2710			# endif
2711			#else
2712			# if defined(__x86_64__)
2713			# define ARCH_X86_64
2714			# elif defined(__i386__)
2715			# define ARCH_X86_32
2716			# elif defined(__aarch64__)
2717			# define ARCH_ARM64
2718			# elif defined(__arm__)
2719			# define ARCH_ARM32
2720			# elif defined(__riscv)
2721			# define ARCH_RISCV
2722			# endif
2723			#endif
2724
2725
2726
2727
2728
2729
2730			typedef uint8_t u8;
2731			typedef uint16_t u16;
2732			typedef uint32_t u32;
2733			typedef uint64_t u64;
2734			typedef int8_t s8;
2735			typedef int16_t s16;
2736			typedef int32_t s32;
2737			typedef int64_t s64;
2738
2739
2740			#ifdef _MSC_VER
2741			# ifdef _WIN64
2742			typedef long long ssize_t;
2743			# else
2744			typedef long ssize_t;
2745			# endif
2746			#endif
2747
2748
2749			typedef size_t machine_word_t;
2750
2751
2752			#define WORDBYTES ((int)sizeof(machine_word_t))
2753
2754
2755			#define WORDBITS (8 * WORDBYTES)
2756
2757
2758
2759
2760
2761
/*
 * GCC_PREREQ(major, minor) is nonzero when compiling with real gcc (not clang
 * or the Intel compiler) of at least the given version, and 0 otherwise.
 * gcc older than 4.9 is rejected at compile time.
 */
#if defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER)
#  define GCC_PREREQ(major, minor)		\
	(__GNUC__ > (major) ||			\
	 (__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)))
#  if !GCC_PREREQ(4, 9)
#    error "gcc versions older than 4.9 are no longer supported"
#  endif
#else
#  define GCC_PREREQ(major, minor)	0
#endif
/*
 * CLANG_PREREQ(major, minor, apple_version) is nonzero when compiling with
 * clang of at least the given version, and 0 otherwise.  Apple's clang is
 * compared by __apple_build_version__ instead of major/minor.  clang older
 * than 3.9 (Apple build 8000000) is rejected at compile time.
 */
#ifdef __clang__
#  ifdef __apple_build_version__
#    define CLANG_PREREQ(major, minor, apple_version)	\
	(__apple_build_version__ >= (apple_version))
#  else
#    define CLANG_PREREQ(major, minor, apple_version)	\
	(__clang_major__ > (major) ||			\
	 (__clang_major__ == (major) && __clang_minor__ >= (minor)))
#  endif
#  if !CLANG_PREREQ(3, 9, 8000000)
#    error "clang versions older than 3.9 are no longer supported"
#  endif
#else
#  define CLANG_PREREQ(major, minor, apple_version)	0
#endif
2787			#ifdef _MSC_VER /* MSVC_PREREQ(version): nonzero when building with MSVC whose _MSC_VER is at least 'version' */
2788			# define MSVC_PREREQ(version) (_MSC_VER >= (version))
2789			# if !MSVC_PREREQ(1900) /* reject anything older than _MSC_VER 1900 */
2790			# error "MSVC versions older than Visual Studio 2015 are no longer supported"
2791			# endif
2792			#else /* not MSVC: the check is always false */
2793			# define MSVC_PREREQ(version) 0
2794			#endif
2795
2796
2797			#ifndef __has_attribute
2798			# define __has_attribute(attribute) 0
2799			#endif
2800
2801
2802			#ifndef __has_builtin
2803			# define __has_builtin(builtin) 0
2804			#endif
2805
2806
2807			#ifdef _MSC_VER
2808			# define inline __inline
2809			#endif
2810
2811
/*
 * forceinline: "inline" plus the strongest inlining request the compiler
 * offers (always_inline on GNU-compatible compilers, __forceinline on MSVC),
 * falling back to plain "inline".
 */
#if defined(__GNUC__) || __has_attribute(always_inline)
#  define forceinline		inline __attribute__((always_inline))
#elif defined(_MSC_VER)
#  define forceinline		__forceinline
#else
#  define forceinline		inline
#endif
2819
2820
/* MAYBE_UNUSED: marks a declaration as possibly unused; empty if unsupported. */
#if defined(__GNUC__) || __has_attribute(unused)
#  define MAYBE_UNUSED		__attribute__((unused))
#else
#  define MAYBE_UNUSED
#endif
2826
2827
/* NORETURN: marks a function that never returns; empty if unsupported. */
#if defined(__GNUC__) || __has_attribute(noreturn)
#  define NORETURN		__attribute__((noreturn))
#else
#  define NORETURN
#endif
2833
2834
/*
 * When the compiler does not advertise C11 or later, map "restrict" to the
 * GNU spelling __restrict__, or to nothing on other compilers.
 */
#if !defined(__STDC_VERSION__) || (__STDC_VERSION__ < 201112L)
#  if defined(__GNUC__) || defined(__clang__)
#    define restrict		__restrict__
#  else
#    define restrict
#  endif
#endif
2842
2843
/*
 * likely(expr): branch hint that expr is usually true.  With the builtin the
 * result is normalized to 0 or 1; without it, expr is passed through as is.
 */
#if defined(__GNUC__) || __has_builtin(__builtin_expect)
#  define likely(expr)		__builtin_expect(!!(expr), 1)
#else
#  define likely(expr)		(expr)
#endif
2849
2850
/*
 * unlikely(expr): branch hint that expr is usually false.  With the builtin
 * the result is normalized to 0 or 1; without it, expr is passed through.
 */
#if defined(__GNUC__) || __has_builtin(__builtin_expect)
#  define unlikely(expr)	__builtin_expect(!!(expr), 0)
#else
#  define unlikely(expr)	(expr)
#endif
2856
2857
/*
 * prefetchr(addr): hint that the memory at addr will soon be read.
 * Expands to nothing when no suitable builtin or intrinsic is known.
 */
#undef prefetchr
#if defined(__GNUC__) || __has_builtin(__builtin_prefetch)
#  define prefetchr(addr)	__builtin_prefetch((addr), 0)
#elif defined(_MSC_VER)
#  if defined(ARCH_X86_32) || defined(ARCH_X86_64)
#    define prefetchr(addr)	_mm_prefetch((addr), _MM_HINT_T0)
#  elif defined(ARCH_ARM64)
#    define prefetchr(addr)	__prefetch2((addr), 0x00 /* prfop=PLDL1KEEP */)
#  elif defined(ARCH_ARM32)
#    define prefetchr(addr)	__prefetch(addr)
#  endif
#endif
#ifndef prefetchr
#  define prefetchr(addr)
#endif
2873
2874
/*
 * prefetchw(addr): hint that the memory at addr will soon be written.
 * Expands to nothing when no suitable builtin or intrinsic is known.
 */
#undef prefetchw
#if defined(__GNUC__) || __has_builtin(__builtin_prefetch)
#  define prefetchw(addr)	__builtin_prefetch((addr), 1)
#elif defined(_MSC_VER)
#  if defined(ARCH_X86_32) || defined(ARCH_X86_64)
#    define prefetchw(addr)	_m_prefetchw(addr)
#  elif defined(ARCH_ARM64)
#    define prefetchw(addr)	__prefetch2((addr), 0x10 /* prfop=PSTL1KEEP */)
#  elif defined(ARCH_ARM32)
#    define prefetchw(addr)	__prefetchw(addr)
#  endif
#endif
#ifndef prefetchw
#  define prefetchw(addr)
#endif
2890
2891
/*
 * _aligned_attribute(n): request n-byte alignment for a declaration.
 * Left undefined when the compiler offers neither spelling, so users can
 * test for it with #ifdef.
 */
#undef _aligned_attribute
#if defined(__GNUC__) || __has_attribute(aligned)
#  define _aligned_attribute(n)	__attribute__((aligned(n)))
#elif defined(_MSC_VER)
#  define _aligned_attribute(n)	__declspec(align(n))
#endif
2898
2899
/*
 * _target_attribute(attrs): compile a single function for the given target
 * feature string.  Expands to nothing when the attribute is unavailable.
 */
#if defined(__GNUC__) || __has_attribute(target)
#  define _target_attribute(attrs)	__attribute__((target(attrs)))
#else
#  define _target_attribute(attrs)
#endif
2905
2906
2907
2908
2909
2910			#define ARRAY_LEN(A) (sizeof(A) / sizeof((A)[0])) /* element count; A must be a real array, not a pointer */
2911			#define MIN(a, b) ((a) <= (b) ? (a) : (b)) /* smaller of a and b; each argument may be evaluated twice */
2912			#define MAX(a, b) ((a) >= (b) ? (a) : (b)) /* larger of a and b; each argument may be evaluated twice */
2913			#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d)) /* n / d rounded up (integer operands) */
2914			#define STATIC_ASSERT(expr) ((void)sizeof(char[1 - 2 * !(expr)])) /* array size becomes -1, a compile error, when expr is false */
2915			#define ALIGN(n, a) (((n) + (a) - 1) & ~((a) - 1)) /* round n up to a multiple of a; the mask only works when a is a power of two */
2916			#define ROUND_UP(n, d) ((d) * DIV_ROUND_UP((n), (d))) /* round n up to a multiple of d; d need not be a power of two */
2917
2918
2919
2920
2921
2922
2923			#if defined(__BYTE_ORDER__)
2924			# define CPU_IS_LITTLE_ENDIAN() (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
2925			#elif defined(_MSC_VER)
2926			# define CPU_IS_LITTLE_ENDIAN() true
2927			#else
2928			static forceinline bool CPU_IS_LITTLE_ENDIAN(void)
2929			{
2930			union {
2931			u32 w;
2932			u8 b;
2933			} u;
2934
2935			u.w = 1;
2936			return u.b;
2937			}
2938			#endif
2939
2940
2941			static forceinline u16 bswap16(u16 v)
2942			{
2943			#if defined(__GNUC__) \|\| __has_builtin(__builtin_bswap16)
2944			return __builtin_bswap16(v);
2945			#elif defined(_MSC_VER)
2946			return _byteswap_ushort(v);
2947			#else
2948			return (v << 8) \| (v >> 8);
2949			#endif
2950			}
2951
2952
2953			static forceinline u32 bswap32(u32 v)
2954			{
2955			#if defined(__GNUC__) \|\| __has_builtin(__builtin_bswap32)
2956			return __builtin_bswap32(v);
2957			#elif defined(_MSC_VER)
2958			return _byteswap_ulong(v);
2959			#else
2960			return ((v & 0x000000FF) << 24) \|
2961			((v & 0x0000FF00) << 8) \|
2962			((v & 0x00FF0000) >> 8) \|
2963			((v & 0xFF000000) >> 24);
2964			#endif
2965			}
2966
2967
2968			static forceinline u64 bswap64(u64 v)
2969			{
2970			#if defined(__GNUC__) \|\| __has_builtin(__builtin_bswap64)
2971			return __builtin_bswap64(v);
2972			#elif defined(_MSC_VER)
2973			return _byteswap_uint64(v);
2974			#else
2975			return ((v & 0x00000000000000FF) << 56) \|
2976			((v & 0x000000000000FF00) << 40) \|
2977			((v & 0x0000000000FF0000) << 24) \|
2978			((v & 0x00000000FF000000) << 8) \|
2979			((v & 0x000000FF00000000) >> 8) \|
2980			((v & 0x0000FF0000000000) >> 24) \|
2981			((v & 0x00FF000000000000) >> 40) \|
2982			((v & 0xFF00000000000000) >> 56);
2983			#endif
2984			}
2985
2986			#define le16_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap16(v))
2987			#define le32_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap32(v))
2988			#define le64_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap64(v))
2989			#define be16_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? bswap16(v) : (v))
2990			#define be32_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? bswap32(v) : (v))
2991			#define be64_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? bswap64(v) : (v))
2992
2993
2994
2995
2996
2997
/*
 * UNALIGNED_ACCESS_IS_FAST is 1 for the compiler/target combinations listed
 * here (GNU-compatible compilers on x86, ARM with __ARM_FEATURE_UNALIGNED,
 * powerpc64, RISC-V with __riscv_misaligned_fast, wasm; and any MSVC target),
 * and 0 everywhere else.
 */
#if (defined(__GNUC__) || defined(__clang__)) && \
	(defined(ARCH_X86_64) || defined(ARCH_X86_32) || \
	 defined(__ARM_FEATURE_UNALIGNED) || defined(__powerpc64__) || \
	 defined(__riscv_misaligned_fast) || \
	 defined(__wasm__))
#  define UNALIGNED_ACCESS_IS_FAST	1
#elif defined(_MSC_VER)
#  define UNALIGNED_ACCESS_IS_FAST	1
#else
#  define UNALIGNED_ACCESS_IS_FAST	0
#endif
3009
3010
3011
3012			#ifdef FREESTANDING
3013			# define MEMCOPY __builtin_memcpy
3014			#else
3015			# define MEMCOPY memcpy
3016			#endif
3017
3018
3019
3020			#define DEFINE_UNALIGNED_TYPE(type) /* emits load_<type>_unaligned() and store_<type>_unaligned() */ \
3021			static forceinline type \
3022			load_##type##_unaligned(const void *p) \
3023			{ \
3024			type v; \
3025			\
3026			MEMCOPY(&v, p, sizeof(v)); /* copy the bytes instead of dereferencing a possibly misaligned pointer */ \
3027			return v; \
3028			} \
3029			\
3030			static forceinline void \
3031			store_##type##_unaligned(type v, void *p) \
3032			{ \
3033			MEMCOPY(p, &v, sizeof(v)); /* byte copy again; p needs no particular alignment */ \
3034			}
3035
3036			DEFINE_UNALIGNED_TYPE(u16)
3037			DEFINE_UNALIGNED_TYPE(u32)
3038			DEFINE_UNALIGNED_TYPE(u64)
3039			DEFINE_UNALIGNED_TYPE(machine_word_t)
3040
3041			#undef MEMCOPY
3042
3043			#define load_word_unaligned load_machine_word_t_unaligned
3044			#define store_word_unaligned store_machine_word_t_unaligned
3045
3046
3047
3048			static forceinline u16
3049			get_unaligned_le16(const u8 *p)
3050			{
3051			if (UNALIGNED_ACCESS_IS_FAST)
3052			return le16_bswap(load_u16_unaligned(p));
3053			else
3054			return ((u16)p[1] << 8) \| p[0];
3055			}
3056
3057			static forceinline u16
3058			get_unaligned_be16(const u8 *p)
3059			{
3060			if (UNALIGNED_ACCESS_IS_FAST)
3061			return be16_bswap(load_u16_unaligned(p));
3062			else
3063			return ((u16)p[0] << 8) \| p[1];
3064			}
3065
3066			static forceinline u32
3067			get_unaligned_le32(const u8 *p)
3068			{
3069			if (UNALIGNED_ACCESS_IS_FAST)
3070			return le32_bswap(load_u32_unaligned(p));
3071			else
3072			return ((u32)p[3] << 24) \| ((u32)p[2] << 16) \|
3073			((u32)p[1] << 8) \| p[0];
3074			}
3075
3076			static forceinline u32
3077			get_unaligned_be32(const u8 *p)
3078			{
3079			if (UNALIGNED_ACCESS_IS_FAST)
3080			return be32_bswap(load_u32_unaligned(p));
3081			else
3082			return ((u32)p[0] << 24) \| ((u32)p[1] << 16) \|
3083			((u32)p[2] << 8) \| p[3];
3084			}
3085
3086			static forceinline u64
3087			get_unaligned_le64(const u8 *p)
3088			{
3089			if (UNALIGNED_ACCESS_IS_FAST)
3090			return le64_bswap(load_u64_unaligned(p));
3091			else
3092			return ((u64)p[7] << 56) \| ((u64)p[6] << 48) \|
3093			((u64)p[5] << 40) \| ((u64)p[4] << 32) \|
3094			((u64)p[3] << 24) \| ((u64)p[2] << 16) \|
3095			((u64)p[1] << 8) \| p[0];
3096			}
3097
3098			static forceinline machine_word_t
3099			get_unaligned_leword(const u8 *p)
3100			{
3101			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
3102			if (WORDBITS == 32)
3103			return get_unaligned_le32(p);
3104			else
3105			return get_unaligned_le64(p);
3106			}
3107
3108
3109
3110			static forceinline void
3111			put_unaligned_le16(u16 v, u8 *p)
3112			{
3113			if (UNALIGNED_ACCESS_IS_FAST) {
3114			store_u16_unaligned(le16_bswap(v), p);
3115			} else {
3116			p[0] = (u8)(v >> 0);
3117			p[1] = (u8)(v >> 8);
3118			}
3119			}
3120
3121			static forceinline void
3122			put_unaligned_be16(u16 v, u8 *p)
3123			{
3124			if (UNALIGNED_ACCESS_IS_FAST) {
3125			store_u16_unaligned(be16_bswap(v), p);
3126			} else {
3127			p[0] = (u8)(v >> 8);
3128			p[1] = (u8)(v >> 0);
3129			}
3130			}
3131
3132			static forceinline void
3133			put_unaligned_le32(u32 v, u8 *p)
3134			{
3135			if (UNALIGNED_ACCESS_IS_FAST) {
3136			store_u32_unaligned(le32_bswap(v), p);
3137			} else {
3138			p[0] = (u8)(v >> 0);
3139			p[1] = (u8)(v >> 8);
3140			p[2] = (u8)(v >> 16);
3141			p[3] = (u8)(v >> 24);
3142			}
3143			}
3144
3145			static forceinline void
3146			put_unaligned_be32(u32 v, u8 *p)
3147			{
3148			if (UNALIGNED_ACCESS_IS_FAST) {
3149			store_u32_unaligned(be32_bswap(v), p);
3150			} else {
3151			p[0] = (u8)(v >> 24);
3152			p[1] = (u8)(v >> 16);
3153			p[2] = (u8)(v >> 8);
3154			p[3] = (u8)(v >> 0);
3155			}
3156			}
3157
3158			static forceinline void
3159			put_unaligned_le64(u64 v, u8 *p)
3160			{
3161			if (UNALIGNED_ACCESS_IS_FAST) {
3162			store_u64_unaligned(le64_bswap(v), p);
3163			} else {
3164			p[0] = (u8)(v >> 0);
3165			p[1] = (u8)(v >> 8);
3166			p[2] = (u8)(v >> 16);
3167			p[3] = (u8)(v >> 24);
3168			p[4] = (u8)(v >> 32);
3169			p[5] = (u8)(v >> 40);
3170			p[6] = (u8)(v >> 48);
3171			p[7] = (u8)(v >> 56);
3172			}
3173			}
3174
3175			static forceinline void
3176			put_unaligned_leword(machine_word_t v, u8 *p)
3177			{
3178			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
3179			if (WORDBITS == 32)
3180			put_unaligned_le32(v, p);
3181			else
3182			put_unaligned_le64(v, p);
3183			}
3184
3185
3186
3187
3188
3189
3190
3191			static forceinline unsigned
3192			bsr32(u32 v)
3193			{
3194			#if defined(__GNUC__) \|\| __has_builtin(__builtin_clz)
3195			return 31 - __builtin_clz(v);
3196			#elif defined(_MSC_VER)
3197			unsigned long i;
3198
3199			_BitScanReverse(&i, v);
3200			return i;
3201			#else
3202			unsigned i = 0;
3203
3204			while ((v >>= 1) != 0)
3205			i++;
3206			return i;
3207			#endif
3208			}
3209
3210			static forceinline unsigned
3211			bsr64(u64 v)
3212			{
3213			#if defined(__GNUC__) \|\| __has_builtin(__builtin_clzll)
3214			return 63 - __builtin_clzll(v);
3215			#elif defined(_MSC_VER) && defined(_WIN64)
3216			unsigned long i;
3217
3218			_BitScanReverse64(&i, v);
3219			return i;
3220			#else
3221			unsigned i = 0;
3222
3223			while ((v >>= 1) != 0)
3224			i++;
3225			return i;
3226			#endif
3227			}
3228
3229			static forceinline unsigned
3230			bsrw(machine_word_t v)
3231			{
3232			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
3233			if (WORDBITS == 32)
3234			return bsr32(v);
3235			else
3236			return bsr64(v);
3237			}
3238
3239
3240
3241			static forceinline unsigned
3242			bsf32(u32 v)
3243			{
3244			#if defined(__GNUC__) \|\| __has_builtin(__builtin_ctz)
3245			return __builtin_ctz(v);
3246			#elif defined(_MSC_VER)
3247			unsigned long i;
3248
3249			_BitScanForward(&i, v);
3250			return i;
3251			#else
3252			unsigned i = 0;
3253
3254			for (; (v & 1) == 0; v >>= 1)
3255			i++;
3256			return i;
3257			#endif
3258			}
3259
3260			static forceinline unsigned
3261			bsf64(u64 v)
3262			{
3263			#if defined(__GNUC__) \|\| __has_builtin(__builtin_ctzll)
3264			return __builtin_ctzll(v);
3265			#elif defined(_MSC_VER) && defined(_WIN64)
3266			unsigned long i;
3267
3268			_BitScanForward64(&i, v);
3269			return i;
3270			#else
3271			unsigned i = 0;
3272
3273			for (; (v & 1) == 0; v >>= 1)
3274			i++;
3275			return i;
3276			#endif
3277			}
3278
3279			static forceinline unsigned
3280			bsfw(machine_word_t v)
3281			{
3282			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
3283			if (WORDBITS == 32)
3284			return bsf32(v);
3285			else
3286			return bsf64(v);
3287			}
3288
3289
/*
 * rbit32(v): the ARM "rbit" instruction applied to a 32-bit value.
 * Provided only for GNU-compatible compilers on 32-bit ARM (ARMv7+, or ARMv6
 * with __ARM_ARCH_6T2__ defined) and on ARM64.  When provided, the rbit32
 * macro is defined so that users can test for it with #ifdef.
 */
#undef rbit32
#if (defined(__GNUC__) || defined(__clang__)) && defined(ARCH_ARM32) && \
	(__ARM_ARCH >= 7 || (__ARM_ARCH == 6 && defined(__ARM_ARCH_6T2__)))
static forceinline u32
rbit32(u32 v)
{
	__asm__("rbit %0, %1" : "=r" (v) : "r" (v));
	return v;
}
#define rbit32 rbit32
#elif (defined(__GNUC__) || defined(__clang__)) && defined(ARCH_ARM64)
static forceinline u32
rbit32(u32 v)
{
	/* %w selects the 32-bit view of the register. */
	__asm__("rbit %w0, %w1" : "=r" (v) : "r" (v));
	return v;
}
#define rbit32 rbit32
#endif
3309
3310			#endif
3311
3312
/*
 * Pointer types for the allocator callbacks.  They are used by the
 * libdeflate_default_*_func variables and the libdeflate_aligned_*()
 * helpers declared right after.
 */
typedef void *(*malloc_func_t)(size_t);
typedef void (*free_func_t)(void *);
3315
3316			extern malloc_func_t libdeflate_default_malloc_func;
3317			extern free_func_t libdeflate_default_free_func;
3318
3319			void *libdeflate_aligned_malloc(malloc_func_t malloc_func,
3320			size_t alignment, size_t size);
3321			void libdeflate_aligned_free(free_func_t free_func, void *ptr);
3322
#ifdef FREESTANDING
/*
 * Freestanding build: declare the four string functions here instead of
 * including <string.h>, and route calls to the compiler builtins via macros.
 * Assertions are turned off in this configuration.
 */
void *memset(void *s, int c, size_t n);
#define memset(s, c, n)		__builtin_memset((s), (c), (n))

void *memcpy(void *dest, const void *src, size_t n);
#define memcpy(dest, src, n)	__builtin_memcpy((dest), (src), (n))

void *memmove(void *dest, const void *src, size_t n);
#define memmove(dest, src, n)	__builtin_memmove((dest), (src), (n))

int memcmp(const void *s1, const void *s2, size_t n);
#define memcmp(s1, s2, n)	__builtin_memcmp((s1), (s2), (n))

#undef LIBDEFLATE_ENABLE_ASSERTIONS
#else
#  include <string.h>
   /* Under the clang static analyzer, always enable assertions. */
#  ifdef __clang_analyzer__
#    define LIBDEFLATE_ENABLE_ASSERTIONS
#  endif
#endif
3345
3346
/*
 * ASSERT(expr): with LIBDEFLATE_ENABLE_ASSERTIONS, calls
 * libdeflate_assertion_failed() with the expression text, file and line when
 * expr is false.  Otherwise expr is still evaluated but its value is
 * discarded.
 */
#ifdef LIBDEFLATE_ENABLE_ASSERTIONS
NORETURN void
libdeflate_assertion_failed(const char *expr, const char *file, int line);
#define ASSERT(expr) { if (unlikely(!(expr))) \
	libdeflate_assertion_failed(#expr, __FILE__, __LINE__); }
#else
#define ASSERT(expr) (void)(expr)
#endif
3355
3356			#define CONCAT_IMPL(a, b) a##b
3357			#define CONCAT(a, b) CONCAT_IMPL(a, b)
3358			#define ADD_SUFFIX(name) CONCAT(name, SUFFIX)
3359
3360			#endif
3361
3362
3363			#if defined(ARCH_X86_32) \|\| defined(ARCH_X86_64)
3364
3365			#define X86_CPU_FEATURE_SSE2 (1 << 0)
3366			#define X86_CPU_FEATURE_PCLMULQDQ (1 << 1)
3367			#define X86_CPU_FEATURE_AVX (1 << 2)
3368			#define X86_CPU_FEATURE_AVX2 (1 << 3)
3369			#define X86_CPU_FEATURE_BMI2 (1 << 4)
3370
3371			#define X86_CPU_FEATURE_ZMM (1 << 5)
3372			#define X86_CPU_FEATURE_AVX512BW (1 << 6)
3373			#define X86_CPU_FEATURE_AVX512VL (1 << 7)
3374			#define X86_CPU_FEATURE_VPCLMULQDQ (1 << 8)
3375			#define X86_CPU_FEATURE_AVX512VNNI (1 << 9)
3376			#define X86_CPU_FEATURE_AVXVNNI (1 << 10)
3377
3378			#if defined(__GNUC__) \|\| defined(__clang__) \|\| defined(_MSC_VER)
3379
3380			# define X86_CPU_FEATURES_KNOWN (1U << 31)
3381			extern volatile u32 libdeflate_x86_cpu_features;
3382
3383			void libdeflate_init_x86_cpu_features(void);
3384
3385	5		static inline u32 get_x86_cpu_features(void) /* return the cached x86 feature bitmask, initializing it first if needed */
3386			{
3387	5	100	if (libdeflate_x86_cpu_features == 0) /* 0 is treated as "not initialized yet" */
3388	2		libdeflate_init_x86_cpu_features(); /* defined elsewhere; presumably stores a nonzero value (cf. X86_CPU_FEATURES_KNOWN) -- confirm there */
3389	5		return libdeflate_x86_cpu_features; /* the global is volatile, so this is a second read made after any initialization */
3390			}
3391
3392			# include
3393			# if defined(_MSC_VER) && defined(__clang__)
3394			# include
3395			# include
3396			# include
3397			# include
3398			# include
3399			# include
3400			# include
3401			# include
3402			# if __has_include()
3403			# include
3404			# endif
3405			# if __has_include()
3406			# include
3407			# endif
3408			# if __has_include()
3409			# include
3410			# endif
3411			# if __has_include()
3412			# include
3413			# endif
3414			# if __has_include()
3415			# include
3416			# endif
3417			# endif
3418			#else
3419			static inline u32 get_x86_cpu_features(void) { return 0; }
3420			#endif
3421
/*
 * HAVE_SSE2(features): constant 1 when SSE2 is enabled at compile time
 * (__SSE2__, or MSVC targeting x86-64 or _M_IX86_FP >= 2); otherwise a test
 * of the SSE2 bit in the runtime feature mask.  HAVE_SSE2_NATIVE records
 * which of the two cases applies.
 */
#if defined(__SSE2__) || \
	(defined(_MSC_VER) && \
	 (defined(ARCH_X86_64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 2)))
#  define HAVE_SSE2(features)	1
#  define HAVE_SSE2_NATIVE	1
#else
#  define HAVE_SSE2(features)	((features) & X86_CPU_FEATURE_SSE2)
#  define HAVE_SSE2_NATIVE	0
#endif
3431
/*
 * HAVE_PCLMULQDQ(features): constant 1 when PCLMUL and SSE4.1 are both
 * enabled at compile time (or MSVC is building with __AVX2__); otherwise a
 * test of the PCLMULQDQ bit in the runtime feature mask.
 */
#if (defined(__PCLMUL__) && defined(__SSE4_1__)) || \
	(defined(_MSC_VER) && defined(__AVX2__))
#  define HAVE_PCLMULQDQ(features)	1
#else
#  define HAVE_PCLMULQDQ(features)	((features) & X86_CPU_FEATURE_PCLMULQDQ)
#endif
3438
3439			#ifdef __AVX__
3440			# define HAVE_AVX(features) 1
3441			#else
3442			# define HAVE_AVX(features) ((features) & X86_CPU_FEATURE_AVX)
3443			#endif
3444
3445			#ifdef __AVX2__
3446			# define HAVE_AVX2(features) 1
3447			#else
3448			# define HAVE_AVX2(features) ((features) & X86_CPU_FEATURE_AVX2)
3449			#endif
3450
/*
 * HAVE_BMI2(features): constant 1 when BMI2 is enabled at compile time
 * (__BMI2__, or MSVC building with __AVX2__); otherwise a test of the BMI2
 * bit in the runtime feature mask.  HAVE_BMI2_NATIVE records which of the
 * two cases applies.
 */
#if defined(__BMI2__) || (defined(_MSC_VER) && defined(__AVX2__))
#  define HAVE_BMI2(features)	1
#  define HAVE_BMI2_NATIVE	1
#else
#  define HAVE_BMI2(features)	((features) & X86_CPU_FEATURE_BMI2)
#  define HAVE_BMI2_NATIVE	0
#endif
3458
3459			#ifdef __AVX512BW__
3460			# define HAVE_AVX512BW(features) 1
3461			#else
3462			# define HAVE_AVX512BW(features) ((features) & X86_CPU_FEATURE_AVX512BW)
3463			#endif
3464
3465			#ifdef __AVX512VL__
3466			# define HAVE_AVX512VL(features) 1
3467			#else
3468			# define HAVE_AVX512VL(features) ((features) & X86_CPU_FEATURE_AVX512VL)
3469			#endif
3470
3471			#ifdef __VPCLMULQDQ__
3472			# define HAVE_VPCLMULQDQ(features) 1
3473			#else
3474			# define HAVE_VPCLMULQDQ(features) ((features) & X86_CPU_FEATURE_VPCLMULQDQ)
3475			#endif
3476
3477			#ifdef __AVX512VNNI__
3478			# define HAVE_AVX512VNNI(features) 1
3479			#else
3480			# define HAVE_AVX512VNNI(features) ((features) & X86_CPU_FEATURE_AVX512VNNI)
3481			#endif
3482
3483			#ifdef __AVXVNNI__
3484			# define HAVE_AVXVNNI(features) 1
3485			#else
3486			# define HAVE_AVXVNNI(features) ((features) & X86_CPU_FEATURE_AVXVNNI)
3487			#endif
3488
3489			#endif
3490
3491			#endif
3492
3493
3494
3495			#if defined(__GNUC__) \|\| defined(__clang__) \|\| defined(_MSC_VER)
3496			# define adler32_x86_sse2 adler32_x86_sse2
3497			# define SUFFIX _sse2
3498			# define ATTRIBUTES _target_attribute("sse2")
3499			# define VL 16
3500			# define USE_VNNI 0
3501			# define USE_AVX512 0
3502			/* #include "x86-adler32_template.h" */
3503
3504
3505
3506
3507			#if VL == 16
3508			# define vec_t __m128i
3509			# define mask_t u16
3510			# define LOG2_VL 4
3511			# define VADD8(a, b) _mm_add_epi8((a), (b))
3512			# define VADD16(a, b) _mm_add_epi16((a), (b))
3513			# define VADD32(a, b) _mm_add_epi32((a), (b))
3514			# if USE_AVX512
3515			# define VDPBUSD(a, b, c) _mm_dpbusd_epi32((a), (b), (c))
3516			# else
3517			# define VDPBUSD(a, b, c) _mm_dpbusd_avx_epi32((a), (b), (c))
3518			# endif
3519			# define VLOAD(p) _mm_load_si128((const void *)(p))
3520			# define VLOADU(p) _mm_loadu_si128((const void *)(p))
3521			# define VMADD16(a, b) _mm_madd_epi16((a), (b))
3522			# define VMASKZ_LOADU(mask, p) _mm_maskz_loadu_epi8((mask), (p))
3523			# define VMULLO32(a, b) _mm_mullo_epi32((a), (b))
3524			# define VSAD8(a, b) _mm_sad_epu8((a), (b))
3525			# define VSET1_8(a) _mm_set1_epi8(a)
3526			# define VSET1_32(a) _mm_set1_epi32(a)
3527			# define VSETZERO() _mm_setzero_si128()
3528			# define VSLL32(a, b) _mm_slli_epi32((a), (b))
3529			# define VUNPACKLO8(a, b) _mm_unpacklo_epi8((a), (b))
3530			# define VUNPACKHI8(a, b) _mm_unpackhi_epi8((a), (b))
3531			#elif VL == 32
3532			# define vec_t __m256i
3533			# define mask_t u32
3534			# define LOG2_VL 5
3535			# define VADD8(a, b) _mm256_add_epi8((a), (b))
3536			# define VADD16(a, b) _mm256_add_epi16((a), (b))
3537			# define VADD32(a, b) _mm256_add_epi32((a), (b))
3538			# if USE_AVX512
3539			# define VDPBUSD(a, b, c) _mm256_dpbusd_epi32((a), (b), (c))
3540			# else
3541			# define VDPBUSD(a, b, c) _mm256_dpbusd_avx_epi32((a), (b), (c))
3542			# endif
3543			# define VLOAD(p) _mm256_load_si256((const void *)(p))
3544			# define VLOADU(p) _mm256_loadu_si256((const void *)(p))
3545			# define VMADD16(a, b) _mm256_madd_epi16((a), (b))
3546			# define VMASKZ_LOADU(mask, p) _mm256_maskz_loadu_epi8((mask), (p))
3547			# define VMULLO32(a, b) _mm256_mullo_epi32((a), (b))
3548			# define VSAD8(a, b) _mm256_sad_epu8((a), (b))
3549			# define VSET1_8(a) _mm256_set1_epi8(a)
3550			# define VSET1_32(a) _mm256_set1_epi32(a)
3551			# define VSETZERO() _mm256_setzero_si256()
3552			# define VSLL32(a, b) _mm256_slli_epi32((a), (b))
3553			# define VUNPACKLO8(a, b) _mm256_unpacklo_epi8((a), (b))
3554			# define VUNPACKHI8(a, b) _mm256_unpackhi_epi8((a), (b))
3555			#elif VL == 64
3556			# define vec_t __m512i
3557			# define mask_t u64
3558			# define LOG2_VL 6
3559			# define VADD8(a, b) _mm512_add_epi8((a), (b))
3560			# define VADD16(a, b) _mm512_add_epi16((a), (b))
3561			# define VADD32(a, b) _mm512_add_epi32((a), (b))
3562			# define VDPBUSD(a, b, c) _mm512_dpbusd_epi32((a), (b), (c))
3563			# define VLOAD(p) _mm512_load_si512((const void *)(p))
3564			# define VLOADU(p) _mm512_loadu_si512((const void *)(p))
3565			# define VMADD16(a, b) _mm512_madd_epi16((a), (b))
3566			# define VMASKZ_LOADU(mask, p) _mm512_maskz_loadu_epi8((mask), (p))
3567			# define VMULLO32(a, b) _mm512_mullo_epi32((a), (b))
3568			# define VSAD8(a, b) _mm512_sad_epu8((a), (b))
3569			# define VSET1_8(a) _mm512_set1_epi8(a)
3570			# define VSET1_32(a) _mm512_set1_epi32(a)
3571			# define VSETZERO() _mm512_setzero_si512()
3572			# define VSLL32(a, b) _mm512_slli_epi32((a), (b))
3573			# define VUNPACKLO8(a, b) _mm512_unpacklo_epi8((a), (b))
3574			# define VUNPACKHI8(a, b) _mm512_unpackhi_epi8((a), (b))
3575			#else
3576			# error "unsupported vector length"
3577			#endif
3578
3579			#define VADD32_3X(a, b, c) VADD32(VADD32((a), (b)), (c))
3580			#define VADD32_4X(a, b, c, d) VADD32(VADD32((a), (b)), VADD32((c), (d)))
3581			#define VADD32_5X(a, b, c, d, e) VADD32((a), VADD32_4X((b), (c), (d), (e)))
3582			#define VADD32_7X(a, b, c, d, e, f, g) \
3583			VADD32(VADD32_3X((a), (b), (c)), VADD32_4X((d), (e), (f), (g)))
3584
3585
3586			#undef reduce_to_32bits
3587			static forceinline ATTRIBUTES void
3588			ADD_SUFFIX(reduce_to_32bits)(vec_t v_s1, vec_t v_s2, u32 *s1_p, u32 *s2_p)
3589			{
3590			__m128i v_s1_128, v_s2_128;
3591			#if VL == 16
3592			{
3593	0		v_s1_128 = v_s1;
3594	0		v_s2_128 = v_s2;
3595			}
3596			#else
3597			{
3598			__m256i v_s1_256, v_s2_256;
3599			#if VL == 32
3600			v_s1_256 = v_s1;
3601			v_s2_256 = v_s2;
3602			#else
3603
3604			v_s1_256 = _mm256_add_epi32(_mm512_extracti64x4_epi64(v_s1, 0),
3605			_mm512_extracti64x4_epi64(v_s1, 1));
3606			v_s2_256 = _mm256_add_epi32(_mm512_extracti64x4_epi64(v_s2, 0),
3607			_mm512_extracti64x4_epi64(v_s2, 1));
3608			#endif
3609
3610			v_s1_128 = _mm_add_epi32(_mm256_extracti128_si256(v_s1_256, 0),
3611			_mm256_extracti128_si256(v_s1_256, 1));
3612			v_s2_128 = _mm_add_epi32(_mm256_extracti128_si256(v_s2_256, 0),
3613			_mm256_extracti128_si256(v_s2_256, 1));
3614			}
3615			#endif
3616
3617
3618			#if USE_VNNI
3619			v_s1_128 = _mm_add_epi32(v_s1_128, _mm_shuffle_epi32(v_s1_128, 0x31));
3620			#endif
3621	0		v_s2_128 = _mm_add_epi32(v_s2_128, _mm_shuffle_epi32(v_s2_128, 0x31));
3622	0		v_s1_128 = _mm_add_epi32(v_s1_128, _mm_shuffle_epi32(v_s1_128, 0x02));
3623	0		v_s2_128 = _mm_add_epi32(v_s2_128, _mm_shuffle_epi32(v_s2_128, 0x02));
3624
3625	0		*s1_p += (u32)_mm_cvtsi128_si32(v_s1_128);
3626	0		*s2_p += (u32)_mm_cvtsi128_si32(v_s2_128);
3627	0		}
3628			#define reduce_to_32bits ADD_SUFFIX(reduce_to_32bits)
3629
3630			static ATTRIBUTES u32
3631	0		ADD_SUFFIX(adler32_x86)(u32 adler, const u8 *p, size_t len)
3632			{
3633			#if USE_VNNI
3634
3635			static const u8 _aligned_attribute(VL) raw_mults[VL] = {
3636			#if VL == 64
3637			64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49,
3638			48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33,
3639			#endif
3640			#if VL >= 32
3641			32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
3642			#endif
3643			16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
3644			};
3645			const vec_t ones = VSET1_8(1);
3646			#else
3647
3648			static const u16 _aligned_attribute(VL) raw_mults[4][VL / 2] = {
3649			#if VL == 16
3650			{ 32, 31, 30, 29, 28, 27, 26, 25 },
3651			{ 24, 23, 22, 21, 20, 19, 18, 17 },
3652			{ 16, 15, 14, 13, 12, 11, 10, 9 },
3653			{ 8, 7, 6, 5, 4, 3, 2, 1 },
3654			#elif VL == 32
3655			{ 64, 63, 62, 61, 60, 59, 58, 57, 48, 47, 46, 45, 44, 43, 42, 41 },
3656			{ 56, 55, 54, 53, 52, 51, 50, 49, 40, 39, 38, 37, 36, 35, 34, 33 },
3657			{ 32, 31, 30, 29, 28, 27, 26, 25, 16, 15, 14, 13, 12, 11, 10, 9 },
3658			{ 24, 23, 22, 21, 20, 19, 18, 17, 8, 7, 6, 5, 4, 3, 2, 1 },
3659			#else
3660			# error "unsupported parameters"
3661			#endif
3662			};
3663	0		const vec_t mults_a = VLOAD(raw_mults[0]);
3664	0		const vec_t mults_b = VLOAD(raw_mults[1]);
3665	0		const vec_t mults_c = VLOAD(raw_mults[2]);
3666	0		const vec_t mults_d = VLOAD(raw_mults[3]);
3667			#endif
3668	0		const vec_t zeroes = VSETZERO();
3669	0		u32 s1 = adler & 0xFFFF;
3670	0		u32 s2 = adler >> 16;
3671
3672
3673	0	0	if (unlikely(len > 65536 && ((uintptr_t)p & (VL-1)))) {
		0
3674			do {
3675	0		s1 += *p++;
3676	0		s2 += s1;
3677	0		len--;
3678	0	0	} while ((uintptr_t)p & (VL-1));
3679	0		s1 %= DIVISOR;
3680	0		s2 %= DIVISOR;
3681			}
3682
3683			#if USE_VNNI
3684
3685			while (len) {
3686
3687			size_t n = MIN(len, MAX_CHUNK_LEN & ~(4*VL - 1));
3688			vec_t mults = VLOAD(raw_mults);
3689			vec_t v_s1 = zeroes;
3690			vec_t v_s2 = zeroes;
3691
3692			s2 += s1 * n;
3693			len -= n;
3694
3695			if (n >= 4*VL) {
3696			vec_t v_s1_b = zeroes;
3697			vec_t v_s1_c = zeroes;
3698			vec_t v_s1_d = zeroes;
3699			vec_t v_s2_b = zeroes;
3700			vec_t v_s2_c = zeroes;
3701			vec_t v_s2_d = zeroes;
3702			vec_t v_s1_sums = zeroes;
3703			vec_t v_s1_sums_b = zeroes;
3704			vec_t v_s1_sums_c = zeroes;
3705			vec_t v_s1_sums_d = zeroes;
3706			vec_t tmp0, tmp1;
3707
3708			do {
3709			vec_t data_a = VLOADU(p + 0*VL);
3710			vec_t data_b = VLOADU(p + 1*VL);
3711			vec_t data_c = VLOADU(p + 2*VL);
3712			vec_t data_d = VLOADU(p + 3*VL);
3713
3714
3715			#if GCC_PREREQ(1, 0)
3716			__asm__("" : "+v" (data_a), "+v" (data_b),
3717			"+v" (data_c), "+v" (data_d));
3718			#endif
3719
3720			v_s2 = VDPBUSD(v_s2, data_a, mults);
3721			v_s2_b = VDPBUSD(v_s2_b, data_b, mults);
3722			v_s2_c = VDPBUSD(v_s2_c, data_c, mults);
3723			v_s2_d = VDPBUSD(v_s2_d, data_d, mults);
3724
3725			v_s1_sums = VADD32(v_s1_sums, v_s1);
3726			v_s1_sums_b = VADD32(v_s1_sums_b, v_s1_b);
3727			v_s1_sums_c = VADD32(v_s1_sums_c, v_s1_c);
3728			v_s1_sums_d = VADD32(v_s1_sums_d, v_s1_d);
3729
3730			v_s1 = VDPBUSD(v_s1, data_a, ones);
3731			v_s1_b = VDPBUSD(v_s1_b, data_b, ones);
3732			v_s1_c = VDPBUSD(v_s1_c, data_c, ones);
3733			v_s1_d = VDPBUSD(v_s1_d, data_d, ones);
3734
3735
3736			#if GCC_PREREQ(1, 0) && !defined(ARCH_X86_32)
3737			__asm__("" : "+v" (v_s2), "+v" (v_s2_b),
3738			"+v" (v_s2_c), "+v" (v_s2_d),
3739			"+v" (v_s1_sums),
3740			"+v" (v_s1_sums_b),
3741			"+v" (v_s1_sums_c),
3742			"+v" (v_s1_sums_d),
3743			"+v" (v_s1), "+v" (v_s1_b),
3744			"+v" (v_s1_c), "+v" (v_s1_d));
3745			#endif
3746			p += 4*VL;
3747			n -= 4*VL;
3748			} while (n >= 4*VL);
3749
3750
3751			tmp0 = VADD32(v_s1, v_s1_b);
3752			tmp1 = VADD32(v_s1, v_s1_c);
3753			v_s1_sums = VADD32_4X(v_s1_sums, v_s1_sums_b,
3754			v_s1_sums_c, v_s1_sums_d);
3755			v_s1 = VADD32_3X(tmp0, v_s1_c, v_s1_d);
3756			v_s2 = VADD32_7X(VSLL32(v_s1_sums, LOG2_VL + 2),
3757			VSLL32(tmp0, LOG2_VL + 1),
3758			VSLL32(tmp1, LOG2_VL),
3759			v_s2, v_s2_b, v_s2_c, v_s2_d);
3760			}
3761
3762
3763			if (n >= 2*VL) {
3764			const vec_t data_a = VLOADU(p + 0*VL);
3765			const vec_t data_b = VLOADU(p + 1*VL);
3766
3767			v_s2 = VADD32(v_s2, VSLL32(v_s1, LOG2_VL + 1));
3768			v_s1 = VDPBUSD(v_s1, data_a, ones);
3769			v_s1 = VDPBUSD(v_s1, data_b, ones);
3770			v_s2 = VDPBUSD(v_s2, data_a, VSET1_8(VL));
3771			v_s2 = VDPBUSD(v_s2, data_a, mults);
3772			v_s2 = VDPBUSD(v_s2, data_b, mults);
3773			p += 2*VL;
3774			n -= 2*VL;
3775			}
3776			if (n) {
3777
3778			vec_t data;
3779
3780			v_s2 = VADD32(v_s2, VMULLO32(v_s1, VSET1_32(n)));
3781
3782			mults = VADD8(mults, VSET1_8((int)n - VL));
3783			if (n > VL) {
3784			data = VLOADU(p);
3785			v_s1 = VDPBUSD(v_s1, data, ones);
3786			v_s2 = VDPBUSD(v_s2, data, mults);
3787			p += VL;
3788			n -= VL;
3789			mults = VADD8(mults, VSET1_8(-VL));
3790			}
3791
3792			#if USE_AVX512
3793			data = VMASKZ_LOADU((mask_t)-1 >> (VL - n), p);
3794			#else
3795			data = zeroes;
3796			memcpy(&data, p, n);
3797			#endif
3798			v_s1 = VDPBUSD(v_s1, data, ones);
3799			v_s2 = VDPBUSD(v_s2, data, mults);
3800			p += n;
3801			}
3802
3803			reduce_to_32bits(v_s1, v_s2, &s1, &s2);
3804			s1 %= DIVISOR;
3805			s2 %= DIVISOR;
3806			}
3807			#else
3808
3809	0	0	while (len) {
3810
3811	0		size_t n = MIN(len, MIN(2 * VL * (INT16_MAX / UINT8_MAX),
3812			MAX_CHUNK_LEN) & ~(2*VL - 1));
3813	0		len -= n;
3814
3815	0	0	if (n >= 2*VL) {
3816	0		vec_t v_s1 = zeroes;
3817	0		vec_t v_s1_sums = zeroes;
3818	0		vec_t v_byte_sums_a = zeroes;
3819	0		vec_t v_byte_sums_b = zeroes;
3820	0		vec_t v_byte_sums_c = zeroes;
3821	0		vec_t v_byte_sums_d = zeroes;
3822			vec_t v_s2;
3823
3824	0		s2 += s1 * (n & ~(2*VL - 1));
3825
3826			do {
3827	0		vec_t data_a = VLOADU(p + 0*VL);
3828	0		vec_t data_b = VLOADU(p + 1*VL);
3829
3830	0		v_s1_sums = VADD32(v_s1_sums, v_s1);
3831	0		v_byte_sums_a = VADD16(v_byte_sums_a,
3832			VUNPACKLO8(data_a, zeroes));
3833	0		v_byte_sums_b = VADD16(v_byte_sums_b,
3834			VUNPACKHI8(data_a, zeroes));
3835	0		v_byte_sums_c = VADD16(v_byte_sums_c,
3836			VUNPACKLO8(data_b, zeroes));
3837	0		v_byte_sums_d = VADD16(v_byte_sums_d,
3838			VUNPACKHI8(data_b, zeroes));
3839	0		v_s1 = VADD32(v_s1,
3840			VADD32(VSAD8(data_a, zeroes),
3841			VSAD8(data_b, zeroes)));
3842
3843			#if GCC_PREREQ(1, 0)
3844	0		__asm__("" : "+x" (v_s1), "+x" (v_s1_sums),
3845			"+x" (v_byte_sums_a),
3846			"+x" (v_byte_sums_b),
3847			"+x" (v_byte_sums_c),
3848			"+x" (v_byte_sums_d));
3849			#endif
3850	0		p += 2*VL;
3851	0		n -= 2*VL;
3852	0	0	} while (n >= 2*VL);
3853
3854
3855	0		v_s2 = VADD32_5X(VSLL32(v_s1_sums, LOG2_VL + 1),
3856			VMADD16(v_byte_sums_a, mults_a),
3857			VMADD16(v_byte_sums_b, mults_b),
3858			VMADD16(v_byte_sums_c, mults_c),
3859			VMADD16(v_byte_sums_d, mults_d));
3860			reduce_to_32bits(v_s1, v_s2, &s1, &s2);
3861			}
3862
3863	0	0	ADLER32_CHUNK(s1, s2, p, n);
		0
		0
3864			}
3865			#endif
3866	0		return (s2 << 16) | s1;
3867			}
3868
3869			#undef vec_t
3870			#undef mask_t
3871			#undef LOG2_VL
3872			#undef VADD8
3873			#undef VADD16
3874			#undef VADD32
3875			#undef VDPBUSD
3876			#undef VLOAD
3877			#undef VLOADU
3878			#undef VMADD16
3879			#undef VMASKZ_LOADU
3880			#undef VMULLO32
3881			#undef VSAD8
3882			#undef VSET1_8
3883			#undef VSET1_32
3884			#undef VSETZERO
3885			#undef VSLL32
3886			#undef VUNPACKLO8
3887			#undef VUNPACKHI8
3888
3889			#undef SUFFIX
3890			#undef ATTRIBUTES
3891			#undef VL
3892			#undef USE_VNNI
3893			#undef USE_AVX512
3894
3895
3896			# define adler32_x86_avx2 adler32_x86_avx2
3897			# define SUFFIX _avx2
3898			# define ATTRIBUTES _target_attribute("avx2")
3899			# define VL 32
3900			# define USE_VNNI 0
3901			# define USE_AVX512 0
3902			/* #include "x86-adler32_template.h" */
3903
3904
3905
3906
3907			#if VL == 16
3908			# define vec_t __m128i
3909			# define mask_t u16
3910			# define LOG2_VL 4
3911			# define VADD8(a, b) _mm_add_epi8((a), (b))
3912			# define VADD16(a, b) _mm_add_epi16((a), (b))
3913			# define VADD32(a, b) _mm_add_epi32((a), (b))
3914			# if USE_AVX512
3915			# define VDPBUSD(a, b, c) _mm_dpbusd_epi32((a), (b), (c))
3916			# else
3917			# define VDPBUSD(a, b, c) _mm_dpbusd_avx_epi32((a), (b), (c))
3918			# endif
3919			# define VLOAD(p) _mm_load_si128((const void *)(p))
3920			# define VLOADU(p) _mm_loadu_si128((const void *)(p))
3921			# define VMADD16(a, b) _mm_madd_epi16((a), (b))
3922			# define VMASKZ_LOADU(mask, p) _mm_maskz_loadu_epi8((mask), (p))
3923			# define VMULLO32(a, b) _mm_mullo_epi32((a), (b))
3924			# define VSAD8(a, b) _mm_sad_epu8((a), (b))
3925			# define VSET1_8(a) _mm_set1_epi8(a)
3926			# define VSET1_32(a) _mm_set1_epi32(a)
3927			# define VSETZERO() _mm_setzero_si128()
3928			# define VSLL32(a, b) _mm_slli_epi32((a), (b))
3929			# define VUNPACKLO8(a, b) _mm_unpacklo_epi8((a), (b))
3930			# define VUNPACKHI8(a, b) _mm_unpackhi_epi8((a), (b))
3931			#elif VL == 32
3932			# define vec_t __m256i
3933			# define mask_t u32
3934			# define LOG2_VL 5
3935			# define VADD8(a, b) _mm256_add_epi8((a), (b))
3936			# define VADD16(a, b) _mm256_add_epi16((a), (b))
3937			# define VADD32(a, b) _mm256_add_epi32((a), (b))
3938			# if USE_AVX512
3939			# define VDPBUSD(a, b, c) _mm256_dpbusd_epi32((a), (b), (c))
3940			# else
3941			# define VDPBUSD(a, b, c) _mm256_dpbusd_avx_epi32((a), (b), (c))
3942			# endif
3943			# define VLOAD(p) _mm256_load_si256((const void *)(p))
3944			# define VLOADU(p) _mm256_loadu_si256((const void *)(p))
3945			# define VMADD16(a, b) _mm256_madd_epi16((a), (b))
3946			# define VMASKZ_LOADU(mask, p) _mm256_maskz_loadu_epi8((mask), (p))
3947			# define VMULLO32(a, b) _mm256_mullo_epi32((a), (b))
3948			# define VSAD8(a, b) _mm256_sad_epu8((a), (b))
3949			# define VSET1_8(a) _mm256_set1_epi8(a)
3950			# define VSET1_32(a) _mm256_set1_epi32(a)
3951			# define VSETZERO() _mm256_setzero_si256()
3952			# define VSLL32(a, b) _mm256_slli_epi32((a), (b))
3953			# define VUNPACKLO8(a, b) _mm256_unpacklo_epi8((a), (b))
3954			# define VUNPACKHI8(a, b) _mm256_unpackhi_epi8((a), (b))
3955			#elif VL == 64
3956			# define vec_t __m512i
3957			# define mask_t u64
3958			# define LOG2_VL 6
3959			# define VADD8(a, b) _mm512_add_epi8((a), (b))
3960			# define VADD16(a, b) _mm512_add_epi16((a), (b))
3961			# define VADD32(a, b) _mm512_add_epi32((a), (b))
3962			# define VDPBUSD(a, b, c) _mm512_dpbusd_epi32((a), (b), (c))
3963			# define VLOAD(p) _mm512_load_si512((const void *)(p))
3964			# define VLOADU(p) _mm512_loadu_si512((const void *)(p))
3965			# define VMADD16(a, b) _mm512_madd_epi16((a), (b))
3966			# define VMASKZ_LOADU(mask, p) _mm512_maskz_loadu_epi8((mask), (p))
3967			# define VMULLO32(a, b) _mm512_mullo_epi32((a), (b))
3968			# define VSAD8(a, b) _mm512_sad_epu8((a), (b))
3969			# define VSET1_8(a) _mm512_set1_epi8(a)
3970			# define VSET1_32(a) _mm512_set1_epi32(a)
3971			# define VSETZERO() _mm512_setzero_si512()
3972			# define VSLL32(a, b) _mm512_slli_epi32((a), (b))
3973			# define VUNPACKLO8(a, b) _mm512_unpacklo_epi8((a), (b))
3974			# define VUNPACKHI8(a, b) _mm512_unpackhi_epi8((a), (b))
3975			#else
3976			# error "unsupported vector length"
3977			#endif
3978
3979			#define VADD32_3X(a, b, c) VADD32(VADD32((a), (b)), (c))
3980			#define VADD32_4X(a, b, c, d) VADD32(VADD32((a), (b)), VADD32((c), (d)))
3981			#define VADD32_5X(a, b, c, d, e) VADD32((a), VADD32_4X((b), (c), (d), (e)))
3982			#define VADD32_7X(a, b, c, d, e, f, g) \
3983			VADD32(VADD32_3X((a), (b), (c)), VADD32_4X((d), (e), (f), (g)))
3984
3985
3986			#undef reduce_to_32bits
3987			static forceinline ATTRIBUTES void
3988			ADD_SUFFIX(reduce_to_32bits)(vec_t v_s1, vec_t v_s2, u32 *s1_p, u32 *s2_p)
3989			{
3990			__m128i v_s1_128, v_s2_128;
3991			#if VL == 16
3992			{
3993			v_s1_128 = v_s1;
3994			v_s2_128 = v_s2;
3995			}
3996			#else
3997			{
3998			__m256i v_s1_256, v_s2_256;
3999			#if VL == 32
4000	24		v_s1_256 = v_s1;
4001	24		v_s2_256 = v_s2;
4002			#else
4003
4004			v_s1_256 = _mm256_add_epi32(_mm512_extracti64x4_epi64(v_s1, 0),
4005			_mm512_extracti64x4_epi64(v_s1, 1));
4006			v_s2_256 = _mm256_add_epi32(_mm512_extracti64x4_epi64(v_s2, 0),
4007			_mm512_extracti64x4_epi64(v_s2, 1));
4008			#endif
4009
4010	48		v_s1_128 = _mm_add_epi32(_mm256_extracti128_si256(v_s1_256, 0),
4011	24		_mm256_extracti128_si256(v_s1_256, 1));
4012	48		v_s2_128 = _mm_add_epi32(_mm256_extracti128_si256(v_s2_256, 0),
4013	24		_mm256_extracti128_si256(v_s2_256, 1));
4014			}
4015			#endif
4016
4017
4018			#if USE_VNNI
4019			v_s1_128 = _mm_add_epi32(v_s1_128, _mm_shuffle_epi32(v_s1_128, 0x31));
4020			#endif
4021	24		v_s2_128 = _mm_add_epi32(v_s2_128, _mm_shuffle_epi32(v_s2_128, 0x31));
4022	24		v_s1_128 = _mm_add_epi32(v_s1_128, _mm_shuffle_epi32(v_s1_128, 0x02));
4023	48		v_s2_128 = _mm_add_epi32(v_s2_128, _mm_shuffle_epi32(v_s2_128, 0x02));
4024
4025	24		*s1_p += (u32)_mm_cvtsi128_si32(v_s1_128);
4026	24		*s2_p += (u32)_mm_cvtsi128_si32(v_s2_128);
4027	24		}
4028			#define reduce_to_32bits ADD_SUFFIX(reduce_to_32bits)
4029
4030			static ATTRIBUTES u32
4031	24		ADD_SUFFIX(adler32_x86)(u32 adler, const u8 *p, size_t len)
4032			{
4033			#if USE_VNNI
4034
4035			static const u8 _aligned_attribute(VL) raw_mults[VL] = {
4036			#if VL == 64
4037			64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49,
4038			48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33,
4039			#endif
4040			#if VL >= 32
4041			32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
4042			#endif
4043			16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
4044			};
4045			const vec_t ones = VSET1_8(1);
4046			#else
4047
4048			static const u16 _aligned_attribute(VL) raw_mults[4][VL / 2] = {
4049			#if VL == 16
4050			{ 32, 31, 30, 29, 28, 27, 26, 25 },
4051			{ 24, 23, 22, 21, 20, 19, 18, 17 },
4052			{ 16, 15, 14, 13, 12, 11, 10, 9 },
4053			{ 8, 7, 6, 5, 4, 3, 2, 1 },
4054			#elif VL == 32
4055			{ 64, 63, 62, 61, 60, 59, 58, 57, 48, 47, 46, 45, 44, 43, 42, 41 },
4056			{ 56, 55, 54, 53, 52, 51, 50, 49, 40, 39, 38, 37, 36, 35, 34, 33 },
4057			{ 32, 31, 30, 29, 28, 27, 26, 25, 16, 15, 14, 13, 12, 11, 10, 9 },
4058			{ 24, 23, 22, 21, 20, 19, 18, 17, 8, 7, 6, 5, 4, 3, 2, 1 },
4059			#else
4060			# error "unsupported parameters"
4061			#endif
4062			};
4063	24		const vec_t mults_a = VLOAD(raw_mults[0]);
4064	24		const vec_t mults_b = VLOAD(raw_mults[1]);
4065	24		const vec_t mults_c = VLOAD(raw_mults[2]);
4066	24		const vec_t mults_d = VLOAD(raw_mults[3]);
4067			#endif
4068	24		const vec_t zeroes = VSETZERO();
4069	24		u32 s1 = adler & 0xFFFF;
4070	24		u32 s2 = adler >> 16;
4071
4072
4073	24	50	if (unlikely(len > 65536 && ((uintptr_t)p & (VL-1)))) {
		0
4074			do {
4075	0		s1 += *p++;
4076	0		s2 += s1;
4077	0		len--;
4078	0	0	} while ((uintptr_t)p & (VL-1));
4079	0		s1 %= DIVISOR;
4080	0		s2 %= DIVISOR;
4081			}
4082
4083			#if USE_VNNI
4084
4085			while (len) {
4086
4087			size_t n = MIN(len, MAX_CHUNK_LEN & ~(4*VL - 1));
4088			vec_t mults = VLOAD(raw_mults);
4089			vec_t v_s1 = zeroes;
4090			vec_t v_s2 = zeroes;
4091
4092			s2 += s1 * n;
4093			len -= n;
4094
4095			if (n >= 4*VL) {
4096			vec_t v_s1_b = zeroes;
4097			vec_t v_s1_c = zeroes;
4098			vec_t v_s1_d = zeroes;
4099			vec_t v_s2_b = zeroes;
4100			vec_t v_s2_c = zeroes;
4101			vec_t v_s2_d = zeroes;
4102			vec_t v_s1_sums = zeroes;
4103			vec_t v_s1_sums_b = zeroes;
4104			vec_t v_s1_sums_c = zeroes;
4105			vec_t v_s1_sums_d = zeroes;
4106			vec_t tmp0, tmp1;
4107
4108			do {
4109			vec_t data_a = VLOADU(p + 0*VL);
4110			vec_t data_b = VLOADU(p + 1*VL);
4111			vec_t data_c = VLOADU(p + 2*VL);
4112			vec_t data_d = VLOADU(p + 3*VL);
4113
4114
4115			#if GCC_PREREQ(1, 0)
4116			__asm__("" : "+v" (data_a), "+v" (data_b),
4117			"+v" (data_c), "+v" (data_d));
4118			#endif
4119
4120			v_s2 = VDPBUSD(v_s2, data_a, mults);
4121			v_s2_b = VDPBUSD(v_s2_b, data_b, mults);
4122			v_s2_c = VDPBUSD(v_s2_c, data_c, mults);
4123			v_s2_d = VDPBUSD(v_s2_d, data_d, mults);
4124
4125			v_s1_sums = VADD32(v_s1_sums, v_s1);
4126			v_s1_sums_b = VADD32(v_s1_sums_b, v_s1_b);
4127			v_s1_sums_c = VADD32(v_s1_sums_c, v_s1_c);
4128			v_s1_sums_d = VADD32(v_s1_sums_d, v_s1_d);
4129
4130			v_s1 = VDPBUSD(v_s1, data_a, ones);
4131			v_s1_b = VDPBUSD(v_s1_b, data_b, ones);
4132			v_s1_c = VDPBUSD(v_s1_c, data_c, ones);
4133			v_s1_d = VDPBUSD(v_s1_d, data_d, ones);
4134
4135
4136			#if GCC_PREREQ(1, 0) && !defined(ARCH_X86_32)
4137			__asm__("" : "+v" (v_s2), "+v" (v_s2_b),
4138			"+v" (v_s2_c), "+v" (v_s2_d),
4139			"+v" (v_s1_sums),
4140			"+v" (v_s1_sums_b),
4141			"+v" (v_s1_sums_c),
4142			"+v" (v_s1_sums_d),
4143			"+v" (v_s1), "+v" (v_s1_b),
4144			"+v" (v_s1_c), "+v" (v_s1_d));
4145			#endif
4146			p += 4*VL;
4147			n -= 4*VL;
4148			} while (n >= 4*VL);
4149
4150
4151			tmp0 = VADD32(v_s1, v_s1_b);
4152			tmp1 = VADD32(v_s1, v_s1_c);
4153			v_s1_sums = VADD32_4X(v_s1_sums, v_s1_sums_b,
4154			v_s1_sums_c, v_s1_sums_d);
4155			v_s1 = VADD32_3X(tmp0, v_s1_c, v_s1_d);
4156			v_s2 = VADD32_7X(VSLL32(v_s1_sums, LOG2_VL + 2),
4157			VSLL32(tmp0, LOG2_VL + 1),
4158			VSLL32(tmp1, LOG2_VL),
4159			v_s2, v_s2_b, v_s2_c, v_s2_d);
4160			}
4161
4162
4163			if (n >= 2*VL) {
4164			const vec_t data_a = VLOADU(p + 0*VL);
4165			const vec_t data_b = VLOADU(p + 1*VL);
4166
4167			v_s2 = VADD32(v_s2, VSLL32(v_s1, LOG2_VL + 1));
4168			v_s1 = VDPBUSD(v_s1, data_a, ones);
4169			v_s1 = VDPBUSD(v_s1, data_b, ones);
4170			v_s2 = VDPBUSD(v_s2, data_a, VSET1_8(VL));
4171			v_s2 = VDPBUSD(v_s2, data_a, mults);
4172			v_s2 = VDPBUSD(v_s2, data_b, mults);
4173			p += 2*VL;
4174			n -= 2*VL;
4175			}
4176			if (n) {
4177
4178			vec_t data;
4179
4180			v_s2 = VADD32(v_s2, VMULLO32(v_s1, VSET1_32(n)));
4181
4182			mults = VADD8(mults, VSET1_8((int)n - VL));
4183			if (n > VL) {
4184			data = VLOADU(p);
4185			v_s1 = VDPBUSD(v_s1, data, ones);
4186			v_s2 = VDPBUSD(v_s2, data, mults);
4187			p += VL;
4188			n -= VL;
4189			mults = VADD8(mults, VSET1_8(-VL));
4190			}
4191
4192			#if USE_AVX512
4193			data = VMASKZ_LOADU((mask_t)-1 >> (VL - n), p);
4194			#else
4195			data = zeroes;
4196			memcpy(&data, p, n);
4197			#endif
4198			v_s1 = VDPBUSD(v_s1, data, ones);
4199			v_s2 = VDPBUSD(v_s2, data, mults);
4200			p += n;
4201			}
4202
4203			reduce_to_32bits(v_s1, v_s2, &s1, &s2);
4204			s1 %= DIVISOR;
4205			s2 %= DIVISOR;
4206			}
4207			#else
4208
4209	48	100	while (len) {
4210
4211	24		size_t n = MIN(len, MIN(2 * VL * (INT16_MAX / UINT8_MAX),
4212			MAX_CHUNK_LEN) & ~(2*VL - 1));
4213	24		len -= n;
4214
4215	24	50	if (n >= 2*VL) {
4216	24		vec_t v_s1 = zeroes;
4217	24		vec_t v_s1_sums = zeroes;
4218	24		vec_t v_byte_sums_a = zeroes;
4219	24		vec_t v_byte_sums_b = zeroes;
4220	24		vec_t v_byte_sums_c = zeroes;
4221	24		vec_t v_byte_sums_d = zeroes;
4222			vec_t v_s2;
4223
4224	24		s2 += s1 * (n & ~(2*VL - 1));
4225
4226			do {
4227	624		vec_t data_a = VLOADU(p + 0*VL);
4228	1248		vec_t data_b = VLOADU(p + 1*VL);
4229
4230	624		v_s1_sums = VADD32(v_s1_sums, v_s1);
4231	1248		v_byte_sums_a = VADD16(v_byte_sums_a,
4232			VUNPACKLO8(data_a, zeroes));
4233	1248		v_byte_sums_b = VADD16(v_byte_sums_b,
4234			VUNPACKHI8(data_a, zeroes));
4235	1248		v_byte_sums_c = VADD16(v_byte_sums_c,
4236			VUNPACKLO8(data_b, zeroes));
4237	1248		v_byte_sums_d = VADD16(v_byte_sums_d,
4238			VUNPACKHI8(data_b, zeroes));
4239	1872		v_s1 = VADD32(v_s1,
4240			VADD32(VSAD8(data_a, zeroes),
4241			VSAD8(data_b, zeroes)));
4242
4243			#if GCC_PREREQ(1, 0)
4244	624		__asm__("" : "+x" (v_s1), "+x" (v_s1_sums),
4245			"+x" (v_byte_sums_a),
4246			"+x" (v_byte_sums_b),
4247			"+x" (v_byte_sums_c),
4248			"+x" (v_byte_sums_d));
4249			#endif
4250	624		p += 2*VL;
4251	624		n -= 2*VL;
4252	624	100	} while (n >= 2*VL);
4253
4254
4255	216		v_s2 = VADD32_5X(VSLL32(v_s1_sums, LOG2_VL + 1),
4256			VMADD16(v_byte_sums_a, mults_a),
4257			VMADD16(v_byte_sums_b, mults_b),
4258			VMADD16(v_byte_sums_c, mults_c),
4259			VMADD16(v_byte_sums_d, mults_d));
4260			reduce_to_32bits(v_s1, v_s2, &s1, &s2);
4261			}
4262
4263	216	50	ADLER32_CHUNK(s1, s2, p, n);
		100
		50
4264			}
4265			#endif
4266	24		return (s2 << 16) | s1;
4267			}
4268
4269			#undef vec_t
4270			#undef mask_t
4271			#undef LOG2_VL
4272			#undef VADD8
4273			#undef VADD16
4274			#undef VADD32
4275			#undef VDPBUSD
4276			#undef VLOAD
4277			#undef VLOADU
4278			#undef VMADD16
4279			#undef VMASKZ_LOADU
4280			#undef VMULLO32
4281			#undef VSAD8
4282			#undef VSET1_8
4283			#undef VSET1_32
4284			#undef VSETZERO
4285			#undef VSLL32
4286			#undef VUNPACKLO8
4287			#undef VUNPACKHI8
4288
4289			#undef SUFFIX
4290			#undef ATTRIBUTES
4291			#undef VL
4292			#undef USE_VNNI
4293			#undef USE_AVX512
4294
4295			#endif
4296
4297
4298			#if (GCC_PREREQ(12, 1) || CLANG_PREREQ(12, 0, 13000000) || MSVC_PREREQ(1930)) && \
4299			!defined(LIBDEFLATE_ASSEMBLER_DOES_NOT_SUPPORT_AVX_VNNI)
4300			# define adler32_x86_avx2_vnni adler32_x86_avx2_vnni
4301			# define SUFFIX _avx2_vnni
4302			# define ATTRIBUTES _target_attribute("avx2,avxvnni")
4303			# define VL 32
4304			# define USE_VNNI 1
4305			# define USE_AVX512 0
4306			/* #include "x86-adler32_template.h" */
4307
4308
4309
4310
4311			#if VL == 16
4312			# define vec_t __m128i
4313			# define mask_t u16
4314			# define LOG2_VL 4
4315			# define VADD8(a, b) _mm_add_epi8((a), (b))
4316			# define VADD16(a, b) _mm_add_epi16((a), (b))
4317			# define VADD32(a, b) _mm_add_epi32((a), (b))
4318			# if USE_AVX512
4319			# define VDPBUSD(a, b, c) _mm_dpbusd_epi32((a), (b), (c))
4320			# else
4321			# define VDPBUSD(a, b, c) _mm_dpbusd_avx_epi32((a), (b), (c))
4322			# endif
4323			# define VLOAD(p) _mm_load_si128((const void *)(p))
4324			# define VLOADU(p) _mm_loadu_si128((const void *)(p))
4325			# define VMADD16(a, b) _mm_madd_epi16((a), (b))
4326			# define VMASKZ_LOADU(mask, p) _mm_maskz_loadu_epi8((mask), (p))
4327			# define VMULLO32(a, b) _mm_mullo_epi32((a), (b))
4328			# define VSAD8(a, b) _mm_sad_epu8((a), (b))
4329			# define VSET1_8(a) _mm_set1_epi8(a)
4330			# define VSET1_32(a) _mm_set1_epi32(a)
4331			# define VSETZERO() _mm_setzero_si128()
4332			# define VSLL32(a, b) _mm_slli_epi32((a), (b))
4333			# define VUNPACKLO8(a, b) _mm_unpacklo_epi8((a), (b))
4334			# define VUNPACKHI8(a, b) _mm_unpackhi_epi8((a), (b))
4335			#elif VL == 32
4336			# define vec_t __m256i
4337			# define mask_t u32
4338			# define LOG2_VL 5
4339			# define VADD8(a, b) _mm256_add_epi8((a), (b))
4340			# define VADD16(a, b) _mm256_add_epi16((a), (b))
4341			# define VADD32(a, b) _mm256_add_epi32((a), (b))
4342			# if USE_AVX512
4343			# define VDPBUSD(a, b, c) _mm256_dpbusd_epi32((a), (b), (c))
4344			# else
4345			# define VDPBUSD(a, b, c) _mm256_dpbusd_avx_epi32((a), (b), (c))
4346			# endif
4347			# define VLOAD(p) _mm256_load_si256((const void *)(p))
4348			# define VLOADU(p) _mm256_loadu_si256((const void *)(p))
4349			# define VMADD16(a, b) _mm256_madd_epi16((a), (b))
4350			# define VMASKZ_LOADU(mask, p) _mm256_maskz_loadu_epi8((mask), (p))
4351			# define VMULLO32(a, b) _mm256_mullo_epi32((a), (b))
4352			# define VSAD8(a, b) _mm256_sad_epu8((a), (b))
4353			# define VSET1_8(a) _mm256_set1_epi8(a)
4354			# define VSET1_32(a) _mm256_set1_epi32(a)
4355			# define VSETZERO() _mm256_setzero_si256()
4356			# define VSLL32(a, b) _mm256_slli_epi32((a), (b))
4357			# define VUNPACKLO8(a, b) _mm256_unpacklo_epi8((a), (b))
4358			# define VUNPACKHI8(a, b) _mm256_unpackhi_epi8((a), (b))
4359			#elif VL == 64
4360			# define vec_t __m512i
4361			# define mask_t u64
4362			# define LOG2_VL 6
4363			# define VADD8(a, b) _mm512_add_epi8((a), (b))
4364			# define VADD16(a, b) _mm512_add_epi16((a), (b))
4365			# define VADD32(a, b) _mm512_add_epi32((a), (b))
4366			# define VDPBUSD(a, b, c) _mm512_dpbusd_epi32((a), (b), (c))
4367			# define VLOAD(p) _mm512_load_si512((const void *)(p))
4368			# define VLOADU(p) _mm512_loadu_si512((const void *)(p))
4369			# define VMADD16(a, b) _mm512_madd_epi16((a), (b))
4370			# define VMASKZ_LOADU(mask, p) _mm512_maskz_loadu_epi8((mask), (p))
4371			# define VMULLO32(a, b) _mm512_mullo_epi32((a), (b))
4372			# define VSAD8(a, b) _mm512_sad_epu8((a), (b))
4373			# define VSET1_8(a) _mm512_set1_epi8(a)
4374			# define VSET1_32(a) _mm512_set1_epi32(a)
4375			# define VSETZERO() _mm512_setzero_si512()
4376			# define VSLL32(a, b) _mm512_slli_epi32((a), (b))
4377			# define VUNPACKLO8(a, b) _mm512_unpacklo_epi8((a), (b))
4378			# define VUNPACKHI8(a, b) _mm512_unpackhi_epi8((a), (b))
4379			#else
4380			# error "unsupported vector length"
4381			#endif
4382
4383			#define VADD32_3X(a, b, c) VADD32(VADD32((a), (b)), (c))
4384			#define VADD32_4X(a, b, c, d) VADD32(VADD32((a), (b)), VADD32((c), (d)))
4385			#define VADD32_5X(a, b, c, d, e) VADD32((a), VADD32_4X((b), (c), (d), (e)))
4386			#define VADD32_7X(a, b, c, d, e, f, g) \
4387			VADD32(VADD32_3X((a), (b), (c)), VADD32_4X((d), (e), (f), (g)))
4388
4389
4390			#undef reduce_to_32bits
4391			static forceinline ATTRIBUTES void
4392			ADD_SUFFIX(reduce_to_32bits)(vec_t v_s1, vec_t v_s2, u32 *s1_p, u32 *s2_p)
4393			{
4394			__m128i v_s1_128, v_s2_128;
4395			#if VL == 16
4396			{
4397			v_s1_128 = v_s1;
4398			v_s2_128 = v_s2;
4399			}
4400			#else
4401			{
4402			__m256i v_s1_256, v_s2_256;
4403			#if VL == 32
4404	0		v_s1_256 = v_s1;
4405	0		v_s2_256 = v_s2;
4406			#else
4407
4408			v_s1_256 = _mm256_add_epi32(_mm512_extracti64x4_epi64(v_s1, 0),
4409			_mm512_extracti64x4_epi64(v_s1, 1));
4410			v_s2_256 = _mm256_add_epi32(_mm512_extracti64x4_epi64(v_s2, 0),
4411			_mm512_extracti64x4_epi64(v_s2, 1));
4412			#endif
4413
4414	0		v_s1_128 = _mm_add_epi32(_mm256_extracti128_si256(v_s1_256, 0),
4415	0		_mm256_extracti128_si256(v_s1_256, 1));
4416	0		v_s2_128 = _mm_add_epi32(_mm256_extracti128_si256(v_s2_256, 0),
4417	0		_mm256_extracti128_si256(v_s2_256, 1));
4418			}
4419			#endif
4420
4421
4422			#if USE_VNNI
4423	0		v_s1_128 = _mm_add_epi32(v_s1_128, _mm_shuffle_epi32(v_s1_128, 0x31));
4424			#endif
4425	0		v_s2_128 = _mm_add_epi32(v_s2_128, _mm_shuffle_epi32(v_s2_128, 0x31));
4426	0		v_s1_128 = _mm_add_epi32(v_s1_128, _mm_shuffle_epi32(v_s1_128, 0x02));
4427	0		v_s2_128 = _mm_add_epi32(v_s2_128, _mm_shuffle_epi32(v_s2_128, 0x02));
4428
4429	0		*s1_p += (u32)_mm_cvtsi128_si32(v_s1_128);
4430	0		*s2_p += (u32)_mm_cvtsi128_si32(v_s2_128);
4431	0		}
4432			#define reduce_to_32bits ADD_SUFFIX(reduce_to_32bits)
4433
4434			static ATTRIBUTES u32
4435	0		ADD_SUFFIX(adler32_x86)(u32 adler, const u8 *p, size_t len)
4436			{
4437			#if USE_VNNI
4438
4439			static const u8 _aligned_attribute(VL) raw_mults[VL] = {
4440			#if VL == 64
4441			64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49,
4442			48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33,
4443			#endif
4444			#if VL >= 32
4445			32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
4446			#endif
4447			16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
4448			};
4449	0		const vec_t ones = VSET1_8(1);
4450			#else
4451
4452			static const u16 _aligned_attribute(VL) raw_mults[4][VL / 2] = {
4453			#if VL == 16
4454			{ 32, 31, 30, 29, 28, 27, 26, 25 },
4455			{ 24, 23, 22, 21, 20, 19, 18, 17 },
4456			{ 16, 15, 14, 13, 12, 11, 10, 9 },
4457			{ 8, 7, 6, 5, 4, 3, 2, 1 },
4458			#elif VL == 32
4459			{ 64, 63, 62, 61, 60, 59, 58, 57, 48, 47, 46, 45, 44, 43, 42, 41 },
4460			{ 56, 55, 54, 53, 52, 51, 50, 49, 40, 39, 38, 37, 36, 35, 34, 33 },
4461			{ 32, 31, 30, 29, 28, 27, 26, 25, 16, 15, 14, 13, 12, 11, 10, 9 },
4462			{ 24, 23, 22, 21, 20, 19, 18, 17, 8, 7, 6, 5, 4, 3, 2, 1 },
4463			#else
4464			# error "unsupported parameters"
4465			#endif
4466			};
4467			const vec_t mults_a = VLOAD(raw_mults[0]);
4468			const vec_t mults_b = VLOAD(raw_mults[1]);
4469			const vec_t mults_c = VLOAD(raw_mults[2]);
4470			const vec_t mults_d = VLOAD(raw_mults[3]);
4471			#endif
4472	0		const vec_t zeroes = VSETZERO();
4473	0		u32 s1 = adler & 0xFFFF;
4474	0		u32 s2 = adler >> 16;
4475
4476
4477	0	0	if (unlikely(len > 65536 && ((uintptr_t)p & (VL-1)))) {
		0
4478			do {
4479	0		s1 += *p++;
4480	0		s2 += s1;
4481	0		len--;
4482	0	0	} while ((uintptr_t)p & (VL-1));
4483	0		s1 %= DIVISOR;
4484	0		s2 %= DIVISOR;
4485			}
4486
4487			#if USE_VNNI
4488
4489	0	0	while (len) {
4490
4491	0		size_t n = MIN(len, MAX_CHUNK_LEN & ~(4*VL - 1));
4492	0		vec_t mults = VLOAD(raw_mults);
4493	0		vec_t v_s1 = zeroes;
4494	0		vec_t v_s2 = zeroes;
4495
4496	0		s2 += s1 * n;
4497	0		len -= n;
4498
4499	0	0	if (n >= 4*VL) {
4500	0		vec_t v_s1_b = zeroes;
4501	0		vec_t v_s1_c = zeroes;
4502	0		vec_t v_s1_d = zeroes;
4503	0		vec_t v_s2_b = zeroes;
4504	0		vec_t v_s2_c = zeroes;
4505	0		vec_t v_s2_d = zeroes;
4506	0		vec_t v_s1_sums = zeroes;
4507	0		vec_t v_s1_sums_b = zeroes;
4508	0		vec_t v_s1_sums_c = zeroes;
4509	0		vec_t v_s1_sums_d = zeroes;
4510			vec_t tmp0, tmp1;
4511
4512			do {
4513	0		vec_t data_a = VLOADU(p + 0*VL);
4514	0		vec_t data_b = VLOADU(p + 1*VL);
4515	0		vec_t data_c = VLOADU(p + 2*VL);
4516	0		vec_t data_d = VLOADU(p + 3*VL);
4517
4518
4519			#if GCC_PREREQ(1, 0)
4520	0		__asm__("" : "+v" (data_a), "+v" (data_b),
4521			"+v" (data_c), "+v" (data_d));
4522			#endif
4523
4524	0		v_s2 = VDPBUSD(v_s2, data_a, mults);
4525	0		v_s2_b = VDPBUSD(v_s2_b, data_b, mults);
4526	0		v_s2_c = VDPBUSD(v_s2_c, data_c, mults);
4527	0		v_s2_d = VDPBUSD(v_s2_d, data_d, mults);
4528
4529	0		v_s1_sums = VADD32(v_s1_sums, v_s1);
4530	0		v_s1_sums_b = VADD32(v_s1_sums_b, v_s1_b);
4531	0		v_s1_sums_c = VADD32(v_s1_sums_c, v_s1_c);
4532	0		v_s1_sums_d = VADD32(v_s1_sums_d, v_s1_d);
4533
4534	0		v_s1 = VDPBUSD(v_s1, data_a, ones);
4535	0		v_s1_b = VDPBUSD(v_s1_b, data_b, ones);
4536	0		v_s1_c = VDPBUSD(v_s1_c, data_c, ones);
4537	0		v_s1_d = VDPBUSD(v_s1_d, data_d, ones);
4538
4539
4540			#if GCC_PREREQ(1, 0) && !defined(ARCH_X86_32)
4541	0		__asm__("" : "+v" (v_s2), "+v" (v_s2_b),
4542			"+v" (v_s2_c), "+v" (v_s2_d),
4543			"+v" (v_s1_sums),
4544			"+v" (v_s1_sums_b),
4545			"+v" (v_s1_sums_c),
4546			"+v" (v_s1_sums_d),
4547			"+v" (v_s1), "+v" (v_s1_b),
4548			"+v" (v_s1_c), "+v" (v_s1_d));
4549			#endif
4550	0		p += 4*VL;
4551	0		n -= 4*VL;
4552	0	0	} while (n >= 4*VL);
4553
4554
4555	0		tmp0 = VADD32(v_s1, v_s1_b);
4556	0		tmp1 = VADD32(v_s1, v_s1_c);
4557	0		v_s1_sums = VADD32_4X(v_s1_sums, v_s1_sums_b,
4558			v_s1_sums_c, v_s1_sums_d);
4559	0		v_s1 = VADD32_3X(tmp0, v_s1_c, v_s1_d);
4560	0		v_s2 = VADD32_7X(VSLL32(v_s1_sums, LOG2_VL + 2),
4561			VSLL32(tmp0, LOG2_VL + 1),
4562			VSLL32(tmp1, LOG2_VL),
4563			v_s2, v_s2_b, v_s2_c, v_s2_d);
4564			}
4565
4566
4567	0	0	if (n >= 2*VL) {
4568	0		const vec_t data_a = VLOADU(p + 0*VL);
4569	0		const vec_t data_b = VLOADU(p + 1*VL);
4570
4571	0		v_s2 = VADD32(v_s2, VSLL32(v_s1, LOG2_VL + 1));
4572	0		v_s1 = VDPBUSD(v_s1, data_a, ones);
4573	0		v_s1 = VDPBUSD(v_s1, data_b, ones);
4574	0		v_s2 = VDPBUSD(v_s2, data_a, VSET1_8(VL));
4575	0		v_s2 = VDPBUSD(v_s2, data_a, mults);
4576	0		v_s2 = VDPBUSD(v_s2, data_b, mults);
4577	0		p += 2*VL;
4578	0		n -= 2*VL;
4579			}
4580	0	0	if (n) {
4581
4582			vec_t data;
4583
4584	0		v_s2 = VADD32(v_s2, VMULLO32(v_s1, VSET1_32(n)));
4585
4586	0		mults = VADD8(mults, VSET1_8((int)n - VL));
4587	0	0	if (n > VL) {
4588	0		data = VLOADU(p);
4589	0		v_s1 = VDPBUSD(v_s1, data, ones);
4590	0		v_s2 = VDPBUSD(v_s2, data, mults);
4591	0		p += VL;
4592	0		n -= VL;
4593	0		mults = VADD8(mults, VSET1_8(-VL));
4594			}
4595
4596			#if USE_AVX512
4597			data = VMASKZ_LOADU((mask_t)-1 >> (VL - n), p);
4598			#else
4599	0		data = zeroes;
4600	0		memcpy(&data, p, n);
4601			#endif
4602	0		v_s1 = VDPBUSD(v_s1, data, ones);
4603	0		v_s2 = VDPBUSD(v_s2, data, mults);
4604	0		p += n;
4605			}
4606
4607			reduce_to_32bits(v_s1, v_s2, &s1, &s2);
4608	0		s1 %= DIVISOR;
4609	0		s2 %= DIVISOR;
4610			}
4611			#else
4612
4613			while (len) {
4614
4615			size_t n = MIN(len, MIN(2 * VL * (INT16_MAX / UINT8_MAX),
4616			MAX_CHUNK_LEN) & ~(2*VL - 1));
4617			len -= n;
4618
4619			if (n >= 2*VL) {
4620			vec_t v_s1 = zeroes;
4621			vec_t v_s1_sums = zeroes;
4622			vec_t v_byte_sums_a = zeroes;
4623			vec_t v_byte_sums_b = zeroes;
4624			vec_t v_byte_sums_c = zeroes;
4625			vec_t v_byte_sums_d = zeroes;
4626			vec_t v_s2;
4627
4628			s2 += s1 * (n & ~(2*VL - 1));
4629
4630			do {
4631			vec_t data_a = VLOADU(p + 0*VL);
4632			vec_t data_b = VLOADU(p + 1*VL);
4633
4634			v_s1_sums = VADD32(v_s1_sums, v_s1);
4635			v_byte_sums_a = VADD16(v_byte_sums_a,
4636			VUNPACKLO8(data_a, zeroes));
4637			v_byte_sums_b = VADD16(v_byte_sums_b,
4638			VUNPACKHI8(data_a, zeroes));
4639			v_byte_sums_c = VADD16(v_byte_sums_c,
4640			VUNPACKLO8(data_b, zeroes));
4641			v_byte_sums_d = VADD16(v_byte_sums_d,
4642			VUNPACKHI8(data_b, zeroes));
4643			v_s1 = VADD32(v_s1,
4644			VADD32(VSAD8(data_a, zeroes),
4645			VSAD8(data_b, zeroes)));
4646
4647			#if GCC_PREREQ(1, 0)
4648			__asm__("" : "+x" (v_s1), "+x" (v_s1_sums),
4649			"+x" (v_byte_sums_a),
4650			"+x" (v_byte_sums_b),
4651			"+x" (v_byte_sums_c),
4652			"+x" (v_byte_sums_d));
4653			#endif
4654			p += 2*VL;
4655			n -= 2*VL;
4656			} while (n >= 2*VL);
4657
4658
4659			v_s2 = VADD32_5X(VSLL32(v_s1_sums, LOG2_VL + 1),
4660			VMADD16(v_byte_sums_a, mults_a),
4661			VMADD16(v_byte_sums_b, mults_b),
4662			VMADD16(v_byte_sums_c, mults_c),
4663			VMADD16(v_byte_sums_d, mults_d));
4664			reduce_to_32bits(v_s1, v_s2, &s1, &s2);
4665			}
4666
4667			ADLER32_CHUNK(s1, s2, p, n);
4668			}
4669			#endif
4670	0		return (s2 << 16) | s1;
4671			}
4672
4673			#undef vec_t
4674			#undef mask_t
4675			#undef LOG2_VL
4676			#undef VADD8
4677			#undef VADD16
4678			#undef VADD32
4679			#undef VDPBUSD
4680			#undef VLOAD
4681			#undef VLOADU
4682			#undef VMADD16
4683			#undef VMASKZ_LOADU
4684			#undef VMULLO32
4685			#undef VSAD8
4686			#undef VSET1_8
4687			#undef VSET1_32
4688			#undef VSETZERO
4689			#undef VSLL32
4690			#undef VUNPACKLO8
4691			#undef VUNPACKHI8
4692
4693			#undef SUFFIX
4694			#undef ATTRIBUTES
4695			#undef VL
4696			#undef USE_VNNI
4697			#undef USE_AVX512
4698
4699			#endif
4700
4701			#if (GCC_PREREQ(8, 1) || CLANG_PREREQ(6, 0, 10000000) || MSVC_PREREQ(1920)) && \
4702			!defined(LIBDEFLATE_ASSEMBLER_DOES_NOT_SUPPORT_AVX512VNNI)
4703
4704			# define adler32_x86_avx512_vl256_vnni adler32_x86_avx512_vl256_vnni
4705			# define SUFFIX _avx512_vl256_vnni
4706			# define ATTRIBUTES _target_attribute("avx512bw,avx512vl,avx512vnni")
4707			# define VL 32
4708			# define USE_VNNI 1
4709			# define USE_AVX512 1
4710			/* #include "x86-adler32_template.h" */
4711
4712
4713
4714
4715			#if VL == 16
4716			# define vec_t __m128i
4717			# define mask_t u16
4718			# define LOG2_VL 4
4719			# define VADD8(a, b) _mm_add_epi8((a), (b))
4720			# define VADD16(a, b) _mm_add_epi16((a), (b))
4721			# define VADD32(a, b) _mm_add_epi32((a), (b))
4722			# if USE_AVX512
4723			# define VDPBUSD(a, b, c) _mm_dpbusd_epi32((a), (b), (c))
4724			# else
4725			# define VDPBUSD(a, b, c) _mm_dpbusd_avx_epi32((a), (b), (c))
4726			# endif
4727			# define VLOAD(p) _mm_load_si128((const void *)(p))
4728			# define VLOADU(p) _mm_loadu_si128((const void *)(p))
4729			# define VMADD16(a, b) _mm_madd_epi16((a), (b))
4730			# define VMASKZ_LOADU(mask, p) _mm_maskz_loadu_epi8((mask), (p))
4731			# define VMULLO32(a, b) _mm_mullo_epi32((a), (b))
4732			# define VSAD8(a, b) _mm_sad_epu8((a), (b))
4733			# define VSET1_8(a) _mm_set1_epi8(a)
4734			# define VSET1_32(a) _mm_set1_epi32(a)
4735			# define VSETZERO() _mm_setzero_si128()
4736			# define VSLL32(a, b) _mm_slli_epi32((a), (b))
4737			# define VUNPACKLO8(a, b) _mm_unpacklo_epi8((a), (b))
4738			# define VUNPACKHI8(a, b) _mm_unpackhi_epi8((a), (b))
4739			#elif VL == 32
4740			# define vec_t __m256i
4741			# define mask_t u32
4742			# define LOG2_VL 5
4743			# define VADD8(a, b) _mm256_add_epi8((a), (b))
4744			# define VADD16(a, b) _mm256_add_epi16((a), (b))
4745			# define VADD32(a, b) _mm256_add_epi32((a), (b))
4746			# if USE_AVX512
4747			# define VDPBUSD(a, b, c) _mm256_dpbusd_epi32((a), (b), (c))
4748			# else
4749			# define VDPBUSD(a, b, c) _mm256_dpbusd_avx_epi32((a), (b), (c))
4750			# endif
4751			# define VLOAD(p) _mm256_load_si256((const void *)(p))
4752			# define VLOADU(p) _mm256_loadu_si256((const void *)(p))
4753			# define VMADD16(a, b) _mm256_madd_epi16((a), (b))
4754			# define VMASKZ_LOADU(mask, p) _mm256_maskz_loadu_epi8((mask), (p))
4755			# define VMULLO32(a, b) _mm256_mullo_epi32((a), (b))
4756			# define VSAD8(a, b) _mm256_sad_epu8((a), (b))
4757			# define VSET1_8(a) _mm256_set1_epi8(a)
4758			# define VSET1_32(a) _mm256_set1_epi32(a)
4759			# define VSETZERO() _mm256_setzero_si256()
4760			# define VSLL32(a, b) _mm256_slli_epi32((a), (b))
4761			# define VUNPACKLO8(a, b) _mm256_unpacklo_epi8((a), (b))
4762			# define VUNPACKHI8(a, b) _mm256_unpackhi_epi8((a), (b))
4763			#elif VL == 64
4764			# define vec_t __m512i
4765			# define mask_t u64
4766			# define LOG2_VL 6
4767			# define VADD8(a, b) _mm512_add_epi8((a), (b))
4768			# define VADD16(a, b) _mm512_add_epi16((a), (b))
4769			# define VADD32(a, b) _mm512_add_epi32((a), (b))
4770			# define VDPBUSD(a, b, c) _mm512_dpbusd_epi32((a), (b), (c))
4771			# define VLOAD(p) _mm512_load_si512((const void *)(p))
4772			# define VLOADU(p) _mm512_loadu_si512((const void *)(p))
4773			# define VMADD16(a, b) _mm512_madd_epi16((a), (b))
4774			# define VMASKZ_LOADU(mask, p) _mm512_maskz_loadu_epi8((mask), (p))
4775			# define VMULLO32(a, b) _mm512_mullo_epi32((a), (b))
4776			# define VSAD8(a, b) _mm512_sad_epu8((a), (b))
4777			# define VSET1_8(a) _mm512_set1_epi8(a)
4778			# define VSET1_32(a) _mm512_set1_epi32(a)
4779			# define VSETZERO() _mm512_setzero_si512()
4780			# define VSLL32(a, b) _mm512_slli_epi32((a), (b))
4781			# define VUNPACKLO8(a, b) _mm512_unpacklo_epi8((a), (b))
4782			# define VUNPACKHI8(a, b) _mm512_unpackhi_epi8((a), (b))
4783			#else
4784			# error "unsupported vector length"
4785			#endif
4786
4787			#define VADD32_3X(a, b, c) VADD32(VADD32((a), (b)), (c))
4788			#define VADD32_4X(a, b, c, d) VADD32(VADD32((a), (b)), VADD32((c), (d)))
4789			#define VADD32_5X(a, b, c, d, e) VADD32((a), VADD32_4X((b), (c), (d), (e)))
4790			#define VADD32_7X(a, b, c, d, e, f, g) \
4791			VADD32(VADD32_3X((a), (b), (c)), VADD32_4X((d), (e), (f), (g)))
4792
4793
4794			#undef reduce_to_32bits
4795			static forceinline ATTRIBUTES void
4796			ADD_SUFFIX(reduce_to_32bits)(vec_t v_s1, vec_t v_s2, u32 *s1_p, u32 *s2_p)
4797			{
4798			__m128i v_s1_128, v_s2_128;
4799			#if VL == 16
4800			{
4801			v_s1_128 = v_s1;
4802			v_s2_128 = v_s2;
4803			}
4804			#else
4805			{
4806			__m256i v_s1_256, v_s2_256;
4807			#if VL == 32
4808	0		v_s1_256 = v_s1;
4809	0		v_s2_256 = v_s2;
4810			#else
4811
4812			v_s1_256 = _mm256_add_epi32(_mm512_extracti64x4_epi64(v_s1, 0),
4813			_mm512_extracti64x4_epi64(v_s1, 1));
4814			v_s2_256 = _mm256_add_epi32(_mm512_extracti64x4_epi64(v_s2, 0),
4815			_mm512_extracti64x4_epi64(v_s2, 1));
4816			#endif
4817
4818	0		v_s1_128 = _mm_add_epi32(_mm256_extracti128_si256(v_s1_256, 0),
4819	0		_mm256_extracti128_si256(v_s1_256, 1));
4820	0		v_s2_128 = _mm_add_epi32(_mm256_extracti128_si256(v_s2_256, 0),
4821	0		_mm256_extracti128_si256(v_s2_256, 1));
4822			}
4823			#endif
4824
4825
4826			#if USE_VNNI
4827	0		v_s1_128 = _mm_add_epi32(v_s1_128, _mm_shuffle_epi32(v_s1_128, 0x31));
4828			#endif
4829	0		v_s2_128 = _mm_add_epi32(v_s2_128, _mm_shuffle_epi32(v_s2_128, 0x31));
4830	0		v_s1_128 = _mm_add_epi32(v_s1_128, _mm_shuffle_epi32(v_s1_128, 0x02));
4831	0		v_s2_128 = _mm_add_epi32(v_s2_128, _mm_shuffle_epi32(v_s2_128, 0x02));
4832
4833	0		*s1_p += (u32)_mm_cvtsi128_si32(v_s1_128);
4834	0		*s2_p += (u32)_mm_cvtsi128_si32(v_s2_128);
4835	0		}
4836			#define reduce_to_32bits ADD_SUFFIX(reduce_to_32bits)
4837
4838			static ATTRIBUTES u32
4839	0		ADD_SUFFIX(adler32_x86)(u32 adler, const u8 *p, size_t len)
4840			{
4841			#if USE_VNNI
4842
4843			static const u8 _aligned_attribute(VL) raw_mults[VL] = {
4844			#if VL == 64
4845			64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49,
4846			48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33,
4847			#endif
4848			#if VL >= 32
4849			32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
4850			#endif
4851			16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
4852			};
4853	0		const vec_t ones = VSET1_8(1);
4854			#else
4855
4856			static const u16 _aligned_attribute(VL) raw_mults[4][VL / 2] = {
4857			#if VL == 16
4858			{ 32, 31, 30, 29, 28, 27, 26, 25 },
4859			{ 24, 23, 22, 21, 20, 19, 18, 17 },
4860			{ 16, 15, 14, 13, 12, 11, 10, 9 },
4861			{ 8, 7, 6, 5, 4, 3, 2, 1 },
4862			#elif VL == 32
4863			{ 64, 63, 62, 61, 60, 59, 58, 57, 48, 47, 46, 45, 44, 43, 42, 41 },
4864			{ 56, 55, 54, 53, 52, 51, 50, 49, 40, 39, 38, 37, 36, 35, 34, 33 },
4865			{ 32, 31, 30, 29, 28, 27, 26, 25, 16, 15, 14, 13, 12, 11, 10, 9 },
4866			{ 24, 23, 22, 21, 20, 19, 18, 17, 8, 7, 6, 5, 4, 3, 2, 1 },
4867			#else
4868			# error "unsupported parameters"
4869			#endif
4870			};
4871			const vec_t mults_a = VLOAD(raw_mults[0]);
4872			const vec_t mults_b = VLOAD(raw_mults[1]);
4873			const vec_t mults_c = VLOAD(raw_mults[2]);
4874			const vec_t mults_d = VLOAD(raw_mults[3]);
4875			#endif
4876	0		const vec_t zeroes = VSETZERO();
4877	0		u32 s1 = adler & 0xFFFF;
4878	0		u32 s2 = adler >> 16;
4879
4880
4881	0	0	if (unlikely(len > 65536 && ((uintptr_t)p & (VL-1)))) {
		0
4882			do {
4883	0		s1 += *p++;
4884	0		s2 += s1;
4885	0		len--;
4886	0	0	} while ((uintptr_t)p & (VL-1));
4887	0		s1 %= DIVISOR;
4888	0		s2 %= DIVISOR;
4889			}
4890
4891			#if USE_VNNI
4892
4893	0	0	while (len) {
4894
4895	0		size_t n = MIN(len, MAX_CHUNK_LEN & ~(4*VL - 1));
4896	0		vec_t mults = VLOAD(raw_mults);
4897	0		vec_t v_s1 = zeroes;
4898	0		vec_t v_s2 = zeroes;
4899
4900	0		s2 += s1 * n;
4901	0		len -= n;
4902
4903	0	0	if (n >= 4*VL) {
4904	0		vec_t v_s1_b = zeroes;
4905	0		vec_t v_s1_c = zeroes;
4906	0		vec_t v_s1_d = zeroes;
4907	0		vec_t v_s2_b = zeroes;
4908	0		vec_t v_s2_c = zeroes;
4909	0		vec_t v_s2_d = zeroes;
4910	0		vec_t v_s1_sums = zeroes;
4911	0		vec_t v_s1_sums_b = zeroes;
4912	0		vec_t v_s1_sums_c = zeroes;
4913	0		vec_t v_s1_sums_d = zeroes;
4914			vec_t tmp0, tmp1;
4915
4916			do {
4917	0		vec_t data_a = VLOADU(p + 0*VL);
4918	0		vec_t data_b = VLOADU(p + 1*VL);
4919	0		vec_t data_c = VLOADU(p + 2*VL);
4920	0		vec_t data_d = VLOADU(p + 3*VL);
4921
4922
4923			#if GCC_PREREQ(1, 0)
4924	0		__asm__("" : "+v" (data_a), "+v" (data_b),
4925			"+v" (data_c), "+v" (data_d));
4926			#endif
4927
4928	0		v_s2 = VDPBUSD(v_s2, data_a, mults);
4929	0		v_s2_b = VDPBUSD(v_s2_b, data_b, mults);
4930	0		v_s2_c = VDPBUSD(v_s2_c, data_c, mults);
4931	0		v_s2_d = VDPBUSD(v_s2_d, data_d, mults);
4932
4933	0		v_s1_sums = VADD32(v_s1_sums, v_s1);
4934	0		v_s1_sums_b = VADD32(v_s1_sums_b, v_s1_b);
4935	0		v_s1_sums_c = VADD32(v_s1_sums_c, v_s1_c);
4936	0		v_s1_sums_d = VADD32(v_s1_sums_d, v_s1_d);
4937
4938	0		v_s1 = VDPBUSD(v_s1, data_a, ones);
4939	0		v_s1_b = VDPBUSD(v_s1_b, data_b, ones);
4940	0		v_s1_c = VDPBUSD(v_s1_c, data_c, ones);
4941	0		v_s1_d = VDPBUSD(v_s1_d, data_d, ones);
4942
4943
4944			#if GCC_PREREQ(1, 0) && !defined(ARCH_X86_32)
4945	0		__asm__("" : "+v" (v_s2), "+v" (v_s2_b),
4946			"+v" (v_s2_c), "+v" (v_s2_d),
4947			"+v" (v_s1_sums),
4948			"+v" (v_s1_sums_b),
4949			"+v" (v_s1_sums_c),
4950			"+v" (v_s1_sums_d),
4951			"+v" (v_s1), "+v" (v_s1_b),
4952			"+v" (v_s1_c), "+v" (v_s1_d));
4953			#endif
4954	0		p += 4*VL;
4955	0		n -= 4*VL;
4956	0	0	} while (n >= 4*VL);
4957
4958
4959	0		tmp0 = VADD32(v_s1, v_s1_b);
4960	0		tmp1 = VADD32(v_s1, v_s1_c);
4961	0		v_s1_sums = VADD32_4X(v_s1_sums, v_s1_sums_b,
4962			v_s1_sums_c, v_s1_sums_d);
4963	0		v_s1 = VADD32_3X(tmp0, v_s1_c, v_s1_d);
4964	0		v_s2 = VADD32_7X(VSLL32(v_s1_sums, LOG2_VL + 2),
4965			VSLL32(tmp0, LOG2_VL + 1),
4966			VSLL32(tmp1, LOG2_VL),
4967			v_s2, v_s2_b, v_s2_c, v_s2_d);
4968			}
4969
4970
4971	0	0	if (n >= 2*VL) {
4972	0		const vec_t data_a = VLOADU(p + 0*VL);
4973	0		const vec_t data_b = VLOADU(p + 1*VL);
4974
4975	0		v_s2 = VADD32(v_s2, VSLL32(v_s1, LOG2_VL + 1));
4976	0		v_s1 = VDPBUSD(v_s1, data_a, ones);
4977	0		v_s1 = VDPBUSD(v_s1, data_b, ones);
4978	0		v_s2 = VDPBUSD(v_s2, data_a, VSET1_8(VL));
4979	0		v_s2 = VDPBUSD(v_s2, data_a, mults);
4980	0		v_s2 = VDPBUSD(v_s2, data_b, mults);
4981	0		p += 2*VL;
4982	0		n -= 2*VL;
4983			}
4984	0	0	if (n) {
4985
4986			vec_t data;
4987
4988	0		v_s2 = VADD32(v_s2, VMULLO32(v_s1, VSET1_32(n)));
4989
4990	0		mults = VADD8(mults, VSET1_8((int)n - VL));
4991	0	0	if (n > VL) {
4992	0		data = VLOADU(p);
4993	0		v_s1 = VDPBUSD(v_s1, data, ones);
4994	0		v_s2 = VDPBUSD(v_s2, data, mults);
4995	0		p += VL;
4996	0		n -= VL;
4997	0		mults = VADD8(mults, VSET1_8(-VL));
4998			}
4999
5000			#if USE_AVX512
5001	0		data = VMASKZ_LOADU((mask_t)-1 >> (VL - n), p);
5002			#else
5003			data = zeroes;
5004			memcpy(&data, p, n);
5005			#endif
5006	0		v_s1 = VDPBUSD(v_s1, data, ones);
5007	0		v_s2 = VDPBUSD(v_s2, data, mults);
5008	0		p += n;
5009			}
5010
5011			reduce_to_32bits(v_s1, v_s2, &s1, &s2);
5012	0		s1 %= DIVISOR;
5013	0		s2 %= DIVISOR;
5014			}
5015			#else
5016
5017			while (len) {
5018
5019			size_t n = MIN(len, MIN(2 * VL * (INT16_MAX / UINT8_MAX),
5020			MAX_CHUNK_LEN) & ~(2*VL - 1));
5021			len -= n;
5022
5023			if (n >= 2*VL) {
5024			vec_t v_s1 = zeroes;
5025			vec_t v_s1_sums = zeroes;
5026			vec_t v_byte_sums_a = zeroes;
5027			vec_t v_byte_sums_b = zeroes;
5028			vec_t v_byte_sums_c = zeroes;
5029			vec_t v_byte_sums_d = zeroes;
5030			vec_t v_s2;
5031
5032			s2 += s1 * (n & ~(2*VL - 1));
5033
5034			do {
5035			vec_t data_a = VLOADU(p + 0*VL);
5036			vec_t data_b = VLOADU(p + 1*VL);
5037
5038			v_s1_sums = VADD32(v_s1_sums, v_s1);
5039			v_byte_sums_a = VADD16(v_byte_sums_a,
5040			VUNPACKLO8(data_a, zeroes));
5041			v_byte_sums_b = VADD16(v_byte_sums_b,
5042			VUNPACKHI8(data_a, zeroes));
5043			v_byte_sums_c = VADD16(v_byte_sums_c,
5044			VUNPACKLO8(data_b, zeroes));
5045			v_byte_sums_d = VADD16(v_byte_sums_d,
5046			VUNPACKHI8(data_b, zeroes));
5047			v_s1 = VADD32(v_s1,
5048			VADD32(VSAD8(data_a, zeroes),
5049			VSAD8(data_b, zeroes)));
5050
5051			#if GCC_PREREQ(1, 0)
5052			__asm__("" : "+x" (v_s1), "+x" (v_s1_sums),
5053			"+x" (v_byte_sums_a),
5054			"+x" (v_byte_sums_b),
5055			"+x" (v_byte_sums_c),
5056			"+x" (v_byte_sums_d));
5057			#endif
5058			p += 2*VL;
5059			n -= 2*VL;
5060			} while (n >= 2*VL);
5061
5062
5063			v_s2 = VADD32_5X(VSLL32(v_s1_sums, LOG2_VL + 1),
5064			VMADD16(v_byte_sums_a, mults_a),
5065			VMADD16(v_byte_sums_b, mults_b),
5066			VMADD16(v_byte_sums_c, mults_c),
5067			VMADD16(v_byte_sums_d, mults_d));
5068			reduce_to_32bits(v_s1, v_s2, &s1, &s2);
5069			}
5070
5071			ADLER32_CHUNK(s1, s2, p, n);
5072			}
5073			#endif
5074	0		return (s2 << 16) | s1;
5075			}
5076
5077			#undef vec_t
5078			#undef mask_t
5079			#undef LOG2_VL
5080			#undef VADD8
5081			#undef VADD16
5082			#undef VADD32
5083			#undef VDPBUSD
5084			#undef VLOAD
5085			#undef VLOADU
5086			#undef VMADD16
5087			#undef VMASKZ_LOADU
5088			#undef VMULLO32
5089			#undef VSAD8
5090			#undef VSET1_8
5091			#undef VSET1_32
5092			#undef VSETZERO
5093			#undef VSLL32
5094			#undef VUNPACKLO8
5095			#undef VUNPACKHI8
5096
5097			#undef SUFFIX
5098			#undef ATTRIBUTES
5099			#undef VL
5100			#undef USE_VNNI
5101			#undef USE_AVX512
5102
5103
5104
5105			# define adler32_x86_avx512_vl512_vnni adler32_x86_avx512_vl512_vnni
5106			# define SUFFIX _avx512_vl512_vnni
5107			# define ATTRIBUTES _target_attribute("avx512bw,avx512vnni")
5108			# define VL 64
5109			# define USE_VNNI 1
5110			# define USE_AVX512 1
5111			/* #include "x86-adler32_template.h" */
5112
5113
5114
5115
5116			#if VL == 16
5117			# define vec_t __m128i
5118			# define mask_t u16
5119			# define LOG2_VL 4
5120			# define VADD8(a, b) _mm_add_epi8((a), (b))
5121			# define VADD16(a, b) _mm_add_epi16((a), (b))
5122			# define VADD32(a, b) _mm_add_epi32((a), (b))
5123			# if USE_AVX512
5124			# define VDPBUSD(a, b, c) _mm_dpbusd_epi32((a), (b), (c))
5125			# else
5126			# define VDPBUSD(a, b, c) _mm_dpbusd_avx_epi32((a), (b), (c))
5127			# endif
5128			# define VLOAD(p) _mm_load_si128((const void *)(p))
5129			# define VLOADU(p) _mm_loadu_si128((const void *)(p))
5130			# define VMADD16(a, b) _mm_madd_epi16((a), (b))
5131			# define VMASKZ_LOADU(mask, p) _mm_maskz_loadu_epi8((mask), (p))
5132			# define VMULLO32(a, b) _mm_mullo_epi32((a), (b))
5133			# define VSAD8(a, b) _mm_sad_epu8((a), (b))
5134			# define VSET1_8(a) _mm_set1_epi8(a)
5135			# define VSET1_32(a) _mm_set1_epi32(a)
5136			# define VSETZERO() _mm_setzero_si128()
5137			# define VSLL32(a, b) _mm_slli_epi32((a), (b))
5138			# define VUNPACKLO8(a, b) _mm_unpacklo_epi8((a), (b))
5139			# define VUNPACKHI8(a, b) _mm_unpackhi_epi8((a), (b))
5140			#elif VL == 32
5141			# define vec_t __m256i
5142			# define mask_t u32
5143			# define LOG2_VL 5
5144			# define VADD8(a, b) _mm256_add_epi8((a), (b))
5145			# define VADD16(a, b) _mm256_add_epi16((a), (b))
5146			# define VADD32(a, b) _mm256_add_epi32((a), (b))
5147			# if USE_AVX512
5148			# define VDPBUSD(a, b, c) _mm256_dpbusd_epi32((a), (b), (c))
5149			# else
5150			# define VDPBUSD(a, b, c) _mm256_dpbusd_avx_epi32((a), (b), (c))
5151			# endif
5152			# define VLOAD(p) _mm256_load_si256((const void *)(p))
5153			# define VLOADU(p) _mm256_loadu_si256((const void *)(p))
5154			# define VMADD16(a, b) _mm256_madd_epi16((a), (b))
5155			# define VMASKZ_LOADU(mask, p) _mm256_maskz_loadu_epi8((mask), (p))
5156			# define VMULLO32(a, b) _mm256_mullo_epi32((a), (b))
5157			# define VSAD8(a, b) _mm256_sad_epu8((a), (b))
5158			# define VSET1_8(a) _mm256_set1_epi8(a)
5159			# define VSET1_32(a) _mm256_set1_epi32(a)
5160			# define VSETZERO() _mm256_setzero_si256()
5161			# define VSLL32(a, b) _mm256_slli_epi32((a), (b))
5162			# define VUNPACKLO8(a, b) _mm256_unpacklo_epi8((a), (b))
5163			# define VUNPACKHI8(a, b) _mm256_unpackhi_epi8((a), (b))
5164			#elif VL == 64
5165			# define vec_t __m512i
5166			# define mask_t u64
5167			# define LOG2_VL 6
5168			# define VADD8(a, b) _mm512_add_epi8((a), (b))
5169			# define VADD16(a, b) _mm512_add_epi16((a), (b))
5170			# define VADD32(a, b) _mm512_add_epi32((a), (b))
5171			# define VDPBUSD(a, b, c) _mm512_dpbusd_epi32((a), (b), (c))
5172			# define VLOAD(p) _mm512_load_si512((const void *)(p))
5173			# define VLOADU(p) _mm512_loadu_si512((const void *)(p))
5174			# define VMADD16(a, b) _mm512_madd_epi16((a), (b))
5175			# define VMASKZ_LOADU(mask, p) _mm512_maskz_loadu_epi8((mask), (p))
5176			# define VMULLO32(a, b) _mm512_mullo_epi32((a), (b))
5177			# define VSAD8(a, b) _mm512_sad_epu8((a), (b))
5178			# define VSET1_8(a) _mm512_set1_epi8(a)
5179			# define VSET1_32(a) _mm512_set1_epi32(a)
5180			# define VSETZERO() _mm512_setzero_si512()
5181			# define VSLL32(a, b) _mm512_slli_epi32((a), (b))
5182			# define VUNPACKLO8(a, b) _mm512_unpacklo_epi8((a), (b))
5183			# define VUNPACKHI8(a, b) _mm512_unpackhi_epi8((a), (b))
5184			#else
5185			# error "unsupported vector length"
5186			#endif
5187
5188			#define VADD32_3X(a, b, c) VADD32(VADD32((a), (b)), (c))
5189			#define VADD32_4X(a, b, c, d) VADD32(VADD32((a), (b)), VADD32((c), (d)))
5190			#define VADD32_5X(a, b, c, d, e) VADD32((a), VADD32_4X((b), (c), (d), (e)))
5191			#define VADD32_7X(a, b, c, d, e, f, g) \
5192			VADD32(VADD32_3X((a), (b), (c)), VADD32_4X((d), (e), (f), (g)))
5193
5194
5195			#undef reduce_to_32bits
5196			static forceinline ATTRIBUTES void
5197			ADD_SUFFIX(reduce_to_32bits)(vec_t v_s1, vec_t v_s2, u32 *s1_p, u32 *s2_p)
5198			{
5199			__m128i v_s1_128, v_s2_128;
5200			#if VL == 16
5201			{
5202			v_s1_128 = v_s1;
5203			v_s2_128 = v_s2;
5204			}
5205			#else
5206			{
5207			__m256i v_s1_256, v_s2_256;
5208			#if VL == 32
5209			v_s1_256 = v_s1;
5210			v_s2_256 = v_s2;
5211			#else
5212
5213	0		v_s1_256 = _mm256_add_epi32(_mm512_extracti64x4_epi64(v_s1, 0),
5214	0		_mm512_extracti64x4_epi64(v_s1, 1));
5215	0		v_s2_256 = _mm256_add_epi32(_mm512_extracti64x4_epi64(v_s2, 0),
5216	0		_mm512_extracti64x4_epi64(v_s2, 1));
5217			#endif
5218
5219	0		v_s1_128 = _mm_add_epi32(_mm256_extracti128_si256(v_s1_256, 0),
5220	0		_mm256_extracti128_si256(v_s1_256, 1));
5221	0		v_s2_128 = _mm_add_epi32(_mm256_extracti128_si256(v_s2_256, 0),
5222	0		_mm256_extracti128_si256(v_s2_256, 1));
5223			}
5224			#endif
5225
5226
5227			#if USE_VNNI
5228	0		v_s1_128 = _mm_add_epi32(v_s1_128, _mm_shuffle_epi32(v_s1_128, 0x31));
5229			#endif
5230	0		v_s2_128 = _mm_add_epi32(v_s2_128, _mm_shuffle_epi32(v_s2_128, 0x31));
5231	0		v_s1_128 = _mm_add_epi32(v_s1_128, _mm_shuffle_epi32(v_s1_128, 0x02));
5232	0		v_s2_128 = _mm_add_epi32(v_s2_128, _mm_shuffle_epi32(v_s2_128, 0x02));
5233
5234	0		*s1_p += (u32)_mm_cvtsi128_si32(v_s1_128);
5235	0		*s2_p += (u32)_mm_cvtsi128_si32(v_s2_128);
5236	0		}
5237			#define reduce_to_32bits ADD_SUFFIX(reduce_to_32bits)
5238
5239			static ATTRIBUTES u32
5240	0		ADD_SUFFIX(adler32_x86)(u32 adler, const u8 *p, size_t len)
5241			{
5242			#if USE_VNNI
5243
5244			static const u8 _aligned_attribute(VL) raw_mults[VL] = {
5245			#if VL == 64
5246			64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49,
5247			48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33,
5248			#endif
5249			#if VL >= 32
5250			32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
5251			#endif
5252			16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
5253			};
5254	0		const vec_t ones = VSET1_8(1);
5255			#else
5256
5257			static const u16 _aligned_attribute(VL) raw_mults[4][VL / 2] = {
5258			#if VL == 16
5259			{ 32, 31, 30, 29, 28, 27, 26, 25 },
5260			{ 24, 23, 22, 21, 20, 19, 18, 17 },
5261			{ 16, 15, 14, 13, 12, 11, 10, 9 },
5262			{ 8, 7, 6, 5, 4, 3, 2, 1 },
5263			#elif VL == 32
5264			{ 64, 63, 62, 61, 60, 59, 58, 57, 48, 47, 46, 45, 44, 43, 42, 41 },
5265			{ 56, 55, 54, 53, 52, 51, 50, 49, 40, 39, 38, 37, 36, 35, 34, 33 },
5266			{ 32, 31, 30, 29, 28, 27, 26, 25, 16, 15, 14, 13, 12, 11, 10, 9 },
5267			{ 24, 23, 22, 21, 20, 19, 18, 17, 8, 7, 6, 5, 4, 3, 2, 1 },
5268			#else
5269			# error "unsupported parameters"
5270			#endif
5271			};
5272			const vec_t mults_a = VLOAD(raw_mults[0]);
5273			const vec_t mults_b = VLOAD(raw_mults[1]);
5274			const vec_t mults_c = VLOAD(raw_mults[2]);
5275			const vec_t mults_d = VLOAD(raw_mults[3]);
5276			#endif
5277	0		const vec_t zeroes = VSETZERO();
5278	0		u32 s1 = adler & 0xFFFF;
5279	0		u32 s2 = adler >> 16;
5280
5281
5282	0	0	if (unlikely(len > 65536 && ((uintptr_t)p & (VL-1)))) {
		0
5283			do {
5284	0		s1 += *p++;
5285	0		s2 += s1;
5286	0		len--;
5287	0	0	} while ((uintptr_t)p & (VL-1));
5288	0		s1 %= DIVISOR;
5289	0		s2 %= DIVISOR;
5290			}
5291
5292			#if USE_VNNI
5293
5294	0	0	while (len) {
5295
5296	0		size_t n = MIN(len, MAX_CHUNK_LEN & ~(4*VL - 1));
5297	0		vec_t mults = VLOAD(raw_mults);
5298	0		vec_t v_s1 = zeroes;
5299	0		vec_t v_s2 = zeroes;
5300
5301	0		s2 += s1 * n;
5302	0		len -= n;
5303
5304	0	0	if (n >= 4*VL) {
5305	0		vec_t v_s1_b = zeroes;
5306	0		vec_t v_s1_c = zeroes;
5307	0		vec_t v_s1_d = zeroes;
5308	0		vec_t v_s2_b = zeroes;
5309	0		vec_t v_s2_c = zeroes;
5310	0		vec_t v_s2_d = zeroes;
5311	0		vec_t v_s1_sums = zeroes;
5312	0		vec_t v_s1_sums_b = zeroes;
5313	0		vec_t v_s1_sums_c = zeroes;
5314	0		vec_t v_s1_sums_d = zeroes;
5315			vec_t tmp0, tmp1;
5316
5317			do {
5318	0		vec_t data_a = VLOADU(p + 0*VL);
5319	0		vec_t data_b = VLOADU(p + 1*VL);
5320	0		vec_t data_c = VLOADU(p + 2*VL);
5321	0		vec_t data_d = VLOADU(p + 3*VL);
5322
5323
5324			#if GCC_PREREQ(1, 0)
5325	0		__asm__("" : "+v" (data_a), "+v" (data_b),
5326			"+v" (data_c), "+v" (data_d));
5327			#endif
5328
5329	0		v_s2 = VDPBUSD(v_s2, data_a, mults);
5330	0		v_s2_b = VDPBUSD(v_s2_b, data_b, mults);
5331	0		v_s2_c = VDPBUSD(v_s2_c, data_c, mults);
5332	0		v_s2_d = VDPBUSD(v_s2_d, data_d, mults);
5333
5334	0		v_s1_sums = VADD32(v_s1_sums, v_s1);
5335	0		v_s1_sums_b = VADD32(v_s1_sums_b, v_s1_b);
5336	0		v_s1_sums_c = VADD32(v_s1_sums_c, v_s1_c);
5337	0		v_s1_sums_d = VADD32(v_s1_sums_d, v_s1_d);
5338
5339	0		v_s1 = VDPBUSD(v_s1, data_a, ones);
5340	0		v_s1_b = VDPBUSD(v_s1_b, data_b, ones);
5341	0		v_s1_c = VDPBUSD(v_s1_c, data_c, ones);
5342	0		v_s1_d = VDPBUSD(v_s1_d, data_d, ones);
5343
5344
5345			#if GCC_PREREQ(1, 0) && !defined(ARCH_X86_32)
5346	0		__asm__("" : "+v" (v_s2), "+v" (v_s2_b),
5347			"+v" (v_s2_c), "+v" (v_s2_d),
5348			"+v" (v_s1_sums),
5349			"+v" (v_s1_sums_b),
5350			"+v" (v_s1_sums_c),
5351			"+v" (v_s1_sums_d),
5352			"+v" (v_s1), "+v" (v_s1_b),
5353			"+v" (v_s1_c), "+v" (v_s1_d));
5354			#endif
5355	0		p += 4*VL;
5356	0		n -= 4*VL;
5357	0	0	} while (n >= 4*VL);
5358
5359
5360	0		tmp0 = VADD32(v_s1, v_s1_b);
5361	0		tmp1 = VADD32(v_s1, v_s1_c);
5362	0		v_s1_sums = VADD32_4X(v_s1_sums, v_s1_sums_b,
5363			v_s1_sums_c, v_s1_sums_d);
5364	0		v_s1 = VADD32_3X(tmp0, v_s1_c, v_s1_d);
5365	0		v_s2 = VADD32_7X(VSLL32(v_s1_sums, LOG2_VL + 2),
5366			VSLL32(tmp0, LOG2_VL + 1),
5367			VSLL32(tmp1, LOG2_VL),
5368			v_s2, v_s2_b, v_s2_c, v_s2_d);
5369			}
5370
5371
5372	0	0	if (n >= 2*VL) {
5373	0		const vec_t data_a = VLOADU(p + 0*VL);
5374	0		const vec_t data_b = VLOADU(p + 1*VL);
5375
5376	0		v_s2 = VADD32(v_s2, VSLL32(v_s1, LOG2_VL + 1));
5377	0		v_s1 = VDPBUSD(v_s1, data_a, ones);
5378	0		v_s1 = VDPBUSD(v_s1, data_b, ones);
5379	0		v_s2 = VDPBUSD(v_s2, data_a, VSET1_8(VL));
5380	0		v_s2 = VDPBUSD(v_s2, data_a, mults);
5381	0		v_s2 = VDPBUSD(v_s2, data_b, mults);
5382	0		p += 2*VL;
5383	0		n -= 2*VL;
5384			}
5385	0	0	if (n) {
5386
5387			vec_t data;
5388
5389	0		v_s2 = VADD32(v_s2, VMULLO32(v_s1, VSET1_32(n)));
5390
5391	0		mults = VADD8(mults, VSET1_8((int)n - VL));
5392	0	0	if (n > VL) {
5393	0		data = VLOADU(p);
5394	0		v_s1 = VDPBUSD(v_s1, data, ones);
5395	0		v_s2 = VDPBUSD(v_s2, data, mults);
5396	0		p += VL;
5397	0		n -= VL;
5398	0		mults = VADD8(mults, VSET1_8(-VL));
5399			}
5400
5401			#if USE_AVX512
5402	0		data = VMASKZ_LOADU((mask_t)-1 >> (VL - n), p);
5403			#else
5404			data = zeroes;
5405			memcpy(&data, p, n);
5406			#endif
5407	0		v_s1 = VDPBUSD(v_s1, data, ones);
5408	0		v_s2 = VDPBUSD(v_s2, data, mults);
5409	0		p += n;
5410			}
5411
5412			reduce_to_32bits(v_s1, v_s2, &s1, &s2);
5413	0		s1 %= DIVISOR;
5414	0		s2 %= DIVISOR;
5415			}
5416			#else
5417
5418			while (len) {
5419
5420			size_t n = MIN(len, MIN(2 * VL * (INT16_MAX / UINT8_MAX),
5421			MAX_CHUNK_LEN) & ~(2*VL - 1));
5422			len -= n;
5423
5424			if (n >= 2*VL) {
5425			vec_t v_s1 = zeroes;
5426			vec_t v_s1_sums = zeroes;
5427			vec_t v_byte_sums_a = zeroes;
5428			vec_t v_byte_sums_b = zeroes;
5429			vec_t v_byte_sums_c = zeroes;
5430			vec_t v_byte_sums_d = zeroes;
5431			vec_t v_s2;
5432
5433			s2 += s1 * (n & ~(2*VL - 1));
5434
5435			do {
5436			vec_t data_a = VLOADU(p + 0*VL);
5437			vec_t data_b = VLOADU(p + 1*VL);
5438
5439			v_s1_sums = VADD32(v_s1_sums, v_s1);
5440			v_byte_sums_a = VADD16(v_byte_sums_a,
5441			VUNPACKLO8(data_a, zeroes));
5442			v_byte_sums_b = VADD16(v_byte_sums_b,
5443			VUNPACKHI8(data_a, zeroes));
5444			v_byte_sums_c = VADD16(v_byte_sums_c,
5445			VUNPACKLO8(data_b, zeroes));
5446			v_byte_sums_d = VADD16(v_byte_sums_d,
5447			VUNPACKHI8(data_b, zeroes));
5448			v_s1 = VADD32(v_s1,
5449			VADD32(VSAD8(data_a, zeroes),
5450			VSAD8(data_b, zeroes)));
5451
5452			#if GCC_PREREQ(1, 0)
5453			__asm__("" : "+x" (v_s1), "+x" (v_s1_sums),
5454			"+x" (v_byte_sums_a),
5455			"+x" (v_byte_sums_b),
5456			"+x" (v_byte_sums_c),
5457			"+x" (v_byte_sums_d));
5458			#endif
5459			p += 2*VL;
5460			n -= 2*VL;
5461			} while (n >= 2*VL);
5462
5463
5464			v_s2 = VADD32_5X(VSLL32(v_s1_sums, LOG2_VL + 1),
5465			VMADD16(v_byte_sums_a, mults_a),
5466			VMADD16(v_byte_sums_b, mults_b),
5467			VMADD16(v_byte_sums_c, mults_c),
5468			VMADD16(v_byte_sums_d, mults_d));
5469			reduce_to_32bits(v_s1, v_s2, &s1, &s2);
5470			}
5471
5472			ADLER32_CHUNK(s1, s2, p, n);
5473			}
5474			#endif
5475	0		return (s2 << 16) \| s1;
5476			}
5477
5478			#undef vec_t
5479			#undef mask_t
5480			#undef LOG2_VL
5481			#undef VADD8
5482			#undef VADD16
5483			#undef VADD32
5484			#undef VDPBUSD
5485			#undef VLOAD
5486			#undef VLOADU
5487			#undef VMADD16
5488			#undef VMASKZ_LOADU
5489			#undef VMULLO32
5490			#undef VSAD8
5491			#undef VSET1_8
5492			#undef VSET1_32
5493			#undef VSETZERO
5494			#undef VSLL32
5495			#undef VUNPACKLO8
5496			#undef VUNPACKHI8
5497
5498			#undef SUFFIX
5499			#undef ATTRIBUTES
5500			#undef VL
5501			#undef USE_VNNI
5502			#undef USE_AVX512
5503
5504			#endif
5505			
			/*
			 * Pick an x86 Adler-32 routine from the CPU feature word returned by
			 * get_x86_cpu_features().
			 *
			 * Candidates are tested in the order written below (avx512_vl512_vnni,
			 * avx512_vl256_vnni, avx2_vnni, avx2, sse2).  Each test is compiled in
			 * only when the candidate's function name is defined as a macro, so the
			 * set that is actually tried depends on the build.
			 *
			 * Returns the first candidate whose feature test passes, or NULL when
			 * none does.
			 */
5506			static inline adler32_func_t
5507	1		arch_select_adler32_func(void)
5508			{
			/* MAYBE_UNUSED: with every candidate compiled out nothing reads this. */
5509	1		const u32 features MAYBE_UNUSED = get_x86_cpu_features();
5510			
5511			#ifdef adler32_x86_avx512_vl512_vnni
			/* The vl512 variant additionally requires the X86_CPU_FEATURE_ZMM bit. */
5512	1	50	if ((features & X86_CPU_FEATURE_ZMM) &&
5513	0	0	HAVE_AVX512BW(features) && HAVE_AVX512VNNI(features))
		0
5514	0		return adler32_x86_avx512_vl512_vnni;
5515			#endif
5516			#ifdef adler32_x86_avx512_vl256_vnni
5517	1	50	if (HAVE_AVX512BW(features) && HAVE_AVX512VL(features) &&
		0
5518	0	0	HAVE_AVX512VNNI(features))
5519	0		return adler32_x86_avx512_vl256_vnni;
5520			#endif
5521			#ifdef adler32_x86_avx2_vnni
5522	1	50	if (HAVE_AVX2(features) && HAVE_AVXVNNI(features))
		50
5523	0		return adler32_x86_avx2_vnni;
5524			#endif
5525			#ifdef adler32_x86_avx2
5526	1	50	if (HAVE_AVX2(features))
5527	1		return adler32_x86_avx2;
5528			#endif
5529			#ifdef adler32_x86_sse2
5530			if (HAVE_SSE2(features))
5531	0		return adler32_x86_sse2;
5532			#endif
			/* No compiled-in candidate matched this CPU. */
5533			return NULL;
5534			}
			/* Self-define so that later code can detect the selector with #ifdef. */
5535			#define arch_select_adler32_func arch_select_adler32_func
5536
5537			#endif
5538
5539			#endif
5540			
			/*
			 * Binding of adler32_impl, the name the public entry point calls.
			 *
			 * DEFAULT_IMPL is adler32_generic unless it was already defined before
			 * this point.
			 *
			 * When arch_select_adler32_func is defined, adler32_impl is a function
			 * pointer that initially points at adler32_dispatch_adler32().  The
			 * dispatcher runs the selector, substitutes DEFAULT_IMPL if the
			 * selector returned NULL, stores the choice back into adler32_impl and
			 * forwards the current call to it.
			 *
			 * When no selector is defined, adler32_impl is simply a macro alias for
			 * DEFAULT_IMPL and no pointer exists.
			 */
5541			#ifndef DEFAULT_IMPL
5542			# define DEFAULT_IMPL adler32_generic
5543			#endif
5544			
5545			#ifdef arch_select_adler32_func
5546			static u32 adler32_dispatch_adler32(u32 adler, const u8 *p, size_t len);
5547			
			/*
			 * NOTE(review): the pointer is volatile, not atomic, and nothing here
			 * serialises concurrent first calls.  Presumably that is acceptable
			 * because every caller would select and store the same function --
			 * confirm against upstream.
			 */
5548			static volatile adler32_func_t adler32_impl = adler32_dispatch_adler32;
5549			
5550			
5551	1		static u32 adler32_dispatch_adler32(u32 adler, const u8 *p, size_t len)
5552			{
5553	1		adler32_func_t f = arch_select_adler32_func();
5554			
5555	1	50	if (f == NULL)
5556	0		f = DEFAULT_IMPL;
5557			
			/* Cache the choice, then serve this call with it. */
5558	1		adler32_impl = f;
5559	1		return f(adler, p, len);
5560			}
5561			#else
5562			
5563			#define adler32_impl DEFAULT_IMPL
5564			#endif
5565			
			/*
			 * Public Adler-32 entry point.
			 *
			 * adler:  checksum value passed through to the implementation.
			 * buffer: data to checksum; may be NULL.
			 * len:    passed through to the implementation as the byte count.
			 *
			 * A NULL buffer returns 1 without looking at adler or len (presumably
			 * the initial Adler-32 value -- confirm against the public header
			 * documentation).  Any other call is forwarded unchanged to
			 * adler32_impl.
			 */
5566			LIBDEFLATEAPI u32
5567	24		libdeflate_adler32(u32 adler, const void *buffer, size_t len)
5568			{
5569	24	50	if (buffer == NULL)
5570	0		return 1;
5571	24		return adler32_impl(adler, buffer, len);
5572			}
5573			/* /usr/home/ben/projects/gzip-libdeflate/../../software/libdeflate/libdeflate-1.25/lib/crc32.c */
5574
5575
5576
5577
5578			/* #include "lib_common.h" */
5579
5580
5581			#ifndef LIB_LIB_COMMON_H
5582			#define LIB_LIB_COMMON_H
5583
5584			#ifdef LIBDEFLATE_H
5585
5586			# error "lib_common.h must always be included before libdeflate.h"
5587			#endif
5588
5589			#if defined(LIBDEFLATE_DLL) && (defined(_WIN32) \|\| defined(__CYGWIN__))
5590			# define LIBDEFLATE_EXPORT_SYM __declspec(dllexport)
5591			#elif defined(__GNUC__)
5592			# define LIBDEFLATE_EXPORT_SYM __attribute__((visibility("default")))
5593			#else
5594			# define LIBDEFLATE_EXPORT_SYM
5595			#endif
5596
5597
5598			#if defined(__GNUC__) && defined(__i386__)
5599			# define LIBDEFLATE_ALIGN_STACK __attribute__((force_align_arg_pointer))
5600			#else
5601			# define LIBDEFLATE_ALIGN_STACK
5602			#endif
5603
5604			#define LIBDEFLATEAPI LIBDEFLATE_EXPORT_SYM LIBDEFLATE_ALIGN_STACK
5605
5606			/* #include "../common_defs.h" */
5607
5608
5609			#ifndef COMMON_DEFS_H
5610			#define COMMON_DEFS_H
5611
5612			/* #include "libdeflate.h" */
5613
5614
5615			#ifndef LIBDEFLATE_H
5616			#define LIBDEFLATE_H
5617
5618			#include
5619			#include
5620
5621			#ifdef __cplusplus
5622			extern "C" {
5623			#endif
5624
5625			#define LIBDEFLATE_VERSION_MAJOR 1
5626			#define LIBDEFLATE_VERSION_MINOR 25
5627			#define LIBDEFLATE_VERSION_STRING "1.25"
5628
5629
5630			#ifndef LIBDEFLATEAPI
5631			# if defined(LIBDEFLATE_DLL) && (defined(_WIN32) \|\| defined(__CYGWIN__))
5632			# define LIBDEFLATEAPI __declspec(dllimport)
5633			# else
5634			# define LIBDEFLATEAPI
5635			# endif
5636			#endif
5637
5638
5639
5640
5641
5642			struct libdeflate_compressor;
5643			struct libdeflate_options;
5644
5645
5646			LIBDEFLATEAPI struct libdeflate_compressor *
5647			libdeflate_alloc_compressor(int compression_level);
5648
5649
5650			LIBDEFLATEAPI struct libdeflate_compressor *
5651			libdeflate_alloc_compressor_ex(int compression_level,
5652			const struct libdeflate_options *options);
5653
5654
5655			LIBDEFLATEAPI size_t
5656			libdeflate_deflate_compress(struct libdeflate_compressor *compressor,
5657			const void *in, size_t in_nbytes,
5658			void *out, size_t out_nbytes_avail);
5659
5660
5661			LIBDEFLATEAPI size_t
5662			libdeflate_deflate_compress_bound(struct libdeflate_compressor *compressor,
5663			size_t in_nbytes);
5664
5665
5666			LIBDEFLATEAPI size_t
5667			libdeflate_zlib_compress(struct libdeflate_compressor *compressor,
5668			const void *in, size_t in_nbytes,
5669			void *out, size_t out_nbytes_avail);
5670
5671
5672			LIBDEFLATEAPI size_t
5673			libdeflate_zlib_compress_bound(struct libdeflate_compressor *compressor,
5674			size_t in_nbytes);
5675
5676
5677			LIBDEFLATEAPI size_t
5678			libdeflate_gzip_compress(struct libdeflate_compressor *compressor,
5679			const void *in, size_t in_nbytes,
5680			void *out, size_t out_nbytes_avail);
5681
5682
5683			LIBDEFLATEAPI size_t
5684			libdeflate_gzip_compress_bound(struct libdeflate_compressor *compressor,
5685			size_t in_nbytes);
5686
5687
5688			LIBDEFLATEAPI void
5689			libdeflate_free_compressor(struct libdeflate_compressor *compressor);
5690
5691
5692
5693
5694
5695			struct libdeflate_decompressor;
5696			struct libdeflate_options;
5697
5698
5699			LIBDEFLATEAPI struct libdeflate_decompressor *
5700			libdeflate_alloc_decompressor(void);
5701
5702
5703			LIBDEFLATEAPI struct libdeflate_decompressor *
5704			libdeflate_alloc_decompressor_ex(const struct libdeflate_options *options);
5705
5706			
			/*
			 * Result code type; the libdeflate_*_decompress*() prototypes in this
			 * header are declared to return it.  The values are fixed at 0..3.
			 *
			 * NOTE(review): the per-enumerator remarks below are inferred from the
			 * names only (the original comments are not present in this listing)
			 * -- confirm against the upstream libdeflate.h.
			 */
5707			enum libdeflate_result {
5708			
			/* Presumably: the operation completed. */
5709			LIBDEFLATE_SUCCESS = 0,
5710			
5711			
			/* Presumably: the input was rejected as invalid. */
5712			LIBDEFLATE_BAD_DATA = 1,
5713			
5714			
			/* Presumably: less output was produced than the caller required. */
5715			LIBDEFLATE_SHORT_OUTPUT = 2,
5716			
5717			
			/* Presumably: the output buffer was too small. */
5718			LIBDEFLATE_INSUFFICIENT_SPACE = 3,
5719			};
5720
5721
5722			LIBDEFLATEAPI enum libdeflate_result
5723			libdeflate_deflate_decompress(struct libdeflate_decompressor *decompressor,
5724			const void *in, size_t in_nbytes,
5725			void *out, size_t out_nbytes_avail,
5726			size_t *actual_out_nbytes_ret);
5727
5728
5729			LIBDEFLATEAPI enum libdeflate_result
5730			libdeflate_deflate_decompress_ex(struct libdeflate_decompressor *decompressor,
5731			const void *in, size_t in_nbytes,
5732			void *out, size_t out_nbytes_avail,
5733			size_t *actual_in_nbytes_ret,
5734			size_t *actual_out_nbytes_ret);
5735
5736
5737			LIBDEFLATEAPI enum libdeflate_result
5738			libdeflate_zlib_decompress(struct libdeflate_decompressor *decompressor,
5739			const void *in, size_t in_nbytes,
5740			void *out, size_t out_nbytes_avail,
5741			size_t *actual_out_nbytes_ret);
5742
5743
5744			LIBDEFLATEAPI enum libdeflate_result
5745			libdeflate_zlib_decompress_ex(struct libdeflate_decompressor *decompressor,
5746			const void *in, size_t in_nbytes,
5747			void *out, size_t out_nbytes_avail,
5748			size_t *actual_in_nbytes_ret,
5749			size_t *actual_out_nbytes_ret);
5750
5751
5752			LIBDEFLATEAPI enum libdeflate_result
5753			libdeflate_gzip_decompress(struct libdeflate_decompressor *decompressor,
5754			const void *in, size_t in_nbytes,
5755			void *out, size_t out_nbytes_avail,
5756			size_t *actual_out_nbytes_ret);
5757
5758
5759			LIBDEFLATEAPI enum libdeflate_result
5760			libdeflate_gzip_decompress_ex(struct libdeflate_decompressor *decompressor,
5761			const void *in, size_t in_nbytes,
5762			void *out, size_t out_nbytes_avail,
5763			size_t *actual_in_nbytes_ret,
5764			size_t *actual_out_nbytes_ret);
5765
5766
5767			LIBDEFLATEAPI void
5768			libdeflate_free_decompressor(struct libdeflate_decompressor *decompressor);
5769
5770
5771
5772
5773
5774
5775			LIBDEFLATEAPI uint32_t
5776			libdeflate_adler32(uint32_t adler, const void *buffer, size_t len);
5777
5778
5779
5780			LIBDEFLATEAPI uint32_t
5781			libdeflate_crc32(uint32_t crc, const void *buffer, size_t len);
5782
5783
5784
5785
5786
5787			
			/*
			 * Allocator hooks: a setter taking an allocation callback and a release
			 * callback, and an options structure carrying the same pair plus its
			 * own size.
			 *
			 * NOTE(review): as rendered here the callbacks read
			 * "void (malloc_func)(size_t)" and "void (free_func)(void )".  A struct
			 * member cannot have function type, so these rows cannot be what was
			 * compiled; presumably the '*' declarators were lost when the listing
			 * was rendered and the source has
			 * "void *(*malloc_func)(size_t)" / "void (*free_func)(void *)" --
			 * confirm against the upstream libdeflate.h.
			 */
5788			LIBDEFLATEAPI void
5789			libdeflate_set_memory_allocator(void (malloc_func)(size_t),
5790			void (free_func)(void ));
5791			
5792			
5793			struct libdeflate_options {
5794			
5795			
			/* Presumably set by the caller to sizeof(struct libdeflate_options) -- confirm. */
5796			size_t sizeof_options;
5797			
5798			
			/* Allocation and release callbacks (see the review note above). */
5799			void (malloc_func)(size_t);
5800			void (free_func)(void );
5801			};
5802
5803			#ifdef __cplusplus
5804			}
5805			#endif
5806
5807			#endif
5808
5809
5810			#include
5811			#include
5812			#include
5813			#ifdef _MSC_VER
5814			# include
5815			# include
5816
5817
5818			# pragma warning(disable : 4146)
5819
5820			# pragma warning(disable : 4018)
5821			# pragma warning(disable : 4244)
5822			# pragma warning(disable : 4267)
5823			# pragma warning(disable : 4310)
5824
5825			# pragma warning(disable : 4100)
5826			# pragma warning(disable : 4127)
5827			# pragma warning(disable : 4189)
5828			# pragma warning(disable : 4232)
5829			# pragma warning(disable : 4245)
5830			# pragma warning(disable : 4295)
5831			#endif
5832			#ifndef FREESTANDING
5833			# include
5834			#endif
5835
5836
5837
5838
5839
5840
5841			#undef ARCH_X86_64
5842			#undef ARCH_X86_32
5843			#undef ARCH_ARM64
5844			#undef ARCH_ARM32
5845			#undef ARCH_RISCV
5846			#ifdef _MSC_VER
5847
5848			# if defined(_M_X64) && !defined(_M_ARM64EC)
5849			# define ARCH_X86_64
5850			# elif defined(_M_IX86)
5851			# define ARCH_X86_32
5852			# elif defined(_M_ARM64)
5853			# define ARCH_ARM64
5854			# elif defined(_M_ARM)
5855			# define ARCH_ARM32
5856			# endif
5857			#else
5858			# if defined(__x86_64__)
5859			# define ARCH_X86_64
5860			# elif defined(__i386__)
5861			# define ARCH_X86_32
5862			# elif defined(__aarch64__)
5863			# define ARCH_ARM64
5864			# elif defined(__arm__)
5865			# define ARCH_ARM32
5866			# elif defined(__riscv)
5867			# define ARCH_RISCV
5868			# endif
5869			#endif
5870
5871
5872
5873
5874
5875
5876			typedef uint8_t u8;
5877			typedef uint16_t u16;
5878			typedef uint32_t u32;
5879			typedef uint64_t u64;
5880			typedef int8_t s8;
5881			typedef int16_t s16;
5882			typedef int32_t s32;
5883			typedef int64_t s64;
5884
5885
5886			#ifdef _MSC_VER
5887			# ifdef _WIN64
5888			typedef long long ssize_t;
5889			# else
5890			typedef long ssize_t;
5891			# endif
5892			#endif
5893
5894
5895			typedef size_t machine_word_t;
5896
5897
5898			#define WORDBYTES ((int)sizeof(machine_word_t))
5899
5900
5901			#define WORDBITS (8 * WORDBYTES)
5902
5903
5904
5905
5906
5907
5908			#if defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER)
5909			# define GCC_PREREQ(major, minor) \
5910			(__GNUC__ > (major) \|\| \
5911			(__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)))
5912			# if !GCC_PREREQ(4, 9)
5913			# error "gcc versions older than 4.9 are no longer supported"
5914			# endif
5915			#else
5916			# define GCC_PREREQ(major, minor) 0
5917			#endif
5918			#ifdef __clang__
5919			# ifdef __apple_build_version__
5920			# define CLANG_PREREQ(major, minor, apple_version) \
5921			(__apple_build_version__ >= (apple_version))
5922			# else
5923			# define CLANG_PREREQ(major, minor, apple_version) \
5924			(__clang_major__ > (major) \|\| \
5925			(__clang_major__ == (major) && __clang_minor__ >= (minor)))
5926			# endif
5927			# if !CLANG_PREREQ(3, 9, 8000000)
5928			# error "clang versions older than 3.9 are no longer supported"
5929			# endif
5930			#else
5931			# define CLANG_PREREQ(major, minor, apple_version) 0
5932			#endif
5933			#ifdef _MSC_VER
5934			# define MSVC_PREREQ(version) (_MSC_VER >= (version))
5935			# if !MSVC_PREREQ(1900)
5936			# error "MSVC versions older than Visual Studio 2015 are no longer supported"
5937			# endif
5938			#else
5939			# define MSVC_PREREQ(version) 0
5940			#endif
5941
5942
5943			#ifndef __has_attribute
5944			# define __has_attribute(attribute) 0
5945			#endif
5946
5947
5948			#ifndef __has_builtin
5949			# define __has_builtin(builtin) 0
5950			#endif
5951
5952
5953			#ifdef _MSC_VER
5954			# define inline __inline
5955			#endif
5956
5957
5958			#if defined(__GNUC__) \|\| __has_attribute(always_inline)
5959			# define forceinline inline __attribute__((always_inline))
5960			#elif defined(_MSC_VER)
5961			# define forceinline __forceinline
5962			#else
5963			# define forceinline inline
5964			#endif
5965
5966
5967			#if defined(__GNUC__) \|\| __has_attribute(unused)
5968			# define MAYBE_UNUSED __attribute__((unused))
5969			#else
5970			# define MAYBE_UNUSED
5971			#endif
5972
5973
5974			#if defined(__GNUC__) \|\| __has_attribute(noreturn)
5975			# define NORETURN __attribute__((noreturn))
5976			#else
5977			# define NORETURN
5978			#endif
5979
5980
5981			#if !defined(__STDC_VERSION__) \|\| (__STDC_VERSION__ < 201112L)
5982			# if defined(__GNUC__) \|\| defined(__clang__)
5983			# define restrict __restrict__
5984			# else
5985			# define restrict
5986			# endif
5987			#endif
5988
5989
5990			#if defined(__GNUC__) \|\| __has_builtin(__builtin_expect)
5991			# define likely(expr) __builtin_expect(!!(expr), 1)
5992			#else
5993			# define likely(expr) (expr)
5994			#endif
5995
5996
5997			#if defined(__GNUC__) \|\| __has_builtin(__builtin_expect)
5998			# define unlikely(expr) __builtin_expect(!!(expr), 0)
5999			#else
6000			# define unlikely(expr) (expr)
6001			#endif
6002
6003
6004			#undef prefetchr
6005			#if defined(__GNUC__) \|\| __has_builtin(__builtin_prefetch)
6006			# define prefetchr(addr) __builtin_prefetch((addr), 0)
6007			#elif defined(_MSC_VER)
6008			# if defined(ARCH_X86_32) \|\| defined(ARCH_X86_64)
6009			# define prefetchr(addr) _mm_prefetch((addr), _MM_HINT_T0)
6010			# elif defined(ARCH_ARM64)
6011			# define prefetchr(addr) __prefetch2((addr), 0x00 )
6012			# elif defined(ARCH_ARM32)
6013			# define prefetchr(addr) __prefetch(addr)
6014			# endif
6015			#endif
6016			#ifndef prefetchr
6017			# define prefetchr(addr)
6018			#endif
6019
6020
6021			#undef prefetchw
6022			#if defined(__GNUC__) \|\| __has_builtin(__builtin_prefetch)
6023			# define prefetchw(addr) __builtin_prefetch((addr), 1)
6024			#elif defined(_MSC_VER)
6025			# if defined(ARCH_X86_32) \|\| defined(ARCH_X86_64)
6026			# define prefetchw(addr) _m_prefetchw(addr)
6027			# elif defined(ARCH_ARM64)
6028			# define prefetchw(addr) __prefetch2((addr), 0x10 )
6029			# elif defined(ARCH_ARM32)
6030			# define prefetchw(addr) __prefetchw(addr)
6031			# endif
6032			#endif
6033			#ifndef prefetchw
6034			# define prefetchw(addr)
6035			#endif
6036
6037
6038			#undef _aligned_attribute
6039			#if defined(__GNUC__) \|\| __has_attribute(aligned)
6040			# define _aligned_attribute(n) __attribute__((aligned(n)))
6041			#elif defined(_MSC_VER)
6042			# define _aligned_attribute(n) __declspec(align(n))
6043			#endif
6044
6045
6046			#if defined(__GNUC__) \|\| __has_attribute(target)
6047			# define _target_attribute(attrs) __attribute__((target(attrs)))
6048			#else
6049			# define _target_attribute(attrs)
6050			#endif
6051
6052
6053
6054
6055
6056			#define ARRAY_LEN(A) (sizeof(A) / sizeof((A)[0]))
6057			#define MIN(a, b) ((a) <= (b) ? (a) : (b))
6058			#define MAX(a, b) ((a) >= (b) ? (a) : (b))
6059			#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
6060			#define STATIC_ASSERT(expr) ((void)sizeof(char[1 - 2 * !(expr)]))
6061			#define ALIGN(n, a) (((n) + (a) - 1) & ~((a) - 1))
6062			#define ROUND_UP(n, d) ((d) * DIV_ROUND_UP((n), (d)))
6063
6064
6065
6066
6067
6068			
			/*
			 * CPU_IS_LITTLE_ENDIAN(): nonzero when the target stores the least
			 * significant byte first.  Three definitions, chosen at compile time:
			 *   - __BYTE_ORDER__ available: a constant comparison against
			 *     __ORDER_LITTLE_ENDIAN__;
			 *   - MSVC: the constant true;
			 *   - otherwise: a probe that stores 1 into the u32 member of a union
			 *     and reads back the u8 member that shares its first byte.
			 */
6069			#if defined(__BYTE_ORDER__)
6070			# define CPU_IS_LITTLE_ENDIAN() (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
6071			#elif defined(_MSC_VER)
6072			# define CPU_IS_LITTLE_ENDIAN() true
6073			#else
6074			static forceinline bool CPU_IS_LITTLE_ENDIAN(void)
6075			{
6076			union {
6077			u32 w;
6078			u8 b;
6079			} u;
6080			
			/* b overlays the lowest-addressed byte of w: it reads 1 only if that is the low-order byte. */
6081			u.w = 1;
6082			return u.b;
6083			}
6084			#endif
6085
6086			
			/*
			 * Return v with its two bytes exchanged.
			 *
			 * Uses __builtin_bswap16 when __GNUC__ is defined or __has_builtin
			 * reports it, _byteswap_ushort on MSVC, and a shift/or otherwise.
			 * ("\|" in this listing is presumably the renderer's escape for "|".)
			 */
6087			static forceinline u16 bswap16(u16 v)
6088			{
6089			#if defined(__GNUC__) \|\| __has_builtin(__builtin_bswap16)
6090			return __builtin_bswap16(v);
6091			#elif defined(_MSC_VER)
6092			return _byteswap_ushort(v);
6093			#else
			/* v is promoted before shifting; the conversion back to u16 on return drops the bits moved above bit 15. */
6094			return (v << 8) \| (v >> 8);
6095			#endif
6096			}
6097
6098			
			/*
			 * Return v with its four bytes in reverse order.
			 *
			 * Uses __builtin_bswap32 when __GNUC__ is defined or __has_builtin
			 * reports it, _byteswap_ulong on MSVC, and mask-and-shift otherwise.
			 */
6099			static forceinline u32 bswap32(u32 v)
6100			{
6101			#if defined(__GNUC__) \|\| __has_builtin(__builtin_bswap32)
6102			return __builtin_bswap32(v);
6103			#elif defined(_MSC_VER)
6104			return _byteswap_ulong(v);
6105			#else
			/* Byte 0 goes to byte 3, byte 1 to byte 2, and vice versa. */
6106			return ((v & 0x000000FF) << 24) \|
6107			((v & 0x0000FF00) << 8) \|
6108			((v & 0x00FF0000) >> 8) \|
6109			((v & 0xFF000000) >> 24);
6110			#endif
6111			}
6112
6113			
			/*
			 * Return v with its eight bytes in reverse order.
			 *
			 * Uses __builtin_bswap64 when __GNUC__ is defined or __has_builtin
			 * reports it, _byteswap_uint64 on MSVC, and mask-and-shift otherwise.
			 */
6114			static forceinline u64 bswap64(u64 v)
6115			{
6116			#if defined(__GNUC__) \|\| __has_builtin(__builtin_bswap64)
6117			return __builtin_bswap64(v);
6118			#elif defined(_MSC_VER)
6119			return _byteswap_uint64(v);
6120			#else
			/* Each byte k is moved to position 7 - k. */
6121			return ((v & 0x00000000000000FF) << 56) \|
6122			((v & 0x000000000000FF00) << 40) \|
6123			((v & 0x0000000000FF0000) << 24) \|
6124			((v & 0x00000000FF000000) << 8) \|
6125			((v & 0x000000FF00000000) >> 8) \|
6126			((v & 0x0000FF0000000000) >> 24) \|
6127			((v & 0x00FF000000000000) >> 40) \|
6128			((v & 0xFF00000000000000) >> 56);
6129			#endif
6130			}
6131
6132			#define le16_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap16(v))
6133			#define le32_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap32(v))
6134			#define le64_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap64(v))
6135			#define be16_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? bswap16(v) : (v))
6136			#define be32_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? bswap32(v) : (v))
6137			#define be64_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? bswap64(v) : (v))
6138
6139
6140
6141
6142
6143
6144			#if (defined(__GNUC__) \|\| defined(__clang__)) && \
6145			(defined(ARCH_X86_64) \|\| defined(ARCH_X86_32) \|\| \
6146			defined(__ARM_FEATURE_UNALIGNED) \|\| defined(__powerpc64__) \|\| \
6147			defined(__riscv_misaligned_fast) \|\| \
6148			defined(__wasm__))
6149			# define UNALIGNED_ACCESS_IS_FAST 1
6150			#elif defined(_MSC_VER)
6151			# define UNALIGNED_ACCESS_IS_FAST 1
6152			#else
6153			# define UNALIGNED_ACCESS_IS_FAST 0
6154			#endif
6155
6156
6157			
			/*
			 * Generator for alignment-agnostic load/store helpers.
			 *
			 * MEMCOPY is __builtin_memcpy in FREESTANDING builds and memcpy
			 * otherwise; it exists only for the duration of this section and is
			 * #undef'd once the helpers have been generated.
			 *
			 * DEFINE_UNALIGNED_TYPE(type) expands to two functions:
			 *   load_<type>_unaligned(p)     copies sizeof(type) bytes from p into
			 *                                a local and returns it;
			 *   store_<type>_unaligned(v, p) copies the bytes of v to p.
			 * Both copy in host byte order; no byte swapping happens here.
			 *
			 * It is instantiated for u16, u32, u64 and machine_word_t, and the
			 * machine-word pair is given the shorter aliases load_word_unaligned /
			 * store_word_unaligned.
			 */
6158			#ifdef FREESTANDING
6159			# define MEMCOPY __builtin_memcpy
6160			#else
6161			# define MEMCOPY memcpy
6162			#endif
6163			
6164			
6165			
6166			#define DEFINE_UNALIGNED_TYPE(type) \
6167			static forceinline type \
6168			load_##type##_unaligned(const void *p) \
6169			{ \
6170			type v; \
6171			\
6172			MEMCOPY(&v, p, sizeof(v)); \
6173			return v; \
6174			} \
6175			\
6176			static forceinline void \
6177			store_##type##_unaligned(type v, void *p) \
6178			{ \
6179			MEMCOPY(p, &v, sizeof(v)); \
6180			}
6181			
6182			DEFINE_UNALIGNED_TYPE(u16)
6183			DEFINE_UNALIGNED_TYPE(u32)
6184			DEFINE_UNALIGNED_TYPE(u64)
6185			DEFINE_UNALIGNED_TYPE(machine_word_t)
6186			
6187			#undef MEMCOPY
6188			
6189			#define load_word_unaligned load_machine_word_t_unaligned
6190			#define store_word_unaligned store_machine_word_t_unaligned
6191
6192
6193			
			/*
			 * Read a 16-bit little-endian value from p; p need not be aligned.
			 *
			 * UNALIGNED_ACCESS_IS_FAST is a 0/1 macro, so the branch is on a
			 * constant.  The fast path does one memcpy-based load and lets
			 * le16_bswap() swap only on big-endian CPUs; the other path builds the
			 * value from individual bytes.
			 */
6194			static forceinline u16
6195			get_unaligned_le16(const u8 *p)
6196			{
6197			if (UNALIGNED_ACCESS_IS_FAST)
6198			return le16_bswap(load_u16_unaligned(p));
6199			else
			/* p[0] is the low byte, p[1] the high byte. */
6200			return ((u16)p[1] << 8) \| p[0];
6201			}
6202			
			/*
			 * Read a 16-bit big-endian value from p; p need not be aligned.
			 *
			 * The fast path does one memcpy-based load and lets be16_bswap() swap
			 * only on little-endian CPUs; the other path builds the value from
			 * individual bytes.
			 */
6203			static forceinline u16
6204			get_unaligned_be16(const u8 *p)
6205			{
6206			if (UNALIGNED_ACCESS_IS_FAST)
6207			return be16_bswap(load_u16_unaligned(p));
6208			else
			/* p[0] is the high byte, p[1] the low byte. */
6209			return ((u16)p[0] << 8) \| p[1];
6210			}
6211			
			/*
			 * Read a 32-bit little-endian value from p; p need not be aligned.
			 *
			 * The fast path does one memcpy-based load and lets le32_bswap() swap
			 * only on big-endian CPUs; the other path builds the value from
			 * individual bytes.
			 */
6212			static forceinline u32
6213			get_unaligned_le32(const u8 *p)
6214			{
6215			if (UNALIGNED_ACCESS_IS_FAST)
6216			return le32_bswap(load_u32_unaligned(p));
6217			else
			/* p[0] is the least significant byte, p[3] the most significant. */
6218			return ((u32)p[3] << 24) \| ((u32)p[2] << 16) \|
6219			((u32)p[1] << 8) \| p[0];
6220			}
6221			
			/*
			 * Read a 32-bit big-endian value from p; p need not be aligned.
			 *
			 * The fast path does one memcpy-based load and lets be32_bswap() swap
			 * only on little-endian CPUs; the other path builds the value from
			 * individual bytes.
			 */
6222			static forceinline u32
6223			get_unaligned_be32(const u8 *p)
6224			{
6225			if (UNALIGNED_ACCESS_IS_FAST)
6226			return be32_bswap(load_u32_unaligned(p));
6227			else
			/* p[0] is the most significant byte, p[3] the least significant. */
6228			return ((u32)p[0] << 24) \| ((u32)p[1] << 16) \|
6229			((u32)p[2] << 8) \| p[3];
6230			}
6231			
			/*
			 * Read a 64-bit little-endian value from p; p need not be aligned.
			 *
			 * The fast path does one memcpy-based load and lets le64_bswap() swap
			 * only on big-endian CPUs; the other path builds the value from
			 * individual bytes.
			 */
6232			static forceinline u64
6233			get_unaligned_le64(const u8 *p)
6234			{
6235			if (UNALIGNED_ACCESS_IS_FAST)
6236			return le64_bswap(load_u64_unaligned(p));
6237			else
			/* p[0] is the least significant byte, p[7] the most significant. */
6238			return ((u64)p[7] << 56) \| ((u64)p[6] << 48) \|
6239			((u64)p[5] << 40) \| ((u64)p[4] << 32) \|
6240			((u64)p[3] << 24) \| ((u64)p[2] << 16) \|
6241			((u64)p[1] << 8) \| p[0];
6242			}
6243			
			/*
			 * Read one little-endian machine word from p; p need not be aligned.
			 *
			 * WORDBITS is 8 * sizeof(machine_word_t); the static assertion rejects
			 * any build where that is neither 32 nor 64, and the constant test
			 * then picks the matching fixed-width reader.
			 */
6244			static forceinline machine_word_t
6245			get_unaligned_leword(const u8 *p)
6246			{
6247			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
6248			if (WORDBITS == 32)
6249			return get_unaligned_le32(p);
6250			else
6251			return get_unaligned_le64(p);
6252			}
6253
6254
6255			
			/*
			 * Write v to p as 16 bits in little-endian order; p need not be
			 * aligned.
			 *
			 * The fast path swaps with le16_bswap() (a no-op on little-endian
			 * CPUs) and does one memcpy-based store; the other path writes the
			 * bytes individually.
			 */
6256			static forceinline void
6257			put_unaligned_le16(u16 v, u8 *p)
6258			{
6259			if (UNALIGNED_ACCESS_IS_FAST) {
6260			store_u16_unaligned(le16_bswap(v), p);
6261			} else {
			/* Low byte first. */
6262			p[0] = (u8)(v >> 0);
6263			p[1] = (u8)(v >> 8);
6264			}
6265			}
6266			
			/*
			 * Write v to p as 16 bits in big-endian order; p need not be aligned.
			 *
			 * The fast path swaps with be16_bswap() (a no-op on big-endian CPUs)
			 * and does one memcpy-based store; the other path writes the bytes
			 * individually.
			 */
6267			static forceinline void
6268			put_unaligned_be16(u16 v, u8 *p)
6269			{
6270			if (UNALIGNED_ACCESS_IS_FAST) {
6271			store_u16_unaligned(be16_bswap(v), p);
6272			} else {
			/* High byte first. */
6273			p[0] = (u8)(v >> 8);
6274			p[1] = (u8)(v >> 0);
6275			}
6276			}
6277			
			/*
			 * Write v to p as 32 bits in little-endian order; p need not be
			 * aligned.
			 *
			 * The fast path swaps with le32_bswap() (a no-op on little-endian
			 * CPUs) and does one memcpy-based store; the other path writes the
			 * bytes individually.
			 */
6278			static forceinline void
6279			put_unaligned_le32(u32 v, u8 *p)
6280			{
6281			if (UNALIGNED_ACCESS_IS_FAST) {
6282			store_u32_unaligned(le32_bswap(v), p);
6283			} else {
			/* Least significant byte first. */
6284			p[0] = (u8)(v >> 0);
6285			p[1] = (u8)(v >> 8);
6286			p[2] = (u8)(v >> 16);
6287			p[3] = (u8)(v >> 24);
6288			}
6289			}
6290			
			/*
			 * Write v to p as 32 bits in big-endian order; p need not be aligned.
			 *
			 * The fast path swaps with be32_bswap() (a no-op on big-endian CPUs)
			 * and does one memcpy-based store; the other path writes the bytes
			 * individually.
			 */
6291			static forceinline void
6292			put_unaligned_be32(u32 v, u8 *p)
6293			{
6294			if (UNALIGNED_ACCESS_IS_FAST) {
6295			store_u32_unaligned(be32_bswap(v), p);
6296			} else {
			/* Most significant byte first. */
6297			p[0] = (u8)(v >> 24);
6298			p[1] = (u8)(v >> 16);
6299			p[2] = (u8)(v >> 8);
6300			p[3] = (u8)(v >> 0);
6301			}
6302			}
6303			
			/*
			 * Write v to p as 64 bits in little-endian order; p need not be
			 * aligned.
			 *
			 * The fast path swaps with le64_bswap() (a no-op on little-endian
			 * CPUs) and does one memcpy-based store; the other path writes the
			 * bytes individually.
			 */
6304			static forceinline void
6305			put_unaligned_le64(u64 v, u8 *p)
6306			{
6307			if (UNALIGNED_ACCESS_IS_FAST) {
6308			store_u64_unaligned(le64_bswap(v), p);
6309			} else {
			/* Least significant byte first. */
6310			p[0] = (u8)(v >> 0);
6311			p[1] = (u8)(v >> 8);
6312			p[2] = (u8)(v >> 16);
6313			p[3] = (u8)(v >> 24);
6314			p[4] = (u8)(v >> 32);
6315			p[5] = (u8)(v >> 40);
6316			p[6] = (u8)(v >> 48);
6317			p[7] = (u8)(v >> 56);
6318			}
6319			}
6320			
			/*
			 * Write one machine word to p in little-endian order; p need not be
			 * aligned.
			 *
			 * WORDBITS is 8 * sizeof(machine_word_t); the static assertion rejects
			 * any build where that is neither 32 nor 64, and the constant test
			 * then picks the matching fixed-width writer.
			 */
6321			static forceinline void
6322			put_unaligned_leword(machine_word_t v, u8 *p)
6323			{
6324			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
6325			if (WORDBITS == 32)
			/* v is converted to the u32 parameter type here. */
6326			put_unaligned_le32(v, p);
6327			else
6328			put_unaligned_le64(v, p);
6329			}
6330
6331
6332
6333
6334
6335
6336			
			/*
			 * Bit scan reverse: 0-based index of the most significant set bit of v.
			 *
			 * v is expected to be nonzero.  The three variants do not agree on 0:
			 * __builtin_clz(0) is undefined per the GCC documentation, the MSVC
			 * path returns i without initialising it itself, and the portable loop
			 * returns 0 (the same answer it gives for v == 1).
			 */
6337			static forceinline unsigned
6338			bsr32(u32 v)
6339			{
6340			#if defined(__GNUC__) \|\| __has_builtin(__builtin_clz)
			/* Leading-zero count converted to a bit index. */
6341			return 31 - __builtin_clz(v);
6342			#elif defined(_MSC_VER)
6343			unsigned long i;
6344			
6345			_BitScanReverse(&i, v);
6346			return i;
6347			#else
6348			unsigned i = 0;
6349			
			/* Count how many right shifts it takes to clear everything above bit 0. */
6350			while ((v >>= 1) != 0)
6351			i++;
6352			return i;
6353			#endif
6354			}
6355			
			/*
			 * Bit scan reverse: 0-based index of the most significant set bit of a
			 * 64-bit value.
			 *
			 * v is expected to be nonzero (__builtin_clzll(0) is undefined per the
			 * GCC documentation; the portable loop returns 0 for both 0 and 1).
			 * The intrinsic path is taken only for MSVC with _WIN64 defined; other
			 * non-GNU builds use the loop.
			 */
6356			static forceinline unsigned
6357			bsr64(u64 v)
6358			{
6359			#if defined(__GNUC__) \|\| __has_builtin(__builtin_clzll)
			/* Leading-zero count converted to a bit index. */
6360			return 63 - __builtin_clzll(v);
6361			#elif defined(_MSC_VER) && defined(_WIN64)
6362			unsigned long i;
6363			
6364			_BitScanReverse64(&i, v);
6365			return i;
6366			#else
6367			unsigned i = 0;
6368			
			/* Count how many right shifts it takes to clear everything above bit 0. */
6369			while ((v >>= 1) != 0)
6370			i++;
6371			return i;
6372			#endif
6373			}
6374			
			/*
			 * Bit scan reverse on a machine word: forwards to bsr32() or bsr64()
			 * according to WORDBITS, which the static assertion restricts to 32 or
			 * 64.  The same nonzero-input expectation as those helpers applies.
			 */
6375			static forceinline unsigned
6376			bsrw(machine_word_t v)
6377			{
6378			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
6379			if (WORDBITS == 32)
6380			return bsr32(v);
6381			else
6382			return bsr64(v);
6383			}
6384
6385
6386			
			/*
			 * Bit scan forward: 0-based index of the least significant set bit of
			 * v.
			 *
			 * v must be nonzero: __builtin_ctz(0) is undefined per the GCC
			 * documentation, the MSVC path returns i without initialising it
			 * itself, and the portable loop below never terminates for 0.
			 */
6387			static forceinline unsigned
6388			bsf32(u32 v)
6389			{
6390			#if defined(__GNUC__) \|\| __has_builtin(__builtin_ctz)
6391			return __builtin_ctz(v);
6392			#elif defined(_MSC_VER)
6393			unsigned long i;
6394			
6395			_BitScanForward(&i, v);
6396			return i;
6397			#else
6398			unsigned i = 0;
6399			
			/* Shift right until bit 0 is set; with v == 0 that never happens. */
6400			for (; (v & 1) == 0; v >>= 1)
6401			i++;
6402			return i;
6403			#endif
6404			}
6405			
			/*
			 * Bit scan forward: 0-based index of the least significant set bit of
			 * a 64-bit value.
			 *
			 * v must be nonzero: __builtin_ctzll(0) is undefined per the GCC
			 * documentation, and the portable loop below never terminates for 0.
			 * The intrinsic path is taken only for MSVC with _WIN64 defined; other
			 * non-GNU builds use the loop.
			 */
6406			static forceinline unsigned
6407			bsf64(u64 v)
6408			{
6409			#if defined(__GNUC__) \|\| __has_builtin(__builtin_ctzll)
6410			return __builtin_ctzll(v);
6411			#elif defined(_MSC_VER) && defined(_WIN64)
6412			unsigned long i;
6413			
6414			_BitScanForward64(&i, v);
6415			return i;
6416			#else
6417			unsigned i = 0;
6418			
			/* Shift right until bit 0 is set; with v == 0 that never happens. */
6419			for (; (v & 1) == 0; v >>= 1)
6420			i++;
6421			return i;
6422			#endif
6423			}
6424			
			/*
			 * Bit scan forward on a machine word: forwards to bsf32() or bsf64()
			 * according to WORDBITS, which the static assertion restricts to 32 or
			 * 64.  The same nonzero-input requirement as those helpers applies.
			 */
6425			static forceinline unsigned
6426			bsfw(machine_word_t v)
6427			{
6428			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
6429			if (WORDBITS == 32)
6430			return bsf32(v);
6431			else
6432			return bsf64(v);
6433			}
6434
6435			
			/*
			 * rbit32(v): wraps the ARM "rbit" instruction (bit-order reversal of a
			 * 32-bit register per the Arm architecture reference).
			 *
			 * Defined only for GCC/clang builds targeting 32-bit ARM with
			 * __ARM_ARCH >= 7 (or ARMv6T2), or targeting ARM64, where the %w
			 * operand modifiers are used in the instruction template.  In those
			 * builds the name is also defined as a macro expanding to itself so
			 * that other code can test "#ifdef rbit32"; in every other build the
			 * leading #undef leaves it undefined.
			 */
6436			#undef rbit32
6437			#if (defined(__GNUC__) \|\| defined(__clang__)) && defined(ARCH_ARM32) && \
6438			(__ARM_ARCH >= 7 \|\| (__ARM_ARCH == 6 && defined(__ARM_ARCH_6T2__)))
6439			static forceinline u32
6440			rbit32(u32 v)
6441			{
6442			__asm__("rbit %0, %1" : "=r" (v) : "r" (v));
6443			return v;
6444			}
6445			#define rbit32 rbit32
6446			#elif (defined(__GNUC__) \|\| defined(__clang__)) && defined(ARCH_ARM64)
6447			static forceinline u32
6448			rbit32(u32 v)
6449			{
6450			__asm__("rbit %w0, %w1" : "=r" (v) : "r" (v));
6451			return v;
6452			}
6453			#define rbit32 rbit32
6454			#endif
6455
6456			#endif
6457
6458			
			/*
			 * Allocator callback types, the library-wide default allocator
			 * variables, and the aligned allocation helpers built on them.
			 *
			 * NOTE(review): as rendered, the typedefs name function types returning
			 * void ("void (malloc_func_t)(size_t)"), which does not fit
			 * libdeflate_aligned_malloc() returning void * from such a callback.
			 * Presumably the '*' declarators were lost when the listing was
			 * rendered and the source has "void *(*malloc_func_t)(size_t)" and
			 * "void (*free_func_t)(void *)" -- confirm against upstream
			 * lib_common.h.
			 */
6459			typedef void (malloc_func_t)(size_t);
6460			typedef void (free_func_t)(void );
6461			
6462			extern malloc_func_t libdeflate_default_malloc_func;
6463			extern free_func_t libdeflate_default_free_func;
6464			
			/* Allocate size bytes at the given alignment through malloc_func; release through the matching free helper. */
6465			void *libdeflate_aligned_malloc(malloc_func_t malloc_func,
6466			size_t alignment, size_t size);
6467			void libdeflate_aligned_free(free_func_t free_func, void *ptr);
6468
6469			#ifdef FREESTANDING
6470
6471			void memset(void s, int c, size_t n);
6472			#define memset(s, c, n) __builtin_memset((s), (c), (n))
6473
6474			void memcpy(void dest, const void *src, size_t n);
6475			#define memcpy(dest, src, n) __builtin_memcpy((dest), (src), (n))
6476
6477			void memmove(void dest, const void *src, size_t n);
6478			#define memmove(dest, src, n) __builtin_memmove((dest), (src), (n))
6479
6480			int memcmp(const void s1, const void s2, size_t n);
6481			#define memcmp(s1, s2, n) __builtin_memcmp((s1), (s2), (n))
6482
6483			#undef LIBDEFLATE_ENABLE_ASSERTIONS
6484			#else
6485			# include
6486
6487			# ifdef __clang_analyzer__
6488			# define LIBDEFLATE_ENABLE_ASSERTIONS
6489			# endif
6490			#endif
6491
6492
6493			#ifdef LIBDEFLATE_ENABLE_ASSERTIONS
6494			NORETURN void
6495			libdeflate_assertion_failed(const char expr, const char file, int line);
6496			#define ASSERT(expr) { if (unlikely(!(expr))) \
6497			libdeflate_assertion_failed(#expr, __FILE__, __LINE__); }
6498			#else
6499			#define ASSERT(expr) (void)(expr)
6500			#endif
6501
6502			#define CONCAT_IMPL(a, b) a##b
6503			#define CONCAT(a, b) CONCAT_IMPL(a, b)
6504			#define ADD_SUFFIX(name) CONCAT(name, SUFFIX)
6505
6506			#endif
6507
6508			/* #include "crc32_multipliers.h" */
6509
6510
6511			#define CRC32_X159_MODG 0xae689191
6512			#define CRC32_X95_MODG 0xccaa009e
6513
6514			#define CRC32_X287_MODG 0xf1da05aa
6515			#define CRC32_X223_MODG 0x81256527
6516
6517			#define CRC32_X415_MODG 0x3db1ecdc
6518			#define CRC32_X351_MODG 0xaf449247
6519
6520			#define CRC32_X543_MODG 0x8f352d95
6521			#define CRC32_X479_MODG 0x1d9513d7
6522
6523			#define CRC32_X671_MODG 0x1c279815
6524			#define CRC32_X607_MODG 0xae0b5394
6525
6526			#define CRC32_X799_MODG 0xdf068dc2
6527			#define CRC32_X735_MODG 0x57c54819
6528
6529			#define CRC32_X927_MODG 0x31f8303f
6530			#define CRC32_X863_MODG 0x0cbec0ed
6531
6532			#define CRC32_X1055_MODG 0x33fff533
6533			#define CRC32_X991_MODG 0x910eeec1
6534
6535			#define CRC32_X1183_MODG 0x26b70c3d
6536			#define CRC32_X1119_MODG 0x3f41287a
6537
6538			#define CRC32_X1311_MODG 0xe3543be0
6539			#define CRC32_X1247_MODG 0x9026d5b1
6540
6541			#define CRC32_X1439_MODG 0x5a1bb05d
6542			#define CRC32_X1375_MODG 0xd1df2327
6543
6544			#define CRC32_X1567_MODG 0x596c8d81
6545			#define CRC32_X1503_MODG 0xf5e48c85
6546
6547			#define CRC32_X1695_MODG 0x682bdd4f
6548			#define CRC32_X1631_MODG 0x3c656ced
6549
6550			#define CRC32_X1823_MODG 0x4a28bd43
6551			#define CRC32_X1759_MODG 0xfe807bbd
6552
6553			#define CRC32_X1951_MODG 0x0077f00d
6554			#define CRC32_X1887_MODG 0x1f0c2cdd
6555
6556			#define CRC32_X2079_MODG 0xce3371cb
6557			#define CRC32_X2015_MODG 0xe95c1271
6558
6559			#define CRC32_X2207_MODG 0xa749e894
6560			#define CRC32_X2143_MODG 0xb918a347
6561
6562			#define CRC32_X2335_MODG 0x2c538639
6563			#define CRC32_X2271_MODG 0x71d54a59
6564
6565			#define CRC32_X2463_MODG 0x32b0733c
6566			#define CRC32_X2399_MODG 0xff6f2fc2
6567
6568			#define CRC32_X2591_MODG 0x0e9bd5cc
6569			#define CRC32_X2527_MODG 0xcec97417
6570
6571			#define CRC32_X2719_MODG 0x76278617
6572			#define CRC32_X2655_MODG 0x1c63267b
6573
6574			#define CRC32_X2847_MODG 0xc51b93e3
6575			#define CRC32_X2783_MODG 0xf183c71b
6576
6577			#define CRC32_X2975_MODG 0x7eaed122
6578			#define CRC32_X2911_MODG 0x9b9bdbd0
6579
6580			#define CRC32_X3103_MODG 0x2ce423f1
6581			#define CRC32_X3039_MODG 0xd31343ea
6582
6583			#define CRC32_X3231_MODG 0x8b8d8645
6584			#define CRC32_X3167_MODG 0x4470ac44
6585
6586			#define CRC32_X3359_MODG 0x4b700aa8
6587			#define CRC32_X3295_MODG 0xeea395c4
6588
6589			#define CRC32_X3487_MODG 0xeff5e99d
6590			#define CRC32_X3423_MODG 0xf9d9c7ee
6591
6592			#define CRC32_X3615_MODG 0xad0d2bb2
6593			#define CRC32_X3551_MODG 0xcd669a40
6594
6595			#define CRC32_X3743_MODG 0x9fb66bd3
6596			#define CRC32_X3679_MODG 0x6d40f445
6597
6598			#define CRC32_X3871_MODG 0xc2dcc467
6599			#define CRC32_X3807_MODG 0x9ee62949
6600
6601			#define CRC32_X3999_MODG 0x398e2ff2
6602			#define CRC32_X3935_MODG 0x145575d5
6603
6604			#define CRC32_X4127_MODG 0x1072db28
6605			#define CRC32_X4063_MODG 0x0c30f51d
6606
6607			#define CRC32_BARRETT_CONSTANT_1 0xb4e5b025f7011641ULL
6608			#define CRC32_BARRETT_CONSTANT_2 0x00000001db710641ULL
6609
6610			#define CRC32_NUM_CHUNKS 4
6611			#define CRC32_MIN_VARIABLE_CHUNK_LEN 128UL
6612			#define CRC32_MAX_VARIABLE_CHUNK_LEN 16384UL
6613
6614
6615			static const u32 crc32_mults_for_chunklen[][CRC32_NUM_CHUNKS - 1] MAYBE_UNUSED = {
			/*
			 * Precomputed multiplier table: 129 rows of CRC32_NUM_CHUNKS - 1 (= 3)
			 * 32-bit constants each. Row 0 is an all-zero placeholder.
			 *
			 * NOTE(review): presumably row i holds the multipliers for a chunk
			 * length of i * CRC32_MIN_VARIABLE_CHUNK_LEN bytes, up to
			 * CRC32_MAX_VARIABLE_CHUNK_LEN at row 128 -- confirm at the use site.
			 *
			 * The values are consistent with that reading: row 1 starts with
			 * CRC32_X3039_MODG (3 * 128 * 8 - 33 = 3039), the middle entry of
			 * row 2 is CRC32_X4063_MODG (2 * 256 * 8 - 33 = 4063), and entries
			 * recur wherever (column weight * row index) matches, e.g. the first
			 * entry of row 2 equals the middle entry of row 3. So a row looks
			 * like { x^(3L-33), x^(2L-33), x^(L-33) } mod G(x), with L the chunk
			 * length in bits -- presumably; verify against the generator script.
			 *
			 * These are generated values; do not edit them by hand.
			 */
6616			{ 0 },
6617
6618			{ 0xd31343ea , 0xe95c1271 , 0x910eeec1 , },
6619
6620			{ 0x1d6708a0 , 0x0c30f51d , 0xe95c1271 , },
6621
6622			{ 0xdb3839f3 , 0x1d6708a0 , 0xd31343ea , },
6623
6624			{ 0x1753ab84 , 0xbbf2f6d6 , 0x0c30f51d , },
6625
6626			{ 0x3796455c , 0xb8e0e4a8 , 0xc352f6de , },
6627
6628			{ 0x3954de39 , 0x1753ab84 , 0x1d6708a0 , },
6629
6630			{ 0x632d78c5 , 0x3fc33de4 , 0x9a1b53c8 , },
6631
6632			{ 0xa0decef3 , 0x7b4aa8b7 , 0xbbf2f6d6 , },
6633
6634			{ 0xe9c09bb0 , 0x3954de39 , 0xdb3839f3 , },
6635
6636			{ 0xd51917a4 , 0xcae68461 , 0xb8e0e4a8 , },
6637
6638			{ 0x154a8a62 , 0x41e7589c , 0x3e9a43cd , },
6639
6640			{ 0xf196555d , 0xa0decef3 , 0x1753ab84 , },
6641
6642			{ 0x8eec2999 , 0xefb0a128 , 0x6044fbb0 , },
6643
6644			{ 0x27892abf , 0x48d72bb1 , 0x3fc33de4 , },
6645
6646			{ 0x77bc2419 , 0xd51917a4 , 0x3796455c , },
6647
6648			{ 0xcea114a5 , 0x68c0a2c5 , 0x7b4aa8b7 , },
6649
6650			{ 0xa1077e85 , 0x188cc628 , 0x0c21f835 , },
6651
6652			{ 0xc5ed75e1 , 0xf196555d , 0x3954de39 , },
6653
6654			{ 0xca4fba3f , 0x0acfa26f , 0x6cb21510 , },
6655
6656			{ 0xcf5bcdc4 , 0x4fae7fc0 , 0xcae68461 , },
6657
6658			{ 0xf36b9d16 , 0x27892abf , 0x632d78c5 , },
6659
6660			{ 0xf76fd988 , 0xed5c39b1 , 0x41e7589c , },
6661
6662			{ 0x6c45d92e , 0xff809fcd , 0x0c46baec , },
6663
6664			{ 0x6116b82b , 0xcea114a5 , 0xa0decef3 , },
6665
6666			{ 0x4d9899bb , 0x9f9d8d9c , 0x53deb236 , },
6667
6668			{ 0x3e7c93b9 , 0x6666b805 , 0xefb0a128 , },
6669
6670			{ 0x388b20ac , 0xc5ed75e1 , 0xe9c09bb0 , },
6671
6672			{ 0x0956d953 , 0x97fbdb14 , 0x48d72bb1 , },
6673
6674			{ 0x55cb4dfe , 0x1b37c832 , 0xc07331b3 , },
6675
6676			{ 0x52222fea , 0xcf5bcdc4 , 0xd51917a4 , },
6677
6678			{ 0x0603989b , 0xb03c8112 , 0x5e04b9a5 , },
6679
6680			{ 0x4470c029 , 0x2339d155 , 0x68c0a2c5 , },
6681
6682			{ 0xb6f35093 , 0xf76fd988 , 0x154a8a62 , },
6683
6684			{ 0xc46805ba , 0x416f9449 , 0x188cc628 , },
6685
6686			{ 0xc3876592 , 0x4b809189 , 0xc35cf6e7 , },
6687
6688			{ 0x5b0c98b9 , 0x6116b82b , 0xf196555d , },
6689
6690			{ 0x30d13e5f , 0x4c5a315a , 0x8c224466 , },
6691
6692			{ 0x54afca53 , 0xbccfa2c1 , 0x0acfa26f , },
6693
6694			{ 0x93102436 , 0x3e7c93b9 , 0x8eec2999 , },
6695
6696			{ 0xbd2655a8 , 0x3e116c9d , 0x4fae7fc0 , },
6697
6698			{ 0x70cd7f26 , 0x408e57f2 , 0x1691be45 , },
6699
6700			{ 0x2d546c53 , 0x0956d953 , 0x27892abf , },
6701
6702			{ 0xb53410a8 , 0x42ebf0ad , 0x161f3c12 , },
6703
6704			{ 0x67a93f75 , 0xcf3233e4 , 0xed5c39b1 , },
6705
6706			{ 0x9830ac33 , 0x52222fea , 0x77bc2419 , },
6707
6708			{ 0xb0b6fc3e , 0x2fde73f8 , 0xff809fcd , },
6709
6710			{ 0x84170f16 , 0xced90d99 , 0x30de0f98 , },
6711
6712			{ 0xd7017a0c , 0x4470c029 , 0xcea114a5 , },
6713
6714			{ 0xadb25de6 , 0x84f40beb , 0x2b7e0e1b , },
6715
6716			{ 0x8282fddc , 0xec855937 , 0x9f9d8d9c , },
6717
6718			{ 0x46362bee , 0xc46805ba , 0xa1077e85 , },
6719
6720			{ 0xb9077a01 , 0xdf7a24ac , 0x6666b805 , },
6721
6722			{ 0xf51d9bc6 , 0x2b52dc39 , 0x7e774cf6 , },
6723
6724			{ 0x4ca19a29 , 0x5b0c98b9 , 0xc5ed75e1 , },
6725
6726			{ 0xdc0fc3fc , 0xb939fcdf , 0x3678fed2 , },
6727
6728			{ 0x63c3d167 , 0x70f9947d , 0x97fbdb14 , },
6729
6730			{ 0x5851d254 , 0x54afca53 , 0xca4fba3f , },
6731
6732			{ 0xfeacf2a1 , 0x7a3c0a6a , 0x1b37c832 , },
6733
6734			{ 0x93b7edc8 , 0x1fea4d2a , 0x58fa96ee , },
6735
6736			{ 0x5539e44a , 0xbd2655a8 , 0xcf5bcdc4 , },
6737
6738			{ 0xde32a3d2 , 0x4ff61aa1 , 0x6a6a3694 , },
6739
6740			{ 0xf0baeeb6 , 0x7ae2f6f4 , 0xb03c8112 , },
6741
6742			{ 0xbe15887f , 0x2d546c53 , 0xf36b9d16 , },
6743
6744			{ 0x64f34a05 , 0xe0ee5efe , 0x2339d155 , },
6745
6746			{ 0x1b6d1aea , 0xfeafb67c , 0x4fb001a8 , },
6747
6748			{ 0x82adb0b8 , 0x67a93f75 , 0xf76fd988 , },
6749
6750			{ 0x694587c7 , 0x3b34408b , 0xeccb2978 , },
6751
6752			{ 0xd2fc57c3 , 0x07fcf8c6 , 0x416f9449 , },
6753
6754			{ 0x9dd6837c , 0xb0b6fc3e , 0x6c45d92e , },
6755
6756			{ 0x3a9d1f97 , 0xefd033b2 , 0x4b809189 , },
6757
6758			{ 0x1eee1d2a , 0xf2a6e46e , 0x55b4c814 , },
6759
6760			{ 0xb57c7728 , 0xd7017a0c , 0x6116b82b , },
6761
6762			{ 0xf2fc5d61 , 0x242aac86 , 0x05245cf0 , },
6763
6764			{ 0x26387824 , 0xc15c4ca5 , 0x4c5a315a , },
6765
6766			{ 0x8c151e77 , 0x8282fddc , 0x4d9899bb , },
6767
6768			{ 0x8ea1f680 , 0xf5ff6cdd , 0xbccfa2c1 , },
6769
6770			{ 0xe8cf3d2a , 0x338b1fb1 , 0xeda61f70 , },
6771
6772			{ 0x21f15b59 , 0xb9077a01 , 0x3e7c93b9 , },
6773
6774			{ 0x6f68d64a , 0x901b0161 , 0xb9fd3537 , },
6775
6776			{ 0x71b74d95 , 0xf5ddd5ad , 0x3e116c9d , },
6777
6778			{ 0x4c2e7261 , 0x4ca19a29 , 0x388b20ac , },
6779
6780			{ 0x8a2d38e8 , 0xd27ee0a1 , 0x408e57f2 , },
6781
6782			{ 0x7e58ca17 , 0x69dfedd2 , 0x3a76805e , },
6783
6784			{ 0xf997967f , 0x63c3d167 , 0x0956d953 , },
6785
6786			{ 0x48215963 , 0x71e1dfe0 , 0x42a6d410 , },
6787
6788			{ 0xa704b94c , 0x679f198a , 0x42ebf0ad , },
6789
6790			{ 0x1d699056 , 0xfeacf2a1 , 0x55cb4dfe , },
6791
6792			{ 0x6800bcc5 , 0x16024f15 , 0xcf3233e4 , },
6793
6794			{ 0x2d48e4ca , 0xbe61582f , 0x46026283 , },
6795
6796			{ 0x4c4c2b55 , 0x5539e44a , 0x52222fea , },
6797
6798			{ 0xd8ce94cb , 0xbc613c26 , 0x33776b4b , },
6799
6800			{ 0xd0b5a02b , 0x490d3cc6 , 0x2fde73f8 , },
6801
6802			{ 0xa223f7ec , 0xf0baeeb6 , 0x0603989b , },
6803
6804			{ 0x58de337a , 0x3bf3d597 , 0xced90d99 , },
6805
6806			{ 0x37f5d8f4 , 0x4d5b699b , 0xd7262e5f , },
6807
6808			{ 0xfa8a435d , 0x64f34a05 , 0x4470c029 , },
6809
6810			{ 0x238709fe , 0x52e7458f , 0x9a174cd3 , },
6811
6812			{ 0x9e1ba6f5 , 0xef0272f7 , 0x84f40beb , },
6813
6814			{ 0xcd8b57fa , 0x82adb0b8 , 0xb6f35093 , },
6815
6816			{ 0x0aed142f , 0xb1650290 , 0xec855937 , },
6817
6818			{ 0xd1f064db , 0x6e7340d3 , 0x5c28cb52 , },
6819
6820			{ 0x464ac895 , 0xd2fc57c3 , 0xc46805ba , },
6821
6822			{ 0xa0e6beea , 0xcfeec3d0 , 0x0225d214 , },
6823
6824			{ 0x78703ce0 , 0xc60f6075 , 0xdf7a24ac , },
6825
6826			{ 0xfea48165 , 0x3a9d1f97 , 0xc3876592 , },
6827
6828			{ 0xdb89b8db , 0xa6172211 , 0x2b52dc39 , },
6829
6830			{ 0x7ca03731 , 0x1db42849 , 0xc5df246e , },
6831
6832			{ 0x8801d0aa , 0xb57c7728 , 0x5b0c98b9 , },
6833
6834			{ 0xf89cd7f0 , 0xcc396a0b , 0xdb799c51 , },
6835
6836			{ 0x1611a808 , 0xaeae6105 , 0xb939fcdf , },
6837
6838			{ 0xe3cdb888 , 0x26387824 , 0x30d13e5f , },
6839
6840			{ 0x552a4cf6 , 0xee2d04bb , 0x70f9947d , },
6841
6842			{ 0x85e248e9 , 0x0a79663f , 0x53339cf7 , },
6843
6844			{ 0x1c61c3e9 , 0x8ea1f680 , 0x54afca53 , },
6845
6846			{ 0xb14cfc2b , 0x2e073302 , 0x10897992 , },
6847
6848			{ 0x6ec444cc , 0x9e819f13 , 0x7a3c0a6a , },
6849
6850			{ 0xe2fa5f80 , 0x21f15b59 , 0x93102436 , },
6851
6852			{ 0x6d33f4c6 , 0x31a27455 , 0x1fea4d2a , },
6853
6854			{ 0xb6dec609 , 0x4d437056 , 0x42eb1e2a , },
6855
6856			{ 0x1846c518 , 0x71b74d95 , 0xbd2655a8 , },
6857
6858			{ 0x9f947f8a , 0x2b501619 , 0xa4924b0e , },
6859
6860			{ 0xb7442f4d , 0xba30a5d8 , 0x4ff61aa1 , },
6861
6862			{ 0xe2c93242 , 0x8a2d38e8 , 0x70cd7f26 , },
6863
6864			{ 0xcd6863df , 0x78fd88dc , 0x7ae2f6f4 , },
6865
6866			{ 0xd512001d , 0xe6612dff , 0x5c4d0ca9 , },
6867
6868			{ 0x4e8d6b6c , 0xf997967f , 0x2d546c53 , },
6869
6870			{ 0xfa653ba1 , 0xc99014d4 , 0xa0c9fd27 , },
6871
6872			{ 0x49893408 , 0x29c2448b , 0xe0ee5efe , },
6873			};
6874
6875
6876			#define CRC32_FIXED_CHUNK_LEN 32768UL /* 2 * CRC32_MAX_VARIABLE_CHUNK_LEN; NOTE(review): presumably a chunk length in bytes for code paths that use one fixed chunk size -- confirm */
6877			#define CRC32_FIXED_CHUNK_MULT_1 0x29c2448b /* same value as the middle entry of the last crc32_mults_for_chunklen row */
6878			#define CRC32_FIXED_CHUNK_MULT_2 0x4b912f53 /* NOTE(review): presumably the multiplier for a distance of two fixed-length chunks -- confirm */
6879			#define CRC32_FIXED_CHUNK_MULT_3 0x454c93be /* NOTE(review): presumably the multiplier for a distance of three fixed-length chunks -- confirm */
6880
6881			/* #include "crc32_tables.h" */
6882
6883
6884			static const u32 crc32_slice1_table[] MAYBE_UNUSED = {
			/*
			 * 256-entry lookup table (64 rows of 4 values), one entry per byte
			 * value. The values match the conventional byte-at-a-time CRC-32
			 * table for the bit-reflected polynomial 0xedb88320; that constant
			 * itself appears at index 0x80.
			 * The first 256 entries of crc32_slice8_table are identical to this
			 * table.
			 * NOTE(review): presumably indexed by (crc ^ byte) & 0xff in the
			 * portable fallback -- confirm at the use site.
			 * These are generated values; do not edit them by hand.
			 */
6885			0x00000000, 0x77073096, 0xee0e612c, 0x990951ba,
6886			0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3,
6887			0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
6888			0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91,
6889			0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
6890			0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
6891			0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec,
6892			0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5,
6893			0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
6894			0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
6895			0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940,
6896			0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
6897			0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116,
6898			0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f,
6899			0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
6900			0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d,
6901			0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a,
6902			0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
6903			0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818,
6904			0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
6905			0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
6906			0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457,
6907			0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c,
6908			0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
6909			0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2,
6910			0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb,
6911			0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
6912			0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9,
6913			0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086,
6914			0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
6915			0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4,
6916			0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad,
6917			0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
6918			0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683,
6919			0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8,
6920			0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
6921			0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe,
6922			0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7,
6923			0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
6924			0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
6925			0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252,
6926			0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
6927			0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60,
6928			0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79,
6929			0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
6930			0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f,
6931			0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04,
6932			0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
6933			0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a,
6934			0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
6935			0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
6936			0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21,
6937			0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e,
6938			0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
6939			0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c,
6940			0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45,
6941			0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
6942			0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db,
6943			0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0,
6944			0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
6945			0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6,
6946			0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf,
6947			0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
6948			0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d,
6949			};
6950
6951			static const u32 crc32_slice8_table[] MAYBE_UNUSED = {
6952			0x00000000, 0x77073096, 0xee0e612c, 0x990951ba,
6953			0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3,
6954			0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
6955			0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91,
6956			0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
6957			0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
6958			0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec,
6959			0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5,
6960			0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
6961			0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
6962			0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940,
6963			0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
6964			0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116,
6965			0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f,
6966			0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
6967			0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d,
6968			0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a,
6969			0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
6970			0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818,
6971			0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
6972			0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
6973			0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457,
6974			0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c,
6975			0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
6976			0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2,
6977			0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb,
6978			0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
6979			0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9,
6980			0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086,
6981			0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
6982			0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4,
6983			0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad,
6984			0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
6985			0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683,
6986			0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8,
6987			0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
6988			0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe,
6989			0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7,
6990			0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
6991			0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
6992			0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252,
6993			0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
6994			0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60,
6995			0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79,
6996			0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
6997			0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f,
6998			0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04,
6999			0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
7000			0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a,
7001			0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
7002			0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
7003			0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21,
7004			0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e,
7005			0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
7006			0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c,
7007			0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45,
7008			0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
7009			0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db,
7010			0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0,
7011			0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
7012			0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6,
7013			0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf,
7014			0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
7015			0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d,
7016			0x00000000, 0x191b3141, 0x32366282, 0x2b2d53c3,
7017			0x646cc504, 0x7d77f445, 0x565aa786, 0x4f4196c7,
7018			0xc8d98a08, 0xd1c2bb49, 0xfaefe88a, 0xe3f4d9cb,
7019			0xacb54f0c, 0xb5ae7e4d, 0x9e832d8e, 0x87981ccf,
7020			0x4ac21251, 0x53d92310, 0x78f470d3, 0x61ef4192,
7021			0x2eaed755, 0x37b5e614, 0x1c98b5d7, 0x05838496,
7022			0x821b9859, 0x9b00a918, 0xb02dfadb, 0xa936cb9a,
7023			0xe6775d5d, 0xff6c6c1c, 0xd4413fdf, 0xcd5a0e9e,
7024			0x958424a2, 0x8c9f15e3, 0xa7b24620, 0xbea97761,
7025			0xf1e8e1a6, 0xe8f3d0e7, 0xc3de8324, 0xdac5b265,
7026			0x5d5daeaa, 0x44469feb, 0x6f6bcc28, 0x7670fd69,
7027			0x39316bae, 0x202a5aef, 0x0b07092c, 0x121c386d,
7028			0xdf4636f3, 0xc65d07b2, 0xed705471, 0xf46b6530,
7029			0xbb2af3f7, 0xa231c2b6, 0x891c9175, 0x9007a034,
7030			0x179fbcfb, 0x0e848dba, 0x25a9de79, 0x3cb2ef38,
7031			0x73f379ff, 0x6ae848be, 0x41c51b7d, 0x58de2a3c,
7032			0xf0794f05, 0xe9627e44, 0xc24f2d87, 0xdb541cc6,
7033			0x94158a01, 0x8d0ebb40, 0xa623e883, 0xbf38d9c2,
7034			0x38a0c50d, 0x21bbf44c, 0x0a96a78f, 0x138d96ce,
7035			0x5ccc0009, 0x45d73148, 0x6efa628b, 0x77e153ca,
7036			0xbabb5d54, 0xa3a06c15, 0x888d3fd6, 0x91960e97,
7037			0xded79850, 0xc7cca911, 0xece1fad2, 0xf5facb93,
7038			0x7262d75c, 0x6b79e61d, 0x4054b5de, 0x594f849f,
7039			0x160e1258, 0x0f152319, 0x243870da, 0x3d23419b,
7040			0x65fd6ba7, 0x7ce65ae6, 0x57cb0925, 0x4ed03864,
7041			0x0191aea3, 0x188a9fe2, 0x33a7cc21, 0x2abcfd60,
7042			0xad24e1af, 0xb43fd0ee, 0x9f12832d, 0x8609b26c,
7043			0xc94824ab, 0xd05315ea, 0xfb7e4629, 0xe2657768,
7044			0x2f3f79f6, 0x362448b7, 0x1d091b74, 0x04122a35,
7045			0x4b53bcf2, 0x52488db3, 0x7965de70, 0x607eef31,
7046			0xe7e6f3fe, 0xfefdc2bf, 0xd5d0917c, 0xcccba03d,
7047			0x838a36fa, 0x9a9107bb, 0xb1bc5478, 0xa8a76539,
7048			0x3b83984b, 0x2298a90a, 0x09b5fac9, 0x10aecb88,
7049			0x5fef5d4f, 0x46f46c0e, 0x6dd93fcd, 0x74c20e8c,
7050			0xf35a1243, 0xea412302, 0xc16c70c1, 0xd8774180,
7051			0x9736d747, 0x8e2de606, 0xa500b5c5, 0xbc1b8484,
7052			0x71418a1a, 0x685abb5b, 0x4377e898, 0x5a6cd9d9,
7053			0x152d4f1e, 0x0c367e5f, 0x271b2d9c, 0x3e001cdd,
7054			0xb9980012, 0xa0833153, 0x8bae6290, 0x92b553d1,
7055			0xddf4c516, 0xc4eff457, 0xefc2a794, 0xf6d996d5,
7056			0xae07bce9, 0xb71c8da8, 0x9c31de6b, 0x852aef2a,
7057			0xca6b79ed, 0xd37048ac, 0xf85d1b6f, 0xe1462a2e,
7058			0x66de36e1, 0x7fc507a0, 0x54e85463, 0x4df36522,
7059			0x02b2f3e5, 0x1ba9c2a4, 0x30849167, 0x299fa026,
7060			0xe4c5aeb8, 0xfdde9ff9, 0xd6f3cc3a, 0xcfe8fd7b,
7061			0x80a96bbc, 0x99b25afd, 0xb29f093e, 0xab84387f,
7062			0x2c1c24b0, 0x350715f1, 0x1e2a4632, 0x07317773,
7063			0x4870e1b4, 0x516bd0f5, 0x7a468336, 0x635db277,
7064			0xcbfad74e, 0xd2e1e60f, 0xf9ccb5cc, 0xe0d7848d,
7065			0xaf96124a, 0xb68d230b, 0x9da070c8, 0x84bb4189,
7066			0x03235d46, 0x1a386c07, 0x31153fc4, 0x280e0e85,
7067			0x674f9842, 0x7e54a903, 0x5579fac0, 0x4c62cb81,
7068			0x8138c51f, 0x9823f45e, 0xb30ea79d, 0xaa1596dc,
7069			0xe554001b, 0xfc4f315a, 0xd7626299, 0xce7953d8,
7070			0x49e14f17, 0x50fa7e56, 0x7bd72d95, 0x62cc1cd4,
7071			0x2d8d8a13, 0x3496bb52, 0x1fbbe891, 0x06a0d9d0,
7072			0x5e7ef3ec, 0x4765c2ad, 0x6c48916e, 0x7553a02f,
7073			0x3a1236e8, 0x230907a9, 0x0824546a, 0x113f652b,
7074			0x96a779e4, 0x8fbc48a5, 0xa4911b66, 0xbd8a2a27,
7075			0xf2cbbce0, 0xebd08da1, 0xc0fdde62, 0xd9e6ef23,
7076			0x14bce1bd, 0x0da7d0fc, 0x268a833f, 0x3f91b27e,
7077			0x70d024b9, 0x69cb15f8, 0x42e6463b, 0x5bfd777a,
7078			0xdc656bb5, 0xc57e5af4, 0xee530937, 0xf7483876,
7079			0xb809aeb1, 0xa1129ff0, 0x8a3fcc33, 0x9324fd72,
7080			0x00000000, 0x01c26a37, 0x0384d46e, 0x0246be59,
7081			0x0709a8dc, 0x06cbc2eb, 0x048d7cb2, 0x054f1685,
7082			0x0e1351b8, 0x0fd13b8f, 0x0d9785d6, 0x0c55efe1,
7083			0x091af964, 0x08d89353, 0x0a9e2d0a, 0x0b5c473d,
7084			0x1c26a370, 0x1de4c947, 0x1fa2771e, 0x1e601d29,
7085			0x1b2f0bac, 0x1aed619b, 0x18abdfc2, 0x1969b5f5,
7086			0x1235f2c8, 0x13f798ff, 0x11b126a6, 0x10734c91,
7087			0x153c5a14, 0x14fe3023, 0x16b88e7a, 0x177ae44d,
7088			0x384d46e0, 0x398f2cd7, 0x3bc9928e, 0x3a0bf8b9,
7089			0x3f44ee3c, 0x3e86840b, 0x3cc03a52, 0x3d025065,
7090			0x365e1758, 0x379c7d6f, 0x35dac336, 0x3418a901,
7091			0x3157bf84, 0x3095d5b3, 0x32d36bea, 0x331101dd,
7092			0x246be590, 0x25a98fa7, 0x27ef31fe, 0x262d5bc9,
7093			0x23624d4c, 0x22a0277b, 0x20e69922, 0x2124f315,
7094			0x2a78b428, 0x2bbade1f, 0x29fc6046, 0x283e0a71,
7095			0x2d711cf4, 0x2cb376c3, 0x2ef5c89a, 0x2f37a2ad,
7096			0x709a8dc0, 0x7158e7f7, 0x731e59ae, 0x72dc3399,
7097			0x7793251c, 0x76514f2b, 0x7417f172, 0x75d59b45,
7098			0x7e89dc78, 0x7f4bb64f, 0x7d0d0816, 0x7ccf6221,
7099			0x798074a4, 0x78421e93, 0x7a04a0ca, 0x7bc6cafd,
7100			0x6cbc2eb0, 0x6d7e4487, 0x6f38fade, 0x6efa90e9,
7101			0x6bb5866c, 0x6a77ec5b, 0x68315202, 0x69f33835,
7102			0x62af7f08, 0x636d153f, 0x612bab66, 0x60e9c151,
7103			0x65a6d7d4, 0x6464bde3, 0x662203ba, 0x67e0698d,
7104			0x48d7cb20, 0x4915a117, 0x4b531f4e, 0x4a917579,
7105			0x4fde63fc, 0x4e1c09cb, 0x4c5ab792, 0x4d98dda5,
7106			0x46c49a98, 0x4706f0af, 0x45404ef6, 0x448224c1,
7107			0x41cd3244, 0x400f5873, 0x4249e62a, 0x438b8c1d,
7108			0x54f16850, 0x55330267, 0x5775bc3e, 0x56b7d609,
7109			0x53f8c08c, 0x523aaabb, 0x507c14e2, 0x51be7ed5,
7110			0x5ae239e8, 0x5b2053df, 0x5966ed86, 0x58a487b1,
7111			0x5deb9134, 0x5c29fb03, 0x5e6f455a, 0x5fad2f6d,
7112			0xe1351b80, 0xe0f771b7, 0xe2b1cfee, 0xe373a5d9,
7113			0xe63cb35c, 0xe7fed96b, 0xe5b86732, 0xe47a0d05,
7114			0xef264a38, 0xeee4200f, 0xeca29e56, 0xed60f461,
7115			0xe82fe2e4, 0xe9ed88d3, 0xebab368a, 0xea695cbd,
7116			0xfd13b8f0, 0xfcd1d2c7, 0xfe976c9e, 0xff5506a9,
7117			0xfa1a102c, 0xfbd87a1b, 0xf99ec442, 0xf85cae75,
7118			0xf300e948, 0xf2c2837f, 0xf0843d26, 0xf1465711,
7119			0xf4094194, 0xf5cb2ba3, 0xf78d95fa, 0xf64fffcd,
7120			0xd9785d60, 0xd8ba3757, 0xdafc890e, 0xdb3ee339,
7121			0xde71f5bc, 0xdfb39f8b, 0xddf521d2, 0xdc374be5,
7122			0xd76b0cd8, 0xd6a966ef, 0xd4efd8b6, 0xd52db281,
7123			0xd062a404, 0xd1a0ce33, 0xd3e6706a, 0xd2241a5d,
7124			0xc55efe10, 0xc49c9427, 0xc6da2a7e, 0xc7184049,
7125			0xc25756cc, 0xc3953cfb, 0xc1d382a2, 0xc011e895,
7126			0xcb4dafa8, 0xca8fc59f, 0xc8c97bc6, 0xc90b11f1,
7127			0xcc440774, 0xcd866d43, 0xcfc0d31a, 0xce02b92d,
7128			0x91af9640, 0x906dfc77, 0x922b422e, 0x93e92819,
7129			0x96a63e9c, 0x976454ab, 0x9522eaf2, 0x94e080c5,
7130			0x9fbcc7f8, 0x9e7eadcf, 0x9c381396, 0x9dfa79a1,
7131			0x98b56f24, 0x99770513, 0x9b31bb4a, 0x9af3d17d,
7132			0x8d893530, 0x8c4b5f07, 0x8e0de15e, 0x8fcf8b69,
7133			0x8a809dec, 0x8b42f7db, 0x89044982, 0x88c623b5,
7134			0x839a6488, 0x82580ebf, 0x801eb0e6, 0x81dcdad1,
7135			0x8493cc54, 0x8551a663, 0x8717183a, 0x86d5720d,
7136			0xa9e2d0a0, 0xa820ba97, 0xaa6604ce, 0xaba46ef9,
7137			0xaeeb787c, 0xaf29124b, 0xad6fac12, 0xacadc625,
7138			0xa7f18118, 0xa633eb2f, 0xa4755576, 0xa5b73f41,
7139			0xa0f829c4, 0xa13a43f3, 0xa37cfdaa, 0xa2be979d,
7140			0xb5c473d0, 0xb40619e7, 0xb640a7be, 0xb782cd89,
7141			0xb2cddb0c, 0xb30fb13b, 0xb1490f62, 0xb08b6555,
7142			0xbbd72268, 0xba15485f, 0xb853f606, 0xb9919c31,
7143			0xbcde8ab4, 0xbd1ce083, 0xbf5a5eda, 0xbe9834ed,
7144			0x00000000, 0xb8bc6765, 0xaa09c88b, 0x12b5afee,
7145			0x8f629757, 0x37def032, 0x256b5fdc, 0x9dd738b9,
7146			0xc5b428ef, 0x7d084f8a, 0x6fbde064, 0xd7018701,
7147			0x4ad6bfb8, 0xf26ad8dd, 0xe0df7733, 0x58631056,
7148			0x5019579f, 0xe8a530fa, 0xfa109f14, 0x42acf871,
7149			0xdf7bc0c8, 0x67c7a7ad, 0x75720843, 0xcdce6f26,
7150			0x95ad7f70, 0x2d111815, 0x3fa4b7fb, 0x8718d09e,
7151			0x1acfe827, 0xa2738f42, 0xb0c620ac, 0x087a47c9,
7152			0xa032af3e, 0x188ec85b, 0x0a3b67b5, 0xb28700d0,
7153			0x2f503869, 0x97ec5f0c, 0x8559f0e2, 0x3de59787,
7154			0x658687d1, 0xdd3ae0b4, 0xcf8f4f5a, 0x7733283f,
7155			0xeae41086, 0x525877e3, 0x40edd80d, 0xf851bf68,
7156			0xf02bf8a1, 0x48979fc4, 0x5a22302a, 0xe29e574f,
7157			0x7f496ff6, 0xc7f50893, 0xd540a77d, 0x6dfcc018,
7158			0x359fd04e, 0x8d23b72b, 0x9f9618c5, 0x272a7fa0,
7159			0xbafd4719, 0x0241207c, 0x10f48f92, 0xa848e8f7,
7160			0x9b14583d, 0x23a83f58, 0x311d90b6, 0x89a1f7d3,
7161			0x1476cf6a, 0xaccaa80f, 0xbe7f07e1, 0x06c36084,
7162			0x5ea070d2, 0xe61c17b7, 0xf4a9b859, 0x4c15df3c,
7163			0xd1c2e785, 0x697e80e0, 0x7bcb2f0e, 0xc377486b,
7164			0xcb0d0fa2, 0x73b168c7, 0x6104c729, 0xd9b8a04c,
7165			0x446f98f5, 0xfcd3ff90, 0xee66507e, 0x56da371b,
7166			0x0eb9274d, 0xb6054028, 0xa4b0efc6, 0x1c0c88a3,
7167			0x81dbb01a, 0x3967d77f, 0x2bd27891, 0x936e1ff4,
7168			0x3b26f703, 0x839a9066, 0x912f3f88, 0x299358ed,
7169			0xb4446054, 0x0cf80731, 0x1e4da8df, 0xa6f1cfba,
7170			0xfe92dfec, 0x462eb889, 0x549b1767, 0xec277002,
7171			0x71f048bb, 0xc94c2fde, 0xdbf98030, 0x6345e755,
7172			0x6b3fa09c, 0xd383c7f9, 0xc1366817, 0x798a0f72,
7173			0xe45d37cb, 0x5ce150ae, 0x4e54ff40, 0xf6e89825,
7174			0xae8b8873, 0x1637ef16, 0x048240f8, 0xbc3e279d,
7175			0x21e91f24, 0x99557841, 0x8be0d7af, 0x335cb0ca,
7176			0xed59b63b, 0x55e5d15e, 0x47507eb0, 0xffec19d5,
7177			0x623b216c, 0xda874609, 0xc832e9e7, 0x708e8e82,
7178			0x28ed9ed4, 0x9051f9b1, 0x82e4565f, 0x3a58313a,
7179			0xa78f0983, 0x1f336ee6, 0x0d86c108, 0xb53aa66d,
7180			0xbd40e1a4, 0x05fc86c1, 0x1749292f, 0xaff54e4a,
7181			0x322276f3, 0x8a9e1196, 0x982bbe78, 0x2097d91d,
7182			0x78f4c94b, 0xc048ae2e, 0xd2fd01c0, 0x6a4166a5,
7183			0xf7965e1c, 0x4f2a3979, 0x5d9f9697, 0xe523f1f2,
7184			0x4d6b1905, 0xf5d77e60, 0xe762d18e, 0x5fdeb6eb,
7185			0xc2098e52, 0x7ab5e937, 0x680046d9, 0xd0bc21bc,
7186			0x88df31ea, 0x3063568f, 0x22d6f961, 0x9a6a9e04,
7187			0x07bda6bd, 0xbf01c1d8, 0xadb46e36, 0x15080953,
7188			0x1d724e9a, 0xa5ce29ff, 0xb77b8611, 0x0fc7e174,
7189			0x9210d9cd, 0x2aacbea8, 0x38191146, 0x80a57623,
7190			0xd8c66675, 0x607a0110, 0x72cfaefe, 0xca73c99b,
7191			0x57a4f122, 0xef189647, 0xfdad39a9, 0x45115ecc,
7192			0x764dee06, 0xcef18963, 0xdc44268d, 0x64f841e8,
7193			0xf92f7951, 0x41931e34, 0x5326b1da, 0xeb9ad6bf,
7194			0xb3f9c6e9, 0x0b45a18c, 0x19f00e62, 0xa14c6907,
7195			0x3c9b51be, 0x842736db, 0x96929935, 0x2e2efe50,
7196			0x2654b999, 0x9ee8defc, 0x8c5d7112, 0x34e11677,
7197			0xa9362ece, 0x118a49ab, 0x033fe645, 0xbb838120,
7198			0xe3e09176, 0x5b5cf613, 0x49e959fd, 0xf1553e98,
7199			0x6c820621, 0xd43e6144, 0xc68bceaa, 0x7e37a9cf,
7200			0xd67f4138, 0x6ec3265d, 0x7c7689b3, 0xc4caeed6,
7201			0x591dd66f, 0xe1a1b10a, 0xf3141ee4, 0x4ba87981,
7202			0x13cb69d7, 0xab770eb2, 0xb9c2a15c, 0x017ec639,
7203			0x9ca9fe80, 0x241599e5, 0x36a0360b, 0x8e1c516e,
7204			0x866616a7, 0x3eda71c2, 0x2c6fde2c, 0x94d3b949,
7205			0x090481f0, 0xb1b8e695, 0xa30d497b, 0x1bb12e1e,
7206			0x43d23e48, 0xfb6e592d, 0xe9dbf6c3, 0x516791a6,
7207			0xccb0a91f, 0x740cce7a, 0x66b96194, 0xde0506f1,
7208			0x00000000, 0x3d6029b0, 0x7ac05360, 0x47a07ad0,
7209			0xf580a6c0, 0xc8e08f70, 0x8f40f5a0, 0xb220dc10,
7210			0x30704bc1, 0x0d106271, 0x4ab018a1, 0x77d03111,
7211			0xc5f0ed01, 0xf890c4b1, 0xbf30be61, 0x825097d1,
7212			0x60e09782, 0x5d80be32, 0x1a20c4e2, 0x2740ed52,
7213			0x95603142, 0xa80018f2, 0xefa06222, 0xd2c04b92,
7214			0x5090dc43, 0x6df0f5f3, 0x2a508f23, 0x1730a693,
7215			0xa5107a83, 0x98705333, 0xdfd029e3, 0xe2b00053,
7216			0xc1c12f04, 0xfca106b4, 0xbb017c64, 0x866155d4,
7217			0x344189c4, 0x0921a074, 0x4e81daa4, 0x73e1f314,
7218			0xf1b164c5, 0xccd14d75, 0x8b7137a5, 0xb6111e15,
7219			0x0431c205, 0x3951ebb5, 0x7ef19165, 0x4391b8d5,
7220			0xa121b886, 0x9c419136, 0xdbe1ebe6, 0xe681c256,
7221			0x54a11e46, 0x69c137f6, 0x2e614d26, 0x13016496,
7222			0x9151f347, 0xac31daf7, 0xeb91a027, 0xd6f18997,
7223			0x64d15587, 0x59b17c37, 0x1e1106e7, 0x23712f57,
7224			0x58f35849, 0x659371f9, 0x22330b29, 0x1f532299,
7225			0xad73fe89, 0x9013d739, 0xd7b3ade9, 0xead38459,
7226			0x68831388, 0x55e33a38, 0x124340e8, 0x2f236958,
7227			0x9d03b548, 0xa0639cf8, 0xe7c3e628, 0xdaa3cf98,
7228			0x3813cfcb, 0x0573e67b, 0x42d39cab, 0x7fb3b51b,
7229			0xcd93690b, 0xf0f340bb, 0xb7533a6b, 0x8a3313db,
7230			0x0863840a, 0x3503adba, 0x72a3d76a, 0x4fc3feda,
7231			0xfde322ca, 0xc0830b7a, 0x872371aa, 0xba43581a,
7232			0x9932774d, 0xa4525efd, 0xe3f2242d, 0xde920d9d,
7233			0x6cb2d18d, 0x51d2f83d, 0x167282ed, 0x2b12ab5d,
7234			0xa9423c8c, 0x9422153c, 0xd3826fec, 0xeee2465c,
7235			0x5cc29a4c, 0x61a2b3fc, 0x2602c92c, 0x1b62e09c,
7236			0xf9d2e0cf, 0xc4b2c97f, 0x8312b3af, 0xbe729a1f,
7237			0x0c52460f, 0x31326fbf, 0x7692156f, 0x4bf23cdf,
7238			0xc9a2ab0e, 0xf4c282be, 0xb362f86e, 0x8e02d1de,
7239			0x3c220dce, 0x0142247e, 0x46e25eae, 0x7b82771e,
7240			0xb1e6b092, 0x8c869922, 0xcb26e3f2, 0xf646ca42,
7241			0x44661652, 0x79063fe2, 0x3ea64532, 0x03c66c82,
7242			0x8196fb53, 0xbcf6d2e3, 0xfb56a833, 0xc6368183,
7243			0x74165d93, 0x49767423, 0x0ed60ef3, 0x33b62743,
7244			0xd1062710, 0xec660ea0, 0xabc67470, 0x96a65dc0,
7245			0x248681d0, 0x19e6a860, 0x5e46d2b0, 0x6326fb00,
7246			0xe1766cd1, 0xdc164561, 0x9bb63fb1, 0xa6d61601,
7247			0x14f6ca11, 0x2996e3a1, 0x6e369971, 0x5356b0c1,
7248			0x70279f96, 0x4d47b626, 0x0ae7ccf6, 0x3787e546,
7249			0x85a73956, 0xb8c710e6, 0xff676a36, 0xc2074386,
7250			0x4057d457, 0x7d37fde7, 0x3a978737, 0x07f7ae87,
7251			0xb5d77297, 0x88b75b27, 0xcf1721f7, 0xf2770847,
7252			0x10c70814, 0x2da721a4, 0x6a075b74, 0x576772c4,
7253			0xe547aed4, 0xd8278764, 0x9f87fdb4, 0xa2e7d404,
7254			0x20b743d5, 0x1dd76a65, 0x5a7710b5, 0x67173905,
7255			0xd537e515, 0xe857cca5, 0xaff7b675, 0x92979fc5,
7256			0xe915e8db, 0xd475c16b, 0x93d5bbbb, 0xaeb5920b,
7257			0x1c954e1b, 0x21f567ab, 0x66551d7b, 0x5b3534cb,
7258			0xd965a31a, 0xe4058aaa, 0xa3a5f07a, 0x9ec5d9ca,
7259			0x2ce505da, 0x11852c6a, 0x562556ba, 0x6b457f0a,
7260			0x89f57f59, 0xb49556e9, 0xf3352c39, 0xce550589,
7261			0x7c75d999, 0x4115f029, 0x06b58af9, 0x3bd5a349,
7262			0xb9853498, 0x84e51d28, 0xc34567f8, 0xfe254e48,
7263			0x4c059258, 0x7165bbe8, 0x36c5c138, 0x0ba5e888,
7264			0x28d4c7df, 0x15b4ee6f, 0x521494bf, 0x6f74bd0f,
7265			0xdd54611f, 0xe03448af, 0xa794327f, 0x9af41bcf,
7266			0x18a48c1e, 0x25c4a5ae, 0x6264df7e, 0x5f04f6ce,
7267			0xed242ade, 0xd044036e, 0x97e479be, 0xaa84500e,
7268			0x4834505d, 0x755479ed, 0x32f4033d, 0x0f942a8d,
7269			0xbdb4f69d, 0x80d4df2d, 0xc774a5fd, 0xfa148c4d,
7270			0x78441b9c, 0x4524322c, 0x028448fc, 0x3fe4614c,
7271			0x8dc4bd5c, 0xb0a494ec, 0xf704ee3c, 0xca64c78c,
7272			0x00000000, 0xcb5cd3a5, 0x4dc8a10b, 0x869472ae,
7273			0x9b914216, 0x50cd91b3, 0xd659e31d, 0x1d0530b8,
7274			0xec53826d, 0x270f51c8, 0xa19b2366, 0x6ac7f0c3,
7275			0x77c2c07b, 0xbc9e13de, 0x3a0a6170, 0xf156b2d5,
7276			0x03d6029b, 0xc88ad13e, 0x4e1ea390, 0x85427035,
7277			0x9847408d, 0x531b9328, 0xd58fe186, 0x1ed33223,
7278			0xef8580f6, 0x24d95353, 0xa24d21fd, 0x6911f258,
7279			0x7414c2e0, 0xbf481145, 0x39dc63eb, 0xf280b04e,
7280			0x07ac0536, 0xccf0d693, 0x4a64a43d, 0x81387798,
7281			0x9c3d4720, 0x57619485, 0xd1f5e62b, 0x1aa9358e,
7282			0xebff875b, 0x20a354fe, 0xa6372650, 0x6d6bf5f5,
7283			0x706ec54d, 0xbb3216e8, 0x3da66446, 0xf6fab7e3,
7284			0x047a07ad, 0xcf26d408, 0x49b2a6a6, 0x82ee7503,
7285			0x9feb45bb, 0x54b7961e, 0xd223e4b0, 0x197f3715,
7286			0xe82985c0, 0x23755665, 0xa5e124cb, 0x6ebdf76e,
7287			0x73b8c7d6, 0xb8e41473, 0x3e7066dd, 0xf52cb578,
7288			0x0f580a6c, 0xc404d9c9, 0x4290ab67, 0x89cc78c2,
7289			0x94c9487a, 0x5f959bdf, 0xd901e971, 0x125d3ad4,
7290			0xe30b8801, 0x28575ba4, 0xaec3290a, 0x659ffaaf,
7291			0x789aca17, 0xb3c619b2, 0x35526b1c, 0xfe0eb8b9,
7292			0x0c8e08f7, 0xc7d2db52, 0x4146a9fc, 0x8a1a7a59,
7293			0x971f4ae1, 0x5c439944, 0xdad7ebea, 0x118b384f,
7294			0xe0dd8a9a, 0x2b81593f, 0xad152b91, 0x6649f834,
7295			0x7b4cc88c, 0xb0101b29, 0x36846987, 0xfdd8ba22,
7296			0x08f40f5a, 0xc3a8dcff, 0x453cae51, 0x8e607df4,
7297			0x93654d4c, 0x58399ee9, 0xdeadec47, 0x15f13fe2,
7298			0xe4a78d37, 0x2ffb5e92, 0xa96f2c3c, 0x6233ff99,
7299			0x7f36cf21, 0xb46a1c84, 0x32fe6e2a, 0xf9a2bd8f,
7300			0x0b220dc1, 0xc07ede64, 0x46eaacca, 0x8db67f6f,
7301			0x90b34fd7, 0x5bef9c72, 0xdd7beedc, 0x16273d79,
7302			0xe7718fac, 0x2c2d5c09, 0xaab92ea7, 0x61e5fd02,
7303			0x7ce0cdba, 0xb7bc1e1f, 0x31286cb1, 0xfa74bf14,
7304			0x1eb014d8, 0xd5ecc77d, 0x5378b5d3, 0x98246676,
7305			0x852156ce, 0x4e7d856b, 0xc8e9f7c5, 0x03b52460,
7306			0xf2e396b5, 0x39bf4510, 0xbf2b37be, 0x7477e41b,
7307			0x6972d4a3, 0xa22e0706, 0x24ba75a8, 0xefe6a60d,
7308			0x1d661643, 0xd63ac5e6, 0x50aeb748, 0x9bf264ed,
7309			0x86f75455, 0x4dab87f0, 0xcb3ff55e, 0x006326fb,
7310			0xf135942e, 0x3a69478b, 0xbcfd3525, 0x77a1e680,
7311			0x6aa4d638, 0xa1f8059d, 0x276c7733, 0xec30a496,
7312			0x191c11ee, 0xd240c24b, 0x54d4b0e5, 0x9f886340,
7313			0x828d53f8, 0x49d1805d, 0xcf45f2f3, 0x04192156,
7314			0xf54f9383, 0x3e134026, 0xb8873288, 0x73dbe12d,
7315			0x6eded195, 0xa5820230, 0x2316709e, 0xe84aa33b,
7316			0x1aca1375, 0xd196c0d0, 0x5702b27e, 0x9c5e61db,
7317			0x815b5163, 0x4a0782c6, 0xcc93f068, 0x07cf23cd,
7318			0xf6999118, 0x3dc542bd, 0xbb513013, 0x700de3b6,
7319			0x6d08d30e, 0xa65400ab, 0x20c07205, 0xeb9ca1a0,
7320			0x11e81eb4, 0xdab4cd11, 0x5c20bfbf, 0x977c6c1a,
7321			0x8a795ca2, 0x41258f07, 0xc7b1fda9, 0x0ced2e0c,
7322			0xfdbb9cd9, 0x36e74f7c, 0xb0733dd2, 0x7b2fee77,
7323			0x662adecf, 0xad760d6a, 0x2be27fc4, 0xe0beac61,
7324			0x123e1c2f, 0xd962cf8a, 0x5ff6bd24, 0x94aa6e81,
7325			0x89af5e39, 0x42f38d9c, 0xc467ff32, 0x0f3b2c97,
7326			0xfe6d9e42, 0x35314de7, 0xb3a53f49, 0x78f9ecec,
7327			0x65fcdc54, 0xaea00ff1, 0x28347d5f, 0xe368aefa,
7328			0x16441b82, 0xdd18c827, 0x5b8cba89, 0x90d0692c,
7329			0x8dd55994, 0x46898a31, 0xc01df89f, 0x0b412b3a,
7330			0xfa1799ef, 0x314b4a4a, 0xb7df38e4, 0x7c83eb41,
7331			0x6186dbf9, 0xaada085c, 0x2c4e7af2, 0xe712a957,
7332			0x15921919, 0xdececabc, 0x585ab812, 0x93066bb7,
7333			0x8e035b0f, 0x455f88aa, 0xc3cbfa04, 0x089729a1,
7334			0xf9c19b74, 0x329d48d1, 0xb4093a7f, 0x7f55e9da,
7335			0x6250d962, 0xa90c0ac7, 0x2f987869, 0xe4c4abcc,
7336			0x00000000, 0xa6770bb4, 0x979f1129, 0x31e81a9d,
7337			0xf44f2413, 0x52382fa7, 0x63d0353a, 0xc5a73e8e,
7338			0x33ef4e67, 0x959845d3, 0xa4705f4e, 0x020754fa,
7339			0xc7a06a74, 0x61d761c0, 0x503f7b5d, 0xf64870e9,
7340			0x67de9cce, 0xc1a9977a, 0xf0418de7, 0x56368653,
7341			0x9391b8dd, 0x35e6b369, 0x040ea9f4, 0xa279a240,
7342			0x5431d2a9, 0xf246d91d, 0xc3aec380, 0x65d9c834,
7343			0xa07ef6ba, 0x0609fd0e, 0x37e1e793, 0x9196ec27,
7344			0xcfbd399c, 0x69ca3228, 0x582228b5, 0xfe552301,
7345			0x3bf21d8f, 0x9d85163b, 0xac6d0ca6, 0x0a1a0712,
7346			0xfc5277fb, 0x5a257c4f, 0x6bcd66d2, 0xcdba6d66,
7347			0x081d53e8, 0xae6a585c, 0x9f8242c1, 0x39f54975,
7348			0xa863a552, 0x0e14aee6, 0x3ffcb47b, 0x998bbfcf,
7349			0x5c2c8141, 0xfa5b8af5, 0xcbb39068, 0x6dc49bdc,
7350			0x9b8ceb35, 0x3dfbe081, 0x0c13fa1c, 0xaa64f1a8,
7351			0x6fc3cf26, 0xc9b4c492, 0xf85cde0f, 0x5e2bd5bb,
7352			0x440b7579, 0xe27c7ecd, 0xd3946450, 0x75e36fe4,
7353			0xb044516a, 0x16335ade, 0x27db4043, 0x81ac4bf7,
7354			0x77e43b1e, 0xd19330aa, 0xe07b2a37, 0x460c2183,
7355			0x83ab1f0d, 0x25dc14b9, 0x14340e24, 0xb2430590,
7356			0x23d5e9b7, 0x85a2e203, 0xb44af89e, 0x123df32a,
7357			0xd79acda4, 0x71edc610, 0x4005dc8d, 0xe672d739,
7358			0x103aa7d0, 0xb64dac64, 0x87a5b6f9, 0x21d2bd4d,
7359			0xe47583c3, 0x42028877, 0x73ea92ea, 0xd59d995e,
7360			0x8bb64ce5, 0x2dc14751, 0x1c295dcc, 0xba5e5678,
7361			0x7ff968f6, 0xd98e6342, 0xe86679df, 0x4e11726b,
7362			0xb8590282, 0x1e2e0936, 0x2fc613ab, 0x89b1181f,
7363			0x4c162691, 0xea612d25, 0xdb8937b8, 0x7dfe3c0c,
7364			0xec68d02b, 0x4a1fdb9f, 0x7bf7c102, 0xdd80cab6,
7365			0x1827f438, 0xbe50ff8c, 0x8fb8e511, 0x29cfeea5,
7366			0xdf879e4c, 0x79f095f8, 0x48188f65, 0xee6f84d1,
7367			0x2bc8ba5f, 0x8dbfb1eb, 0xbc57ab76, 0x1a20a0c2,
7368			0x8816eaf2, 0x2e61e146, 0x1f89fbdb, 0xb9fef06f,
7369			0x7c59cee1, 0xda2ec555, 0xebc6dfc8, 0x4db1d47c,
7370			0xbbf9a495, 0x1d8eaf21, 0x2c66b5bc, 0x8a11be08,
7371			0x4fb68086, 0xe9c18b32, 0xd82991af, 0x7e5e9a1b,
7372			0xefc8763c, 0x49bf7d88, 0x78576715, 0xde206ca1,
7373			0x1b87522f, 0xbdf0599b, 0x8c184306, 0x2a6f48b2,
7374			0xdc27385b, 0x7a5033ef, 0x4bb82972, 0xedcf22c6,
7375			0x28681c48, 0x8e1f17fc, 0xbff70d61, 0x198006d5,
7376			0x47abd36e, 0xe1dcd8da, 0xd034c247, 0x7643c9f3,
7377			0xb3e4f77d, 0x1593fcc9, 0x247be654, 0x820cede0,
7378			0x74449d09, 0xd23396bd, 0xe3db8c20, 0x45ac8794,
7379			0x800bb91a, 0x267cb2ae, 0x1794a833, 0xb1e3a387,
7380			0x20754fa0, 0x86024414, 0xb7ea5e89, 0x119d553d,
7381			0xd43a6bb3, 0x724d6007, 0x43a57a9a, 0xe5d2712e,
7382			0x139a01c7, 0xb5ed0a73, 0x840510ee, 0x22721b5a,
7383			0xe7d525d4, 0x41a22e60, 0x704a34fd, 0xd63d3f49,
7384			0xcc1d9f8b, 0x6a6a943f, 0x5b828ea2, 0xfdf58516,
7385			0x3852bb98, 0x9e25b02c, 0xafcdaab1, 0x09baa105,
7386			0xfff2d1ec, 0x5985da58, 0x686dc0c5, 0xce1acb71,
7387			0x0bbdf5ff, 0xadcafe4b, 0x9c22e4d6, 0x3a55ef62,
7388			0xabc30345, 0x0db408f1, 0x3c5c126c, 0x9a2b19d8,
7389			0x5f8c2756, 0xf9fb2ce2, 0xc813367f, 0x6e643dcb,
7390			0x982c4d22, 0x3e5b4696, 0x0fb35c0b, 0xa9c457bf,
7391			0x6c636931, 0xca146285, 0xfbfc7818, 0x5d8b73ac,
7392			0x03a0a617, 0xa5d7ada3, 0x943fb73e, 0x3248bc8a,
7393			0xf7ef8204, 0x519889b0, 0x6070932d, 0xc6079899,
7394			0x304fe870, 0x9638e3c4, 0xa7d0f959, 0x01a7f2ed,
7395			0xc400cc63, 0x6277c7d7, 0x539fdd4a, 0xf5e8d6fe,
7396			0x647e3ad9, 0xc209316d, 0xf3e12bf0, 0x55962044,
7397			0x90311eca, 0x3646157e, 0x07ae0fe3, 0xa1d90457,
7398			0x579174be, 0xf1e67f0a, 0xc00e6597, 0x66796e23,
7399			0xa3de50ad, 0x05a95b19, 0x34414184, 0x92364a30,
7400			0x00000000, 0xccaa009e, 0x4225077d, 0x8e8f07e3,
7401			0x844a0efa, 0x48e00e64, 0xc66f0987, 0x0ac50919,
7402			0xd3e51bb5, 0x1f4f1b2b, 0x91c01cc8, 0x5d6a1c56,
7403			0x57af154f, 0x9b0515d1, 0x158a1232, 0xd92012ac,
7404			0x7cbb312b, 0xb01131b5, 0x3e9e3656, 0xf23436c8,
7405			0xf8f13fd1, 0x345b3f4f, 0xbad438ac, 0x767e3832,
7406			0xaf5e2a9e, 0x63f42a00, 0xed7b2de3, 0x21d12d7d,
7407			0x2b142464, 0xe7be24fa, 0x69312319, 0xa59b2387,
7408			0xf9766256, 0x35dc62c8, 0xbb53652b, 0x77f965b5,
7409			0x7d3c6cac, 0xb1966c32, 0x3f196bd1, 0xf3b36b4f,
7410			0x2a9379e3, 0xe639797d, 0x68b67e9e, 0xa41c7e00,
7411			0xaed97719, 0x62737787, 0xecfc7064, 0x205670fa,
7412			0x85cd537d, 0x496753e3, 0xc7e85400, 0x0b42549e,
7413			0x01875d87, 0xcd2d5d19, 0x43a25afa, 0x8f085a64,
7414			0x562848c8, 0x9a824856, 0x140d4fb5, 0xd8a74f2b,
7415			0xd2624632, 0x1ec846ac, 0x9047414f, 0x5ced41d1,
7416			0x299dc2ed, 0xe537c273, 0x6bb8c590, 0xa712c50e,
7417			0xadd7cc17, 0x617dcc89, 0xeff2cb6a, 0x2358cbf4,
7418			0xfa78d958, 0x36d2d9c6, 0xb85dde25, 0x74f7debb,
7419			0x7e32d7a2, 0xb298d73c, 0x3c17d0df, 0xf0bdd041,
7420			0x5526f3c6, 0x998cf358, 0x1703f4bb, 0xdba9f425,
7421			0xd16cfd3c, 0x1dc6fda2, 0x9349fa41, 0x5fe3fadf,
7422			0x86c3e873, 0x4a69e8ed, 0xc4e6ef0e, 0x084cef90,
7423			0x0289e689, 0xce23e617, 0x40ace1f4, 0x8c06e16a,
7424			0xd0eba0bb, 0x1c41a025, 0x92cea7c6, 0x5e64a758,
7425			0x54a1ae41, 0x980baedf, 0x1684a93c, 0xda2ea9a2,
7426			0x030ebb0e, 0xcfa4bb90, 0x412bbc73, 0x8d81bced,
7427			0x8744b5f4, 0x4beeb56a, 0xc561b289, 0x09cbb217,
7428			0xac509190, 0x60fa910e, 0xee7596ed, 0x22df9673,
7429			0x281a9f6a, 0xe4b09ff4, 0x6a3f9817, 0xa6959889,
7430			0x7fb58a25, 0xb31f8abb, 0x3d908d58, 0xf13a8dc6,
7431			0xfbff84df, 0x37558441, 0xb9da83a2, 0x7570833c,
7432			0x533b85da, 0x9f918544, 0x111e82a7, 0xddb48239,
7433			0xd7718b20, 0x1bdb8bbe, 0x95548c5d, 0x59fe8cc3,
7434			0x80de9e6f, 0x4c749ef1, 0xc2fb9912, 0x0e51998c,
7435			0x04949095, 0xc83e900b, 0x46b197e8, 0x8a1b9776,
7436			0x2f80b4f1, 0xe32ab46f, 0x6da5b38c, 0xa10fb312,
7437			0xabcaba0b, 0x6760ba95, 0xe9efbd76, 0x2545bde8,
7438			0xfc65af44, 0x30cfafda, 0xbe40a839, 0x72eaa8a7,
7439			0x782fa1be, 0xb485a120, 0x3a0aa6c3, 0xf6a0a65d,
7440			0xaa4de78c, 0x66e7e712, 0xe868e0f1, 0x24c2e06f,
7441			0x2e07e976, 0xe2ade9e8, 0x6c22ee0b, 0xa088ee95,
7442			0x79a8fc39, 0xb502fca7, 0x3b8dfb44, 0xf727fbda,
7443			0xfde2f2c3, 0x3148f25d, 0xbfc7f5be, 0x736df520,
7444			0xd6f6d6a7, 0x1a5cd639, 0x94d3d1da, 0x5879d144,
7445			0x52bcd85d, 0x9e16d8c3, 0x1099df20, 0xdc33dfbe,
7446			0x0513cd12, 0xc9b9cd8c, 0x4736ca6f, 0x8b9ccaf1,
7447			0x8159c3e8, 0x4df3c376, 0xc37cc495, 0x0fd6c40b,
7448			0x7aa64737, 0xb60c47a9, 0x3883404a, 0xf42940d4,
7449			0xfeec49cd, 0x32464953, 0xbcc94eb0, 0x70634e2e,
7450			0xa9435c82, 0x65e95c1c, 0xeb665bff, 0x27cc5b61,
7451			0x2d095278, 0xe1a352e6, 0x6f2c5505, 0xa386559b,
7452			0x061d761c, 0xcab77682, 0x44387161, 0x889271ff,
7453			0x825778e6, 0x4efd7878, 0xc0727f9b, 0x0cd87f05,
7454			0xd5f86da9, 0x19526d37, 0x97dd6ad4, 0x5b776a4a,
7455			0x51b26353, 0x9d1863cd, 0x1397642e, 0xdf3d64b0,
7456			0x83d02561, 0x4f7a25ff, 0xc1f5221c, 0x0d5f2282,
7457			0x079a2b9b, 0xcb302b05, 0x45bf2ce6, 0x89152c78,
7458			0x50353ed4, 0x9c9f3e4a, 0x121039a9, 0xdeba3937,
7459			0xd47f302e, 0x18d530b0, 0x965a3753, 0x5af037cd,
7460			0xff6b144a, 0x33c114d4, 0xbd4e1337, 0x71e413a9,
7461			0x7b211ab0, 0xb78b1a2e, 0x39041dcd, 0xf5ae1d53,
7462			0x2c8e0fff, 0xe0240f61, 0x6eab0882, 0xa201081c,
7463			0xa8c40105, 0x646e019b, 0xeae10678, 0x264b06e6,
7464			};
7465
7466
7467
7468			static u32 MAYBE_UNUSED
7469	0		crc32_slice8(u32 crc, const u8 *p, size_t len)
7470			{
7471	0		const u8 * const end = p + len;
7472			const u8 *end64;
7473
7474	0	0	for (; ((uintptr_t)p & 7) && p != end; p++)
		0
7475	0		crc = (crc >> 8) ^ crc32_slice8_table[(u8)crc ^ *p];
7476
7477	0		end64 = p + ((end - p) & ~7);
7478	0	0	for (; p != end64; p += 8) {
7479	0		u32 v1 = le32_bswap((const u32 )(p + 0));
7480	0		u32 v2 = le32_bswap((const u32 )(p + 4));
7481
7482	0		crc = crc32_slice8_table[0x700 + (u8)((crc ^ v1) >> 0)] ^
7483	0		crc32_slice8_table[0x600 + (u8)((crc ^ v1) >> 8)] ^
7484	0		crc32_slice8_table[0x500 + (u8)((crc ^ v1) >> 16)] ^
7485	0		crc32_slice8_table[0x400 + (u8)((crc ^ v1) >> 24)] ^
7486	0		crc32_slice8_table[0x300 + (u8)(v2 >> 0)] ^
7487	0		crc32_slice8_table[0x200 + (u8)(v2 >> 8)] ^
7488	0		crc32_slice8_table[0x100 + (u8)(v2 >> 16)] ^
7489	0		crc32_slice8_table[0x000 + (u8)(v2 >> 24)];
7490			}
7491
7492	0	0	for (; p != end; p++)
7493	0		crc = (crc >> 8) ^ crc32_slice8_table[(u8)crc ^ *p];
7494
7495	0		return crc;
7496			}
7497
7498
7499			static forceinline u32 MAYBE_UNUSED
7500			crc32_slice1(u32 crc, const u8 *p, size_t len)
7501			{
7502			size_t i;
7503
7504	0	0	for (i = 0; i < len; i++)
		0
		0
		0
		0
7505	0		crc = (crc >> 8) ^ crc32_slice1_table[(u8)crc ^ p[i]];
7506	0		return crc;
7507			}
7508
7509
7510			#undef DEFAULT_IMPL
7511			#undef arch_select_crc32_func
7512			typedef u32 (crc32_func_t)(u32 crc, const u8 p, size_t len);
7513			#if defined(ARCH_ARM32) \|\| defined(ARCH_ARM64)
7514			/* # include "arm/crc32_impl.h" */
7515
7516
7517			#ifndef LIB_ARM_CRC32_IMPL_H
7518			#define LIB_ARM_CRC32_IMPL_H
7519
7520			/* #include "arm-cpu_features.h" */
7521
7522
7523			#ifndef LIB_ARM_CPU_FEATURES_H
7524			#define LIB_ARM_CPU_FEATURES_H
7525
7526			/* #include "lib_common.h" */
7527
7528
7529			#ifndef LIB_LIB_COMMON_H
7530			#define LIB_LIB_COMMON_H
7531
/* Refuse to build if the public header was pulled in before this one. */
#if defined(LIBDEFLATE_H)
#  error "lib_common.h must always be included before libdeflate.h"
#endif
7536
/*
 * LIBDEFLATE_EXPORT_SYM: decoration placed on exported symbols.
 *  - Windows/Cygwin DLL builds: __declspec(dllexport)
 *  - other GNU-compatible compilers: default ELF visibility
 *  - anything else: nothing
 */
#if defined(LIBDEFLATE_DLL) && (defined(_WIN32) || defined(__CYGWIN__))
#  define LIBDEFLATE_EXPORT_SYM	__declspec(dllexport)
#elif defined(__GNUC__)
#  define LIBDEFLATE_EXPORT_SYM	__attribute__((visibility("default")))
#else
#  define LIBDEFLATE_EXPORT_SYM
#endif
7544
7545
/*
 * LIBDEFLATE_ALIGN_STACK: only GNU-compatible compilers targeting i386 get the
 * force_align_arg_pointer attribute; everywhere else it expands to nothing.
 */
#if !defined(__GNUC__) || !defined(__i386__)
#  define LIBDEFLATE_ALIGN_STACK
#else
#  define LIBDEFLATE_ALIGN_STACK	__attribute__((force_align_arg_pointer))
#endif

/* Decoration applied to every public API function when building the library. */
#define LIBDEFLATEAPI	LIBDEFLATE_EXPORT_SYM LIBDEFLATE_ALIGN_STACK
7553
7554			/* #include "../common_defs.h" */
7555
7556
7557			#ifndef COMMON_DEFS_H
7558			#define COMMON_DEFS_H
7559
7560			/* #include "libdeflate.h" */
7561
7562
7563			#ifndef LIBDEFLATE_H
7564			#define LIBDEFLATE_H
7565
#include <stddef.h>	/* size_t, used throughout the API below */
#include <stdint.h>	/* uint32_t, used by the checksum functions */
7568
7569			#ifdef __cplusplus
7570			extern "C" {
7571			#endif
7572
/* Library version, as separate numeric components and as a string literal. */
7573			#define LIBDEFLATE_VERSION_MAJOR 1
7574			#define LIBDEFLATE_VERSION_MINOR 25
7575			#define LIBDEFLATE_VERSION_STRING "1.25"
7576
7577
/*
 * If LIBDEFLATEAPI was not already defined by an earlier header, define it for
 * consumers: dllimport for Windows/Cygwin DLL users, otherwise nothing.
 */
#ifndef LIBDEFLATEAPI
#  if defined(LIBDEFLATE_DLL) && (defined(_WIN32) || defined(__CYGWIN__))
#    define LIBDEFLATEAPI	__declspec(dllimport)
#  else
#    define LIBDEFLATEAPI
#  endif
#endif
7585
7586
7587
7588
7589
7590			struct libdeflate_compressor;
7591			struct libdeflate_options;
7592
7593
7594			LIBDEFLATEAPI struct libdeflate_compressor *
7595			libdeflate_alloc_compressor(int compression_level);
7596
7597
7598			LIBDEFLATEAPI struct libdeflate_compressor *
7599			libdeflate_alloc_compressor_ex(int compression_level,
7600			const struct libdeflate_options *options);
7601
7602
7603			LIBDEFLATEAPI size_t
7604			libdeflate_deflate_compress(struct libdeflate_compressor *compressor,
7605			const void *in, size_t in_nbytes,
7606			void *out, size_t out_nbytes_avail);
7607
7608
7609			LIBDEFLATEAPI size_t
7610			libdeflate_deflate_compress_bound(struct libdeflate_compressor *compressor,
7611			size_t in_nbytes);
7612
7613
7614			LIBDEFLATEAPI size_t
7615			libdeflate_zlib_compress(struct libdeflate_compressor *compressor,
7616			const void *in, size_t in_nbytes,
7617			void *out, size_t out_nbytes_avail);
7618
7619
7620			LIBDEFLATEAPI size_t
7621			libdeflate_zlib_compress_bound(struct libdeflate_compressor *compressor,
7622			size_t in_nbytes);
7623
7624
7625			LIBDEFLATEAPI size_t
7626			libdeflate_gzip_compress(struct libdeflate_compressor *compressor,
7627			const void *in, size_t in_nbytes,
7628			void *out, size_t out_nbytes_avail);
7629
7630
7631			LIBDEFLATEAPI size_t
7632			libdeflate_gzip_compress_bound(struct libdeflate_compressor *compressor,
7633			size_t in_nbytes);
7634
7635
7636			LIBDEFLATEAPI void
7637			libdeflate_free_compressor(struct libdeflate_compressor *compressor);
7638
7639
7640
7641
7642
7643			struct libdeflate_decompressor;
7644			struct libdeflate_options;
7645
7646
7647			LIBDEFLATEAPI struct libdeflate_decompressor *
7648			libdeflate_alloc_decompressor(void);
7649
7650
7651			LIBDEFLATEAPI struct libdeflate_decompressor *
7652			libdeflate_alloc_decompressor_ex(const struct libdeflate_options *options);
7653
7654
/*
 * Status codes returned by the libdeflate_*_decompress*() functions.
 * The numeric values are spelled out explicitly.
 */
enum libdeflate_result {
	LIBDEFLATE_SUCCESS            = 0,
	LIBDEFLATE_BAD_DATA           = 1,
	LIBDEFLATE_SHORT_OUTPUT       = 2,
	LIBDEFLATE_INSUFFICIENT_SPACE = 3,
};
7668
7669
7670			LIBDEFLATEAPI enum libdeflate_result
7671			libdeflate_deflate_decompress(struct libdeflate_decompressor *decompressor,
7672			const void *in, size_t in_nbytes,
7673			void *out, size_t out_nbytes_avail,
7674			size_t *actual_out_nbytes_ret);
7675
7676
7677			LIBDEFLATEAPI enum libdeflate_result
7678			libdeflate_deflate_decompress_ex(struct libdeflate_decompressor *decompressor,
7679			const void *in, size_t in_nbytes,
7680			void *out, size_t out_nbytes_avail,
7681			size_t *actual_in_nbytes_ret,
7682			size_t *actual_out_nbytes_ret);
7683
7684
7685			LIBDEFLATEAPI enum libdeflate_result
7686			libdeflate_zlib_decompress(struct libdeflate_decompressor *decompressor,
7687			const void *in, size_t in_nbytes,
7688			void *out, size_t out_nbytes_avail,
7689			size_t *actual_out_nbytes_ret);
7690
7691
7692			LIBDEFLATEAPI enum libdeflate_result
7693			libdeflate_zlib_decompress_ex(struct libdeflate_decompressor *decompressor,
7694			const void *in, size_t in_nbytes,
7695			void *out, size_t out_nbytes_avail,
7696			size_t *actual_in_nbytes_ret,
7697			size_t *actual_out_nbytes_ret);
7698
7699
7700			LIBDEFLATEAPI enum libdeflate_result
7701			libdeflate_gzip_decompress(struct libdeflate_decompressor *decompressor,
7702			const void *in, size_t in_nbytes,
7703			void *out, size_t out_nbytes_avail,
7704			size_t *actual_out_nbytes_ret);
7705
7706
7707			LIBDEFLATEAPI enum libdeflate_result
7708			libdeflate_gzip_decompress_ex(struct libdeflate_decompressor *decompressor,
7709			const void *in, size_t in_nbytes,
7710			void *out, size_t out_nbytes_avail,
7711			size_t *actual_in_nbytes_ret,
7712			size_t *actual_out_nbytes_ret);
7713
7714
7715			LIBDEFLATEAPI void
7716			libdeflate_free_decompressor(struct libdeflate_decompressor *decompressor);
7717
7718
7719
7720
7721
7722
7723			LIBDEFLATEAPI uint32_t
7724			libdeflate_adler32(uint32_t adler, const void *buffer, size_t len);
7725
7726
7727
7728			LIBDEFLATEAPI uint32_t
7729			libdeflate_crc32(uint32_t crc, const void *buffer, size_t len);
7730
7731
7732
7733
7734
7735
7736			LIBDEFLATEAPI void
7737			libdeflate_set_memory_allocator(void (malloc_func)(size_t),
7738			void (free_func)(void ));
7739
7740
/*
 * Options passed to the "_ex" allocation functions.
 */
struct libdeflate_options {
	/* Size of this structure as seen by the caller (name-derived; set to
	 * sizeof(struct libdeflate_options) -- TODO confirm with the API docs). */
	size_t sizeof_options;

	/*
	 * Allocation callbacks.  These are data members, so they must be
	 * pointers to functions: a malloc-shaped allocator returning the new
	 * block, and a free-shaped deallocator taking the block to release.
	 */
	void *(*malloc_func)(size_t);
	void (*free_func)(void *);
};
7750
7751			#ifdef __cplusplus
7752			}
7753			#endif
7754
7755			#endif
7756
7757
#include <stdbool.h>	/* bool, true (used by CPU_IS_LITTLE_ENDIAN) */
#include <stddef.h>	/* size_t */
#include <stdint.h>	/* uintN_t / intN_t behind the u8..s64 typedefs */
#ifdef _MSC_VER
#  include <intrin.h>	/* _BitScan*() and other intrinsics used below */
#  include <stdlib.h>	/* _byteswap_*() used below */

   /* MSVC warnings switched off for this code base. */
#  pragma warning(disable : 4146)

#  pragma warning(disable : 4018)
#  pragma warning(disable : 4244)
#  pragma warning(disable : 4267)
#  pragma warning(disable : 4310)

#  pragma warning(disable : 4100)
#  pragma warning(disable : 4127)
#  pragma warning(disable : 4189)
#  pragma warning(disable : 4232)
#  pragma warning(disable : 4245)
#  pragma warning(disable : 4295)
#endif
#ifndef FREESTANDING
#  include <string.h>	/* memcpy(), used by the unaligned load/store helpers */
#endif
7783
7784
7785
7786
7787
7788
/*
 * Target architecture detection.  At most one ARCH_* macro ends up defined.
 * Any stale definitions are cleared first.
 */
#undef ARCH_X86_64
#undef ARCH_X86_32
#undef ARCH_ARM64
#undef ARCH_ARM32
#undef ARCH_RISCV
#ifndef _MSC_VER
   /* Non-MSVC compilers: GNU-style predefined macros. */
#  if defined(__x86_64__)
#    define ARCH_X86_64
#  elif defined(__i386__)
#    define ARCH_X86_32
#  elif defined(__aarch64__)
#    define ARCH_ARM64
#  elif defined(__arm__)
#    define ARCH_ARM32
#  elif defined(__riscv)
#    define ARCH_RISCV
#  endif
#else
   /* MSVC: _M_* macros.  _M_X64 with _M_ARM64EC set is not treated as x86_64. */
#  if defined(_M_X64) && !defined(_M_ARM64EC)
#    define ARCH_X86_64
#  elif defined(_M_IX86)
#    define ARCH_X86_32
#  elif defined(_M_ARM64)
#    define ARCH_ARM64
#  elif defined(_M_ARM)
#    define ARCH_ARM32
#  endif
#endif
7818
7819
7820
7821
7822
7823
/* Short aliases for the <stdint.h> fixed-width types: uN unsigned, sN signed. */
typedef uint8_t  u8;
typedef int8_t   s8;
typedef uint16_t u16;
typedef int16_t  s16;
typedef uint32_t u32;
typedef int32_t  s32;
typedef uint64_t u64;
typedef int64_t  s64;
7832
7833
/* ssize_t is defined here only for MSVC builds; its width follows _WIN64. */
#if defined(_MSC_VER) && defined(_WIN64)
typedef long long ssize_t;
#elif defined(_MSC_VER)
typedef long ssize_t;
#endif
7841
7842
/* Unsigned integer type used as the "machine word"; aliased to size_t. */
7843			typedef size_t machine_word_t;
7844
7845
/* Number of bytes in a machine_word_t, as an int. */
7846			#define WORDBYTES ((int)sizeof(machine_word_t))
7847
7848
/* Number of bits in a machine_word_t (8 bits per byte is assumed). */
7849			#define WORDBITS (8 * WORDBYTES)
7850
7851
7852
7853
7854
7855
/*
 * Compiler version predicates.  Each *_PREREQ() macro evaluates to nonzero
 * only when building with that compiler at the given version or newer, and to
 * 0 when building with any other compiler.  Unsupported old versions are
 * rejected at compile time.
 */

/* GCC_PREREQ(major, minor): real GCC only (clang and icc are excluded). */
#if defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER)
#  define GCC_PREREQ(major, minor)		\
	(__GNUC__ > (major) ||			\
	 (__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)))
#  if !GCC_PREREQ(4, 9)
#    error "gcc versions older than 4.9 are no longer supported"
#  endif
#else
#  define GCC_PREREQ(major, minor)	0
#endif

/*
 * CLANG_PREREQ(major, minor, apple_version): when __apple_build_version__ is
 * defined only 'apple_version' is compared; otherwise major/minor are compared.
 */
#ifdef __clang__
#  ifdef __apple_build_version__
#    define CLANG_PREREQ(major, minor, apple_version)	\
	(__apple_build_version__ >= (apple_version))
#  else
#    define CLANG_PREREQ(major, minor, apple_version)	\
	(__clang_major__ > (major) ||			\
	 (__clang_major__ == (major) && __clang_minor__ >= (minor)))
#  endif
#  if !CLANG_PREREQ(3, 9, 8000000)
#    error "clang versions older than 3.9 are no longer supported"
#  endif
#else
#  define CLANG_PREREQ(major, minor, apple_version)	0
#endif

/* MSVC_PREREQ(version): compares against _MSC_VER. */
#ifdef _MSC_VER
#  define MSVC_PREREQ(version)	(_MSC_VER >= (version))
#  if !MSVC_PREREQ(1900)
#    error "MSVC versions older than Visual Studio 2015 are no longer supported"
#  endif
#else
#  define MSVC_PREREQ(version)	0
#endif
7889
7890
/*
 * Fallbacks so that __has_attribute() and __has_builtin() can be used in #if
 * expressions even on compilers that do not provide them: they evaluate to 0.
 */
#if !defined(__has_attribute)
#  define __has_attribute(attribute)	0
#endif

#if !defined(__has_builtin)
#  define __has_builtin(builtin)	0
#endif
7899
7900
/* MSVC: map 'inline' onto its __inline keyword. */
#ifdef _MSC_VER
#  define inline	__inline
#endif

/*
 * forceinline: 'inline' plus the strongest inlining request the compiler
 * offers; plain 'inline' when nothing stronger is available.
 */
#if defined(__GNUC__) || __has_attribute(always_inline)
#  define forceinline	inline __attribute__((always_inline))
#elif defined(_MSC_VER)
#  define forceinline	__forceinline
#else
#  define forceinline	inline
#endif
7913
7914
/* MAYBE_UNUSED: marks a declaration as possibly unused; empty if unsupported. */
#if defined(__GNUC__) || __has_attribute(unused)
#  define MAYBE_UNUSED	__attribute__((unused))
#else
#  define MAYBE_UNUSED
#endif

/* NORETURN: marks a function as never returning; empty if unsupported. */
#if defined(__GNUC__) || __has_attribute(noreturn)
#  define NORETURN	__attribute__((noreturn))
#else
#  define NORETURN
#endif
7927
7928
/*
 * When __STDC_VERSION__ is missing or older than 201112L, supply 'restrict' as
 * a macro: __restrict__ on GNU-compatible compilers, nothing elsewhere.
 */
#if !defined(__STDC_VERSION__) || (__STDC_VERSION__ < 201112L)
#  if defined(__GNUC__) || defined(__clang__)
#    define restrict	__restrict__
#  else
#    define restrict
#  endif
#endif
7936
7937
/*
 * Branch-prediction hints.  With __builtin_expect available the expression is
 * normalized to 0/1 via '!!'; otherwise the macros pass the expression through.
 */
#if defined(__GNUC__) || __has_builtin(__builtin_expect)
#  define likely(expr)		__builtin_expect(!!(expr), 1)
#else
#  define likely(expr)		(expr)
#endif

#if defined(__GNUC__) || __has_builtin(__builtin_expect)
#  define unlikely(expr)	__builtin_expect(!!(expr), 0)
#else
#  define unlikely(expr)	(expr)
#endif
7950
7951
/*
 * prefetchr(addr): prefetch hint for a location about to be read.
 * Expands to nothing when no suitable intrinsic is known.
 */
#undef prefetchr
#if defined(__GNUC__) || __has_builtin(__builtin_prefetch)
#  define prefetchr(addr)	__builtin_prefetch((addr), 0)
#elif defined(_MSC_VER)
#  if defined(ARCH_X86_32) || defined(ARCH_X86_64)
#    define prefetchr(addr)	_mm_prefetch((addr), _MM_HINT_T0)
#  elif defined(ARCH_ARM64)
#    define prefetchr(addr)	__prefetch2((addr), 0x00 )
#  elif defined(ARCH_ARM32)
#    define prefetchr(addr)	__prefetch(addr)
#  endif
#endif
#ifndef prefetchr
#  define prefetchr(addr)
#endif

/*
 * prefetchw(addr): prefetch hint for a location about to be written.
 * Expands to nothing when no suitable intrinsic is known.
 */
#undef prefetchw
#if defined(__GNUC__) || __has_builtin(__builtin_prefetch)
#  define prefetchw(addr)	__builtin_prefetch((addr), 1)
#elif defined(_MSC_VER)
#  if defined(ARCH_X86_32) || defined(ARCH_X86_64)
#    define prefetchw(addr)	_m_prefetchw(addr)
#  elif defined(ARCH_ARM64)
#    define prefetchw(addr)	__prefetch2((addr), 0x10 )
#  elif defined(ARCH_ARM32)
#    define prefetchw(addr)	__prefetchw(addr)
#  endif
#endif
#ifndef prefetchw
#  define prefetchw(addr)
#endif
7984
7985
/*
 * _aligned_attribute(n): request n-byte alignment for a declaration.
 * Left undefined when the compiler offers neither spelling, so users can test
 * for it with #ifdef.
 */
#undef _aligned_attribute
#if defined(__GNUC__) || __has_attribute(aligned)
#  define _aligned_attribute(n)	__attribute__((aligned(n)))
#elif defined(_MSC_VER)
#  define _aligned_attribute(n)	__declspec(align(n))
#endif

/* _target_attribute(attrs): per-function target options; empty if unsupported. */
#if defined(__GNUC__) || __has_attribute(target)
#  define _target_attribute(attrs)	__attribute__((target(attrs)))
#else
#  define _target_attribute(attrs)
#endif
7999
8000
8001
8002
8003
/*
 * Small arithmetic helpers.
 *  - ARRAY_LEN(A): element count of a true array (not a pointer).
 *  - MIN/MAX: arguments may be evaluated twice; avoid side effects.
 *  - DIV_ROUND_UP(n, d): n / d rounded up (for non-negative n, positive d).
 *  - STATIC_ASSERT(expr): compile-time check; a false 'expr' yields a
 *    negative array size inside sizeof.
 *  - ALIGN(n, a): round n up to a multiple of a; the mask form is only
 *    correct when a is a power of two.
 *  - ROUND_UP(n, d): round n up to a multiple of d, for any positive d.
 */
8004			#define ARRAY_LEN(A) (sizeof(A) / sizeof((A)[0]))
8005			#define MIN(a, b) ((a) <= (b) ? (a) : (b))
8006			#define MAX(a, b) ((a) >= (b) ? (a) : (b))
8007			#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
8008			#define STATIC_ASSERT(expr) ((void)sizeof(char[1 - 2 * !(expr)]))
8009			#define ALIGN(n, a) (((n) + (a) - 1) & ~((a) - 1))
8010			#define ROUND_UP(n, d) ((d) * DIV_ROUND_UP((n), (d)))
8011
8012
8013
8014
8015
8016
/*
 * CPU_IS_LITTLE_ENDIAN(): true on little-endian targets.
 * Uses __BYTE_ORDER__ when the compiler provides it, is hard-wired to true
 * for MSVC, and otherwise inspects the first byte of a 32-bit 1 at run time.
 */
#if defined(__BYTE_ORDER__)
#  define CPU_IS_LITTLE_ENDIAN()	(__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
#elif defined(_MSC_VER)
#  define CPU_IS_LITTLE_ENDIAN()	true
#else
static forceinline bool CPU_IS_LITTLE_ENDIAN(void)
{
	union {
		u32 w;
		u8 b;
	} probe;

	/* The lowest-addressed byte is 1 only on a little-endian machine. */
	probe.w = 1;
	return probe.b;
}
#endif
8033
8034
8035			static forceinline u16 bswap16(u16 v)
8036			{
8037			#if defined(__GNUC__) \|\| __has_builtin(__builtin_bswap16)
8038			return __builtin_bswap16(v);
8039			#elif defined(_MSC_VER)
8040			return _byteswap_ushort(v);
8041			#else
8042			return (v << 8) \| (v >> 8);
8043			#endif
8044			}
8045
8046
8047			static forceinline u32 bswap32(u32 v)
8048			{
8049			#if defined(__GNUC__) \|\| __has_builtin(__builtin_bswap32)
8050			return __builtin_bswap32(v);
8051			#elif defined(_MSC_VER)
8052			return _byteswap_ulong(v);
8053			#else
8054			return ((v & 0x000000FF) << 24) \|
8055			((v & 0x0000FF00) << 8) \|
8056			((v & 0x00FF0000) >> 8) \|
8057			((v & 0xFF000000) >> 24);
8058			#endif
8059			}
8060
8061
8062			static forceinline u64 bswap64(u64 v)
8063			{
8064			#if defined(__GNUC__) \|\| __has_builtin(__builtin_bswap64)
8065			return __builtin_bswap64(v);
8066			#elif defined(_MSC_VER)
8067			return _byteswap_uint64(v);
8068			#else
8069			return ((v & 0x00000000000000FF) << 56) \|
8070			((v & 0x000000000000FF00) << 40) \|
8071			((v & 0x0000000000FF0000) << 24) \|
8072			((v & 0x00000000FF000000) << 8) \|
8073			((v & 0x000000FF00000000) >> 8) \|
8074			((v & 0x0000FF0000000000) >> 24) \|
8075			((v & 0x00FF000000000000) >> 40) \|
8076			((v & 0xFF00000000000000) >> 56);
8077			#endif
8078			}
8079
/*
 * Convert between CPU byte order and a fixed byte order.
 * leN_bswap(v): identity on little-endian CPUs, byte swap otherwise.
 * beN_bswap(v): byte swap on little-endian CPUs, identity otherwise.
 * 'v' appears in both arms of the conditional but only one arm is evaluated.
 */
8080			#define le16_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap16(v))
8081			#define le32_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap32(v))
8082			#define le64_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap64(v))
8083			#define be16_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? bswap16(v) : (v))
8084			#define be32_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? bswap32(v) : (v))
8085			#define be64_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? bswap64(v) : (v))
8086
8087
8088
8089
8090
8091
/*
 * UNALIGNED_ACCESS_IS_FAST: 1 when word-sized unaligned loads/stores are
 * treated as cheap on the target, else 0.  Always defined, so it can be used
 * in ordinary 'if' statements as well as in #if.
 */
#if (defined(__GNUC__) || defined(__clang__)) && \
	(defined(ARCH_X86_64) || defined(ARCH_X86_32) || \
	 defined(__ARM_FEATURE_UNALIGNED) || defined(__powerpc64__) || \
	 defined(__riscv_misaligned_fast) || \
	 defined(__wasm__))
#  define UNALIGNED_ACCESS_IS_FAST	1
#elif defined(_MSC_VER)
#  define UNALIGNED_ACCESS_IS_FAST	1
#else
#  define UNALIGNED_ACCESS_IS_FAST	0
#endif
8103
8104
8105
8106			#ifdef FREESTANDING
8107			# define MEMCOPY __builtin_memcpy
8108			#else
8109			# define MEMCOPY memcpy
8110			#endif
8111
8112
8113
8114			#define DEFINE_UNALIGNED_TYPE(type) \
8115			static forceinline type \
8116			load_##type##_unaligned(const void *p) \
8117			{ \
8118			type v; \
8119			\
8120			MEMCOPY(&v, p, sizeof(v)); \
8121			return v; \
8122			} \
8123			\
8124			static forceinline void \
8125			store_##type##_unaligned(type v, void *p) \
8126			{ \
8127			MEMCOPY(p, &v, sizeof(v)); \
8128			}
8129
8130			DEFINE_UNALIGNED_TYPE(u16)
8131			DEFINE_UNALIGNED_TYPE(u32)
8132			DEFINE_UNALIGNED_TYPE(u64)
8133			DEFINE_UNALIGNED_TYPE(machine_word_t)
8134
8135			#undef MEMCOPY
8136
8137			#define load_word_unaligned load_machine_word_t_unaligned
8138			#define store_word_unaligned store_machine_word_t_unaligned
8139
8140
8141
8142			static forceinline u16
8143			get_unaligned_le16(const u8 *p)
8144			{
8145			if (UNALIGNED_ACCESS_IS_FAST)
8146			return le16_bswap(load_u16_unaligned(p));
8147			else
8148			return ((u16)p[1] << 8) \| p[0];
8149			}
8150
8151			static forceinline u16
8152			get_unaligned_be16(const u8 *p)
8153			{
8154			if (UNALIGNED_ACCESS_IS_FAST)
8155			return be16_bswap(load_u16_unaligned(p));
8156			else
8157			return ((u16)p[0] << 8) \| p[1];
8158			}
8159
8160			static forceinline u32
8161			get_unaligned_le32(const u8 *p)
8162			{
8163			if (UNALIGNED_ACCESS_IS_FAST)
8164			return le32_bswap(load_u32_unaligned(p));
8165			else
8166			return ((u32)p[3] << 24) \| ((u32)p[2] << 16) \|
8167			((u32)p[1] << 8) \| p[0];
8168			}
8169
8170			static forceinline u32
8171			get_unaligned_be32(const u8 *p)
8172			{
8173			if (UNALIGNED_ACCESS_IS_FAST)
8174			return be32_bswap(load_u32_unaligned(p));
8175			else
8176			return ((u32)p[0] << 24) \| ((u32)p[1] << 16) \|
8177			((u32)p[2] << 8) \| p[3];
8178			}
8179
8180			static forceinline u64
8181			get_unaligned_le64(const u8 *p)
8182			{
8183			if (UNALIGNED_ACCESS_IS_FAST)
8184			return le64_bswap(load_u64_unaligned(p));
8185			else
8186			return ((u64)p[7] << 56) \| ((u64)p[6] << 48) \|
8187			((u64)p[5] << 40) \| ((u64)p[4] << 32) \|
8188			((u64)p[3] << 24) \| ((u64)p[2] << 16) \|
8189			((u64)p[1] << 8) \| p[0];
8190			}
8191
8192			static forceinline machine_word_t
8193			get_unaligned_leword(const u8 *p)
8194			{
8195			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
8196			if (WORDBITS == 32)
8197			return get_unaligned_le32(p);
8198			else
8199			return get_unaligned_le64(p);
8200			}
8201
8202
8203
8204			static forceinline void
8205			put_unaligned_le16(u16 v, u8 *p)
8206			{
8207			if (UNALIGNED_ACCESS_IS_FAST) {
8208			store_u16_unaligned(le16_bswap(v), p);
8209			} else {
8210			p[0] = (u8)(v >> 0);
8211			p[1] = (u8)(v >> 8);
8212			}
8213			}
8214
8215			static forceinline void
8216			put_unaligned_be16(u16 v, u8 *p)
8217			{
8218			if (UNALIGNED_ACCESS_IS_FAST) {
8219			store_u16_unaligned(be16_bswap(v), p);
8220			} else {
8221			p[0] = (u8)(v >> 8);
8222			p[1] = (u8)(v >> 0);
8223			}
8224			}
8225
8226			static forceinline void
8227			put_unaligned_le32(u32 v, u8 *p)
8228			{
8229			if (UNALIGNED_ACCESS_IS_FAST) {
8230			store_u32_unaligned(le32_bswap(v), p);
8231			} else {
8232			p[0] = (u8)(v >> 0);
8233			p[1] = (u8)(v >> 8);
8234			p[2] = (u8)(v >> 16);
8235			p[3] = (u8)(v >> 24);
8236			}
8237			}
8238
8239			static forceinline void
8240			put_unaligned_be32(u32 v, u8 *p)
8241			{
8242			if (UNALIGNED_ACCESS_IS_FAST) {
8243			store_u32_unaligned(be32_bswap(v), p);
8244			} else {
8245			p[0] = (u8)(v >> 24);
8246			p[1] = (u8)(v >> 16);
8247			p[2] = (u8)(v >> 8);
8248			p[3] = (u8)(v >> 0);
8249			}
8250			}
8251
8252			static forceinline void
8253			put_unaligned_le64(u64 v, u8 *p)
8254			{
8255			if (UNALIGNED_ACCESS_IS_FAST) {
8256			store_u64_unaligned(le64_bswap(v), p);
8257			} else {
8258			p[0] = (u8)(v >> 0);
8259			p[1] = (u8)(v >> 8);
8260			p[2] = (u8)(v >> 16);
8261			p[3] = (u8)(v >> 24);
8262			p[4] = (u8)(v >> 32);
8263			p[5] = (u8)(v >> 40);
8264			p[6] = (u8)(v >> 48);
8265			p[7] = (u8)(v >> 56);
8266			}
8267			}
8268
8269			static forceinline void
8270			put_unaligned_leword(machine_word_t v, u8 *p)
8271			{
8272			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
8273			if (WORDBITS == 32)
8274			put_unaligned_le32(v, p);
8275			else
8276			put_unaligned_le64(v, p);
8277			}
8278
8279
8280
8281
8282
8283
8284
8285			static forceinline unsigned
8286			bsr32(u32 v)
8287			{
8288			#if defined(__GNUC__) \|\| __has_builtin(__builtin_clz)
8289			return 31 - __builtin_clz(v);
8290			#elif defined(_MSC_VER)
8291			unsigned long i;
8292
8293			_BitScanReverse(&i, v);
8294			return i;
8295			#else
8296			unsigned i = 0;
8297
8298			while ((v >>= 1) != 0)
8299			i++;
8300			return i;
8301			#endif
8302			}
8303
8304			static forceinline unsigned
8305			bsr64(u64 v)
8306			{
8307			#if defined(__GNUC__) \|\| __has_builtin(__builtin_clzll)
8308			return 63 - __builtin_clzll(v);
8309			#elif defined(_MSC_VER) && defined(_WIN64)
8310			unsigned long i;
8311
8312			_BitScanReverse64(&i, v);
8313			return i;
8314			#else
8315			unsigned i = 0;
8316
8317			while ((v >>= 1) != 0)
8318			i++;
8319			return i;
8320			#endif
8321			}
8322
8323			static forceinline unsigned
8324			bsrw(machine_word_t v)
8325			{
8326			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
8327			if (WORDBITS == 32)
8328			return bsr32(v);
8329			else
8330			return bsr64(v);
8331			}
8332
8333
8334
8335			static forceinline unsigned
8336			bsf32(u32 v)
8337			{
8338			#if defined(__GNUC__) \|\| __has_builtin(__builtin_ctz)
8339			return __builtin_ctz(v);
8340			#elif defined(_MSC_VER)
8341			unsigned long i;
8342
8343			_BitScanForward(&i, v);
8344			return i;
8345			#else
8346			unsigned i = 0;
8347
8348			for (; (v & 1) == 0; v >>= 1)
8349			i++;
8350			return i;
8351			#endif
8352			}
8353
8354			static forceinline unsigned
8355			bsf64(u64 v)
8356			{
8357			#if defined(__GNUC__) \|\| __has_builtin(__builtin_ctzll)
8358			return __builtin_ctzll(v);
8359			#elif defined(_MSC_VER) && defined(_WIN64)
8360			unsigned long i;
8361
8362			_BitScanForward64(&i, v);
8363			return i;
8364			#else
8365			unsigned i = 0;
8366
8367			for (; (v & 1) == 0; v >>= 1)
8368			i++;
8369			return i;
8370			#endif
8371			}
8372
8373			static forceinline unsigned
8374			bsfw(machine_word_t v)
8375			{
8376			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
8377			if (WORDBITS == 32)
8378			return bsf32(v);
8379			else
8380			return bsf64(v);
8381			}
8382
8383
/*
 * rbit32(v): wrapper around the ARM 'rbit' instruction applied to a 32-bit
 * value.  Only defined for GNU-compatible compilers on ARM32 (ARMv7+, or
 * ARMv6T2) and ARM64.  The self-referential '#define rbit32 rbit32' lets
 * callers test availability with #ifdef.
 */
#undef rbit32
#if (defined(__GNUC__) || defined(__clang__)) && defined(ARCH_ARM32) && \
	(__ARM_ARCH >= 7 || (__ARM_ARCH == 6 && defined(__ARM_ARCH_6T2__)))
static forceinline u32
rbit32(u32 v)
{
	__asm__("rbit %0, %1" : "=r" (v) : "r" (v));
	return v;
}
#define rbit32 rbit32
#elif (defined(__GNUC__) || defined(__clang__)) && defined(ARCH_ARM64)
static forceinline u32
rbit32(u32 v)
{
	__asm__("rbit %w0, %w1" : "=r" (v) : "r" (v));
	return v;
}
#define rbit32 rbit32
#endif
8403
8404			#endif
8405
8406
/*
 * Allocator callback types: pointer to a malloc-shaped function (size in,
 * block out) and pointer to a free-shaped function (block in).
 */
typedef void *(*malloc_func_t)(size_t);
typedef void (*free_func_t)(void *);
8409
8410			extern malloc_func_t libdeflate_default_malloc_func;
8411			extern free_func_t libdeflate_default_free_func;
8412
8413			void *libdeflate_aligned_malloc(malloc_func_t malloc_func,
8414			size_t alignment, size_t size);
8415			void libdeflate_aligned_free(free_func_t free_func, void *ptr);
8416
8417			#ifdef FREESTANDING
			/*
			 * Freestanding builds have no <string.h>: declare the four memory
			 * functions with their standard C prototypes and map each call to
			 * the corresponding compiler builtin.  Assertions are disabled in
			 * this mode.  Hosted builds include <string.h> instead and turn
			 * assertions on when the clang static analyzer is running.
			 */
8418
8419			void *memset(void *s, int c, size_t n);
8420			#define memset(s, c, n) __builtin_memset((s), (c), (n))
8421
8422			void *memcpy(void *dest, const void *src, size_t n);
8423			#define memcpy(dest, src, n) __builtin_memcpy((dest), (src), (n))
8424
8425			void *memmove(void *dest, const void *src, size_t n);
8426			#define memmove(dest, src, n) __builtin_memmove((dest), (src), (n))
8427
8428			int memcmp(const void *s1, const void *s2, size_t n);
8429			#define memcmp(s1, s2, n) __builtin_memcmp((s1), (s2), (n))
8430
8431			#undef LIBDEFLATE_ENABLE_ASSERTIONS
8432			#else
8433			# include <string.h>
8434
8435			# ifdef __clang_analyzer__
8436			# define LIBDEFLATE_ENABLE_ASSERTIONS
8437			# endif
8438			#endif
8439
8440
8441			#ifdef LIBDEFLATE_ENABLE_ASSERTIONS
			/*
			 * ASSERT(expr): with assertions enabled, a false expr calls the
			 * NORETURN libdeflate_assertion_failed() with the expression text,
			 * file name and line number.  With assertions disabled, expr is
			 * still evaluated but its value is discarded.
			 */
8442			NORETURN void
8443			libdeflate_assertion_failed(const char *expr, const char *file, int line);
8444			#define ASSERT(expr) { if (unlikely(!(expr))) \
8445			libdeflate_assertion_failed(#expr, __FILE__, __LINE__); }
8446			#else
8447			#define ASSERT(expr) (void)(expr)
8448			#endif
8449
8450			#define CONCAT_IMPL(a, b) a##b
			/*
			 * CONCAT() goes through CONCAT_IMPL() so that its arguments are
			 * macro-expanded before being pasted.  ADD_SUFFIX(name) appends the
			 * current definition of SUFFIX to name.
			 */
8451			#define CONCAT(a, b) CONCAT_IMPL(a, b)
8452			#define ADD_SUFFIX(name) CONCAT(name, SUFFIX)
8453
8454			#endif
8455
8456
8457			#if defined(ARCH_ARM32) \|\| defined(ARCH_ARM64)
8458
8459			#define ARM_CPU_FEATURE_NEON (1 << 0)
8460			#define ARM_CPU_FEATURE_PMULL (1 << 1)
8461
8462			#define ARM_CPU_FEATURE_PREFER_PMULL (1 << 2)
8463			#define ARM_CPU_FEATURE_CRC32 (1 << 3)
8464			#define ARM_CPU_FEATURE_SHA3 (1 << 4)
8465			#define ARM_CPU_FEATURE_DOTPROD (1 << 5)
8466
8467			#if !defined(FREESTANDING) && \
8468			(defined(__GNUC__) \|\| defined(__clang__) \|\| defined(_MSC_VER)) && \
8469			(defined(__linux__) \|\| \
8470			(defined(__APPLE__) && defined(ARCH_ARM64)) \|\| \
8471			(defined(_WIN32) && defined(ARCH_ARM64)))
8472
8473			# define ARM_CPU_FEATURES_KNOWN (1U << 31)
8474			extern volatile u32 libdeflate_arm_cpu_features;
8475
8476			void libdeflate_init_arm_cpu_features(void);
8477
8478			static inline u32 get_arm_cpu_features(void)
8479			{
8480			if (libdeflate_arm_cpu_features == 0)
8481			libdeflate_init_arm_cpu_features();
8482			return libdeflate_arm_cpu_features;
8483			}
8484			#else
8485			static inline u32 get_arm_cpu_features(void) { return 0; }
8486			#endif
8487
8488
8489			#if defined(__ARM_NEON) \|\| (defined(_MSC_VER) && defined(ARCH_ARM64))
8490			# define HAVE_NEON(features) 1
8491			# define HAVE_NEON_NATIVE 1
8492			#else
8493			# define HAVE_NEON(features) ((features) & ARM_CPU_FEATURE_NEON)
8494			# define HAVE_NEON_NATIVE 0
8495			#endif
8496
8497			#if (defined(__GNUC__) \|\| defined(__clang__) \|\| defined(_MSC_VER)) && \
8498			(HAVE_NEON_NATIVE \|\| (GCC_PREREQ(6, 1) && defined(__ARM_FP)))
8499			# define HAVE_NEON_INTRIN 1
8500			# include
8501			#else
8502			# define HAVE_NEON_INTRIN 0
8503			#endif
8504
8505
8506			#ifdef __ARM_FEATURE_CRYPTO
8507			# define HAVE_PMULL(features) 1
8508			#else
8509			# define HAVE_PMULL(features) ((features) & ARM_CPU_FEATURE_PMULL)
8510			#endif
8511			#if defined(ARCH_ARM64) && HAVE_NEON_INTRIN && \
8512			(GCC_PREREQ(7, 1) \|\| defined(__clang__) \|\| defined(_MSC_VER)) && \
8513			CPU_IS_LITTLE_ENDIAN()
8514			# define HAVE_PMULL_INTRIN 1
8515
8516			# ifdef _MSC_VER
8517			# define compat_vmull_p64(a, b) vmull_p64(vcreate_p64(a), vcreate_p64(b))
8518			# else
8519			# define compat_vmull_p64(a, b) vmull_p64((a), (b))
8520			# endif
8521			#else
8522			# define HAVE_PMULL_INTRIN 0
8523			#endif
8524
8525
8526			#ifdef __ARM_FEATURE_CRC32
8527			# define HAVE_CRC32(features) 1
8528			#else
8529			# define HAVE_CRC32(features) ((features) & ARM_CPU_FEATURE_CRC32)
8530			#endif
8531			#if defined(ARCH_ARM64) && \
8532			(defined(__GNUC__) \|\| defined(__clang__) \|\| defined(_MSC_VER))
8533			# define HAVE_CRC32_INTRIN 1
8534			# if defined(__GNUC__) \|\| defined(__clang__)
8535			# include
8536			# endif
8537
8538			# if defined(__clang__) && !CLANG_PREREQ(16, 0, 16000000) && \
8539			!defined(__ARM_FEATURE_CRC32)
8540			# undef __crc32b
8541			# define __crc32b(a, b) \
8542			({ uint32_t res; \
8543			__asm__("crc32b %w0, %w1, %w2" \
8544			: "=r" (res) : "r" (a), "r" (b)); \
8545			res; })
8546			# undef __crc32h
8547			# define __crc32h(a, b) \
8548			({ uint32_t res; \
8549			__asm__("crc32h %w0, %w1, %w2" \
8550			: "=r" (res) : "r" (a), "r" (b)); \
8551			res; })
8552			# undef __crc32w
8553			# define __crc32w(a, b) \
8554			({ uint32_t res; \
8555			__asm__("crc32w %w0, %w1, %w2" \
8556			: "=r" (res) : "r" (a), "r" (b)); \
8557			res; })
8558			# undef __crc32d
8559			# define __crc32d(a, b) \
8560			({ uint32_t res; \
8561			__asm__("crc32x %w0, %w1, %2" \
8562			: "=r" (res) : "r" (a), "r" (b)); \
8563			res; })
8564			# pragma clang diagnostic ignored "-Wgnu-statement-expression"
8565			# endif
8566			#else
8567			# define HAVE_CRC32_INTRIN 0
8568			#endif
8569
8570
8571			#ifdef __ARM_FEATURE_SHA3
8572			# define HAVE_SHA3(features) 1
8573			#else
8574			# define HAVE_SHA3(features) ((features) & ARM_CPU_FEATURE_SHA3)
8575			#endif
8576			#if defined(ARCH_ARM64) && HAVE_NEON_INTRIN && \
8577			(GCC_PREREQ(9, 1) \|\| \
8578			CLANG_PREREQ(7, 0, 10010463) )
8579			# define HAVE_SHA3_INTRIN 1
8580
8581			# if defined(__clang__) && !CLANG_PREREQ(16, 0, 16000000) && \
8582			!defined(__ARM_FEATURE_SHA3)
8583			# undef veor3q_u8
8584			# define veor3q_u8(a, b, c) \
8585			({ uint8x16_t res; \
8586			__asm__("eor3 %0.16b, %1.16b, %2.16b, %3.16b" \
8587			: "=w" (res) : "w" (a), "w" (b), "w" (c)); \
8588			res; })
8589			# pragma clang diagnostic ignored "-Wgnu-statement-expression"
8590			# endif
8591			#else
8592			# define HAVE_SHA3_INTRIN 0
8593			#endif
8594
8595
8596			#ifdef __ARM_FEATURE_DOTPROD
8597			# define HAVE_DOTPROD(features) 1
8598			#else
8599			# define HAVE_DOTPROD(features) ((features) & ARM_CPU_FEATURE_DOTPROD)
8600			#endif
8601			#if defined(ARCH_ARM64) && HAVE_NEON_INTRIN && \
8602			(GCC_PREREQ(8, 1) \|\| CLANG_PREREQ(7, 0, 10010000) \|\| defined(_MSC_VER))
8603			# define HAVE_DOTPROD_INTRIN 1
8604
8605			# if defined(__clang__) && !CLANG_PREREQ(16, 0, 16000000) && \
8606			!defined(__ARM_FEATURE_DOTPROD)
8607			# undef vdotq_u32
8608			# define vdotq_u32(a, b, c) \
8609			({ uint32x4_t res = (a); \
8610			__asm__("udot %0.4s, %1.16b, %2.16b" \
8611			: "+w" (res) : "w" (b), "w" (c)); \
8612			res; })
8613			# pragma clang diagnostic ignored "-Wgnu-statement-expression"
8614			# endif
8615			#else
8616			# define HAVE_DOTPROD_INTRIN 0
8617			#endif
8618
8619			#endif
8620
8621			#endif
8622
8623
8624
8625			#if HAVE_CRC32_INTRIN
8626			# ifdef __clang__
8627			# define ATTRIBUTES _target_attribute("crc")
8628			# else
8629			# define ATTRIBUTES _target_attribute("+crc")
8630			# endif
8631
8632
8633			static forceinline ATTRIBUTES u32
8634			combine_crcs_slow(u32 crc0, u32 crc1, u32 crc2, u32 crc3)
8635			{
			/*
			 * Merge four per-chunk CRC values into one.  crc0, crc1 and crc2 are
			 * carry-less multiplied (one bit at a time, in software) by
			 * CRC32_FIXED_CHUNK_MULT_3, _2 and _1 respectively.  The XOR of the
			 * three 64-bit products is reduced with __crc32d(0, ...) and the
			 * result is XORed with crc3.
			 */
8636			u64 res0 = 0, res1 = 0, res2 = 0;
8637			int i;
8638
8639
8640			for (i = 0; i < 32; i++) {
8641			if (CRC32_FIXED_CHUNK_MULT_3 & (1U << i))
8642			res0 ^= (u64)crc0 << i;
8643			if (CRC32_FIXED_CHUNK_MULT_2 & (1U << i))
8644			res1 ^= (u64)crc1 << i;
8645			if (CRC32_FIXED_CHUNK_MULT_1 & (1U << i))
8646			res2 ^= (u64)crc2 << i;
8647			}
8648
8649			return __crc32d(0, res0 ^ res1 ^ res2) ^ crc3;
8650			}
8651
8652			#define crc32_arm_crc crc32_arm_crc
			/*
			 * Update the running value crc over the len bytes at p using the
			 * __crc32{b,h,w,d} intrinsics and return the new value.
			 *
			 * When len >= 64, p is first advanced to an 8-byte boundary.  Then,
			 * while at least CRC32_NUM_CHUNKS * CRC32_FIXED_CHUNK_LEN bytes
			 * remain, four adjacent fixed-length chunks are processed in an
			 * interleaved fashion and merged with combine_crcs_slow().  Any
			 * remaining 64-byte groups are processed sequentially, and the
			 * final (< 64 byte) tail uses unaligned loads.
			 */
8653			static ATTRIBUTES u32
8654			crc32_arm_crc(u32 crc, const u8 *p, size_t len)
8655			{
8656			if (len >= 64) {
8657			const size_t align = -(uintptr_t)p & 7; /* bytes up to the next 8-byte boundary */
8658
8659
8660			if (align) {
8661			if (align & 1)
8662			crc = __crc32b(crc, *p++);
8663			if (align & 2) {
8664			crc = __crc32h(crc, le16_bswap(*(u16 *)p));
8665			p += 2;
8666			}
8667			if (align & 4) {
8668			crc = __crc32w(crc, le32_bswap(*(u32 *)p));
8669			p += 4;
8670			}
8671			len -= align;
8672			}
8673
8674			while (len >= CRC32_NUM_CHUNKS * CRC32_FIXED_CHUNK_LEN) {
8675			const u64 *wp0 = (const u64 *)p;
8676			const u64 * const wp0_end =
8677			(const u64 *)(p + CRC32_FIXED_CHUNK_LEN);
8678			u32 crc1 = 0, crc2 = 0, crc3 = 0;
8679
8680			STATIC_ASSERT(CRC32_NUM_CHUNKS == 4);
8681			STATIC_ASSERT(CRC32_FIXED_CHUNK_LEN % (4 * 8) == 0);
			/* Each iteration consumes 4 words (32 bytes) from every chunk. */
8682			do {
8683			prefetchr(&wp0[64 + 0*CRC32_FIXED_CHUNK_LEN/8]);
8684			prefetchr(&wp0[64 + 1*CRC32_FIXED_CHUNK_LEN/8]);
8685			prefetchr(&wp0[64 + 2*CRC32_FIXED_CHUNK_LEN/8]);
8686			prefetchr(&wp0[64 + 3*CRC32_FIXED_CHUNK_LEN/8]);
8687			crc = __crc32d(crc, le64_bswap(wp0[0*CRC32_FIXED_CHUNK_LEN/8]));
8688			crc1 = __crc32d(crc1, le64_bswap(wp0[1*CRC32_FIXED_CHUNK_LEN/8]));
8689			crc2 = __crc32d(crc2, le64_bswap(wp0[2*CRC32_FIXED_CHUNK_LEN/8]));
8690			crc3 = __crc32d(crc3, le64_bswap(wp0[3*CRC32_FIXED_CHUNK_LEN/8]));
8691			wp0++;
8692			crc = __crc32d(crc, le64_bswap(wp0[0*CRC32_FIXED_CHUNK_LEN/8]));
8693			crc1 = __crc32d(crc1, le64_bswap(wp0[1*CRC32_FIXED_CHUNK_LEN/8]));
8694			crc2 = __crc32d(crc2, le64_bswap(wp0[2*CRC32_FIXED_CHUNK_LEN/8]));
8695			crc3 = __crc32d(crc3, le64_bswap(wp0[3*CRC32_FIXED_CHUNK_LEN/8]));
8696			wp0++;
8697			crc = __crc32d(crc, le64_bswap(wp0[0*CRC32_FIXED_CHUNK_LEN/8]));
8698			crc1 = __crc32d(crc1, le64_bswap(wp0[1*CRC32_FIXED_CHUNK_LEN/8]));
8699			crc2 = __crc32d(crc2, le64_bswap(wp0[2*CRC32_FIXED_CHUNK_LEN/8]));
8700			crc3 = __crc32d(crc3, le64_bswap(wp0[3*CRC32_FIXED_CHUNK_LEN/8]));
8701			wp0++;
8702			crc = __crc32d(crc, le64_bswap(wp0[0*CRC32_FIXED_CHUNK_LEN/8]));
8703			crc1 = __crc32d(crc1, le64_bswap(wp0[1*CRC32_FIXED_CHUNK_LEN/8]));
8704			crc2 = __crc32d(crc2, le64_bswap(wp0[2*CRC32_FIXED_CHUNK_LEN/8]));
8705			crc3 = __crc32d(crc3, le64_bswap(wp0[3*CRC32_FIXED_CHUNK_LEN/8]));
8706			wp0++;
8707			} while (wp0 != wp0_end);
8708			crc = combine_crcs_slow(crc, crc1, crc2, crc3);
8709			p += CRC32_NUM_CHUNKS * CRC32_FIXED_CHUNK_LEN;
8710			len -= CRC32_NUM_CHUNKS * CRC32_FIXED_CHUNK_LEN;
8711			}
8712
			/* p is 8-byte aligned here, so the words are loaded directly. */
8713			while (len >= 64) {
8714			crc = __crc32d(crc, le64_bswap(*(u64 *)(p + 0)));
8715			crc = __crc32d(crc, le64_bswap(*(u64 *)(p + 8)));
8716			crc = __crc32d(crc, le64_bswap(*(u64 *)(p + 16)));
8717			crc = __crc32d(crc, le64_bswap(*(u64 *)(p + 24)));
8718			crc = __crc32d(crc, le64_bswap(*(u64 *)(p + 32)));
8719			crc = __crc32d(crc, le64_bswap(*(u64 *)(p + 40)));
8720			crc = __crc32d(crc, le64_bswap(*(u64 *)(p + 48)));
8721			crc = __crc32d(crc, le64_bswap(*(u64 *)(p + 56)));
8722			p += 64;
8723			len -= 64;
8724			}
8725			}
			/* Fewer than 64 bytes remain: consume them by the set bits of len. */
8726			if (len & 32) {
8727			crc = __crc32d(crc, get_unaligned_le64(p + 0));
8728			crc = __crc32d(crc, get_unaligned_le64(p + 8));
8729			crc = __crc32d(crc, get_unaligned_le64(p + 16));
8730			crc = __crc32d(crc, get_unaligned_le64(p + 24));
8731			p += 32;
8732			}
8733			if (len & 16) {
8734			crc = __crc32d(crc, get_unaligned_le64(p + 0));
8735			crc = __crc32d(crc, get_unaligned_le64(p + 8));
8736			p += 16;
8737			}
8738			if (len & 8) {
8739			crc = __crc32d(crc, get_unaligned_le64(p));
8740			p += 8;
8741			}
8742			if (len & 4) {
8743			crc = __crc32w(crc, get_unaligned_le32(p));
8744			p += 4;
8745			}
8746			if (len & 2) {
8747			crc = __crc32h(crc, get_unaligned_le16(p));
8748			p += 2;
8749			}
8750			if (len & 1)
8751			crc = __crc32b(crc, *p);
8752			return crc;
8753			}
8754			#undef ATTRIBUTES
8755			#endif
8756
8757
8758			#if HAVE_CRC32_INTRIN && HAVE_PMULL_INTRIN
8759			# ifdef __clang__
8760			# define ATTRIBUTES _target_attribute("crc,aes")
8761			# else
8762			# define ATTRIBUTES _target_attribute("+crc,+crypto")
8763			# endif
8764
8765
8766			static forceinline ATTRIBUTES u64
8767			clmul_u32(u32 a, u32 b)
8768			{
			/*
			 * Polynomial (carry-less) multiply of two 32-bit values via
			 * compat_vmull_p64(); returns the low 64 bits of the product.
			 */
8769			uint64x2_t res = vreinterpretq_u64_p128(
8770			compat_vmull_p64((poly64_t)a, (poly64_t)b));
8771
8772			return vgetq_lane_u64(res, 0);
8773			}
8774
8775
8776			static forceinline ATTRIBUTES u32
8777			combine_crcs_fast(u32 crc0, u32 crc1, u32 crc2, u32 crc3, size_t i)
8778			{
			/*
			 * Merge four per-chunk CRC values into one.  crc0..crc2 are each
			 * multiplied with clmul_u32() by the matching entry of
			 * crc32_mults_for_chunklen[i].  The XOR of the three products is
			 * reduced with __crc32d(0, ...) and XORed with crc3.
			 */
8779			u64 res0 = clmul_u32(crc0, crc32_mults_for_chunklen[i][0]);
8780			u64 res1 = clmul_u32(crc1, crc32_mults_for_chunklen[i][1]);
8781			u64 res2 = clmul_u32(crc2, crc32_mults_for_chunklen[i][2]);
8782
8783			return __crc32d(0, res0 ^ res1 ^ res2) ^ crc3;
8784			}
8785
8786			#define crc32_arm_crc_pmullcombine crc32_arm_crc_pmullcombine
			/*
			 * Update the running value crc over the len bytes at p and return
			 * the new value.  CRC instructions do the per-chunk work and
			 * combine_crcs_fast() merges the chunks.
			 *
			 * If len covers the alignment prefix plus CRC32_NUM_CHUNKS chunks of
			 * CRC32_MIN_VARIABLE_CHUNK_LEN bytes, p is advanced to an 8-byte
			 * boundary and the data is processed as four interleaved chunks:
			 * first with the maximum chunk length for as long as possible, then
			 * once with a chunk length that is a multiple of the minimum.
			 * Otherwise the data is processed sequentially with unaligned
			 * loads.  Both paths leave fewer than 32 bytes for the common tail.
			 */
8787			static ATTRIBUTES u32
8788			crc32_arm_crc_pmullcombine(u32 crc, const u8 *p, size_t len)
8789			{
8790			const size_t align = -(uintptr_t)p & 7; /* bytes up to the next 8-byte boundary */
8791
8792			if (len >= align + CRC32_NUM_CHUNKS * CRC32_MIN_VARIABLE_CHUNK_LEN) {
8793
8794			if (align) {
8795			if (align & 1)
8796			crc = __crc32b(crc, *p++);
8797			if (align & 2) {
8798			crc = __crc32h(crc, le16_bswap(*(u16 *)p));
8799			p += 2;
8800			}
8801			if (align & 4) {
8802			crc = __crc32w(crc, le32_bswap(*(u32 *)p));
8803			p += 4;
8804			}
8805			len -= align;
8806			}
8807
8808			while (len >= CRC32_NUM_CHUNKS * CRC32_MAX_VARIABLE_CHUNK_LEN) {
8809			const u64 *wp0 = (const u64 *)p;
8810			const u64 * const wp0_end =
8811			(const u64 *)(p + CRC32_MAX_VARIABLE_CHUNK_LEN);
8812			u32 crc1 = 0, crc2 = 0, crc3 = 0;
8813
8814			STATIC_ASSERT(CRC32_NUM_CHUNKS == 4);
8815			STATIC_ASSERT(CRC32_MAX_VARIABLE_CHUNK_LEN % (4 * 8) == 0);
8816			do {
8817			prefetchr(&wp0[64 + 0*CRC32_MAX_VARIABLE_CHUNK_LEN/8]);
8818			prefetchr(&wp0[64 + 1*CRC32_MAX_VARIABLE_CHUNK_LEN/8]);
8819			prefetchr(&wp0[64 + 2*CRC32_MAX_VARIABLE_CHUNK_LEN/8]);
8820			prefetchr(&wp0[64 + 3*CRC32_MAX_VARIABLE_CHUNK_LEN/8]);
8821			crc = __crc32d(crc, le64_bswap(wp0[0*CRC32_MAX_VARIABLE_CHUNK_LEN/8]));
8822			crc1 = __crc32d(crc1, le64_bswap(wp0[1*CRC32_MAX_VARIABLE_CHUNK_LEN/8]));
8823			crc2 = __crc32d(crc2, le64_bswap(wp0[2*CRC32_MAX_VARIABLE_CHUNK_LEN/8]));
8824			crc3 = __crc32d(crc3, le64_bswap(wp0[3*CRC32_MAX_VARIABLE_CHUNK_LEN/8]));
8825			wp0++;
8826			crc = __crc32d(crc, le64_bswap(wp0[0*CRC32_MAX_VARIABLE_CHUNK_LEN/8]));
8827			crc1 = __crc32d(crc1, le64_bswap(wp0[1*CRC32_MAX_VARIABLE_CHUNK_LEN/8]));
8828			crc2 = __crc32d(crc2, le64_bswap(wp0[2*CRC32_MAX_VARIABLE_CHUNK_LEN/8]));
8829			crc3 = __crc32d(crc3, le64_bswap(wp0[3*CRC32_MAX_VARIABLE_CHUNK_LEN/8]));
8830			wp0++;
8831			crc = __crc32d(crc, le64_bswap(wp0[0*CRC32_MAX_VARIABLE_CHUNK_LEN/8]));
8832			crc1 = __crc32d(crc1, le64_bswap(wp0[1*CRC32_MAX_VARIABLE_CHUNK_LEN/8]));
8833			crc2 = __crc32d(crc2, le64_bswap(wp0[2*CRC32_MAX_VARIABLE_CHUNK_LEN/8]));
8834			crc3 = __crc32d(crc3, le64_bswap(wp0[3*CRC32_MAX_VARIABLE_CHUNK_LEN/8]));
8835			wp0++;
8836			crc = __crc32d(crc, le64_bswap(wp0[0*CRC32_MAX_VARIABLE_CHUNK_LEN/8]));
8837			crc1 = __crc32d(crc1, le64_bswap(wp0[1*CRC32_MAX_VARIABLE_CHUNK_LEN/8]));
8838			crc2 = __crc32d(crc2, le64_bswap(wp0[2*CRC32_MAX_VARIABLE_CHUNK_LEN/8]));
8839			crc3 = __crc32d(crc3, le64_bswap(wp0[3*CRC32_MAX_VARIABLE_CHUNK_LEN/8]));
8840			wp0++;
8841			} while (wp0 != wp0_end);
			/* The last table entry holds the multipliers used for the maximum chunk length. */
8842			crc = combine_crcs_fast(crc, crc1, crc2, crc3,
8843			ARRAY_LEN(crc32_mults_for_chunklen) - 1);
8844			p += CRC32_NUM_CHUNKS * CRC32_MAX_VARIABLE_CHUNK_LEN;
8845			len -= CRC32_NUM_CHUNKS * CRC32_MAX_VARIABLE_CHUNK_LEN;
8846			}
8847
8848			if (len >= CRC32_NUM_CHUNKS * CRC32_MIN_VARIABLE_CHUNK_LEN) {
			/* i = number of minimum-length units per chunk; also the multiplier table index. */
8849			const size_t i = len / (CRC32_NUM_CHUNKS *
8850			CRC32_MIN_VARIABLE_CHUNK_LEN);
8851			const size_t chunk_len =
8852			i * CRC32_MIN_VARIABLE_CHUNK_LEN;
8853			const u64 *wp0 = (const u64 *)(p + 0*chunk_len);
8854			const u64 *wp1 = (const u64 *)(p + 1*chunk_len);
8855			const u64 *wp2 = (const u64 *)(p + 2*chunk_len);
8856			const u64 *wp3 = (const u64 *)(p + 3*chunk_len);
8857			const u64 * const wp0_end = wp1;
8858			u32 crc1 = 0, crc2 = 0, crc3 = 0;
8859
8860			STATIC_ASSERT(CRC32_NUM_CHUNKS == 4);
8861			STATIC_ASSERT(CRC32_MIN_VARIABLE_CHUNK_LEN % (4 * 8) == 0);
8862			do {
8863			prefetchr(wp0 + 64);
8864			prefetchr(wp1 + 64);
8865			prefetchr(wp2 + 64);
8866			prefetchr(wp3 + 64);
8867			crc = __crc32d(crc, le64_bswap(*wp0++));
8868			crc1 = __crc32d(crc1, le64_bswap(*wp1++));
8869			crc2 = __crc32d(crc2, le64_bswap(*wp2++));
8870			crc3 = __crc32d(crc3, le64_bswap(*wp3++));
8871			crc = __crc32d(crc, le64_bswap(*wp0++));
8872			crc1 = __crc32d(crc1, le64_bswap(*wp1++));
8873			crc2 = __crc32d(crc2, le64_bswap(*wp2++));
8874			crc3 = __crc32d(crc3, le64_bswap(*wp3++));
8875			crc = __crc32d(crc, le64_bswap(*wp0++));
8876			crc1 = __crc32d(crc1, le64_bswap(*wp1++));
8877			crc2 = __crc32d(crc2, le64_bswap(*wp2++));
8878			crc3 = __crc32d(crc3, le64_bswap(*wp3++));
8879			crc = __crc32d(crc, le64_bswap(*wp0++));
8880			crc1 = __crc32d(crc1, le64_bswap(*wp1++));
8881			crc2 = __crc32d(crc2, le64_bswap(*wp2++));
8882			crc3 = __crc32d(crc3, le64_bswap(*wp3++));
8883			} while (wp0 != wp0_end);
8884			crc = combine_crcs_fast(crc, crc1, crc2, crc3, i);
8885			p += CRC32_NUM_CHUNKS * chunk_len;
8886			len -= CRC32_NUM_CHUNKS * chunk_len;
8887			}
8888
			/* p is still 8-byte aligned here, so the words are loaded directly. */
8889			while (len >= 32) {
8890			crc = __crc32d(crc, le64_bswap(*(u64 *)(p + 0)));
8891			crc = __crc32d(crc, le64_bswap(*(u64 *)(p + 8)));
8892			crc = __crc32d(crc, le64_bswap(*(u64 *)(p + 16)));
8893			crc = __crc32d(crc, le64_bswap(*(u64 *)(p + 24)));
8894			p += 32;
8895			len -= 32;
8896			}
8897			} else {
8898			while (len >= 32) {
8899			crc = __crc32d(crc, get_unaligned_le64(p + 0));
8900			crc = __crc32d(crc, get_unaligned_le64(p + 8));
8901			crc = __crc32d(crc, get_unaligned_le64(p + 16));
8902			crc = __crc32d(crc, get_unaligned_le64(p + 24));
8903			p += 32;
8904			len -= 32;
8905			}
8906			}
8907			if (len & 16) {
8908			crc = __crc32d(crc, get_unaligned_le64(p + 0));
8909			crc = __crc32d(crc, get_unaligned_le64(p + 8));
8910			p += 16;
8911			}
8912			if (len & 8) {
8913			crc = __crc32d(crc, get_unaligned_le64(p));
8914			p += 8;
8915			}
8916			if (len & 4) {
8917			crc = __crc32w(crc, get_unaligned_le32(p));
8918			p += 4;
8919			}
8920			if (len & 2) {
8921			crc = __crc32h(crc, get_unaligned_le16(p));
8922			p += 2;
8923			}
8924			if (len & 1)
8925			crc = __crc32b(crc, *p);
8926			return crc;
8927			}
8928			#undef ATTRIBUTES
8929			#endif
8930
8931
8932			#if HAVE_PMULL_INTRIN
8933			# define crc32_arm_pmullx4 crc32_arm_pmullx4
8934			# define SUFFIX _pmullx4
8935			# ifdef __clang__
8936
8937			# define ATTRIBUTES _target_attribute("aes")
8938			# else
8939
8940			# define ATTRIBUTES _target_attribute("+crypto")
8941			# endif
8942			# define ENABLE_EOR3 0
8943			/* #include "arm-crc32_pmull_helpers.h" */
8944
8945
8946
8947
8948
8949			#undef u32_to_bytevec
			/* Build a 128-bit byte vector with a in 32-bit lane 0 and every other lane zero. */
8950			static forceinline ATTRIBUTES uint8x16_t
8951			ADD_SUFFIX(u32_to_bytevec)(u32 a)
8952			{
8953			return vreinterpretq_u8_u32(vsetq_lane_u32(a, vdupq_n_u32(0), 0));
8954			}
8955			#define u32_to_bytevec ADD_SUFFIX(u32_to_bytevec)
8956
8957
8958			#undef load_multipliers
			/* Load the two 64-bit constants at p into a poly64x2_t. */
8959			static forceinline ATTRIBUTES poly64x2_t
8960			ADD_SUFFIX(load_multipliers)(const u64 p[2])
8961			{
8962			return vreinterpretq_p64_u64(vld1q_u64(p));
8963			}
8964			#define load_multipliers ADD_SUFFIX(load_multipliers)
8965
8966
8967			#undef clmul_low
			/* Polynomial multiply of 64-bit lane 0 of a by lane 0 of b; the 128-bit product is returned as bytes. */
8968			static forceinline ATTRIBUTES uint8x16_t
8969			ADD_SUFFIX(clmul_low)(uint8x16_t a, poly64x2_t b)
8970			{
8971			return vreinterpretq_u8_p128(
8972			compat_vmull_p64(vgetq_lane_p64(vreinterpretq_p64_u8(a), 0),
8973			vgetq_lane_p64(b, 0)));
8974			}
8975			#define clmul_low ADD_SUFFIX(clmul_low)
8976
8977
8978			#undef clmul_high
			/*
			 * Polynomial multiply of the high 64-bit lanes of a and b; the
			 * 128-bit product is returned as bytes.  clang builds emit the
			 * pmull2 instruction through inline assembly; other compilers use
			 * the vmull_high_p64() intrinsic.
			 */
8979			static forceinline ATTRIBUTES uint8x16_t
8980			ADD_SUFFIX(clmul_high)(uint8x16_t a, poly64x2_t b)
8981			{
8982			#ifdef __clang__
8983
8984			uint8x16_t res;
8985
8986			__asm__("pmull2 %0.1q, %1.2d, %2.2d" : "=w" (res) : "w" (a), "w" (b));
8987			return res;
8988			#else
8989			return vreinterpretq_u8_p128(vmull_high_p64(vreinterpretq_p64_u8(a), b));
8990			#endif
8991			}
8992			#define clmul_high ADD_SUFFIX(clmul_high)
8993
8994			#undef eor3
			/* Return a ^ b ^ c: one veor3q_u8() when ENABLE_EOR3 is nonzero, otherwise two veorq_u8() operations. */
8995			static forceinline ATTRIBUTES uint8x16_t
8996			ADD_SUFFIX(eor3)(uint8x16_t a, uint8x16_t b, uint8x16_t c)
8997			{
8998			#if ENABLE_EOR3
8999			return veor3q_u8(a, b, c);
9000			#else
9001			return veorq_u8(veorq_u8(a, b), c);
9002			#endif
9003			}
9004			#define eor3 ADD_SUFFIX(eor3)
9005
9006			#undef fold_vec
			/* Return clmul_low(src, multipliers) ^ clmul_high(src, multipliers) ^ dst. */
9007			static forceinline ATTRIBUTES uint8x16_t
9008			ADD_SUFFIX(fold_vec)(uint8x16_t src, uint8x16_t dst, poly64x2_t multipliers)
9009			{
9010			uint8x16_t a = clmul_low(src, multipliers);
9011			uint8x16_t b = clmul_high(src, multipliers);
9012
9013			return eor3(a, b, dst);
9014			}
9015			#define fold_vec ADD_SUFFIX(fold_vec)
9016
9017
9018			#undef fold_partial_vec
			/*
			 * Two 16-byte index vectors are read from shift_tab at offsets len
			 * and len + 16, so len must not exceed 16.  x0 is v permuted by
			 * lshift.  x1 takes, per byte, either the byte loaded from
			 * p + len - 16 (where the rshift index has its top bit set) or v
			 * permuted by rshift.  The result is fold_vec(x0, x1,
			 * multipliers_1).  The load covers the 16 bytes ending at p + len,
			 * so for len < 16 it starts before p.
			 * NOTE(review): the 0xff table entries are presumably out-of-range
			 * vqtbl1q_u8 indices that yield zero bytes - confirm against the
			 * Arm intrinsics reference.
			 */
9019			static forceinline ATTRIBUTES MAYBE_UNUSED uint8x16_t
9020			ADD_SUFFIX(fold_partial_vec)(uint8x16_t v, const u8 *p, size_t len,
9021			poly64x2_t multipliers_1)
9022			{
9023
9024			static const u8 shift_tab[48] = {
9025			0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
9026			0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
9027			0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
9028			0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
9029			0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
9030			0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
9031			};
9032			const uint8x16_t lshift = vld1q_u8(&shift_tab[len]);
9033			const uint8x16_t rshift = vld1q_u8(&shift_tab[len + 16]);
9034			uint8x16_t x0, x1, bsl_mask;
9035
9036
9037			x0 = vqtbl1q_u8(v, lshift);
9038
9039
			/* Arithmetic shift by 7 turns each rshift byte with its top bit set into 0xff, others into 0x00. */
9040			bsl_mask = vreinterpretq_u8_s8(
9041			vshrq_n_s8(vreinterpretq_s8_u8(rshift), 7));
9042
9043
9044			x1 = vbslq_u8(bsl_mask ,
9045			vld1q_u8(p + len - 16), vqtbl1q_u8(v, rshift));
9046
9047			return fold_vec(x0, x1, multipliers_1);
9048			}
9049			#define fold_partial_vec ADD_SUFFIX(fold_partial_vec)
9050
9051
9052			static ATTRIBUTES u32
9053			crc32_arm_pmullx4(u32 crc, const u8 *p, size_t len)
9054			{
			/*
			 * Update the running value crc over the len bytes at p by folding
			 * 128-bit vectors with polynomial multiplication, and return the
			 * new value.
			 *
			 * len < 16 is delegated to crc32_slice1().  For len < 64 + 15 the
			 * data is folded one vector at a time.  Otherwise p is advanced to
			 * a 16-byte boundary with fold_partial_vec() and four vectors are
			 * folded in parallel, then reduced 4 -> 2 -> 1.  A trailing
			 * partial vector (len < 16) is folded in with fold_partial_vec(),
			 * and the final 128-bit value is reduced to 32 bits using the
			 * barrett_consts table.
			 */
9055			static const u64 _aligned_attribute(16) mults[3][2] = {
9056			{ CRC32_X159_MODG, CRC32_X95_MODG },
9057			{ CRC32_X543_MODG, CRC32_X479_MODG },
9058			{ CRC32_X287_MODG, CRC32_X223_MODG },
9059			};
9060			static const u64 _aligned_attribute(16) barrett_consts[3][2] = {
9061			{ CRC32_X95_MODG, },
9062			{ CRC32_BARRETT_CONSTANT_1, },
9063			{ CRC32_BARRETT_CONSTANT_2, },
9064			};
9065			const poly64x2_t multipliers_1 = load_multipliers(mults[0]);
9066			uint8x16_t v0, v1, v2, v3;
9067
9068			if (len < 64 + 15) {
9069			if (len < 16)
9070			return crc32_slice1(crc, p, len);
9071			v0 = veorq_u8(vld1q_u8(p), u32_to_bytevec(crc));
9072			p += 16;
9073			len -= 16;
9074			while (len >= 16) {
9075			v0 = fold_vec(v0, vld1q_u8(p), multipliers_1);
9076			p += 16;
9077			len -= 16;
9078			}
9079			} else {
9080			const poly64x2_t multipliers_4 = load_multipliers(mults[1]);
9081			const poly64x2_t multipliers_2 = load_multipliers(mults[2]);
9082			const size_t align = -(uintptr_t)p & 15; /* bytes up to the next 16-byte boundary */
9083			const uint8x16_t *vp;
9084
9085			v0 = veorq_u8(vld1q_u8(p), u32_to_bytevec(crc));
9086			p += 16;
9087
9088			if (align) {
9089			v0 = fold_partial_vec(v0, p, align, multipliers_1);
9090			p += align;
9091			len -= align;
9092			}
9093			vp = (const uint8x16_t *)p;
9094			v1 = *vp++;
9095			v2 = *vp++;
9096			v3 = *vp++;
			/* len still counts the 64 bytes already held in v0..v3, hence the 64 + 64 bound. */
9097			while (len >= 64 + 64) {
9098			v0 = fold_vec(v0, *vp++, multipliers_4);
9099			v1 = fold_vec(v1, *vp++, multipliers_4);
9100			v2 = fold_vec(v2, *vp++, multipliers_4);
9101			v3 = fold_vec(v3, *vp++, multipliers_4);
9102			len -= 64;
9103			}
9104			v0 = fold_vec(v0, v2, multipliers_2);
9105			v1 = fold_vec(v1, v3, multipliers_2);
9106			if (len & 32) {
9107			v0 = fold_vec(v0, *vp++, multipliers_2);
9108			v1 = fold_vec(v1, *vp++, multipliers_2);
9109			}
9110			v0 = fold_vec(v0, v1, multipliers_1);
9111			if (len & 16)
9112			v0 = fold_vec(v0, *vp++, multipliers_1);
9113			p = (const u8 *)vp;
9114			len &= 15;
9115			}
9116
9117
9118			if (len)
9119			v0 = fold_partial_vec(v0, p, len, multipliers_1);
9120
9121
9122			v0 = veorq_u8(clmul_low(v0, load_multipliers(barrett_consts[0])),
9123			vextq_u8(v0, vdupq_n_u8(0), 8));
9124			v1 = clmul_low(v0, load_multipliers(barrett_consts[1]));
9125			v1 = clmul_low(v1, load_multipliers(barrett_consts[2]));
9126			v0 = veorq_u8(v0, v1);
9127			return vgetq_lane_u32(vreinterpretq_u32_u8(v0), 2);
9128			}
9129			#undef SUFFIX
9130			#undef ATTRIBUTES
9131			#undef ENABLE_EOR3
9132			#endif
9133
9134
9135			#if HAVE_PMULL_INTRIN && HAVE_CRC32_INTRIN
9136			# define crc32_arm_pmullx12_crc crc32_arm_pmullx12_crc
9137			# define SUFFIX _pmullx12_crc
9138			# ifdef __clang__
9139			# define ATTRIBUTES _target_attribute("aes,crc")
9140			# else
9141			# define ATTRIBUTES _target_attribute("+crypto,+crc")
9142			# endif
9143			# define ENABLE_EOR3 0
9144			/* #include "arm-crc32_pmull_wide.h" */
9145
9146
9147
9148
9149			/* #include "arm-crc32_pmull_helpers.h" */
9150
9151
9152
9153
9154
9155			#undef u32_to_bytevec
			/* Build a 128-bit byte vector with a in 32-bit lane 0 and every other lane zero. */
9156			static forceinline ATTRIBUTES uint8x16_t
9157			ADD_SUFFIX(u32_to_bytevec)(u32 a)
9158			{
9159			return vreinterpretq_u8_u32(vsetq_lane_u32(a, vdupq_n_u32(0), 0));
9160			}
9161			#define u32_to_bytevec ADD_SUFFIX(u32_to_bytevec)
9162
9163
9164			#undef load_multipliers
			/* Load the two 64-bit constants at p into a poly64x2_t. */
9165			static forceinline ATTRIBUTES poly64x2_t
9166			ADD_SUFFIX(load_multipliers)(const u64 p[2])
9167			{
9168			return vreinterpretq_p64_u64(vld1q_u64(p));
9169			}
9170			#define load_multipliers ADD_SUFFIX(load_multipliers)
9171
9172
9173			#undef clmul_low
			/* Polynomial multiply of 64-bit lane 0 of a by lane 0 of b; the 128-bit product is returned as bytes. */
9174			static forceinline ATTRIBUTES uint8x16_t
9175			ADD_SUFFIX(clmul_low)(uint8x16_t a, poly64x2_t b)
9176			{
9177			return vreinterpretq_u8_p128(
9178			compat_vmull_p64(vgetq_lane_p64(vreinterpretq_p64_u8(a), 0),
9179			vgetq_lane_p64(b, 0)));
9180			}
9181			#define clmul_low ADD_SUFFIX(clmul_low)
9182
9183
9184			#undef clmul_high
			/*
			 * Polynomial multiply of the high 64-bit lanes of a and b; the
			 * 128-bit product is returned as bytes.  clang builds emit the
			 * pmull2 instruction through inline assembly; other compilers use
			 * the vmull_high_p64() intrinsic.
			 */
9185			static forceinline ATTRIBUTES uint8x16_t
9186			ADD_SUFFIX(clmul_high)(uint8x16_t a, poly64x2_t b)
9187			{
9188			#ifdef __clang__
9189
9190			uint8x16_t res;
9191
9192			__asm__("pmull2 %0.1q, %1.2d, %2.2d" : "=w" (res) : "w" (a), "w" (b));
9193			return res;
9194			#else
9195			return vreinterpretq_u8_p128(vmull_high_p64(vreinterpretq_p64_u8(a), b));
9196			#endif
9197			}
9198			#define clmul_high ADD_SUFFIX(clmul_high)
9199
9200			#undef eor3
			/* Return a ^ b ^ c: one veor3q_u8() when ENABLE_EOR3 is nonzero, otherwise two veorq_u8() operations. */
9201			static forceinline ATTRIBUTES uint8x16_t
9202			ADD_SUFFIX(eor3)(uint8x16_t a, uint8x16_t b, uint8x16_t c)
9203			{
9204			#if ENABLE_EOR3
9205			return veor3q_u8(a, b, c);
9206			#else
9207			return veorq_u8(veorq_u8(a, b), c);
9208			#endif
9209			}
9210			#define eor3 ADD_SUFFIX(eor3)
9211
9212			#undef fold_vec
			/* Return clmul_low(src, multipliers) ^ clmul_high(src, multipliers) ^ dst. */
9213			static forceinline ATTRIBUTES uint8x16_t
9214			ADD_SUFFIX(fold_vec)(uint8x16_t src, uint8x16_t dst, poly64x2_t multipliers)
9215			{
9216			uint8x16_t a = clmul_low(src, multipliers);
9217			uint8x16_t b = clmul_high(src, multipliers);
9218
9219			return eor3(a, b, dst);
9220			}
9221			#define fold_vec ADD_SUFFIX(fold_vec)
9222
9223
9224			#undef fold_partial_vec
			/*
			 * Two 16-byte index vectors are read from shift_tab at offsets len
			 * and len + 16, so len must not exceed 16.  x0 is v permuted by
			 * lshift.  x1 takes, per byte, either the byte loaded from
			 * p + len - 16 (where the rshift index has its top bit set) or v
			 * permuted by rshift.  The result is fold_vec(x0, x1,
			 * multipliers_1).  The load covers the 16 bytes ending at p + len,
			 * so for len < 16 it starts before p.
			 * NOTE(review): the 0xff table entries are presumably out-of-range
			 * vqtbl1q_u8 indices that yield zero bytes - confirm against the
			 * Arm intrinsics reference.
			 */
9225			static forceinline ATTRIBUTES MAYBE_UNUSED uint8x16_t
9226			ADD_SUFFIX(fold_partial_vec)(uint8x16_t v, const u8 *p, size_t len,
9227			poly64x2_t multipliers_1)
9228			{
9229
9230			static const u8 shift_tab[48] = {
9231			0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
9232			0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
9233			0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
9234			0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
9235			0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
9236			0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
9237			};
9238			const uint8x16_t lshift = vld1q_u8(&shift_tab[len]);
9239			const uint8x16_t rshift = vld1q_u8(&shift_tab[len + 16]);
9240			uint8x16_t x0, x1, bsl_mask;
9241
9242
9243			x0 = vqtbl1q_u8(v, lshift);
9244
9245
			/* Arithmetic shift by 7 turns each rshift byte with its top bit set into 0xff, others into 0x00. */
9246			bsl_mask = vreinterpretq_u8_s8(
9247			vshrq_n_s8(vreinterpretq_s8_u8(rshift), 7));
9248
9249
9250			x1 = vbslq_u8(bsl_mask ,
9251			vld1q_u8(p + len - 16), vqtbl1q_u8(v, rshift));
9252
9253			return fold_vec(x0, x1, multipliers_1);
9254			}
9255			#define fold_partial_vec ADD_SUFFIX(fold_partial_vec)
9256
9257
9258			static ATTRIBUTES u32
9259			ADD_SUFFIX(crc32_arm)(u32 crc, const u8 *p, size_t len)
9260			{
			/*
			 * Update the running value crc over the len bytes at p and return
			 * the new value, combining vector folding with CRC instructions.
			 *
			 *  - len < 64: only the CRC-instruction tail runs.
			 *  - 64 <= len < 3 * 192: four vectors are folded in parallel using
			 *    unaligned loads, then reduced 4 -> 2 -> 1.
			 *  - otherwise: p is advanced to a 16-byte boundary using CRC
			 *    instructions, twelve vectors are folded in parallel, then
			 *    reduced 12 -> 6 -> 3 -> 1.
			 *
			 * The surviving vector v0 is reduced to 32 bits by feeding its two
			 * 64-bit lanes to __crc32d(), and the remaining bytes are finished
			 * at "tail".
			 */
9261			uint8x16_t v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11;
9262
9263			if (len < 3 * 192) {
9264			static const u64 _aligned_attribute(16) mults[3][2] = {
9265			{ CRC32_X543_MODG, CRC32_X479_MODG },
9266			{ CRC32_X287_MODG, CRC32_X223_MODG },
9267			{ CRC32_X159_MODG, CRC32_X95_MODG },
9268			};
9269			poly64x2_t multipliers_4, multipliers_2, multipliers_1;
9270
9271			if (len < 64)
9272			goto tail;
9273			multipliers_4 = load_multipliers(mults[0]);
9274			multipliers_2 = load_multipliers(mults[1]);
9275			multipliers_1 = load_multipliers(mults[2]);
9276
9277			v0 = veorq_u8(vld1q_u8(p + 0), u32_to_bytevec(crc));
9278			v1 = vld1q_u8(p + 16);
9279			v2 = vld1q_u8(p + 32);
9280			v3 = vld1q_u8(p + 48);
9281			p += 64;
9282			len -= 64;
9283			while (len >= 64) {
9284			v0 = fold_vec(v0, vld1q_u8(p + 0), multipliers_4);
9285			v1 = fold_vec(v1, vld1q_u8(p + 16), multipliers_4);
9286			v2 = fold_vec(v2, vld1q_u8(p + 32), multipliers_4);
9287			v3 = fold_vec(v3, vld1q_u8(p + 48), multipliers_4);
9288			p += 64;
9289			len -= 64;
9290			}
9291			v0 = fold_vec(v0, v2, multipliers_2);
9292			v1 = fold_vec(v1, v3, multipliers_2);
9293			if (len >= 32) {
9294			v0 = fold_vec(v0, vld1q_u8(p + 0), multipliers_2);
9295			v1 = fold_vec(v1, vld1q_u8(p + 16), multipliers_2);
9296			p += 32;
9297			len -= 32;
9298			}
9299			v0 = fold_vec(v0, v1, multipliers_1);
9300			} else {
9301			static const u64 _aligned_attribute(16) mults[4][2] = {
9302			{ CRC32_X1567_MODG, CRC32_X1503_MODG },
9303			{ CRC32_X799_MODG, CRC32_X735_MODG },
9304			{ CRC32_X415_MODG, CRC32_X351_MODG },
9305			{ CRC32_X159_MODG, CRC32_X95_MODG },
9306			};
9307			const poly64x2_t multipliers_12 = load_multipliers(mults[0]);
9308			const poly64x2_t multipliers_6 = load_multipliers(mults[1]);
9309			const poly64x2_t multipliers_3 = load_multipliers(mults[2]);
9310			const poly64x2_t multipliers_1 = load_multipliers(mults[3]);
9311			const size_t align = -(uintptr_t)p & 15; /* bytes up to the next 16-byte boundary */
9312			const uint8x16_t *vp;
9313
9314
9315			if (align) {
9316			if (align & 1)
9317			crc = __crc32b(crc, *p++);
9318			if (align & 2) {
9319			crc = __crc32h(crc, le16_bswap(*(u16 *)p));
9320			p += 2;
9321			}
9322			if (align & 4) {
9323			crc = __crc32w(crc, le32_bswap(*(u32 *)p));
9324			p += 4;
9325			}
9326			if (align & 8) {
9327			crc = __crc32d(crc, le64_bswap(*(u64 *)p));
9328			p += 8;
9329			}
9330			len -= align;
9331			}
9332			vp = (const uint8x16_t *)p;
9333			v0 = veorq_u8(*vp++, u32_to_bytevec(crc));
9334			v1 = *vp++;
9335			v2 = *vp++;
9336			v3 = *vp++;
9337			v4 = *vp++;
9338			v5 = *vp++;
9339			v6 = *vp++;
9340			v7 = *vp++;
9341			v8 = *vp++;
9342			v9 = *vp++;
9343			v10 = *vp++;
9344			v11 = *vp++;
9345			len -= 192;
9346
			/* len >= 3*192 - 15 - 192 here, so at least one more 192-byte group is available. */
9347			do {
9348			v0 = fold_vec(v0, *vp++, multipliers_12);
9349			v1 = fold_vec(v1, *vp++, multipliers_12);
9350			v2 = fold_vec(v2, *vp++, multipliers_12);
9351			v3 = fold_vec(v3, *vp++, multipliers_12);
9352			v4 = fold_vec(v4, *vp++, multipliers_12);
9353			v5 = fold_vec(v5, *vp++, multipliers_12);
9354			v6 = fold_vec(v6, *vp++, multipliers_12);
9355			v7 = fold_vec(v7, *vp++, multipliers_12);
9356			v8 = fold_vec(v8, *vp++, multipliers_12);
9357			v9 = fold_vec(v9, *vp++, multipliers_12);
9358			v10 = fold_vec(v10, *vp++, multipliers_12);
9359			v11 = fold_vec(v11, *vp++, multipliers_12);
9360			len -= 192;
9361			} while (len >= 192);
9362
9363
9364			v0 = fold_vec(v0, v6, multipliers_6);
9365			v1 = fold_vec(v1, v7, multipliers_6);
9366			v2 = fold_vec(v2, v8, multipliers_6);
9367			v3 = fold_vec(v3, v9, multipliers_6);
9368			v4 = fold_vec(v4, v10, multipliers_6);
9369			v5 = fold_vec(v5, v11, multipliers_6);
9370			if (len >= 96) {
9371			v0 = fold_vec(v0, *vp++, multipliers_6);
9372			v1 = fold_vec(v1, *vp++, multipliers_6);
9373			v2 = fold_vec(v2, *vp++, multipliers_6);
9374			v3 = fold_vec(v3, *vp++, multipliers_6);
9375			v4 = fold_vec(v4, *vp++, multipliers_6);
9376			v5 = fold_vec(v5, *vp++, multipliers_6);
9377			len -= 96;
9378			}
9379			v0 = fold_vec(v0, v3, multipliers_3);
9380			v1 = fold_vec(v1, v4, multipliers_3);
9381			v2 = fold_vec(v2, v5, multipliers_3);
9382			if (len >= 48) {
9383			v0 = fold_vec(v0, *vp++, multipliers_3);
9384			v1 = fold_vec(v1, *vp++, multipliers_3);
9385			v2 = fold_vec(v2, *vp++, multipliers_3);
9386			len -= 48;
9387			}
9388			v0 = fold_vec(v0, v1, multipliers_1);
9389			v0 = fold_vec(v0, v2, multipliers_1);
9390			p = (const u8 *)vp;
9391			}
9392
			/* Reduce the 128-bit v0 to a 32-bit CRC with two 64-bit CRC steps. */
9393			crc = __crc32d(0, vgetq_lane_u64(vreinterpretq_u64_u8(v0), 0));
9394			crc = __crc32d(crc, vgetq_lane_u64(vreinterpretq_u64_u8(v0), 1));
9395			tail:
9396
9397			if (len & 32) {
9398			crc = __crc32d(crc, get_unaligned_le64(p + 0));
9399			crc = __crc32d(crc, get_unaligned_le64(p + 8));
9400			crc = __crc32d(crc, get_unaligned_le64(p + 16));
9401			crc = __crc32d(crc, get_unaligned_le64(p + 24));
9402			p += 32;
9403			}
9404			if (len & 16) {
9405			crc = __crc32d(crc, get_unaligned_le64(p + 0));
9406			crc = __crc32d(crc, get_unaligned_le64(p + 8));
9407			p += 16;
9408			}
9409			if (len & 8) {
9410			crc = __crc32d(crc, get_unaligned_le64(p));
9411			p += 8;
9412			}
9413			if (len & 4) {
9414			crc = __crc32w(crc, get_unaligned_le32(p));
9415			p += 4;
9416			}
9417			if (len & 2) {
9418			crc = __crc32h(crc, get_unaligned_le16(p));
9419			p += 2;
9420			}
9421			if (len & 1)
9422			crc = __crc32b(crc, *p);
9423			return crc;
9424			}
9425
9426			#undef SUFFIX
9427			#undef ATTRIBUTES
9428			#undef ENABLE_EOR3
9429
9430			#endif
9431
9432
9433			#if HAVE_PMULL_INTRIN && HAVE_CRC32_INTRIN && HAVE_SHA3_INTRIN && \
9434			!defined(LIBDEFLATE_ASSEMBLER_DOES_NOT_SUPPORT_SHA3)
9435			# define crc32_arm_pmullx12_crc_eor3 crc32_arm_pmullx12_crc_eor3
9436			# define SUFFIX _pmullx12_crc_eor3
9437			# ifdef __clang__
9438			# define ATTRIBUTES _target_attribute("aes,crc,sha3")
9439
9440			# elif GCC_PREREQ(14, 0) \|\| defined(__ARM_FEATURE_JCVT) \
9441			\|\| defined(__ARM_FEATURE_DOTPROD)
9442			# define ATTRIBUTES _target_attribute("+crypto,+crc,+sha3")
9443			# else
9444			# define ATTRIBUTES _target_attribute("arch=armv8.2-a+crypto+crc+sha3")
9445			# endif
9446			# define ENABLE_EOR3 1
9447			/* #include "arm-crc32_pmull_wide.h" */
9448
9449
9450
9451
9452			/* #include "arm-crc32_pmull_helpers.h" */
9453
9454
9455
9456
9457
9458			#undef u32_to_bytevec
9459			static forceinline ATTRIBUTES uint8x16_t
9460			ADD_SUFFIX(u32_to_bytevec)(u32 a)
9461			{
9462			return vreinterpretq_u8_u32(vsetq_lane_u32(a, vdupq_n_u32(0), 0));
9463			}
9464			#define u32_to_bytevec ADD_SUFFIX(u32_to_bytevec)
9465
9466
9467			#undef load_multipliers
9468			static forceinline ATTRIBUTES poly64x2_t
9469			ADD_SUFFIX(load_multipliers)(const u64 p[2])
9470			{
9471			return vreinterpretq_p64_u64(vld1q_u64(p));
9472			}
9473			#define load_multipliers ADD_SUFFIX(load_multipliers)
9474
9475
9476			#undef clmul_low
9477			static forceinline ATTRIBUTES uint8x16_t
9478			ADD_SUFFIX(clmul_low)(uint8x16_t a, poly64x2_t b)
9479			{
9480			return vreinterpretq_u8_p128(
9481			compat_vmull_p64(vgetq_lane_p64(vreinterpretq_p64_u8(a), 0),
9482			vgetq_lane_p64(b, 0)));
9483			}
9484			#define clmul_low ADD_SUFFIX(clmul_low)
9485
9486
9487			#undef clmul_high
9488			static forceinline ATTRIBUTES uint8x16_t
9489			ADD_SUFFIX(clmul_high)(uint8x16_t a, poly64x2_t b)
9490			{
9491			#ifdef __clang__
9492
9493			uint8x16_t res;
9494
9495			__asm__("pmull2 %0.1q, %1.2d, %2.2d" : "=w" (res) : "w" (a), "w" (b));
9496			return res;
9497			#else
9498			return vreinterpretq_u8_p128(vmull_high_p64(vreinterpretq_p64_u8(a), b));
9499			#endif
9500			}
9501			#define clmul_high ADD_SUFFIX(clmul_high)
9502
9503			#undef eor3
9504			static forceinline ATTRIBUTES uint8x16_t
9505			ADD_SUFFIX(eor3)(uint8x16_t a, uint8x16_t b, uint8x16_t c)
9506			{
9507			#if ENABLE_EOR3
9508			return veor3q_u8(a, b, c);
9509			#else
9510			return veorq_u8(veorq_u8(a, b), c);
9511			#endif
9512			}
9513			#define eor3 ADD_SUFFIX(eor3)
9514
9515			#undef fold_vec
9516			static forceinline ATTRIBUTES uint8x16_t
9517			ADD_SUFFIX(fold_vec)(uint8x16_t src, uint8x16_t dst, poly64x2_t multipliers)
9518			{
9519			uint8x16_t a = clmul_low(src, multipliers);
9520			uint8x16_t b = clmul_high(src, multipliers);
9521
9522			return eor3(a, b, dst);
9523			}
9524			#define fold_vec ADD_SUFFIX(fold_vec)
9525
9526
9527			#undef fold_partial_vec
9528			static forceinline ATTRIBUTES MAYBE_UNUSED uint8x16_t
9529			ADD_SUFFIX(fold_partial_vec)(uint8x16_t v, const u8 *p, size_t len,
9530			poly64x2_t multipliers_1)
9531			{
9532
9533			static const u8 shift_tab[48] = {
9534			0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
9535			0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
9536			0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
9537			0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
9538			0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
9539			0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
9540			};
9541			const uint8x16_t lshift = vld1q_u8(&shift_tab[len]);
9542			const uint8x16_t rshift = vld1q_u8(&shift_tab[len + 16]);
9543			uint8x16_t x0, x1, bsl_mask;
9544
9545
9546			x0 = vqtbl1q_u8(v, lshift);
9547
9548
9549			bsl_mask = vreinterpretq_u8_s8(
9550			vshrq_n_s8(vreinterpretq_s8_u8(rshift), 7));
9551
9552
9553			x1 = vbslq_u8(bsl_mask ,
9554			vld1q_u8(p + len - 16), vqtbl1q_u8(v, rshift));
9555
9556			return fold_vec(x0, x1, multipliers_1);
9557			}
9558			#define fold_partial_vec ADD_SUFFIX(fold_partial_vec)
9559
9560
9561			static ATTRIBUTES u32
9562			ADD_SUFFIX(crc32_arm)(u32 crc, const u8 *p, size_t len)
9563			{
9564			uint8x16_t v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11;
9565
9566			if (len < 3 * 192) {
9567			static const u64 _aligned_attribute(16) mults[3][2] = {
9568			{ CRC32_X543_MODG, CRC32_X479_MODG },
9569			{ CRC32_X287_MODG, CRC32_X223_MODG },
9570			{ CRC32_X159_MODG, CRC32_X95_MODG },
9571			};
9572			poly64x2_t multipliers_4, multipliers_2, multipliers_1;
9573
9574			if (len < 64)
9575			goto tail;
9576			multipliers_4 = load_multipliers(mults[0]);
9577			multipliers_2 = load_multipliers(mults[1]);
9578			multipliers_1 = load_multipliers(mults[2]);
9579
9580			v0 = veorq_u8(vld1q_u8(p + 0), u32_to_bytevec(crc));
9581			v1 = vld1q_u8(p + 16);
9582			v2 = vld1q_u8(p + 32);
9583			v3 = vld1q_u8(p + 48);
9584			p += 64;
9585			len -= 64;
9586			while (len >= 64) {
9587			v0 = fold_vec(v0, vld1q_u8(p + 0), multipliers_4);
9588			v1 = fold_vec(v1, vld1q_u8(p + 16), multipliers_4);
9589			v2 = fold_vec(v2, vld1q_u8(p + 32), multipliers_4);
9590			v3 = fold_vec(v3, vld1q_u8(p + 48), multipliers_4);
9591			p += 64;
9592			len -= 64;
9593			}
9594			v0 = fold_vec(v0, v2, multipliers_2);
9595			v1 = fold_vec(v1, v3, multipliers_2);
9596			if (len >= 32) {
9597			v0 = fold_vec(v0, vld1q_u8(p + 0), multipliers_2);
9598			v1 = fold_vec(v1, vld1q_u8(p + 16), multipliers_2);
9599			p += 32;
9600			len -= 32;
9601			}
9602			v0 = fold_vec(v0, v1, multipliers_1);
9603			} else {
9604			static const u64 _aligned_attribute(16) mults[4][2] = {
9605			{ CRC32_X1567_MODG, CRC32_X1503_MODG },
9606			{ CRC32_X799_MODG, CRC32_X735_MODG },
9607			{ CRC32_X415_MODG, CRC32_X351_MODG },
9608			{ CRC32_X159_MODG, CRC32_X95_MODG },
9609			};
9610			const poly64x2_t multipliers_12 = load_multipliers(mults[0]);
9611			const poly64x2_t multipliers_6 = load_multipliers(mults[1]);
9612			const poly64x2_t multipliers_3 = load_multipliers(mults[2]);
9613			const poly64x2_t multipliers_1 = load_multipliers(mults[3]);
9614			const size_t align = -(uintptr_t)p & 15;
9615			const uint8x16_t *vp;
9616
9617
9618			if (align) {
9619			if (align & 1)
9620			crc = __crc32b(crc, *p++);
9621			if (align & 2) {
9622			crc = __crc32h(crc, le16_bswap((u16 )p));
9623			p += 2;
9624			}
9625			if (align & 4) {
9626			crc = __crc32w(crc, le32_bswap((u32 )p));
9627			p += 4;
9628			}
9629			if (align & 8) {
9630			crc = __crc32d(crc, le64_bswap((u64 )p));
9631			p += 8;
9632			}
9633			len -= align;
9634			}
9635			vp = (const uint8x16_t *)p;
9636			v0 = veorq_u8(*vp++, u32_to_bytevec(crc));
9637			v1 = *vp++;
9638			v2 = *vp++;
9639			v3 = *vp++;
9640			v4 = *vp++;
9641			v5 = *vp++;
9642			v6 = *vp++;
9643			v7 = *vp++;
9644			v8 = *vp++;
9645			v9 = *vp++;
9646			v10 = *vp++;
9647			v11 = *vp++;
9648			len -= 192;
9649
9650			do {
9651			v0 = fold_vec(v0, *vp++, multipliers_12);
9652			v1 = fold_vec(v1, *vp++, multipliers_12);
9653			v2 = fold_vec(v2, *vp++, multipliers_12);
9654			v3 = fold_vec(v3, *vp++, multipliers_12);
9655			v4 = fold_vec(v4, *vp++, multipliers_12);
9656			v5 = fold_vec(v5, *vp++, multipliers_12);
9657			v6 = fold_vec(v6, *vp++, multipliers_12);
9658			v7 = fold_vec(v7, *vp++, multipliers_12);
9659			v8 = fold_vec(v8, *vp++, multipliers_12);
9660			v9 = fold_vec(v9, *vp++, multipliers_12);
9661			v10 = fold_vec(v10, *vp++, multipliers_12);
9662			v11 = fold_vec(v11, *vp++, multipliers_12);
9663			len -= 192;
9664			} while (len >= 192);
9665
9666
9667			v0 = fold_vec(v0, v6, multipliers_6);
9668			v1 = fold_vec(v1, v7, multipliers_6);
9669			v2 = fold_vec(v2, v8, multipliers_6);
9670			v3 = fold_vec(v3, v9, multipliers_6);
9671			v4 = fold_vec(v4, v10, multipliers_6);
9672			v5 = fold_vec(v5, v11, multipliers_6);
9673			if (len >= 96) {
9674			v0 = fold_vec(v0, *vp++, multipliers_6);
9675			v1 = fold_vec(v1, *vp++, multipliers_6);
9676			v2 = fold_vec(v2, *vp++, multipliers_6);
9677			v3 = fold_vec(v3, *vp++, multipliers_6);
9678			v4 = fold_vec(v4, *vp++, multipliers_6);
9679			v5 = fold_vec(v5, *vp++, multipliers_6);
9680			len -= 96;
9681			}
9682			v0 = fold_vec(v0, v3, multipliers_3);
9683			v1 = fold_vec(v1, v4, multipliers_3);
9684			v2 = fold_vec(v2, v5, multipliers_3);
9685			if (len >= 48) {
9686			v0 = fold_vec(v0, *vp++, multipliers_3);
9687			v1 = fold_vec(v1, *vp++, multipliers_3);
9688			v2 = fold_vec(v2, *vp++, multipliers_3);
9689			len -= 48;
9690			}
9691			v0 = fold_vec(v0, v1, multipliers_1);
9692			v0 = fold_vec(v0, v2, multipliers_1);
9693			p = (const u8 *)vp;
9694			}
9695
9696			crc = __crc32d(0, vgetq_lane_u64(vreinterpretq_u64_u8(v0), 0));
9697			crc = __crc32d(crc, vgetq_lane_u64(vreinterpretq_u64_u8(v0), 1));
9698			tail:
9699
9700			if (len & 32) {
9701			crc = __crc32d(crc, get_unaligned_le64(p + 0));
9702			crc = __crc32d(crc, get_unaligned_le64(p + 8));
9703			crc = __crc32d(crc, get_unaligned_le64(p + 16));
9704			crc = __crc32d(crc, get_unaligned_le64(p + 24));
9705			p += 32;
9706			}
9707			if (len & 16) {
9708			crc = __crc32d(crc, get_unaligned_le64(p + 0));
9709			crc = __crc32d(crc, get_unaligned_le64(p + 8));
9710			p += 16;
9711			}
9712			if (len & 8) {
9713			crc = __crc32d(crc, get_unaligned_le64(p));
9714			p += 8;
9715			}
9716			if (len & 4) {
9717			crc = __crc32w(crc, get_unaligned_le32(p));
9718			p += 4;
9719			}
9720			if (len & 2) {
9721			crc = __crc32h(crc, get_unaligned_le16(p));
9722			p += 2;
9723			}
9724			if (len & 1)
9725			crc = __crc32b(crc, *p);
9726			return crc;
9727			}
9728
9729			#undef SUFFIX
9730			#undef ATTRIBUTES
9731			#undef ENABLE_EOR3
9732
9733			#endif
9734
9735			static inline crc32_func_t
9736			arch_select_crc32_func(void)
9737			{
9738			const u32 features MAYBE_UNUSED = get_arm_cpu_features();
9739
9740			#ifdef crc32_arm_pmullx12_crc_eor3
9741			if ((features & ARM_CPU_FEATURE_PREFER_PMULL) &&
9742			HAVE_PMULL(features) && HAVE_CRC32(features) && HAVE_SHA3(features))
9743			return crc32_arm_pmullx12_crc_eor3;
9744			#endif
9745			#ifdef crc32_arm_pmullx12_crc
9746			if ((features & ARM_CPU_FEATURE_PREFER_PMULL) &&
9747			HAVE_PMULL(features) && HAVE_CRC32(features))
9748			return crc32_arm_pmullx12_crc;
9749			#endif
9750			#ifdef crc32_arm_crc_pmullcombine
9751			if (HAVE_CRC32(features) && HAVE_PMULL(features))
9752			return crc32_arm_crc_pmullcombine;
9753			#endif
9754			#ifdef crc32_arm_crc
9755			if (HAVE_CRC32(features))
9756			return crc32_arm_crc;
9757			#endif
9758			#ifdef crc32_arm_pmullx4
9759			if (HAVE_PMULL(features))
9760			return crc32_arm_pmullx4;
9761			#endif
9762			return NULL;
9763			}
9764			#define arch_select_crc32_func arch_select_crc32_func
9765
9766			#endif
9767
9768			#elif defined(ARCH_X86_32) \|\| defined(ARCH_X86_64)
9769			/* # include "x86/crc32_impl.h" */
9770
9771
9772			#ifndef LIB_X86_CRC32_IMPL_H
9773			#define LIB_X86_CRC32_IMPL_H
9774
9775			/* #include "x86-cpu_features.h" */
9776
9777
9778			#ifndef LIB_X86_CPU_FEATURES_H
9779			#define LIB_X86_CPU_FEATURES_H
9780
9781			/* #include "lib_common.h" */
9782
9783
9784			#ifndef LIB_LIB_COMMON_H
9785			#define LIB_LIB_COMMON_H
9786
9787			#ifdef LIBDEFLATE_H
9788
9789			# error "lib_common.h must always be included before libdeflate.h"
9790			#endif
9791
/*
 * LIBDEFLATE_EXPORT_SYM: marks a symbol as exported.  When building a DLL
 * (LIBDEFLATE_DLL defined on Windows or Cygwin) this is dllexport; with GNU C
 * compatible compilers it is default visibility; otherwise it is empty.
 */
#if defined(LIBDEFLATE_DLL) && (defined(_WIN32) || defined(__CYGWIN__))
#  define LIBDEFLATE_EXPORT_SYM  __declspec(dllexport)
#elif defined(__GNUC__)
#  define LIBDEFLATE_EXPORT_SYM  __attribute__((visibility("default")))
#else
#  define LIBDEFLATE_EXPORT_SYM
#endif

/*
 * LIBDEFLATE_ALIGN_STACK: on 32-bit x86 with GNU C compatible compilers, ask
 * for the stack to be realigned on entry to the function; empty elsewhere.
 */
#if defined(__GNUC__) && defined(__i386__)
#  define LIBDEFLATE_ALIGN_STACK  __attribute__((force_align_arg_pointer))
#else
#  define LIBDEFLATE_ALIGN_STACK
#endif

/* Decoration applied to every public API function when building the library. */
#define LIBDEFLATEAPI	LIBDEFLATE_EXPORT_SYM LIBDEFLATE_ALIGN_STACK
9808
9809			/* #include "../common_defs.h" */
9810
9811
9812			#ifndef COMMON_DEFS_H
9813			#define COMMON_DEFS_H
9814
9815			/* #include "libdeflate.h" */
9816
9817
9818			#ifndef LIBDEFLATE_H
9819			#define LIBDEFLATE_H
9820
/* size_t and the uintN_t types are used by the declarations that follow. */
#include <stddef.h>
#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

/* Version of the library this header describes. */
#define LIBDEFLATE_VERSION_MAJOR	1
#define LIBDEFLATE_VERSION_MINOR	25
#define LIBDEFLATE_VERSION_STRING	"1.25"
9831
9832
/*
 * Fallback definition of LIBDEFLATEAPI, used when nothing has defined it
 * before this point: dllimport when LIBDEFLATE_DLL is defined on Windows or
 * Cygwin, empty otherwise.
 */
#ifndef LIBDEFLATEAPI
#  if defined(LIBDEFLATE_DLL) && (defined(_WIN32) || defined(__CYGWIN__))
#    define LIBDEFLATEAPI	__declspec(dllimport)
#  else
#    define LIBDEFLATEAPI
#  endif
#endif
9840
9841
9842
9843
9844
9845			struct libdeflate_compressor;
9846			struct libdeflate_options;
9847
9848
9849			LIBDEFLATEAPI struct libdeflate_compressor *
9850			libdeflate_alloc_compressor(int compression_level);
9851
9852
9853			LIBDEFLATEAPI struct libdeflate_compressor *
9854			libdeflate_alloc_compressor_ex(int compression_level,
9855			const struct libdeflate_options *options);
9856
9857
9858			LIBDEFLATEAPI size_t
9859			libdeflate_deflate_compress(struct libdeflate_compressor *compressor,
9860			const void *in, size_t in_nbytes,
9861			void *out, size_t out_nbytes_avail);
9862
9863
9864			LIBDEFLATEAPI size_t
9865			libdeflate_deflate_compress_bound(struct libdeflate_compressor *compressor,
9866			size_t in_nbytes);
9867
9868
9869			LIBDEFLATEAPI size_t
9870			libdeflate_zlib_compress(struct libdeflate_compressor *compressor,
9871			const void *in, size_t in_nbytes,
9872			void *out, size_t out_nbytes_avail);
9873
9874
9875			LIBDEFLATEAPI size_t
9876			libdeflate_zlib_compress_bound(struct libdeflate_compressor *compressor,
9877			size_t in_nbytes);
9878
9879
9880			LIBDEFLATEAPI size_t
9881			libdeflate_gzip_compress(struct libdeflate_compressor *compressor,
9882			const void *in, size_t in_nbytes,
9883			void *out, size_t out_nbytes_avail);
9884
9885
9886			LIBDEFLATEAPI size_t
9887			libdeflate_gzip_compress_bound(struct libdeflate_compressor *compressor,
9888			size_t in_nbytes);
9889
9890
9891			LIBDEFLATEAPI void
9892			libdeflate_free_compressor(struct libdeflate_compressor *compressor);
9893
9894
9895
9896
9897
9898			struct libdeflate_decompressor;
9899			struct libdeflate_options;
9900
9901
9902			LIBDEFLATEAPI struct libdeflate_decompressor *
9903			libdeflate_alloc_decompressor(void);
9904
9905
9906			LIBDEFLATEAPI struct libdeflate_decompressor *
9907			libdeflate_alloc_decompressor_ex(const struct libdeflate_options *options);
9908
9909
/*
 * Result code returned by the libdeflate_*_decompress() and
 * libdeflate_*_decompress_ex() functions.  The numeric values are spelled out
 * explicitly.
 *
 * NOTE(review): the exact conditions for each non-success code are not
 * visible here; the names suggest invalid input, output shorter than the
 * buffer, and output buffer too small, respectively -- confirm against the
 * library documentation.
 */
enum libdeflate_result {
	LIBDEFLATE_SUCCESS		= 0,
	LIBDEFLATE_BAD_DATA		= 1,
	LIBDEFLATE_SHORT_OUTPUT		= 2,
	LIBDEFLATE_INSUFFICIENT_SPACE	= 3,
};
9923
9924
9925			LIBDEFLATEAPI enum libdeflate_result
9926			libdeflate_deflate_decompress(struct libdeflate_decompressor *decompressor,
9927			const void *in, size_t in_nbytes,
9928			void *out, size_t out_nbytes_avail,
9929			size_t *actual_out_nbytes_ret);
9930
9931
9932			LIBDEFLATEAPI enum libdeflate_result
9933			libdeflate_deflate_decompress_ex(struct libdeflate_decompressor *decompressor,
9934			const void *in, size_t in_nbytes,
9935			void *out, size_t out_nbytes_avail,
9936			size_t *actual_in_nbytes_ret,
9937			size_t *actual_out_nbytes_ret);
9938
9939
9940			LIBDEFLATEAPI enum libdeflate_result
9941			libdeflate_zlib_decompress(struct libdeflate_decompressor *decompressor,
9942			const void *in, size_t in_nbytes,
9943			void *out, size_t out_nbytes_avail,
9944			size_t *actual_out_nbytes_ret);
9945
9946
9947			LIBDEFLATEAPI enum libdeflate_result
9948			libdeflate_zlib_decompress_ex(struct libdeflate_decompressor *decompressor,
9949			const void *in, size_t in_nbytes,
9950			void *out, size_t out_nbytes_avail,
9951			size_t *actual_in_nbytes_ret,
9952			size_t *actual_out_nbytes_ret);
9953
9954
9955			LIBDEFLATEAPI enum libdeflate_result
9956			libdeflate_gzip_decompress(struct libdeflate_decompressor *decompressor,
9957			const void *in, size_t in_nbytes,
9958			void *out, size_t out_nbytes_avail,
9959			size_t *actual_out_nbytes_ret);
9960
9961
9962			LIBDEFLATEAPI enum libdeflate_result
9963			libdeflate_gzip_decompress_ex(struct libdeflate_decompressor *decompressor,
9964			const void *in, size_t in_nbytes,
9965			void *out, size_t out_nbytes_avail,
9966			size_t *actual_in_nbytes_ret,
9967			size_t *actual_out_nbytes_ret);
9968
9969
9970			LIBDEFLATEAPI void
9971			libdeflate_free_decompressor(struct libdeflate_decompressor *decompressor);
9972
9973
9974
9975
9976
9977
9978			LIBDEFLATEAPI uint32_t
9979			libdeflate_adler32(uint32_t adler, const void *buffer, size_t len);
9980
9981
9982
9983			LIBDEFLATEAPI uint32_t
9984			libdeflate_crc32(uint32_t crc, const void *buffer, size_t len);
9985
9986
9987
9988
9989
9990
9991			LIBDEFLATEAPI void
9992			libdeflate_set_memory_allocator(void (malloc_func)(size_t),
9993			void (free_func)(void ));
9994
9995
/*
 * Options accepted by libdeflate_alloc_compressor_ex() and
 * libdeflate_alloc_decompressor_ex().
 */
struct libdeflate_options {

	/*
	 * Size of this structure in bytes.
	 * NOTE(review): presumably set by the caller to
	 * sizeof(struct libdeflate_options) so the library can tell which
	 * fields are present -- confirm against the library documentation.
	 */
	size_t sizeof_options;

	/*
	 * Allocation and deallocation functions.  'malloc_func' takes a size
	 * and returns a pointer; 'free_func' takes such a pointer.
	 */
	void *(*malloc_func)(size_t);
	void (*free_func)(void *);
};
10005
10006			#ifdef __cplusplus
10007			}
10008			#endif
10009
10010			#endif
10011
10012
#include <stdbool.h>
#include <stddef.h>	/* for size_t */
#include <stdint.h>
#ifdef _MSC_VER
#  include <intrin.h>	/* for _BitScan*() and other intrinsics */
#  include <stdlib.h>	/* for _byteswap_*() */

   /* Silence MSVC warnings that this code triggers by design. */
#  pragma warning(disable : 4146) /* unary minus on unsigned type */

#  pragma warning(disable : 4018) /* signed/unsigned mismatch */
#  pragma warning(disable : 4244) /* possible loss of data */
#  pragma warning(disable : 4267) /* possible loss of precision */
#  pragma warning(disable : 4310) /* cast truncates constant value */

#  pragma warning(disable : 4100) /* unreferenced formal parameter */
#  pragma warning(disable : 4127) /* conditional expression is constant */
#  pragma warning(disable : 4189) /* local variable initialized but not referenced */
#  pragma warning(disable : 4232) /* nonstandard extension used */
#  pragma warning(disable : 4245) /* conversion from 'int' to 'unsigned int' */
#  pragma warning(disable : 4295) /* array too small to include terminating null */
#endif
#ifndef FREESTANDING
#  include <string.h>	/* for memcpy() */
#endif
10038
10039
10040
10041
10042
10043
/*
 * Target architecture detection.  At most one of the ARCH_* macros below ends
 * up defined.  MSVC is identified through its _M_* macros (an x64 build in
 * ARM64EC mode is not treated as x86_64); every other compiler through the
 * GNU-style predefined macros.
 */
#undef ARCH_X86_64
#undef ARCH_X86_32
#undef ARCH_ARM64
#undef ARCH_ARM32
#undef ARCH_RISCV
#if defined(_MSC_VER)
#  if defined(_M_X64) && !defined(_M_ARM64EC)
#    define ARCH_X86_64
#  elif defined(_M_IX86)
#    define ARCH_X86_32
#  elif defined(_M_ARM64)
#    define ARCH_ARM64
#  elif defined(_M_ARM)
#    define ARCH_ARM32
#  endif
#elif defined(__x86_64__)
#  define ARCH_X86_64
#elif defined(__i386__)
#  define ARCH_X86_32
#elif defined(__aarch64__)
#  define ARCH_ARM64
#elif defined(__arm__)
#  define ARCH_ARM32
#elif defined(__riscv)
#  define ARCH_RISCV
#endif
10073
10074
10075
10076
10077
10078
/* Short names for the fixed-width integer types. */
typedef uint8_t  u8;
typedef uint16_t u16;
typedef uint32_t u32;
typedef uint64_t u64;
typedef int8_t   s8;
typedef int16_t  s16;
typedef int32_t  s32;
typedef int64_t  s64;

/* MSVC does not provide ssize_t, so define it to match the pointer width. */
#if defined(_MSC_VER) && defined(_WIN64)
typedef long long ssize_t;
#elif defined(_MSC_VER)
typedef long ssize_t;
#endif

/* Unsigned integer type used as the "machine word". */
typedef size_t machine_word_t;

/* Number of bytes in a machine word. */
#define WORDBYTES	((int)sizeof(machine_word_t))

/* Number of bits in a machine word. */
#define WORDBITS	(8 * WORDBYTES)
10105
10106
10107
10108
10109
10110
/*
 * GCC_PREREQ(major, minor): nonzero iff the compiler is genuine gcc (not
 * clang or the Intel compiler, which also define __GNUC__) of at least the
 * given version.  gcc older than 4.9 is rejected at compile time.
 */
#if defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER)
#  define GCC_PREREQ(major, minor)		\
	(__GNUC__ > (major) ||			\
	 (__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)))
#  if !GCC_PREREQ(4, 9)
#    error "gcc versions older than 4.9 are no longer supported"
#  endif
#else
#  define GCC_PREREQ(major, minor)	0
#endif
/*
 * CLANG_PREREQ(major, minor, apple_version): nonzero iff the compiler is
 * clang of at least the given version.  Apple's clang is compared by
 * __apple_build_version__ against 'apple_version'; other clang builds are
 * compared by (major, minor).  clang older than 3.9 is rejected.
 */
#ifdef __clang__
#  ifdef __apple_build_version__
#    define CLANG_PREREQ(major, minor, apple_version)	\
	(__apple_build_version__ >= (apple_version))
#  else
#    define CLANG_PREREQ(major, minor, apple_version)	\
	(__clang_major__ > (major) ||			\
	 (__clang_major__ == (major) && __clang_minor__ >= (minor)))
#  endif
#  if !CLANG_PREREQ(3, 9, 8000000)
#    error "clang versions older than 3.9 are no longer supported"
#  endif
#else
#  define CLANG_PREREQ(major, minor, apple_version)	0
#endif
/*
 * MSVC_PREREQ(version): nonzero iff the compiler is MSVC with _MSC_VER of at
 * least 'version'.  MSVC older than 1900 (Visual Studio 2015) is rejected.
 */
#ifdef _MSC_VER
#  define MSVC_PREREQ(version)	(_MSC_VER >= (version))
#  if !MSVC_PREREQ(1900)
#    error "MSVC versions older than Visual Studio 2015 are no longer supported"
#  endif
#else
#  define MSVC_PREREQ(version)	0
#endif
10144
10145
/*
 * Fallbacks so that __has_attribute() and __has_builtin() can be used in #if
 * expressions on compilers that do not provide them: both then report that
 * nothing is available.
 */
#if !defined(__has_attribute)
#  define __has_attribute(attribute)	0
#endif

#if !defined(__has_builtin)
#  define __has_builtin(builtin)	0
#endif
10154
10155
/* MSVC spells the keyword '__inline'. */
#ifdef _MSC_VER
#  define inline	__inline
#endif

/*
 * forceinline: 'inline' plus the strongest inlining request the compiler
 * offers (always_inline or __forceinline); plain 'inline' otherwise.
 */
#if defined(__GNUC__) || __has_attribute(always_inline)
#  define forceinline	inline __attribute__((always_inline))
#elif defined(_MSC_VER)
#  define forceinline	__forceinline
#else
#  define forceinline	inline
#endif

/* MAYBE_UNUSED: marks something that may legitimately go unused. */
#if defined(__GNUC__) || __has_attribute(unused)
#  define MAYBE_UNUSED	__attribute__((unused))
#else
#  define MAYBE_UNUSED
#endif

/* NORETURN: marks a function that does not return. */
#if defined(__GNUC__) || __has_attribute(noreturn)
#  define NORETURN	__attribute__((noreturn))
#else
#  define NORETURN
#endif
10182
10183
/*
 * Provide 'restrict' when __STDC_VERSION__ is missing or older than C11:
 * __restrict__ with GNU C compatible compilers, nothing otherwise.
 */
#if !defined(__STDC_VERSION__) || (__STDC_VERSION__ < 201112L)
#  if defined(__GNUC__) || defined(__clang__)
#    define restrict	__restrict__
#  else
#    define restrict
#  endif
#endif

/* likely(expr): hint that 'expr' is expected to be true. */
#if defined(__GNUC__) || __has_builtin(__builtin_expect)
#  define likely(expr)		__builtin_expect(!!(expr), 1)
#else
#  define likely(expr)		(expr)
#endif

/* unlikely(expr): hint that 'expr' is expected to be false. */
#if defined(__GNUC__) || __has_builtin(__builtin_expect)
#  define unlikely(expr)	__builtin_expect(!!(expr), 0)
#else
#  define unlikely(expr)	(expr)
#endif
10205
10206
/*
 * prefetchr(addr): hint that memory at 'addr' is about to be read.
 * Expands to nothing when no suitable primitive is known.
 */
#undef prefetchr
#if defined(__GNUC__) || __has_builtin(__builtin_prefetch)
#  define prefetchr(addr)	__builtin_prefetch((addr), 0)
#elif defined(_MSC_VER)
#  if defined(ARCH_X86_32) || defined(ARCH_X86_64)
#    define prefetchr(addr)	_mm_prefetch((addr), _MM_HINT_T0)
#  elif defined(ARCH_ARM64)
#    define prefetchr(addr)	__prefetch2((addr), 0x00 /* PLDL1KEEP */)
#  elif defined(ARCH_ARM32)
#    define prefetchr(addr)	__prefetch(addr)
#  endif
#endif
#ifndef prefetchr
#  define prefetchr(addr)
#endif

/*
 * prefetchw(addr): hint that memory at 'addr' is about to be written.
 * Expands to nothing when no suitable primitive is known.
 */
#undef prefetchw
#if defined(__GNUC__) || __has_builtin(__builtin_prefetch)
#  define prefetchw(addr)	__builtin_prefetch((addr), 1)
#elif defined(_MSC_VER)
#  if defined(ARCH_X86_32) || defined(ARCH_X86_64)
#    define prefetchw(addr)	_m_prefetchw(addr)
#  elif defined(ARCH_ARM64)
#    define prefetchw(addr)	__prefetch2((addr), 0x10 /* PSTL1KEEP */)
#  elif defined(ARCH_ARM32)
#    define prefetchw(addr)	__prefetchw(addr)
#  endif
#endif
#ifndef prefetchw
#  define prefetchw(addr)
#endif
10239
10240
/*
 * _aligned_attribute(n): request n-byte alignment for the declared object.
 * Left undefined when neither the GNU attribute nor the MSVC declspec is
 * available.
 */
#undef _aligned_attribute
#if defined(__GNUC__) || __has_attribute(aligned)
#  define _aligned_attribute(n)	__attribute__((aligned(n)))
#elif defined(_MSC_VER)
#  define _aligned_attribute(n)	__declspec(align(n))
#endif

/*
 * _target_attribute(attrs): compile the decorated function for the given
 * target feature string; expands to nothing when unsupported.
 */
#if defined(__GNUC__) || __has_attribute(target)
#  define _target_attribute(attrs)	__attribute__((target(attrs)))
#else
#  define _target_attribute(attrs)
#endif
10254
10255
10256
10257
10258
/* Number of elements in the array A (A must be a true array, not a pointer). */
#define ARRAY_LEN(A)		(sizeof(A) / sizeof((A)[0]))
/* Smaller / larger of two values.  An argument may be evaluated twice. */
#define MIN(a, b)		((a) <= (b) ? (a) : (b))
#define MAX(a, b)		((a) >= (b) ? (a) : (b))
/* n / d, rounded up. */
#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))
/* Compile-time assertion: a negative array size results when expr is false. */
#define STATIC_ASSERT(expr)	((void)sizeof(char[1 - 2 * !(expr)]))
/* Round n up to a multiple of a; the mask only works for power-of-two a. */
#define ALIGN(n, a)		(((n) + (a) - 1) & ~((a) - 1))
/* Round n up to a multiple of d (any positive d). */
#define ROUND_UP(n, d)		((d) * DIV_ROUND_UP((n), (d)))
10266
10267
10268
10269
10270
10271
/*
 * CPU_IS_LITTLE_ENDIAN(): true iff the target is little endian.
 *
 * Resolved at compile time from __BYTE_ORDER__ when available, assumed true
 * for MSVC, and otherwise found by inspecting the first byte of a 32-bit 1.
 */
#ifdef __BYTE_ORDER__
#  define CPU_IS_LITTLE_ENDIAN() (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
#elif defined(_MSC_VER)
#  define CPU_IS_LITTLE_ENDIAN() true
#else
static forceinline bool CPU_IS_LITTLE_ENDIAN(void)
{
	union {
		u32 w;
		u8 b;
	} probe;

	probe.w = 1;
	return probe.b;
}
#endif
10288
10289
10290			static forceinline u16 bswap16(u16 v)
10291			{
10292			#if defined(__GNUC__) \|\| __has_builtin(__builtin_bswap16)
10293			return __builtin_bswap16(v);
10294			#elif defined(_MSC_VER)
10295			return _byteswap_ushort(v);
10296			#else
10297			return (v << 8) \| (v >> 8);
10298			#endif
10299			}
10300
10301
10302			static forceinline u32 bswap32(u32 v)
10303			{
10304			#if defined(__GNUC__) \|\| __has_builtin(__builtin_bswap32)
10305			return __builtin_bswap32(v);
10306			#elif defined(_MSC_VER)
10307			return _byteswap_ulong(v);
10308			#else
10309			return ((v & 0x000000FF) << 24) \|
10310			((v & 0x0000FF00) << 8) \|
10311			((v & 0x00FF0000) >> 8) \|
10312			((v & 0xFF000000) >> 24);
10313			#endif
10314			}
10315
10316
10317			static forceinline u64 bswap64(u64 v)
10318			{
10319			#if defined(__GNUC__) \|\| __has_builtin(__builtin_bswap64)
10320			return __builtin_bswap64(v);
10321			#elif defined(_MSC_VER)
10322			return _byteswap_uint64(v);
10323			#else
10324			return ((v & 0x00000000000000FF) << 56) \|
10325			((v & 0x000000000000FF00) << 40) \|
10326			((v & 0x0000000000FF0000) << 24) \|
10327			((v & 0x00000000FF000000) << 8) \|
10328			((v & 0x000000FF00000000) >> 8) \|
10329			((v & 0x0000FF0000000000) >> 24) \|
10330			((v & 0x00FF000000000000) >> 40) \|
10331			((v & 0xFF00000000000000) >> 56);
10332			#endif
10333			}
10334
/*
 * Convert between CPU byte order and a fixed byte order.  leN_bswap() is the
 * identity on little-endian CPUs and a byte swap otherwise; beN_bswap() is
 * the reverse.  Each conversion is its own inverse.
 */
#define le16_bswap(v)	(CPU_IS_LITTLE_ENDIAN() ? (v) : bswap16(v))
#define be16_bswap(v)	(CPU_IS_LITTLE_ENDIAN() ? bswap16(v) : (v))

#define le32_bswap(v)	(CPU_IS_LITTLE_ENDIAN() ? (v) : bswap32(v))
#define be32_bswap(v)	(CPU_IS_LITTLE_ENDIAN() ? bswap32(v) : (v))

#define le64_bswap(v)	(CPU_IS_LITTLE_ENDIAN() ? (v) : bswap64(v))
#define be64_bswap(v)	(CPU_IS_LITTLE_ENDIAN() ? bswap64(v) : (v))
10341
10342
10343
10344
10345
10346
/*
 * UNALIGNED_ACCESS_IS_FAST: 1 if unaligned memory accesses are expected to be
 * cheap on the target, else 0.  It is 1 for gcc/clang on the listed targets
 * and for MSVC on every target.
 */
#if (defined(__GNUC__) || defined(__clang__)) && \
	(defined(ARCH_X86_64) || defined(ARCH_X86_32) || \
	 defined(__ARM_FEATURE_UNALIGNED) || defined(__powerpc64__) || \
	 defined(__riscv_misaligned_fast) || \
	 defined(__wasm__))
#  define UNALIGNED_ACCESS_IS_FAST	1
#elif defined(_MSC_VER)
#  define UNALIGNED_ACCESS_IS_FAST	1
#else
#  define UNALIGNED_ACCESS_IS_FAST	0
#endif
10358
10359
10360
10361			#ifdef FREESTANDING
10362			# define MEMCOPY __builtin_memcpy
10363			#else
10364			# define MEMCOPY memcpy
10365			#endif
10366
10367
10368
10369			#define DEFINE_UNALIGNED_TYPE(type) \
10370			static forceinline type \
10371			load_##type##_unaligned(const void *p) \
10372			{ \
10373			type v; \
10374			\
10375			MEMCOPY(&v, p, sizeof(v)); \
10376			return v; \
10377			} \
10378			\
10379			static forceinline void \
10380			store_##type##_unaligned(type v, void *p) \
10381			{ \
10382			MEMCOPY(p, &v, sizeof(v)); \
10383			}
10384
10385			DEFINE_UNALIGNED_TYPE(u16)
10386			DEFINE_UNALIGNED_TYPE(u32)
10387			DEFINE_UNALIGNED_TYPE(u64)
10388			DEFINE_UNALIGNED_TYPE(machine_word_t)
10389
10390			#undef MEMCOPY
10391
10392			#define load_word_unaligned load_machine_word_t_unaligned
10393			#define store_word_unaligned store_machine_word_t_unaligned
10394
10395
10396
10397			static forceinline u16
10398			get_unaligned_le16(const u8 *p)
10399			{
10400			if (UNALIGNED_ACCESS_IS_FAST)
10401			return le16_bswap(load_u16_unaligned(p));
10402			else
10403			return ((u16)p[1] << 8) \| p[0];
10404			}
10405
10406			static forceinline u16
10407			get_unaligned_be16(const u8 *p)
10408			{
10409			if (UNALIGNED_ACCESS_IS_FAST)
10410			return be16_bswap(load_u16_unaligned(p));
10411			else
10412			return ((u16)p[0] << 8) \| p[1];
10413			}
10414
10415			static forceinline u32
10416			get_unaligned_le32(const u8 *p)
10417			{
10418			if (UNALIGNED_ACCESS_IS_FAST)
10419			return le32_bswap(load_u32_unaligned(p));
10420			else
10421			return ((u32)p[3] << 24) \| ((u32)p[2] << 16) \|
10422			((u32)p[1] << 8) \| p[0];
10423			}
10424
10425			static forceinline u32
10426			get_unaligned_be32(const u8 *p)
10427			{
10428			if (UNALIGNED_ACCESS_IS_FAST)
10429			return be32_bswap(load_u32_unaligned(p));
10430			else
10431			return ((u32)p[0] << 24) \| ((u32)p[1] << 16) \|
10432			((u32)p[2] << 8) \| p[3];
10433			}
10434
10435			static forceinline u64
10436			get_unaligned_le64(const u8 *p)
10437			{
10438			if (UNALIGNED_ACCESS_IS_FAST)
10439			return le64_bswap(load_u64_unaligned(p));
10440			else
10441			return ((u64)p[7] << 56) \| ((u64)p[6] << 48) \|
10442			((u64)p[5] << 40) \| ((u64)p[4] << 32) \|
10443			((u64)p[3] << 24) \| ((u64)p[2] << 16) \|
10444			((u64)p[1] << 8) \| p[0];
10445			}
10446
10447			static forceinline machine_word_t
10448			get_unaligned_leword(const u8 *p)
10449			{
10450			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
10451			if (WORDBITS == 32)
10452			return get_unaligned_le32(p);
10453			else
10454			return get_unaligned_le64(p);
10455			}
10456
10457
10458
10459			static forceinline void
10460			put_unaligned_le16(u16 v, u8 *p)
10461			{
10462			if (UNALIGNED_ACCESS_IS_FAST) {
10463			store_u16_unaligned(le16_bswap(v), p);
10464			} else {
10465			p[0] = (u8)(v >> 0);
10466			p[1] = (u8)(v >> 8);
10467			}
10468			}
10469
10470			static forceinline void
10471			put_unaligned_be16(u16 v, u8 *p)
10472			{
10473			if (UNALIGNED_ACCESS_IS_FAST) {
10474			store_u16_unaligned(be16_bswap(v), p);
10475			} else {
10476			p[0] = (u8)(v >> 8);
10477			p[1] = (u8)(v >> 0);
10478			}
10479			}
10480
10481			static forceinline void
10482			put_unaligned_le32(u32 v, u8 *p)
10483			{
10484			if (UNALIGNED_ACCESS_IS_FAST) {
10485			store_u32_unaligned(le32_bswap(v), p);
10486			} else {
10487			p[0] = (u8)(v >> 0);
10488			p[1] = (u8)(v >> 8);
10489			p[2] = (u8)(v >> 16);
10490			p[3] = (u8)(v >> 24);
10491			}
10492			}
10493
10494			static forceinline void
10495			put_unaligned_be32(u32 v, u8 *p)
10496			{
10497			if (UNALIGNED_ACCESS_IS_FAST) {
10498			store_u32_unaligned(be32_bswap(v), p);
10499			} else {
10500			p[0] = (u8)(v >> 24);
10501			p[1] = (u8)(v >> 16);
10502			p[2] = (u8)(v >> 8);
10503			p[3] = (u8)(v >> 0);
10504			}
10505			}
10506
10507			static forceinline void
10508			put_unaligned_le64(u64 v, u8 *p)
10509			{
10510			if (UNALIGNED_ACCESS_IS_FAST) {
10511			store_u64_unaligned(le64_bswap(v), p);
10512			} else {
10513			p[0] = (u8)(v >> 0);
10514			p[1] = (u8)(v >> 8);
10515			p[2] = (u8)(v >> 16);
10516			p[3] = (u8)(v >> 24);
10517			p[4] = (u8)(v >> 32);
10518			p[5] = (u8)(v >> 40);
10519			p[6] = (u8)(v >> 48);
10520			p[7] = (u8)(v >> 56);
10521			}
10522			}
10523
10524			static forceinline void
10525			put_unaligned_leword(machine_word_t v, u8 *p)
10526			{
10527			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
10528			if (WORDBITS == 32)
10529			put_unaligned_le32(v, p);
10530			else
10531			put_unaligned_le64(v, p);
10532			}
10533
10534
10535
10536
10537
10538
10539
10540			static forceinline unsigned
10541			bsr32(u32 v)
10542			{
10543			#if defined(__GNUC__) \|\| __has_builtin(__builtin_clz)
10544			return 31 - __builtin_clz(v);
10545			#elif defined(_MSC_VER)
10546			unsigned long i;
10547
10548			_BitScanReverse(&i, v);
10549			return i;
10550			#else
10551			unsigned i = 0;
10552
10553			while ((v >>= 1) != 0)
10554			i++;
10555			return i;
10556			#endif
10557			}
10558
10559			static forceinline unsigned
10560			bsr64(u64 v)
10561			{
10562			#if defined(__GNUC__) \|\| __has_builtin(__builtin_clzll)
10563			return 63 - __builtin_clzll(v);
10564			#elif defined(_MSC_VER) && defined(_WIN64)
10565			unsigned long i;
10566
10567			_BitScanReverse64(&i, v);
10568			return i;
10569			#else
10570			unsigned i = 0;
10571
10572			while ((v >>= 1) != 0)
10573			i++;
10574			return i;
10575			#endif
10576			}
10577
10578			static forceinline unsigned
10579			bsrw(machine_word_t v)
10580			{
10581			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
10582			if (WORDBITS == 32)
10583			return bsr32(v);
10584			else
10585			return bsr64(v);
10586			}
10587
10588
10589
10590			static forceinline unsigned
10591			bsf32(u32 v)
10592			{
10593			#if defined(__GNUC__) \|\| __has_builtin(__builtin_ctz)
10594			return __builtin_ctz(v);
10595			#elif defined(_MSC_VER)
10596			unsigned long i;
10597
10598			_BitScanForward(&i, v);
10599			return i;
10600			#else
10601			unsigned i = 0;
10602
10603			for (; (v & 1) == 0; v >>= 1)
10604			i++;
10605			return i;
10606			#endif
10607			}
10608
10609			static forceinline unsigned
10610			bsf64(u64 v)
10611			{
10612			#if defined(__GNUC__) \|\| __has_builtin(__builtin_ctzll)
10613			return __builtin_ctzll(v);
10614			#elif defined(_MSC_VER) && defined(_WIN64)
10615			unsigned long i;
10616
10617			_BitScanForward64(&i, v);
10618			return i;
10619			#else
10620			unsigned i = 0;
10621
10622			for (; (v & 1) == 0; v >>= 1)
10623			i++;
10624			return i;
10625			#endif
10626			}
10627
10628			static forceinline unsigned
10629			bsfw(machine_word_t v)
10630			{
10631			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
10632			if (WORDBITS == 32)
10633			return bsf32(v);
10634			else
10635			return bsf64(v);
10636			}
10637
10638
10639			#undef rbit32
10640			#if (defined(__GNUC__) \|\| defined(__clang__)) && defined(ARCH_ARM32) && \
10641			(__ARM_ARCH >= 7 \|\| (__ARM_ARCH == 6 && defined(__ARM_ARCH_6T2__)))
10642			static forceinline u32
10643			rbit32(u32 v)
10644			{
10645			__asm__("rbit %0, %1" : "=r" (v) : "r" (v));
10646			return v;
10647			}
10648			#define rbit32 rbit32
10649			#elif (defined(__GNUC__) \|\| defined(__clang__)) && defined(ARCH_ARM64)
10650			static forceinline u32
10651			rbit32(u32 v)
10652			{
10653			__asm__("rbit %w0, %w1" : "=r" (v) : "r" (v));
10654			return v;
10655			}
10656			#define rbit32 rbit32
10657			#endif
10658
10659			#endif
10660
10661
10662			typedef void *(*malloc_func_t)(size_t);
10663			typedef void (*free_func_t)(void *);
10664
10665			extern malloc_func_t libdeflate_default_malloc_func;
10666			extern free_func_t libdeflate_default_free_func;
10667
10668			void *libdeflate_aligned_malloc(malloc_func_t malloc_func,
10669			size_t alignment, size_t size);
10670			void libdeflate_aligned_free(free_func_t free_func, void *ptr);
10671
10672			#ifdef FREESTANDING
10673
10674			void *memset(void *s, int c, size_t n);
10675			#define memset(s, c, n) __builtin_memset((s), (c), (n))
10676
10677			void *memcpy(void *dest, const void *src, size_t n);
10678			#define memcpy(dest, src, n) __builtin_memcpy((dest), (src), (n))
10679
10680			void *memmove(void *dest, const void *src, size_t n);
10681			#define memmove(dest, src, n) __builtin_memmove((dest), (src), (n))
10682
10683			int memcmp(const void *s1, const void *s2, size_t n);
10684			#define memcmp(s1, s2, n) __builtin_memcmp((s1), (s2), (n))
10685
10686			#undef LIBDEFLATE_ENABLE_ASSERTIONS
10687			#else
10688			# include <string.h>
10689
10690			# ifdef __clang_analyzer__
10691			# define LIBDEFLATE_ENABLE_ASSERTIONS
10692			# endif
10693			#endif
10694
10695
10696			#ifdef LIBDEFLATE_ENABLE_ASSERTIONS
10697			NORETURN void
10698			libdeflate_assertion_failed(const char *expr, const char *file, int line);
10699			#define ASSERT(expr) { if (unlikely(!(expr))) \
10700			libdeflate_assertion_failed(#expr, __FILE__, __LINE__); }
10701			#else
10702			#define ASSERT(expr) (void)(expr)
10703			#endif
10704
10705			#define CONCAT_IMPL(a, b) a##b
10706			#define CONCAT(a, b) CONCAT_IMPL(a, b)
10707			#define ADD_SUFFIX(name) CONCAT(name, SUFFIX)
10708
10709			#endif
10710
10711
10712			#if defined(ARCH_X86_32) \|\| defined(ARCH_X86_64)
10713
10714			#define X86_CPU_FEATURE_SSE2 (1 << 0)
10715			#define X86_CPU_FEATURE_PCLMULQDQ (1 << 1)
10716			#define X86_CPU_FEATURE_AVX (1 << 2)
10717			#define X86_CPU_FEATURE_AVX2 (1 << 3)
10718			#define X86_CPU_FEATURE_BMI2 (1 << 4)
10719
10720			#define X86_CPU_FEATURE_ZMM (1 << 5)
10721			#define X86_CPU_FEATURE_AVX512BW (1 << 6)
10722			#define X86_CPU_FEATURE_AVX512VL (1 << 7)
10723			#define X86_CPU_FEATURE_VPCLMULQDQ (1 << 8)
10724			#define X86_CPU_FEATURE_AVX512VNNI (1 << 9)
10725			#define X86_CPU_FEATURE_AVXVNNI (1 << 10)
10726
10727			#if defined(__GNUC__) \|\| defined(__clang__) \|\| defined(_MSC_VER)
10728
10729			# define X86_CPU_FEATURES_KNOWN (1U << 31)
10730			extern volatile u32 libdeflate_x86_cpu_features;
10731
10732			void libdeflate_init_x86_cpu_features(void);
10733
10734			static inline u32 get_x86_cpu_features(void)
10735			{
10736			if (libdeflate_x86_cpu_features == 0)
10737			libdeflate_init_x86_cpu_features();
10738			return libdeflate_x86_cpu_features;
10739			}
10740
10741			# include <immintrin.h>
10742			# if defined(_MSC_VER) && defined(__clang__)
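10743			# include <tmmintrin.h>
10744			# include <smmintrin.h>
10745			# include <wmmintrin.h>
10746			# include <avxintrin.h>
10747			# include <avx2intrin.h>
10748			# include <avx512fintrin.h>
10749			# include <avx512bwintrin.h>
10750			# include <avx512vlintrin.h>
10751			# if __has_include(<avx512vlbwintrin.h>)
10752			# include <avx512vlbwintrin.h>
10753			# endif
10754			# if __has_include(<vpclmulqdqintrin.h>)
10755			# include <vpclmulqdqintrin.h>
10756			# endif
10757			# if __has_include(<avx512vnniintrin.h>)
10758			# include <avx512vnniintrin.h>
10759			# endif
10760			# if __has_include(<avx512vlvnniintrin.h>)
10761			# include <avx512vlvnniintrin.h>
10762			# endif
10763			# if __has_include(<avxvnniintrin.h>)
10764			# include <avxvnniintrin.h>
10765			# endif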
10766			# endif
10767			#else
10768			static inline u32 get_x86_cpu_features(void) { return 0; }
10769			#endif
10770
10771			#if defined(__SSE2__) \|\| \
10772			(defined(_MSC_VER) && \
10773			(defined(ARCH_X86_64) \|\| (defined(_M_IX86_FP) && _M_IX86_FP >= 2)))
10774			# define HAVE_SSE2(features) 1
10775			# define HAVE_SSE2_NATIVE 1
10776			#else
10777			# define HAVE_SSE2(features) ((features) & X86_CPU_FEATURE_SSE2)
10778			# define HAVE_SSE2_NATIVE 0
10779			#endif
10780
10781			#if (defined(__PCLMUL__) && defined(__SSE4_1__)) \|\| \
10782			(defined(_MSC_VER) && defined(__AVX2__))
10783			# define HAVE_PCLMULQDQ(features) 1
10784			#else
10785			# define HAVE_PCLMULQDQ(features) ((features) & X86_CPU_FEATURE_PCLMULQDQ)
10786			#endif
10787
10788			#ifdef __AVX__
10789			# define HAVE_AVX(features) 1
10790			#else
10791			# define HAVE_AVX(features) ((features) & X86_CPU_FEATURE_AVX)
10792			#endif
10793
10794			#ifdef __AVX2__
10795			# define HAVE_AVX2(features) 1
10796			#else
10797			# define HAVE_AVX2(features) ((features) & X86_CPU_FEATURE_AVX2)
10798			#endif
10799
10800			#if defined(__BMI2__) \|\| (defined(_MSC_VER) && defined(__AVX2__))
10801			# define HAVE_BMI2(features) 1
10802			# define HAVE_BMI2_NATIVE 1
10803			#else
10804			# define HAVE_BMI2(features) ((features) & X86_CPU_FEATURE_BMI2)
10805			# define HAVE_BMI2_NATIVE 0
10806			#endif
10807
10808			#ifdef __AVX512BW__
10809			# define HAVE_AVX512BW(features) 1
10810			#else
10811			# define HAVE_AVX512BW(features) ((features) & X86_CPU_FEATURE_AVX512BW)
10812			#endif
10813
10814			#ifdef __AVX512VL__
10815			# define HAVE_AVX512VL(features) 1
10816			#else
10817			# define HAVE_AVX512VL(features) ((features) & X86_CPU_FEATURE_AVX512VL)
10818			#endif
10819
10820			#ifdef __VPCLMULQDQ__
10821			# define HAVE_VPCLMULQDQ(features) 1
10822			#else
10823			# define HAVE_VPCLMULQDQ(features) ((features) & X86_CPU_FEATURE_VPCLMULQDQ)
10824			#endif
10825
10826			#ifdef __AVX512VNNI__
10827			# define HAVE_AVX512VNNI(features) 1
10828			#else
10829			# define HAVE_AVX512VNNI(features) ((features) & X86_CPU_FEATURE_AVX512VNNI)
10830			#endif
10831
10832			#ifdef __AVXVNNI__
10833			# define HAVE_AVXVNNI(features) 1
10834			#else
10835			# define HAVE_AVXVNNI(features) ((features) & X86_CPU_FEATURE_AVXVNNI)
10836			#endif
10837
10838			#endif
10839
10840			#endif
10841
10842
10843
10844			static const u8 MAYBE_UNUSED shift_tab[48] = {
10845			0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
10846			0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
10847			0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
10848			0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
10849			0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
10850			0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
10851			};
10852
10853			#if defined(__GNUC__) \|\| defined(__clang__) \|\| defined(_MSC_VER)
10854
10855			# define crc32_x86_pclmulqdq crc32_x86_pclmulqdq
10856			# define SUFFIX _pclmulqdq
10857			# define ATTRIBUTES _target_attribute("pclmul,sse4.1")
10858			# define VL 16
10859			# define USE_AVX512 0
10860			/* #include "x86-crc32_pclmul_template.h" */
10861
10862
10863
10864
10865			#if VL == 16
10866			# define vec_t __m128i
10867			# define fold_vec fold_vec128
10868			# define VLOADU(p) _mm_loadu_si128((const void *)(p))
10869			# define VXOR(a, b) _mm_xor_si128((a), (b))
10870			# define M128I_TO_VEC(a) a
10871			# define MULTS_8V _mm_set_epi64x(CRC32_X991_MODG, CRC32_X1055_MODG)
10872			# define MULTS_4V _mm_set_epi64x(CRC32_X479_MODG, CRC32_X543_MODG)
10873			# define MULTS_2V _mm_set_epi64x(CRC32_X223_MODG, CRC32_X287_MODG)
10874			# define MULTS_1V _mm_set_epi64x(CRC32_X95_MODG, CRC32_X159_MODG)
10875			#elif VL == 32
10876			# define vec_t __m256i
10877			# define fold_vec fold_vec256
10878			# define VLOADU(p) _mm256_loadu_si256((const void *)(p))
10879			# define VXOR(a, b) _mm256_xor_si256((a), (b))
10880			# define M128I_TO_VEC(a) _mm256_zextsi128_si256(a)
10881			# define MULTS(a, b) _mm256_set_epi64x(a, b, a, b)
10882			# define MULTS_8V MULTS(CRC32_X2015_MODG, CRC32_X2079_MODG)
10883			# define MULTS_4V MULTS(CRC32_X991_MODG, CRC32_X1055_MODG)
10884			# define MULTS_2V MULTS(CRC32_X479_MODG, CRC32_X543_MODG)
10885			# define MULTS_1V MULTS(CRC32_X223_MODG, CRC32_X287_MODG)
10886			#elif VL == 64
10887			# define vec_t __m512i
10888			# define fold_vec fold_vec512
10889			# define VLOADU(p) _mm512_loadu_si512((const void *)(p))
10890			# define VXOR(a, b) _mm512_xor_si512((a), (b))
10891			# define M128I_TO_VEC(a) _mm512_zextsi128_si512(a)
10892			# define MULTS(a, b) _mm512_set_epi64(a, b, a, b, a, b, a, b)
10893			# define MULTS_8V MULTS(CRC32_X4063_MODG, CRC32_X4127_MODG)
10894			# define MULTS_4V MULTS(CRC32_X2015_MODG, CRC32_X2079_MODG)
10895			# define MULTS_2V MULTS(CRC32_X991_MODG, CRC32_X1055_MODG)
10896			# define MULTS_1V MULTS(CRC32_X479_MODG, CRC32_X543_MODG)
10897			#else
10898			# error "unsupported vector length"
10899			#endif
10900
10901			#undef fold_vec128
10902			static forceinline ATTRIBUTES __m128i
10903			ADD_SUFFIX(fold_vec128)(__m128i src, __m128i dst, __m128i mults)
10904			{
10905	0		dst = _mm_xor_si128(dst, _mm_clmulepi64_si128(src, mults, 0x00));
10906	0		dst = _mm_xor_si128(dst, _mm_clmulepi64_si128(src, mults, 0x11));
10907	0		return dst;
10908			}
10909			#define fold_vec128 ADD_SUFFIX(fold_vec128)
10910
10911			#if VL >= 32
10912			#undef fold_vec256
10913			static forceinline ATTRIBUTES __m256i
10914			ADD_SUFFIX(fold_vec256)(__m256i src, __m256i dst, __m256i mults)
10915			{
10916			#if USE_AVX512
10917
10918			return _mm256_ternarylogic_epi32(
10919			_mm256_clmulepi64_epi128(src, mults, 0x00),
10920			_mm256_clmulepi64_epi128(src, mults, 0x11),
10921			dst,
10922			0x96);
10923			#else
10924			return _mm256_xor_si256(
10925			_mm256_xor_si256(dst,
10926			_mm256_clmulepi64_epi128(src, mults, 0x00)),
10927			_mm256_clmulepi64_epi128(src, mults, 0x11));
10928			#endif
10929			}
10930			#define fold_vec256 ADD_SUFFIX(fold_vec256)
10931			#endif
10932
10933			#if VL >= 64
10934			#undef fold_vec512
10935			static forceinline ATTRIBUTES __m512i
10936			ADD_SUFFIX(fold_vec512)(__m512i src, __m512i dst, __m512i mults)
10937			{
10938
10939			return _mm512_ternarylogic_epi32(
10940			_mm512_clmulepi64_epi128(src, mults, 0x00),
10941			_mm512_clmulepi64_epi128(src, mults, 0x11),
10942			dst,
10943			0x96);
10944			}
10945			#define fold_vec512 ADD_SUFFIX(fold_vec512)
10946			#endif
10947
10948
10949			#undef fold_lessthan16bytes
10950			static forceinline ATTRIBUTES __m128i
10951			ADD_SUFFIX(fold_lessthan16bytes)(__m128i x, const u8 *p, size_t len,
10952			__m128i mults_128b)
10953			{
10954	0		__m128i lshift = _mm_loadu_si128((const void *)&shift_tab[len]);
10955	0		__m128i rshift = _mm_loadu_si128((const void *)&shift_tab[len + 16]);
10956			__m128i x0, x1;
10957
10958
10959	0		x0 = _mm_shuffle_epi8(x, lshift);
10960
10961
10962	0		x1 = _mm_blendv_epi8(_mm_shuffle_epi8(x, rshift),
10963	0		_mm_loadu_si128((const void *)(p + len - 16)),
10964
10965			rshift);
10966
10967	0		return fold_vec128(x0, x1, mults_128b);
10968			}
10969			#define fold_lessthan16bytes ADD_SUFFIX(fold_lessthan16bytes)
10970
10971			static ATTRIBUTES u32
10972	0		ADD_SUFFIX(crc32_x86)(u32 crc, const u8 *p, size_t len)
10973			{
10974
10975	0		const vec_t mults_8v = MULTS_8V;
10976	0		const vec_t mults_4v = MULTS_4V;
10977	0		const vec_t mults_2v = MULTS_2V;
10978	0		const vec_t mults_1v = MULTS_1V;
10979	0		const __m128i mults_128b = _mm_set_epi64x(CRC32_X95_MODG, CRC32_X159_MODG);
10980			const __m128i barrett_reduction_constants =
10981	0		_mm_set_epi64x(CRC32_BARRETT_CONSTANT_2, CRC32_BARRETT_CONSTANT_1);
10982			vec_t v0, v1, v2, v3, v4, v5, v6, v7;
10983	0		__m128i x0 = _mm_cvtsi32_si128(crc);
10984			__m128i x1;
10985
10986	0	0	if (len < 8*VL) {
10987	0	0	if (len < VL) {
10988			STATIC_ASSERT(VL == 16 \|\| VL == 32 \|\| VL == 64);
10989	0	0	if (len < 16) {
10990			#if USE_AVX512
10991			if (len < 4)
10992			return crc32_slice1(crc, p, len);
10993
10994			x0 = _mm_xor_si128(
10995			x0, _mm_maskz_loadu_epi8((1 << len) - 1, p));
10996			x0 = _mm_shuffle_epi8(
10997			x0, _mm_loadu_si128((const void *)&shift_tab[len]));
10998			goto reduce_x0;
10999			#else
11000	0		return crc32_slice1(crc, p, len);
11001			#endif
11002			}
11003
11004	0		x0 = _mm_xor_si128(_mm_loadu_si128((const void *)p), x0);
11005	0	0	if (len >= 32) {
11006	0		x0 = fold_vec128(x0, _mm_loadu_si128((const void *)(p + 16)),
11007			mults_128b);
11008	0	0	if (len >= 48)
11009	0		x0 = fold_vec128(x0, _mm_loadu_si128((const void *)(p + 32)),
11010			mults_128b);
11011			}
11012	0		p += len & ~15;
11013	0		goto less_than_16_remaining;
11014			}
11015	0		v0 = VXOR(VLOADU(p), M128I_TO_VEC(x0));
11016	0	0	if (len < 2*VL) {
11017	0		p += VL;
11018	0		goto less_than_vl_remaining;
11019			}
11020	0		v1 = VLOADU(p + 1*VL);
11021	0	0	if (len < 4*VL) {
11022	0		p += 2*VL;
11023	0		goto less_than_2vl_remaining;
11024			}
11025	0		v2 = VLOADU(p + 2*VL);
11026	0		v3 = VLOADU(p + 3*VL);
11027	0		p += 4*VL;
11028			} else {
11029
11030	0	0	if (len > 65536 && ((uintptr_t)p & (VL-1))) {
		0
11031	0		size_t align = -(uintptr_t)p & (VL-1);
11032
11033	0		len -= align;
11034	0		x0 = _mm_xor_si128(_mm_loadu_si128((const void *)p), x0);
11035	0		p += 16;
11036	0	0	if (align & 15) {
11037	0		x0 = fold_lessthan16bytes(x0, p, align & 15,
11038			mults_128b);
11039	0		p += align & 15;
11040	0		align &= ~15;
11041			}
11042	0	0	while (align) {
11043	0		x0 = fold_vec128(x0, *(const __m128i *)p,
11044			mults_128b);
11045	0		p += 16;
11046	0		align -= 16;
11047			}
11048	0		v0 = M128I_TO_VEC(x0);
11049			# if VL == 32
11050			v0 = _mm256_inserti128_si256(v0, *(const __m128i *)p, 1);
11051			# elif VL == 64
11052			v0 = _mm512_inserti32x4(v0, *(const __m128i *)p, 1);
11053			v0 = _mm512_inserti64x4(v0, *(const __m256i *)(p + 16), 1);
11054			# endif
11055	0		p -= 16;
11056			} else {
11057	0		v0 = VXOR(VLOADU(p), M128I_TO_VEC(x0));
11058			}
11059	0		v1 = VLOADU(p + 1*VL);
11060	0		v2 = VLOADU(p + 2*VL);
11061	0		v3 = VLOADU(p + 3*VL);
11062	0		v4 = VLOADU(p + 4*VL);
11063	0		v5 = VLOADU(p + 5*VL);
11064	0		v6 = VLOADU(p + 6*VL);
11065	0		v7 = VLOADU(p + 7*VL);
11066	0		p += 8*VL;
11067
11068
11069	0	0	while (len >= 16*VL) {
11070	0		v0 = fold_vec(v0, VLOADU(p + 0*VL), mults_8v);
11071	0		v1 = fold_vec(v1, VLOADU(p + 1*VL), mults_8v);
11072	0		v2 = fold_vec(v2, VLOADU(p + 2*VL), mults_8v);
11073	0		v3 = fold_vec(v3, VLOADU(p + 3*VL), mults_8v);
11074	0		v4 = fold_vec(v4, VLOADU(p + 4*VL), mults_8v);
11075	0		v5 = fold_vec(v5, VLOADU(p + 5*VL), mults_8v);
11076	0		v6 = fold_vec(v6, VLOADU(p + 6*VL), mults_8v);
11077	0		v7 = fold_vec(v7, VLOADU(p + 7*VL), mults_8v);
11078	0		p += 8*VL;
11079	0		len -= 8*VL;
11080			}
11081
11082
11083	0		v0 = fold_vec(v0, v4, mults_4v);
11084	0		v1 = fold_vec(v1, v5, mults_4v);
11085	0		v2 = fold_vec(v2, v6, mults_4v);
11086	0		v3 = fold_vec(v3, v7, mults_4v);
11087	0	0	if (len & (4*VL)) {
11088	0		v0 = fold_vec(v0, VLOADU(p + 0*VL), mults_4v);
11089	0		v1 = fold_vec(v1, VLOADU(p + 1*VL), mults_4v);
11090	0		v2 = fold_vec(v2, VLOADU(p + 2*VL), mults_4v);
11091	0		v3 = fold_vec(v3, VLOADU(p + 3*VL), mults_4v);
11092	0		p += 4*VL;
11093			}
11094			}
11095
11096	0		v0 = fold_vec(v0, v2, mults_2v);
11097	0		v1 = fold_vec(v1, v3, mults_2v);
11098	0	0	if (len & (2*VL)) {
11099	0		v0 = fold_vec(v0, VLOADU(p + 0*VL), mults_2v);
11100	0		v1 = fold_vec(v1, VLOADU(p + 1*VL), mults_2v);
11101	0		p += 2*VL;
11102			}
11103	0		less_than_2vl_remaining:
11104
11105	0		v0 = fold_vec(v0, v1, mults_1v);
11106	0	0	if (len & VL) {
11107	0		v0 = fold_vec(v0, VLOADU(p), mults_1v);
11108	0		p += VL;
11109			}
11110	0		less_than_vl_remaining:
11111
11112			#if VL == 16
11113	0		x0 = v0;
11114			#else
11115			{
11116			#if VL == 32
11117			__m256i y0 = v0;
11118			#else
11119			const __m256i mults_256b =
11120			_mm256_set_epi64x(CRC32_X223_MODG, CRC32_X287_MODG,
11121			CRC32_X223_MODG, CRC32_X287_MODG);
11122			__m256i y0 = fold_vec256(_mm512_extracti64x4_epi64(v0, 0),
11123			_mm512_extracti64x4_epi64(v0, 1),
11124			mults_256b);
11125			if (len & 32) {
11126			y0 = fold_vec256(y0, _mm256_loadu_si256((const void *)p),
11127			mults_256b);
11128			p += 32;
11129			}
11130			#endif
11131			x0 = fold_vec128(_mm256_extracti128_si256(y0, 0),
11132			_mm256_extracti128_si256(y0, 1), mults_128b);
11133			}
11134			if (len & 16) {
11135			x0 = fold_vec128(x0, _mm_loadu_si128((const void *)p),
11136			mults_128b);
11137			p += 16;
11138			}
11139			#endif
11140	0		less_than_16_remaining:
11141	0		len &= 15;
11142
11143
11144	0	0	if (len)
11145	0		x0 = fold_lessthan16bytes(x0, p, len, mults_128b);
11146			#if USE_AVX512
11147			reduce_x0:
11148			#endif
11149
11150	0		x0 = _mm_xor_si128(_mm_clmulepi64_si128(x0, mults_128b, 0x10),
11151	0		_mm_bsrli_si128(x0, 8));
11152	0		x1 = _mm_clmulepi64_si128(x0, barrett_reduction_constants, 0x00);
11153	0		x1 = _mm_clmulepi64_si128(x1, barrett_reduction_constants, 0x10);
11154	0		x0 = _mm_xor_si128(x0, x1);
11155	0		return _mm_extract_epi32(x0, 2);
11156			}
11157
11158			#undef vec_t
11159			#undef fold_vec
11160			#undef VLOADU
11161			#undef VXOR
11162			#undef M128I_TO_VEC
11163			#undef MULTS
11164			#undef MULTS_8V
11165			#undef MULTS_4V
11166			#undef MULTS_2V
11167			#undef MULTS_1V
11168
11169			#undef SUFFIX
11170			#undef ATTRIBUTES
11171			#undef VL
11172			#undef USE_AVX512
11173
11174
11175
11176			# define crc32_x86_pclmulqdq_avx crc32_x86_pclmulqdq_avx
11177			# define SUFFIX _pclmulqdq_avx
11178			# define ATTRIBUTES _target_attribute("pclmul,avx")
11179			# define VL 16
11180			# define USE_AVX512 0
11181			/* #include "x86-crc32_pclmul_template.h" */
11182
11183
11184
11185
11186			#if VL == 16
11187			# define vec_t __m128i
11188			# define fold_vec fold_vec128
11189			# define VLOADU(p) _mm_loadu_si128((const void *)(p))
11190			# define VXOR(a, b) _mm_xor_si128((a), (b))
11191			# define M128I_TO_VEC(a) a
11192			# define MULTS_8V _mm_set_epi64x(CRC32_X991_MODG, CRC32_X1055_MODG)
11193			# define MULTS_4V _mm_set_epi64x(CRC32_X479_MODG, CRC32_X543_MODG)
11194			# define MULTS_2V _mm_set_epi64x(CRC32_X223_MODG, CRC32_X287_MODG)
11195			# define MULTS_1V _mm_set_epi64x(CRC32_X95_MODG, CRC32_X159_MODG)
11196			#elif VL == 32
11197			# define vec_t __m256i
11198			# define fold_vec fold_vec256
11199			# define VLOADU(p) _mm256_loadu_si256((const void *)(p))
11200			# define VXOR(a, b) _mm256_xor_si256((a), (b))
11201			# define M128I_TO_VEC(a) _mm256_zextsi128_si256(a)
11202			# define MULTS(a, b) _mm256_set_epi64x(a, b, a, b)
11203			# define MULTS_8V MULTS(CRC32_X2015_MODG, CRC32_X2079_MODG)
11204			# define MULTS_4V MULTS(CRC32_X991_MODG, CRC32_X1055_MODG)
11205			# define MULTS_2V MULTS(CRC32_X479_MODG, CRC32_X543_MODG)
11206			# define MULTS_1V MULTS(CRC32_X223_MODG, CRC32_X287_MODG)
11207			#elif VL == 64
11208			# define vec_t __m512i
11209			# define fold_vec fold_vec512
11210			# define VLOADU(p) _mm512_loadu_si512((const void *)(p))
11211			# define VXOR(a, b) _mm512_xor_si512((a), (b))
11212			# define M128I_TO_VEC(a) _mm512_zextsi128_si512(a)
11213			# define MULTS(a, b) _mm512_set_epi64(a, b, a, b, a, b, a, b)
11214			# define MULTS_8V MULTS(CRC32_X4063_MODG, CRC32_X4127_MODG)
11215			# define MULTS_4V MULTS(CRC32_X2015_MODG, CRC32_X2079_MODG)
11216			# define MULTS_2V MULTS(CRC32_X991_MODG, CRC32_X1055_MODG)
11217			# define MULTS_1V MULTS(CRC32_X479_MODG, CRC32_X543_MODG)
11218			#else
11219			# error "unsupported vector length"
11220			#endif
11221
11222			#undef fold_vec128
11223			static forceinline ATTRIBUTES __m128i
11224			ADD_SUFFIX(fold_vec128)(__m128i src, __m128i dst, __m128i mults)
11225			{
11226	58		dst = _mm_xor_si128(dst, _mm_clmulepi64_si128(src, mults, 0x00));
11227	11484		dst = _mm_xor_si128(dst, _mm_clmulepi64_si128(src, mults, 0x11));
11228	11484		return dst;
11229			}
11230			#define fold_vec128 ADD_SUFFIX(fold_vec128)
11231
11232			#if VL >= 32
11233			#undef fold_vec256
11234			static forceinline ATTRIBUTES __m256i
11235			ADD_SUFFIX(fold_vec256)(__m256i src, __m256i dst, __m256i mults)
11236			{
11237			#if USE_AVX512
11238
11239			return _mm256_ternarylogic_epi32(
11240			_mm256_clmulepi64_epi128(src, mults, 0x00),
11241			_mm256_clmulepi64_epi128(src, mults, 0x11),
11242			dst,
11243			0x96);
11244			#else
11245			return _mm256_xor_si256(
11246			_mm256_xor_si256(dst,
11247			_mm256_clmulepi64_epi128(src, mults, 0x00)),
11248			_mm256_clmulepi64_epi128(src, mults, 0x11));
11249			#endif
11250			}
11251			#define fold_vec256 ADD_SUFFIX(fold_vec256)
11252			#endif
11253
11254			#if VL >= 64
11255			#undef fold_vec512
11256			static forceinline ATTRIBUTES __m512i
11257			ADD_SUFFIX(fold_vec512)(__m512i src, __m512i dst, __m512i mults)
11258			{
11259
11260			return _mm512_ternarylogic_epi32(
11261			_mm512_clmulepi64_epi128(src, mults, 0x00),
11262			_mm512_clmulepi64_epi128(src, mults, 0x11),
11263			dst,
11264			0x96);
11265			}
11266			#define fold_vec512 ADD_SUFFIX(fold_vec512)
11267			#endif
11268
11269
11270			#undef fold_lessthan16bytes
11271			static forceinline ATTRIBUTES __m128i
11272			ADD_SUFFIX(fold_lessthan16bytes)(__m128i x, const u8 *p, size_t len,
11273			__m128i mults_128b)
11274			{
11275	29		__m128i lshift = _mm_loadu_si128((const void *)&shift_tab[len]);
11276	58		__m128i rshift = _mm_loadu_si128((const void *)&shift_tab[len + 16]);
11277			__m128i x0, x1;
11278
11279
11280	29		x0 = _mm_shuffle_epi8(x, lshift);
11281
11282
11283	87		x1 = _mm_blendv_epi8(_mm_shuffle_epi8(x, rshift),
11284	29		_mm_loadu_si128((const void *)(p + len - 16)),
11285
11286			rshift);
11287
11288	29		return fold_vec128(x0, x1, mults_128b);
11289			}
11290			#define fold_lessthan16bytes ADD_SUFFIX(fold_lessthan16bytes)
11291
11292			static ATTRIBUTES u32
11293	29		ADD_SUFFIX(crc32_x86)(u32 crc, const u8 *p, size_t len)
11294			{
11295
11296	29		const vec_t mults_8v = MULTS_8V;
11297	29		const vec_t mults_4v = MULTS_4V;
11298	29		const vec_t mults_2v = MULTS_2V;
11299	29		const vec_t mults_1v = MULTS_1V;
11300	29		const __m128i mults_128b = _mm_set_epi64x(CRC32_X95_MODG, CRC32_X159_MODG);
11301			const __m128i barrett_reduction_constants =
11302	29		_mm_set_epi64x(CRC32_BARRETT_CONSTANT_2, CRC32_BARRETT_CONSTANT_1);
11303			vec_t v0, v1, v2, v3, v4, v5, v6, v7;
11304	29		__m128i x0 = _mm_cvtsi32_si128(crc);
11305			__m128i x1;
11306
11307	29	50	if (len < 8*VL) {
11308	0	0	if (len < VL) {
11309			STATIC_ASSERT(VL == 16 \|\| VL == 32 \|\| VL == 64);
11310	0	0	if (len < 16) {
11311			#if USE_AVX512
11312			if (len < 4)
11313			return crc32_slice1(crc, p, len);
11314
11315			x0 = _mm_xor_si128(
11316			x0, _mm_maskz_loadu_epi8((1 << len) - 1, p));
11317			x0 = _mm_shuffle_epi8(
11318			x0, _mm_loadu_si128((const void *)&shift_tab[len]));
11319			goto reduce_x0;
11320			#else
11321	0		return crc32_slice1(crc, p, len);
11322			#endif
11323			}
11324
11325	0		x0 = _mm_xor_si128(_mm_loadu_si128((const void *)p), x0);
11326	0	0	if (len >= 32) {
11327	0		x0 = fold_vec128(x0, _mm_loadu_si128((const void *)(p + 16)),
11328			mults_128b);
11329	0	0	if (len >= 48)
11330	0		x0 = fold_vec128(x0, _mm_loadu_si128((const void *)(p + 32)),
11331			mults_128b);
11332			}
11333	0		p += len & ~15;
11334	0		goto less_than_16_remaining;
11335			}
11336	0		v0 = VXOR(VLOADU(p), M128I_TO_VEC(x0));
11337	0	0	if (len < 2*VL) {
11338	0		p += VL;
11339	0		goto less_than_vl_remaining;
11340			}
11341	0		v1 = VLOADU(p + 1*VL);
11342	0	0	if (len < 4*VL) {
11343	0		p += 2*VL;
11344	0		goto less_than_2vl_remaining;
11345			}
11346	0		v2 = VLOADU(p + 2*VL);
11347	0		v3 = VLOADU(p + 3*VL);
11348	0		p += 4*VL;
11349			} else {
11350
11351	29	50	if (len > 65536 && ((uintptr_t)p & (VL-1))) {
		0
11352	0		size_t align = -(uintptr_t)p & (VL-1);
11353
11354	0		len -= align;
11355	0		x0 = _mm_xor_si128(_mm_loadu_si128((const void *)p), x0);
11356	0		p += 16;
11357	0	0	if (align & 15) {
11358	0		x0 = fold_lessthan16bytes(x0, p, align & 15,
11359			mults_128b);
11360	0		p += align & 15;
11361	0		align &= ~15;
11362			}
11363	0	0	while (align) {
11364	0		x0 = fold_vec128(x0, *(const __m128i *)p,
11365			mults_128b);
11366	0		p += 16;
11367	0		align -= 16;
11368			}
11369	0		v0 = M128I_TO_VEC(x0);
11370			# if VL == 32
11371			v0 = _mm256_inserti128_si256(v0, *(const __m128i *)p, 1);
11372			# elif VL == 64
11373			v0 = _mm512_inserti32x4(v0, *(const __m128i *)p, 1);
11374			v0 = _mm512_inserti64x4(v0, *(const __m256i *)(p + 16), 1);
11375			# endif
11376	0		p -= 16;
11377			} else {
11378	58		v0 = VXOR(VLOADU(p), M128I_TO_VEC(x0));
11379			}
11380	29		v1 = VLOADU(p + 1*VL);
11381	29		v2 = VLOADU(p + 2*VL);
11382	29		v3 = VLOADU(p + 3*VL);
11383	29		v4 = VLOADU(p + 4*VL);
11384	29		v5 = VLOADU(p + 5*VL);
11385	29		v6 = VLOADU(p + 6*VL);
11386	29		v7 = VLOADU(p + 7*VL);
11387	29		p += 8*VL;
11388
11389
11390	1427	100	while (len >= 16*VL) {
11391	1398		v0 = fold_vec(v0, VLOADU(p + 0*VL), mults_8v);
11392	2796		v1 = fold_vec(v1, VLOADU(p + 1*VL), mults_8v);
11393	2796		v2 = fold_vec(v2, VLOADU(p + 2*VL), mults_8v);
11394	2796		v3 = fold_vec(v3, VLOADU(p + 3*VL), mults_8v);
11395	2796		v4 = fold_vec(v4, VLOADU(p + 4*VL), mults_8v);
11396	2796		v5 = fold_vec(v5, VLOADU(p + 5*VL), mults_8v);
11397	2796		v6 = fold_vec(v6, VLOADU(p + 6*VL), mults_8v);
11398	2796		v7 = fold_vec(v7, VLOADU(p + 7*VL), mults_8v);
11399	1398		p += 8*VL;
11400	1398		len -= 8*VL;
11401			}
11402
11403
11404	29		v0 = fold_vec(v0, v4, mults_4v);
11405	29		v1 = fold_vec(v1, v5, mults_4v);
11406	29		v2 = fold_vec(v2, v6, mults_4v);
11407	29		v3 = fold_vec(v3, v7, mults_4v);
11408	29	100	if (len & (4*VL)) {
11409	5		v0 = fold_vec(v0, VLOADU(p + 0*VL), mults_4v);
11410	10		v1 = fold_vec(v1, VLOADU(p + 1*VL), mults_4v);
11411	10		v2 = fold_vec(v2, VLOADU(p + 2*VL), mults_4v);
11412	10		v3 = fold_vec(v3, VLOADU(p + 3*VL), mults_4v);
11413	5		p += 4*VL;
11414			}
11415			}
11416
11417	29		v0 = fold_vec(v0, v2, mults_2v);
11418	29		v1 = fold_vec(v1, v3, mults_2v);
11419	29	100	if (len & (2*VL)) {
11420	24		v0 = fold_vec(v0, VLOADU(p + 0*VL), mults_2v);
11421	48		v1 = fold_vec(v1, VLOADU(p + 1*VL), mults_2v);
11422	24		p += 2*VL;
11423			}
11424	29		less_than_2vl_remaining:
11425
11426	29		v0 = fold_vec(v0, v1, mults_1v);
11427	29	50	if (len & VL) {
11428	0		v0 = fold_vec(v0, VLOADU(p), mults_1v);
11429	0		p += VL;
11430			}
11431	29		less_than_vl_remaining:
11432
11433			#if VL == 16
11434	29		x0 = v0;
11435			#else
11436			{
11437			#if VL == 32
11438			__m256i y0 = v0;
11439			#else
11440			const __m256i mults_256b =
11441			_mm256_set_epi64x(CRC32_X223_MODG, CRC32_X287_MODG,
11442			CRC32_X223_MODG, CRC32_X287_MODG);
11443			__m256i y0 = fold_vec256(_mm512_extracti64x4_epi64(v0, 0),
11444			_mm512_extracti64x4_epi64(v0, 1),
11445			mults_256b);
11446			if (len & 32) {
11447			y0 = fold_vec256(y0, _mm256_loadu_si256((const void *)p),
11448			mults_256b);
11449			p += 32;
11450			}
11451			#endif
11452			x0 = fold_vec128(_mm256_extracti128_si256(y0, 0),
11453			_mm256_extracti128_si256(y0, 1), mults_128b);
11454			}
11455			if (len & 16) {
11456			x0 = fold_vec128(x0, _mm_loadu_si128((const void *)p),
11457			mults_128b);
11458			p += 16;
11459			}
11460			#endif
11461	29		less_than_16_remaining:
11462	29		len &= 15;
11463
11464
11465	29	50	if (len)
11466	29		x0 = fold_lessthan16bytes(x0, p, len, mults_128b);
11467			#if USE_AVX512
11468			reduce_x0:
11469			#endif
11470
11471	58		x0 = _mm_xor_si128(_mm_clmulepi64_si128(x0, mults_128b, 0x10),
11472	29		_mm_bsrli_si128(x0, 8));
11473	29		x1 = _mm_clmulepi64_si128(x0, barrett_reduction_constants, 0x00);
11474	29		x1 = _mm_clmulepi64_si128(x1, barrett_reduction_constants, 0x10);
11475	29		x0 = _mm_xor_si128(x0, x1);
11476	29		return _mm_extract_epi32(x0, 2);
11477			}
11478
11479			#undef vec_t
11480			#undef fold_vec
11481			#undef VLOADU
11482			#undef VXOR
11483			#undef M128I_TO_VEC
11484			#undef MULTS
11485			#undef MULTS_8V
11486			#undef MULTS_4V
11487			#undef MULTS_2V
11488			#undef MULTS_1V
11489
11490			#undef SUFFIX
11491			#undef ATTRIBUTES
11492			#undef VL
11493			#undef USE_AVX512
11494
11495			#endif
11496
11497
11498			#if (GCC_PREREQ(10, 1) \|\| CLANG_PREREQ(6, 0, 10000000)) && \
11499			!defined(LIBDEFLATE_ASSEMBLER_DOES_NOT_SUPPORT_VPCLMULQDQ)
11500			# define crc32_x86_vpclmulqdq_avx2 crc32_x86_vpclmulqdq_avx2
11501			# define SUFFIX _vpclmulqdq_avx2
11502			# define ATTRIBUTES _target_attribute("vpclmulqdq,pclmul,avx2")
11503			# define VL 32
11504			# define USE_AVX512 0
11505			/* #include "x86-crc32_pclmul_template.h" */
11506
11507
11508
11509
			/*
			 * Select the vector type, the load / XOR / zero-extend wrappers and the
			 * folding multiplier constants for the configured vector length VL
			 * (16, 32 or 64 bytes).  MULTS_nV is the constant pair the kernel uses
			 * when folding across a distance of n vectors.  Any other VL is a
			 * compile-time error.
			 */
11510			#if VL == 16
11511			# define vec_t __m128i
11512			# define fold_vec fold_vec128
11513			# define VLOADU(p) _mm_loadu_si128((const void *)(p))
11514			# define VXOR(a, b) _mm_xor_si128((a), (b))
11515			# define M128I_TO_VEC(a) a
11516			# define MULTS_8V _mm_set_epi64x(CRC32_X991_MODG, CRC32_X1055_MODG)
11517			# define MULTS_4V _mm_set_epi64x(CRC32_X479_MODG, CRC32_X543_MODG)
11518			# define MULTS_2V _mm_set_epi64x(CRC32_X223_MODG, CRC32_X287_MODG)
11519			# define MULTS_1V _mm_set_epi64x(CRC32_X95_MODG, CRC32_X159_MODG)
11520			#elif VL == 32
11521			# define vec_t __m256i
11522			# define fold_vec fold_vec256
11523			# define VLOADU(p) _mm256_loadu_si256((const void *)(p))
11524			# define VXOR(a, b) _mm256_xor_si256((a), (b))
11525			# define M128I_TO_VEC(a) _mm256_zextsi128_si256(a)
11526			# define MULTS(a, b) _mm256_set_epi64x(a, b, a, b)
11527			# define MULTS_8V MULTS(CRC32_X2015_MODG, CRC32_X2079_MODG)
11528			# define MULTS_4V MULTS(CRC32_X991_MODG, CRC32_X1055_MODG)
11529			# define MULTS_2V MULTS(CRC32_X479_MODG, CRC32_X543_MODG)
11530			# define MULTS_1V MULTS(CRC32_X223_MODG, CRC32_X287_MODG)
11531			#elif VL == 64
11532			# define vec_t __m512i
11533			# define fold_vec fold_vec512
11534			# define VLOADU(p) _mm512_loadu_si512((const void *)(p))
11535			# define VXOR(a, b) _mm512_xor_si512((a), (b))
11536			# define M128I_TO_VEC(a) _mm512_zextsi128_si512(a)
11537			# define MULTS(a, b) _mm512_set_epi64(a, b, a, b, a, b, a, b)
11538			# define MULTS_8V MULTS(CRC32_X4063_MODG, CRC32_X4127_MODG)
11539			# define MULTS_4V MULTS(CRC32_X2015_MODG, CRC32_X2079_MODG)
11540			# define MULTS_2V MULTS(CRC32_X991_MODG, CRC32_X1055_MODG)
11541			# define MULTS_1V MULTS(CRC32_X479_MODG, CRC32_X543_MODG)
11542			#else
11543			# error "unsupported vector length"
11544			#endif
11545
11546			#undef fold_vec128
			/*
			 * Fold one 128-bit vector into another.  Carry-less multiplies the low
			 * 64-bit halves of 'src' and 'mults' (selector 0x00) and their high
			 * 64-bit halves (selector 0x11), and XORs both products into 'dst'.
			 * Returns the updated 'dst'.
			 */
11547			static forceinline ATTRIBUTES __m128i
11548			ADD_SUFFIX(fold_vec128)(__m128i src, __m128i dst, __m128i mults)
11549			{
11550	0		dst = _mm_xor_si128(dst, _mm_clmulepi64_si128(src, mults, 0x00));
11551	0		dst = _mm_xor_si128(dst, _mm_clmulepi64_si128(src, mults, 0x11));
11552	0		return dst;
11553			}
11554			#define fold_vec128 ADD_SUFFIX(fold_vec128)
11555
11556			#if VL >= 32
11557			#undef fold_vec256
			/*
			 * 256-bit counterpart of fold_vec128: XORs the two carry-less products
			 * of 'src' and 'mults' (selectors 0x00 and 0x11) into 'dst' and returns
			 * the result.  With USE_AVX512 the three-way XOR is a single ternary
			 * logic operation (truth table 0x96); otherwise two XORs are used.
			 */
11558			static forceinline ATTRIBUTES __m256i
11559			ADD_SUFFIX(fold_vec256)(__m256i src, __m256i dst, __m256i mults)
11560			{
11561			#if USE_AVX512
11562
11563			return _mm256_ternarylogic_epi32(
11564			_mm256_clmulepi64_epi128(src, mults, 0x00),
11565			_mm256_clmulepi64_epi128(src, mults, 0x11),
11566			dst,
11567			0x96);
11568			#else
11569	0		return _mm256_xor_si256(
11570			_mm256_xor_si256(dst,
11571	0		_mm256_clmulepi64_epi128(src, mults, 0x00)),
11572	0		_mm256_clmulepi64_epi128(src, mults, 0x11));
11573			#endif
11574			}
11575			#define fold_vec256 ADD_SUFFIX(fold_vec256)
11576			#endif
11577
11578			#if VL >= 64
11579			#undef fold_vec512
			/*
			 * 512-bit counterpart of fold_vec128: returns the three-way XOR
			 * (ternary logic truth table 0x96) of 'dst' and the two carry-less
			 * products of 'src' and 'mults' (selectors 0x00 and 0x11).
			 */
11580			static forceinline ATTRIBUTES __m512i
11581			ADD_SUFFIX(fold_vec512)(__m512i src, __m512i dst, __m512i mults)
11582			{
11583
11584			return _mm512_ternarylogic_epi32(
11585			_mm512_clmulepi64_epi128(src, mults, 0x00),
11586			_mm512_clmulepi64_epi128(src, mults, 0x11),
11587			dst,
11588			0x96);
11589			}
11590			#define fold_vec512 ADD_SUFFIX(fold_vec512)
11591			#endif
11592
11593
11594			#undef fold_lessthan16bytes
			/*
			 * Fold the 128-bit state 'x' across the 'len' input bytes that end at
			 * p + len (the callers in this file pass 1 <= len <= 15).  Two 16-byte
			 * shuffle masks are read from shift_tab at byte offsets len and
			 * len + 16.  x0 is 'x' shuffled by the first mask; each byte of x1 comes
			 * either from 'x' shuffled by the second mask or from the 16 bytes
			 * ending at p + len, chosen by the top bit of the matching rshift byte.
			 * Returns fold_vec128(x0, x1, mults_128b).
			 *
			 * Note the load from p + len - 16: bytes located before 'p' are read,
			 * so they must be accessible.
			 */
11595			static forceinline ATTRIBUTES __m128i
11596			ADD_SUFFIX(fold_lessthan16bytes)(__m128i x, const u8 *p, size_t len,
11597			__m128i mults_128b)
11598			{
11599	0		__m128i lshift = _mm_loadu_si128((const void *)&shift_tab[len]);
11600	0		__m128i rshift = _mm_loadu_si128((const void *)&shift_tab[len + 16]);
11601			__m128i x0, x1;
11602
11603
11604	0		x0 = _mm_shuffle_epi8(x, lshift);
11605
11606
11607	0		x1 = _mm_blendv_epi8(_mm_shuffle_epi8(x, rshift),
11608	0		_mm_loadu_si128((const void *)(p + len - 16)),
11609
11610			rshift);
11611
11612	0		return fold_vec128(x0, x1, mults_128b);
11613			}
11614			#define fold_lessthan16bytes ADD_SUFFIX(fold_lessthan16bytes)
11615
			/*
			 * CRC-32 kernel for this template instantiation.  Folds the 'len' bytes
			 * at 'p' into the running value 'crc' with carry-less multiplication
			 * on VL-byte vectors, then reduces the remaining 128-bit value.
			 *
			 * crc: incoming CRC state; placed in the low 32 bits of x0.
			 * p:   input bytes.
			 * len: number of input bytes.
			 *
			 * Returns 32-bit element 2 of the reduced vector.  Inputs shorter than
			 * 16 bytes are handed to crc32_slice1(), except that with USE_AVX512
			 * only inputs shorter than 4 bytes are, and 4..15 bytes are handled
			 * with a masked load.
			 */
11616			static ATTRIBUTES u32
11617	0		ADD_SUFFIX(crc32_x86)(u32 crc, const u8 *p, size_t len)
11618			{
11619
11620	0		const vec_t mults_8v = MULTS_8V;
11621	0		const vec_t mults_4v = MULTS_4V;
11622	0		const vec_t mults_2v = MULTS_2V;
11623	0		const vec_t mults_1v = MULTS_1V;
11624	0		const __m128i mults_128b = _mm_set_epi64x(CRC32_X95_MODG, CRC32_X159_MODG);
11625			const __m128i barrett_reduction_constants =
11626	0		_mm_set_epi64x(CRC32_BARRETT_CONSTANT_2, CRC32_BARRETT_CONSTANT_1);
11627			vec_t v0, v1, v2, v3, v4, v5, v6, v7;
11628	0		__m128i x0 = _mm_cvtsi32_si128(crc);
11629			__m128i x1;
11630
			/*
			 * Fewer than 8 vectors of input: load only the vectors that exist
			 * and jump into the matching stage of the fold-down sequence.
			 */
11631	0	0	if (len < 8*VL) {
11632	0	0	if (len < VL) {
11633			STATIC_ASSERT(VL == 16 || VL == 32 || VL == 64);
11634	0	0	if (len < 16) {
11635			#if USE_AVX512
11636			if (len < 4)
11637			return crc32_slice1(crc, p, len);
11638
11639			x0 = _mm_xor_si128(
11640			x0, _mm_maskz_loadu_epi8((1 << len) - 1, p));
11641			x0 = _mm_shuffle_epi8(
11642			x0, _mm_loadu_si128((const void *)&shift_tab[len]));
11643			goto reduce_x0;
11644			#else
11645	0		return crc32_slice1(crc, p, len);
11646			#endif
11647			}
11648
11649	0		x0 = _mm_xor_si128(_mm_loadu_si128((const void *)p), x0);
11650	0	0	if (len >= 32) {
11651	0		x0 = fold_vec128(x0, _mm_loadu_si128((const void *)(p + 16)),
11652			mults_128b);
11653	0	0	if (len >= 48)
11654	0		x0 = fold_vec128(x0, _mm_loadu_si128((const void *)(p + 32)),
11655			mults_128b);
11656			}
11657	0		p += len & ~15;
11658	0		goto less_than_16_remaining;
11659			}
11660	0		v0 = VXOR(VLOADU(p), M128I_TO_VEC(x0));
11661	0	0	if (len < 2*VL) {
11662	0		p += VL;
11663	0		goto less_than_vl_remaining;
11664			}
11665	0		v1 = VLOADU(p + 1*VL);
11666	0	0	if (len < 4*VL) {
11667	0		p += 2*VL;
11668	0		goto less_than_2vl_remaining;
11669			}
11670	0		v2 = VLOADU(p + 2*VL);
11671	0		v3 = VLOADU(p + 3*VL);
11672	0		p += 4*VL;
11673			} else {
11674
			/*
			 * For long inputs whose start is not VL-aligned, first fold the
			 * leading 16 + align bytes into x0 so that 'p' ends up on a
			 * VL-byte boundary for the loads that follow.
			 */
11675	0	0	if (len > 65536 && ((uintptr_t)p & (VL-1))) {
		0
11676	0		size_t align = -(uintptr_t)p & (VL-1);
11677
11678	0		len -= align;
11679	0		x0 = _mm_xor_si128(_mm_loadu_si128((const void *)p), x0);
11680	0		p += 16;
11681	0	0	if (align & 15) {
11682	0		x0 = fold_lessthan16bytes(x0, p, align & 15,
11683			mults_128b);
11684	0		p += align & 15;
11685	0		align &= ~15;
11686			}
			/* 'p' is 16-byte aligned from here on, so it is dereferenced directly. */
11687	0	0	while (align) {
11688	0		x0 = fold_vec128(x0, *(const __m128i *)p,
11689			mults_128b);
11690	0		p += 16;
11691	0		align -= 16;
11692			}
11693	0		v0 = M128I_TO_VEC(x0);
11694			# if VL == 32
11695	0		v0 = _mm256_inserti128_si256(v0, *(const __m128i *)p, 1);
11696			# elif VL == 64
11697			v0 = _mm512_inserti32x4(v0, *(const __m128i *)p, 1);
11698			v0 = _mm512_inserti64x4(v0, *(const __m256i *)(p + 16), 1);
11699			# endif
11700	0		p -= 16;
11701			} else {
11702	0		v0 = VXOR(VLOADU(p), M128I_TO_VEC(x0));
11703			}
11704	0		v1 = VLOADU(p + 1*VL);
11705	0		v2 = VLOADU(p + 2*VL);
11706	0		v3 = VLOADU(p + 3*VL);
11707	0		v4 = VLOADU(p + 4*VL);
11708	0		v5 = VLOADU(p + 5*VL);
11709	0		v6 = VLOADU(p + 6*VL);
11710	0		v7 = VLOADU(p + 7*VL);
11711	0		p += 8*VL;
11712
11713
			/* Main loop: fold 8*VL more input bytes into v0..v7 per iteration. */
11714	0	0	while (len >= 16*VL) {
11715	0		v0 = fold_vec(v0, VLOADU(p + 0*VL), mults_8v);
11716	0		v1 = fold_vec(v1, VLOADU(p + 1*VL), mults_8v);
11717	0		v2 = fold_vec(v2, VLOADU(p + 2*VL), mults_8v);
11718	0		v3 = fold_vec(v3, VLOADU(p + 3*VL), mults_8v);
11719	0		v4 = fold_vec(v4, VLOADU(p + 4*VL), mults_8v);
11720	0		v5 = fold_vec(v5, VLOADU(p + 5*VL), mults_8v);
11721	0		v6 = fold_vec(v6, VLOADU(p + 6*VL), mults_8v);
11722	0		v7 = fold_vec(v7, VLOADU(p + 7*VL), mults_8v);
11723	0		p += 8*VL;
11724	0		len -= 8*VL;
11725			}
11726
11727
			/* Fold 8 vectors down to 4, absorbing 4 more input vectors if present. */
11728	0		v0 = fold_vec(v0, v4, mults_4v);
11729	0		v1 = fold_vec(v1, v5, mults_4v);
11730	0		v2 = fold_vec(v2, v6, mults_4v);
11731	0		v3 = fold_vec(v3, v7, mults_4v);
11732	0	0	if (len & (4*VL)) {
11733	0		v0 = fold_vec(v0, VLOADU(p + 0*VL), mults_4v);
11734	0		v1 = fold_vec(v1, VLOADU(p + 1*VL), mults_4v);
11735	0		v2 = fold_vec(v2, VLOADU(p + 2*VL), mults_4v);
11736	0		v3 = fold_vec(v3, VLOADU(p + 3*VL), mults_4v);
11737	0		p += 4*VL;
11738			}
11739			}
11740
11741	0		v0 = fold_vec(v0, v2, mults_2v);
11742	0		v1 = fold_vec(v1, v3, mults_2v);
11743	0	0	if (len & (2*VL)) {
11744	0		v0 = fold_vec(v0, VLOADU(p + 0*VL), mults_2v);
11745	0		v1 = fold_vec(v1, VLOADU(p + 1*VL), mults_2v);
11746	0		p += 2*VL;
11747			}
11748	0		less_than_2vl_remaining:
11749
11750	0		v0 = fold_vec(v0, v1, mults_1v);
11751	0	0	if (len & VL) {
11752	0		v0 = fold_vec(v0, VLOADU(p), mults_1v);
11753	0		p += VL;
11754			}
11755	0		less_than_vl_remaining:
11756
11757			#if VL == 16
11758			x0 = v0;
11759			#else
11760			{
11761			#if VL == 32
11762	0		__m256i y0 = v0;
11763			#else
11764			const __m256i mults_256b =
11765			_mm256_set_epi64x(CRC32_X223_MODG, CRC32_X287_MODG,
11766			CRC32_X223_MODG, CRC32_X287_MODG);
11767			__m256i y0 = fold_vec256(_mm512_extracti64x4_epi64(v0, 0),
11768			_mm512_extracti64x4_epi64(v0, 1),
11769			mults_256b);
11770			if (len & 32) {
11771			y0 = fold_vec256(y0, _mm256_loadu_si256((const void *)p),
11772			mults_256b);
11773			p += 32;
11774			}
11775			#endif
11776	0		x0 = fold_vec128(_mm256_extracti128_si256(y0, 0),
11777	0		_mm256_extracti128_si256(y0, 1), mults_128b);
11778			}
11779	0	0	if (len & 16) {
11780	0		x0 = fold_vec128(x0, _mm_loadu_si128((const void *)p),
11781			mults_128b);
11782	0		p += 16;
11783			}
11784			#endif
11785	0		less_than_16_remaining:
11786	0		len &= 15;
11787
11788
11789	0	0	if (len)
11790	0		x0 = fold_lessthan16bytes(x0, p, len, mults_128b);
11791			#if USE_AVX512
11792			reduce_x0:
11793			#endif
11794
			/* Final reduction of x0 using mults_128b and barrett_reduction_constants. */
11795	0		x0 = _mm_xor_si128(_mm_clmulepi64_si128(x0, mults_128b, 0x10),
11796	0		_mm_bsrli_si128(x0, 8));
11797	0		x1 = _mm_clmulepi64_si128(x0, barrett_reduction_constants, 0x00);
11798	0		x1 = _mm_clmulepi64_si128(x1, barrett_reduction_constants, 0x10);
11799	0		x0 = _mm_xor_si128(x0, x1);
11800	0		return _mm_extract_epi32(x0, 2);
11801			}
11802
11803			#undef vec_t
11804			#undef fold_vec
11805			#undef VLOADU
11806			#undef VXOR
11807			#undef M128I_TO_VEC
11808			#undef MULTS
11809			#undef MULTS_8V
11810			#undef MULTS_4V
11811			#undef MULTS_2V
11812			#undef MULTS_1V
11813
11814			#undef SUFFIX
11815			#undef ATTRIBUTES
11816			#undef VL
11817			#undef USE_AVX512
11818
11819			#endif
11820
11821			#if (GCC_PREREQ(10, 1) || CLANG_PREREQ(6, 0, 10000000) || MSVC_PREREQ(1920)) && \
11822			!defined(LIBDEFLATE_ASSEMBLER_DOES_NOT_SUPPORT_VPCLMULQDQ)
11823
			/*
			 * Parameters for the VPCLMULQDQ + AVX-512 instantiation of the CRC-32
			 * template that uses 32-byte vectors.  The self-referential #define
			 * lets later code test with #ifdef whether this variant was compiled
			 * in.  This #if also encloses the 64-byte-vector instantiation that
			 * follows it.
			 */
11824			# define crc32_x86_vpclmulqdq_avx512_vl256 crc32_x86_vpclmulqdq_avx512_vl256
11825			# define SUFFIX _vpclmulqdq_avx512_vl256
11826			# define ATTRIBUTES _target_attribute("vpclmulqdq,pclmul,avx512bw,avx512vl")
11827			# define VL 32
11828			# define USE_AVX512 1
11829			/* #include "x86-crc32_pclmul_template.h" */
11830
11831
11832
11833
			/*
			 * Select the vector type, the load / XOR / zero-extend wrappers and the
			 * folding multiplier constants for the configured vector length VL
			 * (16, 32 or 64 bytes).  MULTS_nV is the constant pair the kernel uses
			 * when folding across a distance of n vectors.  Any other VL is a
			 * compile-time error.
			 */
11834			#if VL == 16
11835			# define vec_t __m128i
11836			# define fold_vec fold_vec128
11837			# define VLOADU(p) _mm_loadu_si128((const void *)(p))
11838			# define VXOR(a, b) _mm_xor_si128((a), (b))
11839			# define M128I_TO_VEC(a) a
11840			# define MULTS_8V _mm_set_epi64x(CRC32_X991_MODG, CRC32_X1055_MODG)
11841			# define MULTS_4V _mm_set_epi64x(CRC32_X479_MODG, CRC32_X543_MODG)
11842			# define MULTS_2V _mm_set_epi64x(CRC32_X223_MODG, CRC32_X287_MODG)
11843			# define MULTS_1V _mm_set_epi64x(CRC32_X95_MODG, CRC32_X159_MODG)
11844			#elif VL == 32
11845			# define vec_t __m256i
11846			# define fold_vec fold_vec256
11847			# define VLOADU(p) _mm256_loadu_si256((const void *)(p))
11848			# define VXOR(a, b) _mm256_xor_si256((a), (b))
11849			# define M128I_TO_VEC(a) _mm256_zextsi128_si256(a)
11850			# define MULTS(a, b) _mm256_set_epi64x(a, b, a, b)
11851			# define MULTS_8V MULTS(CRC32_X2015_MODG, CRC32_X2079_MODG)
11852			# define MULTS_4V MULTS(CRC32_X991_MODG, CRC32_X1055_MODG)
11853			# define MULTS_2V MULTS(CRC32_X479_MODG, CRC32_X543_MODG)
11854			# define MULTS_1V MULTS(CRC32_X223_MODG, CRC32_X287_MODG)
11855			#elif VL == 64
11856			# define vec_t __m512i
11857			# define fold_vec fold_vec512
11858			# define VLOADU(p) _mm512_loadu_si512((const void *)(p))
11859			# define VXOR(a, b) _mm512_xor_si512((a), (b))
11860			# define M128I_TO_VEC(a) _mm512_zextsi128_si512(a)
11861			# define MULTS(a, b) _mm512_set_epi64(a, b, a, b, a, b, a, b)
11862			# define MULTS_8V MULTS(CRC32_X4063_MODG, CRC32_X4127_MODG)
11863			# define MULTS_4V MULTS(CRC32_X2015_MODG, CRC32_X2079_MODG)
11864			# define MULTS_2V MULTS(CRC32_X991_MODG, CRC32_X1055_MODG)
11865			# define MULTS_1V MULTS(CRC32_X479_MODG, CRC32_X543_MODG)
11866			#else
11867			# error "unsupported vector length"
11868			#endif
11869
11870			#undef fold_vec128
			/*
			 * Fold one 128-bit vector into another.  Carry-less multiplies the low
			 * 64-bit halves of 'src' and 'mults' (selector 0x00) and their high
			 * 64-bit halves (selector 0x11), and XORs both products into 'dst'.
			 * Returns the updated 'dst'.
			 */
11871			static forceinline ATTRIBUTES __m128i
11872			ADD_SUFFIX(fold_vec128)(__m128i src, __m128i dst, __m128i mults)
11873			{
11874	0		dst = _mm_xor_si128(dst, _mm_clmulepi64_si128(src, mults, 0x00));
11875	0		dst = _mm_xor_si128(dst, _mm_clmulepi64_si128(src, mults, 0x11));
11876	0		return dst;
11877			}
11878			#define fold_vec128 ADD_SUFFIX(fold_vec128)
11879
11880			#if VL >= 32
11881			#undef fold_vec256
			/*
			 * 256-bit counterpart of fold_vec128: XORs the two carry-less products
			 * of 'src' and 'mults' (selectors 0x00 and 0x11) into 'dst' and returns
			 * the result.  With USE_AVX512 the three-way XOR is a single ternary
			 * logic operation (truth table 0x96); otherwise two XORs are used.
			 */
11882			static forceinline ATTRIBUTES __m256i
11883			ADD_SUFFIX(fold_vec256)(__m256i src, __m256i dst, __m256i mults)
11884			{
11885			#if USE_AVX512
11886
11887	0		return _mm256_ternarylogic_epi32(
11888			_mm256_clmulepi64_epi128(src, mults, 0x00),
11889			_mm256_clmulepi64_epi128(src, mults, 0x11),
11890			dst,
11891			0x96);
11892			#else
11893			return _mm256_xor_si256(
11894			_mm256_xor_si256(dst,
11895			_mm256_clmulepi64_epi128(src, mults, 0x00)),
11896			_mm256_clmulepi64_epi128(src, mults, 0x11));
11897			#endif
11898			}
11899			#define fold_vec256 ADD_SUFFIX(fold_vec256)
11900			#endif
11901
11902			#if VL >= 64
11903			#undef fold_vec512
			/*
			 * 512-bit counterpart of fold_vec128: returns the three-way XOR
			 * (ternary logic truth table 0x96) of 'dst' and the two carry-less
			 * products of 'src' and 'mults' (selectors 0x00 and 0x11).
			 */
11904			static forceinline ATTRIBUTES __m512i
11905			ADD_SUFFIX(fold_vec512)(__m512i src, __m512i dst, __m512i mults)
11906			{
11907
11908			return _mm512_ternarylogic_epi32(
11909			_mm512_clmulepi64_epi128(src, mults, 0x00),
11910			_mm512_clmulepi64_epi128(src, mults, 0x11),
11911			dst,
11912			0x96);
11913			}
11914			#define fold_vec512 ADD_SUFFIX(fold_vec512)
11915			#endif
11916
11917
11918			#undef fold_lessthan16bytes
			/*
			 * Fold the 128-bit state 'x' across the 'len' input bytes that end at
			 * p + len (the callers in this file pass 1 <= len <= 15).  Two 16-byte
			 * shuffle masks are read from shift_tab at byte offsets len and
			 * len + 16.  x0 is 'x' shuffled by the first mask; each byte of x1 comes
			 * either from 'x' shuffled by the second mask or from the 16 bytes
			 * ending at p + len, chosen by the top bit of the matching rshift byte.
			 * Returns fold_vec128(x0, x1, mults_128b).
			 *
			 * Note the load from p + len - 16: bytes located before 'p' are read,
			 * so they must be accessible.
			 */
11919			static forceinline ATTRIBUTES __m128i
11920			ADD_SUFFIX(fold_lessthan16bytes)(__m128i x, const u8 *p, size_t len,
11921			__m128i mults_128b)
11922			{
11923	0		__m128i lshift = _mm_loadu_si128((const void *)&shift_tab[len]);
11924	0		__m128i rshift = _mm_loadu_si128((const void *)&shift_tab[len + 16]);
11925			__m128i x0, x1;
11926
11927
11928	0		x0 = _mm_shuffle_epi8(x, lshift);
11929
11930
11931	0		x1 = _mm_blendv_epi8(_mm_shuffle_epi8(x, rshift),
11932	0		_mm_loadu_si128((const void *)(p + len - 16)),
11933
11934			rshift);
11935
11936	0		return fold_vec128(x0, x1, mults_128b);
11937			}
11938			#define fold_lessthan16bytes ADD_SUFFIX(fold_lessthan16bytes)
11939
			/*
			 * CRC-32 kernel for this template instantiation.  Folds the 'len' bytes
			 * at 'p' into the running value 'crc' with carry-less multiplication
			 * on VL-byte vectors, then reduces the remaining 128-bit value.
			 *
			 * crc: incoming CRC state; placed in the low 32 bits of x0.
			 * p:   input bytes.
			 * len: number of input bytes.
			 *
			 * Returns 32-bit element 2 of the reduced vector.  Inputs shorter than
			 * 16 bytes are handed to crc32_slice1(), except that with USE_AVX512
			 * only inputs shorter than 4 bytes are, and 4..15 bytes are handled
			 * with a masked load.
			 */
11940			static ATTRIBUTES u32
11941	0		ADD_SUFFIX(crc32_x86)(u32 crc, const u8 *p, size_t len)
11942			{
11943
11944	0		const vec_t mults_8v = MULTS_8V;
11945	0		const vec_t mults_4v = MULTS_4V;
11946	0		const vec_t mults_2v = MULTS_2V;
11947	0		const vec_t mults_1v = MULTS_1V;
11948	0		const __m128i mults_128b = _mm_set_epi64x(CRC32_X95_MODG, CRC32_X159_MODG);
11949			const __m128i barrett_reduction_constants =
11950	0		_mm_set_epi64x(CRC32_BARRETT_CONSTANT_2, CRC32_BARRETT_CONSTANT_1);
11951			vec_t v0, v1, v2, v3, v4, v5, v6, v7;
11952	0		__m128i x0 = _mm_cvtsi32_si128(crc);
11953			__m128i x1;
11954
			/*
			 * Fewer than 8 vectors of input: load only the vectors that exist
			 * and jump into the matching stage of the fold-down sequence.
			 */
11955	0	0	if (len < 8*VL) {
11956	0	0	if (len < VL) {
11957			STATIC_ASSERT(VL == 16 || VL == 32 || VL == 64);
11958	0	0	if (len < 16) {
11959			#if USE_AVX512
11960	0	0	if (len < 4)
11961	0		return crc32_slice1(crc, p, len);
11962
11963	0		x0 = _mm_xor_si128(
11964	0		x0, _mm_maskz_loadu_epi8((1 << len) - 1, p));
11965	0		x0 = _mm_shuffle_epi8(
11966	0		x0, _mm_loadu_si128((const void *)&shift_tab[len]));
11967	0		goto reduce_x0;
11968			#else
11969			return crc32_slice1(crc, p, len);
11970			#endif
11971			}
11972
11973	0		x0 = _mm_xor_si128(_mm_loadu_si128((const void *)p), x0);
11974	0	0	if (len >= 32) {
11975	0		x0 = fold_vec128(x0, _mm_loadu_si128((const void *)(p + 16)),
11976			mults_128b);
11977	0	0	if (len >= 48)
11978	0		x0 = fold_vec128(x0, _mm_loadu_si128((const void *)(p + 32)),
11979			mults_128b);
11980			}
11981	0		p += len & ~15;
11982	0		goto less_than_16_remaining;
11983			}
11984	0		v0 = VXOR(VLOADU(p), M128I_TO_VEC(x0));
11985	0	0	if (len < 2*VL) {
11986	0		p += VL;
11987	0		goto less_than_vl_remaining;
11988			}
11989	0		v1 = VLOADU(p + 1*VL);
11990	0	0	if (len < 4*VL) {
11991	0		p += 2*VL;
11992	0		goto less_than_2vl_remaining;
11993			}
11994	0		v2 = VLOADU(p + 2*VL);
11995	0		v3 = VLOADU(p + 3*VL);
11996	0		p += 4*VL;
11997			} else {
11998
			/*
			 * For long inputs whose start is not VL-aligned, first fold the
			 * leading 16 + align bytes into x0 so that 'p' ends up on a
			 * VL-byte boundary for the loads that follow.
			 */
11999	0	0	if (len > 65536 && ((uintptr_t)p & (VL-1))) {
		0
12000	0		size_t align = -(uintptr_t)p & (VL-1);
12001
12002	0		len -= align;
12003	0		x0 = _mm_xor_si128(_mm_loadu_si128((const void *)p), x0);
12004	0		p += 16;
12005	0	0	if (align & 15) {
12006	0		x0 = fold_lessthan16bytes(x0, p, align & 15,
12007			mults_128b);
12008	0		p += align & 15;
12009	0		align &= ~15;
12010			}
			/* 'p' is 16-byte aligned from here on, so it is dereferenced directly. */
12011	0	0	while (align) {
12012	0		x0 = fold_vec128(x0, *(const __m128i *)p,
12013			mults_128b);
12014	0		p += 16;
12015	0		align -= 16;
12016			}
12017	0		v0 = M128I_TO_VEC(x0);
12018			# if VL == 32
12019	0		v0 = _mm256_inserti128_si256(v0, *(const __m128i *)p, 1);
12020			# elif VL == 64
12021			v0 = _mm512_inserti32x4(v0, *(const __m128i *)p, 1);
12022			v0 = _mm512_inserti64x4(v0, *(const __m256i *)(p + 16), 1);
12023			# endif
12024	0		p -= 16;
12025			} else {
12026	0		v0 = VXOR(VLOADU(p), M128I_TO_VEC(x0));
12027			}
12028	0		v1 = VLOADU(p + 1*VL);
12029	0		v2 = VLOADU(p + 2*VL);
12030	0		v3 = VLOADU(p + 3*VL);
12031	0		v4 = VLOADU(p + 4*VL);
12032	0		v5 = VLOADU(p + 5*VL);
12033	0		v6 = VLOADU(p + 6*VL);
12034	0		v7 = VLOADU(p + 7*VL);
12035	0		p += 8*VL;
12036
12037
			/* Main loop: fold 8*VL more input bytes into v0..v7 per iteration. */
12038	0	0	while (len >= 16*VL) {
12039	0		v0 = fold_vec(v0, VLOADU(p + 0*VL), mults_8v);
12040	0		v1 = fold_vec(v1, VLOADU(p + 1*VL), mults_8v);
12041	0		v2 = fold_vec(v2, VLOADU(p + 2*VL), mults_8v);
12042	0		v3 = fold_vec(v3, VLOADU(p + 3*VL), mults_8v);
12043	0		v4 = fold_vec(v4, VLOADU(p + 4*VL), mults_8v);
12044	0		v5 = fold_vec(v5, VLOADU(p + 5*VL), mults_8v);
12045	0		v6 = fold_vec(v6, VLOADU(p + 6*VL), mults_8v);
12046	0		v7 = fold_vec(v7, VLOADU(p + 7*VL), mults_8v);
12047	0		p += 8*VL;
12048	0		len -= 8*VL;
12049			}
12050
12051
			/* Fold 8 vectors down to 4, absorbing 4 more input vectors if present. */
12052	0		v0 = fold_vec(v0, v4, mults_4v);
12053	0		v1 = fold_vec(v1, v5, mults_4v);
12054	0		v2 = fold_vec(v2, v6, mults_4v);
12055	0		v3 = fold_vec(v3, v7, mults_4v);
12056	0	0	if (len & (4*VL)) {
12057	0		v0 = fold_vec(v0, VLOADU(p + 0*VL), mults_4v);
12058	0		v1 = fold_vec(v1, VLOADU(p + 1*VL), mults_4v);
12059	0		v2 = fold_vec(v2, VLOADU(p + 2*VL), mults_4v);
12060	0		v3 = fold_vec(v3, VLOADU(p + 3*VL), mults_4v);
12061	0		p += 4*VL;
12062			}
12063			}
12064
12065	0		v0 = fold_vec(v0, v2, mults_2v);
12066	0		v1 = fold_vec(v1, v3, mults_2v);
12067	0	0	if (len & (2*VL)) {
12068	0		v0 = fold_vec(v0, VLOADU(p + 0*VL), mults_2v);
12069	0		v1 = fold_vec(v1, VLOADU(p + 1*VL), mults_2v);
12070	0		p += 2*VL;
12071			}
12072	0		less_than_2vl_remaining:
12073
12074	0		v0 = fold_vec(v0, v1, mults_1v);
12075	0	0	if (len & VL) {
12076	0		v0 = fold_vec(v0, VLOADU(p), mults_1v);
12077	0		p += VL;
12078			}
12079	0		less_than_vl_remaining:
12080
12081			#if VL == 16
12082			x0 = v0;
12083			#else
12084			{
12085			#if VL == 32
12086	0		__m256i y0 = v0;
12087			#else
12088			const __m256i mults_256b =
12089			_mm256_set_epi64x(CRC32_X223_MODG, CRC32_X287_MODG,
12090			CRC32_X223_MODG, CRC32_X287_MODG);
12091			__m256i y0 = fold_vec256(_mm512_extracti64x4_epi64(v0, 0),
12092			_mm512_extracti64x4_epi64(v0, 1),
12093			mults_256b);
12094			if (len & 32) {
12095			y0 = fold_vec256(y0, _mm256_loadu_si256((const void *)p),
12096			mults_256b);
12097			p += 32;
12098			}
12099			#endif
12100	0		x0 = fold_vec128(_mm256_extracti128_si256(y0, 0),
12101	0		_mm256_extracti128_si256(y0, 1), mults_128b);
12102			}
12103	0	0	if (len & 16) {
12104	0		x0 = fold_vec128(x0, _mm_loadu_si128((const void *)p),
12105			mults_128b);
12106	0		p += 16;
12107			}
12108			#endif
12109	0		less_than_16_remaining:
12110	0		len &= 15;
12111
12112
12113	0	0	if (len)
12114	0		x0 = fold_lessthan16bytes(x0, p, len, mults_128b);
12115			#if USE_AVX512
12116	0		reduce_x0:
12117			#endif
12118
			/* Final reduction of x0 using mults_128b and barrett_reduction_constants. */
12119	0		x0 = _mm_xor_si128(_mm_clmulepi64_si128(x0, mults_128b, 0x10),
12120	0		_mm_bsrli_si128(x0, 8));
12121	0		x1 = _mm_clmulepi64_si128(x0, barrett_reduction_constants, 0x00);
12122	0		x1 = _mm_clmulepi64_si128(x1, barrett_reduction_constants, 0x10);
12123	0		x0 = _mm_xor_si128(x0, x1);
12124	0		return _mm_extract_epi32(x0, 2);
12125			}
12126
12127			#undef vec_t
12128			#undef fold_vec
12129			#undef VLOADU
12130			#undef VXOR
12131			#undef M128I_TO_VEC
12132			#undef MULTS
12133			#undef MULTS_8V
12134			#undef MULTS_4V
12135			#undef MULTS_2V
12136			#undef MULTS_1V
12137
12138			#undef SUFFIX
12139			#undef ATTRIBUTES
12140			#undef VL
12141			#undef USE_AVX512
12142
12143
12144
			/*
			 * Parameters for the VPCLMULQDQ + AVX-512 instantiation of the CRC-32
			 * template that uses 64-byte vectors.  The self-referential #define
			 * lets later code test with #ifdef whether this variant was compiled
			 * in.
			 */
12145			# define crc32_x86_vpclmulqdq_avx512_vl512 crc32_x86_vpclmulqdq_avx512_vl512
12146			# define SUFFIX _vpclmulqdq_avx512_vl512
12147			# define ATTRIBUTES _target_attribute("vpclmulqdq,pclmul,avx512bw,avx512vl")
12148			# define VL 64
12149			# define USE_AVX512 1
12150			/* #include "x86-crc32_pclmul_template.h" */
12151
12152
12153
12154
			/*
			 * Select the vector type, the load / XOR / zero-extend wrappers and the
			 * folding multiplier constants for the configured vector length VL
			 * (16, 32 or 64 bytes).  MULTS_nV is the constant pair the kernel uses
			 * when folding across a distance of n vectors.  Any other VL is a
			 * compile-time error.
			 */
12155			#if VL == 16
12156			# define vec_t __m128i
12157			# define fold_vec fold_vec128
12158			# define VLOADU(p) _mm_loadu_si128((const void *)(p))
12159			# define VXOR(a, b) _mm_xor_si128((a), (b))
12160			# define M128I_TO_VEC(a) a
12161			# define MULTS_8V _mm_set_epi64x(CRC32_X991_MODG, CRC32_X1055_MODG)
12162			# define MULTS_4V _mm_set_epi64x(CRC32_X479_MODG, CRC32_X543_MODG)
12163			# define MULTS_2V _mm_set_epi64x(CRC32_X223_MODG, CRC32_X287_MODG)
12164			# define MULTS_1V _mm_set_epi64x(CRC32_X95_MODG, CRC32_X159_MODG)
12165			#elif VL == 32
12166			# define vec_t __m256i
12167			# define fold_vec fold_vec256
12168			# define VLOADU(p) _mm256_loadu_si256((const void *)(p))
12169			# define VXOR(a, b) _mm256_xor_si256((a), (b))
12170			# define M128I_TO_VEC(a) _mm256_zextsi128_si256(a)
12171			# define MULTS(a, b) _mm256_set_epi64x(a, b, a, b)
12172			# define MULTS_8V MULTS(CRC32_X2015_MODG, CRC32_X2079_MODG)
12173			# define MULTS_4V MULTS(CRC32_X991_MODG, CRC32_X1055_MODG)
12174			# define MULTS_2V MULTS(CRC32_X479_MODG, CRC32_X543_MODG)
12175			# define MULTS_1V MULTS(CRC32_X223_MODG, CRC32_X287_MODG)
12176			#elif VL == 64
12177			# define vec_t __m512i
12178			# define fold_vec fold_vec512
12179			# define VLOADU(p) _mm512_loadu_si512((const void *)(p))
12180			# define VXOR(a, b) _mm512_xor_si512((a), (b))
12181			# define M128I_TO_VEC(a) _mm512_zextsi128_si512(a)
12182			# define MULTS(a, b) _mm512_set_epi64(a, b, a, b, a, b, a, b)
12183			# define MULTS_8V MULTS(CRC32_X4063_MODG, CRC32_X4127_MODG)
12184			# define MULTS_4V MULTS(CRC32_X2015_MODG, CRC32_X2079_MODG)
12185			# define MULTS_2V MULTS(CRC32_X991_MODG, CRC32_X1055_MODG)
12186			# define MULTS_1V MULTS(CRC32_X479_MODG, CRC32_X543_MODG)
12187			#else
12188			# error "unsupported vector length"
12189			#endif
12190
12191			#undef fold_vec128
			/*
			 * Fold one 128-bit vector into another.  Carry-less multiplies the low
			 * 64-bit halves of 'src' and 'mults' (selector 0x00) and their high
			 * 64-bit halves (selector 0x11), and XORs both products into 'dst'.
			 * Returns the updated 'dst'.
			 */
12192			static forceinline ATTRIBUTES __m128i
12193			ADD_SUFFIX(fold_vec128)(__m128i src, __m128i dst, __m128i mults)
12194			{
12195	0		dst = _mm_xor_si128(dst, _mm_clmulepi64_si128(src, mults, 0x00));
12196	0		dst = _mm_xor_si128(dst, _mm_clmulepi64_si128(src, mults, 0x11));
12197	0		return dst;
12198			}
12199			#define fold_vec128 ADD_SUFFIX(fold_vec128)
12200
12201			#if VL >= 32
12202			#undef fold_vec256
			/*
			 * 256-bit counterpart of fold_vec128: XORs the two carry-less products
			 * of 'src' and 'mults' (selectors 0x00 and 0x11) into 'dst' and returns
			 * the result.  With USE_AVX512 the three-way XOR is a single ternary
			 * logic operation (truth table 0x96); otherwise two XORs are used.
			 */
12203			static forceinline ATTRIBUTES __m256i
12204			ADD_SUFFIX(fold_vec256)(__m256i src, __m256i dst, __m256i mults)
12205			{
12206			#if USE_AVX512
12207
12208	0		return _mm256_ternarylogic_epi32(
12209			_mm256_clmulepi64_epi128(src, mults, 0x00),
12210			_mm256_clmulepi64_epi128(src, mults, 0x11),
12211			dst,
12212			0x96);
12213			#else
12214			return _mm256_xor_si256(
12215			_mm256_xor_si256(dst,
12216			_mm256_clmulepi64_epi128(src, mults, 0x00)),
12217			_mm256_clmulepi64_epi128(src, mults, 0x11));
12218			#endif
12219			}
12220			#define fold_vec256 ADD_SUFFIX(fold_vec256)
12221			#endif
12222
12223			#if VL >= 64
12224			#undef fold_vec512
			/*
			 * 512-bit counterpart of fold_vec128: returns the three-way XOR
			 * (ternary logic truth table 0x96) of 'dst' and the two carry-less
			 * products of 'src' and 'mults' (selectors 0x00 and 0x11).
			 */
12225			static forceinline ATTRIBUTES __m512i
12226			ADD_SUFFIX(fold_vec512)(__m512i src, __m512i dst, __m512i mults)
12227			{
12228
12229	0		return _mm512_ternarylogic_epi32(
12230			_mm512_clmulepi64_epi128(src, mults, 0x00),
12231			_mm512_clmulepi64_epi128(src, mults, 0x11),
12232			dst,
12233			0x96);
12234			}
12235			#define fold_vec512 ADD_SUFFIX(fold_vec512)
12236			#endif
12237
12238
12239			#undef fold_lessthan16bytes
			/*
			 * Fold the 128-bit state 'x' across the 'len' input bytes that end at
			 * p + len (the callers in this file pass 1 <= len <= 15).  Two 16-byte
			 * shuffle masks are read from shift_tab at byte offsets len and
			 * len + 16.  x0 is 'x' shuffled by the first mask; each byte of x1 comes
			 * either from 'x' shuffled by the second mask or from the 16 bytes
			 * ending at p + len, chosen by the top bit of the matching rshift byte.
			 * Returns fold_vec128(x0, x1, mults_128b).
			 *
			 * Note the load from p + len - 16: bytes located before 'p' are read,
			 * so they must be accessible.
			 */
12240			static forceinline ATTRIBUTES __m128i
12241			ADD_SUFFIX(fold_lessthan16bytes)(__m128i x, const u8 *p, size_t len,
12242			__m128i mults_128b)
12243			{
12244	0		__m128i lshift = _mm_loadu_si128((const void *)&shift_tab[len]);
12245	0		__m128i rshift = _mm_loadu_si128((const void *)&shift_tab[len + 16]);
12246			__m128i x0, x1;
12247
12248
12249	0		x0 = _mm_shuffle_epi8(x, lshift);
12250
12251
12252	0		x1 = _mm_blendv_epi8(_mm_shuffle_epi8(x, rshift),
12253	0		_mm_loadu_si128((const void *)(p + len - 16)),
12254
12255			rshift);
12256
12257	0		return fold_vec128(x0, x1, mults_128b);
12258			}
12259			#define fold_lessthan16bytes ADD_SUFFIX(fold_lessthan16bytes)
12260
			/*
			 * CRC-32 kernel for this template instantiation.  Folds the 'len' bytes
			 * at 'p' into the running value 'crc' with carry-less multiplication
			 * on VL-byte vectors, then reduces the remaining 128-bit value.
			 *
			 * crc: incoming CRC state; placed in the low 32 bits of x0.
			 * p:   input bytes.
			 * len: number of input bytes.
			 *
			 * Returns 32-bit element 2 of the reduced vector.  Inputs shorter than
			 * 16 bytes are handed to crc32_slice1(), except that with USE_AVX512
			 * only inputs shorter than 4 bytes are, and 4..15 bytes are handled
			 * with a masked load.
			 */
12261			static ATTRIBUTES u32
12262	0		ADD_SUFFIX(crc32_x86)(u32 crc, const u8 *p, size_t len)
12263			{
12264
12265	0		const vec_t mults_8v = MULTS_8V;
12266	0		const vec_t mults_4v = MULTS_4V;
12267	0		const vec_t mults_2v = MULTS_2V;
12268	0		const vec_t mults_1v = MULTS_1V;
12269	0		const __m128i mults_128b = _mm_set_epi64x(CRC32_X95_MODG, CRC32_X159_MODG);
12270			const __m128i barrett_reduction_constants =
12271	0		_mm_set_epi64x(CRC32_BARRETT_CONSTANT_2, CRC32_BARRETT_CONSTANT_1);
12272			vec_t v0, v1, v2, v3, v4, v5, v6, v7;
12273	0		__m128i x0 = _mm_cvtsi32_si128(crc);
12274			__m128i x1;
12275
			/*
			 * Fewer than 8 vectors of input: load only the vectors that exist
			 * and jump into the matching stage of the fold-down sequence.
			 */
12276	0	0	if (len < 8*VL) {
12277	0	0	if (len < VL) {
12278			STATIC_ASSERT(VL == 16 || VL == 32 || VL == 64);
12279	0	0	if (len < 16) {
12280			#if USE_AVX512
12281	0	0	if (len < 4)
12282	0		return crc32_slice1(crc, p, len);
12283
12284	0		x0 = _mm_xor_si128(
12285	0		x0, _mm_maskz_loadu_epi8((1 << len) - 1, p));
12286	0		x0 = _mm_shuffle_epi8(
12287	0		x0, _mm_loadu_si128((const void *)&shift_tab[len]));
12288	0		goto reduce_x0;
12289			#else
12290			return crc32_slice1(crc, p, len);
12291			#endif
12292			}
12293
12294	0		x0 = _mm_xor_si128(_mm_loadu_si128((const void *)p), x0);
12295	0	0	if (len >= 32) {
12296	0		x0 = fold_vec128(x0, _mm_loadu_si128((const void *)(p + 16)),
12297			mults_128b);
12298	0	0	if (len >= 48)
12299	0		x0 = fold_vec128(x0, _mm_loadu_si128((const void *)(p + 32)),
12300			mults_128b);
12301			}
12302	0		p += len & ~15;
12303	0		goto less_than_16_remaining;
12304			}
12305	0		v0 = VXOR(VLOADU(p), M128I_TO_VEC(x0));
12306	0	0	if (len < 2*VL) {
12307	0		p += VL;
12308	0		goto less_than_vl_remaining;
12309			}
12310	0		v1 = VLOADU(p + 1*VL);
12311	0	0	if (len < 4*VL) {
12312	0		p += 2*VL;
12313	0		goto less_than_2vl_remaining;
12314			}
12315	0		v2 = VLOADU(p + 2*VL);
12316	0		v3 = VLOADU(p + 3*VL);
12317	0		p += 4*VL;
12318			} else {
12319
			/*
			 * For long inputs whose start is not VL-aligned, first fold the
			 * leading 16 + align bytes into x0 so that 'p' ends up on a
			 * VL-byte boundary for the loads that follow.
			 */
12320	0	0	if (len > 65536 && ((uintptr_t)p & (VL-1))) {
		0
12321	0		size_t align = -(uintptr_t)p & (VL-1);
12322
12323	0		len -= align;
12324	0		x0 = _mm_xor_si128(_mm_loadu_si128((const void *)p), x0);
12325	0		p += 16;
12326	0	0	if (align & 15) {
12327	0		x0 = fold_lessthan16bytes(x0, p, align & 15,
12328			mults_128b);
12329	0		p += align & 15;
12330	0		align &= ~15;
12331			}
			/* 'p' is 16-byte aligned from here on, so it is dereferenced directly. */
12332	0	0	while (align) {
12333	0		x0 = fold_vec128(x0, *(const __m128i *)p,
12334			mults_128b);
12335	0		p += 16;
12336	0		align -= 16;
12337			}
12338	0		v0 = M128I_TO_VEC(x0);
12339			# if VL == 32
12340			v0 = _mm256_inserti128_si256(v0, *(const __m128i *)p, 1);
12341			# elif VL == 64
12342	0		v0 = _mm512_inserti32x4(v0, *(const __m128i *)p, 1);
12343	0		v0 = _mm512_inserti64x4(v0, *(const __m256i *)(p + 16), 1);
12344			# endif
12345	0		p -= 16;
12346			} else {
12347	0		v0 = VXOR(VLOADU(p), M128I_TO_VEC(x0));
12348			}
12349	0		v1 = VLOADU(p + 1*VL);
12350	0		v2 = VLOADU(p + 2*VL);
12351	0		v3 = VLOADU(p + 3*VL);
12352	0		v4 = VLOADU(p + 4*VL);
12353	0		v5 = VLOADU(p + 5*VL);
12354	0		v6 = VLOADU(p + 6*VL);
12355	0		v7 = VLOADU(p + 7*VL);
12356	0		p += 8*VL;
12357
12358
			/* Main loop: fold 8*VL more input bytes into v0..v7 per iteration. */
12359	0	0	while (len >= 16*VL) {
12360	0		v0 = fold_vec(v0, VLOADU(p + 0*VL), mults_8v);
12361	0		v1 = fold_vec(v1, VLOADU(p + 1*VL), mults_8v);
12362	0		v2 = fold_vec(v2, VLOADU(p + 2*VL), mults_8v);
12363	0		v3 = fold_vec(v3, VLOADU(p + 3*VL), mults_8v);
12364	0		v4 = fold_vec(v4, VLOADU(p + 4*VL), mults_8v);
12365	0		v5 = fold_vec(v5, VLOADU(p + 5*VL), mults_8v);
12366	0		v6 = fold_vec(v6, VLOADU(p + 6*VL), mults_8v);
12367	0		v7 = fold_vec(v7, VLOADU(p + 7*VL), mults_8v);
12368	0		p += 8*VL;
12369	0		len -= 8*VL;
12370			}
12371
12372
			/* Fold 8 vectors down to 4, absorbing 4 more input vectors if present. */
12373	0		v0 = fold_vec(v0, v4, mults_4v);
12374	0		v1 = fold_vec(v1, v5, mults_4v);
12375	0		v2 = fold_vec(v2, v6, mults_4v);
12376	0		v3 = fold_vec(v3, v7, mults_4v);
12377	0	0	if (len & (4*VL)) {
12378	0		v0 = fold_vec(v0, VLOADU(p + 0*VL), mults_4v);
12379	0		v1 = fold_vec(v1, VLOADU(p + 1*VL), mults_4v);
12380	0		v2 = fold_vec(v2, VLOADU(p + 2*VL), mults_4v);
12381	0		v3 = fold_vec(v3, VLOADU(p + 3*VL), mults_4v);
12382	0		p += 4*VL;
12383			}
12384			}
12385
12386	0		v0 = fold_vec(v0, v2, mults_2v);
12387	0		v1 = fold_vec(v1, v3, mults_2v);
12388	0	0	if (len & (2*VL)) {
12389	0		v0 = fold_vec(v0, VLOADU(p + 0*VL), mults_2v);
12390	0		v1 = fold_vec(v1, VLOADU(p + 1*VL), mults_2v);
12391	0		p += 2*VL;
12392			}
12393	0		less_than_2vl_remaining:
12394
12395	0		v0 = fold_vec(v0, v1, mults_1v);
12396	0	0	if (len & VL) {
12397	0		v0 = fold_vec(v0, VLOADU(p), mults_1v);
12398	0		p += VL;
12399			}
12400	0		less_than_vl_remaining:
12401
12402			#if VL == 16
12403			x0 = v0;
12404			#else
12405			{
12406			#if VL == 32
12407			__m256i y0 = v0;
12408			#else
12409			const __m256i mults_256b =
12410	0		_mm256_set_epi64x(CRC32_X223_MODG, CRC32_X287_MODG,
12411			CRC32_X223_MODG, CRC32_X287_MODG);
12412	0		__m256i y0 = fold_vec256(_mm512_extracti64x4_epi64(v0, 0),
12413	0		_mm512_extracti64x4_epi64(v0, 1),
12414			mults_256b);
12415	0	0	if (len & 32) {
12416	0		y0 = fold_vec256(y0, _mm256_loadu_si256((const void *)p),
12417			mults_256b);
12418	0		p += 32;
12419			}
12420			#endif
12421	0		x0 = fold_vec128(_mm256_extracti128_si256(y0, 0),
12422	0		_mm256_extracti128_si256(y0, 1), mults_128b);
12423			}
12424	0	0	if (len & 16) {
12425	0		x0 = fold_vec128(x0, _mm_loadu_si128((const void *)p),
12426			mults_128b);
12427	0		p += 16;
12428			}
12429			#endif
12430	0		less_than_16_remaining:
12431	0		len &= 15;
12432
12433
12434	0	0	if (len)
12435	0		x0 = fold_lessthan16bytes(x0, p, len, mults_128b);
12436			#if USE_AVX512
12437	0		reduce_x0:
12438			#endif
12439
			/* Final reduction of x0 using mults_128b and barrett_reduction_constants. */
12440	0		x0 = _mm_xor_si128(_mm_clmulepi64_si128(x0, mults_128b, 0x10),
12441	0		_mm_bsrli_si128(x0, 8));
12442	0		x1 = _mm_clmulepi64_si128(x0, barrett_reduction_constants, 0x00);
12443	0		x1 = _mm_clmulepi64_si128(x1, barrett_reduction_constants, 0x10);
12444	0		x0 = _mm_xor_si128(x0, x1);
12445	0		return _mm_extract_epi32(x0, 2);
12446			}
12447
12448			#undef vec_t
12449			#undef fold_vec
12450			#undef VLOADU
12451			#undef VXOR
12452			#undef M128I_TO_VEC
12453			#undef MULTS
12454			#undef MULTS_8V
12455			#undef MULTS_4V
12456			#undef MULTS_2V
12457			#undef MULTS_1V
12458
12459			#undef SUFFIX
12460			#undef ATTRIBUTES
12461			#undef VL
12462			#undef USE_AVX512
12463
12464			#endif
12465
			/*
			 * Choose an x86 CRC-32 implementation for the running CPU.  Reads the
			 * feature bits from get_x86_cpu_features() and checks the compiled-in
			 * candidates in this order: vpclmulqdq_avx512_vl512,
			 * vpclmulqdq_avx512_vl256, vpclmulqdq_avx2, pclmulqdq_avx, pclmulqdq.
			 * Returns the first one whose required features are all present, or
			 * NULL when none qualifies.  Each candidate is only considered if its
			 * name is defined as a macro.
			 */
12466			static inline crc32_func_t
12467	2		arch_select_crc32_func(void)
12468			{
12469	2		const u32 features MAYBE_UNUSED = get_x86_cpu_features();
12470
12471			#ifdef crc32_x86_vpclmulqdq_avx512_vl512
			/* The 512-bit variant additionally requires X86_CPU_FEATURE_ZMM. */
12472	2	50	if ((features & X86_CPU_FEATURE_ZMM) &&
12473	0	0	HAVE_VPCLMULQDQ(features) && HAVE_PCLMULQDQ(features) &&
		0
12474	0	0	HAVE_AVX512BW(features) && HAVE_AVX512VL(features))
		0
12475	0		return crc32_x86_vpclmulqdq_avx512_vl512;
12476			#endif
12477			#ifdef crc32_x86_vpclmulqdq_avx512_vl256
12478	2	50	if (HAVE_VPCLMULQDQ(features) && HAVE_PCLMULQDQ(features) &&
		0
12479	0	0	HAVE_AVX512BW(features) && HAVE_AVX512VL(features))
		0
12480	0		return crc32_x86_vpclmulqdq_avx512_vl256;
12481			#endif
12482			#ifdef crc32_x86_vpclmulqdq_avx2
12483	2	50	if (HAVE_VPCLMULQDQ(features) && HAVE_PCLMULQDQ(features) &&
		0
12484	0	0	HAVE_AVX2(features))
12485	0		return crc32_x86_vpclmulqdq_avx2;
12486			#endif
12487			#ifdef crc32_x86_pclmulqdq_avx
12488	2	50	if (HAVE_PCLMULQDQ(features) && HAVE_AVX(features))
		50
12489	2		return crc32_x86_pclmulqdq_avx;
12490			#endif
12491			#ifdef crc32_x86_pclmulqdq
12492	0	0	if (HAVE_PCLMULQDQ(features))
12493	0		return crc32_x86_pclmulqdq;
12494			#endif
12495	0		return NULL;
12496			}
12497			#define arch_select_crc32_func arch_select_crc32_func
12498
12499			#endif
12500
12501			#endif
12502
			/* Implementation used when none was chosen earlier and as the NULL fallback below. */
12503			#ifndef DEFAULT_IMPL
12504			# define DEFAULT_IMPL crc32_slice8
12505			#endif
12506
			/*
			 * When an arch-specific selector exists, crc32_impl is a function pointer
			 * that initially refers to crc32_dispatch_crc32().  Otherwise crc32_impl
			 * is simply a macro alias for DEFAULT_IMPL.
			 */
12507			#ifdef arch_select_crc32_func
12508			static u32 crc32_dispatch_crc32(u32 crc, const u8 *p, size_t len);
12509
12510			static volatile crc32_func_t crc32_impl = crc32_dispatch_crc32;
12511
12512
			/*
			 * Ask arch_select_crc32_func() for an implementation, substitute
			 * DEFAULT_IMPL if it returned NULL, store the result in crc32_impl so
			 * that later calls through crc32_impl reach it directly, then forward
			 * this call to it and return its result.
			 */
12513	2		static u32 crc32_dispatch_crc32(u32 crc, const u8 *p, size_t len)
12514			{
12515	2		crc32_func_t f = arch_select_crc32_func();
12516
12517	2	50	if (f == NULL)
12518	0		f = DEFAULT_IMPL;
12519
12520	2		crc32_impl = f;
12521	2		return f(crc, p, len);
12522			}
12523			#else
12524
12525			#define crc32_impl DEFAULT_IMPL
12526			#endif
12527
			/*
			 * Public CRC-32 entry point.  Returns 0 when p is NULL.  Otherwise the
			 * incoming crc is bit-inverted before being handed to crc32_impl and the
			 * result is bit-inverted again before being returned.
			 */
12528			LIBDEFLATEAPI u32
12529	29		libdeflate_crc32(u32 crc, const void *p, size_t len)
12530			{
12531	29	50	if (p == NULL)
12532	0		return 0;
12533	29		return ~crc32_impl(~crc, p, len);
12534			}
12535			/* /usr/home/ben/projects/gzip-libdeflate/../../software/libdeflate/libdeflate-1.25/lib/deflate_compress.c */
12536
12537
12538			/* #include "deflate_compress.h" */
12539			#ifndef LIB_DEFLATE_COMPRESS_H
12540			#define LIB_DEFLATE_COMPRESS_H
12541
12542			/* #include "lib_common.h" */
12543
12544
12545			#ifndef LIB_LIB_COMMON_H
12546			#define LIB_LIB_COMMON_H
12547
12548			#ifdef LIBDEFLATE_H
12549
12550			# error "lib_common.h must always be included before libdeflate.h"
12551			#endif
12552
12553			#if defined(LIBDEFLATE_DLL) && (defined(_WIN32) \|\| defined(__CYGWIN__))
12554			# define LIBDEFLATE_EXPORT_SYM __declspec(dllexport)
12555			#elif defined(__GNUC__)
12556			# define LIBDEFLATE_EXPORT_SYM __attribute__((visibility("default")))
12557			#else
12558			# define LIBDEFLATE_EXPORT_SYM
12559			#endif
12560
12561
12562			#if defined(__GNUC__) && defined(__i386__)
12563			# define LIBDEFLATE_ALIGN_STACK __attribute__((force_align_arg_pointer))
12564			#else
12565			# define LIBDEFLATE_ALIGN_STACK
12566			#endif
12567
12568			#define LIBDEFLATEAPI LIBDEFLATE_EXPORT_SYM LIBDEFLATE_ALIGN_STACK
12569
12570			/* #include "../common_defs.h" */
12571
12572
12573			#ifndef COMMON_DEFS_H
12574			#define COMMON_DEFS_H
12575
12576			/* #include "libdeflate.h" */
12577
12578
12579			#ifndef LIBDEFLATE_H
12580			#define LIBDEFLATE_H
12581
12582			#include
12583			#include
12584
12585			#ifdef __cplusplus
12586			extern "C" {
12587			#endif
12588
12589			#define LIBDEFLATE_VERSION_MAJOR 1
12590			#define LIBDEFLATE_VERSION_MINOR 25
12591			#define LIBDEFLATE_VERSION_STRING "1.25"
12592
12593
12594			#ifndef LIBDEFLATEAPI
12595			# if defined(LIBDEFLATE_DLL) && (defined(_WIN32) \|\| defined(__CYGWIN__))
12596			# define LIBDEFLATEAPI __declspec(dllimport)
12597			# else
12598			# define LIBDEFLATEAPI
12599			# endif
12600			#endif
12601
12602
12603
12604
12605
			/*
			 * Compression API.  Both struct types are only forward-declared here, so
			 * callers handle them through pointers.
			 * NOTE(review): return-value conventions (for example what is returned
			 * when the output buffer is too small) are not visible in this listing;
			 * confirm against the upstream libdeflate.h documentation.
			 */
12606			struct libdeflate_compressor;
12607			struct libdeflate_options;
12608
12609
			/* Allocate a compressor for the given compression level. */
12610			LIBDEFLATEAPI struct libdeflate_compressor *
12611			libdeflate_alloc_compressor(int compression_level);
12612
12613
			/* As above, additionally taking a pointer to a libdeflate_options struct. */
12614			LIBDEFLATEAPI struct libdeflate_compressor *
12615			libdeflate_alloc_compressor_ex(int compression_level,
12616			const struct libdeflate_options *options);
12617
12618
			/*
			 * The three *_compress() functions share one signature: input buffer and
			 * its length, output buffer and the space available in it; the result is
			 * a size_t.  The matching *_compress_bound() functions take an input
			 * length and return a size_t.
			 */
12619			LIBDEFLATEAPI size_t
12620			libdeflate_deflate_compress(struct libdeflate_compressor *compressor,
12621			const void *in, size_t in_nbytes,
12622			void *out, size_t out_nbytes_avail);
12623
12624
12625			LIBDEFLATEAPI size_t
12626			libdeflate_deflate_compress_bound(struct libdeflate_compressor *compressor,
12627			size_t in_nbytes);
12628
12629
12630			LIBDEFLATEAPI size_t
12631			libdeflate_zlib_compress(struct libdeflate_compressor *compressor,
12632			const void *in, size_t in_nbytes,
12633			void *out, size_t out_nbytes_avail);
12634
12635
12636			LIBDEFLATEAPI size_t
12637			libdeflate_zlib_compress_bound(struct libdeflate_compressor *compressor,
12638			size_t in_nbytes);
12639
12640
12641			LIBDEFLATEAPI size_t
12642			libdeflate_gzip_compress(struct libdeflate_compressor *compressor,
12643			const void *in, size_t in_nbytes,
12644			void *out, size_t out_nbytes_avail);
12645
12646
12647			LIBDEFLATEAPI size_t
12648			libdeflate_gzip_compress_bound(struct libdeflate_compressor *compressor,
12649			size_t in_nbytes);
12650
12651
			/* Release a compressor obtained from one of the alloc functions above. */
12652			LIBDEFLATEAPI void
12653			libdeflate_free_compressor(struct libdeflate_compressor *compressor);
12654
12655
12656
12657
12658
			/*
			 * Decompression API.  The decompressor type is only forward-declared
			 * here, so callers handle it through pointers.
			 */
12659			struct libdeflate_decompressor;
12660			struct libdeflate_options;
12661
12662
12663			LIBDEFLATEAPI struct libdeflate_decompressor *
12664			libdeflate_alloc_decompressor(void);
12665
12666
			/* As above, additionally taking a pointer to a libdeflate_options struct. */
12667			LIBDEFLATEAPI struct libdeflate_decompressor *
12668			libdeflate_alloc_decompressor_ex(const struct libdeflate_options *options);
12669
12670
			/*
			 * Result codes returned by every *_decompress*() function below.  The
			 * numeric values are assigned explicitly (0 through 3).
			 */
12671			enum libdeflate_result {
12672
12673			LIBDEFLATE_SUCCESS = 0,
12674
12675
12676			LIBDEFLATE_BAD_DATA = 1,
12677
12678
12679			LIBDEFLATE_SHORT_OUTPUT = 2,
12680
12681
12682			LIBDEFLATE_INSUFFICIENT_SPACE = 3,
12683			};
12684
12685
			/*
			 * Each format (deflate, zlib, gzip) has two entry points.  The plain
			 * form takes one size_t out-pointer, actual_out_nbytes_ret; the _ex form
			 * adds a second, actual_in_nbytes_ret.
			 * NOTE(review): whether these out-pointers may be NULL is not visible
			 * here; confirm against the upstream libdeflate.h documentation.
			 */
12686			LIBDEFLATEAPI enum libdeflate_result
12687			libdeflate_deflate_decompress(struct libdeflate_decompressor *decompressor,
12688			const void *in, size_t in_nbytes,
12689			void *out, size_t out_nbytes_avail,
12690			size_t *actual_out_nbytes_ret);
12691
12692
12693			LIBDEFLATEAPI enum libdeflate_result
12694			libdeflate_deflate_decompress_ex(struct libdeflate_decompressor *decompressor,
12695			const void *in, size_t in_nbytes,
12696			void *out, size_t out_nbytes_avail,
12697			size_t *actual_in_nbytes_ret,
12698			size_t *actual_out_nbytes_ret);
12699
12700
12701			LIBDEFLATEAPI enum libdeflate_result
12702			libdeflate_zlib_decompress(struct libdeflate_decompressor *decompressor,
12703			const void *in, size_t in_nbytes,
12704			void *out, size_t out_nbytes_avail,
12705			size_t *actual_out_nbytes_ret);
12706
12707
12708			LIBDEFLATEAPI enum libdeflate_result
12709			libdeflate_zlib_decompress_ex(struct libdeflate_decompressor *decompressor,
12710			const void *in, size_t in_nbytes,
12711			void *out, size_t out_nbytes_avail,
12712			size_t *actual_in_nbytes_ret,
12713			size_t *actual_out_nbytes_ret);
12714
12715
12716			LIBDEFLATEAPI enum libdeflate_result
12717			libdeflate_gzip_decompress(struct libdeflate_decompressor *decompressor,
12718			const void *in, size_t in_nbytes,
12719			void *out, size_t out_nbytes_avail,
12720			size_t *actual_out_nbytes_ret);
12721
12722
12723			LIBDEFLATEAPI enum libdeflate_result
12724			libdeflate_gzip_decompress_ex(struct libdeflate_decompressor *decompressor,
12725			const void *in, size_t in_nbytes,
12726			void *out, size_t out_nbytes_avail,
12727			size_t *actual_in_nbytes_ret,
12728			size_t *actual_out_nbytes_ret);
12729
12730
			/* Release a decompressor obtained from one of the alloc functions above. */
12731			LIBDEFLATEAPI void
12732			libdeflate_free_decompressor(struct libdeflate_decompressor *decompressor);
12733
12734
12735
12736
12737
12738
			/*
			 * Checksum API.  Both functions take a 32-bit checksum value plus a
			 * buffer and its length, and return a 32-bit checksum.
			 */
12739			LIBDEFLATEAPI uint32_t
12740			libdeflate_adler32(uint32_t adler, const void *buffer, size_t len);
12741
12742
12743
12744			LIBDEFLATEAPI uint32_t
12745			libdeflate_crc32(uint32_t crc, const void *buffer, size_t len);
12746
12747
12748
12749
12750
12751
12752			LIBDEFLATEAPI void
12753			libdeflate_set_memory_allocator(void (malloc_func)(size_t),
12754			void (free_func)(void ));
12755
12756
12757			struct libdeflate_options {
12758
12759
12760			size_t sizeof_options;
12761
12762
12763			void (malloc_func)(size_t);
12764			void (free_func)(void );
12765			};
12766
12767			#ifdef __cplusplus
12768			}
12769			#endif
12770
12771			#endif
12772
12773
12774			#include
12775			#include
12776			#include
12777			#ifdef _MSC_VER
12778			# include
12779			# include
12780
12781
12782			# pragma warning(disable : 4146)
12783
12784			# pragma warning(disable : 4018)
12785			# pragma warning(disable : 4244)
12786			# pragma warning(disable : 4267)
12787			# pragma warning(disable : 4310)
12788
12789			# pragma warning(disable : 4100)
12790			# pragma warning(disable : 4127)
12791			# pragma warning(disable : 4189)
12792			# pragma warning(disable : 4232)
12793			# pragma warning(disable : 4245)
12794			# pragma warning(disable : 4295)
12795			#endif
12796			#ifndef FREESTANDING
12797			# include
12798			#endif
12799
12800
12801
12802
12803
12804
			/*
			 * Target architecture detection.  At most one ARCH_* macro ends up
			 * defined; all are cleared first so that stale definitions cannot leak in.
			 */
12805			#undef ARCH_X86_64
12806			#undef ARCH_X86_32
12807			#undef ARCH_ARM64
12808			#undef ARCH_ARM32
12809			#undef ARCH_RISCV
12810			#ifdef _MSC_VER
12811
			/* MSVC: _M_X64 only counts as x86-64 when _M_ARM64EC is not also defined. */
12812			# if defined(_M_X64) && !defined(_M_ARM64EC)
12813			# define ARCH_X86_64
12814			# elif defined(_M_IX86)
12815			# define ARCH_X86_32
12816			# elif defined(_M_ARM64)
12817			# define ARCH_ARM64
12818			# elif defined(_M_ARM)
12819			# define ARCH_ARM32
12820			# endif
12821			#else
			/* Other compilers: use the conventional predefined architecture macros. */
12822			# if defined(__x86_64__)
12823			# define ARCH_X86_64
12824			# elif defined(__i386__)
12825			# define ARCH_X86_32
12826			# elif defined(__aarch64__)
12827			# define ARCH_ARM64
12828			# elif defined(__arm__)
12829			# define ARCH_ARM32
12830			# elif defined(__riscv)
12831			# define ARCH_RISCV
12832			# endif
12833			#endif
12834
12835
12836
12837
12838
12839
			/* Short aliases for the fixed-width integer types. */
12840			typedef uint8_t u8;
12841			typedef uint16_t u16;
12842			typedef uint32_t u32;
12843			typedef uint64_t u64;
12844			typedef int8_t s8;
12845			typedef int16_t s16;
12846			typedef int32_t s32;
12847			typedef int64_t s64;
12848
12849
			/* ssize_t is supplied here for MSVC builds only. */
12850			#ifdef _MSC_VER
12851			# ifdef _WIN64
12852			typedef long long ssize_t;
12853			# else
12854			typedef long ssize_t;
12855			# endif
12856			#endif
12857
12858
			/* The "machine word" is defined as size_t. */
12859			typedef size_t machine_word_t;
12860
12861
			/* Size of a machine word in bytes, as an int. */
12862			#define WORDBYTES ((int)sizeof(machine_word_t))
12863
12864
			/* Size of a machine word in bits. */
12865			#define WORDBITS (8 * WORDBYTES)
12866
12867
12868
12869
12870
12871
12872			#if defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER)
12873			# define GCC_PREREQ(major, minor) \
12874			(__GNUC__ > (major) \|\| \
12875			(__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)))
12876			# if !GCC_PREREQ(4, 9)
12877			# error "gcc versions older than 4.9 are no longer supported"
12878			# endif
12879			#else
12880			# define GCC_PREREQ(major, minor) 0
12881			#endif
12882			#ifdef __clang__
12883			# ifdef __apple_build_version__
12884			# define CLANG_PREREQ(major, minor, apple_version) \
12885			(__apple_build_version__ >= (apple_version))
12886			# else
12887			# define CLANG_PREREQ(major, minor, apple_version) \
12888			(__clang_major__ > (major) \|\| \
12889			(__clang_major__ == (major) && __clang_minor__ >= (minor)))
12890			# endif
12891			# if !CLANG_PREREQ(3, 9, 8000000)
12892			# error "clang versions older than 3.9 are no longer supported"
12893			# endif
12894			#else
12895			# define CLANG_PREREQ(major, minor, apple_version) 0
12896			#endif
12897			#ifdef _MSC_VER
12898			# define MSVC_PREREQ(version) (_MSC_VER >= (version))
12899			# if !MSVC_PREREQ(1900)
12900			# error "MSVC versions older than Visual Studio 2015 are no longer supported"
12901			# endif
12902			#else
12903			# define MSVC_PREREQ(version) 0
12904			#endif
12905
12906
12907			#ifndef __has_attribute
12908			# define __has_attribute(attribute) 0
12909			#endif
12910
12911
12912			#ifndef __has_builtin
12913			# define __has_builtin(builtin) 0
12914			#endif
12915
12916
12917			#ifdef _MSC_VER
12918			# define inline __inline
12919			#endif
12920
12921
12922			#if defined(__GNUC__) \|\| __has_attribute(always_inline)
12923			# define forceinline inline __attribute__((always_inline))
12924			#elif defined(_MSC_VER)
12925			# define forceinline __forceinline
12926			#else
12927			# define forceinline inline
12928			#endif
12929
12930
12931			#if defined(__GNUC__) \|\| __has_attribute(unused)
12932			# define MAYBE_UNUSED __attribute__((unused))
12933			#else
12934			# define MAYBE_UNUSED
12935			#endif
12936
12937
12938			#if defined(__GNUC__) \|\| __has_attribute(noreturn)
12939			# define NORETURN __attribute__((noreturn))
12940			#else
12941			# define NORETURN
12942			#endif
12943
12944
12945			#if !defined(__STDC_VERSION__) \|\| (__STDC_VERSION__ < 201112L)
12946			# if defined(__GNUC__) \|\| defined(__clang__)
12947			# define restrict __restrict__
12948			# else
12949			# define restrict
12950			# endif
12951			#endif
12952
12953
12954			#if defined(__GNUC__) \|\| __has_builtin(__builtin_expect)
12955			# define likely(expr) __builtin_expect(!!(expr), 1)
12956			#else
12957			# define likely(expr) (expr)
12958			#endif
12959
12960
12961			#if defined(__GNUC__) \|\| __has_builtin(__builtin_expect)
12962			# define unlikely(expr) __builtin_expect(!!(expr), 0)
12963			#else
12964			# define unlikely(expr) (expr)
12965			#endif
12966
12967
12968			#undef prefetchr
12969			#if defined(__GNUC__) \|\| __has_builtin(__builtin_prefetch)
12970			# define prefetchr(addr) __builtin_prefetch((addr), 0)
12971			#elif defined(_MSC_VER)
12972			# if defined(ARCH_X86_32) \|\| defined(ARCH_X86_64)
12973			# define prefetchr(addr) _mm_prefetch((addr), _MM_HINT_T0)
12974			# elif defined(ARCH_ARM64)
12975			# define prefetchr(addr) __prefetch2((addr), 0x00 )
12976			# elif defined(ARCH_ARM32)
12977			# define prefetchr(addr) __prefetch(addr)
12978			# endif
12979			#endif
12980			#ifndef prefetchr
12981			# define prefetchr(addr)
12982			#endif
12983
12984
12985			#undef prefetchw
12986			#if defined(__GNUC__) \|\| __has_builtin(__builtin_prefetch)
12987			# define prefetchw(addr) __builtin_prefetch((addr), 1)
12988			#elif defined(_MSC_VER)
12989			# if defined(ARCH_X86_32) \|\| defined(ARCH_X86_64)
12990			# define prefetchw(addr) _m_prefetchw(addr)
12991			# elif defined(ARCH_ARM64)
12992			# define prefetchw(addr) __prefetch2((addr), 0x10 )
12993			# elif defined(ARCH_ARM32)
12994			# define prefetchw(addr) __prefetchw(addr)
12995			# endif
12996			#endif
12997			#ifndef prefetchw
12998			# define prefetchw(addr)
12999			#endif
13000
13001
13002			#undef _aligned_attribute
13003			#if defined(__GNUC__) \|\| __has_attribute(aligned)
13004			# define _aligned_attribute(n) __attribute__((aligned(n)))
13005			#elif defined(_MSC_VER)
13006			# define _aligned_attribute(n) __declspec(align(n))
13007			#endif
13008
13009
13010			#if defined(__GNUC__) \|\| __has_attribute(target)
13011			# define _target_attribute(attrs) __attribute__((target(attrs)))
13012			#else
13013			# define _target_attribute(attrs)
13014			#endif
13015
13016
13017
13018
13019
			/*
			 * Small arithmetic helpers.  MIN/MAX evaluate their arguments more than
			 * once.  STATIC_ASSERT yields a negative array size, and therefore a
			 * compile error, when expr is false.  The ALIGN expression is only a
			 * round-up when a is a power of two, because it masks with ~(a - 1).
			 */
13020			#define ARRAY_LEN(A) (sizeof(A) / sizeof((A)[0]))
13021			#define MIN(a, b) ((a) <= (b) ? (a) : (b))
13022			#define MAX(a, b) ((a) >= (b) ? (a) : (b))
13023			#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
13024			#define STATIC_ASSERT(expr) ((void)sizeof(char[1 - 2 * !(expr)]))
13025			#define ALIGN(n, a) (((n) + (a) - 1) & ~((a) - 1))
13026			#define ROUND_UP(n, d) ((d) * DIV_ROUND_UP((n), (d)))
13027
13028
13029
13030
13031
13032
			/*
			 * CPU_IS_LITTLE_ENDIAN(): true on little-endian targets.  It is a
			 * compile-time comparison when __BYTE_ORDER__ is available, the constant
			 * true under MSVC, and otherwise a runtime probe.
			 */
13033			#if defined(__BYTE_ORDER__)
13034			# define CPU_IS_LITTLE_ENDIAN() (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
13035			#elif defined(_MSC_VER)
13036			# define CPU_IS_LITTLE_ENDIAN() true
13037			#else
13038			static forceinline bool CPU_IS_LITTLE_ENDIAN(void)
13039			{
13040			union {
13041			u32 w;
13042			u8 b;
13043			} u;
13044
			/* Store 1 as a 32-bit word; the first byte is nonzero only on little-endian. */
13045			u.w = 1;
13046			return u.b;
13047			}
13048			#endif
13049
13050
13051			static forceinline u16 bswap16(u16 v)
13052			{
13053			#if defined(__GNUC__) \|\| __has_builtin(__builtin_bswap16)
13054			return __builtin_bswap16(v);
13055			#elif defined(_MSC_VER)
13056			return _byteswap_ushort(v);
13057			#else
13058			return (v << 8) \| (v >> 8);
13059			#endif
13060			}
13061
13062
13063			static forceinline u32 bswap32(u32 v)
13064			{
13065			#if defined(__GNUC__) \|\| __has_builtin(__builtin_bswap32)
13066			return __builtin_bswap32(v);
13067			#elif defined(_MSC_VER)
13068			return _byteswap_ulong(v);
13069			#else
13070			return ((v & 0x000000FF) << 24) \|
13071			((v & 0x0000FF00) << 8) \|
13072			((v & 0x00FF0000) >> 8) \|
13073			((v & 0xFF000000) >> 24);
13074			#endif
13075			}
13076
13077
13078			static forceinline u64 bswap64(u64 v)
13079			{
13080			#if defined(__GNUC__) \|\| __has_builtin(__builtin_bswap64)
13081			return __builtin_bswap64(v);
13082			#elif defined(_MSC_VER)
13083			return _byteswap_uint64(v);
13084			#else
13085			return ((v & 0x00000000000000FF) << 56) \|
13086			((v & 0x000000000000FF00) << 40) \|
13087			((v & 0x0000000000FF0000) << 24) \|
13088			((v & 0x00000000FF000000) << 8) \|
13089			((v & 0x000000FF00000000) >> 8) \|
13090			((v & 0x0000FF0000000000) >> 24) \|
13091			((v & 0x00FF000000000000) >> 40) \|
13092			((v & 0xFF00000000000000) >> 56);
13093			#endif
13094			}
13095
			/*
			 * Convert between CPU byte order and a fixed byte order.  The leNN forms
			 * swap only on big-endian CPUs, the beNN forms only on little-endian
			 * CPUs.  Each conversion is its own inverse.
			 */
13096			#define le16_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap16(v))
13097			#define le32_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap32(v))
13098			#define le64_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap64(v))
13099			#define be16_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? bswap16(v) : (v))
13100			#define be32_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? bswap32(v) : (v))
13101			#define be64_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? bswap64(v) : (v))
13102
13103
13104
13105
13106
13107
13108			#if (defined(__GNUC__) \|\| defined(__clang__)) && \
13109			(defined(ARCH_X86_64) \|\| defined(ARCH_X86_32) \|\| \
13110			defined(__ARM_FEATURE_UNALIGNED) \|\| defined(__powerpc64__) \|\| \
13111			defined(__riscv_misaligned_fast) \|\| \
13112			defined(__wasm__))
13113			# define UNALIGNED_ACCESS_IS_FAST 1
13114			#elif defined(_MSC_VER)
13115			# define UNALIGNED_ACCESS_IS_FAST 1
13116			#else
13117			# define UNALIGNED_ACCESS_IS_FAST 0
13118			#endif
13119
13120
13121
			/* MEMCOPY is a temporary alias: the builtin in freestanding builds, memcpy() otherwise. */
13122			#ifdef FREESTANDING
13123			# define MEMCOPY __builtin_memcpy
13124			#else
13125			# define MEMCOPY memcpy
13126			#endif
13127
13128
13129
			/*
			 * DEFINE_UNALIGNED_TYPE(type) generates load_<type>_unaligned() and
			 * store_<type>_unaligned().  Both copy sizeof(type) bytes with MEMCOPY
			 * rather than dereferencing a cast pointer, so p may have any alignment.
			 * No byte-order conversion is performed.
			 */
13130			#define DEFINE_UNALIGNED_TYPE(type) \
13131			static forceinline type \
13132			load_##type##_unaligned(const void *p) \
13133			{ \
13134			type v; \
13135			\
13136			MEMCOPY(&v, p, sizeof(v)); \
13137			return v; \
13138			} \
13139			\
13140			static forceinline void \
13141			store_##type##_unaligned(type v, void *p) \
13142			{ \
13143			MEMCOPY(p, &v, sizeof(v)); \
13144			}
13145
13146			DEFINE_UNALIGNED_TYPE(u16)
13147			DEFINE_UNALIGNED_TYPE(u32)
13148			DEFINE_UNALIGNED_TYPE(u64)
13149			DEFINE_UNALIGNED_TYPE(machine_word_t)
13150
13151			#undef MEMCOPY
13152
			/* Shorter names for the machine_word_t instantiation. */
13153			#define load_word_unaligned load_machine_word_t_unaligned
13154			#define store_word_unaligned store_machine_word_t_unaligned
13155
13156
13157
13158			static forceinline u16
13159			get_unaligned_le16(const u8 *p)
13160			{
13161			if (UNALIGNED_ACCESS_IS_FAST)
13162			return le16_bswap(load_u16_unaligned(p));
13163			else
13164			return ((u16)p[1] << 8) \| p[0];
13165			}
13166
13167			static forceinline u16
13168			get_unaligned_be16(const u8 *p)
13169			{
13170			if (UNALIGNED_ACCESS_IS_FAST)
13171			return be16_bswap(load_u16_unaligned(p));
13172			else
13173			return ((u16)p[0] << 8) \| p[1];
13174			}
13175
13176			static forceinline u32
13177			get_unaligned_le32(const u8 *p)
13178			{
13179			if (UNALIGNED_ACCESS_IS_FAST)
13180			return le32_bswap(load_u32_unaligned(p));
13181			else
13182			return ((u32)p[3] << 24) \| ((u32)p[2] << 16) \|
13183			((u32)p[1] << 8) \| p[0];
13184			}
13185
13186			static forceinline u32
13187			get_unaligned_be32(const u8 *p)
13188			{
13189			if (UNALIGNED_ACCESS_IS_FAST)
13190			return be32_bswap(load_u32_unaligned(p));
13191			else
13192			return ((u32)p[0] << 24) \| ((u32)p[1] << 16) \|
13193			((u32)p[2] << 8) \| p[3];
13194			}
13195
13196			static forceinline u64
13197			get_unaligned_le64(const u8 *p)
13198			{
13199			if (UNALIGNED_ACCESS_IS_FAST)
13200			return le64_bswap(load_u64_unaligned(p));
13201			else
13202			return ((u64)p[7] << 56) \| ((u64)p[6] << 48) \|
13203			((u64)p[5] << 40) \| ((u64)p[4] << 32) \|
13204			((u64)p[3] << 24) \| ((u64)p[2] << 16) \|
13205			((u64)p[1] << 8) \| p[0];
13206			}
13207
13208			static forceinline machine_word_t
13209			get_unaligned_leword(const u8 *p)
13210			{
13211			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
13212			if (WORDBITS == 32)
13213			return get_unaligned_le32(p);
13214			else
13215			return get_unaligned_le64(p);
13216			}
13217
13218
13219
			/*
			 * Write v to p as 16-bit little-endian; p may have any alignment.  Uses
			 * one whole-word store when unaligned access is fast, otherwise writes
			 * the bytes individually, least significant first.
			 */
13220			static forceinline void
13221			put_unaligned_le16(u16 v, u8 *p)
13222			{
13223			if (UNALIGNED_ACCESS_IS_FAST) {
13224			store_u16_unaligned(le16_bswap(v), p);
13225			} else {
13226			p[0] = (u8)(v >> 0);
13227			p[1] = (u8)(v >> 8);
13228			}
13229			}
13230
			/*
			 * Write v to p as 16-bit big-endian; p may have any alignment.  Uses one
			 * whole-word store when unaligned access is fast, otherwise writes the
			 * bytes individually, most significant first.
			 */
13231			static forceinline void
13232			put_unaligned_be16(u16 v, u8 *p)
13233			{
13234			if (UNALIGNED_ACCESS_IS_FAST) {
13235			store_u16_unaligned(be16_bswap(v), p);
13236			} else {
13237			p[0] = (u8)(v >> 8);
13238			p[1] = (u8)(v >> 0);
13239			}
13240			}
13241
			/*
			 * Write v to p as 32-bit little-endian; p may have any alignment.  Uses
			 * one whole-word store when unaligned access is fast, otherwise writes
			 * the bytes individually, least significant first.
			 */
13242			static forceinline void
13243			put_unaligned_le32(u32 v, u8 *p)
13244			{
13245			if (UNALIGNED_ACCESS_IS_FAST) {
13246			store_u32_unaligned(le32_bswap(v), p);
13247			} else {
13248			p[0] = (u8)(v >> 0);
13249			p[1] = (u8)(v >> 8);
13250			p[2] = (u8)(v >> 16);
13251			p[3] = (u8)(v >> 24);
13252			}
13253			}
13254
			/*
			 * Write v to p as 32-bit big-endian; p may have any alignment.  Uses one
			 * whole-word store when unaligned access is fast, otherwise writes the
			 * bytes individually, most significant first.
			 */
13255			static forceinline void
13256			put_unaligned_be32(u32 v, u8 *p)
13257			{
13258			if (UNALIGNED_ACCESS_IS_FAST) {
13259			store_u32_unaligned(be32_bswap(v), p);
13260			} else {
13261			p[0] = (u8)(v >> 24);
13262			p[1] = (u8)(v >> 16);
13263			p[2] = (u8)(v >> 8);
13264			p[3] = (u8)(v >> 0);
13265			}
13266			}
13267
			/*
			 * Write v to p as 64-bit little-endian; p may have any alignment.  Uses
			 * one whole-word store when unaligned access is fast, otherwise writes
			 * the bytes individually, least significant first.
			 */
13268			static forceinline void
13269			put_unaligned_le64(u64 v, u8 *p)
13270			{
13271			if (UNALIGNED_ACCESS_IS_FAST) {
13272			store_u64_unaligned(le64_bswap(v), p);
13273			} else {
13274			p[0] = (u8)(v >> 0);
13275			p[1] = (u8)(v >> 8);
13276			p[2] = (u8)(v >> 16);
13277			p[3] = (u8)(v >> 24);
13278			p[4] = (u8)(v >> 32);
13279			p[5] = (u8)(v >> 40);
13280			p[6] = (u8)(v >> 48);
13281			p[7] = (u8)(v >> 56);
13282			}
13283			}
13284
13285			static forceinline void
13286			put_unaligned_leword(machine_word_t v, u8 *p)
13287			{
13288			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
13289			if (WORDBITS == 32)
13290			put_unaligned_le32(v, p);
13291			else
13292			put_unaligned_le64(v, p);
13293			}
13294
13295
13296
13297
13298
13299
13300
13301			static forceinline unsigned
13302			bsr32(u32 v)
13303			{
13304			#if defined(__GNUC__) \|\| __has_builtin(__builtin_clz)
13305			return 31 - __builtin_clz(v);
13306			#elif defined(_MSC_VER)
13307			unsigned long i;
13308
13309			_BitScanReverse(&i, v);
13310			return i;
13311			#else
13312			unsigned i = 0;
13313
13314			while ((v >>= 1) != 0)
13315			i++;
13316			return i;
13317			#endif
13318			}
13319
13320			static forceinline unsigned
13321			bsr64(u64 v)
13322			{
13323			#if defined(__GNUC__) \|\| __has_builtin(__builtin_clzll)
13324			return 63 - __builtin_clzll(v);
13325			#elif defined(_MSC_VER) && defined(_WIN64)
13326			unsigned long i;
13327
13328			_BitScanReverse64(&i, v);
13329			return i;
13330			#else
13331			unsigned i = 0;
13332
13333			while ((v >>= 1) != 0)
13334			i++;
13335			return i;
13336			#endif
13337			}
13338
13339			static forceinline unsigned
13340			bsrw(machine_word_t v)
13341			{
13342			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
13343			if (WORDBITS == 32)
13344			return bsr32(v);
13345			else
13346			return bsr64(v);
13347			}
13348
13349
13350
13351			static forceinline unsigned
13352			bsf32(u32 v)
13353			{
13354			#if defined(__GNUC__) \|\| __has_builtin(__builtin_ctz)
13355			return __builtin_ctz(v);
13356			#elif defined(_MSC_VER)
13357			unsigned long i;
13358
13359			_BitScanForward(&i, v);
13360			return i;
13361			#else
13362			unsigned i = 0;
13363
13364			for (; (v & 1) == 0; v >>= 1)
13365			i++;
13366			return i;
13367			#endif
13368			}
13369
13370			static forceinline unsigned
13371			bsf64(u64 v)
13372			{
13373			#if defined(__GNUC__) \|\| __has_builtin(__builtin_ctzll)
13374			return __builtin_ctzll(v);
13375			#elif defined(_MSC_VER) && defined(_WIN64)
13376			unsigned long i;
13377
13378			_BitScanForward64(&i, v);
13379			return i;
13380			#else
13381			unsigned i = 0;
13382
13383			for (; (v & 1) == 0; v >>= 1)
13384			i++;
13385			return i;
13386			#endif
13387			}
13388
13389			static forceinline unsigned
13390			bsfw(machine_word_t v)
13391			{
13392			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
13393			if (WORDBITS == 32)
13394			return bsf32(v);
13395			else
13396			return bsf64(v);
13397			}
13398
13399
13400			#undef rbit32
13401			#if (defined(__GNUC__) \|\| defined(__clang__)) && defined(ARCH_ARM32) && \
13402			(__ARM_ARCH >= 7 \|\| (__ARM_ARCH == 6 && defined(__ARM_ARCH_6T2__)))
13403			static forceinline u32
13404			rbit32(u32 v)
13405			{
13406			__asm__("rbit %0, %1" : "=r" (v) : "r" (v));
13407			return v;
13408			}
13409			#define rbit32 rbit32
13410			#elif (defined(__GNUC__) \|\| defined(__clang__)) && defined(ARCH_ARM64)
13411			static forceinline u32
13412			rbit32(u32 v)
13413			{
13414			__asm__("rbit %w0, %w1" : "=r" (v) : "r" (v));
13415			return v;
13416			}
13417			#define rbit32 rbit32
13418			#endif
13419
13420			#endif
13421
13422
13423			typedef void (malloc_func_t)(size_t);
13424			typedef void (free_func_t)(void );
13425
13426			extern malloc_func_t libdeflate_default_malloc_func;
13427			extern free_func_t libdeflate_default_free_func;
13428
13429			void *libdeflate_aligned_malloc(malloc_func_t malloc_func,
13430			size_t alignment, size_t size);
13431			void libdeflate_aligned_free(free_func_t free_func, void *ptr);
13432
13433			#ifdef FREESTANDING
13434
13435			void memset(void s, int c, size_t n);
13436			#define memset(s, c, n) __builtin_memset((s), (c), (n))
13437
13438			void memcpy(void dest, const void *src, size_t n);
13439			#define memcpy(dest, src, n) __builtin_memcpy((dest), (src), (n))
13440
13441			void memmove(void dest, const void *src, size_t n);
13442			#define memmove(dest, src, n) __builtin_memmove((dest), (src), (n))
13443
13444			int memcmp(const void s1, const void s2, size_t n);
13445			#define memcmp(s1, s2, n) __builtin_memcmp((s1), (s2), (n))
13446
13447			#undef LIBDEFLATE_ENABLE_ASSERTIONS
13448			#else
13449			# include
13450
13451			# ifdef __clang_analyzer__
13452			# define LIBDEFLATE_ENABLE_ASSERTIONS
13453			# endif
13454			#endif
13455
13456
13457			#ifdef LIBDEFLATE_ENABLE_ASSERTIONS
13458			NORETURN void
13459			libdeflate_assertion_failed(const char expr, const char file, int line);
13460			#define ASSERT(expr) { if (unlikely(!(expr))) \
13461			libdeflate_assertion_failed(#expr, __FILE__, __LINE__); }
13462			#else
13463			#define ASSERT(expr) (void)(expr)
13464			#endif
13465
			/*
			 * Token pasting helpers.  CONCAT goes through CONCAT_IMPL so that macro
			 * arguments are expanded before being pasted.  ADD_SUFFIX appends
			 * whatever SUFFIX is defined as at the point of use.
			 */
13466			#define CONCAT_IMPL(a, b) a##b
13467			#define CONCAT(a, b) CONCAT_IMPL(a, b)
13468			#define ADD_SUFFIX(name) CONCAT(name, SUFFIX)
13469
13470			#endif
13471
13472
13473
13474
13475			struct libdeflate_compressor;
13476
13477			unsigned int libdeflate_get_compression_level(struct libdeflate_compressor *c);
13478
13479			#endif
13480
13481			/* #include "deflate_constants.h" */
13482
13483
			/* Numeric constants of the DEFLATE format, behind an include guard. */
13484			#ifndef LIB_DEFLATE_CONSTANTS_H
13485			#define LIB_DEFLATE_CONSTANTS_H
13486
13487
			/* Block type codes. */
13488			#define DEFLATE_BLOCKTYPE_UNCOMPRESSED 0
13489			#define DEFLATE_BLOCKTYPE_STATIC_HUFFMAN 1
13490			#define DEFLATE_BLOCKTYPE_DYNAMIC_HUFFMAN 2
13491
13492
			/* Shortest and longest match length. */
13493			#define DEFLATE_MIN_MATCH_LEN 3
13494			#define DEFLATE_MAX_MATCH_LEN 258
13495
13496
			/* Largest match offset; equals 1 << DEFLATE_WINDOW_ORDER. */
13497			#define DEFLATE_MAX_MATCH_OFFSET 32768
13498
13499
13500			#define DEFLATE_WINDOW_ORDER 15
13501
13502
			/* Number of symbols in each of the three Huffman alphabets. */
13503			#define DEFLATE_NUM_PRECODE_SYMS 19
13504			#define DEFLATE_NUM_LITLEN_SYMS 288
13505			#define DEFLATE_NUM_OFFSET_SYMS 32
13506
13507
			/* Equal to the largest of the three alphabet sizes above. */
13508			#define DEFLATE_MAX_NUM_SYMS 288
13509
13510
			/* Layout of the literal/length alphabet: 256 literals, then end-of-block, then length symbols. */
13511			#define DEFLATE_NUM_LITERALS 256
13512			#define DEFLATE_END_OF_BLOCK 256
13513			#define DEFLATE_FIRST_LEN_SYM 257
13514
13515
			/* Longest codeword length, in bits, for each alphabet. */
13516			#define DEFLATE_MAX_PRE_CODEWORD_LEN 7
13517			#define DEFLATE_MAX_LITLEN_CODEWORD_LEN 15
13518			#define DEFLATE_MAX_OFFSET_CODEWORD_LEN 15
13519
13520
			/* Equal to the largest of the three per-alphabet limits above. */
13521			#define DEFLATE_MAX_CODEWORD_LEN 15
13522
13523
			/* NOTE(review): how 137 is derived is not visible in this listing; see the upstream comment. */
13524			#define DEFLATE_MAX_LENS_OVERRUN 137
13525
13526
			/* Largest number of extra bits attached to a length or offset symbol. */
13527			#define DEFLATE_MAX_EXTRA_LENGTH_BITS 5
13528			#define DEFLATE_MAX_EXTRA_OFFSET_BITS 13
13529
13530			#endif
13531
13532
13533
13534
13535
13536
13537
			/*
			 * Compile-time tuning parameters of the compressor.
			 * NOTE(review): the rationale behind the individual values is not
			 * visible in this listing; see the upstream comments before changing
			 * any of them.
			 */
			/* Set to 0 to compile out everything guarded by this macro below. */
13538			#define SUPPORT_NEAR_OPTIMAL_PARSING 1
13539
13540
13541			#define MIN_BLOCK_LENGTH 5000
13542
13543
13544			#define SOFT_MAX_BLOCK_LENGTH 300000
13545
13546
13547			#define SEQ_STORE_LENGTH 50000
13548
13549
13550			#define FAST_SOFT_MAX_BLOCK_LENGTH 65535
13551
13552
13553			#define FAST_SEQ_STORE_LENGTH 8192
13554
13555
			/* The litlen limit is 14; the other two reuse the DEFLATE_* format limits. */
13556			#define MAX_LITLEN_CODEWORD_LEN 14
13557			#define MAX_OFFSET_CODEWORD_LEN DEFLATE_MAX_OFFSET_CODEWORD_LEN
13558			#define MAX_PRE_CODEWORD_LEN DEFLATE_MAX_PRE_CODEWORD_LEN
13559
13560			#if SUPPORT_NEAR_OPTIMAL_PARSING
13561
13562
13563
13564
			/* NOTE(review): presumably the fixed-point cost of one bit in the cost model; confirm. */
13565			#define BIT_COST 16
13566
13567
13568			#define LITERAL_NOSTAT_BITS 13
13569			#define LENGTH_NOSTAT_BITS 13
13570			#define OFFSET_NOSTAT_BITS 10
13571
13572
			/* Five entries per position of a maximum-length block. */
13573			#define MATCH_CACHE_LENGTH (SOFT_MAX_BLOCK_LENGTH * 5)
13574
13575			#endif
13576
13577
13578
13579
			/* Must be defined before the matchfinder headers that follow. */
13580			#define MATCHFINDER_WINDOW_ORDER DEFLATE_WINDOW_ORDER
13581			/* #include "hc_matchfinder.h" */
13582
13583
13584			#ifndef LIB_HC_MATCHFINDER_H
13585			#define LIB_HC_MATCHFINDER_H
13586
13587			/* #include "matchfinder_common.h" */
13588
13589
13590			#ifndef LIB_MATCHFINDER_COMMON_H
13591			#define LIB_MATCHFINDER_COMMON_H
13592
13593			/* #include "lib_common.h" */
13594
13595
13596			#ifndef LIB_LIB_COMMON_H
13597			#define LIB_LIB_COMMON_H
13598
13599			#ifdef LIBDEFLATE_H
13600
13601			# error "lib_common.h must always be included before libdeflate.h"
13602			#endif
13603
13604			#if defined(LIBDEFLATE_DLL) && (defined(_WIN32) \|\| defined(__CYGWIN__))
13605			# define LIBDEFLATE_EXPORT_SYM __declspec(dllexport)
13606			#elif defined(__GNUC__)
13607			# define LIBDEFLATE_EXPORT_SYM __attribute__((visibility("default")))
13608			#else
13609			# define LIBDEFLATE_EXPORT_SYM
13610			#endif
13611
13612
13613			#if defined(__GNUC__) && defined(__i386__)
13614			# define LIBDEFLATE_ALIGN_STACK __attribute__((force_align_arg_pointer))
13615			#else
13616			# define LIBDEFLATE_ALIGN_STACK
13617			#endif
13618
13619			#define LIBDEFLATEAPI LIBDEFLATE_EXPORT_SYM LIBDEFLATE_ALIGN_STACK
13620
13621			/* #include "../common_defs.h" */
13622
13623
13624			#ifndef COMMON_DEFS_H
13625			#define COMMON_DEFS_H
13626
13627			/* #include "libdeflate.h" */
13628
13629
#ifndef LIBDEFLATE_H
#define LIBDEFLATE_H

#include <stddef.h>	/* size_t */
#include <stdint.h>	/* uint32_t */

#ifdef __cplusplus
extern "C" {
#endif

#define LIBDEFLATE_VERSION_MAJOR	1
#define LIBDEFLATE_VERSION_MINOR	25
#define LIBDEFLATE_VERSION_STRING	"1.25"

/*
 * LIBDEFLATEAPI decorates every public declaration below.  If nothing defined
 * it earlier, it becomes dllimport for LIBDEFLATE_DLL users on Windows/Cygwin
 * and expands to nothing everywhere else.
 */
#ifndef LIBDEFLATEAPI
#  if defined(LIBDEFLATE_DLL) && (defined(_WIN32) || defined(__CYGWIN__))
#    define LIBDEFLATEAPI	__declspec(dllimport)
#  else
#    define LIBDEFLATEAPI
#  endif
#endif

/* ------------------------------ Compression ------------------------------ */

struct libdeflate_compressor;	/* opaque; defined by the implementation */
struct libdeflate_options;

/* Creates a compressor for the requested compression level. */
LIBDEFLATEAPI struct libdeflate_compressor *
libdeflate_alloc_compressor(int compression_level);

/* Same as libdeflate_alloc_compressor(), but also takes an options struct. */
LIBDEFLATEAPI struct libdeflate_compressor *
libdeflate_alloc_compressor_ex(int compression_level,
			       const struct libdeflate_options *options);

/*
 * The three *_compress() functions read 'in_nbytes' bytes from 'in' and write
 * to 'out', which has room for 'out_nbytes_avail' bytes; they differ in the
 * container named in the function (raw DEFLATE, zlib, gzip).
 * NOTE(review): the meaning of the size_t result (presumably the compressed
 * size, with a sentinel for "did not fit") is not visible here; confirm
 * against the implementation.
 */
LIBDEFLATEAPI size_t
libdeflate_deflate_compress(struct libdeflate_compressor *compressor,
			    const void *in, size_t in_nbytes,
			    void *out, size_t out_nbytes_avail);

/*
 * The *_compress_bound() functions return a size derived from 'in_nbytes'.
 * NOTE(review): presumably an upper bound on the matching *_compress()
 * output; confirm against the implementation.
 */
LIBDEFLATEAPI size_t
libdeflate_deflate_compress_bound(struct libdeflate_compressor *compressor,
				  size_t in_nbytes);

LIBDEFLATEAPI size_t
libdeflate_zlib_compress(struct libdeflate_compressor *compressor,
			 const void *in, size_t in_nbytes,
			 void *out, size_t out_nbytes_avail);

LIBDEFLATEAPI size_t
libdeflate_zlib_compress_bound(struct libdeflate_compressor *compressor,
			       size_t in_nbytes);

LIBDEFLATEAPI size_t
libdeflate_gzip_compress(struct libdeflate_compressor *compressor,
			 const void *in, size_t in_nbytes,
			 void *out, size_t out_nbytes_avail);

LIBDEFLATEAPI size_t
libdeflate_gzip_compress_bound(struct libdeflate_compressor *compressor,
			       size_t in_nbytes);

/* Releases a compressor obtained from libdeflate_alloc_compressor[_ex](). */
LIBDEFLATEAPI void
libdeflate_free_compressor(struct libdeflate_compressor *compressor);

/* ----------------------------- Decompression ----------------------------- */

struct libdeflate_decompressor;	/* opaque; defined by the implementation */
struct libdeflate_options;

/* Creates a decompressor. */
LIBDEFLATEAPI struct libdeflate_decompressor *
libdeflate_alloc_decompressor(void);

/* Same as libdeflate_alloc_decompressor(), but also takes an options struct. */
LIBDEFLATEAPI struct libdeflate_decompressor *
libdeflate_alloc_decompressor_ex(const struct libdeflate_options *options);

/*
 * Status returned by every *_decompress*() function.  The numeric values are
 * part of the interface.  The per-value notes are inferred from the
 * identifiers only; confirm exact conditions against the implementation.
 */
enum libdeflate_result {
	/* Decompression succeeded. */
	LIBDEFLATE_SUCCESS = 0,

	/* The input was rejected as invalid. */
	LIBDEFLATE_BAD_DATA = 1,

	/* Less output was produced than expected. */
	LIBDEFLATE_SHORT_OUTPUT = 2,

	/* The output buffer was too small. */
	LIBDEFLATE_INSUFFICIENT_SPACE = 3,
};

/*
 * The *_decompress() functions read 'in_nbytes' bytes from 'in' and write to
 * 'out' (capacity 'out_nbytes_avail'), with 'actual_out_nbytes_ret' as an
 * output parameter for the produced size.  The *_ex() variants add
 * 'actual_in_nbytes_ret' as an output parameter for the consumed input size.
 * NOTE(review): whether the *_ret pointers may be NULL is not visible here.
 */
LIBDEFLATEAPI enum libdeflate_result
libdeflate_deflate_decompress(struct libdeflate_decompressor *decompressor,
			      const void *in, size_t in_nbytes,
			      void *out, size_t out_nbytes_avail,
			      size_t *actual_out_nbytes_ret);

LIBDEFLATEAPI enum libdeflate_result
libdeflate_deflate_decompress_ex(struct libdeflate_decompressor *decompressor,
				 const void *in, size_t in_nbytes,
				 void *out, size_t out_nbytes_avail,
				 size_t *actual_in_nbytes_ret,
				 size_t *actual_out_nbytes_ret);

LIBDEFLATEAPI enum libdeflate_result
libdeflate_zlib_decompress(struct libdeflate_decompressor *decompressor,
			   const void *in, size_t in_nbytes,
			   void *out, size_t out_nbytes_avail,
			   size_t *actual_out_nbytes_ret);

LIBDEFLATEAPI enum libdeflate_result
libdeflate_zlib_decompress_ex(struct libdeflate_decompressor *decompressor,
			      const void *in, size_t in_nbytes,
			      void *out, size_t out_nbytes_avail,
			      size_t *actual_in_nbytes_ret,
			      size_t *actual_out_nbytes_ret);

LIBDEFLATEAPI enum libdeflate_result
libdeflate_gzip_decompress(struct libdeflate_decompressor *decompressor,
			   const void *in, size_t in_nbytes,
			   void *out, size_t out_nbytes_avail,
			   size_t *actual_out_nbytes_ret);

LIBDEFLATEAPI enum libdeflate_result
libdeflate_gzip_decompress_ex(struct libdeflate_decompressor *decompressor,
			      const void *in, size_t in_nbytes,
			      void *out, size_t out_nbytes_avail,
			      size_t *actual_in_nbytes_ret,
			      size_t *actual_out_nbytes_ret);

/* Releases a decompressor obtained from libdeflate_alloc_decompressor[_ex](). */
LIBDEFLATEAPI void
libdeflate_free_decompressor(struct libdeflate_decompressor *decompressor);

/* ------------------------------- Checksums ------------------------------- */

/* Continues an Adler-32 computation from 'adler' over 'len' bytes of 'buffer'. */
LIBDEFLATEAPI uint32_t
libdeflate_adler32(uint32_t adler, const void *buffer, size_t len);

/* Continues a CRC-32 computation from 'crc' over 'len' bytes of 'buffer'. */
LIBDEFLATEAPI uint32_t
libdeflate_crc32(uint32_t crc, const void *buffer, size_t len);

/* --------------------------- Custom allocation --------------------------- */

/*
 * Installs a malloc()/free()-shaped pair of callbacks as the library's
 * allocator.
 */
LIBDEFLATEAPI void
libdeflate_set_memory_allocator(void *(*malloc_func)(size_t),
				void (*free_func)(void *));

/* Options accepted by the *_alloc_*_ex() functions. */
struct libdeflate_options {

	/* Size of this struct as seen by the caller, i.e. sizeof(*this). */
	size_t sizeof_options;

	/*
	 * malloc()/free()-shaped allocator callbacks.
	 * NOTE(review): presumably NULL selects the default allocator; confirm.
	 */
	void *(*malloc_func)(size_t);
	void (*free_func)(void *);
};

#ifdef __cplusplus
}
#endif

#endif /* LIBDEFLATE_H */
13823
13824
#include <stdbool.h>
#include <stddef.h>	/* size_t */
#include <stdint.h>
#ifdef _MSC_VER
#  include <intrin.h>	/* _BitScan*() and other intrinsics */
#  include <stdlib.h>	/* _byteswap_*() */

   /* Silence MSVC warnings that this code base triggers by design. */
#  pragma warning(disable : 4146) /* unary minus applied to unsigned type */

#  pragma warning(disable : 4018) /* signed/unsigned mismatch */
#  pragma warning(disable : 4244) /* conversion, possible loss of data */
#  pragma warning(disable : 4267) /* conversion from size_t, possible loss of data */
#  pragma warning(disable : 4310) /* cast truncates constant value */

#  pragma warning(disable : 4100) /* unreferenced formal parameter */
#  pragma warning(disable : 4127) /* conditional expression is constant */
#  pragma warning(disable : 4189) /* local variable initialized but not referenced */
#  pragma warning(disable : 4232) /* address of dllimport is not static */
#  pragma warning(disable : 4245) /* conversion, signed/unsigned mismatch */
#  pragma warning(disable : 4295) /* array too small for terminating null */
#endif
#ifndef FREESTANDING
#  include <string.h>	/* memcpy() */
#endif
13850
13851
13852
13853
13854
13855
/*
 * Target architecture detection.  All ARCH_* macros are cleared first, then at
 * most one of them is defined from the compiler's predefined macros.
 */
#undef ARCH_X86_64
#undef ARCH_X86_32
#undef ARCH_ARM64
#undef ARCH_ARM32
#undef ARCH_RISCV
#if defined(_MSC_VER)
   /* MSVC spelling; ARM64EC builds are deliberately not treated as x86_64. */
#  if defined(_M_X64) && !defined(_M_ARM64EC)
#    define ARCH_X86_64
#  elif defined(_M_IX86)
#    define ARCH_X86_32
#  elif defined(_M_ARM64)
#    define ARCH_ARM64
#  elif defined(_M_ARM)
#    define ARCH_ARM32
#  endif
#else
   /* GCC/Clang-style spelling. */
#  if defined(__x86_64__)
#    define ARCH_X86_64
#  elif defined(__i386__)
#    define ARCH_X86_32
#  elif defined(__aarch64__)
#    define ARCH_ARM64
#  elif defined(__arm__)
#    define ARCH_ARM32
#  elif defined(__riscv)
#    define ARCH_RISCV
#  endif
#endif
13885
13886
13887
13888
13889
13890
/* Short names for the fixed-width integer types. */
typedef uint8_t		u8;
typedef uint16_t	u16;
typedef uint32_t	u32;
typedef uint64_t	u64;
typedef int8_t		s8;
typedef int16_t		s16;
typedef int32_t		s32;
typedef int64_t		s64;

/* Under MSVC, ssize_t is declared here. */
#ifdef _MSC_VER
#  ifdef _WIN64
typedef long long ssize_t;
#  else
typedef long ssize_t;
#  endif
#endif

/* The word type used for word-at-a-time operations. */
typedef size_t machine_word_t;

/* Number of bytes in a machine word (as an int). */
#define WORDBYTES	((int)sizeof(machine_word_t))

/* Number of bits in a machine word. */
#define WORDBITS	(8 * WORDBYTES)
13917
13918
13919
13920
13921
13922
/*
 * Compiler version checks.  Each *_PREREQ() macro evaluates to 0 when the code
 * is being built by a different compiler, so they can be OR'ed together.
 */

/* GCC_PREREQ(major, minor): true on real gcc (not clang/icc) >= major.minor. */
#if defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER)
#  define GCC_PREREQ(major, minor)		\
	(__GNUC__ > (major) ||			\
	 (__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)))
#  if !GCC_PREREQ(4, 9)
#    error "gcc versions older than 4.9 are no longer supported"
#  endif
#else
#  define GCC_PREREQ(major, minor)	0
#endif

/*
 * CLANG_PREREQ(major, minor, apple_version): on Apple clang only
 * 'apple_version' is compared (against __apple_build_version__); on other
 * clang builds only major.minor is compared.
 */
#ifdef __clang__
#  ifdef __apple_build_version__
#    define CLANG_PREREQ(major, minor, apple_version)	\
	(__apple_build_version__ >= (apple_version))
#  else
#    define CLANG_PREREQ(major, minor, apple_version)	\
	(__clang_major__ > (major) ||			\
	 (__clang_major__ == (major) && __clang_minor__ >= (minor)))
#  endif
#  if !CLANG_PREREQ(3, 9, 8000000)
#    error "clang versions older than 3.9 are no longer supported"
#  endif
#else
#  define CLANG_PREREQ(major, minor, apple_version)	0
#endif

/* MSVC_PREREQ(version): true on MSVC with _MSC_VER >= version. */
#ifdef _MSC_VER
#  define MSVC_PREREQ(version)	(_MSC_VER >= (version))
#  if !MSVC_PREREQ(1900)
#    error "MSVC versions older than Visual Studio 2015 are no longer supported"
#  endif
#else
#  define MSVC_PREREQ(version)	0
#endif
13956
13957
/* Compilers without __has_attribute() report every attribute as missing. */
#if !defined(__has_attribute)
#  define __has_attribute(attribute)	0
#endif

/* Compilers without __has_builtin() report every builtin as missing. */
#if !defined(__has_builtin)
#  define __has_builtin(builtin)	0
#endif

/* Under MSVC, 'inline' is mapped to the vendor keyword __inline. */
#if defined(_MSC_VER)
#  define inline	__inline
#endif
13971
13972
/* forceinline: "inline", plus the strongest inlining request available. */
#if defined(__GNUC__) || __has_attribute(always_inline)
#  define forceinline	inline __attribute__((always_inline))
#elif defined(_MSC_VER)
#  define forceinline	__forceinline
#else
#  define forceinline	inline
#endif

/* MAYBE_UNUSED: marks a declaration that may legitimately go unused. */
#if defined(__GNUC__) || __has_attribute(unused)
#  define MAYBE_UNUSED	__attribute__((unused))
#else
#  define MAYBE_UNUSED
#endif

/* NORETURN: marks a function that never returns to its caller. */
#if defined(__GNUC__) || __has_attribute(noreturn)
#  define NORETURN	__attribute__((noreturn))
#else
#  define NORETURN
#endif

/*
 * When the language version is unknown or older than C11, map 'restrict' to
 * the compiler's own spelling, or drop it if there is none.
 */
#if !defined(__STDC_VERSION__) || (__STDC_VERSION__ < 201112L)
#  if defined(__GNUC__) || defined(__clang__)
#    define restrict	__restrict__
#  else
#    define restrict
#  endif
#endif
14003
14004
/* likely(expr): hint that 'expr' is usually true. */
#if defined(__GNUC__) || __has_builtin(__builtin_expect)
#  define likely(expr)		__builtin_expect(!!(expr), 1)
#else
#  define likely(expr)		(expr)
#endif

/* unlikely(expr): hint that 'expr' is usually false. */
#if defined(__GNUC__) || __has_builtin(__builtin_expect)
#  define unlikely(expr)	__builtin_expect(!!(expr), 0)
#else
#  define unlikely(expr)	(expr)
#endif
14017
14018
/*
 * prefetchr(addr): prefetch the memory at 'addr' for reading.  Expands to
 * nothing when no suitable intrinsic is known.
 */
#undef prefetchr
#if defined(__GNUC__) || __has_builtin(__builtin_prefetch)
#  define prefetchr(addr)	__builtin_prefetch((addr), 0)
#elif defined(_MSC_VER)
#  if defined(ARCH_X86_32) || defined(ARCH_X86_64)
#    define prefetchr(addr)	_mm_prefetch((addr), _MM_HINT_T0)
#  elif defined(ARCH_ARM64)
#    define prefetchr(addr)	__prefetch2((addr), 0x00 /* prfop=PLDL1KEEP */)
#  elif defined(ARCH_ARM32)
#    define prefetchr(addr)	__prefetch(addr)
#  endif
#endif
#ifndef prefetchr
#  define prefetchr(addr)
#endif

/*
 * prefetchw(addr): prefetch the memory at 'addr' for writing.  Expands to
 * nothing when no suitable intrinsic is known.
 */
#undef prefetchw
#if defined(__GNUC__) || __has_builtin(__builtin_prefetch)
#  define prefetchw(addr)	__builtin_prefetch((addr), 1)
#elif defined(_MSC_VER)
#  if defined(ARCH_X86_32) || defined(ARCH_X86_64)
#    define prefetchw(addr)	_m_prefetchw(addr)
#  elif defined(ARCH_ARM64)
#    define prefetchw(addr)	__prefetch2((addr), 0x10 /* prfop=PSTL1KEEP */)
#  elif defined(ARCH_ARM32)
#    define prefetchw(addr)	__prefetchw(addr)
#  endif
#endif
#ifndef prefetchw
#  define prefetchw(addr)
#endif
14051
14052
/*
 * _aligned_attribute(n): request n-byte alignment for a declaration.  Left
 * undefined when the compiler has no known way to express it, so users must
 * test it with #ifdef.
 */
#undef _aligned_attribute
#if defined(__GNUC__) || __has_attribute(aligned)
#  define _aligned_attribute(n)	__attribute__((aligned(n)))
#elif defined(_MSC_VER)
#  define _aligned_attribute(n)	__declspec(align(n))
#endif

/*
 * _target_attribute(attrs): compile a function for the given target features.
 * Expands to nothing when unsupported.
 */
#if defined(__GNUC__) || __has_attribute(target)
#  define _target_attribute(attrs)	__attribute__((target(attrs)))
#else
#  define _target_attribute(attrs)
#endif
14066
14067
14068
14069
14070
/*
 * Small general-purpose macros.  Arguments may be evaluated more than once, so
 * do not pass expressions with side effects.
 */

/* Number of elements in the array A (A must be an array, not a pointer). */
#define ARRAY_LEN(A)		(sizeof(A) / sizeof((A)[0]))

/* Smaller / larger of two values. */
#define MIN(a, b)		((a) <= (b) ? (a) : (b))
#define MAX(a, b)		((a) >= (b) ? (a) : (b))

/* n / d, rounded up. */
#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

/* Compile-time check: an array of negative size is declared if expr is false. */
#define STATIC_ASSERT(expr)	((void)sizeof(char[1 - 2 * !(expr)]))

/* n rounded up to a multiple of a; the mask trick needs a power-of-two a. */
#define ALIGN(n, a)		(((n) + (a) - 1) & ~((a) - 1))

/* n rounded up to a multiple of d. */
#define ROUND_UP(n, d)		((d) * DIV_ROUND_UP((n), (d)))
14078
14079
14080
14081
14082
14083
/*
 * CPU_IS_LITTLE_ENDIAN(): nonzero if the target stores the least significant
 * byte first.  A compile-time constant where the compiler provides one; MSVC
 * is taken to be little endian; otherwise probed at run time.
 */
#if defined(__BYTE_ORDER__)
#  define CPU_IS_LITTLE_ENDIAN() (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
#elif defined(_MSC_VER)
#  define CPU_IS_LITTLE_ENDIAN() true
#else
static forceinline bool CPU_IS_LITTLE_ENDIAN(void)
{
	/* Store 1 as a 32-bit word and look at the byte at the lowest address. */
	union {
		u32 w;
		u8 b;
	} probe;

	probe.w = 1;
	return probe.b;
}
#endif
14100
14101
14102			static forceinline u16 bswap16(u16 v)
14103			{
14104			#if defined(__GNUC__) \|\| __has_builtin(__builtin_bswap16)
14105			return __builtin_bswap16(v);
14106			#elif defined(_MSC_VER)
14107			return _byteswap_ushort(v);
14108			#else
14109			return (v << 8) \| (v >> 8);
14110			#endif
14111			}
14112
14113
14114			static forceinline u32 bswap32(u32 v)
14115			{
14116			#if defined(__GNUC__) \|\| __has_builtin(__builtin_bswap32)
14117			return __builtin_bswap32(v);
14118			#elif defined(_MSC_VER)
14119			return _byteswap_ulong(v);
14120			#else
14121			return ((v & 0x000000FF) << 24) \|
14122			((v & 0x0000FF00) << 8) \|
14123			((v & 0x00FF0000) >> 8) \|
14124			((v & 0xFF000000) >> 24);
14125			#endif
14126			}
14127
14128
14129			static forceinline u64 bswap64(u64 v)
14130			{
14131			#if defined(__GNUC__) \|\| __has_builtin(__builtin_bswap64)
14132			return __builtin_bswap64(v);
14133			#elif defined(_MSC_VER)
14134			return _byteswap_uint64(v);
14135			#else
14136			return ((v & 0x00000000000000FF) << 56) \|
14137			((v & 0x000000000000FF00) << 40) \|
14138			((v & 0x0000000000FF0000) << 24) \|
14139			((v & 0x00000000FF000000) << 8) \|
14140			((v & 0x000000FF00000000) >> 8) \|
14141			((v & 0x0000FF0000000000) >> 24) \|
14142			((v & 0x00FF000000000000) >> 40) \|
14143			((v & 0xFF00000000000000) >> 56);
14144			#endif
14145			}
14146
/*
 * Convert between CPU byte order and a fixed byte order.  leNN_bswap() is the
 * identity on little endian CPUs and a byte swap otherwise; beNN_bswap() is
 * the reverse.  Each macro serves both conversion directions.
 */
#define le16_bswap(v)	(CPU_IS_LITTLE_ENDIAN() ? (v) : bswap16(v))
#define le32_bswap(v)	(CPU_IS_LITTLE_ENDIAN() ? (v) : bswap32(v))
#define le64_bswap(v)	(CPU_IS_LITTLE_ENDIAN() ? (v) : bswap64(v))

#define be16_bswap(v)	(CPU_IS_LITTLE_ENDIAN() ? bswap16(v) : (v))
#define be32_bswap(v)	(CPU_IS_LITTLE_ENDIAN() ? bswap32(v) : (v))
#define be64_bswap(v)	(CPU_IS_LITTLE_ENDIAN() ? bswap64(v) : (v))
14153
14154
14155
14156
14157
14158
/*
 * UNALIGNED_ACCESS_IS_FAST: 1 if unaligned loads and stores are believed to
 * be cheap on the target, else 0.  It selects between word-sized accesses and
 * byte-by-byte code in the accessors that test it.
 */
#if (defined(__GNUC__) || defined(__clang__)) && \
	(defined(ARCH_X86_64) || defined(ARCH_X86_32) || \
	 defined(__ARM_FEATURE_UNALIGNED) || defined(__powerpc64__) || \
	 defined(__riscv_misaligned_fast) || \
	 defined(__wasm__))
#  define UNALIGNED_ACCESS_IS_FAST	1
#elif defined(_MSC_VER)
#  define UNALIGNED_ACCESS_IS_FAST	1
#else
#  define UNALIGNED_ACCESS_IS_FAST	0
#endif
14170
14171
14172
14173			#ifdef FREESTANDING
14174			# define MEMCOPY __builtin_memcpy
14175			#else
14176			# define MEMCOPY memcpy
14177			#endif
14178
14179
14180
14181			#define DEFINE_UNALIGNED_TYPE(type) \
14182			static forceinline type \
14183			load_##type##_unaligned(const void *p) \
14184			{ \
14185			type v; \
14186			\
14187			MEMCOPY(&v, p, sizeof(v)); \
14188			return v; \
14189			} \
14190			\
14191			static forceinline void \
14192			store_##type##_unaligned(type v, void *p) \
14193			{ \
14194			MEMCOPY(p, &v, sizeof(v)); \
14195			}
14196
14197			DEFINE_UNALIGNED_TYPE(u16)
14198			DEFINE_UNALIGNED_TYPE(u32)
14199			DEFINE_UNALIGNED_TYPE(u64)
14200			DEFINE_UNALIGNED_TYPE(machine_word_t)
14201
14202			#undef MEMCOPY
14203
14204			#define load_word_unaligned load_machine_word_t_unaligned
14205			#define store_word_unaligned store_machine_word_t_unaligned
14206
14207
14208
14209			static forceinline u16
14210			get_unaligned_le16(const u8 *p)
14211			{
14212			if (UNALIGNED_ACCESS_IS_FAST)
14213			return le16_bswap(load_u16_unaligned(p));
14214			else
14215			return ((u16)p[1] << 8) \| p[0];
14216			}
14217
14218			static forceinline u16
14219			get_unaligned_be16(const u8 *p)
14220			{
14221			if (UNALIGNED_ACCESS_IS_FAST)
14222			return be16_bswap(load_u16_unaligned(p));
14223			else
14224			return ((u16)p[0] << 8) \| p[1];
14225			}
14226
14227			static forceinline u32
14228			get_unaligned_le32(const u8 *p)
14229			{
14230			if (UNALIGNED_ACCESS_IS_FAST)
14231			return le32_bswap(load_u32_unaligned(p));
14232			else
14233			return ((u32)p[3] << 24) \| ((u32)p[2] << 16) \|
14234			((u32)p[1] << 8) \| p[0];
14235			}
14236
14237			static forceinline u32
14238			get_unaligned_be32(const u8 *p)
14239			{
14240			if (UNALIGNED_ACCESS_IS_FAST)
14241			return be32_bswap(load_u32_unaligned(p));
14242			else
14243			return ((u32)p[0] << 24) \| ((u32)p[1] << 16) \|
14244			((u32)p[2] << 8) \| p[3];
14245			}
14246
14247			static forceinline u64
14248			get_unaligned_le64(const u8 *p)
14249			{
14250			if (UNALIGNED_ACCESS_IS_FAST)
14251			return le64_bswap(load_u64_unaligned(p));
14252			else
14253			return ((u64)p[7] << 56) \| ((u64)p[6] << 48) \|
14254			((u64)p[5] << 40) \| ((u64)p[4] << 32) \|
14255			((u64)p[3] << 24) \| ((u64)p[2] << 16) \|
14256			((u64)p[1] << 8) \| p[0];
14257			}
14258
14259			static forceinline machine_word_t
14260			get_unaligned_leword(const u8 *p)
14261			{
14262			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
14263			if (WORDBITS == 32)
14264			return get_unaligned_le32(p);
14265			else
14266			return get_unaligned_le64(p);
14267			}
14268
14269
14270
14271			static forceinline void
14272			put_unaligned_le16(u16 v, u8 *p)
14273			{
14274			if (UNALIGNED_ACCESS_IS_FAST) {
14275			store_u16_unaligned(le16_bswap(v), p);
14276			} else {
14277			p[0] = (u8)(v >> 0);
14278			p[1] = (u8)(v >> 8);
14279			}
14280			}
14281
14282			static forceinline void
14283			put_unaligned_be16(u16 v, u8 *p)
14284			{
14285			if (UNALIGNED_ACCESS_IS_FAST) {
14286			store_u16_unaligned(be16_bswap(v), p);
14287			} else {
14288			p[0] = (u8)(v >> 8);
14289			p[1] = (u8)(v >> 0);
14290			}
14291			}
14292
14293			static forceinline void
14294			put_unaligned_le32(u32 v, u8 *p)
14295			{
14296			if (UNALIGNED_ACCESS_IS_FAST) {
14297			store_u32_unaligned(le32_bswap(v), p);
14298			} else {
14299			p[0] = (u8)(v >> 0);
14300			p[1] = (u8)(v >> 8);
14301			p[2] = (u8)(v >> 16);
14302			p[3] = (u8)(v >> 24);
14303			}
14304			}
14305
14306			static forceinline void
14307			put_unaligned_be32(u32 v, u8 *p)
14308			{
14309			if (UNALIGNED_ACCESS_IS_FAST) {
14310			store_u32_unaligned(be32_bswap(v), p);
14311			} else {
14312			p[0] = (u8)(v >> 24);
14313			p[1] = (u8)(v >> 16);
14314			p[2] = (u8)(v >> 8);
14315			p[3] = (u8)(v >> 0);
14316			}
14317			}
14318
14319			static forceinline void
14320			put_unaligned_le64(u64 v, u8 *p)
14321			{
14322			if (UNALIGNED_ACCESS_IS_FAST) {
14323			store_u64_unaligned(le64_bswap(v), p);
14324			} else {
14325			p[0] = (u8)(v >> 0);
14326			p[1] = (u8)(v >> 8);
14327			p[2] = (u8)(v >> 16);
14328			p[3] = (u8)(v >> 24);
14329			p[4] = (u8)(v >> 32);
14330			p[5] = (u8)(v >> 40);
14331			p[6] = (u8)(v >> 48);
14332			p[7] = (u8)(v >> 56);
14333			}
14334			}
14335
14336			static forceinline void
14337			put_unaligned_leword(machine_word_t v, u8 *p)
14338			{
14339			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
14340			if (WORDBITS == 32)
14341			put_unaligned_le32(v, p);
14342			else
14343			put_unaligned_le64(v, p);
14344			}
14345
14346
14347
14348
14349
14350
14351
14352			static forceinline unsigned
14353			bsr32(u32 v)
14354			{
14355			#if defined(__GNUC__) \|\| __has_builtin(__builtin_clz)
14356			return 31 - __builtin_clz(v);
14357			#elif defined(_MSC_VER)
14358			unsigned long i;
14359
14360			_BitScanReverse(&i, v);
14361			return i;
14362			#else
14363			unsigned i = 0;
14364
14365			while ((v >>= 1) != 0)
14366			i++;
14367			return i;
14368			#endif
14369			}
14370
14371			static forceinline unsigned
14372			bsr64(u64 v)
14373			{
14374			#if defined(__GNUC__) \|\| __has_builtin(__builtin_clzll)
14375			return 63 - __builtin_clzll(v);
14376			#elif defined(_MSC_VER) && defined(_WIN64)
14377			unsigned long i;
14378
14379			_BitScanReverse64(&i, v);
14380			return i;
14381			#else
14382			unsigned i = 0;
14383
14384			while ((v >>= 1) != 0)
14385			i++;
14386			return i;
14387			#endif
14388			}
14389
14390			static forceinline unsigned
14391			bsrw(machine_word_t v)
14392			{
14393			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
14394			if (WORDBITS == 32)
14395			return bsr32(v);
14396			else
14397			return bsr64(v);
14398			}
14399
14400
14401
14402			static forceinline unsigned
14403			bsf32(u32 v)
14404			{
14405			#if defined(__GNUC__) \|\| __has_builtin(__builtin_ctz)
14406			return __builtin_ctz(v);
14407			#elif defined(_MSC_VER)
14408			unsigned long i;
14409
14410			_BitScanForward(&i, v);
14411			return i;
14412			#else
14413			unsigned i = 0;
14414
14415			for (; (v & 1) == 0; v >>= 1)
14416			i++;
14417			return i;
14418			#endif
14419			}
14420
14421			static forceinline unsigned
14422			bsf64(u64 v)
14423			{
14424			#if defined(__GNUC__) \|\| __has_builtin(__builtin_ctzll)
14425			return __builtin_ctzll(v);
14426			#elif defined(_MSC_VER) && defined(_WIN64)
14427			unsigned long i;
14428
14429			_BitScanForward64(&i, v);
14430			return i;
14431			#else
14432			unsigned i = 0;
14433
14434			for (; (v & 1) == 0; v >>= 1)
14435			i++;
14436			return i;
14437			#endif
14438			}
14439
14440			static forceinline unsigned
14441			bsfw(machine_word_t v)
14442			{
14443			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
14444			if (WORDBITS == 32)
14445			return bsf32(v);
14446			else
14447			return bsf64(v);
14448			}
14449
14450
/*
 * rbit32(v): returns 'v' with the order of its 32 bits reversed, using the
 * ARM "rbit" instruction.  Only provided for GCC-compatible compilers on
 * ARM32 (ARMv7+, or ARMv6T2) and ARM64.  The macro 'rbit32' is defined when
 * the function exists, so callers can test for it with #ifdef.
 */
#undef rbit32
#if (defined(__GNUC__) || defined(__clang__)) && defined(ARCH_ARM32) && \
	(__ARM_ARCH >= 7 || (__ARM_ARCH == 6 && defined(__ARM_ARCH_6T2__)))
static forceinline u32
rbit32(u32 v)
{
	__asm__("rbit %0, %1" : "=r" (v) : "r" (v));
	return v;
}
#define rbit32 rbit32
#elif (defined(__GNUC__) || defined(__clang__)) && defined(ARCH_ARM64)
static forceinline u32
rbit32(u32 v)
{
	/* %w selects the 32-bit view of the registers. */
	__asm__("rbit %w0, %w1" : "=r" (v) : "r" (v));
	return v;
}
#define rbit32 rbit32
#endif
14470
14471			#endif
14472
14473
/* Pointer types for malloc()-shaped and free()-shaped allocator callbacks. */
typedef void *(*malloc_func_t)(size_t);
typedef void (*free_func_t)(void *);

/* The allocator callbacks used when the caller supplies none. */
extern malloc_func_t libdeflate_default_malloc_func;
extern free_func_t libdeflate_default_free_func;

/*
 * Allocate 'size' bytes with the requested 'alignment' through 'malloc_func',
 * and release such an allocation through 'free_func'.
 */
void *libdeflate_aligned_malloc(malloc_func_t malloc_func,
				size_t alignment, size_t size);
void libdeflate_aligned_free(free_func_t free_func, void *ptr);
14483
#ifdef FREESTANDING
/*
 * FREESTANDING builds do not include <string.h>: declare the mem*() routines
 * here and make every use expand to the compiler builtin.
 */
void *memset(void *s, int c, size_t n);
#define memset(s, c, n)		__builtin_memset((s), (c), (n))

void *memcpy(void *dest, const void *src, size_t n);
#define memcpy(dest, src, n)	__builtin_memcpy((dest), (src), (n))

void *memmove(void *dest, const void *src, size_t n);
#define memmove(dest, src, n)	__builtin_memmove((dest), (src), (n))

int memcmp(const void *s1, const void *s2, size_t n);
#define memcmp(s1, s2, n)	__builtin_memcmp((s1), (s2), (n))

/* Assertions are always off in FREESTANDING builds. */
#undef LIBDEFLATE_ENABLE_ASSERTIONS
#else
#  include <string.h>
   /* Turn assertions on while the clang static analyzer is running. */
#  ifdef __clang_analyzer__
#    define LIBDEFLATE_ENABLE_ASSERTIONS
#  endif
#endif
14506
14507
/*
 * ASSERT(expr): with LIBDEFLATE_ENABLE_ASSERTIONS, calls
 * libdeflate_assertion_failed() (which does not return) when 'expr' is false.
 * Without it, 'expr' is still evaluated but its value is discarded.
 */
#ifdef LIBDEFLATE_ENABLE_ASSERTIONS
NORETURN void
libdeflate_assertion_failed(const char *expr, const char *file, int line);
#define ASSERT(expr) { if (unlikely(!(expr))) \
	libdeflate_assertion_failed(#expr, __FILE__, __LINE__); }
#else
#define ASSERT(expr) (void)(expr)
#endif
14516
/*
 * Token pasting helpers.  CONCAT() goes through CONCAT_IMPL() so that its
 * arguments are macro-expanded before being joined.  ADD_SUFFIX(name) appends
 * whatever SUFFIX is defined as at the point of use.
 */
#define CONCAT_IMPL(a, b)	a##b
#define CONCAT(a, b)		CONCAT_IMPL(a, b)
#define ADD_SUFFIX(name)	CONCAT(name, SUFFIX)
14520
14521			#endif
14522
14523
14524			#ifndef MATCHFINDER_WINDOW_ORDER
14525			# error "MATCHFINDER_WINDOW_ORDER must be defined!"
14526			#endif
14527
14528
14529			static forceinline u32
14530			loaded_u32_to_u24(u32 v)
14531			{
14532			if (CPU_IS_LITTLE_ENDIAN())
14533	126		return v & 0xFFFFFF;
14534			else
14535			return v >> 8;
14536			}
14537
14538
14539			static forceinline u32
14540			load_u24_unaligned(const u8 *p)
14541			{
14542			#if UNALIGNED_ACCESS_IS_FAST
14543	126		return loaded_u32_to_u24(load_u32_unaligned(p));
14544			#else
14545			if (CPU_IS_LITTLE_ENDIAN())
14546			return ((u32)p[0] << 0) \| ((u32)p[1] << 8) \| ((u32)p[2] << 16);
14547			else
14548			return ((u32)p[2] << 0) \| ((u32)p[1] << 8) \| ((u32)p[0] << 16);
14549			#endif
14550			}
14551
14552			#define MATCHFINDER_WINDOW_SIZE (1UL << MATCHFINDER_WINDOW_ORDER)
14553
14554			typedef s16 mf_pos_t;
14555
14556			#define MATCHFINDER_INITVAL ((mf_pos_t)-MATCHFINDER_WINDOW_SIZE)
14557
14558
14559			#define MATCHFINDER_MEM_ALIGNMENT 32
14560
14561
14562			#define MATCHFINDER_SIZE_ALIGNMENT 1024
14563
14564			#undef matchfinder_init
14565			#undef matchfinder_rebase
14566			#ifdef _aligned_attribute
14567			# define MATCHFINDER_ALIGNED _aligned_attribute(MATCHFINDER_MEM_ALIGNMENT)
14568			# if defined(ARCH_ARM32) \|\| defined(ARCH_ARM64)
14569			/* # include "arm/matchfinder_impl.h" */
14570
14571
14572			#ifndef LIB_ARM_MATCHFINDER_IMPL_H
14573			#define LIB_ARM_MATCHFINDER_IMPL_H
14574
14575			/* #include "arm-cpu_features.h" */
14576
14577
14578			#ifndef LIB_ARM_CPU_FEATURES_H
14579			#define LIB_ARM_CPU_FEATURES_H
14580
14581			/* #include "lib_common.h" */
14582
14583
14584			#ifndef LIB_LIB_COMMON_H
14585			#define LIB_LIB_COMMON_H
14586
/*
 * This header defines LIBDEFLATEAPI itself, so it is an error for the public
 * header to have been seen first.
 */
#ifdef LIBDEFLATE_H
#  error "lib_common.h must always be included before libdeflate.h"
#endif

/*
 * LIBDEFLATE_EXPORT_SYM: how a public symbol is exported.
 *  - DLL build on Windows/Cygwin: __declspec(dllexport)
 *  - other GCC-compatible compilers: default ELF visibility
 *  - anything else: no decoration
 */
#if defined(LIBDEFLATE_DLL) && (defined(_WIN32) || defined(__CYGWIN__))
#  define LIBDEFLATE_EXPORT_SYM	__declspec(dllexport)
#elif defined(__GNUC__)
#  define LIBDEFLATE_EXPORT_SYM	__attribute__((visibility("default")))
#else
#  define LIBDEFLATE_EXPORT_SYM
#endif

/*
 * LIBDEFLATE_ALIGN_STACK: on 32-bit x86 with a GCC-compatible compiler, ask
 * for the stack to be realigned on entry to each public function.
 */
#if defined(__GNUC__) && defined(__i386__)
#  define LIBDEFLATE_ALIGN_STACK	__attribute__((force_align_arg_pointer))
#else
#  define LIBDEFLATE_ALIGN_STACK
#endif

/* Decoration applied to every public function when building the library. */
#define LIBDEFLATEAPI	LIBDEFLATE_EXPORT_SYM LIBDEFLATE_ALIGN_STACK
14608
14609			/* #include "../common_defs.h" */
14610
14611
14612			#ifndef COMMON_DEFS_H
14613			#define COMMON_DEFS_H
14614
14615			/* #include "libdeflate.h" */
14616
14617
#ifndef LIBDEFLATE_H
#define LIBDEFLATE_H

#include <stddef.h>	/* size_t */
#include <stdint.h>	/* uint32_t */

#ifdef __cplusplus
extern "C" {
#endif

#define LIBDEFLATE_VERSION_MAJOR	1
#define LIBDEFLATE_VERSION_MINOR	25
#define LIBDEFLATE_VERSION_STRING	"1.25"

/*
 * LIBDEFLATEAPI decorates every public declaration below.  If nothing defined
 * it earlier, it becomes dllimport for LIBDEFLATE_DLL users on Windows/Cygwin
 * and expands to nothing everywhere else.
 */
#ifndef LIBDEFLATEAPI
#  if defined(LIBDEFLATE_DLL) && (defined(_WIN32) || defined(__CYGWIN__))
#    define LIBDEFLATEAPI	__declspec(dllimport)
#  else
#    define LIBDEFLATEAPI
#  endif
#endif

/* ------------------------------ Compression ------------------------------ */

struct libdeflate_compressor;	/* opaque; defined by the implementation */
struct libdeflate_options;

/* Creates a compressor for the requested compression level. */
LIBDEFLATEAPI struct libdeflate_compressor *
libdeflate_alloc_compressor(int compression_level);

/* Same as libdeflate_alloc_compressor(), but also takes an options struct. */
LIBDEFLATEAPI struct libdeflate_compressor *
libdeflate_alloc_compressor_ex(int compression_level,
			       const struct libdeflate_options *options);

/*
 * The three *_compress() functions read 'in_nbytes' bytes from 'in' and write
 * to 'out', which has room for 'out_nbytes_avail' bytes; they differ in the
 * container named in the function (raw DEFLATE, zlib, gzip).
 * NOTE(review): the meaning of the size_t result (presumably the compressed
 * size, with a sentinel for "did not fit") is not visible here; confirm
 * against the implementation.
 */
LIBDEFLATEAPI size_t
libdeflate_deflate_compress(struct libdeflate_compressor *compressor,
			    const void *in, size_t in_nbytes,
			    void *out, size_t out_nbytes_avail);

/*
 * The *_compress_bound() functions return a size derived from 'in_nbytes'.
 * NOTE(review): presumably an upper bound on the matching *_compress()
 * output; confirm against the implementation.
 */
LIBDEFLATEAPI size_t
libdeflate_deflate_compress_bound(struct libdeflate_compressor *compressor,
				  size_t in_nbytes);

LIBDEFLATEAPI size_t
libdeflate_zlib_compress(struct libdeflate_compressor *compressor,
			 const void *in, size_t in_nbytes,
			 void *out, size_t out_nbytes_avail);

LIBDEFLATEAPI size_t
libdeflate_zlib_compress_bound(struct libdeflate_compressor *compressor,
			       size_t in_nbytes);

LIBDEFLATEAPI size_t
libdeflate_gzip_compress(struct libdeflate_compressor *compressor,
			 const void *in, size_t in_nbytes,
			 void *out, size_t out_nbytes_avail);

LIBDEFLATEAPI size_t
libdeflate_gzip_compress_bound(struct libdeflate_compressor *compressor,
			       size_t in_nbytes);

/* Releases a compressor obtained from libdeflate_alloc_compressor[_ex](). */
LIBDEFLATEAPI void
libdeflate_free_compressor(struct libdeflate_compressor *compressor);

/* ----------------------------- Decompression ----------------------------- */

struct libdeflate_decompressor;	/* opaque; defined by the implementation */
struct libdeflate_options;

/* Creates a decompressor. */
LIBDEFLATEAPI struct libdeflate_decompressor *
libdeflate_alloc_decompressor(void);

/* Same as libdeflate_alloc_decompressor(), but also takes an options struct. */
LIBDEFLATEAPI struct libdeflate_decompressor *
libdeflate_alloc_decompressor_ex(const struct libdeflate_options *options);

/*
 * Status returned by every *_decompress*() function.  The numeric values are
 * part of the interface.  The per-value notes are inferred from the
 * identifiers only; confirm exact conditions against the implementation.
 */
enum libdeflate_result {
	/* Decompression succeeded. */
	LIBDEFLATE_SUCCESS = 0,

	/* The input was rejected as invalid. */
	LIBDEFLATE_BAD_DATA = 1,

	/* Less output was produced than expected. */
	LIBDEFLATE_SHORT_OUTPUT = 2,

	/* The output buffer was too small. */
	LIBDEFLATE_INSUFFICIENT_SPACE = 3,
};

/*
 * The *_decompress() functions read 'in_nbytes' bytes from 'in' and write to
 * 'out' (capacity 'out_nbytes_avail'), with 'actual_out_nbytes_ret' as an
 * output parameter for the produced size.  The *_ex() variants add
 * 'actual_in_nbytes_ret' as an output parameter for the consumed input size.
 * NOTE(review): whether the *_ret pointers may be NULL is not visible here.
 */
LIBDEFLATEAPI enum libdeflate_result
libdeflate_deflate_decompress(struct libdeflate_decompressor *decompressor,
			      const void *in, size_t in_nbytes,
			      void *out, size_t out_nbytes_avail,
			      size_t *actual_out_nbytes_ret);

LIBDEFLATEAPI enum libdeflate_result
libdeflate_deflate_decompress_ex(struct libdeflate_decompressor *decompressor,
				 const void *in, size_t in_nbytes,
				 void *out, size_t out_nbytes_avail,
				 size_t *actual_in_nbytes_ret,
				 size_t *actual_out_nbytes_ret);

LIBDEFLATEAPI enum libdeflate_result
libdeflate_zlib_decompress(struct libdeflate_decompressor *decompressor,
			   const void *in, size_t in_nbytes,
			   void *out, size_t out_nbytes_avail,
			   size_t *actual_out_nbytes_ret);

LIBDEFLATEAPI enum libdeflate_result
libdeflate_zlib_decompress_ex(struct libdeflate_decompressor *decompressor,
			      const void *in, size_t in_nbytes,
			      void *out, size_t out_nbytes_avail,
			      size_t *actual_in_nbytes_ret,
			      size_t *actual_out_nbytes_ret);

LIBDEFLATEAPI enum libdeflate_result
libdeflate_gzip_decompress(struct libdeflate_decompressor *decompressor,
			   const void *in, size_t in_nbytes,
			   void *out, size_t out_nbytes_avail,
			   size_t *actual_out_nbytes_ret);

LIBDEFLATEAPI enum libdeflate_result
libdeflate_gzip_decompress_ex(struct libdeflate_decompressor *decompressor,
			      const void *in, size_t in_nbytes,
			      void *out, size_t out_nbytes_avail,
			      size_t *actual_in_nbytes_ret,
			      size_t *actual_out_nbytes_ret);

/* Releases a decompressor obtained from libdeflate_alloc_decompressor[_ex](). */
LIBDEFLATEAPI void
libdeflate_free_decompressor(struct libdeflate_decompressor *decompressor);

/* ------------------------------- Checksums ------------------------------- */

/* Continues an Adler-32 computation from 'adler' over 'len' bytes of 'buffer'. */
LIBDEFLATEAPI uint32_t
libdeflate_adler32(uint32_t adler, const void *buffer, size_t len);

/* Continues a CRC-32 computation from 'crc' over 'len' bytes of 'buffer'. */
LIBDEFLATEAPI uint32_t
libdeflate_crc32(uint32_t crc, const void *buffer, size_t len);

/* --------------------------- Custom allocation --------------------------- */

/*
 * Installs a malloc()/free()-shaped pair of callbacks as the library's
 * allocator.
 */
LIBDEFLATEAPI void
libdeflate_set_memory_allocator(void *(*malloc_func)(size_t),
				void (*free_func)(void *));

/* Options accepted by the *_alloc_*_ex() functions. */
struct libdeflate_options {

	/* Size of this struct as seen by the caller, i.e. sizeof(*this). */
	size_t sizeof_options;

	/*
	 * malloc()/free()-shaped allocator callbacks.
	 * NOTE(review): presumably NULL selects the default allocator; confirm.
	 */
	void *(*malloc_func)(size_t);
	void (*free_func)(void *);
};

#ifdef __cplusplus
}
#endif

#endif /* LIBDEFLATE_H */
14811
14812
#include <stdbool.h>
#include <stddef.h>	/* size_t */
#include <stdint.h>
#ifdef _MSC_VER
#  include <intrin.h>	/* _BitScan*() and other intrinsics */
#  include <stdlib.h>	/* _byteswap_*() */

   /* Silence MSVC warnings that this code base triggers by design. */
#  pragma warning(disable : 4146) /* unary minus applied to unsigned type */

#  pragma warning(disable : 4018) /* signed/unsigned mismatch */
#  pragma warning(disable : 4244) /* conversion, possible loss of data */
#  pragma warning(disable : 4267) /* conversion from size_t, possible loss of data */
#  pragma warning(disable : 4310) /* cast truncates constant value */

#  pragma warning(disable : 4100) /* unreferenced formal parameter */
#  pragma warning(disable : 4127) /* conditional expression is constant */
#  pragma warning(disable : 4189) /* local variable initialized but not referenced */
#  pragma warning(disable : 4232) /* address of dllimport is not static */
#  pragma warning(disable : 4245) /* conversion, signed/unsigned mismatch */
#  pragma warning(disable : 4295) /* array too small for terminating null */
#endif
#ifndef FREESTANDING
#  include <string.h>	/* memcpy() */
#endif
14838
14839
14840
14841
14842
14843			/* Clear any previous ARCH_* definitions, then define at most one from the compiler's predefined target macros. */
14844			#undef ARCH_X86_64
14845			#undef ARCH_X86_32
14846			#undef ARCH_ARM64
14847			#undef ARCH_ARM32
14848			#undef ARCH_RISCV
14849			#ifdef _MSC_VER
14850			/* MSVC spellings; a target defining both _M_X64 and _M_ARM64EC is deliberately not classified as x86-64. */
14851			# if defined(_M_X64) && !defined(_M_ARM64EC)
14852			# define ARCH_X86_64
14853			# elif defined(_M_IX86)
14854			# define ARCH_X86_32
14855			# elif defined(_M_ARM64)
14856			# define ARCH_ARM64
14857			# elif defined(_M_ARM)
14858			# define ARCH_ARM32
14859			# endif
14860			#else /* every non-MSVC compiler: GCC-style target macros; only this branch detects RISC-V */
14861			# if defined(__x86_64__)
14862			# define ARCH_X86_64
14863			# elif defined(__i386__)
14864			# define ARCH_X86_32
14865			# elif defined(__aarch64__)
14866			# define ARCH_ARM64
14867			# elif defined(__arm__)
14868			# define ARCH_ARM32
14869			# elif defined(__riscv)
14870			# define ARCH_RISCV
14871			# endif
14872			#endif
14873
14874
14875
14876
14877
14878			/* Short aliases for the fixed-width integer types. */
14879			typedef uint8_t u8;
14880			typedef uint16_t u16;
14881			typedef uint32_t u32;
14882			typedef uint64_t u64;
14883			typedef int8_t s8;
14884			typedef int16_t s16;
14885			typedef int32_t s32;
14886			typedef int64_t s64;
14887			
14888			/* Under MSVC, ssize_t is defined here, sized by _WIN64. */
14889			#ifdef _MSC_VER
14890			# ifdef _WIN64
14891			typedef long long ssize_t;
14892			# else
14893			typedef long ssize_t;
14894			# endif
14895			#endif
14896			
14897			/* The "machine word" is taken to be size_t. */
14898			typedef size_t machine_word_t;
14899			
14900			/* Bytes per machine word, as an int. */
14901			#define WORDBYTES ((int)sizeof(machine_word_t))
14902			
14903			/* Bits per machine word (8 bits per byte assumed). */
14904			#define WORDBITS (8 * WORDBYTES)
14905
14906
14907
14908
14909
14910			/* Compiler-version predicates; each evaluates to 0 on other compilers. The escaped "\|\|" operators are restored to "||". */
14911			#if defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER)
14912			# define GCC_PREREQ(major, minor) \
14913			(__GNUC__ > (major) || \
14914			(__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)))
14915			# if !GCC_PREREQ(4, 9)
14916			# error "gcc versions older than 4.9 are no longer supported"
14917			# endif
14918			#else
14919			# define GCC_PREREQ(major, minor) 0
14920			#endif
14921			#ifdef __clang__
14922			# ifdef __apple_build_version__ /* Apple clang: compare the build number, ignoring major/minor */
14923			# define CLANG_PREREQ(major, minor, apple_version) \
14924			(__apple_build_version__ >= (apple_version))
14925			# else
14926			# define CLANG_PREREQ(major, minor, apple_version) \
14927			(__clang_major__ > (major) || \
14928			(__clang_major__ == (major) && __clang_minor__ >= (minor)))
14929			# endif
14930			# if !CLANG_PREREQ(3, 9, 8000000)
14931			# error "clang versions older than 3.9 are no longer supported"
14932			# endif
14933			#else
14934			# define CLANG_PREREQ(major, minor, apple_version) 0
14935			#endif
14936			#ifdef _MSC_VER
14937			# define MSVC_PREREQ(version) (_MSC_VER >= (version))
14938			# if !MSVC_PREREQ(1900)
14939			# error "MSVC versions older than Visual Studio 2015 are no longer supported"
14940			# endif
14941			#else
14942			# define MSVC_PREREQ(version) 0
14943			#endif
14944
14945			/* Fallback so __has_attribute() can be used in #if on compilers that lack it. */
14946			#ifndef __has_attribute
14947			# define __has_attribute(attribute) 0
14948			#endif
14949			
14950			/* Same fallback for __has_builtin(). */
14951			#ifndef __has_builtin
14952			# define __has_builtin(builtin) 0
14953			#endif
14954			
14955			/* Under MSVC, "inline" is mapped to the __inline keyword. */
14956			#ifdef _MSC_VER
14957			# define inline __inline
14958			#endif
14959
14960			/* forceinline: strongest inlining request available ("||" restored throughout this group). */
14961			#if defined(__GNUC__) || __has_attribute(always_inline)
14962			# define forceinline inline __attribute__((always_inline))
14963			#elif defined(_MSC_VER)
14964			# define forceinline __forceinline
14965			#else
14966			# define forceinline inline
14967			#endif
14968			
14969			/* MAYBE_UNUSED: the "unused" attribute where supported, otherwise empty. */
14970			#if defined(__GNUC__) || __has_attribute(unused)
14971			# define MAYBE_UNUSED __attribute__((unused))
14972			#else
14973			# define MAYBE_UNUSED
14974			#endif
14975			
14976			/* NORETURN: the "noreturn" attribute where supported, otherwise empty. */
14977			#if defined(__GNUC__) || __has_attribute(noreturn)
14978			# define NORETURN __attribute__((noreturn))
14979			#else
14980			# define NORETURN
14981			#endif
14982			
14983			/* When __STDC_VERSION__ is absent or below 201112L, map "restrict" to __restrict__ or to nothing. */
14984			#if !defined(__STDC_VERSION__) || (__STDC_VERSION__ < 201112L)
14985			# if defined(__GNUC__) || defined(__clang__)
14986			# define restrict __restrict__
14987			# else
14988			# define restrict
14989			# endif
14990			#endif
14991			
14992			/* likely(expr): branch hint; the fallback is the bare expression (not normalised to 0/1). */
14993			#if defined(__GNUC__) || __has_builtin(__builtin_expect)
14994			# define likely(expr) __builtin_expect(!!(expr), 1)
14995			#else
14996			# define likely(expr) (expr)
14997			#endif
14998			
14999			/* unlikely(expr): the opposite branch hint. */
15000			#if defined(__GNUC__) || __has_builtin(__builtin_expect)
15001			# define unlikely(expr) __builtin_expect(!!(expr), 0)
15002			#else
15003			# define unlikely(expr) (expr)
15004			#endif
15005
15006			/* prefetchr(addr): prefetch for reading; expands to nothing when no intrinsic is known ("||" restored). */
15007			#undef prefetchr
15008			#if defined(__GNUC__) || __has_builtin(__builtin_prefetch)
15009			# define prefetchr(addr) __builtin_prefetch((addr), 0)
15010			#elif defined(_MSC_VER)
15011			# if defined(ARCH_X86_32) || defined(ARCH_X86_64)
15012			# define prefetchr(addr) _mm_prefetch((addr), _MM_HINT_T0)
15013			# elif defined(ARCH_ARM64)
15014			# define prefetchr(addr) __prefetch2((addr), 0x00 )
15015			# elif defined(ARCH_ARM32)
15016			# define prefetchr(addr) __prefetch(addr)
15017			# endif
15018			#endif
15019			#ifndef prefetchr
15020			# define prefetchr(addr)
15021			#endif
15022			
15023			/* prefetchw(addr): prefetch for writing; same fallback structure as prefetchr. */
15024			#undef prefetchw
15025			#if defined(__GNUC__) || __has_builtin(__builtin_prefetch)
15026			# define prefetchw(addr) __builtin_prefetch((addr), 1)
15027			#elif defined(_MSC_VER)
15028			# if defined(ARCH_X86_32) || defined(ARCH_X86_64)
15029			# define prefetchw(addr) _m_prefetchw(addr)
15030			# elif defined(ARCH_ARM64)
15031			# define prefetchw(addr) __prefetch2((addr), 0x10 )
15032			# elif defined(ARCH_ARM32)
15033			# define prefetchw(addr) __prefetchw(addr)
15034			# endif
15035			#endif
15036			#ifndef prefetchw
15037			# define prefetchw(addr)
15038			#endif
15039
15040
15041			#undef _aligned_attribute
15042			#if defined(__GNUC__) \|\| __has_attribute(aligned)
15043			# define _aligned_attribute(n) __attribute__((aligned(n)))
15044			#elif defined(_MSC_VER)
15045			# define _aligned_attribute(n) __declspec(align(n))
15046			#endif
15047
15048
15049			#if defined(__GNUC__) \|\| __has_attribute(target)
15050			# define _target_attribute(attrs) __attribute__((target(attrs)))
15051			#else
15052			# define _target_attribute(attrs)
15053			#endif
15054
15055
15056
15057
15058			/* Small helper macros. MIN/MAX evaluate an argument twice, so avoid side effects in them. */
15059			#define ARRAY_LEN(A) (sizeof(A) / sizeof((A)[0]))
15060			#define MIN(a, b) ((a) <= (b) ? (a) : (b))
15061			#define MAX(a, b) ((a) >= (b) ? (a) : (b))
15062			#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
15063			#define STATIC_ASSERT(expr) ((void)sizeof(char[1 - 2 * !(expr)])) /* array size becomes -1 when expr is false, so compilation fails */
15064			#define ALIGN(n, a) (((n) + (a) - 1) & ~((a) - 1)) /* the mask trick is only correct when a is a power of two */
15065			#define ROUND_UP(n, d) ((d) * DIV_ROUND_UP((n), (d)))
15066
15067
15068
15069
15070
15071			/* CPU_IS_LITTLE_ENDIAN(): compile-time constant where the compiler provides one, else a runtime probe. */
15072			#if defined(__BYTE_ORDER__)
15073			# define CPU_IS_LITTLE_ENDIAN() (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
15074			#elif defined(_MSC_VER)
15075			# define CPU_IS_LITTLE_ENDIAN() true
15076			#else
15077			static forceinline bool CPU_IS_LITTLE_ENDIAN(void)
15078			{
15079			union {
15080			u32 w;
15081			u8 b;
15082			} u;
15083			
15084			u.w = 1; /* store 1 as a 32-bit word ... */
15085			return u.b; /* ... its first byte is 1 only on a little-endian CPU */
15086			}
15087			#endif
15088
15089			/* Swaps the two bytes of a 16-bit value (escaped "\|" / "\|\|" operators restored). */
15090			static forceinline u16 bswap16(u16 v)
15091			{
15092			#if defined(__GNUC__) || __has_builtin(__builtin_bswap16)
15093			return __builtin_bswap16(v);
15094			#elif defined(_MSC_VER)
15095			return _byteswap_ushort(v);
15096			#else
15097			return (v << 8) | (v >> 8); /* truncated back to 16 bits by the u16 return type */
15098			#endif
15099			}
15100
15101			/* Reverses the byte order of a 32-bit value (escaped operators restored). */
15102			static forceinline u32 bswap32(u32 v)
15103			{
15104			#if defined(__GNUC__) || __has_builtin(__builtin_bswap32)
15105			return __builtin_bswap32(v);
15106			#elif defined(_MSC_VER)
15107			return _byteswap_ulong(v);
15108			#else
15109			return ((v & 0x000000FF) << 24) |
15110			((v & 0x0000FF00) << 8) |
15111			((v & 0x00FF0000) >> 8) |
15112			((v & 0xFF000000) >> 24);
15113			#endif
15114			}
15115
15116			/* Reverses the byte order of a 64-bit value (escaped operators restored). */
15117			static forceinline u64 bswap64(u64 v)
15118			{
15119			#if defined(__GNUC__) || __has_builtin(__builtin_bswap64)
15120			return __builtin_bswap64(v);
15121			#elif defined(_MSC_VER)
15122			return _byteswap_uint64(v);
15123			#else
15124			return ((v & 0x00000000000000FF) << 56) |
15125			((v & 0x000000000000FF00) << 40) |
15126			((v & 0x0000000000FF0000) << 24) |
15127			((v & 0x00000000FF000000) << 8) |
15128			((v & 0x000000FF00000000) >> 8) |
15129			((v & 0x0000FF0000000000) >> 24) |
15130			((v & 0x00FF000000000000) >> 40) |
15131			((v & 0xFF00000000000000) >> 56);
15132			#endif
15133			}
15134			/* leN_bswap / beN_bswap: convert between CPU byte order and the named byte order; a no-op when they already match. */
15135			#define le16_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap16(v))
15136			#define le32_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap32(v))
15137			#define le64_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap64(v))
15138			#define be16_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? bswap16(v) : (v))
15139			#define be32_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? bswap32(v) : (v))
15140			#define be64_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? bswap64(v) : (v))
15141
15142
15143
15144
15145
15146			/* UNALIGNED_ACCESS_IS_FAST: 1 on the listed GCC/clang targets and on any MSVC target, else 0 ("||" restored). */
15147			#if (defined(__GNUC__) || defined(__clang__)) && \
15148			(defined(ARCH_X86_64) || defined(ARCH_X86_32) || \
15149			defined(__ARM_FEATURE_UNALIGNED) || defined(__powerpc64__) || \
15150			defined(__riscv_misaligned_fast) || \
15151			defined(__wasm__))
15152			# define UNALIGNED_ACCESS_IS_FAST 1
15153			#elif defined(_MSC_VER)
15154			# define UNALIGNED_ACCESS_IS_FAST 1
15155			#else
15156			# define UNALIGNED_ACCESS_IS_FAST 0
15157			#endif
15158
15159
15160			/* MEMCOPY: the copy primitive used by the helpers below; the compiler builtin in freestanding builds. */
15161			#ifdef FREESTANDING
15162			# define MEMCOPY __builtin_memcpy
15163			#else
15164			# define MEMCOPY memcpy
15165			#endif
15166			
15167			/* DEFINE_UNALIGNED_TYPE(type) generates load_<type>_unaligned() and store_<type>_unaligned(), */
15168			/* which move a value of that type to/from an arbitrary address by copying sizeof(type) bytes. */
15169			#define DEFINE_UNALIGNED_TYPE(type) \
15170			static forceinline type \
15171			load_##type##_unaligned(const void *p) \
15172			{ \
15173			type v; \
15174			\
15175			MEMCOPY(&v, p, sizeof(v)); \
15176			return v; \
15177			} \
15178			\
15179			static forceinline void \
15180			store_##type##_unaligned(type v, void *p) \
15181			{ \
15182			MEMCOPY(p, &v, sizeof(v)); \
15183			}
15184			
15185			DEFINE_UNALIGNED_TYPE(u16)
15186			DEFINE_UNALIGNED_TYPE(u32)
15187			DEFINE_UNALIGNED_TYPE(u64)
15188			DEFINE_UNALIGNED_TYPE(machine_word_t)
15189			
15190			#undef MEMCOPY
15191			/* Shorter names for the machine-word variants. */
15192			#define load_word_unaligned load_machine_word_t_unaligned
15193			#define store_word_unaligned store_machine_word_t_unaligned
15194
15195
15196			/* Reads a 16-bit little-endian value from p (any alignment); escaped "\|" restored to "|". */
15197			static forceinline u16
15198			get_unaligned_le16(const u8 *p)
15199			{
15200			if (UNALIGNED_ACCESS_IS_FAST)
15201			return le16_bswap(load_u16_unaligned(p)); /* whole-word load, then fix byte order */
15202			else
15203			return ((u16)p[1] << 8) | p[0]; /* assemble byte by byte */
15204			}
15205			/* Reads a 16-bit big-endian value from p (any alignment); escaped "\|" restored to "|". */
15206			static forceinline u16
15207			get_unaligned_be16(const u8 *p)
15208			{
15209			if (UNALIGNED_ACCESS_IS_FAST)
15210			return be16_bswap(load_u16_unaligned(p));
15211			else
15212			return ((u16)p[0] << 8) | p[1];
15213			}
15214			/* Reads a 32-bit little-endian value from p (any alignment); escaped "\|" restored to "|". */
15215			static forceinline u32
15216			get_unaligned_le32(const u8 *p)
15217			{
15218			if (UNALIGNED_ACCESS_IS_FAST)
15219			return le32_bswap(load_u32_unaligned(p));
15220			else
15221			return ((u32)p[3] << 24) | ((u32)p[2] << 16) |
15222			((u32)p[1] << 8) | p[0];
15223			}
15224			/* Reads a 32-bit big-endian value from p (any alignment); escaped "\|" restored to "|". */
15225			static forceinline u32
15226			get_unaligned_be32(const u8 *p)
15227			{
15228			if (UNALIGNED_ACCESS_IS_FAST)
15229			return be32_bswap(load_u32_unaligned(p));
15230			else
15231			return ((u32)p[0] << 24) | ((u32)p[1] << 16) |
15232			((u32)p[2] << 8) | p[3];
15233			}
15234			/* Reads a 64-bit little-endian value from p (any alignment); escaped "\|" restored to "|". */
15235			static forceinline u64
15236			get_unaligned_le64(const u8 *p)
15237			{
15238			if (UNALIGNED_ACCESS_IS_FAST)
15239			return le64_bswap(load_u64_unaligned(p));
15240			else
15241			return ((u64)p[7] << 56) | ((u64)p[6] << 48) |
15242			((u64)p[5] << 40) | ((u64)p[4] << 32) |
15243			((u64)p[3] << 24) | ((u64)p[2] << 16) |
15244			((u64)p[1] << 8) | p[0];
15245			}
15246			/* Reads one little-endian machine word from p, dispatching on WORDBITS ("||" restored). */
15247			static forceinline machine_word_t
15248			get_unaligned_leword(const u8 *p)
15249			{
15250			STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
15251			if (WORDBITS == 32)
15252			return get_unaligned_le32(p);
15253			else
15254			return get_unaligned_le64(p);
15255			}
15256
15257
15258			/* Stores v at p as 16-bit little-endian; p may have any alignment. */
15259			static forceinline void
15260			put_unaligned_le16(u16 v, u8 *p)
15261			{
15262			if (UNALIGNED_ACCESS_IS_FAST) {
15263			store_u16_unaligned(le16_bswap(v), p); /* fix byte order, then one whole-word store */
15264			} else {
15265			p[0] = (u8)(v >> 0); /* least significant byte first */
15266			p[1] = (u8)(v >> 8);
15267			}
15268			}
15269			/* Stores v at p as 16-bit big-endian; p may have any alignment. */
15270			static forceinline void
15271			put_unaligned_be16(u16 v, u8 *p)
15272			{
15273			if (UNALIGNED_ACCESS_IS_FAST) {
15274			store_u16_unaligned(be16_bswap(v), p);
15275			} else {
15276			p[0] = (u8)(v >> 8); /* most significant byte first */
15277			p[1] = (u8)(v >> 0);
15278			}
15279			}
15280			/* Stores v at p as 32-bit little-endian; p may have any alignment. */
15281			static forceinline void
15282			put_unaligned_le32(u32 v, u8 *p)
15283			{
15284			if (UNALIGNED_ACCESS_IS_FAST) {
15285			store_u32_unaligned(le32_bswap(v), p);
15286			} else {
15287			p[0] = (u8)(v >> 0); /* least significant byte first */
15288			p[1] = (u8)(v >> 8);
15289			p[2] = (u8)(v >> 16);
15290			p[3] = (u8)(v >> 24);
15291			}
15292			}
15293			/* Stores v at p as 32-bit big-endian; p may have any alignment. */
15294			static forceinline void
15295			put_unaligned_be32(u32 v, u8 *p)
15296			{
15297			if (UNALIGNED_ACCESS_IS_FAST) {
15298			store_u32_unaligned(be32_bswap(v), p);
15299			} else {
15300			p[0] = (u8)(v >> 24); /* most significant byte first */
15301			p[1] = (u8)(v >> 16);
15302			p[2] = (u8)(v >> 8);
15303			p[3] = (u8)(v >> 0);
15304			}
15305			}
15306			/* Stores v at p as 64-bit little-endian; p may have any alignment. */
15307			static forceinline void
15308			put_unaligned_le64(u64 v, u8 *p)
15309			{
15310			if (UNALIGNED_ACCESS_IS_FAST) {
15311			store_u64_unaligned(le64_bswap(v), p);
15312			} else {
15313			p[0] = (u8)(v >> 0); /* least significant byte first */
15314			p[1] = (u8)(v >> 8);
15315			p[2] = (u8)(v >> 16);
15316			p[3] = (u8)(v >> 24);
15317			p[4] = (u8)(v >> 32);
15318			p[5] = (u8)(v >> 40);
15319			p[6] = (u8)(v >> 48);
15320			p[7] = (u8)(v >> 56);
15321			}
15322			}
15323			/* Stores one machine word at p in little-endian order, dispatching on WORDBITS ("||" restored). */
15324			static forceinline void
15325			put_unaligned_leword(machine_word_t v, u8 *p)
15326			{
15327			STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
15328			if (WORDBITS == 32)
15329			put_unaligned_le32(v, p);
15330			else
15331			put_unaligned_le64(v, p);
15332			}
15333
15334
15335
15336
15337
15338
15339
15340			static forceinline unsigned
15341			bsr32(u32 v)
15342			{
15343			#if defined(__GNUC__) \|\| __has_builtin(__builtin_clz)
15344			return 31 - __builtin_clz(v);
15345			#elif defined(_MSC_VER)
15346			unsigned long i;
15347
15348			_BitScanReverse(&i, v);
15349			return i;
15350			#else
15351			unsigned i = 0;
15352
15353			while ((v >>= 1) != 0)
15354			i++;
15355			return i;
15356			#endif
15357			}
15358			/* 64-bit bit scan reverse; pass v != 0. 32-bit MSVC uses the portable loop because the intrinsic branch requires _WIN64. */
15359			static forceinline unsigned
15360			bsr64(u64 v)
15361			{
15362			#if defined(__GNUC__) || __has_builtin(__builtin_clzll)
15363			return 63 - __builtin_clzll(v);
15364			#elif defined(_MSC_VER) && defined(_WIN64)
15365			unsigned long i;
15366			
15367			_BitScanReverse64(&i, v);
15368			return i;
15369			#else
15370			unsigned i = 0;
15371			
15372			while ((v >>= 1) != 0)
15373			i++;
15374			return i;
15375			#endif
15376			}
15377			/* Machine-word bit scan reverse: forwards to bsr32() or bsr64() according to WORDBITS ("||" restored). */
15378			static forceinline unsigned
15379			bsrw(machine_word_t v)
15380			{
15381			STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
15382			if (WORDBITS == 32)
15383			return bsr32(v);
15384			else
15385			return bsr64(v);
15386			}
15387
15388
15389			/* Bit scan forward: index of the lowest set bit of v. Pass v != 0 (the portable loop never terminates for 0). "||" restored. */
15390			static forceinline unsigned
15391			bsf32(u32 v)
15392			{
15393			#if defined(__GNUC__) || __has_builtin(__builtin_ctz)
15394			return __builtin_ctz(v);
15395			#elif defined(_MSC_VER)
15396			unsigned long i;
15397			
15398			_BitScanForward(&i, v);
15399			return i;
15400			#else
15401			unsigned i = 0;
15402			
15403			for (; (v & 1) == 0; v >>= 1) /* shift right until bit 0 is set */
15404			i++;
15405			return i;
15406			#endif
15407			}
15408			/* 64-bit bit scan forward; pass v != 0. 32-bit MSVC uses the portable loop because the intrinsic branch requires _WIN64. */
15409			static forceinline unsigned
15410			bsf64(u64 v)
15411			{
15412			#if defined(__GNUC__) || __has_builtin(__builtin_ctzll)
15413			return __builtin_ctzll(v);
15414			#elif defined(_MSC_VER) && defined(_WIN64)
15415			unsigned long i;
15416			
15417			_BitScanForward64(&i, v);
15418			return i;
15419			#else
15420			unsigned i = 0;
15421			
15422			for (; (v & 1) == 0; v >>= 1)
15423			i++;
15424			return i;
15425			#endif
15426			}
15427			/* Machine-word bit scan forward: forwards to bsf32() or bsf64() according to WORDBITS ("||" restored). */
15428			static forceinline unsigned
15429			bsfw(machine_word_t v)
15430			{
15431			STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
15432			if (WORDBITS == 32)
15433			return bsf32(v);
15434			else
15435			return bsf64(v);
15436			}
15437
15438			/* rbit32(v): executes the ARM "rbit" instruction on v. Only defined on the ARM targets below ("||" restored). */
15439			#undef rbit32
15440			#if (defined(__GNUC__) || defined(__clang__)) && defined(ARCH_ARM32) && \
15441			(__ARM_ARCH >= 7 || (__ARM_ARCH == 6 && defined(__ARM_ARCH_6T2__)))
15442			static forceinline u32
15443			rbit32(u32 v)
15444			{
15445			__asm__("rbit %0, %1" : "=r" (v) : "r" (v));
15446			return v;
15447			}
15448			#define rbit32 rbit32 /* self-named macro, presumably so availability can be tested with #ifdef rbit32 */
15449			#elif (defined(__GNUC__) || defined(__clang__)) && defined(ARCH_ARM64)
15450			static forceinline u32
15451			rbit32(u32 v)
15452			{
15453			__asm__("rbit %w0, %w1" : "=r" (v) : "r" (v)); /* %w selects the 32-bit register view */
15454			return v;
15455			}
15456			#define rbit32 rbit32
15457			#endif
15458
15459			#endif
15460
15461			/* Allocator callback types: function pointers (the '*' declarators lost in extraction are restored). */
15462			typedef void *(*malloc_func_t)(size_t);
15463			typedef void (*free_func_t)(void *);
15464			
15465			extern malloc_func_t libdeflate_default_malloc_func;
15466			extern free_func_t libdeflate_default_free_func;
15467			/* Aligned allocation helpers that go through the supplied callbacks. */
15468			void *libdeflate_aligned_malloc(malloc_func_t malloc_func,
15469			size_t alignment, size_t size);
15470			void libdeflate_aligned_free(free_func_t free_func, void *ptr);
15471
15472			#ifdef FREESTANDING
15473			/* No <string.h> in this mode: declare the mem* functions and route calls to compiler builtins (pointer '*' restored). */
15474			void *memset(void *s, int c, size_t n);
15475			#define memset(s, c, n) __builtin_memset((s), (c), (n))
15476			
15477			void *memcpy(void *dest, const void *src, size_t n);
15478			#define memcpy(dest, src, n) __builtin_memcpy((dest), (src), (n))
15479			
15480			void *memmove(void *dest, const void *src, size_t n);
15481			#define memmove(dest, src, n) __builtin_memmove((dest), (src), (n))
15482			
15483			int memcmp(const void *s1, const void *s2, size_t n);
15484			#define memcmp(s1, s2, n) __builtin_memcmp((s1), (s2), (n))
15485			/* Assertions are forced off in freestanding builds. */
15486			#undef LIBDEFLATE_ENABLE_ASSERTIONS
15487			#else
15488			# include <string.h>
15489			/* Assertions are always enabled when the clang static analyzer is running. */
15490			# ifdef __clang_analyzer__
15491			# define LIBDEFLATE_ENABLE_ASSERTIONS
15492			# endif
15493			#endif
15494
15495			/* ASSERT(expr): reports through libdeflate_assertion_failed() when enabled; otherwise only evaluates expr. */
15496			#ifdef LIBDEFLATE_ENABLE_ASSERTIONS
15497			NORETURN void
15498			libdeflate_assertion_failed(const char *expr, const char *file, int line);
15499			#define ASSERT(expr) { if (unlikely(!(expr))) \
15500			libdeflate_assertion_failed(#expr, __FILE__, __LINE__); }
15501			#else
15502			#define ASSERT(expr) (void)(expr)
15503			#endif
15504			/* Token pasting with macro expansion of the operands; ADD_SUFFIX appends whatever SUFFIX is defined as at the point of use. */
15505			#define CONCAT_IMPL(a, b) a##b
15506			#define CONCAT(a, b) CONCAT_IMPL(a, b)
15507			#define ADD_SUFFIX(name) CONCAT(name, SUFFIX)
15508
15509			#endif
15510
15511			/* ARM CPU feature bitmask, one bit per feature ("||" restored in the guard). */
15512			#if defined(ARCH_ARM32) || defined(ARCH_ARM64)
15513			
15514			#define ARM_CPU_FEATURE_NEON (1 << 0)
15515			#define ARM_CPU_FEATURE_PMULL (1 << 1)
15516			
15517			#define ARM_CPU_FEATURE_PREFER_PMULL (1 << 2)
15518			#define ARM_CPU_FEATURE_CRC32 (1 << 3)
15519			#define ARM_CPU_FEATURE_SHA3 (1 << 4)
15520			#define ARM_CPU_FEATURE_DOTPROD (1 << 5)
15521			/* Runtime feature detection is only compiled for the compiler/OS combinations below ("||" restored). */
15522			#if !defined(FREESTANDING) && \
15523			(defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER)) && \
15524			(defined(__linux__) || \
15525			(defined(__APPLE__) && defined(ARCH_ARM64)) || \
15526			(defined(_WIN32) && defined(ARCH_ARM64)))
15527			
15528			# define ARM_CPU_FEATURES_KNOWN (1U << 31)
15529			extern volatile u32 libdeflate_arm_cpu_features;
15530			
15531			void libdeflate_init_arm_cpu_features(void);
15532			/* Returns the cached feature mask, calling the init routine first while the cache is still 0. */
15533			static inline u32 get_arm_cpu_features(void)
15534			{
15535			if (libdeflate_arm_cpu_features == 0)
15536			libdeflate_init_arm_cpu_features();
15537			return libdeflate_arm_cpu_features;
15538			}
15539			#else
15540			static inline u32 get_arm_cpu_features(void) { return 0; } /* no runtime detection: report no features */
15541			#endif
15542
15543			/* HAVE_NEON(features): constant 1 when NEON is part of the build target, else a runtime mask test ("||" and header name restored). */
15544			#if defined(__ARM_NEON) || (defined(_MSC_VER) && defined(ARCH_ARM64))
15545			# define HAVE_NEON(features) 1
15546			# define HAVE_NEON_NATIVE 1
15547			#else
15548			# define HAVE_NEON(features) ((features) & ARM_CPU_FEATURE_NEON)
15549			# define HAVE_NEON_NATIVE 0
15550			#endif
15551			/* HAVE_NEON_INTRIN: whether NEON intrinsics can be compiled at all. */
15552			#if (defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER)) && \
15553			(HAVE_NEON_NATIVE || (GCC_PREREQ(6, 1) && defined(__ARM_FP)))
15554			# define HAVE_NEON_INTRIN 1
15555			# include <arm_neon.h>
15556			#else
15557			# define HAVE_NEON_INTRIN 0
15558			#endif
15559
15560			/* HAVE_PMULL(features): constant 1 when __ARM_FEATURE_CRYPTO is defined, else a runtime mask test ("||" restored). */
15561			#ifdef __ARM_FEATURE_CRYPTO
15562			# define HAVE_PMULL(features) 1
15563			#else
15564			# define HAVE_PMULL(features) ((features) & ARM_CPU_FEATURE_PMULL)
15565			#endif
15566			#if defined(ARCH_ARM64) && HAVE_NEON_INTRIN && \
15567			(GCC_PREREQ(7, 1) || defined(__clang__) || defined(_MSC_VER)) && \
15568			CPU_IS_LITTLE_ENDIAN()
15569			# define HAVE_PMULL_INTRIN 1
15570			/* compat_vmull_p64(): under MSVC the operands are wrapped with vcreate_p64() first. */
15571			# ifdef _MSC_VER
15572			# define compat_vmull_p64(a, b) vmull_p64(vcreate_p64(a), vcreate_p64(b))
15573			# else
15574			# define compat_vmull_p64(a, b) vmull_p64((a), (b))
15575			# endif
15576			#else
15577			# define HAVE_PMULL_INTRIN 0
15578			#endif
15579
15580			/* HAVE_CRC32(features): constant 1 when the target enables CRC32, else a runtime mask test ("||" and header name restored). */
15581			#ifdef __ARM_FEATURE_CRC32
15582			# define HAVE_CRC32(features) 1
15583			#else
15584			# define HAVE_CRC32(features) ((features) & ARM_CPU_FEATURE_CRC32)
15585			#endif
15586			#if defined(ARCH_ARM64) && \
15587			(defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER))
15588			# define HAVE_CRC32_INTRIN 1
15589			# if defined(__GNUC__) || defined(__clang__)
15590			# include <arm_acle.h>
15591			# endif
15592			/* clang older than 16 without target CRC32 support: replace the __crc32* intrinsics with inline asm. */
15593			# if defined(__clang__) && !CLANG_PREREQ(16, 0, 16000000) && \
15594			!defined(__ARM_FEATURE_CRC32)
15595			# undef __crc32b
15596			# define __crc32b(a, b) \
15597			({ uint32_t res; \
15598			__asm__("crc32b %w0, %w1, %w2" \
15599			: "=r" (res) : "r" (a), "r" (b)); \
15600			res; })
15601			# undef __crc32h
15602			# define __crc32h(a, b) \
15603			({ uint32_t res; \
15604			__asm__("crc32h %w0, %w1, %w2" \
15605			: "=r" (res) : "r" (a), "r" (b)); \
15606			res; })
15607			# undef __crc32w
15608			# define __crc32w(a, b) \
15609			({ uint32_t res; \
15610			__asm__("crc32w %w0, %w1, %w2" \
15611			: "=r" (res) : "r" (a), "r" (b)); \
15612			res; })
15613			# undef __crc32d
15614			# define __crc32d(a, b) \
15615			({ uint32_t res; \
15616			__asm__("crc32x %w0, %w1, %2" \
15617			: "=r" (res) : "r" (a), "r" (b)); \
15618			res; })
15619			# pragma clang diagnostic ignored "-Wgnu-statement-expression"
15620			# endif
15621			#else
15622			# define HAVE_CRC32_INTRIN 0
15623			#endif
15624
15625			/* HAVE_SHA3(features): constant 1 when the target enables SHA3, else a runtime mask test ("||" restored). */
15626			#ifdef __ARM_FEATURE_SHA3
15627			# define HAVE_SHA3(features) 1
15628			#else
15629			# define HAVE_SHA3(features) ((features) & ARM_CPU_FEATURE_SHA3)
15630			#endif
15631			#if defined(ARCH_ARM64) && HAVE_NEON_INTRIN && \
15632			(GCC_PREREQ(9, 1) || \
15633			CLANG_PREREQ(7, 0, 10010463) )
15634			# define HAVE_SHA3_INTRIN 1
15635			/* clang older than 16 without target SHA3 support: provide veor3q_u8() as inline asm (eor3). */
15636			# if defined(__clang__) && !CLANG_PREREQ(16, 0, 16000000) && \
15637			!defined(__ARM_FEATURE_SHA3)
15638			# undef veor3q_u8
15639			# define veor3q_u8(a, b, c) \
15640			({ uint8x16_t res; \
15641			__asm__("eor3 %0.16b, %1.16b, %2.16b, %3.16b" \
15642			: "=w" (res) : "w" (a), "w" (b), "w" (c)); \
15643			res; })
15644			# pragma clang diagnostic ignored "-Wgnu-statement-expression"
15645			# endif
15646			#else
15647			# define HAVE_SHA3_INTRIN 0
15648			#endif
15649
15650			/* HAVE_DOTPROD(features): constant 1 when the target enables dot-product, else a runtime mask test ("||" restored). */
15651			#ifdef __ARM_FEATURE_DOTPROD
15652			# define HAVE_DOTPROD(features) 1
15653			#else
15654			# define HAVE_DOTPROD(features) ((features) & ARM_CPU_FEATURE_DOTPROD)
15655			#endif
15656			#if defined(ARCH_ARM64) && HAVE_NEON_INTRIN && \
15657			(GCC_PREREQ(8, 1) || CLANG_PREREQ(7, 0, 10010000) || defined(_MSC_VER))
15658			# define HAVE_DOTPROD_INTRIN 1
15659			/* clang older than 16 without target dot-product support: provide vdotq_u32() as inline asm (udot). */
15660			# if defined(__clang__) && !CLANG_PREREQ(16, 0, 16000000) && \
15661			!defined(__ARM_FEATURE_DOTPROD)
15662			# undef vdotq_u32
15663			# define vdotq_u32(a, b, c) \
15664			({ uint32x4_t res = (a); \
15665			__asm__("udot %0.4s, %1.16b, %2.16b" \
15666			: "+w" (res) : "w" (b), "w" (c)); \
15667			res; })
15668			# pragma clang diagnostic ignored "-Wgnu-statement-expression"
15669			# endif
15670			#else
15671			# define HAVE_DOTPROD_INTRIN 0
15672			#endif
15673
15674			#endif
15675
15676			#endif
15677
15678			/* NEON fill: writes MATCHFINDER_INITVAL into every 16-bit entry of data[0 .. size bytes). */
15679			#if HAVE_NEON_NATIVE
15680			static forceinline void
15681			matchfinder_init_neon(mf_pos_t *data, size_t size)
15682			{
15683			int16x8_t *p = (int16x8_t *)data; /* pointer declarators restored; p is indexed and advanced below */
15684			int16x8_t v = vdupq_n_s16(MATCHFINDER_INITVAL);
15685			
15686			STATIC_ASSERT(MATCHFINDER_MEM_ALIGNMENT % sizeof(*p) == 0);
15687			STATIC_ASSERT(MATCHFINDER_SIZE_ALIGNMENT % (4 * sizeof(*p)) == 0);
15688			STATIC_ASSERT(sizeof(mf_pos_t) == 2);
15689			/* Four vectors per iteration; do/while means size must be a nonzero multiple of 4 * sizeof(*p). */
15690			do {
15691			p[0] = v;
15692			p[1] = v;
15693			p[2] = v;
15694			p[3] = v;
15695			p += 4;
15696			size -= 4 * sizeof(*p);
15697			} while (size != 0);
15698			}
15699			#define matchfinder_init matchfinder_init_neon
15700			/* NEON rebase: saturating-adds the 16-bit pattern of -MATCHFINDER_WINDOW_SIZE to every entry of data[0 .. size bytes). */
15701			static forceinline void
15702			matchfinder_rebase_neon(mf_pos_t *data, size_t size)
15703			{
15704			int16x8_t *p = (int16x8_t *)data; /* pointer declarators restored; p is indexed and advanced below */
15705			int16x8_t v = vdupq_n_s16((u16)-MATCHFINDER_WINDOW_SIZE);
15706			
15707			STATIC_ASSERT(MATCHFINDER_MEM_ALIGNMENT % sizeof(*p) == 0);
15708			STATIC_ASSERT(MATCHFINDER_SIZE_ALIGNMENT % (4 * sizeof(*p)) == 0);
15709			STATIC_ASSERT(sizeof(mf_pos_t) == 2);
15710			/* Four vectors per iteration; do/while means size must be a nonzero multiple of 4 * sizeof(*p). */
15711			do {
15712			p[0] = vqaddq_s16(p[0], v);
15713			p[1] = vqaddq_s16(p[1], v);
15714			p[2] = vqaddq_s16(p[2], v);
15715			p[3] = vqaddq_s16(p[3], v);
15716			p += 4;
15717			size -= 4 * sizeof(*p);
15718			} while (size != 0);
15719			}
15720			#define matchfinder_rebase matchfinder_rebase_neon
15721
15722			#endif
15723
15724			#endif
15725
15726			# elif defined(ARCH_RISCV)
15727			# include "riscv/matchfinder_impl.h"
15728			# elif defined(ARCH_X86_32) || defined(ARCH_X86_64)
15729			/* # include "x86/matchfinder_impl.h" */
15730
15731
15732			#ifndef LIB_X86_MATCHFINDER_IMPL_H
15733			#define LIB_X86_MATCHFINDER_IMPL_H
15734
15735			/* #include "x86-cpu_features.h" */
15736
15737
15738			#ifndef LIB_X86_CPU_FEATURES_H
15739			#define LIB_X86_CPU_FEATURES_H
15740
15741			/* #include "lib_common.h" */
15742
15743
15744			#ifndef LIB_LIB_COMMON_H
15745			#define LIB_LIB_COMMON_H
15746
15747			#ifdef LIBDEFLATE_H
15748
15749			# error "lib_common.h must always be included before libdeflate.h"
15750			#endif
15751			/* LIBDEFLATE_EXPORT_SYM: symbol-export decoration used when building the library ("||" restored). */
15752			#if defined(LIBDEFLATE_DLL) && (defined(_WIN32) || defined(__CYGWIN__))
15753			# define LIBDEFLATE_EXPORT_SYM __declspec(dllexport)
15754			#elif defined(__GNUC__)
15755			# define LIBDEFLATE_EXPORT_SYM __attribute__((visibility("default")))
15756			#else
15757			# define LIBDEFLATE_EXPORT_SYM
15758			#endif
15759			
15760			/* LIBDEFLATE_ALIGN_STACK: on 32-bit x86 with GCC-compatible compilers, API entry points get force_align_arg_pointer. */
15761			#if defined(__GNUC__) && defined(__i386__)
15762			# define LIBDEFLATE_ALIGN_STACK __attribute__((force_align_arg_pointer))
15763			#else
15764			# define LIBDEFLATE_ALIGN_STACK
15765			#endif
15766			
15767			#define LIBDEFLATEAPI LIBDEFLATE_EXPORT_SYM LIBDEFLATE_ALIGN_STACK
15768
15769			/* #include "../common_defs.h" */
15770
15771
15772			#ifndef COMMON_DEFS_H
15773			#define COMMON_DEFS_H
15774
15775			/* #include "libdeflate.h" */
15776
15777
15778			#ifndef LIBDEFLATE_H
15779			#define LIBDEFLATE_H
15780
15781			#include <stddef.h> /* size_t */
15782			#include <stdint.h> /* uint32_t */
15783			
15784			#ifdef __cplusplus
15785			extern "C" {
15786			#endif
15787			
15788			#define LIBDEFLATE_VERSION_MAJOR 1
15789			#define LIBDEFLATE_VERSION_MINOR 25
15790			#define LIBDEFLATE_VERSION_STRING "1.25"
15791			
15792			/* LIBDEFLATEAPI default for users of the header: dllimport for Windows/Cygwin DLL builds, otherwise empty ("||" restored). */
15793			#ifndef LIBDEFLATEAPI
15794			# if defined(LIBDEFLATE_DLL) && (defined(_WIN32) || defined(__CYGWIN__))
15795			# define LIBDEFLATEAPI __declspec(dllimport)
15796			# else
15797			# define LIBDEFLATEAPI
15798			# endif
15799			#endif
15800
15801
15802
15803
15804			/* Compression API. Both struct types are only forward-declared at this point. */
15805			struct libdeflate_compressor;
15806			struct libdeflate_options;
15807			
15808			/* Creates a compressor for the given compression level. */
15809			LIBDEFLATEAPI struct libdeflate_compressor *
15810			libdeflate_alloc_compressor(int compression_level);
15811			
15812			/* Same, with an additional options block. */
15813			LIBDEFLATEAPI struct libdeflate_compressor *
15814			libdeflate_alloc_compressor_ex(int compression_level,
15815			const struct libdeflate_options *options);
15816			
15817			/* Raw DEFLATE compression of in[0..in_nbytes) into out; returns a size_t (presumably the compressed size, 0 on failure - TODO confirm). */
15818			LIBDEFLATEAPI size_t
15819			libdeflate_deflate_compress(struct libdeflate_compressor *compressor,
15820			const void *in, size_t in_nbytes,
15821			void *out, size_t out_nbytes_avail);
15822			
15823			/* Bound on the compressed size for an input of in_nbytes (by name; TODO confirm exact guarantee). */
15824			LIBDEFLATEAPI size_t
15825			libdeflate_deflate_compress_bound(struct libdeflate_compressor *compressor,
15826			size_t in_nbytes);
15827			
15828			/* zlib-format counterpart of libdeflate_deflate_compress(). */
15829			LIBDEFLATEAPI size_t
15830			libdeflate_zlib_compress(struct libdeflate_compressor *compressor,
15831			const void *in, size_t in_nbytes,
15832			void *out, size_t out_nbytes_avail);
15833			
15834			/* zlib-format counterpart of the bound function. */
15835			LIBDEFLATEAPI size_t
15836			libdeflate_zlib_compress_bound(struct libdeflate_compressor *compressor,
15837			size_t in_nbytes);
15838			
15839			/* gzip-format counterpart of libdeflate_deflate_compress(). */
15840			LIBDEFLATEAPI size_t
15841			libdeflate_gzip_compress(struct libdeflate_compressor *compressor,
15842			const void *in, size_t in_nbytes,
15843			void *out, size_t out_nbytes_avail);
15844			
15845			/* gzip-format counterpart of the bound function. */
15846			LIBDEFLATEAPI size_t
15847			libdeflate_gzip_compress_bound(struct libdeflate_compressor *compressor,
15848			size_t in_nbytes);
15849			
15850			/* Releases a compressor object. */
15851			LIBDEFLATEAPI void
15852			libdeflate_free_compressor(struct libdeflate_compressor *compressor);
15853
15854
15855
15856
15857			/* Decompression API. Both struct types are only forward-declared at this point. */
15858			struct libdeflate_decompressor;
15859			struct libdeflate_options;
15860			
15861			/* Creates a decompressor. */
15862			LIBDEFLATEAPI struct libdeflate_decompressor *
15863			libdeflate_alloc_decompressor(void);
15864			
15865			/* Same, with an options block. */
15866			LIBDEFLATEAPI struct libdeflate_decompressor *
15867			libdeflate_alloc_decompressor_ex(const struct libdeflate_options *options);
15868			
15869			/* Status codes returned by every *_decompress*() function below. */
15870			enum libdeflate_result {
15871			
15872			LIBDEFLATE_SUCCESS = 0,
15873			
15874			/* NOTE(review): meanings of the non-zero codes are inferred from their names - confirm against the API docs. */
15875			LIBDEFLATE_BAD_DATA = 1,
15876			
15877			
15878			LIBDEFLATE_SHORT_OUTPUT = 2,
15879			
15880			
15881			LIBDEFLATE_INSUFFICIENT_SPACE = 3,
15882			};
15883			
15884			/* Raw DEFLATE decompression of in[0..in_nbytes) into out, which has out_nbytes_avail bytes. */
15885			LIBDEFLATEAPI enum libdeflate_result
15886			libdeflate_deflate_decompress(struct libdeflate_decompressor *decompressor,
15887			const void *in, size_t in_nbytes,
15888			void *out, size_t out_nbytes_avail,
15889			size_t *actual_out_nbytes_ret);
15890			
15891			/* The _ex forms add an actual_in_nbytes_ret output (presumably the input bytes consumed - TODO confirm). */
15892			LIBDEFLATEAPI enum libdeflate_result
15893			libdeflate_deflate_decompress_ex(struct libdeflate_decompressor *decompressor,
15894			const void *in, size_t in_nbytes,
15895			void *out, size_t out_nbytes_avail,
15896			size_t *actual_in_nbytes_ret,
15897			size_t *actual_out_nbytes_ret);
15898			
15899			/* zlib-format variants. */
15900			LIBDEFLATEAPI enum libdeflate_result
15901			libdeflate_zlib_decompress(struct libdeflate_decompressor *decompressor,
15902			const void *in, size_t in_nbytes,
15903			void *out, size_t out_nbytes_avail,
15904			size_t *actual_out_nbytes_ret);
15905			
15906			
15907			LIBDEFLATEAPI enum libdeflate_result
15908			libdeflate_zlib_decompress_ex(struct libdeflate_decompressor *decompressor,
15909			const void *in, size_t in_nbytes,
15910			void *out, size_t out_nbytes_avail,
15911			size_t *actual_in_nbytes_ret,
15912			size_t *actual_out_nbytes_ret);
15913			
15914			/* gzip-format variants. */
15915			LIBDEFLATEAPI enum libdeflate_result
15916			libdeflate_gzip_decompress(struct libdeflate_decompressor *decompressor,
15917			const void *in, size_t in_nbytes,
15918			void *out, size_t out_nbytes_avail,
15919			size_t *actual_out_nbytes_ret);
15920			
15921			
15922			LIBDEFLATEAPI enum libdeflate_result
15923			libdeflate_gzip_decompress_ex(struct libdeflate_decompressor *decompressor,
15924			const void *in, size_t in_nbytes,
15925			void *out, size_t out_nbytes_avail,
15926			size_t *actual_in_nbytes_ret,
15927			size_t *actual_out_nbytes_ret);
15928			
15929			/* Releases a decompressor object. */
15930			LIBDEFLATEAPI void
15931			libdeflate_free_decompressor(struct libdeflate_decompressor *decompressor);
15932			
15933			
15934			
15935			
15936			
15937			/* Checksum entry points: take a 32-bit running value and a buffer, return a 32-bit value (presumably the updated checksum - TODO confirm). */
15938			LIBDEFLATEAPI uint32_t
15939			libdeflate_adler32(uint32_t adler, const void *buffer, size_t len);
15940			
15941			
15942			
15943			LIBDEFLATEAPI uint32_t
15944			libdeflate_crc32(uint32_t crc, const void *buffer, size_t len);
15945
15946
15947
15948
15949
15950
15951			LIBDEFLATEAPI void
15952			libdeflate_set_memory_allocator(void (malloc_func)(size_t),
15953			void (free_func)(void ));
15954
15955
/*
 * Options passed to the "_ex" allocation functions (see
 * libdeflate_alloc_decompressor_ex() earlier in this header).
 */
struct libdeflate_options {

	/*
	 * Size of this structure as seen by the caller.
	 * NOTE(review): presumably must be set to
	 * sizeof(struct libdeflate_options) -- confirm against the API docs.
	 */
	size_t sizeof_options;

	/*
	 * Allocation callbacks with the same shapes as malloc() and free():
	 * malloc_func returns a pointer to 'size' bytes, free_func releases a
	 * pointer previously returned by malloc_func.
	 */
	void *(*malloc_func)(size_t);
	void (*free_func)(void *);
};
15965
15966			#ifdef __cplusplus
15967			}
15968			#endif
15969
15970			#endif
15971
15972
#include <stdbool.h>	/* bool, true, false */
#include <stddef.h>	/* size_t */
#include <stdint.h>	/* uint8_t ... int64_t */
#ifdef _MSC_VER
#  include <intrin.h>	/* _BitScan*() and other intrinsics */
#  include <stdlib.h>	/* _byteswap_*() */

   /* Silence MSVC warnings that this code base triggers on purpose. */

   /* C4146: unary minus operator applied to unsigned type */
#  pragma warning(disable : 4146)

   /* Implicit-conversion warnings */
#  pragma warning(disable : 4018)
#  pragma warning(disable : 4244)
#  pragma warning(disable : 4267)
#  pragma warning(disable : 4310)

   /* Warnings that only appear at higher warning levels */
#  pragma warning(disable : 4100)
#  pragma warning(disable : 4127)
#  pragma warning(disable : 4189)
#  pragma warning(disable : 4232)
#  pragma warning(disable : 4245)
#  pragma warning(disable : 4295)
#endif
#ifndef FREESTANDING
#  include <string.h>	/* memcpy() */
#endif
15998
15999
16000
16001
16002
16003
16004			#undef ARCH_X86_64
16005			#undef ARCH_X86_32
16006			#undef ARCH_ARM64
16007			#undef ARCH_ARM32
16008			#undef ARCH_RISCV
16009			#ifdef _MSC_VER
16010
16011			# if defined(_M_X64) && !defined(_M_ARM64EC)
16012			# define ARCH_X86_64
16013			# elif defined(_M_IX86)
16014			# define ARCH_X86_32
16015			# elif defined(_M_ARM64)
16016			# define ARCH_ARM64
16017			# elif defined(_M_ARM)
16018			# define ARCH_ARM32
16019			# endif
16020			#else
16021			# if defined(__x86_64__)
16022			# define ARCH_X86_64
16023			# elif defined(__i386__)
16024			# define ARCH_X86_32
16025			# elif defined(__aarch64__)
16026			# define ARCH_ARM64
16027			# elif defined(__arm__)
16028			# define ARCH_ARM32
16029			# elif defined(__riscv)
16030			# define ARCH_RISCV
16031			# endif
16032			#endif
16033
16034
16035
16036
16037
16038
16039			typedef uint8_t u8;
16040			typedef uint16_t u16;
16041			typedef uint32_t u32;
16042			typedef uint64_t u64;
16043			typedef int8_t s8;
16044			typedef int16_t s16;
16045			typedef int32_t s32;
16046			typedef int64_t s64;
16047
16048
16049			#ifdef _MSC_VER
16050			# ifdef _WIN64
16051			typedef long long ssize_t;
16052			# else
16053			typedef long ssize_t;
16054			# endif
16055			#endif
16056
16057
16058			typedef size_t machine_word_t;
16059
16060
16061			#define WORDBYTES ((int)sizeof(machine_word_t))
16062
16063
16064			#define WORDBITS (8 * WORDBYTES)
16065
16066
16067
16068
16069
16070
/*
 * GCC_PREREQ(major, minor): nonzero when building with genuine gcc (clang and
 * the Intel compiler also define __GNUC__, so they are excluded) whose version
 * is at least major.minor.  Always 0 for any other compiler.
 */
#if defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER)
#  define GCC_PREREQ(major, minor)		\
	(__GNUC__ > (major) ||			\
	 (__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)))
#  if !GCC_PREREQ(4, 9)
#    error "gcc versions older than 4.9 are no longer supported"
#  endif
#else
#  define GCC_PREREQ(major, minor)	0
#endif
/*
 * CLANG_PREREQ(major, minor, apple_version): nonzero when building with clang
 * of at least the given version.  Apple's clang is compared by
 * __apple_build_version__ against 'apple_version'; every other clang is
 * compared by __clang_major__/__clang_minor__.  Always 0 for non-clang
 * compilers.
 */
#ifdef __clang__
#  ifdef __apple_build_version__
#    define CLANG_PREREQ(major, minor, apple_version)	\
	(__apple_build_version__ >= (apple_version))
#  else
#    define CLANG_PREREQ(major, minor, apple_version)	\
	(__clang_major__ > (major) ||			\
	 (__clang_major__ == (major) && __clang_minor__ >= (minor)))
#  endif
#  if !CLANG_PREREQ(3, 9, 8000000)
#    error "clang versions older than 3.9 are no longer supported"
#  endif
#else
#  define CLANG_PREREQ(major, minor, apple_version)	0
#endif
16096			#ifdef _MSC_VER
16097			# define MSVC_PREREQ(version) (_MSC_VER >= (version))
16098			# if !MSVC_PREREQ(1900)
16099			# error "MSVC versions older than Visual Studio 2015 are no longer supported"
16100			# endif
16101			#else
16102			# define MSVC_PREREQ(version) 0
16103			#endif
16104
16105
16106			#ifndef __has_attribute
16107			# define __has_attribute(attribute) 0
16108			#endif
16109
16110
16111			#ifndef __has_builtin
16112			# define __has_builtin(builtin) 0
16113			#endif
16114
16115
16116			#ifdef _MSC_VER
16117			# define inline __inline
16118			#endif
16119
16120
/* forceinline: request inlining even when the optimizer would decline. */
#if defined(__GNUC__) || __has_attribute(always_inline)
#  define forceinline	inline __attribute__((always_inline))
#elif defined(_MSC_VER)
#  define forceinline	__forceinline
#else
#  define forceinline	inline
#endif


/* MAYBE_UNUSED: suppress "unused" diagnostics for the annotated entity. */
#if defined(__GNUC__) || __has_attribute(unused)
#  define MAYBE_UNUSED	__attribute__((unused))
#else
#  define MAYBE_UNUSED
#endif


/* NORETURN: mark a function that never returns to its caller. */
#if defined(__GNUC__) || __has_attribute(noreturn)
#  define NORETURN	__attribute__((noreturn))
#else
#  define NORETURN
#endif
16142
16143
/*
 * When the compiler does not announce at least C11, map 'restrict' to the
 * GNU spelling if available, otherwise define it away.
 */
#if !defined(__STDC_VERSION__) || (__STDC_VERSION__ < 201112L)
#  if defined(__GNUC__) || defined(__clang__)
#    define restrict	__restrict__
#  else
#    define restrict
#  endif
#endif
16151
16152
/* likely(expr): hint that 'expr' is usually true. */
#if defined(__GNUC__) || __has_builtin(__builtin_expect)
#  define likely(expr)		__builtin_expect(!!(expr), 1)
#else
#  define likely(expr)		(expr)
#endif


/* unlikely(expr): hint that 'expr' is usually false. */
#if defined(__GNUC__) || __has_builtin(__builtin_expect)
#  define unlikely(expr)	__builtin_expect(!!(expr), 0)
#else
#  define unlikely(expr)	(expr)
#endif
16165
16166
/*
 * prefetchr(addr): prefetch the data at 'addr' for reading.  Expands to
 * nothing when no suitable builtin or intrinsic is selected.
 */
#undef prefetchr
#if defined(__GNUC__) || __has_builtin(__builtin_prefetch)
#  define prefetchr(addr)	__builtin_prefetch((addr), 0)
#elif defined(_MSC_VER)
#  if defined(ARCH_X86_32) || defined(ARCH_X86_64)
#    define prefetchr(addr)	_mm_prefetch((addr), _MM_HINT_T0)
#  elif defined(ARCH_ARM64)
#    define prefetchr(addr)	__prefetch2((addr), 0x00 )
#  elif defined(ARCH_ARM32)
#    define prefetchr(addr)	__prefetch(addr)
#  endif
#endif
#ifndef prefetchr
#  define prefetchr(addr)
#endif
16182
16183
/*
 * prefetchw(addr): prefetch the data at 'addr' for writing.  Expands to
 * nothing when no suitable builtin or intrinsic is selected.
 */
#undef prefetchw
#if defined(__GNUC__) || __has_builtin(__builtin_prefetch)
#  define prefetchw(addr)	__builtin_prefetch((addr), 1)
#elif defined(_MSC_VER)
#  if defined(ARCH_X86_32) || defined(ARCH_X86_64)
#    define prefetchw(addr)	_m_prefetchw(addr)
#  elif defined(ARCH_ARM64)
#    define prefetchw(addr)	__prefetch2((addr), 0x10 )
#  elif defined(ARCH_ARM32)
#    define prefetchw(addr)	__prefetchw(addr)
#  endif
#endif
#ifndef prefetchw
#  define prefetchw(addr)
#endif
16199
16200
/*
 * _aligned_attribute(n): request n-byte alignment for a declaration.  Left
 * undefined when the compiler offers neither the GNU nor the MSVC spelling,
 * so users can test for it with #ifdef.
 */
#undef _aligned_attribute
#if defined(__GNUC__) || __has_attribute(aligned)
#  define _aligned_attribute(n)	__attribute__((aligned(n)))
#elif defined(_MSC_VER)
#  define _aligned_attribute(n)	__declspec(align(n))
#endif


/*
 * _target_attribute(attrs): compile a function for the given target feature
 * string.  Expands to nothing when unsupported.
 */
#if defined(__GNUC__) || __has_attribute(target)
#  define _target_attribute(attrs)	__attribute__((target(attrs)))
#else
#  define _target_attribute(attrs)
#endif
16214
16215
16216
16217
16218
16219			#define ARRAY_LEN(A) (sizeof(A) / sizeof((A)[0]))
16220			#define MIN(a, b) ((a) <= (b) ? (a) : (b))
16221			#define MAX(a, b) ((a) >= (b) ? (a) : (b))
16222			#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
16223			#define STATIC_ASSERT(expr) ((void)sizeof(char[1 - 2 * !(expr)]))
16224			#define ALIGN(n, a) (((n) + (a) - 1) & ~((a) - 1))
16225			#define ROUND_UP(n, d) ((d) * DIV_ROUND_UP((n), (d)))
16226
16227
16228
16229
16230
16231
16232			#if defined(__BYTE_ORDER__)
16233			# define CPU_IS_LITTLE_ENDIAN() (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
16234			#elif defined(_MSC_VER)
16235			# define CPU_IS_LITTLE_ENDIAN() true
16236			#else
16237			static forceinline bool CPU_IS_LITTLE_ENDIAN(void)
16238			{
16239			union {
16240			u32 w;
16241			u8 b;
16242			} u;
16243
16244			u.w = 1;
16245			return u.b;
16246			}
16247			#endif
16248
16249
16250			static forceinline u16 bswap16(u16 v)
16251			{
16252			#if defined(__GNUC__) \|\| __has_builtin(__builtin_bswap16)
16253			return __builtin_bswap16(v);
16254			#elif defined(_MSC_VER)
16255			return _byteswap_ushort(v);
16256			#else
16257			return (v << 8) \| (v >> 8);
16258			#endif
16259			}
16260
16261
16262			static forceinline u32 bswap32(u32 v)
16263			{
16264			#if defined(__GNUC__) \|\| __has_builtin(__builtin_bswap32)
16265			return __builtin_bswap32(v);
16266			#elif defined(_MSC_VER)
16267			return _byteswap_ulong(v);
16268			#else
16269			return ((v & 0x000000FF) << 24) \|
16270			((v & 0x0000FF00) << 8) \|
16271			((v & 0x00FF0000) >> 8) \|
16272			((v & 0xFF000000) >> 24);
16273			#endif
16274			}
16275
16276
16277			static forceinline u64 bswap64(u64 v)
16278			{
16279			#if defined(__GNUC__) \|\| __has_builtin(__builtin_bswap64)
16280			return __builtin_bswap64(v);
16281			#elif defined(_MSC_VER)
16282			return _byteswap_uint64(v);
16283			#else
16284			return ((v & 0x00000000000000FF) << 56) \|
16285			((v & 0x000000000000FF00) << 40) \|
16286			((v & 0x0000000000FF0000) << 24) \|
16287			((v & 0x00000000FF000000) << 8) \|
16288			((v & 0x000000FF00000000) >> 8) \|
16289			((v & 0x0000FF0000000000) >> 24) \|
16290			((v & 0x00FF000000000000) >> 40) \|
16291			((v & 0xFF00000000000000) >> 56);
16292			#endif
16293			}
16294
			/*
			 * Convert between CPU byte order and a fixed byte order.
			 * leN_bswap(v) yields v unchanged on a little-endian CPU and
			 * byte-swapped otherwise; beN_bswap(v) is the mirror image.
			 * The same macro serves both directions of the conversion.
			 */
16295			#define le16_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap16(v))
16296			#define le32_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap32(v))
16297			#define le64_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap64(v))
16298			#define be16_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? bswap16(v) : (v))
16299			#define be32_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? bswap32(v) : (v))
16300			#define be64_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? bswap64(v) : (v))
16301
16302
16303
16304
16305
16306
/*
 * UNALIGNED_ACCESS_IS_FAST: 1 when the target is one of the configurations
 * listed below, for which word-sized loads/stores at arbitrary addresses are
 * treated as cheap; 0 otherwise.  Always defined, so it can be used in plain
 * 'if' statements.
 */
#if (defined(__GNUC__) || defined(__clang__)) && \
	(defined(ARCH_X86_64) || defined(ARCH_X86_32) || \
	 defined(__ARM_FEATURE_UNALIGNED) || defined(__powerpc64__) || \
	 defined(__riscv_misaligned_fast) || \
	 defined(__wasm__))
#  define UNALIGNED_ACCESS_IS_FAST	1
#elif defined(_MSC_VER)
#  define UNALIGNED_ACCESS_IS_FAST	1
#else
#  define UNALIGNED_ACCESS_IS_FAST	0
#endif
16318
16319
16320
16321			#ifdef FREESTANDING
16322			# define MEMCOPY __builtin_memcpy
16323			#else
16324			# define MEMCOPY memcpy
16325			#endif
16326
16327
16328
16329			#define DEFINE_UNALIGNED_TYPE(type) \
16330			static forceinline type \
16331			load_##type##_unaligned(const void *p) \
16332			{ \
16333			type v; \
16334			\
16335			MEMCOPY(&v, p, sizeof(v)); \
16336			return v; \
16337			} \
16338			\
16339			static forceinline void \
16340			store_##type##_unaligned(type v, void *p) \
16341			{ \
16342			MEMCOPY(p, &v, sizeof(v)); \
16343			}
16344
16345			DEFINE_UNALIGNED_TYPE(u16)
16346			DEFINE_UNALIGNED_TYPE(u32)
16347			DEFINE_UNALIGNED_TYPE(u64)
16348			DEFINE_UNALIGNED_TYPE(machine_word_t)
16349
16350			#undef MEMCOPY
16351
16352			#define load_word_unaligned load_machine_word_t_unaligned
16353			#define store_word_unaligned store_machine_word_t_unaligned
16354
16355
16356
16357			static forceinline u16
16358			get_unaligned_le16(const u8 *p)
16359			{
16360			if (UNALIGNED_ACCESS_IS_FAST)
16361			return le16_bswap(load_u16_unaligned(p));
16362			else
16363			return ((u16)p[1] << 8) \| p[0];
16364			}
16365
16366			static forceinline u16
16367			get_unaligned_be16(const u8 *p)
16368			{
16369			if (UNALIGNED_ACCESS_IS_FAST)
16370			return be16_bswap(load_u16_unaligned(p));
16371			else
16372			return ((u16)p[0] << 8) \| p[1];
16373			}
16374
16375			static forceinline u32
16376			get_unaligned_le32(const u8 *p)
16377			{
16378			if (UNALIGNED_ACCESS_IS_FAST)
16379			return le32_bswap(load_u32_unaligned(p));
16380			else
16381			return ((u32)p[3] << 24) \| ((u32)p[2] << 16) \|
16382			((u32)p[1] << 8) \| p[0];
16383			}
16384
16385			static forceinline u32
16386			get_unaligned_be32(const u8 *p)
16387			{
16388			if (UNALIGNED_ACCESS_IS_FAST)
16389			return be32_bswap(load_u32_unaligned(p));
16390			else
16391			return ((u32)p[0] << 24) \| ((u32)p[1] << 16) \|
16392			((u32)p[2] << 8) \| p[3];
16393			}
16394
16395			static forceinline u64
16396			get_unaligned_le64(const u8 *p)
16397			{
16398			if (UNALIGNED_ACCESS_IS_FAST)
16399			return le64_bswap(load_u64_unaligned(p));
16400			else
16401			return ((u64)p[7] << 56) \| ((u64)p[6] << 48) \|
16402			((u64)p[5] << 40) \| ((u64)p[4] << 32) \|
16403			((u64)p[3] << 24) \| ((u64)p[2] << 16) \|
16404			((u64)p[1] << 8) \| p[0];
16405			}
16406
16407			static forceinline machine_word_t
16408			get_unaligned_leword(const u8 *p)
16409			{
16410			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
16411			if (WORDBITS == 32)
16412			return get_unaligned_le32(p);
16413			else
16414			return get_unaligned_le64(p);
16415			}
16416
16417
16418
			/*
			 * Store 'v' at 'p' as two bytes, least significant first.
			 * 'p' need not be aligned.  Uses one 16-bit store when
			 * UNALIGNED_ACCESS_IS_FAST, else two byte stores.
			 */
16419			static forceinline void
16420			put_unaligned_le16(u16 v, u8 *p)
16421			{
16422			if (UNALIGNED_ACCESS_IS_FAST) {
16423			store_u16_unaligned(le16_bswap(v), p);
16424			} else {
16425			p[0] = (u8)(v >> 0);
16426			p[1] = (u8)(v >> 8);
16427			}
16428			}
16429
			/*
			 * Store 'v' at 'p' as two bytes, most significant first.
			 * 'p' need not be aligned.
			 */
16430			static forceinline void
16431			put_unaligned_be16(u16 v, u8 *p)
16432			{
16433			if (UNALIGNED_ACCESS_IS_FAST) {
16434			store_u16_unaligned(be16_bswap(v), p);
16435			} else {
16436			p[0] = (u8)(v >> 8);
16437			p[1] = (u8)(v >> 0);
16438			}
16439			}
16440
			/*
			 * Store 'v' at 'p' as four bytes, least significant first.
			 * 'p' need not be aligned.
			 */
16441			static forceinline void
16442			put_unaligned_le32(u32 v, u8 *p)
16443			{
16444			if (UNALIGNED_ACCESS_IS_FAST) {
16445			store_u32_unaligned(le32_bswap(v), p);
16446			} else {
16447			p[0] = (u8)(v >> 0);
16448			p[1] = (u8)(v >> 8);
16449			p[2] = (u8)(v >> 16);
16450			p[3] = (u8)(v >> 24);
16451			}
16452			}
16453
			/*
			 * Store 'v' at 'p' as four bytes, most significant first.
			 * 'p' need not be aligned.
			 */
16454			static forceinline void
16455			put_unaligned_be32(u32 v, u8 *p)
16456			{
16457			if (UNALIGNED_ACCESS_IS_FAST) {
16458			store_u32_unaligned(be32_bswap(v), p);
16459			} else {
16460			p[0] = (u8)(v >> 24);
16461			p[1] = (u8)(v >> 16);
16462			p[2] = (u8)(v >> 8);
16463			p[3] = (u8)(v >> 0);
16464			}
16465			}
16466
			/*
			 * Store 'v' at 'p' as eight bytes, least significant first.
			 * 'p' need not be aligned.
			 */
16467			static forceinline void
16468			put_unaligned_le64(u64 v, u8 *p)
16469			{
16470			if (UNALIGNED_ACCESS_IS_FAST) {
16471			store_u64_unaligned(le64_bswap(v), p);
16472			} else {
16473			p[0] = (u8)(v >> 0);
16474			p[1] = (u8)(v >> 8);
16475			p[2] = (u8)(v >> 16);
16476			p[3] = (u8)(v >> 24);
16477			p[4] = (u8)(v >> 32);
16478			p[5] = (u8)(v >> 40);
16479			p[6] = (u8)(v >> 48);
16480			p[7] = (u8)(v >> 56);
16481			}
16482			}
16483
16484			static forceinline void
16485			put_unaligned_leword(machine_word_t v, u8 *p)
16486			{
16487			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
16488			if (WORDBITS == 32)
16489			put_unaligned_le32(v, p);
16490			else
16491			put_unaligned_le64(v, p);
16492			}
16493
16494
16495
16496
16497
16498
16499
16500			static forceinline unsigned
16501			bsr32(u32 v)
16502			{
16503			#if defined(__GNUC__) \|\| __has_builtin(__builtin_clz)
16504			return 31 - __builtin_clz(v);
16505			#elif defined(_MSC_VER)
16506			unsigned long i;
16507
16508			_BitScanReverse(&i, v);
16509			return i;
16510			#else
16511			unsigned i = 0;
16512
16513			while ((v >>= 1) != 0)
16514			i++;
16515			return i;
16516			#endif
16517			}
16518
16519			static forceinline unsigned
16520			bsr64(u64 v)
16521			{
16522			#if defined(__GNUC__) \|\| __has_builtin(__builtin_clzll)
16523			return 63 - __builtin_clzll(v);
16524			#elif defined(_MSC_VER) && defined(_WIN64)
16525			unsigned long i;
16526
16527			_BitScanReverse64(&i, v);
16528			return i;
16529			#else
16530			unsigned i = 0;
16531
16532			while ((v >>= 1) != 0)
16533			i++;
16534			return i;
16535			#endif
16536			}
16537
16538			static forceinline unsigned
16539			bsrw(machine_word_t v)
16540			{
16541			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
16542			if (WORDBITS == 32)
16543			return bsr32(v);
16544			else
16545			return bsr64(v);
16546			}
16547
16548
16549
16550			static forceinline unsigned
16551			bsf32(u32 v)
16552			{
16553			#if defined(__GNUC__) \|\| __has_builtin(__builtin_ctz)
16554			return __builtin_ctz(v);
16555			#elif defined(_MSC_VER)
16556			unsigned long i;
16557
16558			_BitScanForward(&i, v);
16559			return i;
16560			#else
16561			unsigned i = 0;
16562
16563			for (; (v & 1) == 0; v >>= 1)
16564			i++;
16565			return i;
16566			#endif
16567			}
16568
16569			static forceinline unsigned
16570			bsf64(u64 v)
16571			{
16572			#if defined(__GNUC__) \|\| __has_builtin(__builtin_ctzll)
16573			return __builtin_ctzll(v);
16574			#elif defined(_MSC_VER) && defined(_WIN64)
16575			unsigned long i;
16576
16577			_BitScanForward64(&i, v);
16578			return i;
16579			#else
16580			unsigned i = 0;
16581
16582			for (; (v & 1) == 0; v >>= 1)
16583			i++;
16584			return i;
16585			#endif
16586			}
16587
16588			static forceinline unsigned
16589			bsfw(machine_word_t v)
16590			{
16591			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
16592			if (WORDBITS == 32)
16593			return bsf32(v);
16594			else
16595			return bsf64(v);
16596			}
16597
16598
/*
 * rbit32(v): reverse the bit order of a 32-bit value with the ARM 'rbit'
 * instruction.  Only provided on the ARM configurations below; elsewhere the
 * macro rbit32 stays undefined so callers can test for it with #ifdef.
 */
#undef rbit32
#if (defined(__GNUC__) || defined(__clang__)) && defined(ARCH_ARM32) && \
	(__ARM_ARCH >= 7 || (__ARM_ARCH == 6 && defined(__ARM_ARCH_6T2__)))
static forceinline u32
rbit32(u32 v)
{
	__asm__("rbit %0, %1" : "=r" (v) : "r" (v));
	return v;
}
#define rbit32 rbit32
#elif (defined(__GNUC__) || defined(__clang__)) && defined(ARCH_ARM64)
static forceinline u32
rbit32(u32 v)
{
	__asm__("rbit %w0, %w1" : "=r" (v) : "r" (v));
	return v;
}
#define rbit32 rbit32
#endif
16618
16619			#endif
16620
16621
/* Function-pointer types for the allocator callbacks (malloc/free shapes). */
typedef void *(*malloc_func_t)(size_t);
typedef void (*free_func_t)(void *);

/* Process-wide default allocator callbacks, defined in another unit. */
extern malloc_func_t libdeflate_default_malloc_func;
extern free_func_t libdeflate_default_free_func;

/*
 * Aligned allocation helpers layered on the callbacks above.  A pointer
 * obtained from libdeflate_aligned_malloc() is presumably to be released only
 * with libdeflate_aligned_free() and the matching free_func -- confirm against
 * the definitions, which are not in this part of the file.
 */
void *libdeflate_aligned_malloc(malloc_func_t malloc_func,
				size_t alignment, size_t size);
void libdeflate_aligned_free(free_func_t free_func, void *ptr);
16631
#ifdef FREESTANDING
/*
 * No C library is available: declare the memory functions ourselves and route
 * every call to the compiler builtins.
 */
void *memset(void *s, int c, size_t n);
#define memset(s, c, n)		__builtin_memset((s), (c), (n))

void *memcpy(void *dest, const void *src, size_t n);
#define memcpy(dest, src, n)	__builtin_memcpy((dest), (src), (n))

void *memmove(void *dest, const void *src, size_t n);
#define memmove(dest, src, n)	__builtin_memmove((dest), (src), (n))

int memcmp(const void *s1, const void *s2, size_t n);
#define memcmp(s1, s2, n)	__builtin_memcmp((s1), (s2), (n))

/* Assertions are switched off in freestanding builds. */
#undef LIBDEFLATE_ENABLE_ASSERTIONS
#else
#  include <string.h>
   /* Turn assertions on automatically under the clang static analyzer. */
#  ifdef __clang_analyzer__
#    define LIBDEFLATE_ENABLE_ASSERTIONS
#  endif
#endif
16654
16655
/*
 * ASSERT(expr): with LIBDEFLATE_ENABLE_ASSERTIONS, call
 * libdeflate_assertion_failed() with the expression text and source location
 * when 'expr' is false.  Otherwise 'expr' is still evaluated (side effects
 * are kept) and its value discarded.
 */
#ifdef LIBDEFLATE_ENABLE_ASSERTIONS
NORETURN void
libdeflate_assertion_failed(const char *expr, const char *file, int line);
#define ASSERT(expr) { if (unlikely(!(expr))) \
	libdeflate_assertion_failed(#expr, __FILE__, __LINE__); }
#else
#define ASSERT(expr) (void)(expr)
#endif
16664
16665			#define CONCAT_IMPL(a, b) a##b
16666			#define CONCAT(a, b) CONCAT_IMPL(a, b)
16667			#define ADD_SUFFIX(name) CONCAT(name, SUFFIX)
16668
16669			#endif
16670
16671
#if defined(ARCH_X86_32) || defined(ARCH_X86_64)

/* Bits of the feature mask returned by get_x86_cpu_features(). */
#define X86_CPU_FEATURE_SSE2		(1 << 0)
#define X86_CPU_FEATURE_PCLMULQDQ	(1 << 1)
#define X86_CPU_FEATURE_AVX		(1 << 2)
#define X86_CPU_FEATURE_AVX2		(1 << 3)
#define X86_CPU_FEATURE_BMI2		(1 << 4)

#define X86_CPU_FEATURE_ZMM		(1 << 5)
#define X86_CPU_FEATURE_AVX512BW	(1 << 6)
#define X86_CPU_FEATURE_AVX512VL	(1 << 7)
#define X86_CPU_FEATURE_VPCLMULQDQ	(1 << 8)
#define X86_CPU_FEATURE_AVX512VNNI	(1 << 9)
#define X86_CPU_FEATURE_AVXVNNI		(1 << 10)

#if defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER)

/* Runtime feature detection is available with these compilers. */
#  define X86_CPU_FEATURES_KNOWN	(1U << 31)
extern volatile u32 libdeflate_x86_cpu_features;

void libdeflate_init_x86_cpu_features(void);

/*
 * Return the feature mask, running detection first while the cached value is
 * still 0.
 */
static inline u32 get_x86_cpu_features(void)
{
	if (libdeflate_x86_cpu_features == 0)
		libdeflate_init_x86_cpu_features();
	return libdeflate_x86_cpu_features;
}

/*
 * Intrinsics headers.  immintrin.h normally suffices; clang in MSVC
 * compatibility mode needs the sub-headers included explicitly.
 */
#  include <immintrin.h>
#  if defined(_MSC_VER) && defined(__clang__)
#    include <tmmintrin.h>
#    include <smmintrin.h>
#    include <wmmintrin.h>
#    include <avxintrin.h>
#    include <avx2intrin.h>
#    include <avx512fintrin.h>
#    include <avx512bwintrin.h>
#    include <avx512vlintrin.h>
#    if __has_include(<avx512vlbwintrin.h>)
#      include <avx512vlbwintrin.h>
#    endif
#    if __has_include(<vpclmulqdqintrin.h>)
#      include <vpclmulqdqintrin.h>
#    endif
#    if __has_include(<avx512vnniintrin.h>)
#      include <avx512vnniintrin.h>
#    endif
#    if __has_include(<avx512vlvnniintrin.h>)
#      include <avx512vlvnniintrin.h>
#    endif
#    if __has_include(<avxvnniintrin.h>)
#      include <avxvnniintrin.h>
#    endif
#  endif
#else
static inline u32 get_x86_cpu_features(void) { return 0; }
#endif

/*
 * HAVE_<FEATURE>(features): constant 1 when the feature is enabled for the
 * whole build, otherwise a test of the runtime mask.  The *_NATIVE macros
 * record only the build-time answer.
 */
#if defined(__SSE2__) || \
	(defined(_MSC_VER) && \
	 (defined(ARCH_X86_64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 2)))
#  define HAVE_SSE2(features)	1
#  define HAVE_SSE2_NATIVE	1
#else
#  define HAVE_SSE2(features)	((features) & X86_CPU_FEATURE_SSE2)
#  define HAVE_SSE2_NATIVE	0
#endif

#if (defined(__PCLMUL__) && defined(__SSE4_1__)) || \
	(defined(_MSC_VER) && defined(__AVX2__))
#  define HAVE_PCLMULQDQ(features)	1
#else
#  define HAVE_PCLMULQDQ(features)	((features) & X86_CPU_FEATURE_PCLMULQDQ)
#endif

#ifdef __AVX__
#  define HAVE_AVX(features)	1
#else
#  define HAVE_AVX(features)	((features) & X86_CPU_FEATURE_AVX)
#endif

#ifdef __AVX2__
#  define HAVE_AVX2(features)	1
#else
#  define HAVE_AVX2(features)	((features) & X86_CPU_FEATURE_AVX2)
#endif

#if defined(__BMI2__) || (defined(_MSC_VER) && defined(__AVX2__))
#  define HAVE_BMI2(features)	1
#  define HAVE_BMI2_NATIVE	1
#else
#  define HAVE_BMI2(features)	((features) & X86_CPU_FEATURE_BMI2)
#  define HAVE_BMI2_NATIVE	0
#endif

#ifdef __AVX512BW__
#  define HAVE_AVX512BW(features)	1
#else
#  define HAVE_AVX512BW(features)	((features) & X86_CPU_FEATURE_AVX512BW)
#endif

#ifdef __AVX512VL__
#  define HAVE_AVX512VL(features)	1
#else
#  define HAVE_AVX512VL(features)	((features) & X86_CPU_FEATURE_AVX512VL)
#endif

#ifdef __VPCLMULQDQ__
#  define HAVE_VPCLMULQDQ(features)	1
#else
#  define HAVE_VPCLMULQDQ(features)	((features) & X86_CPU_FEATURE_VPCLMULQDQ)
#endif

#ifdef __AVX512VNNI__
#  define HAVE_AVX512VNNI(features)	1
#else
#  define HAVE_AVX512VNNI(features)	((features) & X86_CPU_FEATURE_AVX512VNNI)
#endif

#ifdef __AVXVNNI__
#  define HAVE_AVXVNNI(features)	1
#else
#  define HAVE_AVXVNNI(features)	((features) & X86_CPU_FEATURE_AVXVNNI)
#endif

#endif
16799
16800			#endif
16801
16802
16803			#ifdef __AVX2__
16804			static forceinline void
16805			matchfinder_init_avx2(mf_pos_t *data, size_t size)
16806			{
16807			__m256i p = (__m256i )data;
16808			__m256i v = _mm256_set1_epi16(MATCHFINDER_INITVAL);
16809
16810			STATIC_ASSERT(MATCHFINDER_MEM_ALIGNMENT % sizeof(*p) == 0);
16811			STATIC_ASSERT(MATCHFINDER_SIZE_ALIGNMENT % (4 * sizeof(*p)) == 0);
16812			STATIC_ASSERT(sizeof(mf_pos_t) == 2);
16813
16814			do {
16815			p[0] = v;
16816			p[1] = v;
16817			p[2] = v;
16818			p[3] = v;
16819			p += 4;
16820			size -= 4 * sizeof(*p);
16821			} while (size != 0);
16822			}
16823			#define matchfinder_init matchfinder_init_avx2
16824
16825			static forceinline void
16826			matchfinder_rebase_avx2(mf_pos_t *data, size_t size)
16827			{
16828			__m256i p = (__m256i )data;
16829			__m256i v = _mm256_set1_epi16((u16)-MATCHFINDER_WINDOW_SIZE);
16830
16831			STATIC_ASSERT(MATCHFINDER_MEM_ALIGNMENT % sizeof(*p) == 0);
16832			STATIC_ASSERT(MATCHFINDER_SIZE_ALIGNMENT % (4 * sizeof(*p)) == 0);
16833			STATIC_ASSERT(sizeof(mf_pos_t) == 2);
16834
16835			do {
16836
16837			p[0] = _mm256_adds_epi16(p[0], v);
16838			p[1] = _mm256_adds_epi16(p[1], v);
16839			p[2] = _mm256_adds_epi16(p[2], v);
16840			p[3] = _mm256_adds_epi16(p[3], v);
16841			p += 4;
16842			size -= 4 * sizeof(*p);
16843			} while (size != 0);
16844			}
16845			#define matchfinder_rebase matchfinder_rebase_avx2
16846
16847			#elif HAVE_SSE2_NATIVE
16848			static forceinline void
16849			matchfinder_init_sse2(mf_pos_t *data, size_t size)
16850			{
16851	37		__m128i p = (__m128i )data;
16852	37		__m128i v = _mm_set1_epi16(MATCHFINDER_INITVAL);
16853
16854			STATIC_ASSERT(MATCHFINDER_MEM_ALIGNMENT % sizeof(*p) == 0);
16855			STATIC_ASSERT(MATCHFINDER_SIZE_ALIGNMENT % (4 * sizeof(*p)) == 0);
16856			STATIC_ASSERT(sizeof(mf_pos_t) == 2);
16857
16858			do {
16859	138240		p[0] = v;
16860	138240		p[1] = v;
16861	138240		p[2] = v;
16862	138240		p[3] = v;
16863	138240		p += 4;
16864	138240		size -= 4 * sizeof(*p);
16865	138240	100	} while (size != 0);
		100
		100
		100
		100
16866	37		}
16867			#define matchfinder_init matchfinder_init_sse2
16868
16869			static forceinline void
16870			matchfinder_rebase_sse2(mf_pos_t *data, size_t size)
16871			{
16872	0		__m128i p = (__m128i )data;
16873	0		__m128i v = _mm_set1_epi16((u16)-MATCHFINDER_WINDOW_SIZE);
16874
16875			STATIC_ASSERT(MATCHFINDER_MEM_ALIGNMENT % sizeof(*p) == 0);
16876			STATIC_ASSERT(MATCHFINDER_SIZE_ALIGNMENT % (4 * sizeof(*p)) == 0);
16877			STATIC_ASSERT(sizeof(mf_pos_t) == 2);
16878
16879			do {
16880
16881	0		p[0] = _mm_adds_epi16(p[0], v);
16882	0		p[1] = _mm_adds_epi16(p[1], v);
16883	0		p[2] = _mm_adds_epi16(p[2], v);
16884	0		p[3] = _mm_adds_epi16(p[3], v);
16885	0		p += 4;
16886	0		size -= 4 * sizeof(*p);
16887	0	0	} while (size != 0);
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
16888	0		}
16889			#define matchfinder_rebase matchfinder_rebase_sse2
16890			#endif
16891
16892			#endif
16893
16894			# endif
16895			#else
16896			# define MATCHFINDER_ALIGNED
16897			#endif
16898
16899
16900			#ifndef matchfinder_init
16901			static forceinline void
16902			matchfinder_init(mf_pos_t *data, size_t size)
16903			{
16904			size_t num_entries = size / sizeof(*data);
16905			size_t i;
16906
16907			for (i = 0; i < num_entries; i++)
16908			data[i] = MATCHFINDER_INITVAL;
16909			}
16910			#endif
16911
16912
16913			#ifndef matchfinder_rebase
16914			static forceinline void
16915			matchfinder_rebase(mf_pos_t *data, size_t size)
16916			{
16917			size_t num_entries = size / sizeof(*data);
16918			size_t i;
16919
16920			if (MATCHFINDER_WINDOW_SIZE == 32768) {
16921
16922			for (i = 0; i < num_entries; i++)
16923			data[i] = 0x8000 \| (data[i] & ~(data[i] >> 15));
16924			} else {
16925			for (i = 0; i < num_entries; i++) {
16926			if (data[i] >= 0)
16927			data[i] -= (mf_pos_t)-MATCHFINDER_WINDOW_SIZE;
16928			else
16929			data[i] = (mf_pos_t)-MATCHFINDER_WINDOW_SIZE;
16930			}
16931			}
16932			}
16933			#endif
16934
16935
			/*
			 * Multiplicative hash: multiply 'seq' by the constant
			 * 0x1E35A7BD, truncate to 32 bits and keep the top
			 * 'num_bits' bits.  'num_bits' must be in 1..32; 0 would
			 * shift a 32-bit value by 32, which is undefined.
			 */
16936			static forceinline u32
16937			lz_hash(u32 seq, unsigned num_bits)
16938			{
16939	167673		return (u32)(seq * 0x1E35A7BD) >> (32 - num_bits);
16940			}
16941
16942
16943			static forceinline u32
16944			lz_extend(const u8 * const strptr, const u8 * const matchptr,
16945			const u32 start_len, const u32 max_len)
16946			{
16947	21		u32 len = start_len;
16948			machine_word_t v_word;
16949
16950			if (UNALIGNED_ACCESS_IS_FAST) {
16951
16952	17924	50	if (likely(max_len - len >= 4 * WORDBYTES)) {
		100
		0
		50
		0
		0
		0
		0
		100
		50
		100
		50
		0
		0
		0
		50
		50
		0
		0
16953
16954			#define COMPARE_WORD_STEP \
16955			v_word = load_word_unaligned(&matchptr[len]) ^ \
16956			load_word_unaligned(&strptr[len]); \
16957			if (v_word != 0) \
16958			goto word_differs; \
16959			len += WORDBYTES; \
16960
16961	53004	50	COMPARE_WORD_STEP
		50
		0
		50
		0
		0
		0
		0
		100
		100
		100
		100
		0
		0
		0
		50
		50
		0
		0
16962	48231	50	COMPARE_WORD_STEP
		50
		0
		50
		0
		0
		0
		0
		100
		100
		100
		100
		0
		0
		0
		50
		50
		0
		0
16963	46488	50	COMPARE_WORD_STEP
		50
		0
		50
		0
		0
		0
		0
		100
		100
		100
		100
		0
		0
		0
		50
		50
		0
		0
16964	45558	50	COMPARE_WORD_STEP
		50
		0
		50
		0
		0
		0
		0
		100
		100
		50
		100
		0
		0
		0
		50
		50
		0
		0
16965			#undef COMPARE_WORD_STEP
16966			}
16967
16968	241392	100	while (len + WORDBYTES <= max_len) {
		100
		0
		100
		0
		0
		0
		0
		100
		100
		50
		50
		0
		0
		0
		100
		100
		0
		0
16969	226100		v_word = load_word_unaligned(&matchptr[len]) ^
16970	226100		load_word_unaligned(&strptr[len]);
16971	226100	50	if (v_word != 0)
		50
		0
		50
		0
		0
		0
		0
		100
		100
		100
		100
		0
		0
		0
		50
		50
		0
		0
16972	90		goto word_differs;
16973	226010		len += WORDBYTES;
16974			}
16975			}
16976
16977	58096	100	while (len < max_len && matchptr[len] == strptr[len])
		50
		100
		50
		0
		0
		100
		50
		0
		0
		0
		0
		0
		0
		0
		0
		100
		50
		100
		50
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		100
		50
		100
		50
		0
		0
		0
		0
16978	42804		len++;
16979	15292		return len;
16980
16981	2632		word_differs:
16982			if (CPU_IS_LITTLE_ENDIAN())
16983	2632		len += (bsfw(v_word) >> 3);
16984			else
16985			len += (WORDBITS - 1 - bsrw(v_word)) >> 3;
16986	2632		return len;
16987			}
16988
16989			#endif
16990
16991
16992			#define HC_MATCHFINDER_HASH3_ORDER 15
16993			#define HC_MATCHFINDER_HASH4_ORDER 16
16994
16995			#define HC_MATCHFINDER_TOTAL_HASH_SIZE \
16996			(((1UL << HC_MATCHFINDER_HASH3_ORDER) + \
16997			(1UL << HC_MATCHFINDER_HASH4_ORDER)) * sizeof(mf_pos_t))
16998
16999			struct MATCHFINDER_ALIGNED hc_matchfinder {
17000
17001
17002			mf_pos_t hash3_tab[1UL << HC_MATCHFINDER_HASH3_ORDER];
17003
17004
17005			mf_pos_t hash4_tab[1UL << HC_MATCHFINDER_HASH4_ORDER];
17006
17007
17008			mf_pos_t next_tab[MATCHFINDER_WINDOW_SIZE];
17009			};
17010
17011
			/*
			 * Initialize the first HC_MATCHFINDER_TOTAL_HASH_SIZE bytes of
			 * *mf via matchfinder_init().  Given the member order of
			 * struct hc_matchfinder, that covers hash3_tab and hash4_tab
			 * but not next_tab.
			 */
17012			static forceinline void
17013			hc_matchfinder_init(struct hc_matchfinder *mf)
17014			{
17015			STATIC_ASSERT(HC_MATCHFINDER_TOTAL_HASH_SIZE %
17016			MATCHFINDER_SIZE_ALIGNMENT == 0);
17017
17018			matchfinder_init((mf_pos_t *)mf, HC_MATCHFINDER_TOTAL_HASH_SIZE);
17019	25		}
17020
17021			static forceinline void
17022			hc_matchfinder_slide_window(struct hc_matchfinder *mf)
17023			{
17024			STATIC_ASSERT(sizeof(*mf) % MATCHFINDER_SIZE_ALIGNMENT == 0);
17025
17026			matchfinder_rebase((mf_pos_t )mf, sizeof(mf));
17027	0		}
17028
17029
17030			static forceinline u32
17031			hc_matchfinder_longest_match(struct hc_matchfinder * const mf,
17032			const u8 ** const in_base_p,
17033			const u8 * const in_next,
17034			u32 best_len,
17035			const u32 max_len,
17036			const u32 nice_len,
17037			const u32 max_search_depth,
17038			u32 * const next_hashes,
17039			u32 * const offset_ret)
17040			{
17041	7046		u32 depth_remaining = max_search_depth;
17042	7046		const u8 *best_matchptr = in_next;
17043			mf_pos_t cur_node3, cur_node4;
17044			u32 hash3, hash4;
17045			u32 next_hashseq;
17046			u32 seq4;
17047			const u8 *matchptr;
17048			u32 len;
17049	7046		u32 cur_pos = in_next - *in_base_p;
17050			const u8 *in_base;
17051			mf_pos_t cutoff;
17052
17053	7046		if (cur_pos == MATCHFINDER_WINDOW_SIZE) {
17054			hc_matchfinder_slide_window(mf);
17055	0		*in_base_p += MATCHFINDER_WINDOW_SIZE;
17056	0		cur_pos = 0;
17057			}
17058
17059	7046		in_base = *in_base_p;
17060	7046		cutoff = cur_pos - MATCHFINDER_WINDOW_SIZE;
17061
17062	7046	50	if (unlikely(max_len < 5))
		0
		0
		50
		50
		0
		50
17063	0		goto out;
17064
17065
17066	7046		hash3 = next_hashes[0];
17067	7046		hash4 = next_hashes[1];
17068
17069
17070	7046		cur_node3 = mf->hash3_tab[hash3];
17071	7046		cur_node4 = mf->hash4_tab[hash4];
17072
17073
17074	7046		mf->hash3_tab[hash3] = cur_pos;
17075
17076
17077	7046		mf->hash4_tab[hash4] = cur_pos;
17078	7046		mf->next_tab[cur_pos] = cur_node4;
17079
17080
17081	7046		next_hashseq = get_unaligned_le32(in_next + 1);
17082	7046		next_hashes[0] = lz_hash(next_hashseq & 0xFFFFFF, HC_MATCHFINDER_HASH3_ORDER);
17083	7046		next_hashes[1] = lz_hash(next_hashseq, HC_MATCHFINDER_HASH4_ORDER);
17084	7046		prefetchw(&mf->hash3_tab[next_hashes[0]]);
17085	7046		prefetchw(&mf->hash4_tab[next_hashes[1]]);
17086
17087	7046		if (best_len < 4) {
17088
17089
17090
17091	4747	0	if (cur_node3 <= cutoff)
		0
		0
		100
		50
		0
		0
17092	1888		goto out;
17093
17094	2859		seq4 = load_u32_unaligned(in_next);
17095
17096	2859	0	if (best_len < 3) {
		0
		0
		50
		50
		0
		0
17097	0		matchptr = &in_base[cur_node3];
17098	0	0	if (load_u24_unaligned(matchptr) == loaded_u32_to_u24(seq4)) {
		0
		0
		0
		0
		0
		0
17099	0		best_len = 3;
17100	0		best_matchptr = matchptr;
17101			}
17102			}
17103
17104
17105
17106	2859	0	if (cur_node4 <= cutoff)
		0
		0
		100
		100
		0
		0
17107	855		goto out;
17108
17109			for (;;) {
17110
17111	2180		matchptr = &in_base[cur_node4];
17112
17113	2180	0	if (load_u32_unaligned(matchptr) == seq4)
		0
		0
		100
		100
		0
		0
17114	1964		break;
17115
17116
17117	216		cur_node4 = mf->next_tab[cur_node4 & (MATCHFINDER_WINDOW_SIZE - 1)];
17118	216	0	if (cur_node4 <= cutoff \|\| !--depth_remaining)
		0
		0
		0
		0
		0
		100
		100
		100
		50
		0
		0
		0
		0
17119	40		goto out;
17120			}
17121
17122
17123	1964		best_matchptr = matchptr;
17124	1964		best_len = lz_extend(in_next, best_matchptr, 4, max_len);
17125	1964	0	if (best_len >= nice_len)
		0
		0
		100
		50
		0
		0
17126	1		goto out;
17127	1963		cur_node4 = mf->next_tab[cur_node4 & (MATCHFINDER_WINDOW_SIZE - 1)];
17128	1963	0	if (cur_node4 <= cutoff \|\| !--depth_remaining)
		0
		0
		0
		0
		0
		100
		50
		100
		50
		0
		0
		0
		0
17129	400		goto out;
17130			} else {
17131	2299	100	if (cur_node4 <= cutoff \|\| best_len >= nice_len)
		50
		0
		0
		0
		0
		100
		50
		50
		50
		0
		0
		100
		50
17132	432		goto out;
17133			}
17134
17135
17136
17137	604		for (;;) {
17138			for (;;) {
17139	36147		matchptr = &in_base[cur_node4];
17140
17141
17142			#if UNALIGNED_ACCESS_IS_FAST
17143	36147		if ((load_u32_unaligned(matchptr + best_len - 3) ==
17144	72294	50	load_u32_unaligned(in_next + best_len - 3)) &&
		0
		0
		100
		100
		0
		50
17145	837		(load_u32_unaligned(matchptr) ==
17146	837	50	load_u32_unaligned(in_next)))
		0
		0
		50
		50
		0
		50
17147			#else
17148			if (matchptr[best_len] == in_next[best_len])
17149			#endif
17150	837		break;
17151
17152
17153	35310		cur_node4 = mf->next_tab[cur_node4 & (MATCHFINDER_WINDOW_SIZE - 1)];
17154	35310	0	if (cur_node4 <= cutoff \|\| !--depth_remaining)
		0
		0
		0
		0
		0
		100
		100
		100
		100
		0
		0
		0
		0
17155	3197		goto out;
17156			}
17157
17158			#if UNALIGNED_ACCESS_IS_FAST
17159	837		len = 4;
17160			#else
17161			len = 0;
17162			#endif
17163	837		len = lz_extend(in_next, matchptr, len, max_len);
17164	837	50	if (len > best_len) {
		0
		0
		100
		100
		0
		50
17165
17166	831		best_len = len;
17167	831		best_matchptr = matchptr;
17168	831	50	if (best_len >= nice_len)
		0
		0
		100
		50
		0
		50
17169	168		goto out;
17170			}
17171
17172
17173	669		cur_node4 = mf->next_tab[cur_node4 & (MATCHFINDER_WINDOW_SIZE - 1)];
17174	669	0	if (cur_node4 <= cutoff \|\| !--depth_remaining)
		0
		0
		0
		0
		0
		100
		100
		100
		50
		0
		0
		0
		0
17175	65		goto out;
17176			}
17177	7046		out:
17178	7046		*offset_ret = in_next - best_matchptr;
17179	7046		return best_len;
17180			}
17181
17182
17183			static forceinline void
17184			hc_matchfinder_skip_bytes(struct hc_matchfinder * const mf,
17185			const u8 ** const in_base_p,
17186			const u8 *in_next,
17187			const u8 * const in_end,
17188			const u32 count,
17189			u32 * const next_hashes)
17190			{
17191			u32 cur_pos;
17192			u32 hash3, hash4;
17193			u32 next_hashseq;
17194	2070		u32 remaining = count;
17195
17196	2070		if (unlikely(count + 5 > in_end - in_next))
17197	25		return;
17198
17199	2045		cur_pos = in_next - *in_base_p;
17200	2045		hash3 = next_hashes[0];
17201	2045		hash4 = next_hashes[1];
17202			do {
17203	59176	50	if (cur_pos == MATCHFINDER_WINDOW_SIZE) {
		0
		0
		50
		0
		50
		50
17204			hc_matchfinder_slide_window(mf);
17205	0		*in_base_p += MATCHFINDER_WINDOW_SIZE;
17206	0		cur_pos = 0;
17207			}
17208	59176		mf->hash3_tab[hash3] = cur_pos;
17209	59176		mf->next_tab[cur_pos] = mf->hash4_tab[hash4];
17210	59176		mf->hash4_tab[hash4] = cur_pos;
17211
17212	59176		next_hashseq = get_unaligned_le32(++in_next);
17213	118352		hash3 = lz_hash(next_hashseq & 0xFFFFFF, HC_MATCHFINDER_HASH3_ORDER);
17214	59176		hash4 = lz_hash(next_hashseq, HC_MATCHFINDER_HASH4_ORDER);
17215	59176		cur_pos++;
17216	59176	100	} while (--remaining);
		0
		0
		100
		0
		100
		100
17217
17218	2045		prefetchw(&mf->hash3_tab[hash3]);
17219	2045		prefetchw(&mf->hash4_tab[hash4]);
17220	2045		next_hashes[0] = hash3;
17221	2045		next_hashes[1] = hash4;
17222			}
17223
17224			#endif
17225
17226			/* #include "ht_matchfinder.h" */
17227
17228
17229			#ifndef LIB_HT_MATCHFINDER_H
17230			#define LIB_HT_MATCHFINDER_H
17231
17232			/* #include "matchfinder_common.h" */
17233
17234
17235			#ifndef LIB_MATCHFINDER_COMMON_H
17236			#define LIB_MATCHFINDER_COMMON_H
17237
17238			/* #include "lib_common.h" */
17239
17240
17241			#ifndef LIB_LIB_COMMON_H
17242			#define LIB_LIB_COMMON_H
17243
17244			#ifdef LIBDEFLATE_H
17245
17246			# error "lib_common.h must always be included before libdeflate.h"
17247			#endif
17248
17249			#if defined(LIBDEFLATE_DLL) && (defined(_WIN32) \|\| defined(__CYGWIN__))
17250			# define LIBDEFLATE_EXPORT_SYM __declspec(dllexport)
17251			#elif defined(__GNUC__)
17252			# define LIBDEFLATE_EXPORT_SYM __attribute__((visibility("default")))
17253			#else
17254			# define LIBDEFLATE_EXPORT_SYM
17255			#endif
17256
17257
17258			#if defined(__GNUC__) && defined(__i386__)
17259			# define LIBDEFLATE_ALIGN_STACK __attribute__((force_align_arg_pointer))
17260			#else
17261			# define LIBDEFLATE_ALIGN_STACK
17262			#endif
17263
17264			#define LIBDEFLATEAPI LIBDEFLATE_EXPORT_SYM LIBDEFLATE_ALIGN_STACK
17265
17266			/* #include "../common_defs.h" */
17267
17268
17269			#ifndef COMMON_DEFS_H
17270			#define COMMON_DEFS_H
17271
17272			/* #include "libdeflate.h" */
17273
17274
17275			#ifndef LIBDEFLATE_H
17276			#define LIBDEFLATE_H
17277
17278			#include
17279			#include
17280
17281			#ifdef __cplusplus
17282			extern "C" {
17283			#endif
17284
17285			#define LIBDEFLATE_VERSION_MAJOR 1
17286			#define LIBDEFLATE_VERSION_MINOR 25
17287			#define LIBDEFLATE_VERSION_STRING "1.25"
17288
17289
17290			#ifndef LIBDEFLATEAPI
17291			# if defined(LIBDEFLATE_DLL) && (defined(_WIN32) \|\| defined(__CYGWIN__))
17292			# define LIBDEFLATEAPI __declspec(dllimport)
17293			# else
17294			# define LIBDEFLATEAPI
17295			# endif
17296			#endif
17297
17298
17299
17300
17301
17302			struct libdeflate_compressor;
17303			struct libdeflate_options;
17304
17305
17306			LIBDEFLATEAPI struct libdeflate_compressor *
17307			libdeflate_alloc_compressor(int compression_level);
17308
17309
17310			LIBDEFLATEAPI struct libdeflate_compressor *
17311			libdeflate_alloc_compressor_ex(int compression_level,
17312			const struct libdeflate_options *options);
17313
17314
17315			LIBDEFLATEAPI size_t
17316			libdeflate_deflate_compress(struct libdeflate_compressor *compressor,
17317			const void *in, size_t in_nbytes,
17318			void *out, size_t out_nbytes_avail);
17319
17320
17321			LIBDEFLATEAPI size_t
17322			libdeflate_deflate_compress_bound(struct libdeflate_compressor *compressor,
17323			size_t in_nbytes);
17324
17325
17326			LIBDEFLATEAPI size_t
17327			libdeflate_zlib_compress(struct libdeflate_compressor *compressor,
17328			const void *in, size_t in_nbytes,
17329			void *out, size_t out_nbytes_avail);
17330
17331
17332			LIBDEFLATEAPI size_t
17333			libdeflate_zlib_compress_bound(struct libdeflate_compressor *compressor,
17334			size_t in_nbytes);
17335
17336
17337			LIBDEFLATEAPI size_t
17338			libdeflate_gzip_compress(struct libdeflate_compressor *compressor,
17339			const void *in, size_t in_nbytes,
17340			void *out, size_t out_nbytes_avail);
17341
17342
17343			LIBDEFLATEAPI size_t
17344			libdeflate_gzip_compress_bound(struct libdeflate_compressor *compressor,
17345			size_t in_nbytes);
17346
17347
17348			LIBDEFLATEAPI void
17349			libdeflate_free_compressor(struct libdeflate_compressor *compressor);
17350
17351
17352
17353
17354
17355			struct libdeflate_decompressor;
17356			struct libdeflate_options;
17357
17358
17359			LIBDEFLATEAPI struct libdeflate_decompressor *
17360			libdeflate_alloc_decompressor(void);
17361
17362
17363			LIBDEFLATEAPI struct libdeflate_decompressor *
17364			libdeflate_alloc_decompressor_ex(const struct libdeflate_options *options);
17365
17366
17367			enum libdeflate_result {
17368
17369			LIBDEFLATE_SUCCESS = 0,
17370
17371
17372			LIBDEFLATE_BAD_DATA = 1,
17373
17374
17375			LIBDEFLATE_SHORT_OUTPUT = 2,
17376
17377
17378			LIBDEFLATE_INSUFFICIENT_SPACE = 3,
17379			};
17380
17381
17382			LIBDEFLATEAPI enum libdeflate_result
17383			libdeflate_deflate_decompress(struct libdeflate_decompressor *decompressor,
17384			const void *in, size_t in_nbytes,
17385			void *out, size_t out_nbytes_avail,
17386			size_t *actual_out_nbytes_ret);
17387
17388
17389			LIBDEFLATEAPI enum libdeflate_result
17390			libdeflate_deflate_decompress_ex(struct libdeflate_decompressor *decompressor,
17391			const void *in, size_t in_nbytes,
17392			void *out, size_t out_nbytes_avail,
17393			size_t *actual_in_nbytes_ret,
17394			size_t *actual_out_nbytes_ret);
17395
17396
17397			LIBDEFLATEAPI enum libdeflate_result
17398			libdeflate_zlib_decompress(struct libdeflate_decompressor *decompressor,
17399			const void *in, size_t in_nbytes,
17400			void *out, size_t out_nbytes_avail,
17401			size_t *actual_out_nbytes_ret);
17402
17403
17404			LIBDEFLATEAPI enum libdeflate_result
17405			libdeflate_zlib_decompress_ex(struct libdeflate_decompressor *decompressor,
17406			const void *in, size_t in_nbytes,
17407			void *out, size_t out_nbytes_avail,
17408			size_t *actual_in_nbytes_ret,
17409			size_t *actual_out_nbytes_ret);
17410
17411
17412			LIBDEFLATEAPI enum libdeflate_result
17413			libdeflate_gzip_decompress(struct libdeflate_decompressor *decompressor,
17414			const void *in, size_t in_nbytes,
17415			void *out, size_t out_nbytes_avail,
17416			size_t *actual_out_nbytes_ret);
17417
17418
17419			LIBDEFLATEAPI enum libdeflate_result
17420			libdeflate_gzip_decompress_ex(struct libdeflate_decompressor *decompressor,
17421			const void *in, size_t in_nbytes,
17422			void *out, size_t out_nbytes_avail,
17423			size_t *actual_in_nbytes_ret,
17424			size_t *actual_out_nbytes_ret);
17425
17426
17427			LIBDEFLATEAPI void
17428			libdeflate_free_decompressor(struct libdeflate_decompressor *decompressor);
17429
17430
17431
17432
17433
17434
17435			LIBDEFLATEAPI uint32_t
17436			libdeflate_adler32(uint32_t adler, const void *buffer, size_t len);
17437
17438
17439
17440			LIBDEFLATEAPI uint32_t
17441			libdeflate_crc32(uint32_t crc, const void *buffer, size_t len);
17442
17443
17444
17445
17446
17447
17448			LIBDEFLATEAPI void
17449			libdeflate_set_memory_allocator(void (malloc_func)(size_t),
17450			void (free_func)(void ));
17451
17452
17453			struct libdeflate_options {
17454
17455
17456			size_t sizeof_options;
17457
17458
17459			void (malloc_func)(size_t);
17460			void (free_func)(void );
17461			};
17462
17463			#ifdef __cplusplus
17464			}
17465			#endif
17466
17467			#endif
17468
17469
17470			#include
17471			#include
17472			#include
17473			#ifdef _MSC_VER
17474			# include
17475			# include
17476
17477
17478			# pragma warning(disable : 4146)
17479
17480			# pragma warning(disable : 4018)
17481			# pragma warning(disable : 4244)
17482			# pragma warning(disable : 4267)
17483			# pragma warning(disable : 4310)
17484
17485			# pragma warning(disable : 4100)
17486			# pragma warning(disable : 4127)
17487			# pragma warning(disable : 4189)
17488			# pragma warning(disable : 4232)
17489			# pragma warning(disable : 4245)
17490			# pragma warning(disable : 4295)
17491			#endif
17492			#ifndef FREESTANDING
17493			# include
17494			#endif
17495
17496
17497
17498
17499
17500
17501			#undef ARCH_X86_64
17502			#undef ARCH_X86_32
17503			#undef ARCH_ARM64
17504			#undef ARCH_ARM32
17505			#undef ARCH_RISCV
17506			#ifdef _MSC_VER
17507
17508			# if defined(_M_X64) && !defined(_M_ARM64EC)
17509			# define ARCH_X86_64
17510			# elif defined(_M_IX86)
17511			# define ARCH_X86_32
17512			# elif defined(_M_ARM64)
17513			# define ARCH_ARM64
17514			# elif defined(_M_ARM)
17515			# define ARCH_ARM32
17516			# endif
17517			#else
17518			# if defined(__x86_64__)
17519			# define ARCH_X86_64
17520			# elif defined(__i386__)
17521			# define ARCH_X86_32
17522			# elif defined(__aarch64__)
17523			# define ARCH_ARM64
17524			# elif defined(__arm__)
17525			# define ARCH_ARM32
17526			# elif defined(__riscv)
17527			# define ARCH_RISCV
17528			# endif
17529			#endif
17530
17531
17532
17533
17534
17535
17536			typedef uint8_t u8;
17537			typedef uint16_t u16;
17538			typedef uint32_t u32;
17539			typedef uint64_t u64;
17540			typedef int8_t s8;
17541			typedef int16_t s16;
17542			typedef int32_t s32;
17543			typedef int64_t s64;
17544
17545
17546			#ifdef _MSC_VER
17547			# ifdef _WIN64
17548			typedef long long ssize_t;
17549			# else
17550			typedef long ssize_t;
17551			# endif
17552			#endif
17553
17554
17555			typedef size_t machine_word_t;
17556
17557
17558			#define WORDBYTES ((int)sizeof(machine_word_t))
17559
17560
17561			#define WORDBITS (8 * WORDBYTES)
17562
17563
17564
17565
17566
17567
17568			#if defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER)
17569			# define GCC_PREREQ(major, minor) \
17570			(__GNUC__ > (major) \|\| \
17571			(__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)))
17572			# if !GCC_PREREQ(4, 9)
17573			# error "gcc versions older than 4.9 are no longer supported"
17574			# endif
17575			#else
17576			# define GCC_PREREQ(major, minor) 0
17577			#endif
17578			#ifdef __clang__
17579			# ifdef __apple_build_version__
17580			# define CLANG_PREREQ(major, minor, apple_version) \
17581			(__apple_build_version__ >= (apple_version))
17582			# else
17583			# define CLANG_PREREQ(major, minor, apple_version) \
17584			(__clang_major__ > (major) \|\| \
17585			(__clang_major__ == (major) && __clang_minor__ >= (minor)))
17586			# endif
17587			# if !CLANG_PREREQ(3, 9, 8000000)
17588			# error "clang versions older than 3.9 are no longer supported"
17589			# endif
17590			#else
17591			# define CLANG_PREREQ(major, minor, apple_version) 0
17592			#endif
17593			#ifdef _MSC_VER
17594			# define MSVC_PREREQ(version) (_MSC_VER >= (version))
17595			# if !MSVC_PREREQ(1900)
17596			# error "MSVC versions older than Visual Studio 2015 are no longer supported"
17597			# endif
17598			#else
17599			# define MSVC_PREREQ(version) 0
17600			#endif
17601
17602
17603			#ifndef __has_attribute
17604			# define __has_attribute(attribute) 0
17605			#endif
17606
17607
17608			#ifndef __has_builtin
17609			# define __has_builtin(builtin) 0
17610			#endif
17611
17612
17613			#ifdef _MSC_VER
17614			# define inline __inline
17615			#endif
17616
17617
17618			#if defined(__GNUC__) \|\| __has_attribute(always_inline)
17619			# define forceinline inline __attribute__((always_inline))
17620			#elif defined(_MSC_VER)
17621			# define forceinline __forceinline
17622			#else
17623			# define forceinline inline
17624			#endif
17625
17626
17627			#if defined(__GNUC__) \|\| __has_attribute(unused)
17628			# define MAYBE_UNUSED __attribute__((unused))
17629			#else
17630			# define MAYBE_UNUSED
17631			#endif
17632
17633
17634			#if defined(__GNUC__) \|\| __has_attribute(noreturn)
17635			# define NORETURN __attribute__((noreturn))
17636			#else
17637			# define NORETURN
17638			#endif
17639
17640
17641			#if !defined(__STDC_VERSION__) \|\| (__STDC_VERSION__ < 201112L)
17642			# if defined(__GNUC__) \|\| defined(__clang__)
17643			# define restrict __restrict__
17644			# else
17645			# define restrict
17646			# endif
17647			#endif
17648
17649
17650			#if defined(__GNUC__) \|\| __has_builtin(__builtin_expect)
17651			# define likely(expr) __builtin_expect(!!(expr), 1)
17652			#else
17653			# define likely(expr) (expr)
17654			#endif
17655
17656
17657			#if defined(__GNUC__) \|\| __has_builtin(__builtin_expect)
17658			# define unlikely(expr) __builtin_expect(!!(expr), 0)
17659			#else
17660			# define unlikely(expr) (expr)
17661			#endif
17662
17663
17664			#undef prefetchr
17665			#if defined(__GNUC__) \|\| __has_builtin(__builtin_prefetch)
17666			# define prefetchr(addr) __builtin_prefetch((addr), 0)
17667			#elif defined(_MSC_VER)
17668			# if defined(ARCH_X86_32) \|\| defined(ARCH_X86_64)
17669			# define prefetchr(addr) _mm_prefetch((addr), _MM_HINT_T0)
17670			# elif defined(ARCH_ARM64)
17671			# define prefetchr(addr) __prefetch2((addr), 0x00 )
17672			# elif defined(ARCH_ARM32)
17673			# define prefetchr(addr) __prefetch(addr)
17674			# endif
17675			#endif
17676			#ifndef prefetchr
17677			# define prefetchr(addr)
17678			#endif
17679
17680
17681			#undef prefetchw
17682			#if defined(__GNUC__) \|\| __has_builtin(__builtin_prefetch)
17683			# define prefetchw(addr) __builtin_prefetch((addr), 1)
17684			#elif defined(_MSC_VER)
17685			# if defined(ARCH_X86_32) \|\| defined(ARCH_X86_64)
17686			# define prefetchw(addr) _m_prefetchw(addr)
17687			# elif defined(ARCH_ARM64)
17688			# define prefetchw(addr) __prefetch2((addr), 0x10 )
17689			# elif defined(ARCH_ARM32)
17690			# define prefetchw(addr) __prefetchw(addr)
17691			# endif
17692			#endif
17693			#ifndef prefetchw
17694			# define prefetchw(addr)
17695			#endif
17696
17697
17698			#undef _aligned_attribute
17699			#if defined(__GNUC__) \|\| __has_attribute(aligned)
17700			# define _aligned_attribute(n) __attribute__((aligned(n)))
17701			#elif defined(_MSC_VER)
17702			# define _aligned_attribute(n) __declspec(align(n))
17703			#endif
17704
17705
17706			#if defined(__GNUC__) \|\| __has_attribute(target)
17707			# define _target_attribute(attrs) __attribute__((target(attrs)))
17708			#else
17709			# define _target_attribute(attrs)
17710			#endif
17711
17712
17713
17714
17715
17716			#define ARRAY_LEN(A) (sizeof(A) / sizeof((A)[0]))
17717			#define MIN(a, b) ((a) <= (b) ? (a) : (b))
17718			#define MAX(a, b) ((a) >= (b) ? (a) : (b))
17719			#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
17720			#define STATIC_ASSERT(expr) ((void)sizeof(char[1 - 2 * !(expr)]))
17721			#define ALIGN(n, a) (((n) + (a) - 1) & ~((a) - 1))
17722			#define ROUND_UP(n, d) ((d) * DIV_ROUND_UP((n), (d)))
17723
17724
17725
17726
17727
17728
17729			#if defined(__BYTE_ORDER__)
17730			# define CPU_IS_LITTLE_ENDIAN() (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
17731			#elif defined(_MSC_VER)
17732			# define CPU_IS_LITTLE_ENDIAN() true
17733			#else
17734			static forceinline bool CPU_IS_LITTLE_ENDIAN(void)
17735			{
17736			union {
17737			u32 w;
17738			u8 b;
17739			} u;
17740
17741			u.w = 1;
17742			return u.b;
17743			}
17744			#endif
17745
17746
17747			static forceinline u16 bswap16(u16 v)
17748			{
17749			#if defined(__GNUC__) \|\| __has_builtin(__builtin_bswap16)
17750			return __builtin_bswap16(v);
17751			#elif defined(_MSC_VER)
17752			return _byteswap_ushort(v);
17753			#else
17754			return (v << 8) \| (v >> 8);
17755			#endif
17756			}
17757
17758
17759			static forceinline u32 bswap32(u32 v)
17760			{
17761			#if defined(__GNUC__) \|\| __has_builtin(__builtin_bswap32)
17762			return __builtin_bswap32(v);
17763			#elif defined(_MSC_VER)
17764			return _byteswap_ulong(v);
17765			#else
17766			return ((v & 0x000000FF) << 24) \|
17767			((v & 0x0000FF00) << 8) \|
17768			((v & 0x00FF0000) >> 8) \|
17769			((v & 0xFF000000) >> 24);
17770			#endif
17771			}
17772
17773
17774			static forceinline u64 bswap64(u64 v)
17775			{
17776			#if defined(__GNUC__) \|\| __has_builtin(__builtin_bswap64)
17777			return __builtin_bswap64(v);
17778			#elif defined(_MSC_VER)
17779			return _byteswap_uint64(v);
17780			#else
17781			return ((v & 0x00000000000000FF) << 56) \|
17782			((v & 0x000000000000FF00) << 40) \|
17783			((v & 0x0000000000FF0000) << 24) \|
17784			((v & 0x00000000FF000000) << 8) \|
17785			((v & 0x000000FF00000000) >> 8) \|
17786			((v & 0x0000FF0000000000) >> 24) \|
17787			((v & 0x00FF000000000000) >> 40) \|
17788			((v & 0xFF00000000000000) >> 56);
17789			#endif
17790			}
17791
17792			#define le16_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap16(v))
17793			#define le32_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap32(v))
17794			#define le64_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap64(v))
17795			#define be16_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? bswap16(v) : (v))
17796			#define be32_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? bswap32(v) : (v))
17797			#define be64_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? bswap64(v) : (v))
17798
17799
17800
17801
17802
17803
17804			#if (defined(__GNUC__) \|\| defined(__clang__)) && \
17805			(defined(ARCH_X86_64) \|\| defined(ARCH_X86_32) \|\| \
17806			defined(__ARM_FEATURE_UNALIGNED) \|\| defined(__powerpc64__) \|\| \
17807			defined(__riscv_misaligned_fast) \|\| \
17808			defined(__wasm__))
17809			# define UNALIGNED_ACCESS_IS_FAST 1
17810			#elif defined(_MSC_VER)
17811			# define UNALIGNED_ACCESS_IS_FAST 1
17812			#else
17813			# define UNALIGNED_ACCESS_IS_FAST 0
17814			#endif
17815
17816
17817
17818			#ifdef FREESTANDING
17819			# define MEMCOPY __builtin_memcpy
17820			#else
17821			# define MEMCOPY memcpy
17822			#endif
17823
17824
17825
17826			#define DEFINE_UNALIGNED_TYPE(type) \
17827			static forceinline type \
17828			load_##type##_unaligned(const void *p) \
17829			{ \
17830			type v; \
17831			\
17832			MEMCOPY(&v, p, sizeof(v)); \
17833			return v; \
17834			} \
17835			\
17836			static forceinline void \
17837			store_##type##_unaligned(type v, void *p) \
17838			{ \
17839			MEMCOPY(p, &v, sizeof(v)); \
17840			}
17841
17842			DEFINE_UNALIGNED_TYPE(u16)
17843			DEFINE_UNALIGNED_TYPE(u32)
17844			DEFINE_UNALIGNED_TYPE(u64)
17845			DEFINE_UNALIGNED_TYPE(machine_word_t)
17846
17847			#undef MEMCOPY
17848
17849			#define load_word_unaligned load_machine_word_t_unaligned
17850			#define store_word_unaligned store_machine_word_t_unaligned
17851
17852
17853
17854			static forceinline u16
17855			get_unaligned_le16(const u8 *p)
17856			{
17857			if (UNALIGNED_ACCESS_IS_FAST)
17858			return le16_bswap(load_u16_unaligned(p));
17859			else
17860			return ((u16)p[1] << 8) \| p[0];
17861			}
17862
17863			static forceinline u16
17864			get_unaligned_be16(const u8 *p)
17865			{
17866			if (UNALIGNED_ACCESS_IS_FAST)
17867			return be16_bswap(load_u16_unaligned(p));
17868			else
17869			return ((u16)p[0] << 8) \| p[1];
17870			}
17871
17872			static forceinline u32
17873			get_unaligned_le32(const u8 *p)
17874			{
17875			if (UNALIGNED_ACCESS_IS_FAST)
17876			return le32_bswap(load_u32_unaligned(p));
17877			else
17878			return ((u32)p[3] << 24) \| ((u32)p[2] << 16) \|
17879			((u32)p[1] << 8) \| p[0];
17880			}
17881
17882			static forceinline u32
17883			get_unaligned_be32(const u8 *p)
17884			{
17885			if (UNALIGNED_ACCESS_IS_FAST)
17886			return be32_bswap(load_u32_unaligned(p));
17887			else
17888			return ((u32)p[0] << 24) \| ((u32)p[1] << 16) \|
17889			((u32)p[2] << 8) \| p[3];
17890			}
17891
17892			static forceinline u64
17893			get_unaligned_le64(const u8 *p)
17894			{
17895			if (UNALIGNED_ACCESS_IS_FAST)
17896			return le64_bswap(load_u64_unaligned(p));
17897			else
17898			return ((u64)p[7] << 56) \| ((u64)p[6] << 48) \|
17899			((u64)p[5] << 40) \| ((u64)p[4] << 32) \|
17900			((u64)p[3] << 24) \| ((u64)p[2] << 16) \|
17901			((u64)p[1] << 8) \| p[0];
17902			}
17903
17904			static forceinline machine_word_t
17905			get_unaligned_leword(const u8 *p)
17906			{
17907			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
17908			if (WORDBITS == 32)
17909			return get_unaligned_le32(p);
17910			else
17911			return get_unaligned_le64(p);
17912			}
17913
17914
17915
17916			static forceinline void
17917			put_unaligned_le16(u16 v, u8 *p)
17918			{
17919			if (UNALIGNED_ACCESS_IS_FAST) {
17920			store_u16_unaligned(le16_bswap(v), p);
17921			} else {
17922			p[0] = (u8)(v >> 0);
17923			p[1] = (u8)(v >> 8);
17924			}
17925			}
17926
17927			static forceinline void
17928			put_unaligned_be16(u16 v, u8 *p)
17929			{
17930			if (UNALIGNED_ACCESS_IS_FAST) {
17931			store_u16_unaligned(be16_bswap(v), p);
17932			} else {
17933			p[0] = (u8)(v >> 8);
17934			p[1] = (u8)(v >> 0);
17935			}
17936			}
17937
17938			static forceinline void
17939			put_unaligned_le32(u32 v, u8 *p)
17940			{
17941			if (UNALIGNED_ACCESS_IS_FAST) {
17942			store_u32_unaligned(le32_bswap(v), p);
17943			} else {
17944			p[0] = (u8)(v >> 0);
17945			p[1] = (u8)(v >> 8);
17946			p[2] = (u8)(v >> 16);
17947			p[3] = (u8)(v >> 24);
17948			}
17949			}
17950
17951			static forceinline void
17952			put_unaligned_be32(u32 v, u8 *p)
17953			{
17954			if (UNALIGNED_ACCESS_IS_FAST) {
17955			store_u32_unaligned(be32_bswap(v), p);
17956			} else {
17957			p[0] = (u8)(v >> 24);
17958			p[1] = (u8)(v >> 16);
17959			p[2] = (u8)(v >> 8);
17960			p[3] = (u8)(v >> 0);
17961			}
17962			}
17963
17964			static forceinline void
17965			put_unaligned_le64(u64 v, u8 *p)
17966			{
17967			if (UNALIGNED_ACCESS_IS_FAST) {
17968			store_u64_unaligned(le64_bswap(v), p);
17969			} else {
17970			p[0] = (u8)(v >> 0);
17971			p[1] = (u8)(v >> 8);
17972			p[2] = (u8)(v >> 16);
17973			p[3] = (u8)(v >> 24);
17974			p[4] = (u8)(v >> 32);
17975			p[5] = (u8)(v >> 40);
17976			p[6] = (u8)(v >> 48);
17977			p[7] = (u8)(v >> 56);
17978			}
17979			}
17980
17981			static forceinline void
17982			put_unaligned_leword(machine_word_t v, u8 *p)
17983			{
17984			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
17985			if (WORDBITS == 32)
17986			put_unaligned_le32(v, p);
17987			else
17988			put_unaligned_le64(v, p);
17989			}
17990
17991
17992
17993
17994
17995
17996
17997			static forceinline unsigned
17998			bsr32(u32 v)
17999			{
18000			#if defined(__GNUC__) \|\| __has_builtin(__builtin_clz)
18001			return 31 - __builtin_clz(v);
18002			#elif defined(_MSC_VER)
18003			unsigned long i;
18004
18005			_BitScanReverse(&i, v);
18006			return i;
18007			#else
18008			unsigned i = 0;
18009
18010			while ((v >>= 1) != 0)
18011			i++;
18012			return i;
18013			#endif
18014			}
18015
18016			static forceinline unsigned
18017			bsr64(u64 v)
18018			{
18019			#if defined(__GNUC__) \|\| __has_builtin(__builtin_clzll)
18020			return 63 - __builtin_clzll(v);
18021			#elif defined(_MSC_VER) && defined(_WIN64)
18022			unsigned long i;
18023
18024			_BitScanReverse64(&i, v);
18025			return i;
18026			#else
18027			unsigned i = 0;
18028
18029			while ((v >>= 1) != 0)
18030			i++;
18031			return i;
18032			#endif
18033			}
18034
18035			static forceinline unsigned
18036			bsrw(machine_word_t v)
18037			{
18038			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
18039			if (WORDBITS == 32)
18040			return bsr32(v);
18041			else
18042			return bsr64(v);
18043			}
18044
18045
18046
18047			static forceinline unsigned
18048			bsf32(u32 v)
18049			{
18050			#if defined(__GNUC__) \|\| __has_builtin(__builtin_ctz)
18051			return __builtin_ctz(v);
18052			#elif defined(_MSC_VER)
18053			unsigned long i;
18054
18055			_BitScanForward(&i, v);
18056			return i;
18057			#else
18058			unsigned i = 0;
18059
18060			for (; (v & 1) == 0; v >>= 1)
18061			i++;
18062			return i;
18063			#endif
18064			}
18065
18066			static forceinline unsigned
18067			bsf64(u64 v)
18068			{
18069			#if defined(__GNUC__) \|\| __has_builtin(__builtin_ctzll)
18070			return __builtin_ctzll(v);
18071			#elif defined(_MSC_VER) && defined(_WIN64)
18072			unsigned long i;
18073
18074			_BitScanForward64(&i, v);
18075			return i;
18076			#else
18077			unsigned i = 0;
18078
18079			for (; (v & 1) == 0; v >>= 1)
18080			i++;
18081			return i;
18082			#endif
18083			}
18084
18085			static forceinline unsigned
18086			bsfw(machine_word_t v)
18087			{
18088			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
18089			if (WORDBITS == 32)
18090			return bsf32(v);
18091			else
18092			return bsf64(v);
18093			}
18094
18095
18096			#undef rbit32
18097			#if (defined(__GNUC__) \|\| defined(__clang__)) && defined(ARCH_ARM32) && \
18098			(__ARM_ARCH >= 7 \|\| (__ARM_ARCH == 6 && defined(__ARM_ARCH_6T2__)))
18099			static forceinline u32
18100			rbit32(u32 v)
18101			{
18102			__asm__("rbit %0, %1" : "=r" (v) : "r" (v));
18103			return v;
18104			}
18105			#define rbit32 rbit32
18106			#elif (defined(__GNUC__) \|\| defined(__clang__)) && defined(ARCH_ARM64)
18107			static forceinline u32
18108			rbit32(u32 v)
18109			{
18110			__asm__("rbit %w0, %w1" : "=r" (v) : "r" (v));
18111			return v;
18112			}
18113			#define rbit32 rbit32
18114			#endif
18115
18116			#endif
18117
18118
18119			typedef void (malloc_func_t)(size_t);
18120			typedef void (free_func_t)(void );
18121
18122			extern malloc_func_t libdeflate_default_malloc_func;
18123			extern free_func_t libdeflate_default_free_func;
18124
18125			void *libdeflate_aligned_malloc(malloc_func_t malloc_func,
18126			size_t alignment, size_t size);
18127			void libdeflate_aligned_free(free_func_t free_func, void *ptr);
18128
18129			#ifdef FREESTANDING
18130
18131			void memset(void s, int c, size_t n);
18132			#define memset(s, c, n) __builtin_memset((s), (c), (n))
18133
18134			void memcpy(void dest, const void *src, size_t n);
18135			#define memcpy(dest, src, n) __builtin_memcpy((dest), (src), (n))
18136
18137			void memmove(void dest, const void *src, size_t n);
18138			#define memmove(dest, src, n) __builtin_memmove((dest), (src), (n))
18139
18140			int memcmp(const void s1, const void s2, size_t n);
18141			#define memcmp(s1, s2, n) __builtin_memcmp((s1), (s2), (n))
18142
18143			#undef LIBDEFLATE_ENABLE_ASSERTIONS
18144			#else
18145			# include
18146
18147			# ifdef __clang_analyzer__
18148			# define LIBDEFLATE_ENABLE_ASSERTIONS
18149			# endif
18150			#endif
18151
18152
18153			#ifdef LIBDEFLATE_ENABLE_ASSERTIONS
18154			NORETURN void
18155			libdeflate_assertion_failed(const char expr, const char file, int line);
18156			#define ASSERT(expr) { if (unlikely(!(expr))) \
18157			libdeflate_assertion_failed(#expr, __FILE__, __LINE__); }
18158			#else
18159			#define ASSERT(expr) (void)(expr)
18160			#endif
18161
18162			#define CONCAT_IMPL(a, b) a##b
18163			#define CONCAT(a, b) CONCAT_IMPL(a, b)
18164			#define ADD_SUFFIX(name) CONCAT(name, SUFFIX)
18165
18166			#endif
18167
18168
18169			#ifndef MATCHFINDER_WINDOW_ORDER
18170			# error "MATCHFINDER_WINDOW_ORDER must be defined!"
18171			#endif
18172
18173
18174			static forceinline u32
18175			loaded_u32_to_u24(u32 v)
18176			{
18177			if (CPU_IS_LITTLE_ENDIAN())
18178			return v & 0xFFFFFF;
18179			else
18180			return v >> 8;
18181			}
18182
18183			/* Loads the three bytes at p as a 24-bit value in CPU byte order; p need not be aligned. */
18184			static forceinline u32
18185			load_u24_unaligned(const u8 *p)
18186			{
18187			#if UNALIGNED_ACCESS_IS_FAST
18188			return loaded_u32_to_u24(load_u32_unaligned(p)); /* 32-bit load: p[3] is read as well, then discarded */
18189			#else
18190			if (CPU_IS_LITTLE_ENDIAN())
18191			return ((u32)p[0] << 0) \| ((u32)p[1] << 8) \| ((u32)p[2] << 16); /* p[0] is the least significant byte */
18192			else
18193			return ((u32)p[2] << 0) \| ((u32)p[1] << 8) \| ((u32)p[0] << 16); /* p[0] is the most significant byte */
18194			#endif
18195			}
18196			/* Matchfinder parameters derived from MATCHFINDER_WINDOW_ORDER. */
18197			#define MATCHFINDER_WINDOW_SIZE (1UL << MATCHFINDER_WINDOW_ORDER)
18198			/* Positions are stored as signed 16-bit values. */
18199			typedef s16 mf_pos_t;
18200			/* Initial table value: the negated window size converted to mf_pos_t. */
18201			#define MATCHFINDER_INITVAL ((mf_pos_t)-MATCHFINDER_WINDOW_SIZE)
18202
18203			/* Alignment constant in bytes; presumably required of the matchfinder's memory (see the STATIC_ASSERTs that use it). */
18204			#define MATCHFINDER_MEM_ALIGNMENT 32
18205
18206			/* Size granularity in bytes; presumably every table size is a multiple of this (see the STATIC_ASSERTs that use it). */
18207			#define MATCHFINDER_SIZE_ALIGNMENT 1024
18208
18209			#undef matchfinder_init
18210			#undef matchfinder_rebase
18211			#ifdef _aligned_attribute
18212			# define MATCHFINDER_ALIGNED _aligned_attribute(MATCHFINDER_MEM_ALIGNMENT)
18213			# if defined(ARCH_ARM32) \|\| defined(ARCH_ARM64)
18214			/* # include "arm/matchfinder_impl.h" */
18215
18216
18217			#ifndef LIB_ARM_MATCHFINDER_IMPL_H
18218			#define LIB_ARM_MATCHFINDER_IMPL_H
18219
18220			/* #include "arm-cpu_features.h" */
18221
18222
18223			#ifndef LIB_ARM_CPU_FEATURES_H
18224			#define LIB_ARM_CPU_FEATURES_H
18225
18226			/* #include "lib_common.h" */
18227
18228
18229			#ifndef LIB_LIB_COMMON_H
18230			#define LIB_LIB_COMMON_H
18231			/* Include-order check: libdeflate.h must not have been included yet. */
18232			#ifdef LIBDEFLATE_H
18233
18234			# error "lib_common.h must always be included before libdeflate.h"
18235			#endif
18236			/* LIBDEFLATE_EXPORT_SYM: dllexport for Windows/Cygwin DLL builds, default visibility for GNU C, otherwise empty. */
18237			#if defined(LIBDEFLATE_DLL) && (defined(_WIN32) \|\| defined(__CYGWIN__))
18238			# define LIBDEFLATE_EXPORT_SYM __declspec(dllexport)
18239			#elif defined(__GNUC__)
18240			# define LIBDEFLATE_EXPORT_SYM __attribute__((visibility("default")))
18241			#else
18242			# define LIBDEFLATE_EXPORT_SYM
18243			#endif
18244
18245			/* LIBDEFLATE_ALIGN_STACK: on 32-bit x86 with GNU C, realign the stack on entry to the function. */
18246			#if defined(__GNUC__) && defined(__i386__)
18247			# define LIBDEFLATE_ALIGN_STACK __attribute__((force_align_arg_pointer))
18248			#else
18249			# define LIBDEFLATE_ALIGN_STACK
18250			#endif
18251			/* Defined here first, so the #ifndef LIBDEFLATEAPI fallback in libdeflate.h is skipped. */
18252			#define LIBDEFLATEAPI LIBDEFLATE_EXPORT_SYM LIBDEFLATE_ALIGN_STACK
18253
18254			/* #include "../common_defs.h" */
18255
18256
18257			#ifndef COMMON_DEFS_H
18258			#define COMMON_DEFS_H
18259
18260			/* #include "libdeflate.h" */
18261
18262
18263			#ifndef LIBDEFLATE_H
18264			#define LIBDEFLATE_H
18265
18266			#include <stddef.h> /* size_t, used throughout the prototypes below */
18267			#include <stdint.h> /* uint32_t, used by the checksum prototypes */
18268
18269			#ifdef __cplusplus
18270			extern "C" {
18271			#endif
18272			/* Library version as separate integers and as a string; the three must be kept in sync. */
18273			#define LIBDEFLATE_VERSION_MAJOR 1
18274			#define LIBDEFLATE_VERSION_MINOR 25
18275			#define LIBDEFLATE_VERSION_STRING "1.25"
18276
18277			/* Fallback used when LIBDEFLATEAPI is not already defined: dllimport for Windows/Cygwin DLL users, otherwise empty. */
18278			#ifndef LIBDEFLATEAPI
18279			# if defined(LIBDEFLATE_DLL) && (defined(_WIN32) \|\| defined(__CYGWIN__))
18280			# define LIBDEFLATEAPI __declspec(dllimport)
18281			# else
18282			# define LIBDEFLATEAPI
18283			# endif
18284			#endif
18285
18286
18287
18288
18289			/* Compression API. Both struct types are only forward-declared here, so callers handle them through pointers. */
18290			struct libdeflate_compressor;
18291			struct libdeflate_options;
18292
18293			/* Returns a compressor handle for the requested level. NOTE(review): failure convention is not visible here - confirm. */
18294			LIBDEFLATEAPI struct libdeflate_compressor *
18295			libdeflate_alloc_compressor(int compression_level);
18296
18297			/* Same as above, with an additional options structure (see struct libdeflate_options). */
18298			LIBDEFLATEAPI struct libdeflate_compressor *
18299			libdeflate_alloc_compressor_ex(int compression_level,
18300			const struct libdeflate_options *options);
18301
18302			/* Takes an input buffer and an output buffer with its capacity; returns a size_t (presumably the compressed size - confirm). */
18303			LIBDEFLATEAPI size_t
18304			libdeflate_deflate_compress(struct libdeflate_compressor *compressor,
18305			const void *in, size_t in_nbytes,
18306			void *out, size_t out_nbytes_avail);
18307
18308			/* Presumably an upper bound on the compressed size for in_nbytes of input - confirm. */
18309			LIBDEFLATEAPI size_t
18310			libdeflate_deflate_compress_bound(struct libdeflate_compressor *compressor,
18311			size_t in_nbytes);
18312
18313			/* zlib-named counterpart of libdeflate_deflate_compress(); same parameter list. */
18314			LIBDEFLATEAPI size_t
18315			libdeflate_zlib_compress(struct libdeflate_compressor *compressor,
18316			const void *in, size_t in_nbytes,
18317			void *out, size_t out_nbytes_avail);
18318
18319			/* zlib-named counterpart of libdeflate_deflate_compress_bound(). */
18320			LIBDEFLATEAPI size_t
18321			libdeflate_zlib_compress_bound(struct libdeflate_compressor *compressor,
18322			size_t in_nbytes);
18323
18324			/* gzip-named counterpart of libdeflate_deflate_compress(); same parameter list. */
18325			LIBDEFLATEAPI size_t
18326			libdeflate_gzip_compress(struct libdeflate_compressor *compressor,
18327			const void *in, size_t in_nbytes,
18328			void *out, size_t out_nbytes_avail);
18329
18330			/* gzip-named counterpart of libdeflate_deflate_compress_bound(). */
18331			LIBDEFLATEAPI size_t
18332			libdeflate_gzip_compress_bound(struct libdeflate_compressor *compressor,
18333			size_t in_nbytes);
18334
18335			/* Releases a compressor handle. NOTE(review): NULL handling is not visible here - confirm. */
18336			LIBDEFLATEAPI void
18337			libdeflate_free_compressor(struct libdeflate_compressor *compressor);
18338
18339
18340
18341
18342			/* Decompression API. The decompressor type is only forward-declared here, so callers handle it through pointers. */
18343			struct libdeflate_decompressor;
18344			struct libdeflate_options;
18345
18346			/* Returns a decompressor handle. NOTE(review): failure convention is not visible here - confirm. */
18347			LIBDEFLATEAPI struct libdeflate_decompressor *
18348			libdeflate_alloc_decompressor(void);
18349
18350			/* Same as above, with an options structure (see struct libdeflate_options). */
18351			LIBDEFLATEAPI struct libdeflate_decompressor *
18352			libdeflate_alloc_decompressor_ex(const struct libdeflate_options *options);
18353
18354			/* Result codes returned by the *_decompress() functions; the numeric values are fixed explicitly. */
18355			enum libdeflate_result {
18356
18357			LIBDEFLATE_SUCCESS = 0,
18358
18359			/* presumably: the input was rejected as invalid - confirm */
18360			LIBDEFLATE_BAD_DATA = 1,
18361
18362			/* presumably: less output was produced than the caller expected - confirm */
18363			LIBDEFLATE_SHORT_OUTPUT = 2,
18364
18365			/* presumably: the output buffer was too small - confirm */
18366			LIBDEFLATE_INSUFFICIENT_SPACE = 3,
18367			};
18368
18369			/* Each function takes input and output buffers and returns an enum libdeflate_result; sizes come back via the *_ret pointers. */
18370			LIBDEFLATEAPI enum libdeflate_result
18371			libdeflate_deflate_decompress(struct libdeflate_decompressor *decompressor,
18372			const void *in, size_t in_nbytes,
18373			void *out, size_t out_nbytes_avail,
18374			size_t *actual_out_nbytes_ret);
18375
18376			/* _ex variant: additionally has actual_in_nbytes_ret (presumably the number of input bytes consumed - confirm). */
18377			LIBDEFLATEAPI enum libdeflate_result
18378			libdeflate_deflate_decompress_ex(struct libdeflate_decompressor *decompressor,
18379			const void *in, size_t in_nbytes,
18380			void *out, size_t out_nbytes_avail,
18381			size_t *actual_in_nbytes_ret,
18382			size_t *actual_out_nbytes_ret);
18383
18384			/* zlib-named counterpart of libdeflate_deflate_decompress(); same parameter list. */
18385			LIBDEFLATEAPI enum libdeflate_result
18386			libdeflate_zlib_decompress(struct libdeflate_decompressor *decompressor,
18387			const void *in, size_t in_nbytes,
18388			void *out, size_t out_nbytes_avail,
18389			size_t *actual_out_nbytes_ret);
18390
18391			/* zlib-named counterpart of libdeflate_deflate_decompress_ex(). */
18392			LIBDEFLATEAPI enum libdeflate_result
18393			libdeflate_zlib_decompress_ex(struct libdeflate_decompressor *decompressor,
18394			const void *in, size_t in_nbytes,
18395			void *out, size_t out_nbytes_avail,
18396			size_t *actual_in_nbytes_ret,
18397			size_t *actual_out_nbytes_ret);
18398
18399			/* gzip-named counterpart of libdeflate_deflate_decompress(); same parameter list. */
18400			LIBDEFLATEAPI enum libdeflate_result
18401			libdeflate_gzip_decompress(struct libdeflate_decompressor *decompressor,
18402			const void *in, size_t in_nbytes,
18403			void *out, size_t out_nbytes_avail,
18404			size_t *actual_out_nbytes_ret);
18405
18406			/* gzip-named counterpart of libdeflate_deflate_decompress_ex(). */
18407			LIBDEFLATEAPI enum libdeflate_result
18408			libdeflate_gzip_decompress_ex(struct libdeflate_decompressor *decompressor,
18409			const void *in, size_t in_nbytes,
18410			void *out, size_t out_nbytes_avail,
18411			size_t *actual_in_nbytes_ret,
18412			size_t *actual_out_nbytes_ret);
18413
18414			/* Releases a decompressor handle. NOTE(review): NULL handling is not visible here - confirm. */
18415			LIBDEFLATEAPI void
18416			libdeflate_free_decompressor(struct libdeflate_decompressor *decompressor);
18417
18418
18419
18420
18421
18422			/* Checksum API: each function takes a 32-bit checksum value plus a buffer and returns a 32-bit value (presumably the updated checksum - confirm). */
18423			LIBDEFLATEAPI uint32_t
18424			libdeflate_adler32(uint32_t adler, const void *buffer, size_t len);
18425
18426
18427			/* CRC-32 counterpart of libdeflate_adler32(); same parameter shape. */
18428			LIBDEFLATEAPI uint32_t
18429			libdeflate_crc32(uint32_t crc, const void *buffer, size_t len);
18430
18431
18432
18433
18434
18435			/* Takes a malloc-style and a free-style callback. NOTE(review): scope and timing of the effect are not visible here - confirm. */
18436			LIBDEFLATEAPI void
18437			libdeflate_set_memory_allocator(void *(*malloc_func)(size_t),
18438			void (*free_func)(void *));
18439
18440			/* Options structure accepted by the *_ex() allocation functions. */
18441			struct libdeflate_options {
18442
18443			/* presumably must be set to sizeof(struct libdeflate_options) by the caller - confirm */
18444			size_t sizeof_options;
18445
18446			/* allocator callbacks, same signatures as in libdeflate_set_memory_allocator() */
18447			void *(*malloc_func)(size_t);
18448			void (*free_func)(void *);
18449			};
18450
18451			#ifdef __cplusplus
18452			}
18453			#endif
18454
18455			#endif
18456
18457
18458			#include <stdbool.h> /* bool / true, used by CPU_IS_LITTLE_ENDIAN() */
18459			#include <stddef.h> /* size_t */
18460			#include <stdint.h> /* uint8_t ... int64_t, used by the u8..s64 typedefs */
18461			#ifdef _MSC_VER
18462			# include <intrin.h> /* _BitScanForward/_BitScanReverse and other intrinsics */
18463			# include <stdlib.h> /* _byteswap_ushort/_byteswap_ulong/_byteswap_uint64 */
18464
18465			/* Silence MSVC warnings by number; the rationale for each is not recorded in this listing. */
18466			# pragma warning(disable : 4146)
18467
18468			# pragma warning(disable : 4018)
18469			# pragma warning(disable : 4244)
18470			# pragma warning(disable : 4267)
18471			# pragma warning(disable : 4310)
18472
18473			# pragma warning(disable : 4100)
18474			# pragma warning(disable : 4127)
18475			# pragma warning(disable : 4189)
18476			# pragma warning(disable : 4232)
18477			# pragma warning(disable : 4245)
18478			# pragma warning(disable : 4295)
18479			#endif
18480			#ifndef FREESTANDING
18481			# include <string.h> /* memcpy(), used by the unaligned load/store helpers */
18482			#endif
18483
18484
18485
18486
18487
18488			/* Target architecture detection: at most one ARCH_* macro ends up defined. */
18489			#undef ARCH_X86_64
18490			#undef ARCH_X86_32
18491			#undef ARCH_ARM64
18492			#undef ARCH_ARM32
18493			#undef ARCH_RISCV
18494			#ifdef _MSC_VER
18495			/* MSVC: use the _M_* macros; _M_X64 combined with _M_ARM64EC is not treated as x86_64. */
18496			# if defined(_M_X64) && !defined(_M_ARM64EC)
18497			# define ARCH_X86_64
18498			# elif defined(_M_IX86)
18499			# define ARCH_X86_32
18500			# elif defined(_M_ARM64)
18501			# define ARCH_ARM64
18502			# elif defined(_M_ARM)
18503			# define ARCH_ARM32
18504			# endif
18505			#else /* other compilers: use the GNU-style predefined macros */
18506			# if defined(__x86_64__)
18507			# define ARCH_X86_64
18508			# elif defined(__i386__)
18509			# define ARCH_X86_32
18510			# elif defined(__aarch64__)
18511			# define ARCH_ARM64
18512			# elif defined(__arm__)
18513			# define ARCH_ARM32
18514			# elif defined(__riscv)
18515			# define ARCH_RISCV
18516			# endif
18517			#endif
18518
18519
18520
18521
18522
18523			/* Short aliases for the <stdint.h> fixed-width types: uN is unsigned, sN is signed. */
18524			typedef uint8_t u8;
18525			typedef uint16_t u16;
18526			typedef uint32_t u32;
18527			typedef uint64_t u64;
18528			typedef int8_t s8;
18529			typedef int16_t s16;
18530			typedef int32_t s32;
18531			typedef int64_t s64;
18532
18533			/* Define ssize_t under MSVC, sized by _WIN64. */
18534			#ifdef _MSC_VER
18535			# ifdef _WIN64
18536			typedef long long ssize_t;
18537			# else
18538			typedef long ssize_t;
18539			# endif
18540			#endif
18541
18542			/* "Machine word" type used by the word-at-a-time helpers; it is simply size_t. */
18543			typedef size_t machine_word_t;
18544
18545			/* Bytes in a machine word, as an int. */
18546			#define WORDBYTES ((int)sizeof(machine_word_t))
18547
18548			/* Bits in a machine word. */
18549			#define WORDBITS (8 * WORDBYTES)
18550
18551
18552
18553
18554
18555			/* Compiler version predicates; each one expands to 0 under any other compiler. Minimum supported versions are enforced with #error. */
18556			#if defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER)
18557			# define GCC_PREREQ(major, minor) \
18558			(__GNUC__ > (major) \|\| \
18559			(__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)))
18560			# if !GCC_PREREQ(4, 9)
18561			# error "gcc versions older than 4.9 are no longer supported"
18562			# endif
18563			#else
18564			# define GCC_PREREQ(major, minor) 0
18565			#endif
18566			#ifdef __clang__
18567			# ifdef __apple_build_version__ /* Apple clang: compare the Apple build number; major/minor are ignored */
18568			# define CLANG_PREREQ(major, minor, apple_version) \
18569			(__apple_build_version__ >= (apple_version))
18570			# else /* other clang: compare major/minor; apple_version is ignored */
18571			# define CLANG_PREREQ(major, minor, apple_version) \
18572			(__clang_major__ > (major) \|\| \
18573			(__clang_major__ == (major) && __clang_minor__ >= (minor)))
18574			# endif
18575			# if !CLANG_PREREQ(3, 9, 8000000)
18576			# error "clang versions older than 3.9 are no longer supported"
18577			# endif
18578			#else
18579			# define CLANG_PREREQ(major, minor, apple_version) 0
18580			#endif
18581			#ifdef _MSC_VER
18582			# define MSVC_PREREQ(version) (_MSC_VER >= (version)) /* version is a raw _MSC_VER value, e.g. 1900 */
18583			# if !MSVC_PREREQ(1900)
18584			# error "MSVC versions older than Visual Studio 2015 are no longer supported"
18585			# endif
18586			#else
18587			# define MSVC_PREREQ(version) 0
18588			#endif
18589
18590			/* Fallbacks so __has_attribute() and __has_builtin() can be used in #if on compilers that lack them (they then report 0). */
18591			#ifndef __has_attribute
18592			# define __has_attribute(attribute) 0
18593			#endif
18594
18595
18596			#ifndef __has_builtin
18597			# define __has_builtin(builtin) 0
18598			#endif
18599
18600			/* MSVC: map the inline keyword to __inline. */
18601			#ifdef _MSC_VER
18602			# define inline __inline
18603			#endif
18604
18605			/* forceinline: request unconditional inlining where the compiler offers a way; otherwise plain inline. */
18606			#if defined(__GNUC__) \|\| __has_attribute(always_inline)
18607			# define forceinline inline __attribute__((always_inline))
18608			#elif defined(_MSC_VER)
18609			# define forceinline __forceinline
18610			#else
18611			# define forceinline inline
18612			#endif
18613
18614			/* MAYBE_UNUSED: the "unused" attribute where available, otherwise empty. */
18615			#if defined(__GNUC__) \|\| __has_attribute(unused)
18616			# define MAYBE_UNUSED __attribute__((unused))
18617			#else
18618			# define MAYBE_UNUSED
18619			#endif
18620
18621			/* NORETURN: the "noreturn" attribute where available, otherwise empty. */
18622			#if defined(__GNUC__) \|\| __has_attribute(noreturn)
18623			# define NORETURN __attribute__((noreturn))
18624			#else
18625			# define NORETURN
18626			#endif
18627
18628			/* When __STDC_VERSION__ is missing or older than C11: map restrict to __restrict__ on GNU C/clang, otherwise define it away. */
18629			#if !defined(__STDC_VERSION__) \|\| (__STDC_VERSION__ < 201112L)
18630			# if defined(__GNUC__) \|\| defined(__clang__)
18631			# define restrict __restrict__
18632			# else
18633			# define restrict
18634			# endif
18635			#endif
18636
18637			/* likely(expr): hint that expr is usually true. With the builtin the result is normalized to 0/1; the fallback passes expr through unchanged. */
18638			#if defined(__GNUC__) \|\| __has_builtin(__builtin_expect)
18639			# define likely(expr) __builtin_expect(!!(expr), 1)
18640			#else
18641			# define likely(expr) (expr)
18642			#endif
18643
18644			/* unlikely(expr): hint that expr is usually false; same normalization note as likely(). */
18645			#if defined(__GNUC__) \|\| __has_builtin(__builtin_expect)
18646			# define unlikely(expr) __builtin_expect(!!(expr), 0)
18647			#else
18648			# define unlikely(expr) (expr)
18649			#endif
18650
18651			/* prefetchr(addr): prefetch for reading; expands to nothing when no implementation is available. */
18652			#undef prefetchr
18653			#if defined(__GNUC__) \|\| __has_builtin(__builtin_prefetch)
18654			# define prefetchr(addr) __builtin_prefetch((addr), 0)
18655			#elif defined(_MSC_VER)
18656			# if defined(ARCH_X86_32) \|\| defined(ARCH_X86_64)
18657			# define prefetchr(addr) _mm_prefetch((addr), _MM_HINT_T0)
18658			# elif defined(ARCH_ARM64)
18659			# define prefetchr(addr) __prefetch2((addr), 0x00 )
18660			# elif defined(ARCH_ARM32)
18661			# define prefetchr(addr) __prefetch(addr)
18662			# endif
18663			#endif
18664			#ifndef prefetchr
18665			# define prefetchr(addr)
18666			#endif
18667
18668			/* prefetchw(addr): prefetch for writing; expands to nothing when no implementation is available. */
18669			#undef prefetchw
18670			#if defined(__GNUC__) \|\| __has_builtin(__builtin_prefetch)
18671			# define prefetchw(addr) __builtin_prefetch((addr), 1)
18672			#elif defined(_MSC_VER)
18673			# if defined(ARCH_X86_32) \|\| defined(ARCH_X86_64)
18674			# define prefetchw(addr) _m_prefetchw(addr)
18675			# elif defined(ARCH_ARM64)
18676			# define prefetchw(addr) __prefetch2((addr), 0x10 )
18677			# elif defined(ARCH_ARM32)
18678			# define prefetchw(addr) __prefetchw(addr)
18679			# endif
18680			#endif
18681			#ifndef prefetchw
18682			# define prefetchw(addr)
18683			#endif
18684
18685			/* _aligned_attribute(n): left undefined when unsupported, so users test it with #ifdef. */
18686			#undef _aligned_attribute
18687			#if defined(__GNUC__) \|\| __has_attribute(aligned)
18688			# define _aligned_attribute(n) __attribute__((aligned(n)))
18689			#elif defined(_MSC_VER)
18690			# define _aligned_attribute(n) __declspec(align(n))
18691			#endif
18692
18693			/* _target_attribute(attrs): per-function target options; expands to nothing when unsupported. */
18694			#if defined(__GNUC__) \|\| __has_attribute(target)
18695			# define _target_attribute(attrs) __attribute__((target(attrs)))
18696			#else
18697			# define _target_attribute(attrs)
18698			#endif
18699
18700
18701
18702
18703			/* Small utility macros. MIN, MAX, DIV_ROUND_UP, ALIGN and ROUND_UP repeat an argument in their expansion, so avoid side effects. */
18704			#define ARRAY_LEN(A) (sizeof(A) / sizeof((A)[0])) /* only meaningful for real arrays, not pointers */
18705			#define MIN(a, b) ((a) <= (b) ? (a) : (b))
18706			#define MAX(a, b) ((a) >= (b) ? (a) : (b))
18707			#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
18708			#define STATIC_ASSERT(expr) ((void)sizeof(char[1 - 2 * !(expr)])) /* array size becomes -1 when expr is false */
18709			#define ALIGN(n, a) (((n) + (a) - 1) & ~((a) - 1)) /* the mask form is only correct when a is a power of two */
18710			#define ROUND_UP(n, d) ((d) * DIV_ROUND_UP((n), (d)))
18711
18712
18713
18714
18715
18716			/* CPU_IS_LITTLE_ENDIAN(): a compile-time constant where the compiler exposes byte order, otherwise a runtime probe. */
18717			#if defined(__BYTE_ORDER__)
18718			# define CPU_IS_LITTLE_ENDIAN() (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
18719			#elif defined(_MSC_VER)
18720			# define CPU_IS_LITTLE_ENDIAN() true
18721			#else
18722			static forceinline bool CPU_IS_LITTLE_ENDIAN(void)
18723			{
18724			union {
18725			u32 w;
18726			u8 b;
18727			} u;
18728
18729			u.w = 1; /* store 1 as a 32-bit word ... */
18730			return u.b; /* ... its first byte in memory is 1 only on a little-endian CPU */
18731			}
18732			#endif
18733
18734			/* Returns v with its two bytes swapped. */
18735			static forceinline u16 bswap16(u16 v)
18736			{
18737			#if defined(__GNUC__) \|\| __has_builtin(__builtin_bswap16)
18738			return __builtin_bswap16(v);
18739			#elif defined(_MSC_VER)
18740			return _byteswap_ushort(v);
18741			#else
18742			return (v << 8) \| (v >> 8); /* computed in int; the conversion to u16 on return drops the excess high bits */
18743			#endif
18744			}
18745
18746			/* Returns v with its four bytes in reverse order. */
18747			static forceinline u32 bswap32(u32 v)
18748			{
18749			#if defined(__GNUC__) \|\| __has_builtin(__builtin_bswap32)
18750			return __builtin_bswap32(v);
18751			#elif defined(_MSC_VER)
18752			return _byteswap_ulong(v);
18753			#else
18754			return ((v & 0x000000FF) << 24) \|
18755			((v & 0x0000FF00) << 8) \|
18756			((v & 0x00FF0000) >> 8) \|
18757			((v & 0xFF000000) >> 24);
18758			#endif
18759			}
18760
18761			/* Returns v with its eight bytes in reverse order. */
18762			static forceinline u64 bswap64(u64 v)
18763			{
18764			#if defined(__GNUC__) \|\| __has_builtin(__builtin_bswap64)
18765			return __builtin_bswap64(v);
18766			#elif defined(_MSC_VER)
18767			return _byteswap_uint64(v);
18768			#else
18769			return ((v & 0x00000000000000FF) << 56) \|
18770			((v & 0x000000000000FF00) << 40) \|
18771			((v & 0x0000000000FF0000) << 24) \|
18772			((v & 0x00000000FF000000) << 8) \|
18773			((v & 0x000000FF00000000) >> 8) \|
18774			((v & 0x0000FF0000000000) >> 24) \|
18775			((v & 0x00FF000000000000) >> 40) \|
18776			((v & 0xFF00000000000000) >> 56);
18777			#endif
18778			}
18779			/* Convert between CPU byte order and a fixed byte order: leN_* swap only on big-endian CPUs, beN_* only on little-endian CPUs. */
18780			#define le16_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap16(v))
18781			#define le32_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap32(v))
18782			#define le64_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap64(v))
18783			#define be16_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? bswap16(v) : (v))
18784			#define be32_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? bswap32(v) : (v))
18785			#define be64_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? bswap64(v) : (v))
18786
18787
18788
18789
18790
18791			/* UNALIGNED_ACCESS_IS_FAST: 1 for the listed GNU C/clang targets and for MSVC, otherwise 0. It is always defined, so it works in both #if and if (). */
18792			#if (defined(__GNUC__) \|\| defined(__clang__)) && \
18793			(defined(ARCH_X86_64) \|\| defined(ARCH_X86_32) \|\| \
18794			defined(__ARM_FEATURE_UNALIGNED) \|\| defined(__powerpc64__) \|\| \
18795			defined(__riscv_misaligned_fast) \|\| \
18796			defined(__wasm__))
18797			# define UNALIGNED_ACCESS_IS_FAST 1
18798			#elif defined(_MSC_VER)
18799			# define UNALIGNED_ACCESS_IS_FAST 1
18800			#else
18801			# define UNALIGNED_ACCESS_IS_FAST 0
18802			#endif
18803
18804
18805			/* MEMCOPY: the copy routine used by the helpers below; the builtin is used in freestanding builds. */
18806			#ifdef FREESTANDING
18807			# define MEMCOPY __builtin_memcpy
18808			#else
18809			# define MEMCOPY memcpy
18810			#endif
18811
18812			/* DEFINE_UNALIGNED_TYPE(type) generates load_<type>_unaligned(p) and store_<type>_unaligned(v, p). */
18813			/* Both copy sizeof(type) bytes through MEMCOPY instead of dereferencing a cast pointer; values stay in CPU byte order. */
18814			#define DEFINE_UNALIGNED_TYPE(type) \
18815			static forceinline type \
18816			load_##type##_unaligned(const void *p) \
18817			{ \
18818			type v; \
18819			\
18820			MEMCOPY(&v, p, sizeof(v)); \
18821			return v; \
18822			} \
18823			\
18824			static forceinline void \
18825			store_##type##_unaligned(type v, void *p) \
18826			{ \
18827			MEMCOPY(p, &v, sizeof(v)); \
18828			}
18829			/* Instantiate the helpers for u16, u32, u64 and machine_word_t. */
18830			DEFINE_UNALIGNED_TYPE(u16)
18831			DEFINE_UNALIGNED_TYPE(u32)
18832			DEFINE_UNALIGNED_TYPE(u64)
18833			DEFINE_UNALIGNED_TYPE(machine_word_t)
18834
18835			#undef MEMCOPY
18836			/* Shorter names for the machine-word variants. */
18837			#define load_word_unaligned load_machine_word_t_unaligned
18838			#define store_word_unaligned store_machine_word_t_unaligned
18839
18840
18841			/* Reads a little-endian 16-bit value from p; p need not be aligned. */
18842			static forceinline u16
18843			get_unaligned_le16(const u8 *p)
18844			{
18845			if (UNALIGNED_ACCESS_IS_FAST)
18846			return le16_bswap(load_u16_unaligned(p)); /* one 16-bit load, swapped on big-endian CPUs */
18847			else
18848			return ((u16)p[1] << 8) \| p[0]; /* byte-by-byte, p[0] is the low byte */
18849			}
18850			/* Reads a big-endian 16-bit value from p; p need not be aligned. */
18851			static forceinline u16
18852			get_unaligned_be16(const u8 *p)
18853			{
18854			if (UNALIGNED_ACCESS_IS_FAST)
18855			return be16_bswap(load_u16_unaligned(p)); /* one 16-bit load, swapped on little-endian CPUs */
18856			else
18857			return ((u16)p[0] << 8) \| p[1]; /* byte-by-byte, p[0] is the high byte */
18858			}
18859			/* Reads a little-endian 32-bit value from p; p need not be aligned. */
18860			static forceinline u32
18861			get_unaligned_le32(const u8 *p)
18862			{
18863			if (UNALIGNED_ACCESS_IS_FAST)
18864			return le32_bswap(load_u32_unaligned(p));
18865			else
18866			return ((u32)p[3] << 24) \| ((u32)p[2] << 16) \|
18867			((u32)p[1] << 8) \| p[0]; /* p[0] is the least significant byte */
18868			}
18869			/* Reads a big-endian 32-bit value from p; p need not be aligned. */
18870			static forceinline u32
18871			get_unaligned_be32(const u8 *p)
18872			{
18873			if (UNALIGNED_ACCESS_IS_FAST)
18874			return be32_bswap(load_u32_unaligned(p));
18875			else
18876			return ((u32)p[0] << 24) \| ((u32)p[1] << 16) \|
18877			((u32)p[2] << 8) \| p[3]; /* p[0] is the most significant byte */
18878			}
18879			/* Reads a little-endian 64-bit value from p; p need not be aligned. */
18880			static forceinline u64
18881			get_unaligned_le64(const u8 *p)
18882			{
18883			if (UNALIGNED_ACCESS_IS_FAST)
18884			return le64_bswap(load_u64_unaligned(p));
18885			else
18886			return ((u64)p[7] << 56) \| ((u64)p[6] << 48) \|
18887			((u64)p[5] << 40) \| ((u64)p[4] << 32) \|
18888			((u64)p[3] << 24) \| ((u64)p[2] << 16) \|
18889			((u64)p[1] << 8) \| p[0]; /* p[0] is the least significant byte */
18890			}
18891			/* Reads a little-endian machine word (32 or 64 bits, per WORDBITS) from p. */
18892			static forceinline machine_word_t
18893			get_unaligned_leword(const u8 *p)
18894			{
18895			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64); /* no other word sizes are handled */
18896			if (WORDBITS == 32)
18897			return get_unaligned_le32(p);
18898			else
18899			return get_unaligned_le64(p);
18900			}
18901
18902
18903			/* Writes v to p as a little-endian 16-bit value; p need not be aligned. */
18904			static forceinline void
18905			put_unaligned_le16(u16 v, u8 *p)
18906			{
18907			if (UNALIGNED_ACCESS_IS_FAST) {
18908			store_u16_unaligned(le16_bswap(v), p);
18909			} else {
18910			p[0] = (u8)(v >> 0); /* low byte first */
18911			p[1] = (u8)(v >> 8);
18912			}
18913			}
18914			/* Writes v to p as a big-endian 16-bit value; p need not be aligned. */
18915			static forceinline void
18916			put_unaligned_be16(u16 v, u8 *p)
18917			{
18918			if (UNALIGNED_ACCESS_IS_FAST) {
18919			store_u16_unaligned(be16_bswap(v), p);
18920			} else {
18921			p[0] = (u8)(v >> 8); /* high byte first */
18922			p[1] = (u8)(v >> 0);
18923			}
18924			}
18925			/* Writes v to p as a little-endian 32-bit value; p need not be aligned. */
18926			static forceinline void
18927			put_unaligned_le32(u32 v, u8 *p)
18928			{
18929			if (UNALIGNED_ACCESS_IS_FAST) {
18930			store_u32_unaligned(le32_bswap(v), p);
18931			} else {
18932			p[0] = (u8)(v >> 0); /* least significant byte first */
18933			p[1] = (u8)(v >> 8);
18934			p[2] = (u8)(v >> 16);
18935			p[3] = (u8)(v >> 24);
18936			}
18937			}
18938			/* Writes v to p as a big-endian 32-bit value; p need not be aligned. */
18939			static forceinline void
18940			put_unaligned_be32(u32 v, u8 *p)
18941			{
18942			if (UNALIGNED_ACCESS_IS_FAST) {
18943			store_u32_unaligned(be32_bswap(v), p);
18944			} else {
18945			p[0] = (u8)(v >> 24); /* most significant byte first */
18946			p[1] = (u8)(v >> 16);
18947			p[2] = (u8)(v >> 8);
18948			p[3] = (u8)(v >> 0);
18949			}
18950			}
18951			/* Writes v to p as a little-endian 64-bit value; p need not be aligned. */
18952			static forceinline void
18953			put_unaligned_le64(u64 v, u8 *p)
18954			{
18955			if (UNALIGNED_ACCESS_IS_FAST) {
18956			store_u64_unaligned(le64_bswap(v), p);
18957			} else {
18958			p[0] = (u8)(v >> 0); /* least significant byte first */
18959			p[1] = (u8)(v >> 8);
18960			p[2] = (u8)(v >> 16);
18961			p[3] = (u8)(v >> 24);
18962			p[4] = (u8)(v >> 32);
18963			p[5] = (u8)(v >> 40);
18964			p[6] = (u8)(v >> 48);
18965			p[7] = (u8)(v >> 56);
18966			}
18967			}
18968			/* Writes v to p as a little-endian machine word (32 or 64 bits, per WORDBITS). */
18969			static forceinline void
18970			put_unaligned_leword(machine_word_t v, u8 *p)
18971			{
18972			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64); /* no other word sizes are handled */
18973			if (WORDBITS == 32)
18974			put_unaligned_le32(v, p);
18975			else
18976			put_unaligned_le64(v, p);
18977			}
18978
18979
18980
18981
18982
18983
18984			/* Bit Scan Reverse: index (0..31) of the highest set bit of v. v must be nonzero: __builtin_clz(0) is undefined. */
18985			static forceinline unsigned
18986			bsr32(u32 v)
18987			{
18988			#if defined(__GNUC__) \|\| __has_builtin(__builtin_clz)
18989			return 31 - __builtin_clz(v);
18990			#elif defined(_MSC_VER)
18991			unsigned long i;
18992
18993			_BitScanReverse(&i, v);
18994			return i;
18995			#else
18996			unsigned i = 0;
18997
18998			while ((v >>= 1) != 0) /* portable fallback: count how many shifts empty v */
18999			i++;
19000			return i;
19001			#endif
19002			}
19003			/* 64-bit Bit Scan Reverse: index (0..63) of the highest set bit of v. v must be nonzero: __builtin_clzll(0) is undefined. */
19004			static forceinline unsigned
19005			bsr64(u64 v)
19006			{
19007			#if defined(__GNUC__) \|\| __has_builtin(__builtin_clzll)
19008			return 63 - __builtin_clzll(v);
19009			#elif defined(_MSC_VER) && defined(_WIN64) /* the 64-bit intrinsic is used only on _WIN64 */
19010			unsigned long i;
19011
19012			_BitScanReverse64(&i, v);
19013			return i;
19014			#else
19015			unsigned i = 0;
19016
19017			while ((v >>= 1) != 0) /* portable fallback, also used by 32-bit MSVC */
19018			i++;
19019			return i;
19020			#endif
19021			}
19022			/* Machine-word Bit Scan Reverse: dispatches to bsr32() or bsr64() according to WORDBITS. */
19023			static forceinline unsigned
19024			bsrw(machine_word_t v)
19025			{
19026			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
19027			if (WORDBITS == 32)
19028			return bsr32(v);
19029			else
19030			return bsr64(v);
19031			}
19032
19033
19034			/* Bit Scan Forward: index (0..31) of the lowest set bit of v. v must be nonzero: __builtin_ctz(0) is undefined. */
19035			static forceinline unsigned
19036			bsf32(u32 v)
19037			{
19038			#if defined(__GNUC__) \|\| __has_builtin(__builtin_ctz)
19039			return __builtin_ctz(v);
19040			#elif defined(_MSC_VER)
19041			unsigned long i;
19042
19043			_BitScanForward(&i, v);
19044			return i;
19045			#else
19046			unsigned i = 0;
19047
19048			for (; (v & 1) == 0; v >>= 1) /* portable fallback; never terminates if v == 0 */
19049			i++;
19050			return i;
19051			#endif
19052			}
19053			/* 64-bit Bit Scan Forward: index (0..63) of the lowest set bit of v. v must be nonzero: __builtin_ctzll(0) is undefined. */
19054			static forceinline unsigned
19055			bsf64(u64 v)
19056			{
19057			#if defined(__GNUC__) \|\| __has_builtin(__builtin_ctzll)
19058			return __builtin_ctzll(v);
19059			#elif defined(_MSC_VER) && defined(_WIN64) /* the 64-bit intrinsic is used only on _WIN64 */
19060			unsigned long i;
19061
19062			_BitScanForward64(&i, v);
19063			return i;
19064			#else
19065			unsigned i = 0;
19066
19067			for (; (v & 1) == 0; v >>= 1) /* portable fallback; never terminates if v == 0 */
19068			i++;
19069			return i;
19070			#endif
19071			}
19072			/* Machine-word Bit Scan Forward: dispatches to bsf32() or bsf64() according to WORDBITS. */
19073			static forceinline unsigned
19074			bsfw(machine_word_t v)
19075			{
19076			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
19077			if (WORDBITS == 32)
19078			return bsf32(v);
19079			else
19080			return bsf64(v);
19081			}
19082
19083			/* rbit32(v): wraps the ARM "rbit" instruction. Only defined for the ARM configurations below; callers test availability with #ifdef rbit32. */
19084			#undef rbit32
19085			#if (defined(__GNUC__) \|\| defined(__clang__)) && defined(ARCH_ARM32) && \
19086			(__ARM_ARCH >= 7 \|\| (__ARM_ARCH == 6 && defined(__ARM_ARCH_6T2__)))
19087			static forceinline u32
19088			rbit32(u32 v)
19089			{
19090			__asm__("rbit %0, %1" : "=r" (v) : "r" (v));
19091			return v;
19092			}
19093			#define rbit32 rbit32 /* self-referential macro: makes the function detectable with #ifdef */
19094			#elif (defined(__GNUC__) \|\| defined(__clang__)) && defined(ARCH_ARM64)
19095			static forceinline u32
19096			rbit32(u32 v)
19097			{
19098			__asm__("rbit %w0, %w1" : "=r" (v) : "r" (v)); /* %w selects the 32-bit view of the registers */
19099			return v;
19100			}
19101			#define rbit32 rbit32
19102			#endif
19103
19104			#endif
19105
19106			/* Allocator callback types: pointers to functions with the malloc() and free() signatures. */
19107			typedef void *(*malloc_func_t)(size_t);
19108			typedef void (*free_func_t)(void *);
19109			/* Allocator callback variables; their definitions are elsewhere. */
19110			extern malloc_func_t libdeflate_default_malloc_func;
19111			extern free_func_t libdeflate_default_free_func;
19112			/* Aligned allocation helpers; the allocator callbacks are passed in explicitly. */
19113			void *libdeflate_aligned_malloc(malloc_func_t malloc_func,
19114			size_t alignment, size_t size);
19115			void libdeflate_aligned_free(free_func_t free_func, void *ptr);
19116
19117			#ifdef FREESTANDING
19118			/* Freestanding build: declare the mem*() routines here and map each call onto the compiler builtin. */
19119			void *memset(void *s, int c, size_t n);
19120			#define memset(s, c, n) __builtin_memset((s), (c), (n))
19121
19122			void *memcpy(void *dest, const void *src, size_t n);
19123			#define memcpy(dest, src, n) __builtin_memcpy((dest), (src), (n))
19124
19125			void *memmove(void *dest, const void *src, size_t n);
19126			#define memmove(dest, src, n) __builtin_memmove((dest), (src), (n))
19127
19128			int memcmp(const void *s1, const void *s2, size_t n);
19129			#define memcmp(s1, s2, n) __builtin_memcmp((s1), (s2), (n))
19130
19131			#undef LIBDEFLATE_ENABLE_ASSERTIONS /* assertions are forced off in freestanding builds */
19132			#else
19133			# include <string.h> /* hosted build: memset/memcpy/memmove/memcmp come from libc */
19134			/* Force assertions on whenever the clang static analyzer is running. */
19135			# ifdef __clang_analyzer__
19136			# define LIBDEFLATE_ENABLE_ASSERTIONS
19137			# endif
19138			#endif
19139
19140			/* ASSERT(expr): reports a failed check through libdeflate_assertion_failed() when assertions are enabled. */
19141			#ifdef LIBDEFLATE_ENABLE_ASSERTIONS
19142			NORETURN void
19143			libdeflate_assertion_failed(const char *expr, const char *file, int line);
19144			#define ASSERT(expr) { if (unlikely(!(expr))) \
19145			libdeflate_assertion_failed(#expr, __FILE__, __LINE__); }
19146			#else
19147			#define ASSERT(expr) (void)(expr) /* expr is still evaluated, so any side effects are kept */
19148			#endif
19149			/* Token pasting helpers; the extra CONCAT level lets macro arguments expand before ## is applied. */
19150			#define CONCAT_IMPL(a, b) a##b
19151			#define CONCAT(a, b) CONCAT_IMPL(a, b)
19152			#define ADD_SUFFIX(name) CONCAT(name, SUFFIX) /* SUFFIX is not defined here; presumably set by the including code */
19153
19154			#endif
19155
19156
19157			#if defined(ARCH_ARM32) \|\| defined(ARCH_ARM64)
19158			/* Bit flags for ARM CPU features; tested against a feature mask by the HAVE_*() macros. */
19159			#define ARM_CPU_FEATURE_NEON (1 << 0)
19160			#define ARM_CPU_FEATURE_PMULL (1 << 1)
19161
19162			#define ARM_CPU_FEATURE_PREFER_PMULL (1 << 2) /* NOTE(review): reads as a preference hint rather than a capability - confirm */
19163			#define ARM_CPU_FEATURE_CRC32 (1 << 3)
19164			#define ARM_CPU_FEATURE_SHA3 (1 << 4)
19165			#define ARM_CPU_FEATURE_DOTPROD (1 << 5)
19166			/* Runtime feature detection is compiled in only for hosted builds on the compiler/OS combinations listed below. */
19167			#if !defined(FREESTANDING) && \
19168			(defined(__GNUC__) \|\| defined(__clang__) \|\| defined(_MSC_VER)) && \
19169			(defined(__linux__) \|\| \
19170			(defined(__APPLE__) && defined(ARCH_ARM64)) \|\| \
19171			(defined(_WIN32) && defined(ARCH_ARM64)))
19172
19173			# define ARM_CPU_FEATURES_KNOWN (1U << 31) /* presumably set once detection has run, so the cached mask is nonzero - confirm */
19174			extern volatile u32 libdeflate_arm_cpu_features;
19175
19176			void libdeflate_init_arm_cpu_features(void);
19177			/* Returns the cached feature mask, calling the init function first while the cache is still 0. */
19178			static inline u32 get_arm_cpu_features(void)
19179			{
19180			if (libdeflate_arm_cpu_features == 0)
19181			libdeflate_init_arm_cpu_features();
19182			return libdeflate_arm_cpu_features;
19183			}
19184			#else
19185			static inline u32 get_arm_cpu_features(void) { return 0; } /* no runtime detection: report no features */
19186			#endif
19187
19188			/* NEON: HAVE_NEON(features) is constant 1 when NEON is part of the build target, otherwise a runtime mask test. */
19189			#if defined(__ARM_NEON) \|\| (defined(_MSC_VER) && defined(ARCH_ARM64))
19190			# define HAVE_NEON(features) 1
19191			# define HAVE_NEON_NATIVE 1
19192			#else
19193			# define HAVE_NEON(features) ((features) & ARM_CPU_FEATURE_NEON)
19194			# define HAVE_NEON_NATIVE 0
19195			#endif
19196			/* HAVE_NEON_INTRIN: whether the NEON intrinsics header can be used with this compiler. */
19197			#if (defined(__GNUC__) \|\| defined(__clang__) \|\| defined(_MSC_VER)) && \
19198			(HAVE_NEON_NATIVE \|\| (GCC_PREREQ(6, 1) && defined(__ARM_FP)))
19199			# define HAVE_NEON_INTRIN 1
19200			# include <arm_neon.h>
19201			#else
19202			# define HAVE_NEON_INTRIN 0
19203			#endif
19204
19205			/* PMULL: HAVE_PMULL(features) is constant 1 when __ARM_FEATURE_CRYPTO is defined, otherwise a runtime mask test. */
19206			#ifdef __ARM_FEATURE_CRYPTO
19207			# define HAVE_PMULL(features) 1
19208			#else
19209			# define HAVE_PMULL(features) ((features) & ARM_CPU_FEATURE_PMULL)
19210			#endif
19211			#if defined(ARCH_ARM64) && HAVE_NEON_INTRIN && \
19212			(GCC_PREREQ(7, 1) \|\| defined(__clang__) \|\| defined(_MSC_VER)) && \
19213			CPU_IS_LITTLE_ENDIAN()
19214			# define HAVE_PMULL_INTRIN 1
19215			/* compat_vmull_p64(): hides the differing argument form; under MSVC the operands go through vcreate_p64(). */
19216			# ifdef _MSC_VER
19217			# define compat_vmull_p64(a, b) vmull_p64(vcreate_p64(a), vcreate_p64(b))
19218			# else
19219			# define compat_vmull_p64(a, b) vmull_p64((a), (b))
19220			# endif
19221			#else
19222			# define HAVE_PMULL_INTRIN 0
19223			#endif
19224
19225			/* CRC32 extension: HAVE_CRC32(features) is constant 1 when __ARM_FEATURE_CRC32 is defined, otherwise a runtime mask test. */
19226			#ifdef __ARM_FEATURE_CRC32
19227			# define HAVE_CRC32(features) 1
19228			#else
19229			# define HAVE_CRC32(features) ((features) & ARM_CPU_FEATURE_CRC32)
19230			#endif
19231			#if defined(ARCH_ARM64) && \
19232			(defined(__GNUC__) \|\| defined(__clang__) \|\| defined(_MSC_VER))
19233			# define HAVE_CRC32_INTRIN 1
19234			# if defined(__GNUC__) \|\| defined(__clang__)
19235			# include <arm_acle.h>
19236			# endif
19237			/* clang older than 16 without __ARM_FEATURE_CRC32: replace the __crc32*() intrinsics with inline-asm equivalents. */
19238			# if defined(__clang__) && !CLANG_PREREQ(16, 0, 16000000) && \
19239			!defined(__ARM_FEATURE_CRC32)
19240			# undef __crc32b
19241			# define __crc32b(a, b) \
19242			({ uint32_t res; \
19243			__asm__("crc32b %w0, %w1, %w2" \
19244			: "=r" (res) : "r" (a), "r" (b)); \
19245			res; })
19246			# undef __crc32h
19247			# define __crc32h(a, b) \
19248			({ uint32_t res; \
19249			__asm__("crc32h %w0, %w1, %w2" \
19250			: "=r" (res) : "r" (a), "r" (b)); \
19251			res; })
19252			# undef __crc32w
19253			# define __crc32w(a, b) \
19254			({ uint32_t res; \
19255			__asm__("crc32w %w0, %w1, %w2" \
19256			: "=r" (res) : "r" (a), "r" (b)); \
19257			res; })
19258			# undef __crc32d
19259			# define __crc32d(a, b) \
19260			({ uint32_t res; \
19261			__asm__("crc32x %w0, %w1, %2" \
19262			: "=r" (res) : "r" (a), "r" (b)); \
19263			res; })
19264			# pragma clang diagnostic ignored "-Wgnu-statement-expression"
19265			# endif
19266			#else
19267			# define HAVE_CRC32_INTRIN 0
19268			#endif
19269
19270			/* SHA3 extension: HAVE_SHA3(features) is constant 1 when __ARM_FEATURE_SHA3 is defined, otherwise a runtime mask test. */
19271			#ifdef __ARM_FEATURE_SHA3
19272			# define HAVE_SHA3(features) 1
19273			#else
19274			# define HAVE_SHA3(features) ((features) & ARM_CPU_FEATURE_SHA3)
19275			#endif
19276			#if defined(ARCH_ARM64) && HAVE_NEON_INTRIN && \
19277			(GCC_PREREQ(9, 1) \|\| \
19278			CLANG_PREREQ(7, 0, 10010463) )
19279			# define HAVE_SHA3_INTRIN 1
19280			/* clang older than 16 without __ARM_FEATURE_SHA3: replace veor3q_u8() with an inline-asm "eor3". */
19281			# if defined(__clang__) && !CLANG_PREREQ(16, 0, 16000000) && \
19282			!defined(__ARM_FEATURE_SHA3)
19283			# undef veor3q_u8
19284			# define veor3q_u8(a, b, c) \
19285			({ uint8x16_t res; \
19286			__asm__("eor3 %0.16b, %1.16b, %2.16b, %3.16b" \
19287			: "=w" (res) : "w" (a), "w" (b), "w" (c)); \
19288			res; })
19289			# pragma clang diagnostic ignored "-Wgnu-statement-expression"
19290			# endif
19291			#else
19292			# define HAVE_SHA3_INTRIN 0
19293			#endif
19294
19295			/* Dot-product extension: HAVE_DOTPROD(features) is constant 1 when __ARM_FEATURE_DOTPROD is defined, otherwise a runtime mask test. */
19296			#ifdef __ARM_FEATURE_DOTPROD
19297			# define HAVE_DOTPROD(features) 1
19298			#else
19299			# define HAVE_DOTPROD(features) ((features) & ARM_CPU_FEATURE_DOTPROD)
19300			#endif
19301			#if defined(ARCH_ARM64) && HAVE_NEON_INTRIN && \
19302			(GCC_PREREQ(8, 1) \|\| CLANG_PREREQ(7, 0, 10010000) \|\| defined(_MSC_VER))
19303			# define HAVE_DOTPROD_INTRIN 1
19304			/* clang older than 16 without __ARM_FEATURE_DOTPROD: replace vdotq_u32() with an inline-asm "udot" that accumulates into a. */
19305			# if defined(__clang__) && !CLANG_PREREQ(16, 0, 16000000) && \
19306			!defined(__ARM_FEATURE_DOTPROD)
19307			# undef vdotq_u32
19308			# define vdotq_u32(a, b, c) \
19309			({ uint32x4_t res = (a); \
19310			__asm__("udot %0.4s, %1.16b, %2.16b" \
19311			: "+w" (res) : "w" (b), "w" (c)); \
19312			res; })
19313			# pragma clang diagnostic ignored "-Wgnu-statement-expression"
19314			# endif
19315			#else
19316			# define HAVE_DOTPROD_INTRIN 0
19317			#endif
19318
19319			#endif
19320
19321			#endif
19322
19323
19324			#if HAVE_NEON_NATIVE
19325			static forceinline void
19326			matchfinder_init_neon(mf_pos_t *data, size_t size)
19327			{
19328			int16x8_t p = (int16x8_t )data;
19329			int16x8_t v = vdupq_n_s16(MATCHFINDER_INITVAL);
19330
19331			STATIC_ASSERT(MATCHFINDER_MEM_ALIGNMENT % sizeof(*p) == 0);
19332			STATIC_ASSERT(MATCHFINDER_SIZE_ALIGNMENT % (4 * sizeof(*p)) == 0);
19333			STATIC_ASSERT(sizeof(mf_pos_t) == 2);
19334
19335			do {
19336			p[0] = v;
19337			p[1] = v;
19338			p[2] = v;
19339			p[3] = v;
19340			p += 4;
19341			size -= 4 * sizeof(*p);
19342			} while (size != 0);
19343			}
19344			#define matchfinder_init matchfinder_init_neon
19345
19346			static forceinline void
19347			matchfinder_rebase_neon(mf_pos_t *data, size_t size)
19348			{
19349			int16x8_t p = (int16x8_t )data;
19350			int16x8_t v = vdupq_n_s16((u16)-MATCHFINDER_WINDOW_SIZE);
19351
19352			STATIC_ASSERT(MATCHFINDER_MEM_ALIGNMENT % sizeof(*p) == 0);
19353			STATIC_ASSERT(MATCHFINDER_SIZE_ALIGNMENT % (4 * sizeof(*p)) == 0);
19354			STATIC_ASSERT(sizeof(mf_pos_t) == 2);
19355
19356			do {
19357			p[0] = vqaddq_s16(p[0], v);
19358			p[1] = vqaddq_s16(p[1], v);
19359			p[2] = vqaddq_s16(p[2], v);
19360			p[3] = vqaddq_s16(p[3], v);
19361			p += 4;
19362			size -= 4 * sizeof(*p);
19363			} while (size != 0);
19364			}
19365			#define matchfinder_rebase matchfinder_rebase_neon
19366
19367			#endif
19368
19369			#endif
19370
19371			# elif defined(ARCH_RISCV)
19372			# include "riscv/matchfinder_impl.h"
19373			# elif defined(ARCH_X86_32) \|\| defined(ARCH_X86_64)
19374			/* # include "x86/matchfinder_impl.h" */
19375
19376
19377			#ifndef LIB_X86_MATCHFINDER_IMPL_H
19378			#define LIB_X86_MATCHFINDER_IMPL_H
19379
19380			/* #include "x86-cpu_features.h" */
19381
19382
19383			#ifndef LIB_X86_CPU_FEATURES_H
19384			#define LIB_X86_CPU_FEATURES_H
19385
19386			/* #include "lib_common.h" */
19387
19388
19389			#ifndef LIB_LIB_COMMON_H
19390			#define LIB_LIB_COMMON_H
19391
19392			#ifdef LIBDEFLATE_H
19393
19394			# error "lib_common.h must always be included before libdeflate.h"
19395			#endif
19396
19397			#if defined(LIBDEFLATE_DLL) && (defined(_WIN32) \|\| defined(__CYGWIN__))
19398			# define LIBDEFLATE_EXPORT_SYM __declspec(dllexport)
19399			#elif defined(__GNUC__)
19400			# define LIBDEFLATE_EXPORT_SYM __attribute__((visibility("default")))
19401			#else
19402			# define LIBDEFLATE_EXPORT_SYM
19403			#endif
19404
19405
19406			#if defined(__GNUC__) && defined(__i386__)
19407			# define LIBDEFLATE_ALIGN_STACK __attribute__((force_align_arg_pointer))
19408			#else
19409			# define LIBDEFLATE_ALIGN_STACK
19410			#endif
19411
19412			#define LIBDEFLATEAPI LIBDEFLATE_EXPORT_SYM LIBDEFLATE_ALIGN_STACK
19413
19414			/* #include "../common_defs.h" */
19415
19416
19417			#ifndef COMMON_DEFS_H
19418			#define COMMON_DEFS_H
19419
19420			/* #include "libdeflate.h" */
19421
19422
19423			#ifndef LIBDEFLATE_H
19424			#define LIBDEFLATE_H
19425
#include <stddef.h>	/* size_t, used throughout the API declarations */
#include <stdint.h>	/* uint32_t, used by the checksum functions */
19428
19429			#ifdef __cplusplus
19430			extern "C" {
19431			#endif
19432
19433			#define LIBDEFLATE_VERSION_MAJOR 1
19434			#define LIBDEFLATE_VERSION_MINOR 25
19435			#define LIBDEFLATE_VERSION_STRING "1.25"
19436
19437
19438			#ifndef LIBDEFLATEAPI
19439			# if defined(LIBDEFLATE_DLL) && (defined(_WIN32) \|\| defined(__CYGWIN__))
19440			# define LIBDEFLATEAPI __declspec(dllimport)
19441			# else
19442			# define LIBDEFLATEAPI
19443			# endif
19444			#endif
19445
19446
19447
19448
19449
19450			struct libdeflate_compressor;
19451			struct libdeflate_options;
19452
19453
19454			LIBDEFLATEAPI struct libdeflate_compressor *
19455			libdeflate_alloc_compressor(int compression_level);
19456
19457
19458			LIBDEFLATEAPI struct libdeflate_compressor *
19459			libdeflate_alloc_compressor_ex(int compression_level,
19460			const struct libdeflate_options *options);
19461
19462
19463			LIBDEFLATEAPI size_t
19464			libdeflate_deflate_compress(struct libdeflate_compressor *compressor,
19465			const void *in, size_t in_nbytes,
19466			void *out, size_t out_nbytes_avail);
19467
19468
19469			LIBDEFLATEAPI size_t
19470			libdeflate_deflate_compress_bound(struct libdeflate_compressor *compressor,
19471			size_t in_nbytes);
19472
19473
19474			LIBDEFLATEAPI size_t
19475			libdeflate_zlib_compress(struct libdeflate_compressor *compressor,
19476			const void *in, size_t in_nbytes,
19477			void *out, size_t out_nbytes_avail);
19478
19479
19480			LIBDEFLATEAPI size_t
19481			libdeflate_zlib_compress_bound(struct libdeflate_compressor *compressor,
19482			size_t in_nbytes);
19483
19484
19485			LIBDEFLATEAPI size_t
19486			libdeflate_gzip_compress(struct libdeflate_compressor *compressor,
19487			const void *in, size_t in_nbytes,
19488			void *out, size_t out_nbytes_avail);
19489
19490
19491			LIBDEFLATEAPI size_t
19492			libdeflate_gzip_compress_bound(struct libdeflate_compressor *compressor,
19493			size_t in_nbytes);
19494
19495
19496			LIBDEFLATEAPI void
19497			libdeflate_free_compressor(struct libdeflate_compressor *compressor);
19498
19499
19500
19501
19502
19503			struct libdeflate_decompressor;
19504			struct libdeflate_options;
19505
19506
19507			LIBDEFLATEAPI struct libdeflate_decompressor *
19508			libdeflate_alloc_decompressor(void);
19509
19510
19511			LIBDEFLATEAPI struct libdeflate_decompressor *
19512			libdeflate_alloc_decompressor_ex(const struct libdeflate_options *options);
19513
19514
/*
 * Result code returned by the libdeflate_*_decompress*() functions declared
 * below.  The numeric values are spelled out explicitly; zero is success.
 */
enum libdeflate_result {
	LIBDEFLATE_SUCCESS            = 0,
	LIBDEFLATE_BAD_DATA           = 1,
	LIBDEFLATE_SHORT_OUTPUT       = 2,
	LIBDEFLATE_INSUFFICIENT_SPACE = 3,
};
19528
19529
19530			LIBDEFLATEAPI enum libdeflate_result
19531			libdeflate_deflate_decompress(struct libdeflate_decompressor *decompressor,
19532			const void *in, size_t in_nbytes,
19533			void *out, size_t out_nbytes_avail,
19534			size_t *actual_out_nbytes_ret);
19535
19536
19537			LIBDEFLATEAPI enum libdeflate_result
19538			libdeflate_deflate_decompress_ex(struct libdeflate_decompressor *decompressor,
19539			const void *in, size_t in_nbytes,
19540			void *out, size_t out_nbytes_avail,
19541			size_t *actual_in_nbytes_ret,
19542			size_t *actual_out_nbytes_ret);
19543
19544
19545			LIBDEFLATEAPI enum libdeflate_result
19546			libdeflate_zlib_decompress(struct libdeflate_decompressor *decompressor,
19547			const void *in, size_t in_nbytes,
19548			void *out, size_t out_nbytes_avail,
19549			size_t *actual_out_nbytes_ret);
19550
19551
19552			LIBDEFLATEAPI enum libdeflate_result
19553			libdeflate_zlib_decompress_ex(struct libdeflate_decompressor *decompressor,
19554			const void *in, size_t in_nbytes,
19555			void *out, size_t out_nbytes_avail,
19556			size_t *actual_in_nbytes_ret,
19557			size_t *actual_out_nbytes_ret);
19558
19559
19560			LIBDEFLATEAPI enum libdeflate_result
19561			libdeflate_gzip_decompress(struct libdeflate_decompressor *decompressor,
19562			const void *in, size_t in_nbytes,
19563			void *out, size_t out_nbytes_avail,
19564			size_t *actual_out_nbytes_ret);
19565
19566
19567			LIBDEFLATEAPI enum libdeflate_result
19568			libdeflate_gzip_decompress_ex(struct libdeflate_decompressor *decompressor,
19569			const void *in, size_t in_nbytes,
19570			void *out, size_t out_nbytes_avail,
19571			size_t *actual_in_nbytes_ret,
19572			size_t *actual_out_nbytes_ret);
19573
19574
19575			LIBDEFLATEAPI void
19576			libdeflate_free_decompressor(struct libdeflate_decompressor *decompressor);
19577
19578
19579
19580
19581
19582
19583			LIBDEFLATEAPI uint32_t
19584			libdeflate_adler32(uint32_t adler, const void *buffer, size_t len);
19585
19586
19587
19588			LIBDEFLATEAPI uint32_t
19589			libdeflate_crc32(uint32_t crc, const void *buffer, size_t len);
19590
19591
19592
19593
19594
19595
19596			LIBDEFLATEAPI void
19597			libdeflate_set_memory_allocator(void (malloc_func)(size_t),
19598			void (free_func)(void ));
19599
19600
/*
 * Options accepted by libdeflate_alloc_compressor_ex() and
 * libdeflate_alloc_decompressor_ex().
 */
struct libdeflate_options {

	/* Size of the options structure (NOTE(review): presumably the caller
	 * sets this to sizeof(struct libdeflate_options) -- confirm). */
	size_t sizeof_options;

	/* Allocation callbacks: pointers to a malloc-like and a free-like
	 * function. */
	void *(*malloc_func)(size_t);
	void (*free_func)(void *);
};
19610
19611			#ifdef __cplusplus
19612			}
19613			#endif
19614
19615			#endif
19616
19617
#include <stdbool.h>	/* bool, true */
#include <stddef.h>	/* size_t */
#include <stdint.h>	/* uint8_t ... int64_t */
#ifdef _MSC_VER
#  include <intrin.h>	/* _BitScanReverse() and friends */
#  include <stdlib.h>	/* _byteswap_ushort() and friends */

   /* Disable MSVC warnings that this code base triggers by design. */
#  pragma warning(disable : 4146) /* unary minus on unsigned type */

#  pragma warning(disable : 4018) /* signed/unsigned mismatch */
#  pragma warning(disable : 4244) /* possible loss of data */
#  pragma warning(disable : 4267) /* possible loss of precision */
#  pragma warning(disable : 4310) /* cast truncates constant value */

#  pragma warning(disable : 4100) /* unreferenced formal parameter */
#  pragma warning(disable : 4127) /* conditional expression is constant */
#  pragma warning(disable : 4189) /* local variable initialized but not referenced */
#  pragma warning(disable : 4232) /* nonstandard extension used */
#  pragma warning(disable : 4245) /* signed/unsigned conversion */
#  pragma warning(disable : 4295) /* array too small for terminating null */
#endif
#ifndef FREESTANDING
#  include <string.h>	/* memcpy() */
#endif
19643
19644
19645
19646
19647
19648
19649			#undef ARCH_X86_64
19650			#undef ARCH_X86_32
19651			#undef ARCH_ARM64
19652			#undef ARCH_ARM32
19653			#undef ARCH_RISCV
19654			#ifdef _MSC_VER
19655
19656			# if defined(_M_X64) && !defined(_M_ARM64EC)
19657			# define ARCH_X86_64
19658			# elif defined(_M_IX86)
19659			# define ARCH_X86_32
19660			# elif defined(_M_ARM64)
19661			# define ARCH_ARM64
19662			# elif defined(_M_ARM)
19663			# define ARCH_ARM32
19664			# endif
19665			#else
19666			# if defined(__x86_64__)
19667			# define ARCH_X86_64
19668			# elif defined(__i386__)
19669			# define ARCH_X86_32
19670			# elif defined(__aarch64__)
19671			# define ARCH_ARM64
19672			# elif defined(__arm__)
19673			# define ARCH_ARM32
19674			# elif defined(__riscv)
19675			# define ARCH_RISCV
19676			# endif
19677			#endif
19678
19679
19680
19681
19682
19683
19684			typedef uint8_t u8;
19685			typedef uint16_t u16;
19686			typedef uint32_t u32;
19687			typedef uint64_t u64;
19688			typedef int8_t s8;
19689			typedef int16_t s16;
19690			typedef int32_t s32;
19691			typedef int64_t s64;
19692
19693
19694			#ifdef _MSC_VER
19695			# ifdef _WIN64
19696			typedef long long ssize_t;
19697			# else
19698			typedef long ssize_t;
19699			# endif
19700			#endif
19701
19702
19703			typedef size_t machine_word_t;
19704
19705
19706			#define WORDBYTES ((int)sizeof(machine_word_t))
19707
19708
19709			#define WORDBITS (8 * WORDBYTES)
19710
19711
19712
19713
19714
19715
19716			#if defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER)
19717			# define GCC_PREREQ(major, minor) \
19718			(__GNUC__ > (major) \|\| \
19719			(__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)))
19720			# if !GCC_PREREQ(4, 9)
19721			# error "gcc versions older than 4.9 are no longer supported"
19722			# endif
19723			#else
19724			# define GCC_PREREQ(major, minor) 0
19725			#endif
19726			#ifdef __clang__
19727			# ifdef __apple_build_version__
19728			# define CLANG_PREREQ(major, minor, apple_version) \
19729			(__apple_build_version__ >= (apple_version))
19730			# else
19731			# define CLANG_PREREQ(major, minor, apple_version) \
19732			(__clang_major__ > (major) \|\| \
19733			(__clang_major__ == (major) && __clang_minor__ >= (minor)))
19734			# endif
19735			# if !CLANG_PREREQ(3, 9, 8000000)
19736			# error "clang versions older than 3.9 are no longer supported"
19737			# endif
19738			#else
19739			# define CLANG_PREREQ(major, minor, apple_version) 0
19740			#endif
19741			#ifdef _MSC_VER
19742			# define MSVC_PREREQ(version) (_MSC_VER >= (version))
19743			# if !MSVC_PREREQ(1900)
19744			# error "MSVC versions older than Visual Studio 2015 are no longer supported"
19745			# endif
19746			#else
19747			# define MSVC_PREREQ(version) 0
19748			#endif
19749
19750
19751			#ifndef __has_attribute
19752			# define __has_attribute(attribute) 0
19753			#endif
19754
19755
19756			#ifndef __has_builtin
19757			# define __has_builtin(builtin) 0
19758			#endif
19759
19760
19761			#ifdef _MSC_VER
19762			# define inline __inline
19763			#endif
19764
19765
19766			#if defined(__GNUC__) \|\| __has_attribute(always_inline)
19767			# define forceinline inline __attribute__((always_inline))
19768			#elif defined(_MSC_VER)
19769			# define forceinline __forceinline
19770			#else
19771			# define forceinline inline
19772			#endif
19773
19774
19775			#if defined(__GNUC__) \|\| __has_attribute(unused)
19776			# define MAYBE_UNUSED __attribute__((unused))
19777			#else
19778			# define MAYBE_UNUSED
19779			#endif
19780
19781
19782			#if defined(__GNUC__) \|\| __has_attribute(noreturn)
19783			# define NORETURN __attribute__((noreturn))
19784			#else
19785			# define NORETURN
19786			#endif
19787
19788
19789			#if !defined(__STDC_VERSION__) \|\| (__STDC_VERSION__ < 201112L)
19790			# if defined(__GNUC__) \|\| defined(__clang__)
19791			# define restrict __restrict__
19792			# else
19793			# define restrict
19794			# endif
19795			#endif
19796
19797
19798			#if defined(__GNUC__) \|\| __has_builtin(__builtin_expect)
19799			# define likely(expr) __builtin_expect(!!(expr), 1)
19800			#else
19801			# define likely(expr) (expr)
19802			#endif
19803
19804
19805			#if defined(__GNUC__) \|\| __has_builtin(__builtin_expect)
19806			# define unlikely(expr) __builtin_expect(!!(expr), 0)
19807			#else
19808			# define unlikely(expr) (expr)
19809			#endif
19810
19811
19812			#undef prefetchr
19813			#if defined(__GNUC__) \|\| __has_builtin(__builtin_prefetch)
19814			# define prefetchr(addr) __builtin_prefetch((addr), 0)
19815			#elif defined(_MSC_VER)
19816			# if defined(ARCH_X86_32) \|\| defined(ARCH_X86_64)
19817			# define prefetchr(addr) _mm_prefetch((addr), _MM_HINT_T0)
19818			# elif defined(ARCH_ARM64)
19819			# define prefetchr(addr) __prefetch2((addr), 0x00 )
19820			# elif defined(ARCH_ARM32)
19821			# define prefetchr(addr) __prefetch(addr)
19822			# endif
19823			#endif
19824			#ifndef prefetchr
19825			# define prefetchr(addr)
19826			#endif
19827
19828
19829			#undef prefetchw
19830			#if defined(__GNUC__) \|\| __has_builtin(__builtin_prefetch)
19831			# define prefetchw(addr) __builtin_prefetch((addr), 1)
19832			#elif defined(_MSC_VER)
19833			# if defined(ARCH_X86_32) \|\| defined(ARCH_X86_64)
19834			# define prefetchw(addr) _m_prefetchw(addr)
19835			# elif defined(ARCH_ARM64)
19836			# define prefetchw(addr) __prefetch2((addr), 0x10 )
19837			# elif defined(ARCH_ARM32)
19838			# define prefetchw(addr) __prefetchw(addr)
19839			# endif
19840			#endif
19841			#ifndef prefetchw
19842			# define prefetchw(addr)
19843			#endif
19844
19845
19846			#undef _aligned_attribute
19847			#if defined(__GNUC__) \|\| __has_attribute(aligned)
19848			# define _aligned_attribute(n) __attribute__((aligned(n)))
19849			#elif defined(_MSC_VER)
19850			# define _aligned_attribute(n) __declspec(align(n))
19851			#endif
19852
19853
19854			#if defined(__GNUC__) \|\| __has_attribute(target)
19855			# define _target_attribute(attrs) __attribute__((target(attrs)))
19856			#else
19857			# define _target_attribute(attrs)
19858			#endif
19859
19860
19861
19862
19863
19864			#define ARRAY_LEN(A) (sizeof(A) / sizeof((A)[0]))
19865			#define MIN(a, b) ((a) <= (b) ? (a) : (b))
19866			#define MAX(a, b) ((a) >= (b) ? (a) : (b))
19867			#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
19868			#define STATIC_ASSERT(expr) ((void)sizeof(char[1 - 2 * !(expr)]))
19869			#define ALIGN(n, a) (((n) + (a) - 1) & ~((a) - 1))
19870			#define ROUND_UP(n, d) ((d) * DIV_ROUND_UP((n), (d)))
19871
19872
19873
19874
19875
19876
19877			#if defined(__BYTE_ORDER__)
19878			# define CPU_IS_LITTLE_ENDIAN() (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
19879			#elif defined(_MSC_VER)
19880			# define CPU_IS_LITTLE_ENDIAN() true
19881			#else
19882			static forceinline bool CPU_IS_LITTLE_ENDIAN(void)
19883			{
19884			union {
19885			u32 w;
19886			u8 b;
19887			} u;
19888
19889			u.w = 1;
19890			return u.b;
19891			}
19892			#endif
19893
19894
19895			static forceinline u16 bswap16(u16 v)
19896			{
19897			#if defined(__GNUC__) \|\| __has_builtin(__builtin_bswap16)
19898			return __builtin_bswap16(v);
19899			#elif defined(_MSC_VER)
19900			return _byteswap_ushort(v);
19901			#else
19902			return (v << 8) \| (v >> 8);
19903			#endif
19904			}
19905
19906
19907			static forceinline u32 bswap32(u32 v)
19908			{
19909			#if defined(__GNUC__) \|\| __has_builtin(__builtin_bswap32)
19910			return __builtin_bswap32(v);
19911			#elif defined(_MSC_VER)
19912			return _byteswap_ulong(v);
19913			#else
19914			return ((v & 0x000000FF) << 24) \|
19915			((v & 0x0000FF00) << 8) \|
19916			((v & 0x00FF0000) >> 8) \|
19917			((v & 0xFF000000) >> 24);
19918			#endif
19919			}
19920
19921
19922			static forceinline u64 bswap64(u64 v)
19923			{
19924			#if defined(__GNUC__) \|\| __has_builtin(__builtin_bswap64)
19925			return __builtin_bswap64(v);
19926			#elif defined(_MSC_VER)
19927			return _byteswap_uint64(v);
19928			#else
19929			return ((v & 0x00000000000000FF) << 56) \|
19930			((v & 0x000000000000FF00) << 40) \|
19931			((v & 0x0000000000FF0000) << 24) \|
19932			((v & 0x00000000FF000000) << 8) \|
19933			((v & 0x000000FF00000000) >> 8) \|
19934			((v & 0x0000FF0000000000) >> 24) \|
19935			((v & 0x00FF000000000000) >> 40) \|
19936			((v & 0xFF00000000000000) >> 56);
19937			#endif
19938			}
19939
19940			#define le16_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap16(v))
19941			#define le32_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap32(v))
19942			#define le64_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap64(v))
19943			#define be16_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? bswap16(v) : (v))
19944			#define be32_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? bswap32(v) : (v))
19945			#define be64_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? bswap64(v) : (v))
19946
19947
19948
19949
19950
19951
19952			#if (defined(__GNUC__) \|\| defined(__clang__)) && \
19953			(defined(ARCH_X86_64) \|\| defined(ARCH_X86_32) \|\| \
19954			defined(__ARM_FEATURE_UNALIGNED) \|\| defined(__powerpc64__) \|\| \
19955			defined(__riscv_misaligned_fast) \|\| \
19956			defined(__wasm__))
19957			# define UNALIGNED_ACCESS_IS_FAST 1
19958			#elif defined(_MSC_VER)
19959			# define UNALIGNED_ACCESS_IS_FAST 1
19960			#else
19961			# define UNALIGNED_ACCESS_IS_FAST 0
19962			#endif
19963
19964
19965
19966			#ifdef FREESTANDING
19967			# define MEMCOPY __builtin_memcpy
19968			#else
19969			# define MEMCOPY memcpy
19970			#endif
19971
19972
19973
/*
 * DEFINE_UNALIGNED_TYPE(type) generates two helpers for 'type':
 *
 *   load_<type>_unaligned(p)     - copy sizeof(type) bytes from p into a
 *                                  'type' value and return it
 *   store_<type>_unaligned(v, p) - copy the bytes of v to p
 *
 * Both go through MEMCOPY rather than a pointer cast, so 'p' does not need to
 * be aligned for 'type'.
 */
#define DEFINE_UNALIGNED_TYPE(type)				\
static forceinline type						\
load_##type##_unaligned(const void *p)				\
{								\
	type val;						\
								\
	MEMCOPY(&val, p, sizeof(val));				\
	return val;						\
}								\
								\
static forceinline void						\
store_##type##_unaligned(type v, void *p)			\
{								\
	MEMCOPY(p, &v, sizeof(v));				\
}
19989
19990			DEFINE_UNALIGNED_TYPE(u16)
19991			DEFINE_UNALIGNED_TYPE(u32)
19992			DEFINE_UNALIGNED_TYPE(u64)
19993			DEFINE_UNALIGNED_TYPE(machine_word_t)
19994
19995			#undef MEMCOPY
19996
19997			#define load_word_unaligned load_machine_word_t_unaligned
19998			#define store_word_unaligned store_machine_word_t_unaligned
19999
20000
20001
20002			static forceinline u16
20003			get_unaligned_le16(const u8 *p)
20004			{
20005			if (UNALIGNED_ACCESS_IS_FAST)
20006			return le16_bswap(load_u16_unaligned(p));
20007			else
20008			return ((u16)p[1] << 8) \| p[0];
20009			}
20010
20011			static forceinline u16
20012			get_unaligned_be16(const u8 *p)
20013			{
20014			if (UNALIGNED_ACCESS_IS_FAST)
20015			return be16_bswap(load_u16_unaligned(p));
20016			else
20017			return ((u16)p[0] << 8) \| p[1];
20018			}
20019
20020			static forceinline u32
20021			get_unaligned_le32(const u8 *p)
20022			{
20023			if (UNALIGNED_ACCESS_IS_FAST)
20024			return le32_bswap(load_u32_unaligned(p));
20025			else
20026			return ((u32)p[3] << 24) \| ((u32)p[2] << 16) \|
20027			((u32)p[1] << 8) \| p[0];
20028			}
20029
20030			static forceinline u32
20031			get_unaligned_be32(const u8 *p)
20032			{
20033			if (UNALIGNED_ACCESS_IS_FAST)
20034			return be32_bswap(load_u32_unaligned(p));
20035			else
20036			return ((u32)p[0] << 24) \| ((u32)p[1] << 16) \|
20037			((u32)p[2] << 8) \| p[3];
20038			}
20039
20040			static forceinline u64
20041			get_unaligned_le64(const u8 *p)
20042			{
20043			if (UNALIGNED_ACCESS_IS_FAST)
20044			return le64_bswap(load_u64_unaligned(p));
20045			else
20046			return ((u64)p[7] << 56) \| ((u64)p[6] << 48) \|
20047			((u64)p[5] << 40) \| ((u64)p[4] << 32) \|
20048			((u64)p[3] << 24) \| ((u64)p[2] << 16) \|
20049			((u64)p[1] << 8) \| p[0];
20050			}
20051
20052			static forceinline machine_word_t
20053			get_unaligned_leword(const u8 *p)
20054			{
20055			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
20056			if (WORDBITS == 32)
20057			return get_unaligned_le32(p);
20058			else
20059			return get_unaligned_le64(p);
20060			}
20061
20062
20063
20064			static forceinline void
20065			put_unaligned_le16(u16 v, u8 *p)
20066			{
20067			if (UNALIGNED_ACCESS_IS_FAST) {
20068			store_u16_unaligned(le16_bswap(v), p);
20069			} else {
20070			p[0] = (u8)(v >> 0);
20071			p[1] = (u8)(v >> 8);
20072			}
20073			}
20074
20075			static forceinline void
20076			put_unaligned_be16(u16 v, u8 *p)
20077			{
20078			if (UNALIGNED_ACCESS_IS_FAST) {
20079			store_u16_unaligned(be16_bswap(v), p);
20080			} else {
20081			p[0] = (u8)(v >> 8);
20082			p[1] = (u8)(v >> 0);
20083			}
20084			}
20085
20086			static forceinline void
20087			put_unaligned_le32(u32 v, u8 *p)
20088			{
20089			if (UNALIGNED_ACCESS_IS_FAST) {
20090			store_u32_unaligned(le32_bswap(v), p);
20091			} else {
20092			p[0] = (u8)(v >> 0);
20093			p[1] = (u8)(v >> 8);
20094			p[2] = (u8)(v >> 16);
20095			p[3] = (u8)(v >> 24);
20096			}
20097			}
20098
20099			static forceinline void
20100			put_unaligned_be32(u32 v, u8 *p)
20101			{
20102			if (UNALIGNED_ACCESS_IS_FAST) {
20103			store_u32_unaligned(be32_bswap(v), p);
20104			} else {
20105			p[0] = (u8)(v >> 24);
20106			p[1] = (u8)(v >> 16);
20107			p[2] = (u8)(v >> 8);
20108			p[3] = (u8)(v >> 0);
20109			}
20110			}
20111
20112			static forceinline void
20113			put_unaligned_le64(u64 v, u8 *p)
20114			{
20115			if (UNALIGNED_ACCESS_IS_FAST) {
20116			store_u64_unaligned(le64_bswap(v), p);
20117			} else {
20118			p[0] = (u8)(v >> 0);
20119			p[1] = (u8)(v >> 8);
20120			p[2] = (u8)(v >> 16);
20121			p[3] = (u8)(v >> 24);
20122			p[4] = (u8)(v >> 32);
20123			p[5] = (u8)(v >> 40);
20124			p[6] = (u8)(v >> 48);
20125			p[7] = (u8)(v >> 56);
20126			}
20127			}
20128
20129			static forceinline void
20130			put_unaligned_leword(machine_word_t v, u8 *p)
20131			{
20132			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
20133			if (WORDBITS == 32)
20134			put_unaligned_le32(v, p);
20135			else
20136			put_unaligned_le64(v, p);
20137			}
20138
20139
20140
20141
20142
20143
20144
20145			static forceinline unsigned
20146			bsr32(u32 v)
20147			{
20148			#if defined(__GNUC__) \|\| __has_builtin(__builtin_clz)
20149			return 31 - __builtin_clz(v);
20150			#elif defined(_MSC_VER)
20151			unsigned long i;
20152
20153			_BitScanReverse(&i, v);
20154			return i;
20155			#else
20156			unsigned i = 0;
20157
20158			while ((v >>= 1) != 0)
20159			i++;
20160			return i;
20161			#endif
20162			}
20163
20164			static forceinline unsigned
20165			bsr64(u64 v)
20166			{
20167			#if defined(__GNUC__) \|\| __has_builtin(__builtin_clzll)
20168			return 63 - __builtin_clzll(v);
20169			#elif defined(_MSC_VER) && defined(_WIN64)
20170			unsigned long i;
20171
20172			_BitScanReverse64(&i, v);
20173			return i;
20174			#else
20175			unsigned i = 0;
20176
20177			while ((v >>= 1) != 0)
20178			i++;
20179			return i;
20180			#endif
20181			}
20182
20183			static forceinline unsigned
20184			bsrw(machine_word_t v)
20185			{
20186			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
20187			if (WORDBITS == 32)
20188			return bsr32(v);
20189			else
20190			return bsr64(v);
20191			}
20192
20193
20194
20195			static forceinline unsigned
20196			bsf32(u32 v)
20197			{
20198			#if defined(__GNUC__) \|\| __has_builtin(__builtin_ctz)
20199			return __builtin_ctz(v);
20200			#elif defined(_MSC_VER)
20201			unsigned long i;
20202
20203			_BitScanForward(&i, v);
20204			return i;
20205			#else
20206			unsigned i = 0;
20207
20208			for (; (v & 1) == 0; v >>= 1)
20209			i++;
20210			return i;
20211			#endif
20212			}
20213
20214			static forceinline unsigned
20215			bsf64(u64 v)
20216			{
20217			#if defined(__GNUC__) \|\| __has_builtin(__builtin_ctzll)
20218			return __builtin_ctzll(v);
20219			#elif defined(_MSC_VER) && defined(_WIN64)
20220			unsigned long i;
20221
20222			_BitScanForward64(&i, v);
20223			return i;
20224			#else
20225			unsigned i = 0;
20226
20227			for (; (v & 1) == 0; v >>= 1)
20228			i++;
20229			return i;
20230			#endif
20231			}
20232
20233			static forceinline unsigned
20234			bsfw(machine_word_t v)
20235			{
20236			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
20237			if (WORDBITS == 32)
20238			return bsf32(v);
20239			else
20240			return bsf64(v);
20241			}
20242
20243
/*
 * rbit32(v): apply the ARM 'rbit' instruction to a 32-bit value via inline
 * assembly.  It is only provided for gcc/clang on 32-bit ARM (architecture
 * >= 7, or ARMv6T2) and on ARM64.  On every other target rbit32 stays
 * undefined.
 *
 * NOTE(review): the self-referential '#define rbit32 rbit32' presumably lets
 * other code detect availability with '#ifdef rbit32' -- confirm against the
 * users of this helper.
 */
#undef rbit32
#if (defined(__GNUC__) || defined(__clang__)) && defined(ARCH_ARM32) && \
	(__ARM_ARCH >= 7 || (__ARM_ARCH == 6 && defined(__ARM_ARCH_6T2__)))
static forceinline u32
rbit32(u32 v)
{
	__asm__("rbit %0, %1" : "=r" (v) : "r" (v));
	return v;
}
#define rbit32 rbit32
#elif (defined(__GNUC__) || defined(__clang__)) && defined(ARCH_ARM64)
static forceinline u32
rbit32(u32 v)
{
	/* %w0/%w1 select the 32-bit views of the registers. */
	__asm__("rbit %w0, %w1" : "=r" (v) : "r" (v));
	return v;
}
#define rbit32 rbit32
#endif
20263
20264			#endif
20265
20266
/*
 * Function-pointer types for the allocation callbacks: a malloc-like function
 * (byte count in, 'void *' out) and a free-like function ('void *' in).
 */
typedef void *(*malloc_func_t)(size_t);
typedef void (*free_func_t)(void *);
20269
20270			extern malloc_func_t libdeflate_default_malloc_func;
20271			extern free_func_t libdeflate_default_free_func;
20272
20273			void *libdeflate_aligned_malloc(malloc_func_t malloc_func,
20274			size_t alignment, size_t size);
20275			void libdeflate_aligned_free(free_func_t free_func, void *ptr);
20276
#ifdef FREESTANDING
/*
 * Freestanding build: no <string.h>.  Declare the four memory functions the
 * library uses and route every call to the matching compiler builtin.
 */
void *memset(void *s, int c, size_t n);
#define memset(s, c, n)		__builtin_memset((s), (c), (n))

void *memcpy(void *dest, const void *src, size_t n);
#define memcpy(dest, src, n)	__builtin_memcpy((dest), (src), (n))

void *memmove(void *dest, const void *src, size_t n);
#define memmove(dest, src, n)	__builtin_memmove((dest), (src), (n))

int memcmp(const void *s1, const void *s2, size_t n);
#define memcmp(s1, s2, n)	__builtin_memcmp((s1), (s2), (n))

/* Assertions are always disabled in freestanding builds. */
#undef LIBDEFLATE_ENABLE_ASSERTIONS
#else
#  include <string.h>

/* Assertions are switched on automatically under the clang static analyzer. */
#  ifdef __clang_analyzer__
#    define LIBDEFLATE_ENABLE_ASSERTIONS
#  endif
#endif
20299
20300
/*
 * ASSERT(expr):
 *  - with LIBDEFLATE_ENABLE_ASSERTIONS: if 'expr' is false, call
 *    libdeflate_assertion_failed() with the stringified expression, the file
 *    name and the line number.  That function is declared NORETURN.
 *  - otherwise: 'expr' is still evaluated, and its value is discarded.
 */
#ifdef LIBDEFLATE_ENABLE_ASSERTIONS
NORETURN void
libdeflate_assertion_failed(const char *expr, const char *file, int line);
#define ASSERT(expr) { if (unlikely(!(expr))) \
	libdeflate_assertion_failed(#expr, __FILE__, __LINE__); }
#else
#define ASSERT(expr) (void)(expr)
#endif
20309
20310			#define CONCAT_IMPL(a, b) a##b
20311			#define CONCAT(a, b) CONCAT_IMPL(a, b)
20312			#define ADD_SUFFIX(name) CONCAT(name, SUFFIX)
20313
20314			#endif
20315
20316
20317			#if defined(ARCH_X86_32) \|\| defined(ARCH_X86_64)
20318
20319			#define X86_CPU_FEATURE_SSE2 (1 << 0)
20320			#define X86_CPU_FEATURE_PCLMULQDQ (1 << 1)
20321			#define X86_CPU_FEATURE_AVX (1 << 2)
20322			#define X86_CPU_FEATURE_AVX2 (1 << 3)
20323			#define X86_CPU_FEATURE_BMI2 (1 << 4)
20324
20325			#define X86_CPU_FEATURE_ZMM (1 << 5)
20326			#define X86_CPU_FEATURE_AVX512BW (1 << 6)
20327			#define X86_CPU_FEATURE_AVX512VL (1 << 7)
20328			#define X86_CPU_FEATURE_VPCLMULQDQ (1 << 8)
20329			#define X86_CPU_FEATURE_AVX512VNNI (1 << 9)
20330			#define X86_CPU_FEATURE_AVXVNNI (1 << 10)
20331
20332			#if defined(__GNUC__) \|\| defined(__clang__) \|\| defined(_MSC_VER)
20333
20334			# define X86_CPU_FEATURES_KNOWN (1U << 31)
20335			extern volatile u32 libdeflate_x86_cpu_features;
20336
20337			void libdeflate_init_x86_cpu_features(void);
20338
20339			static inline u32 get_x86_cpu_features(void)
20340			{
20341			if (libdeflate_x86_cpu_features == 0)
20342			libdeflate_init_x86_cpu_features();
20343			return libdeflate_x86_cpu_features;
20344			}
20345
20346			# include
20347			# if defined(_MSC_VER) && defined(__clang__)
20348			# include
20349			# include
20350			# include
20351			# include
20352			# include
20353			# include
20354			# include
20355			# include
20356			# if __has_include()
20357			# include
20358			# endif
20359			# if __has_include()
20360			# include
20361			# endif
20362			# if __has_include()
20363			# include
20364			# endif
20365			# if __has_include()
20366			# include
20367			# endif
20368			# if __has_include()
20369			# include
20370			# endif
20371			# endif
20372			#else
20373			static inline u32 get_x86_cpu_features(void) { return 0; }
20374			#endif
20375
20376			#if defined(__SSE2__) \|\| \
20377			(defined(_MSC_VER) && \
20378			(defined(ARCH_X86_64) \|\| (defined(_M_IX86_FP) && _M_IX86_FP >= 2)))
20379			# define HAVE_SSE2(features) 1
20380			# define HAVE_SSE2_NATIVE 1
20381			#else
20382			# define HAVE_SSE2(features) ((features) & X86_CPU_FEATURE_SSE2)
20383			# define HAVE_SSE2_NATIVE 0
20384			#endif
20385
20386			#if (defined(__PCLMUL__) && defined(__SSE4_1__)) \|\| \
20387			(defined(_MSC_VER) && defined(__AVX2__))
20388			# define HAVE_PCLMULQDQ(features) 1
20389			#else
20390			# define HAVE_PCLMULQDQ(features) ((features) & X86_CPU_FEATURE_PCLMULQDQ)
20391			#endif
20392
20393			#ifdef __AVX__
20394			# define HAVE_AVX(features) 1
20395			#else
20396			# define HAVE_AVX(features) ((features) & X86_CPU_FEATURE_AVX)
20397			#endif
20398
20399			#ifdef __AVX2__
20400			# define HAVE_AVX2(features) 1
20401			#else
20402			# define HAVE_AVX2(features) ((features) & X86_CPU_FEATURE_AVX2)
20403			#endif
20404
20405			#if defined(__BMI2__) \|\| (defined(_MSC_VER) && defined(__AVX2__))
20406			# define HAVE_BMI2(features) 1
20407			# define HAVE_BMI2_NATIVE 1
20408			#else
20409			# define HAVE_BMI2(features) ((features) & X86_CPU_FEATURE_BMI2)
20410			# define HAVE_BMI2_NATIVE 0
20411			#endif
20412
20413			#ifdef __AVX512BW__
20414			# define HAVE_AVX512BW(features) 1
20415			#else
20416			# define HAVE_AVX512BW(features) ((features) & X86_CPU_FEATURE_AVX512BW)
20417			#endif
20418
20419			#ifdef __AVX512VL__
20420			# define HAVE_AVX512VL(features) 1
20421			#else
20422			# define HAVE_AVX512VL(features) ((features) & X86_CPU_FEATURE_AVX512VL)
20423			#endif
20424
20425			#ifdef __VPCLMULQDQ__
20426			# define HAVE_VPCLMULQDQ(features) 1
20427			#else
20428			# define HAVE_VPCLMULQDQ(features) ((features) & X86_CPU_FEATURE_VPCLMULQDQ)
20429			#endif
20430
20431			#ifdef __AVX512VNNI__
20432			# define HAVE_AVX512VNNI(features) 1
20433			#else
20434			# define HAVE_AVX512VNNI(features) ((features) & X86_CPU_FEATURE_AVX512VNNI)
20435			#endif
20436
20437			#ifdef __AVXVNNI__
20438			# define HAVE_AVXVNNI(features) 1
20439			#else
20440			# define HAVE_AVXVNNI(features) ((features) & X86_CPU_FEATURE_AVXVNNI)
20441			#endif
20442
20443			#endif
20444
20445			#endif
20446
20447
20448			#ifdef __AVX2__
20449			static forceinline void
20450			matchfinder_init_avx2(mf_pos_t *data, size_t size)
20451			{
20452			__m256i p = (__m256i )data;
20453			__m256i v = _mm256_set1_epi16(MATCHFINDER_INITVAL);
20454
20455			STATIC_ASSERT(MATCHFINDER_MEM_ALIGNMENT % sizeof(*p) == 0);
20456			STATIC_ASSERT(MATCHFINDER_SIZE_ALIGNMENT % (4 * sizeof(*p)) == 0);
20457			STATIC_ASSERT(sizeof(mf_pos_t) == 2);
20458
20459			do {
20460			p[0] = v;
20461			p[1] = v;
20462			p[2] = v;
20463			p[3] = v;
20464			p += 4;
20465			size -= 4 * sizeof(*p);
20466			} while (size != 0);
20467			}
20468			#define matchfinder_init matchfinder_init_avx2
20469
20470			static forceinline void
20471			matchfinder_rebase_avx2(mf_pos_t *data, size_t size)
20472			{
20473			__m256i p = (__m256i )data;
20474			__m256i v = _mm256_set1_epi16((u16)-MATCHFINDER_WINDOW_SIZE);
20475
20476			STATIC_ASSERT(MATCHFINDER_MEM_ALIGNMENT % sizeof(*p) == 0);
20477			STATIC_ASSERT(MATCHFINDER_SIZE_ALIGNMENT % (4 * sizeof(*p)) == 0);
20478			STATIC_ASSERT(sizeof(mf_pos_t) == 2);
20479
20480			do {
20481
20482			p[0] = _mm256_adds_epi16(p[0], v);
20483			p[1] = _mm256_adds_epi16(p[1], v);
20484			p[2] = _mm256_adds_epi16(p[2], v);
20485			p[3] = _mm256_adds_epi16(p[3], v);
20486			p += 4;
20487			size -= 4 * sizeof(*p);
20488			} while (size != 0);
20489			}
20490			#define matchfinder_rebase matchfinder_rebase_avx2
20491
20492			#elif HAVE_SSE2_NATIVE
20493			static forceinline void
20494			matchfinder_init_sse2(mf_pos_t *data, size_t size)
20495			{
20496			__m128i p = (__m128i )data;
20497			__m128i v = _mm_set1_epi16(MATCHFINDER_INITVAL);
20498
20499			STATIC_ASSERT(MATCHFINDER_MEM_ALIGNMENT % sizeof(*p) == 0);
20500			STATIC_ASSERT(MATCHFINDER_SIZE_ALIGNMENT % (4 * sizeof(*p)) == 0);
20501			STATIC_ASSERT(sizeof(mf_pos_t) == 2);
20502
20503			do {
20504			p[0] = v;
20505			p[1] = v;
20506			p[2] = v;
20507			p[3] = v;
20508			p += 4;
20509			size -= 4 * sizeof(*p);
20510			} while (size != 0);
20511			}
20512			#define matchfinder_init matchfinder_init_sse2
20513
20514			static forceinline void
20515			matchfinder_rebase_sse2(mf_pos_t *data, size_t size)
20516			{
20517			__m128i p = (__m128i )data;
20518			__m128i v = _mm_set1_epi16((u16)-MATCHFINDER_WINDOW_SIZE);
20519
20520			STATIC_ASSERT(MATCHFINDER_MEM_ALIGNMENT % sizeof(*p) == 0);
20521			STATIC_ASSERT(MATCHFINDER_SIZE_ALIGNMENT % (4 * sizeof(*p)) == 0);
20522			STATIC_ASSERT(sizeof(mf_pos_t) == 2);
20523
20524			do {
20525
20526			p[0] = _mm_adds_epi16(p[0], v);
20527			p[1] = _mm_adds_epi16(p[1], v);
20528			p[2] = _mm_adds_epi16(p[2], v);
20529			p[3] = _mm_adds_epi16(p[3], v);
20530			p += 4;
20531			size -= 4 * sizeof(*p);
20532			} while (size != 0);
20533			}
20534			#define matchfinder_rebase matchfinder_rebase_sse2
20535			#endif
20536
20537			#endif
20538
20539			# endif
20540			#else
20541			# define MATCHFINDER_ALIGNED
20542			#endif
20543
20544
20545			#ifndef matchfinder_init
20546			static forceinline void
20547			matchfinder_init(mf_pos_t *data, size_t size)
20548			{
20549			size_t num_entries = size / sizeof(*data);
20550			size_t i;
20551
20552			for (i = 0; i < num_entries; i++)
20553			data[i] = MATCHFINDER_INITVAL;
20554			}
20555			#endif
20556
20557
20558			#ifndef matchfinder_rebase
20559			static forceinline void
20560			matchfinder_rebase(mf_pos_t *data, size_t size)
20561			{
20562			size_t num_entries = size / sizeof(*data);
20563			size_t i;
20564
20565			if (MATCHFINDER_WINDOW_SIZE == 32768) {
20566
20567			for (i = 0; i < num_entries; i++)
20568			data[i] = 0x8000 \| (data[i] & ~(data[i] >> 15));
20569			} else {
20570			for (i = 0; i < num_entries; i++) {
20571			if (data[i] >= 0)
20572			data[i] -= (mf_pos_t)-MATCHFINDER_WINDOW_SIZE;
20573			else
20574			data[i] = (mf_pos_t)-MATCHFINDER_WINDOW_SIZE;
20575			}
20576			}
20577			}
20578			#endif
20579
20580
20581			static forceinline u32
20582			lz_hash(u32 seq, unsigned num_bits)
20583			{
20584			return (u32)(seq * 0x1E35A7BD) >> (32 - num_bits);
20585			}
20586
20587
20588			static forceinline u32
20589			lz_extend(const u8 * const strptr, const u8 * const matchptr,
20590			const u32 start_len, const u32 max_len)
20591			{
20592			u32 len = start_len;
20593			machine_word_t v_word;
20594
20595			if (UNALIGNED_ACCESS_IS_FAST) {
20596
20597			if (likely(max_len - len >= 4 * WORDBYTES)) {
20598
20599			#define COMPARE_WORD_STEP \
20600			v_word = load_word_unaligned(&matchptr[len]) ^ \
20601			load_word_unaligned(&strptr[len]); \
20602			if (v_word != 0) \
20603			goto word_differs; \
20604			len += WORDBYTES; \
20605
20606			COMPARE_WORD_STEP
20607			COMPARE_WORD_STEP
20608			COMPARE_WORD_STEP
20609			COMPARE_WORD_STEP
20610			#undef COMPARE_WORD_STEP
20611			}
20612
20613			while (len + WORDBYTES <= max_len) {
20614			v_word = load_word_unaligned(&matchptr[len]) ^
20615			load_word_unaligned(&strptr[len]);
20616			if (v_word != 0)
20617			goto word_differs;
20618			len += WORDBYTES;
20619			}
20620			}
20621
20622			while (len < max_len && matchptr[len] == strptr[len])
20623			len++;
20624			return len;
20625
20626			word_differs:
20627			if (CPU_IS_LITTLE_ENDIAN())
20628			len += (bsfw(v_word) >> 3);
20629			else
20630			len += (WORDBITS - 1 - bsrw(v_word)) >> 3;
20631			return len;
20632			}
20633
20634			#endif
20635
20636
20637			#define HT_MATCHFINDER_HASH_ORDER 15
20638			#define HT_MATCHFINDER_BUCKET_SIZE 2
20639
20640			#define HT_MATCHFINDER_MIN_MATCH_LEN 4
20641
20642			#define HT_MATCHFINDER_REQUIRED_NBYTES 5
20643
20644			struct MATCHFINDER_ALIGNED ht_matchfinder {
20645			mf_pos_t hash_tab[1UL << HT_MATCHFINDER_HASH_ORDER]
20646			[HT_MATCHFINDER_BUCKET_SIZE];
20647			};
20648
20649			static forceinline void
20650			ht_matchfinder_init(struct ht_matchfinder *mf)
20651			{
20652			STATIC_ASSERT(sizeof(*mf) % MATCHFINDER_SIZE_ALIGNMENT == 0);
20653
20654			matchfinder_init((mf_pos_t )mf, sizeof(mf));
20655	3		}
20656
20657			static forceinline void
20658			ht_matchfinder_slide_window(struct ht_matchfinder *mf)
20659			{
20660			matchfinder_rebase((mf_pos_t )mf, sizeof(mf));
20661	0		}
20662
20663
20664			static forceinline u32
20665			ht_matchfinder_longest_match(struct ht_matchfinder * const mf,
20666			const u8 ** const in_base_p,
20667			const u8 * const in_next,
20668			const u32 max_len,
20669			const u32 nice_len,
20670			u32 * const next_hash,
20671			u32 * const offset_ret)
20672			{
20673	75		u32 best_len = 0;
20674	75		const u8 *best_matchptr = in_next;
20675	75		u32 cur_pos = in_next - *in_base_p;
20676			const u8 *in_base;
20677			mf_pos_t cutoff;
20678			u32 hash;
20679			u32 seq;
20680			mf_pos_t cur_node;
20681			const u8 *matchptr;
20682			#if HT_MATCHFINDER_BUCKET_SIZE > 1
20683			mf_pos_t to_insert;
20684			u32 len;
20685			#endif
20686			#if HT_MATCHFINDER_BUCKET_SIZE > 2
20687			int i;
20688			#endif
20689
20690
20691			STATIC_ASSERT(HT_MATCHFINDER_MIN_MATCH_LEN == 4);
20692
20693	75		if (cur_pos == MATCHFINDER_WINDOW_SIZE) {
20694			ht_matchfinder_slide_window(mf);
20695	0		*in_base_p += MATCHFINDER_WINDOW_SIZE;
20696	0		cur_pos = 0;
20697			}
20698	75		in_base = *in_base_p;
20699	75		cutoff = cur_pos - MATCHFINDER_WINDOW_SIZE;
20700
20701	75		hash = *next_hash;
20702			STATIC_ASSERT(HT_MATCHFINDER_REQUIRED_NBYTES == 5);
20703	225		*next_hash = lz_hash(get_unaligned_le32(in_next + 1),
20704			HT_MATCHFINDER_HASH_ORDER);
20705	75		seq = load_u32_unaligned(in_next);
20706	75		prefetchw(&mf->hash_tab[*next_hash]);
20707			#if HT_MATCHFINDER_BUCKET_SIZE == 1
20708
20709			cur_node = mf->hash_tab[hash][0];
20710			mf->hash_tab[hash][0] = cur_pos;
20711			if (cur_node <= cutoff)
20712			goto out;
20713			matchptr = &in_base[cur_node];
20714			if (load_u32_unaligned(matchptr) == seq) {
20715			best_len = lz_extend(in_next, matchptr, 4, max_len);
20716			best_matchptr = matchptr;
20717			}
20718			#elif HT_MATCHFINDER_BUCKET_SIZE == 2
20719
20720	75		cur_node = mf->hash_tab[hash][0];
20721	75		mf->hash_tab[hash][0] = cur_pos;
20722	75	100	if (cur_node <= cutoff)
20723	54		goto out;
20724	21		matchptr = &in_base[cur_node];
20725
20726	21		to_insert = cur_node;
20727	21		cur_node = mf->hash_tab[hash][1];
20728	21		mf->hash_tab[hash][1] = to_insert;
20729
20730	21	50	if (load_u32_unaligned(matchptr) == seq) {
20731	21		best_len = lz_extend(in_next, matchptr, 4, max_len);
20732	21		best_matchptr = matchptr;
20733	21	100	if (cur_node <= cutoff \|\| best_len >= nice_len)
		50
20734	21		goto out;
20735	0		matchptr = &in_base[cur_node];
20736	0	0	if (load_u32_unaligned(matchptr) == seq &&
20737	0		load_u32_unaligned(matchptr + best_len - 3) ==
20738	0	0	load_u32_unaligned(in_next + best_len - 3)) {
20739	0		len = lz_extend(in_next, matchptr, 4, max_len);
20740	0	0	if (len > best_len) {
20741	0		best_len = len;
20742	0		best_matchptr = matchptr;
20743			}
20744			}
20745			} else {
20746	0	0	if (cur_node <= cutoff)
20747	0		goto out;
20748	0		matchptr = &in_base[cur_node];
20749	0	0	if (load_u32_unaligned(matchptr) == seq) {
20750	0		best_len = lz_extend(in_next, matchptr, 4, max_len);
20751	0		best_matchptr = matchptr;
20752			}
20753			}
20754			#else
20755
20756			to_insert = cur_pos;
20757			for (i = 0; i < HT_MATCHFINDER_BUCKET_SIZE; i++) {
20758			cur_node = mf->hash_tab[hash][i];
20759			mf->hash_tab[hash][i] = to_insert;
20760			if (cur_node <= cutoff)
20761			goto out;
20762			matchptr = &in_base[cur_node];
20763			if (load_u32_unaligned(matchptr) == seq) {
20764			len = lz_extend(in_next, matchptr, 4, max_len);
20765			if (len > best_len) {
20766			best_len = len;
20767			best_matchptr = matchptr;
20768			if (best_len >= nice_len)
20769			goto out;
20770			}
20771			}
20772			to_insert = cur_node;
20773			}
20774			#endif
20775	0		out:
20776	75		*offset_ret = in_next - best_matchptr;
20777	75		return best_len;
20778			}
20779
20780			static forceinline void
20781			ht_matchfinder_skip_bytes(struct ht_matchfinder * const mf,
20782			const u8 ** const in_base_p,
20783			const u8 *in_next,
20784			const u8 * const in_end,
20785			const u32 count,
20786			u32 * const next_hash)
20787			{
20788	21		s32 cur_pos = in_next - *in_base_p;
20789			u32 hash;
20790	21		u32 remaining = count;
20791			int i;
20792
20793	21		if (unlikely(count + HT_MATCHFINDER_REQUIRED_NBYTES > in_end - in_next))
20794	3		return;
20795
20796	18	50	if (cur_pos + count - 1 >= MATCHFINDER_WINDOW_SIZE) {
20797			ht_matchfinder_slide_window(mf);
20798	0		*in_base_p += MATCHFINDER_WINDOW_SIZE;
20799	0		cur_pos -= MATCHFINDER_WINDOW_SIZE;
20800			}
20801
20802	18		hash = *next_hash;
20803			do {
20804	9252	100	for (i = HT_MATCHFINDER_BUCKET_SIZE - 1; i > 0; i--)
20805	4626		mf->hash_tab[hash][i] = mf->hash_tab[hash][i - 1];
20806	4626		mf->hash_tab[hash][0] = cur_pos;
20807
20808	9252		hash = lz_hash(get_unaligned_le32(++in_next),
20809			HT_MATCHFINDER_HASH_ORDER);
20810	4626		cur_pos++;
20811	4626	100	} while (--remaining);
20812
20813	18		prefetchw(&mf->hash_tab[hash]);
20814	18		*next_hash = hash;
20815			}
20816
20817			#endif
20818
20819			#if SUPPORT_NEAR_OPTIMAL_PARSING
20820			/* # include "bt_matchfinder.h" */
20821
20822
20823			#ifndef LIB_BT_MATCHFINDER_H
20824			#define LIB_BT_MATCHFINDER_H
20825
20826			/* #include "matchfinder_common.h" */
20827
20828
20829			#ifndef LIB_MATCHFINDER_COMMON_H
20830			#define LIB_MATCHFINDER_COMMON_H
20831
20832			/* #include "lib_common.h" */
20833
20834
20835			#ifndef LIB_LIB_COMMON_H
20836			#define LIB_LIB_COMMON_H
20837
20838			#ifdef LIBDEFLATE_H
20839
20840			# error "lib_common.h must always be included before libdeflate.h"
20841			#endif
20842
/* Symbol-export decoration applied to the public API when building the library. */
#if defined(LIBDEFLATE_DLL) && (defined(_WIN32) || defined(__CYGWIN__))
#  define LIBDEFLATE_EXPORT_SYM	__declspec(dllexport)
#elif defined(__GNUC__)
#  define LIBDEFLATE_EXPORT_SYM	__attribute__((visibility("default")))
#else
#  define LIBDEFLATE_EXPORT_SYM
#endif

/* On 32-bit x86 with GCC-compatible compilers, realign the stack at entry. */
#if defined(__GNUC__) && defined(__i386__)
#  define LIBDEFLATE_ALIGN_STACK	__attribute__((force_align_arg_pointer))
#else
#  define LIBDEFLATE_ALIGN_STACK
#endif

#define LIBDEFLATEAPI	LIBDEFLATE_EXPORT_SYM LIBDEFLATE_ALIGN_STACK
20859
20860			/* #include "../common_defs.h" */
20861
20862
20863			#ifndef COMMON_DEFS_H
20864			#define COMMON_DEFS_H
20865
20866			/* #include "libdeflate.h" */
20867
20868
#ifndef LIBDEFLATE_H
#define LIBDEFLATE_H

#include <stddef.h>	/* size_t */
#include <stdint.h>	/* uint32_t */

#ifdef __cplusplus
extern "C" {
#endif

#define LIBDEFLATE_VERSION_MAJOR	1
#define LIBDEFLATE_VERSION_MINOR	25
#define LIBDEFLATE_VERSION_STRING	"1.25"

/* Import decoration for users of the DLL; empty everywhere else. */
#ifndef LIBDEFLATEAPI
#  if defined(LIBDEFLATE_DLL) && (defined(_WIN32) || defined(__CYGWIN__))
#    define LIBDEFLATEAPI	__declspec(dllimport)
#  else
#    define LIBDEFLATEAPI
#  endif
#endif

/* ------------------------------ Compression ------------------------------ */

struct libdeflate_compressor;
struct libdeflate_options;

LIBDEFLATEAPI struct libdeflate_compressor *
libdeflate_alloc_compressor(int compression_level);

LIBDEFLATEAPI struct libdeflate_compressor *
libdeflate_alloc_compressor_ex(int compression_level,
			       const struct libdeflate_options *options);

LIBDEFLATEAPI size_t
libdeflate_deflate_compress(struct libdeflate_compressor *compressor,
			    const void *in, size_t in_nbytes,
			    void *out, size_t out_nbytes_avail);

LIBDEFLATEAPI size_t
libdeflate_deflate_compress_bound(struct libdeflate_compressor *compressor,
				  size_t in_nbytes);

LIBDEFLATEAPI size_t
libdeflate_zlib_compress(struct libdeflate_compressor *compressor,
			 const void *in, size_t in_nbytes,
			 void *out, size_t out_nbytes_avail);

LIBDEFLATEAPI size_t
libdeflate_zlib_compress_bound(struct libdeflate_compressor *compressor,
			       size_t in_nbytes);

LIBDEFLATEAPI size_t
libdeflate_gzip_compress(struct libdeflate_compressor *compressor,
			 const void *in, size_t in_nbytes,
			 void *out, size_t out_nbytes_avail);

LIBDEFLATEAPI size_t
libdeflate_gzip_compress_bound(struct libdeflate_compressor *compressor,
			       size_t in_nbytes);

LIBDEFLATEAPI void
libdeflate_free_compressor(struct libdeflate_compressor *compressor);

/* ----------------------------- Decompression ----------------------------- */

struct libdeflate_decompressor;
struct libdeflate_options;

LIBDEFLATEAPI struct libdeflate_decompressor *
libdeflate_alloc_decompressor(void);

LIBDEFLATEAPI struct libdeflate_decompressor *
libdeflate_alloc_decompressor_ex(const struct libdeflate_options *options);

/* Status codes returned by the decompression functions. */
enum libdeflate_result {
	LIBDEFLATE_SUCCESS = 0,
	LIBDEFLATE_BAD_DATA = 1,
	LIBDEFLATE_SHORT_OUTPUT = 2,
	LIBDEFLATE_INSUFFICIENT_SPACE = 3,
};

LIBDEFLATEAPI enum libdeflate_result
libdeflate_deflate_decompress(struct libdeflate_decompressor *decompressor,
			      const void *in, size_t in_nbytes,
			      void *out, size_t out_nbytes_avail,
			      size_t *actual_out_nbytes_ret);

LIBDEFLATEAPI enum libdeflate_result
libdeflate_deflate_decompress_ex(struct libdeflate_decompressor *decompressor,
				 const void *in, size_t in_nbytes,
				 void *out, size_t out_nbytes_avail,
				 size_t *actual_in_nbytes_ret,
				 size_t *actual_out_nbytes_ret);

LIBDEFLATEAPI enum libdeflate_result
libdeflate_zlib_decompress(struct libdeflate_decompressor *decompressor,
			   const void *in, size_t in_nbytes,
			   void *out, size_t out_nbytes_avail,
			   size_t *actual_out_nbytes_ret);

LIBDEFLATEAPI enum libdeflate_result
libdeflate_zlib_decompress_ex(struct libdeflate_decompressor *decompressor,
			      const void *in, size_t in_nbytes,
			      void *out, size_t out_nbytes_avail,
			      size_t *actual_in_nbytes_ret,
			      size_t *actual_out_nbytes_ret);

LIBDEFLATEAPI enum libdeflate_result
libdeflate_gzip_decompress(struct libdeflate_decompressor *decompressor,
			   const void *in, size_t in_nbytes,
			   void *out, size_t out_nbytes_avail,
			   size_t *actual_out_nbytes_ret);

LIBDEFLATEAPI enum libdeflate_result
libdeflate_gzip_decompress_ex(struct libdeflate_decompressor *decompressor,
			      const void *in, size_t in_nbytes,
			      void *out, size_t out_nbytes_avail,
			      size_t *actual_in_nbytes_ret,
			      size_t *actual_out_nbytes_ret);

LIBDEFLATEAPI void
libdeflate_free_decompressor(struct libdeflate_decompressor *decompressor);

/* ------------------------------- Checksums ------------------------------- */

LIBDEFLATEAPI uint32_t
libdeflate_adler32(uint32_t adler, const void *buffer, size_t len);

LIBDEFLATEAPI uint32_t
libdeflate_crc32(uint32_t crc, const void *buffer, size_t len);

/* --------------------------- Custom allocation --------------------------- */

/* Both arguments are pointers to malloc()/free()-shaped functions. */
LIBDEFLATEAPI void
libdeflate_set_memory_allocator(void *(*malloc_func)(size_t),
				void (*free_func)(void *));

/* Options accepted by the *_ex() allocation functions. */
struct libdeflate_options {
	/* NOTE(review): presumably set to sizeof(struct libdeflate_options) -- confirm. */
	size_t sizeof_options;

	/* malloc()/free()-shaped function pointers. */
	void *(*malloc_func)(size_t);
	void (*free_func)(void *);
};

#ifdef __cplusplus
}
#endif

#endif /* LIBDEFLATE_H */
21062
21063
#include <stdbool.h>	/* bool, used by CPU_IS_LITTLE_ENDIAN() */
#include <stddef.h>	/* size_t */
#include <stdint.h>	/* uintN_t / intN_t behind the u8..s64 typedefs */
#ifdef _MSC_VER
#  include <intrin.h>	/* _BitScan*() and prefetch intrinsics */
#  include <stdlib.h>	/* _byteswap_*() */

   /* MSVC warnings disabled for the rest of the translation unit. */
#  pragma warning(disable : 4146)

#  pragma warning(disable : 4018)
#  pragma warning(disable : 4244)
#  pragma warning(disable : 4267)
#  pragma warning(disable : 4310)

#  pragma warning(disable : 4100)
#  pragma warning(disable : 4127)
#  pragma warning(disable : 4189)
#  pragma warning(disable : 4232)
#  pragma warning(disable : 4245)
#  pragma warning(disable : 4295)
#endif
#ifndef FREESTANDING
#  include <string.h>	/* memcpy(), used by the unaligned load/store helpers */
#endif
21089
21090
21091
21092
21093
21094
/*
 * Target architecture detection.  At most one ARCH_* macro is defined after
 * this block; none is defined for an unrecognized target.
 */
#undef ARCH_X86_64
#undef ARCH_X86_32
#undef ARCH_ARM64
#undef ARCH_ARM32
#undef ARCH_RISCV
#if defined(_MSC_VER)
   /* ARM64EC also defines _M_X64, so it is excluded here. */
#  if defined(_M_X64) && !defined(_M_ARM64EC)
#    define ARCH_X86_64
#  elif defined(_M_IX86)
#    define ARCH_X86_32
#  elif defined(_M_ARM64)
#    define ARCH_ARM64
#  elif defined(_M_ARM)
#    define ARCH_ARM32
#  endif
#else /* !_MSC_VER */
#  if defined(__x86_64__)
#    define ARCH_X86_64
#  elif defined(__i386__)
#    define ARCH_X86_32
#  elif defined(__aarch64__)
#    define ARCH_ARM64
#  elif defined(__arm__)
#    define ARCH_ARM32
#  elif defined(__riscv)
#    define ARCH_RISCV
#  endif
#endif
21124
21125
21126
21127
21128
21129
/* Short aliases for the fixed-width integer types. */
typedef uint8_t		u8;
typedef uint16_t	u16;
typedef uint32_t	u32;
typedef uint64_t	u64;
typedef int8_t		s8;
typedef int16_t		s16;
typedef int32_t		s32;
typedef int64_t		s64;

/* ssize_t is supplied here for MSVC builds. */
#ifdef _MSC_VER
#  ifdef _WIN64
typedef long long ssize_t;
#  else
typedef long ssize_t;
#  endif
#endif

/* Word type used for word-at-a-time processing; same width as size_t. */
typedef size_t machine_word_t;

/* Number of bytes in a machine word, as an int. */
#define WORDBYTES	((int)sizeof(machine_word_t))

/* Number of bits in a machine word. */
#define WORDBITS	(8 * WORDBYTES)
21156
21157
21158
21159
21160
21161
/*
 * Compiler version checks.  Each *_PREREQ() macro evaluates to 0 when the
 * code is not being built with that compiler.
 */
#if defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER)
#  define GCC_PREREQ(major, minor)		\
	(__GNUC__ > (major) ||			\
	 (__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)))
#  if !GCC_PREREQ(4, 9)
#    error "gcc versions older than 4.9 are no longer supported"
#  endif
#else
#  define GCC_PREREQ(major, minor)	0
#endif
#ifdef __clang__
   /* Apple's clang is identified by its build version instead of major.minor. */
#  ifdef __apple_build_version__
#    define CLANG_PREREQ(major, minor, apple_version)	\
	(__apple_build_version__ >= (apple_version))
#  else
#    define CLANG_PREREQ(major, minor, apple_version)	\
	(__clang_major__ > (major) ||			\
	 (__clang_major__ == (major) && __clang_minor__ >= (minor)))
#  endif
#  if !CLANG_PREREQ(3, 9, 8000000)
#    error "clang versions older than 3.9 are no longer supported"
#  endif
#else
#  define CLANG_PREREQ(major, minor, apple_version)	0
#endif
#ifdef _MSC_VER
#  define MSVC_PREREQ(version)	(_MSC_VER >= (version))
#  if !MSVC_PREREQ(1900)
#    error "MSVC versions older than Visual Studio 2015 are no longer supported"
#  endif
#else
#  define MSVC_PREREQ(version)	0
#endif
21195
21196
/* Fallbacks so the feature tests below work on compilers lacking them. */
#ifndef __has_attribute
#  define __has_attribute(attribute)	0
#endif

#ifndef __has_builtin
#  define __has_builtin(builtin)	0
#endif

/* MSVC spells the keyword __inline. */
#ifdef _MSC_VER
#  define inline	__inline
#endif

/* forceinline: request inlining as strongly as the compiler allows. */
#if defined(__GNUC__) || __has_attribute(always_inline)
#  define forceinline	inline __attribute__((always_inline))
#elif defined(_MSC_VER)
#  define forceinline	__forceinline
#else
#  define forceinline	inline
#endif

/* MAYBE_UNUSED: suppress unused-entity warnings where supported. */
#if defined(__GNUC__) || __has_attribute(unused)
#  define MAYBE_UNUSED	__attribute__((unused))
#else
#  define MAYBE_UNUSED
#endif

/* NORETURN: mark a function that does not return, where supported. */
#if defined(__GNUC__) || __has_attribute(noreturn)
#  define NORETURN	__attribute__((noreturn))
#else
#  define NORETURN
#endif

/* Replace 'restrict' when __STDC_VERSION__ is undefined or older than C11. */
#if !defined(__STDC_VERSION__) || (__STDC_VERSION__ < 201112L)
#  if defined(__GNUC__) || defined(__clang__)
#    define restrict	__restrict__
#  else
#    define restrict
#  endif
#endif

/* Branch-prediction hints; plain expressions when unsupported. */
#if defined(__GNUC__) || __has_builtin(__builtin_expect)
#  define likely(expr)		__builtin_expect(!!(expr), 1)
#else
#  define likely(expr)		(expr)
#endif

#if defined(__GNUC__) || __has_builtin(__builtin_expect)
#  define unlikely(expr)	__builtin_expect(!!(expr), 0)
#else
#  define unlikely(expr)	(expr)
#endif
21257
/* prefetchr(addr): prefetch for reading; expands to nothing if unsupported. */
#undef prefetchr
#if defined(__GNUC__) || __has_builtin(__builtin_prefetch)
#  define prefetchr(addr)	__builtin_prefetch((addr), 0)
#elif defined(_MSC_VER)
#  if defined(ARCH_X86_32) || defined(ARCH_X86_64)
#    define prefetchr(addr)	_mm_prefetch((addr), _MM_HINT_T0)
#  elif defined(ARCH_ARM64)
#    define prefetchr(addr)	__prefetch2((addr), 0x00 )
#  elif defined(ARCH_ARM32)
#    define prefetchr(addr)	__prefetch(addr)
#  endif
#endif
#ifndef prefetchr
#  define prefetchr(addr)
#endif

/* prefetchw(addr): prefetch for writing; expands to nothing if unsupported. */
#undef prefetchw
#if defined(__GNUC__) || __has_builtin(__builtin_prefetch)
#  define prefetchw(addr)	__builtin_prefetch((addr), 1)
#elif defined(_MSC_VER)
#  if defined(ARCH_X86_32) || defined(ARCH_X86_64)
#    define prefetchw(addr)	_m_prefetchw(addr)
#  elif defined(ARCH_ARM64)
#    define prefetchw(addr)	__prefetch2((addr), 0x10 )
#  elif defined(ARCH_ARM32)
#    define prefetchw(addr)	__prefetchw(addr)
#  endif
#endif
#ifndef prefetchw
#  define prefetchw(addr)
#endif

/* _aligned_attribute(n): left undefined when the compiler has no equivalent. */
#undef _aligned_attribute
#if defined(__GNUC__) || __has_attribute(aligned)
#  define _aligned_attribute(n)	__attribute__((aligned(n)))
#elif defined(_MSC_VER)
#  define _aligned_attribute(n)	__declspec(align(n))
#endif

/* _target_attribute(attrs): per-function target options; empty if unsupported. */
#if defined(__GNUC__) || __has_attribute(target)
#  define _target_attribute(attrs)	__attribute__((target(attrs)))
#else
#  define _target_attribute(attrs)
#endif
21305
21306
21307
21308
21309
/* Number of elements in a true array (not a pointer). */
#define ARRAY_LEN(A)		(sizeof(A) / sizeof((A)[0]))
/* Smaller / larger of two values; arguments may be evaluated twice. */
#define MIN(a, b)		((a) <= (b) ? (a) : (b))
#define MAX(a, b)		((a) >= (b) ? (a) : (b))
/* Integer division rounding up. */
#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))
/* Compile-time assertion usable as an expression statement. */
#define STATIC_ASSERT(expr)	((void)sizeof(char[1 - 2 * !(expr)]))
/* Round n up to a multiple of a; the mask form requires a power-of-two a. */
#define ALIGN(n, a)		(((n) + (a) - 1) & ~((a) - 1))
/* Round n up to a multiple of d (any positive d). */
#define ROUND_UP(n, d)		((d) * DIV_ROUND_UP((n), (d)))
21317
21318
21319
21320
21321
21322
/*
 * CPU_IS_LITTLE_ENDIAN(): nonzero on a little-endian target.  A compile-time
 * constant when the compiler exposes __BYTE_ORDER__ or is MSVC; otherwise
 * decided at run time from the first byte of a stored 1.
 */
#if defined(__BYTE_ORDER__)
#  define CPU_IS_LITTLE_ENDIAN() (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
#elif defined(_MSC_VER)
#  define CPU_IS_LITTLE_ENDIAN() true
#else
static forceinline bool CPU_IS_LITTLE_ENDIAN(void)
{
	union {
		u32 word;
		u8 first_byte;
	} probe;

	probe.word = 1;
	return probe.first_byte;
}
#endif
21339
21340
21341			static forceinline u16 bswap16(u16 v)
21342			{
21343			#if defined(__GNUC__) \|\| __has_builtin(__builtin_bswap16)
21344			return __builtin_bswap16(v);
21345			#elif defined(_MSC_VER)
21346			return _byteswap_ushort(v);
21347			#else
21348			return (v << 8) \| (v >> 8);
21349			#endif
21350			}
21351
21352
21353			static forceinline u32 bswap32(u32 v)
21354			{
21355			#if defined(__GNUC__) \|\| __has_builtin(__builtin_bswap32)
21356			return __builtin_bswap32(v);
21357			#elif defined(_MSC_VER)
21358			return _byteswap_ulong(v);
21359			#else
21360			return ((v & 0x000000FF) << 24) \|
21361			((v & 0x0000FF00) << 8) \|
21362			((v & 0x00FF0000) >> 8) \|
21363			((v & 0xFF000000) >> 24);
21364			#endif
21365			}
21366
21367
21368			static forceinline u64 bswap64(u64 v)
21369			{
21370			#if defined(__GNUC__) \|\| __has_builtin(__builtin_bswap64)
21371			return __builtin_bswap64(v);
21372			#elif defined(_MSC_VER)
21373			return _byteswap_uint64(v);
21374			#else
21375			return ((v & 0x00000000000000FF) << 56) \|
21376			((v & 0x000000000000FF00) << 40) \|
21377			((v & 0x0000000000FF0000) << 24) \|
21378			((v & 0x00000000FF000000) << 8) \|
21379			((v & 0x000000FF00000000) >> 8) \|
21380			((v & 0x0000FF0000000000) >> 24) \|
21381			((v & 0x00FF000000000000) >> 40) \|
21382			((v & 0xFF00000000000000) >> 56);
21383			#endif
21384			}
21385
/*
 * Convert between CPU byte order and a fixed byte order.  leN_bswap() swaps
 * only on big-endian CPUs, beN_bswap() only on little-endian CPUs, so each
 * macro is its own inverse.
 */
#define le16_bswap(v)	(CPU_IS_LITTLE_ENDIAN() ? (v) : bswap16(v))
#define le32_bswap(v)	(CPU_IS_LITTLE_ENDIAN() ? (v) : bswap32(v))
#define le64_bswap(v)	(CPU_IS_LITTLE_ENDIAN() ? (v) : bswap64(v))
#define be16_bswap(v)	(CPU_IS_LITTLE_ENDIAN() ? bswap16(v) : (v))
#define be32_bswap(v)	(CPU_IS_LITTLE_ENDIAN() ? bswap32(v) : (v))
#define be64_bswap(v)	(CPU_IS_LITTLE_ENDIAN() ? bswap64(v) : (v))
21392
21393
21394
21395
21396
21397
/*
 * UNALIGNED_ACCESS_IS_FAST is 1 on targets listed here as handling unaligned
 * loads and stores efficiently, otherwise 0.  It is always defined, so it can
 * be used in ordinary 'if' conditions as well as in #if.
 */
#if (defined(__GNUC__) || defined(__clang__)) && \
	(defined(ARCH_X86_64) || defined(ARCH_X86_32) || \
	 defined(__ARM_FEATURE_UNALIGNED) || defined(__powerpc64__) || \
	 defined(__riscv_misaligned_fast) || \
	 defined(__wasm__))
#  define UNALIGNED_ACCESS_IS_FAST	1
#elif defined(_MSC_VER)
#  define UNALIGNED_ACCESS_IS_FAST	1
#else
#  define UNALIGNED_ACCESS_IS_FAST	0
#endif
21409
21410
21411
21412			#ifdef FREESTANDING
21413			# define MEMCOPY __builtin_memcpy
21414			#else
21415			# define MEMCOPY memcpy
21416			#endif
21417
21418
21419
21420			#define DEFINE_UNALIGNED_TYPE(type) \
21421			static forceinline type \
21422			load_##type##_unaligned(const void *p) \
21423			{ \
21424			type v; \
21425			\
21426			MEMCOPY(&v, p, sizeof(v)); \
21427			return v; \
21428			} \
21429			\
21430			static forceinline void \
21431			store_##type##_unaligned(type v, void *p) \
21432			{ \
21433			MEMCOPY(p, &v, sizeof(v)); \
21434			}
21435
21436			DEFINE_UNALIGNED_TYPE(u16)
21437			DEFINE_UNALIGNED_TYPE(u32)
21438			DEFINE_UNALIGNED_TYPE(u64)
21439			DEFINE_UNALIGNED_TYPE(machine_word_t)
21440
21441			#undef MEMCOPY
21442
21443			#define load_word_unaligned load_machine_word_t_unaligned
21444			#define store_word_unaligned store_machine_word_t_unaligned
21445
21446
21447
21448			static forceinline u16
21449			get_unaligned_le16(const u8 *p)
21450			{
21451			if (UNALIGNED_ACCESS_IS_FAST)
21452			return le16_bswap(load_u16_unaligned(p));
21453			else
21454			return ((u16)p[1] << 8) \| p[0];
21455			}
21456
21457			static forceinline u16
21458			get_unaligned_be16(const u8 *p)
21459			{
21460			if (UNALIGNED_ACCESS_IS_FAST)
21461			return be16_bswap(load_u16_unaligned(p));
21462			else
21463			return ((u16)p[0] << 8) \| p[1];
21464			}
21465
21466			static forceinline u32
21467			get_unaligned_le32(const u8 *p)
21468			{
21469			if (UNALIGNED_ACCESS_IS_FAST)
21470			return le32_bswap(load_u32_unaligned(p));
21471			else
21472			return ((u32)p[3] << 24) \| ((u32)p[2] << 16) \|
21473			((u32)p[1] << 8) \| p[0];
21474			}
21475
21476			static forceinline u32
21477			get_unaligned_be32(const u8 *p)
21478			{
21479			if (UNALIGNED_ACCESS_IS_FAST)
21480			return be32_bswap(load_u32_unaligned(p));
21481			else
21482			return ((u32)p[0] << 24) \| ((u32)p[1] << 16) \|
21483			((u32)p[2] << 8) \| p[3];
21484			}
21485
21486			static forceinline u64
21487			get_unaligned_le64(const u8 *p)
21488			{
21489			if (UNALIGNED_ACCESS_IS_FAST)
21490			return le64_bswap(load_u64_unaligned(p));
21491			else
21492			return ((u64)p[7] << 56) \| ((u64)p[6] << 48) \|
21493			((u64)p[5] << 40) \| ((u64)p[4] << 32) \|
21494			((u64)p[3] << 24) \| ((u64)p[2] << 16) \|
21495			((u64)p[1] << 8) \| p[0];
21496			}
21497
21498			static forceinline machine_word_t
21499			get_unaligned_leword(const u8 *p)
21500			{
21501			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
21502			if (WORDBITS == 32)
21503			return get_unaligned_le32(p);
21504			else
21505			return get_unaligned_le64(p);
21506			}
21507
21508
21509
21510			static forceinline void
21511			put_unaligned_le16(u16 v, u8 *p)
21512			{
21513			if (UNALIGNED_ACCESS_IS_FAST) {
21514			store_u16_unaligned(le16_bswap(v), p);
21515			} else {
21516			p[0] = (u8)(v >> 0);
21517			p[1] = (u8)(v >> 8);
21518			}
21519			}
21520
21521			static forceinline void
21522			put_unaligned_be16(u16 v, u8 *p)
21523			{
21524			if (UNALIGNED_ACCESS_IS_FAST) {
21525			store_u16_unaligned(be16_bswap(v), p);
21526			} else {
21527			p[0] = (u8)(v >> 8);
21528			p[1] = (u8)(v >> 0);
21529			}
21530			}
21531
21532			static forceinline void
21533			put_unaligned_le32(u32 v, u8 *p)
21534			{
21535			if (UNALIGNED_ACCESS_IS_FAST) {
21536			store_u32_unaligned(le32_bswap(v), p);
21537			} else {
21538			p[0] = (u8)(v >> 0);
21539			p[1] = (u8)(v >> 8);
21540			p[2] = (u8)(v >> 16);
21541			p[3] = (u8)(v >> 24);
21542			}
21543			}
21544
21545			static forceinline void
21546			put_unaligned_be32(u32 v, u8 *p)
21547			{
21548			if (UNALIGNED_ACCESS_IS_FAST) {
21549			store_u32_unaligned(be32_bswap(v), p);
21550			} else {
21551			p[0] = (u8)(v >> 24);
21552			p[1] = (u8)(v >> 16);
21553			p[2] = (u8)(v >> 8);
21554			p[3] = (u8)(v >> 0);
21555			}
21556			}
21557
21558			static forceinline void
21559			put_unaligned_le64(u64 v, u8 *p)
21560			{
21561			if (UNALIGNED_ACCESS_IS_FAST) {
21562			store_u64_unaligned(le64_bswap(v), p);
21563			} else {
21564			p[0] = (u8)(v >> 0);
21565			p[1] = (u8)(v >> 8);
21566			p[2] = (u8)(v >> 16);
21567			p[3] = (u8)(v >> 24);
21568			p[4] = (u8)(v >> 32);
21569			p[5] = (u8)(v >> 40);
21570			p[6] = (u8)(v >> 48);
21571			p[7] = (u8)(v >> 56);
21572			}
21573			}
21574
21575			static forceinline void
21576			put_unaligned_leword(machine_word_t v, u8 *p)
21577			{
21578			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
21579			if (WORDBITS == 32)
21580			put_unaligned_le32(v, p);
21581			else
21582			put_unaligned_le64(v, p);
21583			}
21584
21585
21586
21587
21588
21589
21590
21591			static forceinline unsigned
21592			bsr32(u32 v)
21593			{
21594			#if defined(__GNUC__) \|\| __has_builtin(__builtin_clz)
21595			return 31 - __builtin_clz(v);
21596			#elif defined(_MSC_VER)
21597			unsigned long i;
21598
21599			_BitScanReverse(&i, v);
21600			return i;
21601			#else
21602			unsigned i = 0;
21603
21604			while ((v >>= 1) != 0)
21605			i++;
21606			return i;
21607			#endif
21608			}
21609
21610			static forceinline unsigned
21611			bsr64(u64 v)
21612			{
21613			#if defined(__GNUC__) \|\| __has_builtin(__builtin_clzll)
21614			return 63 - __builtin_clzll(v);
21615			#elif defined(_MSC_VER) && defined(_WIN64)
21616			unsigned long i;
21617
21618			_BitScanReverse64(&i, v);
21619			return i;
21620			#else
21621			unsigned i = 0;
21622
21623			while ((v >>= 1) != 0)
21624			i++;
21625			return i;
21626			#endif
21627			}
21628
21629			static forceinline unsigned
21630			bsrw(machine_word_t v)
21631			{
21632			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
21633			if (WORDBITS == 32)
21634			return bsr32(v);
21635			else
21636			return bsr64(v);
21637			}
21638
21639
21640
21641			static forceinline unsigned
21642			bsf32(u32 v)
21643			{
21644			#if defined(__GNUC__) \|\| __has_builtin(__builtin_ctz)
21645			return __builtin_ctz(v);
21646			#elif defined(_MSC_VER)
21647			unsigned long i;
21648
21649			_BitScanForward(&i, v);
21650			return i;
21651			#else
21652			unsigned i = 0;
21653
21654			for (; (v & 1) == 0; v >>= 1)
21655			i++;
21656			return i;
21657			#endif
21658			}
21659
21660			static forceinline unsigned
21661			bsf64(u64 v)
21662			{
21663			#if defined(__GNUC__) \|\| __has_builtin(__builtin_ctzll)
21664			return __builtin_ctzll(v);
21665			#elif defined(_MSC_VER) && defined(_WIN64)
21666			unsigned long i;
21667
21668			_BitScanForward64(&i, v);
21669			return i;
21670			#else
21671			unsigned i = 0;
21672
21673			for (; (v & 1) == 0; v >>= 1)
21674			i++;
21675			return i;
21676			#endif
21677			}
21678
21679			static forceinline unsigned
21680			bsfw(machine_word_t v)
21681			{
21682			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
21683			if (WORDBITS == 32)
21684			return bsf32(v);
21685			else
21686			return bsf64(v);
21687			}
21688
21689
/*
 * rbit32(v): execute the ARM 'rbit' instruction on the 32-bit value 'v' and
 * return the result.  It is provided only for GCC/clang on 32-bit ARM
 * (ARMv6T2 or ARMv7 and later) and on ARM64.  On every other configuration
 * the 'rbit32' macro stays undefined, presumably so that users can test for
 * availability with #ifdef.
 */
#undef rbit32
#if (defined(__GNUC__) || defined(__clang__)) && defined(ARCH_ARM32) && \
	(__ARM_ARCH >= 7 || (__ARM_ARCH == 6 && defined(__ARM_ARCH_6T2__)))
static forceinline u32
rbit32(u32 v)
{
	__asm__("rbit %0, %1" : "=r" (v) : "r" (v));
	return v;
}
#define rbit32 rbit32
#elif (defined(__GNUC__) || defined(__clang__)) && defined(ARCH_ARM64)
static forceinline u32
rbit32(u32 v)
{
	/* The %w modifier selects the 32-bit view of the register. */
	__asm__("rbit %w0, %w1" : "=r" (v) : "r" (v));
	return v;
}
#define rbit32 rbit32
#endif
21709
21710			#endif
21711
21712
/*
 * Allocator hook types.  malloc_func_t is a pointer to a function that takes
 * a size and returns a block of memory; free_func_t is a pointer to a
 * function that takes such a block and returns nothing.
 */
typedef void *(*malloc_func_t)(size_t);
typedef void (*free_func_t)(void *);

/* Default allocator hooks; defined elsewhere in the library. */
extern malloc_func_t libdeflate_default_malloc_func;
extern free_func_t libdeflate_default_free_func;

/*
 * Aligned allocation helpers built on top of the hooks above.  Only the
 * prototypes are visible here.
 */
void *libdeflate_aligned_malloc(malloc_func_t malloc_func,
				size_t alignment, size_t size);
void libdeflate_aligned_free(free_func_t free_func, void *ptr);
21722
#ifdef FREESTANDING
/*
 * Freestanding build: <string.h> is not included, so declare the four memory
 * functions ourselves and map every call onto the compiler builtins.
 */
void *memset(void *s, int c, size_t n);
#define memset(s, c, n)		__builtin_memset((s), (c), (n))

void *memcpy(void *dest, const void *src, size_t n);
#define memcpy(dest, src, n)	__builtin_memcpy((dest), (src), (n))

void *memmove(void *dest, const void *src, size_t n);
#define memmove(dest, src, n)	__builtin_memmove((dest), (src), (n))

int memcmp(const void *s1, const void *s2, size_t n);
#define memcmp(s1, s2, n)	__builtin_memcmp((s1), (s2), (n))

/* Assertions are always disabled in freestanding builds. */
#undef LIBDEFLATE_ENABLE_ASSERTIONS
#else
#  include <string.h>

/* Force assertions on when the clang static analyzer is running. */
#  ifdef __clang_analyzer__
#    define LIBDEFLATE_ENABLE_ASSERTIONS
#  endif
#endif
21745
21746
/*
 * ASSERT(expr): with LIBDEFLATE_ENABLE_ASSERTIONS defined, call
 * libdeflate_assertion_failed() with the stringified expression, file and
 * line when 'expr' is false.  Otherwise 'expr' is still evaluated once, but
 * its value is discarded.
 */
#ifdef LIBDEFLATE_ENABLE_ASSERTIONS
NORETURN void
libdeflate_assertion_failed(const char *expr, const char *file, int line);
#define ASSERT(expr) { if (unlikely(!(expr))) \
	libdeflate_assertion_failed(#expr, __FILE__, __LINE__); }
#else
#define ASSERT(expr) (void)(expr)
#endif
21755
/*
 * Token pasting helpers.  CONCAT() goes through CONCAT_IMPL() so that macro
 * arguments are expanded before they are pasted; ADD_SUFFIX() appends
 * whatever the SUFFIX macro currently expands to.
 */
#define CONCAT_IMPL(lhs, rhs)	lhs##rhs
#define CONCAT(lhs, rhs)	CONCAT_IMPL(lhs, rhs)
#define ADD_SUFFIX(name)	CONCAT(name, SUFFIX)
21759
21760			#endif
21761
21762
21763			#ifndef MATCHFINDER_WINDOW_ORDER
21764			# error "MATCHFINDER_WINDOW_ORDER must be defined!"
21765			#endif
21766
21767
21768			static forceinline u32
21769			loaded_u32_to_u24(u32 v)
21770			{
21771			if (CPU_IS_LITTLE_ENDIAN())
21772			return v & 0xFFFFFF;
21773			else
21774			return v >> 8;
21775			}
21776
21777
21778			static forceinline u32
21779			load_u24_unaligned(const u8 *p)
21780			{
21781			#if UNALIGNED_ACCESS_IS_FAST
21782			return loaded_u32_to_u24(load_u32_unaligned(p));
21783			#else
21784			if (CPU_IS_LITTLE_ENDIAN())
21785			return ((u32)p[0] << 0) \| ((u32)p[1] << 8) \| ((u32)p[2] << 16);
21786			else
21787			return ((u32)p[2] << 0) \| ((u32)p[1] << 8) \| ((u32)p[0] << 16);
21788			#endif
21789			}
21790
/* Window size: 2 to the power MATCHFINDER_WINDOW_ORDER, as an unsigned long. */
21791			#define MATCHFINDER_WINDOW_SIZE (1UL << MATCHFINDER_WINDOW_ORDER)
21792
/* Type of the position values stored by the matchfinder: signed 16-bit. */
21793			typedef s16 mf_pos_t;
21794
/* Initial value: the negated window size, converted to mf_pos_t. */
21795			#define MATCHFINDER_INITVAL ((mf_pos_t)-MATCHFINDER_WINDOW_SIZE)
21796
21797
/* NOTE(review): presumably the required byte alignment of matchfinder memory; confirm against users. */
21798			#define MATCHFINDER_MEM_ALIGNMENT 32
21799
21800
/* NOTE(review): presumably the size granularity, in bytes, of matchfinder memory; confirm against users. */
21801			#define MATCHFINDER_SIZE_ALIGNMENT 1024
21802
21803			#undef matchfinder_init
21804			#undef matchfinder_rebase
21805			#ifdef _aligned_attribute
21806			# define MATCHFINDER_ALIGNED _aligned_attribute(MATCHFINDER_MEM_ALIGNMENT)
21807			# if defined(ARCH_ARM32) \|\| defined(ARCH_ARM64)
21808			/* # include "arm/matchfinder_impl.h" */
21809
21810
21811			#ifndef LIB_ARM_MATCHFINDER_IMPL_H
21812			#define LIB_ARM_MATCHFINDER_IMPL_H
21813
21814			/* #include "arm-cpu_features.h" */
21815
21816
21817			#ifndef LIB_ARM_CPU_FEATURES_H
21818			#define LIB_ARM_CPU_FEATURES_H
21819
21820			/* #include "lib_common.h" */
21821
21822
21823			#ifndef LIB_LIB_COMMON_H
21824			#define LIB_LIB_COMMON_H
21825
21826			#ifdef LIBDEFLATE_H
21827
21828			# error "lib_common.h must always be included before libdeflate.h"
21829			#endif
21830
/*
 * LIBDEFLATE_EXPORT_SYM: attribute that exports a symbol from the library
 * (dllexport for Windows/Cygwin DLL builds, default visibility for GNU C).
 */
#if defined(LIBDEFLATE_DLL) && (defined(_WIN32) || defined(__CYGWIN__))
#  define LIBDEFLATE_EXPORT_SYM  __declspec(dllexport)
#elif defined(__GNUC__)
#  define LIBDEFLATE_EXPORT_SYM  __attribute__((visibility("default")))
#else
#  define LIBDEFLATE_EXPORT_SYM
#endif

/*
 * LIBDEFLATE_ALIGN_STACK: on 32-bit x86 with GNU C, make the function realign
 * the stack pointer on entry; empty everywhere else.
 */
#if defined(__GNUC__) && defined(__i386__)
#  define LIBDEFLATE_ALIGN_STACK  __attribute__((force_align_arg_pointer))
#else
#  define LIBDEFLATE_ALIGN_STACK
#endif

/* Decoration applied to every public API function when building the library. */
#define LIBDEFLATEAPI	LIBDEFLATE_EXPORT_SYM LIBDEFLATE_ALIGN_STACK
21847
21848			/* #include "../common_defs.h" */
21849
21850
21851			#ifndef COMMON_DEFS_H
21852			#define COMMON_DEFS_H
21853
21854			/* #include "libdeflate.h" */
21855
21856
21857			#ifndef LIBDEFLATE_H
21858			#define LIBDEFLATE_H
21859
#include <stddef.h>	/* size_t */
#include <stdint.h>	/* uint32_t */
21862
21863			#ifdef __cplusplus
21864			extern "C" {
21865			#endif
21866
/* Library version as separate major/minor integers and as a string. */
21867			#define LIBDEFLATE_VERSION_MAJOR 1
21868			#define LIBDEFLATE_VERSION_MINOR 25
21869			#define LIBDEFLATE_VERSION_STRING "1.25"
21870
21871
/*
 * Fallback for users of the header: if LIBDEFLATEAPI has not been defined
 * already, import from the DLL on Windows/Cygwin when LIBDEFLATE_DLL is set,
 * and otherwise use no decoration.
 */
#ifndef LIBDEFLATEAPI
#  if defined(LIBDEFLATE_DLL) && (defined(_WIN32) || defined(__CYGWIN__))
#    define LIBDEFLATEAPI	__declspec(dllimport)
#  else
#    define LIBDEFLATEAPI
#  endif
#endif
21879
21880
21881
21882
21883
/*
 * Compression API.  Only prototypes are visible here; the definitions live
 * elsewhere.  NOTE(review): the behavioral notes below are inferred from the
 * names and signatures and should be confirmed against the implementation.
 */
/* Opaque types: declared here, defined elsewhere. */
21884			struct libdeflate_compressor;
21885			struct libdeflate_options;
21886
21887
/* Takes a compression level; returns a pointer to a compressor object. */
21888			LIBDEFLATEAPI struct libdeflate_compressor *
21889			libdeflate_alloc_compressor(int compression_level);
21890
21891
/* Same as above, with an additional pointer to a libdeflate_options struct. */
21892			LIBDEFLATEAPI struct libdeflate_compressor *
21893			libdeflate_alloc_compressor_ex(int compression_level,
21894			const struct libdeflate_options *options);
21895
21896
/* Input buffer (in, in_nbytes) and output buffer (out, out_nbytes_avail); returns a size_t. */
21897			LIBDEFLATEAPI size_t
21898			libdeflate_deflate_compress(struct libdeflate_compressor *compressor,
21899			const void *in, size_t in_nbytes,
21900			void *out, size_t out_nbytes_avail);
21901
21902
/* Takes an input size and returns a size_t (presumably an upper bound on the compressed size). */
21903			LIBDEFLATEAPI size_t
21904			libdeflate_deflate_compress_bound(struct libdeflate_compressor *compressor,
21905			size_t in_nbytes);
21906
21907
/* zlib-named variant with the same parameter list as libdeflate_deflate_compress(). */
21908			LIBDEFLATEAPI size_t
21909			libdeflate_zlib_compress(struct libdeflate_compressor *compressor,
21910			const void *in, size_t in_nbytes,
21911			void *out, size_t out_nbytes_avail);
21912
21913
21914			LIBDEFLATEAPI size_t
21915			libdeflate_zlib_compress_bound(struct libdeflate_compressor *compressor,
21916			size_t in_nbytes);
21917
21918
/* gzip-named variant with the same parameter list as libdeflate_deflate_compress(). */
21919			LIBDEFLATEAPI size_t
21920			libdeflate_gzip_compress(struct libdeflate_compressor *compressor,
21921			const void *in, size_t in_nbytes,
21922			void *out, size_t out_nbytes_avail);
21923
21924
21925			LIBDEFLATEAPI size_t
21926			libdeflate_gzip_compress_bound(struct libdeflate_compressor *compressor,
21927			size_t in_nbytes);
21928
21929
/* Takes a compressor pointer and returns nothing (presumably releases it). */
21930			LIBDEFLATEAPI void
21931			libdeflate_free_compressor(struct libdeflate_compressor *compressor);
21932
21933
21934
21935
21936
/*
 * Decompression API.  Only prototypes are visible here; the definitions live
 * elsewhere.  NOTE(review): the behavioral notes below are inferred from the
 * names and signatures and should be confirmed against the implementation.
 */
/* Opaque types: declared here, defined elsewhere. */
21937			struct libdeflate_decompressor;
21938			struct libdeflate_options;
21939
21940
/* Takes no arguments; returns a pointer to a decompressor object. */
21941			LIBDEFLATEAPI struct libdeflate_decompressor *
21942			libdeflate_alloc_decompressor(void);
21943
21944
/* Same as above, with a pointer to a libdeflate_options struct. */
21945			LIBDEFLATEAPI struct libdeflate_decompressor *
21946			libdeflate_alloc_decompressor_ex(const struct libdeflate_options *options);
21947
21948
/* Result code returned by every *_decompress function declared below. */
21949			enum libdeflate_result {
21950
21951			LIBDEFLATE_SUCCESS = 0,
21952
21953
21954			LIBDEFLATE_BAD_DATA = 1,
21955
21956
21957			LIBDEFLATE_SHORT_OUTPUT = 2,
21958
21959
21960			LIBDEFLATE_INSUFFICIENT_SPACE = 3,
21961			};
21962
21963
/*
 * Input buffer (in, in_nbytes), output buffer (out, out_nbytes_avail), and an
 * out-parameter through which an output byte count is reported.
 */
21964			LIBDEFLATEAPI enum libdeflate_result
21965			libdeflate_deflate_decompress(struct libdeflate_decompressor *decompressor,
21966			const void *in, size_t in_nbytes,
21967			void *out, size_t out_nbytes_avail,
21968			size_t *actual_out_nbytes_ret);
21969
21970
/* The _ex variant adds an out-parameter for an input byte count. */
21971			LIBDEFLATEAPI enum libdeflate_result
21972			libdeflate_deflate_decompress_ex(struct libdeflate_decompressor *decompressor,
21973			const void *in, size_t in_nbytes,
21974			void *out, size_t out_nbytes_avail,
21975			size_t *actual_in_nbytes_ret,
21976			size_t *actual_out_nbytes_ret);
21977
21978
/* zlib-named variants with the same parameter lists as the deflate ones. */
21979			LIBDEFLATEAPI enum libdeflate_result
21980			libdeflate_zlib_decompress(struct libdeflate_decompressor *decompressor,
21981			const void *in, size_t in_nbytes,
21982			void *out, size_t out_nbytes_avail,
21983			size_t *actual_out_nbytes_ret);
21984
21985
21986			LIBDEFLATEAPI enum libdeflate_result
21987			libdeflate_zlib_decompress_ex(struct libdeflate_decompressor *decompressor,
21988			const void *in, size_t in_nbytes,
21989			void *out, size_t out_nbytes_avail,
21990			size_t *actual_in_nbytes_ret,
21991			size_t *actual_out_nbytes_ret);
21992
21993
/* gzip-named variants with the same parameter lists as the deflate ones. */
21994			LIBDEFLATEAPI enum libdeflate_result
21995			libdeflate_gzip_decompress(struct libdeflate_decompressor *decompressor,
21996			const void *in, size_t in_nbytes,
21997			void *out, size_t out_nbytes_avail,
21998			size_t *actual_out_nbytes_ret);
21999
22000
22001			LIBDEFLATEAPI enum libdeflate_result
22002			libdeflate_gzip_decompress_ex(struct libdeflate_decompressor *decompressor,
22003			const void *in, size_t in_nbytes,
22004			void *out, size_t out_nbytes_avail,
22005			size_t *actual_in_nbytes_ret,
22006			size_t *actual_out_nbytes_ret);
22007
22008
/* Takes a decompressor pointer and returns nothing (presumably releases it). */
22009			LIBDEFLATEAPI void
22010			libdeflate_free_decompressor(struct libdeflate_decompressor *decompressor);
22011
22012
22013
22014
22015
22016
/*
 * Checksum API (prototypes only).  Each function takes a running 32-bit
 * checksum value plus a buffer and its length, and returns a 32-bit value.
 * NOTE(review): presumably the returned value is the updated checksum; confirm
 * against the definitions.
 */
22017			LIBDEFLATEAPI uint32_t
22018			libdeflate_adler32(uint32_t adler, const void *buffer, size_t len);
22019
22020
22021
22022			LIBDEFLATEAPI uint32_t
22023			libdeflate_crc32(uint32_t crc, const void *buffer, size_t len);
22024
22025
22026
22027
22028
22029
22030			LIBDEFLATEAPI void
22031			libdeflate_set_memory_allocator(void (malloc_func)(size_t),
22032			void (free_func)(void ));
22033
22034
/*
 * Options passed by pointer to the *_alloc_*_ex() functions.
 */
struct libdeflate_options {

	/* NOTE(review): presumably set by the caller to sizeof(struct libdeflate_options); confirm. */
	size_t sizeof_options;

	/* malloc-style and free-style allocator hooks. */
	void *(*malloc_func)(size_t);
	void (*free_func)(void *);
};
22044
22045			#ifdef __cplusplus
22046			}
22047			#endif
22048
22049			#endif
22050
22051
#include <stdbool.h>
#include <stddef.h>	/* size_t */
#include <stdint.h>
#ifdef _MSC_VER
#  include <intrin.h>	/* _BitScan*() and other intrinsics */
#  include <stdlib.h>	/* _byteswap_*() */

/* MSVC warnings that this code base triggers and that are disabled here. */
#  pragma warning(disable : 4146)

#  pragma warning(disable : 4018)
#  pragma warning(disable : 4244)
#  pragma warning(disable : 4267)
#  pragma warning(disable : 4310)

#  pragma warning(disable : 4100)
#  pragma warning(disable : 4127)
#  pragma warning(disable : 4189)
#  pragma warning(disable : 4232)
#  pragma warning(disable : 4245)
#  pragma warning(disable : 4295)
#endif
#ifndef FREESTANDING
#  include <string.h>	/* memcpy() */
#endif
22077
22078
22079
22080
22081
22082
/*
 * Target architecture detection.  At most one ARCH_* macro ends up defined.
 * MSVC is handled through its _M_* macros, every other compiler through the
 * GNU-style __arch__ macros.
 */
22083			#undef ARCH_X86_64
22084			#undef ARCH_X86_32
22085			#undef ARCH_ARM64
22086			#undef ARCH_ARM32
22087			#undef ARCH_RISCV
22088			#ifdef _MSC_VER
22089
/* _M_X64 together with _M_ARM64EC is deliberately not treated as x86_64. */
22090			# if defined(_M_X64) && !defined(_M_ARM64EC)
22091			# define ARCH_X86_64
22092			# elif defined(_M_IX86)
22093			# define ARCH_X86_32
22094			# elif defined(_M_ARM64)
22095			# define ARCH_ARM64
22096			# elif defined(_M_ARM)
22097			# define ARCH_ARM32
22098			# endif
22099			#else
22100			# if defined(__x86_64__)
22101			# define ARCH_X86_64
22102			# elif defined(__i386__)
22103			# define ARCH_X86_32
22104			# elif defined(__aarch64__)
22105			# define ARCH_ARM64
22106			# elif defined(__arm__)
22107			# define ARCH_ARM32
22108			# elif defined(__riscv)
22109			# define ARCH_RISCV
22110			# endif
22111			#endif
22112
22113
22114
22115
22116
22117
/* Short aliases for the fixed-width integer types. */
22118			typedef uint8_t u8;
22119			typedef uint16_t u16;
22120			typedef uint32_t u32;
22121			typedef uint64_t u64;
22122			typedef int8_t s8;
22123			typedef int16_t s16;
22124			typedef int32_t s32;
22125			typedef int64_t s64;
22126
22127
/* ssize_t is supplied here for MSVC builds, sized by _WIN64. */
22128			#ifdef _MSC_VER
22129			# ifdef _WIN64
22130			typedef long long ssize_t;
22131			# else
22132			typedef long ssize_t;
22133			# endif
22134			#endif
22135
22136
/* The "machine word" type used by the word-sized helpers is size_t. */
22137			typedef size_t machine_word_t;
22138
22139
/* Number of bytes in a machine word, as an int. */
22140			#define WORDBYTES ((int)sizeof(machine_word_t))
22141
22142
/* Number of bits in a machine word. */
22143			#define WORDBITS (8 * WORDBYTES)
22144
22145
22146
22147
22148
22149
/*
 * Compiler version checks.  Each *_PREREQ() macro evaluates to nonzero when
 * the code is being built by that compiler at the given version or newer, and
 * to 0 when built by a different compiler.  Compilers older than the minimum
 * supported versions are rejected with #error.
 */
#if defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER)
#  define GCC_PREREQ(major, minor)		\
	(__GNUC__ > (major) ||			\
	 (__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)))
#  if !GCC_PREREQ(4, 9)
#    error "gcc versions older than 4.9 are no longer supported"
#  endif
#else
#  define GCC_PREREQ(major, minor)	0
#endif
#ifdef __clang__
/* Apple clang is compared by build number, not by major.minor. */
#  ifdef __apple_build_version__
#    define CLANG_PREREQ(major, minor, apple_version)	\
	(__apple_build_version__ >= (apple_version))
#  else
#    define CLANG_PREREQ(major, minor, apple_version)	\
	(__clang_major__ > (major) ||			\
	 (__clang_major__ == (major) && __clang_minor__ >= (minor)))
#  endif
#  if !CLANG_PREREQ(3, 9, 8000000)
#    error "clang versions older than 3.9 are no longer supported"
#  endif
#else
#  define CLANG_PREREQ(major, minor, apple_version)	0
#endif
#ifdef _MSC_VER
#  define MSVC_PREREQ(version)	(_MSC_VER >= (version))
#  if !MSVC_PREREQ(1900)
#    error "MSVC versions older than Visual Studio 2015 are no longer supported"
#  endif
#else
#  define MSVC_PREREQ(version)	0
#endif
22183
22184
/* Compilers without __has_attribute() get a stub that always answers 0. */
22185			#ifndef __has_attribute
22186			# define __has_attribute(attribute) 0
22187			#endif
22188
22189
/* Compilers without __has_builtin() get a stub that always answers 0. */
22190			#ifndef __has_builtin
22191			# define __has_builtin(builtin) 0
22192			#endif
22193
22194
/* On MSVC, 'inline' is mapped to the vendor keyword __inline. */
22195			#ifdef _MSC_VER
22196			# define inline __inline
22197			#endif
22198
22199
/* forceinline: 'inline' plus the strongest inlining request the compiler has. */
#if defined(__GNUC__) || __has_attribute(always_inline)
#  define forceinline	inline __attribute__((always_inline))
#elif defined(_MSC_VER)
#  define forceinline	__forceinline
#else
#  define forceinline	inline
#endif

/* MAYBE_UNUSED: marks a declaration as possibly unused; empty if unsupported. */
#if defined(__GNUC__) || __has_attribute(unused)
#  define MAYBE_UNUSED	__attribute__((unused))
#else
#  define MAYBE_UNUSED
#endif

/* NORETURN: marks a function that does not return; empty if unsupported. */
#if defined(__GNUC__) || __has_attribute(noreturn)
#  define NORETURN	__attribute__((noreturn))
#else
#  define NORETURN
#endif
22221
22222
/*
 * When __STDC_VERSION__ is undefined or older than C11, replace 'restrict'
 * with the GNU spelling __restrict__, or with nothing on other compilers.
 */
#if !defined(__STDC_VERSION__) || (__STDC_VERSION__ < 201112L)
#  if defined(__GNUC__) || defined(__clang__)
#    define restrict	__restrict__
#  else
#    define restrict
#  endif
#endif
22230
22231
/*
 * likely(expr): hint that 'expr' is usually true.  With __builtin_expect the
 * result is normalized to 0 or 1; the fallback yields 'expr' unchanged.
 */
#if defined(__GNUC__) || __has_builtin(__builtin_expect)
#  define likely(expr)		__builtin_expect(!!(expr), 1)
#else
#  define likely(expr)		(expr)
#endif


/* unlikely(expr): same as likely(), but hints that 'expr' is usually false. */
#if defined(__GNUC__) || __has_builtin(__builtin_expect)
#  define unlikely(expr)	__builtin_expect(!!(expr), 0)
#else
#  define unlikely(expr)	(expr)
#endif
22244
22245
/*
 * prefetchr(addr): prefetch hint for memory that will be read.  Expands to
 * nothing when no suitable builtin or intrinsic is known.
 */
#undef prefetchr
#if defined(__GNUC__) || __has_builtin(__builtin_prefetch)
#  define prefetchr(addr)	__builtin_prefetch((addr), 0)
#elif defined(_MSC_VER)
#  if defined(ARCH_X86_32) || defined(ARCH_X86_64)
#    define prefetchr(addr)	_mm_prefetch((addr), _MM_HINT_T0)
#  elif defined(ARCH_ARM64)
#    define prefetchr(addr)	__prefetch2((addr), 0x00 )
#  elif defined(ARCH_ARM32)
#    define prefetchr(addr)	__prefetch(addr)
#  endif
#endif
#ifndef prefetchr
#  define prefetchr(addr)
#endif


/*
 * prefetchw(addr): prefetch hint for memory that will be written.  Expands to
 * nothing when no suitable builtin or intrinsic is known.
 */
#undef prefetchw
#if defined(__GNUC__) || __has_builtin(__builtin_prefetch)
#  define prefetchw(addr)	__builtin_prefetch((addr), 1)
#elif defined(_MSC_VER)
#  if defined(ARCH_X86_32) || defined(ARCH_X86_64)
#    define prefetchw(addr)	_m_prefetchw(addr)
#  elif defined(ARCH_ARM64)
#    define prefetchw(addr)	__prefetch2((addr), 0x10 )
#  elif defined(ARCH_ARM32)
#    define prefetchw(addr)	__prefetchw(addr)
#  endif
#endif
#ifndef prefetchw
#  define prefetchw(addr)
#endif
22278
22279
/*
 * _aligned_attribute(n): request n-byte alignment for a declaration.  Left
 * undefined when the compiler offers no way to do this, so code can test for
 * it with #ifdef.
 */
#undef _aligned_attribute
#if defined(__GNUC__) || __has_attribute(aligned)
#  define _aligned_attribute(n)	__attribute__((aligned(n)))
#elif defined(_MSC_VER)
#  define _aligned_attribute(n)	__declspec(align(n))
#endif


/*
 * _target_attribute(attrs): compile a function for the given target features;
 * expands to nothing when the 'target' attribute is unavailable.
 */
#if defined(__GNUC__) || __has_attribute(target)
#  define _target_attribute(attrs)	__attribute__((target(attrs)))
#else
#  define _target_attribute(attrs)
#endif
22293
22294
22295
22296
22297
/* Number of elements in a true array (not a pointer). */
#define ARRAY_LEN(arr)		(sizeof(arr) / sizeof((arr)[0]))
/* Smaller / larger of two values; each argument may be evaluated twice. */
#define MIN(x, y)		((x) <= (y) ? (x) : (y))
#define MAX(x, y)		((x) >= (y) ? (x) : (y))
/* Integer division rounding up. */
#define DIV_ROUND_UP(num, den)	(((num) + (den) - 1) / (den))
/* Compile-time check: the array size becomes negative when cond is false. */
#define STATIC_ASSERT(cond)	((void)sizeof(char[1 - 2 * !(cond)]))
/* Round val up to a multiple of pow2, which must be a power of two. */
#define ALIGN(val, pow2)	(((val) + (pow2) - 1) & ~((pow2) - 1))
/* Round num up to a multiple of den (any positive den). */
#define ROUND_UP(num, den)	((den) * DIV_ROUND_UP((num), (den)))
22305
22306
22307
22308
22309
22310
/*
 * CPU_IS_LITTLE_ENDIAN(): true when the target stores the least significant
 * byte first.  It is a compile-time comparison when __BYTE_ORDER__ is
 * available, the constant 'true' on MSVC, and a runtime probe otherwise.
 */
22311			#if defined(__BYTE_ORDER__)
22312			# define CPU_IS_LITTLE_ENDIAN() (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
22313			#elif defined(_MSC_VER)
22314			# define CPU_IS_LITTLE_ENDIAN() true
22315			#else
22316			static forceinline bool CPU_IS_LITTLE_ENDIAN(void)
22317			{
/* Store 1 in the 32-bit member and read back the first byte through the union. */
22318			union {
22319			u32 w;
22320			u8 b;
22321			} u;
22322
22323			u.w = 1;
22324			return u.b;
22325			}
22326			#endif
22327
22328
22329			static forceinline u16 bswap16(u16 v)
22330			{
22331			#if defined(__GNUC__) \|\| __has_builtin(__builtin_bswap16)
22332			return __builtin_bswap16(v);
22333			#elif defined(_MSC_VER)
22334			return _byteswap_ushort(v);
22335			#else
22336			return (v << 8) \| (v >> 8);
22337			#endif
22338			}
22339
22340
22341			static forceinline u32 bswap32(u32 v)
22342			{
22343			#if defined(__GNUC__) \|\| __has_builtin(__builtin_bswap32)
22344			return __builtin_bswap32(v);
22345			#elif defined(_MSC_VER)
22346			return _byteswap_ulong(v);
22347			#else
22348			return ((v & 0x000000FF) << 24) \|
22349			((v & 0x0000FF00) << 8) \|
22350			((v & 0x00FF0000) >> 8) \|
22351			((v & 0xFF000000) >> 24);
22352			#endif
22353			}
22354
22355
22356			static forceinline u64 bswap64(u64 v)
22357			{
22358			#if defined(__GNUC__) \|\| __has_builtin(__builtin_bswap64)
22359			return __builtin_bswap64(v);
22360			#elif defined(_MSC_VER)
22361			return _byteswap_uint64(v);
22362			#else
22363			return ((v & 0x00000000000000FF) << 56) \|
22364			((v & 0x000000000000FF00) << 40) \|
22365			((v & 0x0000000000FF0000) << 24) \|
22366			((v & 0x00000000FF000000) << 8) \|
22367			((v & 0x000000FF00000000) >> 8) \|
22368			((v & 0x0000FF0000000000) >> 24) \|
22369			((v & 0x00FF000000000000) >> 40) \|
22370			((v & 0xFF00000000000000) >> 56);
22371			#endif
22372			}
22373
/*
 * Convert between native byte order and a fixed byte order.  The leNN
 * versions swap only on big-endian CPUs, the beNN versions only on
 * little-endian CPUs.  Each conversion is its own inverse.
 */
#define le16_bswap(x)	(CPU_IS_LITTLE_ENDIAN() ? (x) : bswap16(x))
#define le32_bswap(x)	(CPU_IS_LITTLE_ENDIAN() ? (x) : bswap32(x))
#define le64_bswap(x)	(CPU_IS_LITTLE_ENDIAN() ? (x) : bswap64(x))
#define be16_bswap(x)	(CPU_IS_LITTLE_ENDIAN() ? bswap16(x) : (x))
#define be32_bswap(x)	(CPU_IS_LITTLE_ENDIAN() ? bswap32(x) : (x))
#define be64_bswap(x)	(CPU_IS_LITTLE_ENDIAN() ? bswap64(x) : (x))
22380
22381
22382
22383
22384
22385
/*
 * UNALIGNED_ACCESS_IS_FAST is 1 for GNU C / clang on the targets listed
 * below and for every MSVC build, and 0 otherwise.  The accessors in this
 * file use it to choose between a single word access and byte-wise access.
 */
#if (defined(__GNUC__) || defined(__clang__)) && \
	(defined(ARCH_X86_64) || defined(ARCH_X86_32) || \
	 defined(__ARM_FEATURE_UNALIGNED) || defined(__powerpc64__) || \
	 defined(__riscv_misaligned_fast) || \
	 defined(__wasm__))
#  define UNALIGNED_ACCESS_IS_FAST	1
#elif defined(_MSC_VER)
#  define UNALIGNED_ACCESS_IS_FAST	1
#else
#  define UNALIGNED_ACCESS_IS_FAST	0
#endif
22397
22398
22399
/* MEMCOPY: the builtin memcpy in freestanding builds, the library memcpy otherwise. */
22400			#ifdef FREESTANDING
22401			# define MEMCOPY __builtin_memcpy
22402			#else
22403			# define MEMCOPY memcpy
22404			#endif
22405
22406
22407
/*
 * DEFINE_UNALIGNED_TYPE(type) generates load_<type>_unaligned() and
 * store_<type>_unaligned().  Both copy sizeof(type) bytes with MEMCOPY rather
 * than dereferencing a cast pointer, so 'p' may have any alignment.  Values
 * are in native byte order.
 */
22408			#define DEFINE_UNALIGNED_TYPE(type) \
22409			static forceinline type \
22410			load_##type##_unaligned(const void *p) \
22411			{ \
22412			type v; \
22413			\
22414			MEMCOPY(&v, p, sizeof(v)); \
22415			return v; \
22416			} \
22417			\
22418			static forceinline void \
22419			store_##type##_unaligned(type v, void *p) \
22420			{ \
22421			MEMCOPY(p, &v, sizeof(v)); \
22422			}
22423
/* Instantiate the accessors for the three fixed widths and the machine word. */
22424			DEFINE_UNALIGNED_TYPE(u16)
22425			DEFINE_UNALIGNED_TYPE(u32)
22426			DEFINE_UNALIGNED_TYPE(u64)
22427			DEFINE_UNALIGNED_TYPE(machine_word_t)
22428
22429			#undef MEMCOPY
22430
/* Shorter names for the machine-word accessors. */
22431			#define load_word_unaligned load_machine_word_t_unaligned
22432			#define store_word_unaligned store_machine_word_t_unaligned
22433
22434
22435
22436			static forceinline u16
22437			get_unaligned_le16(const u8 *p)
22438			{
22439			if (UNALIGNED_ACCESS_IS_FAST)
22440			return le16_bswap(load_u16_unaligned(p));
22441			else
22442			return ((u16)p[1] << 8) \| p[0];
22443			}
22444
22445			static forceinline u16
22446			get_unaligned_be16(const u8 *p)
22447			{
22448			if (UNALIGNED_ACCESS_IS_FAST)
22449			return be16_bswap(load_u16_unaligned(p));
22450			else
22451			return ((u16)p[0] << 8) \| p[1];
22452			}
22453
22454			static forceinline u32
22455			get_unaligned_le32(const u8 *p)
22456			{
22457			if (UNALIGNED_ACCESS_IS_FAST)
22458			return le32_bswap(load_u32_unaligned(p));
22459			else
22460			return ((u32)p[3] << 24) \| ((u32)p[2] << 16) \|
22461			((u32)p[1] << 8) \| p[0];
22462			}
22463
22464			static forceinline u32
22465			get_unaligned_be32(const u8 *p)
22466			{
22467			if (UNALIGNED_ACCESS_IS_FAST)
22468			return be32_bswap(load_u32_unaligned(p));
22469			else
22470			return ((u32)p[0] << 24) \| ((u32)p[1] << 16) \|
22471			((u32)p[2] << 8) \| p[3];
22472			}
22473
22474			static forceinline u64
22475			get_unaligned_le64(const u8 *p)
22476			{
22477			if (UNALIGNED_ACCESS_IS_FAST)
22478			return le64_bswap(load_u64_unaligned(p));
22479			else
22480			return ((u64)p[7] << 56) \| ((u64)p[6] << 48) \|
22481			((u64)p[5] << 40) \| ((u64)p[4] << 32) \|
22482			((u64)p[3] << 24) \| ((u64)p[2] << 16) \|
22483			((u64)p[1] << 8) \| p[0];
22484			}
22485
22486			static forceinline machine_word_t
22487			get_unaligned_leword(const u8 *p)
22488			{
22489			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
22490			if (WORDBITS == 32)
22491			return get_unaligned_le32(p);
22492			else
22493			return get_unaligned_le64(p);
22494			}
22495
22496
22497
22498			static forceinline void
22499			put_unaligned_le16(u16 v, u8 *p)
22500			{
22501			if (UNALIGNED_ACCESS_IS_FAST) {
22502			store_u16_unaligned(le16_bswap(v), p);
22503			} else {
22504			p[0] = (u8)(v >> 0);
22505			p[1] = (u8)(v >> 8);
22506			}
22507			}
22508
22509			static forceinline void
22510			put_unaligned_be16(u16 v, u8 *p)
22511			{
22512			if (UNALIGNED_ACCESS_IS_FAST) {
22513			store_u16_unaligned(be16_bswap(v), p);
22514			} else {
22515			p[0] = (u8)(v >> 8);
22516			p[1] = (u8)(v >> 0);
22517			}
22518			}
22519
22520			static forceinline void
22521			put_unaligned_le32(u32 v, u8 *p)
22522			{
22523			if (UNALIGNED_ACCESS_IS_FAST) {
22524			store_u32_unaligned(le32_bswap(v), p);
22525			} else {
22526			p[0] = (u8)(v >> 0);
22527			p[1] = (u8)(v >> 8);
22528			p[2] = (u8)(v >> 16);
22529			p[3] = (u8)(v >> 24);
22530			}
22531			}
22532
22533			static forceinline void
22534			put_unaligned_be32(u32 v, u8 *p)
22535			{
22536			if (UNALIGNED_ACCESS_IS_FAST) {
22537			store_u32_unaligned(be32_bswap(v), p);
22538			} else {
22539			p[0] = (u8)(v >> 24);
22540			p[1] = (u8)(v >> 16);
22541			p[2] = (u8)(v >> 8);
22542			p[3] = (u8)(v >> 0);
22543			}
22544			}
22545
22546			static forceinline void
22547			put_unaligned_le64(u64 v, u8 *p)
22548			{
22549			if (UNALIGNED_ACCESS_IS_FAST) {
22550			store_u64_unaligned(le64_bswap(v), p);
22551			} else {
22552			p[0] = (u8)(v >> 0);
22553			p[1] = (u8)(v >> 8);
22554			p[2] = (u8)(v >> 16);
22555			p[3] = (u8)(v >> 24);
22556			p[4] = (u8)(v >> 32);
22557			p[5] = (u8)(v >> 40);
22558			p[6] = (u8)(v >> 48);
22559			p[7] = (u8)(v >> 56);
22560			}
22561			}
22562
22563			static forceinline void
22564			put_unaligned_leword(machine_word_t v, u8 *p)
22565			{
22566			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
22567			if (WORDBITS == 32)
22568			put_unaligned_le32(v, p);
22569			else
22570			put_unaligned_le64(v, p);
22571			}
22572
22573
22574
22575
22576
22577
22578
22579			static forceinline unsigned
22580			bsr32(u32 v)
22581			{
22582			#if defined(__GNUC__) \|\| __has_builtin(__builtin_clz)
22583			return 31 - __builtin_clz(v);
22584			#elif defined(_MSC_VER)
22585			unsigned long i;
22586
22587			_BitScanReverse(&i, v);
22588			return i;
22589			#else
22590			unsigned i = 0;
22591
22592			while ((v >>= 1) != 0)
22593			i++;
22594			return i;
22595			#endif
22596			}
22597
22598			static forceinline unsigned
22599			bsr64(u64 v)
22600			{
22601			#if defined(__GNUC__) \|\| __has_builtin(__builtin_clzll)
22602			return 63 - __builtin_clzll(v);
22603			#elif defined(_MSC_VER) && defined(_WIN64)
22604			unsigned long i;
22605
22606			_BitScanReverse64(&i, v);
22607			return i;
22608			#else
22609			unsigned i = 0;
22610
22611			while ((v >>= 1) != 0)
22612			i++;
22613			return i;
22614			#endif
22615			}
22616
22617			static forceinline unsigned
22618			bsrw(machine_word_t v)
22619			{
22620			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
22621			if (WORDBITS == 32)
22622			return bsr32(v);
22623			else
22624			return bsr64(v);
22625			}
22626
22627
22628
22629			static forceinline unsigned
22630			bsf32(u32 v)
22631			{
22632			#if defined(__GNUC__) \|\| __has_builtin(__builtin_ctz)
22633			return __builtin_ctz(v);
22634			#elif defined(_MSC_VER)
22635			unsigned long i;
22636
22637			_BitScanForward(&i, v);
22638			return i;
22639			#else
22640			unsigned i = 0;
22641
22642			for (; (v & 1) == 0; v >>= 1)
22643			i++;
22644			return i;
22645			#endif
22646			}
22647
22648			static forceinline unsigned
22649			bsf64(u64 v)
22650			{
22651			#if defined(__GNUC__) \|\| __has_builtin(__builtin_ctzll)
22652			return __builtin_ctzll(v);
22653			#elif defined(_MSC_VER) && defined(_WIN64)
22654			unsigned long i;
22655
22656			_BitScanForward64(&i, v);
22657			return i;
22658			#else
22659			unsigned i = 0;
22660
22661			for (; (v & 1) == 0; v >>= 1)
22662			i++;
22663			return i;
22664			#endif
22665			}
22666
22667			static forceinline unsigned
22668			bsfw(machine_word_t v)
22669			{
22670			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
22671			if (WORDBITS == 32)
22672			return bsf32(v);
22673			else
22674			return bsf64(v);
22675			}
22676
22677
/*
 * rbit32(v): execute the ARM 'rbit' instruction on the 32-bit value 'v' and
 * return the result.  It is provided only for GCC/clang on 32-bit ARM
 * (ARMv6T2 or ARMv7 and later) and on ARM64.  On every other configuration
 * the 'rbit32' macro stays undefined, presumably so that users can test for
 * availability with #ifdef.
 */
#undef rbit32
#if (defined(__GNUC__) || defined(__clang__)) && defined(ARCH_ARM32) && \
	(__ARM_ARCH >= 7 || (__ARM_ARCH == 6 && defined(__ARM_ARCH_6T2__)))
static forceinline u32
rbit32(u32 v)
{
	__asm__("rbit %0, %1" : "=r" (v) : "r" (v));
	return v;
}
#define rbit32 rbit32
#elif (defined(__GNUC__) || defined(__clang__)) && defined(ARCH_ARM64)
static forceinline u32
rbit32(u32 v)
{
	/* The %w modifier selects the 32-bit view of the register. */
	__asm__("rbit %w0, %w1" : "=r" (v) : "r" (v));
	return v;
}
#define rbit32 rbit32
#endif
22697
22698			#endif
22699
22700
/*
 * Allocator hook types.  malloc_func_t is a pointer to a function that takes
 * a size and returns a block of memory; free_func_t is a pointer to a
 * function that takes such a block and returns nothing.
 */
typedef void *(*malloc_func_t)(size_t);
typedef void (*free_func_t)(void *);

/* Default allocator hooks; defined elsewhere in the library. */
extern malloc_func_t libdeflate_default_malloc_func;
extern free_func_t libdeflate_default_free_func;

/*
 * Aligned allocation helpers built on top of the hooks above.  Only the
 * prototypes are visible here.
 */
void *libdeflate_aligned_malloc(malloc_func_t malloc_func,
				size_t alignment, size_t size);
void libdeflate_aligned_free(free_func_t free_func, void *ptr);
22710
#ifdef FREESTANDING
/*
 * Freestanding build: <string.h> is not included, so declare the four memory
 * functions ourselves and map every call onto the compiler builtins.
 */
void *memset(void *s, int c, size_t n);
#define memset(s, c, n)		__builtin_memset((s), (c), (n))

void *memcpy(void *dest, const void *src, size_t n);
#define memcpy(dest, src, n)	__builtin_memcpy((dest), (src), (n))

void *memmove(void *dest, const void *src, size_t n);
#define memmove(dest, src, n)	__builtin_memmove((dest), (src), (n))

int memcmp(const void *s1, const void *s2, size_t n);
#define memcmp(s1, s2, n)	__builtin_memcmp((s1), (s2), (n))

/* Assertions are always disabled in freestanding builds. */
#undef LIBDEFLATE_ENABLE_ASSERTIONS
#else
#  include <string.h>

/* Force assertions on when the clang static analyzer is running. */
#  ifdef __clang_analyzer__
#    define LIBDEFLATE_ENABLE_ASSERTIONS
#  endif
#endif
22733
22734
/*
 * ASSERT(expr): with LIBDEFLATE_ENABLE_ASSERTIONS defined, call
 * libdeflate_assertion_failed() with the stringified expression, file and
 * line when 'expr' is false.  Otherwise 'expr' is still evaluated once, but
 * its value is discarded.
 */
#ifdef LIBDEFLATE_ENABLE_ASSERTIONS
NORETURN void
libdeflate_assertion_failed(const char *expr, const char *file, int line);
#define ASSERT(expr) { if (unlikely(!(expr))) \
	libdeflate_assertion_failed(#expr, __FILE__, __LINE__); }
#else
#define ASSERT(expr) (void)(expr)
#endif
22743
/*
 * Token pasting helpers.  CONCAT() goes through CONCAT_IMPL() so that macro
 * arguments are expanded before they are pasted; ADD_SUFFIX() appends
 * whatever the SUFFIX macro currently expands to.
 */
#define CONCAT_IMPL(lhs, rhs)	lhs##rhs
#define CONCAT(lhs, rhs)	CONCAT_IMPL(lhs, rhs)
#define ADD_SUFFIX(name)	CONCAT(name, SUFFIX)
22747
22748			#endif
22749
22750
22751			#if defined(ARCH_ARM32) \|\| defined(ARCH_ARM64)
22752
/*
 * Bit flags for ARM CPU features, one bit per feature.  The HAVE_*() macros
 * that follow test them in a 'features' mask.
 */
22753			#define ARM_CPU_FEATURE_NEON (1 << 0)
22754			#define ARM_CPU_FEATURE_PMULL (1 << 1)
22755
22756			#define ARM_CPU_FEATURE_PREFER_PMULL (1 << 2)
22757			#define ARM_CPU_FEATURE_CRC32 (1 << 3)
22758			#define ARM_CPU_FEATURE_SHA3 (1 << 4)
22759			#define ARM_CPU_FEATURE_DOTPROD (1 << 5)
22760
22761			#if !defined(FREESTANDING) && \
22762			(defined(__GNUC__) \|\| defined(__clang__) \|\| defined(_MSC_VER)) && \
22763			(defined(__linux__) \|\| \
22764			(defined(__APPLE__) && defined(ARCH_ARM64)) \|\| \
22765			(defined(_WIN32) && defined(ARCH_ARM64)))
22766
22767			# define ARM_CPU_FEATURES_KNOWN (1U << 31)
22768			extern volatile u32 libdeflate_arm_cpu_features;
22769
22770			void libdeflate_init_arm_cpu_features(void);
22771
22772			static inline u32 get_arm_cpu_features(void)
22773			{
22774			if (libdeflate_arm_cpu_features == 0)
22775			libdeflate_init_arm_cpu_features();
22776			return libdeflate_arm_cpu_features;
22777			}
22778			#else
22779			static inline u32 get_arm_cpu_features(void) { return 0; }
22780			#endif
22781
22782
22783			#if defined(__ARM_NEON) \|\| (defined(_MSC_VER) && defined(ARCH_ARM64))
22784			# define HAVE_NEON(features) 1
22785			# define HAVE_NEON_NATIVE 1
22786			#else
22787			# define HAVE_NEON(features) ((features) & ARM_CPU_FEATURE_NEON)
22788			# define HAVE_NEON_NATIVE 0
22789			#endif
22790
22791			#if (defined(__GNUC__) \|\| defined(__clang__) \|\| defined(_MSC_VER)) && \
22792			(HAVE_NEON_NATIVE \|\| (GCC_PREREQ(6, 1) && defined(__ARM_FP)))
22793			# define HAVE_NEON_INTRIN 1
22794			# include
22795			#else
22796			# define HAVE_NEON_INTRIN 0
22797			#endif
22798
22799
22800			#ifdef __ARM_FEATURE_CRYPTO
22801			# define HAVE_PMULL(features) 1
22802			#else
22803			# define HAVE_PMULL(features) ((features) & ARM_CPU_FEATURE_PMULL)
22804			#endif
22805			#if defined(ARCH_ARM64) && HAVE_NEON_INTRIN && \
22806			(GCC_PREREQ(7, 1) \|\| defined(__clang__) \|\| defined(_MSC_VER)) && \
22807			CPU_IS_LITTLE_ENDIAN()
22808			# define HAVE_PMULL_INTRIN 1
22809
22810			# ifdef _MSC_VER
22811			# define compat_vmull_p64(a, b) vmull_p64(vcreate_p64(a), vcreate_p64(b))
22812			# else
22813			# define compat_vmull_p64(a, b) vmull_p64((a), (b))
22814			# endif
22815			#else
22816			# define HAVE_PMULL_INTRIN 0
22817			#endif
22818
22819
/* HAVE_CRC32(features): true if the CRC32 feature may be used. */
#ifdef __ARM_FEATURE_CRC32
#  define HAVE_CRC32(features)	1
#else
#  define HAVE_CRC32(features)	((features) & ARM_CPU_FEATURE_CRC32)
#endif
/* HAVE_CRC32_INTRIN: 1 when the __crc32*() intrinsics can be used. */
#if defined(ARCH_ARM64) && \
	(defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER))
#  define HAVE_CRC32_INTRIN	1
#  if defined(__GNUC__) || defined(__clang__)
#    include <arm_acle.h>
#  endif

   /*
    * For clang older than 16 without CRC32 enabled for the whole build,
    * replace the intrinsics with inline-assembly equivalents.
    */
#  if defined(__clang__) && !CLANG_PREREQ(16, 0, 16000000) && \
	!defined(__ARM_FEATURE_CRC32)
#    undef __crc32b
#    define __crc32b(a, b)					\
	({ uint32_t res;					\
	   __asm__("crc32b %w0, %w1, %w2"			\
		   : "=r" (res) : "r" (a), "r" (b));		\
	   res; })
#    undef __crc32h
#    define __crc32h(a, b)					\
	({ uint32_t res;					\
	   __asm__("crc32h %w0, %w1, %w2"			\
		   : "=r" (res) : "r" (a), "r" (b));		\
	   res; })
#    undef __crc32w
#    define __crc32w(a, b)					\
	({ uint32_t res;					\
	   __asm__("crc32w %w0, %w1, %w2"			\
		   : "=r" (res) : "r" (a), "r" (b));		\
	   res; })
#    undef __crc32d
#    define __crc32d(a, b)					\
	({ uint32_t res;					\
	   __asm__("crc32x %w0, %w1, %2"			\
		   : "=r" (res) : "r" (a), "r" (b));		\
	   res; })
#    pragma clang diagnostic ignored "-Wgnu-statement-expression"
#  endif
#else
#  define HAVE_CRC32_INTRIN	0
#endif
22863
22864
22865			#ifdef __ARM_FEATURE_SHA3
22866			# define HAVE_SHA3(features) 1
22867			#else
22868			# define HAVE_SHA3(features) ((features) & ARM_CPU_FEATURE_SHA3)
22869			#endif
22870			#if defined(ARCH_ARM64) && HAVE_NEON_INTRIN && \
22871			(GCC_PREREQ(9, 1) \|\| \
22872			CLANG_PREREQ(7, 0, 10010463) )
22873			# define HAVE_SHA3_INTRIN 1
22874
22875			# if defined(__clang__) && !CLANG_PREREQ(16, 0, 16000000) && \
22876			!defined(__ARM_FEATURE_SHA3)
22877			# undef veor3q_u8
22878			# define veor3q_u8(a, b, c) \
22879			({ uint8x16_t res; \
22880			__asm__("eor3 %0.16b, %1.16b, %2.16b, %3.16b" \
22881			: "=w" (res) : "w" (a), "w" (b), "w" (c)); \
22882			res; })
22883			# pragma clang diagnostic ignored "-Wgnu-statement-expression"
22884			# endif
22885			#else
22886			# define HAVE_SHA3_INTRIN 0
22887			#endif
22888
22889
22890			#ifdef __ARM_FEATURE_DOTPROD
22891			# define HAVE_DOTPROD(features) 1
22892			#else
22893			# define HAVE_DOTPROD(features) ((features) & ARM_CPU_FEATURE_DOTPROD)
22894			#endif
22895			#if defined(ARCH_ARM64) && HAVE_NEON_INTRIN && \
22896			(GCC_PREREQ(8, 1) \|\| CLANG_PREREQ(7, 0, 10010000) \|\| defined(_MSC_VER))
22897			# define HAVE_DOTPROD_INTRIN 1
22898
22899			# if defined(__clang__) && !CLANG_PREREQ(16, 0, 16000000) && \
22900			!defined(__ARM_FEATURE_DOTPROD)
22901			# undef vdotq_u32
22902			# define vdotq_u32(a, b, c) \
22903			({ uint32x4_t res = (a); \
22904			__asm__("udot %0.4s, %1.16b, %2.16b" \
22905			: "+w" (res) : "w" (b), "w" (c)); \
22906			res; })
22907			# pragma clang diagnostic ignored "-Wgnu-statement-expression"
22908			# endif
22909			#else
22910			# define HAVE_DOTPROD_INTRIN 0
22911			#endif
22912
22913			#endif
22914
22915			#endif
22916
22917
22918			#if HAVE_NEON_NATIVE
22919			static forceinline void
22920			matchfinder_init_neon(mf_pos_t *data, size_t size)
22921			{
22922			int16x8_t p = (int16x8_t )data;
22923			int16x8_t v = vdupq_n_s16(MATCHFINDER_INITVAL);
22924
22925			STATIC_ASSERT(MATCHFINDER_MEM_ALIGNMENT % sizeof(*p) == 0);
22926			STATIC_ASSERT(MATCHFINDER_SIZE_ALIGNMENT % (4 * sizeof(*p)) == 0);
22927			STATIC_ASSERT(sizeof(mf_pos_t) == 2);
22928
22929			do {
22930			p[0] = v;
22931			p[1] = v;
22932			p[2] = v;
22933			p[3] = v;
22934			p += 4;
22935			size -= 4 * sizeof(*p);
22936			} while (size != 0);
22937			}
22938			#define matchfinder_init matchfinder_init_neon
22939
22940			static forceinline void
22941			matchfinder_rebase_neon(mf_pos_t *data, size_t size)
22942			{
22943			int16x8_t p = (int16x8_t )data;
22944			int16x8_t v = vdupq_n_s16((u16)-MATCHFINDER_WINDOW_SIZE);
22945
22946			STATIC_ASSERT(MATCHFINDER_MEM_ALIGNMENT % sizeof(*p) == 0);
22947			STATIC_ASSERT(MATCHFINDER_SIZE_ALIGNMENT % (4 * sizeof(*p)) == 0);
22948			STATIC_ASSERT(sizeof(mf_pos_t) == 2);
22949
22950			do {
22951			p[0] = vqaddq_s16(p[0], v);
22952			p[1] = vqaddq_s16(p[1], v);
22953			p[2] = vqaddq_s16(p[2], v);
22954			p[3] = vqaddq_s16(p[3], v);
22955			p += 4;
22956			size -= 4 * sizeof(*p);
22957			} while (size != 0);
22958			}
22959			#define matchfinder_rebase matchfinder_rebase_neon
22960
22961			#endif
22962
22963			#endif
22964
22965			# elif defined(ARCH_RISCV)
22966			# include "riscv/matchfinder_impl.h"
22967			# elif defined(ARCH_X86_32) \|\| defined(ARCH_X86_64)
22968			/* # include "x86/matchfinder_impl.h" */
22969
22970
22971			#ifndef LIB_X86_MATCHFINDER_IMPL_H
22972			#define LIB_X86_MATCHFINDER_IMPL_H
22973
22974			/* #include "x86-cpu_features.h" */
22975
22976
22977			#ifndef LIB_X86_CPU_FEATURES_H
22978			#define LIB_X86_CPU_FEATURES_H
22979
22980			/* #include "lib_common.h" */
22981
22982
22983			#ifndef LIB_LIB_COMMON_H
22984			#define LIB_LIB_COMMON_H
22985
22986			#ifdef LIBDEFLATE_H
22987
22988			# error "lib_common.h must always be included before libdeflate.h"
22989			#endif
22990
/* LIBDEFLATE_EXPORT_SYM: marks a symbol as exported from the library. */
#if defined(LIBDEFLATE_DLL) && (defined(_WIN32) || defined(__CYGWIN__))
#  define LIBDEFLATE_EXPORT_SYM  __declspec(dllexport)
#elif defined(__GNUC__)
#  define LIBDEFLATE_EXPORT_SYM  __attribute__((visibility("default")))
#else
#  define LIBDEFLATE_EXPORT_SYM
#endif


/* LIBDEFLATE_ALIGN_STACK: realign the stack on entry (gcc on 32-bit x86). */
#if defined(__GNUC__) && defined(__i386__)
#  define LIBDEFLATE_ALIGN_STACK  __attribute__((force_align_arg_pointer))
#else
#  define LIBDEFLATE_ALIGN_STACK
#endif

/* Decoration applied to every public API function. */
#define LIBDEFLATEAPI	LIBDEFLATE_EXPORT_SYM LIBDEFLATE_ALIGN_STACK
23007
23008			/* #include "../common_defs.h" */
23009
23010
23011			#ifndef COMMON_DEFS_H
23012			#define COMMON_DEFS_H
23013
23014			/* #include "libdeflate.h" */
23015
23016
23017			#ifndef LIBDEFLATE_H
23018			#define LIBDEFLATE_H
23019
#include <stddef.h>	/* size_t */
#include <stdint.h>	/* uint32_t */
23022
23023			#ifdef __cplusplus
23024			extern "C" {
23025			#endif
23026
23027			#define LIBDEFLATE_VERSION_MAJOR 1
23028			#define LIBDEFLATE_VERSION_MINOR 25
23029			#define LIBDEFLATE_VERSION_STRING "1.25"
23030
23031
/*
 * Default LIBDEFLATEAPI when nothing defined it earlier: dllimport when
 * LIBDEFLATE_DLL is defined on Windows/Cygwin, otherwise empty.
 */
#ifndef LIBDEFLATEAPI
#  if defined(LIBDEFLATE_DLL) && (defined(_WIN32) || defined(__CYGWIN__))
#    define LIBDEFLATEAPI	__declspec(dllimport)
#  else
#    define LIBDEFLATEAPI
#  endif
#endif
23039
23040
23041
23042
23043
23044			struct libdeflate_compressor;
23045			struct libdeflate_options;
23046
23047
23048			LIBDEFLATEAPI struct libdeflate_compressor *
23049			libdeflate_alloc_compressor(int compression_level);
23050
23051
23052			LIBDEFLATEAPI struct libdeflate_compressor *
23053			libdeflate_alloc_compressor_ex(int compression_level,
23054			const struct libdeflate_options *options);
23055
23056
23057			LIBDEFLATEAPI size_t
23058			libdeflate_deflate_compress(struct libdeflate_compressor *compressor,
23059			const void *in, size_t in_nbytes,
23060			void *out, size_t out_nbytes_avail);
23061
23062
23063			LIBDEFLATEAPI size_t
23064			libdeflate_deflate_compress_bound(struct libdeflate_compressor *compressor,
23065			size_t in_nbytes);
23066
23067
23068			LIBDEFLATEAPI size_t
23069			libdeflate_zlib_compress(struct libdeflate_compressor *compressor,
23070			const void *in, size_t in_nbytes,
23071			void *out, size_t out_nbytes_avail);
23072
23073
23074			LIBDEFLATEAPI size_t
23075			libdeflate_zlib_compress_bound(struct libdeflate_compressor *compressor,
23076			size_t in_nbytes);
23077
23078
23079			LIBDEFLATEAPI size_t
23080			libdeflate_gzip_compress(struct libdeflate_compressor *compressor,
23081			const void *in, size_t in_nbytes,
23082			void *out, size_t out_nbytes_avail);
23083
23084
23085			LIBDEFLATEAPI size_t
23086			libdeflate_gzip_compress_bound(struct libdeflate_compressor *compressor,
23087			size_t in_nbytes);
23088
23089
23090			LIBDEFLATEAPI void
23091			libdeflate_free_compressor(struct libdeflate_compressor *compressor);
23092
23093
23094
23095
23096
23097			struct libdeflate_decompressor;
23098			struct libdeflate_options;
23099
23100
23101			LIBDEFLATEAPI struct libdeflate_decompressor *
23102			libdeflate_alloc_decompressor(void);
23103
23104
23105			LIBDEFLATEAPI struct libdeflate_decompressor *
23106			libdeflate_alloc_decompressor_ex(const struct libdeflate_options *options);
23107
23108
/*
 * Result codes returned by the decompression functions.  The numeric values
 * are spelled out explicitly.
 */
enum libdeflate_result {
	LIBDEFLATE_SUCCESS            = 0,
	LIBDEFLATE_BAD_DATA           = 1,
	LIBDEFLATE_SHORT_OUTPUT       = 2,
	LIBDEFLATE_INSUFFICIENT_SPACE = 3,
};
23122
23123
23124			LIBDEFLATEAPI enum libdeflate_result
23125			libdeflate_deflate_decompress(struct libdeflate_decompressor *decompressor,
23126			const void *in, size_t in_nbytes,
23127			void *out, size_t out_nbytes_avail,
23128			size_t *actual_out_nbytes_ret);
23129
23130
23131			LIBDEFLATEAPI enum libdeflate_result
23132			libdeflate_deflate_decompress_ex(struct libdeflate_decompressor *decompressor,
23133			const void *in, size_t in_nbytes,
23134			void *out, size_t out_nbytes_avail,
23135			size_t *actual_in_nbytes_ret,
23136			size_t *actual_out_nbytes_ret);
23137
23138
23139			LIBDEFLATEAPI enum libdeflate_result
23140			libdeflate_zlib_decompress(struct libdeflate_decompressor *decompressor,
23141			const void *in, size_t in_nbytes,
23142			void *out, size_t out_nbytes_avail,
23143			size_t *actual_out_nbytes_ret);
23144
23145
23146			LIBDEFLATEAPI enum libdeflate_result
23147			libdeflate_zlib_decompress_ex(struct libdeflate_decompressor *decompressor,
23148			const void *in, size_t in_nbytes,
23149			void *out, size_t out_nbytes_avail,
23150			size_t *actual_in_nbytes_ret,
23151			size_t *actual_out_nbytes_ret);
23152
23153
23154			LIBDEFLATEAPI enum libdeflate_result
23155			libdeflate_gzip_decompress(struct libdeflate_decompressor *decompressor,
23156			const void *in, size_t in_nbytes,
23157			void *out, size_t out_nbytes_avail,
23158			size_t *actual_out_nbytes_ret);
23159
23160
23161			LIBDEFLATEAPI enum libdeflate_result
23162			libdeflate_gzip_decompress_ex(struct libdeflate_decompressor *decompressor,
23163			const void *in, size_t in_nbytes,
23164			void *out, size_t out_nbytes_avail,
23165			size_t *actual_in_nbytes_ret,
23166			size_t *actual_out_nbytes_ret);
23167
23168
23169			LIBDEFLATEAPI void
23170			libdeflate_free_decompressor(struct libdeflate_decompressor *decompressor);
23171
23172
23173
23174
23175
23176
23177			LIBDEFLATEAPI uint32_t
23178			libdeflate_adler32(uint32_t adler, const void *buffer, size_t len);
23179
23180
23181
23182			LIBDEFLATEAPI uint32_t
23183			libdeflate_crc32(uint32_t crc, const void *buffer, size_t len);
23184
23185
23186
23187
23188
23189
23190			LIBDEFLATEAPI void
23191			libdeflate_set_memory_allocator(void (malloc_func)(size_t),
23192			void (free_func)(void ));
23193
23194
/* Options passed to the *_ex allocation functions. */
struct libdeflate_options {

	/* Size of this struct; presumably set by the caller to sizeof(). */
	size_t sizeof_options;

	/* Allocator callbacks with the signatures of malloc() and free(). */
	void *(*malloc_func)(size_t);
	void (*free_func)(void *);
};
23204
23205			#ifdef __cplusplus
23206			}
23207			#endif
23208
23209			#endif
23210
23211
#include <stdbool.h>
#include <stddef.h>	/* size_t */
#include <stdint.h>
#ifdef _MSC_VER
#  include <intrin.h>	/* _BitScan*() and other intrinsics */
#  include <stdlib.h>	/* _byteswap_*() */

   /* Silence MSVC warnings that this code is known to trigger. */
#  pragma warning(disable : 4146) /* unary minus on unsigned type */

#  pragma warning(disable : 4018) /* signed/unsigned mismatch */
#  pragma warning(disable : 4244) /* possible loss of data */
#  pragma warning(disable : 4267) /* possible loss of data (size_t) */
#  pragma warning(disable : 4310) /* cast truncates constant value */

#  pragma warning(disable : 4100) /* unreferenced formal parameter */
#  pragma warning(disable : 4127) /* conditional expression is constant */
#  pragma warning(disable : 4189) /* local variable unreferenced */
#  pragma warning(disable : 4232) /* nonstandard extension used */
#  pragma warning(disable : 4245) /* signed/unsigned mismatch */
#  pragma warning(disable : 4295) /* array too small for terminating null */
#endif
#ifndef FREESTANDING
#  include <string.h>	/* memcpy() */
#endif
23237
23238
23239
23240
23241
23242
/*
 * Target architecture detection.  At most one ARCH_* macro ends up defined;
 * any earlier definitions are discarded first.
 */
#undef ARCH_X86_64
#undef ARCH_X86_32
#undef ARCH_ARM64
#undef ARCH_ARM32
#undef ARCH_RISCV
#if defined(_MSC_VER)
   /* MSVC: use the _M_* macros. */
#  if defined(_M_X64) && !defined(_M_ARM64EC)
#    define ARCH_X86_64
#  elif defined(_M_IX86)
#    define ARCH_X86_32
#  elif defined(_M_ARM64)
#    define ARCH_ARM64
#  elif defined(_M_ARM)
#    define ARCH_ARM32
#  endif
#else
   /* Other compilers: use the gcc-style macros. */
#  if defined(__x86_64__)
#    define ARCH_X86_64
#  elif defined(__i386__)
#    define ARCH_X86_32
#  elif defined(__aarch64__)
#    define ARCH_ARM64
#  elif defined(__arm__)
#    define ARCH_ARM32
#  elif defined(__riscv)
#    define ARCH_RISCV
#  endif
#endif
23272
23273
23274
23275
23276
23277
/* Short aliases for the fixed-width integer types. */
typedef uint8_t  u8;
typedef uint16_t u16;
typedef uint32_t u32;
typedef uint64_t u64;
typedef int8_t   s8;
typedef int16_t  s16;
typedef int32_t  s32;
typedef int64_t  s64;

/* MSVC does not provide ssize_t, so define it here. */
#ifdef _MSC_VER
#  ifdef _WIN64
typedef long long ssize_t;
#  else
typedef long ssize_t;
#  endif
#endif

/* The word type used for word-at-a-time operations. */
typedef size_t machine_word_t;

/* Bytes per machine word, as an int. */
#define WORDBYTES	((int)sizeof(machine_word_t))

/* Bits per machine word. */
#define WORDBITS	(8 * WORDBYTES)
23304
23305
23306
23307
23308
23309
/*
 * Compiler version checks.  Each *_PREREQ() macro evaluates to 0 when the
 * code is not being built with that compiler.
 */

/* GCC_PREREQ(major, minor): real gcc (not clang or icc) of at least this version. */
#if defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER)
#  define GCC_PREREQ(major, minor)		\
	(__GNUC__ > (major) ||			\
	 (__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)))
#  if !GCC_PREREQ(4, 9)
#    error "gcc versions older than 4.9 are no longer supported"
#  endif
#else
#  define GCC_PREREQ(major, minor)	0
#endif
/*
 * CLANG_PREREQ(major, minor, apple_version): Apple clang is compared by
 * __apple_build_version__ only; other clang by major/minor.
 */
#ifdef __clang__
#  ifdef __apple_build_version__
#    define CLANG_PREREQ(major, minor, apple_version)	\
	(__apple_build_version__ >= (apple_version))
#  else
#    define CLANG_PREREQ(major, minor, apple_version)	\
	(__clang_major__ > (major) ||			\
	 (__clang_major__ == (major) && __clang_minor__ >= (minor)))
#  endif
#  if !CLANG_PREREQ(3, 9, 8000000)
#    error "clang versions older than 3.9 are no longer supported"
#  endif
#else
#  define CLANG_PREREQ(major, minor, apple_version)	0
#endif
/* MSVC_PREREQ(version): compares against _MSC_VER. */
#ifdef _MSC_VER
#  define MSVC_PREREQ(version)	(_MSC_VER >= (version))
#  if !MSVC_PREREQ(1900)
#    error "MSVC versions older than Visual Studio 2015 are no longer supported"
#  endif
#else
#  define MSVC_PREREQ(version)	0
#endif
23343
23344
/* Fallbacks so that __has_attribute()/__has_builtin() can be used in #if. */
#ifndef __has_attribute
#  define __has_attribute(attribute)	0
#endif

#ifndef __has_builtin
#  define __has_builtin(builtin)	0
#endif

/* MSVC spells 'inline' as '__inline'. */
#ifdef _MSC_VER
#  define inline		__inline
#endif

/* forceinline: request inlining as strongly as the compiler allows. */
#if defined(__GNUC__) || __has_attribute(always_inline)
#  define forceinline		inline __attribute__((always_inline))
#elif defined(_MSC_VER)
#  define forceinline		__forceinline
#else
#  define forceinline		inline
#endif

/* MAYBE_UNUSED: suppress "unused" warnings for the annotated entity. */
#if defined(__GNUC__) || __has_attribute(unused)
#  define MAYBE_UNUSED		__attribute__((unused))
#else
#  define MAYBE_UNUSED
#endif

/* NORETURN: the annotated function does not return. */
#if defined(__GNUC__) || __has_attribute(noreturn)
#  define NORETURN		__attribute__((noreturn))
#else
#  define NORETURN
#endif

/* Provide 'restrict' when not compiling as C11 or later. */
#if !defined(__STDC_VERSION__) || (__STDC_VERSION__ < 201112L)
#  if defined(__GNUC__) || defined(__clang__)
#    define restrict		__restrict__
#  else
#    define restrict
#  endif
#endif

/* likely(expr): hint that 'expr' is usually true. */
#if defined(__GNUC__) || __has_builtin(__builtin_expect)
#  define likely(expr)		__builtin_expect(!!(expr), 1)
#else
#  define likely(expr)		(expr)
#endif

/* unlikely(expr): hint that 'expr' is usually false. */
#if defined(__GNUC__) || __has_builtin(__builtin_expect)
#  define unlikely(expr)	__builtin_expect(!!(expr), 0)
#else
#  define unlikely(expr)	(expr)
#endif
23404
23405
/* prefetchr(addr): prefetch for reading; expands to nothing if unsupported. */
#undef prefetchr
#if defined(__GNUC__) || __has_builtin(__builtin_prefetch)
#  define prefetchr(addr)	__builtin_prefetch((addr), 0)
#elif defined(_MSC_VER)
#  if defined(ARCH_X86_32) || defined(ARCH_X86_64)
#    define prefetchr(addr)	_mm_prefetch((addr), _MM_HINT_T0)
#  elif defined(ARCH_ARM64)
#    define prefetchr(addr)	__prefetch2((addr), 0x00 /* prfop=PLDL1KEEP */)
#  elif defined(ARCH_ARM32)
#    define prefetchr(addr)	__prefetch(addr)
#  endif
#endif
#ifndef prefetchr
#  define prefetchr(addr)
#endif

/* prefetchw(addr): prefetch for writing; expands to nothing if unsupported. */
#undef prefetchw
#if defined(__GNUC__) || __has_builtin(__builtin_prefetch)
#  define prefetchw(addr)	__builtin_prefetch((addr), 1)
#elif defined(_MSC_VER)
#  if defined(ARCH_X86_32) || defined(ARCH_X86_64)
#    define prefetchw(addr)	_m_prefetchw(addr)
#  elif defined(ARCH_ARM64)
#    define prefetchw(addr)	__prefetch2((addr), 0x10 /* prfop=PSTL1KEEP */)
#  elif defined(ARCH_ARM32)
#    define prefetchw(addr)	__prefetchw(addr)
#  endif
#endif
#ifndef prefetchw
#  define prefetchw(addr)
#endif
23438
23439
/*
 * _aligned_attribute(n): align the annotated object to n bytes.  Left
 * undefined when the compiler offers no known way to do this.
 */
#undef _aligned_attribute
#if defined(__GNUC__) || __has_attribute(aligned)
#  define _aligned_attribute(n)	__attribute__((aligned(n)))
#elif defined(_MSC_VER)
#  define _aligned_attribute(n)	__declspec(align(n))
#endif

/*
 * _target_attribute(attrs): compile the annotated function with the given
 * target features enabled; expands to nothing when unsupported.
 */
#if defined(__GNUC__) || __has_attribute(target)
#  define _target_attribute(attrs)	__attribute__((target(attrs)))
#else
#  define _target_attribute(attrs)
#endif
23453
23454
23455
23456
23457
/*
 * Small arithmetic helpers.  MIN(), MAX(), DIV_ROUND_UP(), ALIGN() and
 * ROUND_UP() expand an argument more than once, so avoid side effects in
 * arguments.  ALIGN() uses a bit mask, which only rounds correctly when
 * 'align' is a power of two.
 */
#define ARRAY_LEN(arr)		(sizeof(arr) / sizeof((arr)[0]))
#define MIN(x, y)		((x) <= (y) ? (x) : (y))
#define MAX(x, y)		((x) >= (y) ? (x) : (y))
#define DIV_ROUND_UP(num, den)	(((num) + (den) - 1) / (den))
#define STATIC_ASSERT(cond)	((void)sizeof(char[1 - 2 * !(cond)]))
#define ALIGN(num, align)	(((num) + (align) - 1) & ~((align) - 1))
#define ROUND_UP(num, den)	((den) * DIV_ROUND_UP((num), (den)))
23465
23466
23467
23468
23469
23470
/*
 * CPU_IS_LITTLE_ENDIAN(): true on a little-endian target.  Uses the
 * compiler's __BYTE_ORDER__ when available, assumes little endian on MSVC,
 * and otherwise inspects the first byte of a word holding the value 1.
 */
#ifdef __BYTE_ORDER__
#  define CPU_IS_LITTLE_ENDIAN() (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
#elif defined(_MSC_VER)
#  define CPU_IS_LITTLE_ENDIAN() true
#else
static forceinline bool CPU_IS_LITTLE_ENDIAN(void)
{
	union {
		u32 word;
		u8 first_byte;
	} probe;

	probe.word = 1;
	return probe.first_byte;
}
#endif
23487
23488
23489			static forceinline u16 bswap16(u16 v)
23490			{
23491			#if defined(__GNUC__) \|\| __has_builtin(__builtin_bswap16)
23492			return __builtin_bswap16(v);
23493			#elif defined(_MSC_VER)
23494			return _byteswap_ushort(v);
23495			#else
23496			return (v << 8) \| (v >> 8);
23497			#endif
23498			}
23499
23500
23501			static forceinline u32 bswap32(u32 v)
23502			{
23503			#if defined(__GNUC__) \|\| __has_builtin(__builtin_bswap32)
23504			return __builtin_bswap32(v);
23505			#elif defined(_MSC_VER)
23506			return _byteswap_ulong(v);
23507			#else
23508			return ((v & 0x000000FF) << 24) \|
23509			((v & 0x0000FF00) << 8) \|
23510			((v & 0x00FF0000) >> 8) \|
23511			((v & 0xFF000000) >> 24);
23512			#endif
23513			}
23514
23515
23516			static forceinline u64 bswap64(u64 v)
23517			{
23518			#if defined(__GNUC__) \|\| __has_builtin(__builtin_bswap64)
23519			return __builtin_bswap64(v);
23520			#elif defined(_MSC_VER)
23521			return _byteswap_uint64(v);
23522			#else
23523			return ((v & 0x00000000000000FF) << 56) \|
23524			((v & 0x000000000000FF00) << 40) \|
23525			((v & 0x0000000000FF0000) << 24) \|
23526			((v & 0x00000000FF000000) << 8) \|
23527			((v & 0x000000FF00000000) >> 8) \|
23528			((v & 0x0000FF0000000000) >> 24) \|
23529			((v & 0x00FF000000000000) >> 40) \|
23530			((v & 0xFF00000000000000) >> 56);
23531			#endif
23532			}
23533
/*
 * Convert between CPU byte order and a fixed byte order.  The le*_bswap()
 * macros swap only on big-endian CPUs, the be*_bswap() macros only on
 * little-endian CPUs.
 */
#define le16_bswap(x) (CPU_IS_LITTLE_ENDIAN() ? (x) : bswap16(x))
#define le32_bswap(x) (CPU_IS_LITTLE_ENDIAN() ? (x) : bswap32(x))
#define le64_bswap(x) (CPU_IS_LITTLE_ENDIAN() ? (x) : bswap64(x))
#define be16_bswap(x) (CPU_IS_LITTLE_ENDIAN() ? bswap16(x) : (x))
#define be32_bswap(x) (CPU_IS_LITTLE_ENDIAN() ? bswap32(x) : (x))
#define be64_bswap(x) (CPU_IS_LITTLE_ENDIAN() ? bswap64(x) : (x))
23540
23541
23542
23543
23544
23545
/*
 * UNALIGNED_ACCESS_IS_FAST: 1 when word-sized unaligned loads and stores are
 * taken to be efficient on the target, otherwise 0.
 */
#if (defined(__GNUC__) || defined(__clang__)) && \
	(defined(ARCH_X86_64) || defined(ARCH_X86_32) || \
	 defined(__ARM_FEATURE_UNALIGNED) || defined(__powerpc64__) || \
	 defined(__riscv_misaligned_fast) || \
	 defined(__wasm__))
#  define UNALIGNED_ACCESS_IS_FAST	1
#elif defined(_MSC_VER)
#  define UNALIGNED_ACCESS_IS_FAST	1
#else
#  define UNALIGNED_ACCESS_IS_FAST	0
#endif
23557
23558
23559
23560			#ifdef FREESTANDING
23561			# define MEMCOPY __builtin_memcpy
23562			#else
23563			# define MEMCOPY memcpy
23564			#endif
23565
23566
23567
23568			#define DEFINE_UNALIGNED_TYPE(type) \
23569			static forceinline type \
23570			load_##type##_unaligned(const void *p) \
23571			{ \
23572			type v; \
23573			\
23574			MEMCOPY(&v, p, sizeof(v)); \
23575			return v; \
23576			} \
23577			\
23578			static forceinline void \
23579			store_##type##_unaligned(type v, void *p) \
23580			{ \
23581			MEMCOPY(p, &v, sizeof(v)); \
23582			}
23583
23584			DEFINE_UNALIGNED_TYPE(u16)
23585			DEFINE_UNALIGNED_TYPE(u32)
23586			DEFINE_UNALIGNED_TYPE(u64)
23587			DEFINE_UNALIGNED_TYPE(machine_word_t)
23588
23589			#undef MEMCOPY
23590
23591			#define load_word_unaligned load_machine_word_t_unaligned
23592			#define store_word_unaligned store_machine_word_t_unaligned
23593
23594
23595
23596			static forceinline u16
23597			get_unaligned_le16(const u8 *p)
23598			{
23599			if (UNALIGNED_ACCESS_IS_FAST)
23600			return le16_bswap(load_u16_unaligned(p));
23601			else
23602			return ((u16)p[1] << 8) \| p[0];
23603			}
23604
23605			static forceinline u16
23606			get_unaligned_be16(const u8 *p)
23607			{
23608			if (UNALIGNED_ACCESS_IS_FAST)
23609			return be16_bswap(load_u16_unaligned(p));
23610			else
23611			return ((u16)p[0] << 8) \| p[1];
23612			}
23613
23614			static forceinline u32
23615			get_unaligned_le32(const u8 *p)
23616			{
23617			if (UNALIGNED_ACCESS_IS_FAST)
23618			return le32_bswap(load_u32_unaligned(p));
23619			else
23620			return ((u32)p[3] << 24) \| ((u32)p[2] << 16) \|
23621			((u32)p[1] << 8) \| p[0];
23622			}
23623
23624			static forceinline u32
23625			get_unaligned_be32(const u8 *p)
23626			{
23627			if (UNALIGNED_ACCESS_IS_FAST)
23628			return be32_bswap(load_u32_unaligned(p));
23629			else
23630			return ((u32)p[0] << 24) \| ((u32)p[1] << 16) \|
23631			((u32)p[2] << 8) \| p[3];
23632			}
23633
23634			static forceinline u64
23635			get_unaligned_le64(const u8 *p)
23636			{
23637			if (UNALIGNED_ACCESS_IS_FAST)
23638			return le64_bswap(load_u64_unaligned(p));
23639			else
23640			return ((u64)p[7] << 56) \| ((u64)p[6] << 48) \|
23641			((u64)p[5] << 40) \| ((u64)p[4] << 32) \|
23642			((u64)p[3] << 24) \| ((u64)p[2] << 16) \|
23643			((u64)p[1] << 8) \| p[0];
23644			}
23645
23646			static forceinline machine_word_t
23647			get_unaligned_leword(const u8 *p)
23648			{
23649			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
23650			if (WORDBITS == 32)
23651			return get_unaligned_le32(p);
23652			else
23653			return get_unaligned_le64(p);
23654			}
23655
23656
23657
23658			static forceinline void
23659			put_unaligned_le16(u16 v, u8 *p)
23660			{
23661			if (UNALIGNED_ACCESS_IS_FAST) {
23662			store_u16_unaligned(le16_bswap(v), p);
23663			} else {
23664			p[0] = (u8)(v >> 0);
23665			p[1] = (u8)(v >> 8);
23666			}
23667			}
23668
23669			static forceinline void
23670			put_unaligned_be16(u16 v, u8 *p)
23671			{
23672			if (UNALIGNED_ACCESS_IS_FAST) {
23673			store_u16_unaligned(be16_bswap(v), p);
23674			} else {
23675			p[0] = (u8)(v >> 8);
23676			p[1] = (u8)(v >> 0);
23677			}
23678			}
23679
23680			static forceinline void
23681			put_unaligned_le32(u32 v, u8 *p)
23682			{
23683			if (UNALIGNED_ACCESS_IS_FAST) {
23684			store_u32_unaligned(le32_bswap(v), p);
23685			} else {
23686			p[0] = (u8)(v >> 0);
23687			p[1] = (u8)(v >> 8);
23688			p[2] = (u8)(v >> 16);
23689			p[3] = (u8)(v >> 24);
23690			}
23691			}
23692
23693			static forceinline void
23694			put_unaligned_be32(u32 v, u8 *p)
23695			{
23696			if (UNALIGNED_ACCESS_IS_FAST) {
23697			store_u32_unaligned(be32_bswap(v), p);
23698			} else {
23699			p[0] = (u8)(v >> 24);
23700			p[1] = (u8)(v >> 16);
23701			p[2] = (u8)(v >> 8);
23702			p[3] = (u8)(v >> 0);
23703			}
23704			}
23705
23706			static forceinline void
23707			put_unaligned_le64(u64 v, u8 *p)
23708			{
23709			if (UNALIGNED_ACCESS_IS_FAST) {
23710			store_u64_unaligned(le64_bswap(v), p);
23711			} else {
23712			p[0] = (u8)(v >> 0);
23713			p[1] = (u8)(v >> 8);
23714			p[2] = (u8)(v >> 16);
23715			p[3] = (u8)(v >> 24);
23716			p[4] = (u8)(v >> 32);
23717			p[5] = (u8)(v >> 40);
23718			p[6] = (u8)(v >> 48);
23719			p[7] = (u8)(v >> 56);
23720			}
23721			}
23722
23723			static forceinline void
23724			put_unaligned_leword(machine_word_t v, u8 *p)
23725			{
23726			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
23727			if (WORDBITS == 32)
23728			put_unaligned_le32(v, p);
23729			else
23730			put_unaligned_le64(v, p);
23731			}
23732
23733
23734
23735
23736
23737
23738
23739			static forceinline unsigned
23740			bsr32(u32 v)
23741			{
23742			#if defined(__GNUC__) \|\| __has_builtin(__builtin_clz)
23743			return 31 - __builtin_clz(v);
23744			#elif defined(_MSC_VER)
23745			unsigned long i;
23746
23747			_BitScanReverse(&i, v);
23748			return i;
23749			#else
23750			unsigned i = 0;
23751
23752			while ((v >>= 1) != 0)
23753			i++;
23754			return i;
23755			#endif
23756			}
23757
23758			static forceinline unsigned
23759			bsr64(u64 v)
23760			{
23761			#if defined(__GNUC__) \|\| __has_builtin(__builtin_clzll)
23762			return 63 - __builtin_clzll(v);
23763			#elif defined(_MSC_VER) && defined(_WIN64)
23764			unsigned long i;
23765
23766			_BitScanReverse64(&i, v);
23767			return i;
23768			#else
23769			unsigned i = 0;
23770
23771			while ((v >>= 1) != 0)
23772			i++;
23773			return i;
23774			#endif
23775			}
23776
23777			static forceinline unsigned
23778			bsrw(machine_word_t v)
23779			{
23780			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
23781			if (WORDBITS == 32)
23782			return bsr32(v);
23783			else
23784			return bsr64(v);
23785			}
23786
23787
23788
23789			static forceinline unsigned
23790			bsf32(u32 v)
23791			{
23792			#if defined(__GNUC__) \|\| __has_builtin(__builtin_ctz)
23793			return __builtin_ctz(v);
23794			#elif defined(_MSC_VER)
23795			unsigned long i;
23796
23797			_BitScanForward(&i, v);
23798			return i;
23799			#else
23800			unsigned i = 0;
23801
23802			for (; (v & 1) == 0; v >>= 1)
23803			i++;
23804			return i;
23805			#endif
23806			}
23807
23808			static forceinline unsigned
23809			bsf64(u64 v)
23810			{
23811			#if defined(__GNUC__) \|\| __has_builtin(__builtin_ctzll)
23812			return __builtin_ctzll(v);
23813			#elif defined(_MSC_VER) && defined(_WIN64)
23814			unsigned long i;
23815
23816			_BitScanForward64(&i, v);
23817			return i;
23818			#else
23819			unsigned i = 0;
23820
23821			for (; (v & 1) == 0; v >>= 1)
23822			i++;
23823			return i;
23824			#endif
23825			}
23826
23827			static forceinline unsigned
23828			bsfw(machine_word_t v)
23829			{
23830			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
23831			if (WORDBITS == 32)
23832			return bsf32(v);
23833			else
23834			return bsf64(v);
23835			}
23836
23837
/*
 * rbit32(v): 32-bit bit reversal using the ARM 'rbit' instruction.  Defined
 * only on the ARM targets selected below; the self-referential
 * '#define rbit32 rbit32' lets other code test for it with #ifdef.
 */
#undef rbit32
#if (defined(__GNUC__) || defined(__clang__)) && defined(ARCH_ARM32) && \
	(__ARM_ARCH >= 7 || (__ARM_ARCH == 6 && defined(__ARM_ARCH_6T2__)))
static forceinline u32
rbit32(u32 v)
{
	__asm__("rbit %0, %1" : "=r" (v) : "r" (v));
	return v;
}
#define rbit32 rbit32
#elif (defined(__GNUC__) || defined(__clang__)) && defined(ARCH_ARM64)
static forceinline u32
rbit32(u32 v)
{
	__asm__("rbit %w0, %w1" : "=r" (v) : "r" (v));
	return v;
}
#define rbit32 rbit32
#endif
23857
23858			#endif
23859
23860
/* Allocator callback types, with the signatures of malloc() and free(). */
typedef void *(*malloc_func_t)(size_t);
typedef void (*free_func_t)(void *);
23863
23864			extern malloc_func_t libdeflate_default_malloc_func;
23865			extern free_func_t libdeflate_default_free_func;
23866
23867			void *libdeflate_aligned_malloc(malloc_func_t malloc_func,
23868			size_t alignment, size_t size);
23869			void libdeflate_aligned_free(free_func_t free_func, void *ptr);
23870
#ifdef FREESTANDING
/*
 * Freestanding build: declare the four memory primitives ourselves and route
 * every call through the matching compiler builtin.  The prototypes take and
 * return pointers, exactly like the standard <string.h> versions.
 */
void *memset(void *s, int c, size_t n);
#define memset(s, c, n)		__builtin_memset((s), (c), (n))

void *memcpy(void *dest, const void *src, size_t n);
#define memcpy(dest, src, n)	__builtin_memcpy((dest), (src), (n))

void *memmove(void *dest, const void *src, size_t n);
#define memmove(dest, src, n)	__builtin_memmove((dest), (src), (n))

int memcmp(const void *s1, const void *s2, size_t n);
#define memcmp(s1, s2, n)	__builtin_memcmp((s1), (s2), (n))

/* Assertions are never enabled in freestanding builds. */
#undef LIBDEFLATE_ENABLE_ASSERTIONS
#else
#  include <string.h>

   /* Enable assertions whenever the clang static analyzer is running. */
#  ifdef __clang_analyzer__
#    define LIBDEFLATE_ENABLE_ASSERTIONS
#  endif
#endif
23893
23894
/*
 * ASSERT(expr) - internal consistency check.
 *
 * With LIBDEFLATE_ENABLE_ASSERTIONS defined, a false 'expr' calls
 * libdeflate_assertion_failed() with the stringified expression, the file name
 * and the line number.  Otherwise 'expr' is still evaluated, but its value is
 * discarded.
 */
#ifdef LIBDEFLATE_ENABLE_ASSERTIONS
NORETURN void
libdeflate_assertion_failed(const char *expr, const char *file, int line);
#define ASSERT(expr) { if (unlikely(!(expr))) \
	libdeflate_assertion_failed(#expr, __FILE__, __LINE__); }
#else
#define ASSERT(expr) (void)(expr)
#endif
23903
/*
 * Token-pasting helpers.  CONCAT() goes through CONCAT_IMPL() so that its
 * arguments are macro-expanded before being pasted.  ADD_SUFFIX(name) appends
 * whatever the SUFFIX macro expands to at the point of use.
 */
#define CONCAT_IMPL(lhs, rhs)	lhs##rhs
#define CONCAT(lhs, rhs)	CONCAT_IMPL(lhs, rhs)
#define ADD_SUFFIX(name)	CONCAT(name, SUFFIX)
23907
23908			#endif
23909
23910
#if defined(ARCH_X86_32) || defined(ARCH_X86_64)

/* Bit flags describing x86 CPU features; combined into a u32 bitmask. */
#define X86_CPU_FEATURE_SSE2		(1 << 0)
#define X86_CPU_FEATURE_PCLMULQDQ	(1 << 1)
#define X86_CPU_FEATURE_AVX		(1 << 2)
#define X86_CPU_FEATURE_AVX2		(1 << 3)
#define X86_CPU_FEATURE_BMI2		(1 << 4)

#define X86_CPU_FEATURE_ZMM		(1 << 5)
#define X86_CPU_FEATURE_AVX512BW	(1 << 6)
#define X86_CPU_FEATURE_AVX512VL	(1 << 7)
#define X86_CPU_FEATURE_VPCLMULQDQ	(1 << 8)
#define X86_CPU_FEATURE_AVX512VNNI	(1 << 9)
#define X86_CPU_FEATURE_AVXVNNI		(1 << 10)

#if defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER)

/*
 * Presumably set by libdeflate_init_x86_cpu_features() so that the cached
 * mask is nonzero even when no feature bit is set -- confirm at the
 * definition.
 */
#  define X86_CPU_FEATURES_KNOWN	(1U << 31)
extern volatile u32 libdeflate_x86_cpu_features;

void libdeflate_init_x86_cpu_features(void);

/*
 * Return the cached feature mask, calling the initializer first if the
 * cached value is still zero.
 */
static inline u32 get_x86_cpu_features(void)
{
	if (libdeflate_x86_cpu_features == 0)
		libdeflate_init_x86_cpu_features();
	return libdeflate_x86_cpu_features;
}

/*
 * Intrinsics headers.
 * NOTE(review): the header names were missing from this copy of the file and
 * have been restored to match upstream libdeflate -- confirm against the
 * original lib/x86/cpu_features.h.  The per-ISA headers are included
 * explicitly for clang in MSVC-compatibility mode.
 */
#  include <immintrin.h>
#  if defined(_MSC_VER) && defined(__clang__)
#    include <tmmintrin.h>
#    include <smmintrin.h>
#    include <wmmintrin.h>
#    include <avxintrin.h>
#    include <avx2intrin.h>
#    include <avx512fintrin.h>
#    include <avx512bwintrin.h>
#    include <avx512vlintrin.h>
#    if __has_include(<avx512vlbwintrin.h>)
#      include <avx512vlbwintrin.h>
#    endif
#    if __has_include(<vpclmulqdqintrin.h>)
#      include <vpclmulqdqintrin.h>
#    endif
#    if __has_include(<avx512vnniintrin.h>)
#      include <avx512vnniintrin.h>
#    endif
#    if __has_include(<avx512vlvnniintrin.h>)
#      include <avx512vlvnniintrin.h>
#    endif
#    if __has_include(<avxvnniintrin.h>)
#      include <avxvnniintrin.h>
#    endif
#  endif
#else
/* Unrecognized compiler: report that no features are available. */
static inline u32 get_x86_cpu_features(void) { return 0; }
#endif

/*
 * HAVE_<FEATURE>(features): evaluates to the constant 1 when the compiler
 * already targets the feature unconditionally, otherwise tests the
 * corresponding bit of the 'features' mask.
 */
#if defined(__SSE2__) || \
	(defined(_MSC_VER) && \
	 (defined(ARCH_X86_64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 2)))
#  define HAVE_SSE2(features)	1
#  define HAVE_SSE2_NATIVE	1
#else
#  define HAVE_SSE2(features)	((features) & X86_CPU_FEATURE_SSE2)
#  define HAVE_SSE2_NATIVE	0
#endif

#if (defined(__PCLMUL__) && defined(__SSE4_1__)) || \
	(defined(_MSC_VER) && defined(__AVX2__))
#  define HAVE_PCLMULQDQ(features)	1
#else
#  define HAVE_PCLMULQDQ(features)	((features) & X86_CPU_FEATURE_PCLMULQDQ)
#endif

#ifdef __AVX__
#  define HAVE_AVX(features)	1
#else
#  define HAVE_AVX(features)	((features) & X86_CPU_FEATURE_AVX)
#endif

#ifdef __AVX2__
#  define HAVE_AVX2(features)	1
#else
#  define HAVE_AVX2(features)	((features) & X86_CPU_FEATURE_AVX2)
#endif

#if defined(__BMI2__) || (defined(_MSC_VER) && defined(__AVX2__))
#  define HAVE_BMI2(features)	1
#  define HAVE_BMI2_NATIVE	1
#else
#  define HAVE_BMI2(features)	((features) & X86_CPU_FEATURE_BMI2)
#  define HAVE_BMI2_NATIVE	0
#endif

#ifdef __AVX512BW__
#  define HAVE_AVX512BW(features)	1
#else
#  define HAVE_AVX512BW(features)	((features) & X86_CPU_FEATURE_AVX512BW)
#endif

#ifdef __AVX512VL__
#  define HAVE_AVX512VL(features)	1
#else
#  define HAVE_AVX512VL(features)	((features) & X86_CPU_FEATURE_AVX512VL)
#endif

#ifdef __VPCLMULQDQ__
#  define HAVE_VPCLMULQDQ(features)	1
#else
#  define HAVE_VPCLMULQDQ(features)	((features) & X86_CPU_FEATURE_VPCLMULQDQ)
#endif

#ifdef __AVX512VNNI__
#  define HAVE_AVX512VNNI(features)	1
#else
#  define HAVE_AVX512VNNI(features)	((features) & X86_CPU_FEATURE_AVX512VNNI)
#endif

#ifdef __AVXVNNI__
#  define HAVE_AVXVNNI(features)	1
#else
#  define HAVE_AVXVNNI(features)	((features) & X86_CPU_FEATURE_AVXVNNI)
#endif

#endif /* ARCH_X86_32 || ARCH_X86_64 */
24038
24039			#endif
24040
24041
24042			#ifdef __AVX2__
24043			static forceinline void
24044			matchfinder_init_avx2(mf_pos_t *data, size_t size)
24045			{
24046			__m256i p = (__m256i )data;
24047			__m256i v = _mm256_set1_epi16(MATCHFINDER_INITVAL);
24048
24049			STATIC_ASSERT(MATCHFINDER_MEM_ALIGNMENT % sizeof(*p) == 0);
24050			STATIC_ASSERT(MATCHFINDER_SIZE_ALIGNMENT % (4 * sizeof(*p)) == 0);
24051			STATIC_ASSERT(sizeof(mf_pos_t) == 2);
24052
24053			do {
24054			p[0] = v;
24055			p[1] = v;
24056			p[2] = v;
24057			p[3] = v;
24058			p += 4;
24059			size -= 4 * sizeof(*p);
24060			} while (size != 0);
24061			}
24062			#define matchfinder_init matchfinder_init_avx2
24063
24064			static forceinline void
24065			matchfinder_rebase_avx2(mf_pos_t *data, size_t size)
24066			{
24067			__m256i p = (__m256i )data;
24068			__m256i v = _mm256_set1_epi16((u16)-MATCHFINDER_WINDOW_SIZE);
24069
24070			STATIC_ASSERT(MATCHFINDER_MEM_ALIGNMENT % sizeof(*p) == 0);
24071			STATIC_ASSERT(MATCHFINDER_SIZE_ALIGNMENT % (4 * sizeof(*p)) == 0);
24072			STATIC_ASSERT(sizeof(mf_pos_t) == 2);
24073
24074			do {
24075
24076			p[0] = _mm256_adds_epi16(p[0], v);
24077			p[1] = _mm256_adds_epi16(p[1], v);
24078			p[2] = _mm256_adds_epi16(p[2], v);
24079			p[3] = _mm256_adds_epi16(p[3], v);
24080			p += 4;
24081			size -= 4 * sizeof(*p);
24082			} while (size != 0);
24083			}
24084			#define matchfinder_rebase matchfinder_rebase_avx2
24085
24086			#elif HAVE_SSE2_NATIVE
24087			static forceinline void
24088			matchfinder_init_sse2(mf_pos_t *data, size_t size)
24089			{
24090			__m128i p = (__m128i )data;
24091			__m128i v = _mm_set1_epi16(MATCHFINDER_INITVAL);
24092
24093			STATIC_ASSERT(MATCHFINDER_MEM_ALIGNMENT % sizeof(*p) == 0);
24094			STATIC_ASSERT(MATCHFINDER_SIZE_ALIGNMENT % (4 * sizeof(*p)) == 0);
24095			STATIC_ASSERT(sizeof(mf_pos_t) == 2);
24096
24097			do {
24098			p[0] = v;
24099			p[1] = v;
24100			p[2] = v;
24101			p[3] = v;
24102			p += 4;
24103			size -= 4 * sizeof(*p);
24104			} while (size != 0);
24105			}
24106			#define matchfinder_init matchfinder_init_sse2
24107
24108			static forceinline void
24109			matchfinder_rebase_sse2(mf_pos_t *data, size_t size)
24110			{
24111			__m128i p = (__m128i )data;
24112			__m128i v = _mm_set1_epi16((u16)-MATCHFINDER_WINDOW_SIZE);
24113
24114			STATIC_ASSERT(MATCHFINDER_MEM_ALIGNMENT % sizeof(*p) == 0);
24115			STATIC_ASSERT(MATCHFINDER_SIZE_ALIGNMENT % (4 * sizeof(*p)) == 0);
24116			STATIC_ASSERT(sizeof(mf_pos_t) == 2);
24117
24118			do {
24119
24120			p[0] = _mm_adds_epi16(p[0], v);
24121			p[1] = _mm_adds_epi16(p[1], v);
24122			p[2] = _mm_adds_epi16(p[2], v);
24123			p[3] = _mm_adds_epi16(p[3], v);
24124			p += 4;
24125			size -= 4 * sizeof(*p);
24126			} while (size != 0);
24127			}
24128			#define matchfinder_rebase matchfinder_rebase_sse2
24129			#endif
24130
24131			#endif
24132
24133			# endif
24134			#else
24135			# define MATCHFINDER_ALIGNED
24136			#endif
24137
24138
24139			#ifndef matchfinder_init
24140			static forceinline void
24141			matchfinder_init(mf_pos_t *data, size_t size)
24142			{
24143			size_t num_entries = size / sizeof(*data);
24144			size_t i;
24145
24146			for (i = 0; i < num_entries; i++)
24147			data[i] = MATCHFINDER_INITVAL;
24148			}
24149			#endif
24150
24151
24152			#ifndef matchfinder_rebase
24153			static forceinline void
24154			matchfinder_rebase(mf_pos_t *data, size_t size)
24155			{
24156			size_t num_entries = size / sizeof(*data);
24157			size_t i;
24158
24159			if (MATCHFINDER_WINDOW_SIZE == 32768) {
24160
24161			for (i = 0; i < num_entries; i++)
24162			data[i] = 0x8000 \| (data[i] & ~(data[i] >> 15));
24163			} else {
24164			for (i = 0; i < num_entries; i++) {
24165			if (data[i] >= 0)
24166			data[i] -= (mf_pos_t)-MATCHFINDER_WINDOW_SIZE;
24167			else
24168			data[i] = (mf_pos_t)-MATCHFINDER_WINDOW_SIZE;
24169			}
24170			}
24171			}
24172			#endif
24173
24174
24175			static forceinline u32
24176			lz_hash(u32 seq, unsigned num_bits)
24177			{
24178			return (u32)(seq * 0x1E35A7BD) >> (32 - num_bits);
24179			}
24180
24181
24182			static forceinline u32
24183			lz_extend(const u8 * const strptr, const u8 * const matchptr,
24184			const u32 start_len, const u32 max_len)
24185			{
24186			u32 len = start_len;
24187			machine_word_t v_word;
24188
24189			if (UNALIGNED_ACCESS_IS_FAST) {
24190
24191			if (likely(max_len - len >= 4 * WORDBYTES)) {
24192
24193			#define COMPARE_WORD_STEP \
24194			v_word = load_word_unaligned(&matchptr[len]) ^ \
24195			load_word_unaligned(&strptr[len]); \
24196			if (v_word != 0) \
24197			goto word_differs; \
24198			len += WORDBYTES; \
24199
24200			COMPARE_WORD_STEP
24201			COMPARE_WORD_STEP
24202			COMPARE_WORD_STEP
24203			COMPARE_WORD_STEP
24204			#undef COMPARE_WORD_STEP
24205			}
24206
24207			while (len + WORDBYTES <= max_len) {
24208			v_word = load_word_unaligned(&matchptr[len]) ^
24209			load_word_unaligned(&strptr[len]);
24210			if (v_word != 0)
24211			goto word_differs;
24212			len += WORDBYTES;
24213			}
24214			}
24215
24216			while (len < max_len && matchptr[len] == strptr[len])
24217			len++;
24218			return len;
24219
24220			word_differs:
24221			if (CPU_IS_LITTLE_ENDIAN())
24222			len += (bsfw(v_word) >> 3);
24223			else
24224			len += (WORDBITS - 1 - bsrw(v_word)) >> 3;
24225			return len;
24226			}
24227
24228			#endif
24229
24230
/* log2 of the number of buckets in the 3-byte hash table */
#define BT_MATCHFINDER_HASH3_ORDER 16
/* number of positions kept per 3-byte hash bucket */
#define BT_MATCHFINDER_HASH3_WAYS  2
/* log2 of the number of buckets in the 4-byte hash table */
#define BT_MATCHFINDER_HASH4_ORDER 16

/* Combined size in bytes of hash3_tab and hash4_tab. */
#define BT_MATCHFINDER_TOTAL_HASH_SIZE		\
	(((1UL << BT_MATCHFINDER_HASH3_ORDER) * BT_MATCHFINDER_HASH3_WAYS + \
	  (1UL << BT_MATCHFINDER_HASH4_ORDER)) * sizeof(mf_pos_t))
24238
24239
24240			struct lz_match {
24241
24242
24243			u16 length;
24244
24245
24246			u16 offset;
24247			};
24248
24249			struct MATCHFINDER_ALIGNED bt_matchfinder {
24250
24251
24252			mf_pos_t hash3_tab[1UL << BT_MATCHFINDER_HASH3_ORDER][BT_MATCHFINDER_HASH3_WAYS];
24253
24254
24255			mf_pos_t hash4_tab[1UL << BT_MATCHFINDER_HASH4_ORDER];
24256
24257
24258			mf_pos_t child_tab[2UL * MATCHFINDER_WINDOW_SIZE];
24259			};
24260
24261
24262			static forceinline void
24263			bt_matchfinder_init(struct bt_matchfinder *mf)
24264			{
24265			STATIC_ASSERT(BT_MATCHFINDER_TOTAL_HASH_SIZE %
24266			MATCHFINDER_SIZE_ALIGNMENT == 0);
24267
24268			matchfinder_init((mf_pos_t *)mf, BT_MATCHFINDER_TOTAL_HASH_SIZE);
24269	9		}
24270
24271			static forceinline void
24272			bt_matchfinder_slide_window(struct bt_matchfinder *mf)
24273			{
24274			STATIC_ASSERT(sizeof(*mf) % MATCHFINDER_SIZE_ALIGNMENT == 0);
24275
24276			matchfinder_rebase((mf_pos_t )mf, sizeof(mf));
24277	0		}
24278
24279			static forceinline mf_pos_t *
24280			bt_left_child(struct bt_matchfinder *mf, s32 node)
24281			{
24282	15102		return &mf->child_tab[2 * (node & (MATCHFINDER_WINDOW_SIZE - 1)) + 0];
24283			}
24284
24285			static forceinline mf_pos_t *
24286			bt_right_child(struct bt_matchfinder *mf, s32 node)
24287			{
24288	30366		return &mf->child_tab[2 * (node & (MATCHFINDER_WINDOW_SIZE - 1)) + 1];
24289			}
24290
24291
24292			#define BT_MATCHFINDER_REQUIRED_NBYTES 5
24293
24294
24295			static forceinline struct lz_match *
24296			bt_matchfinder_advance_one_byte(struct bt_matchfinder * const mf,
24297			const u8 * const in_base,
24298			const ptrdiff_t cur_pos,
24299			const u32 max_len,
24300			const u32 nice_len,
24301			const u32 max_search_depth,
24302			u32 * const next_hashes,
24303			struct lz_match *lz_matchptr,
24304			const bool record_matches)
24305			{
24306	15264		const u8 *in_next = in_base + cur_pos;
24307	15264		u32 depth_remaining = max_search_depth;
24308	15264		const s32 cutoff = cur_pos - MATCHFINDER_WINDOW_SIZE;
24309			u32 next_hashseq;
24310			u32 hash3;
24311			u32 hash4;
24312			s32 cur_node;
24313			#if BT_MATCHFINDER_HASH3_WAYS >= 2
24314			s32 cur_node_2;
24315			#endif
24316			const u8 *matchptr;
24317			mf_pos_t pending_lt_ptr, pending_gt_ptr;
24318			u32 best_lt_len, best_gt_len;
24319			u32 len;
24320	15264		u32 best_len = 3;
24321
24322			STATIC_ASSERT(BT_MATCHFINDER_HASH3_WAYS >= 1 &&
24323			BT_MATCHFINDER_HASH3_WAYS <= 2);
24324
24325	30528		next_hashseq = get_unaligned_le32(in_next + 1);
24326
24327	15264		hash3 = next_hashes[0];
24328	15264		hash4 = next_hashes[1];
24329
24330	15264		next_hashes[0] = lz_hash(next_hashseq & 0xFFFFFF, BT_MATCHFINDER_HASH3_ORDER);
24331	15264		next_hashes[1] = lz_hash(next_hashseq, BT_MATCHFINDER_HASH4_ORDER);
24332	15264		prefetchw(&mf->hash3_tab[next_hashes[0]]);
24333	15264		prefetchw(&mf->hash4_tab[next_hashes[1]]);
24334
24335	15264		cur_node = mf->hash3_tab[hash3][0];
24336	15264		mf->hash3_tab[hash3][0] = cur_pos;
24337			#if BT_MATCHFINDER_HASH3_WAYS >= 2
24338	15264		cur_node_2 = mf->hash3_tab[hash3][1];
24339	15264		mf->hash3_tab[hash3][1] = cur_node;
24340			#endif
24341	225	100	if (record_matches && cur_node > cutoff) {
24342	63		u32 seq3 = load_u24_unaligned(in_next);
24343	126	50	if (seq3 == load_u24_unaligned(&in_base[cur_node])) {
24344	63		lz_matchptr->length = 3;
24345	63		lz_matchptr->offset = in_next - &in_base[cur_node];
24346	63		lz_matchptr++;
24347			}
24348			#if BT_MATCHFINDER_HASH3_WAYS >= 2
24349	0	0	else if (cur_node_2 > cutoff &&
24350	0	0	seq3 == load_u24_unaligned(&in_base[cur_node_2]))
24351			{
24352	0		lz_matchptr->length = 3;
24353	0		lz_matchptr->offset = in_next - &in_base[cur_node_2];
24354	0		lz_matchptr++;
24355			}
24356			#endif
24357			}
24358
24359	15264		cur_node = mf->hash4_tab[hash4];
24360	15264		mf->hash4_tab[hash4] = cur_pos;
24361
24362	15264		pending_lt_ptr = bt_left_child(mf, cur_pos);
24363	15264		pending_gt_ptr = bt_right_child(mf, cur_pos);
24364
24365	15264	100	if (cur_node <= cutoff) {
		50
24366	162		*pending_lt_ptr = MATCHFINDER_INITVAL;
24367	162		*pending_gt_ptr = MATCHFINDER_INITVAL;
24368	162		return lz_matchptr;
24369			}
24370
24371	15102		best_lt_len = 0;
24372	15102		best_gt_len = 0;
24373	15102		len = 0;
24374
24375			for (;;) {
24376	15102		matchptr = &in_base[cur_node];
24377
24378	15102	50	if (matchptr[len] == in_next[len]) {
		50
24379	15102		len = lz_extend(in_next, matchptr, len + 1, max_len);
24380	15102	50	if (!record_matches \|\| len > best_len) {
		50
		50
		0
24381			if (record_matches) {
24382	63		best_len = len;
24383	63		lz_matchptr->length = len;
24384	63		lz_matchptr->offset = in_next - matchptr;
24385	63		lz_matchptr++;
24386			}
24387	15102	50	if (len >= nice_len) {
		50
24388	15102		pending_lt_ptr = bt_left_child(mf, cur_node);
24389	15102		pending_gt_ptr = bt_right_child(mf, cur_node);
24390	15102		return lz_matchptr;
24391			}
24392			}
24393			}
24394
24395	0	0	if (matchptr[len] < in_next[len]) {
		0
24396	0		*pending_lt_ptr = cur_node;
24397	0		pending_lt_ptr = bt_right_child(mf, cur_node);
24398	0		cur_node = *pending_lt_ptr;
24399	0		best_lt_len = len;
24400	0	0	if (best_gt_len < len)
		0
24401	0		len = best_gt_len;
24402			} else {
24403	0		*pending_gt_ptr = cur_node;
24404	0		pending_gt_ptr = bt_left_child(mf, cur_node);
24405	0		cur_node = *pending_gt_ptr;
24406	0		best_gt_len = len;
24407	0	0	if (best_lt_len < len)
		0
24408	0		len = best_lt_len;
24409			}
24410
24411	0	0	if (cur_node <= cutoff \|\| !--depth_remaining) {
		0
		0
		0
24412	0		*pending_lt_ptr = MATCHFINDER_INITVAL;
24413	0		*pending_gt_ptr = MATCHFINDER_INITVAL;
24414	0		return lz_matchptr;
24415			}
24416			}
24417			}
24418
24419
24420			static forceinline struct lz_match *
24421			bt_matchfinder_get_matches(struct bt_matchfinder *mf,
24422			const u8 *in_base,
24423			ptrdiff_t cur_pos,
24424			u32 max_len,
24425			u32 nice_len,
24426			u32 max_search_depth,
24427			u32 next_hashes[2],
24428			struct lz_match *lz_matchptr)
24429			{
24430	225		return bt_matchfinder_advance_one_byte(mf,
24431			in_base,
24432			cur_pos,
24433			max_len,
24434			nice_len,
24435			max_search_depth,
24436			next_hashes,
24437			lz_matchptr,
24438			true);
24439			}
24440
24441
24442			static forceinline void
24443			bt_matchfinder_skip_byte(struct bt_matchfinder *mf,
24444			const u8 *in_base,
24445			ptrdiff_t cur_pos,
24446			u32 nice_len,
24447			u32 max_search_depth,
24448			u32 next_hashes[2])
24449			{
24450			bt_matchfinder_advance_one_byte(mf,
24451			in_base,
24452			cur_pos,
24453			nice_len,
24454			nice_len,
24455			max_search_depth,
24456			next_hashes,
24457			NULL,
24458			false);
24459	15039		}
24460
24461			#endif
24462
24463
/* Number of distinct match lengths, DEFLATE_MIN_MATCH_LEN through
 * DEFLATE_MAX_MATCH_LEN inclusive; used as the per-position match bound. */
#define MAX_MATCHES_PER_POS	\
	(DEFLATE_MAX_MATCH_LEN - DEFLATE_MIN_MATCH_LEN + 1)
24466			#endif
24467
24468
/*
 * Hard upper bound on the block length: the larger of
 * (soft maximum + MIN_BLOCK_LENGTH - 1) and
 * (soft maximum + 1 + DEFLATE_MAX_MATCH_LEN).
 */
#define MAX_BLOCK_LENGTH	\
	MAX(SOFT_MAX_BLOCK_LENGTH + MIN_BLOCK_LENGTH - 1,	\
	    SOFT_MAX_BLOCK_LENGTH + 1 + DEFLATE_MAX_MATCH_LEN)
24472
24473			static forceinline void
24474			check_buildtime_parameters(void)
24475			{
24476
24477			STATIC_ASSERT(SOFT_MAX_BLOCK_LENGTH >= MIN_BLOCK_LENGTH);
24478			STATIC_ASSERT(FAST_SOFT_MAX_BLOCK_LENGTH >= MIN_BLOCK_LENGTH);
24479			STATIC_ASSERT(SEQ_STORE_LENGTH * DEFLATE_MIN_MATCH_LEN >=
24480			MIN_BLOCK_LENGTH);
24481			STATIC_ASSERT(FAST_SEQ_STORE_LENGTH * HT_MATCHFINDER_MIN_MATCH_LEN >=
24482			MIN_BLOCK_LENGTH);
24483			#if SUPPORT_NEAR_OPTIMAL_PARSING
24484			STATIC_ASSERT(MIN_BLOCK_LENGTH * MAX_MATCHES_PER_POS <=
24485			MATCH_CACHE_LENGTH);
24486			#endif
24487
24488
24489			STATIC_ASSERT(FAST_SOFT_MAX_BLOCK_LENGTH <= SOFT_MAX_BLOCK_LENGTH);
24490
24491
24492			STATIC_ASSERT(SEQ_STORE_LENGTH * DEFLATE_MIN_MATCH_LEN <=
24493			SOFT_MAX_BLOCK_LENGTH + MIN_BLOCK_LENGTH);
24494			STATIC_ASSERT(FAST_SEQ_STORE_LENGTH * HT_MATCHFINDER_MIN_MATCH_LEN <=
24495			FAST_SOFT_MAX_BLOCK_LENGTH + MIN_BLOCK_LENGTH);
24496
24497
24498			STATIC_ASSERT(
24499			MAX_LITLEN_CODEWORD_LEN <= DEFLATE_MAX_LITLEN_CODEWORD_LEN);
24500			STATIC_ASSERT(
24501			MAX_OFFSET_CODEWORD_LEN <= DEFLATE_MAX_OFFSET_CODEWORD_LEN);
24502			STATIC_ASSERT(
24503			MAX_PRE_CODEWORD_LEN <= DEFLATE_MAX_PRE_CODEWORD_LEN);
24504			STATIC_ASSERT(
24505			(1U << MAX_LITLEN_CODEWORD_LEN) >= DEFLATE_NUM_LITLEN_SYMS);
24506			STATIC_ASSERT(
24507			(1U << MAX_OFFSET_CODEWORD_LEN) >= DEFLATE_NUM_OFFSET_SYMS);
24508			STATIC_ASSERT(
24509			(1U << MAX_PRE_CODEWORD_LEN) >= DEFLATE_NUM_PRECODE_SYMS);
24510	37		}
24511
24512
24513
24514
24515			static const u32 deflate_length_slot_base[] = {
24516			3, 4, 5, 6, 7, 8, 9, 10,
24517			11, 13, 15, 17, 19, 23, 27, 31,
24518			35, 43, 51, 59, 67, 83, 99, 115,
24519			131, 163, 195, 227, 258,
24520			};
24521
24522
24523			static const u8 deflate_extra_length_bits[] = {
24524			0, 0, 0, 0, 0, 0, 0, 0,
24525			1, 1, 1, 1, 2, 2, 2, 2,
24526			3, 3, 3, 3, 4, 4, 4, 4,
24527			5, 5, 5, 5, 0,
24528			};
24529
24530
24531			static const u32 deflate_offset_slot_base[] = {
24532			1, 2, 3, 4, 5, 7, 9, 13,
24533			17, 25, 33, 49, 65, 97, 129, 193,
24534			257, 385, 513, 769, 1025, 1537, 2049, 3073,
24535			4097, 6145, 8193, 12289, 16385, 24577,
24536			};
24537
24538
24539			static const u8 deflate_extra_offset_bits[] = {
24540			0, 0, 0, 0, 1, 1, 2, 2,
24541			3, 3, 4, 4, 5, 5, 6, 6,
24542			7, 7, 8, 8, 9, 9, 10, 10,
24543			11, 11, 12, 12, 13, 13,
24544			};
24545
24546
24547			static const u8 deflate_length_slot[DEFLATE_MAX_MATCH_LEN + 1] = {
24548			0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 12,
24549			12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 16,
24550			16, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18,
24551			18, 19, 19, 19, 19, 19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 20, 20, 20,
24552			20, 20, 20, 20, 20, 20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
24553			21, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
24554			22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
24555			23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24556			24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 25, 25, 25,
24557			25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
24558			25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26, 26,
24559			26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
24560			26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
24561			27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
24562			27, 27, 28,
24563			};
24564
24565
24566			static const u8 deflate_offset_slot[256] = {
24567			0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7,
24568			8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9,
24569			10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
24570			11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
24571			12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
24572			12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
24573			13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
24574			13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
24575			14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
24576			14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
24577			14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
24578			14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
24579			15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
24580			15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
24581			15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
24582			15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
24583			};
24584
24585
24586			static const u8 deflate_precode_lens_permutation[DEFLATE_NUM_PRECODE_SYMS] = {
24587			16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
24588			};
24589
24590
24591			static const u8 deflate_extra_precode_bits[DEFLATE_NUM_PRECODE_SYMS] = {
24592			0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 3, 7
24593			};
24594
24595
24596			struct deflate_codewords {
24597			u32 litlen[DEFLATE_NUM_LITLEN_SYMS];
24598			u32 offset[DEFLATE_NUM_OFFSET_SYMS];
24599			};
24600
24601
24602			struct deflate_lens {
24603			u8 litlen[DEFLATE_NUM_LITLEN_SYMS];
24604			u8 offset[DEFLATE_NUM_OFFSET_SYMS];
24605			};
24606
24607
24608			struct deflate_codes {
24609			struct deflate_codewords codewords;
24610			struct deflate_lens lens;
24611			};
24612
24613
24614			struct deflate_freqs {
24615			u32 litlen[DEFLATE_NUM_LITLEN_SYMS];
24616			u32 offset[DEFLATE_NUM_OFFSET_SYMS];
24617			};
24618
24619
24620			struct deflate_sequence {
24621
24622
24623			#define SEQ_LENGTH_SHIFT 23
24624			#define SEQ_LITRUNLEN_MASK (((u32)1 << SEQ_LENGTH_SHIFT) - 1)
24625			u32 litrunlen_and_length;
24626
24627
24628			u16 offset;
24629
24630
24631			u16 offset_slot;
24632			};
24633
24634			#if SUPPORT_NEAR_OPTIMAL_PARSING
24635
24636
24637			struct deflate_costs {
24638
24639
24640			u32 literal[DEFLATE_NUM_LITERALS];
24641
24642
24643			u32 length[DEFLATE_MAX_MATCH_LEN + 1];
24644
24645
24646			u32 offset_slot[DEFLATE_NUM_OFFSET_SYMS];
24647			};
24648
24649
24650			struct deflate_optimum_node {
24651
24652			u32 cost_to_end;
24653
24654
24655			#define OPTIMUM_OFFSET_SHIFT 9
24656			#define OPTIMUM_LEN_MASK (((u32)1 << OPTIMUM_OFFSET_SHIFT) - 1)
24657			u32 item;
24658
24659			};
24660
24661			#endif
24662
24663
24664			#define NUM_LITERAL_OBSERVATION_TYPES 8
24665			#define NUM_MATCH_OBSERVATION_TYPES 2
24666			#define NUM_OBSERVATION_TYPES (NUM_LITERAL_OBSERVATION_TYPES + \
24667			NUM_MATCH_OBSERVATION_TYPES)
24668			#define NUM_OBSERVATIONS_PER_BLOCK_CHECK 512
24669			struct block_split_stats {
24670			u32 new_observations[NUM_OBSERVATION_TYPES];
24671			u32 observations[NUM_OBSERVATION_TYPES];
24672			u32 num_new_observations;
24673			u32 num_observations;
24674			};
24675
24676			struct deflate_output_bitstream;
24677
24678
24679			struct libdeflate_compressor {
24680
24681
24682			void (impl)(struct libdeflate_compressor restrict c, const u8 *in,
24683			size_t in_nbytes, struct deflate_output_bitstream *os);
24684
24685
24686			free_func_t free_func;
24687
24688
24689			unsigned compression_level;
24690
24691
24692			size_t max_passthrough_size;
24693
24694
24695			u32 max_search_depth;
24696
24697
24698			u32 nice_match_length;
24699
24700
24701			struct deflate_freqs freqs;
24702
24703
24704			struct block_split_stats split_stats;
24705
24706
24707			struct deflate_codes codes;
24708
24709
24710			struct deflate_codes static_codes;
24711
24712
24713			union {
24714
24715			struct {
24716			u32 freqs[DEFLATE_NUM_PRECODE_SYMS];
24717			u32 codewords[DEFLATE_NUM_PRECODE_SYMS];
24718			u8 lens[DEFLATE_NUM_PRECODE_SYMS];
24719			unsigned items[DEFLATE_NUM_LITLEN_SYMS +
24720			DEFLATE_NUM_OFFSET_SYMS];
24721			unsigned num_litlen_syms;
24722			unsigned num_offset_syms;
24723			unsigned num_explicit_lens;
24724			unsigned num_items;
24725			} precode;
24726
24727			struct {
24728			u32 codewords[DEFLATE_MAX_MATCH_LEN + 1];
24729			u8 lens[DEFLATE_MAX_MATCH_LEN + 1];
24730			} length;
24731			} o;
24732
24733			union {
24734
24735			struct {
24736
24737			struct hc_matchfinder hc_mf;
24738
24739
24740			struct deflate_sequence sequences[SEQ_STORE_LENGTH + 1];
24741
24742			} g;
24743
24744
24745			struct {
24746
24747			struct ht_matchfinder ht_mf;
24748
24749
24750			struct deflate_sequence sequences[
24751			FAST_SEQ_STORE_LENGTH + 1];
24752
24753			} f;
24754
24755			#if SUPPORT_NEAR_OPTIMAL_PARSING
24756
24757			struct {
24758
24759
24760			struct bt_matchfinder bt_mf;
24761
24762
24763			struct lz_match match_cache[MATCH_CACHE_LENGTH +
24764			MAX_MATCHES_PER_POS +
24765			DEFLATE_MAX_MATCH_LEN - 1];
24766
24767
24768			struct deflate_optimum_node optimum_nodes[
24769			MAX_BLOCK_LENGTH + 1];
24770
24771
24772			struct deflate_costs costs;
24773
24774
24775			struct deflate_costs costs_saved;
24776
24777
24778			u8 offset_slot_full[DEFLATE_MAX_MATCH_OFFSET + 1];
24779
24780
24781			u32 prev_observations[NUM_OBSERVATION_TYPES];
24782			u32 prev_num_observations;
24783
24784
24785			u32 new_match_len_freqs[DEFLATE_MAX_MATCH_LEN + 1];
24786			u32 match_len_freqs[DEFLATE_MAX_MATCH_LEN + 1];
24787
24788
24789			unsigned max_optim_passes;
24790
24791
24792			u32 min_improvement_to_continue;
24793
24794
24795			u32 min_bits_to_use_nonfinal_path;
24796
24797
24798			u32 max_len_to_optimize_static_block;
24799
24800			} n;
24801			#endif
24802
24803			} p;
24804			};
24805
24806
24807			typedef machine_word_t bitbuf_t;
24808
24809
24810			#define COMPRESS_BITBUF_NBITS (8 * sizeof(bitbuf_t) - 1)
24811
24812
24813			#define CAN_BUFFER(n) (7 + (n) <= COMPRESS_BITBUF_NBITS)
24814
24815
24816			struct deflate_output_bitstream {
24817
24818
24819			bitbuf_t bitbuf;
24820
24821
24822			unsigned bitcount;
24823
24824
24825			u8 *next;
24826
24827
24828			u8 *end;
24829
24830
24831			bool overflow;
24832			};
24833
24834
/*
 * Append the low 'n' bits of 'bits' to the bit buffer.  Operates on the
 * caller's local variables 'bitbuf' and 'bitcount'.  The bits are OR-ed in
 * above the bits already present; 'bits' must not have any bits set above
 * bit n-1, since no masking is done here.
 */
#define ADD_BITS(bits, n)					\
do {								\
	bitbuf |= (bitbuf_t)(bits) << bitcount;			\
	bitcount += (n);					\
	ASSERT(bitcount <= COMPRESS_BITBUF_NBITS);		\
} while (0)
24841
24842
/*
 * Write out the whole bytes currently held in the bit buffer, leaving fewer
 * than 8 bits pending.  Operates on the caller's local variables 'bitbuf',
 * 'bitcount', 'out_next', 'out_fast_end' and 'os'.
 *
 * Fast path: while 'out_next' is below 'out_fast_end', store a full word at
 * once and advance by the number of whole bytes.  Slow path: emit one byte
 * at a time.
 */
#define FLUSH_BITS()							\
do {									\
	if (UNALIGNED_ACCESS_IS_FAST && likely(out_next < out_fast_end)) { \
		/* Word-at-a-time store */				\
		put_unaligned_leword(bitbuf, out_next);			\
		bitbuf >>= bitcount & ~7;				\
		out_next += bitcount >> 3;				\
		bitcount &= 7;						\
	} else {							\
		/* Byte-at-a-time store */				\
		for (; bitcount >= 8; bitcount -= 8, bitbuf >>= 8) {	\
			ASSERT(out_next < os->end);			\
			*out_next++ = bitbuf;				\
		}							\
	}								\
} while (0)
24861
24862
24863			static void
24864	35		heapify_subtree(u32 A[], unsigned length, unsigned subtree_idx)
24865			{
24866			unsigned parent_idx;
24867			unsigned child_idx;
24868			u32 v;
24869
24870	35		v = A[subtree_idx];
24871	35		parent_idx = subtree_idx;
24872	87	100	while ((child_idx = parent_idx * 2) <= length) {
24873	63	100	if (child_idx < length && A[child_idx + 1] > A[child_idx])
		100
24874	26		child_idx++;
24875	63	100	if (v >= A[child_idx])
24876	11		break;
24877	52		A[parent_idx] = A[child_idx];
24878	52		parent_idx = child_idx;
24879			}
24880	35		A[parent_idx] = v;
24881	35		}
24882
24883
24884			static void
24885	263		heapify_array(u32 A[], unsigned length)
24886			{
24887			unsigned subtree_idx;
24888
24889	275	100	for (subtree_idx = length / 2; subtree_idx >= 1; subtree_idx--)
24890	12		heapify_subtree(A, length, subtree_idx);
24891	263		}
24892
24893
24894			static void
24895	263		heap_sort(u32 A[], unsigned length)
24896			{
24897	263		A--;
24898
24899	263		heapify_array(A, length);
24900
24901	286	100	while (length >= 2) {
24902	23		u32 tmp = A[length];
24903
24904	23		A[length] = A[1];
24905	23		A[1] = tmp;
24906	23		length--;
24907	23		heapify_subtree(A, length, 1);
24908			}
24909	263		}
24910
/*
 * Packing of a (symbol, frequency) pair into one u32: the symbol occupies
 * the low NUM_SYMBOL_BITS bits and the frequency the remaining high bits.
 */
#define NUM_SYMBOL_BITS	10
#define NUM_FREQ_BITS	(32 - NUM_SYMBOL_BITS)
#define SYMBOL_MASK	((1 << NUM_SYMBOL_BITS) - 1)
#define FREQ_MASK	(~SYMBOL_MASK)

/* Number of counting-sort buckets used for an alphabet of 'num_syms' */
#define GET_NUM_COUNTERS(num_syms)	(num_syms)
24917
24918
24919			static unsigned
24920	263		sort_symbols(unsigned num_syms, const u32 freqs[], u8 lens[], u32 symout[])
24921			{
24922			unsigned sym;
24923			unsigned i;
24924			unsigned num_used_syms;
24925			unsigned num_counters;
24926			unsigned counters[GET_NUM_COUNTERS(DEFLATE_MAX_NUM_SYMS)];
24927
24928
24929
24930	263		num_counters = GET_NUM_COUNTERS(num_syms);
24931
24932	263		memset(counters, 0, num_counters * sizeof(counters[0]));
24933
24934
24935	33178	100	for (sym = 0; sym < num_syms; sym++)
24936	32915		counters[MIN(freqs[sym], num_counters - 1)]++;
24937
24938
24939	263		num_used_syms = 0;
24940	32915	100	for (i = 1; i < num_counters; i++) {
24941	32652		unsigned count = counters[i];
24942
24943	32652		counters[i] = num_used_syms;
24944	32652		num_used_syms += count;
24945			}
24946
24947
24948	33178	100	for (sym = 0; sym < num_syms; sym++) {
24949	32915		u32 freq = freqs[sym];
24950
24951	32915	100	if (freq != 0) {
24952	13499		symout[counters[MIN(freq, num_counters - 1)]++] =
24953	13499		sym \| (freq << NUM_SYMBOL_BITS);
24954			} else {
24955	19416		lens[sym] = 0;
24956			}
24957			}
24958
24959
24960	263		heap_sort(symout + counters[num_counters - 2],
24961	263		counters[num_counters - 1] - counters[num_counters - 2]);
24962
24963	263		return num_used_syms;
24964			}
24965
24966
24967			static void
24968	203		build_tree(u32 A[], unsigned sym_count)
24969			{
24970	203		const unsigned last_idx = sym_count - 1;
24971
24972
24973	203		unsigned i = 0;
24974
24975
24976	203		unsigned b = 0;
24977
24978
24979	203		unsigned e = 0;
24980
24981			do {
24982			u32 new_freq;
24983
24984
24985	13245	100	if (i + 1 <= last_idx &&
		100
24986	7767	100	(b == e \|\| (A[i + 1] & FREQ_MASK) <= (A[b] & FREQ_MASK))) {
24987
24988	6523		new_freq = (A[i] & FREQ_MASK) + (A[i + 1] & FREQ_MASK);
24989	6523		i += 2;
24990	6722	100	} else if (b + 2 <= e &&
		100
24991	1526		(i > last_idx \|\|
24992	1526	100	(A[b + 1] & FREQ_MASK) < (A[i] & FREQ_MASK))) {
24993
24994	6320		new_freq = (A[b] & FREQ_MASK) + (A[b + 1] & FREQ_MASK);
24995	6320		A[b] = (e << NUM_SYMBOL_BITS) \| (A[b] & SYMBOL_MASK);
24996	6320		A[b + 1] = (e << NUM_SYMBOL_BITS) \|
24997	6320		(A[b + 1] & SYMBOL_MASK);
24998	6320		b += 2;
24999			} else {
25000
25001	402		new_freq = (A[i] & FREQ_MASK) + (A[b] & FREQ_MASK);
25002	402		A[b] = (e << NUM_SYMBOL_BITS) \| (A[b] & SYMBOL_MASK);
25003	402		i++;
25004	402		b++;
25005			}
25006	13245		A[e] = new_freq \| (A[e] & SYMBOL_MASK);
25007
25008	13245	100	} while (++e < last_idx);
25009	203		}
25010
25011
25012			static void
25013	203		compute_length_counts(u32 A[], unsigned root_idx, unsigned len_counts[],
25014			unsigned max_codeword_len)
25015			{
25016			unsigned len;
25017			int node;
25018
25019
25020
25021	2832	100	for (len = 0; len <= max_codeword_len; len++)
25022	2629		len_counts[len] = 0;
25023	203		len_counts[1] = 2;
25024
25025
25026	203		A[root_idx] &= SYMBOL_MASK;
25027
25028	13245	100	for (node = root_idx - 1; node >= 0; node--) {
25029
25030
25031
25032	13042		unsigned parent = A[node] >> NUM_SYMBOL_BITS;
25033	13042		unsigned parent_depth = A[parent] >> NUM_SYMBOL_BITS;
25034	13042		unsigned depth = parent_depth + 1;
25035
25036
25037	13042		A[node] = (A[node] & SYMBOL_MASK) \| (depth << NUM_SYMBOL_BITS);
25038
25039
25040	13042	50	if (depth >= max_codeword_len) {
25041	0		depth = max_codeword_len;
25042			do {
25043	0		depth--;
25044	0	0	} while (len_counts[depth] == 0);
25045			}
25046
25047
25048	13042		len_counts[depth]--;
25049	13042		len_counts[depth + 1] += 2;
25050			}
25051	203		}
25052
25053
25054
25055			#ifdef rbit32
25056			static forceinline u32 reverse_codeword(u32 codeword, u8 len)
25057			{
25058			return rbit32(codeword) >> ((32 - len) & 31);
25059			}
25060			#else
25061
25062			static const u8 bitreverse_tab[256] = {
25063			0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0,
25064			0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0,
25065			0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8,
25066			0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8,
25067			0x04, 0x84, 0x44, 0xc4, 0x24, 0xa4, 0x64, 0xe4,
25068			0x14, 0x94, 0x54, 0xd4, 0x34, 0xb4, 0x74, 0xf4,
25069			0x0c, 0x8c, 0x4c, 0xcc, 0x2c, 0xac, 0x6c, 0xec,
25070			0x1c, 0x9c, 0x5c, 0xdc, 0x3c, 0xbc, 0x7c, 0xfc,
25071			0x02, 0x82, 0x42, 0xc2, 0x22, 0xa2, 0x62, 0xe2,
25072			0x12, 0x92, 0x52, 0xd2, 0x32, 0xb2, 0x72, 0xf2,
25073			0x0a, 0x8a, 0x4a, 0xca, 0x2a, 0xaa, 0x6a, 0xea,
25074			0x1a, 0x9a, 0x5a, 0xda, 0x3a, 0xba, 0x7a, 0xfa,
25075			0x06, 0x86, 0x46, 0xc6, 0x26, 0xa6, 0x66, 0xe6,
25076			0x16, 0x96, 0x56, 0xd6, 0x36, 0xb6, 0x76, 0xf6,
25077			0x0e, 0x8e, 0x4e, 0xce, 0x2e, 0xae, 0x6e, 0xee,
25078			0x1e, 0x9e, 0x5e, 0xde, 0x3e, 0xbe, 0x7e, 0xfe,
25079			0x01, 0x81, 0x41, 0xc1, 0x21, 0xa1, 0x61, 0xe1,
25080			0x11, 0x91, 0x51, 0xd1, 0x31, 0xb1, 0x71, 0xf1,
25081			0x09, 0x89, 0x49, 0xc9, 0x29, 0xa9, 0x69, 0xe9,
25082			0x19, 0x99, 0x59, 0xd9, 0x39, 0xb9, 0x79, 0xf9,
25083			0x05, 0x85, 0x45, 0xc5, 0x25, 0xa5, 0x65, 0xe5,
25084			0x15, 0x95, 0x55, 0xd5, 0x35, 0xb5, 0x75, 0xf5,
25085			0x0d, 0x8d, 0x4d, 0xcd, 0x2d, 0xad, 0x6d, 0xed,
25086			0x1d, 0x9d, 0x5d, 0xdd, 0x3d, 0xbd, 0x7d, 0xfd,
25087			0x03, 0x83, 0x43, 0xc3, 0x23, 0xa3, 0x63, 0xe3,
25088			0x13, 0x93, 0x53, 0xd3, 0x33, 0xb3, 0x73, 0xf3,
25089			0x0b, 0x8b, 0x4b, 0xcb, 0x2b, 0xab, 0x6b, 0xeb,
25090			0x1b, 0x9b, 0x5b, 0xdb, 0x3b, 0xbb, 0x7b, 0xfb,
25091			0x07, 0x87, 0x47, 0xc7, 0x27, 0xa7, 0x67, 0xe7,
25092			0x17, 0x97, 0x57, 0xd7, 0x37, 0xb7, 0x77, 0xf7,
25093			0x0f, 0x8f, 0x4f, 0xcf, 0x2f, 0xaf, 0x6f, 0xef,
25094			0x1f, 0x9f, 0x5f, 0xdf, 0x3f, 0xbf, 0x7f, 0xff,
25095			};
25096
25097			static forceinline u32 reverse_codeword(u32 codeword, u8 len)
25098			{
25099			STATIC_ASSERT(DEFLATE_MAX_CODEWORD_LEN <= 16);
25100	30995		codeword = ((u32)bitreverse_tab[codeword & 0xff] << 8) \|
25101	30995		bitreverse_tab[codeword >> 8];
25102	30995		return codeword >> (16 - len);
25103			}
25104			#endif
25105
25106
25107			static void
25108	203		gen_codewords(u32 A[], u8 lens[], const unsigned len_counts[],
25109			unsigned max_codeword_len, unsigned num_syms)
25110			{
25111			u32 next_codewords[DEFLATE_MAX_CODEWORD_LEN + 1];
25112			unsigned i;
25113			unsigned len;
25114			unsigned sym;
25115
25116
25117	2629	100	for (i = 0, len = max_codeword_len; len >= 1; len--) {
25118	2426		unsigned count = len_counts[len];
25119
25120	15874	100	while (count--)
25121	13448		lens[A[i++] & SYMBOL_MASK] = len;
25122			}
25123
25124
25125	203		next_codewords[0] = 0;
25126	203		next_codewords[1] = 0;
25127	2426	100	for (len = 2; len <= max_codeword_len; len++)
25128	2223		next_codewords[len] =
25129	2223		(next_codewords[len - 1] + len_counts[len - 1]) << 1;
25130
25131	31198	100	for (sym = 0; sym < num_syms; sym++) {
25132
25133	30995		A[sym] = reverse_codeword(next_codewords[lens[sym]]++,
25134	30995		lens[sym]);
25135			}
25136	203		}
25137
25138
25139			static void
25140	263		deflate_make_huffman_code(unsigned num_syms, unsigned max_codeword_len,
25141			const u32 freqs[], u8 lens[], u32 codewords[])
25142			{
25143	263		u32 *A = codewords;
25144			unsigned num_used_syms;
25145
25146			STATIC_ASSERT(DEFLATE_MAX_NUM_SYMS <= 1 << NUM_SYMBOL_BITS);
25147			STATIC_ASSERT(MAX_BLOCK_LENGTH <= ((u32)1 << NUM_FREQ_BITS) - 1);
25148
25149
25150	263		num_used_syms = sort_symbols(num_syms, freqs, lens, A);
25151
25152
25153
25154	263	100	if (unlikely(num_used_syms < 2)) {
25155	60	100	unsigned sym = num_used_syms ? (A[0] & SYMBOL_MASK) : 0;
25156	60	100	unsigned nonzero_idx = sym ? sym : 1;
25157
25158	60		codewords[0] = 0;
25159	60		lens[0] = 1;
25160	60		codewords[nonzero_idx] = 1;
25161	60		lens[nonzero_idx] = 1;
25162	60		return;
25163			}
25164
25165
25166
25167	203		build_tree(A, num_used_syms);
25168
25169			{
25170			unsigned len_counts[DEFLATE_MAX_CODEWORD_LEN + 1];
25171
25172	203		compute_length_counts(A, num_used_syms - 2,
25173			len_counts, max_codeword_len);
25174
25175	203		gen_codewords(A, lens, len_counts, max_codeword_len, num_syms);
25176			}
25177			}
25178
25179
25180			static void
25181	62		deflate_reset_symbol_frequencies(struct libdeflate_compressor *c)
25182			{
25183	62		memset(&c->freqs, 0, sizeof(c->freqs));
25184	62		}
25185
25186
25187			static void
25188	99		deflate_make_huffman_codes(const struct deflate_freqs *freqs,
25189			struct deflate_codes *codes)
25190			{
25191	99		deflate_make_huffman_code(DEFLATE_NUM_LITLEN_SYMS,
25192			MAX_LITLEN_CODEWORD_LEN,
25193	99		freqs->litlen,
25194	99		codes->lens.litlen,
25195	99		codes->codewords.litlen);
25196
25197	99		deflate_make_huffman_code(DEFLATE_NUM_OFFSET_SYMS,
25198			MAX_OFFSET_CODEWORD_LEN,
25199	99		freqs->offset,
25200	99		codes->lens.offset,
25201	99		codes->codewords.offset);
25202	99		}
25203
25204
25205			static void
25206	37		deflate_init_static_codes(struct libdeflate_compressor *c)
25207			{
25208			unsigned i;
25209
25210	5365	100	for (i = 0; i < 144; i++)
25211	5328		c->freqs.litlen[i] = 1 << (9 - 8);
25212	4181	100	for (; i < 256; i++)
25213	4144		c->freqs.litlen[i] = 1 << (9 - 9);
25214	925	100	for (; i < 280; i++)
25215	888		c->freqs.litlen[i] = 1 << (9 - 7);
25216	333	100	for (; i < 288; i++)
25217	296		c->freqs.litlen[i] = 1 << (9 - 8);
25218
25219	1221	100	for (i = 0; i < 32; i++)
25220	1184		c->freqs.offset[i] = 1 << (5 - 5);
25221
25222	37		deflate_make_huffman_codes(&c->freqs, &c->static_codes);
25223	37		}
25224
25225
25226			static forceinline unsigned
25227			deflate_get_offset_slot(u32 offset)
25228			{
25229
25230	2091		unsigned n = (256 - offset) >> 29;
25231
25232	2091	50	ASSERT(offset >= 1 && offset <= 32768);
		0
		0
		50
		0
		50
		50
		50
25233
25234	2091		return deflate_offset_slot[(offset - 1) >> n] + (n << 1);
25235			}
25236
25237			static unsigned
25238	65		deflate_compute_precode_items(const u8 lens[], const unsigned num_lens,
25239			u32 precode_freqs[], unsigned precode_items[])
25240			{
25241			unsigned *itemptr;
25242			unsigned run_start;
25243			unsigned run_end;
25244			unsigned extra_bits;
25245			u8 len;
25246
25247	65		memset(precode_freqs, 0,
25248			DEFLATE_NUM_PRECODE_SYMS * sizeof(precode_freqs[0]));
25249
25250	65		itemptr = precode_items;
25251	65		run_start = 0;
25252			do {
25253
25254
25255
25256	1973		len = lens[run_start];
25257
25258
25259	1973		run_end = run_start;
25260			do {
25261	18873		run_end++;
25262	18873	100	} while (run_end != num_lens && len == lens[run_end]);
		100
25263
25264	1973	100	if (len == 0) {
25265
25266
25267
25268	1083	100	while ((run_end - run_start) >= 11) {
25269	249		extra_bits = MIN((run_end - run_start) - 11,
25270			0x7F);
25271	249		precode_freqs[18]++;
25272	249		*itemptr++ = 18 \| (extra_bits << 5);
25273	249		run_start += 11 + extra_bits;
25274			}
25275
25276
25277	834	100	if ((run_end - run_start) >= 3) {
25278	309		extra_bits = MIN((run_end - run_start) - 3,
25279			0x7);
25280	309		precode_freqs[17]++;
25281	309		*itemptr++ = 17 \| (extra_bits << 5);
25282	309		run_start += 3 + extra_bits;
25283			}
25284			} else {
25285
25286
25287
25288
25289	1139	100	if ((run_end - run_start) >= 4) {
25290	6		precode_freqs[len]++;
25291	6		*itemptr++ = len;
25292	6		run_start++;
25293			do {
25294	6		extra_bits = MIN((run_end - run_start) -
25295			3, 0x3);
25296	6		precode_freqs[16]++;
25297	6		*itemptr++ = 16 \| (extra_bits << 5);
25298	6		run_start += 3 + extra_bits;
25299	6	50	} while ((run_end - run_start) >= 3);
25300			}
25301			}
25302
25303
25304	3544	100	while (run_start != run_end) {
25305	1571		precode_freqs[len]++;
25306	1571		*itemptr++ = len;
25307	1571		run_start++;
25308			}
25309	1973	100	} while (run_start != num_lens);
25310
25311	65		return itemptr - precode_items;
25312			}
25313
25314
25315
25316
25317			static void
25318	65		deflate_precompute_huffman_header(struct libdeflate_compressor *c)
25319			{
25320
25321
25322	65		for (c->o.precode.num_litlen_syms = DEFLATE_NUM_LITLEN_SYMS;
25323	475	100	c->o.precode.num_litlen_syms > 257;
25324	410		c->o.precode.num_litlen_syms--)
25325	466	100	if (c->codes.lens.litlen[c->o.precode.num_litlen_syms - 1] != 0)
25326	56		break;
25327
25328	65		for (c->o.precode.num_offset_syms = DEFLATE_NUM_OFFSET_SYMS;
25329	1582	50	c->o.precode.num_offset_syms > 1;
25330	1517		c->o.precode.num_offset_syms--)
25331	1582	100	if (c->codes.lens.offset[c->o.precode.num_offset_syms - 1] != 0)
25332	65		break;
25333
25334
25335			STATIC_ASSERT(offsetof(struct deflate_lens, offset) ==
25336			DEFLATE_NUM_LITLEN_SYMS);
25337	65	50	if (c->o.precode.num_litlen_syms != DEFLATE_NUM_LITLEN_SYMS) {
25338	65		memmove((u8 *)&c->codes.lens + c->o.precode.num_litlen_syms,
25339	65		(u8 *)&c->codes.lens + DEFLATE_NUM_LITLEN_SYMS,
25340	65		c->o.precode.num_offset_syms);
25341			}
25342
25343
25344	65		c->o.precode.num_items =
25345	65		deflate_compute_precode_items((u8 *)&c->codes.lens,
25346	65		c->o.precode.num_litlen_syms +
25347	65		c->o.precode.num_offset_syms,
25348	65		c->o.precode.freqs,
25349	65		c->o.precode.items);
25350
25351
25352	65		deflate_make_huffman_code(DEFLATE_NUM_PRECODE_SYMS,
25353			MAX_PRE_CODEWORD_LEN,
25354	65		c->o.precode.freqs, c->o.precode.lens,
25355	65		c->o.precode.codewords);
25356
25357
25358	65		for (c->o.precode.num_explicit_lens = DEFLATE_NUM_PRECODE_SYMS;
25359	138	50	c->o.precode.num_explicit_lens > 4;
25360	73		c->o.precode.num_explicit_lens--)
25361	138		if (c->o.precode.lens[deflate_precode_lens_permutation[
25362	138	100	c->o.precode.num_explicit_lens - 1]] != 0)
25363	65		break;
25364
25365
25366	65	50	if (c->o.precode.num_litlen_syms != DEFLATE_NUM_LITLEN_SYMS) {
25367	65		memmove((u8 *)&c->codes.lens + DEFLATE_NUM_LITLEN_SYMS,
25368	65		(u8 *)&c->codes.lens + c->o.precode.num_litlen_syms,
25369	65		c->o.precode.num_offset_syms);
25370			}
25371	65		}
25372
25373
25374			static void
25375	38		deflate_compute_full_len_codewords(struct libdeflate_compressor *c,
25376			const struct deflate_codes *codes)
25377			{
25378			u32 len;
25379
25380			STATIC_ASSERT(MAX_LITLEN_CODEWORD_LEN +
25381			DEFLATE_MAX_EXTRA_LENGTH_BITS <= 32);
25382
25383	9766	100	for (len = DEFLATE_MIN_MATCH_LEN; len <= DEFLATE_MAX_MATCH_LEN; len++) {
25384	9728		unsigned slot = deflate_length_slot[len];
25385	9728		unsigned litlen_sym = DEFLATE_FIRST_LEN_SYM + slot;
25386	9728		u32 extra_bits = len - deflate_length_slot_base[slot];
25387
25388	9728		c->o.length.codewords[len] =
25389	9728		codes->codewords.litlen[litlen_sym] \|
25390	9728		(extra_bits << codes->lens.litlen[litlen_sym]);
25391	9728		c->o.length.lens[len] = codes->lens.litlen[litlen_sym] +
25392	9728		deflate_extra_length_bits[slot];
25393			}
25394	38		}
25395
25396
25397			#define WRITE_MATCH(c_, codes_, length_, offset_, offset_slot_) \
25398			do { \
25399			const struct libdeflate_compressor *c__ = (c_); \
25400			const struct deflate_codes *codes__ = (codes_); \
25401			u32 length__ = (length_); \
25402			u32 offset__ = (offset_); \
25403			unsigned offset_slot__ = (offset_slot_); \
25404			\
25405			\
25406			STATIC_ASSERT(CAN_BUFFER(MAX_LITLEN_CODEWORD_LEN + \
25407			DEFLATE_MAX_EXTRA_LENGTH_BITS)); \
25408			ADD_BITS(c__->o.length.codewords[length__], \
25409			c__->o.length.lens[length__]); \
25410			\
25411			if (!CAN_BUFFER(MAX_LITLEN_CODEWORD_LEN + \
25412			DEFLATE_MAX_EXTRA_LENGTH_BITS + \
25413			MAX_OFFSET_CODEWORD_LEN + \
25414			DEFLATE_MAX_EXTRA_OFFSET_BITS)) \
25415			FLUSH_BITS(); \
25416			\
25417			\
25418			ADD_BITS(codes__->codewords.offset[offset_slot__], \
25419			codes__->lens.offset[offset_slot__]); \
25420			\
25421			if (!CAN_BUFFER(MAX_OFFSET_CODEWORD_LEN + \
25422			DEFLATE_MAX_EXTRA_OFFSET_BITS)) \
25423			FLUSH_BITS(); \
25424			\
25425			\
25426			ADD_BITS(offset__ - deflate_offset_slot_base[offset_slot__], \
25427			deflate_extra_offset_bits[offset_slot__]); \
25428			\
25429			FLUSH_BITS(); \
25430			} while (0)
25431
25432
25433			static void
25434	38		deflate_flush_block(struct libdeflate_compressor *c,
25435			struct deflate_output_bitstream *os,
25436			const u8 *block_begin, u32 block_length,
25437			const struct deflate_sequence *sequences,
25438			bool is_final_block)
25439			{
25440
25441	38		const u8 *in_next = block_begin;
25442	38		const u8 * const in_end = block_begin + block_length;
25443	38		bitbuf_t bitbuf = os->bitbuf;
25444	38		unsigned bitcount = os->bitcount;
25445	38		u8 *out_next = os->next;
25446	38		u8 * const out_fast_end =
25447	38	50	os->end - MIN(WORDBYTES - 1, os->end - out_next);
25448
25449	38		u32 dynamic_cost = 3;
25450	38		u32 static_cost = 3;
25451	38		u32 uncompressed_cost = 3;
25452			u32 best_cost;
25453			struct deflate_codes *codes;
25454			unsigned sym;
25455
25456	38	100	ASSERT(block_length >= MIN_BLOCK_LENGTH \|\|
		50
		50
25457			(is_final_block && block_length > 0));
25458			ASSERT(block_length <= MAX_BLOCK_LENGTH);
25459			ASSERT(bitcount <= 7);
25460	38		ASSERT((bitbuf & ~(((bitbuf_t)1 << bitcount) - 1)) == 0);
25461	38		ASSERT(out_next <= os->end);
25462	38		ASSERT(!os->overflow);
25463
25464
25465	38		deflate_precompute_huffman_header(c);
25466
25467
25468	38		dynamic_cost += 5 + 5 + 4 + (3 * c->o.precode.num_explicit_lens);
25469	760	100	for (sym = 0; sym < DEFLATE_NUM_PRECODE_SYMS; sym++) {
25470	722		u32 extra = deflate_extra_precode_bits[sym];
25471
25472	722		dynamic_cost += c->o.precode.freqs[sym] *
25473	722		(extra + c->o.precode.lens[sym]);
25474			}
25475
25476
25477	5510	100	for (sym = 0; sym < 144; sym++) {
25478	5472		dynamic_cost += c->freqs.litlen[sym] *
25479	5472		c->codes.lens.litlen[sym];
25480	5472		static_cost += c->freqs.litlen[sym] * 8;
25481			}
25482	4294	100	for (; sym < 256; sym++) {
25483	4256		dynamic_cost += c->freqs.litlen[sym] *
25484	4256		c->codes.lens.litlen[sym];
25485	4256		static_cost += c->freqs.litlen[sym] * 9;
25486			}
25487
25488
25489	38		dynamic_cost += c->codes.lens.litlen[DEFLATE_END_OF_BLOCK];
25490	38		static_cost += 7;
25491
25492
25493	38		for (sym = DEFLATE_FIRST_LEN_SYM;
25494	1140	100	sym < DEFLATE_FIRST_LEN_SYM + ARRAY_LEN(deflate_extra_length_bits);
25495	1102		sym++) {
25496	1102		u32 extra = deflate_extra_length_bits[
25497	1102		sym - DEFLATE_FIRST_LEN_SYM];
25498
25499	1102		dynamic_cost += c->freqs.litlen[sym] *
25500	1102		(extra + c->codes.lens.litlen[sym]);
25501	1102		static_cost += c->freqs.litlen[sym] *
25502	1102		(extra + c->static_codes.lens.litlen[sym]);
25503			}
25504
25505
25506	1178	100	for (sym = 0; sym < ARRAY_LEN(deflate_extra_offset_bits); sym++) {
25507	1140		u32 extra = deflate_extra_offset_bits[sym];
25508
25509	1140		dynamic_cost += c->freqs.offset[sym] *
25510	1140		(extra + c->codes.lens.offset[sym]);
25511	1140		static_cost += c->freqs.offset[sym] * (extra + 5);
25512			}
25513
25514
25515	38		uncompressed_cost += (-(bitcount + 3) & 7) + 32 +
25516	38		(40 * (DIV_ROUND_UP(block_length,
25517	38		UINT16_MAX) - 1)) +
25518	38		(8 * block_length);
25519
25520
25521
25522	38		best_cost = MIN(dynamic_cost, MIN(static_cost, uncompressed_cost));
25523
25524
25525	38	50	if (DIV_ROUND_UP(bitcount + best_cost, 8) > os->end - out_next) {
25526	0		os->overflow = true;
25527	0		return;
25528			}
25529
25530
25531	38	50	if (best_cost == uncompressed_cost) {
25532
25533			do {
25534	0		u8 bfinal = 0;
25535	0		size_t len = UINT16_MAX;
25536
25537	0	0	if (in_end - in_next <= UINT16_MAX) {
25538	0		bfinal = is_final_block;
25539	0		len = in_end - in_next;
25540			}
25541
25542	0		ASSERT(os->end - out_next >=
25543			DIV_ROUND_UP(bitcount + 3, 8) + 4 + len);
25544
25545			STATIC_ASSERT(DEFLATE_BLOCKTYPE_UNCOMPRESSED == 0);
25546	0		*out_next++ = (bfinal << bitcount) \| bitbuf;
25547	0	0	if (bitcount > 5)
25548	0		*out_next++ = 0;
25549	0		bitbuf = 0;
25550	0		bitcount = 0;
25551
25552	0		put_unaligned_le16(len, out_next);
25553	0		out_next += 2;
25554	0		put_unaligned_le16(~len, out_next);
25555	0		out_next += 2;
25556	0		memcpy(out_next, in_next, len);
25557	0		out_next += len;
25558	0		in_next += len;
25559	0	0	} while (in_next != in_end);
25560
25561	0		goto out;
25562			}
25563
25564	38	100	if (best_cost == static_cost) {
25565
25566	36		codes = &c->static_codes;
25567	36		ADD_BITS(is_final_block, 1);
25568	36		ADD_BITS(DEFLATE_BLOCKTYPE_STATIC_HUFFMAN, 2);
25569	72	50	FLUSH_BITS();
		0
25570			} else {
25571	2		const unsigned num_explicit_lens = c->o.precode.num_explicit_lens;
25572	2		const unsigned num_precode_items = c->o.precode.num_items;
25573			unsigned precode_sym, precode_item;
25574			unsigned i;
25575
25576
25577
25578	2		codes = &c->codes;
25579			STATIC_ASSERT(CAN_BUFFER(1 + 2 + 5 + 5 + 4 + 3));
25580	2		ADD_BITS(is_final_block, 1);
25581	2		ADD_BITS(DEFLATE_BLOCKTYPE_DYNAMIC_HUFFMAN, 2);
25582	2		ADD_BITS(c->o.precode.num_litlen_syms - 257, 5);
25583	2		ADD_BITS(c->o.precode.num_offset_syms - 1, 5);
25584	2		ADD_BITS(num_explicit_lens - 4, 4);
25585
25586
25587			if (CAN_BUFFER(3 * (DEFLATE_NUM_PRECODE_SYMS - 1))) {
25588
25589	2		precode_sym = deflate_precode_lens_permutation[0];
25590	2		ADD_BITS(c->o.precode.lens[precode_sym], 3);
25591	4	50	FLUSH_BITS();
		0
25592	2		i = 1;
25593			do {
25594	26		precode_sym =
25595	26		deflate_precode_lens_permutation[i];
25596	26		ADD_BITS(c->o.precode.lens[precode_sym], 3);
25597	26	100	} while (++i < num_explicit_lens);
25598	4	50	FLUSH_BITS();
		0
25599			} else {
25600			FLUSH_BITS();
25601			i = 0;
25602			do {
25603			precode_sym =
25604			deflate_precode_lens_permutation[i];
25605			ADD_BITS(c->o.precode.lens[precode_sym], 3);
25606			FLUSH_BITS();
25607			} while (++i < num_explicit_lens);
25608			}
25609
25610
25611	2		i = 0;
25612			do {
25613	233		precode_item = c->o.precode.items[i];
25614	233		precode_sym = precode_item & 0x1F;
25615			STATIC_ASSERT(CAN_BUFFER(MAX_PRE_CODEWORD_LEN + 7));
25616	233		ADD_BITS(c->o.precode.codewords[precode_sym],
25617			c->o.precode.lens[precode_sym]);
25618	233		ADD_BITS(precode_item >> 5,
25619			deflate_extra_precode_bits[precode_sym]);
25620	466	50	FLUSH_BITS();
		0
25621	233	100	} while (++i < num_precode_items);
25622			}
25623
25624
25625			ASSERT(bitcount <= 7);
25626	38		deflate_compute_full_len_codewords(c, codes);
25627			#if SUPPORT_NEAR_OPTIMAL_PARSING
25628	38	100	if (sequences == NULL) {
25629
25630	9		struct deflate_optimum_node *cur_node =
25631			&c->p.n.optimum_nodes[0];
25632	9		struct deflate_optimum_node * const end_node =
25633			&c->p.n.optimum_nodes[block_length];
25634			do {
25635	225		u32 length = cur_node->item & OPTIMUM_LEN_MASK;
25636	225		u32 offset = cur_node->item >> OPTIMUM_OFFSET_SHIFT;
25637
25638	225	100	if (length == 1) {
25639
25640	162		ADD_BITS(codes->codewords.litlen[offset],
25641			codes->lens.litlen[offset]);
25642	324	50	FLUSH_BITS();
		0
25643			} else {
25644
25645	126	50	WRITE_MATCH(c, codes, length, offset,
		0
25646			c->p.n.offset_slot_full[offset]);
25647			}
25648	225		cur_node += length;
25649	225	100	} while (cur_node != end_node);
25650			} else
25651			#endif
25652			{
25653
25654			const struct deflate_sequence *seq;
25655
25656	29		for (seq = sequences; ; seq++) {
25657	2120		u32 litrunlen = seq->litrunlen_and_length &
25658			SEQ_LITRUNLEN_MASK;
25659	2120		u32 length = seq->litrunlen_and_length >>
25660			SEQ_LENGTH_SHIFT;
25661			unsigned lit;
25662
25663
25664			if (CAN_BUFFER(4 * MAX_LITLEN_CODEWORD_LEN)) {
25665	2677	100	for (; litrunlen >= 4; litrunlen -= 4) {
25666	557		lit = *in_next++;
25667	557		ADD_BITS(codes->codewords.litlen[lit],
25668			codes->lens.litlen[lit]);
25669	557		lit = *in_next++;
25670	557		ADD_BITS(codes->codewords.litlen[lit],
25671			codes->lens.litlen[lit]);
25672	557		lit = *in_next++;
25673	557		ADD_BITS(codes->codewords.litlen[lit],
25674			codes->lens.litlen[lit]);
25675	557		lit = *in_next++;
25676	557		ADD_BITS(codes->codewords.litlen[lit],
25677			codes->lens.litlen[lit]);
25678	1114	50	FLUSH_BITS();
		0
25679			}
25680	2120	100	if (litrunlen-- != 0) {
25681	509		lit = *in_next++;
25682	509		ADD_BITS(codes->codewords.litlen[lit],
25683			codes->lens.litlen[lit]);
25684	509	100	if (litrunlen-- != 0) {
25685	280		lit = *in_next++;
25686	280		ADD_BITS(codes->codewords.litlen[lit],
25687			codes->lens.litlen[lit]);
25688	280	100	if (litrunlen-- != 0) {
25689	112		lit = *in_next++;
25690	112		ADD_BITS(codes->codewords.litlen[lit],
25691			codes->lens.litlen[lit]);
25692			}
25693			}
25694	1018	50	FLUSH_BITS();
		0
25695			}
25696			} else {
25697			while (litrunlen--) {
25698			lit = *in_next++;
25699			ADD_BITS(codes->codewords.litlen[lit],
25700			codes->lens.litlen[lit]);
25701			FLUSH_BITS();
25702			}
25703			}
25704
25705	2120	100	if (length == 0) {
25706			ASSERT(in_next == in_end);
25707	29		break;
25708			}
25709
25710
25711	4182	50	WRITE_MATCH(c, codes, length, seq->offset,
		0
25712			seq->offset_slot);
25713	2091		in_next += length;
25714			}
25715			}
25716
25717
25718			ASSERT(bitcount <= 7);
25719	38		ADD_BITS(codes->codewords.litlen[DEFLATE_END_OF_BLOCK],
25720			codes->lens.litlen[DEFLATE_END_OF_BLOCK]);
25721	76	50	FLUSH_BITS();
		0
25722	0		out:
25723			ASSERT(bitcount <= 7);
25724
25725	38		ASSERT(8 * (out_next - os->next) + bitcount - os->bitcount == best_cost);
25726	38		os->bitbuf = bitbuf;
25727	38		os->bitcount = bitcount;
25728	38		os->next = out_next;
25729			}
25730
25731			static void
25732	29		deflate_finish_block(struct libdeflate_compressor *c,
25733			struct deflate_output_bitstream *os,
25734			const u8 *block_begin, u32 block_length,
25735			const struct deflate_sequence *sequences,
25736			bool is_final_block)
25737			{
25738	29		c->freqs.litlen[DEFLATE_END_OF_BLOCK]++;
25739	29		deflate_make_huffman_codes(&c->freqs, &c->codes);
25740	29		deflate_flush_block(c, os, block_begin, block_length, sequences,
25741			is_final_block);
25742	29		}
25743
25744
25745
25746
25747
25748
25749			static void
25750	44		init_block_split_stats(struct block_split_stats *stats)
25751			{
25752			int i;
25753
25754	484	100	for (i = 0; i < NUM_OBSERVATION_TYPES; i++) {
25755	440		stats->new_observations[i] = 0;
25756	440		stats->observations[i] = 0;
25757			}
25758	44		stats->num_new_observations = 0;
25759	44		stats->num_observations = 0;
25760	44		}
25761
25762
25763			static forceinline void
25764			observe_literal(struct block_split_stats *stats, u8 lit)
25765			{
25766	3237		stats->new_observations[((lit >> 5) & 0x6) \| (lit & 1)]++;
25767	3237		stats->num_new_observations++;
25768	3237		}
25769
25770
25771			static forceinline void
25772			observe_match(struct block_split_stats *stats, u32 length)
25773			{
25774	2133		stats->new_observations[NUM_LITERAL_OBSERVATION_TYPES +
25775	2133		(length >= 9)]++;
25776	2133		stats->num_new_observations++;
25777	2133		}
25778
25779			static void
25780	15		merge_new_observations(struct block_split_stats *stats)
25781			{
25782			int i;
25783
25784	165	100	for (i = 0; i < NUM_OBSERVATION_TYPES; i++) {
25785	150		stats->observations[i] += stats->new_observations[i];
25786	150		stats->new_observations[i] = 0;
25787			}
25788	15		stats->num_observations += stats->num_new_observations;
25789	15		stats->num_new_observations = 0;
25790	15		}
25791
25792			static bool
25793	7		do_end_block_check(struct block_split_stats *stats, u32 block_length)
25794			{
25795	7	100	if (stats->num_observations > 0) {
25796
25797	6		u32 total_delta = 0;
25798			u32 num_items;
25799			u32 cutoff;
25800			int i;
25801
25802	66	100	for (i = 0; i < NUM_OBSERVATION_TYPES; i++) {
25803	60		u32 expected = stats->observations[i] *
25804	60		stats->num_new_observations;
25805	60		u32 actual = stats->new_observations[i] *
25806	60		stats->num_observations;
25807	60	100	u32 delta = (actual > expected) ? actual - expected :
25808			expected - actual;
25809
25810	60		total_delta += delta;
25811			}
25812
25813	6		num_items = stats->num_observations +
25814	6		stats->num_new_observations;
25815
25816	6		cutoff = stats->num_new_observations * 200 / 512 *
25817	6		stats->num_observations;
25818
25819	6	100	if (block_length < 10000 && num_items < 8192)
		50
25820	1		cutoff += (u64)cutoff * (8192 - num_items) / 8192;
25821
25822
25823	6		if (total_delta +
25824	6	100	(block_length / 4096) * stats->num_observations >= cutoff)
25825	1		return true;
25826			}
25827	6		merge_new_observations(stats);
25828	6		return false;
25829			}
25830
25831			static forceinline bool
25832			ready_to_check_block(const struct block_split_stats *stats,
25833			const u8 *in_block_begin, const u8 *in_next,
25834			const u8 *in_end)
25835			{
25836	5245		return stats->num_new_observations >= NUM_OBSERVATIONS_PER_BLOCK_CHECK
25837	450	0	&& in_next - in_block_begin >= MIN_BLOCK_LENGTH
		0
		100
		0
25838	5245	0	&& in_end - in_next >= MIN_BLOCK_LENGTH;
		0
		100
		0
25839			}
25840
25841			static forceinline bool
25842			should_end_block(struct block_split_stats *stats,
25843			const u8 *in_block_begin, const u8 *in_next, const u8 *in_end)
25844			{
25845
25846	5029	50	if (!ready_to_check_block(stats, in_block_begin, in_next, in_end))
		100
		50
25847	5022		return false;
25848
25849	7		return do_end_block_check(stats, in_next - in_block_begin);
25850			}
25851
25852
25853
25854			static void
25855	29		deflate_begin_sequences(struct libdeflate_compressor *c,
25856			struct deflate_sequence *first_seq)
25857			{
25858	29		deflate_reset_symbol_frequencies(c);
25859	29		first_seq->litrunlen_and_length = 0;
25860	29		}
25861
25862			static forceinline void
25863			deflate_choose_literal(struct libdeflate_compressor *c, unsigned literal,
25864			bool gather_split_stats, struct deflate_sequence *seq)
25865			{
25866	3129		c->freqs.litlen[literal]++;
25867
25868	3129		if (gather_split_stats)
25869	3075		observe_literal(&c->split_stats, literal);
25870
25871			STATIC_ASSERT(MAX_BLOCK_LENGTH <= SEQ_LITRUNLEN_MASK);
25872	3129		seq->litrunlen_and_length++;
25873	3129		}
25874
25875			static forceinline void
25876			deflate_choose_match(struct libdeflate_compressor *c,
25877			u32 length, u32 offset, bool gather_split_stats,
25878			struct deflate_sequence **seq_p)
25879			{
25880	2091		struct deflate_sequence *seq = *seq_p;
25881	2091		unsigned length_slot = deflate_length_slot[length];
25882	2091		unsigned offset_slot = deflate_get_offset_slot(offset);
25883
25884	2091		c->freqs.litlen[DEFLATE_FIRST_LEN_SYM + length_slot]++;
25885	2091		c->freqs.offset[offset_slot]++;
25886	2091	50	if (gather_split_stats)
		0
		0
		50
		0
		50
		50
		50
25887	2070	50	observe_match(&c->split_stats, length);
		0
		0
		50
		0
		100
		50
		0
25888
25889	2091		seq->litrunlen_and_length \|= length << SEQ_LENGTH_SHIFT;
25890	2091		seq->offset = offset;
25891	2091		seq->offset_slot = offset_slot;
25892
25893	2091		seq++;
25894	2091		seq->litrunlen_and_length = 0;
25895	2091		*seq_p = seq;
25896	2091		}
25897
25898
25899			static forceinline void
25900			adjust_max_and_nice_len(u32 *max_len, u32 *nice_len, size_t remaining)
25901			{
25902	15075	100	if (unlikely(remaining < DEFLATE_MAX_MATCH_LEN)) {
25903	2389		*max_len = remaining;
25904	2389		*nice_len = MIN(*nice_len, *max_len);
25905			}
25906	22346		}
25907
25908
25909			static u32
25910	46		choose_min_match_len(u32 num_used_literals, u32 max_search_depth)
25911			{
25912
25913			static const u8 min_lens[] = {
25914			9, 9, 9, 9, 9, 9, 8, 8, 7, 7, 6, 6, 6, 6, 6, 6,
25915			5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
25916			5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4,
25917			4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
25918			4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
25919
25920			};
25921			u32 min_len;
25922
25923			STATIC_ASSERT(DEFLATE_MIN_MATCH_LEN <= 3);
25924			STATIC_ASSERT(ARRAY_LEN(min_lens) <= DEFLATE_NUM_LITERALS + 1);
25925
25926	46	50	if (num_used_literals >= ARRAY_LEN(min_lens))
25927	0		return 3;
25928	46		min_len = min_lens[num_used_literals];
25929
25930	46	100	if (max_search_depth < 16) {
25931	6	50	if (max_search_depth < 5)
25932	0		min_len = MIN(min_len, 4);
25933	6	100	else if (max_search_depth < 10)
25934	3		min_len = MIN(min_len, 5);
25935			else
25936	3		min_len = MIN(min_len, 7);
25937			}
25938	46		return min_len;
25939			}
25940
25941			static u32
25942	35		calculate_min_match_len(const u8 *data, size_t data_len, u32 max_search_depth)
25943			{
25944	35		u8 used[256] = { 0 };
25945	35		u32 num_used_literals = 0;
25946			size_t i;
25947
25948
25949	35	50	if (data_len < 512)
25950	0		return DEFLATE_MIN_MATCH_LEN;
25951
25952
25953	35		data_len = MIN(data_len, 4096);
25954	64327	100	for (i = 0; i < data_len; i++)
25955	64292		used[data[i]] = 1;
25956	8995	100	for (i = 0; i < 256; i++)
25957	8960		num_used_literals += used[i];
25958	35		return choose_min_match_len(num_used_literals, max_search_depth);
25959			}
25960
25961
25962			static u32
25963	2		recalculate_min_match_len(const struct deflate_freqs *freqs,
25964			u32 max_search_depth)
25965			{
25966	2		u32 literal_freq = 0;
25967			u32 cutoff;
25968	2		u32 num_used_literals = 0;
25969			int i;
25970
25971	514	100	for (i = 0; i < DEFLATE_NUM_LITERALS; i++)
25972	512		literal_freq += freqs->litlen[i];
25973
25974	2		cutoff = literal_freq >> 10;
25975
25976	514	100	for (i = 0; i < DEFLATE_NUM_LITERALS; i++) {
25977	512	100	if (freqs->litlen[i] > cutoff)
25978	119		num_used_literals++;
25979			}
25980	2		return choose_min_match_len(num_used_literals, max_search_depth);
25981			}
25982
25983			static forceinline const u8 *
25984			choose_max_block_end(const u8 *in_block_begin, const u8 *in_end,
25985			size_t soft_max_len)
25986			{
25987	9	50	if (in_end - in_block_begin < soft_max_len + MIN_BLOCK_LENGTH)
25988	38		return in_end;
25989	0		return in_block_begin + soft_max_len;
25990			}
25991
25992
25993			static size_t
25994	0		deflate_compress_none(const u8 *in, size_t in_nbytes,
25995			u8 *out, size_t out_nbytes_avail)
25996			{
25997	0		const u8 *in_next = in;
25998	0		const u8 * const in_end = in + in_nbytes;
25999	0		u8 *out_next = out;
26000	0		u8 * const out_end = out + out_nbytes_avail;
26001
26002
26003	0	0	if (unlikely(in_nbytes == 0)) {
26004	0	0	if (out_nbytes_avail < 5)
26005	0		return 0;
26006
26007	0		*out_next++ = 1 \| (DEFLATE_BLOCKTYPE_UNCOMPRESSED << 1);
26008
26009			put_unaligned_le32(0xFFFF0000, out_next);
26010	0		return 5;
26011			}
26012
26013			do {
26014	0		u8 bfinal = 0;
26015	0		size_t len = UINT16_MAX;
26016
26017	0	0	if (in_end - in_next <= UINT16_MAX) {
26018	0		bfinal = 1;
26019	0		len = in_end - in_next;
26020			}
26021	0	0	if (out_end - out_next < 5 + len)
26022	0		return 0;
26023
26024	0		*out_next++ = bfinal \| (DEFLATE_BLOCKTYPE_UNCOMPRESSED << 1);
26025
26026
26027	0		put_unaligned_le16(len, out_next);
26028	0		out_next += 2;
26029	0		put_unaligned_le16(~len, out_next);
26030	0		out_next += 2;
26031	0		memcpy(out_next, in_next, len);
26032	0		out_next += len;
26033	0		in_next += len;
26034	0	0	} while (in_next != in_end);
26035
26036	0		return out_next - out;
26037			}
26038
26039
26040			static void
26041	3		deflate_compress_fastest(struct libdeflate_compressor * restrict c,
26042			const u8 *in, size_t in_nbytes,
26043			struct deflate_output_bitstream *os)
26044			{
26045	3		const u8 *in_next = in;
26046	3		const u8 *in_end = in_next + in_nbytes;
26047	3		const u8 *in_cur_base = in_next;
26048	3		u32 max_len = DEFLATE_MAX_MATCH_LEN;
26049	3		u32 nice_len = MIN(c->nice_match_length, max_len);
26050	3		u32 next_hash = 0;
26051
26052	3		ht_matchfinder_init(&c->p.f.ht_mf);
26053
26054			do {
26055
26056
26057	3	50	const u8 * const in_block_begin = in_next;
26058	3		const u8 * const in_max_block_end = choose_max_block_end(
26059			in_next, in_end, FAST_SOFT_MAX_BLOCK_LENGTH);
26060	3		struct deflate_sequence *seq = c->p.f.sequences;
26061
26062	3		deflate_begin_sequences(c, seq);
26063
26064			do {
26065			u32 length;
26066			u32 offset;
26067	75		size_t remaining = in_end - in_next;
26068
26069	75	100	if (unlikely(remaining < DEFLATE_MAX_MATCH_LEN)) {
26070	3		max_len = remaining;
26071	3	50	if (max_len < HT_MATCHFINDER_REQUIRED_NBYTES) {
26072			do {
26073	0		deflate_choose_literal(c,
26074	0	0	*in_next++, false, seq);
26075	0	0	} while (--max_len);
26076	0		break;
26077			}
26078	3		nice_len = MIN(nice_len, max_len);
26079			}
26080	75	50	length = ht_matchfinder_longest_match(&c->p.f.ht_mf,
26081			&in_cur_base,
26082			in_next,
26083			max_len,
26084			nice_len,
26085			&next_hash,
26086			&offset);
26087	75	100	if (length) {
26088
26089	21	50	deflate_choose_match(c, length, offset, false,
26090			&seq);
26091	21	100	ht_matchfinder_skip_bytes(&c->p.f.ht_mf,
26092			&in_cur_base,
26093			in_next + 1,
26094			in_end,
26095			length - 1,
26096			&next_hash);
26097	21		in_next += length;
26098			} else {
26099
26100	54	50	deflate_choose_literal(c, *in_next++, false,
26101			seq);
26102			}
26103
26104
26105	75	100	} while (in_next < in_max_block_end &&
26106	72	50	seq < &c->p.f.sequences[FAST_SEQ_STORE_LENGTH]);
26107
26108	3		deflate_finish_block(c, os, in_block_begin,
26109	3		in_next - in_block_begin,
26110	3		c->p.f.sequences, in_next == in_end);
26111	3	50	} while (in_next != in_end && !os->overflow);
		0
26112	3		}
26113
26114
26115			static void
26116	9		deflate_compress_greedy(struct libdeflate_compressor * restrict c,
26117			const u8 *in, size_t in_nbytes,
26118			struct deflate_output_bitstream *os)
26119			{
26120	9		const u8 *in_next = in;
26121	9		const u8 *in_end = in_next + in_nbytes;
26122	9		const u8 *in_cur_base = in_next;
26123	9		u32 max_len = DEFLATE_MAX_MATCH_LEN;
26124	9		u32 nice_len = MIN(c->nice_match_length, max_len);
26125	9		u32 next_hashes[2] = {0, 0};
26126
26127	9		hc_matchfinder_init(&c->p.g.hc_mf);
26128
26129			do {
26130
26131
26132	9	50	const u8 * const in_block_begin = in_next;
26133	9		const u8 * const in_max_block_end = choose_max_block_end(
26134			in_next, in_end, SOFT_MAX_BLOCK_LENGTH);
26135	9		struct deflate_sequence *seq = c->p.g.sequences;
26136			u32 min_len;
26137
26138	9		init_block_split_stats(&c->split_stats);
26139	9		deflate_begin_sequences(c, seq);
26140	9		min_len = calculate_min_match_len(in_next,
26141	9		in_max_block_end - in_next,
26142			c->max_search_depth);
26143			do {
26144			u32 length;
26145			u32 offset;
26146
26147	225		adjust_max_and_nice_len(&max_len, &nice_len,
26148	225	100	in_end - in_next);
26149	225	50	length = hc_matchfinder_longest_match(
26150			&c->p.g.hc_mf,
26151			&in_cur_base,
26152			in_next,
26153			min_len - 1,
26154			max_len,
26155			nice_len,
26156			c->max_search_depth,
26157			next_hashes,
26158			&offset);
26159
26160	225	100	if (length >= min_len &&
		50
26161	0		(length > DEFLATE_MIN_MATCH_LEN ||
26162	0	0	offset <= 4096)) {
26163
26164	63	50	deflate_choose_match(c, length, offset, true,
26165			&seq);
26166	63	100	hc_matchfinder_skip_bytes(&c->p.g.hc_mf,
26167			&in_cur_base,
26168			in_next + 1,
26169			in_end,
26170			length - 1,
26171			next_hashes);
26172	63		in_next += length;
26173			} else {
26174
26175	162	50	deflate_choose_literal(c, *in_next++, true,
26176			seq);
26177			}
26178
26179
26180	216		} while (in_next < in_max_block_end &&
26181	225	100	seq < &c->p.g.sequences[SEQ_STORE_LENGTH] &&
		50
26182	432	50	!should_end_block(&c->split_stats,
		50
26183			in_block_begin, in_next, in_end));
26184
26185	9		deflate_finish_block(c, os, in_block_begin,
26186	9		in_next - in_block_begin,
26187	9		c->p.g.sequences, in_next == in_end);
26188	9	50	} while (in_next != in_end && !os->overflow);
		0
26189	9		}
26190
26191			static forceinline void
26192			deflate_compress_lazy_generic(struct libdeflate_compressor * restrict c,
26193			const u8 *in, size_t in_nbytes,
26194			struct deflate_output_bitstream *os, bool lazy2)
26195			{
26196	16		const u8 *in_next = in;
26197	16		const u8 *in_end = in_next + in_nbytes;
26198	16		const u8 *in_cur_base = in_next;
26199	16		u32 max_len = DEFLATE_MAX_MATCH_LEN;
26200	16		u32 nice_len = MIN(c->nice_match_length, max_len);
26201	16		u32 next_hashes[2] = {0, 0};
26202
26203	16		hc_matchfinder_init(&c->p.g.hc_mf);
26204
26205			do {
26206
26207
26208	17	50	const u8 * const in_block_begin = in_next;
		50
26209	17		const u8 * const in_max_block_end = choose_max_block_end(
26210			in_next, in_end, SOFT_MAX_BLOCK_LENGTH);
26211	17		const u8 *next_recalc_min_len =
26212	17		in_next + MIN(in_end - in_next, 10000);
26213	17		struct deflate_sequence *seq = c->p.g.sequences;
26214			u32 min_len;
26215
26216	17		init_block_split_stats(&c->split_stats);
26217	17		deflate_begin_sequences(c, seq);
26218	17		min_len = calculate_min_match_len(in_next,
26219	17		in_max_block_end - in_next,
26220			c->max_search_depth);
26221			do {
26222			u32 cur_len;
26223			u32 cur_offset;
26224			u32 next_len;
26225			u32 next_offset;
26226
26227
26228	4829	50	if (in_next >= next_recalc_min_len) {
		100
26229	2		min_len = recalculate_min_match_len(
26230	2		&c->freqs,
26231			c->max_search_depth);
26232	2		next_recalc_min_len +=
26233	2		MIN(in_end - next_recalc_min_len,
26234			in_next - in_block_begin);
26235			}
26236
26237
26238	4829		adjust_max_and_nice_len(&max_len, &nice_len,
26239	4829	100	in_end - in_next);
		100
26240	4829	50	cur_len = hc_matchfinder_longest_match(
		50
26241			&c->p.g.hc_mf,
26242			&in_cur_base,
26243			in_next,
26244			min_len - 1,
26245			max_len,
26246			nice_len,
26247			c->max_search_depth,
26248			next_hashes,
26249			&cur_offset);
26250	4829	100	if (cur_len < min_len ||
		50
		100
		50
26251	0		(cur_len == DEFLATE_MIN_MATCH_LEN &&
26252	0	0	cur_offset > 8192)) {
		0
26253
26254	2822	50	deflate_choose_literal(c, *in_next++, true,
		50
26255			seq);
26256	2928		continue;
26257			}
26258	2007		in_next++;
26259
26260	91		have_cur_match:
26261
26262	2098	50	if (cur_len >= nice_len) {
		100
26263	106	50	deflate_choose_match(c, cur_len, cur_offset,
		50
26264			true, &seq);
26265	106	100	hc_matchfinder_skip_bytes(&c->p.g.hc_mf,
		100
26266			&in_cur_base,
26267			in_next,
26268			in_end,
26269			cur_len - 1,
26270			next_hashes);
26271	106		in_next += cur_len - 1;
26272	106		continue;
26273			}
26274
26275
26276	1992		adjust_max_and_nice_len(&max_len, &nice_len,
26277	1992	0	in_end - in_next);
		100
26278	3984		next_len = hc_matchfinder_longest_match(
26279			&c->p.g.hc_mf,
26280			&in_cur_base,
26281			in_next++,
26282			cur_len - 1,
26283			max_len,
26284			nice_len,
26285	1992	0	c->max_search_depth >> 1,
		50
26286			next_hashes,
26287			&next_offset);
26288	1992	0	if (next_len >= cur_len &&
		100
26289	276		4 * (int)(next_len - cur_len) +
26290	138		((int)bsr32(cur_offset) -
26291	276	0	(int)bsr32(next_offset)) > 2) {
		100
26292
26293	91	0	deflate_choose_literal(c, *(in_next - 2), true,
		50
26294			seq);
26295	91		cur_len = next_len;
26296	91		cur_offset = next_offset;
26297	91		goto have_cur_match;
26298			}
26299
26300	1901	0	if (lazy2) {
		50
26301
26302	0		adjust_max_and_nice_len(&max_len, &nice_len,
26303	0	0	in_end - in_next);
		0
26304	0		next_len = hc_matchfinder_longest_match(
26305			&c->p.g.hc_mf,
26306			&in_cur_base,
26307			in_next++,
26308			cur_len - 1,
26309			max_len,
26310			nice_len,
26311	0	0	c->max_search_depth >> 2,
		0
26312			next_hashes,
26313			&next_offset);
26314	0	0	if (next_len >= cur_len &&
		0
26315	0		4 * (int)(next_len - cur_len) +
26316	0		((int)bsr32(cur_offset) -
26317	0	0	(int)bsr32(next_offset)) > 6) {
		0
26318
26319	0		deflate_choose_literal(
26320	0	0	c, *(in_next - 3), true, seq);
		0
26321	0		deflate_choose_literal(
26322	0	0	c, *(in_next - 2), true, seq);
		0
26323	0		cur_len = next_len;
26324	0		cur_offset = next_offset;
26325	0		goto have_cur_match;
26326			}
26327
26328	0	0	deflate_choose_match(c, cur_len, cur_offset,
		0
26329			true, &seq);
26330	0	0	if (cur_len > 3) {
		0
26331	0	0	hc_matchfinder_skip_bytes(&c->p.g.hc_mf,
		0
26332			&in_cur_base,
26333			in_next,
26334			in_end,
26335			cur_len - 3,
26336			next_hashes);
26337	0		in_next += cur_len - 3;
26338			}
26339			} else {
26340
26341	1901	0	deflate_choose_match(c, cur_len, cur_offset,
		50
26342			true, &seq);
26343	1901	0	hc_matchfinder_skip_bytes(&c->p.g.hc_mf,
		50
26344			&in_cur_base,
26345			in_next,
26346			in_end,
26347			cur_len - 2,
26348			next_hashes);
26349	1901		in_next += cur_len - 2;
26350			}
26351
26352	4813		} while (in_next < in_max_block_end &&
26353	4829	100	seq < &c->p.g.sequences[SEQ_STORE_LENGTH] &&
		50
		100
		50
26354	9626	50	!should_end_block(&c->split_stats,
		50
		100
		100
26355			in_block_begin, in_next, in_end));
26356
26357	17		deflate_finish_block(c, os, in_block_begin,
26358	17		in_next - in_block_begin,
26359	17		c->p.g.sequences, in_next == in_end);
26360	17	50	} while (in_next != in_end && !os->overflow);
		50
26361	16		}
26362
26363
26364			static void
26365	10		deflate_compress_lazy(struct libdeflate_compressor * restrict c,
26366			const u8 *in, size_t in_nbytes,
26367			struct deflate_output_bitstream *os)
26368			{
26369			deflate_compress_lazy_generic(c, in, in_nbytes, os, false);
26370	10		}
26371
26372
26373			static void
26374	6		deflate_compress_lazy2(struct libdeflate_compressor * restrict c,
26375			const u8 *in, size_t in_nbytes,
26376			struct deflate_output_bitstream *os)
26377			{
26378			deflate_compress_lazy_generic(c, in, in_nbytes, os, true);
26379	6		}
26380
26381			#if SUPPORT_NEAR_OPTIMAL_PARSING
26382
26383
26384			static void
26385	24		deflate_tally_item_list(struct libdeflate_compressor *c, u32 block_length)
26386			{
26387	24		struct deflate_optimum_node *cur_node = &c->p.n.optimum_nodes[0];
26388	24		struct deflate_optimum_node *end_node =
26389			&c->p.n.optimum_nodes[block_length];
26390
26391			do {
26392	600		u32 length = cur_node->item & OPTIMUM_LEN_MASK;
26393	600		u32 offset = cur_node->item >> OPTIMUM_OFFSET_SHIFT;
26394
26395	600	100	if (length == 1) {
26396
26397	432		c->freqs.litlen[offset]++;
26398			} else {
26399
26400	168		c->freqs.litlen[DEFLATE_FIRST_LEN_SYM +
26401	168		deflate_length_slot[length]]++;
26402	168		c->freqs.offset[c->p.n.offset_slot_full[offset]]++;
26403			}
26404	600		cur_node += length;
26405	600	100	} while (cur_node != end_node);
26406
26407
26408	24		c->freqs.litlen[DEFLATE_END_OF_BLOCK]++;
26409	24		}
26410
26411			static void
26412	9		deflate_choose_all_literals(struct libdeflate_compressor *c,
26413			const u8 *block, u32 block_length)
26414			{
26415			u32 i;
26416
26417	9		deflate_reset_symbol_frequencies(c);
26418	15309	100	for (i = 0; i < block_length; i++)
26419	15300		c->freqs.litlen[block[i]]++;
26420	9		c->freqs.litlen[DEFLATE_END_OF_BLOCK]++;
26421
26422	9		deflate_make_huffman_codes(&c->freqs, &c->codes);
26423	9		}
26424
26425
26426			static u32
26427	27		deflate_compute_true_cost(struct libdeflate_compressor *c)
26428			{
26429	27		u32 cost = 0;
26430			unsigned sym;
26431
26432	27		deflate_precompute_huffman_header(c);
26433
26434	27		memset(&c->codes.lens.litlen[c->o.precode.num_litlen_syms], 0,
26435	27		DEFLATE_NUM_LITLEN_SYMS - c->o.precode.num_litlen_syms);
26436
26437	27		cost += 5 + 5 + 4 + (3 * c->o.precode.num_explicit_lens);
26438	540	100	for (sym = 0; sym < DEFLATE_NUM_PRECODE_SYMS; sym++) {
26439	513		cost += c->o.precode.freqs[sym] *
26440	513		(c->o.precode.lens[sym] +
26441	513		deflate_extra_precode_bits[sym]);
26442			}
26443
26444	6966	100	for (sym = 0; sym < DEFLATE_FIRST_LEN_SYM; sym++)
26445	6939		cost += c->freqs.litlen[sym] * c->codes.lens.litlen[sym];
26446
26447	810	100	for (; sym < DEFLATE_FIRST_LEN_SYM +
26448	783		ARRAY_LEN(deflate_extra_length_bits); sym++)
26449	783		cost += c->freqs.litlen[sym] *
26450	783		(c->codes.lens.litlen[sym] +
26451	783		deflate_extra_length_bits[sym - DEFLATE_FIRST_LEN_SYM]);
26452
26453	837	100	for (sym = 0; sym < ARRAY_LEN(deflate_extra_offset_bits); sym++)
26454	810		cost += c->freqs.offset[sym] *
26455	810		(c->codes.lens.offset[sym] +
26456	810		deflate_extra_offset_bits[sym]);
26457	27		return cost;
26458			}
26459
26460
26461			static void
26462	15		deflate_set_costs_from_codes(struct libdeflate_compressor *c,
26463			const struct deflate_lens *lens)
26464			{
26465			unsigned i;
26466
26467
26468	3855	100	for (i = 0; i < DEFLATE_NUM_LITERALS; i++) {
26469	7680		u32 bits = (lens->litlen[i] ?
26470	3840	100	lens->litlen[i] : LITERAL_NOSTAT_BITS);
26471
26472	3840		c->p.n.costs.literal[i] = bits * BIT_COST;
26473			}
26474
26475
26476	3855	100	for (i = DEFLATE_MIN_MATCH_LEN; i <= DEFLATE_MAX_MATCH_LEN; i++) {
26477	3840		unsigned length_slot = deflate_length_slot[i];
26478	3840		unsigned litlen_sym = DEFLATE_FIRST_LEN_SYM + length_slot;
26479	7680		u32 bits = (lens->litlen[litlen_sym] ?
26480	3840	100	lens->litlen[litlen_sym] : LENGTH_NOSTAT_BITS);
26481
26482	3840		bits += deflate_extra_length_bits[length_slot];
26483	3840		c->p.n.costs.length[i] = bits * BIT_COST;
26484			}
26485
26486
26487	465	100	for (i = 0; i < ARRAY_LEN(deflate_offset_slot_base); i++) {
26488	900		u32 bits = (lens->offset[i] ?
26489	450	100	lens->offset[i] : OFFSET_NOSTAT_BITS);
26490
26491	450		bits += deflate_extra_offset_bits[i];
26492	450		c->p.n.costs.offset_slot[i] = bits * BIT_COST;
26493			}
26494	15		}
26495
26496
26497			static const struct {
26498			u8 used_lits_to_lit_cost[257];
26499			u8 len_sym_cost;
26500			} default_litlen_costs[] = {
26501			{
26502			.used_lits_to_lit_cost = {
26503			6, 6, 22, 32, 38, 43, 48, 51,
26504			54, 57, 59, 61, 64, 65, 67, 69,
26505			70, 72, 73, 74, 75, 76, 77, 79,
26506			80, 80, 81, 82, 83, 84, 85, 85,
26507			86, 87, 88, 88, 89, 89, 90, 91,
26508			91, 92, 92, 93, 93, 94, 95, 95,
26509			96, 96, 96, 97, 97, 98, 98, 99,
26510			99, 99, 100, 100, 101, 101, 101, 102,
26511			102, 102, 103, 103, 104, 104, 104, 105,
26512			105, 105, 105, 106, 106, 106, 107, 107,
26513			107, 108, 108, 108, 108, 109, 109, 109,
26514			109, 110, 110, 110, 111, 111, 111, 111,
26515			112, 112, 112, 112, 112, 113, 113, 113,
26516			113, 114, 114, 114, 114, 114, 115, 115,
26517			115, 115, 115, 116, 116, 116, 116, 116,
26518			117, 117, 117, 117, 117, 118, 118, 118,
26519			118, 118, 118, 119, 119, 119, 119, 119,
26520			120, 120, 120, 120, 120, 120, 121, 121,
26521			121, 121, 121, 121, 121, 122, 122, 122,
26522			122, 122, 122, 123, 123, 123, 123, 123,
26523			123, 123, 124, 124, 124, 124, 124, 124,
26524			124, 125, 125, 125, 125, 125, 125, 125,
26525			125, 126, 126, 126, 126, 126, 126, 126,
26526			127, 127, 127, 127, 127, 127, 127, 127,
26527			128, 128, 128, 128, 128, 128, 128, 128,
26528			128, 129, 129, 129, 129, 129, 129, 129,
26529			129, 129, 130, 130, 130, 130, 130, 130,
26530			130, 130, 130, 131, 131, 131, 131, 131,
26531			131, 131, 131, 131, 131, 132, 132, 132,
26532			132, 132, 132, 132, 132, 132, 132, 133,
26533			133, 133, 133, 133, 133, 133, 133, 133,
26534			133, 134, 134, 134, 134, 134, 134, 134,
26535			134,
26536			},
26537			.len_sym_cost = 109,
26538			}, {
26539			.used_lits_to_lit_cost = {
26540			16, 16, 32, 41, 48, 53, 57, 60,
26541			64, 66, 69, 71, 73, 75, 76, 78,
26542			80, 81, 82, 83, 85, 86, 87, 88,
26543			89, 90, 91, 92, 92, 93, 94, 95,
26544			96, 96, 97, 98, 98, 99, 99, 100,
26545			101, 101, 102, 102, 103, 103, 104, 104,
26546			105, 105, 106, 106, 107, 107, 108, 108,
26547			108, 109, 109, 110, 110, 110, 111, 111,
26548			112, 112, 112, 113, 113, 113, 114, 114,
26549			114, 115, 115, 115, 115, 116, 116, 116,
26550			117, 117, 117, 118, 118, 118, 118, 119,
26551			119, 119, 119, 120, 120, 120, 120, 121,
26552			121, 121, 121, 122, 122, 122, 122, 122,
26553			123, 123, 123, 123, 124, 124, 124, 124,
26554			124, 125, 125, 125, 125, 125, 126, 126,
26555			126, 126, 126, 127, 127, 127, 127, 127,
26556			128, 128, 128, 128, 128, 128, 129, 129,
26557			129, 129, 129, 129, 130, 130, 130, 130,
26558			130, 130, 131, 131, 131, 131, 131, 131,
26559			131, 132, 132, 132, 132, 132, 132, 133,
26560			133, 133, 133, 133, 133, 133, 134, 134,
26561			134, 134, 134, 134, 134, 134, 135, 135,
26562			135, 135, 135, 135, 135, 135, 136, 136,
26563			136, 136, 136, 136, 136, 136, 137, 137,
26564			137, 137, 137, 137, 137, 137, 138, 138,
26565			138, 138, 138, 138, 138, 138, 138, 139,
26566			139, 139, 139, 139, 139, 139, 139, 139,
26567			140, 140, 140, 140, 140, 140, 140, 140,
26568			140, 141, 141, 141, 141, 141, 141, 141,
26569			141, 141, 141, 142, 142, 142, 142, 142,
26570			142, 142, 142, 142, 142, 142, 143, 143,
26571			143, 143, 143, 143, 143, 143, 143, 143,
26572			144,
26573			},
26574			.len_sym_cost = 93,
26575			}, {
26576			.used_lits_to_lit_cost = {
26577			32, 32, 48, 57, 64, 69, 73, 76,
26578			80, 82, 85, 87, 89, 91, 92, 94,
26579			96, 97, 98, 99, 101, 102, 103, 104,
26580			105, 106, 107, 108, 108, 109, 110, 111,
26581			112, 112, 113, 114, 114, 115, 115, 116,
26582			117, 117, 118, 118, 119, 119, 120, 120,
26583			121, 121, 122, 122, 123, 123, 124, 124,
26584			124, 125, 125, 126, 126, 126, 127, 127,
26585			128, 128, 128, 129, 129, 129, 130, 130,
26586			130, 131, 131, 131, 131, 132, 132, 132,
26587			133, 133, 133, 134, 134, 134, 134, 135,
26588			135, 135, 135, 136, 136, 136, 136, 137,
26589			137, 137, 137, 138, 138, 138, 138, 138,
26590			139, 139, 139, 139, 140, 140, 140, 140,
26591			140, 141, 141, 141, 141, 141, 142, 142,
26592			142, 142, 142, 143, 143, 143, 143, 143,
26593			144, 144, 144, 144, 144, 144, 145, 145,
26594			145, 145, 145, 145, 146, 146, 146, 146,
26595			146, 146, 147, 147, 147, 147, 147, 147,
26596			147, 148, 148, 148, 148, 148, 148, 149,
26597			149, 149, 149, 149, 149, 149, 150, 150,
26598			150, 150, 150, 150, 150, 150, 151, 151,
26599			151, 151, 151, 151, 151, 151, 152, 152,
26600			152, 152, 152, 152, 152, 152, 153, 153,
26601			153, 153, 153, 153, 153, 153, 154, 154,
26602			154, 154, 154, 154, 154, 154, 154, 155,
26603			155, 155, 155, 155, 155, 155, 155, 155,
26604			156, 156, 156, 156, 156, 156, 156, 156,
26605			156, 157, 157, 157, 157, 157, 157, 157,
26606			157, 157, 157, 158, 158, 158, 158, 158,
26607			158, 158, 158, 158, 158, 158, 159, 159,
26608			159, 159, 159, 159, 159, 159, 159, 159,
26609			160,
26610			},
26611			.len_sym_cost = 84,
26612			},
26613			};
26614
26615
26616			static void
26617	9		deflate_choose_default_litlen_costs(struct libdeflate_compressor *c,
26618			const u8 *block_begin, u32 block_length,
26619			u32 *lit_cost, u32 *len_sym_cost)
26620			{
26621	9		u32 num_used_literals = 0;
26622	9		u32 literal_freq = block_length;
26623	9		u32 match_freq = 0;
26624			u32 cutoff;
26625			u32 i;
26626
26627
26628	9		memset(c->freqs.litlen, 0,
26629			DEFLATE_NUM_LITERALS * sizeof(c->freqs.litlen[0]));
26630	9		cutoff = literal_freq >> 11;
26631	15309	100	for (i = 0; i < block_length; i++)
26632	15300		c->freqs.litlen[block_begin[i]]++;
26633	2313	100	for (i = 0; i < DEFLATE_NUM_LITERALS; i++) {
26634	2304	100	if (c->freqs.litlen[i] > cutoff)
26635	108		num_used_literals++;
26636			}
26637	9	50	if (num_used_literals == 0)
26638	0		num_used_literals = 1;
26639
26640
26641	9		match_freq = 0;
26642	9		i = choose_min_match_len(num_used_literals, c->max_search_depth);
26643	2286	100	for (; i < ARRAY_LEN(c->p.n.match_len_freqs); i++) {
26644	2277		match_freq += c->p.n.match_len_freqs[i];
26645	2277		literal_freq -= i * c->p.n.match_len_freqs[i];
26646			}
26647	9	50	if ((s32)literal_freq < 0)
26648	0		literal_freq = 0;
26649
26650	9	50	if (match_freq > literal_freq)
26651	0		i = 2;
26652	9	50	else if (match_freq * 4 > literal_freq)
26653	9		i = 1;
26654			else
26655	0		i = 0;
26656
26657			STATIC_ASSERT(BIT_COST == 16);
26658	9		*lit_cost = default_litlen_costs[i].used_lits_to_lit_cost[
26659			num_used_literals];
26660	9		*len_sym_cost = default_litlen_costs[i].len_sym_cost;
26661	9		}
26662
26663			static forceinline u32
26664			deflate_default_length_cost(u32 len, u32 len_sym_cost)
26665			{
26666	2304		unsigned slot = deflate_length_slot[len];
26667	2304		u32 num_extra_bits = deflate_extra_length_bits[slot];
26668
26669	2304		return len_sym_cost + (num_extra_bits * BIT_COST);
26670			}
26671
26672			static forceinline u32
26673			deflate_default_offset_slot_cost(unsigned slot)
26674			{
26675	270		u32 num_extra_bits = deflate_extra_offset_bits[slot];
26676
26677	270		u32 offset_sym_cost = 4*BIT_COST + (907*BIT_COST)/1000;
26678
26679	270		return offset_sym_cost + (num_extra_bits * BIT_COST);
26680			}
26681
26682
26683			static void
26684	9		deflate_set_default_costs(struct libdeflate_compressor *c,
26685			u32 lit_cost, u32 len_sym_cost)
26686			{
26687			u32 i;
26688
26689
26690	2313	100	for (i = 0; i < DEFLATE_NUM_LITERALS; i++)
26691	2304		c->p.n.costs.literal[i] = lit_cost;
26692
26693
26694	2313	100	for (i = DEFLATE_MIN_MATCH_LEN; i <= DEFLATE_MAX_MATCH_LEN; i++)
26695	2304		c->p.n.costs.length[i] =
26696	2304		deflate_default_length_cost(i, len_sym_cost);
26697
26698
26699	279	100	for (i = 0; i < ARRAY_LEN(deflate_offset_slot_base); i++)
26700	270		c->p.n.costs.offset_slot[i] =
26701	270		deflate_default_offset_slot_cost(i);
26702	9		}
26703
26704			static forceinline void
26705			deflate_adjust_cost(u32 *cost_p, u32 default_cost, int change_amount)
26706			{
26707	0		if (change_amount == 0)
26708
26709	0		*cost_p = (default_cost + 3 * *cost_p) / 4;
26710	0	0	else if (change_amount == 1)
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
26711	0		*cost_p = (default_cost + *cost_p) / 2;
26712	0	0	else if (change_amount == 2)
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
26713	0		*cost_p = (5 * default_cost + 3 * *cost_p) / 8;
26714			else
26715
26716	0		*cost_p = (3 * default_cost + *cost_p) / 4;
26717	0		}
26718
26719			static forceinline void
26720			deflate_adjust_costs_impl(struct libdeflate_compressor *c,
26721			u32 lit_cost, u32 len_sym_cost, int change_amount)
26722			{
26723			u32 i;
26724
26725
26726	0	0	for (i = 0; i < DEFLATE_NUM_LITERALS; i++)
		0
		0
		0
26727	0	0	deflate_adjust_cost(&c->p.n.costs.literal[i], lit_cost,
		0
		0
		0
26728			change_amount);
26729
26730
26731	0	0	for (i = DEFLATE_MIN_MATCH_LEN; i <= DEFLATE_MAX_MATCH_LEN; i++)
		0
		0
		0
26732	0	0	deflate_adjust_cost(&c->p.n.costs.length[i],
		0
		0
		0
26733			deflate_default_length_cost(i,
26734			len_sym_cost),
26735			change_amount);
26736
26737
26738	0	0	for (i = 0; i < ARRAY_LEN(deflate_offset_slot_base); i++)
		0
		0
		0
26739	0	0	deflate_adjust_cost(&c->p.n.costs.offset_slot[i],
		0
		0
		0
26740			deflate_default_offset_slot_cost(i),
26741			change_amount);
26742	0		}
26743
26744
26745			static void
26746	0		deflate_adjust_costs(struct libdeflate_compressor *c,
26747			u32 lit_cost, u32 len_sym_cost)
26748			{
26749	0		u64 total_delta = 0;
26750			u64 cutoff;
26751			int i;
26752
26753
26754	0	0	for (i = 0; i < NUM_OBSERVATION_TYPES; i++) {
26755	0		u64 prev = (u64)c->p.n.prev_observations[i] *
26756	0		c->split_stats.num_observations;
26757	0		u64 cur = (u64)c->split_stats.observations[i] *
26758	0		c->p.n.prev_num_observations;
26759
26760	0	0	total_delta += prev > cur ? prev - cur : cur - prev;
26761			}
26762	0		cutoff = ((u64)c->p.n.prev_num_observations *
26763	0		c->split_stats.num_observations * 200) / 512;
26764
26765	0	0	if (total_delta > 3 * cutoff)
26766
26767	0		deflate_set_default_costs(c, lit_cost, len_sym_cost);
26768	0	0	else if (4 * total_delta > 9 * cutoff)
26769			deflate_adjust_costs_impl(c, lit_cost, len_sym_cost, 3);
26770	0	0	else if (2 * total_delta > 3 * cutoff)
26771			deflate_adjust_costs_impl(c, lit_cost, len_sym_cost, 2);
26772	0	0	else if (2 * total_delta > cutoff)
26773			deflate_adjust_costs_impl(c, lit_cost, len_sym_cost, 1);
26774			else
26775			deflate_adjust_costs_impl(c, lit_cost, len_sym_cost, 0);
26776	0		}
26777
26778			static void
26779	9		deflate_set_initial_costs(struct libdeflate_compressor *c,
26780			const u8 *block_begin, u32 block_length,
26781			bool is_first_block)
26782			{
26783			u32 lit_cost, len_sym_cost;
26784
26785	9		deflate_choose_default_litlen_costs(c, block_begin, block_length,
26786			&lit_cost, &len_sym_cost);
26787	9	50	if (is_first_block)
26788	9		deflate_set_default_costs(c, lit_cost, len_sym_cost);
26789			else
26790	0		deflate_adjust_costs(c, lit_cost, len_sym_cost);
26791	9		}
26792
26793
26794			static void
26795	24		deflate_find_min_cost_path(struct libdeflate_compressor *c,
26796			const u32 block_length,
26797			const struct lz_match *cache_ptr)
26798			{
26799	24		struct deflate_optimum_node *end_node =
26800			&c->p.n.optimum_nodes[block_length];
26801	24		struct deflate_optimum_node *cur_node = end_node;
26802
26803	24		cur_node->cost_to_end = 0;
26804			do {
26805			unsigned num_matches;
26806			u32 literal;
26807			u32 best_cost_to_end;
26808
26809	40800		cur_node--;
26810	40800		cache_ptr--;
26811
26812	40800		num_matches = cache_ptr->length;
26813	40800		literal = cache_ptr->offset;
26814
26815
26816	40800		best_cost_to_end = c->p.n.costs.literal[literal] +
26817	40800		(cur_node + 1)->cost_to_end;
26818	40800		cur_node->item = (literal << OPTIMUM_OFFSET_SHIFT) | 1;
26819
26820
26821	40800	100	if (num_matches) {
26822			const struct lz_match *match;
26823			u32 len;
26824			u32 offset;
26825			u32 offset_slot;
26826			u32 offset_cost;
26827			u32 cost_to_end;
26828
26829
26830	168		match = cache_ptr - num_matches;
26831	168		len = DEFLATE_MIN_MATCH_LEN;
26832			do {
26833	336		offset = match->offset;
26834	336		offset_slot = c->p.n.offset_slot_full[offset];
26835	336		offset_cost =
26836			c->p.n.costs.offset_slot[offset_slot];
26837			do {
26838	40032		cost_to_end = offset_cost +
26839	40032		c->p.n.costs.length[len] +
26840	40032		(cur_node + len)->cost_to_end;
26841	40032	100	if (cost_to_end < best_cost_to_end) {
26842	39753		best_cost_to_end = cost_to_end;
26843	39753		cur_node->item = len |
26844	39753		(offset <<
26845			OPTIMUM_OFFSET_SHIFT);
26846			}
26847	40032	100	} while (++len <= match->length);
26848	336	100	} while (++match != cache_ptr);
26849	168		cache_ptr -= num_matches;
26850			}
26851	40800		cur_node->cost_to_end = best_cost_to_end;
26852	40800	100	} while (cur_node != &c->p.n.optimum_nodes[0]);
26853
26854	24		deflate_reset_symbol_frequencies(c);
26855	24		deflate_tally_item_list(c, block_length);
26856	24		deflate_make_huffman_codes(&c->freqs, &c->codes);
26857	24		}
26858
26859
26860			static void
26861	9		deflate_optimize_and_flush_block(struct libdeflate_compressor *c,
26862			struct deflate_output_bitstream *os,
26863			const u8 *block_begin, u32 block_length,
26864			const struct lz_match *cache_ptr,
26865			bool is_first_block, bool is_final_block,
26866			bool *used_only_literals)
26867			{
26868	9		unsigned num_passes_remaining = c->p.n.max_optim_passes;
26869	9		u32 best_true_cost = UINT32_MAX;
26870			u32 true_cost;
26871			u32 only_lits_cost;
26872	9		u32 static_cost = UINT32_MAX;
26873			struct deflate_sequence seq_;
26874	9		struct deflate_sequence *seq = NULL;
26875			u32 i;
26876
26877
26878	9		deflate_choose_all_literals(c, block_begin, block_length);
26879	9		only_lits_cost = deflate_compute_true_cost(c);
26880
26881
26882	9		for (i = block_length;
26883	2331	100	i <= MIN(block_length - 1 + DEFLATE_MAX_MATCH_LEN,
26884	2322		ARRAY_LEN(c->p.n.optimum_nodes) - 1); i++)
26885	2322		c->p.n.optimum_nodes[i].cost_to_end = 0x80000000;
26886
26887
26888	9	100	if (block_length <= c->p.n.max_len_to_optimize_static_block) {
26889
26890	3		c->p.n.costs_saved = c->p.n.costs;
26891
26892	3		deflate_set_costs_from_codes(c, &c->static_codes.lens);
26893	3		deflate_find_min_cost_path(c, block_length, cache_ptr);
26894	3		static_cost = c->p.n.optimum_nodes[0].cost_to_end / BIT_COST;
26895	3		static_cost += 7;
26896
26897
26898	3		c->p.n.costs = c->p.n.costs_saved;
26899			}
26900
26901
26902	9		deflate_set_initial_costs(c, block_begin, block_length, is_first_block);
26903
26904			do {
26905
26906	18		deflate_find_min_cost_path(c, block_length, cache_ptr);
26907
26908
26909	18		true_cost = deflate_compute_true_cost(c);
26910
26911
26912	18	100	if (true_cost + c->p.n.min_improvement_to_continue >
26913			best_true_cost)
26914	9		break;
26915
26916	9		best_true_cost = true_cost;
26917
26918
26919	9		c->p.n.costs_saved = c->p.n.costs;
26920
26921
26922	9		deflate_set_costs_from_codes(c, &c->codes.lens);
26923
26924	9	50	} while (--num_passes_remaining);
26925
26926	9		*used_only_literals = false;
26927	9	100	if (MIN(only_lits_cost, static_cost) < best_true_cost) {
26928	3	50	if (only_lits_cost < static_cost) {
26929
26930	0		deflate_choose_all_literals(c, block_begin, block_length);
26931	0		deflate_set_costs_from_codes(c, &c->codes.lens);
26932	0		seq_.litrunlen_and_length = block_length;
26933	0		seq = &seq_;
26934	0		*used_only_literals = true;
26935			} else {
26936
26937	3		deflate_set_costs_from_codes(c, &c->static_codes.lens);
26938	3		deflate_find_min_cost_path(c, block_length, cache_ptr);
26939			}
26940	6		} else if (true_cost >=
26941	6	50	best_true_cost + c->p.n.min_bits_to_use_nonfinal_path) {
26942
26943	0		c->p.n.costs = c->p.n.costs_saved;
26944	0		deflate_find_min_cost_path(c, block_length, cache_ptr);
26945	0		deflate_set_costs_from_codes(c, &c->codes.lens);
26946			}
26947	9		deflate_flush_block(c, os, block_begin, block_length, seq,
26948			is_final_block);
26949	9		}
26950
26951			static void
26952	18		deflate_near_optimal_init_stats(struct libdeflate_compressor *c)
26953			{
26954	18		init_block_split_stats(&c->split_stats);
26955	18		memset(c->p.n.new_match_len_freqs, 0,
26956			sizeof(c->p.n.new_match_len_freqs));
26957	18		memset(c->p.n.match_len_freqs, 0, sizeof(c->p.n.match_len_freqs));
26958	18		}
26959
26960			static void
26961	9		deflate_near_optimal_merge_stats(struct libdeflate_compressor *c)
26962			{
26963			unsigned i;
26964
26965	9		merge_new_observations(&c->split_stats);
26966	2340	100	for (i = 0; i < ARRAY_LEN(c->p.n.match_len_freqs); i++) {
26967	2331		c->p.n.match_len_freqs[i] += c->p.n.new_match_len_freqs[i];
26968	2331		c->p.n.new_match_len_freqs[i] = 0;
26969			}
26970	9		}
26971
26972
26973			static void
26974	9		deflate_near_optimal_save_stats(struct libdeflate_compressor *c)
26975			{
26976			int i;
26977
26978	99	100	for (i = 0; i < NUM_OBSERVATION_TYPES; i++)
26979	90		c->p.n.prev_observations[i] = c->split_stats.observations[i];
26980	9		c->p.n.prev_num_observations = c->split_stats.num_observations;
26981	9		}
26982
26983			static void
26984	0		deflate_near_optimal_clear_old_stats(struct libdeflate_compressor *c)
26985			{
26986			int i;
26987
26988	0	0	for (i = 0; i < NUM_OBSERVATION_TYPES; i++)
26989	0		c->split_stats.observations[i] = 0;
26990	0		c->split_stats.num_observations = 0;
26991	0		memset(c->p.n.match_len_freqs, 0, sizeof(c->p.n.match_len_freqs));
26992	0		}
26993
26994
26995			static void
26996	9		deflate_compress_near_optimal(struct libdeflate_compressor * restrict c,
26997			const u8 *in, size_t in_nbytes,
26998			struct deflate_output_bitstream *os)
26999			{
			/*
			 * Near-optimal compression driver.  Walks the input one position at
			 * a time, asks the bt_matchfinder (c->p.n.bt_mf) for matches, stores
			 * them in c->p.n.match_cache, and hands each finished block to
			 * deflate_optimize_and_flush_block().  The outer loop runs until all
			 * of the input has been consumed or os->overflow is set.
			 *
			 * Match cache layout, as written below: for each input position, the
			 * matches returned for it, followed by one trailer entry whose
			 * 'length' is the number of matches just written and whose 'offset'
			 * holds the literal byte at that position.
			 */
27000	9		const u8 *in_next = in;
27001	9		const u8 *in_block_begin = in_next;
27002	9		const u8 *in_end = in_next + in_nbytes;
27003	9		const u8 *in_cur_base = in_next;
27004	9		const u8 *in_next_slide =
27005	9		in_next + MIN(in_end - in_next, MATCHFINDER_WINDOW_SIZE);
27006	9		u32 max_len = DEFLATE_MAX_MATCH_LEN;
27007	9		u32 nice_len = MIN(c->nice_match_length, max_len);
27008	9		struct lz_match *cache_ptr = c->p.n.match_cache;
27009	9		u32 next_hashes[2] = {0, 0};
27010	9		bool prev_block_used_only_literals = false;
27011
27012	9		bt_matchfinder_init(&c->p.n.bt_mf);
27013	9		deflate_near_optimal_init_stats(c);
27014
			/* One iteration of this loop per output block. */
27015			do {
27016
27017	9		const u8 * const in_max_block_end = choose_max_block_end(
27018			in_block_begin, in_end, SOFT_MAX_BLOCK_LENGTH);
27019	9		const u8 *prev_end_block_check = NULL;
27020	9		bool change_detected = false;
27021	9		const u8 *next_observation = in_next;
27022			u32 min_len;
27023
27024
			/*
			 * min_len is only compared against best_len when recording
			 * observations below.  After a block flagged as literal-only it is
			 * set above DEFLATE_MAX_MATCH_LEN, so every position in this block
			 * is observed as a literal; otherwise it is computed from the data
			 * by calculate_min_match_len().
			 */
27025	9	50	if (prev_block_used_only_literals)
27026	0		min_len = DEFLATE_MAX_MATCH_LEN + 1;
27027			else
27028	9		min_len = calculate_min_match_len(
27029			in_block_begin,
27030	9		in_max_block_end - in_block_begin,
27031			c->max_search_depth);
27032
27033
			/* One iteration per searched position (skipped positions are handled inside). */
27034	216		for (;;) {
27035			struct lz_match *matches;
27036			u32 best_len;
27037	225		size_t remaining = in_end - in_next;
27038
27039
			/* Reached the slide point: slide the matchfinder window and rebase on in_next. */
27040	225	50	if (in_next == in_next_slide) {
27041	0		bt_matchfinder_slide_window(&c->p.n.bt_mf);
27042	0		in_cur_base = in_next;
27043	0		in_next_slide = in_next +
27044	0		MIN(remaining, MATCHFINDER_WINDOW_SIZE);
27045			}
27046
27047
			/*
			 * Search for matches at in_next, writing them at cache_ptr.  The
			 * search is skipped when fewer than BT_MATCHFINDER_REQUIRED_NBYTES
			 * bytes can be matched.  best_len is taken from the last match
			 * written (presumably the longest one; confirm against
			 * bt_matchfinder_get_matches()).
			 */
27048	225		matches = cache_ptr;
27049	225	100	best_len = 0;
27050			adjust_max_and_nice_len(&max_len, &nice_len, remaining);
27051	225	50	if (likely(max_len >= BT_MATCHFINDER_REQUIRED_NBYTES)) {
27052	225		cache_ptr = bt_matchfinder_get_matches(
27053			&c->p.n.bt_mf,
27054			in_cur_base,
27055			in_next - in_cur_base,
27056			max_len,
27057			nice_len,
27058			c->max_search_depth,
27059			next_hashes,
27060			matches);
27061	225	100	if (cache_ptr > matches)
27062	63		best_len = cache_ptr[-1].length;
27063			}
			/*
			 * Record one block-split observation, but only for positions not
			 * already covered by the previously observed match.
			 */
27064	225	50	if (in_next >= next_observation) {
27065	225	100	if (best_len >= min_len) {
27066	63	50	observe_match(&c->split_stats,
27067			best_len);
27068	63		next_observation = in_next + best_len;
27069	63		c->p.n.new_match_len_freqs[best_len]++;
27070			} else {
27071	162		observe_literal(&c->split_stats,
27072	162		*in_next);
27073	162		next_observation = in_next + 1;
27074			}
27075			}
27076
			/* Trailer for this position: match count in 'length', literal byte in 'offset'. */
27077	225		cache_ptr->length = cache_ptr - matches;
27078	225		cache_ptr->offset = *in_next;
27079	225		in_next++;
27080	225		cache_ptr++;
27081
27082
			/*
			 * A match at least nice_len long was found: do not search the
			 * remaining best_len - 1 positions it covers.  Each one is passed
			 * to bt_matchfinder_skip_byte() (when enough bytes remain) and gets
			 * a trailer recording zero matches.
			 */
27083	225	100	if (best_len >= DEFLATE_MIN_MATCH_LEN &&
27084	63	50	best_len >= nice_len) {
27085	63		--best_len;
27086			do {
27087	15075		remaining = in_end - in_next;
27088	15075	50	if (in_next == in_next_slide) {
27089	0		bt_matchfinder_slide_window(
27090			&c->p.n.bt_mf);
27091	0		in_cur_base = in_next;
27092	0		in_next_slide = in_next +
27093	0		MIN(remaining,
27094			MATCHFINDER_WINDOW_SIZE);
27095			}
27096			adjust_max_and_nice_len(&max_len,
27097			&nice_len,
27098			remaining);
27099	15075	100	if (max_len >=
27100			BT_MATCHFINDER_REQUIRED_NBYTES) {
27101	15039		bt_matchfinder_skip_byte(
27102			&c->p.n.bt_mf,
27103			in_cur_base,
27104			in_next - in_cur_base,
27105			nice_len,
27106			c->max_search_depth,
27107			next_hashes);
27108			}
27109	15075		cache_ptr->length = 0;
27110	15075		cache_ptr->offset = *in_next;
27111	15075		in_next++;
27112	15075		cache_ptr++;
27113	15075	100	} while (--best_len);
27114			}
27115
			/* End the block: the maximum block end has been reached. */
27116	225	100	if (in_next >= in_max_block_end)
27117	9		break;
27118
			/* End the block: the match cache has reached MATCH_CACHE_LENGTH entries. */
27119	216		if (cache_ptr >=
27120	216	50	&c->p.n.match_cache[MATCH_CACHE_LENGTH])
27121	0		break;
27122
			/* Not yet time for a block-split check: keep scanning. */
27123	432	50	if (!ready_to_check_block(&c->split_stats,
		50
27124			in_block_begin, in_next,
27125			in_end))
27126	216		continue;
27127
			/* End the block: the split check reported a change in the statistics. */
27128	0	0	if (do_end_block_check(&c->split_stats,
27129	0		in_next - in_block_begin)) {
27130	0		change_detected = true;
27131	0		break;
27132			}
27133
			/* No change: merge the pending stats and remember this check point. */
27134	0		deflate_near_optimal_merge_stats(c);
27135	0		prev_end_block_check = in_next;
27136			}
27137
27138	9	50	if (change_detected && prev_end_block_check != NULL) {
		0
27139
			/*
			 * End the block at the previous check point instead of at in_next.
			 * The cache is walked backwards over the num_bytes_to_rewind
			 * positions past that point: step back onto a trailer, then back
			 * over the matches it counts.  After the block is flushed, the
			 * rewound entries are moved to the front of the cache so the next
			 * block can reuse them.
			 */
27140	0		struct lz_match *orig_cache_ptr = cache_ptr;
27141	0		const u8 *in_block_end = prev_end_block_check;
27142	0		u32 block_length = in_block_end - in_block_begin;
27143	0		bool is_first = (in_block_begin == in);
27144	0		bool is_final = false;
27145	0		u32 num_bytes_to_rewind = in_next - in_block_end;
27146			size_t cache_len_rewound;
27147
27148
27149			do {
27150	0		cache_ptr--;
27151	0		cache_ptr -= cache_ptr->length;
27152	0	0	} while (--num_bytes_to_rewind);
27153	0		cache_len_rewound = orig_cache_ptr - cache_ptr;
27154
27155	0		deflate_optimize_and_flush_block(
27156			c, os, in_block_begin,
27157			block_length, cache_ptr,
27158			is_first, is_final,
27159			&prev_block_used_only_literals);
27160	0		memmove(c->p.n.match_cache, cache_ptr,
27161			cache_len_rewound * sizeof(*cache_ptr));
27162	0		cache_ptr = &c->p.n.match_cache[cache_len_rewound];
27163	0		deflate_near_optimal_save_stats(c);
27164
			/* Keep the pending (new_*) stats for the next block; drop the merged ones. */
27165	0		deflate_near_optimal_clear_old_stats(c);
27166	0		in_block_begin = in_block_end;
27167			} else {
27168
			/*
			 * End the block at in_next.  All statistics are merged before the
			 * flush and fully reset afterwards, and the cache restarts empty.
			 */
27169	9		u32 block_length = in_next - in_block_begin;
27170	9		bool is_first = (in_block_begin == in);
27171	9		bool is_final = (in_next == in_end);
27172
27173	9		deflate_near_optimal_merge_stats(c);
27174	9		deflate_optimize_and_flush_block(
27175			c, os, in_block_begin,
27176			block_length, cache_ptr,
27177			is_first, is_final,
27178			&prev_block_used_only_literals);
27179	9		cache_ptr = &c->p.n.match_cache[0];
27180	9		deflate_near_optimal_save_stats(c);
27181	9		deflate_near_optimal_init_stats(c);
27182	9		in_block_begin = in_next;
27183			}
27184	9	50	} while (in_next != in_end && !os->overflow);
		0
27185	9		}
27186
27187
27188			static void
27189	9		deflate_init_offset_slot_full(struct libdeflate_compressor *c)
27190			{
			/*
			 * Build the direct offset -> offset-slot lookup table
			 * c->p.n.offset_slot_full[].  For every slot, each offset in
			 * [base, base + 2^extra_bits) is mapped to that slot, where base and
			 * extra_bits come from deflate_offset_slot_base[] and
			 * deflate_extra_offset_bits[].
			 */
27191			u32 offset_slot;
27192			u32 offset;
27193			u32 offset_end;
27194
27195	279	100	for (offset_slot = 0; offset_slot < ARRAY_LEN(deflate_offset_slot_base);
27196	270		offset_slot++) {
27197	270		offset = deflate_offset_slot_base[offset_slot];
27198	270		offset_end = offset +
27199	270		(1 << deflate_extra_offset_bits[offset_slot]);
27200			do {
27201	294912		c->p.n.offset_slot_full[offset] = offset_slot;
27202	294912	100	} while (++offset != offset_end);
27203			}
27204	9		}
27205
27206			#endif
27207
27208			LIBDEFLATEAPI struct libdeflate_compressor *
27209	37		libdeflate_alloc_compressor_ex(int compression_level,
27210			const struct libdeflate_options *options)
27211			{
			/*
			 * Allocate and configure a compressor for compression_level (0..12).
			 * 'options' is dereferenced unconditionally, so it must not be NULL;
			 * NULL malloc_func/free_func members select the library defaults.
			 * Returns NULL when options->sizeof_options does not match
			 * sizeof(*options), when the level is out of range, or when the
			 * allocation fails.
			 */
27212			struct libdeflate_compressor *c;
27213	37		size_t size = offsetof(struct libdeflate_compressor, p);
27214
27215			check_buildtime_parameters();
27216
27217
27218	37	50	if (options->sizeof_options != sizeof(*options))
27219	0		return NULL;
27220
27221	37	50	if (compression_level < 0 \|\| compression_level > 12)
		50
27222	0		return NULL;
27223
			/*
			 * Allocate only the per-algorithm state this level needs, on top of
			 * the common prefix that ends where member 'p' begins: p.n for
			 * levels >= 10 (when near-optimal parsing is compiled in), p.g for
			 * levels >= 2, p.f for level 1, and nothing extra for level 0.
			 */
27224			#if SUPPORT_NEAR_OPTIMAL_PARSING
27225	37	100	if (compression_level >= 10)
27226	9		size += sizeof(c->p.n);
27227			else
27228			#endif
27229			{
27230	28	100	if (compression_level >= 2)
27231	25		size += sizeof(c->p.g);
27232	3	50	else if (compression_level == 1)
27233	3		size += sizeof(c->p.f);
27234			}
27235
27236	37	50	c = libdeflate_aligned_malloc(options->malloc_func ?
27237			options->malloc_func :
27238			libdeflate_default_malloc_func,
27239			MATCHFINDER_MEM_ALIGNMENT, size);
27240	37	50	if (!c)
27241	0		return NULL;
			/* Remember the matching free function; libdeflate_free_compressor() uses it. */
27242	74		c->free_func = options->free_func ?
27243	37	50	options->free_func : libdeflate_default_free_func;
27244
27245	37		c->compression_level = compression_level;
27246
27247
			/*
			 * Inputs of at most this many bytes bypass c->impl and go through
			 * deflate_compress_none() (see libdeflate_deflate_compress()).  The
			 * threshold shrinks as the level rises; level 0 overrides it below.
			 */
27248	37		c->max_passthrough_size = 55 - (compression_level * 4);
27249
			/* Per-level choice of algorithm and search-effort parameters. */
27250	37		switch (compression_level) {
27251	0		case 0:
			/* Level 0: every input takes the passthrough path, so no impl is set. */
27252	0		c->max_passthrough_size = SIZE_MAX;
27253	0		c->impl = NULL;
27254	0		break;
27255	3		case 1:
			/* NOTE(review): max_search_depth is left unset here; presumably the fastest impl does not read it. */
27256	3		c->impl = deflate_compress_fastest;
27257
27258	3		c->nice_match_length = 32;
27259	3		break;
27260	3		case 2:
27261	3		c->impl = deflate_compress_greedy;
27262	3		c->max_search_depth = 6;
27263	3		c->nice_match_length = 10;
27264	3		break;
27265	3		case 3:
27266	3		c->impl = deflate_compress_greedy;
27267	3		c->max_search_depth = 12;
27268	3		c->nice_match_length = 14;
27269	3		break;
27270	3		case 4:
27271	3		c->impl = deflate_compress_greedy;
27272	3		c->max_search_depth = 16;
27273	3		c->nice_match_length = 30;
27274	3		break;
27275	3		case 5:
27276	3		c->impl = deflate_compress_lazy;
27277	3		c->max_search_depth = 16;
27278	3		c->nice_match_length = 30;
27279	3		break;
27280	4		case 6:
27281	4		c->impl = deflate_compress_lazy;
27282	4		c->max_search_depth = 35;
27283	4		c->nice_match_length = 65;
27284	4		break;
27285	3		case 7:
27286	3		c->impl = deflate_compress_lazy;
27287	3		c->max_search_depth = 100;
27288	3		c->nice_match_length = 130;
27289	3		break;
27290	3		case 8:
27291	3		c->impl = deflate_compress_lazy2;
27292	3		c->max_search_depth = 300;
27293	3		c->nice_match_length = DEFLATE_MAX_MATCH_LEN;
27294	3		break;
			/* Without near-optimal support, level 9's settings also serve levels 10-12 via 'default'. */
27295	3		case 9:
27296			#if !SUPPORT_NEAR_OPTIMAL_PARSING
27297			default:
27298			#endif
27299	3		c->impl = deflate_compress_lazy2;
27300	3		c->max_search_depth = 600;
27301	3		c->nice_match_length = DEFLATE_MAX_MATCH_LEN;
27302	3		break;
27303			#if SUPPORT_NEAR_OPTIMAL_PARSING
			/* Levels 10-12: near-optimal parsing; each also builds the offset-slot lookup table. */
27304	3		case 10:
27305	3		c->impl = deflate_compress_near_optimal;
27306	3		c->max_search_depth = 35;
27307	3		c->nice_match_length = 75;
27308	3		c->p.n.max_optim_passes = 2;
27309	3		c->p.n.min_improvement_to_continue = 32;
27310	3		c->p.n.min_bits_to_use_nonfinal_path = 32;
27311	3		c->p.n.max_len_to_optimize_static_block = 0;
27312	3		deflate_init_offset_slot_full(c);
27313	3		break;
27314	3		case 11:
27315	3		c->impl = deflate_compress_near_optimal;
27316	3		c->max_search_depth = 100;
27317	3		c->nice_match_length = 150;
27318	3		c->p.n.max_optim_passes = 4;
27319	3		c->p.n.min_improvement_to_continue = 16;
27320	3		c->p.n.min_bits_to_use_nonfinal_path = 16;
27321	3		c->p.n.max_len_to_optimize_static_block = 1000;
27322	3		deflate_init_offset_slot_full(c);
27323	3		break;
27324	3		case 12:
27325			default:
27326	3		c->impl = deflate_compress_near_optimal;
27327	3		c->max_search_depth = 300;
27328	3		c->nice_match_length = DEFLATE_MAX_MATCH_LEN;
27329	3		c->p.n.max_optim_passes = 10;
27330	3		c->p.n.min_improvement_to_continue = 1;
27331	3		c->p.n.min_bits_to_use_nonfinal_path = 1;
27332	3		c->p.n.max_len_to_optimize_static_block = 10000;
27333	3		deflate_init_offset_slot_full(c);
27334	3		break;
27335			#endif
27336			}
27337
27338	37		deflate_init_static_codes(c);
27339
27340	37		return c;
27341			}
27342
27343
27344			LIBDEFLATEAPI struct libdeflate_compressor *
27345	37		libdeflate_alloc_compressor(int compression_level)
27346			{
			/*
			 * Convenience wrapper around libdeflate_alloc_compressor_ex() using
			 * an options struct in which only sizeof_options is set; the
			 * callback members are therefore NULL and the library's default
			 * allocator functions are used.
			 */
27347			static const struct libdeflate_options defaults = {
27348			.sizeof_options = sizeof(defaults),
27349			};
27350	37		return libdeflate_alloc_compressor_ex(compression_level, &defaults);
27351			}
27352
27353			LIBDEFLATEAPI size_t
27354	37		libdeflate_deflate_compress(struct libdeflate_compressor *c,
27355			const void *in, size_t in_nbytes,
27356			void *out, size_t out_nbytes_avail)
27357			{
			/*
			 * Compress in[0..in_nbytes) into out[0..out_nbytes_avail) as a raw
			 * DEFLATE stream.  Returns the number of bytes written, or 0 if the
			 * selected implementation reported an output overflow.
			 */
27358			struct deflate_output_bitstream os;
27359
27360
			/* Small inputs (and every input at level 0) skip c->impl entirely. */
27361	37	50	if (unlikely(in_nbytes <= c->max_passthrough_size))
27362	0		return deflate_compress_none(in, in_nbytes,
27363			out, out_nbytes_avail);
27364
27365
			/* Start with an empty bit buffer positioned at the beginning of 'out'. */
27366	37		os.bitbuf = 0;
27367	37		os.bitcount = 0;
27368	37		os.next = out;
27369	37		os.end = os.next + out_nbytes_avail;
27370	37		os.overflow = false;
27371
27372
			/* Run the algorithm chosen at allocation time. */
27373	37		(*c->impl)(c, in, in_nbytes, &os);
27374
27375
27376	37	50	if (os.overflow)
27377	0		return 0;
27378
27379
			/* Write out the final partial byte (at most 7 bits), if any. */
27380	37		ASSERT(os.bitcount <= 7);
27381	37	50	if (os.bitcount) {
27382	37		ASSERT(os.next < os.end);
27383	37		*os.next++ = os.bitbuf;
27384			}
27385
27386
27387	37		return os.next - (u8 *)out;
27388			}
27389
27390			LIBDEFLATEAPI void
27391	37		libdeflate_free_compressor(struct libdeflate_compressor *c)
27392			{
			/* Release the compressor with the free function stored in it; NULL is ignored. */
27393	37	50	if (c)
27394	37		libdeflate_aligned_free(c->free_func, c);
27395	37		}
27396
27397			unsigned int
27398	25		libdeflate_get_compression_level(struct libdeflate_compressor *c)
27399			{
			/* Return the level recorded when the compressor was allocated; c must not be NULL. */
27400	25		return c->compression_level;
27401			}
27402
27403			LIBDEFLATEAPI size_t
27404	37		libdeflate_deflate_compress_bound(struct libdeflate_compressor *c,
27405			size_t in_nbytes)
27406			{
			/*
			 * Return an upper bound on the compressed size for in_nbytes bytes
			 * of input: the input size plus 5 bytes for each of
			 * max(ceil(in_nbytes / MIN_BLOCK_LENGTH), 1) blocks.  The argument
			 * 'c' is not read by this function.
			 * NOTE(review): 5 bytes presumably is the per-block cost of an
			 * uncompressed (stored) block header; confirm against the DEFLATE
			 * format description.
			 */
27407			size_t max_blocks;
27408
27409
27410
27411
27412			STATIC_ASSERT(2 * MIN_BLOCK_LENGTH <= UINT16_MAX);
27413	37	50	max_blocks = MAX(DIV_ROUND_UP(in_nbytes, MIN_BLOCK_LENGTH), 1);
27414
27415
27416	37		return (5 * max_blocks) + in_nbytes;
27417			}
27418			/* /usr/home/ben/projects/gzip-libdeflate/../../software/libdeflate/libdeflate-1.25/lib/deflate_decompress.c */
27419
27420
27421			/* #include "lib_common.h" */
27422
27423
27424			#ifndef LIB_LIB_COMMON_H
27425			#define LIB_LIB_COMMON_H
27426
27427			#ifdef LIBDEFLATE_H
27428
27429			# error "lib_common.h must always be included before libdeflate.h"
27430			#endif
27431
27432			#if defined(LIBDEFLATE_DLL) && (defined(_WIN32) \|\| defined(__CYGWIN__))
			/* Symbol export annotation: dllexport for Windows/Cygwin DLL builds, default visibility for GNU-compatible compilers, nothing otherwise. */
27433			# define LIBDEFLATE_EXPORT_SYM __declspec(dllexport)
27434			#elif defined(__GNUC__)
27435			# define LIBDEFLATE_EXPORT_SYM __attribute__((visibility("default")))
27436			#else
27437			# define LIBDEFLATE_EXPORT_SYM
27438			#endif
27439
27440
			/* On 32-bit x86 with a GNU-compatible compiler, have exported functions realign the stack on entry. */
27441			#if defined(__GNUC__) && defined(__i386__)
27442			# define LIBDEFLATE_ALIGN_STACK __attribute__((force_align_arg_pointer))
27443			#else
27444			# define LIBDEFLATE_ALIGN_STACK
27445			#endif
27446
			/* Prefix placed on every public function definition. */
27447			#define LIBDEFLATEAPI LIBDEFLATE_EXPORT_SYM LIBDEFLATE_ALIGN_STACK
27448
27449			/* #include "../common_defs.h" */
27450
27451
27452			#ifndef COMMON_DEFS_H
27453			#define COMMON_DEFS_H
27454
27455			/* #include "libdeflate.h" */
27456
27457
27458			#ifndef LIBDEFLATE_H
27459			#define LIBDEFLATE_H
27460
27461			#include <stddef.h>
27462			#include <stdint.h>
27463
27464			#ifdef __cplusplus
27465			extern "C" {
27466			#endif
27467
27468			#define LIBDEFLATE_VERSION_MAJOR 1
27469			#define LIBDEFLATE_VERSION_MINOR 25
27470			#define LIBDEFLATE_VERSION_STRING "1.25"
27471
27472
27473			#ifndef LIBDEFLATEAPI
27474			# if defined(LIBDEFLATE_DLL) && (defined(_WIN32) \|\| defined(__CYGWIN__))
27475			# define LIBDEFLATEAPI __declspec(dllimport)
27476			# else
27477			# define LIBDEFLATEAPI
27478			# endif
27479			#endif
27480
27481
27482
27483
27484
27485			struct libdeflate_compressor;
27486			struct libdeflate_options;
27487
27488
27489			LIBDEFLATEAPI struct libdeflate_compressor *
27490			libdeflate_alloc_compressor(int compression_level);
27491
27492
27493			LIBDEFLATEAPI struct libdeflate_compressor *
27494			libdeflate_alloc_compressor_ex(int compression_level,
27495			const struct libdeflate_options *options);
27496
27497
27498			LIBDEFLATEAPI size_t
27499			libdeflate_deflate_compress(struct libdeflate_compressor *compressor,
27500			const void *in, size_t in_nbytes,
27501			void *out, size_t out_nbytes_avail);
27502
27503
27504			LIBDEFLATEAPI size_t
27505			libdeflate_deflate_compress_bound(struct libdeflate_compressor *compressor,
27506			size_t in_nbytes);
27507
27508
27509			LIBDEFLATEAPI size_t
27510			libdeflate_zlib_compress(struct libdeflate_compressor *compressor,
27511			const void *in, size_t in_nbytes,
27512			void *out, size_t out_nbytes_avail);
27513
27514
27515			LIBDEFLATEAPI size_t
27516			libdeflate_zlib_compress_bound(struct libdeflate_compressor *compressor,
27517			size_t in_nbytes);
27518
27519
27520			LIBDEFLATEAPI size_t
27521			libdeflate_gzip_compress(struct libdeflate_compressor *compressor,
27522			const void *in, size_t in_nbytes,
27523			void *out, size_t out_nbytes_avail);
27524
27525
27526			LIBDEFLATEAPI size_t
27527			libdeflate_gzip_compress_bound(struct libdeflate_compressor *compressor,
27528			size_t in_nbytes);
27529
27530
27531			LIBDEFLATEAPI void
27532			libdeflate_free_compressor(struct libdeflate_compressor *compressor);
27533
27534
27535
27536
27537
27538			struct libdeflate_decompressor;
27539			struct libdeflate_options;
27540
27541
27542			LIBDEFLATEAPI struct libdeflate_decompressor *
27543			libdeflate_alloc_decompressor(void);
27544
27545
27546			LIBDEFLATEAPI struct libdeflate_decompressor *
27547			libdeflate_alloc_decompressor_ex(const struct libdeflate_options *options);
27548
27549
27550			enum libdeflate_result {
			/*
			 * Result codes returned by the libdeflate_*_decompress*() functions
			 * declared after this enum.
			 * NOTE(review): the per-value notes below are inferred from the
			 * enumerator names only; confirm against the upstream libdeflate.h.
			 */
27551
			/* Presumably: decompression succeeded. */
27552			LIBDEFLATE_SUCCESS = 0,
27553
27554
			/* Presumably: the compressed input was invalid or unsupported. */
27555			LIBDEFLATE_BAD_DATA = 1,
27556
27557
			/* Presumably: fewer bytes were produced than the caller required. */
27558			LIBDEFLATE_SHORT_OUTPUT = 2,
27559
27560
			/* Presumably: the output buffer was too small for the data. */
27561			LIBDEFLATE_INSUFFICIENT_SPACE = 3,
27562			};
27563
27564
27565			LIBDEFLATEAPI enum libdeflate_result
27566			libdeflate_deflate_decompress(struct libdeflate_decompressor *decompressor,
27567			const void *in, size_t in_nbytes,
27568			void *out, size_t out_nbytes_avail,
27569			size_t *actual_out_nbytes_ret);
27570
27571
27572			LIBDEFLATEAPI enum libdeflate_result
27573			libdeflate_deflate_decompress_ex(struct libdeflate_decompressor *decompressor,
27574			const void *in, size_t in_nbytes,
27575			void *out, size_t out_nbytes_avail,
27576			size_t *actual_in_nbytes_ret,
27577			size_t *actual_out_nbytes_ret);
27578
27579
27580			LIBDEFLATEAPI enum libdeflate_result
27581			libdeflate_zlib_decompress(struct libdeflate_decompressor *decompressor,
27582			const void *in, size_t in_nbytes,
27583			void *out, size_t out_nbytes_avail,
27584			size_t *actual_out_nbytes_ret);
27585
27586
27587			LIBDEFLATEAPI enum libdeflate_result
27588			libdeflate_zlib_decompress_ex(struct libdeflate_decompressor *decompressor,
27589			const void *in, size_t in_nbytes,
27590			void *out, size_t out_nbytes_avail,
27591			size_t *actual_in_nbytes_ret,
27592			size_t *actual_out_nbytes_ret);
27593
27594
27595			LIBDEFLATEAPI enum libdeflate_result
27596			libdeflate_gzip_decompress(struct libdeflate_decompressor *decompressor,
27597			const void *in, size_t in_nbytes,
27598			void *out, size_t out_nbytes_avail,
27599			size_t *actual_out_nbytes_ret);
27600
27601
27602			LIBDEFLATEAPI enum libdeflate_result
27603			libdeflate_gzip_decompress_ex(struct libdeflate_decompressor *decompressor,
27604			const void *in, size_t in_nbytes,
27605			void *out, size_t out_nbytes_avail,
27606			size_t *actual_in_nbytes_ret,
27607			size_t *actual_out_nbytes_ret);
27608
27609
27610			LIBDEFLATEAPI void
27611			libdeflate_free_decompressor(struct libdeflate_decompressor *decompressor);
27612
27613
27614
27615
27616
27617
27618			LIBDEFLATEAPI uint32_t
27619			libdeflate_adler32(uint32_t adler, const void *buffer, size_t len);
27620
27621
27622
27623			LIBDEFLATEAPI uint32_t
27624			libdeflate_crc32(uint32_t crc, const void *buffer, size_t len);
27625
27626
27627
27628
27629
27630
27631			LIBDEFLATEAPI void
27632			libdeflate_set_memory_allocator(void *(*malloc_func)(size_t),
27633			void (*free_func)(void *));
27634
27635
27636			struct libdeflate_options {
			/*
			 * Advanced options for libdeflate_alloc_compressor_ex() (and, per the
			 * prototype above, libdeflate_alloc_decompressor_ex()).
			 */
27637
27638
			/*
			 * Must be set to sizeof(struct libdeflate_options); the allocation
			 * function returns NULL when it is not.
			 */
27639			size_t sizeof_options;
27640
27641
			/*
			 * Optional allocator callbacks.  A NULL member makes the library use
			 * its default function in that role.  These are function pointers:
			 * the '*' declarators were lost when this listing was rendered and
			 * are restored here.
			 */
27642			void *(*malloc_func)(size_t);
27643			void (*free_func)(void *);
27644			};
27645
27646			#ifdef __cplusplus
27647			}
27648			#endif
27649
27650			#endif
27651
27652
27653			#include <stdbool.h>
27654			#include <stddef.h>
27655			#include <stdint.h>
27656			#ifdef _MSC_VER
27657			# include <intrin.h>
27658			# include <stdlib.h>
27659
27660
27661			# pragma warning(disable : 4146)
27662
27663			# pragma warning(disable : 4018)
27664			# pragma warning(disable : 4244)
27665			# pragma warning(disable : 4267)
27666			# pragma warning(disable : 4310)
27667
27668			# pragma warning(disable : 4100)
27669			# pragma warning(disable : 4127)
27670			# pragma warning(disable : 4189)
27671			# pragma warning(disable : 4232)
27672			# pragma warning(disable : 4245)
27673			# pragma warning(disable : 4295)
27674			#endif
27675			#ifndef FREESTANDING
27676			# include <string.h>
27677			#endif
27678
27679
27680
27681
27682
27683
27684			#undef ARCH_X86_64
			/*
			 * Target architecture detection.  At most one ARCH_* macro ends up
			 * defined; all are cleared first so a stale definition cannot leak in.
			 */
27685			#undef ARCH_X86_32
27686			#undef ARCH_ARM64
27687			#undef ARCH_ARM32
27688			#undef ARCH_RISCV
27689			#ifdef _MSC_VER
27690
			/* MSVC: use its _M_* macros.  An x64 build with _M_ARM64EC defined is not treated as x86_64. */
27691			# if defined(_M_X64) && !defined(_M_ARM64EC)
27692			# define ARCH_X86_64
27693			# elif defined(_M_IX86)
27694			# define ARCH_X86_32
27695			# elif defined(_M_ARM64)
27696			# define ARCH_ARM64
27697			# elif defined(_M_ARM)
27698			# define ARCH_ARM32
27699			# endif
27700			#else
			/* Other compilers: use the GNU-style predefined macros.  RISC-V is only detected on this path. */
27701			# if defined(__x86_64__)
27702			# define ARCH_X86_64
27703			# elif defined(__i386__)
27704			# define ARCH_X86_32
27705			# elif defined(__aarch64__)
27706			# define ARCH_ARM64
27707			# elif defined(__arm__)
27708			# define ARCH_ARM32
27709			# elif defined(__riscv)
27710			# define ARCH_RISCV
27711			# endif
27712			#endif
27713
27714
27715
27716
27717
27718
27719			typedef uint8_t u8; /* short names for the fixed-width integer types */
27720			typedef uint16_t u16;
27721			typedef uint32_t u32;
27722			typedef uint64_t u64;
27723			typedef int8_t s8;
27724			typedef int16_t s16;
27725			typedef int32_t s32;
27726			typedef int64_t s64;
27727
27728
			/* Provide ssize_t under MSVC: 64-bit on _WIN64 builds, 'long' otherwise. */
27729			#ifdef _MSC_VER
27730			# ifdef _WIN64
27731			typedef long long ssize_t;
27732			# else
27733			typedef long ssize_t;
27734			# endif
27735			#endif
27736
27737
			/* The "machine word" used by word-at-a-time code is defined as size_t. */
27738			typedef size_t machine_word_t;
27739
27740
			/* Size of a machine word in bytes, as an int. */
27741			#define WORDBYTES ((int)sizeof(machine_word_t))
27742
27743
			/* Size of a machine word in bits. */
27744			#define WORDBITS (8 * WORDBYTES)
27745
27746
27747
27748
27749
27750
27751			#if defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER)
			/*
			 * Compiler version tests.  Each *_PREREQ() macro evaluates to 0 when
			 * the corresponding compiler is not the one in use, so they can be
			 * combined freely in #if expressions.
			 *
			 * GCC_PREREQ(major, minor): true for real gcc (not clang or icc
			 * posing as gcc) at or above the given version.  gcc < 4.9 is rejected.
			 */
27752			# define GCC_PREREQ(major, minor) \
27753			(__GNUC__ > (major) \|\| \
27754			(__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)))
27755			# if !GCC_PREREQ(4, 9)
27756			# error "gcc versions older than 4.9 are no longer supported"
27757			# endif
27758			#else
27759			# define GCC_PREREQ(major, minor) 0
27760			#endif
			/*
			 * CLANG_PREREQ(major, minor, apple_version): on Apple clang only the
			 * third argument is compared (against __apple_build_version__); on
			 * other clang builds only major/minor are compared.  clang < 3.9
			 * (Apple build version < 8000000) is rejected.
			 */
27761			#ifdef __clang__
27762			# ifdef __apple_build_version__
27763			# define CLANG_PREREQ(major, minor, apple_version) \
27764			(__apple_build_version__ >= (apple_version))
27765			# else
27766			# define CLANG_PREREQ(major, minor, apple_version) \
27767			(__clang_major__ > (major) \|\| \
27768			(__clang_major__ == (major) && __clang_minor__ >= (minor)))
27769			# endif
27770			# if !CLANG_PREREQ(3, 9, 8000000)
27771			# error "clang versions older than 3.9 are no longer supported"
27772			# endif
27773			#else
27774			# define CLANG_PREREQ(major, minor, apple_version) 0
27775			#endif
			/* MSVC_PREREQ(version): compares against _MSC_VER; values below 1900 are rejected. */
27776			#ifdef _MSC_VER
27777			# define MSVC_PREREQ(version) (_MSC_VER >= (version))
27778			# if !MSVC_PREREQ(1900)
27779			# error "MSVC versions older than Visual Studio 2015 are no longer supported"
27780			# endif
27781			#else
27782			# define MSVC_PREREQ(version) 0
27783			#endif
27784
27785
27786			#ifndef __has_attribute
			/* Fallbacks so __has_attribute()/__has_builtin() can be used in #if on compilers lacking them: they report "not available". */
27787			# define __has_attribute(attribute) 0
27788			#endif
27789
27790
27791			#ifndef __has_builtin
27792			# define __has_builtin(builtin) 0
27793			#endif
27794
27795
			/* MSVC: map the 'inline' keyword to __inline. */
27796			#ifdef _MSC_VER
27797			# define inline __inline
27798			#endif
27799
27800
			/* forceinline: request inlining as strongly as the compiler allows; plain 'inline' as a last resort. */
27801			#if defined(__GNUC__) \|\| __has_attribute(always_inline)
27802			# define forceinline inline __attribute__((always_inline))
27803			#elif defined(_MSC_VER)
27804			# define forceinline __forceinline
27805			#else
27806			# define forceinline inline
27807			#endif
27808
27809
			/* MAYBE_UNUSED: marks a declaration as possibly unused; expands to nothing where unsupported. */
27810			#if defined(__GNUC__) \|\| __has_attribute(unused)
27811			# define MAYBE_UNUSED __attribute__((unused))
27812			#else
27813			# define MAYBE_UNUSED
27814			#endif
27815
27816
			/* NORETURN: marks a function that does not return; expands to nothing where unsupported. */
27817			#if defined(__GNUC__) \|\| __has_attribute(noreturn)
27818			# define NORETURN __attribute__((noreturn))
27819			#else
27820			# define NORETURN
27821			#endif
27822
27823
			/* Before C11 (or when __STDC_VERSION__ is undefined), map 'restrict' to __restrict__ or drop it. */
27824			#if !defined(__STDC_VERSION__) \|\| (__STDC_VERSION__ < 201112L)
27825			# if defined(__GNUC__) \|\| defined(__clang__)
27826			# define restrict __restrict__
27827			# else
27828			# define restrict
27829			# endif
27830			#endif
27831
27832
			/* likely(expr): hint that expr is usually true.  With the builtin the result is normalized to 0/1; the fallback yields expr unchanged. */
27833			#if defined(__GNUC__) \|\| __has_builtin(__builtin_expect)
27834			# define likely(expr) __builtin_expect(!!(expr), 1)
27835			#else
27836			# define likely(expr) (expr)
27837			#endif
27838
27839
			/* unlikely(expr): hint that expr is usually false; same normalization caveat as likely(). */
27840			#if defined(__GNUC__) \|\| __has_builtin(__builtin_expect)
27841			# define unlikely(expr) __builtin_expect(!!(expr), 0)
27842			#else
27843			# define unlikely(expr) (expr)
27844			#endif
27845
27846
			/* prefetchr(addr): prefetch for reading; a no-op where no intrinsic is selected. */
27847			#undef prefetchr
27848			#if defined(__GNUC__) \|\| __has_builtin(__builtin_prefetch)
27849			# define prefetchr(addr) __builtin_prefetch((addr), 0)
27850			#elif defined(_MSC_VER)
27851			# if defined(ARCH_X86_32) \|\| defined(ARCH_X86_64)
27852			# define prefetchr(addr) _mm_prefetch((addr), _MM_HINT_T0)
27853			# elif defined(ARCH_ARM64)
27854			# define prefetchr(addr) __prefetch2((addr), 0x00 )
27855			# elif defined(ARCH_ARM32)
27856			# define prefetchr(addr) __prefetch(addr)
27857			# endif
27858			#endif
27859			#ifndef prefetchr
27860			# define prefetchr(addr)
27861			#endif
27862
27863
			/* prefetchw(addr): prefetch for writing; a no-op where no intrinsic is selected. */
27864			#undef prefetchw
27865			#if defined(__GNUC__) \|\| __has_builtin(__builtin_prefetch)
27866			# define prefetchw(addr) __builtin_prefetch((addr), 1)
27867			#elif defined(_MSC_VER)
27868			# if defined(ARCH_X86_32) \|\| defined(ARCH_X86_64)
27869			# define prefetchw(addr) _m_prefetchw(addr)
27870			# elif defined(ARCH_ARM64)
27871			# define prefetchw(addr) __prefetch2((addr), 0x10 )
27872			# elif defined(ARCH_ARM32)
27873			# define prefetchw(addr) __prefetchw(addr)
27874			# endif
27875			#endif
27876			#ifndef prefetchw
27877			# define prefetchw(addr)
27878			#endif
27879
27880
			/* _aligned_attribute(n): request n-byte alignment.  Left undefined when neither form is available, so users must test for it. */
27881			#undef _aligned_attribute
27882			#if defined(__GNUC__) \|\| __has_attribute(aligned)
27883			# define _aligned_attribute(n) __attribute__((aligned(n)))
27884			#elif defined(_MSC_VER)
27885			# define _aligned_attribute(n) __declspec(align(n))
27886			#endif
27887
27888
			/* _target_attribute(attrs): per-function target options; expands to nothing where unsupported. */
27889			#if defined(__GNUC__) \|\| __has_attribute(target)
27890			# define _target_attribute(attrs) __attribute__((target(attrs)))
27891			#else
27892			# define _target_attribute(attrs)
27893			#endif
27894
27895
27896
27897
27898
27899			#define ARRAY_LEN(A) (sizeof(A) / sizeof((A)[0])) /* element count; A must be a real array, not a pointer */
27900			#define MIN(a, b) ((a) <= (b) ? (a) : (b)) /* note: one argument is evaluated twice */
27901			#define MAX(a, b) ((a) >= (b) ? (a) : (b)) /* note: one argument is evaluated twice */
27902			#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d)) /* n / d rounded up; n + d - 1 must not overflow */
27903			#define STATIC_ASSERT(expr) ((void)sizeof(char[1 - 2 * !(expr)])) /* negative array size (compile error) when expr is false */
27904			#define ALIGN(n, a) (((n) + (a) - 1) & ~((a) - 1)) /* round n up to a multiple of a; the mask is only correct for power-of-two a */
27905			#define ROUND_UP(n, d) ((d) * DIV_ROUND_UP((n), (d))) /* round n up to a multiple of any d */
27906
27907
27908
27909
27910
27911
27912			#if defined(__BYTE_ORDER__)
			/*
			 * CPU_IS_LITTLE_ENDIAN(): nonzero on a little-endian target.  It is a
			 * constant expression when __BYTE_ORDER__ is available, hard-coded to
			 * true for MSVC, and otherwise determined at run time.
			 */
27913			# define CPU_IS_LITTLE_ENDIAN() (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
27914			#elif defined(_MSC_VER)
27915			# define CPU_IS_LITTLE_ENDIAN() true
27916			#else
27917			static forceinline bool CPU_IS_LITTLE_ENDIAN(void)
27918			{
			/* Store 1 in a 32-bit word and read back its first byte: 1 means little endian. */
27919			union {
27920			u32 w;
27921			u8 b;
27922			} u;
27923
27924			u.w = 1;
27925			return u.b;
27926			}
27927			#endif
27928
27929
27930			static forceinline u16 bswap16(u16 v)
27931			{
			/* Swap the two bytes of v, using a compiler builtin/intrinsic when one is available. */
27932			#if defined(__GNUC__) \|\| __has_builtin(__builtin_bswap16)
27933			return __builtin_bswap16(v);
27934			#elif defined(_MSC_VER)
27935			return _byteswap_ushort(v);
27936			#else
			/* Portable fallback; the result is truncated back to 16 bits by the return type. */
27937			return (v << 8) \| (v >> 8);
27938			#endif
27939			}
27940
27941
27942			static forceinline u32 bswap32(u32 v)
27943			{
			/* Reverse the byte order of a 32-bit value, using a compiler builtin/intrinsic when one is available. */
27944			#if defined(__GNUC__) \|\| __has_builtin(__builtin_bswap32)
27945			return __builtin_bswap32(v);
27946			#elif defined(_MSC_VER)
27947			return _byteswap_ulong(v);
27948			#else
			/* Portable fallback: move each byte to its mirrored position. */
27949			return ((v & 0x000000FF) << 24) \|
27950			((v & 0x0000FF00) << 8) \|
27951			((v & 0x00FF0000) >> 8) \|
27952			((v & 0xFF000000) >> 24);
27953			#endif
27954			}
27955
27956
27957			static forceinline u64 bswap64(u64 v)
27958			{
			/* Reverse the byte order of a 64-bit value, using a compiler builtin/intrinsic when one is available. */
27959			#if defined(__GNUC__) \|\| __has_builtin(__builtin_bswap64)
27960			return __builtin_bswap64(v);
27961			#elif defined(_MSC_VER)
27962			return _byteswap_uint64(v);
27963			#else
			/* Portable fallback: move each byte to its mirrored position. */
27964			return ((v & 0x00000000000000FF) << 56) \|
27965			((v & 0x000000000000FF00) << 40) \|
27966			((v & 0x0000000000FF0000) << 24) \|
27967			((v & 0x00000000FF000000) << 8) \|
27968			((v & 0x000000FF00000000) >> 8) \|
27969			((v & 0x0000FF0000000000) >> 24) \|
27970			((v & 0x00FF000000000000) >> 40) \|
27971			((v & 0xFF00000000000000) >> 56);
27972			#endif
27973			}
27974
27975			#define le16_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap16(v)) /* leNN_bswap: convert between native and little-endian order (identity on LE CPUs) */
27976			#define le32_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap32(v))
27977			#define le64_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap64(v))
27978			#define be16_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? bswap16(v) : (v)) /* beNN_bswap: convert between native and big-endian order (identity on BE CPUs) */
27979			#define be32_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? bswap32(v) : (v))
27980			#define be64_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? bswap64(v) : (v))
27981
27982
27983
27984
27985
27986
27987			#if (defined(__GNUC__) \|\| defined(__clang__)) && \
27988			(defined(ARCH_X86_64) \|\| defined(ARCH_X86_32) \|\| \
27989			defined(__ARM_FEATURE_UNALIGNED) \|\| defined(__powerpc64__) \|\| \
27990			defined(__riscv_misaligned_fast) \|\| \
27991			defined(__wasm__))
			/*
			 * UNALIGNED_ACCESS_IS_FAST: 1 when the get/put_unaligned_*() helpers
			 * should use whole-word loads and stores, 0 when they should go byte
			 * by byte.  Set for GNU-compatible compilers on the targets listed
			 * above and unconditionally for MSVC.
			 */
27992			# define UNALIGNED_ACCESS_IS_FAST 1
27993			#elif defined(_MSC_VER)
27994			# define UNALIGNED_ACCESS_IS_FAST 1
27995			#else
27996			# define UNALIGNED_ACCESS_IS_FAST 0
27997			#endif
27998
27999
28000
28001			#ifdef FREESTANDING
			/* MEMCOPY: the copy routine used by the helpers below; the compiler builtin when built FREESTANDING, memcpy otherwise. */
28002			# define MEMCOPY __builtin_memcpy
28003			#else
28004			# define MEMCOPY memcpy
28005			#endif
28006
28007
28008
			/*
			 * DEFINE_UNALIGNED_TYPE(type) defines load_<type>_unaligned(p) and
			 * store_<type>_unaligned(v, p), which move a value of 'type' from or
			 * to an arbitrarily aligned address by copying sizeof(type) bytes.
			 * No byte-order conversion is performed.
			 */
28009			#define DEFINE_UNALIGNED_TYPE(type) \
28010			static forceinline type \
28011			load_##type##_unaligned(const void *p) \
28012			{ \
28013			type v; \
28014			\
28015			MEMCOPY(&v, p, sizeof(v)); \
28016			return v; \
28017			} \
28018			\
28019			static forceinline void \
28020			store_##type##_unaligned(type v, void *p) \
28021			{ \
28022			MEMCOPY(p, &v, sizeof(v)); \
28023			}
28024
28025			DEFINE_UNALIGNED_TYPE(u16)
28026			DEFINE_UNALIGNED_TYPE(u32)
28027			DEFINE_UNALIGNED_TYPE(u64)
28028			DEFINE_UNALIGNED_TYPE(machine_word_t)
28029
			/* MEMCOPY is private to the definitions above. */
28030			#undef MEMCOPY
28031
			/* Shorter names for the machine_word_t variants. */
28032			#define load_word_unaligned load_machine_word_t_unaligned
28033			#define store_word_unaligned store_machine_word_t_unaligned
28034
28035
28036
28037			static forceinline u16
28038			get_unaligned_le16(const u8 *p)
28039			{
			/* Read a 16-bit little-endian value from p, which need not be aligned. */
28040			if (UNALIGNED_ACCESS_IS_FAST)
28041			return le16_bswap(load_u16_unaligned(p));
28042			else
			/* Byte-by-byte fallback: p[0] is the least significant byte. */
28043			return ((u16)p[1] << 8) \| p[0];
28044			}
28045
28046			static forceinline u16
28047			get_unaligned_be16(const u8 *p)
28048			{
			/* Read a 16-bit big-endian value from p, which need not be aligned. */
28049			if (UNALIGNED_ACCESS_IS_FAST)
28050			return be16_bswap(load_u16_unaligned(p));
28051			else
			/* Byte-by-byte fallback: p[0] is the most significant byte. */
28052			return ((u16)p[0] << 8) \| p[1];
28053			}
28054
28055			static forceinline u32
28056			get_unaligned_le32(const u8 *p)
28057			{
			/* Read a 32-bit little-endian value from p, which need not be aligned. */
28058			if (UNALIGNED_ACCESS_IS_FAST)
28059			return le32_bswap(load_u32_unaligned(p));
28060			else
			/* Byte-by-byte fallback: p[0] is the least significant byte. */
28061			return ((u32)p[3] << 24) \| ((u32)p[2] << 16) \|
28062			((u32)p[1] << 8) \| p[0];
28063			}
28064
28065			static forceinline u32
28066			get_unaligned_be32(const u8 *p)
28067			{
			/* Read a 32-bit big-endian value from p, which need not be aligned. */
28068			if (UNALIGNED_ACCESS_IS_FAST)
28069			return be32_bswap(load_u32_unaligned(p));
28070			else
			/* Byte-by-byte fallback: p[0] is the most significant byte. */
28071			return ((u32)p[0] << 24) \| ((u32)p[1] << 16) \|
28072			((u32)p[2] << 8) \| p[3];
28073			}
28074
28075			static forceinline u64
28076			get_unaligned_le64(const u8 *p)
28077			{
			/* Read a 64-bit little-endian value from p, which need not be aligned. */
28078			if (UNALIGNED_ACCESS_IS_FAST)
28079			return le64_bswap(load_u64_unaligned(p));
28080			else
			/* Byte-by-byte fallback: p[0] is the least significant byte. */
28081			return ((u64)p[7] << 56) \| ((u64)p[6] << 48) \|
28082			((u64)p[5] << 40) \| ((u64)p[4] << 32) \|
28083			((u64)p[3] << 24) \| ((u64)p[2] << 16) \|
28084			((u64)p[1] << 8) \| p[0];
28085			}
28086
28087			static forceinline machine_word_t
28088			get_unaligned_leword(const u8 *p)
28089			{
			/* Read one little-endian machine word from p, dispatching on the word size (32 or 64 bits only). */
28090			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
28091			if (WORDBITS == 32)
28092			return get_unaligned_le32(p);
28093			else
28094			return get_unaligned_le64(p);
28095			}
28096
28097
28098
28099			static forceinline void
28100			put_unaligned_le16(u16 v, u8 *p)
28101			{
			/* Store v at p in little-endian byte order; p need not be aligned. */
28102			if (UNALIGNED_ACCESS_IS_FAST) {
28103			store_u16_unaligned(le16_bswap(v), p);
28104			} else {
			/* Byte-by-byte fallback: least significant byte first. */
28105			p[0] = (u8)(v >> 0);
28106			p[1] = (u8)(v >> 8);
28107			}
28108			}
28109
28110			static forceinline void
28111			put_unaligned_be16(u16 v, u8 *p)
28112			{
28113			if (UNALIGNED_ACCESS_IS_FAST) {
28114			store_u16_unaligned(be16_bswap(v), p);
28115			} else {
28116			p[0] = (u8)(v >> 8);
28117			p[1] = (u8)(v >> 0);
28118			}
28119			}
28120
28121			static forceinline void
28122			put_unaligned_le32(u32 v, u8 *p)
28123			{
28124			if (UNALIGNED_ACCESS_IS_FAST) {
28125			store_u32_unaligned(le32_bswap(v), p);
28126			} else {
28127			p[0] = (u8)(v >> 0);
28128			p[1] = (u8)(v >> 8);
28129			p[2] = (u8)(v >> 16);
28130			p[3] = (u8)(v >> 24);
28131			}
28132			}
28133
28134			static forceinline void
28135			put_unaligned_be32(u32 v, u8 *p)
28136			{
28137			if (UNALIGNED_ACCESS_IS_FAST) {
28138			store_u32_unaligned(be32_bswap(v), p);
28139			} else {
28140			p[0] = (u8)(v >> 24);
28141			p[1] = (u8)(v >> 16);
28142			p[2] = (u8)(v >> 8);
28143			p[3] = (u8)(v >> 0);
28144			}
28145			}
28146
28147			static forceinline void
28148			put_unaligned_le64(u64 v, u8 *p)
28149			{
28150			if (UNALIGNED_ACCESS_IS_FAST) {
28151			store_u64_unaligned(le64_bswap(v), p);
28152			} else {
28153			p[0] = (u8)(v >> 0);
28154			p[1] = (u8)(v >> 8);
28155			p[2] = (u8)(v >> 16);
28156			p[3] = (u8)(v >> 24);
28157			p[4] = (u8)(v >> 32);
28158			p[5] = (u8)(v >> 40);
28159			p[6] = (u8)(v >> 48);
28160			p[7] = (u8)(v >> 56);
28161			}
28162			}
28163
28164			static forceinline void
28165			put_unaligned_leword(machine_word_t v, u8 *p)
28166			{
28167			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
28168			if (WORDBITS == 32)
28169			put_unaligned_le32(v, p);
28170			else
28171			put_unaligned_le64(v, p);
28172			}
28173
28174
28175
28176
28177
28178
28179
28180			static forceinline unsigned
28181			bsr32(u32 v)
28182			{
28183			#if defined(__GNUC__) \|\| __has_builtin(__builtin_clz)
28184			return 31 - __builtin_clz(v);
28185			#elif defined(_MSC_VER)
28186			unsigned long i;
28187
28188			_BitScanReverse(&i, v);
28189			return i;
28190			#else
28191			unsigned i = 0;
28192
28193			while ((v >>= 1) != 0)
28194			i++;
28195			return i;
28196			#endif
28197			}
28198
28199			static forceinline unsigned
28200			bsr64(u64 v)
28201			{
28202			#if defined(__GNUC__) \|\| __has_builtin(__builtin_clzll)
28203			return 63 - __builtin_clzll(v);
28204			#elif defined(_MSC_VER) && defined(_WIN64)
28205			unsigned long i;
28206
28207			_BitScanReverse64(&i, v);
28208			return i;
28209			#else
28210			unsigned i = 0;
28211
28212			while ((v >>= 1) != 0)
28213			i++;
28214			return i;
28215			#endif
28216			}
28217
28218			static forceinline unsigned
28219			bsrw(machine_word_t v)
28220			{
28221			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
28222			if (WORDBITS == 32)
28223			return bsr32(v);
28224			else
28225			return bsr64(v);
28226			}
28227
28228
28229
28230			static forceinline unsigned
28231			bsf32(u32 v)
28232			{
28233			#if defined(__GNUC__) \|\| __has_builtin(__builtin_ctz)
28234			return __builtin_ctz(v);
28235			#elif defined(_MSC_VER)
28236			unsigned long i;
28237
28238			_BitScanForward(&i, v);
28239			return i;
28240			#else
28241			unsigned i = 0;
28242
28243			for (; (v & 1) == 0; v >>= 1)
28244			i++;
28245			return i;
28246			#endif
28247			}
28248
28249			static forceinline unsigned
28250			bsf64(u64 v)
28251			{
28252			#if defined(__GNUC__) \|\| __has_builtin(__builtin_ctzll)
28253			return __builtin_ctzll(v);
28254			#elif defined(_MSC_VER) && defined(_WIN64)
28255			unsigned long i;
28256
28257			_BitScanForward64(&i, v);
28258			return i;
28259			#else
28260			unsigned i = 0;
28261
28262			for (; (v & 1) == 0; v >>= 1)
28263			i++;
28264			return i;
28265			#endif
28266			}
28267
28268			static forceinline unsigned
28269			bsfw(machine_word_t v)
28270			{
28271			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
28272			if (WORDBITS == 32)
28273			return bsf32(v);
28274			else
28275			return bsf64(v);
28276			}
28277
28278
28279			#undef rbit32
28280			#if (defined(__GNUC__) \|\| defined(__clang__)) && defined(ARCH_ARM32) && \
28281			(__ARM_ARCH >= 7 \|\| (__ARM_ARCH == 6 && defined(__ARM_ARCH_6T2__)))
28282			static forceinline u32
28283			rbit32(u32 v)
28284			{
28285			__asm__("rbit %0, %1" : "=r" (v) : "r" (v));
28286			return v;
28287			}
28288			#define rbit32 rbit32
28289			#elif (defined(__GNUC__) \|\| defined(__clang__)) && defined(ARCH_ARM64)
28290			static forceinline u32
28291			rbit32(u32 v)
28292			{
28293			__asm__("rbit %w0, %w1" : "=r" (v) : "r" (v));
28294			return v;
28295			}
28296			#define rbit32 rbit32
28297			#endif
28298
28299			#endif
28300
28301
28302			typedef void *(*malloc_func_t)(size_t);
28303			typedef void (*free_func_t)(void *);
28304
28305			extern malloc_func_t libdeflate_default_malloc_func;
28306			extern free_func_t libdeflate_default_free_func;
28307
28308			void *libdeflate_aligned_malloc(malloc_func_t malloc_func,
28309			size_t alignment, size_t size);
28310			void libdeflate_aligned_free(free_func_t free_func, void *ptr);
28311
28312			#ifdef FREESTANDING
28313
28314			void *memset(void *s, int c, size_t n);
28315			#define memset(s, c, n) __builtin_memset((s), (c), (n))
28316
28317			void *memcpy(void *dest, const void *src, size_t n);
28318			#define memcpy(dest, src, n) __builtin_memcpy((dest), (src), (n))
28319
28320			void *memmove(void *dest, const void *src, size_t n);
28321			#define memmove(dest, src, n) __builtin_memmove((dest), (src), (n))
28322
28323			int memcmp(const void *s1, const void *s2, size_t n);
28324			#define memcmp(s1, s2, n) __builtin_memcmp((s1), (s2), (n))
28325
28326			#undef LIBDEFLATE_ENABLE_ASSERTIONS
28327			#else
28328			# include <string.h>
28329
28330			# ifdef __clang_analyzer__
28331			# define LIBDEFLATE_ENABLE_ASSERTIONS
28332			# endif
28333			#endif
28334
28335
28336			#ifdef LIBDEFLATE_ENABLE_ASSERTIONS
28337			NORETURN void
28338			libdeflate_assertion_failed(const char *expr, const char *file, int line);
28339			#define ASSERT(expr) { if (unlikely(!(expr))) \
28340			libdeflate_assertion_failed(#expr, __FILE__, __LINE__); }
28341			#else
28342			#define ASSERT(expr) (void)(expr)
28343			#endif
28344
28345			#define CONCAT_IMPL(a, b) a##b
28346			#define CONCAT(a, b) CONCAT_IMPL(a, b)
28347			#define ADD_SUFFIX(name) CONCAT(name, SUFFIX)
28348
28349			#endif
28350
28351			/* #include "deflate_constants.h" */
28352
28353
28354			#ifndef LIB_DEFLATE_CONSTANTS_H
28355			#define LIB_DEFLATE_CONSTANTS_H
28356
28357
28358			#define DEFLATE_BLOCKTYPE_UNCOMPRESSED 0
28359			#define DEFLATE_BLOCKTYPE_STATIC_HUFFMAN 1
28360			#define DEFLATE_BLOCKTYPE_DYNAMIC_HUFFMAN 2
28361
28362
28363			#define DEFLATE_MIN_MATCH_LEN 3
28364			#define DEFLATE_MAX_MATCH_LEN 258
28365
28366
28367			#define DEFLATE_MAX_MATCH_OFFSET 32768
28368
28369
28370			#define DEFLATE_WINDOW_ORDER 15
28371
28372
28373			#define DEFLATE_NUM_PRECODE_SYMS 19
28374			#define DEFLATE_NUM_LITLEN_SYMS 288
28375			#define DEFLATE_NUM_OFFSET_SYMS 32
28376
28377
28378			#define DEFLATE_MAX_NUM_SYMS 288
28379
28380
28381			#define DEFLATE_NUM_LITERALS 256
28382			#define DEFLATE_END_OF_BLOCK 256
28383			#define DEFLATE_FIRST_LEN_SYM 257
28384
28385
28386			#define DEFLATE_MAX_PRE_CODEWORD_LEN 7
28387			#define DEFLATE_MAX_LITLEN_CODEWORD_LEN 15
28388			#define DEFLATE_MAX_OFFSET_CODEWORD_LEN 15
28389
28390
28391			#define DEFLATE_MAX_CODEWORD_LEN 15
28392
28393
28394			#define DEFLATE_MAX_LENS_OVERRUN 137
28395
28396
28397			#define DEFLATE_MAX_EXTRA_LENGTH_BITS 5
28398			#define DEFLATE_MAX_EXTRA_OFFSET_BITS 13
28399
28400			#endif
28401
28402
28403
28404			#if 0
28405			# pragma message("UNSAFE DECOMPRESSION IS ENABLED. THIS MUST ONLY BE USED IF THE DECOMPRESSOR INPUT WILL ALWAYS BE TRUSTED!")
28406			# define SAFETY_CHECK(expr) (void)(expr)
28407			#else
28408			# define SAFETY_CHECK(expr) if (unlikely(!(expr))) return LIBDEFLATE_BAD_DATA
28409			#endif
28410
28411
28412
28413
28414
28415
28416			/* typedef machine_word_t bitbuf_t; */
28417			#define DECOMPRESS_BITBUF_NBITS (8 * (int)sizeof(bitbuf_t))
28418
28419
28420			#define BITMASK(n) (((bitbuf_t)1 << (n)) - 1)
28421
28422
28423			#define MAX_BITSLEFT \
28424			(UNALIGNED_ACCESS_IS_FAST ? DECOMPRESS_BITBUF_NBITS - 1 : DECOMPRESS_BITBUF_NBITS)
28425
28426
28427			#define CONSUMABLE_NBITS (MAX_BITSLEFT - 7)
28428
28429
28430			#define FASTLOOP_PRELOADABLE_NBITS \
28431			(UNALIGNED_ACCESS_IS_FAST ? DECOMPRESS_BITBUF_NBITS : CONSUMABLE_NBITS)
28432
28433
28434			#define PRELOAD_SLACK MAX(0, FASTLOOP_PRELOADABLE_NBITS - MAX_BITSLEFT)
28435
28436
28437			#define CAN_CONSUME(n) (CONSUMABLE_NBITS >= (n))
28438
28439
28440			#define CAN_CONSUME_AND_THEN_PRELOAD(consume_nbits, preload_nbits) \
28441			(CONSUMABLE_NBITS >= (consume_nbits) && \
28442			FASTLOOP_PRELOADABLE_NBITS >= (consume_nbits) + (preload_nbits))
28443
28444
28445			#define REFILL_BITS_BRANCHLESS() \
28446			do { \
28447			bitbuf \|= get_unaligned_leword(in_next) << (u8)bitsleft; \
28448			in_next += sizeof(bitbuf_t) - 1; \
28449			in_next -= (bitsleft >> 3) & 0x7; \
28450			bitsleft \|= MAX_BITSLEFT & ~7; \
28451			} while (0)
28452
28453
28454			#define REFILL_BITS() \
28455			do { \
28456			if (UNALIGNED_ACCESS_IS_FAST && \
28457			likely(in_end - in_next >= sizeof(bitbuf_t))) { \
28458			REFILL_BITS_BRANCHLESS(); \
28459			} else { \
28460			while ((u8)bitsleft < CONSUMABLE_NBITS) { \
28461			if (likely(in_next != in_end)) { \
28462			bitbuf \|= (bitbuf_t)*in_next++ << \
28463			(u8)bitsleft; \
28464			} else { \
28465			overread_count++; \
28466			SAFETY_CHECK(overread_count <= \
28467			sizeof(bitbuf_t)); \
28468			} \
28469			bitsleft += 8; \
28470			} \
28471			} \
28472			} while (0)
28473
28474
28475			#define REFILL_BITS_IN_FASTLOOP() \
28476			do { \
28477			STATIC_ASSERT(UNALIGNED_ACCESS_IS_FAST \|\| \
28478			FASTLOOP_PRELOADABLE_NBITS == CONSUMABLE_NBITS); \
28479			if (UNALIGNED_ACCESS_IS_FAST) { \
28480			REFILL_BITS_BRANCHLESS(); \
28481			} else { \
28482			while ((u8)bitsleft < CONSUMABLE_NBITS) { \
28483			bitbuf \|= (bitbuf_t)*in_next++ << (u8)bitsleft; \
28484			bitsleft += 8; \
28485			} \
28486			} \
28487			} while (0)
28488
28489
28490			#define FASTLOOP_MAX_BYTES_WRITTEN \
28491			(2 + DEFLATE_MAX_MATCH_LEN + (5 * WORDBYTES) - 1)
28492
28493
28494			#define FASTLOOP_MAX_BYTES_READ \
28495			(DIV_ROUND_UP(MAX_BITSLEFT + (2 * LITLEN_TABLEBITS) + \
28496			LENGTH_MAXBITS + OFFSET_MAXBITS, 8) + \
28497			sizeof(bitbuf_t))
28498
28499
28500
28501
28502
28503
28504
28505			#define PRECODE_TABLEBITS 7
28506			#define PRECODE_ENOUGH 128
28507			#define LITLEN_TABLEBITS 11
28508			#define LITLEN_ENOUGH 2342
28509			#define OFFSET_TABLEBITS 8
28510			#define OFFSET_ENOUGH 402
28511
28512
28513			static forceinline u32
28514			make_decode_table_entry(const u32 decode_results[], u32 sym, u32 len)
28515			{
28516	0		return decode_results[sym] + (len << 8) + len;
28517			}
28518
28519
28520			static const u32 precode_decode_results[] = {
28521			#define ENTRY(presym) ((u32)presym << 16)
28522			ENTRY(0) , ENTRY(1) , ENTRY(2) , ENTRY(3) ,
28523			ENTRY(4) , ENTRY(5) , ENTRY(6) , ENTRY(7) ,
28524			ENTRY(8) , ENTRY(9) , ENTRY(10) , ENTRY(11) ,
28525			ENTRY(12) , ENTRY(13) , ENTRY(14) , ENTRY(15) ,
28526			ENTRY(16) , ENTRY(17) , ENTRY(18) ,
28527			#undef ENTRY
28528			};
28529
28530
28531
28532
28533			#define HUFFDEC_LITERAL 0x80000000
28534
28535
28536			#define HUFFDEC_EXCEPTIONAL 0x00008000
28537
28538
28539			#define HUFFDEC_SUBTABLE_POINTER 0x00004000
28540
28541
28542			#define HUFFDEC_END_OF_BLOCK 0x00002000
28543
28544
28545			#define LENGTH_MAXBITS (DEFLATE_MAX_LITLEN_CODEWORD_LEN + \
28546			DEFLATE_MAX_EXTRA_LENGTH_BITS)
28547			#define LENGTH_MAXFASTBITS (LITLEN_TABLEBITS + \
28548			DEFLATE_MAX_EXTRA_LENGTH_BITS)
28549
28550
28551			static const u32 litlen_decode_results[] = {
28552
28553
28554			#define ENTRY(literal) (HUFFDEC_LITERAL \| ((u32)literal << 16))
28555			ENTRY(0) , ENTRY(1) , ENTRY(2) , ENTRY(3) ,
28556			ENTRY(4) , ENTRY(5) , ENTRY(6) , ENTRY(7) ,
28557			ENTRY(8) , ENTRY(9) , ENTRY(10) , ENTRY(11) ,
28558			ENTRY(12) , ENTRY(13) , ENTRY(14) , ENTRY(15) ,
28559			ENTRY(16) , ENTRY(17) , ENTRY(18) , ENTRY(19) ,
28560			ENTRY(20) , ENTRY(21) , ENTRY(22) , ENTRY(23) ,
28561			ENTRY(24) , ENTRY(25) , ENTRY(26) , ENTRY(27) ,
28562			ENTRY(28) , ENTRY(29) , ENTRY(30) , ENTRY(31) ,
28563			ENTRY(32) , ENTRY(33) , ENTRY(34) , ENTRY(35) ,
28564			ENTRY(36) , ENTRY(37) , ENTRY(38) , ENTRY(39) ,
28565			ENTRY(40) , ENTRY(41) , ENTRY(42) , ENTRY(43) ,
28566			ENTRY(44) , ENTRY(45) , ENTRY(46) , ENTRY(47) ,
28567			ENTRY(48) , ENTRY(49) , ENTRY(50) , ENTRY(51) ,
28568			ENTRY(52) , ENTRY(53) , ENTRY(54) , ENTRY(55) ,
28569			ENTRY(56) , ENTRY(57) , ENTRY(58) , ENTRY(59) ,
28570			ENTRY(60) , ENTRY(61) , ENTRY(62) , ENTRY(63) ,
28571			ENTRY(64) , ENTRY(65) , ENTRY(66) , ENTRY(67) ,
28572			ENTRY(68) , ENTRY(69) , ENTRY(70) , ENTRY(71) ,
28573			ENTRY(72) , ENTRY(73) , ENTRY(74) , ENTRY(75) ,
28574			ENTRY(76) , ENTRY(77) , ENTRY(78) , ENTRY(79) ,
28575			ENTRY(80) , ENTRY(81) , ENTRY(82) , ENTRY(83) ,
28576			ENTRY(84) , ENTRY(85) , ENTRY(86) , ENTRY(87) ,
28577			ENTRY(88) , ENTRY(89) , ENTRY(90) , ENTRY(91) ,
28578			ENTRY(92) , ENTRY(93) , ENTRY(94) , ENTRY(95) ,
28579			ENTRY(96) , ENTRY(97) , ENTRY(98) , ENTRY(99) ,
28580			ENTRY(100) , ENTRY(101) , ENTRY(102) , ENTRY(103) ,
28581			ENTRY(104) , ENTRY(105) , ENTRY(106) , ENTRY(107) ,
28582			ENTRY(108) , ENTRY(109) , ENTRY(110) , ENTRY(111) ,
28583			ENTRY(112) , ENTRY(113) , ENTRY(114) , ENTRY(115) ,
28584			ENTRY(116) , ENTRY(117) , ENTRY(118) , ENTRY(119) ,
28585			ENTRY(120) , ENTRY(121) , ENTRY(122) , ENTRY(123) ,
28586			ENTRY(124) , ENTRY(125) , ENTRY(126) , ENTRY(127) ,
28587			ENTRY(128) , ENTRY(129) , ENTRY(130) , ENTRY(131) ,
28588			ENTRY(132) , ENTRY(133) , ENTRY(134) , ENTRY(135) ,
28589			ENTRY(136) , ENTRY(137) , ENTRY(138) , ENTRY(139) ,
28590			ENTRY(140) , ENTRY(141) , ENTRY(142) , ENTRY(143) ,
28591			ENTRY(144) , ENTRY(145) , ENTRY(146) , ENTRY(147) ,
28592			ENTRY(148) , ENTRY(149) , ENTRY(150) , ENTRY(151) ,
28593			ENTRY(152) , ENTRY(153) , ENTRY(154) , ENTRY(155) ,
28594			ENTRY(156) , ENTRY(157) , ENTRY(158) , ENTRY(159) ,
28595			ENTRY(160) , ENTRY(161) , ENTRY(162) , ENTRY(163) ,
28596			ENTRY(164) , ENTRY(165) , ENTRY(166) , ENTRY(167) ,
28597			ENTRY(168) , ENTRY(169) , ENTRY(170) , ENTRY(171) ,
28598			ENTRY(172) , ENTRY(173) , ENTRY(174) , ENTRY(175) ,
28599			ENTRY(176) , ENTRY(177) , ENTRY(178) , ENTRY(179) ,
28600			ENTRY(180) , ENTRY(181) , ENTRY(182) , ENTRY(183) ,
28601			ENTRY(184) , ENTRY(185) , ENTRY(186) , ENTRY(187) ,
28602			ENTRY(188) , ENTRY(189) , ENTRY(190) , ENTRY(191) ,
28603			ENTRY(192) , ENTRY(193) , ENTRY(194) , ENTRY(195) ,
28604			ENTRY(196) , ENTRY(197) , ENTRY(198) , ENTRY(199) ,
28605			ENTRY(200) , ENTRY(201) , ENTRY(202) , ENTRY(203) ,
28606			ENTRY(204) , ENTRY(205) , ENTRY(206) , ENTRY(207) ,
28607			ENTRY(208) , ENTRY(209) , ENTRY(210) , ENTRY(211) ,
28608			ENTRY(212) , ENTRY(213) , ENTRY(214) , ENTRY(215) ,
28609			ENTRY(216) , ENTRY(217) , ENTRY(218) , ENTRY(219) ,
28610			ENTRY(220) , ENTRY(221) , ENTRY(222) , ENTRY(223) ,
28611			ENTRY(224) , ENTRY(225) , ENTRY(226) , ENTRY(227) ,
28612			ENTRY(228) , ENTRY(229) , ENTRY(230) , ENTRY(231) ,
28613			ENTRY(232) , ENTRY(233) , ENTRY(234) , ENTRY(235) ,
28614			ENTRY(236) , ENTRY(237) , ENTRY(238) , ENTRY(239) ,
28615			ENTRY(240) , ENTRY(241) , ENTRY(242) , ENTRY(243) ,
28616			ENTRY(244) , ENTRY(245) , ENTRY(246) , ENTRY(247) ,
28617			ENTRY(248) , ENTRY(249) , ENTRY(250) , ENTRY(251) ,
28618			ENTRY(252) , ENTRY(253) , ENTRY(254) , ENTRY(255) ,
28619			#undef ENTRY
28620
28621
28622			HUFFDEC_EXCEPTIONAL \| HUFFDEC_END_OF_BLOCK,
28623
28624
28625			#define ENTRY(length_base, num_extra_bits) \
28626			(((u32)(length_base) << 16) \| (num_extra_bits))
28627			ENTRY(3 , 0) , ENTRY(4 , 0) , ENTRY(5 , 0) , ENTRY(6 , 0),
28628			ENTRY(7 , 0) , ENTRY(8 , 0) , ENTRY(9 , 0) , ENTRY(10 , 0),
28629			ENTRY(11 , 1) , ENTRY(13 , 1) , ENTRY(15 , 1) , ENTRY(17 , 1),
28630			ENTRY(19 , 2) , ENTRY(23 , 2) , ENTRY(27 , 2) , ENTRY(31 , 2),
28631			ENTRY(35 , 3) , ENTRY(43 , 3) , ENTRY(51 , 3) , ENTRY(59 , 3),
28632			ENTRY(67 , 4) , ENTRY(83 , 4) , ENTRY(99 , 4) , ENTRY(115, 4),
28633			ENTRY(131, 5) , ENTRY(163, 5) , ENTRY(195, 5) , ENTRY(227, 5),
28634			ENTRY(258, 0) , ENTRY(258, 0) , ENTRY(258, 0) ,
28635			#undef ENTRY
28636			};
28637
28638
28639			#define OFFSET_MAXBITS (DEFLATE_MAX_OFFSET_CODEWORD_LEN + \
28640			DEFLATE_MAX_EXTRA_OFFSET_BITS)
28641			#define OFFSET_MAXFASTBITS (OFFSET_TABLEBITS + \
28642			DEFLATE_MAX_EXTRA_OFFSET_BITS)
28643
28644
28645			static const u32 offset_decode_results[] = {
28646			#define ENTRY(offset_base, num_extra_bits) \
28647			(((u32)(offset_base) << 16) \| (num_extra_bits))
28648			ENTRY(1 , 0) , ENTRY(2 , 0) , ENTRY(3 , 0) , ENTRY(4 , 0) ,
28649			ENTRY(5 , 1) , ENTRY(7 , 1) , ENTRY(9 , 2) , ENTRY(13 , 2) ,
28650			ENTRY(17 , 3) , ENTRY(25 , 3) , ENTRY(33 , 4) , ENTRY(49 , 4) ,
28651			ENTRY(65 , 5) , ENTRY(97 , 5) , ENTRY(129 , 6) , ENTRY(193 , 6) ,
28652			ENTRY(257 , 7) , ENTRY(385 , 7) , ENTRY(513 , 8) , ENTRY(769 , 8) ,
28653			ENTRY(1025 , 9) , ENTRY(1537 , 9) , ENTRY(2049 , 10) , ENTRY(3073 , 10) ,
28654			ENTRY(4097 , 11) , ENTRY(6145 , 11) , ENTRY(8193 , 12) , ENTRY(12289 , 12) ,
28655			ENTRY(16385 , 13) , ENTRY(24577 , 13) , ENTRY(24577 , 13) , ENTRY(24577 , 13) ,
28656			#undef ENTRY
28657			};
28658
28659
28660			struct libdeflate_decompressor {
28661
28662
28663
28664			union {
28665			u8 precode_lens[DEFLATE_NUM_PRECODE_SYMS];
28666
28667			struct {
28668			u8 lens[DEFLATE_NUM_LITLEN_SYMS +
28669			DEFLATE_NUM_OFFSET_SYMS +
28670			DEFLATE_MAX_LENS_OVERRUN];
28671
28672			u32 precode_decode_table[PRECODE_ENOUGH];
28673			} l;
28674
28675			u32 litlen_decode_table[LITLEN_ENOUGH];
28676			} u;
28677
28678			u32 offset_decode_table[OFFSET_ENOUGH];
28679
28680
28681			u16 sorted_syms[DEFLATE_MAX_NUM_SYMS];
28682
28683			bool static_codes_loaded;
28684			unsigned litlen_tablebits;
28685
28686
28687			free_func_t free_func;
28688			};
28689
28690
28691			static bool
28692	87		build_decode_table(u32 decode_table[],
28693			const u8 lens[],
28694			const unsigned num_syms,
28695			const u32 decode_results[],
28696			unsigned table_bits,
28697			unsigned max_codeword_len,
28698			u16 *sorted_syms,
28699			unsigned *table_bits_ret)
28700			{
28701			unsigned len_counts[DEFLATE_MAX_CODEWORD_LEN + 1];
28702			unsigned offsets[DEFLATE_MAX_CODEWORD_LEN + 1];
28703			unsigned sym;
28704			unsigned codeword;
28705			unsigned len;
28706			unsigned count;
28707			u32 codespace_used;
28708			unsigned cur_table_end;
28709			unsigned subtable_prefix;
28710			unsigned subtable_start;
28711			unsigned subtable_bits;
28712
28713
28714	1439	100	for (len = 0; len <= max_codeword_len; len++)
28715	1352		len_counts[len] = 0;
28716	13235	100	for (sym = 0; sym < num_syms; sym++)
28717	13148		len_counts[lens[sym]]++;
28718
28719
28720	715	50	while (max_codeword_len > 1 && len_counts[max_codeword_len] == 0)
		100
28721	628		max_codeword_len--;
28722	87	100	if (table_bits_ret != NULL) {
28723	41		table_bits = MIN(table_bits, max_codeword_len);
28724	41		*table_bits_ret = table_bits;
28725			}
28726
28727
28728
28729
28730			STATIC_ASSERT(sizeof(codespace_used) == 4);
28731			STATIC_ASSERT(UINT32_MAX / (1U << (DEFLATE_MAX_CODEWORD_LEN - 1)) >=
28732			DEFLATE_MAX_NUM_SYMS);
28733
28734	87		offsets[0] = 0;
28735	87		offsets[1] = len_counts[0];
28736	87		codespace_used = 0;
28737	637	100	for (len = 1; len < max_codeword_len; len++) {
28738	550		offsets[len + 1] = offsets[len] + len_counts[len];
28739	550		codespace_used = (codespace_used << 1) + len_counts[len];
28740			}
28741	87		codespace_used = (codespace_used << 1) + len_counts[len];
28742
28743	13235	100	for (sym = 0; sym < num_syms; sym++)
28744	13148		sorted_syms[offsets[lens[sym]]++] = sym;
28745
28746	87		sorted_syms += offsets[0];
28747
28748
28749
28750
28751
28752
28753	87	50	if (unlikely(codespace_used > (1U << max_codeword_len)))
28754	0		return false;
28755
28756
28757	87	50	if (unlikely(codespace_used < (1U << max_codeword_len))) {
28758			u32 entry;
28759			unsigned i;
28760
28761
28762	0	0	if (codespace_used == 0) {
28763	0		sym = 0;
28764			} else {
28765	0	0	if (codespace_used != (1U << (max_codeword_len - 1)) \|\|
28766	0	0	len_counts[1] != 1)
28767	0		return false;
28768	0		sym = sorted_syms[0];
28769			}
28770	0		entry = make_decode_table_entry(decode_results, sym, 1);
28771	0	0	for (i = 0; i < (1U << table_bits); i++)
28772	0		decode_table[i] = entry;
28773	0		return true;
28774			}
28775
28776
28777	87		codeword = 0;
28778	87		len = 1;
28779	480	100	while ((count = len_counts[len]) == 0)
28780	393		len++;
28781	87		cur_table_end = 1U << len;
28782	241	100	while (len <= table_bits) {
28783
28784			do {
28785			unsigned bit;
28786
28787
28788	24320		decode_table[codeword] =
28789	24320		make_decode_table_entry(decode_results,
28790	12160		*sorted_syms++, len);
28791
28792	12160	100	if (codeword == cur_table_end - 1) {
28793
28794	190	100	for (; len < table_bits; len++) {
28795	111		memcpy(&decode_table[cur_table_end],
28796			decode_table,
28797			cur_table_end *
28798			sizeof(decode_table[0]));
28799	111		cur_table_end <<= 1;
28800			}
28801	79		return true;
28802			}
28803
28804	12081		bit = 1U << bsr32(codeword ^ (cur_table_end - 1));
28805	12081		codeword &= bit - 1;
28806	12081		codeword \|= bit;
28807	12081	100	} while (--count);
28808
28809
28810			do {
28811	157	100	if (++len <= table_bits) {
28812	149		memcpy(&decode_table[cur_table_end],
28813			decode_table,
28814			cur_table_end * sizeof(decode_table[0]));
28815	149		cur_table_end <<= 1;
28816			}
28817	157	100	} while ((count = len_counts[len]) == 0);
28818			}
28819
28820
28821	8		cur_table_end = 1U << table_bits;
28822	8		subtable_prefix = -1;
28823	8		subtable_start = 0;
28824	30		for (;;) {
28825			u32 entry;
28826			unsigned i;
28827			unsigned stride;
28828			unsigned bit;
28829
28830
28831	38	100	if ((codeword & ((1U << table_bits) - 1)) != subtable_prefix) {
28832	19		subtable_prefix = (codeword & ((1U << table_bits) - 1));
28833	19		subtable_start = cur_table_end;
28834
28835	19		subtable_bits = len - table_bits;
28836	19		codespace_used = count;
28837	19	50	while (codespace_used < (1U << subtable_bits)) {
28838	0		subtable_bits++;
28839	0		codespace_used = (codespace_used << 1) +
28840	0		len_counts[table_bits + subtable_bits];
28841			}
28842	19		cur_table_end = subtable_start + (1U << subtable_bits);
28843
28844
28845	19		decode_table[subtable_prefix] =
28846	19		((u32)subtable_start << 16) \|
28847			HUFFDEC_EXCEPTIONAL \|
28848	19		HUFFDEC_SUBTABLE_POINTER \|
28849	19		(subtable_bits << 8) \| table_bits;
28850			}
28851
28852
28853	38		entry = make_decode_table_entry(decode_results, *sorted_syms++,
28854			len - table_bits);
28855	38		i = subtable_start + (codeword >> table_bits);
28856	38		stride = 1U << (len - table_bits);
28857			do {
28858	38		decode_table[i] = entry;
28859	38		i += stride;
28860	38	50	} while (i < cur_table_end);
28861
28862
28863	38	100	if (codeword == (1U << len) - 1)
28864	8		return true;
28865	30		bit = 1U << bsr32(codeword ^ ((1U << len) - 1));
28866	30		codeword &= bit - 1;
28867	30		codeword \|= bit;
28868	30		count--;
28869	30	50	while (count == 0)
28870	0		count = len_counts[++len];
28871			}
28872			}
28873
28874
28875			static bool
28876	5		build_precode_decode_table(struct libdeflate_decompressor *d)
28877			{
28878
28879			STATIC_ASSERT(PRECODE_TABLEBITS == 7 && PRECODE_ENOUGH == 128);
28880
28881			STATIC_ASSERT(ARRAY_LEN(precode_decode_results) ==
28882			DEFLATE_NUM_PRECODE_SYMS);
28883
28884	10		return build_decode_table(d->u.l.precode_decode_table,
28885	5		d->u.precode_lens,
28886			DEFLATE_NUM_PRECODE_SYMS,
28887			precode_decode_results,
28888			PRECODE_TABLEBITS,
28889			DEFLATE_MAX_PRE_CODEWORD_LEN,
28890	5		d->sorted_syms,
28891			NULL);
28892			}
28893
28894
28895			static bool
28896	41		build_litlen_decode_table(struct libdeflate_decompressor *d,
28897			unsigned num_litlen_syms, unsigned num_offset_syms)
28898			{
28899
28900			STATIC_ASSERT(LITLEN_TABLEBITS == 11 && LITLEN_ENOUGH == 2342);
28901
28902			STATIC_ASSERT(ARRAY_LEN(litlen_decode_results) ==
28903			DEFLATE_NUM_LITLEN_SYMS);
28904
28905	82		return build_decode_table(d->u.litlen_decode_table,
28906	41		d->u.l.lens,
28907			num_litlen_syms,
28908			litlen_decode_results,
28909			LITLEN_TABLEBITS,
28910			DEFLATE_MAX_LITLEN_CODEWORD_LEN,
28911	41		d->sorted_syms,
28912			&d->litlen_tablebits);
28913			}
28914
28915
28916			static bool
28917	41		build_offset_decode_table(struct libdeflate_decompressor *d,
28918			unsigned num_litlen_syms, unsigned num_offset_syms)
28919			{
28920
28921			STATIC_ASSERT(OFFSET_TABLEBITS == 8 && OFFSET_ENOUGH == 402);
28922
28923			STATIC_ASSERT(ARRAY_LEN(offset_decode_results) ==
28924			DEFLATE_NUM_OFFSET_SYMS);
28925
28926	82		return build_decode_table(d->offset_decode_table,
28927	41		d->u.l.lens + num_litlen_syms,
28928			num_offset_syms,
28929			offset_decode_results,
28930			OFFSET_TABLEBITS,
28931			DEFLATE_MAX_OFFSET_CODEWORD_LEN,
28932	41		d->sorted_syms,
28933			NULL);
28934			}
28935
28936
28937
28938			typedef enum libdeflate_result (*decompress_func_t)
28939			(struct libdeflate_decompressor * restrict d,
28940			const void * restrict in, size_t in_nbytes,
28941			void * restrict out, size_t out_nbytes_avail,
28942			size_t *actual_in_nbytes_ret, size_t *actual_out_nbytes_ret);
28943
28944			#define FUNCNAME deflate_decompress_default
28945			#undef ATTRIBUTES
28946			#undef EXTRACT_VARBITS
28947			#undef EXTRACT_VARBITS8
28948			/* #include "decompress_template.h" */
28949
28950
28951
28952
28953			#ifndef ATTRIBUTES
28954			# define ATTRIBUTES
28955			#endif
28956			#ifndef EXTRACT_VARBITS
28957			# define EXTRACT_VARBITS(word, count) ((word) & BITMASK(count))
28958			#endif
28959			#ifndef EXTRACT_VARBITS8
28960			# define EXTRACT_VARBITS8(word, count) ((word) & BITMASK((u8)(count)))
28961			#endif
28962
28963			static ATTRIBUTES MAYBE_UNUSED enum libdeflate_result
28964	0		FUNCNAME(struct libdeflate_decompressor * restrict d,
28965			const void * restrict in, size_t in_nbytes,
28966			void * restrict out, size_t out_nbytes_avail,
28967			size_t *actual_in_nbytes_ret, size_t *actual_out_nbytes_ret)
28968			{
28969	0		u8 *out_next = out;
28970	0		u8 * const out_end = out_next + out_nbytes_avail;
28971	0		u8 * const out_fastloop_end =
28972	0	0	out_end - MIN(out_nbytes_avail, FASTLOOP_MAX_BYTES_WRITTEN);
28973
28974
28975	0		const u8 *in_next = in;
28976	0		const u8 * const in_end = in_next + in_nbytes;
28977	0		const u8 * const in_fastloop_end =
28978	0	0	in_end - MIN(in_nbytes, FASTLOOP_MAX_BYTES_READ);
28979	0		bitbuf_t bitbuf = 0;
28980			bitbuf_t saved_bitbuf;
28981	0		u32 bitsleft = 0;
28982	0		size_t overread_count = 0;
28983
28984			bool is_final_block;
28985			unsigned block_type;
28986			unsigned num_litlen_syms;
28987			unsigned num_offset_syms;
28988			bitbuf_t litlen_tablemask;
28989			u32 entry;
28990
28991	0		next_block:
28992
28993			;
28994
28995			STATIC_ASSERT(CAN_CONSUME(1 + 2 + 5 + 5 + 4 + 3));
28996	0	0	REFILL_BITS();
		0
		0
		0
28997
28998
28999	0		is_final_block = bitbuf & BITMASK(1);
29000
29001
29002	0		block_type = (bitbuf >> 1) & BITMASK(2);
29003
29004	0	0	if (block_type == DEFLATE_BLOCKTYPE_DYNAMIC_HUFFMAN) {
29005
29006
29007
29008
29009			static const u8 deflate_precode_lens_permutation[DEFLATE_NUM_PRECODE_SYMS] = {
29010			16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
29011			};
29012
29013			unsigned num_explicit_precode_lens;
29014			unsigned i;
29015
29016
29017
29018			STATIC_ASSERT(DEFLATE_NUM_LITLEN_SYMS == 257 + BITMASK(5));
29019	0		num_litlen_syms = 257 + ((bitbuf >> 3) & BITMASK(5));
29020
29021			STATIC_ASSERT(DEFLATE_NUM_OFFSET_SYMS == 1 + BITMASK(5));
29022	0		num_offset_syms = 1 + ((bitbuf >> 8) & BITMASK(5));
29023
29024			STATIC_ASSERT(DEFLATE_NUM_PRECODE_SYMS == 4 + BITMASK(4));
29025	0		num_explicit_precode_lens = 4 + ((bitbuf >> 13) & BITMASK(4));
29026
29027	0		d->static_codes_loaded = false;
29028
29029
29030			STATIC_ASSERT(DEFLATE_MAX_PRE_CODEWORD_LEN == (1 << 3) - 1);
29031			if (CAN_CONSUME(3 * (DEFLATE_NUM_PRECODE_SYMS - 1))) {
29032	0		d->u.precode_lens[deflate_precode_lens_permutation[0]] =
29033	0		(bitbuf >> 17) & BITMASK(3);
29034	0		bitbuf >>= 20;
29035	0		bitsleft -= 20;
29036	0	0	REFILL_BITS();
		0
		0
		0
29037	0		i = 1;
29038			do {
29039	0		d->u.precode_lens[deflate_precode_lens_permutation[i]] =
29040	0		bitbuf & BITMASK(3);
29041	0		bitbuf >>= 3;
29042	0		bitsleft -= 3;
29043	0	0	} while (++i < num_explicit_precode_lens);
29044			} else {
29045			bitbuf >>= 17;
29046			bitsleft -= 17;
29047			i = 0;
29048			do {
29049			if ((u8)bitsleft < 3)
29050			REFILL_BITS();
29051			d->u.precode_lens[deflate_precode_lens_permutation[i]] =
29052			bitbuf & BITMASK(3);
29053			bitbuf >>= 3;
29054			bitsleft -= 3;
29055			} while (++i < num_explicit_precode_lens);
29056			}
29057	0	0	for (; i < DEFLATE_NUM_PRECODE_SYMS; i++)
29058	0		d->u.precode_lens[deflate_precode_lens_permutation[i]] = 0;
29059
29060
29061	0	0	SAFETY_CHECK(build_precode_decode_table(d));
29062
29063
29064	0		i = 0;
29065			do {
29066			unsigned presym;
29067			u8 rep_val;
29068			unsigned rep_count;
29069
29070	0	0	if ((u8)bitsleft < DEFLATE_MAX_PRE_CODEWORD_LEN + 7)
29071	0	0	REFILL_BITS();
		0
		0
		0
29072
29073
29074			STATIC_ASSERT(PRECODE_TABLEBITS == DEFLATE_MAX_PRE_CODEWORD_LEN);
29075
29076
29077	0		entry = d->u.l.precode_decode_table[
29078	0		bitbuf & BITMASK(DEFLATE_MAX_PRE_CODEWORD_LEN)];
29079	0		bitbuf >>= (u8)entry;
29080	0		bitsleft -= entry;
29081	0		presym = entry >> 16;
29082
29083	0	0	if (presym < 16) {
29084
29085	0		d->u.l.lens[i++] = presym;
29086	0		continue;
29087			}
29088
29089
29090
29091
29092			STATIC_ASSERT(DEFLATE_MAX_LENS_OVERRUN == 138 - 1);
29093
29094	0	0	if (presym == 16) {
29095
29096	0	0	SAFETY_CHECK(i != 0);
29097	0		rep_val = d->u.l.lens[i - 1];
29098			STATIC_ASSERT(3 + BITMASK(2) == 6);
29099	0		rep_count = 3 + (bitbuf & BITMASK(2));
29100	0		bitbuf >>= 2;
29101	0		bitsleft -= 2;
29102	0		d->u.l.lens[i + 0] = rep_val;
29103	0		d->u.l.lens[i + 1] = rep_val;
29104	0		d->u.l.lens[i + 2] = rep_val;
29105	0		d->u.l.lens[i + 3] = rep_val;
29106	0		d->u.l.lens[i + 4] = rep_val;
29107	0		d->u.l.lens[i + 5] = rep_val;
29108	0		i += rep_count;
29109	0	0	} else if (presym == 17) {
29110
29111			STATIC_ASSERT(3 + BITMASK(3) == 10);
29112	0		rep_count = 3 + (bitbuf & BITMASK(3));
29113	0		bitbuf >>= 3;
29114	0		bitsleft -= 3;
29115	0		d->u.l.lens[i + 0] = 0;
29116	0		d->u.l.lens[i + 1] = 0;
29117	0		d->u.l.lens[i + 2] = 0;
29118	0		d->u.l.lens[i + 3] = 0;
29119	0		d->u.l.lens[i + 4] = 0;
29120	0		d->u.l.lens[i + 5] = 0;
29121	0		d->u.l.lens[i + 6] = 0;
29122	0		d->u.l.lens[i + 7] = 0;
29123	0		d->u.l.lens[i + 8] = 0;
29124	0		d->u.l.lens[i + 9] = 0;
29125	0		i += rep_count;
29126			} else {
29127
29128			STATIC_ASSERT(11 + BITMASK(7) == 138);
29129	0		rep_count = 11 + (bitbuf & BITMASK(7));
29130	0		bitbuf >>= 7;
29131	0		bitsleft -= 7;
29132	0		memset(&d->u.l.lens[i], 0,
29133			rep_count * sizeof(d->u.l.lens[i]));
29134	0		i += rep_count;
29135			}
29136	0	0	} while (i < num_litlen_syms + num_offset_syms);
29137
29138
29139	0	0	SAFETY_CHECK(i == num_litlen_syms + num_offset_syms);
29140
29141	0	0	} else if (block_type == DEFLATE_BLOCKTYPE_UNCOMPRESSED) {
29142			u16 len, nlen;
29143
29144
29145
29146	0		bitsleft -= 3;
29147
29148
29149	0		bitsleft = (u8)bitsleft;
29150	0	0	SAFETY_CHECK(overread_count <= (bitsleft >> 3));
29151	0		in_next -= (bitsleft >> 3) - overread_count;
29152	0		overread_count = 0;
29153	0		bitbuf = 0;
29154	0		bitsleft = 0;
29155
29156	0	0	SAFETY_CHECK(in_end - in_next >= 4);
29157	0		len = get_unaligned_le16(in_next);
29158	0		nlen = get_unaligned_le16(in_next + 2);
29159	0		in_next += 4;
29160
29161	0	0	SAFETY_CHECK(len == (u16)~nlen);
29162	0	0	if (unlikely(len > out_end - out_next))
29163	0		return LIBDEFLATE_INSUFFICIENT_SPACE;
29164	0	0	SAFETY_CHECK(len <= in_end - in_next);
29165
29166	0		memcpy(out_next, in_next, len);
29167	0		in_next += len;
29168	0		out_next += len;
29169
29170	0		goto block_done;
29171
29172			} else {
29173			unsigned i;
29174
29175	0	0	SAFETY_CHECK(block_type == DEFLATE_BLOCKTYPE_STATIC_HUFFMAN);
29176
29177
29178
29179	0		bitbuf >>= 3;
29180	0		bitsleft -= 3;
29181
29182	0	0	if (d->static_codes_loaded)
29183	0		goto have_decode_tables;
29184
29185	0		d->static_codes_loaded = true;
29186
29187			STATIC_ASSERT(DEFLATE_NUM_LITLEN_SYMS == 288);
29188			STATIC_ASSERT(DEFLATE_NUM_OFFSET_SYMS == 32);
29189
29190	0	0	for (i = 0; i < 144; i++)
29191	0		d->u.l.lens[i] = 8;
29192	0	0	for (; i < 256; i++)
29193	0		d->u.l.lens[i] = 9;
29194	0	0	for (; i < 280; i++)
29195	0		d->u.l.lens[i] = 7;
29196	0	0	for (; i < 288; i++)
29197	0		d->u.l.lens[i] = 8;
29198
29199	0	0	for (; i < 288 + 32; i++)
29200	0		d->u.l.lens[i] = 5;
29201
29202	0		num_litlen_syms = 288;
29203	0		num_offset_syms = 32;
29204			}
29205
29206
29207
29208	0	0	SAFETY_CHECK(build_offset_decode_table(d, num_litlen_syms, num_offset_syms));
29209	0	0	SAFETY_CHECK(build_litlen_decode_table(d, num_litlen_syms, num_offset_syms));
29210	0		have_decode_tables:
29211	0		litlen_tablemask = BITMASK(d->litlen_tablebits);
29212
29213
29214	0	0	if (in_next >= in_fastloop_end || out_next >= out_fastloop_end)
		0
29215	0		goto generic_loop;
29216	0		REFILL_BITS_IN_FASTLOOP();
29217	0		entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask];
29218			do {
29219			u32 length, offset, lit;
29220			const u8 *src;
29221			u8 *dst;
29222
29223
29224	0		saved_bitbuf = bitbuf;
29225	0		bitbuf >>= (u8)entry;
29226	0		bitsleft -= entry;
29227
29228
29229	0	0	if (entry & HUFFDEC_LITERAL) {
29230
29231			if (
29232			CAN_CONSUME_AND_THEN_PRELOAD(2 * LITLEN_TABLEBITS +
29233			LENGTH_MAXBITS,
29234			OFFSET_TABLEBITS) &&
29235
29236			CAN_CONSUME_AND_THEN_PRELOAD(2 * LITLEN_TABLEBITS +
29237			DEFLATE_MAX_LITLEN_CODEWORD_LEN,
29238			LITLEN_TABLEBITS)) {
29239
29240	0		lit = entry >> 16;
29241	0		entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask];
29242	0		saved_bitbuf = bitbuf;
29243	0		bitbuf >>= (u8)entry;
29244	0		bitsleft -= entry;
29245	0		*out_next++ = lit;
29246	0	0	if (entry & HUFFDEC_LITERAL) {
29247
29248	0		lit = entry >> 16;
29249	0		entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask];
29250	0		saved_bitbuf = bitbuf;
29251	0		bitbuf >>= (u8)entry;
29252	0		bitsleft -= entry;
29253	0		*out_next++ = lit;
29254	0	0	if (entry & HUFFDEC_LITERAL) {
29255
29256	0		lit = entry >> 16;
29257	0		entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask];
29258	0		REFILL_BITS_IN_FASTLOOP();
29259	0		*out_next++ = lit;
29260	0		continue;
29261			}
29262			}
29263			} else {
29264
29265			STATIC_ASSERT(CAN_CONSUME_AND_THEN_PRELOAD(
29266			LITLEN_TABLEBITS, LITLEN_TABLEBITS));
29267			lit = entry >> 16;
29268			entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask];
29269			REFILL_BITS_IN_FASTLOOP();
29270			*out_next++ = lit;
29271			continue;
29272			}
29273			}
29274
29275
29276	0	0	if (unlikely(entry & HUFFDEC_EXCEPTIONAL)) {
29277
29278
29279	0	0	if (unlikely(entry & HUFFDEC_END_OF_BLOCK))
29280	0		goto block_done;
29281
29282
29283	0		entry = d->u.litlen_decode_table[(entry >> 16) +
29284	0		EXTRACT_VARBITS(bitbuf, (entry >> 8) & 0x3F)];
29285	0		saved_bitbuf = bitbuf;
29286	0		bitbuf >>= (u8)entry;
29287	0		bitsleft -= entry;
29288
29289
29290			if (!CAN_CONSUME_AND_THEN_PRELOAD(DEFLATE_MAX_LITLEN_CODEWORD_LEN,
29291			LITLEN_TABLEBITS) ||
29292			!CAN_CONSUME_AND_THEN_PRELOAD(LENGTH_MAXBITS,
29293			OFFSET_TABLEBITS))
29294			REFILL_BITS_IN_FASTLOOP();
29295	0	0	if (entry & HUFFDEC_LITERAL) {
29296
29297	0		lit = entry >> 16;
29298	0		entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask];
29299	0		REFILL_BITS_IN_FASTLOOP();
29300	0		*out_next++ = lit;
29301	0		continue;
29302			}
29303	0	0	if (unlikely(entry & HUFFDEC_END_OF_BLOCK))
29304	0		goto block_done;
29305
29306			}
29307
29308
29309	0		length = entry >> 16;
29310	0		length += EXTRACT_VARBITS8(saved_bitbuf, entry) >> (u8)(entry >> 8);
29311
29312
29313			STATIC_ASSERT(CAN_CONSUME_AND_THEN_PRELOAD(LENGTH_MAXFASTBITS,
29314			OFFSET_TABLEBITS));
29315	0		entry = d->offset_decode_table[bitbuf & BITMASK(OFFSET_TABLEBITS)];
29316			if (CAN_CONSUME_AND_THEN_PRELOAD(OFFSET_MAXBITS,
29317			LITLEN_TABLEBITS)) {
29318
29319	0	0	if (unlikely(entry & HUFFDEC_EXCEPTIONAL)) {
29320
29321	0	0	if (unlikely((u8)bitsleft < OFFSET_MAXBITS +
29322			LITLEN_TABLEBITS - PRELOAD_SLACK))
29323	0		REFILL_BITS_IN_FASTLOOP();
29324	0		bitbuf >>= OFFSET_TABLEBITS;
29325	0		bitsleft -= OFFSET_TABLEBITS;
29326	0		entry = d->offset_decode_table[(entry >> 16) +
29327	0		EXTRACT_VARBITS(bitbuf, (entry >> 8) & 0x3F)];
29328	0	0	} else if (unlikely((u8)bitsleft < OFFSET_MAXFASTBITS +
29329			LITLEN_TABLEBITS - PRELOAD_SLACK))
29330	0		REFILL_BITS_IN_FASTLOOP();
29331			} else {
29332
29333			REFILL_BITS_IN_FASTLOOP();
29334			if (unlikely(entry & HUFFDEC_EXCEPTIONAL)) {
29335
29336			bitbuf >>= OFFSET_TABLEBITS;
29337			bitsleft -= OFFSET_TABLEBITS;
29338			entry = d->offset_decode_table[(entry >> 16) +
29339			EXTRACT_VARBITS(bitbuf, (entry >> 8) & 0x3F)];
29340			REFILL_BITS_IN_FASTLOOP();
29341
29342			STATIC_ASSERT(CAN_CONSUME(
29343			OFFSET_MAXBITS - OFFSET_TABLEBITS));
29344			} else {
29345
29346			STATIC_ASSERT(CAN_CONSUME(OFFSET_MAXFASTBITS));
29347			}
29348			}
29349	0		saved_bitbuf = bitbuf;
29350	0		bitbuf >>= (u8)entry;
29351	0		bitsleft -= entry;
29352	0		offset = entry >> 16;
29353	0		offset += EXTRACT_VARBITS8(saved_bitbuf, entry) >> (u8)(entry >> 8);
29354
29355
29356	0	0	SAFETY_CHECK(offset <= out_next - (const u8 *)out);
29357	0		src = out_next - offset;
29358	0		dst = out_next;
29359	0		out_next += length;
29360
29361
29362			if (!CAN_CONSUME_AND_THEN_PRELOAD(
29363			MAX(OFFSET_MAXBITS - OFFSET_TABLEBITS,
29364			OFFSET_MAXFASTBITS),
29365			LITLEN_TABLEBITS) &&
29366			unlikely((u8)bitsleft < LITLEN_TABLEBITS - PRELOAD_SLACK))
29367			REFILL_BITS_IN_FASTLOOP();
29368	0		entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask];
29369	0		REFILL_BITS_IN_FASTLOOP();
29370
29371
29372	0	0	if (UNALIGNED_ACCESS_IS_FAST && offset >= WORDBYTES) {
29373	0		store_word_unaligned(load_word_unaligned(src), dst);
29374	0		src += WORDBYTES;
29375	0		dst += WORDBYTES;
29376	0		store_word_unaligned(load_word_unaligned(src), dst);
29377	0		src += WORDBYTES;
29378	0		dst += WORDBYTES;
29379	0		store_word_unaligned(load_word_unaligned(src), dst);
29380	0		src += WORDBYTES;
29381	0		dst += WORDBYTES;
29382	0		store_word_unaligned(load_word_unaligned(src), dst);
29383	0		src += WORDBYTES;
29384	0		dst += WORDBYTES;
29385	0		store_word_unaligned(load_word_unaligned(src), dst);
29386	0		src += WORDBYTES;
29387	0		dst += WORDBYTES;
29388	0	0	while (dst < out_next) {
29389	0		store_word_unaligned(load_word_unaligned(src), dst);
29390	0		src += WORDBYTES;
29391	0		dst += WORDBYTES;
29392	0		store_word_unaligned(load_word_unaligned(src), dst);
29393	0		src += WORDBYTES;
29394	0		dst += WORDBYTES;
29395	0		store_word_unaligned(load_word_unaligned(src), dst);
29396	0		src += WORDBYTES;
29397	0		dst += WORDBYTES;
29398	0		store_word_unaligned(load_word_unaligned(src), dst);
29399	0		src += WORDBYTES;
29400	0		dst += WORDBYTES;
29401	0		store_word_unaligned(load_word_unaligned(src), dst);
29402	0		src += WORDBYTES;
29403	0		dst += WORDBYTES;
29404			}
29405	0	0	} else if (UNALIGNED_ACCESS_IS_FAST && offset == 1) {
29406			machine_word_t v;
29407
29408
29409	0		v = (machine_word_t)0x0101010101010101 * src[0];
29410	0		store_word_unaligned(v, dst);
29411	0		dst += WORDBYTES;
29412	0		store_word_unaligned(v, dst);
29413	0		dst += WORDBYTES;
29414	0		store_word_unaligned(v, dst);
29415	0		dst += WORDBYTES;
29416	0		store_word_unaligned(v, dst);
29417	0		dst += WORDBYTES;
29418	0	0	while (dst < out_next) {
29419	0		store_word_unaligned(v, dst);
29420	0		dst += WORDBYTES;
29421	0		store_word_unaligned(v, dst);
29422	0		dst += WORDBYTES;
29423	0		store_word_unaligned(v, dst);
29424	0		dst += WORDBYTES;
29425	0		store_word_unaligned(v, dst);
29426	0		dst += WORDBYTES;
29427			}
29428			} else if (UNALIGNED_ACCESS_IS_FAST) {
29429	0		store_word_unaligned(load_word_unaligned(src), dst);
29430	0		src += offset;
29431	0		dst += offset;
29432	0		store_word_unaligned(load_word_unaligned(src), dst);
29433	0		src += offset;
29434	0		dst += offset;
29435			do {
29436	0		store_word_unaligned(load_word_unaligned(src), dst);
29437	0		src += offset;
29438	0		dst += offset;
29439	0		store_word_unaligned(load_word_unaligned(src), dst);
29440	0		src += offset;
29441	0		dst += offset;
29442	0	0	} while (dst < out_next);
29443			} else {
29444			*dst++ = *src++;
29445			*dst++ = *src++;
29446			do {
29447			*dst++ = *src++;
29448			} while (dst < out_next);
29449			}
29450	0	0	} while (in_next < in_fastloop_end && out_next < out_fastloop_end);
		0
29451
29452
29453	0		generic_loop:
29454	0		for (;;) {
29455			u32 length, offset;
29456			const u8 *src;
29457			u8 *dst;
29458
29459	0	0	REFILL_BITS();
		0
		0
		0
29460	0		entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask];
29461	0		saved_bitbuf = bitbuf;
29462	0		bitbuf >>= (u8)entry;
29463	0		bitsleft -= entry;
29464	0	0	if (unlikely(entry & HUFFDEC_SUBTABLE_POINTER)) {
29465	0		entry = d->u.litlen_decode_table[(entry >> 16) +
29466	0		EXTRACT_VARBITS(bitbuf, (entry >> 8) & 0x3F)];
29467	0		saved_bitbuf = bitbuf;
29468	0		bitbuf >>= (u8)entry;
29469	0		bitsleft -= entry;
29470			}
29471	0		length = entry >> 16;
29472	0	0	if (entry & HUFFDEC_LITERAL) {
29473	0	0	if (unlikely(out_next == out_end))
29474	0		return LIBDEFLATE_INSUFFICIENT_SPACE;
29475	0		*out_next++ = length;
29476	0		continue;
29477			}
29478	0	0	if (unlikely(entry & HUFFDEC_END_OF_BLOCK))
29479	0		goto block_done;
29480	0		length += EXTRACT_VARBITS8(saved_bitbuf, entry) >> (u8)(entry >> 8);
29481	0	0	if (unlikely(length > out_end - out_next))
29482	0		return LIBDEFLATE_INSUFFICIENT_SPACE;
29483
29484			if (!CAN_CONSUME(LENGTH_MAXBITS + OFFSET_MAXBITS))
29485			REFILL_BITS();
29486	0		entry = d->offset_decode_table[bitbuf & BITMASK(OFFSET_TABLEBITS)];
29487	0	0	if (unlikely(entry & HUFFDEC_EXCEPTIONAL)) {
29488	0		bitbuf >>= OFFSET_TABLEBITS;
29489	0		bitsleft -= OFFSET_TABLEBITS;
29490	0		entry = d->offset_decode_table[(entry >> 16) +
29491	0		EXTRACT_VARBITS(bitbuf, (entry >> 8) & 0x3F)];
29492			if (!CAN_CONSUME(OFFSET_MAXBITS))
29493			REFILL_BITS();
29494			}
29495	0		offset = entry >> 16;
29496	0		offset += EXTRACT_VARBITS8(bitbuf, entry) >> (u8)(entry >> 8);
29497	0		bitbuf >>= (u8)entry;
29498	0		bitsleft -= entry;
29499
29500	0	0	SAFETY_CHECK(offset <= out_next - (const u8 *)out);
29501	0		src = out_next - offset;
29502	0		dst = out_next;
29503	0		out_next += length;
29504
29505			STATIC_ASSERT(DEFLATE_MIN_MATCH_LEN == 3);
29506	0		*dst++ = *src++;
29507	0		*dst++ = *src++;
29508			do {
29509	0		*dst++ = *src++;
29510	0	0	} while (dst < out_next);
29511			}
29512
29513	0		block_done:
29514
29515
29516	0	0	if (!is_final_block)
29517	0		goto next_block;
29518
29519
29520
29521	0		bitsleft = (u8)bitsleft;
29522
29523
29524	0	0	SAFETY_CHECK(overread_count <= (bitsleft >> 3));
29525
29526
29527	0	0	if (actual_in_nbytes_ret) {
29528
29529	0		in_next -= (bitsleft >> 3) - overread_count;
29530
29531	0		*actual_in_nbytes_ret = in_next - (u8 *)in;
29532			}
29533
29534
29535	0	0	if (actual_out_nbytes_ret) {
29536	0		*actual_out_nbytes_ret = out_next - (u8 *)out;
29537			} else {
29538	0	0	if (out_next != out_end)
29539	0		return LIBDEFLATE_SHORT_OUTPUT;
29540			}
29541	0		return LIBDEFLATE_SUCCESS;
29542			}
29543
29544			#undef FUNCNAME
29545			#undef ATTRIBUTES
29546			#undef EXTRACT_VARBITS
29547			#undef EXTRACT_VARBITS8
29548
29549
29550
29551			#undef DEFAULT_IMPL
29552			#undef arch_select_decompress_func
29553			#if defined(ARCH_X86_32) || defined(ARCH_X86_64)
29554			/* # include "x86/decompress_impl.h" */
29555			#ifndef LIB_X86_DECOMPRESS_IMPL_H
29556			#define LIB_X86_DECOMPRESS_IMPL_H
29557
29558			/* #include "x86-cpu_features.h" */
29559
29560
29561			#ifndef LIB_X86_CPU_FEATURES_H
29562			#define LIB_X86_CPU_FEATURES_H
29563
29564			/* #include "lib_common.h" */
29565
29566
29567			#ifndef LIB_LIB_COMMON_H
29568			#define LIB_LIB_COMMON_H
29569
29570			#ifdef LIBDEFLATE_H
29571
29572			# error "lib_common.h must always be included before libdeflate.h"
29573			#endif
29574
29575			#if defined(LIBDEFLATE_DLL) && (defined(_WIN32) || defined(__CYGWIN__))
29576			# define LIBDEFLATE_EXPORT_SYM __declspec(dllexport)
29577			#elif defined(__GNUC__)
29578			# define LIBDEFLATE_EXPORT_SYM __attribute__((visibility("default")))
29579			#else
29580			# define LIBDEFLATE_EXPORT_SYM
29581			#endif
29582
29583
29584			#if defined(__GNUC__) && defined(__i386__)
29585			# define LIBDEFLATE_ALIGN_STACK __attribute__((force_align_arg_pointer))
29586			#else
29587			# define LIBDEFLATE_ALIGN_STACK
29588			#endif
29589
29590			#define LIBDEFLATEAPI LIBDEFLATE_EXPORT_SYM LIBDEFLATE_ALIGN_STACK
29591
29592			/* #include "../common_defs.h" */
29593
29594
29595			#ifndef COMMON_DEFS_H
29596			#define COMMON_DEFS_H
29597
29598			/* #include "libdeflate.h" */
29599
29600
29601			#ifndef LIBDEFLATE_H
29602			#define LIBDEFLATE_H
29603
29604			#include <stddef.h>
29605			#include <stdint.h>
29606
29607			#ifdef __cplusplus
29608			extern "C" {
29609			#endif
29610
29611			#define LIBDEFLATE_VERSION_MAJOR 1
29612			#define LIBDEFLATE_VERSION_MINOR 25
29613			#define LIBDEFLATE_VERSION_STRING "1.25"
29614
29615
29616			#ifndef LIBDEFLATEAPI
29617			# if defined(LIBDEFLATE_DLL) && (defined(_WIN32) || defined(__CYGWIN__))
29618			# define LIBDEFLATEAPI __declspec(dllimport)
29619			# else
29620			# define LIBDEFLATEAPI
29621			# endif
29622			#endif
29623
29624
29625
29626
29627
29628			struct libdeflate_compressor;
29629			struct libdeflate_options;
29630
29631
29632			LIBDEFLATEAPI struct libdeflate_compressor *
29633			libdeflate_alloc_compressor(int compression_level);
29634
29635
29636			LIBDEFLATEAPI struct libdeflate_compressor *
29637			libdeflate_alloc_compressor_ex(int compression_level,
29638			const struct libdeflate_options *options);
29639
29640
29641			LIBDEFLATEAPI size_t
29642			libdeflate_deflate_compress(struct libdeflate_compressor *compressor,
29643			const void *in, size_t in_nbytes,
29644			void *out, size_t out_nbytes_avail);
29645
29646
29647			LIBDEFLATEAPI size_t
29648			libdeflate_deflate_compress_bound(struct libdeflate_compressor *compressor,
29649			size_t in_nbytes);
29650
29651
29652			LIBDEFLATEAPI size_t
29653			libdeflate_zlib_compress(struct libdeflate_compressor *compressor,
29654			const void *in, size_t in_nbytes,
29655			void *out, size_t out_nbytes_avail);
29656
29657
29658			LIBDEFLATEAPI size_t
29659			libdeflate_zlib_compress_bound(struct libdeflate_compressor *compressor,
29660			size_t in_nbytes);
29661
29662
29663			LIBDEFLATEAPI size_t
29664			libdeflate_gzip_compress(struct libdeflate_compressor *compressor,
29665			const void *in, size_t in_nbytes,
29666			void *out, size_t out_nbytes_avail);
29667
29668
29669			LIBDEFLATEAPI size_t
29670			libdeflate_gzip_compress_bound(struct libdeflate_compressor *compressor,
29671			size_t in_nbytes);
29672
29673
29674			LIBDEFLATEAPI void
29675			libdeflate_free_compressor(struct libdeflate_compressor *compressor);
29676
29677
29678
29679
29680
29681			struct libdeflate_decompressor;
29682			struct libdeflate_options;
29683
29684
29685			LIBDEFLATEAPI struct libdeflate_decompressor *
29686			libdeflate_alloc_decompressor(void);
29687
29688
29689			LIBDEFLATEAPI struct libdeflate_decompressor *
29690			libdeflate_alloc_decompressor_ex(const struct libdeflate_options *options);
29691
29692
29693			enum libdeflate_result {
29694
29695			LIBDEFLATE_SUCCESS = 0,
29696
29697
29698			LIBDEFLATE_BAD_DATA = 1,
29699
29700
29701			LIBDEFLATE_SHORT_OUTPUT = 2,
29702
29703
29704			LIBDEFLATE_INSUFFICIENT_SPACE = 3,
29705			};
29706
29707
29708			LIBDEFLATEAPI enum libdeflate_result
29709			libdeflate_deflate_decompress(struct libdeflate_decompressor *decompressor,
29710			const void *in, size_t in_nbytes,
29711			void *out, size_t out_nbytes_avail,
29712			size_t *actual_out_nbytes_ret);
29713
29714
29715			LIBDEFLATEAPI enum libdeflate_result
29716			libdeflate_deflate_decompress_ex(struct libdeflate_decompressor *decompressor,
29717			const void *in, size_t in_nbytes,
29718			void *out, size_t out_nbytes_avail,
29719			size_t *actual_in_nbytes_ret,
29720			size_t *actual_out_nbytes_ret);
29721
29722
29723			LIBDEFLATEAPI enum libdeflate_result
29724			libdeflate_zlib_decompress(struct libdeflate_decompressor *decompressor,
29725			const void *in, size_t in_nbytes,
29726			void *out, size_t out_nbytes_avail,
29727			size_t *actual_out_nbytes_ret);
29728
29729
29730			LIBDEFLATEAPI enum libdeflate_result
29731			libdeflate_zlib_decompress_ex(struct libdeflate_decompressor *decompressor,
29732			const void *in, size_t in_nbytes,
29733			void *out, size_t out_nbytes_avail,
29734			size_t *actual_in_nbytes_ret,
29735			size_t *actual_out_nbytes_ret);
29736
29737
29738			LIBDEFLATEAPI enum libdeflate_result
29739			libdeflate_gzip_decompress(struct libdeflate_decompressor *decompressor,
29740			const void *in, size_t in_nbytes,
29741			void *out, size_t out_nbytes_avail,
29742			size_t *actual_out_nbytes_ret);
29743
29744
29745			LIBDEFLATEAPI enum libdeflate_result
29746			libdeflate_gzip_decompress_ex(struct libdeflate_decompressor *decompressor,
29747			const void *in, size_t in_nbytes,
29748			void *out, size_t out_nbytes_avail,
29749			size_t *actual_in_nbytes_ret,
29750			size_t *actual_out_nbytes_ret);
29751
29752
29753			LIBDEFLATEAPI void
29754			libdeflate_free_decompressor(struct libdeflate_decompressor *decompressor);
29755
29756
29757
29758
29759
29760
29761			LIBDEFLATEAPI uint32_t
29762			libdeflate_adler32(uint32_t adler, const void *buffer, size_t len);
29763
29764
29765
29766			LIBDEFLATEAPI uint32_t
29767			libdeflate_crc32(uint32_t crc, const void *buffer, size_t len);
29768
29769
29770
29771
29772
29773
29774			LIBDEFLATEAPI void
29775			libdeflate_set_memory_allocator(void *(*malloc_func)(size_t),
29776			void (*free_func)(void *));
29777
29778
29779			struct libdeflate_options {
29780
29781
29782			size_t sizeof_options;
29783
29784
29785			void *(*malloc_func)(size_t);
29786			void (*free_func)(void *);
29787			};
29788
29789			#ifdef __cplusplus
29790			}
29791			#endif
29792
29793			#endif
29794
29795
29796			#include <stdbool.h>
29797			#include <stddef.h>
29798			#include <stdint.h>
29799			#ifdef _MSC_VER
29800			# include <intrin.h>
29801			# include <stdlib.h>
29802
29803
29804			# pragma warning(disable : 4146)
29805
29806			# pragma warning(disable : 4018)
29807			# pragma warning(disable : 4244)
29808			# pragma warning(disable : 4267)
29809			# pragma warning(disable : 4310)
29810
29811			# pragma warning(disable : 4100)
29812			# pragma warning(disable : 4127)
29813			# pragma warning(disable : 4189)
29814			# pragma warning(disable : 4232)
29815			# pragma warning(disable : 4245)
29816			# pragma warning(disable : 4295)
29817			#endif
29818			#ifndef FREESTANDING
29819			# include <string.h>
29820			#endif
29821
29822
29823
29824
29825
29826
29827			#undef ARCH_X86_64
29828			#undef ARCH_X86_32
29829			#undef ARCH_ARM64
29830			#undef ARCH_ARM32
29831			#undef ARCH_RISCV
29832			#ifdef _MSC_VER
29833
29834			# if defined(_M_X64) && !defined(_M_ARM64EC)
29835			# define ARCH_X86_64
29836			# elif defined(_M_IX86)
29837			# define ARCH_X86_32
29838			# elif defined(_M_ARM64)
29839			# define ARCH_ARM64
29840			# elif defined(_M_ARM)
29841			# define ARCH_ARM32
29842			# endif
29843			#else
29844			# if defined(__x86_64__)
29845			# define ARCH_X86_64
29846			# elif defined(__i386__)
29847			# define ARCH_X86_32
29848			# elif defined(__aarch64__)
29849			# define ARCH_ARM64
29850			# elif defined(__arm__)
29851			# define ARCH_ARM32
29852			# elif defined(__riscv)
29853			# define ARCH_RISCV
29854			# endif
29855			#endif
29856
29857
29858
29859
29860
29861
29862			typedef uint8_t u8;
29863			typedef uint16_t u16;
29864			typedef uint32_t u32;
29865			typedef uint64_t u64;
29866			typedef int8_t s8;
29867			typedef int16_t s16;
29868			typedef int32_t s32;
29869			typedef int64_t s64;
29870
29871
29872			#ifdef _MSC_VER
29873			# ifdef _WIN64
29874			typedef long long ssize_t;
29875			# else
29876			typedef long ssize_t;
29877			# endif
29878			#endif
29879
29880
29881			typedef size_t machine_word_t;
29882
29883
29884			#define WORDBYTES ((int)sizeof(machine_word_t))
29885
29886
29887			#define WORDBITS (8 * WORDBYTES)
29888
29889
29890
29891
29892
29893
29894			#if defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER)
29895			# define GCC_PREREQ(major, minor) \
29896			(__GNUC__ > (major) || \
29897			(__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)))
29898			# if !GCC_PREREQ(4, 9)
29899			# error "gcc versions older than 4.9 are no longer supported"
29900			# endif
29901			#else
29902			# define GCC_PREREQ(major, minor) 0
29903			#endif
29904			#ifdef __clang__
29905			# ifdef __apple_build_version__
29906			# define CLANG_PREREQ(major, minor, apple_version) \
29907			(__apple_build_version__ >= (apple_version))
29908			# else
29909			# define CLANG_PREREQ(major, minor, apple_version) \
29910			(__clang_major__ > (major) || \
29911			(__clang_major__ == (major) && __clang_minor__ >= (minor)))
29912			# endif
29913			# if !CLANG_PREREQ(3, 9, 8000000)
29914			# error "clang versions older than 3.9 are no longer supported"
29915			# endif
29916			#else
29917			# define CLANG_PREREQ(major, minor, apple_version) 0
29918			#endif
29919			#ifdef _MSC_VER
29920			# define MSVC_PREREQ(version) (_MSC_VER >= (version))
29921			# if !MSVC_PREREQ(1900)
29922			# error "MSVC versions older than Visual Studio 2015 are no longer supported"
29923			# endif
29924			#else
29925			# define MSVC_PREREQ(version) 0
29926			#endif
29927
29928
29929			#ifndef __has_attribute
29930			# define __has_attribute(attribute) 0
29931			#endif
29932
29933
29934			#ifndef __has_builtin
29935			# define __has_builtin(builtin) 0
29936			#endif
29937
29938
29939			#ifdef _MSC_VER
29940			# define inline __inline
29941			#endif
29942
29943
29944			#if defined(__GNUC__) || __has_attribute(always_inline)
29945			# define forceinline inline __attribute__((always_inline))
29946			#elif defined(_MSC_VER)
29947			# define forceinline __forceinline
29948			#else
29949			# define forceinline inline
29950			#endif
29951
29952
29953			#if defined(__GNUC__) || __has_attribute(unused)
29954			# define MAYBE_UNUSED __attribute__((unused))
29955			#else
29956			# define MAYBE_UNUSED
29957			#endif
29958
29959
29960			#if defined(__GNUC__) || __has_attribute(noreturn)
29961			# define NORETURN __attribute__((noreturn))
29962			#else
29963			# define NORETURN
29964			#endif
29965
29966
29967			#if !defined(__STDC_VERSION__) || (__STDC_VERSION__ < 201112L)
29968			# if defined(__GNUC__) || defined(__clang__)
29969			# define restrict __restrict__
29970			# else
29971			# define restrict
29972			# endif
29973			#endif
29974
29975
29976			#if defined(__GNUC__) || __has_builtin(__builtin_expect)
29977			# define likely(expr) __builtin_expect(!!(expr), 1)
29978			#else
29979			# define likely(expr) (expr)
29980			#endif
29981
29982
29983			#if defined(__GNUC__) || __has_builtin(__builtin_expect)
29984			# define unlikely(expr) __builtin_expect(!!(expr), 0)
29985			#else
29986			# define unlikely(expr) (expr)
29987			#endif
29988
29989
29990			#undef prefetchr
29991			#if defined(__GNUC__) || __has_builtin(__builtin_prefetch)
29992			# define prefetchr(addr) __builtin_prefetch((addr), 0)
29993			#elif defined(_MSC_VER)
29994			# if defined(ARCH_X86_32) || defined(ARCH_X86_64)
29995			# define prefetchr(addr) _mm_prefetch((addr), _MM_HINT_T0)
29996			# elif defined(ARCH_ARM64)
29997			# define prefetchr(addr) __prefetch2((addr), 0x00 )
29998			# elif defined(ARCH_ARM32)
29999			# define prefetchr(addr) __prefetch(addr)
30000			# endif
30001			#endif
30002			#ifndef prefetchr
30003			# define prefetchr(addr)
30004			#endif
30005
30006
30007			#undef prefetchw
30008			#if defined(__GNUC__) || __has_builtin(__builtin_prefetch)
30009			# define prefetchw(addr) __builtin_prefetch((addr), 1)
30010			#elif defined(_MSC_VER)
30011			# if defined(ARCH_X86_32) || defined(ARCH_X86_64)
30012			# define prefetchw(addr) _m_prefetchw(addr)
30013			# elif defined(ARCH_ARM64)
30014			# define prefetchw(addr) __prefetch2((addr), 0x10 )
30015			# elif defined(ARCH_ARM32)
30016			# define prefetchw(addr) __prefetchw(addr)
30017			# endif
30018			#endif
30019			#ifndef prefetchw
30020			# define prefetchw(addr)
30021			#endif
30022
30023
30024			#undef _aligned_attribute
30025			#if defined(__GNUC__) || __has_attribute(aligned)
30026			# define _aligned_attribute(n) __attribute__((aligned(n)))
30027			#elif defined(_MSC_VER)
30028			# define _aligned_attribute(n) __declspec(align(n))
30029			#endif
30030
30031
30032			#if defined(__GNUC__) || __has_attribute(target)
30033			# define _target_attribute(attrs) __attribute__((target(attrs)))
30034			#else
30035			# define _target_attribute(attrs)
30036			#endif
30037
30038
30039
30040
30041
30042			#define ARRAY_LEN(A) (sizeof(A) / sizeof((A)[0]))
30043			#define MIN(a, b) ((a) <= (b) ? (a) : (b))
30044			#define MAX(a, b) ((a) >= (b) ? (a) : (b))
30045			#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
30046			#define STATIC_ASSERT(expr) ((void)sizeof(char[1 - 2 * !(expr)]))
30047			#define ALIGN(n, a) (((n) + (a) - 1) & ~((a) - 1))
30048			#define ROUND_UP(n, d) ((d) * DIV_ROUND_UP((n), (d)))
30049
30050
30051
30052
30053
30054
30055			#if defined(__BYTE_ORDER__)
30056			# define CPU_IS_LITTLE_ENDIAN() (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
30057			#elif defined(_MSC_VER)
30058			# define CPU_IS_LITTLE_ENDIAN() true
30059			#else
30060			static forceinline bool CPU_IS_LITTLE_ENDIAN(void)
30061			{
30062			union {
30063			u32 w;
30064			u8 b;
30065			} u;
30066
30067			u.w = 1;
30068			return u.b;
30069			}
30070			#endif
30071
30072
30073			static forceinline u16 bswap16(u16 v)
30074			{
30075			#if defined(__GNUC__) || __has_builtin(__builtin_bswap16)
30076			return __builtin_bswap16(v);
30077			#elif defined(_MSC_VER)
30078			return _byteswap_ushort(v);
30079			#else
30080			return (v << 8) | (v >> 8);
30081			#endif
30082			}
30083
30084
30085			static forceinline u32 bswap32(u32 v)
30086			{
30087			#if defined(__GNUC__) || __has_builtin(__builtin_bswap32)
30088			return __builtin_bswap32(v);
30089			#elif defined(_MSC_VER)
30090			return _byteswap_ulong(v);
30091			#else
30092			return ((v & 0x000000FF) << 24) |
30093			((v & 0x0000FF00) << 8) |
30094			((v & 0x00FF0000) >> 8) |
30095			((v & 0xFF000000) >> 24);
30096			#endif
30097			}
30098
30099
30100			static forceinline u64 bswap64(u64 v)
30101			{
30102			#if defined(__GNUC__) || __has_builtin(__builtin_bswap64)
30103			return __builtin_bswap64(v);
30104			#elif defined(_MSC_VER)
30105			return _byteswap_uint64(v);
30106			#else
30107			return ((v & 0x00000000000000FF) << 56) |
30108			((v & 0x000000000000FF00) << 40) |
30109			((v & 0x0000000000FF0000) << 24) |
30110			((v & 0x00000000FF000000) << 8) |
30111			((v & 0x000000FF00000000) >> 8) |
30112			((v & 0x0000FF0000000000) >> 24) |
30113			((v & 0x00FF000000000000) >> 40) |
30114			((v & 0xFF00000000000000) >> 56);
30115			#endif
30116			}
30117
30118			#define le16_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap16(v))
30119			#define le32_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap32(v))
30120			#define le64_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap64(v))
30121			#define be16_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? bswap16(v) : (v))
30122			#define be32_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? bswap32(v) : (v))
30123			#define be64_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? bswap64(v) : (v))
30124
30125
30126
30127
30128
30129
30130			#if (defined(__GNUC__) || defined(__clang__)) && \
30131			(defined(ARCH_X86_64) || defined(ARCH_X86_32) || \
30132			defined(__ARM_FEATURE_UNALIGNED) || defined(__powerpc64__) || \
30133			defined(__riscv_misaligned_fast) || \
30134			defined(__wasm__))
30135			# define UNALIGNED_ACCESS_IS_FAST 1
30136			#elif defined(_MSC_VER)
30137			# define UNALIGNED_ACCESS_IS_FAST 1
30138			#else
30139			# define UNALIGNED_ACCESS_IS_FAST 0
30140			#endif
30141
30142
30143
30144			#ifdef FREESTANDING
30145			# define MEMCOPY __builtin_memcpy
30146			#else
30147			# define MEMCOPY memcpy
30148			#endif
30149
30150
30151
30152			#define DEFINE_UNALIGNED_TYPE(type) \
30153			static forceinline type \
30154			load_##type##_unaligned(const void *p) \
30155			{ \
30156			type v; \
30157			\
30158			MEMCOPY(&v, p, sizeof(v)); \
30159			return v; \
30160			} \
30161			\
30162			static forceinline void \
30163			store_##type##_unaligned(type v, void *p) \
30164			{ \
30165			MEMCOPY(p, &v, sizeof(v)); \
30166			}
30167
30168			DEFINE_UNALIGNED_TYPE(u16)
30169			DEFINE_UNALIGNED_TYPE(u32)
30170			DEFINE_UNALIGNED_TYPE(u64)
30171			DEFINE_UNALIGNED_TYPE(machine_word_t)
30172
30173			#undef MEMCOPY
30174
30175			#define load_word_unaligned load_machine_word_t_unaligned
30176			#define store_word_unaligned store_machine_word_t_unaligned
30177
30178
30179
30180			static forceinline u16
30181			get_unaligned_le16(const u8 *p)
30182			{
30183			if (UNALIGNED_ACCESS_IS_FAST)
30184			return le16_bswap(load_u16_unaligned(p));
30185			else
30186			return ((u16)p[1] << 8) | p[0];
30187			}
30188
30189			static forceinline u16
30190			get_unaligned_be16(const u8 *p)
30191			{
30192			if (UNALIGNED_ACCESS_IS_FAST)
30193			return be16_bswap(load_u16_unaligned(p));
30194			else
30195			return ((u16)p[0] << 8) | p[1];
30196			}
30197
30198			static forceinline u32
30199			get_unaligned_le32(const u8 *p)
30200			{
30201			if (UNALIGNED_ACCESS_IS_FAST)
30202			return le32_bswap(load_u32_unaligned(p));
30203			else
30204			return ((u32)p[3] << 24) | ((u32)p[2] << 16) |
30205			((u32)p[1] << 8) | p[0];
30206			}
30207
30208			static forceinline u32
30209			get_unaligned_be32(const u8 *p)
30210			{
30211			if (UNALIGNED_ACCESS_IS_FAST)
30212			return be32_bswap(load_u32_unaligned(p));
30213			else
30214			return ((u32)p[0] << 24) | ((u32)p[1] << 16) |
30215			((u32)p[2] << 8) | p[3];
30216			}
30217
30218			static forceinline u64
30219			get_unaligned_le64(const u8 *p)
30220			{
30221			if (UNALIGNED_ACCESS_IS_FAST)
30222			return le64_bswap(load_u64_unaligned(p));
30223			else
30224			return ((u64)p[7] << 56) | ((u64)p[6] << 48) |
30225			((u64)p[5] << 40) | ((u64)p[4] << 32) |
30226			((u64)p[3] << 24) | ((u64)p[2] << 16) |
30227			((u64)p[1] << 8) | p[0];
30228			}
30229
30230			static forceinline machine_word_t
30231			get_unaligned_leword(const u8 *p)
30232			{
30233			STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
30234			if (WORDBITS == 32)
30235			return get_unaligned_le32(p);
30236			else
30237			return get_unaligned_le64(p);
30238			}
30239
30240
30241
30242			static forceinline void
30243			put_unaligned_le16(u16 v, u8 *p)
30244			{
30245			if (UNALIGNED_ACCESS_IS_FAST) {
30246			store_u16_unaligned(le16_bswap(v), p);
30247			} else {
30248			p[0] = (u8)(v >> 0);
30249			p[1] = (u8)(v >> 8);
30250			}
30251			}
30252
30253			static forceinline void
30254			put_unaligned_be16(u16 v, u8 *p)
30255			{
30256			if (UNALIGNED_ACCESS_IS_FAST) {
30257			store_u16_unaligned(be16_bswap(v), p);
30258			} else {
30259			p[0] = (u8)(v >> 8);
30260			p[1] = (u8)(v >> 0);
30261			}
30262			}
30263
30264			static forceinline void
30265			put_unaligned_le32(u32 v, u8 *p)
30266			{
30267			if (UNALIGNED_ACCESS_IS_FAST) {
30268			store_u32_unaligned(le32_bswap(v), p);
30269			} else {
30270			p[0] = (u8)(v >> 0);
30271			p[1] = (u8)(v >> 8);
30272			p[2] = (u8)(v >> 16);
30273			p[3] = (u8)(v >> 24);
30274			}
30275			}
30276
30277			static forceinline void
30278			put_unaligned_be32(u32 v, u8 *p)
30279			{
30280			if (UNALIGNED_ACCESS_IS_FAST) {
30281			store_u32_unaligned(be32_bswap(v), p);
30282			} else {
30283			p[0] = (u8)(v >> 24);
30284			p[1] = (u8)(v >> 16);
30285			p[2] = (u8)(v >> 8);
30286			p[3] = (u8)(v >> 0);
30287			}
30288			}
30289
30290			static forceinline void
30291			put_unaligned_le64(u64 v, u8 *p)
30292			{
30293			if (UNALIGNED_ACCESS_IS_FAST) {
30294			store_u64_unaligned(le64_bswap(v), p);
30295			} else {
30296			p[0] = (u8)(v >> 0);
30297			p[1] = (u8)(v >> 8);
30298			p[2] = (u8)(v >> 16);
30299			p[3] = (u8)(v >> 24);
30300			p[4] = (u8)(v >> 32);
30301			p[5] = (u8)(v >> 40);
30302			p[6] = (u8)(v >> 48);
30303			p[7] = (u8)(v >> 56);
30304			}
30305			}
30306
30307			static forceinline void
30308			put_unaligned_leword(machine_word_t v, u8 *p)
30309			{
30310			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
30311			if (WORDBITS == 32)
30312			put_unaligned_le32(v, p);
30313			else
30314			put_unaligned_le64(v, p);
30315			}
30316
30317
30318
30319
30320
30321
30322
30323			static forceinline unsigned
30324			bsr32(u32 v)
30325			{
30326			#if defined(__GNUC__) \|\| __has_builtin(__builtin_clz)
30327			return 31 - __builtin_clz(v);
30328			#elif defined(_MSC_VER)
30329			unsigned long i;
30330
30331			_BitScanReverse(&i, v);
30332			return i;
30333			#else
30334			unsigned i = 0;
30335
30336			while ((v >>= 1) != 0)
30337			i++;
30338			return i;
30339			#endif
30340			}
30341
30342			static forceinline unsigned
30343			bsr64(u64 v)
30344			{
30345			#if defined(__GNUC__) \|\| __has_builtin(__builtin_clzll)
30346			return 63 - __builtin_clzll(v);
30347			#elif defined(_MSC_VER) && defined(_WIN64)
30348			unsigned long i;
30349
30350			_BitScanReverse64(&i, v);
30351			return i;
30352			#else
30353			unsigned i = 0;
30354
30355			while ((v >>= 1) != 0)
30356			i++;
30357			return i;
30358			#endif
30359			}
30360
30361			static forceinline unsigned
30362			bsrw(machine_word_t v)
30363			{
30364			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
30365			if (WORDBITS == 32)
30366			return bsr32(v);
30367			else
30368			return bsr64(v);
30369			}
30370
30371
30372
30373			static forceinline unsigned
30374			bsf32(u32 v)
30375			{
30376			#if defined(__GNUC__) \|\| __has_builtin(__builtin_ctz)
30377			return __builtin_ctz(v);
30378			#elif defined(_MSC_VER)
30379			unsigned long i;
30380
30381			_BitScanForward(&i, v);
30382			return i;
30383			#else
30384			unsigned i = 0;
30385
30386			for (; (v & 1) == 0; v >>= 1)
30387			i++;
30388			return i;
30389			#endif
30390			}
30391
30392			static forceinline unsigned
30393			bsf64(u64 v)
30394			{
30395			#if defined(__GNUC__) \|\| __has_builtin(__builtin_ctzll)
30396			return __builtin_ctzll(v);
30397			#elif defined(_MSC_VER) && defined(_WIN64)
30398			unsigned long i;
30399
30400			_BitScanForward64(&i, v);
30401			return i;
30402			#else
30403			unsigned i = 0;
30404
30405			for (; (v & 1) == 0; v >>= 1)
30406			i++;
30407			return i;
30408			#endif
30409			}
30410
30411			static forceinline unsigned
30412			bsfw(machine_word_t v)
30413			{
30414			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
30415			if (WORDBITS == 32)
30416			return bsf32(v);
30417			else
30418			return bsf64(v);
30419			}
30420
30421
30422			#undef rbit32
30423			#if (defined(__GNUC__) \|\| defined(__clang__)) && defined(ARCH_ARM32) && \
30424			(__ARM_ARCH >= 7 \|\| (__ARM_ARCH == 6 && defined(__ARM_ARCH_6T2__)))
30425			static forceinline u32
30426			rbit32(u32 v)
30427			{
30428			__asm__("rbit %0, %1" : "=r" (v) : "r" (v));
30429			return v;
30430			}
30431			#define rbit32 rbit32
30432			#elif (defined(__GNUC__) \|\| defined(__clang__)) && defined(ARCH_ARM64)
30433			static forceinline u32
30434			rbit32(u32 v)
30435			{
30436			__asm__("rbit %w0, %w1" : "=r" (v) : "r" (v));
30437			return v;
30438			}
30439			#define rbit32 rbit32
30440			#endif
30441
30442			#endif
30443
30444
30445			typedef void *(*malloc_func_t)(size_t);
30446			typedef void (*free_func_t)(void *);
30447
30448			extern malloc_func_t libdeflate_default_malloc_func;
30449			extern free_func_t libdeflate_default_free_func;
30450
30451			void *libdeflate_aligned_malloc(malloc_func_t malloc_func,
30452			size_t alignment, size_t size);
30453			void libdeflate_aligned_free(free_func_t free_func, void *ptr);
30454
30455			#ifdef FREESTANDING
30456
30457			void *memset(void *s, int c, size_t n);
30458			#define memset(s, c, n) __builtin_memset((s), (c), (n))
30459
30460			void *memcpy(void *dest, const void *src, size_t n);
30461			#define memcpy(dest, src, n) __builtin_memcpy((dest), (src), (n))
30462
30463			void *memmove(void *dest, const void *src, size_t n);
30464			#define memmove(dest, src, n) __builtin_memmove((dest), (src), (n))
30465
30466			int memcmp(const void *s1, const void *s2, size_t n);
30467			#define memcmp(s1, s2, n) __builtin_memcmp((s1), (s2), (n))
30468
30469			#undef LIBDEFLATE_ENABLE_ASSERTIONS
30470			#else
30471			# include <string.h>
30472
30473			# ifdef __clang_analyzer__
30474			# define LIBDEFLATE_ENABLE_ASSERTIONS
30475			# endif
30476			#endif
30477
30478
30479			#ifdef LIBDEFLATE_ENABLE_ASSERTIONS
30480			NORETURN void
30481			libdeflate_assertion_failed(const char *expr, const char *file, int line);
30482			#define ASSERT(expr) { if (unlikely(!(expr))) \
30483			libdeflate_assertion_failed(#expr, __FILE__, __LINE__); }
30484			#else
30485			#define ASSERT(expr) (void)(expr)
30486			#endif
30487
30488			#define CONCAT_IMPL(a, b) a##b
30489			#define CONCAT(a, b) CONCAT_IMPL(a, b)
30490			#define ADD_SUFFIX(name) CONCAT(name, SUFFIX)
30491
30492			#endif
30493
30494
30495			#if defined(ARCH_X86_32) \|\| defined(ARCH_X86_64)
30496
30497			#define X86_CPU_FEATURE_SSE2 (1 << 0)
30498			#define X86_CPU_FEATURE_PCLMULQDQ (1 << 1)
30499			#define X86_CPU_FEATURE_AVX (1 << 2)
30500			#define X86_CPU_FEATURE_AVX2 (1 << 3)
30501			#define X86_CPU_FEATURE_BMI2 (1 << 4)
30502
30503			#define X86_CPU_FEATURE_ZMM (1 << 5)
30504			#define X86_CPU_FEATURE_AVX512BW (1 << 6)
30505			#define X86_CPU_FEATURE_AVX512VL (1 << 7)
30506			#define X86_CPU_FEATURE_VPCLMULQDQ (1 << 8)
30507			#define X86_CPU_FEATURE_AVX512VNNI (1 << 9)
30508			#define X86_CPU_FEATURE_AVXVNNI (1 << 10)
30509
30510			#if defined(__GNUC__) \|\| defined(__clang__) \|\| defined(_MSC_VER)
30511
30512			# define X86_CPU_FEATURES_KNOWN (1U << 31)
30513			extern volatile u32 libdeflate_x86_cpu_features;
30514
30515			void libdeflate_init_x86_cpu_features(void);
30516
30517			static inline u32 get_x86_cpu_features(void)
30518			{
30519			if (libdeflate_x86_cpu_features == 0)
30520			libdeflate_init_x86_cpu_features();
30521			return libdeflate_x86_cpu_features;
30522			}
30523
30524			# include <immintrin.h>
30525			# if defined(_MSC_VER) && defined(__clang__)
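30526			# include <tmmintrin.h>
30527			# include <smmintrin.h>
30528			# include <wmmintrin.h>
30529			# include <avxintrin.h>
30530			# include <avx2intrin.h>
30531			# include <avx512fintrin.h>
30532			# include <avx512bwintrin.h>
30533			# include <avx512vlintrin.h>
30534			# if __has_include(<avx512vlbwintrin.h>)
30535			# include <avx512vlbwintrin.h>
30536			# endif
30537			# if __has_include(<vpclmulqdqintrin.h>)
30538			# include <vpclmulqdqintrin.h>
30539			# endif
30540			# if __has_include(<avx512vnniintrin.h>)
30541			# include <avx512vnniintrin.h>
30542			# endif
30543			# if __has_include(<avx512vlvnniintrin.h>)
30544			# include <avx512vlvnniintrin.h>
30545			# endif
30546			# if __has_include(<avxvnniintrin.h>)
30547			# include <avxvnniintrin.h>
30548			# endif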
30549			# endif
30550			#else
30551			static inline u32 get_x86_cpu_features(void) { return 0; }
30552			#endif
30553
30554			#if defined(__SSE2__) \|\| \
30555			(defined(_MSC_VER) && \
30556			(defined(ARCH_X86_64) \|\| (defined(_M_IX86_FP) && _M_IX86_FP >= 2)))
30557			# define HAVE_SSE2(features) 1
30558			# define HAVE_SSE2_NATIVE 1
30559			#else
30560			# define HAVE_SSE2(features) ((features) & X86_CPU_FEATURE_SSE2)
30561			# define HAVE_SSE2_NATIVE 0
30562			#endif
30563
30564			#if (defined(__PCLMUL__) && defined(__SSE4_1__)) \|\| \
30565			(defined(_MSC_VER) && defined(__AVX2__))
30566			# define HAVE_PCLMULQDQ(features) 1
30567			#else
30568			# define HAVE_PCLMULQDQ(features) ((features) & X86_CPU_FEATURE_PCLMULQDQ)
30569			#endif
30570
30571			#ifdef __AVX__
30572			# define HAVE_AVX(features) 1
30573			#else
30574			# define HAVE_AVX(features) ((features) & X86_CPU_FEATURE_AVX)
30575			#endif
30576
30577			#ifdef __AVX2__
30578			# define HAVE_AVX2(features) 1
30579			#else
30580			# define HAVE_AVX2(features) ((features) & X86_CPU_FEATURE_AVX2)
30581			#endif
30582
30583			#if defined(__BMI2__) \|\| (defined(_MSC_VER) && defined(__AVX2__))
30584			# define HAVE_BMI2(features) 1
30585			# define HAVE_BMI2_NATIVE 1
30586			#else
30587			# define HAVE_BMI2(features) ((features) & X86_CPU_FEATURE_BMI2)
30588			# define HAVE_BMI2_NATIVE 0
30589			#endif
30590
30591			#ifdef __AVX512BW__
30592			# define HAVE_AVX512BW(features) 1
30593			#else
30594			# define HAVE_AVX512BW(features) ((features) & X86_CPU_FEATURE_AVX512BW)
30595			#endif
30596
30597			#ifdef __AVX512VL__
30598			# define HAVE_AVX512VL(features) 1
30599			#else
30600			# define HAVE_AVX512VL(features) ((features) & X86_CPU_FEATURE_AVX512VL)
30601			#endif
30602
30603			#ifdef __VPCLMULQDQ__
30604			# define HAVE_VPCLMULQDQ(features) 1
30605			#else
30606			# define HAVE_VPCLMULQDQ(features) ((features) & X86_CPU_FEATURE_VPCLMULQDQ)
30607			#endif
30608
30609			#ifdef __AVX512VNNI__
30610			# define HAVE_AVX512VNNI(features) 1
30611			#else
30612			# define HAVE_AVX512VNNI(features) ((features) & X86_CPU_FEATURE_AVX512VNNI)
30613			#endif
30614
30615			#ifdef __AVXVNNI__
30616			# define HAVE_AVXVNNI(features) 1
30617			#else
30618			# define HAVE_AVXVNNI(features) ((features) & X86_CPU_FEATURE_AVXVNNI)
30619			#endif
30620
30621			#endif
30622
30623			#endif
30624
30625
30626
30627			#if defined(__GNUC__) \|\| defined(__clang__) \|\| MSVC_PREREQ(1930)
30628			# define deflate_decompress_bmi2 deflate_decompress_bmi2
30629			# define FUNCNAME deflate_decompress_bmi2
30630			# define ATTRIBUTES _target_attribute("bmi2")
30631
30632			# ifndef __clang__
30633			# ifdef ARCH_X86_64
30634			# define EXTRACT_VARBITS(word, count) _bzhi_u64((word), (count))
30635			# define EXTRACT_VARBITS8(word, count) _bzhi_u64((word), (count))
30636			# else
30637			# define EXTRACT_VARBITS(word, count) _bzhi_u32((word), (count))
30638			# define EXTRACT_VARBITS8(word, count) _bzhi_u32((word), (count))
30639			# endif
30640			# endif
30641			/* #include "decompress_template.h" */
30642
30643
30644
30645
30646			#ifndef ATTRIBUTES
30647			# define ATTRIBUTES
30648			#endif
30649			#ifndef EXTRACT_VARBITS
30650			# define EXTRACT_VARBITS(word, count) ((word) & BITMASK(count))
30651			#endif
30652			#ifndef EXTRACT_VARBITS8
30653			# define EXTRACT_VARBITS8(word, count) ((word) & BITMASK((u8)(count)))
30654			#endif
30655
30656			static ATTRIBUTES MAYBE_UNUSED enum libdeflate_result
30657	40		FUNCNAME(struct libdeflate_decompressor * restrict d,
30658			const void * restrict in, size_t in_nbytes,
30659			void * restrict out, size_t out_nbytes_avail,
30660			size_t *actual_in_nbytes_ret, size_t *actual_out_nbytes_ret)
30661			{
30662	40		u8 *out_next = out;
30663	40		u8 * const out_end = out_next + out_nbytes_avail;
30664	40		u8 * const out_fastloop_end =
30665	40	50	out_end - MIN(out_nbytes_avail, FASTLOOP_MAX_BYTES_WRITTEN);
30666
30667
30668	40		const u8 *in_next = in;
30669	40		const u8 * const in_end = in_next + in_nbytes;
30670	40		const u8 * const in_fastloop_end =
30671	40	50	in_end - MIN(in_nbytes, FASTLOOP_MAX_BYTES_READ);
30672	40		bitbuf_t bitbuf = 0;
30673			bitbuf_t saved_bitbuf;
30674	40		u32 bitsleft = 0;
30675	40		size_t overread_count = 0;
30676
30677			bool is_final_block;
30678			unsigned block_type;
30679			unsigned num_litlen_syms;
30680			unsigned num_offset_syms;
30681			bitbuf_t litlen_tablemask;
30682			u32 entry;
30683
30684	41		next_block:
30685
30686			;
30687
30688			STATIC_ASSERT(CAN_CONSUME(1 + 2 + 5 + 5 + 4 + 3));
30689	82	50	REFILL_BITS();
		0
		0
		0
30690
30691
30692	41		is_final_block = bitbuf & BITMASK(1);
30693
30694
30695	41		block_type = (bitbuf >> 1) & BITMASK(2);
30696
30697	41	100	if (block_type == DEFLATE_BLOCKTYPE_DYNAMIC_HUFFMAN) {
30698
30699
30700
30701
30702			static const u8 deflate_precode_lens_permutation[DEFLATE_NUM_PRECODE_SYMS] = {
30703			16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
30704			};
30705
30706			unsigned num_explicit_precode_lens;
30707			unsigned i;
30708
30709
30710
30711			STATIC_ASSERT(DEFLATE_NUM_LITLEN_SYMS == 257 + BITMASK(5));
30712	5		num_litlen_syms = 257 + ((bitbuf >> 3) & BITMASK(5));
30713
30714			STATIC_ASSERT(DEFLATE_NUM_OFFSET_SYMS == 1 + BITMASK(5));
30715	5		num_offset_syms = 1 + ((bitbuf >> 8) & BITMASK(5));
30716
30717			STATIC_ASSERT(DEFLATE_NUM_PRECODE_SYMS == 4 + BITMASK(4));
30718	5		num_explicit_precode_lens = 4 + ((bitbuf >> 13) & BITMASK(4));
30719
30720	5		d->static_codes_loaded = false;
30721
30722
30723			STATIC_ASSERT(DEFLATE_MAX_PRE_CODEWORD_LEN == (1 << 3) - 1);
30724			if (CAN_CONSUME(3 * (DEFLATE_NUM_PRECODE_SYMS - 1))) {
30725	5		d->u.precode_lens[deflate_precode_lens_permutation[0]] =
30726	5		(bitbuf >> 17) & BITMASK(3);
30727	5		bitbuf >>= 20;
30728	5		bitsleft -= 20;
30729	10	50	REFILL_BITS();
		0
		0
		0
30730	5		i = 1;
30731			do {
30732	65		d->u.precode_lens[deflate_precode_lens_permutation[i]] =
30733	65		bitbuf & BITMASK(3);
30734	65		bitbuf >>= 3;
30735	65		bitsleft -= 3;
30736	65	100	} while (++i < num_explicit_precode_lens);
30737			} else {
30738			bitbuf >>= 17;
30739			bitsleft -= 17;
30740			i = 0;
30741			do {
30742			if ((u8)bitsleft < 3)
30743			REFILL_BITS();
30744			d->u.precode_lens[deflate_precode_lens_permutation[i]] =
30745			bitbuf & BITMASK(3);
30746			bitbuf >>= 3;
30747			bitsleft -= 3;
30748			} while (++i < num_explicit_precode_lens);
30749			}
30750	30	100	for (; i < DEFLATE_NUM_PRECODE_SYMS; i++)
30751	25		d->u.precode_lens[deflate_precode_lens_permutation[i]] = 0;
30752
30753
30754	5	50	SAFETY_CHECK(build_precode_decode_table(d));
30755
30756
30757	5		i = 0;
30758			do {
30759			unsigned presym;
30760			u8 rep_val;
30761			unsigned rep_count;
30762
30763	674	100	if ((u8)bitsleft < DEFLATE_MAX_PRE_CODEWORD_LEN + 7)
30764	110	50	REFILL_BITS();
		0
		0
		0
30765
30766
30767			STATIC_ASSERT(PRECODE_TABLEBITS == DEFLATE_MAX_PRE_CODEWORD_LEN);
30768
30769
30770	674		entry = d->u.l.precode_decode_table[
30771	674		bitbuf & BITMASK(DEFLATE_MAX_PRE_CODEWORD_LEN)];
30772	674		bitbuf >>= (u8)entry;
30773	674		bitsleft -= entry;
30774	674		presym = entry >> 16;
30775
30776	674	100	if (presym < 16) {
30777
30778	626		d->u.l.lens[i++] = presym;
30779	626		continue;
30780			}
30781
30782
30783
30784
30785			STATIC_ASSERT(DEFLATE_MAX_LENS_OVERRUN == 138 - 1);
30786
30787	48	100	if (presym == 16) {
30788
30789	15	50	SAFETY_CHECK(i != 0);
30790	15		rep_val = d->u.l.lens[i - 1];
30791			STATIC_ASSERT(3 + BITMASK(2) == 6);
30792	15		rep_count = 3 + (bitbuf & BITMASK(2));
30793	15		bitbuf >>= 2;
30794	15		bitsleft -= 2;
30795	15		d->u.l.lens[i + 0] = rep_val;
30796	15		d->u.l.lens[i + 1] = rep_val;
30797	15		d->u.l.lens[i + 2] = rep_val;
30798	15		d->u.l.lens[i + 3] = rep_val;
30799	15		d->u.l.lens[i + 4] = rep_val;
30800	15		d->u.l.lens[i + 5] = rep_val;
30801	15		i += rep_count;
30802	33	100	} else if (presym == 17) {
30803
30804			STATIC_ASSERT(3 + BITMASK(3) == 10);
30805	15		rep_count = 3 + (bitbuf & BITMASK(3));
30806	15		bitbuf >>= 3;
30807	15		bitsleft -= 3;
30808	15		d->u.l.lens[i + 0] = 0;
30809	15		d->u.l.lens[i + 1] = 0;
30810	15		d->u.l.lens[i + 2] = 0;
30811	15		d->u.l.lens[i + 3] = 0;
30812	15		d->u.l.lens[i + 4] = 0;
30813	15		d->u.l.lens[i + 5] = 0;
30814	15		d->u.l.lens[i + 6] = 0;
30815	15		d->u.l.lens[i + 7] = 0;
30816	15		d->u.l.lens[i + 8] = 0;
30817	15		d->u.l.lens[i + 9] = 0;
30818	15		i += rep_count;
30819			} else {
30820
30821			STATIC_ASSERT(11 + BITMASK(7) == 138);
30822	18		rep_count = 11 + (bitbuf & BITMASK(7));
30823	18		bitbuf >>= 7;
30824	18		bitsleft -= 7;
30825	18		memset(&d->u.l.lens[i], 0,
30826			rep_count * sizeof(d->u.l.lens[i]));
30827	18		i += rep_count;
30828			}
30829	674	100	} while (i < num_litlen_syms + num_offset_syms);
30830
30831
30832	5	50	SAFETY_CHECK(i == num_litlen_syms + num_offset_syms);
30833
30834	36	50	} else if (block_type == DEFLATE_BLOCKTYPE_UNCOMPRESSED) {
30835			u16 len, nlen;
30836
30837
30838
30839	0		bitsleft -= 3;
30840
30841
30842	0		bitsleft = (u8)bitsleft;
30843	0	0	SAFETY_CHECK(overread_count <= (bitsleft >> 3));
30844	0		in_next -= (bitsleft >> 3) - overread_count;
30845	0		overread_count = 0;
30846	0		bitbuf = 0;
30847	0		bitsleft = 0;
30848
30849	0	0	SAFETY_CHECK(in_end - in_next >= 4);
30850	0		len = get_unaligned_le16(in_next);
30851	0		nlen = get_unaligned_le16(in_next + 2);
30852	0		in_next += 4;
30853
30854	0	0	SAFETY_CHECK(len == (u16)~nlen);
30855	0	0	if (unlikely(len > out_end - out_next))
30856	0		return LIBDEFLATE_INSUFFICIENT_SPACE;
30857	0	0	SAFETY_CHECK(len <= in_end - in_next);
30858
30859	0		memcpy(out_next, in_next, len);
30860	0		in_next += len;
30861	0		out_next += len;
30862
30863	0		goto block_done;
30864
30865			} else {
30866			unsigned i;
30867
30868	36	50	SAFETY_CHECK(block_type == DEFLATE_BLOCKTYPE_STATIC_HUFFMAN);
30869
30870
30871
30872	36		bitbuf >>= 3;
30873	36		bitsleft -= 3;
30874
30875	36	50	if (d->static_codes_loaded)
30876	0		goto have_decode_tables;
30877
30878	36		d->static_codes_loaded = true;
30879
30880			STATIC_ASSERT(DEFLATE_NUM_LITLEN_SYMS == 288);
30881			STATIC_ASSERT(DEFLATE_NUM_OFFSET_SYMS == 32);
30882
30883	5220	100	for (i = 0; i < 144; i++)
30884	5184		d->u.l.lens[i] = 8;
30885	4068	100	for (; i < 256; i++)
30886	4032		d->u.l.lens[i] = 9;
30887	900	100	for (; i < 280; i++)
30888	864		d->u.l.lens[i] = 7;
30889	324	100	for (; i < 288; i++)
30890	288		d->u.l.lens[i] = 8;
30891
30892	1188	100	for (; i < 288 + 32; i++)
30893	1152		d->u.l.lens[i] = 5;
30894
30895	36		num_litlen_syms = 288;
30896	36		num_offset_syms = 32;
30897			}
30898
30899
30900
30901	41	50	SAFETY_CHECK(build_offset_decode_table(d, num_litlen_syms, num_offset_syms));
30902	41	50	SAFETY_CHECK(build_litlen_decode_table(d, num_litlen_syms, num_offset_syms));
30903	41		have_decode_tables:
30904	41		litlen_tablemask = BITMASK(d->litlen_tablebits);
30905
30906
30907	41	50	if (in_next >= in_fastloop_end \|\| out_next >= out_fastloop_end)
		50
30908	0		goto generic_loop;
30909	41		REFILL_BITS_IN_FASTLOOP();
30910	41		entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask];
30911			do {
30912			u32 length, offset, lit;
30913			const u8 *src;
30914			u8 *dst;
30915
30916
30917	10151		saved_bitbuf = bitbuf;
30918	10151		bitbuf >>= (u8)entry;
30919	10151		bitsleft -= entry;
30920
30921
30922	10151	100	if (entry & HUFFDEC_LITERAL) {
30923
30924			if (
30925			CAN_CONSUME_AND_THEN_PRELOAD(2 * LITLEN_TABLEBITS +
30926			LENGTH_MAXBITS,
30927			OFFSET_TABLEBITS) &&
30928
30929			CAN_CONSUME_AND_THEN_PRELOAD(2 * LITLEN_TABLEBITS +
30930			DEFLATE_MAX_LITLEN_CODEWORD_LEN,
30931			LITLEN_TABLEBITS)) {
30932
30933	3886		lit = entry >> 16;
30934	3886		entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask];
30935	3886		saved_bitbuf = bitbuf;
30936	3886		bitbuf >>= (u8)entry;
30937	3886		bitsleft -= entry;
30938	3886		*out_next++ = lit;
30939	3886	100	if (entry & HUFFDEC_LITERAL) {
30940
30941	2617		lit = entry >> 16;
30942	2617		entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask];
30943	2617		saved_bitbuf = bitbuf;
30944	2617		bitbuf >>= (u8)entry;
30945	2617		bitsleft -= entry;
30946	2617		*out_next++ = lit;
30947	2617	100	if (entry & HUFFDEC_LITERAL) {
30948
30949	1951		lit = entry >> 16;
30950	1951		entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask];
30951	1951		REFILL_BITS_IN_FASTLOOP();
30952	1951		*out_next++ = lit;
30953	1951		continue;
30954			}
30955			}
30956			} else {
30957
30958			STATIC_ASSERT(CAN_CONSUME_AND_THEN_PRELOAD(
30959			LITLEN_TABLEBITS, LITLEN_TABLEBITS));
30960			lit = entry >> 16;
30961			entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask];
30962			REFILL_BITS_IN_FASTLOOP();
30963			*out_next++ = lit;
30964			continue;
30965			}
30966			}
30967
30968
30969	8200	100	if (unlikely(entry & HUFFDEC_EXCEPTIONAL)) {
30970
30971
30972	28	100	if (unlikely(entry & HUFFDEC_END_OF_BLOCK))
30973	1		goto block_done;
30974
30975
30976	54		entry = d->u.litlen_decode_table[(entry >> 16) +
30977	27		EXTRACT_VARBITS(bitbuf, (entry >> 8) & 0x3F)];
30978	27		saved_bitbuf = bitbuf;
30979	27		bitbuf >>= (u8)entry;
30980	27		bitsleft -= entry;
30981
30982
30983			if (!CAN_CONSUME_AND_THEN_PRELOAD(DEFLATE_MAX_LITLEN_CODEWORD_LEN,
30984			LITLEN_TABLEBITS) \|\|
30985			!CAN_CONSUME_AND_THEN_PRELOAD(LENGTH_MAXBITS,
30986			OFFSET_TABLEBITS))
30987			REFILL_BITS_IN_FASTLOOP();
30988	27	50	if (entry & HUFFDEC_LITERAL) {
30989
30990	27		lit = entry >> 16;
30991	27		entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask];
30992	27		REFILL_BITS_IN_FASTLOOP();
30993	27		*out_next++ = lit;
30994	27		continue;
30995			}
30996	0	0	if (unlikely(entry & HUFFDEC_END_OF_BLOCK))
30997	0		goto block_done;
30998
30999			}
31000
31001
31002	8172		length = entry >> 16;
31003	8172		length += EXTRACT_VARBITS8(saved_bitbuf, entry) >> (u8)(entry >> 8);
31004
31005
31006			STATIC_ASSERT(CAN_CONSUME_AND_THEN_PRELOAD(LENGTH_MAXFASTBITS,
31007			OFFSET_TABLEBITS));
31008	8172		entry = d->offset_decode_table[bitbuf & BITMASK(OFFSET_TABLEBITS)];
31009			if (CAN_CONSUME_AND_THEN_PRELOAD(OFFSET_MAXBITS,
31010			LITLEN_TABLEBITS)) {
31011
31012	8172	100	if (unlikely(entry & HUFFDEC_EXCEPTIONAL)) {
31013
31014	17	50	if (unlikely((u8)bitsleft < OFFSET_MAXBITS +
31015			LITLEN_TABLEBITS - PRELOAD_SLACK))
31016	0		REFILL_BITS_IN_FASTLOOP();
31017	17		bitbuf >>= OFFSET_TABLEBITS;
31018	17		bitsleft -= OFFSET_TABLEBITS;
31019	34		entry = d->offset_decode_table[(entry >> 16) +
31020	34		EXTRACT_VARBITS(bitbuf, (entry >> 8) & 0x3F)];
31021	8155	100	} else if (unlikely((u8)bitsleft < OFFSET_MAXFASTBITS +
31022			LITLEN_TABLEBITS - PRELOAD_SLACK))
31023	1		REFILL_BITS_IN_FASTLOOP();
31024			} else {
31025
31026			REFILL_BITS_IN_FASTLOOP();
31027			if (unlikely(entry & HUFFDEC_EXCEPTIONAL)) {
31028
31029			bitbuf >>= OFFSET_TABLEBITS;
31030			bitsleft -= OFFSET_TABLEBITS;
31031			entry = d->offset_decode_table[(entry >> 16) +
31032			EXTRACT_VARBITS(bitbuf, (entry >> 8) & 0x3F)];
31033			REFILL_BITS_IN_FASTLOOP();
31034
31035			STATIC_ASSERT(CAN_CONSUME(
31036			OFFSET_MAXBITS - OFFSET_TABLEBITS));
31037			} else {
31038
31039			STATIC_ASSERT(CAN_CONSUME(OFFSET_MAXFASTBITS));
31040			}
31041			}
31042	8172		saved_bitbuf = bitbuf;
31043	8172		bitbuf >>= (u8)entry;
31044	8172		bitsleft -= entry;
31045	8172		offset = entry >> 16;
31046	8172		offset += EXTRACT_VARBITS8(saved_bitbuf, entry) >> (u8)(entry >> 8);
31047
31048
31049	8172	50	SAFETY_CHECK(offset <= out_next - (const u8 *)out);
31050	8172		src = out_next - offset;
31051	8172		dst = out_next;
31052	8172		out_next += length;
31053
31054
31055			if (!CAN_CONSUME_AND_THEN_PRELOAD(
31056			MAX(OFFSET_MAXBITS - OFFSET_TABLEBITS,
31057			OFFSET_MAXFASTBITS),
31058			LITLEN_TABLEBITS) &&
31059			unlikely((u8)bitsleft < LITLEN_TABLEBITS - PRELOAD_SLACK))
31060			REFILL_BITS_IN_FASTLOOP();
31061	8172		entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask];
31062	8172		REFILL_BITS_IN_FASTLOOP();
31063
31064
31065	8172	100	if (UNALIGNED_ACCESS_IS_FAST && offset >= WORDBYTES) {
31066	8163		store_word_unaligned(load_word_unaligned(src), dst);
31067	8163		src += WORDBYTES;
31068	8163		dst += WORDBYTES;
31069	8163		store_word_unaligned(load_word_unaligned(src), dst);
31070	8163		src += WORDBYTES;
31071	8163		dst += WORDBYTES;
31072	8163		store_word_unaligned(load_word_unaligned(src), dst);
31073	8163		src += WORDBYTES;
31074	8163		dst += WORDBYTES;
31075	8163		store_word_unaligned(load_word_unaligned(src), dst);
31076	8163		src += WORDBYTES;
31077	8163		dst += WORDBYTES;
31078	8163		store_word_unaligned(load_word_unaligned(src), dst);
31079	8163		src += WORDBYTES;
31080	8163		dst += WORDBYTES;
31081	8430	100	while (dst < out_next) {
31082	267		store_word_unaligned(load_word_unaligned(src), dst);
31083	267		src += WORDBYTES;
31084	267		dst += WORDBYTES;
31085	267		store_word_unaligned(load_word_unaligned(src), dst);
31086	267		src += WORDBYTES;
31087	267		dst += WORDBYTES;
31088	267		store_word_unaligned(load_word_unaligned(src), dst);
31089	267		src += WORDBYTES;
31090	267		dst += WORDBYTES;
31091	267		store_word_unaligned(load_word_unaligned(src), dst);
31092	267		src += WORDBYTES;
31093	267		dst += WORDBYTES;
31094	267		store_word_unaligned(load_word_unaligned(src), dst);
31095	267		src += WORDBYTES;
31096	267		dst += WORDBYTES;
31097			}
31098	9	50	} else if (UNALIGNED_ACCESS_IS_FAST && offset == 1) {
31099			machine_word_t v;
31100
31101
31102	0		v = (machine_word_t)0x0101010101010101 * src[0];
31103	0		store_word_unaligned(v, dst);
31104	0		dst += WORDBYTES;
31105	0		store_word_unaligned(v, dst);
31106	0		dst += WORDBYTES;
31107	0		store_word_unaligned(v, dst);
31108	0		dst += WORDBYTES;
31109	0		store_word_unaligned(v, dst);
31110	0		dst += WORDBYTES;
31111	0	0	while (dst < out_next) {
31112	0		store_word_unaligned(v, dst);
31113	0		dst += WORDBYTES;
31114	0		store_word_unaligned(v, dst);
31115	0		dst += WORDBYTES;
31116	0		store_word_unaligned(v, dst);
31117	0		dst += WORDBYTES;
31118	0		store_word_unaligned(v, dst);
31119	0		dst += WORDBYTES;
31120			}
31121			} else if (UNALIGNED_ACCESS_IS_FAST) {
31122	9		store_word_unaligned(load_word_unaligned(src), dst);
31123	9		src += offset;
31124	9		dst += offset;
31125	9		store_word_unaligned(load_word_unaligned(src), dst);
31126	9		src += offset;
31127	9		dst += offset;
31128			do {
31129	9		store_word_unaligned(load_word_unaligned(src), dst);
31130	9		src += offset;
31131	9		dst += offset;
31132	9		store_word_unaligned(load_word_unaligned(src), dst);
31133	9		src += offset;
31134	9		dst += offset;
31135	9	50	} while (dst < out_next);
31136			} else {
31137			*dst++ = *src++;
31138			*dst++ = *src++;
31139			do {
31140			*dst++ = *src++;
31141			} while (dst < out_next);
31142			}
31143	10150	100	} while (in_next < in_fastloop_end && out_next < out_fastloop_end);
		100
31144
31145
31146	40		generic_loop:
31147	930		for (;;) {
31148			u32 length, offset;
31149			const u8 *src;
31150			u8 *dst;
31151
31152	2172	100	REFILL_BITS();
		100
		50
		100
31153	970		entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask];
31154	970		saved_bitbuf = bitbuf;
31155	970		bitbuf >>= (u8)entry;
31156	970		bitsleft -= entry;
31157	970	100	if (unlikely(entry & HUFFDEC_SUBTABLE_POINTER)) {
31158	6		entry = d->u.litlen_decode_table[(entry >> 16) +
31159	3		EXTRACT_VARBITS(bitbuf, (entry >> 8) & 0x3F)];
31160	3		saved_bitbuf = bitbuf;
31161	3		bitbuf >>= (u8)entry;
31162	3		bitsleft -= entry;
31163			}
31164	970		length = entry >> 16;
31165	970	100	if (entry & HUFFDEC_LITERAL) {
31166	588	50	if (unlikely(out_next == out_end))
31167	0		return LIBDEFLATE_INSUFFICIENT_SPACE;
31168	588		*out_next++ = length;
31169	588		continue;
31170			}
31171	382	100	if (unlikely(entry & HUFFDEC_END_OF_BLOCK))
31172	40		goto block_done;
31173	342		length += EXTRACT_VARBITS8(saved_bitbuf, entry) >> (u8)(entry >> 8);
31174	342	50	if (unlikely(length > out_end - out_next))
31175	0		return LIBDEFLATE_INSUFFICIENT_SPACE;
31176
31177			if (!CAN_CONSUME(LENGTH_MAXBITS + OFFSET_MAXBITS))
31178			REFILL_BITS();
31179	342		entry = d->offset_decode_table[bitbuf & BITMASK(OFFSET_TABLEBITS)];
31180	342	50	if (unlikely(entry & HUFFDEC_EXCEPTIONAL)) {
31181	0		bitbuf >>= OFFSET_TABLEBITS;
31182	0		bitsleft -= OFFSET_TABLEBITS;
31183	0		entry = d->offset_decode_table[(entry >> 16) +
31184	0		EXTRACT_VARBITS(bitbuf, (entry >> 8) & 0x3F)];
31185			if (!CAN_CONSUME(OFFSET_MAXBITS))
31186			REFILL_BITS();
31187			}
31188	342		offset = entry >> 16;
31189	342		offset += EXTRACT_VARBITS8(bitbuf, entry) >> (u8)(entry >> 8);
31190	342		bitbuf >>= (u8)entry;
31191	342		bitsleft -= entry;
31192
31193	342	50	SAFETY_CHECK(offset <= out_next - (const u8 *)out);
31194	342		src = out_next - offset;
31195	342		dst = out_next;
31196	342		out_next += length;
31197
31198			STATIC_ASSERT(DEFLATE_MIN_MATCH_LEN == 3);
31199	342		*dst++ = *src++;
31200	342		*dst++ = *src++;
31201			do {
31202	60992		*dst++ = *src++;
31203	60992	100	} while (dst < out_next);
31204			}
31205
31206	41		block_done:
31207
31208
31209	41	100	if (!is_final_block)
31210	1		goto next_block;
31211
31212
31213
31214	40		bitsleft = (u8)bitsleft;
31215
31216
31217	40	50	SAFETY_CHECK(overread_count <= (bitsleft >> 3));
31218
31219
31220	40	50	if (actual_in_nbytes_ret) {
31221
31222	40		in_next -= (bitsleft >> 3) - overread_count;
31223
31224	40		*actual_in_nbytes_ret = in_next - (u8 *)in;
31225			}
31226
31227
31228	40	50	if (actual_out_nbytes_ret) {
31229	40		*actual_out_nbytes_ret = out_next - (u8 *)out;
31230			} else {
31231	0	0	if (out_next != out_end)
31232	0		return LIBDEFLATE_SHORT_OUTPUT;
31233			}
31234	40		return LIBDEFLATE_SUCCESS;
31235			}
31236
31237			#undef FUNCNAME
31238			#undef ATTRIBUTES
31239			#undef EXTRACT_VARBITS
31240			#undef EXTRACT_VARBITS8
31241
31242			#endif
31243
31244			#if defined(deflate_decompress_bmi2) && HAVE_BMI2_NATIVE
31245			#define DEFAULT_IMPL deflate_decompress_bmi2
31246			#else
31247			static inline decompress_func_t
31248	2		arch_select_decompress_func(void)
31249			{
31250			#ifdef deflate_decompress_bmi2
31251	2	50	if (HAVE_BMI2(get_x86_cpu_features()))
31252	2		return deflate_decompress_bmi2;
31253			#endif
31254	0		return NULL;
31255			}
31256			#define arch_select_decompress_func arch_select_decompress_func
31257			#endif
31258
31259			#endif
31260
31261			#endif
31262
31263			#ifndef DEFAULT_IMPL
31264			# define DEFAULT_IMPL deflate_decompress_default
31265			#endif
31266
31267			#ifdef arch_select_decompress_func
31268			static enum libdeflate_result
31269			dispatch_decomp(struct libdeflate_decompressor *d,
31270			const void *in, size_t in_nbytes,
31271			void *out, size_t out_nbytes_avail,
31272			size_t *actual_in_nbytes_ret, size_t *actual_out_nbytes_ret);
31273
31274			static volatile decompress_func_t decompress_impl = dispatch_decomp;
31275
31276
31277			static enum libdeflate_result
31278	2		dispatch_decomp(struct libdeflate_decompressor *d,
31279			const void *in, size_t in_nbytes,
31280			void *out, size_t out_nbytes_avail,
31281			size_t *actual_in_nbytes_ret, size_t *actual_out_nbytes_ret)
31282			{
31283	2		decompress_func_t f = arch_select_decompress_func();
31284
31285	2	50	if (f == NULL)
31286	0		f = DEFAULT_IMPL;
31287
31288	2		decompress_impl = f;
31289	2		return f(d, in, in_nbytes, out, out_nbytes_avail,
31290			actual_in_nbytes_ret, actual_out_nbytes_ret);
31291			}
31292			#else
31293
31294			# define decompress_impl DEFAULT_IMPL
31295			#endif
31296
31297
31298			LIBDEFLATEAPI enum libdeflate_result
31299	40		libdeflate_deflate_decompress_ex(struct libdeflate_decompressor *d,
31300			const void *in, size_t in_nbytes,
31301			void *out, size_t out_nbytes_avail,
31302			size_t *actual_in_nbytes_ret,
31303			size_t *actual_out_nbytes_ret)
31304			{
31305	40		return decompress_impl(d, in, in_nbytes, out, out_nbytes_avail,
31306			actual_in_nbytes_ret, actual_out_nbytes_ret);
31307			}
31308
31309			LIBDEFLATEAPI enum libdeflate_result
31310	0		libdeflate_deflate_decompress(struct libdeflate_decompressor *d,
31311			const void *in, size_t in_nbytes,
31312			void *out, size_t out_nbytes_avail,
31313			size_t *actual_out_nbytes_ret)
31314			{
31315	0		return libdeflate_deflate_decompress_ex(d, in, in_nbytes,
31316			out, out_nbytes_avail,
31317			NULL, actual_out_nbytes_ret);
31318			}
31319
			/*
			 * Allocates and zero-initializes a decompressor using the allocator
			 * described by *options.
			 *
			 * options is dereferenced unconditionally, so it must not be NULL.
			 * Returns NULL when options->sizeof_options does not equal
			 * sizeof(*options) or when the allocation fails.
			 */
31320			LIBDEFLATEAPI struct libdeflate_decompressor *
31321	38		libdeflate_alloc_decompressor_ex(const struct libdeflate_options *options)
31322			{
31323			struct libdeflate_decompressor *d;
31324
31325
			/* Reject an options struct whose recorded size does not match. */
31326	38	50	if (options->sizeof_options != sizeof(*options))
31327	0		return NULL;
31328
			/* Use the caller's malloc_func if set, otherwise the library default. */
31329	38	50	d = (options->malloc_func ? options->malloc_func :
31330			libdeflate_default_malloc_func)(sizeof(*d));
31331	38	50	if (d == NULL)
31332	0		return NULL;
31333
31334	38		memset(d, 0, sizeof(*d));
			/* Remember the matching free function for libdeflate_free_decompressor(). */
31335	76		d->free_func = options->free_func ?
31336	38	50	options->free_func : libdeflate_default_free_func;
31337	38		return d;
31338			}
31339
			/*
			 * Allocates a decompressor with default options: only sizeof_options is
			 * set, so the remaining (zero-initialized) fields make
			 * libdeflate_alloc_decompressor_ex() pick the default allocator functions.
			 */
31340			LIBDEFLATEAPI struct libdeflate_decompressor *
31341	38		libdeflate_alloc_decompressor(void)
31342			{
31343			static const struct libdeflate_options defaults = {
31344			.sizeof_options = sizeof(defaults),
31345			};
31346	38		return libdeflate_alloc_decompressor_ex(&defaults);
31347			}
31348
			/*
			 * Releases a decompressor through the free function stored in it at
			 * allocation time.  A NULL pointer is accepted and ignored.
			 */
31349			LIBDEFLATEAPI void
31350	38		libdeflate_free_decompressor(struct libdeflate_decompressor *d)
31351			{
31352	38	50	if (d)
31353	38		d->free_func(d);
31354	38		}
31355			/* /usr/home/ben/projects/gzip-libdeflate/../../software/libdeflate/libdeflate-1.25/lib/gzip_compress.c */
31356
31357
31358			/* #include "deflate_compress.h" */
31359			#ifndef LIB_DEFLATE_COMPRESS_H
31360			#define LIB_DEFLATE_COMPRESS_H
31361
31362			/* #include "lib_common.h" */
31363
31364
31365			#ifndef LIB_LIB_COMMON_H
31366			#define LIB_LIB_COMMON_H
31367
31368			#ifdef LIBDEFLATE_H
31369
31370			# error "lib_common.h must always be included before libdeflate.h"
31371			#endif
31372
			/*
			 * LIBDEFLATE_EXPORT_SYM: symbol-export decoration.  dllexport for DLL
			 * builds on Windows/Cygwin, default ELF visibility with GNU-compatible
			 * compilers, otherwise empty.
			 */
31373			#if defined(LIBDEFLATE_DLL) && (defined(_WIN32) \|\| defined(__CYGWIN__))
31374			# define LIBDEFLATE_EXPORT_SYM __declspec(dllexport)
31375			#elif defined(__GNUC__)
31376			# define LIBDEFLATE_EXPORT_SYM __attribute__((visibility("default")))
31377			#else
31378			# define LIBDEFLATE_EXPORT_SYM
31379			#endif
31380
31381
			/*
			 * LIBDEFLATE_ALIGN_STACK: adds force_align_arg_pointer on 32-bit x86
			 * with GNU-compatible compilers; empty elsewhere.
			 */
31382			#if defined(__GNUC__) && defined(__i386__)
31383			# define LIBDEFLATE_ALIGN_STACK __attribute__((force_align_arg_pointer))
31384			#else
31385			# define LIBDEFLATE_ALIGN_STACK
31386			#endif
31387
			/* Decoration applied to every public function definition. */
31388			#define LIBDEFLATEAPI LIBDEFLATE_EXPORT_SYM LIBDEFLATE_ALIGN_STACK
31389
31390			/* #include "../common_defs.h" */
31391
31392
31393			#ifndef COMMON_DEFS_H
31394			#define COMMON_DEFS_H
31395
31396			/* #include "libdeflate.h" */
31397
31398
31399			#ifndef LIBDEFLATE_H
31400			#define LIBDEFLATE_H
31401
31402			#include
31403			#include
31404
31405			#ifdef __cplusplus
31406			extern "C" {
31407			#endif
31408
			/* Library version as separate numbers and as a string. */
31409			#define LIBDEFLATE_VERSION_MAJOR 1
31410			#define LIBDEFLATE_VERSION_MINOR 25
31411			#define LIBDEFLATE_VERSION_STRING "1.25"
31412
31413
			/*
			 * Fallback definition of LIBDEFLATEAPI, used only when it has not been
			 * defined already: dllimport for DLL builds on Windows/Cygwin, otherwise
			 * empty.
			 */
31414			#ifndef LIBDEFLATEAPI
31415			# if defined(LIBDEFLATE_DLL) && (defined(_WIN32) \|\| defined(__CYGWIN__))
31416			# define LIBDEFLATEAPI __declspec(dllimport)
31417			# else
31418			# define LIBDEFLATEAPI
31419			# endif
31420			#endif
31421
31422
31423
31424
31425
			/*
			 * Compressor API prototypes.  struct libdeflate_compressor is only
			 * forward-declared here, so callers handle it through pointers.
			 */
31426			struct libdeflate_compressor;
31427			struct libdeflate_options;
31428
31429
			/* Allocation, with and without an explicit options struct. */
31430			LIBDEFLATEAPI struct libdeflate_compressor *
31431			libdeflate_alloc_compressor(int compression_level);
31432
31433
31434			LIBDEFLATEAPI struct libdeflate_compressor *
31435			libdeflate_alloc_compressor_ex(int compression_level,
31436			const struct libdeflate_options *options);
31437
31438
			/*
			 * One compress function and one *_bound function for each of the
			 * deflate, zlib and gzip variants.  All of them take an input buffer
			 * (in, in_nbytes); the compress functions also take an output buffer
			 * (out, out_nbytes_avail) and return a size_t.
			 */
31439			LIBDEFLATEAPI size_t
31440			libdeflate_deflate_compress(struct libdeflate_compressor *compressor,
31441			const void *in, size_t in_nbytes,
31442			void *out, size_t out_nbytes_avail);
31443
31444
31445			LIBDEFLATEAPI size_t
31446			libdeflate_deflate_compress_bound(struct libdeflate_compressor *compressor,
31447			size_t in_nbytes);
31448
31449
31450			LIBDEFLATEAPI size_t
31451			libdeflate_zlib_compress(struct libdeflate_compressor *compressor,
31452			const void *in, size_t in_nbytes,
31453			void *out, size_t out_nbytes_avail);
31454
31455
31456			LIBDEFLATEAPI size_t
31457			libdeflate_zlib_compress_bound(struct libdeflate_compressor *compressor,
31458			size_t in_nbytes);
31459
31460
31461			LIBDEFLATEAPI size_t
31462			libdeflate_gzip_compress(struct libdeflate_compressor *compressor,
31463			const void *in, size_t in_nbytes,
31464			void *out, size_t out_nbytes_avail);
31465
31466
31467			LIBDEFLATEAPI size_t
31468			libdeflate_gzip_compress_bound(struct libdeflate_compressor *compressor,
31469			size_t in_nbytes);
31470
31471
			/* Deallocation counterpart of the alloc functions above. */
31472			LIBDEFLATEAPI void
31473			libdeflate_free_compressor(struct libdeflate_compressor *compressor);
31474
31475
31476
31477
31478
			/*
			 * Decompressor allocation prototypes.  struct libdeflate_decompressor is
			 * only forward-declared here.
			 */
31479			struct libdeflate_decompressor;
31480			struct libdeflate_options;
31481
31482
31483			LIBDEFLATEAPI struct libdeflate_decompressor *
31484			libdeflate_alloc_decompressor(void);
31485
31486
31487			LIBDEFLATEAPI struct libdeflate_decompressor *
31488			libdeflate_alloc_decompressor_ex(const struct libdeflate_options *options);
31489
31490
			/*
			 * Result code returned by the decompression functions declared in this
			 * header.  The numeric values are assigned explicitly (0 through 3).
			 * NOTE(review): the per-value descriptions are absent from this listing;
			 * the meanings are presumably those the names suggest -- confirm against
			 * the library documentation.
			 */
31491			enum libdeflate_result {
31492
31493			LIBDEFLATE_SUCCESS = 0,
31494
31495
31496			LIBDEFLATE_BAD_DATA = 1,
31497
31498
31499			LIBDEFLATE_SHORT_OUTPUT = 2,
31500
31501
31502			LIBDEFLATE_INSUFFICIENT_SPACE = 3,
31503			};
31504
31505
			/*
			 * Decompression prototypes for the deflate, zlib and gzip variants.
			 * Each variant has a plain form and an _ex form; the _ex form takes an
			 * additional actual_in_nbytes_ret pointer.  All return
			 * enum libdeflate_result.
			 */
31506			LIBDEFLATEAPI enum libdeflate_result
31507			libdeflate_deflate_decompress(struct libdeflate_decompressor *decompressor,
31508			const void *in, size_t in_nbytes,
31509			void *out, size_t out_nbytes_avail,
31510			size_t *actual_out_nbytes_ret);
31511
31512
31513			LIBDEFLATEAPI enum libdeflate_result
31514			libdeflate_deflate_decompress_ex(struct libdeflate_decompressor *decompressor,
31515			const void *in, size_t in_nbytes,
31516			void *out, size_t out_nbytes_avail,
31517			size_t *actual_in_nbytes_ret,
31518			size_t *actual_out_nbytes_ret);
31519
31520
31521			LIBDEFLATEAPI enum libdeflate_result
31522			libdeflate_zlib_decompress(struct libdeflate_decompressor *decompressor,
31523			const void *in, size_t in_nbytes,
31524			void *out, size_t out_nbytes_avail,
31525			size_t *actual_out_nbytes_ret);
31526
31527
31528			LIBDEFLATEAPI enum libdeflate_result
31529			libdeflate_zlib_decompress_ex(struct libdeflate_decompressor *decompressor,
31530			const void *in, size_t in_nbytes,
31531			void *out, size_t out_nbytes_avail,
31532			size_t *actual_in_nbytes_ret,
31533			size_t *actual_out_nbytes_ret);
31534
31535
31536			LIBDEFLATEAPI enum libdeflate_result
31537			libdeflate_gzip_decompress(struct libdeflate_decompressor *decompressor,
31538			const void *in, size_t in_nbytes,
31539			void *out, size_t out_nbytes_avail,
31540			size_t *actual_out_nbytes_ret);
31541
31542
31543			LIBDEFLATEAPI enum libdeflate_result
31544			libdeflate_gzip_decompress_ex(struct libdeflate_decompressor *decompressor,
31545			const void *in, size_t in_nbytes,
31546			void *out, size_t out_nbytes_avail,
31547			size_t *actual_in_nbytes_ret,
31548			size_t *actual_out_nbytes_ret);
31549
31550
			/* Deallocation counterpart of libdeflate_alloc_decompressor*(). */
31551			LIBDEFLATEAPI void
31552			libdeflate_free_decompressor(struct libdeflate_decompressor *decompressor);
31553
31554
31555
31556
31557
31558
			/*
			 * Checksum prototypes.  Both take a starting value, a buffer and its
			 * length, and return a 32-bit value.
			 */
31559			LIBDEFLATEAPI uint32_t
31560			libdeflate_adler32(uint32_t adler, const void *buffer, size_t len);
31561
31562
31563
31564			LIBDEFLATEAPI uint32_t
31565			libdeflate_crc32(uint32_t crc, const void *buffer, size_t len);
31566
31567
31568
31569
31570
31571
			/*
			 * Installs a malloc/free pair for the library.
			 * NOTE(review): the parameter declarators read "void (malloc_func)(size_t)"
			 * and "void (free_func)(void )".  Presumably these were function pointers
			 * ("void *(*malloc_func)(size_t)", "void (*free_func)(void *)") whose '*'
			 * characters were lost in rendering -- confirm against the original header.
			 */
31572			LIBDEFLATEAPI void
31573			libdeflate_set_memory_allocator(void (malloc_func)(size_t),
31574			void (free_func)(void ));
31575
31576
			/*
			 * Options accepted by the *_ex allocation functions.
			 *
			 * sizeof_options is compared against sizeof(struct libdeflate_options) by
			 * libdeflate_alloc_decompressor_ex(), which returns NULL on mismatch.
			 * malloc_func / free_func may be left zero, in which case that function
			 * uses libdeflate_default_malloc_func / libdeflate_default_free_func.
			 * NOTE(review): the same apparent loss of '*' declarators applies to the
			 * two function members below.
			 */
31577			struct libdeflate_options {
31578
31579
31580			size_t sizeof_options;
31581
31582
31583			void (malloc_func)(size_t);
31584			void (free_func)(void );
31585			};
31586
31587			#ifdef __cplusplus
31588			}
31589			#endif
31590
31591			#endif
31592
31593
31594			#include
31595			#include
31596			#include
31597			#ifdef _MSC_VER
31598			# include
31599			# include
31600
31601
31602			# pragma warning(disable : 4146)
31603
31604			# pragma warning(disable : 4018)
31605			# pragma warning(disable : 4244)
31606			# pragma warning(disable : 4267)
31607			# pragma warning(disable : 4310)
31608
31609			# pragma warning(disable : 4100)
31610			# pragma warning(disable : 4127)
31611			# pragma warning(disable : 4189)
31612			# pragma warning(disable : 4232)
31613			# pragma warning(disable : 4245)
31614			# pragma warning(disable : 4295)
31615			#endif
31616			#ifndef FREESTANDING
31617			# include
31618			#endif
31619
31620
31621
31622
31623
31624
			/*
			 * Target architecture detection.  Any previous ARCH_* definitions are
			 * cleared first; then at most one ARCH_* macro is defined, using MSVC's
			 * _M_* macros under _MSC_VER and the GNU-style __*__ macros otherwise.
			 * ARCH_RISCV is only detected on the non-MSVC path, and _M_X64 is not
			 * treated as x86-64 when _M_ARM64EC is also defined.
			 */
31625			#undef ARCH_X86_64
31626			#undef ARCH_X86_32
31627			#undef ARCH_ARM64
31628			#undef ARCH_ARM32
31629			#undef ARCH_RISCV
31630			#ifdef _MSC_VER
31631
31632			# if defined(_M_X64) && !defined(_M_ARM64EC)
31633			# define ARCH_X86_64
31634			# elif defined(_M_IX86)
31635			# define ARCH_X86_32
31636			# elif defined(_M_ARM64)
31637			# define ARCH_ARM64
31638			# elif defined(_M_ARM)
31639			# define ARCH_ARM32
31640			# endif
31641			#else
31642			# if defined(__x86_64__)
31643			# define ARCH_X86_64
31644			# elif defined(__i386__)
31645			# define ARCH_X86_32
31646			# elif defined(__aarch64__)
31647			# define ARCH_ARM64
31648			# elif defined(__arm__)
31649			# define ARCH_ARM32
31650			# elif defined(__riscv)
31651			# define ARCH_RISCV
31652			# endif
31653			#endif
31654
31655
31656
31657
31658
31659
			/* Short aliases for the fixed-width integer types. */
31660			typedef uint8_t u8;
31661			typedef uint16_t u16;
31662			typedef uint32_t u32;
31663			typedef uint64_t u64;
31664			typedef int8_t s8;
31665			typedef int16_t s16;
31666			typedef int32_t s32;
31667			typedef int64_t s64;
31668
31669
			/* ssize_t is supplied here for MSVC, sized by _WIN64. */
31670			#ifdef _MSC_VER
31671			# ifdef _WIN64
31672			typedef long long ssize_t;
31673			# else
31674			typedef long ssize_t;
31675			# endif
31676			#endif
31677
31678
			/* Type used for word-at-a-time operations; an alias of size_t. */
31679			typedef size_t machine_word_t;
31680
31681
			/* Size of machine_word_t in bytes, as an int. */
31682			#define WORDBYTES ((int)sizeof(machine_word_t))
31683
31684
			/* Size of machine_word_t in bits (assumes 8-bit bytes). */
31685			#define WORDBITS (8 * WORDBYTES)
31686
31687
31688
31689
31690
31691
			/*
			 * GCC_PREREQ(major, minor): true when compiling with real gcc (not clang
			 * or the Intel compiler) of at least the given version; 0 otherwise.
			 * gcc older than 4.9 is rejected with #error.
			 */
31692			#if defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER)
31693			# define GCC_PREREQ(major, minor) \
31694			(__GNUC__ > (major) \|\| \
31695			(__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)))
31696			# if !GCC_PREREQ(4, 9)
31697			# error "gcc versions older than 4.9 are no longer supported"
31698			# endif
31699			#else
31700			# define GCC_PREREQ(major, minor) 0
31701			#endif
			/*
			 * CLANG_PREREQ(major, minor, apple_version): for Apple clang only the
			 * apple_version argument is compared (against __apple_build_version__);
			 * for other clang builds only major/minor are compared.  0 when not
			 * compiling with clang.  clang older than 3.9 / 8000000 is rejected.
			 */
31702			#ifdef __clang__
31703			# ifdef __apple_build_version__
31704			# define CLANG_PREREQ(major, minor, apple_version) \
31705			(__apple_build_version__ >= (apple_version))
31706			# else
31707			# define CLANG_PREREQ(major, minor, apple_version) \
31708			(__clang_major__ > (major) \|\| \
31709			(__clang_major__ == (major) && __clang_minor__ >= (minor)))
31710			# endif
31711			# if !CLANG_PREREQ(3, 9, 8000000)
31712			# error "clang versions older than 3.9 are no longer supported"
31713			# endif
31714			#else
31715			# define CLANG_PREREQ(major, minor, apple_version) 0
31716			#endif
			/*
			 * MSVC_PREREQ(version): compares _MSC_VER against version; 0 when not
			 * compiling with MSVC.  _MSC_VER below 1900 is rejected.
			 */
31717			#ifdef _MSC_VER
31718			# define MSVC_PREREQ(version) (_MSC_VER >= (version))
31719			# if !MSVC_PREREQ(1900)
31720			# error "MSVC versions older than Visual Studio 2015 are no longer supported"
31721			# endif
31722			#else
31723			# define MSVC_PREREQ(version) 0
31724			#endif
31725
31726
			/*
			 * Make __has_attribute() and __has_builtin() usable in #if on compilers
			 * that lack them; the fallbacks always evaluate to 0.
			 */
31727			#ifndef __has_attribute
31728			# define __has_attribute(attribute) 0
31729			#endif
31730
31731
31732			#ifndef __has_builtin
31733			# define __has_builtin(builtin) 0
31734			#endif
31735
31736
			/* MSVC: map the inline keyword to __inline. */
31737			#ifdef _MSC_VER
31738			# define inline __inline
31739			#endif
31740
31741
			/*
			 * forceinline: always_inline on GNU-compatible compilers, __forceinline
			 * on MSVC, plain inline elsewhere.
			 */
31742			#if defined(__GNUC__) \|\| __has_attribute(always_inline)
31743			# define forceinline inline __attribute__((always_inline))
31744			#elif defined(_MSC_VER)
31745			# define forceinline __forceinline
31746			#else
31747			# define forceinline inline
31748			#endif
31749
31750
			/* MAYBE_UNUSED: the "unused" attribute where available, else empty. */
31751			#if defined(__GNUC__) \|\| __has_attribute(unused)
31752			# define MAYBE_UNUSED __attribute__((unused))
31753			#else
31754			# define MAYBE_UNUSED
31755			#endif
31756
31757
			/* NORETURN: the "noreturn" attribute where available, else empty. */
31758			#if defined(__GNUC__) \|\| __has_attribute(noreturn)
31759			# define NORETURN __attribute__((noreturn))
31760			#else
31761			# define NORETURN
31762			#endif
31763
31764
			/*
			 * When __STDC_VERSION__ is undefined or below 201112L, restrict is
			 * redefined: to __restrict__ on gcc/clang, to nothing elsewhere.
			 */
31765			#if !defined(__STDC_VERSION__) \|\| (__STDC_VERSION__ < 201112L)
31766			# if defined(__GNUC__) \|\| defined(__clang__)
31767			# define restrict __restrict__
31768			# else
31769			# define restrict
31770			# endif
31771			#endif
31772
31773
			/*
			 * Branch-prediction hints.  With __builtin_expect the expression is
			 * normalized to 0/1 via !!; the fallbacks pass expr through unchanged.
			 */
31774			#if defined(__GNUC__) \|\| __has_builtin(__builtin_expect)
31775			# define likely(expr) __builtin_expect(!!(expr), 1)
31776			#else
31777			# define likely(expr) (expr)
31778			#endif
31779
31780
31781			#if defined(__GNUC__) \|\| __has_builtin(__builtin_expect)
31782			# define unlikely(expr) __builtin_expect(!!(expr), 0)
31783			#else
31784			# define unlikely(expr) (expr)
31785			#endif
31786
31787
			/*
			 * prefetchr(addr): prefetch for reading.  Uses __builtin_prefetch(addr, 0)
			 * where available, an architecture-specific MSVC intrinsic otherwise,
			 * and expands to nothing when neither applies.
			 */
31788			#undef prefetchr
31789			#if defined(__GNUC__) \|\| __has_builtin(__builtin_prefetch)
31790			# define prefetchr(addr) __builtin_prefetch((addr), 0)
31791			#elif defined(_MSC_VER)
31792			# if defined(ARCH_X86_32) \|\| defined(ARCH_X86_64)
31793			# define prefetchr(addr) _mm_prefetch((addr), _MM_HINT_T0)
31794			# elif defined(ARCH_ARM64)
31795			# define prefetchr(addr) __prefetch2((addr), 0x00 )
31796			# elif defined(ARCH_ARM32)
31797			# define prefetchr(addr) __prefetch(addr)
31798			# endif
31799			#endif
31800			#ifndef prefetchr
31801			# define prefetchr(addr)
31802			#endif
31803
31804
			/*
			 * prefetchw(addr): prefetch for writing.  Same structure as prefetchr,
			 * using __builtin_prefetch(addr, 1) or the write-prefetch intrinsics.
			 */
31805			#undef prefetchw
31806			#if defined(__GNUC__) \|\| __has_builtin(__builtin_prefetch)
31807			# define prefetchw(addr) __builtin_prefetch((addr), 1)
31808			#elif defined(_MSC_VER)
31809			# if defined(ARCH_X86_32) \|\| defined(ARCH_X86_64)
31810			# define prefetchw(addr) _m_prefetchw(addr)
31811			# elif defined(ARCH_ARM64)
31812			# define prefetchw(addr) __prefetch2((addr), 0x10 )
31813			# elif defined(ARCH_ARM32)
31814			# define prefetchw(addr) __prefetchw(addr)
31815			# endif
31816			#endif
31817			#ifndef prefetchw
31818			# define prefetchw(addr)
31819			#endif
31820
31821
			/*
			 * _aligned_attribute(n): alignment specifier.  Left undefined when
			 * neither the GNU attribute nor the MSVC declspec is available, so users
			 * can test for it with #ifdef.
			 */
31822			#undef _aligned_attribute
31823			#if defined(__GNUC__) \|\| __has_attribute(aligned)
31824			# define _aligned_attribute(n) __attribute__((aligned(n)))
31825			#elif defined(_MSC_VER)
31826			# define _aligned_attribute(n) __declspec(align(n))
31827			#endif
31828
31829
			/*
			 * _target_attribute(attrs): per-function target attribute; expands to
			 * nothing when the attribute is unavailable.
			 */
31830			#if defined(__GNUC__) \|\| __has_attribute(target)
31831			# define _target_attribute(attrs) __attribute__((target(attrs)))
31832			#else
31833			# define _target_attribute(attrs)
31834			#endif
31835
31836
31837
31838
31839
			/*
			 * Small helper macros.  MIN and MAX evaluate one of their arguments
			 * twice, so arguments must be free of side effects.  STATIC_ASSERT
			 * yields a negative array size, and hence a compile error, when expr is
			 * false.  ALIGN rounds n up with a mask, which is only correct when a is
			 * a power of two; ROUND_UP works for any nonzero d.
			 */
31840			#define ARRAY_LEN(A) (sizeof(A) / sizeof((A)[0]))
31841			#define MIN(a, b) ((a) <= (b) ? (a) : (b))
31842			#define MAX(a, b) ((a) >= (b) ? (a) : (b))
31843			#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
31844			#define STATIC_ASSERT(expr) ((void)sizeof(char[1 - 2 * !(expr)]))
31845			#define ALIGN(n, a) (((n) + (a) - 1) & ~((a) - 1))
31846			#define ROUND_UP(n, d) ((d) * DIV_ROUND_UP((n), (d)))
31847
31848
31849
31850
31851
31852
			/*
			 * CPU_IS_LITTLE_ENDIAN(): a compile-time comparison when the compiler
			 * defines __BYTE_ORDER__, the constant true on MSVC, and otherwise a
			 * function that stores 1 in a u32 and reads back its first byte through
			 * a union.
			 */
31853			#if defined(__BYTE_ORDER__)
31854			# define CPU_IS_LITTLE_ENDIAN() (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
31855			#elif defined(_MSC_VER)
31856			# define CPU_IS_LITTLE_ENDIAN() true
31857			#else
31858			static forceinline bool CPU_IS_LITTLE_ENDIAN(void)
31859			{
31860			union {
31861			u32 w;
31862			u8 b;
31863			} u;
31864
31865			u.w = 1;
31866			return u.b;
31867			}
31868			#endif
31869
31870
			/*
			 * Swaps the two bytes of v, using the compiler builtin or MSVC intrinsic
			 * when available and a shift-and-or fallback otherwise.
			 */
31871			static forceinline u16 bswap16(u16 v)
31872			{
31873			#if defined(__GNUC__) \|\| __has_builtin(__builtin_bswap16)
31874			return __builtin_bswap16(v);
31875			#elif defined(_MSC_VER)
31876			return _byteswap_ushort(v);
31877			#else
31878			return (v << 8) \| (v >> 8);
31879			#endif
31880			}
31881
31882
			/*
			 * Reverses the byte order of a 32-bit value, using the compiler builtin
			 * or MSVC intrinsic when available and mask-and-shift otherwise.
			 */
31883			static forceinline u32 bswap32(u32 v)
31884			{
31885			#if defined(__GNUC__) \|\| __has_builtin(__builtin_bswap32)
31886			return __builtin_bswap32(v);
31887			#elif defined(_MSC_VER)
31888			return _byteswap_ulong(v);
31889			#else
31890			return ((v & 0x000000FF) << 24) \|
31891			((v & 0x0000FF00) << 8) \|
31892			((v & 0x00FF0000) >> 8) \|
31893			((v & 0xFF000000) >> 24);
31894			#endif
31895			}
31896
31897
			/*
			 * Reverses the byte order of a 64-bit value, using the compiler builtin
			 * or MSVC intrinsic when available and mask-and-shift otherwise.
			 */
31898			static forceinline u64 bswap64(u64 v)
31899			{
31900			#if defined(__GNUC__) \|\| __has_builtin(__builtin_bswap64)
31901			return __builtin_bswap64(v);
31902			#elif defined(_MSC_VER)
31903			return _byteswap_uint64(v);
31904			#else
31905			return ((v & 0x00000000000000FF) << 56) \|
31906			((v & 0x000000000000FF00) << 40) \|
31907			((v & 0x0000000000FF0000) << 24) \|
31908			((v & 0x00000000FF000000) << 8) \|
31909			((v & 0x000000FF00000000) >> 8) \|
31910			((v & 0x0000FF0000000000) >> 24) \|
31911			((v & 0x00FF000000000000) >> 40) \|
31912			((v & 0xFF00000000000000) >> 56);
31913			#endif
31914			}
31915
			/*
			 * Conversions between native byte order and a fixed byte order.  The
			 * le* forms swap only on big-endian CPUs and the be* forms swap only on
			 * little-endian CPUs, so each is a no-op when the orders already match.
			 */
31916			#define le16_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap16(v))
31917			#define le32_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap32(v))
31918			#define le64_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap64(v))
31919			#define be16_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? bswap16(v) : (v))
31920			#define be32_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? bswap32(v) : (v))
31921			#define be64_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? bswap64(v) : (v))
31922
31923
31924
31925
31926
31927
			/*
			 * UNALIGNED_ACCESS_IS_FAST: 1 on gcc/clang for the listed targets
			 * (x86, ARM with __ARM_FEATURE_UNALIGNED, powerpc64, RISC-V with
			 * __riscv_misaligned_fast, wasm) and always on MSVC; 0 otherwise.  The
			 * get_unaligned_*() and put_unaligned_*() helpers branch on it.
			 */
31928			#if (defined(__GNUC__) \|\| defined(__clang__)) && \
31929			(defined(ARCH_X86_64) \|\| defined(ARCH_X86_32) \|\| \
31930			defined(__ARM_FEATURE_UNALIGNED) \|\| defined(__powerpc64__) \|\| \
31931			defined(__riscv_misaligned_fast) \|\| \
31932			defined(__wasm__))
31933			# define UNALIGNED_ACCESS_IS_FAST 1
31934			#elif defined(_MSC_VER)
31935			# define UNALIGNED_ACCESS_IS_FAST 1
31936			#else
31937			# define UNALIGNED_ACCESS_IS_FAST 0
31938			#endif
31939
31940
31941
			/*
			 * MEMCOPY is the copy primitive used by the generated accessors below:
			 * __builtin_memcpy in FREESTANDING builds, memcpy otherwise.  It is
			 * undefined again once the accessors have been generated.
			 */
31942			#ifdef FREESTANDING
31943			# define MEMCOPY __builtin_memcpy
31944			#else
31945			# define MEMCOPY memcpy
31946			#endif
31947
31948
31949
			/*
			 * DEFINE_UNALIGNED_TYPE(type) generates load_<type>_unaligned(p) and
			 * store_<type>_unaligned(v, p), which move a value of the given type
			 * to or from an arbitrary address by copying sizeof(type) bytes.  No
			 * byte-order conversion is performed.
			 */
31950			#define DEFINE_UNALIGNED_TYPE(type) \
31951			static forceinline type \
31952			load_##type##_unaligned(const void *p) \
31953			{ \
31954			type v; \
31955			\
31956			MEMCOPY(&v, p, sizeof(v)); \
31957			return v; \
31958			} \
31959			\
31960			static forceinline void \
31961			store_##type##_unaligned(type v, void *p) \
31962			{ \
31963			MEMCOPY(p, &v, sizeof(v)); \
31964			}
31965
31966			DEFINE_UNALIGNED_TYPE(u16)
31967			DEFINE_UNALIGNED_TYPE(u32)
31968			DEFINE_UNALIGNED_TYPE(u64)
31969			DEFINE_UNALIGNED_TYPE(machine_word_t)
31970
31971			#undef MEMCOPY
31972
			/* Shorter names for the machine_word_t accessors. */
31973			#define load_word_unaligned load_machine_word_t_unaligned
31974			#define store_word_unaligned store_machine_word_t_unaligned
31975
31976
31977
			/*
			 * Reads a little-endian 16-bit value from p, which need not be aligned.
			 * Uses one unaligned load plus a conditional byte swap when unaligned
			 * access is fast, otherwise assembles the value byte by byte.
			 */
31978			static forceinline u16
31979			get_unaligned_le16(const u8 *p)
31980			{
31981			if (UNALIGNED_ACCESS_IS_FAST)
31982			return le16_bswap(load_u16_unaligned(p));
31983			else
31984			return ((u16)p[1] << 8) \| p[0];
31985			}
31986
			/*
			 * Reads a big-endian 16-bit value from p, which need not be aligned.
			 * Same two strategies as get_unaligned_le16(), with p[0] as the high byte.
			 */
31987			static forceinline u16
31988			get_unaligned_be16(const u8 *p)
31989			{
31990			if (UNALIGNED_ACCESS_IS_FAST)
31991			return be16_bswap(load_u16_unaligned(p));
31992			else
31993			return ((u16)p[0] << 8) \| p[1];
31994			}
31995
			/*
			 * Reads a little-endian 32-bit value from p, which need not be aligned.
			 * In the byte-wise path p[0] is the least significant byte.
			 */
31996			static forceinline u32
31997			get_unaligned_le32(const u8 *p)
31998			{
31999			if (UNALIGNED_ACCESS_IS_FAST)
32000			return le32_bswap(load_u32_unaligned(p));
32001			else
32002			return ((u32)p[3] << 24) \| ((u32)p[2] << 16) \|
32003			((u32)p[1] << 8) \| p[0];
32004			}
32005
			/*
			 * Reads a big-endian 32-bit value from p, which need not be aligned.
			 * In the byte-wise path p[0] is the most significant byte.
			 */
32006			static forceinline u32
32007			get_unaligned_be32(const u8 *p)
32008			{
32009			if (UNALIGNED_ACCESS_IS_FAST)
32010			return be32_bswap(load_u32_unaligned(p));
32011			else
32012			return ((u32)p[0] << 24) \| ((u32)p[1] << 16) \|
32013			((u32)p[2] << 8) \| p[3];
32014			}
32015
			/*
			 * Reads a little-endian 64-bit value from p, which need not be aligned.
			 * In the byte-wise path p[0] is the least significant byte.
			 */
32016			static forceinline u64
32017			get_unaligned_le64(const u8 *p)
32018			{
32019			if (UNALIGNED_ACCESS_IS_FAST)
32020			return le64_bswap(load_u64_unaligned(p));
32021			else
32022			return ((u64)p[7] << 56) \| ((u64)p[6] << 48) \|
32023			((u64)p[5] << 40) \| ((u64)p[4] << 32) \|
32024			((u64)p[3] << 24) \| ((u64)p[2] << 16) \|
32025			((u64)p[1] << 8) \| p[0];
32026			}
32027
			/*
			 * Reads a little-endian machine word from p by delegating to the 32-bit
			 * or 64-bit reader according to WORDBITS.  Any other word size fails the
			 * static assertion.
			 */
32028			static forceinline machine_word_t
32029			get_unaligned_leword(const u8 *p)
32030			{
32031			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
32032			if (WORDBITS == 32)
32033			return get_unaligned_le32(p);
32034			else
32035			return get_unaligned_le64(p);
32036			}
32037
32038
32039
			/*
			 * Writes v to p as 2 little-endian bytes; p need not be aligned.  Uses
			 * one unaligned store when unaligned access is fast, otherwise writes
			 * each byte separately (low byte first).
			 */
32040			static forceinline void
32041			put_unaligned_le16(u16 v, u8 *p)
32042			{
32043			if (UNALIGNED_ACCESS_IS_FAST) {
32044			store_u16_unaligned(le16_bswap(v), p);
32045			} else {
32046			p[0] = (u8)(v >> 0);
32047			p[1] = (u8)(v >> 8);
32048			}
32049			}
32050
			/*
			 * Writes v to p as 2 big-endian bytes; p need not be aligned.  The
			 * byte-wise path writes the high byte first.
			 */
32051			static forceinline void
32052			put_unaligned_be16(u16 v, u8 *p)
32053			{
32054			if (UNALIGNED_ACCESS_IS_FAST) {
32055			store_u16_unaligned(be16_bswap(v), p);
32056			} else {
32057			p[0] = (u8)(v >> 8);
32058			p[1] = (u8)(v >> 0);
32059			}
32060			}
32061
			/*
			 * Writes v to p as 4 little-endian bytes; p need not be aligned.  The
			 * byte-wise path writes the least significant byte first.
			 */
32062			static forceinline void
32063			put_unaligned_le32(u32 v, u8 *p)
32064			{
32065			if (UNALIGNED_ACCESS_IS_FAST) {
32066			store_u32_unaligned(le32_bswap(v), p);
32067			} else {
32068			p[0] = (u8)(v >> 0);
32069			p[1] = (u8)(v >> 8);
32070			p[2] = (u8)(v >> 16);
32071			p[3] = (u8)(v >> 24);
32072			}
32073			}
32074
			/*
			 * Writes v to p as 4 big-endian bytes; p need not be aligned.  The
			 * byte-wise path writes the most significant byte first.
			 */
32075			static forceinline void
32076			put_unaligned_be32(u32 v, u8 *p)
32077			{
32078			if (UNALIGNED_ACCESS_IS_FAST) {
32079			store_u32_unaligned(be32_bswap(v), p);
32080			} else {
32081			p[0] = (u8)(v >> 24);
32082			p[1] = (u8)(v >> 16);
32083			p[2] = (u8)(v >> 8);
32084			p[3] = (u8)(v >> 0);
32085			}
32086			}
32087
			/*
			 * Writes v to p as 8 little-endian bytes; p need not be aligned.  The
			 * byte-wise path writes the least significant byte first.
			 */
32088			static forceinline void
32089			put_unaligned_le64(u64 v, u8 *p)
32090			{
32091			if (UNALIGNED_ACCESS_IS_FAST) {
32092			store_u64_unaligned(le64_bswap(v), p);
32093			} else {
32094			p[0] = (u8)(v >> 0);
32095			p[1] = (u8)(v >> 8);
32096			p[2] = (u8)(v >> 16);
32097			p[3] = (u8)(v >> 24);
32098			p[4] = (u8)(v >> 32);
32099			p[5] = (u8)(v >> 40);
32100			p[6] = (u8)(v >> 48);
32101			p[7] = (u8)(v >> 56);
32102			}
32103			}
32104
			/*
			 * Writes a machine word to p in little-endian order by delegating to the
			 * 32-bit or 64-bit writer according to WORDBITS.
			 */
32105			static forceinline void
32106			put_unaligned_leword(machine_word_t v, u8 *p)
32107			{
32108			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
32109			if (WORDBITS == 32)
32110			put_unaligned_le32(v, p);
32111			else
32112			put_unaligned_le64(v, p);
32113			}
32114
32115
32116
32117
32118
32119
32120
			/*
			 * Bit scan reverse: returns the index (0 = least significant) of the
			 * highest set bit of v.
			 * NOTE(review): callers presumably guarantee v != 0.  The portable
			 * fallback returns 0 for v == 0, and the builtin/intrinsic paths are not
			 * expected to give a meaningful result for that input -- confirm against
			 * the compiler documentation.
			 */
32121			static forceinline unsigned
32122			bsr32(u32 v)
32123			{
32124			#if defined(__GNUC__) \|\| __has_builtin(__builtin_clz)
32125			return 31 - __builtin_clz(v);
32126			#elif defined(_MSC_VER)
32127			unsigned long i;
32128
32129			_BitScanReverse(&i, v);
32130			return i;
32131			#else
32132			unsigned i = 0;
32133
			/* Count how many right shifts are needed before v becomes zero. */
32134			while ((v >>= 1) != 0)
32135			i++;
32136			return i;
32137			#endif
32138			}
32139
			/*
			 * 64-bit bit scan reverse: index of the highest set bit of v.  The MSVC
			 * intrinsic is used only on _WIN64; 32-bit MSVC builds take the loop.
			 * NOTE(review): as with bsr32(), v is presumably required to be nonzero.
			 */
32140			static forceinline unsigned
32141			bsr64(u64 v)
32142			{
32143			#if defined(__GNUC__) \|\| __has_builtin(__builtin_clzll)
32144			return 63 - __builtin_clzll(v);
32145			#elif defined(_MSC_VER) && defined(_WIN64)
32146			unsigned long i;
32147
32148			_BitScanReverse64(&i, v);
32149			return i;
32150			#else
32151			unsigned i = 0;
32152
32153			while ((v >>= 1) != 0)
32154			i++;
32155			return i;
32156			#endif
32157			}
32158
			/*
			 * Machine-word bit scan reverse: delegates to bsr32() or bsr64()
			 * according to WORDBITS.
			 */
32159			static forceinline unsigned
32160			bsrw(machine_word_t v)
32161			{
32162			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
32163			if (WORDBITS == 32)
32164			return bsr32(v);
32165			else
32166			return bsr64(v);
32167			}
32168
32169
32170
			/*
			 * Bit scan forward: returns the index (0 = least significant) of the
			 * lowest set bit of v.
			 * v must be nonzero: the portable fallback loops until it finds a set
			 * bit and would never terminate for v == 0.
			 */
32171			static forceinline unsigned
32172			bsf32(u32 v)
32173			{
32174			#if defined(__GNUC__) \|\| __has_builtin(__builtin_ctz)
32175			return __builtin_ctz(v);
32176			#elif defined(_MSC_VER)
32177			unsigned long i;
32178
32179			_BitScanForward(&i, v);
32180			return i;
32181			#else
32182			unsigned i = 0;
32183
			/* Shift right until the lowest bit is set, counting the shifts. */
32184			for (; (v & 1) == 0; v >>= 1)
32185			i++;
32186			return i;
32187			#endif
32188			}
32189
			/*
			 * 64-bit bit scan forward: index of the lowest set bit of v.  The MSVC
			 * intrinsic is used only on _WIN64.  v must be nonzero: the fallback
			 * loop would never terminate for v == 0.
			 */
32190			static forceinline unsigned
32191			bsf64(u64 v)
32192			{
32193			#if defined(__GNUC__) \|\| __has_builtin(__builtin_ctzll)
32194			return __builtin_ctzll(v);
32195			#elif defined(_MSC_VER) && defined(_WIN64)
32196			unsigned long i;
32197
32198			_BitScanForward64(&i, v);
32199			return i;
32200			#else
32201			unsigned i = 0;
32202
32203			for (; (v & 1) == 0; v >>= 1)
32204			i++;
32205			return i;
32206			#endif
32207			}
32208
			/*
			 * Machine-word bit scan forward: delegates to bsf32() or bsf64()
			 * according to WORDBITS.
			 */
32209			static forceinline unsigned
32210			bsfw(machine_word_t v)
32211			{
32212			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
32213			if (WORDBITS == 32)
32214			return bsf32(v);
32215			else
32216			return bsf64(v);
32217			}
32218
32219
			/*
			 * rbit32(v): wraps the ARM "rbit" instruction via inline assembly.  It is
			 * provided only for gcc/clang on 32-bit ARM (architecture 7 or later, or
			 * 6T2) and on ARM64.  When provided, a same-named macro is also defined,
			 * presumably so that other code can test for availability with #ifdef.
			 */
32220			#undef rbit32
32221			#if (defined(__GNUC__) \|\| defined(__clang__)) && defined(ARCH_ARM32) && \
32222			(__ARM_ARCH >= 7 \|\| (__ARM_ARCH == 6 && defined(__ARM_ARCH_6T2__)))
32223			static forceinline u32
32224			rbit32(u32 v)
32225			{
32226			__asm__("rbit %0, %1" : "=r" (v) : "r" (v));
32227			return v;
32228			}
32229			#define rbit32 rbit32
32230			#elif (defined(__GNUC__) \|\| defined(__clang__)) && defined(ARCH_ARM64)
32231			static forceinline u32
32232			rbit32(u32 v)
32233			{
			/* %w0 / %w1 select the 32-bit views of the registers. */
32234			__asm__("rbit %w0, %w1" : "=r" (v) : "r" (v));
32235			return v;
32236			}
32237			#define rbit32 rbit32
32238			#endif
32239
32240			#endif
32241
32242
			/*
			 * Allocator function types and the library-wide defaults consulted when
			 * an options struct leaves malloc_func / free_func unset.
			 * NOTE(review): as shown, these typedefs name function types and the two
			 * extern declarations would therefore declare functions, yet
			 * libdeflate_alloc_decompressor_ex() uses the defaults as values
			 * interchangeable with function pointers.  Presumably the originals were
			 * pointer typedefs ("void *(*malloc_func_t)(size_t)",
			 * "void (*free_func_t)(void *)") whose '*' characters were lost in
			 * rendering -- confirm against the original source.
			 */
32243			typedef void (malloc_func_t)(size_t);
32244			typedef void (free_func_t)(void );
32245
32246			extern malloc_func_t libdeflate_default_malloc_func;
32247			extern free_func_t libdeflate_default_free_func;
32248
			/* Aligned allocation helpers layered over a caller-supplied allocator. */
32249			void *libdeflate_aligned_malloc(malloc_func_t malloc_func,
32250			size_t alignment, size_t size);
32251			void libdeflate_aligned_free(free_func_t free_func, void *ptr);
32252
			/*
			 * FREESTANDING builds: declare memset/memcpy/memmove/memcmp locally, map
			 * each name to the corresponding compiler builtin, and force assertions
			 * off.  Hosted builds include a header instead and enable assertions
			 * when running under the clang static analyzer.
			 * NOTE(review): several declarations below read "void s" / "void dest" /
			 * "const void s1" and the #include has no target.  Presumably '*'
			 * characters and the header name were lost in rendering -- confirm
			 * against the original source.
			 */
32253			#ifdef FREESTANDING
32254
32255			void memset(void s, int c, size_t n);
32256			#define memset(s, c, n) __builtin_memset((s), (c), (n))
32257
32258			void memcpy(void dest, const void *src, size_t n);
32259			#define memcpy(dest, src, n) __builtin_memcpy((dest), (src), (n))
32260
32261			void memmove(void dest, const void *src, size_t n);
32262			#define memmove(dest, src, n) __builtin_memmove((dest), (src), (n))
32263
32264			int memcmp(const void s1, const void s2, size_t n);
32265			#define memcmp(s1, s2, n) __builtin_memcmp((s1), (s2), (n))
32266
32267			#undef LIBDEFLATE_ENABLE_ASSERTIONS
32268			#else
32269			# include
32270
32271			# ifdef __clang_analyzer__
32272			# define LIBDEFLATE_ENABLE_ASSERTIONS
32273			# endif
32274			#endif
32275
32276
			/*
			 * ASSERT(expr): with LIBDEFLATE_ENABLE_ASSERTIONS, calls
			 * libdeflate_assertion_failed() with the expression text, file and line
			 * when expr is false.  Otherwise it expands to (void)(expr), so the
			 * expression is still compiled.
			 * The enabled form expands to a bare { ... } block rather than
			 * do { } while (0), so it should not be used as the body of an if that
			 * has an else.
			 * NOTE(review): "const char expr, const char file" presumably lost its
			 * '*' declarators in rendering -- confirm against the original source.
			 */
32277			#ifdef LIBDEFLATE_ENABLE_ASSERTIONS
32278			NORETURN void
32279			libdeflate_assertion_failed(const char expr, const char file, int line);
32280			#define ASSERT(expr) { if (unlikely(!(expr))) \
32281			libdeflate_assertion_failed(#expr, __FILE__, __LINE__); }
32282			#else
32283			#define ASSERT(expr) (void)(expr)
32284			#endif
32285
			/*
			 * Token pasting with macro expansion of both operands (the extra level of
			 * indirection lets a and b expand first).  ADD_SUFFIX(name) appends the
			 * current value of the SUFFIX macro to name.
			 */
32286			#define CONCAT_IMPL(a, b) a##b
32287			#define CONCAT(a, b) CONCAT_IMPL(a, b)
32288			#define ADD_SUFFIX(name) CONCAT(name, SUFFIX)
32289
32290			#endif
32291
32292
32293
32294
			/*
			 * Internal accessor for a compressor's compression level;
			 * libdeflate_gzip_compress() uses it to choose the XFL header byte.
			 */
32295			struct libdeflate_compressor;
32296
32297			unsigned int libdeflate_get_compression_level(struct libdeflate_compressor *c);
32298
32299			#endif
32300
32301			/* #include "gzip_constants.h" */
32302
32303
			/* Constants describing the gzip container format. */
32304			#ifndef LIB_GZIP_CONSTANTS_H
32305			#define LIB_GZIP_CONSTANTS_H
32306
			/* Sizes in bytes; GZIP_MIN_OVERHEAD is header plus footer (18). */
32307			#define GZIP_MIN_HEADER_SIZE 10
32308			#define GZIP_FOOTER_SIZE 8
32309			#define GZIP_MIN_OVERHEAD (GZIP_MIN_HEADER_SIZE + GZIP_FOOTER_SIZE)
32310
			/* The two identification bytes that start a gzip stream. */
32311			#define GZIP_ID1 0x1F
32312			#define GZIP_ID2 0x8B
32313
			/* Compression-method byte value for DEFLATE. */
32314			#define GZIP_CM_DEFLATE 8
32315
			/* Bit masks for the header flags byte; GZIP_FRESERVED covers the top 3 bits. */
32316			#define GZIP_FTEXT 0x01
32317			#define GZIP_FHCRC 0x02
32318			#define GZIP_FEXTRA 0x04
32319			#define GZIP_FNAME 0x08
32320			#define GZIP_FCOMMENT 0x10
32321			#define GZIP_FRESERVED 0xE0
32322
			/* Modification-time value meaning "not available". */
32323			#define GZIP_MTIME_UNAVAILABLE 0
32324
			/* Values for the extra-flags (XFL) byte. */
32325			#define GZIP_XFL_SLOWEST_COMPRESSION 0x02
32326			#define GZIP_XFL_FASTEST_COMPRESSION 0x04
32327
			/* Values for the operating-system byte. */
32328			#define GZIP_OS_FAT 0
32329			#define GZIP_OS_AMIGA 1
32330			#define GZIP_OS_VMS 2
32331			#define GZIP_OS_UNIX 3
32332			#define GZIP_OS_VM_CMS 4
32333			#define GZIP_OS_ATARI_TOS 5
32334			#define GZIP_OS_HPFS 6
32335			#define GZIP_OS_MACINTOSH 7
32336			#define GZIP_OS_Z_SYSTEM 8
32337			#define GZIP_OS_CP_M 9
32338			#define GZIP_OS_TOPS_20 10
32339			#define GZIP_OS_NTFS 11
32340			#define GZIP_OS_QDOS 12
32341			#define GZIP_OS_RISCOS 13
32342			#define GZIP_OS_UNKNOWN 255
32343
32344			#endif
32345
32346
			/*
			 * Compresses in[0..in_nbytes) into out as a single gzip member: a 10-byte
			 * header, the DEFLATE stream, then an 8-byte footer.
			 *
			 * Returns the total number of bytes written, or 0 when out_nbytes_avail
			 * is not greater than GZIP_MIN_OVERHEAD or when
			 * libdeflate_deflate_compress() returns 0.
			 */
32347			LIBDEFLATEAPI size_t
32348	13		libdeflate_gzip_compress(struct libdeflate_compressor *c,
32349			const void *in, size_t in_nbytes,
32350			void *out, size_t out_nbytes_avail)
32351			{
32352	13		u8 *out_next = out;
32353			unsigned compression_level;
32354			u8 xfl;
32355			size_t deflate_size;
32356
			/* Header and footer alone must leave at least one byte for the stream. */
32357	13	50	if (out_nbytes_avail <= GZIP_MIN_OVERHEAD)
32358	0		return 0;
32359
32360
			/* Header: ID1, ID2, compression method. */
32361	13		*out_next++ = GZIP_ID1;
32362
32363	13		*out_next++ = GZIP_ID2;
32364
32365	13		*out_next++ = GZIP_CM_DEFLATE;
32366
			/* Flags byte: none of the GZIP_F* flags are set. */
32367	13		*out_next++ = 0;
32368
			/* Modification time (4 bytes, little-endian): not available. */
32369			put_unaligned_le32(GZIP_MTIME_UNAVAILABLE, out_next);
32370	13		out_next += 4;
32371
			/*
			 * Extra flags: levels below 2 are marked "fastest", levels 8 and above
			 * "slowest", anything in between leaves the byte at 0.
			 */
32372	13		xfl = 0;
32373	13		compression_level = libdeflate_get_compression_level(c);
32374	13	100	if (compression_level < 2)
32375	1		xfl \|= GZIP_XFL_FASTEST_COMPRESSION;
32376	12	100	else if (compression_level >= 8)
32377	5		xfl \|= GZIP_XFL_SLOWEST_COMPRESSION;
32378	13		*out_next++ = xfl;
32379
			/* Operating system byte: unknown. */
32380	13		*out_next++ = GZIP_OS_UNKNOWN;
32381
32382
			/* Compressed data; the space for header and footer is held back. */
32383	13		deflate_size = libdeflate_deflate_compress(c, in, in_nbytes, out_next,
32384			out_nbytes_avail - GZIP_MIN_OVERHEAD);
32385	13	50	if (deflate_size == 0)
32386	0		return 0;
32387	13		out_next += deflate_size;
32388
32389
			/* Footer, part 1: CRC-32 of the uncompressed input, little-endian. */
32390	13		put_unaligned_le32(libdeflate_crc32(0, in, in_nbytes), out_next);
32391	13		out_next += 4;
32392
32393
			/* Footer, part 2: input size truncated to 32 bits, little-endian. */
32394	13		put_unaligned_le32((u32)in_nbytes, out_next);
32395	13		out_next += 4;
32396
32397	13		return out_next - (u8 *)out;
32398			}
32399
			/*
			 * Returns libdeflate_deflate_compress_bound(c, in_nbytes) plus
			 * GZIP_MIN_OVERHEAD, the fixed header and footer size that
			 * libdeflate_gzip_compress() adds around the DEFLATE stream.
			 */
32400			LIBDEFLATEAPI size_t
32401	13		libdeflate_gzip_compress_bound(struct libdeflate_compressor *c,
32402			size_t in_nbytes)
32403			{
32404	13		return GZIP_MIN_OVERHEAD +
32405	13		libdeflate_deflate_compress_bound(c, in_nbytes);
32406			}
32407			/* /usr/home/ben/projects/gzip-libdeflate/../../software/libdeflate/libdeflate-1.25/lib/gzip_decompress.c */
32408
32409
32410			/* #include "lib_common.h" */
32411
32412
32413			#ifndef LIB_LIB_COMMON_H
32414			#define LIB_LIB_COMMON_H
32415
32416			#ifdef LIBDEFLATE_H
32417
32418			# error "lib_common.h must always be included before libdeflate.h"
32419			#endif
32420
32421			#if defined(LIBDEFLATE_DLL) && (defined(_WIN32) \|\| defined(__CYGWIN__))
32422			# define LIBDEFLATE_EXPORT_SYM __declspec(dllexport)
32423			#elif defined(__GNUC__)
32424			# define LIBDEFLATE_EXPORT_SYM __attribute__((visibility("default")))
32425			#else
32426			# define LIBDEFLATE_EXPORT_SYM
32427			#endif
32428
32429
32430			#if defined(__GNUC__) && defined(__i386__)
32431			# define LIBDEFLATE_ALIGN_STACK __attribute__((force_align_arg_pointer))
32432			#else
32433			# define LIBDEFLATE_ALIGN_STACK
32434			#endif
32435
32436			#define LIBDEFLATEAPI LIBDEFLATE_EXPORT_SYM LIBDEFLATE_ALIGN_STACK
32437
32438			/* #include "../common_defs.h" */
32439
32440
32441			#ifndef COMMON_DEFS_H
32442			#define COMMON_DEFS_H
32443
32444			/* #include "libdeflate.h" */
32445
32446
32447			#ifndef LIBDEFLATE_H
32448			#define LIBDEFLATE_H
32449
32450			#include
32451			#include
32452
32453			#ifdef __cplusplus
32454			extern "C" {
32455			#endif
32456
32457			#define LIBDEFLATE_VERSION_MAJOR 1
32458			#define LIBDEFLATE_VERSION_MINOR 25
32459			#define LIBDEFLATE_VERSION_STRING "1.25"
32460
32461
32462			#ifndef LIBDEFLATEAPI
32463			# if defined(LIBDEFLATE_DLL) && (defined(_WIN32) \|\| defined(__CYGWIN__))
32464			# define LIBDEFLATEAPI __declspec(dllimport)
32465			# else
32466			# define LIBDEFLATEAPI
32467			# endif
32468			#endif
32469
32470
32471
32472
32473
32474			struct libdeflate_compressor;
32475			struct libdeflate_options;
32476
32477
32478			LIBDEFLATEAPI struct libdeflate_compressor *
32479			libdeflate_alloc_compressor(int compression_level);
32480
32481
32482			LIBDEFLATEAPI struct libdeflate_compressor *
32483			libdeflate_alloc_compressor_ex(int compression_level,
32484			const struct libdeflate_options *options);
32485
32486
32487			LIBDEFLATEAPI size_t
32488			libdeflate_deflate_compress(struct libdeflate_compressor *compressor,
32489			const void *in, size_t in_nbytes,
32490			void *out, size_t out_nbytes_avail);
32491
32492
32493			LIBDEFLATEAPI size_t
32494			libdeflate_deflate_compress_bound(struct libdeflate_compressor *compressor,
32495			size_t in_nbytes);
32496
32497
32498			LIBDEFLATEAPI size_t
32499			libdeflate_zlib_compress(struct libdeflate_compressor *compressor,
32500			const void *in, size_t in_nbytes,
32501			void *out, size_t out_nbytes_avail);
32502
32503
32504			LIBDEFLATEAPI size_t
32505			libdeflate_zlib_compress_bound(struct libdeflate_compressor *compressor,
32506			size_t in_nbytes);
32507
32508
32509			LIBDEFLATEAPI size_t
32510			libdeflate_gzip_compress(struct libdeflate_compressor *compressor,
32511			const void *in, size_t in_nbytes,
32512			void *out, size_t out_nbytes_avail);
32513
32514
32515			LIBDEFLATEAPI size_t
32516			libdeflate_gzip_compress_bound(struct libdeflate_compressor *compressor,
32517			size_t in_nbytes);
32518
32519
32520			LIBDEFLATEAPI void
32521			libdeflate_free_compressor(struct libdeflate_compressor *compressor);
32522
32523
32524
32525
32526
32527			struct libdeflate_decompressor;
32528			struct libdeflate_options;
32529
32530
32531			LIBDEFLATEAPI struct libdeflate_decompressor *
32532			libdeflate_alloc_decompressor(void);
32533
32534
32535			LIBDEFLATEAPI struct libdeflate_decompressor *
32536			libdeflate_alloc_decompressor_ex(const struct libdeflate_options *options);
32537
32538
32539			enum libdeflate_result {
32540
32541			LIBDEFLATE_SUCCESS = 0,
32542
32543
32544			LIBDEFLATE_BAD_DATA = 1,
32545
32546
32547			LIBDEFLATE_SHORT_OUTPUT = 2,
32548
32549
32550			LIBDEFLATE_INSUFFICIENT_SPACE = 3,
32551			};
32552
32553
32554			LIBDEFLATEAPI enum libdeflate_result
32555			libdeflate_deflate_decompress(struct libdeflate_decompressor *decompressor,
32556			const void *in, size_t in_nbytes,
32557			void *out, size_t out_nbytes_avail,
32558			size_t *actual_out_nbytes_ret);
32559
32560
32561			LIBDEFLATEAPI enum libdeflate_result
32562			libdeflate_deflate_decompress_ex(struct libdeflate_decompressor *decompressor,
32563			const void *in, size_t in_nbytes,
32564			void *out, size_t out_nbytes_avail,
32565			size_t *actual_in_nbytes_ret,
32566			size_t *actual_out_nbytes_ret);
32567
32568
32569			LIBDEFLATEAPI enum libdeflate_result
32570			libdeflate_zlib_decompress(struct libdeflate_decompressor *decompressor,
32571			const void *in, size_t in_nbytes,
32572			void *out, size_t out_nbytes_avail,
32573			size_t *actual_out_nbytes_ret);
32574
32575
32576			LIBDEFLATEAPI enum libdeflate_result
32577			libdeflate_zlib_decompress_ex(struct libdeflate_decompressor *decompressor,
32578			const void *in, size_t in_nbytes,
32579			void *out, size_t out_nbytes_avail,
32580			size_t *actual_in_nbytes_ret,
32581			size_t *actual_out_nbytes_ret);
32582
32583
32584			LIBDEFLATEAPI enum libdeflate_result
32585			libdeflate_gzip_decompress(struct libdeflate_decompressor *decompressor,
32586			const void *in, size_t in_nbytes,
32587			void *out, size_t out_nbytes_avail,
32588			size_t *actual_out_nbytes_ret);
32589
32590
32591			LIBDEFLATEAPI enum libdeflate_result
32592			libdeflate_gzip_decompress_ex(struct libdeflate_decompressor *decompressor,
32593			const void *in, size_t in_nbytes,
32594			void *out, size_t out_nbytes_avail,
32595			size_t *actual_in_nbytes_ret,
32596			size_t *actual_out_nbytes_ret);
32597
32598
32599			LIBDEFLATEAPI void
32600			libdeflate_free_decompressor(struct libdeflate_decompressor *decompressor);
32601
32602
32603
32604
32605
32606
32607			LIBDEFLATEAPI uint32_t
32608			libdeflate_adler32(uint32_t adler, const void *buffer, size_t len);
32609
32610
32611
32612			LIBDEFLATEAPI uint32_t
32613			libdeflate_crc32(uint32_t crc, const void *buffer, size_t len);
32614
32615
32616
32617
32618
32619
32620			LIBDEFLATEAPI void
32621			libdeflate_set_memory_allocator(void *(*malloc_func)(size_t),
32622			void (*free_func)(void *));
32623
32624
32625			struct libdeflate_options {
32626
32627
32628			size_t sizeof_options;
32629
32630
32631			void *(*malloc_func)(size_t);
32632			void (*free_func)(void *);
32633			};
32634
32635			#ifdef __cplusplus
32636			}
32637			#endif
32638
32639			#endif
32640
32641
32642			#include <stdbool.h>
32643			#include <stddef.h>
32644			#include <stdint.h>
32645			#ifdef _MSC_VER
32646			# include <intrin.h>
32647			# include <stdlib.h>
32648
32649
32650			# pragma warning(disable : 4146)
32651
32652			# pragma warning(disable : 4018)
32653			# pragma warning(disable : 4244)
32654			# pragma warning(disable : 4267)
32655			# pragma warning(disable : 4310)
32656
32657			# pragma warning(disable : 4100)
32658			# pragma warning(disable : 4127)
32659			# pragma warning(disable : 4189)
32660			# pragma warning(disable : 4232)
32661			# pragma warning(disable : 4245)
32662			# pragma warning(disable : 4295)
32663			#endif
32664			#ifndef FREESTANDING
32665			# include <string.h>
32666			#endif
32667
32668
32669
32670
32671
32672
32673			#undef ARCH_X86_64
32674			#undef ARCH_X86_32
32675			#undef ARCH_ARM64
32676			#undef ARCH_ARM32
32677			#undef ARCH_RISCV
32678			#ifdef _MSC_VER
32679
32680			# if defined(_M_X64) && !defined(_M_ARM64EC)
32681			# define ARCH_X86_64
32682			# elif defined(_M_IX86)
32683			# define ARCH_X86_32
32684			# elif defined(_M_ARM64)
32685			# define ARCH_ARM64
32686			# elif defined(_M_ARM)
32687			# define ARCH_ARM32
32688			# endif
32689			#else
32690			# if defined(__x86_64__)
32691			# define ARCH_X86_64
32692			# elif defined(__i386__)
32693			# define ARCH_X86_32
32694			# elif defined(__aarch64__)
32695			# define ARCH_ARM64
32696			# elif defined(__arm__)
32697			# define ARCH_ARM32
32698			# elif defined(__riscv)
32699			# define ARCH_RISCV
32700			# endif
32701			#endif
32702
32703
32704
32705
32706
32707
32708			typedef uint8_t u8;
32709			typedef uint16_t u16;
32710			typedef uint32_t u32;
32711			typedef uint64_t u64;
32712			typedef int8_t s8;
32713			typedef int16_t s16;
32714			typedef int32_t s32;
32715			typedef int64_t s64;
32716
32717
32718			#ifdef _MSC_VER
32719			# ifdef _WIN64
32720			typedef long long ssize_t;
32721			# else
32722			typedef long ssize_t;
32723			# endif
32724			#endif
32725
32726
32727			typedef size_t machine_word_t;
32728
32729
32730			#define WORDBYTES ((int)sizeof(machine_word_t))
32731
32732
32733			#define WORDBITS (8 * WORDBYTES)
32734
32735
32736
32737
32738
32739
32740			#if defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER)
32741			# define GCC_PREREQ(major, minor) \
32742			(__GNUC__ > (major) || \
32743			(__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)))
32744			# if !GCC_PREREQ(4, 9)
32745			# error "gcc versions older than 4.9 are no longer supported"
32746			# endif
32747			#else
32748			# define GCC_PREREQ(major, minor) 0
32749			#endif
32750			#ifdef __clang__
32751			# ifdef __apple_build_version__
32752			# define CLANG_PREREQ(major, minor, apple_version) \
32753			(__apple_build_version__ >= (apple_version))
32754			# else
32755			# define CLANG_PREREQ(major, minor, apple_version) \
32756			(__clang_major__ > (major) || \
32757			(__clang_major__ == (major) && __clang_minor__ >= (minor)))
32758			# endif
32759			# if !CLANG_PREREQ(3, 9, 8000000)
32760			# error "clang versions older than 3.9 are no longer supported"
32761			# endif
32762			#else
32763			# define CLANG_PREREQ(major, minor, apple_version) 0
32764			#endif
32765			#ifdef _MSC_VER
32766			# define MSVC_PREREQ(version) (_MSC_VER >= (version))
32767			# if !MSVC_PREREQ(1900)
32768			# error "MSVC versions older than Visual Studio 2015 are no longer supported"
32769			# endif
32770			#else
32771			# define MSVC_PREREQ(version) 0
32772			#endif
32773
32774
32775			#ifndef __has_attribute
32776			# define __has_attribute(attribute) 0
32777			#endif
32778
32779
32780			#ifndef __has_builtin
32781			# define __has_builtin(builtin) 0
32782			#endif
32783
32784
32785			#ifdef _MSC_VER
32786			# define inline __inline
32787			#endif
32788
32789
32790			#if defined(__GNUC__) || __has_attribute(always_inline)
32791			# define forceinline inline __attribute__((always_inline))
32792			#elif defined(_MSC_VER)
32793			# define forceinline __forceinline
32794			#else
32795			# define forceinline inline
32796			#endif
32797
32798
32799			#if defined(__GNUC__) || __has_attribute(unused)
32800			# define MAYBE_UNUSED __attribute__((unused))
32801			#else
32802			# define MAYBE_UNUSED
32803			#endif
32804
32805
32806			#if defined(__GNUC__) || __has_attribute(noreturn)
32807			# define NORETURN __attribute__((noreturn))
32808			#else
32809			# define NORETURN
32810			#endif
32811
32812
32813			#if !defined(__STDC_VERSION__) || (__STDC_VERSION__ < 201112L)
32814			# if defined(__GNUC__) || defined(__clang__)
32815			# define restrict __restrict__
32816			# else
32817			# define restrict
32818			# endif
32819			#endif
32820
32821
32822			#if defined(__GNUC__) || __has_builtin(__builtin_expect)
32823			# define likely(expr) __builtin_expect(!!(expr), 1)
32824			#else
32825			# define likely(expr) (expr)
32826			#endif
32827
32828
32829			#if defined(__GNUC__) || __has_builtin(__builtin_expect)
32830			# define unlikely(expr) __builtin_expect(!!(expr), 0)
32831			#else
32832			# define unlikely(expr) (expr)
32833			#endif
32834
32835
32836			#undef prefetchr
32837			#if defined(__GNUC__) || __has_builtin(__builtin_prefetch)
32838			# define prefetchr(addr) __builtin_prefetch((addr), 0)
32839			#elif defined(_MSC_VER)
32840			# if defined(ARCH_X86_32) || defined(ARCH_X86_64)
32841			# define prefetchr(addr) _mm_prefetch((addr), _MM_HINT_T0)
32842			# elif defined(ARCH_ARM64)
32843			# define prefetchr(addr) __prefetch2((addr), 0x00 )
32844			# elif defined(ARCH_ARM32)
32845			# define prefetchr(addr) __prefetch(addr)
32846			# endif
32847			#endif
32848			#ifndef prefetchr
32849			# define prefetchr(addr)
32850			#endif
32851
32852
32853			#undef prefetchw
32854			#if defined(__GNUC__) || __has_builtin(__builtin_prefetch)
32855			# define prefetchw(addr) __builtin_prefetch((addr), 1)
32856			#elif defined(_MSC_VER)
32857			# if defined(ARCH_X86_32) || defined(ARCH_X86_64)
32858			# define prefetchw(addr) _m_prefetchw(addr)
32859			# elif defined(ARCH_ARM64)
32860			# define prefetchw(addr) __prefetch2((addr), 0x10 )
32861			# elif defined(ARCH_ARM32)
32862			# define prefetchw(addr) __prefetchw(addr)
32863			# endif
32864			#endif
32865			#ifndef prefetchw
32866			# define prefetchw(addr)
32867			#endif
32868
32869
32870			#undef _aligned_attribute
32871			#if defined(__GNUC__) || __has_attribute(aligned)
32872			# define _aligned_attribute(n) __attribute__((aligned(n)))
32873			#elif defined(_MSC_VER)
32874			# define _aligned_attribute(n) __declspec(align(n))
32875			#endif
32876
32877
32878			#if defined(__GNUC__) || __has_attribute(target)
32879			# define _target_attribute(attrs) __attribute__((target(attrs)))
32880			#else
32881			# define _target_attribute(attrs)
32882			#endif
32883
32884
32885
32886
32887
32888			#define ARRAY_LEN(A) (sizeof(A) / sizeof((A)[0]))
32889			#define MIN(a, b) ((a) <= (b) ? (a) : (b))
32890			#define MAX(a, b) ((a) >= (b) ? (a) : (b))
32891			#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
32892			#define STATIC_ASSERT(expr) ((void)sizeof(char[1 - 2 * !(expr)]))
32893			#define ALIGN(n, a) (((n) + (a) - 1) & ~((a) - 1))
32894			#define ROUND_UP(n, d) ((d) * DIV_ROUND_UP((n), (d)))
32895
32896
32897
32898
32899
32900
32901			#if defined(__BYTE_ORDER__)
32902			# define CPU_IS_LITTLE_ENDIAN() (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
32903			#elif defined(_MSC_VER)
32904			# define CPU_IS_LITTLE_ENDIAN() true
32905			#else
32906			static forceinline bool CPU_IS_LITTLE_ENDIAN(void)
32907			{
32908			union {
32909			u32 w;
32910			u8 b;
32911			} u;
32912
32913			u.w = 1;
32914			return u.b;
32915			}
32916			#endif
32917
32918
32919			static forceinline u16 bswap16(u16 v)
32920			{
32921			#if defined(__GNUC__) || __has_builtin(__builtin_bswap16)
32922			return __builtin_bswap16(v);
32923			#elif defined(_MSC_VER)
32924			return _byteswap_ushort(v);
32925			#else
32926			return (v << 8) | (v >> 8);
32927			#endif
32928			}
32929
32930
32931			static forceinline u32 bswap32(u32 v)
32932			{
32933			#if defined(__GNUC__) || __has_builtin(__builtin_bswap32)
32934			return __builtin_bswap32(v);
32935			#elif defined(_MSC_VER)
32936			return _byteswap_ulong(v);
32937			#else
32938			return ((v & 0x000000FF) << 24) |
32939			((v & 0x0000FF00) << 8) |
32940			((v & 0x00FF0000) >> 8) |
32941			((v & 0xFF000000) >> 24);
32942			#endif
32943			}
32944
32945
32946			static forceinline u64 bswap64(u64 v)
32947			{
32948			#if defined(__GNUC__) || __has_builtin(__builtin_bswap64)
32949			return __builtin_bswap64(v);
32950			#elif defined(_MSC_VER)
32951			return _byteswap_uint64(v);
32952			#else
32953			return ((v & 0x00000000000000FF) << 56) |
32954			((v & 0x000000000000FF00) << 40) |
32955			((v & 0x0000000000FF0000) << 24) |
32956			((v & 0x00000000FF000000) << 8) |
32957			((v & 0x000000FF00000000) >> 8) |
32958			((v & 0x0000FF0000000000) >> 24) |
32959			((v & 0x00FF000000000000) >> 40) |
32960			((v & 0xFF00000000000000) >> 56);
32961			#endif
32962			}
32963
32964			#define le16_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap16(v))
32965			#define le32_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap32(v))
32966			#define le64_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap64(v))
32967			#define be16_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? bswap16(v) : (v))
32968			#define be32_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? bswap32(v) : (v))
32969			#define be64_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? bswap64(v) : (v))
32970
32971
32972
32973
32974
32975
32976			#if (defined(__GNUC__) || defined(__clang__)) && \
32977			(defined(ARCH_X86_64) || defined(ARCH_X86_32) || \
32978			defined(__ARM_FEATURE_UNALIGNED) || defined(__powerpc64__) || \
32979			defined(__riscv_misaligned_fast) || \
32980			defined(__wasm__))
32981			# define UNALIGNED_ACCESS_IS_FAST 1
32982			#elif defined(_MSC_VER)
32983			# define UNALIGNED_ACCESS_IS_FAST 1
32984			#else
32985			# define UNALIGNED_ACCESS_IS_FAST 0
32986			#endif
32987
32988
32989
32990			#ifdef FREESTANDING
32991			# define MEMCOPY __builtin_memcpy
32992			#else
32993			# define MEMCOPY memcpy
32994			#endif
32995
32996
32997
32998			#define DEFINE_UNALIGNED_TYPE(type) \
32999			static forceinline type \
33000			load_##type##_unaligned(const void *p) \
33001			{ \
33002			type v; \
33003			\
33004			MEMCOPY(&v, p, sizeof(v)); \
33005			return v; \
33006			} \
33007			\
33008			static forceinline void \
33009			store_##type##_unaligned(type v, void *p) \
33010			{ \
33011			MEMCOPY(p, &v, sizeof(v)); \
33012			}
33013
33014			DEFINE_UNALIGNED_TYPE(u16)
33015			DEFINE_UNALIGNED_TYPE(u32)
33016			DEFINE_UNALIGNED_TYPE(u64)
33017			DEFINE_UNALIGNED_TYPE(machine_word_t)
33018
33019			#undef MEMCOPY
33020
33021			#define load_word_unaligned load_machine_word_t_unaligned
33022			#define store_word_unaligned store_machine_word_t_unaligned
33023
33024
33025
33026			static forceinline u16
33027			get_unaligned_le16(const u8 *p)
33028			{
33029			if (UNALIGNED_ACCESS_IS_FAST)
33030			return le16_bswap(load_u16_unaligned(p));
33031			else
33032			return ((u16)p[1] << 8) | p[0];
33033			}
33034
33035			static forceinline u16
33036			get_unaligned_be16(const u8 *p)
33037			{
33038			if (UNALIGNED_ACCESS_IS_FAST)
33039			return be16_bswap(load_u16_unaligned(p));
33040			else
33041			return ((u16)p[0] << 8) | p[1];
33042			}
33043
33044			static forceinline u32
33045			get_unaligned_le32(const u8 *p)
33046			{
33047			if (UNALIGNED_ACCESS_IS_FAST)
33048			return le32_bswap(load_u32_unaligned(p));
33049			else
33050			return ((u32)p[3] << 24) | ((u32)p[2] << 16) |
33051			((u32)p[1] << 8) | p[0];
33052			}
33053
33054			static forceinline u32
33055			get_unaligned_be32(const u8 *p)
33056			{
33057			if (UNALIGNED_ACCESS_IS_FAST)
33058			return be32_bswap(load_u32_unaligned(p));
33059			else
33060			return ((u32)p[0] << 24) | ((u32)p[1] << 16) |
33061			((u32)p[2] << 8) | p[3];
33062			}
33063
33064			static forceinline u64
33065			get_unaligned_le64(const u8 *p)
33066			{
33067			if (UNALIGNED_ACCESS_IS_FAST)
33068			return le64_bswap(load_u64_unaligned(p));
33069			else
33070			return ((u64)p[7] << 56) | ((u64)p[6] << 48) |
33071			((u64)p[5] << 40) | ((u64)p[4] << 32) |
33072			((u64)p[3] << 24) | ((u64)p[2] << 16) |
33073			((u64)p[1] << 8) | p[0];
33074			}
33075
33076			static forceinline machine_word_t
33077			get_unaligned_leword(const u8 *p)
33078			{
33079			STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
33080			if (WORDBITS == 32)
33081			return get_unaligned_le32(p);
33082			else
33083			return get_unaligned_le64(p);
33084			}
33085
33086
33087
33088			static forceinline void
33089			put_unaligned_le16(u16 v, u8 *p)
33090			{
33091			if (UNALIGNED_ACCESS_IS_FAST) {
33092			store_u16_unaligned(le16_bswap(v), p);
33093			} else {
33094			p[0] = (u8)(v >> 0);
33095			p[1] = (u8)(v >> 8);
33096			}
33097			}
33098
33099			static forceinline void
33100			put_unaligned_be16(u16 v, u8 *p)
33101			{
33102			if (UNALIGNED_ACCESS_IS_FAST) {
33103			store_u16_unaligned(be16_bswap(v), p);
33104			} else {
33105			p[0] = (u8)(v >> 8);
33106			p[1] = (u8)(v >> 0);
33107			}
33108			}
33109
33110			static forceinline void
33111			put_unaligned_le32(u32 v, u8 *p)
33112			{
33113			if (UNALIGNED_ACCESS_IS_FAST) {
33114			store_u32_unaligned(le32_bswap(v), p);
33115			} else {
33116			p[0] = (u8)(v >> 0);
33117			p[1] = (u8)(v >> 8);
33118			p[2] = (u8)(v >> 16);
33119			p[3] = (u8)(v >> 24);
33120			}
33121			}
33122
33123			static forceinline void
33124			put_unaligned_be32(u32 v, u8 *p)
33125			{
33126			if (UNALIGNED_ACCESS_IS_FAST) {
33127			store_u32_unaligned(be32_bswap(v), p);
33128			} else {
33129			p[0] = (u8)(v >> 24);
33130			p[1] = (u8)(v >> 16);
33131			p[2] = (u8)(v >> 8);
33132			p[3] = (u8)(v >> 0);
33133			}
33134			}
33135
33136			static forceinline void
33137			put_unaligned_le64(u64 v, u8 *p)
33138			{
33139			if (UNALIGNED_ACCESS_IS_FAST) {
33140			store_u64_unaligned(le64_bswap(v), p);
33141			} else {
33142			p[0] = (u8)(v >> 0);
33143			p[1] = (u8)(v >> 8);
33144			p[2] = (u8)(v >> 16);
33145			p[3] = (u8)(v >> 24);
33146			p[4] = (u8)(v >> 32);
33147			p[5] = (u8)(v >> 40);
33148			p[6] = (u8)(v >> 48);
33149			p[7] = (u8)(v >> 56);
33150			}
33151			}
33152
33153			static forceinline void
33154			put_unaligned_leword(machine_word_t v, u8 *p)
33155			{
33156			STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
33157			if (WORDBITS == 32)
33158			put_unaligned_le32(v, p);
33159			else
33160			put_unaligned_le64(v, p);
33161			}
33162
33163
33164
33165
33166
33167
33168
33169			static forceinline unsigned
33170			bsr32(u32 v)
33171			{
33172			#if defined(__GNUC__) || __has_builtin(__builtin_clz)
33173			return 31 - __builtin_clz(v);
33174			#elif defined(_MSC_VER)
33175			unsigned long i;
33176
33177			_BitScanReverse(&i, v);
33178			return i;
33179			#else
33180			unsigned i = 0;
33181
33182			while ((v >>= 1) != 0)
33183			i++;
33184			return i;
33185			#endif
33186			}
33187
33188			static forceinline unsigned
33189			bsr64(u64 v)
33190			{
33191			#if defined(__GNUC__) || __has_builtin(__builtin_clzll)
33192			return 63 - __builtin_clzll(v);
33193			#elif defined(_MSC_VER) && defined(_WIN64)
33194			unsigned long i;
33195
33196			_BitScanReverse64(&i, v);
33197			return i;
33198			#else
33199			unsigned i = 0;
33200
33201			while ((v >>= 1) != 0)
33202			i++;
33203			return i;
33204			#endif
33205			}
33206
33207			static forceinline unsigned
33208			bsrw(machine_word_t v)
33209			{
33210			STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
33211			if (WORDBITS == 32)
33212			return bsr32(v);
33213			else
33214			return bsr64(v);
33215			}
33216
33217
33218
33219			static forceinline unsigned
33220			bsf32(u32 v)
33221			{
33222			#if defined(__GNUC__) || __has_builtin(__builtin_ctz)
33223			return __builtin_ctz(v);
33224			#elif defined(_MSC_VER)
33225			unsigned long i;
33226
33227			_BitScanForward(&i, v);
33228			return i;
33229			#else
33230			unsigned i = 0;
33231
33232			for (; (v & 1) == 0; v >>= 1)
33233			i++;
33234			return i;
33235			#endif
33236			}
33237
33238			static forceinline unsigned
33239			bsf64(u64 v)
33240			{
33241			#if defined(__GNUC__) || __has_builtin(__builtin_ctzll)
33242			return __builtin_ctzll(v);
33243			#elif defined(_MSC_VER) && defined(_WIN64)
33244			unsigned long i;
33245
33246			_BitScanForward64(&i, v);
33247			return i;
33248			#else
33249			unsigned i = 0;
33250
33251			for (; (v & 1) == 0; v >>= 1)
33252			i++;
33253			return i;
33254			#endif
33255			}
33256
33257			static forceinline unsigned
33258			bsfw(machine_word_t v)
33259			{
33260			STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
33261			if (WORDBITS == 32)
33262			return bsf32(v);
33263			else
33264			return bsf64(v);
33265			}
33266
33267
33268			#undef rbit32
33269			#if (defined(__GNUC__) || defined(__clang__)) && defined(ARCH_ARM32) && \
33270			(__ARM_ARCH >= 7 || (__ARM_ARCH == 6 && defined(__ARM_ARCH_6T2__)))
33271			static forceinline u32
33272			rbit32(u32 v)
33273			{
33274			__asm__("rbit %0, %1" : "=r" (v) : "r" (v));
33275			return v;
33276			}
33277			#define rbit32 rbit32
33278			#elif (defined(__GNUC__) || defined(__clang__)) && defined(ARCH_ARM64)
33279			static forceinline u32
33280			rbit32(u32 v)
33281			{
33282			__asm__("rbit %w0, %w1" : "=r" (v) : "r" (v));
33283			return v;
33284			}
33285			#define rbit32 rbit32
33286			#endif
33287
33288			#endif
33289
33290
33291			typedef void *(*malloc_func_t)(size_t);
33292			typedef void (*free_func_t)(void *);
33293
33294			extern malloc_func_t libdeflate_default_malloc_func;
33295			extern free_func_t libdeflate_default_free_func;
33296
33297			void *libdeflate_aligned_malloc(malloc_func_t malloc_func,
33298			size_t alignment, size_t size);
33299			void libdeflate_aligned_free(free_func_t free_func, void *ptr);
33300
33301			#ifdef FREESTANDING
33302
33303			void *memset(void *s, int c, size_t n);
33304			#define memset(s, c, n) __builtin_memset((s), (c), (n))
33305
33306			void *memcpy(void *dest, const void *src, size_t n);
33307			#define memcpy(dest, src, n) __builtin_memcpy((dest), (src), (n))
33308
33309			void *memmove(void *dest, const void *src, size_t n);
33310			#define memmove(dest, src, n) __builtin_memmove((dest), (src), (n))
33311
33312			int memcmp(const void *s1, const void *s2, size_t n);
33313			#define memcmp(s1, s2, n) __builtin_memcmp((s1), (s2), (n))
33314
33315			#undef LIBDEFLATE_ENABLE_ASSERTIONS
33316			#else
33317			# include <string.h>
33318
33319			# ifdef __clang_analyzer__
33320			# define LIBDEFLATE_ENABLE_ASSERTIONS
33321			# endif
33322			#endif
33323
33324
33325			#ifdef LIBDEFLATE_ENABLE_ASSERTIONS
33326			NORETURN void
33327			libdeflate_assertion_failed(const char *expr, const char *file, int line);
33328			#define ASSERT(expr) { if (unlikely(!(expr))) \
33329			libdeflate_assertion_failed(#expr, __FILE__, __LINE__); }
33330			#else
33331			#define ASSERT(expr) (void)(expr)
33332			#endif
33333
33334			#define CONCAT_IMPL(a, b) a##b
33335			#define CONCAT(a, b) CONCAT_IMPL(a, b)
33336			#define ADD_SUFFIX(name) CONCAT(name, SUFFIX)
33337
33338			#endif
33339
33340			/* #include "gzip_constants.h" */
33341
33342
33343			#ifndef LIB_GZIP_CONSTANTS_H
33344			#define LIB_GZIP_CONSTANTS_H
33345
33346			#define GZIP_MIN_HEADER_SIZE 10
33347			#define GZIP_FOOTER_SIZE 8
33348			#define GZIP_MIN_OVERHEAD (GZIP_MIN_HEADER_SIZE + GZIP_FOOTER_SIZE)
33349
33350			#define GZIP_ID1 0x1F
33351			#define GZIP_ID2 0x8B
33352
33353			#define GZIP_CM_DEFLATE 8
33354
33355			#define GZIP_FTEXT 0x01
33356			#define GZIP_FHCRC 0x02
33357			#define GZIP_FEXTRA 0x04
33358			#define GZIP_FNAME 0x08
33359			#define GZIP_FCOMMENT 0x10
33360			#define GZIP_FRESERVED 0xE0
33361
33362			#define GZIP_MTIME_UNAVAILABLE 0
33363
33364			#define GZIP_XFL_SLOWEST_COMPRESSION 0x02
33365			#define GZIP_XFL_FASTEST_COMPRESSION 0x04
33366
33367			#define GZIP_OS_FAT 0
33368			#define GZIP_OS_AMIGA 1
33369			#define GZIP_OS_VMS 2
33370			#define GZIP_OS_UNIX 3
33371			#define GZIP_OS_VM_CMS 4
33372			#define GZIP_OS_ATARI_TOS 5
33373			#define GZIP_OS_HPFS 6
33374			#define GZIP_OS_MACINTOSH 7
33375			#define GZIP_OS_Z_SYSTEM 8
33376			#define GZIP_OS_CP_M 9
33377			#define GZIP_OS_TOPS_20 10
33378			#define GZIP_OS_NTFS 11
33379			#define GZIP_OS_QDOS 12
33380			#define GZIP_OS_RISCOS 13
33381			#define GZIP_OS_UNKNOWN 255
33382
33383			#endif
33384
33385
33386			LIBDEFLATEAPI enum libdeflate_result
33387	16		libdeflate_gzip_decompress_ex(struct libdeflate_decompressor *d,
33388			const void *in, size_t in_nbytes,
33389			void *out, size_t out_nbytes_avail,
33390			size_t *actual_in_nbytes_ret,
33391			size_t *actual_out_nbytes_ret)
33392			{
33393	16		const u8 *in_next = in;
33394	16		const u8 * const in_end = in_next + in_nbytes;
33395			u8 flg;
33396			size_t actual_in_nbytes;
33397			size_t actual_out_nbytes;
33398			enum libdeflate_result result;
33399
33400	16	50	if (in_nbytes < GZIP_MIN_OVERHEAD)
33401	0		return LIBDEFLATE_BAD_DATA;
33402
33403
33404	16	50	if (*in_next++ != GZIP_ID1)
33405	0		return LIBDEFLATE_BAD_DATA;
33406
33407	16	50	if (*in_next++ != GZIP_ID2)
33408	0		return LIBDEFLATE_BAD_DATA;
33409
33410	16	50	if (*in_next++ != GZIP_CM_DEFLATE)
33411	0		return LIBDEFLATE_BAD_DATA;
33412	16		flg = *in_next++;
33413
33414	16		in_next += 4;
33415
33416	16		in_next += 1;
33417
33418	16		in_next += 1;
33419
33420	16	50	if (flg & GZIP_FRESERVED)
33421	0		return LIBDEFLATE_BAD_DATA;
33422
33423
33424	16	50	if (flg & GZIP_FEXTRA) {
33425	0		u16 xlen = get_unaligned_le16(in_next);
33426	0		in_next += 2;
33427
33428	0	0	if (in_end - in_next < (u32)xlen + GZIP_FOOTER_SIZE)
33429	0		return LIBDEFLATE_BAD_DATA;
33430
33431	0		in_next += xlen;
33432			}
33433
33434
33435	16	100	if (flg & GZIP_FNAME) {
33436	33	100	while (*in_next++ != 0 && in_next != in_end)
		50
33437			;
33438	3	50	if (in_end - in_next < GZIP_FOOTER_SIZE)
33439	0		return LIBDEFLATE_BAD_DATA;
33440			}
33441
33442
33443	16	50	if (flg & GZIP_FCOMMENT) {
33444	0	0	while (*in_next++ != 0 && in_next != in_end)
		0
33445			;
33446	0	0	if (in_end - in_next < GZIP_FOOTER_SIZE)
33447	0		return LIBDEFLATE_BAD_DATA;
33448			}
33449
33450
33451	16	50	if (flg & GZIP_FHCRC) {
33452	0		in_next += 2;
33453	0	0	if (in_end - in_next < GZIP_FOOTER_SIZE)
33454	0		return LIBDEFLATE_BAD_DATA;
33455			}
33456
33457
33458	16		result = libdeflate_deflate_decompress_ex(d, in_next,
33459	16		in_end - GZIP_FOOTER_SIZE - in_next,
33460			out, out_nbytes_avail,
33461			&actual_in_nbytes,
33462			actual_out_nbytes_ret);
33463	16	50	if (result != LIBDEFLATE_SUCCESS)
33464	0		return result;
33465
33466	16	50	if (actual_out_nbytes_ret)
33467	16		actual_out_nbytes = *actual_out_nbytes_ret;
33468			else
33469	0		actual_out_nbytes = out_nbytes_avail;
33470
33471	16		in_next += actual_in_nbytes;
33472
33473
33474	16		if (libdeflate_crc32(0, out, actual_out_nbytes) !=
33475	16	50	get_unaligned_le32(in_next))
33476	0		return LIBDEFLATE_BAD_DATA;
33477	16		in_next += 4;
33478
33479
33480	32	50	if ((u32)actual_out_nbytes != get_unaligned_le32(in_next))
33481	0		return LIBDEFLATE_BAD_DATA;
33482	16		in_next += 4;
33483
33484	16	50	if (actual_in_nbytes_ret)
33485	16		*actual_in_nbytes_ret = in_next - (u8 *)in;
33486
33487	16		return LIBDEFLATE_SUCCESS;
33488			}
33489
33490			LIBDEFLATEAPI enum libdeflate_result
33491	0		libdeflate_gzip_decompress(struct libdeflate_decompressor *d,
33492			const void *in, size_t in_nbytes,
33493			void *out, size_t out_nbytes_avail,
33494			size_t *actual_out_nbytes_ret)
33495			{
33496	0		return libdeflate_gzip_decompress_ex(d, in, in_nbytes,
33497			out, out_nbytes_avail,
33498			NULL, actual_out_nbytes_ret);
33499			}
33500			/* /usr/home/ben/projects/gzip-libdeflate/../../software/libdeflate/libdeflate-1.25/lib/utils.c */
33501
33502
33503			/* #include "lib_common.h" */
33504
33505
33506			#ifndef LIB_LIB_COMMON_H
33507			#define LIB_LIB_COMMON_H
33508
33509			#ifdef LIBDEFLATE_H
33510
33511			# error "lib_common.h must always be included before libdeflate.h"
33512			#endif
33513
33514			#if defined(LIBDEFLATE_DLL) && (defined(_WIN32) || defined(__CYGWIN__))
33515			# define LIBDEFLATE_EXPORT_SYM __declspec(dllexport)
33516			#elif defined(__GNUC__)
33517			# define LIBDEFLATE_EXPORT_SYM __attribute__((visibility("default")))
33518			#else
33519			# define LIBDEFLATE_EXPORT_SYM
33520			#endif
33521
33522
33523			#if defined(__GNUC__) && defined(__i386__)
33524			# define LIBDEFLATE_ALIGN_STACK __attribute__((force_align_arg_pointer))
33525			#else
33526			# define LIBDEFLATE_ALIGN_STACK
33527			#endif
33528
33529			#define LIBDEFLATEAPI LIBDEFLATE_EXPORT_SYM LIBDEFLATE_ALIGN_STACK
33530
33531			/* #include "../common_defs.h" */
33532
33533
33534			#ifndef COMMON_DEFS_H
33535			#define COMMON_DEFS_H
33536
33537			/* #include "libdeflate.h" */
33538
33539
33540			#ifndef LIBDEFLATE_H
33541			#define LIBDEFLATE_H
33542
33543			#include <stddef.h>
33544			#include <stdint.h>
33545
33546			#ifdef __cplusplus
33547			extern "C" {
33548			#endif
33549
33550			#define LIBDEFLATE_VERSION_MAJOR 1
33551			#define LIBDEFLATE_VERSION_MINOR 25
33552			#define LIBDEFLATE_VERSION_STRING "1.25"
33553
33554
33555			#ifndef LIBDEFLATEAPI
33556			# if defined(LIBDEFLATE_DLL) && (defined(_WIN32) || defined(__CYGWIN__))
33557			# define LIBDEFLATEAPI __declspec(dllimport)
33558			# else
33559			# define LIBDEFLATEAPI
33560			# endif
33561			#endif
33562
33563
33564
33565
33566
33567			struct libdeflate_compressor;
33568			struct libdeflate_options;
33569
33570
33571			LIBDEFLATEAPI struct libdeflate_compressor *
33572			libdeflate_alloc_compressor(int compression_level);
33573
33574
33575			LIBDEFLATEAPI struct libdeflate_compressor *
33576			libdeflate_alloc_compressor_ex(int compression_level,
33577			const struct libdeflate_options *options);
33578
33579
33580			LIBDEFLATEAPI size_t
33581			libdeflate_deflate_compress(struct libdeflate_compressor *compressor,
33582			const void *in, size_t in_nbytes,
33583			void *out, size_t out_nbytes_avail);
33584
33585
33586			LIBDEFLATEAPI size_t
33587			libdeflate_deflate_compress_bound(struct libdeflate_compressor *compressor,
33588			size_t in_nbytes);
33589
33590
33591			LIBDEFLATEAPI size_t
33592			libdeflate_zlib_compress(struct libdeflate_compressor *compressor,
33593			const void *in, size_t in_nbytes,
33594			void *out, size_t out_nbytes_avail);
33595
33596
33597			LIBDEFLATEAPI size_t
33598			libdeflate_zlib_compress_bound(struct libdeflate_compressor *compressor,
33599			size_t in_nbytes);
33600
33601
33602			LIBDEFLATEAPI size_t
33603			libdeflate_gzip_compress(struct libdeflate_compressor *compressor,
33604			const void *in, size_t in_nbytes,
33605			void *out, size_t out_nbytes_avail);
33606
33607
33608			LIBDEFLATEAPI size_t
33609			libdeflate_gzip_compress_bound(struct libdeflate_compressor *compressor,
33610			size_t in_nbytes);
33611
33612
33613			LIBDEFLATEAPI void
33614			libdeflate_free_compressor(struct libdeflate_compressor *compressor);
33615
33616
33617
33618
33619
33620			struct libdeflate_decompressor;
33621			struct libdeflate_options;
33622
33623
33624			LIBDEFLATEAPI struct libdeflate_decompressor *
33625			libdeflate_alloc_decompressor(void);
33626
33627
33628			LIBDEFLATEAPI struct libdeflate_decompressor *
33629			libdeflate_alloc_decompressor_ex(const struct libdeflate_options *options);
33630
33631
33632			enum libdeflate_result {
33633
33634			LIBDEFLATE_SUCCESS = 0,
33635
33636
33637			LIBDEFLATE_BAD_DATA = 1,
33638
33639
33640			LIBDEFLATE_SHORT_OUTPUT = 2,
33641
33642
33643			LIBDEFLATE_INSUFFICIENT_SPACE = 3,
33644			};
33645
33646
33647			LIBDEFLATEAPI enum libdeflate_result
33648			libdeflate_deflate_decompress(struct libdeflate_decompressor *decompressor,
33649			const void *in, size_t in_nbytes,
33650			void *out, size_t out_nbytes_avail,
33651			size_t *actual_out_nbytes_ret);
33652
33653
33654			LIBDEFLATEAPI enum libdeflate_result
33655			libdeflate_deflate_decompress_ex(struct libdeflate_decompressor *decompressor,
33656			const void *in, size_t in_nbytes,
33657			void *out, size_t out_nbytes_avail,
33658			size_t *actual_in_nbytes_ret,
33659			size_t *actual_out_nbytes_ret);
33660
33661
33662			LIBDEFLATEAPI enum libdeflate_result
33663			libdeflate_zlib_decompress(struct libdeflate_decompressor *decompressor,
33664			const void *in, size_t in_nbytes,
33665			void *out, size_t out_nbytes_avail,
33666			size_t *actual_out_nbytes_ret);
33667
33668
33669			LIBDEFLATEAPI enum libdeflate_result
33670			libdeflate_zlib_decompress_ex(struct libdeflate_decompressor *decompressor,
33671			const void *in, size_t in_nbytes,
33672			void *out, size_t out_nbytes_avail,
33673			size_t *actual_in_nbytes_ret,
33674			size_t *actual_out_nbytes_ret);
33675
33676
33677			LIBDEFLATEAPI enum libdeflate_result
33678			libdeflate_gzip_decompress(struct libdeflate_decompressor *decompressor,
33679			const void *in, size_t in_nbytes,
33680			void *out, size_t out_nbytes_avail,
33681			size_t *actual_out_nbytes_ret);
33682
33683
33684			LIBDEFLATEAPI enum libdeflate_result
33685			libdeflate_gzip_decompress_ex(struct libdeflate_decompressor *decompressor,
33686			const void *in, size_t in_nbytes,
33687			void *out, size_t out_nbytes_avail,
33688			size_t *actual_in_nbytes_ret,
33689			size_t *actual_out_nbytes_ret);
33690
33691
33692			LIBDEFLATEAPI void
33693			libdeflate_free_decompressor(struct libdeflate_decompressor *decompressor);
33694
33695
33696
33697
33698
33699
33700			LIBDEFLATEAPI uint32_t
33701			libdeflate_adler32(uint32_t adler, const void *buffer, size_t len);
33702
33703
33704
33705			LIBDEFLATEAPI uint32_t
33706			libdeflate_crc32(uint32_t crc, const void *buffer, size_t len);
33707
33708
33709
33710
33711
33712
33713			LIBDEFLATEAPI void
33714			libdeflate_set_memory_allocator(void (malloc_func)(size_t),
33715			void (free_func)(void ));
33716
33717
33718			struct libdeflate_options {
33719
33720
33721			size_t sizeof_options;
33722
33723
33724			void (malloc_func)(size_t);
33725			void (free_func)(void );
33726			};
33727
33728			#ifdef __cplusplus
33729			}
33730			#endif
33731
33732			#endif
33733
33734
33735			#include
33736			#include
33737			#include
33738			#ifdef _MSC_VER
33739			# include
33740			# include
33741
33742
33743			# pragma warning(disable : 4146)
33744
33745			# pragma warning(disable : 4018)
33746			# pragma warning(disable : 4244)
33747			# pragma warning(disable : 4267)
33748			# pragma warning(disable : 4310)
33749
33750			# pragma warning(disable : 4100)
33751			# pragma warning(disable : 4127)
33752			# pragma warning(disable : 4189)
33753			# pragma warning(disable : 4232)
33754			# pragma warning(disable : 4245)
33755			# pragma warning(disable : 4295)
33756			#endif
33757			#ifndef FREESTANDING
33758			# include
33759			#endif
33760
33761
33762
33763
33764
33765
33766			#undef ARCH_X86_64
33767			#undef ARCH_X86_32
33768			#undef ARCH_ARM64
33769			#undef ARCH_ARM32
33770			#undef ARCH_RISCV
33771			#ifdef _MSC_VER
33772
33773			# if defined(_M_X64) && !defined(_M_ARM64EC)
33774			# define ARCH_X86_64
33775			# elif defined(_M_IX86)
33776			# define ARCH_X86_32
33777			# elif defined(_M_ARM64)
33778			# define ARCH_ARM64
33779			# elif defined(_M_ARM)
33780			# define ARCH_ARM32
33781			# endif
33782			#else
33783			# if defined(__x86_64__)
33784			# define ARCH_X86_64
33785			# elif defined(__i386__)
33786			# define ARCH_X86_32
33787			# elif defined(__aarch64__)
33788			# define ARCH_ARM64
33789			# elif defined(__arm__)
33790			# define ARCH_ARM32
33791			# elif defined(__riscv)
33792			# define ARCH_RISCV
33793			# endif
33794			#endif
33795
33796
33797
33798
33799
33800
33801			typedef uint8_t u8;
33802			typedef uint16_t u16;
33803			typedef uint32_t u32;
33804			typedef uint64_t u64;
33805			typedef int8_t s8;
33806			typedef int16_t s16;
33807			typedef int32_t s32;
33808			typedef int64_t s64;
33809
33810
33811			#ifdef _MSC_VER
33812			# ifdef _WIN64
33813			typedef long long ssize_t;
33814			# else
33815			typedef long ssize_t;
33816			# endif
33817			#endif
33818
33819
33820			typedef size_t machine_word_t;
33821
33822
33823			#define WORDBYTES ((int)sizeof(machine_word_t))
33824
33825
33826			#define WORDBITS (8 * WORDBYTES)
33827
33828
33829
33830
33831
33832
33833			#if defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER)
33834			# define GCC_PREREQ(major, minor) \
33835			(__GNUC__ > (major) \|\| \
33836			(__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)))
33837			# if !GCC_PREREQ(4, 9)
33838			# error "gcc versions older than 4.9 are no longer supported"
33839			# endif
33840			#else
33841			# define GCC_PREREQ(major, minor) 0
33842			#endif
33843			#ifdef __clang__
33844			# ifdef __apple_build_version__
33845			# define CLANG_PREREQ(major, minor, apple_version) \
33846			(__apple_build_version__ >= (apple_version))
33847			# else
33848			# define CLANG_PREREQ(major, minor, apple_version) \
33849			(__clang_major__ > (major) \|\| \
33850			(__clang_major__ == (major) && __clang_minor__ >= (minor)))
33851			# endif
33852			# if !CLANG_PREREQ(3, 9, 8000000)
33853			# error "clang versions older than 3.9 are no longer supported"
33854			# endif
33855			#else
33856			# define CLANG_PREREQ(major, minor, apple_version) 0
33857			#endif
33858			#ifdef _MSC_VER
33859			# define MSVC_PREREQ(version) (_MSC_VER >= (version))
33860			# if !MSVC_PREREQ(1900)
33861			# error "MSVC versions older than Visual Studio 2015 are no longer supported"
33862			# endif
33863			#else
33864			# define MSVC_PREREQ(version) 0
33865			#endif
33866
33867
33868			#ifndef __has_attribute
33869			# define __has_attribute(attribute) 0
33870			#endif
33871
33872
33873			#ifndef __has_builtin
33874			# define __has_builtin(builtin) 0
33875			#endif
33876
33877
33878			#ifdef _MSC_VER
33879			# define inline __inline
33880			#endif
33881
33882
33883			#if defined(__GNUC__) \|\| __has_attribute(always_inline)
33884			# define forceinline inline __attribute__((always_inline))
33885			#elif defined(_MSC_VER)
33886			# define forceinline __forceinline
33887			#else
33888			# define forceinline inline
33889			#endif
33890
33891
33892			#if defined(__GNUC__) \|\| __has_attribute(unused)
33893			# define MAYBE_UNUSED __attribute__((unused))
33894			#else
33895			# define MAYBE_UNUSED
33896			#endif
33897
33898
33899			#if defined(__GNUC__) \|\| __has_attribute(noreturn)
33900			# define NORETURN __attribute__((noreturn))
33901			#else
33902			# define NORETURN
33903			#endif
33904
33905
33906			#if !defined(__STDC_VERSION__) \|\| (__STDC_VERSION__ < 201112L)
33907			# if defined(__GNUC__) \|\| defined(__clang__)
33908			# define restrict __restrict__
33909			# else
33910			# define restrict
33911			# endif
33912			#endif
33913
33914
33915			#if defined(__GNUC__) \|\| __has_builtin(__builtin_expect)
33916			# define likely(expr) __builtin_expect(!!(expr), 1)
33917			#else
33918			# define likely(expr) (expr)
33919			#endif
33920
33921
33922			#if defined(__GNUC__) \|\| __has_builtin(__builtin_expect)
33923			# define unlikely(expr) __builtin_expect(!!(expr), 0)
33924			#else
33925			# define unlikely(expr) (expr)
33926			#endif
33927
33928
33929			#undef prefetchr
33930			#if defined(__GNUC__) \|\| __has_builtin(__builtin_prefetch)
33931			# define prefetchr(addr) __builtin_prefetch((addr), 0)
33932			#elif defined(_MSC_VER)
33933			# if defined(ARCH_X86_32) \|\| defined(ARCH_X86_64)
33934			# define prefetchr(addr) _mm_prefetch((addr), _MM_HINT_T0)
33935			# elif defined(ARCH_ARM64)
33936			# define prefetchr(addr) __prefetch2((addr), 0x00 )
33937			# elif defined(ARCH_ARM32)
33938			# define prefetchr(addr) __prefetch(addr)
33939			# endif
33940			#endif
33941			#ifndef prefetchr
33942			# define prefetchr(addr)
33943			#endif
33944
33945
33946			#undef prefetchw
33947			#if defined(__GNUC__) \|\| __has_builtin(__builtin_prefetch)
33948			# define prefetchw(addr) __builtin_prefetch((addr), 1)
33949			#elif defined(_MSC_VER)
33950			# if defined(ARCH_X86_32) \|\| defined(ARCH_X86_64)
33951			# define prefetchw(addr) _m_prefetchw(addr)
33952			# elif defined(ARCH_ARM64)
33953			# define prefetchw(addr) __prefetch2((addr), 0x10 )
33954			# elif defined(ARCH_ARM32)
33955			# define prefetchw(addr) __prefetchw(addr)
33956			# endif
33957			#endif
33958			#ifndef prefetchw
33959			# define prefetchw(addr)
33960			#endif
33961
33962
33963			#undef _aligned_attribute
33964			#if defined(__GNUC__) \|\| __has_attribute(aligned)
33965			# define _aligned_attribute(n) __attribute__((aligned(n)))
33966			#elif defined(_MSC_VER)
33967			# define _aligned_attribute(n) __declspec(align(n))
33968			#endif
33969
33970
33971			#if defined(__GNUC__) \|\| __has_attribute(target)
33972			# define _target_attribute(attrs) __attribute__((target(attrs)))
33973			#else
33974			# define _target_attribute(attrs)
33975			#endif
33976
33977
33978
33979
33980
33981			#define ARRAY_LEN(A) (sizeof(A) / sizeof((A)[0]))
33982			#define MIN(a, b) ((a) <= (b) ? (a) : (b))
33983			#define MAX(a, b) ((a) >= (b) ? (a) : (b))
33984			#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
33985			#define STATIC_ASSERT(expr) ((void)sizeof(char[1 - 2 * !(expr)]))
33986			#define ALIGN(n, a) (((n) + (a) - 1) & ~((a) - 1))
33987			#define ROUND_UP(n, d) ((d) * DIV_ROUND_UP((n), (d)))
33988
33989
33990
33991
33992
33993
33994			#if defined(__BYTE_ORDER__)
33995			# define CPU_IS_LITTLE_ENDIAN() (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
33996			#elif defined(_MSC_VER)
33997			# define CPU_IS_LITTLE_ENDIAN() true
33998			#else
33999			static forceinline bool CPU_IS_LITTLE_ENDIAN(void)
34000			{
34001			union {
34002			u32 w;
34003			u8 b;
34004			} u;
34005
34006			u.w = 1;
34007			return u.b;
34008			}
34009			#endif
34010
34011
34012			static forceinline u16 bswap16(u16 v)
34013			{
34014			#if defined(__GNUC__) \|\| __has_builtin(__builtin_bswap16)
34015			return __builtin_bswap16(v);
34016			#elif defined(_MSC_VER)
34017			return _byteswap_ushort(v);
34018			#else
34019			return (v << 8) \| (v >> 8);
34020			#endif
34021			}
34022
34023
34024			static forceinline u32 bswap32(u32 v)
34025			{
34026			#if defined(__GNUC__) \|\| __has_builtin(__builtin_bswap32)
34027			return __builtin_bswap32(v);
34028			#elif defined(_MSC_VER)
34029			return _byteswap_ulong(v);
34030			#else
34031			return ((v & 0x000000FF) << 24) \|
34032			((v & 0x0000FF00) << 8) \|
34033			((v & 0x00FF0000) >> 8) \|
34034			((v & 0xFF000000) >> 24);
34035			#endif
34036			}
34037
34038
34039			static forceinline u64 bswap64(u64 v)
34040			{
34041			#if defined(__GNUC__) \|\| __has_builtin(__builtin_bswap64)
34042			return __builtin_bswap64(v);
34043			#elif defined(_MSC_VER)
34044			return _byteswap_uint64(v);
34045			#else
34046			return ((v & 0x00000000000000FF) << 56) \|
34047			((v & 0x000000000000FF00) << 40) \|
34048			((v & 0x0000000000FF0000) << 24) \|
34049			((v & 0x00000000FF000000) << 8) \|
34050			((v & 0x000000FF00000000) >> 8) \|
34051			((v & 0x0000FF0000000000) >> 24) \|
34052			((v & 0x00FF000000000000) >> 40) \|
34053			((v & 0xFF00000000000000) >> 56);
34054			#endif
34055			}
34056
34057			#define le16_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap16(v))
34058			#define le32_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap32(v))
34059			#define le64_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap64(v))
34060			#define be16_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? bswap16(v) : (v))
34061			#define be32_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? bswap32(v) : (v))
34062			#define be64_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? bswap64(v) : (v))
34063
34064
34065
34066
34067
34068
34069			#if (defined(__GNUC__) \|\| defined(__clang__)) && \
34070			(defined(ARCH_X86_64) \|\| defined(ARCH_X86_32) \|\| \
34071			defined(__ARM_FEATURE_UNALIGNED) \|\| defined(__powerpc64__) \|\| \
34072			defined(__riscv_misaligned_fast) \|\| \
34073			defined(__wasm__))
34074			# define UNALIGNED_ACCESS_IS_FAST 1
34075			#elif defined(_MSC_VER)
34076			# define UNALIGNED_ACCESS_IS_FAST 1
34077			#else
34078			# define UNALIGNED_ACCESS_IS_FAST 0
34079			#endif
34080
34081
34082
34083			#ifdef FREESTANDING
34084			# define MEMCOPY __builtin_memcpy
34085			#else
34086			# define MEMCOPY memcpy
34087			#endif
34088
34089
34090
34091			#define DEFINE_UNALIGNED_TYPE(type) \
34092			static forceinline type \
34093			load_##type##_unaligned(const void *p) \
34094			{ \
34095			type v; \
34096			\
34097			MEMCOPY(&v, p, sizeof(v)); \
34098			return v; \
34099			} \
34100			\
34101			static forceinline void \
34102			store_##type##_unaligned(type v, void *p) \
34103			{ \
34104			MEMCOPY(p, &v, sizeof(v)); \
34105			}
34106
34107			DEFINE_UNALIGNED_TYPE(u16)
34108			DEFINE_UNALIGNED_TYPE(u32)
34109			DEFINE_UNALIGNED_TYPE(u64)
34110			DEFINE_UNALIGNED_TYPE(machine_word_t)
34111
34112			#undef MEMCOPY
34113
34114			#define load_word_unaligned load_machine_word_t_unaligned
34115			#define store_word_unaligned store_machine_word_t_unaligned
34116
34117
34118
34119			static forceinline u16
34120			get_unaligned_le16(const u8 *p)
34121			{
34122			if (UNALIGNED_ACCESS_IS_FAST)
34123			return le16_bswap(load_u16_unaligned(p));
34124			else
34125			return ((u16)p[1] << 8) \| p[0];
34126			}
34127
34128			static forceinline u16
34129			get_unaligned_be16(const u8 *p)
34130			{
34131			if (UNALIGNED_ACCESS_IS_FAST)
34132			return be16_bswap(load_u16_unaligned(p));
34133			else
34134			return ((u16)p[0] << 8) \| p[1];
34135			}
34136
34137			static forceinline u32
34138			get_unaligned_le32(const u8 *p)
34139			{
34140			if (UNALIGNED_ACCESS_IS_FAST)
34141			return le32_bswap(load_u32_unaligned(p));
34142			else
34143			return ((u32)p[3] << 24) \| ((u32)p[2] << 16) \|
34144			((u32)p[1] << 8) \| p[0];
34145			}
34146
34147			static forceinline u32
34148			get_unaligned_be32(const u8 *p)
34149			{
34150			if (UNALIGNED_ACCESS_IS_FAST)
34151			return be32_bswap(load_u32_unaligned(p));
34152			else
34153			return ((u32)p[0] << 24) \| ((u32)p[1] << 16) \|
34154			((u32)p[2] << 8) \| p[3];
34155			}
34156
34157			static forceinline u64
34158			get_unaligned_le64(const u8 *p)
34159			{
34160			if (UNALIGNED_ACCESS_IS_FAST)
34161			return le64_bswap(load_u64_unaligned(p));
34162			else
34163			return ((u64)p[7] << 56) \| ((u64)p[6] << 48) \|
34164			((u64)p[5] << 40) \| ((u64)p[4] << 32) \|
34165			((u64)p[3] << 24) \| ((u64)p[2] << 16) \|
34166			((u64)p[1] << 8) \| p[0];
34167			}
34168
34169			static forceinline machine_word_t
34170			get_unaligned_leword(const u8 *p)
34171			{
34172			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
34173			if (WORDBITS == 32)
34174			return get_unaligned_le32(p);
34175			else
34176			return get_unaligned_le64(p);
34177			}
34178
34179
34180
34181			static forceinline void
34182			put_unaligned_le16(u16 v, u8 *p)
34183			{
34184			if (UNALIGNED_ACCESS_IS_FAST) {
34185			store_u16_unaligned(le16_bswap(v), p);
34186			} else {
34187			p[0] = (u8)(v >> 0);
34188			p[1] = (u8)(v >> 8);
34189			}
34190			}
34191
34192			static forceinline void
34193			put_unaligned_be16(u16 v, u8 *p)
34194			{
34195			if (UNALIGNED_ACCESS_IS_FAST) {
34196			store_u16_unaligned(be16_bswap(v), p);
34197			} else {
34198			p[0] = (u8)(v >> 8);
34199			p[1] = (u8)(v >> 0);
34200			}
34201			}
34202
34203			static forceinline void
34204			put_unaligned_le32(u32 v, u8 *p)
34205			{
34206			if (UNALIGNED_ACCESS_IS_FAST) {
34207			store_u32_unaligned(le32_bswap(v), p);
34208			} else {
34209			p[0] = (u8)(v >> 0);
34210			p[1] = (u8)(v >> 8);
34211			p[2] = (u8)(v >> 16);
34212			p[3] = (u8)(v >> 24);
34213			}
34214			}
34215
34216			static forceinline void
34217			put_unaligned_be32(u32 v, u8 *p)
34218			{
34219			if (UNALIGNED_ACCESS_IS_FAST) {
34220			store_u32_unaligned(be32_bswap(v), p);
34221			} else {
34222			p[0] = (u8)(v >> 24);
34223			p[1] = (u8)(v >> 16);
34224			p[2] = (u8)(v >> 8);
34225			p[3] = (u8)(v >> 0);
34226			}
34227			}
34228
34229			static forceinline void
34230			put_unaligned_le64(u64 v, u8 *p)
34231			{
34232			if (UNALIGNED_ACCESS_IS_FAST) {
34233			store_u64_unaligned(le64_bswap(v), p);
34234			} else {
34235			p[0] = (u8)(v >> 0);
34236			p[1] = (u8)(v >> 8);
34237			p[2] = (u8)(v >> 16);
34238			p[3] = (u8)(v >> 24);
34239			p[4] = (u8)(v >> 32);
34240			p[5] = (u8)(v >> 40);
34241			p[6] = (u8)(v >> 48);
34242			p[7] = (u8)(v >> 56);
34243			}
34244			}
34245
34246			static forceinline void
34247			put_unaligned_leword(machine_word_t v, u8 *p)
34248			{
34249			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
34250			if (WORDBITS == 32)
34251			put_unaligned_le32(v, p);
34252			else
34253			put_unaligned_le64(v, p);
34254			}
34255
34256
34257
34258
34259
34260
34261
34262			static forceinline unsigned
34263			bsr32(u32 v)
34264			{
34265			#if defined(__GNUC__) \|\| __has_builtin(__builtin_clz)
34266			return 31 - __builtin_clz(v);
34267			#elif defined(_MSC_VER)
34268			unsigned long i;
34269
34270			_BitScanReverse(&i, v);
34271			return i;
34272			#else
34273			unsigned i = 0;
34274
34275			while ((v >>= 1) != 0)
34276			i++;
34277			return i;
34278			#endif
34279			}
34280
34281			static forceinline unsigned
34282			bsr64(u64 v)
34283			{
34284			#if defined(__GNUC__) \|\| __has_builtin(__builtin_clzll)
34285			return 63 - __builtin_clzll(v);
34286			#elif defined(_MSC_VER) && defined(_WIN64)
34287			unsigned long i;
34288
34289			_BitScanReverse64(&i, v);
34290			return i;
34291			#else
34292			unsigned i = 0;
34293
34294			while ((v >>= 1) != 0)
34295			i++;
34296			return i;
34297			#endif
34298			}
34299
34300			static forceinline unsigned
34301			bsrw(machine_word_t v)
34302			{
34303			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
34304			if (WORDBITS == 32)
34305			return bsr32(v);
34306			else
34307			return bsr64(v);
34308			}
34309
34310
34311
34312			static forceinline unsigned
34313			bsf32(u32 v)
34314			{
34315			#if defined(__GNUC__) \|\| __has_builtin(__builtin_ctz)
34316			return __builtin_ctz(v);
34317			#elif defined(_MSC_VER)
34318			unsigned long i;
34319
34320			_BitScanForward(&i, v);
34321			return i;
34322			#else
34323			unsigned i = 0;
34324
34325			for (; (v & 1) == 0; v >>= 1)
34326			i++;
34327			return i;
34328			#endif
34329			}
34330
34331			static forceinline unsigned
34332			bsf64(u64 v)
34333			{
34334			#if defined(__GNUC__) \|\| __has_builtin(__builtin_ctzll)
34335			return __builtin_ctzll(v);
34336			#elif defined(_MSC_VER) && defined(_WIN64)
34337			unsigned long i;
34338
34339			_BitScanForward64(&i, v);
34340			return i;
34341			#else
34342			unsigned i = 0;
34343
34344			for (; (v & 1) == 0; v >>= 1)
34345			i++;
34346			return i;
34347			#endif
34348			}
34349
34350			static forceinline unsigned
34351			bsfw(machine_word_t v)
34352			{
34353			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
34354			if (WORDBITS == 32)
34355			return bsf32(v);
34356			else
34357			return bsf64(v);
34358			}
34359
34360
34361			#undef rbit32
34362			#if (defined(__GNUC__) \|\| defined(__clang__)) && defined(ARCH_ARM32) && \
34363			(__ARM_ARCH >= 7 \|\| (__ARM_ARCH == 6 && defined(__ARM_ARCH_6T2__)))
34364			static forceinline u32
34365			rbit32(u32 v)
34366			{
34367			__asm__("rbit %0, %1" : "=r" (v) : "r" (v));
34368			return v;
34369			}
34370			#define rbit32 rbit32
34371			#elif (defined(__GNUC__) \|\| defined(__clang__)) && defined(ARCH_ARM64)
34372			static forceinline u32
34373			rbit32(u32 v)
34374			{
34375			__asm__("rbit %w0, %w1" : "=r" (v) : "r" (v));
34376			return v;
34377			}
34378			#define rbit32 rbit32
34379			#endif
34380
34381			#endif
34382
34383
34384			typedef void (malloc_func_t)(size_t);
34385			typedef void (free_func_t)(void );
34386
34387			extern malloc_func_t libdeflate_default_malloc_func;
34388			extern free_func_t libdeflate_default_free_func;
34389
34390			void *libdeflate_aligned_malloc(malloc_func_t malloc_func,
34391			size_t alignment, size_t size);
34392			void libdeflate_aligned_free(free_func_t free_func, void *ptr);
34393
34394			#ifdef FREESTANDING
34395
34396			void memset(void s, int c, size_t n);
34397			#define memset(s, c, n) __builtin_memset((s), (c), (n))
34398
34399			void memcpy(void dest, const void *src, size_t n);
34400			#define memcpy(dest, src, n) __builtin_memcpy((dest), (src), (n))
34401
34402			void memmove(void dest, const void *src, size_t n);
34403			#define memmove(dest, src, n) __builtin_memmove((dest), (src), (n))
34404
34405			int memcmp(const void s1, const void s2, size_t n);
34406			#define memcmp(s1, s2, n) __builtin_memcmp((s1), (s2), (n))
34407
34408			#undef LIBDEFLATE_ENABLE_ASSERTIONS
34409			#else
34410			# include
34411
34412			# ifdef __clang_analyzer__
34413			# define LIBDEFLATE_ENABLE_ASSERTIONS
34414			# endif
34415			#endif
34416
34417
34418			#ifdef LIBDEFLATE_ENABLE_ASSERTIONS
34419			NORETURN void
34420			libdeflate_assertion_failed(const char expr, const char file, int line);
34421			#define ASSERT(expr) { if (unlikely(!(expr))) \
34422			libdeflate_assertion_failed(#expr, __FILE__, __LINE__); }
34423			#else
34424			#define ASSERT(expr) (void)(expr)
34425			#endif
34426
34427			#define CONCAT_IMPL(a, b) a##b
34428			#define CONCAT(a, b) CONCAT_IMPL(a, b)
34429			#define ADD_SUFFIX(name) CONCAT(name, SUFFIX)
34430
34431			#endif
34432
34433
34434			#ifdef FREESTANDING
34435			# define malloc NULL
34436			# define free NULL
34437			#else
34438			# include
34439			#endif
34440
34441			malloc_func_t libdeflate_default_malloc_func = malloc;
34442			free_func_t libdeflate_default_free_func = free;
34443
34444			void *
34445	37		libdeflate_aligned_malloc(malloc_func_t malloc_func,
34446			size_t alignment, size_t size)
34447			{
34448	37		void ptr = (malloc_func)(sizeof(void *) + alignment - 1 + size);
34449
34450	37	50	if (ptr) {
34451	37		void *orig_ptr = ptr;
34452
34453	37		ptr = (void )ALIGN((uintptr_t)ptr + sizeof(void ), alignment);
34454	37		((void **)ptr)[-1] = orig_ptr;
34455			}
34456	37		return ptr;
34457			}
34458
34459			void
34460	37		libdeflate_aligned_free(free_func_t free_func, void *ptr)
34461			{
34462	37		(free_func)(((void *)ptr)[-1]);
34463	37		}
34464
34465			LIBDEFLATEAPI void
34466	0		libdeflate_set_memory_allocator(malloc_func_t malloc_func,
34467			free_func_t free_func)
34468			{
34469	0		libdeflate_default_malloc_func = malloc_func;
34470	0		libdeflate_default_free_func = free_func;
34471	0		}
34472
34473
34474			#ifdef FREESTANDING
34475			#undef memset
34476			void * __attribute__((weak))
34477			memset(void *s, int c, size_t n)
34478			{
34479			u8 *p = s;
34480			size_t i;
34481
34482			for (i = 0; i < n; i++)
34483			p[i] = c;
34484			return s;
34485			}
34486
34487			#undef memcpy
34488			void * __attribute__((weak))
34489			memcpy(void dest, const void src, size_t n)
34490			{
34491			u8 *d = dest;
34492			const u8 *s = src;
34493			size_t i;
34494
34495			for (i = 0; i < n; i++)
34496			d[i] = s[i];
34497			return dest;
34498			}
34499
34500			#undef memmove
34501			void * __attribute__((weak))
34502			memmove(void dest, const void src, size_t n)
34503			{
34504			u8 *d = dest;
34505			const u8 *s = src;
34506			size_t i;
34507
34508			if (d <= s)
34509			return memcpy(d, s, n);
34510
34511			for (i = n; i > 0; i--)
34512			d[i - 1] = s[i - 1];
34513			return dest;
34514			}
34515
34516			#undef memcmp
34517			int __attribute__((weak))
34518			memcmp(const void s1, const void s2, size_t n)
34519			{
34520			const u8 *p1 = s1;
34521			const u8 *p2 = s2;
34522			size_t i;
34523
34524			for (i = 0; i < n; i++) {
34525			if (p1[i] != p2[i])
34526			return (int)p1[i] - (int)p2[i];
34527			}
34528			return 0;
34529			}
34530			#endif
34531
34532			#ifdef LIBDEFLATE_ENABLE_ASSERTIONS
34533			#include
34534			#include
34535			NORETURN void
34536			libdeflate_assertion_failed(const char expr, const char file, int line)
34537			{
34538			fprintf(stderr, "Assertion failed: %s at %s:%d\n", expr, file, line);
34539			abort();
34540			}
34541			#endif
34542			/* /usr/home/ben/projects/gzip-libdeflate/../../software/libdeflate/libdeflate-1.25/lib/zlib_compress.c */
34543
34544
34545			/* #include "deflate_compress.h" */
34546			#ifndef LIB_DEFLATE_COMPRESS_H
34547			#define LIB_DEFLATE_COMPRESS_H
34548
34549			/* #include "lib_common.h" */
34550
34551
34552			#ifndef LIB_LIB_COMMON_H
34553			#define LIB_LIB_COMMON_H
34554
34555			#ifdef LIBDEFLATE_H
34556
34557			# error "lib_common.h must always be included before libdeflate.h"
34558			#endif
34559
34560			#if defined(LIBDEFLATE_DLL) && (defined(_WIN32) \|\| defined(__CYGWIN__))
34561			# define LIBDEFLATE_EXPORT_SYM __declspec(dllexport)
34562			#elif defined(__GNUC__)
34563			# define LIBDEFLATE_EXPORT_SYM __attribute__((visibility("default")))
34564			#else
34565			# define LIBDEFLATE_EXPORT_SYM
34566			#endif
34567
34568
34569			#if defined(__GNUC__) && defined(__i386__)
34570			# define LIBDEFLATE_ALIGN_STACK __attribute__((force_align_arg_pointer))
34571			#else
34572			# define LIBDEFLATE_ALIGN_STACK
34573			#endif
34574
34575			#define LIBDEFLATEAPI LIBDEFLATE_EXPORT_SYM LIBDEFLATE_ALIGN_STACK
34576
34577			/* #include "../common_defs.h" */
34578
34579
34580			#ifndef COMMON_DEFS_H
34581			#define COMMON_DEFS_H
34582
34583			/* #include "libdeflate.h" */
34584
34585
34586			#ifndef LIBDEFLATE_H
34587			#define LIBDEFLATE_H
34588
34589			#include
34590			#include
34591
34592			#ifdef __cplusplus
34593			extern "C" {
34594			#endif
34595
34596			#define LIBDEFLATE_VERSION_MAJOR 1
34597			#define LIBDEFLATE_VERSION_MINOR 25
34598			#define LIBDEFLATE_VERSION_STRING "1.25"
34599
34600
34601			#ifndef LIBDEFLATEAPI
34602			# if defined(LIBDEFLATE_DLL) && (defined(_WIN32) \|\| defined(__CYGWIN__))
34603			# define LIBDEFLATEAPI __declspec(dllimport)
34604			# else
34605			# define LIBDEFLATEAPI
34606			# endif
34607			#endif
34608
34609
34610
34611
34612
34613			struct libdeflate_compressor;
34614			struct libdeflate_options;
34615
34616
34617			LIBDEFLATEAPI struct libdeflate_compressor *
34618			libdeflate_alloc_compressor(int compression_level);
34619
34620
34621			LIBDEFLATEAPI struct libdeflate_compressor *
34622			libdeflate_alloc_compressor_ex(int compression_level,
34623			const struct libdeflate_options *options);
34624
34625
34626			LIBDEFLATEAPI size_t
34627			libdeflate_deflate_compress(struct libdeflate_compressor *compressor,
34628			const void *in, size_t in_nbytes,
34629			void *out, size_t out_nbytes_avail);
34630
34631
34632			LIBDEFLATEAPI size_t
34633			libdeflate_deflate_compress_bound(struct libdeflate_compressor *compressor,
34634			size_t in_nbytes);
34635
34636
34637			LIBDEFLATEAPI size_t
34638			libdeflate_zlib_compress(struct libdeflate_compressor *compressor,
34639			const void *in, size_t in_nbytes,
34640			void *out, size_t out_nbytes_avail);
34641
34642
34643			LIBDEFLATEAPI size_t
34644			libdeflate_zlib_compress_bound(struct libdeflate_compressor *compressor,
34645			size_t in_nbytes);
34646
34647
34648			LIBDEFLATEAPI size_t
34649			libdeflate_gzip_compress(struct libdeflate_compressor *compressor,
34650			const void *in, size_t in_nbytes,
34651			void *out, size_t out_nbytes_avail);
34652
34653
34654			LIBDEFLATEAPI size_t
34655			libdeflate_gzip_compress_bound(struct libdeflate_compressor *compressor,
34656			size_t in_nbytes);
34657
34658
34659			LIBDEFLATEAPI void
34660			libdeflate_free_compressor(struct libdeflate_compressor *compressor);
34661
34662
34663
34664
34665
34666			struct libdeflate_decompressor;
34667			struct libdeflate_options;
34668
34669
34670			LIBDEFLATEAPI struct libdeflate_decompressor *
34671			libdeflate_alloc_decompressor(void);
34672
34673
34674			LIBDEFLATEAPI struct libdeflate_decompressor *
34675			libdeflate_alloc_decompressor_ex(const struct libdeflate_options *options);
34676
34677
34678			enum libdeflate_result {
34679
34680			LIBDEFLATE_SUCCESS = 0,
34681
34682
34683			LIBDEFLATE_BAD_DATA = 1,
34684
34685
34686			LIBDEFLATE_SHORT_OUTPUT = 2,
34687
34688
34689			LIBDEFLATE_INSUFFICIENT_SPACE = 3,
34690			};
34691
34692
34693			LIBDEFLATEAPI enum libdeflate_result
34694			libdeflate_deflate_decompress(struct libdeflate_decompressor *decompressor,
34695			const void *in, size_t in_nbytes,
34696			void *out, size_t out_nbytes_avail,
34697			size_t *actual_out_nbytes_ret);
34698
34699
34700			LIBDEFLATEAPI enum libdeflate_result
34701			libdeflate_deflate_decompress_ex(struct libdeflate_decompressor *decompressor,
34702			const void *in, size_t in_nbytes,
34703			void *out, size_t out_nbytes_avail,
34704			size_t *actual_in_nbytes_ret,
34705			size_t *actual_out_nbytes_ret);
34706
34707
34708			LIBDEFLATEAPI enum libdeflate_result
34709			libdeflate_zlib_decompress(struct libdeflate_decompressor *decompressor,
34710			const void *in, size_t in_nbytes,
34711			void *out, size_t out_nbytes_avail,
34712			size_t *actual_out_nbytes_ret);
34713
34714
34715			LIBDEFLATEAPI enum libdeflate_result
34716			libdeflate_zlib_decompress_ex(struct libdeflate_decompressor *decompressor,
34717			const void *in, size_t in_nbytes,
34718			void *out, size_t out_nbytes_avail,
34719			size_t *actual_in_nbytes_ret,
34720			size_t *actual_out_nbytes_ret);
34721
34722
34723			LIBDEFLATEAPI enum libdeflate_result
34724			libdeflate_gzip_decompress(struct libdeflate_decompressor *decompressor,
34725			const void *in, size_t in_nbytes,
34726			void *out, size_t out_nbytes_avail,
34727			size_t *actual_out_nbytes_ret);
34728
34729
34730			LIBDEFLATEAPI enum libdeflate_result
34731			libdeflate_gzip_decompress_ex(struct libdeflate_decompressor *decompressor,
34732			const void *in, size_t in_nbytes,
34733			void *out, size_t out_nbytes_avail,
34734			size_t *actual_in_nbytes_ret,
34735			size_t *actual_out_nbytes_ret);
34736
34737
34738			LIBDEFLATEAPI void
34739			libdeflate_free_decompressor(struct libdeflate_decompressor *decompressor);
34740
34741
34742
34743
34744
34745
34746			LIBDEFLATEAPI uint32_t
34747			libdeflate_adler32(uint32_t adler, const void *buffer, size_t len);
34748
34749
34750
34751			LIBDEFLATEAPI uint32_t
34752			libdeflate_crc32(uint32_t crc, const void *buffer, size_t len);
34753
34754
34755
34756
34757
34758
34759			LIBDEFLATEAPI void
34760			libdeflate_set_memory_allocator(void (malloc_func)(size_t),
34761			void (free_func)(void ));
34762
34763
34764			struct libdeflate_options {
34765
34766
34767			size_t sizeof_options;
34768
34769
34770			void (malloc_func)(size_t);
34771			void (free_func)(void );
34772			};
34773
34774			#ifdef __cplusplus
34775			}
34776			#endif
34777
34778			#endif
34779
34780
34781			#include
34782			#include
34783			#include
34784			#ifdef _MSC_VER
34785			# include
34786			# include
34787
34788
34789			# pragma warning(disable : 4146)
34790
34791			# pragma warning(disable : 4018)
34792			# pragma warning(disable : 4244)
34793			# pragma warning(disable : 4267)
34794			# pragma warning(disable : 4310)
34795
34796			# pragma warning(disable : 4100)
34797			# pragma warning(disable : 4127)
34798			# pragma warning(disable : 4189)
34799			# pragma warning(disable : 4232)
34800			# pragma warning(disable : 4245)
34801			# pragma warning(disable : 4295)
34802			#endif
34803			#ifndef FREESTANDING
34804			# include
34805			#endif
34806
34807
34808
34809
34810
34811
34812			#undef ARCH_X86_64
34813			#undef ARCH_X86_32
34814			#undef ARCH_ARM64
34815			#undef ARCH_ARM32
34816			#undef ARCH_RISCV
34817			#ifdef _MSC_VER
34818
34819			# if defined(_M_X64) && !defined(_M_ARM64EC)
34820			# define ARCH_X86_64
34821			# elif defined(_M_IX86)
34822			# define ARCH_X86_32
34823			# elif defined(_M_ARM64)
34824			# define ARCH_ARM64
34825			# elif defined(_M_ARM)
34826			# define ARCH_ARM32
34827			# endif
34828			#else
34829			# if defined(__x86_64__)
34830			# define ARCH_X86_64
34831			# elif defined(__i386__)
34832			# define ARCH_X86_32
34833			# elif defined(__aarch64__)
34834			# define ARCH_ARM64
34835			# elif defined(__arm__)
34836			# define ARCH_ARM32
34837			# elif defined(__riscv)
34838			# define ARCH_RISCV
34839			# endif
34840			#endif
34841
34842
34843
34844
34845
34846
34847			typedef uint8_t u8;
34848			typedef uint16_t u16;
34849			typedef uint32_t u32;
34850			typedef uint64_t u64;
34851			typedef int8_t s8;
34852			typedef int16_t s16;
34853			typedef int32_t s32;
34854			typedef int64_t s64;
34855
34856
34857			#ifdef _MSC_VER
34858			# ifdef _WIN64
34859			typedef long long ssize_t;
34860			# else
34861			typedef long ssize_t;
34862			# endif
34863			#endif
34864
34865
34866			typedef size_t machine_word_t;
34867
34868
34869			#define WORDBYTES ((int)sizeof(machine_word_t))
34870
34871
34872			#define WORDBITS (8 * WORDBYTES)
34873
34874
34875
34876
34877
34878
34879			#if defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER)
			/* GCC_PREREQ(major, minor): nonzero only for genuine gcc (not clang or icc) of at least that version. */
34880			# define GCC_PREREQ(major, minor) \
34881			(__GNUC__ > (major) || \
34882			(__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)))
34883			# if !GCC_PREREQ(4, 9)
34884			# error "gcc versions older than 4.9 are no longer supported"
34885			# endif
34886			#else
34887			# define GCC_PREREQ(major, minor) 0
34888			#endif
34889			#ifdef __clang__
			/* CLANG_PREREQ: Apple clang is compared by __apple_build_version__, other clang by major/minor. */
34890			# ifdef __apple_build_version__
34891			# define CLANG_PREREQ(major, minor, apple_version) \
34892			(__apple_build_version__ >= (apple_version))
34893			# else
34894			# define CLANG_PREREQ(major, minor, apple_version) \
34895			(__clang_major__ > (major) || \
34896			(__clang_major__ == (major) && __clang_minor__ >= (minor)))
34897			# endif
34898			# if !CLANG_PREREQ(3, 9, 8000000)
34899			# error "clang versions older than 3.9 are no longer supported"
34900			# endif
34901			#else
34902			# define CLANG_PREREQ(major, minor, apple_version) 0
34903			#endif
34904			#ifdef _MSC_VER
			/* MSVC_PREREQ(version): compares against the raw _MSC_VER number. */
34905			# define MSVC_PREREQ(version) (_MSC_VER >= (version))
34906			# if !MSVC_PREREQ(1900)
34907			# error "MSVC versions older than Visual Studio 2015 are no longer supported"
34908			# endif
34909			#else
34910			# define MSVC_PREREQ(version) 0
34911			#endif
34912
34913
34914			#ifndef __has_attribute
34915			# define __has_attribute(attribute) 0
34916			#endif
34917
34918
34919			#ifndef __has_builtin
34920			# define __has_builtin(builtin) 0
34921			#endif
34922
34923
34924			#ifdef _MSC_VER
34925			# define inline __inline
34926			#endif
34927
34928
34929			#if defined(__GNUC__) || __has_attribute(always_inline)
			/* forceinline: inline plus the strongest "always inline" request the compiler offers. */
34930			# define forceinline inline __attribute__((always_inline))
34931			#elif defined(_MSC_VER)
34932			# define forceinline __forceinline
34933			#else
34934			# define forceinline inline
34935			#endif
34936
34937			/* MAYBE_UNUSED: marks a declaration as possibly unused; expands to nothing if unsupported. */
34938			#if defined(__GNUC__) || __has_attribute(unused)
34939			# define MAYBE_UNUSED __attribute__((unused))
34940			#else
34941			# define MAYBE_UNUSED
34942			#endif
34943
34944			/* NORETURN: marks a function that never returns; expands to nothing if unsupported. */
34945			#if defined(__GNUC__) || __has_attribute(noreturn)
34946			# define NORETURN __attribute__((noreturn))
34947			#else
34948			# define NORETURN
34949			#endif
34950
34951			/* restrict: when __STDC_VERSION__ is absent or below 201112L, map to __restrict__ or to nothing. */
34952			#if !defined(__STDC_VERSION__) || (__STDC_VERSION__ < 201112L)
34953			# if defined(__GNUC__) || defined(__clang__)
34954			# define restrict __restrict__
34955			# else
34956			# define restrict
34957			# endif
34958			#endif
34959
34960			/* likely(expr): branch hint that expr is usually true; the builtin form normalizes expr to 0/1. */
34961			#if defined(__GNUC__) || __has_builtin(__builtin_expect)
34962			# define likely(expr) __builtin_expect(!!(expr), 1)
34963			#else
34964			# define likely(expr) (expr)
34965			#endif
34966
34967			/* unlikely(expr): branch hint that expr is usually false. */
34968			#if defined(__GNUC__) || __has_builtin(__builtin_expect)
34969			# define unlikely(expr) __builtin_expect(!!(expr), 0)
34970			#else
34971			# define unlikely(expr) (expr)
34972			#endif
34973
34974
34975			#undef prefetchr
			/* prefetchr(addr): read-prefetch hint; defined as empty when no intrinsic is selected below. */
34976			#if defined(__GNUC__) || __has_builtin(__builtin_prefetch)
34977			# define prefetchr(addr) __builtin_prefetch((addr), 0)
34978			#elif defined(_MSC_VER)
34979			# if defined(ARCH_X86_32) || defined(ARCH_X86_64)
34980			# define prefetchr(addr) _mm_prefetch((addr), _MM_HINT_T0)
34981			# elif defined(ARCH_ARM64)
34982			# define prefetchr(addr) __prefetch2((addr), 0x00 )
34983			# elif defined(ARCH_ARM32)
34984			# define prefetchr(addr) __prefetch(addr)
34985			# endif
34986			#endif
34987			#ifndef prefetchr
34988			# define prefetchr(addr)
34989			#endif
34990
34991			/* prefetchw(addr): write-prefetch hint; defined as empty when no intrinsic is selected below. */
34992			#undef prefetchw
34993			#if defined(__GNUC__) || __has_builtin(__builtin_prefetch)
34994			# define prefetchw(addr) __builtin_prefetch((addr), 1)
34995			#elif defined(_MSC_VER)
34996			# if defined(ARCH_X86_32) || defined(ARCH_X86_64)
34997			# define prefetchw(addr) _m_prefetchw(addr)
34998			# elif defined(ARCH_ARM64)
34999			# define prefetchw(addr) __prefetch2((addr), 0x10 )
35000			# elif defined(ARCH_ARM32)
35001			# define prefetchw(addr) __prefetchw(addr)
35002			# endif
35003			#endif
35004			#ifndef prefetchw
35005			# define prefetchw(addr)
35006			#endif
35007
35008			/* _aligned_attribute(n): alignment request; left undefined when neither spelling is available. */
35009			#undef _aligned_attribute
35010			#if defined(__GNUC__) || __has_attribute(aligned)
35011			# define _aligned_attribute(n) __attribute__((aligned(n)))
35012			#elif defined(_MSC_VER)
35013			# define _aligned_attribute(n) __declspec(align(n))
35014			#endif
35015
35016			/* _target_attribute(attrs): per-function target attribute; expands to nothing if unsupported. */
35017			#if defined(__GNUC__) || __has_attribute(target)
35018			# define _target_attribute(attrs) __attribute__((target(attrs)))
35019			#else
35020			# define _target_attribute(attrs)
35021			#endif
35022
35023
35024
35025
35026
35027			#define ARRAY_LEN(A) (sizeof(A) / sizeof((A)[0]))
35028			#define MIN(a, b) ((a) <= (b) ? (a) : (b))
35029			#define MAX(a, b) ((a) >= (b) ? (a) : (b))
35030			#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
35031			#define STATIC_ASSERT(expr) ((void)sizeof(char[1 - 2 * !(expr)]))
35032			#define ALIGN(n, a) (((n) + (a) - 1) & ~((a) - 1))
35033			#define ROUND_UP(n, d) ((d) * DIV_ROUND_UP((n), (d)))
35034
35035
35036
35037
35038
35039
35040			#if defined(__BYTE_ORDER__)
			/* CPU_IS_LITTLE_ENDIAN(): a compile-time expression where the compiler exposes byte order, else a probe. */
35041			# define CPU_IS_LITTLE_ENDIAN() (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
35042			#elif defined(_MSC_VER)
35043			# define CPU_IS_LITTLE_ENDIAN() true
35044			#else
35045			static forceinline bool CPU_IS_LITTLE_ENDIAN(void)
35046			{
35047			union {
35048			u32 w;
35049			u8 b;
35050			} u;
35051			/* Store 1 in the 32-bit member, then read the first byte: nonzero only if the low byte comes first. */
35052			u.w = 1;
35053			return u.b;
35054			}
35055			#endif
35056
35057
35058			static forceinline u16 bswap16(u16 v)
35059			{
			/* Swap the two bytes of v, using a compiler intrinsic when one is available. */
35060			#if defined(__GNUC__) || __has_builtin(__builtin_bswap16)
35061			return __builtin_bswap16(v);
35062			#elif defined(_MSC_VER)
35063			return _byteswap_ushort(v);
35064			#else
35065			return (v << 8) | (v >> 8);
35066			#endif
35067			}
35068
35069
35070			static forceinline u32 bswap32(u32 v)
35071			{
			/* Reverse the four bytes of v; the portable fallback masks and shifts each byte into place. */
35072			#if defined(__GNUC__) || __has_builtin(__builtin_bswap32)
35073			return __builtin_bswap32(v);
35074			#elif defined(_MSC_VER)
35075			return _byteswap_ulong(v);
35076			#else
35077			return ((v & 0x000000FF) << 24) |
35078			((v & 0x0000FF00) << 8) |
35079			((v & 0x00FF0000) >> 8) |
35080			((v & 0xFF000000) >> 24);
35081			#endif
35082			}
35083
35084
35085			static forceinline u64 bswap64(u64 v)
35086			{
			/* Reverse the eight bytes of v; the portable fallback masks and shifts each byte into place. */
35087			#if defined(__GNUC__) || __has_builtin(__builtin_bswap64)
35088			return __builtin_bswap64(v);
35089			#elif defined(_MSC_VER)
35090			return _byteswap_uint64(v);
35091			#else
35092			return ((v & 0x00000000000000FF) << 56) |
35093			((v & 0x000000000000FF00) << 40) |
35094			((v & 0x0000000000FF0000) << 24) |
35095			((v & 0x00000000FF000000) << 8) |
35096			((v & 0x000000FF00000000) >> 8) |
35097			((v & 0x0000FF0000000000) >> 24) |
35098			((v & 0x00FF000000000000) >> 40) |
35099			((v & 0xFF00000000000000) >> 56);
35100			#endif
35101			}
35102
35103			#define le16_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap16(v))
35104			#define le32_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap32(v))
35105			#define le64_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap64(v))
35106			#define be16_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? bswap16(v) : (v))
35107			#define be32_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? bswap32(v) : (v))
35108			#define be64_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? bswap64(v) : (v))
35109
35110
35111
35112
35113
35114
35115			#if (defined(__GNUC__) || defined(__clang__)) && \
35116			(defined(ARCH_X86_64) || defined(ARCH_X86_32) || \
35117			defined(__ARM_FEATURE_UNALIGNED) || defined(__powerpc64__) || \
35118			defined(__riscv_misaligned_fast) || \
35119			defined(__wasm__))
			/* 1 selects the whole-word paths in the get_/put_unaligned_* helpers, 0 their byte-by-byte fallbacks. */
35120			# define UNALIGNED_ACCESS_IS_FAST 1
35121			#elif defined(_MSC_VER)
35122			# define UNALIGNED_ACCESS_IS_FAST 1
35123			#else
35124			# define UNALIGNED_ACCESS_IS_FAST 0
35125			#endif
35126
35127
35128
35129			#ifdef FREESTANDING
35130			# define MEMCOPY __builtin_memcpy
35131			#else
35132			# define MEMCOPY memcpy
35133			#endif
35134
35135			/* DEFINE_UNALIGNED_TYPE(type) emits load_<type>_unaligned() and store_<type>_unaligned(). */
35136			/* Both copy sizeof(type) bytes through MEMCOPY instead of dereferencing a cast pointer. */
35137			#define DEFINE_UNALIGNED_TYPE(type) \
35138			static forceinline type \
35139			load_##type##_unaligned(const void *p) \
35140			{ \
35141			type v; \
35142			\
35143			MEMCOPY(&v, p, sizeof(v)); \
35144			return v; \
35145			} \
35146			\
35147			static forceinline void \
35148			store_##type##_unaligned(type v, void *p) \
35149			{ \
35150			MEMCOPY(p, &v, sizeof(v)); \
35151			}
35152
35153			DEFINE_UNALIGNED_TYPE(u16)
35154			DEFINE_UNALIGNED_TYPE(u32)
35155			DEFINE_UNALIGNED_TYPE(u64)
35156			DEFINE_UNALIGNED_TYPE(machine_word_t)
35157
35158			#undef MEMCOPY
35159			/* Shorter aliases for the machine_word_t instantiation. */
35160			#define load_word_unaligned load_machine_word_t_unaligned
35161			#define store_word_unaligned store_machine_word_t_unaligned
35162
35163
35164
35165			static forceinline u16
35166			get_unaligned_le16(const u8 *p)
35167			{
			/* Read a little-endian 16-bit value from p, which need not be aligned. */
35168			if (UNALIGNED_ACCESS_IS_FAST)
35169			return le16_bswap(load_u16_unaligned(p));
35170			else
35171			return ((u16)p[1] << 8) | p[0];
35172			}
35173
35174			static forceinline u16
35175			get_unaligned_be16(const u8 *p)
35176			{
			/* Read a big-endian 16-bit value from p, which need not be aligned. */
35177			if (UNALIGNED_ACCESS_IS_FAST)
35178			return be16_bswap(load_u16_unaligned(p));
35179			else
35180			return ((u16)p[0] << 8) | p[1];
35181			}
35182
35183			static forceinline u32
35184			get_unaligned_le32(const u8 *p)
35185			{
			/* Read a little-endian 32-bit value from p, which need not be aligned. */
35186			if (UNALIGNED_ACCESS_IS_FAST)
35187			return le32_bswap(load_u32_unaligned(p));
35188			else
35189			return ((u32)p[3] << 24) | ((u32)p[2] << 16) |
35190			((u32)p[1] << 8) | p[0];
35191			}
35192
35193			static forceinline u32
35194			get_unaligned_be32(const u8 *p)
35195			{
			/* Read a big-endian 32-bit value from p, which need not be aligned. */
35196			if (UNALIGNED_ACCESS_IS_FAST)
35197			return be32_bswap(load_u32_unaligned(p));
35198			else
35199			return ((u32)p[0] << 24) | ((u32)p[1] << 16) |
35200			((u32)p[2] << 8) | p[3];
35201			}
35202
35203			static forceinline u64
35204			get_unaligned_le64(const u8 *p)
35205			{
			/* Read a little-endian 64-bit value from p, which need not be aligned. */
35206			if (UNALIGNED_ACCESS_IS_FAST)
35207			return le64_bswap(load_u64_unaligned(p));
35208			else
35209			return ((u64)p[7] << 56) | ((u64)p[6] << 48) |
35210			((u64)p[5] << 40) | ((u64)p[4] << 32) |
35211			((u64)p[3] << 24) | ((u64)p[2] << 16) |
35212			((u64)p[1] << 8) | p[0];
35213			}
35214
35215			static forceinline machine_word_t
35216			get_unaligned_leword(const u8 *p)
35217			{
			/* Read one little-endian machine word from p, dispatching on WORDBITS (32 or 64 only). */
35218			STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
35219			if (WORDBITS == 32)
35220			return get_unaligned_le32(p);
35221			else
35222			return get_unaligned_le64(p);
35223			}
35224
35225
35226
35227			static forceinline void
35228			put_unaligned_le16(u16 v, u8 *p)
35229			{
			/* Store v at p in little-endian byte order; p need not be aligned. */
35230			if (UNALIGNED_ACCESS_IS_FAST) {
35231			store_u16_unaligned(le16_bswap(v), p);
35232			} else {
35233			p[0] = (u8)(v >> 0);
35234			p[1] = (u8)(v >> 8);
35235			}
35236			}
35237
35238			static forceinline void
35239			put_unaligned_be16(u16 v, u8 *p)
35240			{
			/* Store v at p in big-endian byte order; p need not be aligned. */
35241			if (UNALIGNED_ACCESS_IS_FAST) {
35242			store_u16_unaligned(be16_bswap(v), p);
35243			} else {
35244			p[0] = (u8)(v >> 8);
35245			p[1] = (u8)(v >> 0);
35246			}
35247			}
35248
35249			static forceinline void
35250			put_unaligned_le32(u32 v, u8 *p)
35251			{
			/* Store v at p in little-endian byte order; p need not be aligned. */
35252			if (UNALIGNED_ACCESS_IS_FAST) {
35253			store_u32_unaligned(le32_bswap(v), p);
35254			} else {
35255			p[0] = (u8)(v >> 0);
35256			p[1] = (u8)(v >> 8);
35257			p[2] = (u8)(v >> 16);
35258			p[3] = (u8)(v >> 24);
35259			}
35260			}
35261
35262			static forceinline void
35263			put_unaligned_be32(u32 v, u8 *p)
35264			{
			/* Store v at p in big-endian byte order; p need not be aligned. */
35265			if (UNALIGNED_ACCESS_IS_FAST) {
35266			store_u32_unaligned(be32_bswap(v), p);
35267			} else {
35268			p[0] = (u8)(v >> 24);
35269			p[1] = (u8)(v >> 16);
35270			p[2] = (u8)(v >> 8);
35271			p[3] = (u8)(v >> 0);
35272			}
35273			}
35274
35275			static forceinline void
35276			put_unaligned_le64(u64 v, u8 *p)
35277			{
			/* Store v at p in little-endian byte order; p need not be aligned. */
35278			if (UNALIGNED_ACCESS_IS_FAST) {
35279			store_u64_unaligned(le64_bswap(v), p);
35280			} else {
35281			p[0] = (u8)(v >> 0);
35282			p[1] = (u8)(v >> 8);
35283			p[2] = (u8)(v >> 16);
35284			p[3] = (u8)(v >> 24);
35285			p[4] = (u8)(v >> 32);
35286			p[5] = (u8)(v >> 40);
35287			p[6] = (u8)(v >> 48);
35288			p[7] = (u8)(v >> 56);
35289			}
35290			}
35291
35292			static forceinline void
35293			put_unaligned_leword(machine_word_t v, u8 *p)
35294			{
			/* Store one machine word at p in little-endian order, dispatching on WORDBITS (32 or 64 only). */
35295			STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
35296			if (WORDBITS == 32)
35297			put_unaligned_le32(v, p);
35298			else
35299			put_unaligned_le64(v, p);
35300			}
35301
35302
35303
35304
35305
35306
35307
35308			static forceinline unsigned
35309			bsr32(u32 v)
35310			{
			/* Return the index of the highest set bit of v. __builtin_clz(0) is undefined, so pass v != 0. */
35311			#if defined(__GNUC__) || __has_builtin(__builtin_clz)
35312			return 31 - __builtin_clz(v);
35313			#elif defined(_MSC_VER)
35314			unsigned long i;
35315
35316			_BitScanReverse(&i, v);
35317			return i;
35318			#else
35319			unsigned i = 0;
35320
35321			while ((v >>= 1) != 0)
35322			i++;
35323			return i;
35324			#endif
35325			}
35326
35327			static forceinline unsigned
35328			bsr64(u64 v)
35329			{
			/* Return the index of the highest set bit of v. __builtin_clzll(0) is undefined, so pass v != 0. */
35330			#if defined(__GNUC__) || __has_builtin(__builtin_clzll)
35331			return 63 - __builtin_clzll(v);
35332			#elif defined(_MSC_VER) && defined(_WIN64)
35333			unsigned long i;
35334
35335			_BitScanReverse64(&i, v);
35336			return i;
35337			#else
35338			unsigned i = 0;
35339
35340			while ((v >>= 1) != 0)
35341			i++;
35342			return i;
35343			#endif
35344			}
35345
35346			static forceinline unsigned
35347			bsrw(machine_word_t v)
35348			{
			/* Machine-word form of bsr32()/bsr64(), chosen by WORDBITS (32 or 64 only). */
35349			STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
35350			if (WORDBITS == 32)
35351			return bsr32(v);
35352			else
35353			return bsr64(v);
35354			}
35355
35356
35357
35358			static forceinline unsigned
35359			bsf32(u32 v)
35360			{
			/* Return the index of the lowest set bit of v. Pass v != 0: the fallback loop never ends for 0. */
35361			#if defined(__GNUC__) || __has_builtin(__builtin_ctz)
35362			return __builtin_ctz(v);
35363			#elif defined(_MSC_VER)
35364			unsigned long i;
35365
35366			_BitScanForward(&i, v);
35367			return i;
35368			#else
35369			unsigned i = 0;
35370
35371			for (; (v & 1) == 0; v >>= 1)
35372			i++;
35373			return i;
35374			#endif
35375			}
35376
35377			static forceinline unsigned
35378			bsf64(u64 v)
35379			{
			/* Return the index of the lowest set bit of v. Pass v != 0: the fallback loop never ends for 0. */
35380			#if defined(__GNUC__) || __has_builtin(__builtin_ctzll)
35381			return __builtin_ctzll(v);
35382			#elif defined(_MSC_VER) && defined(_WIN64)
35383			unsigned long i;
35384
35385			_BitScanForward64(&i, v);
35386			return i;
35387			#else
35388			unsigned i = 0;
35389
35390			for (; (v & 1) == 0; v >>= 1)
35391			i++;
35392			return i;
35393			#endif
35394			}
35395
35396			static forceinline unsigned
35397			bsfw(machine_word_t v)
35398			{
			/* Machine-word form of bsf32()/bsf64(), chosen by WORDBITS (32 or 64 only). */
35399			STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
35400			if (WORDBITS == 32)
35401			return bsf32(v);
35402			else
35403			return bsf64(v);
35404			}
35405
35406
35407			#undef rbit32
			/* rbit32(v): applies the ARM "rbit" instruction to v. Defined only for gcc/clang on the ARM targets below. */
			/* The self-referential #define makes "#ifdef rbit32" true wherever the function exists. */
35408			#if (defined(__GNUC__) || defined(__clang__)) && defined(ARCH_ARM32) && \
35409			(__ARM_ARCH >= 7 || (__ARM_ARCH == 6 && defined(__ARM_ARCH_6T2__)))
35410			static forceinline u32
35411			rbit32(u32 v)
35412			{
35413			__asm__("rbit %0, %1" : "=r" (v) : "r" (v));
35414			return v;
35415			}
35416			#define rbit32 rbit32
35417			#elif (defined(__GNUC__) || defined(__clang__)) && defined(ARCH_ARM64)
35418			static forceinline u32
35419			rbit32(u32 v)
35420			{
35421			__asm__("rbit %w0, %w1" : "=r" (v) : "r" (v));
35422			return v;
35423			}
35424			#define rbit32 rbit32
35425			#endif
35426
35427			#endif
35428
35429
35430			typedef void *(*malloc_func_t)(size_t); /* allocation callback: byte count in, pointer out */
35431			typedef void (*free_func_t)(void *); /* release callback: takes the pointer to free */
35432
35433			extern malloc_func_t libdeflate_default_malloc_func;
35434			extern free_func_t libdeflate_default_free_func;
35435
35436			void *libdeflate_aligned_malloc(malloc_func_t malloc_func,
35437			size_t alignment, size_t size);
35438			void libdeflate_aligned_free(free_func_t free_func, void *ptr);
35439
35440			#ifdef FREESTANDING
35441			/* No <string.h> in this mode: declare the four routines here and route calls to the compiler builtins. */
35442			void *memset(void *s, int c, size_t n);
35443			#define memset(s, c, n) __builtin_memset((s), (c), (n))
35444
35445			void *memcpy(void *dest, const void *src, size_t n);
35446			#define memcpy(dest, src, n) __builtin_memcpy((dest), (src), (n))
35447
35448			void *memmove(void *dest, const void *src, size_t n);
35449			#define memmove(dest, src, n) __builtin_memmove((dest), (src), (n))
35450
35451			int memcmp(const void *s1, const void *s2, size_t n);
35452			#define memcmp(s1, s2, n) __builtin_memcmp((s1), (s2), (n))
35453			/* Assertions are always disabled in freestanding builds. */
35454			#undef LIBDEFLATE_ENABLE_ASSERTIONS
35455			#else
35456			# include <string.h>
35457			/* Hosted builds turn assertions on when the clang static analyzer is running. */
35458			# ifdef __clang_analyzer__
35459			# define LIBDEFLATE_ENABLE_ASSERTIONS
35460			# endif
35461			#endif
35462
35463
35464			#ifdef LIBDEFLATE_ENABLE_ASSERTIONS
			/* ASSERT(expr): on failure, passes the expression text, file and line to libdeflate_assertion_failed(). */
35465			NORETURN void
35466			libdeflate_assertion_failed(const char *expr, const char *file, int line);
35467			#define ASSERT(expr) { if (unlikely(!(expr))) \
35468			libdeflate_assertion_failed(#expr, __FILE__, __LINE__); }
35469			#else
			/* With assertions disabled, expr is still evaluated and its value discarded. */
35470			#define ASSERT(expr) (void)(expr)
35471			#endif
35472
35473			#define CONCAT_IMPL(a, b) a##b
35474			#define CONCAT(a, b) CONCAT_IMPL(a, b)
35475			#define ADD_SUFFIX(name) CONCAT(name, SUFFIX)
35476
35477			#endif
35478
35479
35480
35481
35482			struct libdeflate_compressor;
35483
35484			unsigned int libdeflate_get_compression_level(struct libdeflate_compressor *c);
35485
35486			#endif
35487
35488			/* #include "zlib_constants.h" */
35489
35490
35491			#ifndef LIB_ZLIB_CONSTANTS_H
35492			#define LIB_ZLIB_CONSTANTS_H
35493
35494			#define ZLIB_MIN_HEADER_SIZE 2
35495			#define ZLIB_FOOTER_SIZE 4
35496			#define ZLIB_MIN_OVERHEAD (ZLIB_MIN_HEADER_SIZE + ZLIB_FOOTER_SIZE)
35497
35498			#define ZLIB_CM_DEFLATE 8
35499
35500			#define ZLIB_CINFO_32K_WINDOW 7
35501
35502			#define ZLIB_FASTEST_COMPRESSION 0
35503			#define ZLIB_FAST_COMPRESSION 1
35504			#define ZLIB_DEFAULT_COMPRESSION 2
35505			#define ZLIB_SLOWEST_COMPRESSION 3
35506
35507			#endif
35508
35509
35510			LIBDEFLATEAPI size_t
35511	12		libdeflate_zlib_compress(struct libdeflate_compressor *c,
35512			const void *in, size_t in_nbytes,
35513			void *out, size_t out_nbytes_avail)
35514			{
			/*
			 * Write a zlib stream for in[0..in_nbytes) into out: a 2-byte header, the
			 * output of libdeflate_deflate_compress(), then a 4-byte big-endian Adler-32
			 * of the input. Returns the total bytes written, or 0 when out_nbytes_avail
			 * is not greater than ZLIB_MIN_OVERHEAD or the deflate step returns 0.
			 */
35515	12		u8 *out_next = out;
35516			u16 hdr;
35517			unsigned compression_level;
35518			unsigned level_hint;
35519			size_t deflate_size;
35520
35521	12	50	if (out_nbytes_avail <= ZLIB_MIN_OVERHEAD)
35522	0		return 0;
35523
35524			/* Header: method at bit 8, window info at bit 12, level hint at bit 6, check value in the low bits. */
35525	12		hdr = (ZLIB_CM_DEFLATE << 8) | (ZLIB_CINFO_32K_WINDOW << 12);
35526	12		compression_level = libdeflate_get_compression_level(c);
35527	12	100	if (compression_level < 2)
35528	1		level_hint = ZLIB_FASTEST_COMPRESSION;
35529	11	100	else if (compression_level < 6)
35530	4		level_hint = ZLIB_FAST_COMPRESSION;
35531	7	100	else if (compression_level < 8)
35532	2		level_hint = ZLIB_DEFAULT_COMPRESSION;
35533			else
35534	5		level_hint = ZLIB_SLOWEST_COMPRESSION;
35535	12		hdr |= level_hint << 6;
35536	12		hdr |= 31 - (hdr % 31); /* low five bits are still zero, so this makes hdr a multiple of 31 */
35537
35538	12		put_unaligned_be16(hdr, out_next);
35539	12		out_next += 2;
35540
35541			/* Compressed data: ZLIB_MIN_OVERHEAD bytes of out are held back for the header and trailer. */
35542	12		deflate_size = libdeflate_deflate_compress(c, in, in_nbytes, out_next,
35543			out_nbytes_avail - ZLIB_MIN_OVERHEAD);
35544	12	50	if (deflate_size == 0)
35545	0		return 0;
35546	12		out_next += deflate_size;
35547
35548			/* Trailer: Adler-32 of the uncompressed input, seeded with 1, stored big-endian. */
35549	12		put_unaligned_be32(libdeflate_adler32(1, in, in_nbytes), out_next);
35550	12		out_next += 4;
35551
35552	12		return out_next - (u8 *)out;
35553			}
35554
35555			LIBDEFLATEAPI size_t
35556	12		libdeflate_zlib_compress_bound(struct libdeflate_compressor *c,
35557			size_t in_nbytes)
35558			{
			/* The raw DEFLATE bound for in_nbytes plus the fixed zlib header and trailer overhead. */
35559	12		return ZLIB_MIN_OVERHEAD +
35560	12		libdeflate_deflate_compress_bound(c, in_nbytes);
35561			}
35562			/* /usr/home/ben/projects/gzip-libdeflate/../../software/libdeflate/libdeflate-1.25/lib/zlib_decompress.c */
35563
35564
35565			/* #include "lib_common.h" */
35566
35567
35568			#ifndef LIB_LIB_COMMON_H
35569			#define LIB_LIB_COMMON_H
35570
35571			#ifdef LIBDEFLATE_H
35572
35573			# error "lib_common.h must always be included before libdeflate.h"
35574			#endif
35575
35576			#if defined(LIBDEFLATE_DLL) && (defined(_WIN32) || defined(__CYGWIN__))
			/* LIBDEFLATE_EXPORT_SYM: dllexport for Windows/Cygwin DLL builds, default visibility on GNU C, else empty. */
35577			# define LIBDEFLATE_EXPORT_SYM __declspec(dllexport)
35578			#elif defined(__GNUC__)
35579			# define LIBDEFLATE_EXPORT_SYM __attribute__((visibility("default")))
35580			#else
35581			# define LIBDEFLATE_EXPORT_SYM
35582			#endif
35583
35584
35585			#if defined(__GNUC__) && defined(__i386__)
35586			# define LIBDEFLATE_ALIGN_STACK __attribute__((force_align_arg_pointer))
35587			#else
35588			# define LIBDEFLATE_ALIGN_STACK
35589			#endif
35590
35591			#define LIBDEFLATEAPI LIBDEFLATE_EXPORT_SYM LIBDEFLATE_ALIGN_STACK
35592
35593			/* #include "../common_defs.h" */
35594
35595
35596			#ifndef COMMON_DEFS_H
35597			#define COMMON_DEFS_H
35598
35599			/* #include "libdeflate.h" */
35600
35601
35602			#ifndef LIBDEFLATE_H
35603			#define LIBDEFLATE_H
35604
35605			#include <stddef.h>
35606			#include <stdint.h>
35607
35608			#ifdef __cplusplus
35609			extern "C" {
35610			#endif
35611
35612			#define LIBDEFLATE_VERSION_MAJOR 1
35613			#define LIBDEFLATE_VERSION_MINOR 25
35614			#define LIBDEFLATE_VERSION_STRING "1.25"
35615
35616
35617			#ifndef LIBDEFLATEAPI
			/* Default when nothing defined LIBDEFLATEAPI earlier: dllimport for Windows/Cygwin DLL users, else empty. */
35618			# if defined(LIBDEFLATE_DLL) && (defined(_WIN32) || defined(__CYGWIN__))
35619			# define LIBDEFLATEAPI __declspec(dllimport)
35620			# else
35621			# define LIBDEFLATEAPI
35622			# endif
35623			#endif
35624
35625
35626
35627
35628
35629			struct libdeflate_compressor;
35630			struct libdeflate_options;
35631
35632
35633			LIBDEFLATEAPI struct libdeflate_compressor *
35634			libdeflate_alloc_compressor(int compression_level);
35635
35636
35637			LIBDEFLATEAPI struct libdeflate_compressor *
35638			libdeflate_alloc_compressor_ex(int compression_level,
35639			const struct libdeflate_options *options);
35640
35641
35642			LIBDEFLATEAPI size_t
35643			libdeflate_deflate_compress(struct libdeflate_compressor *compressor,
35644			const void *in, size_t in_nbytes,
35645			void *out, size_t out_nbytes_avail);
35646
35647
35648			LIBDEFLATEAPI size_t
35649			libdeflate_deflate_compress_bound(struct libdeflate_compressor *compressor,
35650			size_t in_nbytes);
35651
35652
35653			LIBDEFLATEAPI size_t
35654			libdeflate_zlib_compress(struct libdeflate_compressor *compressor,
35655			const void *in, size_t in_nbytes,
35656			void *out, size_t out_nbytes_avail);
35657
35658
35659			LIBDEFLATEAPI size_t
35660			libdeflate_zlib_compress_bound(struct libdeflate_compressor *compressor,
35661			size_t in_nbytes);
35662
35663
35664			LIBDEFLATEAPI size_t
35665			libdeflate_gzip_compress(struct libdeflate_compressor *compressor,
35666			const void *in, size_t in_nbytes,
35667			void *out, size_t out_nbytes_avail);
35668
35669
35670			LIBDEFLATEAPI size_t
35671			libdeflate_gzip_compress_bound(struct libdeflate_compressor *compressor,
35672			size_t in_nbytes);
35673
35674
35675			LIBDEFLATEAPI void
35676			libdeflate_free_compressor(struct libdeflate_compressor *compressor);
35677
35678
35679
35680
35681
35682			struct libdeflate_decompressor;
35683			struct libdeflate_options;
35684
35685
35686			LIBDEFLATEAPI struct libdeflate_decompressor *
35687			libdeflate_alloc_decompressor(void);
35688
35689
35690			LIBDEFLATEAPI struct libdeflate_decompressor *
35691			libdeflate_alloc_decompressor_ex(const struct libdeflate_options *options);
35692
35693
35694			enum libdeflate_result {
35695
35696			LIBDEFLATE_SUCCESS = 0,
35697
35698
35699			LIBDEFLATE_BAD_DATA = 1,
35700
35701
35702			LIBDEFLATE_SHORT_OUTPUT = 2,
35703
35704
35705			LIBDEFLATE_INSUFFICIENT_SPACE = 3,
35706			};
35707
35708
35709			LIBDEFLATEAPI enum libdeflate_result
35710			libdeflate_deflate_decompress(struct libdeflate_decompressor *decompressor,
35711			const void *in, size_t in_nbytes,
35712			void *out, size_t out_nbytes_avail,
35713			size_t *actual_out_nbytes_ret);
35714
35715
35716			LIBDEFLATEAPI enum libdeflate_result
35717			libdeflate_deflate_decompress_ex(struct libdeflate_decompressor *decompressor,
35718			const void *in, size_t in_nbytes,
35719			void *out, size_t out_nbytes_avail,
35720			size_t *actual_in_nbytes_ret,
35721			size_t *actual_out_nbytes_ret);
35722
35723
35724			LIBDEFLATEAPI enum libdeflate_result
35725			libdeflate_zlib_decompress(struct libdeflate_decompressor *decompressor,
35726			const void *in, size_t in_nbytes,
35727			void *out, size_t out_nbytes_avail,
35728			size_t *actual_out_nbytes_ret);
35729
35730
35731			LIBDEFLATEAPI enum libdeflate_result
35732			libdeflate_zlib_decompress_ex(struct libdeflate_decompressor *decompressor,
35733			const void *in, size_t in_nbytes,
35734			void *out, size_t out_nbytes_avail,
35735			size_t *actual_in_nbytes_ret,
35736			size_t *actual_out_nbytes_ret);
35737
35738
35739			LIBDEFLATEAPI enum libdeflate_result
35740			libdeflate_gzip_decompress(struct libdeflate_decompressor *decompressor,
35741			const void *in, size_t in_nbytes,
35742			void *out, size_t out_nbytes_avail,
35743			size_t *actual_out_nbytes_ret);
35744
35745
35746			LIBDEFLATEAPI enum libdeflate_result
35747			libdeflate_gzip_decompress_ex(struct libdeflate_decompressor *decompressor,
35748			const void *in, size_t in_nbytes,
35749			void *out, size_t out_nbytes_avail,
35750			size_t *actual_in_nbytes_ret,
35751			size_t *actual_out_nbytes_ret);
35752
35753
35754			LIBDEFLATEAPI void
35755			libdeflate_free_decompressor(struct libdeflate_decompressor *decompressor);
35756
35757
35758
35759
35760
35761
35762			LIBDEFLATEAPI uint32_t
35763			libdeflate_adler32(uint32_t adler, const void *buffer, size_t len);
35764
35765
35766
35767			LIBDEFLATEAPI uint32_t
35768			libdeflate_crc32(uint32_t crc, const void *buffer, size_t len);
35769
35770
35771
35772
35773
35774
35775			LIBDEFLATEAPI void
35776			libdeflate_set_memory_allocator(void *(*malloc_func)(size_t), /* byte count in, allocation out */
35777			void (*free_func)(void *)); /* receives the pointer to release */
35778
35779
35780			struct libdeflate_options {
35781			/* Passed by pointer to libdeflate_alloc_compressor_ex() and libdeflate_alloc_decompressor_ex(). */
35782			/* NOTE(review): by its name, presumably set by the caller to sizeof(struct libdeflate_options) - confirm. */
35783			size_t sizeof_options;
35784
35785			/* Allocation callbacks: malloc_func maps a byte count to a pointer, free_func takes a pointer. */
35786			void *(*malloc_func)(size_t);
35787			void (*free_func)(void *);
35788			};
35789
35790			#ifdef __cplusplus
35791			}
35792			#endif
35793
35794			#endif
35795
35796
35797			#include <stdbool.h>
35798			#include <stddef.h>
35799			#include <stdint.h>
35800			#ifdef _MSC_VER
35801			# include <intrin.h>
35802			# include <stdlib.h>
35803
35804			/* Suppress, by number, MSVC warnings for the rest of the translation unit. */
35805			# pragma warning(disable : 4146)
35806
35807			# pragma warning(disable : 4018)
35808			# pragma warning(disable : 4244)
35809			# pragma warning(disable : 4267)
35810			# pragma warning(disable : 4310)
35811
35812			# pragma warning(disable : 4100)
35813			# pragma warning(disable : 4127)
35814			# pragma warning(disable : 4189)
35815			# pragma warning(disable : 4232)
35816			# pragma warning(disable : 4245)
35817			# pragma warning(disable : 4295)
35818			#endif
35819			#ifndef FREESTANDING
35820			# include <string.h>
35821			#endif
35822
35823
35824
35825
35826
35827
35828			#undef ARCH_X86_64
35829			#undef ARCH_X86_32
35830			#undef ARCH_ARM64
35831			#undef ARCH_ARM32
35832			#undef ARCH_RISCV
35833			#ifdef _MSC_VER
35834
35835			# if defined(_M_X64) && !defined(_M_ARM64EC)
35836			# define ARCH_X86_64
35837			# elif defined(_M_IX86)
35838			# define ARCH_X86_32
35839			# elif defined(_M_ARM64)
35840			# define ARCH_ARM64
35841			# elif defined(_M_ARM)
35842			# define ARCH_ARM32
35843			# endif
35844			#else
35845			# if defined(__x86_64__)
35846			# define ARCH_X86_64
35847			# elif defined(__i386__)
35848			# define ARCH_X86_32
35849			# elif defined(__aarch64__)
35850			# define ARCH_ARM64
35851			# elif defined(__arm__)
35852			# define ARCH_ARM32
35853			# elif defined(__riscv)
35854			# define ARCH_RISCV
35855			# endif
35856			#endif
35857
35858
35859
35860
35861
35862
35863			typedef uint8_t u8;
35864			typedef uint16_t u16;
35865			typedef uint32_t u32;
35866			typedef uint64_t u64;
35867			typedef int8_t s8;
35868			typedef int16_t s16;
35869			typedef int32_t s32;
35870			typedef int64_t s64;
35871
35872
35873			#ifdef _MSC_VER
35874			# ifdef _WIN64
35875			typedef long long ssize_t;
35876			# else
35877			typedef long ssize_t;
35878			# endif
35879			#endif
35880
35881
35882			typedef size_t machine_word_t;
35883
35884
35885			#define WORDBYTES ((int)sizeof(machine_word_t))
35886
35887
35888			#define WORDBITS (8 * WORDBYTES)
35889
35890
35891
35892
35893
35894
35895			#if defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER)
35896			# define GCC_PREREQ(major, minor) \
35897			(__GNUC__ > (major) \|\| \
35898			(__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)))
35899			# if !GCC_PREREQ(4, 9)
35900			# error "gcc versions older than 4.9 are no longer supported"
35901			# endif
35902			#else
35903			# define GCC_PREREQ(major, minor) 0
35904			#endif
35905			#ifdef __clang__
35906			# ifdef __apple_build_version__
35907			# define CLANG_PREREQ(major, minor, apple_version) \
35908			(__apple_build_version__ >= (apple_version))
35909			# else
35910			# define CLANG_PREREQ(major, minor, apple_version) \
35911			(__clang_major__ > (major) \|\| \
35912			(__clang_major__ == (major) && __clang_minor__ >= (minor)))
35913			# endif
35914			# if !CLANG_PREREQ(3, 9, 8000000)
35915			# error "clang versions older than 3.9 are no longer supported"
35916			# endif
35917			#else
35918			# define CLANG_PREREQ(major, minor, apple_version) 0
35919			#endif
35920			#ifdef _MSC_VER
35921			# define MSVC_PREREQ(version) (_MSC_VER >= (version))
35922			# if !MSVC_PREREQ(1900)
35923			# error "MSVC versions older than Visual Studio 2015 are no longer supported"
35924			# endif
35925			#else
35926			# define MSVC_PREREQ(version) 0
35927			#endif
35928
35929
35930			#ifndef __has_attribute
35931			# define __has_attribute(attribute) 0
35932			#endif
35933
35934
35935			#ifndef __has_builtin
35936			# define __has_builtin(builtin) 0
35937			#endif
35938
35939
35940			#ifdef _MSC_VER
35941			# define inline __inline
35942			#endif
35943
35944
35945			#if defined(__GNUC__) \|\| __has_attribute(always_inline)
35946			# define forceinline inline __attribute__((always_inline))
35947			#elif defined(_MSC_VER)
35948			# define forceinline __forceinline
35949			#else
35950			# define forceinline inline
35951			#endif
35952
35953
35954			#if defined(__GNUC__) \|\| __has_attribute(unused)
35955			# define MAYBE_UNUSED __attribute__((unused))
35956			#else
35957			# define MAYBE_UNUSED
35958			#endif
35959
35960
35961			#if defined(__GNUC__) \|\| __has_attribute(noreturn)
35962			# define NORETURN __attribute__((noreturn))
35963			#else
35964			# define NORETURN
35965			#endif
35966
35967
35968			#if !defined(__STDC_VERSION__) \|\| (__STDC_VERSION__ < 201112L)
35969			# if defined(__GNUC__) \|\| defined(__clang__)
35970			# define restrict __restrict__
35971			# else
35972			# define restrict
35973			# endif
35974			#endif
35975
35976
35977			#if defined(__GNUC__) \|\| __has_builtin(__builtin_expect)
35978			# define likely(expr) __builtin_expect(!!(expr), 1)
35979			#else
35980			# define likely(expr) (expr)
35981			#endif
35982
35983
35984			#if defined(__GNUC__) \|\| __has_builtin(__builtin_expect)
35985			# define unlikely(expr) __builtin_expect(!!(expr), 0)
35986			#else
35987			# define unlikely(expr) (expr)
35988			#endif
35989
35990
35991			#undef prefetchr
35992			#if defined(__GNUC__) \|\| __has_builtin(__builtin_prefetch)
35993			# define prefetchr(addr) __builtin_prefetch((addr), 0)
35994			#elif defined(_MSC_VER)
35995			# if defined(ARCH_X86_32) \|\| defined(ARCH_X86_64)
35996			# define prefetchr(addr) _mm_prefetch((addr), _MM_HINT_T0)
35997			# elif defined(ARCH_ARM64)
35998			# define prefetchr(addr) __prefetch2((addr), 0x00 )
35999			# elif defined(ARCH_ARM32)
36000			# define prefetchr(addr) __prefetch(addr)
36001			# endif
36002			#endif
36003			#ifndef prefetchr
36004			# define prefetchr(addr)
36005			#endif
36006
36007
36008			#undef prefetchw
36009			#if defined(__GNUC__) \|\| __has_builtin(__builtin_prefetch)
36010			# define prefetchw(addr) __builtin_prefetch((addr), 1)
36011			#elif defined(_MSC_VER)
36012			# if defined(ARCH_X86_32) \|\| defined(ARCH_X86_64)
36013			# define prefetchw(addr) _m_prefetchw(addr)
36014			# elif defined(ARCH_ARM64)
36015			# define prefetchw(addr) __prefetch2((addr), 0x10 )
36016			# elif defined(ARCH_ARM32)
36017			# define prefetchw(addr) __prefetchw(addr)
36018			# endif
36019			#endif
36020			#ifndef prefetchw
36021			# define prefetchw(addr)
36022			#endif
36023
36024
36025			#undef _aligned_attribute
36026			#if defined(__GNUC__) \|\| __has_attribute(aligned)
36027			# define _aligned_attribute(n) __attribute__((aligned(n)))
36028			#elif defined(_MSC_VER)
36029			# define _aligned_attribute(n) __declspec(align(n))
36030			#endif
36031
36032
36033			#if defined(__GNUC__) \|\| __has_attribute(target)
36034			# define _target_attribute(attrs) __attribute__((target(attrs)))
36035			#else
36036			# define _target_attribute(attrs)
36037			#endif
36038
36039
36040
36041
36042
36043			#define ARRAY_LEN(A) (sizeof(A) / sizeof((A)[0]))
36044			#define MIN(a, b) ((a) <= (b) ? (a) : (b))
36045			#define MAX(a, b) ((a) >= (b) ? (a) : (b))
36046			#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
36047			#define STATIC_ASSERT(expr) ((void)sizeof(char[1 - 2 * !(expr)]))
36048			#define ALIGN(n, a) (((n) + (a) - 1) & ~((a) - 1))
36049			#define ROUND_UP(n, d) ((d) * DIV_ROUND_UP((n), (d)))
36050
36051
36052
36053
36054
36055
36056			#if defined(__BYTE_ORDER__)
36057			# define CPU_IS_LITTLE_ENDIAN() (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
36058			#elif defined(_MSC_VER)
36059			# define CPU_IS_LITTLE_ENDIAN() true
36060			#else
36061			static forceinline bool CPU_IS_LITTLE_ENDIAN(void)
36062			{
36063			union {
36064			u32 w;
36065			u8 b;
36066			} u;
36067
36068			u.w = 1;
36069			return u.b;
36070			}
36071			#endif
36072
36073
36074			static forceinline u16 bswap16(u16 v)
36075			{
36076			#if defined(__GNUC__) \|\| __has_builtin(__builtin_bswap16)
36077			return __builtin_bswap16(v);
36078			#elif defined(_MSC_VER)
36079			return _byteswap_ushort(v);
36080			#else
36081			return (v << 8) \| (v >> 8);
36082			#endif
36083			}
36084
36085
36086			static forceinline u32 bswap32(u32 v)
36087			{
36088			#if defined(__GNUC__) \|\| __has_builtin(__builtin_bswap32)
36089			return __builtin_bswap32(v);
36090			#elif defined(_MSC_VER)
36091			return _byteswap_ulong(v);
36092			#else
36093			return ((v & 0x000000FF) << 24) \|
36094			((v & 0x0000FF00) << 8) \|
36095			((v & 0x00FF0000) >> 8) \|
36096			((v & 0xFF000000) >> 24);
36097			#endif
36098			}
36099
36100
36101			static forceinline u64 bswap64(u64 v)
36102			{
36103			#if defined(__GNUC__) \|\| __has_builtin(__builtin_bswap64)
36104			return __builtin_bswap64(v);
36105			#elif defined(_MSC_VER)
36106			return _byteswap_uint64(v);
36107			#else
36108			return ((v & 0x00000000000000FF) << 56) \|
36109			((v & 0x000000000000FF00) << 40) \|
36110			((v & 0x0000000000FF0000) << 24) \|
36111			((v & 0x00000000FF000000) << 8) \|
36112			((v & 0x000000FF00000000) >> 8) \|
36113			((v & 0x0000FF0000000000) >> 24) \|
36114			((v & 0x00FF000000000000) >> 40) \|
36115			((v & 0xFF00000000000000) >> 56);
36116			#endif
36117			}
36118
36119			#define le16_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap16(v))
36120			#define le32_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap32(v))
36121			#define le64_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap64(v))
36122			#define be16_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? bswap16(v) : (v))
36123			#define be32_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? bswap32(v) : (v))
36124			#define be64_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? bswap64(v) : (v))
36125
36126
36127
36128
36129
36130
36131			#if (defined(__GNUC__) \|\| defined(__clang__)) && \
36132			(defined(ARCH_X86_64) \|\| defined(ARCH_X86_32) \|\| \
36133			defined(__ARM_FEATURE_UNALIGNED) \|\| defined(__powerpc64__) \|\| \
36134			defined(__riscv_misaligned_fast) \|\| \
36135			defined(__wasm__))
36136			# define UNALIGNED_ACCESS_IS_FAST 1
36137			#elif defined(_MSC_VER)
36138			# define UNALIGNED_ACCESS_IS_FAST 1
36139			#else
36140			# define UNALIGNED_ACCESS_IS_FAST 0
36141			#endif
36142
36143
36144
36145			#ifdef FREESTANDING
36146			# define MEMCOPY __builtin_memcpy
36147			#else
36148			# define MEMCOPY memcpy
36149			#endif
36150
36151
36152
36153			#define DEFINE_UNALIGNED_TYPE(type) \
36154			static forceinline type \
36155			load_##type##_unaligned(const void *p) \
36156			{ \
36157			type v; \
36158			\
36159			MEMCOPY(&v, p, sizeof(v)); \
36160			return v; \
36161			} \
36162			\
36163			static forceinline void \
36164			store_##type##_unaligned(type v, void *p) \
36165			{ \
36166			MEMCOPY(p, &v, sizeof(v)); \
36167			}
36168
36169			DEFINE_UNALIGNED_TYPE(u16)
36170			DEFINE_UNALIGNED_TYPE(u32)
36171			DEFINE_UNALIGNED_TYPE(u64)
36172			DEFINE_UNALIGNED_TYPE(machine_word_t)
36173
36174			#undef MEMCOPY
36175
36176			#define load_word_unaligned load_machine_word_t_unaligned
36177			#define store_word_unaligned store_machine_word_t_unaligned
36178
36179
36180
36181			static forceinline u16
36182			get_unaligned_le16(const u8 *p)
36183			{
36184			if (UNALIGNED_ACCESS_IS_FAST)
36185			return le16_bswap(load_u16_unaligned(p));
36186			else
36187			return ((u16)p[1] << 8) \| p[0];
36188			}
36189
36190			static forceinline u16
36191			get_unaligned_be16(const u8 *p)
36192			{
36193			if (UNALIGNED_ACCESS_IS_FAST)
36194			return be16_bswap(load_u16_unaligned(p));
36195			else
36196			return ((u16)p[0] << 8) \| p[1];
36197			}
36198
36199			static forceinline u32
36200			get_unaligned_le32(const u8 *p)
36201			{
36202			if (UNALIGNED_ACCESS_IS_FAST)
36203			return le32_bswap(load_u32_unaligned(p));
36204			else
36205			return ((u32)p[3] << 24) \| ((u32)p[2] << 16) \|
36206			((u32)p[1] << 8) \| p[0];
36207			}
36208
36209			static forceinline u32
36210			get_unaligned_be32(const u8 *p)
36211			{
36212			if (UNALIGNED_ACCESS_IS_FAST)
36213			return be32_bswap(load_u32_unaligned(p));
36214			else
36215			return ((u32)p[0] << 24) \| ((u32)p[1] << 16) \|
36216			((u32)p[2] << 8) \| p[3];
36217			}
36218
36219			static forceinline u64
36220			get_unaligned_le64(const u8 *p)
36221			{
36222			if (UNALIGNED_ACCESS_IS_FAST)
36223			return le64_bswap(load_u64_unaligned(p));
36224			else
36225			return ((u64)p[7] << 56) \| ((u64)p[6] << 48) \|
36226			((u64)p[5] << 40) \| ((u64)p[4] << 32) \|
36227			((u64)p[3] << 24) \| ((u64)p[2] << 16) \|
36228			((u64)p[1] << 8) \| p[0];
36229			}
36230
36231			static forceinline machine_word_t
36232			get_unaligned_leword(const u8 *p)
36233			{
36234			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
36235			if (WORDBITS == 32)
36236			return get_unaligned_le32(p);
36237			else
36238			return get_unaligned_le64(p);
36239			}
36240
36241
36242
36243			static forceinline void
36244			put_unaligned_le16(u16 v, u8 *p)
36245			{
36246			if (UNALIGNED_ACCESS_IS_FAST) {
36247			store_u16_unaligned(le16_bswap(v), p);
36248			} else {
36249			p[0] = (u8)(v >> 0);
36250			p[1] = (u8)(v >> 8);
36251			}
36252			}
36253
36254			static forceinline void
36255			put_unaligned_be16(u16 v, u8 *p)
36256			{
36257			if (UNALIGNED_ACCESS_IS_FAST) {
36258			store_u16_unaligned(be16_bswap(v), p);
36259			} else {
36260			p[0] = (u8)(v >> 8);
36261			p[1] = (u8)(v >> 0);
36262			}
36263			}
36264
36265			static forceinline void
36266			put_unaligned_le32(u32 v, u8 *p)
36267			{
36268			if (UNALIGNED_ACCESS_IS_FAST) {
36269			store_u32_unaligned(le32_bswap(v), p);
36270			} else {
36271			p[0] = (u8)(v >> 0);
36272			p[1] = (u8)(v >> 8);
36273			p[2] = (u8)(v >> 16);
36274			p[3] = (u8)(v >> 24);
36275			}
36276			}
36277
36278			static forceinline void
36279			put_unaligned_be32(u32 v, u8 *p)
36280			{
36281			if (UNALIGNED_ACCESS_IS_FAST) {
36282			store_u32_unaligned(be32_bswap(v), p);
36283			} else {
36284			p[0] = (u8)(v >> 24);
36285			p[1] = (u8)(v >> 16);
36286			p[2] = (u8)(v >> 8);
36287			p[3] = (u8)(v >> 0);
36288			}
36289			}
36290
36291			static forceinline void
36292			put_unaligned_le64(u64 v, u8 *p)
36293			{
36294			if (UNALIGNED_ACCESS_IS_FAST) {
36295			store_u64_unaligned(le64_bswap(v), p);
36296			} else {
36297			p[0] = (u8)(v >> 0);
36298			p[1] = (u8)(v >> 8);
36299			p[2] = (u8)(v >> 16);
36300			p[3] = (u8)(v >> 24);
36301			p[4] = (u8)(v >> 32);
36302			p[5] = (u8)(v >> 40);
36303			p[6] = (u8)(v >> 48);
36304			p[7] = (u8)(v >> 56);
36305			}
36306			}
36307
36308			static forceinline void
36309			put_unaligned_leword(machine_word_t v, u8 *p)
36310			{
36311			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
36312			if (WORDBITS == 32)
36313			put_unaligned_le32(v, p);
36314			else
36315			put_unaligned_le64(v, p);
36316			}
36317
36318
36319
36320
36321
36322
36323
36324			static forceinline unsigned
36325			bsr32(u32 v)
36326			{
36327			#if defined(__GNUC__) \|\| __has_builtin(__builtin_clz)
36328			return 31 - __builtin_clz(v);
36329			#elif defined(_MSC_VER)
36330			unsigned long i;
36331
36332			_BitScanReverse(&i, v);
36333			return i;
36334			#else
36335			unsigned i = 0;
36336
36337			while ((v >>= 1) != 0)
36338			i++;
36339			return i;
36340			#endif
36341			}
36342
36343			static forceinline unsigned
36344			bsr64(u64 v)
36345			{
36346			#if defined(__GNUC__) \|\| __has_builtin(__builtin_clzll)
36347			return 63 - __builtin_clzll(v);
36348			#elif defined(_MSC_VER) && defined(_WIN64)
36349			unsigned long i;
36350
36351			_BitScanReverse64(&i, v);
36352			return i;
36353			#else
36354			unsigned i = 0;
36355
36356			while ((v >>= 1) != 0)
36357			i++;
36358			return i;
36359			#endif
36360			}
36361
36362			static forceinline unsigned
36363			bsrw(machine_word_t v)
36364			{
36365			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
36366			if (WORDBITS == 32)
36367			return bsr32(v);
36368			else
36369			return bsr64(v);
36370			}
36371
36372
36373
36374			static forceinline unsigned
36375			bsf32(u32 v)
36376			{
36377			#if defined(__GNUC__) \|\| __has_builtin(__builtin_ctz)
36378			return __builtin_ctz(v);
36379			#elif defined(_MSC_VER)
36380			unsigned long i;
36381
36382			_BitScanForward(&i, v);
36383			return i;
36384			#else
36385			unsigned i = 0;
36386
36387			for (; (v & 1) == 0; v >>= 1)
36388			i++;
36389			return i;
36390			#endif
36391			}
36392
36393			static forceinline unsigned
36394			bsf64(u64 v)
36395			{
36396			#if defined(__GNUC__) \|\| __has_builtin(__builtin_ctzll)
36397			return __builtin_ctzll(v);
36398			#elif defined(_MSC_VER) && defined(_WIN64)
36399			unsigned long i;
36400
36401			_BitScanForward64(&i, v);
36402			return i;
36403			#else
36404			unsigned i = 0;
36405
36406			for (; (v & 1) == 0; v >>= 1)
36407			i++;
36408			return i;
36409			#endif
36410			}
36411
36412			static forceinline unsigned
36413			bsfw(machine_word_t v)
36414			{
36415			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
36416			if (WORDBITS == 32)
36417			return bsf32(v);
36418			else
36419			return bsf64(v);
36420			}
36421
36422
36423			#undef rbit32
36424			#if (defined(__GNUC__) \|\| defined(__clang__)) && defined(ARCH_ARM32) && \
36425			(__ARM_ARCH >= 7 \|\| (__ARM_ARCH == 6 && defined(__ARM_ARCH_6T2__)))
36426			static forceinline u32
36427			rbit32(u32 v)
36428			{
36429			__asm__("rbit %0, %1" : "=r" (v) : "r" (v));
36430			return v;
36431			}
36432			#define rbit32 rbit32
36433			#elif (defined(__GNUC__) \|\| defined(__clang__)) && defined(ARCH_ARM64)
36434			static forceinline u32
36435			rbit32(u32 v)
36436			{
36437			__asm__("rbit %w0, %w1" : "=r" (v) : "r" (v));
36438			return v;
36439			}
36440			#define rbit32 rbit32
36441			#endif
36442
36443			#endif
36444
36445
36446			typedef void *(*malloc_func_t)(size_t);
36447			typedef void (*free_func_t)(void *);
36448
36449			extern malloc_func_t libdeflate_default_malloc_func;
36450			extern free_func_t libdeflate_default_free_func;
36451
36452			void *libdeflate_aligned_malloc(malloc_func_t malloc_func,
36453			size_t alignment, size_t size);
36454			void libdeflate_aligned_free(free_func_t free_func, void *ptr);
36455
36456			#ifdef FREESTANDING
36457
36458			void *memset(void *s, int c, size_t n);
36459			#define memset(s, c, n) __builtin_memset((s), (c), (n))
36460
36461			void *memcpy(void *dest, const void *src, size_t n);
36462			#define memcpy(dest, src, n) __builtin_memcpy((dest), (src), (n))
36463
36464			void *memmove(void *dest, const void *src, size_t n);
36465			#define memmove(dest, src, n) __builtin_memmove((dest), (src), (n))
36466
36467			int memcmp(const void *s1, const void *s2, size_t n);
36468			#define memcmp(s1, s2, n) __builtin_memcmp((s1), (s2), (n))
36469
36470			#undef LIBDEFLATE_ENABLE_ASSERTIONS
36471			#else
36472			# include <string.h>
36473
36474			# ifdef __clang_analyzer__
36475			# define LIBDEFLATE_ENABLE_ASSERTIONS
36476			# endif
36477			#endif
36478
36479
36480			#ifdef LIBDEFLATE_ENABLE_ASSERTIONS
36481			NORETURN void
36482			libdeflate_assertion_failed(const char *expr, const char *file, int line);
36483			#define ASSERT(expr) { if (unlikely(!(expr))) \
36484			libdeflate_assertion_failed(#expr, __FILE__, __LINE__); }
36485			#else
36486			#define ASSERT(expr) (void)(expr)
36487			#endif
36488
36489			#define CONCAT_IMPL(a, b) a##b
36490			#define CONCAT(a, b) CONCAT_IMPL(a, b)
36491			#define ADD_SUFFIX(name) CONCAT(name, SUFFIX)
36492
36493			#endif
36494
36495			/* #include "zlib_constants.h" */
36496
36497
36498			#ifndef LIB_ZLIB_CONSTANTS_H
36499			#define LIB_ZLIB_CONSTANTS_H
36500
36501			#define ZLIB_MIN_HEADER_SIZE 2
36502			#define ZLIB_FOOTER_SIZE 4
36503			#define ZLIB_MIN_OVERHEAD (ZLIB_MIN_HEADER_SIZE + ZLIB_FOOTER_SIZE)
36504
36505			#define ZLIB_CM_DEFLATE 8
36506
36507			#define ZLIB_CINFO_32K_WINDOW 7
36508
36509			#define ZLIB_FASTEST_COMPRESSION 0
36510			#define ZLIB_FAST_COMPRESSION 1
36511			#define ZLIB_DEFAULT_COMPRESSION 2
36512			#define ZLIB_SLOWEST_COMPRESSION 3
36513
36514			#endif
36515
36516
36517			LIBDEFLATEAPI enum libdeflate_result
36518	12		libdeflate_zlib_decompress_ex(struct libdeflate_decompressor *d,
36519			const void *in, size_t in_nbytes,
36520			void *out, size_t out_nbytes_avail,
36521			size_t *actual_in_nbytes_ret,
36522			size_t *actual_out_nbytes_ret)
36523			{
36524	12		const u8 *in_next = in;
36525	12		const u8 * const in_end = in_next + in_nbytes;
36526			u16 hdr;
36527			size_t actual_in_nbytes;
36528			size_t actual_out_nbytes;
36529			enum libdeflate_result result;
36530
36531	12	50	if (in_nbytes < ZLIB_MIN_OVERHEAD)
36532	0		return LIBDEFLATE_BAD_DATA;
36533
36534
36535	12		hdr = get_unaligned_be16(in_next);
36536	12		in_next += 2;
36537
36538
36539	12	50	if ((hdr % 31) != 0)
36540	0		return LIBDEFLATE_BAD_DATA;
36541
36542
36543	12	50	if (((hdr >> 8) & 0xF) != ZLIB_CM_DEFLATE)
36544	0		return LIBDEFLATE_BAD_DATA;
36545
36546
36547	12	50	if ((hdr >> 12) > ZLIB_CINFO_32K_WINDOW)
36548	0		return LIBDEFLATE_BAD_DATA;
36549
36550
36551	12	50	if ((hdr >> 5) & 1)
36552	0		return LIBDEFLATE_BAD_DATA;
36553
36554
36555	12		result = libdeflate_deflate_decompress_ex(d, in_next,
36556	12		in_end - ZLIB_FOOTER_SIZE - in_next,
36557			out, out_nbytes_avail,
36558			&actual_in_nbytes, actual_out_nbytes_ret);
36559	12	50	if (result != LIBDEFLATE_SUCCESS)
36560	0		return result;
36561
36562	12	50	if (actual_out_nbytes_ret)
36563	12		actual_out_nbytes = *actual_out_nbytes_ret;
36564			else
36565	0		actual_out_nbytes = out_nbytes_avail;
36566
36567	12		in_next += actual_in_nbytes;
36568
36569
36570	12		if (libdeflate_adler32(1, out, actual_out_nbytes) !=
36571	12	50	get_unaligned_be32(in_next))
36572	0		return LIBDEFLATE_BAD_DATA;
36573	12		in_next += 4;
36574
36575	12	50	if (actual_in_nbytes_ret)
36576	12		*actual_in_nbytes_ret = in_next - (u8 *)in;
36577
36578	12		return LIBDEFLATE_SUCCESS;
36579			}
36580
36581			LIBDEFLATEAPI enum libdeflate_result
36582	0		libdeflate_zlib_decompress(struct libdeflate_decompressor *d,
36583			const void *in, size_t in_nbytes,
36584			void *out, size_t out_nbytes_avail,
36585			size_t *actual_out_nbytes_ret)
36586			{
36587	0		return libdeflate_zlib_decompress_ex(d, in, in_nbytes,
36588			out, out_nbytes_avail,
36589			NULL, actual_out_nbytes_ret);
36590			}
36591			/* /usr/home/ben/projects/gzip-libdeflate/../../software/libdeflate/libdeflate-1.25/lib/arm/cpu_features.c */
36592
36593
36594
36595
36596			#ifdef __APPLE__
36597			# undef _ANSI_SOURCE
36598			# undef _DARWIN_C_SOURCE
36599			# define _DARWIN_C_SOURCE
36600			#endif
36601
36602			/* #include "cpu_features_common.h" */
36603
36604
36605			#ifndef LIB_CPU_FEATURES_COMMON_H
36606			#define LIB_CPU_FEATURES_COMMON_H
36607
36608			#if defined(TEST_SUPPORT__DO_NOT_USE) && !defined(FREESTANDING)
36609
36610			# undef _ANSI_SOURCE
36611			# ifndef __APPLE__
36612			# undef _GNU_SOURCE
36613			# define _GNU_SOURCE
36614			# endif
36615			# include <stdio.h>
36616			# include <stdlib.h>
36617			# include <string.h>
36618			#endif
36619
36620			/* #include "lib_common.h" */
36621
36622
36623			#ifndef LIB_LIB_COMMON_H
36624			#define LIB_LIB_COMMON_H
36625
36626			#ifdef LIBDEFLATE_H
36627
36628			# error "lib_common.h must always be included before libdeflate.h"
36629			#endif
36630
36631			#if defined(LIBDEFLATE_DLL) && (defined(_WIN32) \|\| defined(__CYGWIN__))
36632			# define LIBDEFLATE_EXPORT_SYM __declspec(dllexport)
36633			#elif defined(__GNUC__)
36634			# define LIBDEFLATE_EXPORT_SYM __attribute__((visibility("default")))
36635			#else
36636			# define LIBDEFLATE_EXPORT_SYM
36637			#endif
36638
36639
36640			#if defined(__GNUC__) && defined(__i386__)
36641			# define LIBDEFLATE_ALIGN_STACK __attribute__((force_align_arg_pointer))
36642			#else
36643			# define LIBDEFLATE_ALIGN_STACK
36644			#endif
36645
36646			#define LIBDEFLATEAPI LIBDEFLATE_EXPORT_SYM LIBDEFLATE_ALIGN_STACK
36647
36648			/* #include "../common_defs.h" */
36649
36650
36651			#ifndef COMMON_DEFS_H
36652			#define COMMON_DEFS_H
36653
36654			/* #include "libdeflate.h" */
36655
36656
36657			#ifndef LIBDEFLATE_H
36658			#define LIBDEFLATE_H
36659
36660			#include <stddef.h>
36661			#include <stdint.h>
36662
36663			#ifdef __cplusplus
36664			extern "C" {
36665			#endif
36666
36667			#define LIBDEFLATE_VERSION_MAJOR 1
36668			#define LIBDEFLATE_VERSION_MINOR 25
36669			#define LIBDEFLATE_VERSION_STRING "1.25"
36670
36671
36672			#ifndef LIBDEFLATEAPI
36673			# if defined(LIBDEFLATE_DLL) && (defined(_WIN32) \|\| defined(__CYGWIN__))
36674			# define LIBDEFLATEAPI __declspec(dllimport)
36675			# else
36676			# define LIBDEFLATEAPI
36677			# endif
36678			#endif
36679
36680
36681
36682
36683
36684			struct libdeflate_compressor;
36685			struct libdeflate_options;
36686
36687
36688			LIBDEFLATEAPI struct libdeflate_compressor *
36689			libdeflate_alloc_compressor(int compression_level);
36690
36691
36692			LIBDEFLATEAPI struct libdeflate_compressor *
36693			libdeflate_alloc_compressor_ex(int compression_level,
36694			const struct libdeflate_options *options);
36695
36696
36697			LIBDEFLATEAPI size_t
36698			libdeflate_deflate_compress(struct libdeflate_compressor *compressor,
36699			const void *in, size_t in_nbytes,
36700			void *out, size_t out_nbytes_avail);
36701
36702
36703			LIBDEFLATEAPI size_t
36704			libdeflate_deflate_compress_bound(struct libdeflate_compressor *compressor,
36705			size_t in_nbytes);
36706
36707
36708			LIBDEFLATEAPI size_t
36709			libdeflate_zlib_compress(struct libdeflate_compressor *compressor,
36710			const void *in, size_t in_nbytes,
36711			void *out, size_t out_nbytes_avail);
36712
36713
36714			LIBDEFLATEAPI size_t
36715			libdeflate_zlib_compress_bound(struct libdeflate_compressor *compressor,
36716			size_t in_nbytes);
36717
36718
36719			LIBDEFLATEAPI size_t
36720			libdeflate_gzip_compress(struct libdeflate_compressor *compressor,
36721			const void *in, size_t in_nbytes,
36722			void *out, size_t out_nbytes_avail);
36723
36724
36725			LIBDEFLATEAPI size_t
36726			libdeflate_gzip_compress_bound(struct libdeflate_compressor *compressor,
36727			size_t in_nbytes);
36728
36729
36730			LIBDEFLATEAPI void
36731			libdeflate_free_compressor(struct libdeflate_compressor *compressor);
36732
36733
36734
36735
36736
36737			struct libdeflate_decompressor;
36738			struct libdeflate_options;
36739
36740
36741			LIBDEFLATEAPI struct libdeflate_decompressor *
36742			libdeflate_alloc_decompressor(void);
36743
36744
36745			LIBDEFLATEAPI struct libdeflate_decompressor *
36746			libdeflate_alloc_decompressor_ex(const struct libdeflate_options *options);
36747
36748
36749			enum libdeflate_result {
36750
36751			LIBDEFLATE_SUCCESS = 0,
36752
36753
36754			LIBDEFLATE_BAD_DATA = 1,
36755
36756
36757			LIBDEFLATE_SHORT_OUTPUT = 2,
36758
36759
36760			LIBDEFLATE_INSUFFICIENT_SPACE = 3,
36761			};
36762
36763
36764			LIBDEFLATEAPI enum libdeflate_result
36765			libdeflate_deflate_decompress(struct libdeflate_decompressor *decompressor,
36766			const void *in, size_t in_nbytes,
36767			void *out, size_t out_nbytes_avail,
36768			size_t *actual_out_nbytes_ret);
36769
36770
36771			LIBDEFLATEAPI enum libdeflate_result
36772			libdeflate_deflate_decompress_ex(struct libdeflate_decompressor *decompressor,
36773			const void *in, size_t in_nbytes,
36774			void *out, size_t out_nbytes_avail,
36775			size_t *actual_in_nbytes_ret,
36776			size_t *actual_out_nbytes_ret);
36777
36778
36779			LIBDEFLATEAPI enum libdeflate_result
36780			libdeflate_zlib_decompress(struct libdeflate_decompressor *decompressor,
36781			const void *in, size_t in_nbytes,
36782			void *out, size_t out_nbytes_avail,
36783			size_t *actual_out_nbytes_ret);
36784
36785
36786			LIBDEFLATEAPI enum libdeflate_result
36787			libdeflate_zlib_decompress_ex(struct libdeflate_decompressor *decompressor,
36788			const void *in, size_t in_nbytes,
36789			void *out, size_t out_nbytes_avail,
36790			size_t *actual_in_nbytes_ret,
36791			size_t *actual_out_nbytes_ret);
36792
36793
36794			LIBDEFLATEAPI enum libdeflate_result
36795			libdeflate_gzip_decompress(struct libdeflate_decompressor *decompressor,
36796			const void *in, size_t in_nbytes,
36797			void *out, size_t out_nbytes_avail,
36798			size_t *actual_out_nbytes_ret);
36799
36800
36801			LIBDEFLATEAPI enum libdeflate_result
36802			libdeflate_gzip_decompress_ex(struct libdeflate_decompressor *decompressor,
36803			const void *in, size_t in_nbytes,
36804			void *out, size_t out_nbytes_avail,
36805			size_t *actual_in_nbytes_ret,
36806			size_t *actual_out_nbytes_ret);
36807
36808
36809			LIBDEFLATEAPI void
36810			libdeflate_free_decompressor(struct libdeflate_decompressor *decompressor);
36811
36812
36813
36814
36815
36816
36817			LIBDEFLATEAPI uint32_t
36818			libdeflate_adler32(uint32_t adler, const void *buffer, size_t len);
36819
36820
36821
36822			LIBDEFLATEAPI uint32_t
36823			libdeflate_crc32(uint32_t crc, const void *buffer, size_t len);
36824
36825
36826
36827
36828
36829
36830			LIBDEFLATEAPI void
36831			libdeflate_set_memory_allocator(void *(*malloc_func)(size_t),
36832			void (*free_func)(void *));
36833
36834
36835			struct libdeflate_options {
36836
36837
36838			size_t sizeof_options;
36839
36840
36841			void *(*malloc_func)(size_t);
36842			void (*free_func)(void *);
36843			};
36844
36845			#ifdef __cplusplus
36846			}
36847			#endif
36848
36849			#endif
36850
36851
36852			#include <stdbool.h>
36853			#include <stddef.h>
36854			#include <stdint.h>
36855			#ifdef _MSC_VER
36856			# include <intrin.h>
36857			# include <stdlib.h>
36858
36859
36860			# pragma warning(disable : 4146)
36861
36862			# pragma warning(disable : 4018)
36863			# pragma warning(disable : 4244)
36864			# pragma warning(disable : 4267)
36865			# pragma warning(disable : 4310)
36866
36867			# pragma warning(disable : 4100)
36868			# pragma warning(disable : 4127)
36869			# pragma warning(disable : 4189)
36870			# pragma warning(disable : 4232)
36871			# pragma warning(disable : 4245)
36872			# pragma warning(disable : 4295)
36873			#endif
36874			#ifndef FREESTANDING
36875			# include <string.h>
36876			#endif
36877
36878
36879
36880
36881
36882
36883			#undef ARCH_X86_64
36884			#undef ARCH_X86_32
36885			#undef ARCH_ARM64
36886			#undef ARCH_ARM32
36887			#undef ARCH_RISCV
36888			#ifdef _MSC_VER
36889
36890			# if defined(_M_X64) && !defined(_M_ARM64EC)
36891			# define ARCH_X86_64
36892			# elif defined(_M_IX86)
36893			# define ARCH_X86_32
36894			# elif defined(_M_ARM64)
36895			# define ARCH_ARM64
36896			# elif defined(_M_ARM)
36897			# define ARCH_ARM32
36898			# endif
36899			#else
36900			# if defined(__x86_64__)
36901			# define ARCH_X86_64
36902			# elif defined(__i386__)
36903			# define ARCH_X86_32
36904			# elif defined(__aarch64__)
36905			# define ARCH_ARM64
36906			# elif defined(__arm__)
36907			# define ARCH_ARM32
36908			# elif defined(__riscv)
36909			# define ARCH_RISCV
36910			# endif
36911			#endif
36912
36913
36914
36915
36916
36917
36918			typedef uint8_t u8;
36919			typedef uint16_t u16;
36920			typedef uint32_t u32;
36921			typedef uint64_t u64;
36922			typedef int8_t s8;
36923			typedef int16_t s16;
36924			typedef int32_t s32;
36925			typedef int64_t s64;
36926
36927
36928			#ifdef _MSC_VER
36929			# ifdef _WIN64
36930			typedef long long ssize_t;
36931			# else
36932			typedef long ssize_t;
36933			# endif
36934			#endif
36935
36936
36937			typedef size_t machine_word_t;
36938
36939
36940			#define WORDBYTES ((int)sizeof(machine_word_t))
36941
36942
36943			#define WORDBITS (8 * WORDBYTES)
36944
36945
36946
36947
36948
36949
36950			#if defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER)
36951			# define GCC_PREREQ(major, minor) \
36952			(__GNUC__ > (major) \|\| \
36953			(__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)))
36954			# if !GCC_PREREQ(4, 9)
36955			# error "gcc versions older than 4.9 are no longer supported"
36956			# endif
36957			#else
36958			# define GCC_PREREQ(major, minor) 0
36959			#endif
36960			#ifdef __clang__
36961			# ifdef __apple_build_version__
36962			# define CLANG_PREREQ(major, minor, apple_version) \
36963			(__apple_build_version__ >= (apple_version))
36964			# else
36965			# define CLANG_PREREQ(major, minor, apple_version) \
36966			(__clang_major__ > (major) \|\| \
36967			(__clang_major__ == (major) && __clang_minor__ >= (minor)))
36968			# endif
36969			# if !CLANG_PREREQ(3, 9, 8000000)
36970			# error "clang versions older than 3.9 are no longer supported"
36971			# endif
36972			#else
36973			# define CLANG_PREREQ(major, minor, apple_version) 0
36974			#endif
36975			#ifdef _MSC_VER
36976			# define MSVC_PREREQ(version) (_MSC_VER >= (version))
36977			# if !MSVC_PREREQ(1900)
36978			# error "MSVC versions older than Visual Studio 2015 are no longer supported"
36979			# endif
36980			#else
36981			# define MSVC_PREREQ(version) 0
36982			#endif
36983
36984
36985			#ifndef __has_attribute
36986			# define __has_attribute(attribute) 0
36987			#endif
36988
36989
36990			#ifndef __has_builtin
36991			# define __has_builtin(builtin) 0
36992			#endif
36993
36994
36995			#ifdef _MSC_VER
36996			# define inline __inline
36997			#endif
36998
36999
37000			#if defined(__GNUC__) \|\| __has_attribute(always_inline)
37001			# define forceinline inline __attribute__((always_inline))
37002			#elif defined(_MSC_VER)
37003			# define forceinline __forceinline
37004			#else
37005			# define forceinline inline
37006			#endif
37007
37008
37009			#if defined(__GNUC__) \|\| __has_attribute(unused)
37010			# define MAYBE_UNUSED __attribute__((unused))
37011			#else
37012			# define MAYBE_UNUSED
37013			#endif
37014
37015
37016			#if defined(__GNUC__) \|\| __has_attribute(noreturn)
37017			# define NORETURN __attribute__((noreturn))
37018			#else
37019			# define NORETURN
37020			#endif
37021
37022
37023			#if !defined(__STDC_VERSION__) \|\| (__STDC_VERSION__ < 201112L)
37024			# if defined(__GNUC__) \|\| defined(__clang__)
37025			# define restrict __restrict__
37026			# else
37027			# define restrict
37028			# endif
37029			#endif
37030
37031
37032			#if defined(__GNUC__) \|\| __has_builtin(__builtin_expect)
37033			# define likely(expr) __builtin_expect(!!(expr), 1)
37034			#else
37035			# define likely(expr) (expr)
37036			#endif
37037
37038
37039			#if defined(__GNUC__) \|\| __has_builtin(__builtin_expect)
37040			# define unlikely(expr) __builtin_expect(!!(expr), 0)
37041			#else
37042			# define unlikely(expr) (expr)
37043			#endif
37044
37045
37046			#undef prefetchr
37047			#if defined(__GNUC__) \|\| __has_builtin(__builtin_prefetch)
37048			# define prefetchr(addr) __builtin_prefetch((addr), 0)
37049			#elif defined(_MSC_VER)
37050			# if defined(ARCH_X86_32) \|\| defined(ARCH_X86_64)
37051			# define prefetchr(addr) _mm_prefetch((addr), _MM_HINT_T0)
37052			# elif defined(ARCH_ARM64)
37053			# define prefetchr(addr) __prefetch2((addr), 0x00 )
37054			# elif defined(ARCH_ARM32)
37055			# define prefetchr(addr) __prefetch(addr)
37056			# endif
37057			#endif
37058			#ifndef prefetchr
37059			# define prefetchr(addr)
37060			#endif
37061
37062
37063			#undef prefetchw
37064			#if defined(__GNUC__) \|\| __has_builtin(__builtin_prefetch)
37065			# define prefetchw(addr) __builtin_prefetch((addr), 1)
37066			#elif defined(_MSC_VER)
37067			# if defined(ARCH_X86_32) \|\| defined(ARCH_X86_64)
37068			# define prefetchw(addr) _m_prefetchw(addr)
37069			# elif defined(ARCH_ARM64)
37070			# define prefetchw(addr) __prefetch2((addr), 0x10 )
37071			# elif defined(ARCH_ARM32)
37072			# define prefetchw(addr) __prefetchw(addr)
37073			# endif
37074			#endif
37075			#ifndef prefetchw
37076			# define prefetchw(addr)
37077			#endif
37078
37079
37080			#undef _aligned_attribute
37081			#if defined(__GNUC__) \|\| __has_attribute(aligned)
37082			# define _aligned_attribute(n) __attribute__((aligned(n)))
37083			#elif defined(_MSC_VER)
37084			# define _aligned_attribute(n) __declspec(align(n))
37085			#endif
37086
37087
37088			#if defined(__GNUC__) \|\| __has_attribute(target)
37089			# define _target_attribute(attrs) __attribute__((target(attrs)))
37090			#else
37091			# define _target_attribute(attrs)
37092			#endif
37093
37094
37095
37096
37097
37098			#define ARRAY_LEN(A) (sizeof(A) / sizeof((A)[0]))
37099			#define MIN(a, b) ((a) <= (b) ? (a) : (b))
37100			#define MAX(a, b) ((a) >= (b) ? (a) : (b))
37101			#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
37102			#define STATIC_ASSERT(expr) ((void)sizeof(char[1 - 2 * !(expr)]))
37103			#define ALIGN(n, a) (((n) + (a) - 1) & ~((a) - 1))
37104			#define ROUND_UP(n, d) ((d) * DIV_ROUND_UP((n), (d)))
37105
37106
37107
37108
37109
37110
37111			#if defined(__BYTE_ORDER__)
37112			# define CPU_IS_LITTLE_ENDIAN() (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
37113			#elif defined(_MSC_VER)
37114			# define CPU_IS_LITTLE_ENDIAN() true
37115			#else
37116			static forceinline bool CPU_IS_LITTLE_ENDIAN(void)
37117			{
37118			union {
37119			u32 w;
37120			u8 b;
37121			} u;
37122
37123			u.w = 1;
37124			return u.b;
37125			}
37126			#endif
37127
37128
37129			static forceinline u16 bswap16(u16 v)
37130			{
37131			#if defined(__GNUC__) \|\| __has_builtin(__builtin_bswap16)
37132			return __builtin_bswap16(v);
37133			#elif defined(_MSC_VER)
37134			return _byteswap_ushort(v);
37135			#else
37136			return (v << 8) \| (v >> 8);
37137			#endif
37138			}
37139
37140
37141			static forceinline u32 bswap32(u32 v)
37142			{
37143			#if defined(__GNUC__) \|\| __has_builtin(__builtin_bswap32)
37144			return __builtin_bswap32(v);
37145			#elif defined(_MSC_VER)
37146			return _byteswap_ulong(v);
37147			#else
37148			return ((v & 0x000000FF) << 24) \|
37149			((v & 0x0000FF00) << 8) \|
37150			((v & 0x00FF0000) >> 8) \|
37151			((v & 0xFF000000) >> 24);
37152			#endif
37153			}
37154
37155
37156			static forceinline u64 bswap64(u64 v)
37157			{
37158			#if defined(__GNUC__) \|\| __has_builtin(__builtin_bswap64)
37159			return __builtin_bswap64(v);
37160			#elif defined(_MSC_VER)
37161			return _byteswap_uint64(v);
37162			#else
37163			return ((v & 0x00000000000000FF) << 56) \|
37164			((v & 0x000000000000FF00) << 40) \|
37165			((v & 0x0000000000FF0000) << 24) \|
37166			((v & 0x00000000FF000000) << 8) \|
37167			((v & 0x000000FF00000000) >> 8) \|
37168			((v & 0x0000FF0000000000) >> 24) \|
37169			((v & 0x00FF000000000000) >> 40) \|
37170			((v & 0xFF00000000000000) >> 56);
37171			#endif
37172			}
37173
/*
 * Convert between CPU byte order and a fixed byte order.  The leNN forms
 * swap only on big-endian CPUs, the beNN forms only on little-endian CPUs,
 * so each macro is its own inverse.
 */
#define le16_bswap(v)	(CPU_IS_LITTLE_ENDIAN() ? (v) : bswap16(v))
#define le32_bswap(v)	(CPU_IS_LITTLE_ENDIAN() ? (v) : bswap32(v))
#define le64_bswap(v)	(CPU_IS_LITTLE_ENDIAN() ? (v) : bswap64(v))
#define be16_bswap(v)	(CPU_IS_LITTLE_ENDIAN() ? bswap16(v) : (v))
#define be32_bswap(v)	(CPU_IS_LITTLE_ENDIAN() ? bswap32(v) : (v))
#define be64_bswap(v)	(CPU_IS_LITTLE_ENDIAN() ? bswap64(v) : (v))
37180
37181
37182
37183
37184
37185
/*
 * UNALIGNED_ACCESS_IS_FAST - 1 if word-sized unaligned loads and stores
 * should be used, 0 if byte-by-byte access should be used instead.  Set for
 * GNU-compatible compilers on the architectures listed below, and
 * unconditionally for MSVC.
 */
#if (defined(__GNUC__) || defined(__clang__)) && \
	(defined(ARCH_X86_64) || defined(ARCH_X86_32) || \
	 defined(__ARM_FEATURE_UNALIGNED) || defined(__powerpc64__) || \
	 defined(__riscv_misaligned_fast) || \
	 defined(__wasm__))
#  define UNALIGNED_ACCESS_IS_FAST	1
#elif defined(_MSC_VER)
#  define UNALIGNED_ACCESS_IS_FAST	1
#else
#  define UNALIGNED_ACCESS_IS_FAST	0
#endif
37197
37198
37199
37200			#ifdef FREESTANDING
37201			# define MEMCOPY __builtin_memcpy
37202			#else
37203			# define MEMCOPY memcpy
37204			#endif
37205
37206
37207
37208			#define DEFINE_UNALIGNED_TYPE(type) \
37209			static forceinline type \
37210			load_##type##_unaligned(const void *p) \
37211			{ \
37212			type v; \
37213			\
37214			MEMCOPY(&v, p, sizeof(v)); \
37215			return v; \
37216			} \
37217			\
37218			static forceinline void \
37219			store_##type##_unaligned(type v, void *p) \
37220			{ \
37221			MEMCOPY(p, &v, sizeof(v)); \
37222			}
37223
37224			DEFINE_UNALIGNED_TYPE(u16)
37225			DEFINE_UNALIGNED_TYPE(u32)
37226			DEFINE_UNALIGNED_TYPE(u64)
37227			DEFINE_UNALIGNED_TYPE(machine_word_t)
37228
37229			#undef MEMCOPY
37230
37231			#define load_word_unaligned load_machine_word_t_unaligned
37232			#define store_word_unaligned store_machine_word_t_unaligned
37233
37234
37235
37236			static forceinline u16
37237			get_unaligned_le16(const u8 *p)
37238			{
37239			if (UNALIGNED_ACCESS_IS_FAST)
37240			return le16_bswap(load_u16_unaligned(p));
37241			else
37242			return ((u16)p[1] << 8) \| p[0];
37243			}
37244
37245			static forceinline u16
37246			get_unaligned_be16(const u8 *p)
37247			{
37248			if (UNALIGNED_ACCESS_IS_FAST)
37249			return be16_bswap(load_u16_unaligned(p));
37250			else
37251			return ((u16)p[0] << 8) \| p[1];
37252			}
37253
37254			static forceinline u32
37255			get_unaligned_le32(const u8 *p)
37256			{
37257			if (UNALIGNED_ACCESS_IS_FAST)
37258			return le32_bswap(load_u32_unaligned(p));
37259			else
37260			return ((u32)p[3] << 24) \| ((u32)p[2] << 16) \|
37261			((u32)p[1] << 8) \| p[0];
37262			}
37263
37264			static forceinline u32
37265			get_unaligned_be32(const u8 *p)
37266			{
37267			if (UNALIGNED_ACCESS_IS_FAST)
37268			return be32_bswap(load_u32_unaligned(p));
37269			else
37270			return ((u32)p[0] << 24) \| ((u32)p[1] << 16) \|
37271			((u32)p[2] << 8) \| p[3];
37272			}
37273
37274			static forceinline u64
37275			get_unaligned_le64(const u8 *p)
37276			{
37277			if (UNALIGNED_ACCESS_IS_FAST)
37278			return le64_bswap(load_u64_unaligned(p));
37279			else
37280			return ((u64)p[7] << 56) \| ((u64)p[6] << 48) \|
37281			((u64)p[5] << 40) \| ((u64)p[4] << 32) \|
37282			((u64)p[3] << 24) \| ((u64)p[2] << 16) \|
37283			((u64)p[1] << 8) \| p[0];
37284			}
37285
37286			static forceinline machine_word_t
37287			get_unaligned_leword(const u8 *p)
37288			{
37289			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
37290			if (WORDBITS == 32)
37291			return get_unaligned_le32(p);
37292			else
37293			return get_unaligned_le64(p);
37294			}
37295
37296
37297
37298			static forceinline void
37299			put_unaligned_le16(u16 v, u8 *p)
37300			{
37301			if (UNALIGNED_ACCESS_IS_FAST) {
37302			store_u16_unaligned(le16_bswap(v), p);
37303			} else {
37304			p[0] = (u8)(v >> 0);
37305			p[1] = (u8)(v >> 8);
37306			}
37307			}
37308
37309			static forceinline void
37310			put_unaligned_be16(u16 v, u8 *p)
37311			{
37312			if (UNALIGNED_ACCESS_IS_FAST) {
37313			store_u16_unaligned(be16_bswap(v), p);
37314			} else {
37315			p[0] = (u8)(v >> 8);
37316			p[1] = (u8)(v >> 0);
37317			}
37318			}
37319
37320			static forceinline void
37321			put_unaligned_le32(u32 v, u8 *p)
37322			{
37323			if (UNALIGNED_ACCESS_IS_FAST) {
37324			store_u32_unaligned(le32_bswap(v), p);
37325			} else {
37326			p[0] = (u8)(v >> 0);
37327			p[1] = (u8)(v >> 8);
37328			p[2] = (u8)(v >> 16);
37329			p[3] = (u8)(v >> 24);
37330			}
37331			}
37332
37333			static forceinline void
37334			put_unaligned_be32(u32 v, u8 *p)
37335			{
37336			if (UNALIGNED_ACCESS_IS_FAST) {
37337			store_u32_unaligned(be32_bswap(v), p);
37338			} else {
37339			p[0] = (u8)(v >> 24);
37340			p[1] = (u8)(v >> 16);
37341			p[2] = (u8)(v >> 8);
37342			p[3] = (u8)(v >> 0);
37343			}
37344			}
37345
37346			static forceinline void
37347			put_unaligned_le64(u64 v, u8 *p)
37348			{
37349			if (UNALIGNED_ACCESS_IS_FAST) {
37350			store_u64_unaligned(le64_bswap(v), p);
37351			} else {
37352			p[0] = (u8)(v >> 0);
37353			p[1] = (u8)(v >> 8);
37354			p[2] = (u8)(v >> 16);
37355			p[3] = (u8)(v >> 24);
37356			p[4] = (u8)(v >> 32);
37357			p[5] = (u8)(v >> 40);
37358			p[6] = (u8)(v >> 48);
37359			p[7] = (u8)(v >> 56);
37360			}
37361			}
37362
37363			static forceinline void
37364			put_unaligned_leword(machine_word_t v, u8 *p)
37365			{
37366			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
37367			if (WORDBITS == 32)
37368			put_unaligned_le32(v, p);
37369			else
37370			put_unaligned_le64(v, p);
37371			}
37372
37373
37374
37375
37376
37377
37378
37379			static forceinline unsigned
37380			bsr32(u32 v)
37381			{
37382			#if defined(__GNUC__) \|\| __has_builtin(__builtin_clz)
37383			return 31 - __builtin_clz(v);
37384			#elif defined(_MSC_VER)
37385			unsigned long i;
37386
37387			_BitScanReverse(&i, v);
37388			return i;
37389			#else
37390			unsigned i = 0;
37391
37392			while ((v >>= 1) != 0)
37393			i++;
37394			return i;
37395			#endif
37396			}
37397
37398			static forceinline unsigned
37399			bsr64(u64 v)
37400			{
37401			#if defined(__GNUC__) \|\| __has_builtin(__builtin_clzll)
37402			return 63 - __builtin_clzll(v);
37403			#elif defined(_MSC_VER) && defined(_WIN64)
37404			unsigned long i;
37405
37406			_BitScanReverse64(&i, v);
37407			return i;
37408			#else
37409			unsigned i = 0;
37410
37411			while ((v >>= 1) != 0)
37412			i++;
37413			return i;
37414			#endif
37415			}
37416
37417			static forceinline unsigned
37418			bsrw(machine_word_t v)
37419			{
37420			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
37421			if (WORDBITS == 32)
37422			return bsr32(v);
37423			else
37424			return bsr64(v);
37425			}
37426
37427
37428
37429			static forceinline unsigned
37430			bsf32(u32 v)
37431			{
37432			#if defined(__GNUC__) \|\| __has_builtin(__builtin_ctz)
37433			return __builtin_ctz(v);
37434			#elif defined(_MSC_VER)
37435			unsigned long i;
37436
37437			_BitScanForward(&i, v);
37438			return i;
37439			#else
37440			unsigned i = 0;
37441
37442			for (; (v & 1) == 0; v >>= 1)
37443			i++;
37444			return i;
37445			#endif
37446			}
37447
37448			static forceinline unsigned
37449			bsf64(u64 v)
37450			{
37451			#if defined(__GNUC__) \|\| __has_builtin(__builtin_ctzll)
37452			return __builtin_ctzll(v);
37453			#elif defined(_MSC_VER) && defined(_WIN64)
37454			unsigned long i;
37455
37456			_BitScanForward64(&i, v);
37457			return i;
37458			#else
37459			unsigned i = 0;
37460
37461			for (; (v & 1) == 0; v >>= 1)
37462			i++;
37463			return i;
37464			#endif
37465			}
37466
37467			static forceinline unsigned
37468			bsfw(machine_word_t v)
37469			{
37470			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
37471			if (WORDBITS == 32)
37472			return bsf32(v);
37473			else
37474			return bsf64(v);
37475			}
37476
37477
/*
 * rbit32(v) - run the ARM "rbit" instruction on a 32-bit value.
 *
 * Only defined for GNU-compatible compilers targeting 32-bit ARM (ARMv7+,
 * or ARMv6T2) or 64-bit ARM.  The self-referential #define lets other code
 * test for availability with #ifdef rbit32.
 */
#undef rbit32
#if (defined(__GNUC__) || defined(__clang__)) && defined(ARCH_ARM32) && \
	(__ARM_ARCH >= 7 || (__ARM_ARCH == 6 && defined(__ARM_ARCH_6T2__)))
static forceinline u32
rbit32(u32 v)
{
	__asm__("rbit %0, %1" : "=r" (v) : "r" (v));
	return v;
}
#define rbit32 rbit32
#elif (defined(__GNUC__) || defined(__clang__)) && defined(ARCH_ARM64)
static forceinline u32
rbit32(u32 v)
{
	/* %w selects the 32-bit view of the register. */
	__asm__("rbit %w0, %w1" : "=r" (v) : "r" (v));
	return v;
}
#define rbit32 rbit32
#endif
37497
37498			#endif
37499
37500
/*
 * Allocator callback types.  malloc_func_t takes a size and returns a
 * pointer to the new block; free_func_t takes the pointer to release.
 */
typedef void *(*malloc_func_t)(size_t);
typedef void (*free_func_t)(void *);

/* Default allocator callbacks (defined elsewhere). */
extern malloc_func_t libdeflate_default_malloc_func;
extern free_func_t libdeflate_default_free_func;

/* Aligned allocation helpers built on top of caller-supplied callbacks. */
void *libdeflate_aligned_malloc(malloc_func_t malloc_func,
				size_t alignment, size_t size);
void libdeflate_aligned_free(free_func_t free_func, void *ptr);
37510
#ifdef FREESTANDING
/*
 * Freestanding builds: declare the mem*() functions here and route every
 * call to the compiler builtins, so <string.h> is not needed.
 */
void *memset(void *s, int c, size_t n);
#define memset(s, c, n)		__builtin_memset((s), (c), (n))

void *memcpy(void *dest, const void *src, size_t n);
#define memcpy(dest, src, n)	__builtin_memcpy((dest), (src), (n))

void *memmove(void *dest, const void *src, size_t n);
#define memmove(dest, src, n)	__builtin_memmove((dest), (src), (n))

int memcmp(const void *s1, const void *s2, size_t n);
#define memcmp(s1, s2, n)	__builtin_memcmp((s1), (s2), (n))

/* Assertions are never enabled in freestanding builds. */
#undef LIBDEFLATE_ENABLE_ASSERTIONS
#else
#  include <string.h>

/* Always enable assertions when running under the clang static analyzer. */
#  ifdef __clang_analyzer__
#    define LIBDEFLATE_ENABLE_ASSERTIONS
#  endif
#endif
37533
37534
/*
 * ASSERT(expr) - with LIBDEFLATE_ENABLE_ASSERTIONS, report a false expr via
 * libdeflate_assertion_failed() along with its text, file and line.
 * Otherwise expr is still evaluated but its value is discarded.
 */
#ifdef LIBDEFLATE_ENABLE_ASSERTIONS
NORETURN void
libdeflate_assertion_failed(const char *expr, const char *file, int line);
#define ASSERT(expr) { if (unlikely(!(expr))) \
	libdeflate_assertion_failed(#expr, __FILE__, __LINE__); }
#else
#define ASSERT(expr) (void)(expr)
#endif
37543
/*
 * Token pasting helpers.  CONCAT() goes through CONCAT_IMPL() so that macro
 * arguments are expanded before being pasted; ADD_SUFFIX() appends whatever
 * SUFFIX is defined as at the point of use.
 */
#define CONCAT_IMPL(a, b)	a##b
#define CONCAT(a, b)		CONCAT_IMPL(a, b)
#define ADD_SUFFIX(name)	CONCAT(name, SUFFIX)
37547
37548			#endif
37549
37550
37551			struct cpu_feature {
37552			u32 bit;
37553			const char *name;
37554			};
37555
37556			#if defined(TEST_SUPPORT__DO_NOT_USE) && !defined(FREESTANDING)
37557
37558			static inline void
37559			disable_cpu_features_for_testing(u32 *features,
37560			const struct cpu_feature *feature_table,
37561			size_t feature_table_length)
37562			{
37563			char env_value, strbuf, p, saveptr = NULL;
37564			size_t i;
37565
37566			env_value = getenv("LIBDEFLATE_DISABLE_CPU_FEATURES");
37567			if (!env_value)
37568			return;
37569			strbuf = strdup(env_value);
37570			if (!strbuf)
37571			abort();
37572			p = strtok_r(strbuf, ",", &saveptr);
37573			while (p) {
37574			for (i = 0; i < feature_table_length; i++) {
37575			if (strcmp(p, feature_table[i].name) == 0) {
37576			*features &= ~feature_table[i].bit;
37577			break;
37578			}
37579			}
37580			if (i == feature_table_length) {
37581			fprintf(stderr,
37582			"unrecognized feature in LIBDEFLATE_DISABLE_CPU_FEATURES: \"%s\"\n",
37583			p);
37584			abort();
37585			}
37586			p = strtok_r(NULL, ",", &saveptr);
37587			}
37588			free(strbuf);
37589			}
37590			#else
37591			static inline void
37592	2		disable_cpu_features_for_testing(u32 *features,
37593			const struct cpu_feature *feature_table,
37594			size_t feature_table_length)
37595			{
37596	2		}
37597			#endif
37598
37599			#endif
37600
37601			/* #include "arm-cpu_features.h" */
37602
37603
37604			#ifndef LIB_ARM_CPU_FEATURES_H
37605			#define LIB_ARM_CPU_FEATURES_H
37606
37607			/* #include "lib_common.h" */
37608
37609
37610			#ifndef LIB_LIB_COMMON_H
37611			#define LIB_LIB_COMMON_H
37612
37613			#ifdef LIBDEFLATE_H
37614
37615			# error "lib_common.h must always be included before libdeflate.h"
37616			#endif
37617
/*
 * LIBDEFLATE_EXPORT_SYM - mark a symbol as exported from the library:
 * dllexport for Windows/Cygwin DLL builds, default ELF visibility for
 * GNU-compatible compilers, nothing otherwise.
 */
#if defined(LIBDEFLATE_DLL) && (defined(_WIN32) || defined(__CYGWIN__))
#  define LIBDEFLATE_EXPORT_SYM	__declspec(dllexport)
#elif defined(__GNUC__)
#  define LIBDEFLATE_EXPORT_SYM	__attribute__((visibility("default")))
#else
#  define LIBDEFLATE_EXPORT_SYM
#endif

/*
 * LIBDEFLATE_ALIGN_STACK - on 32-bit x86 with a GNU-compatible compiler,
 * realign the stack on entry to the annotated function.
 */
#if defined(__GNUC__) && defined(__i386__)
#  define LIBDEFLATE_ALIGN_STACK	__attribute__((force_align_arg_pointer))
#else
#  define LIBDEFLATE_ALIGN_STACK
#endif

/* Decoration applied to every public API function. */
#define LIBDEFLATEAPI	LIBDEFLATE_EXPORT_SYM LIBDEFLATE_ALIGN_STACK
37634
37635			/* #include "../common_defs.h" */
37636
37637
37638			#ifndef COMMON_DEFS_H
37639			#define COMMON_DEFS_H
37640
37641			/* #include "libdeflate.h" */
37642
37643
37644			#ifndef LIBDEFLATE_H
37645			#define LIBDEFLATE_H
37646
#include <stddef.h>	/* size_t */
#include <stdint.h>	/* uint32_t */
37649
37650			#ifdef __cplusplus
37651			extern "C" {
37652			#endif
37653
37654			#define LIBDEFLATE_VERSION_MAJOR 1
37655			#define LIBDEFLATE_VERSION_MINOR 25
37656			#define LIBDEFLATE_VERSION_STRING "1.25"
37657
37658
/*
 * Fallback for consumers of the header: import from the DLL on
 * Windows/Cygwin when LIBDEFLATE_DLL is defined, otherwise no decoration.
 * Skipped when LIBDEFLATEAPI was already defined before this point.
 */
#ifndef LIBDEFLATEAPI
#  if defined(LIBDEFLATE_DLL) && (defined(_WIN32) || defined(__CYGWIN__))
#    define LIBDEFLATEAPI	__declspec(dllimport)
#  else
#    define LIBDEFLATEAPI
#  endif
#endif
37666
37667
37668
37669
37670
37671			struct libdeflate_compressor;
37672			struct libdeflate_options;
37673
37674
37675			LIBDEFLATEAPI struct libdeflate_compressor *
37676			libdeflate_alloc_compressor(int compression_level);
37677
37678
37679			LIBDEFLATEAPI struct libdeflate_compressor *
37680			libdeflate_alloc_compressor_ex(int compression_level,
37681			const struct libdeflate_options *options);
37682
37683
37684			LIBDEFLATEAPI size_t
37685			libdeflate_deflate_compress(struct libdeflate_compressor *compressor,
37686			const void *in, size_t in_nbytes,
37687			void *out, size_t out_nbytes_avail);
37688
37689
37690			LIBDEFLATEAPI size_t
37691			libdeflate_deflate_compress_bound(struct libdeflate_compressor *compressor,
37692			size_t in_nbytes);
37693
37694
37695			LIBDEFLATEAPI size_t
37696			libdeflate_zlib_compress(struct libdeflate_compressor *compressor,
37697			const void *in, size_t in_nbytes,
37698			void *out, size_t out_nbytes_avail);
37699
37700
37701			LIBDEFLATEAPI size_t
37702			libdeflate_zlib_compress_bound(struct libdeflate_compressor *compressor,
37703			size_t in_nbytes);
37704
37705
37706			LIBDEFLATEAPI size_t
37707			libdeflate_gzip_compress(struct libdeflate_compressor *compressor,
37708			const void *in, size_t in_nbytes,
37709			void *out, size_t out_nbytes_avail);
37710
37711
37712			LIBDEFLATEAPI size_t
37713			libdeflate_gzip_compress_bound(struct libdeflate_compressor *compressor,
37714			size_t in_nbytes);
37715
37716
37717			LIBDEFLATEAPI void
37718			libdeflate_free_compressor(struct libdeflate_compressor *compressor);
37719
37720
37721
37722
37723
37724			struct libdeflate_decompressor;
37725			struct libdeflate_options;
37726
37727
37728			LIBDEFLATEAPI struct libdeflate_decompressor *
37729			libdeflate_alloc_decompressor(void);
37730
37731
37732			LIBDEFLATEAPI struct libdeflate_decompressor *
37733			libdeflate_alloc_decompressor_ex(const struct libdeflate_options *options);
37734
37735
37736			enum libdeflate_result {
37737
37738			LIBDEFLATE_SUCCESS = 0,
37739
37740
37741			LIBDEFLATE_BAD_DATA = 1,
37742
37743
37744			LIBDEFLATE_SHORT_OUTPUT = 2,
37745
37746
37747			LIBDEFLATE_INSUFFICIENT_SPACE = 3,
37748			};
37749
37750
37751			LIBDEFLATEAPI enum libdeflate_result
37752			libdeflate_deflate_decompress(struct libdeflate_decompressor *decompressor,
37753			const void *in, size_t in_nbytes,
37754			void *out, size_t out_nbytes_avail,
37755			size_t *actual_out_nbytes_ret);
37756
37757
37758			LIBDEFLATEAPI enum libdeflate_result
37759			libdeflate_deflate_decompress_ex(struct libdeflate_decompressor *decompressor,
37760			const void *in, size_t in_nbytes,
37761			void *out, size_t out_nbytes_avail,
37762			size_t *actual_in_nbytes_ret,
37763			size_t *actual_out_nbytes_ret);
37764
37765
37766			LIBDEFLATEAPI enum libdeflate_result
37767			libdeflate_zlib_decompress(struct libdeflate_decompressor *decompressor,
37768			const void *in, size_t in_nbytes,
37769			void *out, size_t out_nbytes_avail,
37770			size_t *actual_out_nbytes_ret);
37771
37772
37773			LIBDEFLATEAPI enum libdeflate_result
37774			libdeflate_zlib_decompress_ex(struct libdeflate_decompressor *decompressor,
37775			const void *in, size_t in_nbytes,
37776			void *out, size_t out_nbytes_avail,
37777			size_t *actual_in_nbytes_ret,
37778			size_t *actual_out_nbytes_ret);
37779
37780
37781			LIBDEFLATEAPI enum libdeflate_result
37782			libdeflate_gzip_decompress(struct libdeflate_decompressor *decompressor,
37783			const void *in, size_t in_nbytes,
37784			void *out, size_t out_nbytes_avail,
37785			size_t *actual_out_nbytes_ret);
37786
37787
37788			LIBDEFLATEAPI enum libdeflate_result
37789			libdeflate_gzip_decompress_ex(struct libdeflate_decompressor *decompressor,
37790			const void *in, size_t in_nbytes,
37791			void *out, size_t out_nbytes_avail,
37792			size_t *actual_in_nbytes_ret,
37793			size_t *actual_out_nbytes_ret);
37794
37795
37796			LIBDEFLATEAPI void
37797			libdeflate_free_decompressor(struct libdeflate_decompressor *decompressor);
37798
37799
37800
37801
37802
37803
37804			LIBDEFLATEAPI uint32_t
37805			libdeflate_adler32(uint32_t adler, const void *buffer, size_t len);
37806
37807
37808
37809			LIBDEFLATEAPI uint32_t
37810			libdeflate_crc32(uint32_t crc, const void *buffer, size_t len);
37811
37812
37813
37814
37815
37816
37817			LIBDEFLATEAPI void
37818			libdeflate_set_memory_allocator(void (malloc_func)(size_t),
37819			void (free_func)(void ));
37820
37821
/* Options passed to the *_ex() allocation functions. */
struct libdeflate_options {

	/* Size of this structure; presumably set by the caller to
	 * sizeof(struct libdeflate_options) - confirm against the API docs. */
	size_t sizeof_options;

	/* Allocator callbacks: malloc-like and free-like function pointers. */
	void *(*malloc_func)(size_t);
	void (*free_func)(void *);
};
37831
37832			#ifdef __cplusplus
37833			}
37834			#endif
37835
37836			#endif
37837
37838
#include <stdbool.h>	/* bool, true */
#include <stddef.h>	/* size_t */
#include <stdint.h>	/* fixed-width integer types */
#ifdef _MSC_VER
#  include <intrin.h>	/* _BitScan*() and other intrinsics */
#  include <stdlib.h>	/* _byteswap_*() */

   /* Silence MSVC warnings; see Microsoft's documentation for each number. */
#  pragma warning(disable : 4146)

#  pragma warning(disable : 4018)
#  pragma warning(disable : 4244)
#  pragma warning(disable : 4267)
#  pragma warning(disable : 4310)

#  pragma warning(disable : 4100)
#  pragma warning(disable : 4127)
#  pragma warning(disable : 4189)
#  pragma warning(disable : 4232)
#  pragma warning(disable : 4245)
#  pragma warning(disable : 4295)
#endif
#ifndef FREESTANDING
#  include <string.h>	/* memcpy() */
#endif
37864
37865
37866
37867
37868
37869
/*
 * Target architecture detection.  At most one ARCH_* macro is defined, from
 * MSVC's _M_* macros or the GNU-style __arch__ macros.  An MSVC target that
 * defines both _M_X64 and _M_ARM64EC is deliberately not treated as x86_64.
 */
#undef ARCH_X86_64
#undef ARCH_X86_32
#undef ARCH_ARM64
#undef ARCH_ARM32
#undef ARCH_RISCV
#ifdef _MSC_VER

#  if defined(_M_X64) && !defined(_M_ARM64EC)
#    define ARCH_X86_64
#  elif defined(_M_IX86)
#    define ARCH_X86_32
#  elif defined(_M_ARM64)
#    define ARCH_ARM64
#  elif defined(_M_ARM)
#    define ARCH_ARM32
#  endif
#else
#  if defined(__x86_64__)
#    define ARCH_X86_64
#  elif defined(__i386__)
#    define ARCH_X86_32
#  elif defined(__aarch64__)
#    define ARCH_ARM64
#  elif defined(__arm__)
#    define ARCH_ARM32
#  elif defined(__riscv)
#    define ARCH_RISCV
#  endif
#endif
37899
37900
37901
37902
37903
37904
/* Short aliases for the fixed-width integer types: uN unsigned, sN signed. */
typedef uint8_t		u8;
typedef uint16_t	u16;
typedef uint32_t	u32;
typedef uint64_t	u64;
typedef int8_t		s8;
typedef int16_t		s16;
typedef int32_t		s32;
typedef int64_t		s64;
37913
37914
/* MSVC does not provide ssize_t, so define it to match the pointer width. */
#ifdef _MSC_VER
#  ifdef _WIN64
typedef long long ssize_t;
#  else
typedef long ssize_t;
#  endif
#endif

/* Integer type used for word-at-a-time operations; an alias for size_t. */
typedef size_t machine_word_t;

/* Number of bytes in a machine word. */
#define WORDBYTES	((int)sizeof(machine_word_t))

/* Number of bits in a machine word. */
#define WORDBITS	(8 * WORDBYTES)
37931
37932
37933
37934
37935
37936
/*
 * Compiler version checks.  Each *_PREREQ() macro is nonzero iff the code is
 * being built by that compiler at the given version or later, and is 0 for
 * every other compiler.  Versions below the supported minimum are rejected
 * with #error.
 */

/* Real gcc only: clang and the Intel compiler also define __GNUC__. */
#if defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER)
#  define GCC_PREREQ(major, minor)		\
	(__GNUC__ > (major) ||			\
	 (__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)))
#  if !GCC_PREREQ(4, 9)
#    error "gcc versions older than 4.9 are no longer supported"
#  endif
#else
#  define GCC_PREREQ(major, minor)	0
#endif
#ifdef __clang__
#  ifdef __apple_build_version__
     /* Apple clang is compared by build number, not major/minor. */
#    define CLANG_PREREQ(major, minor, apple_version)	\
	(__apple_build_version__ >= (apple_version))
#  else
#    define CLANG_PREREQ(major, minor, apple_version)	\
	(__clang_major__ > (major) ||			\
	 (__clang_major__ == (major) && __clang_minor__ >= (minor)))
#  endif
#  if !CLANG_PREREQ(3, 9, 8000000)
#    error "clang versions older than 3.9 are no longer supported"
#  endif
#else
#  define CLANG_PREREQ(major, minor, apple_version)	0
#endif
#ifdef _MSC_VER
#  define MSVC_PREREQ(version)	(_MSC_VER >= (version))
#  if !MSVC_PREREQ(1900)
#    error "MSVC versions older than Visual Studio 2015 are no longer supported"
#  endif
#else
#  define MSVC_PREREQ(version)	0
#endif
37970
37971
37972			#ifndef __has_attribute
37973			# define __has_attribute(attribute) 0
37974			#endif
37975
37976
37977			#ifndef __has_builtin
37978			# define __has_builtin(builtin) 0
37979			#endif
37980
37981
37982			#ifdef _MSC_VER
37983			# define inline __inline
37984			#endif
37985
37986
37987			#if defined(__GNUC__) \|\| __has_attribute(always_inline)
37988			# define forceinline inline __attribute__((always_inline))
37989			#elif defined(_MSC_VER)
37990			# define forceinline __forceinline
37991			#else
37992			# define forceinline inline
37993			#endif
37994
37995
37996			#if defined(__GNUC__) \|\| __has_attribute(unused)
37997			# define MAYBE_UNUSED __attribute__((unused))
37998			#else
37999			# define MAYBE_UNUSED
38000			#endif
38001
38002
38003			#if defined(__GNUC__) \|\| __has_attribute(noreturn)
38004			# define NORETURN __attribute__((noreturn))
38005			#else
38006			# define NORETURN
38007			#endif
38008
38009
38010			#if !defined(__STDC_VERSION__) \|\| (__STDC_VERSION__ < 201112L)
38011			# if defined(__GNUC__) \|\| defined(__clang__)
38012			# define restrict __restrict__
38013			# else
38014			# define restrict
38015			# endif
38016			#endif
38017
38018
/*
 * likely(expr) / unlikely(expr) - branch prediction hints.  With
 * __builtin_expect the result is normalized to 0 or 1; without it the
 * macros expand to expr unchanged.
 */
#if defined(__GNUC__) || __has_builtin(__builtin_expect)
#  define likely(expr)		__builtin_expect(!!(expr), 1)
#else
#  define likely(expr)		(expr)
#endif

#if defined(__GNUC__) || __has_builtin(__builtin_expect)
#  define unlikely(expr)	__builtin_expect(!!(expr), 0)
#else
#  define unlikely(expr)	(expr)
#endif
38031
38032
38033			#undef prefetchr
38034			#if defined(__GNUC__) \|\| __has_builtin(__builtin_prefetch)
38035			# define prefetchr(addr) __builtin_prefetch((addr), 0)
38036			#elif defined(_MSC_VER)
38037			# if defined(ARCH_X86_32) \|\| defined(ARCH_X86_64)
38038			# define prefetchr(addr) _mm_prefetch((addr), _MM_HINT_T0)
38039			# elif defined(ARCH_ARM64)
38040			# define prefetchr(addr) __prefetch2((addr), 0x00 )
38041			# elif defined(ARCH_ARM32)
38042			# define prefetchr(addr) __prefetch(addr)
38043			# endif
38044			#endif
38045			#ifndef prefetchr
38046			# define prefetchr(addr)
38047			#endif
38048
38049
38050			#undef prefetchw
38051			#if defined(__GNUC__) \|\| __has_builtin(__builtin_prefetch)
38052			# define prefetchw(addr) __builtin_prefetch((addr), 1)
38053			#elif defined(_MSC_VER)
38054			# if defined(ARCH_X86_32) \|\| defined(ARCH_X86_64)
38055			# define prefetchw(addr) _m_prefetchw(addr)
38056			# elif defined(ARCH_ARM64)
38057			# define prefetchw(addr) __prefetch2((addr), 0x10 )
38058			# elif defined(ARCH_ARM32)
38059			# define prefetchw(addr) __prefetchw(addr)
38060			# endif
38061			#endif
38062			#ifndef prefetchw
38063			# define prefetchw(addr)
38064			#endif
38065
38066
38067			#undef _aligned_attribute
38068			#if defined(__GNUC__) \|\| __has_attribute(aligned)
38069			# define _aligned_attribute(n) __attribute__((aligned(n)))
38070			#elif defined(_MSC_VER)
38071			# define _aligned_attribute(n) __declspec(align(n))
38072			#endif
38073
38074
38075			#if defined(__GNUC__) \|\| __has_attribute(target)
38076			# define _target_attribute(attrs) __attribute__((target(attrs)))
38077			#else
38078			# define _target_attribute(attrs)
38079			#endif
38080
38081
38082
38083
38084
38085			#define ARRAY_LEN(A) (sizeof(A) / sizeof((A)[0]))
38086			#define MIN(a, b) ((a) <= (b) ? (a) : (b))
38087			#define MAX(a, b) ((a) >= (b) ? (a) : (b))
38088			#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
38089			#define STATIC_ASSERT(expr) ((void)sizeof(char[1 - 2 * !(expr)]))
38090			#define ALIGN(n, a) (((n) + (a) - 1) & ~((a) - 1))
38091			#define ROUND_UP(n, d) ((d) * DIV_ROUND_UP((n), (d)))
38092
38093
38094
38095
38096
38097
38098			#if defined(__BYTE_ORDER__)
38099			# define CPU_IS_LITTLE_ENDIAN() (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
38100			#elif defined(_MSC_VER)
38101			# define CPU_IS_LITTLE_ENDIAN() true
38102			#else
38103			static forceinline bool CPU_IS_LITTLE_ENDIAN(void)
38104			{
38105			union {
38106			u32 w;
38107			u8 b;
38108			} u;
38109
38110			u.w = 1;
38111			return u.b;
38112			}
38113			#endif
38114
38115
38116			static forceinline u16 bswap16(u16 v)
38117			{
38118			#if defined(__GNUC__) \|\| __has_builtin(__builtin_bswap16)
38119			return __builtin_bswap16(v);
38120			#elif defined(_MSC_VER)
38121			return _byteswap_ushort(v);
38122			#else
38123			return (v << 8) \| (v >> 8);
38124			#endif
38125			}
38126
38127
38128			static forceinline u32 bswap32(u32 v)
38129			{
38130			#if defined(__GNUC__) \|\| __has_builtin(__builtin_bswap32)
38131			return __builtin_bswap32(v);
38132			#elif defined(_MSC_VER)
38133			return _byteswap_ulong(v);
38134			#else
38135			return ((v & 0x000000FF) << 24) \|
38136			((v & 0x0000FF00) << 8) \|
38137			((v & 0x00FF0000) >> 8) \|
38138			((v & 0xFF000000) >> 24);
38139			#endif
38140			}
38141
38142
38143			static forceinline u64 bswap64(u64 v)
38144			{
38145			#if defined(__GNUC__) \|\| __has_builtin(__builtin_bswap64)
38146			return __builtin_bswap64(v);
38147			#elif defined(_MSC_VER)
38148			return _byteswap_uint64(v);
38149			#else
38150			return ((v & 0x00000000000000FF) << 56) \|
38151			((v & 0x000000000000FF00) << 40) \|
38152			((v & 0x0000000000FF0000) << 24) \|
38153			((v & 0x00000000FF000000) << 8) \|
38154			((v & 0x000000FF00000000) >> 8) \|
38155			((v & 0x0000FF0000000000) >> 24) \|
38156			((v & 0x00FF000000000000) >> 40) \|
38157			((v & 0xFF00000000000000) >> 56);
38158			#endif
38159			}
38160
38161			#define le16_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap16(v))
38162			#define le32_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap32(v))
38163			#define le64_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap64(v))
38164			#define be16_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? bswap16(v) : (v))
38165			#define be32_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? bswap32(v) : (v))
38166			#define be64_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? bswap64(v) : (v))
38167
38168
38169
38170
38171
38172
38173			#if (defined(__GNUC__) \|\| defined(__clang__)) && \
38174			(defined(ARCH_X86_64) \|\| defined(ARCH_X86_32) \|\| \
38175			defined(__ARM_FEATURE_UNALIGNED) \|\| defined(__powerpc64__) \|\| \
38176			defined(__riscv_misaligned_fast) \|\| \
38177			defined(__wasm__))
38178			# define UNALIGNED_ACCESS_IS_FAST 1
38179			#elif defined(_MSC_VER)
38180			# define UNALIGNED_ACCESS_IS_FAST 1
38181			#else
38182			# define UNALIGNED_ACCESS_IS_FAST 0
38183			#endif
38184
38185
38186
38187			#ifdef FREESTANDING
38188			# define MEMCOPY __builtin_memcpy
38189			#else
38190			# define MEMCOPY memcpy
38191			#endif
38192
38193
38194
38195			#define DEFINE_UNALIGNED_TYPE(type) \
38196			static forceinline type \
38197			load_##type##_unaligned(const void *p) \
38198			{ \
38199			type v; \
38200			\
38201			MEMCOPY(&v, p, sizeof(v)); \
38202			return v; \
38203			} \
38204			\
38205			static forceinline void \
38206			store_##type##_unaligned(type v, void *p) \
38207			{ \
38208			MEMCOPY(p, &v, sizeof(v)); \
38209			}
38210
38211			DEFINE_UNALIGNED_TYPE(u16)
38212			DEFINE_UNALIGNED_TYPE(u32)
38213			DEFINE_UNALIGNED_TYPE(u64)
38214			DEFINE_UNALIGNED_TYPE(machine_word_t)
38215
38216			#undef MEMCOPY
38217
38218			#define load_word_unaligned load_machine_word_t_unaligned
38219			#define store_word_unaligned store_machine_word_t_unaligned
38220
38221
38222
38223			static forceinline u16
38224			get_unaligned_le16(const u8 *p)
38225			{
38226			if (UNALIGNED_ACCESS_IS_FAST)
38227			return le16_bswap(load_u16_unaligned(p));
38228			else
38229			return ((u16)p[1] << 8) \| p[0];
38230			}
38231
38232			static forceinline u16
38233			get_unaligned_be16(const u8 *p)
38234			{
38235			if (UNALIGNED_ACCESS_IS_FAST)
38236			return be16_bswap(load_u16_unaligned(p));
38237			else
38238			return ((u16)p[0] << 8) \| p[1];
38239			}
38240
38241			static forceinline u32
38242			get_unaligned_le32(const u8 *p)
38243			{
38244			if (UNALIGNED_ACCESS_IS_FAST)
38245			return le32_bswap(load_u32_unaligned(p));
38246			else
38247			return ((u32)p[3] << 24) \| ((u32)p[2] << 16) \|
38248			((u32)p[1] << 8) \| p[0];
38249			}
38250
38251			static forceinline u32
38252			get_unaligned_be32(const u8 *p)
38253			{
38254			if (UNALIGNED_ACCESS_IS_FAST)
38255			return be32_bswap(load_u32_unaligned(p));
38256			else
38257			return ((u32)p[0] << 24) \| ((u32)p[1] << 16) \|
38258			((u32)p[2] << 8) \| p[3];
38259			}
38260
38261			static forceinline u64
38262			get_unaligned_le64(const u8 *p)
38263			{
38264			if (UNALIGNED_ACCESS_IS_FAST)
38265			return le64_bswap(load_u64_unaligned(p));
38266			else
38267			return ((u64)p[7] << 56) \| ((u64)p[6] << 48) \|
38268			((u64)p[5] << 40) \| ((u64)p[4] << 32) \|
38269			((u64)p[3] << 24) \| ((u64)p[2] << 16) \|
38270			((u64)p[1] << 8) \| p[0];
38271			}
38272
38273			static forceinline machine_word_t
38274			get_unaligned_leword(const u8 *p)
38275			{
38276			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
38277			if (WORDBITS == 32)
38278			return get_unaligned_le32(p);
38279			else
38280			return get_unaligned_le64(p);
38281			}
38282
38283
38284
38285			static forceinline void
38286			put_unaligned_le16(u16 v, u8 *p)
38287			{
38288			if (UNALIGNED_ACCESS_IS_FAST) {
38289			store_u16_unaligned(le16_bswap(v), p);
38290			} else {
38291			p[0] = (u8)(v >> 0);
38292			p[1] = (u8)(v >> 8);
38293			}
38294			}
38295
38296			static forceinline void
38297			put_unaligned_be16(u16 v, u8 *p)
38298			{
38299			if (UNALIGNED_ACCESS_IS_FAST) {
38300			store_u16_unaligned(be16_bswap(v), p);
38301			} else {
38302			p[0] = (u8)(v >> 8);
38303			p[1] = (u8)(v >> 0);
38304			}
38305			}
38306
38307			static forceinline void
38308			put_unaligned_le32(u32 v, u8 *p)
38309			{
38310			if (UNALIGNED_ACCESS_IS_FAST) {
38311			store_u32_unaligned(le32_bswap(v), p);
38312			} else {
38313			p[0] = (u8)(v >> 0);
38314			p[1] = (u8)(v >> 8);
38315			p[2] = (u8)(v >> 16);
38316			p[3] = (u8)(v >> 24);
38317			}
38318			}
38319
38320			static forceinline void
38321			put_unaligned_be32(u32 v, u8 *p)
38322			{
38323			if (UNALIGNED_ACCESS_IS_FAST) {
38324			store_u32_unaligned(be32_bswap(v), p);
38325			} else {
38326			p[0] = (u8)(v >> 24);
38327			p[1] = (u8)(v >> 16);
38328			p[2] = (u8)(v >> 8);
38329			p[3] = (u8)(v >> 0);
38330			}
38331			}
38332
38333			static forceinline void
38334			put_unaligned_le64(u64 v, u8 *p)
38335			{
38336			if (UNALIGNED_ACCESS_IS_FAST) {
38337			store_u64_unaligned(le64_bswap(v), p);
38338			} else {
38339			p[0] = (u8)(v >> 0);
38340			p[1] = (u8)(v >> 8);
38341			p[2] = (u8)(v >> 16);
38342			p[3] = (u8)(v >> 24);
38343			p[4] = (u8)(v >> 32);
38344			p[5] = (u8)(v >> 40);
38345			p[6] = (u8)(v >> 48);
38346			p[7] = (u8)(v >> 56);
38347			}
38348			}
38349
38350			static forceinline void
38351			put_unaligned_leword(machine_word_t v, u8 *p)
38352			{
38353			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
38354			if (WORDBITS == 32)
38355			put_unaligned_le32(v, p);
38356			else
38357			put_unaligned_le64(v, p);
38358			}
38359
38360
38361
38362
38363
38364
38365
38366			static forceinline unsigned
38367			bsr32(u32 v)
38368			{
38369			#if defined(__GNUC__) \|\| __has_builtin(__builtin_clz)
38370			return 31 - __builtin_clz(v);
38371			#elif defined(_MSC_VER)
38372			unsigned long i;
38373
38374			_BitScanReverse(&i, v);
38375			return i;
38376			#else
38377			unsigned i = 0;
38378
38379			while ((v >>= 1) != 0)
38380			i++;
38381			return i;
38382			#endif
38383			}
38384
38385			static forceinline unsigned
38386			bsr64(u64 v)
38387			{
38388			#if defined(__GNUC__) \|\| __has_builtin(__builtin_clzll)
38389			return 63 - __builtin_clzll(v);
38390			#elif defined(_MSC_VER) && defined(_WIN64)
38391			unsigned long i;
38392
38393			_BitScanReverse64(&i, v);
38394			return i;
38395			#else
38396			unsigned i = 0;
38397
38398			while ((v >>= 1) != 0)
38399			i++;
38400			return i;
38401			#endif
38402			}
38403
38404			static forceinline unsigned
38405			bsrw(machine_word_t v)
38406			{
38407			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
38408			if (WORDBITS == 32)
38409			return bsr32(v);
38410			else
38411			return bsr64(v);
38412			}
38413
38414
38415
38416			static forceinline unsigned
38417			bsf32(u32 v)
38418			{
38419			#if defined(__GNUC__) \|\| __has_builtin(__builtin_ctz)
38420			return __builtin_ctz(v);
38421			#elif defined(_MSC_VER)
38422			unsigned long i;
38423
38424			_BitScanForward(&i, v);
38425			return i;
38426			#else
38427			unsigned i = 0;
38428
38429			for (; (v & 1) == 0; v >>= 1)
38430			i++;
38431			return i;
38432			#endif
38433			}
38434
38435			static forceinline unsigned
38436			bsf64(u64 v)
38437			{
38438			#if defined(__GNUC__) \|\| __has_builtin(__builtin_ctzll)
38439			return __builtin_ctzll(v);
38440			#elif defined(_MSC_VER) && defined(_WIN64)
38441			unsigned long i;
38442
38443			_BitScanForward64(&i, v);
38444			return i;
38445			#else
38446			unsigned i = 0;
38447
38448			for (; (v & 1) == 0; v >>= 1)
38449			i++;
38450			return i;
38451			#endif
38452			}
38453
38454			static forceinline unsigned
38455			bsfw(machine_word_t v)
38456			{
38457			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
38458			if (WORDBITS == 32)
38459			return bsf32(v);
38460			else
38461			return bsf64(v);
38462			}
38463
38464
/*
 * rbit32(v): reverse the order of the bits in a 32-bit value, using the ARM
 * "rbit" instruction via inline assembly.
 *
 * Only defined for gcc/clang on arm32 (ARMv7+, or ARMv6T2) and on arm64.
 * When defined, the macro 'rbit32' is defined too, so availability can be
 * tested with #ifdef.
 */
#undef rbit32
#if (defined(__GNUC__) || defined(__clang__)) && defined(ARCH_ARM32) && \
	(__ARM_ARCH >= 7 || (__ARM_ARCH == 6 && defined(__ARM_ARCH_6T2__)))
static forceinline u32
rbit32(u32 v)
{
	__asm__("rbit %0, %1" : "=r" (v) : "r" (v));
	return v;
}
#define rbit32 rbit32
#elif (defined(__GNUC__) || defined(__clang__)) && defined(ARCH_ARM64)
static forceinline u32
rbit32(u32 v)
{
	/* %w selects the 32-bit view of the register. */
	__asm__("rbit %w0, %w1" : "=r" (v) : "r" (v));
	return v;
}
#define rbit32 rbit32
#endif
38484
38485			#endif
38486
38487
/* Pointer to a malloc()-style function: takes a size, returns the memory. */
typedef void *(*malloc_func_t)(size_t);
/* Pointer to a free()-style function: takes the pointer to release. */
typedef void (*free_func_t)(void *);

/* Allocator function pointers; defined elsewhere. */
extern malloc_func_t libdeflate_default_malloc_func;
extern free_func_t libdeflate_default_free_func;

/* Aligned allocation helpers layered on a caller-supplied allocator. */
void *libdeflate_aligned_malloc(malloc_func_t malloc_func,
				size_t alignment, size_t size);
void libdeflate_aligned_free(free_func_t free_func, void *ptr);
38497
#ifdef FREESTANDING
/*
 * Freestanding build: <string.h> is not included.  Declare the memory
 * functions ourselves and map each call onto the corresponding compiler
 * builtin.
 */
void *memset(void *s, int c, size_t n);
#define memset(s, c, n)		__builtin_memset((s), (c), (n))

void *memcpy(void *dest, const void *src, size_t n);
#define memcpy(dest, src, n)	__builtin_memcpy((dest), (src), (n))

void *memmove(void *dest, const void *src, size_t n);
#define memmove(dest, src, n)	__builtin_memmove((dest), (src), (n))

int memcmp(const void *s1, const void *s2, size_t n);
#define memcmp(s1, s2, n)	__builtin_memcmp((s1), (s2), (n))

/* Assertions are never enabled in freestanding builds. */
#undef LIBDEFLATE_ENABLE_ASSERTIONS
#else
#  include <string.h>
   /* Assertions are force-enabled when run under the clang static analyzer. */
#  ifdef __clang_analyzer__
#    define LIBDEFLATE_ENABLE_ASSERTIONS
#  endif
#endif
38520
38521
/*
 * ASSERT(expr): runtime assertion.
 *
 * With LIBDEFLATE_ENABLE_ASSERTIONS, a false @expr calls
 * libdeflate_assertion_failed() with the expression text, file and line.
 * Otherwise @expr is still evaluated but its value is discarded.
 */
#ifdef LIBDEFLATE_ENABLE_ASSERTIONS
NORETURN void
libdeflate_assertion_failed(const char *expr, const char *file, int line);
#define ASSERT(expr) { if (unlikely(!(expr))) \
	libdeflate_assertion_failed(#expr, __FILE__, __LINE__); }
#else
#define ASSERT(expr) (void)(expr)
#endif
38530
38531			#define CONCAT_IMPL(a, b) a##b
38532			#define CONCAT(a, b) CONCAT_IMPL(a, b)
38533			#define ADD_SUFFIX(name) CONCAT(name, SUFFIX)
38534
38535			#endif
38536
38537
38538			#if defined(ARCH_ARM32) \|\| defined(ARCH_ARM64)
38539
38540			#define ARM_CPU_FEATURE_NEON (1 << 0)
38541			#define ARM_CPU_FEATURE_PMULL (1 << 1)
38542
38543			#define ARM_CPU_FEATURE_PREFER_PMULL (1 << 2)
38544			#define ARM_CPU_FEATURE_CRC32 (1 << 3)
38545			#define ARM_CPU_FEATURE_SHA3 (1 << 4)
38546			#define ARM_CPU_FEATURE_DOTPROD (1 << 5)
38547
38548			#if !defined(FREESTANDING) && \
38549			(defined(__GNUC__) \|\| defined(__clang__) \|\| defined(_MSC_VER)) && \
38550			(defined(__linux__) \|\| \
38551			(defined(__APPLE__) && defined(ARCH_ARM64)) \|\| \
38552			(defined(_WIN32) && defined(ARCH_ARM64)))
38553
38554			# define ARM_CPU_FEATURES_KNOWN (1U << 31)
38555			extern volatile u32 libdeflate_arm_cpu_features;
38556
38557			void libdeflate_init_arm_cpu_features(void);
38558
38559			static inline u32 get_arm_cpu_features(void)
38560			{
38561			if (libdeflate_arm_cpu_features == 0)
38562			libdeflate_init_arm_cpu_features();
38563			return libdeflate_arm_cpu_features;
38564			}
38565			#else
38566			static inline u32 get_arm_cpu_features(void) { return 0; }
38567			#endif
38568
38569
38570			#if defined(__ARM_NEON) \|\| (defined(_MSC_VER) && defined(ARCH_ARM64))
38571			# define HAVE_NEON(features) 1
38572			# define HAVE_NEON_NATIVE 1
38573			#else
38574			# define HAVE_NEON(features) ((features) & ARM_CPU_FEATURE_NEON)
38575			# define HAVE_NEON_NATIVE 0
38576			#endif
38577
38578			#if (defined(__GNUC__) \|\| defined(__clang__) \|\| defined(_MSC_VER)) && \
38579			(HAVE_NEON_NATIVE \|\| (GCC_PREREQ(6, 1) && defined(__ARM_FP)))
38580			# define HAVE_NEON_INTRIN 1
38581			# include
38582			#else
38583			# define HAVE_NEON_INTRIN 0
38584			#endif
38585
38586
38587			#ifdef __ARM_FEATURE_CRYPTO
38588			# define HAVE_PMULL(features) 1
38589			#else
38590			# define HAVE_PMULL(features) ((features) & ARM_CPU_FEATURE_PMULL)
38591			#endif
38592			#if defined(ARCH_ARM64) && HAVE_NEON_INTRIN && \
38593			(GCC_PREREQ(7, 1) \|\| defined(__clang__) \|\| defined(_MSC_VER)) && \
38594			CPU_IS_LITTLE_ENDIAN()
38595			# define HAVE_PMULL_INTRIN 1
38596
38597			# ifdef _MSC_VER
38598			# define compat_vmull_p64(a, b) vmull_p64(vcreate_p64(a), vcreate_p64(b))
38599			# else
38600			# define compat_vmull_p64(a, b) vmull_p64((a), (b))
38601			# endif
38602			#else
38603			# define HAVE_PMULL_INTRIN 0
38604			#endif
38605
38606
/*
 * CRC32 instruction support.
 *
 * HAVE_CRC32(features) is constant 1 when __ARM_FEATURE_CRC32 is defined;
 * otherwise it tests the runtime feature bit.
 */
#ifdef __ARM_FEATURE_CRC32
#  define HAVE_CRC32(features)	1
#else
#  define HAVE_CRC32(features)	((features) & ARM_CPU_FEATURE_CRC32)
#endif
/* HAVE_CRC32_INTRIN: arm64 with gcc, clang or MSVC. */
#if defined(ARCH_ARM64) && \
	(defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER))
#  define HAVE_CRC32_INTRIN	1
#  if defined(__GNUC__) || defined(__clang__)
#    include <arm_acle.h>
#  endif

   /*
    * For clang older than 16 building without __ARM_FEATURE_CRC32, replace
    * the __crc32* intrinsics with inline-assembly equivalents.
    */
#  if defined(__clang__) && !CLANG_PREREQ(16, 0, 16000000) && \
	!defined(__ARM_FEATURE_CRC32)
#    undef __crc32b
#    define __crc32b(a, b)					\
	({ uint32_t res;					\
	   __asm__("crc32b %w0, %w1, %w2"			\
		   : "=r" (res) : "r" (a), "r" (b));		\
	   res; })
#    undef __crc32h
#    define __crc32h(a, b)					\
	({ uint32_t res;					\
	   __asm__("crc32h %w0, %w1, %w2"			\
		   : "=r" (res) : "r" (a), "r" (b));		\
	   res; })
#    undef __crc32w
#    define __crc32w(a, b)					\
	({ uint32_t res;					\
	   __asm__("crc32w %w0, %w1, %w2"			\
		   : "=r" (res) : "r" (a), "r" (b));		\
	   res; })
#    undef __crc32d
#    define __crc32d(a, b)					\
	({ uint32_t res;					\
	   __asm__("crc32x %w0, %w1, %2"			\
		   : "=r" (res) : "r" (a), "r" (b));		\
	   res; })
#    pragma clang diagnostic ignored "-Wgnu-statement-expression"
#  endif
#else
#  define HAVE_CRC32_INTRIN	0
#endif
38650
38651
38652			#ifdef __ARM_FEATURE_SHA3
38653			# define HAVE_SHA3(features) 1
38654			#else
38655			# define HAVE_SHA3(features) ((features) & ARM_CPU_FEATURE_SHA3)
38656			#endif
38657			#if defined(ARCH_ARM64) && HAVE_NEON_INTRIN && \
38658			(GCC_PREREQ(9, 1) \|\| \
38659			CLANG_PREREQ(7, 0, 10010463) )
38660			# define HAVE_SHA3_INTRIN 1
38661
38662			# if defined(__clang__) && !CLANG_PREREQ(16, 0, 16000000) && \
38663			!defined(__ARM_FEATURE_SHA3)
38664			# undef veor3q_u8
38665			# define veor3q_u8(a, b, c) \
38666			({ uint8x16_t res; \
38667			__asm__("eor3 %0.16b, %1.16b, %2.16b, %3.16b" \
38668			: "=w" (res) : "w" (a), "w" (b), "w" (c)); \
38669			res; })
38670			# pragma clang diagnostic ignored "-Wgnu-statement-expression"
38671			# endif
38672			#else
38673			# define HAVE_SHA3_INTRIN 0
38674			#endif
38675
38676
38677			#ifdef __ARM_FEATURE_DOTPROD
38678			# define HAVE_DOTPROD(features) 1
38679			#else
38680			# define HAVE_DOTPROD(features) ((features) & ARM_CPU_FEATURE_DOTPROD)
38681			#endif
38682			#if defined(ARCH_ARM64) && HAVE_NEON_INTRIN && \
38683			(GCC_PREREQ(8, 1) \|\| CLANG_PREREQ(7, 0, 10010000) \|\| defined(_MSC_VER))
38684			# define HAVE_DOTPROD_INTRIN 1
38685
38686			# if defined(__clang__) && !CLANG_PREREQ(16, 0, 16000000) && \
38687			!defined(__ARM_FEATURE_DOTPROD)
38688			# undef vdotq_u32
38689			# define vdotq_u32(a, b, c) \
38690			({ uint32x4_t res = (a); \
38691			__asm__("udot %0.4s, %1.16b, %2.16b" \
38692			: "+w" (res) : "w" (b), "w" (c)); \
38693			res; })
38694			# pragma clang diagnostic ignored "-Wgnu-statement-expression"
38695			# endif
38696			#else
38697			# define HAVE_DOTPROD_INTRIN 0
38698			#endif
38699
38700			#endif
38701
38702			#endif
38703
38704
38705			#ifdef ARM_CPU_FEATURES_KNOWN
38706
38707
38708			#ifdef __linux__
38709
38710
#include <errno.h>
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

/* Auxiliary vector entry types scanned for below. */
#define AT_HWCAP	16
#define AT_HWCAP2	26

/*
 * Read /proc/self/auxv and extract the AT_HWCAP and AT_HWCAP2 entries.
 *
 * @hwcap:  receives the AT_HWCAP value, if such an entry is found
 * @hwcap2: receives the AT_HWCAP2 value, if such an entry is found
 *
 * An output is left untouched when its entry is absent or the file cannot be
 * opened or read, so callers should pre-initialize both.
 */
static void scan_auxv(unsigned long *hwcap, unsigned long *hwcap2)
{
	int fd;
	unsigned long auxbuf[32];
	int filled = 0;	/* number of unprocessed bytes in auxbuf */
	int i;

	fd = open("/proc/self/auxv", O_RDONLY);
	if (fd < 0)
		return;

	for (;;) {
		/* Read until at least one whole (type, value) pair is buffered. */
		do {
			int ret = read(fd, &((char *)auxbuf)[filled],
				       sizeof(auxbuf) - filled);
			if (ret <= 0) {
				/* Retry if interrupted; stop on EOF or error. */
				if (ret < 0 && errno == EINTR)
					continue;
				goto out;
			}
			filled += ret;
		} while (filled < 2 * sizeof(long));

		/* Consume every complete pair currently in the buffer. */
		i = 0;
		do {
			unsigned long type = auxbuf[i];
			unsigned long value = auxbuf[i + 1];

			if (type == AT_HWCAP)
				*hwcap = value;
			else if (type == AT_HWCAP2)
				*hwcap2 = value;
			i += 2;
			filled -= 2 * sizeof(long);
		} while (filled >= 2 * sizeof(long));

		/* Move any partial trailing pair to the front of the buffer. */
		memmove(auxbuf, &auxbuf[i], filled);
	}
out:
	close(fd);
}
38760
38761			static u32 query_arm_cpu_features(void)
38762			{
38763			u32 features = 0;
38764			unsigned long hwcap = 0;
38765			unsigned long hwcap2 = 0;
38766
38767			scan_auxv(&hwcap, &hwcap2);
38768
38769			#ifdef ARCH_ARM32
38770			STATIC_ASSERT(sizeof(long) == 4);
38771			if (hwcap & (1 << 12))
38772			features \|= ARM_CPU_FEATURE_NEON;
38773			#else
38774			STATIC_ASSERT(sizeof(long) == 8);
38775			if (hwcap & (1 << 1))
38776			features \|= ARM_CPU_FEATURE_NEON;
38777			if (hwcap & (1 << 4))
38778			features \|= ARM_CPU_FEATURE_PMULL;
38779			if (hwcap & (1 << 7))
38780			features \|= ARM_CPU_FEATURE_CRC32;
38781			if (hwcap & (1 << 17))
38782			features \|= ARM_CPU_FEATURE_SHA3;
38783			if (hwcap & (1 << 20))
38784			features \|= ARM_CPU_FEATURE_DOTPROD;
38785			#endif
38786			return features;
38787			}
38788
38789			#elif defined(__APPLE__)
38790
38791
38792			#include
38793			#include
38794			#include
38795
38796			static const struct {
38797			const char *name;
38798			u32 feature;
38799			} feature_sysctls[] = {
38800			{ "hw.optional.neon", ARM_CPU_FEATURE_NEON },
38801			{ "hw.optional.AdvSIMD", ARM_CPU_FEATURE_NEON },
38802			{ "hw.optional.arm.FEAT_PMULL", ARM_CPU_FEATURE_PMULL },
38803			{ "hw.optional.armv8_crc32", ARM_CPU_FEATURE_CRC32 },
38804			{ "hw.optional.armv8_2_sha3", ARM_CPU_FEATURE_SHA3 },
38805			{ "hw.optional.arm.FEAT_SHA3", ARM_CPU_FEATURE_SHA3 },
38806			{ "hw.optional.arm.FEAT_DotProd", ARM_CPU_FEATURE_DOTPROD },
38807			};
38808
38809			static u32 query_arm_cpu_features(void)
38810			{
38811			u32 features = 0;
38812			size_t i;
38813
38814			for (i = 0; i < ARRAY_LEN(feature_sysctls); i++) {
38815			const char *name = feature_sysctls[i].name;
38816			u32 val = 0;
38817			size_t valsize = sizeof(val);
38818
38819			if (sysctlbyname(name, &val, &valsize, NULL, 0) == 0 &&
38820			valsize == sizeof(val) && val == 1)
38821			features \|= feature_sysctls[i].feature;
38822			}
38823			return features;
38824			}
38825			#elif defined(_WIN32)
38826
38827			#include
38828
38829			#ifndef PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE
38830			# define PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE 43
38831			#endif
38832
38833			static u32 query_arm_cpu_features(void)
38834			{
38835			u32 features = ARM_CPU_FEATURE_NEON;
38836
38837			if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE))
38838			features \|= ARM_CPU_FEATURE_PMULL;
38839			if (IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE))
38840			features \|= ARM_CPU_FEATURE_CRC32;
38841			if (IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE))
38842			features \|= ARM_CPU_FEATURE_DOTPROD;
38843
38844
38845
38846			return features;
38847			}
38848			#else
38849			#error "unhandled case"
38850			#endif
38851
38852			static const struct cpu_feature arm_cpu_feature_table[] = {
38853			{ARM_CPU_FEATURE_NEON, "neon"},
38854			{ARM_CPU_FEATURE_PMULL, "pmull"},
38855			{ARM_CPU_FEATURE_PREFER_PMULL, "prefer_pmull"},
38856			{ARM_CPU_FEATURE_CRC32, "crc32"},
38857			{ARM_CPU_FEATURE_SHA3, "sha3"},
38858			{ARM_CPU_FEATURE_DOTPROD, "dotprod"},
38859			};
38860
38861			volatile u32 libdeflate_arm_cpu_features = 0;
38862
38863			void libdeflate_init_arm_cpu_features(void)
38864			{
38865			u32 features = query_arm_cpu_features();
38866
38867
38868			#if (defined(__APPLE__) && TARGET_OS_OSX) \|\| defined(TEST_SUPPORT__DO_NOT_USE)
38869			features \|= ARM_CPU_FEATURE_PREFER_PMULL;
38870			#endif
38871
38872			disable_cpu_features_for_testing(&features, arm_cpu_feature_table,
38873			ARRAY_LEN(arm_cpu_feature_table));
38874
38875			libdeflate_arm_cpu_features = features \| ARM_CPU_FEATURES_KNOWN;
38876			}
38877
38878			#endif
38879			/* /usr/home/ben/projects/gzip-libdeflate/../../software/libdeflate/libdeflate-1.25/lib/x86/cpu_features.c */
38880
38881
38882			/* #include "cpu_features_common.h" - no include guard */
38883			/* #include "x86-cpu_features.h" */
38884
38885
38886			#ifndef LIB_X86_CPU_FEATURES_H
38887			#define LIB_X86_CPU_FEATURES_H
38888
38889			/* #include "lib_common.h" */
38890
38891
38892			#ifndef LIB_LIB_COMMON_H
38893			#define LIB_LIB_COMMON_H
38894
38895			#ifdef LIBDEFLATE_H
38896
38897			# error "lib_common.h must always be included before libdeflate.h"
38898			#endif
38899
/*
 * LIBDEFLATE_EXPORT_SYM: marks a symbol as exported from the library.
 * dllexport for DLL builds on Windows/Cygwin, default visibility for
 * GNU-compatible compilers, nothing otherwise.
 */
#if defined(LIBDEFLATE_DLL) && (defined(_WIN32) || defined(__CYGWIN__))
#  define LIBDEFLATE_EXPORT_SYM  __declspec(dllexport)
#elif defined(__GNUC__)
#  define LIBDEFLATE_EXPORT_SYM  __attribute__((visibility("default")))
#else
#  define LIBDEFLATE_EXPORT_SYM
#endif
38907
38908
38909			#if defined(__GNUC__) && defined(__i386__)
38910			# define LIBDEFLATE_ALIGN_STACK __attribute__((force_align_arg_pointer))
38911			#else
38912			# define LIBDEFLATE_ALIGN_STACK
38913			#endif
38914
38915			#define LIBDEFLATEAPI LIBDEFLATE_EXPORT_SYM LIBDEFLATE_ALIGN_STACK
38916
38917			/* #include "../common_defs.h" */
38918
38919
38920			#ifndef COMMON_DEFS_H
38921			#define COMMON_DEFS_H
38922
38923			/* #include "libdeflate.h" */
38924
38925
38926			#ifndef LIBDEFLATE_H
38927			#define LIBDEFLATE_H
38928
#include <stddef.h>	/* size_t */
#include <stdint.h>	/* uint32_t */
38931
38932			#ifdef __cplusplus
38933			extern "C" {
38934			#endif
38935
38936			#define LIBDEFLATE_VERSION_MAJOR 1
38937			#define LIBDEFLATE_VERSION_MINOR 25
38938			#define LIBDEFLATE_VERSION_STRING "1.25"
38939
38940
/*
 * LIBDEFLATEAPI: decoration applied to every public declaration.  Unless
 * already defined, it is dllimport when using the DLL on Windows/Cygwin and
 * empty otherwise.
 */
#ifndef LIBDEFLATEAPI
#  if defined(LIBDEFLATE_DLL) && (defined(_WIN32) || defined(__CYGWIN__))
#    define LIBDEFLATEAPI	__declspec(dllimport)
#  else
#    define LIBDEFLATEAPI
#  endif
#endif
38948
38949
38950
38951
38952
38953			struct libdeflate_compressor;
38954			struct libdeflate_options;
38955
38956
38957			LIBDEFLATEAPI struct libdeflate_compressor *
38958			libdeflate_alloc_compressor(int compression_level);
38959
38960
38961			LIBDEFLATEAPI struct libdeflate_compressor *
38962			libdeflate_alloc_compressor_ex(int compression_level,
38963			const struct libdeflate_options *options);
38964
38965
38966			LIBDEFLATEAPI size_t
38967			libdeflate_deflate_compress(struct libdeflate_compressor *compressor,
38968			const void *in, size_t in_nbytes,
38969			void *out, size_t out_nbytes_avail);
38970
38971
38972			LIBDEFLATEAPI size_t
38973			libdeflate_deflate_compress_bound(struct libdeflate_compressor *compressor,
38974			size_t in_nbytes);
38975
38976
38977			LIBDEFLATEAPI size_t
38978			libdeflate_zlib_compress(struct libdeflate_compressor *compressor,
38979			const void *in, size_t in_nbytes,
38980			void *out, size_t out_nbytes_avail);
38981
38982
38983			LIBDEFLATEAPI size_t
38984			libdeflate_zlib_compress_bound(struct libdeflate_compressor *compressor,
38985			size_t in_nbytes);
38986
38987
38988			LIBDEFLATEAPI size_t
38989			libdeflate_gzip_compress(struct libdeflate_compressor *compressor,
38990			const void *in, size_t in_nbytes,
38991			void *out, size_t out_nbytes_avail);
38992
38993
38994			LIBDEFLATEAPI size_t
38995			libdeflate_gzip_compress_bound(struct libdeflate_compressor *compressor,
38996			size_t in_nbytes);
38997
38998
38999			LIBDEFLATEAPI void
39000			libdeflate_free_compressor(struct libdeflate_compressor *compressor);
39001
39002
39003
39004
39005
39006			struct libdeflate_decompressor;
39007			struct libdeflate_options;
39008
39009
39010			LIBDEFLATEAPI struct libdeflate_decompressor *
39011			libdeflate_alloc_decompressor(void);
39012
39013
39014			LIBDEFLATEAPI struct libdeflate_decompressor *
39015			libdeflate_alloc_decompressor_ex(const struct libdeflate_options *options);
39016
39017
/* Result code returned by the libdeflate_*_decompress*() functions. */
enum libdeflate_result {
	LIBDEFLATE_SUCCESS            = 0,
	LIBDEFLATE_BAD_DATA           = 1,
	LIBDEFLATE_SHORT_OUTPUT       = 2,
	LIBDEFLATE_INSUFFICIENT_SPACE = 3,
};
39031
39032
39033			LIBDEFLATEAPI enum libdeflate_result
39034			libdeflate_deflate_decompress(struct libdeflate_decompressor *decompressor,
39035			const void *in, size_t in_nbytes,
39036			void *out, size_t out_nbytes_avail,
39037			size_t *actual_out_nbytes_ret);
39038
39039
39040			LIBDEFLATEAPI enum libdeflate_result
39041			libdeflate_deflate_decompress_ex(struct libdeflate_decompressor *decompressor,
39042			const void *in, size_t in_nbytes,
39043			void *out, size_t out_nbytes_avail,
39044			size_t *actual_in_nbytes_ret,
39045			size_t *actual_out_nbytes_ret);
39046
39047
39048			LIBDEFLATEAPI enum libdeflate_result
39049			libdeflate_zlib_decompress(struct libdeflate_decompressor *decompressor,
39050			const void *in, size_t in_nbytes,
39051			void *out, size_t out_nbytes_avail,
39052			size_t *actual_out_nbytes_ret);
39053
39054
39055			LIBDEFLATEAPI enum libdeflate_result
39056			libdeflate_zlib_decompress_ex(struct libdeflate_decompressor *decompressor,
39057			const void *in, size_t in_nbytes,
39058			void *out, size_t out_nbytes_avail,
39059			size_t *actual_in_nbytes_ret,
39060			size_t *actual_out_nbytes_ret);
39061
39062
39063			LIBDEFLATEAPI enum libdeflate_result
39064			libdeflate_gzip_decompress(struct libdeflate_decompressor *decompressor,
39065			const void *in, size_t in_nbytes,
39066			void *out, size_t out_nbytes_avail,
39067			size_t *actual_out_nbytes_ret);
39068
39069
39070			LIBDEFLATEAPI enum libdeflate_result
39071			libdeflate_gzip_decompress_ex(struct libdeflate_decompressor *decompressor,
39072			const void *in, size_t in_nbytes,
39073			void *out, size_t out_nbytes_avail,
39074			size_t *actual_in_nbytes_ret,
39075			size_t *actual_out_nbytes_ret);
39076
39077
39078			LIBDEFLATEAPI void
39079			libdeflate_free_decompressor(struct libdeflate_decompressor *decompressor);
39080
39081
39082
39083
39084
39085
39086			LIBDEFLATEAPI uint32_t
39087			libdeflate_adler32(uint32_t adler, const void *buffer, size_t len);
39088
39089
39090
39091			LIBDEFLATEAPI uint32_t
39092			libdeflate_crc32(uint32_t crc, const void *buffer, size_t len);
39093
39094
39095
39096
39097
39098
39099			LIBDEFLATEAPI void
39100			libdeflate_set_memory_allocator(void (malloc_func)(size_t),
39101			void (free_func)(void ));
39102
39103
/* Options accepted by the *_ex() allocation functions. */
struct libdeflate_options {

	/* Size of this structure, as seen by the caller. */
	size_t sizeof_options;

	/* Optional malloc()-style and free()-style allocator functions. */
	void *(*malloc_func)(size_t);
	void (*free_func)(void *);
};
39113
39114			#ifdef __cplusplus
39115			}
39116			#endif
39117
39118			#endif
39119
39120
#include <stdbool.h>
#include <stddef.h>	/* size_t */
#include <stdint.h>	/* fixed-width integer types */
#ifdef _MSC_VER
#  include <intrin.h>	/* _BitScan*() and other intrinsics */
#  include <stdlib.h>	/* _byteswap_*() */

   /* Selected MSVC warnings are disabled for the whole library. */
#  pragma warning(disable : 4146)

#  pragma warning(disable : 4018)
#  pragma warning(disable : 4244)
#  pragma warning(disable : 4267)
#  pragma warning(disable : 4310)

#  pragma warning(disable : 4100)
#  pragma warning(disable : 4127)
#  pragma warning(disable : 4189)
#  pragma warning(disable : 4232)
#  pragma warning(disable : 4245)
#  pragma warning(disable : 4295)
#endif
#ifndef FREESTANDING
#  include <string.h>	/* memcpy() */
#endif
39146
39147
39148
39149
39150
39151
39152			#undef ARCH_X86_64
39153			#undef ARCH_X86_32
39154			#undef ARCH_ARM64
39155			#undef ARCH_ARM32
39156			#undef ARCH_RISCV
39157			#ifdef _MSC_VER
39158
39159			# if defined(_M_X64) && !defined(_M_ARM64EC)
39160			# define ARCH_X86_64
39161			# elif defined(_M_IX86)
39162			# define ARCH_X86_32
39163			# elif defined(_M_ARM64)
39164			# define ARCH_ARM64
39165			# elif defined(_M_ARM)
39166			# define ARCH_ARM32
39167			# endif
39168			#else
39169			# if defined(__x86_64__)
39170			# define ARCH_X86_64
39171			# elif defined(__i386__)
39172			# define ARCH_X86_32
39173			# elif defined(__aarch64__)
39174			# define ARCH_ARM64
39175			# elif defined(__arm__)
39176			# define ARCH_ARM32
39177			# elif defined(__riscv)
39178			# define ARCH_RISCV
39179			# endif
39180			#endif
39181
39182
39183
39184
39185
39186
39187			typedef uint8_t u8;
39188			typedef uint16_t u16;
39189			typedef uint32_t u32;
39190			typedef uint64_t u64;
39191			typedef int8_t s8;
39192			typedef int16_t s16;
39193			typedef int32_t s32;
39194			typedef int64_t s64;
39195
39196
39197			#ifdef _MSC_VER
39198			# ifdef _WIN64
39199			typedef long long ssize_t;
39200			# else
39201			typedef long ssize_t;
39202			# endif
39203			#endif
39204
39205
39206			typedef size_t machine_word_t;
39207
39208
39209			#define WORDBYTES ((int)sizeof(machine_word_t))
39210
39211
39212			#define WORDBITS (8 * WORDBYTES)
39213
39214
39215
39216
39217
39218
/*
 * Compiler version checks.  Each *_PREREQ() macro expands to 0 when a
 * different compiler is in use, so all three are safe to use in any #if.
 */

/* GCC_PREREQ(major, minor): real gcc (not clang or icc) at least this new. */
#if defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER)
#  define GCC_PREREQ(major, minor)		\
	(__GNUC__ > (major) ||			\
	 (__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)))
#  if !GCC_PREREQ(4, 9)
#    error "gcc versions older than 4.9 are no longer supported"
#  endif
#else
#  define GCC_PREREQ(major, minor)	0
#endif
/*
 * CLANG_PREREQ(major, minor, apple_version): on Apple clang only
 * apple_version is compared; on other clang builds only (major, minor).
 */
#ifdef __clang__
#  ifdef __apple_build_version__
#    define CLANG_PREREQ(major, minor, apple_version)	\
	(__apple_build_version__ >= (apple_version))
#  else
#    define CLANG_PREREQ(major, minor, apple_version)	\
	(__clang_major__ > (major) ||			\
	 (__clang_major__ == (major) && __clang_minor__ >= (minor)))
#  endif
#  if !CLANG_PREREQ(3, 9, 8000000)
#    error "clang versions older than 3.9 are no longer supported"
#  endif
#else
#  define CLANG_PREREQ(major, minor, apple_version)	0
#endif
/* MSVC_PREREQ(version): compares against _MSC_VER. */
#ifdef _MSC_VER
#  define MSVC_PREREQ(version)	(_MSC_VER >= (version))
#  if !MSVC_PREREQ(1900)
#    error "MSVC versions older than Visual Studio 2015 are no longer supported"
#  endif
#else
#  define MSVC_PREREQ(version)	0
#endif
39252
39253
/* Make __has_attribute() usable on compilers that lack it. */
#ifndef __has_attribute
#  define __has_attribute(attribute)	0
#endif

/* Make __has_builtin() usable on compilers that lack it. */
#ifndef __has_builtin
#  define __has_builtin(builtin)	0
#endif

/* MSVC's spelling of 'inline'. */
#ifdef _MSC_VER
#  define inline		__inline
#endif

/* forceinline: request inlining as strongly as the compiler allows. */
#if defined(__GNUC__) || __has_attribute(always_inline)
#  define forceinline		inline __attribute__((always_inline))
#elif defined(_MSC_VER)
#  define forceinline		__forceinline
#else
#  define forceinline		inline
#endif

/* MAYBE_UNUSED: suppress unused warnings for the annotated entity. */
#if defined(__GNUC__) || __has_attribute(unused)
#  define MAYBE_UNUSED		__attribute__((unused))
#else
#  define MAYBE_UNUSED
#endif

/* NORETURN: the annotated function does not return. */
#if defined(__GNUC__) || __has_attribute(noreturn)
#  define NORETURN		__attribute__((noreturn))
#else
#  define NORETURN
#endif

/*
 * restrict: when __STDC_VERSION__ is absent or older than C11, map it to the
 * compiler extension or to nothing.
 */
#if !defined(__STDC_VERSION__) || (__STDC_VERSION__ < 201112L)
#  if defined(__GNUC__) || defined(__clang__)
#    define restrict		__restrict__
#  else
#    define restrict
#  endif
#endif

/* likely(expr): hint that @expr is usually true. */
#if defined(__GNUC__) || __has_builtin(__builtin_expect)
#  define likely(expr)		__builtin_expect(!!(expr), 1)
#else
#  define likely(expr)		(expr)
#endif

/* unlikely(expr): hint that @expr is usually false. */
#if defined(__GNUC__) || __has_builtin(__builtin_expect)
#  define unlikely(expr)	__builtin_expect(!!(expr), 0)
#else
#  define unlikely(expr)	(expr)
#endif
39313
39314
/*
 * prefetchr(addr): prefetch @addr for reading.  Expands to nothing when no
 * compiler-specific implementation is available.
 */
#undef prefetchr
#if defined(__GNUC__) || __has_builtin(__builtin_prefetch)
#  define prefetchr(addr)	__builtin_prefetch((addr), 0)
#elif defined(_MSC_VER)
#  if defined(ARCH_X86_32) || defined(ARCH_X86_64)
#    define prefetchr(addr)	_mm_prefetch((addr), _MM_HINT_T0)
#  elif defined(ARCH_ARM64)
#    define prefetchr(addr)	__prefetch2((addr), 0x00 )
#  elif defined(ARCH_ARM32)
#    define prefetchr(addr)	__prefetch(addr)
#  endif
#endif
#ifndef prefetchr
#  define prefetchr(addr)
#endif

/*
 * prefetchw(addr): prefetch @addr for writing.  Expands to nothing when no
 * compiler-specific implementation is available.
 */
#undef prefetchw
#if defined(__GNUC__) || __has_builtin(__builtin_prefetch)
#  define prefetchw(addr)	__builtin_prefetch((addr), 1)
#elif defined(_MSC_VER)
#  if defined(ARCH_X86_32) || defined(ARCH_X86_64)
#    define prefetchw(addr)	_m_prefetchw(addr)
#  elif defined(ARCH_ARM64)
#    define prefetchw(addr)	__prefetch2((addr), 0x10 )
#  elif defined(ARCH_ARM32)
#    define prefetchw(addr)	__prefetchw(addr)
#  endif
#endif
#ifndef prefetchw
#  define prefetchw(addr)
#endif
39347
39348
/*
 * _aligned_attribute(n): align the annotated entity to @n bytes.  Left
 * undefined when the compiler offers no way to do this, so users can test
 * for it with #ifdef.
 */
#undef _aligned_attribute
#if defined(__GNUC__) || __has_attribute(aligned)
#  define _aligned_attribute(n)	__attribute__((aligned(n)))
#elif defined(_MSC_VER)
#  define _aligned_attribute(n)	__declspec(align(n))
#endif

/*
 * _target_attribute(attrs): compile the annotated function for the given
 * target features; expands to nothing where unsupported.
 */
#if defined(__GNUC__) || __has_attribute(target)
#  define _target_attribute(attrs)	__attribute__((target(attrs)))
#else
#  define _target_attribute(attrs)
#endif
39362
39363
39364
39365
39366
/* Number of elements in the array A (A must be an array, not a pointer). */
39367			#define ARRAY_LEN(A) (sizeof(A) / sizeof((A)[0]))
/* Smaller / larger of two values; each argument may be evaluated twice. */
39368			#define MIN(a, b) ((a) <= (b) ? (a) : (b))
39369			#define MAX(a, b) ((a) >= (b) ? (a) : (b))
/* n / d, rounded up. */
39370			#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
/* Compile-time check: a false expr yields a negative array size, i.e. an error. */
39371			#define STATIC_ASSERT(expr) ((void)sizeof(char[1 - 2 * !(expr)]))
/* Round n up to a multiple of a; the bit mask requires a to be a power of 2. */
39372			#define ALIGN(n, a) (((n) + (a) - 1) & ~((a) - 1))
/* Round n up to a multiple of d, using division (see DIV_ROUND_UP). */
39373			#define ROUND_UP(n, d) ((d) * DIV_ROUND_UP((n), (d)))
39374
39375
39376
39377
39378
39379
39380			#if defined(__BYTE_ORDER__)
39381			# define CPU_IS_LITTLE_ENDIAN() (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
39382			#elif defined(_MSC_VER)
39383			# define CPU_IS_LITTLE_ENDIAN() true
39384			#else
39385			static forceinline bool CPU_IS_LITTLE_ENDIAN(void)
39386			{
39387			union {
39388			u32 w;
39389			u8 b;
39390			} u;
39391
39392			u.w = 1;
39393			return u.b;
39394			}
39395			#endif
39396
39397
39398			static forceinline u16 bswap16(u16 v)
39399			{
39400			#if defined(__GNUC__) \|\| __has_builtin(__builtin_bswap16)
39401			return __builtin_bswap16(v);
39402			#elif defined(_MSC_VER)
39403			return _byteswap_ushort(v);
39404			#else
39405			return (v << 8) \| (v >> 8);
39406			#endif
39407			}
39408
39409
39410			static forceinline u32 bswap32(u32 v)
39411			{
39412			#if defined(__GNUC__) \|\| __has_builtin(__builtin_bswap32)
39413			return __builtin_bswap32(v);
39414			#elif defined(_MSC_VER)
39415			return _byteswap_ulong(v);
39416			#else
39417			return ((v & 0x000000FF) << 24) \|
39418			((v & 0x0000FF00) << 8) \|
39419			((v & 0x00FF0000) >> 8) \|
39420			((v & 0xFF000000) >> 24);
39421			#endif
39422			}
39423
39424
39425			static forceinline u64 bswap64(u64 v)
39426			{
39427			#if defined(__GNUC__) \|\| __has_builtin(__builtin_bswap64)
39428			return __builtin_bswap64(v);
39429			#elif defined(_MSC_VER)
39430			return _byteswap_uint64(v);
39431			#else
39432			return ((v & 0x00000000000000FF) << 56) \|
39433			((v & 0x000000000000FF00) << 40) \|
39434			((v & 0x0000000000FF0000) << 24) \|
39435			((v & 0x00000000FF000000) << 8) \|
39436			((v & 0x000000FF00000000) >> 8) \|
39437			((v & 0x0000FF0000000000) >> 24) \|
39438			((v & 0x00FF000000000000) >> 40) \|
39439			((v & 0xFF00000000000000) >> 56);
39440			#endif
39441			}
39442
/*
 * Conversions between CPU byte order and a fixed byte order.  Each macro is a
 * no-op when the CPU already uses the named order and a byte swap otherwise,
 * so the same macro converts in both directions.
 */

/* 16-bit */
#define le16_bswap(x) (CPU_IS_LITTLE_ENDIAN() ? (x) : bswap16(x))
#define be16_bswap(x) (CPU_IS_LITTLE_ENDIAN() ? bswap16(x) : (x))

/* 32-bit */
#define le32_bswap(x) (CPU_IS_LITTLE_ENDIAN() ? (x) : bswap32(x))
#define be32_bswap(x) (CPU_IS_LITTLE_ENDIAN() ? bswap32(x) : (x))

/* 64-bit */
#define le64_bswap(x) (CPU_IS_LITTLE_ENDIAN() ? (x) : bswap64(x))
#define be64_bswap(x) (CPU_IS_LITTLE_ENDIAN() ? bswap64(x) : (x))
39449
39450
39451
39452
39453
39454
/*
 * UNALIGNED_ACCESS_IS_FAST - 1 if loads and stores of unaligned words are
 * treated as cheap on the target, 0 otherwise.
 *
 * It is set for GCC-compatible compilers on the architectures listed below
 * and for every MSVC target.  Code tests it in ordinary 'if' statements, so
 * it must always be defined to 0 or 1.
 */
#if (defined(__GNUC__) || defined(__clang__)) && \
	(defined(ARCH_X86_64) || defined(ARCH_X86_32) || \
	 defined(__ARM_FEATURE_UNALIGNED) || defined(__powerpc64__) || \
	 defined(__riscv_misaligned_fast) || \
	 defined(__wasm__))
# define UNALIGNED_ACCESS_IS_FAST 1
#elif defined(_MSC_VER)
# define UNALIGNED_ACCESS_IS_FAST 1
#else
# define UNALIGNED_ACCESS_IS_FAST 0
#endif
39466
39467
39468
39469			#ifdef FREESTANDING
39470			# define MEMCOPY __builtin_memcpy
39471			#else
39472			# define MEMCOPY memcpy
39473			#endif
39474
39475
39476
39477			#define DEFINE_UNALIGNED_TYPE(type) \
39478			static forceinline type \
39479			load_##type##_unaligned(const void *p) \
39480			{ \
39481			type v; \
39482			\
39483			MEMCOPY(&v, p, sizeof(v)); \
39484			return v; \
39485			} \
39486			\
39487			static forceinline void \
39488			store_##type##_unaligned(type v, void *p) \
39489			{ \
39490			MEMCOPY(p, &v, sizeof(v)); \
39491			}
39492
39493			DEFINE_UNALIGNED_TYPE(u16)
39494			DEFINE_UNALIGNED_TYPE(u32)
39495			DEFINE_UNALIGNED_TYPE(u64)
39496			DEFINE_UNALIGNED_TYPE(machine_word_t)
39497
39498			#undef MEMCOPY
39499
39500			#define load_word_unaligned load_machine_word_t_unaligned
39501			#define store_word_unaligned store_machine_word_t_unaligned
39502
39503
39504
39505			static forceinline u16
39506			get_unaligned_le16(const u8 *p)
39507			{
39508			if (UNALIGNED_ACCESS_IS_FAST)
39509			return le16_bswap(load_u16_unaligned(p));
39510			else
39511			return ((u16)p[1] << 8) \| p[0];
39512			}
39513
39514			static forceinline u16
39515			get_unaligned_be16(const u8 *p)
39516			{
39517			if (UNALIGNED_ACCESS_IS_FAST)
39518			return be16_bswap(load_u16_unaligned(p));
39519			else
39520			return ((u16)p[0] << 8) \| p[1];
39521			}
39522
39523			static forceinline u32
39524			get_unaligned_le32(const u8 *p)
39525			{
39526			if (UNALIGNED_ACCESS_IS_FAST)
39527			return le32_bswap(load_u32_unaligned(p));
39528			else
39529			return ((u32)p[3] << 24) \| ((u32)p[2] << 16) \|
39530			((u32)p[1] << 8) \| p[0];
39531			}
39532
39533			static forceinline u32
39534			get_unaligned_be32(const u8 *p)
39535			{
39536			if (UNALIGNED_ACCESS_IS_FAST)
39537			return be32_bswap(load_u32_unaligned(p));
39538			else
39539			return ((u32)p[0] << 24) \| ((u32)p[1] << 16) \|
39540			((u32)p[2] << 8) \| p[3];
39541			}
39542
39543			static forceinline u64
39544			get_unaligned_le64(const u8 *p)
39545			{
39546			if (UNALIGNED_ACCESS_IS_FAST)
39547			return le64_bswap(load_u64_unaligned(p));
39548			else
39549			return ((u64)p[7] << 56) \| ((u64)p[6] << 48) \|
39550			((u64)p[5] << 40) \| ((u64)p[4] << 32) \|
39551			((u64)p[3] << 24) \| ((u64)p[2] << 16) \|
39552			((u64)p[1] << 8) \| p[0];
39553			}
39554
39555			static forceinline machine_word_t
39556			get_unaligned_leword(const u8 *p)
39557			{
39558			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
39559			if (WORDBITS == 32)
39560			return get_unaligned_le32(p);
39561			else
39562			return get_unaligned_le64(p);
39563			}
39564
39565
39566
39567			static forceinline void
39568			put_unaligned_le16(u16 v, u8 *p)
39569			{
39570			if (UNALIGNED_ACCESS_IS_FAST) {
39571			store_u16_unaligned(le16_bswap(v), p);
39572			} else {
39573			p[0] = (u8)(v >> 0);
39574			p[1] = (u8)(v >> 8);
39575			}
39576			}
39577
39578			static forceinline void
39579			put_unaligned_be16(u16 v, u8 *p)
39580			{
39581			if (UNALIGNED_ACCESS_IS_FAST) {
39582			store_u16_unaligned(be16_bswap(v), p);
39583			} else {
39584			p[0] = (u8)(v >> 8);
39585			p[1] = (u8)(v >> 0);
39586			}
39587			}
39588
39589			static forceinline void
39590			put_unaligned_le32(u32 v, u8 *p)
39591			{
39592			if (UNALIGNED_ACCESS_IS_FAST) {
39593			store_u32_unaligned(le32_bswap(v), p);
39594			} else {
39595			p[0] = (u8)(v >> 0);
39596			p[1] = (u8)(v >> 8);
39597			p[2] = (u8)(v >> 16);
39598			p[3] = (u8)(v >> 24);
39599			}
39600			}
39601
39602			static forceinline void
39603			put_unaligned_be32(u32 v, u8 *p)
39604			{
39605			if (UNALIGNED_ACCESS_IS_FAST) {
39606			store_u32_unaligned(be32_bswap(v), p);
39607			} else {
39608			p[0] = (u8)(v >> 24);
39609			p[1] = (u8)(v >> 16);
39610			p[2] = (u8)(v >> 8);
39611			p[3] = (u8)(v >> 0);
39612			}
39613			}
39614
39615			static forceinline void
39616			put_unaligned_le64(u64 v, u8 *p)
39617			{
39618			if (UNALIGNED_ACCESS_IS_FAST) {
39619			store_u64_unaligned(le64_bswap(v), p);
39620			} else {
39621			p[0] = (u8)(v >> 0);
39622			p[1] = (u8)(v >> 8);
39623			p[2] = (u8)(v >> 16);
39624			p[3] = (u8)(v >> 24);
39625			p[4] = (u8)(v >> 32);
39626			p[5] = (u8)(v >> 40);
39627			p[6] = (u8)(v >> 48);
39628			p[7] = (u8)(v >> 56);
39629			}
39630			}
39631
39632			static forceinline void
39633			put_unaligned_leword(machine_word_t v, u8 *p)
39634			{
39635			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
39636			if (WORDBITS == 32)
39637			put_unaligned_le32(v, p);
39638			else
39639			put_unaligned_le64(v, p);
39640			}
39641
39642
39643
39644
39645
39646
39647
39648			static forceinline unsigned
39649			bsr32(u32 v)
39650			{
39651			#if defined(__GNUC__) \|\| __has_builtin(__builtin_clz)
39652			return 31 - __builtin_clz(v);
39653			#elif defined(_MSC_VER)
39654			unsigned long i;
39655
39656			_BitScanReverse(&i, v);
39657			return i;
39658			#else
39659			unsigned i = 0;
39660
39661			while ((v >>= 1) != 0)
39662			i++;
39663			return i;
39664			#endif
39665			}
39666
39667			static forceinline unsigned
39668			bsr64(u64 v)
39669			{
39670			#if defined(__GNUC__) \|\| __has_builtin(__builtin_clzll)
39671			return 63 - __builtin_clzll(v);
39672			#elif defined(_MSC_VER) && defined(_WIN64)
39673			unsigned long i;
39674
39675			_BitScanReverse64(&i, v);
39676			return i;
39677			#else
39678			unsigned i = 0;
39679
39680			while ((v >>= 1) != 0)
39681			i++;
39682			return i;
39683			#endif
39684			}
39685
39686			static forceinline unsigned
39687			bsrw(machine_word_t v)
39688			{
39689			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
39690			if (WORDBITS == 32)
39691			return bsr32(v);
39692			else
39693			return bsr64(v);
39694			}
39695
39696
39697
39698			static forceinline unsigned
39699			bsf32(u32 v)
39700			{
39701			#if defined(__GNUC__) \|\| __has_builtin(__builtin_ctz)
39702			return __builtin_ctz(v);
39703			#elif defined(_MSC_VER)
39704			unsigned long i;
39705
39706			_BitScanForward(&i, v);
39707			return i;
39708			#else
39709			unsigned i = 0;
39710
39711			for (; (v & 1) == 0; v >>= 1)
39712			i++;
39713			return i;
39714			#endif
39715			}
39716
39717			static forceinline unsigned
39718			bsf64(u64 v)
39719			{
39720			#if defined(__GNUC__) \|\| __has_builtin(__builtin_ctzll)
39721			return __builtin_ctzll(v);
39722			#elif defined(_MSC_VER) && defined(_WIN64)
39723			unsigned long i;
39724
39725			_BitScanForward64(&i, v);
39726			return i;
39727			#else
39728			unsigned i = 0;
39729
39730			for (; (v & 1) == 0; v >>= 1)
39731			i++;
39732			return i;
39733			#endif
39734			}
39735
39736			static forceinline unsigned
39737			bsfw(machine_word_t v)
39738			{
39739			STATIC_ASSERT(WORDBITS == 32 \|\| WORDBITS == 64);
39740			if (WORDBITS == 32)
39741			return bsf32(v);
39742			else
39743			return bsf64(v);
39744			}
39745
39746
/*
 * rbit32() - reverse the order of the 32 bits of 'v' using the ARM "rbit"
 * instruction.
 *
 * Only defined for GCC-compatible compilers on 32-bit ARM (ARMv6T2 or ARMv7
 * and later) and on 64-bit ARM.  When the function exists, a macro of the
 * same name is defined as well, so availability can be tested with
 * '#ifdef rbit32'.
 */
#undef rbit32
#if (defined(__GNUC__) || defined(__clang__)) && defined(ARCH_ARM32) && \
	(__ARM_ARCH >= 7 || (__ARM_ARCH == 6 && defined(__ARM_ARCH_6T2__)))
static forceinline u32
rbit32(u32 v)
{
	__asm__("rbit %0, %1" : "=r" (v) : "r" (v));
	return v;
}
#define rbit32 rbit32
#elif (defined(__GNUC__) || defined(__clang__)) && defined(ARCH_ARM64)
static forceinline u32
rbit32(u32 v)
{
	/* %w selects the 32-bit view of the register. */
	__asm__("rbit %w0, %w1" : "=r" (v) : "r" (v));
	return v;
}
#define rbit32 rbit32
#endif
39766
39767			#endif
39768
39769
/*
 * Allocator hook types.  A malloc_func_t takes a size and returns the new
 * block; a free_func_t takes the block to release.  Without the pointer
 * declarators the allocator would return nothing and the deallocator would
 * receive nothing.
 */
typedef void *(*malloc_func_t)(size_t);
typedef void (*free_func_t)(void *);

/* Default hooks; defined elsewhere. */
extern malloc_func_t libdeflate_default_malloc_func;
extern free_func_t libdeflate_default_free_func;

/*
 * Aligned allocation built on caller-supplied hooks.
 * NOTE(review): going by the names, a block obtained from
 * libdeflate_aligned_malloc() is presumably released with
 * libdeflate_aligned_free() and the matching free hook; confirm against the
 * definitions.
 */
void *libdeflate_aligned_malloc(malloc_func_t malloc_func,
				size_t alignment, size_t size);
void libdeflate_aligned_free(free_func_t free_func, void *ptr);
39779
#ifdef FREESTANDING
/*
 * No C library headers in freestanding builds: declare the memory functions
 * with their standard prototypes and route every call to the compiler
 * builtins.
 */
void *memset(void *s, int c, size_t n);
#define memset(s, c, n)		__builtin_memset((s), (c), (n))

void *memcpy(void *dest, const void *src, size_t n);
#define memcpy(dest, src, n)	__builtin_memcpy((dest), (src), (n))

void *memmove(void *dest, const void *src, size_t n);
#define memmove(dest, src, n)	__builtin_memmove((dest), (src), (n))

int memcmp(const void *s1, const void *s2, size_t n);
#define memcmp(s1, s2, n)	__builtin_memcmp((s1), (s2), (n))

/* Assertions are switched off in freestanding builds. */
#undef LIBDEFLATE_ENABLE_ASSERTIONS
#else
/* Hosted build: the same four functions come from the C library. */
# include <string.h>

/* Turn assertions on when the clang static analyzer is running. */
# ifdef __clang_analyzer__
#  define LIBDEFLATE_ENABLE_ASSERTIONS
# endif
#endif
39802
39803
/*
 * ASSERT(expr) - runtime assertion.
 *
 * With LIBDEFLATE_ENABLE_ASSERTIONS defined, a false 'expr' calls
 * libdeflate_assertion_failed() with the expression text, file and line; that
 * function does not return.  Otherwise 'expr' is still evaluated but its
 * value is discarded.
 *
 * The enabled form is wrapped in do { } while (0) so that 'ASSERT(x);' is
 * exactly one statement and stays valid as the body of an if/else.
 */
#ifdef LIBDEFLATE_ENABLE_ASSERTIONS
NORETURN void
libdeflate_assertion_failed(const char *expr, const char *file, int line);
#define ASSERT(expr)							\
	do {								\
		if (unlikely(!(expr)))					\
			libdeflate_assertion_failed(#expr, __FILE__,	\
						    __LINE__);		\
	} while (0)
#else
#define ASSERT(expr) (void)(expr)
#endif
39812
/*
 * Token pasting helpers.  CONCAT() goes through CONCAT_IMPL() so that macro
 * arguments are expanded before being pasted.  ADD_SUFFIX() appends whatever
 * SUFFIX is defined as at the point of use.
 */
#define CONCAT_IMPL(lhs, rhs)	lhs##rhs
#define CONCAT(lhs, rhs)	CONCAT_IMPL(lhs, rhs)
#define ADD_SUFFIX(name)	CONCAT(name, SUFFIX)
39816
39817			#endif
39818
39819
39820			#if defined(ARCH_X86_32) \|\| defined(ARCH_X86_64)
39821
/*
 * Bits of the x86 feature mask, one per optional CPU capability.  Bit numbers
 * run from 0 (SSE2) to 10 (AVX-VNNI).
 */
#define X86_CPU_FEATURE_SSE2		0x0001	/* bit 0 */
#define X86_CPU_FEATURE_PCLMULQDQ	0x0002	/* bit 1 */
#define X86_CPU_FEATURE_AVX		0x0004	/* bit 2 */
#define X86_CPU_FEATURE_AVX2		0x0008	/* bit 3 */
#define X86_CPU_FEATURE_BMI2		0x0010	/* bit 4 */

#define X86_CPU_FEATURE_ZMM		0x0020	/* bit 5 */
#define X86_CPU_FEATURE_AVX512BW	0x0040	/* bit 6 */
#define X86_CPU_FEATURE_AVX512VL	0x0080	/* bit 7 */
#define X86_CPU_FEATURE_VPCLMULQDQ	0x0100	/* bit 8 */
#define X86_CPU_FEATURE_AVX512VNNI	0x0200	/* bit 9 */
#define X86_CPU_FEATURE_AVXVNNI		0x0400	/* bit 10 */
39834
39835			#if defined(__GNUC__) \|\| defined(__clang__) \|\| defined(_MSC_VER)
39836
39837			# define X86_CPU_FEATURES_KNOWN (1U << 31)
39838			extern volatile u32 libdeflate_x86_cpu_features;
39839
39840			void libdeflate_init_x86_cpu_features(void);
39841
39842			static inline u32 get_x86_cpu_features(void)
39843			{
39844			if (libdeflate_x86_cpu_features == 0)
39845			libdeflate_init_x86_cpu_features();
39846			return libdeflate_x86_cpu_features;
39847			}
39848
39849			# include
39850			# if defined(_MSC_VER) && defined(__clang__)
39851			# include
39852			# include
39853			# include
39854			# include
39855			# include
39856			# include
39857			# include
39858			# include
39859			# if __has_include()
39860			# include
39861			# endif
39862			# if __has_include()
39863			# include
39864			# endif
39865			# if __has_include()
39866			# include
39867			# endif
39868			# if __has_include()
39869			# include
39870			# endif
39871			# if __has_include()
39872			# include
39873			# endif
39874			# endif
39875			#else
39876			static inline u32 get_x86_cpu_features(void) { return 0; }
39877			#endif
39878
/*
 * HAVE_<FEATURE>(features) - nonzero if the CPU feature can be used.
 *
 * When the compiler is already targeting the feature, the macro is the
 * constant 1 and 'features' is ignored.  Otherwise it tests the matching
 * X86_CPU_FEATURE_* bit of the runtime mask 'features'.  The *_NATIVE
 * variants record which of the two cases applies, as 1 or 0.
 */
#if defined(__SSE2__) || \
	(defined(_MSC_VER) && \
	 (defined(ARCH_X86_64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 2)))
# define HAVE_SSE2(features) 1
# define HAVE_SSE2_NATIVE 1
#else
# define HAVE_SSE2(features) ((features) & X86_CPU_FEATURE_SSE2)
# define HAVE_SSE2_NATIVE 0
#endif

#if (defined(__PCLMUL__) && defined(__SSE4_1__)) || \
	(defined(_MSC_VER) && defined(__AVX2__))
# define HAVE_PCLMULQDQ(features) 1
#else
# define HAVE_PCLMULQDQ(features) ((features) & X86_CPU_FEATURE_PCLMULQDQ)
#endif

#ifdef __AVX__
# define HAVE_AVX(features) 1
#else
# define HAVE_AVX(features) ((features) & X86_CPU_FEATURE_AVX)
#endif

#ifdef __AVX2__
# define HAVE_AVX2(features) 1
#else
# define HAVE_AVX2(features) ((features) & X86_CPU_FEATURE_AVX2)
#endif

#if defined(__BMI2__) || (defined(_MSC_VER) && defined(__AVX2__))
# define HAVE_BMI2(features) 1
# define HAVE_BMI2_NATIVE 1
#else
# define HAVE_BMI2(features) ((features) & X86_CPU_FEATURE_BMI2)
# define HAVE_BMI2_NATIVE 0
#endif

#ifdef __AVX512BW__
# define HAVE_AVX512BW(features) 1
#else
# define HAVE_AVX512BW(features) ((features) & X86_CPU_FEATURE_AVX512BW)
#endif

#ifdef __AVX512VL__
# define HAVE_AVX512VL(features) 1
#else
# define HAVE_AVX512VL(features) ((features) & X86_CPU_FEATURE_AVX512VL)
#endif

#ifdef __VPCLMULQDQ__
# define HAVE_VPCLMULQDQ(features) 1
#else
# define HAVE_VPCLMULQDQ(features) ((features) & X86_CPU_FEATURE_VPCLMULQDQ)
#endif

#ifdef __AVX512VNNI__
# define HAVE_AVX512VNNI(features) 1
#else
# define HAVE_AVX512VNNI(features) ((features) & X86_CPU_FEATURE_AVX512VNNI)
#endif

#ifdef __AVXVNNI__
# define HAVE_AVXVNNI(features) 1
#else
# define HAVE_AVXVNNI(features) ((features) & X86_CPU_FEATURE_AVXVNNI)
#endif
39945
39946			#endif
39947
39948			#endif
39949
39950
39951			#ifdef X86_CPU_FEATURES_KNOWN
39952
39953
39954
39955			static inline void
39956	8		cpuid(u32 leaf, u32 subleaf, u32 a, u32 b, u32 c, u32 d)
39957			{
39958			#ifdef _MSC_VER
39959			int result[4];
39960
39961			__cpuidex(result, leaf, subleaf);
39962			*a = result[0];
39963			*b = result[1];
39964			*c = result[2];
39965			*d = result[3];
39966			#else
39967	8		__asm__ volatile("cpuid" : "=a" (a), "=b" (b), "=c" (c), "=d" (d)
39968			: "a" (leaf), "c" (subleaf));
39969			#endif
39970	8		}
39971
39972
39973			static inline u64
39974	2		read_xcr(u32 index)
39975			{
39976			#ifdef _MSC_VER
39977			return _xgetbv(index);
39978			#else
39979			u32 d, a;
39980
39981
39982	2		__asm__ volatile(".byte 0x0f, 0x01, 0xd0" :
39983			"=d" (d), "=a" (a) : "c" (index));
39984
39985	2		return ((u64)d << 32) \| a;
39986			#endif
39987			}
39988
/*
 * Pairs each X86_CPU_FEATURE_* bit with a short lowercase name.  The table is
 * passed to disable_cpu_features_for_testing() at the end of feature
 * detection.
 * NOTE(review): struct cpu_feature is declared elsewhere; the {bit, name}
 * field order is inferred from these initializers.
 */
static const struct cpu_feature x86_cpu_feature_table[] = {
	{X86_CPU_FEATURE_SSE2, "sse2"},
	{X86_CPU_FEATURE_PCLMULQDQ, "pclmulqdq"},
	{X86_CPU_FEATURE_AVX, "avx"},
	{X86_CPU_FEATURE_AVX2, "avx2"},
	{X86_CPU_FEATURE_BMI2, "bmi2"},
	{X86_CPU_FEATURE_ZMM, "zmm"},
	{X86_CPU_FEATURE_AVX512BW, "avx512bw"},
	{X86_CPU_FEATURE_AVX512VL, "avx512vl"},
	{X86_CPU_FEATURE_VPCLMULQDQ, "vpclmulqdq"},
	{X86_CPU_FEATURE_AVX512VNNI, "avx512_vnni"},
	{X86_CPU_FEATURE_AVXVNNI, "avx_vnni"},
};
40002
/*
 * Cached x86 feature mask.  It starts at 0, which get_x86_cpu_features()
 * treats as "not detected yet"; libdeflate_init_x86_cpu_features() then stores
 * the detected bits ORed with X86_CPU_FEATURES_KNOWN.
 */
volatile u32 libdeflate_x86_cpu_features = 0;
40004
40005			static inline bool
40006	2		os_supports_avx512(u64 xcr0)
40007			{
40008			#ifdef __APPLE__
40009
40010			return false;
40011			#else
40012	2		return (xcr0 & 0xe6) == 0xe6;
40013			#endif
40014			}
40015
40016
40017			static inline bool
40018	0		allow_512bit_vectors(const u32 manufacturer[3], u32 family, u32 model)
40019			{
40020			#ifdef TEST_SUPPORT__DO_NOT_USE
40021			return true;
40022			#endif
40023	0	0	if (memcmp(manufacturer, "GenuineIntel", 12) != 0)
40024	0		return true;
40025	0	0	if (family != 6)
40026	0		return true;
40027	0	0	switch (model) {
40028	0		case 85:
40029			case 106:
40030			case 108:
40031			case 126:
40032			case 140:
40033			case 141:
40034	0		return false;
40035			}
40036	0		return true;
40037			}
40038
40039
40040	2		void libdeflate_init_x86_cpu_features(void)
40041			{
40042			u32 max_leaf;
40043			u32 manufacturer[3];
40044			u32 family, model;
40045			u32 a, b, c, d;
40046	2		u64 xcr0 = 0;
40047	2		u32 features = 0;
40048
40049
40050	2		cpuid(0, 0, &max_leaf, &manufacturer[0], &manufacturer[2],
40051			&manufacturer[1]);
40052	2	50	if (max_leaf < 1)
40053	0		goto out;
40054
40055
40056	2		cpuid(1, 0, &a, &b, &c, &d);
40057	2		family = (a >> 8) & 0xf;
40058	2		model = (a >> 4) & 0xf;
40059	2	50	if (family == 6 \|\| family == 0xf)
		0
40060	2		model += (a >> 12) & 0xf0;
40061	2	50	if (family == 0xf)
40062	0		family += (a >> 20) & 0xff;
40063	2	50	if (d & (1 << 26))
40064	2		features \|= X86_CPU_FEATURE_SSE2;
40065
40066	2	50	if ((c & (1 << 1)) && (c & (1 << 19)))
		50
40067	2		features \|= X86_CPU_FEATURE_PCLMULQDQ;
40068	2	50	if (c & (1 << 27))
40069	2		xcr0 = read_xcr(0);
40070	2	50	if ((c & (1 << 28)) && ((xcr0 & 0x6) == 0x6))
		50
40071	2		features \|= X86_CPU_FEATURE_AVX;
40072
40073	2	50	if (max_leaf < 7)
40074	0		goto out;
40075
40076
40077	2		cpuid(7, 0, &a, &b, &c, &d);
40078	2	50	if (b & (1 << 8))
40079	2		features \|= X86_CPU_FEATURE_BMI2;
40080	2	50	if ((xcr0 & 0x6) == 0x6) {
40081	2	50	if (b & (1 << 5))
40082	2		features \|= X86_CPU_FEATURE_AVX2;
40083	2	50	if (c & (1 << 10))
40084	0		features \|= X86_CPU_FEATURE_VPCLMULQDQ;
40085			}
40086	2	50	if (os_supports_avx512(xcr0)) {
40087	0	0	if (allow_512bit_vectors(manufacturer, family, model))
40088	0		features \|= X86_CPU_FEATURE_ZMM;
40089	0	0	if (b & (1 << 30))
40090	0		features \|= X86_CPU_FEATURE_AVX512BW;
40091	0	0	if (b & (1U << 31))
40092	0		features \|= X86_CPU_FEATURE_AVX512VL;
40093	0	0	if (c & (1 << 11))
40094	0		features \|= X86_CPU_FEATURE_AVX512VNNI;
40095			}
40096
40097
40098	2		cpuid(7, 1, &a, &b, &c, &d);
40099	2	50	if ((a & (1 << 4)) && ((xcr0 & 0x6) == 0x6))
		0
40100	0		features \|= X86_CPU_FEATURE_AVXVNNI;
40101
40102	2		out:
40103	2		disable_cpu_features_for_testing(&features, x86_cpu_feature_table,
40104			ARRAY_LEN(x86_cpu_feature_table));
40105
40106	2		libdeflate_x86_cpu_features = features \| X86_CPU_FEATURES_KNOWN;
40107	2		}
40108
40109			#endif