File Coverage

src/csnappy_compress.c

Criterion	Covered	Total	%
statement	114	157	72.6
branch	34	52	65.3
condition			n/a
subroutine			n/a
pod			n/a
total	148	209	70.8

line	stmt	bran	code
1			/*
2			Copyright 2011, Google Inc.
3			All rights reserved.
4
5			Redistribution and use in source and binary forms, with or without
6			modification, are permitted provided that the following conditions are
7			met:
8
9			* Redistributions of source code must retain the above copyright
10			notice, this list of conditions and the following disclaimer.
11			* Redistributions in binary form must reproduce the above
12			copyright notice, this list of conditions and the following disclaimer
13			in the documentation and/or other materials provided with the
14			distribution.
15			* Neither the name of Google Inc. nor the names of its
16			contributors may be used to endorse or promote products derived from
17			this software without specific prior written permission.
18
19			THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20			"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21			LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22			A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23			OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24			SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25			LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26			DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27			THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28			(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29			OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31			File modified for the Linux Kernel by
32			Zeev Tarantov
33
34			File modified for Sereal by
35			Steffen Mueller
36			*/
37
38			#include "csnappy_internal.h"
39			#ifdef __KERNEL__
40			#include
41			#include
42			#endif
43			#include "csnappy.h"
44
45
46			static INLINE char*
47	1030		encode_varint32(char *sptr, uint32_t v)
48			{
49	1030		uint8_t* ptr = (uint8_t *)sptr;
50			static const int B = 128;
51	1030	100	if (v < (1<<7)) {
52	131		*(ptr++) = v;
53	899	50	} else if (v < (1<<14)) {
54	899		*(ptr++) = v \| B;
55	899		*(ptr++) = v>>7;
56	0	0	} else if (v < (1<<21)) {
57	0		*(ptr++) = v \| B;
58	0		*(ptr++) = (v>>7) \| B;
59	0		*(ptr++) = v>>14;
60	0	0	} else if (v < (1<<28)) {
61	0		*(ptr++) = v \| B;
62	0		*(ptr++) = (v>>7) \| B;
63	0		*(ptr++) = (v>>14) \| B;
64	0		*(ptr++) = v>>21;
65			} else {
66	0		*(ptr++) = v \| B;
67	0		*(ptr++) = (v>>7) \| B;
68	0		*(ptr++) = (v>>14) \| B;
69	0		*(ptr++) = (v>>21) \| B;
70	0		*(ptr++) = v>>28;
71			}
72	1030		return (char *)ptr;
73			}
74
75			/*
76			* * DO NOT CHANGE THE VALUE OF kBlockSize *
77
78			* New Compression code chops up the input into blocks of at most
79			* the following size. This ensures that back-references in the
80			* output never cross kBlockSize block boundaries. This can be
81			* helpful in implementing blocked decompression. However the
82			* decompression code should not rely on this guarantee since older
83			* compression code may not obey it.
84			*/
85			#define kBlockLog 15
86			#define kBlockSize (1 << kBlockLog)
87
88
89			#if defined(__arm__) && !defined(ARCH_ARM_HAVE_UNALIGNED)
90
91			static uint8_t* emit_literal(
92			uint8_t *op,
93			const uint8_t *src,
94			const uint8_t *end)
95			{
96			uint32_t length = end - src;
97			uint32_t n = length - 1;
98			if (!length)
99			return op;
100			if (n < 60) {
101			/* Fits in tag byte */
102			*op++ = LITERAL \| (n << 2);
103			} else {
104			/* Encode in upcoming bytes */
105			uint8_t *base = op;
106			op++;
107			do {
108			*op++ = n & 0xff;
109			n >>= 8;
110			} while (n > 0);
111			*base = LITERAL \| ((59 + (op - base - 1)) << 2);
112			}
113			memcpy(op, src, length);
114			return op + length;
115			}
116
117			static uint8_t* emit_copy(
118			uint8_t *op,
119			uint32_t offset,
120			uint32_t len)
121			{
122			DCHECK_GT(offset, 0);
123
124			/* Emit 64 byte copies but make sure to keep at least four bytes
125			* reserved */
126			while (unlikely(len >= 68)) {
127			*op++ = COPY_2_BYTE_OFFSET \| ((64 - 1) << 2);
128			*op++ = offset & 255;
129			*op++ = offset >> 8;
130			len -= 64;
131			}
132
133			/* Emit an extra 60 byte copy if have too much data to fit in one
134			* copy */
135			if (unlikely(len > 64)) {
136			*op++ = COPY_2_BYTE_OFFSET \| ((60 - 1) << 2);
137			*op++ = offset & 255;
138			*op++ = offset >> 8;
139			len -= 60;
140			}
141
142			/* Emit remainder */
143			DCHECK_GE(len, 4);
144			if ((len < 12) && (offset < 2048)) {
145			int len_minus_4 = len - 4;
146			*op++ = COPY_1_BYTE_OFFSET \|
147			((len_minus_4) << 2) \|
148			((offset >> 8) << 5);
149			*op++ = offset & 0xff;
150			} else {
151			*op++ = COPY_2_BYTE_OFFSET \| ((len-1) << 2);
152			*op++ = offset & 255;
153			*op++ = offset >> 8;
154			}
155			return op;
156			}
157
158			static uint32_t find_match_length(
159			const uint8_t *s1,
160			const uint8_t *s2,
161			const uint8_t *s2_end)
162			{
163			const uint8_t * const s2_start = s2;
164			while (s2 < s2_end && s1++ == s2++) /nothing/;
165			return s2 - s2_start - 1;
166			}
167
168			static uint32_t hash(uint32_t v)
169			{
170			return v * UINT32_C(0x1e35a7bd);
171			}
172
173			char*
174			csnappy_compress_fragment(
175			const char *input,
176			const uint32_t input_size,
177			char *dst,
178			void *working_memory,
179			const int workmem_bytes_power_of_two)
180			{
181			const uint8_t * const src_start = (const uint8_t *)input;
182			const uint8_t * const src_end_minus4 = src_start + input_size - 4;
183			const uint8_t src = src_start, done_upto = src_start, *match;
184			uint8_t op = (uint8_t )dst;
185			uint16_t wm = (uint16_t )working_memory;
186			int shift = 33 - workmem_bytes_power_of_two;
187			uint32_t curr_val, curr_hash, match_val, offset, length;
188			if (unlikely(input_size < 4))
189			goto the_end;
190			memset(wm, 0, 1 << workmem_bytes_power_of_two);
191			for (;;) {
192			curr_val = (src[1] << 8) \| (src[2] << 16) \| (src[3] << 24);
193			do {
194			src++;
195			if (unlikely(src >= src_end_minus4))
196			goto the_end;
197			curr_val = (curr_val >> 8) \| (src[3] << 24);
198			DCHECK_EQ(curr_val, get_unaligned_le32(src));
199			curr_hash = hash(curr_val) >> shift;
200			match = src_start + wm[curr_hash];
201			DCHECK_LT(match, src);
202			wm[curr_hash] = src - src_start;
203			match_val = get_unaligned_le32(match);
204			} while (likely(curr_val != match_val));
205			offset = src - match;
206			length = 4 + find_match_length(
207			match + 4, src + 4, src_end_minus4 + 4);
208			DCHECK_EQ(memcmp(src, match, length), 0);
209			op = emit_literal(op, done_upto, src);
210			op = emit_copy(op, offset, length);
211			done_upto = src + length;
212			src = done_upto - 1;
213			}
214			the_end:
215			op = emit_literal(op, done_upto, src_end_minus4 + 4);
216			return (char *)op;
217			}
218
219			#else /* !simple */
220
221			/*
222			* Any hash function will produce a valid compressed bitstream, but a good
223			* hash function reduces the number of collisions and thus yields better
224			* compression for compressible input, and more speed for incompressible
225			* input. Of course, it doesn't hurt if the hash function is reasonably fast
226			* either, as it gets called a lot.
227			*/
228	2022		static INLINE uint32_t HashBytes(uint32_t bytes, int shift)
229			{
230	2022		uint32_t kMul = 0x1e35a7bd;
231	2022		return (bytes * kMul) >> shift;
232			}
233	2022		static INLINE uint32_t Hash(const char *p, int shift)
234			{
235	2022		return HashBytes(UNALIGNED_LOAD32(p), shift);
236			}
237
238
239			/*
240			* Return the largest n such that
241			*
242			* s1[0,n-1] == s2[0,n-1]
243			* and n <= (s2_limit - s2).
244			*
245			* Does not read *s2_limit or beyond.
246			* Does not read *(s1 + (s2_limit - s2)) or beyond.
247			* Requires that s2_limit >= s2.
248			*
249			* Separate implementation for x86_64, for speed. Uses the fact that
250			* x86_64 is little endian.
251			*/
252			#if defined(__x86_64__) \|\| defined(__aarch64__)
253			static INLINE int
254	1010		FindMatchLength(const char s1, const char s2, const char *s2_limit)
255			{
256			uint64_t x;
257			int matched, matching_bits;
258			DCHECK_GE(s2_limit, s2);
259	1010		matched = 0;
260			/*
261			* Find out how long the match is. We loop over the data 64 bits at a
262			* time until we find a 64-bit block that doesn't match; then we find
263			* the first non-matching bit and use that to calculate the total
264			* length of the match.
265			*/
266	65776	100	while (likely(s2 <= s2_limit - 8)) {
267	64766	50	if (unlikely(UNALIGNED_LOAD64(s1 + matched) ==
268			UNALIGNED_LOAD64(s2))) {
269	64766		s2 += 8;
270	64766		matched += 8;
271			} else {
272			/*
273			* On current (mid-2008) Opteron models there is a 3%
274			* more efficient code sequence to find the first
275			* non-matching byte. However, what follows is ~10%
276			* better on Intel Core 2 and newer, and we expect AMD's
277			* bsf instruction to improve.
278			*/
279	0		x = UNALIGNED_LOAD64(s1 + matched) ^
280	0		UNALIGNED_LOAD64(s2);
281	0		matching_bits = FindLSBSetNonZero64(x);
282	0		matched += matching_bits >> 3;
283	0		return matched;
284			}
285			}
286	4544	100	while (likely(s2 < s2_limit)) {
287	3534	50	if (likely(s1[matched] == *s2)) {
288	3534		++s2;
289	3534		++matched;
290			} else {
291	0		return matched;
292			}
293			}
294	1010		return matched;
295			}
296			#else /* !defined(__x86_64__) && !defined(__aarch64__) */
297			static INLINE int
298			FindMatchLength(const char s1, const char s2, const char *s2_limit)
299			{
300			/* Implementation based on the x86-64 version, above. */
301			int matched = 0;
302			DCHECK_GE(s2_limit, s2);
303
304			while (s2 <= s2_limit - 4 &&
305			UNALIGNED_LOAD32(s2) == UNALIGNED_LOAD32(s1 + matched)) {
306			s2 += 4;
307			matched += 4;
308			}
309			#if __BYTE_ORDER == __LITTLE_ENDIAN
310			if (s2 <= s2_limit - 4) {
311			uint32_t x = UNALIGNED_LOAD32(s1 + matched) ^
312			UNALIGNED_LOAD32(s2);
313			int matching_bits = FindLSBSetNonZero(x);
314			matched += matching_bits >> 3;
315			} else {
316			while ((s2 < s2_limit) && (s1[matched] == *s2)) {
317			++s2;
318			++matched;
319			}
320			}
321			#else
322			while ((s2 < s2_limit) && (s1[matched] == *s2)) {
323			++s2;
324			++matched;
325			}
326			#endif
327			return matched;
328			}
329			#endif /* !defined(__x86_64__) && !defined(__aarch64__) */
330
331
332			static INLINE char*
333	1030		EmitLiteral(char op, const char literal, int len, int allow_fast_path)
334			{
335	1030		int n = len - 1; /* Zero-length literals are disallowed */
336	1030	50	if (n < 60) {
337			/* Fits in tag byte */
338	1030		*op++ = LITERAL \| (n << 2);
339			/*
340			The vast majority of copies are below 16 bytes, for which a
341			call to memcpy is overkill. This fast path can sometimes
342			copy up to 15 bytes too much, but that is okay in the
343			main loop, since we have a bit to go on for both sides:
344			- The input will always have kInputMarginBytes = 15 extra
345			available bytes, as long as we're in the main loop, and
346			if not, allow_fast_path = false.
347			- The output will always have 32 spare bytes (see
348			snappy_max_compressed_length).
349			*/
350	1030	100	if (allow_fast_path && len <= 16) {
		50
351	1010		UnalignedCopy64(literal, op);
352	1010		UnalignedCopy64(literal + 8, op + 8);
353	1010		return op + len;
354			}
355			} else {
356			/* Encode in upcoming bytes */
357	0		char *base = op;
358	0		int count = 0;
359	0		op++;
360	0	0	while (n > 0) {
361	0		*op++ = n & 0xff;
362	0		n >>= 8;
363	0		count++;
364			}
365			DCHECK_GE(count, 1);
366			DCHECK_LE(count, 4);
367	0		*base = LITERAL \| ((59+count) << 2);
368			}
369	20		memcpy(op, literal, len);
370	20		return op + len;
371			}
372
373			static INLINE char*
374	8705		EmitCopyLessThan64(char *op, int offset, int len)
375			{
376			DCHECK_LE(len, 64);
377			DCHECK_GE(len, 4);
378			DCHECK_LT(offset, 65536);
379
380	8705	100	if ((len < 12) && (offset < 2048)) {
		50
381	165		int len_minus_4 = len - 4;
382			DCHECK_LT(len_minus_4, 8); /* Must fit in 3 bits */
383	165		*op++ = COPY_1_BYTE_OFFSET +
384	165		((len_minus_4) << 2) +
385	165		((offset >> 8) << 5);
386	165		*op++ = offset & 0xff;
387			} else {
388	8540		*op++ = COPY_2_BYTE_OFFSET + ((len-1) << 2);
389	8540		put_unaligned_le16(offset, op);
390	8540		op += 2;
391			}
392	8705		return op;
393			}
394
395			static INLINE char*
396	1010		EmitCopy(char *op, int offset, int len)
397			{
398			/* Emit 64 byte copies but make sure to keep at least four bytes
399			* reserved */
400	8660	100	while (len >= 68) {
401	7650		op = EmitCopyLessThan64(op, offset, 64);
402	7650		len -= 64;
403			}
404
405			/* Emit an extra 60 byte copy if have too much data to fit in one
406			* copy */
407	1010	100	if (len > 64) {
408	45		op = EmitCopyLessThan64(op, offset, 60);
409	45		len -= 60;
410			}
411
412			/* Emit remainder */
413	1010		op = EmitCopyLessThan64(op, offset, len);
414	1010		return op;
415			}
416
417
418			/*
419			For 0 <= offset <= 4, GetUint32AtOffset(GetEightBytesAt(p), offset) will
420			equal UNALIGNED_LOAD32(p + offset). Motivation: On x86-64 hardware we have
421			empirically found that overlapping loads such as
422			UNALIGNED_LOAD32(p) ... UNALIGNED_LOAD32(p+1) ... UNALIGNED_LOAD32(p+2)
423			are slower than UNALIGNED_LOAD64(p) followed by shifts and casts to uint32.
424
425			We have different versions for 64- and 32-bit; ideally we would avoid the
426			two functions and just INLINE the UNALIGNED_LOAD64 call into
427			GetUint32AtOffset, but GCC (at least not as of 4.6) is seemingly not clever
428			enough to avoid loading the value multiple times then. For 64-bit, the load
429			is done when GetEightBytesAt() is called, whereas for 32-bit, the load is
430			done at GetUint32AtOffset() time.
431			*/
432
433			#if defined(__x86_64__) \|\| (__SIZEOF_SIZE_T__ == 8)
434
435			typedef uint64_t EightBytesReference;
436
437	0		static INLINE EightBytesReference GetEightBytesAt(const char* ptr) {
438	0		return UNALIGNED_LOAD64(ptr);
439			}
440
441	0		static INLINE uint32_t GetUint32AtOffset(uint64_t v, int offset) {
442			DCHECK_GE(offset, 0);
443			DCHECK_LE(offset, 4);
444			#if __BYTE_ORDER == __LITTLE_ENDIAN
445	0		return v >> (8 * offset);
446			#else
447			return v >> (32 - 8 * offset);
448			#endif
449			}
450
451			#else /* !ARCH_K8 */
452
453			typedef const char* EightBytesReference;
454
455			static INLINE EightBytesReference GetEightBytesAt(const char* ptr) {
456			return ptr;
457			}
458
459			static INLINE uint32_t GetUint32AtOffset(const char* v, int offset) {
460			DCHECK_GE(offset, 0);
461			DCHECK_LE(offset, 4);
462			return UNALIGNED_LOAD32(v + offset);
463			}
464
465			#endif /* !ARCH_K8 */
466
467
468			#define kInputMarginBytes 15
469			char*
470	1030		csnappy_compress_fragment(
471			const char *input,
472			const uint32_t input_size,
473			char *op,
474			void *working_memory,
475			const int workmem_bytes_power_of_two)
476			{
477			const char ip, ip_end, base_ip, next_emit, ip_limit, next_ip,
478			candidate, base;
479	1030		uint16_t table = (uint16_t )working_memory;
480			EightBytesReference input_bytes;
481			uint32_t hash, next_hash, prev_hash, cur_hash, skip, candidate_bytes;
482			int shift, matched;
483
484			DCHECK_GE(workmem_bytes_power_of_two, 9);
485			DCHECK_LE(workmem_bytes_power_of_two, 15);
486			/* Table of 2^X bytes, need (X-1) bits to address table of uint16_t.
487			* How many bits of 32bit hash function result are discarded? */
488	1030		shift = 33 - workmem_bytes_power_of_two;
489			/* "ip" is the input pointer, and "op" is the output pointer. */
490	1030		ip = input;
491			DCHECK_LE(input_size, kBlockSize);
492	1030		ip_end = input + input_size;
493	1030		base_ip = ip;
494			/* Bytes in [next_emit, ip) will be emitted as literal bytes. Or
495			[next_emit, ip_end) after the main loop. */
496	1030		next_emit = ip;
497
498	1030	100	if (unlikely(input_size < kInputMarginBytes))
499	18		goto emit_remainder;
500
501	1012		memset(working_memory, 0, 1 << workmem_bytes_power_of_two);
502
503	1012		ip_limit = input + input_size - kInputMarginBytes;
504	1012		next_hash = Hash(++ip, shift);
505
506	1012		main_loop:
507			DCHECK_LT(next_emit, ip);
508			/*
509			* The body of this loop calls EmitLiteral once and then EmitCopy one or
510			* more times. (The exception is that when we're close to exhausting
511			* the input we goto emit_remainder.)
512			*
513			* In the first iteration of this loop we're just starting, so
514			* there's nothing to copy, so calling EmitLiteral once is
515			* necessary. And we only start a new iteration when the
516			* current iteration has determined that a call to EmitLiteral will
517			* precede the next call to EmitCopy (if any).
518			*
519			* Step 1: Scan forward in the input looking for a 4-byte-long match.
520			* If we get close to exhausting the input then goto emit_remainder.
521			*
522			* Heuristic match skipping: If 32 bytes are scanned with no matches
523			* found, start looking only at every other byte. If 32 more bytes are
524			* scanned, look at every third byte, etc.. When a match is found,
525			* immediately go back to looking at every byte. This is a small loss
526			* (~5% performance, ~0.1% density) for compressible data due to more
527			* bookkeeping, but for non-compressible data (such as JPEG) it's a huge
528			* win since the compressor quickly "realizes" the data is incompressible
529			* and doesn't bother looking for matches everywhere.
530			*
531			* The "skip" variable keeps track of how many bytes there are since the
532			* last match; dividing it by 32 (ie. right-shifting by five) gives the
533			* number of bytes to move ahead for each iteration.
534			*/
535	1012		skip = 32;
536
537	1012		next_ip = ip;
538			do {
539	1012		ip = next_ip;
540	1012		hash = next_hash;
541			DCHECK_EQ(hash, Hash(ip, shift));
542	1012		next_ip = ip + (skip++ >> 5);
543	1012	100	if (unlikely(next_ip > ip_limit))
544	2		goto emit_remainder;
545	1010		next_hash = Hash(next_ip, shift);
546	1010		candidate = base_ip + table[hash];
547			DCHECK_GE(candidate, base_ip);
548			DCHECK_LT(candidate, ip);
549
550	1010		table[hash] = ip - base_ip;
551	1010	50	} while (likely(UNALIGNED_LOAD32(ip) !=
552			UNALIGNED_LOAD32(candidate)));
553
554			/*
555			* Step 2: A 4-byte match has been found. We'll later see if more
556			* than 4 bytes match. But, prior to the match, input
557			* bytes [next_emit, ip) are unmatched. Emit them as "literal bytes."
558			*/
559			DCHECK_LE(next_emit + 16, ip_end);
560	1010		op = EmitLiteral(op, next_emit, ip - next_emit, 1);
561
562			/*
563			* Step 3: Call EmitCopy, and then see if another EmitCopy could
564			* be our next move. Repeat until we find no match for the
565			* input immediately after what was consumed by the last EmitCopy call.
566			*
567			* If we exit this loop normally then we need to call EmitLiteral next,
568			* though we don't yet know how big the literal will be. We handle that
569			* by proceeding to the next iteration of the main loop. We also can exit
570			* this loop via goto if we get close to exhausting the input.
571			*/
572	1010		candidate_bytes = 0;
573
574			do {
575			/* We have a 4-byte match at ip, and no need to emit any
576			"literal bytes" prior to ip. */
577	1010		base = ip;
578	1010		matched = 4 + FindMatchLength(candidate + 4, ip + 4, ip_end);
579	1010		ip += matched;
580			DCHECK_EQ(0, memcmp(base, candidate, matched));
581	1010		op = EmitCopy(op, base - candidate, matched);
582			/* We could immediately start working at ip now, but to improve
583			compression we first update table[Hash(ip - 1, ...)]. */
584	1010		next_emit = ip;
585	1010	50	if (unlikely(ip >= ip_limit))
586	1010		goto emit_remainder;
587	0		input_bytes = GetEightBytesAt(ip - 1);
588	0		prev_hash = HashBytes(GetUint32AtOffset(input_bytes, 0), shift);
589	0		table[prev_hash] = ip - base_ip - 1;
590	0		cur_hash = HashBytes(GetUint32AtOffset(input_bytes, 1), shift);
591	0		candidate = base_ip + table[cur_hash];
592	0		candidate_bytes = UNALIGNED_LOAD32(candidate);
593	0		table[cur_hash] = ip - base_ip;
594	0	0	} while (GetUint32AtOffset(input_bytes, 1) == candidate_bytes);
595
596	0		next_hash = HashBytes(GetUint32AtOffset(input_bytes, 2), shift);
597	0		++ip;
598	0		goto main_loop;
599
600	1030		emit_remainder:
601			/* Emit the remaining bytes as a literal */
602	1030	100	if (next_emit < ip_end)
603	20		op = EmitLiteral(op, next_emit, ip_end - next_emit, 0);
604
605	1030		return op;
606			}
607			#endif /* !simple */
608			#if defined(__KERNEL__) && !defined(STATIC)
609			EXPORT_SYMBOL(csnappy_compress_fragment);
610			#endif
611
612			uint32_t __attribute__((const))
613	1030		csnappy_max_compressed_length(uint32_t source_len)
614			{
615	1030		return 32 + source_len + source_len/6;
616			}
617			#if defined(__KERNEL__) && !defined(STATIC)
618			EXPORT_SYMBOL(csnappy_max_compressed_length);
619			#endif
620
621			void
622	1030		csnappy_compress(
623			const char *input,
624			uint32_t input_length,
625			char *compressed,
626			uint32_t *compressed_length,
627			void *working_memory,
628			const int workmem_bytes_power_of_two)
629			{
630			int workmem_size;
631			int num_to_read;
632	1030		uint32_t written = 0;
633	1030		char *p = encode_varint32(compressed, input_length);
634	1030		written += (p - compressed);
635	1030		compressed = p;
636	2060	100	while (input_length > 0) {
637	1030		num_to_read = min(input_length, (uint32_t)kBlockSize);
638	1030		workmem_size = workmem_bytes_power_of_two;
639	1030	50	if (unlikely(num_to_read < kBlockSize)) {
640	1030		for (workmem_size = 9;
641	2314	50	workmem_size < workmem_bytes_power_of_two;
642	1284		++workmem_size) {
643	2314	100	if ((1 << (workmem_size-1)) >= num_to_read)
644	1030		break;
645			}
646			}
647	1030		p = csnappy_compress_fragment(
648			input, num_to_read, compressed,
649			working_memory, workmem_size);
650	1030		written += (p - compressed);
651	1030		compressed = p;
652	1030		input_length -= num_to_read;
653	1030		input += num_to_read;
654			}
655	1030		*compressed_length = written;
656	1030		}
657			#if defined(__KERNEL__) && !defined(STATIC)
658			EXPORT_SYMBOL(csnappy_compress);
659
660			MODULE_LICENSE("BSD");
661			MODULE_DESCRIPTION("Snappy Compressor");
662			#endif