File Coverage

src/csnappy_decompress.c
Criterion Covered Total %
statement 86 114 75.4
branch 43 66 65.1
condition n/a
subroutine n/a
pod n/a
total 129 180 71.6


line stmt bran cond sub pod time code
1             /*
2             Copyright 2011, Google Inc.
3             All rights reserved.
4              
5             Redistribution and use in source and binary forms, with or without
6             modification, are permitted provided that the following conditions are
7             met:
8              
9             * Redistributions of source code must retain the above copyright
10             notice, this list of conditions and the following disclaimer.
11             * Redistributions in binary form must reproduce the above
12             copyright notice, this list of conditions and the following disclaimer
13             in the documentation and/or other materials provided with the
14             distribution.
15             * Neither the name of Google Inc. nor the names of its
16             contributors may be used to endorse or promote products derived from
17             this software without specific prior written permission.
18              
19             THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20             "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21             LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22             A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23             OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24             SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25             LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26             DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27             THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28             (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29             OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30              
31             File modified for the Linux Kernel by
32             Zeev Tarantov
33              
34             File modified for Sereal by
35             Steffen Mueller
36             */
37              
38             #include "csnappy_internal.h"
39             #ifdef __KERNEL__
40             #include
41             #include
42             #endif
43             #include "csnappy.h"
44              
45             int
46 1026           csnappy_get_uncompressed_length(
47             const char *src,
48             uint32_t src_len,
49             uint32_t *result)
50             {
51 1026           const char *src_base = src;
52 1026           uint32_t shift = 0;
53             uint8_t c;
54             /* Length is encoded in 1..5 bytes */
55 1026           *result = 0;
56             for (;;) {
57 1923 50         if (shift >= 32)
58 0           goto err_out;
59 1923 50         if (src_len == 0)
60 0           goto err_out;
61 1923           c = *(const uint8_t *)src++;
62 1923           src_len -= 1;
63 1923           *result |= (uint32_t)(c & 0x7f) << shift;
64 1923 100         if (c < 128)
65 1026           break;
66 897           shift += 7;
67             }
68 1026           return src - src_base;
69 0           err_out:
70 0           return CSNAPPY_E_HEADER_BAD;
71             }
72             #if defined(__KERNEL__) && !defined(STATIC)
73             EXPORT_SYMBOL(csnappy_get_uncompressed_length);
74             #endif
75              
76             #if defined(__arm__) && !defined(ARCH_ARM_HAVE_UNALIGNED)
77             int csnappy_decompress_noheader(
78             const char *src_,
79             uint32_t src_remaining,
80             char *dst,
81             uint32_t *dst_len)
82             {
83             const uint8_t * src = (const uint8_t *)src_;
84             const uint8_t * const src_end = src + src_remaining;
85             char * const dst_base = dst;
86             char * const dst_end = dst + *dst_len;
87             while (src < src_end) {
88             uint32_t opcode = *src++;
89             uint32_t length = (opcode >> 2) + 1;
90             const uint8_t *copy_src;
91             if (likely((opcode & 3) == 0)) {
92             if (unlikely(length > 60)) {
93             uint32_t extra_bytes = length - 60;
94             int shift, max_shift;
95             if (unlikely(src + extra_bytes > src_end))
96             return CSNAPPY_E_DATA_MALFORMED;
97             length = 0;
98             for (shift = 0, max_shift = extra_bytes*8;
99             shift < max_shift;
100             shift += 8)
101             length |= *src++ << shift;
102             ++length;
103             }
104             if (unlikely(src + length > src_end))
105             return CSNAPPY_E_DATA_MALFORMED;
106             copy_src = src;
107             src += length;
108             } else {
109             uint32_t offset;
110             if (likely((opcode & 3) == 1)) {
111             if (unlikely(src + 1 > src_end))
112             return CSNAPPY_E_DATA_MALFORMED;
113             length = ((length - 1) & 7) + 4;
114             offset = ((opcode >> 5) << 8) + *src++;
115             } else if (likely((opcode & 3) == 2)) {
116             if (unlikely(src + 2 > src_end))
117             return CSNAPPY_E_DATA_MALFORMED;
118             offset = src[0] | (src[1] << 8);
119             src += 2;
120             } else {
121             if (unlikely(src + 4 > src_end))
122             return CSNAPPY_E_DATA_MALFORMED;
123             offset = src[0] | (src[1] << 8) |
124             (src[2] << 16) | (src[3] << 24);
125             src += 4;
126             }
127             if (unlikely(!offset || (offset > dst - dst_base)))
128             return CSNAPPY_E_DATA_MALFORMED;
129             copy_src = (const uint8_t *)dst - offset;
130             }
131             if (unlikely(dst + length > dst_end))
132             return CSNAPPY_E_OUTPUT_OVERRUN;
133             do *dst++ = *copy_src++; while (--length);
134             }
135             *dst_len = dst - dst_base;
136             return CSNAPPY_E_OK;
137             }
138             #else /* !(arm with no unaligned access) */
139             /*
140             * Data stored per entry in lookup table:
141             * Range Bits-used Description
142             * ------------------------------------
143             * 1..64 0..7 Literal/copy length encoded in opcode byte
144             * 0..7 8..10 Copy offset encoded in opcode byte / 256
145             * 0..4 11..13 Extra bytes after opcode
146             *
147             * We use eight bits for the length even though 7 would have sufficed
148             * because of efficiency reasons:
149             * (1) Extracting a byte is faster than a bit-field
150             * (2) It properly aligns copy offset so we do not need a <<8
151             */
152             static const uint16_t char_table[256] = {
153             0x0001, 0x0804, 0x1001, 0x2001, 0x0002, 0x0805, 0x1002, 0x2002,
154             0x0003, 0x0806, 0x1003, 0x2003, 0x0004, 0x0807, 0x1004, 0x2004,
155             0x0005, 0x0808, 0x1005, 0x2005, 0x0006, 0x0809, 0x1006, 0x2006,
156             0x0007, 0x080a, 0x1007, 0x2007, 0x0008, 0x080b, 0x1008, 0x2008,
157             0x0009, 0x0904, 0x1009, 0x2009, 0x000a, 0x0905, 0x100a, 0x200a,
158             0x000b, 0x0906, 0x100b, 0x200b, 0x000c, 0x0907, 0x100c, 0x200c,
159             0x000d, 0x0908, 0x100d, 0x200d, 0x000e, 0x0909, 0x100e, 0x200e,
160             0x000f, 0x090a, 0x100f, 0x200f, 0x0010, 0x090b, 0x1010, 0x2010,
161             0x0011, 0x0a04, 0x1011, 0x2011, 0x0012, 0x0a05, 0x1012, 0x2012,
162             0x0013, 0x0a06, 0x1013, 0x2013, 0x0014, 0x0a07, 0x1014, 0x2014,
163             0x0015, 0x0a08, 0x1015, 0x2015, 0x0016, 0x0a09, 0x1016, 0x2016,
164             0x0017, 0x0a0a, 0x1017, 0x2017, 0x0018, 0x0a0b, 0x1018, 0x2018,
165             0x0019, 0x0b04, 0x1019, 0x2019, 0x001a, 0x0b05, 0x101a, 0x201a,
166             0x001b, 0x0b06, 0x101b, 0x201b, 0x001c, 0x0b07, 0x101c, 0x201c,
167             0x001d, 0x0b08, 0x101d, 0x201d, 0x001e, 0x0b09, 0x101e, 0x201e,
168             0x001f, 0x0b0a, 0x101f, 0x201f, 0x0020, 0x0b0b, 0x1020, 0x2020,
169             0x0021, 0x0c04, 0x1021, 0x2021, 0x0022, 0x0c05, 0x1022, 0x2022,
170             0x0023, 0x0c06, 0x1023, 0x2023, 0x0024, 0x0c07, 0x1024, 0x2024,
171             0x0025, 0x0c08, 0x1025, 0x2025, 0x0026, 0x0c09, 0x1026, 0x2026,
172             0x0027, 0x0c0a, 0x1027, 0x2027, 0x0028, 0x0c0b, 0x1028, 0x2028,
173             0x0029, 0x0d04, 0x1029, 0x2029, 0x002a, 0x0d05, 0x102a, 0x202a,
174             0x002b, 0x0d06, 0x102b, 0x202b, 0x002c, 0x0d07, 0x102c, 0x202c,
175             0x002d, 0x0d08, 0x102d, 0x202d, 0x002e, 0x0d09, 0x102e, 0x202e,
176             0x002f, 0x0d0a, 0x102f, 0x202f, 0x0030, 0x0d0b, 0x1030, 0x2030,
177             0x0031, 0x0e04, 0x1031, 0x2031, 0x0032, 0x0e05, 0x1032, 0x2032,
178             0x0033, 0x0e06, 0x1033, 0x2033, 0x0034, 0x0e07, 0x1034, 0x2034,
179             0x0035, 0x0e08, 0x1035, 0x2035, 0x0036, 0x0e09, 0x1036, 0x2036,
180             0x0037, 0x0e0a, 0x1037, 0x2037, 0x0038, 0x0e0b, 0x1038, 0x2038,
181             0x0039, 0x0f04, 0x1039, 0x2039, 0x003a, 0x0f05, 0x103a, 0x203a,
182             0x003b, 0x0f06, 0x103b, 0x203b, 0x003c, 0x0f07, 0x103c, 0x203c,
183             0x0801, 0x0f08, 0x103d, 0x203d, 0x1001, 0x0f09, 0x103e, 0x203e,
184             0x1801, 0x0f0a, 0x103f, 0x203f, 0x2001, 0x0f0b, 0x1040, 0x2040
185             };
186              
187             /*
188             * Copy "len" bytes from "src" to "op", one byte at a time. Used for
189             * handling COPY operations where the input and output regions may
190             * overlap. For example, suppose:
191             * src == "ab"
192             * op == src + 2
193             * len == 20
194             * After IncrementalCopy(src, op, len), the result will have
195             * eleven copies of "ab"
196             * ababababababababababab
197             * Note that this does not match the semantics of either memcpy()
198             * or memmove().
199             */
200 1143           static INLINE void IncrementalCopy(const char *src, char *op, int len)
201             {
202             DCHECK_GT(len, 0);
203             do {
204 41736           *op++ = *src++;
205 41736 100         } while (--len > 0);
206 1143           }
207              
208             /*
209             * Equivalent to IncrementalCopy except that it can write up to ten extra
210             * bytes after the end of the copy, and that it is faster.
211             *
212             * The main part of this loop is a simple copy of eight bytes at a time until
213             * we've copied (at least) the requested amount of bytes. However, if op and
214             * src are less than eight bytes apart (indicating a repeating pattern of
215             * length < 8), we first need to expand the pattern in order to get the correct
216             * results. For instance, if the buffer looks like this, with the eight-byte
217             * and patterns marked as intervals:
218             *
219             * abxxxxxxxxxxxx
220             * [------] src
221             * [------] op
222             *
223             * a single eight-byte copy from to will repeat the pattern once,
224             * after which we can move two bytes without moving :
225             *
226             * ababxxxxxxxxxx
227             * [------] src
228             * [------] op
229             *
230             * and repeat the exercise until the two no longer overlap.
231             *
232             * This allows us to do very well in the special case of one single byte
233             * repeated many times, without taking a big hit for more general cases.
234             *
235             * The worst case of extra writing past the end of the match occurs when
236             * op - src == 1 and len == 1; the last copy will read from byte positions
237             * [0..7] and write to [4..11], whereas it was only supposed to write to
238             * position 1. Thus, ten excess bytes.
239             */
240             static const int kMaxIncrementCopyOverflow = 10;
241 7530           static INLINE void IncrementalCopyFastPath(const char *src, char *op, int len)
242             {
243 30120 100         while (op - src < 8) {
244 22590           UnalignedCopy64(src, op);
245 22590           len -= op - src;
246 22590           op += op - src;
247             }
248 67770 100         while (len > 0) {
249 60240           UnalignedCopy64(src, op);
250 60240           src += 8;
251 60240           op += 8;
252 60240           len -= 8;
253             }
254 7530           }
255              
256              
257             /* A type that writes to a flat array. */
258             struct SnappyArrayWriter {
259             char *base;
260             char *op;
261             char *op_limit;
262             };
263              
264             static INLINE int
265 757           SAW__AppendFastPath(struct SnappyArrayWriter *this,
266             const char *ip, uint32_t len)
267             {
268 757           char *op = this->op;
269 757           const uint32_t space_left = this->op_limit - op;
270 757 50         if (likely(space_left >= 16)) {
271 757           UnalignedCopy64(ip, op);
272 757           UnalignedCopy64(ip + 8, op + 8);
273             } else {
274 0 0         if (unlikely(space_left < len))
275 0           return CSNAPPY_E_OUTPUT_OVERRUN;
276 0           memcpy(op, ip, len);
277             }
278 757           this->op = op + len;
279 757           return CSNAPPY_E_OK;
280             }
281              
282             static INLINE int
283 269           SAW__Append(struct SnappyArrayWriter *this,
284             const char *ip, uint32_t len)
285             {
286 269           char *op = this->op;
287 269           const uint32_t space_left = this->op_limit - op;
288 269 50         if (unlikely(space_left < len))
289 0           return CSNAPPY_E_OUTPUT_OVERRUN;
290 269           memcpy(op, ip, len);
291 269           this->op = op + len;
292 269           return CSNAPPY_E_OK;
293             }
294              
295             static INLINE int
296 8673           SAW__AppendFromSelf(struct SnappyArrayWriter *this,
297             uint32_t offset, uint32_t len)
298             {
299 8673           char *op = this->op;
300 8673           const uint32_t space_left = this->op_limit - op;
301             /* -1u catches offset==0 */
302 8673 50         if (op - this->base <= offset - 1u)
303 0           return CSNAPPY_E_DATA_MALFORMED;
304             /* Fast path, used for the majority (70-80%) of dynamic invocations. */
305 8673 100         if (len <= 16 && offset >= 8 && space_left >= 16) {
    50          
    0          
306 0           UnalignedCopy64(op - offset, op);
307 0           UnalignedCopy64(op - offset + 8, op + 8);
308 8673 100         } else if (space_left >= (len + kMaxIncrementCopyOverflow)) {
309 7530           IncrementalCopyFastPath(op - offset, op, len);
310             } else {
311 1143 50         if (space_left < len)
312 0           return CSNAPPY_E_OUTPUT_OVERRUN;
313 1143           IncrementalCopy(op - offset, op, len);
314             }
315 8673           this->op = op + len;
316 8673           return CSNAPPY_E_OK;
317             }
318              
319             int
320 1026           csnappy_decompress_noheader(
321             const char *src,
322             uint32_t src_remaining,
323             char *dst,
324             uint32_t *dst_len)
325             {
326             struct SnappyArrayWriter writer;
327 1026           const char *end_minus5 = src + src_remaining - 5;
328             uint32_t length, trailer, opword, extra_bytes;
329             int ret, available;
330             uint8_t opcode;
331             char scratch[5];
332 1026           writer.op = writer.base = dst;
333 1026           writer.op_limit = writer.op + *dst_len;
334             #define LOOP_COND() \
335             if (unlikely(src >= end_minus5)) { \
336             available = end_minus5 + 5 - src; \
337             if (unlikely(available <= 0)) \
338             goto out; \
339             memmove(scratch, src, available); \
340             src = scratch; \
341             end_minus5 = scratch + available - 5; \
342             }
343            
344 1026 100         LOOP_COND();
    50          
345             for (;;) {
346 9699           opcode = *(const uint8_t *)src++;
347 9699 100         if (opcode & 0x3) {
348 8673           opword = char_table[opcode];
349 8673           extra_bytes = opword >> 11;
350 8673           trailer = get_unaligned_le(src, extra_bytes);
351 8673           length = opword & 0xff;
352 8673           src += extra_bytes;
353 8673           trailer += opword & 0x700;
354 8673           ret = SAW__AppendFromSelf(&writer, trailer, length);
355 8673 50         if (ret < 0)
356 0           return ret;
357 8673 100         LOOP_COND();
    100          
358             } else {
359 1026           length = (opcode >> 2) + 1;
360 1026           available = end_minus5 + 5 - src;
361 1026 50         if (length <= 16 && available >= 16) {
    100          
362 757 50         if ((ret = SAW__AppendFastPath(&writer, src, length)) < 0)
363 0           return ret;
364 757           src += length;
365 757 100         LOOP_COND();
    50          
366 756           continue;
367             }
368 269 50         if (unlikely(length > 60)) {
369 0           extra_bytes = length - 60;
370 0           length = get_unaligned_le(src, extra_bytes) + 1;
371 0           src += extra_bytes;
372 0           available = end_minus5 + 5 - src;
373             }
374 269 50         if (unlikely(available < (int32_t)length))
375 0           return CSNAPPY_E_DATA_MALFORMED;
376 269           ret = SAW__Append(&writer, src, length);
377 269 50         if (ret < 0)
378 0           return ret;
379 269           src += length;
380 269 100         LOOP_COND();
    100          
381             }
382             }
383             #undef LOOP_COND
384 1026           out:
385 1026           *dst_len = writer.op - writer.base;
386 1026           return CSNAPPY_E_OK;
387             }
388             #endif /* optimized for unaligned arch */
389              
390             #if defined(__KERNEL__) && !defined(STATIC)
391             EXPORT_SYMBOL(csnappy_decompress_noheader);
392             #endif
393              
394             int
395 0           csnappy_decompress(
396             const char *src,
397             uint32_t src_len,
398             char *dst,
399             uint32_t dst_len)
400             {
401             int n;
402 0           uint32_t olen = 0;
403             /* Read uncompressed length from the front of the compressed input */
404 0           n = csnappy_get_uncompressed_length(src, src_len, &olen);
405 0 0         if (unlikely(n < CSNAPPY_E_OK))
406 0           return n;
407             /* Protect against possible DoS attack */
408 0 0         if (unlikely(olen > dst_len))
409 0           return CSNAPPY_E_OUTPUT_INSUF;
410 0           return csnappy_decompress_noheader(src + n, src_len - n, dst, &olen);
411             }
412             #if defined(__KERNEL__) && !defined(STATIC)
413             EXPORT_SYMBOL(csnappy_decompress);
414              
415             MODULE_LICENSE("BSD");
416             MODULE_DESCRIPTION("Snappy Decompressor");
417             #endif