1 /**********************************************************************
2 Copyright(c) 2011-2016 Intel Corporation All rights reserved.
3
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions
6 are met:
7 * Redistributions of source code must retain the above copyright
8 notice, this list of conditions and the following disclaimer.
9 * Redistributions in binary form must reproduce the above copyright
10 notice, this list of conditions and the following disclaimer in
11 the documentation and/or other materials provided with the
12 distribution.
13 * Neither the name of Intel Corporation nor the names of its
14 contributors may be used to endorse or promote products derived
15 from this software without specific prior written permission.
16
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 **********************************************************************/
29
30 #include <stdint.h>
31 #include <stdlib.h>
32 #include <assert.h>
33 #include "igzip_lib.h"
34 #include "unaligned.h"
35
36 #if __x86_64__ || __i386__ || _M_X64 || _M_IX86
37 #ifdef _MSC_VER
38 #include <intrin.h>
39 #define inline __inline
40 #else
41 #include <x86intrin.h>
42 #endif
43 #else
44 #define inline __inline
45 #endif //__x86_64__ || __i386__ || _M_X64 || _M_IX86
46
47 /**
48 * @brief Calculate the bit offset of the msb.
49 * @param val 32-bit unsigned integer input
50 *
51 * @returns bit offset of msb starting at 1 for first bit
52 */
53 static inline uint32_t
bsr(uint32_t val)54 bsr(uint32_t val)
55 {
56 uint32_t msb;
57 #if defined(_MSC_VER)
58 unsigned long ret = 0;
59 if (val != 0) {
60 _BitScanReverse(&ret, val);
61 msb = ret + 1;
62 } else
63 msb = 0;
64 #elif defined(__LZCNT__)
65 msb = 32 - __lzcnt32(val);
66 #elif defined(__x86_64__) || defined(__aarch64__)
67 msb = (val == 0) ? 0 : 32 - __builtin_clz(val);
68 #else
69 for (msb = 0; val > 0; val >>= 1)
70 msb++;
71 #endif
72 return msb;
73 }
74
75 static inline uint32_t
tzbytecnt(uint64_t val)76 tzbytecnt(uint64_t val)
77 {
78 uint32_t cnt;
79
80 #ifdef __BMI__
81 cnt = __tzcnt_u64(val);
82 cnt = cnt / 8;
83 #elif defined(__x86_64__) || defined(__aarch64__)
84
85 cnt = (val == 0) ? 64 : __builtin_ctzll(val);
86 cnt = cnt / 8;
87
88 #else
89 for (cnt = 8; val > 0; val <<= 8)
90 cnt -= 1;
91 #endif
92 return cnt;
93 }
94
95 static void
compute_dist_code(struct isal_hufftables * hufftables,uint16_t dist,uint64_t * p_code,uint64_t * p_len)96 compute_dist_code(struct isal_hufftables *hufftables, uint16_t dist, uint64_t *p_code,
97 uint64_t *p_len)
98 {
99 assert(dist > IGZIP_DIST_TABLE_SIZE);
100
101 dist -= 1;
102 uint32_t msb;
103 uint32_t num_extra_bits;
104 uint32_t extra_bits;
105 uint32_t sym;
106 uint32_t len;
107 uint32_t code;
108
109 msb = bsr(dist);
110 assert(msb >= 2);
111 num_extra_bits = msb - 2;
112 extra_bits = dist & ((1 << num_extra_bits) - 1);
113 dist >>= num_extra_bits;
114 sym = dist + 2 * num_extra_bits;
115 assert(sym < 30);
116 code = hufftables->dcodes[sym - IGZIP_DECODE_OFFSET];
117 len = hufftables->dcodes_sizes[sym - IGZIP_DECODE_OFFSET];
118 *p_code = code | (extra_bits << len);
119 *p_len = len + num_extra_bits;
120 }
121
122 static inline void
get_dist_code(struct isal_hufftables * hufftables,uint32_t dist,uint64_t * code,uint64_t * len)123 get_dist_code(struct isal_hufftables *hufftables, uint32_t dist, uint64_t *code, uint64_t *len)
124 {
125 assert(dist >= 1);
126 assert(dist <= 32768);
127 if (dist <= IGZIP_DIST_TABLE_SIZE) {
128 uint64_t code_len;
129 code_len = hufftables->dist_table[dist - 1];
130 *code = code_len >> 5;
131 *len = code_len & 0x1F;
132 } else {
133 compute_dist_code(hufftables, dist, code, len);
134 }
135 }
136
137 static inline void
get_len_code(struct isal_hufftables * hufftables,uint32_t length,uint64_t * code,uint64_t * len)138 get_len_code(struct isal_hufftables *hufftables, uint32_t length, uint64_t *code, uint64_t *len)
139 {
140 assert(length >= 3);
141 assert(length <= 258);
142
143 uint64_t code_len;
144 code_len = hufftables->len_table[length - 3];
145 *code = code_len >> 5;
146 *len = code_len & 0x1F;
147 }
148
149 static inline void
get_lit_code(struct isal_hufftables * hufftables,uint32_t lit,uint64_t * code,uint64_t * len)150 get_lit_code(struct isal_hufftables *hufftables, uint32_t lit, uint64_t *code, uint64_t *len)
151 {
152 assert(lit <= 256);
153
154 *code = hufftables->lit_table[lit];
155 *len = hufftables->lit_table_sizes[lit];
156 }
157
158 static void
compute_dist_icf_code(uint32_t dist,uint32_t * code,uint32_t * extra_bits)159 compute_dist_icf_code(uint32_t dist, uint32_t *code, uint32_t *extra_bits)
160 {
161 uint32_t msb;
162 uint32_t num_extra_bits;
163
164 dist -= 1;
165 msb = bsr(dist);
166 assert(msb >= 2);
167 num_extra_bits = msb - 2;
168 *extra_bits = dist & ((1 << num_extra_bits) - 1);
169 dist >>= num_extra_bits;
170 *code = dist + 2 * num_extra_bits;
171 assert(*code < 30);
172 }
173
174 static inline void
get_dist_icf_code(uint32_t dist,uint32_t * code,uint32_t * extra_bits)175 get_dist_icf_code(uint32_t dist, uint32_t *code, uint32_t *extra_bits)
176 {
177 assert(dist >= 1);
178 assert(dist <= 32768);
179 if (dist <= 2) {
180 *code = dist - 1;
181 *extra_bits = 0;
182 } else {
183 compute_dist_icf_code(dist, code, extra_bits);
184 }
185 }
186
187 static inline void
get_len_icf_code(uint32_t length,uint32_t * code)188 get_len_icf_code(uint32_t length, uint32_t *code)
189 {
190 assert(length >= 3);
191 assert(length <= 258);
192
193 *code = length + 254;
194 }
195
196 static inline void
get_lit_icf_code(uint32_t lit,uint32_t * code)197 get_lit_icf_code(uint32_t lit, uint32_t *code)
198 {
199 assert(lit <= 256);
200
201 *code = lit;
202 }
203
204 /**
205 * @brief Returns a hash of the first 3 bytes of input data.
206 */
207 static inline uint32_t
compute_hash(uint32_t data)208 compute_hash(uint32_t data)
209 {
210 #ifdef __SSE4_2__
211
212 return _mm_crc32_u32(0, data);
213
214 #else
215 uint64_t hash;
216 /* Use multiplication to create a hash, 0xBDD06057 is a prime number */
217 hash = data;
218 hash *= 0xB2D06057;
219 hash >>= 16;
220 hash *= 0xB2D06057;
221 hash >>= 16;
222
223 return hash;
224
225 #endif /* __SSE4_2__ */
226 }
227
228 #define PROD1 0xFFFFE84B
229 #define PROD2 0xFFFF97B1
230 static inline uint32_t
compute_hash_mad(uint32_t data)231 compute_hash_mad(uint32_t data)
232 {
233 int16_t data_low;
234 int16_t data_high;
235
236 data_low = data;
237 data_high = data >> 16;
238 data = PROD1 * data_low + PROD2 * data_high;
239
240 data_low = data;
241 data_high = data >> 16;
242 data = PROD1 * data_low + PROD2 * data_high;
243
244 return data;
245 }
246
247 static inline uint32_t
compute_long_hash(uint64_t data)248 compute_long_hash(uint64_t data)
249 {
250
251 return compute_hash(data >> 32) ^ compute_hash(data);
252 }
253
254 /**
255 * @brief Returns how long str1 and str2 have the same symbols.
256 * @param str1: First input string.
257 * @param str2: Second input string.
258 * @param max_length: length of the smaller string.
259 */
260 static inline int
compare258(uint8_t * str1,uint8_t * str2,uint32_t max_length)261 compare258(uint8_t *str1, uint8_t *str2, uint32_t max_length)
262 {
263 uint32_t count;
264 uint64_t test;
265 uint64_t loop_length;
266
267 if (max_length > 258)
268 max_length = 258;
269
270 loop_length = max_length & ~0x7;
271
272 for (count = 0; count < loop_length; count += 8) {
273 test = load_le_u64(str1);
274 test ^= load_le_u64(str2);
275 if (test != 0)
276 return count + tzbytecnt(test);
277 str1 += 8;
278 str2 += 8;
279 }
280
281 switch (max_length % 8) {
282
283 case 7:
284 if (*str1++ != *str2++)
285 return count;
286 count++;
287 case 6:
288 if (*str1++ != *str2++)
289 return count;
290 count++;
291 case 5:
292 if (*str1++ != *str2++)
293 return count;
294 count++;
295 case 4:
296 if (*str1++ != *str2++)
297 return count;
298 count++;
299 case 3:
300 if (*str1++ != *str2++)
301 return count;
302 count++;
303 case 2:
304 if (*str1++ != *str2++)
305 return count;
306 count++;
307 case 1:
308 if (*str1 != *str2)
309 return count;
310 count++;
311 }
312
313 return count;
314 }
315
316 /**
317 * @brief Returns how long str1 and str2 have the same symbols.
318 * @param str1: First input string.
319 * @param str2: Second input string.
320 * @param max_length: length of the smaller string.
321 */
322 static inline int
compare(uint8_t * str1,uint8_t * str2,uint32_t max_length)323 compare(uint8_t *str1, uint8_t *str2, uint32_t max_length)
324 {
325 uint32_t count;
326 uint64_t test;
327 uint64_t loop_length;
328
329 loop_length = max_length & ~0x7;
330
331 for (count = 0; count < loop_length; count += 8) {
332 test = load_le_u64(str1);
333 test ^= load_le_u64(str2);
334 if (test != 0)
335 return count + tzbytecnt(test);
336 str1 += 8;
337 str2 += 8;
338 }
339
340 switch (max_length % 8) {
341
342 case 7:
343 if (*str1++ != *str2++)
344 return count;
345 count++;
346 case 6:
347 if (*str1++ != *str2++)
348 return count;
349 count++;
350 case 5:
351 if (*str1++ != *str2++)
352 return count;
353 count++;
354 case 4:
355 if (*str1++ != *str2++)
356 return count;
357 count++;
358 case 3:
359 if (*str1++ != *str2++)
360 return count;
361 count++;
362 case 2:
363 if (*str1++ != *str2++)
364 return count;
365 count++;
366 case 1:
367 if (*str1 != *str2)
368 return count;
369 count++;
370 }
371
372 return count;
373 }
374