1 /********************************************************************** 2 Copyright(c) 2011-2016 Intel Corporation All rights reserved. 3 4 Redistribution and use in source and binary forms, with or without 5 modification, are permitted provided that the following conditions 6 are met: 7 * Redistributions of source code must retain the above copyright 8 notice, this list of conditions and the following disclaimer. 9 * Redistributions in binary form must reproduce the above copyright 10 notice, this list of conditions and the following disclaimer in 11 the documentation and/or other materials provided with the 12 distribution. 13 * Neither the name of Intel Corporation nor the names of its 14 contributors may be used to endorse or promote products derived 15 from this software without specific prior written permission. 16 17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 **********************************************************************/ 29 30 #include <stdint.h> 31 #include <stdlib.h> 32 #include <assert.h> 33 #include "igzip_lib.h" 34 #include "unaligned.h" 35 36 #if __x86_64__ || __i386__ || _M_X64 || _M_IX86 37 #ifdef _MSC_VER 38 # include <intrin.h> 39 # define inline __inline 40 #else 41 # include <x86intrin.h> 42 #endif 43 #else 44 # define inline __inline 45 #endif //__x86_64__ || __i386__ || _M_X64 || _M_IX86 46 47 static inline uint32_t bsr(uint32_t val) 48 { 49 uint32_t msb; 50 #ifdef __LZCNT__ 51 msb = 32 - __lzcnt32(val); 52 #else 53 for(msb = 0; val > 0; val >>= 1) 54 msb++; 55 #endif 56 return msb; 57 } 58 59 static inline uint32_t tzbytecnt(uint64_t val) 60 { 61 uint32_t cnt; 62 63 #ifdef __BMI__ 64 cnt = __tzcnt_u64(val); 65 cnt = cnt / 8; 66 #elif defined(__x86_64__) 67 68 cnt = (val == 0)? 64 : __builtin_ctzll(val); 69 cnt = cnt / 8; 70 71 #else 72 for(cnt = 8; val > 0; val <<= 8) 73 cnt -= 1; 74 #endif 75 return cnt; 76 } 77 78 static void compute_dist_code(struct isal_hufftables *hufftables, uint16_t dist, uint64_t *p_code, uint64_t *p_len) 79 { 80 assert(dist > IGZIP_DIST_TABLE_SIZE); 81 82 dist -= 1; 83 uint32_t msb; 84 uint32_t num_extra_bits; 85 uint32_t extra_bits; 86 uint32_t sym; 87 uint32_t len; 88 uint32_t code; 89 90 msb = bsr(dist); 91 assert(msb >= 1); 92 num_extra_bits = msb - 2; 93 extra_bits = dist & ((1 << num_extra_bits) - 1); 94 dist >>= num_extra_bits; 95 sym = dist + 2 * num_extra_bits; 96 assert(sym < 30); 97 code = hufftables->dcodes[sym - IGZIP_DECODE_OFFSET]; 98 len = hufftables->dcodes_sizes[sym - IGZIP_DECODE_OFFSET]; 99 *p_code = code | (extra_bits << len); 100 *p_len = len + num_extra_bits; 101 } 102 103 static inline void get_dist_code(struct isal_hufftables *hufftables, uint32_t dist, uint64_t *code, uint64_t *len) 104 { 105 if (dist < 1) 106 dist = 0; 107 assert(dist >= 1); 108 assert(dist <= 32768); 109 if (dist <= IGZIP_DIST_TABLE_SIZE) { 110 uint64_t code_len; 111 code_len = hufftables->dist_table[dist - 1]; 112 *code = code_len >> 5; 113 *len = code_len & 0x1F; 114 } else { 115 compute_dist_code(hufftables, dist, code, len); 116 } 117 } 118 119 static inline void get_len_code(struct isal_hufftables *hufftables, uint32_t length, uint64_t *code, uint64_t *len) 120 { 121 assert(length >= 3); 122 assert(length <= 258); 123 124 uint64_t code_len; 125 code_len = hufftables->len_table[length - 3]; 126 *code = code_len >> 5; 127 *len = code_len & 0x1F; 128 } 129 130 static inline void get_lit_code(struct isal_hufftables *hufftables, uint32_t lit, uint64_t *code, uint64_t *len) 131 { 132 assert(lit <= 256); 133 134 *code = hufftables->lit_table[lit]; 135 *len = hufftables->lit_table_sizes[lit]; 136 } 137 138 static void compute_dist_icf_code(uint32_t dist, uint32_t *code, uint32_t *extra_bits) 139 { 140 uint32_t msb; 141 uint32_t num_extra_bits; 142 143 dist -= 1; 144 msb = bsr(dist); 145 assert(msb >= 1); 146 num_extra_bits = msb - 2; 147 *extra_bits = dist & ((1 << num_extra_bits) - 1); 148 dist >>= num_extra_bits; 149 *code = dist + 2 * num_extra_bits; 150 assert(*code < 30); 151 } 152 153 static inline void get_dist_icf_code(uint32_t dist, uint32_t *code, uint32_t *extra_bits) 154 { 155 assert(dist >= 1); 156 assert(dist <= 32768); 157 if (dist <= 2) { 158 *code = dist - 1; 159 *extra_bits = 0; 160 } else { 161 compute_dist_icf_code(dist, code, extra_bits); 162 } 163 } 164 165 static inline void get_len_icf_code(uint32_t length, uint32_t *code) 166 { 167 assert(length >= 3); 168 assert(length <= 258); 169 170 *code = length + 254; 171 } 172 173 static inline void get_lit_icf_code(uint32_t lit, uint32_t *code) 174 { 175 assert(lit <= 256); 176 177 *code = lit; 178 } 179 180 /** 181 * @brief Returns a hash of the first 3 bytes of input data. 182 */ 183 static inline uint32_t compute_hash(uint32_t data) 184 { 185 #ifdef __SSE4_2__ 186 187 return _mm_crc32_u32(0, data); 188 189 #else 190 uint64_t hash; 191 /* Use multiplication to create a hash, 0xBDD06057 is a prime number */ 192 hash = data; 193 hash *= 0xB2D06057; 194 hash >>= 16; 195 hash *= 0xB2D06057; 196 hash >>= 16; 197 198 return hash; 199 200 #endif /* __SSE4_2__ */ 201 } 202 203 #define PROD1 0xFFFFE84B 204 #define PROD2 0xFFFF97B1 205 static inline uint32_t compute_hash_mad(uint32_t data) 206 { 207 int16_t data_low; 208 int16_t data_high; 209 210 data_low = data; 211 data_high = data >> 16; 212 data = PROD1 * data_low + PROD2 * data_high; 213 214 data_low = data; 215 data_high = data >> 16; 216 data = PROD1 * data_low + PROD2 * data_high; 217 218 return data; 219 } 220 221 static inline uint32_t compute_long_hash(uint64_t data) { 222 223 return compute_hash(data >> 32)^compute_hash(data); 224 } 225 226 /** 227 * @brief Returns how long str1 and str2 have the same symbols. 228 * @param str1: First input string. 229 * @param str2: Second input string. 230 * @param max_length: length of the smaller string. 231 */ 232 static inline int compare258(uint8_t * str1, uint8_t * str2, uint32_t max_length) 233 { 234 uint32_t count; 235 uint64_t test; 236 uint64_t loop_length; 237 238 if(max_length > 258) 239 max_length = 258; 240 241 loop_length = max_length & ~0x7; 242 243 for(count = 0; count < loop_length; count += 8){ 244 test = load_u64(str1); 245 test ^= load_u64(str2); 246 if(test != 0) 247 return count + tzbytecnt(test); 248 str1 += 8; 249 str2 += 8; 250 } 251 252 switch(max_length % 8){ 253 254 case 7: 255 if(*str1++ != *str2++) 256 return count; 257 count++; 258 case 6: 259 if(*str1++ != *str2++) 260 return count; 261 count++; 262 case 5: 263 if(*str1++ != *str2++) 264 return count; 265 count++; 266 case 4: 267 if(*str1++ != *str2++) 268 return count; 269 count++; 270 case 3: 271 if(*str1++ != *str2++) 272 return count; 273 count++; 274 case 2: 275 if(*str1++ != *str2++) 276 return count; 277 count++; 278 case 1: 279 if(*str1 != *str2) 280 return count; 281 count++; 282 } 283 284 return count; 285 } 286 287 /** 288 * @brief Returns how long str1 and str2 have the same symbols. 289 * @param str1: First input string. 290 * @param str2: Second input string. 291 * @param max_length: length of the smaller string. 292 */ 293 static inline int compare(uint8_t * str1, uint8_t * str2, uint32_t max_length) 294 { 295 uint32_t count; 296 uint64_t test; 297 uint64_t loop_length; 298 299 loop_length = max_length & ~0x7; 300 301 for(count = 0; count < loop_length; count += 8){ 302 test = load_u64(str1); 303 test ^= load_u64(str2); 304 if(test != 0) 305 return count + tzbytecnt(test); 306 str1 += 8; 307 str2 += 8; 308 } 309 310 switch(max_length % 8){ 311 312 case 7: 313 if(*str1++ != *str2++) 314 return count; 315 count++; 316 case 6: 317 if(*str1++ != *str2++) 318 return count; 319 count++; 320 case 5: 321 if(*str1++ != *str2++) 322 return count; 323 count++; 324 case 4: 325 if(*str1++ != *str2++) 326 return count; 327 count++; 328 case 3: 329 if(*str1++ != *str2++) 330 return count; 331 count++; 332 case 2: 333 if(*str1++ != *str2++) 334 return count; 335 count++; 336 case 1: 337 if(*str1 != *str2) 338 return count; 339 count++; 340 } 341 342 return count; 343 } 344