/**********************************************************************
  Copyright(c) 2011-2016 Intel Corporation All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions
  are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in
      the documentation and/or other materials provided with the
      distribution.
    * Neither the name of Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/

#include <immintrin.h>
#include <stdint.h>
#include <string.h>
#include <assert.h>
#include "igzip_lib.h"
#include "huff_codes.h"
#include "huffman.h"
#include "bitbuf2.h"
#include "flatten_ll.h"
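
/* This file implements the Huffman table generation used by igzip: histogram
 * collection over sample data, length-limited Huffman code length generation,
 * canonical code assignment as described in RFC 1951, and packing of the
 * resulting codes and dynamic block header into the formats consumed by the
 * compressor. */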

/* The order in which code length codes are written in the dynamic block
 * header. This is defined in RFC 1951, page 13 */
static const uint8_t code_length_code_order[] =
    { 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 };

struct slver {
	uint16_t snum;
	uint8_t ver;
	uint8_t core;
};

/* Version info */
struct slver isal_update_histogram_slver_00010085;
struct slver isal_update_histogram_slver = { 0x0085, 0x01, 0x00 };

struct slver isal_create_hufftables_slver_00010086;
struct slver isal_create_hufftables_slver = { 0x0086, 0x01, 0x00 };

struct slver isal_create_hufftables_subset_slver_00010087;
struct slver isal_create_hufftables_subset_slver = { 0x0087, 0x01, 0x00 };

extern uint32_t build_huff_tree(struct heap_tree *heap, uint64_t heap_size, uint64_t node_ptr);
extern void build_heap_asm(uint64_t * heap, uint64_t heap_size);

static const uint8_t bitrev8[0x100] = {
	0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0,
	0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0, 0x70, 0xF0,
	0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8,
	0x18, 0x98, 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8,
	0x04, 0x84, 0x44, 0xC4, 0x24, 0xA4, 0x64, 0xE4,
	0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4,
	0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC, 0x6C, 0xEC,
	0x1C, 0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC,
	0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2,
	0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, 0x72, 0xF2,
	0x0A, 0x8A, 0x4A, 0xCA, 0x2A, 0xAA, 0x6A, 0xEA,
	0x1A, 0x9A, 0x5A, 0xDA, 0x3A, 0xBA, 0x7A, 0xFA,
	0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6,
	0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6,
	0x0E, 0x8E, 0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE,
	0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE,
	0x01, 0x81, 0x41, 0xC1, 0x21, 0xA1, 0x61, 0xE1,
	0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, 0xF1,
	0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9,
	0x19, 0x99, 0x59, 0xD9, 0x39, 0xB9, 0x79, 0xF9,
	0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5,
	0x15, 0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5,
	0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD, 0x6D, 0xED,
	0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD,
	0x03, 0x83, 0x43, 0xC3, 0x23, 0xA3, 0x63, 0xE3,
	0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3,
	0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB,
	0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB,
	0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7,
	0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7,
	0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF,
	0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF
};

// bit reverse low order LENGTH bits in code, and return result in low order bits
static inline uint16_t bit_reverse(uint16_t code, uint32_t length)
{
	code = (bitrev8[code & 0x00FF] << 8) | (bitrev8[code >> 8]);
	return (code >> (16 - length));
}
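
/* DEFLATE packs Huffman codes starting from the most-significant bit of the
 * code, while the bit buffer is filled least-significant bit first, so each
 * code is bit reversed once when the table is built.  For example,
 * bit_reverse(0xB, 4) reverses the low 4 bits 1011 -> 1101 and returns 0xD. */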

void isal_update_histogram_base(uint8_t * start_stream, int length,
				struct isal_huff_histogram *histogram)
{
	uint32_t literal = 0, hash;
	uint16_t seen, *last_seen = histogram->hash_table;
	uint8_t *current, *end_stream, *next_hash, *end;
	uint32_t match_length;
	uint32_t dist;
	uint64_t *lit_len_histogram = histogram->lit_len_histogram;
	uint64_t *dist_histogram = histogram->dist_histogram;

	if (length <= 0)
		return;

	end_stream = start_stream + length;
	memset(last_seen, 0, sizeof(histogram->hash_table));	/* Initialize last_seen to be 0. */
	for (current = start_stream; current < end_stream - 3; current++) {
		literal = *(uint32_t *) current;
		hash = compute_hash(literal) & HASH_MASK;
		seen = last_seen[hash];
		last_seen[hash] = ((uint64_t) current - (uint64_t) start_stream) & 0xFFFF;
		dist = ((uint64_t) current - (uint64_t) start_stream - seen) & 0xFFFF;
		if (dist - 1 < D - 1) {
			assert(start_stream <= current - dist);
			match_length =
			    compare258(current - dist, current, end_stream - current);
			if (match_length >= SHORTEST_MATCH) {
				next_hash = current;
#ifdef ISAL_LIMIT_HASH_UPDATE
				end = next_hash + 3;
#else
				end = next_hash + match_length;
#endif
				if (end > end_stream - 3)
					end = end_stream - 3;
				next_hash++;
				for (; next_hash < end; next_hash++) {
					literal = *(uint32_t *) next_hash;
					hash = compute_hash(literal) & HASH_MASK;
					last_seen[hash] =
					    ((uint64_t) next_hash -
					     (uint64_t) start_stream) & 0xFFFF;
				}

				dist_histogram[convert_dist_to_dist_sym(dist)] += 1;
				lit_len_histogram[convert_length_to_len_sym(match_length)] += 1;
				current += match_length - 1;
				continue;
			}
		}
		lit_len_histogram[literal & 0xFF] += 1;
	}
	literal = literal >> 8;
	hash = compute_hash(literal) & HASH_MASK;
	seen = last_seen[hash];
	last_seen[hash] = ((uint64_t) current - (uint64_t) start_stream) & 0xFFFF;
	dist = ((uint64_t) current - (uint64_t) start_stream - seen) & 0xFFFF;
	if (dist < D) {
		match_length = compare258(current - dist, current, end_stream - current);
		if (match_length >= SHORTEST_MATCH) {
			dist_histogram[convert_dist_to_dist_sym(dist)] += 1;
			lit_len_histogram[convert_length_to_len_sym(match_length)] += 1;
			lit_len_histogram[256] += 1;
			return;
		}
	} else
		lit_len_histogram[literal & 0xFF] += 1;
	lit_len_histogram[(literal >> 8) & 0xFF] += 1;
	lit_len_histogram[(literal >> 16) & 0xFF] += 1;
	lit_len_histogram[256] += 1;
	return;
}

uint32_t convert_dist_to_dist_sym(uint32_t dist)
{
	assert(dist <= 32768 && dist > 0);
	if (dist <= 2)
		return dist - 1;
	else if (dist <= 4)
		return 0 + (dist - 1) / 1;
	else if (dist <= 8)
		return 2 + (dist - 1) / 2;
	else if (dist <= 16)
		return 4 + (dist - 1) / 4;
	else if (dist <= 32)
		return 6 + (dist - 1) / 8;
	else if (dist <= 64)
		return 8 + (dist - 1) / 16;
	else if (dist <= 128)
		return 10 + (dist - 1) / 32;
	else if (dist <= 256)
		return 12 + (dist - 1) / 64;
	else if (dist <= 512)
		return 14 + (dist - 1) / 128;
	else if (dist <= 1024)
		return 16 + (dist - 1) / 256;
	else if (dist <= 2048)
		return 18 + (dist - 1) / 512;
	else if (dist <= 4096)
		return 20 + (dist - 1) / 1024;
	else if (dist <= 8192)
		return 22 + (dist - 1) / 2048;
	else if (dist <= 16384)
		return 24 + (dist - 1) / 4096;
	else if (dist <= 32768)
		return 26 + (dist - 1) / 8192;
	else
		return ~0;	/* ~0 is an invalid distance code */

}

uint32_t convert_length_to_len_sym(uint32_t length)
{
	assert(length > 2 && length < 259);

	/* Based on tables on page 11 in RFC 1951 */
	if (length < 11)
		return 257 + length - 3;
	else if (length < 19)
		return 261 + (length - 3) / 2;
	else if (length < 35)
		return 265 + (length - 3) / 4;
	else if (length < 67)
		return 269 + (length - 3) / 8;
	else if (length < 131)
		return 273 + (length - 3) / 16;
	else if (length < 258)
		return 277 + (length - 3) / 32;
	else
		return 285;
}
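
/* Worked examples of the RFC 1951 symbol mappings above: distances 1, 5, 100
 * and 32768 map to distance symbols 0, 4, 13 and 29, and match lengths 3, 11,
 * 100 and 258 map to length symbols 257, 265, 279 and 285. */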

// Upon return, codes[] contains the code lengths,
// and bl_count is the count of the lengths

/* Init heap with the histogram, and return the histogram size */
static inline uint32_t init_heap16(struct heap_tree *heap_space, uint16_t * histogram,
				   uint32_t hist_size)
{
	uint32_t heap_size, i;

	memset(heap_space, 0, sizeof(struct heap_tree));

	heap_size = 0;
	for (i = 0; i < hist_size; i++) {
		if (histogram[i] != 0)
			heap_space->heap[++heap_size] =
			    (((uint64_t) histogram[i]) << FREQ_SHIFT) | i;
	}

	// make sure heap has at least two elements in it
	if (heap_size < 2) {
		if (heap_size == 0) {
			heap_space->heap[1] = 1ULL << FREQ_SHIFT;
			heap_space->heap[2] = (1ULL << FREQ_SHIFT) | 1;
			heap_size = 2;
		} else {
			// heap size == 1
			if (histogram[0] == 0)
				heap_space->heap[2] = 1ULL << FREQ_SHIFT;
			else
				heap_space->heap[2] = (1ULL << FREQ_SHIFT) | 1;
			heap_size = 2;
		}
	}

	build_heap_asm(heap_space->heap, heap_size);

	return heap_size;
}

static inline uint32_t init_heap64(struct heap_tree *heap_space, uint64_t * histogram,
				   uint64_t hist_size)
{
	uint32_t heap_size, i;

	memset(heap_space, 0, sizeof(struct heap_tree));

	heap_size = 0;
	for (i = 0; i < hist_size; i++) {
		if (histogram[i] != 0)
			heap_space->heap[++heap_size] = ((histogram[i]) << FREQ_SHIFT) | i;
	}

	// make sure heap has at least two elements in it
	if (heap_size < 2) {
		if (heap_size == 0) {
			heap_space->heap[1] = 1ULL << FREQ_SHIFT;
			heap_space->heap[2] = (1ULL << FREQ_SHIFT) | 1;
			heap_size = 2;
		} else {
			// heap size == 1
			if (histogram[0] == 0)
				heap_space->heap[2] = 1ULL << FREQ_SHIFT;
			else
				heap_space->heap[2] = (1ULL << FREQ_SHIFT) | 1;
			heap_size = 2;
		}
	}

	build_heap_asm(heap_space->heap, heap_size);

	return heap_size;
}

static inline uint32_t init_heap64_complete(struct heap_tree *heap_space, uint64_t * histogram,
					    uint64_t hist_size)
{
	uint32_t heap_size, i;

	memset(heap_space, 0, sizeof(struct heap_tree));

	heap_size = 0;
	for (i = 0; i < hist_size; i++)
		heap_space->heap[++heap_size] = ((histogram[i]) << FREQ_SHIFT) | i;

	build_heap_asm(heap_space->heap, heap_size);

	return heap_size;
}
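
/* If the tree built by build_huff_tree() is deeper than max_code_len, the
 * rebalancing loop in fix_code_lens() shortens it while preserving the Kraft
 * inequality: a sibling pair at the deepest level is collapsed into its
 * parent (one code moves up a level), and the deepest leaf shallower than
 * max_code_len is split so the displaced code can sit one level below it.
 * Only the length counts are adjusted; depths are then reassigned to the
 * sorted symbols. */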

static inline uint32_t fix_code_lens(struct heap_tree *heap_space, uint32_t root_node,
				     uint32_t * bl_count, uint32_t max_code_len)
{
	struct tree_node *tree = heap_space->tree;
	uint64_t *code_len_count = heap_space->code_len_count;
	uint32_t i, j, k, child, depth, code_len;

	// compute code lengths and code length counts
	code_len = 0;
	j = root_node;
	for (i = root_node; i <= HEAP_TREE_NODE_START; i++) {
		child = tree[i].child;
		if (child > MAX_HISTHEAP_SIZE) {
			depth = 1 + tree[i].depth;

			tree[child].depth = depth;
			tree[child - 1].depth = depth;
		} else {
			tree[j++] = tree[i];
			depth = tree[i].depth;
			while (code_len < depth) {
				code_len++;
				code_len_count[code_len] = 0;
			}
			code_len_count[depth]++;
		}
	}

	if (code_len > max_code_len) {
		while (code_len > max_code_len) {
			assert(code_len_count[code_len] > 1);
			for (i = max_code_len - 1; i != 0; i--)
				if (code_len_count[i] != 0)
					break;
			assert(i != 0);
			code_len_count[i]--;
			code_len_count[i + 1] += 2;
			code_len_count[code_len - 1]++;
			code_len_count[code_len] -= 2;
			if (code_len_count[code_len] == 0)
				code_len--;
		}

		for (i = 1; i <= code_len; i++)
			bl_count[i] = code_len_count[i];
		for (; i <= max_code_len; i++)
			bl_count[i] = 0;

		for (k = 1; code_len_count[k] == 0; k++) ;
		for (i = root_node; i < j; i++) {
			tree[i].depth = k;
			code_len_count[k]--;
			for (; code_len_count[k] == 0; k++) ;
		}
	} else {
		for (i = 1; i <= code_len; i++)
			bl_count[i] = code_len_count[i];
		for (; i <= max_code_len; i++)
			bl_count[i] = 0;
	}

	return j;

}

static inline void
gen_huff_code_lens(struct heap_tree *heap_space, uint32_t heap_size, uint32_t * bl_count,
		   struct huff_code *codes, uint32_t codes_count, uint32_t max_code_len)
{
	struct tree_node *tree = heap_space->tree;
	uint32_t root_node = HEAP_TREE_NODE_START, node_ptr;
	uint32_t end_node;

	root_node = build_huff_tree(heap_space, heap_size, root_node);

	end_node = fix_code_lens(heap_space, root_node, bl_count, max_code_len);

	memset(codes, 0, codes_count * sizeof(*codes));
	for (node_ptr = root_node; node_ptr < end_node; node_ptr++)
		codes[tree[node_ptr].child].length = tree[node_ptr].depth;

}

inline uint32_t set_huff_codes(struct huff_code *huff_code_table, int table_length,
			       uint32_t * count)
{
	/* Uses the algorithm described in the deflate standard, RFC 1951. */
	int i;
	uint16_t code = 0;
	uint16_t next_code[MAX_HUFF_TREE_DEPTH + 1];
	uint32_t max_code = 0;

	next_code[0] = code;

	for (i = 1; i < MAX_HUFF_TREE_DEPTH + 1; i++)
		next_code[i] = (next_code[i - 1] + count[i - 1]) << 1;

	for (i = 0; i < table_length; i++) {
		if (huff_code_table[i].length != 0) {
			huff_code_table[i].code =
			    bit_reverse(next_code[huff_code_table[i].length],
					huff_code_table[i].length);
			next_code[huff_code_table[i].length] += 1;
			max_code = i;
		}
	}

	return max_code;
}
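
/* Canonical code assignment example from RFC 1951 section 3.2.2: for code
 * lengths (3, 3, 3, 3, 3, 2, 4, 4) over symbols A..H, the length counts are
 * {2:1, 3:5, 4:2}, next_code becomes {2:0, 3:2, 4:14}, and the assigned codes
 * are A=010, B=011, C=100, D=101, E=110, F=00, G=1110, H=1111 (before the bit
 * reversal applied above for LSB-first output). */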

// on input, codes contain the code lengths
// on output, code contains:
// 23:16 code length
// 15:0  code value in low order bits
// returns max code value
static inline uint32_t set_dist_huff_codes(struct huff_code *codes, uint32_t * bl_count)
{
	uint32_t code, code_len, bits, i;
	uint32_t next_code[MAX_DEFLATE_CODE_LEN + 1];
	uint32_t max_code = 0;
	const uint32_t num_codes = DIST_LEN;
	const uint32_t num_eb[] = {
		0x0, 0x0, 0x0, 0x0, 0x1, 0x1, 0x2, 0x2,
		0x3, 0x3, 0x4, 0x4, 0x5, 0x5, 0x6, 0x6,
		0x7, 0x7, 0x8, 0x8, 0x9, 0x9, 0xa, 0xa,
		0xb, 0xb, 0xc, 0xc, 0xd, 0xd
	};

	code = bl_count[0] = 0;
	for (bits = 1; bits <= MAX_HUFF_TREE_DEPTH; bits++) {
		code = (code + bl_count[bits - 1]) << 1;
		next_code[bits] = code;
	}
	for (i = 0; i < num_codes; i++) {
		code_len = codes[i].length;
		if (code_len != 0) {
			codes[i].code = bit_reverse(next_code[code_len], code_len);
			codes[i].extra_bit_count = num_eb[i];
			next_code[code_len] += 1;
			max_code = i;
		}
	}
	return max_code;
}

int create_huffman_header(struct BitBuf2 *header_bitbuf,
			  struct huff_code *lookup_table,
			  struct rl_code *huffman_rep,
			  uint16_t huffman_rep_length, uint32_t end_of_block,
			  uint32_t hclen, uint32_t hlit, uint32_t hdist)
{
	/* hlit, hdist and hclen are as defined in the deflate standard; the
	 * first three block header bits (BFINAL and BTYPE = dynamic Huffman)
	 * are folded into the first write below. */
	int i;
	uint64_t bit_count;
	uint64_t data;
	struct huff_code huffman_value;
	const uint32_t extra_bits[3] = { 2, 3, 7 };

	bit_count = buffer_bits_used(header_bitbuf);

	data = (end_of_block ? 5 : 4) | (hlit << 3) | (hdist << 8) | (hclen << 13);
	data |= ((lookup_table[code_length_code_order[0]].length) << DYN_HDR_START_LEN);
	write_bits(header_bitbuf, data, DYN_HDR_START_LEN + 3);
	data = 0;
	for (i = hclen + 3; i >= 1; i--)
		data = (data << 3) | lookup_table[code_length_code_order[i]].length;

	write_bits(header_bitbuf, data, (hclen + 3) * 3);

	for (i = 0; i < huffman_rep_length; i++) {
		huffman_value = lookup_table[huffman_rep[i].code];

		write_bits(header_bitbuf, (uint64_t) huffman_value.code,
			   (uint32_t) huffman_value.length);

		if (huffman_rep[i].code > 15) {
			write_bits(header_bitbuf, (uint64_t) huffman_rep[i].extra_bits,
				   (uint32_t) extra_bits[huffman_rep[i].code - 16]);
		}
	}
	bit_count = buffer_bits_used(header_bitbuf) - bit_count;

	return bit_count;
}

inline int create_header(struct BitBuf2 *header_bitbuf, struct rl_code *huffman_rep,
			 uint32_t length, uint64_t * histogram, uint32_t hlit,
			 uint32_t hdist, uint32_t end_of_block)
{
	int i;

	uint32_t heap_size;
	struct heap_tree heap_space;
	uint32_t code_len_count[MAX_HUFF_TREE_DEPTH + 1];
	struct huff_code lookup_table[HUFF_LEN];

	/* hlit, hdist, and hclen are defined in RFC 1951 page 13 */
	uint32_t hclen;
	uint64_t bit_count;

	/* Create a Huffman tree to encode the run length encoded representation. */
	heap_size = init_heap64(&heap_space, histogram, HUFF_LEN);
	gen_huff_code_lens(&heap_space, heap_size, code_len_count,
			   (struct huff_code *)lookup_table, HUFF_LEN, 7);
	set_huff_codes(lookup_table, HUFF_LEN, code_len_count);

	/* Calculate hclen */
	for (i = CODE_LEN_CODES - 1; i > 3; i--)	/* i must be at least 4 */
		if (lookup_table[code_length_code_order[i]].length != 0)
			break;

	hclen = i - 3;

	/* Generate actual header. */
	bit_count = create_huffman_header(header_bitbuf, lookup_table, huffman_rep,
					  length, end_of_block, hclen, hlit, hdist);

	return bit_count;
}
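
/* write_rl() below emits the code length alphabet of RFC 1951 section 3.2.7:
 * symbols 0-15 are literal code lengths, 16 repeats the previous length 3-6
 * times (2 extra bits), 17 repeats a zero length 3-10 times (3 extra bits),
 * and 18 repeats a zero length 11-138 times (7 extra bits).  For example, a
 * run of 140 zero lengths is emitted as one 18 symbol (repeat 138) followed
 * by two explicit 0 lengths. */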

static inline
struct rl_code *write_rl(struct rl_code *pout, uint16_t last_len, uint32_t run_len,
			 uint64_t * counts)
{
	if (last_len == 0) {
		while (run_len > 138) {
			pout->code = 18;
			pout->extra_bits = 138 - 11;
			pout++;
			run_len -= 138;
			counts[18]++;
		}
		// 1 <= run_len <= 138
		if (run_len > 10) {
			pout->code = 18;
			pout->extra_bits = run_len - 11;
			pout++;
			counts[18]++;
		} else if (run_len > 2) {
			pout->code = 17;
			pout->extra_bits = run_len - 3;
			pout++;
			counts[17]++;
		} else if (run_len == 1) {
			pout->code = 0;
			pout->extra_bits = 0;
			pout++;
			counts[0]++;
		} else {
			assert(run_len == 2);
			pout[0].code = 0;
			pout[0].extra_bits = 0;
			pout[1].code = 0;
			pout[1].extra_bits = 0;
			pout += 2;
			counts[0] += 2;
		}
	} else {
		// last_len != 0
		pout->code = last_len;
		pout->extra_bits = 0;
		pout++;
		counts[last_len]++;
		run_len--;
		if (run_len != 0) {
			while (run_len > 6) {
				pout->code = 16;
				pout->extra_bits = 6 - 3;
				pout++;
				run_len -= 6;
				counts[16]++;
			}
			// 1 <= run_len <= 6
			switch (run_len) {
			case 1:
				pout->code = last_len;
				pout->extra_bits = 0;
				pout++;
				counts[last_len]++;
				break;
			case 2:
				pout[0].code = last_len;
				pout[0].extra_bits = 0;
				pout[1].code = last_len;
				pout[1].extra_bits = 0;
				pout += 2;
				counts[last_len] += 2;
				break;
			default:	// 3...6
				pout->code = 16;
				pout->extra_bits = run_len - 3;
				pout++;
				counts[16]++;
			}
		}
	}
	return pout;
}

// convert codes into run-length symbols, write symbols into OUT
// generate histogram into COUNTS (assumed to be initialized to 0)
// Format of OUT:
// 4:0  code (0...18)
// 15:8 Extra bits (0...127)
// returns number of symbols in out
static inline uint32_t rl_encode(uint16_t * codes, uint32_t num_codes, uint64_t * counts,
				 struct rl_code *out)
{
	uint32_t i, run_len;
	uint16_t last_len, len;
	struct rl_code *pout;

	pout = out;
	last_len = codes[0];
	run_len = 1;
	for (i = 1; i < num_codes; i++) {
		len = codes[i];
		if (len == last_len) {
			run_len++;
			continue;
		}
		pout = write_rl(pout, last_len, run_len, counts);
		last_len = len;
		run_len = 1;
	}
	pout = write_rl(pout, last_len, run_len, counts);

	return (uint32_t) (pout - out);
}

void create_code_tables(uint16_t * code_table, uint8_t * code_length_table, uint32_t length,
			struct huff_code *hufftable)
{
	int i;
	for (i = 0; i < length; i++) {
		code_table[i] = hufftable[i].code;
		code_length_table[i] = hufftable[i].length;
	}
}
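
/* The packed tables below pre-combine, for each match length or distance
 * bucket, the Huffman code, the extra-bits value and the total number of bits
 * to write: the low LENGTH_BITS bits hold the combined bit count and the code
 * with its extra bits already appended sits above them, so the compressor can
 * emit a length or distance with a single table lookup. */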

void create_packed_len_table(uint32_t * packed_table, struct huff_code *lit_len_hufftable)
{
	int i, count = 0;
	uint16_t extra_bits;
	uint16_t extra_bits_count = 0;

	/* Gain extra bits is the next place where the number of extra bits in
	 * length codes increases. */
	uint16_t gain_extra_bits = LEN_EXTRA_BITS_START;

	for (i = 257; i < LIT_LEN - 1; i++) {
		for (extra_bits = 0; extra_bits < (1 << extra_bits_count); extra_bits++) {
			if (count > 254)
				break;
			packed_table[count++] =
			    (extra_bits << (lit_len_hufftable[i].length + LENGTH_BITS)) |
			    (lit_len_hufftable[i].code << LENGTH_BITS) |
			    (lit_len_hufftable[i].length + extra_bits_count);
		}

		if (i == gain_extra_bits) {
			gain_extra_bits += LEN_EXTRA_BITS_INTERVAL;
			extra_bits_count += 1;
		}
	}

	packed_table[count] = (lit_len_hufftable[LIT_LEN - 1].code << LENGTH_BITS) |
	    (lit_len_hufftable[LIT_LEN - 1].length);
}

void create_packed_dist_table(uint32_t * packed_table, uint32_t length,
			      struct huff_code *dist_hufftable)
{
	int i, count = 0;
	uint16_t extra_bits;
	uint16_t extra_bits_count = 0;

	/* Gain extra bits is the next place where the number of extra bits in
	 * distance codes increases. */
	uint16_t gain_extra_bits = DIST_EXTRA_BITS_START;

	for (i = 0; i < DIST_LEN; i++) {
		for (extra_bits = 0; extra_bits < (1 << extra_bits_count); extra_bits++) {
			if (count >= length)
				return;

			packed_table[count++] =
			    (extra_bits << (dist_hufftable[i].length + LENGTH_BITS)) |
			    (dist_hufftable[i].code << LENGTH_BITS) |
			    (dist_hufftable[i].length + extra_bits_count);

		}

		if (i == gain_extra_bits) {
			gain_extra_bits += DIST_EXTRA_BITS_INTERVAL;
			extra_bits_count += 1;
		}
	}
}

int are_hufftables_useable(struct huff_code *lit_len_hufftable,
			   struct huff_code *dist_hufftable)
{
	int max_lit_code_len = 0, max_len_code_len = 0, max_dist_code_len = 0;
	int dist_extra_bits = 0, len_extra_bits = 0;
	int gain_dist_extra_bits = DIST_EXTRA_BITS_START;
	int gain_len_extra_bits = LEN_EXTRA_BITS_START;
	int max_code_len;
	int i;

	for (i = 0; i < LIT_LEN; i++)
		if (lit_len_hufftable[i].length > max_lit_code_len)
			max_lit_code_len = lit_len_hufftable[i].length;

	for (i = 257; i < LIT_LEN - 1; i++) {
		if (lit_len_hufftable[i].length + len_extra_bits > max_len_code_len)
			max_len_code_len = lit_len_hufftable[i].length + len_extra_bits;

		if (i == gain_len_extra_bits) {
			gain_len_extra_bits += LEN_EXTRA_BITS_INTERVAL;
			len_extra_bits += 1;
		}
	}

	for (i = 0; i < DIST_LEN; i++) {
		if (dist_hufftable[i].length + dist_extra_bits > max_dist_code_len)
			max_dist_code_len = dist_hufftable[i].length + dist_extra_bits;

		if (i == gain_dist_extra_bits) {
			gain_dist_extra_bits += DIST_EXTRA_BITS_INTERVAL;
			dist_extra_bits += 1;
		}
	}

	max_code_len = max_lit_code_len + max_len_code_len + max_dist_code_len;

	/* Some versions of igzip can write up to one literal, one length and
	 * one distance code at the same time. This checks that such a
	 * combination always fits in the bit buffer; a non-zero return means
	 * the tables must be regenerated with the safe code length limits. */
	return (max_code_len > MAX_BITBUF_BIT_WRITE);
}
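
/* Illustrative usage sketch (see igzip_lib.h for the public prototypes; the
 * sample/sample_len names are placeholders for caller-provided data):
 *
 *	struct isal_huff_histogram histogram;
 *	struct isal_hufftables hufftables;
 *	struct isal_zstream stream;
 *
 *	memset(&histogram, 0, sizeof(histogram));
 *	isal_update_histogram(sample, sample_len, &histogram);
 *	isal_create_hufftables(&hufftables, &histogram);
 *
 *	isal_deflate_init(&stream);
 *	stream.hufftables = &hufftables;
 *	// set next_in/avail_in and next_out/avail_out, then call isal_deflate()
 */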

int isal_create_hufftables(struct isal_hufftables *hufftables,
			   struct isal_huff_histogram *histogram)
{
	struct huff_code lit_huff_table[LIT_LEN], dist_huff_table[DIST_LEN];
	uint64_t bit_count;
	int max_dist = convert_dist_to_dist_sym(IGZIP_HIST_SIZE);
	struct heap_tree heap_space;
	uint32_t heap_size;
	uint32_t code_len_count[MAX_HUFF_TREE_DEPTH + 1];
	struct BitBuf2 header_bitbuf;
	uint32_t max_lit_len_sym;
	uint32_t max_dist_sym;
	uint32_t hlit, hdist, i;
	uint16_t combined_table[LIT_LEN + DIST_LEN];
	uint64_t count_histogram[HUFF_LEN];
	struct rl_code rl_huff[LIT_LEN + DIST_LEN];
	uint32_t rl_huff_len;

	uint32_t *dist_table = hufftables->dist_table;
	uint32_t *len_table = hufftables->len_table;
	uint16_t *lit_table = hufftables->lit_table;
	uint16_t *dcodes = hufftables->dcodes;
	uint8_t *lit_table_sizes = hufftables->lit_table_sizes;
	uint8_t *dcodes_sizes = hufftables->dcodes_sizes;
	uint8_t *deflate_hdr = hufftables->deflate_hdr;
	uint64_t *lit_len_histogram = histogram->lit_len_histogram;
	uint64_t *dist_histogram = histogram->dist_histogram;

	memset(hufftables, 0, sizeof(struct isal_hufftables));

	heap_size = init_heap64_complete(&heap_space, lit_len_histogram, LIT_LEN);
	gen_huff_code_lens(&heap_space, heap_size, code_len_count,
			   (struct huff_code *)lit_huff_table, LIT_LEN, MAX_DEFLATE_CODE_LEN);
	max_lit_len_sym = set_huff_codes(lit_huff_table, LIT_LEN, code_len_count);

	heap_size = init_heap64_complete(&heap_space, dist_histogram, DIST_LEN);
	gen_huff_code_lens(&heap_space, heap_size, code_len_count,
			   (struct huff_code *)dist_huff_table, max_dist,
			   MAX_DEFLATE_CODE_LEN);
	max_dist_sym = set_huff_codes(dist_huff_table, DIST_LEN, code_len_count);

	if (are_hufftables_useable(lit_huff_table, dist_huff_table)) {
		heap_size = init_heap64_complete(&heap_space, lit_len_histogram, LIT_LEN);
		gen_huff_code_lens(&heap_space, heap_size, code_len_count,
				   (struct huff_code *)lit_huff_table, LIT_LEN,
				   MAX_SAFE_LIT_CODE_LEN);
		max_lit_len_sym = set_huff_codes(lit_huff_table, LIT_LEN, code_len_count);

		heap_size = init_heap64_complete(&heap_space, dist_histogram, DIST_LEN);
		gen_huff_code_lens(&heap_space, heap_size, code_len_count,
				   (struct huff_code *)dist_huff_table, max_dist,
				   MAX_SAFE_DIST_CODE_LEN);
		max_dist_sym = set_huff_codes(dist_huff_table, DIST_LEN, code_len_count);

	}

	create_code_tables(dcodes, dcodes_sizes, DIST_LEN - DCODE_OFFSET,
			   dist_huff_table + DCODE_OFFSET);

	create_code_tables(lit_table, lit_table_sizes, IGZIP_LIT_TABLE_SIZE, lit_huff_table);

	create_packed_len_table(len_table, lit_huff_table);
	create_packed_dist_table(dist_table, IGZIP_DIST_TABLE_SIZE, dist_huff_table);

	set_buf(&header_bitbuf, deflate_hdr, sizeof(hufftables->deflate_hdr));
	init(&header_bitbuf);

	hlit = max_lit_len_sym - 256;
	hdist = max_dist_sym;

	/* Run length encode the length and distance huffman codes */
	memset(count_histogram, 0, sizeof(count_histogram));
	for (i = 0; i < 257 + hlit; i++)
		combined_table[i] = lit_huff_table[i].length;
	for (i = 0; i < 1 + hdist; i++)
		combined_table[i + hlit + 257] = dist_huff_table[i].length;
	rl_huff_len =
	    rl_encode(combined_table, hlit + 257 + hdist + 1, count_histogram, rl_huff);

	/* Create header */
	bit_count =
	    create_header(&header_bitbuf, rl_huff, rl_huff_len,
			  count_histogram, hlit, hdist, LAST_BLOCK);
	flush(&header_bitbuf);

	hufftables->deflate_hdr_count = bit_count / 8;
	hufftables->deflate_hdr_extra_bits = bit_count % 8;

	return 0;
}
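
/* isal_create_hufftables_subset() below differs from isal_create_hufftables()
 * only in how the literal/length heap is first seeded: it uses init_heap64(),
 * so codes are generated just for the literal/length symbols that actually
 * occur in the histogram rather than for the complete alphabet. */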

int isal_create_hufftables_subset(struct isal_hufftables *hufftables,
				  struct isal_huff_histogram *histogram)
{
	struct huff_code lit_huff_table[LIT_LEN], dist_huff_table[DIST_LEN];
	uint64_t bit_count;
	int max_dist = convert_dist_to_dist_sym(IGZIP_HIST_SIZE);
	struct heap_tree heap_space;
	uint32_t heap_size;
	uint32_t code_len_count[MAX_HUFF_TREE_DEPTH + 1];
	struct BitBuf2 header_bitbuf;
	uint32_t max_lit_len_sym;
	uint32_t max_dist_sym;
	uint32_t hlit, hdist, i;
	uint16_t combined_table[LIT_LEN + DIST_LEN];
	uint64_t count_histogram[HUFF_LEN];
	struct rl_code rl_huff[LIT_LEN + DIST_LEN];
	uint32_t rl_huff_len;

	uint32_t *dist_table = hufftables->dist_table;
	uint32_t *len_table = hufftables->len_table;
	uint16_t *lit_table = hufftables->lit_table;
	uint16_t *dcodes = hufftables->dcodes;
	uint8_t *lit_table_sizes = hufftables->lit_table_sizes;
	uint8_t *dcodes_sizes = hufftables->dcodes_sizes;
	uint8_t *deflate_hdr = hufftables->deflate_hdr;
	uint64_t *lit_len_histogram = histogram->lit_len_histogram;
	uint64_t *dist_histogram = histogram->dist_histogram;

	memset(hufftables, 0, sizeof(struct isal_hufftables));

	heap_size = init_heap64(&heap_space, lit_len_histogram, LIT_LEN);
	gen_huff_code_lens(&heap_space, heap_size, code_len_count,
			   (struct huff_code *)lit_huff_table, LIT_LEN, MAX_DEFLATE_CODE_LEN);
	max_lit_len_sym = set_huff_codes(lit_huff_table, LIT_LEN, code_len_count);

	heap_size = init_heap64_complete(&heap_space, dist_histogram, DIST_LEN);
	gen_huff_code_lens(&heap_space, heap_size, code_len_count,
			   (struct huff_code *)dist_huff_table, max_dist,
			   MAX_DEFLATE_CODE_LEN);
	max_dist_sym = set_huff_codes(dist_huff_table, DIST_LEN, code_len_count);

	if (are_hufftables_useable(lit_huff_table, dist_huff_table)) {
		heap_size = init_heap64_complete(&heap_space, lit_len_histogram, LIT_LEN);
		gen_huff_code_lens(&heap_space, heap_size, code_len_count,
				   (struct huff_code *)lit_huff_table, LIT_LEN,
				   MAX_SAFE_LIT_CODE_LEN);
		max_lit_len_sym = set_huff_codes(lit_huff_table, LIT_LEN, code_len_count);

		heap_size = init_heap64_complete(&heap_space, dist_histogram, DIST_LEN);
		gen_huff_code_lens(&heap_space, heap_size, code_len_count,
				   (struct huff_code *)dist_huff_table, max_dist,
				   MAX_SAFE_DIST_CODE_LEN);
		max_dist_sym = set_huff_codes(dist_huff_table, DIST_LEN, code_len_count);

	}

	create_code_tables(dcodes, dcodes_sizes, DIST_LEN - DCODE_OFFSET,
			   dist_huff_table + DCODE_OFFSET);

	create_code_tables(lit_table, lit_table_sizes, IGZIP_LIT_TABLE_SIZE, lit_huff_table);

	create_packed_len_table(len_table, lit_huff_table);
	create_packed_dist_table(dist_table, IGZIP_DIST_TABLE_SIZE, dist_huff_table);

	set_buf(&header_bitbuf, deflate_hdr, sizeof(hufftables->deflate_hdr));
	init(&header_bitbuf);

	hlit = max_lit_len_sym - 256;
	hdist = max_dist_sym;

	/* Run length encode the length and distance huffman codes */
	memset(count_histogram, 0, sizeof(count_histogram));
	for (i = 0; i < 257 + hlit; i++)
		combined_table[i] = lit_huff_table[i].length;
	for (i = 0; i < 1 + hdist; i++)
		combined_table[i + hlit + 257] = dist_huff_table[i].length;
	rl_huff_len =
	    rl_encode(combined_table, hlit + 257 + hdist + 1, count_histogram, rl_huff);

	/* Create header */
	bit_count =
	    create_header(&header_bitbuf, rl_huff, rl_huff_len,
			  count_histogram, hlit, hdist, LAST_BLOCK);
	flush(&header_bitbuf);

	hufftables->deflate_hdr_count = bit_count / 8;
	hufftables->deflate_hdr_extra_bits = bit_count % 8;

	return 0;
}
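
/* expand_hufftables_icf() rewrites the ICF literal/length table so that every
 * exact match length has its own entry: the 20 length symbols carrying 1-5
 * extra bits (265-284) are expanded into one entry per extra-bits value, with
 * the extra bits appended above the Huffman code and the stored length
 * increased accordingly.  The final entry is restored to the plain code for
 * symbol 285 (length 258, no extra bits). */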

void expand_hufftables_icf(struct hufftables_icf *hufftables)
{
	uint32_t i, eb, j, k, len, code;
	struct huff_code orig[21], *p_code;
	struct huff_code *lit_len_codes = hufftables->lit_len_table;
	struct huff_code *dist_codes = hufftables->dist_table;

	for (i = 0; i < 21; i++)
		orig[i] = lit_len_codes[i + 265];

	p_code = &lit_len_codes[265];

	i = 0;
	for (eb = 1; eb < 6; eb++) {
		for (k = 0; k < 4; k++) {
			len = orig[i].length;
			code = orig[i++].code;
			for (j = 0; j < (1u << eb); j++) {
				p_code->code_and_extra = code | (j << len);
				p_code->length = len + eb;
				p_code++;
			}
		}	// end for k
	}		// end for eb
	// fix up last record
	p_code[-1] = orig[i];

	dist_codes[DIST_LEN].code_and_extra = 0;
	dist_codes[DIST_LEN].length = 0;
}

void
create_hufftables_icf(struct BitBuf2 *bb, struct hufftables_icf *hufftables,
		      struct isal_mod_hist *hist, uint32_t end_of_block)
{
	uint32_t bl_count[MAX_DEFLATE_CODE_LEN + 1];
	uint32_t max_ll_code, max_d_code;
	struct heap_tree heap_space;
	uint32_t heap_size;
	struct rl_code cl_tokens[LIT_LEN + DIST_LEN];
	uint32_t num_cl_tokens;
	uint64_t cl_counts[CODE_LEN_CODES];
	uint16_t combined_table[LIT_LEN + DIST_LEN];
	int i;

	struct huff_code *ll_codes = hufftables->lit_len_table;
	struct huff_code *d_codes = hufftables->dist_table;
	uint16_t *ll_hist = hist->ll_hist;
	uint16_t *d_hist = hist->d_hist;

	flatten_ll(hist->ll_hist);

	// make sure EOB is present
	if (ll_hist[256] == 0)
		ll_hist[256] = 1;

	heap_size = init_heap16(&heap_space, ll_hist, LIT_LEN);
	gen_huff_code_lens(&heap_space, heap_size, bl_count,
			   ll_codes, LIT_LEN, MAX_DEFLATE_CODE_LEN);
	max_ll_code = set_huff_codes(ll_codes, LIT_LEN, bl_count);

	heap_size = init_heap16(&heap_space, d_hist, DIST_LEN);
	gen_huff_code_lens(&heap_space, heap_size, bl_count, d_codes,
			   DIST_LEN, MAX_DEFLATE_CODE_LEN);
	max_d_code = set_dist_huff_codes(d_codes, bl_count);

	assert(max_ll_code >= 256);	// must be EOB code
	assert(max_d_code != 0);

	/* Run length encode the length and distance huffman codes */
	memset(cl_counts, 0, sizeof(cl_counts));
	for (i = 0; i < max_ll_code + 1; i++)
		combined_table[i] = ll_codes[i].length;
	for (i = 0; i < max_d_code + 1; i++)
		combined_table[i + max_ll_code + 1] = d_codes[i].length;

	expand_hufftables_icf(hufftables);

	num_cl_tokens =
	    rl_encode(combined_table, max_ll_code + max_d_code + 2, cl_counts, cl_tokens);

	/* Create header */
	create_header(bb, cl_tokens, num_cl_tokens, cl_counts, max_ll_code - 256, max_d_code,
		      end_of_block);

}