xref: /isa-l/igzip/huff_codes.c (revision 01dfbcc4846fe6f54b141047ca458620b6dd9ec9)
1 /**********************************************************************
2   Copyright(c) 2011-2016 Intel Corporation All rights reserved.
3 
4   Redistribution and use in source and binary forms, with or without
5   modification, are permitted provided that the following conditions
6   are met:
7     * Redistributions of source code must retain the above copyright
8       notice, this list of conditions and the following disclaimer.
9     * Redistributions in binary form must reproduce the above copyright
10       notice, this list of conditions and the following disclaimer in
11       the documentation and/or other materials provided with the
12       distribution.
13     * Neither the name of Intel Corporation nor the names of its
14       contributors may be used to endorse or promote products derived
15       from this software without specific prior written permission.
16 
17   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 **********************************************************************/
29 
30 #include <immintrin.h>
31 #include <stdint.h>
32 #include <string.h>
33 #include <assert.h>
34 #include "igzip_lib.h"
35 #include "huff_codes.h"
36 #include "huffman.h"
37 #include "bitbuf2.h"
38 #include "flatten_ll.h"
39 
40 /* The order in which code length code lengths are written in the dynamic
41  * block header, as defined in RFC 1951, page 13 */
42 static const uint8_t code_length_code_order[] =
43     { 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 };
44 
45 struct slver {
46 	uint16_t snum;
47 	uint8_t ver;
48 	uint8_t core;
49 };
50 
51 /* Version info */
52 struct slver isal_update_histogram_slver_00010085;
53 struct slver isal_update_histogram_slver = { 0x0085, 0x01, 0x00 };
54 
55 struct slver isal_create_hufftables_slver_00010086;
56 struct slver isal_create_hufftables_slver = { 0x0086, 0x01, 0x00 };
57 
58 struct slver isal_create_hufftables_subset_slver_00010087;
59 struct slver isal_create_hufftables_subset_slver = { 0x0087, 0x01, 0x00 };
60 
61 extern uint32_t build_huff_tree(struct heap_tree *heap, uint64_t heap_size, uint64_t node_ptr);
62 extern void build_heap_asm(uint64_t * heap, uint64_t heap_size);
63 
64 static const uint8_t bitrev8[0x100] = {
65 	0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0,
66 	0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0, 0x70, 0xF0,
67 	0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8,
68 	0x18, 0x98, 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8,
69 	0x04, 0x84, 0x44, 0xC4, 0x24, 0xA4, 0x64, 0xE4,
70 	0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4,
71 	0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC, 0x6C, 0xEC,
72 	0x1C, 0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC,
73 	0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2,
74 	0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, 0x72, 0xF2,
75 	0x0A, 0x8A, 0x4A, 0xCA, 0x2A, 0xAA, 0x6A, 0xEA,
76 	0x1A, 0x9A, 0x5A, 0xDA, 0x3A, 0xBA, 0x7A, 0xFA,
77 	0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6,
78 	0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6,
79 	0x0E, 0x8E, 0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE,
80 	0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE,
81 	0x01, 0x81, 0x41, 0xC1, 0x21, 0xA1, 0x61, 0xE1,
82 	0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, 0xF1,
83 	0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9,
84 	0x19, 0x99, 0x59, 0xD9, 0x39, 0xB9, 0x79, 0xF9,
85 	0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5,
86 	0x15, 0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5,
87 	0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD, 0x6D, 0xED,
88 	0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD,
89 	0x03, 0x83, 0x43, 0xC3, 0x23, 0xA3, 0x63, 0xE3,
90 	0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3,
91 	0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB,
92 	0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB,
93 	0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7,
94 	0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7,
95 	0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF,
96 	0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF
97 };
98 
99 // bit reverse low order LENGTH bits in code, and return result in low order bits
100 static inline uint16_t bit_reverse(uint16_t code, uint32_t length)
101 {
102 	code = (bitrev8[code & 0x00FF] << 8) | (bitrev8[code >> 8]);
103 	return (code >> (16 - length));
104 }
105 
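/*
 * Scan the input stream with a greedy, hash-based LZ77 match search (the same
 * SHORTEST_MATCH / compare258 primitives the compressor uses) and accumulate
 * literal/length and distance symbol frequencies into the histogram, so that
 * Huffman tables can later be built from representative data.
 */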
106 void isal_update_histogram_base(uint8_t * start_stream, int length,
107 				struct isal_huff_histogram *histogram)
108 {
109 	uint32_t literal = 0, hash;
110 	uint16_t seen, *last_seen = histogram->hash_table;
111 	uint8_t *current, *end_stream, *next_hash, *end;
112 	uint32_t match_length;
113 	uint32_t dist;
114 	uint64_t *lit_len_histogram = histogram->lit_len_histogram;
115 	uint64_t *dist_histogram = histogram->dist_histogram;
116 
117 	if (length <= 0)
118 		return;
119 
120 	end_stream = start_stream + length;
121 	memset(last_seen, 0, sizeof(histogram->hash_table));	/* Initialize last_seen to be 0. */
122 	for (current = start_stream; current < end_stream - 3; current++) {
123 		literal = *(uint32_t *) current;
124 		hash = compute_hash(literal) & HASH_MASK;
125 		seen = last_seen[hash];
126 		last_seen[hash] = ((uint64_t) current - (uint64_t) start_stream) & 0xFFFF;
127 		dist = ((uint64_t) current - (uint64_t) start_stream - seen) & 0xFFFF;
128 		if (dist - 1 < D - 1) {
129 			assert(start_stream <= current - dist);
130 			match_length =
131 			    compare258(current - dist, current, end_stream - current);
132 			if (match_length >= SHORTEST_MATCH) {
133 				next_hash = current;
134 #ifdef ISAL_LIMIT_HASH_UPDATE
135 				end = next_hash + 3;
136 #else
137 				end = next_hash + match_length;
138 #endif
139 				if (end > end_stream - 3)
140 					end = end_stream - 3;
141 				next_hash++;
142 				for (; next_hash < end; next_hash++) {
143 					literal = *(uint32_t *) next_hash;
144 					hash = compute_hash(literal) & HASH_MASK;
145 					last_seen[hash] =
146 					    ((uint64_t) next_hash -
147 					     (uint64_t) start_stream) & 0xFFFF;
148 				}
149 
150 				dist_histogram[convert_dist_to_dist_sym(dist)] += 1;
151 				lit_len_histogram[convert_length_to_len_sym(match_length)] +=
152 				    1;
153 				current += match_length - 1;
154 				continue;
155 			}
156 		}
157 		lit_len_histogram[literal & 0xFF] += 1;
158 	}
159 	literal = literal >> 8;
160 	hash = compute_hash(literal) & HASH_MASK;
161 	seen = last_seen[hash];
162 	last_seen[hash] = ((uint64_t) current - (uint64_t) start_stream) & 0xFFFF;
163 	dist = ((uint64_t) current - (uint64_t) start_stream - seen) & 0xFFFF;
164 	if (dist < D) {
165 		match_length = compare258(current - dist, current, end_stream - current);
166 		if (match_length >= SHORTEST_MATCH) {
167 			dist_histogram[convert_dist_to_dist_sym(dist)] += 1;
168 			lit_len_histogram[convert_length_to_len_sym(match_length)] += 1;
169 			lit_len_histogram[256] += 1;
170 			return;
171 		}
172 	} else
173 		lit_len_histogram[literal & 0xFF] += 1;
174 	lit_len_histogram[(literal >> 8) & 0xFF] += 1;
175 	lit_len_histogram[(literal >> 16) & 0xFF] += 1;
176 	lit_len_histogram[256] += 1;
177 	return;
178 }
179 
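/* Map a match distance (1...32768) to its deflate distance symbol (0...29),
 * following the distance code table in RFC 1951 section 3.2.5. */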
180 uint32_t convert_dist_to_dist_sym(uint32_t dist)
181 {
182 	assert(dist <= 32768 && dist > 0);
183 	if (dist <= 2)
184 		return dist - 1;
185 	else if (dist <= 4)
186 		return 0 + (dist - 1) / 1;
187 	else if (dist <= 8)
188 		return 2 + (dist - 1) / 2;
189 	else if (dist <= 16)
190 		return 4 + (dist - 1) / 4;
191 	else if (dist <= 32)
192 		return 6 + (dist - 1) / 8;
193 	else if (dist <= 64)
194 		return 8 + (dist - 1) / 16;
195 	else if (dist <= 128)
196 		return 10 + (dist - 1) / 32;
197 	else if (dist <= 256)
198 		return 12 + (dist - 1) / 64;
199 	else if (dist <= 512)
200 		return 14 + (dist - 1) / 128;
201 	else if (dist <= 1024)
202 		return 16 + (dist - 1) / 256;
203 	else if (dist <= 2048)
204 		return 18 + (dist - 1) / 512;
205 	else if (dist <= 4096)
206 		return 20 + (dist - 1) / 1024;
207 	else if (dist <= 8192)
208 		return 22 + (dist - 1) / 2048;
209 	else if (dist <= 16384)
210 		return 24 + (dist - 1) / 4096;
211 	else if (dist <= 32768)
212 		return 26 + (dist - 1) / 8192;
213 	else
214 		return ~0;	/* ~0 is an invalid distance code */
215 
216 }
217 
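/* Map a match length (3...258) to its deflate length symbol (257...285),
 * following the length code table in RFC 1951 section 3.2.5. */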
218 uint32_t convert_length_to_len_sym(uint32_t length)
219 {
220 	assert(length > 2 && length < 259);
221 
222 	/* Based on tables on page 11 in RFC 1951 */
223 	if (length < 11)
224 		return 257 + length - 3;
225 	else if (length < 19)
226 		return 261 + (length - 3) / 2;
227 	else if (length < 35)
228 		return 265 + (length - 3) / 4;
229 	else if (length < 67)
230 		return 269 + (length - 3) / 8;
231 	else if (length < 131)
232 		return 273 + (length - 3) / 16;
233 	else if (length < 258)
234 		return 277 + (length - 3) / 32;
235 	else
236 		return 285;
237 }
238 
239 // Upon return, codes[] contains the code lengths,
240 // and bl_count is the count of the lengths
241 
242 /* Initialize the heap from the histogram, padding to at least two entries, and return the heap size */
243 static inline uint32_t init_heap16(struct heap_tree *heap_space, uint16_t * histogram,
244 				   uint32_t hist_size)
245 {
246 	uint32_t heap_size, i;
247 
248 	memset(heap_space, 0, sizeof(struct heap_tree));
249 
250 	heap_size = 0;
251 	for (i = 0; i < hist_size; i++) {
252 		if (histogram[i] != 0)
253 			heap_space->heap[++heap_size] =
254 			    (((uint64_t) histogram[i]) << FREQ_SHIFT) | i;
255 	}
256 
257 	// make sure heap has at least two elements in it
258 	if (heap_size < 2) {
259 		if (heap_size == 0) {
260 			heap_space->heap[1] = 1ULL << FREQ_SHIFT;
261 			heap_space->heap[2] = (1ULL << FREQ_SHIFT) | 1;
262 			heap_size = 2;
263 		} else {
264 			// heap size == 1
265 			if (histogram[0] == 0)
266 				heap_space->heap[2] = 1ULL << FREQ_SHIFT;
267 			else
268 				heap_space->heap[2] = (1ULL << FREQ_SHIFT) | 1;
269 			heap_size = 2;
270 		}
271 	}
272 
273 	build_heap_asm(heap_space->heap, heap_size);
274 
275 	return heap_size;
276 }
277 
278 static inline uint32_t init_heap64(struct heap_tree *heap_space, uint64_t * histogram,
279 				   uint64_t hist_size)
280 {
281 	uint32_t heap_size, i;
282 
283 	memset(heap_space, 0, sizeof(struct heap_tree));
284 
285 	heap_size = 0;
286 	for (i = 0; i < hist_size; i++) {
287 		if (histogram[i] != 0)
288 			heap_space->heap[++heap_size] = ((histogram[i]) << FREQ_SHIFT) | i;
289 	}
290 
291 	// make sure heap has at least two elements in it
292 	if (heap_size < 2) {
293 		if (heap_size == 0) {
294 			heap_space->heap[1] = 1ULL << FREQ_SHIFT;
295 			heap_space->heap[2] = (1ULL << FREQ_SHIFT) | 1;
296 			heap_size = 2;
297 		} else {
298 			// heap size == 1
299 			if (histogram[0] == 0)
300 				heap_space->heap[2] = 1ULL << FREQ_SHIFT;
301 			else
302 				heap_space->heap[2] = (1ULL << FREQ_SHIFT) | 1;
303 			heap_size = 2;
304 		}
305 	}
306 
307 	build_heap_asm(heap_space->heap, heap_size);
308 
309 	return heap_size;
310 }
311 
312 static inline uint32_t init_heap64_complete(struct heap_tree *heap_space, uint64_t * histogram,
313 					    uint64_t hist_size)
314 {
315 	uint32_t heap_size, i;
316 
317 	memset(heap_space, 0, sizeof(struct heap_tree));
318 
319 	heap_size = 0;
320 	for (i = 0; i < hist_size; i++)
321 		heap_space->heap[++heap_size] = ((histogram[i]) << FREQ_SHIFT) | i;
322 
323 	build_heap_asm(heap_space->heap, heap_size);
324 
325 	return heap_size;
326 }
327 
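/*
 * Walk the Huffman tree produced by build_huff_tree, recording each leaf's
 * depth as its code length and counting codes of each length. If any length
 * exceeds max_code_len, redistribute the length counts (keeping the code
 * space complete) so no code is longer than max_code_len, then reassign
 * depths to the leaves in order. Returns the index one past the last leaf
 * written back into the tree array.
 */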
328 static inline uint32_t fix_code_lens(struct heap_tree *heap_space, uint32_t root_node,
329 				     uint32_t * bl_count, uint32_t max_code_len)
330 {
331 	struct tree_node *tree = heap_space->tree;
332 	uint64_t *code_len_count = heap_space->code_len_count;
333 	uint32_t i, j, k, child, depth, code_len;
334 
335 	// compute code lengths and code length counts
336 	code_len = 0;
337 	j = root_node;
338 	for (i = root_node; i <= HEAP_TREE_NODE_START; i++) {
339 		child = tree[i].child;
340 		if (child > MAX_HISTHEAP_SIZE) {
341 			depth = 1 + tree[i].depth;
342 
343 			tree[child].depth = depth;
344 			tree[child - 1].depth = depth;
345 		} else {
346 			tree[j++] = tree[i];
347 			depth = tree[i].depth;
348 			while (code_len < depth) {
349 				code_len++;
350 				code_len_count[code_len] = 0;
351 			}
352 			code_len_count[depth]++;
353 		}
354 	}
355 
356 	if (code_len > max_code_len) {
357 		while (code_len > max_code_len) {
358 			assert(code_len_count[code_len] > 1);
359 			for (i = max_code_len - 1; i != 0; i--)
360 				if (code_len_count[i] != 0)
361 					break;
362 			assert(i != 0);
363 			code_len_count[i]--;
364 			code_len_count[i + 1] += 2;
365 			code_len_count[code_len - 1]++;
366 			code_len_count[code_len] -= 2;
367 			if (code_len_count[code_len] == 0)
368 				code_len--;
369 		}
370 
371 		for (i = 1; i <= code_len; i++)
372 			bl_count[i] = code_len_count[i];
373 		for (; i <= max_code_len; i++)
374 			bl_count[i] = 0;
375 
376 		for (k = 1; code_len_count[k] == 0; k++) ;
377 		for (i = root_node; i < j; i++) {
378 			tree[i].depth = k;
379 			code_len_count[k]--;
380 			for (; code_len_count[k] == 0; k++) ;
381 		}
382 	} else {
383 		for (i = 1; i <= code_len; i++)
384 			bl_count[i] = code_len_count[i];
385 		for (; i <= max_code_len; i++)
386 			bl_count[i] = 0;
387 	}
388 
389 	return j;
390 
391 }
392 
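/*
 * Build the Huffman tree from the prepared heap and fill in codes[].length
 * with each symbol's code length (0 for unused symbols); bl_count[i] is set
 * to the number of codes of length i, for lengths up to max_code_len.
 */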
393 static inline void
394 gen_huff_code_lens(struct heap_tree *heap_space, uint32_t heap_size, uint32_t * bl_count,
395 		   struct huff_code *codes, uint32_t codes_count, uint32_t max_code_len)
396 {
397 	struct tree_node *tree = heap_space->tree;
398 	uint32_t root_node = HEAP_TREE_NODE_START, node_ptr;
399 	uint32_t end_node;
400 
401 	root_node = build_huff_tree(heap_space, heap_size, root_node);
402 
403 	end_node = fix_code_lens(heap_space, root_node, bl_count, max_code_len);
404 
405 	memset(codes, 0, codes_count * sizeof(*codes));
406 	for (node_ptr = root_node; node_ptr < end_node; node_ptr++)
407 		codes[tree[node_ptr].child].length = tree[node_ptr].depth;
408 
409 }
410 
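/*
 * Assign canonical Huffman codes from the code lengths (RFC 1951 section
 * 3.2.2). Codes are stored bit-reversed because the bit buffer emits bits
 * LSB first. Returns the largest symbol with a nonzero code length.
 */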
411 inline uint32_t set_huff_codes(struct huff_code *huff_code_table, int table_length,
412 			       uint32_t * count)
413 {
414 	/* Uses the algorithm described in the deflate standard, RFC 1951. */
415 	int i;
416 	uint16_t code = 0;
417 	uint16_t next_code[MAX_HUFF_TREE_DEPTH + 1];
418 	uint32_t max_code = 0;
419 
420 	next_code[0] = code;
421 
422 	for (i = 1; i < MAX_HUFF_TREE_DEPTH + 1; i++)
423 		next_code[i] = (next_code[i - 1] + count[i - 1]) << 1;
424 
425 	for (i = 0; i < table_length; i++) {
426 		if (huff_code_table[i].length != 0) {
427 			huff_code_table[i].code =
428 			    bit_reverse(next_code[huff_code_table[i].length],
429 					huff_code_table[i].length);
430 			next_code[huff_code_table[i].length] += 1;
431 			max_code = i;
432 		}
433 	}
434 
435 	return max_code;
436 }
437 
438 // on input, codes contain the code lengths
439 // on output, code contains:
440 // 23:16 code length
441 // 15:0  code value in low order bits
442 // returns max code value
443 static inline uint32_t set_dist_huff_codes(struct huff_code *codes, uint32_t * bl_count)
444 {
445 	uint32_t code, code_len, bits, i;
446 	uint32_t next_code[MAX_DEFLATE_CODE_LEN + 1];
447 	uint32_t max_code = 0;
448 	const uint32_t num_codes = DIST_LEN;
449 	const uint32_t num_eb[] = {
450 		0x0, 0x0, 0x0, 0x0, 0x1, 0x1, 0x2, 0x2,
451 		0x3, 0x3, 0x4, 0x4, 0x5, 0x5, 0x6, 0x6,
452 		0x7, 0x7, 0x8, 0x8, 0x9, 0x9, 0xa, 0xa,
453 		0xb, 0xb, 0xc, 0xc, 0xd, 0xd
454 	};
455 
456 	code = bl_count[0] = 0;
457 	for (bits = 1; bits <= MAX_HUFF_TREE_DEPTH; bits++) {
458 		code = (code + bl_count[bits - 1]) << 1;
459 		next_code[bits] = code;
460 	}
461 	for (i = 0; i < num_codes; i++) {
462 		code_len = codes[i].length;
463 		if (code_len != 0) {
464 			codes[i].code = bit_reverse(next_code[code_len], code_len);
465 			codes[i].extra_bit_count = num_eb[i];
466 			next_code[code_len] += 1;
467 			max_code = i;
468 		}
469 	}
470 	return max_code;
471 }
472 
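/*
 * Write a dynamic block header into header_bitbuf: the 3 block header bits,
 * the HLIT/HDIST/HCLEN fields, the code length code lengths in
 * code_length_code_order, and then the run-length encoded literal/length and
 * distance code lengths with their extra bits. Returns the number of bits
 * written.
 */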
473 int create_huffman_header(struct BitBuf2 *header_bitbuf,
474 			  struct huff_code *lookup_table,
475 			  struct rl_code *huffman_rep,
476 			  uint16_t huffman_rep_length, uint32_t end_of_block,
477 			  uint32_t hclen, uint32_t hlit, uint32_t hdist)
478 {
479 	/* hlit, hdist, hclen are as defined in the deflate standard; the low
480 	 * three bits written first form the deflate block header (BFINAL, BTYPE). */
481 	int i;
482 	uint64_t bit_count;
483 	uint64_t data;
484 	struct huff_code huffman_value;
485 	const uint32_t extra_bits[3] = { 2, 3, 7 };
486 
487 	bit_count = buffer_bits_used(header_bitbuf);
488 
489 	data = (end_of_block ? 5 : 4) | (hlit << 3) | (hdist << 8) | (hclen << 13);
490 	data |= ((lookup_table[code_length_code_order[0]].length) << DYN_HDR_START_LEN);
491 	write_bits(header_bitbuf, data, DYN_HDR_START_LEN + 3);
492 	data = 0;
493 	for (i = hclen + 3; i >= 1; i--)
494 		data = (data << 3) | lookup_table[code_length_code_order[i]].length;
495 
496 	write_bits(header_bitbuf, data, (hclen + 3) * 3);
497 
498 	for (i = 0; i < huffman_rep_length; i++) {
499 		huffman_value = lookup_table[huffman_rep[i].code];
500 
501 		write_bits(header_bitbuf, (uint64_t) huffman_value.code,
502 			   (uint32_t) huffman_value.length);
503 
504 		if (huffman_rep[i].code > 15) {
505 			write_bits(header_bitbuf, (uint64_t) huffman_rep[i].extra_bits,
506 				   (uint32_t) extra_bits[huffman_rep[i].code - 16]);
507 		}
508 	}
509 	bit_count = buffer_bits_used(header_bitbuf) - bit_count;
510 
511 	return bit_count;
512 }
513 
514 inline int create_header(struct BitBuf2 *header_bitbuf, struct rl_code *huffman_rep,
515 			 uint32_t length, uint64_t * histogram, uint32_t hlit,
516 			 uint32_t hdist, uint32_t end_of_block)
517 {
518 	int i;
519 
520 	uint32_t heap_size;
521 	struct heap_tree heap_space;
522 	uint32_t code_len_count[MAX_HUFF_TREE_DEPTH + 1];
523 	struct huff_code lookup_table[HUFF_LEN];
524 
525 	/* hlit, hdist, and hclen are defined in RFC 1951 page 13 */
526 	uint32_t hclen;
527 	uint64_t bit_count;
528 
529 	/* Create a huffman tree to encode run length encoded representation. */
530 	heap_size = init_heap64(&heap_space, histogram, HUFF_LEN);
531 	gen_huff_code_lens(&heap_space, heap_size, code_len_count,
532 			   (struct huff_code *)lookup_table, HUFF_LEN, 7);
533 	set_huff_codes(lookup_table, HUFF_LEN, code_len_count);
534 
535 	/* Calculate hclen */
536 	for (i = CODE_LEN_CODES - 1; i > 3; i--)	/* i must be at least 4 */
537 		if (lookup_table[code_length_code_order[i]].length != 0)
538 			break;
539 
540 	hclen = i - 3;
541 
542 	/* Generate actual header. */
543 	bit_count = create_huffman_header(header_bitbuf, lookup_table, huffman_rep,
544 					  length, end_of_block, hclen, hlit, hdist);
545 
546 	return bit_count;
547 }
548 
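/*
 * Emit run-length symbols for a run of RUN_LEN code lengths equal to
 * LAST_LEN, per RFC 1951 section 3.2.7: symbol 16 repeats the previous
 * length 3-6 times, 17 encodes a run of 3-10 zero lengths, and 18 a run of
 * 11-138 zero lengths; shorter runs are emitted literally. The histogram in
 * COUNTS is updated for every symbol written. Returns the advanced output
 * pointer.
 */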
549 static inline
550     struct rl_code *write_rl(struct rl_code *pout, uint16_t last_len, uint32_t run_len,
551 			     uint64_t * counts)
552 {
553 	if (last_len == 0) {
554 		while (run_len > 138) {
555 			pout->code = 18;
556 			pout->extra_bits = 138 - 11;
557 			pout++;
558 			run_len -= 138;
559 			counts[18]++;
560 		}
561 		// 1 <= run_len <= 138
562 		if (run_len > 10) {
563 			pout->code = 18;
564 			pout->extra_bits = run_len - 11;
565 			pout++;
566 			counts[18]++;
567 		} else if (run_len > 2) {
568 			pout->code = 17;
569 			pout->extra_bits = run_len - 3;
570 			pout++;
571 			counts[17]++;
572 		} else if (run_len == 1) {
573 			pout->code = 0;
574 			pout->extra_bits = 0;
575 			pout++;
576 			counts[0]++;
577 		} else {
578 			assert(run_len == 2);
579 			pout[0].code = 0;
580 			pout[0].extra_bits = 0;
581 			pout[1].code = 0;
582 			pout[1].extra_bits = 0;
583 			pout += 2;
584 			counts[0] += 2;
585 		}
586 	} else {
587 		// last_len != 0
588 		pout->code = last_len;
589 		pout->extra_bits = 0;
590 		pout++;
591 		counts[last_len]++;
592 		run_len--;
593 		if (run_len != 0) {
594 			while (run_len > 6) {
595 				pout->code = 16;
596 				pout->extra_bits = 6 - 3;
597 				pout++;
598 				run_len -= 6;
599 				counts[16]++;
600 			}
601 			// 1 <= run_len <= 6
602 			switch (run_len) {
603 			case 1:
604 				pout->code = last_len;
605 				pout->extra_bits = 0;
606 				pout++;
607 				counts[last_len]++;
608 				break;
609 			case 2:
610 				pout[0].code = last_len;
611 				pout[0].extra_bits = 0;
612 				pout[1].code = last_len;
613 				pout[1].extra_bits = 0;
614 				pout += 2;
615 				counts[last_len] += 2;
616 				break;
617 			default:	// 3...6
618 				pout->code = 16;
619 				pout->extra_bits = run_len - 3;
620 				pout++;
621 				counts[16]++;
622 			}
623 		}
624 	}
625 	return pout;
626 }
627 
628 // Convert code lengths into run-length symbols and write them into OUT;
629 // generate a histogram of the emitted symbols into COUNTS (assumed to be
630 // initialized to 0).
631 // Each OUT entry is a struct rl_code holding the symbol (0...18) and its
632 // extra bits (0...127).
633 // Returns the number of symbols written to OUT.
634 static inline uint32_t rl_encode(uint16_t * codes, uint32_t num_codes, uint64_t * counts,
635 				 struct rl_code *out)
636 {
637 	uint32_t i, run_len;
638 	uint16_t last_len, len;
639 	struct rl_code *pout;
640 
641 	pout = out;
642 	last_len = codes[0];
643 	run_len = 1;
644 	for (i = 1; i < num_codes; i++) {
645 		len = codes[i];
646 		if (len == last_len) {
647 			run_len++;
648 			continue;
649 		}
650 		pout = write_rl(pout, last_len, run_len, counts);
651 		last_len = len;
652 		run_len = 1;
653 	}
654 	pout = write_rl(pout, last_len, run_len, counts);
655 
656 	return (uint32_t) (pout - out);
657 }
658 
659 void create_code_tables(uint16_t * code_table, uint8_t * code_length_table, uint32_t length,
660 			struct huff_code *hufftable)
661 {
662 	int i;
663 	for (i = 0; i < length; i++) {
664 		code_table[i] = hufftable[i].code;
665 		code_length_table[i] = hufftable[i].length;
666 	}
667 }
668 
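/*
 * Build the encoder's length lookup table, indexed by (match length - 3).
 * Each 32-bit entry packs the length code's extra bits, the Huffman code,
 * and the total number of bits to write (code length + extra bit count) so
 * the encoder can emit a match length with a single table lookup.
 */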
669 void create_packed_len_table(uint32_t * packed_table, struct huff_code *lit_len_hufftable)
670 {
671 	int i, count = 0;
672 	uint16_t extra_bits;
673 	uint16_t extra_bits_count = 0;
674 
675 	/* gain_extra_bits is the next length symbol at which the number of
676 	 * extra bits in length codes increases. */
677 	uint16_t gain_extra_bits = LEN_EXTRA_BITS_START;
678 
679 	for (i = 257; i < LIT_LEN - 1; i++) {
680 		for (extra_bits = 0; extra_bits < (1 << extra_bits_count); extra_bits++) {
681 			if (count > 254)
682 				break;
683 			packed_table[count++] =
684 			    (extra_bits << (lit_len_hufftable[i].length + LENGTH_BITS)) |
685 			    (lit_len_hufftable[i].code << LENGTH_BITS) |
686 			    (lit_len_hufftable[i].length + extra_bits_count);
687 		}
688 
689 		if (i == gain_extra_bits) {
690 			gain_extra_bits += LEN_EXTRA_BITS_INTERVAL;
691 			extra_bits_count += 1;
692 		}
693 	}
694 
695 	packed_table[count] = (lit_len_hufftable[LIT_LEN - 1].code << LENGTH_BITS) |
696 	    (lit_len_hufftable[LIT_LEN - 1].length);
697 }
698 
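/*
 * Build the encoder's distance lookup table in the same packed format as the
 * length table above, with one entry per possible distance up to the table
 * length (indexed by distance - 1).
 */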
699 void create_packed_dist_table(uint32_t * packed_table, uint32_t length,
700 			      struct huff_code *dist_hufftable)
701 {
702 	int i, count = 0;
703 	uint16_t extra_bits;
704 	uint16_t extra_bits_count = 0;
705 
706 	/* gain_extra_bits is the next distance symbol at which the number of
707 	 * extra bits in distance codes increases. */
708 	uint16_t gain_extra_bits = DIST_EXTRA_BITS_START;
709 
710 	for (i = 0; i < DIST_LEN; i++) {
711 		for (extra_bits = 0; extra_bits < (1 << extra_bits_count); extra_bits++) {
712 			if (count >= length)
713 				return;
714 
715 			packed_table[count++] =
716 			    (extra_bits << (dist_hufftable[i].length + LENGTH_BITS)) |
717 			    (dist_hufftable[i].code << LENGTH_BITS) |
718 			    (dist_hufftable[i].length + extra_bits_count);
719 
720 		}
721 
722 		if (i == gain_extra_bits) {
723 			gain_extra_bits += DIST_EXTRA_BITS_INTERVAL;
724 			extra_bits_count += 1;
725 		}
726 	}
727 }
728 
729 int are_hufftables_useable(struct huff_code *lit_len_hufftable,
730 			   struct huff_code *dist_hufftable)
731 {
732 	int max_lit_code_len = 0, max_len_code_len = 0, max_dist_code_len = 0;
733 	int dist_extra_bits = 0, len_extra_bits = 0;
734 	int gain_dist_extra_bits = DIST_EXTRA_BITS_START;
735 	int gain_len_extra_bits = LEN_EXTRA_BITS_START;
736 	int max_code_len;
737 	int i;
738 
739 	for (i = 0; i < LIT_LEN; i++)
740 		if (lit_len_hufftable[i].length > max_lit_code_len)
741 			max_lit_code_len = lit_len_hufftable[i].length;
742 
743 	for (i = 257; i < LIT_LEN - 1; i++) {
744 		if (lit_len_hufftable[i].length + len_extra_bits > max_len_code_len)
745 			max_len_code_len = lit_len_hufftable[i].length + len_extra_bits;
746 
747 		if (i == gain_len_extra_bits) {
748 			gain_len_extra_bits += LEN_EXTRA_BITS_INTERVAL;
749 			len_extra_bits += 1;
750 		}
751 	}
752 
753 	for (i = 0; i < DIST_LEN; i++) {
754 		if (dist_hufftable[i].length + dist_extra_bits > max_dist_code_len)
755 			max_dist_code_len = dist_hufftable[i].length + dist_extra_bits;
756 
757 		if (i == gain_dist_extra_bits) {
758 			gain_dist_extra_bits += DIST_EXTRA_BITS_INTERVAL;
759 			dist_extra_bits += 1;
760 		}
761 	}
762 
763 	max_code_len = max_lit_code_len + max_len_code_len + max_dist_code_len;
764 
765 	/* Some versions of igzip can write up to one literal, one length and one
766 	 * distance code at the same time. Return nonzero if that worst case
767 	 * could overflow a single bitbuf write, i.e. the tables are not safe. */
768 	return (max_code_len > MAX_BITBUF_BIT_WRITE);
769 }
770 
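/*
 * Generate a complete set of compression tables from the supplied histogram:
 * build literal/length and distance Huffman codes (falling back to the
 * MAX_SAFE code length limits if the unconstrained codes could overflow a
 * bitbuf write), fill the encoder lookup tables, and precompute the dynamic
 * block header bits in hufftables->deflate_hdr.
 */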
771 int isal_create_hufftables(struct isal_hufftables *hufftables,
772 			   struct isal_huff_histogram *histogram)
773 {
774 	struct huff_code lit_huff_table[LIT_LEN], dist_huff_table[DIST_LEN];
775 	uint64_t bit_count;
776 	int max_dist = convert_dist_to_dist_sym(IGZIP_HIST_SIZE);
777 	struct heap_tree heap_space;
778 	uint32_t heap_size;
779 	uint32_t code_len_count[MAX_HUFF_TREE_DEPTH + 1];
780 	struct BitBuf2 header_bitbuf;
781 	uint32_t max_lit_len_sym;
782 	uint32_t max_dist_sym;
783 	uint32_t hlit, hdist, i;
784 	uint16_t combined_table[LIT_LEN + DIST_LEN];
785 	uint64_t count_histogram[HUFF_LEN];
786 	struct rl_code rl_huff[LIT_LEN + DIST_LEN];
787 	uint32_t rl_huff_len;
788 
789 	uint32_t *dist_table = hufftables->dist_table;
790 	uint32_t *len_table = hufftables->len_table;
791 	uint16_t *lit_table = hufftables->lit_table;
792 	uint16_t *dcodes = hufftables->dcodes;
793 	uint8_t *lit_table_sizes = hufftables->lit_table_sizes;
794 	uint8_t *dcodes_sizes = hufftables->dcodes_sizes;
795 	uint8_t *deflate_hdr = hufftables->deflate_hdr;
796 	uint64_t *lit_len_histogram = histogram->lit_len_histogram;
797 	uint64_t *dist_histogram = histogram->dist_histogram;
798 
799 	memset(hufftables, 0, sizeof(struct isal_hufftables));
800 
801 	heap_size = init_heap64_complete(&heap_space, lit_len_histogram, LIT_LEN);
802 	gen_huff_code_lens(&heap_space, heap_size, code_len_count,
803 			   (struct huff_code *)lit_huff_table, LIT_LEN, MAX_DEFLATE_CODE_LEN);
804 	max_lit_len_sym = set_huff_codes(lit_huff_table, LIT_LEN, code_len_count);
805 
806 	heap_size = init_heap64_complete(&heap_space, dist_histogram, DIST_LEN);
807 	gen_huff_code_lens(&heap_space, heap_size, code_len_count,
808 			   (struct huff_code *)dist_huff_table, max_dist,
809 			   MAX_DEFLATE_CODE_LEN);
810 	max_dist_sym = set_huff_codes(dist_huff_table, DIST_LEN, code_len_count);
811 
812 	if (are_hufftables_useable(lit_huff_table, dist_huff_table)) {
813 		heap_size = init_heap64_complete(&heap_space, lit_len_histogram, LIT_LEN);
814 		gen_huff_code_lens(&heap_space, heap_size, code_len_count,
815 				   (struct huff_code *)lit_huff_table, LIT_LEN,
816 				   MAX_SAFE_LIT_CODE_LEN);
817 		max_lit_len_sym = set_huff_codes(lit_huff_table, LIT_LEN, code_len_count);
818 
819 		heap_size = init_heap64_complete(&heap_space, dist_histogram, DIST_LEN);
820 		gen_huff_code_lens(&heap_space, heap_size, code_len_count,
821 				   (struct huff_code *)dist_huff_table, max_dist,
822 				   MAX_SAFE_DIST_CODE_LEN);
823 		max_dist_sym = set_huff_codes(dist_huff_table, DIST_LEN, code_len_count);
824 
825 	}
826 
827 	create_code_tables(dcodes, dcodes_sizes, DIST_LEN - DCODE_OFFSET,
828 			   dist_huff_table + DCODE_OFFSET);
829 
830 	create_code_tables(lit_table, lit_table_sizes, IGZIP_LIT_TABLE_SIZE, lit_huff_table);
831 
832 	create_packed_len_table(len_table, lit_huff_table);
833 	create_packed_dist_table(dist_table, IGZIP_DIST_TABLE_SIZE, dist_huff_table);
834 
835 	set_buf(&header_bitbuf, deflate_hdr, sizeof(deflate_hdr));
836 	init(&header_bitbuf);
837 
838 	hlit = max_lit_len_sym - 256;
839 	hdist = max_dist_sym;
840 
841 	/* Run length encode the length and distance huffman codes */
842 	memset(count_histogram, 0, sizeof(count_histogram));
843 	for (i = 0; i < 257 + hlit; i++)
844 		combined_table[i] = lit_huff_table[i].length;
845 	for (i = 0; i < 1 + hdist; i++)
846 		combined_table[i + hlit + 257] = dist_huff_table[i].length;
847 	rl_huff_len =
848 	    rl_encode(combined_table, hlit + 257 + hdist + 1, count_histogram, rl_huff);
849 
850 	/* Create header */
851 	bit_count =
852 	    create_header(&header_bitbuf, rl_huff, rl_huff_len,
853 			  count_histogram, hlit, hdist, LAST_BLOCK);
854 	flush(&header_bitbuf);
855 
856 	hufftables->deflate_hdr_count = bit_count / 8;
857 	hufftables->deflate_hdr_extra_bits = bit_count % 8;
858 
859 	return 0;
860 }
861 
862 int isal_create_hufftables_subset(struct isal_hufftables *hufftables,
863 				  struct isal_huff_histogram *histogram)
864 {
865 	struct huff_code lit_huff_table[LIT_LEN], dist_huff_table[DIST_LEN];
866 	uint64_t bit_count;
867 	int max_dist = convert_dist_to_dist_sym(IGZIP_HIST_SIZE);
868 	struct heap_tree heap_space;
869 	uint32_t heap_size;
870 	uint32_t code_len_count[MAX_HUFF_TREE_DEPTH + 1];
871 	struct BitBuf2 header_bitbuf;
872 	uint32_t max_lit_len_sym;
873 	uint32_t max_dist_sym;
874 	uint32_t hlit, hdist, i;
875 	uint16_t combined_table[LIT_LEN + DIST_LEN];
876 	uint64_t count_histogram[HUFF_LEN];
877 	struct rl_code rl_huff[LIT_LEN + DIST_LEN];
878 	uint32_t rl_huff_len;
879 
880 	uint32_t *dist_table = hufftables->dist_table;
881 	uint32_t *len_table = hufftables->len_table;
882 	uint16_t *lit_table = hufftables->lit_table;
883 	uint16_t *dcodes = hufftables->dcodes;
884 	uint8_t *lit_table_sizes = hufftables->lit_table_sizes;
885 	uint8_t *dcodes_sizes = hufftables->dcodes_sizes;
886 	uint8_t *deflate_hdr = hufftables->deflate_hdr;
887 	uint64_t *lit_len_histogram = histogram->lit_len_histogram;
888 	uint64_t *dist_histogram = histogram->dist_histogram;
889 
890 	memset(hufftables, 0, sizeof(struct isal_hufftables));
891 
892 	heap_size = init_heap64(&heap_space, lit_len_histogram, LIT_LEN);
893 	gen_huff_code_lens(&heap_space, heap_size, code_len_count,
894 			   (struct huff_code *)lit_huff_table, LIT_LEN, MAX_DEFLATE_CODE_LEN);
895 	max_lit_len_sym = set_huff_codes(lit_huff_table, LIT_LEN, code_len_count);
896 
897 	heap_size = init_heap64_complete(&heap_space, dist_histogram, DIST_LEN);
898 	gen_huff_code_lens(&heap_space, heap_size, code_len_count,
899 			   (struct huff_code *)dist_huff_table, max_dist,
900 			   MAX_DEFLATE_CODE_LEN);
901 	max_dist_sym = set_huff_codes(dist_huff_table, DIST_LEN, code_len_count);
902 
903 	if (are_hufftables_useable(lit_huff_table, dist_huff_table)) {
904 		heap_size = init_heap64_complete(&heap_space, lit_len_histogram, LIT_LEN);
905 		gen_huff_code_lens(&heap_space, heap_size, code_len_count,
906 				   (struct huff_code *)lit_huff_table, LIT_LEN,
907 				   MAX_SAFE_LIT_CODE_LEN);
908 		max_lit_len_sym = set_huff_codes(lit_huff_table, LIT_LEN, code_len_count);
909 
910 		heap_size = init_heap64_complete(&heap_space, dist_histogram, DIST_LEN);
911 		gen_huff_code_lens(&heap_space, heap_size, code_len_count,
912 				   (struct huff_code *)dist_huff_table, max_dist,
913 				   MAX_SAFE_DIST_CODE_LEN);
914 		max_dist_sym = set_huff_codes(dist_huff_table, DIST_LEN, code_len_count);
915 
916 	}
917 
918 	create_code_tables(dcodes, dcodes_sizes, DIST_LEN - DCODE_OFFSET,
919 			   dist_huff_table + DCODE_OFFSET);
920 
921 	create_code_tables(lit_table, lit_table_sizes, IGZIP_LIT_TABLE_SIZE, lit_huff_table);
922 
923 	create_packed_len_table(len_table, lit_huff_table);
924 	create_packed_dist_table(dist_table, IGZIP_DIST_TABLE_SIZE, dist_huff_table);
925 
926 	set_buf(&header_bitbuf, deflate_hdr, sizeof(deflate_hdr));
927 	init(&header_bitbuf);
928 
929 	hlit = max_lit_len_sym - 256;
930 	hdist = max_dist_sym;
931 
932 	/* Run length encode the length and distance huffman codes */
933 	memset(count_histogram, 0, sizeof(count_histogram));
934 	for (i = 0; i < 257 + hlit; i++)
935 		combined_table[i] = lit_huff_table[i].length;
936 	for (i = 0; i < 1 + hdist; i++)
937 		combined_table[i + hlit + 257] = dist_huff_table[i].length;
938 	rl_huff_len =
939 	    rl_encode(combined_table, hlit + 257 + hdist + 1, count_histogram, rl_huff);
940 
941 	/* Create header */
942 	bit_count =
943 	    create_header(&header_bitbuf, rl_huff, rl_huff_len,
944 			  count_histogram, hlit, hdist, LAST_BLOCK);
945 	flush(&header_bitbuf);
946 
947 	hufftables->deflate_hdr_count = bit_count / 8;
948 	hufftables->deflate_hdr_extra_bits = bit_count % 8;
949 
950 	return 0;
951 }
952 
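/*
 * Expand length codes 265-284 (the codes with 1-5 extra bits) so that every
 * possible extra-bits value gets its own precomputed code_and_extra entry
 * and total length, letting the ICF encoder write a length symbol and its
 * extra bits in one step; the entry one past the distance table is cleared.
 */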
953 void expand_hufftables_icf(struct hufftables_icf *hufftables)
954 {
955 	uint32_t i, eb, j, k, len, code;
956 	struct huff_code orig[21], *p_code;
957 	struct huff_code *lit_len_codes = hufftables->lit_len_table;
958 	struct huff_code *dist_codes = hufftables->dist_table;
959 
960 	for (i = 0; i < 21; i++)
961 		orig[i] = lit_len_codes[i + 265];
962 
963 	p_code = &lit_len_codes[265];
964 
965 	i = 0;
966 	for (eb = 1; eb < 6; eb++) {
967 		for (k = 0; k < 4; k++) {
968 			len = orig[i].length;
969 			code = orig[i++].code;
970 			for (j = 0; j < (1u << eb); j++) {
971 				p_code->code_and_extra = code | (j << len);
972 				p_code->length = len + eb;
973 				p_code++;
974 			}
975 		}		// end for k
976 	}			// end for eb
977 	// fix up last record
978 	p_code[-1] = orig[i];
979 
980 	dist_codes[DIST_LEN].code_and_extra = 0;
981 	dist_codes[DIST_LEN].length = 0;
982 }
983 
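/*
 * Build the Huffman tables used by the ICF path for a single block: flatten
 * the literal/length histogram, force an EOB symbol, generate literal/length
 * and distance codes, expand them for fast emission, and write the dynamic
 * block header into bb.
 */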
984 void
985 create_hufftables_icf(struct BitBuf2 *bb, struct hufftables_icf *hufftables,
986 		      struct isal_mod_hist *hist, uint32_t end_of_block)
987 {
988 	uint32_t bl_count[MAX_DEFLATE_CODE_LEN + 1];
989 	uint32_t max_ll_code, max_d_code;
990 	struct heap_tree heap_space;
991 	uint32_t heap_size;
992 	struct rl_code cl_tokens[LIT_LEN + DIST_LEN];
993 	uint32_t num_cl_tokens;
994 	uint64_t cl_counts[CODE_LEN_CODES];
995 	uint16_t combined_table[LIT_LEN + DIST_LEN];
996 	int i;
997 
998 	struct huff_code *ll_codes = hufftables->lit_len_table;
999 	struct huff_code *d_codes = hufftables->dist_table;
1000 	uint16_t *ll_hist = hist->ll_hist;
1001 	uint16_t *d_hist = hist->d_hist;
1002 
1003 	flatten_ll(hist->ll_hist);
1004 
1005 	// make sure EOB is present
1006 	if (ll_hist[256] == 0)
1007 		ll_hist[256] = 1;
1008 
1009 	heap_size = init_heap16(&heap_space, ll_hist, LIT_LEN);
1010 	gen_huff_code_lens(&heap_space, heap_size, bl_count,
1011 			   ll_codes, LIT_LEN, MAX_DEFLATE_CODE_LEN);
1012 	max_ll_code = set_huff_codes(ll_codes, LIT_LEN, bl_count);
1013 
1014 	heap_size = init_heap16(&heap_space, d_hist, DIST_LEN);
1015 	gen_huff_code_lens(&heap_space, heap_size, bl_count, d_codes,
1016 			   DIST_LEN, MAX_DEFLATE_CODE_LEN);
1017 	max_d_code = set_dist_huff_codes(d_codes, bl_count);
1018 
1019 	assert(max_ll_code >= 256);	// must be EOB code
1020 	assert(max_d_code != 0);
1021 
1022 	/* Run length encode the length and distance huffman codes */
1023 	memset(cl_counts, 0, sizeof(cl_counts));
1024 	for (i = 0; i < max_ll_code + 1; i++)
1025 		combined_table[i] = ll_codes[i].length;
1026 	for (i = 0; i < max_d_code + 1; i++)
1027 		combined_table[i + max_ll_code + 1] = d_codes[i].length;
1028 
1029 	expand_hufftables_icf(hufftables);
1030 
1031 	num_cl_tokens =
1032 	    rl_encode(combined_table, max_ll_code + max_d_code + 2, cl_counts, cl_tokens);
1033 
1034 	/* Create header */
1035 	create_header(bb, cl_tokens, num_cl_tokens, cl_counts, max_ll_code - 256, max_d_code,
1036 		      end_of_block);
1037 
1038 }
1039