xref: /isa-l/igzip/generate_custom_hufftables.c (revision 5a00eaec3325e6bc681424fe66b4680400bca540)
1660f49b0SGreg Tucker /**********************************************************************
2660f49b0SGreg Tucker   Copyright(c) 2011-2016 Intel Corporation All rights reserved.
3660f49b0SGreg Tucker 
4660f49b0SGreg Tucker   Redistribution and use in source and binary forms, with or without
5660f49b0SGreg Tucker   modification, are permitted provided that the following conditions
6660f49b0SGreg Tucker   are met:
7660f49b0SGreg Tucker     * Redistributions of source code must retain the above copyright
8660f49b0SGreg Tucker       notice, this list of conditions and the following disclaimer.
9660f49b0SGreg Tucker     * Redistributions in binary form must reproduce the above copyright
10660f49b0SGreg Tucker       notice, this list of conditions and the following disclaimer in
11660f49b0SGreg Tucker       the documentation and/or other materials provided with the
12660f49b0SGreg Tucker       distribution.
13660f49b0SGreg Tucker     * Neither the name of Intel Corporation nor the names of its
14660f49b0SGreg Tucker       contributors may be used to endorse or promote products derived
15660f49b0SGreg Tucker       from this software without specific prior written permission.
16660f49b0SGreg Tucker 
17660f49b0SGreg Tucker   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18660f49b0SGreg Tucker   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19660f49b0SGreg Tucker   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20660f49b0SGreg Tucker   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21660f49b0SGreg Tucker   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22660f49b0SGreg Tucker   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23660f49b0SGreg Tucker   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24660f49b0SGreg Tucker   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25660f49b0SGreg Tucker   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26660f49b0SGreg Tucker   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27660f49b0SGreg Tucker   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28660f49b0SGreg Tucker **********************************************************************/
29660f49b0SGreg Tucker 
/* This program can be used to generate a custom huffman encoding to get
 * better data compression. This is most useful when the type of data being
 * compressed is well known.
33660f49b0SGreg Tucker  *
34660f49b0SGreg Tucker  * To use generate_custom_hufftables, pass a sequence of files to the program
35660f49b0SGreg Tucker  * that together form an accurate representation of the data that is being
36660f49b0SGreg Tucker  * compressed. Generate_custom_hufftables will then produce the file
37660f49b0SGreg Tucker  * hufftables_c.c, which should be moved to replace its counterpart in the igzip
38660f49b0SGreg Tucker  * source folder. After recompiling the Isa-l library, the igzip compression
39660f49b0SGreg Tucker  * functions will use the new hufftables.
40660f49b0SGreg Tucker  *
41660f49b0SGreg Tucker  * Generate_custom_hufftables should be compiled with the same compile time
42660f49b0SGreg Tucker  * parameters as the igzip source code. Generating custom hufftables with
43660f49b0SGreg Tucker  * different compile time parameters may cause igzip to produce invalid output
44660f49b0SGreg Tucker  * for the reasons described below. The default parameters used by
45660f49b0SGreg Tucker  * generate_custom_hufftables are the same as the default parameters used by
46660f49b0SGreg Tucker  * igzip.
47660f49b0SGreg Tucker  *
 * *WARNING* generate_custom_hufftables must be compiled with an IGZIP_HIST_SIZE
 * that is at least as large as the IGZIP_HIST_SIZE used by igzip. By default
 * IGZIP_HIST_SIZE is 32K, which is also the maximum usable IGZIP_HIST_SIZE. To
 * generate better compression, look back distances larger than IGZIP_HIST_SIZE
 * are not assigned a huffman code, since igzip cannot produce look back
 * distances larger than the IGZIP_HIST_SIZE it was compiled with. The
 * definition of LONGER_HUFFTABLE must be consistent as well, since that
 * definition changes the size of the structures printed by this tool.
57660f49b0SGreg Tucker  *
58660f49b0SGreg Tucker  */
59660f49b0SGreg Tucker 
60660f49b0SGreg Tucker #include <stdint.h>
61660f49b0SGreg Tucker #include <stdio.h>
62660f49b0SGreg Tucker #include <inttypes.h>
6391fef2d3SRoy Oursler #include <string.h>
6491fef2d3SRoy Oursler #include <stdlib.h>
6591fef2d3SRoy Oursler #include "igzip_lib.h"
66660f49b0SGreg Tucker 
679968e7a0SGreg Tucker #include "huff_codes.h"
689968e7a0SGreg Tucker #include "huffman.h"
699968e7a0SGreg Tucker 
70660f49b0SGreg Tucker /*These max code lengths are limited by how the data is stored in
71660f49b0SGreg Tucker  * hufftables.asm. The deflate standard max is 15.*/
72660f49b0SGreg Tucker 
7388f95d85SRoy Oursler #define MAX_HEADER_SIZE ISAL_DEF_MAX_HDR_SIZE
74660f49b0SGreg Tucker 
75660f49b0SGreg Tucker #define GZIP_HEADER_SIZE 10
76660f49b0SGreg Tucker #define GZIP_TRAILER_SIZE 8
7742591691SRoy Oursler #define ZLIB_HEADER_SIZE 2
7842591691SRoy Oursler #define ZLIB_TRAILER_SIZE 4
79660f49b0SGreg Tucker 
/**
 * @brief Prints a table of uint8_t elements to a file.
 *
 * Elements are written as "0x%02x", comma separated, eight per row. Every row
 * starts with a newline followed by begin_line. When length is 0 only the
 * header and footer are written.
 *
 * @param outfile: the file the table is printed to.
 * @param table: the table to be printed.
 * @param length: number of elements to be printed.
 * @param header: header to append in front of the table.
 * @param footer: footer to append at the end of the table.
 * @param begin_line: string printed at beginning of new line
 */
void fprint_uint8_table(FILE * outfile, uint8_t * table, uint64_t length, char *header,
			char *footer, char *begin_line)
{
	/* Index has the same type as length so that neither a zero length nor
	 * a length above INT_MAX can wrap the loop bound. */
	uint64_t i;

	fprintf(outfile, "%s", header);
	for (i = 0; i < length; i++) {
		if ((i & 7) == 0)
			fprintf(outfile, "\n%s", begin_line);
		else
			fprintf(outfile, " ");
		fprintf(outfile, "0x%02x", table[i]);
		/* No separator after the final element. */
		if (i + 1 < length)
			fprintf(outfile, ",");
	}
	fprintf(outfile, "%s", footer);
}
110660f49b0SGreg Tucker 
/**
 * @brief Prints a table of uint16_t elements to a file.
 *
 * Elements are written as "0x%04x", comma separated, eight per row. Every row
 * starts with a newline followed by begin_line. When length is 0 only the
 * header and footer are written.
 *
 * @param outfile: the file the table is printed to.
 * @param table: the table to be printed.
 * @param length: number of elements to be printed.
 * @param header: header to append in front of the table.
 * @param footer: footer to append at the end of the table.
 * @param begin_line: string printed at beginning of new line
 */
void fprint_uint16_table(FILE * outfile, uint16_t * table, uint64_t length, char *header,
			 char *footer, char *begin_line)
{
	/* Index has the same type as length so that neither a zero length nor
	 * a length above INT_MAX can wrap the loop bound. */
	uint64_t i;

	fprintf(outfile, "%s", header);
	for (i = 0; i < length; i++) {
		if ((i & 7) == 0)
			fprintf(outfile, "\n%s", begin_line);
		else
			fprintf(outfile, " ");
		fprintf(outfile, "0x%04x", table[i]);
		/* No separator after the final element. */
		if (i + 1 < length)
			fprintf(outfile, ",");
	}
	fprintf(outfile, "%s", footer);
}
141660f49b0SGreg Tucker 
/**
 * @brief Prints a table of uint32_t elements to a file.
 *
 * Elements are written as eight hex digits with a "0x" prefix, comma
 * separated, four per row. Every row, including one that holds only the final
 * element, starts with a newline followed by begin_line. When length is 0 only
 * the header and footer are written.
 *
 * @param outfile: the file the table is printed to.
 * @param table: the table to be printed.
 * @param length: number of elements to be printed.
 * @param header: header to append in front of the table.
 * @param footer: footer to append at the end of the table.
 * @param begin_line: string printed at beginning of new line
 */
void fprint_uint32_table(FILE * outfile, uint32_t * table, uint64_t length, char *header,
			 char *footer, char *begin_line)
{
	/* Index has the same type as length so that neither a zero length nor
	 * a length above INT_MAX can wrap the loop bound. */
	uint64_t i;

	fprintf(outfile, "%s", header);
	for (i = 0; i < length; i++) {
		if ((i & 3) == 0)
			fprintf(outfile, "\n%s", begin_line);
		else
			fprintf(outfile, " ");
		/* PRIx32 keeps the conversion correct whatever type uint32_t maps to. */
		fprintf(outfile, "0x%08" PRIx32, table[i]);
		/* No separator after the final element. */
		if (i + 1 < length)
			fprintf(outfile, ",");
	}
	fprintf(outfile, "%s", footer);
}
172660f49b0SGreg Tucker 
17391fef2d3SRoy Oursler void fprint_hufftables(FILE * output_file, char *hufftables_name,
17491fef2d3SRoy Oursler 		       struct isal_hufftables *hufftables)
175660f49b0SGreg Tucker {
1767fefb539SRoy Oursler 	fprintf(output_file, "struct isal_hufftables %s = {\n\n", hufftables_name);
177660f49b0SGreg Tucker 
17891fef2d3SRoy Oursler 	fprint_uint8_table(output_file, hufftables->deflate_hdr,
17991fef2d3SRoy Oursler 			   hufftables->deflate_hdr_count +
18091fef2d3SRoy Oursler 			   (hufftables->deflate_hdr_extra_bits + 7) / 8,
18191fef2d3SRoy Oursler 			   "\t.deflate_hdr = {", "},\n\n", "\t\t");
182660f49b0SGreg Tucker 
18391fef2d3SRoy Oursler 	fprintf(output_file, "\t.deflate_hdr_count = %d,\n", hufftables->deflate_hdr_count);
18491fef2d3SRoy Oursler 	fprintf(output_file, "\t.deflate_hdr_extra_bits = %d,\n\n",
18591fef2d3SRoy Oursler 		hufftables->deflate_hdr_extra_bits);
186660f49b0SGreg Tucker 
18791fef2d3SRoy Oursler 	fprint_uint32_table(output_file, hufftables->dist_table, IGZIP_DIST_TABLE_SIZE,
18891fef2d3SRoy Oursler 			    "\t.dist_table = {", "},\n\n", "\t\t");
189660f49b0SGreg Tucker 
19091fef2d3SRoy Oursler 	fprint_uint32_table(output_file, hufftables->len_table, IGZIP_LEN_TABLE_SIZE,
19191fef2d3SRoy Oursler 			    "\t.len_table = {", "},\n\n", "\t\t");
19291fef2d3SRoy Oursler 
19391fef2d3SRoy Oursler 	fprint_uint16_table(output_file, hufftables->lit_table, IGZIP_LIT_TABLE_SIZE,
19491fef2d3SRoy Oursler 			    "\t.lit_table = {", "},\n\n", "\t\t");
19591fef2d3SRoy Oursler 	fprint_uint8_table(output_file, hufftables->lit_table_sizes, IGZIP_LIT_TABLE_SIZE,
19691fef2d3SRoy Oursler 			   "\t.lit_table_sizes = {", "},\n\n", "\t\t");
19791fef2d3SRoy Oursler 
19891fef2d3SRoy Oursler 	fprint_uint16_table(output_file, hufftables->dcodes,
19991fef2d3SRoy Oursler 			    ISAL_DEF_DIST_SYMBOLS - IGZIP_DECODE_OFFSET,
20091fef2d3SRoy Oursler 			    "\t.dcodes = {", "},\n\n", "\t\t");
20191fef2d3SRoy Oursler 	fprint_uint8_table(output_file, hufftables->dcodes_sizes,
20291fef2d3SRoy Oursler 			   ISAL_DEF_DIST_SYMBOLS - IGZIP_DECODE_OFFSET,
20391fef2d3SRoy Oursler 			   "\t.dcodes_sizes = {", "}\n", "\t\t");
204660f49b0SGreg Tucker 	fprintf(output_file, "};\n");
205660f49b0SGreg Tucker }
206660f49b0SGreg Tucker 
2077fefb539SRoy Oursler void fprint_header(FILE * output_file)
208660f49b0SGreg Tucker {
20991fef2d3SRoy Oursler 
210660f49b0SGreg Tucker 	fprintf(output_file, "#include <stdint.h>\n");
211660f49b0SGreg Tucker 	fprintf(output_file, "#include <igzip_lib.h>\n\n");
212660f49b0SGreg Tucker 
21391fef2d3SRoy Oursler 	fprintf(output_file, "#if IGZIP_HIST_SIZE > %d\n"
21491fef2d3SRoy Oursler 		"# error \"Invalid history size for the custom hufftable\"\n"
21591fef2d3SRoy Oursler 		"#endif\n", IGZIP_HIST_SIZE);
21691fef2d3SRoy Oursler 
21791fef2d3SRoy Oursler #ifdef LONGER_HUFFTABLE
21891fef2d3SRoy Oursler 	fprintf(output_file, "#ifndef LONGER_HUFFTABLE\n"
21991fef2d3SRoy Oursler 		"# error \"Custom hufftable requires LONGER_HUFFTABLE to be defined \"\n"
22091fef2d3SRoy Oursler 		"#endif\n");
22191fef2d3SRoy Oursler #else
22291fef2d3SRoy Oursler 	fprintf(output_file, "#ifdef LONGER_HUFFTABLE\n"
22391fef2d3SRoy Oursler 		"# error \"Custom hufftable requires LONGER_HUFFTABLE to not be defined \"\n"
22491fef2d3SRoy Oursler 		"#endif\n");
22591fef2d3SRoy Oursler #endif
22691fef2d3SRoy Oursler 	fprintf(output_file, "\n");
22791fef2d3SRoy Oursler 
228660f49b0SGreg Tucker 	fprintf(output_file, "const uint8_t gzip_hdr[] = {\n"
229660f49b0SGreg Tucker 		"\t0x1f, 0x8b, 0x08, 0x00, 0x00,\n" "\t0x00, 0x00, 0x00, 0x00, 0xff\t};\n\n");
230660f49b0SGreg Tucker 
231660f49b0SGreg Tucker 	fprintf(output_file, "const uint32_t gzip_hdr_bytes = %d;\n", GZIP_HEADER_SIZE);
23242591691SRoy Oursler 	fprintf(output_file, "const uint32_t gzip_trl_bytes = %d;\n\n", GZIP_TRAILER_SIZE);
23342591691SRoy Oursler 
23442591691SRoy Oursler 	fprintf(output_file, "const uint8_t zlib_hdr[] = { 0x78, 0x01 };\n\n");
23542591691SRoy Oursler 	fprintf(output_file, "const uint32_t zlib_hdr_bytes = %d;\n", ZLIB_HEADER_SIZE);
23642591691SRoy Oursler 	fprintf(output_file, "const uint32_t zlib_trl_bytes = %d;\n", ZLIB_TRAILER_SIZE);
237660f49b0SGreg Tucker }
238660f49b0SGreg Tucker 
/**
 * @brief  Returns the distance symbol value for a look back distance.
 * @param dist: look back distance; asserted to be in the range 1..32768.
 * @returns the symbol index, or ~0 when dist is above 32768 (only reachable
 * when asserts are compiled out).
 */
static uint32_t convert_dist_to_dist_sym(uint32_t dist)
{
	uint32_t shift = 0;

	assert(dist <= 32768 && dist > 0);

	if (dist > 32768)
		return ~0;

	/* NOTE(review): relies on bsr() from the project headers; presumably a
	 * bit-scan-reverse of (dist - 1) - confirm its exact return convention. */
	if (dist > 4)
		shift = bsr(dist - 1) - 2;

	return (shift * 2) + ((dist - 1) >> shift);
}
2499968e7a0SGreg Tucker 
/**
 * @brief  Returns the deflate symbol value for a repeat length.
 * @param length: repeat length; asserted to be in the range 3..258.
 * @returns the literal/length symbol (257..285) that covers length.
 */
static uint32_t convert_length_to_len_sym(uint32_t length)
{
	/* Based on tables on page 11 in RFC 1951. A length below 'limit' maps
	 * to base + ((length - 3) >> shift); rows are tried in order. */
	static const struct {
		uint32_t limit;
		uint32_t base;
		uint32_t shift;
	} ranges[] = {
		{11, 257, 0},
		{19, 261, 1},
		{35, 265, 2},
		{67, 269, 3},
		{131, 273, 4},
		{258, 277, 5},
	};
	uint32_t row;

	assert(length > 2 && length < 259);

	for (row = 0; row < sizeof(ranges) / sizeof(ranges[0]); row++) {
		if (length < ranges[row].limit)
			return ranges[row].base + ((length - 3) >> ranges[row].shift);
	}

	/* Only the maximum length falls through to the dedicated last symbol. */
	return 285;
}
2739968e7a0SGreg Tucker 
2749968e7a0SGreg Tucker void isal_update_histogram_dict(uint8_t * start_stream, int dict_length, int length,
2759968e7a0SGreg Tucker 				struct isal_huff_histogram *histogram)
2769968e7a0SGreg Tucker {
2779968e7a0SGreg Tucker 	uint32_t literal = 0, hash;
2789968e7a0SGreg Tucker 	uint16_t seen, *last_seen = histogram->hash_table;
2799968e7a0SGreg Tucker 	uint8_t *current, *end_stream, *next_hash, *end, *end_dict;
2809968e7a0SGreg Tucker 	uint32_t match_length;
2819968e7a0SGreg Tucker 	uint32_t dist;
2829968e7a0SGreg Tucker 	uint64_t *lit_len_histogram = histogram->lit_len_histogram;
2839968e7a0SGreg Tucker 	uint64_t *dist_histogram = histogram->dist_histogram;
2849968e7a0SGreg Tucker 
2859968e7a0SGreg Tucker 	if (length <= 0)
2869968e7a0SGreg Tucker 		return;
2879968e7a0SGreg Tucker 
2889968e7a0SGreg Tucker 	end_stream = start_stream + dict_length + length;
2899968e7a0SGreg Tucker 	end_dict = start_stream + dict_length;
2909968e7a0SGreg Tucker 
2919968e7a0SGreg Tucker 	memset(last_seen, 0, sizeof(histogram->hash_table));	/* Initialize last_seen to be 0. */
2929968e7a0SGreg Tucker 
2939968e7a0SGreg Tucker 	for (current = start_stream; current < end_dict - 4; current++) {
294d3cfb2fbSIlya Leoshkevich 		literal = load_le_u32(current);
2959968e7a0SGreg Tucker 		hash = compute_hash(literal) & LVL0_HASH_MASK;
2969968e7a0SGreg Tucker 		last_seen[hash] = (current - start_stream) & 0xFFFF;
2979968e7a0SGreg Tucker 	}
2989968e7a0SGreg Tucker 
2999968e7a0SGreg Tucker 	for (current = start_stream + dict_length; current < end_stream - 3; current++) {
300d3cfb2fbSIlya Leoshkevich 		literal = load_le_u32(current);
3019968e7a0SGreg Tucker 		hash = compute_hash(literal) & LVL0_HASH_MASK;
3029968e7a0SGreg Tucker 		seen = last_seen[hash];
3039968e7a0SGreg Tucker 		last_seen[hash] = (current - start_stream) & 0xFFFF;
3049968e7a0SGreg Tucker 		dist = (current - start_stream - seen) & 0xFFFF;
3059968e7a0SGreg Tucker 		if (dist - 1 < D - 1) {
3069968e7a0SGreg Tucker 			assert(start_stream <= current - dist);
3079968e7a0SGreg Tucker 			match_length =
3089968e7a0SGreg Tucker 			    compare258(current - dist, current, end_stream - current);
3099968e7a0SGreg Tucker 			if (match_length >= SHORTEST_MATCH) {
3109968e7a0SGreg Tucker 				next_hash = current;
3119968e7a0SGreg Tucker #ifdef ISAL_LIMIT_HASH_UPDATE
3129968e7a0SGreg Tucker 				end = next_hash + 3;
3139968e7a0SGreg Tucker #else
3149968e7a0SGreg Tucker 				end = next_hash + match_length;
3159968e7a0SGreg Tucker #endif
3169968e7a0SGreg Tucker 				if (end > end_stream - 3)
3179968e7a0SGreg Tucker 					end = end_stream - 3;
3189968e7a0SGreg Tucker 				next_hash++;
3199968e7a0SGreg Tucker 				for (; next_hash < end; next_hash++) {
320d3cfb2fbSIlya Leoshkevich 					literal = load_le_u32(next_hash);
3219968e7a0SGreg Tucker 					hash = compute_hash(literal) & LVL0_HASH_MASK;
3229968e7a0SGreg Tucker 					last_seen[hash] = (next_hash - start_stream) & 0xFFFF;
3239968e7a0SGreg Tucker 				}
3249968e7a0SGreg Tucker 
3259968e7a0SGreg Tucker 				dist_histogram[convert_dist_to_dist_sym(dist)] += 1;
3269968e7a0SGreg Tucker 				lit_len_histogram[convert_length_to_len_sym(match_length)] +=
3279968e7a0SGreg Tucker 				    1;
3289968e7a0SGreg Tucker 				current += match_length - 1;
3299968e7a0SGreg Tucker 				continue;
3309968e7a0SGreg Tucker 			}
3319968e7a0SGreg Tucker 		}
3329968e7a0SGreg Tucker 		lit_len_histogram[literal & 0xFF] += 1;
3339968e7a0SGreg Tucker 	}
3349968e7a0SGreg Tucker 
3359968e7a0SGreg Tucker 	for (; current < end_stream; current++)
3369968e7a0SGreg Tucker 		lit_len_histogram[*current] += 1;
3379968e7a0SGreg Tucker 
3389968e7a0SGreg Tucker 	lit_len_histogram[256] += 1;
3399968e7a0SGreg Tucker 	return;
3409968e7a0SGreg Tucker }
3419968e7a0SGreg Tucker 
342660f49b0SGreg Tucker int main(int argc, char *argv[])
343660f49b0SGreg Tucker {
344660f49b0SGreg Tucker 	long int file_length;
3459968e7a0SGreg Tucker 	int argi = 1;
346660f49b0SGreg Tucker 	uint8_t *stream = NULL;
34791fef2d3SRoy Oursler 	struct isal_hufftables hufftables;
348660f49b0SGreg Tucker 	struct isal_huff_histogram histogram;
34991fef2d3SRoy Oursler 	struct isal_zstream tmp_stream;
3509968e7a0SGreg Tucker 	FILE *file = NULL;
3519968e7a0SGreg Tucker 	FILE *dict_file = NULL;
352438ecd81SGreg Tucker 	FILE *hist_file = NULL;
3539968e7a0SGreg Tucker 	long int dict_file_length = 0;
354438ecd81SGreg Tucker 	long int hist_file_length = 0;
3559968e7a0SGreg Tucker 	uint8_t *dict_stream = NULL;
356660f49b0SGreg Tucker 
357660f49b0SGreg Tucker 	if (argc == 1) {
358660f49b0SGreg Tucker 		printf("Error, no input file.\n");
359660f49b0SGreg Tucker 		return 1;
360660f49b0SGreg Tucker 	}
361660f49b0SGreg Tucker 
3629968e7a0SGreg Tucker 	if (argc > 3 && argv[1][0] == '-' && argv[1][1] == 'd') {
3639968e7a0SGreg Tucker 		dict_file = fopen(argv[2], "r");
364*5a00eaecSTomasz Kantecki 		if (dict_file == NULL) {
365*5a00eaecSTomasz Kantecki 			printf("File \"%s\" open error!\n", argv[2]);
366*5a00eaecSTomasz Kantecki 			return 1;
367*5a00eaecSTomasz Kantecki 		}
3689968e7a0SGreg Tucker 
3699968e7a0SGreg Tucker 		fseek(dict_file, 0, SEEK_END);
3709968e7a0SGreg Tucker 		dict_file_length = ftell(dict_file);
3719968e7a0SGreg Tucker 		fseek(dict_file, 0, SEEK_SET);
3729968e7a0SGreg Tucker 		dict_file_length -= ftell(dict_file);
3739968e7a0SGreg Tucker 		dict_stream = malloc(dict_file_length);
3749968e7a0SGreg Tucker 		if (dict_stream == NULL) {
3759968e7a0SGreg Tucker 			printf("Failed to allocate memory to read in dictionary file\n");
3769968e7a0SGreg Tucker 			fclose(dict_file);
3779968e7a0SGreg Tucker 			return 1;
3789968e7a0SGreg Tucker 		}
3799968e7a0SGreg Tucker 		if (fread(dict_stream, 1, dict_file_length, dict_file) != dict_file_length) {
3809968e7a0SGreg Tucker 			printf("Error occurred when reading dictionary file");
3819968e7a0SGreg Tucker 			fclose(dict_file);
3829968e7a0SGreg Tucker 			free(dict_stream);
3839968e7a0SGreg Tucker 			return 1;
3849968e7a0SGreg Tucker 		}
3859968e7a0SGreg Tucker 		isal_update_histogram(dict_stream, dict_file_length, &histogram);
3869968e7a0SGreg Tucker 
3879968e7a0SGreg Tucker 		printf("Read %ld bytes of dictionary file %s\n", dict_file_length, argv[2]);
3889968e7a0SGreg Tucker 		argi += 2;
3899968e7a0SGreg Tucker 		fclose(dict_file);
3909968e7a0SGreg Tucker 		free(dict_stream);
3919968e7a0SGreg Tucker 	}
3929968e7a0SGreg Tucker 
393438ecd81SGreg Tucker 	if ((argc > argi + 1) && argv[argi][0] == '-' && argv[argi][1] == 'h') {
394438ecd81SGreg Tucker 		hist_file = fopen(argv[argi + 1], "r+");
395*5a00eaecSTomasz Kantecki 		if (hist_file == NULL) {
396*5a00eaecSTomasz Kantecki 			printf("File \"%s\" open error!\n", argv[argi + 1]);
397*5a00eaecSTomasz Kantecki 			return 1;
398*5a00eaecSTomasz Kantecki 		}
399438ecd81SGreg Tucker 		fseek(hist_file, 0, SEEK_END);
400438ecd81SGreg Tucker 		hist_file_length = ftell(hist_file);
401438ecd81SGreg Tucker 		fseek(hist_file, 0, SEEK_SET);
402438ecd81SGreg Tucker 		hist_file_length -= ftell(hist_file);
403438ecd81SGreg Tucker 		if (hist_file_length > sizeof(histogram)) {
404438ecd81SGreg Tucker 			printf("Histogram file too long\n");
405438ecd81SGreg Tucker 			return 1;
406438ecd81SGreg Tucker 		}
407438ecd81SGreg Tucker 		if (fread(&histogram, 1, hist_file_length, hist_file) != hist_file_length) {
408438ecd81SGreg Tucker 			printf("Error occurred when reading history file");
409438ecd81SGreg Tucker 			fclose(hist_file);
410438ecd81SGreg Tucker 			return 1;
411438ecd81SGreg Tucker 		}
412438ecd81SGreg Tucker 		fseek(hist_file, 0, SEEK_SET);
413438ecd81SGreg Tucker 
414438ecd81SGreg Tucker 		printf("Read %ld bytes of history file %s\n", hist_file_length,
415438ecd81SGreg Tucker 		       argv[argi + 1]);
416438ecd81SGreg Tucker 		argi += 2;
417438ecd81SGreg Tucker 	} else
418660f49b0SGreg Tucker 		memset(&histogram, 0, sizeof(histogram));	/* Initialize histograms. */
419660f49b0SGreg Tucker 
4209968e7a0SGreg Tucker 	while (argi < argc) {
4219968e7a0SGreg Tucker 		printf("Processing %s\n", argv[argi]);
4229968e7a0SGreg Tucker 		file = fopen(argv[argi], "r");
423660f49b0SGreg Tucker 		if (file == NULL) {
424660f49b0SGreg Tucker 			printf("Error opening file\n");
425660f49b0SGreg Tucker 			return 1;
426660f49b0SGreg Tucker 		}
427660f49b0SGreg Tucker 		fseek(file, 0, SEEK_END);
428660f49b0SGreg Tucker 		file_length = ftell(file);
429660f49b0SGreg Tucker 		fseek(file, 0, SEEK_SET);
430660f49b0SGreg Tucker 		file_length -= ftell(file);
4319968e7a0SGreg Tucker 		stream = malloc(file_length + dict_file_length);
432660f49b0SGreg Tucker 		if (stream == NULL) {
433660f49b0SGreg Tucker 			printf("Failed to allocate memory to read in file\n");
434660f49b0SGreg Tucker 			fclose(file);
435660f49b0SGreg Tucker 			return 1;
436660f49b0SGreg Tucker 		}
4379968e7a0SGreg Tucker 		if (dict_file_length > 0)
4389968e7a0SGreg Tucker 			memcpy(stream, dict_stream, dict_file_length);
4399968e7a0SGreg Tucker 
4409968e7a0SGreg Tucker 		if (fread(&stream[dict_file_length], 1, file_length, file) != file_length) {
441660f49b0SGreg Tucker 			printf("Error occurred when reading file");
442660f49b0SGreg Tucker 			fclose(file);
443660f49b0SGreg Tucker 			free(stream);
444660f49b0SGreg Tucker 			return 1;
445660f49b0SGreg Tucker 		}
446660f49b0SGreg Tucker 
447660f49b0SGreg Tucker 		/* Create a histogram of frequency of symbols found in stream to
448660f49b0SGreg Tucker 		 * generate the huffman tree.*/
4499968e7a0SGreg Tucker 		if (0 == dict_file_length)
450660f49b0SGreg Tucker 			isal_update_histogram(stream, file_length, &histogram);
4519968e7a0SGreg Tucker 		else
4529968e7a0SGreg Tucker 			isal_update_histogram_dict(stream, dict_file_length, file_length,
4539968e7a0SGreg Tucker 						   &histogram);
454660f49b0SGreg Tucker 
455660f49b0SGreg Tucker 		fclose(file);
456660f49b0SGreg Tucker 		free(stream);
4579968e7a0SGreg Tucker 		argi++;
458660f49b0SGreg Tucker 	}
459660f49b0SGreg Tucker 
46091fef2d3SRoy Oursler 	isal_create_hufftables(&hufftables, &histogram);
461660f49b0SGreg Tucker 
462660f49b0SGreg Tucker 	file = fopen("hufftables_c.c", "w");
463660f49b0SGreg Tucker 	if (file == NULL) {
464660f49b0SGreg Tucker 		printf("Error creating file hufftables_c.c\n");
465660f49b0SGreg Tucker 		return 1;
466660f49b0SGreg Tucker 	}
467660f49b0SGreg Tucker 
4687fefb539SRoy Oursler 	fprint_header(file);
4697fefb539SRoy Oursler 
4707fefb539SRoy Oursler 	fprintf(file, "\n");
4717fefb539SRoy Oursler 
47291fef2d3SRoy Oursler 	fprint_hufftables(file, "hufftables_default", &hufftables);
4737fefb539SRoy Oursler 
4747fefb539SRoy Oursler 	fprintf(file, "\n");
4757fefb539SRoy Oursler 
47691fef2d3SRoy Oursler 	isal_deflate_stateless_init(&tmp_stream);
47791fef2d3SRoy Oursler 	isal_deflate_set_hufftables(&tmp_stream, NULL, IGZIP_HUFFTABLE_STATIC);
47891fef2d3SRoy Oursler 	fprint_hufftables(file, "hufftables_static", tmp_stream.hufftables);
479660f49b0SGreg Tucker 
480660f49b0SGreg Tucker 	fclose(file);
481660f49b0SGreg Tucker 
482438ecd81SGreg Tucker 	if (hist_file) {
483438ecd81SGreg Tucker 		int len = fwrite(&histogram, 1, sizeof(histogram), hist_file);
484438ecd81SGreg Tucker 		printf("wrote %d bytes of histogram file\n", len);
485438ecd81SGreg Tucker 		fclose(hist_file);
486438ecd81SGreg Tucker 	}
487660f49b0SGreg Tucker 	return 0;
488660f49b0SGreg Tucker }
489