xref: /isa-l/igzip/generate_custom_hufftables.c (revision 9d53af0c7cc8739169f0349ddf1165a42a4bc24b)
1660f49b0SGreg Tucker /**********************************************************************
2660f49b0SGreg Tucker   Copyright(c) 2011-2016 Intel Corporation All rights reserved.
3660f49b0SGreg Tucker 
4660f49b0SGreg Tucker   Redistribution and use in source and binary forms, with or without
5660f49b0SGreg Tucker   modification, are permitted provided that the following conditions
6660f49b0SGreg Tucker   are met:
7660f49b0SGreg Tucker     * Redistributions of source code must retain the above copyright
8660f49b0SGreg Tucker       notice, this list of conditions and the following disclaimer.
9660f49b0SGreg Tucker     * Redistributions in binary form must reproduce the above copyright
10660f49b0SGreg Tucker       notice, this list of conditions and the following disclaimer in
11660f49b0SGreg Tucker       the documentation and/or other materials provided with the
12660f49b0SGreg Tucker       distribution.
13660f49b0SGreg Tucker     * Neither the name of Intel Corporation nor the names of its
14660f49b0SGreg Tucker       contributors may be used to endorse or promote products derived
15660f49b0SGreg Tucker       from this software without specific prior written permission.
16660f49b0SGreg Tucker 
17660f49b0SGreg Tucker   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18660f49b0SGreg Tucker   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19660f49b0SGreg Tucker   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20660f49b0SGreg Tucker   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21660f49b0SGreg Tucker   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22660f49b0SGreg Tucker   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23660f49b0SGreg Tucker   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24660f49b0SGreg Tucker   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25660f49b0SGreg Tucker   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26660f49b0SGreg Tucker   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27660f49b0SGreg Tucker   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28660f49b0SGreg Tucker **********************************************************************/
29660f49b0SGreg Tucker 
30660f49b0SGreg Tucker /* This program can be used to generate a custom huffman encoding to get
31660f49b0SGreg Tucker  * better data compression. This is most useful when the type of data being
32660f49b0SGreg Tucker  * compressed is well known.
33660f49b0SGreg Tucker  *
34660f49b0SGreg Tucker  * To use generate_custom_hufftables, pass a sequence of files to the program
35660f49b0SGreg Tucker  * that together form an accurate representation of the data that is being
36660f49b0SGreg Tucker  * compressed. Generate_custom_hufftables will then produce the file
37660f49b0SGreg Tucker  * hufftables_c.c, which should be moved to replace its counterpart in the igzip
38660f49b0SGreg Tucker  * source folder. After recompiling the Isa-l library, the igzip compression
39660f49b0SGreg Tucker  * functions will use the new hufftables.
40660f49b0SGreg Tucker  *
41660f49b0SGreg Tucker  * Generate_custom_hufftables should be compiled with the same compile time
42660f49b0SGreg Tucker  * parameters as the igzip source code. Generating custom hufftables with
43660f49b0SGreg Tucker  * different compile time parameters may cause igzip to produce invalid output
44660f49b0SGreg Tucker  * for the reasons described below. The default parameters used by
45660f49b0SGreg Tucker  * generate_custom_hufftables are the same as the default parameters used by
46660f49b0SGreg Tucker  * igzip.
47660f49b0SGreg Tucker  *
48660f49b0SGreg Tucker  * *WARNING* generate_custom_hufftables must be compiled with a HIST_SIZE that
49660f49b0SGreg Tucker  * is at least as large as the HIST_SIZE used by igzip. By default HIST_SIZE is
50660f49b0SGreg Tucker  * 8, the maximum usable HIST_SIZE is 32. The reason for this is to generate
51660f49b0SGreg Tucker  * better compression. Igzip cannot produce look back distances with sizes
52660f49b0SGreg Tucker  * larger than the HIST_SIZE * 1024 igzip was compiled with, so look back
53660f49b0SGreg Tucker  * distances with sizes larger than HIST_SIZE * 1024 are not assigned a huffman
54660f49b0SGreg Tucker  * code.
55660f49b0SGreg Tucker  *
56660f49b0SGreg Tucker  * To improve compression ratio, the compile time option LIT_SUB is provided to
57660f49b0SGreg Tucker  * allow generating custom hufftables which only use a subset of all possible
58660f49b0SGreg Tucker  * literals. This can be useful for getting better compression when it is known
59660f49b0SGreg Tucker  * that the data being compressed will never contain certain symbols, for
60660f49b0SGreg Tucker  * example text files. If this option is used, it needs to be checked that every
61660f49b0SGreg Tucker  * possible literal is in fact given a valid code in the output hufftable. This
62660f49b0SGreg Tucker  * can be done by checking that every required literal has a positive value for
63660f49b0SGreg Tucker  * the length of the code associated with that literal. Literals which have not
64660f49b0SGreg Tucker  * been given codes will have a code length of zero. The compile time option
65660f49b0SGreg Tucker  * PRINT_CODES (described below) can be used to help manually perform this
66660f49b0SGreg Tucker  * check.
67660f49b0SGreg Tucker  *
68660f49b0SGreg Tucker  * The compile time parameter PRINT_CODES causes the literal/length huffman code
69660f49b0SGreg Tucker  * and the distance huffman code created by generate_custom_hufftables to be
70660f49b0SGreg Tucker  * printed out. This is printed out where each line corresponds to a different
71660f49b0SGreg Tucker  * symbol. The first column is the symbol used to represent each literal (Lit),
72660f49b0SGreg Tucker  * end of block symbol (EOB), length (Len) or distance (Dist), the second column
73660f49b0SGreg Tucker  * is the associated code value, and the third column is the length in bits of
74660f49b0SGreg Tucker  * that code.
75660f49b0SGreg Tucker  */
76660f49b0SGreg Tucker 
77660f49b0SGreg Tucker #include <stdint.h>
78660f49b0SGreg Tucker #include <stdio.h>
79660f49b0SGreg Tucker #include <inttypes.h>
80660f49b0SGreg Tucker #include "huff_codes.h"
81660f49b0SGreg Tucker #include "bitbuf2.h"
82660f49b0SGreg Tucker 
83660f49b0SGreg Tucker /*These max code lengths are limited by how the data is stored in
84660f49b0SGreg Tucker  * hufftables.asm. The deflate standard max is 15.*/
85660f49b0SGreg Tucker 
/* First distance-code index written to the generated .dcodes and
 * .dcodes_sizes tables: LONG_DCODE_OFFSET is used in the branch emitted
 * under "#else" (LONGER_HUFFTABLE defined), SHORT_DCODE_OFFSET in the branch
 * emitted under "#ifndef LONGER_HUFFTABLE". */
86660f49b0SGreg Tucker #define LONG_DCODE_OFFSET 26
87*9d53af0cSRoy Oursler #define SHORT_DCODE_OFFSET 0
88660f49b0SGreg Tucker 
/* Size in bytes of the buffer main() passes to create_header(). */
89660f49b0SGreg Tucker #define MAX_HEADER_SIZE IGZIP_MAX_DEF_HDR_SIZE
90660f49b0SGreg Tucker 
/* Values written to the generated file as gzip_hdr_bytes and
 * gzip_trl_bytes respectively. */
91660f49b0SGreg Tucker #define GZIP_HEADER_SIZE 10
92660f49b0SGreg Tucker #define GZIP_TRAILER_SIZE 8
93660f49b0SGreg Tucker 
/**
 * @brief Prints a table of uint8_t elements to a file.
 *
 * Elements are written as "0x%02x" literals separated by commas, eight per
 * row. Every row starts on a new line prefixed with begin_line and no comma
 * follows the final element. When length is 0 only header and footer are
 * written.
 *
 * @param outfile: the file the table is printed to.
 * @param table: the table to be printed.
 * @param length: number of elements to be printed.
 * @param header: header to append in front of the table.
 * @param footer: footer to append at the end of the table.
 * @param begin_line: string printed at beginning of new line
 */
void fprint_uint8_table(FILE * outfile, uint8_t * table, uint64_t length, char *header,
			char *footer, char *begin_line)
{
	/* Same type as length, so the comparison below cannot wrap or truncate. */
	uint64_t i;

	fprintf(outfile, "%s", header);
	for (i = 0; i < length; i++) {
		if ((i & 7) == 0)
			fprintf(outfile, "\n%s", begin_line);
		else
			fprintf(outfile, " ");
		fprintf(outfile, "0x%02x", table[i]);
		/* Every element except the last one is followed by a comma. */
		if (i + 1 < length)
			fprintf(outfile, ",");
	}
	fprintf(outfile, "%s", footer);
}
124660f49b0SGreg Tucker 
/**
 * @brief Prints a table of uint16_t elements to a file.
 *
 * Elements are written as "0x%04x" literals separated by commas, eight per
 * row. Every row starts on a new line prefixed with begin_line and no comma
 * follows the final element. When length is 0 only header and footer are
 * written.
 *
 * @param outfile: the file the table is printed to.
 * @param table: the table to be printed.
 * @param length: number of elements to be printed.
 * @param header: header to append in front of the table.
 * @param footer: footer to append at the end of the table.
 * @param begin_line: string printed at beginning of new line
 */
void fprint_uint16_table(FILE * outfile, uint16_t * table, uint64_t length, char *header,
			 char *footer, char *begin_line)
{
	/* Same type as length, so the comparison below cannot wrap or truncate. */
	uint64_t i;

	fprintf(outfile, "%s", header);
	for (i = 0; i < length; i++) {
		if ((i & 7) == 0)
			fprintf(outfile, "\n%s", begin_line);
		else
			fprintf(outfile, " ");
		fprintf(outfile, "0x%04x", table[i]);
		/* Every element except the last one is followed by a comma. */
		if (i + 1 < length)
			fprintf(outfile, ",");
	}
	fprintf(outfile, "%s", footer);
}
155660f49b0SGreg Tucker 
/**
 * @brief Prints a table of uint32_t elements to a file.
 *
 * Elements are written as 8-digit hexadecimal literals separated by commas,
 * four per row. Every row, including one that holds only the final element,
 * starts on a new line prefixed with begin_line. No comma follows the final
 * element. When length is 0 only header and footer are written.
 *
 * @param outfile: the file the table is printed to.
 * @param table: the table to be printed.
 * @param length: number of elements to be printed.
 * @param header: header to append in front of the table.
 * @param footer: footer to append at the end of the table.
 * @param begin_line: string printed at beginning of new line
 */
void fprint_uint32_table(FILE * outfile, uint32_t * table, uint64_t length, char *header,
			 char *footer, char *begin_line)
{
	/* Same type as length, so the comparison below cannot wrap or truncate. */
	uint64_t i;

	fprintf(outfile, "%s", header);
	for (i = 0; i < length; i++) {
		/* The row break always includes the newline, so a value can
		 * never end up on the same line as the header text. */
		if ((i & 3) == 0)
			fprintf(outfile, "\n%s", begin_line);
		else
			fprintf(outfile, " ");
		fprintf(outfile, "0x%08" PRIx32, table[i]);
		/* Every element except the last one is followed by a comma. */
		if (i + 1 < length)
			fprintf(outfile, ",");
	}
	fprintf(outfile, "%s", footer);
}
186660f49b0SGreg Tucker 
/**
 * @brief Prints a table of uint64_t elements to a file.
 *
 * The header is followed by a newline, then each element is written on its
 * own tab-indented line as a 16-digit hexadecimal literal. Elements are
 * separated by commas; no comma or newline follows the final element. When
 * length is 0 only the header line and the footer are written.
 *
 * @param outfile: the file the table is printed to.
 * @param table: the table to be printed.
 * @param length: number of elements to be printed.
 * @param header: header to append in front of the table.
 * @param footer: footer to append at the end of the table.
 */
void fprint_uint64_table(FILE * outfile, uint64_t * table, uint64_t length, char *header,
			 char *footer)
{
	/* Same type as length, so the comparison below cannot wrap or truncate. */
	uint64_t i;

	fprintf(outfile, "%s\n", header);
	for (i = 0; i < length; i++) {
		fprintf(outfile, "\t0x%016" PRIx64, table[i]);
		/* Every element except the last one is followed by a comma. */
		if (i + 1 < length)
			fprintf(outfile, ",\n");
	}
	fprintf(outfile, "%s", footer);
}
206660f49b0SGreg Tucker 
207660f49b0SGreg Tucker void fprint_hufftables(FILE * output_file, uint8_t * header, uint32_t bit_count,
208660f49b0SGreg Tucker 		       uint16_t * lit_code_table, uint8_t * lit_code_size_table,
209660f49b0SGreg Tucker 		       uint16_t * dcodes_code_table, uint8_t * dcodes_code_size_table,
210660f49b0SGreg Tucker 		       uint32_t * packed_len_table, uint32_t * packed_dist_table)
211660f49b0SGreg Tucker {
212660f49b0SGreg Tucker 	fprintf(output_file, "struct isal_hufftables hufftables_default = {\n\n");
213660f49b0SGreg Tucker 
214660f49b0SGreg Tucker 	fprint_uint8_table(output_file, header, (bit_count + 7) / 8,
215660f49b0SGreg Tucker 			   "\t.deflate_hdr = {", "\t},\n\n", "\t\t");
216660f49b0SGreg Tucker 	fprintf(output_file, "\t.deflate_hdr_count = %d,\n", bit_count / 8);
217660f49b0SGreg Tucker 	fprintf(output_file, "\t.deflate_hdr_extra_bits = %d,\n\n", bit_count & 7);
218660f49b0SGreg Tucker 
219660f49b0SGreg Tucker 	fprint_uint32_table(output_file, packed_dist_table, SHORT_DIST_TABLE_SIZE,
220660f49b0SGreg Tucker 			    "\t.dist_table = {", ",\n", "\t\t");
221660f49b0SGreg Tucker 	fprint_uint32_table(output_file, &packed_dist_table[SHORT_DIST_TABLE_SIZE],
222660f49b0SGreg Tucker 			    LONG_DIST_TABLE_SIZE - SHORT_DIST_TABLE_SIZE,
223660f49b0SGreg Tucker 			    "#ifdef LONGER_HUFFTABLE",
224660f49b0SGreg Tucker 			    "\n#endif /* LONGER_HUFFTABLE */\n\t},\n\n", "\t\t");
225660f49b0SGreg Tucker 
226660f49b0SGreg Tucker 	fprint_uint32_table(output_file, packed_len_table, LEN_TABLE_SIZE, "\t.len_table = {",
227660f49b0SGreg Tucker 			    "\t},\n\n", "\t\t");
228660f49b0SGreg Tucker 	fprint_uint16_table(output_file, lit_code_table, LIT_TABLE_SIZE, "\t.lit_table = {",
229660f49b0SGreg Tucker 			    "\t},\n\n", "\t\t");
230660f49b0SGreg Tucker 	fprint_uint8_table(output_file, lit_code_size_table, LIT_TABLE_SIZE,
231660f49b0SGreg Tucker 			   "\t.lit_table_sizes = {", "\t},\n\n", "\t\t");
232660f49b0SGreg Tucker 
233660f49b0SGreg Tucker 	fprintf(output_file, "#ifndef LONGER_HUFFTABLE\n");
234660f49b0SGreg Tucker 	fprint_uint16_table(output_file, dcodes_code_table + SHORT_DCODE_OFFSET,
235660f49b0SGreg Tucker 			    DIST_LEN - SHORT_DCODE_OFFSET, "\t.dcodes = {", "\t},\n\n",
236660f49b0SGreg Tucker 			    "\t\t");
237660f49b0SGreg Tucker 	fprint_uint8_table(output_file, dcodes_code_size_table + SHORT_DCODE_OFFSET,
238660f49b0SGreg Tucker 			   DIST_LEN - SHORT_DCODE_OFFSET, "\t.dcodes_sizes = {", "\t}\n",
239660f49b0SGreg Tucker 			   "\t\t");
240660f49b0SGreg Tucker 	fprintf(output_file, "#else\n");
241660f49b0SGreg Tucker 	fprint_uint16_table(output_file, dcodes_code_table + LONG_DCODE_OFFSET,
242660f49b0SGreg Tucker 			    DIST_LEN - LONG_DCODE_OFFSET, "\t.dcodes = {", "\t},\n\n", "\t\t");
243660f49b0SGreg Tucker 	fprint_uint8_table(output_file, dcodes_code_size_table + LONG_DCODE_OFFSET,
244660f49b0SGreg Tucker 			   DIST_LEN - LONG_DCODE_OFFSET, "\t.dcodes_sizes = {", "\t}\n",
245660f49b0SGreg Tucker 			   "\t\t");
246660f49b0SGreg Tucker 	fprintf(output_file, "#endif\n");
247660f49b0SGreg Tucker 	fprintf(output_file, "};\n");
248660f49b0SGreg Tucker }
249660f49b0SGreg Tucker 
250660f49b0SGreg Tucker void fprint_header(FILE * output_file, uint8_t * header, uint32_t bit_count,
251660f49b0SGreg Tucker 		   uint16_t * lit_code_table, uint8_t * lit_code_size_table,
252660f49b0SGreg Tucker 		   uint16_t * dcodes_code_table, uint8_t * dcodes_code_size_table,
253660f49b0SGreg Tucker 		   uint32_t * packed_len_table, uint32_t * packed_dist_table)
254660f49b0SGreg Tucker {
255660f49b0SGreg Tucker 	fprintf(output_file, "#include <stdint.h>\n");
256660f49b0SGreg Tucker 	fprintf(output_file, "#include <igzip_lib.h>\n\n");
257660f49b0SGreg Tucker 
258660f49b0SGreg Tucker 	fprintf(output_file, "const uint8_t gzip_hdr[] = {\n"
259660f49b0SGreg Tucker 		"\t0x1f, 0x8b, 0x08, 0x00, 0x00,\n" "\t0x00, 0x00, 0x00, 0x00, 0xff\t};\n\n");
260660f49b0SGreg Tucker 
261660f49b0SGreg Tucker 	fprintf(output_file, "const uint32_t gzip_hdr_bytes = %d;\n", GZIP_HEADER_SIZE);
262660f49b0SGreg Tucker 	fprintf(output_file, "const uint32_t gzip_trl_bytes = %d;\n\n", GZIP_TRAILER_SIZE);
263660f49b0SGreg Tucker 
264660f49b0SGreg Tucker 	fprint_hufftables(output_file, header, bit_count, lit_code_table, lit_code_size_table,
265660f49b0SGreg Tucker 			  dcodes_code_table, dcodes_code_size_table, packed_len_table,
266660f49b0SGreg Tucker 			  packed_dist_table);
267660f49b0SGreg Tucker }
268660f49b0SGreg Tucker 
269660f49b0SGreg Tucker int main(int argc, char *argv[])
270660f49b0SGreg Tucker {
271660f49b0SGreg Tucker 	long int file_length;
272660f49b0SGreg Tucker 	uint8_t *stream = NULL;
273660f49b0SGreg Tucker 	struct isal_huff_histogram histogram;
274660f49b0SGreg Tucker 	uint64_t *lit_histogram = histogram.lit_len_histogram;
275660f49b0SGreg Tucker 	uint64_t *dist_histogram = histogram.dist_histogram;
276660f49b0SGreg Tucker 	uint8_t header[MAX_HEADER_SIZE];
277660f49b0SGreg Tucker 	FILE *file;
278660f49b0SGreg Tucker 	struct huff_tree lit_tree, dist_tree;
279660f49b0SGreg Tucker 	struct huff_tree lit_tree_array[2 * LIT_LEN - 1], dist_tree_array[2 * DIST_LEN - 1];
280660f49b0SGreg Tucker 	struct huff_code lit_huff_table[LIT_LEN], dist_huff_table[DIST_LEN];
281660f49b0SGreg Tucker 	uint64_t bit_count;
282660f49b0SGreg Tucker 	uint32_t packed_len_table[LEN_TABLE_SIZE];
283660f49b0SGreg Tucker 	uint32_t packed_dist_table[LONG_DIST_TABLE_SIZE];
284660f49b0SGreg Tucker 	uint16_t lit_code_table[LIT_TABLE_SIZE];
285660f49b0SGreg Tucker 	uint16_t dcodes_code_table[DIST_LEN];
286660f49b0SGreg Tucker 	uint8_t lit_code_size_table[LIT_TABLE_SIZE];
287660f49b0SGreg Tucker 	uint8_t dcodes_code_size_table[DIST_LEN];
288660f49b0SGreg Tucker 	int max_dist = convert_dist_to_dist_sym(D);
289660f49b0SGreg Tucker 
290660f49b0SGreg Tucker 	if (argc == 1) {
291660f49b0SGreg Tucker 		printf("Error, no input file.\n");
292660f49b0SGreg Tucker 		return 1;
293660f49b0SGreg Tucker 	}
294660f49b0SGreg Tucker 
295660f49b0SGreg Tucker 	memset(&histogram, 0, sizeof(histogram));	/* Initialize histograms. */
296660f49b0SGreg Tucker 	memset(lit_tree_array, 0, sizeof(lit_tree_array));
297660f49b0SGreg Tucker 	memset(dist_tree_array, 0, sizeof(dist_tree_array));
298660f49b0SGreg Tucker 	memset(lit_huff_table, 0, sizeof(lit_huff_table));
299660f49b0SGreg Tucker 	memset(dist_huff_table, 0, sizeof(dist_huff_table));
300660f49b0SGreg Tucker 
301660f49b0SGreg Tucker 	while (argc > 1) {
302660f49b0SGreg Tucker 		printf("Processing %s\n", argv[argc - 1]);
303660f49b0SGreg Tucker 		file = fopen(argv[argc - 1], "r");
304660f49b0SGreg Tucker 		if (file == NULL) {
305660f49b0SGreg Tucker 			printf("Error opening file\n");
306660f49b0SGreg Tucker 			return 1;
307660f49b0SGreg Tucker 		}
308660f49b0SGreg Tucker 		fseek(file, 0, SEEK_END);
309660f49b0SGreg Tucker 		file_length = ftell(file);
310660f49b0SGreg Tucker 		fseek(file, 0, SEEK_SET);
311660f49b0SGreg Tucker 		file_length -= ftell(file);
312660f49b0SGreg Tucker 		stream = malloc(file_length);
313660f49b0SGreg Tucker 		if (stream == NULL) {
314660f49b0SGreg Tucker 			printf("Failed to allocate memory to read in file\n");
315660f49b0SGreg Tucker 			fclose(file);
316660f49b0SGreg Tucker 			return 1;
317660f49b0SGreg Tucker 		}
318660f49b0SGreg Tucker 		fread(stream, 1, file_length, file);
319660f49b0SGreg Tucker 		if (ferror(file)) {
320660f49b0SGreg Tucker 			printf("Error occurred when reading file");
321660f49b0SGreg Tucker 			fclose(file);
322660f49b0SGreg Tucker 			free(stream);
323660f49b0SGreg Tucker 			return 1;
324660f49b0SGreg Tucker 		}
325660f49b0SGreg Tucker 
326660f49b0SGreg Tucker 		/* Create a histogram of frequency of symbols found in stream to
327660f49b0SGreg Tucker 		 * generate the huffman tree.*/
328660f49b0SGreg Tucker 		isal_update_histogram(stream, file_length, &histogram);
329660f49b0SGreg Tucker 
330660f49b0SGreg Tucker 		fclose(file);
331660f49b0SGreg Tucker 		free(stream);
332660f49b0SGreg Tucker 		argc--;
333660f49b0SGreg Tucker 	}
334660f49b0SGreg Tucker 
335660f49b0SGreg Tucker 	/* Create a huffman tree corresponding to the histograms created in
336660f49b0SGreg Tucker 	 * gen_histogram*/
337660f49b0SGreg Tucker #ifdef LIT_SUB
338660f49b0SGreg Tucker 	int j;
339660f49b0SGreg Tucker 	/* Guarantee every possible repeat length is given a symbol. It is hard
340660f49b0SGreg Tucker 	 * to guarantee data will never have a repeat of a given length */
341660f49b0SGreg Tucker 	for (j = LIT_TABLE_SIZE; j < LIT_LEN; j++)
342660f49b0SGreg Tucker 		if (lit_histogram[j] == 0)
343660f49b0SGreg Tucker 			lit_histogram[j]++;
344660f49b0SGreg Tucker 
345660f49b0SGreg Tucker 	lit_tree = create_symbol_subset_huff_tree(lit_tree_array, lit_histogram, LIT_LEN);
346660f49b0SGreg Tucker #else
347660f49b0SGreg Tucker 	lit_tree = create_huff_tree(lit_tree_array, lit_histogram, LIT_LEN);
348660f49b0SGreg Tucker #endif
349660f49b0SGreg Tucker 	dist_tree = create_huff_tree(dist_tree_array, dist_histogram, max_dist + 1);
350660f49b0SGreg Tucker 
351660f49b0SGreg Tucker 	/* Create a look up table to represent huffman tree above in deflate
352660f49b0SGreg Tucker 	 * standard form after it has been modified to satisfy max depth
353660f49b0SGreg Tucker 	 * criteria.*/
354660f49b0SGreg Tucker 	if (create_huff_lookup(lit_huff_table, LIT_LEN, lit_tree, MAX_DEFLATE_CODE_LEN) > 0) {
355660f49b0SGreg Tucker 		printf("Error, code with invalid length for Deflate standard.\n");
356660f49b0SGreg Tucker 		return 1;
357660f49b0SGreg Tucker 	}
358660f49b0SGreg Tucker 
359660f49b0SGreg Tucker 	if (create_huff_lookup(dist_huff_table, DIST_LEN, dist_tree, MAX_DEFLATE_CODE_LEN) > 0) {
360660f49b0SGreg Tucker 		printf("Error, code with invalid length for Deflate standard.\n");
361660f49b0SGreg Tucker 		return 1;
362660f49b0SGreg Tucker 	}
363660f49b0SGreg Tucker 
364660f49b0SGreg Tucker 	if (are_hufftables_useable(lit_huff_table, dist_huff_table)) {
365660f49b0SGreg Tucker 		if (create_huff_lookup
366660f49b0SGreg Tucker 		    (lit_huff_table, LIT_LEN, lit_tree, MAX_SAFE_LIT_CODE_LEN) > 0)
367660f49b0SGreg Tucker 			printf("Error, code with invalid length for Deflate standard.\n");
368660f49b0SGreg Tucker 		return 1;
369660f49b0SGreg Tucker 
370660f49b0SGreg Tucker 		if (create_huff_lookup
371660f49b0SGreg Tucker 		    (dist_huff_table, DIST_LEN, dist_tree, MAX_SAFE_DIST_CODE_LEN) > 0)
372660f49b0SGreg Tucker 			printf("Error, code with invalid length for Deflate standard.\n");
373660f49b0SGreg Tucker 		return 1;
374660f49b0SGreg Tucker 
375660f49b0SGreg Tucker 		if (are_hufftables_useable(lit_huff_table, dist_huff_table)) {
376660f49b0SGreg Tucker 			printf("Error, hufftable is not usable\n");
377660f49b0SGreg Tucker 			return 1;
378660f49b0SGreg Tucker 		}
379660f49b0SGreg Tucker 	}
380660f49b0SGreg Tucker #ifdef PRINT_CODES
381660f49b0SGreg Tucker 	int i;
382660f49b0SGreg Tucker 	printf("Lit/Len codes\n");
383660f49b0SGreg Tucker 	for (i = 0; i < LIT_TABLE_SIZE - 1; i++)
384660f49b0SGreg Tucker 		printf("Lit %3d: Code 0x%04x, Code_Len %d\n", i, lit_huff_table[i].code,
385660f49b0SGreg Tucker 		       lit_huff_table[i].length);
386660f49b0SGreg Tucker 
387660f49b0SGreg Tucker 	printf("EOB %3d: Code 0x%04x, Code_Len %d\n", 256, lit_huff_table[256].code,
388660f49b0SGreg Tucker 	       lit_huff_table[256].length);
389660f49b0SGreg Tucker 
390660f49b0SGreg Tucker 	for (i = LIT_TABLE_SIZE; i < LIT_LEN; i++)
391660f49b0SGreg Tucker 		printf("Len %d: Code 0x%04x, Code_Len %d\n", i, lit_huff_table[i].code,
392660f49b0SGreg Tucker 		       lit_huff_table[i].length);
393660f49b0SGreg Tucker 	printf("\n");
394660f49b0SGreg Tucker 
395660f49b0SGreg Tucker 	printf("Dist codes \n");
396660f49b0SGreg Tucker 	for (i = 0; i < DIST_LEN; i++)
397660f49b0SGreg Tucker 		printf("Dist %2d: Code 0x%04x, Code_Len %d\n", i, dist_huff_table[i].code,
398660f49b0SGreg Tucker 		       dist_huff_table[i].length);
399660f49b0SGreg Tucker 	printf("\n");
400660f49b0SGreg Tucker #endif
401660f49b0SGreg Tucker 
402660f49b0SGreg Tucker 	create_code_tables(lit_code_table, lit_code_size_table, LIT_TABLE_SIZE,
403660f49b0SGreg Tucker 			   lit_huff_table);
404660f49b0SGreg Tucker 	create_code_tables(dcodes_code_table, dcodes_code_size_table, DIST_LEN,
405660f49b0SGreg Tucker 			   dist_huff_table);
406660f49b0SGreg Tucker 	create_packed_len_table(packed_len_table, lit_huff_table);
407660f49b0SGreg Tucker 	create_packed_dist_table(packed_dist_table, LONG_DIST_TABLE_SIZE, dist_huff_table);
408660f49b0SGreg Tucker 
409660f49b0SGreg Tucker 	bit_count =
410660f49b0SGreg Tucker 	    create_header(header, sizeof(header), lit_huff_table, dist_huff_table, LAST_BLOCK);
411660f49b0SGreg Tucker 
412660f49b0SGreg Tucker 	file = fopen("hufftables_c.c", "w");
413660f49b0SGreg Tucker 	if (file == NULL) {
414660f49b0SGreg Tucker 		printf("Error creating file hufftables_c.c\n");
415660f49b0SGreg Tucker 		return 1;
416660f49b0SGreg Tucker 	}
417660f49b0SGreg Tucker 
418660f49b0SGreg Tucker 	fprint_header(file, header, bit_count, lit_code_table, lit_code_size_table,
419660f49b0SGreg Tucker 		      dcodes_code_table, dcodes_code_size_table, packed_len_table,
420660f49b0SGreg Tucker 		      packed_dist_table);
421660f49b0SGreg Tucker 
422660f49b0SGreg Tucker 	fclose(file);
423660f49b0SGreg Tucker 
424660f49b0SGreg Tucker 	return 0;
425660f49b0SGreg Tucker }
426