xref: /isa-l/igzip/generate_custom_hufftables.c (revision 660f49b02d4c3afcfd373f3f1b2d2be06222cedb)
1*660f49b0SGreg Tucker /**********************************************************************
2*660f49b0SGreg Tucker   Copyright(c) 2011-2016 Intel Corporation All rights reserved.
3*660f49b0SGreg Tucker 
4*660f49b0SGreg Tucker   Redistribution and use in source and binary forms, with or without
5*660f49b0SGreg Tucker   modification, are permitted provided that the following conditions
6*660f49b0SGreg Tucker   are met:
7*660f49b0SGreg Tucker     * Redistributions of source code must retain the above copyright
8*660f49b0SGreg Tucker       notice, this list of conditions and the following disclaimer.
9*660f49b0SGreg Tucker     * Redistributions in binary form must reproduce the above copyright
10*660f49b0SGreg Tucker       notice, this list of conditions and the following disclaimer in
11*660f49b0SGreg Tucker       the documentation and/or other materials provided with the
12*660f49b0SGreg Tucker       distribution.
13*660f49b0SGreg Tucker     * Neither the name of Intel Corporation nor the names of its
14*660f49b0SGreg Tucker       contributors may be used to endorse or promote products derived
15*660f49b0SGreg Tucker       from this software without specific prior written permission.
16*660f49b0SGreg Tucker 
17*660f49b0SGreg Tucker   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18*660f49b0SGreg Tucker   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19*660f49b0SGreg Tucker   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20*660f49b0SGreg Tucker   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21*660f49b0SGreg Tucker   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22*660f49b0SGreg Tucker   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23*660f49b0SGreg Tucker   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24*660f49b0SGreg Tucker   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25*660f49b0SGreg Tucker   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26*660f49b0SGreg Tucker   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27*660f49b0SGreg Tucker   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28*660f49b0SGreg Tucker **********************************************************************/
29*660f49b0SGreg Tucker 
30*660f49b0SGreg Tucker /* This program can be used to generate custom a custom huffman encoding to get
31*660f49b0SGreg Tucker  * better data compression. This is most useful when the type of data being
32*660f49b0SGreg Tucker  * compressed is well known.
33*660f49b0SGreg Tucker  *
34*660f49b0SGreg Tucker  * To use generate_custom_hufftables, pass a sequence of files to the program
35*660f49b0SGreg Tucker  * that together form an accurate representation of the data that is being
36*660f49b0SGreg Tucker  * compressed. Generate_custom_hufftables will then produce the file
37*660f49b0SGreg Tucker  * hufftables_c.c, which should be moved to replace its counterpart in the igzip
38*660f49b0SGreg Tucker  * source folder. After recompiling the Isa-l library, the igzip compression
39*660f49b0SGreg Tucker  * functions will use the new hufftables.
40*660f49b0SGreg Tucker  *
41*660f49b0SGreg Tucker  * Generate_custom_hufftables should be compiled with the same compile time
42*660f49b0SGreg Tucker  * parameters as the igzip source code. Generating custom hufftables with
43*660f49b0SGreg Tucker  * different compile time parameters may cause igzip to produce invalid output
44*660f49b0SGreg Tucker  * for the reasons described below. The default parameters used by
45*660f49b0SGreg Tucker  * generate_custom_hufftables are the same as the default parameters used by
46*660f49b0SGreg Tucker  * igzip.
47*660f49b0SGreg Tucker  *
48*660f49b0SGreg Tucker  * *WARNING* generate custom hufftables must be compiled with a HIST_SIZE that
49*660f49b0SGreg Tucker  * is at least as large as the HIST_SIZE used by igzip. By default HIST_SIZE is
50*660f49b0SGreg Tucker  * 8, the maximum usable HIST_SIZE is 32. The reason for this is to generate
51*660f49b0SGreg Tucker  * better compression. Igzip cannot produce look back distances with sizes
52*660f49b0SGreg Tucker  * larger than the HIST_SIZE * 1024 igzip was compiled with, so look back
53*660f49b0SGreg Tucker  * distances with sizes larger than HIST_SIZE * 1024 are not assigned a huffman
54*660f49b0SGreg Tucker  * code.
55*660f49b0SGreg Tucker  *
56*660f49b0SGreg Tucker  * To improve compression ratio, the compile time option LIT_SUB is provided to
57*660f49b0SGreg Tucker  * allow generating custom hufftables which only use a subset of all possible
58*660f49b0SGreg Tucker  * literals. This can be useful for getting better compression when it is known
59*660f49b0SGreg Tucker  * that the data being compressed will never contain certain symbols, for
60*660f49b0SGreg Tucker  * example text files. If this option is used, it needs to be checked that every
61*660f49b0SGreg Tucker  * possible literal is in fact given a valid code in the output hufftable. This
62*660f49b0SGreg Tucker  * can be done by checking that every required literal has a positive value for
63*660f49b0SGreg Tucker  * the length of the code associated with that literal. Literals which have not
64*660f49b0SGreg Tucker  * been given codes will have a code length of zero. The compile time option
65*660f49b0SGreg Tucker  * PRINT_CODES (described below) can be used to help manually perform this
66*660f49b0SGreg Tucker  * check.
67*660f49b0SGreg Tucker  *
68*660f49b0SGreg Tucker  * The compile time parameter PRINT_CODES causes the literal/length huffman code
69*660f49b0SGreg Tucker  * and the distance huffman code created by generate_custom_hufftables to be
70*660f49b0SGreg Tucker  * printed out. This is printed out where each line corresponds to a different
71*660f49b0SGreg Tucker  * symbol. The first column is the symbol used to represent each literal (Lit),
72*660f49b0SGreg Tucker  * end of block symbol (EOB), length (Len) or distance (Dist), the second column
73*660f49b0SGreg Tucker  * is the associated code value, and the third column is the length in bits of
74*660f49b0SGreg Tucker  * that code.
75*660f49b0SGreg Tucker  */
76*660f49b0SGreg Tucker 
77*660f49b0SGreg Tucker #include <stdint.h>
78*660f49b0SGreg Tucker #include <stdio.h>
79*660f49b0SGreg Tucker #include <inttypes.h>
80*660f49b0SGreg Tucker #include "huff_codes.h"
81*660f49b0SGreg Tucker #include "bitbuf2.h"
82*660f49b0SGreg Tucker 
83*660f49b0SGreg Tucker /*These max code lengths are limited by how the data is stored in
84*660f49b0SGreg Tucker  * hufftables.asm. The deflate standard max is 15.*/
85*660f49b0SGreg Tucker 
86*660f49b0SGreg Tucker #define LONG_DCODE_OFFSET 26
87*660f49b0SGreg Tucker #define SHORT_DCODE_OFFSET 20
88*660f49b0SGreg Tucker 
89*660f49b0SGreg Tucker #define MAX_HEADER_SIZE IGZIP_MAX_DEF_HDR_SIZE
90*660f49b0SGreg Tucker 
91*660f49b0SGreg Tucker #define GZIP_HEADER_SIZE 10
92*660f49b0SGreg Tucker #define GZIP_TRAILER_SIZE 8
93*660f49b0SGreg Tucker 
94*660f49b0SGreg Tucker /**
95*660f49b0SGreg Tucker  * @brief Prints a table of uint8_t elements to a file.
96*660f49b0SGreg Tucker  * @param outfile: the file the table is printed to.
97*660f49b0SGreg Tucker  * @param table: the table to be printed.
98*660f49b0SGreg Tucker  * @param length: number of elements to be printed.
99*660f49b0SGreg Tucker  * @param header: header to append in front of the table.
100*660f49b0SGreg Tucker  * @param footer: footer to append at the end of the table.
101*660f49b0SGreg Tucker  * @param begin_line: string printed at beginning of new line
102*660f49b0SGreg Tucker  */
103*660f49b0SGreg Tucker void fprint_uint8_table(FILE * outfile, uint8_t * table, uint64_t length, char *header,
104*660f49b0SGreg Tucker 			char *footer, char *begin_line)
105*660f49b0SGreg Tucker {
106*660f49b0SGreg Tucker 	int i;
107*660f49b0SGreg Tucker 	fprintf(outfile, "%s", header);
108*660f49b0SGreg Tucker 	for (i = 0; i < length - 1; i++) {
109*660f49b0SGreg Tucker 		if ((i & 7) == 0)
110*660f49b0SGreg Tucker 			fprintf(outfile, "\n%s", begin_line);
111*660f49b0SGreg Tucker 		else
112*660f49b0SGreg Tucker 			fprintf(outfile, " ");
113*660f49b0SGreg Tucker 		fprintf(outfile, "0x%02x,", table[i]);
114*660f49b0SGreg Tucker 	}
115*660f49b0SGreg Tucker 
116*660f49b0SGreg Tucker 	if ((i & 7) == 0)
117*660f49b0SGreg Tucker 		fprintf(outfile, "\n%s", begin_line);
118*660f49b0SGreg Tucker 	else
119*660f49b0SGreg Tucker 		fprintf(outfile, " ");
120*660f49b0SGreg Tucker 	fprintf(outfile, "0x%02x", table[i]);
121*660f49b0SGreg Tucker 	fprintf(outfile, "%s", footer);
122*660f49b0SGreg Tucker 
123*660f49b0SGreg Tucker }
124*660f49b0SGreg Tucker 
125*660f49b0SGreg Tucker /**
126*660f49b0SGreg Tucker  * @brief Prints a table of uint16_t elements to a file.
127*660f49b0SGreg Tucker  * @param outfile: the file the table is printed to.
128*660f49b0SGreg Tucker  * @param table: the table to be printed.
129*660f49b0SGreg Tucker  * @param length: number of elements to be printed.
130*660f49b0SGreg Tucker  * @param header: header to append in front of the table.
131*660f49b0SGreg Tucker  * @param footer: footer to append at the end of the table.
132*660f49b0SGreg Tucker  * @param begin_line: string printed at beginning of new line
133*660f49b0SGreg Tucker  */
134*660f49b0SGreg Tucker void fprint_uint16_table(FILE * outfile, uint16_t * table, uint64_t length, char *header,
135*660f49b0SGreg Tucker 			 char *footer, char *begin_line)
136*660f49b0SGreg Tucker {
137*660f49b0SGreg Tucker 	int i;
138*660f49b0SGreg Tucker 	fprintf(outfile, "%s", header);
139*660f49b0SGreg Tucker 	for (i = 0; i < length - 1; i++) {
140*660f49b0SGreg Tucker 		if ((i & 7) == 0)
141*660f49b0SGreg Tucker 			fprintf(outfile, "\n%s", begin_line);
142*660f49b0SGreg Tucker 		else
143*660f49b0SGreg Tucker 			fprintf(outfile, " ");
144*660f49b0SGreg Tucker 		fprintf(outfile, "0x%04x,", table[i]);
145*660f49b0SGreg Tucker 	}
146*660f49b0SGreg Tucker 
147*660f49b0SGreg Tucker 	if ((i & 7) == 0)
148*660f49b0SGreg Tucker 		fprintf(outfile, "\n%s", begin_line);
149*660f49b0SGreg Tucker 	else
150*660f49b0SGreg Tucker 		fprintf(outfile, " ");
151*660f49b0SGreg Tucker 	fprintf(outfile, "0x%04x", table[i]);
152*660f49b0SGreg Tucker 	fprintf(outfile, "%s", footer);
153*660f49b0SGreg Tucker 
154*660f49b0SGreg Tucker }
155*660f49b0SGreg Tucker 
156*660f49b0SGreg Tucker /**
157*660f49b0SGreg Tucker  * @brief Prints a table of uint32_t elements to a file.
158*660f49b0SGreg Tucker  * @param outfile: the file the table is printed to.
159*660f49b0SGreg Tucker  * @param table: the table to be printed.
160*660f49b0SGreg Tucker  * @param length: number of elements to be printed.
161*660f49b0SGreg Tucker  * @param header: header to append in front of the table.
162*660f49b0SGreg Tucker  * @param footer: footer to append at the end of the table.
163*660f49b0SGreg Tucker  * @param begin_line: string printed at beginning of new line
164*660f49b0SGreg Tucker  */
165*660f49b0SGreg Tucker void fprint_uint32_table(FILE * outfile, uint32_t * table, uint64_t length, char *header,
166*660f49b0SGreg Tucker 			 char *footer, char *begin_line)
167*660f49b0SGreg Tucker {
168*660f49b0SGreg Tucker 	int i;
169*660f49b0SGreg Tucker 	fprintf(outfile, "%s", header);
170*660f49b0SGreg Tucker 	for (i = 0; i < length - 1; i++) {
171*660f49b0SGreg Tucker 		if ((i & 3) == 0)
172*660f49b0SGreg Tucker 			fprintf(outfile, "\n%s", begin_line);
173*660f49b0SGreg Tucker 		else
174*660f49b0SGreg Tucker 			fprintf(outfile, " ");
175*660f49b0SGreg Tucker 		fprintf(outfile, "0x%08x,", table[i]);
176*660f49b0SGreg Tucker 	}
177*660f49b0SGreg Tucker 
178*660f49b0SGreg Tucker 	if ((i & 3) == 0)
179*660f49b0SGreg Tucker 		fprintf(outfile, "%s", begin_line);
180*660f49b0SGreg Tucker 	else
181*660f49b0SGreg Tucker 		fprintf(outfile, " ");
182*660f49b0SGreg Tucker 	fprintf(outfile, "0x%08x", table[i]);
183*660f49b0SGreg Tucker 	fprintf(outfile, "%s", footer);
184*660f49b0SGreg Tucker 
185*660f49b0SGreg Tucker }
186*660f49b0SGreg Tucker 
187*660f49b0SGreg Tucker /**
188*660f49b0SGreg Tucker  * @brief Prints a table of uint64_t elements to a file.
189*660f49b0SGreg Tucker  * @param outfile: the file the table is printed to.
190*660f49b0SGreg Tucker  * @param table: the table to be printed.
191*660f49b0SGreg Tucker  * @param length: number of elements to be printed.
192*660f49b0SGreg Tucker  * @param header: header to append in front of the table.
193*660f49b0SGreg Tucker  * @param footer: footer to append at the end of the table.
194*660f49b0SGreg Tucker  */
195*660f49b0SGreg Tucker void fprint_uint64_table(FILE * outfile, uint64_t * table, uint64_t length, char *header,
196*660f49b0SGreg Tucker 			 char *footer)
197*660f49b0SGreg Tucker {
198*660f49b0SGreg Tucker 	int i;
199*660f49b0SGreg Tucker 	fprintf(outfile, "%s\n", header);
200*660f49b0SGreg Tucker 	for (i = 0; i < length - 1; i++)
201*660f49b0SGreg Tucker 		fprintf(outfile, "\t0x%016" PRIx64 ",\n", table[i]);
202*660f49b0SGreg Tucker 	fprintf(outfile, "\t0x%016" PRIx64, table[i]);
203*660f49b0SGreg Tucker 	fprintf(outfile, "%s", footer);
204*660f49b0SGreg Tucker 
205*660f49b0SGreg Tucker }
206*660f49b0SGreg Tucker 
207*660f49b0SGreg Tucker void fprint_hufftables(FILE * output_file, uint8_t * header, uint32_t bit_count,
208*660f49b0SGreg Tucker 		       uint16_t * lit_code_table, uint8_t * lit_code_size_table,
209*660f49b0SGreg Tucker 		       uint16_t * dcodes_code_table, uint8_t * dcodes_code_size_table,
210*660f49b0SGreg Tucker 		       uint32_t * packed_len_table, uint32_t * packed_dist_table)
211*660f49b0SGreg Tucker {
212*660f49b0SGreg Tucker 	fprintf(output_file, "struct isal_hufftables hufftables_default = {\n\n");
213*660f49b0SGreg Tucker 
214*660f49b0SGreg Tucker 	fprint_uint8_table(output_file, header, (bit_count + 7) / 8,
215*660f49b0SGreg Tucker 			   "\t.deflate_hdr = {", "\t},\n\n", "\t\t");
216*660f49b0SGreg Tucker 	fprintf(output_file, "\t.deflate_hdr_count = %d,\n", bit_count / 8);
217*660f49b0SGreg Tucker 	fprintf(output_file, "\t.deflate_hdr_extra_bits = %d,\n\n", bit_count & 7);
218*660f49b0SGreg Tucker 
219*660f49b0SGreg Tucker 	fprint_uint32_table(output_file, packed_dist_table, SHORT_DIST_TABLE_SIZE,
220*660f49b0SGreg Tucker 			    "\t.dist_table = {", ",\n", "\t\t");
221*660f49b0SGreg Tucker 	fprint_uint32_table(output_file, &packed_dist_table[SHORT_DIST_TABLE_SIZE],
222*660f49b0SGreg Tucker 			    LONG_DIST_TABLE_SIZE - SHORT_DIST_TABLE_SIZE,
223*660f49b0SGreg Tucker 			    "#ifdef LONGER_HUFFTABLE",
224*660f49b0SGreg Tucker 			    "\n#endif /* LONGER_HUFFTABLE */\n\t},\n\n", "\t\t");
225*660f49b0SGreg Tucker 
226*660f49b0SGreg Tucker 	fprint_uint32_table(output_file, packed_len_table, LEN_TABLE_SIZE, "\t.len_table = {",
227*660f49b0SGreg Tucker 			    "\t},\n\n", "\t\t");
228*660f49b0SGreg Tucker 	fprint_uint16_table(output_file, lit_code_table, LIT_TABLE_SIZE, "\t.lit_table = {",
229*660f49b0SGreg Tucker 			    "\t},\n\n", "\t\t");
230*660f49b0SGreg Tucker 	fprint_uint8_table(output_file, lit_code_size_table, LIT_TABLE_SIZE,
231*660f49b0SGreg Tucker 			   "\t.lit_table_sizes = {", "\t},\n\n", "\t\t");
232*660f49b0SGreg Tucker 
233*660f49b0SGreg Tucker 	fprintf(output_file, "#ifndef LONGER_HUFFTABLE\n");
234*660f49b0SGreg Tucker 	fprint_uint16_table(output_file, dcodes_code_table + SHORT_DCODE_OFFSET,
235*660f49b0SGreg Tucker 			    DIST_LEN - SHORT_DCODE_OFFSET, "\t.dcodes = {", "\t},\n\n",
236*660f49b0SGreg Tucker 			    "\t\t");
237*660f49b0SGreg Tucker 	fprint_uint8_table(output_file, dcodes_code_size_table + SHORT_DCODE_OFFSET,
238*660f49b0SGreg Tucker 			   DIST_LEN - SHORT_DCODE_OFFSET, "\t.dcodes_sizes = {", "\t}\n",
239*660f49b0SGreg Tucker 			   "\t\t");
240*660f49b0SGreg Tucker 	fprintf(output_file, "#else\n");
241*660f49b0SGreg Tucker 	fprint_uint16_table(output_file, dcodes_code_table + LONG_DCODE_OFFSET,
242*660f49b0SGreg Tucker 			    DIST_LEN - LONG_DCODE_OFFSET, "\t.dcodes = {", "\t},\n\n", "\t\t");
243*660f49b0SGreg Tucker 	fprint_uint8_table(output_file, dcodes_code_size_table + LONG_DCODE_OFFSET,
244*660f49b0SGreg Tucker 			   DIST_LEN - LONG_DCODE_OFFSET, "\t.dcodes_sizes = {", "\t}\n",
245*660f49b0SGreg Tucker 			   "\t\t");
246*660f49b0SGreg Tucker 	fprintf(output_file, "#endif\n");
247*660f49b0SGreg Tucker 	fprintf(output_file, "};\n");
248*660f49b0SGreg Tucker }
249*660f49b0SGreg Tucker 
250*660f49b0SGreg Tucker void fprint_header(FILE * output_file, uint8_t * header, uint32_t bit_count,
251*660f49b0SGreg Tucker 		   uint16_t * lit_code_table, uint8_t * lit_code_size_table,
252*660f49b0SGreg Tucker 		   uint16_t * dcodes_code_table, uint8_t * dcodes_code_size_table,
253*660f49b0SGreg Tucker 		   uint32_t * packed_len_table, uint32_t * packed_dist_table)
254*660f49b0SGreg Tucker {
255*660f49b0SGreg Tucker 	fprintf(output_file, "#include <stdint.h>\n");
256*660f49b0SGreg Tucker 	fprintf(output_file, "#include <igzip_lib.h>\n\n");
257*660f49b0SGreg Tucker 
258*660f49b0SGreg Tucker 	fprintf(output_file, "const uint8_t gzip_hdr[] = {\n"
259*660f49b0SGreg Tucker 		"\t0x1f, 0x8b, 0x08, 0x00, 0x00,\n" "\t0x00, 0x00, 0x00, 0x00, 0xff\t};\n\n");
260*660f49b0SGreg Tucker 
261*660f49b0SGreg Tucker 	fprintf(output_file, "const uint32_t gzip_hdr_bytes = %d;\n", GZIP_HEADER_SIZE);
262*660f49b0SGreg Tucker 	fprintf(output_file, "const uint32_t gzip_trl_bytes = %d;\n\n", GZIP_TRAILER_SIZE);
263*660f49b0SGreg Tucker 
264*660f49b0SGreg Tucker 	fprint_hufftables(output_file, header, bit_count, lit_code_table, lit_code_size_table,
265*660f49b0SGreg Tucker 			  dcodes_code_table, dcodes_code_size_table, packed_len_table,
266*660f49b0SGreg Tucker 			  packed_dist_table);
267*660f49b0SGreg Tucker }
268*660f49b0SGreg Tucker 
269*660f49b0SGreg Tucker int main(int argc, char *argv[])
270*660f49b0SGreg Tucker {
271*660f49b0SGreg Tucker 	long int file_length;
272*660f49b0SGreg Tucker 	uint8_t *stream = NULL;
273*660f49b0SGreg Tucker 	struct isal_huff_histogram histogram;
274*660f49b0SGreg Tucker 	uint64_t *lit_histogram = histogram.lit_len_histogram;
275*660f49b0SGreg Tucker 	uint64_t *dist_histogram = histogram.dist_histogram;
276*660f49b0SGreg Tucker 	uint8_t header[MAX_HEADER_SIZE];
277*660f49b0SGreg Tucker 	FILE *file;
278*660f49b0SGreg Tucker 	struct huff_tree lit_tree, dist_tree;
279*660f49b0SGreg Tucker 	struct huff_tree lit_tree_array[2 * LIT_LEN - 1], dist_tree_array[2 * DIST_LEN - 1];
280*660f49b0SGreg Tucker 	struct huff_code lit_huff_table[LIT_LEN], dist_huff_table[DIST_LEN];
281*660f49b0SGreg Tucker 	uint64_t bit_count;
282*660f49b0SGreg Tucker 	uint32_t packed_len_table[LEN_TABLE_SIZE];
283*660f49b0SGreg Tucker 	uint32_t packed_dist_table[LONG_DIST_TABLE_SIZE];
284*660f49b0SGreg Tucker 	uint16_t lit_code_table[LIT_TABLE_SIZE];
285*660f49b0SGreg Tucker 	uint16_t dcodes_code_table[DIST_LEN];
286*660f49b0SGreg Tucker 	uint8_t lit_code_size_table[LIT_TABLE_SIZE];
287*660f49b0SGreg Tucker 	uint8_t dcodes_code_size_table[DIST_LEN];
288*660f49b0SGreg Tucker 	int max_dist = convert_dist_to_dist_sym(D);
289*660f49b0SGreg Tucker 
290*660f49b0SGreg Tucker 	if (argc == 1) {
291*660f49b0SGreg Tucker 		printf("Error, no input file.\n");
292*660f49b0SGreg Tucker 		return 1;
293*660f49b0SGreg Tucker 	}
294*660f49b0SGreg Tucker 
295*660f49b0SGreg Tucker 	memset(&histogram, 0, sizeof(histogram));	/* Initialize histograms. */
296*660f49b0SGreg Tucker 	memset(lit_tree_array, 0, sizeof(lit_tree_array));
297*660f49b0SGreg Tucker 	memset(dist_tree_array, 0, sizeof(dist_tree_array));
298*660f49b0SGreg Tucker 	memset(lit_huff_table, 0, sizeof(lit_huff_table));
299*660f49b0SGreg Tucker 	memset(dist_huff_table, 0, sizeof(dist_huff_table));
300*660f49b0SGreg Tucker 
301*660f49b0SGreg Tucker 	while (argc > 1) {
302*660f49b0SGreg Tucker 		printf("Processing %s\n", argv[argc - 1]);
303*660f49b0SGreg Tucker 		file = fopen(argv[argc - 1], "r");
304*660f49b0SGreg Tucker 		if (file == NULL) {
305*660f49b0SGreg Tucker 			printf("Error opening file\n");
306*660f49b0SGreg Tucker 			return 1;
307*660f49b0SGreg Tucker 		}
308*660f49b0SGreg Tucker 		fseek(file, 0, SEEK_END);
309*660f49b0SGreg Tucker 		file_length = ftell(file);
310*660f49b0SGreg Tucker 		fseek(file, 0, SEEK_SET);
311*660f49b0SGreg Tucker 		file_length -= ftell(file);
312*660f49b0SGreg Tucker 		stream = malloc(file_length);
313*660f49b0SGreg Tucker 		if (stream == NULL) {
314*660f49b0SGreg Tucker 			printf("Failed to allocate memory to read in file\n");
315*660f49b0SGreg Tucker 			fclose(file);
316*660f49b0SGreg Tucker 			return 1;
317*660f49b0SGreg Tucker 		}
318*660f49b0SGreg Tucker 		fread(stream, 1, file_length, file);
319*660f49b0SGreg Tucker 		if (ferror(file)) {
320*660f49b0SGreg Tucker 			printf("Error occurred when reading file");
321*660f49b0SGreg Tucker 			fclose(file);
322*660f49b0SGreg Tucker 			free(stream);
323*660f49b0SGreg Tucker 			return 1;
324*660f49b0SGreg Tucker 		}
325*660f49b0SGreg Tucker 
326*660f49b0SGreg Tucker 		/* Create a histogram of frequency of symbols found in stream to
327*660f49b0SGreg Tucker 		 * generate the huffman tree.*/
328*660f49b0SGreg Tucker 		isal_update_histogram(stream, file_length, &histogram);
329*660f49b0SGreg Tucker 
330*660f49b0SGreg Tucker 		fclose(file);
331*660f49b0SGreg Tucker 		free(stream);
332*660f49b0SGreg Tucker 		argc--;
333*660f49b0SGreg Tucker 	}
334*660f49b0SGreg Tucker 
335*660f49b0SGreg Tucker 	/* Create a huffman tree corresponding to the histograms created in
336*660f49b0SGreg Tucker 	 * gen_histogram*/
337*660f49b0SGreg Tucker #ifdef LIT_SUB
338*660f49b0SGreg Tucker 	int j;
339*660f49b0SGreg Tucker 	/* Guarantee every possible repeat length is given a symbol. It is hard
340*660f49b0SGreg Tucker 	 * to guarantee data will never have a repeat of a given length */
341*660f49b0SGreg Tucker 	for (j = LIT_TABLE_SIZE; j < LIT_LEN; j++)
342*660f49b0SGreg Tucker 		if (lit_histogram[j] == 0)
343*660f49b0SGreg Tucker 			lit_histogram[j]++;
344*660f49b0SGreg Tucker 
345*660f49b0SGreg Tucker 	lit_tree = create_symbol_subset_huff_tree(lit_tree_array, lit_histogram, LIT_LEN);
346*660f49b0SGreg Tucker #else
347*660f49b0SGreg Tucker 	lit_tree = create_huff_tree(lit_tree_array, lit_histogram, LIT_LEN);
348*660f49b0SGreg Tucker #endif
349*660f49b0SGreg Tucker 	dist_tree = create_huff_tree(dist_tree_array, dist_histogram, max_dist + 1);
350*660f49b0SGreg Tucker 
351*660f49b0SGreg Tucker 	/* Create a look up table to represent huffman tree above in deflate
352*660f49b0SGreg Tucker 	 * standard form after it has been modified to satisfy max depth
353*660f49b0SGreg Tucker 	 * criteria.*/
354*660f49b0SGreg Tucker 	if (create_huff_lookup(lit_huff_table, LIT_LEN, lit_tree, MAX_DEFLATE_CODE_LEN) > 0) {
355*660f49b0SGreg Tucker 		printf("Error, code with invalid length for Deflate standard.\n");
356*660f49b0SGreg Tucker 		return 1;
357*660f49b0SGreg Tucker 	}
358*660f49b0SGreg Tucker 
359*660f49b0SGreg Tucker 	if (create_huff_lookup(dist_huff_table, DIST_LEN, dist_tree, MAX_DEFLATE_CODE_LEN) > 0) {
360*660f49b0SGreg Tucker 		printf("Error, code with invalid length for Deflate standard.\n");
361*660f49b0SGreg Tucker 		return 1;
362*660f49b0SGreg Tucker 	}
363*660f49b0SGreg Tucker 
364*660f49b0SGreg Tucker 	if (are_hufftables_useable(lit_huff_table, dist_huff_table)) {
365*660f49b0SGreg Tucker 		if (create_huff_lookup
366*660f49b0SGreg Tucker 		    (lit_huff_table, LIT_LEN, lit_tree, MAX_SAFE_LIT_CODE_LEN) > 0)
367*660f49b0SGreg Tucker 			printf("Error, code with invalid length for Deflate standard.\n");
368*660f49b0SGreg Tucker 		return 1;
369*660f49b0SGreg Tucker 
370*660f49b0SGreg Tucker 		if (create_huff_lookup
371*660f49b0SGreg Tucker 		    (dist_huff_table, DIST_LEN, dist_tree, MAX_SAFE_DIST_CODE_LEN) > 0)
372*660f49b0SGreg Tucker 			printf("Error, code with invalid length for Deflate standard.\n");
373*660f49b0SGreg Tucker 		return 1;
374*660f49b0SGreg Tucker 
375*660f49b0SGreg Tucker 		if (are_hufftables_useable(lit_huff_table, dist_huff_table)) {
376*660f49b0SGreg Tucker 			printf("Error, hufftable is not usable\n");
377*660f49b0SGreg Tucker 			return 1;
378*660f49b0SGreg Tucker 		}
379*660f49b0SGreg Tucker 	}
380*660f49b0SGreg Tucker #ifdef PRINT_CODES
381*660f49b0SGreg Tucker 	int i;
382*660f49b0SGreg Tucker 	printf("Lit/Len codes\n");
383*660f49b0SGreg Tucker 	for (i = 0; i < LIT_TABLE_SIZE - 1; i++)
384*660f49b0SGreg Tucker 		printf("Lit %3d: Code 0x%04x, Code_Len %d\n", i, lit_huff_table[i].code,
385*660f49b0SGreg Tucker 		       lit_huff_table[i].length);
386*660f49b0SGreg Tucker 
387*660f49b0SGreg Tucker 	printf("EOB %3d: Code 0x%04x, Code_Len %d\n", 256, lit_huff_table[256].code,
388*660f49b0SGreg Tucker 	       lit_huff_table[256].length);
389*660f49b0SGreg Tucker 
390*660f49b0SGreg Tucker 	for (i = LIT_TABLE_SIZE; i < LIT_LEN; i++)
391*660f49b0SGreg Tucker 		printf("Len %d: Code 0x%04x, Code_Len %d\n", i, lit_huff_table[i].code,
392*660f49b0SGreg Tucker 		       lit_huff_table[i].length);
393*660f49b0SGreg Tucker 	printf("\n");
394*660f49b0SGreg Tucker 
395*660f49b0SGreg Tucker 	printf("Dist codes \n");
396*660f49b0SGreg Tucker 	for (i = 0; i < DIST_LEN; i++)
397*660f49b0SGreg Tucker 		printf("Dist %2d: Code 0x%04x, Code_Len %d\n", i, dist_huff_table[i].code,
398*660f49b0SGreg Tucker 		       dist_huff_table[i].length);
399*660f49b0SGreg Tucker 	printf("\n");
400*660f49b0SGreg Tucker #endif
401*660f49b0SGreg Tucker 
402*660f49b0SGreg Tucker 	create_code_tables(lit_code_table, lit_code_size_table, LIT_TABLE_SIZE,
403*660f49b0SGreg Tucker 			   lit_huff_table);
404*660f49b0SGreg Tucker 	create_code_tables(dcodes_code_table, dcodes_code_size_table, DIST_LEN,
405*660f49b0SGreg Tucker 			   dist_huff_table);
406*660f49b0SGreg Tucker 	create_packed_len_table(packed_len_table, lit_huff_table);
407*660f49b0SGreg Tucker 	create_packed_dist_table(packed_dist_table, LONG_DIST_TABLE_SIZE, dist_huff_table);
408*660f49b0SGreg Tucker 
409*660f49b0SGreg Tucker 	bit_count =
410*660f49b0SGreg Tucker 	    create_header(header, sizeof(header), lit_huff_table, dist_huff_table, LAST_BLOCK);
411*660f49b0SGreg Tucker 
412*660f49b0SGreg Tucker 	file = fopen("hufftables_c.c", "w");
413*660f49b0SGreg Tucker 	if (file == NULL) {
414*660f49b0SGreg Tucker 		printf("Error creating file hufftables_c.c\n");
415*660f49b0SGreg Tucker 		return 1;
416*660f49b0SGreg Tucker 	}
417*660f49b0SGreg Tucker 
418*660f49b0SGreg Tucker 	fprint_header(file, header, bit_count, lit_code_table, lit_code_size_table,
419*660f49b0SGreg Tucker 		      dcodes_code_table, dcodes_code_size_table, packed_len_table,
420*660f49b0SGreg Tucker 		      packed_dist_table);
421*660f49b0SGreg Tucker 
422*660f49b0SGreg Tucker 	fclose(file);
423*660f49b0SGreg Tucker 
424*660f49b0SGreg Tucker 	return 0;
425*660f49b0SGreg Tucker }
426