xref: /isa-l/igzip/generate_custom_hufftables.c (revision 55fbfabfc60f1002bc8133b730a59f6abd22cfce)
1660f49b0SGreg Tucker /**********************************************************************
2660f49b0SGreg Tucker   Copyright(c) 2011-2016 Intel Corporation All rights reserved.
3660f49b0SGreg Tucker 
4660f49b0SGreg Tucker   Redistribution and use in source and binary forms, with or without
5660f49b0SGreg Tucker   modification, are permitted provided that the following conditions
6660f49b0SGreg Tucker   are met:
7660f49b0SGreg Tucker     * Redistributions of source code must retain the above copyright
8660f49b0SGreg Tucker       notice, this list of conditions and the following disclaimer.
9660f49b0SGreg Tucker     * Redistributions in binary form must reproduce the above copyright
10660f49b0SGreg Tucker       notice, this list of conditions and the following disclaimer in
11660f49b0SGreg Tucker       the documentation and/or other materials provided with the
12660f49b0SGreg Tucker       distribution.
13660f49b0SGreg Tucker     * Neither the name of Intel Corporation nor the names of its
14660f49b0SGreg Tucker       contributors may be used to endorse or promote products derived
15660f49b0SGreg Tucker       from this software without specific prior written permission.
16660f49b0SGreg Tucker 
17660f49b0SGreg Tucker   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18660f49b0SGreg Tucker   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19660f49b0SGreg Tucker   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20660f49b0SGreg Tucker   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21660f49b0SGreg Tucker   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22660f49b0SGreg Tucker   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23660f49b0SGreg Tucker   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24660f49b0SGreg Tucker   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25660f49b0SGreg Tucker   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26660f49b0SGreg Tucker   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27660f49b0SGreg Tucker   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28660f49b0SGreg Tucker **********************************************************************/
29660f49b0SGreg Tucker 
30660f49b0SGreg Tucker /* This program can be used to generate custom a custom huffman encoding to get
31660f49b0SGreg Tucker  * better data compression. This is most useful when the type of data being
32660f49b0SGreg Tucker  * compressed is well known.
33660f49b0SGreg Tucker  *
34660f49b0SGreg Tucker  * To use generate_custom_hufftables, pass a sequence of files to the program
35660f49b0SGreg Tucker  * that together form an accurate representation of the data that is being
36660f49b0SGreg Tucker  * compressed. Generate_custom_hufftables will then produce the file
37660f49b0SGreg Tucker  * hufftables_c.c, which should be moved to replace its counterpart in the igzip
38660f49b0SGreg Tucker  * source folder. After recompiling the Isa-l library, the igzip compression
39660f49b0SGreg Tucker  * functions will use the new hufftables.
40660f49b0SGreg Tucker  *
41660f49b0SGreg Tucker  * Generate_custom_hufftables should be compiled with the same compile time
42660f49b0SGreg Tucker  * parameters as the igzip source code. Generating custom hufftables with
43660f49b0SGreg Tucker  * different compile time parameters may cause igzip to produce invalid output
44660f49b0SGreg Tucker  * for the reasons described below. The default parameters used by
45660f49b0SGreg Tucker  * generate_custom_hufftables are the same as the default parameters used by
46660f49b0SGreg Tucker  * igzip.
47660f49b0SGreg Tucker  *
4888f95d85SRoy Oursler  * *WARNING* generate custom hufftables must be compiled with a IGZIP_HIST_SIZE
4988f95d85SRoy Oursler  * that is at least as large as the IGZIP_HIST_SIZE used by igzip. By default
5091fef2d3SRoy Oursler  * IGZIP_HIST_SIZE is 32K, the maximum usable IGZIP_HIST_SIZE is 32K. The reason
5188f95d85SRoy Oursler  * for this is to generate better compression. Igzip cannot produce look back
5288f95d85SRoy Oursler  * distances with sizes larger than the IGZIP_HIST_SIZE igzip was compiled with,
5388f95d85SRoy Oursler  * so look back distances with sizes larger than IGZIP_HIST_SIZE are not
5491fef2d3SRoy Oursler  * assigned a huffman code. The definition of LONGER_HUFFTABLES must be
5591fef2d3SRoy Oursler  * consistent as well since that definition changes the size of the structures
5691fef2d3SRoy Oursler  * printed by this tool.
57660f49b0SGreg Tucker  *
58660f49b0SGreg Tucker  */
59660f49b0SGreg Tucker 
60660f49b0SGreg Tucker #include <stdint.h>
61660f49b0SGreg Tucker #include <stdio.h>
62660f49b0SGreg Tucker #include <inttypes.h>
6391fef2d3SRoy Oursler #include <string.h>
6491fef2d3SRoy Oursler #include <stdlib.h>
6591fef2d3SRoy Oursler #include "igzip_lib.h"
66660f49b0SGreg Tucker 
679968e7a0SGreg Tucker #include "huff_codes.h"
689968e7a0SGreg Tucker #include "huffman.h"
699968e7a0SGreg Tucker 
70660f49b0SGreg Tucker /*These max code lengths are limited by how the data is stored in
71660f49b0SGreg Tucker  * hufftables.asm. The deflate standard max is 15.*/
72660f49b0SGreg Tucker 
7388f95d85SRoy Oursler #define MAX_HEADER_SIZE ISAL_DEF_MAX_HDR_SIZE
74660f49b0SGreg Tucker 
75660f49b0SGreg Tucker #define GZIP_HEADER_SIZE  10
76660f49b0SGreg Tucker #define GZIP_TRAILER_SIZE 8
7742591691SRoy Oursler #define ZLIB_HEADER_SIZE  2
7842591691SRoy Oursler #define ZLIB_TRAILER_SIZE 4
79660f49b0SGreg Tucker 
80660f49b0SGreg Tucker /**
81660f49b0SGreg Tucker  * @brief Prints a table of uint8_t elements to a file.
82660f49b0SGreg Tucker  * @param outfile: the file the table is printed to.
83660f49b0SGreg Tucker  * @param table: the table to be printed.
84660f49b0SGreg Tucker  * @param length: number of elements to be printed.
85660f49b0SGreg Tucker  * @param header: header to append in front of the table.
86660f49b0SGreg Tucker  * @param footer: footer to append at the end of the table.
87660f49b0SGreg Tucker  * @param begin_line: string printed at beginning of new line
88660f49b0SGreg Tucker  */
89*55fbfabfSMarcel Cornu void
fprint_uint8_table(FILE * outfile,uint8_t * table,uint64_t length,char * header,char * footer,char * begin_line)90*55fbfabfSMarcel Cornu fprint_uint8_table(FILE *outfile, uint8_t *table, uint64_t length, char *header, char *footer,
91*55fbfabfSMarcel Cornu                    char *begin_line)
92660f49b0SGreg Tucker {
93660f49b0SGreg Tucker         int i;
94660f49b0SGreg Tucker         fprintf(outfile, "%s", header);
95660f49b0SGreg Tucker         for (i = 0; i < length - 1; i++) {
96660f49b0SGreg Tucker                 if ((i & 7) == 0)
97660f49b0SGreg Tucker                         fprintf(outfile, "\n%s", begin_line);
98660f49b0SGreg Tucker                 else
99660f49b0SGreg Tucker                         fprintf(outfile, " ");
100660f49b0SGreg Tucker                 fprintf(outfile, "0x%02x,", table[i]);
101660f49b0SGreg Tucker         }
102660f49b0SGreg Tucker 
103660f49b0SGreg Tucker         if ((i & 7) == 0)
104660f49b0SGreg Tucker                 fprintf(outfile, "\n%s", begin_line);
105660f49b0SGreg Tucker         else
106660f49b0SGreg Tucker                 fprintf(outfile, " ");
107660f49b0SGreg Tucker         fprintf(outfile, "0x%02x", table[i]);
108660f49b0SGreg Tucker         fprintf(outfile, "%s", footer);
109660f49b0SGreg Tucker }
110660f49b0SGreg Tucker 
111660f49b0SGreg Tucker /**
112660f49b0SGreg Tucker  * @brief Prints a table of uint16_t elements to a file.
113660f49b0SGreg Tucker  * @param outfile: the file the table is printed to.
114660f49b0SGreg Tucker  * @param table: the table to be printed.
115660f49b0SGreg Tucker  * @param length: number of elements to be printed.
116660f49b0SGreg Tucker  * @param header: header to append in front of the table.
117660f49b0SGreg Tucker  * @param footer: footer to append at the end of the table.
118660f49b0SGreg Tucker  * @param begin_line: string printed at beginning of new line
119660f49b0SGreg Tucker  */
120*55fbfabfSMarcel Cornu void
fprint_uint16_table(FILE * outfile,uint16_t * table,uint64_t length,char * header,char * footer,char * begin_line)121*55fbfabfSMarcel Cornu fprint_uint16_table(FILE *outfile, uint16_t *table, uint64_t length, char *header, char *footer,
122*55fbfabfSMarcel Cornu                     char *begin_line)
123660f49b0SGreg Tucker {
124660f49b0SGreg Tucker         int i;
125660f49b0SGreg Tucker         fprintf(outfile, "%s", header);
126660f49b0SGreg Tucker         for (i = 0; i < length - 1; i++) {
127660f49b0SGreg Tucker                 if ((i & 7) == 0)
128660f49b0SGreg Tucker                         fprintf(outfile, "\n%s", begin_line);
129660f49b0SGreg Tucker                 else
130660f49b0SGreg Tucker                         fprintf(outfile, " ");
131660f49b0SGreg Tucker                 fprintf(outfile, "0x%04x,", table[i]);
132660f49b0SGreg Tucker         }
133660f49b0SGreg Tucker 
134660f49b0SGreg Tucker         if ((i & 7) == 0)
135660f49b0SGreg Tucker                 fprintf(outfile, "\n%s", begin_line);
136660f49b0SGreg Tucker         else
137660f49b0SGreg Tucker                 fprintf(outfile, " ");
138660f49b0SGreg Tucker         fprintf(outfile, "0x%04x", table[i]);
139660f49b0SGreg Tucker         fprintf(outfile, "%s", footer);
140660f49b0SGreg Tucker }
141660f49b0SGreg Tucker 
142660f49b0SGreg Tucker /**
143660f49b0SGreg Tucker  * @brief Prints a table of uint32_t elements to a file.
144660f49b0SGreg Tucker  * @param outfile: the file the table is printed to.
145660f49b0SGreg Tucker  * @param table: the table to be printed.
146660f49b0SGreg Tucker  * @param length: number of elements to be printed.
147660f49b0SGreg Tucker  * @param header: header to append in front of the table.
148660f49b0SGreg Tucker  * @param footer: footer to append at the end of the table.
149660f49b0SGreg Tucker  * @param begin_line: string printed at beginning of new line
150660f49b0SGreg Tucker  */
151*55fbfabfSMarcel Cornu void
fprint_uint32_table(FILE * outfile,uint32_t * table,uint64_t length,char * header,char * footer,char * begin_line)152*55fbfabfSMarcel Cornu fprint_uint32_table(FILE *outfile, uint32_t *table, uint64_t length, char *header, char *footer,
153*55fbfabfSMarcel Cornu                     char *begin_line)
154660f49b0SGreg Tucker {
155660f49b0SGreg Tucker         int i;
156660f49b0SGreg Tucker         fprintf(outfile, "%s", header);
157660f49b0SGreg Tucker         for (i = 0; i < length - 1; i++) {
158660f49b0SGreg Tucker                 if ((i & 3) == 0)
159660f49b0SGreg Tucker                         fprintf(outfile, "\n%s", begin_line);
160660f49b0SGreg Tucker                 else
161660f49b0SGreg Tucker                         fprintf(outfile, " ");
162660f49b0SGreg Tucker                 fprintf(outfile, "0x%08x,", table[i]);
163660f49b0SGreg Tucker         }
164660f49b0SGreg Tucker 
165660f49b0SGreg Tucker         if ((i & 3) == 0)
166660f49b0SGreg Tucker                 fprintf(outfile, "%s", begin_line);
167660f49b0SGreg Tucker         else
168660f49b0SGreg Tucker                 fprintf(outfile, " ");
169660f49b0SGreg Tucker         fprintf(outfile, "0x%08x", table[i]);
170660f49b0SGreg Tucker         fprintf(outfile, "%s", footer);
171660f49b0SGreg Tucker }
172660f49b0SGreg Tucker 
173*55fbfabfSMarcel Cornu void
fprint_hufftables(FILE * output_file,char * hufftables_name,struct isal_hufftables * hufftables)174*55fbfabfSMarcel Cornu fprint_hufftables(FILE *output_file, char *hufftables_name, struct isal_hufftables *hufftables)
175660f49b0SGreg Tucker {
1767fefb539SRoy Oursler         fprintf(output_file, "struct isal_hufftables %s = {\n\n", hufftables_name);
177660f49b0SGreg Tucker 
17891fef2d3SRoy Oursler         fprint_uint8_table(output_file, hufftables->deflate_hdr,
17991fef2d3SRoy Oursler                            hufftables->deflate_hdr_count +
18091fef2d3SRoy Oursler                                    (hufftables->deflate_hdr_extra_bits + 7) / 8,
18191fef2d3SRoy Oursler                            "\t.deflate_hdr = {", "},\n\n", "\t\t");
182660f49b0SGreg Tucker 
18391fef2d3SRoy Oursler         fprintf(output_file, "\t.deflate_hdr_count = %d,\n", hufftables->deflate_hdr_count);
18491fef2d3SRoy Oursler         fprintf(output_file, "\t.deflate_hdr_extra_bits = %d,\n\n",
18591fef2d3SRoy Oursler                 hufftables->deflate_hdr_extra_bits);
186660f49b0SGreg Tucker 
18791fef2d3SRoy Oursler         fprint_uint32_table(output_file, hufftables->dist_table, IGZIP_DIST_TABLE_SIZE,
18891fef2d3SRoy Oursler                             "\t.dist_table = {", "},\n\n", "\t\t");
189660f49b0SGreg Tucker 
19091fef2d3SRoy Oursler         fprint_uint32_table(output_file, hufftables->len_table, IGZIP_LEN_TABLE_SIZE,
19191fef2d3SRoy Oursler                             "\t.len_table = {", "},\n\n", "\t\t");
19291fef2d3SRoy Oursler 
19391fef2d3SRoy Oursler         fprint_uint16_table(output_file, hufftables->lit_table, IGZIP_LIT_TABLE_SIZE,
19491fef2d3SRoy Oursler                             "\t.lit_table = {", "},\n\n", "\t\t");
19591fef2d3SRoy Oursler         fprint_uint8_table(output_file, hufftables->lit_table_sizes, IGZIP_LIT_TABLE_SIZE,
19691fef2d3SRoy Oursler                            "\t.lit_table_sizes = {", "},\n\n", "\t\t");
19791fef2d3SRoy Oursler 
19891fef2d3SRoy Oursler         fprint_uint16_table(output_file, hufftables->dcodes,
199*55fbfabfSMarcel Cornu                             ISAL_DEF_DIST_SYMBOLS - IGZIP_DECODE_OFFSET, "\t.dcodes = {", "},\n\n",
200*55fbfabfSMarcel Cornu                             "\t\t");
20191fef2d3SRoy Oursler         fprint_uint8_table(output_file, hufftables->dcodes_sizes,
202*55fbfabfSMarcel Cornu                            ISAL_DEF_DIST_SYMBOLS - IGZIP_DECODE_OFFSET, "\t.dcodes_sizes = {",
203*55fbfabfSMarcel Cornu                            "}\n", "\t\t");
204660f49b0SGreg Tucker         fprintf(output_file, "};\n");
205660f49b0SGreg Tucker }
206660f49b0SGreg Tucker 
207*55fbfabfSMarcel Cornu void
fprint_header(FILE * output_file)208*55fbfabfSMarcel Cornu fprint_header(FILE *output_file)
209660f49b0SGreg Tucker {
21091fef2d3SRoy Oursler 
211660f49b0SGreg Tucker         fprintf(output_file, "#include <stdint.h>\n");
212660f49b0SGreg Tucker         fprintf(output_file, "#include <igzip_lib.h>\n\n");
213660f49b0SGreg Tucker 
214*55fbfabfSMarcel Cornu         fprintf(output_file,
215*55fbfabfSMarcel Cornu                 "#if IGZIP_HIST_SIZE > %d\n"
21691fef2d3SRoy Oursler                 "# error \"Invalid history size for the custom hufftable\"\n"
217*55fbfabfSMarcel Cornu                 "#endif\n",
218*55fbfabfSMarcel Cornu                 IGZIP_HIST_SIZE);
21991fef2d3SRoy Oursler 
22091fef2d3SRoy Oursler #ifdef LONGER_HUFFTABLE
221*55fbfabfSMarcel Cornu         fprintf(output_file,
222*55fbfabfSMarcel Cornu                 "#ifndef LONGER_HUFFTABLE\n"
22391fef2d3SRoy Oursler                 "# error \"Custom hufftable requires LONGER_HUFFTABLE to be defined \"\n"
22491fef2d3SRoy Oursler                 "#endif\n");
22591fef2d3SRoy Oursler #else
226*55fbfabfSMarcel Cornu         fprintf(output_file,
227*55fbfabfSMarcel Cornu                 "#ifdef LONGER_HUFFTABLE\n"
22891fef2d3SRoy Oursler                 "# error \"Custom hufftable requires LONGER_HUFFTABLE to not be defined \"\n"
22991fef2d3SRoy Oursler                 "#endif\n");
23091fef2d3SRoy Oursler #endif
23191fef2d3SRoy Oursler         fprintf(output_file, "\n");
23291fef2d3SRoy Oursler 
233660f49b0SGreg Tucker         fprintf(output_file, "const uint8_t gzip_hdr[] = {\n"
234*55fbfabfSMarcel Cornu                              "\t0x1f, 0x8b, 0x08, 0x00, 0x00,\n"
235*55fbfabfSMarcel Cornu                              "\t0x00, 0x00, 0x00, 0x00, 0xff\t};\n\n");
236660f49b0SGreg Tucker 
237660f49b0SGreg Tucker         fprintf(output_file, "const uint32_t gzip_hdr_bytes = %d;\n", GZIP_HEADER_SIZE);
23842591691SRoy Oursler         fprintf(output_file, "const uint32_t gzip_trl_bytes = %d;\n\n", GZIP_TRAILER_SIZE);
23942591691SRoy Oursler 
24042591691SRoy Oursler         fprintf(output_file, "const uint8_t zlib_hdr[] = { 0x78, 0x01 };\n\n");
24142591691SRoy Oursler         fprintf(output_file, "const uint32_t zlib_hdr_bytes = %d;\n", ZLIB_HEADER_SIZE);
24242591691SRoy Oursler         fprintf(output_file, "const uint32_t zlib_trl_bytes = %d;\n", ZLIB_TRAILER_SIZE);
243660f49b0SGreg Tucker }
244660f49b0SGreg Tucker 
245*55fbfabfSMarcel Cornu static uint32_t
convert_dist_to_dist_sym(uint32_t dist)246*55fbfabfSMarcel Cornu convert_dist_to_dist_sym(uint32_t dist)
2479968e7a0SGreg Tucker {
2489968e7a0SGreg Tucker         assert(dist <= 32768 && dist > 0);
2499968e7a0SGreg Tucker         if (dist <= 32768) {
2509968e7a0SGreg Tucker                 uint32_t msb = dist > 4 ? bsr(dist - 1) - 2 : 0;
2519968e7a0SGreg Tucker                 return (msb * 2) + ((dist - 1) >> msb);
2529968e7a0SGreg Tucker         } else {
2539968e7a0SGreg Tucker                 return ~0;
2549968e7a0SGreg Tucker         }
2559968e7a0SGreg Tucker }
2569968e7a0SGreg Tucker 
2579968e7a0SGreg Tucker /**
2589968e7a0SGreg Tucker  * @brief  Returns the deflate symbol value for a repeat length.
2599968e7a0SGreg Tucker  */
260*55fbfabfSMarcel Cornu static uint32_t
convert_length_to_len_sym(uint32_t length)261*55fbfabfSMarcel Cornu convert_length_to_len_sym(uint32_t length)
2629968e7a0SGreg Tucker {
2639968e7a0SGreg Tucker         assert(length > 2 && length < 259);
2649968e7a0SGreg Tucker 
2659968e7a0SGreg Tucker         /* Based on tables on page 11 in RFC 1951 */
2669968e7a0SGreg Tucker         if (length < 11)
2679968e7a0SGreg Tucker                 return 257 + length - 3;
2689968e7a0SGreg Tucker         else if (length < 19)
2699968e7a0SGreg Tucker                 return 261 + (length - 3) / 2;
2709968e7a0SGreg Tucker         else if (length < 35)
2719968e7a0SGreg Tucker                 return 265 + (length - 3) / 4;
2729968e7a0SGreg Tucker         else if (length < 67)
2739968e7a0SGreg Tucker                 return 269 + (length - 3) / 8;
2749968e7a0SGreg Tucker         else if (length < 131)
2759968e7a0SGreg Tucker                 return 273 + (length - 3) / 16;
2769968e7a0SGreg Tucker         else if (length < 258)
2779968e7a0SGreg Tucker                 return 277 + (length - 3) / 32;
2789968e7a0SGreg Tucker         else
2799968e7a0SGreg Tucker                 return 285;
2809968e7a0SGreg Tucker }
2819968e7a0SGreg Tucker 
282*55fbfabfSMarcel Cornu void
isal_update_histogram_dict(uint8_t * start_stream,int dict_length,int length,struct isal_huff_histogram * histogram)283*55fbfabfSMarcel Cornu isal_update_histogram_dict(uint8_t *start_stream, int dict_length, int length,
2849968e7a0SGreg Tucker                            struct isal_huff_histogram *histogram)
2859968e7a0SGreg Tucker {
2869968e7a0SGreg Tucker         uint32_t literal = 0, hash;
2879968e7a0SGreg Tucker         uint16_t seen, *last_seen = histogram->hash_table;
2889968e7a0SGreg Tucker         uint8_t *current, *end_stream, *next_hash, *end, *end_dict;
2899968e7a0SGreg Tucker         uint32_t match_length;
2909968e7a0SGreg Tucker         uint32_t dist;
2919968e7a0SGreg Tucker         uint64_t *lit_len_histogram = histogram->lit_len_histogram;
2929968e7a0SGreg Tucker         uint64_t *dist_histogram = histogram->dist_histogram;
2939968e7a0SGreg Tucker 
2949968e7a0SGreg Tucker         if (length <= 0)
2959968e7a0SGreg Tucker                 return;
2969968e7a0SGreg Tucker 
2979968e7a0SGreg Tucker         end_stream = start_stream + dict_length + length;
2989968e7a0SGreg Tucker         end_dict = start_stream + dict_length;
2999968e7a0SGreg Tucker 
3009968e7a0SGreg Tucker         memset(last_seen, 0, sizeof(histogram->hash_table)); /* Initialize last_seen to be 0. */
3019968e7a0SGreg Tucker 
3029968e7a0SGreg Tucker         for (current = start_stream; current < end_dict - 4; current++) {
303d3cfb2fbSIlya Leoshkevich                 literal = load_le_u32(current);
3049968e7a0SGreg Tucker                 hash = compute_hash(literal) & LVL0_HASH_MASK;
3059968e7a0SGreg Tucker                 last_seen[hash] = (current - start_stream) & 0xFFFF;
3069968e7a0SGreg Tucker         }
3079968e7a0SGreg Tucker 
3089968e7a0SGreg Tucker         for (current = start_stream + dict_length; current < end_stream - 3; current++) {
309d3cfb2fbSIlya Leoshkevich                 literal = load_le_u32(current);
3109968e7a0SGreg Tucker                 hash = compute_hash(literal) & LVL0_HASH_MASK;
3119968e7a0SGreg Tucker                 seen = last_seen[hash];
3129968e7a0SGreg Tucker                 last_seen[hash] = (current - start_stream) & 0xFFFF;
3139968e7a0SGreg Tucker                 dist = (current - start_stream - seen) & 0xFFFF;
3149968e7a0SGreg Tucker                 if (dist - 1 < D - 1) {
3159968e7a0SGreg Tucker                         assert(start_stream <= current - dist);
316*55fbfabfSMarcel Cornu                         match_length = compare258(current - dist, current, end_stream - current);
3179968e7a0SGreg Tucker                         if (match_length >= SHORTEST_MATCH) {
3189968e7a0SGreg Tucker                                 next_hash = current;
3199968e7a0SGreg Tucker #ifdef ISAL_LIMIT_HASH_UPDATE
3209968e7a0SGreg Tucker                                 end = next_hash + 3;
3219968e7a0SGreg Tucker #else
3229968e7a0SGreg Tucker                                 end = next_hash + match_length;
3239968e7a0SGreg Tucker #endif
3249968e7a0SGreg Tucker                                 if (end > end_stream - 3)
3259968e7a0SGreg Tucker                                         end = end_stream - 3;
3269968e7a0SGreg Tucker                                 next_hash++;
3279968e7a0SGreg Tucker                                 for (; next_hash < end; next_hash++) {
328d3cfb2fbSIlya Leoshkevich                                         literal = load_le_u32(next_hash);
3299968e7a0SGreg Tucker                                         hash = compute_hash(literal) & LVL0_HASH_MASK;
3309968e7a0SGreg Tucker                                         last_seen[hash] = (next_hash - start_stream) & 0xFFFF;
3319968e7a0SGreg Tucker                                 }
3329968e7a0SGreg Tucker 
3339968e7a0SGreg Tucker                                 dist_histogram[convert_dist_to_dist_sym(dist)] += 1;
334*55fbfabfSMarcel Cornu                                 lit_len_histogram[convert_length_to_len_sym(match_length)] += 1;
3359968e7a0SGreg Tucker                                 current += match_length - 1;
3369968e7a0SGreg Tucker                                 continue;
3379968e7a0SGreg Tucker                         }
3389968e7a0SGreg Tucker                 }
3399968e7a0SGreg Tucker                 lit_len_histogram[literal & 0xFF] += 1;
3409968e7a0SGreg Tucker         }
3419968e7a0SGreg Tucker 
3429968e7a0SGreg Tucker         for (; current < end_stream; current++)
3439968e7a0SGreg Tucker                 lit_len_histogram[*current] += 1;
3449968e7a0SGreg Tucker 
3459968e7a0SGreg Tucker         lit_len_histogram[256] += 1;
3469968e7a0SGreg Tucker         return;
3479968e7a0SGreg Tucker }
3489968e7a0SGreg Tucker 
349*55fbfabfSMarcel Cornu int
main(int argc,char * argv[])350*55fbfabfSMarcel Cornu main(int argc, char *argv[])
351660f49b0SGreg Tucker {
352660f49b0SGreg Tucker         long int file_length;
3539968e7a0SGreg Tucker         int argi = 1;
354660f49b0SGreg Tucker         uint8_t *stream = NULL;
35591fef2d3SRoy Oursler         struct isal_hufftables hufftables;
356660f49b0SGreg Tucker         struct isal_huff_histogram histogram;
35791fef2d3SRoy Oursler         struct isal_zstream tmp_stream;
3589968e7a0SGreg Tucker         FILE *file = NULL;
3599968e7a0SGreg Tucker         FILE *dict_file = NULL;
360438ecd81SGreg Tucker         FILE *hist_file = NULL;
3619968e7a0SGreg Tucker         long int dict_file_length = 0;
362438ecd81SGreg Tucker         long int hist_file_length = 0;
3639968e7a0SGreg Tucker         uint8_t *dict_stream = NULL;
364660f49b0SGreg Tucker 
365660f49b0SGreg Tucker         if (argc == 1) {
366660f49b0SGreg Tucker                 printf("Error, no input file.\n");
367660f49b0SGreg Tucker                 return 1;
368660f49b0SGreg Tucker         }
369660f49b0SGreg Tucker 
3709968e7a0SGreg Tucker         if (argc > 3 && argv[1][0] == '-' && argv[1][1] == 'd') {
3719968e7a0SGreg Tucker                 dict_file = fopen(argv[2], "r");
3725a00eaecSTomasz Kantecki                 if (dict_file == NULL) {
3735a00eaecSTomasz Kantecki                         printf("File \"%s\" open error!\n", argv[2]);
3745a00eaecSTomasz Kantecki                         return 1;
3755a00eaecSTomasz Kantecki                 }
3769968e7a0SGreg Tucker 
3779968e7a0SGreg Tucker                 fseek(dict_file, 0, SEEK_END);
3789968e7a0SGreg Tucker                 dict_file_length = ftell(dict_file);
3799968e7a0SGreg Tucker                 fseek(dict_file, 0, SEEK_SET);
3809968e7a0SGreg Tucker                 dict_file_length -= ftell(dict_file);
3819968e7a0SGreg Tucker                 dict_stream = malloc(dict_file_length);
3829968e7a0SGreg Tucker                 if (dict_stream == NULL) {
3839968e7a0SGreg Tucker                         printf("Failed to allocate memory to read in dictionary file\n");
3849968e7a0SGreg Tucker                         fclose(dict_file);
3859968e7a0SGreg Tucker                         return 1;
3869968e7a0SGreg Tucker                 }
3879968e7a0SGreg Tucker                 if (fread(dict_stream, 1, dict_file_length, dict_file) != dict_file_length) {
3889968e7a0SGreg Tucker                         printf("Error occurred when reading dictionary file");
3899968e7a0SGreg Tucker                         fclose(dict_file);
3909968e7a0SGreg Tucker                         free(dict_stream);
3919968e7a0SGreg Tucker                         return 1;
3929968e7a0SGreg Tucker                 }
3939968e7a0SGreg Tucker                 isal_update_histogram(dict_stream, dict_file_length, &histogram);
3949968e7a0SGreg Tucker 
3959968e7a0SGreg Tucker                 printf("Read %ld bytes of dictionary file %s\n", dict_file_length, argv[2]);
3969968e7a0SGreg Tucker                 argi += 2;
3979968e7a0SGreg Tucker                 fclose(dict_file);
3989968e7a0SGreg Tucker                 free(dict_stream);
3999968e7a0SGreg Tucker         }
4009968e7a0SGreg Tucker 
401438ecd81SGreg Tucker         if ((argc > argi + 1) && argv[argi][0] == '-' && argv[argi][1] == 'h') {
402438ecd81SGreg Tucker                 hist_file = fopen(argv[argi + 1], "r+");
4035a00eaecSTomasz Kantecki                 if (hist_file == NULL) {
4045a00eaecSTomasz Kantecki                         printf("File \"%s\" open error!\n", argv[argi + 1]);
4055a00eaecSTomasz Kantecki                         return 1;
4065a00eaecSTomasz Kantecki                 }
407438ecd81SGreg Tucker                 fseek(hist_file, 0, SEEK_END);
408438ecd81SGreg Tucker                 hist_file_length = ftell(hist_file);
409438ecd81SGreg Tucker                 fseek(hist_file, 0, SEEK_SET);
410438ecd81SGreg Tucker                 hist_file_length -= ftell(hist_file);
411438ecd81SGreg Tucker                 if (hist_file_length > sizeof(histogram)) {
412438ecd81SGreg Tucker                         printf("Histogram file too long\n");
413438ecd81SGreg Tucker                         return 1;
414438ecd81SGreg Tucker                 }
415438ecd81SGreg Tucker                 if (fread(&histogram, 1, hist_file_length, hist_file) != hist_file_length) {
416438ecd81SGreg Tucker                         printf("Error occurred when reading history file");
417438ecd81SGreg Tucker                         fclose(hist_file);
418438ecd81SGreg Tucker                         return 1;
419438ecd81SGreg Tucker                 }
420438ecd81SGreg Tucker                 fseek(hist_file, 0, SEEK_SET);
421438ecd81SGreg Tucker 
422*55fbfabfSMarcel Cornu                 printf("Read %ld bytes of history file %s\n", hist_file_length, argv[argi + 1]);
423438ecd81SGreg Tucker                 argi += 2;
424438ecd81SGreg Tucker         } else
425660f49b0SGreg Tucker                 memset(&histogram, 0, sizeof(histogram)); /* Initialize histograms. */
426660f49b0SGreg Tucker 
4279968e7a0SGreg Tucker         while (argi < argc) {
4289968e7a0SGreg Tucker                 printf("Processing %s\n", argv[argi]);
4299968e7a0SGreg Tucker                 file = fopen(argv[argi], "r");
430660f49b0SGreg Tucker                 if (file == NULL) {
431660f49b0SGreg Tucker                         printf("Error opening file\n");
432660f49b0SGreg Tucker                         return 1;
433660f49b0SGreg Tucker                 }
434660f49b0SGreg Tucker                 fseek(file, 0, SEEK_END);
435660f49b0SGreg Tucker                 file_length = ftell(file);
436660f49b0SGreg Tucker                 fseek(file, 0, SEEK_SET);
437660f49b0SGreg Tucker                 file_length -= ftell(file);
4389968e7a0SGreg Tucker                 stream = malloc(file_length + dict_file_length);
439660f49b0SGreg Tucker                 if (stream == NULL) {
440660f49b0SGreg Tucker                         printf("Failed to allocate memory to read in file\n");
441660f49b0SGreg Tucker                         fclose(file);
442660f49b0SGreg Tucker                         return 1;
443660f49b0SGreg Tucker                 }
4449968e7a0SGreg Tucker                 if (dict_file_length > 0)
4459968e7a0SGreg Tucker                         memcpy(stream, dict_stream, dict_file_length);
4469968e7a0SGreg Tucker 
4479968e7a0SGreg Tucker                 if (fread(&stream[dict_file_length], 1, file_length, file) != file_length) {
448660f49b0SGreg Tucker                         printf("Error occurred when reading file");
449660f49b0SGreg Tucker                         fclose(file);
450660f49b0SGreg Tucker                         free(stream);
451660f49b0SGreg Tucker                         return 1;
452660f49b0SGreg Tucker                 }
453660f49b0SGreg Tucker 
454660f49b0SGreg Tucker                 /* Create a histogram of frequency of symbols found in stream to
455660f49b0SGreg Tucker                  * generate the huffman tree.*/
4569968e7a0SGreg Tucker                 if (0 == dict_file_length)
457660f49b0SGreg Tucker                         isal_update_histogram(stream, file_length, &histogram);
4589968e7a0SGreg Tucker                 else
4599968e7a0SGreg Tucker                         isal_update_histogram_dict(stream, dict_file_length, file_length,
4609968e7a0SGreg Tucker                                                    &histogram);
461660f49b0SGreg Tucker 
462660f49b0SGreg Tucker                 fclose(file);
463660f49b0SGreg Tucker                 free(stream);
4649968e7a0SGreg Tucker                 argi++;
465660f49b0SGreg Tucker         }
466660f49b0SGreg Tucker 
46791fef2d3SRoy Oursler         isal_create_hufftables(&hufftables, &histogram);
468660f49b0SGreg Tucker 
469660f49b0SGreg Tucker         file = fopen("hufftables_c.c", "w");
470660f49b0SGreg Tucker         if (file == NULL) {
471660f49b0SGreg Tucker                 printf("Error creating file hufftables_c.c\n");
472660f49b0SGreg Tucker                 return 1;
473660f49b0SGreg Tucker         }
474660f49b0SGreg Tucker 
4757fefb539SRoy Oursler         fprint_header(file);
4767fefb539SRoy Oursler 
4777fefb539SRoy Oursler         fprintf(file, "\n");
4787fefb539SRoy Oursler 
47991fef2d3SRoy Oursler         fprint_hufftables(file, "hufftables_default", &hufftables);
4807fefb539SRoy Oursler 
4817fefb539SRoy Oursler         fprintf(file, "\n");
4827fefb539SRoy Oursler 
48391fef2d3SRoy Oursler         isal_deflate_stateless_init(&tmp_stream);
48491fef2d3SRoy Oursler         isal_deflate_set_hufftables(&tmp_stream, NULL, IGZIP_HUFFTABLE_STATIC);
48591fef2d3SRoy Oursler         fprint_hufftables(file, "hufftables_static", tmp_stream.hufftables);
486660f49b0SGreg Tucker 
487660f49b0SGreg Tucker         fclose(file);
488660f49b0SGreg Tucker 
489438ecd81SGreg Tucker         if (hist_file) {
490438ecd81SGreg Tucker                 int len = fwrite(&histogram, 1, sizeof(histogram), hist_file);
491438ecd81SGreg Tucker                 printf("wrote %d bytes of histogram file\n", len);
492438ecd81SGreg Tucker                 fclose(hist_file);
493438ecd81SGreg Tucker         }
494660f49b0SGreg Tucker         return 0;
495660f49b0SGreg Tucker }
496