xref: /isa-l/igzip/huffman.asm (revision c1876a12216f73bcb169524d32be29831579740c)
1;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2;  Copyright(c) 2011-2016 Intel Corporation All rights reserved.
3;
4;  Redistribution and use in source and binary forms, with or without
5;  modification, are permitted provided that the following conditions
6;  are met:
7;    * Redistributions of source code must retain the above copyright
8;      notice, this list of conditions and the following disclaimer.
9;    * Redistributions in binary form must reproduce the above copyright
10;      notice, this list of conditions and the following disclaimer in
11;      the documentation and/or other materials provided with the
12;      distribution.
13;    * Neither the name of Intel Corporation nor the names of its
14;      contributors may be used to endorse or promote products derived
15;      from this software without specific prior written permission.
16;
17;  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18;  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19;  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20;  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21;  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22;  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23;  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24;  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25;  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26;  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27;  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
29
30%include "options.asm"
31%include "lz0a_const.asm"
32%include "stdmac.asm"
33
34; Macros for doing Huffman Encoding
35
36%ifdef LONGER_HUFFTABLE
37	%if (D > 8192)
38		%error History D is larger than 8K, cannot use %LONGER_HUFFTABLE
39		 % error
40	%else
41		%define DIST_TABLE_SIZE 8192
42		%define DECODE_OFFSET     26
43	%endif
44%else
45	%define DIST_TABLE_SIZE 2
46	%define DECODE_OFFSET     0
47%endif
48
49%define LEN_TABLE_SIZE 256
50%define LIT_TABLE_SIZE 257
51
52%define DIST_TABLE_START (ISAL_DEF_MAX_HDR_SIZE + 8)
53%define DIST_TABLE_OFFSET (DIST_TABLE_START + - 4 * 1)
54%define LEN_TABLE_OFFSET (DIST_TABLE_START + DIST_TABLE_SIZE * 4 - 4*3)
55%define LIT_TABLE_OFFSET (DIST_TABLE_START + 4 * DIST_TABLE_SIZE + 4 * LEN_TABLE_SIZE)
56%define LIT_TABLE_SIZES_OFFSET (LIT_TABLE_OFFSET + 2 * LIT_TABLE_SIZE)
57%define DCODE_TABLE_OFFSET (LIT_TABLE_SIZES_OFFSET + LIT_TABLE_SIZE + 1 - DECODE_OFFSET * 2)
58%define DCODE_TABLE_SIZE_OFFSET (DCODE_TABLE_OFFSET + 2 * 30 - DECODE_OFFSET)
59;; /** @brief Holds the huffman tree used to huffman encode the input stream **/
60;; struct isal_hufftables {
61;;	// deflate huffman tree header
62;; 	uint8_t deflate_huff_hdr[ISAL_DEF_MAX_HDR_SIZE];
63;;
64;;	//!< Number of whole bytes in deflate_huff_hdr
65;;	uint32_t deflate_huff_hdr_count;
66;;
67;;	//!< Number of bits in the partial byte in header
68;; 	uint32_t deflate_huff_hdr_extra_bits;
69;;
70;;	//!< bits 7:0 are the code length, bits 31:8 are the code
71;; 	uint32_t dist_table[DIST_TABLE_SIZE];
72;;
73;;	//!< bits 7:0 are the code length, bits 31:8 are the code
74;; 	uint32_t len_table[LEN_TABLE_SIZE];
75;;
76;;	//!< bits 3:0 are the code length, bits 15:4 are the code
77;; 	uint16_t lit_table[LIT_TABLE_SIZE];
78;;
79;;	//!< bits 3:0 are the code length, bits 15:4 are the code
80;; 	uint16_t dcodes[30 - DECODE_OFFSET];
81
82;; };
83
84
85%ifdef LONGER_HUFFTABLE
86; Uses RCX, clobbers dist
87; get_dist_code	dist, code, len
88%macro get_dist_code 4
89%define %%dist %1	; 64-bit IN
90%define %%code %2d	; 32-bit OUT
91%define %%len  %3d	; 32-bit OUT
92%define %%hufftables %4	; address of the hufftable
93
94	mov	%%len, [%%hufftables + DIST_TABLE_OFFSET + 4*(%%dist + 1) ]
95	mov	%%code, %%len
96	and	%%len, 0x1F;
97	shr	%%code, 5
98%endm
99
100%macro get_packed_dist_code 3
101%define %%dist %1	; 64-bit IN
102%define %%code_len  %2d	; 32-bit OUT
103%define %%hufftables %3	; address of the hufftable
104	mov	%%code_len, [%%hufftables + DIST_TABLE_OFFSET + 4*%%dist ]
105%endm
106
107%macro unpack_dist_code 2
108%define %%code %1d	; 32-bit OUT
109%define %%len  %2d	; 32-bit OUT
110
111	mov	%%len, %%code
112	and	%%len, 0x1F;
113	shr	%%code, 5
114%endm
115
116%else
117; Assumes (dist != 0)
118; Uses RCX, clobbers dist
119; void compute_dist_code	dist, code, len
120%macro compute_dist_code 4
121%define %%dist %1	; IN, clobbered
122%define %%distq %1
123%define %%code %2	; OUT
124%define %%len  %3	; OUT
125%define %%hufftables %4
126
127	bsr	rcx, %%dist	; ecx = msb = bsr(dist)
128	dec	rcx		; ecx = num_extra_bits = msb - N
129	BZHI	%%code, %%dist, rcx, %%len
130	SHRX	%%dist, %%dist, rcx	; dist >>= num_extra_bits
131	lea	%%dist, [%%dist + 2*rcx] ; dist = sym = dist + num_extra_bits*2
132	mov	%%len, rcx	; len = num_extra_bits
133	movzx	rcx, byte [hufftables + DCODE_TABLE_SIZE_OFFSET + %%distq WRT_OPT]
134	movzx	%%dist, word [hufftables + DCODE_TABLE_OFFSET + 2 * %%distq WRT_OPT]
135	SHLX	%%code, %%code, rcx	; code = extra_bits << (sym & 0xF)
136	or	%%code, %%dist	; code = (sym >> 4) | (extra_bits << (sym & 0xF))
137	add	%%len, rcx	; len = num_extra_bits + (sym & 0xF)
138%endm
139
140; Uses RCX, clobbers dist
141; get_dist_code	dist, code, len
142%macro get_dist_code 4
143%define %%dist %1	; 32-bit IN, clobbered
144%define %%distq %1	; 64-bit IN, clobbered
145%define %%code %2	; 32-bit OUT
146%define %%len  %3	; 32-bit OUT
147%define %%hufftables %4
148
149	cmp	%%dist, DIST_TABLE_SIZE - 1
150	jg	%%do_compute
151%ifndef IACA
152	mov	%%len %+ d, dword [hufftables + DIST_TABLE_OFFSET + 4*(%%distq + 1) WRT_OPT]
153	mov	%%code, %%len
154	and	%%len, 0x1F;
155	shr	%%code, 5
156	jmp	%%done
157%endif
158%%do_compute:
159	compute_dist_code	%%distq, %%code, %%len, %%hufftables
160%%done:
161%endm
162
163%macro get_packed_dist_code 3
164%define %%dist %1	; 64-bit IN
165%define %%code_len  %2d	; 32-bit OUT
166%define %%hufftables %3	; address of the hufftable
167%endm
168
169%endif
170
171
172; Macros for doing Huffman Encoding
173
174; Assumes (dist != 0)
175; Uses RCX, clobbers dist
176; void compute_dist_code	dist, code, len
177%macro compute_dist_icf_code 3
178%define %%dist %1	; IN, clobbered
179%define %%distq %1
180%define %%code %2	; OUT
181%define	%%tmp1 %3
182
183	bsr	rcx, %%dist	; ecx = msb = bsr(dist)
184	dec	rcx		; ecx = num_extra_bits = msb - N
185	BZHI	%%code, %%dist, rcx, %%tmp1
186	SHRX	%%dist, %%dist, rcx	; dist >>= num_extra_bits
187	lea	%%dist, [%%dist + 2*rcx] ; code = sym = dist + num_extra_bits*2
188	shl	%%code, EXTRA_BITS_OFFSET - DIST_OFFSET
189	add	%%code, %%dist	; code = extra_bits | sym
190
191%endm
192
193; Uses RCX, clobbers dist
194; get_dist_code	dist, code, len
195%macro get_dist_icf_code 3
196%define %%dist %1	; 32-bit IN, clobbered
197%define %%distq %1	; 64-bit IN, clobbered
198%define %%code %2	; 32-bit OUT
199%define %%tmp1 %3
200
201	cmp	%%dist, 1
202	jg	%%do_compute
203
204%ifnidn	%%code, %%dist
205	mov	%%code, %%dist
206%endif
207	jmp 	%%done
208%%do_compute:
209	compute_dist_icf_code	%%distq, %%code, %%tmp1
210%%done:
211	shl	%%code, DIST_OFFSET
212%endm
213
214
215; "len" can be same register as "length"
216; get_len_code	length, code, len
217%macro get_len_code 4
218%define %%length %1	; 64-bit IN
219%define %%code %2d	; 32-bit OUT
220%define %%len  %3d	; 32-bit OUT
221%define %%hufftables %4
222
223	mov	%%len, [%%hufftables + LEN_TABLE_OFFSET + 4 * %%length]
224	mov	%%code, %%len
225	and	%%len, 0x1F
226	shr	%%code, 5
227%endm
228
229
230%macro get_lit_code 4
231%define %%lit %1	; 64-bit IN or CONST
232%define %%code %2d	; 32-bit OUT
233%define %%len  %3d	; 32-bit OUT
234%define %%hufftables %4
235
236	movzx	%%len, byte [%%hufftables + LIT_TABLE_SIZES_OFFSET + %%lit]
237	movzx	%%code, word [%%hufftables + LIT_TABLE_OFFSET + 2 * %%lit]
238
239%endm
240
241
242;; Compute hash of first 3 bytes of data
243%macro compute_hash 2
244%define %%result	%1d	; 32-bit reg
245%define %%data		%2d	; 32-bit reg (low byte not clobbered)
246
247	xor	%%result, %%result
248	crc32	%%result, %%data
249%endm
250