xref: /isa-l/igzip/huffman.asm (revision c1876a12216f73bcb169524d32be29831579740c)
1660f49b0SGreg Tucker;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2660f49b0SGreg Tucker;  Copyright(c) 2011-2016 Intel Corporation All rights reserved.
3660f49b0SGreg Tucker;
4660f49b0SGreg Tucker;  Redistribution and use in source and binary forms, with or without
5660f49b0SGreg Tucker;  modification, are permitted provided that the following conditions
6660f49b0SGreg Tucker;  are met:
7660f49b0SGreg Tucker;    * Redistributions of source code must retain the above copyright
8660f49b0SGreg Tucker;      notice, this list of conditions and the following disclaimer.
9660f49b0SGreg Tucker;    * Redistributions in binary form must reproduce the above copyright
10660f49b0SGreg Tucker;      notice, this list of conditions and the following disclaimer in
11660f49b0SGreg Tucker;      the documentation and/or other materials provided with the
12660f49b0SGreg Tucker;      distribution.
13660f49b0SGreg Tucker;    * Neither the name of Intel Corporation nor the names of its
14660f49b0SGreg Tucker;      contributors may be used to endorse or promote products derived
15660f49b0SGreg Tucker;      from this software without specific prior written permission.
16660f49b0SGreg Tucker;
17660f49b0SGreg Tucker;  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18660f49b0SGreg Tucker;  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19660f49b0SGreg Tucker;  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20660f49b0SGreg Tucker;  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21660f49b0SGreg Tucker;  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22660f49b0SGreg Tucker;  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23660f49b0SGreg Tucker;  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24660f49b0SGreg Tucker;  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25660f49b0SGreg Tucker;  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26660f49b0SGreg Tucker;  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27660f49b0SGreg Tucker;  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28660f49b0SGreg Tucker;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
29660f49b0SGreg Tucker
30660f49b0SGreg Tucker%include "options.asm"
31660f49b0SGreg Tucker%include "lz0a_const.asm"
32*c1876a12SRoy Oursler%include "stdmac.asm"
33660f49b0SGreg Tucker
34660f49b0SGreg Tucker; Macros for doing Huffman Encoding
35660f49b0SGreg Tucker
36660f49b0SGreg Tucker%ifdef LONGER_HUFFTABLE
37660f49b0SGreg Tucker	%if (D > 8192)
38660f49b0SGreg Tucker		%error History D is larger than 8K, cannot use %LONGER_HUFFTABLE
39660f49b0SGreg Tucker		 % error
40660f49b0SGreg Tucker	%else
41660f49b0SGreg Tucker		%define DIST_TABLE_SIZE 8192
42660f49b0SGreg Tucker		%define DECODE_OFFSET     26
43660f49b0SGreg Tucker	%endif
44660f49b0SGreg Tucker%else
459d53af0cSRoy Oursler	%define DIST_TABLE_SIZE 2
469d53af0cSRoy Oursler	%define DECODE_OFFSET     0
47660f49b0SGreg Tucker%endif
48660f49b0SGreg Tucker
49660f49b0SGreg Tucker%define LEN_TABLE_SIZE 256
50660f49b0SGreg Tucker%define LIT_TABLE_SIZE 257
51660f49b0SGreg Tucker
5288f95d85SRoy Oursler%define DIST_TABLE_START (ISAL_DEF_MAX_HDR_SIZE + 8)
53660f49b0SGreg Tucker%define DIST_TABLE_OFFSET (DIST_TABLE_START + - 4 * 1)
54660f49b0SGreg Tucker%define LEN_TABLE_OFFSET (DIST_TABLE_START + DIST_TABLE_SIZE * 4 - 4*3)
55660f49b0SGreg Tucker%define LIT_TABLE_OFFSET (DIST_TABLE_START + 4 * DIST_TABLE_SIZE + 4 * LEN_TABLE_SIZE)
56660f49b0SGreg Tucker%define LIT_TABLE_SIZES_OFFSET (LIT_TABLE_OFFSET + 2 * LIT_TABLE_SIZE)
57660f49b0SGreg Tucker%define DCODE_TABLE_OFFSET (LIT_TABLE_SIZES_OFFSET + LIT_TABLE_SIZE + 1 - DECODE_OFFSET * 2)
58660f49b0SGreg Tucker%define DCODE_TABLE_SIZE_OFFSET (DCODE_TABLE_OFFSET + 2 * 30 - DECODE_OFFSET)
59660f49b0SGreg Tucker;; /** @brief Holds the huffman tree used to huffman encode the input stream **/
60660f49b0SGreg Tucker;; struct isal_hufftables {
61660f49b0SGreg Tucker;;	// deflate huffman tree header
6288f95d85SRoy Oursler;; 	uint8_t deflate_huff_hdr[ISAL_DEF_MAX_HDR_SIZE];
63660f49b0SGreg Tucker;;
64660f49b0SGreg Tucker;;	//!< Number of whole bytes in deflate_huff_hdr
65660f49b0SGreg Tucker;;	uint32_t deflate_huff_hdr_count;
66660f49b0SGreg Tucker;;
67660f49b0SGreg Tucker;;	//!< Number of bits in the partial byte in header
68660f49b0SGreg Tucker;; 	uint32_t deflate_huff_hdr_extra_bits;
69660f49b0SGreg Tucker;;
70660f49b0SGreg Tucker;;	//!< bits 7:0 are the code length, bits 31:8 are the code
71660f49b0SGreg Tucker;; 	uint32_t dist_table[DIST_TABLE_SIZE];
72660f49b0SGreg Tucker;;
73660f49b0SGreg Tucker;;	//!< bits 7:0 are the code length, bits 31:8 are the code
74660f49b0SGreg Tucker;; 	uint32_t len_table[LEN_TABLE_SIZE];
75660f49b0SGreg Tucker;;
76660f49b0SGreg Tucker;;	//!< bits 3:0 are the code length, bits 15:4 are the code
77660f49b0SGreg Tucker;; 	uint16_t lit_table[LIT_TABLE_SIZE];
78660f49b0SGreg Tucker;;
79660f49b0SGreg Tucker;;	//!< bits 3:0 are the code length, bits 15:4 are the code
80660f49b0SGreg Tucker;; 	uint16_t dcodes[30 - DECODE_OFFSET];
81660f49b0SGreg Tucker
82660f49b0SGreg Tucker;; };
83660f49b0SGreg Tucker
84660f49b0SGreg Tucker
85660f49b0SGreg Tucker%ifdef LONGER_HUFFTABLE
86660f49b0SGreg Tucker; Uses RCX, clobbers dist
87660f49b0SGreg Tucker; get_dist_code	dist, code, len
88660f49b0SGreg Tucker%macro get_dist_code 4
89660f49b0SGreg Tucker%define %%dist %1	; 64-bit IN
90660f49b0SGreg Tucker%define %%code %2d	; 32-bit OUT
91660f49b0SGreg Tucker%define %%len  %3d	; 32-bit OUT
92660f49b0SGreg Tucker%define %%hufftables %4	; address of the hufftable
93660f49b0SGreg Tucker
949d53af0cSRoy Oursler	mov	%%len, [%%hufftables + DIST_TABLE_OFFSET + 4*(%%dist + 1) ]
95660f49b0SGreg Tucker	mov	%%code, %%len
96660f49b0SGreg Tucker	and	%%len, 0x1F;
97660f49b0SGreg Tucker	shr	%%code, 5
98660f49b0SGreg Tucker%endm
99660f49b0SGreg Tucker
100660f49b0SGreg Tucker%macro get_packed_dist_code 3
101660f49b0SGreg Tucker%define %%dist %1	; 64-bit IN
102660f49b0SGreg Tucker%define %%code_len  %2d	; 32-bit OUT
103660f49b0SGreg Tucker%define %%hufftables %3	; address of the hufftable
104660f49b0SGreg Tucker	mov	%%code_len, [%%hufftables + DIST_TABLE_OFFSET + 4*%%dist ]
105660f49b0SGreg Tucker%endm
106660f49b0SGreg Tucker
107660f49b0SGreg Tucker%macro unpack_dist_code 2
108660f49b0SGreg Tucker%define %%code %1d	; 32-bit OUT
109660f49b0SGreg Tucker%define %%len  %2d	; 32-bit OUT
110660f49b0SGreg Tucker
111660f49b0SGreg Tucker	mov	%%len, %%code
112660f49b0SGreg Tucker	and	%%len, 0x1F;
113660f49b0SGreg Tucker	shr	%%code, 5
114660f49b0SGreg Tucker%endm
115660f49b0SGreg Tucker
116660f49b0SGreg Tucker%else
117660f49b0SGreg Tucker; Assumes (dist != 0)
118660f49b0SGreg Tucker; Uses RCX, clobbers dist
119660f49b0SGreg Tucker; void compute_dist_code	dist, code, len
120660f49b0SGreg Tucker%macro compute_dist_code 4
1219d53af0cSRoy Oursler%define %%dist %1	; IN, clobbered
122660f49b0SGreg Tucker%define %%distq %1
123660f49b0SGreg Tucker%define %%code %2	; OUT
124660f49b0SGreg Tucker%define %%len  %3	; OUT
125660f49b0SGreg Tucker%define %%hufftables %4
126660f49b0SGreg Tucker
1279d53af0cSRoy Oursler	bsr	rcx, %%dist	; ecx = msb = bsr(dist)
1289d53af0cSRoy Oursler	dec	rcx		; ecx = num_extra_bits = msb - N
1299d53af0cSRoy Oursler	BZHI	%%code, %%dist, rcx, %%len
1309d53af0cSRoy Oursler	SHRX	%%dist, %%dist, rcx	; dist >>= num_extra_bits
1319d53af0cSRoy Oursler	lea	%%dist, [%%dist + 2*rcx] ; dist = sym = dist + num_extra_bits*2
1329d53af0cSRoy Oursler	mov	%%len, rcx	; len = num_extra_bits
1339d53af0cSRoy Oursler	movzx	rcx, byte [hufftables + DCODE_TABLE_SIZE_OFFSET + %%distq WRT_OPT]
134660f49b0SGreg Tucker	movzx	%%dist, word [hufftables + DCODE_TABLE_OFFSET + 2 * %%distq WRT_OPT]
1359d53af0cSRoy Oursler	SHLX	%%code, %%code, rcx	; code = extra_bits << (sym & 0xF)
136660f49b0SGreg Tucker	or	%%code, %%dist	; code = (sym >> 4) | (extra_bits << (sym & 0xF))
1379d53af0cSRoy Oursler	add	%%len, rcx	; len = num_extra_bits + (sym & 0xF)
138660f49b0SGreg Tucker%endm
139660f49b0SGreg Tucker
140660f49b0SGreg Tucker; Uses RCX, clobbers dist
141660f49b0SGreg Tucker; get_dist_code	dist, code, len
142660f49b0SGreg Tucker%macro get_dist_code 4
1439d53af0cSRoy Oursler%define %%dist %1	; 32-bit IN, clobbered
144660f49b0SGreg Tucker%define %%distq %1	; 64-bit IN, clobbered
1459d53af0cSRoy Oursler%define %%code %2	; 32-bit OUT
1469d53af0cSRoy Oursler%define %%len  %3	; 32-bit OUT
147660f49b0SGreg Tucker%define %%hufftables %4
148660f49b0SGreg Tucker
1499d53af0cSRoy Oursler	cmp	%%dist, DIST_TABLE_SIZE - 1
150660f49b0SGreg Tucker	jg	%%do_compute
1519d53af0cSRoy Oursler%ifndef IACA
1529d53af0cSRoy Oursler	mov	%%len %+ d, dword [hufftables + DIST_TABLE_OFFSET + 4*(%%distq + 1) WRT_OPT]
153660f49b0SGreg Tucker	mov	%%code, %%len
154660f49b0SGreg Tucker	and	%%len, 0x1F;
155660f49b0SGreg Tucker	shr	%%code, 5
156660f49b0SGreg Tucker	jmp	%%done
1579d53af0cSRoy Oursler%endif
158660f49b0SGreg Tucker%%do_compute:
159660f49b0SGreg Tucker	compute_dist_code	%%distq, %%code, %%len, %%hufftables
160660f49b0SGreg Tucker%%done:
161660f49b0SGreg Tucker%endm
162660f49b0SGreg Tucker
163660f49b0SGreg Tucker%macro get_packed_dist_code 3
164660f49b0SGreg Tucker%define %%dist %1	; 64-bit IN
165660f49b0SGreg Tucker%define %%code_len  %2d	; 32-bit OUT
166660f49b0SGreg Tucker%define %%hufftables %3	; address of the hufftable
167660f49b0SGreg Tucker%endm
168660f49b0SGreg Tucker
169660f49b0SGreg Tucker%endif
170660f49b0SGreg Tucker
171660f49b0SGreg Tucker
17201dfbcc4SRoy Oursler; Macros for doing Huffman Encoding
17301dfbcc4SRoy Oursler
17401dfbcc4SRoy Oursler; Assumes (dist != 0)
17501dfbcc4SRoy Oursler; Uses RCX, clobbers dist
17601dfbcc4SRoy Oursler; void compute_dist_code	dist, code, len
17701dfbcc4SRoy Oursler%macro compute_dist_icf_code 3
17801dfbcc4SRoy Oursler%define %%dist %1	; IN, clobbered
17901dfbcc4SRoy Oursler%define %%distq %1
18001dfbcc4SRoy Oursler%define %%code %2	; OUT
18101dfbcc4SRoy Oursler%define	%%tmp1 %3
18201dfbcc4SRoy Oursler
18301dfbcc4SRoy Oursler	bsr	rcx, %%dist	; ecx = msb = bsr(dist)
18401dfbcc4SRoy Oursler	dec	rcx		; ecx = num_extra_bits = msb - N
18501dfbcc4SRoy Oursler	BZHI	%%code, %%dist, rcx, %%tmp1
18601dfbcc4SRoy Oursler	SHRX	%%dist, %%dist, rcx	; dist >>= num_extra_bits
18701dfbcc4SRoy Oursler	lea	%%dist, [%%dist + 2*rcx] ; code = sym = dist + num_extra_bits*2
18801dfbcc4SRoy Oursler	shl	%%code, EXTRA_BITS_OFFSET - DIST_OFFSET
18901dfbcc4SRoy Oursler	add	%%code, %%dist	; code = extra_bits | sym
19001dfbcc4SRoy Oursler
19101dfbcc4SRoy Oursler%endm
19201dfbcc4SRoy Oursler
19301dfbcc4SRoy Oursler; Uses RCX, clobbers dist
19401dfbcc4SRoy Oursler; get_dist_code	dist, code, len
19501dfbcc4SRoy Oursler%macro get_dist_icf_code 3
19601dfbcc4SRoy Oursler%define %%dist %1	; 32-bit IN, clobbered
19701dfbcc4SRoy Oursler%define %%distq %1	; 64-bit IN, clobbered
19801dfbcc4SRoy Oursler%define %%code %2	; 32-bit OUT
19901dfbcc4SRoy Oursler%define %%tmp1 %3
20001dfbcc4SRoy Oursler
20101dfbcc4SRoy Oursler	cmp	%%dist, 1
20201dfbcc4SRoy Oursler	jg	%%do_compute
20301dfbcc4SRoy Oursler
20401dfbcc4SRoy Oursler%ifnidn	%%code, %%dist
20501dfbcc4SRoy Oursler	mov	%%code, %%dist
20601dfbcc4SRoy Oursler%endif
20701dfbcc4SRoy Oursler	jmp 	%%done
20801dfbcc4SRoy Oursler%%do_compute:
20901dfbcc4SRoy Oursler	compute_dist_icf_code	%%distq, %%code, %%tmp1
21001dfbcc4SRoy Oursler%%done:
21101dfbcc4SRoy Oursler	shl	%%code, DIST_OFFSET
21201dfbcc4SRoy Oursler%endm
21301dfbcc4SRoy Oursler
21401dfbcc4SRoy Oursler
215660f49b0SGreg Tucker; "len" can be same register as "length"
216660f49b0SGreg Tucker; get_len_code	length, code, len
217660f49b0SGreg Tucker%macro get_len_code 4
218660f49b0SGreg Tucker%define %%length %1	; 64-bit IN
219660f49b0SGreg Tucker%define %%code %2d	; 32-bit OUT
220660f49b0SGreg Tucker%define %%len  %3d	; 32-bit OUT
221660f49b0SGreg Tucker%define %%hufftables %4
222660f49b0SGreg Tucker
223660f49b0SGreg Tucker	mov	%%len, [%%hufftables + LEN_TABLE_OFFSET + 4 * %%length]
224660f49b0SGreg Tucker	mov	%%code, %%len
225660f49b0SGreg Tucker	and	%%len, 0x1F
226660f49b0SGreg Tucker	shr	%%code, 5
227660f49b0SGreg Tucker%endm
228660f49b0SGreg Tucker
229660f49b0SGreg Tucker
230660f49b0SGreg Tucker%macro get_lit_code 4
231660f49b0SGreg Tucker%define %%lit %1	; 64-bit IN or CONST
232660f49b0SGreg Tucker%define %%code %2d	; 32-bit OUT
233660f49b0SGreg Tucker%define %%len  %3d	; 32-bit OUT
234660f49b0SGreg Tucker%define %%hufftables %4
235660f49b0SGreg Tucker
236660f49b0SGreg Tucker	movzx	%%len, byte [%%hufftables + LIT_TABLE_SIZES_OFFSET + %%lit]
237660f49b0SGreg Tucker	movzx	%%code, word [%%hufftables + LIT_TABLE_OFFSET + 2 * %%lit]
238660f49b0SGreg Tucker
239660f49b0SGreg Tucker%endm
240660f49b0SGreg Tucker
241660f49b0SGreg Tucker
242660f49b0SGreg Tucker;; Compute hash of first 3 bytes of data
243660f49b0SGreg Tucker%macro compute_hash 2
244660f49b0SGreg Tucker%define %%result	%1d	; 32-bit reg
245660f49b0SGreg Tucker%define %%data		%2d	; 32-bit reg (low byte not clobbered)
246660f49b0SGreg Tucker
247660f49b0SGreg Tucker	xor	%%result, %%result
248660f49b0SGreg Tucker	crc32	%%result, %%data
249660f49b0SGreg Tucker%endm
250