; xref: /isa-l/igzip/igzip_icf_finish.asm (revision cd888f01a447dd04c3a8b50362079648d432d2ca)
1;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2;  Copyright(c) 2011-2016 Intel Corporation All rights reserved.
3;
4;  Redistribution and use in source and binary forms, with or without
5;  modification, are permitted provided that the following conditions
6;  are met:
7;    * Redistributions of source code must retain the above copyright
8;      notice, this list of conditions and the following disclaimer.
9;    * Redistributions in binary form must reproduce the above copyright
10;      notice, this list of conditions and the following disclaimer in
11;      the documentation and/or other materials provided with the
12;      distribution.
13;    * Neither the name of Intel Corporation nor the names of its
14;      contributors may be used to endorse or promote products derived
15;      from this software without specific prior written permission.
16;
17;  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18;  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19;  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20;  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21;  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22;  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23;  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24;  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25;  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26;  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27;  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
29
30%include "options.asm"
31%include "lz0a_const.asm"
32%include "data_struct2.asm"
33%include "bitbuf2.asm"
34%include "huffman.asm"
35%include "igzip_compare_types.asm"
36
37%include "stdmac.asm"
38%include "reg_sizes.asm"
39
40;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
41;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
42;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
43
; Register aliases used by the routine below.
;
; Several names intentionally map to the same physical register.  Two aliases
; of one register must never be live at the same time; where a value has to
; survive an aliased use it is kept in the stack frame and reloaded.

; rax: dword of input at the current position / general scratch
%define curr_data	rax
%define tmp1		rax

; rbx: previous position from the hash table, literal/length symbol, scratch
%define f_index		rbx
%define code		rbx
%define tmp4		rbx
%define tmp5		rbx
%define tmp6		rbx

; rcx: scratch / hash value (used as the hash table index)
%define tmp2		rcx
%define hash		rcx

; rdx: scratch (holds the distance mask at the top of the main loop)
%define tmp3		rdx

; rsi: pointer to the stream structure (first argument)
%define stream		rsi

; rdi: index of the current input byte, relative to file_start
%define f_i		rdi

; rbp: hash mask in the main loop (code_len2 is not referenced in this file)
%define code_len2	rbp
%define hmask1		rbp

; r8: write cursor into the output buffer
%define m_out_buf	r8

; r9: level buffer pointer, base of the hash table and histograms
%define level_buf	r9

; r10: match distance, later reused for the hash mask on the match path
%define dist		r10
%define hmask2		r10

; r12: encoded match token / end-of-input index (shared, hence the reloads
; of f_end_i from the stack frame)
%define code2		r12
%define f_end_i		r12

; r13: next_in - total_in, so [file_start + f_i] addresses input byte f_i
%define file_start	r13

; r14: match length produced by the compare macro
%define len		r14

; r15: loaded from [stream + _hufftables]
%define hufftables	r15

; Address expressions (meant to be used inside [...]) for the tables that
; live in the level buffer.
%define hash_table level_buf + _hash8k_hash_table
%define lit_len_hist level_buf + _hist_lit_len
%define dist_hist level_buf + _hist_dist
84
85;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
86;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
87;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Stack frame layout: byte offsets from rsp once stack_size has been reserved.
f_end_i_mem_offset	equ 0	; saved f_end_i (r12 is shared with code2)
m_out_end		equ 8	; output-full threshold for the write cursor
m_out_start		equ 16	; write cursor value on entry
dist_mask_offset	equ 24	; copy of the stream's distance mask
hash_mask_offset	equ 32	; copy of the stream's hash mask
stack_size		equ 5*8	; five 8-byte slots

; Suffix inserted into the exported symbol name.
%xdefine METHOD hash_hist

[bits 64]
default rel
section .text
100
; void isal_deflate_icf_finish ( isal_zstream *stream )
; Exported as isal_deflate_icf_finish_<METHOD>_01.
;
; arg 1: addr of stream -- rcx when assembled for win64, rdi for any other
;        output format.
;
; Walks the input at stream->next_in and appends one dword per token (literal
; or length/distance match) at level_buf->icf_buf_next, counting every symbol
; in the literal/length and distance histograms of the level buffer.
;
; The hash-based match search runs up to LAST_BYTES_COUNT bytes before the
; end of the input; the remaining bytes are emitted as plain literals.
;
; On exit the routine updates total_in, next_in, avail_in, the internal
; block_end, icf_buf_next and icf_buf_avail_out.  The internal state is set
; to ZSTATE_CREATE_HDR when the output ran out of room, or when all input was
; consumed and either end_of_stream is non-zero or flush is not _NO_FLUSH.
;
; endbranch, PUSH_ALL/POP_ALL, compute_hash, compare, get_dist_icf_code and
; write_dword are macros from the included files.
; NOTE(review): write_dword is assumed to store 4 bytes at m_out_buf and to
; advance m_out_buf by 4 -- confirm against its definition.
global isal_deflate_icf_finish_ %+ METHOD %+ _01
isal_deflate_icf_finish_ %+ METHOD %+ _01:
	endbranch
	PUSH_ALL	rbx, rsi, rdi, rbp, r12, r13, r14, r15
	sub	rsp, stack_size

	; Fetch the stream pointer before anything else: on non-win64 targets
	; it arrives in rdi, the register that f_i overwrites further down.
%ifidn __OUTPUT_FORMAT__, win64
	mov	stream, rcx
%else
	mov	stream, rdi
%endif

	; Cache both masks and the output window in the stack frame.
	mov	tmp2 %+ d, dword [stream + _internal_state_dist_mask]
	mov	tmp3 %+ d, dword [stream + _internal_state_hash_mask]
	mov	level_buf, [stream + _level_buf]
	mov	m_out_buf, [level_buf + _icf_buf_next]
	mov	[rsp + m_out_start], m_out_buf
	; m_out_end = icf_buf_next + icf_buf_avail_out - 4; the output counts
	; as full once m_out_buf is above this address.
	mov	tmp1, [level_buf + _icf_buf_avail_out]
	add	tmp1, m_out_buf
	sub	tmp1, 4

	; The 32-bit loads above zeroed the upper halves of tmp2 and tmp3.
	mov	[rsp + dist_mask_offset], tmp2
	mov	[rsp + hash_mask_offset], tmp3
	mov	[rsp + m_out_end], tmp1

	; NOTE(review): hufftables is not referenced again in the code visible
	; here; the load is kept in case an included macro depends on it.
	mov	hufftables, [stream + _hufftables]

	; file_start = next_in - total_in
	mov	file_start, [stream + _next_in]

	mov	f_i %+ d, dword [stream + _total_in]
	sub	file_start, f_i

	; f_end_i = total_in + avail_in
	mov	f_end_i %+ d, dword [stream + _avail_in]
	add	f_end_i, f_i

	; The saved bound for the main loop is f_end_i - LAST_BYTES_COUNT.  It is
	; negative when total_in + avail_in < LAST_BYTES_COUNT, which is why the
	; index comparisons against it use signed conditions.
	sub	f_end_i, LAST_BYTES_COUNT
	mov	[rsp + f_end_i_mem_offset], f_end_i
	; for (f_i = f_start_i; f_i < f_end_i; f_i++) {
	cmp	f_i, f_end_i
	jge	.end_loop_2

	mov	curr_data %+ d, [file_start + f_i]

	; Without history the first byte can only be a literal: record its
	; hash, mark the stream as having history and emit it directly.
	cmp	byte [stream + _internal_state_has_hist], IGZIP_NO_HIST
	jne	.skip_write_first_byte

	cmp	m_out_buf, [rsp + m_out_end]
	ja	.end_loop_2

	mov	hmask1 %+ d, [rsp + hash_mask_offset]
	compute_hash	hash, curr_data
	and	hash %+ d, hmask1 %+ d
	mov	[hash_table + 2 * hash], f_i %+ w
	mov	byte [stream + _internal_state_has_hist], IGZIP_HIST
	jmp	.encode_literal

.skip_write_first_byte:

.loop2:
	; Both masks are reloaded on every iteration because tmp3 and hmask1
	; are scratch registers inside the loop body.
	mov	tmp3 %+ d, [rsp + dist_mask_offset]
	mov	hmask1 %+ d, [rsp + hash_mask_offset]
	; stop as soon as there is no room for another dword of output
	cmp	m_out_buf, [rsp + m_out_end]
	ja	.end_loop_2

	; hash = compute_hash(file_start + f_i) & hash_mask;
	mov	curr_data %+ d, [file_start + f_i]
	compute_hash	hash, curr_data
	and	hash %+ d, hmask1 %+ d

	; f_index = hash_table[hash];
	movzx	f_index %+ d, word [hash_table + 2 * hash]

	; hash_table[hash] = (uint16_t) f_i;
	mov	[hash_table + 2 * hash], f_i %+ w

	; dist = f_i - f_index; // mod 64k
	mov	dist %+ d, f_i %+ d
	sub	dist %+ d, f_index %+ d
	and	dist %+ d, 0xFFFF

	; Try a match only if (dist - 1) < dist_mask, compared unsigned.
	; dist == 0 wraps to 0xFFFFFFFF and is therefore always rejected.
	mov	tmp1 %+ d, dist %+ d
	sub	tmp1 %+ d, 1
	cmp	tmp1 %+ d, tmp3 %+ d
	jae	.encode_literal

	; len = bytes left in the input; the stored bound is LAST_BYTES_COUNT
	; short of the real end, so add that back.
	mov	tmp4, [rsp + f_end_i_mem_offset]
	sub	tmp4, f_i
	add	tmp4, LAST_BYTES_COUNT

	; if (len > 258) len = 258;
	cmp	tmp4, 258
	cmovg	tmp4, [c258]

	; len = compare(file_start + f_i,
	;               file_start + f_i - dist, len);
	; NOTE(review): compare is assumed to return the match length in len
	; and to use tmp3 as scratch -- confirm against its definition.
	lea	tmp1, [file_start + f_i]
	mov	tmp2, tmp1
	sub	tmp2, dist
	compare	tmp4, tmp1, tmp2, len, tmp3

	; if (len >= SHORTEST_MATCH) {
	cmp	len, SHORTEST_MATCH
	jb	.encode_literal

	;; encode as dist/len

	; code2 = distance code for (dist - 1)
	dec	dist
	get_dist_icf_code	dist, code2, tmp3 ;; clobbers dist, rcx

	;; literal/length symbol for the match: len + 254
	lea	code, [len + 254]

	; dist is dead from here on; its register now carries the hash mask
	mov	hmask2 %+ d, [rsp + hash_mask_offset]

	or	code2, code
	inc	dword [lit_len_hist + HIST_ELEM_SIZE*code]

	; k = f_i + 1; f_i += len;
	lea	tmp3, [f_i + 1]	; tmp3 <= k
	add	f_i, len
	; no hash updates once the match reaches the LAST_BYTES_COUNT tail
	cmp	f_i, [rsp + f_end_i_mem_offset]
	jae	.skip_hash_update

	; only update hash twice

	; hash = compute_hash(file_start + k) & hash_mask;
	mov	tmp6 %+ d, dword [file_start + tmp3]
	compute_hash	hash, tmp6
	and	hash %+ d, hmask2 %+ d
	; hash_table[hash] = k;
	mov	[hash_table + 2 * hash], tmp3 %+ w

	add	tmp3, 1

	; hash = compute_hash(file_start + k) & hash_mask;
	mov	tmp6 %+ d, dword [file_start + tmp3]
	compute_hash	hash, tmp6
	and	hash %+ d, hmask2 %+ d
	; hash_table[hash] = k;
	mov	[hash_table + 2 * hash], tmp3 %+ w

.skip_hash_update:
	write_dword	code2, m_out_buf
	; count the distance symbol: the 5-bit field at DIST_OFFSET of the token
	shr	code2, DIST_OFFSET
	and	code2, 0x1F
	inc	dword [dist_hist + HIST_ELEM_SIZE*code2]
	; continue
	cmp	f_i, [rsp + f_end_i_mem_offset]
	jl	.loop2
	jmp	.end_loop_2

.encode_literal:
	; emit file_start[f_i] as a literal token and count it
	movzx	tmp5, byte [file_start + f_i]
	inc	dword [lit_len_hist + HIST_ELEM_SIZE*tmp5]
	or	tmp5, LIT
	write_dword	tmp5, m_out_buf
	; continue
	add	f_i, 1
	cmp	f_i, [rsp + f_end_i_mem_offset]
	jl	.loop2

.end_loop_2:
	; Restore the true end of input.  The reload is required because code2
	; shares its register with f_end_i.
	mov	f_end_i, [rsp + f_end_i_mem_offset]
	add	f_end_i, LAST_BYTES_COUNT
	mov	[rsp + f_end_i_mem_offset], f_end_i
	; nothing left to emit if f_i already reached the end of the input
	cmp	f_i, f_end_i
	jge	.input_end

.final_bytes:
	; Remaining bytes are written as literals, one dword each, for as long
	; as the output has room.
	cmp	m_out_buf, [rsp + m_out_end]
	ja	.out_end

	movzx	tmp5, byte [file_start + f_i]
	inc	dword [lit_len_hist + HIST_ELEM_SIZE*tmp5]
	or	tmp5, LIT
	write_dword	tmp5, m_out_buf

	inc	f_i
	cmp	f_i, [rsp + f_end_i_mem_offset]
	jl	.final_bytes

.input_end:
	; All input consumed: the state only changes if the caller signalled
	; end of stream or requested a flush.
	cmp	word [stream + _end_of_stream], 0
	jne	.out_end
	cmp	word [stream + _flush], _NO_FLUSH
	je	.end

.out_end:
	mov	dword [stream + _internal_state_state], ZSTATE_CREATE_HDR
.end:
	;; Update input buffer
	mov	f_end_i, [rsp + f_end_i_mem_offset]
	mov	[stream + _total_in], f_i %+ d
	mov	[stream + _internal_state_block_end], f_i %+ d

	; next_in = file_start + f_i; avail_in = f_end_i - f_i
	add	file_start, f_i
	mov	[stream + _next_in], file_start
	sub	f_end_i, f_i
	mov	[stream + _avail_in], f_end_i %+ d

	;; Update output buffer
	mov	[level_buf + _icf_buf_next], m_out_buf

	; bytes written = m_out_buf - value of icf_buf_next on entry
	sub	m_out_buf, [rsp + m_out_start]

	; icf_buf_avail_out -= bytes written
	sub	[level_buf + _icf_buf_avail_out], m_out_buf

	add	rsp, stack_size
	POP_ALL
	ret
324
section .data
	; Upper bound for a match length.  It is read as a 64-bit memory operand
	; by cmovg, so it is aligned to its own size.
	align 8
c258:	dq	258
328