xref: /isa-l/igzip/igzip_finish.asm (revision d3cfb2fb772e375cf2007e484e0a6ec0c6a7c993)
1;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2;  Copyright(c) 2011-2016 Intel Corporation All rights reserved.
3;
4;  Redistribution and use in source and binary forms, with or without
5;  modification, are permitted provided that the following conditions
6;  are met:
7;    * Redistributions of source code must retain the above copyright
8;      notice, this list of conditions and the following disclaimer.
9;    * Redistributions in binary form must reproduce the above copyright
10;      notice, this list of conditions and the following disclaimer in
11;      the documentation and/or other materials provided with the
12;      distribution.
13;    * Neither the name of Intel Corporation nor the names of its
14;      contributors may be used to endorse or promote products derived
15;      from this software without specific prior written permission.
16;
17;  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18;  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19;  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20;  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21;  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22;  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23;  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24;  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25;  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26;  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27;  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
29
30%include "options.asm"
31%include "lz0a_const.asm"
32%include "data_struct2.asm"
33%include "bitbuf2.asm"
34%include "huffman.asm"
35%include "igzip_compare_types.asm"
36
37%include "stdmac.asm"
38%include "reg_sizes.asm"
39
40;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
41;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
42;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
43
; Symbolic register names used by isal_deflate_finish_01.
;
; Several names map onto the SAME physical register, so at most one name
; per group may hold a live value at any time:
;   rax : curr_data, tmp1
;   rbx : f_index, code, tmp4, tmp5, tmp6
;   rcx : tmp2, hash
;   rbp : code_len2, hmask1
;   r10 : dist, hmask2
;   r12 : code2, f_end_i
; Writing one alias destroys the others in its group.
44%define curr_data	rax
45%define tmp1		rax
46
47%define f_index		rbx
48%define code		rbx
49%define tmp4		rbx
50%define tmp5		rbx
51%define tmp6		rbx
52
53%define tmp2		rcx
54%define hash		rcx
55
56%define tmp3		rdx
57
; stream and f_i keep a register to themselves.
58%define stream		rsi
59
60%define f_i		rdi
61
; hmask1 shares rbp with code_len2.
62%define code_len2	rbp
63%define hmask1		rbp
64
65%define m_out_buf	r8
66
67%define m_bits		r9
68
; hmask2 shares r10 with dist.
69%define dist		r10
70%define hmask2		r10
71
72%define m_bit_count	r11
73
; f_end_i shares r12 with code2; the routine also stores the loop bound in
; the stack slot at f_end_i_mem_offset and compares against that copy.
74%define code2		r12
75%define f_end_i		r12
76
77%define file_start	r13
78
79%define len		r14
80
81%define hufftables	r15
82
83;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
84;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
85;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Stack frame of isal_deflate_finish_01: the routine reserves stack_size
; bytes with "sub rsp, stack_size" and addresses its single local as
; [rsp + f_end_i_mem_offset].
86f_end_i_mem_offset	equ 0    ; offset from rsp of the saved f_end_i loop bound (8 bytes)
87stack_size		equ 8
88
; 64-bit code; "default rel" makes plain memory operands such as [c258]
; RIP-relative.
89[bits 64]
90default rel
91section .text
92
;-----------------------------------------------------------------------
; Encodes the input still pending in the stream, then the end-of-block
; symbol, stopping early if the output buffer fills up.
;
; Flow:
;   1. Point the bit buffer at next_out; its end marker is
;      next_out + avail_out - SLOP.
;   2. loop2: for every position below (end of input - LAST_BYTES_COUNT),
;      look the position up in the hash table (head[]) and emit either a
;      length/distance pair or a literal.
;   3. final_bytes: emit the remaining tail bytes as literals.
;   4. write_eob: emit symbol 256, set has_eob, and set state to
;      ZSTATE_TRL when end_of_stream == 1, otherwise ZSTATE_SYNC_FLUSH.
;   5. not_end: write back next_in/avail_in/total_in,
;      next_out/avail_out/total_out and the bit-buffer contents.
;
; Every emit step is guarded by "m_out_buf > m_out_end => stop". When a
; guard fires control reaches not_end without touching has_eob or state,
; so the progress made so far is saved and nothing else changes.
;
; Registers: rbx, rsi, rdi, rbp, r12-r15 are passed to PUSH_ALL at entry
; and POP_ALL runs before ret; rax, rcx, rdx, r8-r11 are used as scratch.
; Stack: stack_size bytes holding the loop bound (f_end_i shares r12 with
; code2, so the bound is compared from memory inside the loop).
; NOTE(review): the helper macros (compute_hash, compare, get_dist_code,
; get_len_code, get_lit_code, write_bits, SHLX, PUSH_ALL/POP_ALL,
; endbranch) come from the include files; their exact register clobbers
; are not visible in this file -- confirm before reordering anything.
;-----------------------------------------------------------------------
93; void isal_deflate_finish ( isal_zstream *stream )
94; arg 1: addr of stream -- taken from rdi for elf64 output (copied to rcx below), from rcx otherwise
95global isal_deflate_finish_01
96isal_deflate_finish_01:
97	endbranch
98	PUSH_ALL	rbx, rsi, rdi, rbp, r12, r13, r14, r15
99	sub	rsp, stack_size
100
	; Normalise the argument register so the rest of the code is format-independent.
101%ifidn __OUTPUT_FORMAT__, elf64
102	mov	rcx, rdi
103%endif
104
105	mov	stream, rcx
106
107	; state->bitbuf.set_buf(stream->next_out, stream->avail_out);
108	mov	m_out_buf, [stream + _next_out]
109	mov	[stream + _internal_state_bitbuf_m_out_start], m_out_buf
110	mov	tmp1 %+ d, [stream + _avail_out]
111	add	tmp1, m_out_buf
	; m_out_end = next_out + avail_out - SLOP
112	sub	tmp1, SLOP
114	mov	[stream + _internal_state_bitbuf_m_out_end], tmp1
115
116	mov	m_bits,           [stream + _internal_state_bitbuf_m_bits]
117	mov	m_bit_count %+ d, [stream + _internal_state_bitbuf_m_bit_count]
118
119	mov	hufftables, [stream + _hufftables]
120
121	mov	file_start, [stream + _next_in]
122
	; file_start = next_in - total_in, so [file_start + f_i] addresses the
	; byte at stream position f_i.
123	mov	f_i %+ d, dword [stream + _total_in]
124	sub	file_start, f_i
125
126	mov	f_end_i %+ d, dword [stream + _avail_in]
127	add	f_end_i, f_i
128
	; The main loop stops LAST_BYTES_COUNT short of the real end; the
	; reduced bound is what is kept in the stack slot until end_loop_2.
129	sub	f_end_i, LAST_BYTES_COUNT
130	mov	[rsp + f_end_i_mem_offset], f_end_i
131	; for (f_i = f_start_i; f_i < f_end_i; f_i++) {
	; signed compare: the reduced bound can be below f_i when little input is left
132	cmp	f_i, f_end_i
133	jge	end_loop_2
134
135	mov	curr_data %+ d, [file_start + f_i]
136
	; With no history yet, the first byte is only entered into the hash
	; table and emitted as a literal (no match lookup).
137	cmp	byte [stream + _internal_state_has_hist], IGZIP_NO_HIST
138	jne	skip_write_first_byte
139
140	cmp	m_out_buf, [stream + _internal_state_bitbuf_m_out_end]
141	ja	end_loop_2
142	mov	hmask1 %+ d, dword [stream + _internal_state_hash_mask]
143	compute_hash	hash, curr_data
144	and	hash %+ d, hmask1 %+ d
145	mov	[stream + _internal_state_head + 2 * hash], f_i %+ w
146	mov	byte [stream + _internal_state_has_hist], IGZIP_HIST
147	jmp	encode_literal
148
149skip_write_first_byte:
150
151loop2:
	; tmp3 (dist_mask) and hmask1 are reloaded on every iteration: tmp3 is
	; reused further down (compare scratch argument, index k) and hmask1
	; shares rbp with code_len2.
152	mov     tmp3 %+ d, dword [stream + _internal_state_dist_mask]
153	mov	hmask1 %+ d,  dword [stream + _internal_state_hash_mask]
154	; if (state->bitbuf.is_full()) {
155	cmp	m_out_buf, [stream + _internal_state_bitbuf_m_out_end]
156	ja	end_loop_2
157
158	; hash = compute_hash(state->file_start + f_i) & hash_mask;
159	mov	curr_data %+ d, [file_start + f_i]
160	compute_hash	hash, curr_data
161	and	hash %+ d, hmask1 %+ d
162
163	; f_index = state->head[hash];
164	movzx	f_index %+ d, word [stream + _internal_state_head + 2 * hash]
165
166	; state->head[hash] = (uint16_t) f_i;
167	mov	[stream + _internal_state_head + 2 * hash], f_i %+ w
168
169	; dist = f_i - f_index; // mod 64k
170	mov	dist %+ d, f_i %+ d
171	sub	dist %+ d, f_index %+ d
172	and	dist %+ d, 0xFFFF
173
174	; if ((uint32_t)(dist - 1) < dist_mask) {
	;   unsigned compare: dist == 0 wraps to 0xFFFFFFFF and falls through
	;   to the literal path
175	mov	tmp1 %+ d, dist %+ d
176	sub	tmp1 %+ d, 1
177	cmp	tmp1 %+ d, tmp3 %+ d
178	jae	encode_literal
179
180	; len = (f_end_i + LAST_BYTES_COUNT) - f_i;
	;   the stack slot holds the reduced bound, so LAST_BYTES_COUNT is
	;   added back to get the bytes left up to the real end of input
181	mov	tmp4, [rsp + f_end_i_mem_offset]
182	sub	tmp4, f_i
183	add	tmp4, LAST_BYTES_COUNT
184
185	; if (len > 258) len = 258;
	;   cmov takes no immediate operand, hence the in-memory constant c258
186	cmp	tmp4, 258
187	cmovg	tmp4, [c258]
188
189	; len = compare(state->file_start + f_i,
190	;               state->file_start + f_i - dist, len);
191	lea	tmp1, [file_start + f_i]
192	mov	tmp2, tmp1
193	sub	tmp2, dist
194	compare	tmp4, tmp1, tmp2, len, tmp3
195
196	; if (len >= SHORTEST_MATCH) {
197	cmp	len, SHORTEST_MATCH
198	jb	encode_literal
199
200	;; encode as dist/len
201
202	; get_dist_code(dist, &code2, &code_len2);
203	dec	dist
204	get_dist_code	dist, code2, code_len2, hufftables ;; clobbers dist, rcx
205
206	; get_len_code(len, &code, &code_len);
207	get_len_code	len, code, rcx, hufftables	;; rcx is code_len
208
	; dist is no longer needed, so r10 can carry the hash mask (hmask2);
	; rbp cannot, as it now holds code_len2.
209	mov	hmask2 %+ d,  dword [stream + _internal_state_hash_mask]
210	; code2 <<= code_len
211	; code2 |= code
212	; code_len2 += code_len
213	SHLX	code2, code2, rcx
214	or	code2, code
215	add	code_len2, rcx
216
217	; k = f_i + 1; f_i += len;
	; the hash table is refreshed for k and k + 1 only, and only when the
	; new f_i is still below the reduced loop bound
218	lea	tmp3, [f_i + 1]	; tmp3 <= k
219	add	f_i, len
220	cmp	f_i, [rsp + f_end_i_mem_offset]
221	jae	skip_hash_update
222
223	; only update hash twice
224
225	; hash = compute_hash(state->file_start + k) & hash_mask;
226	mov	tmp6 %+ d, dword [file_start + tmp3]
227	compute_hash	hash, tmp6
228	and	hash %+ d, hmask2 %+ d
229	; state->head[hash] = k;
230	mov	[stream + _internal_state_head + 2 * hash], tmp3 %+ w
231
232	add	tmp3, 1
233
234	; hash = compute_hash(state->file_start + k) & hash_mask;
235	mov	tmp6 %+ d, dword [file_start + tmp3]
236	compute_hash	hash, tmp6
237	and	hash %+ d, hmask2 %+ d
238	; state->head[hash] = k;
239	mov	[stream + _internal_state_head + 2 * hash], tmp3 %+ w
240
241skip_hash_update:
	; emit the combined length + distance code
242	write_bits	m_bits, m_bit_count, code2, code_len2, m_out_buf
243
244	; continue
245	cmp	f_i, [rsp + f_end_i_mem_offset]
246	jl	loop2
247	jmp	end_loop_2
248
249encode_literal:
250	; get_lit_code(state->file_start[f_i], &code2, &code_len2);
251	movzx	tmp5, byte [file_start + f_i]
252	get_lit_code	tmp5, code2, code_len2, hufftables
253
254	write_bits	m_bits, m_bit_count, code2, code_len2, m_out_buf
255
256	; continue
257	add	f_i, 1
258	cmp	f_i, [rsp + f_end_i_mem_offset]
259	jl	loop2
260
261end_loop_2:
	; Restore the real end of input (undo the LAST_BYTES_COUNT reduction)
	; and keep it in the stack slot from here on.
262	mov	f_end_i, [rsp + f_end_i_mem_offset]
263	add	f_end_i, LAST_BYTES_COUNT
264	mov	[rsp + f_end_i_mem_offset], f_end_i
265	; if ((f_i >= f_end_i) && ! state->bitbuf.is_full()) {
266	cmp	f_i, f_end_i
267	jge	write_eob
268
	; Tail: the remaining bytes are emitted as plain literals, one per
	; iteration, with the output-full guard checked before each one.
269final_bytes:
270	cmp	m_out_buf, [stream + _internal_state_bitbuf_m_out_end]
271	ja	not_end
272	movzx	tmp5, byte [file_start + f_i]
273	get_lit_code	tmp5, code2, code_len2, hufftables
274	write_bits	m_bits, m_bit_count, code2, code_len2, m_out_buf
275
276	inc	f_i
277	cmp	f_i, [rsp + f_end_i_mem_offset]
278	jl	final_bytes
279
280write_eob:
	; All input consumed: emit the end-of-block symbol unless the output is full.
281	cmp	m_out_buf, [stream + _internal_state_bitbuf_m_out_end]
282	ja	not_end
283
284	;	get_lit_code(256, &code2, &code_len2);
285	get_lit_code	256, code2, code_len2, hufftables
286
287	write_bits	m_bits, m_bit_count, code2, code_len2, m_out_buf
288
289	mov	byte [stream + _internal_state_has_eob], 1
290	cmp	word [stream + _end_of_stream], 1
291	jne	sync_flush
292	;	   state->state = ZSTATE_TRL;
293	mov	dword [stream + _internal_state_state], ZSTATE_TRL
294	jmp	not_end
295
296sync_flush:
297	;	   state->state = ZSTATE_SYNC_FLUSH;
298	mov	dword [stream + _internal_state_state], ZSTATE_SYNC_FLUSH
299	;    }
300not_end:
301
302
303	;; Update input buffer
	; Every path to not_end passes through end_loop_2, so the stack slot
	; holds the real end of input here.
304	mov	f_end_i, [rsp + f_end_i_mem_offset]
305	mov	[stream + _total_in], f_i %+ d
306	add	file_start, f_i
307	mov	[stream + _next_in], file_start
308	sub	f_end_i, f_i
309	mov	[stream + _avail_in], f_end_i %+ d
310
311	;; Update output buffer
312	mov	[stream + _next_out], m_out_buf
313	;    len = state->bitbuf.buffer_used();
314	sub	m_out_buf, [stream + _internal_state_bitbuf_m_out_start]
315
316	;    stream->avail_out -= len;
317	sub	[stream + _avail_out], m_out_buf %+ d
318	;    stream->total_out += len;
319	add	[stream + _total_out], m_out_buf %+ d
320
	; Save the bit accumulator and bit count back into the stream state.
321	mov	[stream + _internal_state_bitbuf_m_bits], m_bits
322	mov	[stream + _internal_state_bitbuf_m_bit_count], m_bit_count %+ d
323	add	rsp, stack_size
324	POP_ALL
325	ret
327
328section .data
; c258: the cap (258) that loop2 applies to the candidate match length.
; It is kept in memory because cmovg cannot take an immediate operand.
; Aligned to 8 so the qword load is naturally aligned.
329	align 8
330c258:	dq	258
331