xref: /isa-l/igzip/aarch64/igzip_deflate_finish_aarch64.S (revision 38279f5e9e6455d16b3e97c387f209db56fa994c)
1/**********************************************************************
2  Copyright(c) 2019 Arm Corporation All rights reserved.
3
4  Redistribution and use in source and binary forms, with or without
5  modification, are permitted provided that the following conditions
6  are met:
7    * Redistributions of source code must retain the above copyright
8      notice, this list of conditions and the following disclaimer.
9    * Redistributions in binary form must reproduce the above copyright
10      notice, this list of conditions and the following disclaimer in
11      the documentation and/or other materials provided with the
12      distribution.
13    * Neither the name of Arm Corporation nor the names of its
14      contributors may be used to endorse or promote products derived
15      from this software without specific prior written permission.
16
17  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28**********************************************************************/
29
30#include "../include/aarch64_label.h"
31
32	.arch armv8-a+crc
33	.text
34	.align	2
35
36#include "lz0a_const_aarch64.h"
37#include "data_struct_aarch64.h"
38#include "huffman_aarch64.h"
39#include "bitbuf2_aarch64.h"
40#include "stdmac_aarch64.h"
41
42
43/*
44declare Macros
45*/
46
/*
 * declare_generic_reg name, reg, default
 *
 * Defines three aliases for general-purpose register number \reg:
 *	x_\name	-> x\reg		(64-bit view)
 *	w_\name	-> w\reg		(32-bit view)
 *	\name	-> \default\reg		(view chosen by \default: x or w)
 */
.macro	declare_generic_reg name:req,reg:req,default:req
	x_\name		.req	x\reg
	w_\name		.req	w\reg
	\name		.req	\default\reg
.endm
52
/*
 * update_state stream,start_in,next_in,end_in,m_out_buf,m_out_start,tmp0,tmp1
 *
 * Writes the input/output progress back into the stream structure.
 * Every argument is an alias NAME created with declare_generic_reg; the
 * macro addresses the registers through the x_<name> / w_<name> views.
 *
 *	stream		base pointer of the stream structure
 *	start_in	value of next_in when processing started
 *	next_in		current input pointer
 *	end_in		one past the last input byte
 *	m_out_buf	current output pointer
 *	m_out_start	output pointer when processing started
 *	tmp0		scratch
 *	tmp1		unused; kept so existing invocations stay valid
 *
 * Clobbers: condition flags, tmp0, and the start_in, next_in, end_in,
 * m_out_buf and m_out_start registers (they are reused as temporaries).
 */
.macro  update_state	stream:req,start_in:req,next_in:req,end_in:req,m_out_buf:req,m_out_start:req,tmp0:req,tmp1:req

	//m_out_buf=bytes_written
	sub	x_\m_out_buf,x_\m_out_buf,x_\m_out_start
	//store 1 to the has_hist byte only when at least one input byte was consumed
	cmp	x_\next_in,x_\start_in
	bls	1f
	mov	w_\tmp0,1
	strb	w_\tmp0,[x_\stream,_internal_state_has_hist]
1:	//numeric local label: the macro can be expanded more than once
	ldr	w_\tmp0,[x_\stream,_total_in]
	ldr	x_\m_out_start,[x_\stream,_next_out] //m_out_start = next_out

	str	x_\next_in,[x_\stream,_next_in]
	sub	x_\start_in,x_\next_in,x_\start_in	//start_in = bytes consumed
	sub	x_\end_in,x_\end_in,x_\next_in		//end_in = bytes left
	add	w_\tmp0,w_\tmp0,w_\start_in		//tmp0 = total_in + bytes consumed
	//pair store: end_in -> [_avail_in], tmp0 -> the following word
	//(relies on _total_in being the word right after _avail_in)
	stp	w_\end_in,w_\tmp0,[x_\stream,_avail_in]
	//next_in=avail_out,start_in=total_out
	ldp	w_\next_in,w_\start_in,[x_\stream,_avail_out]
	add	x_\m_out_start,x_\m_out_start,x_\m_out_buf
	str	x_\m_out_start,[x_\stream,_next_out]	//next_out += bytes_written
	add	w_\start_in,w_\start_in,w_\m_out_buf	//total_out += bytes_written
	sub	w_\next_in,w_\next_in,w_\m_out_buf	//avail_out -= bytes_written
	stp	w_\next_in,w_\start_in,[x_\stream,_avail_out]
.endm
79	.global	cdecl(isal_deflate_finish_aarch64)
80	.arch armv8-a+crc
81#ifndef __APPLE__
82	.type	isal_deflate_finish_aarch64, %function
83#endif
84/*
85	void isal_deflate_finish_aarch64(struct isal_zstream *stream)
86*/
87	declare_generic_reg	stream,		0,x	//struct isal_zstream *stream
88	declare_generic_reg	state,		8,x	//&stream->state
89	declare_generic_reg	avail_in,	9,w
90	declare_generic_reg	end_of_stream,	10,w	//can be used in loop
91
92	declare_generic_reg	hash_mask,	11,w
93	declare_generic_reg	match_length,	12,w
94	declare_generic_reg	hufftables,	13,x
95
96	declare_generic_reg	m_out_buf,	14,x
97	declare_generic_reg	m_out_start,	15,x
98	declare_generic_reg	m_out_end,	16,x
99	declare_generic_reg	m_bits,		17,x
100	declare_generic_reg	m_bit_count,	2,w
101
102	declare_generic_reg	start_in,	19,x
103	declare_generic_reg	end_in,		20,x
104	declare_generic_reg	next_in,	21,x
105	declare_generic_reg	loop_end_cnt,	22,x
106
107	declare_generic_reg	literal,	23,w
108	declare_generic_reg	hash,		24,w
109	declare_generic_reg	dist,		25,w
110
111	declare_generic_reg	last_seen,	26,x
112	declare_generic_reg	file_start,	27,x
113	declare_generic_reg	hist_size,	28,w
114
115	declare_generic_reg	tmp0,		5 ,w
116	declare_generic_reg	tmp1,		6 ,w
117	declare_generic_reg	tmp2,		7 ,w
118
119	declare_generic_reg	code,		3,x
120	declare_generic_reg	code_len,	24,x
121	declare_generic_reg	code2,		10,x
122	declare_generic_reg	code_len2,	4,x
123
124
/*
 * isal_deflate_finish_aarch64(stream)
 *
 * In:	x0 = stream
 * Flow:
 *   1. Load the output window and bit-buffer state from the stream.
 *   2. main loop: while at least 4 input bytes remain, hash 4 bytes, look
 *      up the previous position with the same hash and emit either a
 *      length/distance pair or a single literal.
 *   3. tail loop: emit the remaining bytes as literals, one byte at a time.
 *   4. If the output is not full, emit symbol 256, store 1 to has_eob and
 *      select the next state from stream->end_of_stream.
 *   5. update_state writes the pointers/counters back to the stream.
 * Either loop branches to update_state_exit as soon as m_out_buf passes m_out_end.
 */
cdecl(isal_deflate_finish_aarch64):
	//save registers
	push_stack

	//	set_buf(&state->bitbuf, stream->next_out, stream->avail_out);
	ldr	w_m_out_end,[stream,_avail_out]
	ldr	m_out_buf,[stream,_next_out]
	add	m_out_end,m_out_buf,w_m_out_end,uxtw
	sub	m_out_end,m_out_end , 8		//m_out_end = next_out + avail_out - 8
	mov	m_out_start,m_out_buf
	stp	m_out_buf,m_out_end,[stream, _bitbuf + _internal_state + _m_out_buf]
	str	m_out_start,[stream, _bitbuf + _internal_state + _m_out_start]
	ldr	m_bit_count ,[stream,_internal_state_bitbuf_m_bit_count]
	ldr	m_bits ,[stream,_internal_state_bitbuf_m_bits]

	//init variables
	//last_seen=&stream.internal_state.head = _internal_state+_head
	//(offset is added in two steps: 65536 first, then the remainder)
	add	last_seen,stream,65536
	add	last_seen,last_seen,_internal_state+_head -65536


	//start_in=stream->next_in;next_in=start_in
	ldr	avail_in, [stream, _avail_in]
	ldr	start_in,[stream,_next_in]
	mov	next_in,start_in
	add	end_in,start_in,avail_in,uxtw  //avail_in reg is free now
	ldr	hufftables,[stream,_hufftables]
	cbz	avail_in, update_not_full	//no input: go straight to symbol 256


	//file_start = (uint8_t *) ((uintptr_t) stream->next_in - stream->total_in);
	ldr	w_file_start,[stream,_total_in]
	sub	file_start, next_in, w_file_start, uxtw

	//uint32_t hist_size = state->dist_mask;
	ldr	hist_size,[stream,_internal_state + _dist_mask]

	//uint32_t hash_mask = state->hash_mask;
	ldr	hash_mask,[stream,_internal_state + _hash_mask]

	//main loop runs only while next_in <= end_in - 4 (a full 4-byte load fits)
	sub	loop_end_cnt,end_in,4		//loop end
	cmp	next_in,loop_end_cnt
	bhi	main_loop_end
main_loop_start:
	//is_full(&state->bitbuf)
	cmp	m_out_buf,m_out_end
	bhi	update_state_exit

	ldr	literal,[next_in]		//4 input bytes, in bounds by the loop condition
	crc32cw	hash,wzr,literal
	and	hash,hash,hash_mask

	///dist = (next_in - file_start - last_seen[hash]) & 0xFFFF;
	ldrh	w_tmp0,[last_seen,x_hash,lsl 1] 	//tmp_w last_seen[hash]
	sub 	x_dist,next_in,file_start
	//last_seen[hash] = (uint64_t) (next_in - file_start);
	strh	dist,[last_seen,x_hash,lsl 1]
	sub 	dist,dist,w_tmp0
	and	dist,dist,0xffff

	//literal unless dist - 1 < hist_size (unsigned, so dist == 0 is rejected too)
	sub	w_tmp0,dist,1
	cmp	hist_size,w_tmp0
	bls	get_lit_code

	/// match_length = compare258(next_in - dist, next_in, 258);
	sub	x_tmp2,next_in,x_dist
	sub	x_hash,end_in,next_in		//hash register reused: bytes left in input
	compare_max_258_bytes	tmp2,next_in,hash,match_length,tmp0,tmp1
	cmp	match_length,3
	bls	get_lit_code			//matches of 3 bytes or fewer become a literal

	get_len_code 	hufftables,match_length,code,code_len,tmp0
	get_dist_code 	hufftables,dist,code2,code_len2,tmp0,tmp1,tmp2

	//code |= code2 << code_len;
	//code_len += code_len2;
	lsl	code2,code2,code_len
	orr	code,code,code2
	add	code_len,code_len,code_len2

	//next_in += match_length;
	add	next_in,next_in,match_length,uxtw

	//write_bits(&state->bitbuf, code, code_len);
	update_bits	stream,code,code_len,m_bits,m_bit_count,m_out_buf

	cmp	next_in,loop_end_cnt
	bls	main_loop_start
	b	main_loop_end
get_lit_code:
	//get_lit_code(stream->hufftables, literal & 0xFF, &code, &code_len);
	and		literal,literal,0xff
	get_lit_code	hufftables,literal,code,code_len

	//next_in++;
	add	next_in,next_in,1

	//write_bits(&state->bitbuf, code, code_len);
	update_bits	stream,code,code_len,m_bits,m_bit_count,m_out_buf
	cmp	next_in,loop_end_cnt
	bls	main_loop_start
main_loop_end:
	//tail: runs while next_in <= end_in - 1
	sub	loop_end_cnt,end_in,1
	cmp	next_in,loop_end_cnt
	bhi	update_not_full
second_loop_start:
	cmp	m_out_buf,m_out_end
	bhi	update_state_exit
	//byte load: fewer than 4 bytes may remain here, so a word load could
	//read past end_in; ldrb zero-extends, no masking needed
	ldrb	literal,[next_in]
	get_lit_code	hufftables,literal,code,code_len
	//next_in++;
	add	next_in,next_in,1

	//write_bits(&state->bitbuf, code, code_len);
	update_bits	stream,code,code_len,m_bits,m_bit_count,m_out_buf
	cmp	next_in,loop_end_cnt
	bls	second_loop_start

update_not_full:
	cmp	m_out_buf,m_out_end
	bhi	update_state_exit

	//emit symbol 256 through the literal table
	mov	literal,256
	get_lit_code	hufftables,literal,code,code_len

	//write_bits(&state->bitbuf, code, code_len);
	update_bits	stream,code,code_len,m_bits,m_bit_count,m_out_buf
	ldrh	w_end_of_stream, [stream, _end_of_stream]
	mov	w_tmp0,1
	strb	w_tmp0,[stream,_internal_state_has_eob]
	//state = (end_of_stream == 1) ? ZSTATE_TRL : ZSTATE_SYNC_FLUSH
	cmp	w_end_of_stream,w_tmp0
	mov	w_tmp0, ZSTATE_TRL
	mov	w_tmp1,	ZSTATE_SYNC_FLUSH
	csel	w_tmp0,w_tmp0,w_tmp1,eq
	str	w_tmp0, [stream, _internal_state+_state]

update_state_exit:
	update_state	stream,start_in,next_in,end_in,m_out_buf,m_out_start,tmp0,tmp1
	pop_stack
	ret
#ifndef __APPLE__
	.size	isal_deflate_finish_aarch64, .-isal_deflate_finish_aarch64
#endif

271