xref: /isa-l/igzip/aarch64/igzip_deflate_finish_aarch64.S (revision d3cfb2fb772e375cf2007e484e0a6ec0c6a7c993)
1/**********************************************************************
2  Copyright(c) 2019 Arm Corporation All rights reserved.
3
4  Redistribution and use in source and binary forms, with or without
5  modification, are permitted provided that the following conditions
6  are met:
7    * Redistributions of source code must retain the above copyright
8      notice, this list of conditions and the following disclaimer.
9    * Redistributions in binary form must reproduce the above copyright
10      notice, this list of conditions and the following disclaimer in
11      the documentation and/or other materials provided with the
12      distribution.
13    * Neither the name of Arm Corporation nor the names of its
14      contributors may be used to endorse or promote products derived
15      from this software without specific prior written permission.
16
17  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28**********************************************************************/
29	.arch armv8-a+crc
30	.text
31	.align	2
32
33#include "lz0a_const_aarch64.h"
34#include "data_struct_aarch64.h"
35#include "huffman_aarch64.h"
36#include "bitbuf2_aarch64.h"
37#include "stdmac_aarch64.h"
38
39
40/*
41declare Macros
42*/
43
/*
 * declare_generic_reg name, reg, default
 *
 * Defines three aliases for general-purpose register number <reg>:
 *   x_<name>  the 64-bit view (x<reg>)
 *   w_<name>  the 32-bit view (w<reg>)
 *   <name>    the view selected by <default> (pass x or w)
 */
.macro	declare_generic_reg name:req,reg:req,default:req
	x_\name		.req	x\reg
	w_\name		.req	w\reg
	\name		.req	\default\reg
.endm
49
/*
 * update_state stream,start_in,next_in,end_in,m_out_buf,m_out_start,tmp0,tmp1
 *
 * Writes the input/output progress of the current call back into the stream
 * structure.  Every argument is an alias *base name*; the macro accesses the
 * registers through the x_<name> / w_<name> views.
 *
 *   stream       pointer to the stream structure
 *   start_in     value of next_in on entry to the caller
 *   next_in      current input position
 *   end_in       one past the last input byte
 *   m_out_buf    current output position
 *   m_out_start  output position on entry to the caller
 *   tmp0         scratch
 *   tmp1         unused; kept so existing invocations stay valid
 *
 * Effects on the stream structure:
 *   has_hist   = 1 when any input was consumed (next_in > start_in)
 *   next_in    = next_in
 *   avail_in   = end_in - next_in
 *   total_in  += next_in - start_in
 *   next_out  += bytes written (m_out_buf - m_out_start)
 *   avail_out -= bytes written
 *   total_out += bytes written
 *
 * Clobbers: flags, tmp0, start_in, next_in, end_in, m_out_buf, m_out_start.
 * Uses the numeric local label 1, so the macro can be expanded more than once
 * in a file without creating duplicate symbols.
 */
.macro  update_state	stream:req,start_in:req,next_in:req,end_in:req,	\
	m_out_buf:req,m_out_start:req,tmp0:req,tmp1:req

	//m_out_buf=bytes_written
	sub	x_\m_out_buf,x_\m_out_buf,x_\m_out_start
	//only flag history when at least one input byte was consumed
	cmp	x_\next_in,x_\start_in
	bls	1f
	mov	w_\tmp0,1
	strb	w_\tmp0,[x_\stream,_internal_state_has_hist]
1:
	ldr	w_\tmp0,[x_\stream,_total_in]
	ldr	x_\m_out_start,[x_\stream,_next_out] //m_out_start = next_out

	str	x_\next_in,[x_\stream,_next_in]
	//start_in = bytes consumed, end_in = bytes still available
	sub	x_\start_in,x_\next_in,x_\start_in
	sub	x_\end_in,x_\end_in,x_\next_in
	add	w_\tmp0,w_\tmp0,w_\start_in
	//avail_in and total_in are stored as an adjacent 32-bit pair
	stp	w_\end_in,w_\tmp0,[x_\stream,_avail_in]
	//next_in=avail_out,start_in=total_out
	ldp	w_\next_in,w_\start_in,[x_\stream,_avail_out]
	add	x_\m_out_start,x_\m_out_start,x_\m_out_buf
	str	x_\m_out_start,[x_\stream,_next_out]
	add	w_\start_in,w_\start_in,w_\m_out_buf
	sub	w_\next_in,w_\next_in,w_\m_out_buf
	stp	w_\next_in,w_\start_in,[x_\stream,_avail_out]
.endm
76	.global	isal_deflate_finish_aarch64
77	.arch armv8-a+crc
78	.type	isal_deflate_finish_aarch64, %function
79/*
80	void isal_deflate_finish_aarch64(struct isal_zstream *stream)
81*/
82	declare_generic_reg	stream,		0,x	//struct isal_zstream *stream
83	declare_generic_reg	state,		8,x	//&stream->state
84	declare_generic_reg	avail_in,	9,w
85	declare_generic_reg	end_of_stream,	10,w	//can be used in loop
86
87	declare_generic_reg	hash_mask,	11,w
88	declare_generic_reg	match_length,	12,w
89	declare_generic_reg	hufftables,	13,x
90
91	declare_generic_reg	m_out_buf,	14,x
92	declare_generic_reg	m_out_start,	15,x
93	declare_generic_reg	m_out_end,	16,x
94	declare_generic_reg	m_bits,		17,x
95	declare_generic_reg	m_bit_count,	18,w
96
97	declare_generic_reg	start_in,	19,x
98	declare_generic_reg	end_in,		20,x
99	declare_generic_reg	next_in,	21,x
100	declare_generic_reg	loop_end_cnt,	22,x
101
102	declare_generic_reg	literal,	23,w
103	declare_generic_reg	hash,		24,w
104	declare_generic_reg	dist,		25,w
105
106	declare_generic_reg	last_seen,	26,x
107	declare_generic_reg	file_start,	27,x
108	declare_generic_reg	hist_size,	28,w
109
110	declare_generic_reg	tmp0,		5 ,w
111	declare_generic_reg	tmp1,		6 ,w
112	declare_generic_reg	tmp2,		7 ,w
113
114	declare_generic_reg	code,		3,x
115	declare_generic_reg	code_len,	24,x
116	declare_generic_reg	code2,		10,x
117	declare_generic_reg	code_len2,	4,x
118
119
/*
 * void isal_deflate_finish_aarch64(struct isal_zstream *stream)
 *
 * Compresses the remaining input of the stream and, if the output buffer did
 * not fill up first, appends the end-of-block symbol (literal 256).
 *
 * Structure:
 *   1. Main loop, while at least 4 input bytes remain: hash the next 4 bytes
 *      (crc32cw masked with hash_mask), look up and replace the previous
 *      position in the head table, and emit a length/distance pair when the
 *      distance is inside the history window and the match is longer than 3
 *      bytes; otherwise emit one literal.
 *   2. Tail loop, for the final (up to 3) bytes: emit literals only.
 *   3. Emit end-of-block, set has_eob, and move the state to ZSTATE_TRL when
 *      stream->end_of_stream is 1, otherwise to ZSTATE_SYNC_FLUSH.
 *   4. update_state writes the in/out progress back to the stream.
 *
 * Every loop iteration first checks m_out_buf against m_out_end
 * (next_out + avail_out - 8); once the output is past that mark the routine
 * leaves through update_state_exit without writing end-of-block.
 *
 * Input: x0 = stream.  No return value.
 */
isal_deflate_finish_aarch64:
	//save registers
	push_stack

	//	set_buf(&state->bitbuf, stream->next_out, stream->avail_out);
	ldr	w_m_out_end,[stream,_avail_out]
	ldr	m_out_buf,[stream,_next_out]
	add	m_out_end,m_out_buf,w_m_out_end,uxtw
	//keep 8 bytes of slack below the real end of the output buffer
	sub	m_out_end,m_out_end , 8
	mov	m_out_start,m_out_buf
	stp	m_out_buf,m_out_end,[stream, _bitbuf + _internal_state + _m_out_buf]
	str	m_out_start,[stream, _bitbuf + _internal_state + _m_out_start]
	ldr	m_bit_count ,[stream,_internal_state_bitbuf_m_bit_count]
	ldr	m_bits ,[stream,_internal_state_bitbuf_m_bits]

	//init variables
	//last_seen=&stream.internal_state.head = _internal_state+_head
	//(offset is added in two steps; presumably it does not fit one add immediate)
	add	last_seen,stream,65536
	add	last_seen,last_seen,_internal_state+_head -65536


	//start_in=stream->next_in;next_in=start_in
	ldr	avail_in, [stream, _avail_in]
	ldr	start_in,[stream,_next_in]
	mov	next_in,start_in
	add	end_in,start_in,avail_in,uxtw  //avail_in reg is free now
	ldr	hufftables,[stream,_hufftables]
	//no input at all: go straight to the end-of-block symbol
	cbz	avail_in, update_not_full


	sub	loop_end_cnt,end_in,4		//loop end
	//flags from this compare are consumed by the bhi below; nothing in
	//between (ldr / non-flag-setting sub) modifies them
	cmp	next_in,loop_end_cnt


	//file_start = (uint8_t *) ((uintptr_t) stream->next_in - stream->total_in);
	ldr	w_file_start,[stream,_total_in]
	sub	file_start, next_in, w_file_start, uxtw

	//uint32_t hist_size = state->dist_mask;
	ldr	hist_size,[stream,_internal_state + _dist_mask]

	//uint32_t hash_mask = state->hash_mask;
	ldr	hash_mask,[stream,_internal_state + _hash_mask]

	//fewer than 4 input bytes: skip the hashing loop
	bhi	main_loop_end
main_loop_start:
	//is_full(&state->bitbuf)
	cmp	m_out_buf,m_out_end
	bhi	update_state_exit

	//4-byte load is in bounds here: next_in <= end_in - 4
	ldr	literal,[next_in]
	crc32cw	hash,wzr,literal
	and	hash,hash,hash_mask

	///dist = (next_in - file_start - last_seen[hash]) & 0xFFFF;
	ldrh	w_tmp0,[last_seen,x_hash,lsl 1] 	//tmp_w last_seen[hash]
	sub 	x_dist,next_in,file_start
	//last_seen[hash] = (uint64_t) (next_in - file_start);
	strh	dist,[last_seen,x_hash,lsl 1]
	sub 	dist,dist,w_tmp0
	and	dist,dist,0xffff

	//literal when dist - 1 >= hist_size (unsigned, so dist == 0 also lands here)
	sub	w_tmp0,dist,1
	cmp	hist_size,w_tmp0
	bls	get_lit_code

	/// match_length = compare258(next_in - dist, next_in, 258);
	sub	x_tmp2,next_in,x_dist
	//hash is dead from here on; its register carries end_in - next_in
	sub	x_hash,end_in,next_in
	compare_max_258_bytes	tmp2,next_in,hash,match_length,tmp0,tmp1
	//matches of 3 bytes or fewer are emitted as a literal
	cmp	match_length,3
	bls	get_lit_code

	get_len_code 	hufftables,match_length,code,code_len,tmp0
	get_dist_code 	hufftables,dist,code2,code_len2,tmp0,tmp1,tmp2

	//code |= code2 << code_len;
	//code_len += code_len2;
	lsl	code2,code2,code_len
	orr	code,code,code2
	add	code_len,code_len,code_len2

	//next_in += match_length;
	add	next_in,next_in,match_length,uxtw

	//write_bits(&state->bitbuf, code, code_len);
	update_bits	stream,code,code_len,m_bits,m_bit_count,m_out_buf

	cmp	next_in,loop_end_cnt
	bls	main_loop_start
	b	main_loop_end
get_lit_code:
	//get_lit_code(stream->hufftables, literal & 0xFF, &code, &code_len);
	and		literal,literal,0xff
	get_lit_code	hufftables,literal,code,code_len

	//next_in++;
	add	next_in,next_in,1

	//write_bits(&state->bitbuf, code, code_len);
	update_bits	stream,code,code_len,m_bits,m_bit_count,m_out_buf
	cmp	next_in,loop_end_cnt
	bls	main_loop_start
main_loop_end:
	//tail: handle the remaining bytes one literal at a time
	sub	loop_end_cnt,end_in,1
	cmp	next_in,loop_end_cnt
	bhi	update_not_full
second_loop_start:
	cmp	m_out_buf,m_out_end
	bhi	update_state_exit
	//Byte load: as few as one input byte may remain here, so a 32-bit
	//load would read up to 3 bytes past end_in.  ldrb zero-extends, so
	//no extra mask is needed.
	ldrb	literal,[next_in]
	get_lit_code	hufftables,literal,code,code_len
	//next_in++;
	add	next_in,next_in,1

	//write_bits(&state->bitbuf, code, code_len);
	update_bits	stream,code,code_len,m_bits,m_bit_count,m_out_buf
	cmp	next_in,loop_end_cnt
	bls	second_loop_start

update_not_full:
	cmp	m_out_buf,m_out_end
	bhi	update_state_exit

	//all input consumed and room left: emit end-of-block (literal 256)
	mov	literal,256
	get_lit_code	hufftables,literal,code,code_len

	//write_bits(&state->bitbuf, code, code_len);
	update_bits	stream,code,code_len,m_bits,m_bit_count,m_out_buf
	ldrh	w_end_of_stream, [stream, _end_of_stream]
	mov	w_tmp0,1
	strb	w_tmp0,[stream,_internal_state_has_eob]
	//state = (end_of_stream == 1) ? ZSTATE_TRL : ZSTATE_SYNC_FLUSH
	cmp	w_end_of_stream,w_tmp0
	mov	w_tmp0, ZSTATE_TRL
	mov	w_tmp1,	ZSTATE_SYNC_FLUSH
	csel	w_tmp0,w_tmp0,w_tmp1,eq
	str	w_tmp0, [stream, _internal_state+_state]

update_state_exit:
	update_state	stream,start_in,next_in,end_in,m_out_buf,m_out_start,tmp0,tmp1
	pop_stack
	ret

	.size	isal_deflate_finish_aarch64, .-isal_deflate_finish_aarch64

265