xref: /isa-l/igzip/aarch64/gen_icf_map.S (revision d3cfb2fb772e375cf2007e484e0a6ec0c6a7c993)
1/**********************************************************************
2  Copyright(c) 2019 Arm Corporation All rights reserved.
3
4  Redistribution and use in source and binary forms, with or without
5  modification, are permitted provided that the following conditions
6  are met:
7    * Redistributions of source code must retain the above copyright
8      notice, this list of conditions and the following disclaimer.
9    * Redistributions in binary form must reproduce the above copyright
10      notice, this list of conditions and the following disclaimer in
11      the documentation and/or other materials provided with the
12      distribution.
13    * Neither the name of Arm Corporation nor the names of its
14      contributors may be used to endorse or promote products derived
15      from this software without specific prior written permission.
16
17  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28**********************************************************************/
29	.arch armv8-a+crc+crypto
30	.text
31	.align	2
32
33#include "lz0a_const_aarch64.h"
34#include "data_struct_aarch64.h"
35#include "huffman_aarch64.h"
36#include "bitbuf2_aarch64.h"
37#include "stdmac_aarch64.h"
38
/*
 * Macro definitions
 */
42
/*
 * declare_generic_reg name, reg, default
 *
 * Create three register aliases for general-purpose register number <reg>:
 *   w_<name>  -> w<reg>            (32-bit view)
 *   x_<name>  -> x<reg>            (64-bit view)
 *   <name>    -> <default><reg>    (<default> is the letter w or x and picks
 *                                   the view used by the bare name)
 */
.macro	declare_generic_reg name:req,reg:req,default:req
	w_\name		.req	w\reg
	x_\name		.req	x\reg
	\name		.req	\default\reg
.endm
48
/*
 * tzbytecnt param0, param1
 *
 * Count the all-zero bytes at the least-significant end of a 64-bit value.
 *
 *   in : x_<param0> = value
 *   out: w_<param0> = (number of trailing zero bits) / 8, i.e. 0..8
 *        x_<param1> = scratch; left holding the trailing-zero bit count
 *
 * rbit followed by clz yields the trailing-zero bit count.  clz returns 64
 * for a zero operand, so an all-zero value gives 64 >> 3 = 8 without any
 * special-casing.  The condition flags are not modified.
 */
.macro tzbytecnt param0:req,param1:req
	rbit	x_\param1, x_\param0
	clz	x_\param1, x_\param1
	lsr	w_\param0, w_\param1, 3
.endm
57
/*
 * write_deflate_icf param0, param1, param2, param3
 *
 * Pack three fields into one 32-bit word and store it:
 *
 *   *(uint32_t *)x_<param0> = w_<param1>
 *                           | (w_<param2> << 10)
 *                           | (w_<param3> << 19)
 *
 * The fields are only ORed together, never masked, so each input must
 * already fit its slot.  w_<param1> is overwritten with the packed word.
 */
.macro write_deflate_icf param0:req,param1:req,param2:req,param3:req
	orr	w_\param1, w_\param1, w_\param3, lsl #19
	orr	w_\param1, w_\param1, w_\param2, lsl #10
	str	w_\param1, [x_\param0]
.endm
63
64	.align	2
65	.global	gen_icf_map_h1_aarch64
66	.type	gen_icf_map_h1_aarch64, %function
67
68	/* arguments */
69	declare_generic_reg	stream_param,			0,x
70	declare_generic_reg	matches_icf_lookup_param,	1,x
71	declare_generic_reg	input_size_param,		2,x
72
73	declare_generic_reg	param0,				0,x
74	declare_generic_reg	param1,				1,x
75	declare_generic_reg	param2,				2,x
76	declare_generic_reg	param3,				3,x
77
78	/* return */
79	declare_generic_reg	ret_val,			0,x
80
81	/* variables */
82	declare_generic_reg	input_size,			3,x
83	declare_generic_reg	next_in,			4,x
84	declare_generic_reg	matches_icf_lookup,		6,x
85	declare_generic_reg	hash_table,			7,x
86	declare_generic_reg	end_in,				8,x
87	declare_generic_reg	file_start,			9,x
88	declare_generic_reg	hash_mask,			10,w
89	declare_generic_reg	hist_size,			11,w
90	declare_generic_reg	stream_saved,			12,x
91	declare_generic_reg	literal_32,			13,w
92	declare_generic_reg	literal_1,			14,w
93	declare_generic_reg	dist,				15,w
94
95	declare_generic_reg	tmp_has_hist,			0,w
96	declare_generic_reg	tmp_offset_hash_table,		1,x
97	declare_generic_reg	tmp0,				0,x
98	declare_generic_reg	tmp1,				1,x
99	declare_generic_reg	tmp2,				2,x
100	declare_generic_reg	tmp3,				3,x
101	declare_generic_reg	tmp5,				5,x
102
/* constants */
.equ	ISAL_LOOK_AHEAD,	288	/* minimum input_size; also the tail left unprocessed */
.equ	SHORTEST_MATCH,		4	/* fewest equal bytes recorded as a match */
.equ	LEN_OFFSET,		254	/* added to a match length to form its lit_len field */

/* masks */
.equ	mask_10bit,		((1 << 10) - 1)	/* 1023: low 10 bits of an ICF word */
.equ	mask_lit_dist,		(0x1e << 10)	/* 0x7800: literal marker 0x1e in the field at bit 10 */
111
/*
 * Byte offsets into struct isal_zstream.
 * These are hard-coded here; NOTE(review): they presumably have to track the
 * C definition of the structure - confirm whenever that layout changes.
 * The code visible in this file reads/writes only next_in, total_in,
 * level_buf, state_dist_mask (together with the 32-bit word that follows it)
 * and state_has_hist.
 */
.equ	offset_next_in,			0
.equ	offset_avail_in,		8
.equ	offset_total_in,		12
.equ	offset_next_out,		16
.equ	offset_avail_out,		24
.equ	offset_total_out,		28
.equ	offset_hufftables,		32
.equ	offset_level,			40
.equ	offset_level_buf_size,		44
.equ	offset_level_buf,		48
.equ	offset_end_of_stream,		56
.equ	offset_flush,			58
.equ	offset_gzip_flag,		60
.equ	offset_hist_bits,		62
.equ	offset_state,			64
.equ	offset_state_block_end,		72
.equ	offset_state_dist_mask,		76
.equ	offset_state_has_hist,		135

/* Byte offset into struct level_buf: start of the 16-bit hash table. */
.equ	offset_hash_map_hash_table,	4712
134
/*
 * uint64_t gen_icf_map_h1_aarch64(struct isal_zstream *stream,
 *				   struct deflate_icf *matches_icf_lookup,
 *				   uint64_t input_size)
 *
 * Same prototype as the C routine gen_icf_map_h1_base().
 *
 * For every input position p in
 *	[stream->next_in, stream->next_in + input_size - ISAL_LOOK_AHEAD)
 * one 32-bit ICF word is stored to matches_icf_lookup[]:
 *   - the 4 bytes at p are hashed (crc32cw, zero seed) and masked with the
 *     hash mask; the 16-bit table entry for that hash gives a candidate
 *     earlier position and is then replaced by the low 16 bits of
 *     (p - file_start);
 *   - the 8 bytes at p are compared with the 8 bytes at p - dist; if at
 *     least SHORTEST_MATCH low-order bytes are equal a match word
 *     (len + LEN_OFFSET, distance code, extra bits) is written, otherwise a
 *     literal word (byte value, 0x1e, 0).
 *
 * In : x0 = stream, x1 = matches_icf_lookup, x2 = input_size
 * Out: x0 = number of input positions processed (== ICF words written);
 *	0 when input_size < ISAL_LOOK_AHEAD
 * Clobbers: x0-x15 and the condition flags; x29/x30 are saved and restored.
 * Memory side effects: the ICF output words, the hash table, and the byte at
 *	stream + offset_state_has_hist (set to 1 if it was 0).
 *
 * Loads of 4 and 8 bytes are issued at every processed position, so up to 7
 * bytes past the last processed position are read.
 */

gen_icf_map_h1_aarch64:
	/* input_size < ISAL_LOOK_AHEAD: nothing to do, return 0 */
	cmp	input_size_param, (ISAL_LOOK_AHEAD-1)
	bls	.Lfast_exit
	stp	x29, x30, [sp, -16]!
	mov	x29, sp

	/* x0-x3 double as scratch registers below: move the arguments away */
	mov	stream_saved, stream_param
	mov	matches_icf_lookup, matches_icf_lookup_param
	mov	input_size, input_size_param

	ldrb	tmp_has_hist, [stream_saved, offset_state_has_hist]
	mov	tmp_offset_hash_table, offset_hash_map_hash_table
	ldr	end_in, [stream_saved, offset_next_in]		/* start of input for now */
	ldr	hash_table, [stream_saved, offset_level_buf]
	ldr	w_file_start, [stream_saved, offset_total_in]	/* 32-bit, zero-extended */
	/* one ldp fetches dist_mask and the 32-bit word that follows it */
	ldp	hist_size, hash_mask, [stream_saved, offset_state_dist_mask]
	add	hash_table, hash_table, tmp_offset_hash_table
	sub	file_start, end_in, file_start			/* next_in - total_in */
	cbnz	tmp_has_hist, .Lhave_hist

	/*
	 * No history yet: the first byte has nothing to match against, so it
	 * is emitted as a literal: byte | (0x1e << 10), extra bits = 0.
	 */
	ldrb	w_tmp1, [end_in]
	add	next_in, end_in, 1
	orr	w_tmp1, w_tmp1, mask_lit_dist
	str	w_tmp1, [matches_icf_lookup], 4

	/* enter position 0 into the hash table */
	ldr	w_tmp0, [end_in]
	crc32cw	w_tmp0, wzr, w_tmp0
	and	w_tmp5, w_tmp0, hash_mask
	sub	x_tmp1, end_in, file_start
	strh	w_tmp1, [hash_table, x_tmp5, lsl 1]

	/* mark the stream as having history */
	mov	w_tmp2, 1
	strb	w_tmp2, [stream_saved, offset_state_has_hist]

	mov	x_tmp0, 1		/* return value if the loop is skipped */
	b	.Lcheck_range

.Lhave_hist:
	mov	next_in, end_in
	mov	x_tmp0, 0		/* return value if the loop is skipped */

.Lcheck_range:
	/* end_in = start + input_size - ISAL_LOOK_AHEAD */
	sub	input_size, input_size, ISAL_LOOK_AHEAD
	add	end_in, end_in, input_size
	cmp	next_in, end_in
	bcs	.Lexit			/* next_in >= end_in: x0 already holds 0 or 1 */
	mov	literal_32, 32
	mov	literal_1, 1
	b	.Lwhile_loop

	.align 3
.Lnew_match_found:
	/*
	 * On entry: w_tmp0 = match length in bytes, w_tmp2 = dist - 1,
	 *	     w_tmp3 = 0.
	 * Distance code:
	 *   dist <= 2 : code = dist - 1, extra = 0
	 *   otherwise : n     = (32 - clz(dist - 1)) - 2
	 *		 extra = (dist - 1) & ((1 << n) - 1)
	 *		 code  = ((dist - 1) >> n) + 2 * n
	 */
	clz	w_tmp5, w_tmp2
	add	w_tmp1, w_tmp0, LEN_OFFSET	/* lit_len field */
	sub	w_tmp5, literal_32, w_tmp5
	cmp	dist, 2
	sub	w_tmp5, w_tmp5, #2		/* n; sub leaves the flags intact */
	bls	.Lskip_compute_dist_icf_code

	lsl	w_tmp3, literal_1, w_tmp5
	sub	w_tmp3, w_tmp3, #1
	lsr	w_tmp0, w_tmp2, w_tmp5
	and	w_tmp3, w_tmp3, w_tmp2		/* extra bits */
	add	w_tmp2, w_tmp0, w_tmp5, lsl 1	/* distance code */

.Lskip_compute_dist_icf_code:
	mov	param0, matches_icf_lookup
	write_deflate_icf param0,param1,param2,param3

	add	next_in, next_in, 1
	add	matches_icf_lookup, matches_icf_lookup, 4
	cmp	next_in, end_in
	beq	.Lsave_with_exit

.Lwhile_loop:
	/* hash = crc32c(0, 4 bytes at next_in) & hash_mask */
	ldr	w_tmp0, [next_in]
	crc32cw	w_tmp0, wzr, w_tmp0
	and	w_tmp0, w_tmp0, hash_mask

	/*
	 * pos  = next_in - file_start
	 * dist = ((pos - 1 - hash_table[hash]) & hist_size) + 1
	 * hash_table[hash] = (uint16_t) pos
	 */
	sub	x_tmp1, next_in, file_start
	sub	w_tmp2, w_tmp1, #1
	ldrh	w_tmp3, [hash_table, x_tmp0, lsl 1]
	strh	w_tmp1, [hash_table, x_tmp0, lsl 1]
	sub	w_tmp2, w_tmp2, w_tmp3
	and	w_tmp2, w_tmp2, hist_size	/* dist - 1, kept for the dist code */
	add	dist, w_tmp2, 1

	/*
	 * XOR the next 8 bytes with the 8 bytes at next_in - dist; the zero
	 * low-order bytes of the result are the bytes that are equal.
	 * NOTE(review): this takes low-order bytes to be the first bytes in
	 * memory, i.e. little-endian data accesses - confirm for the target.
	 */
	ldr	x_tmp0, [next_in]
	sub	x_tmp1, next_in, w_dist, uxtw
	ldr	x_tmp1, [x_tmp1]
	eor	x_tmp0, x_tmp1, x_tmp0
	tzbytecnt	param0,param1		/* w_tmp0 = match length, 0..8 */

	cmp	w_tmp0, (SHORTEST_MATCH-1)
	mov	w_tmp3, 0			/* extra bits default; mov keeps the flags */
	bhi	.Lnew_match_found

	/* too short to be a match: emit the current byte as a literal */
	ldrb	w_param1, [next_in]
	mov	x_param0, matches_icf_lookup
	mov	w_param3, 0
	mov	w_param2, 0x1e
	write_deflate_icf param0,param1,param2,param3

	add	next_in, next_in, 1
	add	matches_icf_lookup, matches_icf_lookup, 4
	cmp	next_in, end_in
	bne	.Lwhile_loop

.Lsave_with_exit:
	/* return next_in - stream->next_in */
	ldr	ret_val, [stream_saved, offset_next_in]
	sub	ret_val, next_in, ret_val

.Lexit:
	ldp	x29, x30, [sp], 16
	ret

	.align 3
.Lfast_exit:
	mov	ret_val, 0
	ret
	.size	gen_icf_map_h1_aarch64, .-gen_icf_map_h1_aarch64
267