xref: /isa-l/igzip/aarch64/gen_icf_map.S (revision 1187583a979dcd98945c8f01905140c9b78d8d11)
1/**********************************************************************
2  Copyright(c) 2019 Arm Corporation All rights reserved.
3
4  Redistribution and use in source and binary forms, with or without
5  modification, are permitted provided that the following conditions
6  are met:
7    * Redistributions of source code must retain the above copyright
8      notice, this list of conditions and the following disclaimer.
9    * Redistributions in binary form must reproduce the above copyright
10      notice, this list of conditions and the following disclaimer in
11      the documentation and/or other materials provided with the
12      distribution.
13    * Neither the name of Arm Corporation nor the names of its
14      contributors may be used to endorse or promote products derived
15      from this software without specific prior written permission.
16
17  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28**********************************************************************/
29
30#include "../include/aarch64_label.h"
31
32	.arch armv8-a+crc+crypto
33	.text
34	.align	2
35
36#include "lz0a_const_aarch64.h"
37#include "data_struct_aarch64.h"
38#include "huffman_aarch64.h"
39#include "bitbuf2_aarch64.h"
40#include "stdmac_aarch64.h"
41
/*
 * Macro declarations
 */
45
/*
 * declare_generic_reg name, reg, default
 *
 * Define three aliases for general-purpose register number <reg>:
 *   x_<name>  the 64-bit view (x<reg>)
 *   w_<name>  the 32-bit view (w<reg>)
 *   <name>    <default><reg>; every use in this file passes x or w here,
 *             which selects the width of the bare alias
 */
.macro	declare_generic_reg name:req,reg:req,default:req
	x_\name		.req	x\reg
	w_\name		.req	w\reg
	\name		.req	\default\reg
.endm
51
/*
 * tzbytecnt param0, param1
 *
 * Count the low-order zero bytes of a 64-bit value.
 *   in :      x_<param0> = value
 *   out:      w_<param0> = number of trailing zero bytes, 0..8
 *                          (8 when the value is zero)
 *   clobbers: x_<param1>, left holding the trailing zero BIT count (0..64)
 *
 * rbit followed by clz gives the trailing zero bit count. CLZ returns 64
 * for a zero source, so the shift by 3 already yields 8 for a zero input
 * and no separate zero test is required. The condition flags are left
 * untouched.
 */
.macro tzbytecnt param0:req,param1:req
	rbit	x_\param1, x_\param0
	clz	x_\param1, x_\param1
	lsr	w_\param0, w_\param1, 3
.endm
60
/*
 * write_deflate_icf param0, param1, param2, param3
 *
 * Pack three fields into one 32-bit word and store it:
 *   word = w_<param1> | (w_<param2> << 10) | (w_<param3> << 19)
 *   32-bit store of word to the address in x_<param0>
 *
 * The fields are OR-ed together without masking, so each value must
 * already fit its slot. w_<param1> is overwritten with the packed word;
 * the other operands are unchanged.
 */
.macro write_deflate_icf param0:req,param1:req,param2:req,param3:req
	orr	w_\param1, w_\param1, w_\param2, lsl 10
	orr	w_\param1, w_\param1, w_\param3, lsl 19
	str	w_\param1, [x_\param0]
.endm
66
67	.align	2
68	.global	cdecl(gen_icf_map_h1_aarch64)
69#ifndef __APPLE__
70	.type	gen_icf_map_h1_aarch64, %function
71#endif
72
73	/* arguments */
74	declare_generic_reg	stream_param,			0,x
75	declare_generic_reg	matches_icf_lookup_param,	1,x
76	declare_generic_reg	input_size_param,		2,x
77
78	declare_generic_reg	param0,				0,x
79	declare_generic_reg	param1,				1,x
80	declare_generic_reg	param2,				2,x
81	declare_generic_reg	param3,				3,x
82
83	/* return */
84	declare_generic_reg	ret_val,			0,x
85
86	/* variables */
87	declare_generic_reg	input_size,			3,x
88	declare_generic_reg	next_in,			4,x
89	declare_generic_reg	matches_icf_lookup,		6,x
90	declare_generic_reg	hash_table,			7,x
91	declare_generic_reg	end_in,				8,x
92	declare_generic_reg	file_start,			9,x
93	declare_generic_reg	hash_mask,			10,w
94	declare_generic_reg	hist_size,			11,w
95	declare_generic_reg	stream_saved,			12,x
96	declare_generic_reg	literal_32,			13,w
97	declare_generic_reg	literal_1,			14,w
98	declare_generic_reg	dist,				15,w
99
100	declare_generic_reg	tmp_has_hist,			0,w
101	declare_generic_reg	tmp_offset_hash_table,		1,x
102	declare_generic_reg	tmp0,				0,x
103	declare_generic_reg	tmp1,				1,x
104	declare_generic_reg	tmp2,				2,x
105	declare_generic_reg	tmp3,				3,x
106	declare_generic_reg	tmp5,				5,x
107
/*
 * Algorithm constants.
 *   ISAL_LOOK_AHEAD  inputs shorter than this return 0, and this many bytes
 *                    at the end of the input are left unprocessed
 *   SHORTEST_MATCH   minimum number of equal bytes for a match record
 *   LEN_OFFSET       added to the match length before it is stored
 */
.equ	ISAL_LOOK_AHEAD, 288
.equ	SHORTEST_MATCH, 4
.equ	LEN_OFFSET, 254

/* Bit masks applied to an output record. */
.equ	mask_10bit, 0x3ff		/* low 10 bits */
.equ	mask_lit_dist, 0x7800		/* 0x1e placed at bit 10 */

/*
 * Byte offsets into struct isal_zstream.
 * NOTE(review): these are hard-coded and presumably have to track the C
 * structure layout by hand -- confirm against the C headers when the
 * structure changes. Only some of them are referenced in this file.
 */
.equ	offset_next_in, 0
.equ	offset_avail_in, 8
.equ	offset_total_in, 12
.equ	offset_next_out, 16
.equ	offset_avail_out, 24
.equ	offset_total_out, 28
.equ	offset_hufftables, 32
.equ	offset_level, 40
.equ	offset_level_buf_size, 44
.equ	offset_level_buf, 48
.equ	offset_end_of_stream, 56
.equ	offset_flush, 58
.equ	offset_gzip_flag, 60
.equ	offset_hist_bits, 62
.equ	offset_state, 64
.equ	offset_state_block_end, 72
.equ	offset_state_dist_mask, 76
.equ	offset_state_has_hist, 135

/* Byte offset of the hash table inside struct level_buf. */
.equ	offset_hash_map_hash_table, 4712
139
/*
 * uint64_t gen_icf_map_h1_aarch64(struct isal_zstream *stream,
 *			struct deflate_icf *matches_icf_lookup,
 *			uint64_t input_size)
 *
 * Same prototype as the reference routine gen_icf_map_h1_base.
 *
 * For every input position the routine
 *   1. hashes the 4 bytes at that position with CRC32C and masks the result
 *      with hash_mask,
 *   2. reads the 16-bit position previously stored for that hash and stores
 *      the current position (relative to file_start) in its place,
 *   3. derives dist = ((pos - 1 - previous) & hist_size) + 1,
 *   4. XORs the 8 bytes at next_in with the 8 bytes at next_in - dist and
 *      counts the equal low-order bytes,
 *   5. writes one 32-bit record: a match (length + LEN_OFFSET, distance
 *      code, extra bits) when at least SHORTEST_MATCH bytes are equal,
 *      otherwise a literal (byte value, 0x1e, 0).
 *
 * In : x0 = stream, x1 = matches_icf_lookup, x2 = input_size
 * Out: x0 = number of input bytes processed, which is also the number of
 *      records written; 0 when input_size < ISAL_LOOK_AHEAD.
 *
 * The last ISAL_LOOK_AHEAD bytes of the input are not processed.
 * Side effects: the hash table at level_buf + offset_hash_map_hash_table is
 * updated, and the has_hist byte of the stream is set to 1 if it was 0.
 * Only x0-x15 are used as working registers; x29/x30 are saved and restored
 * through a 16-byte frame record.
 */

cdecl(gen_icf_map_h1_aarch64):
	cmp	input_size_param, (ISAL_LOOK_AHEAD-1)
	bls	.fast_exit		// input_size < ISAL_LOOK_AHEAD
	stp	x29, x30, [sp, -16]!

	/* x0-x2 double as scratch registers, so move the arguments away. */
	mov	stream_saved, stream_param
	mov	matches_icf_lookup, matches_icf_lookup_param
	mov	x29, sp

	ldrb	tmp_has_hist, [stream_saved, offset_state_has_hist]
	mov	tmp_offset_hash_table, offset_hash_map_hash_table
	/* end_in temporarily holds the start of the input (stream next_in). */
	ldr	end_in, [stream_saved, offset_next_in]
	mov	input_size, input_size_param
	ldr	hash_table, [stream_saved, offset_level_buf]
	ldr	w_file_start, [stream_saved, offset_total_in]
	/* hist_size <- word at offset_state_dist_mask, hash_mask <- next word */
	ldp	hist_size, hash_mask, [stream_saved, offset_state_dist_mask]
	add	hash_table, hash_table, tmp_offset_hash_table
	sub	file_start, end_in, file_start	// input start - total_in
	cbnz	tmp_has_hist, .while_check1

	/*
	 * has_hist was 0: emit the first byte as a literal record
	 * (byte | 0x1e << 10), enter its position in the hash table and set
	 * has_hist to 1. The record word is built directly in a register;
	 * the output slot is never read.
	 */
.igzip_no_hist:
	ldrb	w_tmp1, [end_in]
	add	next_in, end_in, 1
	orr	w_tmp0, w_tmp1, mask_lit_dist
	str	w_tmp0, [matches_icf_lookup], 4
	ldr	w_tmp0, [end_in]
	crc32cw	w_tmp0, wzr, w_tmp0

	and	w_tmp5, w_tmp0, hash_mask
	sub	x_tmp1, end_in, file_start
	mov	w_tmp2, 1
	mov	x_tmp0, 1		// bytes processed so far (see .exit)
	strh	w_tmp1, [hash_table, x_tmp5, lsl 1]
	strb	w_tmp2, [stream_saved, offset_state_has_hist]
	b	.while_check2

.while_check1:
	mov	next_in, end_in
	mov	x_tmp0, 0		// bytes processed so far (see .exit)

	/*
	 * end_in = input start + input_size - ISAL_LOOK_AHEAD.
	 * If nothing is left to process, leave through .exit with x0 still
	 * holding the 0 or 1 set above, which is the return value.
	 */
.while_check2:
	sub	input_size, input_size, ISAL_LOOK_AHEAD
	add	end_in, end_in, input_size
	cmp	next_in, end_in
	bcs	.exit
	mov	literal_32, 32
	mov	literal_1, 1
	b	.while_loop

	/*
	 * Match record. On entry:
	 *   w_tmp0 = number of equal bytes, w_tmp2 = dist - 1, w_tmp3 = 0
	 */
	.align 3
.new_match_found:
	clz	w_tmp5, w_tmp2
	add	w_tmp1, w_tmp0, LEN_OFFSET	// stored length field
	sub	w_tmp5, literal_32, w_tmp5
	cmp	dist, 2
	sub	w_tmp5, w_tmp5, #2	// w_tmp5 = 32 - clz(dist - 1) - 2
	/* dist <= 2: code = dist - 1 (already in w_tmp2), extra bits = 0 */
	bls	.skip_compute_dist_icf_code

	/*
	 * n      = w_tmp5 (number of extra bits)
	 * extra  = (dist - 1) & ((1 << n) - 1)	-> w_tmp3
	 * code   = ((dist - 1) >> n) + 2 * n	-> w_tmp2
	 */
	lsl	w_tmp3, literal_1, w_tmp5
	sub	w_tmp3, w_tmp3, #1
	lsr	w_tmp0, w_tmp2, w_tmp5
	and	w_tmp3, w_tmp3, w_tmp2
	add	w_tmp2, w_tmp0, w_tmp5, lsl 1

.skip_compute_dist_icf_code:
	/* param1..param3 alias tmp1..tmp3; store straight through the output pointer */
	write_deflate_icf matches_icf_lookup,param1,param2,param3

	add	next_in, next_in, 1
	add	matches_icf_lookup, matches_icf_lookup, 4
	cmp	next_in, end_in
	beq	.save_with_exit

.while_loop:
	/* hash = crc32c(0, 32-bit load at next_in) & hash_mask */
	ldr	w_tmp0, [next_in]
	crc32cw	w_tmp0, wzr, w_tmp0

	and	w_tmp0, w_tmp0, hash_mask
	sub	x_tmp1, next_in, file_start	// current position
	lsl	x_tmp0, x_tmp0, 1		// byte offset of the 16-bit entry
	sub	w_tmp2, w_tmp1, #1
	ldrh	w_tmp3, [hash_table, x_tmp0]	// previous position for this hash
	strh	w_tmp1, [hash_table, x_tmp0]	// replace it with the current one
	sub	w_tmp2, w_tmp2, w_tmp3
	and	w_tmp2, w_tmp2, hist_size	// w_tmp2 = dist - 1
	add	dist, w_tmp2, 1
	/* compare 8 bytes at next_in with 8 bytes at next_in - dist */
	ldr	x_tmp0, [next_in]
	sub	x_tmp1, next_in, w_dist, uxtw
	ldr	x_tmp1, [x_tmp1]
	eor	x_tmp0, x_tmp1, x_tmp0
	tzbytecnt	param0,param1		// w_tmp0 = number of equal bytes

	cmp	w_tmp0, (SHORTEST_MATCH-1)
	mov	w_tmp3, 0			// default extra bits for a match
	bhi	.new_match_found

	/* Literal record: byte value, 0x1e in the second field, 0 in the third. */
	ldrb	w_param1, [next_in]
	mov	w_param3, 0
	mov	w_param2, 0x1e
	write_deflate_icf matches_icf_lookup,param1,param2,param3

	add	next_in, next_in, 1
	add	matches_icf_lookup, matches_icf_lookup, 4
	cmp	next_in, end_in
	bne	.while_loop

	/* return next_in - stream next_in */
.save_with_exit:
	ldr	ret_val, [stream_saved, offset_next_in]
	sub	ret_val, next_in, ret_val

.exit:
	ldp	x29, x30, [sp], 16
	ret

	/* Taken before the frame record is pushed, so there is nothing to pop. */
	.align 3
.fast_exit:
	mov	ret_val, 0
	ret
#ifndef __APPLE__
	.size	gen_icf_map_h1_aarch64, .-gen_icf_map_h1_aarch64
#endif
274