xref: /isa-l/igzip/aarch64/igzip_set_long_icf_fg.S (revision 1187583a979dcd98945c8f01905140c9b78d8d11)
1/**********************************************************************
2  Copyright(c) 2019 Arm Corporation All rights reserved.
3
4  Redistribution and use in source and binary forms, with or without
5  modification, are permitted provided that the following conditions
6  are met:
7    * Redistributions of source code must retain the above copyright
8      notice, this list of conditions and the following disclaimer.
9    * Redistributions in binary form must reproduce the above copyright
10      notice, this list of conditions and the following disclaimer in
11      the documentation and/or other materials provided with the
12      distribution.
13    * Neither the name of Arm Corporation nor the names of its
14      contributors may be used to endorse or promote products derived
15      from this software without specific prior written permission.
16
17  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28**********************************************************************/
29
30#include "../include/aarch64_label.h"
31
32	.arch armv8-a
33	.text
34	.align	2
35
36#include "lz0a_const_aarch64.h"
37#include "data_struct_aarch64.h"
38#include "huffman_aarch64.h"
39#include "bitbuf2_aarch64.h"
40#include "stdmac_aarch64.h"
41
/*
 * declare_generic_reg <name>, <reg>, <default>
 *
 * Defines three .req aliases for general-purpose register number <reg>:
 *   x_<name>  the 64-bit view  (x<reg>)
 *   w_<name>  the 32-bit view  (w<reg>)
 *   <name>    the view selected by <default>, which is the register
 *             prefix letter (w or x) pasted in front of <reg>
 *
 * All three arguments are required.
 */
.macro	declare_generic_reg name:req,reg:req,default:req
	x_\name		.req	x\reg
	w_\name		.req	w\reg
	\name		.req	\default\reg
.endm
51
52	.text
53	.align	2
54	.global	cdecl(set_long_icf_fg_aarch64)
55#ifndef __APPLE__
56	.type	set_long_icf_fg_aarch64, %function
57#endif
58
59/*
60void set_long_icf_fg_aarch64(uint8_t * next_in, uint64_t processed, uint64_t input_size,
61			  struct deflate_icf *match_lookup)
62*/
63
64	/* arguments */
65	declare_generic_reg	next_in_param,		0,x
66	declare_generic_reg	processed_param,	1,x
67	declare_generic_reg	input_size_param,	2,x
68	declare_generic_reg	match_lookup_param,	3,x
69
70	declare_generic_reg	param0,			0,x
71	declare_generic_reg	param1,			1,x
72	declare_generic_reg	param2,			2,x
73
74	/* local variable */
75	declare_generic_reg	len,			7,w
76	declare_generic_reg	dist_code,		8,w
77	declare_generic_reg	shortest_match_len,	9,w
78	declare_generic_reg	len_max,		10,w
79	declare_generic_reg	dist_extra,		11,w
80	declare_generic_reg	const_8,		13,x
81	declare_generic_reg	next_in,		20,x
82	declare_generic_reg	dist_start,		21,x
83	declare_generic_reg	end_processed,		22,x
84	declare_generic_reg	end_in,			23,x
85	declare_generic_reg	match_lookup,		19,x
86
87	declare_generic_reg	match_length,		4,w
88	declare_generic_reg	tmp0,			5,w
89	declare_generic_reg	tmp1,			6,w
90
/* constants */

/* Added (together with 8) to the byte count from compare_aarch64 to form
   the value that is compared with, and stored in, an entry's length field. */
.equ	LEN_OFFSET, 254
/* Entries are only rewritten while the new length value is greater than
   LEN_OFFSET + SHORTEST_MATCH - 1. */
.equ	SHORTEST_MATCH, 4
/* Largest length value ever stored into an entry. */
.equ	LEN_MAX_CONST, 512
/* end_in is clamped to at most end_processed + ISAL_LOOK_AHEAD. */
.equ	ISAL_LOOK_AHEAD, 288
/* Size in bytes of the distance-start table (32 words). */
.equ	DIST_START_SIZE, 128
97
/*
 * void set_long_icf_fg_aarch64(uint8_t *next_in, uint64_t processed,
 *                              uint64_t input_size,
 *                              struct deflate_icf *match_lookup)
 *
 * In:   x0 = next_in, x1 = processed, x2 = input_size, x3 = match_lookup
 * Out:  nothing in registers; 4-byte entries at match_lookup are rewritten.
 *
 * Each 4-byte entry is decoded here as
 *   bits  0- 9  length field (masked with LIT_LEN_MASK)
 *   bits 10-18  distance code
 *   bits 19-31  distance extra bits
 *
 * For every input position below next_in + processed whose length field is
 * at least LEN_OFFSET + 8, the bytes from position + 8 onwards are compared
 * against the bytes `dist` earlier, where
 *   dist = dist_start[distance code] + distance extra bits.
 * If the resulting length value is larger than the stored one, this entry
 * and the following ones are overwritten with the new length (decreasing by
 * one per entry, capped at LEN_MAX_CONST) and the same distance fields, for
 * as long as the new length exceeds both the next entry's length field and
 * LEN_OFFSET + SHORTEST_MATCH - 1.
 *
 * The distance table is read directly from read-only data; no copy is made
 * on the stack and no external function is called.
 *
 * Stack: 64-byte frame holding x29/x30 and the callee-saved x19-x23.
 * Scratch: x0-x2, x4-x11, x13 and the condition flags.
 * NOTE(review): compare_aarch64 comes from an included header; it is
 * assumed to modify only the six registers passed to it, because w7-w11
 * and x13 hold live values across it -- confirm against the macro.
 */
cdecl(set_long_icf_fg_aarch64):
	stp	x29, x30, [sp, -64]!
	mov	x29, sp
	stp	x19, x20, [sp, 16]
	stp	x21, x22, [sp, 32]
	str	x23, [sp, 48]

	/* Move the arguments out of x0-x3; x0-x2 are reused below as the
	   operands of compare_aarch64. */
	add	end_processed, next_in_param, processed_param
	mov	next_in, next_in_param
	add	end_in, next_in_param, input_size_param
	mov	match_lookup, match_lookup_param

	/* dist_start = address of the read-only distance-start table. */
#ifndef __APPLE__
	adrp	dist_start, .data_dist_start
	add	dist_start, dist_start, :lo12:.data_dist_start
#else
	adrp	dist_start, .data_dist_start@PAGE
	add	dist_start, dist_start, .data_dist_start@PAGEOFF
#endif

	/* end_in = min(end_in, end_processed + ISAL_LOOK_AHEAD) */
	add	x_tmp0, end_processed, ISAL_LOOK_AHEAD // 288
	cmp	end_in, x_tmp0
	csel	end_in, end_in, x_tmp0, cc
	cmp	next_in, end_processed
	bcs	.done			// nothing to process

	/* Loop-invariant constants. */
	mov	const_8, 8
	mov	len_max, LEN_MAX_CONST // 512
	mov	shortest_match_len, (LEN_OFFSET + SHORTEST_MATCH - 1)
	b	.while_outer_loop

	.align 2
.while_outer_check:
	/* Step to the next input byte / next 4-byte entry. */
	add	next_in, next_in, 1
	add	match_lookup, match_lookup, 4
	cmp	end_processed, next_in
	bls	.done

.while_outer_loop:
	/* Skip entries whose length field is below LEN_OFFSET + 8. */
	ldrh	len, [match_lookup]
	and	len, len, LIT_LEN_MASK // 1023
	cmp	len, (LEN_OFFSET + 8 - 1) // 261
	bls	.while_outer_check

	/* Decode the distance fields and set up the comparison:
	     x1 = next_in + 8
	     w2 = end_in - (next_in + 8)   (32-bit difference)
	     x0 = next_in + 8 - dist                                  */
	ldr	dist_code, [match_lookup]
	add	x1, next_in, 8
	ldrh	dist_extra, [match_lookup, 2]
	sub	w2, w_end_in, w1
	ubfx	x_dist_code, x_dist_code, 10, 9
	ubfx	x_dist_extra, x_dist_extra, 3, 13
	uxtw	x0, dist_code
	ldr	w0, [dist_start, x0, lsl 2]
	add	w0, dist_extra, w0	// w0 = dist, upper half of x0 cleared
	sub	x0, const_8, x0
	add	x0, next_in, x0

	compare_aarch64 param0,param1,param2,match_length,tmp0,tmp1
	mov	w0, w_match_length

	/* w0 = new length value; keep the stored one unless it is smaller. */
	add	w0, w0, (LEN_OFFSET + 8) // 262
	cmp	w0, len
	bls	.while_outer_check

	/* w2 = distance fields already shifted into entry position. */
	lsl	w2, dist_extra, 19
	orr	w2, w2, dist_code, lsl 10

	.align 3
.while_inner_loop:
	/* Store min(w0, LEN_MAX_CONST) | w2 into the current entry, then
	   move on to the next entry with the length reduced by one. */
	cmp	w0, LEN_MAX_CONST // 512
	add	next_in, next_in, 1
	csel	w1, w0, len_max, ls
	sub	w0, w0, #1
	orr	w1, w1, w2
	str	w1, [match_lookup]
	ldrh	w1, [match_lookup, 4]!	// pre-index: advance, then load

	/* Continue while max(next length field, 257) < remaining length. */
	and	w1, w1, LIT_LEN_MASK // 1023
	cmp	w1, (LEN_OFFSET + SHORTEST_MATCH - 1) // 257
	csel	w1, w1, shortest_match_len, cs
	cmp	w1, w0
	bcc	.while_inner_loop

	/* The entry that ended the inner loop is left untouched and skipped. */
	add	next_in, next_in, 1
	add	match_lookup, match_lookup, 4
	cmp	end_processed, next_in
	bhi	.while_outer_loop

.done:
	ldp	x19, x20, [sp, 16]
	ldp	x21, x22, [sp, 32]
	ldr	x23, [sp, 48]
	ldp	x29, x30, [sp], 64
	ret
#ifndef __APPLE__
	.size	set_long_icf_fg_aarch64, .-set_long_icf_fg_aarch64
#endif
200
/*
 * Distance-start table: 32 words (128 bytes), indexed by distance code.
 * Entries 0-29 equal the base distances of DEFLATE distance codes 0-29;
 * the last two entries are zero padding.
 * .data_dist_start is the symbol referenced from the code above;
 * .real_data_dist_start labels the same address.
 */
ASM_DEF_RODATA
	.p2align	3
	.set	.data_dist_start,. + 0
.real_data_dist_start:
	.word	0x0001, 0x0002, 0x0003, 0x0004	/* codes  0- 3 */
	.word	0x0005, 0x0007, 0x0009, 0x000d	/* codes  4- 7 */
	.word	0x0011, 0x0019, 0x0021, 0x0031	/* codes  8-11 */
	.word	0x0041, 0x0061, 0x0081, 0x00c1	/* codes 12-15 */
	.word	0x0101, 0x0181, 0x0201, 0x0301	/* codes 16-19 */
	.word	0x0401, 0x0601, 0x0801, 0x0c01	/* codes 20-23 */
	.word	0x1001, 0x1801, 0x2001, 0x3001	/* codes 24-27 */
	.word	0x4001, 0x6001, 0x0000, 0x0000	/* codes 28-29, padding */
209