xref: /isa-l_crypto/rolling_hash/rolling_hash2_until_04.asm (revision 81c7feeeb9cf25d8080286d86bc1e13ecc129c7a)
1;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2;  Copyright(c) 2011-2017 Intel Corporation All rights reserved.
3;
4;  Redistribution and use in source and binary forms, with or without
5;  modification, are permitted provided that the following conditions
6;  are met:
7;    * Redistributions of source code must retain the above copyright
8;      notice, this list of conditions and the following disclaimer.
9;    * Redistributions in binary form must reproduce the above copyright
10;      notice, this list of conditions and the following disclaimer in
11;      the documentation and/or other materials provided with the
12;      distribution.
13;    * Neither the name of Intel Corporation nor the names of its
14;      contributors may be used to endorse or promote products derived
15;      from this software without specific prior written permission.
16;
17;  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18;  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19;  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20;  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21;  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22;  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23;  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24;  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25;  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26;  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27;  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
29
;;; uint64_t _rolling_hash2_run_until_04(uint32_t *idx, uint32_t max_idx, uint64_t *t1,
;;;			uint64_t *t2, uint8_t *b1, uint8_t *b2, uint64_t h, uint64_t mask,
;;;			uint64_t trigger)
33
34%include "reg_sizes.asm"
35
%ifidn __OUTPUT_FORMAT__, elf64
;;; ELF64 build: register roles under the System V AMD64 calling convention.

 ; Arguments 0-5 arrive in registers.
 %define arg0	rdi
 %define arg1	rsi
 %define arg2	rdx
 %define arg3	rcx
 %define arg4	r8
 %define arg5	r9

 ; Arguments 6-8 arrive on the stack; FUNC_SAVE copies them into these registers.
 %define arg6	r10
 %define arg7	r11
 %define arg8	r12		; callee-saved: pushed by FUNC_SAVE, popped by FUNC_RESTORE

 ; Scratch registers. All five are callee-saved, so FUNC_SAVE pushes them
 ; and FUNC_RESTORE pops them.
 %define tmp1	rbp
 %define tmp2	rbx
 %define tmp3	r13
 %define tmp4	r14
 %define tmp5	r15

 %define return	rax

 %define PS		8		; bytes per stack slot
 %define frame_size	(6 * PS)	; bytes occupied by the six registers FUNC_SAVE pushes

 ; x-th stack-passed argument as addressed after FUNC_SAVE's pushes:
 ; skip the saved registers (frame_size) and the return address (PS).
 %define arg(x)		[rsp + frame_size + PS + PS*(x)]

 ; Function entry is a plain label for this output format.
 %define func(x) x:

 ; Prologue: save every callee-saved register the function uses, then load
 ; the three stack-passed arguments into arg6..arg8.
 %macro FUNC_SAVE 0
	push	rbp
	push	rbx
	push	r12
	push	r13
	push	r14
	push	r15
	mov	arg6, arg(0)
	mov	arg7, arg(1)
	mov	arg8, arg(2)
 %endmacro

 ; Epilogue: pop the saved registers in the reverse of the push order.
 %macro FUNC_RESTORE 0
	pop	r15
	pop	r14
	pop	r13
	pop	r12
	pop	rbx
	pop	rbp
 %endmacro
%endif
78
%ifidn __OUTPUT_FORMAT__, win64
;;; WIN64 build: register roles under the Microsoft x64 calling convention.

 ; Arguments 0-3 arrive in registers.
 %define arg0	rcx
 %define arg1	rdx
 %define arg2	r8
 %define arg3	r9

 ; Arguments 4-8 arrive on the stack; FUNC_SAVE copies them into these
 ; registers. All five are callee-saved, so FUNC_SAVE pushes them first
 ; and FUNC_RESTORE pops them.
 %define arg4	r12
 %define arg5	r13
 %define arg6	r14
 %define arg7	r15
 %define arg8	rbx

 ; Scratch registers.
 %define tmp1	r10
 %define tmp2	r11
 %define tmp3	rdi		; callee-saved on this ABI: pushed/popped by FUNC_SAVE/FUNC_RESTORE
 %define tmp4	rsi		; callee-saved on this ABI: pushed/popped by FUNC_SAVE/FUNC_RESTORE
 %define tmp5	rbp		; callee-saved: pushed/popped by FUNC_SAVE/FUNC_RESTORE

 %define return	rax

 %define PS		8		; bytes per stack slot
 %define frame_size	(8 * PS)	; bytes occupied by the eight registers FUNC_SAVE pushes

 ; x-th argument slot as addressed after FUNC_SAVE's pushes: skip the saved
 ; registers (frame_size) and the return address (PS). Slots 0-3 are the
 ; caller-allocated shadow space, so the stack-passed arguments start at arg(4).
 %define arg(x)		[rsp + frame_size + PS + PS*(x)]

 ; proc_frame, push_reg and end_prolog are not defined in this file
 ; (presumably unwind-info helpers supplied by reg_sizes.asm).
 %define func(x) proc_frame x

 ; Prologue: save every callee-saved register the function uses, close the
 ; prolog, then load the five stack-passed arguments into arg4..arg8.
 %macro FUNC_SAVE 0
	push_reg	r12
	push_reg	r13
	push_reg	r14
	push_reg	r15
	push_reg	rbx
	push_reg	rdi
	push_reg	rsi
	push_reg	rbp
	end_prolog
	mov	arg4, arg(4)
	mov	arg5, arg(5)
	mov	arg6, arg(6)
	mov	arg7, arg(7)
	mov	arg8, arg(8)
 %endmacro

 ; Epilogue: pop the saved registers in the reverse of the push order.
 %macro FUNC_RESTORE 0
	pop	rbp
	pop	rsi
	pop	rdi
	pop	rbx
	pop	r15
	pop	r14
	pop	r13
	pop	r12
 %endmacro
%endif
127
;;; Symbolic names for the function arguments (argN is bound to a register
;;; by the per-ABI definitions).
%define idx	arg0		; pointer to the 32-bit position: loaded at entry, stored at exit
%define max	arg1		; upper bound for the position
%define t1	arg2		; table of 8-byte entries indexed by a byte read from b1
%define t2	arg3		; table of 8-byte entries indexed by a byte read from b2
%define b1	arg4		; first byte buffer, indexed by pos
%define b2	arg5		; second byte buffer, indexed by pos
%define hash	arg6		; running hash value; copied to rax on return
%define mask	arg7		; bit-selection mask applied with pext
%define trigger	arg8		; value compared with the masked hash

;;; Current position. It is loaded through the 32-bit alias, which
;;; zero-extends the value into the full register.
%define pos	rax
%define pos.w	eax

;;; Scratch values used by the loop body.
%define x	tmp2		; byte b1[pos]; later the first masked hash
%define y	tmp3		; byte b2[pos]; later the second masked hash
%define z	tmp4		; t1 entry xor t2 entry for the current step
%define a	tmp5		; byte b1[pos + 1]
%define h	tmp1		; byte b2[pos + 1] (a data byte, not the hash)
145
default rel		; address memory operands RIP-relative unless stated otherwise
bits 64			; generate 64-bit code
section .text		; everything below is emitted into the code section
149
;;; ----------------------------------------------------------------------
;;; _rolling_hash2_run_until_04
;;;
;;; Advances a rolling hash over two byte streams until the masked hash
;;; equals the masked trigger or the position reaches max.
;;;
;;; One step at position pos computes
;;;	hash = rotate_right(hash, 63) ^ t1[b1[pos]] ^ t2[b2[pos]]
;;; (a rotate right by 63 is a rotate left by 1 for a 64-bit value).
;;; The main loop performs two steps per iteration and, after each step,
;;; compares pext(hash, mask) with pext(trigger, mask).
;;;
;;; In:   idx     - pointer to the 32-bit start position
;;;       max     - 32-bit position limit (max_idx)
;;;       t1, t2  - tables of 8-byte entries indexed by a byte value
;;;       b1, b2  - byte buffers indexed by the position
;;;       hash    - initial hash value
;;;       mask    - bit-selection mask for pext
;;;       trigger - value to match; only its bits selected by mask are used
;;; Out:  rax     - resulting hash
;;;       *idx    - on a match inside the loop, the position of the step that
;;;                 matched (not advanced past it); otherwise max_idx when
;;;                 the start position was <= max_idx, or the unchanged
;;;                 start position when it was already beyond max_idx.
;;;
;;; Notes:
;;;  * pext and rorx are BMI2 instructions, so the CPU must support BMI2.
;;;  * When an odd step remains after the loop (.less_than_2), it is folded
;;;    into the hash WITHOUT a trigger comparison, and *idx is set past it.
;;;  * mk_global, endbranch and endproc_frame are not defined in this file
;;;    (presumably supplied by reg_sizes.asm).
;;; ----------------------------------------------------------------------

;;; 32-bit view of max (arg1), used to zero-extend the incoming 32-bit argument.
%ifidn __OUTPUT_FORMAT__, elf64
 %define max.w	esi
%endif
%ifidn __OUTPUT_FORMAT__, win64
 %define max.w	edx
%endif

align 16
mk_global _rolling_hash2_run_until_04, function, internal
func(_rolling_hash2_run_until_04)
	endbranch
	FUNC_SAVE
	; max_idx is a 32-bit argument and neither calling convention defines
	; the upper half of its register. Clear it before the 64-bit
	; arithmetic and compares below.
	mov	max.w, max.w
	mov	pos.w, dword [idx]		; pos = *idx, zero-extended into rax
	pext	trigger, trigger, mask		; compress trigger once so it can be compared with pext(hash, mask)
	sub	max, 2				; max = max_idx - 2: last pos at which two steps still fit
	cmp	pos, max
	jg	.less_than_2			; signed compare: max is negative here when max_idx < 2

	; ---- main loop: two steps per iteration ----
.loop2:	rorx	hash, hash, 0x3f
	movzx	x, byte [b1 + pos]		; bytes for step 1
	movzx	a, byte [b1 + pos + 1]		; bytes for step 2 are loaded up front
	movzx	y, byte [b2 + pos]
	movzx	h, byte [b2 + pos + 1]
	mov	z, [t1 + x * 8]
	xor	z, [t2 + y * 8]
	xor	hash, z
	pext	x, hash, mask			; x is free again: reuse it for the masked hash
	cmp	x, trigger
	je	.ret_0				; match on step 1: report pos itself

	rorx	hash, hash, 0x3f
	mov	z, [t1 + a * 8]
	xor	z, [t2 + h * 8]
	xor	hash, z
	pext	y, hash, mask
	cmp	y, trigger
	je	.ret_1				; match on step 2: report pos + 1

	add	pos, 2
	cmp	pos, max
	jle	.loop2

	; ---- tail: at most one step left ----
.less_than_2:
	add	max, 1				; max = max_idx - 1: last valid position
	cmp	pos, max
	jg	.ret_0				; nothing left: report pos unchanged
	rorx	hash, hash, 0x3f
	movzx	x, byte [b1 + pos]
	movzx	y, byte [b2 + pos]
	mov	z, [t1 + x * 8]
	xor	z, [t2 + y * 8]
	xor	hash, z				; no trigger comparison for this final step
.ret_1:	add	pos, 1
.ret_0:	mov	dword [idx], pos.w		; *idx = pos
	mov	rax, hash			; return value (overwrites pos, already stored)
	FUNC_RESTORE
	ret

endproc_frame

; No data is defined in the visible part of this file.
section .data
204