xref: /dflybsd-src/sys/cpu/x86_64/include/asm_mjgmacros.h (revision 5b3646a93f49d0672a38799a970e6df9453fe295)
/*-
 * Copyright (c) 2018-2019 The FreeBSD Foundation
 * Copyright (c) 2003 Peter Wemm.
 * Copyright (c) 1993 The Regents of the University of California.
 * All rights reserved.
 *
 * Portions of this software were developed by
 * Konstantin Belousov <kib@FreeBSD.org> under sponsorship from
 * the FreeBSD Foundation.
 *
 * Primarily rewritten and redeveloped by Mateusz Guzik
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */
/*
 * Macros to help implement memcmp(), bcmp(),
 *			    bzero(), memset(),
 *			    memcpy(), bcopy(), memmove()
 */
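
/*
 * Illustrative use only (not part of this header): a consuming assembly
 * source that provides the actual entry points is expected to expand these
 * macros inside its function wrappers, passing the epilogue through the
 * "end" argument so every "\end" expands to it.  The ENTRY()/END() wrapper
 * names below are an assumption for the sake of the example:
 *
 *	ENTRY(memcmp)
 *		MEMCMP end=ret
 *	END(memcmp)
 */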

/*
 * memcmp(b1, b2, len)
 *	  rdi,rsi,rdx
 */
.macro MEMCMP end
	xorl	%eax,%eax
10:
	cmpq	$16,%rdx
	ja	101632f

100816:
	cmpb	$8,%dl
	jl	100408f
	movq	(%rdi),%r8
	movq	(%rsi),%r9
	cmpq	%r8,%r9
	jne	80f
	movq	-8(%rdi,%rdx),%r8
	movq	-8(%rsi,%rdx),%r9
	cmpq	%r8,%r9
	jne	10081608f
	\end
100408:
	cmpb	$4,%dl
	jl	100204f
	movl	(%rdi),%r8d
	movl	(%rsi),%r9d
	cmpl	%r8d,%r9d
	jne	80f
	movl	-4(%rdi,%rdx),%r8d
	movl	-4(%rsi,%rdx),%r9d
	cmpl	%r8d,%r9d
	jne	10040804f
	\end
100204:
	cmpb	$2,%dl
	jl	100001f
	movzwl	(%rdi),%r8d
	movzwl	(%rsi),%r9d
	cmpl	%r8d,%r9d
	jne	1f
	movzwl	-2(%rdi,%rdx),%r8d
	movzwl	-2(%rsi,%rdx),%r9d
	cmpl	%r8d,%r9d
	jne	1f
	\end
100001:
	cmpb	$1,%dl
	jl	100000f
	movzbl	(%rdi),%eax
	movzbl	(%rsi),%r8d
	subl	%r8d,%eax
100000:
	\end
ALIGN_TEXT
101632:
	cmpq	$32,%rdx
	ja	103200f
	movq	(%rdi),%r8
	movq	(%rsi),%r9
	cmpq	%r8,%r9
	jne	80f
	movq	8(%rdi),%r8
	movq	8(%rsi),%r9
	cmpq	%r8,%r9
	jne	10163208f
	movq	-16(%rdi,%rdx),%r8
	movq	-16(%rsi,%rdx),%r9
	cmpq	%r8,%r9
	jne	10163216f
	movq	-8(%rdi,%rdx),%r8
	movq	-8(%rsi,%rdx),%r9
	cmpq	%r8,%r9
	jne	10163224f
	\end
ALIGN_TEXT
103200:
	movq	(%rdi),%r8
	movq	8(%rdi),%r9
	subq	(%rsi),%r8
	subq	8(%rsi),%r9
	orq	%r8,%r9
	jnz	10320000f

	movq	16(%rdi),%r8
	movq	24(%rdi),%r9
	subq	16(%rsi),%r8
	subq	24(%rsi),%r9
	orq	%r8,%r9
	jnz	10320016f

	leaq	32(%rdi),%rdi
	leaq	32(%rsi),%rsi
	subq	$32,%rdx
	cmpq	$32,%rdx
	jae	103200b
	cmpb	$0,%dl
	jne	10b
	\end

/*
 * Mismatch was found.
 *
 * Before we compute it we narrow down the range (16 -> 8 -> 4 bytes).
 */
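/*
 * The numeric labels below combine the size-class label with the particular
 * load that mismatched (e.g. 10163208 handles a mismatch in the second
 * quadword of the 16..32 byte path).  Each stub repositions rdi/rsi at the
 * mismatching word and falls into the common 4-byte/1-byte tail, which
 * returns the difference of the first differing bytes.
 */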
ALIGN_TEXT
10320016:
	leaq	16(%rdi),%rdi
	leaq	16(%rsi),%rsi
10320000:
	movq	(%rdi),%r8
	movq	(%rsi),%r9
	cmpq	%r8,%r9
	jne	80f
	leaq	8(%rdi),%rdi
	leaq	8(%rsi),%rsi
	jmp	80f
ALIGN_TEXT
10081608:
10163224:
	leaq	-8(%rdi,%rdx),%rdi
	leaq	-8(%rsi,%rdx),%rsi
	jmp	80f
ALIGN_TEXT
10163216:
	leaq	-16(%rdi,%rdx),%rdi
	leaq	-16(%rsi,%rdx),%rsi
	jmp	80f
ALIGN_TEXT
10163208:
	leaq	8(%rdi),%rdi
	leaq	8(%rsi),%rsi
	jmp	80f
ALIGN_TEXT
10040804:
	leaq	-4(%rdi,%rdx),%rdi
	leaq	-4(%rsi,%rdx),%rsi
	jmp	1f

ALIGN_TEXT
80:
	movl	(%rdi),%r8d
	movl	(%rsi),%r9d
	cmpl	%r8d,%r9d
	jne	1f
	leaq	4(%rdi),%rdi
	leaq	4(%rsi),%rsi

/*
 * We have up to 4 bytes to inspect.
 */
1:
	movzbl	(%rdi),%eax
	movzbl	(%rsi),%r8d
	cmpb	%r8b,%al
	jne	2f

	movzbl	1(%rdi),%eax
	movzbl	1(%rsi),%r8d
	cmpb	%r8b,%al
	jne	2f

	movzbl	2(%rdi),%eax
	movzbl	2(%rsi),%r8d
	cmpb	%r8b,%al
	jne	2f

	movzbl	3(%rdi),%eax
	movzbl	3(%rsi),%r8d
2:
	subl	%r8d,%eax
	\end
.endm

/*
 * memmove(dst, src, cnt)
 *         rdi, rsi, rdx
 */

/*
 * Register state at entry is supposed to be as follows:
 * rdi - destination
 * rsi - source
 * rdx - count (the macro copies it into rcx itself)
 *
 * The macro possibly clobbers the above and: rcx, r8, r9, r10
 * It does not clobber rax nor r11.
 */
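/*
 * Illustrative use only: the "erms" argument selects the rep movsb path for
 * CPUs with Enhanced REP MOVSB/STOSB, "overlap" enables the backwards copy
 * needed by memmove()/bcopy(), and "end" supplies the epilogue.  A
 * memmove()-style wrapper (names assumed for the example) might expand:
 *
 *	MEMMOVE erms=0 overlap=1 end=ret
 *
 * while a memcpy()-style wrapper, which may assume non-overlapping buffers,
 * could pass overlap=0 to drop the backwards-copy code entirely.
 */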
.macro MEMMOVE erms overlap end
	/*
	 * For sizes 0..32 all data is read before it is written, so there
	 * is no correctness issue with direction of copying.
	 */
	movq	%rdx,%rcx
	cmpq	$32,%rdx
	jbe	101632f

.if \overlap == 1
	movq	%rdi,%r8
	subq	%rsi,%r8
	cmpq	%rcx,%r8	/* overlapping && src < dst? */
	jb	2f
.endif

	/*
	 * AMD's movsq gets better at around 1024 bytes, Intel's gets
	 * better at around 256 bytes (Zen 2, 9900K era)
	 */
	cmpq	$1024,%rcx
	ja	1256f

103200:
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	movq	8(%rsi),%rdx
	movq	%rdx,8(%rdi)
	movq	16(%rsi),%rdx
	movq	%rdx,16(%rdi)
	movq	24(%rsi),%rdx
	movq	%rdx,24(%rdi)
	leaq	32(%rsi),%rsi
	leaq	32(%rdi),%rdi
	subq	$32,%rcx
	cmpq	$32,%rcx
	jae	103200b
	cmpb	$0,%cl
	jne	101632f
	\end
	ALIGN_TEXT
101632:
	cmpb	$16,%cl
	jl	100816f
	movq	(%rsi),%rdx
	movq	8(%rsi),%r8
	movq	-16(%rsi,%rcx),%r9
	movq	-8(%rsi,%rcx),%r10
	movq	%rdx,(%rdi)
	movq	%r8,8(%rdi)
	movq	%r9,-16(%rdi,%rcx)
	movq	%r10,-8(%rdi,%rcx)
	\end
	ALIGN_TEXT
100816:
	cmpb	$8,%cl
	jl	100408f
	movq	(%rsi),%rdx
	movq	-8(%rsi,%rcx),%r8
	movq	%rdx,(%rdi)
	movq	%r8,-8(%rdi,%rcx)
	\end
	ALIGN_TEXT
100408:
	cmpb	$4,%cl
	jl	100204f
	movl	(%rsi),%edx
	movl	-4(%rsi,%rcx),%r8d
	movl	%edx,(%rdi)
	movl	%r8d,-4(%rdi,%rcx)
	\end
	ALIGN_TEXT
100204:
	cmpb	$2,%cl
	jl	100001f
	movzwl	(%rsi),%edx
	movzwl	-2(%rsi,%rcx),%r8d
	movw	%dx,(%rdi)
	movw	%r8w,-2(%rdi,%rcx)
	\end
	ALIGN_TEXT
100001:
	cmpb	$1,%cl
	jl	100000f
	movb	(%rsi),%dl
	movb	%dl,(%rdi)
100000:
	\end

	/*
	 * More than 1024 bytes (see the size check above).
	 */
	ALIGN_TEXT
1256:
	testb	$15,%dil
	jnz	100f
.if \erms == 1
	rep
	movsb
.else
	shrq	$3,%rcx                         /* copy by 64-bit words */
	rep
	movsq
	movq	%rdx,%rcx
	andl	$7,%ecx                         /* any bytes left? */
	jne	100408b
.endif
	\end
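	/*
	 * Destination is not 16-byte aligned.  The first 16 source bytes
	 * are loaded up front into r8/r9, the bulk copy is then done from
	 * the next 16-byte boundary of the destination, and the saved head
	 * is stored afterwards to fill the bytes skipped by the alignment
	 * adjustment.
	 */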
100:
	movq	(%rsi),%r8
	movq	8(%rsi),%r9
	movq	%rdi,%r10
	movq	%rdi,%rcx
	andq	$15,%rcx
	leaq	-16(%rdx,%rcx),%rdx
	neg	%rcx
	leaq	16(%rdi,%rcx),%rdi
	leaq	16(%rsi,%rcx),%rsi
	movq	%rdx,%rcx
.if \erms == 1
	rep
	movsb
	movq	%r8,(%r10)
	movq	%r9,8(%r10)
.else
	shrq	$3,%rcx                         /* copy by 64-bit words */
	rep
	movsq
	movq	%r8,(%r10)
	movq	%r9,8(%r10)
	movq	%rdx,%rcx
	andl	$7,%ecx                         /* any bytes left? */
	jne	100408b
.endif
	\end

.if \overlap == 1
	/*
	 * Copy backwards.
	 */
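	/*
	 * This path is only reached when the regions overlap with
	 * src < dst (the dst - src < len check above), so the copy must
	 * run from the high addresses down to avoid clobbering source
	 * bytes that have not been read yet.
	 */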
	ALIGN_TEXT
2:
	cmpq	$256,%rcx
	ja	2256f

	leaq	-8(%rdi,%rcx),%rdi
	leaq	-8(%rsi,%rcx),%rsi

	cmpq	$32,%rcx
	jb	2016f

2032:
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	movq	-8(%rsi),%rdx
	movq	%rdx,-8(%rdi)
	movq	-16(%rsi),%rdx
	movq	%rdx,-16(%rdi)
	movq	-24(%rsi),%rdx
	movq	%rdx,-24(%rdi)
	leaq	-32(%rsi),%rsi
	leaq	-32(%rdi),%rdi
	subq	$32,%rcx
	cmpq	$32,%rcx
	jae	2032b
	cmpb	$0,%cl
	jne	2016f
	\end
	ALIGN_TEXT
2016:
	cmpb	$16,%cl
	jl	2008f
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	movq	-8(%rsi),%rdx
	movq	%rdx,-8(%rdi)
	subb	$16,%cl
	jz	2000f
	leaq	-16(%rsi),%rsi
	leaq	-16(%rdi),%rdi
2008:
	cmpb	$8,%cl
	jl	2004f
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	subb	$8,%cl
	jz	2000f
	leaq	-8(%rsi),%rsi
	leaq	-8(%rdi),%rdi
2004:
	cmpb	$4,%cl
	jl	2002f
	movl	4(%rsi),%edx
	movl	%edx,4(%rdi)
	subb	$4,%cl
	jz	2000f
	leaq	-4(%rsi),%rsi
	leaq	-4(%rdi),%rdi
2002:
	cmpb	$2,%cl
	jl	2001f
	movw	6(%rsi),%dx
	movw	%dx,6(%rdi)
	subb	$2,%cl
	jz	2000f
	leaq	-2(%rsi),%rsi
	leaq	-2(%rdi),%rdi
2001:
	cmpb	$1,%cl
	jl	2000f
	movb	7(%rsi),%dl
	movb	%dl,7(%rdi)
2000:
	\end
	ALIGN_TEXT
2256:
	std
.if \erms == 1
	leaq	-1(%rdi,%rcx),%rdi
	leaq	-1(%rsi,%rcx),%rsi
	rep
	movsb
	cld
.else
	leaq	-8(%rdi,%rcx),%rdi
	leaq	-8(%rsi,%rcx),%rsi
	shrq	$3,%rcx
	rep
	movsq
	cld
	movq	%rdx,%rcx
	andb	$7,%cl
	jne	2004b
.endif
	\end
.endif
.endm

/*
 * memset(dst, c,   len)
 *        rdi, r10, rdx
 */
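/*
 * Illustrative use only: the macro stores %r10 as full quadwords, so the
 * caller is expected to arrive with the fill byte already replicated across
 * all eight bytes of %r10.  A wrapper (assumed here, not part of this file)
 * could do the broadcast with:
 *
 *	movzbq	%sil,%r10
 *	movabs	$0x0101010101010101,%r8
 *	imulq	%r8,%r10
 *	MEMSET erms=0 end=ret
 *
 * The return value does not need to be preserved across the setup, since
 * MEMSET re-derives it from %rdi as its first step.
 */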
.macro MEMSET erms end
	movq	%rdi,%rax
	movq	%rdx,%rcx

	cmpq	$32,%rcx
	jbe	101632f

	cmpq	$256,%rcx
	ja	1256f

103200:
	movq	%r10,(%rdi)
	movq	%r10,8(%rdi)
	movq	%r10,16(%rdi)
	movq	%r10,24(%rdi)
	leaq	32(%rdi),%rdi
	subq	$32,%rcx
	cmpq	$32,%rcx
	ja	103200b
	cmpb	$16,%cl
	ja	201632f
	movq	%r10,-16(%rdi,%rcx)
	movq	%r10,-8(%rdi,%rcx)
	\end
	ALIGN_TEXT
101632:
	cmpb	$16,%cl
	jl	100816f
201632:
	movq	%r10,(%rdi)
	movq	%r10,8(%rdi)
	movq	%r10,-16(%rdi,%rcx)
	movq	%r10,-8(%rdi,%rcx)
	\end
	ALIGN_TEXT
100816:
	cmpb	$8,%cl
	jl	100408f
	movq	%r10,(%rdi)
	movq	%r10,-8(%rdi,%rcx)
	\end
	ALIGN_TEXT
100408:
	cmpb	$4,%cl
	jl	100204f
	movl	%r10d,(%rdi)
	movl	%r10d,-4(%rdi,%rcx)
	\end
	ALIGN_TEXT
100204:
	cmpb	$2,%cl
	jl	100001f
	movw	%r10w,(%rdi)
	movw	%r10w,-2(%rdi,%rcx)
	\end
	ALIGN_TEXT
100001:
	cmpb	$0,%cl
	je	100000f
	movb	%r10b,(%rdi)
100000:
	\end
	ALIGN_TEXT
1256:
	movq	%rdi,%r9
	movq	%r10,%rax
	testl	$15,%edi
	jnz	3f
1:
.if \erms == 1
	rep
	stosb
	movq	%r9,%rax
.else
	movq	%rcx,%rdx
	shrq	$3,%rcx
	rep
	stosq
	movq	%r9,%rax
	andl	$7,%edx
	jnz	2f
	\end
2:
	movq	%r10,-8(%rdi,%rdx)
.endif
	\end
	ALIGN_TEXT
3:
	movq	%r10,(%rdi)
	movq	%r10,8(%rdi)
	movq	%rdi,%r8
	andq	$15,%r8
	leaq	-16(%rcx,%r8),%rcx
	neg	%r8
	leaq	16(%rdi,%r8),%rdi
	jmp	1b
.endm

.macro DUMMYARG
.endm