/*	$NetBSD: memcmp.S,v 1.1 2014/08/10 05:47:35 matt Exp $	*/

/*-
 * Copyright (c) 2014 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Matt Thomas of 3am Software Foundry.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>

RCSID("$NetBSD: memcmp.S,v 1.1 2014/08/10 05:47:35 matt Exp $")

/*
 * int memcmp(const void *s1, const void *s2, size_t n)
 *
 * ABI:	AAPCS64
 * In:	x0 = s1, x1 = s2, x2 = n
 * Out:	x0 = 0 if equal; otherwise the sign of the first byte difference
 *	(first differing byte of s1 minus that of s2, sign-extended).
 * Clobbers: x1-x10, flags (all within the AAPCS64 caller-saved set).
 */
ENTRY(memcmp)
	mov	x9, x0			/* free x0 for the return value */
	mov	x10, x1
	mov	x0, xzr			/* default result: equal */
	cbz	x2, .Lmemcmp_ret	/* n == 0: nothing to compare */
#ifdef _KERNEL
	/*
	 * Kernel-only fast path for exactly 6 bytes (e.g. Ethernet
	 * addresses): loads word+halfword and jumps to the final compare.
	 */
	cmp	x2, #6
	b.eq	.Lmemcmp_6bytes
#endif
	cmp	x2, #7
	b.ls	.Lmemcmp_lessthan8	/* short: byte-by-byte compare */

	ands	x3, x9, #7
	b.eq	.Lmemcmp_dword_loop	/* src1 already dword aligned */

/*
 * src1 is not dword aligned.  Back both pointers up to the dword boundary
 * containing src1's first byte, load one dword from each, and shift out the
 * x3 leading bytes that precede the buffers so the main loop can run with
 * src1 dword-aligned.
 *
 * NOTE(review): the backed-up load from src2 (address src2 - x3) is
 * unaligned and touches up to 7 bytes before the start of src2; if src2
 * lies at the very start of a mapped page this can read the preceding
 * page -- confirm this is acceptable for all callers.
 */
	add	x2, x2, x3		/* add unalignment to length */
	sub	x2, x2, #8		/* now subtract a dword */

	sub	x9, x9, x3		/* dword align src1 */
	sub	x10, x10, x3		/* adjust src2 by the same amount */

	lsl	x3, x3, #3		/* convert bytes to bits */
	ldr	x4, [x9], #8		/* load dword from src1 */
	ldr	x6, [x10], #8		/* load dword from src2 */
#ifdef __AARCH64EB__
	lsl	x4, x4, x3		/* discard leading bytes from data1 */
	lsl	x6, x6, x3		/* discard leading bytes from data2 */
#else
	lsr	x4, x4, x3		/* discard leading bytes from data1 */
	lsr	x6, x6, x3		/* discard leading bytes from data2 */
#endif
	subs	x0, x4, x6		/* compare data */
#ifdef __AARCH64EL__
	b.ne	.Lmemcmp_last_compare	/* difference.  find it */
#else
	b.eq	.Lmemcmp_dword_loop	/* no difference.  go to loop */
	rev	x4, x4			/* byte swap data1 */
	rev	x6, x6			/* byte swap data2 */
	b	.Lmemcmp_last_compare	/* go find the difference. */
#endif

/*
 * Main loop: compare one dword at a time while at least 8 bytes remain.
 */
.Lmemcmp_dword_loop:
	subs	x2, x2, #8
	b.mi	.Lmemcmp_finish_dword	/* fewer than 8 bytes left */
	ldr	x4, [x9], #8
	ldr	x6, [x10], #8
	subs	x0, x4, x6
	b.eq	.Lmemcmp_dword_loop	/* no difference.  go to loop */
#ifdef __AARCH64EB__
	rev	x4, x4			/* byte swap data1 */
	rev	x6, x6			/* byte swap data2 */
#endif
	b	.Lmemcmp_last_compare	/* go find the difference. */

.Lmemcmp_finish_dword:
	/*
	 * we might have gotten here with nothing left.  If so, just bail.
	 */
	tst	x2, #7
	b.eq	.Lmemcmp_ret
	/*
	 * Clear the data registers before gathering the trailing partials.
	 * They still hold the last (equal) dwords from the loop above; the
	 * halfword/byte partials below are OR'd in, so stale set bits could
	 * mask a real difference (or flip the sign of the result) whenever
	 * the word load below is skipped.
	 */
	mov	x4, xzr
	mov	x6, xzr
	/*
	 * Gather the remaining 1-7 bytes: an optional word, an optional
	 * halfword, and an optional byte (bits 2/1/0 of the residual count),
	 * merged into x4/x6 for one final compare.
	 */
	tbz	x2, #2, .Lmemcmp_finish_word
	ldr	w4, [x9], #4
	ldr	w6, [x10], #4
#ifdef __AARCH64EB__
	lsl	x4, x4, #32		/* move to MSW */
	lsl	x6, x6, #32		/* move to MSW */
#endif

.Lmemcmp_finish_word:
	tbz	x2, #1, .Lmemcmp_finish_hword
	ldrh	w5, [x9], #2
	ldrh	w7, [x10], #2
#ifdef __AARCH64EB__
	orr	x4, x4, x5, lsl #16
	orr	x6, x6, x7, lsl #16
#else
	orr	x4, x4, x5, lsl #32
	orr	x6, x6, x7, lsl #32
#endif

.Lmemcmp_finish_hword:
#ifdef __AARCH64EB__
	rev	x4, x4			/* byte swap data1 */
	rev	x6, x6			/* byte swap data2 */
#endif
	tbz	x2, #0, .Lmemcmp_last_compare
	ldrb	w5, [x9]
	ldrb	w7, [x10]
	orr	x4, x4, x5, lsl #48	/* data is LE format here; final */
	orr	x6, x6, x7, lsl #48	/* byte goes after the others */
	b	.Lmemcmp_last_compare	/* go find the difference. */

/*
 * Fewer than 8 bytes: simple byte-by-byte compare.
 */
.Lmemcmp_lessthan8:
	sub	x2, x2, #1
1:	ldrb	w4, [x9], #1
	ldrb	w5, [x10], #1
	subs	x2, x2, #1
	ccmp	x4, x5, #0, cs		/* if bytes remain compare them; */
	b.eq	1b			/* else force Z=0 to exit loop */
	sub	x0, x4, x5		/* 0 when we ran off the end */

.Lmemcmp_ret:
	ret

#ifdef _KERNEL
/*
 * 6-byte compare: load word+halfword from each source, normalize to LE
 * dword layout, and fall through to the final compare below.
 */
.Lmemcmp_6bytes:
	ldr	w4, [x9], #4
	ldrh	w5, [x9]
#if __AARCH64EB__
	orr	x4, x4, x5, lsl #48
	rev	x4, x4
#else
	orr	x4, x4, x5, lsl #32
#endif
	ldr	w6, [x10], #4
	ldrh	w7, [x10]
#if __AARCH64EB__
	orr	x6, x6, x7, lsl #48
	rev	x6, x6
#else
	orr	x6, x6, x7, lsl #32
#endif
#endif /* _KERNEL */

/*
 * We have the final bytes in x4 and x6 in LE format.  Subtract: any bytes
 * that are equal produce 0 in the difference.  Byte-reverse the difference
 * and use clz to locate the first (lowest-addressed) non-zero byte, round
 * that bit index down to a byte boundary, shift the difference right so
 * that byte lands in the LSB, and sign-extend it to form the result.
 */
	subs	x0, x4, x6
	b.eq	.Lmemcmp_ret
.Lmemcmp_last_compare:
	rev	x1, x0		/* byte reverse the difference */
	clz	x1, x1		/* bit index of first non-zero byte */
	bfi	x1, xzr, #0, #3	/* round down to a byte boundary */
	lsr	x0, x0, x1	/* shift that byte to the LSB */
	sxtb	w0, w0		/* sign extend */
	ret
END(memcmp)