/* xref: /freebsd-src/contrib/cortex-strings/src/aarch64/memcmp.S (revision 8c4282b370bd66908b45b6a223226a9fc2b69d57) */
/* memcmp - compare memory

   Copyright (c) 2013, Linaro Limited
   All rights reserved.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are met:
       * Redistributions of source code must retain the above copyright
         notice, this list of conditions and the following disclaimer.
       * Redistributions in binary form must reproduce the above copyright
         notice, this list of conditions and the following disclaimer in the
         documentation and/or other materials provided with the distribution.
       * Neither the name of the Linaro nor the
         names of its contributors may be used to endorse or promote products
         derived from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

/* Assumptions:
 *
 * ARMv8-a, AArch64
 */

/* def_fn NAME [p2align=N]

   Begin the definition of function NAME: switch to the .text section,
   align to a 2^N byte boundary (N defaults to 0), export NAME as a
   global symbol of type %function and place its label here.  The
   function body follows the macro invocation.  */
	.macro def_fn f p2align=0
	.text
	.p2align \p2align
	.global \f
	.type \f, %function
\f:
	.endm

/* Parameters and result.  The result shares x0 with src1, so src1 is
   dead once the result has been written.  */
#define src1		x0
#define src2		x1
#define limit		x2
#define result		x0

/* Internal variables.  data1w/data2w are the 32-bit views of
   data1/data2 and are used for the byte loads.  */
#define data1		x3
#define data1w		w3
#define data2		x4
#define data2w		w4
/* NOTE(review): has_nul is not referenced by the code visible in this
   file.  */
#define has_nul		x5
#define diff		x6
#define endloop		x7
#define tmp1		x8
#define tmp2		x9
/* NOTE(review): tmp3 is not referenced by the code visible in this
   file.  */
#define tmp3		x10
#define pos		x11
#define limit_wd	x12
#define mask		x13

/* memcmp: compare LIMIT bytes at SRC1 with LIMIT bytes at SRC2.

   In:	src1 (x0), src2 (x1) = start of each buffer,
	limit (x2) = number of bytes to compare.
   Out:	result (x0) = 0 when limit is zero or every byte matches;
	otherwise the first mismatching byte of SRC1 minus the
	corresponding byte of SRC2, both zero-extended.
   Scratch: data1, data2, diff, endloop, tmp1, tmp2, pos, limit_wd, mask
	and the condition flags.  src1, src2 and limit are modified.
	The stack is not used.

   The strategy is picked from the low three bits of the addresses:
     - both 8-byte aligned: Dword-at-a-time loop (.Lloop_aligned);
     - same misalignment: round both down, neutralise the leading
       bytes and join the Dword loop (.Lmutual_align);
     - different misalignment: byte-at-a-time loop (.Lmisaligned8).
   The Dword paths always load whole aligned 8-byte words, so they may
   read bytes just outside the compared range; those bytes are masked
   before they can influence the result.  */
def_fn memcmp p2align=6
	cbz	limit, .Lret0		/* Nothing to compare.  */
	eor	tmp1, src1, src2
	tst	tmp1, #7
	b.ne	.Lmisaligned8		/* Low address bits differ.  */
	ands	tmp1, src1, #7		/* tmp1 = offset within the Dword.  */
	b.ne	.Lmutual_align
	add	limit_wd, limit, #7	/* limit_wd = Dwords to examine,  */
	lsr	limit_wd, limit_wd, #3	/* rounded up.  */
	/* Start of performance-critical section  -- one 64B cache line.  */
.Lloop_aligned:
	ldr	data1, [src1], #8
	ldr	data2, [src2], #8
.Lstart_realigned:
	subs	limit_wd, limit_wd, #1
	eor	diff, data1, data2	/* Non-zero if differences found.  */
	/* endloop = diff while Dwords remain, all-ones on the last Dword,
	   so the loop exits on a difference or when the count runs out.  */
	csinv	endloop, diff, xzr, ne	/* Last Dword or differences.  */
	cbz	endloop, .Lloop_aligned
	/* End of performance-critical section  -- one 64B cache line.  */

	/* Not reached the limit, must have found a diff.  */
	cbnz	limit_wd, .Lnot_limit

	/* Limit % 8 == 0 => all bytes significant.  */
	ands	limit, limit, #7
	b.eq	.Lnot_limit

	/* Only the first (limit % 8) bytes of the last Dword count.  Build a
	   mask covering the bytes past the limit, clear them in both data
	   words and set them in DIFF so that the bit scan below stops at the
	   end of the significant data when no earlier difference exists.  */
	lsl	limit, limit, #3	/* Bytes -> bits.  */
	mov	mask, #~0
#ifdef __AARCH64EB__
	lsr	mask, mask, limit
#else
	lsl	mask, mask, limit
#endif
	bic	data1, data1, mask
	bic	data2, data2, mask

	orr	diff, diff, mask
.Lnot_limit:

#ifndef	__AARCH64EB__
	/* Little-endian: byte-reverse so that the byte at the lowest address
	   becomes the most significant byte.  */
	rev	diff, diff
	rev	data1, data1
	rev	data2, data2
#endif
	/* The MS-non-zero bit of DIFF marks either the first bit
	   that is different, or the end of the significant data.
	   Shifting left now will bring the critical information into the
	   top bits.  When DIFF is zero the two data words are equal, so the
	   subtraction below yields zero whatever the shift amount.  */
	clz	pos, diff
	lsl	data1, data1, pos
	lsl	data2, data2, pos
	/* But we need to zero-extend (char is unsigned) the top eight bits
	   of each value before subtracting.  The subtraction is done on the
	   X registers.  */
	lsr	data1, data1, #56
	sub	result, data1, data2, lsr #56
	ret

.Lmutual_align:
	/* Sources are mutually aligned, but are not currently at an
	   alignment boundary.  Round down the addresses and then mask off
	   the bytes that precede the start point.  On entry tmp1 holds
	   src1 & 7.  */
	bic	src1, src1, #7
	bic	src2, src2, #7
	add	limit, limit, tmp1	/* Adjust the limit for the extra.  */
	lsl	tmp1, tmp1, #3		/* Bytes beyond alignment -> bits.  */
	ldr	data1, [src1], #8
	neg	tmp1, tmp1		/* Bits to alignment -64.  */
	ldr	data2, [src2], #8
	mov	tmp2, #~0
	/* tmp2 becomes all-ones in the bytes that precede the start point.  */
#ifdef __AARCH64EB__
	/* Big-endian.  Early bytes are at MSB.  */
	lsl	tmp2, tmp2, tmp1	/* Shift (tmp1 & 63).  */
#else
	/* Little-endian.  Early bytes are at LSB.  */
	lsr	tmp2, tmp2, tmp1	/* Shift (tmp1 & 63).  */
#endif
	add	limit_wd, limit, #7
	/* Force the leading bytes to 0xff in both words so they compare
	   equal.  */
	orr	data1, data1, tmp2
	orr	data2, data2, tmp2
	lsr	limit_wd, limit_wd, #3
	b	.Lstart_realigned

.Lret0:
	mov	result, #0
	ret

	.p2align 6
.Lmisaligned8:
	/* Byte-at-a-time comparison.  The count is pre-decremented so that
	   the SUBS in the loop borrows (carry clear) on the final byte.  */
	sub	limit, limit, #1
1:
	/* Perhaps we can do better than this.  */
	ldrb	data1w, [src1], #1
	ldrb	data2w, [src2], #1
	subs	limit, limit, #1
	/* While bytes remain (carry set) compare the two bytes; otherwise
	   force "not equal" so the loop terminates.  */
	ccmp	data1w, data2w, #0, cs	/* NZCV = 0b0000.  */
	b.eq	1b
	/* The last pair loaded is either the first mismatch or the final
	   pair; LDRB zero-extends, so this is the unsigned byte difference.  */
	sub	result, data1, data2
	ret
	.size memcmp, . - memcmp