/*
 * Copyright (c) 2012-2014 ARM Ltd
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the company may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* Implementation of strcmp for ARMv7 when DSP instructions are
   available.  Use ldrd to support wider loads, provided the data
   is sufficiently aligned.  Use the UADD8 and SEL DSP instructions
   to optimize the compares.  */
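
/* The word-at-a-time comparison below builds a per-word "syndrome":
   UADD8 adds 0xff to each byte of a word and sets that byte's GE flag
   only if the byte is non-zero; SEL then substitutes 0xff wherever a
   byte of data1 was NUL and keeps the XOR difference elsewhere.  The
   syndrome is therefore non-zero exactly when the words differ or
   string 1 terminates.  A C sketch of the idea (an illustration, not
   part of the original source):

	#include <stdint.h>

	uint32_t syndrome(uint32_t data1, uint32_t data2)
	{
		uint32_t diff = data1 ^ data2;	// EOR
		uint32_t synd = 0;
		for (int i = 0; i < 4; i++) {
			uint32_t mask = 0xffu << (8 * i);
			if ((data1 & mask) == 0)	// GE clear: SEL -> 0xff
				synd |= mask;
			else				// GE set: SEL -> XOR
				synd |= diff & mask;
		}
		return synd;	// zero iff words equal, no NUL in data1
	}
*/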

/* Build Options:
   STRCMP_NO_PRECHECK: Don't run a quick pre-check of the first
   byte of each string.  If comparing completely random strings
   the pre-check will save time, since there is a very high
   probability of a mismatch in the first character: we save
   significant overhead if this is the common case.  However,
   if strings are likely to be identical (e.g. because we're
   verifying a hit in a hash table), then this check is largely
   redundant.  */

#define STRCMP_NO_PRECHECK	0
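
/* With the pre-check enabled (STRCMP_NO_PRECHECK == 0) the entry code
   is equivalent to the following C (a sketch for reference, not part
   of the original source):

	unsigned char c1 = *(const unsigned char *)s1;
	unsigned char c2 = *(const unsigned char *)s2;
	if (c1 == 0 || c1 != c2)
		return c1 - c2;
	// otherwise fall through to the word-at-a-time comparison

   The CMP #1 / IT CS / CMPCS sequence at the function entry performs
   both tests with a single conditional branch.  */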

	/* This version uses Thumb-2 code.  */
	.thumb
	.syntax unified

#ifdef __ARM_BIG_ENDIAN
#define S2LO lsl
#define S2LOEQ lsleq
#define S2HI lsr
#define MSB 0x000000ff
#define LSB 0xff000000
#define BYTE0_OFFSET 24
#define BYTE1_OFFSET 16
#define BYTE2_OFFSET 8
#define BYTE3_OFFSET 0
#else /* not  __ARM_BIG_ENDIAN */
#define S2LO lsr
#define S2LOEQ lsreq
#define S2HI lsl
#define BYTE0_OFFSET 0
#define BYTE1_OFFSET 8
#define BYTE2_OFFSET 16
#define BYTE3_OFFSET 24
#define MSB 0xff000000
#define LSB 0x000000ff
#endif /* not  __ARM_BIG_ENDIAN */
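
/* Naming note (added for clarity): S2LO shifts a word's bytes towards
   the lowest string address (LSR on little-endian, LSL on big-endian)
   and S2HI towards the highest.  LSB masks the byte at the lowest
   string address within a word; MSB masks the byte at the highest.  */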

	.macro def_fn f p2align=0
	.text
	.p2align \p2align
	.global \f
	.type \f, %function
\f:
	.endm

/* Parameters and result.  */
#define src1		r0
#define src2		r1
#define result		r0	/* Overlaps src1.  */

/* Internal variables.  */
#define tmp1		r4
#define tmp2		r5
#define const_m1	r12

/* Additional internal variables for 64-bit aligned data.  */
#define data1a		r2
#define data1b		r3
#define data2a		r6
#define data2b		r7
#define syndrome_a	tmp1
#define syndrome_b	tmp2

/* Additional internal variables for 32-bit aligned data.  */
#define data1		r2
#define data2		r3
#define syndrome	tmp2


	/* Macro to compute and return the result value for word-aligned
	   cases.  */
	.macro strcmp_epilogue_aligned synd d1 d2 restore_r6
#ifdef __ARM_BIG_ENDIAN
	/* If data1 contains a zero byte, then syndrome will contain a 1 in
	   bit 7 of that byte.  Otherwise, the highest set bit in the
	   syndrome will highlight the first different bit.  It is therefore
	   sufficient to extract the eight bits starting with the syndrome
	   bit.  */
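	/* Worked example: comparing "abc" with "abd" gives, in big-endian
	   word order, d1 = 0x61626300 and d2 = 0x61626400, so the
	   syndrome is 0x000007ff (bits 10:8 from the XOR difference,
	   bits 7:0 from the NUL in d1).  CLZ returns 21; shifting both
	   words left by 21 brings the first differing bits into the top
	   byte, and the final subtraction is negative, as required for
	   'c' < 'd'.  */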
	clz	tmp1, \synd
	lsl	r1, \d2, tmp1
	.if \restore_r6
	ldrd	r6, r7, [sp, #8]
	.endif
	.cfi_restore 6
	.cfi_restore 7
	lsl	\d1, \d1, tmp1
	.cfi_remember_state
	lsr	result, \d1, #24
	ldrd	r4, r5, [sp], #16
	.cfi_restore 4
	.cfi_restore 5
	sub	result, result, r1, lsr #24
	bx	lr
#else
	/* To use the big-endian trick we'd have to reverse all three words;
	   that's slower than this approach.  */
	rev	\synd, \synd
	clz	tmp1, \synd
	bic	tmp1, tmp1, #7
	lsr	r1, \d2, tmp1
	.cfi_remember_state
	.if \restore_r6
	ldrd	r6, r7, [sp, #8]
	.endif
	.cfi_restore 6
	.cfi_restore 7
	lsr	\d1, \d1, tmp1
	and	result, \d1, #255
	and	r1, r1, #255
	ldrd	r4, r5, [sp], #16
	.cfi_restore 4
	.cfi_restore 5
	sub	result, result, r1

	bx	lr
#endif
	.endm

	.text
	.p2align	5
.Lstrcmp_start_addr:
#if STRCMP_NO_PRECHECK == 0
.Lfastpath_exit:
	sub	r0, r2, r3
	bx	lr
	nop
#endif
def_fn	strcmp
#if STRCMP_NO_PRECHECK == 0
	ldrb	r2, [src1]
	ldrb	r3, [src2]
	cmp	r2, #1			/* Clears C iff the first byte is NUL.  */
	it	cs
	cmpcs	r2, r3
	bne	.Lfastpath_exit		/* NUL or mismatch in the first byte.  */
#endif
	.cfi_startproc
	strd	r4, r5, [sp, #-16]!
	.cfi_def_cfa_offset 16
	.cfi_offset 4, -16
	.cfi_offset 5, -12
	orr	tmp1, src1, src2
	strd	r6, r7, [sp, #8]
	.cfi_offset 6, -8
	.cfi_offset 7, -4
	mvn	const_m1, #0
	lsl	r2, tmp1, #29		/* Low 3 bits of src1 | src2.  */
	cbz	r2, .Lloop_aligned8	/* Both pointers 8-byte aligned.  */

.Lnot_aligned:
	eor	tmp1, src1, src2
	tst	tmp1, #7
	bne	.Lmisaligned8

	/* Deal with mutual misalignment by aligning downwards and then
	   masking off the unwanted loaded data to prevent a difference.  */
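	/* The masking, sketched in little-endian C (an illustration, not
	   part of the original source; off, mask and w are ad hoc names):

		unsigned off  = src & 7;		// mutual misalignment
		uint32_t mask = ~(~0u << (8 * (off & 3)));
		w |= mask;				// leading bytes -> 0xff

	   Forcing the unwanted leading bytes to 0xff in both strings means
	   they always compare equal and can never look like a NUL
	   terminator.  */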
	and	tmp1, src1, #7
	bic	src1, src1, #7
	and	tmp2, tmp1, #3
	bic	src2, src2, #7
	lsl	tmp2, tmp2, #3	/* Bytes -> bits.  */
	ldrd	data1a, data1b, [src1], #16
	tst	tmp1, #4
	ldrd	data2a, data2b, [src2], #16
	/* In Thumb code we can't use MVN with a register shift, but
	   we do have ORN.  */
	S2HI	tmp1, const_m1, tmp2
	orn	data1a, data1a, tmp1
	orn	data2a, data2a, tmp1
	beq	.Lstart_realigned8
	orn	data1b, data1b, tmp1
	mov	data1a, const_m1
	orn	data2b, data2b, tmp1
	mov	data2a, const_m1
	b	.Lstart_realigned8

	/* Unroll the inner loop by a factor of 2, giving 16 bytes per
	   pass.  */
	.p2align 5,,12  /* Don't start in the tail bytes of a cache line.  */
	.p2align 2	/* Always word aligned.  */
.Lloop_aligned8:
	ldrd	data1a, data1b, [src1], #16
	ldrd	data2a, data2b, [src2], #16
.Lstart_realigned8:
	uadd8	syndrome_b, data1a, const_m1	/* Only want GE bits.  */
	eor	syndrome_a, data1a, data2a
	sel	syndrome_a, syndrome_a, const_m1
	cbnz	syndrome_a, .Ldiff_in_a
	uadd8	syndrome_b, data1b, const_m1	/* Only want GE bits.  */
	eor	syndrome_b, data1b, data2b
	sel	syndrome_b, syndrome_b, const_m1
	cbnz	syndrome_b, .Ldiff_in_b

	ldrd	data1a, data1b, [src1, #-8]
	ldrd	data2a, data2b, [src2, #-8]
	uadd8	syndrome_b, data1a, const_m1	/* Only want GE bits.  */
	eor	syndrome_a, data1a, data2a
	sel	syndrome_a, syndrome_a, const_m1
	uadd8	syndrome_b, data1b, const_m1	/* Only want GE bits.  */
	eor	syndrome_b, data1b, data2b
	sel	syndrome_b, syndrome_b, const_m1
	/* CBZ/CBNZ can only branch forwards in Thumb-2, so combine the
	   syndromes and use ORRS/BEQ for the backward loop edge.  */
	orrs	syndrome_b, syndrome_b, syndrome_a /* Only need if s_a == 0 */
	beq	.Lloop_aligned8

.Ldiff_found:
	cbnz	syndrome_a, .Ldiff_in_a

.Ldiff_in_b:
	strcmp_epilogue_aligned syndrome_b, data1b, data2b 1

.Ldiff_in_a:
	.cfi_restore_state
	strcmp_epilogue_aligned syndrome_a, data1a, data2a 1

	.cfi_restore_state
.Lmisaligned8:
	tst	tmp1, #3
	bne	.Lmisaligned4
	ands	tmp1, src1, #3
	bne	.Lmutual_align4

	/* Unrolled by a factor of 2, to reduce the number of post-increment
	   operations.  */
.Lloop_aligned4:
	ldr	data1, [src1], #8
	ldr	data2, [src2], #8
.Lstart_realigned4:
	uadd8	syndrome, data1, const_m1	/* Only need GE bits.  */
	eor	syndrome, data1, data2
	sel	syndrome, syndrome, const_m1
	cbnz	syndrome, .Laligned4_done
	ldr	data1, [src1, #-4]
	ldr	data2, [src2, #-4]
	uadd8	syndrome, data1, const_m1
	eor	syndrome, data1, data2
	sel	syndrome, syndrome, const_m1
	cmp	syndrome, #0
	beq	.Lloop_aligned4

.Laligned4_done:
	strcmp_epilogue_aligned syndrome, data1, data2, 0

.Lmutual_align4:
	.cfi_restore_state
	/* Deal with mutual misalignment by aligning downwards and then
	   masking off the unwanted loaded data to prevent a difference.  */
	lsl	tmp1, tmp1, #3	/* Bytes -> bits.  */
	bic	src1, src1, #3
	ldr	data1, [src1], #8
	bic	src2, src2, #3
	ldr	data2, [src2], #8

	/* In Thumb code we can't use MVN with a register shift, but
	   we do have ORN.  */
	S2HI	tmp1, const_m1, tmp1
	orn	data1, data1, tmp1
	orn	data2, data2, tmp1
	b	.Lstart_realigned4

.Lmisaligned4:
	ands	tmp1, src1, #3
	beq	.Lsrc1_aligned
	sub	src2, src2, tmp1
	bic	src1, src1, #3
	lsls	tmp1, tmp1, #31		/* C=src1[1], Z=src1[0].  */
	ldr	data1, [src1], #4
	beq	.Laligned_m2
	bcs	.Laligned_m1

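	/* Compare the remaining leading bytes of string 1 against string 2
	   one at a time: UXTB with a ROR extracts byte N of data1, and
	   each step exits on a mismatch or a NUL.  */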
#if STRCMP_NO_PRECHECK == 1
	ldrb	data2, [src2, #1]
	uxtb	tmp1, data1, ror #BYTE1_OFFSET
	subs	tmp1, tmp1, data2
	bne	.Lmisaligned_exit
	cbz	data2, .Lmisaligned_exit

.Laligned_m2:
	ldrb	data2, [src2, #2]
	uxtb	tmp1, data1, ror #BYTE2_OFFSET
	subs	tmp1, tmp1, data2
	bne	.Lmisaligned_exit
	cbz	data2, .Lmisaligned_exit

.Laligned_m1:
	ldrb	data2, [src2, #3]
	uxtb	tmp1, data1, ror #BYTE3_OFFSET
	subs	tmp1, tmp1, data2
	bne	.Lmisaligned_exit
	add	src2, src2, #4
	cbnz	data2, .Lsrc1_aligned
#else  /* STRCMP_NO_PRECHECK */
	/* If we've done the pre-check, then we don't need to check the
	   first byte again here.  */
	ldrb	data2, [src2, #2]
	uxtb	tmp1, data1, ror #BYTE2_OFFSET
	subs	tmp1, tmp1, data2
	bne	.Lmisaligned_exit
	cbz	data2, .Lmisaligned_exit

.Laligned_m2:
	ldrb	data2, [src2, #3]
	uxtb	tmp1, data1, ror #BYTE3_OFFSET
	subs	tmp1, tmp1, data2
	bne	.Lmisaligned_exit
	cbnz	data2, .Laligned_m1
#endif

.Lmisaligned_exit:
	.cfi_remember_state
	mov	result, tmp1
	ldr	r4, [sp], #16
	.cfi_restore 4
	bx	lr

#if STRCMP_NO_PRECHECK == 0
.Laligned_m1:
	add	src2, src2, #4
#endif
.Lsrc1_aligned:
	.cfi_restore_state
	/* src1 is word aligned, but src2 has no common alignment
	   with it.  */
	ldr	data1, [src1], #4
	lsls	tmp1, src2, #31		/* C=src2[1], Z=src2[0].  */

	bic	src2, src2, #3
	ldr	data2, [src2], #4
	bhi	.Loverlap1		/* C=1, Z=0 => src2[1:0] = 0b11.  */
	bcs	.Loverlap2		/* C=1, Z=1 => src2[1:0] = 0b10.  */

	/* (overlap3) C=0, Z=0 => src2[1:0] = 0b01.  */
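	/* In each .LoverlapN loop below the first word loaded from src2
	   holds only N useful bytes of string 2.  They are compared with
	   the first N bytes of data1; the remaining bytes of data1 are
	   then checked against the next word from src2, shifted into
	   place with S2HI.  */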
.Loverlap3:
	bic	tmp1, data1, #MSB
	uadd8	syndrome, data1, const_m1
	eors	syndrome, tmp1, data2, S2LO #8
	sel	syndrome, syndrome, const_m1
	bne	4f
	cbnz	syndrome, 5f
	ldr	data2, [src2], #4
	eor	tmp1, tmp1, data1
	cmp	tmp1, data2, S2HI #24
	bne	6f
	ldr	data1, [src1], #4
	b	.Loverlap3
4:
	S2LO	data2, data2, #8
	b	.Lstrcmp_tail

5:
	bics	syndrome, syndrome, #MSB
	bne	.Lstrcmp_done_equal

	/* We can only get here if the MSB of data1 contains 0, so
	   fast-path the exit.  */
	ldrb	result, [src2]
	.cfi_remember_state
	ldrd	r4, r5, [sp], #16
	.cfi_restore 4
	.cfi_restore 5
	/* R6/7 not used in this sequence.  */
	.cfi_restore 6
	.cfi_restore 7
	neg	result, result
	bx	lr

6:
	.cfi_restore_state
	S2LO	data1, data1, #24
	and	data2, data2, #LSB
	b	.Lstrcmp_tail

	.p2align 5,,12	/* Ensure at least 3 instructions in cache line.  */
.Loverlap2:
	and	tmp1, data1, const_m1, S2LO #16
	uadd8	syndrome, data1, const_m1
	eors	syndrome, tmp1, data2, S2LO #16
	sel	syndrome, syndrome, const_m1
	bne	4f
	cbnz	syndrome, 5f
	ldr	data2, [src2], #4
	eor	tmp1, tmp1, data1
	cmp	tmp1, data2, S2HI #16
	bne	6f
	ldr	data1, [src1], #4
	b	.Loverlap2
4:
	S2LO	data2, data2, #16
	b	.Lstrcmp_tail
5:
	ands	syndrome, syndrome, const_m1, S2LO #16
	bne	.Lstrcmp_done_equal

	ldrh	data2, [src2]
	S2LO	data1, data1, #16
#ifdef __ARM_BIG_ENDIAN
	lsl	data2, data2, #16
#endif
	b	.Lstrcmp_tail

6:
	S2LO	data1, data1, #16
	and	data2, data2, const_m1, S2LO #16
	b	.Lstrcmp_tail

	.p2align 5,,12	/* Ensure at least 3 instructions in cache line.  */
.Loverlap1:
	and	tmp1, data1, #LSB
	uadd8	syndrome, data1, const_m1
	eors	syndrome, tmp1, data2, S2LO #24
	sel	syndrome, syndrome, const_m1
	bne	4f
	cbnz	syndrome, 5f
	ldr	data2, [src2], #4
	eor	tmp1, tmp1, data1
	cmp	tmp1, data2, S2HI #8
	bne	6f
	ldr	data1, [src1], #4
	b	.Loverlap1
4:
	S2LO	data2, data2, #24
	b	.Lstrcmp_tail
5:
	tst	syndrome, #LSB
	bne	.Lstrcmp_done_equal
	ldr	data2, [src2]
6:
	S2LO	data1, data1, #8
	bic	data2, data2, #MSB
	b	.Lstrcmp_tail

.Lstrcmp_done_equal:
	mov	result, #0
	.cfi_remember_state
	ldrd	r4, r5, [sp], #16
	.cfi_restore 4
	.cfi_restore 5
	/* R6/7 not used in this sequence.  */
	.cfi_restore 6
	.cfi_restore 7
	bx	lr

.Lstrcmp_tail:
	.cfi_restore_state
#ifndef __ARM_BIG_ENDIAN
	rev	data1, data1
	rev	data2, data2
	/* Now everything looks big-endian...  */
#endif
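	/* The first byte in string order is now the most significant byte
	   of each word, so the big-endian extraction applies: find the
	   first difference or NUL with CLZ, shift it to the top, and
	   subtract the top bytes.  */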
	uadd8	tmp1, data1, const_m1
	eor	tmp1, data1, data2
	sel	syndrome, tmp1, const_m1
	clz	tmp1, syndrome
	lsl	data1, data1, tmp1
	lsl	data2, data2, tmp1
	lsr	result, data1, #24
	ldrd	r4, r5, [sp], #16
	.cfi_restore 4
	.cfi_restore 5
	/* R6/7 not used in this sequence.  */
	.cfi_restore 6
	.cfi_restore 7
	sub	result, result, data2, lsr #24
	bx	lr
	.cfi_endproc
	.size strcmp, . - .Lstrcmp_start_addr