xref: /freebsd-src/contrib/arm-optimized-routines/string/arm/strcmp.S (revision 072a4ba82a01476eaee33781ccd241033eefcf0b)
/*
 * strcmp for ARMv7
 *
 * Copyright (c) 2012-2022, Arm Limited.
 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */
731914882SAlex Richardson
#if __ARM_ARCH >= 7 && __ARM_ARCH_ISA_ARM >= 1

/* Implementation of strcmp for ARMv7 when DSP instructions are
   available.  Use ldrd to support wider loads, provided the data
   is sufficiently aligned.  Use saturating arithmetic to optimize
   the compares.  */

#include "asmdefs.h"

/* Build Options:
   STRCMP_NO_PRECHECK: Don't run a quick pre-check of the first
   byte in the string.  If comparing completely random strings
   the pre-check will save time, since there is a very high
   probability of a mismatch in the first character: we save
   significant overhead if this is the common case.  However,
   if strings are likely to be identical (eg because we're
   verifying a hit in a hash table), then this check is largely
   redundant.  */

#define STRCMP_NO_PRECHECK	0

/* Ensure the .cantunwind directive is prepended to .fnend.
   Leaf functions cannot throw exceptions - EHABI only supports
   synchronous exceptions.  */
#define IS_LEAF

	/* This version uses Thumb-2 code.  */
	.thumb
	.syntax unified

/* Endianness abstraction: the macros below are chosen so that,
   regardless of byte order, LSB masks the register byte holding the
   lowest-addressed (first) character of a loaded word, MSB masks the
   byte holding the last, and S2LO/S2HI shift a word towards the
   first-character/last-character byte lane respectively.  The
   BYTEn_OFFSET values give the rotation needed to extract byte n of
   the string from a loaded word with uxtb ... ror.  */
#ifdef __ARM_BIG_ENDIAN
#define S2LO lsl
#define S2LOEQ lsleq
#define S2HI lsr
#define MSB 0x000000ff
#define LSB 0xff000000
#define BYTE0_OFFSET 24
#define BYTE1_OFFSET 16
#define BYTE2_OFFSET 8
#define BYTE3_OFFSET 0
#else /* not  __ARM_BIG_ENDIAN */
#define S2LO lsr
#define S2LOEQ lsreq
#define S2HI lsl
#define BYTE0_OFFSET 0
#define BYTE1_OFFSET 8
#define BYTE2_OFFSET 16
#define BYTE3_OFFSET 24
#define MSB 0xff000000
#define LSB 0x000000ff
#endif /* not  __ARM_BIG_ENDIAN */

/* Parameters and result.  */
#define src1		r0
#define src2		r1
#define result		r0	/* Overlaps src1.  */

/* Internal variables.  */
#define tmp1		r4
#define tmp2		r5
#define const_m1	r12

/* Additional internal variables for 64-bit aligned data.  */
#define data1a		r2
#define data1b		r3
#define data2a		r6
#define data2b		r7
#define syndrome_a	tmp1
#define syndrome_b	tmp2

/* Additional internal variables for 32-bit aligned data.  */
#define data1		r2
#define data2		r3
#define syndrome	tmp2
8331914882SAlex Richardson
8431914882SAlex Richardson	/* Macro to compute and return the result value for word-aligned
8531914882SAlex Richardson	   cases.  */
8631914882SAlex Richardson	.macro strcmp_epilogue_aligned synd d1 d2 restore_r6
8731914882SAlex Richardson#ifdef __ARM_BIG_ENDIAN
8831914882SAlex Richardson	/* If data1 contains a zero byte, then syndrome will contain a 1 in
8931914882SAlex Richardson	   bit 7 of that byte.  Otherwise, the highest set bit in the
9031914882SAlex Richardson	   syndrome will highlight the first different bit.  It is therefore
9131914882SAlex Richardson	   sufficient to extract the eight bits starting with the syndrome
9231914882SAlex Richardson	   bit.  */
9331914882SAlex Richardson	clz	tmp1, \synd
9431914882SAlex Richardson	lsl	r1, \d2, tmp1
9531914882SAlex Richardson	.if \restore_r6
9631914882SAlex Richardson	ldrd	r6, r7, [sp, #8]
9731914882SAlex Richardson	.endif
9831914882SAlex Richardson	.cfi_restore 6
9931914882SAlex Richardson	.cfi_restore 7
10031914882SAlex Richardson	lsl	\d1, \d1, tmp1
10131914882SAlex Richardson	.cfi_remember_state
10231914882SAlex Richardson	lsr	result, \d1, #24
10331914882SAlex Richardson	ldrd	r4, r5, [sp], #16
10431914882SAlex Richardson	.cfi_restore 4
10531914882SAlex Richardson	.cfi_restore 5
106*072a4ba8SAndrew Turner	.cfi_adjust_cfa_offset -16
10731914882SAlex Richardson	sub	result, result, r1, lsr #24
108*072a4ba8SAndrew Turner	epilogue push_ip=HAVE_PAC_LEAF
10931914882SAlex Richardson#else
11031914882SAlex Richardson	/* To use the big-endian trick we'd have to reverse all three words.
11131914882SAlex Richardson	   that's slower than this approach.  */
11231914882SAlex Richardson	rev	\synd, \synd
11331914882SAlex Richardson	clz	tmp1, \synd
11431914882SAlex Richardson	bic	tmp1, tmp1, #7
11531914882SAlex Richardson	lsr	r1, \d2, tmp1
11631914882SAlex Richardson	.cfi_remember_state
11731914882SAlex Richardson	.if \restore_r6
11831914882SAlex Richardson	ldrd	r6, r7, [sp, #8]
11931914882SAlex Richardson	.endif
12031914882SAlex Richardson	.cfi_restore 6
12131914882SAlex Richardson	.cfi_restore 7
12231914882SAlex Richardson	lsr	\d1, \d1, tmp1
12331914882SAlex Richardson	and	result, \d1, #255
12431914882SAlex Richardson	and	r1, r1, #255
12531914882SAlex Richardson	ldrd	r4, r5, [sp], #16
12631914882SAlex Richardson	.cfi_restore 4
12731914882SAlex Richardson	.cfi_restore 5
128*072a4ba8SAndrew Turner	.cfi_adjust_cfa_offset -16
12931914882SAlex Richardson	sub	result, result, r1
13031914882SAlex Richardson
131*072a4ba8SAndrew Turner	epilogue push_ip=HAVE_PAC_LEAF
13231914882SAlex Richardson#endif
13331914882SAlex Richardson	.endm
13431914882SAlex Richardson
/* int __strcmp_arm (const char *src1, const char *src2)
   Returns the difference between the first pair of mismatching
   bytes (zero when the strings are equal), computed as unsigned
   byte subtraction.  Saves/restores r4-r7 in a 16-byte stack
   frame on the word-aligned paths; prologue/epilogue macros come
   from asmdefs.h (PAC-leaf aware).  */
ENTRY(__strcmp_arm)
	prologue push_ip=HAVE_PAC_LEAF
#if STRCMP_NO_PRECHECK == 0
	/* Quick pre-check of the first byte: exit immediately if the
	   first characters differ or src1 starts with NUL (r2 < 1).  */
	ldrb	r2, [src1]
	ldrb	r3, [src2]
	cmp	r2, #1
	it	cs
	cmpcs	r2, r3
	bne	L(fastpath_exit)
#endif
	strd	r4, r5, [sp, #-16]!
	.cfi_adjust_cfa_offset 16
	.cfi_rel_offset 4, 0
	.cfi_rel_offset 5, 4
	orr	tmp1, src1, src2
	strd	r6, r7, [sp, #8]
	.cfi_rel_offset 6, 8
	.cfi_rel_offset 7, 12
	mvn	const_m1, #0
	lsl	r2, tmp1, #29		/* Z set iff both pointers 8-byte aligned.  */
	cbz	r2, L(loop_aligned8)

L(not_aligned):
	eor	tmp1, src1, src2
	tst	tmp1, #7
	bne	L(misaligned8)

	/* Deal with mutual misalignment by aligning downwards and then
	   masking off the unwanted loaded data to prevent a difference.  */
	and	tmp1, src1, #7
	bic	src1, src1, #7
	and	tmp2, tmp1, #3
	bic	src2, src2, #7
	lsl	tmp2, tmp2, #3	/* Bytes -> bits.  */
	ldrd	data1a, data1b, [src1], #16
	tst	tmp1, #4
	ldrd	data2a, data2b, [src2], #16
	/* In thumb code we can't use MVN with a register shift, but
	   we do have ORN.  */
	S2HI	tmp1, const_m1, tmp2
	orn	data1a, data1a, tmp1
	orn	data2a, data2a, tmp1
	beq	L(start_realigned8)
	orn	data1b, data1b, tmp1
	mov	data1a, const_m1
	orn	data2b, data2b, tmp1
	mov	data2a, const_m1
	b	L(start_realigned8)

	/* Unwind the inner loop by a factor of 2, giving 16 bytes per
	   pass.  */
	.p2align 5,,12  /* Don't start in the tail bytes of a cache line.  */
	.p2align 2	/* Always word aligned.  */
L(loop_aligned8):
	ldrd	data1a, data1b, [src1], #16
	ldrd	data2a, data2b, [src2], #16
L(start_realigned8):
	uadd8	syndrome_b, data1a, const_m1	/* Only want GE bits,  */
	eor	syndrome_a, data1a, data2a
	sel	syndrome_a, syndrome_a, const_m1
	cbnz	syndrome_a, L(diff_in_a)
	uadd8	syndrome_b, data1b, const_m1	/* Only want GE bits.  */
	eor	syndrome_b, data1b, data2b
	sel	syndrome_b, syndrome_b, const_m1
	cbnz	syndrome_b, L(diff_in_b)

	ldrd	data1a, data1b, [src1, #-8]
	ldrd	data2a, data2b, [src2, #-8]
	uadd8	syndrome_b, data1a, const_m1	/* Only want GE bits,  */
	eor	syndrome_a, data1a, data2a
	sel	syndrome_a, syndrome_a, const_m1
	uadd8	syndrome_b, data1b, const_m1	/* Only want GE bits.  */
	eor	syndrome_b, data1b, data2b
	sel	syndrome_b, syndrome_b, const_m1
	/* Can't use CBZ for backwards branch.  */
	orrs	syndrome_b, syndrome_b, syndrome_a /* Only need if s_a == 0 */
	beq	L(loop_aligned8)

L(diff_found):
	cbnz	syndrome_a, L(diff_in_a)

L(diff_in_b):
	strcmp_epilogue_aligned syndrome_b, data1b, data2b 1

L(diff_in_a):
	.cfi_restore_state
	strcmp_epilogue_aligned syndrome_a, data1a, data2a 1

	.cfi_restore_state
L(misaligned8):
	tst	tmp1, #3
	bne	L(misaligned4)
	ands	tmp1, src1, #3
	bne	L(mutual_align4)

	/* Unrolled by a factor of 2, to reduce the number of post-increment
	   operations.  */
L(loop_aligned4):
	ldr	data1, [src1], #8
	ldr	data2, [src2], #8
L(start_realigned4):
	uadd8	syndrome, data1, const_m1	/* Only need GE bits.  */
	eor	syndrome, data1, data2
	sel	syndrome, syndrome, const_m1
	cbnz	syndrome, L(aligned4_done)
	ldr	data1, [src1, #-4]
	ldr	data2, [src2, #-4]
	uadd8	syndrome, data1, const_m1
	eor	syndrome, data1, data2
	sel	syndrome, syndrome, const_m1
	cmp	syndrome, #0
	beq	L(loop_aligned4)

L(aligned4_done):
	strcmp_epilogue_aligned syndrome, data1, data2, 0

L(mutual_align4):
	.cfi_restore_state
	/* Deal with mutual misalignment by aligning downwards and then
	   masking off the unwanted loaded data to prevent a difference.  */
	lsl	tmp1, tmp1, #3	/* Bytes -> bits.  */
	bic	src1, src1, #3
	ldr	data1, [src1], #8
	bic	src2, src2, #3
	ldr	data2, [src2], #8

	/* In thumb code we can't use MVN with a register shift, but
	   we do have ORN.  */
	S2HI	tmp1, const_m1, tmp1
	orn	data1, data1, tmp1
	orn	data2, data2, tmp1
	b	L(start_realigned4)

L(misaligned4):
	ands	tmp1, src1, #3
	beq	L(src1_aligned)
	/* Compare bytewise until src1 reaches word alignment, then fall
	   through to the word-aligned-src1 code.  */
	sub	src2, src2, tmp1
	bic	src1, src1, #3
	lsls	tmp1, tmp1, #31
	ldr	data1, [src1], #4
	beq	L(aligned_m2)
	bcs	L(aligned_m1)

#if STRCMP_NO_PRECHECK == 1
	ldrb	data2, [src2, #1]
	uxtb	tmp1, data1, ror #BYTE1_OFFSET
	subs	tmp1, tmp1, data2
	bne	L(misaligned_exit)
	cbz	data2, L(misaligned_exit)

L(aligned_m2):
	ldrb	data2, [src2, #2]
	uxtb	tmp1, data1, ror #BYTE2_OFFSET
	subs	tmp1, tmp1, data2
	bne	L(misaligned_exit)
	cbz	data2, L(misaligned_exit)

L(aligned_m1):
	ldrb	data2, [src2, #3]
	uxtb	tmp1, data1, ror #BYTE3_OFFSET
	subs	tmp1, tmp1, data2
	bne	L(misaligned_exit)
	add	src2, src2, #4
	cbnz	data2, L(src1_aligned)
#else  /* STRCMP_NO_PRECHECK */
	/* If we've done the pre-check, then we don't need to check the
	   first byte again here.  */
	ldrb	data2, [src2, #2]
	uxtb	tmp1, data1, ror #BYTE2_OFFSET
	subs	tmp1, tmp1, data2
	bne	L(misaligned_exit)
	cbz	data2, L(misaligned_exit)

L(aligned_m2):
	ldrb	data2, [src2, #3]
	uxtb	tmp1, data1, ror #BYTE3_OFFSET
	subs	tmp1, tmp1, data2
	bne	L(misaligned_exit)
	cbnz	data2, L(aligned_m1)
#endif

L(misaligned_exit):
	.cfi_remember_state
	mov	result, tmp1	/* tmp1 holds the byte difference.  */
	ldr	r4, [sp], #16
	.cfi_restore 4
	.cfi_adjust_cfa_offset -16
	epilogue push_ip=HAVE_PAC_LEAF

#if STRCMP_NO_PRECHECK == 0
L(fastpath_exit):
	.cfi_restore_state
	.cfi_remember_state
	sub	r0, r2, r3
	epilogue push_ip=HAVE_PAC_LEAF

L(aligned_m1):
	.cfi_restore_state
	.cfi_remember_state
	add	src2, src2, #4
#endif
L(src1_aligned):
	.cfi_restore_state
	/* src1 is word aligned, but src2 has no common alignment
	   with it.  */
	ldr	data1, [src1], #4
	lsls	tmp1, src2, #31		/* C=src2[1], Z=src2[0].  */

	bic	src2, src2, #3
	ldr	data2, [src2], #4
	bhi	L(overlap1)		/* C=1, Z=0 => src2[1:0] = 0b11.  */
	bcs	L(overlap2)		/* C=1, Z=1 => src2[1:0] = 0b10.  */

	/* (overlap3) C=0, Z=0 => src2[1:0] = 0b01.  */
L(overlap3):
	bic	tmp1, data1, #MSB
	uadd8	syndrome, data1, const_m1
	eors	syndrome, tmp1, data2, S2LO #8
	sel	syndrome, syndrome, const_m1
	bne	4f
	cbnz	syndrome, 5f
	ldr	data2, [src2], #4
	eor	tmp1, tmp1, data1
	cmp	tmp1, data2, S2HI #24
	bne	6f
	ldr	data1, [src1], #4
	b	L(overlap3)
4:
	S2LO	data2, data2, #8
	b	L(strcmp_tail)

5:
	bics	syndrome, syndrome, #MSB
	bne	L(strcmp_done_equal)

	/* We can only get here if the MSB of data1 contains 0, so
	   fast-path the exit.  */
	ldrb	result, [src2]
	.cfi_remember_state
	ldrd	r4, r5, [sp], #16
	.cfi_restore 4
	.cfi_restore 5
	/* R6/7 Not used in this sequence.  */
	.cfi_restore 6
	.cfi_restore 7
	.cfi_adjust_cfa_offset -16
	neg	result, result
	epilogue push_ip=HAVE_PAC_LEAF
6:
	.cfi_restore_state
	S2LO	data1, data1, #24
	and	data2, data2, #LSB
	b	L(strcmp_tail)

	.p2align 5,,12	/* Ensure at least 3 instructions in cache line.  */
L(overlap2):
	and	tmp1, data1, const_m1, S2LO #16
	uadd8	syndrome, data1, const_m1
	eors	syndrome, tmp1, data2, S2LO #16
	sel	syndrome, syndrome, const_m1
	bne	4f
	cbnz	syndrome, 5f
	ldr	data2, [src2], #4
	eor	tmp1, tmp1, data1
	cmp	tmp1, data2, S2HI #16
	bne	6f
	ldr	data1, [src1], #4
	b	L(overlap2)
4:
	S2LO	data2, data2, #16
	b	L(strcmp_tail)
5:
	ands	syndrome, syndrome, const_m1, S2LO #16
	bne	L(strcmp_done_equal)

	ldrh	data2, [src2]
	S2LO	data1, data1, #16
#ifdef __ARM_BIG_ENDIAN
	lsl	data2, data2, #16
#endif
	b	L(strcmp_tail)

6:
	S2LO	data1, data1, #16
	and	data2, data2, const_m1, S2LO #16
	b	L(strcmp_tail)

	.p2align 5,,12	/* Ensure at least 3 instructions in cache line.  */
L(overlap1):
	and	tmp1, data1, #LSB
	uadd8	syndrome, data1, const_m1
	eors	syndrome, tmp1, data2, S2LO #24
	sel	syndrome, syndrome, const_m1
	bne	4f
	cbnz	syndrome, 5f
	ldr	data2, [src2], #4
	eor	tmp1, tmp1, data1
	cmp	tmp1, data2, S2HI #8
	bne	6f
	ldr	data1, [src1], #4
	b	L(overlap1)
4:
	S2LO	data2, data2, #24
	b	L(strcmp_tail)
5:
	tst	syndrome, #LSB
	bne	L(strcmp_done_equal)
	ldr	data2, [src2]
6:
	S2LO	data1, data1, #8
	bic	data2, data2, #MSB
	b	L(strcmp_tail)

L(strcmp_done_equal):
	mov	result, #0
	.cfi_remember_state
	ldrd	r4, r5, [sp], #16
	.cfi_restore 4
	.cfi_restore 5
	/* R6/7 not used in this sequence.  */
	.cfi_restore 6
	.cfi_restore 7
	.cfi_adjust_cfa_offset -16
	epilogue push_ip=HAVE_PAC_LEAF

L(strcmp_tail):
	.cfi_restore_state
#ifndef __ARM_BIG_ENDIAN
	rev	data1, data1
	rev	data2, data2
	/* Now everything looks big-endian...  */
#endif
	uadd8	tmp1, data1, const_m1
	eor	tmp1, data1, data2
	sel	syndrome, tmp1, const_m1
	clz	tmp1, syndrome
	lsl	data1, data1, tmp1
	lsl	data2, data2, tmp1
	lsr	result, data1, #24
	ldrd	r4, r5, [sp], #16
	.cfi_restore 4
	.cfi_restore 5
	/* R6/7 not used in this sequence.  */
	.cfi_restore 6
	.cfi_restore 7
	.cfi_adjust_cfa_offset -16
	sub	result, result, data2, lsr #24
	epilogue push_ip=HAVE_PAC_LEAF

END (__strcmp_arm)

#endif /* __ARM_ARCH >= 7 && __ARM_ARCH_ISA_ARM >= 1  */