/*
 * strcmp for ARMv7
 *
 * Copyright (c) 2012-2022, Arm Limited.
 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */

#if __ARM_ARCH >= 7 && __ARM_ARCH_ISA_ARM >= 1

/* Implementation of strcmp for ARMv7 when DSP instructions are
   available.  Use ldrd to support wider loads, provided the data
   is sufficiently aligned.  Use saturating arithmetic to optimize
   the compares.  */

#include "asmdefs.h"

/* Build Options:
   STRCMP_NO_PRECHECK: Don't run a quick pre-check of the first
   byte in the string.  If comparing completely random strings
   the pre-check will save time, since there is a very high
   probability of a mismatch in the first character: we save
   significant overhead if this is the common case.  However,
   if strings are likely to be identical (eg because we're
   verifying a hit in a hash table), then this check is largely
   redundant.  */

#define STRCMP_NO_PRECHECK	0

/* Ensure the .cantunwind directive is prepended to .fnend.
   Leaf functions cannot throw exceptions - EHABI only supports
   synchronous exceptions.  */
#define IS_LEAF

	/* This version uses Thumb-2 code.  */
	.thumb
	.syntax unified

#ifdef __ARM_BIG_ENDIAN
#define S2LO lsl
#define S2LOEQ lsleq
#define S2HI lsr
#define MSB 0x000000ff
#define LSB 0xff000000
#define BYTE0_OFFSET 24
#define BYTE1_OFFSET 16
#define BYTE2_OFFSET 8
#define BYTE3_OFFSET 0
#else /* not  __ARM_BIG_ENDIAN */
#define S2LO lsr
#define S2LOEQ lsreq
#define S2HI lsl
#define BYTE0_OFFSET 0
#define BYTE1_OFFSET 8
#define BYTE2_OFFSET 16
#define BYTE3_OFFSET 24
#define MSB 0xff000000
#define LSB 0x000000ff
#endif /* not  __ARM_BIG_ENDIAN */

/* Parameters and result.  */
#define src1		r0
#define src2		r1
#define result		r0	/* Overlaps src1.  */

/* Internal variables.  */
#define tmp1		r4
#define tmp2		r5
#define const_m1	r12

/* Additional internal variables for 64-bit aligned data.  */
#define data1a		r2
#define data1b		r3
#define data2a		r6
#define data2b		r7
#define syndrome_a	tmp1
#define syndrome_b	tmp2

/* Additional internal variables for 32-bit aligned data.  */
#define data1		r2
#define data2		r3
#define syndrome	tmp2


	/* Macro to compute and return the result value for word-aligned
	   cases.  */
	.macro strcmp_epilogue_aligned synd d1 d2 restore_r6
#ifdef __ARM_BIG_ENDIAN
	/* If data1 contains a zero byte, then syndrome will contain a 1 in
	   bit 7 of that byte.  Otherwise, the highest set bit in the
	   syndrome will highlight the first different bit.  It is therefore
	   sufficient to extract the eight bits starting with the syndrome
	   bit.  */
	clz	tmp1, \synd
	lsl	r1, \d2, tmp1
	.if \restore_r6
	ldrd	r6, r7, [sp, #8]
	.endif
	.cfi_restore 6
	.cfi_restore 7
	lsl	\d1, \d1, tmp1
	.cfi_remember_state
	lsr	result, \d1, #24
	ldrd	r4, r5, [sp], #16
	.cfi_restore 4
	.cfi_restore 5
	.cfi_adjust_cfa_offset -16
	sub	result, result, r1, lsr #24
	epilogue push_ip=HAVE_PAC_LEAF
#else
	/* To use the big-endian trick we'd have to reverse all three words.
	   that's slower than this approach.  */
	rev	\synd, \synd
	clz	tmp1, \synd
	bic	tmp1, tmp1, #7
	lsr	r1, \d2, tmp1
	.cfi_remember_state
	.if \restore_r6
	ldrd	r6, r7, [sp, #8]
	.endif
	.cfi_restore 6
	.cfi_restore 7
	lsr	\d1, \d1, tmp1
	and	result, \d1, #255
	and	r1, r1, #255
	ldrd	r4, r5, [sp], #16
	.cfi_restore 4
	.cfi_restore 5
	.cfi_adjust_cfa_offset -16
	sub	result, result, r1

	epilogue push_ip=HAVE_PAC_LEAF
#endif
	.endm

/* int __strcmp_arm (const char *s1, const char *s2)
   In:   src1 (r0) = s1, src2 (r1) = s2 (NUL-terminated strings).
   Out:  result (r0) = byte difference at the first mismatch
	 (<0 / 0 / >0), following the usual strcmp contract.
   Uses r4-r7 (saved/restored on the stack) and r12 as scratch.  */
ENTRY (__strcmp_arm)
	prologue push_ip=HAVE_PAC_LEAF
#if STRCMP_NO_PRECHECK == 0
	ldrb	r2, [src1]
	ldrb	r3, [src2]
	cmp	r2, #1
	it	cs
	cmpcs	r2, r3
	bne	L(fastpath_exit)
#endif
	strd	r4, r5, [sp, #-16]!
	.cfi_adjust_cfa_offset 16
	.cfi_rel_offset 4, 0
	.cfi_rel_offset 5, 4
	orr	tmp1, src1, src2
	strd	r6, r7, [sp, #8]
	.cfi_rel_offset 6, 8
	.cfi_rel_offset 7, 12
	mvn	const_m1, #0
	lsl	r2, tmp1, #29
	cbz	r2, L(loop_aligned8)

L(not_aligned):
	eor	tmp1, src1, src2
	tst	tmp1, #7
	bne	L(misaligned8)

	/* Deal with mutual misalignment by aligning downwards and then
	   masking off the unwanted loaded data to prevent a difference.  */
	and	tmp1, src1, #7
	bic	src1, src1, #7
	and	tmp2, tmp1, #3
	bic	src2, src2, #7
	lsl	tmp2, tmp2, #3	/* Bytes -> bits.  */
	ldrd	data1a, data1b, [src1], #16
	tst	tmp1, #4
	ldrd	data2a, data2b, [src2], #16
	/* In thumb code we can't use MVN with a register shift, but
	   we do have ORN.  */
	S2HI	tmp1, const_m1, tmp2
	orn	data1a, data1a, tmp1
	orn	data2a, data2a, tmp1
	beq	L(start_realigned8)
	orn	data1b, data1b, tmp1
	mov	data1a, const_m1
	orn	data2b, data2b, tmp1
	mov	data2a, const_m1
	b	L(start_realigned8)

	/* Unwind the inner loop by a factor of 2, giving 16 bytes per
	   pass.  */
	.p2align 5,,12  /* Don't start in the tail bytes of a cache line.  */
	.p2align 2	/* Always word aligned.  */
L(loop_aligned8):
	ldrd	data1a, data1b, [src1], #16
	ldrd	data2a, data2b, [src2], #16
L(start_realigned8):
	uadd8	syndrome_b, data1a, const_m1	/* Only want GE bits,  */
	eor	syndrome_a, data1a, data2a
	sel	syndrome_a, syndrome_a, const_m1
	cbnz	syndrome_a, L(diff_in_a)
	uadd8	syndrome_b, data1b, const_m1	/* Only want GE bits.  */
	eor	syndrome_b, data1b, data2b
	sel	syndrome_b, syndrome_b, const_m1
	cbnz	syndrome_b, L(diff_in_b)

	ldrd	data1a, data1b, [src1, #-8]
	ldrd	data2a, data2b, [src2, #-8]
	uadd8	syndrome_b, data1a, const_m1	/* Only want GE bits,  */
	eor	syndrome_a, data1a, data2a
	sel	syndrome_a, syndrome_a, const_m1
	uadd8	syndrome_b, data1b, const_m1	/* Only want GE bits.  */
	eor	syndrome_b, data1b, data2b
	sel	syndrome_b, syndrome_b, const_m1
	/* Can't use CBZ for backwards branch.  */
	orrs	syndrome_b, syndrome_b, syndrome_a /* Only need if s_a == 0 */
	beq	L(loop_aligned8)

L(diff_found):
	cbnz	syndrome_a, L(diff_in_a)

L(diff_in_b):
	strcmp_epilogue_aligned syndrome_b, data1b, data2b 1

L(diff_in_a):
	.cfi_restore_state
	strcmp_epilogue_aligned syndrome_a, data1a, data2a 1

	.cfi_restore_state
L(misaligned8):
	tst	tmp1, #3
	bne	L(misaligned4)
	ands	tmp1, src1, #3
	bne	L(mutual_align4)

	/* Unrolled by a factor of 2, to reduce the number of post-increment
	   operations.  */
L(loop_aligned4):
	ldr	data1, [src1], #8
	ldr	data2, [src2], #8
L(start_realigned4):
	uadd8	syndrome, data1, const_m1	/* Only need GE bits.  */
	eor	syndrome, data1, data2
	sel	syndrome, syndrome, const_m1
	cbnz	syndrome, L(aligned4_done)
	ldr	data1, [src1, #-4]
	ldr	data2, [src2, #-4]
	uadd8	syndrome, data1, const_m1
	eor	syndrome, data1, data2
	sel	syndrome, syndrome, const_m1
	cmp	syndrome, #0
	beq	L(loop_aligned4)

L(aligned4_done):
	strcmp_epilogue_aligned syndrome, data1, data2, 0

L(mutual_align4):
	.cfi_restore_state
	/* Deal with mutual misalignment by aligning downwards and then
	   masking off the unwanted loaded data to prevent a difference.  */
	lsl	tmp1, tmp1, #3	/* Bytes -> bits.  */
	bic	src1, src1, #3
	ldr	data1, [src1], #8
	bic	src2, src2, #3
	ldr	data2, [src2], #8

	/* In thumb code we can't use MVN with a register shift, but
	   we do have ORN.  */
	S2HI	tmp1, const_m1, tmp1
	orn	data1, data1, tmp1
	orn	data2, data2, tmp1
	b	L(start_realigned4)

L(misaligned4):
	ands	tmp1, src1, #3
	beq	L(src1_aligned)
	sub	src2, src2, tmp1
	bic	src1, src1, #3
	lsls	tmp1, tmp1, #31
	ldr	data1, [src1], #4
	beq	L(aligned_m2)
	bcs	L(aligned_m1)

#if STRCMP_NO_PRECHECK == 1
	ldrb	data2, [src2, #1]
	uxtb	tmp1, data1, ror #BYTE1_OFFSET
	subs	tmp1, tmp1, data2
	bne	L(misaligned_exit)
	cbz	data2, L(misaligned_exit)

L(aligned_m2):
	ldrb	data2, [src2, #2]
	uxtb	tmp1, data1, ror #BYTE2_OFFSET
	subs	tmp1, tmp1, data2
	bne	L(misaligned_exit)
	cbz	data2, L(misaligned_exit)

L(aligned_m1):
	ldrb	data2, [src2, #3]
	uxtb	tmp1, data1, ror #BYTE3_OFFSET
	subs	tmp1, tmp1, data2
	bne	L(misaligned_exit)
	add	src2, src2, #4
	cbnz	data2, L(src1_aligned)
#else  /* STRCMP_NO_PRECHECK */
	/* If we've done the pre-check, then we don't need to check the
	   first byte again here.  */
	ldrb	data2, [src2, #2]
	uxtb	tmp1, data1, ror #BYTE2_OFFSET
	subs	tmp1, tmp1, data2
	bne	L(misaligned_exit)
	cbz	data2, L(misaligned_exit)

L(aligned_m2):
	ldrb	data2, [src2, #3]
	uxtb	tmp1, data1, ror #BYTE3_OFFSET
	subs	tmp1, tmp1, data2
	bne	L(misaligned_exit)
	cbnz	data2, L(aligned_m1)
#endif

L(misaligned_exit):
	.cfi_remember_state
	mov	result, tmp1
	ldr	r4, [sp], #16
	.cfi_restore 4
	.cfi_adjust_cfa_offset -16
	epilogue push_ip=HAVE_PAC_LEAF

#if STRCMP_NO_PRECHECK == 0
L(fastpath_exit):
	.cfi_restore_state
	.cfi_remember_state
	sub	r0, r2, r3
	epilogue push_ip=HAVE_PAC_LEAF

L(aligned_m1):
	.cfi_restore_state
	.cfi_remember_state
	add	src2, src2, #4
#endif
L(src1_aligned):
	.cfi_restore_state
	/* src1 is word aligned, but src2 has no common alignment
	   with it.  */
	ldr	data1, [src1], #4
	lsls	tmp1, src2, #31		/* C=src2[1], Z=src2[0].  */

	bic	src2, src2, #3
	ldr	data2, [src2], #4
	bhi	L(overlap1)		/* C=1, Z=0 => src2[1:0] = 0b11.  */
	bcs	L(overlap2)		/* C=1, Z=1 => src2[1:0] = 0b10.  */

	/* (overlap3) C=0, Z=0 => src2[1:0] = 0b01.  */
L(overlap3):
	bic	tmp1, data1, #MSB
	uadd8	syndrome, data1, const_m1
	eors	syndrome, tmp1, data2, S2LO #8
	sel	syndrome, syndrome, const_m1
	bne	4f
	cbnz	syndrome, 5f
	ldr	data2, [src2], #4
	eor	tmp1, tmp1, data1
	cmp	tmp1, data2, S2HI #24
	bne	6f
	ldr	data1, [src1], #4
	b	L(overlap3)
4:
	S2LO	data2, data2, #8
	b	L(strcmp_tail)

5:
	bics	syndrome, syndrome, #MSB
	bne	L(strcmp_done_equal)

	/* We can only get here if the MSB of data1 contains 0, so
	   fast-path the exit.  */
	ldrb	result, [src2]
	.cfi_remember_state
	ldrd	r4, r5, [sp], #16
	.cfi_restore 4
	.cfi_restore 5
	/* R6/7 Not used in this sequence.  */
	.cfi_restore 6
	.cfi_restore 7
	.cfi_adjust_cfa_offset -16
	neg	result, result
	epilogue push_ip=HAVE_PAC_LEAF
6:
	.cfi_restore_state
	S2LO	data1, data1, #24
	and	data2, data2, #LSB
	b	L(strcmp_tail)

	.p2align 5,,12	/* Ensure at least 3 instructions in cache line.  */
L(overlap2):
	and	tmp1, data1, const_m1, S2LO #16
	uadd8	syndrome, data1, const_m1
	eors	syndrome, tmp1, data2, S2LO #16
	sel	syndrome, syndrome, const_m1
	bne	4f
	cbnz	syndrome, 5f
	ldr	data2, [src2], #4
	eor	tmp1, tmp1, data1
	cmp	tmp1, data2, S2HI #16
	bne	6f
	ldr	data1, [src1], #4
	b	L(overlap2)
4:
	S2LO	data2, data2, #16
	b	L(strcmp_tail)
5:
	ands	syndrome, syndrome, const_m1, S2LO #16
	bne	L(strcmp_done_equal)

	ldrh	data2, [src2]
	S2LO	data1, data1, #16
#ifdef __ARM_BIG_ENDIAN
	lsl	data2, data2, #16
#endif
	b	L(strcmp_tail)

6:
	S2LO	data1, data1, #16
	and	data2, data2, const_m1, S2LO #16
	b	L(strcmp_tail)

	.p2align 5,,12	/* Ensure at least 3 instructions in cache line.  */
L(overlap1):
	and	tmp1, data1, #LSB
	uadd8	syndrome, data1, const_m1
	eors	syndrome, tmp1, data2, S2LO #24
	sel	syndrome, syndrome, const_m1
	bne	4f
	cbnz	syndrome, 5f
	ldr	data2, [src2], #4
	eor	tmp1, tmp1, data1
	cmp	tmp1, data2, S2HI #8
	bne	6f
	ldr	data1, [src1], #4
	b	L(overlap1)
4:
	S2LO	data2, data2, #24
	b	L(strcmp_tail)
5:
	tst	syndrome, #LSB
	bne	L(strcmp_done_equal)
	ldr	data2, [src2]
6:
	S2LO	data1, data1, #8
	bic	data2, data2, #MSB
	b	L(strcmp_tail)

L(strcmp_done_equal):
	mov	result, #0
	.cfi_remember_state
	ldrd	r4, r5, [sp], #16
	.cfi_restore 4
	.cfi_restore 5
	/* R6/7 not used in this sequence.  */
	.cfi_restore 6
	.cfi_restore 7
	.cfi_adjust_cfa_offset -16
	epilogue push_ip=HAVE_PAC_LEAF

L(strcmp_tail):
	.cfi_restore_state
#ifndef __ARM_BIG_ENDIAN
	rev	data1, data1
	rev	data2, data2
	/* Now everything looks big-endian...  */
#endif
	uadd8	tmp1, data1, const_m1
	eor	tmp1, data1, data2
	sel	syndrome, tmp1, const_m1
	clz	tmp1, syndrome
	lsl	data1, data1, tmp1
	lsl	data2, data2, tmp1
	lsr	result, data1, #24
	ldrd	r4, r5, [sp], #16
	.cfi_restore 4
	.cfi_restore 5
	/* R6/7 not used in this sequence.  */
	.cfi_restore 6
	.cfi_restore 7
	.cfi_adjust_cfa_offset -16
	sub	result, result, data2, lsr #24
	epilogue push_ip=HAVE_PAC_LEAF

END (__strcmp_arm)

#endif /* __ARM_ARCH >= 7 && __ARM_ARCH_ISA_ARM >= 1 */