/*
 * Copyright (c) 2012-2014 ARM Ltd
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the company may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* Implementation of strcmp for ARMv7 when DSP instructions are
   available.  Use ldrd to support wider loads, provided the data
   is sufficiently aligned.  Use saturating arithmetic to optimize
   the compares.  */

/* Build Options:
   STRCMP_NO_PRECHECK: Don't run a quick pre-check of the first
   byte in the string.  If comparing completely random strings
   the pre-check will save time, since there is a very high
   probability of a mismatch in the first character: we save
   significant overhead if this is the common case.  However,
   if strings are likely to be identical (eg because we're
   verifying a hit in a hash table), then this check is largely
   redundant.  */

#define STRCMP_NO_PRECHECK	0

	/* This version uses Thumb-2 code.  */
	.thumb
	.syntax unified

#ifdef __ARM_BIG_ENDIAN
#define S2LO		lsl
#define S2LOEQ		lsleq
#define S2HI		lsr
#define MSB		0x000000ff
#define LSB		0xff000000
#define BYTE0_OFFSET	24
#define BYTE1_OFFSET	16
#define BYTE2_OFFSET	8
#define BYTE3_OFFSET	0
#else /* not  __ARM_BIG_ENDIAN */
#define S2LO		lsr
#define S2LOEQ		lsreq
#define S2HI		lsl
#define BYTE0_OFFSET	0
#define BYTE1_OFFSET	8
#define BYTE2_OFFSET	16
#define BYTE3_OFFSET	24
#define MSB		0xff000000
#define LSB		0x000000ff
#endif /* not  __ARM_BIG_ENDIAN */

	/* Emit the standard prologue boilerplate for an exported
	   function \f aligned to 2^\p2align bytes.  */
	.macro def_fn f p2align=0
	.text
	.p2align \p2align
	.global \f
	.type \f, %function
\f:
	.endm

/* Parameters and result.  */
#define src1		r0
#define src2		r1
#define result		r0	/* Overlaps src1.  */

/* Internal variables.  */
#define tmp1		r4
#define tmp2		r5
#define const_m1	r12

/* Additional internal variables for 64-bit aligned data.  */
#define data1a		r2
#define data1b		r3
#define data2a		r6
#define data2b		r7
#define syndrome_a	tmp1
#define syndrome_b	tmp2

/* Additional internal variables for 32-bit aligned data.  */
#define data1		r2
#define data2		r3
#define syndrome	tmp2


	/* Macro to compute and return the result value for word-aligned
	   cases.  */
	.macro strcmp_epilogue_aligned synd d1 d2 restore_r6
#ifdef __ARM_BIG_ENDIAN
	/* If data1 contains a zero byte, then syndrome will contain a 1 in
	   bit 7 of that byte.  Otherwise, the highest set bit in the
	   syndrome will highlight the first different bit.  It is therefore
	   sufficient to extract the eight bits starting with the syndrome
	   bit.  */
	clz	tmp1, \synd
	lsl	r1, \d2, tmp1
	.if \restore_r6
	ldrd	r6, r7, [sp, #8]
	.endif
	.cfi_restore 6
	.cfi_restore 7
	lsl	\d1, \d1, tmp1
	.cfi_remember_state
	lsr	result, \d1, #24
	ldrd	r4, r5, [sp], #16
	.cfi_restore 4
	.cfi_restore 5
	sub	result, result, r1, lsr #24
	bx	lr
#else
	/* To use the big-endian trick we'd have to reverse all three words.
	   that's slower than this approach.  */
	rev	\synd, \synd
	clz	tmp1, \synd
	bic	tmp1, tmp1, #7
	lsr	r1, \d2, tmp1
	.cfi_remember_state
	.if \restore_r6
	ldrd	r6, r7, [sp, #8]
	.endif
	.cfi_restore 6
	.cfi_restore 7
	lsr	\d1, \d1, tmp1
	and	result, \d1, #255
	and	r1, r1, #255
	ldrd	r4, r5, [sp], #16
	.cfi_restore 4
	.cfi_restore 5
	sub	result, result, r1

	bx	lr
#endif
	.endm

	.text
	.p2align	5
.Lstrcmp_start_addr:
#if STRCMP_NO_PRECHECK == 0
.Lfastpath_exit:
	sub	r0, r2, r3
	bx	lr
	nop
#endif
/* int strcmp (const char *s1, const char *s2)
   AAPCS: s1 in r0 (src1), s2 in r1 (src2); result returned in r0.
   r4-r7 are saved/restored on the stack on the slow paths.  */
def_fn	strcmp
#if STRCMP_NO_PRECHECK == 0
	/* Fast pre-check of the first byte: cmp r2, #1 sets C iff
	   s1[0] != 0; only then compare it against s2[0].  Branch out
	   (returning s1[0] - s2[0]) if s1[0] == 0 or the bytes differ.  */
	ldrb	r2, [src1]
	ldrb	r3, [src2]
	cmp	r2, #1
	it	cs
	cmpcs	r2, r3
	bne	.Lfastpath_exit
#endif
	.cfi_startproc
	strd	r4, r5, [sp, #-16]!
	.cfi_def_cfa_offset 16
	.cfi_offset 4, -16
	.cfi_offset 5, -12
	orr	tmp1, src1, src2
	strd	r6, r7, [sp, #8]
	.cfi_offset 6, -8
	.cfi_offset 7, -4
	mvn	const_m1, #0
	lsl	r2, tmp1, #29
	cbz	r2, .Lloop_aligned8

.Lnot_aligned:
	eor	tmp1, src1, src2
	tst	tmp1, #7
	bne	.Lmisaligned8

	/* Deal with mutual misalignment by aligning downwards and then
	   masking off the unwanted loaded data to prevent a difference.  */
	and	tmp1, src1, #7
	bic	src1, src1, #7
	and	tmp2, tmp1, #3
	bic	src2, src2, #7
	lsl	tmp2, tmp2, #3	/* Bytes -> bits.  */
	ldrd	data1a, data1b, [src1], #16
	tst	tmp1, #4
	ldrd	data2a, data2b, [src2], #16
	/* In thumb code we can't use MVN with a register shift, but
	   we do have ORN.  */
	S2HI	tmp1, const_m1, tmp2
	orn	data1a, data1a, tmp1
	orn	data2a, data2a, tmp1
	beq	.Lstart_realigned8
	orn	data1b, data1b, tmp1
	mov	data1a, const_m1
	orn	data2b, data2b, tmp1
	mov	data2a, const_m1
	b	.Lstart_realigned8

	/* Unwind the inner loop by a factor of 2, giving 16 bytes per
	   pass.  */
	.p2align 5,,12  /* Don't start in the tail bytes of a cache line.  */
	.p2align 2	/* Always word aligned.  */
.Lloop_aligned8:
	ldrd	data1a, data1b, [src1], #16
	ldrd	data2a, data2b, [src2], #16
.Lstart_realigned8:
	uadd8	syndrome_b, data1a, const_m1	/* Only want GE bits,  */
	eor	syndrome_a, data1a, data2a
	sel	syndrome_a, syndrome_a, const_m1
	cbnz	syndrome_a, .Ldiff_in_a
	uadd8	syndrome_b, data1b, const_m1	/* Only want GE bits.  */
	eor	syndrome_b, data1b, data2b
	sel	syndrome_b, syndrome_b, const_m1
	cbnz	syndrome_b, .Ldiff_in_b

	ldrd	data1a, data1b, [src1, #-8]
	ldrd	data2a, data2b, [src2, #-8]
	uadd8	syndrome_b, data1a, const_m1	/* Only want GE bits,  */
	eor	syndrome_a, data1a, data2a
	sel	syndrome_a, syndrome_a, const_m1
	uadd8	syndrome_b, data1b, const_m1	/* Only want GE bits.  */
	eor	syndrome_b, data1b, data2b
	sel	syndrome_b, syndrome_b, const_m1
	/* Can't use CBZ for backwards branch.  */
	orrs	syndrome_b, syndrome_b, syndrome_a /* Only need if s_a == 0 */
	beq	.Lloop_aligned8

.Ldiff_found:
	cbnz	syndrome_a, .Ldiff_in_a

.Ldiff_in_b:
	strcmp_epilogue_aligned syndrome_b, data1b, data2b 1

.Ldiff_in_a:
	.cfi_restore_state
	strcmp_epilogue_aligned syndrome_a, data1a, data2a 1

	.cfi_restore_state
.Lmisaligned8:
	tst	tmp1, #3
	bne	.Lmisaligned4
	ands	tmp1, src1, #3
	bne	.Lmutual_align4

	/* Unrolled by a factor of 2, to reduce the number of post-increment
	   operations.  */
.Lloop_aligned4:
	ldr	data1, [src1], #8
	ldr	data2, [src2], #8
.Lstart_realigned4:
	uadd8	syndrome, data1, const_m1	/* Only need GE bits.  */
	eor	syndrome, data1, data2
	sel	syndrome, syndrome, const_m1
	cbnz	syndrome, .Laligned4_done
	ldr	data1, [src1, #-4]
	ldr	data2, [src2, #-4]
	uadd8	syndrome, data1, const_m1
	eor	syndrome, data1, data2
	sel	syndrome, syndrome, const_m1
	cmp	syndrome, #0
	beq	.Lloop_aligned4

.Laligned4_done:
	strcmp_epilogue_aligned syndrome, data1, data2, 0

.Lmutual_align4:
	.cfi_restore_state
	/* Deal with mutual misalignment by aligning downwards and then
	   masking off the unwanted loaded data to prevent a difference.  */
	lsl	tmp1, tmp1, #3	/* Bytes -> bits.  */
	bic	src1, src1, #3
	ldr	data1, [src1], #8
	bic	src2, src2, #3
	ldr	data2, [src2], #8

	/* In thumb code we can't use MVN with a register shift, but
	   we do have ORN.  */
	S2HI	tmp1, const_m1, tmp1
	orn	data1, data1, tmp1
	orn	data2, data2, tmp1
	b	.Lstart_realigned4

.Lmisaligned4:
	ands	tmp1, src1, #3
	beq	.Lsrc1_aligned
	sub	src2, src2, tmp1
	bic	src1, src1, #3
	lsls	tmp1, tmp1, #31
	ldr	data1, [src1], #4
	beq	.Laligned_m2
	bcs	.Laligned_m1

#if STRCMP_NO_PRECHECK == 1
	ldrb	data2, [src2, #1]
	uxtb	tmp1, data1, ror #BYTE1_OFFSET
	subs	tmp1, tmp1, data2
	bne	.Lmisaligned_exit
	cbz	data2, .Lmisaligned_exit

.Laligned_m2:
	ldrb	data2, [src2, #2]
	uxtb	tmp1, data1, ror #BYTE2_OFFSET
	subs	tmp1, tmp1, data2
	bne	.Lmisaligned_exit
	cbz	data2, .Lmisaligned_exit

.Laligned_m1:
	ldrb	data2, [src2, #3]
	uxtb	tmp1, data1, ror #BYTE3_OFFSET
	subs	tmp1, tmp1, data2
	bne	.Lmisaligned_exit
	add	src2, src2, #4
	cbnz	data2, .Lsrc1_aligned
#else  /* STRCMP_NO_PRECHECK */
	/* If we've done the pre-check, then we don't need to check the
	   first byte again here.  */
	ldrb	data2, [src2, #2]
	uxtb	tmp1, data1, ror #BYTE2_OFFSET
	subs	tmp1, tmp1, data2
	bne	.Lmisaligned_exit
	cbz	data2, .Lmisaligned_exit

.Laligned_m2:
	ldrb	data2, [src2, #3]
	uxtb	tmp1, data1, ror #BYTE3_OFFSET
	subs	tmp1, tmp1, data2
	bne	.Lmisaligned_exit
	cbnz	data2, .Laligned_m1
#endif

.Lmisaligned_exit:
	.cfi_remember_state
	mov	result, tmp1
	/* Only r4 (tmp1) was live on this path; pop the whole 16-byte
	   frame while restoring it.  */
	ldr	r4, [sp], #16
	.cfi_restore 4
	bx	lr

#if STRCMP_NO_PRECHECK == 0
.Laligned_m1:
	add	src2, src2, #4
#endif
.Lsrc1_aligned:
	.cfi_restore_state
	/* src1 is word aligned, but src2 has no common alignment
	   with it.  */
	ldr	data1, [src1], #4
	lsls	tmp1, src2, #31		/* C=src2[1], Z=src2[0].  */

	bic	src2, src2, #3
	ldr	data2, [src2], #4
	bhi	.Loverlap1		/* C=1, Z=0 => src2[1:0] = 0b11.  */
	bcs	.Loverlap2		/* C=1, Z=1 => src2[1:0] = 0b10.  */

	/* (overlap3) C=0, Z=0 => src2[1:0] = 0b01.  */
.Loverlap3:
	bic	tmp1, data1, #MSB
	uadd8	syndrome, data1, const_m1
	eors	syndrome, tmp1, data2, S2LO #8
	sel	syndrome, syndrome, const_m1
	bne	4f
	cbnz	syndrome, 5f
	ldr	data2, [src2], #4
	eor	tmp1, tmp1, data1
	cmp	tmp1, data2, S2HI #24
	bne	6f
	ldr	data1, [src1], #4
	b	.Loverlap3
4:
	S2LO	data2, data2, #8
	b	.Lstrcmp_tail

5:
	bics	syndrome, syndrome, #MSB
	bne	.Lstrcmp_done_equal

	/* We can only get here if the MSB of data1 contains 0, so
	   fast-path the exit.  */
	ldrb	result, [src2]
	.cfi_remember_state
	ldrd	r4, r5, [sp], #16
	.cfi_restore 4
	.cfi_restore 5
	/* R6/7 Not used in this sequence.  */
	.cfi_restore 6
	.cfi_restore 7
	neg	result, result
	bx	lr

6:
	.cfi_restore_state
	S2LO	data1, data1, #24
	and	data2, data2, #LSB
	b	.Lstrcmp_tail

	.p2align 5,,12	/* Ensure at least 3 instructions in cache line.  */
.Loverlap2:
	and	tmp1, data1, const_m1, S2LO #16
	uadd8	syndrome, data1, const_m1
	eors	syndrome, tmp1, data2, S2LO #16
	sel	syndrome, syndrome, const_m1
	bne	4f
	cbnz	syndrome, 5f
	ldr	data2, [src2], #4
	eor	tmp1, tmp1, data1
	cmp	tmp1, data2, S2HI #16
	bne	6f
	ldr	data1, [src1], #4
	b	.Loverlap2
4:
	S2LO	data2, data2, #16
	b	.Lstrcmp_tail
5:
	ands	syndrome, syndrome, const_m1, S2LO #16
	bne	.Lstrcmp_done_equal

	ldrh	data2, [src2]
	S2LO	data1, data1, #16
#ifdef __ARM_BIG_ENDIAN
	lsl	data2, data2, #16
#endif
	b	.Lstrcmp_tail

6:
	S2LO	data1, data1, #16
	and	data2, data2, const_m1, S2LO #16
	b	.Lstrcmp_tail

	.p2align 5,,12	/* Ensure at least 3 instructions in cache line.  */
.Loverlap1:
	and	tmp1, data1, #LSB
	uadd8	syndrome, data1, const_m1
	eors	syndrome, tmp1, data2, S2LO #24
	sel	syndrome, syndrome, const_m1
	bne	4f
	cbnz	syndrome, 5f
	ldr	data2, [src2], #4
	eor	tmp1, tmp1, data1
	cmp	tmp1, data2, S2HI #8
	bne	6f
	ldr	data1, [src1], #4
	b	.Loverlap1
4:
	S2LO	data2, data2, #24
	b	.Lstrcmp_tail
5:
	tst	syndrome, #LSB
	bne	.Lstrcmp_done_equal
	ldr	data2, [src2]
6:
	S2LO	data1, data1, #8
	bic	data2, data2, #MSB
	b	.Lstrcmp_tail

.Lstrcmp_done_equal:
	mov	result, #0
	.cfi_remember_state
	ldrd	r4, r5, [sp], #16
	.cfi_restore 4
	.cfi_restore 5
	/* R6/7 not used in this sequence.  */
	.cfi_restore 6
	.cfi_restore 7
	bx	lr

.Lstrcmp_tail:
	.cfi_restore_state
#ifndef __ARM_BIG_ENDIAN
	rev	data1, data1
	rev	data2, data2
	/* Now everything looks big-endian...  */
#endif
	uadd8	tmp1, data1, const_m1
	eor	tmp1, data1, data2
	sel	syndrome, tmp1, const_m1
	clz	tmp1, syndrome
	lsl	data1, data1, tmp1
	lsl	data2, data2, tmp1
	lsr	result, data1, #24
	ldrd	r4, r5, [sp], #16
	.cfi_restore 4
	.cfi_restore 5
	/* R6/7 not used in this sequence.  */
	.cfi_restore 6
	.cfi_restore 7
	sub	result, result, data2, lsr #24
	bx	lr
	.cfi_endproc
	.size	strcmp, . - .Lstrcmp_start_addr