/*
 * strcmp - compare two strings
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64
 */

#include "../asmdefs.h"

#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f
#define REP8_80 0x8080808080808080

/* Parameters and result.  */
#define src1        x0
#define src2        x1
#define result      x0

/* Internal variables.  */
#define data1       x2
#define data1w      w2
#define data2       x3
#define data2w      w3
#define has_nul     x4
#define diff        x5
#define syndrome    x6
#define tmp1        x7
#define tmp2        x8
#define tmp3        x9
#define zeroones    x10
#define pos         x11

        /* Start of performance-critical section -- one 64B cache line.  */
ENTRY (__strcmp_aarch64)
        eor     tmp1, src1, src2
        mov     zeroones, #REP8_01
        tst     tmp1, #7
        b.ne    L(misaligned8)
        ands    tmp1, src1, #7
        b.ne    L(mutual_align)
        /* NUL detection works on the principle that (X - 1) & (~X) & 0x80
           (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
           can be done in parallel across the entire word.  */
L(loop_aligned):
        ldr     data1, [src1], #8
        ldr     data2, [src2], #8
L(start_realigned):
        sub     tmp1, data1, zeroones
        orr     tmp2, data1, #REP8_7f
        eor     diff, data1, data2      /* Non-zero if differences found.  */
        bic     has_nul, tmp1, tmp2     /* Non-zero if NUL terminator.  */
        orr     syndrome, diff, has_nul
        cbz     syndrome, L(loop_aligned)
        /* End of performance-critical section -- one 64B cache line.  */

L(end):
#ifndef __AARCH64EB__
        rev     syndrome, syndrome
        rev     data1, data1
        /* The MS-non-zero bit of the syndrome marks either the first bit
           that is different, or the top bit of the first zero byte.
           Shifting left now will bring the critical information into the
           top bits.  */
        clz     pos, syndrome
        rev     data2, data2
        lsl     data1, data1, pos
        lsl     data2, data2, pos
        /* But we need to zero-extend (char is unsigned) the value and then
           perform a signed 32-bit subtraction.  */
        lsr     data1, data1, #56
        sub     result, data1, data2, lsr #56
        ret
#else
        /* For big-endian we cannot use the trick with the syndrome value
           as carry-propagation can corrupt the upper bits if the trailing
           bytes in the string contain 0x01.  */
        /* However, if there is no NUL byte in the dword, we can generate
           the result directly.  We can't just subtract the bytes as the
           MSB might be significant.  */
        cbnz    has_nul, 1f
        cmp     data1, data2
        cset    result, ne
        cneg    result, result, lo
        ret
1:
        /* Re-compute the NUL-byte detection, using a byte-reversed value.  */
        rev     tmp3, data1
        sub     tmp1, tmp3, zeroones
        orr     tmp2, tmp3, #REP8_7f
        bic     has_nul, tmp1, tmp2
        rev     has_nul, has_nul
        orr     syndrome, diff, has_nul
        clz     pos, syndrome
        /* The MS-non-zero bit of the syndrome marks either the first bit
           that is different, or the top bit of the first zero byte.
           Shifting left now will bring the critical information into the
           top bits.  */
        lsl     data1, data1, pos
        lsl     data2, data2, pos
        /* But we need to zero-extend (char is unsigned) the value and then
           perform a signed 32-bit subtraction.  */
        lsr     data1, data1, #56
        sub     result, data1, data2, lsr #56
        ret
#endif

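/* For reference: on little-endian targets the aligned loop and the L(end)
   sequence above behave like the C sketch below.  This is a model only --
   the name strcmp_sketch is illustrative, it assumes GCC/Clang builtins
   and <stdint.h>/<string.h>, and it omits the alignment and page-boundary
   handling that the assembly performs.

   #include <stdint.h>
   #include <string.h>

   static uint64_t load64 (const unsigned char *p)
   {
       uint64_t v;
       memcpy (&v, p, 8);            // 8-byte load, like ldr
       return v;
   }

   int strcmp_sketch (const unsigned char *s1, const unsigned char *s2)
   {
       for (;; s1 += 8, s2 += 8) {
           uint64_t d1 = load64 (s1), d2 = load64 (s2);
           // (X - 1) & ~(X | 0x7f) is non-zero iff some byte of X is zero.
           uint64_t nul = (d1 - 0x0101010101010101ULL)
                          & ~(d1 | 0x7f7f7f7f7f7f7f7fULL);
           uint64_t syndrome = (d1 ^ d2) | nul;
           if (syndrome == 0)        // no difference and no NUL yet
               continue;
           // Byte-reverse so the first interesting byte becomes the most
           // significant one, shift it to the top, then compare top bytes.
           int pos = __builtin_clzll (__builtin_bswap64 (syndrome));
           d1 = __builtin_bswap64 (d1) << pos;
           d2 = __builtin_bswap64 (d2) << pos;
           return (int) (d1 >> 56) - (int) (d2 >> 56);
       }
   }
*/
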
L(mutual_align):
        /* Sources are mutually aligned, but are not currently at an
           alignment boundary.  Round down the addresses and then mask off
           the bytes that precede the start point.  */
        bic     src1, src1, #7
        bic     src2, src2, #7
        lsl     tmp1, tmp1, #3          /* Bytes beyond alignment -> bits.  */
        ldr     data1, [src1], #8
        neg     tmp1, tmp1              /* Bits to alignment -64.  */
        ldr     data2, [src2], #8
        mov     tmp2, #~0
#ifdef __AARCH64EB__
        /* Big-endian.  Early bytes are at MSB.  */
        lsl     tmp2, tmp2, tmp1        /* Shift (tmp1 & 63).  */
#else
        /* Little-endian.  Early bytes are at LSB.  */
        lsr     tmp2, tmp2, tmp1        /* Shift (tmp1 & 63).  */
#endif
        orr     data1, data1, tmp2
        orr     data2, data2, tmp2
        b       L(start_realigned)

L(misaligned8):
        /* Align SRC1 to 8 bytes and then compare 8 bytes at a time, always
           checking to make sure that we don't access beyond the page boundary
           in SRC2.  */
        tst     src1, #7
        b.eq    L(loop_misaligned)
L(do_misaligned):
        ldrb    data1w, [src1], #1
        ldrb    data2w, [src2], #1
        cmp     data1w, #1
        ccmp    data1w, data2w, #0, cs  /* NZCV = 0b0000.  */
        b.ne    L(done)
        tst     src1, #7
        b.ne    L(do_misaligned)

L(loop_misaligned):
        /* Test if we are within the last dword of the end of a 4K page.  If
           yes then jump back to the misaligned loop to compare a byte at a
           time.  */
        and     tmp1, src2, #0xff8
        eor     tmp1, tmp1, #0xff8
        cbz     tmp1, L(do_misaligned)
        ldr     data1, [src1], #8
        ldr     data2, [src2], #8

        sub     tmp1, data1, zeroones
        orr     tmp2, data1, #REP8_7f
        eor     diff, data1, data2      /* Non-zero if differences found.  */
        bic     has_nul, tmp1, tmp2     /* Non-zero if NUL terminator.  */
        orr     syndrome, diff, has_nul
        cbz     syndrome, L(loop_misaligned)
        b       L(end)

L(done):
        sub     result, data1, data2
        ret

END (__strcmp_aarch64)
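
/* Note on the guard at L(loop_misaligned): SRC1 is 8-byte aligned there, so
   only the (possibly unaligned) 8-byte load from SRC2 could cross into the
   next page, which might be unmapped.  Assuming a 4 KiB minimum page size,
   the and/eor/cbz test is equivalent to the C sketch below; the helper name
   src2_near_page_end is illustrative only.

   #include <stdint.h>

   // True when addr's offset within a 4 KiB page falls in the page's last
   // 8 bytes, so an 8-byte load starting there may run into the next page.
   static int src2_near_page_end (uintptr_t addr)
   {
       return (addr & 0xff8) == 0xff8;   // same as (addr & 0xfff) >= 0xff8
   }

   When this would return true, the code drops back to L(do_misaligned) and
   compares one byte at a time until SRC1 reaches the next 8-byte boundary,
   after which the page check is made again.  */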