/*
 * strchr - find a character in a string
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64
 * Neon Available.
 */

#include "../asmdefs.h"

/* Arguments and results.  */
#define srcin		x0
#define chrin		w1

#define result		x0

/* Locals and temporaries.  */
#define src		x2
#define tmp1		x3
#define wtmp2		w4
#define tmp3		x5

#define vrepchr		v0
#define vdata		v1
#define vhas_nul	v2
#define vhas_chr	v3
#define vrepmask_0	v4
#define vrepmask_c	v5
#define vend		v6

#define L(l) .L ## l

/* Core algorithm.

   For each 16-byte chunk we calculate a 64-bit syndrome value, with
   four bits per byte (LSB is always in bits 0 and 1, for both big
   and little-endian systems).  For each tuple, bit 0 is set if
   the relevant byte matched the requested character; bit 1 is set
   if the relevant byte matched the NUL end of string (we trigger
   off bit0 for the special case of looking for NUL) and bits 2 and 3
   are not used.
   Since the bits in the syndrome reflect exactly the order in which
   things occur in the original string a count_trailing_zeros()
   operation will identify exactly which byte is causing the termination,
   and why.

   The endian-independence above relies on every chunk being loaded
   with LD1 {Vt.16B}, which places the byte at address src + i in
   lane i on both big and little-endian systems.  A scalar LDR Qt would
   reverse the lane order on big-endian and must not be used here.  */

/* __strchr_aarch64_mte

   In:       srcin (x0) = address of the string to search.
             chrin (w1) = character to look for; only the low 8 bits
                          are used (they are replicated into every
                          byte lane of vrepchr).
   Out:      result (x0) = address of the first byte equal to chrin,
                          or 0 if the NUL terminator is reached first.
                          When the low byte of chrin is 0 the address
                          of the terminator itself is returned.
   Clobbers: x2-x5, v0-v6, condition flags.
   Memory:   read-only, no stack use.  Only whole 16-byte aligned chunks
             are loaded, so bytes before the start of the string and
             after its terminator inside those chunks are read but
             never influence the result.

   ENTRY and END are provided by asmdefs.h.  */

ENTRY(__strchr_aarch64_mte)
	/* Magic constant 0x10011001 to allow us to identify which lane
	   matches the requested byte.  Magic constant 0x20022002 used
	   similarly for NUL termination.  Viewed as bytes these are
	   01 10 01 10 and 02 20 02 20, so after the pairwise add below
	   an even byte lands in the low nibble and an odd byte in the
	   high nibble of each syndrome byte.  */
	mov	wtmp2, #0x1001
	movk	wtmp2, #0x1001, lsl #16
	dup	vrepchr.16b, chrin
	bic	src, srcin, #15		/* Work with aligned 16-byte chunks.  */
	dup	vrepmask_c.4s, wtmp2
	ands	tmp1, srcin, #15	/* tmp1 = count of padding bytes.  */
	add	vrepmask_0.4s, vrepmask_c.4s, vrepmask_c.4s /* equiv: lsl #1 */
	b.eq	L(loop)

	/* Input string is not 16-byte aligned.  Rather than forcing
	   the padding bytes to a safe value, we calculate the syndrome
	   for all the bytes, but then mask off those bits of the
	   syndrome that are related to the padding.  */
	ld1	{vdata.16b}, [src], #16
	cmeq	vhas_nul.16b, vdata.16b, #0
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
	and	vhas_nul.16b, vhas_nul.16b, vrepmask_0.16b
	and	vhas_chr.16b, vhas_chr.16b, vrepmask_c.16b
	lsl	tmp1, tmp1, #2		/* Four syndrome bits per byte.  */
	orr	vend.16b, vhas_nul.16b, vhas_chr.16b
	mov	tmp3, #~0
	addp	vend.16b, vend.16b, vend.16b		/* 128->64 */
	lsl	tmp1, tmp3, tmp1	/* Ones above the padding bits.  */

	mov	tmp3, vend.d[0]
	ands	tmp1, tmp3, tmp1	/* Mask padding bits.  */
	b.ne	L(tail)

	/* Invariant for L(loop), L(end) and L(tail): after each load src
	   points 16 bytes past the chunk that was just examined.  */
L(loop):
	ld1	{vdata.16b}, [src], #16
	cmeq	vhas_nul.16b, vdata.16b, #0
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
	/* Use a fast check for the termination condition: any non-zero
	   byte in either compare result leaves a non-zero value.  */
	orr	vend.16b, vhas_nul.16b, vhas_chr.16b
	addp	vend.16b, vend.16b, vend.16b		/* 128->64 */
	mov	tmp1, vend.d[0]
	cbnz	tmp1, L(end)

	/* Second chunk of the 2x unrolled loop.  */
	ld1	{vdata.16b}, [src], #16
	cmeq	vhas_nul.16b, vdata.16b, #0
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
	/* Use a fast check for the termination condition.  */
	orr	vend.16b, vhas_nul.16b, vhas_chr.16b
	addp	vend.16b, vend.16b, vend.16b		/* 128->64 */
	mov	tmp1, vend.d[0]
	cbz	tmp1, L(loop)

L(end):
	/* Termination condition found.  Now need to establish exactly why
	   we terminated.  vhas_nul and vhas_chr still hold the raw compare
	   results for the chunk that hit; build the real syndrome.  */
	and	vhas_nul.16b, vhas_nul.16b, vrepmask_0.16b
	and	vhas_chr.16b, vhas_chr.16b, vrepmask_c.16b
	orr	vend.16b, vhas_nul.16b, vhas_chr.16b
	addp	vend.16b, vend.16b, vend.16b		/* 128->64 */

	mov	tmp1, vend.d[0]
L(tail):
	/* Count the trailing zeros, by bit reversing...  */
	rbit	tmp1, tmp1
	/* Re-bias source: back to the start of the chunk that hit.  */
	sub	src, src, #16
	clz	tmp1, tmp1	/* And counting the leading zeros.  */
	/* Tmp1 is even if the target character was found first.  Otherwise
	   we've found the end of string and we weren't looking for NUL.  */
	tst	tmp1, #1
	add	result, src, tmp1, lsr #2	/* Bit index / 4 = byte index.  */
	csel	result, result, xzr, eq
	ret

END(__strchr_aarch64_mte)