/*
 * strchr - find a character in a string
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64
 * Neon Available.
 */

#include "../asmdefs.h"

/* Arguments and results.  */
#define srcin		x0
#define chrin		w1

#define result		x0

#define src		x2
#define tmp1		x3
#define wtmp2		w4
#define tmp3		x5

#define vrepchr		v0
#define qdata		q1
#define vdata		v1
#define vhas_nul	v2
#define vhas_chr	v3
#define vrepmask_0	v4
#define vrepmask_c	v5
#define vend		v6

#define L(l) .L ## l

/* Core algorithm.

   For each 16-byte chunk we calculate a 64-bit syndrome value, with
   four bits per byte (LSB is always in bits 0 and 1, for both big
   and little-endian systems).  For each tuple, bit 0 is set if
   the relevant byte matched the requested character; bit 1 is set
   if the relevant byte matched the NUL end of string (we trigger
   off bit0 for the special case of looking for NUL) and bits 2 and 3
   are not used.
   Since the bits in the syndrome reflect exactly the order in which
   things occur in the original string a count_trailing_zeros()
   operation will identify exactly which byte is causing the termination,
   and why. */

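/* For illustration only, not part of the build: a rough C model of the
   syndrome layout described above.  Following the four-bits-per-byte
   layout, source byte i owns bits [4*i+3 : 4*i] of the 64-bit syndrome;
   the helper names below are hypothetical.

   #include <stddef.h>
   #include <stdint.h>

   // Build the syndrome for one 16-byte chunk:
   //   bit 4*i     set if chunk[i] == c
   //   bit 4*i + 1 set if chunk[i] == '\0'
   static uint64_t
   chunk_syndrome (const unsigned char *chunk, unsigned char c)
   {
     uint64_t syn = 0;
     for (int i = 0; i < 16; i++)
       {
         if (chunk[i] == c)
           syn |= 1ULL << (4 * i);
         if (chunk[i] == 0)
           syn |= 2ULL << (4 * i);
       }
     return syn;
   }

   // Decode a non-zero syndrome the way the L(tail) code does: count the
   // trailing zeros, use bit 0 of that count to tell "character found"
   // (even) from "hit the terminator first" (odd), and count / 4 as the
   // byte index within the chunk.
   static char *
   decode_syndrome (const char *chunk, uint64_t syn)
   {
     unsigned tz = __builtin_ctzll (syn);
     return (tz & 1) ? NULL : (char *) chunk + (tz >> 2);
   }
*/
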
/* Locals and temporaries. */

ENTRY(__strchr_aarch64_mte)
	/* Magic constant 0x10011001 to allow us to identify which lane
	   matches the requested byte.  Magic constant 0x20022002 used
	   similarly for NUL termination. */
	mov	wtmp2, #0x1001
	movk	wtmp2, #0x1001, lsl #16
	dup	vrepchr.16b, chrin
	bic	src, srcin, #15		/* Work with aligned 16-byte chunks. */
	dup	vrepmask_c.4s, wtmp2
	ands	tmp1, srcin, #15
	add	vrepmask_0.4s, vrepmask_c.4s, vrepmask_c.4s /* equiv: lsl #1 */
	b.eq	L(loop)
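	/* Worked layout of the two masks (illustrative note, assuming
	   little-endian byte order): per 32-bit lane, 0x10011001 is the
	   byte pattern 01 10 01 10 and 0x20022002 is 02 20 02 20.  After
	   the cmeq/and steps an even byte therefore contributes 0x01
	   (char) or 0x02 (NUL) and an odd byte 0x10 or 0x20, so the
	   addp 128->64 fold of adjacent byte pairs leaves each source
	   byte owning one nibble of the 64-bit syndrome.  */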

	/* Input string is not 16-byte aligned.  Rather than forcing
	   the padding bytes to a safe value, we calculate the syndrome
	   for all the bytes, but then mask off those bits of the
	   syndrome that are related to the padding.  */
	ldr	qdata, [src], #16
	cmeq	vhas_nul.16b, vdata.16b, #0
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
	and	vhas_nul.16b, vhas_nul.16b, vrepmask_0.16b
	and	vhas_chr.16b, vhas_chr.16b, vrepmask_c.16b
	lsl	tmp1, tmp1, #2
	orr	vend.16b, vhas_nul.16b, vhas_chr.16b
	mov	tmp3, #~0
	addp	vend.16b, vend.16b, vend.16b		/* 128->64 */
	lsl	tmp1, tmp3, tmp1

	mov	tmp3, vend.d[0]
	ands	tmp1, tmp3, tmp1	/* Mask padding bits. */
	b.ne	L(tail)
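	/* Illustrative note: the mask built in tmp1 above is equivalent to
	   ~0ULL << (4 * (srcin & 15)), which clears the four syndrome bits
	   of every padding byte that precedes the real start of the
	   string.  */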

L(loop):
	ldr	qdata, [src], #32
	cmeq	vhas_nul.16b, vdata.16b, #0
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
	/* Use a fast check for the termination condition.  */
	orr	vend.16b, vhas_nul.16b, vhas_chr.16b
	addp	vend.16b, vend.16b, vend.16b		/* 128->64 */
	mov	tmp1, vend.d[0]
	cbnz	tmp1, L(end)
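	/* Illustrative note: the unmasked OR above is non-zero exactly when
	   some byte in the chunk equals either chrin or NUL, so the loop
	   only builds the precise, masked syndrome on the exit path.  */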

	ldr	qdata, [src, #-16]
	cmeq	vhas_nul.16b, vdata.16b, #0
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
	/* Use a fast check for the termination condition.  */
	orr	vend.16b, vhas_nul.16b, vhas_chr.16b
	addp	vend.16b, vend.16b, vend.16b		/* 128->64 */
	mov	tmp1, vend.d[0]
	cbz	tmp1, L(loop)

	/* Adjust src for next two subtractions. */
	add	src, src, #16
L(end):
	/* Termination condition found.  Now need to establish exactly why
	   we terminated.  */
	and	vhas_nul.16b, vhas_nul.16b, vrepmask_0.16b
	and	vhas_chr.16b, vhas_chr.16b, vrepmask_c.16b
	sub	src, src, #16
	orr	vend.16b, vhas_nul.16b, vhas_chr.16b
	addp	vend.16b, vend.16b, vend.16b		/* 128->64 */

	mov	tmp1, vend.d[0]
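	/* Illustrative note: on every path into L(tail), src points 16 bytes
	   past the start of the 16-byte chunk whose syndrome is in tmp1,
	   hence the re-bias below.  */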
L(tail):
	/* Count the trailing zeros, by bit reversing...  */
	rbit	tmp1, tmp1
	/* Re-bias source.  */
	sub	src, src, #16
	clz	tmp1, tmp1	/* And counting the leading zeros.  */
	/* Tmp1 is even if the target character was found first.  Otherwise
	   we've found the end of string and we weren't looking for NUL.  */
	tst	tmp1, #1
	add	result, src, tmp1, lsr #2
	csel	result, result, xzr, eq
	ret

END(__strchr_aarch64_mte)
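
/* Usage sketch, for illustration only and not part of this file: the C
   prototype follows the register assignments above (srcin in x0, chrin in
   w1, result in x0), i.e. the usual strchr signature.

   #include <stdio.h>

   char *__strchr_aarch64_mte (const char *s, int c);

   int
   main (void)
   {
     const char *s = "hello";
     printf ("%s\n", __strchr_aarch64_mte (s, 'l'));            // "llo"
     printf ("%p\n", (void *) __strchr_aarch64_mte (s, 'z'));   // NULL: not found
     return 0;
   }
*/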