xref: /freebsd-src/contrib/arm-optimized-routines/string/arm/strlen-armv6t2.S (revision 072a4ba82a01476eaee33781ccd241033eefcf0b)
131914882SAlex Richardson/*
231914882SAlex Richardson * strlen - calculate the length of a string
331914882SAlex Richardson *
4*072a4ba8SAndrew Turner * Copyright (c) 2010-2022, Arm Limited.
5*072a4ba8SAndrew Turner * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
631914882SAlex Richardson */
731914882SAlex Richardson
831914882SAlex Richardson#if __ARM_ARCH >= 6 && __ARM_ARCH_ISA_THUMB == 2
931914882SAlex Richardson
1031914882SAlex Richardson/*
1131914882SAlex Richardson   Assumes:
1231914882SAlex Richardson   ARMv6T2, AArch32
1331914882SAlex Richardson
1431914882SAlex Richardson */
1531914882SAlex Richardson
16*072a4ba8SAndrew Turner#include "asmdefs.h"
1731914882SAlex Richardson
/* Endian-neutral shift mnemonics for a word loaded from memory:
   S2HI moves data towards the higher-addressed byte lanes of the word,
   S2LO towards the lower-addressed ones.  Big-endian (__ARMEB__) keeps the
   lowest-addressed byte in the most significant lane, so the two directions
   swap.  Only S2HI is referenced by the code shown in this file.  */
1831914882SAlex Richardson#ifdef __ARMEB__
1931914882SAlex Richardson#define S2LO		lsl
2031914882SAlex Richardson#define S2HI		lsr
2131914882SAlex Richardson#else
2231914882SAlex Richardson#define S2LO		lsr
2331914882SAlex Richardson#define S2HI		lsl
2431914882SAlex Richardson#endif
2531914882SAlex Richardson
26*072a4ba8SAndrew Turner/* Ensure the .cantunwind directive is prepended to .fnend.
27*072a4ba8SAndrew Turner   Leaf functions cannot throw exceptions - EHABI only supports
28*072a4ba8SAndrew Turner   synchronous exceptions.  */
/* NOTE(review): asmdefs.h is included earlier in this file, before IS_LEAF
   is defined.  If that header tests IS_LEAF with #if/#ifdef while it is
   being included, this definition arrives too late to have any effect --
   confirm against asmdefs.h.  */
29*072a4ba8SAndrew Turner#define IS_LEAF
30*072a4ba8SAndrew Turner
3131914882SAlex Richardson	/* This code requires Thumb.  */
3231914882SAlex Richardson	.thumb
3331914882SAlex Richardson	.syntax unified
3431914882SAlex Richardson
3531914882SAlex Richardson/* Parameters and result.  */
/* r0 carries the string address on entry and is reused as the running
   length, so the original address is not available once result is
   written.  */
3631914882SAlex Richardson#define srcin		r0
3731914882SAlex Richardson#define result		r0
3831914882SAlex Richardson
3931914882SAlex Richardson/* Internal variables.  */
/* src:            8-byte-aligned read pointer.
   data1a/data1b:  the two words fetched by each ldrd (lower/higher address),
                   later overwritten with per-byte NUL markers.
   const_m1:       0xffffffff.
   const_0:        0; shares r4 with tmp1, so it may only be set once tmp1 is
                   dead.
   r4 and r5 are callee-saved under the AAPCS; the function passes "4 5" to
   its prologue/epilogue macros, presumably to save and restore them
   (confirm in asmdefs.h).  */
4031914882SAlex Richardson#define src		r1
4131914882SAlex Richardson#define data1a		r2
4231914882SAlex Richardson#define data1b		r3
4331914882SAlex Richardson#define const_m1	r12
4431914882SAlex Richardson#define const_0		r4
4531914882SAlex Richardson#define tmp1		r4		/* Overlaps const_0  */
4631914882SAlex Richardson#define tmp2		r5
4731914882SAlex Richardson
/* __strlen_armv6t2 - strlen for Thumb-2.

   In:   r0 (srcin)  = address of the string.
   Out:  r0 (result) = number of bytes preceding the first NUL byte.
   Uses: r1-r3, r12, the condition flags and the APSR.GE bits; r4/r5 are
         used as well (see the prologue note below).

   The string is scanned one 8-byte-aligned doubleword per ldrd, four
   doublewords per loop iteration.  Each doubleword is checked before the
   next one is loaded.  Because every load is a whole aligned doubleword,
   bytes that share an aligned 8-byte block with the first byte or with the
   terminator are read even though they lie outside the string.

   Invariant at every NUL check: result == (address of the doubleword being
   checked) - srcin.  */
4831914882SAlex RichardsonENTRY (__strlen_armv6t2)
	/* Macro from asmdefs.h (not visible here); presumably saves r4-r5,
	   plus ip when HAVE_PAC_LEAF is set -- confirm.  */
49*072a4ba8SAndrew Turner	prologue 4 5 push_ip=HAVE_PAC_LEAF
5031914882SAlex Richardson	pld	[srcin, #0]
	/* src = srcin rounded down to an 8-byte boundary.  */
5131914882SAlex Richardson	bic	src, srcin, #7
	/* const_m1 = 0xffffffff.  */
5231914882SAlex Richardson	mvn	const_m1, #0
5331914882SAlex Richardson	ands	tmp1, srcin, #7		/* srcin % 8; Z set when aligned.  */
5431914882SAlex Richardson	pld	[src, #32]
	/* Flags still come from the ands above (pld does not alter them).  */
5531914882SAlex Richardson	bne.w	L(misaligned8)
5631914882SAlex Richardson	mov	const_0, #0
	/* Pre-bias by -8: the loop adds 8 before the first check, giving 0.  */
5731914882SAlex Richardson	mov	result, #-8
5831914882SAlex RichardsonL(loop_aligned):
5931914882SAlex Richardson	/* Bytes 0-7.  */
6031914882SAlex Richardson	ldrd	data1a, data1b, [src]
6131914882SAlex Richardson	pld	[src, #64]
6231914882SAlex Richardson	add	result, result, #8
6331914882SAlex RichardsonL(start_realigned):
	/* NUL detection: adding 0xff to each byte carries out of the byte lane
	   exactly when the byte is non-zero, and uadd8 records that carry in
	   GE[n].  sel then builds a marker word from the GE bits.  The uadd8
	   sum itself is discarded.  */
6431914882SAlex Richardson	uadd8	data1a, data1a, const_m1	/* GE[n] = (byte n != 0).  */
6531914882SAlex Richardson	sel	data1a, const_0, const_m1	/* 0xff at each NUL, else 0x00.  */
6631914882SAlex Richardson	uadd8	data1b, data1b, const_m1
	/* Lanes where the second word is non-zero take the first word's marker
	   and NUL lanes take 0xff, so data1b != 0 iff either word holds a NUL.
	   As a position marker data1b is exact only when data1a == 0.  */
6731914882SAlex Richardson	sel	data1b, data1a, const_m1	/* Only used if d1a == 0.  */
6831914882SAlex Richardson	cbnz	data1b, L(null_found)
6931914882SAlex Richardson
7031914882SAlex Richardson	/* Bytes 8-15.  */
7131914882SAlex Richardson	ldrd	data1a, data1b, [src, #8]
7231914882SAlex Richardson	uadd8	data1a, data1a, const_m1	/* GE[n] = (byte n != 0).  */
7331914882SAlex Richardson	add	result, result, #8
7431914882SAlex Richardson	sel	data1a, const_0, const_m1	/* 0xff at each NUL, else 0x00.  */
7531914882SAlex Richardson	uadd8	data1b, data1b, const_m1
7631914882SAlex Richardson	sel	data1b, data1a, const_m1	/* Only used if d1a == 0.  */
7731914882SAlex Richardson	cbnz	data1b, L(null_found)
7831914882SAlex Richardson
7931914882SAlex Richardson	/* Bytes 16-23.  */
8031914882SAlex Richardson	ldrd	data1a, data1b, [src, #16]
8131914882SAlex Richardson	uadd8	data1a, data1a, const_m1	/* GE[n] = (byte n != 0).  */
8231914882SAlex Richardson	add	result, result, #8
8331914882SAlex Richardson	sel	data1a, const_0, const_m1	/* 0xff at each NUL, else 0x00.  */
8431914882SAlex Richardson	uadd8	data1b, data1b, const_m1
8531914882SAlex Richardson	sel	data1b, data1a, const_m1	/* Only used if d1a == 0.  */
8631914882SAlex Richardson	cbnz	data1b, L(null_found)
8731914882SAlex Richardson
8831914882SAlex Richardson	/* Bytes 24-31.  */
8931914882SAlex Richardson	ldrd	data1a, data1b, [src, #24]
	/* Advance to the next 32-byte group; result does not depend on src.  */
9031914882SAlex Richardson	add	src, src, #32
9131914882SAlex Richardson	uadd8	data1a, data1a, const_m1	/* GE[n] = (byte n != 0).  */
9231914882SAlex Richardson	add	result, result, #8
9331914882SAlex Richardson	sel	data1a, const_0, const_m1	/* 0xff at each NUL, else 0x00.  */
9431914882SAlex Richardson	uadd8	data1b, data1b, const_m1
9531914882SAlex Richardson	sel	data1b, data1a, const_m1	/* Only used if d1a == 0.  */
	/* cbz/cbnz can only branch forwards, so the backward loop branch needs
	   an explicit compare.  */
9631914882SAlex Richardson	cmp	data1b, #0
9731914882SAlex Richardson	beq	L(loop_aligned)
9831914882SAlex Richardson
	/* A NUL lies in the doubleword just checked.  result holds that
	   doubleword's offset from srcin; data1a/data1b hold the markers.  */
9931914882SAlex RichardsonL(null_found):
	/* Remember the CFI state here so that L(misaligned8), which is placed
	   after the epilogue, can restore it.  */
100*072a4ba8SAndrew Turner	.cfi_remember_state
10131914882SAlex Richardson	cmp	data1a, #0
	/* No NUL in the first word: skip its 4 bytes and use the second word's
	   marker instead.  */
10231914882SAlex Richardson	itt	eq
10331914882SAlex Richardson	addeq	result, result, #4
10431914882SAlex Richardson	moveq	data1a, data1b
	/* Little-endian keeps the lowest-addressed byte in the least significant
	   lane; byte-reverse so that clz finds the lowest-addressed NUL.  */
10531914882SAlex Richardson#ifndef __ARMEB__
10631914882SAlex Richardson	rev	data1a, data1a
10731914882SAlex Richardson#endif
	/* Leading zero bits = 8 * (index of the first NUL within the word).  */
10831914882SAlex Richardson	clz	data1a, data1a
10931914882SAlex Richardson	add	result, result, data1a, lsr #3	/* Bits -> Bytes.  */
	/* Macro from asmdefs.h (not visible here); presumably restores what the
	   prologue saved and returns -- confirm.  */
110*072a4ba8SAndrew Turner	epilogue 4 5 push_ip=HAVE_PAC_LEAF
11131914882SAlex Richardson
	/* Entry path for a start address that is not 8-byte aligned
	   (tmp1 = srcin & 7, in 1..7).  Loads the aligned doubleword containing
	   the first byte and forces every byte in front of srcin to be non-zero
	   so that it cannot be mistaken for the terminator, then joins the main
	   loop.  */
11231914882SAlex RichardsonL(misaligned8):
	/* Back to the CFI state saved at L(null_found).  */
113*072a4ba8SAndrew Turner	.cfi_restore_state
11431914882SAlex Richardson	ldrd	data1a, data1b, [src]
	/* tmp2 = offset of the first byte within its 32-bit word.  */
11531914882SAlex Richardson	and	tmp2, tmp1, #3
	/* result = -(srcin & 7), i.e. this doubleword's offset from srcin.  */
11631914882SAlex Richardson	rsb	result, tmp1, #0
11731914882SAlex Richardson	lsl	tmp2, tmp2, #3			/* Bytes -> bits.  */
	/* NE when the first byte lives in the second word (data1b).  The flags
	   are consumed by the itt below; nothing in between sets them.  */
11831914882SAlex Richardson	tst	tmp1, #4
11931914882SAlex Richardson	pld	[src, #64]
	/* tmp2 = all-ones with the byte lanes in front of the start cleared.  */
12031914882SAlex Richardson	S2HI	tmp2, const_m1, tmp2
	/* OR in the complement: lanes in front of the start become 0xff.  */
12131914882SAlex Richardson	orn	data1a, data1a, tmp2
	/* Start is in the second word: mask data1b instead and make the whole
	   first word non-zero (this overrides the orn just applied to it).  */
12231914882SAlex Richardson	itt	ne
12331914882SAlex Richardson	ornne	data1b, data1b, tmp2
12431914882SAlex Richardson	movne	data1a, const_m1
	/* const_0 shares r4 with tmp1, so it is initialised only now that tmp1
	   is no longer needed.  */
12531914882SAlex Richardson	mov	const_0, #0
12631914882SAlex Richardson	b	L(start_realigned)
12731914882SAlex Richardson
12831914882SAlex RichardsonEND (__strlen_armv6t2)
12931914882SAlex Richardson
13031914882SAlex Richardson#endif /* __ARM_ARCH >= 6 && __ARM_ARCH_ISA_THUMB == 2  */
131