xref: /freebsd-src/contrib/cortex-strings/src/arm/memchr.S (revision 8c4282b370bd66908b45b6a223226a9fc2b69d57)
1*09a53ad8SAndrew Turner/* Copyright (c) 2010-2011, Linaro Limited
2*09a53ad8SAndrew Turner   All rights reserved.
3*09a53ad8SAndrew Turner
4*09a53ad8SAndrew Turner   Redistribution and use in source and binary forms, with or without
5*09a53ad8SAndrew Turner   modification, are permitted provided that the following conditions
6*09a53ad8SAndrew Turner   are met:
7*09a53ad8SAndrew Turner
8*09a53ad8SAndrew Turner      * Redistributions of source code must retain the above copyright
9*09a53ad8SAndrew Turner      notice, this list of conditions and the following disclaimer.
10*09a53ad8SAndrew Turner
11*09a53ad8SAndrew Turner      * Redistributions in binary form must reproduce the above copyright
12*09a53ad8SAndrew Turner      notice, this list of conditions and the following disclaimer in the
13*09a53ad8SAndrew Turner      documentation and/or other materials provided with the distribution.
14*09a53ad8SAndrew Turner
15*09a53ad8SAndrew Turner      * Neither the name of Linaro Limited nor the names of its
16*09a53ad8SAndrew Turner      contributors may be used to endorse or promote products derived
17*09a53ad8SAndrew Turner      from this software without specific prior written permission.
18*09a53ad8SAndrew Turner
19*09a53ad8SAndrew Turner   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20*09a53ad8SAndrew Turner   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21*09a53ad8SAndrew Turner   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22*09a53ad8SAndrew Turner   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23*09a53ad8SAndrew Turner   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24*09a53ad8SAndrew Turner   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25*09a53ad8SAndrew Turner   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26*09a53ad8SAndrew Turner   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27*09a53ad8SAndrew Turner   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28*09a53ad8SAndrew Turner   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29*09a53ad8SAndrew Turner   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30*09a53ad8SAndrew Turner */
31*09a53ad8SAndrew Turner
32*09a53ad8SAndrew Turner/*
33*09a53ad8SAndrew Turner   Written by Dave Gilbert <david.gilbert@linaro.org>
34*09a53ad8SAndrew Turner
35*09a53ad8SAndrew Turner   This memchr routine is optimised on a Cortex-A9 and should work on
36*09a53ad8SAndrew Turner   all ARMv7 processors.   It has a fast past for short sizes, and has
37*09a53ad8SAndrew Turner   an optimised path for large data sets; the worst case is finding the
38*09a53ad8SAndrew Turner   match early in a large data set.
39*09a53ad8SAndrew Turner
40*09a53ad8SAndrew Turner */
41*09a53ad8SAndrew Turner
42*09a53ad8SAndrew Turner@ 2011-02-07 david.gilbert@linaro.org
43*09a53ad8SAndrew Turner@    Extracted from local git a5b438d861
44*09a53ad8SAndrew Turner@ 2011-07-14 david.gilbert@linaro.org
45*09a53ad8SAndrew Turner@    Import endianness fix from local git ea786f1b
46*09a53ad8SAndrew Turner@ 2011-12-07 david.gilbert@linaro.org
47*09a53ad8SAndrew Turner@    Removed unneeded cbz from align loop
48*09a53ad8SAndrew Turner
49*09a53ad8SAndrew Turner	.syntax unified
50*09a53ad8SAndrew Turner	.arch armv7-a
51*09a53ad8SAndrew Turner
52*09a53ad8SAndrew Turner@ this lets us check a flag in a 00/ff byte easily in either endianness
53*09a53ad8SAndrew Turner#ifdef __ARMEB__
54*09a53ad8SAndrew Turner#define CHARTSTMASK(c) 1<<(31-(c*8))
55*09a53ad8SAndrew Turner#else
56*09a53ad8SAndrew Turner#define CHARTSTMASK(c) 1<<(c*8)
57*09a53ad8SAndrew Turner#endif
58*09a53ad8SAndrew Turner	.text
59*09a53ad8SAndrew Turner	.thumb
60*09a53ad8SAndrew Turner
61*09a53ad8SAndrew Turner@ ---------------------------------------------------------------------------
62*09a53ad8SAndrew Turner	.thumb_func
63*09a53ad8SAndrew Turner	.align 2
64*09a53ad8SAndrew Turner	.p2align 4,,15
65*09a53ad8SAndrew Turner	.global memchr
66*09a53ad8SAndrew Turner	.type memchr,%function
67*09a53ad8SAndrew Turnermemchr:
68*09a53ad8SAndrew Turner	@ r0 = start of memory to scan
69*09a53ad8SAndrew Turner	@ r1 = character to look for
70*09a53ad8SAndrew Turner	@ r2 = length
71*09a53ad8SAndrew Turner	@ returns r0 = pointer to character or NULL if not found
72*09a53ad8SAndrew Turner	and	r1,r1,#0xff	@ Don't think we can trust the caller to actually pass a char
73*09a53ad8SAndrew Turner
74*09a53ad8SAndrew Turner	cmp	r2,#16		@ If it's short don't bother with anything clever
75*09a53ad8SAndrew Turner	blt	20f
76*09a53ad8SAndrew Turner
77*09a53ad8SAndrew Turner	tst	r0, #7		@ If it's already aligned skip the next bit
78*09a53ad8SAndrew Turner	beq	10f
79*09a53ad8SAndrew Turner
80*09a53ad8SAndrew Turner	@ Work up to an aligned point
81*09a53ad8SAndrew Turner5:
82*09a53ad8SAndrew Turner	ldrb	r3, [r0],#1
83*09a53ad8SAndrew Turner	subs	r2, r2, #1
84*09a53ad8SAndrew Turner	cmp	r3, r1
85*09a53ad8SAndrew Turner	beq	50f		@ If it matches exit found
86*09a53ad8SAndrew Turner	tst	r0, #7
87*09a53ad8SAndrew Turner	bne	5b		@ If not aligned yet then do next byte
88*09a53ad8SAndrew Turner
89*09a53ad8SAndrew Turner10:
90*09a53ad8SAndrew Turner	@ At this point, we are aligned, we know we have at least 8 bytes to work with
91*09a53ad8SAndrew Turner	push	{r4,r5,r6,r7}
92*09a53ad8SAndrew Turner	orr	r1, r1, r1, lsl #8	@ expand the match word across to all bytes
93*09a53ad8SAndrew Turner	orr	r1, r1, r1, lsl #16
94*09a53ad8SAndrew Turner	bic	r4, r2, #7	@ Number of double words to work with
95*09a53ad8SAndrew Turner	mvns	r7, #0		@ all F's
96*09a53ad8SAndrew Turner	movs	r3, #0
97*09a53ad8SAndrew Turner
98*09a53ad8SAndrew Turner15:
99*09a53ad8SAndrew Turner	ldmia	r0!,{r5,r6}
100*09a53ad8SAndrew Turner	subs	r4, r4, #8
101*09a53ad8SAndrew Turner	eor	r5,r5, r1	@ Get it so that r5,r6 have 00's where the bytes match the target
102*09a53ad8SAndrew Turner	eor	r6,r6, r1
103*09a53ad8SAndrew Turner	uadd8	r5, r5, r7	@ Parallel add 0xff - sets the GE bits for anything that wasn't 0
104*09a53ad8SAndrew Turner	sel	r5, r3, r7	@ bytes are 00 for none-00 bytes, or ff for 00 bytes - NOTE INVERSION
105*09a53ad8SAndrew Turner	uadd8	r6, r6, r7	@ Parallel add 0xff - sets the GE bits for anything that wasn't 0
106*09a53ad8SAndrew Turner	sel	r6, r5, r7	@ chained....bytes are 00 for none-00 bytes, or ff for 00 bytes - NOTE INVERSION
107*09a53ad8SAndrew Turner	cbnz	r6, 60f
108*09a53ad8SAndrew Turner	bne	15b		@ (Flags from the subs above) If not run out of bytes then go around again
109*09a53ad8SAndrew Turner
110*09a53ad8SAndrew Turner	pop	{r4,r5,r6,r7}
111*09a53ad8SAndrew Turner	and	r1,r1,#0xff	@ Get r1 back to a single character from the expansion above
112*09a53ad8SAndrew Turner	and	r2,r2,#7	@ Leave the count remaining as the number after the double words have been done
113*09a53ad8SAndrew Turner
114*09a53ad8SAndrew Turner20:
115*09a53ad8SAndrew Turner	cbz	r2, 40f		@ 0 length or hit the end already then not found
116*09a53ad8SAndrew Turner
117*09a53ad8SAndrew Turner21:  @ Post aligned section, or just a short call
118*09a53ad8SAndrew Turner	ldrb	r3,[r0],#1
119*09a53ad8SAndrew Turner	subs	r2,r2,#1
120*09a53ad8SAndrew Turner	eor	r3,r3,r1	@ r3 = 0 if match - doesn't break flags from sub
121*09a53ad8SAndrew Turner	cbz	r3, 50f
122*09a53ad8SAndrew Turner	bne	21b		@ on r2 flags
123*09a53ad8SAndrew Turner
124*09a53ad8SAndrew Turner40:
125*09a53ad8SAndrew Turner	movs	r0,#0		@ not found
126*09a53ad8SAndrew Turner	bx	lr
127*09a53ad8SAndrew Turner
128*09a53ad8SAndrew Turner50:
129*09a53ad8SAndrew Turner	subs	r0,r0,#1	@ found
130*09a53ad8SAndrew Turner	bx	lr
131*09a53ad8SAndrew Turner
132*09a53ad8SAndrew Turner60:  @ We're here because the fast path found a hit - now we have to track down exactly which word it was
133*09a53ad8SAndrew Turner	@ r0 points to the start of the double word after the one that was tested
134*09a53ad8SAndrew Turner	@ r5 has the 00/ff pattern for the first word, r6 has the chained value
135*09a53ad8SAndrew Turner	cmp	r5, #0
136*09a53ad8SAndrew Turner	itte	eq
137*09a53ad8SAndrew Turner	moveq	r5, r6		@ the end is in the 2nd word
138*09a53ad8SAndrew Turner	subeq	r0,r0,#3	@ Points to 2nd byte of 2nd word
139*09a53ad8SAndrew Turner	subne	r0,r0,#7	@ or 2nd byte of 1st word
140*09a53ad8SAndrew Turner
141*09a53ad8SAndrew Turner	@ r0 currently points to the 3rd byte of the word containing the hit
142*09a53ad8SAndrew Turner	tst	r5, # CHARTSTMASK(0)	@ 1st character
143*09a53ad8SAndrew Turner	bne	61f
144*09a53ad8SAndrew Turner	adds	r0,r0,#1
145*09a53ad8SAndrew Turner	tst	r5, # CHARTSTMASK(1)	@ 2nd character
146*09a53ad8SAndrew Turner	ittt	eq
147*09a53ad8SAndrew Turner	addeq	r0,r0,#1
148*09a53ad8SAndrew Turner	tsteq	r5, # (3<<15)		@ 2nd & 3rd character
149*09a53ad8SAndrew Turner	@ If not the 3rd must be the last one
150*09a53ad8SAndrew Turner	addeq	r0,r0,#1
151*09a53ad8SAndrew Turner
152*09a53ad8SAndrew Turner61:
153*09a53ad8SAndrew Turner	pop	{r4,r5,r6,r7}
154*09a53ad8SAndrew Turner	subs	r0,r0,#1
155*09a53ad8SAndrew Turner	bx	lr
156