/*-
 * Copyright (c) 2013 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Matt Thomas of 3am Software Foundry.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>

RCSID("$NetBSD: strchr_arm.S,v 1.5 2013/02/08 02:19:35 matt Exp $")

/*
 * BYTEn selects, within a word loaded from memory, the lane holding the
 * byte at offset n from the word's address.  "lshi" is the shift that
 * moves a lane towards higher memory offsets.  Both depend on endianness.
 */
#ifdef __ARMEL__
#define	BYTE0	0x000000ff
#define	BYTE1	0x0000ff00
#define	BYTE2	0x00ff0000
#define	BYTE3	0xff000000
#define	lshi	lsl
#else
#define	BYTE0	0xff000000
#define	BYTE1	0x00ff0000
#define	BYTE2	0x0000ff00
#define	BYTE3	0x000000ff
#define	lshi	lsr
#endif

/*
 * char *strchr(const char *s, int c)
 *
 * In:      r0 = s
 *          r1 = c (only the low 8 bits take part in the comparison)
 * Out:     r0 = address of the first byte equal to (c & 0xff), or 0 when
 *               the terminating NUL is met first.  Looking for NUL itself
 *               yields the address of the terminator.
 * Scratch: r2, r3, ip and the condition flags; r1 as well on ARMv6+.
 *
 * The string is walked a byte at a time until r0 is word aligned, then a
 * whole word per iteration.
 */
	.text
ENTRY(strchr)
	and	r2, r1, #0xff		/* r2 = character to look for */

.Lalign_loop:
	tst	r0, #3			/* pointer on a word boundary yet? */
	beq	.Lword_setup		/* yes, go a word at a time */
	ldrb	r3, [r0], #1		/* r3 = *r0++ */
	cmp	r3, r2			/* found the character? */
	subeq	r0, r0, #1		/* yes, undo the post-increment */
	RETc(eq)			/* ... and hand the pointer back */
	teq	r3, #0			/* end of string? */
	bne	.Lalign_loop		/* not yet, look at the next byte */
	mov	r0, #0			/* ran off the end: return NULL */
	RET

.Lword_setup:
	/* Copy the wanted character into all four lanes of r2. */
	orr	r2, r2, r2, lsl #8
	orr	r2, r2, r2, lsl #16
#if defined(_ARM_ARCH_7)
	movw	r1, #0xfefe		/* r1 = 0xfefefefe (254 per lane) */
	movt	r1, #0xfefe
#elif defined(_ARM_ARCH_6)
	mov	r1, #0xfe		/* r1 = 0xfefefefe (254 per lane) */
	orr	r1, r1, r1, lsl #8
	orr	r1, r1, r1, lsl #16
#endif /* _ARM_ARCH_6 */

.Lword_loop:
	ldr	r3, [r0], #4		/* fetch a word, step past it */
#if defined(_ARM_ARCH_6)
	/*
	 * UQADD8 is a per-lane unsigned saturating add.  Adding 254 turns
	 * every non-zero lane into 255 and leaves a zero lane at 254.
	 * Doing that to the raw word flags NULs; doing it to (word ^ r2)
	 * flags lanes equal to the wanted character.  ANDing the two and
	 * inverting leaves 0x01 in each "interesting" lane and 0x00
	 * everywhere else.
	 */
	uqadd8	ip, r3, r1		/* ip: 254 in NUL lanes, else 255 */
	eor	r3, r3, r2		/* matching lanes become zero */
	uqadd8	r3, r3, r1		/* r3: 254 in match lanes, else 255 */
	and	r3, r3, ip		/* 254 where NUL or match */
	mvns	r3, r3			/* 0x01 where NUL or match */
	beq	.Lword_loop		/* nothing of interest, next word */

	/*
	 * This word holds a NUL, a match, or both; a match only counts
	 * if it sits before the first NUL.
	 */
	teq	r2, #0			/* is NUL what we are looking for? */
	beq	.Llocate		/* yes, every flagged lane is a hit */
	mvns	ip, ip			/* ip = 0x01 in NUL lanes; any? */
	beq	.Llocate		/* none, so r3 holds only matches */
	bics	r3, r3, ip		/* drop the NUL lanes from r3 */
	beq	.Lnot_found		/* nothing left: only NULs */
	movs	ip, ip, lshi #8		/* smear each NUL flag over all ... */
	orrne	ip, ip, ip, lshi #8	/* ... lanes at higher offsets */
	orrne	ip, ip, ip, lshi #8
	bics	r3, r3, ip		/* discard matches beyond a NUL */
	beq	.Lnot_found		/* none came before the NUL */

.Llocate:
	/*
	 * Each flagged lane holds 0x01.  With the lowest memory offset in
	 * the most significant lane, CLZ yields 8 * offset + 7, so a
	 * shift right by 3 gives the offset of the first flagged byte.
	 */
#ifdef __ARMEL__
	rev	r3, r3			/* lowest offset to the top lane */
#endif
	clz	r3, r3
	sub	r0, r0, #4		/* back to the start of this word */
	add	r0, r0, r3, lsr #3	/* plus the byte offset of the hit */
	RET

.Lnot_found:
	mov	r0, #0			/* return NULL */
	RET
#else
	/*
	 * No saturating byte add available: probe the lanes one by one.
	 * The conditional chain stops with Z set at the first lane that
	 * is either NUL (r3 lane == 0) or a match (ip lane == 0).
	 */
	eor	ip, r3, r2		/* matching lanes become zero */
	tst	r3, #BYTE0		/* byte 0 NUL? */
	tstne	ip, #BYTE0		/* else byte 0 a match? */
	tstne	r3, #BYTE1		/* else byte 1 NUL? */
	tstne	ip, #BYTE1		/* else byte 1 a match? */
	tstne	r3, #BYTE2		/* else byte 2 NUL? */
	tstne	ip, #BYTE2		/* else byte 2 a match? */
	tstne	r3, #BYTE3		/* else byte 3 NUL? */
	tstne	ip, #BYTE3		/* else byte 3 a match? */
	bne	.Lword_loop		/* neither in any lane, next word */

	sub	r2, r0, #4		/* r2 = address of this word */
	mov	r0, #0			/* default result is NULL */

	/* Walk the lanes in memory order: match first, then NUL. */
	tst	ip, #BYTE0		/* byte 0 a match? */
	moveq	r0, r2			/* yes, result is word + 0 */
	RETc(eq)
	tst	r3, #BYTE0		/* byte 0 NUL? */
	RETc(eq)			/* yes, result stays NULL */

	tst	ip, #BYTE1		/* byte 1 a match? */
	addeq	r0, r2, #1		/* yes, result is word + 1 */
	RETc(eq)
	tst	r3, #BYTE1		/* byte 1 NUL? */
	RETc(eq)			/* yes, result stays NULL */

	tst	ip, #BYTE2		/* byte 2 a match? */
	addeq	r0, r2, #2		/* yes, result is word + 2 */
	RETc(eq)
	tst	r3, #BYTE2		/* byte 2 NUL? */
	RETc(eq)			/* yes, result stays NULL */

	/*
	 * Bytes 0-2 were neither NUL nor a match, so byte 3 ended the
	 * loop: it is either the match or the NUL (result stays NULL).
	 */
	tst	ip, #BYTE3		/* byte 3 a match? */
	addeq	r0, r2, #3		/* yes, result is word + 3 */
	RET
#endif /* _ARM_ARCH_6 */
END(strchr)