1/*- 2 * Copyright (c) 2013 The NetBSD Foundation, Inc. 3 * All rights reserved. 4 * 5 * This code is derived from software contributed to The NetBSD Foundation 6 * by Matt Thomas of 3am Software Foundry. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 18 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 19 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 20 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 21 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 25 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 27 * POSSIBILITY OF SUCH DAMAGE. 28 */ 29 30#include <machine/asm.h> 31 32RCSID("$NetBSD: strrchr_arm.S,v 1.6 2013/08/25 06:15:06 matt Exp $") 33 34#ifdef __ARMEL__ 35#define BYTE0 0x000000ff 36#define BYTE1 0x0000ff00 37#define BYTE2 0x00ff0000 38#define BYTE3 0xff000000 39#define lshi lsl 40#define lshis lsls 41#else 42#define BYTE0 0xff000000 43#define BYTE1 0x00ff0000 44#define BYTE2 0x0000ff00 45#define BYTE3 0x000000ff 46#define lshi lsr 47#define lshis lsrs 48#endif 49 50ENTRY(strrchr) 51 ands r2, r1, #0xff /* is the byte value NUL? */ 52 bne 1f /* no, do it the hard way */ 53 push {r0, lr} /* save pointer and return addr */ 54 bl PLT_SYM(strlen) /* get length */ 55 pop {r1, r2} /* restore pointer / return addr */ 56 adds r0, r0, r1 /* add pointer to length */ 57 RETr(r2) /* return */ 58 591: mov r1, r0 /* we use r0 at the return value */ 60 movs r0, #0 /* return NULL by default */ 612: tst r1, #3 /* test for word alignment */ 62 beq .Lpre_main_loop /* finally word aligned */ 63 ldrb r3, [r1], #1 /* load a byte */ 64 cmp r3, r2 /* did it match? */ 65#ifdef __thumb__ 66 it eq 67#endif 68 subeq r0, r1, #1 /* yes, remember that it did */ 69 cmp r3, #0 /* was it NUL? */ 70 bne 2b /* no, try next byte */ 71 RET /* return */ 72.Lpre_main_loop: 73 push {r4, r5} /* save some registers */ 74#if defined(_ARM_ARCH_7) 75 movw ip, #0xfefe /* magic constant; 254 in each byte */ 76 movt ip, #0xfefe /* magic constant; 254 in each byte */ 77#elif defined(_ARM_ARCH_6) 78 mov ip, #0xfe /* put 254 in low byte */ 79 orr ip, ip, ip, lsl #8 /* move to next byte */ 80 orr ip, ip, ip, lsl #16 /* move to next halfword */ 81#endif /* _ARM_ARCH_6 */ 82 orr r2, r2, r2, lsl #8 /* move to next byte */ 83 orr r2, r2, r2, lsl #16 /* move to next halfword */ 84.Lmain_loop: 85 ldr r3, [r1], #4 /* load next word */ 86#if defined(_ARM_ARCH_6) 87 /* 88 * Add 254 to each byte using the UQADD8 (unsigned saturating add 8) 89 * instruction. For every non-NUL byte, the result for that byte will 90 * become 255. For NUL, it will be 254. When we complement the 91 * result, if the result is non-0 then we must have encountered a NUL. 92 */ 93 uqadd8 r4, r3, ip /* NUL detection happens here */ 94 usub8 r3, r3, r2 /* bias for char looked for? */ 95 uqadd8 r5, r3, ip /* char detection happens here */ 96 ands r3, r4, r5 /* merge results */ 97 mvns r3, r3 /* is the complement non-0? */ 98 beq .Lmain_loop /* no, then keep going */ 99 100 mvns r5, r5 /* get we find any matching bytes? */ 101 beq .Ldone /* no, then we hit the end, return */ 102 mvns r4, r4 /* did we encounter a NUL? */ 103 beq .Lfind_match /* no, find matching byte */ 104 /* 105 * Copy the NUL bit to the following byte lanes. Then clear any match 106 * bits in those byte lanes to prevent false positives in those bytes. 107 */ 108 bics r5, r5, r4 /* clear any NUL match bits */ 109 beq .Ldone /* no remaining matches, we're done */ 110 lshis r3, r4, #8 /* shift up a byte */ 111#ifdef __thumb__ 112 itt ne 113#endif 114 orrsne r3, r3, r3, lshi #8 /* if non 0, copy up to next byte */ 115 orrsne r3, r3, r3, lshi #8 /* if non 0, copy up to last byte */ 116 bics r5, r5, r3 /* clear match bits */ 117 beq .Ldone /* no remaining matches, we're done */ 118.Lfind_match: 119#ifdef __ARMEL__ 120 rev r5, r5 /* we want this in BE for the CLZ */ 121#endif 122 /* 123 * If we have multiple matches, we want to the select the "last" match 124 * in the word which will be the lowest bit set. 125 */ 126 subs r3, r5, #1 /* subtract 1 */ 127 ands r3, r3, r5 /* and with mask */ 128 eors r5, r5, r3 /* only have the lowest bit set left */ 129 clz r5, r5 /* count how many leading zeros */ 130 add r0, r1, r5, lsr #3 /* divide that by 8 and add to count */ 131 subs r0, r0, #4 /* compensate for the post-inc */ 132 cmp r4, #0 /* did we read any NULs? */ 133 beq .Lmain_loop /* no, get next word */ 134#else 135 /* 136 * No fancy shortcuts so just test each byte lane for a NUL. 137 * (other tests for NULs in a word take more instructions/cycles). 138 */ 139 eor r4, r3, r2 /* xor .. */ 140 tst r3, #BYTE0 /* is byte 0 a NUL? */ 141 beq .Ldone /* yes, then we're done */ 142 tst r4, #BYTE0 /* is byte 0 a match? */ 143 subeq r0, r1, #4 /* yes, remember its location */ 144 tst r3, #BYTE1 /* is byte 1 a NUL? */ 145 beq .Ldone /* yes, then we're done */ 146 tst r4, #BYTE1 /* is byte 1 a match? */ 147 subeq r0, r1, #3 /* yes, remember its location */ 148 tst r3, #BYTE2 /* is byte 2 a NUL? */ 149 beq .Ldone /* yes, then we're done */ 150 tst r4, #BYTE2 /* is byte 2 a match? */ 151 subeq r0, r1, #2 /* yes, remember its location */ 152 tst r3, #BYTE3 /* is byte 3 a NUL? */ 153 beq .Ldone /* yes, then we're done */ 154 tst r4, #BYTE3 /* is byte 3 a match? */ 155 subeq r0, r1, #1 /* yes, remember its location */ 156 b .Lmain_loop 157#endif /* _ARM_ARCH_6 */ 158.Ldone: 159 pop {r4, r5} 160 RET 161END(strrchr) 162