/*
 * memchr - scan memory for a character
 *
 * Copyright (c) 2010-2022, Arm Limited.
 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */

/*
   Written by Dave Gilbert <david.gilbert@linaro.org>

   This __memchr_arm routine is optimised on a Cortex-A9 and should work on
   all ARMv7 processors.   It has a fast path for short sizes, and has
   an optimised path for large data sets; the worst case is finding the
   match early in a large data set.

 */

@ 2011-02-07 david.gilbert@linaro.org
@    Extracted from local git a5b438d861
@ 2011-07-14 david.gilbert@linaro.org
@    Import endianness fix from local git ea786f1b
@ 2011-12-07 david.gilbert@linaro.org
@    Removed unneeded cbz from align loop

	.syntax unified
#if __ARM_ARCH >= 8 && __ARM_ARCH_PROFILE == 'M'
    /* keep config inherited from -march= */
#else
	.arch armv7-a
#endif

@ This lets us check a flag in a 00/ff byte easily in either endianness.
@ CHARTSTMASK(c) is a single-bit mask that lies inside the c-th byte (in
@ memory order, c = 0..3) of a word loaded into a register.
@ Fully parenthesised so it stays one value inside a larger expression.
#ifdef __ARMEB__
#define CHARTSTMASK(c) (1 << (31 - ((c) * 8)))
#else
#define CHARTSTMASK(c) (1 << ((c) * 8))
#endif
	.thumb
#include "asmdefs.h"


@ ---------------------------------------------------------------------------
@ __memchr_arm - find the first occurrence of a byte in a memory area.
@ C-level view (presumably): void *__memchr_arm(const void *s, int c, size_t n)
@
@ In:    r0 = start of memory to scan
@        r1 = character to look for (only the low 8 bits are used)
@        r2 = length in bytes, treated as unsigned
@ Out:   r0 = pointer to the first matching byte, or NULL if not found
@ Uses:  r1-r3, the condition flags and the GE bits as scratch.
@        r4-r7 are pushed/popped around the double-word loop (16 bytes).
@ Note:  prologue/epilogue are macros from asmdefs.h (not visible here);
@        this code relies on epilogue returning to the caller.
@ ---------------------------------------------------------------------------
	.thumb_func
	.align 2
	.p2align 4,,15
	.global __memchr_arm
	.type __memchr_arm,%function
	.fnstart
	.cfi_startproc
__memchr_arm:
	prologue
	and	r1,r1,#0xff	@ The caller may pass more than a char: keep the low byte only

	cmp	r2,#16		@ If it is short do not bother with anything clever
	blo	20f		@ Unsigned: the length is a size_t, so a length with the
				@ top bit set is huge, not negative, and must not be
				@ sent down the byte-at-a-time path

	tst	r0, #7		@ If it is already aligned skip the next bit
	beq	10f

	@ Work up to an aligned point (at most 7 bytes, and r2 >= 16 here)
5:
	ldrb	r3, [r0],#1
	subs	r2, r2, #1
	cmp	r3, r1
	beq	50f		@ If it matches exit found
	tst	r0, #7
	bne	5b		@ If not aligned yet then do next byte

10:
	@ At this point, we are aligned, we know we have at least 8 bytes to work with
	push	{r4,r5,r6,r7}
	.cfi_adjust_cfa_offset 16
	.cfi_rel_offset 4, 0
	.cfi_rel_offset 5, 4
	.cfi_rel_offset 6, 8
	.cfi_rel_offset 7, 12
	orr	r1, r1, r1, lsl #8	@ expand the match word across to all bytes
	orr	r1, r1, r1, lsl #16
	bic	r4, r2, #7	@ Number of bytes covered by whole double words
	mvns	r7, #0		@ all Fs
	movs	r3, #0

15:
	ldmia	r0!,{r5,r6}
	subs	r4, r4, #8	@ Z flag from here is consumed by the bne below
	eor	r5,r5, r1	@ Get it so that r5,r6 have 00s where the bytes match the target
	eor	r6,r6, r1
	uadd8	r5, r5, r7	@ Parallel add 0xff - sets the GE bits for anything that was not 0
	sel	r5, r3, r7	@ bytes are 00 for none-00 bytes, or ff for 00 bytes - NOTE INVERSION
	uadd8	r6, r6, r7	@ Parallel add 0xff - sets the GE bits for anything that was not 0
	sel	r6, r5, r7	@ chained....bytes are 00 for none-00 bytes, or ff for 00 bytes - NOTE INVERSION
				@ (a non-matching byte takes the byte from r5, so r6 is
				@ non-zero if either word contained a match)
	cbnz	r6, 60f
	bne	15b		@ (Flags from the subs above) If not run out of bytes then go around again

	pop	{r4,r5,r6,r7}
	.cfi_restore 7
	.cfi_restore 6
	.cfi_restore 5
	.cfi_restore 4
	.cfi_adjust_cfa_offset -16
	and	r1,r1,#0xff	@ Get r1 back to a single character from the expansion above
	and	r2,r2,#7	@ Leave the count remaining as the number after the double words have been done

20:
	cbz	r2, 40f		@ 0 length or hit the end already then not found

21:	@ Post aligned section, or just a short call
	ldrb	r3,[r0],#1
	subs	r2,r2,#1
	eor	r3,r3,r1	@ r3 = 0 if match - does not break flags from sub
	cbz	r3, 50f
	bne	21b		@ on r2 flags

40:
	.cfi_remember_state
	movs	r0,#0		@ not found
	epilogue

50:
	.cfi_restore_state
	.cfi_remember_state
	subs	r0,r0,#1	@ found (r0 was post-incremented past the matching byte)
	epilogue

60:	@ We are here because the fast path found a hit - now we have to track down exactly which word it was
	@ r0 points to the start of the double word after the one that was tested
	@ r5 has the 00/ff pattern for the first word, r6 has the chained value
	.cfi_restore_state	@ Standard post-prologue state
	.cfi_adjust_cfa_offset 16
	.cfi_rel_offset 4, 0
	.cfi_rel_offset 5, 4
	.cfi_rel_offset 6, 8
	.cfi_rel_offset 7, 12
	cmp	r5, #0
	itte	eq
	moveq	r5, r6		@ the end is in the 2nd word
	subeq	r0,r0,#3	@ Points to 2nd byte of 2nd word
	subne	r0,r0,#7	@ or 2nd byte of 1st word

	@ r0 currently points to the 2nd byte of the word containing the hit,
	@ i.e. one past the candidate; the final subs at 61 removes that bias.
	tst	r5, # CHARTSTMASK(0)	@ 1st character
	bne	61f
	adds	r0,r0,#1
	tst	r5, # CHARTSTMASK(1)	@ 2nd character
	ittt	eq
	addeq	r0,r0,#1
	tsteq	r5, # (3<<15)		@ 2nd & 3rd character
	@ Bits 15 and 16 straddle the 2nd and 3rd bytes in either endianness;
	@ the 2nd byte is known to be 00 here, so this only tests the 3rd.
	@ If not the 3rd must be the last one
	addeq	r0,r0,#1

61:
	pop	{r4,r5,r6,r7}
	.cfi_restore 7
	.cfi_restore 6
	.cfi_restore 5
	.cfi_restore 4
	.cfi_adjust_cfa_offset -16
	subs	r0,r0,#1
	epilogue
	.cfi_endproc
	.cantunwind
	.fnend

	.size	__memchr_arm, . - __memchr_arm