/*	$NetBSD: memchr.S,v 1.6 2014/03/22 19:16:34 jakllsch Exp $	*/

/*-
 * Copyright (c) 2009 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by David Laight.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>

#if defined(LIBC_SCCS)
	RCSID("$NetBSD: memchr.S,v 1.6 2014/03/22 19:16:34 jakllsch Exp $")
#endif

/*
 * void *memchr(const void *s, int c, size_t n)
 *
 * Find the first byte equal to (unsigned char)c in the n bytes at s.
 * Syntax is AT&T (source, destination).
 *
 * In:	%rdi = s, %sil = c (higher bits of %rsi are ignored), %rdx = n
 * Out:	%rax = address of the first matching byte, or 0 if there is none
 * Scratch: %rax, %rcx, %rsi, %rdi, %r8, %r9, %r10, %r11, flags
 *
 * Register roles once set up:
 *	%r8  = 0x0101010101010101
 *	%r9  = 0x8080808080808080
 *	%rsi = search byte replicated into all eight byte lanes
 *	%r10 = exclusive end of the buffer (saturated, see below)
 *	%rdi = address of the next aligned 8-byte word to read
 *
 * The buffer is scanned one aligned 8-byte word at a time.  Each word is
 * xor'ed with %rsi so that matching bytes become zero, then
 *	((x - 0x01..01) & 0x80..80) & ~x
 * is non-zero exactly when x contains a zero byte.  Borrows can only flag
 * bytes above a genuine zero byte, so the lowest set bit always identifies
 * the first match in memory order.
 *
 * Whole aligned words are read, so bytes before s and after s + n may be
 * loaded; a match is only reported when its address is below %r10.
 *
 * If s + n wraps around the address space (e.g. a caller passing
 * n = SIZE_MAX because a match is known to exist) the limit is clamped to
 * the highest address rather than being allowed to wrap below s, which
 * would make every match look out of range.
 * NOTE(review): the clamp assumes no object contains the very last byte of
 * the address space - confirm this holds for all users of this file.
 *
 * The instruction sequences used try to avoid data dependencies
 * between adjacent instructions (to allow parallel execution).
 * The 'imul' for %r9 could be put into the delay following the
 * memory read (ie inside the loop) at no obvious cost - except
 * that the loop is currently exactly 32 bytes - 2 fetch blocks!.
 *
 * I don't think aligning any of the other branch targets is useful.
 */

ENTRY(memchr)
	movabsq	$0x0101010101010101,%r8
	movq	%rdi,%r10
	addq	%rdx,%r10		/* limit of buffer to scan */
	sbbq	%rax,%rax		/* all ones iff s + n wrapped */
	orq	%rax,%r10		/* ... in which case saturate limit */
	movzbq	%sil,%rsi		/* mask high bits! */

	/* 'directpath' imuls can execute 3 at a time ... (amd) */
	imul	%r8,%rsi		/* search byte replicated in word */
	imul	$0x80,%r8,%r9		/* 0x8080808080808080 */
	test	$7,%dil
	jnz	20f			/* jump if misaligned */
	jmp	1f			/* jump over the alignment padding */

	_ALIGN_TEXT			/* keep the loop start aligned */
1:
	cmpq	%r10,%rdi		/* end of buffer ? */
	jae	30f			/* jump if so */

	movq	(%rdi),%rax		/* value to check */
	addq	$8,%rdi
	xorq	%rsi,%rax		/* now looking for zeros */
2:
	mov	%rax,%rcx
	subq	%r8,%rax		/* x - 0x01 */
	not	%rcx
	andq	%r9,%rax		/* (x - 0x01) & 0x80 */
	andq	%rcx,%rax		/* ((x - 0x01) & 0x80) & ~x */
	je	1b			/* jump if not found */

/* Found byte in word, get its address */
	bsf	%rax,%rax		/* bit index of lowest flagged byte */
	shr	$3,%eax			/* bit index -> byte index 0..7 */
	lea	-8(%rax,%rdi),%rax	/* %rdi is already past this word */
	cmpq	%r10,%rax		/* need to check not beyond buffer */
	jae	30f
	rep
	ret				/* amd - no ret after jmp */

/* Input misaligned, read aligned and make low bytes invalid */
20:
	mov	%dil,%cl		/* misalignment amount 1..7 (+high bits) */
	and	$~7,%dil		/* %rdi now start of word */
	test	%rdx,%rdx		/* zero length, don't read */
	jz	30f

	neg	%cl			/* 7..1 (+high bits) */
	mov	(%rdi),%rax		/* word containing first byte */
	addq	$8,%rdi
	and	$7,%cl			/* 7..1 */

	mov	%r8,%r11		/* any value with bits in each byte */
	shl	$3,%cl			/* 56..8 */
	xorq	%rsi,%rax		/* now looking for zeros */

	/* Set low bytes non-zero */
	shr	%cl,%r11		/* non-zero in unwanted bytes */
	or	%r11,%rax		/* low bytes now set */
	jmp	2b

/* Not found */
30:	xorl	%eax,%eax		/* return NULL (also clears high half) */
	ret
END(memchr)