/*
 * Written by J.T. Conklin <jtc@acorntoolworks.com>
 * Public domain.
 */

#include <machine/asm.h>

#if defined(LIBC_SCCS)
	RCSID("$NetBSD: memchr.S,v 1.1 2005/12/20 19:28:51 christos Exp $")
#endif

/*
 * void *memchr(const void *b, int c, size_t len)
 *
 * Syntax: GAS / AT&T, x86-64.
 *
 * In:	%rdi = b	start of the buffer
 *	%sil = c	only the low 8 bits are compared
 *	%rdx = len	number of bytes to examine
 * Out:	%rax = address of the first byte equal to (unsigned char)c,
 *	       or 0 if none of the len bytes match
 * Clobbers: %rcx, %rdx, %rsi, %rdi, %r8, %r9, flags
 *
 * Strategy: compare byte-by-byte until %rdi is 8-byte aligned, then
 * scan one aligned 64-bit word per iteration while at least 8 bytes
 * remain, and finish any tail byte-by-byte.  Words are only loaded when
 * all 8 of their bytes lie inside the buffer.
 */
ENTRY(memchr)
	movzbl	%sil,%ecx		/* rcx = (unsigned char)c, upper bits 0 */

	testq	%rdx,%rdx		/* len == 0? */
	je	.Lzero

	/*
	 * Align to word boundary.
	 * Invariant: rdx = bytes remaining, always > 0 at .Lalign.
	 */
.Lalign:
	testb	$7,%dil
	je	.Lword_aligned
	cmpb	(%rdi),%cl
	je	.Lfound
	incq	%rdi
	decq	%rdx
	jnz	.Lalign
	jmp	.Lzero

.Lword_aligned:
	movabsq	$0x0101010101010101,%r8
	movabsq	$0x8080808080808080,%r9

	/*
	 * Copy ch to all bytes in the word.  rcx < 256, so the multiply
	 * produces no carries between bytes; %cl still holds ch afterwards.
	 */
	imulq	%r8,%rcx

	_ALIGN_TEXT
.Lloop:
	cmpq	$8,%rdx			/* fewer than 8 bytes left? */
	jb	.Lbyte
	movq	(%rdi),%rsi
	addq	$8,%rdi
	subq	$8,%rdx

	/*
	 * x = word ^ pattern has a 0x00 byte exactly where the word
	 * matches ch.  (x - 0x01..01) & ~x & 0x80..80 is non-zero iff x
	 * contains a zero byte, and its lowest set bit is bit 7 of the
	 * lowest-addressed such byte.  The ~x term is what keeps bytes
	 * that merely have bit 7 set from triggering the test.
	 */
	xorq	%rcx,%rsi		/* rsi = x */
	movq	%rsi,%rax
	notq	%rax			/* rax = ~x */
	subq	%r8,%rsi		/* rsi = x - 0x01..01 */
	andq	%rax,%rsi
	andq	%r9,%rsi
	je	.Lloop			/* no byte of this word matches */

	/* Match in the word just consumed, which starts at rdi - 8. */
	bsfq	%rsi,%rsi		/* bit index = 8 * byte index + 7 */
	shrq	$3,%rsi			/* rsi = byte index within the word */
	leaq	-8(%rdi,%rsi),%rax
	ret

	/* Fewer than 8 bytes remain (possibly none). */
.Lbyte:
	testq	%rdx,%rdx
	je	.Lzero
.Lbyte_loop:
	cmpb	(%rdi),%cl
	je	.Lfound
	incq	%rdi
	decq	%rdx
	jnz	.Lbyte_loop

.Lzero:
	xorl	%eax,%eax		/* not found: return NULL */
	ret

.Lfound:
	movq	%rdi,%rax		/* rdi points at the matching byte */
	ret