/*
 * Written by J.T. Conklin <jtc@acorntoolworks.com>
 * Public domain.
 */

#include <machine/asm.h>

#if defined(LIBC_SCCS)
	RCSID("$NetBSD: memchr.S,v 1.2 2014/03/22 19:38:46 jakllsch Exp $")
#endif

/*
 * void *memchr(const void *s, int c, size_t n)
 *
 * Syntax: AT&T (GAS), 32-bit x86.  Arguments are read from the stack.
 *
 * In (offsets are relative to %esp after the initial pushl %esi):
 *	 8(%esp) = s	start of the buffer to scan
 *	12(%esp) = c	byte to look for (only the low 8 bits are used)
 *	16(%esp) = n	number of bytes to scan
 * Out:
 *	%eax = address of the first byte equal to c, or 0 if none of the
 *	       n bytes matches
 * Clobbers:
 *	%ecx, %edx, flags.  %esi is saved on entry and restored on exit.
 *
 * Register roles:
 *	%eax = current scan pointer
 *	%ecx = c (zero-extended); after .Lword_aligned, c replicated into
 *	       all four bytes
 *	%edx = scratch
 *	%esi = bytes remaining
 *
 * Strategy: scan byte-by-byte until the pointer is 4-byte aligned, then
 * scan a word at a time while at least 4 bytes remain, then finish the
 * tail byte-by-byte.
 */
ENTRY(memchr)
	pushl	%esi
	movl	8(%esp),%eax
	movzbl	12(%esp),%ecx
	movl	16(%esp),%esi

	/*
	 * Align to word boundary.
	 * Consider unrolling loop?
	 */
	testl	%esi,%esi		/* nbytes == 0? */
	je	.Lzero
.Lalign:
	testb	$3,%al			/* pointer 4-byte aligned yet? */
	je	.Lword_aligned
	cmpb	(%eax),%cl
	je	.Ldone
	incl	%eax
	decl	%esi
	jnz	.Lalign
	jmp	.Lzero			/* ran out of bytes while aligning */

.Lword_aligned:
	/* copy char to all bytes in word */
	movb	%cl,%ch
	movl	%ecx,%edx
	sall	$16,%ecx
	orl	%edx,%ecx

	_ALIGN_TEXT
.Lloop:
	cmpl	$3,%esi			/* fewer than 4 bytes left? */
	jbe	.Lbyte
	movl	(%eax),%edx
	addl	$4,%eax			/* %eax now points past this word */
	xorl	%ecx,%edx		/* bytes equal to ch become 0 */
	subl	$4,%esi
	subl	$0x01010101,%edx	/* a zero byte wraps, setting bit 7 */
	testl	$0x80808080,%edx
	je	.Lloop

	/*
	 * In rare cases, the above loop may exit prematurely. We must
	 * return to the loop if none of the bytes in the word are
	 * equal to ch.
	 *
	 * (The subtract-and-test above never misses a matching byte, but
	 * any byte that still has bit 7 set after the subtraction trips
	 * it as well, so each byte of the word is re-checked below.)
	 */

	/*
	 * High load-use latency on the Athlon leads to significant
	 * stalls, so we preload the next char as soon as possible
	 * instead of using cmp mem8, reg8.
	 *
	 * Alignment here avoids a stall on the Athlon, even though
	 * it's not a branch target.
	 *
	 * Note that movb does not modify the flags, so each jne below
	 * still acts on the result of the cmpb preceding the preload.
	 */
	_ALIGN_TEXT
	cmpb	-4(%eax),%cl		/* 1st byte == ch? */
	movb	-3(%eax),%dl		/* preload 2nd byte */
	jne	.Lcheck_2nd
	subl	$4,%eax
	jmp	.Ldone

	_ALIGN_TEXT
.Lcheck_2nd:
	cmpb	%dl,%cl			/* 2nd byte == ch? */
	movb	-2(%eax),%dl		/* preload 3rd byte */
	jne	.Lcheck_3rd
	subl	$3,%eax
	jmp	.Ldone

	_ALIGN_TEXT
.Lcheck_3rd:
	cmpb	%dl,%cl			/* 3rd byte == ch? */
	movb	-1(%eax),%dl		/* preload 4th byte */
	jne	.Lcheck_4th
	subl	$2,%eax
	jmp	.Ldone

	_ALIGN_TEXT
.Lcheck_4th:
	cmpb	%dl,%cl			/* 4th byte == ch? */
	jne	.Lloop			/* false alarm: resume word scan */
	decl	%eax
	jmp	.Ldone

	/* Fewer than 4 bytes remain: finish one byte at a time. */
.Lbyte:
	testl	%esi,%esi
	je	.Lzero
.Lbyte_loop:
	cmpb	(%eax),%cl
	je	.Ldone
	incl	%eax
	decl	%esi
	jnz	.Lbyte_loop

.Lzero:
	xorl	%eax,%eax		/* not found: return NULL */

.Ldone:
	popl	%esi
	ret
END(memchr)