/*
 * Written by J.T. Conklin <jtc@acorntoolworks.com>
 * Public domain.
 */

#include <machine/asm.h>

#if defined(LIBC_SCCS)
	RCSID("$NetBSD: strcat.S,v 1.2 2014/03/22 19:38:46 jakllsch Exp $")
#endif

/*
 * char *strcat(char *dst, const char *src)
 *
 * Syntax: AT&T.  Arguments are read from the stack; after the single
 * push below, dst is at 8(%esp) and src at 12(%esp).
 *
 * Register roles:
 *	%ecx	write cursor: first walks dst to its terminating NUL,
 *		then receives the bytes of src
 *	%eax	read cursor into src; reloaded with dst for the return
 *	%ebx	byte/word currently being examined (saved and restored)
 *	%edx	scratch for the "does this word contain a NUL?" test
 *
 * Both phases work one byte at a time until the pointer being read is
 * 4-byte aligned, then switch to whole 32-bit words.
 *
 * Word test: (word - 0x01010101) & 0x80808080 is non-zero whenever a
 * byte of the word is zero, but it can also be non-zero for words with
 * no zero byte (e.g. bytes above 0x80).  A hit is therefore always
 * confirmed byte by byte, and the word loop is resumed on a false
 * alarm.
 */
ENTRY(strcat)
	pushl	%ebx
	movl	8(%esp),%ecx		/* %ecx = dst */
	movl	12(%esp),%eax		/* %eax = src */

	/*
	 * Phase 1a: step through dst one byte at a time until %ecx is
	 * word aligned, leaving early if the NUL shows up first.
	 */
.Lfind_end_bytewise:
	testb	$3,%cl
	je	.Lfind_end_wordwise
	cmpb	$0,(%ecx)
	je	.Lappend_bytewise
	incl	%ecx
	jmp	.Lfind_end_bytewise

	/*
	 * Phase 1b: scan dst a word at a time.  %ecx is advanced past
	 * the word before the test, so the fix-ups below step it back.
	 */
	_ALIGN_TEXT
.Lfind_end_wordwise:
	movl	(%ecx),%ebx
	addl	$4,%ecx
	leal	-0x01010101(%ebx),%edx
	testl	$0x80808080,%edx
	je	.Lfind_end_wordwise

	/*
	 * The word test fired.  Inspect the four bytes in address order;
	 * if none of them is really zero, go back to the word scan.
	 *
	 * This compact byte-by-byte check is not tuned for any one
	 * processor; it is not expected to run frequently.
	 */
	testb	%bl,%bl			/* byte 0 is the NUL? */
	jne	.Lcheck_byte1
	subl	$4,%ecx
	jmp	.Lappend_bytewise

.Lcheck_byte1:
	testb	%bh,%bh			/* byte 1 is the NUL? */
	jne	.Lcheck_byte2
	subl	$3,%ecx
	jmp	.Lappend_bytewise

.Lcheck_byte2:
	shrl	$16,%ebx		/* bring bytes 2 and 3 into %bl/%bh */
	testb	%bl,%bl			/* byte 2 is the NUL? */
	jne	.Lcheck_byte3
	subl	$2,%ecx
	jmp	.Lappend_bytewise

.Lcheck_byte3:
	testb	%bh,%bh			/* byte 3 is the NUL? */
	jne	.Lfind_end_wordwise	/* false alarm: keep scanning */
	subl	$1,%ecx

	/*
	 * Phase 2a: %ecx now addresses dst's NUL.  Copy src one byte at
	 * a time until %eax is word aligned; finished as soon as the
	 * NUL has been copied.
	 */
.Lappend_bytewise:
	testl	$3,%eax
	je	.Lappend_wordwise
	movb	(%eax),%bl
	incl	%eax
	movb	%bl,(%ecx)
	incl	%ecx
	testb	%bl,%bl
	jne	.Lappend_bytewise
	jmp	.Lreturn_dst

	/*
	 * Phase 2b: copy src a word at a time.  A word is stored whole
	 * only after the test has shown that it holds no NUL.
	 */
	_ALIGN_TEXT
.Lstore_word:
	movl	%ebx,(%ecx)
	addl	$4,%ecx
.Lappend_wordwise:
	movl	(%eax),%ebx
	addl	$4,%eax
	leal	-0x01010101(%ebx),%edx
	testl	$0x80808080,%edx
	je	.Lstore_word

	/*
	 * The word test fired.  Store the word's bytes one at a time in
	 * address order and stop right after a NUL has been written; if
	 * all four bytes turn out to be non-zero, resume the word copy.
	 */
	movb	%bl,(%ecx)		/* byte 0 */
	incl	%ecx
	testb	%bl,%bl
	je	.Lreturn_dst

	movb	%bh,(%ecx)		/* byte 1 */
	incl	%ecx
	testb	%bh,%bh
	je	.Lreturn_dst

	shrl	$16,%ebx		/* bring bytes 2 and 3 into %bl/%bh */
	movb	%bl,(%ecx)		/* byte 2 */
	incl	%ecx
	testb	%bl,%bl
	je	.Lreturn_dst

	movb	%bh,(%ecx)		/* byte 3 */
	incl	%ecx
	testb	%bh,%bh
	jne	.Lappend_wordwise	/* false alarm: keep copying */

.Lreturn_dst:
	movl	8(%esp),%eax		/* return value = original dst */
	popl	%ebx
	ret
END(strcat)