/*
 * Written by J.T. Conklin <jtc@acorntoolworks.com>
 * Public domain.
 */

#include <machine/asm.h>

#if defined(LIBC_SCCS)
	RCSID("$NetBSD: strcat.S,v 1.2 2014/03/22 19:38:46 jakllsch Exp $")
#endif

/*
 * char *strcat(char *dst, const char *src)
 *
 * Syntax: AT&T (GAS), 32-bit x86.  Both arguments are read from the
 * stack.
 *
 * In:    4(%esp) = dst, 8(%esp) = src on entry
 *        (8(%esp) and 12(%esp) once %ebx has been pushed)
 * Out:   %eax = dst (the original first argument, reloaded at .Ldone)
 * Clobb: %ecx, %edx, flags; %ebx is saved and restored
 *
 * Register roles:
 *   %ecx  write cursor into dst (first used to find dst's terminator)
 *   %eax  read cursor into src
 *   %ebx  word/byte currently being examined or copied
 *   %edx  scratch for the zero-byte test
 *
 * Zero-byte test: (w - 0x01010101) & 0x80808080 is zero only when
 * every byte of w lies in 0x01..0x80, so a zero result proves the word
 * contains no NUL.  A non-zero result means "maybe": bytes of 0x81 and
 * above trip it as well, hence the byte-by-byte rechecks after each
 * word loop.
 */
ENTRY(strcat)
	pushl	%ebx
	movl	8(%esp),%ecx		/* %ecx = dst */
	movl	12(%esp),%eax		/* %eax = src */

	/*
	 * Align destination to word boundary.
	 * Consider unrolling loop?
	 */
.Lscan:
.Lscan_align:
	testb	$3,%cl
	je	.Lscan_aligned
	cmpb	$0,(%ecx)
	je	.Lcopy			/* found dst's NUL while aligning */
	incl	%ecx
	jmp	.Lscan_align

	_ALIGN_TEXT
.Lscan_aligned:
.Lscan_loop:
	movl	(%ecx),%ebx
	addl	$4,%ecx			/* %ecx now points past this word */
	leal	-0x01010101(%ebx),%edx
	testl	$0x80808080,%edx
	je	.Lscan_loop		/* definitely no NUL in this word */

	/*
	 * In rare cases, the above loop may exit prematurely. We must
	 * return to the loop if none of the bytes in the word equal 0.
	 */

	/*
	 * The optimal code for determining whether each byte is zero
	 * differs by processor.  This space-optimized code should be
	 * acceptable on all, especially since we don't expect it to
	 * be run frequently.
	 */

	/*
	 * %ecx is already 4 past the word, so back it up to the
	 * address of whichever byte turns out to be the NUL.
	 */
	testb	%bl,%bl		/* 1st byte == 0? */
	jne	1f
	subl	$4,%ecx
	jmp	.Lcopy

1:	testb	%bh,%bh		/* 2nd byte == 0? */
	jne	1f
	subl	$3,%ecx
	jmp	.Lcopy

1:	shrl	$16,%ebx	/* bring bytes 3 and 4 into %bl/%bh */
	testb	%bl,%bl		/* 3rd byte == 0? */
	jne	1f
	subl	$2,%ecx
	jmp	.Lcopy

1:	testb	%bh,%bh		/* 4th byte == 0? */
	jne	.Lscan_loop	/* false positive: keep scanning */
	subl	$1,%ecx

	/*
	 * Align source to a word boundary.
	 * Consider unrolling loop?
	 */
.Lcopy:
.Lcopy_align:
	testl	$3,%eax
	je	.Lcopy_aligned
	movb	(%eax),%bl
	incl	%eax
	movb	%bl,(%ecx)
	incl	%ecx
	testb	%bl,%bl
	jne	.Lcopy_align
	jmp	.Ldone		/* copied src's NUL while aligning */

	/*
	 * Word copy.  Only the source address is aligned here; the
	 * store to (%ecx) may be unaligned.  A word is stored whole
	 * only after the test has shown it contains no NUL.
	 */
	_ALIGN_TEXT
.Lcopy_loop:
	movl	%ebx,(%ecx)
	addl	$4,%ecx
.Lcopy_aligned:
	movl	(%eax),%ebx
	addl	$4,%eax
	leal	-0x01010101(%ebx),%edx
	testl	$0x80808080,%edx
	je	.Lcopy_loop

	/*
	 * In rare cases, the above loop may exit prematurely. We must
	 * return to the loop if none of the bytes in the word equal 0.
	 */

	/*
	 * Copy the suspect word one byte at a time, stopping right
	 * after the NUL has been stored.
	 */
	movb	%bl,(%ecx)
	incl	%ecx
	testb	%bl,%bl
	je	.Ldone

	movb	%bh,(%ecx)
	incl	%ecx
	testb	%bh,%bh
	je	.Ldone

	shrl	$16,%ebx	/* bring bytes 3 and 4 into %bl/%bh */
	movb	%bl,(%ecx)
	incl	%ecx
	testb	%bl,%bl
	je	.Ldone

	movb	%bh,(%ecx)
	incl	%ecx
	testb	%bh,%bh
	jne	.Lcopy_aligned	/* false positive: resume word copy */

.Ldone:
	movl	8(%esp),%eax	/* return the original dst */
	popl	%ebx
	ret
END(strcat)