/*
 * Written by J.T. Conklin <jtc@acorntoolworks.com>
 * Public domain.
 */

#include <machine/asm.h>

#if defined(LIBC_SCCS)
	RCSID("$NetBSD: strcat.S,v 1.2 2014/03/22 19:38:46 jakllsch Exp $")
#endif

/*
 * char *strcat(char *dst, const char *src)
 *
 * Append the NUL-terminated string src (including its terminator) to
 * the end of the NUL-terminated string dst, and return dst.
 *
 * i386, AT&T operand order.  Arguments are read from the stack; after
 * the initial push of %ebx, dst is at 8(%esp) and src is at 12(%esp).
 *
 * Register use:
 *	%ecx	write cursor: first walks dst to its terminator, then
 *		receives the bytes of src
 *	%eax	read cursor into src; reloaded with dst just before return
 *	%ebx	byte or word currently being examined/copied
 *		(pushed on entry, popped on exit)
 *	%edx	scratch for the zero-byte test
 *
 * Both phases work one byte at a time until the pointer being read is
 * 4-byte aligned and then switch to aligned 32-bit loads.  An aligned
 * load may fetch up to three bytes beyond the terminator, but it never
 * reaches outside the aligned word that holds the terminator.
 *
 * Zero-byte test: for a word w, (w - 0x01010101) & 0x80808080 is
 * non-zero whenever at least one byte of w is zero.  It is also
 * non-zero when a byte is 0x81 or above, so a hit only marks a
 * candidate word; the individual bytes are then examined and the word
 * loop is re-entered if none of them is actually zero.
 */
ENTRY(strcat)
	pushl	%ebx
	movl	8(%esp),%ecx		/* %ecx = dst */
	movl	12(%esp),%eax		/* %eax = src */

	/*
	 * Align destination to word boundary.
	 * Consider unrolling loop?
	 */
.Lscan:
.Lscan_align:
	testb	$3,%cl			/* low two address bits clear? */
	je	.Lscan_aligned
	cmpb	$0,(%ecx)		/* found dst's terminator already? */
	je	.Lcopy
	incl	%ecx
	jmp	.Lscan_align

	_ALIGN_TEXT
.Lscan_aligned:
.Lscan_loop:
	movl	(%ecx),%ebx		/* next four bytes of dst */
	addl	$4,%ecx			/* %ecx now points past this word */
	leal	-0x01010101(%ebx),%edx
	testl	$0x80808080,%edx	/* candidate zero byte? */
	je	.Lscan_loop

	/*
	 * In rare cases, the above loop may exit prematurely. We must
	 * return to the loop if none of the bytes in the word equal 0.
	 */

	/*
	 * The optimal code for determining whether each byte is zero
	 * differs by processor.  This space-optimized code should be
	 * acceptable on all, especially since we don't expect it to
	 * be run frequently.
	 */

	/*
	 * %ecx has already been advanced by 4, so each case below backs
	 * it up to the address of the zero byte that was found.
	 */
	testb	%bl,%bl		/* 1st byte == 0? */
	jne	1f
	subl	$4,%ecx
	jmp	.Lcopy

1:	testb	%bh,%bh		/* 2nd byte == 0? */
	jne	1f
	subl	$3,%ecx
	jmp	.Lcopy

1:	shrl	$16,%ebx	/* bring bytes 3 and 4 into %bl/%bh */
	testb	%bl,%bl		/* 3rd byte == 0? */
	jne	1f
	subl	$2,%ecx
	jmp	.Lcopy

1:	testb	%bh,%bh		/* 4th byte == 0? */
	jne	.Lscan_loop	/* false positive: keep scanning */
	subl	$1,%ecx

	/*
	 * Here %ecx points at dst's terminating NUL, which the first
	 * byte copied from src will overwrite.
	 */

	/*
	 * Align source to a word boundary.
	 * Consider unrolling loop?
	 */
.Lcopy:
.Lcopy_align:
	testl	$3,%eax
	je	.Lcopy_aligned
	movb	(%eax),%bl
	incl	%eax
	movb	%bl,(%ecx)
	incl	%ecx
	testb	%bl,%bl		/* was that src's terminator? */
	jne	.Lcopy_align
	jmp	.Ldone

	/*
	 * Word copy loop.  Only the source pointer is known to be
	 * aligned here; the store through %ecx is not.  The loop is
	 * entered at .Lcopy_aligned so that a word is stored whole only
	 * after the zero-byte test has passed it.
	 */
	_ALIGN_TEXT
.Lcopy_loop:
	movl	%ebx,(%ecx)
	addl	$4,%ecx
.Lcopy_aligned:
	movl	(%eax),%ebx		/* next four bytes of src */
	addl	$4,%eax
	leal	-0x01010101(%ebx),%edx
	testl	$0x80808080,%edx	/* candidate zero byte? */
	je	.Lcopy_loop

	/*
	 * In rare cases, the above loop may exit prematurely. We must
	 * return to the loop if none of the bytes in the word equal 0.
	 */

	/*
	 * Store the candidate word one byte at a time, stopping right
	 * after the terminator has been written.  If all four bytes
	 * turn out to be non-zero they have all been stored and %ecx
	 * has advanced by 4, so the word loop can simply resume.
	 */
	movb	%bl,(%ecx)
	incl	%ecx
	testb	%bl,%bl
	je	.Ldone

	movb	%bh,(%ecx)
	incl	%ecx
	testb	%bh,%bh
	je	.Ldone

	shrl	$16,%ebx	/* bring bytes 3 and 4 into %bl/%bh */
	movb	%bl,(%ecx)
	incl	%ecx
	testb	%bl,%bl
	je	.Ldone

	movb	%bh,(%ecx)
	incl	%ecx
	testb	%bh,%bh
	jne	.Lcopy_aligned	/* false positive: keep copying */

.Ldone:
	movl	8(%esp),%eax	/* return value: the original dst */
	popl	%ebx
	ret
END(strcat)