/* xref: /netbsd-src/common/lib/libc/arch/i386/string/memchr.S (revision 2c56941e163201dcb781df7fdeec6bd093647c91) */
/*
 * Written by J.T. Conklin <jtc@acorntoolworks.com>
 * Public domain.
 */
537c9f0a6Schristos
/*
 * NOTE(review): presumably the source of the ENTRY, END, _ALIGN_TEXT and
 * RCSID macros used in this file -- confirm against the platform header.
 */
#include <machine/asm.h>

#if defined(LIBC_SCCS)
	RCSID("$NetBSD: memchr.S,v 1.2 2014/03/22 19:38:46 jakllsch Exp $")
#endif
1137c9f0a6Schristos
/*
 * memchr: scan a byte range for the first occurrence of a byte value.
 *
 * Arguments are read from the stack; the offsets below are those seen
 * after %esi has been pushed:
 *	 8(%esp)	start address of the range
 *	12(%esp)	byte to look for (only the low 8 bits are used)
 *	16(%esp)	number of bytes to scan
 *
 * Returns in %eax the address of the first matching byte, or 0 when
 * the byte does not occur in the range (a length of 0 also returns 0).
 *
 * Register use:
 *	%eax	scan pointer / return value
 *	%ecx	search byte in %cl; replicated into all four bytes once
 *		the word loop is reached
 *	%edx	scratch
 *	%esi	bytes remaining (saved on entry, restored on exit)
 */
ENTRY(memchr)
	pushl	%esi
	movl	8(%esp),%eax		/* eax = start address */
	movzbl	12(%esp),%ecx		/* ecx = search byte, zero-extended */
	movl	16(%esp),%esi		/* esi = byte count */

	/*
	 * Align to word boundary.
	 * Consider unrolling loop?
	 */
	testl	%esi,%esi	/* nbytes == 0? */
	je	.Lzero
.Lalign:
	testb	$3,%al		/* address a multiple of 4 yet? */
	je	.Lword_aligned
	cmpb	(%eax),%cl
	je	.Ldone
	incl	%eax
	decl	%esi
	jnz	.Lalign
	jmp	.Lzero		/* count exhausted before reaching alignment */

.Lword_aligned:
	/* copy char to all bytes in word */
	movb	%cl,%ch
	movl	%ecx,%edx
	sall	$16,%ecx
	orl	%edx,%ecx

	/*
	 * Word loop: examine four bytes per iteration.
	 *
	 * XORing the loaded word with the replicated search byte turns
	 * every matching byte into 0x00.  Subtracting 0x01010101 then
	 * makes the lowest such zero byte wrap to 0xff, so bit 7 of some
	 * byte is set whenever the word contains a match.  If no bit 7 is
	 * set, the word holds no match and the loop continues.
	 */
	_ALIGN_TEXT
.Lloop:
	cmpl	$3,%esi		/* fewer than 4 bytes left? */
	jbe	.Lbyte
	movl	(%eax),%edx
	addl	$4,%eax
	xorl	%ecx,%edx	/* matching bytes become 0x00 */
	subl	$4,%esi
	subl	$0x01010101,%edx
	testl	$0x80808080,%edx
	je	.Lloop

	/*
	 * In rare cases, the above loop may exit prematurely. We must
	 * return to the loop if none of the bytes in the word are
	 * equal to ch.
	 *
	 * (Non-matching bytes can also leave bit 7 set after the
	 * subtraction, so each of the four bytes is rechecked here.
	 * %eax already points past the word, hence the negative
	 * offsets and the pointer adjustment before returning.)
	 */

	/*
	 * High load-use latency on the Athlon leads to significant
	 * stalls, so we preload the next char as soon as possible
	 * instead of using cmp mem8, reg8.
	 *
	 * Alignment here avoids a stall on the Athlon, even though
	 * it's not a branch target.
	 */
	_ALIGN_TEXT
	cmpb	-4(%eax),%cl	/* 1st byte == ch? */
	movb	-3(%eax),%dl
	jne	1f
	subl	$4,%eax
	jmp	.Ldone

	_ALIGN_TEXT
1:	cmpb	%dl,%cl		/* 2nd byte == ch? */
	movb	-2(%eax),%dl
	jne	1f
	subl	$3,%eax
	jmp	.Ldone

	_ALIGN_TEXT
1:	cmpb	%dl,%cl		/* 3rd byte == ch? */
	movb	-1(%eax),%dl
	jne	1f
	subl	$2,%eax
	jmp	.Ldone

	_ALIGN_TEXT
1:	cmpb	%dl,%cl		/* 4th byte == ch? */
	jne	.Lloop		/* false positive: resume the word loop */
	decl	%eax
	jmp	.Ldone

	/* Tail: at most 3 bytes remain; check them one at a time. */
.Lbyte:
	testl	%esi,%esi
	je	.Lzero
.Lbyte_loop:
	cmpb	(%eax),%cl
	je	.Ldone
	incl	%eax
	decl	%esi
	jnz	.Lbyte_loop

	/* Not found: return 0. */
.Lzero:
	xorl	%eax,%eax

.Ldone:
	popl	%esi
	ret
END(memchr)
111