/*
 * Written by J.T. Conklin <jtc@acorntoolworks.com>
 * Public domain.
 *
 * xref: /minix3/common/lib/libc/arch/i386/string/memchr.S
 * (revision 0a6a1f1d05b60e214de2f05a7310ddd1f0e590e7)
 */

#include <machine/asm.h>

#if defined(LIBC_SCCS)
	RCSID("$NetBSD: memchr.S,v 1.2 2014/03/22 19:38:46 jakllsch Exp $")
#endif

/*
 * void *memchr(const void *s, int c, size_t n)
 *
 * Return the address of the first byte equal to (unsigned char)c among
 * the first n bytes starting at s, or 0 if no such byte exists.
 *
 * Syntax: AT&T (GAS), run through the C preprocessor.
 * In:     arguments on the stack; after the pushl below they are at
 *         8(%esp) = s, 12(%esp) = c, 16(%esp) = n.
 * Out:    %eax = address of the match, or 0.
 * Regs:   %eax = cursor into the buffer
 *         %ecx = c in the low byte, later replicated into all 4 bytes
 *         %edx = scratch
 *         %esi = bytes remaining (saved on entry, restored on exit)
 * Clobb:  %ecx, %edx, flags.
 *
 * Strategy: scan byte-wise until the cursor is 4-byte aligned, then
 * scan a word at a time while at least 4 bytes remain, then finish the
 * last 0-3 bytes byte-wise.
 */
ENTRY(memchr)
	pushl	%esi			/* preserve %esi for the caller */
	movl	8(%esp),%eax		/* %eax = s */
	movzbl	12(%esp),%ecx		/* %ecx = c, zero-extended from 8 bits */
	movl	16(%esp),%esi		/* %esi = n */

	testl	%esi,%esi		/* nbytes == 0? */
	je	.Lzero

	/*
	 * Align to word boundary: compare one byte at a time until the
	 * low two bits of the cursor are clear or the count runs out.
	 * Consider unrolling loop?
	 */
.Lalign:
	testb	$3,%al
	je	.Lword_aligned
	cmpb	(%eax),%cl
	je	.Ldone
	incl	%eax
	decl	%esi
	jnz	.Lalign
	jmp	.Lzero			/* count exhausted before alignment */

.Lword_aligned:
	/* copy char to all bytes in word */
	movb	%cl,%ch			/* %cx  = c:c */
	movl	%ecx,%edx
	sall	$16,%ecx
	orl	%edx,%ecx		/* %ecx = c:c:c:c */

	/*
	 * Word loop.  XORing the loaded word with c:c:c:c turns every
	 * byte equal to c into 0x00.  Subtracting 0x01010101 then sets
	 * bit 7 of every byte that was 0x00, so a clear result under
	 * the 0x80808080 mask proves the word holds no match.
	 * The cursor and count are advanced before the test, so on exit
	 * from the loop the word just examined is at -4(%eax).
	 */
	_ALIGN_TEXT
.Lloop:
	cmpl	$3,%esi			/* fewer than 4 bytes left? */
	jbe	.Lbyte
	movl	(%eax),%edx
	addl	$4,%eax
	xorl	%ecx,%edx		/* bytes equal to c become 0x00 */
	subl	$4,%esi
	subl	$0x01010101,%edx
	testl	$0x80808080,%edx
	je	.Lloop			/* no candidate byte in this word */

	/*
	 * In rare cases, the above loop may exit prematurely (e.g. an
	 * XORed byte in the range 0x81..0xff also leaves bit 7 set).
	 * We must return to the loop if none of the bytes in the word
	 * are equal to ch.
	 */

	/*
	 * High load-use latency on the Athlon leads to significant
	 * stalls, so we preload the next char as soon as possible
	 * instead of using cmp mem8, reg8.  The movb between each cmpb
	 * and its jne does not modify the flags.
	 *
	 * Alignment here avoids a stall on the Athlon, even though
	 * it's not a branch target.
	 */
	_ALIGN_TEXT
	cmpb	-4(%eax),%cl		/* 1st byte == ch? */
	movb	-3(%eax),%dl		/* preload 2nd byte */
	jne	1f
	subl	$4,%eax			/* rewind cursor to 1st byte */
	jmp	.Ldone

	_ALIGN_TEXT
1:	cmpb	%dl,%cl			/* 2nd byte == ch? */
	movb	-2(%eax),%dl		/* preload 3rd byte */
	jne	1f
	subl	$3,%eax			/* rewind cursor to 2nd byte */
	jmp	.Ldone

	_ALIGN_TEXT
1:	cmpb	%dl,%cl			/* 3rd byte == ch? */
	movb	-1(%eax),%dl		/* preload 4th byte */
	jne	1f
	subl	$2,%eax			/* rewind cursor to 3rd byte */
	jmp	.Ldone

	_ALIGN_TEXT
1:	cmpb	%dl,%cl			/* 4th byte == ch? */
	jne	.Lloop			/* false alarm: resume word loop */
	decl	%eax			/* rewind cursor to 4th byte */
	jmp	.Ldone

	/* Tail: at most 3 bytes remain; compare them one at a time. */
.Lbyte:
	testl	%esi,%esi
	je	.Lzero
.Lbyte_loop:
	cmpb	(%eax),%cl
	je	.Ldone
	incl	%eax
	decl	%esi
	jnz	.Lbyte_loop
	/* fall through: not found */

.Lzero:
	xorl	%eax,%eax		/* return 0: no match */

.Ldone:
	popl	%esi
	ret
END(memchr)