xref: /netbsd-src/common/lib/libc/arch/i386/string/strchr.S (revision d48f14661dda8638fee055ba15d35bdfb29b9fa8)
1/*
2 * Written by J.T. Conklin <jtc@acorntoolworks.com>
3 * Public domain.
4 */
5
6#include <machine/asm.h>
7
8#if defined(LIBC_SCCS)
9	RCSID("$NetBSD: strchr.S,v 1.1 2005/12/20 19:28:49 christos Exp $")
10#endif
11
/*
 * char *strchr(const char *s, int c)
 * (assembled under the name index() instead when INDEX is defined)
 *
 * Scans the zero-terminated string s for the first byte equal to the low
 * byte of c.
 *
 * In:   first stack argument  = s
 *       second stack argument = c (only its low byte is read)
 * Out:  %eax = address of the first matching byte, or 0 if the terminating
 *       zero byte is reached first.  Every byte is compared against c
 *       before it is tested for zero, so c == 0 yields the address of the
 *       terminator itself.
 * Uses: %ecx, %edx and the flags as scratch; %ebx and %esi are pushed on
 *       entry and popped again before returning.
 *
 * Method: bytes are examined one at a time until the pointer is 4-byte
 * aligned; after that the string is read one aligned 32-bit word at a
 * time.  A cheap arithmetic filter flags words that might contain c or a
 * zero byte, and flagged words are then re-examined byte by byte.  Because
 * whole words are loaded, up to 3 bytes following the terminator (inside
 * the same aligned word) may be read.
 */
12#ifdef INDEX
13ENTRY(index)
14#else
15ENTRY(strchr)
16#endif
17	pushl	%esi
18	pushl	%ebx
	/*
	 * The two pushes above moved %esp down by 8, hence the 12/16 offsets
	 * (assumes ENTRY leaves the return address at the top of the stack
	 * -- TODO confirm against <machine/asm.h>).
	 */
19	movl	12(%esp),%eax		/* %eax = s, the running pointer */
20	movzbl	16(%esp),%ecx		/* %ecx = (unsigned char)c, upper 24 bits zero */
21
22	/*
23	 * Align to word boundary.
24	 * Consider unrolling loop?
25	 */
26.Lalign:
27	testb	$3,%al			/* low two address bits clear => 4-byte aligned */
28	je	.Lword_aligned
29	movb	(%eax),%bl		/* %bl = current byte */
30	cmpb	%cl,%bl			/* match is checked first, so c == 0 finds the terminator */
31	je	.Ldone			/* found: %eax already points at the byte */
32	testb	%bl,%bl			/* end of string before any match? */
33	je	.Lzero
34	incl	%eax
35	jmp	.Lalign
36
37.Lword_aligned:
38	/* copy char to all bytes in word */
39	movb	%cl,%ch			/* %cx  = c in both bytes */
40	movl	%ecx,%edx		/* keep the 16-bit pair in %edx */
41	sall	$16,%ecx		/* move the pair into the upper half */
42	orl	%edx,%ecx		/* %ecx = c in all four bytes; %cl is still c */
43
44	/* Check whether any byte in the word is equal to ch or 0. */
	/*
	 * Filter: with w the loaded word and p the replicated character,
	 * compute (w - 0x01010101) | ((w ^ p) - 0x01010101) and look at bit 7
	 * of every byte.  A zero byte in w, or a byte of w equal to c (which
	 * becomes zero after the XOR), always leaves bit 7 set at the lowest
	 * such position, so the filter never misses.  It can fire spuriously,
	 * e.g. for bytes whose own bit 7 is set or because of a borrow from a
	 * lower byte; the byte-by-byte code below sorts that out.
	 *
	 * %eax is advanced before the test, so on loop exit it points 4 bytes
	 * past the start of the word held in %ebx.
	 */
45	_ALIGN_TEXT
46.Lloop:
47	movl	(%eax),%ebx		/* %ebx = next aligned word of the string */
48	addl	$4,%eax			/* advance now; the fix-ups below undo this */
49	movl	%ebx,%esi
50	leal	-0x01010101(%ebx),%edx	/* %edx = w - 0x01010101 (zero-byte probe) */
51	xorl	%ecx,%esi		/* bytes equal to c become zero in %esi */
52	subl	$0x01010101,%esi	/* %esi = (w ^ p) - 0x01010101 (match probe) */
53	orl	%esi,%edx		/* merge both probes */
54	testl	$0x80808080,%edx	/* bit 7 set in any byte? */
55	je	.Lloop			/* no: word holds neither c nor zero */
56
57	/*
58	 * In rare cases, the above loop may exit prematurely. We must
59	 * return to the loop if none of the bytes in the word match
60	 * ch or are equal to 0.
61	 */
62
63	/*
64	 * Alignment here avoids a stall on the Athlon, even though
65	 * it's not a branch target.
66	 */
67
	/*
	 * Exact check of the four bytes in address order: %bl is the byte at
	 * the lowest address and %bh the next one; the shrl $16 further down
	 * brings the remaining two bytes into %bl and %bh.  For each byte the
	 * match test comes before the zero test, mirroring the alignment loop.
	 * On a match %eax is pulled back from "word start + 4" to the address
	 * of the matching byte.
	 */
68	_ALIGN_TEXT
69	cmpb	%cl,%bl		/* 1st byte == ch? */
70	jne	1f
71	subl	$4,%eax		/* match at word start + 0 */
72	jmp	.Ldone
731:	testb	%bl,%bl		/* 1st byte == 0? */
74	je	.Lzero
75
76	cmpb	%cl,%bh		/* 2nd byte == ch? */
77	jne	1f
78	subl	$3,%eax		/* match at word start + 1 */
79	jmp	.Ldone
801:	testb	%bh,%bh		/* 2nd byte == 0? */
81	je	.Lzero
82
83	shrl	$16,%ebx		/* bring 3rd and 4th bytes into %bl and %bh */
84	cmpb	%cl,%bl		/* 3rd byte == ch? */
85	jne	1f
86	subl	$2,%eax		/* match at word start + 2 */
87	jmp	.Ldone
881:	testb	%bl,%bl		/* 3rd byte == 0? */
89	je	.Lzero
90
91	cmpb	%cl,%bh		/* 4th byte == ch? */
92	jne	1f
93	decl	%eax		/* match at word start + 3 */
94	jmp	.Ldone
951:	testb	%bh,%bh		/* 4th byte == 0? */
96	jne	.Lloop		/* neither c nor zero anywhere: filter misfired, keep scanning */
97
98.Lzero:
99	/* If a ch wasn't found, return 0. */
100	xorl	%eax,%eax
101
102.Ldone:
103	popl	%ebx		/* restore in reverse order of the pushes at entry */
104	popl	%esi
105	ret			/* result is in %eax */
106