xref: /netbsd-src/common/lib/libc/arch/x86_64/string/memchr.S (revision e5548b402ae4c44fb816de42c7bba9581ce23ef5)
/*
 * Written by J.T. Conklin <jtc@acorntoolworks.com>
 * Public domain.
 */
5
6#include <machine/asm.h>
7
8#if defined(LIBC_SCCS)
9	RCSID("$NetBSD: memchr.S,v 1.1 2005/12/20 19:28:51 christos Exp $")
10#endif
11
/*
 * void *memchr(const void *s, int c, size_t n)
 *
 * Return a pointer to the first byte equal to (unsigned char)c within
 * the first n bytes starting at s, or NULL if there is no such byte.
 *
 * ABI:      System V AMD64
 * In:       %rdi = s, %sil = c (only the low byte is used), %rdx = n
 * Out:      %rax = address of the first match, or 0
 * Clobbers: %rcx, %rdx, %rsi, %rdi, %r8, %r9, flags
 *
 * Strategy: compare single bytes until %rdi is 8-byte aligned, then
 * examine one aligned 64-bit word per iteration, then finish the last
 * 0..7 bytes one at a time.  A word is loaded only while at least
 * 8 bytes remain, so nothing at or beyond s + n is ever read.
 */
ENTRY(memchr)
	movzbq	%sil,%rcx		/* %rcx = (unsigned char)c */

	testq	%rdx,%rdx		/* n == 0: nothing to search */
	je	.Lzero

	/*
	 * Head: byte compares until %rdi is 8-byte aligned.
	 * Invariant: %rdx (bytes remaining) is non-zero at .Lalign.
	 */
.Lalign:
	testb	$7,%dil
	je	.Lword_aligned
	movq	%rdi,%rax		/* candidate return value */
	cmpb	(%rdi),%cl
	je	.Ldone
	incq	%rdi
	decq	%rdx
	jnz	.Lalign
	jmp	.Lzero			/* buffer ended before alignment */

.Lword_aligned:
	movabsq	$0x0101010101010101,%r8
	movabsq	$0x8080808080808080,%r9

	/*
	 * Copy c into all 8 bytes of %rcx.  %rcx is below 0x100 here, so
	 * the multiplication cannot carry from one byte lane into the
	 * next; %cl still holds c afterwards.
	 */
	imulq	%r8,%rcx

	_ALIGN_TEXT
.Lloop:
	cmpq	$7,%rdx			/* fewer than 8 bytes left? */
	jbe	.Lbyte
	movq	(%rdi),%rsi
	addq	$8,%rdi
	xorq	%rcx,%rsi		/* bytes equal to c become 0x00 */
	subq	$8,%rdx
	subq	%r8,%rsi		/* a 0x00 byte wraps to 0xff */
	testq	%r9,%rsi		/* any byte with its top bit set? */
	je	.Lloop

	/*
	 * The word test never misses a match, but it is conservative:
	 * it also fires for a non-matching byte whose XOR with c is
	 * 0x81..0xff, and for bytes above a real match (borrow).
	 * Check the eight bytes in address order; %rdi already points
	 * past the word.  If none equals c, resume the word loop.
	 */

	leaq	-8(%rdi),%rax
	cmpb	-8(%rdi),%cl		/* 1st byte == c? */
	je	.Ldone

	leaq	-7(%rdi),%rax
	cmpb	-7(%rdi),%cl		/* 2nd byte == c? */
	je	.Ldone

	leaq	-6(%rdi),%rax
	cmpb	-6(%rdi),%cl		/* 3rd byte == c? */
	je	.Ldone

	leaq	-5(%rdi),%rax
	cmpb	-5(%rdi),%cl		/* 4th byte == c? */
	je	.Ldone

	leaq	-4(%rdi),%rax
	cmpb	-4(%rdi),%cl		/* 5th byte == c? */
	je	.Ldone

	leaq	-3(%rdi),%rax
	cmpb	-3(%rdi),%cl		/* 6th byte == c? */
	je	.Ldone

	leaq	-2(%rdi),%rax
	cmpb	-2(%rdi),%cl		/* 7th byte == c? */
	je	.Ldone

	leaq	-1(%rdi),%rax
	cmpb	-1(%rdi),%cl		/* 8th byte == c? */
	jne	.Lloop			/* false positive: keep scanning */
	ret

	/* Tail: at most 7 bytes remain; compare them one at a time. */
.Lbyte:
	testq	%rdx,%rdx
	je	.Lzero
.Lbyte_loop:
	movq	%rdi,%rax		/* candidate return value */
	cmpb	(%rdi),%cl
	je	.Ldone
	incq	%rdi
	decq	%rdx
	jnz	.Lbyte_loop

.Lzero:
	xorl	%eax,%eax		/* not found: return NULL */

.Ldone:
	ret
112