/* xref: /openbsd-src/sys/lib/libkern/arch/amd64/strrchr.S (revision f2da64fbbbf1b03f09f390ab01267c93dfd77c4c) */
/*	$OpenBSD: strrchr.S,v 1.3 2014/12/09 15:13:57 reyk Exp $	*/
/*	$NetBSD: strrchr.S,v 1.3 2014/03/22 19:16:34 jakllsch Exp $	*/

/*
 * Written by J.T. Conklin <jtc@acorntoolworks.com>
 * Public domain.
 */

#include <machine/asm.h>

/*
 * Export rindex under the same code as strrchr.  STRONG_ALIAS is supplied
 * by <machine/asm.h>; presumably it emits a strong symbol alias -- confirm
 * against that header.
 */
STRONG_ALIAS(rindex, strrchr)

/*
 * char *strrchr(const char *s, int c)
 *
 * Return a pointer to the last byte of the NUL-terminated string s that
 * equals (char)c, or NULL if there is none.  The terminator takes part in
 * the search, so c == 0 yields a pointer to the terminator.
 *
 * In:       %rdi = s, %sil = c (only the low byte is used)
 * Out:      %rax = address of the last match, or 0
 * Scratch:  %rcx, %rdx, %rsi, %rdi, %r8, %r9, flags
 *
 * Only caller-saved registers are written and no stack slots are used.
 */
ENTRY(strrchr)
	movzbq	%sil,%rcx		/* %rcx = c as a byte, upper bits 0 */

	/* zero return value: stays NULL until a match is seen */
	xorq	%rax,%rax

	/*
	 * Align to word boundary.
	 * Consider unrolling loop?
	 *
	 * Bytes are examined one at a time until %rdi is a multiple of 8.
	 * The match test precedes the NUL test so that a search for the
	 * terminator itself records its address before leaving.
	 */
.Lalign:
	testb	$7,%dil
	je	.Lword_aligned
	movb	(%rdi),%dl
	cmpb	%cl,%dl
	cmoveq	%rdi,%rax		/* remember the most recent match */
	incq	%rdi
	testb	%dl,%dl
	jne	.Lalign
	jmp	.Ldone

.Lword_aligned:
	/*
	 * copy char to all bytes in word
	 *
	 * %cl -> %cx -> low 32 bits -> all 64 bits, doubling each step.
	 */
	movb	%cl,%ch
	movq	%rcx,%rdx
	salq	$16,%rcx
	orq	%rdx,%rcx
	movq	%rcx,%rdx
	salq	$32,%rcx
	orq	%rdx,%rcx

	movabsq	$0x0101010101010101,%r8
	movabsq	$0x8080808080808080,%r9

	/*
	 * Check whether any byte in the word is equal to ch or 0.
	 *
	 * Subtracting 0x01 from every byte lane turns a zero byte into one
	 * with bit 7 set.  This is applied both to the word itself (finds
	 * the terminator) and to word ^ %rcx (a byte equal to ch becomes 0
	 * after the xor).  The loop continues while neither result has any
	 * bit 7 set.
	 *
	 * %rdi is 8-byte aligned here, so each load covers exactly one
	 * aligned word even when it reads bytes past the terminator.
	 */
	_ALIGN_TEXT
.Lloop:
	movq	(%rdi),%rdx
	addq	$8,%rdi			/* %rdi now points past this word */
	movq	%rdx,%rsi
	subq	%r8,%rdx
	xorq	%rcx,%rsi
	subq	%r8,%rsi
	orq	%rsi,%rdx
	testq	%r9,%rdx
	je	.Lloop

	/*
	 * In rare cases, the above loop may exit prematurely. We must
	 * return to the loop if none of the bytes in the word match
	 * ch or are equal to 0.
	 *
	 * (The subtraction also leaves bit 7 set for some bytes that
	 * already had it set, so the test above is only a filter.)
	 *
	 * The word just loaded sits at -8(%rdi) .. -1(%rdi).  Its bytes are
	 * rechecked in address order: a match updates %rax, a NUL ends the
	 * search, and if the 8th byte is not NUL the word loop resumes.
	 */

	movb	-8(%rdi),%dl
	cmpb	%cl,%dl		/* 1st byte == ch? */
	jne	1f
	leaq	-8(%rdi),%rax
1:	testb	%dl,%dl		/* 1st byte == 0? */
	je	.Ldone

	movb	-7(%rdi),%dl
	cmpb	%cl,%dl		/* 2nd byte == ch? */
	jne	1f
	leaq	-7(%rdi),%rax
1:	testb	%dl,%dl		/* 2nd byte == 0? */
	je	.Ldone

	movb	-6(%rdi),%dl
	cmpb	%cl,%dl		/* 3rd byte == ch? */
	jne	1f
	leaq	-6(%rdi),%rax
1:	testb	%dl,%dl		/* 3rd byte == 0? */
	je	.Ldone

	movb	-5(%rdi),%dl
	cmpb	%cl,%dl		/* 4th byte == ch? */
	jne	1f
	leaq	-5(%rdi),%rax
1:	testb	%dl,%dl		/* 4th byte == 0? */
	je	.Ldone

	movb	-4(%rdi),%dl
	cmpb	%cl,%dl		/* 5th byte == ch? */
	jne	1f
	leaq	-4(%rdi),%rax
1:	testb	%dl,%dl		/* 5th byte == 0? */
	je	.Ldone

	movb	-3(%rdi),%dl
	cmpb	%cl,%dl		/* 6th byte == ch? */
	jne	1f
	leaq	-3(%rdi),%rax
1:	testb	%dl,%dl		/* 6th byte == 0? */
	je	.Ldone

	movb	-2(%rdi),%dl
	cmpb	%cl,%dl		/* 7th byte == ch? */
	jne	1f
	leaq	-2(%rdi),%rax
1:	testb	%dl,%dl		/* 7th byte == 0? */
	je	.Ldone

	movb	-1(%rdi),%dl
	cmpb	%cl,%dl		/* 8th byte == ch? */
	jne	1f
	leaq	-1(%rdi),%rax
1:	testb	%dl,%dl		/* 8th byte == 0? */
	jne	.Lloop

.Ldone:
	ret

124