xref: /minix3/common/lib/libc/arch/x86_64/string/strcat.S (revision 0a6a1f1d05b60e214de2f05a7310ddd1f0e590e7)
1b6cbf720SGianluca Guida/*
2b6cbf720SGianluca Guida * Written by J.T. Conklin <jtc@acorntoolworks.com>
3b6cbf720SGianluca Guida * Public domain.
4b6cbf720SGianluca Guida */
5b6cbf720SGianluca Guida
6b6cbf720SGianluca Guida#include <machine/asm.h>
7b6cbf720SGianluca Guida
/*
 * Optional source-revision stamp: only present when LIBC_SCCS is defined.
 * RCSID is not defined in this file; presumably it is a macro supplied via
 * <machine/asm.h> that records the quoted ident string -- TODO confirm.
 */
8b6cbf720SGianluca Guida#if defined(LIBC_SCCS)
9*0a6a1f1dSLionel Sambuc	RCSID("$NetBSD: strcat.S,v 1.2 2014/03/22 19:16:34 jakllsch Exp $")
10b6cbf720SGianluca Guida#endif
11b6cbf720SGianluca Guida
/*
 * strcat -- append the 0-terminated byte string at %rsi to the end of the
 * 0-terminated byte string at %rdi.
 *
 * Presumably the C-callable  char *strcat(char *dst, const char *src);
 * the register roles below match the System V AMD64 order (arg1 = %rdi,
 * arg2 = %rsi, result = %rax) -- confirm against the ENTRY() macro/ABI.
 *
 * In:      %rdi = destination string, %rsi = source string
 * Out:     %rax = the value %rdi had on entry
 * Writes:  %rax, %rcx, %rdx, %rsi, %rdi, %r8, %r9, flags
 * Stack:   not touched apart from the final ret
 *
 * Structure:
 *   scan  - locate the terminating 0 byte of the destination: byte by
 *           byte until %rdi is 8-byte aligned, then 8 bytes per step.
 *   copy  - copy the source: byte by byte until %rsi is 8-byte aligned,
 *           then 8 bytes per step, finishing with a bytewise tail that
 *           stops right after the terminating 0 has been stored.
 *
 * Zero-byte filter used by both 8-byte loops, for a quadword x:
 *       (x - 0x0101010101010101) & 0x8080808080808080
 *   == 0  => x contains no 0 byte (safe to continue);
 *   != 0  => x MAY contain a 0 byte; bytes >= 0x81 also trip the filter,
 *            so the candidate word is always re-examined byte by byte.
 *
 * NOTE(review): as captured here every line still carries a
 * "<line><revision><author>" annotate-view prefix; the code is left
 * byte-for-byte as found.
 */
12b6cbf720SGianluca GuidaENTRY(strcat)
13b6cbf720SGianluca Guida	movq	%rdi,%rax	/* result: original destination pointer */
14b6cbf720SGianluca Guida	movabsq	$0x0101010101010101,%r8	/* 0x01 in every byte (filter subtrahend) */
15b6cbf720SGianluca Guida	movabsq	$0x8080808080808080,%r9	/* 0x80 in every byte (filter mask) */
16b6cbf720SGianluca Guida
17b6cbf720SGianluca Guida	/*
18b6cbf720SGianluca Guida	 * Align destination to word boundary.
19b6cbf720SGianluca Guida	 * Consider unrolling loop?
20b6cbf720SGianluca Guida	 */
	/*
	 * Bytewise scan: leave as soon as %rdi is a multiple of 8, or go
	 * straight to the copy phase if the terminator is found first
	 * (%rdi then points at the destination's 0 byte).
	 * .Lscan is not referenced anywhere in this routine.
	 */
21b6cbf720SGianluca Guida.Lscan:
22b6cbf720SGianluca Guida.Lscan_align:
23b6cbf720SGianluca Guida	testb	$7,%dil
24b6cbf720SGianluca Guida	je	.Lscan_aligned
25b6cbf720SGianluca Guida	cmpb	$0,(%rdi)
26b6cbf720SGianluca Guida	je	.Lcopy
27b6cbf720SGianluca Guida	incq	%rdi
28b6cbf720SGianluca Guida	jmp	.Lscan_align
29b6cbf720SGianluca Guida
	/*
	 * 8-bytes-per-iteration scan. %rdi is 8-byte aligned on every
	 * load, so a load never straddles an 8-byte boundary even though
	 * it may read bytes beyond the terminator (presumably what makes
	 * the over-read acceptable -- NOTE(review)).
	 * %rdi is advanced BEFORE the filter is evaluated, so on loop exit
	 * the candidate word lives at -8(%rdi) .. -1(%rdi).
	 * _ALIGN_TEXT comes from outside this file; presumably it pads to
	 * the platform's preferred code alignment -- TODO confirm.
	 */
30b6cbf720SGianluca Guida	_ALIGN_TEXT
31b6cbf720SGianluca Guida.Lscan_aligned:
32b6cbf720SGianluca Guida.Lscan_loop:
33b6cbf720SGianluca Guida	movq	(%rdi),%rdx
34b6cbf720SGianluca Guida	addq	$8,%rdi
35b6cbf720SGianluca Guida	subq	%r8,%rdx
36b6cbf720SGianluca Guida	testq	%r9,%rdx
37b6cbf720SGianluca Guida	je	.Lscan_loop
38b6cbf720SGianluca Guida
39b6cbf720SGianluca Guida	/*
40b6cbf720SGianluca Guida	 * In rare cases, the above loop may exit prematurely. We must
41b6cbf720SGianluca Guida	 * return to the loop if none of the bytes in the word equal 0.
42b6cbf720SGianluca Guida	 */
	/*
	 * Re-check the candidate word one byte at a time, lowest address
	 * first. On a hit, rewind %rdi so it points at the 0 byte and start
	 * copying. Each "1f" targets the next "1:" label below it. The last
	 * check either resumes the scan (false positive) or falls through
	 * into .Lcopy.
	 */
43b6cbf720SGianluca Guida
44b6cbf720SGianluca Guida	cmpb	$0,-8(%rdi)	/* 1st byte == 0? */
45b6cbf720SGianluca Guida	jne	1f
46b6cbf720SGianluca Guida	subq	$8,%rdi
47b6cbf720SGianluca Guida	jmp	.Lcopy
48b6cbf720SGianluca Guida
49b6cbf720SGianluca Guida1:	cmpb	$0,-7(%rdi)	/* 2nd byte == 0? */
50b6cbf720SGianluca Guida	jne	1f
51b6cbf720SGianluca Guida	subq	$7,%rdi
52b6cbf720SGianluca Guida	jmp	.Lcopy
53b6cbf720SGianluca Guida
54b6cbf720SGianluca Guida1:	cmpb	$0,-6(%rdi)	/* 3rd byte == 0? */
55b6cbf720SGianluca Guida	jne	1f
56b6cbf720SGianluca Guida	subq	$6,%rdi
57b6cbf720SGianluca Guida	jmp	.Lcopy
58b6cbf720SGianluca Guida
59b6cbf720SGianluca Guida1:	cmpb	$0,-5(%rdi)	/* 4th byte == 0? */
60b6cbf720SGianluca Guida	jne	1f
61b6cbf720SGianluca Guida	subq	$5,%rdi
62b6cbf720SGianluca Guida	jmp	.Lcopy
63b6cbf720SGianluca Guida
64b6cbf720SGianluca Guida1:	cmpb	$0,-4(%rdi)	/* 5th byte == 0? */
65b6cbf720SGianluca Guida	jne	1f
66b6cbf720SGianluca Guida	subq	$4,%rdi
67b6cbf720SGianluca Guida	jmp	.Lcopy
68b6cbf720SGianluca Guida
69b6cbf720SGianluca Guida1:	cmpb	$0,-3(%rdi)	/* 6th byte == 0? */
70b6cbf720SGianluca Guida	jne	1f
71b6cbf720SGianluca Guida	subq	$3,%rdi
72b6cbf720SGianluca Guida	jmp	.Lcopy
73b6cbf720SGianluca Guida
74b6cbf720SGianluca Guida1:	cmpb	$0,-2(%rdi)	/* 7th byte == 0? */
75b6cbf720SGianluca Guida	jne	1f
76b6cbf720SGianluca Guida	subq	$2,%rdi
77b6cbf720SGianluca Guida	jmp	.Lcopy
78b6cbf720SGianluca Guida
79b6cbf720SGianluca Guida1:	cmpb	$0,-1(%rdi)	/* 8th byte == 0? */
80b6cbf720SGianluca Guida	jne	.Lscan_loop
81b6cbf720SGianluca Guida	subq	$1,%rdi
82b6cbf720SGianluca Guida
83b6cbf720SGianluca Guida	/*
84b6cbf720SGianluca Guida	 * Align source to a word boundary.
85b6cbf720SGianluca Guida	 * Consider unrolling loop?
86b6cbf720SGianluca Guida	 */
	/*
	 * Here %rdi points at the destination's terminating 0 byte, which
	 * the first copied byte overwrites. Copy single bytes until %rsi
	 * is a multiple of 8; if the byte just stored was the source's
	 * terminator the concatenation is complete and we return at once
	 * (%rax still holds the entry value of %rdi).
	 */
87b6cbf720SGianluca Guida.Lcopy:
88b6cbf720SGianluca Guida.Lcopy_align:
89b6cbf720SGianluca Guida	testb	$7,%sil
90b6cbf720SGianluca Guida	je	.Lcopy_aligned
91b6cbf720SGianluca Guida	movb	(%rsi),%dl
92b6cbf720SGianluca Guida	incq	%rsi
93b6cbf720SGianluca Guida	movb	%dl,(%rdi)
94b6cbf720SGianluca Guida	incq	%rdi
95b6cbf720SGianluca Guida	testb	%dl,%dl
96b6cbf720SGianluca Guida	jne	.Lcopy_align
97b6cbf720SGianluca Guida	ret
98b6cbf720SGianluca Guida
	/*
	 * 8-bytes-per-iteration copy; the loop is entered at .Lcopy_aligned
	 * (load first). Loads come from the 8-byte-aligned %rsi; only the
	 * source was aligned, so stores through %rdi may be unaligned.
	 * .Lcopy_loop is reached only when the filter proved the word in
	 * %rdx has no 0 byte, so a full quadword is never stored at or
	 * past the terminator. %rcx is a scratch copy for the filter,
	 * keeping the loaded data intact in %rdx.
	 */
99b6cbf720SGianluca Guida	_ALIGN_TEXT
100b6cbf720SGianluca Guida.Lcopy_loop:
101b6cbf720SGianluca Guida	movq	%rdx,(%rdi)
102b6cbf720SGianluca Guida	addq	$8,%rdi
103b6cbf720SGianluca Guida.Lcopy_aligned:
104b6cbf720SGianluca Guida	movq	(%rsi),%rdx
105b6cbf720SGianluca Guida	movq	%rdx,%rcx
106b6cbf720SGianluca Guida	addq	$8,%rsi
107b6cbf720SGianluca Guida	subq	%r8,%rcx
108b6cbf720SGianluca Guida	testq	%r9,%rcx
109b6cbf720SGianluca Guida	je	.Lcopy_loop
110b6cbf720SGianluca Guida
111b6cbf720SGianluca Guida	/*
112b6cbf720SGianluca Guida	 * In rare cases, the above loop may exit prematurely. We must
113b6cbf720SGianluca Guida	 * return to the loop if none of the bytes in the word equal 0.
114b6cbf720SGianluca Guida	 */
	/*
	 * Bytewise tail for the candidate word held in %rdx. The low byte
	 * of %rdx is the byte at the lowest address (little-endian load),
	 * so: store %dl, advance %rdi, finish if it was 0, otherwise shift
	 * the next byte down into %dl. If all eight bytes turn out to be
	 * nonzero (false positive) %rdi has advanced by 8 in total and
	 * %rsi was already advanced above, so the word loop is simply
	 * resumed at .Lcopy_aligned.
	 */
115b6cbf720SGianluca Guida
116b6cbf720SGianluca Guida	movb	%dl,(%rdi)
117b6cbf720SGianluca Guida	incq	%rdi
118b6cbf720SGianluca Guida	testb	%dl,%dl		/* 1st byte == 0? */
119b6cbf720SGianluca Guida	je	.Ldone
120b6cbf720SGianluca Guida
121b6cbf720SGianluca Guida	shrq	$8,%rdx
122b6cbf720SGianluca Guida	movb	%dl,(%rdi)
123b6cbf720SGianluca Guida	incq	%rdi
124b6cbf720SGianluca Guida	testb	%dl,%dl		/* 2nd byte == 0? */
125b6cbf720SGianluca Guida	je	.Ldone
126b6cbf720SGianluca Guida
127b6cbf720SGianluca Guida	shrq	$8,%rdx
128b6cbf720SGianluca Guida	movb	%dl,(%rdi)
129b6cbf720SGianluca Guida	incq	%rdi
130b6cbf720SGianluca Guida	testb	%dl,%dl		/* 3rd byte == 0? */
131b6cbf720SGianluca Guida	je	.Ldone
132b6cbf720SGianluca Guida
133b6cbf720SGianluca Guida	shrq	$8,%rdx
134b6cbf720SGianluca Guida	movb	%dl,(%rdi)
135b6cbf720SGianluca Guida	incq	%rdi
136b6cbf720SGianluca Guida	testb	%dl,%dl		/* 4th byte == 0? */
137b6cbf720SGianluca Guida	je	.Ldone
138b6cbf720SGianluca Guida
139b6cbf720SGianluca Guida	shrq	$8,%rdx
140b6cbf720SGianluca Guida	movb	%dl,(%rdi)
141b6cbf720SGianluca Guida	incq	%rdi
142b6cbf720SGianluca Guida	testb	%dl,%dl		/* 5th byte == 0? */
143b6cbf720SGianluca Guida	je	.Ldone
144b6cbf720SGianluca Guida
145b6cbf720SGianluca Guida	shrq	$8,%rdx
146b6cbf720SGianluca Guida	movb	%dl,(%rdi)
147b6cbf720SGianluca Guida	incq	%rdi
148b6cbf720SGianluca Guida	testb	%dl,%dl		/* 6th byte == 0? */
149b6cbf720SGianluca Guida	je	.Ldone
150b6cbf720SGianluca Guida
151b6cbf720SGianluca Guida	shrq	$8,%rdx
152b6cbf720SGianluca Guida	movb	%dl,(%rdi)
153b6cbf720SGianluca Guida	incq	%rdi
154b6cbf720SGianluca Guida	testb	%dl,%dl		/* 7th byte == 0? */
155b6cbf720SGianluca Guida	je	.Ldone
156b6cbf720SGianluca Guida
157b6cbf720SGianluca Guida	shrq	$8,%rdx
158b6cbf720SGianluca Guida	movb	%dl,(%rdi)
159b6cbf720SGianluca Guida	incq	%rdi
160b6cbf720SGianluca Guida	testb	%dl,%dl		/* 8th byte == 0? */
161b6cbf720SGianluca Guida	jne	.Lcopy_aligned
162b6cbf720SGianluca Guida
	/* Terminator stored; %rax was set at entry and never modified since. */
163b6cbf720SGianluca Guida.Ldone:
164b6cbf720SGianluca Guida	ret
165*0a6a1f1dSLionel SambucEND(strcat)
166