xref: /dflybsd-src/lib/libc/x86_64/string/strcat.S (revision 8b927cb7b72266b97393ee565d882d7fddfa4375)
/*
 * Written by J.T. Conklin <jtc@acorntoolworks.com>
 * Public domain.
 *
 * $NetBSD: strcat.S,v 1.4 2004/07/26 18:51:21 drochner Exp $
 * $FreeBSD: src/lib/libc/amd64/string/strcat.S,v 1.2 2008/11/02 01:10:54 peter Exp $
 */

#include <machine/asm.h>

/*
 * char *strcat(char *dst, const char *src)
 *
 * Appends the NUL-terminated string at src (including its NUL) to the
 * end of the NUL-terminated string at dst and returns dst.
 *
 * Syntax: AT&T / GNU as.  Leaf routine; the stack is not touched.
 *
 * Register use:
 *	%rdi	in:  dst.  Used as the scan cursor, then the write cursor.
 *	%rsi	in:  src.  Used as the read cursor.
 *	%rax	out: the original dst.
 *	%rdx	word (or byte, in %dl) being examined or copied.
 *	%rcx	scratch for the NUL-candidate test in the copy loop.
 *	%r8	0x0101010101010101
 *	%r9	0x8080808080808080
 *
 * Both phases advance one byte at a time until the pointer they read
 * through is 8-byte aligned, then proceed one 64-bit word at a time.
 * A word w is flagged as possibly holding a NUL when
 *	(w - %r8) & %r9 != 0.
 * A zero byte always trips this test, but so does any byte of 0x81 or
 * above, so every flagged word is rechecked byte by byte and the word
 * loop is resumed when no NUL is actually present.
 */
ENTRY(strcat)
	movq	%rdi,%rax		/* return value: original dst */
	movabsq	$0x0101010101010101,%r8
	movabsq	$0x8080808080808080,%r9

	/*
	 * Phase 1: locate the terminating NUL of dst.
	 * Walk single bytes until %rdi is 8-byte aligned.
	 * Consider unrolling loop?
	 */
.Lscan_align:
	testb	$7,%dil
	je	.Lscan_aligned
	cmpb	$0,(%rdi)
	je	.Lcopy			/* NUL found before alignment */
	incq	%rdi
	jmp	.Lscan_align

	/*
	 * 16-byte align the loop head.  (.p2align takes a power of two;
	 * plain ".align 4" is a byte count on x86-64 ELF and would only
	 * give 4-byte alignment.)  Never reached by fall-through.
	 */
	.p2align 4
.Lscan_aligned:
.Lscan_loop:
	movq	(%rdi),%rdx
	addq	$8,%rdi			/* %rdi now points past the word */
	subq	%r8,%rdx
	testq	%r9,%rdx
	je	.Lscan_loop		/* no NUL candidate in this word */

	/*
	 * The word was flagged.  %rdi is 8 past its first byte, so the
	 * bytes sit at -8(%rdi) .. -1(%rdi).  Back %rdi up onto the
	 * first zero byte, or return to the loop if there is none
	 * (false positive).
	 */

	cmpb	$0,-8(%rdi)		/* 1st byte == 0? */
	jne	.Lscan_byte2
	subq	$8,%rdi
	jmp	.Lcopy

.Lscan_byte2:
	cmpb	$0,-7(%rdi)		/* 2nd byte == 0? */
	jne	.Lscan_byte3
	subq	$7,%rdi
	jmp	.Lcopy

.Lscan_byte3:
	cmpb	$0,-6(%rdi)		/* 3rd byte == 0? */
	jne	.Lscan_byte4
	subq	$6,%rdi
	jmp	.Lcopy

.Lscan_byte4:
	cmpb	$0,-5(%rdi)		/* 4th byte == 0? */
	jne	.Lscan_byte5
	subq	$5,%rdi
	jmp	.Lcopy

.Lscan_byte5:
	cmpb	$0,-4(%rdi)		/* 5th byte == 0? */
	jne	.Lscan_byte6
	subq	$4,%rdi
	jmp	.Lcopy

.Lscan_byte6:
	cmpb	$0,-3(%rdi)		/* 6th byte == 0? */
	jne	.Lscan_byte7
	subq	$3,%rdi
	jmp	.Lcopy

.Lscan_byte7:
	cmpb	$0,-2(%rdi)		/* 7th byte == 0? */
	jne	.Lscan_byte8
	subq	$2,%rdi
	jmp	.Lcopy

.Lscan_byte8:
	cmpb	$0,-1(%rdi)		/* 8th byte == 0? */
	jne	.Lscan_loop		/* false positive: keep scanning */
	subq	$1,%rdi

	/*
	 * Phase 2: copy src to the NUL position now held in %rdi.
	 * Copy single bytes until %rsi is 8-byte aligned; return at
	 * once if the NUL is copied during this step.
	 * Consider unrolling loop?
	 */
.Lcopy:
.Lcopy_align:
	testb	$7,%sil
	je	.Lcopy_aligned
	movb	(%rsi),%dl
	incq	%rsi
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl
	jne	.Lcopy_align
	ret

	/*
	 * Word copy loop, 16-byte aligned (see the note on .p2align
	 * above).  Entry is at .Lcopy_aligned; the store at the loop
	 * head writes the word loaded by the previous iteration once it
	 * is known to hold no NUL candidate.
	 */
	.p2align 4
.Lcopy_loop:
	movq	%rdx,(%rdi)
	addq	$8,%rdi
.Lcopy_aligned:
	movq	(%rsi),%rdx
	movq	%rdx,%rcx		/* test a copy; %rdx keeps the data */
	addq	$8,%rsi
	subq	%r8,%rcx
	testq	%r9,%rcx
	je	.Lcopy_loop

	/*
	 * The word in %rdx was flagged.  Store it one byte at a time,
	 * lowest byte first, stopping after the NUL has been written.
	 * If all eight bytes turn out to be non-zero (false positive),
	 * %rdi has advanced by 8 and the word loop is resumed.
	 */

	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl			/* 1st byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl			/* 2nd byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl			/* 3rd byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl			/* 4th byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl			/* 5th byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl			/* 6th byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl			/* 7th byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl			/* 8th byte == 0? */
	jne	.Lcopy_aligned		/* false positive: keep copying */

.Ldone:
	ret
END(strcat)

	/*
	 * Empty .note.GNU-stack section with no flags; GNU toolchains
	 * read this as "this object does not need an executable stack".
	 */
	.section .note.GNU-stack,"",%progbits
167