/* xref: /minix3/common/lib/libc/arch/x86_64/string/strcpy.S (revision 0a6a1f1d05b60e214de2f05a7310ddd1f0e590e7) */
/*
 * Written by J.T. Conklin <jtc@acorntoolworks.com>
 * Public domain.
 */

#include <machine/asm.h>

/* Upstream NetBSD revision tag; passed to RCSID() only when LIBC_SCCS is defined. */
#if defined(LIBC_SCCS)
	RCSID("$NetBSD: strcpy.S,v 1.2 2014/03/22 19:16:34 jakllsch Exp $")
#endif

/*
 * char *strcpy(char *dst, const char *src)
 *
 * Syntax: GAS / AT&T.  ABI: System V AMD64.
 *
 * In:    %rdi = dst, %rsi = src
 * Out:   %rax = dst (the original destination pointer)
 * Clobb: %rcx, %rdx, %rsi, %rdi, %r8, %r9, flags (all caller-saved)
 * Stack: not used; this is a leaf routine.
 *
 * This strcpy implementation copies a byte at a time until the
 * source pointer is aligned to a word boundary.  It then copies by
 * words until it finds a word that may contain a zero byte, and
 * finally copies that word byte by byte until the end of the string
 * is reached (or, if the word held no zero byte after all, resumes
 * the word loop).
 *
 * While this may result in unaligned stores if the source and
 * destination pointers are unaligned with respect to each other,
 * it is still faster than either byte copies or the overhead of
 * an implementation suitable for machines with strict alignment
 * requirements.
 *
 * Register roles inside the routine:
 *   %rsi  next source address to read
 *   %rdi  next destination address to write
 *   %rdx  current source byte (%dl) or source word
 *   %rcx  scratch for the zero-byte test
 *   %r8   0x0101010101010101
 *   %r9   0x8080808080808080
 */

ENTRY(strcpy)
	movq	%rdi,%rax		/* return value = dst */
	movabsq	$0x0101010101010101,%r8
	movabsq	$0x8080808080808080,%r9

	/*
	 * Align source to a word boundary, copying one byte per
	 * iteration and returning early if the terminator is copied.
	 * Consider unrolling loop?
	 */
	_ALIGN_TEXT
.Lalign:
	testb	$7,%sil			/* src 8-byte aligned yet? */
	je	.Lword_aligned
	movb	(%rsi),%dl
	incq	%rsi
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl			/* copied the terminator? */
	jne	.Lalign
	ret

	/*
	 * Word loop.  Entered at .Lword_aligned, so a word is stored
	 * only after the test below found no candidate zero byte in it.
	 * The source is 8-byte aligned here, so each 8-byte load stays
	 * within a single aligned word.
	 */
	_ALIGN_TEXT
.Lloop:
	movq	%rdx,(%rdi)		/* store the word tested last time */
	addq	$8,%rdi
.Lword_aligned:
	movq	(%rsi),%rdx
	movq	%rdx,%rcx
	addq	$8,%rsi
	subq	%r8,%rcx		/* rcx = word - 0x01..01 */
	testq	%r9,%rcx		/* any byte's top bit set afterwards? */
	je	.Lloop			/* no: word has no zero byte */

	/*
	 * The test above never misses a zero byte, but it can also
	 * fire when no byte is zero (e.g. a byte greater than 0x80).
	 * In those rare cases the loop has exited prematurely, so the
	 * word is copied a byte at a time and we must return to the
	 * loop if none of the bytes in the word equal 0.
	 */

	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 1st byte == 0? */
	je	.Ldone

	shrq	$8,%rdx		/* bring the next byte down into %dl */
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 2nd byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 3rd byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 4th byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 5th byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 6th byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 7th byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 8th byte == 0? */
	jne	.Lword_aligned	/* false positive: all 8 bytes copied, resume */

.Ldone:
	ret
END(strcpy)