xref: /netbsd-src/common/lib/libc/arch/x86_64/string/strcpy.S (revision 6ac3c1f4d733c326672516f410f023e80dd3f1bb)
/*
 * Written by J.T. Conklin <jtc@acorntoolworks.com>
 * Public domain.
 */

#include <machine/asm.h>

/*
 * Revision identification string; only emitted when LIBC_SCCS is defined.
 * RCSID() is not defined in this file (presumably it comes from
 * <machine/asm.h>).
 */
#if defined(LIBC_SCCS)
	RCSID("$NetBSD: strcpy.S,v 1.2 2014/03/22 19:16:34 jakllsch Exp $")
#endif

/*
 * This strcpy implementation copies a byte at a time until the
 * source pointer is aligned to a word boundary; it then copies by
 * words until it finds a word containing a zero byte, and finally
 * copies by bytes until the end of the string is reached.
 *
 * While this may result in unaligned stores if the source and
 * destination pointers are unaligned with respect to each other,
 * it is still faster than either byte copies or the overhead of
 * an implementation suitable for machines with strict alignment
 * requirements.
 */

/*
 * char *strcpy(char *dst, const char *src)
 *
 * In:       %rdi = dst, %rsi = src
 * Out:      %rax = dst (the value %rdi had on entry)
 * Clobbers: %rcx, %rdx, %rsi, %rdi, %r8, %r9, flags
 *
 * Register roles inside the function:
 *   %r8  = 0x0101010101010101  \  constants for the
 *   %r9  = 0x8080808080808080  /  "word contains a zero byte" test
 *   %rdx = byte or word most recently loaded from the source
 *   %rcx = scratch copy of %rdx used by the zero-byte test
 *
 * No stack is used and no other function is called.
 */
ENTRY(strcpy)
	movq	%rdi,%rax		/* return value: original dst */
	movabsq	$0x0101010101010101,%r8
	movabsq	$0x8080808080808080,%r9

	/*
	 * Align source to a word boundary.
	 * Consider unrolling loop?
	 */
	_ALIGN_TEXT
.Lalign:
	testb	$7,%sil			/* low 3 bits clear => src is 8-byte aligned */
	je	.Lword_aligned
	movb	(%rsi),%dl
	incq	%rsi
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl			/* was the byte just copied the NUL? */
	jne	.Lalign
	ret

	/*
	 * Word loop, entered at .Lword_aligned with %rsi 8-byte aligned.
	 * Each iteration loads one aligned word and computes
	 *	(word - 0x0101010101010101) & 0x8080808080808080
	 * which is zero only if no byte of the word is zero.  In that
	 * case the word is stored (at .Lloop) and the next one is loaded.
	 * The load may read bytes beyond the terminating NUL, but only
	 * within the aligned word that contains it.
	 */
	_ALIGN_TEXT
.Lloop:
	movq	%rdx,(%rdi)		/* commit the word tested last time round */
	addq	$8,%rdi
.Lword_aligned:
	movq	(%rsi),%rdx
	movq	%rdx,%rcx
	addq	$8,%rsi
	subq	%r8,%rcx
	testq	%r9,%rcx
	je	.Lloop			/* no candidate zero byte: keep going */

	/*
	 * In rare cases, the above loop may exit prematurely: the test
	 * never misses a zero byte, but it also fires for words without
	 * one (e.g. a byte of 0x81 or above).  Copy the word one byte at
	 * a time, lowest byte first, stopping right after the NUL.  We
	 * must return to the loop if none of the bytes in the word equal 0.
	 */

	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 1st byte == 0? */
	je	.Ldone

	shrq	$8,%rdx		/* bring the next byte down into %dl */
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 2nd byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 3rd byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 4th byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 5th byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 6th byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 7th byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 8th byte == 0? */
	jne	.Lword_aligned	/* false positive: all 8 bytes copied, resume */

.Ldone:
	ret
END(strcpy)