xref: /netbsd-src/common/lib/libc/arch/i386/string/strcpy.S (revision 2c56941e163201dcb781df7fdeec6bd093647c91)
/*
 * Written by J.T. Conklin <jtc@acorntoolworks.com>
 * Public domain.
 */
537c9f0a6Schristos
637c9f0a6Schristos#include <machine/asm.h>
737c9f0a6Schristos
837c9f0a6Schristos#if defined(LIBC_SCCS)
9*2c56941eSjakllsch	RCSID("$NetBSD: strcpy.S,v 1.2 2014/03/22 19:38:46 jakllsch Exp $")
1037c9f0a6Schristos#endif
1137c9f0a6Schristos
/*
 * char *strcpy(char *dst, const char *src)
 *
 * Strategy:
 *   1. Move single bytes until the source pointer is 4-byte aligned.
 *   2. Move whole 32-bit words for as long as a cheap test proves the
 *      word holds no zero byte.
 *   3. When the test cannot rule out a zero byte, move that word one
 *      byte at a time, stopping at the terminator; if the word turns
 *      out to contain none, resume step 2.
 *
 * Only the source is aligned, so the word stores to the destination
 * may be unaligned when the two pointers differ in alignment.  That is
 * still faster than copying bytes throughout, or than the overhead of
 * an implementation written for strict-alignment machines.
 *
 * Register use:
 *   %eax  source cursor
 *   %ecx  destination cursor
 *   %ebx  byte/word in flight (saved on entry, restored on exit)
 *   %edx  scratch for the zero-byte test
 */

ENTRY(strcpy)
	pushl	%ebx
	movl	12(%esp),%eax		/* %eax = src (second stack argument) */
	movl	8(%esp),%ecx		/* %ecx = dst (first stack argument) */

	/*
	 * Bytewise prologue: runs until the low two bits of the source
	 * cursor are clear, or until the terminator has been copied.
	 * Consider unrolling loop?
	 */
	_ALIGN_TEXT
.Lhead_byte:
	testl	$3,%eax
	jz	.Lnext_word
	movb	(%eax),%bl
	movb	%bl,(%ecx)
	incl	%eax
	incl	%ecx
	testb	%bl,%bl			/* was that the terminator? */
	jnz	.Lhead_byte
	jmp	.Lfinish

	/*
	 * Word loop.  .Lstore_word is only reached once the word in %ebx
	 * is known to contain no zero byte.
	 */
	_ALIGN_TEXT
.Lstore_word:
	movl	%ebx,(%ecx)
	addl	$4,%ecx
.Lnext_word:
	movl	(%eax),%ebx
	/*
	 * (word - 0x01010101) & 0x80808080 is zero only when no byte of
	 * the word is zero.  A non-zero result merely means "maybe": a
	 * byte of 0x81 or above also sets a marker bit.
	 */
	leal	-0x01010101(%ebx),%edx
	addl	$4,%eax
	testl	$0x80808080,%edx
	jz	.Lstore_word

	/*
	 * The word may hold the terminator.  Emit its four bytes in
	 * memory order (%bl, %bh, then the upper half shifted down),
	 * leaving as soon as a zero byte has been written.
	 */

	movb	%bl,(%ecx)		/* byte 0 */
	incl	%ecx
	testb	%bl,%bl
	jz	.Lfinish

	movb	%bh,(%ecx)		/* byte 1 */
	incl	%ecx
	testb	%bh,%bh
	jz	.Lfinish

	shrl	$16,%ebx		/* bring bytes 2 and 3 into %bl/%bh */
	movb	%bl,(%ecx)		/* byte 2 */
	incl	%ecx
	testb	%bl,%bl
	jz	.Lfinish

	movb	%bh,(%ecx)		/* byte 3 */
	incl	%ecx
	testb	%bh,%bh
	jnz	.Lnext_word		/* false alarm: no zero byte, keep going */

.Lfinish:
	popl	%ebx
	movl	4(%esp),%eax		/* return the original dst */
	ret
END(strcpy)
88