xref: /minix3/common/lib/libc/arch/i386/string/strcpy.S (revision 0a6a1f1d05b60e214de2f05a7310ddd1f0e590e7)
/*
 * Written by J.T. Conklin <jtc@acorntoolworks.com>
 * Public domain.
 */
5b6cbf720SGianluca Guida
/*
 * ENTRY, END, _ALIGN_TEXT and RCSID are not defined in this file;
 * presumably they are provided by this header.
 */
#include <machine/asm.h>

#if defined(LIBC_SCCS)
	RCSID("$NetBSD: strcpy.S,v 1.2 2014/03/22 19:38:46 jakllsch Exp $")
#endif
11b6cbf720SGianluca Guida
/*
 * This strcpy implementation copies a byte at a time until the
 * source pointer is aligned to a word boundary; it then copies by
 * words until it finds a word containing a zero byte, and finally
 * copies by bytes until the end of the string is reached.
 *
 * While this may result in unaligned stores if the source and
 * destination pointers are unaligned with respect to each other,
 * it is still faster than either byte copies or the overhead of
 * an implementation suitable for machines with strict alignment
 * requirements.
 */
24b6cbf720SGianluca Guida
/*
 * char *strcpy(char *dst, const char *src)
 *
 * Arguments are read from the stack: after the pushl below, dst is
 * at 8(%esp) and src is at 12(%esp).
 *
 * Out:   %eax = dst (reloaded from the stack at .Ldone)
 * Regs:  %eax = source cursor
 *        %ecx = destination cursor
 *        %ebx = byte/word currently being copied (saved and restored)
 *        %edx = scratch for the zero-byte test
 */
ENTRY(strcpy)
	pushl	%ebx			/* restored before ret */
	movl	8(%esp),%ecx		/* %ecx = dst */
	movl	12(%esp),%eax		/* %eax = src */

	/*
	 * Align source to a word boundary.
	 * Consider unrolling loop?
	 */
	_ALIGN_TEXT
.Lalign:
	testl	$3,%eax			/* low two bits clear => 4-byte aligned */
	je	.Lword_aligned
	movb	(%eax),%bl
	incl	%eax
	movb	%bl,(%ecx)
	incl	%ecx
	testb	%bl,%bl			/* was that the terminating NUL? */
	jne	.Lalign
	jmp	.Ldone			/* NUL already stored */

	/*
	 * Word loop.  .Lloop is only reached with a word in %ebx that the
	 * test below reported as containing no zero byte, so the whole
	 * word can be stored.  First entry is at .Lword_aligned.
	 */
	_ALIGN_TEXT
.Lloop:
	movl	%ebx,(%ecx)
	addl	$4,%ecx
.Lword_aligned:
	movl	(%eax),%ebx		/* %eax is 4-byte aligned here */
	addl	$4,%eax
	/*
	 * Zero-byte test: subtract 1 from every byte and look at the
	 * high bit of each byte.  A zero byte always leaves a high bit
	 * set, so a clear result proves the word has no NUL.  The test
	 * can also fire for bytes in the range 0x81..0xff.
	 */
	leal	-0x01010101(%ebx),%edx
	testl	$0x80808080,%edx
	je	.Lloop

	/*
	 * In rare cases, the above loop may exit prematurely. We must
	 * return to the loop if none of the bytes in the word equal 0.
	 *
	 * The four bytes are stored one at a time, lowest address
	 * first, stopping right after the NUL so nothing is written
	 * past the terminator.
	 */

	movb	%bl,(%ecx)		/* byte 0 */
	incl	%ecx
	testb	%bl,%bl
	je	.Ldone

	movb	%bh,(%ecx)		/* byte 1 */
	incl	%ecx
	testb	%bh,%bh
	je	.Ldone

	shrl	$16,%ebx		/* bring bytes 2 and 3 into %bl/%bh */
	movb	%bl,(%ecx)		/* byte 2 */
	incl	%ecx
	testb	%bl,%bl
	je	.Ldone

	movb	%bh,(%ecx)		/* byte 3 */
	incl	%ecx
	testb	%bh,%bh
	/*
	 * No NUL in any of the four bytes: false positive.  %eax was
	 * advanced by 4 at the load and %ecx by the four incl above, so
	 * both cursors are in step; resume with the next word.
	 */
	jne	.Lword_aligned

.Ldone:
	movl	8(%esp),%eax		/* return value = original dst */
	popl	%ebx
	ret
END(strcpy)
88