/* xref: /minix3/common/lib/libc/arch/i386/string/strcat.S (revision 0a6a1f1d05b60e214de2f05a7310ddd1f0e590e7) */
/*
 * Written by J.T. Conklin <jtc@acorntoolworks.com>
 * Public domain.
 */

#include <machine/asm.h>

#if defined(LIBC_SCCS)
	RCSID("$NetBSD: strcat.S,v 1.2 2014/03/22 19:38:46 jakllsch Exp $")
#endif

/*
 * char *strcat(char *dst, const char *src)
 *
 * Append the NUL-terminated string at src to the end of the
 * NUL-terminated string at dst (the terminator is copied too) and
 * return dst.
 *
 * Arguments are read from the stack; the offsets below are the ones
 * in effect after %ebx has been pushed:
 *	 8(%esp)	dst
 *	12(%esp)	src
 *
 * Register use:
 *	%ecx	cursor into dst (scan position, then store position)
 *	%eax	cursor into src; reloaded with dst as the return value
 *	%ebx	byte/word currently being examined (saved and restored)
 *	%edx	scratch for the zero-byte test
 *
 * Both phases fetch whole words from 4-byte-aligned addresses, so a
 * load may cover up to three bytes past a terminator, but only within
 * the aligned word that contains it.  Only words known to contain no
 * zero byte are stored as words; the final word is stored bytewise up
 * to and including the terminator.
 *
 * Zero-byte test: (w - 0x01010101) & 0x80808080 is non-zero whenever
 * some byte of w is zero.  It can also be non-zero when no byte is
 * zero (for instance a byte greater than 0x80), so every hit is
 * confirmed byte by byte before it is acted upon.
 */
ENTRY(strcat)
	pushl	%ebx
	movl	8(%esp),%ecx		/* %ecx = dst */
	movl	12(%esp),%eax		/* %eax = src */

	/*
	 * Phase 1: find the terminator of dst.
	 *
	 * Align destination to word boundary.
	 * Consider unrolling loop?
	 */
.Lscan:
.Lscan_align:
	testb	$3,%cl			/* low two address bits clear? */
	je	.Lscan_aligned
	cmpb	$0,(%ecx)
	je	.Lcopy			/* %ecx already at the terminator */
	incl	%ecx
	jmp	.Lscan_align

	_ALIGN_TEXT
.Lscan_aligned:
.Lscan_loop:
	movl	(%ecx),%ebx
	addl	$4,%ecx			/* %ecx now points past this word */
	leal	-0x01010101(%ebx),%edx
	testl	$0x80808080,%edx
	je	.Lscan_loop		/* certainly no zero byte: next word */

	/*
	 * In rare cases, the above loop may exit prematurely. We must
	 * return to the loop if none of the bytes in the word equal 0.
	 */

	/*
	 * The optimal code for determining whether each byte is zero
	 * differs by processor.  This space-optimized code should be
	 * acceptable on all, especially since we don't expect it to
	 * be run frequently.
	 *
	 * %ecx is one word ahead here, so each case steps it back onto
	 * the zero byte that was found.
	 */

	testb	%bl,%bl		/* 1st byte == 0? */
	jne	1f
	subl	$4,%ecx
	jmp	.Lcopy

1:	testb	%bh,%bh		/* 2nd byte == 0? */
	jne	1f
	subl	$3,%ecx
	jmp	.Lcopy

1:	shrl	$16,%ebx		/* bring bytes 3 and 4 into %bl/%bh */
	testb	%bl,%bl		/* 3rd byte == 0? */
	jne	1f
	subl	$2,%ecx
	jmp	.Lcopy

1:	testb	%bh,%bh		/* 4th byte == 0? */
	jne	.Lscan_loop		/* false alarm: keep scanning */
	subl	$1,%ecx

	/*
	 * Phase 2: copy src, terminator included, to the end of dst.
	 *
	 * Align source to a word boundary.
	 * Consider unrolling loop?
	 */
.Lcopy:
.Lcopy_align:
	testl	$3,%eax
	je	.Lcopy_aligned
	movb	(%eax),%bl
	incl	%eax
	movb	%bl,(%ecx)
	incl	%ecx
	testb	%bl,%bl
	jne	.Lcopy_align
	jmp	.Ldone			/* terminator copied while aligning */

	_ALIGN_TEXT
.Lcopy_loop:
	movl	%ebx,(%ecx)		/* word holds no zero byte: store it whole */
	addl	$4,%ecx
.Lcopy_aligned:
	movl	(%eax),%ebx
	addl	$4,%eax
	leal	-0x01010101(%ebx),%edx
	testl	$0x80808080,%edx
	je	.Lcopy_loop

	/*
	 * In rare cases, the above loop may exit prematurely. We must
	 * return to the loop if none of the bytes in the word equal 0.
	 *
	 * The word is stored one byte at a time, stopping right after
	 * the first zero byte, so nothing beyond the terminator is
	 * written to dst.
	 */

	movb	%bl,(%ecx)
	incl	%ecx
	testb	%bl,%bl
	je	.Ldone

	movb	%bh,(%ecx)
	incl	%ecx
	testb	%bh,%bh
	je	.Ldone

	shrl	$16,%ebx		/* bring bytes 3 and 4 into %bl/%bh */
	movb	%bl,(%ecx)
	incl	%ecx
	testb	%bl,%bl
	je	.Ldone

	movb	%bh,(%ecx)
	incl	%ecx
	testb	%bh,%bh
	jne	.Lcopy_aligned		/* false alarm: all four bytes stored */

.Ldone:
	movl	8(%esp),%eax		/* return value: the original dst */
	popl	%ebx
	ret
END(strcat)
129