/* xref: /minix3/common/lib/libc/arch/sh3/string/memset.S (revision b6cbf7203b080219de306404f8022a65b7884f33) */
/*	$NetBSD: memset.S,v 1.2 2008/02/16 17:37:13 apb Exp $	*/

/*-
 * Copyright (c) 2002 SHIMIZU Ryo.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* ENTRY() and RCSID() used in this file are presumably supplied by this header. */
#include <machine/asm.h>

/* Embed the revision string only in SCCS-tagged, non-lint builds. */
#if defined(LIBC_SCCS) && !defined(lint)
	RCSID("$NetBSD: memset.S,v 1.2 2008/02/16 17:37:13 apb Exp $")
#endif

/*
 * Register roles.
 *
 *   REG_PTR   (r0)  end-of-region pointer in the large-fill path.  It has
 *                   to be r0 because the "tst #imm" instructions applied
 *                   to it only accept r0 as their register operand.
 *   REG_TMP1  (r1)  scratch.
 *   REG_DST         destination pointer, read as passed in r4.
 *   REG_C           fill value: passed in r5 for memset, forced to 0 in a
 *                   scratch register (r2) for bzero.
 *   REG_LEN         byte count: r5 for bzero, r6 for memset.
 *   REG_DST0  (r3)  memset only: copy of the original dst, moved to r0 as
 *                   the return value.
 *
 * r0 is also used directly (not through REG_PTR) as the store source and
 * jump-address register in the small-fill paths.
 */
#define	REG_PTR				r0
#define	REG_TMP1			r1

#ifdef BZERO
# define	REG_C			r2
# define	REG_DST			r4
# define	REG_LEN			r5
#else
# define	REG_DST0		r3
# define	REG_DST			r4
# define	REG_C			r5
# define	REG_LEN			r6
#endif

/*
 * bzero(dst, len)       when built with BZERO defined
 * memset(dst, c, len)   otherwise; the original dst is returned in r0
 *
 * Dispatch on the length:
 *   len >= 28        -> large  (32-bit stores, 32 bytes per iteration)
 *   12 <= len < 28   -> small  (computed jump into a run of stores)
 *   len < 12         -> the byte loop below
 *
 * bt/s and bf/s are delayed branches: the instruction that follows one
 * of them is executed whether or not the branch is taken.
 */
#ifdef BZERO
ENTRY(bzero)
#else
ENTRY(memset)
	mov	REG_DST,REG_DST0	/* for return value */
#endif
	/* small amount to fill ? */
	mov	#28,REG_TMP1
	cmp/hs	REG_TMP1,REG_LEN	/* if (len >= 28) goto large; */
	bt/s	large
	mov	#12,REG_TMP1		/* if (len >= 12) goto small; */
	cmp/hs	REG_TMP1,REG_LEN
	bt/s	small
#ifdef BZERO
	mov	#0,REG_C		/* delay slot: fill value is 0 */
#endif
	/*
	 * very little fill (0 ~ 11 bytes)
	 *
	 * In the memset build the tst below sits in the delay slot of
	 * "bt/s small"; that is harmless because small sets T itself
	 * before testing it.
	 *
	 * From here on REG_LEN is the end pointer (dst + len) and REG_DST
	 * is dst + 1.  Bytes are stored from the end downwards; T is set
	 * when the next store is the one that lands on dst.
	 */
	tst	REG_LEN,REG_LEN		/* T = (len == 0) */
	add	REG_DST,REG_LEN		/* REG_LEN = dst + len */
	bt/s	done			/* nothing to fill */
	add	#1,REG_DST		/* delay slot: REG_DST = dst + 1 */

	/* unroll 4 loops */
	cmp/eq	REG_DST,REG_LEN
1:	mov.b	REG_C,@-REG_LEN
	bt/s	done
	cmp/eq	REG_DST,REG_LEN
	mov.b	REG_C,@-REG_LEN
	bt/s	done
	cmp/eq	REG_DST,REG_LEN
	mov.b	REG_C,@-REG_LEN
	bt/s	done
	cmp/eq	REG_DST,REG_LEN
	mov.b	REG_C,@-REG_LEN
	bf/s	1b
	cmp/eq	REG_DST,REG_LEN
done:
#ifdef BZERO
	rts
	nop
#else
	rts
	mov	REG_DST0,r0		/* delay slot: return original dst */
#endif


/*
 * small: 12 <= len < 28.
 *
 * An even dst is handed to small_aligned.  For an odd dst the fill is
 * done with byte stores reached through a computed jump: every store
 * below is one 2-byte instruction writing one byte, so entering the run
 * 2 * len bytes before label 1 executes exactly the stores for offsets
 * len-1 down to 0.  REG_TMP1 = dst + 16 supplies offsets 16..31, since
 * the store displacement is limited to 0..15.
 */
small:
	mov	REG_DST,r0
	tst	#1,r0			/* T = dst is even */
	bt/s	small_aligned
	mov	REG_DST,REG_TMP1	/* delay slot, executed on both paths */
	shll	REG_LEN			/* len * 2 = bytes of code to run */
	mova	1f,r0			/* 1f must be 4bytes aligned! */
	add	#16,REG_TMP1		/* REG_TMP1 = dst+16; */
	sub	REG_LEN,r0		/* r0 = entry point inside the run */
	jmp	@r0
	mov	REG_C,r0		/* delay slot: the stores take r0 as source */

	/*
	 * The .align plus the 32 two-byte instructions that follow put
	 * label 1 on a 4-byte boundary, as mova requires.
	 */
	.align	2
	mov.b	r0,@(15,REG_TMP1)
	mov.b	r0,@(14,REG_TMP1)
	mov.b	r0,@(13,REG_TMP1)
	mov.b	r0,@(12,REG_TMP1)
	mov.b	r0,@(11,REG_TMP1)
	mov.b	r0,@(10,REG_TMP1)
	mov.b	r0,@(9,REG_TMP1)
	mov.b	r0,@(8,REG_TMP1)
	mov.b	r0,@(7,REG_TMP1)
	mov.b	r0,@(6,REG_TMP1)
	mov.b	r0,@(5,REG_TMP1)
	mov.b	r0,@(4,REG_TMP1)
	mov.b	r0,@(3,REG_TMP1)
	mov.b	r0,@(2,REG_TMP1)
	mov.b	r0,@(1,REG_TMP1)
	mov.b	r0,@REG_TMP1
	mov.b	r0,@(15,REG_DST)
	mov.b	r0,@(14,REG_DST)
	mov.b	r0,@(13,REG_DST)
	mov.b	r0,@(12,REG_DST)
	mov.b	r0,@(11,REG_DST)
	mov.b	r0,@(10,REG_DST)
	mov.b	r0,@(9,REG_DST)
	mov.b	r0,@(8,REG_DST)
	mov.b	r0,@(7,REG_DST)
	mov.b	r0,@(6,REG_DST)
	mov.b	r0,@(5,REG_DST)
	mov.b	r0,@(4,REG_DST)
	mov.b	r0,@(3,REG_DST)
	mov.b	r0,@(2,REG_DST)
	mov.b	r0,@(1,REG_DST)
#ifdef BZERO
	/*
	 * bzero: the offset-0 store is the delay slot of rts, so label 1
	 * sits on it and the rts occupies one slot of the counted run.
	 */
	rts
1:	mov.b	r0,@REG_DST
#else
	mov.b	r0,@REG_DST
1:	rts
	mov	REG_DST0,r0		/* delay slot: return original dst */
#endif


/*
 * 2 bytes aligned small fill
 *
 * Reached from small with an even dst and 12 <= len < 28.  An odd len
 * has its last byte stored separately, then the remaining even count is
 * filled with 16-bit stores through a computed jump: each store is one
 * 2-byte instruction writing 2 bytes, so entering the run len bytes
 * before label 1 executes exactly len / 2 stores.
 */
small_aligned:
#ifndef BZERO
	/* replicate the fill byte into the low 16 bits of REG_C */
	extu.b	REG_C,REG_TMP1		/* REG_C = ??????xx, REG_TMP1 = 000000xx */
	shll8	REG_C			/* REG_C = ????xx00, REG_TMP1 = 000000xx */
	or	REG_TMP1,REG_C		/* REG_C = ????xxxx */
#endif

	mov	REG_LEN,r0
	tst	#1,r0			/* len is aligned? */
	bt/s	1f
	add	#-1,r0			/* delay slot: r0 = len - 1 */
	mov.b	REG_C,@(r0,REG_DST)	/* fill last a byte */
	mov	r0,REG_LEN		/* len is now even */
1:

	mova	1f,r0			/* 1f must be 4bytes aligned! */
	sub	REG_LEN,r0		/* r0 = entry point inside the run */
	jmp	@r0
	mov	REG_C,r0		/* delay slot: the stores take r0 as source */

	/*
	 * The .align plus the 16 two-byte instructions that follow put
	 * label 1 on a 4-byte boundary, as mova requires.
	 */
	.align	2
	mov.w	r0,@(30,REG_DST)
	mov.w	r0,@(28,REG_DST)
	mov.w	r0,@(26,REG_DST)
	mov.w	r0,@(24,REG_DST)
	mov.w	r0,@(22,REG_DST)
	mov.w	r0,@(20,REG_DST)
	mov.w	r0,@(18,REG_DST)
	mov.w	r0,@(16,REG_DST)
	mov.w	r0,@(14,REG_DST)
	mov.w	r0,@(12,REG_DST)
	mov.w	r0,@(10,REG_DST)
	mov.w	r0,@(8,REG_DST)
	mov.w	r0,@(6,REG_DST)
	mov.w	r0,@(4,REG_DST)
	mov.w	r0,@(2,REG_DST)
#ifdef BZERO
	/*
	 * bzero: the offset-0 store is the delay slot of rts, so label 1
	 * sits on it and the rts occupies one slot of the counted run.
	 */
	rts
1:	mov.w	r0,@REG_DST
#else
	mov.w	r0,@REG_DST
1:	rts
	mov	REG_DST0,r0		/* delay slot: return original dst */
#endif



/*
 * large: len >= 28.
 *
 * The fill byte is replicated into all four bytes of REG_C, then both
 * ends of the region are brought to 4-byte alignment (unaligned_dst /
 * unaligned_len) before the 32-bit fill at "aligned".
 *
 * At "aligned": REG_DST and REG_PTR are both 4-byte aligned, REG_PTR is
 * the end of the part still to be filled and REG_LEN = REG_PTR - REG_DST.
 * The region is filled from the end downwards.
 */
	.align	2
large:
#ifdef BZERO
	mov	#0,REG_C
#else
	extu.b	REG_C,REG_TMP1		/* REG_C = ??????xx, REG_TMP1 = 000000xx */
	shll8	REG_C			/* REG_C = ????xx00, REG_TMP1 = 000000xx */
	or	REG_C,REG_TMP1		/* REG_C = ????xx00, REG_TMP1 = ????xxxx */
	swap.w	REG_TMP1,REG_C		/* REG_C = xxxx????, REG_TMP1 = ????xxxx */
	xtrct	REG_TMP1,REG_C		/* REG_C = xxxxxxxx */
#endif

	mov	#3,REG_TMP1
	tst	REG_TMP1,REG_DST	/* T = dst is 4-byte aligned */
	mov	REG_DST,REG_PTR
	bf/s	unaligned_dst
	add	REG_LEN,REG_PTR		/* REG_PTR = dst + len; */
	tst	REG_TMP1,REG_LEN	/* T = len is a multiple of 4 */
	bf/s	unaligned_len
	/* the mov just after "aligned:" is the delay slot of the bf/s above */

aligned:
	/* fill 32*n bytes */
	mov	#32,REG_TMP1
	cmp/hi	REG_LEN,REG_TMP1	/* T = (32 > len) */
	bt	9f
	.align	2
1:	sub	REG_TMP1,REG_PTR	/* ptr -= 32 */
	mov.l	REG_C,@REG_PTR
	sub	REG_TMP1,REG_LEN	/* len -= 32 */
	mov.l	REG_C,@(4,REG_PTR)
	cmp/hi	REG_LEN,REG_TMP1	/* T = (32 > len), tested by bf/s below */
	mov.l	REG_C,@(8,REG_PTR)
	mov.l	REG_C,@(12,REG_PTR)
	mov.l	REG_C,@(16,REG_PTR)
	mov.l	REG_C,@(20,REG_PTR)
	mov.l	REG_C,@(24,REG_PTR)
	bf/s	1b
	mov.l	REG_C,@(28,REG_PTR)	/* delay slot: last word of this chunk */
9:

	/*
	 * fill left 4*n bytes
	 *
	 * REG_DST is advanced by 4 so that T is set when the next store
	 * is the one that lands on the start of the region.
	 */
	cmp/eq	REG_DST,REG_PTR
	bt	9f			/* nothing left */
	add	#4,REG_DST
	cmp/eq	REG_DST,REG_PTR
1:	mov.l	REG_C,@-REG_PTR
	bt/s	9f
	cmp/eq	REG_DST,REG_PTR
	mov.l	REG_C,@-REG_PTR
	bt/s	9f
	cmp/eq	REG_DST,REG_PTR
	mov.l	REG_C,@-REG_PTR
	bt/s	9f
	cmp/eq	REG_DST,REG_PTR
	mov.l	REG_C,@-REG_PTR
	bf/s	1b
	cmp/eq	REG_DST,REG_PTR
9:
#ifdef BZERO
	rts
	nop
#else
	rts
	mov	REG_DST0,r0		/* delay slot: return original dst */
#endif


/*
 * unaligned_dst: dst is not 4-byte aligned.  Store up to one byte and
 * one 16-bit word at the head so that REG_DST becomes 4-byte aligned,
 * then trim the tail the same way.
 *
 * unaligned_len: also entered directly from large when dst is aligned
 * but len is not a multiple of 4.  Store up to one byte and one 16-bit
 * word at the tail so that REG_PTR becomes 4-byte aligned.
 *
 * Both paths finish by recomputing REG_LEN = REG_PTR - REG_DST and
 * branching back to aligned.
 */
unaligned_dst:
	mov	#1,REG_TMP1
	tst	REG_TMP1,REG_DST	/* if (dst & 1) {               */
	add	#1,REG_TMP1		/* REG_TMP1 = 2                 */
	bt/s	2f
	tst	REG_TMP1,REG_DST	/* delay slot: T = !(dst & 2)   */
	mov.b	REG_C,@REG_DST		/*   *dst++ = c;                */
	add	#1,REG_DST
	tst	REG_TMP1,REG_DST	/*   T = !(dst & 2), new dst    */
2:					/* }                            */
					/* if (dst & 2) {               */
	bt	4f
	mov.w	REG_C,@REG_DST		/*   *(uint16_t *)dst = c;      */
	add	#2,REG_DST		/*   dst += 2;                  */
4:					/* }                            */


	tst	#3,REG_PTR		/* if (ptr & 3) {               */
	bt/s	4f			/* (next tst is its delay slot) */
unaligned_len:
	tst	#1,REG_PTR		/*   if (ptr & 1) {             */
	bt/s	2f
	tst	#2,REG_PTR		/* delay slot: T = !(ptr & 2)   */
	mov.b	REG_C,@-REG_PTR		/*     *--ptr = c;              */
2:					/*   }                          */
					/*   if (ptr & 2) {             */
	bt	4f
	mov.w	REG_C,@-REG_PTR		/*     *--(uint16_t*)ptr = c;  */
4:					/*   }                          */
					/* }                            */

	mov	REG_PTR,REG_LEN
	bra	aligned
	sub	REG_DST,REG_LEN		/* delay slot: len = ptr - dst  */