xref: /openbsd-src/sys/lib/libkern/arch/sh/memset.S (revision a7422b3c5c3da7cd7744e9d209d5ebaad8a66b9a)
1*b12d7c48Smickey/*	$NetBSD: memset.S,v 1.1 2005/12/20 19:28:50 christos Exp $	*/
2*b12d7c48Smickey
3*b12d7c48Smickey/*-
4*b12d7c48Smickey * Copyright (c) 2002 SHIMIZU Ryo.  All rights reserved.
5*b12d7c48Smickey *
6*b12d7c48Smickey * Redistribution and use in source and binary forms, with or without
7*b12d7c48Smickey * modification, are permitted provided that the following conditions
8*b12d7c48Smickey * are met:
9*b12d7c48Smickey * 1. Redistributions of source code must retain the above copyright
10*b12d7c48Smickey *    notice, this list of conditions and the following disclaimer.
11*b12d7c48Smickey * 2. Redistributions in binary form must reproduce the above copyright
12*b12d7c48Smickey *    notice, this list of conditions and the following disclaimer in the
13*b12d7c48Smickey *    documentation and/or other materials provided with the distribution.
14*b12d7c48Smickey * 3. The name of the author may not be used to endorse or promote products
15*b12d7c48Smickey *    derived from this software without specific prior written permission.
16*b12d7c48Smickey *
17*b12d7c48Smickey * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18*b12d7c48Smickey * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19*b12d7c48Smickey * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20*b12d7c48Smickey * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21*b12d7c48Smickey * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22*b12d7c48Smickey * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23*b12d7c48Smickey * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24*b12d7c48Smickey * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25*b12d7c48Smickey * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26*b12d7c48Smickey * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27*b12d7c48Smickey */
28*b12d7c48Smickey
29*b12d7c48Smickey#include <machine/asm.h>
30*b12d7c48Smickey
/*
 * Register aliases.  REG_PTR (r0) and REG_TMP1 (r1) are scratch in both
 * builds.  With BZERO defined the code reads dst from r4 and len from r5
 * and uses r2 for the fill value, which it sets to 0 itself.  Otherwise
 * (memset) dst, c and len are read from r4, r5 and r6, and r3 (REG_DST0)
 * keeps a copy of dst for the return value.
 */
31*b12d7c48Smickey#define	REG_PTR				r0
32*b12d7c48Smickey#define	REG_TMP1			r1
33*b12d7c48Smickey
34*b12d7c48Smickey#ifdef BZERO
35*b12d7c48Smickey# define	REG_C			r2
36*b12d7c48Smickey# define	REG_DST			r4
37*b12d7c48Smickey# define	REG_LEN			r5
38*b12d7c48Smickey#else
39*b12d7c48Smickey# define	REG_DST0		r3
40*b12d7c48Smickey# define	REG_DST			r4
41*b12d7c48Smickey# define	REG_C			r5
42*b12d7c48Smickey# define	REG_LEN			r6
43*b12d7c48Smickey#endif
44*b12d7c48Smickey
/*
 * memset(dst, c, len) / bzero(dst, len)
 *
 * Entry point and the tiny-fill path.  The length is dispatched three ways
 * (unsigned compares):
 *   len >= 28       -> large  (32-bit stores)
 *   12 <= len < 28  -> small  (computed jump into a table of stores)
 *   len < 12        -> the byte loop below, filling downward from dst + len
 * The memset build saves dst in REG_DST0 on entry and copies it to r0 at
 * every return; the bzero build sets no return value.
 *
 * bt/s and bf/s are delayed branches: the instruction that follows them is
 * executed whether or not the branch is taken.
 */
45*b12d7c48Smickey#ifdef BZERO
46*b12d7c48SmickeyENTRY(bzero)
47*b12d7c48Smickey#else
48*b12d7c48SmickeyENTRY(memset)
49*b12d7c48Smickey	mov	REG_DST,REG_DST0	/* keep dst for the return value */
50*b12d7c48Smickey#endif
51*b12d7c48Smickey	/* small amount to fill ? */
52*b12d7c48Smickey	mov	#28,REG_TMP1
53*b12d7c48Smickey	cmp/hs	REG_TMP1,REG_LEN	/* if (len >= 28) goto large; */
54*b12d7c48Smickey	bt/s	large
55*b12d7c48Smickey	mov	#12,REG_TMP1		/* (delay slot) if (len >= 12) goto small; */
56*b12d7c48Smickey	cmp/hs	REG_TMP1,REG_LEN
57*b12d7c48Smickey	bt/s	small
58*b12d7c48Smickey#ifdef BZERO
59*b12d7c48Smickey	mov	#0,REG_C		/* (delay slot) bzero always fills with 0 */
60*b12d7c48Smickey#endif
61*b12d7c48Smickey	/* very little fill (0 ~ 11 bytes) */
62*b12d7c48Smickey	tst	REG_LEN,REG_LEN		/* T = (len == 0); in the memset build this is also the delay slot of "bt/s small" */
63*b12d7c48Smickey	add	REG_DST,REG_LEN		/* REG_LEN = dst + len (end pointer) */
64*b12d7c48Smickey	bt/s	done			/* if (len == 0) return; */
65*b12d7c48Smickey	add	#1,REG_DST		/* (delay slot) REG_DST = dst + 1 */
66*b12d7c48Smickey
67*b12d7c48Smickey	/*
	 * Byte loop, unrolled 4 times.  REG_LEN walks down from dst + len.
	 * Each cmp/eq is evaluated BEFORE the store that follows it and
	 * tested after that store: T set means the end pointer was dst + 1,
	 * i.e. the byte just stored was the last one.
	 */
68*b12d7c48Smickey	cmp/eq	REG_DST,REG_LEN
69*b12d7c48Smickey1:	mov.b	REG_C,@-REG_LEN		/* *--end = c; */
70*b12d7c48Smickey	bt/s	done
71*b12d7c48Smickey	cmp/eq	REG_DST,REG_LEN		/* (delay slot) T for the next store */
72*b12d7c48Smickey	mov.b	REG_C,@-REG_LEN
73*b12d7c48Smickey	bt/s	done
74*b12d7c48Smickey	cmp/eq	REG_DST,REG_LEN
75*b12d7c48Smickey	mov.b	REG_C,@-REG_LEN
76*b12d7c48Smickey	bt/s	done
77*b12d7c48Smickey	cmp/eq	REG_DST,REG_LEN
78*b12d7c48Smickey	mov.b	REG_C,@-REG_LEN
79*b12d7c48Smickey	bf/s	1b			/* more bytes left: loop, else fall into done */
80*b12d7c48Smickey	cmp/eq	REG_DST,REG_LEN
81*b12d7c48Smickeydone:
82*b12d7c48Smickey#ifdef BZERO
83*b12d7c48Smickey	rts
84*b12d7c48Smickey	nop
85*b12d7c48Smickey#else
86*b12d7c48Smickey	rts
87*b12d7c48Smickey	mov	REG_DST0,r0		/* (delay slot) return the original dst */
88*b12d7c48Smickey#endif
89*b12d7c48Smickey
90*b12d7c48Smickey
/*
 * small: 12 <= len < 28.
 *
 * An even dst is handed to small_aligned.  For an odd dst the fill is done
 * by jumping into the table of single-byte stores below.  The table is
 * ordered from offset 31 down to offset 0, so entering it len stores before
 * its end writes exactly offsets len-1 .. 0.  Each store is one 2-byte
 * instruction, hence the jump distance is len * 2 (the shll).
 * The table only uses displacements 0..15 from each base register, so
 * REG_TMP1 = dst + 16 serves offsets 16..31 and REG_DST serves 0..15.
 *
 * Label 1: differs between the builds: in the bzero build the offset-0
 * store sits in the delay slot of rts and carries the label; in the memset
 * build the label is on rts itself, whose delay slot loads the return
 * value.  In both builds 32 instructions separate the .align from 1:, so
 * 1: keeps the alignment mova requires.
 */
91*b12d7c48Smickeysmall:
92*b12d7c48Smickey	mov	REG_DST,r0		/* the immediate tst below tests r0 */
93*b12d7c48Smickey	tst	#1,r0			/* T = (dst is even) */
94*b12d7c48Smickey	bt/s	small_aligned
95*b12d7c48Smickey	mov	REG_DST,REG_TMP1	/* (delay slot) */
96*b12d7c48Smickey	shll	REG_LEN			/* REG_LEN = len * 2 = bytes of table code to run */
97*b12d7c48Smickey	mova	1f,r0			/* 1f must be 4bytes aligned! */
98*b12d7c48Smickey	add	#16,REG_TMP1		/* REG_TMP1 = dst+16; */
99*b12d7c48Smickey	sub	REG_LEN,r0		/* r0 = table entry point */
100*b12d7c48Smickey	jmp	@r0
101*b12d7c48Smickey	mov	REG_C,r0		/* (delay slot) the table stores r0 */
102*b12d7c48Smickey
103*b12d7c48Smickey	.align	2
104*b12d7c48Smickey	mov.b	r0,@(15,REG_TMP1)
105*b12d7c48Smickey	mov.b	r0,@(14,REG_TMP1)
106*b12d7c48Smickey	mov.b	r0,@(13,REG_TMP1)
107*b12d7c48Smickey	mov.b	r0,@(12,REG_TMP1)
108*b12d7c48Smickey	mov.b	r0,@(11,REG_TMP1)
109*b12d7c48Smickey	mov.b	r0,@(10,REG_TMP1)
110*b12d7c48Smickey	mov.b	r0,@(9,REG_TMP1)
111*b12d7c48Smickey	mov.b	r0,@(8,REG_TMP1)
112*b12d7c48Smickey	mov.b	r0,@(7,REG_TMP1)
113*b12d7c48Smickey	mov.b	r0,@(6,REG_TMP1)
114*b12d7c48Smickey	mov.b	r0,@(5,REG_TMP1)
115*b12d7c48Smickey	mov.b	r0,@(4,REG_TMP1)
116*b12d7c48Smickey	mov.b	r0,@(3,REG_TMP1)
117*b12d7c48Smickey	mov.b	r0,@(2,REG_TMP1)
118*b12d7c48Smickey	mov.b	r0,@(1,REG_TMP1)
119*b12d7c48Smickey	mov.b	r0,@REG_TMP1
120*b12d7c48Smickey	mov.b	r0,@(15,REG_DST)
121*b12d7c48Smickey	mov.b	r0,@(14,REG_DST)
122*b12d7c48Smickey	mov.b	r0,@(13,REG_DST)
123*b12d7c48Smickey	mov.b	r0,@(12,REG_DST)
124*b12d7c48Smickey	mov.b	r0,@(11,REG_DST)
125*b12d7c48Smickey	mov.b	r0,@(10,REG_DST)
126*b12d7c48Smickey	mov.b	r0,@(9,REG_DST)
127*b12d7c48Smickey	mov.b	r0,@(8,REG_DST)
128*b12d7c48Smickey	mov.b	r0,@(7,REG_DST)
129*b12d7c48Smickey	mov.b	r0,@(6,REG_DST)
130*b12d7c48Smickey	mov.b	r0,@(5,REG_DST)
131*b12d7c48Smickey	mov.b	r0,@(4,REG_DST)
132*b12d7c48Smickey	mov.b	r0,@(3,REG_DST)
133*b12d7c48Smickey	mov.b	r0,@(2,REG_DST)
134*b12d7c48Smickey	mov.b	r0,@(1,REG_DST)
135*b12d7c48Smickey#ifdef BZERO
136*b12d7c48Smickey	rts
137*b12d7c48Smickey1:	mov.b	r0,@REG_DST		/* (delay slot of rts) offset 0 */
138*b12d7c48Smickey#else
139*b12d7c48Smickey	mov.b	r0,@REG_DST
140*b12d7c48Smickey1:	rts
141*b12d7c48Smickey	mov	REG_DST0,r0		/* (delay slot) return the original dst */
142*b12d7c48Smickey#endif
143*b12d7c48Smickey
144*b12d7c48Smickey
145*b12d7c48Smickey/* 2 bytes aligned small fill */
/*
 * Reached from small when dst is even (12 <= len < 28).
 *
 * The memset build first replicates the fill byte into the low 16 bits of
 * REG_C; in the bzero build REG_C is already 0.  If len is odd, the byte at
 * dst + len - 1 is stored separately and len is rounded down to even.
 * The rest is written by jumping into the table of 16-bit stores below,
 * ordered from offset 30 down to offset 0.  Each 2-byte instruction stores
 * 2 bytes, so the jump distance back from label 1: is simply len.
 *
 * As in the byte table, label 1: is on the last store (rts delay slot) in
 * the bzero build and on rts in the memset build; 16 instructions separate
 * the .align from 1: either way.
 */
146*b12d7c48Smickeysmall_aligned:
147*b12d7c48Smickey#ifndef BZERO
148*b12d7c48Smickey	extu.b	REG_C,REG_TMP1		/* REG_C = ??????xx, REG_TMP1 = 000000xx */
149*b12d7c48Smickey	shll8	REG_C			/* REG_C = ????xx00, REG_TMP1 = 000000xx */
150*b12d7c48Smickey	or	REG_TMP1,REG_C		/* REG_C = ????xxxx */
151*b12d7c48Smickey#endif
152*b12d7c48Smickey
153*b12d7c48Smickey	mov	REG_LEN,r0
154*b12d7c48Smickey	tst	#1,r0			/* len is aligned? */
155*b12d7c48Smickey	bt/s	1f
156*b12d7c48Smickey	add	#-1,r0			/* (delay slot) r0 = len - 1 */
157*b12d7c48Smickey	mov.b	REG_C,@(r0,REG_DST)	/* fill last a byte */
158*b12d7c48Smickey	mov	r0,REG_LEN		/* len is now even */
159*b12d7c48Smickey1:
160*b12d7c48Smickey
161*b12d7c48Smickey	mova	1f,r0			/* 1f must be 4bytes aligned! */
162*b12d7c48Smickey	sub	REG_LEN,r0		/* r0 = table entry point */
163*b12d7c48Smickey	jmp	@r0
164*b12d7c48Smickey	mov	REG_C,r0		/* (delay slot) the table stores r0 */
165*b12d7c48Smickey
166*b12d7c48Smickey	.align	2
167*b12d7c48Smickey	mov.w	r0,@(30,REG_DST)
168*b12d7c48Smickey	mov.w	r0,@(28,REG_DST)
169*b12d7c48Smickey	mov.w	r0,@(26,REG_DST)
170*b12d7c48Smickey	mov.w	r0,@(24,REG_DST)
171*b12d7c48Smickey	mov.w	r0,@(22,REG_DST)
172*b12d7c48Smickey	mov.w	r0,@(20,REG_DST)
173*b12d7c48Smickey	mov.w	r0,@(18,REG_DST)
174*b12d7c48Smickey	mov.w	r0,@(16,REG_DST)
175*b12d7c48Smickey	mov.w	r0,@(14,REG_DST)
176*b12d7c48Smickey	mov.w	r0,@(12,REG_DST)
177*b12d7c48Smickey	mov.w	r0,@(10,REG_DST)
178*b12d7c48Smickey	mov.w	r0,@(8,REG_DST)
179*b12d7c48Smickey	mov.w	r0,@(6,REG_DST)
180*b12d7c48Smickey	mov.w	r0,@(4,REG_DST)
181*b12d7c48Smickey	mov.w	r0,@(2,REG_DST)
182*b12d7c48Smickey#ifdef BZERO
183*b12d7c48Smickey	rts
184*b12d7c48Smickey1:	mov.w	r0,@REG_DST		/* (delay slot of rts) offset 0 */
185*b12d7c48Smickey#else
186*b12d7c48Smickey	mov.w	r0,@REG_DST
187*b12d7c48Smickey1:	rts
188*b12d7c48Smickey	mov	REG_DST0,r0		/* (delay slot) return the original dst */
189*b12d7c48Smickey#endif
190*b12d7c48Smickey
191*b12d7c48Smickey
192*b12d7c48Smickey
/*
 * large: len >= 28.
 *
 * The memset build replicates the fill byte into all four bytes of REG_C
 * (bzero just loads 0).  REG_PTR is set to dst + len.  If dst is not
 * 4-byte aligned control goes to unaligned_dst; otherwise, if len is not a
 * multiple of 4, to unaligned_len.  Both come back to "aligned" with
 * REG_DST and REG_PTR 4-byte aligned and REG_LEN = REG_PTR - REG_DST.
 *
 * aligned: fills downward from REG_PTR, 32 bytes per iteration while
 * len >= 32, then one 32-bit word at a time until REG_PTR reaches REG_DST.
 */
193*b12d7c48Smickey	.align	2
194*b12d7c48Smickeylarge:
195*b12d7c48Smickey#ifdef BZERO
196*b12d7c48Smickey	mov	#0,REG_C
197*b12d7c48Smickey#else
198*b12d7c48Smickey	extu.b	REG_C,REG_TMP1		/* REG_C = ??????xx, REG_TMP1 = 000000xx */
199*b12d7c48Smickey	shll8	REG_C			/* REG_C = ????xx00, REG_TMP1 = 000000xx */
200*b12d7c48Smickey	or	REG_C,REG_TMP1		/* REG_C = ????xx00, REG_TMP1 = ????xxxx */
201*b12d7c48Smickey	swap.w	REG_TMP1,REG_C		/* REG_C = xxxx????, REG_TMP1 = ????xxxx */
202*b12d7c48Smickey	xtrct	REG_TMP1,REG_C		/* REG_C = xxxxxxxx */
203*b12d7c48Smickey#endif
204*b12d7c48Smickey
205*b12d7c48Smickey	mov	#3,REG_TMP1
206*b12d7c48Smickey	tst	REG_TMP1,REG_DST	/* T = ((dst & 3) == 0) */
207*b12d7c48Smickey	mov	REG_DST,REG_PTR
208*b12d7c48Smickey	bf/s	unaligned_dst
209*b12d7c48Smickey	add	REG_LEN,REG_PTR		/* REG_PTR = dst + len; */
210*b12d7c48Smickey	tst	REG_TMP1,REG_LEN	/* T = ((len & 3) == 0) */
211*b12d7c48Smickey	bf/s	unaligned_len		/* delay slot is the "mov #32" at aligned; it is simply repeated on re-entry */
212*b12d7c48Smickey
213*b12d7c48Smickeyaligned:
214*b12d7c48Smickey	/* fill 32*n bytes */
215*b12d7c48Smickey	mov	#32,REG_TMP1
216*b12d7c48Smickey	cmp/hi	REG_LEN,REG_TMP1	/* T = (32 > len), unsigned */
217*b12d7c48Smickey	bt	9f			/* fewer than 32 bytes: skip the block loop */
218*b12d7c48Smickey	.align	2
219*b12d7c48Smickey1:	sub	REG_TMP1,REG_PTR	/* ptr -= 32; */
220*b12d7c48Smickey	mov.l	REG_C,@REG_PTR
221*b12d7c48Smickey	sub	REG_TMP1,REG_LEN	/* len -= 32; */
222*b12d7c48Smickey	mov.l	REG_C,@(4,REG_PTR)
223*b12d7c48Smickey	cmp/hi	REG_LEN,REG_TMP1	/* T = (32 > len), tested by bf/s below */
224*b12d7c48Smickey	mov.l	REG_C,@(8,REG_PTR)
225*b12d7c48Smickey	mov.l	REG_C,@(12,REG_PTR)
226*b12d7c48Smickey	mov.l	REG_C,@(16,REG_PTR)
227*b12d7c48Smickey	mov.l	REG_C,@(20,REG_PTR)
228*b12d7c48Smickey	mov.l	REG_C,@(24,REG_PTR)
229*b12d7c48Smickey	bf/s	1b			/* loop while (len >= 32) */
230*b12d7c48Smickey	mov.l	REG_C,@(28,REG_PTR)	/* (delay slot) last word of the block */
231*b12d7c48Smickey9:
232*b12d7c48Smickey
233*b12d7c48Smickey	/*
	 * fill left 4*n bytes
	 *
	 * Word loop, unrolled 4 times.  REG_DST is advanced by 4 so that
	 * "REG_PTR == REG_DST" means one word remains; as in the byte loop,
	 * each cmp/eq is evaluated before the store that follows it and
	 * tested after that store.
	 */
234*b12d7c48Smickey	cmp/eq	REG_DST,REG_PTR
235*b12d7c48Smickey	bt	9f			/* nothing left */
236*b12d7c48Smickey	add	#4,REG_DST
237*b12d7c48Smickey	cmp/eq	REG_DST,REG_PTR
238*b12d7c48Smickey1:	mov.l	REG_C,@-REG_PTR		/* *--(u_int32_t*)ptr = c; */
239*b12d7c48Smickey	bt/s	9f
240*b12d7c48Smickey	cmp/eq	REG_DST,REG_PTR		/* (delay slot) T for the next store */
241*b12d7c48Smickey	mov.l	REG_C,@-REG_PTR
242*b12d7c48Smickey	bt/s	9f
243*b12d7c48Smickey	cmp/eq	REG_DST,REG_PTR
244*b12d7c48Smickey	mov.l	REG_C,@-REG_PTR
245*b12d7c48Smickey	bt/s	9f
246*b12d7c48Smickey	cmp/eq	REG_DST,REG_PTR
247*b12d7c48Smickey	mov.l	REG_C,@-REG_PTR
248*b12d7c48Smickey	bf/s	1b
249*b12d7c48Smickey	cmp/eq	REG_DST,REG_PTR
250*b12d7c48Smickey9:
251*b12d7c48Smickey#ifdef BZERO
252*b12d7c48Smickey	rts
253*b12d7c48Smickey	nop
254*b12d7c48Smickey#else
255*b12d7c48Smickey	rts
256*b12d7c48Smickey	mov	REG_DST0,r0		/* (delay slot) return the original dst */
257*b12d7c48Smickey#endif
258*b12d7c48Smickey
259*b12d7c48Smickey
/*
 * Head and tail alignment for the large path.
 *
 * unaligned_dst: entered when dst is not 4-byte aligned.  Stores one byte
 * and/or one 16-bit word at REG_DST, advancing it to a 4-byte boundary,
 * then checks whether the end pointer REG_PTR also needs trimming.
 *
 * unaligned_len: entered directly from large when dst is aligned but len
 * is not a multiple of 4.  Stores one byte and/or one 16-bit word just
 * below REG_PTR, moving it down to a 4-byte boundary.
 *
 * Finally REG_LEN is recomputed as REG_PTR - REG_DST and control returns to
 * aligned.
 */
260*b12d7c48Smickeyunaligned_dst:
261*b12d7c48Smickey	mov	#1,REG_TMP1
262*b12d7c48Smickey	tst	REG_TMP1,REG_DST	/* if (dst & 1) {               */
263*b12d7c48Smickey	add	#1,REG_TMP1		/* REG_TMP1 = 2; the T bit from tst is still pending */
264*b12d7c48Smickey	bt/s	2f
265*b12d7c48Smickey	tst	REG_TMP1,REG_DST	/* (delay slot) T = ((dst & 2) == 0) */
266*b12d7c48Smickey	mov.b	REG_C,@REG_DST		/*   *dst++ = c;                */
267*b12d7c48Smickey	add	#1,REG_DST
268*b12d7c48Smickey	tst	REG_TMP1,REG_DST	/*   redo the (dst & 2) test on the new dst */
269*b12d7c48Smickey2:					/* }                            */
270*b12d7c48Smickey					/* if (dst & 2) {               */
271*b12d7c48Smickey	bt	4f
272*b12d7c48Smickey	mov.w	REG_C,@REG_DST		/*   *(u_int16_t*)dst = c;      */
273*b12d7c48Smickey	add	#2,REG_DST		/*   dst += 2;                  */
274*b12d7c48Smickey4:					/* }                            */
275*b12d7c48Smickey
276*b12d7c48Smickey
277*b12d7c48Smickey	tst	#3,REG_PTR		/* if (ptr & 3) {               */
278*b12d7c48Smickey	bt/s	4f			/* delay slot is the "tst #1" below; its result is unused when the branch is taken */
279*b12d7c48Smickeyunaligned_len:
280*b12d7c48Smickey	tst	#1,REG_PTR		/*   if (ptr & 1) {             */
281*b12d7c48Smickey	bt/s	2f
282*b12d7c48Smickey	tst	#2,REG_PTR		/* (delay slot) T = ((ptr & 2) == 0) */
283*b12d7c48Smickey	mov.b	REG_C,@-REG_PTR		/*     *--ptr = c;  (ptr was odd, so bit 1 and T stay valid) */
284*b12d7c48Smickey2:					/*   }                          */
285*b12d7c48Smickey					/*   if (ptr & 2) {             */
286*b12d7c48Smickey	bt	4f
287*b12d7c48Smickey	mov.w	REG_C,@-REG_PTR		/*     *--(u_int16_t*)ptr = c;  */
288*b12d7c48Smickey4:					/*   }                          */
289*b12d7c48Smickey					/* }                            */
290*b12d7c48Smickey
291*b12d7c48Smickey	mov	REG_PTR,REG_LEN
292*b12d7c48Smickey	bra	aligned
293*b12d7c48Smickey	sub	REG_DST,REG_LEN		/* (delay slot) REG_LEN = ptr - dst; */
294*b12d7c48Smickey
295