xref: /openbsd-src/lib/libc/arch/sh/string/memset.S (revision 9b9d2a55a62c8e82206c25f94fcc7f4e2765250e)
1*9b9d2a55Sguenther/*	$OpenBSD: memset.S,v 1.2 2015/08/31 02:53:57 guenther Exp $	*/
2cf252584Smiod/*	$NetBSD: memset.S,v 1.1 2005/12/20 19:28:50 christos Exp $	*/
3cf252584Smiod
4cf252584Smiod/*-
5cf252584Smiod * Copyright (c) 2002 SHIMIZU Ryo.  All rights reserved.
6cf252584Smiod *
7cf252584Smiod * Redistribution and use in source and binary forms, with or without
8cf252584Smiod * modification, are permitted provided that the following conditions
9cf252584Smiod * are met:
10cf252584Smiod * 1. Redistributions of source code must retain the above copyright
11cf252584Smiod *    notice, this list of conditions and the following disclaimer.
12cf252584Smiod * 2. Redistributions in binary form must reproduce the above copyright
13cf252584Smiod *    notice, this list of conditions and the following disclaimer in the
14cf252584Smiod *    documentation and/or other materials provided with the distribution.
15cf252584Smiod * 3. The name of the author may not be used to endorse or promote products
16cf252584Smiod *    derived from this software without specific prior written permission.
17cf252584Smiod *
18cf252584Smiod * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19cf252584Smiod * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20cf252584Smiod * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21cf252584Smiod * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22cf252584Smiod * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23cf252584Smiod * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24cf252584Smiod * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25cf252584Smiod * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26cf252584Smiod * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27cf252584Smiod * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28cf252584Smiod */
29cf252584Smiod
30*9b9d2a55Sguenther#include "SYS.h"
31cf252584Smiod
/*
 * Register roles.
 *
 * REG_PTR (r0) and REG_TMP1 (r1) are used as working registers in both
 * builds.  The remaining assignments depend on whether this file is
 * assembled as bzero (BZERO defined) or as memset:
 *
 *   bzero : REG_DST = r4, REG_LEN = r5, REG_C = r2 (loaded with 0 by the code)
 *   memset: REG_DST = r4, REG_C = r5, REG_LEN = r6, and REG_DST0 = r3
 *           holds a copy of the original dst for the return value
 */
32cf252584Smiod#define	REG_PTR				r0
33cf252584Smiod#define	REG_TMP1			r1
34cf252584Smiod
35cf252584Smiod#ifdef BZERO
36cf252584Smiod# define	REG_C			r2
37cf252584Smiod# define	REG_DST			r4
38cf252584Smiod# define	REG_LEN			r5
39cf252584Smiod#else
40cf252584Smiod# define	REG_DST0		r3
41cf252584Smiod# define	REG_DST			r4
42cf252584Smiod# define	REG_C			r5
43cf252584Smiod# define	REG_LEN			r6
44cf252584Smiod#endif
45cf252584Smiod
/*
 * Entry point.  Assembled as bzero when BZERO is defined, otherwise as
 * memset, which first copies dst into REG_DST0 so it can be returned.
 *
 * Dispatch on len (unsigned compares):
 *   len >= 28  -> large
 *   len >= 12  -> small
 *   otherwise  -> byte loop below (0..11 bytes)
 *
 * bt/s and bf/s are delayed branches: the instruction that follows them
 * is executed whether or not the branch is taken.
 */
46cf252584Smiod#ifdef BZERO
47cf252584SmiodENTRY(bzero)
48cf252584Smiod#else
49cf252584SmiodENTRY(memset)
50cf252584Smiod	mov	REG_DST,REG_DST0	/* for return value */
51cf252584Smiod#endif
52cf252584Smiod	/* small amount to fill ? */
53cf252584Smiod	mov	#28,REG_TMP1
54cf252584Smiod	cmp/hs	REG_TMP1,REG_LEN	/* if (len >= 28) goto large; */
55cf252584Smiod	bt/s	large
56cf252584Smiod	mov	#12,REG_TMP1		/* if (len >= 12) goto small; */
57cf252584Smiod	cmp/hs	REG_TMP1,REG_LEN
58cf252584Smiod	bt/s	small
	/*
	 * Delay slot of the branch to small: bzero clears REG_C here.
	 * In the memset build nothing is emitted inside the #ifdef, so the
	 * delay slot is the tst below; small overwrites T itself, so that
	 * is harmless when the branch is taken.
	 */
59cf252584Smiod#ifdef BZERO
60cf252584Smiod	mov	#0,REG_C
61cf252584Smiod#endif
62cf252584Smiod	/* very little fill (0 ~ 11 bytes) */
	/*
	 * tst sets T when len == 0; the add that follows leaves T intact,
	 * so bt/s still sees the result of the tst.  From here on REG_LEN
	 * is the end pointer (dst + len) and, via the delay slot, REG_DST
	 * is dst + 1.
	 */
63cf252584Smiod	tst	REG_LEN,REG_LEN
64cf252584Smiod	add	REG_DST,REG_LEN
65cf252584Smiod	bt/s	done
66cf252584Smiod	add	#1,REG_DST
67cf252584Smiod
68cf252584Smiod	/* byte loop, unrolled 4 times */
	/*
	 * Bytes are stored from the end downwards with pre-decrement.
	 * Each cmp/eq is evaluated before the store that follows it:
	 * T is set when REG_LEN == dst + 1, i.e. when the next store lands
	 * on dst itself and is therefore the last one.  The cmp/eq in each
	 * branch delay slot prepares T for the next store.
	 */
69cf252584Smiod	cmp/eq	REG_DST,REG_LEN
70cf252584Smiod1:	mov.b	REG_C,@-REG_LEN
71cf252584Smiod	bt/s	done
72cf252584Smiod	cmp/eq	REG_DST,REG_LEN
73cf252584Smiod	mov.b	REG_C,@-REG_LEN
74cf252584Smiod	bt/s	done
75cf252584Smiod	cmp/eq	REG_DST,REG_LEN
76cf252584Smiod	mov.b	REG_C,@-REG_LEN
77cf252584Smiod	bt/s	done
78cf252584Smiod	cmp/eq	REG_DST,REG_LEN
79cf252584Smiod	mov.b	REG_C,@-REG_LEN
80cf252584Smiod	bf/s	1b
81cf252584Smiod	cmp/eq	REG_DST,REG_LEN
	/*
	 * Common return for the byte loop.  memset loads the saved dst
	 * into r0 in the rts delay slot; bzero has nothing to return.
	 */
82cf252584Smioddone:
83cf252584Smiod#ifdef BZERO
84cf252584Smiod	rts
85cf252584Smiod	nop
86cf252584Smiod#else
87cf252584Smiod	rts
88cf252584Smiod	mov	REG_DST0,r0
89cf252584Smiod#endif
90cf252584Smiod
91cf252584Smiod
/*
 * small: reached from the entry dispatch with 12 <= len < 28.
 *
 * If dst is even, control goes to small_aligned (the copy of dst into
 * REG_TMP1 sits in the delay slot and runs on both paths).
 *
 * Otherwise the fill is done by jumping into the middle of a table of
 * byte stores.  Every instruction in the table is 2 bytes long and
 * stores one byte, so the jump target is (address of 1f) - 2 * len:
 * exactly len stores are executed before the return.  Stores to
 * offsets 0..15 use REG_DST as base, stores to offsets 16..31 use
 * REG_TMP1 = dst + 16 with displacements 0..15.
 */
92cf252584Smiodsmall:
93cf252584Smiod	mov	REG_DST,r0
94cf252584Smiod	tst	#1,r0
95cf252584Smiod	bt/s	small_aligned
96cf252584Smiod	mov	REG_DST,REG_TMP1
97cf252584Smiod	shll	REG_LEN
98cf252584Smiod	mova	1f,r0			/* 1f must be 4bytes aligned! */
99cf252584Smiod	add	#16,REG_TMP1		/* REG_TMP1 = dst+16; */
100cf252584Smiod	sub	REG_LEN,r0
	/*
	 * r0 is consumed as the jump target, then reloaded with the fill
	 * value in the jmp delay slot; the table stores from r0.
	 */
101cf252584Smiod	jmp	@r0
102cf252584Smiod	mov	REG_C,r0
103cf252584Smiod
	/*
	 * The table starts on a 4-byte boundary and holds 32 two-byte
	 * instructions before label 1, which keeps 1: 4-byte aligned as
	 * the mova above requires.
	 */
104cf252584Smiod	.align	2
105cf252584Smiod	mov.b	r0,@(15,REG_TMP1)
106cf252584Smiod	mov.b	r0,@(14,REG_TMP1)
107cf252584Smiod	mov.b	r0,@(13,REG_TMP1)
108cf252584Smiod	mov.b	r0,@(12,REG_TMP1)
109cf252584Smiod	mov.b	r0,@(11,REG_TMP1)
110cf252584Smiod	mov.b	r0,@(10,REG_TMP1)
111cf252584Smiod	mov.b	r0,@(9,REG_TMP1)
112cf252584Smiod	mov.b	r0,@(8,REG_TMP1)
113cf252584Smiod	mov.b	r0,@(7,REG_TMP1)
114cf252584Smiod	mov.b	r0,@(6,REG_TMP1)
115cf252584Smiod	mov.b	r0,@(5,REG_TMP1)
116cf252584Smiod	mov.b	r0,@(4,REG_TMP1)
117cf252584Smiod	mov.b	r0,@(3,REG_TMP1)
118cf252584Smiod	mov.b	r0,@(2,REG_TMP1)
119cf252584Smiod	mov.b	r0,@(1,REG_TMP1)
120cf252584Smiod	mov.b	r0,@REG_TMP1
121cf252584Smiod	mov.b	r0,@(15,REG_DST)
122cf252584Smiod	mov.b	r0,@(14,REG_DST)
123cf252584Smiod	mov.b	r0,@(13,REG_DST)
124cf252584Smiod	mov.b	r0,@(12,REG_DST)
125cf252584Smiod	mov.b	r0,@(11,REG_DST)
126cf252584Smiod	mov.b	r0,@(10,REG_DST)
127cf252584Smiod	mov.b	r0,@(9,REG_DST)
128cf252584Smiod	mov.b	r0,@(8,REG_DST)
129cf252584Smiod	mov.b	r0,@(7,REG_DST)
130cf252584Smiod	mov.b	r0,@(6,REG_DST)
131cf252584Smiod	mov.b	r0,@(5,REG_DST)
132cf252584Smiod	mov.b	r0,@(4,REG_DST)
133cf252584Smiod	mov.b	r0,@(3,REG_DST)
134cf252584Smiod	mov.b	r0,@(2,REG_DST)
135cf252584Smiod	mov.b	r0,@(1,REG_DST)
	/*
	 * bzero: the final store (offset 0) sits in the rts delay slot and
	 * carries label 1, so rts counts as one of the len table slots.
	 * memset: the final store comes first, label 1 is on rts, and the
	 * delay slot loads the saved dst as the return value.
	 */
136cf252584Smiod#ifdef BZERO
137cf252584Smiod	rts
138cf252584Smiod1:	mov.b	r0,@REG_DST
139cf252584Smiod#else
140cf252584Smiod	mov.b	r0,@REG_DST
141cf252584Smiod1:	rts
142cf252584Smiod	mov	REG_DST0,r0
143cf252584Smiod#endif
144cf252584Smiod
145cf252584Smiod
146cf252584Smiod/* 2 bytes aligned small fill */
/*
 * Reached from small when dst is even (12 <= len < 28).
 *
 * memset first duplicates the low byte of REG_C into the low 16 bits.
 * bzero skips this; its REG_C was cleared in the delay slot of the
 * branch to small.
 *
 * An odd len is handled by storing the last byte (dst + len - 1)
 * separately and dropping it from len.  The rest is filled by jumping
 * into a table of 16-bit stores: each instruction is 2 bytes long and
 * stores 2 bytes, so the jump target is (address of 1f) - len.
 */
147cf252584Smiodsmall_aligned:
148cf252584Smiod#ifndef BZERO
149cf252584Smiod	extu.b	REG_C,REG_TMP1		/* REG_C = ??????xx, REG_TMP1 = ????00xx */
150cf252584Smiod	shll8	REG_C			/* REG_C = ????xx00, REG_TMP1 = ????00xx */
151cf252584Smiod	or	REG_TMP1,REG_C		/* REG_C = ????xxxx */
152cf252584Smiod#endif
153cf252584Smiod
	/*
	 * The add #-1 is in the bt/s delay slot and runs on both paths.
	 * When len is even the branch is taken and r0 is simply reloaded
	 * by the mova below; REG_LEN is only updated on the odd path.
	 */
154cf252584Smiod	mov	REG_LEN,r0
155cf252584Smiod	tst	#1,r0			/* len is aligned? */
156cf252584Smiod	bt/s	1f
157cf252584Smiod	add	#-1,r0
158cf252584Smiod	mov.b	REG_C,@(r0,REG_DST)	/* fill the last byte */
159cf252584Smiod	mov	r0,REG_LEN
160cf252584Smiod1:
161cf252584Smiod
162cf252584Smiod	mova	1f,r0			/* 1f must be 4bytes aligned! */
163cf252584Smiod	sub	REG_LEN,r0
	/* r0 is the jump target, then becomes the fill pattern (delay slot) */
164cf252584Smiod	jmp	@r0
165cf252584Smiod	mov	REG_C,r0
166cf252584Smiod
	/*
	 * 16 two-byte instructions precede label 1, covering offsets
	 * 0..30 from dst; only the last len/2 of them are executed.
	 */
167cf252584Smiod	.align	2
168cf252584Smiod	mov.w	r0,@(30,REG_DST)
169cf252584Smiod	mov.w	r0,@(28,REG_DST)
170cf252584Smiod	mov.w	r0,@(26,REG_DST)
171cf252584Smiod	mov.w	r0,@(24,REG_DST)
172cf252584Smiod	mov.w	r0,@(22,REG_DST)
173cf252584Smiod	mov.w	r0,@(20,REG_DST)
174cf252584Smiod	mov.w	r0,@(18,REG_DST)
175cf252584Smiod	mov.w	r0,@(16,REG_DST)
176cf252584Smiod	mov.w	r0,@(14,REG_DST)
177cf252584Smiod	mov.w	r0,@(12,REG_DST)
178cf252584Smiod	mov.w	r0,@(10,REG_DST)
179cf252584Smiod	mov.w	r0,@(8,REG_DST)
180cf252584Smiod	mov.w	r0,@(6,REG_DST)
181cf252584Smiod	mov.w	r0,@(4,REG_DST)
182cf252584Smiod	mov.w	r0,@(2,REG_DST)
	/*
	 * Same tail layout as the byte table in small: bzero puts the last
	 * store in the rts delay slot (label 1 on the store); memset puts
	 * label 1 on rts and returns the saved dst from the delay slot.
	 */
183cf252584Smiod#ifdef BZERO
184cf252584Smiod	rts
185cf252584Smiod1:	mov.w	r0,@REG_DST
186cf252584Smiod#else
187cf252584Smiod	mov.w	r0,@REG_DST
188cf252584Smiod1:	rts
189cf252584Smiod	mov	REG_DST0,r0
190cf252584Smiod#endif
191cf252584Smiod
192cf252584Smiod
193cf252584Smiod
/*
 * large: reached from the entry dispatch with len >= 28.
 *
 * REG_C is turned into a 32-bit fill pattern (0 for bzero, the fill
 * byte replicated four times for memset).  REG_PTR is set to dst + len
 * and the buffer is filled from the end towards dst with longword
 * stores.  If dst or len is not a multiple of 4, unaligned_dst /
 * unaligned_len trim the head and/or tail first and come back to
 * aligned with len recomputed.
 */
194cf252584Smiod	.align	2
195cf252584Smiodlarge:
196cf252584Smiod#ifdef BZERO
197cf252584Smiod	mov	#0,REG_C
198cf252584Smiod#else
199cf252584Smiod	extu.b	REG_C,REG_TMP1		/* REG_C = ??????xx, REG_TMP1 = ????00xx */
200cf252584Smiod	shll8	REG_C			/* REG_C = ????xx00, REG_TMP1 = ????00xx */
201cf252584Smiod	or	REG_C,REG_TMP1		/* REG_C = ????xx00, REG_TMP1 = ????xxxx */
202cf252584Smiod	swap.w	REG_TMP1,REG_C		/* REG_C = xxxx????, REG_TMP1 = ????xxxx */
203cf252584Smiod	xtrct	REG_TMP1,REG_C		/* REG_C = xxxxxxxx */
204cf252584Smiod#endif
205cf252584Smiod
	/*
	 * The add that completes REG_PTR = dst + len is in the bf/s delay
	 * slot, so it has run by the time unaligned_dst is entered.
	 */
206cf252584Smiod	mov	#3,REG_TMP1
207cf252584Smiod	tst	REG_TMP1,REG_DST
208cf252584Smiod	mov	REG_DST,REG_PTR
209cf252584Smiod	bf/s	unaligned_dst
210cf252584Smiod	add	REG_LEN,REG_PTR		/* REG_PTR = dst + len; */
	/*
	 * The delay slot of this bf/s is the mov #32 at aligned: below.
	 * It is harmless on the taken path and is executed again when
	 * unaligned_len branches back to aligned.
	 */
211cf252584Smiod	tst	REG_TMP1,REG_LEN
212cf252584Smiod	bf/s	unaligned_len
213cf252584Smiod
214cf252584Smiodaligned:
215cf252584Smiod	/* fill 32*n bytes */
	/*
	 * cmp/hi sets T when 32 > len (unsigned), so the block loop is
	 * skipped for len < 32 and repeats while len >= 32.  Each pass
	 * moves REG_PTR down by 32 and stores eight longwords at
	 * offsets 0..28 from it.
	 */
216cf252584Smiod	mov	#32,REG_TMP1
217cf252584Smiod	cmp/hi	REG_LEN,REG_TMP1
218cf252584Smiod	bt	9f
219cf252584Smiod	.align	2
220cf252584Smiod1:	sub	REG_TMP1,REG_PTR
221cf252584Smiod	mov.l	REG_C,@REG_PTR
222cf252584Smiod	sub	REG_TMP1,REG_LEN
223cf252584Smiod	mov.l	REG_C,@(4,REG_PTR)
224cf252584Smiod	cmp/hi	REG_LEN,REG_TMP1
225cf252584Smiod	mov.l	REG_C,@(8,REG_PTR)
226cf252584Smiod	mov.l	REG_C,@(12,REG_PTR)
227cf252584Smiod	mov.l	REG_C,@(16,REG_PTR)
228cf252584Smiod	mov.l	REG_C,@(20,REG_PTR)
229cf252584Smiod	mov.l	REG_C,@(24,REG_PTR)
230cf252584Smiod	bf/s	1b
231cf252584Smiod	mov.l	REG_C,@(28,REG_PTR)
232cf252584Smiod9:
233cf252584Smiod
234cf252584Smiod	/* fill left 4*n bytes */
	/*
	 * Nothing is left when REG_PTR has already reached dst.
	 * Otherwise REG_DST is advanced to dst + 4 and longwords are
	 * stored with pre-decrement; each cmp/eq is evaluated before the
	 * store that follows it, so T marks the store that lands on dst
	 * as the last one.
	 */
235cf252584Smiod	cmp/eq	REG_DST,REG_PTR
236cf252584Smiod	bt	9f
237cf252584Smiod	add	#4,REG_DST
238cf252584Smiod	cmp/eq	REG_DST,REG_PTR
239cf252584Smiod1:	mov.l	REG_C,@-REG_PTR
240cf252584Smiod	bt/s	9f
241cf252584Smiod	cmp/eq	REG_DST,REG_PTR
242cf252584Smiod	mov.l	REG_C,@-REG_PTR
243cf252584Smiod	bt/s	9f
244cf252584Smiod	cmp/eq	REG_DST,REG_PTR
245cf252584Smiod	mov.l	REG_C,@-REG_PTR
246cf252584Smiod	bt/s	9f
247cf252584Smiod	cmp/eq	REG_DST,REG_PTR
248cf252584Smiod	mov.l	REG_C,@-REG_PTR
249cf252584Smiod	bf/s	1b
250cf252584Smiod	cmp/eq	REG_DST,REG_PTR
	/* return; memset hands back the saved dst via the rts delay slot */
251cf252584Smiod9:
252cf252584Smiod#ifdef BZERO
253cf252584Smiod	rts
254cf252584Smiod	nop
255cf252584Smiod#else
256cf252584Smiod	rts
257cf252584Smiod	mov	REG_DST0,r0
258cf252584Smiod#endif
259cf252584Smiod
260cf252584Smiod
/*
 * unaligned_dst: entered from large when dst is not 4-byte aligned.
 * Stores one byte and/or one 16-bit word at the head so that REG_DST
 * becomes 4-byte aligned, then checks the end pointer.
 *
 * unaligned_len: also entered directly from large when dst is aligned
 * but len is not a multiple of 4.  Stores one byte and/or one 16-bit
 * word at the tail (pre-decrementing REG_PTR) so that REG_PTR becomes
 * 4-byte aligned.
 *
 * Finally len is recomputed as REG_PTR - REG_DST and control returns
 * to aligned.  The tst #imm forms operate on REG_PTR, which is r0.
 */
261cf252584Smiodunaligned_dst:
	/*
	 * REG_TMP1 goes 1 -> 2 between the two tests; the second tst
	 * (dst & 2) is in the bt/s delay slot and is repeated after the
	 * byte store because dst has changed by then.
	 */
262cf252584Smiod	mov	#1,REG_TMP1
263cf252584Smiod	tst	REG_TMP1,REG_DST	/* if (dst & 1) {               */
264cf252584Smiod	add	#1,REG_TMP1
265cf252584Smiod	bt/s	2f
266cf252584Smiod	tst	REG_TMP1,REG_DST
267cf252584Smiod	mov.b	REG_C,@REG_DST		/*   *dst++ = c;                */
268cf252584Smiod	add	#1,REG_DST
269cf252584Smiod	tst	REG_TMP1,REG_DST
270cf252584Smiod2:					/* }                            */
271cf252584Smiod					/* if (dst & 2) {               */
272cf252584Smiod	bt	4f
273cf252584Smiod	mov.w	REG_C,@REG_DST		/*   *(u_int16_t*)dst++ = c;    */
274cf252584Smiod	add	#2,REG_DST
275cf252584Smiod4:					/* }                            */
276cf252584Smiod
277cf252584Smiod
	/*
	 * The delay slot of this bt/s is the first instruction of
	 * unaligned_len (tst #1); it only affects T, which is not used
	 * again when the branch to 4f is taken.
	 */
278cf252584Smiod	tst	#3,REG_PTR		/* if (ptr & 3) {               */
279cf252584Smiod	bt/s	4f			/*                              */
280cf252584Smiodunaligned_len:
	/*
	 * The (ptr & 2) test runs in the delay slot, before the byte
	 * store decrements ptr.  The byte store only happens when ptr is
	 * odd, and decrementing an odd value leaves bit 1 unchanged, so
	 * the early test result is still valid at label 2.
	 */
281cf252584Smiod	tst	#1,REG_PTR		/*   if (ptr & 1) {             */
282cf252584Smiod	bt/s	2f
283cf252584Smiod	tst	#2,REG_PTR
284cf252584Smiod	mov.b	REG_C,@-REG_PTR		/*     *--ptr = c;              */
285cf252584Smiod2:					/*   }                          */
286cf252584Smiod					/*   if (ptr & 2) {             */
287cf252584Smiod	bt	4f
288cf252584Smiod	mov.w	REG_C,@-REG_PTR		/*     *--(u_int16_t*)ptr = c;  */
289cf252584Smiod4:					/*   }                          */
290cf252584Smiod					/* }                            */
291cf252584Smiod
	/* len = ptr - dst; the sub is in the bra delay slot */
292cf252584Smiod	mov	REG_PTR,REG_LEN
293cf252584Smiod	bra	aligned
294cf252584Smiod	sub	REG_DST,REG_LEN
295cf252584Smiod
/*
 * Close the symbol opened by ENTRY() above.  END_WEAK / END_STRONG are
 * not defined in this file; presumably they come from SYS.h and select
 * the symbol binding (weak for bzero, strong for memset) -- confirm
 * against that header.
 */
296*9b9d2a55Sguenther#ifdef BZERO
297*9b9d2a55SguentherEND_WEAK(bzero)
298*9b9d2a55Sguenther#else
299*9b9d2a55SguentherEND_STRONG(memset)
300*9b9d2a55Sguenther#endif
301