xref: /openbsd-src/sys/lib/libkern/arch/sh/memset.S (revision a7422b3c5c3da7cd7744e9d209d5ebaad8a66b9a)
1/*	$NetBSD: memset.S,v 1.1 2005/12/20 19:28:50 christos Exp $	*/
2
3/*-
4 * Copyright (c) 2002 SHIMIZU Ryo.  All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 *    derived from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <machine/asm.h>
30
/*
 * Register aliases used throughout this file.
 *
 * REG_PTR  (r0): running end pointer.  The large-fill path sets it to
 *                dst + len and stores downward through it.  r0 is also
 *                used under its own name in the small-fill paths, first
 *                as the computed jump target and then as the store
 *                source.
 * REG_TMP1 (r1): scratch for constants, masks and temporaries.
 */
31#define	REG_PTR				r0
32#define	REG_TMP1			r1
33
/*
 * The remaining aliases depend on which routine is being assembled.
 * REG_DST and REG_LEN hold the destination and the byte count on entry.
 * REG_C holds the fill value: the bzero build loads it with 0 itself,
 * the memset build uses r5 as it finds it.  REG_DST0 exists only in the
 * memset build and keeps the original destination for the return value.
 * NOTE(review): r4/r5/r6 are presumably the incoming C argument
 * registers of the SH calling convention; confirm against the ABI.
 */
34#ifdef BZERO
35# define	REG_C			r2
36# define	REG_DST			r4
37# define	REG_LEN			r5
38#else
39# define	REG_DST0		r3
40# define	REG_DST			r4
41# define	REG_C			r5
42# define	REG_LEN			r6
43#endif
44
/*
 * Entry point: assembled as bzero when BZERO is defined, as memset
 * otherwise.
 *
 * Dispatch on the byte count in REG_LEN:
 *   len >= 28  -> large
 *   len >= 12  -> small
 *   otherwise  -> the byte loop below (0 to 11 bytes)
 *
 * The memset build saves the destination in REG_DST0 on entry and
 * copies it to r0 on the way out; the bzero build simply returns.
 *
 * bt/s, bf/s, rts, jmp and bra are delayed branches: the instruction
 * written directly after each of them is its delay slot and executes
 * before control transfers, whether or not a conditional branch is
 * taken.  Several paths in this file rely on that.
 */
45#ifdef BZERO
46ENTRY(bzero)
47#else
48ENTRY(memset)
49	mov	REG_DST,REG_DST0	/* for return value */
50#endif
51	/* small amount to fill ? */
52	mov	#28,REG_TMP1
53	cmp/hs	REG_TMP1,REG_LEN	/* if (len >= 28) goto large; */
54	bt/s	large
	/* delay slot of the branch to large: load the next threshold */
55	mov	#12,REG_TMP1		/* if (len >= 12) goto small; */
56	cmp/hs	REG_TMP1,REG_LEN
57	bt/s	small
	/*
	 * Delay slot of the branch to small.  In the bzero build it is the
	 * instruction that zeroes the fill value; in the memset build it
	 * is the tst that follows.
	 */
58#ifdef BZERO
59	mov	#0,REG_C
60#endif
61	/* very little fill (0 ~ 11 bytes) */
62	tst	REG_LEN,REG_LEN		/* T = (len == 0) */
63	add	REG_DST,REG_LEN		/* REG_LEN = dst + len (end pointer) */
64	bt/s	done			/* nothing to do for len == 0 */
65	add	#1,REG_DST		/* delay slot: REG_DST = dst + 1 */
66
	/*
	 * Bytes are stored from the end backward with a pre-decrementing
	 * store through REG_LEN.  Each cmp/eq sets T when REG_LEN equals
	 * dst + 1, meaning exactly one byte is left; the store that follows
	 * writes that byte and the branch after it leaves the loop.
	 * Every cmp/eq after the first sits in a branch delay slot and
	 * prepares T for the branch that follows the next store.
	 */
67	/* loop body unrolled 4 times */
68	cmp/eq	REG_DST,REG_LEN
691:	mov.b	REG_C,@-REG_LEN
70	bt/s	done
71	cmp/eq	REG_DST,REG_LEN
72	mov.b	REG_C,@-REG_LEN
73	bt/s	done
74	cmp/eq	REG_DST,REG_LEN
75	mov.b	REG_C,@-REG_LEN
76	bt/s	done
77	cmp/eq	REG_DST,REG_LEN
78	mov.b	REG_C,@-REG_LEN
79	bf/s	1b
80	cmp/eq	REG_DST,REG_LEN
81done:
82#ifdef BZERO
83	rts
84	nop
85#else
86	rts
87	mov	REG_DST0,r0		/* delay slot: return the original dst */
88#endif
89
90
/*
 * small: fill 12 to 27 bytes (the range the entry dispatch sends here).
 *
 * A destination with bit 0 clear is handed to small_aligned.  Otherwise
 * the fill is done one byte at a time by jumping into the table of
 * mov.b stores below.  The jump target is label 1 minus 2 * len, so
 * with 2-byte instructions exactly len stores execute, covering
 * dst[len-1] down to dst[0].  The displacements in the table only go up
 * to 15, so bytes 16..31 are addressed from REG_TMP1 = dst + 16.
 * With len at most 27 the entries for dst[27..31] are never reached.
 */
91small:
92	mov	REG_DST,r0
93	tst	#1,r0			/* T = 1 when dst is even */
94	bt/s	small_aligned
95	mov	REG_DST,REG_TMP1	/* delay slot: REG_TMP1 = dst */
96	shll	REG_LEN			/* REG_LEN = 2 * len = table bytes to run */
97	mova	1f,r0			/* 1f must be 4bytes aligned! */
98	add	#16,REG_TMP1		/* REG_TMP1 = dst+16; */
99	sub	REG_LEN,r0		/* r0 = entry point inside the table */
100	jmp	@r0
101	mov	REG_C,r0		/* delay slot: r0 = fill value for the stores */
102
103	.align	2
	/* bytes dst[31] .. dst[16], addressed from REG_TMP1 */
104	mov.b	r0,@(15,REG_TMP1)
105	mov.b	r0,@(14,REG_TMP1)
106	mov.b	r0,@(13,REG_TMP1)
107	mov.b	r0,@(12,REG_TMP1)
108	mov.b	r0,@(11,REG_TMP1)
109	mov.b	r0,@(10,REG_TMP1)
110	mov.b	r0,@(9,REG_TMP1)
111	mov.b	r0,@(8,REG_TMP1)
112	mov.b	r0,@(7,REG_TMP1)
113	mov.b	r0,@(6,REG_TMP1)
114	mov.b	r0,@(5,REG_TMP1)
115	mov.b	r0,@(4,REG_TMP1)
116	mov.b	r0,@(3,REG_TMP1)
117	mov.b	r0,@(2,REG_TMP1)
118	mov.b	r0,@(1,REG_TMP1)
119	mov.b	r0,@REG_TMP1
	/* bytes dst[15] .. dst[0], addressed from REG_DST */
120	mov.b	r0,@(15,REG_DST)
121	mov.b	r0,@(14,REG_DST)
122	mov.b	r0,@(13,REG_DST)
123	mov.b	r0,@(12,REG_DST)
124	mov.b	r0,@(11,REG_DST)
125	mov.b	r0,@(10,REG_DST)
126	mov.b	r0,@(9,REG_DST)
127	mov.b	r0,@(8,REG_DST)
128	mov.b	r0,@(7,REG_DST)
129	mov.b	r0,@(6,REG_DST)
130	mov.b	r0,@(5,REG_DST)
131	mov.b	r0,@(4,REG_DST)
132	mov.b	r0,@(3,REG_DST)
133	mov.b	r0,@(2,REG_DST)
134	mov.b	r0,@(1,REG_DST)
	/*
	 * bzero build: the store of dst[0] is the delay slot of rts, so
	 * label 1 sits on that store and the rts takes one table slot.
	 * The len instructions before label 1 are then len - 1 stores plus
	 * the rts, and the delay-slot store brings the total back to len.
	 * memset build: label 1 is on the rts, after the last store, and
	 * the delay slot reloads the return value into r0.
	 */
135#ifdef BZERO
136	rts
1371:	mov.b	r0,@REG_DST
138#else
139	mov.b	r0,@REG_DST
1401:	rts
141	mov	REG_DST0,r0
142#endif
143
144
/*
 * small_aligned: same length range as small, reached when bit 0 of the
 * destination is clear, so the fill can be done with 16-bit stores.
 *
 * The memset build first replicates the fill byte into the low 16 bits
 * of REG_C (the bzero build already has 0 there).  If len is odd, the
 * last byte dst[len-1] is stored separately and len is reduced by one.
 * The rest is done by jumping into the table of mov.w stores: the jump
 * target is label 1 minus len, and each 2-byte instruction stores two
 * bytes, so exactly len / 2 stores execute.
 */
145/* 2 bytes aligned small fill */
146small_aligned:
147#ifndef BZERO
148	extu.b	REG_C,REG_TMP1		/* REG_C = ??????xx, REG_TMP1 = ????00xx */
149	shll8	REG_C			/* REG_C = ????xx00, REG_TMP1 = ????00xx */
150	or	REG_TMP1,REG_C		/* REG_C = ????xxxx */
151#endif
152
153	mov	REG_LEN,r0
154	tst	#1,r0			/* len is aligned? */
155	bt/s	1f
	/*
	 * Delay slot: r0 = len - 1.  It also executes when len is even, but
	 * r0 is then overwritten by the mova below before it is used.
	 */
156	add	#-1,r0
157	mov.b	REG_C,@(r0,REG_DST)	/* fill the last byte, dst[len-1] */
158	mov	r0,REG_LEN		/* len is now even */
1591:
160
161	mova	1f,r0			/* 1f must be 4bytes aligned! */
162	sub	REG_LEN,r0		/* r0 = entry point inside the table */
163	jmp	@r0
164	mov	REG_C,r0		/* delay slot: r0 = 16-bit fill pattern */
165
166	.align	2
167	mov.w	r0,@(30,REG_DST)
168	mov.w	r0,@(28,REG_DST)
169	mov.w	r0,@(26,REG_DST)
170	mov.w	r0,@(24,REG_DST)
171	mov.w	r0,@(22,REG_DST)
172	mov.w	r0,@(20,REG_DST)
173	mov.w	r0,@(18,REG_DST)
174	mov.w	r0,@(16,REG_DST)
175	mov.w	r0,@(14,REG_DST)
176	mov.w	r0,@(12,REG_DST)
177	mov.w	r0,@(10,REG_DST)
178	mov.w	r0,@(8,REG_DST)
179	mov.w	r0,@(6,REG_DST)
180	mov.w	r0,@(4,REG_DST)
181	mov.w	r0,@(2,REG_DST)
	/*
	 * As in the byte table of small: in the bzero build the final store
	 * is the delay slot of rts and carries label 1; in the memset build
	 * label 1 is on the rts and the delay slot reloads the return value.
	 */
182#ifdef BZERO
183	rts
1841:	mov.w	r0,@REG_DST
185#else
186	mov.w	r0,@REG_DST
1871:	rts
188	mov	REG_DST0,r0
189#endif
190
191
192
/*
 * large: fill 28 bytes or more (the range the entry dispatch sends
 * here) using 32-bit stores.
 *
 * The fill byte is replicated into all four bytes of REG_C (bzero just
 * loads 0) and REG_PTR is set to dst + len.  If the destination is not
 * 4-byte aligned control goes to unaligned_dst; if only the length is
 * not a multiple of 4 it goes to unaligned_len.  Both come back to
 * aligned with REG_DST and REG_PTR 4-byte aligned and REG_LEN equal to
 * REG_PTR - REG_DST.
 *
 * aligned fills backward from REG_PTR: first in 32-byte groups while
 * at least 32 bytes remain, then one longword at a time until REG_PTR
 * reaches REG_DST.
 */
193	.align	2
194large:
195#ifdef BZERO
196	mov	#0,REG_C
197#else
198	extu.b	REG_C,REG_TMP1		/* REG_C = ??????xx, REG_TMP1 = ????00xx */
199	shll8	REG_C			/* REG_C = ????xx00, REG_TMP1 = ????00xx */
200	or	REG_C,REG_TMP1		/* REG_C = ????xx00, REG_TMP1 = ????xxxx */
201	swap.w	REG_TMP1,REG_C		/* REG_C = xxxx????, REG_TMP1 = ????xxxx */
202	xtrct	REG_TMP1,REG_C		/* REG_C = xxxxxxxx */
203#endif
204
205	mov	#3,REG_TMP1
206	tst	REG_TMP1,REG_DST	/* T = 1 when dst is 4-byte aligned */
207	mov	REG_DST,REG_PTR
208	bf/s	unaligned_dst
209	add	REG_LEN,REG_PTR		/* REG_PTR = dst + len; */
210	tst	REG_TMP1,REG_LEN	/* T = 1 when len is a multiple of 4 */
211	bf/s	unaligned_len
	/*
	 * The delay slot of the branch above is the first instruction of
	 * aligned (the load of 32 into REG_TMP1).  It executes on both
	 * paths.
	 */
212
213aligned:
214	/* fill 32*n bytes */
215	mov	#32,REG_TMP1
216	cmp/hi	REG_LEN,REG_TMP1	/* T = (32 > len) */
217	bt	9f			/* fewer than 32 bytes: skip the loop */
	/*
	 * NOTE(review): any padding emitted for the alignment below is
	 * executed on fall-through from the bt above; presumably the
	 * assembler fills it with nop -- confirm.
	 */
218	.align	2
	/*
	 * Each pass moves REG_PTR down by 32, stores eight longwords at
	 * offsets 0..28 and subtracts 32 from REG_LEN.  The cmp/hi in the
	 * middle sets T for the bf/s at the bottom; the last store is in
	 * that branch's delay slot.
	 */
2191:	sub	REG_TMP1,REG_PTR
220	mov.l	REG_C,@REG_PTR
221	sub	REG_TMP1,REG_LEN
222	mov.l	REG_C,@(4,REG_PTR)
223	cmp/hi	REG_LEN,REG_TMP1	/* T = (32 > remaining len) */
224	mov.l	REG_C,@(8,REG_PTR)
225	mov.l	REG_C,@(12,REG_PTR)
226	mov.l	REG_C,@(16,REG_PTR)
227	mov.l	REG_C,@(20,REG_PTR)
228	mov.l	REG_C,@(24,REG_PTR)
229	bf/s	1b
230	mov.l	REG_C,@(28,REG_PTR)
2319:
232
	/*
	 * Same scheme as the byte loop at the entry point, with longwords:
	 * REG_DST is advanced by 4 so that cmp/eq sets T when exactly one
	 * longword is left; the pre-decrementing store writes it and the
	 * branch after the store exits.
	 */
233	/* fill left 4*n bytes */
234	cmp/eq	REG_DST,REG_PTR
235	bt	9f			/* nothing left */
236	add	#4,REG_DST
237	cmp/eq	REG_DST,REG_PTR
2381:	mov.l	REG_C,@-REG_PTR
239	bt/s	9f
240	cmp/eq	REG_DST,REG_PTR
241	mov.l	REG_C,@-REG_PTR
242	bt/s	9f
243	cmp/eq	REG_DST,REG_PTR
244	mov.l	REG_C,@-REG_PTR
245	bt/s	9f
246	cmp/eq	REG_DST,REG_PTR
247	mov.l	REG_C,@-REG_PTR
248	bf/s	1b
249	cmp/eq	REG_DST,REG_PTR
2509:
251#ifdef BZERO
252	rts
253	nop
254#else
255	rts
256	mov	REG_DST0,r0		/* delay slot: return the original dst */
257#endif
258
259
/*
 * unaligned_dst / unaligned_len: alignment fix-up for the large path.
 *
 * On entry REG_PTR = dst + len and REG_C holds the 32-bit fill pattern.
 * unaligned_dst stores one byte and/or one halfword at the start until
 * REG_DST is 4-byte aligned, then checks the end pointer.
 * unaligned_len (also entered directly from large when only the length
 * is unaligned) stores one byte and/or one halfword just below REG_PTR
 * until REG_PTR is 4-byte aligned.  Finally REG_LEN is recomputed as
 * REG_PTR - REG_DST and control returns to aligned.
 */
260unaligned_dst:
261	mov	#1,REG_TMP1
262	tst	REG_TMP1,REG_DST	/* if (dst & 1) {               */
263	add	#1,REG_TMP1		/* REG_TMP1 = 2; T is not affected */
264	bt/s	2f
265	tst	REG_TMP1,REG_DST	/* delay slot: T = !(dst & 2) */
266	mov.b	REG_C,@REG_DST		/*   *dst++ = c;                */
267	add	#1,REG_DST
268	tst	REG_TMP1,REG_DST	/* test bit 1 again: the increment may have changed it */
2692:					/* }                            */
270					/* if (dst & 2) {               */
271	bt	4f
272	mov.w	REG_C,@REG_DST		/*   *(u_int16_t *)dst = c; dst += 2; */
273	add	#2,REG_DST
2744:					/* }                            */
275
276
277	tst	#3,REG_PTR		/* if (ptr & 3) {               */
278	bt/s	4f			/*                              */
	/*
	 * The first instruction of unaligned_len is the delay slot of the
	 * branch above.  It executes on both paths.
	 */
279unaligned_len:
280	tst	#1,REG_PTR		/*   if (ptr & 1) {             */
281	bt/s	2f
282	tst	#2,REG_PTR		/* delay slot: T = !(ptr & 2); the byte store below does not change bit 1 */
283	mov.b	REG_C,@-REG_PTR		/*     *--ptr = c;              */
2842:					/*   }                          */
285					/*   if (ptr & 2) {             */
286	bt	4f
287	mov.w	REG_C,@-REG_PTR		/*     *--(u_int16_t*)ptr = c;  */
2884:					/*   }                          */
289					/* }                            */
290
291	mov	REG_PTR,REG_LEN
292	bra	aligned
293	sub	REG_DST,REG_LEN		/* delay slot: REG_LEN = ptr - dst */
294
295