/* xref: /netbsd-src/common/lib/libc/arch/sh3/string/memset.S (revision 267197ec1eebfcb9810ea27a89625b6ddf68e3e7) */
/*	$NetBSD: memset.S,v 1.2 2008/02/16 17:37:13 apb Exp $	*/

/*-
 * Copyright (c) 2002 SHIMIZU Ryo.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>

#if defined(LIBC_SCCS) && !defined(lint)
	RCSID("$NetBSD: memset.S,v 1.2 2008/02/16 17:37:13 apb Exp $")
#endif

/*
 * Register aliases.
 *
 * This source builds one of two entry points, selected by BZERO:
 *
 *   BZERO defined (bzero):
 *	REG_DST = r4	area to fill
 *	REG_LEN = r5	byte count
 *	REG_C   = r2	fill value; the code loads 0 into it itself
 *
 *   BZERO not defined (memset):
 *	REG_DST  = r4	area to fill
 *	REG_C    = r5	fill value
 *	REG_LEN  = r6	byte count
 *	REG_DST0 = r3	copy of the original REG_DST, handed back in r0
 *
 * Common scratch registers:
 *	REG_PTR  = r0	end pointer (dst + len) in the large-fill path.
 *			It has to be r0 because that path tests it with
 *			"tst #imm,REG_PTR", an immediate form whose operand
 *			is always r0.
 *	REG_TMP1 = r1	temporary
 */
#define	REG_PTR				r0
#define	REG_TMP1			r1

#ifdef BZERO
# define	REG_C			r2
# define	REG_DST			r4
# define	REG_LEN			r5
#else
# define	REG_DST0		r3
# define	REG_DST			r4
# define	REG_C			r5
# define	REG_LEN			r6
#endif

/*
 * Entry point: bzero when BZERO is defined, memset otherwise.
 *
 * In:	REG_DST = start of the area to fill
 *	REG_C   = fill byte (memset only; bzero loads 0 itself)
 *	REG_LEN = number of bytes to fill
 * Out:	memset returns the original REG_DST in r0 (kept in REG_DST0).
 *	bzero sets up no return value.
 *
 * Dispatch on the length:
 *	len >= 28	-> large
 *	12 <= len < 28	-> small
 *	len < 12	-> byte loop right here
 *
 * Delay slots: the instruction that follows bt/s, bf/s or rts is executed
 * before control is transferred, so the instruction order below matters.
 */
#ifdef BZERO
ENTRY(bzero)
#else
ENTRY(memset)
	mov	REG_DST,REG_DST0	/* for return value */
#endif
	/* small amount to fill ? */
	mov	#28,REG_TMP1
	cmp/hs	REG_TMP1,REG_LEN	/* if (len >= 28) goto large; */
	bt/s	large
	mov	#12,REG_TMP1		/* if (len >= 12) goto small; */
	cmp/hs	REG_TMP1,REG_LEN
	bt/s	small
	/*
	 * Delay slot of "bt/s small":
	 *  - bzero:  the mov below, so REG_C is already 0 on arrival at small;
	 *  - memset: the tst that follows; small does its own tst first, so
	 *    the T bit produced here is not consumed on that path.
	 */
#ifdef BZERO
	mov	#0,REG_C
#endif
	/* very little fill (0 ~ 11 bytes) */
	tst	REG_LEN,REG_LEN		/* T = (len == 0) */
	add	REG_DST,REG_LEN		/* REG_LEN = dst + len (one past the end) */
	bt/s	done			/* len == 0: nothing to store */
	add	#1,REG_DST		/* (delay slot) REG_DST = dst + 1 */

	/*
	 * unroll 4 loops
	 *
	 * Bytes are stored backwards from the end with pre-decrement.
	 * Every cmp/eq asks, ahead of the store that follows it, whether
	 * REG_LEN == dst + 1, i.e. whether that store is the one that lands
	 * on dst[0]; the branch after the store then leaves the loop.
	 */
	cmp/eq	REG_DST,REG_LEN
1:	mov.b	REG_C,@-REG_LEN
	bt/s	done
	cmp/eq	REG_DST,REG_LEN		/* (delay slot) test for the next store */
	mov.b	REG_C,@-REG_LEN
	bt/s	done
	cmp/eq	REG_DST,REG_LEN		/* (delay slot) */
	mov.b	REG_C,@-REG_LEN
	bt/s	done
	cmp/eq	REG_DST,REG_LEN		/* (delay slot) */
	mov.b	REG_C,@-REG_LEN
	bf/s	1b			/* more bytes left: go round again */
	cmp/eq	REG_DST,REG_LEN		/* (delay slot) */
done:
#ifdef BZERO
	rts
	nop
#else
	rts
	mov	REG_DST0,r0		/* (delay slot) return original dst */
#endif


/*
 * small: 12 <= len < 28.
 *
 * An even dst is passed on to small_aligned.  For an odd dst the area is
 * filled with byte stores by jumping into the 32-entry store table below.
 * Every table entry is one 2-byte instruction that stores one byte, so
 * entering the table 2*len bytes before local label 1 executes exactly
 * len stores, covering dst[len-1] down to dst[0].
 *
 * The upper 16 entries address through REG_TMP1 (= dst + 16) because the
 * displacement of "mov.b r0,@(disp,Rn)" only reaches 15.
 *
 * In the bzero build label 1 sits on the last store, which is the delay
 * slot of rts; rts then occupies one table slot itself, and the number of
 * stores executed is still len.
 */
small:
	mov	REG_DST,r0
	tst	#1,r0			/* T = (dst is even) */
	bt/s	small_aligned
	mov	REG_DST,REG_TMP1	/* (delay slot) REG_TMP1 = dst */
	shll	REG_LEN			/* REG_LEN = 2 * len = table bytes to run */
	mova	1f,r0			/* 1f must be 4bytes aligned! */
	add	#16,REG_TMP1		/* REG_TMP1 = dst+16; */
	sub	REG_LEN,r0		/* r0 = entry address inside the table */
	jmp	@r0
	mov	REG_C,r0		/* (delay slot) the stores below take r0 */

	.align	2
	mov.b	r0,@(15,REG_TMP1)
	mov.b	r0,@(14,REG_TMP1)
	mov.b	r0,@(13,REG_TMP1)
	mov.b	r0,@(12,REG_TMP1)
	mov.b	r0,@(11,REG_TMP1)
	mov.b	r0,@(10,REG_TMP1)
	mov.b	r0,@(9,REG_TMP1)
	mov.b	r0,@(8,REG_TMP1)
	mov.b	r0,@(7,REG_TMP1)
	mov.b	r0,@(6,REG_TMP1)
	mov.b	r0,@(5,REG_TMP1)
	mov.b	r0,@(4,REG_TMP1)
	mov.b	r0,@(3,REG_TMP1)
	mov.b	r0,@(2,REG_TMP1)
	mov.b	r0,@(1,REG_TMP1)
	mov.b	r0,@REG_TMP1
	mov.b	r0,@(15,REG_DST)
	mov.b	r0,@(14,REG_DST)
	mov.b	r0,@(13,REG_DST)
	mov.b	r0,@(12,REG_DST)
	mov.b	r0,@(11,REG_DST)
	mov.b	r0,@(10,REG_DST)
	mov.b	r0,@(9,REG_DST)
	mov.b	r0,@(8,REG_DST)
	mov.b	r0,@(7,REG_DST)
	mov.b	r0,@(6,REG_DST)
	mov.b	r0,@(5,REG_DST)
	mov.b	r0,@(4,REG_DST)
	mov.b	r0,@(3,REG_DST)
	mov.b	r0,@(2,REG_DST)
	mov.b	r0,@(1,REG_DST)
#ifdef BZERO
	rts
1:	mov.b	r0,@REG_DST		/* (delay slot) last store, dst[0] */
#else
	mov.b	r0,@REG_DST
1:	rts
	mov	REG_DST0,r0		/* (delay slot) return original dst */
#endif


/*
 * 2 bytes aligned small fill
 *
 * Entered from small when dst is even; 12 <= len < 28.
 *
 * memset first duplicates the fill byte into the low 16 bits of REG_C
 * (bzero already has REG_C == 0).  If len is odd, the final byte
 * dst[len-1] is stored on its own and len is reduced by one.  The rest is
 * filled with halfword stores by jumping into the 16-entry table below:
 * every entry is one 2-byte instruction that stores two bytes, so
 * entering the table len bytes before the second local label 1 executes
 * len/2 stores, covering dst[len-2] down to dst[0].
 *
 * In the bzero build that label sits on the last store, which is the
 * delay slot of rts; rts occupies one table slot, and the number of
 * stores executed is still len/2.
 */
small_aligned:
#ifndef BZERO
	extu.b	REG_C,REG_TMP1		/* REG_C = ??????xx, REG_TMP1 = ????00xx */
	shll8	REG_C			/* REG_C = ????xx00, REG_TMP1 = ????00xx */
	or	REG_TMP1,REG_C		/* REG_C = ????xxxx */
#endif

	mov	REG_LEN,r0
	tst	#1,r0			/* len is aligned? */
	bt/s	1f
	add	#-1,r0			/* (delay slot) r0 = len - 1, used if odd */
	mov.b	REG_C,@(r0,REG_DST)	/* fill last a byte */
	mov	r0,REG_LEN		/* len is even from here on */
1:

	mova	1f,r0			/* 1f must be 4bytes aligned! */
	sub	REG_LEN,r0		/* r0 = entry address inside the table */
	jmp	@r0
	mov	REG_C,r0		/* (delay slot) the stores below take r0 */

	.align	2
	mov.w	r0,@(30,REG_DST)
	mov.w	r0,@(28,REG_DST)
	mov.w	r0,@(26,REG_DST)
	mov.w	r0,@(24,REG_DST)
	mov.w	r0,@(22,REG_DST)
	mov.w	r0,@(20,REG_DST)
	mov.w	r0,@(18,REG_DST)
	mov.w	r0,@(16,REG_DST)
	mov.w	r0,@(14,REG_DST)
	mov.w	r0,@(12,REG_DST)
	mov.w	r0,@(10,REG_DST)
	mov.w	r0,@(8,REG_DST)
	mov.w	r0,@(6,REG_DST)
	mov.w	r0,@(4,REG_DST)
	mov.w	r0,@(2,REG_DST)
#ifdef BZERO
	rts
1:	mov.w	r0,@REG_DST		/* (delay slot) last store, dst[0..1] */
#else
	mov.w	r0,@REG_DST
1:	rts
	mov	REG_DST0,r0		/* (delay slot) return original dst */
#endif



/*
 * large: len >= 28.
 *
 * Builds the 32-bit fill pattern in REG_C (0 for bzero, the fill byte
 * repeated four times for memset) and sets REG_PTR = dst + len.  A dst
 * that is not 4-byte aligned goes to unaligned_dst; an aligned dst with a
 * length that is not a multiple of 4 goes to unaligned_len; otherwise
 * execution falls into aligned.
 *
 * aligned: on entry REG_DST and REG_PTR are both 4-byte aligned and
 * REG_LEN == REG_PTR - REG_DST.  The area is filled backwards from
 * REG_PTR: first in 32-byte chunks while len >= 32, then one longword at
 * a time until REG_PTR meets REG_DST.
 */
	.align	2
large:
#ifdef BZERO
	mov	#0,REG_C
#else
	extu.b	REG_C,REG_TMP1		/* REG_C = ??????xx, REG_TMP1 = ????00xx */
	shll8	REG_C			/* REG_C = ????xx00, REG_TMP1 = ????00xx */
	or	REG_C,REG_TMP1		/* REG_C = ????xx00, REG_TMP1 = ????xxxx */
	swap.w	REG_TMP1,REG_C		/* REG_C = xxxx????, REG_TMP1 = ????xxxx */
	xtrct	REG_TMP1,REG_C		/* REG_C = xxxxxxxx */
#endif

	mov	#3,REG_TMP1
	tst	REG_TMP1,REG_DST	/* T = ((dst & 3) == 0) */
	mov	REG_DST,REG_PTR
	bf/s	unaligned_dst
	add	REG_LEN,REG_PTR		/* REG_PTR = dst + len; */
	tst	REG_TMP1,REG_LEN	/* T = ((len & 3) == 0) */
	bf/s	unaligned_len
	/*
	 * The delay slot of the bf/s above is the first instruction of
	 * aligned (the load of 32 into REG_TMP1); it is executed whether
	 * or not the branch is taken.
	 */

aligned:
	/* fill 32*n bytes */
	mov	#32,REG_TMP1
	cmp/hi	REG_LEN,REG_TMP1	/* T = (32 > len) */
	bt	9f			/* less than one chunk left */
	.align	2
1:	sub	REG_TMP1,REG_PTR	/* step back one 32-byte chunk */
	mov.l	REG_C,@REG_PTR
	sub	REG_TMP1,REG_LEN	/* len -= 32 */
	mov.l	REG_C,@(4,REG_PTR)
	cmp/hi	REG_LEN,REG_TMP1	/* T = (32 > len), consumed by bf/s below */
	mov.l	REG_C,@(8,REG_PTR)
	mov.l	REG_C,@(12,REG_PTR)
	mov.l	REG_C,@(16,REG_PTR)
	mov.l	REG_C,@(20,REG_PTR)
	mov.l	REG_C,@(24,REG_PTR)
	bf/s	1b
	mov.l	REG_C,@(28,REG_PTR)	/* (delay slot) last longword of chunk */
9:

	/*
	 * fill left 4*n bytes
	 *
	 * Same shape as a 4-way unrolled backwards loop: after REG_DST is
	 * advanced by 4, every cmp/eq asks, ahead of the pre-decrement
	 * store that follows it, whether that store is the one that lands
	 * on the original REG_DST.
	 */
	cmp/eq	REG_DST,REG_PTR
	bt	9f			/* nothing left */
	add	#4,REG_DST
	cmp/eq	REG_DST,REG_PTR
1:	mov.l	REG_C,@-REG_PTR
	bt/s	9f
	cmp/eq	REG_DST,REG_PTR		/* (delay slot) test for the next store */
	mov.l	REG_C,@-REG_PTR
	bt/s	9f
	cmp/eq	REG_DST,REG_PTR		/* (delay slot) */
	mov.l	REG_C,@-REG_PTR
	bt/s	9f
	cmp/eq	REG_DST,REG_PTR		/* (delay slot) */
	mov.l	REG_C,@-REG_PTR
	bf/s	1b
	cmp/eq	REG_DST,REG_PTR		/* (delay slot) */
9:
#ifdef BZERO
	rts
	nop
#else
	rts
	mov	REG_DST0,r0		/* (delay slot) return original dst */
#endif


/*
 * unaligned_dst: entered from large when dst is not 4-byte aligned.
 * On entry REG_C holds the 32-bit fill pattern and REG_PTR == dst + len.
 * A leading byte and/or halfword is stored so that REG_DST becomes
 * 4-byte aligned; if REG_PTR is not 4-byte aligned either, execution
 * continues into unaligned_len.
 *
 * unaligned_len: also entered directly from large when dst is aligned but
 * len is not a multiple of 4.  A trailing byte and/or halfword is stored
 * (moving REG_PTR down) so that REG_PTR becomes 4-byte aligned.
 *
 * Both paths finish by recomputing REG_LEN = REG_PTR - REG_DST and
 * branching to aligned.
 */
unaligned_dst:
	mov	#1,REG_TMP1
	tst	REG_TMP1,REG_DST	/* if (dst & 1) {               */
	add	#1,REG_TMP1		/* REG_TMP1 = 2 (T is left alone) */
	bt/s	2f
	tst	REG_TMP1,REG_DST	/* (delay slot) T = !(dst & 2)  */
	mov.b	REG_C,@REG_DST		/*   *dst++ = c;                */
	add	#1,REG_DST
	tst	REG_TMP1,REG_DST	/*   redo the test on the new dst */
2:					/* }                            */
					/* if (dst & 2) {               */
	bt	4f
	mov.w	REG_C,@REG_DST		/*   *(uint16_t *)dst = c;      */
	add	#2,REG_DST		/*   dst += 2;                  */
4:					/* }                            */


	tst	#3,REG_PTR		/* if (ptr & 3) {               */
	bt/s	4f			/*                              */
	/*
	 * The delay slot of the bt/s above is the first instruction of
	 * unaligned_len; when the branch is taken its result is not used.
	 */
unaligned_len:
	tst	#1,REG_PTR		/*   if (ptr & 1) {             */
	bt/s	2f
	tst	#2,REG_PTR		/* (delay slot) T = !(ptr & 2); bit 1 */
					/* is unchanged by the decrement of   */
					/* an odd ptr below                   */
	mov.b	REG_C,@-REG_PTR		/*     *--ptr = c;              */
2:					/*   }                          */
					/*   if (ptr & 2) {             */
	bt	4f
	mov.w	REG_C,@-REG_PTR		/*     ptr -= 2; *(uint16_t *)ptr = c; */
4:					/*   }                          */
					/* }                            */

	mov	REG_PTR,REG_LEN
	bra	aligned
	sub	REG_DST,REG_LEN		/* (delay slot) REG_LEN = ptr - dst */