/*	$NetBSD: memset.S,v 1.2 2008/02/16 17:37:13 apb Exp $	*/

/*-
 * Copyright (c) 2002 SHIMIZU Ryo. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>

#if defined(LIBC_SCCS) && !defined(lint)
	RCSID("$NetBSD: memset.S,v 1.2 2008/02/16 17:37:13 apb Exp $")
#endif

/*
 * Register roles.
 *
 *   REG_PTR  (r0)  scratch; in the "large" path it is the end pointer
 *                  (dst + len) that is walked downwards.  It has to be r0
 *                  because "tst #imm" only operates on r0.
 *   REG_TMP1 (r1)  scratch.
 *   REG_DST        start of the area to fill (incoming r4).
 *   REG_C          fill value (incoming r5 for memset; r2, loaded with 0,
 *                  for bzero).
 *   REG_LEN        number of bytes to fill (r6 for memset, r5 for bzero).
 *   REG_DST0 (r3)  memset only: copy of the original dst, handed back in r0.
 */
#define	REG_PTR		r0
#define	REG_TMP1	r1

#ifdef BZERO
# define REG_C		r2
# define REG_DST	r4
# define REG_LEN	r5
#else
# define REG_DST0	r3
# define REG_DST	r4
# define REG_C		r5
# define REG_LEN	r6
#endif

/*
 * memset: fill REG_LEN bytes at REG_DST with the low byte of REG_C and
 *         return the original REG_DST in r0.
 * bzero:  (built when BZERO is defined) fill REG_LEN bytes at REG_DST
 *         with zero; nothing is loaded into r0 for the caller.
 *
 * Three strategies are selected by length:
 *   len >= 28        "large": align both ends, then fill 32 bytes per
 *                    iteration with longword stores, then the remaining
 *                    longwords one at a time.
 *   12 <= len < 28   "small": computed jump into a table of byte stores
 *                    (odd dst) or word stores (even dst).
 *   len < 12         simple byte loop, unrolled four times.
 *
 * NOTE: bt/s, bf/s, bra, jmp and rts are delayed branches: the instruction
 * that follows them is executed before the branch takes effect, and the
 * branch condition is sampled before that instruction runs.  Several
 * instructions below sit in such delay slots on purpose; do not reorder.
 */
#ifdef BZERO
ENTRY(bzero)
#else
ENTRY(memset)
	mov	REG_DST,REG_DST0	/* for return value */
#endif
	/* small amount to fill ? */
	mov	#28,REG_TMP1
	cmp/hs	REG_TMP1,REG_LEN	/* if (len >= 28) goto large; */
	bt/s	large
	mov	#12,REG_TMP1		/* (delay slot) if (len >= 12) goto small; */
	cmp/hs	REG_TMP1,REG_LEN
	bt/s	small
	/*
	 * Delay slot of "bt/s small": for bzero it is the "mov #0" below,
	 * for memset it is the "tst" that starts the byte loop setup.
	 */
#ifdef BZERO
	mov	#0,REG_C
#endif
	/* very little fill (0 ~ 11 bytes) */
	tst	REG_LEN,REG_LEN		/* T = (len == 0) */
	add	REG_DST,REG_LEN		/* REG_LEN = dst + len (end pointer) */
	bt/s	done			/* nothing to do for len == 0 */
	add	#1,REG_DST		/* (delay slot) REG_DST = dst + 1 */

	/*
	 * Byte loop, unrolled 4 times.  Bytes are stored from the end
	 * downwards.  T is set when the end pointer equals dst + 1, i.e.
	 * when the next store is the last one; each bt/s tests the T value
	 * computed before the preceding store.
	 */
	cmp/eq	REG_DST,REG_LEN
1:	mov.b	REG_C,@-REG_LEN
	bt/s	done
	cmp/eq	REG_DST,REG_LEN
	mov.b	REG_C,@-REG_LEN
	bt/s	done
	cmp/eq	REG_DST,REG_LEN
	mov.b	REG_C,@-REG_LEN
	bt/s	done
	cmp/eq	REG_DST,REG_LEN
	mov.b	REG_C,@-REG_LEN
	bf/s	1b
	cmp/eq	REG_DST,REG_LEN
done:
#ifdef BZERO
	rts
	nop
#else
	rts
	mov	REG_DST0,r0		/* (delay slot) return original dst */
#endif


/*
 * 12 <= len < 28.  If dst is even, use the word-store variant below.
 * Otherwise jump into a table of byte stores: every store is one 2-byte
 * instruction, so starting 2*len bytes before label 1 executes exactly
 * len stores, covering dst[len-1] down to dst[0].
 */
small:
	mov	REG_DST,r0
	tst	#1,r0
	bt/s	small_aligned
	mov	REG_DST,REG_TMP1	/* (delay slot) */
	shll	REG_LEN			/* REG_LEN = len * 2 (bytes of code) */
	mova	1f,r0			/* 1f must be 4bytes aligned! */
	add	#16,REG_TMP1		/* REG_TMP1 = dst+16; */
	sub	REG_LEN,r0		/* r0 = entry point inside the table */
	jmp	@r0
	mov	REG_C,r0		/* (delay slot) stores below use r0 */

	.align	2
	mov.b	r0,@(15,REG_TMP1)	/* dst[31] */
	mov.b	r0,@(14,REG_TMP1)
	mov.b	r0,@(13,REG_TMP1)
	mov.b	r0,@(12,REG_TMP1)
	mov.b	r0,@(11,REG_TMP1)
	mov.b	r0,@(10,REG_TMP1)
	mov.b	r0,@(9,REG_TMP1)
	mov.b	r0,@(8,REG_TMP1)
	mov.b	r0,@(7,REG_TMP1)
	mov.b	r0,@(6,REG_TMP1)
	mov.b	r0,@(5,REG_TMP1)
	mov.b	r0,@(4,REG_TMP1)
	mov.b	r0,@(3,REG_TMP1)
	mov.b	r0,@(2,REG_TMP1)
	mov.b	r0,@(1,REG_TMP1)
	mov.b	r0,@REG_TMP1		/* dst[16] */
	mov.b	r0,@(15,REG_DST)
	mov.b	r0,@(14,REG_DST)
	mov.b	r0,@(13,REG_DST)
	mov.b	r0,@(12,REG_DST)
	mov.b	r0,@(11,REG_DST)
	mov.b	r0,@(10,REG_DST)
	mov.b	r0,@(9,REG_DST)
	mov.b	r0,@(8,REG_DST)
	mov.b	r0,@(7,REG_DST)
	mov.b	r0,@(6,REG_DST)
	mov.b	r0,@(5,REG_DST)
	mov.b	r0,@(4,REG_DST)
	mov.b	r0,@(3,REG_DST)
	mov.b	r0,@(2,REG_DST)
	mov.b	r0,@(1,REG_DST)
	/*
	 * The final store differs between the two builds so that label 1
	 * stays exactly 32 instructions after the .align above: bzero puts
	 * the store for dst[0] in the delay slot of rts, memset needs that
	 * slot to load the return value.
	 */
#ifdef BZERO
	rts
1:	mov.b	r0,@REG_DST
#else
	mov.b	r0,@REG_DST
1:	rts
	mov	REG_DST0,r0
#endif


/* 2 bytes aligned small fill */
small_aligned:
#ifndef BZERO
	/* replicate the fill byte into the low 16 bits of REG_C */
	extu.b	REG_C,REG_TMP1	/* REG_C = ??????xx, REG_TMP1 = 000000xx */
	shll8	REG_C		/* REG_C = ????xx00, REG_TMP1 = 000000xx */
	or	REG_TMP1,REG_C	/* REG_C = ????xxxx */
#endif

	mov	REG_LEN,r0
	tst	#1,r0			/* is len even? */
	bt/s	1f
	add	#-1,r0			/* (delay slot) r0 = len - 1 */
	mov.b	REG_C,@(r0,REG_DST)	/* fill the last byte: dst[len-1] */
	mov	r0,REG_LEN		/* len is now even */
1:

	/*
	 * Each word store is one 2-byte instruction that fills 2 bytes, so
	 * starting len bytes before label 1 executes len/2 stores.
	 */
	mova	1f,r0			/* 1f must be 4bytes aligned! */
	sub	REG_LEN,r0
	jmp	@r0
	mov	REG_C,r0		/* (delay slot) stores below use r0 */

	.align	2
	mov.w	r0,@(30,REG_DST)
	mov.w	r0,@(28,REG_DST)
	mov.w	r0,@(26,REG_DST)
	mov.w	r0,@(24,REG_DST)
	mov.w	r0,@(22,REG_DST)
	mov.w	r0,@(20,REG_DST)
	mov.w	r0,@(18,REG_DST)
	mov.w	r0,@(16,REG_DST)
	mov.w	r0,@(14,REG_DST)
	mov.w	r0,@(12,REG_DST)
	mov.w	r0,@(10,REG_DST)
	mov.w	r0,@(8,REG_DST)
	mov.w	r0,@(6,REG_DST)
	mov.w	r0,@(4,REG_DST)
	mov.w	r0,@(2,REG_DST)
	/* same layout trick as in the byte table: 16 instructions to label 1 */
#ifdef BZERO
	rts
1:	mov.w	r0,@REG_DST
#else
	mov.w	r0,@REG_DST
1:	rts
	mov	REG_DST0,r0
#endif



/*
 * len >= 28.  Build a 32-bit fill pattern, align the start and the end of
 * the area to 4 bytes, then fill with longword stores from the end down.
 */
	.align	2
large:
#ifdef BZERO
	mov	#0,REG_C
#else
	/* replicate the fill byte into all four bytes of REG_C */
	extu.b	REG_C,REG_TMP1	/* REG_C = ??????xx, REG_TMP1 = 000000xx */
	shll8	REG_C		/* REG_C = ????xx00, REG_TMP1 = 000000xx */
	or	REG_C,REG_TMP1	/* REG_C = ????xx00, REG_TMP1 = ????xxxx */
	swap.w	REG_TMP1,REG_C	/* REG_C = xxxx????, REG_TMP1 = ????xxxx */
	xtrct	REG_TMP1,REG_C	/* REG_C = xxxxxxxx */
#endif

	mov	#3,REG_TMP1
	tst	REG_TMP1,REG_DST	/* T = ((dst & 3) == 0) */
	mov	REG_DST,REG_PTR
	bf/s	unaligned_dst
	add	REG_LEN,REG_PTR		/* (delay slot) REG_PTR = dst + len; */
	tst	REG_TMP1,REG_LEN	/* T = ((len & 3) == 0) */
	bf/s	unaligned_len
	/* the "mov #32" below is the delay slot of "bf/s unaligned_len" */

aligned:
	/*
	 * Here REG_DST and REG_PTR are both 4-byte aligned and
	 * REG_LEN == REG_PTR - REG_DST.
	 */
	/* fill 32*n bytes */
	mov	#32,REG_TMP1
	cmp/hi	REG_LEN,REG_TMP1	/* T = (32 > len) */
	bt	9f
	.align	2
1:	sub	REG_TMP1,REG_PTR	/* ptr -= 32 */
	mov.l	REG_C,@REG_PTR
	sub	REG_TMP1,REG_LEN	/* len -= 32 */
	mov.l	REG_C,@(4,REG_PTR)
	cmp/hi	REG_LEN,REG_TMP1	/* T = (32 > len): last full block? */
	mov.l	REG_C,@(8,REG_PTR)
	mov.l	REG_C,@(12,REG_PTR)
	mov.l	REG_C,@(16,REG_PTR)
	mov.l	REG_C,@(20,REG_PTR)
	mov.l	REG_C,@(24,REG_PTR)
	bf/s	1b
	mov.l	REG_C,@(28,REG_PTR)	/* (delay slot) */
9:

	/*
	 * Fill the remaining 4*n bytes, one longword at a time from the end
	 * down, unrolled 4 times.  T is set when REG_PTR equals dst + 4,
	 * i.e. when the next store is the last one.
	 */
	cmp/eq	REG_DST,REG_PTR
	bt	9f			/* nothing left */
	add	#4,REG_DST
	cmp/eq	REG_DST,REG_PTR
1:	mov.l	REG_C,@-REG_PTR
	bt/s	9f
	cmp/eq	REG_DST,REG_PTR
	mov.l	REG_C,@-REG_PTR
	bt/s	9f
	cmp/eq	REG_DST,REG_PTR
	mov.l	REG_C,@-REG_PTR
	bt/s	9f
	cmp/eq	REG_DST,REG_PTR
	mov.l	REG_C,@-REG_PTR
	bf/s	1b
	cmp/eq	REG_DST,REG_PTR
9:
#ifdef BZERO
	rts
	nop
#else
	rts
	mov	REG_DST0,r0		/* (delay slot) return original dst */
#endif


/*
 * Start of the area is not 4-byte aligned: store up to one byte and one
 * word at the front so that REG_DST becomes aligned, then fall into the
 * same treatment for the end pointer.
 */
unaligned_dst:
	mov	#1,REG_TMP1
	tst	REG_TMP1,REG_DST	/* if (dst & 1) {		*/
	add	#1,REG_TMP1		/* REG_TMP1 = 2 */
	bt/s	2f
	tst	REG_TMP1,REG_DST	/* (delay slot) T = !(dst & 2) */
	mov.b	REG_C,@REG_DST		/*   *dst++ = c;		*/
	add	#1,REG_DST
	tst	REG_TMP1,REG_DST	/* re-test: dst has changed */
2:					/* }				*/
					/* if (dst & 2) {		*/
	bt	4f
	mov.w	REG_C,@REG_DST		/*   *(uint16_t*)dst++ = c;	*/
	add	#2,REG_DST
4:					/* }				*/


	tst	#3,REG_PTR		/* if (ptr & 3) {		*/
	bt/s	4f			/*   (skip if end is aligned)	*/
	/* the "tst #1" below is also the delay slot of the bt/s above */
unaligned_len:
	tst	#1,REG_PTR		/*   if (ptr & 1) {		*/
	bt/s	2f
	tst	#2,REG_PTR		/* (delay slot) T = !(ptr & 2) */
	mov.b	REG_C,@-REG_PTR		/*     *--ptr = c;		*/
2:					/*   }				*/
					/*   if (ptr & 2) {		*/
	bt	4f
	mov.w	REG_C,@-REG_PTR		/*     *--(uint16_t*)ptr = c;	*/
4:					/*   }				*/
					/* }				*/

	mov	REG_PTR,REG_LEN
	bra	aligned
	sub	REG_DST,REG_LEN		/* (delay slot) len = ptr - dst */