/*	$NetBSD: memset.S,v 1.1 2005/12/20 19:28:50 christos Exp $	*/

/*-
 * Copyright (c) 2002 SHIMIZU Ryo.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>

/*
 * SuperH memset() / bzero().
 *
 * One source, two entry points, selected at preprocessing time:
 *
 *   default   void *memset(void *dst, int c, size_t len)
 *             in:  r4 = dst, r5 = c, r6 = len      out: r0 = dst
 *   -DBZERO   void bzero(void *dst, size_t len)
 *             in:  r4 = dst, r5 = len              out: (none)
 *
 * Scratch registers written: r0, r1, plus r3 (memset) or r2 (bzero),
 * and the T bit.  The argument registers are modified as well.
 *
 * Strategy, chosen by length:
 *   len <  12   byte loop, storing downward from dst+len      (fallthrough)
 *   len <  28   computed jump into an unrolled store table    (small)
 *                 - byte stores when dst is odd
 *                 - 16-bit stores when dst is even            (small_aligned)
 *   len >= 28   replicate the fill byte to 32 bits, trim both ends to
 *               4-byte alignment, then store longwords        (large)
 *
 * NOTE: bt/s, bf/s, bra, jmp and rts are delayed branches: the
 * instruction that follows them (the delay slot) is executed before
 * the branch takes effect.  Delay slots are marked below.  The two
 * store tables are entered through "mova"-relative computed jumps, so
 * the number of instructions between each ".align 2" and its "1:"
 * label must not change.
 */

#define	REG_PTR			r0	/* large path: running end pointer */
#define	REG_TMP1		r1	/* scratch */

#ifdef BZERO
# define	REG_C			r2	/* fill value (always 0) */
# define	REG_DST			r4	/* arg 1: destination */
# define	REG_LEN			r5	/* arg 2: length */
#else
# define	REG_DST0		r3	/* saved dst, for the return value */
# define	REG_DST			r4	/* arg 1: destination */
# define	REG_C			r5	/* arg 2: fill value */
# define	REG_LEN			r6	/* arg 3: length */
#endif

#ifdef BZERO
ENTRY(bzero)
#else
ENTRY(memset)
	mov	REG_DST,REG_DST0	/* for return value */
#endif
	/* small amount to fill ? */
	mov	#28,REG_TMP1
	cmp/hs	REG_TMP1,REG_LEN	/* if (len >= 28) goto large; */
	bt/s	large
	mov	#12,REG_TMP1		/* (delay slot) */
	cmp/hs	REG_TMP1,REG_LEN	/* if (len >= 12) goto small; */
	bt/s	small
	/*
	 * Delay slot of the branch above: "mov #0,REG_C" for bzero,
	 * "tst REG_LEN,REG_LEN" for memset.
	 */
#ifdef BZERO
	mov	#0,REG_C
#endif
	/* very little fill (0 ~ 11 bytes) */
	tst	REG_LEN,REG_LEN		/* T = (len == 0) */
	add	REG_DST,REG_LEN		/* REG_LEN = dst + len (end pointer) */
	bt/s	done			/* nothing to do for len == 0 */
	add	#1,REG_DST		/* (delay slot) REG_DST = dst + 1 */

	/*
	 * Byte loop, unrolled 4 times.  Each cmp/eq tests whether the
	 * NEXT store is the last one (end == dst + 1); the store then
	 * pre-decrements the end pointer, and the cmp/eq in the delay
	 * slot prepares T for the following store.
	 */
	cmp/eq	REG_DST,REG_LEN
1:	mov.b	REG_C,@-REG_LEN
	bt/s	done
	cmp/eq	REG_DST,REG_LEN		/* (delay slot) */
	mov.b	REG_C,@-REG_LEN
	bt/s	done
	cmp/eq	REG_DST,REG_LEN		/* (delay slot) */
	mov.b	REG_C,@-REG_LEN
	bt/s	done
	cmp/eq	REG_DST,REG_LEN		/* (delay slot) */
	mov.b	REG_C,@-REG_LEN
	bf/s	1b
	cmp/eq	REG_DST,REG_LEN		/* (delay slot) */
done:
#ifdef BZERO
	rts
	nop				/* (delay slot) */
#else
	rts
	mov	REG_DST0,r0		/* (delay slot) return dst */
#endif


/*
 * 12 <= len < 28.
 * If dst is odd, store bytes: jump backwards from label 1 by one
 * 2-byte instruction per byte to fill, so that exactly len stores
 * (offsets len-1 .. 0) are executed.
 */
small:
	mov	REG_DST,r0
	tst	#1,r0			/* T = dst is 2-byte aligned */
	bt/s	small_aligned
	mov	REG_DST,REG_TMP1	/* (delay slot) */
	shll	REG_LEN			/* REG_LEN = len * 2 (bytes of code) */
	mova	1f,r0			/* 1f must be 4bytes aligned! */
	add	#16,REG_TMP1		/* REG_TMP1 = dst+16; */
	sub	REG_LEN,r0		/* r0 = 1f - 2 * len */
	jmp	@r0
	mov	REG_C,r0		/* (delay slot) stores below need r0 */

	.align	2
	mov.b	r0,@(15,REG_TMP1)	/* dst[31] */
	mov.b	r0,@(14,REG_TMP1)
	mov.b	r0,@(13,REG_TMP1)
	mov.b	r0,@(12,REG_TMP1)
	mov.b	r0,@(11,REG_TMP1)
	mov.b	r0,@(10,REG_TMP1)
	mov.b	r0,@(9,REG_TMP1)
	mov.b	r0,@(8,REG_TMP1)
	mov.b	r0,@(7,REG_TMP1)
	mov.b	r0,@(6,REG_TMP1)
	mov.b	r0,@(5,REG_TMP1)
	mov.b	r0,@(4,REG_TMP1)
	mov.b	r0,@(3,REG_TMP1)
	mov.b	r0,@(2,REG_TMP1)
	mov.b	r0,@(1,REG_TMP1)
	mov.b	r0,@REG_TMP1		/* dst[16] */
	mov.b	r0,@(15,REG_DST)	/* dst[15] */
	mov.b	r0,@(14,REG_DST)
	mov.b	r0,@(13,REG_DST)
	mov.b	r0,@(12,REG_DST)
	mov.b	r0,@(11,REG_DST)
	mov.b	r0,@(10,REG_DST)
	mov.b	r0,@(9,REG_DST)
	mov.b	r0,@(8,REG_DST)
	mov.b	r0,@(7,REG_DST)
	mov.b	r0,@(6,REG_DST)
	mov.b	r0,@(5,REG_DST)
	mov.b	r0,@(4,REG_DST)
	mov.b	r0,@(3,REG_DST)
	mov.b	r0,@(2,REG_DST)
	mov.b	r0,@(1,REG_DST)
#ifdef BZERO
	/* The final store (dst[0]) sits in the delay slot of rts. */
	rts
1:	mov.b	r0,@REG_DST
#else
	mov.b	r0,@REG_DST		/* dst[0] */
1:	rts
	mov	REG_DST0,r0		/* (delay slot) return dst */
#endif


/*
 * 2 bytes aligned small fill.
 * An odd trailing byte is stored first; the remaining (even) length is
 * then filled with 16-bit stores through a computed jump.  Each store
 * is one 2-byte instruction covering 2 bytes, so the jump distance
 * back from label 1 is simply the length.
 */
small_aligned:
#ifndef BZERO
	extu.b	REG_C,REG_TMP1		/* REG_C = ??????xx, REG_TMP1 = ????00xx */
	shll8	REG_C			/* REG_C = ????xx00, REG_TMP1 = ????00xx */
	or	REG_TMP1,REG_C		/* REG_C = ????xxxx */
#endif

	mov	REG_LEN,r0
	tst	#1,r0			/* len is aligned? */
	bt/s	1f
	add	#-1,r0			/* (delay slot) r0 = len - 1 */
	mov.b	REG_C,@(r0,REG_DST)	/* fill the last byte */
	mov	r0,REG_LEN		/* len is now even */
1:

	mova	1f,r0			/* 1f must be 4bytes aligned! */
	sub	REG_LEN,r0		/* r0 = 1f - len */
	jmp	@r0
	mov	REG_C,r0		/* (delay slot) stores below need r0 */

	.align	2
	mov.w	r0,@(30,REG_DST)
	mov.w	r0,@(28,REG_DST)
	mov.w	r0,@(26,REG_DST)
	mov.w	r0,@(24,REG_DST)
	mov.w	r0,@(22,REG_DST)
	mov.w	r0,@(20,REG_DST)
	mov.w	r0,@(18,REG_DST)
	mov.w	r0,@(16,REG_DST)
	mov.w	r0,@(14,REG_DST)
	mov.w	r0,@(12,REG_DST)
	mov.w	r0,@(10,REG_DST)
	mov.w	r0,@(8,REG_DST)
	mov.w	r0,@(6,REG_DST)
	mov.w	r0,@(4,REG_DST)
	mov.w	r0,@(2,REG_DST)
#ifdef BZERO
	/* The final store (offset 0) sits in the delay slot of rts. */
	rts
1:	mov.w	r0,@REG_DST
#else
	mov.w	r0,@REG_DST
1:	rts
	mov	REG_DST0,r0		/* (delay slot) return dst */
#endif



/*
 * len >= 28.
 * Build a 32-bit fill pattern, set REG_PTR = dst + len, trim the head
 * and tail to 4-byte alignment if necessary, then fill longwords.
 */
	.align	2
large:
#ifdef BZERO
	mov	#0,REG_C
#else
	extu.b	REG_C,REG_TMP1		/* REG_C = ??????xx, REG_TMP1 = ????00xx */
	shll8	REG_C			/* REG_C = ????xx00, REG_TMP1 = ????00xx */
	or	REG_C,REG_TMP1		/* REG_C = ????xx00, REG_TMP1 = ????xxxx */
	swap.w	REG_TMP1,REG_C		/* REG_C = xxxx????, REG_TMP1 = ????xxxx */
	xtrct	REG_TMP1,REG_C		/* REG_C = xxxxxxxx */
#endif

	mov	#3,REG_TMP1
	tst	REG_TMP1,REG_DST	/* T = dst is 4-byte aligned */
	mov	REG_DST,REG_PTR
	bf/s	unaligned_dst
	add	REG_LEN,REG_PTR		/* (delay slot) REG_PTR = dst + len; */
	tst	REG_TMP1,REG_LEN	/* T = len is a multiple of 4 */
	bf/s	unaligned_len
	/* The delay slot of the branch above is the mov at "aligned". */

	/*
	 * Here REG_DST and REG_PTR are both 4-byte aligned and
	 * REG_LEN = REG_PTR - REG_DST.
	 */
aligned:
	/* fill 32*n bytes, working downward from REG_PTR */
	mov	#32,REG_TMP1
	cmp/hi	REG_LEN,REG_TMP1	/* T = (32 > len) */
	bt	9f
	.align	2
1:	sub	REG_TMP1,REG_PTR	/* ptr -= 32 */
	mov.l	REG_C,@REG_PTR
	sub	REG_TMP1,REG_LEN	/* len -= 32 */
	mov.l	REG_C,@(4,REG_PTR)
	cmp/hi	REG_LEN,REG_TMP1	/* T = (32 > len): last chunk done? */
	mov.l	REG_C,@(8,REG_PTR)
	mov.l	REG_C,@(12,REG_PTR)
	mov.l	REG_C,@(16,REG_PTR)
	mov.l	REG_C,@(20,REG_PTR)
	mov.l	REG_C,@(24,REG_PTR)
	bf/s	1b
	mov.l	REG_C,@(28,REG_PTR)	/* (delay slot) */
9:

	/*
	 * fill left 4*n bytes: store longwords downward until REG_PTR
	 * reaches dst.  As in the byte loop, each cmp/eq tests whether
	 * the NEXT store is the last one (ptr == dst + 4).
	 */
	cmp/eq	REG_DST,REG_PTR
	bt	9f			/* nothing left */
	add	#4,REG_DST
	cmp/eq	REG_DST,REG_PTR
1:	mov.l	REG_C,@-REG_PTR
	bt/s	9f
	cmp/eq	REG_DST,REG_PTR		/* (delay slot) */
	mov.l	REG_C,@-REG_PTR
	bt/s	9f
	cmp/eq	REG_DST,REG_PTR		/* (delay slot) */
	mov.l	REG_C,@-REG_PTR
	bt/s	9f
	cmp/eq	REG_DST,REG_PTR		/* (delay slot) */
	mov.l	REG_C,@-REG_PTR
	bf/s	1b
	cmp/eq	REG_DST,REG_PTR		/* (delay slot) */
9:
#ifdef BZERO
	rts
	nop				/* (delay slot) */
#else
	rts
	mov	REG_DST0,r0		/* (delay slot) return dst */
#endif


/*
 * Head of the buffer is not 4-byte aligned: store up to one byte and
 * one 16-bit word to advance REG_DST to a 4-byte boundary, then trim
 * the tail the same way if REG_PTR is not aligned either.
 */
unaligned_dst:
	mov	#1,REG_TMP1
	tst	REG_TMP1,REG_DST	/* if (dst & 1) {               */
	add	#1,REG_TMP1		/*   (REG_TMP1 = 2)             */
	bt/s	2f
	tst	REG_TMP1,REG_DST	/*   (delay slot) T = !(dst & 2) */
	mov.b	REG_C,@REG_DST		/*   *dst++ = c;                */
	add	#1,REG_DST
	tst	REG_TMP1,REG_DST	/*   re-test bit 1 of new dst   */
2:					/* }                            */
					/* if (dst & 2) {               */
	bt	4f
	mov.w	REG_C,@REG_DST		/*   *(u_int16_t*)dst++ = c;    */
	add	#2,REG_DST
4:					/* }                            */


	tst	#3,REG_PTR		/* if (ptr & 3) {               */
	bt/s	4f			/*                              */
	/*
	 * The first instruction at unaligned_len is the delay slot of
	 * the branch above; when that branch is not taken, execution
	 * simply continues below with T already set by it.
	 */
unaligned_len:
	tst	#1,REG_PTR		/*   if (ptr & 1) {             */
	bt/s	2f
	tst	#2,REG_PTR		/*     (delay slot) T = !(ptr & 2) */
	mov.b	REG_C,@-REG_PTR		/*     *--ptr = c;              */
2:					/*   }                          */
					/*   if (ptr & 2) {             */
	bt	4f
	mov.w	REG_C,@-REG_PTR		/*     *--(u_int16_t*)ptr = c;  */
4:					/*   }                          */
					/* }                            */

	mov	REG_PTR,REG_LEN
	bra	aligned
	sub	REG_DST,REG_LEN		/* (delay slot) len = ptr - dst */