/*	$NetBSD: memset.S,v 1.2 2008/02/16 17:37:13 apb Exp $	*/

/*-
 * Copyright (c) 2002 SHIMIZU Ryo. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>

#if defined(LIBC_SCCS) && !defined(lint)
	RCSID("$NetBSD: memset.S,v 1.2 2008/02/16 17:37:13 apb Exp $")
#endif

/*
 * memset(dst, c, len) -- or, when built with BZERO defined, bzero(dst, len).
 *
 * Register usage (see the REG_* definitions below):
 *   memset: dst in r4, c in r5, len in r6.  The original dst is saved in
 *           r3 on entry and copied to r0 as the return value.
 *   bzero:  dst in r4, len in r5.  The fill value 0 is loaded into r2.
 *   r0 (REG_PTR) and r1 (REG_TMP1) are scratch in both variants.
 *
 * Strategy by length:
 *    0..11  byte loop, storing backwards from dst+len down to dst.
 *   12..27  computed jump into a table of unrolled stores: byte stores
 *           when dst is odd, 16-bit stores when dst is even.
 *   28..    replicate c into all four bytes, trim the unaligned head and
 *           tail, then fill 32-byte blocks and finally 4-byte words,
 *           storing backwards from dst+len.
 *
 * NOTE: the instruction following bt/s, bf/s, bra, jmp and rts is a branch
 * delay slot and is executed even when the branch is taken.  Several paths
 * below rely on that; do not reorder, insert or remove instructions without
 * accounting for it.  The two jump tables additionally depend on every
 * instruction being 2 bytes long and on their end label being 4-byte
 * aligned (a requirement of mova).
 */

#define	REG_PTR		r0	/* end pointer in the large path; tst #imm needs r0 */
#define	REG_TMP1	r1	/* scratch */

#ifdef BZERO
# define	REG_C		r2	/* fill value, always 0 */
# define	REG_DST		r4	/* 1st argument: destination */
# define	REG_LEN		r5	/* 2nd argument: byte count */
#else
# define	REG_DST0	r3	/* copy of the original dst for the return value */
# define	REG_DST		r4	/* 1st argument: destination */
# define	REG_C		r5	/* 2nd argument: fill value */
# define	REG_LEN		r6	/* 3rd argument: byte count */
#endif

#ifdef BZERO
ENTRY(bzero)
#else
ENTRY(memset)
	mov	REG_DST,REG_DST0	/* for return value */
#endif
	/* small amount to fill ? */
	mov	#28,REG_TMP1
	cmp/hs	REG_TMP1,REG_LEN	/* if (len >= 28) goto large; */
	bt/s	large
	mov	#12,REG_TMP1		/* (delay slot) if (len >= 12) goto small; */
	cmp/hs	REG_TMP1,REG_LEN
	bt/s	small
	/*
	 * Delay slot of the branch above: "mov #0,REG_C" for bzero,
	 * otherwise the "tst" below.  Neither result is needed by small.
	 */
#ifdef BZERO
	mov	#0,REG_C
#endif
	/* very little fill (0 ~ 11 bytes) */
	tst	REG_LEN,REG_LEN		/* T = (len == 0) */
	add	REG_DST,REG_LEN		/* REG_LEN = dst + len (end pointer) */
	bt/s	done
	add	#1,REG_DST		/* (delay slot) REG_DST = dst + 1 */

	/*
	 * unroll 4 loops
	 *
	 * Each step stores one byte at --REG_LEN.  T was computed before
	 * the store as (REG_LEN == dst + 1), i.e. "this store is the last
	 * one".  The cmp/eq for the next step sits in the delay slot.
	 */
	cmp/eq	REG_DST,REG_LEN
1:	mov.b	REG_C,@-REG_LEN
	bt/s	done
	cmp/eq	REG_DST,REG_LEN
	mov.b	REG_C,@-REG_LEN
	bt/s	done
	cmp/eq	REG_DST,REG_LEN
	mov.b	REG_C,@-REG_LEN
	bt/s	done
	cmp/eq	REG_DST,REG_LEN
	mov.b	REG_C,@-REG_LEN
	bf/s	1b
	cmp/eq	REG_DST,REG_LEN
done:
#ifdef BZERO
	rts
	nop
#else
	rts
	mov	REG_DST0,r0		/* (delay slot) return original dst */
#endif


/*
 * 12 <= len <= 27.  If dst is even go to small_aligned; otherwise fill
 * with byte stores through a computed jump: every mov.b is 2 bytes, so
 * jumping to (1f - 2*len) executes exactly the last len stores of the
 * table, which cover dst+len-1 down to dst+0.
 */
small:
	mov	REG_DST,r0
	tst	#1,r0			/* T = !(dst & 1) */
	bt/s	small_aligned
	mov	REG_DST,REG_TMP1	/* (delay slot) */
	shll	REG_LEN			/* REG_LEN = len * 2 (bytes of code) */
	mova	1f,r0	/* 1f must be 4bytes aligned! */
	add	#16,REG_TMP1	/* REG_TMP1 = dst+16; */
	sub	REG_LEN,r0
	jmp	@r0
	mov	REG_C,r0		/* (delay slot) mov.b @(disp,Rn) needs r0 as source */

	.align	2
	mov.b	r0,@(15,REG_TMP1)	/* dst+31 */
	mov.b	r0,@(14,REG_TMP1)
	mov.b	r0,@(13,REG_TMP1)
	mov.b	r0,@(12,REG_TMP1)
	mov.b	r0,@(11,REG_TMP1)
	mov.b	r0,@(10,REG_TMP1)
	mov.b	r0,@(9,REG_TMP1)
	mov.b	r0,@(8,REG_TMP1)
	mov.b	r0,@(7,REG_TMP1)
	mov.b	r0,@(6,REG_TMP1)
	mov.b	r0,@(5,REG_TMP1)
	mov.b	r0,@(4,REG_TMP1)
	mov.b	r0,@(3,REG_TMP1)
	mov.b	r0,@(2,REG_TMP1)
	mov.b	r0,@(1,REG_TMP1)
	mov.b	r0,@REG_TMP1		/* dst+16 */
	mov.b	r0,@(15,REG_DST)	/* dst+15 */
	mov.b	r0,@(14,REG_DST)
	mov.b	r0,@(13,REG_DST)
	mov.b	r0,@(12,REG_DST)
	mov.b	r0,@(11,REG_DST)
	mov.b	r0,@(10,REG_DST)
	mov.b	r0,@(9,REG_DST)
	mov.b	r0,@(8,REG_DST)
	mov.b	r0,@(7,REG_DST)
	mov.b	r0,@(6,REG_DST)
	mov.b	r0,@(5,REG_DST)
	mov.b	r0,@(4,REG_DST)
	mov.b	r0,@(3,REG_DST)
	mov.b	r0,@(2,REG_DST)
	mov.b	r0,@(1,REG_DST)
	/*
	 * bzero puts the final store in the rts delay slot, so its "1:"
	 * label is one instruction later than in memset; in both variants
	 * exactly len stores are executed and 1: stays 4-byte aligned.
	 */
#ifdef BZERO
	rts
1:	mov.b	r0,@REG_DST
#else
	mov.b	r0,@REG_DST
1:	rts
	mov	REG_DST0,r0		/* (delay slot) return original dst */
#endif


/* 2 bytes aligned small fill */
small_aligned:
#ifndef BZERO
	extu.b	REG_C,REG_TMP1		/* REG_C = ??????xx, REG_TMP1 = ????00xx */
	shll8	REG_C			/* REG_C = ????xx00, REG_TMP1 = ????00xx */
	or	REG_TMP1,REG_C		/* REG_C = ????xxxx */
#endif

	mov	REG_LEN,r0
	tst	#1,r0			/* len is aligned? */
	bt/s	1f
	add	#-1,r0			/* (delay slot) r0 = len - 1 */
	mov.b	REG_C,@(r0,REG_DST)	/* fill last a byte */
	mov	r0,REG_LEN		/* len is now even */
1:

	/*
	 * Each mov.w is 2 bytes of code and stores 2 bytes of data, so
	 * jumping to (1f - len) executes exactly len/2 stores, covering
	 * dst+len-2 down to dst+0.
	 */
	mova	1f,r0	/* 1f must be 4bytes aligned! */
	sub	REG_LEN,r0
	jmp	@r0
	mov	REG_C,r0		/* (delay slot) mov.w @(disp,Rn) needs r0 as source */

	.align	2
	mov.w	r0,@(30,REG_DST)
	mov.w	r0,@(28,REG_DST)
	mov.w	r0,@(26,REG_DST)
	mov.w	r0,@(24,REG_DST)
	mov.w	r0,@(22,REG_DST)
	mov.w	r0,@(20,REG_DST)
	mov.w	r0,@(18,REG_DST)
	mov.w	r0,@(16,REG_DST)
	mov.w	r0,@(14,REG_DST)
	mov.w	r0,@(12,REG_DST)
	mov.w	r0,@(10,REG_DST)
	mov.w	r0,@(8,REG_DST)
	mov.w	r0,@(6,REG_DST)
	mov.w	r0,@(4,REG_DST)
	mov.w	r0,@(2,REG_DST)
#ifdef BZERO
	rts
1:	mov.w	r0,@REG_DST
#else
	mov.w	r0,@REG_DST
1:	rts
	mov	REG_DST0,r0		/* (delay slot) return original dst */
#endif



/*
 * len >= 28.  Replicate the fill byte into all 32 bits of REG_C, set
 * REG_PTR = dst + len, trim unaligned head/tail bytes, then fill the
 * 4-byte aligned middle in "aligned".
 */
	.align	2
large:
#ifdef BZERO
	mov	#0,REG_C
#else
	extu.b	REG_C,REG_TMP1		/* REG_C = ??????xx, REG_TMP1 = ????00xx */
	shll8	REG_C			/* REG_C = ????xx00, REG_TMP1 = ????00xx */
	or	REG_C,REG_TMP1		/* REG_C = ????xx00, REG_TMP1 = ????xxxx */
	swap.w	REG_TMP1,REG_C		/* REG_C = xxxx????, REG_TMP1 = ????xxxx */
	xtrct	REG_TMP1,REG_C		/* REG_C = xxxxxxxx */
#endif

	mov	#3,REG_TMP1
	tst	REG_TMP1,REG_DST	/* T = !(dst & 3) */
	mov	REG_DST,REG_PTR
	bf/s	unaligned_dst
	add	REG_LEN,REG_PTR		/* REG_PTR = dst + len; */
	tst	REG_TMP1,REG_LEN	/* dst is aligned: T = !(len & 3) */
	bf/s	unaligned_len
	/* delay slot of the bf/s above is the "mov #32" at aligned: */

aligned:
	/* fill 32*n bytes */
	mov	#32,REG_TMP1
	cmp/hi	REG_LEN,REG_TMP1	/* T = (32 > len) */
	bt	9f
	.align	2
1:	sub	REG_TMP1,REG_PTR	/* step the end pointer back 32 bytes */
	mov.l	REG_C,@REG_PTR
	sub	REG_TMP1,REG_LEN
	mov.l	REG_C,@(4,REG_PTR)
	cmp/hi	REG_LEN,REG_TMP1	/* T = (32 > remaining len) */
	mov.l	REG_C,@(8,REG_PTR)
	mov.l	REG_C,@(12,REG_PTR)
	mov.l	REG_C,@(16,REG_PTR)
	mov.l	REG_C,@(20,REG_PTR)
	mov.l	REG_C,@(24,REG_PTR)
	bf/s	1b
	mov.l	REG_C,@(28,REG_PTR)	/* (delay slot) */
9:

	/*
	 * fill left 4*n bytes
	 *
	 * Same scheme as the byte loop: after "add #4,REG_DST", T is
	 * (REG_PTR == dst + 4), i.e. "the next store is the last one",
	 * evaluated before each pre-decrement store.
	 */
	cmp/eq	REG_DST,REG_PTR
	bt	9f			/* nothing left */
	add	#4,REG_DST
	cmp/eq	REG_DST,REG_PTR
1:	mov.l	REG_C,@-REG_PTR
	bt/s	9f
	cmp/eq	REG_DST,REG_PTR
	mov.l	REG_C,@-REG_PTR
	bt/s	9f
	cmp/eq	REG_DST,REG_PTR
	mov.l	REG_C,@-REG_PTR
	bt/s	9f
	cmp/eq	REG_DST,REG_PTR
	mov.l	REG_C,@-REG_PTR
	bf/s	1b
	cmp/eq	REG_DST,REG_PTR
9:
#ifdef BZERO
	rts
	nop
#else
	rts
	mov	REG_DST0,r0		/* (delay slot) return original dst */
#endif


/* dst is not 4-byte aligned: store 1 and/or 2 leading bytes. */
unaligned_dst:
	mov	#1,REG_TMP1
	tst	REG_TMP1,REG_DST	/* if (dst & 1) {               */
	add	#1,REG_TMP1		/* REG_TMP1 = 2 (T unaffected) */
	bt/s	2f
	tst	REG_TMP1,REG_DST	/* (delay slot) T = !(dst & 2) */
	mov.b	REG_C,@REG_DST		/*   *dst++ = c;                */
	add	#1,REG_DST
	tst	REG_TMP1,REG_DST	/* re-test bit 1 of the new dst */
2:					/* }                            */
					/* if (dst & 2) {               */
	bt	4f
	mov.w	REG_C,@REG_DST		/*   *(uint16_t*)dst++ = c;     */
	add	#2,REG_DST
4:					/* }                            */


	tst	#3,REG_PTR		/* if (ptr & 3) {               */
	bt/s	4f			/*                              */
	/* delay slot of the bt/s above is the "tst #1" at unaligned_len: */
/* end pointer is not 4-byte aligned: store 1 and/or 2 trailing bytes. */
unaligned_len:
	tst	#1,REG_PTR		/*   if (ptr & 1) {             */
	bt/s	2f
	tst	#2,REG_PTR		/* (delay slot) T = !(ptr & 2) */
	mov.b	REG_C,@-REG_PTR		/*     --ptr = c;               */
2:					/*   }                          */
					/*   if (ptr & 2) {             */
	bt	4f
	mov.w	REG_C,@-REG_PTR		/*     *--(uint16_t*)ptr = c;   */
4:					/*   }                          */
					/* }                            */

	mov	REG_PTR,REG_LEN
	bra	aligned
	sub	REG_DST,REG_LEN		/* (delay slot) len = ptr - dst */