/*	$NetBSD: memset.S,v 1.2 2013/03/17 02:12:41 christos Exp $	*/

/*
 * Copyright (c) 1996-2002 Eduardo Horvath
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */
#include "strmacros.h"
#if defined(LIBC_SCCS) && !defined(lint)
RCSID("$NetBSD: memset.S,v 1.2 2013/03/17 02:12:41 christos Exp $")
#endif	/* LIBC_SCCS and not lint */


/*
 * XXXXXXXXXXXXXXXXXXXX
 * We need to make sure that this doesn't use floating point
 * before our trap handlers are installed or we could panic
 * XXXXXXXXXXXXXXXXXXXX
 */
/*
 * memset(addr, c, len)
 *
 * In:   %o0 = addr, %o1 = fill byte (pattern), %o2 = len
 * Out:  %o0 = addr (original pointer, saved in %o4)
 *
 * We want to use VIS instructions if we're clearing out more than
 * 256 bytes, but to do that we need to properly save and restore the
 * FP registers.  Unfortunately the code to do that in the kernel needs
 * to keep track of the current owner of the FPU, hence the different
 * code.
 *
 * XXXXX To produce more efficient code, we do not allow lengths
 * greater than 0x8000000000000000, which are negative numbers.
 * This should not really be an issue since the VA hole should
 * cause any such ranges to fail anyway.
 */
#if !defined(_KERNEL) || defined(_RUMPKERNEL)
ENTRY(bzero)
	! bzero(addr, len) is memset(addr, 0, len):
	! %o0 = addr, %o1 = len
	mov	%o1, %o2		! shift len into memset's %o2
	clr	%o1			! set pattern to zero
#endif
ENTRY(memset)
	! %o0 = addr, %o1 = pattern, %o2 = len
	mov	%o0, %o4		! Save original pointer

Lmemset_internal:
	btst	7, %o0			! Word (8-byte) aligned?
	bz,pn	%xcc, 0f
	 nop
	inc	%o0			! Advance, then store byte behind us
	deccc	%o2			! Store up to 7 bytes
	bge,a,pt	CCCR, Lmemset_internal	! annulled: stb only if len >= 0
	 stb	%o1, [%o0 - 1]

	retl				! Duplicate Lmemset_done (len exhausted
	 mov	%o4, %o0		!  during alignment; return orig ptr)
0:
	/*
	 * Duplicate the pattern so it fills 64-bits.
	 */
	andcc	%o1, 0x0ff, %o1		! No need to extend zero
	bz,pt	%icc, 1f
	 sllx	%o1, 8, %o3		! sigh.  all dependent insns.
	or	%o1, %o3, %o1		! pattern now fills 16 bits
	sllx	%o1, 16, %o3
	or	%o1, %o3, %o1		! pattern now fills 32 bits
	sllx	%o1, 32, %o3
	or	%o1, %o3, %o1		! pattern now fills 64 bits
1:
#ifdef USE_BLOCK_STORE_LOAD
	!! Now we are 64-bit aligned
	cmp	%o2, 256		! Use block clear if len > 256
	bge,pt	CCCR, Lmemset_block	! use block store insns
#endif	/* USE_BLOCK_STORE_LOAD */
	 deccc	8, %o2			! (delay slot; also falls through)
Lmemset_longs:
	bl,pn	CCCR, Lmemset_cleanup	! Less than 8 bytes left
	 nop
3:
	inc	8, %o0
	deccc	8, %o2
	bge,pt	CCCR, 3b
	 stx	%o1, [%o0 - 8]		! Do 1 longword at a time

	/*
	 * Len is in [-8..-1] where -8 => done, -7 => 1 byte to zero,
	 * -6 => two bytes, etc.  Mop up this remainder, if any.
	 * (btst on the low 3 bits of the negative count still selects
	 * the correct remainder bytes.)
	 */
Lmemset_cleanup:
	btst	4, %o2
	bz,pt	CCCR, 5f		! if (len & 4) {
	 nop
	stw	%o1, [%o0]		!	*(int *)addr = pattern;
	inc	4, %o0			!	addr += 4;
5:
	btst	2, %o2
	bz,pt	CCCR, 7f		! if (len & 2) {
	 nop
	sth	%o1, [%o0]		!	*(short *)addr = pattern;
	inc	2, %o0			!	addr += 2;
7:
	btst	1, %o2
	bnz,a	%icc, Lmemset_done	! if (len & 1)
	 stb	%o1, [%o0]		!	*addr = pattern; (annulled store)
Lmemset_done:
	retl
	 mov	%o4, %o0		! Restore pointer for memset (ugh)

#ifdef USE_BLOCK_STORE_LOAD
Lmemset_block:
	sethi	%hi(block_disable), %o3
	ldx	[ %o3 + %lo(block_disable) ], %o3
	brnz,pn	%o3, Lmemset_longs	! Block ops administratively disabled
	!! Make sure our trap table is installed
	 set	_C_LABEL(trapbase), %o5
	rdpr	%tba, %o3
	sub	%o3, %o5, %o3
	brnz,pn	%o3, Lmemset_longs	! No, then don't use block load/store
	 nop
/*
 * Kernel:
 *
 * Here we use VIS instructions to do a block clear of a page.
 * But before we can do that we need to save and enable the FPU.
 * The last owner of the FPU registers is fplwp, and
 * fplwp->l_md.md_fpstate is the current fpstate.  If that's not
 * null, call savefpstate() with it to store our current fp state.
 *
 * Next, allocate an aligned fpstate on the stack.  We will properly
 * nest calls on a particular stack so this should not be a problem.
 *
 * Now we grab either curlwp (or if we're on the interrupt stack
 * lwp0).  We stash its existing fpstate in a local register and
 * put our new fpstate in curlwp->p_md.md_fpstate.  We point
 * fplwp at curlwp (or lwp0) and enable the FPU.
 *
 * If we are ever preempted, our FPU state will be saved in our
 * fpstate.  Then, when we're resumed and we take an FPDISABLED
 * trap, the trap handler will be able to fish our FPU state out
 * of curlwp (or lwp0).
 *
 * On exiting this routine we undo the damage: restore the original
 * pointer to curlwp->p_md.md_fpstate, clear our fplwp, and disable
 * the FPU.
 *
 * (ENABLE_FPU switches to a register window, so %o0-%o2 become
 * %i0-%i2 below.)
 */

	ENABLE_FPU(0)

	!! We are now 8-byte aligned.  We need to become 64-byte aligned.
	btst	63, %i0
	bz,pt	CCCR, 2f
	 nop
1:
	stx	%i1, [%i0]		! Store longwords until 64-byte aligned
	inc	8, %i0
	btst	63, %i0
	bnz,pt	%xcc, 1b
	 dec	8, %i2

2:
	brz	%i1, 3f			! Skip the memory op
	 fzero	%f0			! if pattern is 0

#ifdef _LP64
	stx	%i1, [%i0]		! Flush this puppy to RAM
	membar	#StoreLoad		! ...so the fp load sees the store
	ldd	[%i0], %f0		! Get 64-bit pattern into %f0/%f1
#else
	stw	%i1, [%i0]		! Flush this puppy to RAM
	membar	#StoreLoad
	ld	[%i0], %f0
	fmovsa	%icc, %f0, %f1		! Duplicate low word into %f1
#endif

3:
	fmovd	%f0, %f2		! Duplicate the pattern
	fmovd	%f0, %f4		!  across %f0-%f14 to fill a
	fmovd	%f0, %f6		!  64-byte block-store source
	fmovd	%f0, %f8
	fmovd	%f0, %f10
	fmovd	%f0, %f12
	fmovd	%f0, %f14

	!! Remember: we were 8 bytes too far
	dec	56, %i2			! Go one iteration too far
5:
	stda	%f0, [%i0] ASI_STORE	! Store 64 bytes
	deccc	BLOCK_SIZE, %i2
	bg,pt	%icc, 5b
	 inc	BLOCK_SIZE, %i0

	membar	#Sync			! Wait for block stores to complete
/*
 * We've saved our possible fpstate, now disable the fpu
 * and continue with life.
 */
	RESTORE_FPU
	addcc	%i2, 56, %i2		! Restore the count
	ba,pt	%xcc, Lmemset_longs	! Finish up the remainder
	 restore			! (delay slot: pop register window)
#endif	/* USE_BLOCK_STORE_LOAD */