/*	$NetBSD: memset.S,v 1.3 2021/08/13 20:47:54 andvar Exp $	*/

/*
 * Copyright (c) 1996-2002 Eduardo Horvath
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
24ca8f29b6Schristos * 25ca8f29b6Schristos */ 26ca8f29b6Schristos#include "strmacros.h" 2712ea7fb3Schristos#if defined(LIBC_SCCS) && !defined(lint) 28*ebbc7028SandvarRCSID("$NetBSD: memset.S,v 1.3 2021/08/13 20:47:54 andvar Exp $") 2912ea7fb3Schristos#endif /* LIBC_SCCS and not lint */ 3012ea7fb3Schristos 31ca8f29b6Schristos 32ca8f29b6Schristos/* 33ca8f29b6Schristos * XXXXXXXXXXXXXXXXXXXX 34ca8f29b6Schristos * We need to make sure that this doesn't use floating point 35ca8f29b6Schristos * before our trap handlers are installed or we could panic 36ca8f29b6Schristos * XXXXXXXXXXXXXXXXXXXX 37ca8f29b6Schristos */ 38ca8f29b6Schristos/* 39ca8f29b6Schristos * memset(addr, c, len) 40ca8f29b6Schristos * 41ca8f29b6Schristos * We want to use VIS instructions if we're clearing out more than 42ca8f29b6Schristos * 256 bytes, but to do that we need to properly save and restore the 43ca8f29b6Schristos * FP registers. Unfortunately the code to do that in the kernel needs 44ca8f29b6Schristos * to keep track of the current owner of the FPU, hence the different 45ca8f29b6Schristos * code. 46ca8f29b6Schristos * 47ca8f29b6Schristos * XXXXX To produce more efficient code, we do not allow lengths 48ca8f29b6Schristos * greater than 0x80000000000000000, which are negative numbers. 49ca8f29b6Schristos * This should not really be an issue since the VA hole should 50ca8f29b6Schristos * cause any such ranges to fail anyway. 51ca8f29b6Schristos */ 52ca8f29b6Schristos#if !defined(_KERNEL) || defined(_RUMPKERNEL) 53ca8f29b6SchristosENTRY(bzero) 54ca8f29b6Schristos ! %o0 = addr, %o1 = len 55ca8f29b6Schristos mov %o1, %o2 5612ea7fb3Schristos clr %o1 ! ser pattern 57ca8f29b6Schristos#endif 58ca8f29b6SchristosENTRY(memset) 59ca8f29b6Schristos ! %o0 = addr, %o1 = pattern, %o2 = len 60ca8f29b6Schristos mov %o0, %o4 ! Save original pointer 61ca8f29b6Schristos 62ca8f29b6SchristosLmemset_internal: 63ca8f29b6Schristos btst 7, %o0 ! Word aligned? 
64ca8f29b6Schristos bz,pn %xcc, 0f 65ca8f29b6Schristos nop 66ca8f29b6Schristos inc %o0 67ca8f29b6Schristos deccc %o2 ! Store up to 7 bytes 68ca8f29b6Schristos bge,a,pt CCCR, Lmemset_internal 69ca8f29b6Schristos stb %o1, [%o0 - 1] 70ca8f29b6Schristos 71ca8f29b6Schristos retl ! Duplicate Lmemset_done 72ca8f29b6Schristos mov %o4, %o0 73ca8f29b6Schristos0: 74ca8f29b6Schristos /* 75ca8f29b6Schristos * Duplicate the pattern so it fills 64-bits. 76ca8f29b6Schristos */ 77ca8f29b6Schristos andcc %o1, 0x0ff, %o1 ! No need to extend zero 78ca8f29b6Schristos bz,pt %icc, 1f 79ca8f29b6Schristos sllx %o1, 8, %o3 ! sigh. all dependent insns. 80ca8f29b6Schristos or %o1, %o3, %o1 81ca8f29b6Schristos sllx %o1, 16, %o3 82ca8f29b6Schristos or %o1, %o3, %o1 83ca8f29b6Schristos sllx %o1, 32, %o3 84ca8f29b6Schristos or %o1, %o3, %o1 85ca8f29b6Schristos1: 86ca8f29b6Schristos#ifdef USE_BLOCK_STORE_LOAD 87ca8f29b6Schristos !! Now we are 64-bit aligned 88ca8f29b6Schristos cmp %o2, 256 ! Use block clear if len > 256 89ca8f29b6Schristos bge,pt CCCR, Lmemset_block ! use block store insns 90ca8f29b6Schristos#endif /* USE_BLOCK_STORE_LOAD */ 91ca8f29b6Schristos deccc 8, %o2 92ca8f29b6SchristosLmemset_longs: 93ca8f29b6Schristos bl,pn CCCR, Lmemset_cleanup ! Less than 8 bytes left 94ca8f29b6Schristos nop 95ca8f29b6Schristos3: 96ca8f29b6Schristos inc 8, %o0 97ca8f29b6Schristos deccc 8, %o2 98ca8f29b6Schristos bge,pt CCCR, 3b 99ca8f29b6Schristos stx %o1, [%o0 - 8] ! Do 1 longword at a time 100ca8f29b6Schristos 101ca8f29b6Schristos /* 102ca8f29b6Schristos * Len is in [-8..-1] where -8 => done, -7 => 1 byte to zero, 103ca8f29b6Schristos * -6 => two bytes, etc. Mop up this remainder, if any. 104ca8f29b6Schristos */ 105ca8f29b6SchristosLmemset_cleanup: 106ca8f29b6Schristos btst 4, %o2 107ca8f29b6Schristos bz,pt CCCR, 5f ! if (len & 4) { 108ca8f29b6Schristos nop 109ca8f29b6Schristos stw %o1, [%o0] ! *(int *)addr = 0; 110ca8f29b6Schristos inc 4, %o0 ! 
addr += 4; 111ca8f29b6Schristos5: 112ca8f29b6Schristos btst 2, %o2 113ca8f29b6Schristos bz,pt CCCR, 7f ! if (len & 2) { 114ca8f29b6Schristos nop 115ca8f29b6Schristos sth %o1, [%o0] ! *(short *)addr = 0; 116ca8f29b6Schristos inc 2, %o0 ! addr += 2; 117ca8f29b6Schristos7: 118ca8f29b6Schristos btst 1, %o2 119ca8f29b6Schristos bnz,a %icc, Lmemset_done ! if (len & 1) 120ca8f29b6Schristos stb %o1, [%o0] ! *addr = 0; 121ca8f29b6SchristosLmemset_done: 122ca8f29b6Schristos retl 123*ebbc7028Sandvar mov %o4, %o0 ! Restore pointer for memset (ugh) 124ca8f29b6Schristos 125ca8f29b6Schristos#ifdef USE_BLOCK_STORE_LOAD 126ca8f29b6SchristosLmemset_block: 127ca8f29b6Schristos sethi %hi(block_disable), %o3 128ca8f29b6Schristos ldx [ %o3 + %lo(block_disable) ], %o3 129ca8f29b6Schristos brnz,pn %o3, Lmemset_longs 130ca8f29b6Schristos !! Make sure our trap table is installed 131ca8f29b6Schristos set _C_LABEL(trapbase), %o5 132ca8f29b6Schristos rdpr %tba, %o3 133ca8f29b6Schristos sub %o3, %o5, %o3 134ca8f29b6Schristos brnz,pn %o3, Lmemset_longs ! No, then don't use block load/store 135ca8f29b6Schristos nop 136ca8f29b6Schristos/* 137ca8f29b6Schristos * Kernel: 138ca8f29b6Schristos * 139ca8f29b6Schristos * Here we use VIS instructions to do a block clear of a page. 140ca8f29b6Schristos * But before we can do that we need to save and enable the FPU. 141ca8f29b6Schristos * The last owner of the FPU registers is fplwp, and 142ca8f29b6Schristos * fplwp->l_md.md_fpstate is the current fpstate. If that's not 143ca8f29b6Schristos * null, call savefpstate() with it to store our current fp state. 144ca8f29b6Schristos * 145ca8f29b6Schristos * Next, allocate an aligned fpstate on the stack. We will properly 146ca8f29b6Schristos * nest calls on a particular stack so this should not be a problem. 147ca8f29b6Schristos * 148ca8f29b6Schristos * Now we grab either curlwp (or if we're on the interrupt stack 149ca8f29b6Schristos * lwp0). 
We stash its existing fpstate in a local register and 150ca8f29b6Schristos * put our new fpstate in curlwp->p_md.md_fpstate. We point 151ca8f29b6Schristos * fplwp at curlwp (or lwp0) and enable the FPU. 152ca8f29b6Schristos * 153ca8f29b6Schristos * If we are ever preempted, our FPU state will be saved in our 154ca8f29b6Schristos * fpstate. Then, when we're resumed and we take an FPDISABLED 155ca8f29b6Schristos * trap, the trap handler will be able to fish our FPU state out 156ca8f29b6Schristos * of curlwp (or lwp0). 157ca8f29b6Schristos * 158ca8f29b6Schristos * On exiting this routine we undo the damage: restore the original 159ca8f29b6Schristos * pointer to curlwp->p_md.md_fpstate, clear our fplwp, and disable 160ca8f29b6Schristos * the MMU. 161ca8f29b6Schristos * 162ca8f29b6Schristos */ 163ca8f29b6Schristos 164ca8f29b6Schristos ENABLE_FPU(0) 165ca8f29b6Schristos 166ca8f29b6Schristos !! We are now 8-byte aligned. We need to become 64-byte aligned. 167ca8f29b6Schristos btst 63, %i0 168ca8f29b6Schristos bz,pt CCCR, 2f 169ca8f29b6Schristos nop 170ca8f29b6Schristos1: 171ca8f29b6Schristos stx %i1, [%i0] 172ca8f29b6Schristos inc 8, %i0 173ca8f29b6Schristos btst 63, %i0 174ca8f29b6Schristos bnz,pt %xcc, 1b 175ca8f29b6Schristos dec 8, %i2 176ca8f29b6Schristos 177ca8f29b6Schristos2: 178ca8f29b6Schristos brz %i1, 3f ! Skip the memory op 179ca8f29b6Schristos fzero %f0 ! if pattern is 0 180ca8f29b6Schristos 181ca8f29b6Schristos#ifdef _LP64 182ca8f29b6Schristos stx %i1, [%i0] ! Flush this puppy to RAM 183ca8f29b6Schristos membar #StoreLoad 184ca8f29b6Schristos ldd [%i0], %f0 185ca8f29b6Schristos#else 186ca8f29b6Schristos stw %i1, [%i0] ! Flush this puppy to RAM 187ca8f29b6Schristos membar #StoreLoad 188ca8f29b6Schristos ld [%i0], %f0 189ca8f29b6Schristos fmovsa %icc, %f0, %f1 190ca8f29b6Schristos#endif 191ca8f29b6Schristos 192ca8f29b6Schristos3: 193ca8f29b6Schristos fmovd %f0, %f2 ! 
Duplicate the pattern 194ca8f29b6Schristos fmovd %f0, %f4 195ca8f29b6Schristos fmovd %f0, %f6 196ca8f29b6Schristos fmovd %f0, %f8 197ca8f29b6Schristos fmovd %f0, %f10 198ca8f29b6Schristos fmovd %f0, %f12 199ca8f29b6Schristos fmovd %f0, %f14 200ca8f29b6Schristos 201ca8f29b6Schristos !! Remember: we were 8 bytes too far 202ca8f29b6Schristos dec 56, %i2 ! Go one iteration too far 203ca8f29b6Schristos5: 204ca8f29b6Schristos stda %f0, [%i0] ASI_STORE ! Store 64 bytes 205ca8f29b6Schristos deccc BLOCK_SIZE, %i2 206ca8f29b6Schristos bg,pt %icc, 5b 207ca8f29b6Schristos inc BLOCK_SIZE, %i0 208ca8f29b6Schristos 209ca8f29b6Schristos membar #Sync 210ca8f29b6Schristos/* 211ca8f29b6Schristos * We've saved our possible fpstate, now disable the fpu 212ca8f29b6Schristos * and continue with life. 213ca8f29b6Schristos */ 214ca8f29b6Schristos RESTORE_FPU 215ca8f29b6Schristos addcc %i2, 56, %i2 ! Restore the count 216ca8f29b6Schristos ba,pt %xcc, Lmemset_longs ! Finish up the remainder 217ca8f29b6Schristos restore 218ca8f29b6Schristos#endif /* USE_BLOCK_STORE_LOAD */ 219