/*	$NetBSD: memset.S,v 1.1 2013/03/17 00:42:32 christos Exp $	*/

/*
 * Copyright (c) 1996-2002 Eduardo Horvath
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */
#include "strmacros.h"

/*
 * XXXXXXXXXXXXXXXXXXXX
 * We need to make sure that this doesn't use floating point
 * before our trap handlers are installed or we could panic
 * XXXXXXXXXXXXXXXXXXXX
 */
/*
 * memset(addr, c, len)
 *
 * We want to use VIS instructions if we're clearing out 256 bytes
 * or more, but to do that we need to properly save and restore the
 * FP registers.  Unfortunately the code to do that in the kernel needs
 * to keep track of the current owner of the FPU, hence the different
 * code.
 *
 * XXXXX To produce more efficient code, we do not allow lengths
 * of 0x8000000000000000 or greater, which are negative numbers when
 * treated as signed.
 * This should not really be an issue since the VA hole should
 * cause any such ranges to fail anyway.
 *
 * Register usage (integer path):
 *	%o0	current store address (restored from %o4 on return)
 *	%o1	fill pattern; only the low 8 bits of the argument are
 *		used, replicated into all 8 bytes before the wide stores
 *	%o2	signed count of bytes remaining (biased by -8 once the
 *		8-byte store loop has been entered)
 *	%o3	scratch
 *	%o4	copy of the original addr, returned in %o0
 *	%o5	scratch, block-store path only
 *
 * The block-store path additionally uses %f0-%f14.  It addresses the
 * arguments as %i0-%i2 after ENABLE_FPU and ends with `restore', so
 * ENABLE_FPU (strmacros.h) is expected to open a new register window.
 */
#if !defined(_KERNEL) || defined(_RUMPKERNEL)
/*
 * bzero(addr, len)
 *
 * Rearranges its arguments into memset(addr, 0, len) form and falls
 * straight through into memset below.
 */
ENTRY(bzero)
	! %o0 = addr, %o1 = len
	mov	%o1, %o2		! len becomes the third argument
	mov	0, %o1			! pattern = 0
#endif
ENTRY(memset)
	! %o0 = addr, %o1 = pattern, %o2 = len
	mov	%o0, %o4		! Save original pointer

	/*
	 * Store single bytes until the address is 8-byte aligned or
	 * the count is exhausted.
	 */
Lmemset_internal:
	btst	7, %o0			! 8-byte aligned?
	bz,pn	%xcc, 0f
	 nop
	inc	%o0
	deccc	%o2			! Store up to 7 bytes
	bge,a,pt	CCCR, Lmemset_internal
	 stb	%o1, [%o0 - 1]		! annulled once len goes negative

	retl				! Duplicate Lmemset_done
	 mov	%o4, %o0
0:
	/*
	 * Duplicate the pattern so it fills 64-bits.
	 */
	andcc	%o1, 0x0ff, %o1		! No need to extend zero
	bz,pt	%icc, 1f
	 sllx	%o1, 8, %o3		! sigh.  all dependent insns.
	or	%o1, %o3, %o1
	sllx	%o1, 16, %o3
	or	%o1, %o3, %o1
	sllx	%o1, 32, %o3
	or	%o1, %o3, %o1
1:
#ifdef USE_BLOCK_STORE_LOAD
	!! Now we are 64-bit aligned
	cmp	%o2, 256		! Use block clear if len >= 256
	bge,pt	CCCR, Lmemset_block	! use block store insns
#endif	/* USE_BLOCK_STORE_LOAD */
	/*
	 * With USE_BLOCK_STORE_LOAD this deccc sits in the delay slot of
	 * the bge above, so Lmemset_block is entered with len already
	 * biased by -8 and with the condition codes of this subtraction.
	 */
	 deccc	8, %o2
Lmemset_longs:
	bl,pn	CCCR, Lmemset_cleanup	! Less than 8 bytes left
	 nop
3:
	inc	8, %o0
	deccc	8, %o2
	bge,pt	CCCR, 3b
	 stx	%o1, [%o0 - 8]		! Do 1 longword at a time

	/*
	 * Len is in [-8..-1] where -8 => done, -7 => 1 byte to store,
	 * -6 => two bytes, etc.  Mop up this remainder, if any.
	 * The low three bits of len give the number of bytes left.
	 */
Lmemset_cleanup:
	btst	4, %o2
	bz,pt	CCCR, 5f		! if (len & 4) {
	 nop
	stw	%o1, [%o0]		!	*(int *)addr = pattern;
	inc	4, %o0			!	addr += 4;
5:
	btst	2, %o2
	bz,pt	CCCR, 7f		! if (len & 2) {
	 nop
	sth	%o1, [%o0]		!	*(short *)addr = pattern;
	inc	2, %o0			!	addr += 2;
7:
	btst	1, %o2
	bnz,a	%icc, Lmemset_done	! if (len & 1)
	 stb	%o1, [%o0]		!	*addr = pattern;
Lmemset_done:
	retl
	 mov	%o4, %o0		! Restore pointer for memset (ugh)

#ifdef USE_BLOCK_STORE_LOAD
	/*
	 * Entered with %o2 = len - 8 and the condition codes set by that
	 * subtraction; both bail-outs to Lmemset_longs below rely on the
	 * condition codes still being intact.
	 */
Lmemset_block:
	sethi	%hi(block_disable), %o3
	ldx	[ %o3 + %lo(block_disable) ], %o3
	brnz,pn	%o3, Lmemset_longs
	 nop				! explicit delay slot; keeps the
					! multi-insn synthetic set out of it
	!! Make sure our trap table is installed
	set	_C_LABEL(trapbase), %o5
	rdpr	%tba, %o3
	sub	%o3, %o5, %o3
	brnz,pn	%o3, Lmemset_longs	! No, then don't use block load/store
	 nop
/*
 * Kernel:
 *
 * Here we use VIS instructions to do a block clear of a page.
 * But before we can do that we need to save and enable the FPU.
 * The last owner of the FPU registers is fplwp, and
 * fplwp->l_md.md_fpstate is the current fpstate.  If that's not
 * null, call savefpstate() with it to store our current fp state.
 *
 * Next, allocate an aligned fpstate on the stack.  We will properly
 * nest calls on a particular stack so this should not be a problem.
 *
 * Now we grab either curlwp (or if we're on the interrupt stack
 * lwp0).  We stash its existing fpstate in a local register and
 * put our new fpstate in curlwp->p_md.md_fpstate.  We point
 * fplwp at curlwp (or lwp0) and enable the FPU.
 *
 * If we are ever preempted, our FPU state will be saved in our
 * fpstate.  Then, when we're resumed and we take an FPDISABLED
 * trap, the trap handler will be able to fish our FPU state out
 * of curlwp (or lwp0).
 *
 * On exiting this routine we undo the damage: restore the original
 * pointer to curlwp->p_md.md_fpstate, clear our fplwp, and disable
 * the FPU.
 *
 */

	ENABLE_FPU(0)

	!! We are now 8-byte aligned.  We need to become 64-byte aligned.
	btst	63, %i0
	bz,pt	CCCR, 2f
	 nop
1:
	stx	%i1, [%i0]
	inc	8, %i0
	btst	63, %i0
	bnz,pt	%xcc, 1b
	 dec	8, %i2			! runs once per 8-byte store

2:
	brz	%i1, 3f			! Skip the memory op
	 fzero	%f0			! if pattern is 0

	/*
	 * Non-zero pattern: bounce it through the destination buffer to
	 * get it from the integer register into %f0.
	 */
#ifdef _LP64
	stx	%i1, [%i0]		! Flush this puppy to RAM
	membar	#StoreLoad
	ldd	[%i0], %f0
#else
	stw	%i1, [%i0]		! Flush this puppy to RAM
	membar	#StoreLoad
	ld	[%i0], %f0
	fmovsa	%icc, %f0, %f1
#endif

3:
	fmovd	%f0, %f2		! Duplicate the pattern
	fmovd	%f0, %f4
	fmovd	%f0, %f6
	fmovd	%f0, %f8
	fmovd	%f0, %f10
	fmovd	%f0, %f12
	fmovd	%f0, %f14

	!! Remember: we were 8 bytes too far
	dec	56, %i2			! Go one iteration too far
	/*
	 * %i2 is now (bytes remaining - 64).  Each pass stores one block
	 * and continues only while more than one full block would still
	 * remain, leaving 1..64 bytes for Lmemset_longs.  The length is
	 * compared with CCCR, like every other length test in this file,
	 * so that a length of 2GB or more does not drop out of the block
	 * loop early on LP64.
	 */
5:
	stda	%f0, [%i0] ASI_STORE	! Store 64 bytes
	deccc	BLOCK_SIZE, %i2
	bg,pt	CCCR, 5b
	 inc	BLOCK_SIZE, %i0

	membar	#Sync
/*
 * We've saved our possible fpstate, now disable the fpu
 * and continue with life.
 */
	RESTORE_FPU
	addcc	%i2, 56, %i2		! Restore the count (back to the -8
					! bias) and set the condition codes
					! that Lmemset_longs tests
	ba,pt	%xcc, Lmemset_longs	! Finish up the remainder
	 restore
#endif	/* USE_BLOCK_STORE_LOAD */