/*	$NetBSD: altivec.c,v 1.4 2003/06/23 11:01:36 martin Exp $	*/

/*
 * Copyright (C) 1996 Wolfgang Solfrank.
 * Copyright (C) 1996 TooLs GmbH.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by TooLs GmbH.
 * 4. The name of TooLs GmbH may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "opt_multiprocessor.h"

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/sa.h>
#include <sys/systm.h>
#include <sys/user.h>
#include <sys/malloc.h>
#include <sys/pool.h>

#include <uvm/uvm_extern.h>

#include <powerpc/altivec.h>
#include <powerpc/spr.h>
#include <powerpc/psl.h>
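
/*
 * Load the current lwp's saved AltiVec context into the vector unit and
 * record the lwp as the unit's new owner.  This is the lazy-switch entry
 * point: it is normally reached from the vector-unavailable trap the
 * first time an lwp touches AltiVec after losing the unit, so contexts
 * of lwps that never use AltiVec are never loaded or saved.
 */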
void
enable_vec(void)
{
	struct cpu_info *ci = curcpu();
	struct lwp *l = curlwp;
	struct pcb *pcb = &l->l_addr->u_pcb;
	struct trapframe *tf = trapframe(l);
	struct vreg *vr = &pcb->pcb_vr;
	register_t msr;

	KASSERT(pcb->pcb_veccpu == NULL);

	pcb->pcb_flags |= PCB_ALTIVEC;

	/*
	 * Enable AltiVec temporarily (and disable interrupts).
	 */
	msr = mfmsr();
	mtmsr((msr & ~PSL_EE) | PSL_VEC);
	__asm __volatile ("isync");
	if (ci->ci_veclwp) {
		save_vec_cpu();
	}
	KASSERT(curcpu()->ci_veclwp == NULL);

	/*
	 * Restore VSCR by first loading it into a vector register and then
	 * into VSCR.  (This needs to be done before loading the user's
	 * vector registers, since we need to use one as scratch.)
	 */
	__asm __volatile("vxor %2,%2,%2; lvewx %2,%0,%1; mtvscr %2" \
	    ::	"b"(vr), "r"(offsetof(struct vreg, vscr)), "n"(0));

	/*
	 * VRSAVE will be restored when the trap frame returns.
	 */
	tf->tf_xtra[TF_VRSAVE] = vr->vrsave;

#define	LVX(n,vr)	__asm /*__volatile*/("lvx %2,%0,%1" \
	    ::	"b"(vr), "r"(offsetof(struct vreg, vreg[n])), "n"(n));

	/*
	 * Load all 32 vector registers.
	 */
	LVX( 0,vr);	LVX( 1,vr);	LVX( 2,vr);	LVX( 3,vr);
	LVX( 4,vr);	LVX( 5,vr);	LVX( 6,vr);	LVX( 7,vr);
	LVX( 8,vr);	LVX( 9,vr);	LVX(10,vr);	LVX(11,vr);
	LVX(12,vr);	LVX(13,vr);	LVX(14,vr);	LVX(15,vr);

	LVX(16,vr);	LVX(17,vr);	LVX(18,vr);	LVX(19,vr);
	LVX(20,vr);	LVX(21,vr);	LVX(22,vr);	LVX(23,vr);
	LVX(24,vr);	LVX(25,vr);	LVX(26,vr);	LVX(27,vr);
	LVX(28,vr);	LVX(29,vr);	LVX(30,vr);	LVX(31,vr);
	__asm __volatile ("isync");

	/*
	 * Enable AltiVec when we return to user-mode.
	 * Record the new ownership of the AltiVec unit.
	 */
	tf->srr1 |= PSL_VEC;
	curcpu()->ci_veclwp = l;
	pcb->pcb_veccpu = curcpu();
	__asm __volatile ("sync");

	/*
	 * Restore MSR (turn off AltiVec).
	 */
	mtmsr(msr);
}
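
/*
 * Save the AltiVec context of whichever lwp currently owns this CPU's
 * vector unit back into that lwp's PCB, then release the unit.  Runs
 * with AltiVec enabled and interrupts disabled; this is a no-op if no
 * lwp owns the unit.
 */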
void
save_vec_cpu(void)
{
	struct cpu_info *ci = curcpu();
	struct lwp *l;
	struct pcb *pcb;
	struct vreg *vr;
	struct trapframe *tf;
	register_t msr;

	/*
	 * Turn on AltiVec, turn off interrupts.
	 */
	msr = mfmsr();
	mtmsr((msr & ~PSL_EE) | PSL_VEC);
	__asm __volatile ("isync");
	l = ci->ci_veclwp;
	if (l == NULL) {
		goto out;
	}
	pcb = &l->l_addr->u_pcb;
	vr = &pcb->pcb_vr;
	tf = trapframe(l);

#define	STVX(n,vr)	__asm /*__volatile*/("stvx %2,%0,%1" \
	    ::	"b"(vr), "r"(offsetof(struct vreg, vreg[n])), "n"(n));

	/*
	 * Save the vector registers.
	 */
	STVX( 0,vr);	STVX( 1,vr);	STVX( 2,vr);	STVX( 3,vr);
	STVX( 4,vr);	STVX( 5,vr);	STVX( 6,vr);	STVX( 7,vr);
	STVX( 8,vr);	STVX( 9,vr);	STVX(10,vr);	STVX(11,vr);
	STVX(12,vr);	STVX(13,vr);	STVX(14,vr);	STVX(15,vr);

	STVX(16,vr);	STVX(17,vr);	STVX(18,vr);	STVX(19,vr);
	STVX(20,vr);	STVX(21,vr);	STVX(22,vr);	STVX(23,vr);
	STVX(24,vr);	STVX(25,vr);	STVX(26,vr);	STVX(27,vr);
	STVX(28,vr);	STVX(29,vr);	STVX(30,vr);	STVX(31,vr);

	/*
	 * Save VSCR.  (This needs to be done after saving the vector
	 * registers, since we need to use one as scratch.)
	 */
	__asm __volatile("mfvscr %2; stvewx %2,%0,%1" \
	    ::	"b"(vr), "r"(offsetof(struct vreg, vscr)), "n"(0));

	/*
	 * Save VRSAVE.
	 */
	vr->vrsave = tf->tf_xtra[TF_VRSAVE];

	/*
	 * Note that we are no longer using the vector unit, and stop any
	 * outstanding data streams (dssall).
	 */
	tf->srr1 &= ~PSL_VEC;
	pcb->pcb_veccpu = NULL;
	ci->ci_veclwp = NULL;
	__asm __volatile ("dssall; sync");

 out:

	/*
	 * Restore MSR (turn off AltiVec).
	 */
	mtmsr(msr);
}

/*
 * Save an lwp's AltiVec state to its PCB.  The state may be in any CPU.
 * The lwp must either be curlwp or traced by curlwp (and stopped).
 * (The point being that the lwp must not run on another CPU during
 * this function.)
 */
void
save_vec_lwp(struct lwp *l)
{
	struct pcb *pcb = &l->l_addr->u_pcb;
	struct cpu_info *ci = curcpu();

	/*
	 * If it's already in the PCB, there's nothing to do.
	 */
	if (pcb->pcb_veccpu == NULL) {
		return;
	}

	/*
	 * If the state is in the current CPU, just flush the current CPU's
	 * state.
	 */
	if (l == ci->ci_veclwp) {
		save_vec_cpu();
		return;
	}

#ifdef MULTIPROCESSOR
	/*
	 * It must be on another CPU, flush it from there.
	 */
	mp_save_vec_lwp(l);
#endif
}
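
/*
 * The helpers below zero and copy whole pages through the vector unit,
 * given *physical* addresses.  Data translation (PSL_DR) is switched off
 * around the inner loops so the vector stores go straight to physical
 * memory, and interrupts stay masked throughout, since no interrupt can
 * safely be taken while translation is off.  The scratch vector
 * registers are spilled to the stack first because they may hold a user
 * lwp's live AltiVec state (the lazy-switch code above only saves it on
 * demand).
 */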
#define ZERO_VEC	19

void
vzeropage(paddr_t pa)
{
	const paddr_t ea = pa + PAGE_SIZE;
	/* room in vec[] for one 16-byte-aligned vector after roundup */
	uint32_t vec[7], *vp = (void *) roundup((uintptr_t) vec, 16);
	register_t omsr, msr;

	__asm __volatile("mfmsr %0" : "=r"(omsr) :);

	/*
	 * Turn on AltiVec, turn off interrupts.
	 */
	msr = (omsr & ~PSL_EE) | PSL_VEC;
	__asm __volatile("sync; mtmsr %0; isync" :: "r"(msr));

	/*
	 * Save the VEC register we are going to use before we disable
	 * relocation.
	 */
	__asm("stvx %1,0,%0" :: "r"(vp), "n"(ZERO_VEC));
	__asm("vxor %0,%0,%0" :: "n"(ZERO_VEC));

	/*
	 * Turn off data relocation (DMMU off).
	 */
	msr &= ~PSL_DR;
	__asm __volatile("sync; mtmsr %0; isync" :: "r"(msr));

	/*
	 * Zero the page using a single cache line.
	 */
	do {
		__asm("stvx %2,%0,%1" :: "b"(pa), "r"( 0), "n"(ZERO_VEC));
		__asm("stvxl %2,%0,%1" :: "b"(pa), "r"(16), "n"(ZERO_VEC));
		__asm("stvx %2,%0,%1" :: "b"(pa), "r"(32), "n"(ZERO_VEC));
		__asm("stvxl %2,%0,%1" :: "b"(pa), "r"(48), "n"(ZERO_VEC));
		pa += 64;
	} while (pa < ea);

	/*
	 * Restore data relocation (DMMU on).
	 */
	msr |= PSL_DR;
	__asm __volatile("sync; mtmsr %0; isync" :: "r"(msr));

	/*
	 * Restore VEC register (now that we can access the stack again).
	 */
	__asm("lvx %1,0,%0" :: "r"(vp), "n"(ZERO_VEC));

	/*
	 * Restore old MSR (AltiVec OFF).
	 */
	__asm __volatile("sync; mtmsr %0; isync" :: "r"(omsr));
}

#define LO_VEC	16
#define HI_VEC	17

void
vcopypage(paddr_t dst, paddr_t src)
{
	const paddr_t edst = dst + PAGE_SIZE;
	/* room in vec[] for two 16-byte-aligned vectors after roundup */
	uint32_t vec[11], *vp = (void *) roundup((uintptr_t) vec, 16);
	register_t omsr, msr;

	__asm __volatile("mfmsr %0" : "=r"(omsr) :);

	/*
	 * Turn on AltiVec, turn off interrupts.
	 */
	msr = (omsr & ~PSL_EE) | PSL_VEC;
	__asm __volatile("sync; mtmsr %0; isync" :: "r"(msr));

	/*
	 * Save the VEC registers we will be using before we disable
	 * relocation.
	 */
	__asm("stvx %2,%1,%0" :: "b"(vp), "r"( 0), "n"(LO_VEC));
	__asm("stvx %2,%1,%0" :: "b"(vp), "r"(16), "n"(HI_VEC));

	/*
	 * Turn off data relocation (DMMU off).
	 */
	msr &= ~PSL_DR;
	__asm __volatile("sync; mtmsr %0; isync" :: "r"(msr));

	/*
	 * Copy the page using a single cache line.  On most PPCs, two
	 * vector registers occupy one cache line.
	 */
	do {
		__asm("lvx %2,%0,%1" :: "b"(src), "r"( 0), "n"(LO_VEC));
		__asm("stvx %2,%0,%1" :: "b"(dst), "r"( 0), "n"(LO_VEC));
		__asm("lvxl %2,%0,%1" :: "b"(src), "r"(16), "n"(HI_VEC));
		__asm("stvxl %2,%0,%1" :: "b"(dst), "r"(16), "n"(HI_VEC));
		src += 32;
		dst += 32;
	} while (dst < edst);

	/*
	 * Restore data relocation (DMMU on).
	 */
	msr |= PSL_DR;
	__asm __volatile("sync; mtmsr %0; isync" :: "r"(msr));

	/*
	 * Restore VEC registers (now that we can access the stack again).
	 */
	__asm("lvx %2,%1,%0" :: "b"(vp), "r"( 0), "n"(LO_VEC));
	__asm("lvx %2,%1,%0" :: "b"(vp), "r"(16), "n"(HI_VEC));

	/*
	 * Restore old MSR (AltiVec OFF).
	 */
	__asm __volatile("sync; mtmsr %0; isync" :: "r"(omsr));
}
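
/*
 * Usage sketch (an assumption, not part of this file): the MD pmap code
 * can dispatch its page-zero/page-copy paths to these helpers when the
 * CPU reports AltiVec, e.g.:
 *
 *	void
 *	pmap_zero_page(paddr_t pa)	(hypothetical caller)
 *	{
 *		if (cpu_altivec)
 *			vzeropage(pa);
 *		else
 *			... word-at-a-time fallback ...
 *	}
 */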