/*	$NetBSD: altivec.c,v 1.13 2007/10/17 19:56:42 garbled Exp $	*/

/*
 * Copyright (C) 1996 Wolfgang Solfrank.
 * Copyright (C) 1996 TooLs GmbH.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by TooLs GmbH.
 * 4. The name of TooLs GmbH may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: altivec.c,v 1.13 2007/10/17 19:56:42 garbled Exp $");

#include "opt_multiprocessor.h"

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/user.h>
#include <sys/malloc.h>
#include <sys/pool.h>

#include <uvm/uvm_extern.h>

#include <powerpc/altivec.h>
#include <powerpc/spr.h>
#include <powerpc/psl.h>

#ifdef MULTIPROCESSOR
#include <arch/powerpc/pic/picvar.h>
#include <arch/powerpc/pic/ipivar.h>
static void mp_save_vec_lwp(struct lwp *);
#endif

void
enable_vec(void)
{
	struct cpu_info *ci = curcpu();
	struct lwp *l = curlwp;
	struct pcb *pcb = &l->l_addr->u_pcb;
	struct trapframe *tf = trapframe(l);
	struct vreg *vr = &pcb->pcb_vr;
	register_t msr;

	KASSERT(pcb->pcb_veccpu == NULL);

	pcb->pcb_flags |= PCB_ALTIVEC;

	/*
	 * Enable AltiVec temporarily (and disable interrupts).
	 */
	msr = mfmsr();
	mtmsr((msr & ~PSL_EE) | PSL_VEC);
	__asm volatile ("isync");
	if (ci->ci_veclwp) {
		save_vec_cpu();
	}
	KASSERT(curcpu()->ci_veclwp == NULL);

	/*
	 * Restore VSCR by first loading it into a scratch vector register
	 * and then moving it to VSCR.  (This needs to be done before
	 * loading the user's vector registers, since we need one of them
	 * as scratch.)
	 */
	__asm volatile("vxor %2,%2,%2; lvewx %2,%0,%1; mtvscr %2" \
	    ::	"b"(vr), "r"(offsetof(struct vreg, vscr)), "n"(0));

	/*
	 * VRSAVE will be restored when the trap frame returns.
	 */
	tf->tf_xtra[TF_VRSAVE] = vr->vrsave;

#define	LVX(n,vr)	__asm /*volatile*/("lvx %2,%0,%1" \
	    ::	"b"(vr), "r"(offsetof(struct vreg, vreg[n])), "n"(n));

	/*
	 * Load all 32 vector registers.
	 */
	LVX( 0,vr);	LVX( 1,vr);	LVX( 2,vr);	LVX( 3,vr);
	LVX( 4,vr);	LVX( 5,vr);	LVX( 6,vr);	LVX( 7,vr);
	LVX( 8,vr);	LVX( 9,vr);	LVX(10,vr);	LVX(11,vr);
	LVX(12,vr);	LVX(13,vr);	LVX(14,vr);	LVX(15,vr);

	LVX(16,vr);	LVX(17,vr);	LVX(18,vr);	LVX(19,vr);
	LVX(20,vr);	LVX(21,vr);	LVX(22,vr);	LVX(23,vr);
	LVX(24,vr);	LVX(25,vr);	LVX(26,vr);	LVX(27,vr);
	LVX(28,vr);	LVX(29,vr);	LVX(30,vr);	LVX(31,vr);
	__asm volatile ("isync");

	/*
	 * Enable AltiVec when we return to user-mode.
	 * Record the new ownership of the AltiVec unit.
	 */
	curcpu()->ci_veclwp = l;
	pcb->pcb_veccpu = curcpu();
	pcb->pcb_flags |= PCB_OWNALTIVEC;
	__asm volatile ("sync");

	/*
	 * Restore MSR (turn off AltiVec).
	 */
	mtmsr(msr);
}
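
/*
 * Ownership summary (added commentary, inferred from the code in this
 * file rather than taken from the original comments): ci->ci_veclwp
 * and pcb->pcb_veccpu are paired back pointers recording which CPU,
 * if any, holds an LWP's live vector state.  enable_vec() establishes
 * the pair; save_vec_cpu() and the discard path of save_vec_lwp()
 * dissolve it.  The invariant the code asserts is, in effect:
 *
 *	KASSERT(pcb->pcb_veccpu == NULL ||
 *	    pcb->pcb_veccpu->ci_veclwp == l);
 */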

void
save_vec_cpu(void)
{
	struct cpu_info *ci = curcpu();
	struct lwp *l;
	struct pcb *pcb;
	struct vreg *vr;
	struct trapframe *tf;
	register_t msr;

	/*
	 * Turn on AltiVec, turn off interrupts.
	 */
	msr = mfmsr();
	mtmsr((msr & ~PSL_EE) | PSL_VEC);
	__asm volatile ("isync");
	l = ci->ci_veclwp;
	if (l == NULL)
		goto out;
	pcb = &l->l_addr->u_pcb;
	vr = &pcb->pcb_vr;
	tf = trapframe(l);

#define	STVX(n,vr)	__asm /*volatile*/("stvx %2,%0,%1" \
	    ::	"b"(vr), "r"(offsetof(struct vreg, vreg[n])), "n"(n));

	/*
	 * Save the vector registers.
	 */
	STVX( 0,vr);	STVX( 1,vr);	STVX( 2,vr);	STVX( 3,vr);
	STVX( 4,vr);	STVX( 5,vr);	STVX( 6,vr);	STVX( 7,vr);
	STVX( 8,vr);	STVX( 9,vr);	STVX(10,vr);	STVX(11,vr);
	STVX(12,vr);	STVX(13,vr);	STVX(14,vr);	STVX(15,vr);

	STVX(16,vr);	STVX(17,vr);	STVX(18,vr);	STVX(19,vr);
	STVX(20,vr);	STVX(21,vr);	STVX(22,vr);	STVX(23,vr);
	STVX(24,vr);	STVX(25,vr);	STVX(26,vr);	STVX(27,vr);
	STVX(28,vr);	STVX(29,vr);	STVX(30,vr);	STVX(31,vr);

	/*
	 * Save VSCR (this needs to be done after saving the vector
	 * registers, since we need one of them as scratch).
	 */
	__asm volatile("mfvscr %2; stvewx %2,%0,%1" \
	    ::	"b"(vr), "r"(offsetof(struct vreg, vscr)), "n"(0));

	/*
	 * Save VRSAVE.
	 */
	vr->vrsave = tf->tf_xtra[TF_VRSAVE];

	/*
	 * Record that no CPU holds this LWP's vector state any longer,
	 * and stop any outstanding data streams (dssall).
	 */
	pcb->pcb_veccpu = NULL;
	ci->ci_veclwp = NULL;
	__asm volatile ("dssall; sync");

 out:
	/*
	 * Restore MSR (turn off AltiVec).
	 */
	mtmsr(msr);
}

#ifdef MULTIPROCESSOR
/*
 * Save a process's AltiVec state to its PCB.  The state may be on any
 * CPU.  The process must either be curproc or traced by curproc (and
 * stopped); the point is that it must not run on another CPU while
 * this function executes.
 */
static void
mp_save_vec_lwp(struct lwp *l)
{
	struct pcb *pcb = &l->l_addr->u_pcb;
	struct cpu_info *veccpu;
	int i;

	/*
	 * Send an IPI to the CPU holding the state and wait for that
	 * CPU to flush the data.  Note that the other CPU might have
	 * switched to a different proc's AltiVec state by the time it
	 * receives the IPI, but that will only result in an
	 * unnecessary reload.
	 */
	veccpu = pcb->pcb_veccpu;
	if (veccpu == NULL)
		return;

	ppc_send_ipi(veccpu->ci_cpuid, PPC_IPI_FLUSH_VEC);

	/* Wait for the flush. */
	for (i = 0; i < 0x3fffffff; i++)
		if (pcb->pcb_veccpu == NULL)
			return;

	aprint_error("mp_save_vec_lwp{%d} pid = %d.%d, veccpu->ci_cpuid = %d\n",
	    cpu_number(), l->l_proc->p_pid, l->l_lid, veccpu->ci_cpuid);
	panic("mp_save_vec_lwp: timed out");
}
#endif /*MULTIPROCESSOR*/
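
/*
 * A note on the IPI handshake above (an inference from this file, not
 * an original comment): the CPU receiving PPC_IPI_FLUSH_VEC is
 * expected to call save_vec_cpu(), whose final stores clear both
 * ci_veclwp and pcb_veccpu; the cleared pcb_veccpu is exactly what
 * the bounded spin in mp_save_vec_lwp() polls for.  A hypothetical
 * handler shape, sketched for illustration only:
 *
 *	case PPC_IPI_FLUSH_VEC:		// name from this file
 *		save_vec_cpu();
 *		break;
 */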

/*
 * Save a process's AltiVec state to its PCB.  The state may be on any
 * CPU.  The process must either be curproc or traced by curproc (and
 * stopped); the point is that it must not run on another CPU while
 * this function executes.
 */
void
save_vec_lwp(struct lwp *l, int discard)
{
	struct pcb * const pcb = &l->l_addr->u_pcb;
	struct cpu_info * const ci = curcpu();

	/*
	 * If it's already in the PCB, there's nothing to do.
	 */
	if (pcb->pcb_veccpu == NULL)
		return;

	/*
	 * If we simply need to discard the information, don't bother
	 * saving anything.
	 */
	if (discard) {
#ifndef MULTIPROCESSOR
		KASSERT(ci == pcb->pcb_veccpu);
#endif
		KASSERT(l == pcb->pcb_veccpu->ci_veclwp);
		pcb->pcb_veccpu->ci_veclwp = NULL;
		pcb->pcb_veccpu = NULL;
		pcb->pcb_flags &= ~PCB_OWNALTIVEC;
		return;
	}

	/*
	 * If the state is on the current CPU, just flush the current
	 * CPU's state.
	 */
	if (l == ci->ci_veclwp) {
		save_vec_cpu();
		return;
	}

#ifdef MULTIPROCESSOR
	/*
	 * It must be on another CPU; flush it from there.
	 */
	mp_save_vec_lwp(l);
#endif
}

#define	ZERO_VEC	19

void
vzeropage(paddr_t pa)
{
	const paddr_t ea = pa + PAGE_SIZE;
	uint32_t vec[7], *vp = (void *) roundup((uintptr_t) vec, 16);
	register_t omsr, msr;

	__asm volatile("mfmsr %0" : "=r"(omsr) :);

	/*
	 * Turn on AltiVec, turn off interrupts.
	 */
	msr = (omsr & ~PSL_EE) | PSL_VEC;
	__asm volatile("sync; mtmsr %0; isync" :: "r"(msr));

	/*
	 * Save the vector register we are going to use before we disable
	 * relocation.
	 */
	__asm("stvx %1,0,%0" :: "r"(vp), "n"(ZERO_VEC));
	__asm("vxor %0,%0,%0" :: "n"(ZERO_VEC));

	/*
	 * Zero the page using a single cache line.
	 */
	__asm volatile(
	    "	sync ;"
	    "	mfmsr	%[msr];"
	    "	rlwinm	%[msr],%[msr],0,28,26;"	/* Clear PSL_DR */
	    "	mtmsr	%[msr];"		/* Turn off DMMU */
	    "	isync;"
	    "1:	stvx	%[zv], %[pa], %[off0];"
	    "	stvxl	%[zv], %[pa], %[off16];"
	    "	stvx	%[zv], %[pa], %[off32];"
	    "	stvxl	%[zv], %[pa], %[off48];"
	    "	addi	%[pa], %[pa], 64;"
	    "	cmplw	%[pa], %[ea];"
	    "	blt+	1b;"
	    "	ori	%[msr], %[msr], 0x10;"	/* Set PSL_DR */
	    "	sync;"
	    "	mtmsr	%[msr];"		/* Turn on DMMU */
	    "	isync;"
	    ::	[msr] "r"(msr), [pa] "b"(pa), [ea] "b"(ea),
		[off0] "r"(0), [off16] "r"(16), [off32] "r"(32),
		[off48] "r"(48), [zv] "n"(ZERO_VEC));

	/*
	 * Restore VEC register (now that we can access the stack again).
	 */
	__asm("lvx %1,0,%0" :: "r"(vp), "n"(ZERO_VEC));

	/*
	 * Restore old MSR (AltiVec OFF).
	 */
	__asm volatile("sync; mtmsr %0; isync" :: "r"(omsr));
}
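
/*
 * Loop geometry for vzeropage() above (added commentary; the
 * arithmetic follows from the code): each iteration issues four
 * 16-byte vector stores, i.e. 64 bytes, so with the usual 4 KB
 * PAGE_SIZE the loop runs 4096 / 64 = 64 times.  On CPUs with 32-byte
 * cache lines (e.g. the G4), the stvx/stvxl pairing makes the second
 * store to each line an LRU-hinted store, so the freshly zeroed data
 * is less likely to evict useful cache contents.
 */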

#define	LO_VEC	16
#define	HI_VEC	17

void
vcopypage(paddr_t dst, paddr_t src)
{
	const paddr_t edst = dst + PAGE_SIZE;
	uint32_t vec[11], *vp = (void *) roundup((uintptr_t) vec, 16);
	register_t omsr, msr;

	__asm volatile("mfmsr %0" : "=r"(omsr) :);

	/*
	 * Turn on AltiVec, turn off interrupts.
	 */
	msr = (omsr & ~PSL_EE) | PSL_VEC;
	__asm volatile("sync; mtmsr %0; isync" :: "r"(msr));

	/*
	 * Save the VEC registers we will be using before we disable
	 * relocation.
	 */
	__asm("stvx %2,%1,%0" :: "b"(vp), "r"( 0), "n"(LO_VEC));
	__asm("stvx %2,%1,%0" :: "b"(vp), "r"(16), "n"(HI_VEC));

	/*
	 * Copy the page using a single cache line, with DMMU
	 * disabled.  On most PPCs, two vector registers occupy one
	 * cache line.
	 */
	__asm volatile(
	    "	sync ;"
	    "	mfmsr	%[msr];"
	    "	rlwinm	%[msr],%[msr],0,28,26;"	/* Clear PSL_DR */
	    "	mtmsr	%[msr];"		/* Turn off DMMU */
	    "	isync;"
	    "1:	lvx	%[lv], %[src], %[off0];"
	    "	stvx	%[lv], %[dst], %[off0];"
	    "	lvxl	%[hv], %[src], %[off16];"
	    "	stvxl	%[hv], %[dst], %[off16];"
	    "	addi	%[src], %[src], 32;"
	    "	addi	%[dst], %[dst], 32;"
	    "	cmplw	%[dst], %[edst];"
	    "	blt+	1b;"
	    "	ori	%[msr], %[msr], 0x10;"	/* Set PSL_DR */
	    "	sync;"
	    "	mtmsr	%[msr];"		/* Turn on DMMU */
	    "	isync;"
	    ::	[msr] "r"(msr), [src] "b"(src), [dst] "b"(dst),
		[edst] "b"(edst), [off0] "r"(0), [off16] "r"(16),
		[lv] "n"(LO_VEC), [hv] "n"(HI_VEC));

	/*
	 * Restore VEC registers (now that we can access the stack again).
	 */
	__asm("lvx %2,%1,%0" :: "b"(vp), "r"( 0), "n"(LO_VEC));
	__asm("lvx %2,%1,%0" :: "b"(vp), "r"(16), "n"(HI_VEC));

	/*
	 * Restore old MSR (AltiVec OFF).
	 */
	__asm volatile("sync; mtmsr %0; isync" :: "r"(omsr));
}
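
#if 0
/*
 * Usage sketch (illustrative only, not part of this file): the MD
 * pmap can route its page zero/copy primitives through the AltiVec
 * helpers above when the CPU has a vector unit.  The
 * "pmap_use_altivec" flag and the fallback path shown here are
 * assumptions for the example, not guaranteed interfaces.
 */
void
pmap_zero_page(paddr_t pa)
{
	if (pmap_use_altivec) {		/* assumed AltiVec-present flag */
		vzeropage(pa);
		return;
	}
	/* ... otherwise zero via a cacheable mapping, e.g. memset() ... */
}
#endif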