/*-
 * Copyright (c) 2015-2016 The FreeBSD Foundation
 *
 * This software was developed by Andrew Turner under
 * sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include <sys/cdefs.h>
#ifdef VFP
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/elf.h>
#include <sys/eventhandler.h>
#include <sys/limits.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/pcpu.h>
#include <sys/proc.h>
#include <sys/reg.h>
#include <sys/smp.h>

#include <vm/uma.h>

#include <machine/armreg.h>
#include <machine/md_var.h>
#include <machine/pcb.h>
#include <machine/vfp.h>

/* Sanity check we can store all the VFP registers */
CTASSERT(sizeof(((struct pcb *)0)->pcb_fpustate.vfp_regs) == 16 * 32);

static MALLOC_DEFINE(M_FPUKERN_CTX, "fpukern_ctx",
    "Kernel contexts for VFP state");

/*
 * Context used by fpu_kern_enter()/fpu_kern_leave() to let kernel code
 * temporarily use the VFP unit while preserving the interrupted state.
 */
struct fpu_kern_ctx {
	struct vfpstate	*prev;		/* saved state to restore on leave */
#define	FPU_KERN_CTX_DUMMY	0x01	/* avoided save for the kern thread */
#define	FPU_KERN_CTX_INUSE	0x02
	uint32_t	 flags;
	struct vfpstate	 state;
};

static uma_zone_t fpu_save_area_zone;
static struct vfpstate *fpu_initialstate;

/* Common SVE vector length (in bytes) across all CPUs; 0 if no SVE. */
static u_int sve_max_vector_len;

/*
 * Size in bytes of the save buffer needed to hold the full SVE state
 * for a vector length of sve_len bytes.
 */
static size_t
_sve_buf_size(u_int sve_len)
{
	size_t len;

	/* 32 vector registers */
	len = (size_t)sve_len * 32;
	/*
	 * 16 predicate registers and the First Fault register, each 1/8th
	 * the size of a vector register.
	 */
	len += ((size_t)sve_len * 17) / 8;
	/*
	 * FPSR and FPCR
	 */
	len += sizeof(uint64_t) * 2;

	return (len);
}

/* Buffer size needed for the system-wide maximum SVE vector length. */
size_t
sve_max_buf_size(void)
{
	MPASS(sve_max_vector_len > 0);
	return (_sve_buf_size(sve_max_vector_len));
}

/* Buffer size needed for the given thread's SVE vector length. */
size_t
sve_buf_size(struct thread *td)
{
	struct pcb *pcb;

	pcb = td->td_pcb;
	MPASS(pcb->pcb_svesaved != NULL);
	MPASS(pcb->pcb_sve_len > 0);

	return (_sve_buf_size(pcb->pcb_sve_len));
}

/* Allocate a zeroed SVE save buffer sized for the maximum vector length. */
static void *
sve_alloc(void)
{
	void *buf;

	buf = malloc(sve_max_buf_size(), M_FPUKERN_CTX, M_WAITOK | M_ZERO);

	return (buf);
}

static void
sve_free(void *buf)
{
	free(buf, M_FPUKERN_CTX);
}

/* Enable FP/SIMD access at EL1/EL0 via CPACR_EL1. */
void
vfp_enable(void)
{
	uint32_t cpacr;

	cpacr = READ_SPECIALREG(cpacr_el1);
	cpacr = (cpacr & ~CPACR_FPEN_MASK) | CPACR_FPEN_TRAP_NONE;
	WRITE_SPECIALREG(cpacr_el1, cpacr);
	isb();
}

/* Enable both FP/SIMD and SVE access via CPACR_EL1. */
static void
sve_enable(void)
{
	uint32_t cpacr;

	cpacr = READ_SPECIALREG(cpacr_el1);
	/* Enable FP */
	cpacr = (cpacr & ~CPACR_FPEN_MASK) | CPACR_FPEN_TRAP_NONE;
	/* Enable SVE */
	cpacr = (cpacr & ~CPACR_ZEN_MASK) | CPACR_ZEN_TRAP_NONE;
	WRITE_SPECIALREG(cpacr_el1, cpacr);
	isb();
}

/* Disable FP/SIMD and SVE access so the next use traps. */
void
vfp_disable(void)
{
	uint32_t cpacr;

	cpacr = READ_SPECIALREG(cpacr_el1);
	/* Disable FP */
	cpacr = (cpacr & ~CPACR_FPEN_MASK) | CPACR_FPEN_TRAP_ALL1;
	/* Disable SVE */
	cpacr = (cpacr & ~CPACR_ZEN_MASK) | CPACR_ZEN_TRAP_ALL1;
	WRITE_SPECIALREG(cpacr_el1, cpacr);
	isb();
}

/*
 * Called when the thread is dying or when discarding the kernel VFP state.
 * If the thread was the last to use the VFP unit mark it as unused to tell
 * the kernel the fp state is unowned. Ensure the VFP unit is off so we get
 * an exception on the next access.
 */
void
vfp_discard(struct thread *td)
{

#ifdef INVARIANTS
	if (td != NULL)
		CRITICAL_ASSERT(td);
#endif
	if (PCPU_GET(fpcurthread) == td)
		PCPU_SET(fpcurthread, NULL);

	vfp_disable();
}

/* Store the current CPU's VFP registers and FPCR/FPSR into *state. */
void
vfp_store(struct vfpstate *state)
{
	__uint128_t *vfp_state;
	uint64_t fpcr, fpsr;

	vfp_state = state->vfp_regs;
	__asm __volatile(
	    ".arch_extension fp\n"
	    "mrs	%0, fpcr		\n"
	    "mrs	%1, fpsr		\n"
	    "stp	q0,  q1,  [%2, #16 *  0]\n"
	    "stp	q2,  q3,  [%2, #16 *  2]\n"
	    "stp	q4,  q5,  [%2, #16 *  4]\n"
	    "stp	q6,  q7,  [%2, #16 *  6]\n"
	    "stp	q8,  q9,  [%2, #16 *  8]\n"
	    "stp	q10, q11, [%2, #16 * 10]\n"
	    "stp	q12, q13, [%2, #16 * 12]\n"
	    "stp	q14, q15, [%2, #16 * 14]\n"
	    "stp	q16, q17, [%2, #16 * 16]\n"
	    "stp	q18, q19, [%2, #16 * 18]\n"
	    "stp	q20, q21, [%2, #16 * 20]\n"
	    "stp	q22, q23, [%2, #16 * 22]\n"
	    "stp	q24, q25, [%2, #16 * 24]\n"
	    "stp	q26, q27, [%2, #16 * 26]\n"
	    "stp	q28, q29, [%2, #16 * 28]\n"
	    "stp	q30, q31, [%2, #16 * 30]\n"
	    ".arch_extension nofp\n"
	    : "=&r"(fpcr), "=&r"(fpsr) : "r"(vfp_state));

	state->vfp_fpcr = fpcr;
	state->vfp_fpsr = fpsr;
}

/* Load the VFP registers and FPCR/FPSR on the current CPU from *state. */
void
vfp_restore(struct vfpstate *state)
{
	__uint128_t *vfp_state;
	uint64_t fpcr, fpsr;

	vfp_state = state->vfp_regs;
	fpcr = state->vfp_fpcr;
	fpsr = state->vfp_fpsr;

	__asm __volatile(
	    ".arch_extension fp\n"
	    "ldp	q0,  q1,  [%2, #16 *  0]\n"
	    "ldp	q2,  q3,  [%2, #16 *  2]\n"
	    "ldp	q4,  q5,  [%2, #16 *  4]\n"
	    "ldp	q6,  q7,  [%2, #16 *  6]\n"
	    "ldp	q8,  q9,  [%2, #16 *  8]\n"
	    "ldp	q10, q11, [%2, #16 * 10]\n"
	    "ldp	q12, q13, [%2, #16 * 12]\n"
	    "ldp	q14, q15, [%2, #16 * 14]\n"
	    "ldp	q16, q17, [%2, #16 * 16]\n"
	    "ldp	q18, q19, [%2, #16 * 18]\n"
	    "ldp	q20, q21, [%2, #16 * 20]\n"
	    "ldp	q22, q23, [%2, #16 * 22]\n"
	    "ldp	q24, q25, [%2, #16 * 24]\n"
	    "ldp	q26, q27, [%2, #16 * 26]\n"
	    "ldp	q28, q29, [%2, #16 * 28]\n"
	    "ldp	q30, q31, [%2, #16 * 30]\n"
	    "msr	fpcr, %0		\n"
	    "msr	fpsr, %1		\n"
	    ".arch_extension nofp\n"
	    : : "r"(fpcr), "r"(fpsr), "r"(vfp_state));
}

/* Store the full SVE state (Z, P, FFR, FPSR/FPCR) into the state buffer. */
static void
sve_store(void *state, u_int sve_len)
{
	vm_offset_t f_start, p_start, z_start;
	uint64_t fpcr, fpsr;

	/*
	 * Calculate the start of each register groups. There are three
	 * groups depending on size, with the First Fault Register (FFR)
	 * stored with the predicate registers as we use one of them to
	 * temporarily hold it.
	 *
	 *             +-------------------------+-------------------+
	 *             | Contents                | Register size     |
	 *  z_start -> +-------------------------+-------------------+
	 *             |                         |                   |
	 *             | 32 Z regs               | sve_len           |
	 *             |                         |                   |
	 *  p_start -> +-------------------------+-------------------+
	 *             |                         |                   |
	 *             | 16 Predicate registers  | 1/8 size of Z reg |
	 *             |  1 First Fault register |                   |
	 *             |                         |                   |
	 *  f_start -> +-------------------------+-------------------+
	 *             |                         |                   |
	 *             | FPSR/FPCR               | 32 bit            |
	 *             |                         |                   |
	 *             +-------------------------+-------------------+
	 */
	z_start = (vm_offset_t)state;
	p_start = z_start + sve_len * 32;
	f_start = p_start + (sve_len / 8) * 17;

	__asm __volatile(
	    ".arch_extension sve			\n"
	    "str	z0, [%0, #0, MUL VL]		\n"
	    "str	z1, [%0, #1, MUL VL]		\n"
	    "str	z2, [%0, #2, MUL VL]		\n"
	    "str	z3, [%0, #3, MUL VL]		\n"
	    "str	z4, [%0, #4, MUL VL]		\n"
	    "str	z5, [%0, #5, MUL VL]		\n"
	    "str	z6, [%0, #6, MUL VL]		\n"
	    "str	z7, [%0, #7, MUL VL]		\n"
	    "str	z8, [%0, #8, MUL VL]		\n"
	    "str	z9, [%0, #9, MUL VL]		\n"
	    "str	z10, [%0, #10, MUL VL]		\n"
	    "str	z11, [%0, #11, MUL VL]		\n"
	    "str	z12, [%0, #12, MUL VL]		\n"
	    "str	z13, [%0, #13, MUL VL]		\n"
	    "str	z14, [%0, #14, MUL VL]		\n"
	    "str	z15, [%0, #15, MUL VL]		\n"
	    "str	z16, [%0, #16, MUL VL]		\n"
	    "str	z17, [%0, #17, MUL VL]		\n"
	    "str	z18, [%0, #18, MUL VL]		\n"
	    "str	z19, [%0, #19, MUL VL]		\n"
	    "str	z20, [%0, #20, MUL VL]		\n"
	    "str	z21, [%0, #21, MUL VL]		\n"
	    "str	z22, [%0, #22, MUL VL]		\n"
	    "str	z23, [%0, #23, MUL VL]		\n"
	    "str	z24, [%0, #24, MUL VL]		\n"
	    "str	z25, [%0, #25, MUL VL]		\n"
	    "str	z26, [%0, #26, MUL VL]		\n"
	    "str	z27, [%0, #27, MUL VL]		\n"
	    "str	z28, [%0, #28, MUL VL]		\n"
	    "str	z29, [%0, #29, MUL VL]		\n"
	    "str	z30, [%0, #30, MUL VL]		\n"
	    "str	z31, [%0, #31, MUL VL]		\n"
	    /* Store the predicate registers */
	    "str	p0, [%1, #0, MUL VL]		\n"
	    "str	p1, [%1, #1, MUL VL]		\n"
	    "str	p2, [%1, #2, MUL VL]		\n"
	    "str	p3, [%1, #3, MUL VL]		\n"
	    "str	p4, [%1, #4, MUL VL]		\n"
	    "str	p5, [%1, #5, MUL VL]		\n"
	    "str	p6, [%1, #6, MUL VL]		\n"
	    "str	p7, [%1, #7, MUL VL]		\n"
	    "str	p8, [%1, #8, MUL VL]		\n"
	    "str	p9, [%1, #9, MUL VL]		\n"
	    "str	p10, [%1, #10, MUL VL]		\n"
	    "str	p11, [%1, #11, MUL VL]		\n"
	    "str	p12, [%1, #12, MUL VL]		\n"
	    "str	p13, [%1, #13, MUL VL]		\n"
	    "str	p14, [%1, #14, MUL VL]		\n"
	    "str	p15, [%1, #15, MUL VL]		\n"
	    ".arch_extension nosve			\n"
	    : : "r"(z_start), "r"(p_start));

	/* Save the FFR if needed */
	/* TODO: Skip if in SME streaming mode (when supported) */
	__asm __volatile(
	    ".arch_extension sve			\n"
	    "rdffr	p0.b				\n"
	    "str	p0, [%0, #16, MUL VL]		\n"
	/*
	 * Load the old p0 value to ensure it is consistent if we enable
	 * without calling sve_restore, e.g. switch to a kernel thread and
	 * back.
	 */
	    "ldr	p0, [%0, #0, MUL VL]		\n"
	    ".arch_extension nosve			\n"
	    : : "r"(p_start));

	__asm __volatile(
	    ".arch_extension fp				\n"
	    "mrs	%0, fpsr			\n"
	    "mrs	%1, fpcr			\n"
	    "stp	%w0, %w1, [%2]			\n"
	    ".arch_extension nofp			\n"
	    : "=&r"(fpsr), "=&r"(fpcr) : "r"(f_start));
}

/* Load the full SVE state (FFR, Z, P, FPSR/FPCR) from the state buffer. */
static void
sve_restore(void *state, u_int sve_len)
{
	vm_offset_t f_start, p_start, z_start;
	uint64_t fpcr, fpsr;

	/* See sve_store for the layout of the state buffer */
	z_start = (vm_offset_t)state;
	p_start = z_start + sve_len * 32;
	f_start = p_start + (sve_len / 8) * 17;

	/* Restore the FFR first, via p0 which is reloaded below. */
	__asm __volatile(
	    ".arch_extension sve			\n"
	    "ldr	p0, [%0, #16, MUL VL]		\n"
	    "wrffr	p0.b				\n"
	    ".arch_extension nosve			\n"
	    : : "r"(p_start));

	__asm __volatile(
	    ".arch_extension sve			\n"
	    "ldr	z0, [%0, #0, MUL VL]		\n"
	    "ldr	z1, [%0, #1, MUL VL]		\n"
	    "ldr	z2, [%0, #2, MUL VL]		\n"
	    "ldr	z3, [%0, #3, MUL VL]		\n"
	    "ldr	z4, [%0, #4, MUL VL]		\n"
	    "ldr	z5, [%0, #5, MUL VL]		\n"
	    "ldr	z6, [%0, #6, MUL VL]		\n"
	    "ldr	z7, [%0, #7, MUL VL]		\n"
	    "ldr	z8, [%0, #8, MUL VL]		\n"
	    "ldr	z9, [%0, #9, MUL VL]		\n"
	    "ldr	z10, [%0, #10, MUL VL]		\n"
	    "ldr	z11, [%0, #11, MUL VL]		\n"
	    "ldr	z12, [%0, #12, MUL VL]		\n"
	    "ldr	z13, [%0, #13, MUL VL]		\n"
	    "ldr	z14, [%0, #14, MUL VL]		\n"
	    "ldr	z15, [%0, #15, MUL VL]		\n"
	    "ldr	z16, [%0, #16, MUL VL]		\n"
	    "ldr	z17, [%0, #17, MUL VL]		\n"
	    "ldr	z18, [%0, #18, MUL VL]		\n"
	    "ldr	z19, [%0, #19, MUL VL]		\n"
	    "ldr	z20, [%0, #20, MUL VL]		\n"
	    "ldr	z21, [%0, #21, MUL VL]		\n"
	    "ldr	z22, [%0, #22, MUL VL]		\n"
	    "ldr	z23, [%0, #23, MUL VL]		\n"
	    "ldr	z24, [%0, #24, MUL VL]		\n"
	    "ldr	z25, [%0, #25, MUL VL]		\n"
	    "ldr	z26, [%0, #26, MUL VL]		\n"
	    "ldr	z27, [%0, #27, MUL VL]		\n"
	    "ldr	z28, [%0, #28, MUL VL]		\n"
	    "ldr	z29, [%0, #29, MUL VL]		\n"
	    "ldr	z30, [%0, #30, MUL VL]		\n"
	    "ldr	z31, [%0, #31, MUL VL]		\n"
	    /* Load the predicate registers */
	    "ldr	p0, [%1, #0, MUL VL]		\n"
	    "ldr	p1, [%1, #1, MUL VL]		\n"
	    "ldr	p2, [%1, #2, MUL VL]		\n"
	    "ldr	p3, [%1, #3, MUL VL]		\n"
	    "ldr	p4, [%1, #4, MUL VL]		\n"
	    "ldr	p5, [%1, #5, MUL VL]		\n"
	    "ldr	p6, [%1, #6, MUL VL]		\n"
	    "ldr	p7, [%1, #7, MUL VL]		\n"
	    "ldr	p8, [%1, #8, MUL VL]		\n"
	    "ldr	p9, [%1, #9, MUL VL]		\n"
	    "ldr	p10, [%1, #10, MUL VL]		\n"
	    "ldr	p11, [%1, #11, MUL VL]		\n"
	    "ldr	p12, [%1, #12, MUL VL]		\n"
	    "ldr	p13, [%1, #13, MUL VL]		\n"
	    "ldr	p14, [%1, #14, MUL VL]		\n"
	    "ldr	p15, [%1, #15, MUL VL]		\n"
	    ".arch_extension nosve			\n"
	    : : "r"(z_start), "r"(p_start));

	__asm __volatile(
	    ".arch_extension fp				\n"
	    "ldp	%w0, %w1, [%2]			\n"
	    "msr	fpsr, %0			\n"
	    "msr	fpcr, %1			\n"
	    ".arch_extension nofp			\n"
	    : "=&r"(fpsr), "=&r"(fpcr) : "r"(f_start));
}

/*
 * Sync the VFP registers to the SVE register state, e.g. in signal return
 * when userspace may have changed the vfp register values and expect them
 * to be used when the signal handler returns.
 */
void
vfp_to_sve_sync(struct thread *td)
{
	struct pcb *pcb;
	uint32_t *fpxr;

	pcb = td->td_pcb;
	if (pcb->pcb_svesaved == NULL)
		return;

	MPASS(pcb->pcb_fpusaved != NULL);

	/* Copy the VFP registers to the SVE region */
	for (int i = 0; i < nitems(pcb->pcb_fpusaved->vfp_regs); i++) {
		__uint128_t *sve_reg;

		/* Each Z register's low 128 bits alias the VFP Q register */
		sve_reg = (__uint128_t *)((uintptr_t)pcb->pcb_svesaved +
		    i * pcb->pcb_sve_len);
		*sve_reg = pcb->pcb_fpusaved->vfp_regs[i];
	}

	/* FPSR/FPCR live after the Z and P/FFR regions; see sve_store */
	fpxr = (uint32_t *)((uintptr_t)pcb->pcb_svesaved +
	    (32 * pcb->pcb_sve_len) + (17 * pcb->pcb_sve_len / 8));
	fpxr[0] = pcb->pcb_fpusaved->vfp_fpsr;
	fpxr[1] = pcb->pcb_fpusaved->vfp_fpcr;
}

/*
 * Sync the SVE registers to the VFP register state.
 */
void
sve_to_vfp_sync(struct thread *td)
{
	struct pcb *pcb;
	uint32_t *fpxr;

	pcb = td->td_pcb;
	if (pcb->pcb_svesaved == NULL)
		return;

	MPASS(pcb->pcb_fpusaved == &pcb->pcb_fpustate);

	/* Copy the SVE registers to the VFP saved state */
	for (int i = 0; i < nitems(pcb->pcb_fpusaved->vfp_regs); i++) {
		__uint128_t *sve_reg;

		/* Each Z register's low 128 bits alias the VFP Q register */
		sve_reg = (__uint128_t *)((uintptr_t)pcb->pcb_svesaved +
		    i * pcb->pcb_sve_len);
		pcb->pcb_fpusaved->vfp_regs[i] = *sve_reg;
	}

	/* FPSR/FPCR live after the Z and P/FFR regions; see sve_store */
	fpxr = (uint32_t *)((uintptr_t)pcb->pcb_svesaved +
	    (32 * pcb->pcb_sve_len) + (17 * pcb->pcb_sve_len / 8));
	pcb->pcb_fpusaved->vfp_fpsr = fpxr[0];
	pcb->pcb_fpusaved->vfp_fpcr = fpxr[1];
}

/*
 * Save the current thread's VFP (and, when live, SVE) state and disable
 * the unit so the next use traps. Must be the current owner of the unit.
 */
static void
vfp_save_state_common(struct thread *td, struct pcb *pcb, bool full_save)
{
	uint32_t cpacr;
	bool save_sve;

	save_sve = false;

	critical_enter();
	/*
	 * Only store the registers if the VFP is enabled,
	 * i.e. return if we are trapping on FP access.
	 */
	cpacr = READ_SPECIALREG(cpacr_el1);
	if ((cpacr & CPACR_FPEN_MASK) != CPACR_FPEN_TRAP_NONE)
		goto done;

	KASSERT(PCPU_GET(fpcurthread) == td,
	    ("Storing an invalid VFP state"));

	/*
	 * Also save the SVE state. As SVE depends on the VFP being
	 * enabled we can rely on only needing to check this when
	 * the VFP unit has been enabled.
	 */
	if ((cpacr & CPACR_ZEN_MASK) == CPACR_ZEN_TRAP_NONE) {
		/* If SVE is enabled it should be valid */
		MPASS((pcb->pcb_fpflags & PCB_FP_SVEVALID) != 0);

		/*
		 * If we are switching while in a system call skip saving
		 * SVE registers. The ABI allows us to drop them over any
		 * system calls, however doing so is expensive in SVE
		 * heavy userspace code. This would require us to disable
		 * SVE for all system calls and trap the next use of them.
		 * As an optimisation only disable SVE on context switch.
		 */
		if (td->td_frame == NULL ||
		    (ESR_ELx_EXCEPTION(td->td_frame->tf_esr) != EXCP_SVC64 &&
		    td->td_sa.code != (u_int)-1))
			save_sve = true;
	}

	if (save_sve) {
		KASSERT(pcb->pcb_svesaved != NULL,
		    ("Storing to a NULL SVE state"));
		sve_store(pcb->pcb_svesaved, pcb->pcb_sve_len);
		if (full_save)
			sve_to_vfp_sync(td);
	} else {
		pcb->pcb_fpflags &= ~PCB_FP_SVEVALID;
		vfp_store(pcb->pcb_fpusaved);
	}
	dsb(ish);
	vfp_disable();

done:
	critical_exit();
}

void
vfp_save_state(struct thread *td, struct pcb *pcb)
{
	KASSERT(td != NULL, ("NULL vfp thread"));
	KASSERT(pcb != NULL, ("NULL vfp pcb"));
	KASSERT(td->td_pcb == pcb, ("Invalid vfp pcb"));

	vfp_save_state_common(td, pcb, true);
}

void
vfp_save_state_savectx(struct pcb *pcb)
{
	/*
	 * savectx() will be called on panic with dumppcb as an argument,
	 * dumppcb either has no pcb_fpusaved set or it was previously set
	 * to its own fpu state.
	 *
	 * In both cases we can set it here to the pcb fpu state.
	 */
	MPASS(pcb->pcb_fpusaved == NULL ||
	    pcb->pcb_fpusaved == &pcb->pcb_fpustate);
	pcb->pcb_fpusaved = &pcb->pcb_fpustate;

	vfp_save_state_common(curthread, pcb, true);
}

/* Save state on context switch; skips the VFP<->SVE sync (full_save=false). */
void
vfp_save_state_switch(struct thread *td)
{
	KASSERT(td != NULL, ("NULL vfp thread"));

	vfp_save_state_common(td, td->td_pcb, false);
}

/*
 * Update the VFP state for a forked process or new thread. The PCB will
 * have been copied from the old thread.
 */
void
vfp_new_thread(struct thread *newtd, struct thread *oldtd, bool fork)
{
	struct pcb *newpcb, *oldpcb;

	newpcb = newtd->td_pcb;
	oldpcb = oldtd->td_pcb;

	/* Kernel threads start with clean VFP */
	if ((oldtd->td_pflags & TDP_KTHREAD) != 0) {
		newpcb->pcb_fpflags &=
		    ~(PCB_FP_STARTED | PCB_FP_SVEVALID | PCB_FP_KERN |
		    PCB_FP_NOSAVE);
	} else {
		MPASS((newpcb->pcb_fpflags & (PCB_FP_KERN|PCB_FP_NOSAVE)) == 0);

		/*
		 * The only SVE register state to be guaranteed to be saved
		 * across a system call is the lower bits of the Z registers
		 * as these are aliased with the existing FP registers.
		 * Because we can only create a new thread or fork through a
		 * system call it is safe to drop the SVE state in the new
		 * thread.
		 */
		newpcb->pcb_fpflags &= ~PCB_FP_SVEVALID;
		if (!fork) {
			newpcb->pcb_fpflags &= ~PCB_FP_STARTED;
		}
	}

	/* The SVE buffer is per-thread; the new thread allocates its own */
	newpcb->pcb_svesaved = NULL;
	if (oldpcb->pcb_svesaved == NULL)
		newpcb->pcb_sve_len = sve_max_vector_len;
	else
		KASSERT(newpcb->pcb_sve_len == oldpcb->pcb_sve_len,
		    ("%s: pcb sve vector length differs: %x != %x", __func__,
		    newpcb->pcb_sve_len, oldpcb->pcb_sve_len));

	newpcb->pcb_fpusaved = &newpcb->pcb_fpustate;
	newpcb->pcb_vfpcpu = UINT_MAX;
}

/*
 * Reset the FP state to avoid leaking state from the parent process across
 * execve() (and to ensure that we get a consistent floating point environment
 * in every new process).
 */
void
vfp_reset_state(struct thread *td, struct pcb *pcb)
{
	/* Discard the threads VFP state before resetting it */
	critical_enter();
	vfp_discard(td);
	critical_exit();

	/*
	 * Clear the thread state. The VFP is disabled and is not the current
	 * VFP thread so we won't change any of these on context switch.
	 */
	bzero(&pcb->pcb_fpustate.vfp_regs, sizeof(pcb->pcb_fpustate.vfp_regs));
	KASSERT(pcb->pcb_fpusaved == &pcb->pcb_fpustate,
	    ("pcb_fpusaved should point to pcb_fpustate."));
	pcb->pcb_fpustate.vfp_fpcr = VFPCR_INIT;
	pcb->pcb_fpustate.vfp_fpsr = 0;
	/* XXX: Memory leak when using SVE between fork & exec? */
	pcb->pcb_svesaved = NULL;
	pcb->pcb_vfpcpu = UINT_MAX;
	pcb->pcb_fpflags = 0;
}

/*
 * Enable the FP unit (and SVE when its saved state is valid) for the
 * current thread, restoring the saved register state if another thread
 * or CPU used the unit since this thread last did.
 */
static void
vfp_restore_state_common(struct thread *td, int flags)
{
	struct pcb *curpcb;
	u_int cpu;
	bool restore_sve;

	KASSERT(td == curthread, ("%s: Called with non-current thread",
	    __func__));

	critical_enter();

	cpu = PCPU_GET(cpuid);
	curpcb = td->td_pcb;

	/*
	 * If SVE has been used and the base VFP state is in use then
	 * restore the SVE registers. A non-base VFP state should only
	 * be used by the kernel and SVE should only be used by userspace.
	 */
	restore_sve = false;
	if ((curpcb->pcb_fpflags & PCB_FP_SVEVALID) != 0 &&
	    curpcb->pcb_fpusaved == &curpcb->pcb_fpustate) {
		MPASS(curpcb->pcb_svesaved != NULL);
		/* SVE shouldn't be enabled in the kernel */
		MPASS((flags & PCB_FP_KERN) == 0);
		restore_sve = true;
	}

	if (restore_sve) {
		MPASS((curpcb->pcb_fpflags & PCB_FP_SVEVALID) != 0);
		sve_enable();
	} else {
		curpcb->pcb_fpflags |= PCB_FP_STARTED;
		vfp_enable();
	}

	/*
	 * If the previous thread on this cpu to use the VFP was not the
	 * current thread, or the current thread last used it on a different
	 * cpu we need to restore the old state.
	 */
	if (PCPU_GET(fpcurthread) != curthread || cpu != curpcb->pcb_vfpcpu) {
		/*
		 * The VFP registers are the lower 128 bits of the SVE
		 * registers. Use the SVE store state if it was previously
		 * enabled.
		 */
		if (restore_sve) {
			MPASS(td->td_pcb->pcb_svesaved != NULL);
			sve_restore(td->td_pcb->pcb_svesaved,
			    td->td_pcb->pcb_sve_len);
		} else {
			vfp_restore(td->td_pcb->pcb_fpusaved);
		}
		PCPU_SET(fpcurthread, td);
		curpcb->pcb_vfpcpu = cpu;
	}

	critical_exit();
}

void
vfp_restore_state(void)
{
	struct thread *td;

	td = curthread;
	vfp_restore_state_common(td, td->td_pcb->pcb_fpflags);
}

/*
 * Handle a trapped SVE access: allocate the save buffer on first use and
 * enable SVE. Returns false to tell the caller to raise a fault instead
 * (no SVE support detected yet).
 */
bool
sve_restore_state(struct thread *td)
{
	struct pcb *curpcb;
	void *svesaved;
	uint64_t cpacr;

	KASSERT(td == curthread, ("%s: Called with non-current thread",
	    __func__));

	curpcb = td->td_pcb;

	/* The SVE state should alias the base VFP state */
	MPASS(curpcb->pcb_fpusaved == &curpcb->pcb_fpustate);

	/* SVE not enabled, tell the caller to raise a fault */
	if (curpcb->pcb_sve_len == 0) {
		/*
		 * The init pcb is created before we read the vector length.
		 * Set it to the default length.
		 */
		if (sve_max_vector_len == 0)
			return (false);

		MPASS(curpcb->pcb_svesaved == NULL);
		curpcb->pcb_sve_len = sve_max_vector_len;
	}

	if (curpcb->pcb_svesaved == NULL) {
		/* SVE should be disabled so will be invalid */
		MPASS((curpcb->pcb_fpflags & PCB_FP_SVEVALID) == 0);

		/*
		 * Allocate the SVE buffer of this thread.
		 * Enable interrupts so the allocation can sleep
		 */
		svesaved = sve_alloc();

		critical_enter();

		/* Restore the VFP state if needed */
		cpacr = READ_SPECIALREG(cpacr_el1);
		if ((cpacr & CPACR_FPEN_MASK) != CPACR_FPEN_TRAP_NONE) {
			vfp_restore_state_common(td, curpcb->pcb_fpflags);
		}

		/*
		 * Set the flags after enabling the VFP as the SVE saved
		 * state will be invalid.
		 */
		curpcb->pcb_svesaved = svesaved;
		curpcb->pcb_fpflags |= PCB_FP_SVEVALID;
		sve_enable();

		critical_exit();
	} else {
		vfp_restore_state_common(td, curpcb->pcb_fpflags);

		/* Enable SVE if it wasn't previously enabled */
		if ((curpcb->pcb_fpflags & PCB_FP_SVEVALID) == 0) {
			critical_enter();
			sve_enable();
			curpcb->pcb_fpflags |= PCB_FP_SVEVALID;
			critical_exit();
		}
	}

	return (true);
}

/* Per-CPU VFP init for APs; leaves the unit disabled until first use. */
void
vfp_init_secondary(void)
{
	uint64_t pfr;

	/* Check if there is a vfp unit present */
	pfr = READ_SPECIALREG(id_aa64pfr0_el1);
	if ((pfr & ID_AA64PFR0_FP_MASK) == ID_AA64PFR0_FP_NONE)
		return;

	/* Disable to be enabled when it's used */
	vfp_disable();
}

/*
 * Boot-time VFP init: create the save-area zone and capture the reset
 * FPCR/FPSR into the initial state template.
 */
static void
vfp_init(const void *dummy __unused)
{
	uint64_t pfr;

	/* Check if there is a vfp unit present */
	pfr = READ_SPECIALREG(id_aa64pfr0_el1);
	if ((pfr & ID_AA64PFR0_FP_MASK) == ID_AA64PFR0_FP_NONE)
		return;

	fpu_save_area_zone = uma_zcreate("VFP_save_area",
	    sizeof(struct vfpstate), NULL, NULL, NULL, NULL,
	    _Alignof(struct vfpstate) - 1, 0);
	fpu_initialstate = uma_zalloc(fpu_save_area_zone, M_WAITOK | M_ZERO);

	/* Ensure the VFP is enabled before accessing it in vfp_store */
	vfp_enable();
	vfp_store(fpu_initialstate);

	/* Disable to be enabled when it's used */
	vfp_disable();

	/* Zero the VFP registers but keep fpcr and fpsr */
	bzero(fpu_initialstate->vfp_regs, sizeof(fpu_initialstate->vfp_regs));

	thread0.td_pcb->pcb_fpusaved->vfp_fpcr = VFPCR_INIT;
}

SYSINIT(vfp, SI_SUB_CPU, SI_ORDER_ANY, vfp_init, NULL);

/* Free a dying thread's SVE save buffer (registered as thread_dtor). */
static void
sve_thread_dtor(void *arg __unused, struct thread *td)
{
	sve_free(td->td_pcb->pcb_svesaved);
}

/* Rendezvous callback: record this CPU's maximum SVE vector length. */
static void
sve_pcpu_read(void *arg)
{
	u_int *len;
	uint64_t vl;

	len = arg;

	/* Enable SVE to read zcr_el1 and VFP for rdvl */
	sve_enable();

	/* Set the longest vector length */
	WRITE_SPECIALREG(ZCR_EL1_REG, ZCR_LEN_MASK);
	isb();

	/* Read the real vector length */
	__asm __volatile(
	    ".arch_extension sve			\n"
	    "rdvl	%0, #1				\n"
	    ".arch_extension nosve			\n"
	    : "=&r"(vl));

	vfp_disable();

	len[PCPU_GET(cpuid)] = vl;
}

/*
 * Boot-time SVE init: pick the largest vector length supported by every
 * CPU and register the thread destructor that frees SVE buffers.
 */
static void
sve_init(const void *dummy __unused)
{
	u_int *len_list;
	uint64_t reg;
	int i;

	if (!get_kernel_reg(ID_AA64PFR0_EL1, &reg))
		return;

	if (ID_AA64PFR0_SVE_VAL(reg) == ID_AA64PFR0_SVE_NONE)
		return;

	len_list = malloc(sizeof(*len_list) * (mp_maxid + 1), M_TEMP,
	    M_WAITOK | M_ZERO);
	smp_rendezvous(NULL, sve_pcpu_read, NULL, len_list);

	/* Start from the architectural maximum and clamp to each CPU */
	sve_max_vector_len = ZCR_LEN_BYTES(ZCR_LEN_MASK);
	CPU_FOREACH(i) {
		if (bootverbose)
			printf("CPU%d SVE vector length: %u\n", i, len_list[i]);
		sve_max_vector_len = MIN(sve_max_vector_len, len_list[i]);
	}
	free(len_list, M_TEMP);

	if (bootverbose)
		printf("SVE with %u byte vectors\n", sve_max_vector_len);

	if (sve_max_vector_len > 0) {
		EVENTHANDLER_REGISTER(thread_dtor, sve_thread_dtor, NULL,
		    EVENTHANDLER_PRI_ANY);
	}
}
SYSINIT(sve, SI_SUB_SMP, SI_ORDER_ANY, sve_init, NULL);

/* regset get handler for the NT_ARM_SVE core-dump/ptrace note. */
static bool
get_arm64_sve(struct regset *rs, struct thread *td, void *buf,
    size_t *sizep)
{
	struct svereg_header *header;
	struct pcb *pcb;
	size_t buf_size;
	uint16_t sve_flags;

	pcb = td->td_pcb;

	/* If there is no SVE support in HW then we don't support NT_ARM_SVE */
	if (pcb->pcb_sve_len == 0)
		return (false);

	sve_flags = 0;
	if ((pcb->pcb_fpflags & PCB_FP_SVEVALID) == 0) {
		/* If SVE hasn't been used yet provide the VFP registers */
		buf_size = sizeof(struct fpreg);
		sve_flags |= SVEREG_FLAG_FP;
	} else {
		/* We have SVE registers */
		buf_size = sve_buf_size(td);
		sve_flags |= SVEREG_FLAG_SVE;
		KASSERT(pcb->pcb_svesaved != NULL, ("%s: no saved sve",
		    __func__));
	}

	if (buf != NULL) {
		KASSERT(*sizep == sizeof(struct svereg_header) + buf_size,
		    ("%s: invalid size", __func__));

		/* Flush the live registers to the pcb before copying out */
		if (td == curthread && (pcb->pcb_fpflags & PCB_FP_STARTED) != 0)
			vfp_save_state(td, pcb);

		header = buf;
		memset(header, 0, sizeof(*header));

		header->sve_size = sizeof(struct svereg_header) + buf_size;
		header->sve_maxsize = sizeof(struct svereg_header) +
		    sve_max_buf_size();
		header->sve_vec_len = pcb->pcb_sve_len;
		header->sve_max_vec_len = sve_max_vector_len;
		header->sve_flags = sve_flags;

		if ((sve_flags & SVEREG_FLAG_REGS_MASK) == SVEREG_FLAG_FP) {
			struct fpreg *fpregs;

			fpregs = (void *)(&header[1]);
			memcpy(fpregs->fp_q, pcb->pcb_fpustate.vfp_regs,
			    sizeof(fpregs->fp_q));
			fpregs->fp_cr = pcb->pcb_fpustate.vfp_fpcr;
			fpregs->fp_sr = pcb->pcb_fpustate.vfp_fpsr;
		} else {
			memcpy((void *)(&header[1]), pcb->pcb_svesaved,
			    buf_size);
		}
	}
	*sizep = sizeof(struct svereg_header) + buf_size;

	return (true);
}

/* regset set handler for the NT_ARM_SVE note; validates the header first. */
static bool
set_arm64_sve(struct regset *rs, struct thread *td, void *buf, size_t size)
{
	struct svereg_header *header;
	struct pcb *pcb;
	size_t buf_size;
	uint16_t sve_flags;

	pcb = td->td_pcb;

	/* If there is no SVE support in HW then we don't support NT_ARM_SVE */
	if (pcb->pcb_sve_len == 0)
		return (false);

	sve_flags = 0;
	if ((pcb->pcb_fpflags & PCB_FP_SVEVALID) == 0) {
		/*
		 * If the SVE state is invalid provide the FP registers.
		 * This may be because it hasn't been used, or it has but
		 * was switched out in a system call.
		 */
		buf_size = sizeof(struct fpreg);
		sve_flags |= SVEREG_FLAG_FP;
	} else {
		/* We have SVE registers */
		MPASS(pcb->pcb_svesaved != NULL);
		buf_size = sve_buf_size(td);
		sve_flags |= SVEREG_FLAG_SVE;
		KASSERT(pcb->pcb_svesaved != NULL, ("%s: no saved sve",
		    __func__));
	}

	if (size != sizeof(struct svereg_header) + buf_size)
		return (false);

	header = buf;
	/* Sanity checks on the header */
	if (header->sve_size != sizeof(struct svereg_header) + buf_size)
		return (false);

	if (header->sve_maxsize != sizeof(struct svereg_header) +
	    sve_max_buf_size())
		return (false);

	if (header->sve_vec_len != pcb->pcb_sve_len)
		return (false);

	if (header->sve_max_vec_len != sve_max_vector_len)
		return (false);

	if (header->sve_flags != sve_flags)
		return (false);

	if ((sve_flags & SVEREG_FLAG_REGS_MASK) == SVEREG_FLAG_FP) {
		struct fpreg *fpregs;

		fpregs = (void *)(&header[1]);
		memcpy(pcb->pcb_fpustate.vfp_regs, fpregs->fp_q,
		    sizeof(fpregs->fp_q));
		pcb->pcb_fpustate.vfp_fpcr = fpregs->fp_cr;
		pcb->pcb_fpustate.vfp_fpsr = fpregs->fp_sr;
	} else {
		/* Restore the SVE registers */
		memcpy(pcb->pcb_svesaved, (void *)(&header[1]), buf_size);
	}

	return (true);
}

static struct regset regset_arm64_sve = {
	.note = NT_ARM_SVE,
	.get = get_arm64_sve,
	.set = set_arm64_sve,
};
ELF_REGSET(regset_arm64_sve);

struct fpu_kern_ctx *
fpu_kern_alloc_ctx(u_int flags)
{
	struct fpu_kern_ctx *res;
	size_t sz;

	sz = sizeof(struct fpu_kern_ctx);
	res = malloc(sz, M_FPUKERN_CTX, ((flags & FPU_KERN_NOWAIT) ?
	    M_NOWAIT : M_WAITOK) | M_ZERO);
	return (res);
}

void
fpu_kern_free_ctx(struct fpu_kern_ctx *ctx)
{

	KASSERT((ctx->flags & FPU_KERN_CTX_INUSE) == 0, ("free'ing inuse ctx"));
	/* XXXAndrew clear the memory ? */
	free(ctx, M_FPUKERN_CTX);
}

/*
 * Begin a section of kernel code that uses the VFP unit. The interrupted
 * user state is saved into ctx (or, with FPU_KERN_NOCTX, simply flushed
 * while in a critical section).
 */
void
fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, u_int flags)
{
	struct pcb *pcb;

	pcb = td->td_pcb;
	KASSERT((flags & FPU_KERN_NOCTX) != 0 || ctx != NULL,
	    ("ctx is required when !FPU_KERN_NOCTX"));
	KASSERT(ctx == NULL || (ctx->flags & FPU_KERN_CTX_INUSE) == 0,
	    ("using inuse ctx"));
	KASSERT((pcb->pcb_fpflags & PCB_FP_NOSAVE) == 0,
	    ("recursive fpu_kern_enter while in PCB_FP_NOSAVE state"));

	if ((flags & FPU_KERN_NOCTX) != 0) {
		critical_enter();
		if (curthread == PCPU_GET(fpcurthread)) {
			vfp_save_state(curthread, pcb);
		}
		PCPU_SET(fpcurthread, NULL);

		vfp_enable();
		pcb->pcb_fpflags |= PCB_FP_KERN | PCB_FP_NOSAVE |
		    PCB_FP_STARTED;
		return;
	}

	if ((flags & FPU_KERN_KTHR) != 0 && is_fpu_kern_thread(0)) {
		/* Dedicated FPU kernel threads need no save/restore */
		ctx->flags = FPU_KERN_CTX_DUMMY | FPU_KERN_CTX_INUSE;
		return;
	}
	/*
	 * Check either we are already using the VFP in the kernel, or
	 * the saved state points to the default user space.
	 */
	KASSERT((pcb->pcb_fpflags & PCB_FP_KERN) != 0 ||
	    pcb->pcb_fpusaved == &pcb->pcb_fpustate,
	    ("Mangled pcb_fpusaved %x %p %p", pcb->pcb_fpflags,
	    pcb->pcb_fpusaved, &pcb->pcb_fpustate));
	ctx->flags = FPU_KERN_CTX_INUSE;
	vfp_save_state(curthread, pcb);
	ctx->prev = pcb->pcb_fpusaved;
	pcb->pcb_fpusaved = &ctx->state;
	pcb->pcb_fpflags |= PCB_FP_KERN;
	pcb->pcb_fpflags &= ~PCB_FP_STARTED;

	return;
}

/*
 * End a section of kernel VFP use started by fpu_kern_enter(), restoring
 * the previously saved state pointer. Always returns 0.
 */
int
fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx)
{
	struct pcb *pcb;

	pcb = td->td_pcb;

	if ((pcb->pcb_fpflags & PCB_FP_NOSAVE) != 0) {
		KASSERT(ctx == NULL, ("non-null ctx after FPU_KERN_NOCTX"));
		KASSERT(PCPU_GET(fpcurthread) == NULL,
		    ("non-NULL fpcurthread for PCB_FP_NOSAVE"));
		CRITICAL_ASSERT(td);

		vfp_disable();
		pcb->pcb_fpflags &= ~(PCB_FP_NOSAVE | PCB_FP_STARTED);
		critical_exit();
	} else {
		KASSERT((ctx->flags & FPU_KERN_CTX_INUSE) != 0,
		    ("FPU context not inuse"));
		ctx->flags &= ~FPU_KERN_CTX_INUSE;

		if (is_fpu_kern_thread(0) &&
		    (ctx->flags & FPU_KERN_CTX_DUMMY) != 0)
			return (0);
		KASSERT((ctx->flags & FPU_KERN_CTX_DUMMY) == 0, ("dummy ctx"));
		critical_enter();
		vfp_discard(td);
		critical_exit();
		pcb->pcb_fpflags &= ~PCB_FP_STARTED;
		pcb->pcb_fpusaved = ctx->prev;
	}

	if (pcb->pcb_fpusaved == &pcb->pcb_fpustate) {
		pcb->pcb_fpflags &= ~PCB_FP_KERN;
	} else {
		KASSERT((pcb->pcb_fpflags & PCB_FP_KERN) != 0,
		    ("unpaired fpu_kern_leave"));
	}

	return (0);
}

/* Mark the current kernel thread as a permanent VFP user. */
int
fpu_kern_thread(u_int flags __unused)
{
	struct pcb *pcb = curthread->td_pcb;

	KASSERT((curthread->td_pflags & TDP_KTHREAD) != 0,
	    ("Only kthread may use fpu_kern_thread"));
	KASSERT(pcb->pcb_fpusaved == &pcb->pcb_fpustate,
	    ("Mangled pcb_fpusaved"));
	KASSERT((pcb->pcb_fpflags & PCB_FP_KERN) == 0,
	    ("Thread already setup for the VFP"));
	pcb->pcb_fpflags |= PCB_FP_KERN;
	return (0);
}

/* Is the current thread a kernel thread set up for VFP use? */
int
is_fpu_kern_thread(u_int flags __unused)
{
	struct pcb *curpcb;

	if ((curthread->td_pflags & TDP_KTHREAD) == 0)
		return (0);
	curpcb = curthread->td_pcb;
	return ((curpcb->pcb_fpflags & PCB_FP_KERN) != 0);
}

/*
 * FPU save area alloc/free/init utility routines
 */
struct vfpstate *
fpu_save_area_alloc(void)
{
	return (uma_zalloc(fpu_save_area_zone, M_WAITOK));
}

void
fpu_save_area_free(struct vfpstate *fsa)
{
	uma_zfree(fpu_save_area_zone, fsa);
}

void
fpu_save_area_reset(struct vfpstate *fsa)
{
	memcpy(fsa, fpu_initialstate, sizeof(*fsa));
}
#endif