1 /* $NetBSD: kern_cpu.c,v 1.98 2025/01/17 04:11:33 mrg Exp $ */ 2 3 /*- 4 * Copyright (c) 2007, 2008, 2009, 2010, 2012, 2019 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /*- 33 * Copyright (c)2007 YAMAMOTO Takashi, 34 * All rights reserved. 35 * 36 * Redistribution and use in source and binary forms, with or without 37 * modification, are permitted provided that the following conditions 38 * are met: 39 * 1. Redistributions of source code must retain the above copyright 40 * notice, this list of conditions and the following disclaimer. 41 * 2. Redistributions in binary form must reproduce the above copyright 42 * notice, this list of conditions and the following disclaimer in the 43 * documentation and/or other materials provided with the distribution. 44 * 45 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 46 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 47 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 48 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 49 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 50 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 51 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 52 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 53 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 54 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 55 * SUCH DAMAGE. 56 */ 57 58 /* 59 * CPU related routines not shared with rump. 60 */ 61 62 #include <sys/cdefs.h> 63 __KERNEL_RCSID(0, "$NetBSD: kern_cpu.c,v 1.98 2025/01/17 04:11:33 mrg Exp $"); 64 65 #ifdef _KERNEL_OPT 66 #include "opt_cpu_ucode.h" 67 #include "opt_heartbeat.h" 68 #endif 69 70 #include <sys/param.h> 71 #include <sys/systm.h> 72 #include <sys/idle.h> 73 #include <sys/sched.h> 74 #include <sys/intr.h> 75 #include <sys/conf.h> 76 #include <sys/cpu.h> 77 #include <sys/cpuio.h> 78 #include <sys/proc.h> 79 #include <sys/percpu.h> 80 #include <sys/kernel.h> 81 #include <sys/kauth.h> 82 #include <sys/xcall.h> 83 #include <sys/pool.h> 84 #include <sys/kmem.h> 85 #include <sys/select.h> 86 #include <sys/namei.h> 87 #include <sys/callout.h> 88 #include <sys/pcu.h> 89 #include <sys/heartbeat.h> 90 91 #include <uvm/uvm_extern.h> 92 93 #include "ioconf.h" 94 95 /* 96 * If the port has stated that cpu_data is the first thing in cpu_info, 97 * verify that the claim is true. This will prevent them from getting out 98 * of sync. 99 */ 100 #ifdef __HAVE_CPU_DATA_FIRST 101 CTASSERT(offsetof(struct cpu_info, ci_data) == 0); 102 #else 103 CTASSERT(offsetof(struct cpu_info, ci_data) != 0); 104 #endif 105 106 int (*compat_cpuctl_ioctl)(struct lwp *, u_long, void *) = (void *)enosys; 107 108 static void cpu_xc_online(struct cpu_info *, void *); 109 static void cpu_xc_offline(struct cpu_info *, void *); 110 111 dev_type_ioctl(cpuctl_ioctl); 112 113 const struct cdevsw cpuctl_cdevsw = { 114 .d_open = nullopen, 115 .d_close = nullclose, 116 .d_read = nullread, 117 .d_write = nullwrite, 118 .d_ioctl = cpuctl_ioctl, 119 .d_stop = nullstop, 120 .d_tty = notty, 121 .d_poll = nopoll, 122 .d_mmap = nommap, 123 .d_kqfilter = nokqfilter, 124 .d_discard = nodiscard, 125 .d_flag = D_OTHER | D_MPSAFE 126 }; 127 128 int 129 mi_cpu_attach(struct cpu_info *ci) 130 { 131 int error; 132 133 KASSERT(maxcpus > 0); 134 135 if ((ci->ci_index = ncpu) >= maxcpus) 136 panic("Too many CPUs. Increase MAXCPUS?"); 137 kcpuset_set(kcpuset_attached, cpu_index(ci)); 138 139 /* 140 * Create a convenience cpuset of just ourselves. 141 */ 142 kcpuset_create(&ci->ci_kcpuset, true); 143 kcpuset_set(ci->ci_kcpuset, cpu_index(ci)); 144 145 TAILQ_INIT(&ci->ci_data.cpu_ld_locks); 146 __cpu_simple_lock_init(&ci->ci_data.cpu_ld_lock); 147 148 /* This is useful for eg, per-cpu evcnt */ 149 snprintf(ci->ci_data.cpu_name, sizeof(ci->ci_data.cpu_name), "cpu%d", 150 cpu_index(ci)); 151 152 if (__predict_false(cpu_infos == NULL)) { 153 size_t ci_bufsize = (maxcpus + 1) * sizeof(struct cpu_info *); 154 cpu_infos = kmem_zalloc(ci_bufsize, KM_SLEEP); 155 } 156 cpu_infos[cpu_index(ci)] = ci; 157 158 sched_cpuattach(ci); 159 160 error = create_idle_lwp(ci); 161 if (error != 0) { 162 /* XXX revert sched_cpuattach */ 163 return error; 164 } 165 166 if (ci == curcpu()) 167 ci->ci_onproc = curlwp; 168 else 169 ci->ci_onproc = ci->ci_data.cpu_idlelwp; 170 171 percpu_init_cpu(ci); 172 softint_init(ci); 173 callout_init_cpu(ci); 174 xc_init_cpu(ci); 175 pool_cache_cpu_init(ci); 176 selsysinit(ci); 177 cache_cpu_init(ci); 178 TAILQ_INIT(&ci->ci_data.cpu_biodone); 179 ncpu++; 180 ncpuonline++; 181 182 return 0; 183 } 184 185 void 186 cpuctlattach(int dummy __unused) 187 { 188 189 KASSERT(cpu_infos != NULL); 190 } 191 192 int 193 cpuctl_ioctl(dev_t dev, u_long cmd, void *data, int flag, lwp_t *l) 194 { 195 CPU_INFO_ITERATOR cii; 196 cpustate_t *cs; 197 struct cpu_info *ci; 198 int error, i; 199 u_int id; 200 201 error = 0; 202 203 mutex_enter(&cpu_lock); 204 switch (cmd) { 205 case IOC_CPU_SETSTATE: 206 cs = data; 207 error = kauth_authorize_system(l->l_cred, 208 KAUTH_SYSTEM_CPU, KAUTH_REQ_SYSTEM_CPU_SETSTATE, cs, NULL, 209 NULL); 210 if (error != 0) 211 break; 212 if (cs->cs_id >= maxcpus || 213 (ci = cpu_lookup(cs->cs_id)) == NULL) { 214 error = ESRCH; 215 break; 216 } 217 cpu_setintr(ci, cs->cs_intr); /* XXX neglect errors */ 218 error = cpu_setstate(ci, cs->cs_online); 219 break; 220 221 case IOC_CPU_GETSTATE: 222 cs = data; 223 id = cs->cs_id; 224 memset(cs, 0, sizeof(*cs)); 225 cs->cs_id = id; 226 if (cs->cs_id >= maxcpus || 227 (ci = cpu_lookup(id)) == NULL) { 228 error = ESRCH; 229 break; 230 } 231 if ((ci->ci_schedstate.spc_flags & SPCF_OFFLINE) != 0) 232 cs->cs_online = false; 233 else 234 cs->cs_online = true; 235 if ((ci->ci_schedstate.spc_flags & SPCF_NOINTR) != 0) 236 cs->cs_intr = false; 237 else 238 cs->cs_intr = true; 239 cs->cs_lastmod = (int32_t)ci->ci_schedstate.spc_lastmod; 240 cs->cs_lastmodhi = (int32_t) 241 (ci->ci_schedstate.spc_lastmod >> 32); 242 cs->cs_intrcnt = cpu_intr_count(ci) + 1; 243 cs->cs_hwid = ci->ci_cpuid; 244 break; 245 246 case IOC_CPU_MAPID: 247 i = 0; 248 for (CPU_INFO_FOREACH(cii, ci)) { 249 if (i++ == *(int *)data) 250 break; 251 } 252 if (ci == NULL) 253 error = ESRCH; 254 else 255 *(int *)data = cpu_index(ci); 256 break; 257 258 case IOC_CPU_GETCOUNT: 259 *(int *)data = ncpu; 260 break; 261 262 #ifdef CPU_UCODE 263 case IOC_CPU_UCODE_GET_VERSION: 264 error = cpu_ucode_get_version((struct cpu_ucode_version *)data); 265 break; 266 267 case IOC_CPU_UCODE_APPLY: 268 error = kauth_authorize_machdep(l->l_cred, 269 KAUTH_MACHDEP_CPU_UCODE_APPLY, 270 NULL, NULL, NULL, NULL); 271 if (error != 0) 272 break; 273 error = cpu_ucode_apply((const struct cpu_ucode *)data); 274 break; 275 #endif 276 277 default: 278 error = (*compat_cpuctl_ioctl)(l, cmd, data); 279 break; 280 } 281 mutex_exit(&cpu_lock); 282 283 return error; 284 } 285 286 struct cpu_info * 287 cpu_lookup(u_int idx) 288 { 289 struct cpu_info *ci; 290 291 /* 292 * cpu_infos is a NULL terminated array of MAXCPUS + 1 entries, 293 * so an index of MAXCPUS here is ok. See mi_cpu_attach. 294 */ 295 KASSERT(idx <= maxcpus); 296 297 if (__predict_false(cpu_infos == NULL)) { 298 KASSERT(idx == 0); 299 return curcpu(); 300 } 301 302 ci = cpu_infos[idx]; 303 KASSERT(ci == NULL || cpu_index(ci) == idx); 304 KASSERTMSG(idx < maxcpus || ci == NULL, "idx %d ci %p", idx, ci); 305 306 return ci; 307 } 308 309 static void 310 cpu_xc_offline(struct cpu_info *ci, void *unused) 311 { 312 struct schedstate_percpu *spc, *mspc = NULL; 313 struct cpu_info *target_ci; 314 struct lwp *l; 315 CPU_INFO_ITERATOR cii; 316 int s; 317 318 /* 319 * Thread that made the cross call (separate context) holds 320 * cpu_lock on our behalf. 321 */ 322 spc = &ci->ci_schedstate; 323 s = splsched(); 324 spc->spc_flags |= SPCF_OFFLINE; 325 splx(s); 326 327 /* Take the first available CPU for the migration. */ 328 for (CPU_INFO_FOREACH(cii, target_ci)) { 329 mspc = &target_ci->ci_schedstate; 330 if ((mspc->spc_flags & SPCF_OFFLINE) == 0) 331 break; 332 } 333 KASSERT(target_ci != NULL); 334 335 /* 336 * Migrate all non-bound threads to the other CPU. Note that this 337 * runs from the xcall thread, thus handling of LSONPROC is not needed. 338 */ 339 mutex_enter(&proc_lock); 340 LIST_FOREACH(l, &alllwp, l_list) { 341 struct cpu_info *mci; 342 343 lwp_lock(l); 344 if (l->l_cpu != ci || (l->l_pflag & (LP_BOUND | LP_INTR))) { 345 lwp_unlock(l); 346 continue; 347 } 348 /* Regular case - no affinity. */ 349 if (l->l_affinity == NULL) { 350 lwp_migrate(l, target_ci); 351 continue; 352 } 353 /* Affinity is set, find an online CPU in the set. */ 354 for (CPU_INFO_FOREACH(cii, mci)) { 355 mspc = &mci->ci_schedstate; 356 if ((mspc->spc_flags & SPCF_OFFLINE) == 0 && 357 kcpuset_isset(l->l_affinity, cpu_index(mci))) 358 break; 359 } 360 if (mci == NULL) { 361 lwp_unlock(l); 362 mutex_exit(&proc_lock); 363 goto fail; 364 } 365 lwp_migrate(l, mci); 366 } 367 mutex_exit(&proc_lock); 368 369 #if PCU_UNIT_COUNT > 0 370 pcu_save_all_on_cpu(); 371 #endif 372 373 heartbeat_suspend(); 374 375 #ifdef __HAVE_MD_CPU_OFFLINE 376 cpu_offline_md(); 377 #endif 378 return; 379 fail: 380 /* Just unset the SPCF_OFFLINE flag, caller will check */ 381 s = splsched(); 382 spc->spc_flags &= ~SPCF_OFFLINE; 383 splx(s); 384 } 385 386 static void 387 cpu_xc_online(struct cpu_info *ci, void *unused) 388 { 389 struct schedstate_percpu *spc; 390 int s; 391 392 heartbeat_resume(); 393 394 spc = &ci->ci_schedstate; 395 s = splsched(); 396 spc->spc_flags &= ~SPCF_OFFLINE; 397 splx(s); 398 } 399 400 int 401 cpu_setstate(struct cpu_info *ci, bool online) 402 { 403 struct schedstate_percpu *spc; 404 CPU_INFO_ITERATOR cii; 405 struct cpu_info *ci2; 406 uint64_t where; 407 xcfunc_t func; 408 int nonline; 409 410 spc = &ci->ci_schedstate; 411 412 KASSERT(mutex_owned(&cpu_lock)); 413 414 if (online) { 415 if ((spc->spc_flags & SPCF_OFFLINE) == 0) 416 return 0; 417 func = (xcfunc_t)cpu_xc_online; 418 } else { 419 if ((spc->spc_flags & SPCF_OFFLINE) != 0) 420 return 0; 421 nonline = 0; 422 /* 423 * Ensure that at least one CPU within the processor set 424 * stays online. Revisit this later. 425 */ 426 for (CPU_INFO_FOREACH(cii, ci2)) { 427 if ((ci2->ci_schedstate.spc_flags & SPCF_OFFLINE) != 0) 428 continue; 429 if (ci2->ci_schedstate.spc_psid != spc->spc_psid) 430 continue; 431 nonline++; 432 } 433 if (nonline == 1) 434 return EBUSY; 435 func = (xcfunc_t)cpu_xc_offline; 436 } 437 438 where = xc_unicast(0, func, ci, NULL, ci); 439 xc_wait(where); 440 if (online) { 441 KASSERT((spc->spc_flags & SPCF_OFFLINE) == 0); 442 ncpuonline++; 443 } else { 444 if ((spc->spc_flags & SPCF_OFFLINE) == 0) { 445 /* If was not set offline, then it is busy */ 446 return EBUSY; 447 } 448 ncpuonline--; 449 } 450 451 spc->spc_lastmod = time_second; 452 return 0; 453 } 454 455 bool 456 cpu_is_type(struct cpu_info *ci, int wanted) 457 { 458 459 return (ci->ci_schedstate.spc_flags & wanted) == wanted; 460 } 461 462 bool 463 cpu_is_idle_1stclass(struct cpu_info *ci) 464 { 465 const int wanted = SPCF_IDLE | SPCF_1STCLASS; 466 467 return cpu_is_type(ci, wanted); 468 } 469 470 bool 471 cpu_is_1stclass(struct cpu_info *ci) 472 { 473 const int wanted = SPCF_1STCLASS; 474 475 return cpu_is_type(ci, wanted); 476 } 477 478 bool 479 cpu_is_better(struct cpu_info *ci1, struct cpu_info *ci2) 480 { 481 const int ci1_flags = ci1->ci_schedstate.spc_flags; 482 const int ci2_flags = ci2->ci_schedstate.spc_flags; 483 484 if ((ci1_flags & SPCF_1STCLASS) != 0 && 485 (ci2_flags & SPCF_1STCLASS) == 0) 486 return ci1; 487 488 return ci2; 489 } 490 491 #if defined(__HAVE_INTR_CONTROL) 492 static void 493 cpu_xc_intr(struct cpu_info *ci, void *unused) 494 { 495 struct schedstate_percpu *spc; 496 int s; 497 498 spc = &ci->ci_schedstate; 499 s = splsched(); 500 spc->spc_flags &= ~SPCF_NOINTR; 501 splx(s); 502 } 503 504 static void 505 cpu_xc_nointr(struct cpu_info *ci, void *unused) 506 { 507 struct schedstate_percpu *spc; 508 int s; 509 510 spc = &ci->ci_schedstate; 511 s = splsched(); 512 spc->spc_flags |= SPCF_NOINTR; 513 splx(s); 514 } 515 516 int 517 cpu_setintr(struct cpu_info *ci, bool intr) 518 { 519 struct schedstate_percpu *spc; 520 CPU_INFO_ITERATOR cii; 521 struct cpu_info *ci2; 522 uint64_t where; 523 xcfunc_t func; 524 int nintr; 525 526 spc = &ci->ci_schedstate; 527 528 KASSERT(mutex_owned(&cpu_lock)); 529 530 if (intr) { 531 if ((spc->spc_flags & SPCF_NOINTR) == 0) 532 return 0; 533 func = (xcfunc_t)cpu_xc_intr; 534 } else { 535 if (CPU_IS_PRIMARY(ci)) /* XXX kern/45117 */ 536 return EINVAL; 537 if ((spc->spc_flags & SPCF_NOINTR) != 0) 538 return 0; 539 /* 540 * Ensure that at least one CPU within the system 541 * is handing device interrupts. 542 */ 543 nintr = 0; 544 for (CPU_INFO_FOREACH(cii, ci2)) { 545 if ((ci2->ci_schedstate.spc_flags & SPCF_NOINTR) != 0) 546 continue; 547 if (ci2 == ci) 548 continue; 549 nintr++; 550 } 551 if (nintr == 0) 552 return EBUSY; 553 func = (xcfunc_t)cpu_xc_nointr; 554 } 555 556 where = xc_unicast(0, func, ci, NULL, ci); 557 xc_wait(where); 558 if (intr) { 559 KASSERT((spc->spc_flags & SPCF_NOINTR) == 0); 560 } else if ((spc->spc_flags & SPCF_NOINTR) == 0) { 561 /* If was not set offline, then it is busy */ 562 return EBUSY; 563 } 564 565 /* Direct interrupts away from the CPU and record the change. */ 566 cpu_intr_redistribute(); 567 spc->spc_lastmod = time_second; 568 return 0; 569 } 570 #else /* __HAVE_INTR_CONTROL */ 571 int 572 cpu_setintr(struct cpu_info *ci, bool intr) 573 { 574 575 return EOPNOTSUPP; 576 } 577 578 u_int 579 cpu_intr_count(struct cpu_info *ci) 580 { 581 582 return 0; /* 0 == "don't know" */ 583 } 584 #endif /* __HAVE_INTR_CONTROL */ 585 586 #ifdef CPU_UCODE 587 int 588 cpu_ucode_load(struct cpu_ucode_softc *sc, const char *fwname) 589 { 590 firmware_handle_t fwh; 591 int error; 592 593 if (sc->sc_blob != NULL) { 594 firmware_free(sc->sc_blob, sc->sc_blobsize); 595 sc->sc_blob = NULL; 596 sc->sc_blobsize = 0; 597 } 598 599 error = cpu_ucode_md_open(&fwh, sc->loader_version, fwname); 600 if (error != 0) { 601 #ifdef DEBUG 602 printf("ucode: firmware_open(%s) failed: %i\n", fwname, error); 603 #endif 604 goto err0; 605 } 606 607 sc->sc_blobsize = firmware_get_size(fwh); 608 if (sc->sc_blobsize == 0) { 609 error = EFTYPE; 610 firmware_close(fwh); 611 goto err0; 612 } 613 sc->sc_blob = firmware_malloc(sc->sc_blobsize); 614 if (sc->sc_blob == NULL) { 615 error = ENOMEM; 616 firmware_close(fwh); 617 goto err0; 618 } 619 620 error = firmware_read(fwh, 0, sc->sc_blob, sc->sc_blobsize); 621 firmware_close(fwh); 622 if (error != 0) 623 goto err1; 624 625 return 0; 626 627 err1: 628 firmware_free(sc->sc_blob, sc->sc_blobsize); 629 sc->sc_blob = NULL; 630 sc->sc_blobsize = 0; 631 err0: 632 return error; 633 } 634 #endif 635