/*	$OpenBSD: kern_sched.c,v 1.86 2023/08/14 08:33:24 mpi Exp $	*/
/*
 * Copyright (c) 2007, 2008 Artur Grabowski <art@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>

#include <sys/sched.h>
#include <sys/proc.h>
#include <sys/kthread.h>
#include <sys/systm.h>
#include <sys/clockintr.h>
#include <sys/resourcevar.h>
#include <sys/task.h>
#include <sys/time.h>
#include <sys/smr.h>
#include <sys/tracepoint.h>

#include <uvm/uvm_extern.h>

void sched_kthreads_create(void *);

int sched_proc_to_cpu_cost(struct cpu_info *ci, struct proc *p);
struct proc *sched_steal_proc(struct cpu_info *);

/*
 * To help choosing which cpu should run which process we keep track
 * of cpus which are currently idle and which cpus have processes
 * queued.
 */
struct cpuset sched_idle_cpus;
struct cpuset sched_queued_cpus;
struct cpuset sched_all_cpus;

/*
 * Some general scheduler counters.
 */
uint64_t sched_nmigrations;	/* Cpu migration counter */
uint64_t sched_nomigrations;	/* Cpu no migration counter */
uint64_t sched_noidle;		/* Times we didn't pick the idle task */
uint64_t sched_stolen;		/* Times we stole proc from other cpus */
uint64_t sched_choose;		/* Times we chose a cpu */
uint64_t sched_wasidle;		/* Times we came out of idle */

int sched_smt;

/*
 * A few notes about cpu_switchto, which is implemented in MD code.
 *
 * cpu_switchto takes two arguments, the old proc and the proc
 * it should switch to. The new proc will never be NULL, so we always have
 * a saved state that we need to switch to. The old proc however can
 * be NULL if the process is exiting. NULL for the old proc simply
 * means "don't bother saving old state".
 *
 * cpu_switchto is supposed to atomically load the new state of the process
 * including the pcb, pmap and setting curproc, the p_cpu pointer in the
 * proc and p_stat to SONPROC. That is atomic only with respect to
 * interrupts; other cpus in the system must not depend on this state being
 * consistent. Therefore no locking is necessary in cpu_switchto other than
 * blocking interrupts during the context switch.
 */
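/*
 * For illustration only (a rough sketch, not any particular port's code),
 * the contract described above amounts to:
 *
 *	cpu_switchto(old, new):
 *		if (old != NULL)
 *			save register state into old's pcb
 *		activate new's pmap and load new's pcb
 *		new->p_cpu = curcpu(), new->p_stat = SONPROC, curproc = new
 *
 * all done with interrupts blocked on this cpu and without taking locks.
 */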

/*
 * sched_init_cpu is called from main() for the boot cpu, then it's the
 * responsibility of the MD code to call it for all other cpus.
 */
void
sched_init_cpu(struct cpu_info *ci)
{
	struct schedstate_percpu *spc = &ci->ci_schedstate;
	int i;

	for (i = 0; i < SCHED_NQS; i++)
		TAILQ_INIT(&spc->spc_qs[i]);

	spc->spc_idleproc = NULL;

	if (spc->spc_itimer == NULL) {
		spc->spc_itimer = clockintr_establish(&ci->ci_queue,
		    itimer_update);
		if (spc->spc_itimer == NULL) {
			panic("%s: clockintr_establish itimer_update",
			    __func__);
		}
	}
	if (spc->spc_profclock == NULL) {
		spc->spc_profclock = clockintr_establish(&ci->ci_queue,
		    profclock);
		if (spc->spc_profclock == NULL)
			panic("%s: clockintr_establish profclock", __func__);
	}
	if (spc->spc_roundrobin == NULL) {
		spc->spc_roundrobin = clockintr_establish(&ci->ci_queue,
		    roundrobin);
		if (spc->spc_roundrobin == NULL)
			panic("%s: clockintr_establish roundrobin", __func__);
	}

	kthread_create_deferred(sched_kthreads_create, ci);

	LIST_INIT(&spc->spc_deadproc);
	SIMPLEQ_INIT(&spc->spc_deferred);

	/*
	 * Slight hack here until the cpuset code handles cpu_info
	 * structures.
	 */
	cpuset_init_cpu(ci);

#ifdef __HAVE_CPU_TOPOLOGY
	if (!sched_smt && ci->ci_smt_id > 0)
		return;
#endif
	cpuset_add(&sched_all_cpus, ci);
}

void
sched_kthreads_create(void *v)
{
	struct cpu_info *ci = v;
	struct schedstate_percpu *spc = &ci->ci_schedstate;
	static int num;

	if (fork1(&proc0, FORK_SHAREVM|FORK_SHAREFILES|FORK_NOZOMBIE|
	    FORK_SYSTEM|FORK_IDLE, sched_idle, ci, NULL,
	    &spc->spc_idleproc))
		panic("fork idle");

	/* Name it as specified. */
	snprintf(spc->spc_idleproc->p_p->ps_comm,
	    sizeof(spc->spc_idleproc->p_p->ps_comm),
	    "idle%d", num);

	num++;
}

void
sched_idle(void *v)
{
	struct schedstate_percpu *spc;
	struct proc *p = curproc;
	struct cpu_info *ci = v;
	int s;

	KERNEL_UNLOCK();

	spc = &ci->ci_schedstate;

	/*
	 * First time we enter here, we're not supposed to idle,
	 * just go away for a while.
	 */
	SCHED_LOCK(s);
	cpuset_add(&sched_idle_cpus, ci);
	p->p_stat = SSLEEP;
	p->p_cpu = ci;
	atomic_setbits_int(&p->p_flag, P_CPUPEG);
	mi_switch();
	cpuset_del(&sched_idle_cpus, ci);
	SCHED_UNLOCK(s);

	KASSERT(ci == curcpu());
	KASSERT(curproc == spc->spc_idleproc);

	while (1) {
		while (!cpu_is_idle(curcpu())) {
			struct proc *dead;

			SCHED_LOCK(s);
			p->p_stat = SSLEEP;
			mi_switch();
			SCHED_UNLOCK(s);

			while ((dead = LIST_FIRST(&spc->spc_deadproc))) {
				LIST_REMOVE(dead, p_hash);
				exit2(dead);
			}
		}

		splassert(IPL_NONE);

		smr_idle();

		cpuset_add(&sched_idle_cpus, ci);
		cpu_idle_enter();
		while (spc->spc_whichqs == 0) {
#ifdef MULTIPROCESSOR
			if (spc->spc_schedflags & SPCF_SHOULDHALT &&
			    (spc->spc_schedflags & SPCF_HALTED) == 0) {
				cpuset_del(&sched_idle_cpus, ci);
				SCHED_LOCK(s);
				atomic_setbits_int(&spc->spc_schedflags,
				    spc->spc_whichqs ? 0 : SPCF_HALTED);
				SCHED_UNLOCK(s);
				wakeup(spc);
			}
#endif
			cpu_idle_cycle();
		}
		cpu_idle_leave();
		cpuset_del(&sched_idle_cpus, ci);
	}
}
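/*
 * Note on the two loops above: cpu_is_idle() (assumed here, per sys/sched.h,
 * to boil down to a check that spc_whichqs is 0) keys the outer loop off the
 * same per-cpu run queue bitmask that the inner loop polls, so the idle proc
 * alternates between yielding while work is queued and spinning in
 * cpu_idle_cycle() while the queues are empty.
 */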

/*
 * To free our address space we have to jump through a few hoops.
 * The freeing is done by the reaper, but until we have one reaper
 * per cpu, we have no way of putting this proc on the deadproc list
 * and waking up the reaper without risking having our address space and
 * stack torn from under us before we manage to switch to another proc.
 * Therefore we have a per-cpu list of dead processes where we put this
 * proc and have idle clean up that list and move it to the reaper list.
 * All this will be unnecessary once we can bind the reaper to this cpu
 * and not risk having it switch to another in case it sleeps.
 */
void
sched_exit(struct proc *p)
{
	struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
	struct timespec ts;
	struct proc *idle;
	int s;

	nanouptime(&ts);
	timespecsub(&ts, &spc->spc_runtime, &ts);
	timespecadd(&p->p_rtime, &ts, &p->p_rtime);

	if (ISSET(spc->spc_schedflags, SPCF_ITIMER)) {
		atomic_clearbits_int(&spc->spc_schedflags, SPCF_ITIMER);
		clockintr_cancel(spc->spc_itimer);
	}
	if (ISSET(spc->spc_schedflags, SPCF_PROFCLOCK)) {
		atomic_clearbits_int(&spc->spc_schedflags, SPCF_PROFCLOCK);
		clockintr_cancel(spc->spc_profclock);
	}

	LIST_INSERT_HEAD(&spc->spc_deadproc, p, p_hash);

#ifdef MULTIPROCESSOR
	/* This process no longer needs to hold the kernel lock. */
	KERNEL_ASSERT_LOCKED();
	__mp_release_all(&kernel_lock);
#endif

	SCHED_LOCK(s);
	idle = spc->spc_idleproc;
	idle->p_stat = SRUN;
	cpu_switchto(NULL, idle);
	panic("cpu_switchto returned");
}

/*
 * Run queue management.
 */
void
sched_init_runqueues(void)
{
}

void
setrunqueue(struct cpu_info *ci, struct proc *p, uint8_t prio)
{
	struct schedstate_percpu *spc;
	int queue = prio >> 2;

	if (ci == NULL)
		ci = sched_choosecpu(p);

	KASSERT(ci != NULL);
	SCHED_ASSERT_LOCKED();

	p->p_cpu = ci;
	p->p_stat = SRUN;
	p->p_runpri = prio;

	spc = &p->p_cpu->ci_schedstate;
	spc->spc_nrun++;
	TRACEPOINT(sched, enqueue, p->p_tid + THREAD_PID_OFFSET,
	    p->p_p->ps_pid);

	TAILQ_INSERT_TAIL(&spc->spc_qs[queue], p, p_runq);
	spc->spc_whichqs |= (1U << queue);
	cpuset_add(&sched_queued_cpus, p->p_cpu);

	if (cpuset_isset(&sched_idle_cpus, p->p_cpu))
		cpu_unidle(p->p_cpu);

	if (prio < spc->spc_curpriority)
		need_resched(ci);
}

void
remrunqueue(struct proc *p)
{
	struct schedstate_percpu *spc;
	int queue = p->p_runpri >> 2;

	SCHED_ASSERT_LOCKED();
	spc = &p->p_cpu->ci_schedstate;
	spc->spc_nrun--;
	TRACEPOINT(sched, dequeue, p->p_tid + THREAD_PID_OFFSET,
	    p->p_p->ps_pid);

	TAILQ_REMOVE(&spc->spc_qs[queue], p, p_runq);
	if (TAILQ_EMPTY(&spc->spc_qs[queue])) {
		spc->spc_whichqs &= ~(1U << queue);
		if (spc->spc_whichqs == 0)
			cpuset_del(&sched_queued_cpus, p->p_cpu);
	}
}
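/*
 * A worked example of the queue mapping above (illustrative numbers only):
 * a thread with priority 42 lands on spc_qs[10] (42 >> 2) and bit 10 is set
 * in spc_whichqs, so sched_chooseproc() below can find the highest-priority
 * non-empty queue with a single ffs() on that bitmask.
 */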

struct proc *
sched_chooseproc(void)
{
	struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
	struct proc *p;
	int queue;

	SCHED_ASSERT_LOCKED();

#ifdef MULTIPROCESSOR
	if (spc->spc_schedflags & SPCF_SHOULDHALT) {
		if (spc->spc_whichqs) {
			for (queue = 0; queue < SCHED_NQS; queue++) {
				while ((p = TAILQ_FIRST(&spc->spc_qs[queue]))) {
					remrunqueue(p);
					setrunqueue(NULL, p, p->p_runpri);
					if (p->p_cpu == curcpu()) {
						KASSERT(p->p_flag & P_CPUPEG);
						goto again;
					}
				}
			}
		}
		p = spc->spc_idleproc;
		KASSERT(p);
		KASSERT(p->p_wchan == NULL);
		p->p_stat = SRUN;
		return (p);
	}
#endif

again:
	if (spc->spc_whichqs) {
		queue = ffs(spc->spc_whichqs) - 1;
		p = TAILQ_FIRST(&spc->spc_qs[queue]);
		remrunqueue(p);
		sched_noidle++;
		if (p->p_stat != SRUN)
			panic("thread %d not in SRUN: %d", p->p_tid, p->p_stat);
	} else if ((p = sched_steal_proc(curcpu())) == NULL) {
		p = spc->spc_idleproc;
		if (p == NULL) {
			int s;
			/*
			 * We get here if someone decides to switch during
			 * boot before forking kthreads, bleh.
			 * This is kind of like a stupid idle loop.
			 */
#ifdef MULTIPROCESSOR
			__mp_unlock(&sched_lock);
#endif
			spl0();
			delay(10);
			SCHED_LOCK(s);
			goto again;
		}
		KASSERT(p);
		p->p_stat = SRUN;
	}

	KASSERT(p->p_wchan == NULL);
	return (p);
}

struct cpu_info *
sched_choosecpu_fork(struct proc *parent, int flags)
{
#ifdef MULTIPROCESSOR
	struct cpu_info *choice = NULL;
	int run, best_run = INT_MAX;
	struct cpu_info *ci;
	struct cpuset set;

#if 0
	/*
	 * XXX
	 * Don't do this until we have a painless way to move the cpu in exec.
	 * Preferably when nuking the old pmap and getting a new one on a
	 * new cpu.
	 */
	/*
	 * PPWAIT forks are simple. We know that the parent will not
	 * run until we exec and choose another cpu, so we just steal its
	 * cpu.
	 */
	if (flags & FORK_PPWAIT)
		return (parent->p_cpu);
#endif

	/*
	 * Look at all cpus that are currently idle and have nothing queued.
	 * If there are none, pick the one with the fewest queued procs first,
	 * then the one with the lowest load average.
	 */
	cpuset_complement(&set, &sched_queued_cpus, &sched_idle_cpus);
	cpuset_intersection(&set, &set, &sched_all_cpus);
	if (cpuset_first(&set) == NULL)
		cpuset_copy(&set, &sched_all_cpus);

	while ((ci = cpuset_first(&set)) != NULL) {
		cpuset_del(&set, ci);

		run = ci->ci_schedstate.spc_nrun;

		if (choice == NULL || run < best_run) {
			choice = ci;
			best_run = run;
		}
	}

	return (choice);
#else
	return (curcpu());
#endif
}
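/*
 * A note on the candidate-set construction used both above and in
 * sched_choosecpu() below: cpuset_complement(&set, &sched_queued_cpus,
 * &sched_idle_cpus) computes sched_idle_cpus & ~sched_queued_cpus, and the
 * following intersection with sched_all_cpus restricts that to schedulable
 * cpus, i.e. "idle cpus with nothing queued that we are allowed to use".
 */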

struct cpu_info *
sched_choosecpu(struct proc *p)
{
#ifdef MULTIPROCESSOR
	struct cpu_info *choice = NULL;
	int last_cost = INT_MAX;
	struct cpu_info *ci;
	struct cpuset set;

	/*
	 * If pegged to a cpu, don't allow it to move.
	 */
	if (p->p_flag & P_CPUPEG)
		return (p->p_cpu);

	sched_choose++;

	/*
	 * Look at all cpus that are currently idle and have nothing queued.
	 * If there are none, pick the cheapest of all cpus.
	 * (idle + queued could mean that the cpu is handling an interrupt
	 * at this moment and hasn't had time to leave idle yet).
	 */
	cpuset_complement(&set, &sched_queued_cpus, &sched_idle_cpus);
	cpuset_intersection(&set, &set, &sched_all_cpus);

	/*
	 * First, just check if our current cpu is in that set, if it is,
	 * this is simple.
	 * Also, our cpu might not be idle, but if it's the current cpu
	 * and it has nothing else queued and we're curproc, take it.
	 */
	if (cpuset_isset(&set, p->p_cpu) ||
	    (p->p_cpu == curcpu() && p->p_cpu->ci_schedstate.spc_nrun == 0 &&
	    (p->p_cpu->ci_schedstate.spc_schedflags & SPCF_SHOULDHALT) == 0 &&
	    curproc == p)) {
		sched_wasidle++;
		return (p->p_cpu);
	}

	if (cpuset_first(&set) == NULL)
		cpuset_copy(&set, &sched_all_cpus);

	while ((ci = cpuset_first(&set)) != NULL) {
		int cost = sched_proc_to_cpu_cost(ci, p);

		if (choice == NULL || cost < last_cost) {
			choice = ci;
			last_cost = cost;
		}
		cpuset_del(&set, ci);
	}

	if (p->p_cpu != choice)
		sched_nmigrations++;
	else
		sched_nomigrations++;

	return (choice);
#else
	return (curcpu());
#endif
}

/*
 * Attempt to steal a proc from some cpu.
 */
struct proc *
sched_steal_proc(struct cpu_info *self)
{
	struct proc *best = NULL;
#ifdef MULTIPROCESSOR
	struct schedstate_percpu *spc;
	int bestcost = INT_MAX;
	struct cpu_info *ci;
	struct cpuset set;

	KASSERT((self->ci_schedstate.spc_schedflags & SPCF_SHOULDHALT) == 0);

	/* Don't steal if we don't want to schedule processes in this CPU. */
	if (!cpuset_isset(&sched_all_cpus, self))
		return (NULL);

	cpuset_copy(&set, &sched_queued_cpus);

	while ((ci = cpuset_first(&set)) != NULL) {
		struct proc *p;
		int queue;
		int cost;

		cpuset_del(&set, ci);

		spc = &ci->ci_schedstate;

		queue = ffs(spc->spc_whichqs) - 1;
		TAILQ_FOREACH(p, &spc->spc_qs[queue], p_runq) {
			if (p->p_flag & P_CPUPEG)
				continue;

			cost = sched_proc_to_cpu_cost(self, p);

			if (best == NULL || cost < bestcost) {
				best = p;
				bestcost = cost;
			}
		}
	}
	if (best == NULL)
		return (NULL);

	TRACEPOINT(sched, steal, best->p_tid + THREAD_PID_OFFSET,
	    best->p_p->ps_pid, CPU_INFO_UNIT(self));

	remrunqueue(best);
	best->p_cpu = self;

	sched_stolen++;
#endif
	return (best);
}

#ifdef MULTIPROCESSOR
/*
 * Base 2 logarithm of an int. Returns 0 for 0 (yeye, I know).
 */
static int
log2(unsigned int i)
{
	int ret = 0;

	while (i >>= 1)
		ret++;

	return (ret);
}

/*
 * Calculate the cost of moving the proc to this cpu.
 *
 * What we want is some guesstimate of how much "performance" it will
 * cost us to move the proc here. Not just for caches and TLBs and NUMA
 * memory, but also for the proc itself. A highly loaded cpu might not
 * be the best candidate for this proc since it won't get run.
 *
 * Just total guesstimates for now.
 */

int sched_cost_load = 1;
int sched_cost_priority = 1;
int sched_cost_runnable = 3;
int sched_cost_resident = 1;
#endif
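/*
 * Worked example with the default weights above (hypothetical numbers):
 * moving a proc with p_usrpri 50 to a non-idle cpu whose spc_curpriority
 * is 30 and which has 2 procs queued costs
 * (50 - 30) * sched_cost_priority + sched_cost_runnable +
 * 2 * sched_cost_runnable = 20 + 3 + 6 = 29, before the primary-cpu
 * penalty and the cache-residency discount applied below.
 */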

int
sched_proc_to_cpu_cost(struct cpu_info *ci, struct proc *p)
{
	int cost = 0;
#ifdef MULTIPROCESSOR
	struct schedstate_percpu *spc;
	int l2resident = 0;

	spc = &ci->ci_schedstate;

	/*
	 * First, account for the priority of the proc we want to move.
	 * The lower the priority of the destination and the higher the
	 * priority of the proc, the more willing we are to move it.
	 */
	if (!cpuset_isset(&sched_idle_cpus, ci)) {
		cost += (p->p_usrpri - spc->spc_curpriority) *
		    sched_cost_priority;
		cost += sched_cost_runnable;
	}
	if (cpuset_isset(&sched_queued_cpus, ci))
		cost += spc->spc_nrun * sched_cost_runnable;

	/*
	 * Try to avoid the primary cpu as it handles hardware interrupts.
	 *
	 * XXX Needs to be revisited when we distribute interrupts
	 * over cpus.
	 */
	if (CPU_IS_PRIMARY(ci))
		cost += sched_cost_runnable;

	/*
	 * If the proc is on this cpu already, lower the cost by how much
	 * it has been running and an estimate of its footprint.
	 */
	if (p->p_cpu == ci && p->p_slptime == 0) {
		l2resident =
		    log2(pmap_resident_count(p->p_vmspace->vm_map.pmap));
		cost -= l2resident * sched_cost_resident;
	}
#endif
	return (cost);
}

/*
 * Peg a proc to a cpu.
 */
void
sched_peg_curproc(struct cpu_info *ci)
{
	struct proc *p = curproc;
	int s;

	SCHED_LOCK(s);
	atomic_setbits_int(&p->p_flag, P_CPUPEG);
	setrunqueue(ci, p, p->p_usrpri);
	p->p_ru.ru_nvcsw++;
	mi_switch();
	SCHED_UNLOCK(s);
}

#ifdef MULTIPROCESSOR

void
sched_start_secondary_cpus(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;

	CPU_INFO_FOREACH(cii, ci) {
		struct schedstate_percpu *spc = &ci->ci_schedstate;

		if (CPU_IS_PRIMARY(ci) || !CPU_IS_RUNNING(ci))
			continue;
		atomic_clearbits_int(&spc->spc_schedflags,
		    SPCF_SHOULDHALT | SPCF_HALTED);
#ifdef __HAVE_CPU_TOPOLOGY
		if (!sched_smt && ci->ci_smt_id > 0)
			continue;
#endif
		cpuset_add(&sched_all_cpus, ci);
	}
}

void
sched_stop_secondary_cpus(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;

	/*
	 * Make sure we stop the secondary CPUs.
	 */
	CPU_INFO_FOREACH(cii, ci) {
		struct schedstate_percpu *spc = &ci->ci_schedstate;

		if (CPU_IS_PRIMARY(ci) || !CPU_IS_RUNNING(ci))
			continue;
		cpuset_del(&sched_all_cpus, ci);
		atomic_setbits_int(&spc->spc_schedflags, SPCF_SHOULDHALT);
	}
	CPU_INFO_FOREACH(cii, ci) {
		struct schedstate_percpu *spc = &ci->ci_schedstate;

		if (CPU_IS_PRIMARY(ci) || !CPU_IS_RUNNING(ci))
			continue;
		while ((spc->spc_schedflags & SPCF_HALTED) == 0) {
			sleep_setup(spc, PZERO, "schedstate");
			sleep_finish(0,
			    (spc->spc_schedflags & SPCF_HALTED) == 0);
		}
	}
}

struct sched_barrier_state {
	struct cpu_info *ci;
	struct cond cond;
};

void
sched_barrier_task(void *arg)
{
	struct sched_barrier_state *sb = arg;
	struct cpu_info *ci = sb->ci;

	sched_peg_curproc(ci);
	cond_signal(&sb->cond);
	atomic_clearbits_int(&curproc->p_flag, P_CPUPEG);
}

void
sched_barrier(struct cpu_info *ci)
{
	struct sched_barrier_state sb;
	struct task task;
	CPU_INFO_ITERATOR cii;

	if (ci == NULL) {
		CPU_INFO_FOREACH(cii, ci) {
			if (CPU_IS_PRIMARY(ci))
				break;
		}
	}
	KASSERT(ci != NULL);

	if (ci == curcpu())
		return;

	sb.ci = ci;
	cond_init(&sb.cond);
	task_set(&task, sched_barrier_task, &sb);

	task_add(systqmp, &task);
	cond_wait(&sb.cond, "sbar");
}

#else

void
sched_barrier(struct cpu_info *ci)
{
}

#endif

/*
 * Functions to manipulate cpu sets.
 */
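/*
 * Each cpuset is a bitmask indexed by CPU_INFO_UNIT(ci): for example
 * (illustrative numbers only), the cpu with unit number 40 is tracked in
 * cs_set[1] (40 / 32), bit 8 (40 % 32).
 */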
struct cpu_info *cpuset_infos[MAXCPUS];
static struct cpuset cpuset_all;

void
cpuset_init_cpu(struct cpu_info *ci)
{
	cpuset_add(&cpuset_all, ci);
	cpuset_infos[CPU_INFO_UNIT(ci)] = ci;
}

void
cpuset_clear(struct cpuset *cs)
{
	memset(cs, 0, sizeof(*cs));
}

void
cpuset_add(struct cpuset *cs, struct cpu_info *ci)
{
	unsigned int num = CPU_INFO_UNIT(ci);
	atomic_setbits_int(&cs->cs_set[num/32], (1U << (num % 32)));
}

void
cpuset_del(struct cpuset *cs, struct cpu_info *ci)
{
	unsigned int num = CPU_INFO_UNIT(ci);
	atomic_clearbits_int(&cs->cs_set[num/32], (1U << (num % 32)));
}

int
cpuset_isset(struct cpuset *cs, struct cpu_info *ci)
{
	unsigned int num = CPU_INFO_UNIT(ci);
	return (cs->cs_set[num/32] & (1U << (num % 32)));
}

void
cpuset_add_all(struct cpuset *cs)
{
	cpuset_copy(cs, &cpuset_all);
}

void
cpuset_copy(struct cpuset *to, struct cpuset *from)
{
	memcpy(to, from, sizeof(*to));
}

struct cpu_info *
cpuset_first(struct cpuset *cs)
{
	int i;

	for (i = 0; i < CPUSET_ASIZE(ncpus); i++)
		if (cs->cs_set[i])
			return (cpuset_infos[i * 32 + ffs(cs->cs_set[i]) - 1]);

	return (NULL);
}

void
cpuset_union(struct cpuset *to, struct cpuset *a, struct cpuset *b)
{
	int i;

	for (i = 0; i < CPUSET_ASIZE(ncpus); i++)
		to->cs_set[i] = a->cs_set[i] | b->cs_set[i];
}

void
cpuset_intersection(struct cpuset *to, struct cpuset *a, struct cpuset *b)
{
	int i;

	for (i = 0; i < CPUSET_ASIZE(ncpus); i++)
		to->cs_set[i] = a->cs_set[i] & b->cs_set[i];
}

void
cpuset_complement(struct cpuset *to, struct cpuset *a, struct cpuset *b)
{
	int i;

	for (i = 0; i < CPUSET_ASIZE(ncpus); i++)
		to->cs_set[i] = b->cs_set[i] & ~a->cs_set[i];
}

int
cpuset_cardinality(struct cpuset *cs)
{
	int cardinality, i, n;

	cardinality = 0;

	for (i = 0; i < CPUSET_ASIZE(ncpus); i++)
		for (n = cs->cs_set[i]; n != 0; n &= n - 1)
			cardinality++;

	return (cardinality);
}

int
sysctl_hwncpuonline(void)
{
	return cpuset_cardinality(&sched_all_cpus);
}

int
cpu_is_online(struct cpu_info *ci)
{
	return cpuset_isset(&sched_all_cpus, ci);
}

#ifdef __HAVE_CPU_TOPOLOGY

#include <sys/sysctl.h>

int
sysctl_hwsmt(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	int err, newsmt;

	newsmt = sched_smt;
	err = sysctl_int_bounded(oldp, oldlenp, newp, newlen, &newsmt, 0, 1);
	if (err)
		return err;
	if (newsmt == sched_smt)
		return 0;

	sched_smt = newsmt;
	CPU_INFO_FOREACH(cii, ci) {
		if (CPU_IS_PRIMARY(ci) || !CPU_IS_RUNNING(ci))
			continue;
		if (ci->ci_smt_id == 0)
			continue;
		if (sched_smt)
			cpuset_add(&sched_all_cpus, ci);
		else
			cpuset_del(&sched_all_cpus, ci);
	}

	return 0;
}

#endif
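/*
 * Usage note (assuming the standard sysctl wiring): sysctl_hwsmt() above
 * backs the hw.smt knob, so e.g. "sysctl hw.smt=1" re-adds SMT siblings
 * (ci_smt_id > 0) to sched_all_cpus and makes them schedulable again.
 */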