/*	$OpenBSD: kern_sched.c,v 1.93 2023/10/24 13:20:11 claudio Exp $	*/
/*
 * Copyright (c) 2007, 2008 Artur Grabowski <art@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>

#include <sys/sched.h>
#include <sys/proc.h>
#include <sys/kthread.h>
#include <sys/systm.h>
#include <sys/clockintr.h>
#include <sys/resourcevar.h>
#include <sys/task.h>
#include <sys/time.h>
#include <sys/smr.h>
#include <sys/tracepoint.h>

#include <uvm/uvm_extern.h>

void sched_kthreads_create(void *);

int sched_proc_to_cpu_cost(struct cpu_info *ci, struct proc *p);
struct proc *sched_steal_proc(struct cpu_info *);

/*
 * To help choose which cpu should run which process we keep track
 * of which cpus are currently idle and which cpus have processes
 * queued.
 */
struct cpuset sched_idle_cpus;
struct cpuset sched_queued_cpus;
struct cpuset sched_all_cpus;

/*
 * Some general scheduler counters.
 */
uint64_t sched_nmigrations;	/* Cpu migration counter */
uint64_t sched_nomigrations;	/* Cpu no migration counter */
uint64_t sched_noidle;		/* Times we didn't pick the idle task */
uint64_t sched_stolen;		/* Times we stole proc from other cpus */
uint64_t sched_choose;		/* Times we chose a cpu */
uint64_t sched_wasidle;		/* Times we came out of idle */

int sched_smt;

/*
 * A few notes about cpu_switchto that is implemented in MD code.
 *
 * cpu_switchto takes two arguments, the old proc and the proc
 * it should switch to. The new proc will never be NULL, so we always have
 * a saved state that we need to switch to. The old proc however can
 * be NULL if the process is exiting. NULL for the old proc simply
 * means "don't bother saving old state".
 *
 * cpu_switchto is supposed to atomically load the new state of the process
 * including the pcb, pmap and setting curproc, the p_cpu pointer in the
 * proc and p_stat to SONPROC.  This is atomic only with respect to
 * interrupts; other cpus in the system must not depend on this state
 * being consistent while a switch is in progress.  Therefore no locking
 * is necessary in cpu_switchto other than blocking interrupts during the
 * context switch.
 */

/*
 * sched_init_cpu is called from main() for the boot cpu, then it's the
 * responsibility of the MD code to call it for all other cpus.
 */
void
sched_init_cpu(struct cpu_info *ci)
{
	struct schedstate_percpu *spc = &ci->ci_schedstate;
	int i;

	for (i = 0; i < SCHED_NQS; i++)
		TAILQ_INIT(&spc->spc_qs[i]);

	spc->spc_idleproc = NULL;

	spc->spc_itimer = clockintr_establish(ci, itimer_update, NULL);
	if (spc->spc_itimer == NULL)
		panic("%s: clockintr_establish itimer_update", __func__);
	spc->spc_profclock = clockintr_establish(ci, profclock, NULL);
	if (spc->spc_profclock == NULL)
		panic("%s: clockintr_establish profclock", __func__);
	spc->spc_roundrobin = clockintr_establish(ci, roundrobin, NULL);
	if (spc->spc_roundrobin == NULL)
		panic("%s: clockintr_establish roundrobin", __func__);
	spc->spc_statclock = clockintr_establish(ci, statclock, NULL);
	if (spc->spc_statclock == NULL)
		panic("%s: clockintr_establish statclock", __func__);

	kthread_create_deferred(sched_kthreads_create, ci);

	LIST_INIT(&spc->spc_deadproc);
	SIMPLEQ_INIT(&spc->spc_deferred);

	/*
	 * Slight hack here until the cpuset code handles cpu_info
	 * structures.
	 */
	cpuset_init_cpu(ci);

#ifdef __HAVE_CPU_TOPOLOGY
	if (!sched_smt && ci->ci_smt_id > 0)
		return;
#endif
	cpuset_add(&sched_all_cpus, ci);
}

void
sched_kthreads_create(void *v)
{
	struct cpu_info *ci = v;
	struct schedstate_percpu *spc = &ci->ci_schedstate;
	static int num;

	if (fork1(&proc0, FORK_SHAREVM|FORK_SHAREFILES|FORK_NOZOMBIE|
	    FORK_SYSTEM|FORK_IDLE, sched_idle, ci, NULL,
	    &spc->spc_idleproc))
		panic("fork idle");

	/* Name it as specified. */
	snprintf(spc->spc_idleproc->p_p->ps_comm,
	    sizeof(spc->spc_idleproc->p_p->ps_comm),
	    "idle%d", num);

	num++;
}

void
sched_idle(void *v)
{
	struct schedstate_percpu *spc;
	struct proc *p = curproc;
	struct cpu_info *ci = v;
	int s;

	KERNEL_UNLOCK();

	spc = &ci->ci_schedstate;

	/*
	 * First time we enter here, we're not supposed to idle,
	 * just go away for a while.
	 */
	SCHED_LOCK(s);
	cpuset_add(&sched_idle_cpus, ci);
	p->p_stat = SSLEEP;
	p->p_cpu = ci;
	atomic_setbits_int(&p->p_flag, P_CPUPEG);
	mi_switch();
	cpuset_del(&sched_idle_cpus, ci);
	SCHED_UNLOCK(s);

	KASSERT(ci == curcpu());
	KASSERT(curproc == spc->spc_idleproc);

	while (1) {
		while (!cpu_is_idle(curcpu())) {
			struct proc *dead;

			SCHED_LOCK(s);
			p->p_stat = SSLEEP;
			mi_switch();
			SCHED_UNLOCK(s);

			while ((dead = LIST_FIRST(&spc->spc_deadproc))) {
				LIST_REMOVE(dead, p_hash);
				exit2(dead);
			}
		}

		splassert(IPL_NONE);

		smr_idle();

		cpuset_add(&sched_idle_cpus, ci);
		cpu_idle_enter();
		while (spc->spc_whichqs == 0) {
#ifdef MULTIPROCESSOR
			if (spc->spc_schedflags & SPCF_SHOULDHALT &&
			    (spc->spc_schedflags & SPCF_HALTED) == 0) {
				cpuset_del(&sched_idle_cpus, ci);
				SCHED_LOCK(s);
				atomic_setbits_int(&spc->spc_schedflags,
				    spc->spc_whichqs ? 0 : SPCF_HALTED);
				SCHED_UNLOCK(s);
				wakeup(spc);
			}
#endif
			cpu_idle_cycle();
		}
		cpu_idle_leave();
		cpuset_del(&sched_idle_cpus, ci);
	}
}

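/*
 * Note on the SPCF_SHOULDHALT/SPCF_HALTED handshake above: when
 * sched_stop_secondary_cpus() asks a cpu to halt, the idle loop only sets
 * SPCF_HALTED once that cpu's run queues are empty, and the wakeup(spc)
 * then releases the thread sleeping on the schedstate in
 * sched_stop_secondary_cpus().
 */
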
/*
 * To free our address space we have to jump through a few hoops.
 * The freeing is done by the reaper, but until we have one reaper
 * per cpu, we have no way of putting this proc on the deadproc list
 * and waking up the reaper without risking having our address space and
 * stack torn from under us before we manage to switch to another proc.
 * Therefore we have a per-cpu list of dead processes where we put this
 * proc and have idle clean up that list and move it to the reaper list.
 * All this will be unnecessary once we can bind the reaper to this cpu
 * and not risk having it switch to another in case it sleeps.
 */
void
sched_exit(struct proc *p)
{
	struct schedstate_percpu *spc = &curcpu()->ci_schedstate;

	LIST_INSERT_HEAD(&spc->spc_deadproc, p, p_hash);

	KERNEL_ASSERT_LOCKED();
	sched_toidle();
}

void
sched_toidle(void)
{
	struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
	struct proc *idle;
	int s;

#ifdef MULTIPROCESSOR
	/* This process no longer needs to hold the kernel lock. */
	if (_kernel_lock_held())
		__mp_release_all(&kernel_lock);
#endif

	if (ISSET(spc->spc_schedflags, SPCF_ITIMER)) {
		atomic_clearbits_int(&spc->spc_schedflags, SPCF_ITIMER);
		clockintr_cancel(spc->spc_itimer);
	}
	if (ISSET(spc->spc_schedflags, SPCF_PROFCLOCK)) {
		atomic_clearbits_int(&spc->spc_schedflags, SPCF_PROFCLOCK);
		clockintr_cancel(spc->spc_profclock);
	}

	atomic_clearbits_int(&spc->spc_schedflags, SPCF_SWITCHCLEAR);

	SCHED_LOCK(s);

	idle = spc->spc_idleproc;
	idle->p_stat = SRUN;

	uvmexp.swtch++;
	TRACEPOINT(sched, off__cpu, idle->p_tid + THREAD_PID_OFFSET,
	    idle->p_p->ps_pid);
	cpu_switchto(NULL, idle);
	panic("cpu_switchto returned");
}

/*
 * Run queue management.
 */
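/*
 * Each cpu has SCHED_NQS run queues.  setrunqueue() maps a priority to a
 * queue with prio >> 2, so four adjacent priority values share a queue and
 * lower queue indices hold more urgent threads.  spc_whichqs keeps one bit
 * per non-empty queue, which lets sched_chooseproc() find the best queue
 * with ffs() instead of scanning all of them.
 */
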
void
sched_init_runqueues(void)
{
}

void
setrunqueue(struct cpu_info *ci, struct proc *p, uint8_t prio)
{
	struct schedstate_percpu *spc;
	int queue = prio >> 2;

	if (ci == NULL)
		ci = sched_choosecpu(p);

	KASSERT(ci != NULL);
	SCHED_ASSERT_LOCKED();
	KASSERT(p->p_wchan == NULL);

	p->p_cpu = ci;
	p->p_stat = SRUN;
	p->p_runpri = prio;

	spc = &p->p_cpu->ci_schedstate;
	spc->spc_nrun++;
	TRACEPOINT(sched, enqueue, p->p_tid + THREAD_PID_OFFSET,
	    p->p_p->ps_pid);

	TAILQ_INSERT_TAIL(&spc->spc_qs[queue], p, p_runq);
	spc->spc_whichqs |= (1U << queue);
	cpuset_add(&sched_queued_cpus, p->p_cpu);

	if (cpuset_isset(&sched_idle_cpus, p->p_cpu))
		cpu_unidle(p->p_cpu);

	if (prio < spc->spc_curpriority)
		need_resched(ci);
}

void
remrunqueue(struct proc *p)
{
	struct schedstate_percpu *spc;
	int queue = p->p_runpri >> 2;

	SCHED_ASSERT_LOCKED();
	spc = &p->p_cpu->ci_schedstate;
	spc->spc_nrun--;
	TRACEPOINT(sched, dequeue, p->p_tid + THREAD_PID_OFFSET,
	    p->p_p->ps_pid);

	TAILQ_REMOVE(&spc->spc_qs[queue], p, p_runq);
	if (TAILQ_EMPTY(&spc->spc_qs[queue])) {
		spc->spc_whichqs &= ~(1U << queue);
		if (spc->spc_whichqs == 0)
			cpuset_del(&sched_queued_cpus, p->p_cpu);
	}
}

struct proc *
sched_chooseproc(void)
{
	struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
	struct proc *p;
	int queue;

	SCHED_ASSERT_LOCKED();

#ifdef MULTIPROCESSOR
	if (spc->spc_schedflags & SPCF_SHOULDHALT) {
		if (spc->spc_whichqs) {
			for (queue = 0; queue < SCHED_NQS; queue++) {
				while ((p = TAILQ_FIRST(&spc->spc_qs[queue]))) {
					remrunqueue(p);
					setrunqueue(NULL, p, p->p_runpri);
					if (p->p_cpu == curcpu()) {
						KASSERT(p->p_flag & P_CPUPEG);
						goto again;
					}
				}
			}
		}
		p = spc->spc_idleproc;
		if (p == NULL)
			panic("no idleproc set on CPU%d",
			    CPU_INFO_UNIT(curcpu()));
		p->p_stat = SRUN;
		KASSERT(p->p_wchan == NULL);
		return (p);
	}
again:
#endif

	if (spc->spc_whichqs) {
		queue = ffs(spc->spc_whichqs) - 1;
		p = TAILQ_FIRST(&spc->spc_qs[queue]);
		remrunqueue(p);
		sched_noidle++;
		if (p->p_stat != SRUN)
			panic("thread %d not in SRUN: %d", p->p_tid, p->p_stat);
	} else if ((p = sched_steal_proc(curcpu())) == NULL) {
		p = spc->spc_idleproc;
		if (p == NULL)
			panic("no idleproc set on CPU%d",
			    CPU_INFO_UNIT(curcpu()));
		p->p_stat = SRUN;
	}

	KASSERT(p->p_wchan == NULL);
	return (p);
}

struct cpu_info *
sched_choosecpu_fork(struct proc *parent, int flags)
{
#ifdef MULTIPROCESSOR
	struct cpu_info *choice = NULL;
	int run, best_run = INT_MAX;
	struct cpu_info *ci;
	struct cpuset set;

#if 0
	/*
	 * XXX
	 * Don't do this until we have a painless way to move the cpu in exec.
	 * Preferably when nuking the old pmap and getting a new one on a
	 * new cpu.
	 */
	/*
	 * PPWAIT forks are simple. We know that the parent will not
	 * run until we exec and choose another cpu, so we just steal its
	 * cpu.
	 */
	if (flags & FORK_PPWAIT)
		return (parent->p_cpu);
#endif

	/*
	 * Look at all cpus that are currently idle and have nothing queued;
	 * if there are none, consider all cpus.  Pick the one with the
	 * fewest queued procs.
	 */
	cpuset_complement(&set, &sched_queued_cpus, &sched_idle_cpus);
	cpuset_intersection(&set, &set, &sched_all_cpus);
	if (cpuset_first(&set) == NULL)
		cpuset_copy(&set, &sched_all_cpus);

	while ((ci = cpuset_first(&set)) != NULL) {
		cpuset_del(&set, ci);

		run = ci->ci_schedstate.spc_nrun;

		if (choice == NULL || run < best_run) {
			choice = ci;
			best_run = run;
		}
	}

	return (choice);
#else
	return (curcpu());
#endif
}

struct cpu_info *
sched_choosecpu(struct proc *p)
{
#ifdef MULTIPROCESSOR
	struct cpu_info *choice = NULL;
	int last_cost = INT_MAX;
	struct cpu_info *ci;
	struct cpuset set;

	/*
	 * If pegged to a cpu, don't allow it to move.
	 */
	if (p->p_flag & P_CPUPEG)
		return (p->p_cpu);

	sched_choose++;

	/*
	 * Look at all cpus that are currently idle and have nothing queued;
	 * pick the cheapest of those.  If there are none, fall back to
	 * considering all cpus.
	 * (idle + queued could mean that the cpu is handling an interrupt
	 * at this moment and hasn't had time to leave idle yet).
	 */
	cpuset_complement(&set, &sched_queued_cpus, &sched_idle_cpus);
	cpuset_intersection(&set, &set, &sched_all_cpus);

	/*
	 * First, just check if our current cpu is in that set, if it is,
	 * this is simple.
	 * Also, our cpu might not be idle, but if it's the current cpu
	 * and it has nothing else queued and we're curproc, take it.
	 */
	if (cpuset_isset(&set, p->p_cpu) ||
	    (p->p_cpu == curcpu() && p->p_cpu->ci_schedstate.spc_nrun == 0 &&
	    (p->p_cpu->ci_schedstate.spc_schedflags & SPCF_SHOULDHALT) == 0 &&
	    curproc == p)) {
		sched_wasidle++;
		return (p->p_cpu);
	}

	if (cpuset_first(&set) == NULL)
		cpuset_copy(&set, &sched_all_cpus);

	while ((ci = cpuset_first(&set)) != NULL) {
		int cost = sched_proc_to_cpu_cost(ci, p);

		if (choice == NULL || cost < last_cost) {
			choice = ci;
			last_cost = cost;
		}
		cpuset_del(&set, ci);
	}

	if (p->p_cpu != choice)
		sched_nmigrations++;
	else
		sched_nomigrations++;

	return (choice);
#else
	return (curcpu());
#endif
}

/*
 * Attempt to steal a proc from some cpu.
 */
struct proc *
sched_steal_proc(struct cpu_info *self)
{
	struct proc *best = NULL;
#ifdef MULTIPROCESSOR
	struct schedstate_percpu *spc;
	int bestcost = INT_MAX;
	struct cpu_info *ci;
	struct cpuset set;

	KASSERT((self->ci_schedstate.spc_schedflags & SPCF_SHOULDHALT) == 0);

	/* Don't steal if we don't want to schedule processes on this CPU. */
	if (!cpuset_isset(&sched_all_cpus, self))
		return (NULL);

	cpuset_copy(&set, &sched_queued_cpus);

	while ((ci = cpuset_first(&set)) != NULL) {
		struct proc *p;
		int queue;
		int cost;

		cpuset_del(&set, ci);

		spc = &ci->ci_schedstate;

		queue = ffs(spc->spc_whichqs) - 1;
		TAILQ_FOREACH(p, &spc->spc_qs[queue], p_runq) {
			if (p->p_flag & P_CPUPEG)
				continue;

			cost = sched_proc_to_cpu_cost(self, p);

			if (best == NULL || cost < bestcost) {
				best = p;
				bestcost = cost;
			}
		}
	}
	if (best == NULL)
		return (NULL);

	TRACEPOINT(sched, steal, best->p_tid + THREAD_PID_OFFSET,
	    best->p_p->ps_pid, CPU_INFO_UNIT(self));

	remrunqueue(best);
	best->p_cpu = self;

	sched_stolen++;
#endif
	return (best);
}

#ifdef MULTIPROCESSOR
/*
 * Base 2 logarithm of an int. Returns 0 for 0 (yeye, I know).
 */
static int
log2(unsigned int i)
{
	int ret = 0;

	while (i >>= 1)
		ret++;

	return (ret);
}

/*
 * Calculate the cost of moving the proc to this cpu.
 *
 * What we want is some guesstimate of how much "performance" it will
 * cost us to move the proc here. Not just for caches and TLBs and NUMA
 * memory, but also for the proc itself. A highly loaded cpu might not
 * be the best candidate for this proc since it won't get run.
 *
 * Just total guesstimates for now.
 */

int sched_cost_load = 1;
int sched_cost_priority = 1;
int sched_cost_runnable = 3;
int sched_cost_resident = 1;
#endif

int
sched_proc_to_cpu_cost(struct cpu_info *ci, struct proc *p)
{
	int cost = 0;
#ifdef MULTIPROCESSOR
	struct schedstate_percpu *spc;
	int l2resident = 0;

	spc = &ci->ci_schedstate;

	/*
	 * First, account for the priority of the proc we want to move.
	 * The less important the thread currently running on the
	 * destination and the more important the proc, the more willing
	 * we are to move it there.
	 */
	if (!cpuset_isset(&sched_idle_cpus, ci)) {
		cost += (p->p_usrpri - spc->spc_curpriority) *
		    sched_cost_priority;
		cost += sched_cost_runnable;
	}
	if (cpuset_isset(&sched_queued_cpus, ci))
		cost += spc->spc_nrun * sched_cost_runnable;

	/*
	 * Try to avoid the primary cpu as it handles hardware interrupts.
	 *
	 * XXX Needs to be revisited when we distribute interrupts
	 * over cpus.
	 */
	if (CPU_IS_PRIMARY(ci))
		cost += sched_cost_runnable;

	/*
	 * If the proc is on this cpu already, lower the cost by how much
	 * it has been running and an estimate of its footprint.
	 */
	if (p->p_cpu == ci && p->p_slptime == 0) {
		l2resident =
		    log2(pmap_resident_count(p->p_vmspace->vm_map.pmap));
		cost -= l2resident * sched_cost_resident;
	}
#endif
	return (cost);
}
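
/*
 * A rough worked example of the cost above (illustrative numbers only,
 * using the default weights): moving a thread with p_usrpri 50 to a
 * non-idle cpu whose spc_curpriority is 30 and which has two threads
 * queued costs (50 - 30) * sched_cost_priority + sched_cost_runnable +
 * 2 * sched_cost_runnable = 29.  If the thread is instead being placed
 * back on the cpu it last ran on (p_cpu == ci, p_slptime == 0) and has
 * about 1024 resident pages, log2(1024) * sched_cost_resident = 10 is
 * subtracted from its cost there.
 */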

/*
 * Peg a proc to a cpu.
 */
void
sched_peg_curproc(struct cpu_info *ci)
{
	struct proc *p = curproc;
	int s;

	SCHED_LOCK(s);
	atomic_setbits_int(&p->p_flag, P_CPUPEG);
	setrunqueue(ci, p, p->p_usrpri);
	p->p_ru.ru_nvcsw++;
	mi_switch();
	SCHED_UNLOCK(s);
}

#ifdef MULTIPROCESSOR

void
sched_start_secondary_cpus(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;

	CPU_INFO_FOREACH(cii, ci) {
		struct schedstate_percpu *spc = &ci->ci_schedstate;

		if (CPU_IS_PRIMARY(ci) || !CPU_IS_RUNNING(ci))
			continue;
		atomic_clearbits_int(&spc->spc_schedflags,
		    SPCF_SHOULDHALT | SPCF_HALTED);
#ifdef __HAVE_CPU_TOPOLOGY
		if (!sched_smt && ci->ci_smt_id > 0)
			continue;
#endif
		cpuset_add(&sched_all_cpus, ci);
	}
}

void
sched_stop_secondary_cpus(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;

	/*
	 * Make sure we stop the secondary CPUs.
	 */
	CPU_INFO_FOREACH(cii, ci) {
		struct schedstate_percpu *spc = &ci->ci_schedstate;

		if (CPU_IS_PRIMARY(ci) || !CPU_IS_RUNNING(ci))
			continue;
		cpuset_del(&sched_all_cpus, ci);
		atomic_setbits_int(&spc->spc_schedflags, SPCF_SHOULDHALT);
	}
	CPU_INFO_FOREACH(cii, ci) {
		struct schedstate_percpu *spc = &ci->ci_schedstate;

		if (CPU_IS_PRIMARY(ci) || !CPU_IS_RUNNING(ci))
			continue;
		while ((spc->spc_schedflags & SPCF_HALTED) == 0) {
			sleep_setup(spc, PZERO, "schedstate");
			sleep_finish(0,
			    (spc->spc_schedflags & SPCF_HALTED) == 0);
		}
	}
}

struct sched_barrier_state {
	struct cpu_info *ci;
	struct cond cond;
};

void
sched_barrier_task(void *arg)
{
	struct sched_barrier_state *sb = arg;
	struct cpu_info *ci = sb->ci;

	sched_peg_curproc(ci);
	cond_signal(&sb->cond);
	atomic_clearbits_int(&curproc->p_flag, P_CPUPEG);
}

void
sched_barrier(struct cpu_info *ci)
{
	struct sched_barrier_state sb;
	struct task task;
	CPU_INFO_ITERATOR cii;

	if (ci == NULL) {
		CPU_INFO_FOREACH(cii, ci) {
			if (CPU_IS_PRIMARY(ci))
				break;
		}
	}
	KASSERT(ci != NULL);

	if (ci == curcpu())
		return;

	sb.ci = ci;
	cond_init(&sb.cond);
	task_set(&task, sched_barrier_task, &sb);

	task_add(systqmp, &task);
	cond_wait(&sb.cond, "sbar");
}

#else

void
sched_barrier(struct cpu_info *ci)
{
}

#endif

/*
 * Functions to manipulate cpu sets.
 */
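/*
 * A cpuset is a bitmask indexed by CPU_INFO_UNIT(): cpu "num" lives in
 * bit (num % 32) of word cs_set[num / 32].  Single-cpu add/del use atomic
 * ops so concurrent updates do not corrupt neighbouring bits; the
 * whole-set operations (copy, union, intersection, complement) are plain
 * loads and stores.
 */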
struct cpu_info *cpuset_infos[MAXCPUS];
static struct cpuset cpuset_all;

void
cpuset_init_cpu(struct cpu_info *ci)
{
	cpuset_add(&cpuset_all, ci);
	cpuset_infos[CPU_INFO_UNIT(ci)] = ci;
}

void
cpuset_clear(struct cpuset *cs)
{
	memset(cs, 0, sizeof(*cs));
}

void
cpuset_add(struct cpuset *cs, struct cpu_info *ci)
{
	unsigned int num = CPU_INFO_UNIT(ci);
	atomic_setbits_int(&cs->cs_set[num/32], (1U << (num % 32)));
}

void
cpuset_del(struct cpuset *cs, struct cpu_info *ci)
{
	unsigned int num = CPU_INFO_UNIT(ci);
	atomic_clearbits_int(&cs->cs_set[num/32], (1U << (num % 32)));
}

int
cpuset_isset(struct cpuset *cs, struct cpu_info *ci)
{
	unsigned int num = CPU_INFO_UNIT(ci);
	return (cs->cs_set[num/32] & (1U << (num % 32)));
}

void
cpuset_add_all(struct cpuset *cs)
{
	cpuset_copy(cs, &cpuset_all);
}

void
cpuset_copy(struct cpuset *to, struct cpuset *from)
{
	memcpy(to, from, sizeof(*to));
}

struct cpu_info *
cpuset_first(struct cpuset *cs)
{
	int i;

	for (i = 0; i < CPUSET_ASIZE(ncpus); i++)
		if (cs->cs_set[i])
			return (cpuset_infos[i * 32 + ffs(cs->cs_set[i]) - 1]);

	return (NULL);
}

void
cpuset_union(struct cpuset *to, struct cpuset *a, struct cpuset *b)
{
	int i;

	for (i = 0; i < CPUSET_ASIZE(ncpus); i++)
		to->cs_set[i] = a->cs_set[i] | b->cs_set[i];
}

void
cpuset_intersection(struct cpuset *to, struct cpuset *a, struct cpuset *b)
{
	int i;

	for (i = 0; i < CPUSET_ASIZE(ncpus); i++)
		to->cs_set[i] = a->cs_set[i] & b->cs_set[i];
}

void
cpuset_complement(struct cpuset *to, struct cpuset *a, struct cpuset *b)
{
	int i;

	for (i = 0; i < CPUSET_ASIZE(ncpus); i++)
		to->cs_set[i] = b->cs_set[i] & ~a->cs_set[i];
}

int
cpuset_cardinality(struct cpuset *cs)
{
	int cardinality, i, n;

	cardinality = 0;

	for (i = 0; i < CPUSET_ASIZE(ncpus); i++)
		for (n = cs->cs_set[i]; n != 0; n &= n - 1)
			cardinality++;

	return (cardinality);
}

int
sysctl_hwncpuonline(void)
{
	return cpuset_cardinality(&sched_all_cpus);
}

int
cpu_is_online(struct cpu_info *ci)
{
	return cpuset_isset(&sched_all_cpus, ci);
}

#ifdef __HAVE_CPU_TOPOLOGY

#include <sys/sysctl.h>

int
sysctl_hwsmt(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	int err, newsmt;

	newsmt = sched_smt;
	err = sysctl_int_bounded(oldp, oldlenp, newp, newlen, &newsmt, 0, 1);
	if (err)
		return err;
	if (newsmt == sched_smt)
		return 0;

	sched_smt = newsmt;
	CPU_INFO_FOREACH(cii, ci) {
		if (CPU_IS_PRIMARY(ci) || !CPU_IS_RUNNING(ci))
			continue;
		if (ci->ci_smt_id == 0)
			continue;
		if (sched_smt)
			cpuset_add(&sched_all_cpus, ci);
		else
			cpuset_del(&sched_all_cpus, ci);
	}

	return 0;
}

#endif