/*	$OpenBSD: kern_sched.c,v 1.61 2019/11/02 05:31:20 visa Exp $	*/
/*
 * Copyright (c) 2007, 2008 Artur Grabowski <art@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>

#include <sys/sched.h>
#include <sys/proc.h>
#include <sys/kthread.h>
#include <sys/systm.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/mutex.h>
#include <sys/task.h>
#include <sys/smr.h>

#include <uvm/uvm_extern.h>

void sched_kthreads_create(void *);

int sched_proc_to_cpu_cost(struct cpu_info *ci, struct proc *p);
struct proc *sched_steal_proc(struct cpu_info *);

/*
 * To help choose which cpu should run which process, we keep track
 * of cpus that are currently idle and cpus that have processes
 * queued.
 */
struct cpuset sched_idle_cpus;
struct cpuset sched_queued_cpus;
struct cpuset sched_all_cpus;
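
/*
 * A cpu that is idle and has nothing queued is the preferred target;
 * sched_choosecpu() and sched_choosecpu_fork() build that candidate set
 * by subtracting sched_queued_cpus from sched_idle_cpus and intersecting
 * the result with sched_all_cpus.
 */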

/*
 * Some general scheduler counters.
 */
uint64_t sched_nmigrations;	/* Cpu migration counter */
uint64_t sched_nomigrations;	/* Cpu no migration counter */
uint64_t sched_noidle;		/* Times we didn't pick the idle task */
uint64_t sched_stolen;		/* Times we stole proc from other cpus */
uint64_t sched_choose;		/* Times we chose a cpu */
uint64_t sched_wasidle;		/* Times we came out of idle */

#ifdef MULTIPROCESSOR
struct taskq *sbartq;
#endif

int sched_smt;

/*
 * A few notes about cpu_switchto, which is implemented in MD code.
 *
 * cpu_switchto takes two arguments, the old proc and the proc
 * it should switch to. The new proc will never be NULL, so we always have
 * a saved state that we need to switch to. The old proc however can
 * be NULL if the process is exiting. NULL for the old proc simply
 * means "don't bother saving old state".
 *
 * cpu_switchto is supposed to atomically load the new state of the process,
 * including the pcb and pmap, and to set curproc, the p_cpu pointer in the
 * proc and p_stat to SONPROC. This is atomic only with respect to
 * interrupts; other cpus in the system must not depend on this state
 * being consistent. Therefore no locking is necessary in cpu_switchto
 * other than blocking interrupts during the context switch.
 */
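
/*
 * Illustrative outline only (the real implementations are MD and often
 * written in assembly): cpu_switchto(old, new) is expected to
 *
 *	1. save the register state of "old" into its pcb, unless old is NULL;
 *	2. switch to the pcb, kernel stack and pmap of "new";
 *	3. point curproc at "new", set new->p_cpu to this cpu and
 *	   new->p_stat to SONPROC;
 *
 * with interrupts blocked for the duration of the switch.
 */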

/*
 * sched_init_cpu is called from main() for the boot cpu, then it's the
 * responsibility of the MD code to call it for all other cpus.
 */
void
sched_init_cpu(struct cpu_info *ci)
{
	struct schedstate_percpu *spc = &ci->ci_schedstate;
	int i;

	for (i = 0; i < SCHED_NQS; i++)
		TAILQ_INIT(&spc->spc_qs[i]);

	spc->spc_idleproc = NULL;

	kthread_create_deferred(sched_kthreads_create, ci);

	LIST_INIT(&spc->spc_deadproc);
	SIMPLEQ_INIT(&spc->spc_deferred);

	/*
	 * Slight hack here until the cpuset code handles cpu_info
	 * structures.
	 */
	cpuset_init_cpu(ci);

#ifdef __HAVE_CPU_TOPOLOGY
	if (!sched_smt && ci->ci_smt_id > 0)
		return;
#endif
	cpuset_add(&sched_all_cpus, ci);
}

void
sched_kthreads_create(void *v)
{
	struct cpu_info *ci = v;
	struct schedstate_percpu *spc = &ci->ci_schedstate;
	static int num;

	if (fork1(&proc0, FORK_SHAREVM|FORK_SHAREFILES|FORK_NOZOMBIE|
	    FORK_SYSTEM|FORK_SIGHAND|FORK_IDLE, sched_idle, ci, NULL,
	    &spc->spc_idleproc))
		panic("fork idle");

	/* Name it as specified. */
	snprintf(spc->spc_idleproc->p_p->ps_comm,
	    sizeof(spc->spc_idleproc->p_p->ps_comm),
	    "idle%d", num);

	num++;
}

void
sched_idle(void *v)
{
	struct schedstate_percpu *spc;
	struct proc *p = curproc;
	struct cpu_info *ci = v;
	int s;

	KERNEL_UNLOCK();

	spc = &ci->ci_schedstate;

	/*
	 * First time we enter here, we're not supposed to idle,
	 * just go away for a while.
	 */
	SCHED_LOCK(s);
	cpuset_add(&sched_idle_cpus, ci);
	p->p_stat = SSLEEP;
	p->p_cpu = ci;
	atomic_setbits_int(&p->p_flag, P_CPUPEG);
	mi_switch();
	cpuset_del(&sched_idle_cpus, ci);
	SCHED_UNLOCK(s);

	KASSERT(ci == curcpu());
	KASSERT(curproc == spc->spc_idleproc);

	while (1) {
		while (!cpu_is_idle(curcpu())) {
			SCHED_LOCK(s);
			p->p_stat = SSLEEP;
			mi_switch();
			SCHED_UNLOCK(s);
		}

		splassert(IPL_NONE);

		smr_idle();

		cpuset_add(&sched_idle_cpus, ci);
		cpu_idle_enter();
		while (spc->spc_whichqs == 0) {
#ifdef MULTIPROCESSOR
			if (spc->spc_schedflags & SPCF_SHOULDHALT &&
			    (spc->spc_schedflags & SPCF_HALTED) == 0) {
				cpuset_del(&sched_idle_cpus, ci);
				SCHED_LOCK(s);
				atomic_setbits_int(&spc->spc_schedflags,
				    spc->spc_whichqs ? 0 : SPCF_HALTED);
				SCHED_UNLOCK(s);
				wakeup(spc);
			}
#endif
			cpu_idle_cycle();
		}
		cpu_idle_leave();
		cpuset_del(&sched_idle_cpus, ci);
	}
}

/*
 * To free our address space we have to jump through a few hoops.
 * The freeing is done by the reaper, but until we have one reaper
 * per cpu, we have no way of putting this proc on the deadproc list
 * and waking up the reaper without risking having our address space and
 * stack torn from under us before we manage to switch to another proc.
 * Therefore we have a per-cpu list of dead processes where we put this
 * proc. We move the list to the reaper list after context switch.
 * All this will be unnecessary once we can bind the reaper to this cpu
 * and not risk having it switch to another in case it sleeps.
 */
void
sched_exit(struct proc *p)
{
	struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
	int s;

	LIST_INSERT_HEAD(&spc->spc_deadproc, p, p_hash);

#ifdef MULTIPROCESSOR
	/* This process no longer needs to hold the kernel lock. */
	KERNEL_ASSERT_LOCKED();
	__mp_release_all(&kernel_lock);
#endif

	SCHED_LOCK(s);
	mi_switch();
	panic("mi_switch returned");
}

/*
 * Run queue management.
 */
void
sched_init_runqueues(void)
{
}

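/*
 * Put a proc on the run queue of the given cpu, choosing one with
 * sched_choosecpu() if ci is NULL. The cpu is woken out of idle if it
 * is idle, and told to reschedule if it is running something of lower
 * priority.
 *
 * There are SCHED_NQS queues per cpu and each queue covers four
 * priority values, so the queue index is prio >> 2; for example
 * (illustrative numbers only), a prio of 50 maps to queue 12.
 * spc_whichqs keeps one bit per non-empty queue so that
 * sched_chooseproc() can find the best one with ffs().
 */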
void
setrunqueue(struct cpu_info *ci, struct proc *p, uint8_t prio)
{
	struct schedstate_percpu *spc;
	int queue = prio >> 2;

	if (ci == NULL)
		ci = sched_choosecpu(p);

	KASSERT(ci != NULL);
	SCHED_ASSERT_LOCKED();

	p->p_cpu = ci;
	p->p_stat = SRUN;
	p->p_priority = prio;

	spc = &p->p_cpu->ci_schedstate;
	spc->spc_nrun++;

	TAILQ_INSERT_TAIL(&spc->spc_qs[queue], p, p_runq);
	spc->spc_whichqs |= (1 << queue);
	cpuset_add(&sched_queued_cpus, p->p_cpu);

	if (cpuset_isset(&sched_idle_cpus, p->p_cpu))
		cpu_unidle(p->p_cpu);

	if (prio < spc->spc_curpriority)
		need_resched(ci);
}

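/*
 * Take a proc off its run queue. The matching bit in spc_whichqs is
 * cleared when the queue becomes empty, and the cpu leaves
 * sched_queued_cpus when it has nothing queued at all.
 */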
void
remrunqueue(struct proc *p)
{
	struct schedstate_percpu *spc;
	int queue = p->p_priority >> 2;

	SCHED_ASSERT_LOCKED();
	spc = &p->p_cpu->ci_schedstate;
	spc->spc_nrun--;

	TAILQ_REMOVE(&spc->spc_qs[queue], p, p_runq);
	if (TAILQ_EMPTY(&spc->spc_qs[queue])) {
		spc->spc_whichqs &= ~(1 << queue);
		if (spc->spc_whichqs == 0)
			cpuset_del(&sched_queued_cpus, p->p_cpu);
	}
}

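/*
 * Pick the proc this cpu should run next: the head of the
 * highest-priority non-empty queue, else something stolen from another
 * cpu, else the idle proc. A cpu that is being halted first pushes its
 * queued procs back through setrunqueue() so they land elsewhere.
 */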
struct proc *
sched_chooseproc(void)
{
	struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
	struct proc *p;
	int queue;

	SCHED_ASSERT_LOCKED();

#ifdef MULTIPROCESSOR
	if (spc->spc_schedflags & SPCF_SHOULDHALT) {
		if (spc->spc_whichqs) {
			for (queue = 0; queue < SCHED_NQS; queue++) {
				while ((p = TAILQ_FIRST(&spc->spc_qs[queue]))) {
					remrunqueue(p);
					setrunqueue(NULL, p, p->p_priority);
					if (p->p_cpu == curcpu()) {
						KASSERT(p->p_flag & P_CPUPEG);
						goto again;
					}
				}
			}
		}
		p = spc->spc_idleproc;
		KASSERT(p);
		KASSERT(p->p_wchan == NULL);
		p->p_stat = SRUN;
		return (p);
	}
#endif

again:
	if (spc->spc_whichqs) {
		queue = ffs(spc->spc_whichqs) - 1;
		p = TAILQ_FIRST(&spc->spc_qs[queue]);
		remrunqueue(p);
		sched_noidle++;
		if (p->p_stat != SRUN)
			panic("thread %d not in SRUN: %d", p->p_tid, p->p_stat);
	} else if ((p = sched_steal_proc(curcpu())) == NULL) {
		p = spc->spc_idleproc;
		if (p == NULL) {
			int s;
			/*
			 * We get here if someone decides to switch during
			 * boot before forking kthreads, bleh.
			 * This is kind of like a stupid idle loop.
			 */
#ifdef MULTIPROCESSOR
			__mp_unlock(&sched_lock);
#endif
			spl0();
			delay(10);
			SCHED_LOCK(s);
			goto again;
		}
		KASSERT(p);
		p->p_stat = SRUN;
	}

	KASSERT(p->p_wchan == NULL);
	return (p);
}

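/*
 * Choose a cpu for a newly forked proc: prefer a cpu that is idle with
 * nothing queued; failing that, the cpu with the fewest queued procs,
 * breaking ties on the lowest load average.
 */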
struct cpu_info *
sched_choosecpu_fork(struct proc *parent, int flags)
{
#ifdef MULTIPROCESSOR
	struct cpu_info *choice = NULL;
	fixpt_t load, best_load = ~0;
	int run, best_run = INT_MAX;
	struct cpu_info *ci;
	struct cpuset set;

#if 0
	/*
	 * XXX
	 * Don't do this until we have a painless way to move the cpu in exec.
	 * Preferably when nuking the old pmap and getting a new one on a
	 * new cpu.
	 */
	/*
	 * PPWAIT forks are simple. We know that the parent will not
	 * run until we exec and choose another cpu, so we just steal its
	 * cpu.
	 */
	if (flags & FORK_PPWAIT)
		return (parent->p_cpu);
#endif

	/*
	 * Look at all cpus that are currently idle and have nothing queued.
	 * If there are none, pick the one with the fewest queued procs
	 * first, then the one with the lowest load average.
	 */
	cpuset_complement(&set, &sched_queued_cpus, &sched_idle_cpus);
	cpuset_intersection(&set, &set, &sched_all_cpus);
	if (cpuset_first(&set) == NULL)
		cpuset_copy(&set, &sched_all_cpus);

	while ((ci = cpuset_first(&set)) != NULL) {
		cpuset_del(&set, ci);

		load = ci->ci_schedstate.spc_ldavg;
		run = ci->ci_schedstate.spc_nrun;

		if (choice == NULL || run < best_run ||
		    (run == best_run && load < best_load)) {
			choice = ci;
			best_load = load;
			best_run = run;
		}
	}

	return (choice);
#else
	return (curcpu());
#endif
}

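/*
 * Choose a cpu for an existing proc. A proc pegged to a cpu never moves;
 * otherwise its current cpu is kept whenever it is still a reasonable
 * choice, and the cheapest cpu according to sched_proc_to_cpu_cost() is
 * picked otherwise.
 */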
struct cpu_info *
sched_choosecpu(struct proc *p)
{
#ifdef MULTIPROCESSOR
	struct cpu_info *choice = NULL;
	int last_cost = INT_MAX;
	struct cpu_info *ci;
	struct cpuset set;

	/*
	 * If pegged to a cpu, don't allow it to move.
	 */
	if (p->p_flag & P_CPUPEG)
		return (p->p_cpu);

	sched_choose++;

	/*
	 * Look at all cpus that are currently idle and have nothing queued.
	 * If there are none, fall back to all cpus and pick the cheapest one.
	 * (Idle + queued could mean that the cpu is handling an interrupt
	 * at this moment and hasn't had time to leave idle yet.)
	 */
	cpuset_complement(&set, &sched_queued_cpus, &sched_idle_cpus);
	cpuset_intersection(&set, &set, &sched_all_cpus);

	/*
	 * First, just check if our current cpu is in that set; if it is,
	 * this is simple.
	 * Also, our cpu might not be idle, but if it's the current cpu,
	 * it has nothing else queued and we're curproc, take it.
	 */
	if (cpuset_isset(&set, p->p_cpu) ||
	    (p->p_cpu == curcpu() && p->p_cpu->ci_schedstate.spc_nrun == 0 &&
	    (p->p_cpu->ci_schedstate.spc_schedflags & SPCF_SHOULDHALT) == 0 &&
	    curproc == p)) {
		sched_wasidle++;
		return (p->p_cpu);
	}

	if (cpuset_first(&set) == NULL)
		cpuset_copy(&set, &sched_all_cpus);

	while ((ci = cpuset_first(&set)) != NULL) {
		int cost = sched_proc_to_cpu_cost(ci, p);

		if (choice == NULL || cost < last_cost) {
			choice = ci;
			last_cost = cost;
		}
		cpuset_del(&set, ci);
	}

	if (p->p_cpu != choice)
		sched_nmigrations++;
	else
		sched_nomigrations++;

	return (choice);
#else
	return (curcpu());
#endif
}

/*
 * Attempt to steal a proc from some cpu.
 */
struct proc *
sched_steal_proc(struct cpu_info *self)
{
	struct proc *best = NULL;
#ifdef MULTIPROCESSOR
	struct schedstate_percpu *spc;
	int bestcost = INT_MAX;
	struct cpu_info *ci;
	struct cpuset set;

	KASSERT((self->ci_schedstate.spc_schedflags & SPCF_SHOULDHALT) == 0);

	/* Don't steal if we don't want to schedule processes on this CPU. */
	if (!cpuset_isset(&sched_all_cpus, self))
		return (NULL);

	cpuset_copy(&set, &sched_queued_cpus);

	while ((ci = cpuset_first(&set)) != NULL) {
		struct proc *p;
		int queue;
		int cost;

		cpuset_del(&set, ci);

		spc = &ci->ci_schedstate;

		queue = ffs(spc->spc_whichqs) - 1;
		TAILQ_FOREACH(p, &spc->spc_qs[queue], p_runq) {
			if (p->p_flag & P_CPUPEG)
				continue;

			cost = sched_proc_to_cpu_cost(self, p);

			if (best == NULL || cost < bestcost) {
				best = p;
				bestcost = cost;
			}
		}
	}
	if (best == NULL)
		return (NULL);

	spc = &best->p_cpu->ci_schedstate;
	remrunqueue(best);
	best->p_cpu = self;

	sched_stolen++;
#endif
	return (best);
}

#ifdef MULTIPROCESSOR
/*
 * Base 2 logarithm of an int. Returns 0 for 0 (yes, I know).
 */
static int
log2(unsigned int i)
{
	int ret = 0;

	while (i >>= 1)
		ret++;

	return (ret);
}

/*
 * Calculate the cost of moving the proc to this cpu.
 *
 * What we want is some guesstimate of how much "performance" it will
 * cost us to move the proc here. Not just for caches and TLBs and NUMA
 * memory, but also for the proc itself. A highly loaded cpu might not
 * be the best candidate for this proc since it won't get run.
 *
 * Just total guesstimates for now.
 */

int sched_cost_load = 1;
int sched_cost_priority = 1;
int sched_cost_runnable = 3;
int sched_cost_resident = 1;
#endif
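
/*
 * Worked example with the default weights above (illustrative numbers
 * only): moving a proc with p_priority 50 to a non-primary cpu that is
 * not idle, has two procs queued, whose curproc has priority 30, with a
 * load average of 1.0 (spc_ldavg == FSCALE) and no residency bonus gives
 *
 *	cost = (50 - 30) * 1		(priority difference)
 *	     + 3			(destination is busy)
 *	     + 2 * 3			(two procs already queued)
 *	     + (1 * FSCALE) >> FSHIFT	(load average of 1.0)
 *	     = 30
 */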

int
sched_proc_to_cpu_cost(struct cpu_info *ci, struct proc *p)
{
	int cost = 0;
#ifdef MULTIPROCESSOR
	struct schedstate_percpu *spc;
	int l2resident = 0;

	spc = &ci->ci_schedstate;

	/*
	 * First, account for the priority of the proc we want to move.
	 * The higher the priority of the proc and the lower the priority
	 * of what currently runs on the destination, the more willing we
	 * are to move it there.
	 */
	if (!cpuset_isset(&sched_idle_cpus, ci)) {
		cost += (p->p_priority - spc->spc_curpriority) *
		    sched_cost_priority;
		cost += sched_cost_runnable;
	}
	if (cpuset_isset(&sched_queued_cpus, ci))
		cost += spc->spc_nrun * sched_cost_runnable;

	/*
	 * Try to avoid the primary cpu as it handles hardware interrupts.
	 *
	 * XXX Needs to be revisited when we distribute interrupts
	 * over cpus.
	 */
	if (CPU_IS_PRIMARY(ci))
		cost += sched_cost_runnable;

	/*
	 * Higher load on the destination means we don't want to go there.
	 */
	cost += ((sched_cost_load * spc->spc_ldavg) >> FSHIFT);

	/*
	 * If the proc is on this cpu already, lower the cost by how much
	 * it has been running and an estimate of its footprint.
	 */
	if (p->p_cpu == ci && p->p_slptime == 0) {
		l2resident =
		    log2(pmap_resident_count(p->p_vmspace->vm_map.pmap));
		cost -= l2resident * sched_cost_resident;
	}
#endif
	return (cost);
}

/*
 * Peg a proc to a cpu.
 */
void
sched_peg_curproc(struct cpu_info *ci)
{
	struct proc *p = curproc;
	int s;

	SCHED_LOCK(s);
	atomic_setbits_int(&p->p_flag, P_CPUPEG);
	setrunqueue(ci, p, p->p_usrpri);
	p->p_ru.ru_nvcsw++;
	mi_switch();
	SCHED_UNLOCK(s);
}

#ifdef MULTIPROCESSOR

void
sched_start_secondary_cpus(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;

	CPU_INFO_FOREACH(cii, ci) {
		struct schedstate_percpu *spc = &ci->ci_schedstate;

		if (CPU_IS_PRIMARY(ci))
			continue;
		atomic_clearbits_int(&spc->spc_schedflags,
		    SPCF_SHOULDHALT | SPCF_HALTED);
#ifdef __HAVE_CPU_TOPOLOGY
		if (!sched_smt && ci->ci_smt_id > 0)
			continue;
#endif
		cpuset_add(&sched_all_cpus, ci);
	}
}

void
sched_stop_secondary_cpus(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;

	/*
	 * Make sure we stop the secondary CPUs.
	 */
	CPU_INFO_FOREACH(cii, ci) {
		struct schedstate_percpu *spc = &ci->ci_schedstate;

		if (CPU_IS_PRIMARY(ci))
			continue;
		cpuset_del(&sched_all_cpus, ci);
		atomic_setbits_int(&spc->spc_schedflags, SPCF_SHOULDHALT);
	}
	CPU_INFO_FOREACH(cii, ci) {
		struct schedstate_percpu *spc = &ci->ci_schedstate;
		struct sleep_state sls;

		if (CPU_IS_PRIMARY(ci))
			continue;
		while ((spc->spc_schedflags & SPCF_HALTED) == 0) {
			sleep_setup(&sls, spc, PZERO, "schedstate");
			sleep_finish(&sls,
			    (spc->spc_schedflags & SPCF_HALTED) == 0);
		}
	}
}

struct sched_barrier_state {
	struct cpu_info *ci;
	struct cond cond;
};

void
sched_barrier_task(void *arg)
{
	struct sched_barrier_state *sb = arg;
	struct cpu_info *ci = sb->ci;

	sched_peg_curproc(ci);
	cond_signal(&sb->cond);
	atomic_clearbits_int(&curproc->p_flag, P_CPUPEG);
}

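/*
 * Wait until the given cpu (the primary cpu if ci is NULL) has run a
 * task pegged to it, which guarantees that it has gone through a
 * context switch at least once. Calling this for the current cpu is a
 * no-op.
 */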
void
sched_barrier(struct cpu_info *ci)
{
	struct sched_barrier_state sb;
	struct task task;
	CPU_INFO_ITERATOR cii;

	if (ci == NULL) {
		CPU_INFO_FOREACH(cii, ci) {
			if (CPU_IS_PRIMARY(ci))
				break;
		}
	}
	KASSERT(ci != NULL);

	if (ci == curcpu())
		return;

	sb.ci = ci;
	cond_init(&sb.cond);
	task_set(&task, sched_barrier_task, &sb);

	task_add(systqmp, &task);
	cond_wait(&sb.cond, "sbar");
}

#else

void
sched_barrier(struct cpu_info *ci)
{
}

#endif

/*
 * Functions to manipulate cpu sets.
 */
struct cpu_info *cpuset_infos[MAXCPUS];
static struct cpuset cpuset_all;
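
/*
 * A cpuset is a bitmask with one bit per cpu, indexed by CPU_INFO_UNIT():
 * the cpu lives in word num / 32, bit num % 32. For example (illustrative
 * numbers only), a cpu with unit number 37 is cs_set[1] bit 5.
 */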

void
cpuset_init_cpu(struct cpu_info *ci)
{
	cpuset_add(&cpuset_all, ci);
	cpuset_infos[CPU_INFO_UNIT(ci)] = ci;
}

void
cpuset_clear(struct cpuset *cs)
{
	memset(cs, 0, sizeof(*cs));
}

void
cpuset_add(struct cpuset *cs, struct cpu_info *ci)
{
	unsigned int num = CPU_INFO_UNIT(ci);
	atomic_setbits_int(&cs->cs_set[num/32], (1 << (num % 32)));
}

void
cpuset_del(struct cpuset *cs, struct cpu_info *ci)
{
	unsigned int num = CPU_INFO_UNIT(ci);
	atomic_clearbits_int(&cs->cs_set[num/32], (1 << (num % 32)));
}

int
cpuset_isset(struct cpuset *cs, struct cpu_info *ci)
{
	unsigned int num = CPU_INFO_UNIT(ci);
	return (cs->cs_set[num/32] & (1 << (num % 32)));
}

void
cpuset_add_all(struct cpuset *cs)
{
	cpuset_copy(cs, &cpuset_all);
}

void
cpuset_copy(struct cpuset *to, struct cpuset *from)
{
	memcpy(to, from, sizeof(*to));
}

struct cpu_info *
cpuset_first(struct cpuset *cs)
{
	int i;

	for (i = 0; i < CPUSET_ASIZE(ncpus); i++)
		if (cs->cs_set[i])
			return (cpuset_infos[i * 32 + ffs(cs->cs_set[i]) - 1]);

	return (NULL);
}

void
cpuset_union(struct cpuset *to, struct cpuset *a, struct cpuset *b)
{
	int i;

	for (i = 0; i < CPUSET_ASIZE(ncpus); i++)
		to->cs_set[i] = a->cs_set[i] | b->cs_set[i];
}

void
cpuset_intersection(struct cpuset *to, struct cpuset *a, struct cpuset *b)
{
	int i;

	for (i = 0; i < CPUSET_ASIZE(ncpus); i++)
		to->cs_set[i] = a->cs_set[i] & b->cs_set[i];
}

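/*
 * Note the argument order: cpuset_complement(to, a, b) computes "b minus a",
 * i.e. the bits set in b that are not set in a.
 */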
void
cpuset_complement(struct cpuset *to, struct cpuset *a, struct cpuset *b)
{
	int i;

	for (i = 0; i < CPUSET_ASIZE(ncpus); i++)
		to->cs_set[i] = b->cs_set[i] & ~a->cs_set[i];
}

int
cpuset_cardinality(struct cpuset *cs)
{
	int cardinality, i, n;

	cardinality = 0;

	for (i = 0; i < CPUSET_ASIZE(ncpus); i++)
		for (n = cs->cs_set[i]; n != 0; n &= n - 1)
			cardinality++;

	return (cardinality);
}

int
sysctl_hwncpuonline(void)
{
	return cpuset_cardinality(&sched_all_cpus);
}

int
cpu_is_online(struct cpu_info *ci)
{
	return cpuset_isset(&sched_all_cpus, ci);
}

#ifdef __HAVE_CPU_TOPOLOGY

#include <sys/sysctl.h>

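/*
 * Handler for the hw.smt sysctl: when SMT is disabled, secondary SMT
 * siblings (ci_smt_id != 0) are removed from sched_all_cpus so nothing
 * gets scheduled on them; enabling it adds them back.
 */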
int
sysctl_hwsmt(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	int err, newsmt;

	newsmt = sched_smt;
	err = sysctl_int(oldp, oldlenp, newp, newlen, &newsmt);
	if (err)
		return err;
	if (newsmt > 1)
		newsmt = 1;
	if (newsmt < 0)
		newsmt = 0;
	if (newsmt == sched_smt)
		return 0;

	sched_smt = newsmt;
	CPU_INFO_FOREACH(cii, ci) {
		if (CPU_IS_PRIMARY(ci))
			continue;
		if (ci->ci_smt_id == 0)
			continue;
		if (sched_smt)
			cpuset_add(&sched_all_cpus, ci);
		else
			cpuset_del(&sched_all_cpus, ci);
	}

	return 0;
}

#endif
877