xref: /openbsd-src/sys/kern/kern_sched.c (revision 4b70baf6e17fc8b27fc1f7fa7929335753fa94c3)
1 /*	$OpenBSD: kern_sched.c,v 1.56 2019/03/26 04:24:22 visa Exp $	*/
2 /*
3  * Copyright (c) 2007, 2008 Artur Grabowski <art@openbsd.org>
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  */
17 
18 #include <sys/param.h>
19 
20 #include <sys/sched.h>
21 #include <sys/proc.h>
22 #include <sys/kthread.h>
23 #include <sys/systm.h>
24 #include <sys/resourcevar.h>
25 #include <sys/signalvar.h>
26 #include <sys/mutex.h>
27 #include <sys/task.h>
28 #include <sys/smr.h>
29 
30 #include <uvm/uvm_extern.h>
31 
/* Deferred callback registered by sched_init_cpu(); forks a cpu's idle proc. */
void sched_kthreads_create(void *);

/* Scheduler-internal helpers, defined further down in this file. */
int sched_proc_to_cpu_cost(struct cpu_info *ci, struct proc *p);
struct proc *sched_steal_proc(struct cpu_info *);
36 
/*
 * To help choose which cpu should run which process we keep track
 * of cpus which are currently idle and which cpus have processes
 * queued.
 *
 * sched_idle_cpus   - maintained by sched_idle().
 * sched_queued_cpus - maintained by setrunqueue() and remrunqueue().
 * sched_all_cpus    - cpus that procs may be scheduled on; this is the
 *                     set tested by cpu_is_online().
 */
struct cpuset sched_idle_cpus;
struct cpuset sched_queued_cpus;
struct cpuset sched_all_cpus;

/*
 * Some general scheduler counters.
 */
uint64_t sched_nmigrations;	/* Cpu migration counter */
uint64_t sched_nomigrations;	/* Cpu no migration counter */
uint64_t sched_noidle;		/* Times we didn't pick the idle task */
uint64_t sched_stolen;		/* Times we stole proc from other cpus */
uint64_t sched_choose;		/* Times we chose a cpu */
uint64_t sched_wasidle;		/* Times we came out of idle */

#ifdef MULTIPROCESSOR
/* NOTE(review): not referenced anywhere else in this file. */
struct taskq *sbartq;
#endif

/*
 * Non-zero when cpus with ci_smt_id > 0 may be placed in sched_all_cpus;
 * changed at run time through sysctl_hwsmt().
 */
int sched_smt;
61 
62 /*
63  * A few notes about cpu_switchto that is implemented in MD code.
64  *
65  * cpu_switchto takes two arguments, the old proc and the proc
66  * it should switch to. The new proc will never be NULL, so we always have
67  * a saved state that we need to switch to. The old proc however can
68  * be NULL if the process is exiting. NULL for the old proc simply
69  * means "don't bother saving old state".
70  *
 * cpu_switchto is supposed to atomically load the new state of the process,
 * including the pcb and pmap, and to set curproc, the p_cpu pointer in the
 * proc and p_stat to SONPROC. "Atomically" here means with respect to
 * interrupts only; other cpus in the system must not depend on this state
 * being consistent. Therefore no locking is necessary in cpu_switchto other
 * than blocking interrupts during the context switch.
77  */
78 
79 /*
80  * sched_init_cpu is called from main() for the boot cpu, then it's the
81  * responsibility of the MD code to call it for all other cpus.
82  */
83 void
84 sched_init_cpu(struct cpu_info *ci)
85 {
86 	struct schedstate_percpu *spc = &ci->ci_schedstate;
87 	int i;
88 
89 	for (i = 0; i < SCHED_NQS; i++)
90 		TAILQ_INIT(&spc->spc_qs[i]);
91 
92 	spc->spc_idleproc = NULL;
93 
94 	kthread_create_deferred(sched_kthreads_create, ci);
95 
96 	LIST_INIT(&spc->spc_deadproc);
97 	SIMPLEQ_INIT(&spc->spc_deferred);
98 
99 	/*
100 	 * Slight hack here until the cpuset code handles cpu_info
101 	 * structures.
102 	 */
103 	cpuset_init_cpu(ci);
104 
105 #ifdef __HAVE_CPU_TOPOLOGY
106 	if (!sched_smt && ci->ci_smt_id > 0)
107 		return;
108 #endif
109 	cpuset_add(&sched_all_cpus, ci);
110 }
111 
112 void
113 sched_kthreads_create(void *v)
114 {
115 	struct cpu_info *ci = v;
116 	struct schedstate_percpu *spc = &ci->ci_schedstate;
117 	static int num;
118 
119 	if (fork1(&proc0, FORK_SHAREVM|FORK_SHAREFILES|FORK_NOZOMBIE|
120 	    FORK_SYSTEM|FORK_SIGHAND|FORK_IDLE, sched_idle, ci, NULL,
121 	    &spc->spc_idleproc))
122 		panic("fork idle");
123 
124 	/* Name it as specified. */
125 	snprintf(spc->spc_idleproc->p_p->ps_comm,
126 	    sizeof(spc->spc_idleproc->p_p->ps_comm),
127 	    "idle%d", num);
128 
129 	num++;
130 }
131 
132 void
133 sched_idle(void *v)
134 {
135 	struct schedstate_percpu *spc;
136 	struct proc *p = curproc;
137 	struct cpu_info *ci = v;
138 	int s;
139 
140 	KERNEL_UNLOCK();
141 
142 	spc = &ci->ci_schedstate;
143 
144 	/*
145 	 * First time we enter here, we're not supposed to idle,
146 	 * just go away for a while.
147 	 */
148 	SCHED_LOCK(s);
149 	cpuset_add(&sched_idle_cpus, ci);
150 	p->p_stat = SSLEEP;
151 	p->p_cpu = ci;
152 	atomic_setbits_int(&p->p_flag, P_CPUPEG);
153 	mi_switch();
154 	cpuset_del(&sched_idle_cpus, ci);
155 	SCHED_UNLOCK(s);
156 
157 	KASSERT(ci == curcpu());
158 	KASSERT(curproc == spc->spc_idleproc);
159 
160 	while (1) {
161 		while (!cpu_is_idle(curcpu())) {
162 			struct proc *dead;
163 
164 			SCHED_LOCK(s);
165 			p->p_stat = SSLEEP;
166 			mi_switch();
167 			SCHED_UNLOCK(s);
168 
169 			while ((dead = LIST_FIRST(&spc->spc_deadproc))) {
170 				LIST_REMOVE(dead, p_hash);
171 				exit2(dead);
172 			}
173 		}
174 
175 		splassert(IPL_NONE);
176 
177 		smr_idle();
178 
179 		cpuset_add(&sched_idle_cpus, ci);
180 		cpu_idle_enter();
181 		while (spc->spc_whichqs == 0) {
182 #ifdef MULTIPROCESSOR
183 			if (spc->spc_schedflags & SPCF_SHOULDHALT &&
184 			    (spc->spc_schedflags & SPCF_HALTED) == 0) {
185 				cpuset_del(&sched_idle_cpus, ci);
186 				SCHED_LOCK(s);
187 				atomic_setbits_int(&spc->spc_schedflags,
188 				    spc->spc_whichqs ? 0 : SPCF_HALTED);
189 				SCHED_UNLOCK(s);
190 				wakeup(spc);
191 			}
192 #endif
193 			cpu_idle_cycle();
194 		}
195 		cpu_idle_leave();
196 		cpuset_del(&sched_idle_cpus, ci);
197 	}
198 }
199 
200 /*
201  * To free our address space we have to jump through a few hoops.
202  * The freeing is done by the reaper, but until we have one reaper
203  * per cpu, we have no way of putting this proc on the deadproc list
204  * and waking up the reaper without risking having our address space and
205  * stack torn from under us before we manage to switch to another proc.
206  * Therefore we have a per-cpu list of dead processes where we put this
207  * proc and have idle clean up that list and move it to the reaper list.
208  * All this will be unnecessary once we can bind the reaper this cpu
209  * and not risk having it switch to another in case it sleeps.
210  */
211 void
212 sched_exit(struct proc *p)
213 {
214 	struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
215 	struct timespec ts;
216 	struct proc *idle;
217 	int s;
218 
219 	nanouptime(&ts);
220 	timespecsub(&ts, &spc->spc_runtime, &ts);
221 	timespecadd(&p->p_rtime, &ts, &p->p_rtime);
222 
223 	LIST_INSERT_HEAD(&spc->spc_deadproc, p, p_hash);
224 
225 #ifdef MULTIPROCESSOR
226 	/* This process no longer needs to hold the kernel lock. */
227 	KERNEL_ASSERT_LOCKED();
228 	__mp_release_all(&kernel_lock);
229 #endif
230 
231 	SCHED_LOCK(s);
232 	idle = spc->spc_idleproc;
233 	idle->p_stat = SRUN;
234 	cpu_switchto(NULL, idle);
235 	panic("cpu_switchto returned");
236 }
237 
238 /*
239  * Run queue management.
240  */
/*
 * Nothing to do here: the per-cpu run queues are set up by
 * sched_init_cpu().
 */
void
sched_init_runqueues(void)
{
}
245 
246 void
247 setrunqueue(struct proc *p)
248 {
249 	struct schedstate_percpu *spc;
250 	int queue = p->p_priority >> 2;
251 
252 	SCHED_ASSERT_LOCKED();
253 	spc = &p->p_cpu->ci_schedstate;
254 	spc->spc_nrun++;
255 
256 	TAILQ_INSERT_TAIL(&spc->spc_qs[queue], p, p_runq);
257 	spc->spc_whichqs |= (1 << queue);
258 	cpuset_add(&sched_queued_cpus, p->p_cpu);
259 
260 	if (cpuset_isset(&sched_idle_cpus, p->p_cpu))
261 		cpu_unidle(p->p_cpu);
262 }
263 
264 void
265 remrunqueue(struct proc *p)
266 {
267 	struct schedstate_percpu *spc;
268 	int queue = p->p_priority >> 2;
269 
270 	SCHED_ASSERT_LOCKED();
271 	spc = &p->p_cpu->ci_schedstate;
272 	spc->spc_nrun--;
273 
274 	TAILQ_REMOVE(&spc->spc_qs[queue], p, p_runq);
275 	if (TAILQ_EMPTY(&spc->spc_qs[queue])) {
276 		spc->spc_whichqs &= ~(1 << queue);
277 		if (spc->spc_whichqs == 0)
278 			cpuset_del(&sched_queued_cpus, p->p_cpu);
279 	}
280 }
281 
282 struct proc *
283 sched_chooseproc(void)
284 {
285 	struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
286 	struct proc *p;
287 	int queue;
288 
289 	SCHED_ASSERT_LOCKED();
290 
291 #ifdef MULTIPROCESSOR
292 	if (spc->spc_schedflags & SPCF_SHOULDHALT) {
293 		if (spc->spc_whichqs) {
294 			for (queue = 0; queue < SCHED_NQS; queue++) {
295 				while ((p = TAILQ_FIRST(&spc->spc_qs[queue]))) {
296 					remrunqueue(p);
297 					p->p_cpu = sched_choosecpu(p);
298 					setrunqueue(p);
299 					if (p->p_cpu == curcpu()) {
300 						KASSERT(p->p_flag & P_CPUPEG);
301 						goto again;
302 					}
303 				}
304 			}
305 		}
306 		p = spc->spc_idleproc;
307 		KASSERT(p);
308 		KASSERT(p->p_wchan == NULL);
309 		p->p_stat = SRUN;
310 		return (p);
311 	}
312 #endif
313 
314 again:
315 	if (spc->spc_whichqs) {
316 		queue = ffs(spc->spc_whichqs) - 1;
317 		p = TAILQ_FIRST(&spc->spc_qs[queue]);
318 		remrunqueue(p);
319 		sched_noidle++;
320 		KASSERT(p->p_stat == SRUN);
321 	} else if ((p = sched_steal_proc(curcpu())) == NULL) {
322 		p = spc->spc_idleproc;
323 		if (p == NULL) {
324                         int s;
325 			/*
326 			 * We get here if someone decides to switch during
327 			 * boot before forking kthreads, bleh.
328 			 * This is kind of like a stupid idle loop.
329 			 */
330 #ifdef MULTIPROCESSOR
331 			__mp_unlock(&sched_lock);
332 #endif
333 			spl0();
334 			delay(10);
335 			SCHED_LOCK(s);
336 			goto again;
337                 }
338 		KASSERT(p);
339 		p->p_stat = SRUN;
340 	}
341 
342 	KASSERT(p->p_wchan == NULL);
343 	return (p);
344 }
345 
/*
 * Choose the cpu a freshly forked proc should start on.
 *
 * On MULTIPROCESSOR kernels the candidates are the schedulable cpus that
 * are idle with nothing queued, or every schedulable cpu if there is no
 * such cpu.  Among the candidates the one with the fewest queued procs
 * wins, ties being broken by the lower load average.  On uniprocessor
 * kernels the current cpu is returned.  Neither argument is used at
 * present (see the disabled block below).
 */
struct cpu_info *
sched_choosecpu_fork(struct proc *parent, int flags)
{
#ifdef MULTIPROCESSOR
	struct cpu_info *ci, *choice = NULL;
	struct cpuset candidates;
	fixpt_t load, best_load = ~0;
	int run, best_run = INT_MAX;

#if 0
	/*
	 * XXX
	 * Don't do this until we have a painless way to move the cpu in exec.
	 * Preferably when nuking the old pmap and getting a new one on a
	 * new cpu.
	 */
	/*
	 * PPWAIT forks are simple. We know that the parent will not
	 * run until we exec and choose another cpu, so we just steal its
	 * cpu.
	 */
	if (flags & FORK_PPWAIT)
		return (parent->p_cpu);
#endif

	/*
	 * Look at all cpus that are currently idle and have nothing queued.
	 * If there are none, pick the one with least queued procs first,
	 * then the one with lowest load average.
	 */
	cpuset_complement(&candidates, &sched_queued_cpus, &sched_idle_cpus);
	cpuset_intersection(&candidates, &candidates, &sched_all_cpus);
	if (cpuset_first(&candidates) == NULL)
		cpuset_copy(&candidates, &sched_all_cpus);

	for (;;) {
		ci = cpuset_first(&candidates);
		if (ci == NULL)
			break;
		cpuset_del(&candidates, ci);

		load = ci->ci_schedstate.spc_ldavg;
		run = ci->ci_schedstate.spc_nrun;

		/* Skip cpus that are no better than the current choice. */
		if (choice != NULL && run >= best_run &&
		    (run != best_run || load >= best_load))
			continue;

		choice = ci;
		best_load = load;
		best_run = run;
	}

	return (choice);
#else
	return (curcpu());
#endif
}
401 
/*
 * Choose the cpu an existing proc should run on next.
 *
 * A proc with P_CPUPEG set always stays where it is.  Otherwise the proc
 * keeps its cpu if that cpu is idle with nothing queued, or if the proc
 * is curproc on the current cpu, nothing is queued there and the cpu is
 * not being halted.  Failing that, the cheapest cpu according to
 * sched_proc_to_cpu_cost() is chosen among the idle-and-unqueued
 * schedulable cpus, or among all schedulable cpus if there are none.
 * On uniprocessor kernels the current cpu is returned.
 */
struct cpu_info *
sched_choosecpu(struct proc *p)
{
#ifdef MULTIPROCESSOR
	struct cpu_info *ci, *choice = NULL;
	struct cpuset candidates;
	int cost, last_cost = INT_MAX;
	int stay;

	/*
	 * If pegged to a cpu, don't allow it to move.
	 */
	if (p->p_flag & P_CPUPEG)
		return (p->p_cpu);

	sched_choose++;

	/*
	 * Look at all cpus that are currently idle and have nothing queued.
	 * If there are none, pick the cheapest of those.
	 * (idle + queued could mean that the cpu is handling an interrupt
	 * at this moment and haven't had time to leave idle yet).
	 */
	cpuset_complement(&candidates, &sched_queued_cpus, &sched_idle_cpus);
	cpuset_intersection(&candidates, &candidates, &sched_all_cpus);

	/*
	 * First, just check if our current cpu is in that set, if it is,
	 * this is simple.
	 * Also, our cpu might not be idle, but if it's the current cpu
	 * and it has nothing else queued and we're curproc, take it.
	 */
	stay = cpuset_isset(&candidates, p->p_cpu);
	if (!stay &&
	    p->p_cpu == curcpu() &&
	    p->p_cpu->ci_schedstate.spc_nrun == 0 &&
	    (p->p_cpu->ci_schedstate.spc_schedflags & SPCF_SHOULDHALT) == 0 &&
	    curproc == p)
		stay = 1;
	if (stay) {
		sched_wasidle++;
		return (p->p_cpu);
	}

	if (cpuset_first(&candidates) == NULL)
		cpuset_copy(&candidates, &sched_all_cpus);

	for (ci = cpuset_first(&candidates); ci != NULL;
	    ci = cpuset_first(&candidates)) {
		cpuset_del(&candidates, ci);

		cost = sched_proc_to_cpu_cost(ci, p);
		if (choice == NULL || cost < last_cost) {
			choice = ci;
			last_cost = cost;
		}
	}

	if (p->p_cpu == choice)
		sched_nomigrations++;
	else
		sched_nmigrations++;

	return (choice);
#else
	return (curcpu());
#endif
}
465 
466 /*
467  * Attempt to steal a proc from some cpu.
468  */
469 struct proc *
470 sched_steal_proc(struct cpu_info *self)
471 {
472 	struct proc *best = NULL;
473 #ifdef MULTIPROCESSOR
474 	struct schedstate_percpu *spc;
475 	int bestcost = INT_MAX;
476 	struct cpu_info *ci;
477 	struct cpuset set;
478 
479 	KASSERT((self->ci_schedstate.spc_schedflags & SPCF_SHOULDHALT) == 0);
480 
481 	/* Don't steal if we don't want to schedule processes in this CPU. */
482 	if (!cpuset_isset(&sched_all_cpus, self))
483 		return (NULL);
484 
485 	cpuset_copy(&set, &sched_queued_cpus);
486 
487 	while ((ci = cpuset_first(&set)) != NULL) {
488 		struct proc *p;
489 		int queue;
490 		int cost;
491 
492 		cpuset_del(&set, ci);
493 
494 		spc = &ci->ci_schedstate;
495 
496 		queue = ffs(spc->spc_whichqs) - 1;
497 		TAILQ_FOREACH(p, &spc->spc_qs[queue], p_runq) {
498 			if (p->p_flag & P_CPUPEG)
499 				continue;
500 
501 			cost = sched_proc_to_cpu_cost(self, p);
502 
503 			if (best == NULL || cost < bestcost) {
504 				best = p;
505 				bestcost = cost;
506 			}
507 		}
508 	}
509 	if (best == NULL)
510 		return (NULL);
511 
512 	spc = &best->p_cpu->ci_schedstate;
513 	remrunqueue(best);
514 	best->p_cpu = self;
515 
516 	sched_stolen++;
517 #endif
518 	return (best);
519 }
520 
521 #ifdef MULTIPROCESSOR
/*
 * Integer base 2 logarithm, rounded down: the position of the highest
 * set bit of i.  Both 0 and 1 yield 0.
 */
static int
log2(unsigned int i)
{
	int bits;

	for (bits = 0; i > 1; i >>= 1)
		bits++;

	return (bits);
}
535 
/*
 * Calculate the cost of moving the proc to this cpu.
 *
 * What we want is some guesstimate of how much "performance" it will
 * cost us to move the proc here. Not just for caches and TLBs and NUMA
 * memory, but also for the proc itself. A highly loaded cpu might not
 * be the best candidate for this proc since it won't get run.
 *
 * Just total guesstimates for now.
 */

/*
 * Weights used by sched_proc_to_cpu_cost():
 *   sched_cost_load     - multiplies the destination's load average.
 *   sched_cost_priority - multiplies the priority difference between the
 *                         proc and what the destination currently runs.
 *   sched_cost_runnable - charged per queued proc, once for a non-idle
 *                         destination and once for the primary cpu.
 *   sched_cost_resident - multiplies log2 of the proc's resident page
 *                         count; subtracted when it already sits there.
 */
int sched_cost_load = 1;
int sched_cost_priority = 1;
int sched_cost_runnable = 3;
int sched_cost_resident = 1;
551 #endif
552 
/*
 * Estimate how costly it is to run proc p on cpu ci; lower is better.
 *
 * The estimate is built from the sched_cost_* weights: the priority gap
 * and a fixed charge if ci is not idle, a charge per proc queued on ci,
 * a charge for the primary cpu, and ci's load average.  If p is already
 * on ci and has not been sleeping, the cost is reduced by log2 of its
 * resident page count.  Uniprocessor kernels always return 0.
 */
int
sched_proc_to_cpu_cost(struct cpu_info *ci, struct proc *p)
{
	int cost = 0;
#ifdef MULTIPROCESSOR
	struct schedstate_percpu *spc = &ci->ci_schedstate;
	int footprint;

	/*
	 * First, account for the priority of the proc we want to move.
	 * More willing to move, the lower the priority of the destination
	 * and the higher the priority of the proc.
	 */
	if (!cpuset_isset(&sched_idle_cpus, ci)) {
		cost += (p->p_priority - spc->spc_curpriority) *
		    sched_cost_priority;
		cost += sched_cost_runnable;
	}

	/* Every proc already waiting there is in our way. */
	if (cpuset_isset(&sched_queued_cpus, ci))
		cost += spc->spc_nrun * sched_cost_runnable;

	/*
	 * Try to avoid the primary cpu as it handles hardware interrupts.
	 *
	 * XXX Needs to be revisited when we distribute interrupts
	 * over cpus.
	 */
	if (CPU_IS_PRIMARY(ci))
		cost += sched_cost_runnable;

	/*
	 * Higher load on the destination means we don't want to go there.
	 */
	cost += ((sched_cost_load * spc->spc_ldavg) >> FSHIFT);

	/*
	 * If the proc is on this cpu already, lower the cost by how much
	 * it has been running and an estimate of its footprint.
	 */
	if (p->p_cpu == ci && p->p_slptime == 0) {
		footprint =
		    log2(pmap_resident_count(p->p_vmspace->vm_map.pmap));
		cost -= footprint * sched_cost_resident;
	}
#endif
	return (cost);
}
602 
603 /*
604  * Peg a proc to a cpu.
605  */
606 void
607 sched_peg_curproc(struct cpu_info *ci)
608 {
609 	struct proc *p = curproc;
610 	int s;
611 
612 	SCHED_LOCK(s);
613 	p->p_priority = p->p_usrpri;
614 	p->p_stat = SRUN;
615 	p->p_cpu = ci;
616 	atomic_setbits_int(&p->p_flag, P_CPUPEG);
617 	setrunqueue(p);
618 	p->p_ru.ru_nvcsw++;
619 	mi_switch();
620 	SCHED_UNLOCK(s);
621 }
622 
623 #ifdef MULTIPROCESSOR
624 
625 void
626 sched_start_secondary_cpus(void)
627 {
628 	CPU_INFO_ITERATOR cii;
629 	struct cpu_info *ci;
630 
631 	CPU_INFO_FOREACH(cii, ci) {
632 		struct schedstate_percpu *spc = &ci->ci_schedstate;
633 
634 		if (CPU_IS_PRIMARY(ci))
635 			continue;
636 		atomic_clearbits_int(&spc->spc_schedflags,
637 		    SPCF_SHOULDHALT | SPCF_HALTED);
638 #ifdef __HAVE_CPU_TOPOLOGY
639 		if (!sched_smt && ci->ci_smt_id > 0)
640 			continue;
641 #endif
642 		cpuset_add(&sched_all_cpus, ci);
643 	}
644 }
645 
646 void
647 sched_stop_secondary_cpus(void)
648 {
649 	CPU_INFO_ITERATOR cii;
650 	struct cpu_info *ci;
651 
652 	/*
653 	 * Make sure we stop the secondary CPUs.
654 	 */
655 	CPU_INFO_FOREACH(cii, ci) {
656 		struct schedstate_percpu *spc = &ci->ci_schedstate;
657 
658 		if (CPU_IS_PRIMARY(ci))
659 			continue;
660 		cpuset_del(&sched_all_cpus, ci);
661 		atomic_setbits_int(&spc->spc_schedflags, SPCF_SHOULDHALT);
662 	}
663 	CPU_INFO_FOREACH(cii, ci) {
664 		struct schedstate_percpu *spc = &ci->ci_schedstate;
665 		struct sleep_state sls;
666 
667 		if (CPU_IS_PRIMARY(ci))
668 			continue;
669 		while ((spc->spc_schedflags & SPCF_HALTED) == 0) {
670 			sleep_setup(&sls, spc, PZERO, "schedstate");
671 			sleep_finish(&sls,
672 			    (spc->spc_schedflags & SPCF_HALTED) == 0);
673 		}
674 	}
675 }
676 
/* Argument passed from sched_barrier() to sched_barrier_task(). */
struct sched_barrier_state {
	struct cpu_info *ci;	/* cpu the task pegs itself to */
	struct cond cond;	/* signalled after sched_peg_curproc() returns */
};
681 
682 void
683 sched_barrier_task(void *arg)
684 {
685 	struct sched_barrier_state *sb = arg;
686 	struct cpu_info *ci = sb->ci;
687 
688 	sched_peg_curproc(ci);
689 	cond_signal(&sb->cond);
690 	atomic_clearbits_int(&curproc->p_flag, P_CPUPEG);
691 }
692 
693 void
694 sched_barrier(struct cpu_info *ci)
695 {
696 	struct sched_barrier_state sb;
697 	struct task task;
698 	CPU_INFO_ITERATOR cii;
699 
700 	if (ci == NULL) {
701 		CPU_INFO_FOREACH(cii, ci) {
702 			if (CPU_IS_PRIMARY(ci))
703 				break;
704 		}
705 	}
706 	KASSERT(ci != NULL);
707 
708 	if (ci == curcpu())
709 		return;
710 
711 	sb.ci = ci;
712 	cond_init(&sb.cond);
713 	task_set(&task, sched_barrier_task, &sb);
714 
715 	task_add(systqmp, &task);
716 	cond_wait(&sb.cond, "sbar");
717 }
718 
719 #else
720 
/*
 * Variant built when MULTIPROCESSOR is not defined: does nothing.
 */
void
sched_barrier(struct cpu_info *ci)
{
}
725 
726 #endif
727 
/*
 * Functions to manipulate cpu sets.
 *
 * A set is a bitmap of 32-bit words; the cpu with unit number
 * CPU_INFO_UNIT(ci) lives at bit (unit % 32) of word (unit / 32).
 */
/* Maps a unit number back to its cpu_info; filled by cpuset_init_cpu(). */
struct cpu_info *cpuset_infos[MAXCPUS];
/* Every cpu that has been passed to cpuset_init_cpu(). */
static struct cpuset cpuset_all;
733 
734 void
735 cpuset_init_cpu(struct cpu_info *ci)
736 {
737 	cpuset_add(&cpuset_all, ci);
738 	cpuset_infos[CPU_INFO_UNIT(ci)] = ci;
739 }
740 
741 void
742 cpuset_clear(struct cpuset *cs)
743 {
744 	memset(cs, 0, sizeof(*cs));
745 }
746 
747 void
748 cpuset_add(struct cpuset *cs, struct cpu_info *ci)
749 {
750 	unsigned int num = CPU_INFO_UNIT(ci);
751 	atomic_setbits_int(&cs->cs_set[num/32], (1 << (num % 32)));
752 }
753 
754 void
755 cpuset_del(struct cpuset *cs, struct cpu_info *ci)
756 {
757 	unsigned int num = CPU_INFO_UNIT(ci);
758 	atomic_clearbits_int(&cs->cs_set[num/32], (1 << (num % 32)));
759 }
760 
761 int
762 cpuset_isset(struct cpuset *cs, struct cpu_info *ci)
763 {
764 	unsigned int num = CPU_INFO_UNIT(ci);
765 	return (cs->cs_set[num/32] & (1 << (num % 32)));
766 }
767 
768 void
769 cpuset_add_all(struct cpuset *cs)
770 {
771 	cpuset_copy(cs, &cpuset_all);
772 }
773 
774 void
775 cpuset_copy(struct cpuset *to, struct cpuset *from)
776 {
777 	memcpy(to, from, sizeof(*to));
778 }
779 
780 struct cpu_info *
781 cpuset_first(struct cpuset *cs)
782 {
783 	int i;
784 
785 	for (i = 0; i < CPUSET_ASIZE(ncpus); i++)
786 		if (cs->cs_set[i])
787 			return (cpuset_infos[i * 32 + ffs(cs->cs_set[i]) - 1]);
788 
789 	return (NULL);
790 }
791 
792 void
793 cpuset_union(struct cpuset *to, struct cpuset *a, struct cpuset *b)
794 {
795 	int i;
796 
797 	for (i = 0; i < CPUSET_ASIZE(ncpus); i++)
798 		to->cs_set[i] = a->cs_set[i] | b->cs_set[i];
799 }
800 
801 void
802 cpuset_intersection(struct cpuset *to, struct cpuset *a, struct cpuset *b)
803 {
804 	int i;
805 
806 	for (i = 0; i < CPUSET_ASIZE(ncpus); i++)
807 		to->cs_set[i] = a->cs_set[i] & b->cs_set[i];
808 }
809 
810 void
811 cpuset_complement(struct cpuset *to, struct cpuset *a, struct cpuset *b)
812 {
813 	int i;
814 
815 	for (i = 0; i < CPUSET_ASIZE(ncpus); i++)
816 		to->cs_set[i] = b->cs_set[i] & ~a->cs_set[i];
817 }
818 
819 int
820 cpuset_cardinality(struct cpuset *cs)
821 {
822 	int cardinality, i, n;
823 
824 	cardinality = 0;
825 
826 	for (i = 0; i < CPUSET_ASIZE(ncpus); i++)
827 		for (n = cs->cs_set[i]; n != 0; n &= n - 1)
828 			cardinality++;
829 
830 	return (cardinality);
831 }
832 
833 int
834 sysctl_hwncpuonline(void)
835 {
836 	return cpuset_cardinality(&sched_all_cpus);
837 }
838 
839 int
840 cpu_is_online(struct cpu_info *ci)
841 {
842 	return cpuset_isset(&sched_all_cpus, ci);
843 }
844 
845 #ifdef __HAVE_CPU_TOPOLOGY
846 
847 #include <sys/sysctl.h>
848 
849 int
850 sysctl_hwsmt(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
851 {
852 	CPU_INFO_ITERATOR cii;
853 	struct cpu_info *ci;
854 	int err, newsmt;
855 
856 	newsmt = sched_smt;
857 	err = sysctl_int(oldp, oldlenp, newp, newlen, &newsmt);
858 	if (err)
859 		return err;
860 	if (newsmt > 1)
861 		newsmt = 1;
862 	if (newsmt < 0)
863 		newsmt = 0;
864 	if (newsmt == sched_smt)
865 		return 0;
866 
867 	sched_smt = newsmt;
868 	CPU_INFO_FOREACH(cii, ci) {
869 		if (CPU_IS_PRIMARY(ci))
870 			continue;
871 		if (ci->ci_smt_id == 0)
872 			continue;
873 		if (sched_smt)
874 			cpuset_add(&sched_all_cpus, ci);
875 		else
876 			cpuset_del(&sched_all_cpus, ci);
877 	}
878 
879 	return 0;
880 }
881 
882 #endif
883