/*	$OpenBSD: kern_sched.c,v 1.52 2018/09/26 17:23:13 cheloha Exp $	*/
/*
 * Copyright (c) 2007, 2008 Artur Grabowski <art@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>

#include <sys/sched.h>
#include <sys/proc.h>
#include <sys/kthread.h>
#include <sys/systm.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/mutex.h>
#include <sys/task.h>

#include <uvm/uvm_extern.h>

void sched_kthreads_create(void *);

int sched_proc_to_cpu_cost(struct cpu_info *ci, struct proc *p);
struct proc *sched_steal_proc(struct cpu_info *);

/*
 * To help choose which cpu should run which process, we keep track
 * of the cpus that are currently idle and the cpus that have processes
 * queued.
 */
struct cpuset sched_idle_cpus;
struct cpuset sched_queued_cpus;
struct cpuset sched_all_cpus;

/*
 * Some general scheduler counters.
 */
uint64_t sched_nmigrations;	/* Times we migrated a proc to another cpu */
uint64_t sched_nomigrations;	/* Times we left a proc on its current cpu */
uint64_t sched_noidle;		/* Times we didn't pick the idle task */
uint64_t sched_stolen;		/* Times we stole proc from other cpus */
uint64_t sched_choose;		/* Times we chose a cpu */
uint64_t sched_wasidle;		/* Times we came out of idle */

#ifdef MULTIPROCESSOR
struct taskq *sbartq;
#endif

int sched_smt;

/*
 * A few notes about cpu_switchto, which is implemented in MD code.
 *
 * cpu_switchto takes two arguments, the old proc and the proc
 * it should switch to. The new proc will never be NULL, so we always have
 * a saved state that we need to switch to. The old proc however can
 * be NULL if the process is exiting. NULL for the old proc simply
 * means "don't bother saving old state".
 *
 * cpu_switchto is supposed to atomically load the new state of the process
 * including the pcb, pmap and setting curproc, the p_cpu pointer in the
 * proc and p_stat to SONPROC. This is atomic only with respect to
 * interrupts; other cpus in the system must not depend on this state
 * being consistent. Therefore no locking is necessary in cpu_switchto
 * other than blocking interrupts during the context switch.
 */
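/*
 * Illustrative only: the rough shape of an MD cpu_switchto under the
 * contract described above.  This is a sketch, not any particular
 * architecture's implementation:
 *
 *	cpu_switchto(struct proc *old, struct proc *new)
 *	{
 *		block interrupts;
 *		if (old != NULL)
 *			save old's register state into old's pcb;
 *		activate new's pmap and load new's pcb;
 *		new->p_cpu = curcpu();
 *		new->p_stat = SONPROC;
 *		curproc = new;
 *		restore new's register state and return on new's stack;
 *	}
 */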

/*
 * sched_init_cpu is called from main() for the boot cpu, then it's the
 * responsibility of the MD code to call it for all other cpus.
 */
void
sched_init_cpu(struct cpu_info *ci)
{
	struct schedstate_percpu *spc = &ci->ci_schedstate;
	int i;

	for (i = 0; i < SCHED_NQS; i++)
		TAILQ_INIT(&spc->spc_qs[i]);

	spc->spc_idleproc = NULL;

	kthread_create_deferred(sched_kthreads_create, ci);

	LIST_INIT(&spc->spc_deadproc);

	/*
	 * Slight hack here until the cpuset code handles cpu_info
	 * structures.
	 */
	cpuset_init_cpu(ci);

#ifdef __HAVE_CPU_TOPOLOGY
	if (!sched_smt && ci->ci_smt_id > 0)
		return;
#endif
	cpuset_add(&sched_all_cpus, ci);
}

void
sched_kthreads_create(void *v)
{
	struct cpu_info *ci = v;
	struct schedstate_percpu *spc = &ci->ci_schedstate;
	static int num;

	if (fork1(&proc0, FORK_SHAREVM|FORK_SHAREFILES|FORK_NOZOMBIE|
	    FORK_SYSTEM|FORK_SIGHAND|FORK_IDLE, sched_idle, ci, NULL,
	    &spc->spc_idleproc))
		panic("fork idle");

	/* Name it as specified. */
	snprintf(spc->spc_idleproc->p_p->ps_comm,
	    sizeof(spc->spc_idleproc->p_p->ps_comm),
	    "idle%d", num);

	num++;
}

void
sched_idle(void *v)
{
	struct schedstate_percpu *spc;
	struct proc *p = curproc;
	struct cpu_info *ci = v;
	int s;

	KERNEL_UNLOCK();

	spc = &ci->ci_schedstate;

	/*
	 * First time we enter here, we're not supposed to idle,
	 * just go away for a while.
	 */
	SCHED_LOCK(s);
	cpuset_add(&sched_idle_cpus, ci);
	p->p_stat = SSLEEP;
	p->p_cpu = ci;
	atomic_setbits_int(&p->p_flag, P_CPUPEG);
	mi_switch();
	cpuset_del(&sched_idle_cpus, ci);
	SCHED_UNLOCK(s);

	KASSERT(ci == curcpu());
	KASSERT(curproc == spc->spc_idleproc);

	while (1) {
		while (!cpu_is_idle(curcpu())) {
			struct proc *dead;

			SCHED_LOCK(s);
			p->p_stat = SSLEEP;
			mi_switch();
			SCHED_UNLOCK(s);

			while ((dead = LIST_FIRST(&spc->spc_deadproc))) {
				LIST_REMOVE(dead, p_hash);
				exit2(dead);
			}
		}

		splassert(IPL_NONE);

		cpuset_add(&sched_idle_cpus, ci);
		cpu_idle_enter();
		while (spc->spc_whichqs == 0) {
#ifdef MULTIPROCESSOR
			if (spc->spc_schedflags & SPCF_SHOULDHALT &&
			    (spc->spc_schedflags & SPCF_HALTED) == 0) {
				cpuset_del(&sched_idle_cpus, ci);
				SCHED_LOCK(s);
				atomic_setbits_int(&spc->spc_schedflags,
				    spc->spc_whichqs ? 0 : SPCF_HALTED);
				SCHED_UNLOCK(s);
				wakeup(spc);
			}
#endif
			cpu_idle_cycle();
		}
		cpu_idle_leave();
		cpuset_del(&sched_idle_cpus, ci);
	}
}

/*
 * To free our address space we have to jump through a few hoops.
 * The freeing is done by the reaper, but until we have one reaper
 * per cpu, we have no way of putting this proc on the deadproc list
 * and waking up the reaper without risking having our address space and
 * stack torn from under us before we manage to switch to another proc.
 * Therefore we have a per-cpu list of dead processes where we put this
 * proc and have the idle loop clean up that list and move it to the
 * reaper list.
 * All this will be unnecessary once we can bind the reaper to this cpu
 * and not risk having it switch to another in case it sleeps.
 */
void
sched_exit(struct proc *p)
{
	struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
	struct timespec ts;
	struct proc *idle;
	int s;

	nanouptime(&ts);
	timespecsub(&ts, &spc->spc_runtime, &ts);
	timespecadd(&p->p_rtime, &ts, &p->p_rtime);

	LIST_INSERT_HEAD(&spc->spc_deadproc, p, p_hash);

#ifdef MULTIPROCESSOR
	/* This process no longer needs to hold the kernel lock. */
	KERNEL_ASSERT_LOCKED();
	__mp_release_all(&kernel_lock);
#endif

	SCHED_LOCK(s);
	idle = spc->spc_idleproc;
	idle->p_stat = SRUN;
	cpu_switchto(NULL, idle);
	panic("cpu_switchto returned");
}

/*
 * Run queue management.
 */
void
sched_init_runqueues(void)
{
}

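/*
 * Illustrative note on the queue mapping below: with SCHED_NQS run queues
 * (32 at the time of writing) and priorities in the 0-127 range,
 * p_priority >> 2 folds four priority levels into one queue.  For example,
 * p_priority 50 lands in queue 12, and bit 12 of spc_whichqs stays set
 * while that queue is non-empty.  Lower queue index means higher priority.
 */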
void
setrunqueue(struct proc *p)
{
	struct schedstate_percpu *spc;
	int queue = p->p_priority >> 2;

	SCHED_ASSERT_LOCKED();
	spc = &p->p_cpu->ci_schedstate;
	spc->spc_nrun++;

	TAILQ_INSERT_TAIL(&spc->spc_qs[queue], p, p_runq);
	spc->spc_whichqs |= (1 << queue);
	cpuset_add(&sched_queued_cpus, p->p_cpu);

	if (cpuset_isset(&sched_idle_cpus, p->p_cpu))
		cpu_unidle(p->p_cpu);
}

void
remrunqueue(struct proc *p)
{
	struct schedstate_percpu *spc;
	int queue = p->p_priority >> 2;

	SCHED_ASSERT_LOCKED();
	spc = &p->p_cpu->ci_schedstate;
	spc->spc_nrun--;

	TAILQ_REMOVE(&spc->spc_qs[queue], p, p_runq);
	if (TAILQ_EMPTY(&spc->spc_qs[queue])) {
		spc->spc_whichqs &= ~(1 << queue);
		if (spc->spc_whichqs == 0)
			cpuset_del(&sched_queued_cpus, p->p_cpu);
	}
}

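/*
 * A worked example (illustrative) of how the queue bitmask is consumed
 * below: if spc_whichqs == 0x30, queues 4 and 5 are non-empty, so
 * ffs(0x30) returns 5 and sched_chooseproc() dequeues from queue 4,
 * the highest-priority non-empty queue.
 */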
struct proc *
sched_chooseproc(void)
{
	struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
	struct proc *p;
	int queue;

	SCHED_ASSERT_LOCKED();

#ifdef MULTIPROCESSOR
	if (spc->spc_schedflags & SPCF_SHOULDHALT) {
		if (spc->spc_whichqs) {
			for (queue = 0; queue < SCHED_NQS; queue++) {
				while ((p = TAILQ_FIRST(&spc->spc_qs[queue]))) {
					remrunqueue(p);
					p->p_cpu = sched_choosecpu(p);
					setrunqueue(p);
					if (p->p_cpu == curcpu()) {
						KASSERT(p->p_flag & P_CPUPEG);
						goto again;
					}
				}
			}
		}
		p = spc->spc_idleproc;
		KASSERT(p);
		KASSERT(p->p_wchan == NULL);
		p->p_stat = SRUN;
		return (p);
	}
#endif

again:
	if (spc->spc_whichqs) {
		queue = ffs(spc->spc_whichqs) - 1;
		p = TAILQ_FIRST(&spc->spc_qs[queue]);
		remrunqueue(p);
		sched_noidle++;
		KASSERT(p->p_stat == SRUN);
	} else if ((p = sched_steal_proc(curcpu())) == NULL) {
		p = spc->spc_idleproc;
		if (p == NULL) {
			int s;
			/*
			 * We get here if someone decides to switch during
			 * boot before forking kthreads, bleh.
			 * This is kind of like a stupid idle loop.
			 */
#ifdef MULTIPROCESSOR
			__mp_unlock(&sched_lock);
#endif
			spl0();
			delay(10);
			SCHED_LOCK(s);
			goto again;
		}
		KASSERT(p);
		p->p_stat = SRUN;
	}

	KASSERT(p->p_wchan == NULL);
	return (p);
}

struct cpu_info *
sched_choosecpu_fork(struct proc *parent, int flags)
{
#ifdef MULTIPROCESSOR
	struct cpu_info *choice = NULL;
	fixpt_t load, best_load = ~0;
	int run, best_run = INT_MAX;
	struct cpu_info *ci;
	struct cpuset set;

#if 0
	/*
	 * XXX
	 * Don't do this until we have a painless way to move the cpu in exec.
	 * Preferably when nuking the old pmap and getting a new one on a
	 * new cpu.
	 */
	/*
	 * PPWAIT forks are simple. We know that the parent will not
	 * run until we exec and choose another cpu, so we just steal its
	 * cpu.
	 */
	if (flags & FORK_PPWAIT)
		return (parent->p_cpu);
#endif

	/*
	 * Look at all cpus that are currently idle and have nothing queued.
	 * If there are none, fall back to all cpus, preferring the one with
	 * the fewest queued procs and then the one with the lowest load
	 * average.
	 */
	cpuset_complement(&set, &sched_queued_cpus, &sched_idle_cpus);
	cpuset_intersection(&set, &set, &sched_all_cpus);
	if (cpuset_first(&set) == NULL)
		cpuset_copy(&set, &sched_all_cpus);

	while ((ci = cpuset_first(&set)) != NULL) {
		cpuset_del(&set, ci);

		load = ci->ci_schedstate.spc_ldavg;
		run = ci->ci_schedstate.spc_nrun;

		if (choice == NULL || run < best_run ||
		    (run == best_run && load < best_load)) {
			choice = ci;
			best_load = load;
			best_run = run;
		}
	}

	return (choice);
#else
	return (curcpu());
#endif
}

struct cpu_info *
sched_choosecpu(struct proc *p)
{
#ifdef MULTIPROCESSOR
	struct cpu_info *choice = NULL;
	int last_cost = INT_MAX;
	struct cpu_info *ci;
	struct cpuset set;

	/*
	 * If pegged to a cpu, don't allow it to move.
	 */
	if (p->p_flag & P_CPUPEG)
		return (p->p_cpu);

	sched_choose++;

	/*
	 * Look at all cpus that are currently idle and have nothing queued.
	 * If there are none, fall back to all cpus and pick the cheapest one.
	 * (idle + queued can happen when the cpu is handling an interrupt
	 * at this moment and hasn't had time to leave idle yet).
	 */
	cpuset_complement(&set, &sched_queued_cpus, &sched_idle_cpus);
	cpuset_intersection(&set, &set, &sched_all_cpus);

	/*
	 * First, just check if our current cpu is in that set, if it is,
	 * this is simple.
	 * Also, our cpu might not be idle, but if it's the current cpu
	 * and it has nothing else queued and we're curproc, take it.
	 */
	if (cpuset_isset(&set, p->p_cpu) ||
	    (p->p_cpu == curcpu() && p->p_cpu->ci_schedstate.spc_nrun == 0 &&
	    (p->p_cpu->ci_schedstate.spc_schedflags & SPCF_SHOULDHALT) == 0 &&
	    curproc == p)) {
		sched_wasidle++;
		return (p->p_cpu);
	}

	if (cpuset_first(&set) == NULL)
		cpuset_copy(&set, &sched_all_cpus);

	while ((ci = cpuset_first(&set)) != NULL) {
		int cost = sched_proc_to_cpu_cost(ci, p);

		if (choice == NULL || cost < last_cost) {
			choice = ci;
			last_cost = cost;
		}
		cpuset_del(&set, ci);
	}

	if (p->p_cpu != choice)
		sched_nmigrations++;
	else
		sched_nomigrations++;

	return (choice);
#else
	return (curcpu());
#endif
}

/*
 * Attempt to steal a proc from some cpu.
 */
struct proc *
sched_steal_proc(struct cpu_info *self)
{
	struct proc *best = NULL;
#ifdef MULTIPROCESSOR
	struct schedstate_percpu *spc;
	int bestcost = INT_MAX;
	struct cpu_info *ci;
	struct cpuset set;

	KASSERT((self->ci_schedstate.spc_schedflags & SPCF_SHOULDHALT) == 0);

	/* Don't steal if we don't want to schedule processes on this CPU. */
	if (!cpuset_isset(&sched_all_cpus, self))
		return (NULL);

	cpuset_copy(&set, &sched_queued_cpus);

	while ((ci = cpuset_first(&set)) != NULL) {
		struct proc *p;
		int queue;
		int cost;

		cpuset_del(&set, ci);

		spc = &ci->ci_schedstate;

		queue = ffs(spc->spc_whichqs) - 1;
		TAILQ_FOREACH(p, &spc->spc_qs[queue], p_runq) {
			if (p->p_flag & P_CPUPEG)
				continue;

			cost = sched_proc_to_cpu_cost(self, p);

			if (best == NULL || cost < bestcost) {
				best = p;
				bestcost = cost;
			}
		}
	}
	if (best == NULL)
		return (NULL);

	spc = &best->p_cpu->ci_schedstate;
	remrunqueue(best);
	best->p_cpu = self;

	sched_stolen++;
#endif
	return (best);
}

#ifdef MULTIPROCESSOR
/*
 * Base 2 logarithm of an int. Returns 0 for 0 (yes, yes, I know).
 */
static int
log2(unsigned int i)
{
	int ret = 0;

	while (i >>= 1)
		ret++;

	return (ret);
}

/*
 * Calculate the cost of moving the proc to this cpu.
 *
 * What we want is some guesstimate of how much "performance" it will
 * cost us to move the proc here. Not just for caches and TLBs and NUMA
 * memory, but also for the proc itself. A highly loaded cpu might not
 * be the best candidate for this proc since it won't get run.
 *
 * Just total guesstimates for now.
 */

int sched_cost_load = 1;
int sched_cost_priority = 1;
int sched_cost_runnable = 3;
int sched_cost_resident = 1;
#endif

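/*
 * A worked example of the cost function below (numbers are illustrative
 * and assume FSHIFT is 11): a busy, non-primary cpu running at
 * spc_curpriority 30 with two procs queued and ldavg ~2.0
 * (spc_ldavg ~4096), considered for a proc with p_priority 50, costs
 * (50 - 30) * 1 + 3 + 2 * 3 + ((1 * 4096) >> 11) = 31.
 * If the proc last ran there and hasn't slept since, with ~4096 resident
 * pages, log2(4096) * 1 = 12 is subtracted again, giving 19.
 */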
int
sched_proc_to_cpu_cost(struct cpu_info *ci, struct proc *p)
{
	int cost = 0;
#ifdef MULTIPROCESSOR
	struct schedstate_percpu *spc;
	int l2resident = 0;

	spc = &ci->ci_schedstate;

	/*
	 * First, account for the priority of the proc we want to move.
	 * We're more willing to move the lower the priority of the
	 * destination cpu's current proc and the higher the priority of
	 * the proc we're moving.
	 */
	if (!cpuset_isset(&sched_idle_cpus, ci)) {
		cost += (p->p_priority - spc->spc_curpriority) *
		    sched_cost_priority;
		cost += sched_cost_runnable;
	}
	if (cpuset_isset(&sched_queued_cpus, ci))
		cost += spc->spc_nrun * sched_cost_runnable;

	/*
	 * Try to avoid the primary cpu as it handles hardware interrupts.
	 *
	 * XXX Needs to be revisited when we distribute interrupts
	 * over cpus.
	 */
	if (CPU_IS_PRIMARY(ci))
		cost += sched_cost_runnable;

	/*
	 * Higher load on the destination means we don't want to go there.
	 */
	cost += ((sched_cost_load * spc->spc_ldavg) >> FSHIFT);

	/*
	 * If the proc is on this cpu already, lower the cost by how much
	 * it has been running and an estimate of its footprint.
	 */
	if (p->p_cpu == ci && p->p_slptime == 0) {
		l2resident =
		    log2(pmap_resident_count(p->p_vmspace->vm_map.pmap));
		cost -= l2resident * sched_cost_resident;
	}
#endif
	return (cost);
}

/*
 * Peg a proc to a cpu.
 */
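/*
 * A typical caller (sketch) pegs itself, does its cpu-bound work and then
 * clears the flag again, as sched_barrier_task() below does:
 *
 *	sched_peg_curproc(ci);
 *	... work that must run on ci ...
 *	atomic_clearbits_int(&curproc->p_flag, P_CPUPEG);
 */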
void
sched_peg_curproc(struct cpu_info *ci)
{
	struct proc *p = curproc;
	int s;

	SCHED_LOCK(s);
	p->p_priority = p->p_usrpri;
	p->p_stat = SRUN;
	p->p_cpu = ci;
	atomic_setbits_int(&p->p_flag, P_CPUPEG);
	setrunqueue(p);
	p->p_ru.ru_nvcsw++;
	mi_switch();
	SCHED_UNLOCK(s);
}

#ifdef MULTIPROCESSOR

void
sched_start_secondary_cpus(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;

	CPU_INFO_FOREACH(cii, ci) {
		struct schedstate_percpu *spc = &ci->ci_schedstate;

		if (CPU_IS_PRIMARY(ci))
			continue;
		atomic_clearbits_int(&spc->spc_schedflags,
		    SPCF_SHOULDHALT | SPCF_HALTED);
#ifdef __HAVE_CPU_TOPOLOGY
		if (!sched_smt && ci->ci_smt_id > 0)
			continue;
#endif
		cpuset_add(&sched_all_cpus, ci);
	}
}

void
sched_stop_secondary_cpus(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;

	/*
	 * Make sure we stop the secondary CPUs.
	 */
	CPU_INFO_FOREACH(cii, ci) {
		struct schedstate_percpu *spc = &ci->ci_schedstate;

		if (CPU_IS_PRIMARY(ci))
			continue;
		cpuset_del(&sched_all_cpus, ci);
		atomic_setbits_int(&spc->spc_schedflags, SPCF_SHOULDHALT);
	}
	CPU_INFO_FOREACH(cii, ci) {
		struct schedstate_percpu *spc = &ci->ci_schedstate;
		struct sleep_state sls;

		if (CPU_IS_PRIMARY(ci))
			continue;
		while ((spc->spc_schedflags & SPCF_HALTED) == 0) {
			sleep_setup(&sls, spc, PZERO, "schedstate");
			sleep_finish(&sls,
			    (spc->spc_schedflags & SPCF_HALTED) == 0);
		}
	}
}

struct sched_barrier_state {
	struct cpu_info *ci;
	struct cond cond;
};

void
sched_barrier_task(void *arg)
{
	struct sched_barrier_state *sb = arg;
	struct cpu_info *ci = sb->ci;

	sched_peg_curproc(ci);
	cond_signal(&sb->cond);
	atomic_clearbits_int(&curproc->p_flag, P_CPUPEG);
}

void
sched_barrier(struct cpu_info *ci)
{
	struct sched_barrier_state sb;
	struct task task;
	CPU_INFO_ITERATOR cii;

	if (ci == NULL) {
		CPU_INFO_FOREACH(cii, ci) {
			if (CPU_IS_PRIMARY(ci))
				break;
		}
	}
	KASSERT(ci != NULL);

	if (ci == curcpu())
		return;

	sb.ci = ci;
	cond_init(&sb.cond);
	task_set(&task, sched_barrier_task, &sb);

	task_add(systqmp, &task);
	cond_wait(&sb.cond, "sbar");
}

#else

void
sched_barrier(struct cpu_info *ci)
{
}

#endif

/*
 * Functions to manipulate cpu sets.
 */
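/*
 * Layout note (illustrative): a cpuset is a bitmask indexed by
 * CPU_INFO_UNIT(ci); cpu n lives in word cs_set[n / 32], bit n % 32.
 * For example, a cpu with unit number 37 is bit 5 (1 << 5) of cs_set[1].
 */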
struct cpu_info *cpuset_infos[MAXCPUS];
static struct cpuset cpuset_all;

void
cpuset_init_cpu(struct cpu_info *ci)
{
	cpuset_add(&cpuset_all, ci);
	cpuset_infos[CPU_INFO_UNIT(ci)] = ci;
}

void
cpuset_clear(struct cpuset *cs)
{
	memset(cs, 0, sizeof(*cs));
}

void
cpuset_add(struct cpuset *cs, struct cpu_info *ci)
{
	unsigned int num = CPU_INFO_UNIT(ci);
	atomic_setbits_int(&cs->cs_set[num/32], (1 << (num % 32)));
}

void
cpuset_del(struct cpuset *cs, struct cpu_info *ci)
{
	unsigned int num = CPU_INFO_UNIT(ci);
	atomic_clearbits_int(&cs->cs_set[num/32], (1 << (num % 32)));
}

int
cpuset_isset(struct cpuset *cs, struct cpu_info *ci)
{
	unsigned int num = CPU_INFO_UNIT(ci);
	return (cs->cs_set[num/32] & (1 << (num % 32)));
}

void
cpuset_add_all(struct cpuset *cs)
{
	cpuset_copy(cs, &cpuset_all);
}

void
cpuset_copy(struct cpuset *to, struct cpuset *from)
{
	memcpy(to, from, sizeof(*to));
}

struct cpu_info *
cpuset_first(struct cpuset *cs)
{
	int i;

	for (i = 0; i < CPUSET_ASIZE(ncpus); i++)
		if (cs->cs_set[i])
			return (cpuset_infos[i * 32 + ffs(cs->cs_set[i]) - 1]);

	return (NULL);
}

void
cpuset_union(struct cpuset *to, struct cpuset *a, struct cpuset *b)
{
	int i;

	for (i = 0; i < CPUSET_ASIZE(ncpus); i++)
		to->cs_set[i] = a->cs_set[i] | b->cs_set[i];
}

void
cpuset_intersection(struct cpuset *to, struct cpuset *a, struct cpuset *b)
{
	int i;

	for (i = 0; i < CPUSET_ASIZE(ncpus); i++)
		to->cs_set[i] = a->cs_set[i] & b->cs_set[i];
}

void
cpuset_complement(struct cpuset *to, struct cpuset *a, struct cpuset *b)
{
	int i;

	for (i = 0; i < CPUSET_ASIZE(ncpus); i++)
		to->cs_set[i] = b->cs_set[i] & ~a->cs_set[i];
}

int
cpuset_cardinality(struct cpuset *cs)
{
	int cardinality, i, n;

	cardinality = 0;

	for (i = 0; i < CPUSET_ASIZE(ncpus); i++)
		for (n = cs->cs_set[i]; n != 0; n &= n - 1)
			cardinality++;

	return (cardinality);
}

int
sysctl_hwncpuonline(void)
{
	return cpuset_cardinality(&sched_all_cpus);
}

int
cpu_is_online(struct cpu_info *ci)
{
	return cpuset_isset(&sched_all_cpus, ci);
}

#ifdef __HAVE_CPU_TOPOLOGY

#include <sys/sysctl.h>

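/*
 * Note: this handler is assumed to back the hw.smt sysctl (the MIB hookup
 * lives outside this file); writing 1, e.g. "sysctl hw.smt=1", lets SMT
 * siblings (ci_smt_id > 0) join sched_all_cpus again, writing 0 removes
 * them.
 */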
int
sysctl_hwsmt(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	int err, newsmt;

	newsmt = sched_smt;
	err = sysctl_int(oldp, oldlenp, newp, newlen, &newsmt);
	if (err)
		return err;
	if (newsmt > 1)
		newsmt = 1;
	if (newsmt < 0)
		newsmt = 0;
	if (newsmt == sched_smt)
		return 0;

	sched_smt = newsmt;
	CPU_INFO_FOREACH(cii, ci) {
		if (CPU_IS_PRIMARY(ci))
			continue;
		if (ci->ci_smt_id == 0)
			continue;
		if (sched_smt)
			cpuset_add(&sched_all_cpus, ci);
		else
			cpuset_del(&sched_all_cpus, ci);
	}

	return 0;
}

#endif