1 /*	$OpenBSD: kern_sched.c,v 1.55 2019/02/26 14:24:21 visa Exp $	*/
2 /*
3  * Copyright (c) 2007, 2008 Artur Grabowski <art@openbsd.org>
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  */
17 
18 #include <sys/param.h>
19 
20 #include <sys/sched.h>
21 #include <sys/proc.h>
22 #include <sys/kthread.h>
23 #include <sys/systm.h>
24 #include <sys/resourcevar.h>
25 #include <sys/signalvar.h>
26 #include <sys/mutex.h>
27 #include <sys/task.h>
28 #include <sys/smr.h>
29 
30 #include <uvm/uvm_extern.h>
31 
32 void sched_kthreads_create(void *);
33 
34 int sched_proc_to_cpu_cost(struct cpu_info *ci, struct proc *p);
35 struct proc *sched_steal_proc(struct cpu_info *);
36 
37 /*
38  * To help choose which cpu should run which process, we keep track
39  * of cpus that are currently idle and cpus that have processes
40  * queued.
41  */
42 struct cpuset sched_idle_cpus;
43 struct cpuset sched_queued_cpus;
44 struct cpuset sched_all_cpus;
45 
46 /*
47  * Some general scheduler counters.
48  */
49 uint64_t sched_nmigrations;	/* Cpu migration counter */
50 uint64_t sched_nomigrations;	/* Cpu no migration counter */
51 uint64_t sched_noidle;		/* Times we didn't pick the idle task */
52 uint64_t sched_stolen;		/* Times we stole proc from other cpus */
53 uint64_t sched_choose;		/* Times we chose a cpu */
54 uint64_t sched_wasidle;		/* Times we came out of idle */
55 
56 #ifdef MULTIPROCESSOR
57 struct taskq *sbartq;
58 #endif
59 
60 int sched_smt;
61 
62 /*
63  * A few notes about cpu_switchto, which is implemented in MD code.
64  *
65  * cpu_switchto takes two arguments, the old proc and the proc
66  * it should switch to. The new proc will never be NULL, so we always have
67  * a saved state that we need to switch to. The old proc however can
68  * be NULL if the process is exiting. NULL for the old proc simply
69  * means "don't bother saving old state".
70  *
71  * cpu_switchto is supposed to atomically load the new state of the process,
72  * including the pcb and pmap, and to set curproc, the p_cpu pointer in the
73  * proc and p_stat to SONPROC. This is atomic only with respect to
74  * interrupts; other cpus in the system must not depend on this state being
75  * consistent. Therefore no locking is necessary in cpu_switchto other than
76  * blocking interrupts during the context switch.
77  */
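
/*
 * Purely illustrative sketch, not part of the original source: roughly
 * the shape an MD cpu_switchto() takes, mirroring the notes above.  The
 * helpers save_context(), load_context(), intr_disable() and
 * intr_restore() are hypothetical stand-ins for per-arch primitives.
 *
 *	void
 *	cpu_switchto(struct proc *old, struct proc *new)
 *	{
 *		u_long s = intr_disable();	// block interrupts only
 *
 *		if (old != NULL)
 *			save_context(old);	// spill registers into old pcb
 *
 *		new->p_cpu = curcpu();
 *		new->p_stat = SONPROC;
 *		curcpu()->ci_curproc = new;	// curproc now points at new
 *		pmap_activate(new);		// switch to the new pmap
 *		load_context(new);		// restore registers from new pcb
 *
 *		intr_restore(s);
 *	}
 */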
78 
79 /*
80  * sched_init_cpu is called from main() for the boot cpu, then it's the
81  * responsibility of the MD code to call it for all other cpus.
82  */
83 void
84 sched_init_cpu(struct cpu_info *ci)
85 {
86 	struct schedstate_percpu *spc = &ci->ci_schedstate;
87 	int i;
88 
89 	for (i = 0; i < SCHED_NQS; i++)
90 		TAILQ_INIT(&spc->spc_qs[i]);
91 
92 	spc->spc_idleproc = NULL;
93 
94 	kthread_create_deferred(sched_kthreads_create, ci);
95 
96 	LIST_INIT(&spc->spc_deadproc);
97 
98 	/*
99 	 * Slight hack here until the cpuset code handles cpu_info
100 	 * structures.
101 	 */
102 	cpuset_init_cpu(ci);
103 
104 #ifdef __HAVE_CPU_TOPOLOGY
105 	if (!sched_smt && ci->ci_smt_id > 0)
106 		return;
107 #endif
108 	cpuset_add(&sched_all_cpus, ci);
109 
110 	SIMPLEQ_INIT(&spc->spc_deferred);
111 }
112 
113 void
114 sched_kthreads_create(void *v)
115 {
116 	struct cpu_info *ci = v;
117 	struct schedstate_percpu *spc = &ci->ci_schedstate;
118 	static int num;
119 
120 	if (fork1(&proc0, FORK_SHAREVM|FORK_SHAREFILES|FORK_NOZOMBIE|
121 	    FORK_SYSTEM|FORK_SIGHAND|FORK_IDLE, sched_idle, ci, NULL,
122 	    &spc->spc_idleproc))
123 		panic("fork idle");
124 
125 	/* Name it as specified. */
126 	snprintf(spc->spc_idleproc->p_p->ps_comm,
127 	    sizeof(spc->spc_idleproc->p_p->ps_comm),
128 	    "idle%d", num);
129 
130 	num++;
131 }
132 
133 void
134 sched_idle(void *v)
135 {
136 	struct schedstate_percpu *spc;
137 	struct proc *p = curproc;
138 	struct cpu_info *ci = v;
139 	int s;
140 
141 	KERNEL_UNLOCK();
142 
143 	spc = &ci->ci_schedstate;
144 
145 	/*
146 	 * The first time we enter here, we're not supposed to idle;
147 	 * we just go away for a while.
148 	 */
149 	SCHED_LOCK(s);
150 	cpuset_add(&sched_idle_cpus, ci);
151 	p->p_stat = SSLEEP;
152 	p->p_cpu = ci;
153 	atomic_setbits_int(&p->p_flag, P_CPUPEG);
154 	mi_switch();
155 	cpuset_del(&sched_idle_cpus, ci);
156 	SCHED_UNLOCK(s);
157 
158 	KASSERT(ci == curcpu());
159 	KASSERT(curproc == spc->spc_idleproc);
160 
161 	while (1) {
162 		while (!cpu_is_idle(curcpu())) {
163 			struct proc *dead;
164 
165 			SCHED_LOCK(s);
166 			p->p_stat = SSLEEP;
167 			mi_switch();
168 			SCHED_UNLOCK(s);
169 
170 			while ((dead = LIST_FIRST(&spc->spc_deadproc))) {
171 				LIST_REMOVE(dead, p_hash);
172 				exit2(dead);
173 			}
174 		}
175 
176 		splassert(IPL_NONE);
177 
178 		smr_idle();
179 
180 		cpuset_add(&sched_idle_cpus, ci);
181 		cpu_idle_enter();
182 		while (spc->spc_whichqs == 0) {
183 #ifdef MULTIPROCESSOR
184 			if (spc->spc_schedflags & SPCF_SHOULDHALT &&
185 			    (spc->spc_schedflags & SPCF_HALTED) == 0) {
186 				cpuset_del(&sched_idle_cpus, ci);
187 				SCHED_LOCK(s);
188 				atomic_setbits_int(&spc->spc_schedflags,
189 				    spc->spc_whichqs ? 0 : SPCF_HALTED);
190 				SCHED_UNLOCK(s);
191 				wakeup(spc);
192 			}
193 #endif
194 			cpu_idle_cycle();
195 		}
196 		cpu_idle_leave();
197 		cpuset_del(&sched_idle_cpus, ci);
198 	}
199 }
200 
201 /*
202  * To free our address space we have to jump through a few hoops.
203  * The freeing is done by the reaper, but until we have one reaper
204  * per cpu, we have no way of putting this proc on the deadproc list
205  * and waking up the reaper without risking having our address space and
206  * stack torn from under us before we manage to switch to another proc.
207  * Therefore we have a per-cpu list of dead processes where we put this
208  * proc and have idle clean up that list and move it to the reaper list.
209  * All this will be unnecessary once we can bind the reaper to this cpu
210  * and not risk having it switch to another cpu in case it sleeps.
211  */
212 void
213 sched_exit(struct proc *p)
214 {
215 	struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
216 	struct timespec ts;
217 	struct proc *idle;
218 	int s;
219 
220 	nanouptime(&ts);
221 	timespecsub(&ts, &spc->spc_runtime, &ts);
222 	timespecadd(&p->p_rtime, &ts, &p->p_rtime);
223 
224 	LIST_INSERT_HEAD(&spc->spc_deadproc, p, p_hash);
225 
226 #ifdef MULTIPROCESSOR
227 	/* This process no longer needs to hold the kernel lock. */
228 	KERNEL_ASSERT_LOCKED();
229 	__mp_release_all(&kernel_lock);
230 #endif
231 
232 	SCHED_LOCK(s);
233 	idle = spc->spc_idleproc;
234 	idle->p_stat = SRUN;
235 	cpu_switchto(NULL, idle);
236 	panic("cpu_switchto returned");
237 }
238 
239 /*
240  * Run queue management.
241  */
242 void
243 sched_init_runqueues(void)
244 {
245 }
246 
247 void
248 setrunqueue(struct proc *p)
249 {
250 	struct schedstate_percpu *spc;
251 	int queue = p->p_priority >> 2;
252 
253 	SCHED_ASSERT_LOCKED();
254 	spc = &p->p_cpu->ci_schedstate;
255 	spc->spc_nrun++;
256 
257 	TAILQ_INSERT_TAIL(&spc->spc_qs[queue], p, p_runq);
258 	spc->spc_whichqs |= (1 << queue);
259 	cpuset_add(&sched_queued_cpus, p->p_cpu);
260 
261 	if (cpuset_isset(&sched_idle_cpus, p->p_cpu))
262 		cpu_unidle(p->p_cpu);
263 }
264 
265 void
266 remrunqueue(struct proc *p)
267 {
268 	struct schedstate_percpu *spc;
269 	int queue = p->p_priority >> 2;
270 
271 	SCHED_ASSERT_LOCKED();
272 	spc = &p->p_cpu->ci_schedstate;
273 	spc->spc_nrun--;
274 
275 	TAILQ_REMOVE(&spc->spc_qs[queue], p, p_runq);
276 	if (TAILQ_EMPTY(&spc->spc_qs[queue])) {
277 		spc->spc_whichqs &= ~(1 << queue);
278 		if (spc->spc_whichqs == 0)
279 			cpuset_del(&sched_queued_cpus, p->p_cpu);
280 	}
281 }
282 
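/*
 * Illustrative example, not from the original source: with the queue
 * index computed as p_priority >> 2, a proc at priority 50 goes on
 * queue 12 and bit 12 of spc_whichqs stays set while that queue is
 * non-empty; sched_chooseproc() below picks the best queue with
 * ffs(spc_whichqs) - 1, i.e. the lowest-numbered (highest-priority)
 * non-empty one.
 */
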
283 struct proc *
284 sched_chooseproc(void)
285 {
286 	struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
287 	struct proc *p;
288 	int queue;
289 
290 	SCHED_ASSERT_LOCKED();
291 
292 #ifdef MULTIPROCESSOR
293 	if (spc->spc_schedflags & SPCF_SHOULDHALT) {
294 		if (spc->spc_whichqs) {
295 			for (queue = 0; queue < SCHED_NQS; queue++) {
296 				while ((p = TAILQ_FIRST(&spc->spc_qs[queue]))) {
297 					remrunqueue(p);
298 					p->p_cpu = sched_choosecpu(p);
299 					setrunqueue(p);
300 					if (p->p_cpu == curcpu()) {
301 						KASSERT(p->p_flag & P_CPUPEG);
302 						goto again;
303 					}
304 				}
305 			}
306 		}
307 		p = spc->spc_idleproc;
308 		KASSERT(p);
309 		KASSERT(p->p_wchan == NULL);
310 		p->p_stat = SRUN;
311 		return (p);
312 	}
313 #endif
314 
315 again:
316 	if (spc->spc_whichqs) {
317 		queue = ffs(spc->spc_whichqs) - 1;
318 		p = TAILQ_FIRST(&spc->spc_qs[queue]);
319 		remrunqueue(p);
320 		sched_noidle++;
321 		KASSERT(p->p_stat == SRUN);
322 	} else if ((p = sched_steal_proc(curcpu())) == NULL) {
323 		p = spc->spc_idleproc;
324 		if (p == NULL) {
325 			int s;
326 			/*
327 			 * We get here if someone decides to switch during
328 			 * boot before forking kthreads, bleh.
329 			 * This is kind of like a stupid idle loop.
330 			 */
331 #ifdef MULTIPROCESSOR
332 			__mp_unlock(&sched_lock);
333 #endif
334 			spl0();
335 			delay(10);
336 			SCHED_LOCK(s);
337 			goto again;
338 		}
339 		KASSERT(p);
340 		p->p_stat = SRUN;
341 	}
342 
343 	KASSERT(p->p_wchan == NULL);
344 	return (p);
345 }
346 
347 struct cpu_info *
348 sched_choosecpu_fork(struct proc *parent, int flags)
349 {
350 #ifdef MULTIPROCESSOR
351 	struct cpu_info *choice = NULL;
352 	fixpt_t load, best_load = ~0;
353 	int run, best_run = INT_MAX;
354 	struct cpu_info *ci;
355 	struct cpuset set;
356 
357 #if 0
358 	/*
359 	 * XXX
360 	 * Don't do this until we have a painless way to move the cpu in exec.
361 	 * Preferably when nuking the old pmap and getting a new one on a
362 	 * new cpu.
363 	 */
364 	/*
365 	 * PPWAIT forks are simple. We know that the parent will not
366 	 * run until we exec and choose another cpu, so we just steal its
367 	 * cpu.
368 	 */
369 	if (flags & FORK_PPWAIT)
370 		return (parent->p_cpu);
371 #endif
372 
373 	/*
374 	 * Look at all cpus that are currently idle and have nothing queued;
375 	 * if there are none, consider all cpus. Pick the one with the fewest
376 	 * queued procs, breaking ties by the lowest load average.
377 	 */
378 	cpuset_complement(&set, &sched_queued_cpus, &sched_idle_cpus);
379 	cpuset_intersection(&set, &set, &sched_all_cpus);
380 	if (cpuset_first(&set) == NULL)
381 		cpuset_copy(&set, &sched_all_cpus);
382 
383 	while ((ci = cpuset_first(&set)) != NULL) {
384 		cpuset_del(&set, ci);
385 
386 		load = ci->ci_schedstate.spc_ldavg;
387 		run = ci->ci_schedstate.spc_nrun;
388 
389 		if (choice == NULL || run < best_run ||
390 		    (run == best_run &&load < best_load)) {
391 			choice = ci;
392 			best_load = load;
393 			best_run = run;
394 		}
395 	}
396 
397 	return (choice);
398 #else
399 	return (curcpu());
400 #endif
401 }
402 
403 struct cpu_info *
404 sched_choosecpu(struct proc *p)
405 {
406 #ifdef MULTIPROCESSOR
407 	struct cpu_info *choice = NULL;
408 	int last_cost = INT_MAX;
409 	struct cpu_info *ci;
410 	struct cpuset set;
411 
412 	/*
413 	 * If pegged to a cpu, don't allow it to move.
414 	 */
415 	if (p->p_flag & P_CPUPEG)
416 		return (p->p_cpu);
417 
418 	sched_choose++;
419 
420 	/*
421 	 * Look at all cpus that are currently idle and have nothing queued;
422 	 * if there are none, consider all cpus. Pick the cheapest of those.
423 	 * (idle + queued could mean that the cpu is handling an interrupt
424 	 * at this moment and hasn't had time to leave idle yet).
425 	 */
426 	cpuset_complement(&set, &sched_queued_cpus, &sched_idle_cpus);
427 	cpuset_intersection(&set, &set, &sched_all_cpus);
428 
429 	/*
430 	 * First, just check if our current cpu is in that set; if it is,
431 	 * this is simple.
432 	 * Also, our cpu might not be idle, but if it's the current cpu
433 	 * and it has nothing else queued and we're curproc, take it.
434 	 */
435 	if (cpuset_isset(&set, p->p_cpu) ||
436 	    (p->p_cpu == curcpu() && p->p_cpu->ci_schedstate.spc_nrun == 0 &&
437 	    (p->p_cpu->ci_schedstate.spc_schedflags & SPCF_SHOULDHALT) == 0 &&
438 	    curproc == p)) {
439 		sched_wasidle++;
440 		return (p->p_cpu);
441 	}
442 
443 	if (cpuset_first(&set) == NULL)
444 		cpuset_copy(&set, &sched_all_cpus);
445 
446 	while ((ci = cpuset_first(&set)) != NULL) {
447 		int cost = sched_proc_to_cpu_cost(ci, p);
448 
449 		if (choice == NULL || cost < last_cost) {
450 			choice = ci;
451 			last_cost = cost;
452 		}
453 		cpuset_del(&set, ci);
454 	}
455 
456 	if (p->p_cpu != choice)
457 		sched_nmigrations++;
458 	else
459 		sched_nomigrations++;
460 
461 	return (choice);
462 #else
463 	return (curcpu());
464 #endif
465 }
466 
467 /*
468  * Attempt to steal a proc from some cpu.
469  */
470 struct proc *
471 sched_steal_proc(struct cpu_info *self)
472 {
473 	struct proc *best = NULL;
474 #ifdef MULTIPROCESSOR
475 	struct schedstate_percpu *spc;
476 	int bestcost = INT_MAX;
477 	struct cpu_info *ci;
478 	struct cpuset set;
479 
480 	KASSERT((self->ci_schedstate.spc_schedflags & SPCF_SHOULDHALT) == 0);
481 
482 	/* Don't steal if we don't want to schedule processes on this CPU. */
483 	if (!cpuset_isset(&sched_all_cpus, self))
484 		return (NULL);
485 
486 	cpuset_copy(&set, &sched_queued_cpus);
487 
488 	while ((ci = cpuset_first(&set)) != NULL) {
489 		struct proc *p;
490 		int queue;
491 		int cost;
492 
493 		cpuset_del(&set, ci);
494 
495 		spc = &ci->ci_schedstate;
496 
497 		queue = ffs(spc->spc_whichqs) - 1;
498 		TAILQ_FOREACH(p, &spc->spc_qs[queue], p_runq) {
499 			if (p->p_flag & P_CPUPEG)
500 				continue;
501 
502 			cost = sched_proc_to_cpu_cost(self, p);
503 
504 			if (best == NULL || cost < bestcost) {
505 				best = p;
506 				bestcost = cost;
507 			}
508 		}
509 	}
510 	if (best == NULL)
511 		return (NULL);
512 
513 	spc = &best->p_cpu->ci_schedstate;
514 	remrunqueue(best);
515 	best->p_cpu = self;
516 
517 	sched_stolen++;
518 #endif
519 	return (best);
520 }
521 
522 #ifdef MULTIPROCESSOR
523 /*
524  * Base 2 logarithm of an unsigned int. Returns 0 for 0 (yeye, I know).
525  */
526 static int
527 log2(unsigned int i)
528 {
529 	int ret = 0;
530 
531 	while (i >>= 1)
532 		ret++;
533 
534 	return (ret);
535 }
536 
537 /*
538  * Calculate the cost of moving the proc to this cpu.
539  *
540  * What we want is some guesstimate of how much "performance" it will
541  * cost us to move the proc here. Not just for caches and TLBs and NUMA
542  * memory, but also for the proc itself. A highly loaded cpu might not
543  * be the best candidate for this proc since it won't get run.
544  *
545  * Just total guesstimates for now.
546  */
547 
548 int sched_cost_load = 1;
549 int sched_cost_priority = 1;
550 int sched_cost_runnable = 3;
551 int sched_cost_resident = 1;
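
/*
 * Illustrative, made-up numbers for the cost function below: moving a
 * priority-50 proc to a non-idle cpu whose current proc runs at
 * priority 30 and which already has 2 procs queued costs roughly
 *
 *	(50 - 30) * sched_cost_priority		= 20
 *	+ sched_cost_runnable (not idle)	=  3
 *	+ 2 * sched_cost_runnable (queued)	=  6
 *
 * plus the load average and primary-cpu terms, while an idle cpu with
 * an empty queue skips the first two terms entirely.  The resident-set
 * term then lowers the cost of staying on the proc's current cpu.
 */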
552 #endif
553 
554 int
555 sched_proc_to_cpu_cost(struct cpu_info *ci, struct proc *p)
556 {
557 	int cost = 0;
558 #ifdef MULTIPROCESSOR
559 	struct schedstate_percpu *spc;
560 	int l2resident = 0;
561 
562 	spc = &ci->ci_schedstate;
563 
564 	/*
565 	 * First, account for the priority of the proc we want to move.
566 	 * We are more willing to move the lower the priority of what currently
567 	 * runs on the destination and the higher the priority of the proc.
568 	 */
569 	if (!cpuset_isset(&sched_idle_cpus, ci)) {
570 		cost += (p->p_priority - spc->spc_curpriority) *
571 		    sched_cost_priority;
572 		cost += sched_cost_runnable;
573 	}
574 	if (cpuset_isset(&sched_queued_cpus, ci))
575 		cost += spc->spc_nrun * sched_cost_runnable;
576 
577 	/*
578 	 * Try to avoid the primary cpu as it handles hardware interrupts.
579 	 *
580 	 * XXX Needs to be revisited when we distribute interrupts
581 	 * over cpus.
582 	 */
583 	if (CPU_IS_PRIMARY(ci))
584 		cost += sched_cost_runnable;
585 
586 	/*
587 	 * Higher load on the destination means we don't want to go there.
588 	 */
589 	cost += ((sched_cost_load * spc->spc_ldavg) >> FSHIFT);
590 
591 	/*
592 	 * If the proc is on this cpu already, lower the cost by how much
593 	 * it has been running and an estimate of its footprint.
594 	 */
595 	if (p->p_cpu == ci && p->p_slptime == 0) {
596 		l2resident =
597 		    log2(pmap_resident_count(p->p_vmspace->vm_map.pmap));
598 		cost -= l2resident * sched_cost_resident;
599 	}
600 #endif
601 	return (cost);
602 }
603 
604 /*
605  * Peg a proc to a cpu.
606  */
607 void
608 sched_peg_curproc(struct cpu_info *ci)
609 {
610 	struct proc *p = curproc;
611 	int s;
612 
613 	SCHED_LOCK(s);
614 	p->p_priority = p->p_usrpri;
615 	p->p_stat = SRUN;
616 	p->p_cpu = ci;
617 	atomic_setbits_int(&p->p_flag, P_CPUPEG);
618 	setrunqueue(p);
619 	p->p_ru.ru_nvcsw++;
620 	mi_switch();
621 	SCHED_UNLOCK(s);
622 }
623 
624 #ifdef MULTIPROCESSOR
625 
626 void
627 sched_start_secondary_cpus(void)
628 {
629 	CPU_INFO_ITERATOR cii;
630 	struct cpu_info *ci;
631 
632 	CPU_INFO_FOREACH(cii, ci) {
633 		struct schedstate_percpu *spc = &ci->ci_schedstate;
634 
635 		if (CPU_IS_PRIMARY(ci))
636 			continue;
637 		atomic_clearbits_int(&spc->spc_schedflags,
638 		    SPCF_SHOULDHALT | SPCF_HALTED);
639 #ifdef __HAVE_CPU_TOPOLOGY
640 		if (!sched_smt && ci->ci_smt_id > 0)
641 			continue;
642 #endif
643 		cpuset_add(&sched_all_cpus, ci);
644 	}
645 }
646 
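/*
 * Halt the secondary cpus: flag SPCF_SHOULDHALT on each of them, then
 * sleep until each cpu acknowledges by setting SPCF_HALTED and calling
 * wakeup() on its schedstate (see sched_idle() and sched_chooseproc()
 * above).
 */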
647 void
648 sched_stop_secondary_cpus(void)
649 {
650 	CPU_INFO_ITERATOR cii;
651 	struct cpu_info *ci;
652 
653 	/*
654 	 * Make sure we stop the secondary CPUs.
655 	 */
656 	CPU_INFO_FOREACH(cii, ci) {
657 		struct schedstate_percpu *spc = &ci->ci_schedstate;
658 
659 		if (CPU_IS_PRIMARY(ci))
660 			continue;
661 		cpuset_del(&sched_all_cpus, ci);
662 		atomic_setbits_int(&spc->spc_schedflags, SPCF_SHOULDHALT);
663 	}
664 	CPU_INFO_FOREACH(cii, ci) {
665 		struct schedstate_percpu *spc = &ci->ci_schedstate;
666 		struct sleep_state sls;
667 
668 		if (CPU_IS_PRIMARY(ci))
669 			continue;
670 		while ((spc->spc_schedflags & SPCF_HALTED) == 0) {
671 			sleep_setup(&sls, spc, PZERO, "schedstate");
672 			sleep_finish(&sls,
673 			    (spc->spc_schedflags & SPCF_HALTED) == 0);
674 		}
675 	}
676 }
677 
678 struct sched_barrier_state {
679 	struct cpu_info *ci;
680 	struct cond cond;
681 };
682 
683 void
684 sched_barrier_task(void *arg)
685 {
686 	struct sched_barrier_state *sb = arg;
687 	struct cpu_info *ci = sb->ci;
688 
689 	sched_peg_curproc(ci);
690 	cond_signal(&sb->cond);
691 	atomic_clearbits_int(&curproc->p_flag, P_CPUPEG);
692 }
693 
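/*
 * Wait until the given cpu (or the primary cpu if ci is NULL) has gone
 * through the scheduler: queue a task that pegs itself to that cpu,
 * signals us once it runs there, and unpegs itself again.
 */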
694 void
695 sched_barrier(struct cpu_info *ci)
696 {
697 	struct sched_barrier_state sb;
698 	struct task task;
699 	CPU_INFO_ITERATOR cii;
700 
701 	if (ci == NULL) {
702 		CPU_INFO_FOREACH(cii, ci) {
703 			if (CPU_IS_PRIMARY(ci))
704 				break;
705 		}
706 	}
707 	KASSERT(ci != NULL);
708 
709 	if (ci == curcpu())
710 		return;
711 
712 	sb.ci = ci;
713 	cond_init(&sb.cond);
714 	task_set(&task, sched_barrier_task, &sb);
715 
716 	task_add(systqmp, &task);
717 	cond_wait(&sb.cond, "sbar");
718 }
719 
720 #else
721 
722 void
723 sched_barrier(struct cpu_info *ci)
724 {
725 }
726 
727 #endif
728 
729 /*
730  * Functions to manipulate cpu sets.
731  */
732 struct cpu_info *cpuset_infos[MAXCPUS];
733 static struct cpuset cpuset_all;
734 
735 void
736 cpuset_init_cpu(struct cpu_info *ci)
737 {
738 	cpuset_add(&cpuset_all, ci);
739 	cpuset_infos[CPU_INFO_UNIT(ci)] = ci;
740 }
741 
742 void
743 cpuset_clear(struct cpuset *cs)
744 {
745 	memset(cs, 0, sizeof(*cs));
746 }
747 
748 void
749 cpuset_add(struct cpuset *cs, struct cpu_info *ci)
750 {
751 	unsigned int num = CPU_INFO_UNIT(ci);
752 	atomic_setbits_int(&cs->cs_set[num/32], (1 << (num % 32)));
753 }
754 
755 void
756 cpuset_del(struct cpuset *cs, struct cpu_info *ci)
757 {
758 	unsigned int num = CPU_INFO_UNIT(ci);
759 	atomic_clearbits_int(&cs->cs_set[num/32], (1 << (num % 32)));
760 }
761 
762 int
763 cpuset_isset(struct cpuset *cs, struct cpu_info *ci)
764 {
765 	unsigned int num = CPU_INFO_UNIT(ci);
766 	return (cs->cs_set[num/32] & (1 << (num % 32)));
767 }
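
/*
 * Illustrative note: a cpuset keeps one bit per CPU_INFO_UNIT() in an
 * array of 32-bit words, so e.g. cpu unit 37 is bit 5 of cs_set[1]
 * (37 / 32 == 1, 37 % 32 == 5).  cpuset_first() below undoes the
 * mapping with ffs() to recover the cpu_info pointer via cpuset_infos[].
 */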
768 
769 void
770 cpuset_add_all(struct cpuset *cs)
771 {
772 	cpuset_copy(cs, &cpuset_all);
773 }
774 
775 void
776 cpuset_copy(struct cpuset *to, struct cpuset *from)
777 {
778 	memcpy(to, from, sizeof(*to));
779 }
780 
781 struct cpu_info *
782 cpuset_first(struct cpuset *cs)
783 {
784 	int i;
785 
786 	for (i = 0; i < CPUSET_ASIZE(ncpus); i++)
787 		if (cs->cs_set[i])
788 			return (cpuset_infos[i * 32 + ffs(cs->cs_set[i]) - 1]);
789 
790 	return (NULL);
791 }
792 
793 void
794 cpuset_union(struct cpuset *to, struct cpuset *a, struct cpuset *b)
795 {
796 	int i;
797 
798 	for (i = 0; i < CPUSET_ASIZE(ncpus); i++)
799 		to->cs_set[i] = a->cs_set[i] | b->cs_set[i];
800 }
801 
802 void
803 cpuset_intersection(struct cpuset *to, struct cpuset *a, struct cpuset *b)
804 {
805 	int i;
806 
807 	for (i = 0; i < CPUSET_ASIZE(ncpus); i++)
808 		to->cs_set[i] = a->cs_set[i] & b->cs_set[i];
809 }
810 
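/*
 * Store in "to" the members of "b" that are not in "a"; e.g.
 * sched_choosecpu() above uses this to get the idle cpus that have
 * nothing queued.
 */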
811 void
812 cpuset_complement(struct cpuset *to, struct cpuset *a, struct cpuset *b)
813 {
814 	int i;
815 
816 	for (i = 0; i < CPUSET_ASIZE(ncpus); i++)
817 		to->cs_set[i] = b->cs_set[i] & ~a->cs_set[i];
818 }
819 
820 int
821 cpuset_cardinality(struct cpuset *cs)
822 {
823 	int cardinality, i, n;
824 
825 	cardinality = 0;
826 
827 	for (i = 0; i < CPUSET_ASIZE(ncpus); i++)
828 		for (n = cs->cs_set[i]; n != 0; n &= n - 1)
829 			cardinality++;
830 
831 	return (cardinality);
832 }
833 
834 int
835 sysctl_hwncpuonline(void)
836 {
837 	return cpuset_cardinality(&sched_all_cpus);
838 }
839 
840 int
841 cpu_is_online(struct cpu_info *ci)
842 {
843 	return cpuset_isset(&sched_all_cpus, ci);
844 }
845 
846 #ifdef __HAVE_CPU_TOPOLOGY
847 
848 #include <sys/sysctl.h>
849 
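/*
 * Handler for the hw.smt sysctl: e.g. "sysctl hw.smt=1" adds the SMT
 * sibling cpus back to sched_all_cpus and "sysctl hw.smt=0" removes
 * them; the primary cpu and the first thread of each core are always
 * kept.
 */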
850 int
851 sysctl_hwsmt(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
852 {
853 	CPU_INFO_ITERATOR cii;
854 	struct cpu_info *ci;
855 	int err, newsmt;
856 
857 	newsmt = sched_smt;
858 	err = sysctl_int(oldp, oldlenp, newp, newlen, &newsmt);
859 	if (err)
860 		return err;
861 	if (newsmt > 1)
862 		newsmt = 1;
863 	if (newsmt < 0)
864 		newsmt = 0;
865 	if (newsmt == sched_smt)
866 		return 0;
867 
868 	sched_smt = newsmt;
869 	CPU_INFO_FOREACH(cii, ci) {
870 		if (CPU_IS_PRIMARY(ci))
871 			continue;
872 		if (ci->ci_smt_id == 0)
873 			continue;
874 		if (sched_smt)
875 			cpuset_add(&sched_all_cpus, ci);
876 		else
877 			cpuset_del(&sched_all_cpus, ci);
878 	}
879 
880 	return 0;
881 }
882 
883 #endif
884