/*	$OpenBSD: kern_sched.c,v 1.66 2020/02/21 11:10:23 claudio Exp $	*/
/*
 * Copyright (c) 2007, 2008 Artur Grabowski <art@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>

#include <sys/sched.h>
#include <sys/proc.h>
#include <sys/kthread.h>
#include <sys/systm.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/mutex.h>
#include <sys/task.h>
#include <sys/smr.h>
#include <sys/tracepoint.h>

#include <uvm/uvm_extern.h>

void sched_kthreads_create(void *);

int sched_proc_to_cpu_cost(struct cpu_info *ci, struct proc *p);
struct proc *sched_steal_proc(struct cpu_info *);

/*
 * To help choose which cpu should run which process we keep track
 * of cpus which are currently idle and which cpus have processes
 * queued.
 */
struct cpuset sched_idle_cpus;
struct cpuset sched_queued_cpus;
struct cpuset sched_all_cpus;

/*
 * Some general scheduler counters.
 */
uint64_t sched_nmigrations;	/* Times we migrated a proc to another cpu */
uint64_t sched_nomigrations;	/* Times we kept a proc on the same cpu */
uint64_t sched_noidle;		/* Times we didn't pick the idle task */
uint64_t sched_stolen;		/* Times we stole proc from other cpus */
uint64_t sched_choose;		/* Times we chose a cpu */
uint64_t sched_wasidle;		/* Times we came out of idle */

#ifdef MULTIPROCESSOR
struct taskq *sbartq;
#endif

int sched_smt;

/*
 * A few notes about cpu_switchto, which is implemented in MD code.
 *
 * cpu_switchto takes two arguments, the old proc and the proc
 * it should switch to. The new proc will never be NULL, so we always have
 * a saved state that we need to switch to. The old proc, however, can
 * be NULL if the process is exiting; NULL for the old proc simply
 * means "don't bother saving old state".
 *
 * cpu_switchto is supposed to atomically load the new state of the process,
 * including the pcb and pmap, and to set curproc, the p_cpu pointer in the
 * proc and p_stat to SONPROC. This is atomic only with respect to
 * interrupts; other cpus in the system must not depend on this state
 * being consistent while the switch is in progress. Therefore no locking
 * is necessary in cpu_switchto other than blocking interrupts during the
 * context switch.
 */
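
/*
 * For illustration only: a minimal sketch of the MD-side shape implied by
 * the contract above. The names md_save_context/md_load_context are
 * hypothetical stand-ins for whatever a given port actually does.
 *
 *	void
 *	cpu_switchto(struct proc *old, struct proc *new)
 *	{
 *		// interrupts are expected to be blocked here
 *		if (old != NULL)
 *			md_save_context(old);	// save pcb state
 *		pmap_activate(new);		// switch address space
 *		curcpu()->ci_curproc = new;	// publish curproc
 *		new->p_cpu = curcpu();
 *		new->p_stat = SONPROC;
 *		md_load_context(new);		// resumes the new proc
 *	}
 */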

/*
 * sched_init_cpu is called from main() for the boot cpu, then it's the
 * responsibility of the MD code to call it for all other cpus.
 */
void
sched_init_cpu(struct cpu_info *ci)
{
	struct schedstate_percpu *spc = &ci->ci_schedstate;
	int i;

	for (i = 0; i < SCHED_NQS; i++)
		TAILQ_INIT(&spc->spc_qs[i]);

	spc->spc_idleproc = NULL;

	kthread_create_deferred(sched_kthreads_create, ci);

	LIST_INIT(&spc->spc_deadproc);
	SIMPLEQ_INIT(&spc->spc_deferred);

	/*
	 * Slight hack here until the cpuset code handles cpu_info
	 * structures.
	 */
	cpuset_init_cpu(ci);

#ifdef __HAVE_CPU_TOPOLOGY
	if (!sched_smt && ci->ci_smt_id > 0)
		return;
#endif
	cpuset_add(&sched_all_cpus, ci);
}
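
/*
 * Note: with __HAVE_CPU_TOPOLOGY, SMT siblings (ci_smt_id > 0) are kept
 * out of sched_all_cpus while sched_smt is 0, so no processes are
 * scheduled on them until SMT is enabled via the sysctl handled by
 * sysctl_hwsmt() below.
 */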

void
sched_kthreads_create(void *v)
{
	struct cpu_info *ci = v;
	struct schedstate_percpu *spc = &ci->ci_schedstate;
	static int num;

	if (fork1(&proc0, FORK_SHAREVM|FORK_SHAREFILES|FORK_NOZOMBIE|
	    FORK_SYSTEM|FORK_IDLE, sched_idle, ci, NULL,
	    &spc->spc_idleproc))
		panic("fork idle");

	/* Name it as specified. */
	snprintf(spc->spc_idleproc->p_p->ps_comm,
	    sizeof(spc->spc_idleproc->p_p->ps_comm),
	    "idle%d", num);

	num++;
}

void
sched_idle(void *v)
{
	struct schedstate_percpu *spc;
	struct proc *p = curproc;
	struct cpu_info *ci = v;
	int s;

	KERNEL_UNLOCK();

	spc = &ci->ci_schedstate;

	/*
	 * The first time we enter here we're not supposed to idle,
	 * just go away for a while.
	 */
	SCHED_LOCK(s);
	cpuset_add(&sched_idle_cpus, ci);
	p->p_stat = SSLEEP;
	p->p_cpu = ci;
	atomic_setbits_int(&p->p_flag, P_CPUPEG);
	mi_switch();
	cpuset_del(&sched_idle_cpus, ci);
	SCHED_UNLOCK(s);

	KASSERT(ci == curcpu());
	KASSERT(curproc == spc->spc_idleproc);

	while (1) {
		while (!cpu_is_idle(curcpu())) {
			struct proc *dead;

			SCHED_LOCK(s);
			p->p_stat = SSLEEP;
			mi_switch();
			SCHED_UNLOCK(s);

			while ((dead = LIST_FIRST(&spc->spc_deadproc))) {
				LIST_REMOVE(dead, p_hash);
				exit2(dead);
			}
		}

		splassert(IPL_NONE);

		smr_idle();

		cpuset_add(&sched_idle_cpus, ci);
		cpu_idle_enter();
		while (spc->spc_whichqs == 0) {
#ifdef MULTIPROCESSOR
			if (spc->spc_schedflags & SPCF_SHOULDHALT &&
			    (spc->spc_schedflags & SPCF_HALTED) == 0) {
				cpuset_del(&sched_idle_cpus, ci);
				SCHED_LOCK(s);
				atomic_setbits_int(&spc->spc_schedflags,
				    spc->spc_whichqs ? 0 : SPCF_HALTED);
				SCHED_UNLOCK(s);
				wakeup(spc);
			}
#endif
			cpu_idle_cycle();
		}
		cpu_idle_leave();
		cpuset_del(&sched_idle_cpus, ci);
	}
}
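
/*
 * The SPCF_SHOULDHALT/SPCF_HALTED exchange in the idle loop above is one
 * half of a handshake: sched_stop_secondary_cpus() sets SPCF_SHOULDHALT
 * and sleeps on spc until the idle proc acknowledges with SPCF_HALTED and
 * a wakeup(spc). The re-check of spc_whichqs before setting SPCF_HALTED
 * avoids declaring the cpu halted while work is still queued.
 */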

/*
 * To free our address space we have to jump through a few hoops.
 * The freeing is done by the reaper, but until we have one reaper
 * per cpu, we have no way of putting this proc on the deadproc list
 * and waking up the reaper without risking having our address space and
 * stack torn out from under us before we manage to switch to another proc.
 * Therefore we have a per-cpu list of dead processes where we put this
 * proc; the idle proc cleans up that list and moves it to the reaper list.
 * All this will be unnecessary once we can bind the reaper to this cpu
 * and not risk having it switch to another cpu in case it sleeps.
 */
void
sched_exit(struct proc *p)
{
	struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
	struct timespec ts;
	struct proc *idle;
	int s;

	nanouptime(&ts);
	timespecsub(&ts, &spc->spc_runtime, &ts);
	timespecadd(&p->p_rtime, &ts, &p->p_rtime);

	LIST_INSERT_HEAD(&spc->spc_deadproc, p, p_hash);

#ifdef MULTIPROCESSOR
	/* This process no longer needs to hold the kernel lock. */
	KERNEL_ASSERT_LOCKED();
	__mp_release_all(&kernel_lock);
#endif

	SCHED_LOCK(s);
	idle = spc->spc_idleproc;
	idle->p_stat = SRUN;
	cpu_switchto(NULL, idle);
	panic("cpu_switchto returned");
}
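
/*
 * Passing NULL as the old proc to cpu_switchto() above follows the
 * contract described near the top of this file: the exiting proc's state
 * is deliberately not saved, since it will never run again; the idle proc
 * later hands the corpse from spc_deadproc to the reaper via exit2().
 */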

/*
 * Run queue management.
 */
void
sched_init_runqueues(void)
{
}

void
setrunqueue(struct cpu_info *ci, struct proc *p, uint8_t prio)
{
	struct schedstate_percpu *spc;
	int queue = prio >> 2;

	if (ci == NULL)
		ci = sched_choosecpu(p);

	KASSERT(ci != NULL);
	SCHED_ASSERT_LOCKED();

	p->p_cpu = ci;
	p->p_stat = SRUN;
	p->p_runpri = prio;

	spc = &p->p_cpu->ci_schedstate;
	spc->spc_nrun++;
	TRACEPOINT(sched, enqueue, p->p_tid, p->p_p->ps_pid);

	TAILQ_INSERT_TAIL(&spc->spc_qs[queue], p, p_runq);
	spc->spc_whichqs |= (1 << queue);
	cpuset_add(&sched_queued_cpus, p->p_cpu);

	if (cpuset_isset(&sched_idle_cpus, p->p_cpu))
		cpu_unidle(p->p_cpu);

	if (prio < spc->spc_curpriority)
		need_resched(ci);
}
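
/*
 * For reference: setrunqueue() maps four priority values onto each of the
 * SCHED_NQS run queues via "prio >> 2", and spc_whichqs keeps one bit per
 * non-empty queue so sched_chooseproc() can find the best queue with
 * ffs(). Assuming SCHED_NQS is 32 (priorities 0-127):
 *
 *	prio   0..3   -> queue 0  (best)
 *	prio  64..67  -> queue 16
 *	prio 124..127 -> queue 31 (worst)
 */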

void
remrunqueue(struct proc *p)
{
	struct schedstate_percpu *spc;
	int queue = p->p_runpri >> 2;

	SCHED_ASSERT_LOCKED();
	spc = &p->p_cpu->ci_schedstate;
	spc->spc_nrun--;
	TRACEPOINT(sched, dequeue, p->p_tid, p->p_p->ps_pid);

	TAILQ_REMOVE(&spc->spc_qs[queue], p, p_runq);
	if (TAILQ_EMPTY(&spc->spc_qs[queue])) {
		spc->spc_whichqs &= ~(1 << queue);
		if (spc->spc_whichqs == 0)
			cpuset_del(&sched_queued_cpus, p->p_cpu);
	}
}

struct proc *
sched_chooseproc(void)
{
	struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
	struct proc *p;
	int queue;

	SCHED_ASSERT_LOCKED();

#ifdef MULTIPROCESSOR
	if (spc->spc_schedflags & SPCF_SHOULDHALT) {
		if (spc->spc_whichqs) {
			for (queue = 0; queue < SCHED_NQS; queue++) {
				while ((p = TAILQ_FIRST(&spc->spc_qs[queue]))) {
					remrunqueue(p);
					setrunqueue(NULL, p, p->p_runpri);
					if (p->p_cpu == curcpu()) {
						KASSERT(p->p_flag & P_CPUPEG);
						goto again;
					}
				}
			}
		}
		p = spc->spc_idleproc;
		KASSERT(p);
		KASSERT(p->p_wchan == NULL);
		p->p_stat = SRUN;
		return (p);
	}
#endif

again:
	if (spc->spc_whichqs) {
		queue = ffs(spc->spc_whichqs) - 1;
		p = TAILQ_FIRST(&spc->spc_qs[queue]);
		remrunqueue(p);
		sched_noidle++;
		if (p->p_stat != SRUN)
			panic("thread %d not in SRUN: %d", p->p_tid, p->p_stat);
	} else if ((p = sched_steal_proc(curcpu())) == NULL) {
		p = spc->spc_idleproc;
		if (p == NULL) {
			int s;
			/*
			 * We get here if someone decides to switch during
			 * boot before forking kthreads, bleh.
			 * This is kind of like a stupid idle loop.
			 */
#ifdef MULTIPROCESSOR
			__mp_unlock(&sched_lock);
#endif
			spl0();
			delay(10);
			SCHED_LOCK(s);
			goto again;
		}
		KASSERT(p);
		p->p_stat = SRUN;
	}

	KASSERT(p->p_wchan == NULL);
	return (p);
}
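
/*
 * Pick order above: a proc from our own highest-priority non-empty queue
 * first (lowest queue number wins via ffs), then whatever we can steal
 * from another cpu, and only then the idle proc. The early-boot fallback
 * spins briefly because spc_idleproc does not exist yet.
 */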

struct cpu_info *
sched_choosecpu_fork(struct proc *parent, int flags)
{
#ifdef MULTIPROCESSOR
	struct cpu_info *choice = NULL;
	fixpt_t load, best_load = ~0;
	int run, best_run = INT_MAX;
	struct cpu_info *ci;
	struct cpuset set;

#if 0
	/*
	 * XXX
	 * Don't do this until we have a painless way to move the cpu in exec.
	 * Preferably when nuking the old pmap and getting a new one on a
	 * new cpu.
	 */
	/*
	 * PPWAIT forks are simple. We know that the parent will not
	 * run until we exec and choose another cpu, so we just steal its
	 * cpu.
	 */
	if (flags & FORK_PPWAIT)
		return (parent->p_cpu);
#endif

	/*
	 * Look at all cpus that are currently idle and have nothing queued;
	 * if there are none, consider all cpus. Of the candidates, pick the
	 * one with the fewest queued procs, breaking ties on the lowest
	 * load average.
	 */
	cpuset_complement(&set, &sched_queued_cpus, &sched_idle_cpus);
	cpuset_intersection(&set, &set, &sched_all_cpus);
	if (cpuset_first(&set) == NULL)
		cpuset_copy(&set, &sched_all_cpus);

	while ((ci = cpuset_first(&set)) != NULL) {
		cpuset_del(&set, ci);

		load = ci->ci_schedstate.spc_ldavg;
		run = ci->ci_schedstate.spc_nrun;

		if (choice == NULL || run < best_run ||
		    (run == best_run && load < best_load)) {
			choice = ci;
			best_load = load;
			best_run = run;
		}
	}

	return (choice);
#else
	return (curcpu());
#endif
}
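
/*
 * Note on the cpuset_complement() calls above and in sched_choosecpu():
 * cpuset_complement(to, a, b) computes "b and not a" (see the
 * implementation below), so the result here is the set of idle cpus
 * that have nothing queued, then intersected with the online cpus.
 */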

struct cpu_info *
sched_choosecpu(struct proc *p)
{
#ifdef MULTIPROCESSOR
	struct cpu_info *choice = NULL;
	int last_cost = INT_MAX;
	struct cpu_info *ci;
	struct cpuset set;

	/*
	 * If pegged to a cpu, don't allow it to move.
	 */
	if (p->p_flag & P_CPUPEG)
		return (p->p_cpu);

	sched_choose++;

	/*
	 * Look at all cpus that are currently idle and have nothing queued;
	 * if there are none, fall back to all cpus and pick the cheapest.
	 * (Idle + queued could mean that the cpu is handling an interrupt
	 * at this moment and hasn't had time to leave idle yet.)
	 */
	cpuset_complement(&set, &sched_queued_cpus, &sched_idle_cpus);
	cpuset_intersection(&set, &set, &sched_all_cpus);

	/*
	 * First, just check if our current cpu is in that set; if it is,
	 * this is simple.
	 * Also, our cpu might not be idle, but if it's the current cpu,
	 * it has nothing else queued and we're curproc, take it.
	 */
	if (cpuset_isset(&set, p->p_cpu) ||
	    (p->p_cpu == curcpu() && p->p_cpu->ci_schedstate.spc_nrun == 0 &&
	    (p->p_cpu->ci_schedstate.spc_schedflags & SPCF_SHOULDHALT) == 0 &&
	    curproc == p)) {
		sched_wasidle++;
		return (p->p_cpu);
	}

	if (cpuset_first(&set) == NULL)
		cpuset_copy(&set, &sched_all_cpus);

	while ((ci = cpuset_first(&set)) != NULL) {
		int cost = sched_proc_to_cpu_cost(ci, p);

		if (choice == NULL || cost < last_cost) {
			choice = ci;
			last_cost = cost;
		}
		cpuset_del(&set, ci);
	}

	if (p->p_cpu != choice)
		sched_nmigrations++;
	else
		sched_nomigrations++;

	return (choice);
#else
	return (curcpu());
#endif
}

/*
 * Attempt to steal a proc from some cpu.
 */
struct proc *
sched_steal_proc(struct cpu_info *self)
{
	struct proc *best = NULL;
#ifdef MULTIPROCESSOR
	struct schedstate_percpu *spc;
	int bestcost = INT_MAX;
	struct cpu_info *ci;
	struct cpuset set;

	KASSERT((self->ci_schedstate.spc_schedflags & SPCF_SHOULDHALT) == 0);

	/* Don't steal if we don't want to schedule processes on this CPU. */
	if (!cpuset_isset(&sched_all_cpus, self))
		return (NULL);

	cpuset_copy(&set, &sched_queued_cpus);

	while ((ci = cpuset_first(&set)) != NULL) {
		struct proc *p;
		int queue;
		int cost;

		cpuset_del(&set, ci);

		spc = &ci->ci_schedstate;

		queue = ffs(spc->spc_whichqs) - 1;
		TAILQ_FOREACH(p, &spc->spc_qs[queue], p_runq) {
			if (p->p_flag & P_CPUPEG)
				continue;

			cost = sched_proc_to_cpu_cost(self, p);

			if (best == NULL || cost < bestcost) {
				best = p;
				bestcost = cost;
			}
		}
	}
	if (best == NULL)
		return (NULL);

	remrunqueue(best);
	best->p_cpu = self;

	sched_stolen++;
#endif
	return (best);
}
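
/*
 * Note that the loop above only inspects the highest-priority non-empty
 * queue (ffs of spc_whichqs) on each candidate cpu, so stealing trades a
 * full scan for a cheap approximation of "best runnable thread there".
 */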

#ifdef MULTIPROCESSOR
/*
 * Base 2 logarithm of an int. Returns 0 for 0 (yes, yes, I know).
 */
static int
log2(unsigned int i)
{
	int ret = 0;

	while (i >>= 1)
		ret++;

	return (ret);
}
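
/*
 * Worked examples: log2(0) == 0 (the loop never runs), log2(1) == 0,
 * log2(8) == 3, and log2(1000) == 9, since 2^9 = 512 <= 1000 < 1024.
 * The resident-page estimate below only needs this order-of-magnitude
 * value, not an exact logarithm.
 */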

/*
 * Calculate the cost of moving the proc to this cpu.
 *
 * What we want is some guesstimate of how much "performance" it will
 * cost us to move the proc here. Not just for caches and TLBs and NUMA
 * memory, but also for the proc itself. A highly loaded cpu might not
 * be the best candidate for this proc since it won't get run.
 *
 * Just total guesstimates for now.
 */

int sched_cost_load = 1;
int sched_cost_priority = 1;
int sched_cost_runnable = 3;
int sched_cost_resident = 1;
#endif

int
sched_proc_to_cpu_cost(struct cpu_info *ci, struct proc *p)
{
	int cost = 0;
#ifdef MULTIPROCESSOR
	struct schedstate_percpu *spc;
	int l2resident = 0;

	spc = &ci->ci_schedstate;

	/*
	 * First, account for the priority of the proc we want to move.
	 * The lower the priority of the destination cpu's current proc
	 * and the higher the priority of the proc we're moving, the more
	 * willing we are to move it.
	 */
	if (!cpuset_isset(&sched_idle_cpus, ci)) {
		cost += (p->p_usrpri - spc->spc_curpriority) *
		    sched_cost_priority;
		cost += sched_cost_runnable;
	}
	if (cpuset_isset(&sched_queued_cpus, ci))
		cost += spc->spc_nrun * sched_cost_runnable;

	/*
	 * Try to avoid the primary cpu as it handles hardware interrupts.
	 *
	 * XXX Needs to be revisited when we distribute interrupts
	 * over cpus.
	 */
	if (CPU_IS_PRIMARY(ci))
		cost += sched_cost_runnable;

	/*
	 * Higher load on the destination means we don't want to go there.
	 */
	cost += ((sched_cost_load * spc->spc_ldavg) >> FSHIFT);

	/*
	 * If the proc is on this cpu already, lower the cost by how much
	 * it has been running and an estimate of its footprint.
	 */
	if (p->p_cpu == ci && p->p_slptime == 0) {
		l2resident =
		    log2(pmap_resident_count(p->p_vmspace->vm_map.pmap));
		cost -= l2resident * sched_cost_resident;
	}
#endif
	return (cost);
}
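
/*
 * A worked example with made-up numbers, using the default weights above:
 * a busy non-primary cpu whose current proc has priority 50, evaluated
 * for a proc with p_usrpri 70 and 5 queued procs, costs
 * (70 - 50) * 1 + 3 + 5 * 3 = 38, plus the scaled load average. If that
 * proc were still warm on this cpu with, say, 2^10 resident pages, the
 * 10 * 1 discount would knock the cost back down to 28 plus load.
 */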

/*
 * Peg a proc to a cpu.
 */
void
sched_peg_curproc(struct cpu_info *ci)
{
	struct proc *p = curproc;
	int s;

	SCHED_LOCK(s);
	atomic_setbits_int(&p->p_flag, P_CPUPEG);
	setrunqueue(ci, p, p->p_usrpri);
	p->p_ru.ru_nvcsw++;
	mi_switch();
	SCHED_UNLOCK(s);
}
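
/*
 * P_CPUPEG stays set after sched_peg_curproc() returns; the proc remains
 * pinned (sched_choosecpu() returns p_cpu and sched_steal_proc() skips
 * it) until the caller clears the flag, as sched_barrier_task() does
 * below once its work on the target cpu is done.
 */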

#ifdef MULTIPROCESSOR

void
sched_start_secondary_cpus(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;

	CPU_INFO_FOREACH(cii, ci) {
		struct schedstate_percpu *spc = &ci->ci_schedstate;

		if (CPU_IS_PRIMARY(ci))
			continue;
		atomic_clearbits_int(&spc->spc_schedflags,
		    SPCF_SHOULDHALT | SPCF_HALTED);
#ifdef __HAVE_CPU_TOPOLOGY
		if (!sched_smt && ci->ci_smt_id > 0)
			continue;
#endif
		cpuset_add(&sched_all_cpus, ci);
	}
}

void
sched_stop_secondary_cpus(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;

	/*
	 * Make sure we stop the secondary CPUs.
	 */
	CPU_INFO_FOREACH(cii, ci) {
		struct schedstate_percpu *spc = &ci->ci_schedstate;

		if (CPU_IS_PRIMARY(ci))
			continue;
		cpuset_del(&sched_all_cpus, ci);
		atomic_setbits_int(&spc->spc_schedflags, SPCF_SHOULDHALT);
	}
	CPU_INFO_FOREACH(cii, ci) {
		struct schedstate_percpu *spc = &ci->ci_schedstate;
		struct sleep_state sls;

		if (CPU_IS_PRIMARY(ci))
			continue;
		while ((spc->spc_schedflags & SPCF_HALTED) == 0) {
			sleep_setup(&sls, spc, PZERO, "schedstate");
			sleep_finish(&sls,
			    (spc->spc_schedflags & SPCF_HALTED) == 0);
		}
	}
}

struct sched_barrier_state {
	struct cpu_info *ci;
	struct cond cond;
};

void
sched_barrier_task(void *arg)
{
	struct sched_barrier_state *sb = arg;
	struct cpu_info *ci = sb->ci;

	sched_peg_curproc(ci);
	cond_signal(&sb->cond);
	atomic_clearbits_int(&curproc->p_flag, P_CPUPEG);
}

void
sched_barrier(struct cpu_info *ci)
{
	struct sched_barrier_state sb;
	struct task task;
	CPU_INFO_ITERATOR cii;

	if (ci == NULL) {
		CPU_INFO_FOREACH(cii, ci) {
			if (CPU_IS_PRIMARY(ci))
				break;
		}
	}
	KASSERT(ci != NULL);

	if (ci == curcpu())
		return;

	sb.ci = ci;
	cond_init(&sb.cond);
	task_set(&task, sched_barrier_task, &sb);

	task_add(systqmp, &task);
	cond_wait(&sb.cond, "sbar");
}
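
/*
 * sched_barrier() thus guarantees that, by the time it returns, the
 * target cpu has gone through at least one context switch: the task runs
 * on systqmp, pegs itself to ci, and only signals the condition once it
 * is actually running there.
 */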

#else

void
sched_barrier(struct cpu_info *ci)
{
}

#endif

/*
 * Functions to manipulate cpu sets.
 */
struct cpu_info *cpuset_infos[MAXCPUS];
static struct cpuset cpuset_all;

void
cpuset_init_cpu(struct cpu_info *ci)
{
	cpuset_add(&cpuset_all, ci);
	cpuset_infos[CPU_INFO_UNIT(ci)] = ci;
}

void
cpuset_clear(struct cpuset *cs)
{
	memset(cs, 0, sizeof(*cs));
}

void
cpuset_add(struct cpuset *cs, struct cpu_info *ci)
{
	unsigned int num = CPU_INFO_UNIT(ci);
	atomic_setbits_int(&cs->cs_set[num/32], (1 << (num % 32)));
}

void
cpuset_del(struct cpuset *cs, struct cpu_info *ci)
{
	unsigned int num = CPU_INFO_UNIT(ci);
	atomic_clearbits_int(&cs->cs_set[num/32], (1 << (num % 32)));
}

int
cpuset_isset(struct cpuset *cs, struct cpu_info *ci)
{
	unsigned int num = CPU_INFO_UNIT(ci);
	return (cs->cs_set[num/32] & (1 << (num % 32)));
}
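
/*
 * The sets are plain bitmaps of 32-bit words indexed by CPU_INFO_UNIT.
 * For example, cpu 35 lives in word 35/32 == 1 as bit 35 % 32 == 3,
 * i.e. mask 0x8. Adds and deletes use the atomic bit operations so
 * concurrent updates from different cpus don't clobber each other.
 */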

void
cpuset_add_all(struct cpuset *cs)
{
	cpuset_copy(cs, &cpuset_all);
}

void
cpuset_copy(struct cpuset *to, struct cpuset *from)
{
	memcpy(to, from, sizeof(*to));
}

struct cpu_info *
cpuset_first(struct cpuset *cs)
{
	int i;

	for (i = 0; i < CPUSET_ASIZE(ncpus); i++)
		if (cs->cs_set[i])
			return (cpuset_infos[i * 32 + ffs(cs->cs_set[i]) - 1]);

	return (NULL);
}

void
cpuset_union(struct cpuset *to, struct cpuset *a, struct cpuset *b)
{
	int i;

	for (i = 0; i < CPUSET_ASIZE(ncpus); i++)
		to->cs_set[i] = a->cs_set[i] | b->cs_set[i];
}

void
cpuset_intersection(struct cpuset *to, struct cpuset *a, struct cpuset *b)
{
	int i;

	for (i = 0; i < CPUSET_ASIZE(ncpus); i++)
		to->cs_set[i] = a->cs_set[i] & b->cs_set[i];
}

void
cpuset_complement(struct cpuset *to, struct cpuset *a, struct cpuset *b)
{
	int i;

	for (i = 0; i < CPUSET_ASIZE(ncpus); i++)
		to->cs_set[i] = b->cs_set[i] & ~a->cs_set[i];
}

int
cpuset_cardinality(struct cpuset *cs)
{
	int cardinality, i, n;

	cardinality = 0;

	for (i = 0; i < CPUSET_ASIZE(ncpus); i++)
		for (n = cs->cs_set[i]; n != 0; n &= n - 1)
			cardinality++;

	return (cardinality);
}
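
/*
 * cpuset_cardinality() counts set bits with Kernighan's trick: n &= n - 1
 * clears the lowest set bit each iteration, e.g. 0b1011 -> 0b1010 ->
 * 0b1000 -> 0, giving 3 iterations for 3 set bits.
 */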

int
sysctl_hwncpuonline(void)
{
	return cpuset_cardinality(&sched_all_cpus);
}

int
cpu_is_online(struct cpu_info *ci)
{
	return cpuset_isset(&sched_all_cpus, ci);
}

#ifdef __HAVE_CPU_TOPOLOGY

#include <sys/sysctl.h>

int
sysctl_hwsmt(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	int err, newsmt;

	newsmt = sched_smt;
	err = sysctl_int(oldp, oldlenp, newp, newlen, &newsmt);
	if (err)
		return err;
	if (newsmt > 1)
		newsmt = 1;
	if (newsmt < 0)
		newsmt = 0;
	if (newsmt == sched_smt)
		return 0;

	sched_smt = newsmt;
	CPU_INFO_FOREACH(cii, ci) {
		if (CPU_IS_PRIMARY(ci))
			continue;
		if (ci->ci_smt_id == 0)
			continue;
		if (sched_smt)
			cpuset_add(&sched_all_cpus, ci);
		else
			cpuset_del(&sched_all_cpus, ci);
	}

	return 0;
}

#endif