/*	$OpenBSD: kern_sched.c,v 1.78 2023/07/11 07:02:43 claudio Exp $	*/
/*
 * Copyright (c) 2007, 2008 Artur Grabowski <art@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>

#include <sys/sched.h>
#include <sys/proc.h>
#include <sys/kthread.h>
#include <sys/systm.h>
#include <sys/task.h>
#include <sys/smr.h>
#include <sys/tracepoint.h>

#include <uvm/uvm_extern.h>

void sched_kthreads_create(void *);

int sched_proc_to_cpu_cost(struct cpu_info *ci, struct proc *p);
struct proc *sched_steal_proc(struct cpu_info *);

/*
 * To help choose which cpu should run which process, we keep track
 * of cpus that are currently idle and cpus that have processes
 * queued.
 */
struct cpuset sched_idle_cpus;
struct cpuset sched_queued_cpus;
struct cpuset sched_all_cpus;

/*
 * Some general scheduler counters.
 */
uint64_t sched_nmigrations;	/* Times we migrated a proc to a new cpu */
uint64_t sched_nomigrations;	/* Times the proc stayed on its old cpu */
uint64_t sched_noidle;		/* Times we didn't pick the idle task */
uint64_t sched_stolen;		/* Times we stole proc from other cpus */
uint64_t sched_choose;		/* Times we chose a cpu */
uint64_t sched_wasidle;		/* Times we came out of idle */

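/* Nonzero to also schedule on SMT sibling cpus; settable via the hw.smt sysctl. */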
int sched_smt;

/*
 * A few notes about cpu_switchto that is implemented in MD code.
 *
 * cpu_switchto takes two arguments, the old proc and the proc
 * it should switch to. The new proc will never be NULL, so we always have
 * a saved state that we need to switch to. The old proc however can
 * be NULL if the process is exiting. NULL for the old proc simply
 * means "don't bother saving old state".
 *
 * cpu_switchto is supposed to atomically load the new state of the process
 * including the pcb, pmap and setting curproc, the p_cpu pointer in the
 * proc and p_stat to SONPROC. This is atomic only with respect to
 * interrupts; other cpus in the system must not depend on this state
 * being consistent. Therefore no locking is necessary in cpu_switchto
 * other than blocking interrupts during the context switch.
 */
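
/*
 * A minimal sketch of that contract, for illustration only; every
 * port's real cpu_switchto is machine-dependent code:
 *
 *	cpu_switchto(old, new):
 *		block interrupts
 *		if (old != NULL)
 *			save the cpu registers into old's pcb
 *		load new's pcb and switch to new's pmap
 *		curproc = new, new->p_cpu = curcpu(), new->p_stat = SONPROC
 *		unblock interrupts, resume on new's kernel stack
 */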

/*
 * sched_init_cpu is called from main() for the boot cpu, then it's the
 * responsibility of the MD code to call it for all other cpus.
 */
void
sched_init_cpu(struct cpu_info *ci)
{
	struct schedstate_percpu *spc = &ci->ci_schedstate;
	int i;

	for (i = 0; i < SCHED_NQS; i++)
		TAILQ_INIT(&spc->spc_qs[i]);

	spc->spc_idleproc = NULL;

	kthread_create_deferred(sched_kthreads_create, ci);

	LIST_INIT(&spc->spc_deadproc);
	SIMPLEQ_INIT(&spc->spc_deferred);

	/*
	 * Slight hack here until the cpuset code handles cpu_info
	 * structures.
	 */
	cpuset_init_cpu(ci);

#ifdef __HAVE_CPU_TOPOLOGY
	if (!sched_smt && ci->ci_smt_id > 0)
		return;
#endif
	cpuset_add(&sched_all_cpus, ci);
}

void
sched_kthreads_create(void *v)
{
	struct cpu_info *ci = v;
	struct schedstate_percpu *spc = &ci->ci_schedstate;
	static int num;

	if (fork1(&proc0, FORK_SHAREVM|FORK_SHAREFILES|FORK_NOZOMBIE|
	    FORK_SYSTEM|FORK_IDLE, sched_idle, ci, NULL,
	    &spc->spc_idleproc))
		panic("fork idle");

	/* Give each idle thread a distinct name: idle0, idle1, ... */
	snprintf(spc->spc_idleproc->p_p->ps_comm,
	    sizeof(spc->spc_idleproc->p_p->ps_comm),
	    "idle%d", num);

	num++;
}

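/*
 * Per-cpu idle proc body; one is forked per cpu in
 * sched_kthreads_create() above.
 */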
void
sched_idle(void *v)
{
	struct schedstate_percpu *spc;
	struct proc *p = curproc;
	struct cpu_info *ci = v;
	int s;

	KERNEL_UNLOCK();

	spc = &ci->ci_schedstate;

	/*
	 * The first time we enter here we're not supposed to idle;
	 * just go away for a while.
	 */
	SCHED_LOCK(s);
	cpuset_add(&sched_idle_cpus, ci);
	p->p_stat = SSLEEP;
	p->p_cpu = ci;
	atomic_setbits_int(&p->p_flag, P_CPUPEG);
	mi_switch();
	cpuset_del(&sched_idle_cpus, ci);
	SCHED_UNLOCK(s);

	KASSERT(ci == curcpu());
	KASSERT(curproc == spc->spc_idleproc);

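	/*
	 * Main idle loop: yield while this cpu still has work queued,
	 * reap dead procs left behind by sched_exit(), then spin in
	 * cpu_idle_cycle() until new work arrives or we're asked to halt.
	 */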
	while (1) {
		while (!cpu_is_idle(curcpu())) {
			struct proc *dead;

			SCHED_LOCK(s);
			p->p_stat = SSLEEP;
			mi_switch();
			SCHED_UNLOCK(s);

			while ((dead = LIST_FIRST(&spc->spc_deadproc))) {
				LIST_REMOVE(dead, p_hash);
				exit2(dead);
			}
		}

		splassert(IPL_NONE);

		smr_idle();

		cpuset_add(&sched_idle_cpus, ci);
		cpu_idle_enter();
		while (spc->spc_whichqs == 0) {
#ifdef MULTIPROCESSOR
			if (spc->spc_schedflags & SPCF_SHOULDHALT &&
			    (spc->spc_schedflags & SPCF_HALTED) == 0) {
				cpuset_del(&sched_idle_cpus, ci);
				SCHED_LOCK(s);
				atomic_setbits_int(&spc->spc_schedflags,
				    spc->spc_whichqs ? 0 : SPCF_HALTED);
				SCHED_UNLOCK(s);
				wakeup(spc);
			}
#endif
			cpu_idle_cycle();
		}
		cpu_idle_leave();
		cpuset_del(&sched_idle_cpus, ci);
	}
}

/*
 * To free our address space we have to jump through a few hoops.
 * The freeing is done by the reaper, but until we have one reaper
 * per cpu, we have no way of putting this proc on the deadproc list
 * and waking up the reaper without risking having our address space and
 * stack torn from under us before we manage to switch to another proc.
 * Therefore we have a per-cpu list of dead processes where we put this
 * proc and have the idle proc clean up that list and move it to the
 * reaper list.
 * All this will be unnecessary once we can bind the reaper to this cpu
 * and not risk having it switch to another one in case it sleeps.
 */
void
sched_exit(struct proc *p)
{
	struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
	struct timespec ts;
	struct proc *idle;
	int s;

	nanouptime(&ts);
	timespecsub(&ts, &spc->spc_runtime, &ts);
	timespecadd(&p->p_rtime, &ts, &p->p_rtime);

	LIST_INSERT_HEAD(&spc->spc_deadproc, p, p_hash);

#ifdef MULTIPROCESSOR
	/* This process no longer needs to hold the kernel lock. */
	KERNEL_ASSERT_LOCKED();
	__mp_release_all(&kernel_lock);
#endif

	SCHED_LOCK(s);
	idle = spc->spc_idleproc;
	idle->p_stat = SRUN;
	cpu_switchto(NULL, idle);
	panic("cpu_switchto returned");
}

/*
 * Run queue management.
 */
void
sched_init_runqueues(void)
{
}

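/*
 * Put p on the run queue of cpu ci at the given priority, picking a
 * cpu with sched_choosecpu() if ci is NULL.  Priorities map 4-to-1
 * onto the SCHED_NQS run queues (queue = prio >> 2).  The target cpu
 * is woken from idle or rescheduled as needed.
 */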
void
setrunqueue(struct cpu_info *ci, struct proc *p, uint8_t prio)
{
	struct schedstate_percpu *spc;
	int queue = prio >> 2;

	if (ci == NULL)
		ci = sched_choosecpu(p);

	KASSERT(ci != NULL);
	SCHED_ASSERT_LOCKED();
	KASSERT(!ISSET(p->p_flag, P_WSLEEP) || p->p_stat == SSTOP);

	p->p_cpu = ci;
	p->p_stat = SRUN;
	p->p_runpri = prio;

	spc = &p->p_cpu->ci_schedstate;
	spc->spc_nrun++;
	TRACEPOINT(sched, enqueue, p->p_tid + THREAD_PID_OFFSET,
	    p->p_p->ps_pid);

	TAILQ_INSERT_TAIL(&spc->spc_qs[queue], p, p_runq);
	spc->spc_whichqs |= (1U << queue);
	cpuset_add(&sched_queued_cpus, p->p_cpu);

	if (cpuset_isset(&sched_idle_cpus, p->p_cpu))
		cpu_unidle(p->p_cpu);

	if (prio < spc->spc_curpriority)
		need_resched(ci);
}

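/*
 * Remove p from the run queue of the cpu it is queued on, clearing
 * the queue's bit in spc_whichqs (and the cpu's sched_queued_cpus
 * membership) when the last proc leaves.
 */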
void
remrunqueue(struct proc *p)
{
	struct schedstate_percpu *spc;
	int queue = p->p_runpri >> 2;

	SCHED_ASSERT_LOCKED();
	spc = &p->p_cpu->ci_schedstate;
	spc->spc_nrun--;
	TRACEPOINT(sched, dequeue, p->p_tid + THREAD_PID_OFFSET,
	    p->p_p->ps_pid);

	TAILQ_REMOVE(&spc->spc_qs[queue], p, p_runq);
	if (TAILQ_EMPTY(&spc->spc_qs[queue])) {
		spc->spc_whichqs &= ~(1U << queue);
		if (spc->spc_whichqs == 0)
			cpuset_del(&sched_queued_cpus, p->p_cpu);
	}
}

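/*
 * Pick the next proc for this cpu to run: the head of the highest-
 * priority non-empty local queue first, then a proc stolen from
 * another cpu, with the idle proc as a last resort.  A cpu that is
 * being halted first pushes its queued procs to other cpus.
 */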
struct proc *
sched_chooseproc(void)
{
	struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
	struct proc *p;
	int queue;

	SCHED_ASSERT_LOCKED();

#ifdef MULTIPROCESSOR
	if (spc->spc_schedflags & SPCF_SHOULDHALT) {
		if (spc->spc_whichqs) {
			for (queue = 0; queue < SCHED_NQS; queue++) {
				while ((p = TAILQ_FIRST(&spc->spc_qs[queue]))) {
					remrunqueue(p);
					setrunqueue(NULL, p, p->p_runpri);
					if (p->p_cpu == curcpu()) {
						KASSERT(p->p_flag & P_CPUPEG);
						goto again;
					}
				}
			}
		}
		p = spc->spc_idleproc;
		KASSERT(p);
		KASSERT(p->p_wchan == NULL);
		p->p_stat = SRUN;
		return (p);
	}
#endif

again:
	if (spc->spc_whichqs) {
		queue = ffs(spc->spc_whichqs) - 1;
		p = TAILQ_FIRST(&spc->spc_qs[queue]);
		remrunqueue(p);
		sched_noidle++;
		if (p->p_stat != SRUN)
			panic("thread %d not in SRUN: %d", p->p_tid, p->p_stat);
	} else if ((p = sched_steal_proc(curcpu())) == NULL) {
		p = spc->spc_idleproc;
		if (p == NULL) {
			int s;
			/*
			 * We get here if someone decides to switch during
			 * boot before forking kthreads, bleh.
			 * This is kind of like a stupid idle loop.
			 */
#ifdef MULTIPROCESSOR
			__mp_unlock(&sched_lock);
#endif
			spl0();
			delay(10);
			SCHED_LOCK(s);
			goto again;
		}
		KASSERT(p);
		p->p_stat = SRUN;
	}

	KASSERT(p->p_wchan == NULL);
	return (p);
}

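/*
 * Choose a cpu for a newly forked proc: prefer an idle cpu with an
 * empty run queue; failing that, the cpu with the fewest queued
 * procs, then the lowest load average.
 */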
struct cpu_info *
sched_choosecpu_fork(struct proc *parent, int flags)
{
#ifdef MULTIPROCESSOR
	struct cpu_info *choice = NULL;
	fixpt_t load, best_load = ~0;
	int run, best_run = INT_MAX;
	struct cpu_info *ci;
	struct cpuset set;

#if 0
	/*
	 * XXX
	 * Don't do this until we have a painless way to move the cpu in exec.
	 * Preferably when nuking the old pmap and getting a new one on a
	 * new cpu.
	 */
	/*
	 * PPWAIT forks are simple. We know that the parent will not
	 * run until we exec and choose another cpu, so we just steal its
	 * cpu.
	 */
	if (flags & FORK_PPWAIT)
		return (parent->p_cpu);
#endif

	/*
	 * Look at all cpus that are currently idle and have nothing queued.
	 * If there are none, consider all cpus and pick the one with the
	 * fewest queued procs, breaking ties by the lowest load average.
	 */
	cpuset_complement(&set, &sched_queued_cpus, &sched_idle_cpus);
	cpuset_intersection(&set, &set, &sched_all_cpus);
	if (cpuset_first(&set) == NULL)
		cpuset_copy(&set, &sched_all_cpus);

	while ((ci = cpuset_first(&set)) != NULL) {
		cpuset_del(&set, ci);

		load = ci->ci_schedstate.spc_ldavg;
		run = ci->ci_schedstate.spc_nrun;

		if (choice == NULL || run < best_run ||
		    (run == best_run && load < best_load)) {
			choice = ci;
			best_load = load;
			best_run = run;
		}
	}

	return (choice);
#else
	return (curcpu());
#endif
}

struct cpu_info *
sched_choosecpu(struct proc *p)
{
#ifdef MULTIPROCESSOR
	struct cpu_info *choice = NULL;
	int last_cost = INT_MAX;
	struct cpu_info *ci;
	struct cpuset set;

	/*
	 * If pegged to a cpu, don't allow it to move.
	 */
	if (p->p_flag & P_CPUPEG)
		return (p->p_cpu);

	sched_choose++;

	/*
	 * Look at all cpus that are currently idle and have nothing queued.
	 * If there are none, pick the cheapest of all cpus.
	 * (idle + queued could mean that the cpu is handling an interrupt
	 * at this moment and hasn't had time to leave idle yet).
	 */
	cpuset_complement(&set, &sched_queued_cpus, &sched_idle_cpus);
	cpuset_intersection(&set, &set, &sched_all_cpus);

	/*
	 * First, check whether our current cpu is in that set; if it is,
	 * this is simple.
	 * Also, our cpu might not be idle, but if it's the current cpu
	 * and it has nothing else queued and we're curproc, take it.
	 */
	if (cpuset_isset(&set, p->p_cpu) ||
	    (p->p_cpu == curcpu() && p->p_cpu->ci_schedstate.spc_nrun == 0 &&
	    (p->p_cpu->ci_schedstate.spc_schedflags & SPCF_SHOULDHALT) == 0 &&
	    curproc == p)) {
		sched_wasidle++;
		return (p->p_cpu);
	}

	if (cpuset_first(&set) == NULL)
		cpuset_copy(&set, &sched_all_cpus);

	while ((ci = cpuset_first(&set)) != NULL) {
		int cost = sched_proc_to_cpu_cost(ci, p);

		if (choice == NULL || cost < last_cost) {
			choice = ci;
			last_cost = cost;
		}
		cpuset_del(&set, ci);
	}

	if (p->p_cpu != choice)
		sched_nmigrations++;
	else
		sched_nomigrations++;

	return (choice);
#else
	return (curcpu());
#endif
}

/*
 * Attempt to steal a proc from some other cpu's run queue: scan the
 * cpus with queued procs and take the one that is cheapest to move
 * here, skipping procs that are pegged to their cpu.
 */
struct proc *
sched_steal_proc(struct cpu_info *self)
{
	struct proc *best = NULL;
#ifdef MULTIPROCESSOR
	struct schedstate_percpu *spc;
	int bestcost = INT_MAX;
	struct cpu_info *ci;
	struct cpuset set;

	KASSERT((self->ci_schedstate.spc_schedflags & SPCF_SHOULDHALT) == 0);

	/* Don't steal if we don't want to schedule processes on this CPU. */
	if (!cpuset_isset(&sched_all_cpus, self))
		return (NULL);

	cpuset_copy(&set, &sched_queued_cpus);

	while ((ci = cpuset_first(&set)) != NULL) {
		struct proc *p;
		int queue;
		int cost;

		cpuset_del(&set, ci);

		spc = &ci->ci_schedstate;

		queue = ffs(spc->spc_whichqs) - 1;
		TAILQ_FOREACH(p, &spc->spc_qs[queue], p_runq) {
			if (p->p_flag & P_CPUPEG)
				continue;

			cost = sched_proc_to_cpu_cost(self, p);

			if (best == NULL || cost < bestcost) {
				best = p;
				bestcost = cost;
			}
		}
	}
	if (best == NULL)
		return (NULL);

	remrunqueue(best);
	best->p_cpu = self;

	sched_stolen++;
#endif
	return (best);
}

#ifdef MULTIPROCESSOR
/*
 * Base 2 logarithm of an int. Returns 0 for 0 (strictly undefined,
 * but harmless here).
 */
static int
log2(unsigned int i)
{
	int ret = 0;

	while (i >>= 1)
		ret++;

	return (ret);
}

/*
 * Calculate the cost of moving the proc to this cpu.
 *
 * What we want is some guesstimate of how much "performance" it will
 * cost us to move the proc here. Not just for caches and TLBs and NUMA
 * memory, but also for the proc itself. A highly loaded cpu might not
 * be the best candidate for this proc since it won't get run.
 *
 * Just total guesstimates for now.
 */

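/*
 * Worked example with made-up numbers: moving a proc with p_usrpri
 * 50 to a non-idle cpu running at spc_curpriority 30 with two procs
 * queued costs (50 - 30) * sched_cost_priority + sched_cost_runnable
 * + 2 * sched_cost_runnable = 20 + 3 + 6 = 29, before the load and
 * cache-residency terms below are applied.
 */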
int sched_cost_load = 1;
int sched_cost_priority = 1;
int sched_cost_runnable = 3;
int sched_cost_resident = 1;
#endif

int
sched_proc_to_cpu_cost(struct cpu_info *ci, struct proc *p)
{
	int cost = 0;
#ifdef MULTIPROCESSOR
	struct schedstate_percpu *spc;
	int l2resident = 0;

	spc = &ci->ci_schedstate;

	/*
	 * First, account for the priority of the proc we want to move.
	 * The lower the priority running on the destination and the
	 * higher the priority of the proc, the more willing we are to
	 * move it.
	 */
	if (!cpuset_isset(&sched_idle_cpus, ci)) {
		cost += (p->p_usrpri - spc->spc_curpriority) *
		    sched_cost_priority;
		cost += sched_cost_runnable;
	}
	if (cpuset_isset(&sched_queued_cpus, ci))
		cost += spc->spc_nrun * sched_cost_runnable;

	/*
	 * Try to avoid the primary cpu as it handles hardware interrupts.
	 *
	 * XXX Needs to be revisited when we distribute interrupts
	 * over cpus.
	 */
	if (CPU_IS_PRIMARY(ci))
		cost += sched_cost_runnable;

	/*
	 * Higher load on the destination means we don't want to go there.
	 */
	cost += ((sched_cost_load * spc->spc_ldavg) >> FSHIFT);

	/*
	 * If the proc is on this cpu already, lower the cost by how much
	 * it has been running and an estimate of its footprint.
	 */
	if (p->p_cpu == ci && p->p_slptime == 0) {
		l2resident =
		    log2(pmap_resident_count(p->p_vmspace->vm_map.pmap));
		cost -= l2resident * sched_cost_resident;
	}
#endif
	return (cost);
}

/*
 * Peg the current proc to a cpu and switch to it.  P_CPUPEG stays
 * set afterwards; callers that need to unpeg must clear it themselves.
 */
void
sched_peg_curproc(struct cpu_info *ci)
{
	struct proc *p = curproc;
	int s;

	SCHED_LOCK(s);
	atomic_setbits_int(&p->p_flag, P_CPUPEG);
	setrunqueue(ci, p, p->p_usrpri);
	p->p_ru.ru_nvcsw++;
	mi_switch();
	SCHED_UNLOCK(s);
}

#ifdef MULTIPROCESSOR

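/*
 * Allow scheduling on secondary cpus again, e.g. when they come
 * back online after a suspend.
 */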
void
sched_start_secondary_cpus(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;

	CPU_INFO_FOREACH(cii, ci) {
		struct schedstate_percpu *spc = &ci->ci_schedstate;

		if (CPU_IS_PRIMARY(ci) || !CPU_IS_RUNNING(ci))
			continue;
		atomic_clearbits_int(&spc->spc_schedflags,
		    SPCF_SHOULDHALT | SPCF_HALTED);
#ifdef __HAVE_CPU_TOPOLOGY
		if (!sched_smt && ci->ci_smt_id > 0)
			continue;
#endif
		cpuset_add(&sched_all_cpus, ci);
	}
}

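/*
 * Ask all secondary cpus to halt and wait until each one's idle proc
 * has acknowledged with SPCF_HALTED.
 */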
void
sched_stop_secondary_cpus(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;

	/*
	 * Make sure we stop the secondary CPUs.
	 */
	CPU_INFO_FOREACH(cii, ci) {
		struct schedstate_percpu *spc = &ci->ci_schedstate;

		if (CPU_IS_PRIMARY(ci) || !CPU_IS_RUNNING(ci))
			continue;
		cpuset_del(&sched_all_cpus, ci);
		atomic_setbits_int(&spc->spc_schedflags, SPCF_SHOULDHALT);
	}
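	/*
	 * Wait for each halted cpu to acknowledge with SPCF_HALTED.
	 */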
	CPU_INFO_FOREACH(cii, ci) {
		struct schedstate_percpu *spc = &ci->ci_schedstate;
		struct sleep_state sls;

		if (CPU_IS_PRIMARY(ci) || !CPU_IS_RUNNING(ci))
			continue;
		while ((spc->spc_schedflags & SPCF_HALTED) == 0) {
			sleep_setup(&sls, spc, PZERO, "schedstate");
			sleep_finish(&sls, PZERO, 0,
			    (spc->spc_schedflags & SPCF_HALTED) == 0);
		}
	}
}

struct sched_barrier_state {
	struct cpu_info *ci;
	struct cond cond;
};

void
sched_barrier_task(void *arg)
{
	struct sched_barrier_state *sb = arg;
	struct cpu_info *ci = sb->ci;

	sched_peg_curproc(ci);
	cond_signal(&sb->cond);
	atomic_clearbits_int(&curproc->p_flag, P_CPUPEG);
}

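/*
 * Block until a proc has run on the given cpu (the primary cpu if
 * ci is NULL) by queueing a task that pegs itself there; this acts
 * as a scheduling barrier against that cpu.
 */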
void
sched_barrier(struct cpu_info *ci)
{
	struct sched_barrier_state sb;
	struct task task;
	CPU_INFO_ITERATOR cii;

	if (ci == NULL) {
		CPU_INFO_FOREACH(cii, ci) {
			if (CPU_IS_PRIMARY(ci))
				break;
		}
	}
	KASSERT(ci != NULL);

	if (ci == curcpu())
		return;

	sb.ci = ci;
	cond_init(&sb.cond);
	task_set(&task, sched_barrier_task, &sb);

	task_add(systqmp, &task);
	cond_wait(&sb.cond, "sbar");
}

#else

void
sched_barrier(struct cpu_info *ci)
{
}

#endif

/*
 * Functions to manipulate cpu sets.
 */
struct cpu_info *cpuset_infos[MAXCPUS];
static struct cpuset cpuset_all;

void
cpuset_init_cpu(struct cpu_info *ci)
{
	cpuset_add(&cpuset_all, ci);
	cpuset_infos[CPU_INFO_UNIT(ci)] = ci;
}

void
cpuset_clear(struct cpuset *cs)
{
	memset(cs, 0, sizeof(*cs));
}

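/*
 * Each cpu occupies bit CPU_INFO_UNIT(ci) % 32 of word
 * CPU_INFO_UNIT(ci) / 32 in the set.  Individual bits are set and
 * cleared with atomic operations, so single-cpu updates need no
 * extra locking.
 */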
void
cpuset_add(struct cpuset *cs, struct cpu_info *ci)
{
	unsigned int num = CPU_INFO_UNIT(ci);
	atomic_setbits_int(&cs->cs_set[num/32], (1U << (num % 32)));
}

void
cpuset_del(struct cpuset *cs, struct cpu_info *ci)
{
	unsigned int num = CPU_INFO_UNIT(ci);
	atomic_clearbits_int(&cs->cs_set[num/32], (1U << (num % 32)));
}

int
cpuset_isset(struct cpuset *cs, struct cpu_info *ci)
{
	unsigned int num = CPU_INFO_UNIT(ci);
	return (cs->cs_set[num/32] & (1U << (num % 32)));
}

void
cpuset_add_all(struct cpuset *cs)
{
	cpuset_copy(cs, &cpuset_all);
}

void
cpuset_copy(struct cpuset *to, struct cpuset *from)
{
	memcpy(to, from, sizeof(*to));
}

struct cpu_info *
cpuset_first(struct cpuset *cs)
{
	int i;

	for (i = 0; i < CPUSET_ASIZE(ncpus); i++)
		if (cs->cs_set[i])
			return (cpuset_infos[i * 32 + ffs(cs->cs_set[i]) - 1]);

	return (NULL);
}

void
cpuset_union(struct cpuset *to, struct cpuset *a, struct cpuset *b)
{
	int i;

	for (i = 0; i < CPUSET_ASIZE(ncpus); i++)
		to->cs_set[i] = a->cs_set[i] | b->cs_set[i];
}

void
cpuset_intersection(struct cpuset *to, struct cpuset *a, struct cpuset *b)
{
	int i;

	for (i = 0; i < CPUSET_ASIZE(ncpus); i++)
		to->cs_set[i] = a->cs_set[i] & b->cs_set[i];
}

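/*
 * Note the argument order: this computes to = b & ~a, i.e. the
 * members of b that are not in a.
 */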
void
cpuset_complement(struct cpuset *to, struct cpuset *a, struct cpuset *b)
{
	int i;

	for (i = 0; i < CPUSET_ASIZE(ncpus); i++)
		to->cs_set[i] = b->cs_set[i] & ~a->cs_set[i];
}

int
cpuset_cardinality(struct cpuset *cs)
{
	int cardinality, i, n;

	cardinality = 0;

	for (i = 0; i < CPUSET_ASIZE(ncpus); i++)
		for (n = cs->cs_set[i]; n != 0; n &= n - 1)
			cardinality++;

	return (cardinality);
}

int
sysctl_hwncpuonline(void)
{
	return cpuset_cardinality(&sched_all_cpus);
}

int
cpu_is_online(struct cpu_info *ci)
{
	return cpuset_isset(&sched_all_cpus, ci);
}

#ifdef __HAVE_CPU_TOPOLOGY

#include <sys/sysctl.h>

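/*
 * hw.smt sysctl handler: enabling adds the SMT sibling cpus to
 * sched_all_cpus so procs may be scheduled on them; disabling
 * removes them again (cpus with ci_smt_id == 0 always stay).
 */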
int
sysctl_hwsmt(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	int err, newsmt;

	newsmt = sched_smt;
	err = sysctl_int_bounded(oldp, oldlenp, newp, newlen, &newsmt, 0, 1);
	if (err)
		return err;
	if (newsmt == sched_smt)
		return 0;

	sched_smt = newsmt;
	CPU_INFO_FOREACH(cii, ci) {
		if (CPU_IS_PRIMARY(ci) || !CPU_IS_RUNNING(ci))
			continue;
		if (ci->ci_smt_id == 0)
			continue;
		if (sched_smt)
			cpuset_add(&sched_all_cpus, ci);
		else
			cpuset_del(&sched_all_cpus, ci);
	}

	return 0;
}

#endif
886