/*	$NetBSD: kern_runq.c,v 1.3 2008/04/30 09:17:12 rmind Exp $	*/

/*
 * Copyright (c) 2007, 2008 Mindaugas Rasiukevicius <rmind at NetBSD org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_runq.c,v 1.3 2008/04/30 09:17:12 rmind Exp $");

#include "opt_multiprocessor.h"

#include <sys/param.h>
#include <sys/kernel.h>

#include <sys/bitops.h>
#include <sys/cpu.h>
#include <sys/idle.h>
#include <sys/intr.h>
#include <sys/kmem.h>
#include <sys/lwp.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/syscallargs.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/types.h>

/*
 * Priority related definitions.
 */
#define	PRI_TS_COUNT	(NPRI_USER)
#define	PRI_RT_COUNT	(PRI_COUNT - PRI_TS_COUNT)
#define	PRI_HTS_RANGE	(PRI_TS_COUNT / 10)

#define	PRI_HIGHEST_TS	(MAXPRI_USER)

/*
 * Bits per bitmap word.
 */
#define	BITMAP_BITS	(32)
#define	BITMAP_SHIFT	(5)
#define	BITMAP_MSB	(0x80000000U)
#define	BITMAP_MASK	(BITMAP_BITS - 1)
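
/*
 * The bitmap keeps one bit per priority level; a bit is set while the
 * corresponding run queue is non-empty.  Priority "prio" maps to word
 * prio >> BITMAP_SHIFT and to bit BITMAP_MSB >> (prio & BITMAP_MASK),
 * so higher priorities within a word sit closer to the least significant
 * bit (and are therefore found first by ffs() in sched_dequeue()).
 */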

/*
 * Structures, runqueue.
 */

const int	schedppq = 1;

typedef struct {
	TAILQ_HEAD(, lwp) q_head;
} queue_t;

typedef struct {
	/* Bitmap of non-empty run queues */
	uint32_t	r_bitmap[PRI_COUNT >> BITMAP_SHIFT];
	/* Counters */
	u_int		r_count;	/* Count of the threads */
	u_int		r_avgcount;	/* Average count of threads */
	u_int		r_mcount;	/* Count of migratable threads */
	/* Runqueues */
	queue_t		r_rt_queue[PRI_RT_COUNT];
	queue_t		r_ts_queue[PRI_TS_COUNT];
} runqueue_t;

static void *	sched_getrq(runqueue_t *, const pri_t);
#ifdef MULTIPROCESSOR
static lwp_t *	sched_catchlwp(void);
static void	sched_balance(void *);
#endif

/*
 * Preemption control.
 */
int		sched_upreempt_pri = PRI_KERNEL;
#if 0
int		sched_kpreempt_pri = PRI_USER_RT;
#else
/* XXX disable for now until any bugs are worked out. */
int		sched_kpreempt_pri = 1000;
#endif

/*
 * Migration and balancing.
 */
static u_int	cacheht_time;		/* Cache hotness time */
static u_int	min_catch;		/* Minimal LWP count for catching */
static u_int	balance_period;		/* Balance period */
static struct cpu_info *worker_ci;	/* Victim CPU */
#ifdef MULTIPROCESSOR
static struct callout balance_ch;	/* Callout of balancer */
#endif

void
runq_init(void)
{

	/* Balancing */
	worker_ci = curcpu();
	cacheht_time = mstohz(3);		/* ~3 ms */
	balance_period = mstohz(300);		/* ~300 ms */

	/* Minimal count of LWPs for catching */
	min_catch = 1;

	/* Initialize balancing callout and run it */
#ifdef MULTIPROCESSOR
	callout_init(&balance_ch, CALLOUT_MPSAFE);
	callout_setfunc(&balance_ch, sched_balance, NULL);
	callout_schedule(&balance_ch, balance_period);
#endif
}

void
sched_cpuattach(struct cpu_info *ci)
{
	runqueue_t *ci_rq;
	void *rq_ptr;
	u_int i, size;

	if (ci->ci_schedstate.spc_lwplock == NULL) {
		ci->ci_schedstate.spc_lwplock =
		    mutex_obj_alloc(MUTEX_DEFAULT, IPL_SCHED);
	}
	if (ci == lwp0.l_cpu) {
		/* Initialize the scheduler structure of the primary LWP */
		lwp0.l_mutex = ci->ci_schedstate.spc_lwplock;
	}
	if (ci->ci_schedstate.spc_mutex != NULL) {
		/* Already initialized. */
		return;
	}

	/* Allocate the run queue */
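	/*
	 * Pad the allocation by one coherency_unit so that the structure
	 * can be aligned to a cache line boundary below, keeping each
	 * CPU's run queue on its own cache lines.
	 */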
	size = roundup2(sizeof(runqueue_t), coherency_unit) + coherency_unit;
	rq_ptr = kmem_zalloc(size, KM_SLEEP);
	if (rq_ptr == NULL) {
		panic("sched_cpuattach: could not allocate the runqueue");
	}
	ci_rq = (void *)(roundup2((uintptr_t)(rq_ptr), coherency_unit));

	/* Initialize run queues */
	ci->ci_schedstate.spc_mutex =
	    mutex_obj_alloc(MUTEX_DEFAULT, IPL_SCHED);
	for (i = 0; i < PRI_RT_COUNT; i++)
		TAILQ_INIT(&ci_rq->r_rt_queue[i].q_head);
	for (i = 0; i < PRI_TS_COUNT; i++)
		TAILQ_INIT(&ci_rq->r_ts_queue[i].q_head);

	ci->ci_schedstate.spc_sched_info = ci_rq;
}

/*
 * Control of the runqueue.
 */

static void *
sched_getrq(runqueue_t *ci_rq, const pri_t prio)
{

	KASSERT(prio < PRI_COUNT);
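	/*
	 * Time-sharing priorities (0 .. PRI_HIGHEST_TS) index r_ts_queue
	 * directly; real-time priorities index r_rt_queue relative to
	 * PRI_HIGHEST_TS + 1.
	 */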
	return (prio <= PRI_HIGHEST_TS) ?
	    &ci_rq->r_ts_queue[prio].q_head :
	    &ci_rq->r_rt_queue[prio - PRI_HIGHEST_TS - 1].q_head;
}

void
sched_enqueue(struct lwp *l, bool swtch)
{
	runqueue_t *ci_rq;
	struct schedstate_percpu *spc;
	TAILQ_HEAD(, lwp) *q_head;
	const pri_t eprio = lwp_eprio(l);
	struct cpu_info *ci;
	int type;

	ci = l->l_cpu;
	spc = &ci->ci_schedstate;
	ci_rq = spc->spc_sched_info;
	KASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_mutex));

	/* Update the last run time on switch: accumulate before resetting */
	if (__predict_true(swtch == true)) {
		l->l_rticksum += (hardclock_ticks - l->l_rticks);
		l->l_rticks = hardclock_ticks;
	} else if (l->l_rticks == 0)
		l->l_rticks = hardclock_ticks;

	/* Enqueue the thread */
	q_head = sched_getrq(ci_rq, eprio);
	if (TAILQ_EMPTY(q_head)) {
		u_int i;
		uint32_t q;

		/* Mark bit */
		i = eprio >> BITMAP_SHIFT;
		q = BITMAP_MSB >> (eprio & BITMAP_MASK);
		KASSERT((ci_rq->r_bitmap[i] & q) == 0);
		ci_rq->r_bitmap[i] |= q;
	}
	TAILQ_INSERT_TAIL(q_head, l, l_runq);
	ci_rq->r_count++;
	if ((l->l_pflag & LP_BOUND) == 0)
		ci_rq->r_mcount++;

	/*
	 * Update the runqueue's highest priority if this thread's
	 * priority is higher.
	 */
	if (eprio > spc->spc_maxpriority)
		spc->spc_maxpriority = eprio;

	sched_newts(l);

	/*
	 * Wake the chosen CPU or cause a preemption if the newly
	 * enqueued thread has higher priority.  Don't cause a
	 * preemption if the thread is yielding (swtch).
	 */
	if (!swtch && eprio > spc->spc_curpriority) {
		if (eprio >= sched_kpreempt_pri)
			type = RESCHED_KPREEMPT;
		else if (eprio >= sched_upreempt_pri)
			type = RESCHED_IMMED;
		else
			type = 0;
		cpu_need_resched(ci, type);
	}
}

void
sched_dequeue(struct lwp *l)
{
	runqueue_t *ci_rq;
	TAILQ_HEAD(, lwp) *q_head;
	struct schedstate_percpu *spc;
	const pri_t eprio = lwp_eprio(l);

	spc = &l->l_cpu->ci_schedstate;
	ci_rq = spc->spc_sched_info;
	KASSERT(lwp_locked(l, spc->spc_mutex));

	KASSERT(eprio <= spc->spc_maxpriority);
	KASSERT(ci_rq->r_bitmap[eprio >> BITMAP_SHIFT] != 0);
	KASSERT(ci_rq->r_count > 0);

	ci_rq->r_count--;
	if ((l->l_pflag & LP_BOUND) == 0)
		ci_rq->r_mcount--;

	q_head = sched_getrq(ci_rq, eprio);
	TAILQ_REMOVE(q_head, l, l_runq);
	if (TAILQ_EMPTY(q_head)) {
		u_int i;
		uint32_t q;

		/* Unmark bit */
		i = eprio >> BITMAP_SHIFT;
		q = BITMAP_MSB >> (eprio & BITMAP_MASK);
		KASSERT((ci_rq->r_bitmap[i] & q) != 0);
		ci_rq->r_bitmap[i] &= ~q;

		/*
		 * Update the runqueue's highest priority in case this was
		 * the last thread in the queue of the highest priority.
		 */
		if (eprio != spc->spc_maxpriority)
			return;

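		/*
		 * Scan the bitmap downwards, starting from the word of
		 * the old highest priority, for the next non-empty queue.
		 */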
		do {
			if (ci_rq->r_bitmap[i] != 0) {
				q = ffs(ci_rq->r_bitmap[i]);
				spc->spc_maxpriority =
				    (i << BITMAP_SHIFT) + (BITMAP_BITS - q);
				return;
			}
		} while (i--);

		/* If not found - set the lowest value */
		spc->spc_maxpriority = 0;
	}
}

/*
 * Migration and balancing.
 */

#ifdef MULTIPROCESSOR

/* Estimate if LWP is cache-hot */
static inline bool
lwp_cache_hot(const struct lwp *l)
{

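	/*
	 * Not hot if the LWP has been sleeping or has never run;
	 * otherwise hot if it last ran within cacheht_time ticks.
	 */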
	if (l->l_slptime || l->l_rticks == 0)
		return false;

	return (hardclock_ticks - l->l_rticks <= cacheht_time);
}

/* Check if LWP can migrate to the chosen CPU */
static inline bool
sched_migratable(const struct lwp *l, struct cpu_info *ci)
{
	const struct schedstate_percpu *spc = &ci->ci_schedstate;

	/* CPU is offline */
	if (__predict_false(spc->spc_flags & SPCF_OFFLINE))
		return false;

	/* Affinity bind */
	if (__predict_false(l->l_flag & LW_AFFINITY))
		return CPU_ISSET(cpu_index(ci), &l->l_affinity);

	/* Processor-set */
	return (spc->spc_psid == l->l_psid);
}

/*
 * Estimate whether the LWP should migrate to another CPU.
 * Pick and return the target CPU.
 */
struct cpu_info *
sched_takecpu(struct lwp *l)
{
	struct cpu_info *ci, *tci, *first, *next;
	struct schedstate_percpu *spc;
	runqueue_t *ci_rq, *ici_rq;
	pri_t eprio, lpri, pri;

	KASSERT(lwp_locked(l, NULL));

	ci = l->l_cpu;
	spc = &ci->ci_schedstate;
	ci_rq = spc->spc_sched_info;

	/* If thread is strictly bound, do not estimate other CPUs */
	if (l->l_pflag & LP_BOUND)
		return ci;

	/* CPU of this thread is idling - run there */
	if (ci_rq->r_count == 0)
		return ci;

	eprio = lwp_eprio(l);

	/* Stay if thread is cache-hot */
	if (__predict_true(l->l_stat != LSIDL) &&
	    lwp_cache_hot(l) && eprio >= spc->spc_curpriority)
		return ci;

	/* Run on current CPU if priority of thread is higher */
	ci = curcpu();
	spc = &ci->ci_schedstate;
	if (eprio > spc->spc_curpriority && sched_migratable(l, ci))
		return ci;

	/*
	 * Look for the CPU with the lowest priority thread.  In case of
	 * equal priority, choose the CPU with the fewest threads.
	 */
	first = l->l_cpu;
	ci = first;
	tci = first;
	lpri = PRI_COUNT;
	do {
		next = CIRCLEQ_LOOP_NEXT(&cpu_queue, ci, ci_data.cpu_qchain);
		spc = &ci->ci_schedstate;
		ici_rq = spc->spc_sched_info;
		pri = max(spc->spc_curpriority, spc->spc_maxpriority);
		if (pri > lpri)
			continue;

		if (pri == lpri && ci_rq->r_count < ici_rq->r_count)
			continue;

		if (!sched_migratable(l, ci))
			continue;

		lpri = pri;
		tci = ci;
		ci_rq = ici_rq;
	} while (ci = next, ci != first);

	return tci;
}

/*
 * Try to catch an LWP from the runqueue of another CPU.
 */
static struct lwp *
sched_catchlwp(void)
{
	struct cpu_info *curci = curcpu(), *ci = worker_ci;
	struct schedstate_percpu *spc;
	TAILQ_HEAD(, lwp) *q_head;
	runqueue_t *ci_rq;
	struct lwp *l;

	if (curci == ci)
		return NULL;

	/* Lockless check */
	spc = &ci->ci_schedstate;
	ci_rq = spc->spc_sched_info;
	if (ci_rq->r_mcount < min_catch)
		return NULL;

	/*
	 * Double-lock the runqueues.
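	 * Keep a consistent lock order: the runqueue of the lower-addressed
	 * CPU is locked first.  If the remote lock cannot be taken at once,
	 * drop our own lock, take the remote one and re-take ours, backing
	 * out if work appeared on our runqueue in the meantime.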
	 */
	if (curci < ci) {
		spc_lock(ci);
	} else if (!mutex_tryenter(ci->ci_schedstate.spc_mutex)) {
		const runqueue_t *cur_rq = curci->ci_schedstate.spc_sched_info;

		spc_unlock(curci);
		spc_lock(ci);
		spc_lock(curci);

		if (cur_rq->r_count) {
			spc_unlock(ci);
			return NULL;
		}
	}

	if (ci_rq->r_mcount < min_catch) {
		spc_unlock(ci);
		return NULL;
	}

	/* Take the highest priority thread */
	q_head = sched_getrq(ci_rq, spc->spc_maxpriority);
	l = TAILQ_FIRST(q_head);

	for (;;) {
		/* Check the first and next result from the queue */
		if (l == NULL)
			break;
		KASSERT(l->l_stat == LSRUN);
		KASSERT(l->l_flag & LW_INMEM);

		/* Look for threads that are allowed to migrate */
		if ((l->l_pflag & LP_BOUND) || lwp_cache_hot(l) ||
		    !sched_migratable(l, curci)) {
			l = TAILQ_NEXT(l, l_runq);
			continue;
		}

		/* Grab the thread, and move to the local run queue */
		sched_dequeue(l);
		l->l_cpu = curci;
		lwp_unlock_to(l, curci->ci_schedstate.spc_mutex);
		sched_enqueue(l, false);
		return l;
	}
	spc_unlock(ci);

	return l;
}

/*
 * Periodic calculations for balancing.
 */
static void
sched_balance(void *nocallout)
{
	struct cpu_info *ci, *hci;
	runqueue_t *ci_rq;
	CPU_INFO_ITERATOR cii;
	u_int highest;

	hci = curcpu();
	highest = 0;

	/* Do the counting locklessly */
	for (CPU_INFO_FOREACH(cii, ci)) {
		ci_rq = ci->ci_schedstate.spc_sched_info;

		/* Decaying average of the migratable thread count */
		ci_rq->r_avgcount = (ci_rq->r_avgcount + ci_rq->r_mcount) >> 1;

		/* Look for CPU with the highest average */
		if (ci_rq->r_avgcount > highest) {
			hci = ci;
			highest = ci_rq->r_avgcount;
		}
	}

	/* Update the worker */
	worker_ci = hci;

	if (nocallout == NULL)
		callout_schedule(&balance_ch, balance_period);
}

#else

struct cpu_info *
sched_takecpu(struct lwp *l)
{

	return l->l_cpu;
}

#endif	/* MULTIPROCESSOR */

/*
 * Scheduler mill.
 */
struct lwp *
sched_nextlwp(void)
{
	struct cpu_info *ci = curcpu();
	struct schedstate_percpu *spc;
	TAILQ_HEAD(, lwp) *q_head;
	runqueue_t *ci_rq;
	struct lwp *l;

	spc = &ci->ci_schedstate;
	ci_rq = spc->spc_sched_info;

#ifdef MULTIPROCESSOR
	/* If the runqueue is empty, try to catch a thread from another CPU */
	if (__predict_false(spc->spc_flags & SPCF_OFFLINE)) {
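		/* An offline CPU may only run the LWPs bound to it. */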
		if ((ci_rq->r_count - ci_rq->r_mcount) == 0)
			return NULL;
	} else if (ci_rq->r_count == 0) {
		/* Reset the counter, and call the balancer */
		ci_rq->r_avgcount = 0;
		sched_balance(ci);

		/* The re-locking will be done inside */
		return sched_catchlwp();
	}
#else
	if (ci_rq->r_count == 0)
		return NULL;
#endif

	/* Take the highest priority thread */
	KASSERT(ci_rq->r_bitmap[spc->spc_maxpriority >> BITMAP_SHIFT]);
	q_head = sched_getrq(ci_rq, spc->spc_maxpriority);
	l = TAILQ_FIRST(q_head);
	KASSERT(l != NULL);

	sched_oncpu(l);
	l->l_rticks = hardclock_ticks;

	return l;
}

bool
sched_curcpu_runnable_p(void)
{
	const struct cpu_info *ci;
	const runqueue_t *ci_rq;
	bool rv;

	kpreempt_disable();
	ci = curcpu();
	ci_rq = ci->ci_schedstate.spc_sched_info;

#ifndef __HAVE_FAST_SOFTINTS
	if (ci->ci_data.cpu_softints) {
		kpreempt_enable();
		return true;
	}
#endif

	if (ci->ci_schedstate.spc_flags & SPCF_OFFLINE)
		rv = (ci_rq->r_count - ci_rq->r_mcount);
	else
		rv = ci_rq->r_count != 0;
	kpreempt_enable();

	return rv;
}

/*
 * Sysctl nodes and initialization.
 */

SYSCTL_SETUP(sysctl_sched_setup, "sysctl sched setup")
{
	const struct sysctlnode *node = NULL;

	sysctl_createv(clog, 0, NULL, NULL,
		CTLFLAG_PERMANENT,
		CTLTYPE_NODE, "kern", NULL,
		NULL, 0, NULL, 0,
		CTL_KERN, CTL_EOL);
	sysctl_createv(clog, 0, NULL, &node,
		CTLFLAG_PERMANENT,
		CTLTYPE_NODE, "sched",
		SYSCTL_DESCR("Scheduler options"),
		NULL, 0, NULL, 0,
		CTL_KERN, CTL_CREATE, CTL_EOL);

	if (node == NULL)
		return;

	sysctl_createv(clog, 0, &node, NULL,
		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
		CTLTYPE_INT, "cacheht_time",
		SYSCTL_DESCR("Cache hotness time (in ticks)"),
		NULL, 0, &cacheht_time, 0,
		CTL_CREATE, CTL_EOL);
	sysctl_createv(clog, 0, &node, NULL,
		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
		CTLTYPE_INT, "balance_period",
		SYSCTL_DESCR("Balance period (in ticks)"),
		NULL, 0, &balance_period, 0,
		CTL_CREATE, CTL_EOL);
	sysctl_createv(clog, 0, &node, NULL,
		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
		CTLTYPE_INT, "min_catch",
		SYSCTL_DESCR("Minimal count of threads for catching"),
		NULL, 0, &min_catch, 0,
		CTL_CREATE, CTL_EOL);
	sysctl_createv(clog, 0, &node, NULL,
		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
		CTLTYPE_INT, "timesoftints",
		SYSCTL_DESCR("Track CPU time for soft interrupts"),
		NULL, 0, &softint_timing, 0,
		CTL_CREATE, CTL_EOL);
	sysctl_createv(clog, 0, &node, NULL,
		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
		CTLTYPE_INT, "kpreempt_pri",
		SYSCTL_DESCR("Minimum priority to trigger kernel preemption"),
		NULL, 0, &sched_kpreempt_pri, 0,
		CTL_CREATE, CTL_EOL);
	sysctl_createv(clog, 0, &node, NULL,
		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
		CTLTYPE_INT, "upreempt_pri",
		SYSCTL_DESCR("Minimum priority to trigger user preemption"),
		NULL, 0, &sched_upreempt_pri, 0,
		CTL_CREATE, CTL_EOL);
}

/*
 * Debugging.
 */

#ifdef DDB

void
sched_print_runqueue(void (*pr)(const char *, ...)
    __attribute__((__format__(__printf__,1,2))))
{
	runqueue_t *ci_rq;
	struct schedstate_percpu *spc;
	struct lwp *l;
	struct proc *p;
	int i;
	struct cpu_info *ci;
	CPU_INFO_ITERATOR cii;

	for (CPU_INFO_FOREACH(cii, ci)) {
		spc = &ci->ci_schedstate;
		ci_rq = spc->spc_sched_info;

		(*pr)("Run-queue (CPU = %u):\n", ci->ci_index);
		(*pr)(" pid.lid = %d.%d, threads count = %u, "
		    "avgcount = %u, highest pri = %d\n",
#ifdef MULTIPROCESSOR
		    ci->ci_curlwp->l_proc->p_pid, ci->ci_curlwp->l_lid,
#else
		    curlwp->l_proc->p_pid, curlwp->l_lid,
#endif
		    ci_rq->r_count, ci_rq->r_avgcount, spc->spc_maxpriority);
		i = (PRI_COUNT >> BITMAP_SHIFT) - 1;
		do {
			uint32_t q;
			q = ci_rq->r_bitmap[i];
			(*pr)(" bitmap[%d] => [ %d (0x%x) ]\n", i, ffs(q), q);
		} while (i--);
	}

	(*pr)("   %5s %4s %4s %10s %3s %18s %4s %s\n",
	    "LID", "PRI", "EPRI", "FL", "ST", "LWP", "CPU", "LRTIME");

	PROCLIST_FOREACH(p, &allproc) {
		if ((p->p_flag & PK_MARKER) != 0)
			continue;
		(*pr)(" /- %d (%s)\n", (int)p->p_pid, p->p_comm);
		LIST_FOREACH(l, &p->p_lwps, l_sibling) {
			ci = l->l_cpu;
			(*pr)(" | %5d %4u %4u 0x%8.8x %3s %18p %4u %u\n",
			    (int)l->l_lid, l->l_priority, lwp_eprio(l),
			    l->l_flag, l->l_stat == LSRUN ? "RQ" :
			    (l->l_stat == LSSLEEP ? "SQ" : "-"),
			    l, ci->ci_index,
			    (u_int)(hardclock_ticks - l->l_rticks));
		}
	}
}

#endif