xref: /openbsd-src/sys/kern/kern_synch.c (revision b725ae7711052a2233e31a66fefb8a752c388d7a)
/*	$OpenBSD: kern_synch.c,v 1.54 2004/01/26 01:27:02 deraadt Exp $	*/
/*	$NetBSD: kern_synch.c,v 1.37 1996/04/22 01:38:37 christos Exp $	*/

/*-
 * Copyright (c) 1982, 1986, 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_synch.c	8.6 (Berkeley) 1/21/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/buf.h>
#include <sys/signalvar.h>
#include <sys/resourcevar.h>
#include <uvm/uvm_extern.h>
#include <sys/sched.h>
#include <sys/timeout.h>

#ifdef KTRACE
#include <sys/ktrace.h>
#endif

#include <machine/cpu.h>

u_char	curpriority;		/* usrpri of curproc */
int	lbolt;			/* once a second sleep address */

int whichqs;			/* Bit mask summary of non-empty Q's. */
struct prochd qs[NQS];

void scheduler_start(void);

void roundrobin(void *);
void schedcpu(void *);
void updatepri(struct proc *);
void endtsleep(void *);

void
scheduler_start()
{
	static struct timeout roundrobin_to;
	static struct timeout schedcpu_to;

	/*
	 * We avoid polluting the global namespace by keeping the scheduler
	 * timeouts static in this function.
	 * We set up the timeouts here and kick roundrobin and schedcpu once
	 * to make them do their job.
	 */

	timeout_set(&roundrobin_to, roundrobin, &roundrobin_to);
	timeout_set(&schedcpu_to, schedcpu, &schedcpu_to);

	roundrobin(&roundrobin_to);
	schedcpu(&schedcpu_to);
}
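
/*
 * Illustrative sketch (not part of the original file): the self-arming
 * timeout pattern used above.  Each handler receives a pointer to its
 * own struct timeout as the argument, does its work, and re-adds itself,
 * so a single manual kick at startup keeps it running periodically.
 *
 *	static struct timeout example_to;
 *
 *	void
 *	example(void *arg)
 *	{
 *		struct timeout *to = (struct timeout *)arg;
 *
 *		// ... periodic work goes here ...
 *		timeout_add(to, hz);		// re-arm for one second out
 *	}
 *
 *	timeout_set(&example_to, example, &example_to);
 *	example(&example_to);			// kick it once at startup
 */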

/*
 * Force switch among equal priority processes every 100ms.
 */
/* ARGSUSED */
void
roundrobin(arg)
	void *arg;
{
	struct timeout *to = (struct timeout *)arg;
	struct proc *p = curproc;
	int s;

	if (p != NULL) {
		s = splstatclock();
		if (p->p_schedflags & PSCHED_SEENRR) {
			/*
			 * The process has already been through a roundrobin
			 * without switching and may be hogging the CPU.
			 * Indicate that the process should yield.
			 */
			p->p_schedflags |= PSCHED_SHOULDYIELD;
		} else {
			p->p_schedflags |= PSCHED_SEENRR;
		}
		splx(s);
	}
	need_resched();
	timeout_add(to, hz / 10);
}

/*
 * Constants for digital decay and forget:
 *	90% of (p_estcpu) usage in 5 * loadav time
 *	95% of (p_pctcpu) usage in 60 seconds (load insensitive)
 *          Note that, as ps(1) mentions, this can let percentages
 *          total over 100% (I've seen 137.9% for 3 processes).
 *
 * Note that hardclock updates p_estcpu and p_cpticks independently.
 *
 * We wish to decay away 90% of p_estcpu in (5 * loadavg) seconds.
 * That is, the system wants to compute a value of decay such
 * that the following for loop:
 * 	for (i = 0; i < (5 * loadavg); i++)
 * 		p_estcpu *= decay;
 * will compute
 * 	p_estcpu *= 0.1;
 * for all values of loadavg.
 *
 * Mathematically this loop can be expressed by saying:
 * 	decay ** (5 * loadavg) ~= .1
 *
 * The system computes decay as:
 * 	decay = (2 * loadavg) / (2 * loadavg + 1)
 *
 * We wish to prove that the system's computation of decay
 * will always fulfill the equation:
 * 	decay ** (5 * loadavg) ~= .1
 *
 * If we compute b as:
 * 	b = 2 * loadavg
 * then
 * 	decay = b / (b + 1)
 *
 * We now need to prove two things:
 *	1) Given factor ** (5 * loadavg) ~= .1, prove factor == b/(b+1)
 *	2) Given (b/(b+1)) ** power ~= .1, prove power == (5 * loadavg)
 *
 * Facts:
 *         For x close to zero, exp(x) =~ 1 + x, since
 *              exp(x) = 1 + x/1! + x**2/2! + ... ;
 *              therefore exp(-1/b) =~ 1 - (1/b) = (b-1)/b.
 *         For x close to zero, ln(1+x) =~ x, since
 *              ln(1+x) = x - x**2/2 + x**3/3 - ...     -1 < x < 1
 *              therefore ln(b/(b+1)) = ln(1 - 1/(b+1)) =~ -1/(b+1).
 *         ln(.1) =~ -2.30
 *
 * Proof of (1):
 *    Solve (factor)**(power) =~ .1 given power (5*loadav):
 *	solving for factor,
 *      ln(factor) =~ -2.30/(5*loadav), or
 *      factor =~ exp(-1/((5/2.30)*loadav)) =~ exp(-1/(2*loadav)) =
 *          exp(-1/b) =~ (b-1)/b =~ b/(b+1).                    QED
 *
 * Proof of (2):
 *    Solve (factor)**(power) =~ .1 given factor == (b/(b+1)):
 *	solving for power,
 *      power*ln(b/(b+1)) =~ -2.30, or
 *      power =~ 2.3 * (b + 1) = 4.6*loadav + 2.3 =~ 5*loadav.  QED
 *
 * Actual power values for the implemented algorithm are as follows:
 *      loadav: 1       2       3       4
 *      power:  5.68    10.32   14.94   19.55
 */
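
/*
 * A quick userland check of the proof above (an illustrative sketch,
 * not part of the original file): with decay = (2 * loadav) /
 * (2 * loadav + 1), raising decay to the power (5 * loadav) should
 * come out near .1 for any load average.  Compile with -lm.
 *
 *	#include <math.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		double loadav, decay;
 *
 *		for (loadav = 1; loadav <= 4; loadav++) {
 *			decay = (2 * loadav) / (2 * loadav + 1);
 *			printf("loadav %.0f: decay**%.0f = %.4f\n",
 *			    loadav, 5 * loadav, pow(decay, 5 * loadav));
 *		}
 *		return (0);
 *	}
 *
 * The results cluster around .1 (from ~.13 at loadav 1 down to ~.09 at
 * loadav 4), matching the "power" table above.
 */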

/* calculations for digital decay to forget 90% of usage in 5*loadav sec */
#define	loadfactor(loadav)	(2 * (loadav))
#define	decay_cpu(loadfac, cpu)	(((loadfac) * (cpu)) / ((loadfac) + FSCALE))

/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
fixpt_t	ccpu = 0.95122942450071400909 * FSCALE;		/* exp(-1/20) */

/*
 * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the
 * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below
 * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT).
 *
 * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used:
 *	1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits).
 *
 * If you don't want to bother with the faster/more-accurate formula, you
 * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate
 * (more general) method of calculating the percentage of CPU used by a
 * process.
 */
#define	CCPU_SHIFT	11
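
/*
 * Illustrative sketch (not part of the original file): the once-a-second
 * fixed-point decay of p_pctcpu performed by schedcpu() below, run in
 * userland.  FSHIFT is assumed to be 11 here, as in the note above, so
 * FSCALE == 2048.  Sixty multiplications by ccpu == exp(-1/20) leave
 * about exp(-3) =~ 5% of the starting value, i.e. 95% of `p_pctcpu'
 * really is forgotten in 60 seconds.
 *
 *	#include <stdio.h>
 *
 *	#define FSHIFT	11			// assumed value
 *	#define FSCALE	(1 << FSHIFT)
 *
 *	int
 *	main(void)
 *	{
 *		unsigned int ccpu = 0.95122942450071400909 * FSCALE;
 *		unsigned int pctcpu = FSCALE;	// i.e. 100%, fixed point
 *		int sec;
 *
 *		for (sec = 0; sec < 60; sec++)
 *			pctcpu = (pctcpu * ccpu) >> FSHIFT;
 *		printf("after 60s: %.1f%%\n", pctcpu * 100.0 / FSCALE);
 *		return (0);
 *	}
 *
 * This prints a value just under 5%; the small shortfall relative to
 * exp(-3) is the truncation error of the fixed-point multiply.
 */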

/*
 * Recompute process priorities every hz ticks (i.e. once a second).
 */
/* ARGSUSED */
void
schedcpu(arg)
	void *arg;
{
	struct timeout *to = (struct timeout *)arg;
	fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);
	struct proc *p;
	int s;
	unsigned int newcpu;
	int phz;

	/*
	 * If we have a statistics clock, use that to calculate CPU
	 * time, otherwise revert to using the profiling clock (which,
	 * in turn, defaults to hz if there is no separate profiling
	 * clock available).
	 */
	phz = stathz ? stathz : profhz;
	KASSERT(phz);

	for (p = LIST_FIRST(&allproc); p != 0; p = LIST_NEXT(p, p_list)) {
		/*
		 * Increment time in/out of memory and sleep time
		 * (if sleeping).  We ignore overflow; with 16-bit int's
		 * (remember them?) overflow takes 45 days.
		 */
		p->p_swtime++;
		if (p->p_stat == SSLEEP || p->p_stat == SSTOP)
			p->p_slptime++;
		p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT;
		/*
		 * If the process has slept the entire second,
		 * stop recalculating its priority until it wakes up.
		 */
		if (p->p_slptime > 1)
			continue;
		s = splstatclock();	/* prevent state changes */
		/*
		 * p_pctcpu is only for ps.
		 */
#if	(FSHIFT >= CCPU_SHIFT)
		p->p_pctcpu += (phz == 100) ?
		    ((fixpt_t)p->p_cpticks) << (FSHIFT - CCPU_SHIFT) :
		    100 * (((fixpt_t)p->p_cpticks)
		    << (FSHIFT - CCPU_SHIFT)) / phz;
#else
		p->p_pctcpu += ((FSCALE - ccpu) *
		    (p->p_cpticks * FSCALE / phz)) >> FSHIFT;
#endif
		p->p_cpticks = 0;
		newcpu = (u_int)decay_cpu(loadfac, p->p_estcpu);
		p->p_estcpu = newcpu;
		resetpriority(p);
		if (p->p_priority >= PUSER) {
			if ((p != curproc) &&
			    p->p_stat == SRUN &&
			    (p->p_flag & P_INMEM) &&
			    (p->p_priority / PPQ) != (p->p_usrpri / PPQ)) {
				remrunqueue(p);
				p->p_priority = p->p_usrpri;
				setrunqueue(p);
			} else
				p->p_priority = p->p_usrpri;
		}
		splx(s);
	}
	uvm_meter();
	wakeup((caddr_t)&lbolt);
	timeout_add(to, hz);
}

/*
 * Recalculate the priority of a process after it has slept for a while.
 * For all load averages >= 1 and max p_estcpu of 255, sleeping for at
 * least six times the loadfactor will decay p_estcpu to zero.
 */
void
updatepri(p)
	register struct proc *p;
{
	register unsigned int newcpu = p->p_estcpu;
	register fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);

	if (p->p_slptime > 5 * loadfac)
		p->p_estcpu = 0;
	else {
		p->p_slptime--;	/* the first time was done in schedcpu */
		while (newcpu && --p->p_slptime)
			newcpu = (int) decay_cpu(loadfac, newcpu);
		p->p_estcpu = newcpu;
	}
	resetpriority(p);
}
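
/*
 * Illustrative sketch (not part of the original file): the decay loop
 * above, run in userland for a load average of one.  Each sleep second
 * beyond the first multiplies p_estcpu by loadfac / (loadfac + FSCALE),
 * i.e. by 2/3 when loadav == 1, so the maximum p_estcpu of 255 reaches
 * zero after 13 iterations, roughly the "six times the loadfactor"
 * claimed in the comment above.
 *
 *	#include <stdio.h>
 *
 *	#define FSHIFT	11			// assumed value
 *	#define FSCALE	(1 << FSHIFT)
 *
 *	int
 *	main(void)
 *	{
 *		long loadfac = 2 * FSCALE;	// loadfactor(1.0)
 *		long newcpu = 255;		// max p_estcpu
 *		int sec = 0;
 *
 *		while (newcpu) {
 *			newcpu = (loadfac * newcpu) / (loadfac + FSCALE);
 *			sec++;
 *		}
 *		printf("p_estcpu 255 -> 0 in %d sleep seconds\n", sec);
 *		return (0);
 *	}
 */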

/*
 * We're only looking at 7 bits of the address; everything is
 * aligned to 4, lots of things are aligned to greater powers
 * of 2.  Shift right by 8, i.e. drop the bottom 256 worth.
 */
#define TABLESIZE	128
#define LOOKUP(x)	(((long)(x) >> 8) & (TABLESIZE - 1))
struct slpque {
	struct proc *sq_head;
	struct proc **sq_tailp;
} slpque[TABLESIZE];
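
/*
 * Illustrative sketch (not part of the original file): how LOOKUP()
 * buckets wait channels.  All idents in the same 256-byte region hash
 * to one sleep queue, and the 128-entry table wraps every 32K.  The
 * addresses below are made up for the demonstration.
 *
 *	#include <stdio.h>
 *
 *	#define TABLESIZE	128
 *	#define LOOKUP(x)	(((long)(x) >> 8) & (TABLESIZE - 1))
 *
 *	int
 *	main(void)
 *	{
 *		printf("%ld %ld %ld\n", LOOKUP(0x40001200),
 *		    LOOKUP(0x400012ff), LOOKUP(0x40001300));
 *		// prints "18 18 19": the first two share a queue
 *		return (0);
 *	}
 */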

/*
 * During autoconfiguration or after a panic, a sleep will simply
 * lower the priority briefly to allow interrupts, then return.
 * The priority to be used (safepri) is machine-dependent, thus this
 * value is initialized and maintained in the machine-dependent layers.
 * This priority will typically be 0, or the lowest priority
 * that is safe for use on the interrupt stack; it can be made
 * higher to block network software interrupts after panics.
 */
int safepri;

/*
 * General sleep call.  Suspends the current process until a wakeup is
 * performed on the specified identifier.  The process will then be made
 * runnable with the specified priority.  Sleeps at most timo/hz seconds
 * (0 means no timeout).  If priority includes the PCATCH flag, signals
 * are checked before and after sleeping, otherwise signals are not
 * checked.  Returns 0 if awakened, EWOULDBLOCK if the timeout expires.
 * If PCATCH is set and a signal needs to be delivered, ERESTART is
 * returned if the current system call should be restarted if possible,
 * and EINTR is returned if the system call should be interrupted by
 * the signal.
 *
 * The interlock is held until the scheduler_slock (XXX) is held.  The
 * interlock will be locked before returning back to the caller
 * unless the PNORELOCK flag is specified, in which case the
 * interlock will always be unlocked upon return.
 */
int
ltsleep(ident, priority, wmesg, timo, interlock)
	void *ident;
	int priority, timo;
	const char *wmesg;
	volatile struct simplelock *interlock;
{
	struct proc *p = curproc;
	struct slpque *qp;
	int s, sig;
	int catch = priority & PCATCH;
	int relock = (priority & PNORELOCK) == 0;

#ifdef KTRACE
	if (KTRPOINT(p, KTR_CSW))
		ktrcsw(p, 1, 0);
#endif
	s = splhigh();
	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration,
		 * just give interrupts a chance, then just return;
		 * don't run any other procs or panic below,
		 * in case this is the idle process and already asleep.
		 */
		splx(safepri);
		splx(s);
		if (interlock != NULL && relock == 0)
			simple_unlock(interlock);
		return (0);
	}
#ifdef DIAGNOSTIC
	if (ident == NULL || p->p_stat != SRUN || p->p_back)
		panic("tsleep");
#endif
	p->p_wchan = ident;
	p->p_wmesg = wmesg;
	p->p_slptime = 0;
	p->p_priority = priority & PRIMASK;
	qp = &slpque[LOOKUP(ident)];
	if (qp->sq_head == 0)
		qp->sq_head = p;
	else
		*qp->sq_tailp = p;
	*(qp->sq_tailp = &p->p_forw) = 0;
	if (timo)
		timeout_add(&p->p_sleep_to, timo);
	/*
	 * We can now release the interlock; the scheduler_slock
	 * is held, so a thread can't get in to do wakeup() before
	 * we do the switch.
	 *
	 * XXX We leave the code block here, after inserting ourselves
	 * on the sleep queue, because we might want a more clever
	 * data structure for the sleep queues at some point.
	 */
	if (interlock != NULL)
		simple_unlock(interlock);

	/*
	 * We put ourselves on the sleep queue and start our timeout
	 * before calling CURSIG, as we could stop there, and a wakeup
	 * or a SIGCONT (or both) could occur while we were stopped.
	 * A SIGCONT would cause us to be marked as SSLEEP
	 * without resuming us, thus we must be ready for sleep
	 * when CURSIG is called.  If the wakeup happens while we're
	 * stopped, p->p_wchan will be 0 upon return from CURSIG.
	 */
	if (catch) {
		p->p_flag |= P_SINTR;
		if ((sig = CURSIG(p)) != 0) {
			if (p->p_wchan)
				unsleep(p);
			p->p_stat = SRUN;
			goto resume;
		}
		if (p->p_wchan == 0) {
			catch = 0;
			goto resume;
		}
	} else
		sig = 0;
	p->p_stat = SSLEEP;
	p->p_stats->p_ru.ru_nvcsw++;
	mi_switch();
#ifdef	DDB
	/* handy breakpoint location after process "wakes" */
	__asm(".globl bpendtsleep\nbpendtsleep:");
#endif
resume:
	curpriority = p->p_usrpri;
	splx(s);
	p->p_flag &= ~P_SINTR;
	if (p->p_flag & P_TIMEOUT) {
		p->p_flag &= ~P_TIMEOUT;
		if (sig == 0) {
#ifdef KTRACE
			if (KTRPOINT(p, KTR_CSW))
				ktrcsw(p, 0, 0);
#endif
			if (interlock != NULL && relock)
				simple_lock(interlock);
			return (EWOULDBLOCK);
		}
	} else if (timo)
		timeout_del(&p->p_sleep_to);
	if (catch && (sig != 0 || (sig = CURSIG(p)) != 0)) {
#ifdef KTRACE
		if (KTRPOINT(p, KTR_CSW))
			ktrcsw(p, 0, 0);
#endif
		if (interlock != NULL && relock)
			simple_lock(interlock);
		if (p->p_sigacts->ps_sigintr & sigmask(sig))
			return (EINTR);
		return (ERESTART);
	}
#ifdef KTRACE
	if (KTRPOINT(p, KTR_CSW))
		ktrcsw(p, 0, 0);
#endif
	if (interlock != NULL && relock)
		simple_lock(interlock);
	return (0);
}
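
/*
 * Usage sketch (not part of the original file): a typical interruptible
 * wait with a one-second timeout.  tsleep() is assumed to be the usual
 * wrapper that calls ltsleep() with a NULL interlock, and sc->sc_busy is
 * a hypothetical driver flag; the matching wakeup(&sc->sc_busy) is
 * issued by whoever clears the flag.
 *
 *	int error;
 *
 *	while (sc->sc_busy) {
 *		error = tsleep(&sc->sc_busy, PZERO | PCATCH, "busywt", hz);
 *		if (error == EWOULDBLOCK)
 *			continue;	// timed out, re-check the flag
 *		if (error)
 *			return (error);	// EINTR or ERESTART from a signal
 *	}
 *	sc->sc_busy = 1;
 */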

/*
 * Implement timeout for tsleep.
 * If process hasn't been awakened (wchan non-zero),
 * set timeout flag and undo the sleep.  If proc
 * is stopped, just unsleep so it will remain stopped.
 */
void
endtsleep(arg)
	void *arg;
{
	struct proc *p;
	int s;

	p = (struct proc *)arg;
	s = splhigh();
	if (p->p_wchan) {
		if (p->p_stat == SSLEEP)
			setrunnable(p);
		else
			unsleep(p);
		p->p_flag |= P_TIMEOUT;
	}
	splx(s);
}

/*
 * Short-term, non-interruptible sleep.
 */
void
sleep(ident, priority)
	void *ident;
	int priority;
{
	register struct proc *p = curproc;
	register struct slpque *qp;
	register int s;

#ifdef DIAGNOSTIC
	if (priority > PZERO) {
		printf("sleep called with priority %d > PZERO, wchan: %p\n",
		    priority, ident);
		panic("old sleep");
	}
#endif
	s = splhigh();
	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration,
		 * just give interrupts a chance, then just return;
		 * don't run any other procs or panic below,
		 * in case this is the idle process and already asleep.
		 */
		splx(safepri);
		splx(s);
		return;
	}
#ifdef DIAGNOSTIC
	if (ident == NULL || p->p_stat != SRUN || p->p_back)
		panic("sleep");
#endif
	p->p_wchan = ident;
	p->p_wmesg = NULL;
	p->p_slptime = 0;
	p->p_priority = priority;
	qp = &slpque[LOOKUP(ident)];
	if (qp->sq_head == 0)
		qp->sq_head = p;
	else
		*qp->sq_tailp = p;
	*(qp->sq_tailp = &p->p_forw) = 0;
	p->p_stat = SSLEEP;
	p->p_stats->p_ru.ru_nvcsw++;
#ifdef KTRACE
	if (KTRPOINT(p, KTR_CSW))
		ktrcsw(p, 1, 0);
#endif
	mi_switch();
#ifdef	DDB
	/* handy breakpoint location after process "wakes" */
	__asm(".globl bpendsleep\nbpendsleep:");
#endif
#ifdef KTRACE
	if (KTRPOINT(p, KTR_CSW))
		ktrcsw(p, 0, 0);
#endif
	curpriority = p->p_usrpri;
	splx(s);
}

/*
 * Remove a process from its wait queue.
 */
void
unsleep(p)
	register struct proc *p;
{
	register struct slpque *qp;
	register struct proc **hp;
	int s;

	s = splhigh();
	if (p->p_wchan) {
		hp = &(qp = &slpque[LOOKUP(p->p_wchan)])->sq_head;
		while (*hp != p)
			hp = &(*hp)->p_forw;
		*hp = p->p_forw;
		if (qp->sq_tailp == &p->p_forw)
			qp->sq_tailp = hp;
		p->p_wchan = 0;
	}
	splx(s);
}

/*
 * Make all processes sleeping on the specified identifier runnable
 * (or at most n of them, when n is non-negative; wakeup() below
 * passes -1 to wake every sleeper).
 */
void
wakeup_n(ident, n)
	void *ident;
	int n;
{
	struct slpque *qp;
	struct proc *p, **q;
	int s;

	s = splhigh();
	qp = &slpque[LOOKUP(ident)];
restart:
	for (q = &qp->sq_head; (p = *q) != NULL; ) {
#ifdef DIAGNOSTIC
		if (p->p_back || (p->p_stat != SSLEEP && p->p_stat != SSTOP))
			panic("wakeup");
#endif
		if (p->p_wchan == ident) {
			--n;
			p->p_wchan = 0;
			*q = p->p_forw;
			if (qp->sq_tailp == &p->p_forw)
				qp->sq_tailp = q;
			if (p->p_stat == SSLEEP) {
				/* OPTIMIZED EXPANSION OF setrunnable(p); */
				if (p->p_slptime > 1)
					updatepri(p);
				p->p_slptime = 0;
				p->p_stat = SRUN;

				/*
				 * Since curpriority is a user priority,
				 * p->p_priority is always better than
				 * curpriority.
				 */

				if ((p->p_flag & P_INMEM) != 0) {
					setrunqueue(p);
					need_resched();
				} else {
					wakeup((caddr_t)&proc0);
				}
				/* END INLINE EXPANSION */

				if (n != 0)
					goto restart;
				else
					break;
			}
		} else
			q = &p->p_forw;
	}
	splx(s);
}

void
wakeup(chan)
	void *chan;
{
	wakeup_n(chan, -1);
}
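
/*
 * Usage sketch (not part of the original file): schedcpu() above does a
 * wakeup() on `lbolt' once a second, so kernel code can throttle a
 * retry loop to roughly one pass per second, a common BSD idiom:
 *
 *	while (resource_is_busy())
 *		(void)tsleep(&lbolt, PPAUSE, "lbolt", 0);
 *
 * resource_is_busy() is a hypothetical stand-in for whatever condition
 * the caller is polling; tsleep() is assumed to be the NULL-interlock
 * wrapper around ltsleep().
 */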

/*
 * General yield call.  Puts the current process back on its run queue and
 * performs a voluntary context switch.
 */
void
yield()
{
	struct proc *p = curproc;
	int s;

	s = splstatclock();
	p->p_priority = p->p_usrpri;
	setrunqueue(p);
	p->p_stats->p_ru.ru_nvcsw++;
	mi_switch();
	splx(s);
}

/*
 * General preemption call.  Puts the current process back on its run queue
 * and performs an involuntary context switch.  If a process is supplied,
 * we switch to that process.  Otherwise, we use the normal process selection
 * criteria.
 */
void
preempt(newp)
	struct proc *newp;
{
	struct proc *p = curproc;
	int s;

	/*
	 * XXX Switching to a specific process is not supported yet.
	 */
	if (newp != NULL)
		panic("preempt: cpu_preempt not yet implemented");

	s = splstatclock();
	p->p_priority = p->p_usrpri;
	setrunqueue(p);
	p->p_stats->p_ru.ru_nivcsw++;
	mi_switch();
	splx(s);
}

/*
 * Must be called at splstatclock() or higher.
 */
void
mi_switch()
{
	struct proc *p = curproc;	/* XXX */
	struct rlimit *rlim;
	struct timeval tv;

	splassert(IPL_STATCLOCK);

	/*
	 * Compute the amount of time during which the current
	 * process was running, and add that to its total so far.
	 */
	microtime(&tv);
	if (timercmp(&tv, &runtime, <)) {
#if 0
		printf("time is not monotonic! "
		    "tv=%ld.%06ld, runtime=%ld.%06ld\n",
		    tv.tv_sec, tv.tv_usec, runtime.tv_sec, runtime.tv_usec);
#endif
	} else {
		timersub(&tv, &runtime, &tv);
		timeradd(&p->p_rtime, &tv, &p->p_rtime);
	}

	/*
	 * Check if the process exceeds its cpu resource allocation.
	 * If over max, kill it.
	 */
	rlim = &p->p_rlimit[RLIMIT_CPU];
	if ((rlim_t)p->p_rtime.tv_sec >= rlim->rlim_cur) {
		if ((rlim_t)p->p_rtime.tv_sec >= rlim->rlim_max) {
			psignal(p, SIGKILL);
		} else {
			psignal(p, SIGXCPU);
			if (rlim->rlim_cur < rlim->rlim_max)
				rlim->rlim_cur += 5;
		}
	}

	/*
	 * Process is about to yield the CPU; clear the appropriate
	 * scheduling flags.
	 */
	p->p_schedflags &= ~PSCHED_SWITCHCLEAR;

	/*
	 * Pick a new current process and record its start time.
	 */
	uvmexp.swtch++;
	cpu_switch(p);
	microtime(&runtime);
}
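
/*
 * Userland view of the RLIMIT_CPU handling above (an illustrative
 * sketch, not part of the original file): when accumulated CPU time
 * crosses the soft limit the kernel posts SIGXCPU and stretches the
 * soft limit by 5 seconds at a time; once the stretched soft limit is
 * reached and the hard limit has also been passed, it posts an
 * uncatchable SIGKILL.
 *
 *	#include <sys/types.h>
 *	#include <sys/resource.h>
 *	#include <signal.h>
 *	#include <unistd.h>
 *
 *	static void
 *	onxcpu(int sig)
 *	{
 *		write(2, "SIGXCPU\n", 8);	// soft limit reached
 *	}
 *
 *	int
 *	main(void)
 *	{
 *		struct rlimit rl = { 1, 3 };	// soft 1s, hard 3s
 *
 *		signal(SIGXCPU, onxcpu);
 *		setrlimit(RLIMIT_CPU, &rl);
 *		for (;;)
 *			continue;	// spin: SIGXCPU at ~1s, then the
 *					// soft limit becomes 6s and the
 *					// process is killed at ~6s
 *	}
 */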

/*
 * Initialize the (doubly-linked) run queues
 * to be empty.
 */
void
rqinit()
{
	register int i;

	for (i = 0; i < NQS; i++)
		qs[i].ph_link = qs[i].ph_rlink = (struct proc *)&qs[i];
}

/*
 * Change process state to be runnable,
 * placing it on the run queue if it is in memory,
 * and awakening the swapper if it isn't in memory.
 */
void
setrunnable(p)
	register struct proc *p;
{
	register int s;

	s = splhigh();
	switch (p->p_stat) {
	case 0:
	case SRUN:
	case SZOMB:
	case SDEAD:
	default:
		panic("setrunnable");
	case SSTOP:
		/*
		 * If we're being traced (possibly because someone attached us
		 * while we were stopped), check for a signal from the debugger.
		 */
		if ((p->p_flag & P_TRACED) != 0 && p->p_xstat != 0)
			p->p_siglist |= sigmask(p->p_xstat);
		/* FALLTHROUGH */
	case SSLEEP:
		unsleep(p);		/* e.g. when sending signals */
		break;
	case SIDL:
		break;
	}
	p->p_stat = SRUN;
	if (p->p_flag & P_INMEM)
		setrunqueue(p);
	splx(s);
	if (p->p_slptime > 1)
		updatepri(p);
	p->p_slptime = 0;
	if ((p->p_flag & P_INMEM) == 0)
		wakeup((caddr_t)&proc0);
	else if (p->p_priority < curpriority)
		need_resched();
}

/*
 * Compute the priority of a process when running in user mode.
 * Arrange to reschedule if the resulting priority is better
 * than that of the current process.
 */
void
resetpriority(p)
	register struct proc *p;
{
	register unsigned int newpriority;

	newpriority = PUSER + p->p_estcpu + NICE_WEIGHT * (p->p_nice - NZERO);
	newpriority = min(newpriority, MAXPRI);
	p->p_usrpri = newpriority;
	if (newpriority < curpriority)
		need_resched();
}
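
/*
 * Worked example (illustrative, not from the original file; PUSER,
 * NICE_WEIGHT, NZERO and MAXPRI are assumed to have their traditional
 * BSD values of 50, 2, 20 and 127).  A process at nice 0 (p_nice == 20)
 * with an accumulated p_estcpu of 36 gets
 *
 *	newpriority = 50 + 36 + 2 * (20 - 20) = 86
 *
 * while the same process reniced to +10 gets 50 + 36 + 2 * 10 = 106;
 * larger numbers mean worse priority, and anything past MAXPRI is
 * clipped to 127 by the min() above.
 */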

/*
 * We adjust the priority of the current process.  The priority of a process
 * gets worse as it accumulates CPU time.  The cpu usage estimator (p_estcpu)
 * is increased here.  The formula for computing priorities (in kern_synch.c)
 * will compute a different value each time p_estcpu increases. This can
 * cause a switch, but unless the priority crosses a PPQ boundary the actual
 * queue will not change.  The cpu usage estimator ramps up quite quickly
 * when the process is running (linearly), and decays away exponentially, at
 * a rate which is proportionally slower when the system is busy.  The basic
 * principle is that the system will 90% forget that the process used a lot
 * of CPU time in 5 * loadav seconds.  This causes the system to favor
 * processes which haven't run much recently, and to round-robin among other
 * processes.
 */

void
schedclock(p)
	struct proc *p;
{
	p->p_estcpu = ESTCPULIM(p->p_estcpu + 1);
	resetpriority(p);
	if (p->p_priority >= PUSER)
		p->p_priority = p->p_usrpri;
}

#ifdef DDB
#include <machine/db_machdep.h>

#include <ddb/db_interface.h>
#include <ddb/db_output.h>

void
db_show_all_procs(addr, haddr, count, modif)
	db_expr_t addr;
	int haddr;
	db_expr_t count;
	char *modif;
{
	char *mode;
	int doingzomb = 0;
	struct proc *p, *pp;

	if (modif[0] == 0)
		modif[0] = 'n';			/* default == normal mode */

	mode = "mawn";
	while (*mode && *mode != modif[0])
		mode++;
	if (*mode == 0 || *mode == 'm') {
		db_printf("usage: show all procs [/a] [/n] [/w]\n");
		db_printf("\t/a == show process address info\n");
		db_printf("\t/n == show normal process info [default]\n");
		db_printf("\t/w == show process wait/emul info\n");
		return;
	}

	p = LIST_FIRST(&allproc);

	switch (*mode) {
	case 'a':
		db_printf("   PID  %-10s  %18s  %18s  %18s\n",
		    "COMMAND", "STRUCT PROC *", "UAREA *", "VMSPACE/VM_MAP");
		break;
	case 'n':
		db_printf("   PID  %5s  %5s  %5s  S  %10s  %-9s  %-16s\n",
		    "PPID", "PGRP", "UID", "FLAGS", "WAIT", "COMMAND");
		break;
	case 'w':
		db_printf("   PID  %-16s  %-8s  %18s  %s\n",
		    "COMMAND", "EMUL", "WAIT-CHANNEL", "WAIT-MSG");
		break;
	}

	while (p != 0) {
		pp = p->p_pptr;
		if (p->p_stat) {
			db_printf("%c%5d  ", p == curproc ? '*' : ' ',
			    p->p_pid);

			switch (*mode) {
			case 'a':
				db_printf("%-10.10s  %18p  %18p  %18p\n",
				    p->p_comm, p, p->p_addr, p->p_vmspace);
				break;
			case 'n':
				db_printf("%5d  %5d  %5d  %d  %#10x  "
				    "%-9.9s  %-16s\n",
				    pp ? pp->p_pid : -1, p->p_pgrp->pg_id,
				    p->p_cred->p_ruid, p->p_stat, p->p_flag,
				    (p->p_wchan && p->p_wmesg) ?
				    p->p_wmesg : "", p->p_comm);
				break;
			case 'w':
				db_printf("%-16s  %-8s  %18p  %s\n", p->p_comm,
				    p->p_emul->e_name, p->p_wchan,
				    (p->p_wchan && p->p_wmesg) ?
				    p->p_wmesg : "");
				break;
			}
		}
		p = LIST_NEXT(p, p_list);
		if (p == 0 && doingzomb == 0) {
			doingzomb = 1;
			p = LIST_FIRST(&zombproc);
		}
	}
}
#endif
939