/*	$OpenBSD: kern_synch.c,v 1.37 2001/08/07 22:57:15 art Exp $	*/
/*	$NetBSD: kern_synch.c,v 1.37 1996/04/22 01:38:37 christos Exp $	*/

/*-
 * Copyright (c) 1982, 1986, 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_synch.c	8.6 (Berkeley) 1/21/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/buf.h>
#include <sys/signalvar.h>
#include <sys/resourcevar.h>
#include <vm/vm.h>
#include <sys/sched.h>
#include <sys/timeout.h>

#include <uvm/uvm_extern.h>

#ifdef KTRACE
#include <sys/ktrace.h>
#endif

#include <machine/cpu.h>

u_char	curpriority;		/* usrpri of curproc */
int	lbolt;			/* once a second sleep address */

void scheduler_start __P((void));

void roundrobin __P((void *));
void schedcpu __P((void *));
void updatepri __P((struct proc *));
void endtsleep __P((void *));

void
scheduler_start()
{
	static struct timeout roundrobin_to;
	static struct timeout schedcpu_to;

	/*
	 * We avoid polluting the global namespace by keeping the scheduler
	 * timeouts static in this function.
	 * We set up the timeouts here and kick roundrobin and schedcpu once
	 * to get them started; thereafter each one reschedules itself with
	 * timeout_add().
	 */

	timeout_set(&roundrobin_to, roundrobin, &roundrobin_to);
	timeout_set(&schedcpu_to, schedcpu, &schedcpu_to);

	roundrobin(&roundrobin_to);
	schedcpu(&schedcpu_to);
}

/*
 * Force switch among equal priority processes every 100ms.
 */
/* ARGSUSED */
void
roundrobin(arg)
	void *arg;
{
	struct timeout *to = (struct timeout *)arg;
	struct proc *p = curproc;
	int s;

	if (p != NULL) {
		s = splstatclock();
		if (p->p_schedflags & PSCHED_SEENRR) {
			/*
			 * The process has already been through a roundrobin
			 * without switching and may be hogging the CPU.
			 * Indicate that the process should yield.
			 */
			p->p_schedflags |= PSCHED_SHOULDYIELD;
		} else {
			p->p_schedflags |= PSCHED_SEENRR;
		}
		splx(s);
	}
	need_resched();
	timeout_add(to, hz / 10);
}
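
#if 0
/*
 * Illustrative sketch, not part of the kernel build: the two-flag
 * protocol above, reduced to a toy.  A process that survives two
 * consecutive roundrobin ticks without a context switch (mi_switch()
 * clears the flags via PSCHED_SWITCHCLEAR) is marked as needing to
 * yield.  The flag values here are assumptions made for the toy.
 */
#include <stdio.h>

#define PSCHED_SEENRR		0x0001
#define PSCHED_SHOULDYIELD	0x0002

int
main(void)
{
	int schedflags = 0, tick;

	for (tick = 1; tick <= 2; tick++) {
		if (schedflags & PSCHED_SEENRR)
			schedflags |= PSCHED_SHOULDYIELD;
		else
			schedflags |= PSCHED_SEENRR;
		printf("tick %d: %s\n", tick,
		    (schedflags & PSCHED_SHOULDYIELD) ?
		    "should yield" : "seen one roundrobin tick");
	}
	return (0);
}
#endif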

/*
 * Constants for digital decay and forget:
 *	90% of (p_estcpu) usage in 5 * loadav time
 *	95% of (p_pctcpu) usage in 60 seconds (load insensitive)
 *          Note that, as ps(1) mentions, this can let percentages
 *          total over 100% (I've seen 137.9% for 3 processes).
 *
 * Note that hardclock updates p_estcpu and p_cpticks independently.
 *
 * We wish to decay away 90% of p_estcpu in (5 * loadavg) seconds.
 * That is, the system wants to compute a value of decay such
 * that the following for loop:
 * 	for (i = 0; i < (5 * loadavg); i++)
 * 		p_estcpu *= decay;
 * will compute
 * 	p_estcpu *= 0.1;
 * for all values of loadavg.
 *
 * Mathematically this loop can be expressed by saying:
 * 	decay ** (5 * loadavg) ~= .1
 *
 * The system computes decay as:
 * 	decay = (2 * loadavg) / (2 * loadavg + 1)
 *
 * We wish to prove that the system's computation of decay
 * will always fulfill the equation:
 * 	decay ** (5 * loadavg) ~= .1
 *
 * If we compute b as:
 * 	b = 2 * loadavg
 * then
 * 	decay = b / (b + 1)
 *
 * We now need to prove two things:
 *	1) Given factor ** (5 * loadavg) ~= .1, prove factor == b/(b+1)
 *	2) Given (b/(b+1)) ** power ~= .1, prove power == (5 * loadavg)
 *
 * Facts:
 *         For x close to zero, exp(x) =~ 1 + x, since
 *              exp(x) = 1 + x/1! + x**2/2! + ... ;
 *              therefore exp(-1/b) =~ 1 - (1/b) = (b-1)/b.
 *         For x close to zero, ln(1+x) =~ x, since
 *              ln(1+x) = x - x**2/2 + x**3/3 - ...     -1 < x < 1
 *              therefore ln(b/(b+1)) = ln(1 - 1/(b+1)) =~ -1/(b+1).
 *         ln(.1) =~ -2.30
 *
 * Proof of (1):
 *    Solve (factor)**(power) =~ .1 given power (5*loadav):
 *	solving for factor,
 *      ln(factor) =~ -2.30/(5*loadav), or
 *      factor =~ exp(-1/((5/2.30)*loadav)) =~ exp(-1/(2*loadav)) =
 *          exp(-1/b) =~ (b-1)/b =~ b/(b+1).                    QED
 *
 * Proof of (2):
 *    Solve (factor)**(power) =~ .1 given factor == (b/(b+1)):
 *	solving for power,
 *      power*ln(b/(b+1)) =~ -2.30, or
 *      power =~ 2.3 * (b + 1) = 4.6*loadav + 2.3 =~ 5*loadav.  QED
 *
 * Actual power values for the implemented algorithm are as follows:
 *      loadav: 1       2       3       4
 *      power:  5.68    10.32   14.94   19.55
 */

/* calculations for digital decay to forget 90% of usage in 5*loadav sec */
#define	loadfactor(loadav)	(2 * (loadav))
#define	decay_cpu(loadfac, cpu)	(((loadfac) * (cpu)) / ((loadfac) + FSCALE))
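
#if 0
/*
 * Illustrative sketch, not part of the kernel build: numerically check
 * that decay = b/(b+1) with b = 2*loadav satisfies
 * decay ** power ~= .1 at power ~= 5*loadav, reproducing the power
 * table above.  Assumes a hosted environment with libm.
 */
#include <math.h>
#include <stdio.h>

int
main(void)
{
	double loadav, b, decay, power;

	for (loadav = 1; loadav <= 4; loadav++) {
		b = 2 * loadav;
		decay = b / (b + 1);
		/* solve decay ** power == .1 for power */
		power = log(0.1) / log(decay);
		printf("loadav %.0f: decay %.4f, power %.2f "
		    "(5*loadav = %.0f)\n",
		    loadav, decay, power, 5 * loadav);
	}
	return (0);
}
#endif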

/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
fixpt_t	ccpu = 0.95122942450071400909 * FSCALE;		/* exp(-1/20) */

/*
 * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the
 * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below
 * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT).
 *
 * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used:
 *	1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 100 * 2**-11
 * so that, once the division by phz (== 100) is folded in, the factor
 * applied to p_cpticks is exactly 1 ulp of an 11-bit fixed-point
 * fraction.
 *
 * If you don't want to bother with the faster/more-accurate formula, you
 * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate
 * (more general) method of calculating the percentage of CPU used by a
 * process.
 */
#define	CCPU_SHIFT	11
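
#if 0
/*
 * Illustrative sketch, not part of the kernel build: decay a
 * p_pctcpu-style fixed-point value once per second for 60 seconds, the
 * way schedcpu() below does, and show that roughly 95% of it has been
 * forgotten, as the ccpu comment claims (exp(-60/20) ~= 5% remains).
 * FSHIFT == 11 matches the historic <sys/param.h> value but is an
 * assumption here.
 */
#include <stdio.h>

#define FSHIFT	11
#define FSCALE	(1 << FSHIFT)
typedef unsigned int fixpt_t;

int
main(void)
{
	fixpt_t ccpu = 0.95122942450071400909 * FSCALE;	/* exp(-1/20) */
	fixpt_t pctcpu = FSCALE;			/* start at 100% */
	int sec;

	for (sec = 0; sec < 60; sec++)
		pctcpu = (pctcpu * ccpu) >> FSHIFT;
	printf("after 60s: %.1f%% of the original usage remains\n",
	    100.0 * pctcpu / FSCALE);
	return (0);
}
#endif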

/*
 * Recompute process priorities, every hz ticks.
 */
/* ARGSUSED */
void
schedcpu(arg)
	void *arg;
{
	struct timeout *to = (struct timeout *)arg;
	fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);
	struct proc *p;
	int s;
	unsigned int newcpu;
	int phz;

	/*
	 * If we have a statistics clock, use that to calculate CPU
	 * time, otherwise revert to using the profiling clock (which,
	 * in turn, defaults to hz if there is no separate profiling
	 * clock available).
	 */
	phz = stathz ? stathz : profhz;
	KASSERT(phz);

	for (p = LIST_FIRST(&allproc); p != 0; p = LIST_NEXT(p, p_list)) {
		/*
		 * Increment time in/out of memory and sleep time
		 * (if sleeping).  We ignore overflow; with 16-bit int's
		 * (remember them?) overflow takes 45 days.
		 */
		p->p_swtime++;
		if (p->p_stat == SSLEEP || p->p_stat == SSTOP)
			p->p_slptime++;
		p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT;
		/*
		 * If the process has slept the entire second,
		 * stop recalculating its priority until it wakes up.
		 */
		if (p->p_slptime > 1)
			continue;
		s = splstatclock();	/* prevent state changes */
		/*
		 * p_pctcpu is only for ps.
		 */
#if	(FSHIFT >= CCPU_SHIFT)
		p->p_pctcpu += (phz == 100) ?
		    ((fixpt_t)p->p_cpticks) << (FSHIFT - CCPU_SHIFT) :
		    100 * (((fixpt_t)p->p_cpticks)
			<< (FSHIFT - CCPU_SHIFT)) / phz;
#else
		p->p_pctcpu += ((FSCALE - ccpu) *
			(p->p_cpticks * FSCALE / phz)) >> FSHIFT;
#endif
		p->p_cpticks = 0;
		newcpu = (u_int) decay_cpu(loadfac, p->p_estcpu);
		p->p_estcpu = newcpu;
		resetpriority(p);
		if (p->p_priority >= PUSER) {
			if ((p != curproc) &&
			    p->p_stat == SRUN &&
			    (p->p_flag & P_INMEM) &&
			    (p->p_priority / PPQ) != (p->p_usrpri / PPQ)) {
				remrunqueue(p);
				p->p_priority = p->p_usrpri;
				setrunqueue(p);
			} else
				p->p_priority = p->p_usrpri;
		}
		splx(s);
	}
	uvm_meter();
	wakeup((caddr_t)&lbolt);
	timeout_add(to, hz);
}

/*
 * Recalculate the priority of a process after it has slept for a while.
 * For all load averages >= 1 and max p_estcpu of 255, sleeping for at
 * least six times the loadfactor will decay p_estcpu to zero.
 */
void
updatepri(p)
	register struct proc *p;
{
	register unsigned int newcpu = p->p_estcpu;
	register fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);

	if (p->p_slptime > 5 * loadfac)
		p->p_estcpu = 0;
	else {
		p->p_slptime--;	/* the first time was done in schedcpu */
		while (newcpu && --p->p_slptime)
			newcpu = (int) decay_cpu(loadfac, newcpu);
		p->p_estcpu = newcpu;
	}
	resetpriority(p);
}
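
#if 0
/*
 * Illustrative sketch, not part of the kernel build: starting from the
 * maximum p_estcpu of 255, count how many decay_cpu() steps it takes to
 * reach zero and compare against the "six times the loadfactor" claim
 * above (loadfactor == 2*loadav, so 12*loadav steps).  FSHIFT == 11 is
 * an assumption, as in the sketch after CCPU_SHIFT.
 */
#include <stdio.h>

#define FSHIFT	11
#define FSCALE	(1 << FSHIFT)
typedef unsigned int fixpt_t;

#define loadfactor(loadav)	(2 * (loadav))
#define decay_cpu(loadfac, cpu)	(((loadfac) * (cpu)) / ((loadfac) + FSCALE))

int
main(void)
{
	int loadav;

	for (loadav = 1; loadav <= 4; loadav++) {
		/* load averages are themselves fixed point */
		fixpt_t loadfac = loadfactor(loadav * FSCALE);
		unsigned int cpu = 255;
		int steps = 0;

		while (cpu) {
			cpu = decay_cpu(loadfac, cpu);
			steps++;
		}
		printf("loadav %d: %d steps (6 * loadfactor = %d)\n",
		    loadav, steps, 6 * 2 * loadav);
	}
	return (0);
}
#endif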

/*
 * We're only looking at 7 bits of the address; everything is
 * aligned to 4, lots of things are aligned to greater powers
 * of 2.  Shift right by 8, i.e. drop the bottom 256 worth.
 */
#define TABLESIZE	128
#define LOOKUP(x)	(((long)(x) >> 8) & (TABLESIZE - 1))
struct slpque {
	struct proc *sq_head;
	struct proc **sq_tailp;
} slpque[TABLESIZE];
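
#if 0
/*
 * Illustrative sketch, not part of the kernel build: how LOOKUP() folds
 * a wait-channel address into a slpque bucket.  The arena and offsets
 * are made up for the example; idents within the same 256-byte block
 * usually share a bucket, and idents 256 bytes apart land in adjacent
 * buckets (modulo TABLESIZE).
 */
#include <stdio.h>

#define TABLESIZE	128
#define LOOKUP(x)	(((long)(x) >> 8) & (TABLESIZE - 1))

int
main(void)
{
	static char arena[1024];

	printf("&arena[0]   -> bucket %ld\n", LOOKUP(&arena[0]));
	printf("&arena[64]  -> bucket %ld\n", LOOKUP(&arena[64]));
	printf("&arena[256] -> bucket %ld\n", LOOKUP(&arena[256]));
	return (0);
}
#endif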

/*
 * During autoconfiguration or after a panic, a sleep will simply
 * lower the priority briefly to allow interrupts, then return.
 * The priority to be used (safepri) is machine-dependent, thus this
 * value is initialized and maintained in the machine-dependent layers.
 * This priority will typically be 0, or the lowest priority
 * that is safe for use on the interrupt stack; it can be made
 * higher to block network software interrupts after panics.
 */
int safepri;

/*
 * General sleep call.  Suspends the current process until a wakeup is
 * performed on the specified identifier.  The process will then be made
 * runnable with the specified priority.  Sleeps at most timo/hz seconds
 * (0 means no timeout).  If pri includes the PCATCH flag, signals are
 * checked before and after sleeping, else signals are not checked.
 * Returns 0 if awakened, EWOULDBLOCK if the timeout expires.  If PCATCH
 * is set and a signal needs to be delivered, ERESTART is returned if the
 * current system call should be restarted if possible, and EINTR is
 * returned if the system call should be interrupted by the signal.
 *
 * The interlock is held until the scheduler_slock (XXX) is held.  The
 * interlock will be locked before returning to the caller unless the
 * PNORELOCK flag is specified, in which case the interlock will always
 * be unlocked upon return.
 */
int
ltsleep(ident, priority, wmesg, timo, interlock)
	void *ident;
	int priority, timo;
	char *wmesg;
	volatile struct simplelock *interlock;
{
	struct proc *p = curproc;
	struct slpque *qp;
	int s, sig;
	int catch = priority & PCATCH;
	int relock = (priority & PNORELOCK) == 0;

#ifdef KTRACE
	if (KTRPOINT(p, KTR_CSW))
		ktrcsw(p, 1, 0);
#endif
	s = splhigh();
	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration,
		 * just give interrupts a chance, then just return;
		 * don't run any other procs or panic below,
		 * in case this is the idle process and already asleep.
		 */
		splx(safepri);
		splx(s);
		if (interlock != NULL && relock == 0)
			simple_unlock(interlock);
		return (0);
	}
#ifdef DIAGNOSTIC
	if (ident == NULL || p->p_stat != SRUN || p->p_back)
		panic("tsleep");
#endif
	p->p_wchan = ident;
	p->p_wmesg = wmesg;
	p->p_slptime = 0;
	p->p_priority = priority & PRIMASK;
	qp = &slpque[LOOKUP(ident)];
	if (qp->sq_head == 0)
		qp->sq_head = p;
	else
		*qp->sq_tailp = p;
	*(qp->sq_tailp = &p->p_forw) = 0;
	if (timo)
		timeout_add(&p->p_sleep_to, timo);
	/*
	 * We can now release the interlock; the scheduler_slock
	 * is held, so a thread can't get in to do wakeup() before
	 * we do the switch.
	 *
	 * XXX We leave the code block here, after inserting ourselves
	 * on the sleep queue, because we might want a more clever
	 * data structure for the sleep queues at some point.
	 */
	if (interlock != NULL)
		simple_unlock(interlock);

	/*
	 * We put ourselves on the sleep queue and start our timeout
	 * before calling CURSIG, as we could stop there, and a wakeup
	 * or a SIGCONT (or both) could occur while we were stopped.
	 * A SIGCONT would cause us to be marked as SSLEEP
	 * without resuming us, thus we must be ready for sleep
	 * when CURSIG is called.  If the wakeup happens while we're
	 * stopped, p->p_wchan will be 0 upon return from CURSIG.
	 */
	if (catch) {
		p->p_flag |= P_SINTR;
		if ((sig = CURSIG(p)) != 0) {
			if (p->p_wchan)
				unsleep(p);
			p->p_stat = SRUN;
			goto resume;
		}
		if (p->p_wchan == 0) {
			catch = 0;
			goto resume;
		}
	} else
		sig = 0;
	p->p_stat = SSLEEP;
	p->p_stats->p_ru.ru_nvcsw++;
	mi_switch();
#ifdef	DDB
	/* handy breakpoint location after process "wakes" */
	__asm(".globl bpendtsleep ; bpendtsleep:");
#endif
resume:
	curpriority = p->p_usrpri;
	splx(s);
	p->p_flag &= ~P_SINTR;
	if (p->p_flag & P_TIMEOUT) {
		p->p_flag &= ~P_TIMEOUT;
		if (sig == 0) {
#ifdef KTRACE
			if (KTRPOINT(p, KTR_CSW))
				ktrcsw(p, 0, 0);
#endif
			if (interlock != NULL && relock)
				simple_lock(interlock);
			return (EWOULDBLOCK);
		}
	} else if (timo)
		timeout_del(&p->p_sleep_to);
	if (catch && (sig != 0 || (sig = CURSIG(p)) != 0)) {
#ifdef KTRACE
		if (KTRPOINT(p, KTR_CSW))
			ktrcsw(p, 0, 0);
#endif
		if (interlock != NULL && relock)
			simple_lock(interlock);
		if (p->p_sigacts->ps_sigintr & sigmask(sig))
			return (EINTR);
		return (ERESTART);
	}
#ifdef KTRACE
	if (KTRPOINT(p, KTR_CSW))
		ktrcsw(p, 0, 0);
#endif
	if (interlock != NULL && relock)
		simple_lock(interlock);
	return (0);
}
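
#if 0
/*
 * Illustrative sketch, not part of the kernel build: a typical
 * ltsleep() consumer.  The myq structure, its simplelock and the
 * "myqread" wait message are made up for the example; only the
 * ltsleep() calling pattern is taken from this file.
 */
struct myq {
	struct simplelock mq_slock;
	int mq_count;
};

int
myq_dequeue(struct myq *mq)
{
	int error;

	simple_lock(&mq->mq_slock);
	while (mq->mq_count == 0) {
		/*
		 * Sleep interruptibly, with no timeout.  ltsleep()
		 * drops mq_slock once we are on the sleep queue and,
		 * since PNORELOCK is not set, reacquires it before
		 * returning.
		 */
		error = ltsleep(&mq->mq_count, PZERO | PCATCH, "myqread",
		    0, &mq->mq_slock);
		if (error) {
			simple_unlock(&mq->mq_slock);
			return (error);
		}
	}
	mq->mq_count--;
	simple_unlock(&mq->mq_slock);
	return (0);
}
#endif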

/*
 * Implement the timeout for tsleep.
 * If the process hasn't been awakened (p_wchan still set),
 * set the timeout flag and undo the sleep.  If the process
 * is stopped, just unsleep it so it will remain stopped.
 */
void
endtsleep(arg)
	void *arg;
{
	struct proc *p;
	int s;

	p = (struct proc *)arg;
	s = splhigh();
	if (p->p_wchan) {
		if (p->p_stat == SSLEEP)
			setrunnable(p);
		else
			unsleep(p);
		p->p_flag |= P_TIMEOUT;
	}
	splx(s);
}

/*
 * Short-term, non-interruptible sleep.
 */
void
sleep(ident, priority)
	void *ident;
	int priority;
{
	register struct proc *p = curproc;
	register struct slpque *qp;
	register int s;

#ifdef DIAGNOSTIC
	if (priority > PZERO) {
		printf("sleep called with priority %d > PZERO, wchan: %p\n",
		    priority, ident);
		panic("old sleep");
	}
#endif
	s = splhigh();
	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration,
		 * just give interrupts a chance, then just return;
		 * don't run any other procs or panic below,
		 * in case this is the idle process and already asleep.
		 */
		splx(safepri);
		splx(s);
		return;
	}
#ifdef DIAGNOSTIC
	if (ident == NULL || p->p_stat != SRUN || p->p_back)
		panic("sleep");
#endif
	p->p_wchan = ident;
	p->p_wmesg = NULL;
	p->p_slptime = 0;
	p->p_priority = priority;
	qp = &slpque[LOOKUP(ident)];
	if (qp->sq_head == 0)
		qp->sq_head = p;
	else
		*qp->sq_tailp = p;
	*(qp->sq_tailp = &p->p_forw) = 0;
	p->p_stat = SSLEEP;
	p->p_stats->p_ru.ru_nvcsw++;
#ifdef KTRACE
	if (KTRPOINT(p, KTR_CSW))
		ktrcsw(p, 1, 0);
#endif
	mi_switch();
#ifdef	DDB
	/* handy breakpoint location after process "wakes" */
	__asm(".globl bpendsleep ; bpendsleep:");
#endif
#ifdef KTRACE
	if (KTRPOINT(p, KTR_CSW))
		ktrcsw(p, 0, 0);
#endif
	curpriority = p->p_usrpri;
	splx(s);
}

/*
 * Remove a process from its wait queue.
 */
void
unsleep(p)
	register struct proc *p;
{
	register struct slpque *qp;
	register struct proc **hp;
	int s;

	s = splhigh();
	if (p->p_wchan) {
		hp = &(qp = &slpque[LOOKUP(p->p_wchan)])->sq_head;
		while (*hp != p)
			hp = &(*hp)->p_forw;
		*hp = p->p_forw;
		if (qp->sq_tailp == &p->p_forw)
			qp->sq_tailp = hp;
		p->p_wchan = 0;
	}
	splx(s);
}
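
#if 0
/*
 * Illustrative sketch, not part of the kernel build: the
 * pointer-to-pointer removal idiom used by unsleep() above, applied to
 * a toy singly-linked list with a tail pointer.  The node type and the
 * list contents are made up for the example.
 */
#include <stdio.h>
#include <stddef.h>

struct node {
	struct node *next;
	int id;
};

int
main(void)
{
	struct node c = { NULL, 3 }, b = { &c, 2 }, a = { &b, 1 };
	struct node *head = &a, **tailp = &c.next;
	struct node *victim = &b, **hp, *p;

	/* walk the chain of next pointers until we reach the victim */
	for (hp = &head; *hp != victim; hp = &(*hp)->next)
		continue;
	*hp = victim->next;		/* splice it out */
	if (tailp == &victim->next)	/* victim was last: pull tail back */
		tailp = hp;

	for (p = head; p != NULL; p = p->next)
		printf("node %d\n", p->id);	/* prints 1 then 3 */
	return (0);
}
#endif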

/*
 * Make at most n processes sleeping on the specified identifier
 * runnable; n == -1 (what wakeup() passes) means all of them.
 */
void
wakeup_n(ident, n)
	void *ident;
	int n;
{
	struct slpque *qp;
	struct proc *p, **q;
	int s;

	s = splhigh();
	qp = &slpque[LOOKUP(ident)];
restart:
	for (q = &qp->sq_head; (p = *q) != NULL; ) {
#ifdef DIAGNOSTIC
		if (p->p_back || (p->p_stat != SSLEEP && p->p_stat != SSTOP))
			panic("wakeup");
#endif
		if (p->p_wchan == ident) {
			--n;
			p->p_wchan = 0;
			*q = p->p_forw;
			if (qp->sq_tailp == &p->p_forw)
				qp->sq_tailp = q;
			if (p->p_stat == SSLEEP) {
				/* OPTIMIZED EXPANSION OF setrunnable(p); */
				if (p->p_slptime > 1)
					updatepri(p);
				p->p_slptime = 0;
				p->p_stat = SRUN;

				/*
				 * Since curpriority is a user priority,
				 * p->p_priority is always better than
				 * curpriority.
				 */

				if ((p->p_flag & P_INMEM) != 0) {
					setrunqueue(p);
					need_resched();
				} else {
					wakeup((caddr_t)&proc0);
				}
				/* END INLINE EXPANSION */

				if (n != 0)
					goto restart;
				else
					break;
			}
		} else
			q = &p->p_forw;
	}
	splx(s);
}

void
wakeup(chan)
	void *chan;
{
	wakeup_n(chan, -1);
}
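
#if 0
/*
 * Illustrative counterpart to the myq_dequeue() sketch after ltsleep()
 * above (again not part of the kernel build): the producer side simply
 * changes the condition and calls wakeup() on the same ident.
 */
void
myq_enqueue(struct myq *mq)
{
	simple_lock(&mq->mq_slock);
	mq->mq_count++;
	wakeup(&mq->mq_count);		/* wakeup_n(..., -1): wake them all */
	simple_unlock(&mq->mq_slock);
}
#endif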

/*
 * General yield call.  Puts the current process back on its run queue and
 * performs a voluntary context switch.
 */
void
yield()
{
	struct proc *p = curproc;
	int s;

	p->p_priority = p->p_usrpri;
	s = splstatclock();
	setrunqueue(p);
	p->p_stats->p_ru.ru_nvcsw++;
	mi_switch();
	splx(s);
}

/*
 * General preemption call.  Puts the current process back on its run queue
 * and performs an involuntary context switch.  If a process is supplied,
 * we switch to that process.  Otherwise, we use the normal process selection
 * criteria.
 */
void
preempt(newp)
	struct proc *newp;
{
	struct proc *p = curproc;
	int s;

	/*
	 * XXX Switching to a specific process is not supported yet.
	 */
	if (newp != NULL)
		panic("preempt: cpu_preempt not yet implemented");

	p->p_priority = p->p_usrpri;
	s = splstatclock();
	setrunqueue(p);
	p->p_stats->p_ru.ru_nivcsw++;
	mi_switch();
	splx(s);
}

/*
 * The machine independent parts of mi_switch().
 * Must be called at splstatclock() or higher.
 */
void
mi_switch()
{
	register struct proc *p = curproc;	/* XXX */
	register struct rlimit *rlim;
	register long s, u;
	struct timeval tv;

	/*
	 * Compute the amount of time during which the current
	 * process was running, and add that to its total so far.
	 */
	microtime(&tv);
	u = p->p_rtime.tv_usec + (tv.tv_usec - runtime.tv_usec);
	s = p->p_rtime.tv_sec + (tv.tv_sec - runtime.tv_sec);
	if (u < 0) {
		u += 1000000;
		s--;
	} else if (u >= 1000000) {
		u -= 1000000;
		s++;
	}
	p->p_rtime.tv_usec = u;
	p->p_rtime.tv_sec = s;

	/*
	 * Check if the process exceeds its cpu resource allocation.
	 * If over max, kill it.  In any case, if it has run for more
	 * than 10 minutes, reduce priority to give others a chance.
	 */
	rlim = &p->p_rlimit[RLIMIT_CPU];
	if (s >= rlim->rlim_cur) {
		if (s >= rlim->rlim_max)
			psignal(p, SIGKILL);
		else {
			psignal(p, SIGXCPU);
			if (rlim->rlim_cur < rlim->rlim_max)
				rlim->rlim_cur += 5;
		}
	}
	if (s > 10 * 60 && p->p_ucred->cr_uid && p->p_nice == NZERO) {
		p->p_nice = NZERO + 4;
		resetpriority(p);
	}

	/*
	 * Process is about to yield the CPU; clear the appropriate
	 * scheduling flags.
	 */
	p->p_schedflags &= ~PSCHED_SWITCHCLEAR;

	/*
	 * Pick a new current process and record its start time.
	 */
	uvmexp.swtch++;
	cpu_switch(p);
	microtime(&runtime);
}
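
#if 0
/*
 * Illustrative sketch, not part of the kernel build: the timeval
 * accumulation in mi_switch() above, including the microsecond
 * borrow/carry normalization, applied to made-up numbers.
 */
#include <stdio.h>

int
main(void)
{
	long rt_sec = 3, rt_usec = 200000;	/* p_rtime so far */
	long run_sec = 100, run_usec = 800000;	/* when we got the cpu */
	long now_sec = 101, now_usec = 100000;	/* microtime() now */
	long s = rt_sec + (now_sec - run_sec);
	long u = rt_usec + (now_usec - run_usec);

	if (u < 0) {			/* borrow a second */
		u += 1000000;
		s--;
	} else if (u >= 1000000) {	/* carry a second */
		u -= 1000000;
		s++;
	}
	printf("accumulated runtime: %ld.%06ld\n", s, u);	/* 3.500000 */
	return (0);
}
#endif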

/*
 * Initialize the (doubly-linked) run queues
 * to be empty.
 */
void
rqinit()
{
	register int i;

	for (i = 0; i < NQS; i++)
		qs[i].ph_link = qs[i].ph_rlink = (struct proc *)&qs[i];
}

/*
 * Change process state to be runnable,
 * placing it on the run queue if it is in memory,
 * and awakening the swapper if it isn't in memory.
 */
void
setrunnable(p)
	register struct proc *p;
{
	register int s;

	s = splhigh();
	switch (p->p_stat) {
	case 0:
	case SRUN:
	case SZOMB:
	case SDEAD:
	default:
		panic("setrunnable");
	case SSTOP:
		/*
		 * If we're being traced (possibly because someone attached us
		 * while we were stopped), check for a signal from the debugger.
		 */
		if ((p->p_flag & P_TRACED) != 0 && p->p_xstat != 0)
			p->p_siglist |= sigmask(p->p_xstat);
		/* FALLTHROUGH */
	case SSLEEP:
		unsleep(p);		/* e.g. when sending signals */
		break;
	case SIDL:
		break;
	}
	p->p_stat = SRUN;
	if (p->p_flag & P_INMEM)
		setrunqueue(p);
	splx(s);
	if (p->p_slptime > 1)
		updatepri(p);
	p->p_slptime = 0;
	if ((p->p_flag & P_INMEM) == 0)
		wakeup((caddr_t)&proc0);
	else if (p->p_priority < curpriority)
		need_resched();
}

/*
 * Compute the priority of a process when running in user mode.
 * Arrange to reschedule if the resulting priority is better
 * than that of the current process.
 */
void
resetpriority(p)
	register struct proc *p;
{
	register unsigned int newpriority;

	newpriority = PUSER + p->p_estcpu + NICE_WEIGHT * (p->p_nice - NZERO);
	newpriority = min(newpriority, MAXPRI);
	p->p_usrpri = newpriority;
	if (newpriority < curpriority)
		need_resched();
}
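
#if 0
/*
 * Illustrative sketch, not part of the kernel build: the user-priority
 * formula above evaluated for a few (p_estcpu, p_nice) pairs.  The
 * PUSER, MAXPRI, NZERO and NICE_WEIGHT values below are assumptions
 * matching the historic headers; larger numbers mean worse priority.
 */
#include <stdio.h>

#define PUSER		50
#define MAXPRI		127
#define NZERO		20
#define NICE_WEIGHT	2

static unsigned int
userpri(unsigned int estcpu, int nice)
{
	unsigned int pri = PUSER + estcpu + NICE_WEIGHT * (nice - NZERO);

	return (pri < MAXPRI ? pri : MAXPRI);
}

int
main(void)
{
	printf("idle, nice  0:  %u\n", userpri(0, NZERO));	/* 50 */
	printf("busy, nice  0:  %u\n", userpri(40, NZERO));	/* 90 */
	printf("idle, nice 10:  %u\n", userpri(0, NZERO + 10));	/* 70 */
	return (0);
}
#endif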

/*
 * We adjust the priority of the current process.  The priority of a process
 * gets worse as it accumulates CPU time.  The cpu usage estimator (p_estcpu)
 * is increased here.  The formula for computing priorities (in
 * resetpriority() above) will compute a different value each time p_estcpu
 * increases.  This can cause a switch, but unless the priority crosses a
 * PPQ boundary the actual queue will not change.  The cpu usage estimator
 * ramps up quite quickly when the process is running (linearly), and decays
 * away exponentially, at a rate which is proportionally slower when the
 * system is busy.  The basic principle is that the system will forget 90%
 * of the CPU time a process used within 5 * loadav seconds.  This causes
 * the system to favor processes which haven't run much recently, and to
 * round-robin among other processes.
 */

void
schedclock(p)
	struct proc *p;
{
	p->p_estcpu = ESTCPULIM(p->p_estcpu + 1);
	resetpriority(p);
	if (p->p_priority >= PUSER)
		p->p_priority = p->p_usrpri;
}

#ifdef DDB
#include <machine/db_machdep.h>

#include <ddb/db_interface.h>
#include <ddb/db_output.h>

void
db_show_all_procs(addr, haddr, count, modif)
	db_expr_t addr;
	int haddr;
	db_expr_t count;
	char *modif;
{
	char *mode;
	int doingzomb = 0;
	struct proc *p, *pp;

	if (modif[0] == 0)
		modif[0] = 'n';			/* default == normal mode */

	mode = "mawn";
	while (*mode && *mode != modif[0])
		mode++;
	if (*mode == 0 || *mode == 'm') {
		db_printf("usage: show all procs [/a] [/n] [/w]\n");
		db_printf("\t/a == show process address info\n");
		db_printf("\t/n == show normal process info [default]\n");
		db_printf("\t/w == show process wait/emul info\n");
		return;
	}

	p = LIST_FIRST(&allproc);

	switch (*mode) {

	case 'a':
		db_printf("  PID  %-10s  %18s  %18s  %18s\n",
		    "COMMAND", "STRUCT PROC *", "UAREA *", "VMSPACE/VM_MAP");
		break;
	case 'n':
		db_printf("  PID  %5s  %5s  %5s  S  %10s  %-9s  %-16s\n",
		    "PPID", "PGRP", "UID", "FLAGS", "WAIT", "COMMAND");
		break;
	case 'w':
		db_printf("  PID  %-16s  %-8s  %18s  %s\n",
		    "COMMAND", "EMUL", "WAIT-CHANNEL", "WAIT-MSG");
		break;
	}

	while (p != 0) {
		pp = p->p_pptr;
		if (p->p_stat) {

			db_printf("%c%5d  ", p == curproc ? '*' : ' ',
				p->p_pid);

			switch (*mode) {

			case 'a':
				db_printf("%-10.10s  %18p  %18p  %18p\n",
				    p->p_comm, p, p->p_addr, p->p_vmspace);
				break;

			case 'n':
				db_printf("%5d  %5d  %5d  %d  %#10x  "
				    "%-9.9s  %-16s\n",
				    pp ? pp->p_pid : -1, p->p_pgrp->pg_id,
				    p->p_cred->p_ruid, p->p_stat, p->p_flag,
				    (p->p_wchan && p->p_wmesg) ?
					p->p_wmesg : "", p->p_comm);
				break;

			case 'w':
				db_printf("%-16s  %-8s  %18p  %s\n", p->p_comm,
				    p->p_emul->e_name, p->p_wchan,
				    (p->p_wchan && p->p_wmesg) ?
					p->p_wmesg : "");
				break;

			}
		}
		p = LIST_NEXT(p, p_list);
		if (p == 0 && doingzomb == 0) {
			doingzomb = 1;
			p = LIST_FIRST(&zombproc);
		}
	}
}
#endif