/*	$NetBSD: kern_synch.c,v 1.67 1999/11/15 18:49:09 fvdl Exp $	*/

/*-
 * Copyright (c) 1999 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*-
 * Copyright (c) 1982, 1986, 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_synch.c	8.9 (Berkeley) 5/19/95
 */

#include "opt_ddb.h"
#include "opt_ktrace.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/buf.h>
#include <sys/signalvar.h>
#include <sys/resourcevar.h>
#include <vm/vm.h>
#include <sys/sched.h>

#include <uvm/uvm_extern.h>

#ifdef KTRACE
#include <sys/ktrace.h>
#endif

#define	NICE_WEIGHT	2		/* priorities per nice level */
#define	PPQ		(128 / NQS)	/* priorities per queue */

/* clamp p_estcpu so the priority computed by resetpriority() stays in range */
#define	ESTCPULIM(e)	min((e), NICE_WEIGHT * PRIO_MAX - PPQ)

#include <machine/cpu.h>

u_char	curpriority;		/* usrpri of curproc */
int	lbolt;			/* once a second sleep address */

void	roundrobin __P((void *));
void	schedcpu __P((void *));
void	updatepri __P((struct proc *));
void	endtsleep __P((void *));

__inline void awaken __P((struct proc *));

/*
 * Force switch among equal priority processes every 100ms.
 */
/* ARGSUSED */
void
roundrobin(arg)
	void *arg;
{

	need_resched();
	timeout(roundrobin, NULL, hz / 10);
}

/*
 * Constants for digital decay and forget:
 *	90% of (p_estcpu) usage in 5 * loadav time
 *	95% of (p_pctcpu) usage in 60 seconds (load insensitive)
 *          Note that, as ps(1) mentions, this can let percentages
 *          total over 100% (I've seen 137.9% for 3 processes).
 *
 * Note that hardclock updates p_estcpu and p_cpticks independently.
 *
 * We wish to decay away 90% of p_estcpu in (5 * loadavg) seconds.
 * That is, the system wants to compute a value of decay such
 * that the following for loop:
 * 	for (i = 0; i < (5 * loadavg); i++)
 * 		p_estcpu *= decay;
 * will compute
 * 	p_estcpu *= 0.1;
 * for all values of loadavg.
 *
 * Mathematically this loop can be expressed by saying:
 * 	decay ** (5 * loadavg) ~= .1
 *
 * The system computes decay as:
 * 	decay = (2 * loadavg) / (2 * loadavg + 1)
 *
 * We wish to prove that the system's computation of decay
 * will always fulfill the equation:
 * 	decay ** (5 * loadavg) ~= .1
 *
 * If we compute b as:
 * 	b = 2 * loadavg
 * then
 * 	decay = b / (b + 1)
 *
 * We now need to prove two things:
 *	1) Given factor ** (5 * loadavg) ~= .1, prove factor == b/(b+1)
 *	2) Given b/(b+1) ** power ~= .1, prove power == (5 * loadavg)
 *
 * Facts:
 *	For x close to zero, exp(x) =~ 1 + x, since
 *		exp(x) = 0! + x**1/1! + x**2/2! + ... ;
 *		therefore exp(-1/b) =~ 1 - (1/b) = (b-1)/b.
 *	For x close to zero, ln(1+x) =~ x, since
 *		ln(1+x) = x - x**2/2 + x**3/3 - ...	-1 < x < 1;
 *		therefore ln(b/(b+1)) = ln(1 - 1/(b+1)) =~ -1/(b+1).
 *	ln(.1) =~ -2.30
 *
 * Proof of (1):
 *    Solve (factor)**(power) =~ .1 given power (5*loadav):
 *	solving for factor,
 *	ln(factor) =~ (-2.30/5*loadav), or
 *	factor =~ exp(-1/((5/2.30)*loadav)) =~ exp(-1/(2*loadav)) =
 *	    exp(-1/b) =~ (b-1)/b =~ b/(b+1).			QED
 *
 * Proof of (2):
 *    Solve (factor)**(power) =~ .1 given factor == (b/(b+1)):
 *	solving for power,
 *	power*ln(b/(b+1)) =~ -2.30, or
 *	power =~ 2.3 * (b + 1) = 4.6*loadav + 2.3 =~ 5*loadav.	QED
 *
 * Actual power values for the implemented algorithm are as follows:
 *	loadav:	1	2	3	4
 *	power:	5.68	10.32	14.94	19.55
 */

/* calculations for digital decay to forget 90% of usage in 5*loadav sec */
#define	loadfactor(loadav)	(2 * (loadav))
#define	decay_cpu(loadfac, cpu)	(((loadfac) * (cpu)) / ((loadfac) + FSCALE))
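/*
 * Worked example (illustrative only; assumes FSHIFT == 11, so that
 * FSCALE == 2048, matching the 11-bit fixed point noted for CCPU_SHIFT
 * below): a load average of 1.0 is stored as 2048, so
 * loadfac = loadfactor(2048) = 4096, and
 *
 *	decay_cpu(4096, cpu) = (4096 * cpu) / (4096 + 2048) = (2/3) * cpu
 *
 * which is decay = b/(b+1) with b = 2*loadavg = 2.  Per the power table
 * above, about 5.68 such steps reduce p_estcpu to (2/3)**5.68 =~ 10% of
 * its starting value, i.e. 90% is forgotten in roughly 5*loadavg seconds.
 */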
/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
fixpt_t	ccpu = 0.95122942450071400909 * FSCALE;		/* exp(-1/20) */

/*
 * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the
 * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below
 * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT).
 *
 * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used:
 *	1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits).
 *
 * If you don't want to bother with the faster/more-accurate formula, you
 * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate
 * (more general) method of calculating the %age of CPU used by a process.
 */
#define	CCPU_SHIFT	11

/*
 * Recompute process priorities every hz ticks (i.e., once per second).
 */
/* ARGSUSED */
void
schedcpu(arg)
	void *arg;
{
	register fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);
	register struct proc *p;
	register int s;
	register unsigned int newcpu;
	int clkhz;

	proclist_lock_read();
	for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) {
		/*
		 * Increment time in/out of memory and sleep time
		 * (if sleeping).  We ignore overflow; with 16-bit int's
		 * (remember them?) overflow takes 45 days.
		 */
		p->p_swtime++;
		if (p->p_stat == SSLEEP || p->p_stat == SSTOP)
			p->p_slptime++;
		p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT;
		/*
		 * If the process has slept the entire second,
		 * stop recalculating its priority until it wakes up.
		 */
		if (p->p_slptime > 1)
			continue;
		s = splstatclock();	/* prevent state changes */
		/*
		 * p_pctcpu is only for ps.
		 */
		clkhz = stathz != 0 ? stathz : hz;
#if	(FSHIFT >= CCPU_SHIFT)
		p->p_pctcpu += (clkhz == 100) ?
		    ((fixpt_t)p->p_cpticks) << (FSHIFT - CCPU_SHIFT) :
		    100 * (((fixpt_t)p->p_cpticks)
		    << (FSHIFT - CCPU_SHIFT)) / clkhz;
#else
		p->p_pctcpu += ((FSCALE - ccpu) *
		    (p->p_cpticks * FSCALE / clkhz)) >> FSHIFT;
#endif
		p->p_cpticks = 0;
		newcpu = (u_int)decay_cpu(loadfac, p->p_estcpu);
		p->p_estcpu = newcpu;
		resetpriority(p);
		if (p->p_priority >= PUSER) {
			if ((p != curproc) &&
			    p->p_stat == SRUN &&
			    (p->p_flag & P_INMEM) &&
			    (p->p_priority / PPQ) != (p->p_usrpri / PPQ)) {
				remrunqueue(p);
				p->p_priority = p->p_usrpri;
				setrunqueue(p);
			} else
				p->p_priority = p->p_usrpri;
		}
		splx(s);
	}
	proclist_unlock_read();
	uvm_meter();
	wakeup((caddr_t)&lbolt);
	timeout(schedcpu, (void *)0, hz);
}

/*
 * Recalculate the priority of a process after it has slept for a while.
 * For all load averages >= 1 and max p_estcpu of 255, sleeping for at
 * least six times the loadfactor will decay p_estcpu to zero.
 */
void
updatepri(p)
	register struct proc *p;
{
	register unsigned int newcpu = p->p_estcpu;
	register fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);

	if (p->p_slptime > 5 * loadfac)
		p->p_estcpu = 0;
	else {
		p->p_slptime--;	/* the first time was done in schedcpu */
		while (newcpu && --p->p_slptime)
			newcpu = (int)decay_cpu(loadfac, newcpu);
		p->p_estcpu = newcpu;
	}
	resetpriority(p);
}

/*
 * We're only looking at 7 bits of the address; everything is
 * aligned to 4, lots of things are aligned to greater powers
 * of 2.  Shift right by 8, i.e. drop the bottom 256 worth.
 */
#define	TABLESIZE	128
#define	LOOKUP(x)	(((long)(x) >> 8) & (TABLESIZE - 1))
struct slpque {
	struct proc *sq_head;
	struct proc **sq_tailp;
} slpque[TABLESIZE];
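/*
 * Hash example (illustrative only, using a made-up address): a sleep
 * identifier of 0x12345678 hashes to
 *
 *	LOOKUP(0x12345678) = (0x12345678 >> 8) & 127
 *			   = 0x123456 & 0x7f = 0x56 (slot 86)
 *
 * so every process sleeping on that identifier queues up in slpque[86].
 * Distinct identifiers can collide in one slot, which is why wakeup()
 * below still compares p_wchan against the identifier before waking
 * anything.
 */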
/*
 * During autoconfiguration or after a panic, a sleep will simply
 * lower the priority briefly to allow interrupts, then return.
 * The priority to be used (safepri) is machine-dependent, thus this
 * value is initialized and maintained in the machine-dependent layers.
 * This priority will typically be 0, or the lowest priority
 * that is safe for use on the interrupt stack; it can be made
 * higher to block network software interrupts after panics.
 */
int safepri;

/*
 * General sleep call.  Suspends the current process until a wakeup is
 * performed on the specified identifier.  The process will then be made
 * runnable with the specified priority.  Sleeps at most timo/hz seconds
 * (0 means no timeout).  If pri includes PCATCH flag, signals are checked
 * before and after sleeping, else signals are not checked.  Returns 0 if
 * awakened, EWOULDBLOCK if the timeout expires.  If PCATCH is set and a
 * signal needs to be delivered, ERESTART is returned if the current system
 * call should be restarted if possible, and EINTR is returned if the system
 * call should be interrupted by the signal (return EINTR).
 */
int
tsleep(ident, priority, wmesg, timo)
	void *ident;
	int priority, timo;
	const char *wmesg;
{
	register struct proc *p = curproc;
	register struct slpque *qp;
	register int s;
	int sig, catch = priority & PCATCH;
	void endtsleep __P((void *));

	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration,
		 * just give interrupts a chance, then just return;
		 * don't run any other procs or panic below,
		 * in case this is the idle process and already asleep.
		 */
		s = splhigh();
		splx(safepri);
		splx(s);
		return (0);
	}

#ifdef KTRACE
	if (KTRPOINT(p, KTR_CSW))
		ktrcsw(p->p_tracep, 1, 0);
#endif
	s = splhigh();

#ifdef DIAGNOSTIC
	if (ident == NULL)
		panic("tsleep: ident == NULL");
	if (p->p_stat != SRUN)
		panic("tsleep: p_stat %d != SRUN", p->p_stat);
	if (p->p_back != NULL)
		panic("tsleep: p_back != NULL");
#endif
	p->p_wchan = ident;
	p->p_wmesg = wmesg;
	p->p_slptime = 0;
	p->p_priority = priority & PRIMASK;
	qp = &slpque[LOOKUP(ident)];
	if (qp->sq_head == 0)
		qp->sq_head = p;
	else
		*qp->sq_tailp = p;
	*(qp->sq_tailp = &p->p_forw) = 0;
	if (timo)
		timeout(endtsleep, (void *)p, timo);
	/*
	 * We put ourselves on the sleep queue and start our timeout
	 * before calling CURSIG, as we could stop there, and a wakeup
	 * or a SIGCONT (or both) could occur while we were stopped.
	 * A SIGCONT would cause us to be marked as SSLEEP
	 * without resuming us, thus we must be ready for sleep
	 * when CURSIG is called.  If the wakeup happens while we're
	 * stopped, p->p_wchan will be 0 upon return from CURSIG.
	 */
	if (catch) {
		p->p_flag |= P_SINTR;
		if ((sig = CURSIG(p)) != 0) {
			if (p->p_wchan)
				unsleep(p);
			p->p_stat = SRUN;
			goto resume;
		}
		if (p->p_wchan == 0) {
			catch = 0;
			goto resume;
		}
	} else
		sig = 0;
	p->p_stat = SSLEEP;
	p->p_stats->p_ru.ru_nvcsw++;
	mi_switch();
#ifdef	DDB
	/* handy breakpoint location after process "wakes" */
	asm(".globl bpendtsleep ; bpendtsleep:");
#endif
resume:
	curpriority = p->p_usrpri;
	splx(s);
	p->p_flag &= ~P_SINTR;
	if (p->p_flag & P_TIMEOUT) {
		p->p_flag &= ~P_TIMEOUT;
		if (sig == 0) {
#ifdef KTRACE
			if (KTRPOINT(p, KTR_CSW))
				ktrcsw(p->p_tracep, 0, 0);
#endif
			return (EWOULDBLOCK);
		}
	} else if (timo)
		untimeout(endtsleep, (void *)p);
	if (catch && (sig != 0 || (sig = CURSIG(p)) != 0)) {
#ifdef KTRACE
		if (KTRPOINT(p, KTR_CSW))
			ktrcsw(p->p_tracep, 0, 0);
#endif
		if ((p->p_sigacts->ps_sigact[sig].sa_flags & SA_RESTART) == 0)
			return (EINTR);
		return (ERESTART);
	}
#ifdef KTRACE
	if (KTRPOINT(p, KTR_CSW))
		ktrcsw(p->p_tracep, 0, 0);
#endif
	return (0);
}
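/*
 * Typical usage sketch (hypothetical caller, not part of this file):
 * a driver blocks until a flag shared with its interrupt handler is
 * set, retrying on each wakeup and handling signals and a timeout:
 *
 *	int s, error;
 *
 *	s = splbio();
 *	while ((sc->sc_flags & SC_DONE) == 0) {
 *		error = tsleep(&sc->sc_flags, PRIBIO | PCATCH, "scdone", hz);
 *		if (error)	/- EWOULDBLOCK, EINTR or ERESTART -/
 *			break;
 *	}
 *	splx(s);
 *
 * The interrupt side sets SC_DONE and calls wakeup(&sc->sc_flags).
 * The names sc, sc_flags, SC_DONE and "scdone" are illustrative;
 * tsleep(), wakeup(), PRIBIO and PCATCH are the real interfaces.
 */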
/*
 * Implement timeout for tsleep.
 * If process hasn't been awakened (wchan non-zero),
 * set timeout flag and undo the sleep.  If proc
 * is stopped, just unsleep so it will remain stopped.
 */
void
endtsleep(arg)
	void *arg;
{
	register struct proc *p;
	int s;

	p = (struct proc *)arg;
	s = splhigh();
	if (p->p_wchan) {
		if (p->p_stat == SSLEEP)
			setrunnable(p);
		else
			unsleep(p);
		p->p_flag |= P_TIMEOUT;
	}
	splx(s);
}

/*
 * Short-term, non-interruptible sleep.
 */
void
sleep(ident, priority)
	void *ident;
	int priority;
{
	register struct proc *p = curproc;
	register struct slpque *qp;
	register int s;

#ifdef DIAGNOSTIC
	if (priority > PZERO) {
		printf("sleep called with priority %d > PZERO, wchan: %p\n",
		    priority, ident);
		panic("old sleep");
	}
#endif
	s = splhigh();
	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration,
		 * just give interrupts a chance, then just return;
		 * don't run any other procs or panic below,
		 * in case this is the idle process and already asleep.
		 */
		splx(safepri);
		splx(s);
		return;
	}
#ifdef DIAGNOSTIC
	if (ident == NULL || p->p_stat != SRUN || p->p_back)
		panic("sleep");
#endif
	p->p_wchan = ident;
	p->p_wmesg = NULL;
	p->p_slptime = 0;
	p->p_priority = priority;
	qp = &slpque[LOOKUP(ident)];
	if (qp->sq_head == 0)
		qp->sq_head = p;
	else
		*qp->sq_tailp = p;
	*(qp->sq_tailp = &p->p_forw) = 0;
	p->p_stat = SSLEEP;
	p->p_stats->p_ru.ru_nvcsw++;
#ifdef KTRACE
	if (KTRPOINT(p, KTR_CSW))
		ktrcsw(p->p_tracep, 1, 0);
#endif
	mi_switch();
#ifdef	DDB
	/* handy breakpoint location after process "wakes" */
	asm(".globl bpendsleep ; bpendsleep:");
#endif
#ifdef KTRACE
	if (KTRPOINT(p, KTR_CSW))
		ktrcsw(p->p_tracep, 0, 0);
#endif
	curpriority = p->p_usrpri;
	splx(s);
}
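/*
 * Comparison sketch (informal): with no PCATCH bit and no timeout, the
 * historical call
 *
 *	sleep(chan, pri);
 *
 * is nearly equivalent to
 *
 *	(void) tsleep(chan, pri, NULL, 0);
 *
 * The old entry point survives for legacy callers and, under DIAGNOSTIC,
 * additionally enforces pri <= PZERO, i.e. that the caller really wanted
 * an uninterruptible sleep.
 */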
/*
 * Remove a process from its wait queue
 */
void
unsleep(p)
	register struct proc *p;
{
	register struct slpque *qp;
	register struct proc **hp;
	int s;

	s = splhigh();
	if (p->p_wchan) {
		hp = &(qp = &slpque[LOOKUP(p->p_wchan)])->sq_head;
		while (*hp != p)
			hp = &(*hp)->p_forw;
		*hp = p->p_forw;
		if (qp->sq_tailp == &p->p_forw)
			qp->sq_tailp = hp;
		p->p_wchan = 0;
	}
	splx(s);
}

/*
 * Optimized-for-wakeup() version of setrunnable().
 */
__inline void
awaken(p)
	struct proc *p;
{

	if (p->p_slptime > 1)
		updatepri(p);
	p->p_slptime = 0;
	p->p_stat = SRUN;
	/*
	 * Since curpriority is a user priority, p->p_priority
	 * is always better than curpriority.
	 */
	if (p->p_flag & P_INMEM) {
		setrunqueue(p);
		need_resched();
	} else
		wakeup((caddr_t)&proc0);
}

/*
 * Make all processes sleeping on the specified identifier runnable.
 */
void
wakeup(ident)
	register void *ident;
{
	register struct slpque *qp;
	register struct proc *p, **q;
	int s;

	s = splhigh();
	qp = &slpque[LOOKUP(ident)];
restart:
	for (q = &qp->sq_head; (p = *q) != NULL; ) {
#ifdef DIAGNOSTIC
		if (p->p_back || (p->p_stat != SSLEEP && p->p_stat != SSTOP))
			panic("wakeup");
#endif
		if (p->p_wchan == ident) {
			p->p_wchan = 0;
			*q = p->p_forw;
			if (qp->sq_tailp == &p->p_forw)
				qp->sq_tailp = q;
			if (p->p_stat == SSLEEP) {
				awaken(p);
				goto restart;
			}
		} else
			q = &p->p_forw;
	}
	splx(s);
}

/*
 * Make the highest priority process first in line on the specified
 * identifier runnable.
 */
void
wakeup_one(ident)
	void *ident;
{
	struct slpque *qp;
	struct proc *p, **q;
	struct proc *best_sleepp, **best_sleepq;
	struct proc *best_stopp, **best_stopq;
	int s;

	best_sleepp = best_stopp = NULL;
	best_sleepq = best_stopq = NULL;

	s = splhigh();
	qp = &slpque[LOOKUP(ident)];

	for (q = &qp->sq_head; (p = *q) != NULL; q = &p->p_forw) {
#ifdef DIAGNOSTIC
		if (p->p_back || (p->p_stat != SSLEEP && p->p_stat != SSTOP))
			panic("wakeup_one");
#endif
		if (p->p_wchan == ident) {
			if (p->p_stat == SSLEEP) {
				if (best_sleepp == NULL ||
				    p->p_priority < best_sleepp->p_priority) {
					best_sleepp = p;
					best_sleepq = q;
				}
			} else {
				if (best_stopp == NULL ||
				    p->p_priority < best_stopp->p_priority) {
					best_stopp = p;
					best_stopq = q;
				}
			}
		}
	}

	/*
	 * Consider any SSLEEP process higher than the highest priority SSTOP
	 * process.
	 */
	if (best_sleepp != NULL) {
		p = best_sleepp;
		q = best_sleepq;
	} else {
		p = best_stopp;
		q = best_stopq;
	}

	if (p != NULL) {
		p->p_wchan = 0;
		*q = p->p_forw;
		if (qp->sq_tailp == &p->p_forw)
			qp->sq_tailp = q;
		if (p->p_stat == SSLEEP)
			awaken(p);
	}
	splx(s);
}

/*
 * The machine independent parts of mi_switch().
 * Must be called at splstatclock() or higher.
 */
void
mi_switch()
{
	register struct proc *p = curproc;	/* XXX */
	register struct rlimit *rlim;
	register long s, u;
	struct timeval tv;

#ifdef DEBUG
	if (p->p_simple_locks) {
		printf("p->p_simple_locks %d\n", p->p_simple_locks);
#ifdef LOCKDEBUG
		simple_lock_dump();
#endif
		panic("sleep: holding simple lock");
	}
#endif
	/*
	 * Compute the amount of time during which the current
	 * process was running, and add that to its total so far.
	 */
	microtime(&tv);
	u = p->p_rtime.tv_usec + (tv.tv_usec - runtime.tv_usec);
	s = p->p_rtime.tv_sec + (tv.tv_sec - runtime.tv_sec);
	if (u < 0) {
		u += 1000000;
		s--;
	} else if (u >= 1000000) {
		u -= 1000000;
		s++;
	}
	p->p_rtime.tv_usec = u;
	p->p_rtime.tv_sec = s;

	/*
	 * Check if the process exceeds its cpu resource allocation.
	 * If over max, kill it.  In any case, if it has run for more
	 * than 10 minutes, reduce priority to give others a chance.
	 */
	rlim = &p->p_rlimit[RLIMIT_CPU];
	if (s >= rlim->rlim_cur) {
		if (s >= rlim->rlim_max)
			psignal(p, SIGKILL);
		else {
			psignal(p, SIGXCPU);
			if (rlim->rlim_cur < rlim->rlim_max)
				rlim->rlim_cur += 5;
		}
	}
	if (autonicetime && s > autonicetime &&
	    p->p_ucred->cr_uid && p->p_nice == NZERO) {
		p->p_nice = autoniceval + NZERO;
		resetpriority(p);
	}

	/*
	 * Pick a new current process and record its start time.
	 */
	uvmexp.swtch++;
	cpu_switch(p);
	microtime(&runtime);
}
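/*
 * Carry-normalization example for the run-time accounting in mi_switch()
 * (illustrative numbers): suppose p_rtime = { 3, 900000 } and the current
 * slice ran from runtime = { 10, 800000 } to tv = { 11, 50000 }.  Then
 *
 *	u = 900000 + (50000 - 800000) = 150000
 *	s = 3 + (11 - 10)             = 4
 *
 * u stayed within [0, 1000000), so no adjustment is needed and the new
 * p_rtime is { 4, 150000 }: the old 3.9s plus the 0.25s slice.  Had u
 * gone negative or reached one million, the branches above would borrow
 * from or carry into s.
 */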
/*
 * Initialize the (doubly-linked) run queues
 * to be empty.
 */
void
rqinit()
{
	register int i;

	for (i = 0; i < NQS; i++)
		qs[i].ph_link = qs[i].ph_rlink = (struct proc *)&qs[i];
}

/*
 * Change process state to be runnable,
 * placing it on the run queue if it is in memory,
 * and awakening the swapper if it isn't in memory.
 */
void
setrunnable(p)
	register struct proc *p;
{
	register int s;

	s = splhigh();
	switch (p->p_stat) {
	case 0:
	case SRUN:
	case SZOMB:
	case SDEAD:
	default:
		panic("setrunnable");
	case SSTOP:
		/*
		 * If we're being traced (possibly because someone attached us
		 * while we were stopped), check for a signal from the debugger.
		 */
		if ((p->p_flag & P_TRACED) != 0 && p->p_xstat != 0) {
			sigaddset(&p->p_siglist, p->p_xstat);
			p->p_sigcheck = 1;
		}
		/* FALLTHROUGH */
	case SSLEEP:
		unsleep(p);		/* e.g. when sending signals */
		break;

	case SIDL:
		break;
	}
	p->p_stat = SRUN;
	if (p->p_flag & P_INMEM)
		setrunqueue(p);
	splx(s);
	if (p->p_slptime > 1)
		updatepri(p);
	p->p_slptime = 0;
	if ((p->p_flag & P_INMEM) == 0)
		wakeup((caddr_t)&proc0);
	else if (p->p_priority < curpriority)
		need_resched();
}

/*
 * Compute the priority of a process when running in user mode.
 * Arrange to reschedule if the resulting priority is better
 * than that of the current process.
 */
void
resetpriority(p)
	register struct proc *p;
{
	register unsigned int newpriority;

	newpriority = PUSER + p->p_estcpu + NICE_WEIGHT * (p->p_nice - NZERO);
	newpriority = min(newpriority, MAXPRI);
	p->p_usrpri = newpriority;
	if (newpriority < curpriority)
		need_resched();
}

/*
 * We adjust the priority of the current process.  The priority of a process
 * gets worse as it accumulates CPU time.  The cpu usage estimator (p_estcpu)
 * is increased here.  The formula for computing priorities (in kern_synch.c)
 * will compute a different value each time p_estcpu increases.  This can
 * cause a switch, but unless the priority crosses a PPQ boundary the actual
 * queue will not change.  The cpu usage estimator ramps up quite quickly
 * when the process is running (linearly), and decays away exponentially, at
 * a rate which is proportionally slower when the system is busy.  The basic
 * principle is that the system will 90% forget that the process used a lot
 * of CPU time in 5 * loadav seconds.  This causes the system to favor
 * processes which haven't run much recently, and to round-robin among other
 * processes.
 */
void
schedclock(p)
	struct proc *p;
{

	p->p_estcpu = ESTCPULIM(p->p_estcpu + 1);
	resetpriority(p);
	if (p->p_priority >= PUSER)
		p->p_priority = p->p_usrpri;
}
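/*
 * Priority arithmetic example for resetpriority() (illustrative; PUSER
 * and MAXPRI come from machine/param.h, here assumed PUSER == 50 and
 * MAXPRI == 127): a process with p_estcpu == 24 and p_nice == NZERO + 4
 * gets
 *
 *	newpriority = 50 + 24 + NICE_WEIGHT * 4 = 50 + 24 + 8 = 82
 *
 * which is clipped against MAXPRI (no effect here) and stored in
 * p_usrpri.  With PPQ == 128 / NQS == 4 (for the usual NQS == 32),
 * priorities 80-83 all land in the same run queue, so small changes in
 * p_estcpu move the process between queues only when a PPQ boundary is
 * crossed, as the comment above schedclock() describes.
 */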