/*	$NetBSD: kern_synch.c,v 1.79 2000/06/27 17:41:27 mrg Exp $	*/

/*-
 * Copyright (c) 1999, 2000 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*-
 * Copyright (c) 1982, 1986, 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_synch.c	8.9 (Berkeley) 5/19/95
 */

#include "opt_ddb.h"
#include "opt_ktrace.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/callout.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/buf.h>
#include <sys/signalvar.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>

#include <uvm/uvm_extern.h>

#ifdef KTRACE
#include <sys/ktrace.h>
#endif

#include <machine/cpu.h>

int	lbolt;			/* once a second sleep address */

/*
 * The global scheduler state.
 */
struct prochd sched_qs[RUNQUE_NQS];		/* run queues */
__volatile u_int32_t sched_whichqs;		/* bitmap of non-empty queues */
struct slpque sched_slpque[SLPQUE_TABLESIZE];	/* sleep queues */

void roundrobin(void *);
void schedcpu(void *);
void updatepri(struct proc *);
void endtsleep(void *);

__inline void awaken(struct proc *);

struct callout roundrobin_ch = CALLOUT_INITIALIZER;
struct callout schedcpu_ch = CALLOUT_INITIALIZER;

/*
 * Force switch among equal priority processes every 100ms.
 */
/* ARGSUSED */
void
roundrobin(void *arg)
{
	struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
	int s;

	if (curproc != NULL) {
		s = splstatclock();
		if (spc->spc_flags & SPCF_SEENRR) {
			/*
			 * The process has already been through a roundrobin
			 * without switching and may be hogging the CPU.
			 * Indicate that the process should yield.
			 */
			spc->spc_flags |= SPCF_SHOULDYIELD;
		} else
			spc->spc_flags |= SPCF_SEENRR;
		splx(s);
	}
	need_resched();
	callout_reset(&roundrobin_ch, hz / 10, roundrobin, NULL);
}

/*
 * Constants for digital decay and forget:
 *	90% of (p_estcpu) usage in 5 * loadav time
 *	95% of (p_pctcpu) usage in 60 seconds (load insensitive)
 *	Note that, as ps(1) mentions, this can let percentages
 *	total over 100% (I've seen 137.9% for 3 processes).
 *
 * Note that hardclock updates p_estcpu and p_cpticks independently.
 *
 * We wish to decay away 90% of p_estcpu in (5 * loadavg) seconds.
 * That is, the system wants to compute a value of decay such
 * that the following for loop:
 *	for (i = 0; i < (5 * loadavg); i++)
 *		p_estcpu *= decay;
 * will compute
 *	p_estcpu *= 0.1;
 * for all values of loadavg:
 *
 * Mathematically this loop can be expressed by saying:
 *	decay ** (5 * loadavg) ~= .1
 *
 * The system computes decay as:
 *	decay = (2 * loadavg) / (2 * loadavg + 1)
 *
 * We wish to prove that the system's computation of decay
 * will always fulfill the equation:
 *	decay ** (5 * loadavg) ~= .1
 *
 * If we compute b as:
 *	b = 2 * loadavg
 * then
 *	decay = b / (b + 1)
 *
 * We now need to prove two things:
 *	1) Given factor ** (5 * loadavg) ~= .1, prove factor == b/(b+1)
 *	2) Given b/(b+1) ** power ~= .1, prove power == (5 * loadavg)
 *
 * Facts:
 *	For x close to zero, exp(x) =~ 1 + x, since
 *		exp(x) = 0! + x**1/1! + x**2/2! + ... .
 *		therefore exp(-1/b) =~ 1 - (1/b) = (b-1)/b.
 *	For x close to zero, ln(1+x) =~ x, since
 *		ln(1+x) = x - x**2/2 + x**3/3 - ...	-1 < x < 1
 *		therefore ln(b/(b+1)) = ln(1 - 1/(b+1)) =~ -1/(b+1).
 *	ln(.1) =~ -2.30
 *
 * Proof of (1):
 *	Solve (factor)**(power) =~ .1 given power (5*loadav):
 *	solving for factor,
 *	ln(factor) =~ (-2.30/5*loadav), or
 *	factor =~ exp(-1/((5/2.30)*loadav)) =~ exp(-1/(2*loadav)) =
 *	    exp(-1/b) =~ (b-1)/b =~ b/(b+1).			QED
 *
 * Proof of (2):
 *	Solve (factor)**(power) =~ .1 given factor == (b/(b+1)):
 *	solving for power,
 *	power*ln(b/(b+1)) =~ -2.30, or
 *	power =~ 2.3 * (b + 1) = 4.6*loadav + 2.3 =~ 5*loadav.	QED
 *
 * Actual power values for the implemented algorithm are as follows:
 *	loadav:	1	2	3	4
 *	power:	5.68	10.32	14.94	19.55
 */

/* calculations for digital decay to forget 90% of usage in 5*loadav sec */
#define	loadfactor(loadav)	(2 * (loadav))
#define	decay_cpu(loadfac, cpu)	(((loadfac) * (cpu)) / ((loadfac) + FSCALE))

/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
fixpt_t	ccpu = 0.95122942450071400909 * FSCALE;		/* exp(-1/20) */

/*
 * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the
 * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below
 * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT).
 *
 * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used:
 *	1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits).
 *
 * If you don't want to bother with the faster/more-accurate formula, you
 * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate
 * (more general) method of calculating the %age of CPU used by a process.
 */
#define	CCPU_SHIFT	11

/*
 * Recompute process priorities, every hz ticks.
 */
/* ARGSUSED */
void
schedcpu(void *arg)
{
	fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);
	struct proc *p;
	int s;
	unsigned int newcpu;
	int clkhz;

	proclist_lock_read();
	for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) {
		/*
		 * Increment time in/out of memory and sleep time
		 * (if sleeping).  We ignore overflow; with 16-bit int's
		 * (remember them?) overflow takes 45 days.
		 */
		p->p_swtime++;
		if (p->p_stat == SSLEEP || p->p_stat == SSTOP)
			p->p_slptime++;
		p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT;
		/*
		 * If the process has slept the entire second,
		 * stop recalculating its priority until it wakes up.
		 */
		if (p->p_slptime > 1)
			continue;
		s = splstatclock();	/* prevent state changes */
		/*
		 * p_pctcpu is only for ps.
		 */
		clkhz = stathz != 0 ? stathz : hz;
#if	(FSHIFT >= CCPU_SHIFT)
		p->p_pctcpu += (clkhz == 100)?
		    ((fixpt_t) p->p_cpticks) << (FSHIFT - CCPU_SHIFT):
		    100 * (((fixpt_t) p->p_cpticks)
		    << (FSHIFT - CCPU_SHIFT)) / clkhz;
#else
		p->p_pctcpu += ((FSCALE - ccpu) *
		    (p->p_cpticks * FSCALE / clkhz)) >> FSHIFT;
#endif
		p->p_cpticks = 0;
		newcpu = (u_int)decay_cpu(loadfac, p->p_estcpu);
		p->p_estcpu = newcpu;
		resetpriority(p);
		if (p->p_priority >= PUSER) {
			if (p->p_stat == SRUN &&
			    (p->p_flag & P_INMEM) &&
			    (p->p_priority / PPQ) != (p->p_usrpri / PPQ)) {
				remrunqueue(p);
				p->p_priority = p->p_usrpri;
				setrunqueue(p);
			} else
				p->p_priority = p->p_usrpri;
		}
		splx(s);
	}
	proclist_unlock_read();
	uvm_meter();
	wakeup((caddr_t)&lbolt);
	callout_reset(&schedcpu_ch, hz, schedcpu, NULL);
}

/*
 * Recalculate the priority of a process after it has slept for a while.
 * For all load averages >= 1 and max p_estcpu of 255, sleeping for at
 * least six times the loadfactor will decay p_estcpu to zero.
 */
void
updatepri(struct proc *p)
{
	unsigned int newcpu = p->p_estcpu;
	fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);

	if (p->p_slptime > 5 * loadfac)
		p->p_estcpu = 0;
	else {
		p->p_slptime--;	/* the first time was done in schedcpu */
		while (newcpu && --p->p_slptime)
			newcpu = (int) decay_cpu(loadfac, newcpu);
		p->p_estcpu = newcpu;
	}
	resetpriority(p);
}

/*
 * During autoconfiguration or after a panic, a sleep will simply
 * lower the priority briefly to allow interrupts, then return.
 * The priority to be used (safepri) is machine-dependent, thus this
 * value is initialized and maintained in the machine-dependent layers.
 * This priority will typically be 0, or the lowest priority
 * that is safe for use on the interrupt stack; it can be made
 * higher to block network software interrupts after panics.
 */
int	safepri;

/*
 * General sleep call.  Suspends the current process until a wakeup is
 * performed on the specified identifier.  The process will then be made
 * runnable with the specified priority.  Sleeps at most timo/hz seconds
 * (0 means no timeout).  If pri includes the PCATCH flag, signals are
 * checked before and after sleeping, else signals are not checked.
 * Returns 0 if awakened, EWOULDBLOCK if the timeout expires.  If PCATCH
 * is set and a signal needs to be delivered, ERESTART is returned if the
 * current system call should be restarted if possible, and EINTR is
 * returned if the system call should be interrupted by the signal.
 *
 * The interlock is held until the scheduler_slock is held.  The
 * interlock will be locked before returning back to the caller
 * unless the PNORELOCK flag is specified, in which case the
 * interlock will always be unlocked upon return.
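 *
 * A minimal sketch of the usual calling pattern (hypothetical caller;
 * sc, sc_slock and sc_busy are illustrative names, not part of this file):
 *
 *	simple_lock(&sc->sc_slock);
 *	while (sc->sc_busy) {
 *		error = ltsleep(&sc->sc_busy, PRIBIO | PCATCH, "scbusy", 0,
 *		    &sc->sc_slock);
 *		if (error) {
 *			simple_unlock(&sc->sc_slock);
 *			return (error);
 *		}
 *	}
 *	sc->sc_busy = 1;
 *	simple_unlock(&sc->sc_slock);
 *
 * The interlock is dropped while sleeping and, since PNORELOCK is not
 * passed here, re-acquired before ltsleep() returns.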
 */
int
ltsleep(void *ident, int priority, const char *wmesg, int timo,
    __volatile struct simplelock *interlock)
{
	struct proc *p = curproc;
	struct slpque *qp;
	int sig, s;
	int catch = priority & PCATCH;
	int relock = (priority & PNORELOCK) == 0;
#if 0 /* XXXSMP */
	int dobiglock;
#endif

	/*
	 * XXXSMP
	 * This is probably bogus.  Figure out what the right
	 * thing to do here really is.
	 * Note that not sleeping if ltsleep is called with curproc == NULL
	 * in the shutdown case is disgusting but partly necessary given
	 * how shutdown (barely) works.
	 */
	if (cold || (doing_shutdown && (panicstr || (p == NULL)))) {
		/*
		 * After a panic, or during autoconfiguration,
		 * just give interrupts a chance, then just return;
		 * don't run any other procs or panic below,
		 * in case this is the idle process and already asleep.
		 */
		s = splhigh();
		splx(safepri);
		splx(s);
		if (interlock != NULL && relock == 0)
			simple_unlock(interlock);
		return (0);
	}

#if 0 /* XXXSMP */
	dobiglock = (p->p_flag & P_BIGLOCK) != 0;
#endif

#ifdef KTRACE
	if (KTRPOINT(p, KTR_CSW))
		ktrcsw(p, 1, 0);
#endif

	s = splhigh();			/* XXXSMP: SCHED_LOCK(s) */

#ifdef DIAGNOSTIC
	if (ident == NULL)
		panic("ltsleep: ident == NULL");
	if (p->p_stat != SONPROC)
		panic("ltsleep: p_stat %d != SONPROC", p->p_stat);
	if (p->p_back != NULL)
		panic("ltsleep: p_back != NULL");
#endif

	p->p_wchan = ident;
	p->p_wmesg = wmesg;
	p->p_slptime = 0;
	p->p_priority = priority & PRIMASK;

	qp = SLPQUE(ident);
	if (qp->sq_head == 0)
		qp->sq_head = p;
	else
		*qp->sq_tailp = p;
	*(qp->sq_tailp = &p->p_forw) = 0;

	if (timo)
		callout_reset(&p->p_tsleep_ch, timo, endtsleep, p);

	/*
	 * We can now release the interlock; the scheduler_slock
	 * is held, so a thread can't get in to do wakeup() before
	 * we do the switch.
	 *
	 * XXX We leave the code block here, after inserting ourselves
	 * on the sleep queue, because we might want a more clever
	 * data structure for the sleep queues at some point.
	 */
	if (interlock != NULL)
		simple_unlock(interlock);

	/*
	 * We put ourselves on the sleep queue and start our timeout
	 * before calling CURSIG, as we could stop there, and a wakeup
	 * or a SIGCONT (or both) could occur while we were stopped.
	 * A SIGCONT would cause us to be marked as SSLEEP
	 * without resuming us, thus we must be ready for sleep
	 * when CURSIG is called.  If the wakeup happens while we're
	 * stopped, p->p_wchan will be 0 upon return from CURSIG.
	 */
	if (catch) {
		p->p_flag |= P_SINTR;
		if ((sig = CURSIG(p)) != 0) {
			if (p->p_wchan != NULL)
				unsleep(p);
			p->p_stat = SONPROC;
#if 0 /* XXXSMP */
			/*
			 * We're going to skip the unlock, so
			 * we don't need to relock after resume.
			 */
			dobiglock = 0;
#endif
			goto resume;
		}
		if (p->p_wchan == NULL) {
			catch = 0;
#if 0 /* XXXSMP */
			/* See above. */
			dobiglock = 0;
#endif
			goto resume;
		}
	} else
		sig = 0;
	p->p_stat = SSLEEP;
	p->p_stats->p_ru.ru_nvcsw++;

#if 0 /* XXXSMP */
	if (dobiglock) {
		/*
		 * Release the kernel_lock, as we are about to
		 * yield the CPU.  The scheduler_slock is still
		 * held until cpu_switch() selects a new process
		 * and removes it from the run queue.
		 */
		kernel_lock_release();
	}
#endif

	/* scheduler_slock held */
	mi_switch(p);
	/* scheduler_slock held */
#ifdef	DDB
	/* handy breakpoint location after process "wakes" */
	asm(".globl bpendtsleep ; bpendtsleep:");
#endif

 resume:
	KDASSERT(p->p_cpu != NULL);
	KDASSERT(p->p_cpu == curcpu());
	p->p_cpu->ci_schedstate.spc_curpriority = p->p_usrpri;
	splx(s);			/* XXXSMP: SCHED_UNLOCK(s) */
#if 0 /* XXXSMP */
	if (dobiglock) {
		/*
		 * Reacquire the kernel_lock now.  We do this after
		 * we've released scheduler_slock to avoid deadlock.
		 */
		kernel_lock_acquire(LK_EXCLUSIVE);
	}
#endif
	p->p_flag &= ~P_SINTR;
	if (p->p_flag & P_TIMEOUT) {
		p->p_flag &= ~P_TIMEOUT;
		if (sig == 0) {
#ifdef KTRACE
			if (KTRPOINT(p, KTR_CSW))
				ktrcsw(p, 0, 0);
#endif
			if (relock && interlock != NULL)
				simple_lock(interlock);
			return (EWOULDBLOCK);
		}
	} else if (timo)
		callout_stop(&p->p_tsleep_ch);
	if (catch && (sig != 0 || (sig = CURSIG(p)) != 0)) {
#ifdef KTRACE
		if (KTRPOINT(p, KTR_CSW))
			ktrcsw(p, 0, 0);
#endif
		if (relock && interlock != NULL)
			simple_lock(interlock);
		if ((p->p_sigacts->ps_sigact[sig].sa_flags & SA_RESTART) == 0)
			return (EINTR);
		return (ERESTART);
	}
#ifdef KTRACE
	if (KTRPOINT(p, KTR_CSW))
		ktrcsw(p, 0, 0);
#endif
	if (relock && interlock != NULL)
		simple_lock(interlock);
	return (0);
}

/*
 * Implement timeout for tsleep.
 * If process hasn't been awakened (wchan non-zero),
 * set timeout flag and undo the sleep.  If proc
 * is stopped, just unsleep so it will remain stopped.
 */
void
endtsleep(void *arg)
{
	struct proc *p;
	int s;

	p = (struct proc *)arg;
	s = splhigh();
	if (p->p_wchan) {
		if (p->p_stat == SSLEEP)
			setrunnable(p);
		else
			unsleep(p);
		p->p_flag |= P_TIMEOUT;
	}
	splx(s);
}

/*
 * Remove a process from its wait queue
 */
void
unsleep(struct proc *p)
{
	struct slpque *qp;
	struct proc **hp;
	int s;

	s = splhigh();
	if (p->p_wchan) {
		hp = &(qp = SLPQUE(p->p_wchan))->sq_head;
		while (*hp != p)
			hp = &(*hp)->p_forw;
		*hp = p->p_forw;
		if (qp->sq_tailp == &p->p_forw)
			qp->sq_tailp = hp;
		p->p_wchan = 0;
	}
	splx(s);
}

/*
 * Optimized-for-wakeup() version of setrunnable().
 */
__inline void
awaken(struct proc *p)
{

	if (p->p_slptime > 1)
		updatepri(p);
	p->p_slptime = 0;
	p->p_stat = SRUN;

	/*
	 * Since curpriority is a user priority, p->p_priority
	 * is always better than curpriority.
	 */
	if (p->p_flag & P_INMEM) {
		setrunqueue(p);
		need_resched();
	} else
		wakeup(&proc0);
}

/*
 * Make all processes sleeping on the specified identifier runnable.
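 *
 * For the hypothetical ltsleep() example shown above, the wakeup side
 * would look like (again illustrative, not part of this file):
 *
 *	simple_lock(&sc->sc_slock);
 *	sc->sc_busy = 0;
 *	simple_unlock(&sc->sc_slock);
 *	wakeup(&sc->sc_busy);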
 */
void
wakeup(void *ident)
{
	struct slpque *qp;
	struct proc *p, **q;
	int s;

	s = splhigh();			/* XXXSMP: SCHED_LOCK(s) */

	qp = SLPQUE(ident);
 restart:
	for (q = &qp->sq_head; (p = *q) != NULL; ) {
#ifdef DIAGNOSTIC
		if (p->p_back || (p->p_stat != SSLEEP && p->p_stat != SSTOP))
			panic("wakeup");
#endif
		if (p->p_wchan == ident) {
			p->p_wchan = 0;
			*q = p->p_forw;
			if (qp->sq_tailp == &p->p_forw)
				qp->sq_tailp = q;
			if (p->p_stat == SSLEEP) {
				awaken(p);
				goto restart;
			}
		} else
			q = &p->p_forw;
	}
	splx(s);			/* XXXSMP: SCHED_UNLOCK(s) */
}

/*
 * Make the highest priority process first in line on the specified
 * identifier runnable.
 */
void
wakeup_one(void *ident)
{
	struct slpque *qp;
	struct proc *p, **q;
	struct proc *best_sleepp, **best_sleepq;
	struct proc *best_stopp, **best_stopq;
	int s;

	best_sleepp = best_stopp = NULL;
	best_sleepq = best_stopq = NULL;

	s = splhigh();			/* XXXSMP: SCHED_LOCK(s) */

	qp = SLPQUE(ident);

	for (q = &qp->sq_head; (p = *q) != NULL; q = &p->p_forw) {
#ifdef DIAGNOSTIC
		if (p->p_back || (p->p_stat != SSLEEP && p->p_stat != SSTOP))
			panic("wakeup_one");
#endif
		if (p->p_wchan == ident) {
			if (p->p_stat == SSLEEP) {
				if (best_sleepp == NULL ||
				    p->p_priority < best_sleepp->p_priority) {
					best_sleepp = p;
					best_sleepq = q;
				}
			} else {
				if (best_stopp == NULL ||
				    p->p_priority < best_stopp->p_priority) {
					best_stopp = p;
					best_stopq = q;
				}
			}
		}
	}

	/*
	 * Consider any SSLEEP process higher than the highest priority SSTOP
	 * process.
	 */
	if (best_sleepp != NULL) {
		p = best_sleepp;
		q = best_sleepq;
	} else {
		p = best_stopp;
		q = best_stopq;
	}

	if (p != NULL) {
		p->p_wchan = NULL;
		*q = p->p_forw;
		if (qp->sq_tailp == &p->p_forw)
			qp->sq_tailp = q;
		if (p->p_stat == SSLEEP)
			awaken(p);
	}
	splx(s);			/* XXXSMP: SCHED_UNLOCK(s) */
}

/*
 * General yield call.  Puts the current process back on its run queue and
 * performs a voluntary context switch.
 */
void
yield(void)
{
	struct proc *p = curproc;
	int s;

	s = splstatclock();
	p->p_priority = p->p_usrpri;
	p->p_stat = SRUN;
	setrunqueue(p);
	p->p_stats->p_ru.ru_nvcsw++;
	mi_switch(p);
	splx(s);
}

/*
 * General preemption call.  Puts the current process back on its run queue
 * and performs an involuntary context switch.  If a process is supplied,
 * we switch to that process.  Otherwise, we use the normal process selection
 * criteria.
 */
void
preempt(struct proc *newp)
{
	struct proc *p = curproc;
	int s;

	/*
	 * XXX Switching to a specific process is not supported yet.
	 */
	if (newp != NULL)
		panic("preempt: cpu_preempt not yet implemented");

	s = splstatclock();
	p->p_priority = p->p_usrpri;
	p->p_stat = SRUN;
	setrunqueue(p);
	p->p_stats->p_ru.ru_nivcsw++;
	mi_switch(p);
	splx(s);
}

/*
 * The machine independent parts of context switch.
 * Must be called at splstatclock() or higher.
 */
void
mi_switch(struct proc *p)
{
	struct schedstate_percpu *spc;
	struct rlimit *rlim;
	long s, u;
	struct timeval tv;

	KDASSERT(p->p_cpu != NULL);
	KDASSERT(p->p_cpu == curcpu());

	spc = &p->p_cpu->ci_schedstate;

#ifdef DEBUG
	if (p->p_simple_locks) {
		printf("p->p_simple_locks %d\n", p->p_simple_locks);
#ifdef LOCKDEBUG
		simple_lock_dump();
#endif
		panic("sleep: holding simple lock");
	}
#endif
	/*
	 * Compute the amount of time during which the current
	 * process was running, and add that to its total so far.
	 */
	microtime(&tv);
	u = p->p_rtime.tv_usec + (tv.tv_usec - spc->spc_runtime.tv_usec);
	s = p->p_rtime.tv_sec + (tv.tv_sec - spc->spc_runtime.tv_sec);
	if (u < 0) {
		u += 1000000;
		s--;
	} else if (u >= 1000000) {
		u -= 1000000;
		s++;
	}
	p->p_rtime.tv_usec = u;
	p->p_rtime.tv_sec = s;

	/*
	 * Check if the process exceeds its cpu resource allocation.
	 * If over max, kill it.  In any case, if it has run for more
	 * than autonicetime seconds of CPU time, reduce its priority
	 * to give others a chance.
	 */
	rlim = &p->p_rlimit[RLIMIT_CPU];
	if (s >= rlim->rlim_cur) {
		if (s >= rlim->rlim_max)
			psignal(p, SIGKILL);
		else {
			psignal(p, SIGXCPU);
			if (rlim->rlim_cur < rlim->rlim_max)
				rlim->rlim_cur += 5;
		}
	}
	if (autonicetime && s > autonicetime && p->p_ucred->cr_uid &&
	    p->p_nice == NZERO) {
		p->p_nice = autoniceval + NZERO;
		resetpriority(p);
	}

	/*
	 * Process is about to yield the CPU; clear the appropriate
	 * scheduling flags.
	 */
	spc->spc_flags &= ~SPCF_SWITCHCLEAR;

	/*
	 * Pick a new current process and switch to it.  When we
	 * run again, we'll return back here.
	 */
	uvmexp.swtch++;
	cpu_switch(p);

	/*
	 * We're running again; record our new start time.  We might
	 * be running on a new CPU now, so don't use the cached
	 * schedstate_percpu pointer.
	 */
	KDASSERT(p->p_cpu != NULL);
	KDASSERT(p->p_cpu == curcpu());
	microtime(&p->p_cpu->ci_schedstate.spc_runtime);
}

/*
 * Initialize the (doubly-linked) run queues
 * to be empty.
 */
void
rqinit()
{
	int i;

	for (i = 0; i < RUNQUE_NQS; i++)
		sched_qs[i].ph_link = sched_qs[i].ph_rlink =
		    (struct proc *)&sched_qs[i];
}

/*
 * Change process state to be runnable,
 * placing it on the run queue if it is in memory,
 * and awakening the swapper if it isn't in memory.
 */
void
setrunnable(struct proc *p)
{
	int s;

	s = splhigh();
	switch (p->p_stat) {
	case 0:
	case SRUN:
	case SONPROC:
	case SZOMB:
	case SDEAD:
	default:
		panic("setrunnable");
	case SSTOP:
		/*
		 * If we're being traced (possibly because someone attached us
		 * while we were stopped), check for a signal from the debugger.
		 */
		if ((p->p_flag & P_TRACED) != 0 && p->p_xstat != 0) {
			sigaddset(&p->p_siglist, p->p_xstat);
			p->p_sigcheck = 1;
		}
	case SSLEEP:
		unsleep(p);		/* e.g. when sending signals */
		break;

	case SIDL:
		break;
	}
	p->p_stat = SRUN;
	if (p->p_flag & P_INMEM)
		setrunqueue(p);
	splx(s);
	if (p->p_slptime > 1)
		updatepri(p);
	p->p_slptime = 0;
	if ((p->p_flag & P_INMEM) == 0)
		wakeup((caddr_t)&proc0);
	else if (p->p_priority < curcpu()->ci_schedstate.spc_curpriority) {
		/*
		 * XXXSMP
		 * This is wrong.  It will work, but what really
		 * needs to happen is:
		 *
		 *	- Need to check if p is higher priority
		 *	  than the process currently running on
		 *	  the CPU p last ran on (let p_cpu persist
		 *	  after a context switch?), and preempt
		 *	  that one (or, if there is no process
		 *	  there, simply need_resched() that CPU).
		 *
		 *	- Failing that, traverse a list of
		 *	  available CPUs and need_resched() the
		 *	  CPU with the lowest priority that's
		 *	  lower than p's.
		 */
		need_resched();
	}
}

/*
 * Compute the priority of a process when running in user mode.
 * Arrange to reschedule if the resulting priority is better
 * than that of the current process.
 */
void
resetpriority(struct proc *p)
{
	unsigned int newpriority;

	newpriority = PUSER + p->p_estcpu + NICE_WEIGHT * (p->p_nice - NZERO);
	newpriority = min(newpriority, MAXPRI);
	p->p_usrpri = newpriority;
	if (newpriority < curcpu()->ci_schedstate.spc_curpriority) {
		/*
		 * XXXSMP
		 * Same applies as in setrunnable() above.
		 */
		need_resched();
	}
}

/*
 * We adjust the priority of the current process.  The priority of a process
 * gets worse as it accumulates CPU time.  The cpu usage estimator (p_estcpu)
 * is increased here.  The formula for computing priorities (in resetpriority()
 * above) will compute a different value each time p_estcpu increases.  This
 * can cause a switch, but unless the priority crosses a PPQ boundary the
 * actual queue will not change.  The cpu usage estimator ramps up quite
 * quickly when the process is running (linearly), and decays away
 * exponentially, at a rate which is proportionally slower when the system is
 * busy.  The basic principle is that the system will 90% forget that the
 * process used a lot of CPU time in 5 * loadav seconds.  This causes the
 * system to favor processes which haven't run much recently, and to
 * round-robin among other processes.
 */

void
schedclock(struct proc *p)
{

	p->p_estcpu = ESTCPULIM(p->p_estcpu + 1);
	resetpriority(p);
	if (p->p_priority >= PUSER)
		p->p_priority = p->p_usrpri;
}
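
/*
 * A worked instance of the feedback described above (illustrative numbers
 * only; the real values depend on PUSER, NICE_WEIGHT, PPQ and ESTCPULIM
 * from the system headers): with p_nice == NZERO, a process that has
 * accumulated p_estcpu == 8 gets p_usrpri = PUSER + 8, i.e. eight priority
 * levels worse than a freshly started process, and each +1 of nice costs a
 * further NICE_WEIGHT levels.  Only when the new p_usrpri crosses into a
 * different PPQ-sized band does the process move to a different run queue.
 */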