/*	$NetBSD: kern_synch.c,v 1.99 2000/12/22 22:59:00 jdolecek Exp $	*/

/*-
 * Copyright (c) 1999, 2000 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*-
 * Copyright (c) 1982, 1986, 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_synch.c	8.9 (Berkeley) 5/19/95
 */

#include "opt_ddb.h"
#include "opt_ktrace.h"
#include "opt_lockdebug.h"
#include "opt_multiprocessor.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/callout.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/buf.h>
#include <sys/signalvar.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>

#include <uvm/uvm_extern.h>

#ifdef KTRACE
#include <sys/ktrace.h>
#endif

#include <machine/cpu.h>

int	lbolt;			/* once a second sleep address */
int	rrticks;		/* number of hardclock ticks per roundrobin() */

/*
 * The global scheduler state.
 */
struct prochd sched_qs[RUNQUE_NQS];	/* run queues */
__volatile u_int32_t sched_whichqs;	/* bitmap of non-empty queues */
struct slpque sched_slpque[SLPQUE_TABLESIZE];	/* sleep queues */

struct simplelock sched_lock = SIMPLELOCK_INITIALIZER;
#if defined(MULTIPROCESSOR)
struct lock kernel_lock;
#endif

void schedcpu(void *);
void updatepri(struct proc *);
void endtsleep(void *);

__inline void awaken(struct proc *);

struct callout schedcpu_ch = CALLOUT_INITIALIZER;

/*
 * Force switch among equal priority processes every 100ms.
 * Called from hardclock every hz/10 == rrticks hardclock ticks.
 */
/* ARGSUSED */
void
roundrobin(struct cpu_info *ci)
{
	struct schedstate_percpu *spc = &ci->ci_schedstate;

	spc->spc_rrticks = rrticks;

	if (curproc != NULL) {
		if (spc->spc_flags & SPCF_SEENRR) {
			/*
			 * The process has already been through a roundrobin
			 * without switching and may be hogging the CPU.
			 * Indicate that the process should yield.
			 */
			spc->spc_flags |= SPCF_SHOULDYIELD;
		} else
			spc->spc_flags |= SPCF_SEENRR;
	}
	need_resched(curcpu());
}
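
/*
 * Illustration: with the common hz == 100, rrticks == hz / 10 == 10, so
 * roundrobin() runs every 10 hardclock ticks (100 ms).  A process that is
 * still on the CPU at the next roundrobin() already has SPCF_SEENRR set
 * and is therefore marked SPCF_SHOULDYIELD, i.e. it is asked to yield
 * after hogging the CPU for roughly two roundrobin intervals.
 */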

/*
 * Constants for digital decay and forget:
 *	90% of (p_estcpu) usage in 5 * loadav time
 *	95% of (p_pctcpu) usage in 60 seconds (load insensitive)
 *	Note that, as ps(1) mentions, this can let percentages
 *	total over 100% (I've seen 137.9% for 3 processes).
 *
 * Note that hardclock updates p_estcpu and p_cpticks independently.
 *
 * We wish to decay away 90% of p_estcpu in (5 * loadavg) seconds.
 * That is, the system wants to compute a value of decay such
 * that the following for loop:
 *	for (i = 0; i < (5 * loadavg); i++)
 *		p_estcpu *= decay;
 * will compute
 *	p_estcpu *= 0.1;
 * for all values of loadavg:
 *
 * Mathematically this loop can be expressed by saying:
 *	decay ** (5 * loadavg) ~= .1
 *
 * The system computes decay as:
 *	decay = (2 * loadavg) / (2 * loadavg + 1)
 *
 * We wish to prove that the system's computation of decay
 * will always fulfill the equation:
 *	decay ** (5 * loadavg) ~= .1
 *
 * If we compute b as:
 *	b = 2 * loadavg
 * then
 *	decay = b / (b + 1)
 *
 * We now need to prove two things:
 *	1) Given factor ** (5 * loadavg) ~= .1, prove factor == b/(b+1)
 *	2) Given b/(b+1) ** power ~= .1, prove power == (5 * loadavg)
 *
 * Facts:
 *	For x close to zero, exp(x) =~ 1 + x, since
 *		exp(x) = 0! + x**1/1! + x**2/2! + ... .
 *		therefore exp(-1/b) =~ 1 - (1/b) = (b-1)/b.
 *	For x close to zero, ln(1+x) =~ x, since
 *		ln(1+x) = x - x**2/2 + x**3/3 - ...	-1 < x < 1
 *		therefore ln(b/(b+1)) = ln(1 - 1/(b+1)) =~ -1/(b+1).
 *	ln(.1) =~ -2.30
 *
 * Proof of (1):
 *	Solve (factor)**(power) =~ .1 given power (5*loadav):
 *	solving for factor,
 *	ln(factor) =~ (-2.30/5*loadav), or
 *	factor =~ exp(-1/((5/2.30)*loadav)) =~ exp(-1/(2*loadav)) =
 *	    exp(-1/b) =~ (b-1)/b =~ b/(b+1).  QED
 *
 * Proof of (2):
 *	Solve (factor)**(power) =~ .1 given factor == (b/(b+1)):
 *	solving for power,
 *	power*ln(b/(b+1)) =~ -2.30, or
 *	power =~ 2.3 * (b + 1) = 4.6*loadav + 2.3 =~ 5*loadav.  QED
 *
 * Actual power values for the implemented algorithm are as follows:
 *	loadav: 1	2	3	4
 *	power:	5.68	10.32	14.94	19.55
 */

/* calculations for digital decay to forget 90% of usage in 5*loadav sec */
#define	loadfactor(loadav)	(2 * (loadav))
#define	decay_cpu(loadfac, cpu)	(((loadfac) * (cpu)) / ((loadfac) + FSCALE))

/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
fixpt_t	ccpu = 0.95122942450071400909 * FSCALE;		/* exp(-1/20) */

/*
 * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the
 * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below
 * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT).
 *
 * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used:
 *	1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits).
 *
 * If you don't want to bother with the faster/more-accurate formula, you
 * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate
 * (more general) method of calculating the %age of CPU used by a process.
 */
#define	CCPU_SHIFT	11
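
/*
 * Worked example (illustrative): with a load average of 2, loadfactor()
 * gives 4 (times FSCALE), so each schedcpu() pass multiplies p_estcpu by
 * 4/5.  After 5 * loadavg == 10 passes (10 seconds), p_estcpu has been
 * scaled by (4/5)^10 =~ 0.107, i.e. about 90% of the accumulated CPU
 * usage has been forgotten, as claimed above.
 */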

/*
 * Recompute process priorities, every hz ticks.
 */
/* ARGSUSED */
void
schedcpu(void *arg)
{
	fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);
	struct proc *p;
	int s, s1;
	unsigned int newcpu;
	int clkhz;

	proclist_lock_read();
	for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) {
		/*
		 * Increment time in/out of memory and sleep time
		 * (if sleeping).  We ignore overflow; with 16-bit int's
		 * (remember them?) overflow takes 45 days.
		 */
		p->p_swtime++;
		if (p->p_stat == SSLEEP || p->p_stat == SSTOP)
			p->p_slptime++;
		p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT;
		/*
		 * If the process has slept the entire second,
		 * stop recalculating its priority until it wakes up.
		 */
		if (p->p_slptime > 1)
			continue;
		s = splstatclock();	/* prevent state changes */
		/*
		 * p_pctcpu is only for ps.
		 */
		clkhz = stathz != 0 ? stathz : hz;
#if	(FSHIFT >= CCPU_SHIFT)
		p->p_pctcpu += (clkhz == 100) ?
		    ((fixpt_t)p->p_cpticks) << (FSHIFT - CCPU_SHIFT) :
		    100 * (((fixpt_t)p->p_cpticks)
		    << (FSHIFT - CCPU_SHIFT)) / clkhz;
#else
		p->p_pctcpu += ((FSCALE - ccpu) *
		    (p->p_cpticks * FSCALE / clkhz)) >> FSHIFT;
#endif
		p->p_cpticks = 0;
		newcpu = (u_int)decay_cpu(loadfac, p->p_estcpu);
		p->p_estcpu = newcpu;
		SCHED_LOCK(s1);
		resetpriority(p);
		if (p->p_priority >= PUSER) {
			if (p->p_stat == SRUN &&
			    (p->p_flag & P_INMEM) &&
			    (p->p_priority / PPQ) != (p->p_usrpri / PPQ)) {
				remrunqueue(p);
				p->p_priority = p->p_usrpri;
				setrunqueue(p);
			} else
				p->p_priority = p->p_usrpri;
		}
		SCHED_UNLOCK(s1);
		splx(s);
	}
	proclist_unlock_read();
	uvm_meter();
	wakeup((caddr_t)&lbolt);
	callout_reset(&schedcpu_ch, hz, schedcpu, NULL);
}

/*
 * Recalculate the priority of a process after it has slept for a while.
 * For all load averages >= 1 and max p_estcpu of 255, sleeping for at
 * least six times the loadfactor will decay p_estcpu to zero.
 */
void
updatepri(struct proc *p)
{
	unsigned int newcpu;
	fixpt_t loadfac;

	SCHED_ASSERT_LOCKED();

	newcpu = p->p_estcpu;
	loadfac = loadfactor(averunnable.ldavg[0]);

	if (p->p_slptime > 5 * loadfac)
		p->p_estcpu = 0;
	else {
		p->p_slptime--;	/* the first time was done in schedcpu */
		while (newcpu && --p->p_slptime)
			newcpu = (int)decay_cpu(loadfac, newcpu);
		p->p_estcpu = newcpu;
	}
	resetpriority(p);
}

/*
 * During autoconfiguration or after a panic, a sleep will simply
 * lower the priority briefly to allow interrupts, then return.
 * The priority to be used (safepri) is machine-dependent, thus this
 * value is initialized and maintained in the machine-dependent layers.
 * This priority will typically be 0, or the lowest priority
 * that is safe for use on the interrupt stack; it can be made
 * higher to block network software interrupts after panics.
 */
int safepri;

/*
 * General sleep call.  Suspends the current process until a wakeup is
 * performed on the specified identifier.  The process will then be made
 * runnable with the specified priority.  Sleeps at most timo/hz seconds
 * (0 means no timeout).  If pri includes the PCATCH flag, signals are
 * checked before and after sleeping, else signals are not checked.
 * Returns 0 if awakened, EWOULDBLOCK if the timeout expires.  If PCATCH
 * is set and a signal needs to be delivered, ERESTART is returned if the
 * current system call should be restarted if possible, and EINTR is
 * returned if the system call should be interrupted by the signal.
 *
 * The interlock is held until the scheduler lock is acquired.  The
 * interlock will be locked again before returning to the caller
 * unless the PNORELOCK flag is specified, in which case the
 * interlock will always be unlocked upon return.
 */
int
ltsleep(void *ident, int priority, const char *wmesg, int timo,
    __volatile struct simplelock *interlock)
{
	struct proc *p = curproc;
	struct slpque *qp;
	int sig, s;
	int catch = priority & PCATCH;
	int relock = (priority & PNORELOCK) == 0;

	/*
	 * XXXSMP
	 * This is probably bogus.  Figure out what the right
	 * thing to do here really is.
	 * Note that not sleeping if ltsleep is called with curproc == NULL
	 * in the shutdown case is disgusting but partly necessary given
	 * how shutdown (barely) works.
	 */
	if (cold || (doing_shutdown && (panicstr || (p == NULL)))) {
		/*
		 * After a panic, or during autoconfiguration,
		 * just give interrupts a chance, then just return;
		 * don't run any other procs or panic below,
		 * in case this is the idle process and already asleep.
		 */
		s = splhigh();
		splx(safepri);
		splx(s);
		if (interlock != NULL && relock == 0)
			simple_unlock(interlock);
		return (0);
	}

#ifdef KTRACE
	if (KTRPOINT(p, KTR_CSW))
		ktrcsw(p, 1, 0);
#endif

	SCHED_LOCK(s);

#ifdef DIAGNOSTIC
	if (ident == NULL)
		panic("ltsleep: ident == NULL");
	if (p->p_stat != SONPROC)
		panic("ltsleep: p_stat %d != SONPROC", p->p_stat);
	if (p->p_back != NULL)
		panic("ltsleep: p_back != NULL");
#endif

	p->p_wchan = ident;
	p->p_wmesg = wmesg;
	p->p_slptime = 0;
	p->p_priority = priority & PRIMASK;

	qp = SLPQUE(ident);
	if (qp->sq_head == 0)
		qp->sq_head = p;
	else
		*qp->sq_tailp = p;
	*(qp->sq_tailp = &p->p_forw) = 0;

	if (timo)
		callout_reset(&p->p_tsleep_ch, timo, endtsleep, p);

	/*
	 * We can now release the interlock; the scheduler_slock
	 * is held, so a thread can't get in to do wakeup() before
	 * we do the switch.
	 *
	 * XXX We leave the code block here, after inserting ourselves
	 * on the sleep queue, because we might want a more clever
	 * data structure for the sleep queues at some point.
	 */
	if (interlock != NULL)
		simple_unlock(interlock);

	/*
	 * We put ourselves on the sleep queue and start our timeout
	 * before calling CURSIG, as we could stop there, and a wakeup
	 * or a SIGCONT (or both) could occur while we were stopped.
	 * A SIGCONT would cause us to be marked as SSLEEP
	 * without resuming us, thus we must be ready for sleep
	 * when CURSIG is called.  If the wakeup happens while we're
	 * stopped, p->p_wchan will be 0 upon return from CURSIG.
	 */
	if (catch) {
		p->p_flag |= P_SINTR;
		if ((sig = CURSIG(p)) != 0) {
			if (p->p_wchan != NULL)
				unsleep(p);
			p->p_stat = SONPROC;
			SCHED_UNLOCK(s);
			goto resume;
		}
		if (p->p_wchan == NULL) {
			catch = 0;
			SCHED_UNLOCK(s);
			goto resume;
		}
	} else
		sig = 0;
	p->p_stat = SSLEEP;
	p->p_stats->p_ru.ru_nvcsw++;

	SCHED_ASSERT_LOCKED();
	mi_switch(p);

#ifdef	DDB
	/* handy breakpoint location after process "wakes" */
	asm(".globl bpendtsleep ; bpendtsleep:");
#endif

	SCHED_ASSERT_UNLOCKED();
	splx(s);

 resume:
	KDASSERT(p->p_cpu != NULL);
	KDASSERT(p->p_cpu == curcpu());
	p->p_cpu->ci_schedstate.spc_curpriority = p->p_usrpri;

	p->p_flag &= ~P_SINTR;
	if (p->p_flag & P_TIMEOUT) {
		p->p_flag &= ~P_TIMEOUT;
		if (sig == 0) {
#ifdef KTRACE
			if (KTRPOINT(p, KTR_CSW))
				ktrcsw(p, 0, 0);
#endif
			if (relock && interlock != NULL)
				simple_lock(interlock);
			return (EWOULDBLOCK);
		}
	} else if (timo)
		callout_stop(&p->p_tsleep_ch);
	if (catch && (sig != 0 || (sig = CURSIG(p)) != 0)) {
#ifdef KTRACE
		if (KTRPOINT(p, KTR_CSW))
			ktrcsw(p, 0, 0);
#endif
		if (relock && interlock != NULL)
			simple_lock(interlock);
		if ((SIGACTION(p, sig).sa_flags & SA_RESTART) == 0)
			return (EINTR);
		return (ERESTART);
	}
#ifdef KTRACE
	if (KTRPOINT(p, KTR_CSW))
		ktrcsw(p, 0, 0);
#endif
	if (relock && interlock != NULL)
		simple_lock(interlock);
	return (0);
}
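
/*
 * A typical caller pattern looks roughly like the sketch below (sc,
 * sc_slock, sc_flags, SC_DONE and the "scwait" wmesg are hypothetical
 * driver state, not part of this file):
 *
 *	simple_lock(&sc->sc_slock);
 *	while ((sc->sc_flags & SC_DONE) == 0) {
 *		error = ltsleep(&sc->sc_flags, PRIBIO | PCATCH, "scwait",
 *		    0, &sc->sc_slock);
 *		if (error)
 *			break;
 *	}
 *	simple_unlock(&sc->sc_slock);
 *
 * Because ltsleep() drops the interlock only after the scheduler lock is
 * held, a wakeup() issued between the test of sc_flags and the sleep
 * cannot be lost, and unless PNORELOCK is passed the interlock is held
 * again by the time ltsleep() returns.
 */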

/*
 * Implement timeout for tsleep.
 * If process hasn't been awakened (wchan non-zero),
 * set timeout flag and undo the sleep.  If proc
 * is stopped, just unsleep so it will remain stopped.
 */
void
endtsleep(void *arg)
{
	struct proc *p;
	int s;

	p = (struct proc *)arg;

	SCHED_LOCK(s);
	if (p->p_wchan) {
		if (p->p_stat == SSLEEP)
			setrunnable(p);
		else
			unsleep(p);
		p->p_flag |= P_TIMEOUT;
	}
	SCHED_UNLOCK(s);
}

/*
 * Remove a process from its wait queue
 */
void
unsleep(struct proc *p)
{
	struct slpque *qp;
	struct proc **hp;

	SCHED_ASSERT_LOCKED();

	if (p->p_wchan) {
		hp = &(qp = SLPQUE(p->p_wchan))->sq_head;
		while (*hp != p)
			hp = &(*hp)->p_forw;
		*hp = p->p_forw;
		if (qp->sq_tailp == &p->p_forw)
			qp->sq_tailp = hp;
		p->p_wchan = 0;
	}
}

/*
 * Optimized-for-wakeup() version of setrunnable().
 */
__inline void
awaken(struct proc *p)
{

	SCHED_ASSERT_LOCKED();

	if (p->p_slptime > 1)
		updatepri(p);
	p->p_slptime = 0;
	p->p_stat = SRUN;

	/*
	 * Since curpriority is a user priority, p->p_priority
	 * is always better than curpriority.
	 */
	if (p->p_flag & P_INMEM) {
		setrunqueue(p);
		KASSERT(p->p_cpu != NULL);
		need_resched(p->p_cpu);
	} else
		sched_wakeup(&proc0);
}

#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
void
sched_unlock_idle(void)
{

	simple_unlock(&sched_lock);
}

void
sched_lock_idle(void)
{

	simple_lock(&sched_lock);
}
#endif /* MULTIPROCESSOR || LOCKDEBUG */

/*
 * Make all processes sleeping on the specified identifier runnable.
 */
void
wakeup(void *ident)
{
	int s;

	SCHED_ASSERT_UNLOCKED();

	SCHED_LOCK(s);
	sched_wakeup(ident);
	SCHED_UNLOCK(s);
}

void
sched_wakeup(void *ident)
{
	struct slpque *qp;
	struct proc *p, **q;

	SCHED_ASSERT_LOCKED();

	qp = SLPQUE(ident);
 restart:
	for (q = &qp->sq_head; (p = *q) != NULL; ) {
#ifdef DIAGNOSTIC
		if (p->p_back || (p->p_stat != SSLEEP && p->p_stat != SSTOP))
			panic("wakeup");
#endif
		if (p->p_wchan == ident) {
			p->p_wchan = 0;
			*q = p->p_forw;
			if (qp->sq_tailp == &p->p_forw)
				qp->sq_tailp = q;
			if (p->p_stat == SSLEEP) {
				awaken(p);
				goto restart;
			}
		} else
			q = &p->p_forw;
	}
}

/*
 * Make the highest priority process first in line on the specified
 * identifier runnable.
 */
void
wakeup_one(void *ident)
{
	struct slpque *qp;
	struct proc *p, **q;
	struct proc *best_sleepp, **best_sleepq;
	struct proc *best_stopp, **best_stopq;
	int s;

	best_sleepp = best_stopp = NULL;
	best_sleepq = best_stopq = NULL;

	SCHED_LOCK(s);

	qp = SLPQUE(ident);

	for (q = &qp->sq_head; (p = *q) != NULL; q = &p->p_forw) {
#ifdef DIAGNOSTIC
		if (p->p_back || (p->p_stat != SSLEEP && p->p_stat != SSTOP))
			panic("wakeup_one");
#endif
		if (p->p_wchan == ident) {
			if (p->p_stat == SSLEEP) {
				if (best_sleepp == NULL ||
				    p->p_priority < best_sleepp->p_priority) {
					best_sleepp = p;
					best_sleepq = q;
				}
			} else {
				if (best_stopp == NULL ||
				    p->p_priority < best_stopp->p_priority) {
					best_stopp = p;
					best_stopq = q;
				}
			}
		}
	}

	/*
	 * Consider any SSLEEP process higher than the highest priority SSTOP
	 * process.
	 */
	if (best_sleepp != NULL) {
		p = best_sleepp;
		q = best_sleepq;
	} else {
		p = best_stopp;
		q = best_stopq;
	}

	if (p != NULL) {
		p->p_wchan = NULL;
		*q = p->p_forw;
		if (qp->sq_tailp == &p->p_forw)
			qp->sq_tailp = q;
		if (p->p_stat == SSLEEP)
			awaken(p);
	}
	SCHED_UNLOCK(s);
}

/*
 * General yield call.  Puts the current process back on its run queue and
 * performs a voluntary context switch.
 */
void
yield(void)
{
	struct proc *p = curproc;
	int s;

	SCHED_LOCK(s);
	p->p_priority = p->p_usrpri;
	p->p_stat = SRUN;
	setrunqueue(p);
	p->p_stats->p_ru.ru_nvcsw++;
	mi_switch(p);
	SCHED_ASSERT_UNLOCKED();
	splx(s);
}

/*
 * General preemption call.  Puts the current process back on its run queue
 * and performs an involuntary context switch.  If a process is supplied,
 * we switch to that process.  Otherwise, we use the normal process selection
 * criteria.
 */
void
preempt(struct proc *newp)
{
	struct proc *p = curproc;
	int s;

	/*
	 * XXX Switching to a specific process is not supported yet.
	 */
	if (newp != NULL)
		panic("preempt: cpu_preempt not yet implemented");

	SCHED_LOCK(s);
	p->p_priority = p->p_usrpri;
	p->p_stat = SRUN;
	setrunqueue(p);
	p->p_stats->p_ru.ru_nivcsw++;
	mi_switch(p);
	SCHED_ASSERT_UNLOCKED();
	splx(s);
}

/*
 * The machine-independent parts of context switch.
 * Must be called at splsched() (no higher!) and with
 * the sched_lock held.
 */
void
mi_switch(struct proc *p)
{
	struct schedstate_percpu *spc;
	struct rlimit *rlim;
	long s, u;
	struct timeval tv;
#if defined(MULTIPROCESSOR)
	int hold_count;
#endif

	SCHED_ASSERT_LOCKED();

#if defined(MULTIPROCESSOR)
	/*
	 * Release the kernel_lock, as we are about to yield the CPU.
	 * The scheduler lock is still held until cpu_switch()
	 * selects a new process and removes it from the run queue.
	 */
	if (p->p_flag & P_BIGLOCK)
		hold_count = spinlock_release_all(&kernel_lock);
#endif

	KDASSERT(p->p_cpu != NULL);
	KDASSERT(p->p_cpu == curcpu());

	spc = &p->p_cpu->ci_schedstate;

#if defined(LOCKDEBUG) || defined(DIAGNOSTIC)
	spinlock_switchcheck();
#endif
#ifdef LOCKDEBUG
	simple_lock_switchcheck();
#endif

	/*
	 * Compute the amount of time during which the current
	 * process was running, and add that to its total so far.
	 */
	microtime(&tv);
	u = p->p_rtime.tv_usec + (tv.tv_usec - spc->spc_runtime.tv_usec);
	s = p->p_rtime.tv_sec + (tv.tv_sec - spc->spc_runtime.tv_sec);
	if (u < 0) {
		u += 1000000;
		s--;
	} else if (u >= 1000000) {
		u -= 1000000;
		s++;
	}
	p->p_rtime.tv_usec = u;
	p->p_rtime.tv_sec = s;

	/*
	 * Check if the process exceeds its cpu resource allocation.
	 * If over max, kill it.  In any case, if it has run for more
	 * than 10 minutes, reduce priority to give others a chance.
	 */
	rlim = &p->p_rlimit[RLIMIT_CPU];
	if (s >= rlim->rlim_cur) {
		if (s >= rlim->rlim_max)
			psignal(p, SIGKILL);
		else {
			psignal(p, SIGXCPU);
			if (rlim->rlim_cur < rlim->rlim_max)
				rlim->rlim_cur += 5;
		}
	}
	if (autonicetime && s > autonicetime && p->p_ucred->cr_uid &&
	    p->p_nice == NZERO) {
		p->p_nice = autoniceval + NZERO;
		resetpriority(p);
	}

	/*
	 * Process is about to yield the CPU; clear the appropriate
	 * scheduling flags.
	 */
	spc->spc_flags &= ~SPCF_SWITCHCLEAR;

	/*
	 * Pick a new current process and switch to it.  When we
	 * run again, we'll return back here.
	 */
	uvmexp.swtch++;
	cpu_switch(p);

	/*
	 * Make sure that MD code released the scheduler lock before
	 * resuming us.
	 */
	SCHED_ASSERT_UNLOCKED();

	/*
	 * We're running again; record our new start time.  We might
	 * be running on a new CPU now, so don't use the cached
	 * schedstate_percpu pointer.
	 */
	KDASSERT(p->p_cpu != NULL);
	KDASSERT(p->p_cpu == curcpu());
	microtime(&p->p_cpu->ci_schedstate.spc_runtime);

#if defined(MULTIPROCESSOR)
	/*
	 * Reacquire the kernel_lock now.  We do this after we've
	 * released the scheduler lock to avoid deadlock, and before
	 * we reacquire the interlock.
	 */
	if (p->p_flag & P_BIGLOCK)
		spinlock_acquire_count(&kernel_lock, hold_count);
#endif
}

/*
 * Initialize the (doubly-linked) run queues
 * to be empty.
 */
void
rqinit()
{
	int i;

	for (i = 0; i < RUNQUE_NQS; i++)
		sched_qs[i].ph_link = sched_qs[i].ph_rlink =
		    (struct proc *)&sched_qs[i];
}
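
/*
 * Note on the run queue layout (informational): the queues declared above
 * as sched_qs[RUNQUE_NQS] each cover a band of PPQ adjacent priority
 * values, and sched_whichqs carries one bit per non-empty queue.  That is
 * why schedcpu() only bothers to requeue a process when p_priority / PPQ
 * changes: within the same band the process would land on the same queue
 * anyway.
 */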

/*
 * Change process state to be runnable,
 * placing it on the run queue if it is in memory,
 * and awakening the swapper if it isn't in memory.
 */
void
setrunnable(struct proc *p)
{

	SCHED_ASSERT_LOCKED();

	switch (p->p_stat) {
	case 0:
	case SRUN:
	case SONPROC:
	case SZOMB:
	case SDEAD:
	default:
		panic("setrunnable");
	case SSTOP:
		/*
		 * If we're being traced (possibly because someone attached us
		 * while we were stopped), check for a signal from the debugger.
		 */
		if ((p->p_flag & P_TRACED) != 0 && p->p_xstat != 0) {
			sigaddset(&p->p_sigctx.ps_siglist, p->p_xstat);
			p->p_sigctx.ps_sigcheck = 1;
		}
		/* FALLTHROUGH */
	case SSLEEP:
		unsleep(p);		/* e.g. when sending signals */
		break;

	case SIDL:
		break;
	}
	p->p_stat = SRUN;
	if (p->p_flag & P_INMEM)
		setrunqueue(p);

	if (p->p_slptime > 1)
		updatepri(p);
	p->p_slptime = 0;
	if ((p->p_flag & P_INMEM) == 0)
		sched_wakeup((caddr_t)&proc0);
	else if (p->p_priority < curcpu()->ci_schedstate.spc_curpriority) {
		/*
		 * XXXSMP
		 * This is not exactly right.  Since p->p_cpu persists
		 * across a context switch, this gives us some sort
		 * of processor affinity.  But we need to figure out
		 * at what point it's better to reschedule on a different
		 * CPU than the last one.
		 */
		need_resched((p->p_cpu != NULL) ? p->p_cpu : curcpu());
	}
}

/*
 * Compute the priority of a process when running in user mode.
 * Arrange to reschedule if the resulting priority is better
 * than that of the current process.
 */
void
resetpriority(struct proc *p)
{
	unsigned int newpriority;

	SCHED_ASSERT_LOCKED();

	newpriority = PUSER + p->p_estcpu + NICE_WEIGHT * (p->p_nice - NZERO);
	newpriority = min(newpriority, MAXPRI);
	p->p_usrpri = newpriority;
	if (newpriority < curcpu()->ci_schedstate.spc_curpriority) {
		/*
		 * XXXSMP
		 * Same applies as in setrunnable() above.
		 */
		need_resched((p->p_cpu != NULL) ? p->p_cpu : curcpu());
	}
}

/*
 * We adjust the priority of the current process.  The priority of a process
 * gets worse as it accumulates CPU time.  The cpu usage estimator (p_estcpu)
 * is increased here.  The formula for computing priorities (in resetpriority()
 * above) will compute a different value each time p_estcpu increases.  This
 * can cause a switch, but unless the priority crosses a PPQ boundary the
 * actual queue will not change.  The cpu usage estimator ramps up quite
 * quickly when the process is running (linearly), and decays away
 * exponentially, at a rate which is proportionally slower when the system is
 * busy.  The basic principle is that the system will 90% forget that the
 * process used a lot of CPU time in 5 * loadav seconds.  This causes the
 * system to favor processes which haven't run much recently, and to
 * round-robin among other processes.
 */
void
schedclock(struct proc *p)
{
	int s;

	p->p_estcpu = ESTCPULIM(p->p_estcpu + 1);

	SCHED_LOCK(s);
	resetpriority(p);
	SCHED_UNLOCK(s);

	if (p->p_priority >= PUSER)
		p->p_priority = p->p_usrpri;
}
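
/*
 * Illustration: each schedclock() tick adds one to p_estcpu (up to
 * ESTCPULIM), and resetpriority() feeds that straight into the user
 * priority (PUSER + p_estcpu + NICE_WEIGHT * (p_nice - NZERO), clamped
 * to MAXPRI), so every tick of accumulated CPU time makes the priority
 * one step worse until the decay in schedcpu()/updatepri() pulls
 * p_estcpu back down.
 */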

void
suspendsched()
{
	struct proc *p;
	int s;

	/*
	 * Convert all non-P_SYSTEM SSLEEP or SRUN processes to SSTOP.
	 */
	proclist_lock_read();
	SCHED_LOCK(s);
	for (p = LIST_FIRST(&allproc); p != NULL; p = LIST_NEXT(p, p_list)) {
		if ((p->p_flag & P_SYSTEM) != 0)
			continue;
		switch (p->p_stat) {
		case SRUN:
			if ((p->p_flag & P_INMEM) != 0)
				remrunqueue(p);
			/* FALLTHROUGH */
		case SSLEEP:
			p->p_stat = SSTOP;
			break;
		case SONPROC:
			/*
			 * XXX SMP: we need to deal with processes on
			 * other CPUs!
			 */
			break;
		default:
			break;
		}
	}
	SCHED_UNLOCK(s);
	proclist_unlock_read();
}