#include <u.h>
#include "../port/lib.h"
#include "mem.h"
#include "dat.h"
#include "fns.h"
#include "../port/error.h"
#include "edf.h"
#include <trace.h>

int	schedgain = 30;	/* units in seconds */
int	nrdy;
Ref	noteidalloc;

void updatecpu(Proc*);
int reprioritize(Proc*);

ulong	delayedscheds;	/* statistics */
long	skipscheds;
long	preempts;
ulong	load;

static Ref pidalloc;

static struct Procalloc
{
	Lock;
	Proc*	ht[128];
	Proc*	arena;
	Proc*	free;
} procalloc;

enum
{
	Q=10,
	DQ=4,
	Scaling=2,
};

Schedq	runq[Nrq];
ulong	runvec;

char *statename[] =
{	/* BUG: generate automatically */
	"Dead",
	"Moribund",
	"Ready",
	"Scheding",
	"Running",
	"Queueing",
	"QueueingR",
	"QueueingW",
	"Wakeme",
	"Broken",
	"Stopped",
	"Rendez",
	"Waitrelease",
};

static void pidhash(Proc*);
static void pidunhash(Proc*);
static void rebalance(void);

/*
 * Always splhi()'ed.
 */
void
schedinit(void)		/* never returns */
{
	Edf *e;

	setlabel(&m->sched);
	if(up) {
		if((e = up->edf) && (e->flags & Admitted))
			edfrecord(up);
		m->proc = 0;
		switch(up->state) {
		case Running:
			ready(up);
			break;
		case Moribund:
			up->state = Dead;
			edfstop(up);
			if(up->edf)
				free(up->edf);
			up->edf = nil;

			/*
			 * Holding locks from pexit:
			 *	procalloc
			 *	palloc
			 */
			mmurelease(up);

			up->qnext = procalloc.free;
			procalloc.free = up;

			unlock(&palloc);
			unlock(&procalloc);
			break;
		}
		up->mach = nil;
		updatecpu(up);
		up = nil;
	}
	sched();
}

/*
 * If changing this routine, look also at sleep().  It
 * contains a copy of the guts of sched().
 */
void
sched(void)
{
	Proc *p;

	if(m->ilockdepth)
		panic("cpu%d: ilockdepth %d, last lock %#p at %#p, sched called from %#p",
			m->machno,
			m->ilockdepth,
			up? up->lastilock: nil,
			(up && up->lastilock)? up->lastilock->pc: 0,
			getcallerpc(&p+2));
	if(up){
		/*
		 * Delay the sched until the process gives up the locks
		 * it is holding.  This avoids dumb lock loops.
		 * Don't delay if the process is Moribund.
		 * It called sched to die.
		 * But do sched eventually.  This avoids a missing unlock
		 * from hanging the entire kernel.
		 * But don't reschedule procs holding palloc or procalloc.
		 * Those are far too important to be holding while asleep.
		 *
		 * This test is not exact.  There can still be a few instructions
		 * in the middle of taslock when a process holds a lock
		 * but Lock.p has not yet been initialized.
		 */
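		/*
		 * Illustrative consequence of the test below: a process
		 * holding an ordinary lock defers at most 20 consecutive
		 * reschedule requests before sched() proceeds anyway,
		 * so a missing unlock degrades scheduling but cannot
		 * hang the kernel; only holders of palloc or procalloc
		 * keep deferring, since those locks must never be held
		 * by a sleeping process.
		 */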
		if(up->nlocks.ref)
		if(up->state != Moribund)
		if(up->delaysched < 20
		|| palloc.Lock.p == up
		|| procalloc.Lock.p == up){
			up->delaysched++;
			delayedscheds++;
			return;
		}
		up->delaysched = 0;

		splhi();

		/* statistics */
		m->cs++;

		procsave(up);
		if(setlabel(&up->sched)){
			procrestore(up);
			spllo();
			return;
		}
		gotolabel(&m->sched);
	}
	p = runproc();
	if(!p->edf){
		updatecpu(p);
		p->priority = reprioritize(p);
	}
	if(p != m->readied)
		m->schedticks = m->ticks + HZ/10;
	m->readied = 0;
	up = p;
	up->state = Running;
	up->mach = MACHP(m->machno);
	m->proc = up;
	mmuswitch(up);
	gotolabel(&up->sched);
}

int
anyready(void)
{
	return runvec;
}

int
anyhigher(void)
{
	return runvec & ~((1<<(up->priority+1))-1);
}

/*
 * here once per clock tick to see if we should resched
 */
void
hzsched(void)
{
	/* once a second, rebalance will reprioritize ready procs */
	if(m->machno == 0)
		rebalance();

	/* unless preempted, get to run for at least 100ms */
	if(anyhigher()
	|| (!up->fixedpri && m->ticks > m->schedticks && anyready())){
		m->readied = nil;	/* avoid cooperative scheduling */
		up->delaysched++;
	}
}

/*
 * here at the end of non-clock interrupts to see if we should preempt the
 * current process.  Returns 1 if preempted, 0 otherwise.
 */
int
preempted(void)
{
	if(up && up->state == Running)
	if(up->preempted == 0)
	if(anyhigher())
	if(!active.exiting){
		m->readied = nil;	/* avoid cooperative scheduling */
		up->preempted = 1;
		sched();
		splhi();
		up->preempted = 0;
		return 1;
	}
	return 0;
}

/*
 * Update the cpu time average for this particular process,
 * which is about to change from up -> not up or vice versa.
 * p->lastupdate is the last time an updatecpu happened.
 *
 * The cpu time average is a decaying average that lasts
 * about D clock ticks.  D is chosen to be approximately
 * the cpu time of a cpu-intensive "quick job".  A job has to run
 * for approximately D clock ticks before we home in on its
 * actual cpu usage.  Thus if you manage to get in and get out
 * quickly, you won't be penalized during your burst.  Once you
 * start using your share of the cpu for more than about D
 * clock ticks though, your p->cpu hits 1000 (1.0) and you end up
 * below all the other quick jobs.  Interactive tasks, because
 * they basically always use less than their fair share of cpu,
 * will be rewarded.
 *
 * If the process has not been running, then we want to
 * apply the filter
 *
 *	cpu = cpu * (D-1)/D
 *
 * n times, yielding
 *
 *	cpu = cpu * ((D-1)/D)^n
 *
 * but D is big enough that this is approximately
 *
 *	cpu = cpu * (D-n)/D
 *
 * so we use that instead.
 *
 * If the process has been running, we apply the filter to
 * 1 - cpu, yielding a similar equation.  Note that cpu is
 * stored in fixed point (* 1000).
 *
 * Updatecpu must be called before changing up, in order
 * to maintain accurate cpu usage statistics.  It can be called
 * at any time to bring the stats for a given proc up-to-date.
 */
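/*
 * Worked example (illustrative, assuming HZ == 100, so
 * D = schedgain*HZ*Scaling = 30*100*2 = 6000 scaled ticks):
 * a process with p->cpu == 500 that has been off the cpu for
 * 1.5 seconds (150 ticks, n = 300 scaled units) decays to
 *
 *	cpu = 500*(6000-300)/6000 = 475
 *
 * while the same process, had it been running for that span,
 * would climb toward 1000:
 *
 *	cpu = 1000 - (1000-500)*(6000-300)/6000 = 525
 */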
void
updatecpu(Proc *p)
{
	int n, t, ocpu;
	int D = schedgain*HZ*Scaling;

	if(p->edf)
		return;

	t = MACHP(0)->ticks*Scaling + Scaling/2;
	n = t - p->lastupdate;
	p->lastupdate = t;

	if(n == 0)
		return;
	if(n > D)
		n = D;

	ocpu = p->cpu;
	if(p != up)
		p->cpu = (ocpu*(D-n))/D;
	else{
		t = 1000 - ocpu;
		t = (t*(D-n))/D;
		p->cpu = 1000 - t;
	}

//	iprint("pid %d %s for %d cpu %d -> %d\n", p->pid, p==up?"active":"inactive", n, ocpu, p->cpu);
}

/*
 * On average, p has used p->cpu of a cpu recently.
 * Its fair share is conf.nmach/m->load of a cpu.  If it has been getting
 * too much, penalize it.  If it has been getting not enough, reward it.
 * I don't think you can get much more than your fair share that
 * often, so most of the queues are for using less.  Having a priority
 * of 3 means you're just right.  Having a higher priority (up to p->basepri)
 * means you're not using as much as you could.
 */
int
reprioritize(Proc *p)
{
	int fairshare, n, load, ratio;

	load = MACHP(0)->load;
	if(load == 0)
		return p->basepri;

	/*
	 * fairshare = 1.000 * conf.nmach * 1.000/load,
	 * except the decimal point is moved three places
	 * on both load and fairshare.
	 */
	fairshare = (conf.nmach*1000*1000)/load;
	n = p->cpu;
	if(n == 0)
		n = 1;
	ratio = (fairshare+n/2) / n;
	if(ratio > p->basepri)
		ratio = p->basepri;
	if(ratio < 0)
		panic("reprioritize");
//	iprint("pid %d cpu %d load %d fair %d pri %d\n", p->pid, p->cpu, load, fairshare, ratio);
	return ratio;
}

/*
 * add a process to a scheduling queue
 */
void
queueproc(Schedq *rq, Proc *p)
{
	int pri;

	pri = rq - runq;
	lock(runq);
	p->priority = pri;
	p->rnext = 0;
	if(rq->tail)
		rq->tail->rnext = p;
	else
		rq->head = p;
	rq->tail = p;
	rq->n++;
	nrdy++;
	runvec |= 1<<pri;
	unlock(runq);
}

/*
 * try to remove a process from a scheduling queue (called splhi)
 */
Proc*
dequeueproc(Schedq *rq, Proc *tp)
{
	Proc *l, *p;

	if(!canlock(runq))
		return nil;

	/*
	 * the queue may have changed before we locked runq,
	 * refind the target process.
	 */
	l = 0;
	for(p = rq->head; p; p = p->rnext){
		if(p == tp)
			break;
		l = p;
	}

	/*
	 * p->mach==0 only when process state is saved
	 */
	if(p == 0 || p->mach){
		unlock(runq);
		return nil;
	}
	if(p->rnext == 0)
		rq->tail = l;
	if(l)
		l->rnext = p->rnext;
	else
		rq->head = p->rnext;
	if(rq->head == nil)
		runvec &= ~(1<<(rq-runq));
	rq->n--;
	nrdy--;
	if(p->state != Ready)
		print("dequeueproc %s %lud %s\n", p->text, p->pid, statename[p->state]);

	unlock(runq);
	return p;
}
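/*
 * Example of the runvec bookkeeping above (illustrative): if only
 * runq[3] and runq[10] are non-empty, runvec == (1<<3)|(1<<10).
 * For a process running at priority 3, anyhigher() computes
 * runvec & ~((1<<4)-1) == 1<<10, which is non-zero, so a
 * higher-priority process is ready to run.
 */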
/*
 * ready(p) picks a new priority for a process and sticks it in the
 * runq for that priority.
 */
void
ready(Proc *p)
{
	int s, pri;
	Schedq *rq;
	void (*pt)(Proc*, int, vlong);

	s = splhi();
	if(edfready(p)){
		splx(s);
		return;
	}

	if(up != p)
		m->readied = p;	/* group scheduling */

	updatecpu(p);
	pri = reprioritize(p);
	p->priority = pri;
	rq = &runq[pri];
	p->state = Ready;
	queueproc(rq, p);
	pt = proctrace;
	if(pt)
		pt(p, SReady, 0);
	splx(s);
}

/*
 * yield the processor and drop our priority
 */
void
yield(void)
{
	if(anyready()){
		/* pretend we just used 1/2 tick */
		up->lastupdate -= Scaling/2;
		sched();
	}
}

/*
 * recalculate priorities once a second.  We need to do this
 * since priorities will otherwise only be recalculated when
 * the running process blocks.
 */
ulong balancetime;

static void
rebalance(void)
{
	int pri, npri, t, x;
	Schedq *rq;
	Proc *p;

	t = m->ticks;
	if(t - balancetime < HZ)
		return;
	balancetime = t;

	for(pri=0, rq=runq; pri<Npriq; pri++, rq++){
another:
		p = rq->head;
		if(p == nil)
			continue;
		if(p->mp != MACHP(m->machno))
			continue;
		if(pri == p->basepri)
			continue;
		updatecpu(p);
		npri = reprioritize(p);
		if(npri != pri){
			x = splhi();
			p = dequeueproc(rq, p);
			if(p)
				queueproc(&runq[npri], p);
			splx(x);
			goto another;
		}
	}
}

/*
 * pick a process to run
 */
Proc*
runproc(void)
{
	Schedq *rq;
	Proc *p;
	ulong start, now;
	int i;
	void (*pt)(Proc*, int, vlong);

	start = perfticks();

	/* cooperative scheduling until the clock ticks */
	if((p=m->readied) && p->mach==0 && p->state==Ready
	&& runq[Nrq-1].head == nil && runq[Nrq-2].head == nil){
		skipscheds++;
		rq = &runq[p->priority];
		goto found;
	}

	preempts++;

loop:
	/*
	 * find a process that last ran on this processor (affinity),
	 * or one that hasn't moved in a while (load balancing).  Every
	 * time around the loop affinity goes down.
	 */
	spllo();
	for(i = 0;; i++){
		/*
		 * find the highest priority target process that this
		 * processor can run given affinity constraints.
		 */
		for(rq = &runq[Nrq-1]; rq >= runq; rq--){
			for(p = rq->head; p; p = p->rnext){
				if(p->mp == nil || p->mp == MACHP(m->machno)
				|| (!p->wired && i > 0))
					goto found;
			}
		}

		/* waste time or halt the CPU */
		idlehands();

		/* remember how much time we're here */
		now = perfticks();
		m->perf.inidle += now-start;
		start = now;
	}

found:
	splhi();
	p = dequeueproc(rq, p);
	if(p == nil)
		goto loop;

	p->state = Scheding;
	p->mp = MACHP(m->machno);

	if(edflock(p)){
		edfrun(p, rq == &runq[PriEdf]);	/* start deadline timer and do admin */
		edfunlock();
	}
	pt = proctrace;
	if(pt)
		pt(p, SRun, 0);
	return p;
}
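/*
 * Descriptive note on the affinity loop above: on the first pass
 * (i == 0) a processor only takes procs that last ran here or have
 * never run anywhere (p->mp nil); if nothing qualifies it idles,
 * and on later passes it will steal any unwired proc, trading
 * cache affinity for load balancing.
 */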
int
canpage(Proc *p)
{
	int ok = 0;

	splhi();
	lock(runq);
	/* Only reliable way to see if we are Running */
	if(p->mach == 0) {
		p->newtlb = 1;
		ok = 1;
	}
	unlock(runq);
	spllo();

	return ok;
}

Proc*
newproc(void)
{
	char msg[64];
	Proc *p;

	lock(&procalloc);
	for(;;) {
		if(p = procalloc.free)
			break;

		snprint(msg, sizeof msg, "no procs; %s forking",
			up? up->text: "kernel");
		unlock(&procalloc);
		resrcwait(msg);
		lock(&procalloc);
	}
	procalloc.free = p->qnext;
	unlock(&procalloc);

	p->state = Scheding;
	p->psstate = "New";
	p->mach = 0;
	p->qnext = 0;
	p->nchild = 0;
	p->nwait = 0;
	p->waitq = 0;
	p->parent = 0;
	p->pgrp = 0;
	p->egrp = 0;
	p->fgrp = 0;
	p->rgrp = 0;
	p->pdbg = 0;
	p->fpstate = FPinit;
	p->kp = 0;
	if(up && up->procctl == Proc_tracesyscall)
		p->procctl = Proc_tracesyscall;
	else
		p->procctl = 0;
	p->syscalltrace = 0;
	p->notepending = 0;
	p->ureg = 0;
	p->privatemem = 0;
	p->noswap = 0;
	p->errstr = p->errbuf0;
	p->syserrstr = p->errbuf1;
	p->errbuf0[0] = '\0';
	p->errbuf1[0] = '\0';
	p->nlocks.ref = 0;
	p->delaysched = 0;
	p->trace = 0;
	kstrdup(&p->user, "*nouser");
	kstrdup(&p->text, "*notext");
	kstrdup(&p->args, "");
	p->nargs = 0;
	p->setargs = 0;
	memset(p->seg, 0, sizeof p->seg);
	p->pid = incref(&pidalloc);
	pidhash(p);
	p->noteid = incref(&noteidalloc);
	if(p->pid==0 || p->noteid==0)
		panic("pidalloc");
	if(p->kstack == 0)
		p->kstack = smalloc(KSTACK);

	/* sched params */
	p->mp = 0;
	p->wired = 0;
	procpriority(p, PriNormal, 0);
	p->cpu = 0;
	p->lastupdate = MACHP(0)->ticks*Scaling;
	p->edf = nil;

	return p;
}

/*
 * wire this proc to a machine
 */
void
procwired(Proc *p, int bm)
{
	Proc *pp;
	int i;
	char nwired[MAXMACH];
	Mach *wm;

	if(bm < 0){
		/* pick a machine to wire to */
		memset(nwired, 0, sizeof(nwired));
		p->wired = 0;
		pp = proctab(0);
		for(i=0; i<conf.nproc; i++, pp++){
			wm = pp->wired;
			if(wm && pp->pid)
				nwired[wm->machno]++;
		}
		bm = 0;
		for(i=0; i<conf.nmach; i++)
			if(nwired[i] < nwired[bm])
				bm = i;
	} else {
		/* use the virtual machine requested */
		bm = bm % conf.nmach;
	}

	p->wired = MACHP(bm);
	p->mp = p->wired;
}

void
procpriority(Proc *p, int pri, int fixed)
{
	if(pri >= Npriq)
		pri = Npriq - 1;
	else if(pri < 0)
		pri = 0;
	p->basepri = pri;
	p->priority = pri;
	if(fixed){
		p->fixedpri = 1;
	} else {
		p->fixedpri = 0;
	}
}

void
procinit0(void)		/* bad planning - clashes with devproc.c */
{
	Proc *p;
	int i;

	procalloc.free = xalloc(conf.nproc*sizeof(Proc));
	if(procalloc.free == nil){
		xsummary();
		panic("cannot allocate %lud procs (%ludMB)\n", conf.nproc, conf.nproc*sizeof(Proc)/(1024*1024));
	}
	procalloc.arena = procalloc.free;

	p = procalloc.free;
	for(i=0; i<conf.nproc-1; i++,p++)
		p->qnext = p+1;
	p->qnext = 0;
}

/*
 * sleep if a condition is not true.  Another process will
 * awaken us after it sets the condition.  When we awaken
 * the condition may no longer be true.
 *
 * we lock both the process and the rendezvous to keep r->p
 * and p->r synchronized.
 */
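/*
 * Typical use (an illustrative sketch; Queue, qnotempty and the
 * surrounding driver are assumptions, not part of this file):
 *
 *	static int
 *	qnotempty(void *a)
 *	{
 *		Queue *q;
 *
 *		q = a;
 *		return q->count > 0;
 *	}
 *
 *	sleep(&q->r, qnotempty, q);	consumer: block until data arrives
 *	...
 *	q->count++;
 *	wakeup(&q->r);			producer: after adding data
 *
 * The predicate is re-evaluated under the locks below, so a wakeup
 * that races with the sleep is never lost.
 */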
void
sleep(Rendez *r, int (*f)(void*), void *arg)
{
	int s;
	void (*pt)(Proc*, int, vlong);

	s = splhi();

	if(up->nlocks.ref)
		print("process %lud sleeps with %lud locks held, last lock %#p locked at pc %#lux, sleep called from %#p\n",
			up->pid, up->nlocks.ref, up->lastlock, up->lastlock->pc, getcallerpc(&r));
	lock(r);
	lock(&up->rlock);
	if(r->p){
		print("double sleep called from %#p, %lud %lud\n", getcallerpc(&r), r->p->pid, up->pid);
		dumpstack();
	}

	/*
	 * Wakeup only knows there may be something to do by testing
	 * r->p in order to get something to lock on.
	 * Flush that information out to memory in case the sleep is
	 * committed.
	 */
	r->p = up;

	if((*f)(arg) || up->notepending){
		/*
		 * if condition happened or a note is pending
		 * never mind
		 */
		r->p = nil;
		unlock(&up->rlock);
		unlock(r);
	} else {
		/*
		 * now we are committed to
		 * change state and call scheduler
		 */
		pt = proctrace;
		if(pt)
			pt(up, SSleep, 0);
		up->state = Wakeme;
		up->r = r;

		/* statistics */
		m->cs++;

		procsave(up);
		if(setlabel(&up->sched)) {
			/*
			 * here when the process is awakened
			 */
			procrestore(up);
			spllo();
		} else {
			/*
			 * here to go to sleep (i.e. stop Running)
			 */
			unlock(&up->rlock);
			unlock(r);
			gotolabel(&m->sched);
		}
	}

	if(up->notepending) {
		up->notepending = 0;
		splx(s);
		if(up->procctl == Proc_exitme && up->closingfgrp)
			forceclosefgrp();
		error(Eintr);
	}

	splx(s);
}

static int
tfn(void *arg)
{
	return up->trend == nil || up->tfn(arg);
}

void
twakeup(Ureg*, Timer *t)
{
	Proc *p;
	Rendez *trend;

	p = t->ta;
	trend = p->trend;
	p->trend = 0;
	if(trend)
		wakeup(trend);
}

void
tsleep(Rendez *r, int (*fn)(void*), void *arg, ulong ms)
{
	if(up->tt){
		print("tsleep: timer active: mode %d, tf %#p\n", up->tmode, up->tf);
		timerdel(up);
	}
	up->tns = MS2NS(ms);
	up->tf = twakeup;
	up->tmode = Trelative;
	up->ta = up;
	up->trend = r;
	up->tfn = fn;
	timeradd(up);

	if(waserror()){
		timerdel(up);
		nexterror();
	}
	sleep(r, tfn, arg);
	if(up->tt)
		timerdel(up);
	up->twhen = 0;
	poperror();
}

/*
 * Expects that only one process can call wakeup for any given Rendez.
 * We hold both locks to ensure that r->p and p->r remain consistent.
 * Richard Miller has a better solution that doesn't require both to
 * be held simultaneously, but I'm a paranoid - presotto.
 */
Proc*
wakeup(Rendez *r)
{
	Proc *p;
	int s;

	s = splhi();

	lock(r);
	p = r->p;

	if(p != nil){
		lock(&p->rlock);
		if(p->state != Wakeme || p->r != r){
			iprint("%p %p %d\n", p->r, r, p->state);
			panic("wakeup: state");
		}
		r->p = nil;
		p->r = nil;
		ready(p);
		unlock(&p->rlock);
	}
	unlock(r);

	splx(s);

	return p;
}

/*
 * if waking a sleeping process, this routine must hold both
 * p->rlock and r->lock.  However, it can't lock them in
 * the same order as wakeup, causing a possible lock ordering
 * deadlock.  We break the deadlock by giving up the p->rlock
 * lock if we can't get the r->lock and retrying.
 */
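/*
 * Concretely: wakeup acquires r, then p->rlock; postnote acquires
 * p->rlock, then tries r with canlock.  When canlock fails,
 * postnote releases p->rlock, calls sched() to let the other side
 * finish, and retries, so the two lock orders can never deadlock.
 */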
int
postnote(Proc *p, int dolock, char *n, int flag)
{
	int s, ret;
	Rendez *r;
	Proc *d, **l;

	if(dolock)
		qlock(&p->debug);

	if(flag != NUser && (p->notify == 0 || p->notified))
		p->nnote = 0;

	ret = 0;
	if(p->nnote < NNOTE) {
		strcpy(p->note[p->nnote].msg, n);
		p->note[p->nnote++].flag = flag;
		ret = 1;
	}
	p->notepending = 1;
	if(dolock)
		qunlock(&p->debug);

	/* this loop is to avoid lock ordering problems. */
	for(;;){
		s = splhi();
		lock(&p->rlock);
		r = p->r;

		/* waiting for a wakeup? */
		if(r == nil)
			break;	/* no */

		/* try for the second lock */
		if(canlock(r)){
			if(p->state != Wakeme || r->p != p)
				panic("postnote: state %d %d %d", r->p != p, p->r != r, p->state);
			p->r = nil;
			r->p = nil;
			ready(p);
			unlock(r);
			break;
		}

		/* give other process time to get out of critical section and try again */
		unlock(&p->rlock);
		splx(s);
		sched();
	}
	unlock(&p->rlock);
	splx(s);

	if(p->state != Rendezvous)
		return ret;

	/* Try and pull out of a rendezvous */
	lock(p->rgrp);
	if(p->state == Rendezvous) {
		p->rendval = ~0;
		l = &REND(p->rgrp, p->rendtag);
		for(d = *l; d; d = d->rendhash) {
			if(d == p) {
				*l = p->rendhash;
				break;
			}
			l = &d->rendhash;
		}
		ready(p);
	}
	unlock(p->rgrp);
	return ret;
}

/*
 * weird thing: keep at most NBROKEN around
 */
#define NBROKEN 4
struct
{
	QLock;
	int	n;
	Proc	*p[NBROKEN];
}broken;

void
addbroken(Proc *p)
{
	qlock(&broken);
	if(broken.n == NBROKEN) {
		ready(broken.p[0]);
		memmove(&broken.p[0], &broken.p[1], sizeof(Proc*)*(NBROKEN-1));
		--broken.n;
	}
	broken.p[broken.n++] = p;
	qunlock(&broken);

	edfstop(up);
	p->state = Broken;
	p->psstate = 0;
	sched();
}

void
unbreak(Proc *p)
{
	int b;

	qlock(&broken);
	for(b=0; b < broken.n; b++)
		if(broken.p[b] == p) {
			broken.n--;
			memmove(&broken.p[b], &broken.p[b+1],
					sizeof(Proc*)*(NBROKEN-(b+1)));
			ready(p);
			break;
		}
	qunlock(&broken);
}

int
freebroken(void)
{
	int i, n;

	qlock(&broken);
	n = broken.n;
	for(i=0; i<n; i++) {
		ready(broken.p[i]);
		broken.p[i] = 0;
	}
	broken.n = 0;
	qunlock(&broken);
	return n;
}

void
pexit(char *exitstr, int freemem)
{
	Proc *p;
	Segment **s, **es;
	long utime, stime;
	Waitq *wq, *f, *next;
	Fgrp *fgrp;
	Egrp *egrp;
	Rgrp *rgrp;
	Pgrp *pgrp;
	Chan *dot;
	void (*pt)(Proc*, int, vlong);

	if(up->syscalltrace)
		free(up->syscalltrace);
	up->alarm = 0;
	if(up->tt)
		timerdel(up);
	pt = proctrace;
	if(pt)
		pt(up, SDead, 0);

	/* nil out all the resources under lock (free later) */
	qlock(&up->debug);
	fgrp = up->fgrp;
	up->fgrp = nil;
	egrp = up->egrp;
	up->egrp = nil;
	rgrp = up->rgrp;
	up->rgrp = nil;
	pgrp = up->pgrp;
	up->pgrp = nil;
	dot = up->dot;
	up->dot = nil;
	qunlock(&up->debug);

	if(fgrp)
		closefgrp(fgrp);
	if(egrp)
		closeegrp(egrp);
	if(rgrp)
		closergrp(rgrp);
	if(dot)
		cclose(dot);
	if(pgrp)
		closepgrp(pgrp);

	/*
	 * if not a kernel process and have a parent,
	 * do some housekeeping.
	 */
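	/*
	 * Note on the error handling below: the while(waserror())
	 * loop swallows any error raised while allocating the wait
	 * record (e.g. an interrupting note while waiting for
	 * memory) and retries, so pexit itself cannot be aborted
	 * partway through.
	 */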
	if(up->kp == 0) {
		p = up->parent;
		if(p == 0) {
			if(exitstr == 0)
				exitstr = "unknown";
			panic("boot process died: %s", exitstr);
		}

		while(waserror())
			;

		wq = smalloc(sizeof(Waitq));
		poperror();

		wq->w.pid = up->pid;
		utime = up->time[TUser] + up->time[TCUser];
		stime = up->time[TSys] + up->time[TCSys];
		wq->w.time[TUser] = tk2ms(utime);
		wq->w.time[TSys] = tk2ms(stime);
		wq->w.time[TReal] = tk2ms(MACHP(0)->ticks - up->time[TReal]);
		if(exitstr && exitstr[0])
			snprint(wq->w.msg, sizeof(wq->w.msg), "%s %lud: %s", up->text, up->pid, exitstr);
		else
			wq->w.msg[0] = '\0';

		lock(&p->exl);
		/*
		 * Check that parent is still alive.
		 */
		if(p->pid == up->parentpid && p->state != Broken) {
			p->nchild--;
			p->time[TCUser] += utime;
			p->time[TCSys] += stime;
			/*
			 * If there would be more than 128 wait records
			 * outstanding for my parent, then don't leave a wait
			 * record behind.  This helps prevent badly written
			 * daemon processes from accumulating lots of wait
			 * records.
			 */
			if(p->nwait < 128) {
				wq->next = p->waitq;
				p->waitq = wq;
				p->nwait++;
				wq = nil;
				wakeup(&p->waitr);
			}
		}
		unlock(&p->exl);
		if(wq)
			free(wq);
	}

	if(!freemem)
		addbroken(up);

	qlock(&up->seglock);
	es = &up->seg[NSEG];
	for(s = up->seg; s < es; s++) {
		if(*s) {
			putseg(*s);
			*s = 0;
		}
	}
	qunlock(&up->seglock);

	lock(&up->exl);		/* Prevent my children from leaving waits */
	pidunhash(up);
	up->pid = 0;
	wakeup(&up->waitr);
	unlock(&up->exl);

	for(f = up->waitq; f; f = next) {
		next = f->next;
		free(f);
	}

	/* release debuggers */
	qlock(&up->debug);
	if(up->pdbg) {
		wakeup(&up->pdbg->sleep);
		up->pdbg = 0;
	}
	qunlock(&up->debug);

	/* Sched must not loop for these locks */
	lock(&procalloc);
	lock(&palloc);

	edfstop(up);
	up->state = Moribund;
	sched();
	panic("pexit");
}

int
haswaitq(void *x)
{
	Proc *p;

	p = (Proc *)x;
	return p->waitq != 0;
}

ulong
pwait(Waitmsg *w)
{
	ulong cpid;
	Waitq *wq;

	if(!canqlock(&up->qwaitr))
		error(Einuse);

	if(waserror()) {
		qunlock(&up->qwaitr);
		nexterror();
	}

	lock(&up->exl);
	if(up->nchild == 0 && up->waitq == 0) {
		unlock(&up->exl);
		error(Enochild);
	}
	unlock(&up->exl);

	sleep(&up->waitr, haswaitq, up);

	lock(&up->exl);
	wq = up->waitq;
	up->waitq = wq->next;
	up->nwait--;
	unlock(&up->exl);

	qunlock(&up->qwaitr);
	poperror();

	if(w)
		memmove(w, &wq->w, sizeof(Waitmsg));
	cpid = wq->w.pid;
	free(wq);
	return cpid;
}

Proc*
proctab(int i)
{
	return &procalloc.arena[i];
}
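/*
 * Illustrative lookup pattern (as used by callers such as devproc;
 * procindex is defined at the end of this file):
 *
 *	i = procindex(pid);
 *	if(i >= 0)
 *		p = proctab(i);
 */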
void
dumpaproc(Proc *p)
{
	ulong bss;
	char *s;

	if(p == 0)
		return;

	bss = 0;
	if(p->seg[BSEG])
		bss = p->seg[BSEG]->top;

	s = p->psstate;
	if(s == 0)
		s = statename[p->state];
	print("%3lud:%10s pc %8lux dbgpc %8lux %8s (%s) ut %ld st %ld bss %lux qpc %lux nl %lud nd %lud lpc %lux pri %lud\n",
		p->pid, p->text, p->pc, dbgpc(p), s, statename[p->state],
		p->time[0], p->time[1], bss, p->qpc, p->nlocks.ref,
		p->delaysched, p->lastlock ? p->lastlock->pc : 0, p->priority);
}

void
procdump(void)
{
	int i;
	Proc *p;

	if(up)
		print("up %lud\n", up->pid);
	else
		print("no current process\n");
	for(i=0; i<conf.nproc; i++) {
		p = &procalloc.arena[i];
		if(p->state == Dead)
			continue;

		dumpaproc(p);
	}
}

/*
 * wait till all processes have flushed their mmu
 * state about segment s
 */
void
procflushseg(Segment *s)
{
	int i, ns, nm, nwait;
	Proc *p;

	/*
	 * tell all processes with this
	 * segment to flush their mmu's
	 */
	nwait = 0;
	for(i=0; i<conf.nproc; i++) {
		p = &procalloc.arena[i];
		if(p->state == Dead)
			continue;
		for(ns = 0; ns < NSEG; ns++)
			if(p->seg[ns] == s){
				p->newtlb = 1;
				for(nm = 0; nm < conf.nmach; nm++){
					if(MACHP(nm)->proc == p){
						MACHP(nm)->flushmmu = 1;
						nwait++;
					}
				}
				break;
			}
	}

	if(nwait == 0)
		return;

	/*
	 * wait for all processors to take a clock interrupt
	 * and flush their mmu's
	 */
	for(nm = 0; nm < conf.nmach; nm++)
		if(MACHP(nm) != m)
			while(MACHP(nm)->flushmmu)
				sched();
}

void
scheddump(void)
{
	Proc *p;
	Schedq *rq;

	for(rq = &runq[Nrq-1]; rq >= runq; rq--){
		if(rq->head == 0)
			continue;
		print("rq%ld:", rq-runq);
		for(p = rq->head; p; p = p->rnext)
			print(" %lud(%lud)", p->pid, m->ticks - p->readytime);
		print("\n");
		delay(150);
	}
	print("nrdy %d\n", nrdy);
}

void
kproc(char *name, void (*func)(void *), void *arg)
{
	Proc *p;
	static Pgrp *kpgrp;

	p = newproc();
	p->psstate = 0;
	p->procmode = 0640;
	p->kp = 1;
	p->noswap = 1;

	p->fpsave = up->fpsave;
	p->scallnr = up->scallnr;
	p->s = up->s;
	p->nerrlab = 0;
	p->slash = up->slash;
	p->dot = up->dot;
	if(p->dot)
		incref(p->dot);

	memmove(p->note, up->note, sizeof(p->note));
	p->nnote = up->nnote;
	p->notified = 0;
	p->lastnote = up->lastnote;
	p->notify = up->notify;
	p->ureg = 0;
	p->dbgreg = 0;

	procpriority(p, PriKproc, 0);

	kprocchild(p, func, arg);

	kstrdup(&p->user, eve);
	kstrdup(&p->text, name);
	if(kpgrp == 0)
		kpgrp = newpgrp();
	p->pgrp = kpgrp;
	incref(kpgrp);

	memset(p->time, 0, sizeof(p->time));
	p->time[TReal] = MACHP(0)->ticks;
	ready(p);
	/*
	 * since the bss/data segments are now shareable,
	 * any mmu info about this process is now stale
	 * and has to be discarded.
	 */
	p->newtlb = 1;
	flushmmu();
}

/*
 * called splhi() by notify().  See comment in notify for the
 * reasoning.
 */
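/*
 * Descriptive sketch of the stop protocol: a debugger writing
 * "stop" to the proc's ctl file sets p->procctl to Proc_stopme;
 * the next time the process passes through here it parks itself
 * in the Stopped state, waking any debugger sleeping on
 * p->pdbg->sleep, and continues only after being readied again.
 */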
void
procctl(Proc *p)
{
	char *state;
	ulong s;

	switch(p->procctl) {
	case Proc_exitbig:
		spllo();
		pexit("Killed: Insufficient physical memory", 1);

	case Proc_exitme:
		spllo();		/* pexit has locks in it */
		pexit("Killed", 1);

	case Proc_traceme:
		if(p->nnote == 0)
			return;
		/* No break */

	case Proc_stopme:
		p->procctl = 0;
		state = p->psstate;
		p->psstate = "Stopped";
		/* free a waiting debugger */
		s = spllo();
		qlock(&p->debug);
		if(p->pdbg) {
			wakeup(&p->pdbg->sleep);
			p->pdbg = 0;
		}
		qunlock(&p->debug);
		splhi();
		p->state = Stopped;
		sched();
		p->psstate = state;
		splx(s);
		return;
	}
}

#include "errstr.h"

void
error(char *err)
{
	spllo();

	assert(up->nerrlab < NERR);
	kstrcpy(up->errstr, err, ERRMAX);
	setlabel(&up->errlab[NERR-1]);
	nexterror();
}

void
nexterror(void)
{
	gotolabel(&up->errlab[--up->nerrlab]);
}

void
exhausted(char *resource)
{
	char buf[ERRMAX];

	sprint(buf, "no free %s", resource);
	iprint("%s\n", buf);
	error(buf);
}

void
killbig(char *why)
{
	int i;
	Segment *s;
	ulong l, max;
	Proc *p, *ep, *kp;

	max = 0;
	kp = 0;
	ep = procalloc.arena+conf.nproc;
	for(p = procalloc.arena; p < ep; p++) {
		if(p->state == Dead || p->kp)
			continue;
		l = 0;
		for(i=1; i<NSEG; i++) {
			s = p->seg[i];
			if(s != 0)
				l += s->top - s->base;
		}
		if(l > max && ((p->procmode&0222) || strcmp(eve, p->user)!=0)) {
			kp = p;
			max = l;
		}
	}
	if(kp == nil)		/* no victim found; don't dereference nil below */
		return;

	print("%lud: %s killed: %s\n", kp->pid, kp->text, why);
	for(p = procalloc.arena; p < ep; p++) {
		if(p->state == Dead || p->kp)
			continue;
		if(p != kp && p->seg[BSEG] && p->seg[BSEG] == kp->seg[BSEG])
			p->procctl = Proc_exitbig;
	}
	kp->procctl = Proc_exitbig;
	for(i = 0; i < NSEG; i++) {
		s = kp->seg[i];
		if(s != 0 && canqlock(&s->lk)) {
			mfreeseg(s, s->base, (s->top - s->base)/BY2PG);
			qunlock(&s->lk);
		}
	}
}

/*
 * change ownership to 'new' of all processes owned by 'old'.  Used when
 * eve changes.
 */
void
renameuser(char *old, char *new)
{
	Proc *p, *ep;

	ep = procalloc.arena+conf.nproc;
	for(p = procalloc.arena; p < ep; p++)
		if(p->user!=nil && strcmp(old, p->user)==0)
			kstrdup(&p->user, new);
}

/*
 * time accounting called by clock() splhi'd
 */
void
accounttime(void)
{
	Proc *p;
	ulong n, per;
	static ulong nrun;

	p = m->proc;
	if(p) {
		nrun++;
		p->time[p->insyscall]++;
	}

	/* calculate decaying duty cycles */
	n = perfticks();
	per = n - m->perf.last;
	m->perf.last = n;
	per = (m->perf.period*(HZ-1) + per)/HZ;
	if(per != 0)
		m->perf.period = per;

	m->perf.avg_inidle = (m->perf.avg_inidle*(HZ-1)+m->perf.inidle)/HZ;
	m->perf.inidle = 0;

	m->perf.avg_inintr = (m->perf.avg_inintr*(HZ-1)+m->perf.inintr)/HZ;
	m->perf.inintr = 0;

	/* only one processor gets to compute system load averages */
	if(m->machno != 0)
		return;

	/*
	 * calculate decaying load average.
	 * if we decay by (n-1)/n then it takes
	 * n clock ticks to go from load L to .36 L once
	 * things quiet down.  it takes about 5 n clock
	 * ticks to go to zero.  so using HZ means this is
	 * approximately the load over the last second,
	 * with a tail lasting about 5 seconds.
	 */
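	/*
	 * Worked example (illustrative, assuming HZ == 100): the
	 * filter multiplies the load by (HZ-1)/HZ each tick, and
	 * (99/100)^100 ~= 0.37 ~= 1/e, which is where the .36 above
	 * comes from.  Load is kept in fixed point: one runnable
	 * process contributes 1000.
	 */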
	n = nrun;
	nrun = 0;
	n = (nrdy+n)*1000;
	m->load = (m->load*(HZ-1)+n)/HZ;
}

static void
pidhash(Proc *p)
{
	int h;

	h = p->pid % nelem(procalloc.ht);
	lock(&procalloc);
	p->pidhash = procalloc.ht[h];
	procalloc.ht[h] = p;
	unlock(&procalloc);
}

static void
pidunhash(Proc *p)
{
	int h;
	Proc **l;

	h = p->pid % nelem(procalloc.ht);
	lock(&procalloc);
	for(l = &procalloc.ht[h]; *l != nil; l = &(*l)->pidhash)
		if(*l == p){
			*l = p->pidhash;
			break;
		}
	unlock(&procalloc);
}

int
procindex(ulong pid)
{
	Proc *p;
	int h;
	int s;

	s = -1;
	h = pid % nelem(procalloc.ht);
	lock(&procalloc);
	for(p = procalloc.ht[h]; p != nil; p = p->pidhash)
		if(p->pid == pid){
			s = p - procalloc.arena;
			break;
		}
	unlock(&procalloc);
	return s;
}