#include	<u.h>
#include	"../port/lib.h"
#include	"mem.h"
#include	"dat.h"
#include	"fns.h"
#include	"../port/error.h"
#include	"edf.h"
#include	<trace.h>

int	coopsched;
int	schedgain = 30;	/* units in seconds */
int	nrdy;
Ref	noteidalloc;

void updatecpu(Proc*);
int reprioritize(Proc*);

ulong	delayedscheds;	/* statistics */
long	skipscheds;
long	preempts;
ulong	load;

static Ref	pidalloc;

static struct Procalloc
{
	Lock;
	Proc*	ht[128];
	Proc*	arena;
	Proc*	free;
} procalloc;

enum
{
	Q=10,
	DQ=4,
	Scaling=2,
};

Schedq	runq[Nrq];
ulong	runvec;

char *statename[] =
{	/* BUG: generate automatically */
	"Dead",
	"Moribund",
	"Ready",
	"Scheding",
	"Running",
	"Queueing",
	"QueueingR",
	"QueueingW",
	"Wakeme",
	"Broken",
	"Stopped",
	"Rendez",
	"Waitrelease",
};

static void pidhash(Proc*);
static void pidunhash(Proc*);
static void rebalance(void);

/*
 *  Always splhi()'ed.
 */
void
schedinit(void)		/* never returns */
{
	Edf *e;

	setlabel(&m->sched);
	if(up) {
		if((e = up->edf) && (e->flags & Admitted))
			edfrecord(up);
		m->proc = 0;
		switch(up->state) {
		case Running:
			ready(up);
			break;
		case Moribund:
			up->state = Dead;
			edfstop(up);
			if (up->edf)
				free(up->edf);
			up->edf = nil;

			/*
			 * Holding locks from pexit:
			 *	procalloc
			 *	palloc
			 */
			mmurelease(up);

			up->qnext = procalloc.free;
			procalloc.free = up;

			unlock(&palloc);
			unlock(&procalloc);
			break;
		}
		up->mach = nil;
		updatecpu(up);
		up = nil;
	}
	sched();
}
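/*
 * The context switches here rest on the setlabel/gotolabel pair,
 * which behaves much like setjmp/longjmp: setlabel(&l) saves the
 * current stack and program counter in the Label l and returns 0;
 * a later gotolabel(&l) resumes at the saved point, appearing as
 * a non-zero return from setlabel.  An illustrative sketch (not
 * part of this file):
 *
 *	Label l;
 *
 *	if(setlabel(&l) == 0){
 *		...runs first, with l now holding this point...
 *		gotolabel(&l);		does not return
 *	}
 *	...execution resumes here after gotolabel...
 */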
/*
 *  If changing this routine, look also at sleep().  It
 *  contains a copy of the guts of sched().
 */
void
sched(void)
{
	Proc *p;

	if(m->ilockdepth)
		panic("ilockdepth %d, last lock 0x%p at 0x%lux, sched called from 0x%lux",
			m->ilockdepth, up?up->lastilock:nil,
			(up && up->lastilock)?up->lastilock->pc:0,
			getcallerpc(&p+2));

	if(up){
		/*
		 * Delay the sched until the process gives up the locks
		 * it is holding.  This avoids dumb lock loops.
		 * Don't delay if the process is Moribund.
		 * It called sched to die.
		 * But do sched eventually.  This avoids a missing unlock
		 * from hanging the entire kernel.
		 * But don't reschedule procs holding palloc or procalloc.
		 * Those are far too important to be holding while asleep.
		 *
		 * This test is not exact.  There can still be a few
		 * instructions in the middle of taslock when a process
		 * holds a lock but Lock.p has not yet been initialized.
		 */
		if(up->nlocks.ref)
		if(up->state != Moribund)
		if(up->delaysched < 20
		|| palloc.Lock.p == up
		|| procalloc.Lock.p == up){
			up->delaysched++;
			delayedscheds++;
			return;
		}
		up->delaysched = 0;

		splhi();

		/* statistics */
		m->cs++;

		procsave(up);
		if(setlabel(&up->sched)){
			procrestore(up);
			spllo();
			return;
		}
		gotolabel(&m->sched);
	}
	p = runproc();
	if(!p->edf){
		updatecpu(p);
		p->priority = reprioritize(p);
	}
	if(p != m->readied)
		m->schedticks = m->ticks + HZ/10;
	m->readied = 0;
	up = p;
	up->state = Running;
	up->mach = MACHP(m->machno);
	m->proc = up;
	mmuswitch(up);
	gotolabel(&up->sched);
}

int
anyready(void)
{
	return runvec;
}

int
anyhigher(void)
{
	return runvec & ~((1<<(up->priority+1))-1);
}

/*
 *  here once per clock tick to see if we should resched
 */
void
hzsched(void)
{
	/* once a second, rebalance will reprioritize ready procs */
	if(m->machno == 0)
		rebalance();

	/* unless preempted, get to run for at least 100ms */
	if(anyhigher()
	|| (!up->fixedpri && m->ticks > m->schedticks && anyready())){
		m->readied = nil;	/* avoid cooperative scheduling */
		up->delaysched++;
	}
}

/*
 *  here at the end of non-clock interrupts to see if we should preempt the
 *  current process.  Returns 1 if preempted, 0 otherwise.
 */
int
preempted(void)
{
	if(up && up->state == Running)
	if(up->preempted == 0)
	if(anyhigher())
	if(!active.exiting){
		m->readied = nil;	/* avoid cooperative scheduling */
		up->preempted = 1;
		sched();
		splhi();
		up->preempted = 0;
		return 1;
	}
	return 0;
}

/*
 * Update the cpu time average for this particular process,
 * which is about to change from up -> not up or vice versa.
 * p->lastupdate is the last time an updatecpu happened.
 *
 * The cpu time average is a decaying average that lasts
 * about D clock ticks.  D is chosen to be approximately
 * the cpu time of a cpu-intensive "quick job".  A job has to run
 * for approximately D clock ticks before we home in on its
 * actual cpu usage.  Thus if you manage to get in and get out
 * quickly, you won't be penalized during your burst.  Once you
 * start using your share of the cpu for more than about D
 * clock ticks though, your p->cpu hits 1000 (1.0) and you end up
 * below all the other quick jobs.  Interactive tasks, because
 * they basically always use less than their fair share of cpu,
 * will be rewarded.
 *
 * If the process has not been running, then we want to
 * apply the filter
 *
 *	cpu = cpu * (D-1)/D
 *
 * n times, yielding
 *
 *	cpu = cpu * ((D-1)/D)^n
 *
 * but D is big enough that this is approximately
 *
 *	cpu = cpu * (D-n)/D
 *
 * so we use that instead.
 *
 * If the process has been running, we apply the filter to
 * 1 - cpu, yielding a similar equation.  Note that cpu is
 * stored in fixed point (* 1000).
 *
 * Updatecpu must be called before changing up, in order
 * to maintain accurate cpu usage statistics.  It can be called
 * at any time to bring the stats for a given proc up-to-date.
 */
void
updatecpu(Proc *p)
{
	int n, t, ocpu;
	int D = schedgain*HZ*Scaling;

	if(p->edf)
		return;

	t = MACHP(0)->ticks*Scaling + Scaling/2;
	n = t - p->lastupdate;
	p->lastupdate = t;

	if(n == 0)
		return;
	if(n > D)
		n = D;

	ocpu = p->cpu;
	if(p != up)
		p->cpu = (ocpu*(D-n))/D;
	else{
		t = 1000 - ocpu;
		t = (t*(D-n))/D;
		p->cpu = 1000 - t;
	}

//iprint("pid %d %s for %d cpu %d -> %d\n", p->pid,p==up?"active":"inactive",n, ocpu,p->cpu);
}
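/*
 * Worked example of the decay filter above (illustration only;
 * assumes HZ = 100 for concreteness).  Then
 * D = schedgain*HZ*Scaling = 30*100*2 = 6000.  A process that had
 * p->cpu = 600 and then slept for n = 1000 scaled ticks decays to
 *
 *	cpu = 600 * (6000-1000)/6000 = 500
 *
 * while a process that kept running over the same interval climbs
 * toward 1000:
 *
 *	cpu = 1000 - (1000-600)*(6000-1000)/6000 = 667
 */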
/*
 * On average, p has used p->cpu of a cpu recently.
 * Its fair share is conf.nmach/m->load of a cpu.  If it has been getting
 * too much, penalize it.  If it has been getting too little, reward it.
 * I don't think you can get much more than your fair share that
 * often, so most of the queues are for using less.  Having a priority
 * of 3 means you're just right.  Having a higher priority (up to p->basepri)
 * means you're not using as much as you could.
 */
int
reprioritize(Proc *p)
{
	int fairshare, n, load, ratio;

	load = MACHP(0)->load;
	if(load == 0)
		return p->basepri;

	/*
	 * fairshare = 1.000 * conf.nmach * 1.000/load,
	 * except the decimal point is moved three places
	 * on both load and fairshare.
	 */
	fairshare = (conf.nmach*1000*1000)/load;
	n = p->cpu;
	if(n == 0)
		n = 1;
	ratio = (fairshare+n/2) / n;
	if(ratio > p->basepri)
		ratio = p->basepri;
	if(ratio < 0)
		panic("reprioritize");
//iprint("pid %d cpu %d load %d fair %d pri %d\n", p->pid, p->cpu, load, fairshare, ratio);
	return ratio;
}
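/*
 * Illustrative arithmetic (not part of the original source): on a
 * single processor with load 2.000 (MACHP(0)->load == 2000),
 * fairshare = (1*1000*1000)/2000 = 500, i.e. half a cpu.  A process
 * with p->cpu == 250 gets ratio = (500+125)/250 = 2; one with
 * p->cpu == 50 gets ratio = (500+25)/50 = 10, clamped to
 * p->basepri.  Lighter cpu use thus earns a higher run priority.
 */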
/*
 *  add a process to a scheduling queue
 */
void
queueproc(Schedq *rq, Proc *p)
{
	int pri;

	pri = rq - runq;
	lock(runq);
	p->priority = pri;
	p->rnext = 0;
	if(rq->tail)
		rq->tail->rnext = p;
	else
		rq->head = p;
	rq->tail = p;
	rq->n++;
	nrdy++;
	runvec |= 1<<pri;
	unlock(runq);
}

/*
 *  try to remove a process from a scheduling queue (called splhi)
 */
Proc*
dequeueproc(Schedq *rq, Proc *tp)
{
	Proc *l, *p;

	if(!canlock(runq))
		return nil;

	/*
	 *  the queue may have changed before we locked runq,
	 *  refind the target process.
	 */
	l = 0;
	for(p = rq->head; p; p = p->rnext){
		if(p == tp)
			break;
		l = p;
	}

	/*
	 *  p->mach==0 only when process state is saved
	 */
	if(p == 0 || p->mach){
		unlock(runq);
		return nil;
	}
	if(p->rnext == 0)
		rq->tail = l;
	if(l)
		l->rnext = p->rnext;
	else
		rq->head = p->rnext;
	if(rq->head == nil)
		runvec &= ~(1<<(rq-runq));
	rq->n--;
	nrdy--;
	if(p->state != Ready)
		print("dequeueproc %s %lud %s\n", p->text, p->pid, statename[p->state]);

	unlock(runq);
	return p;
}
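/*
 * runvec, maintained by queueproc and dequeueproc above, is a bit
 * vector with bit pri set whenever runq[pri] is non-empty; that is
 * what makes anyready() and anyhigher() cheap.  A sketch of the
 * bit arithmetic with illustrative values: given ready procs at
 * priorities 3 and 10, runvec == (1<<3)|(1<<10) == 0x408.  For
 * up->priority == 3, anyhigher() computes
 *
 *	runvec & ~((1<<4)-1) == 0x408 & ~0xF == 0x400
 *
 * which is non-zero, so a higher-priority process is waiting.
 */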
/*
 *  ready(p) picks a new priority for a process and sticks it in the
 *  runq for that priority.
 */
void
ready(Proc *p)
{
	int s, pri;
	Schedq *rq;
	void (*pt)(Proc*, int, vlong);

	s = splhi();
	if(edfready(p)){
		splx(s);
		return;
	}

	if(up != p)
		m->readied = p;	/* group scheduling */

	updatecpu(p);
	pri = reprioritize(p);
	p->priority = pri;
	rq = &runq[pri];
	p->state = Ready;
	queueproc(rq, p);
	pt = proctrace;
	if(pt)
		pt(p, SReady, 0);
	splx(s);
}

/*
 *  yield the processor and drop our priority
 */
void
yield(void)
{
	if(anyready()){
		/* pretend we just used 1/2 tick */
		up->lastupdate -= Scaling/2;
		sched();
	}
}

/*
 *  recalculate priorities once a second.  We need to do this
 *  since priorities will otherwise only be recalculated when
 *  the running process blocks.
 */
ulong balancetime;

static void
rebalance(void)
{
	int pri, npri, t, x;
	Schedq *rq;
	Proc *p;

	t = m->ticks;
	if(t - balancetime < HZ)
		return;
	balancetime = t;

	for(pri=0, rq=runq; pri<Npriq; pri++, rq++){
another:
		p = rq->head;
		if(p == nil)
			continue;
		if(p->mp != MACHP(m->machno))
			continue;
		if(pri == p->basepri)
			continue;
		updatecpu(p);
		npri = reprioritize(p);
		if(npri != pri){
			x = splhi();
			p = dequeueproc(rq, p);
			if(p)
				queueproc(&runq[npri], p);
			splx(x);
			goto another;
		}
	}
}

/*
 *  pick a process to run
 */
Proc*
runproc(void)
{
	Schedq *rq;
	Proc *p;
	ulong start, now;
	int i;
	void (*pt)(Proc*, int, vlong);

	start = perfticks();

	/* cooperative scheduling until the clock ticks */
	if(coopsched && (p=m->readied) && p->mach==0 && p->state==Ready
	&& runq[Nrq-1].head == nil && runq[Nrq-2].head == nil){
		skipscheds++;
		rq = &runq[p->priority];
		goto found;
	}

	preempts++;

loop:
	/*
	 *  find a process that last ran on this processor (affinity),
	 *  or one that hasn't moved in a while (load balancing).  Every
	 *  time around the loop affinity goes down.
	 */
	spllo();
	for(i = 0;; i++){
		/*
		 *  find the highest priority target process that this
		 *  processor can run given affinity constraints.
		 */
		for(rq = &runq[Nrq-1]; rq >= runq; rq--){
			for(p = rq->head; p; p = p->rnext){
				if(p->mp == nil || p->mp == MACHP(m->machno)
				|| (!p->wired && i > 0))
					goto found;
			}
		}

		/* waste time or halt the CPU */
		idlehands();

		/* remember how much time we spend here */
		now = perfticks();
		m->perf.inidle += now-start;
		start = now;
	}

found:
	splhi();
	p = dequeueproc(rq, p);
	if(p == nil)
		goto loop;

	p->state = Scheding;
	p->mp = MACHP(m->machno);

	if(edflock(p)){
		edfrun(p, rq == &runq[PriEdf]);	/* start deadline timer and do admin */
		edfunlock();
	}
	pt = proctrace;
	if(pt)
		pt(p, SRun, 0);
	return p;
}

int
canpage(Proc *p)
{
	int ok = 0;

	splhi();
	lock(runq);
	/* Only reliable way to see if we are Running */
	if(p->mach == 0) {
		p->newtlb = 1;
		ok = 1;
	}
	unlock(runq);
	spllo();

	return ok;
}

Proc*
newproc(void)
{
	Proc *p;

	lock(&procalloc);
	for(;;) {
		if(p = procalloc.free)
			break;

		unlock(&procalloc);
		resrcwait("no procs");
		lock(&procalloc);
	}
	procalloc.free = p->qnext;
	unlock(&procalloc);

	p->state = Scheding;
	p->psstate = "New";
	p->mach = 0;
	p->qnext = 0;
	p->nchild = 0;
	p->nwait = 0;
	p->waitq = 0;
	p->parent = 0;
	p->pgrp = 0;
	p->egrp = 0;
	p->fgrp = 0;
	p->rgrp = 0;
	p->pdbg = 0;
	p->fpstate = FPinit;
	p->kp = 0;
	p->procctl = 0;
	p->notepending = 0;
	p->ureg = 0;
	p->privatemem = 0;
	p->noswap = 0;
	p->errstr = p->errbuf0;
	p->syserrstr = p->errbuf1;
	p->errbuf0[0] = '\0';
	p->errbuf1[0] = '\0';
	p->nlocks.ref = 0;
	p->delaysched = 0;
	p->trace = 0;
	kstrdup(&p->user, "*nouser");
	kstrdup(&p->text, "*notext");
	kstrdup(&p->args, "");
	p->nargs = 0;
	p->setargs = 0;
	memset(p->seg, 0, sizeof p->seg);
	p->pid = incref(&pidalloc);
	pidhash(p);
	p->noteid = incref(&noteidalloc);
	if(p->pid==0 || p->noteid==0)
		panic("pidalloc");
	if(p->kstack == 0)
		p->kstack = smalloc(KSTACK);

	/* sched params */
	p->mp = 0;
	p->wired = 0;
	procpriority(p, PriNormal, 0);
	p->cpu = 0;
	p->lastupdate = MACHP(0)->ticks*Scaling;
	p->edf = nil;

	return p;
}

/*
 *  wire this proc to a machine
 */
void
procwired(Proc *p, int bm)
{
	Proc *pp;
	int i;
	char nwired[MAXMACH];
	Mach *wm;

	if(bm < 0){
		/* pick a machine to wire to */
		memset(nwired, 0, sizeof(nwired));
		p->wired = 0;
		pp = proctab(0);
		for(i=0; i<conf.nproc; i++, pp++){
			wm = pp->wired;
			if(wm && pp->pid)
				nwired[wm->machno]++;
		}
		bm = 0;
		for(i=0; i<conf.nmach; i++)
			if(nwired[i] < nwired[bm])
				bm = i;
	} else {
		/* use the virtual machine requested */
		bm = bm % conf.nmach;
	}

	p->wired = MACHP(bm);
	p->mp = p->wired;
}

void
procpriority(Proc *p, int pri, int fixed)
{
	if(pri >= Npriq)
		pri = Npriq - 1;
	else if(pri < 0)
		pri = 0;
	p->basepri = pri;
	p->priority = pri;
	if(fixed){
		p->fixedpri = 1;
	} else {
		p->fixedpri = 0;
	}
}

void
procinit0(void)		/* bad planning - clashes with devproc.c */
{
	Proc *p;
	int i;

	procalloc.free = xalloc(conf.nproc*sizeof(Proc));
	if(procalloc.free == nil){
		xsummary();
		panic("cannot allocate %lud procs (%ludMB)\n", conf.nproc, conf.nproc*sizeof(Proc)/(1024*1024));
	}
	procalloc.arena = procalloc.free;

	p = procalloc.free;
	for(i=0; i<conf.nproc-1; i++,p++)
		p->qnext = p+1;
	p->qnext = 0;
}
/*
 *  sleep if a condition is not true.  Another process will
 *  awaken us after it sets the condition.  When we awaken
 *  the condition may no longer be true.
 *
 *  we lock both the process and the rendezvous to keep r->p
 *  and p->r synchronized.
 */
void
sleep(Rendez *r, int (*f)(void*), void *arg)
{
	int s;
	void (*pt)(Proc*, int, vlong);

	s = splhi();

	if(up->nlocks.ref)
		print("process %lud sleeps with %lud locks held, last lock 0x%p locked at pc 0x%lux, sleep called from 0x%lux\n",
			up->pid, up->nlocks.ref, up->lastlock, up->lastlock->pc, getcallerpc(&r));
	lock(r);
	lock(&up->rlock);
	if(r->p){
		print("double sleep called from 0x%lux, %lud %lud\n", getcallerpc(&r), r->p->pid, up->pid);
		dumpstack();
	}

	/*
	 *  Wakeup only knows there may be something to do by testing
	 *  r->p in order to get something to lock on.
	 *  Flush that information out to memory in case the sleep is
	 *  committed.
	 */
	r->p = up;

	if((*f)(arg) || up->notepending){
		/*
		 *  if condition happened or a note is pending
		 *  never mind
		 */
		r->p = nil;
		unlock(&up->rlock);
		unlock(r);
	} else {
		/*
		 *  now we are committed to
		 *  change state and call scheduler
		 */
		pt = proctrace;
		if(pt)
			pt(up, SSleep, 0);
		up->state = Wakeme;
		up->r = r;

		/* statistics */
		m->cs++;

		procsave(up);
		if(setlabel(&up->sched)) {
			/*
			 *  here when the process is awakened
			 */
			procrestore(up);
			spllo();
		} else {
			/*
			 *  here to go to sleep (i.e. stop Running)
			 */
			unlock(&up->rlock);
			unlock(r);
			gotolabel(&m->sched);
		}
	}

	if(up->notepending) {
		up->notepending = 0;
		splx(s);
		if(up->procctl == Proc_exitme && up->closingfgrp)
			forceclosefgrp();
		error(Eintr);
	}

	splx(s);
}

static int
tfn(void *arg)
{
	return up->trend == nil || up->tfn(arg);
}

void
twakeup(Ureg*, Timer *t)
{
	Proc *p;
	Rendez *trend;

	p = t->ta;
	trend = p->trend;
	p->trend = 0;
	if(trend)
		wakeup(trend);
}

void
tsleep(Rendez *r, int (*fn)(void*), void *arg, ulong ms)
{
	if (up->tt){
		print("tsleep: timer active: mode %d, tf 0x%lux\n", up->tmode, up->tf);
		timerdel(up);
	}
	up->tns = MS2NS(ms);
	up->tf = twakeup;
	up->tmode = Trelative;
	up->ta = up;
	up->trend = r;
	up->tfn = fn;
	timeradd(up);

	if(waserror()){
		timerdel(up);
		nexterror();
	}
	sleep(r, tfn, arg);
	if (up->tt)
		timerdel(up);
	up->twhen = 0;
	poperror();
}
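/*
 * Canonical use of the sleep/wakeup pair (an illustrative sketch;
 * the Queue type and the names nonempty and q are hypothetical):
 *
 *	static Rendez r;
 *
 *	static int
 *	nonempty(void *a)
 *	{
 *		return ((Queue*)a)->count > 0;
 *	}
 *
 *	consumer:	sleep(&r, nonempty, q);		blocks until nonempty(q)
 *	producer:	q->count++; wakeup(&r);		at most one sleeper per Rendez
 *
 * tsleep(&r, nonempty, q, 500) is the same, but gives up after
 * 500ms by arranging for twakeup to fire the rendezvous.
 */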
/*
 *  Expects that only one process can call wakeup for any given Rendez.
 *  We hold both locks to ensure that r->p and p->r remain consistent.
 *  Richard Miller has a better solution that doesn't require both to
 *  be held simultaneously, but I'm a paranoid - presotto.
 */
Proc*
wakeup(Rendez *r)
{
	Proc *p;
	int s;

	s = splhi();

	lock(r);
	p = r->p;

	if(p != nil){
		lock(&p->rlock);
		if(p->state != Wakeme || p->r != r)
			panic("wakeup: state");
		r->p = nil;
		p->r = nil;
		ready(p);
		unlock(&p->rlock);
	}
	unlock(r);

	splx(s);

	return p;
}

/*
 *  if waking a sleeping process, this routine must hold both
 *  p->rlock and r's lock.  However, it can't acquire them in
 *  the same order as wakeup, causing a possible lock ordering
 *  deadlock.  We break the deadlock by giving up p->rlock
 *  if we can't get r's lock, and retrying.
 */
int
postnote(Proc *p, int dolock, char *n, int flag)
{
	int s, ret;
	Rendez *r;
	Proc *d, **l;

	if(dolock)
		qlock(&p->debug);

	if(flag != NUser && (p->notify == 0 || p->notified))
		p->nnote = 0;

	ret = 0;
	if(p->nnote < NNOTE) {
		strcpy(p->note[p->nnote].msg, n);
		p->note[p->nnote++].flag = flag;
		ret = 1;
	}
	p->notepending = 1;
	if(dolock)
		qunlock(&p->debug);

	/* this loop is to avoid lock ordering problems. */
	for(;;){
		s = splhi();
		lock(&p->rlock);
		r = p->r;

		/* waiting for a wakeup? */
		if(r == nil)
			break;	/* no */

		/* try for the second lock */
		if(canlock(r)){
			if(p->state != Wakeme || r->p != p)
				panic("postnote: state %d %d %d", r->p != p, p->r != r, p->state);
			p->r = nil;
			r->p = nil;
			ready(p);
			unlock(r);
			break;
		}

		/* give other process time to get out of critical section and try again */
		unlock(&p->rlock);
		splx(s);
		sched();
	}
	unlock(&p->rlock);
	splx(s);

	if(p->state != Rendezvous)
		return ret;

	/* Try and pull out of a rendezvous */
	lock(p->rgrp);
	if(p->state == Rendezvous) {
		p->rendval = ~0;
		l = &REND(p->rgrp, p->rendtag);
		for(d = *l; d; d = d->rendhash) {
			if(d == p) {
				*l = p->rendhash;
				break;
			}
			l = &d->rendhash;
		}
		ready(p);
	}
	unlock(p->rgrp);
	return ret;
}
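/*
 * Typical call (illustrative only; the note string is arbitrary):
 *
 *	postnote(p, 1, "interrupt", NUser);
 *
 * dolock=1 takes p->debug around the note bookkeeping; callers
 * that already hold it pass 0.  The NUser flag marks a note posted
 * on behalf of a user process, as tested above.
 */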
/*
 *  weird thing: keep at most NBROKEN around
 */
#define NBROKEN 4
struct
{
	QLock;
	int	n;
	Proc	*p[NBROKEN];
}broken;

void
addbroken(Proc *p)
{
	qlock(&broken);
	if(broken.n == NBROKEN) {
		ready(broken.p[0]);
		memmove(&broken.p[0], &broken.p[1], sizeof(Proc*)*(NBROKEN-1));
		--broken.n;
	}
	broken.p[broken.n++] = p;
	qunlock(&broken);

	edfstop(up);
	p->state = Broken;
	p->psstate = 0;
	sched();
}

void
unbreak(Proc *p)
{
	int b;

	qlock(&broken);
	for(b=0; b < broken.n; b++)
		if(broken.p[b] == p) {
			broken.n--;
			memmove(&broken.p[b], &broken.p[b+1],
					sizeof(Proc*)*(NBROKEN-(b+1)));
			ready(p);
			break;
		}
	qunlock(&broken);
}

int
freebroken(void)
{
	int i, n;

	qlock(&broken);
	n = broken.n;
	for(i=0; i<n; i++) {
		ready(broken.p[i]);
		broken.p[i] = 0;
	}
	broken.n = 0;
	qunlock(&broken);
	return n;
}

void
pexit(char *exitstr, int freemem)
{
	Proc *p;
	Segment **s, **es;
	long utime, stime;
	Waitq *wq, *f, *next;
	Fgrp *fgrp;
	Egrp *egrp;
	Rgrp *rgrp;
	Pgrp *pgrp;
	Chan *dot;
	void (*pt)(Proc*, int, vlong);

	up->alarm = 0;
	if (up->tt)
		timerdel(up);
	pt = proctrace;
	if(pt)
		pt(up, SDead, 0);

	/* nil out all the resources under lock (free later) */
	qlock(&up->debug);
	fgrp = up->fgrp;
	up->fgrp = nil;
	egrp = up->egrp;
	up->egrp = nil;
	rgrp = up->rgrp;
	up->rgrp = nil;
	pgrp = up->pgrp;
	up->pgrp = nil;
	dot = up->dot;
	up->dot = nil;
	qunlock(&up->debug);

	if(fgrp)
		closefgrp(fgrp);
	if(egrp)
		closeegrp(egrp);
	if(rgrp)
		closergrp(rgrp);
	if(dot)
		cclose(dot);
	if(pgrp)
		closepgrp(pgrp);

	/*
	 *  if not a kernel process and have a parent,
	 *  do some housekeeping.
	 */
	if(up->kp == 0) {
		p = up->parent;
		if(p == 0) {
			if(exitstr == 0)
				exitstr = "unknown";
			panic("boot process died: %s", exitstr);
		}

		while(waserror())
			;

		wq = smalloc(sizeof(Waitq));
		poperror();

		wq->w.pid = up->pid;
		utime = up->time[TUser] + up->time[TCUser];
		stime = up->time[TSys] + up->time[TCSys];
		wq->w.time[TUser] = tk2ms(utime);
		wq->w.time[TSys] = tk2ms(stime);
		wq->w.time[TReal] = tk2ms(MACHP(0)->ticks - up->time[TReal]);
		if(exitstr && exitstr[0])
			snprint(wq->w.msg, sizeof(wq->w.msg), "%s %lud: %s", up->text, up->pid, exitstr);
		else
			wq->w.msg[0] = '\0';

		lock(&p->exl);
		/*
		 *  If my parent is no longer alive, or if there would be more
		 *  than 128 zombie child processes for my parent, then don't
		 *  leave a wait record behind.  This helps prevent badly
		 *  written daemon processes from accumulating lots of wait
		 *  records.
		 */
		if(p->pid == up->parentpid && p->state != Broken && p->nwait < 128) {
			p->nchild--;
			p->time[TCUser] += utime;
			p->time[TCSys] += stime;

			wq->next = p->waitq;
			p->waitq = wq;
			p->nwait++;

			wakeup(&p->waitr);
			unlock(&p->exl);
		}
		else {
			unlock(&p->exl);
			free(wq);
		}
	}

	if(!freemem)
		addbroken(up);

	qlock(&up->seglock);
	es = &up->seg[NSEG];
	for(s = up->seg; s < es; s++) {
		if(*s) {
			putseg(*s);
			*s = 0;
		}
	}
	qunlock(&up->seglock);

	lock(&up->exl);		/* Prevent my children from leaving waits */
	pidunhash(up);
	up->pid = 0;
	wakeup(&up->waitr);
	unlock(&up->exl);

	for(f = up->waitq; f; f = next) {
		next = f->next;
		free(f);
	}

	/* release debuggers */
	qlock(&up->debug);
	if(up->pdbg) {
		wakeup(&up->pdbg->sleep);
		up->pdbg = 0;
	}
	qunlock(&up->debug);

	/* Sched must not loop for these locks */
	lock(&procalloc);
	lock(&palloc);

	edfstop(up);
	up->state = Moribund;
	sched();
	panic("pexit");
}
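/*
 * pexit above leaves a Waitq record on the parent; pwait below
 * consumes one.  A parent-side sketch (illustrative only):
 *
 *	Waitmsg w;
 *	ulong pid;
 *
 *	pid = pwait(&w);	sleeps on up->waitr until a child exits
 *	print("%lud exited: %s\n", pid, w.msg);
 */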
int
haswaitq(void *x)
{
	Proc *p;

	p = (Proc *)x;
	return p->waitq != 0;
}

ulong
pwait(Waitmsg *w)
{
	ulong cpid;
	Waitq *wq;

	if(!canqlock(&up->qwaitr))
		error(Einuse);

	if(waserror()) {
		qunlock(&up->qwaitr);
		nexterror();
	}

	lock(&up->exl);
	if(up->nchild == 0 && up->waitq == 0) {
		unlock(&up->exl);
		error(Enochild);
	}
	unlock(&up->exl);

	sleep(&up->waitr, haswaitq, up);

	lock(&up->exl);
	wq = up->waitq;
	up->waitq = wq->next;
	up->nwait--;
	unlock(&up->exl);

	qunlock(&up->qwaitr);
	poperror();

	if(w)
		memmove(w, &wq->w, sizeof(Waitmsg));
	cpid = wq->w.pid;
	free(wq);

	return cpid;
}

Proc*
proctab(int i)
{
	return &procalloc.arena[i];
}

void
dumpaproc(Proc *p)
{
	ulong bss;
	char *s;

	if(p == 0)
		return;

	bss = 0;
	if(p->seg[BSEG])
		bss = p->seg[BSEG]->top;

	s = p->psstate;
	if(s == 0)
		s = statename[p->state];
	print("%3lud:%10s pc %8lux dbgpc %8lux %8s (%s) ut %ld st %ld bss %lux qpc %lux nl %lud nd %lud lpc %lux pri %lud\n",
		p->pid, p->text, p->pc, dbgpc(p), s, statename[p->state],
		p->time[0], p->time[1], bss, p->qpc, p->nlocks.ref, p->delaysched, p->lastlock ? p->lastlock->pc : 0, p->priority);
}

void
procdump(void)
{
	int i;
	Proc *p;

	if(up)
		print("up %lud\n", up->pid);
	else
		print("no current process\n");
	for(i=0; i<conf.nproc; i++) {
		p = &procalloc.arena[i];
		if(p->state == Dead)
			continue;

		dumpaproc(p);
	}
}

/*
 *  wait till all processes have flushed their mmu
 *  state about segment s
 */
void
procflushseg(Segment *s)
{
	int i, ns, nm, nwait;
	Proc *p;

	/*
	 *  tell all processes with this
	 *  segment to flush their mmu's
	 */
	nwait = 0;
	for(i=0; i<conf.nproc; i++) {
		p = &procalloc.arena[i];
		if(p->state == Dead)
			continue;
		for(ns = 0; ns < NSEG; ns++)
			if(p->seg[ns] == s){
				p->newtlb = 1;
				for(nm = 0; nm < conf.nmach; nm++){
					if(MACHP(nm)->proc == p){
						MACHP(nm)->flushmmu = 1;
						nwait++;
					}
				}
				break;
			}
	}

	if(nwait == 0)
		return;

	/*
	 *  wait for all processors to take a clock interrupt
	 *  and flush their mmu's
	 */
	for(nm = 0; nm < conf.nmach; nm++)
		if(MACHP(nm) != m)
			while(MACHP(nm)->flushmmu)
				sched();
}

void
scheddump(void)
{
	Proc *p;
	Schedq *rq;

	for(rq = &runq[Nrq-1]; rq >= runq; rq--){
		if(rq->head == 0)
			continue;
		print("rq%ld:", rq-runq);
		for(p = rq->head; p; p = p->rnext)
			print(" %lud(%lud)", p->pid, m->ticks - p->readytime);
		print("\n");
		delay(150);
	}
	print("nrdy %d\n", nrdy);
}

void
kproc(char *name, void (*func)(void *), void *arg)
{
	Proc *p;
	static Pgrp *kpgrp;

	p = newproc();
	p->psstate = 0;
	p->procmode = 0640;
	p->kp = 1;
	p->noswap = 1;

	p->fpsave = up->fpsave;
	p->scallnr = up->scallnr;
	p->s = up->s;
	p->nerrlab = 0;
	p->slash = up->slash;
	p->dot = up->dot;
	if(p->dot)
		incref(p->dot);

	memmove(p->note, up->note, sizeof(p->note));
	p->nnote = up->nnote;
	p->notified = 0;
	p->lastnote = up->lastnote;
	p->notify = up->notify;
	p->ureg = 0;
	p->dbgreg = 0;

	procpriority(p, PriKproc, 0);

	kprocchild(p, func, arg);

	kstrdup(&p->user, eve);
	kstrdup(&p->text, name);
	if(kpgrp == 0)
		kpgrp = newpgrp();
	p->pgrp = kpgrp;
	incref(kpgrp);

	memset(p->time, 0, sizeof(p->time));
	p->time[TReal] = MACHP(0)->ticks;
	ready(p);
	/*
	 *  since the bss/data segments are now shareable,
	 *  any mmu info about this process is now stale
	 *  and has to be discarded.
	 */
	p->newtlb = 1;
	flushmmu();
}
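/*
 * Drivers start kernel processes like so (an illustrative sketch;
 * flushr, flushproc and dev are hypothetical, and return0 is
 * assumed to be the port helper that always returns 0):
 *
 *	static Rendez flushr;
 *
 *	static void
 *	flushproc(void *a)
 *	{
 *		for(;;){
 *			tsleep(&flushr, return0, nil, 1000);
 *			...periodic work on a...
 *		}
 *	}
 *
 *	kproc("flush", flushproc, dev);
 *
 * The child runs func(arg) at PriKproc with noswap set; a kproc
 * that is done should call pexit rather than return.
 */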
/*
 *  called splhi() by notify().  See comment in notify for the
 *  reasoning.
 */
void
procctl(Proc *p)
{
	char *state;
	ulong s;

	switch(p->procctl) {
	case Proc_exitbig:
		spllo();
		pexit("Killed: Insufficient physical memory", 1);

	case Proc_exitme:
		spllo();		/* pexit has locks in it */
		pexit("Killed", 1);

	case Proc_traceme:
		if(p->nnote == 0)
			return;
		/* No break */

	case Proc_stopme:
		p->procctl = 0;
		state = p->psstate;
		p->psstate = "Stopped";
		/* free a waiting debugger */
		s = spllo();
		qlock(&p->debug);
		if(p->pdbg) {
			wakeup(&p->pdbg->sleep);
			p->pdbg = 0;
		}
		qunlock(&p->debug);
		splhi();
		p->state = Stopped;
		sched();
		p->psstate = state;
		splx(s);
		return;
	}
}

#include "errstr.h"

void
error(char *err)
{
	spllo();

	assert(up->nerrlab < NERR);
	kstrcpy(up->errstr, err, ERRMAX);
	setlabel(&up->errlab[NERR-1]);
	nexterror();
}

void
nexterror(void)
{
	gotolabel(&up->errlab[--up->nerrlab]);
}

void
exhausted(char *resource)
{
	char buf[ERRMAX];

	sprint(buf, "no free %s", resource);
	iprint("%s\n", buf);
	error(buf);
}

void
killbig(char *why)
{
	int i;
	Segment *s;
	ulong l, max;
	Proc *p, *ep, *kp;

	max = 0;
	kp = 0;
	ep = procalloc.arena+conf.nproc;
	for(p = procalloc.arena; p < ep; p++) {
		if(p->state == Dead || p->kp)
			continue;
		l = 0;
		for(i=1; i<NSEG; i++) {
			s = p->seg[i];
			if(s != 0)
				l += s->top - s->base;
		}
		if(l > max && ((p->procmode&0222) || strcmp(eve, p->user)!=0)) {
			kp = p;
			max = l;
		}
	}

	print("%lud: %s killed: %s\n", kp->pid, kp->text, why);
	for(p = procalloc.arena; p < ep; p++) {
		if(p->state == Dead || p->kp)
			continue;
		if(p != kp && p->seg[BSEG] && p->seg[BSEG] == kp->seg[BSEG])
			p->procctl = Proc_exitbig;
	}
	kp->procctl = Proc_exitbig;
	for(i = 0; i < NSEG; i++) {
		s = kp->seg[i];
		if(s != 0 && canqlock(&s->lk)) {
			mfreeseg(s, s->base, (s->top - s->base)/BY2PG);
			qunlock(&s->lk);
		}
	}
}

/*
 *  change ownership to 'new' of all processes owned by 'old'.  Used when
 *  eve changes.
 */
void
renameuser(char *old, char *new)
{
	Proc *p, *ep;

	ep = procalloc.arena+conf.nproc;
	for(p = procalloc.arena; p < ep; p++)
		if(p->user!=nil && strcmp(old, p->user)==0)
			kstrdup(&p->user, new);
}

/*
 *  time accounting called by clock() splhi'd
 */
void
accounttime(void)
{
	Proc *p;
	ulong n, per;
	static ulong nrun;

	p = m->proc;
	if(p) {
		nrun++;
		p->time[p->insyscall]++;
	}

	/* calculate decaying duty cycles */
	n = perfticks();
	per = n - m->perf.last;
	m->perf.last = n;
	per = (m->perf.period*(HZ-1) + per)/HZ;
	if(per != 0)
		m->perf.period = per;

	m->perf.avg_inidle = (m->perf.avg_inidle*(HZ-1)+m->perf.inidle)/HZ;
	m->perf.inidle = 0;

	m->perf.avg_inintr = (m->perf.avg_inintr*(HZ-1)+m->perf.inintr)/HZ;
	m->perf.inintr = 0;

	/* only one processor gets to compute system load averages */
	if(m->machno != 0)
		return;

	/*
	 *  calculate decaying load average.
	 *  if we decay by (n-1)/n then it takes
	 *  n clock ticks to go from load L to .36 L once
	 *  things quiet down.  it takes about 5 n clock
	 *  ticks to go to zero.  so using HZ means this is
	 *  approximately the load over the last second,
	 *  with a tail lasting about 5 seconds.
	 */
	n = nrun;
	nrun = 0;
	n = (nrdy+n)*1000;
	m->load = (m->load*(HZ-1)+n)/HZ;
}
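/*
 * The .36 above is 1/e.  To check the decay rate (illustration
 * only, assuming HZ = 100): with the system idle, each tick does
 * load = load*99/100, so after 100 ticks (one second)
 *
 *	load = L * (99/100)^100 ~= L * 0.366 ~= L/e
 *
 * and after 5 seconds load is down to about 0.007 L, effectively
 * zero.  m->load is scaled by 1000, like p->cpu.
 */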
static void
pidhash(Proc *p)
{
	int h;

	h = p->pid % nelem(procalloc.ht);
	lock(&procalloc);
	p->pidhash = procalloc.ht[h];
	procalloc.ht[h] = p;
	unlock(&procalloc);
}

static void
pidunhash(Proc *p)
{
	int h;
	Proc **l;

	h = p->pid % nelem(procalloc.ht);
	lock(&procalloc);
	for(l = &procalloc.ht[h]; *l != nil; l = &(*l)->pidhash)
		if(*l == p){
			*l = p->pidhash;
			break;
		}
	unlock(&procalloc);
}

int
procindex(ulong pid)
{
	Proc *p;
	int h;
	int s;

	s = -1;
	h = pid % nelem(procalloc.ht);
	lock(&procalloc);
	for(p = procalloc.ht[h]; p != nil; p = p->pidhash)
		if(p->pid == pid){
			s = p - procalloc.arena;
			break;
		}
	unlock(&procalloc);
	return s;
}