#include	<u.h>
#include	"../port/lib.h"
#include	"mem.h"
#include	"dat.h"
#include	"fns.h"
#include	"../port/error.h"
#include	"edf.h"
#include	<trace.h>

int	coopsched;
int	schedgain = 30;	/* units in seconds */
int	nrdy;		/* count of processes on all run queues */
Ref	noteidalloc;

void updatecpu(Proc*);
int reprioritize(Proc*);

ulong	delayedscheds;	/* statistics */
long skipscheds;
long preempts;
ulong load;

static Ref	pidalloc;

/*
 * Process table: a pid hash, the backing arena, and a free list
 * threaded through Proc.qnext.  Guarded by the embedded Lock.
 */
static struct Procalloc
{
	Lock;
	Proc*	ht[128];
	Proc*	arena;
	Proc*	free;
} procalloc;

enum
{
	Q=10,
	DQ=4,
	Scaling=2,	/* fixed-point scale for tick arithmetic in updatecpu */
};

Schedq	runq[Nrq];	/* one queue per priority level */
ulong	runvec;		/* bit i set iff runq[i] is non-empty */

char *statename[] =
{	/* BUG: generate automatically */
	"Dead",
	"Moribund",
	"Ready",
	"Scheding",
	"Running",
	"Queueing",
	"QueueingR",
	"QueueingW",
	"Wakeme",
	"Broken",
	"Stopped",
	"Rendez",
	"Waitrelease",
};

static void pidhash(Proc*);
static void pidunhash(Proc*);
static void rebalance(void);

/*
 * Always splhi()'ed.
 * Entered once per processor; m->sched is the label sched()
 * returns to when the current process gives up the cpu.
 */
void
schedinit(void)		/* never returns */
{
	Edf *e;

	setlabel(&m->sched);
	if(up) {
		if((e = up->edf) && (e->flags & Admitted))
			edfrecord(up);
		m->proc = 0;
		switch(up->state) {
		case Running:
			/* still runnable: put it back on a run queue */
			ready(up);
			break;
		case Moribund:
			up->state = Dead;
			edfstop(up);
			if (up->edf)
				free(up->edf);
			up->edf = nil;

			/*
			 * Holding locks from pexit:
			 * 	procalloc
			 *	palloc
			 */
			mmurelease(up);

			/* return the Proc to the free list */
			up->qnext = procalloc.free;
			procalloc.free = up;

			unlock(&palloc);
			unlock(&procalloc);
			break;
		}
		up->mach = nil;
		updatecpu(up);
		up = nil;
	}
	sched();
}

/*
 * If changing this routine, look also at sleep().  It
 * contains a copy of the guts of sched().
 */
void
sched(void)
{
	int x[1];
	Proc *p;

	/* scheduling while holding an ilock would deadlock the machine */
	if(m->ilockdepth)
		panic("ilockdepth %d, last lock 0x%p at 0x%lux, sched called from 0x%lux",
			m->ilockdepth, up?up->lastilock:nil,
			(up && up->lastilock)?up->lastilock->pc:0, getcallerpc(x+3));

	if(up){
		/*
		 * Delay the sched until the process gives up its locks,
		 * but don't delay forever (bound of 20 delays).
		 */
		if(up->nlocks.ref && up->state != Moribund && up->delaysched < 20){
			up->delaysched++;
 			delayedscheds++;
			return;
		}
		up->delaysched = 0;

		splhi();

		/* statistics */
		m->cs++;

		procsave(up);
		if(setlabel(&up->sched)){
			/* resumed here when this process is next run */
			procrestore(up);
			spllo();
			return;
		}
		gotolabel(&m->sched);
	}
	p = runproc();
	if(!p->edf){
		updatecpu(p);
		p->priority = reprioritize(p);
	}
	if(p != m->readied)
		m->schedticks = m->ticks + HZ/10;
	m->readied = 0;
	up = p;
	up->state = Running;
	up->mach = MACHP(m->machno);
	m->proc = up;
	mmuswitch(up);
	gotolabel(&up->sched);
}

/* true if any run queue is non-empty */
int
anyready(void)
{
	return runvec;
}

/* true if some ready process has priority strictly above up's */
int
anyhigher(void)
{
	return runvec & ~((1<<(up->priority+1))-1);
}

/*
 *  here once per clock tick to see if we should resched
 */
void
hzsched(void)
{
	/* once a second, rebalance will reprioritize ready procs */
	if(m->machno == 0)
		rebalance();

	/* unless preempted, get to run for at least 100ms */
	if(anyhigher()
	|| (!up->fixedpri && m->ticks > m->schedticks && anyready())){
		m->readied = nil;	/* avoid cooperative scheduling */
		up->delaysched++;
	}
}

/*
 *  here at the end of non-clock interrupts to see if we should preempt the
 *  current process.  Returns 1 if preempted, 0 otherwise.
 */
int
preempted(void)
{
	/* only preempt a Running process, once, for a higher-priority one */
	if(up && up->state == Running)
	if(up->preempted == 0)
	if(anyhigher())
	if(!active.exiting){
		m->readied = nil;	/* avoid cooperative scheduling */
		up->preempted = 1;
		sched();
		splhi();
		up->preempted = 0;
		return 1;
	}
	return 0;
}

/*
 * Update the cpu time average for this particular process,
 * which is about to change from up -> not up or vice versa.
 * p->lastupdate is the last time an updatecpu happened.
 *
 * The cpu time average is a decaying average that lasts
 * about D clock ticks.  D is chosen to be approximately
 * the cpu time of a cpu-intensive "quick job".  A job has to run
 * for approximately D clock ticks before we home in on its
 * actual cpu usage.  Thus if you manage to get in and get out
 * quickly, you won't be penalized during your burst.  Once you
 * start using your share of the cpu for more than about D
 * clock ticks though, your p->cpu hits 1000 (1.0) and you end up
 * below all the other quick jobs.  Interactive tasks, because
 * they basically always use less than their fair share of cpu,
 * will be rewarded.
 *
 * If the process has not been running, then we want to
 * apply the filter
 *
 *	cpu = cpu * (D-1)/D
 *
 * n times, yielding
 *
 *	cpu = cpu * ((D-1)/D)^n
 *
 * but D is big enough that this is approximately
 *
 * 	cpu = cpu * (D-n)/D
 *
 * so we use that instead.
 *
 * If the process has been running, we apply the filter to
 * 1 - cpu, yielding a similar equation.  Note that cpu is
 * stored in fixed point (* 1000).
 *
 * Updatecpu must be called before changing up, in order
 * to maintain accurate cpu usage statistics.  It can be called
 * at any time to bring the stats for a given proc up-to-date.
 */
void
updatecpu(Proc *p)
{
	int n, t, ocpu;
	int D = schedgain*HZ*Scaling;	/* decay window, in scaled ticks */

	/* real-time procs are scheduled by deadline, not cpu share */
	if(p->edf)
		return;

	t = MACHP(0)->ticks*Scaling + Scaling/2;
	n = t - p->lastupdate;
	p->lastupdate = t;

	if(n == 0)
		return;
	if(n > D)
		n = D;

	ocpu = p->cpu;
	if(p != up)
		/* was idle: decay usage toward 0 */
		p->cpu = (ocpu*(D-n))/D;
	else{
		/* was running: decay (1 - cpu) toward 0, i.e. usage toward 1000 */
		t = 1000 - ocpu;
		t = (t*(D-n))/D;
		p->cpu = 1000 - t;
	}

//iprint("pid %d %s for %d cpu %d -> %d\n", p->pid,p==up?"active":"inactive",n, ocpu,p->cpu);
}

/*
 * On average, p has used p->cpu of a cpu recently.
 * Its fair share is conf.nmach/m->load of a cpu.  If it has been getting
 * too much, penalize it.  If it has been getting not enough, reward it.
 * I don't think you can get much more than your fair share that
 * often, so most of the queues are for using less.  Having a priority
 * of 3 means you're just right.  Having a higher priority (up to p->basepri)
 * means you're not using as much as you could.
 */
int
reprioritize(Proc *p)
{
	int fairshare, n, load, ratio;

	load = MACHP(0)->load;
	if(load == 0)
		return p->basepri;

	/*
	 * fairshare = 1.000 * conf.nproc * 1.000/load,
	 * except the decimal point is moved three places
	 * on both load and fairshare.
	 */
	fairshare = (conf.nmach*1000*1000)/load;
	n = p->cpu;
	if(n == 0)
		n = 1;
	/* rounded ratio of fair share to actual usage, capped at basepri */
	ratio = (fairshare+n/2) / n;
	if(ratio > p->basepri)
		ratio = p->basepri;
	if(ratio < 0)
		panic("reprioritize");
//iprint("pid %d cpu %d load %d fair %d pri %d\n", p->pid, p->cpu, load, fairshare, ratio);
	return ratio;
}

/*
 *  add a process to a scheduling queue
 *  (rq selects the priority; runq Lock serializes queue updates)
 */
void
queueproc(Schedq *rq, Proc *p)
{
	int pri;

	pri = rq - runq;
	lock(runq);
	p->priority = pri;
	p->rnext = 0;
	if(rq->tail)
		rq->tail->rnext = p;
	else
		rq->head = p;
	rq->tail = p;
	rq->n++;
	nrdy++;
	runvec |= 1<<pri;
	unlock(runq);
}

/*
 *  try to remove a process from a scheduling queue (called splhi).
 *  Returns nil if the lock is contended, the process moved, or its
 *  state is not yet saved; the caller retries.
 */
Proc*
dequeueproc(Schedq *rq, Proc *tp)
{
	Proc *l, *p;

	if(!canlock(runq))
		return nil;

	/*
	 *  the queue may have changed before we locked runq,
	 *  refind the target process.
	 */
	l = 0;
	for(p = rq->head; p; p = p->rnext){
		if(p == tp)
			break;
		l = p;
	}

	/*
	 *  p->mach==0 only when process state is saved
	 */
	if(p == 0 || p->mach){
		unlock(runq);
		return nil;
	}
	if(p->rnext == 0)
		rq->tail = l;
	if(l)
		l->rnext = p->rnext;
	else
		rq->head = p->rnext;
	if(rq->head == nil)
		runvec &= ~(1<<(rq-runq));
	rq->n--;
	nrdy--;
	if(p->state != Ready)
		print("dequeueproc %s %lud %s\n", p->text, p->pid, statename[p->state]);

	unlock(runq);
	return p;
}

/*
 *  ready(p) picks a new priority for a process and sticks it in the
 *  runq for that priority.
 */
void
ready(Proc *p)
{
	int s, pri;
	Schedq *rq;
	void (*pt)(Proc*, int, vlong);

	s = splhi();
	/* real-time processes are queued by the EDF scheduler */
	if(edfready(p)){
		splx(s);
		return;
	}

	if(up != p)
		m->readied = p;	/* group scheduling */

	updatecpu(p);
	pri = reprioritize(p);
	p->priority = pri;
	rq = &runq[pri];
	p->state = Ready;
	queueproc(rq, p);
	pt = proctrace;
	if(pt)
		pt(p, SReady, 0);
	splx(s);
}

/*
 *  yield the processor and drop our priority
 */
void
yield(void)
{
	if(anyready()){
		/* pretend we just used 1/2 tick */
		up->lastupdate -= Scaling/2;
		sched();
	}
}

/*
 *  recalculate priorities once a second. We need to do this
 *  since priorities will otherwise only be recalculated when
 *  the running process blocks.
 */
ulong balancetime;

static void
rebalance(void)
{
	/* NOTE(review): t is int while m->ticks is ulong; relies on
	 * wraparound-safe subtraction against balancetime — verify on
	 * long uptimes. */
	int pri, npri, t, x;
	Schedq *rq;
	Proc *p;

	t = m->ticks;
	if(t - balancetime < HZ)
		return;
	balancetime = t;

	for(pri=0, rq=runq; pri<Npriq; pri++, rq++){
another:
		p = rq->head;
		if(p == nil)
			continue;
		/* only rebalance procs with affinity to this cpu */
		if(p->mp != MACHP(m->machno))
			continue;
		if(pri == p->basepri)
			continue;
		updatecpu(p);
		npri = reprioritize(p);
		if(npri != pri){
			x = splhi();
			p = dequeueproc(rq, p);
			if(p)
				queueproc(&runq[npri], p);
			splx(x);
			goto another;
		}
	}
}


/*
 *  pick a process to run
 */
Proc*
runproc(void)
{
	Schedq *rq;
	Proc *p;
	ulong start, now;
	int i;
	void (*pt)(Proc*, int, vlong);

	start = perfticks();

	/* cooperative scheduling until the clock ticks */
	if(coopsched && (p=m->readied) && p->mach==0 && p->state==Ready
	&& runq[Nrq-1].head == nil && runq[Nrq-2].head == nil){
		skipscheds++;
		rq = &runq[p->priority];
		goto found;
	}

	preempts++;

loop:
	/*
	 *  find a process that last
	 *  ran on this processor (affinity),
	 *  or one that hasn't moved in a while (load balancing).  Every
	 *  time around the loop affinity goes down.
	 */
	spllo();
	for(i = 0;; i++){
		/*
		 *  find the highest priority target process that this
		 *  processor can run given affinity constraints.
		 *
		 */
		for(rq = &runq[Nrq-1]; rq >= runq; rq--){
			for(p = rq->head; p; p = p->rnext){
				/* i > 0: affinity relaxed after one idle pass */
				if(p->mp == nil || p->mp == MACHP(m->machno)
				|| (!p->wired && i > 0))
					goto found;
			}
		}

		/* waste time or halt the CPU */
		idlehands();

		/* remember how much time we're here */
		now = perfticks();
		m->perf.inidle += now-start;
		start = now;
	}

found:
	splhi();
	p = dequeueproc(rq, p);
	if(p == nil)
		goto loop;

	p->state = Scheding;
	p->mp = MACHP(m->machno);

	if(edflock(p)){
		edfrun(p, rq == &runq[PriEdf]);	/* start deadline timer and do admin */
		edfunlock();
	}
	pt = proctrace;
	if(pt)
		pt(p, SRun, 0);
	return p;
}

/*
 *  can p's pages be paged out?  Only if its state is saved
 *  (p->mach == 0), i.e. it is not running anywhere.
 */
int
canpage(Proc *p)
{
	int ok = 0;

	splhi();
	lock(runq);
	/* Only reliable way to see if we are Running */
	if(p->mach == 0) {
		p->newtlb = 1;
		ok = 1;
	}
	unlock(runq);
	spllo();

	return ok;
}

/*
 *  allocate a Proc from the free list (waiting if none are free)
 *  and initialize it to a clean state.
 */
Proc*
newproc(void)
{
	Proc *p;

	lock(&procalloc);
	for(;;) {
		if(p = procalloc.free)
			break;

		unlock(&procalloc);
		resrcwait("no procs");
		lock(&procalloc);
	}
	procalloc.free = p->qnext;
	unlock(&procalloc);

	p->state = Scheding;
	p->psstate = "New";
	p->mach = 0;
	p->qnext = 0;
	p->nchild = 0;
	p->nwait = 0;
	p->waitq = 0;
	p->parent = 0;
	p->pgrp = 0;
	p->egrp = 0;
	p->fgrp = 0;
	p->rgrp = 0;
	p->pdbg = 0;
	p->fpstate = FPinit;
	p->kp = 0;
	p->procctl = 0;
	p->notepending = 0;
	p->ureg = 0;
	p->privatemem = 0;
	p->noswap = 0;
	p->errstr = p->errbuf0;
	p->syserrstr =
p->errbuf1;
	p->errbuf0[0] = '\0';
	p->errbuf1[0] = '\0';
	p->nlocks.ref = 0;
	p->delaysched = 0;
	p->trace = 0;
	kstrdup(&p->user, "*nouser");
	kstrdup(&p->text, "*notext");
	kstrdup(&p->args, "");
	p->nargs = 0;
	p->setargs = 0;
	memset(p->seg, 0, sizeof p->seg);
	p->pid = incref(&pidalloc);
	pidhash(p);
	p->noteid = incref(&noteidalloc);
	/* pid/noteid 0 is reserved; wrap to 0 would break lookups */
	if(p->pid==0 || p->noteid==0)
		panic("pidalloc");
	if(p->kstack == 0)
		p->kstack = smalloc(KSTACK);

	/* sched params */
	p->mp = 0;
	p->wired = 0;
	procpriority(p, PriNormal, 0);
	p->cpu = 0;
	p->lastupdate = MACHP(0)->ticks*Scaling;
	p->edf = nil;

	return p;
}

/*
 *  wire this proc to a machine
 *  (bm < 0 picks the machine with the fewest wired procs)
 */
void
procwired(Proc *p, int bm)
{
	Proc *pp;
	int i;
	char nwired[MAXMACH];
	Mach *wm;

	if(bm < 0){
		/* pick a machine to wire to */
		memset(nwired, 0, sizeof(nwired));
		p->wired = 0;
		pp = proctab(0);
		for(i=0; i<conf.nproc; i++, pp++){
			wm = pp->wired;
			if(wm && pp->pid)
				nwired[wm->machno]++;
		}
		bm = 0;
		for(i=0; i<conf.nmach; i++)
			if(nwired[i] < nwired[bm])
				bm = i;
	} else {
		/* use the virtual machine requested */
		bm = bm % conf.nmach;
	}

	p->wired = MACHP(bm);
	p->mp = p->wired;
}

/*
 *  set base/current priority, clamped to [0, Npriq);
 *  fixed != 0 exempts the proc from dynamic reprioritization
 */
void
procpriority(Proc *p, int pri, int fixed)
{
	if(pri >= Npriq)
		pri = Npriq - 1;
	else if(pri < 0)
		pri = 0;
	p->basepri = pri;
	p->priority = pri;
	if(fixed){
		p->fixedpri = 1;
	} else {
		p->fixedpri = 0;
	}
}

void
procinit0(void)		/* bad planning - clashes with devproc.c */
{
	Proc *p;
	int i;

	/* allocate the whole Proc arena and thread the free list */
	procalloc.free = xalloc(conf.nproc*sizeof(Proc));
	if(procalloc.free == nil)
		panic("cannot allocate %lud procs\n", conf.nproc);
	procalloc.arena = procalloc.free;

	p = procalloc.free;
	for(i=0; i<conf.nproc-1; i++,p++)
		p->qnext = p+1;
	p->qnext = 0;
}

/*
 *  sleep if a condition is not true.  Another process will
 *  awaken us after it sets the condition.  When we awaken
 *  the condition may no longer be true.
 *
 *  we lock both the process and the rendezvous to keep r->p
 *  and p->r synchronized.
 */
void
sleep(Rendez *r, int (*f)(void*), void *arg)
{
	int s;
	void (*pt)(Proc*, int, vlong);

	s = splhi();

	/* sleeping with locks held can deadlock the waker */
	if(up->nlocks.ref)
		print("process %lud sleeps with %lud locks held, last lock 0x%p locked at pc 0x%lux, sleep called from 0x%lux\n",
			up->pid, up->nlocks.ref, up->lastlock, up->lastlock->pc, getcallerpc(&r));
	lock(r);
	lock(&up->rlock);
	if(r->p){
		print("double sleep called from 0x%lux, %lud %lud\n", getcallerpc(&r), r->p->pid, up->pid);
		dumpstack();
	}

	/*
	 *  Wakeup only knows there may be something to do by testing
	 *  r->p in order to get something to lock on.
	 *  Flush that information out to memory in case the sleep is
	 *  committed.
	 */
	r->p = up;

	if((*f)(arg) || up->notepending){
		/*
		 *  if condition happened or a note is pending
		 *  never mind
		 */
		r->p = nil;
		unlock(&up->rlock);
		unlock(r);
	} else {
		/*
		 *  now we are committed to
		 *  change state and call scheduler
		 */
		pt = proctrace;
		if(pt)
			pt(up, SSleep, 0);
		up->state = Wakeme;
		up->r = r;

		/* statistics */
		m->cs++;

		/* inlined guts of sched(); see comment above sched() */
		procsave(up);
		if(setlabel(&up->sched)) {
			/*
			 *  here when the process is awakened
			 */
			procrestore(up);
			spllo();
		} else {
			/*
			 *  here to go to sleep (i.e.
			 *  stop Running)
			 */
			unlock(&up->rlock);
			unlock(r);
			gotolabel(&m->sched);
		}
	}

	/* a pending note interrupts the sleep */
	if(up->notepending) {
		up->notepending = 0;
		splx(s);
		error(Eintr);
	}

	splx(s);
}

/* timer-augmented sleep condition: true once the timer fired or cond holds */
static int
tfn(void *arg)
{
	return up->trend == nil || up->tfn(arg);
}

/* timer callback: wake the rendezvous the proc is tsleeping on */
void
twakeup(Ureg*, Timer *t)
{
	Proc *p;
	Rendez *trend;

	p = t->ta;
	trend = p->trend;
	p->trend = 0;
	if(trend)
		wakeup(trend);
}

/* Sleep until todget() >= ns.  NOTE: ns is not an interval */
void
tnsleep(Rendez *r, int (*fn)(void*), void *arg, vlong ns)
{
	if (up->tt){
		print("tnsleep: timer active: mode %d, tf 0x%lux\n", up->tmode, up->tf);
		timerdel(up);
	}
	up->tns = ns;
	up->tf = twakeup;
	up->tmode = Tabsolute;
	up->ta = up;
	up->trend = r;
	up->tfn = fn;
	timeradd(up);

	if(waserror()){
		timerdel(up);
		nexterror();
	}
	sleep(r, tfn, arg);
	if (up->tt)
		timerdel(up);
	up->twhen = 0;
	poperror();
}

/* sleep with a relative timeout of ms milliseconds */
void
tsleep(Rendez *r, int (*fn)(void*), void *arg, ulong ms)
{
	if (up->tt){
		print("tsleep: timer active: mode %d, tf 0x%lux\n", up->tmode, up->tf);
		timerdel(up);
	}
	up->tns = MS2NS(ms);
	up->tf = twakeup;
	up->tmode = Trelative;
	up->ta = up;
	up->trend = r;
	up->tfn = fn;
	timeradd(up);

	if(waserror()){
		timerdel(up);
		nexterror();
	}
	sleep(r, tfn, arg);
	if (up->tt)
		timerdel(up);
	up->twhen = 0;
	poperror();
}

/*
 *  Expects that only one process can call wakeup for any given Rendez.
 *  We hold both locks to ensure that r->p and p->r remain consistent.
 *  Richard Miller has a better solution that doesn't require both to
 *  be held simultaneously, but I'm a paranoid - presotto.
860 */ 861 Proc* 862 wakeup(Rendez *r) 863 { 864 Proc *p; 865 int s; 866 867 s = splhi(); 868 869 lock(r); 870 p = r->p; 871 872 if(p != nil){ 873 lock(&p->rlock); 874 if(p->state != Wakeme || p->r != r) 875 panic("wakeup: state"); 876 r->p = nil; 877 p->r = nil; 878 ready(p); 879 unlock(&p->rlock); 880 } 881 unlock(r); 882 883 splx(s); 884 885 return p; 886 } 887 888 /* 889 * if waking a sleeping process, this routine must hold both 890 * p->rlock and r->lock. However, it can't know them in 891 * the same order as wakeup causing a possible lock ordering 892 * deadlock. We break the deadlock by giving up the p->rlock 893 * lock if we can't get the r->lock and retrying. 894 */ 895 int 896 postnote(Proc *p, int dolock, char *n, int flag) 897 { 898 int s, ret; 899 Rendez *r; 900 Proc *d, **l; 901 902 if(dolock) 903 qlock(&p->debug); 904 905 if(flag != NUser && (p->notify == 0 || p->notified)) 906 p->nnote = 0; 907 908 ret = 0; 909 if(p->nnote < NNOTE) { 910 strcpy(p->note[p->nnote].msg, n); 911 p->note[p->nnote++].flag = flag; 912 ret = 1; 913 } 914 p->notepending = 1; 915 if(dolock) 916 qunlock(&p->debug); 917 918 /* this loop is to avoid lock ordering problems. */ 919 for(;;){ 920 s = splhi(); 921 lock(&p->rlock); 922 r = p->r; 923 924 /* waiting for a wakeup? 
		 */
		if(r == nil)
			break;	/* no */

		/* try for the second lock */
		if(canlock(r)){
			if(p->state != Wakeme || r->p != p)
				panic("postnote: state %d %d %d", r->p != p, p->r != r, p->state);
			p->r = nil;
			r->p = nil;
			ready(p);
			unlock(r);
			break;
		}

		/* give other process time to get out of critical section and try again */
		unlock(&p->rlock);
		splx(s);
		sched();
	}
	unlock(&p->rlock);
	splx(s);

	if(p->state != Rendezvous)
		return ret;

	/* Try and pull out of a rendezvous */
	lock(p->rgrp);
	if(p->state == Rendezvous) {
		/* ~0 is the rendezvous error return; unchain p from the hash */
		p->rendval = ~0;
		l = &REND(p->rgrp, p->rendtag);
		for(d = *l; d; d = d->rendhash) {
			if(d == p) {
				*l = p->rendhash;
				break;
			}
			l = &d->rendhash;
		}
		ready(p);
	}
	unlock(p->rgrp);
	return ret;
}

/*
 *  weird thing: keep at most NBROKEN around
 */
#define	NBROKEN 4
struct
{
	QLock;
	int	n;
	Proc	*p[NBROKEN];
}broken;

/*
 *  park the current process in the broken list (evicting the
 *  oldest if full) and give up the cpu; does not return until
 *  the process is unbroken.
 */
void
addbroken(Proc *p)
{
	qlock(&broken);
	if(broken.n == NBROKEN) {
		ready(broken.p[0]);
		memmove(&broken.p[0], &broken.p[1], sizeof(Proc*)*(NBROKEN-1));
		--broken.n;
	}
	broken.p[broken.n++] = p;
	qunlock(&broken);

	edfstop(up);
	p->state = Broken;
	p->psstate = 0;
	sched();
}

/* remove p from the broken list and make it runnable again */
void
unbreak(Proc *p)
{
	int b;

	qlock(&broken);
	for(b=0; b < broken.n; b++)
		if(broken.p[b] == p) {
			broken.n--;
			memmove(&broken.p[b], &broken.p[b+1],
					sizeof(Proc*)*(NBROKEN-(b+1)));
			ready(p);
			break;
		}
	qunlock(&broken);
}

/* release every broken process; returns how many were freed */
int
freebroken(void)
{
	int i, n;

	qlock(&broken);
	n = broken.n;
	for(i=0; i<n; i++) {
		ready(broken.p[i]);
		broken.p[i] = 0;
	}
	broken.n = 0;
	qunlock(&broken);
	return n;
}

void
pexit(char *exitstr, int freemem)
{
	Proc *p;
	Segment **s,
 **es;
	long utime, stime;
	Waitq *wq, *f, *next;
	Fgrp *fgrp;
	Egrp *egrp;
	Rgrp *rgrp;
	Pgrp *pgrp;
	Chan *dot;
	void (*pt)(Proc*, int, vlong);

	up->alarm = 0;
	if (up->tt)
		timerdel(up);
	pt = proctrace;
	if(pt)
		pt(up, SDead, 0);

	/* nil out all the resources under lock (free later) */
	qlock(&up->debug);
	fgrp = up->fgrp;
	up->fgrp = nil;
	egrp = up->egrp;
	up->egrp = nil;
	rgrp = up->rgrp;
	up->rgrp = nil;
	pgrp = up->pgrp;
	up->pgrp = nil;
	dot = up->dot;
	up->dot = nil;
	qunlock(&up->debug);

	if(fgrp)
		closefgrp(fgrp);
	if(egrp)
		closeegrp(egrp);
	if(rgrp)
		closergrp(rgrp);
	if(dot)
		cclose(dot);
	if(pgrp)
		closepgrp(pgrp);

	/*
	 *  if not a kernel process and have a parent,
	 *  do some housekeeping.
	 */
	if(up->kp == 0) {
		p = up->parent;
		if(p == 0) {
			if(exitstr == 0)
				exitstr = "unknown";
			panic("boot process died: %s", exitstr);
		}

		/* must not error out of the allocation below */
		while(waserror())
			;

		wq = smalloc(sizeof(Waitq));
		poperror();

		wq->w.pid = up->pid;
		utime = up->time[TUser] + up->time[TCUser];
		stime = up->time[TSys] + up->time[TCSys];
		wq->w.time[TUser] = tk2ms(utime);
		wq->w.time[TSys] = tk2ms(stime);
		wq->w.time[TReal] = tk2ms(MACHP(0)->ticks - up->time[TReal]);
		if(exitstr && exitstr[0])
			snprint(wq->w.msg, sizeof(wq->w.msg), "%s %lud: %s", up->text, up->pid, exitstr);
		else
			wq->w.msg[0] = '\0';

		lock(&p->exl);
		/*
		 *  If my parent is no longer alive, or if there would be more
		 *  than 128 zombie child processes for my parent, then don't
		 *  leave a wait record behind.  This helps prevent badly
		 *  written daemon processes from accumulating lots of wait
		 *  records.
		 */
		if(p->pid == up->parentpid && p->state != Broken && p->nwait < 128) {
			p->nchild--;
			p->time[TCUser] += utime;
			p->time[TCSys] += stime;

			wq->next = p->waitq;
			p->waitq = wq;
			p->nwait++;

			wakeup(&p->waitr);
			unlock(&p->exl);
		}
		else {
			unlock(&p->exl);
			free(wq);
		}
	}

	if(!freemem)
		addbroken(up);

	/* free all memory segments */
	qlock(&up->seglock);
	es = &up->seg[NSEG];
	for(s = up->seg; s < es; s++) {
		if(*s) {
			putseg(*s);
			*s = 0;
		}
	}
	qunlock(&up->seglock);

	lock(&up->exl);		/* Prevent my children from leaving waits */
	pidunhash(up);
	up->pid = 0;
	wakeup(&up->waitr);
	unlock(&up->exl);

	/* discard wait records nobody will ever read */
	for(f = up->waitq; f; f = next) {
		next = f->next;
		free(f);
	}

	/* release debuggers */
	qlock(&up->debug);
	if(up->pdbg) {
		wakeup(&up->pdbg->sleep);
		up->pdbg = 0;
	}
	qunlock(&up->debug);

	/* Sched must not loop for these locks */
	lock(&procalloc);
	lock(&palloc);

	edfstop(up);
	up->state = Moribund;
	sched();
	panic("pexit");
}

/* sleep condition for pwait: a wait record has arrived */
int
haswaitq(void *x)
{
	Proc *p;

	p = (Proc *)x;
	return p->waitq != 0;
}

/*
 *  wait for a child to exit; copies its Waitmsg to w (if non-nil)
 *  and returns the child's pid
 */
ulong
pwait(Waitmsg *w)
{
	ulong cpid;
	Waitq *wq;

	/* only one waiter at a time per process */
	if(!canqlock(&up->qwaitr))
		error(Einuse);

	if(waserror()) {
		qunlock(&up->qwaitr);
		nexterror();
	}

	lock(&up->exl);
	if(up->nchild == 0 && up->waitq == 0) {
		unlock(&up->exl);
		error(Enochild);
	}
	unlock(&up->exl);

	sleep(&up->waitr, haswaitq, up);

	lock(&up->exl);
	wq = up->waitq;
	up->waitq = wq->next;
	up->nwait--;
	unlock(&up->exl);

	qunlock(&up->qwaitr);
	poperror();

	if(w)
		memmove(w, &wq->w, sizeof(Waitmsg));
	cpid = wq->w.pid;
	free(wq);
	return cpid;
}

Proc*
proctab(int i)
{
	return &procalloc.arena[i];
}

/* print one process's state for debugging */
void
dumpaproc(Proc *p)
{
	ulong bss;
	char *s;

	if(p == 0)
		return;

	bss = 0;
	if(p->seg[BSEG])
		bss = p->seg[BSEG]->top;

	s = p->psstate;
	if(s == 0)
		s = statename[p->state];
	print("%3lud:%10s pc %8lux dbgpc %8lux %8s (%s) ut %ld st %ld bss %lux qpc %lux nl %lud nd %lud lpc %lux pri %lud\n",
		p->pid, p->text, p->pc, dbgpc(p), s, statename[p->state],
		p->time[0], p->time[1], bss, p->qpc, p->nlocks.ref, p->delaysched, p->lastlock ? p->lastlock->pc : 0, p->priority);
}

/* dump every live process */
void
procdump(void)
{
	int i;
	Proc *p;

	if(up)
		print("up %lud\n", up->pid);
	else
		print("no current process\n");
	for(i=0; i<conf.nproc; i++) {
		p = &procalloc.arena[i];
		if(p->state == Dead)
			continue;

		dumpaproc(p);
	}
}

/*
 *  wait till all processes have flushed their mmu
 *  state about segement s
 */
void
procflushseg(Segment *s)
{
	int i, ns, nm, nwait;
	Proc *p;

	/*
	 *  tell all processes with this
	 *  segment to flush their mmu's
	 */
	nwait = 0;
	for(i=0; i<conf.nproc; i++) {
		p = &procalloc.arena[i];
		if(p->state == Dead)
			continue;
		for(ns = 0; ns < NSEG; ns++)
			if(p->seg[ns] == s){
				p->newtlb = 1;
				for(nm = 0; nm < conf.nmach; nm++){
					if(MACHP(nm)->proc == p){
						MACHP(nm)->flushmmu = 1;
						nwait++;
					}
				}
				break;
			}
	}

	if(nwait == 0)
		return;

	/*
	 *  wait for all processors to take a clock interrupt
	 *  and flush their mmu's
	 */
	for(nm = 0; nm < conf.nmach; nm++)
		if(MACHP(nm) != m)
			while(MACHP(nm)->flushmmu)
				sched();
}

/* print the contents of every non-empty run queue */
void
scheddump(void)
{
	Proc *p;
	Schedq *rq;

	for(rq = &runq[Nrq-1]; rq >= runq; rq--){
		if(rq->head == 0)
			continue;
		print("rq%ld:", rq-runq);
		for(p = rq->head; p; p = p->rnext)
			print(" %lud(%lud)", p->pid, m->ticks - p->readytime);
		print("\n");
		delay(150);
	}
	print("nrdy %d\n", nrdy);
}

/*
 *  start a kernel process running func(arg); it inherits
 *  slash, dot and note state from the current process and
 *  shares a single kernel process group.
 */
void
kproc(char *name, void (*func)(void *), void *arg)
{
	Proc *p;
	static Pgrp *kpgrp;

	p = newproc();
	p->psstate = 0;
	p->procmode = 0640;
	p->kp = 1;
	p->noswap = 1;

	p->fpsave = up->fpsave;
	p->scallnr = up->scallnr;
	p->s = up->s;
	p->nerrlab = 0;
	p->slash = up->slash;
	p->dot = up->dot;
	incref(p->dot);

	memmove(p->note, up->note, sizeof(p->note));
	p->nnote = up->nnote;
	p->notified = 0;
	p->lastnote = up->lastnote;
	p->notify = up->notify;
	p->ureg = 0;
	p->dbgreg = 0;

	procpriority(p, PriKproc, 0);

	kprocchild(p, func, arg);

	kstrdup(&p->user, eve);
	kstrdup(&p->text, name);
	if(kpgrp == 0)
		kpgrp = newpgrp();
	p->pgrp = kpgrp;
	incref(kpgrp);

	memset(p->time, 0, sizeof(p->time));
	p->time[TReal] = MACHP(0)->ticks;
	ready(p);
	/*
	 *  since the bss/data segments are now shareable,
	 *  any mmu info about this process is now stale
	 *  and has to be discarded.
	 */
	p->newtlb = 1;
	flushmmu();
}

/*
 *  called splhi() by notify().  See comment in notify for the
 *  reasoning.
1384 */ 1385 void 1386 procctl(Proc *p) 1387 { 1388 char *state; 1389 ulong s; 1390 1391 switch(p->procctl) { 1392 case Proc_exitbig: 1393 spllo(); 1394 pexit("Killed: Insufficient physical memory", 1); 1395 1396 case Proc_exitme: 1397 spllo(); /* pexit has locks in it */ 1398 pexit("Killed", 1); 1399 1400 case Proc_traceme: 1401 if(p->nnote == 0) 1402 return; 1403 /* No break */ 1404 1405 case Proc_stopme: 1406 p->procctl = 0; 1407 state = p->psstate; 1408 p->psstate = "Stopped"; 1409 /* free a waiting debugger */ 1410 s = spllo(); 1411 qlock(&p->debug); 1412 if(p->pdbg) { 1413 wakeup(&p->pdbg->sleep); 1414 p->pdbg = 0; 1415 } 1416 qunlock(&p->debug); 1417 splhi(); 1418 p->state = Stopped; 1419 sched(); 1420 p->psstate = state; 1421 splx(s); 1422 return; 1423 } 1424 } 1425 1426 #include "errstr.h" 1427 1428 void 1429 error(char *err) 1430 { 1431 spllo(); 1432 1433 assert(up->nerrlab < NERR); 1434 kstrcpy(up->errstr, err, ERRMAX); 1435 setlabel(&up->errlab[NERR-1]); 1436 nexterror(); 1437 } 1438 1439 void 1440 nexterror(void) 1441 { 1442 gotolabel(&up->errlab[--up->nerrlab]); 1443 } 1444 1445 void 1446 exhausted(char *resource) 1447 { 1448 char buf[ERRMAX]; 1449 1450 sprint(buf, "no free %s", resource); 1451 iprint("%s\n", buf); 1452 error(buf); 1453 } 1454 1455 void 1456 killbig(void) 1457 { 1458 int i; 1459 Segment *s; 1460 ulong l, max; 1461 Proc *p, *ep, *kp; 1462 1463 max = 0; 1464 kp = 0; 1465 ep = procalloc.arena+conf.nproc; 1466 for(p = procalloc.arena; p < ep; p++) { 1467 if(p->state == Dead || p->kp) 1468 continue; 1469 l = 0; 1470 for(i=1; i<NSEG; i++) { 1471 s = p->seg[i]; 1472 if(s != 0) 1473 l += s->top - s->base; 1474 } 1475 if(l > max && strcmp(p->text, "kfs") != 0){ 1476 kp = p; 1477 max = l; 1478 } 1479 } 1480 kp->procctl = Proc_exitbig; 1481 for(i = 0; i < NSEG; i++) { 1482 s = kp->seg[i]; 1483 if(s != 0 && canqlock(&s->lk)) { 1484 mfreeseg(s, s->base, (s->top - s->base)/BY2PG); 1485 qunlock(&s->lk); 1486 } 1487 } 1488 print("%lud: %s killed 
because no swap configured\n", kp->pid, kp->text); 1489 } 1490 1491 /* 1492 * change ownership to 'new' of all processes owned by 'old'. Used when 1493 * eve changes. 1494 */ 1495 void 1496 renameuser(char *old, char *new) 1497 { 1498 Proc *p, *ep; 1499 1500 ep = procalloc.arena+conf.nproc; 1501 for(p = procalloc.arena; p < ep; p++) 1502 if(p->user!=nil && strcmp(old, p->user)==0) 1503 kstrdup(&p->user, new); 1504 } 1505 1506 /* 1507 * time accounting called by clock() splhi'd 1508 */ 1509 void 1510 accounttime(void) 1511 { 1512 Proc *p; 1513 ulong n, per; 1514 static ulong nrun; 1515 1516 p = m->proc; 1517 if(p) { 1518 nrun++; 1519 p->time[p->insyscall]++; 1520 } 1521 1522 /* calculate decaying duty cycles */ 1523 n = perfticks(); 1524 per = n - m->perf.last; 1525 m->perf.last = n; 1526 per = (m->perf.period*(HZ-1) + per)/HZ; 1527 if(per != 0) 1528 m->perf.period = per; 1529 1530 m->perf.avg_inidle = (m->perf.avg_inidle*(HZ-1)+m->perf.inidle)/HZ; 1531 m->perf.inidle = 0; 1532 1533 m->perf.avg_inintr = (m->perf.avg_inintr*(HZ-1)+m->perf.inintr)/HZ; 1534 m->perf.inintr = 0; 1535 1536 /* only one processor gets to compute system load averages */ 1537 if(m->machno != 0) 1538 return; 1539 1540 /* 1541 * calculate decaying load average. 1542 * if we decay by (n-1)/n then it takes 1543 * n clock ticks to go from load L to .36 L once 1544 * things quiet down. it takes about 5 n clock 1545 * ticks to go to zero. so using HZ means this is 1546 * approximately the load over the last second, 1547 * with a tail lasting about 5 seconds. 
	 */
	n = nrun;
	nrun = 0;
	n = (nrdy+n)*1000;	/* load in fixed point (* 1000) */
	m->load = (m->load*(HZ-1)+n)/HZ;
}

/* enter p into the pid hash table */
static void
pidhash(Proc *p)
{
	int h;

	h = p->pid % nelem(procalloc.ht);
	lock(&procalloc);
	p->pidhash = procalloc.ht[h];
	procalloc.ht[h] = p;
	unlock(&procalloc);
}

/* remove p from the pid hash table */
static void
pidunhash(Proc *p)
{
	int h;
	Proc **l;

	h = p->pid % nelem(procalloc.ht);
	lock(&procalloc);
	for(l = &procalloc.ht[h]; *l != nil; l = &(*l)->pidhash)
		if(*l == p){
			*l = p->pidhash;
			break;
		}
	unlock(&procalloc);
}

/*
 *  map a pid to its index in the proc arena;
 *  returns -1 if no process has that pid
 */
int
procindex(ulong pid)
{
	Proc *p;
	int h;
	int s;

	s = -1;
	h = pid % nelem(procalloc.ht);
	lock(&procalloc);
	for(p = procalloc.ht[h]; p != nil; p = p->pidhash)
		if(p->pid == pid){
			s = p - procalloc.arena;
			break;
		}
	unlock(&procalloc);
	return s;
}