1 #include <u.h> 2 #include "../port/lib.h" 3 #include "mem.h" 4 #include "dat.h" 5 #include "fns.h" 6 #include "../port/error.h" 7 #include "edf.h" 8 #include <trace.h> 9 10 int schedgain = 30; /* units in seconds */ 11 int nrdy; 12 Ref noteidalloc; 13 14 void updatecpu(Proc*); 15 int reprioritize(Proc*); 16 17 ulong delayedscheds; /* statistics */ 18 long skipscheds; 19 long preempts; 20 ulong load; 21 22 static Ref pidalloc; 23 24 static struct Procalloc 25 { 26 Lock; 27 Proc* ht[128]; 28 Proc* arena; 29 Proc* free; 30 } procalloc; 31 32 enum 33 { 34 Q=10, 35 DQ=4, 36 Scaling=2, 37 }; 38 39 Schedq runq[Nrq]; 40 ulong runvec; 41 42 char *statename[] = 43 { /* BUG: generate automatically */ 44 "Dead", 45 "Moribund", 46 "Ready", 47 "Scheding", 48 "Running", 49 "Queueing", 50 "QueueingR", 51 "QueueingW", 52 "Wakeme", 53 "Broken", 54 "Stopped", 55 "Rendez", 56 "Waitrelease", 57 }; 58 59 static void pidhash(Proc*); 60 static void pidunhash(Proc*); 61 static void rebalance(void); 62 63 /* 64 * Always splhi()'ed. 65 */ 66 void 67 schedinit(void) /* never returns */ 68 { 69 Edf *e; 70 71 setlabel(&m->sched); 72 if(up) { 73 if((e = up->edf) && (e->flags & Admitted)) 74 edfrecord(up); 75 m->proc = 0; 76 switch(up->state) { 77 case Running: 78 ready(up); 79 break; 80 case Moribund: 81 up->state = Dead; 82 edfstop(up); 83 if (up->edf) 84 free(up->edf); 85 up->edf = nil; 86 87 /* 88 * Holding locks from pexit: 89 * procalloc 90 * palloc 91 */ 92 mmurelease(up); 93 94 up->qnext = procalloc.free; 95 procalloc.free = up; 96 97 unlock(&palloc); 98 unlock(&procalloc); 99 break; 100 } 101 up->mach = nil; 102 updatecpu(up); 103 up = nil; 104 } 105 sched(); 106 } 107 108 /* 109 * If changing this routine, look also at sleep(). It 110 * contains a copy of the guts of sched(). 111 */ 112 void 113 sched(void) 114 { 115 Proc *p; 116 117 if(m->ilockdepth) 118 panic("ilockdepth %d, last lock 0x%p at 0x%lux, sched called from 0x%lux", 119 m->ilockdepth, up?up->lastilock:nil, 120 (up && up->lastilock)?up->lastilock->pc:0, 121 getcallerpc(&p+2)); 122 123 if(up){ 124 /* 125 * Delay the sched until the process gives up the locks 126 * it is holding. This avoids dumb lock loops. 127 * Don't delay if the process is Moribund. 128 * It called sched to die. 129 * But do sched eventually. This avoids a missing unlock 130 * from hanging the entire kernel. 131 * But don't reschedule procs holding palloc or procalloc. 132 * Those are far too important to be holding while asleep. 133 * 134 * This test is not exact. There can still be a few instructions 135 * in the middle of taslock when a process holds a lock 136 * but Lock.p has not yet been initialized. 137 */ 138 if(up->nlocks.ref) 139 if(up->state != Moribund) 140 if(up->delaysched < 20 141 || palloc.Lock.p == up 142 || procalloc.Lock.p == up){ 143 up->delaysched++; 144 delayedscheds++; 145 return; 146 } 147 up->delaysched = 0; 148 149 splhi(); 150 151 /* statistics */ 152 m->cs++; 153 154 procsave(up); 155 if(setlabel(&up->sched)){ 156 procrestore(up); 157 spllo(); 158 return; 159 } 160 gotolabel(&m->sched); 161 } 162 p = runproc(); 163 if(!p->edf){ 164 updatecpu(p); 165 p->priority = reprioritize(p); 166 } 167 if(p != m->readied) 168 m->schedticks = m->ticks + HZ/10; 169 m->readied = 0; 170 up = p; 171 up->state = Running; 172 up->mach = MACHP(m->machno); 173 m->proc = up; 174 mmuswitch(up); 175 gotolabel(&up->sched); 176 } 177 178 int 179 anyready(void) 180 { 181 return runvec; 182 } 183 184 int 185 anyhigher(void) 186 { 187 return runvec & ~((1<<(up->priority+1))-1); 188 } 189 190 /* 191 * here once per clock tick to see if we should resched 192 */ 193 void 194 hzsched(void) 195 { 196 /* once a second, rebalance will reprioritize ready procs */ 197 if(m->machno == 0) 198 rebalance(); 199 200 /* unless preempted, get to run for at least 100ms */ 201 if(anyhigher() 202 || (!up->fixedpri && m->ticks > m->schedticks && anyready())){ 203 m->readied = nil; /* avoid cooperative scheduling */ 204 up->delaysched++; 205 } 206 } 207 208 /* 209 * here at the end of non-clock interrupts to see if we should preempt the 210 * current process. Returns 1 if preempted, 0 otherwise. 211 */ 212 int 213 preempted(void) 214 { 215 if(up && up->state == Running) 216 if(up->preempted == 0) 217 if(anyhigher()) 218 if(!active.exiting){ 219 m->readied = nil; /* avoid cooperative scheduling */ 220 up->preempted = 1; 221 sched(); 222 splhi(); 223 up->preempted = 0; 224 return 1; 225 } 226 return 0; 227 } 228 229 /* 230 * Update the cpu time average for this particular process, 231 * which is about to change from up -> not up or vice versa. 232 * p->lastupdate is the last time an updatecpu happened. 233 * 234 * The cpu time average is a decaying average that lasts 235 * about D clock ticks. D is chosen to be approximately 236 * the cpu time of a cpu-intensive "quick job". A job has to run 237 * for approximately D clock ticks before we home in on its 238 * actual cpu usage. Thus if you manage to get in and get out 239 * quickly, you won't be penalized during your burst. Once you 240 * start using your share of the cpu for more than about D 241 * clock ticks though, your p->cpu hits 1000 (1.0) and you end up 242 * below all the other quick jobs. Interactive tasks, because 243 * they basically always use less than their fair share of cpu, 244 * will be rewarded. 245 * 246 * If the process has not been running, then we want to 247 * apply the filter 248 * 249 * cpu = cpu * (D-1)/D 250 * 251 * n times, yielding 252 * 253 * cpu = cpu * ((D-1)/D)^n 254 * 255 * but D is big enough that this is approximately 256 * 257 * cpu = cpu * (D-n)/D 258 * 259 * so we use that instead. 260 * 261 * If the process has been running, we apply the filter to 262 * 1 - cpu, yielding a similar equation. Note that cpu is 263 * stored in fixed point (* 1000). 264 * 265 * Updatecpu must be called before changing up, in order 266 * to maintain accurate cpu usage statistics. It can be called 267 * at any time to bring the stats for a given proc up-to-date. 268 */ 269 void 270 updatecpu(Proc *p) 271 { 272 int n, t, ocpu; 273 int D = schedgain*HZ*Scaling; 274 275 if(p->edf) 276 return; 277 278 t = MACHP(0)->ticks*Scaling + Scaling/2; 279 n = t - p->lastupdate; 280 p->lastupdate = t; 281 282 if(n == 0) 283 return; 284 if(n > D) 285 n = D; 286 287 ocpu = p->cpu; 288 if(p != up) 289 p->cpu = (ocpu*(D-n))/D; 290 else{ 291 t = 1000 - ocpu; 292 t = (t*(D-n))/D; 293 p->cpu = 1000 - t; 294 } 295 296 //iprint("pid %d %s for %d cpu %d -> %d\n", p->pid,p==up?"active":"inactive",n, ocpu,p->cpu); 297 } 298 299 /* 300 * On average, p has used p->cpu of a cpu recently. 301 * Its fair share is conf.nmach/m->load of a cpu. If it has been getting 302 * too much, penalize it. If it has been getting not enough, reward it. 303 * I don't think you can get much more than your fair share that 304 * often, so most of the queues are for using less. Having a priority 305 * of 3 means you're just right. Having a higher priority (up to p->basepri) 306 * means you're not using as much as you could. 307 */ 308 int 309 reprioritize(Proc *p) 310 { 311 int fairshare, n, load, ratio; 312 313 load = MACHP(0)->load; 314 if(load == 0) 315 return p->basepri; 316 317 /* 318 * fairshare = 1.000 * conf.nproc * 1.000/load, 319 * except the decimal point is moved three places 320 * on both load and fairshare. 321 */ 322 fairshare = (conf.nmach*1000*1000)/load; 323 n = p->cpu; 324 if(n == 0) 325 n = 1; 326 ratio = (fairshare+n/2) / n; 327 if(ratio > p->basepri) 328 ratio = p->basepri; 329 if(ratio < 0) 330 panic("reprioritize"); 331 //iprint("pid %d cpu %d load %d fair %d pri %d\n", p->pid, p->cpu, load, fairshare, ratio); 332 return ratio; 333 } 334 335 /* 336 * add a process to a scheduling queue 337 */ 338 void 339 queueproc(Schedq *rq, Proc *p) 340 { 341 int pri; 342 343 pri = rq - runq; 344 lock(runq); 345 p->priority = pri; 346 p->rnext = 0; 347 if(rq->tail) 348 rq->tail->rnext = p; 349 else 350 rq->head = p; 351 rq->tail = p; 352 rq->n++; 353 nrdy++; 354 runvec |= 1<<pri; 355 unlock(runq); 356 } 357 358 /* 359 * try to remove a process from a scheduling queue (called splhi) 360 */ 361 Proc* 362 dequeueproc(Schedq *rq, Proc *tp) 363 { 364 Proc *l, *p; 365 366 if(!canlock(runq)) 367 return nil; 368 369 /* 370 * the queue may have changed before we locked runq, 371 * refind the target process. 372 */ 373 l = 0; 374 for(p = rq->head; p; p = p->rnext){ 375 if(p == tp) 376 break; 377 l = p; 378 } 379 380 /* 381 * p->mach==0 only when process state is saved 382 */ 383 if(p == 0 || p->mach){ 384 unlock(runq); 385 return nil; 386 } 387 if(p->rnext == 0) 388 rq->tail = l; 389 if(l) 390 l->rnext = p->rnext; 391 else 392 rq->head = p->rnext; 393 if(rq->head == nil) 394 runvec &= ~(1<<(rq-runq)); 395 rq->n--; 396 nrdy--; 397 if(p->state != Ready) 398 print("dequeueproc %s %lud %s\n", p->text, p->pid, statename[p->state]); 399 400 unlock(runq); 401 return p; 402 } 403 404 /* 405 * ready(p) picks a new priority for a process and sticks it in the 406 * runq for that priority. 407 */ 408 void 409 ready(Proc *p) 410 { 411 int s, pri; 412 Schedq *rq; 413 void (*pt)(Proc*, int, vlong); 414 415 s = splhi(); 416 if(edfready(p)){ 417 splx(s); 418 return; 419 } 420 421 if(up != p) 422 m->readied = p; /* group scheduling */ 423 424 updatecpu(p); 425 pri = reprioritize(p); 426 p->priority = pri; 427 rq = &runq[pri]; 428 p->state = Ready; 429 queueproc(rq, p); 430 pt = proctrace; 431 if(pt) 432 pt(p, SReady, 0); 433 splx(s); 434 } 435 436 /* 437 * yield the processor and drop our priority 438 */ 439 void 440 yield(void) 441 { 442 if(anyready()){ 443 /* pretend we just used 1/2 tick */ 444 up->lastupdate -= Scaling/2; 445 sched(); 446 } 447 } 448 449 /* 450 * recalculate priorities once a second. We need to do this 451 * since priorities will otherwise only be recalculated when 452 * the running process blocks. 453 */ 454 ulong balancetime; 455 456 static void 457 rebalance(void) 458 { 459 int pri, npri, t, x; 460 Schedq *rq; 461 Proc *p; 462 463 t = m->ticks; 464 if(t - balancetime < HZ) 465 return; 466 balancetime = t; 467 468 for(pri=0, rq=runq; pri<Npriq; pri++, rq++){ 469 another: 470 p = rq->head; 471 if(p == nil) 472 continue; 473 if(p->mp != MACHP(m->machno)) 474 continue; 475 if(pri == p->basepri) 476 continue; 477 updatecpu(p); 478 npri = reprioritize(p); 479 if(npri != pri){ 480 x = splhi(); 481 p = dequeueproc(rq, p); 482 if(p) 483 queueproc(&runq[npri], p); 484 splx(x); 485 goto another; 486 } 487 } 488 } 489 490 491 /* 492 * pick a process to run 493 */ 494 Proc* 495 runproc(void) 496 { 497 Schedq *rq; 498 Proc *p; 499 ulong start, now; 500 int i; 501 void (*pt)(Proc*, int, vlong); 502 503 start = perfticks(); 504 505 /* cooperative scheduling until the clock ticks */ 506 if((p=m->readied) && p->mach==0 && p->state==Ready 507 && runq[Nrq-1].head == nil && runq[Nrq-2].head == nil){ 508 skipscheds++; 509 rq = &runq[p->priority]; 510 goto found; 511 } 512 513 preempts++; 514 515 loop: 516 /* 517 * find a process that last ran on this processor (affinity), 518 * or one that hasn't moved in a while (load balancing). Every 519 * time around the loop affinity goes down. 520 */ 521 spllo(); 522 for(i = 0;; i++){ 523 /* 524 * find the highest priority target process that this 525 * processor can run given affinity constraints. 526 * 527 */ 528 for(rq = &runq[Nrq-1]; rq >= runq; rq--){ 529 for(p = rq->head; p; p = p->rnext){ 530 if(p->mp == nil || p->mp == MACHP(m->machno) 531 || (!p->wired && i > 0)) 532 goto found; 533 } 534 } 535 536 /* waste time or halt the CPU */ 537 idlehands(); 538 539 /* remember how much time we're here */ 540 now = perfticks(); 541 m->perf.inidle += now-start; 542 start = now; 543 } 544 545 found: 546 splhi(); 547 p = dequeueproc(rq, p); 548 if(p == nil) 549 goto loop; 550 551 p->state = Scheding; 552 p->mp = MACHP(m->machno); 553 554 if(edflock(p)){ 555 edfrun(p, rq == &runq[PriEdf]); /* start deadline timer and do admin */ 556 edfunlock(); 557 } 558 pt = proctrace; 559 if(pt) 560 pt(p, SRun, 0); 561 return p; 562 } 563 564 int 565 canpage(Proc *p) 566 { 567 int ok = 0; 568 569 splhi(); 570 lock(runq); 571 /* Only reliable way to see if we are Running */ 572 if(p->mach == 0) { 573 p->newtlb = 1; 574 ok = 1; 575 } 576 unlock(runq); 577 spllo(); 578 579 return ok; 580 } 581 582 Proc* 583 newproc(void) 584 { 585 Proc *p; 586 587 lock(&procalloc); 588 for(;;) { 589 if(p = procalloc.free) 590 break; 591 592 unlock(&procalloc); 593 resrcwait("no procs"); 594 lock(&procalloc); 595 } 596 procalloc.free = p->qnext; 597 unlock(&procalloc); 598 599 p->state = Scheding; 600 p->psstate = "New"; 601 p->mach = 0; 602 p->qnext = 0; 603 p->nchild = 0; 604 p->nwait = 0; 605 p->waitq = 0; 606 p->parent = 0; 607 p->pgrp = 0; 608 p->egrp = 0; 609 p->fgrp = 0; 610 p->rgrp = 0; 611 p->pdbg = 0; 612 p->fpstate = FPinit; 613 p->kp = 0; 614 p->procctl = 0; 615 p->notepending = 0; 616 p->ureg = 0; 617 p->privatemem = 0; 618 p->noswap = 0; 619 p->errstr = p->errbuf0; 620 p->syserrstr = p->errbuf1; 621 p->errbuf0[0] = '\0'; 622 p->errbuf1[0] = '\0'; 623 p->nlocks.ref = 0; 624 p->delaysched = 0; 625 p->trace = 0; 626 kstrdup(&p->user, "*nouser"); 627 kstrdup(&p->text, "*notext"); 628 kstrdup(&p->args, ""); 629 p->nargs = 0; 630 p->setargs = 0; 631 memset(p->seg, 0, sizeof p->seg); 632 p->pid = incref(&pidalloc); 633 pidhash(p); 634 p->noteid = incref(¬eidalloc); 635 if(p->pid==0 || p->noteid==0) 636 panic("pidalloc"); 637 if(p->kstack == 0) 638 p->kstack = smalloc(KSTACK); 639 640 /* sched params */ 641 p->mp = 0; 642 p->wired = 0; 643 procpriority(p, PriNormal, 0); 644 p->cpu = 0; 645 p->lastupdate = MACHP(0)->ticks*Scaling; 646 p->edf = nil; 647 648 return p; 649 } 650 651 /* 652 * wire this proc to a machine 653 */ 654 void 655 procwired(Proc *p, int bm) 656 { 657 Proc *pp; 658 int i; 659 char nwired[MAXMACH]; 660 Mach *wm; 661 662 if(bm < 0){ 663 /* pick a machine to wire to */ 664 memset(nwired, 0, sizeof(nwired)); 665 p->wired = 0; 666 pp = proctab(0); 667 for(i=0; i<conf.nproc; i++, pp++){ 668 wm = pp->wired; 669 if(wm && pp->pid) 670 nwired[wm->machno]++; 671 } 672 bm = 0; 673 for(i=0; i<conf.nmach; i++) 674 if(nwired[i] < nwired[bm]) 675 bm = i; 676 } else { 677 /* use the virtual machine requested */ 678 bm = bm % conf.nmach; 679 } 680 681 p->wired = MACHP(bm); 682 p->mp = p->wired; 683 } 684 685 void 686 procpriority(Proc *p, int pri, int fixed) 687 { 688 if(pri >= Npriq) 689 pri = Npriq - 1; 690 else if(pri < 0) 691 pri = 0; 692 p->basepri = pri; 693 p->priority = pri; 694 if(fixed){ 695 p->fixedpri = 1; 696 } else { 697 p->fixedpri = 0; 698 } 699 } 700 701 void 702 procinit0(void) /* bad planning - clashes with devproc.c */ 703 { 704 Proc *p; 705 int i; 706 707 procalloc.free = xalloc(conf.nproc*sizeof(Proc)); 708 if(procalloc.free == nil){ 709 xsummary(); 710 panic("cannot allocate %lud procs (%ludMB)\n", conf.nproc, conf.nproc*sizeof(Proc)/(1024*1024)); 711 } 712 procalloc.arena = procalloc.free; 713 714 p = procalloc.free; 715 for(i=0; i<conf.nproc-1; i++,p++) 716 p->qnext = p+1; 717 p->qnext = 0; 718 } 719 720 /* 721 * sleep if a condition is not true. Another process will 722 * awaken us after it sets the condition. When we awaken 723 * the condition may no longer be true. 724 * 725 * we lock both the process and the rendezvous to keep r->p 726 * and p->r synchronized. 727 */ 728 void 729 sleep(Rendez *r, int (*f)(void*), void *arg) 730 { 731 int s; 732 void (*pt)(Proc*, int, vlong); 733 734 s = splhi(); 735 736 if(up->nlocks.ref) 737 print("process %lud sleeps with %lud locks held, last lock 0x%p locked at pc 0x%lux, sleep called from 0x%lux\n", 738 up->pid, up->nlocks.ref, up->lastlock, up->lastlock->pc, getcallerpc(&r)); 739 lock(r); 740 lock(&up->rlock); 741 if(r->p){ 742 print("double sleep called from 0x%lux, %lud %lud\n", getcallerpc(&r), r->p->pid, up->pid); 743 dumpstack(); 744 } 745 746 /* 747 * Wakeup only knows there may be something to do by testing 748 * r->p in order to get something to lock on. 749 * Flush that information out to memory in case the sleep is 750 * committed. 751 */ 752 r->p = up; 753 754 if((*f)(arg) || up->notepending){ 755 /* 756 * if condition happened or a note is pending 757 * never mind 758 */ 759 r->p = nil; 760 unlock(&up->rlock); 761 unlock(r); 762 } else { 763 /* 764 * now we are committed to 765 * change state and call scheduler 766 */ 767 pt = proctrace; 768 if(pt) 769 pt(up, SSleep, 0); 770 up->state = Wakeme; 771 up->r = r; 772 773 /* statistics */ 774 m->cs++; 775 776 procsave(up); 777 if(setlabel(&up->sched)) { 778 /* 779 * here when the process is awakened 780 */ 781 procrestore(up); 782 spllo(); 783 } else { 784 /* 785 * here to go to sleep (i.e. stop Running) 786 */ 787 unlock(&up->rlock); 788 unlock(r); 789 gotolabel(&m->sched); 790 } 791 } 792 793 if(up->notepending) { 794 up->notepending = 0; 795 splx(s); 796 if(up->procctl == Proc_exitme && up->closingfgrp) 797 forceclosefgrp(); 798 error(Eintr); 799 } 800 801 splx(s); 802 } 803 804 static int 805 tfn(void *arg) 806 { 807 return up->trend == nil || up->tfn(arg); 808 } 809 810 void 811 twakeup(Ureg*, Timer *t) 812 { 813 Proc *p; 814 Rendez *trend; 815 816 p = t->ta; 817 trend = p->trend; 818 p->trend = 0; 819 if(trend) 820 wakeup(trend); 821 } 822 823 void 824 tsleep(Rendez *r, int (*fn)(void*), void *arg, ulong ms) 825 { 826 if (up->tt){ 827 print("tsleep: timer active: mode %d, tf 0x%lux\n", up->tmode, up->tf); 828 timerdel(up); 829 } 830 up->tns = MS2NS(ms); 831 up->tf = twakeup; 832 up->tmode = Trelative; 833 up->ta = up; 834 up->trend = r; 835 up->tfn = fn; 836 timeradd(up); 837 838 if(waserror()){ 839 timerdel(up); 840 nexterror(); 841 } 842 sleep(r, tfn, arg); 843 if (up->tt) 844 timerdel(up); 845 up->twhen = 0; 846 poperror(); 847 } 848 849 /* 850 * Expects that only one process can call wakeup for any given Rendez. 851 * We hold both locks to ensure that r->p and p->r remain consistent. 852 * Richard Miller has a better solution that doesn't require both to 853 * be held simultaneously, but I'm a paranoid - presotto. 854 */ 855 Proc* 856 wakeup(Rendez *r) 857 { 858 Proc *p; 859 int s; 860 861 s = splhi(); 862 863 lock(r); 864 p = r->p; 865 866 if(p != nil){ 867 lock(&p->rlock); 868 if(p->state != Wakeme || p->r != r){ 869 iprint("%p %p %d\n", p->r, r, p->state); 870 panic("wakeup: state"); 871 } 872 r->p = nil; 873 p->r = nil; 874 ready(p); 875 unlock(&p->rlock); 876 } 877 unlock(r); 878 879 splx(s); 880 881 return p; 882 } 883 884 /* 885 * if waking a sleeping process, this routine must hold both 886 * p->rlock and r->lock. However, it can't know them in 887 * the same order as wakeup causing a possible lock ordering 888 * deadlock. We break the deadlock by giving up the p->rlock 889 * lock if we can't get the r->lock and retrying. 890 */ 891 int 892 postnote(Proc *p, int dolock, char *n, int flag) 893 { 894 int s, ret; 895 Rendez *r; 896 Proc *d, **l; 897 898 if(dolock) 899 qlock(&p->debug); 900 901 if(flag != NUser && (p->notify == 0 || p->notified)) 902 p->nnote = 0; 903 904 ret = 0; 905 if(p->nnote < NNOTE) { 906 strcpy(p->note[p->nnote].msg, n); 907 p->note[p->nnote++].flag = flag; 908 ret = 1; 909 } 910 p->notepending = 1; 911 if(dolock) 912 qunlock(&p->debug); 913 914 /* this loop is to avoid lock ordering problems. */ 915 for(;;){ 916 s = splhi(); 917 lock(&p->rlock); 918 r = p->r; 919 920 /* waiting for a wakeup? */ 921 if(r == nil) 922 break; /* no */ 923 924 /* try for the second lock */ 925 if(canlock(r)){ 926 if(p->state != Wakeme || r->p != p) 927 panic("postnote: state %d %d %d", r->p != p, p->r != r, p->state); 928 p->r = nil; 929 r->p = nil; 930 ready(p); 931 unlock(r); 932 break; 933 } 934 935 /* give other process time to get out of critical section and try again */ 936 unlock(&p->rlock); 937 splx(s); 938 sched(); 939 } 940 unlock(&p->rlock); 941 splx(s); 942 943 if(p->state != Rendezvous) 944 return ret; 945 946 /* Try and pull out of a rendezvous */ 947 lock(p->rgrp); 948 if(p->state == Rendezvous) { 949 p->rendval = ~0; 950 l = &REND(p->rgrp, p->rendtag); 951 for(d = *l; d; d = d->rendhash) { 952 if(d == p) { 953 *l = p->rendhash; 954 break; 955 } 956 l = &d->rendhash; 957 } 958 ready(p); 959 } 960 unlock(p->rgrp); 961 return ret; 962 } 963 964 /* 965 * weird thing: keep at most NBROKEN around 966 */ 967 #define NBROKEN 4 968 struct 969 { 970 QLock; 971 int n; 972 Proc *p[NBROKEN]; 973 }broken; 974 975 void 976 addbroken(Proc *p) 977 { 978 qlock(&broken); 979 if(broken.n == NBROKEN) { 980 ready(broken.p[0]); 981 memmove(&broken.p[0], &broken.p[1], sizeof(Proc*)*(NBROKEN-1)); 982 --broken.n; 983 } 984 broken.p[broken.n++] = p; 985 qunlock(&broken); 986 987 edfstop(up); 988 p->state = Broken; 989 p->psstate = 0; 990 sched(); 991 } 992 993 void 994 unbreak(Proc *p) 995 { 996 int b; 997 998 qlock(&broken); 999 for(b=0; b < broken.n; b++) 1000 if(broken.p[b] == p) { 1001 broken.n--; 1002 memmove(&broken.p[b], &broken.p[b+1], 1003 sizeof(Proc*)*(NBROKEN-(b+1))); 1004 ready(p); 1005 break; 1006 } 1007 qunlock(&broken); 1008 } 1009 1010 int 1011 freebroken(void) 1012 { 1013 int i, n; 1014 1015 qlock(&broken); 1016 n = broken.n; 1017 for(i=0; i<n; i++) { 1018 ready(broken.p[i]); 1019 broken.p[i] = 0; 1020 } 1021 broken.n = 0; 1022 qunlock(&broken); 1023 return n; 1024 } 1025 1026 void 1027 pexit(char *exitstr, int freemem) 1028 { 1029 Proc *p; 1030 Segment **s, **es; 1031 long utime, stime; 1032 Waitq *wq, *f, *next; 1033 Fgrp *fgrp; 1034 Egrp *egrp; 1035 Rgrp *rgrp; 1036 Pgrp *pgrp; 1037 Chan *dot; 1038 void (*pt)(Proc*, int, vlong); 1039 1040 up->alarm = 0; 1041 if (up->tt) 1042 timerdel(up); 1043 pt = proctrace; 1044 if(pt) 1045 pt(up, SDead, 0); 1046 1047 /* nil out all the resources under lock (free later) */ 1048 qlock(&up->debug); 1049 fgrp = up->fgrp; 1050 up->fgrp = nil; 1051 egrp = up->egrp; 1052 up->egrp = nil; 1053 rgrp = up->rgrp; 1054 up->rgrp = nil; 1055 pgrp = up->pgrp; 1056 up->pgrp = nil; 1057 dot = up->dot; 1058 up->dot = nil; 1059 qunlock(&up->debug); 1060 1061 if(fgrp) 1062 closefgrp(fgrp); 1063 if(egrp) 1064 closeegrp(egrp); 1065 if(rgrp) 1066 closergrp(rgrp); 1067 if(dot) 1068 cclose(dot); 1069 if(pgrp) 1070 closepgrp(pgrp); 1071 1072 /* 1073 * if not a kernel process and have a parent, 1074 * do some housekeeping. 1075 */ 1076 if(up->kp == 0) { 1077 p = up->parent; 1078 if(p == 0) { 1079 if(exitstr == 0) 1080 exitstr = "unknown"; 1081 panic("boot process died: %s", exitstr); 1082 } 1083 1084 while(waserror()) 1085 ; 1086 1087 wq = smalloc(sizeof(Waitq)); 1088 poperror(); 1089 1090 wq->w.pid = up->pid; 1091 utime = up->time[TUser] + up->time[TCUser]; 1092 stime = up->time[TSys] + up->time[TCSys]; 1093 wq->w.time[TUser] = tk2ms(utime); 1094 wq->w.time[TSys] = tk2ms(stime); 1095 wq->w.time[TReal] = tk2ms(MACHP(0)->ticks - up->time[TReal]); 1096 if(exitstr && exitstr[0]) 1097 snprint(wq->w.msg, sizeof(wq->w.msg), "%s %lud: %s", up->text, up->pid, exitstr); 1098 else 1099 wq->w.msg[0] = '\0'; 1100 1101 lock(&p->exl); 1102 /* 1103 * Check that parent is still alive. 1104 */ 1105 if(p->pid == up->parentpid && p->state != Broken) { 1106 p->nchild--; 1107 p->time[TCUser] += utime; 1108 p->time[TCSys] += stime; 1109 /* 1110 * If there would be more than 128 wait records 1111 * processes for my parent, then don't leave a wait 1112 * record behind. This helps prevent badly written 1113 * daemon processes from accumulating lots of wait 1114 * records. 1115 */ 1116 if(p->nwait < 128) { 1117 wq->next = p->waitq; 1118 p->waitq = wq; 1119 p->nwait++; 1120 wq = nil; 1121 wakeup(&p->waitr); 1122 } 1123 } 1124 unlock(&p->exl); 1125 if(wq) 1126 free(wq); 1127 } 1128 1129 if(!freemem) 1130 addbroken(up); 1131 1132 qlock(&up->seglock); 1133 es = &up->seg[NSEG]; 1134 for(s = up->seg; s < es; s++) { 1135 if(*s) { 1136 putseg(*s); 1137 *s = 0; 1138 } 1139 } 1140 qunlock(&up->seglock); 1141 1142 lock(&up->exl); /* Prevent my children from leaving waits */ 1143 pidunhash(up); 1144 up->pid = 0; 1145 wakeup(&up->waitr); 1146 unlock(&up->exl); 1147 1148 for(f = up->waitq; f; f = next) { 1149 next = f->next; 1150 free(f); 1151 } 1152 1153 /* release debuggers */ 1154 qlock(&up->debug); 1155 if(up->pdbg) { 1156 wakeup(&up->pdbg->sleep); 1157 up->pdbg = 0; 1158 } 1159 qunlock(&up->debug); 1160 1161 /* Sched must not loop for these locks */ 1162 lock(&procalloc); 1163 lock(&palloc); 1164 1165 edfstop(up); 1166 up->state = Moribund; 1167 sched(); 1168 panic("pexit"); 1169 } 1170 1171 int 1172 haswaitq(void *x) 1173 { 1174 Proc *p; 1175 1176 p = (Proc *)x; 1177 return p->waitq != 0; 1178 } 1179 1180 ulong 1181 pwait(Waitmsg *w) 1182 { 1183 ulong cpid; 1184 Waitq *wq; 1185 1186 if(!canqlock(&up->qwaitr)) 1187 error(Einuse); 1188 1189 if(waserror()) { 1190 qunlock(&up->qwaitr); 1191 nexterror(); 1192 } 1193 1194 lock(&up->exl); 1195 if(up->nchild == 0 && up->waitq == 0) { 1196 unlock(&up->exl); 1197 error(Enochild); 1198 } 1199 unlock(&up->exl); 1200 1201 sleep(&up->waitr, haswaitq, up); 1202 1203 lock(&up->exl); 1204 wq = up->waitq; 1205 up->waitq = wq->next; 1206 up->nwait--; 1207 unlock(&up->exl); 1208 1209 qunlock(&up->qwaitr); 1210 poperror(); 1211 1212 if(w) 1213 memmove(w, &wq->w, sizeof(Waitmsg)); 1214 cpid = wq->w.pid; 1215 free(wq); 1216 return cpid; 1217 } 1218 1219 Proc* 1220 proctab(int i) 1221 { 1222 return &procalloc.arena[i]; 1223 } 1224 1225 void 1226 dumpaproc(Proc *p) 1227 { 1228 ulong bss; 1229 char *s; 1230 1231 if(p == 0) 1232 return; 1233 1234 bss = 0; 1235 if(p->seg[BSEG]) 1236 bss = p->seg[BSEG]->top; 1237 1238 s = p->psstate; 1239 if(s == 0) 1240 s = statename[p->state]; 1241 print("%3lud:%10s pc %8lux dbgpc %8lux %8s (%s) ut %ld st %ld bss %lux qpc %lux nl %lud nd %lud lpc %lux pri %lud\n", 1242 p->pid, p->text, p->pc, dbgpc(p), s, statename[p->state], 1243 p->time[0], p->time[1], bss, p->qpc, p->nlocks.ref, p->delaysched, p->lastlock ? p->lastlock->pc : 0, p->priority); 1244 } 1245 1246 void 1247 procdump(void) 1248 { 1249 int i; 1250 Proc *p; 1251 1252 if(up) 1253 print("up %lud\n", up->pid); 1254 else 1255 print("no current process\n"); 1256 for(i=0; i<conf.nproc; i++) { 1257 p = &procalloc.arena[i]; 1258 if(p->state == Dead) 1259 continue; 1260 1261 dumpaproc(p); 1262 } 1263 } 1264 1265 /* 1266 * wait till all processes have flushed their mmu 1267 * state about segement s 1268 */ 1269 void 1270 procflushseg(Segment *s) 1271 { 1272 int i, ns, nm, nwait; 1273 Proc *p; 1274 1275 /* 1276 * tell all processes with this 1277 * segment to flush their mmu's 1278 */ 1279 nwait = 0; 1280 for(i=0; i<conf.nproc; i++) { 1281 p = &procalloc.arena[i]; 1282 if(p->state == Dead) 1283 continue; 1284 for(ns = 0; ns < NSEG; ns++) 1285 if(p->seg[ns] == s){ 1286 p->newtlb = 1; 1287 for(nm = 0; nm < conf.nmach; nm++){ 1288 if(MACHP(nm)->proc == p){ 1289 MACHP(nm)->flushmmu = 1; 1290 nwait++; 1291 } 1292 } 1293 break; 1294 } 1295 } 1296 1297 if(nwait == 0) 1298 return; 1299 1300 /* 1301 * wait for all processors to take a clock interrupt 1302 * and flush their mmu's 1303 */ 1304 for(nm = 0; nm < conf.nmach; nm++) 1305 if(MACHP(nm) != m) 1306 while(MACHP(nm)->flushmmu) 1307 sched(); 1308 } 1309 1310 void 1311 scheddump(void) 1312 { 1313 Proc *p; 1314 Schedq *rq; 1315 1316 for(rq = &runq[Nrq-1]; rq >= runq; rq--){ 1317 if(rq->head == 0) 1318 continue; 1319 print("rq%ld:", rq-runq); 1320 for(p = rq->head; p; p = p->rnext) 1321 print(" %lud(%lud)", p->pid, m->ticks - p->readytime); 1322 print("\n"); 1323 delay(150); 1324 } 1325 print("nrdy %d\n", nrdy); 1326 } 1327 1328 void 1329 kproc(char *name, void (*func)(void *), void *arg) 1330 { 1331 Proc *p; 1332 static Pgrp *kpgrp; 1333 1334 p = newproc(); 1335 p->psstate = 0; 1336 p->procmode = 0640; 1337 p->kp = 1; 1338 p->noswap = 1; 1339 1340 p->fpsave = up->fpsave; 1341 p->scallnr = up->scallnr; 1342 p->s = up->s; 1343 p->nerrlab = 0; 1344 p->slash = up->slash; 1345 p->dot = up->dot; 1346 if(p->dot) 1347 incref(p->dot); 1348 1349 memmove(p->note, up->note, sizeof(p->note)); 1350 p->nnote = up->nnote; 1351 p->notified = 0; 1352 p->lastnote = up->lastnote; 1353 p->notify = up->notify; 1354 p->ureg = 0; 1355 p->dbgreg = 0; 1356 1357 procpriority(p, PriKproc, 0); 1358 1359 kprocchild(p, func, arg); 1360 1361 kstrdup(&p->user, eve); 1362 kstrdup(&p->text, name); 1363 if(kpgrp == 0) 1364 kpgrp = newpgrp(); 1365 p->pgrp = kpgrp; 1366 incref(kpgrp); 1367 1368 memset(p->time, 0, sizeof(p->time)); 1369 p->time[TReal] = MACHP(0)->ticks; 1370 ready(p); 1371 /* 1372 * since the bss/data segments are now shareable, 1373 * any mmu info about this process is now stale 1374 * and has to be discarded. 1375 */ 1376 p->newtlb = 1; 1377 flushmmu(); 1378 } 1379 1380 /* 1381 * called splhi() by notify(). See comment in notify for the 1382 * reasoning. 1383 */ 1384 void 1385 procctl(Proc *p) 1386 { 1387 char *state; 1388 ulong s; 1389 1390 switch(p->procctl) { 1391 case Proc_exitbig: 1392 spllo(); 1393 pexit("Killed: Insufficient physical memory", 1); 1394 1395 case Proc_exitme: 1396 spllo(); /* pexit has locks in it */ 1397 pexit("Killed", 1); 1398 1399 case Proc_traceme: 1400 if(p->nnote == 0) 1401 return; 1402 /* No break */ 1403 1404 case Proc_stopme: 1405 p->procctl = 0; 1406 state = p->psstate; 1407 p->psstate = "Stopped"; 1408 /* free a waiting debugger */ 1409 s = spllo(); 1410 qlock(&p->debug); 1411 if(p->pdbg) { 1412 wakeup(&p->pdbg->sleep); 1413 p->pdbg = 0; 1414 } 1415 qunlock(&p->debug); 1416 splhi(); 1417 p->state = Stopped; 1418 sched(); 1419 p->psstate = state; 1420 splx(s); 1421 return; 1422 } 1423 } 1424 1425 #include "errstr.h" 1426 1427 void 1428 error(char *err) 1429 { 1430 spllo(); 1431 1432 assert(up->nerrlab < NERR); 1433 kstrcpy(up->errstr, err, ERRMAX); 1434 setlabel(&up->errlab[NERR-1]); 1435 nexterror(); 1436 } 1437 1438 void 1439 nexterror(void) 1440 { 1441 gotolabel(&up->errlab[--up->nerrlab]); 1442 } 1443 1444 void 1445 exhausted(char *resource) 1446 { 1447 char buf[ERRMAX]; 1448 1449 sprint(buf, "no free %s", resource); 1450 iprint("%s\n", buf); 1451 error(buf); 1452 } 1453 1454 void 1455 killbig(char *why) 1456 { 1457 int i; 1458 Segment *s; 1459 ulong l, max; 1460 Proc *p, *ep, *kp; 1461 1462 max = 0; 1463 kp = 0; 1464 ep = procalloc.arena+conf.nproc; 1465 for(p = procalloc.arena; p < ep; p++) { 1466 if(p->state == Dead || p->kp) 1467 continue; 1468 l = 0; 1469 for(i=1; i<NSEG; i++) { 1470 s = p->seg[i]; 1471 if(s != 0) 1472 l += s->top - s->base; 1473 } 1474 if(l > max && ((p->procmode&0222) || strcmp(eve, p->user)!=0)) { 1475 kp = p; 1476 max = l; 1477 } 1478 } 1479 1480 print("%lud: %s killed: %s\n", kp->pid, kp->text, why); 1481 for(p = procalloc.arena; p < ep; p++) { 1482 if(p->state == Dead || p->kp) 1483 continue; 1484 if(p != kp && p->seg[BSEG] && p->seg[BSEG] == kp->seg[BSEG]) 1485 p->procctl = Proc_exitbig; 1486 } 1487 kp->procctl = Proc_exitbig; 1488 for(i = 0; i < NSEG; i++) { 1489 s = kp->seg[i]; 1490 if(s != 0 && canqlock(&s->lk)) { 1491 mfreeseg(s, s->base, (s->top - s->base)/BY2PG); 1492 qunlock(&s->lk); 1493 } 1494 } 1495 } 1496 1497 /* 1498 * change ownership to 'new' of all processes owned by 'old'. Used when 1499 * eve changes. 1500 */ 1501 void 1502 renameuser(char *old, char *new) 1503 { 1504 Proc *p, *ep; 1505 1506 ep = procalloc.arena+conf.nproc; 1507 for(p = procalloc.arena; p < ep; p++) 1508 if(p->user!=nil && strcmp(old, p->user)==0) 1509 kstrdup(&p->user, new); 1510 } 1511 1512 /* 1513 * time accounting called by clock() splhi'd 1514 */ 1515 void 1516 accounttime(void) 1517 { 1518 Proc *p; 1519 ulong n, per; 1520 static ulong nrun; 1521 1522 p = m->proc; 1523 if(p) { 1524 nrun++; 1525 p->time[p->insyscall]++; 1526 } 1527 1528 /* calculate decaying duty cycles */ 1529 n = perfticks(); 1530 per = n - m->perf.last; 1531 m->perf.last = n; 1532 per = (m->perf.period*(HZ-1) + per)/HZ; 1533 if(per != 0) 1534 m->perf.period = per; 1535 1536 m->perf.avg_inidle = (m->perf.avg_inidle*(HZ-1)+m->perf.inidle)/HZ; 1537 m->perf.inidle = 0; 1538 1539 m->perf.avg_inintr = (m->perf.avg_inintr*(HZ-1)+m->perf.inintr)/HZ; 1540 m->perf.inintr = 0; 1541 1542 /* only one processor gets to compute system load averages */ 1543 if(m->machno != 0) 1544 return; 1545 1546 /* 1547 * calculate decaying load average. 1548 * if we decay by (n-1)/n then it takes 1549 * n clock ticks to go from load L to .36 L once 1550 * things quiet down. it takes about 5 n clock 1551 * ticks to go to zero. so using HZ means this is 1552 * approximately the load over the last second, 1553 * with a tail lasting about 5 seconds. 1554 */ 1555 n = nrun; 1556 nrun = 0; 1557 n = (nrdy+n)*1000; 1558 m->load = (m->load*(HZ-1)+n)/HZ; 1559 } 1560 1561 static void 1562 pidhash(Proc *p) 1563 { 1564 int h; 1565 1566 h = p->pid % nelem(procalloc.ht); 1567 lock(&procalloc); 1568 p->pidhash = procalloc.ht[h]; 1569 procalloc.ht[h] = p; 1570 unlock(&procalloc); 1571 } 1572 1573 static void 1574 pidunhash(Proc *p) 1575 { 1576 int h; 1577 Proc **l; 1578 1579 h = p->pid % nelem(procalloc.ht); 1580 lock(&procalloc); 1581 for(l = &procalloc.ht[h]; *l != nil; l = &(*l)->pidhash) 1582 if(*l == p){ 1583 *l = p->pidhash; 1584 break; 1585 } 1586 unlock(&procalloc); 1587 } 1588 1589 int 1590 procindex(ulong pid) 1591 { 1592 Proc *p; 1593 int h; 1594 int s; 1595 1596 s = -1; 1597 h = pid % nelem(procalloc.ht); 1598 lock(&procalloc); 1599 for(p = procalloc.ht[h]; p != nil; p = p->pidhash) 1600 if(p->pid == pid){ 1601 s = p - procalloc.arena; 1602 break; 1603 } 1604 unlock(&procalloc); 1605 return s; 1606 } 1607