#include	<u.h>
#include	"../port/lib.h"
#include	"mem.h"
#include	"dat.h"
#include	"fns.h"
#include	"../port/error.h"
#include	"../port/edf.h"
#include	<trace.h>

int	schedgain = 30;	/* units in seconds */
int	nrdy;
Ref	noteidalloc;

void	updatecpu(Proc*);
int	reprioritize(Proc*);

ulong	delayedscheds;	/* statistics */
long	skipscheds;
long	preempts;
ulong	load;

static Ref pidalloc;

static struct Procalloc
{
	Lock;
	Proc*	ht[128];
	Proc*	arena;
	Proc*	free;
} procalloc;

enum
{
	Q=10,
	DQ=4,
	Scaling=2,
};

Schedq	runq[Nrq];
ulong	runvec;

char *statename[] =
{	/* BUG: generate automatically */
	"Dead",
	"Moribund",
	"Ready",
	"Scheding",
	"Running",
	"Queueing",
	"QueueingR",
	"QueueingW",
	"Wakeme",
	"Broken",
	"Stopped",
	"Rendez",
	"Waitrelease",
};

static void pidhash(Proc*);
static void pidunhash(Proc*);
static void rebalance(void);

/*
 *  Always splhi()'ed.
 */
void
schedinit(void)		/* never returns */
{
	Edf *e;

	setlabel(&m->sched);
	if(up) {
		if((e = up->edf) && (e->flags & Admitted))
			edfrecord(up);
		m->proc = 0;
		switch(up->state) {
		case Running:
			ready(up);
			break;
		case Moribund:
			up->state = Dead;
			edfstop(up);
			if(up->edf)
				free(up->edf);
			up->edf = nil;

			/*
			 *  Holding locks from pexit:
			 *	procalloc
			 *	palloc
			 */
			mmurelease(up);

			up->qnext = procalloc.free;
			procalloc.free = up;

			unlock(&palloc);
			unlock(&procalloc);
			break;
		}
		up->mach = nil;
		updatecpu(up);
		up = nil;
	}
	sched();
}
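
/*
 * The setlabel/gotolabel pair above behaves much like setjmp/longjmp:
 * setlabel saves a resume point and returns 0; a later gotolabel
 * resumes there, making setlabel appear to return nonzero.  A minimal
 * user-space sketch of the same handoff (an analogy, not kernel code):
 *
 *	#include <setjmp.h>
 *	jmp_buf schedctx, procctx;
 *
 *	void
 *	yieldlike(void)
 *	{
 *		if(setjmp(procctx) == 0)	// cf. setlabel(&up->sched)
 *			longjmp(schedctx, 1);	// cf. gotolabel(&m->sched)
 *		// execution resumes here when the scheduler jumps back
 *	}
 *
 * schedinit() and sched() play the two halves of this exchange, with
 * m->sched holding the per-processor scheduler context.
 */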

/*
 *  If changing this routine, look also at sleep().  It
 *  contains a copy of the guts of sched().
 */
void
sched(void)
{
	Proc *p;

	if(m->ilockdepth)
		panic("cpu%d: ilockdepth %d, last lock %#p at %#p, sched called from %#p",
			m->machno,
			m->ilockdepth,
			up? up->lastilock: nil,
			(up && up->lastilock)? up->lastilock->pc: 0,
			getcallerpc(&p+2));
	if(up){
		/*
		 *  Delay the sched until the process gives up the locks
		 *  it is holding.  This avoids dumb lock loops.
		 *  Don't delay if the process is Moribund.
		 *  It called sched to die.
		 *  But do sched eventually.  This avoids a missing unlock
		 *  from hanging the entire kernel.
		 *  But don't reschedule procs holding palloc or procalloc.
		 *  Those are far too important to be holding while asleep.
		 *
		 *  This test is not exact.  There can still be a few instructions
		 *  in the middle of taslock when a process holds a lock
		 *  but Lock.p has not yet been initialized.
		 */
		if(up->nlocks.ref)
		if(up->state != Moribund)
		if(up->delaysched < 20
		|| palloc.Lock.p == up
		|| procalloc.Lock.p == up){
			up->delaysched++;
			delayedscheds++;
			return;
		}
		up->delaysched = 0;

		splhi();

		/* statistics */
		m->cs++;

		procsave(up);
		if(setlabel(&up->sched)){
			procrestore(up);
			spllo();
			return;
		}
		gotolabel(&m->sched);
	}
	p = runproc();
	if(!p->edf){
		updatecpu(p);
		p->priority = reprioritize(p);
	}
	if(p != m->readied)
		m->schedticks = m->ticks + HZ/10;
	m->readied = 0;
	up = p;
	up->state = Running;
	up->mach = MACHP(m->machno);
	m->proc = up;
	mmuswitch(up);
	gotolabel(&up->sched);
}

int
anyready(void)
{
	return runvec;
}

int
anyhigher(void)
{
	return runvec & ~((1<<(up->priority+1))-1);
}
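
/*
 * runvec is a bitmap of non-empty run queues: bit i is set exactly
 * when runq[i] has at least one process.  anyhigher() masks away the
 * bits at or below the current priority.  A worked example (values
 * chosen for illustration):
 *
 *	runvec = 0x29			// queues 0, 3 and 5 are non-empty
 *	up->priority = 3
 *	(1<<(3+1))-1 = 0x0f		// bits 0..3
 *	runvec & ~0x0f = 0x20		// queue 5 is higher: reschedule
 *
 * This makes the "should we preempt?" test a couple of machine
 * instructions instead of a scan over all Nrq queues.
 */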

/*
 *  here once per clock tick to see if we should resched
 */
void
hzsched(void)
{
	/* once a second, rebalance will reprioritize ready procs */
	if(m->machno == 0)
		rebalance();

	/* unless preempted, get to run for at least 100ms */
	if(anyhigher()
	|| (!up->fixedpri && m->ticks > m->schedticks && anyready())){
		m->readied = nil;	/* avoid cooperative scheduling */
		up->delaysched++;
	}
}

/*
 *  here at the end of non-clock interrupts to see if we should preempt the
 *  current process.  Returns 1 if preempted, 0 otherwise.
 */
int
preempted(void)
{
	if(up && up->state == Running)
	if(up->preempted == 0)
	if(anyhigher())
	if(!active.exiting){
		m->readied = nil;	/* avoid cooperative scheduling */
		up->preempted = 1;
		sched();
		splhi();
		up->preempted = 0;
		return 1;
	}
	return 0;
}

/*
 * Update the cpu time average for this particular process,
 * which is about to change from up -> not up or vice versa.
 * p->lastupdate is the last time an updatecpu happened.
 *
 * The cpu time average is a decaying average that lasts
 * about D clock ticks.  D is chosen to be approximately
 * the cpu time of a cpu-intensive "quick job".  A job has to run
 * for approximately D clock ticks before we home in on its
 * actual cpu usage.  Thus if you manage to get in and get out
 * quickly, you won't be penalized during your burst.  Once you
 * start using your share of the cpu for more than about D
 * clock ticks though, your p->cpu hits 1000 (1.0) and you end up
 * below all the other quick jobs.  Interactive tasks, because
 * they basically always use less than their fair share of cpu,
 * will be rewarded.
 *
 * If the process has not been running, then we want to
 * apply the filter
 *
 *	cpu = cpu * (D-1)/D
 *
 * n times, yielding
 *
 *	cpu = cpu * ((D-1)/D)^n
 *
 * but D is big enough that this is approximately
 *
 *	cpu = cpu * (D-n)/D
 *
 * so we use that instead.
 *
 * If the process has been running, we apply the filter to
 * 1 - cpu, yielding a similar equation.  Note that cpu is
 * stored in fixed point (* 1000).
 *
 * Updatecpu must be called before changing up, in order
 * to maintain accurate cpu usage statistics.  It can be called
 * at any time to bring the stats for a given proc up-to-date.
 */
void
updatecpu(Proc *p)
{
	int n, t, ocpu;
	int D = schedgain*HZ*Scaling;

	if(p->edf)
		return;

	t = MACHP(0)->ticks*Scaling + Scaling/2;
	n = t - p->lastupdate;
	p->lastupdate = t;

	if(n == 0)
		return;
	if(n > D)
		n = D;

	ocpu = p->cpu;
	if(p != up)
		p->cpu = (ocpu*(D-n))/D;
	else{
		t = 1000 - ocpu;
		t = (t*(D-n))/D;
		p->cpu = 1000 - t;
	}

//	iprint("pid %d %s for %d cpu %d -> %d\n", p->pid, p==up?"active":"inactive", n, ocpu, p->cpu);
}
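
/*
 * A worked example of the filter above, with made-up numbers for
 * illustration.  Take HZ = 100, schedgain = 30, Scaling = 2, so
 * D = 6000.  Suppose p->cpu = 400 (0.4 in fixed point) and the
 * process has been off the cpu for n = 600 scaled ticks:
 *
 *	cpu = 400 * (6000-600)/6000 = 400 * 9/10 = 360
 *
 * Had it instead been running for those 600 ticks, the filter is
 * applied to 1 - cpu:
 *
 *	t   = 1000 - 400 = 600
 *	t   = 600 * 9/10 = 540
 *	cpu = 1000 - 540 = 460
 *
 * so the usage estimate decays toward 0 when idle and saturates
 * toward 1000 when running, at the same rate in both directions.
 */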

/*
 * On average, p has used p->cpu of a cpu recently.
 * Its fair share is conf.nmach/m->load of a cpu.  If it has been getting
 * too much, penalize it.  If it has been getting not enough, reward it.
 * I don't think you can get much more than your fair share that
 * often, so most of the queues are for using less.  Having a priority
 * of 3 means you're just right.  Having a higher priority (up to p->basepri)
 * means you're not using as much as you could.
 */
int
reprioritize(Proc *p)
{
	int fairshare, n, load, ratio;

	load = MACHP(0)->load;
	if(load == 0)
		return p->basepri;

	/*
	 *  fairshare = 1.000 * conf.nmach * 1.000/load,
	 *  except the decimal point is moved three places
	 *  on both load and fairshare.
	 */
	fairshare = (conf.nmach*1000*1000)/load;
	n = p->cpu;
	if(n == 0)
		n = 1;
	ratio = (fairshare+n/2) / n;
	if(ratio > p->basepri)
		ratio = p->basepri;
	if(ratio < 0)
		panic("reprioritize");

//	iprint("pid %d cpu %d load %d fair %d pri %d\n", p->pid, p->cpu, load, fairshare, ratio);
	return ratio;
}
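
/*
 * A worked example of the ratio above (numbers invented for
 * illustration).  On a 2-processor machine with a load of 1.500
 * runnable procs on average (fixed point * 1000):
 *
 *	fairshare = 2*1000*1000/1500 = 1333
 *
 * A process with p->cpu = 200 (using 0.2 of a cpu) gets
 *
 *	ratio = (1333+100)/200 = 7	// then capped at p->basepri
 *
 * i.e. it is using well under its share and is rewarded, while one
 * with p->cpu = 900 gets (1333+450)/900 = 1 and sinks near the bottom.
 */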

/*
 *  add a process to a scheduling queue
 */
void
queueproc(Schedq *rq, Proc *p)
{
	int pri;

	pri = rq - runq;
	lock(runq);
	p->priority = pri;
	p->rnext = 0;
	if(rq->tail)
		rq->tail->rnext = p;
	else
		rq->head = p;
	rq->tail = p;
	rq->n++;
	nrdy++;
	runvec |= 1<<pri;
	unlock(runq);
}

/*
 *  try to remove a process from a scheduling queue (called splhi)
 */
Proc*
dequeueproc(Schedq *rq, Proc *tp)
{
	Proc *l, *p;

	if(!canlock(runq))
		return nil;

	/*
	 *  the queue may have changed before we locked runq,
	 *  so find the target process again.
	 */
	l = 0;
	for(p = rq->head; p; p = p->rnext){
		if(p == tp)
			break;
		l = p;
	}

	/*
	 *  p->mach==0 only when process state is saved
	 */
	if(p == 0 || p->mach){
		unlock(runq);
		return nil;
	}
	if(p->rnext == 0)
		rq->tail = l;
	if(l)
		l->rnext = p->rnext;
	else
		rq->head = p->rnext;
	if(rq->head == nil)
		runvec &= ~(1<<(rq-runq));
	rq->n--;
	nrdy--;
	if(p->state != Ready)
		print("dequeueproc %s %lud %s\n", p->text, p->pid, statename[p->state]);

	unlock(runq);
	return p;
}

/*
 *  ready(p) picks a new priority for a process and sticks it in the
 *  runq for that priority.
 */
void
ready(Proc *p)
{
	int s, pri;
	Schedq *rq;
	void (*pt)(Proc*, int, vlong);

	s = splhi();
	if(edfready(p)){
		splx(s);
		return;
	}

	if(up != p && (p->wired == nil || p->wired == m))
		m->readied = p;	/* group scheduling */

	updatecpu(p);
	pri = reprioritize(p);
	p->priority = pri;
	rq = &runq[pri];
	p->state = Ready;
	queueproc(rq, p);
	pt = proctrace;
	if(pt)
		pt(p, SReady, 0);
	splx(s);
}

/*
 *  yield the processor and drop our priority
 */
void
yield(void)
{
	if(anyready()){
		/* pretend we just used 1/2 tick */
		up->lastupdate -= Scaling/2;
		sched();
	}
}

/*
 *  recalculate priorities once a second.  We need to do this
 *  since priorities will otherwise only be recalculated when
 *  the running process blocks.
 */
ulong balancetime;

static void
rebalance(void)
{
	int pri, npri, t, x;
	Schedq *rq;
	Proc *p;

	t = m->ticks;
	if(t - balancetime < HZ)
		return;
	balancetime = t;

	for(pri=0, rq=runq; pri<Npriq; pri++, rq++){
another:
		p = rq->head;
		if(p == nil)
			continue;
		if(p->mp != MACHP(m->machno))
			continue;
		if(pri == p->basepri)
			continue;
		updatecpu(p);
		npri = reprioritize(p);
		if(npri != pri){
			x = splhi();
			p = dequeueproc(rq, p);
			if(p)
				queueproc(&runq[npri], p);
			splx(x);
			goto another;
		}
	}
}

/*
 *  pick a process to run
 */
Proc*
runproc(void)
{
	Schedq *rq;
	Proc *p;
	ulong start, now;
	int i;
	void (*pt)(Proc*, int, vlong);

	start = perfticks();

	/* cooperative scheduling until the clock ticks */
	if((p=m->readied) && p->mach==0 && p->state==Ready
	&& (p->wired == nil || p->wired == m)
	&& runq[Nrq-1].head == nil && runq[Nrq-2].head == nil){
		skipscheds++;
		rq = &runq[p->priority];
		goto found;
	}

	preempts++;

loop:
	/*
	 *  find a process that last ran on this processor (affinity),
	 *  or one that hasn't moved in a while (load balancing).  Every
	 *  time around the loop affinity goes down.
	 */
	spllo();
	for(i = 0;; i++){
		/*
		 *  find the highest priority target process that this
		 *  processor can run given affinity constraints.
		 */
		for(rq = &runq[Nrq-1]; rq >= runq; rq--){
			for(p = rq->head; p; p = p->rnext){
				if(p->mp == nil || p->mp == MACHP(m->machno)
				|| (!p->wired && i > 0))
					goto found;
			}
		}

		/* waste time or halt the CPU */
		idlehands();

		/* remember how much time we're here */
		now = perfticks();
		m->perf.inidle += now-start;
		start = now;
	}

found:
	splhi();
	p = dequeueproc(rq, p);
	if(p == nil)
		goto loop;

	p->state = Scheding;
	p->mp = MACHP(m->machno);

	if(edflock(p)){
		edfrun(p, rq == &runq[PriEdf]);	/* start deadline timer and do admin */
		edfunlock();
	}
	pt = proctrace;
	if(pt)
		pt(p, SRun, 0);
	return p;
}
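
/*
 * The scan above relaxes affinity gradually.  On pass i==0 a proc is
 * taken only if it last ran here (p->mp) or has never run anywhere
 * (p->mp == nil); from pass i==1 onward any unwired proc qualifies,
 * so an otherwise idle processor ends up stealing work.  A minimal
 * sketch of the same policy (illustrative only, names invented):
 *
 *	int
 *	canrun(Proc *p, Mach *mp, int pass)
 *	{
 *		if(p->mp == nil || p->mp == mp)
 *			return 1;		// affinity match
 *		return !p->wired && pass > 0;	// steal on later passes
 *	}
 */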

int
canpage(Proc *p)
{
	int ok = 0;

	splhi();
	lock(runq);
	/* Only reliable way to see if we are Running */
	if(p->mach == 0) {
		p->newtlb = 1;
		ok = 1;
	}
	unlock(runq);
	spllo();

	return ok;
}

void
noprocpanic(char *msg)
{
	/*
	 *  setting exiting will make hzclock() on each processor call exit(0).
	 *  clearing our bit in machs avoids calling exit(0) from hzclock()
	 *  on this processor.
	 */
	lock(&active);
	active.machs &= ~(1<<m->machno);
	active.exiting = 1;
	unlock(&active);

	procdump();
	delay(1000);
	panic(msg);
}

Proc*
newproc(void)
{
	char msg[64];
	Proc *p;

	lock(&procalloc);
	while((p = procalloc.free) == nil) {
		unlock(&procalloc);

		snprint(msg, sizeof msg, "no procs; %s forking",
			up? up->text: "kernel");
		/*
		 *  the situation is unlikely to heal itself.
		 *  dump the proc table and restart by default.
		 *  *noprocspersist in plan9.ini will yield the old
		 *  behaviour of trying forever.
		 */
		if(getconf("*noprocspersist") == nil)
			noprocpanic(msg);
		resrcwait(msg);
		lock(&procalloc);
	}
	procalloc.free = p->qnext;
	unlock(&procalloc);

	p->state = Scheding;
	p->psstate = "New";
	p->mach = 0;
	p->qnext = 0;
	p->nchild = 0;
	p->nwait = 0;
	p->waitq = 0;
	p->parent = 0;
	p->pgrp = 0;
	p->egrp = 0;
	p->fgrp = 0;
	p->rgrp = 0;
	p->pdbg = 0;
	p->fpstate = FPinit;
	p->kp = 0;
	if(up && up->procctl == Proc_tracesyscall)
		p->procctl = Proc_tracesyscall;
	else
		p->procctl = 0;
	p->syscalltrace = 0;
	p->notepending = 0;
	p->ureg = 0;
	p->privatemem = 0;
	p->noswap = 0;
	p->errstr = p->errbuf0;
	p->syserrstr = p->errbuf1;
	p->errbuf0[0] = '\0';
	p->errbuf1[0] = '\0';
	p->nlocks.ref = 0;
	p->delaysched = 0;
	p->trace = 0;
	kstrdup(&p->user, "*nouser");
	kstrdup(&p->text, "*notext");
	kstrdup(&p->args, "");
	p->nargs = 0;
	p->setargs = 0;
	memset(p->seg, 0, sizeof p->seg);
	p->pid = incref(&pidalloc);
	pidhash(p);
	p->noteid = incref(&noteidalloc);
	if(p->pid==0 || p->noteid==0)
		panic("pidalloc");
	if(p->kstack == 0)
		p->kstack = smalloc(KSTACK);

	/* sched params */
	p->mp = 0;
	p->wired = 0;
	procpriority(p, PriNormal, 0);
	p->cpu = 0;
	p->lastupdate = MACHP(0)->ticks*Scaling;
	p->edf = nil;

	return p;
}

/*
 *  wire this proc to a machine
 */
void
procwired(Proc *p, int bm)
{
	Proc *pp;
	int i;
	char nwired[MAXMACH];
	Mach *wm;

	if(bm < 0){
		/* pick a machine to wire to */
		memset(nwired, 0, sizeof(nwired));
		p->wired = 0;
		pp = proctab(0);
		for(i=0; i<conf.nproc; i++, pp++){
			wm = pp->wired;
			if(wm && pp->pid)
				nwired[wm->machno]++;
		}
		bm = 0;
		for(i=0; i<conf.nmach; i++)
			if(nwired[i] < nwired[bm])
				bm = i;
	} else {
		/* use the virtual machine requested */
		bm = bm % conf.nmach;
	}

	p->wired = MACHP(bm);
	p->mp = p->wired;
}

void
procpriority(Proc *p, int pri, int fixed)
{
	if(pri >= Npriq)
		pri = Npriq - 1;
	else if(pri < 0)
		pri = 0;
	p->basepri = pri;
	p->priority = pri;
	if(fixed){
		p->fixedpri = 1;
	} else {
		p->fixedpri = 0;
	}
}

void
procinit0(void)		/* bad planning - clashes with devproc.c */
{
	Proc *p;
	int i;

	procalloc.free = xalloc(conf.nproc*sizeof(Proc));
	if(procalloc.free == nil){
		xsummary();
		panic("cannot allocate %lud procs (%ludMB)\n", conf.nproc,
			conf.nproc*sizeof(Proc)/(1024*1024));
	}
	procalloc.arena = procalloc.free;

	p = procalloc.free;
	for(i=0; i<conf.nproc-1; i++,p++)
		p->qnext = p+1;
	p->qnext = 0;
}
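
/*
 * procinit0 threads the whole Proc arena into a singly linked free
 * list through qnext; newproc pops from the head and schedinit pushes
 * dead procs back.  The idiom in miniature (illustrative, with
 * invented names; the real code does this under procalloc's Lock):
 *
 *	Proc *freelist;
 *
 *	void
 *	putfree(Proc *p)	// push
 *	{
 *		p->qnext = freelist;
 *		freelist = p;
 *	}
 *
 *	Proc*
 *	getfree(void)		// pop; nil when exhausted
 *	{
 *		Proc *p;
 *
 *		if((p = freelist) != nil)
 *			freelist = p->qnext;
 *		return p;
 *	}
 */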

/*
 *  sleep if a condition is not true.  Another process will
 *  awaken us after it sets the condition.  When we awaken
 *  the condition may no longer be true.
 *
 *  we lock both the process and the rendezvous to keep r->p
 *  and p->r synchronized.
 */
void
sleep(Rendez *r, int (*f)(void*), void *arg)
{
	int s;
	void (*pt)(Proc*, int, vlong);

	s = splhi();

	if(up->nlocks.ref)
		print("process %lud sleeps with %lud locks held, last lock %#p locked at pc %#lux, sleep called from %#p\n",
			up->pid, up->nlocks.ref, up->lastlock, up->lastlock->pc, getcallerpc(&r));
	lock(r);
	lock(&up->rlock);
	if(r->p){
		print("double sleep called from %#p, %lud %lud\n",
			getcallerpc(&r), r->p->pid, up->pid);
		dumpstack();
	}

	/*
	 *  Wakeup only knows there may be something to do by testing
	 *  r->p in order to get something to lock on.
	 *  Flush that information out to memory in case the sleep is
	 *  committed.
	 */
	r->p = up;

	if((*f)(arg) || up->notepending){
		/*
		 *  if condition happened or a note is pending
		 *  never mind
		 */
		r->p = nil;
		unlock(&up->rlock);
		unlock(r);
	} else {
		/*
		 *  now we are committed to
		 *  change state and call scheduler
		 */
		pt = proctrace;
		if(pt)
			pt(up, SSleep, 0);
		up->state = Wakeme;
		up->r = r;

		/* statistics */
		m->cs++;

		procsave(up);
		if(setlabel(&up->sched)) {
			/*
			 *  here when the process is awakened
			 */
			procrestore(up);
			spllo();
		} else {
			/*
			 *  here to go to sleep (i.e. stop Running)
			 */
			unlock(&up->rlock);
			unlock(r);
			gotolabel(&m->sched);
		}
	}

	if(up->notepending) {
		up->notepending = 0;
		splx(s);
		if(up->procctl == Proc_exitme && up->closingfgrp)
			forceclosefgrp();
		error(Eintr);
	}

	splx(s);
}

static int
tfn(void *arg)
{
	return up->trend == nil || up->tfn(arg);
}

void
twakeup(Ureg*, Timer *t)
{
	Proc *p;
	Rendez *trend;

	p = t->ta;
	trend = p->trend;
	p->trend = 0;
	if(trend)
		wakeup(trend);
}

void
tsleep(Rendez *r, int (*fn)(void*), void *arg, ulong ms)
{
	if(up->tt){
		print("tsleep: timer active: mode %d, tf %#p\n",
			up->tmode, up->tf);
		timerdel(up);
	}
	up->tns = MS2NS(ms);
	up->tf = twakeup;
	up->tmode = Trelative;
	up->ta = up;
	up->trend = r;
	up->tfn = fn;
	timeradd(up);

	if(waserror()){
		timerdel(up);
		nexterror();
	}
	sleep(r, tfn, arg);
	if(up->tt)
		timerdel(up);
	up->twhen = 0;
	poperror();
}

/*
 *  Expects that only one process can call wakeup for any given Rendez.
 *  We hold both locks to ensure that r->p and p->r remain consistent.
 *  Richard Miller has a better solution that doesn't require both to
 *  be held simultaneously, but I'm a paranoid - presotto.
 */
Proc*
wakeup(Rendez *r)
{
	Proc *p;
	int s;

	s = splhi();

	lock(r);
	p = r->p;

	if(p != nil){
		lock(&p->rlock);
		if(p->state != Wakeme || p->r != r){
			iprint("%p %p %d\n", p->r, r, p->state);
			panic("wakeup: state");
		}
		r->p = nil;
		p->r = nil;
		ready(p);
		unlock(&p->rlock);
	}
	unlock(r);

	splx(s);

	return p;
}
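
/*
 * The sleep/wakeup protocol above is the classic "publish your intent
 * to sleep, then re-check the condition" pattern: sleep sets r->p
 * first and tests the condition under the locks, so a wakeup firing
 * in between cannot be lost.  Roughly the same shape in user space
 * with POSIX primitives (an analogy, not the kernel API):
 *
 *	pthread_mutex_t mu = PTHREAD_MUTEX_INITIALIZER;
 *	pthread_cond_t cv = PTHREAD_COND_INITIALIZER;
 *	int cond;
 *
 *	void
 *	sleepish(void)
 *	{
 *		pthread_mutex_lock(&mu);
 *		while(!cond)			// re-check under the lock
 *			pthread_cond_wait(&cv, &mu);
 *		pthread_mutex_unlock(&mu);
 *	}
 *
 *	void
 *	wakeupish(void)
 *	{
 *		pthread_mutex_lock(&mu);
 *		cond = 1;
 *		pthread_cond_signal(&cv);
 *		pthread_mutex_unlock(&mu);
 *	}
 *
 * The kernel version cannot block on a mutex at splhi, so it uses the
 * r->p handshake and a context switch instead.
 */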

/*
 *  if waking a sleeping process, this routine must hold both
 *  p->rlock and r->lock.  However, it can't know them in
 *  the same order as wakeup, causing a possible lock ordering
 *  deadlock.  We break the deadlock by giving up the p->rlock
 *  lock if we can't get the r->lock and retrying.
 */
int
postnote(Proc *p, int dolock, char *n, int flag)
{
	int s, ret;
	Rendez *r;
	Proc *d, **l;

	if(dolock)
		qlock(&p->debug);

	if(flag != NUser && (p->notify == 0 || p->notified))
		p->nnote = 0;

	ret = 0;
	if(p->nnote < NNOTE) {
		strcpy(p->note[p->nnote].msg, n);
		p->note[p->nnote++].flag = flag;
		ret = 1;
	}
	p->notepending = 1;
	if(dolock)
		qunlock(&p->debug);

	/* this loop is to avoid lock ordering problems. */
	for(;;){
		s = splhi();
		lock(&p->rlock);
		r = p->r;

		/* waiting for a wakeup? */
		if(r == nil)
			break;	/* no */

		/* try for the second lock */
		if(canlock(r)){
			if(p->state != Wakeme || r->p != p)
				panic("postnote: state %d %d %d",
					r->p != p, p->r != r, p->state);
			p->r = nil;
			r->p = nil;
			ready(p);
			unlock(r);
			break;
		}

		/* give other process time to get out of critical section and try again */
		unlock(&p->rlock);
		splx(s);
		sched();
	}
	unlock(&p->rlock);
	splx(s);

	if(p->state != Rendezvous)
		return ret;

	/* Try and pull out of a rendezvous */
	lock(p->rgrp);
	if(p->state == Rendezvous) {
		p->rendval = ~0;
		l = &REND(p->rgrp, p->rendtag);
		for(d = *l; d; d = d->rendhash) {
			if(d == p) {
				*l = p->rendhash;
				break;
			}
			l = &d->rendhash;
		}
		ready(p);
	}
	unlock(p->rgrp);
	return ret;
}

/*
 *  weird thing: keep at most NBROKEN around
 */
#define	NBROKEN 4
struct
{
	QLock;
	int	n;
	Proc*	p[NBROKEN];
}broken;

void
addbroken(Proc *p)
{
	qlock(&broken);
	if(broken.n == NBROKEN) {
		ready(broken.p[0]);
		memmove(&broken.p[0], &broken.p[1], sizeof(Proc*)*(NBROKEN-1));
		--broken.n;
	}
	broken.p[broken.n++] = p;
	qunlock(&broken);

	edfstop(up);
	p->state = Broken;
	p->psstate = 0;
	sched();
}

void
unbreak(Proc *p)
{
	int b;

	qlock(&broken);
	for(b=0; b < broken.n; b++)
		if(broken.p[b] == p) {
			broken.n--;
			memmove(&broken.p[b], &broken.p[b+1],
				sizeof(Proc*)*(NBROKEN-(b+1)));
			ready(p);
			break;
		}
	qunlock(&broken);
}

int
freebroken(void)
{
	int i, n;

	qlock(&broken);
	n = broken.n;
	for(i=0; i<n; i++) {
		ready(broken.p[i]);
		broken.p[i] = 0;
	}
	broken.n = 0;
	qunlock(&broken);
	return n;
}
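
/*
 * postnote's retry loop is a general recipe for taking two locks that
 * other code acquires in the opposite order: take the first, *try*
 * the second, and on failure drop the first and start over rather
 * than wait.  Schematically (invented names, not kernel API):
 *
 *	for(;;){
 *		lock(&a);
 *		if(canlock(&b))
 *			break;		// got both, in our order
 *		unlock(&a);		// back off so the other side
 *		sched();		// can finish, then retry
 *	}
 *	... critical section holding a and b ...
 *	unlock(&b);
 *	unlock(&a);
 *
 * Since neither side ever blocks while holding its first lock, the
 * ordering cycle that would cause deadlock cannot form.
 */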

void
pexit(char *exitstr, int freemem)
{
	Proc *p;
	Segment **s, **es;
	long utime, stime;
	Waitq *wq, *f, *next;
	Fgrp *fgrp;
	Egrp *egrp;
	Rgrp *rgrp;
	Pgrp *pgrp;
	Chan *dot;
	void (*pt)(Proc*, int, vlong);

	if(up->syscalltrace)
		free(up->syscalltrace);
	up->alarm = 0;
	if(up->tt)
		timerdel(up);
	pt = proctrace;
	if(pt)
		pt(up, SDead, 0);

	/* nil out all the resources under lock (free later) */
	qlock(&up->debug);
	fgrp = up->fgrp;
	up->fgrp = nil;
	egrp = up->egrp;
	up->egrp = nil;
	rgrp = up->rgrp;
	up->rgrp = nil;
	pgrp = up->pgrp;
	up->pgrp = nil;
	dot = up->dot;
	up->dot = nil;
	qunlock(&up->debug);

	if(fgrp)
		closefgrp(fgrp);
	if(egrp)
		closeegrp(egrp);
	if(rgrp)
		closergrp(rgrp);
	if(dot)
		cclose(dot);
	if(pgrp)
		closepgrp(pgrp);

	/*
	 *  if not a kernel process and have a parent,
	 *  do some housekeeping.
	 */
	if(up->kp == 0) {
		p = up->parent;
		if(p == 0) {
			if(exitstr == 0)
				exitstr = "unknown";
			panic("boot process died: %s", exitstr);
		}

		while(waserror())
			;

		wq = smalloc(sizeof(Waitq));
		poperror();

		wq->w.pid = up->pid;
		utime = up->time[TUser] + up->time[TCUser];
		stime = up->time[TSys] + up->time[TCSys];
		wq->w.time[TUser] = tk2ms(utime);
		wq->w.time[TSys] = tk2ms(stime);
		wq->w.time[TReal] = tk2ms(MACHP(0)->ticks - up->time[TReal]);
		if(exitstr && exitstr[0])
			snprint(wq->w.msg, sizeof(wq->w.msg), "%s %lud: %s",
				up->text, up->pid, exitstr);
		else
			wq->w.msg[0] = '\0';

		lock(&p->exl);
		/*
		 *  Check that parent is still alive.
		 */
		if(p->pid == up->parentpid && p->state != Broken) {
			p->nchild--;
			p->time[TCUser] += utime;
			p->time[TCSys] += stime;
			/*
			 *  If there would be more than 128 wait records
			 *  outstanding for my parent, then don't leave a wait
			 *  record behind.  This helps prevent badly written
			 *  daemon processes from accumulating lots of wait
			 *  records.
			 */
			if(p->nwait < 128) {
				wq->next = p->waitq;
				p->waitq = wq;
				p->nwait++;
				wq = nil;
				wakeup(&p->waitr);
			}
		}
		unlock(&p->exl);
		if(wq)
			free(wq);
	}

	if(!freemem)
		addbroken(up);

	qlock(&up->seglock);
	es = &up->seg[NSEG];
	for(s = up->seg; s < es; s++) {
		if(*s) {
			putseg(*s);
			*s = 0;
		}
	}
	qunlock(&up->seglock);

	lock(&up->exl);		/* Prevent my children from leaving waits */
	pidunhash(up);
	up->pid = 0;
	wakeup(&up->waitr);
	unlock(&up->exl);

	for(f = up->waitq; f; f = next) {
		next = f->next;
		free(f);
	}

	/* release debuggers */
	qlock(&up->debug);
	if(up->pdbg) {
		wakeup(&up->pdbg->sleep);
		up->pdbg = 0;
	}
	qunlock(&up->debug);

	/* Sched must not loop for these locks */
	lock(&procalloc);
	lock(&palloc);

	edfstop(up);
	up->state = Moribund;
	sched();
	panic("pexit");
}

int
haswaitq(void *x)
{
	Proc *p;

	p = (Proc *)x;
	return p->waitq != 0;
}

ulong
pwait(Waitmsg *w)
{
	ulong cpid;
	Waitq *wq;

	if(!canqlock(&up->qwaitr))
		error(Einuse);

	if(waserror()) {
		qunlock(&up->qwaitr);
		nexterror();
	}

	lock(&up->exl);
	if(up->nchild == 0 && up->waitq == 0) {
		unlock(&up->exl);
		error(Enochild);
	}
	unlock(&up->exl);

	sleep(&up->waitr, haswaitq, up);

	lock(&up->exl);
	wq = up->waitq;
	up->waitq = wq->next;
	up->nwait--;
	unlock(&up->exl);

	qunlock(&up->qwaitr);
	poperror();

	if(w)
		memmove(w, &wq->w, sizeof(Waitmsg));
	cpid = wq->w.pid;
	free(wq);
	return cpid;
}

Proc*
proctab(int i)
{
	return &procalloc.arena[i];
}

void
dumpaproc(Proc *p)
{
	ulong bss;
	char *s;

	if(p == 0)
		return;

	bss = 0;
	if(p->seg[BSEG])
		bss = p->seg[BSEG]->top;

	s = p->psstate;
	if(s == 0)
		s = statename[p->state];
	print("%3lud:%10s pc %8lux dbgpc %8lux %8s (%s) ut %ld st %ld bss %lux qpc %lux nl %lud nd %lud lpc %lux pri %lud\n",
		p->pid, p->text, p->pc, dbgpc(p), s, statename[p->state],
		p->time[0], p->time[1], bss, p->qpc, p->nlocks.ref,
		p->delaysched, p->lastlock ? p->lastlock->pc : 0, p->priority);
}

void
procdump(void)
{
	int i;
	Proc *p;

	if(up)
		print("up %lud\n", up->pid);
	else
		print("no current process\n");
	for(i=0; i<conf.nproc; i++) {
		p = &procalloc.arena[i];
		if(p->state == Dead)
			continue;

		dumpaproc(p);
	}
}
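
/*
 * The exit/wait handshake above is a producer/consumer queue: the
 * dying child pushes a Waitq record onto its parent's waitq under
 * p->exl and wakes p->waitr; pwait sleeps until haswaitq sees the
 * list non-empty, then pops one record.  In miniature (invented
 * names, locking elided):
 *
 *	void
 *	pushwait(Proc *parent, Waitq *wq)
 *	{
 *		wq->next = parent->waitq;
 *		parent->waitq = wq;
 *		parent->nwait++;
 *	}
 *
 *	Waitq*
 *	popwait(Proc *parent)	// caller has slept on haswaitq
 *	{
 *		Waitq *wq;
 *
 *		wq = parent->waitq;
 *		parent->waitq = wq->next;
 *		parent->nwait--;
 *		return wq;
 *	}
 *
 * The 128-record cap in pexit simply stops this list from growing
 * without bound when a parent never calls pwait.
 */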

/*
 *  wait till all processes have flushed their mmu
 *  state about segment s
 */
void
procflushseg(Segment *s)
{
	int i, ns, nm, nwait;
	Proc *p;

	/*
	 *  tell all processes with this
	 *  segment to flush their mmu's
	 */
	nwait = 0;
	for(i=0; i<conf.nproc; i++) {
		p = &procalloc.arena[i];
		if(p->state == Dead)
			continue;
		for(ns = 0; ns < NSEG; ns++)
			if(p->seg[ns] == s){
				p->newtlb = 1;
				for(nm = 0; nm < conf.nmach; nm++){
					if(MACHP(nm)->proc == p){
						MACHP(nm)->flushmmu = 1;
						nwait++;
					}
				}
				break;
			}
	}

	if(nwait == 0)
		return;

	/*
	 *  wait for all processors to take a clock interrupt
	 *  and flush their mmu's
	 */
	for(nm = 0; nm < conf.nmach; nm++)
		if(MACHP(nm) != m)
			while(MACHP(nm)->flushmmu)
				sched();
}

void
scheddump(void)
{
	Proc *p;
	Schedq *rq;

	for(rq = &runq[Nrq-1]; rq >= runq; rq--){
		if(rq->head == 0)
			continue;
		print("rq%ld:", rq-runq);
		for(p = rq->head; p; p = p->rnext)
			print(" %lud(%lud)", p->pid, m->ticks - p->readytime);
		print("\n");
		delay(150);
	}
	print("nrdy %d\n", nrdy);
}

void
kproc(char *name, void (*func)(void *), void *arg)
{
	Proc *p;
	static Pgrp *kpgrp;

	p = newproc();
	p->psstate = 0;
	p->procmode = 0640;
	p->kp = 1;
	p->noswap = 1;

	p->fpsave = up->fpsave;
	p->scallnr = up->scallnr;
	p->s = up->s;
	p->nerrlab = 0;
	p->slash = up->slash;
	p->dot = up->dot;
	if(p->dot)
		incref(p->dot);

	memmove(p->note, up->note, sizeof(p->note));
	p->nnote = up->nnote;
	p->notified = 0;
	p->lastnote = up->lastnote;
	p->notify = up->notify;
	p->ureg = 0;
	p->dbgreg = 0;

	procpriority(p, PriKproc, 0);

	kprocchild(p, func, arg);

	kstrdup(&p->user, eve);
	kstrdup(&p->text, name);
	if(kpgrp == 0)
		kpgrp = newpgrp();
	p->pgrp = kpgrp;
	incref(kpgrp);

	memset(p->time, 0, sizeof(p->time));
	p->time[TReal] = MACHP(0)->ticks;
	ready(p);
}

/*
 *  called splhi() by notify().  See comment in notify for the
 *  reasoning.
 */
void
procctl(Proc *p)
{
	char *state;
	ulong s;

	switch(p->procctl) {
	case Proc_exitbig:
		spllo();
		pexit("Killed: Insufficient physical memory", 1);

	case Proc_exitme:
		spllo();		/* pexit has locks in it */
		pexit("Killed", 1);

	case Proc_traceme:
		if(p->nnote == 0)
			return;
		/* No break */

	case Proc_stopme:
		p->procctl = 0;
		state = p->psstate;
		p->psstate = "Stopped";
		/* free a waiting debugger */
		s = spllo();
		qlock(&p->debug);
		if(p->pdbg) {
			wakeup(&p->pdbg->sleep);
			p->pdbg = 0;
		}
		qunlock(&p->debug);
		splhi();
		p->state = Stopped;
		sched();
		p->psstate = state;
		splx(s);
		return;
	}
}

#include "errstr.h"

void
error(char *err)
{
	spllo();

	assert(up->nerrlab < NERR);
	kstrcpy(up->errstr, err, ERRMAX);
	setlabel(&up->errlab[NERR-1]);
	nexterror();
}

void
nexterror(void)
{
	gotolabel(&up->errlab[--up->nerrlab]);
}

void
exhausted(char *resource)
{
	char buf[ERRMAX];

	snprint(buf, sizeof buf, "no free %s", resource);
	iprint("%s\n", buf);
	error(buf);
}
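
/*
 * error/nexterror unwind through a per-process stack of saved labels
 * (up->errlab), which waserror() pushes; error() jumps to the most
 * recent one, and handlers re-raise with nexterror().  A user-space
 * analogue with setjmp (an illustration, not the kernel macros):
 *
 *	#include <setjmp.h>
 *	jmp_buf errlab[32];
 *	int nerrlab;
 *
 *	#define WASERROR()	setjmp(errlab[nerrlab++])
 *	#define POPERROR()	(nerrlab--)
 *
 *	void
 *	raise(void)	// like nexterror()
 *	{
 *		longjmp(errlab[--nerrlab], 1);
 *	}
 *
 * A caller writes: if(WASERROR()){ cleanup(); raise(); } ... POPERROR();
 * which is exactly the waserror()/poperror() discipline used in
 * pexit, pwait and tsleep above.
 */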

void
killbig(char *why)
{
	int i;
	Segment *s;
	ulong l, max;
	Proc *p, *ep, *kp;

	max = 0;
	kp = 0;
	ep = procalloc.arena+conf.nproc;
	for(p = procalloc.arena; p < ep; p++) {
		if(p->state == Dead || p->kp)
			continue;
		l = 0;
		for(i=1; i<NSEG; i++) {
			s = p->seg[i];
			if(s != 0)
				l += s->top - s->base;
		}
		if(l > max && ((p->procmode&0222) || strcmp(eve, p->user)!=0)) {
			kp = p;
			max = l;
		}
	}
	if(kp == nil)		/* no killable proc found */
		return;

	print("%lud: %s killed: %s\n", kp->pid, kp->text, why);
	for(p = procalloc.arena; p < ep; p++) {
		if(p->state == Dead || p->kp)
			continue;
		if(p != kp && p->seg[BSEG] && p->seg[BSEG] == kp->seg[BSEG])
			p->procctl = Proc_exitbig;
	}
	kp->procctl = Proc_exitbig;
	for(i = 0; i < NSEG; i++) {
		s = kp->seg[i];
		if(s != 0 && canqlock(&s->lk)) {
			mfreeseg(s, s->base, (s->top - s->base)/BY2PG);
			qunlock(&s->lk);
		}
	}
}

/*
 *  change ownership to 'new' of all processes owned by 'old'.  Used when
 *  eve changes.
 */
void
renameuser(char *old, char *new)
{
	Proc *p, *ep;

	ep = procalloc.arena+conf.nproc;
	for(p = procalloc.arena; p < ep; p++)
		if(p->user!=nil && strcmp(old, p->user)==0)
			kstrdup(&p->user, new);
}

/*
 *  time accounting called by clock() splhi'd
 */
void
accounttime(void)
{
	Proc *p;
	ulong n, per;
	static ulong nrun;

	p = m->proc;
	if(p) {
		nrun++;
		p->time[p->insyscall]++;
	}

	/* calculate decaying duty cycles */
	n = perfticks();
	per = n - m->perf.last;
	m->perf.last = n;
	per = (m->perf.period*(HZ-1) + per)/HZ;
	if(per != 0)
		m->perf.period = per;

	m->perf.avg_inidle = (m->perf.avg_inidle*(HZ-1)+m->perf.inidle)/HZ;
	m->perf.inidle = 0;

	m->perf.avg_inintr = (m->perf.avg_inintr*(HZ-1)+m->perf.inintr)/HZ;
	m->perf.inintr = 0;

	/* only one processor gets to compute system load averages */
	if(m->machno != 0)
		return;

	/*
	 *  calculate decaying load average.
	 *  if we decay by (n-1)/n then it takes
	 *  n clock ticks to go from load L to .36 L once
	 *  things quiet down.  it takes about 5 n clock
	 *  ticks to go to zero.  so using HZ means this is
	 *  approximately the load over the last second,
	 *  with a tail lasting about 5 seconds.
	 */
	n = nrun;
	nrun = 0;
	n = (nrdy+n)*1000;
	m->load = (m->load*(HZ-1)+n)/HZ;
}

static void
pidhash(Proc *p)
{
	int h;

	h = p->pid % nelem(procalloc.ht);
	lock(&procalloc);
	p->pidhash = procalloc.ht[h];
	procalloc.ht[h] = p;
	unlock(&procalloc);
}

static void
pidunhash(Proc *p)
{
	int h;
	Proc **l;

	h = p->pid % nelem(procalloc.ht);
	lock(&procalloc);
	for(l = &procalloc.ht[h]; *l != nil; l = &(*l)->pidhash)
		if(*l == p){
			*l = p->pidhash;
			break;
		}
	unlock(&procalloc);
}

int
procindex(ulong pid)
{
	Proc *p;
	int h;
	int s;

	s = -1;
	h = pid % nelem(procalloc.ht);
	lock(&procalloc);
	for(p = procalloc.ht[h]; p != nil; p = p->pidhash)
		if(p->pid == pid){
			s = p - procalloc.arena;
			break;
		}
	unlock(&procalloc);
	return s;
}
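
/*
 * pidunhash uses the pointer-to-pointer idiom: l walks the chain as
 * the address of each link field, so unlinking is a single store with
 * no special case for the list head.  Compare the equivalent
 * predecessor-tracking version (illustrative only):
 *
 *	prev = nil;
 *	for(p = procalloc.ht[h]; p != nil; p = p->pidhash){
 *		if(p == target){
 *			if(prev == nil)
 *				procalloc.ht[h] = p->pidhash;
 *			else
 *				prev->pidhash = p->pidhash;
 *			break;
 *		}
 *		prev = p;
 *	}
 *
 * Holding the address of the link (Proc **l) collapses both cases
 * into *l = p->pidhash.
 */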