#include	<u.h>
#include	"../port/lib.h"
#include	"mem.h"
#include	"dat.h"
#include	"fns.h"
#include	"../port/error.h"

#include	"../port/edf.h"
#include	"errstr.h"
#include	<ptrace.h>

int	nrdy;
Ref	noteidalloc;

ulong	delayedscheds;	/* statistics */
long	skipscheds;
long	preempts;

static Ref	pidalloc;

struct Procalloc procalloc;

extern Proc* psalloc(void);
extern void pshash(Proc*);
extern void psrelease(Proc*);
extern void psunhash(Proc*);

enum
{
	Scaling=2,
};

static int reprioritize(Proc*);
static void updatecpu(Proc*);
static int schedgain = 30;	/* units in seconds */

static void rebalance(void);
static ulong balancetime;

Schedq	runq[Nrq];
ulong	runvec;

char *statename[] =
{	/* BUG: generate automatically */
	"Dead",
	"Moribund",
	"Ready",
	"Scheding",
	"Running",
	"Queueing",
	"QueueingR",
	"QueueingW",
	"Wakeme",
	"Broken",
	"Stopped",
	"Rendez",
	"Waitrelease",
};

/*
 * Always splhi()'ed.
 */
void
schedinit(void)		/* never returns */
{
	Edf *e;

	setlabel(&m->sched);
	if(up) {
		if((e = up->edf) && (e->flags & Admitted))
			edfrecord(up);
		updatecpu(up);
		m->proc = nil;
		switch(up->state) {
		case Running:
			ready(up);
			break;
		case Moribund:
			up->state = Dead;

			/*
			 * Holding locks from pexit:
			 *	procalloc
			 *	palloc
			 */
			mmurelease(up);
			unlock(&palloc);

			psrelease(up);
			unlock(&procalloc);
			break;
		}
		up->mach = nil;
		up = nil;
	}
	sched();
}

/*
 * If changing this routine, look also at sleep(). It
 * contains a copy of the guts of sched().
 */
void
sched(void)
{
	Proc *p;

	if(m->ilockdepth)
		panic("cpu%d: ilockdepth %d, last lock %#p at %#p, sched called from %#p",
			m->machno,
			m->ilockdepth,
			up? up->lastilock: nil,
			(up && up->lastilock)? up->lastilock->pc: m->ilockpc,
			getcallerpc(&p+2));

	if(up){
		/*
		 * Delay the sched until the process gives up the locks
		 * it is holding.  This avoids dumb lock loops.
		 * Don't delay if the process is Moribund.
		 * It called sched to die.
		 * But do sched eventually.  This avoids a missing unlock
		 * from hanging the entire kernel.
		 * But don't reschedule procs holding palloc or procalloc.
		 * Those are far too important to be holding while asleep.
		 *
		 * This test is not exact.  There can still be a few
		 * instructions in the middle of taslock when a process
		 * holds a lock but Lock.p has not yet been initialized.
		 */
		if(up->nlocks)
		if(up->state != Moribund)
		if(up->delaysched < 20
		|| palloc.Lock.p == up
		|| procalloc.Lock.p == up){
			up->delaysched++;
			delayedscheds++;
			return;
		}
		up->delaysched = 0;

		splhi();

		/* statistics */
		m->cs++;

		procsave(up);
		if(setlabel(&up->sched)){
			procrestore(up);
			spllo();
			return;
		}
		gotolabel(&m->sched);
	}
	p = runproc();
	if(!p->edf){
		updatecpu(p);
		p->priority = reprioritize(p);
	}
	if(p != m->readied)
		m->schedticks = m->ticks + HZ/10;
	m->readied = nil;
	up = p;
	up->state = Running;
	up->mach = m;
	m->proc = up;
	mmuswitch(up);
	gotolabel(&up->sched);
}

int
anyready(void)
{
	return runvec;
}

int
anyhigher(void)
{
	return runvec & ~((1<<(up->priority+1))-1);
}
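
/*
 * Illustrative note (not in the original source): runvec keeps one bit
 * per run queue, bit i set while runq[i] is non-empty.  For example,
 * with up->priority == 3, (1<<4)-1 == 0xF, so the mask ~0xF clears
 * bits 0-3 and anyhigher() is nonzero exactly when some queue of
 * priority 4 or above holds a ready process.
 */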

/*
 * here once per clock tick to see if we should resched
 */
void
hzsched(void)
{
	/* once a second, rebalance will reprioritize ready procs */
	if(m->machno == 0)
		rebalance();

	/* unless preempted, get to run for at least 100ms */
	if(anyhigher()
	|| (!up->fixedpri && m->ticks > m->schedticks && anyready())){
		m->readied = nil;	/* avoid cooperative scheduling */
		up->delaysched++;
	}
}
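
/*
 * Worked example (illustrative, assuming HZ == 100): sched() sets
 * m->schedticks = m->ticks + HZ/10, i.e. 10 ticks ahead, so a proc
 * that nothing of higher priority preempts keeps the cpu for
 * 10 ticks = 100ms before hzsched() marks it for rescheduling.
 */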

/*
 * here at the end of non-clock interrupts to see if we should preempt the
 * current process.  Returns 1 if preempted, 0 otherwise.
 */
int
preempted(void)
{
	if(up && up->state == Running)
	if(up->preempted == 0)
	if(anyhigher())
	if(!active.exiting){
		m->readied = nil;	/* avoid cooperative scheduling */
		up->preempted = 1;
		sched();
		splhi();
		up->preempted = 0;
		return 1;
	}
	return 0;
}

/*
 * Update the cpu time average for this particular process,
 * which is about to change from up -> not up or vice versa.
 * p->lastupdate is the last time an updatecpu happened.
 *
 * The cpu time average is a decaying average that lasts
 * about D clock ticks.  D is chosen to be approximately
 * the cpu time of a cpu-intensive "quick job".  A job has to run
 * for approximately D clock ticks before we home in on its
 * actual cpu usage.  Thus if you manage to get in and get out
 * quickly, you won't be penalized during your burst.  Once you
 * start using your share of the cpu for more than about D
 * clock ticks though, your p->cpu hits 1000 (1.0) and you end up
 * below all the other quick jobs.  Interactive tasks, because
 * they basically always use less than their fair share of cpu,
 * will be rewarded.
 *
 * If the process has not been running, then we want to
 * apply the filter
 *
 *	cpu = cpu * (D-1)/D
 *
 * n times, yielding
 *
 *	cpu = cpu * ((D-1)/D)^n
 *
 * but D is big enough that this is approximately
 *
 *	cpu = cpu * (D-n)/D
 *
 * so we use that instead.
 *
 * If the process has been running, we apply the filter to
 * 1 - cpu, yielding a similar equation.  Note that cpu is
 * stored in fixed point (* 1000).
 *
 * Updatecpu must be called before changing up, in order
 * to maintain accurate cpu usage statistics.  It can be called
 * at any time to bring the stats for a given proc up-to-date.
 */
static void
updatecpu(Proc *p)
{
	int D, n, t, ocpu;

	if(p->edf)
		return;

	t = sys->ticks*Scaling + Scaling/2;
	n = t - p->lastupdate;
	p->lastupdate = t;

	if(n == 0)
		return;
	D = schedgain*HZ*Scaling;
	if(n > D)
		n = D;

	ocpu = p->cpu;
	if(p != up)
		p->cpu = (ocpu*(D-n))/D;
	else{
		t = 1000 - ocpu;
		t = (t*(D-n))/D;
		p->cpu = 1000 - t;
	}

//	iprint("pid %d %s for %d cpu %d -> %d\n", p->pid, p==up?"active":"inactive", n, ocpu, p->cpu);
}
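
/*
 * Worked example of the filter above (illustrative, assuming
 * HZ == 100): with Scaling == 2 and schedgain == 30, D = 30*100*2 ==
 * 6000.  A process with p->cpu == 500 that stays off the cpu for
 * n == 600 scaled ticks (300 real ticks, 3 seconds) decays to
 * 500*(6000-600)/6000 == 450.  Had it been running instead,
 * 1000-500 == 500 decays the same way and p->cpu rises to
 * 1000-450 == 550.  Either way p->cpu moves n/D == 1/10 of its
 * remaining distance to the limit (0 or 1000).
 */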

/*
 * On average, p has used p->cpu of a cpu recently.
 * Its fair share is sys->nonline/load of a cpu.  If it has been getting
 * too much, penalize it.  If it has been getting not enough, reward it.
 * I don't think you can get much more than your fair share that
 * often, so most of the queues are for using less.  Having a priority
 * of 3 means you're just right.  Having a higher priority (up to p->basepri)
 * means you're not using as much as you could.
 */
static int
reprioritize(Proc *p)
{
	int fairshare, n, load, ratio;

	load = sys->machptr[0]->load;
	if(load == 0)
		return p->basepri;

	/*
	 * fairshare = 1.000 * sys->nonline * 1.000/load,
	 * except the decimal point is moved three places
	 * on both load and fairshare.
	 */
	fairshare = (sys->nonline*1000*1000)/load;
	n = p->cpu;
	if(n == 0)
		n = 1;
	ratio = (fairshare+n/2) / n;
	if(ratio > p->basepri)
		ratio = p->basepri;
	if(ratio < 0)
		panic("reprioritize");
//	iprint("pid %d cpu %d load %d fair %d pri %d\n", p->pid, p->cpu, load, fairshare, ratio);
	return ratio;
}
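
/*
 * Worked example (illustrative): load and p->cpu are both fixed point
 * with three decimal places.  On a machine with sys->nonline == 4 cpus
 * and load == 2000 (two runnable procs on average), fairshare =
 * (4*1000*1000)/2000 == 2000.  A proc with p->cpu == 500 gets
 * ratio = (2000+250)/500 == 4; one hogging a whole cpu (p->cpu ==
 * 1000) gets (2000+500)/1000 == 2.  Both are then capped at
 * p->basepri.
 */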

/*
 * add a process to a scheduling queue
 */
void
queueproc(Schedq *rq, Proc *p)
{
	int pri;

	pri = rq - runq;
	lock(runq);
	p->priority = pri;
	p->rnext = 0;
	if(rq->tail)
		rq->tail->rnext = p;
	else
		rq->head = p;
	rq->tail = p;
	rq->n++;
	nrdy++;
	runvec |= 1<<pri;
	unlock(runq);
}

/*
 * try to remove a process from a scheduling queue (called splhi)
 */
Proc*
dequeueproc(Schedq *rq, Proc *tp)
{
	Proc *l, *p;

	if(!canlock(runq))
		return nil;

	/*
	 * the queue may have changed before we locked runq,
	 * refind the target process.
	 */
	l = 0;
	for(p = rq->head; p; p = p->rnext){
		if(p == tp)
			break;
		l = p;
	}

	/*
	 * p->mach==0 only when process state is saved
	 */
	if(p == nil || p->mach){
		unlock(runq);
		return nil;
	}
	if(p->rnext == nil)
		rq->tail = l;
	if(l)
		l->rnext = p->rnext;
	else
		rq->head = p->rnext;
	if(rq->head == nil)
		runvec &= ~(1<<(rq-runq));
	rq->n--;
	nrdy--;
	if(p->state != Ready)
		print("dequeueproc %s %d %s\n", p->text, p->pid, statename[p->state]);

	unlock(runq);
	return p;
}

/*
 * ready(p) picks a new priority for a process and sticks it in the
 * runq for that priority.
 */
void
ready(Proc *p)
{
	Mreg s;
	int pri;
	Schedq *rq;
	void (*pt)(Proc*, int, vlong, vlong);

	s = splhi();
	if(edfready(p)){
		splx(s);
		return;
	}

	if(up != p && (p->wired == nil || p->wired == m))
		m->readied = p;	/* group scheduling */

	updatecpu(p);
	pri = reprioritize(p);
	p->priority = pri;
	rq = &runq[pri];
	p->state = Ready;
	queueproc(rq, p);
	pt = proctrace;
	if(pt)
		pt(p, SReady, 0, 0);
	splx(s);
}

/*
 * yield the processor and drop our priority
 */
void
yield(void)
{
	if(anyready()){
		/* pretend we just used 1/2 tick */
		up->lastupdate -= Scaling/2;
		sched();
	}
}
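
/*
 * Illustrative note (not in the original source): updatecpu keeps its
 * time stamps in units of ticks*Scaling, so backing lastupdate up by
 * Scaling/2 ages the last update by half a tick.  The next
 * updatecpu(), reached via sched() while the yielder is still up,
 * therefore charges it an extra half tick of cpu time, nudging its
 * recomputed priority down as the comment above intends.
 */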

/*
 * recalculate priorities once a second.  We need to do this
 * since priorities will otherwise only be recalculated when
 * the running process blocks.
 */
static void
rebalance(void)
{
	Mreg s;
	int pri, npri, t;
	Schedq *rq;
	Proc *p;

	t = m->ticks;
	if(t - balancetime < HZ)
		return;
	balancetime = t;

	for(pri=0, rq=runq; pri<Npriq; pri++, rq++){
another:
		p = rq->head;
		if(p == nil)
			continue;
		if(p->mp != m)
			continue;
		if(pri == p->basepri)
			continue;
		updatecpu(p);
		npri = reprioritize(p);
		if(npri != pri){
			s = splhi();
			p = dequeueproc(rq, p);
			if(p)
				queueproc(&runq[npri], p);
			splx(s);
			goto another;
		}
	}
}

/*
 * pick a process to run
 */
Proc*
runproc(void)
{
	Schedq *rq;
	Proc *p;
	ulong start, now;
	int i;
	void (*pt)(Proc*, int, vlong, vlong);

	start = perfticks();

	/* cooperative scheduling until the clock ticks */
	if((p=m->readied) && p->mach==0 && p->state==Ready
	&& (p->wired == nil || p->wired == m)
	&& runq[Nrq-1].head == nil && runq[Nrq-2].head == nil){
		skipscheds++;
		rq = &runq[p->priority];
		goto found;
	}

	preempts++;

loop:
	/*
	 * find a process that last ran on this processor (affinity),
	 * or one that hasn't moved in a while (load balancing).  Every
	 * time around the loop affinity goes down.
	 */
	spllo();
	for(i = 0;; i++){
		/*
		 * find the highest priority target process that this
		 * processor can run given affinity constraints.
		 */
		for(rq = &runq[Nrq-1]; rq >= runq; rq--){
			for(p = rq->head; p; p = p->rnext){
				if(p->mp == nil || p->mp == m
				|| (!p->wired && i > 0))
					goto found;
			}
		}

		/* waste time or halt the CPU */
		idlehands();

		/* remember how much time we spend idling */
		now = perfticks();
		m->perf.inidle += now-start;
		start = now;
	}

found:
	splhi();
	p = dequeueproc(rq, p);
	if(p == nil)
		goto loop;

	p->state = Scheding;
	p->mp = m;

	if(edflock(p)){
		edfrun(p, rq == &runq[PriEdf]);	/* start deadline timer and do admin */
		edfunlock();
	}
	pt = proctrace;
	if(pt)
		pt(p, SRun, 0, 0);
	return p;
}

int
canpage(Proc *p)
{
	int ok;

	splhi();
	lock(runq);
	/* Only reliable way to see if we are Running */
	if(p->mach == 0) {
		p->newtlb = 1;
		ok = 1;
	}
	else
		ok = 0;
	unlock(runq);
	spllo();

	return ok;
}

Proc*
newproc(void)
{
	Proc *p;

	p = psalloc();

	p->state = Scheding;
	p->psstate = "New";
	p->mach = nil;
	p->qnext = nil;
	p->nchild = 0;
	p->nwait = 0;
	p->waitq = nil;
	p->parent = nil;
	p->pgrp = nil;
	p->egrp = nil;
	p->fgrp = nil;
	p->rgrp = nil;
	p->pdbg = 0;
	p->kp = 0;
	if(up != nil && up->procctl == Proc_tracesyscall)
		p->procctl = Proc_tracesyscall;
	else
		p->procctl = 0;
	p->syscalltrace = nil;
	p->notepending = 0;
	p->ureg = nil;
	p->privatemem = 0;
	p->errstr = p->errbuf0;
	p->syserrstr = p->errbuf1;
	p->errbuf0[0] = '\0';
	p->errbuf1[0] = '\0';
	p->nlocks = 0;
	p->delaysched = 0;
	p->trace = 0;
	kstrdup(&p->user, "*nouser");
	kstrdup(&p->text, "*notext");
	kstrdup(&p->args, "");
	p->nargs = 0;
	p->setargs = 0;
	memset(p->seg, 0, sizeof p->seg);
	p->pid = incref(&pidalloc);
	pshash(p);
	p->noteid = incref(&noteidalloc);
	if(p->pid <= 0 || p->noteid <= 0)
		panic("pidalloc");
	if(p->kstack == nil)
		p->kstack = smalloc(KSTACK);

	/* sched params */
	p->mp = nil;
	p->wired = nil;
	procpriority(p, PriNormal, 0);
	p->cpu = 0;
	p->lastupdate = sys->ticks*Scaling;
	p->edf = nil;

	return p;
}

/*
 * wire this proc to a machine
 */
void
procwired(Proc *p, int bm)
{
	Proc *pp;
	int i;
	char nwired[MACHMAX];
	Mach *wm, *mp;

	if(bm < 0){
		/* pick a machine to wire to */
		memset(nwired, 0, sizeof(nwired));
		p->wired = nil;
		for(i=0; (pp = psincref(i)) != nil; i++){
			wm = pp->wired;
			if(wm && pp->pid)
				nwired[wm->machno]++;
			psdecref(pp);
		}
		bm = 0;
		for(i=0; i<MACHMAX; i++){
			if((mp = sys->machptr[i]) == nil || !mp->online)
				continue;
			if(nwired[i] < nwired[bm])
				bm = i;
		}
	} else {
		/* use the virtual machine requested */
		bm = bm % MACHMAX;
	}

	p->wired = sys->machptr[bm];
	p->mp = p->wired;
}

void
procpriority(Proc *p, int pri, int fixed)
{
	if(pri >= Npriq)
		pri = Npriq - 1;
	else if(pri < 0)
		pri = 0;
	p->basepri = pri;
	p->priority = pri;
	if(fixed){
		p->fixedpri = 1;
	} else {
		p->fixedpri = 0;
	}
}

/*
 * sleep if a condition is not true.  Another process will
 * awaken us after it sets the condition.  When we awaken
 * the condition may no longer be true.
 *
 * we lock both the process and the rendezvous to keep r->p
 * and p->r synchronized.
 */
void
sleep(Rendez *r, int (*f)(void*), void *arg)
{
	Mreg s;
	void (*pt)(Proc*, int, vlong, vlong);

	s = splhi();

	if(up->nlocks)
		print("process %d sleeps with %d locks held, last lock %#p locked at pc %#p, sleep called from %#p\n",
			up->pid, up->nlocks, up->lastlock, up->lastlock->pc, getcallerpc(&r));
	lock(r);
	lock(&up->rlock);
	if(r->p){
		print("double sleep called from %#p, %d %d\n",
			getcallerpc(&r), r->p->pid, up->pid);
		dumpstack();
	}

	/*
	 * Wakeup only knows there may be something to do by testing
	 * r->p in order to get something to lock on.
	 * Flush that information out to memory in case the sleep is
	 * committed.
	 */
	r->p = up;

	if((*f)(arg) || up->notepending){
		/*
		 * if condition happened or a note is pending
		 * never mind
		 */
		r->p = nil;
		unlock(&up->rlock);
		unlock(r);
	} else {
		/*
		 * now we are committed to
		 * change state and call scheduler
		 */
		pt = proctrace;
		if(pt)
			pt(up, SSleep, 0, 0);
		up->state = Wakeme;
		up->r = r;

		/* statistics */
		m->cs++;

		procsave(up);
		if(setlabel(&up->sched)) {
			/*
			 * here when the process is awakened
			 */
			procrestore(up);
			spllo();
		} else {
			/*
			 * here to go to sleep (i.e. stop Running)
			 */
			unlock(&up->rlock);
			unlock(r);
			gotolabel(&m->sched);
		}
	}

	if(up->notepending) {
		up->notepending = 0;
		splx(s);
		if(up->procctl == Proc_exitme && up->closingfgrp)
			forceclosefgrp();
		error(Eintr);
	}

	splx(s);
}

static int
tfn(void *arg)
{
	return up->trend == nil || up->tfn(arg);
}

void
twakeup(Ureg*, Timer *t)
{
	Proc *p;
	Rendez *trend;

	p = t->ta;
	trend = p->trend;
	p->trend = 0;
	if(trend)
		wakeup(trend);
}

void
tsleep(Rendez *r, int (*fn)(void*), void *arg, long ms)
{
	if(up->tt){
		print("tsleep: timer active: mode %d, tf %#p\n",
			up->tmode, up->tf);
		timerdel(up);
	}
	up->tns = MS2NS(ms);
	up->tf = twakeup;
	up->tmode = Trelative;
	up->ta = up;
	up->trend = r;
	up->tfn = fn;
	timeradd(up);

	if(waserror()){
		timerdel(up);
		nexterror();
	}
	sleep(r, tfn, arg);
	if(up->tt)
		timerdel(up);
	up->twhen = 0;
	poperror();
}

/*
 * Expects that only one process can call wakeup for any given Rendez.
 * We hold both locks to ensure that r->p and p->r remain consistent.
 * Richard Miller has a better solution that doesn't require both to
 * be held simultaneously, but I'm a paranoid - presotto.
 */
Proc*
wakeup(Rendez *r)
{
	Mreg s;
	Proc *p;

	s = splhi();

	lock(r);
	p = r->p;

	if(p != nil){
		lock(&p->rlock);
		if(p->state != Wakeme || p->r != r)
			panic("wakeup: state");
		r->p = nil;
		p->r = nil;
		ready(p);
		unlock(&p->rlock);
	}
	unlock(r);

	splx(s);

	return p;
}

/*
 * if waking a sleeping process, this routine must hold both
 * p->rlock and r->lock.  However, it can't acquire them in
 * the same order as wakeup does, so a lock ordering deadlock
 * is possible.  We break the deadlock by giving up the p->rlock
 * lock if we can't get the r->lock and retrying.
 */
int
postnote(Proc *p, int dolock, char *n, int flag)
{
	Mreg s;
	int ret;
	Rendez *r;
	Proc *d, **l;

	if(dolock && m->ilockdepth != 0)
		panic("postnote: caller %#p: ilockdepth %d note %s",
			getcallerpc(&p), m->ilockdepth, n);
	if(dolock)
		qlock(&p->debug);

	if(flag != NUser && (p->notify == nil || p->notified))
		p->nnote = 0;

	ret = 0;
	if(p->nnote < NNOTE) {
		strcpy(p->note[p->nnote].msg, n);
		p->note[p->nnote++].flag = flag;
		ret = 1;
	}
	p->notepending = 1;
	if(dolock)
		qunlock(&p->debug);

	/* this loop is to avoid lock ordering problems. */
	for(;;){
		s = splhi();
		lock(&p->rlock);
		r = p->r;

		/* waiting for a wakeup? */
		if(r == nil)
			break;	/* no */

		/* try for the second lock */
		if(canlock(r)){
			if(p->state != Wakeme || r->p != p)
				panic("postnote: state %d %d %d", r->p != p, p->r != r, p->state);
			p->r = nil;
			r->p = nil;
			ready(p);
			unlock(r);
			break;
		}

		/* give other process time to get out of critical section and try again */
		unlock(&p->rlock);
		splx(s);
		sched();
	}
	unlock(&p->rlock);
	splx(s);

	if(p->state != Rendezvous)
		return ret;

	/* Try and pull out of a rendezvous */
	lock(p->rgrp);
	if(p->state == Rendezvous) {
		p->rendval = ~0;
		l = &REND(p->rgrp, p->rendtag);
		for(d = *l; d; d = d->rendhash) {
			if(d == p) {
				*l = p->rendhash;
				break;
			}
			l = &d->rendhash;
		}
		ready(p);
	}
	unlock(p->rgrp);
	return ret;
}

/*
 * weird thing: keep at most NBROKEN around
 */
#define NBROKEN 4
struct
{
	QLock;
	int	n;
	Proc	*p[NBROKEN];
}broken;

void
addbroken(Proc *p)
{
	qlock(&broken);
	if(broken.n == NBROKEN) {
		ready(broken.p[0]);
		memmove(&broken.p[0], &broken.p[1], sizeof(Proc*)*(NBROKEN-1));
		--broken.n;
	}
	broken.p[broken.n++] = p;
	qunlock(&broken);

	edfstop(up);
	p->state = Broken;
	p->psstate = 0;
	sched();
}

void
unbreak(Proc *p)
{
	int b;

	qlock(&broken);
	for(b=0; b < broken.n; b++)
		if(broken.p[b] == p) {
			broken.n--;
			memmove(&broken.p[b], &broken.p[b+1],
					sizeof(Proc*)*(NBROKEN-(b+1)));
			ready(p);
			break;
		}
	qunlock(&broken);
}

int
freebroken(void)
{
	int i, n;

	qlock(&broken);
	n = broken.n;
	for(i=0; i<n; i++) {
		ready(broken.p[i]);
		broken.p[i] = 0;
	}
	broken.n = 0;
	qunlock(&broken);
	return n;
}

void
pexit(char *exitstr, int freemem)
{
	Proc *p;
	Segment **s, **es;
	long utime, stime;
	Waitq *wq, *f, *next;
	Fgrp *fgrp;
	Egrp *egrp;
	Rgrp *rgrp;
	Pgrp *pgrp;
	Chan *dot;
	void (*pt)(Proc*, int, vlong, vlong);

	if(up->syscalltrace != nil)
		free(up->syscalltrace);
	up->syscalltrace = nil;
	up->alarm = 0;
	if(up->tt)
		timerdel(up);
	pt = proctrace;
	if(pt)
		pt(up, SDead, 0, 0);

	/* nil out all the resources under lock (free later) */
	qlock(&up->debug);
	fgrp = up->fgrp;
	up->fgrp = nil;
	egrp = up->egrp;
	up->egrp = nil;
	rgrp = up->rgrp;
	up->rgrp = nil;
	pgrp = up->pgrp;
	up->pgrp = nil;
	dot = up->dot;
	up->dot = nil;
	qunlock(&up->debug);

	if(fgrp)
		closefgrp(fgrp);
	if(egrp)
		closeegrp(egrp);
	if(rgrp)
		closergrp(rgrp);
	if(dot)
		cclose(dot);
	if(pgrp)
		closepgrp(pgrp);

	/*
	 * if not a kernel process and have a parent,
	 * do some housekeeping.
	 */
	if(up->kp == 0) {
		p = up->parent;
		if(p == nil) {
			if(exitstr == nil)
				exitstr = "unknown";
			panic("boot process died: %s", exitstr);
		}

		while(waserror())
			;

		wq = smalloc(sizeof(Waitq));
		poperror();

		wq->w.pid = up->pid;
		utime = up->time[TUser] + up->time[TCUser];
		stime = up->time[TSys] + up->time[TCSys];
		wq->w.time[TUser] = tk2ms(utime);
		wq->w.time[TSys] = tk2ms(stime);
		wq->w.time[TReal] = tk2ms(sys->ticks - up->time[TReal]);
		if(exitstr && exitstr[0])
			snprint(wq->w.msg, sizeof(wq->w.msg), "%s %d: %s",
				up->text, up->pid, exitstr);
		else
			wq->w.msg[0] = '\0';

		lock(&p->exl);
		/*
		 * Check that parent is still alive.
		 */
		if(p->pid == up->parentpid && p->state != Broken) {
			p->nchild--;
			p->time[TCUser] += utime;
			p->time[TCSys] += stime;
			/*
			 * If there would be more than 128 wait records
			 * for my parent, then don't leave a wait
			 * record behind.  This helps prevent badly written
			 * daemon processes from accumulating lots of wait
			 * records.
			 */
			if(p->nwait < 128) {
				wq->next = p->waitq;
				p->waitq = wq;
				p->nwait++;
				wq = nil;
				wakeup(&p->waitr);
			}
		}
		unlock(&p->exl);
		if(wq)
			free(wq);
	}

	if(!freemem)
		addbroken(up);

	qlock(&up->seglock);
	es = &up->seg[NSEG];
	for(s = up->seg; s < es; s++) {
		if(*s) {
			putseg(*s);
			*s = 0;
		}
	}
	qunlock(&up->seglock);

	lock(&up->exl);		/* Prevent my children from leaving waits */
	psunhash(up);
	up->pid = 0;
	wakeup(&up->waitr);
	unlock(&up->exl);

	for(f = up->waitq; f; f = next) {
		next = f->next;
		free(f);
	}

	/* release debuggers */
	qlock(&up->debug);
	if(up->pdbg) {
		wakeup(&up->pdbg->sleep);
		up->pdbg = 0;
	}
	qunlock(&up->debug);

	edfstop(up);
	if(up->edf != nil){
		free(up->edf);
		up->edf = nil;
	}

	/* Sched must not loop for these locks */
	lock(&procalloc);
	lock(&palloc);

	up->state = Moribund;
	sched();
	panic("pexit");
}

int
haswaitq(void *x)
{
	Proc *p;

	p = (Proc *)x;
	return p->waitq != 0;
}

int
pwait(Waitmsg *w)
{
	int cpid;
	Waitq *wq;

	if(!canqlock(&up->qwaitr))
		error(Einuse);

	if(waserror()) {
		qunlock(&up->qwaitr);
		nexterror();
	}

	lock(&up->exl);
	if(up->nchild == 0 && up->waitq == nil) {
		unlock(&up->exl);
		error(Enochild);
	}
	unlock(&up->exl);

	sleep(&up->waitr, haswaitq, up);

	lock(&up->exl);
	wq = up->waitq;
	up->waitq = wq->next;
	up->nwait--;
	unlock(&up->exl);

	qunlock(&up->qwaitr);
	poperror();

	if(w)
		memmove(w, &wq->w, sizeof(Waitmsg));
	cpid = wq->w.pid;
	free(wq);

	return cpid;
}

void
dumpaproc(Proc *p)
{
	uintptr bss;
	char *s;

	if(p == nil)
		return;

	bss = 0;
	if(p->seg[BSEG])
		bss = p->seg[BSEG]->top;

	s = p->psstate;
	if(s == nil)
		s = statename[p->state];
	print("%3d:%10s pc %#p dbgpc %#p %8s (%s) ut %ld st %ld bss %#p qpc %#p nl %d nd %lud lpc %#p pri %lud\n",
		p->pid, p->text, p->pc, dbgpc(p), s, statename[p->state],
		p->time[0], p->time[1], bss, p->qpc, p->nlocks,
		p->delaysched, p->lastlock ? p->lastlock->pc : 0, p->priority);
}

void
procdump(void)
{
	int i;
	Proc *p;

	if(up != nil)
		print("up %d\n", up->pid);
	else
		print("no current process\n");
	for(i=0; (p = psincref(i)) != nil; i++) {
		if(p->state != Dead)
			dumpaproc(p);
		psdecref(p);
	}
}

/*
 * wait till all processes have flushed their mmu
 * state about segment s
 */
void
procflushseg(Segment *s)
{
	int i, ns, nm, nwait;
	Proc *p;
	Mach *mp;

	/*
	 * tell all processes with this
	 * segment to flush their mmu's
	 */
	nwait = 0;
	for(i=0; (p = psincref(i)) != nil; i++) {
		if(p->state == Dead){
			psdecref(p);
			continue;
		}
		for(ns = 0; ns < NSEG; ns++){
			if(p->seg[ns] == s){
				p->newtlb = 1;
				for(nm = 0; nm < MACHMAX; nm++){
					if((mp = sys->machptr[nm]) == nil || !mp->online)
						continue;
					if(mp->proc == p){
						mp->mmuflush = 1;
						nwait++;
					}
				}
				break;
			}
		}
		psdecref(p);
	}

	if(nwait == 0)
		return;

	/*
	 * wait for all processors to take a clock interrupt
	 * and flush their mmu's
	 */
	for(i = 0; i < MACHMAX; i++){
		if((mp = sys->machptr[i]) == nil || !mp->online || mp == m)
			continue;
		while(mp->mmuflush)
			sched();
	}
}

void
scheddump(void)
{
	Proc *p;
	Schedq *rq;

	for(rq = &runq[Nrq-1]; rq >= runq; rq--){
		if(rq->head == 0)
			continue;
		print("rq%ld:", rq-runq);
		for(p = rq->head; p; p = p->rnext)
			print(" %d(%lud)", p->pid, m->ticks - p->readytime);
		print("\n");
		delay(150);
	}
	print("nrdy %d\n", nrdy);
}

void
kproc(char *name, void (*func)(void *), void *arg)
{
	Proc *p;
	static Pgrp *kpgrp;

	p = newproc();
	p->psstate = 0;
	p->procmode = 0640;
	p->kp = 1;

	p->scallnr = up->scallnr;
	memmove(p->arg, up->arg, sizeof(up->arg));
	p->nerrlab = 0;
	p->slash = up->slash;
	p->dot = up->dot;
	if(p->dot)
		incref(p->dot);

	memmove(p->note, up->note, sizeof(p->note));
	p->nnote = up->nnote;
	p->notified = 0;
	p->lastnote = up->lastnote;
	p->notify = up->notify;
	p->ureg = nil;
	p->dbgreg = nil;

	procpriority(p, PriKproc, 0);

	kprocchild(p, func, arg);

	kstrdup(&p->user, eve);
	kstrdup(&p->text, name);
	if(kpgrp == nil)
		kpgrp = newpgrp();
	p->pgrp = kpgrp;
	incref(kpgrp);

	memset(p->time, 0, sizeof(p->time));
	p->time[TReal] = sys->ticks;
	ready(p);
}

/*
 * called splhi() by notify().  See comment in notify for the
 * reasoning.
 */
void
procctl(Proc *p)
{
	Mreg s;
	char *state;

	switch(p->procctl) {
	case Proc_exitbig:
		spllo();
		pexit("Killed: Insufficient physical memory", 1);

	case Proc_exitme:
		spllo();		/* pexit has locks in it */
		pexit("Killed", 1);

	case Proc_traceme:
		if(p->nnote == 0)
			return;
		/* No break */

	case Proc_stopme:
		p->procctl = 0;
		state = p->psstate;
		p->psstate = "Stopped";
		/* free a waiting debugger */
		s = spllo();
		qlock(&p->debug);
		if(p->pdbg) {
			wakeup(&p->pdbg->sleep);
			p->pdbg = 0;
		}
		qunlock(&p->debug);
		splhi();
		p->state = Stopped;
		sched();
		p->psstate = state;
		splx(s);
		return;
	}
}

void
error(char *err)
{
	spllo();

	assert(up->nerrlab < NERR);
	kstrcpy(up->errstr, err, ERRMAX);
	setlabel(&up->errlab[NERR-1]);
	nexterror();
}

void
nexterror(void)
{
	gotolabel(&up->errlab[--up->nerrlab]);
}

void
exhausted(char *resource)
{
	char buf[ERRMAX];

	snprint(buf, sizeof buf, "no free %s", resource);
	iprint("%s\n", buf);
	error(buf);
}

void
killbig(char *why)
{
	int i, x;
	Segment *s;
	ulong l, max;
	Proc *p, *kp;

	max = 0;
	kp = nil;
	for(x = 0; (p = psincref(x)) != nil; x++) {
		if(p->state == Dead || p->kp){
			psdecref(p);
			continue;
		}
		l = 0;
		for(i=1; i<NSEG; i++) {
			s = p->seg[i];
			if(s != 0)
				l += s->top - s->base;
		}
		if(l > max && ((p->procmode&0222) || strcmp(eve, p->user)!=0)) {
			if(kp != nil)
				psdecref(kp);
			kp = p;
			max = l;
		}
		else
			psdecref(p);
	}
	if(kp == nil)
		return;

	print("%d: %s killed: %s\n", kp->pid, kp->text, why);
	for(x = 0; (p = psincref(x)) != nil; x++) {
		if(p->state == Dead || p->kp){
			psdecref(p);
			continue;
		}
		if(p != kp && p->seg[BSEG] && p->seg[BSEG] == kp->seg[BSEG])
			p->procctl = Proc_exitbig;
		psdecref(p);
	}

	kp->procctl = Proc_exitbig;
	for(i = 0; i < NSEG; i++) {
		s = kp->seg[i];
		if(s != nil && canqlock(&s->lk)) {
			mfreeseg(s, s->base, s->top);
			qunlock(&s->lk);
		}
	}
	psdecref(kp);
}

/*
 * change ownership to 'new' of all processes owned by 'old'.  Used when
 * eve changes.
 */
void
renameuser(char *old, char *new)
{
	int i;
	Proc *p;

	for(i = 0; (p = psincref(i)) != nil; i++){
		if(p->user!=nil && strcmp(old, p->user)==0)
			kstrdup(&p->user, new);
		psdecref(p);
	}
}

/*
 * time accounting called by clock() splhi'd
 */
void
accounttime(void)
{
	Proc *p;
	ulong n, per;
	static ulong nrun;

	p = m->proc;
	if(p != nil) {
		nrun++;
		p->time[p->insyscall]++;
	}

	/* calculate decaying duty cycles */
	n = perfticks();
	per = n - m->perf.last;
	m->perf.last = n;
	per = (m->perf.period*(HZ-1) + per)/HZ;
	if(per != 0)
		m->perf.period = per;

	m->perf.avg_inidle = (m->perf.avg_inidle*(HZ-1)+m->perf.inidle)/HZ;
	m->perf.inidle = 0;

	m->perf.avg_inintr = (m->perf.avg_inintr*(HZ-1)+m->perf.inintr)/HZ;
	m->perf.inintr = 0;

	/* only one processor gets to compute system load averages */
	if(m->machno != 0)
		return;

	/*
	 * calculate decaying load average.
	 * if we decay by (n-1)/n then it takes
	 * n clock ticks to go from load L to .36 L once
	 * things quiet down.  it takes about 5 n clock
	 * ticks to go to zero.  so using HZ means this is
	 * approximately the load over the last second,
	 * with a tail lasting about 5 seconds.
	 */
	n = nrun;
	nrun = 0;
	n = (nrdy+n)*1000;
	m->load = (m->load*(HZ-1)+n)/HZ;
}
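
/*
 * Worked example (illustrative, assuming HZ == 100): m->load is kept
 * in milli-procs.  With one proc runnable on every tick, n == 1000
 * each tick and m->load converges on 1000 (1.0 procs).  Once the run
 * queue empties, the load decays by (HZ-1)/HZ per tick, so after HZ
 * ticks (one second) 0.99^100, roughly 1/e = 0.37, of the old value
 * remains, matching the comment above.
 */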