#include	<u.h>
#include	"../port/lib.h"
#include	"mem.h"
#include	"dat.h"
#include	"fns.h"
#include	"../port/error.h"
#include	"../port/edf.h"
#include	<trace.h>

int	schedgain = 30;	/* units in seconds */
int	nrdy;
Ref	noteidalloc;

void	updatecpu(Proc*);
int	reprioritize(Proc*);

ulong	delayedscheds;	/* statistics */
long	skipscheds;
long	preempts;
ulong	load;

static Ref pidalloc;

static struct Procalloc
{
	Lock;
	Proc*	ht[128];
	Proc*	arena;
	Proc*	free;
} procalloc;

enum
{
	Q=10,
	DQ=4,
	Scaling=2,
};

Schedq	runq[Nrq];
ulong	runvec;

char *statename[] =
{	/* BUG: generate automatically */
	"Dead",
	"Moribund",
	"Ready",
	"Scheding",
	"Running",
	"Queueing",
	"QueueingR",
	"QueueingW",
	"Wakeme",
	"Broken",
	"Stopped",
	"Rendez",
	"Waitrelease",
};

static void pidhash(Proc*);
static void pidunhash(Proc*);
static void rebalance(void);

/*
 *  Always splhi()'ed.
 */
void
schedinit(void)		/* never returns */
{
	Edf *e;

	setlabel(&m->sched);
	if(up) {
		if((e = up->edf) && (e->flags & Admitted))
			edfrecord(up);
		m->proc = 0;
		switch(up->state) {
		case Running:
			ready(up);
			break;
		case Moribund:
			up->state = Dead;
			edfstop(up);
			if (up->edf)
				free(up->edf);
			up->edf = nil;

			/*
			 * Holding locks from pexit:
			 *	procalloc
			 *	palloc
			 */
			mmurelease(up);

			up->qnext = procalloc.free;
			procalloc.free = up;

			unlock(&palloc);
			unlock(&procalloc);
			break;
		}
		up->mach = nil;
		updatecpu(up);
		up = nil;
	}
	sched();
}

/*
 *  If changing this routine, look also at sleep().  It
 *  contains a copy of the guts of sched().
 */
void
sched(void)
{
	Proc *p;

	if(m->ilockdepth)
		panic("cpu%d: ilockdepth %d, last lock %#p at %#p, sched called from %#p",
			m->machno,
			m->ilockdepth,
			up? up->lastilock: nil,
			(up && up->lastilock)? up->lastilock->pc: 0,
			getcallerpc(&p+2));
	if(up){
		/*
		 * Delay the sched until the process gives up the locks
		 * it is holding.  This avoids dumb lock loops.
		 * Don't delay if the process is Moribund.
		 * It called sched to die.
		 * But do sched eventually.  This avoids a missing unlock
		 * from hanging the entire kernel.
		 * But don't reschedule procs holding palloc or procalloc.
		 * Those are far too important to be holding while asleep.
		 *
		 * This test is not exact.  There can still be a few
		 * instructions in the middle of taslock when a process
		 * holds a lock but Lock.p has not yet been initialized.
		 */
		if(up->nlocks.ref)
		if(up->state != Moribund)
		if(up->delaysched < 20
		|| palloc.Lock.p == up
		|| procalloc.Lock.p == up){
			up->delaysched++;
			delayedscheds++;
			return;
		}
		up->delaysched = 0;

		splhi();

		/* statistics */
		m->cs++;

		procsave(up);
		if(setlabel(&up->sched)){
			procrestore(up);
			spllo();
			return;
		}
		gotolabel(&m->sched);
	}
	p = runproc();
	if(!p->edf){
		updatecpu(p);
		p->priority = reprioritize(p);
	}
	if(p != m->readied)
		m->schedticks = m->ticks + HZ/10;
	m->readied = 0;
	up = p;
	up->state = Running;
	up->mach = MACHP(m->machno);
	m->proc = up;
	mmuswitch(up);
	gotolabel(&up->sched);
}

int
anyready(void)
{
	return runvec;
}

int
anyhigher(void)
{
	return runvec & ~((1<<(up->priority+1))-1);
}
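
/*
 * A worked example of the runvec test above (illustrative only):
 * bit i of runvec is set exactly when runq[i] is non-empty.  If
 * up->priority is 10, then (1<<11)-1 = 0x7ff masks off bits 0..10,
 * so anyhigher() is non-zero precisely when some queue of priority
 * 11 or above holds a ready process.
 */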

/*
 *  here once per clock tick to see if we should resched
 */
void
hzsched(void)
{
	/* once a second, rebalance will reprioritize ready procs */
	if(m->machno == 0)
		rebalance();

	/* unless preempted, get to run for at least 100ms */
	if(anyhigher()
	|| (!up->fixedpri && m->ticks > m->schedticks && anyready())){
		m->readied = nil;	/* avoid cooperative scheduling */
		up->delaysched++;
	}
}

/*
 *  here at the end of non-clock interrupts to see if we should preempt the
 *  current process.  Returns 1 if preempted, 0 otherwise.
 */
int
preempted(void)
{
	if(up && up->state == Running)
	if(up->preempted == 0)
	if(anyhigher())
	if(!active.exiting){
		m->readied = nil;	/* avoid cooperative scheduling */
		up->preempted = 1;
		sched();
		splhi();
		up->preempted = 0;
		return 1;
	}
	return 0;
}

/*
 *  Update the cpu time average for this particular process,
 *  which is about to change from up -> not up or vice versa.
 *  p->lastupdate is the last time an updatecpu happened.
 *
 *  The cpu time average is a decaying average that lasts
 *  about D clock ticks.  D is chosen to be approximately
 *  the cpu time of a cpu-intensive "quick job".  A job has to run
 *  for approximately D clock ticks before we home in on its
 *  actual cpu usage.  Thus if you manage to get in and get out
 *  quickly, you won't be penalized during your burst.  Once you
 *  start using your share of the cpu for more than about D
 *  clock ticks though, your p->cpu hits 1000 (1.0) and you end up
 *  below all the other quick jobs.  Interactive tasks, because
 *  they basically always use less than their fair share of cpu,
 *  will be rewarded.
 *
 *  If the process has not been running, then we want to
 *  apply the filter
 *
 *	cpu = cpu * (D-1)/D
 *
 *  n times, yielding
 *
 *	cpu = cpu * ((D-1)/D)^n
 *
 *  but D is big enough that this is approximately
 *
 *	cpu = cpu * (D-n)/D
 *
 *  so we use that instead.
 *
 *  If the process has been running, we apply the filter to
 *  1 - cpu, yielding a similar equation.  Note that cpu is
 *  stored in fixed point (* 1000).
 *
 *  Updatecpu must be called before changing up, in order
 *  to maintain accurate cpu usage statistics.  It can be called
 *  at any time to bring the stats for a given proc up-to-date.
 */
void
updatecpu(Proc *p)
{
	int n, t, ocpu;
	int D = schedgain*HZ*Scaling;

	if(p->edf)
		return;

	t = MACHP(0)->ticks*Scaling + Scaling/2;
	n = t - p->lastupdate;
	p->lastupdate = t;

	if(n == 0)
		return;
	if(n > D)
		n = D;

	ocpu = p->cpu;
	if(p != up)
		p->cpu = (ocpu*(D-n))/D;
	else{
		t = 1000 - ocpu;
		t = (t*(D-n))/D;
		p->cpu = 1000 - t;
	}

//	iprint("pid %d %s for %d cpu %d -> %d\n", p->pid, p==up?"active":"inactive", n, ocpu, p->cpu);
}
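
/*
 * A worked step of the decay above, assuming HZ = 100 (it varies
 * by port): D = schedgain*HZ*Scaling = 30*100*2 = 12000.  A process
 * with p->cpu = 500 that has been off the cpu for n = 1200 scaled
 * ticks decays to 500*(12000-1200)/12000 = 450.  Had it been
 * running instead, the filter applies to 1000-cpu: t = 500,
 * t = 500*10800/12000 = 450, so p->cpu rises to 1000-450 = 550.
 */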

/*
 *  On average, p has used p->cpu of a cpu recently.
 *  Its fair share is conf.nmach/m->load of a cpu.  If it has been getting
 *  too much, penalize it.  If it has been getting not enough, reward it.
 *  I don't think you can get much more than your fair share that
 *  often, so most of the queues are for using less.  Having a priority
 *  of 3 means you're just right.  Having a higher priority (up to p->basepri)
 *  means you're not using as much as you could.
 */
int
reprioritize(Proc *p)
{
	int fairshare, n, load, ratio;

	load = MACHP(0)->load;
	if(load == 0)
		return p->basepri;

	/*
	 *  fairshare = 1.000 * conf.nmach * 1.000/load,
	 *  except the decimal point is moved three places
	 *  on both load and fairshare.
	 */
	fairshare = (conf.nmach*1000*1000)/load;
	n = p->cpu;
	if(n == 0)
		n = 1;
	ratio = (fairshare+n/2) / n;
	if(ratio > p->basepri)
		ratio = p->basepri;
	if(ratio < 0)
		panic("reprioritize");
//	iprint("pid %d cpu %d load %d fair %d pri %d\n", p->pid, p->cpu, load, fairshare, ratio);
	return ratio;
}
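
/*
 * A worked example (illustrative only): on a 2-cpu machine with a
 * load of 4.000 (stored as 4000), fairshare = 2*1000*1000/4000 = 500.
 * A process with p->cpu = 250 gets ratio = (500+125)/250 = 2: it is
 * using about twice its share, so it is penalized.  One with
 * p->cpu = 100 gets (500+50)/100 = 5, capped at p->basepri.
 */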

/*
 *  add a process to a scheduling queue
 */
void
queueproc(Schedq *rq, Proc *p)
{
	int pri;

	pri = rq - runq;
	lock(runq);
	p->priority = pri;
	p->rnext = 0;
	if(rq->tail)
		rq->tail->rnext = p;
	else
		rq->head = p;
	rq->tail = p;
	rq->n++;
	nrdy++;
	runvec |= 1<<pri;
	unlock(runq);
}

/*
 *  try to remove a process from a scheduling queue (called splhi)
 */
Proc*
dequeueproc(Schedq *rq, Proc *tp)
{
	Proc *l, *p;

	if(!canlock(runq))
		return nil;

	/*
	 *  the queue may have changed before we locked runq,
	 *  refind the target process.
	 */
	l = 0;
	for(p = rq->head; p; p = p->rnext){
		if(p == tp)
			break;
		l = p;
	}

	/*
	 *  p->mach==0 only when process state is saved
	 */
	if(p == 0 || p->mach){
		unlock(runq);
		return nil;
	}
	if(p->rnext == 0)
		rq->tail = l;
	if(l)
		l->rnext = p->rnext;
	else
		rq->head = p->rnext;
	if(rq->head == nil)
		runvec &= ~(1<<(rq-runq));
	rq->n--;
	nrdy--;
	if(p->state != Ready)
		print("dequeueproc %s %lud %s\n", p->text, p->pid, statename[p->state]);

	unlock(runq);
	return p;
}

/*
 *  ready(p) picks a new priority for a process and sticks it in the
 *  runq for that priority.
 */
void
ready(Proc *p)
{
	int s, pri;
	Schedq *rq;
	void (*pt)(Proc*, int, vlong);

	s = splhi();
	if(edfready(p)){
		splx(s);
		return;
	}

	/*
	 * since the 386 is short of registers, m always contains the constant
	 * MACHADDR, not MACHP(m->machno); see ../pc/dat.h.  so we can't just
	 * compare addresses with m.
	 */
	if(up != p && (p->wired == nil || p->wired == MACHP(m->machno)))
		m->readied = p;	/* group scheduling */

	updatecpu(p);
	pri = reprioritize(p);
	p->priority = pri;
	rq = &runq[pri];
	p->state = Ready;
	queueproc(rq, p);
	pt = proctrace;
	if(pt)
		pt(p, SReady, 0);
	splx(s);
}

/*
 *  yield the processor and drop our priority
 */
void
yield(void)
{
	if(anyready()){
		/* pretend we just used 1/2 tick */
		up->lastupdate -= Scaling/2;
		sched();
	}
}
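
/*
 * Why backing up lastupdate charges us cpu time: the next
 * updatecpu() computes n = t - p->lastupdate in scaled ticks, so
 * subtracting Scaling/2 inflates n by half a tick.  Since we are
 * the running process, a larger n pushes p->cpu closer to 1000,
 * which lowers the priority reprioritize() will pick for us.
 */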

/*
 *  recalculate priorities once a second.  We need to do this
 *  since priorities will otherwise only be recalculated when
 *  the running process blocks.
 */
ulong balancetime;

static void
rebalance(void)
{
	int pri, npri, t, x;
	Schedq *rq;
	Proc *p;

	t = m->ticks;
	if(t - balancetime < HZ)
		return;
	balancetime = t;

	for(pri=0, rq=runq; pri<Npriq; pri++, rq++){
another:
		p = rq->head;
		if(p == nil)
			continue;
		if(pri == p->basepri)
			continue;
		updatecpu(p);
		npri = reprioritize(p);
		if(npri != pri){
			x = splhi();
			p = dequeueproc(rq, p);
			if(p)
				queueproc(&runq[npri], p);
			splx(x);
			goto another;
		}
	}
}

/*
 *  pick a process to run
 */
Proc*
runproc(void)
{
	Schedq *rq;
	Proc *p;
	ulong start, now;
	int i;
	void (*pt)(Proc*, int, vlong);

	start = perfticks();

	/* cooperative scheduling until the clock ticks */
	/*
	 * since the 386 is short of registers, m always contains the constant
	 * MACHADDR, not MACHP(m->machno); see ../pc/dat.h.  so we can't just
	 * compare addresses with m.
	 */
	if((p=m->readied) && p->mach==0 && p->state==Ready
	&& (p->wired == nil || p->wired == MACHP(m->machno))
	&& runq[Nrq-1].head == nil && runq[Nrq-2].head == nil){
		skipscheds++;
		rq = &runq[p->priority];
		goto found;
	}

	preempts++;

loop:
	/*
	 *  find a process that last ran on this processor (affinity),
	 *  or one that hasn't moved in a while (load balancing).  Every
	 *  time around the loop affinity goes down.
	 */
	spllo();
	for(i = 0;; i++){
		/*
		 *  find the highest priority target process that this
		 *  processor can run given affinity constraints.
		 */
		for(rq = &runq[Nrq-1]; rq >= runq; rq--){
			for(p = rq->head; p; p = p->rnext){
				if(p->mp == nil || p->mp == MACHP(m->machno)
				|| (!p->wired && i > 0))
					goto found;
			}
		}

		/* waste time or halt the CPU */
		idlehands();

		/* remember how much time we spend here */
		now = perfticks();
		m->perf.inidle += now-start;
		start = now;
	}

found:
	splhi();
	p = dequeueproc(rq, p);
	if(p == nil)
		goto loop;

	p->state = Scheding;
	p->mp = MACHP(m->machno);

	if(edflock(p)){
		edfrun(p, rq == &runq[PriEdf]);	/* start deadline timer and do admin */
		edfunlock();
	}
	pt = proctrace;
	if(pt)
		pt(p, SRun, 0);
	return p;
}

int
canpage(Proc *p)
{
	int ok = 0;

	splhi();
	lock(runq);
	/* Only reliable way to see if we are Running */
	if(p->mach == 0) {
		p->newtlb = 1;
		ok = 1;
	}
	unlock(runq);
	spllo();

	return ok;
}

/*
 *  these assume that active.Lock is held when needed.
 */

void
cpuactive(uint cpu)
{
#ifdef MACHS_BITMAP
	active.machsmap[cpu / BI2WD] |= 1ULL << (cpu % BI2WD);
	active.nmachs++;
#else
	active.machs |= 1ULL << cpu;
#endif
}

void
cpuinactive(uint cpu)
{
#ifdef MACHS_BITMAP
	active.machsmap[cpu / BI2WD] &= ~(1ULL << (cpu % BI2WD));
	active.nmachs--;
#else
	active.machs &= ~(1ULL << cpu);
#endif
}

int
iscpuactive(uint cpu)
{
#ifdef MACHS_BITMAP
	return (active.machsmap[cpu / BI2WD] & (1ULL << (cpu % BI2WD))) != 0;
#else
	return (active.machs & (1ULL << cpu)) != 0;
#endif
}
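
/*
 * Indexing example for the bitmap variant, assuming BI2WD (bits
 * per word) is 64: cpu 70 lives at machsmap[70/64] = machsmap[1],
 * bit 70%64 = 6, i.e. mask 1ULL<<6.  The non-bitmap variant only
 * works while conf.nmach fits in a single machine word.
 */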

void
noprocpanic(char *msg)
{
	/*
	 * setting exiting will make hzclock() on each processor call exit(0).
	 * clearing our bit in machs avoids calling exit(0) from hzclock()
	 * on this processor.
	 */
	lock(&active);
	cpuinactive(m->machno);
	active.exiting = 1;
	unlock(&active);

	procdump();
	delay(1000);
	panic("%s", msg);	/* msg may contain a proc name with '%' in it */
}

Proc*
newproc(void)
{
	char msg[64];
	Proc *p;

	lock(&procalloc);
	while((p = procalloc.free) == nil) {
		unlock(&procalloc);

		snprint(msg, sizeof msg, "no procs; %s forking",
			up? up->text: "kernel");
		/*
		 * the situation is unlikely to heal itself.
		 * dump the proc table and restart by default.
		 * *noprocspersist in plan9.ini will yield the old
		 * behaviour of trying forever.
		 */
		if(getconf("*noprocspersist") == nil)
			noprocpanic(msg);
		resrcwait(msg);
		lock(&procalloc);
	}
	procalloc.free = p->qnext;
	unlock(&procalloc);

	p->state = Scheding;
	p->psstate = "New";
	p->mach = 0;
	p->qnext = 0;
	p->nchild = 0;
	p->nwait = 0;
	p->waitq = 0;
	p->parent = 0;
	p->pgrp = 0;
	p->egrp = 0;
	p->fgrp = 0;
	p->rgrp = 0;
	p->pdbg = 0;
	p->fpstate = FPinit;
	p->kp = 0;
	if(up && up->procctl == Proc_tracesyscall)
		p->procctl = Proc_tracesyscall;
	else
		p->procctl = 0;
	p->syscalltrace = 0;
	p->notepending = 0;
	p->ureg = 0;
	p->privatemem = 0;
	p->noswap = 0;
	p->errstr = p->errbuf0;
	p->syserrstr = p->errbuf1;
	p->errbuf0[0] = '\0';
	p->errbuf1[0] = '\0';
	p->nlocks.ref = 0;
	p->delaysched = 0;
	p->trace = 0;
	kstrdup(&p->user, "*nouser");
	kstrdup(&p->text, "*notext");
	kstrdup(&p->args, "");
	p->nargs = 0;
	p->setargs = 0;
	memset(p->seg, 0, sizeof p->seg);
	p->pid = incref(&pidalloc);
	pidhash(p);
	p->noteid = incref(&noteidalloc);
	if(p->pid==0 || p->noteid==0)
		panic("pidalloc");
	if(p->kstack == 0)
		p->kstack = smalloc(KSTACK);

	/* sched params */
	p->mp = 0;
	p->wired = 0;
	procpriority(p, PriNormal, 0);
	p->cpu = 0;
	p->lastupdate = MACHP(0)->ticks*Scaling;
	p->edf = nil;

	return p;
}

/*
 *  wire this proc to a machine
 */
void
procwired(Proc *p, int bm)
{
	Proc *pp;
	int i;
	char nwired[MAXMACH];
	Mach *wm;

	if(bm < 0){
		/* pick a machine to wire to */
		memset(nwired, 0, sizeof(nwired));
		p->wired = 0;
		pp = proctab(0);
		for(i=0; i<conf.nproc; i++, pp++){
			wm = pp->wired;
			if(wm && pp->pid)
				nwired[wm->machno]++;
		}
		bm = 0;
		for(i=0; i<conf.nmach; i++)
			if(nwired[i] < nwired[bm])
				bm = i;
	} else {
		/* use the virtual machine requested */
		bm = bm % conf.nmach;
	}

	p->wired = MACHP(bm);
	p->mp = p->wired;
}

void
procpriority(Proc *p, int pri, int fixed)
{
	if(pri >= Npriq)
		pri = Npriq - 1;
	else if(pri < 0)
		pri = 0;
	p->basepri = pri;
	p->priority = pri;
	if(fixed){
		p->fixedpri = 1;
	} else {
		p->fixedpri = 0;
	}
}

void
procinit0(void)		/* bad planning - clashes with devproc.c */
{
	Proc *p;
	int i;

	procalloc.free = xalloc(conf.nproc*sizeof(Proc));
	if(procalloc.free == nil){
		xsummary();
		panic("cannot allocate %lud procs (%ludMB)", conf.nproc,
			conf.nproc*sizeof(Proc)/(1024*1024));
	}
	procalloc.arena = procalloc.free;

	p = procalloc.free;
	for(i=0; i<conf.nproc-1; i++,p++)
		p->qnext = p+1;
	p->qnext = 0;
}

/*
 *  sleep if a condition is not true.  Another process will
 *  awaken us after it sets the condition.  When we awaken
 *  the condition may no longer be true.
 *
 *  we lock both the process and the rendezvous to keep r->p
 *  and p->r synchronized.
 */
void
sleep(Rendez *r, int (*f)(void*), void *arg)
{
	int s;
	void (*pt)(Proc*, int, vlong);

	s = splhi();

	if(up->nlocks.ref)
		print("process %lud sleeps with %lud locks held, last lock %#p locked at pc %#lux, sleep called from %#p\n",
			up->pid, up->nlocks.ref, up->lastlock, up->lastlock->pc, getcallerpc(&r));
	lock(r);
	lock(&up->rlock);
	if(r->p){
		print("double sleep called from %#p, %lud %lud\n",
			getcallerpc(&r), r->p->pid, up->pid);
		dumpstack();
	}

	/*
	 *  Wakeup only knows there may be something to do by testing
	 *  r->p in order to get something to lock on.
	 *  Flush that information out to memory in case the sleep is
	 *  committed.
	 */
	r->p = up;

	if((*f)(arg) || up->notepending){
		/*
		 *  if condition happened or a note is pending
		 *  never mind
		 */
		r->p = nil;
		unlock(&up->rlock);
		unlock(r);
	} else {
		/*
		 *  now we are committed to
		 *  change state and call scheduler
		 */
		pt = proctrace;
		if(pt)
			pt(up, SSleep, 0);
		up->state = Wakeme;
		up->r = r;

		/* statistics */
		m->cs++;

		procsave(up);
		if(setlabel(&up->sched)) {
			/*
			 *  here when the process is awakened
			 */
			procrestore(up);
			spllo();
		} else {
			/*
			 *  here to go to sleep (i.e. stop Running)
			 */
			unlock(&up->rlock);
			unlock(r);
			gotolabel(&m->sched);
		}
	}

	if(up->notepending) {
		up->notepending = 0;
		splx(s);
		if(up->procctl == Proc_exitme && up->closingfgrp)
			forceclosefgrp();
		error(Eintr);
	}

	splx(s);
}
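
/*
 * Typical sleep/wakeup usage, for illustration only (Thing and
 * isdone are hypothetical): the condition function re-tests the
 * predicate under the rendezvous protocol above, so a wakeup that
 * arrives between the test and the state change is never lost.
 *
 *	typedef struct Thing Thing;
 *	struct Thing {
 *		Rendez	r;
 *		int	done;
 *	};
 *
 *	static int
 *	isdone(void *a)
 *	{
 *		return ((Thing*)a)->done;
 *	}
 *
 *	consumer:	sleep(&t->r, isdone, t);
 *	producer:	t->done = 1; wakeup(&t->r);
 */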

static int
tfn(void *arg)
{
	return up->trend == nil || up->tfn(arg);
}

void
twakeup(Ureg*, Timer *t)
{
	Proc *p;
	Rendez *trend;

	p = t->ta;
	trend = p->trend;
	p->trend = 0;
	if(trend)
		wakeup(trend);
}

void
tsleep(Rendez *r, int (*fn)(void*), void *arg, ulong ms)
{
	if (up->tt){
		print("tsleep: timer active: mode %d, tf %#p\n",
			up->tmode, up->tf);
		timerdel(up);
	}
	up->tns = MS2NS(ms);
	up->tf = twakeup;
	up->tmode = Trelative;
	up->ta = up;
	up->trend = r;
	up->tfn = fn;
	timeradd(up);

	if(waserror()){
		timerdel(up);
		nexterror();
	}
	sleep(r, tfn, arg);
	if(up->tt)
		timerdel(up);
	up->twhen = 0;
	poperror();
}
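
/*
 * A common tsleep() idiom (sketch): pausing for roughly 100ms
 * using return0, the kernel's always-false condition helper, so
 * that only the timer ends the sleep.  An early wakeup on r would
 * also end it, which is why callers re-test their own condition.
 *
 *	tsleep(&up->sleep, return0, nil, 100);
 */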

/*
 *  Expects that only one process can call wakeup for any given Rendez.
 *  We hold both locks to ensure that r->p and p->r remain consistent.
 *  Richard Miller has a better solution that doesn't require both to
 *  be held simultaneously, but I'm a paranoid - presotto.
 */
Proc*
wakeup(Rendez *r)
{
	Proc *p;
	int s;

	s = splhi();

	lock(r);
	p = r->p;

	if(p != nil){
		lock(&p->rlock);
		if(p->state != Wakeme || p->r != r){
			iprint("%p %p %d\n", p->r, r, p->state);
			panic("wakeup: state");
		}
		r->p = nil;
		p->r = nil;
		ready(p);
		unlock(&p->rlock);
	}
	unlock(r);

	splx(s);

	return p;
}

/*
 *  if waking a sleeping process, this routine must hold both
 *  p->rlock and r->lock.  However, it can't take them in the
 *  same order as wakeup, causing a possible lock-ordering
 *  deadlock.  We break the deadlock by giving up the p->rlock
 *  lock if we can't get the r->lock and retrying.
 */
int
postnote(Proc *p, int dolock, char *n, int flag)
{
	int s, ret;
	Rendez *r;
	Proc *d, **l;

	if(dolock)
		qlock(&p->debug);

	if(flag != NUser && (p->notify == 0 || p->notified))
		p->nnote = 0;

	ret = 0;
	if(p->nnote < NNOTE) {
		strcpy(p->note[p->nnote].msg, n);
		p->note[p->nnote++].flag = flag;
		ret = 1;
	}
	p->notepending = 1;
	if(dolock)
		qunlock(&p->debug);

	/* this loop is to avoid lock ordering problems. */
	for(;;){
		s = splhi();
		lock(&p->rlock);
		r = p->r;

		/* waiting for a wakeup? */
		if(r == nil)
			break;	/* no */

		/* try for the second lock */
		if(canlock(r)){
			if(p->state != Wakeme || r->p != p)
				panic("postnote: state %d %d %d",
					r->p != p, p->r != r, p->state);
			p->r = nil;
			r->p = nil;
			ready(p);
			unlock(r);
			break;
		}

		/* give other process time to get out of critical section and try again */
		unlock(&p->rlock);
		splx(s);
		sched();
	}
	unlock(&p->rlock);
	splx(s);

	if(p->state != Rendezvous)
		return ret;

	/* Try and pull out of a rendezvous */
	lock(p->rgrp);
	if(p->state == Rendezvous) {
		p->rendval = ~0;
		l = &REND(p->rgrp, p->rendtag);
		for(d = *l; d; d = d->rendhash) {
			if(d == p) {
				*l = p->rendhash;
				break;
			}
			l = &d->rendhash;
		}
		ready(p);
	}
	unlock(p->rgrp);
	return ret;
}

/*
 *  weird thing: keep at most NBROKEN around
 */
#define	NBROKEN 4
struct
{
	QLock;
	int	n;
	Proc	*p[NBROKEN];
}broken;

void
addbroken(Proc *p)
{
	qlock(&broken);
	if(broken.n == NBROKEN) {
		ready(broken.p[0]);
		memmove(&broken.p[0], &broken.p[1], sizeof(Proc*)*(NBROKEN-1));
		--broken.n;
	}
	broken.p[broken.n++] = p;
	qunlock(&broken);

	edfstop(up);
	p->state = Broken;
	p->psstate = 0;
	sched();
}

void
unbreak(Proc *p)
{
	int b;

	qlock(&broken);
	for(b=0; b < broken.n; b++)
		if(broken.p[b] == p) {
			broken.n--;
			memmove(&broken.p[b], &broken.p[b+1],
					sizeof(Proc*)*(NBROKEN-(b+1)));
			ready(p);
			break;
		}
	qunlock(&broken);
}

int
freebroken(void)
{
	int i, n;

	qlock(&broken);
	n = broken.n;
	for(i=0; i<n; i++) {
		ready(broken.p[i]);
		broken.p[i] = 0;
	}
	broken.n = 0;
	qunlock(&broken);
	return n;
}

void
pexit(char *exitstr, int freemem)
{
	Proc *p;
	Segment **s, **es;
	long utime, stime;
	Waitq *wq, *f, *next;
	Fgrp *fgrp;
	Egrp *egrp;
	Rgrp *rgrp;
	Pgrp *pgrp;
	Chan *dot;
	void (*pt)(Proc*, int, vlong);

	if(up->syscalltrace)
		free(up->syscalltrace);
	up->alarm = 0;
	if (up->tt)
		timerdel(up);
	pt = proctrace;
	if(pt)
		pt(up, SDead, 0);

	/* nil out all the resources under lock (free later) */
	qlock(&up->debug);
	fgrp = up->fgrp;
	up->fgrp = nil;
	egrp = up->egrp;
	up->egrp = nil;
	rgrp = up->rgrp;
	up->rgrp = nil;
	pgrp = up->pgrp;
	up->pgrp = nil;
	dot = up->dot;
	up->dot = nil;
	qunlock(&up->debug);

	if(fgrp)
		closefgrp(fgrp);
	if(egrp)
		closeegrp(egrp);
	if(rgrp)
		closergrp(rgrp);
	if(dot)
		cclose(dot);
	if(pgrp)
		closepgrp(pgrp);

	/*
	 *  if not a kernel process and have a parent,
	 *  do some housekeeping.
	 */
	if(up->kp == 0 && up->parentpid != 0) {
		/* absorb errors (e.g. interrupted resource waits) and retry */
		while(waserror())
			;

		p = up->parent;
		if(p == 0) {
			if(exitstr == 0)
				exitstr = "unknown";
			panic("boot process died: %s", exitstr);
		}

		wq = smalloc(sizeof(Waitq));
		poperror();

		wq->w.pid = up->pid;
		utime = up->time[TUser] + up->time[TCUser];
		stime = up->time[TSys] + up->time[TCSys];
		wq->w.time[TUser] = tk2ms(utime);
		wq->w.time[TSys] = tk2ms(stime);
		wq->w.time[TReal] = tk2ms(MACHP(0)->ticks - up->time[TReal]);
		if(exitstr && exitstr[0])
			snprint(wq->w.msg, sizeof(wq->w.msg), "%s %lud: %s",
				up->text, up->pid, exitstr);
		else
			wq->w.msg[0] = '\0';

		lock(&p->exl);
		/*
		 *  Check that parent is still alive.
		 */
		if(p->pid == up->parentpid && p->state != Broken) {
			p->nchild--;
			p->time[TCUser] += utime;
			p->time[TCSys] += stime;
			/*
			 *  If there would be more than 2000 wait records
			 *  outstanding for my parent, then don't leave a wait
			 *  record behind.  This helps prevent badly written
			 *  daemon processes from accumulating lots of wait
			 *  records.
			 */
			if(p->nwait < 2000) {
				wq->next = p->waitq;
				p->waitq = wq;
				p->nwait++;
				wq = nil;
				wakeup(&p->waitr);
			}
		}
		unlock(&p->exl);
		if(wq)
			free(wq);
	}

	if(!freemem)
		addbroken(up);

	qlock(&up->seglock);
	es = &up->seg[NSEG];
	for(s = up->seg; s < es; s++) {
		if(*s) {
			putseg(*s);
			*s = 0;
		}
	}
	qunlock(&up->seglock);

	lock(&up->exl);		/* Prevent my children from leaving waits */
	pidunhash(up);
	up->pid = 0;
	wakeup(&up->waitr);
	unlock(&up->exl);

	for(f = up->waitq; f; f = next) {
		next = f->next;
		free(f);
	}

	/* release debuggers */
	qlock(&up->debug);
	if(up->pdbg) {
		wakeup(&up->pdbg->sleep);
		up->pdbg = 0;
	}
	qunlock(&up->debug);

	/* Sched must not loop for these locks */
	lock(&procalloc);
	lock(&palloc);

	edfstop(up);
	up->state = Moribund;
	sched();
	panic("pexit");
}

int
haswaitq(void *x)
{
	Proc *p;

	p = (Proc *)x;
	return p->waitq != 0;
}

ulong
pwait(Waitmsg *w)
{
	ulong cpid;
	Waitq *wq;

	if(!canqlock(&up->qwaitr))
		error(Einuse);

	if(waserror()) {
		qunlock(&up->qwaitr);
		nexterror();
	}

	lock(&up->exl);
	if(up->nchild == 0 && up->waitq == 0) {
		unlock(&up->exl);
		error(Enochild);
	}
	unlock(&up->exl);

	sleep(&up->waitr, haswaitq, up);

	lock(&up->exl);
	wq = up->waitq;
	up->waitq = wq->next;
	up->nwait--;
	unlock(&up->exl);

	qunlock(&up->qwaitr);
	poperror();

	if(w)
		memmove(w, &wq->w, sizeof(Waitmsg));
	cpid = wq->w.pid;
	free(wq);
	return cpid;
}

Proc*
proctab(int i)
{
	return &procalloc.arena[i];
}

void
dumpaproc(Proc *p)
{
	ulong bss;
	char *s;

	if(p == 0)
		return;

	bss = 0;
	if(p->seg[BSEG])
		bss = p->seg[BSEG]->top;

	s = p->psstate;
	if(s == 0)
		s = statename[p->state];
	print("%3lud:%10s pc %8lux dbgpc %8lux %8s (%s) ut %ld st %ld bss %lux qpc %lux nl %lud nd %lud lpc %lux pri %lud\n",
		p->pid, p->text, p->pc, dbgpc(p), s, statename[p->state],
		p->time[0], p->time[1], bss, p->qpc, p->nlocks.ref,
		p->delaysched, p->lastlock ? p->lastlock->pc : 0, p->priority);
}

void
procdump(void)
{
	int i;
	Proc *p;

	if(up)
		print("up %lud\n", up->pid);
	else
		print("no current process\n");
	for(i=0; i<conf.nproc; i++) {
		p = &procalloc.arena[i];
		if(p->state == Dead)
			continue;

		dumpaproc(p);
	}
}

/*
 *  wait till all processes have flushed their mmu
 *  state about segment s
 */
void
procflushseg(Segment *s)
{
	int i, ns, nm, nwait;
	Proc *p;

	/*
	 *  tell all processes with this
	 *  segment to flush their mmu's
	 */
	nwait = 0;
	for(i=0; i<conf.nproc; i++) {
		p = &procalloc.arena[i];
		if(p->state == Dead)
			continue;
		for(ns = 0; ns < NSEG; ns++)
			if(p->seg[ns] == s){
				p->newtlb = 1;
				for(nm = 0; nm < conf.nmach; nm++){
					if(MACHP(nm)->proc == p){
						MACHP(nm)->flushmmu = 1;
						nwait++;
					}
				}
				break;
			}
	}

	if(nwait == 0)
		return;

	/*
	 *  wait for all processors to take a clock interrupt
	 *  and flush their mmu's
	 */
again:
	for(nm = 0; nm < conf.nmach; nm++){
		if(nm != m->machno && MACHP(nm)->flushmmu){
			sched();
			goto again;
		}
	}
}

void
scheddump(void)
{
	Proc *p;
	Schedq *rq;

	for(rq = &runq[Nrq-1]; rq >= runq; rq--){
		if(rq->head == 0)
			continue;
		print("rq%ld:", rq-runq);
		for(p = rq->head; p; p = p->rnext)
			print(" %lud(%lud)", p->pid, m->ticks - p->readytime);
		print("\n");
		delay(150);
	}
	print("nrdy %d\n", nrdy);
}

void
kproc(char *name, void (*func)(void *), void *arg)
{
	Proc *p;
	static Pgrp *kpgrp;

	p = newproc();
	p->psstate = 0;
	p->procmode = 0640;
	p->kp = 1;
	p->noswap = 1;

	p->fpsave = up->fpsave;
	p->scallnr = up->scallnr;
	p->s = up->s;
	p->nerrlab = 0;
	p->slash = up->slash;
	p->dot = up->dot;
	if(p->dot)
		incref(p->dot);

	memmove(p->note, up->note, sizeof(p->note));
	p->nnote = up->nnote;
	p->notified = 0;
	p->lastnote = up->lastnote;
	p->notify = up->notify;
	p->ureg = 0;
	p->dbgreg = 0;

	procpriority(p, PriKproc, 0);

	kprocchild(p, func, arg);

	kstrdup(&p->user, eve);
	kstrdup(&p->text, name);
	if(kpgrp == 0)
		kpgrp = newpgrp();
	p->pgrp = kpgrp;
	incref(kpgrp);

	memset(p->time, 0, sizeof(p->time));
	p->time[TReal] = MACHP(0)->ticks;
	ready(p);
}

/*
 *  called splhi() by notify().  See comment in notify for the
 *  reasoning.
 */
void
procctl(Proc *p)
{
	char *state;
	ulong s;

	switch(p->procctl) {
	case Proc_exitbig:
		spllo();
		pexit("Killed: Insufficient physical memory", 1);

	case Proc_exitme:
		spllo();		/* pexit has locks in it */
		pexit("Killed", 1);

	case Proc_traceme:
		if(p->nnote == 0)
			return;
		/* No break */

	case Proc_stopme:
		p->procctl = 0;
		state = p->psstate;
		p->psstate = "Stopped";
		/* free a waiting debugger */
		s = spllo();
		qlock(&p->debug);
		if(p->pdbg) {
			wakeup(&p->pdbg->sleep);
			p->pdbg = 0;
		}
		qunlock(&p->debug);
		splhi();
		p->state = Stopped;
		sched();
		p->psstate = state;
		splx(s);
		return;
	}
}

#include "errstr.h"

void
error(char *err)
{
	spllo();

	assert(up->nerrlab < NERR);
	kstrcpy(up->errstr, err, ERRMAX);
	setlabel(&up->errlab[NERR-1]);
	nexterror();
}

void
nexterror(void)
{
	gotolabel(&up->errlab[--up->nerrlab]);
}
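
/*
 * error() and nexterror() implement the kernel's non-local error
 * handling.  A sketch of the standard idiom (c is hypothetical):
 * waserror() pushes an error label and returns non-zero only when
 * a later error() unwinds back to it; every successful push must
 * be balanced by poperror().
 *
 *	c = namec(...);
 *	if(waserror()){
 *		cclose(c);	// undo our work, then keep unwinding
 *		nexterror();
 *	}
 *	...code that may call error()...
 *	poperror();
 */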

void
exhausted(char *resource)
{
	char buf[ERRMAX];

	snprint(buf, sizeof buf, "no free %s", resource);
	iprint("%s\n", buf);
	error(buf);
}

void
killbig(char *why)
{
	int i;
	Segment *s;
	ulong l, max;
	Proc *p, *ep, *kp;

	max = 0;
	kp = 0;
	ep = procalloc.arena+conf.nproc;
	for(p = procalloc.arena; p < ep; p++) {
		if(p->state == Dead || p->kp)
			continue;
		l = 0;
		for(i=1; i<NSEG; i++) {
			s = p->seg[i];
			if(s != 0)
				l += s->top - s->base;
		}
		if(l > max && ((p->procmode&0222) || strcmp(eve, p->user)!=0)) {
			kp = p;
			max = l;
		}
	}
	if(kp == nil)		/* no killable process found */
		return;

	print("%lud: %s killed: %s\n", kp->pid, kp->text, why);
	for(p = procalloc.arena; p < ep; p++) {
		if(p->state == Dead || p->kp)
			continue;
		if(p != kp && p->seg[BSEG] && p->seg[BSEG] == kp->seg[BSEG])
			p->procctl = Proc_exitbig;
	}
	kp->procctl = Proc_exitbig;
	for(i = 0; i < NSEG; i++) {
		s = kp->seg[i];
		if(s != 0 && canqlock(&s->lk)) {
			mfreeseg(s, s->base, (s->top - s->base)/BY2PG);
			qunlock(&s->lk);
		}
	}
}

/*
 *  change ownership to 'new' of all processes owned by 'old'.  Used when
 *  eve changes.
 */
void
renameuser(char *old, char *new)
{
	Proc *p, *ep;

	ep = procalloc.arena+conf.nproc;
	for(p = procalloc.arena; p < ep; p++)
		if(p->user!=nil && strcmp(old, p->user)==0)
			kstrdup(&p->user, new);
}

/*
 *  time accounting called by clock() splhi'd
 */
void
accounttime(void)
{
	Proc *p;
	ulong n, per;
	static ulong nrun;

	p = m->proc;
	if(p) {
		nrun++;
		p->time[p->insyscall]++;
	}

	/* calculate decaying duty cycles */
	n = perfticks();
	per = n - m->perf.last;
	m->perf.last = n;
	per = (m->perf.period*(HZ-1) + per)/HZ;
	if(per != 0)
		m->perf.period = per;

	m->perf.avg_inidle = (m->perf.avg_inidle*(HZ-1)+m->perf.inidle)/HZ;
	m->perf.inidle = 0;

	m->perf.avg_inintr = (m->perf.avg_inintr*(HZ-1)+m->perf.inintr)/HZ;
	m->perf.inintr = 0;

	/* only one processor gets to compute system load averages */
	if(m->machno != 0)
		return;

	/*
	 *  calculate decaying load average.
	 *  if we decay by (n-1)/n then it takes
	 *  n clock ticks to go from load L to .36 L once
	 *  things quiet down.  it takes about 5 n clock
	 *  ticks to go to zero.  so using HZ means this is
	 *  approximately the load over the last second,
	 *  with a tail lasting about 5 seconds.
	 */
	n = nrun;
	nrun = 0;
	n = (nrdy+n)*1000;
	m->load = (m->load*(HZ-1)+n)/HZ;
}
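
/*
 * A worked step of the load filter above (assuming HZ = 100): with
 * m->load = 2000 (2.0) and one running plus three ready processes
 * this tick, n = (3+1)*1000 = 4000, so the new load is
 * (2000*99 + 4000)/100 = 2020.  Each tick thus moves the average
 * 1/HZ of the way toward the instantaneous value.
 */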

static void
pidhash(Proc *p)
{
	int h;

	h = p->pid % nelem(procalloc.ht);
	lock(&procalloc);
	p->pidhash = procalloc.ht[h];
	procalloc.ht[h] = p;
	unlock(&procalloc);
}

static void
pidunhash(Proc *p)
{
	int h;
	Proc **l;

	h = p->pid % nelem(procalloc.ht);
	lock(&procalloc);
	for(l = &procalloc.ht[h]; *l != nil; l = &(*l)->pidhash)
		if(*l == p){
			*l = p->pidhash;
			break;
		}
	unlock(&procalloc);
}

int
procindex(ulong pid)
{
	Proc *p;
	int h;
	int s;

	s = -1;
	h = pid % nelem(procalloc.ht);
	lock(&procalloc);
	for(p = procalloc.ht[h]; p != nil; p = p->pidhash)
		if(p->pid == pid){
			s = p - procalloc.arena;
			break;
		}
	unlock(&procalloc);
	return s;
}