1 #include "u.h"
2 #include "tos.h"
3 #include "../port/lib.h"
4 #include "mem.h"
5 #include "dat.h"
6 #include "fns.h"
7 #include "../port/error.h"
8 #include "../port/edf.h"
9
10 #include <a.out.h>
11
12 int shargs(char*, int, char**);
13
14 extern void checkpages(void);
15 extern void checkpagerefs(void);
16
17 long
sysr1(ulong *)18 sysr1(ulong*)
19 {
20 checkpagerefs();
21 return 0;
22 }
23
24 long
sysrfork(ulong * arg)25 sysrfork(ulong *arg)
26 {
27 Proc *p;
28 int n, i;
29 Fgrp *ofg;
30 Pgrp *opg;
31 Rgrp *org;
32 Egrp *oeg;
33 ulong pid, flag;
34 Mach *wm;
35
36 flag = arg[0];
37 /* Check flags before we commit */
38 if((flag & (RFFDG|RFCFDG)) == (RFFDG|RFCFDG))
39 error(Ebadarg);
40 if((flag & (RFNAMEG|RFCNAMEG)) == (RFNAMEG|RFCNAMEG))
41 error(Ebadarg);
42 if((flag & (RFENVG|RFCENVG)) == (RFENVG|RFCENVG))
43 error(Ebadarg);
44
45 if((flag&RFPROC) == 0) {
46 if(flag & (RFMEM|RFNOWAIT))
47 error(Ebadarg);
48 if(flag & (RFFDG|RFCFDG)) {
49 ofg = up->fgrp;
50 if(flag & RFFDG)
51 up->fgrp = dupfgrp(ofg);
52 else
53 up->fgrp = dupfgrp(nil);
54 closefgrp(ofg);
55 }
56 if(flag & (RFNAMEG|RFCNAMEG)) {
57 opg = up->pgrp;
58 up->pgrp = newpgrp();
59 if(flag & RFNAMEG)
60 pgrpcpy(up->pgrp, opg);
61 /* inherit noattach */
62 up->pgrp->noattach = opg->noattach;
63 closepgrp(opg);
64 }
65 if(flag & RFNOMNT)
66 up->pgrp->noattach = 1;
67 if(flag & RFREND) {
68 org = up->rgrp;
69 up->rgrp = newrgrp();
70 closergrp(org);
71 }
72 if(flag & (RFENVG|RFCENVG)) {
73 oeg = up->egrp;
74 up->egrp = smalloc(sizeof(Egrp));
75 up->egrp->ref = 1;
76 if(flag & RFENVG)
77 envcpy(up->egrp, oeg);
78 closeegrp(oeg);
79 }
80 if(flag & RFNOTEG)
81 up->noteid = incref(¬eidalloc);
82 return 0;
83 }
84
85 p = newproc();
86
87 p->fpsave = up->fpsave;
88 p->scallnr = up->scallnr;
89 p->s = up->s;
90 p->nerrlab = 0;
91 p->slash = up->slash;
92 p->dot = up->dot;
93 incref(p->dot);
94
95 memmove(p->note, up->note, sizeof(p->note));
96 p->privatemem = up->privatemem;
97 p->noswap = up->noswap;
98 p->nnote = up->nnote;
99 p->notified = 0;
100 p->lastnote = up->lastnote;
101 p->notify = up->notify;
102 p->ureg = up->ureg;
103 p->dbgreg = 0;
104
105 /* Make a new set of memory segments */
106 n = flag & RFMEM;
107 qlock(&p->seglock);
108 if(waserror()){
109 qunlock(&p->seglock);
110 nexterror();
111 }
112 for(i = 0; i < NSEG; i++)
113 if(up->seg[i])
114 p->seg[i] = dupseg(up->seg, i, n);
115 qunlock(&p->seglock);
116 poperror();
117
118 /* File descriptors */
119 if(flag & (RFFDG|RFCFDG)) {
120 if(flag & RFFDG)
121 p->fgrp = dupfgrp(up->fgrp);
122 else
123 p->fgrp = dupfgrp(nil);
124 }
125 else {
126 p->fgrp = up->fgrp;
127 incref(p->fgrp);
128 }
129
130 /* Process groups */
131 if(flag & (RFNAMEG|RFCNAMEG)) {
132 p->pgrp = newpgrp();
133 if(flag & RFNAMEG)
134 pgrpcpy(p->pgrp, up->pgrp);
135 /* inherit noattach */
136 p->pgrp->noattach = up->pgrp->noattach;
137 }
138 else {
139 p->pgrp = up->pgrp;
140 incref(p->pgrp);
141 }
142 if(flag & RFNOMNT)
143 p->pgrp->noattach = 1;
144
145 if(flag & RFREND)
146 p->rgrp = newrgrp();
147 else {
148 incref(up->rgrp);
149 p->rgrp = up->rgrp;
150 }
151
152 /* Environment group */
153 if(flag & (RFENVG|RFCENVG)) {
154 p->egrp = smalloc(sizeof(Egrp));
155 p->egrp->ref = 1;
156 if(flag & RFENVG)
157 envcpy(p->egrp, up->egrp);
158 }
159 else {
160 p->egrp = up->egrp;
161 incref(p->egrp);
162 }
163 p->hang = up->hang;
164 p->procmode = up->procmode;
165
166 /* Craft a return frame which will cause the child to pop out of
167 * the scheduler in user mode with the return register zero
168 */
169 forkchild(p, up->dbgreg);
170
171 p->parent = up;
172 p->parentpid = up->pid;
173 if(flag&RFNOWAIT)
174 p->parentpid = 0;
175 else {
176 lock(&up->exl);
177 up->nchild++;
178 unlock(&up->exl);
179 }
180 if((flag&RFNOTEG) == 0)
181 p->noteid = up->noteid;
182
183 /* don't penalize the child, it hasn't done FP in a note handler. */
184 p->fpstate = up->fpstate & ~FPillegal;
185 pid = p->pid;
186 memset(p->time, 0, sizeof(p->time));
187 p->time[TReal] = MACHP(0)->ticks;
188
189 kstrdup(&p->text, up->text);
190 kstrdup(&p->user, up->user);
191 /*
192 * since the bss/data segments are now shareable,
193 * any mmu info about this process is now stale
194 * (i.e. has bad properties) and has to be discarded.
195 */
196 flushmmu();
197 p->basepri = up->basepri;
198 p->priority = up->basepri;
199 p->fixedpri = up->fixedpri;
200 p->mp = up->mp;
201 wm = up->wired;
202 if(wm)
203 procwired(p, wm->machno);
204 ready(p);
205 sched();
206 return pid;
207 }
208
209 ulong
l2be(long l)210 l2be(long l)
211 {
212 uchar *cp;
213
214 cp = (uchar*)&l;
215 return (cp[0]<<24) | (cp[1]<<16) | (cp[2]<<8) | cp[3];
216 }
217
218 long
sysexec(ulong * arg)219 sysexec(ulong *arg)
220 {
221 Segment *s, *ts;
222 ulong t, d, b;
223 int i;
224 Chan *tc;
225 char **argv, **argp;
226 char *a, *charp, *args, *file, *file0;
227 char *progarg[sizeof(Exec)/2+1], *elem, progelem[64];
228 ulong ssize, spage, nargs, nbytes, n, bssend;
229 int indir;
230 Exec exec;
231 char line[sizeof(Exec)];
232 Fgrp *f;
233 Image *img;
234 ulong magic, text, entry, data, bss;
235 Tos *tos;
236
237 indir = 0;
238 elem = nil;
239 validaddr(arg[0], 1, 0);
240 file0 = validnamedup((char*)arg[0], 1);
241 if(waserror()){
242 free(file0);
243 free(elem);
244 nexterror();
245 }
246 file = file0;
247 for(;;){
248 tc = namec(file, Aopen, OEXEC, 0);
249 if(waserror()){
250 cclose(tc);
251 nexterror();
252 }
253 if(!indir)
254 kstrdup(&elem, up->genbuf);
255
256 n = devtab[tc->type]->read(tc, &exec, sizeof(Exec), 0);
257 if(n < 2)
258 error(Ebadexec);
259 magic = l2be(exec.magic);
260 text = l2be(exec.text);
261 entry = l2be(exec.entry);
262 if(n==sizeof(Exec) && (magic == AOUT_MAGIC)){
263 if(text >= USTKTOP-UTZERO
264 || entry < UTZERO+sizeof(Exec)
265 || entry >= UTZERO+sizeof(Exec)+text)
266 error(Ebadexec);
267 break; /* for binary */
268 }
269
270 /*
271 * Process #! /bin/sh args ...
272 */
273 memmove(line, &exec, sizeof(Exec));
274 if(indir || line[0]!='#' || line[1]!='!')
275 error(Ebadexec);
276 n = shargs(line, n, progarg);
277 if(n == 0)
278 error(Ebadexec);
279 indir = 1;
280 /*
281 * First arg becomes complete file name
282 */
283 progarg[n++] = file;
284 progarg[n] = 0;
285 validaddr(arg[1], BY2WD, 1);
286 arg[1] += BY2WD;
287 file = progarg[0];
288 if(strlen(elem) >= sizeof progelem)
289 error(Ebadexec);
290 strcpy(progelem, elem);
291 progarg[0] = progelem;
292 poperror();
293 cclose(tc);
294 }
295
296 data = l2be(exec.data);
297 bss = l2be(exec.bss);
298 t = UTROUND(UTZERO+sizeof(Exec)+text);
299 d = (t + data + (BY2PG-1)) & ~(BY2PG-1);
300 bssend = t + data + bss;
301 b = (bssend + (BY2PG-1)) & ~(BY2PG-1);
302 if(t >= KZERO || d >= KZERO || b >= KZERO)
303 error(Ebadexec);
304
305 /*
306 * Args: pass 1: count
307 */
308 nbytes = sizeof(Tos); /* hole for profiling clock at top of stack (and more) */
309 nargs = 0;
310 if(indir){
311 argp = progarg;
312 while(*argp){
313 a = *argp++;
314 nbytes += strlen(a) + 1;
315 nargs++;
316 }
317 }
318 validalign(arg[1], sizeof(char**));
319 argp = (char**)arg[1];
320 validaddr((ulong)argp, BY2WD, 0);
321 while(*argp){
322 a = *argp++;
323 if(((ulong)argp&(BY2PG-1)) < BY2WD)
324 validaddr((ulong)argp, BY2WD, 0);
325 validaddr((ulong)a, 1, 0);
326 nbytes += ((char*)vmemchr(a, 0, 0x7FFFFFFF) - a) + 1;
327 nargs++;
328 }
329 ssize = BY2WD*(nargs+1) + ((nbytes+(BY2WD-1)) & ~(BY2WD-1));
330
331 /*
332 * 8-byte align SP for those (e.g. sparc) that need it.
333 * execregs() will subtract another 4 bytes for argc.
334 */
335 if((ssize+4) & 7)
336 ssize += 4;
337 spage = (ssize+(BY2PG-1)) >> PGSHIFT;
338
339 /*
340 * Build the stack segment, putting it in kernel virtual for the moment
341 */
342 if(spage > TSTKSIZ)
343 error(Enovmem);
344
345 qlock(&up->seglock);
346 if(waserror()){
347 qunlock(&up->seglock);
348 nexterror();
349 }
350 up->seg[ESEG] = newseg(SG_STACK, TSTKTOP-USTKSIZE, USTKSIZE/BY2PG);
351
352 /*
353 * Args: pass 2: assemble; the pages will be faulted in
354 */
355 tos = (Tos*)(TSTKTOP - sizeof(Tos));
356 tos->cyclefreq = m->cyclefreq;
357 cycles((uvlong*)&tos->pcycles);
358 tos->pcycles = -tos->pcycles;
359 tos->kcycles = tos->pcycles;
360 tos->clock = 0;
361 argv = (char**)(TSTKTOP - ssize);
362 charp = (char*)(TSTKTOP - nbytes);
363 args = charp;
364 if(indir)
365 argp = progarg;
366 else
367 argp = (char**)arg[1];
368
369 for(i=0; i<nargs; i++){
370 if(indir && *argp==0) {
371 indir = 0;
372 argp = (char**)arg[1];
373 }
374 *argv++ = charp + (USTKTOP-TSTKTOP);
375 n = strlen(*argp) + 1;
376 memmove(charp, *argp++, n);
377 charp += n;
378 }
379 free(file0);
380
381 free(up->text);
382 up->text = elem;
383 elem = nil; /* so waserror() won't free elem */
384 USED(elem);
385
386 /* copy args; easiest from new process's stack */
387 n = charp - args;
388 if(n > 128) /* don't waste too much space on huge arg lists */
389 n = 128;
390 a = up->args;
391 up->args = nil;
392 free(a);
393 up->args = smalloc(n);
394 memmove(up->args, args, n);
395 if(n>0 && up->args[n-1]!='\0'){
396 /* make sure last arg is NUL-terminated */
397 /* put NUL at UTF-8 character boundary */
398 for(i=n-1; i>0; --i)
399 if(fullrune(up->args+i, n-i))
400 break;
401 up->args[i] = 0;
402 n = i+1;
403 }
404 up->nargs = n;
405
406 /*
407 * Committed.
408 * Free old memory.
409 * Special segments are maintained across exec
410 */
411 for(i = SSEG; i <= BSEG; i++) {
412 putseg(up->seg[i]);
413 /* prevent a second free if we have an error */
414 up->seg[i] = 0;
415 }
416 for(i = BSEG+1; i < NSEG; i++) {
417 s = up->seg[i];
418 if(s != 0 && (s->type&SG_CEXEC)) {
419 putseg(s);
420 up->seg[i] = 0;
421 }
422 }
423
424 /*
425 * Close on exec
426 */
427 f = up->fgrp;
428 for(i=0; i<=f->maxfd; i++)
429 fdclose(i, CCEXEC);
430
431 /* Text. Shared. Attaches to cache image if possible */
432 /* attachimage returns a locked cache image */
433 img = attachimage(SG_TEXT|SG_RONLY, tc, UTZERO, (t-UTZERO)>>PGSHIFT);
434 ts = img->s;
435 up->seg[TSEG] = ts;
436 ts->flushme = 1;
437 ts->fstart = 0;
438 ts->flen = sizeof(Exec)+text;
439 unlock(img);
440
441 /* Data. Shared. */
442 s = newseg(SG_DATA, t, (d-t)>>PGSHIFT);
443 up->seg[DSEG] = s;
444
445 /* Attached by hand */
446 incref(img);
447 s->image = img;
448 s->fstart = ts->fstart+ts->flen;
449 s->flen = data;
450
451 /* BSS. Zero fill on demand */
452 up->seg[BSEG] = newseg(SG_BSS, d, (b-d)>>PGSHIFT);
453
454 /*
455 * Move the stack
456 */
457 s = up->seg[ESEG];
458 up->seg[ESEG] = 0;
459 up->seg[SSEG] = s;
460 qunlock(&up->seglock);
461 poperror(); /* seglock */
462 poperror(); /* elem */
463 s->base = USTKTOP-USTKSIZE;
464 s->top = USTKTOP;
465 relocateseg(s, USTKTOP-TSTKTOP);
466
467 /*
468 * '/' processes are higher priority (hack to make /ip more responsive).
469 */
470 if(devtab[tc->type]->dc == L'/')
471 up->basepri = PriRoot;
472 up->priority = up->basepri;
473 poperror();
474 cclose(tc);
475
476 /*
477 * At this point, the mmu contains info about the old address
478 * space and needs to be flushed
479 */
480 flushmmu();
481 qlock(&up->debug);
482 up->nnote = 0;
483 up->notify = 0;
484 up->notified = 0;
485 up->privatemem = 0;
486 procsetup(up);
487 qunlock(&up->debug);
488 if(up->hang)
489 up->procctl = Proc_stopme;
490
491 return execregs(entry, ssize, nargs);
492 }
493
/*
 * Split the "#!" line of an interpreter script into words.
 *
 * s points at the n bytes read from the start of the file, beginning
 * with "#!".  The text up to the first newline is cut into words
 * separated by blanks and tabs; the buffer is modified in place (the
 * newline and the separators following each word become NULs).
 * Pointers to the words are stored in ap, followed by a nil entry.
 *
 * Returns the number of words, or 0 when no newline lies within the
 * n bytes or the line holds no words.  When no newline is found, ap
 * is left untouched.
 */
int
shargs(char *s, int n, char **ap)
{
	int i;

	s += 2;
	n -= 2;		/* skip #! */
	/*
	 * Find the newline, never looking beyond the n bytes that were
	 * actually read (n may be 0 when the file holds only "#!").
	 */
	for(i=0;; i++){
		if(i >= n)
			return 0;
		if(s[i] == '\n')
			break;
	}
	s[i] = 0;
	*ap = 0;
	i = 0;
	for(;;) {
		while(*s==' ' || *s=='\t')
			s++;
		if(*s == 0)
			break;
		i++;
		*ap++ = s;
		*ap = 0;
		while(*s && *s!=' ' && *s!='\t')
			s++;
		if(*s == 0)
			break;
		else
			*s++ = 0;
	}
	return i;
}
524
/*
 * Condition function that is never satisfied: always returns 0.
 * Passed to tsleep() in syssleep so only the timeout ends the sleep.
 */
int
return0(void*)
{
	return 0;
}
530
531 long
syssleep(ulong * arg)532 syssleep(ulong *arg)
533 {
534
535 int n;
536
537 n = arg[0];
538 if(n <= 0) {
539 if (up->edf && (up->edf->flags & Admitted))
540 edfyield();
541 else
542 yield();
543 return 0;
544 }
545 if(n < TK2MS(1))
546 n = TK2MS(1);
547 tsleep(&up->sleep, return0, 0, n);
548 return 0;
549 }
550
551 long
sysalarm(ulong * arg)552 sysalarm(ulong *arg)
553 {
554 return procalarm(arg[0]);
555 }
556
557 long
sysexits(ulong * arg)558 sysexits(ulong *arg)
559 {
560 char *status;
561 char *inval = "invalid exit string";
562 char buf[ERRMAX];
563
564 status = (char*)arg[0];
565 if(status){
566 if(waserror())
567 status = inval;
568 else{
569 validaddr((ulong)status, 1, 0);
570 if(vmemchr(status, 0, ERRMAX) == 0){
571 memmove(buf, status, ERRMAX);
572 buf[ERRMAX-1] = 0;
573 status = buf;
574 }
575 poperror();
576 }
577
578 }
579 pexit(status, 1);
580 return 0; /* not reached */
581 }
582
583 long
sys_wait(ulong * arg)584 sys_wait(ulong *arg)
585 {
586 int pid;
587 Waitmsg w;
588 OWaitmsg *ow;
589
590 if(arg[0] == 0)
591 return pwait(nil);
592
593 validaddr(arg[0], sizeof(OWaitmsg), 1);
594 validalign(arg[0], BY2WD); /* who cares? */
595 pid = pwait(&w);
596 if(pid >= 0){
597 ow = (OWaitmsg*)arg[0];
598 readnum(0, ow->pid, NUMSIZE, w.pid, NUMSIZE);
599 readnum(0, ow->time+TUser*NUMSIZE, NUMSIZE, w.time[TUser], NUMSIZE);
600 readnum(0, ow->time+TSys*NUMSIZE, NUMSIZE, w.time[TSys], NUMSIZE);
601 readnum(0, ow->time+TReal*NUMSIZE, NUMSIZE, w.time[TReal], NUMSIZE);
602 strncpy(ow->msg, w.msg, sizeof(ow->msg));
603 ow->msg[sizeof(ow->msg)-1] = '\0';
604 }
605 return pid;
606 }
607
608 long
sysawait(ulong * arg)609 sysawait(ulong *arg)
610 {
611 int i;
612 int pid;
613 Waitmsg w;
614 ulong n;
615
616 n = arg[1];
617 validaddr(arg[0], n, 1);
618 pid = pwait(&w);
619 if(pid < 0)
620 return -1;
621 i = snprint((char*)arg[0], n, "%d %lud %lud %lud %q",
622 w.pid,
623 w.time[TUser], w.time[TSys], w.time[TReal],
624 w.msg);
625
626 return i;
627 }
628
629 void
werrstr(char * fmt,...)630 werrstr(char *fmt, ...)
631 {
632 va_list va;
633
634 if(up == nil)
635 return;
636
637 va_start(va, fmt);
638 vseprint(up->syserrstr, up->syserrstr+ERRMAX, fmt, va);
639 va_end(va);
640 }
641
642 static long
generrstr(char * buf,uint nbuf)643 generrstr(char *buf, uint nbuf)
644 {
645 char tmp[ERRMAX];
646
647 if(nbuf == 0)
648 error(Ebadarg);
649 validaddr((ulong)buf, nbuf, 1);
650 if(nbuf > sizeof tmp)
651 nbuf = sizeof tmp;
652 memmove(tmp, buf, nbuf);
653
654 /* make sure it's NUL-terminated */
655 tmp[nbuf-1] = '\0';
656 memmove(buf, up->syserrstr, nbuf);
657 buf[nbuf-1] = '\0';
658 memmove(up->syserrstr, tmp, nbuf);
659 return 0;
660 }
661
662 long
syserrstr(ulong * arg)663 syserrstr(ulong *arg)
664 {
665 return generrstr((char*)arg[0], arg[1]);
666 }
667
668 /* compatibility for old binaries */
669 long
sys_errstr(ulong * arg)670 sys_errstr(ulong *arg)
671 {
672 return generrstr((char*)arg[0], 64);
673 }
674
675 long
sysnotify(ulong * arg)676 sysnotify(ulong *arg)
677 {
678 if(arg[0] != 0)
679 validaddr(arg[0], sizeof(ulong), 0);
680 up->notify = (int(*)(void*, char*))(arg[0]);
681 return 0;
682 }
683
684 long
sysnoted(ulong * arg)685 sysnoted(ulong *arg)
686 {
687 if(arg[0]!=NRSTR && !up->notified)
688 error(Egreg);
689 return 0;
690 }
691
692 long
syssegbrk(ulong * arg)693 syssegbrk(ulong *arg)
694 {
695 int i;
696 ulong addr;
697 Segment *s;
698
699 addr = arg[0];
700 for(i = 0; i < NSEG; i++) {
701 s = up->seg[i];
702 if(s == 0 || addr < s->base || addr >= s->top)
703 continue;
704 switch(s->type&SG_TYPE) {
705 case SG_TEXT:
706 case SG_DATA:
707 case SG_STACK:
708 error(Ebadarg);
709 default:
710 return ibrk(arg[1], i);
711 }
712 }
713
714 error(Ebadarg);
715 return 0; /* not reached */
716 }
717
718 long
syssegattach(ulong * arg)719 syssegattach(ulong *arg)
720 {
721 return segattach(up, arg[0], (char*)arg[1], arg[2], arg[3]);
722 }
723
724 long
syssegdetach(ulong * arg)725 syssegdetach(ulong *arg)
726 {
727 int i;
728 ulong addr;
729 Segment *s;
730
731 qlock(&up->seglock);
732 if(waserror()){
733 qunlock(&up->seglock);
734 nexterror();
735 }
736
737 s = 0;
738 addr = arg[0];
739 for(i = 0; i < NSEG; i++)
740 if(s = up->seg[i]) {
741 qlock(&s->lk);
742 if((addr >= s->base && addr < s->top) ||
743 (s->top == s->base && addr == s->base))
744 goto found;
745 qunlock(&s->lk);
746 }
747
748 error(Ebadarg);
749
750 found:
751 /*
752 * Check we are not detaching the initial stack segment.
753 */
754 if(s == up->seg[SSEG]){
755 qunlock(&s->lk);
756 error(Ebadarg);
757 }
758 up->seg[i] = 0;
759 qunlock(&s->lk);
760 putseg(s);
761 qunlock(&up->seglock);
762 poperror();
763
764 /* Ensure we flush any entries from the lost segment */
765 flushmmu();
766 return 0;
767 }
768
769 long
syssegfree(ulong * arg)770 syssegfree(ulong *arg)
771 {
772 Segment *s;
773 ulong from, to;
774
775 from = arg[0];
776 s = seg(up, from, 1);
777 if(s == nil)
778 error(Ebadarg);
779 to = (from + arg[1]) & ~(BY2PG-1);
780 from = PGROUND(from);
781
782 if(to > s->top) {
783 qunlock(&s->lk);
784 error(Ebadarg);
785 }
786
787 mfreeseg(s, from, (to - from) / BY2PG);
788 qunlock(&s->lk);
789 flushmmu();
790
791 return 0;
792 }
793
794 /* For binary compatibility */
795 long
sysbrk_(ulong * arg)796 sysbrk_(ulong *arg)
797 {
798 return ibrk(arg[0], BSEG);
799 }
800
801 long
sysrendezvous(ulong * arg)802 sysrendezvous(ulong *arg)
803 {
804 uintptr tag, val;
805 Proc *p, **l;
806
807 tag = arg[0];
808 l = &REND(up->rgrp, tag);
809 up->rendval = ~(uintptr)0;
810
811 lock(up->rgrp);
812 for(p = *l; p; p = p->rendhash) {
813 if(p->rendtag == tag) {
814 *l = p->rendhash;
815 val = p->rendval;
816 p->rendval = arg[1];
817
818 while(p->mach != 0)
819 ;
820 ready(p);
821 unlock(up->rgrp);
822 return val;
823 }
824 l = &p->rendhash;
825 }
826
827 /* Going to sleep here */
828 up->rendtag = tag;
829 up->rendval = arg[1];
830 up->rendhash = *l;
831 *l = up;
832 up->state = Rendezvous;
833 unlock(up->rgrp);
834
835 sched();
836
837 return up->rendval;
838 }
839
/*
 * The implementation of semaphores is complicated by needing
 * to avoid rescheduling in syssemrelease, so that it is safe
 * to call from real-time processes.  This means syssemrelease
 * cannot acquire any qlocks, only spin locks.
 *
 * Semacquire and semrelease must both manipulate the semaphore
 * wait list.  Lock-free linked lists only exist in theory, not
 * in practice, so the wait list is protected by a spin lock.
 *
 * The semaphore value *addr is stored in user memory, so it
 * cannot be read or written while holding spin locks.
 *
 * Thus, we can access the list only when holding the lock, and
 * we can access the semaphore only when not holding the lock.
 * This makes things interesting.  Note that sleep's condition function
 * is called while holding two locks - r and up->rlock - so it cannot
 * access the semaphore value either.
 *
 * An acquirer announces its intention to try for the semaphore
 * by putting a Sema structure onto the wait list and then
 * setting Sema.waiting.  After one last check of semaphore,
 * the acquirer sleeps until Sema.waiting==0.  A releaser of n
 * must wake up n acquirers who have Sema.waiting set.  It does
 * this by clearing Sema.waiting and then calling wakeup.
 *
 * There are three interesting races here.
 *
 * The first is that in this particular sleep/wakeup usage, a single
 * wakeup can rouse a process from two consecutive sleeps!
 * The ordering is:
 *
 * 	(a) set Sema.waiting = 1
 * 	(a) call sleep
 * 	(b) set Sema.waiting = 0
 * 	(a) check Sema.waiting inside sleep, return w/o sleeping
 * 	(a) try for semaphore, fail
 * 	(a) set Sema.waiting = 1
 * 	(a) call sleep
 * 	(b) call wakeup(a)
 * 	(a) wake up again
 *
 * This is okay - semacquire will just go around the loop
 * again.  It does mean that at the top of the for(;;) loop in
 * semacquire, phore.waiting might already be set to 1.
 *
 * The second is that a releaser might wake an acquirer who is
 * interrupted before he can acquire the lock.  Since
 * release(n) issues only n wakeup calls -- only n can be used
 * anyway -- if the interrupted process is not going to use his
 * wakeup call he must pass it on to another acquirer.
 *
 * The third race is similar to the second but more subtle.  An
 * acquirer sets waiting=1 and then does a final canacquire()
 * before going to sleep.  The opposite order would result in
 * missing wakeups that happen between canacquire and
 * waiting=1.  (In fact, the whole point of Sema.waiting is to
 * avoid missing wakeups between canacquire() and sleep().)  But
 * there can be spurious wakeups between a successful
 * canacquire() and the following semdequeue().  This wakeup is
 * not useful to the acquirer, since he has already acquired
 * the semaphore.  Like in the previous case, though, the
 * acquirer must pass the wakeup call along.
 *
 * This is all rather subtle.  The code below has been verified
 * with the spin model /sys/src/9/port/semaphore.p.  The
 * original code anticipated the second race but not the first
 * or third, which were caught only with spin.  The first race
 * is mentioned in /sys/doc/sleep.ps, but I'd forgotten about it.
 * It was lucky that my abstract model of sleep/wakeup still managed
 * to preserve that behavior.
 *
 * I remain slightly concerned about memory coherence
 * outside of locks.  The spin model does not take
 * queued processor writes into account so we have to
 * think hard.  The only variables accessed outside locks
 * are the semaphore value itself and the boolean flag
 * Sema.waiting.  The value is only accessed with cmpswap,
 * whose job description includes doing the right thing as
 * far as memory coherence across processors.  That leaves
 * Sema.waiting.  To handle it, we call coherence() before each
 * read and after each write.		- rsc
 */
923
924 /* Add semaphore p with addr a to list in seg. */
925 static void
semqueue(Segment * s,long * a,Sema * p)926 semqueue(Segment *s, long *a, Sema *p)
927 {
928 memset(p, 0, sizeof *p);
929 p->addr = a;
930 lock(&s->sema); /* uses s->sema.Rendez.Lock, but no one else is */
931 p->next = &s->sema;
932 p->prev = s->sema.prev;
933 p->next->prev = p;
934 p->prev->next = p;
935 unlock(&s->sema);
936 }
937
938 /* Remove semaphore p from list in seg. */
939 static void
semdequeue(Segment * s,Sema * p)940 semdequeue(Segment *s, Sema *p)
941 {
942 lock(&s->sema);
943 p->next->prev = p->prev;
944 p->prev->next = p->next;
945 unlock(&s->sema);
946 }
947
948 /* Wake up n waiters with addr a on list in seg. */
949 static void
semwakeup(Segment * s,long * a,long n)950 semwakeup(Segment *s, long *a, long n)
951 {
952 Sema *p;
953
954 lock(&s->sema);
955 for(p=s->sema.next; p!=&s->sema && n>0; p=p->next){
956 if(p->addr == a && p->waiting){
957 p->waiting = 0;
958 coherence();
959 wakeup(p);
960 n--;
961 }
962 }
963 unlock(&s->sema);
964 }
965
966 /* Add delta to semaphore and wake up waiters as appropriate. */
967 static long
semrelease(Segment * s,long * addr,long delta)968 semrelease(Segment *s, long *addr, long delta)
969 {
970 long value;
971
972 do
973 value = *addr;
974 while(!cmpswap(addr, value, value+delta));
975 semwakeup(s, addr, delta);
976 return value+delta;
977 }
978
/*
 * Try to acquire semaphore using compare-and-swap.
 * While the value is positive, attempt to replace it with value-1.
 * Returns 1 once a swap succeeds, 0 as soon as the value is seen
 * to be <= 0.
 */
static int
canacquire(long *addr)
{
	long value;

	while((value=*addr) > 0)
		if(cmpswap(addr, value, value-1))
			return 1;
	return 0;
}
990
991 /* Should we wake up? */
992 static int
semawoke(void * p)993 semawoke(void *p)
994 {
995 coherence();
996 return !((Sema*)p)->waiting;
997 }
998
999 /* Acquire semaphore (subtract 1). */
1000 static int
semacquire(Segment * s,long * addr,int block)1001 semacquire(Segment *s, long *addr, int block)
1002 {
1003 int acquired;
1004 Sema phore;
1005
1006 if(canacquire(addr))
1007 return 1;
1008 if(!block)
1009 return 0;
1010
1011 acquired = 0;
1012 semqueue(s, addr, &phore);
1013 for(;;){
1014 phore.waiting = 1;
1015 coherence();
1016 if(canacquire(addr)){
1017 acquired = 1;
1018 break;
1019 }
1020 if(waserror())
1021 break;
1022 sleep(&phore, semawoke, &phore);
1023 poperror();
1024 }
1025 semdequeue(s, &phore);
1026 coherence(); /* not strictly necessary due to lock in semdequeue */
1027 if(!phore.waiting)
1028 semwakeup(s, addr, 1);
1029 if(!acquired)
1030 nexterror();
1031 return 1;
1032 }
1033
1034 /* Acquire semaphore or time-out */
1035 static int
tsemacquire(Segment * s,long * addr,ulong ms)1036 tsemacquire(Segment *s, long *addr, ulong ms)
1037 {
1038 int acquired, timedout;
1039 ulong t, elms;
1040 Sema phore;
1041
1042 if(canacquire(addr))
1043 return 1;
1044 if(ms == 0)
1045 return 0;
1046 acquired = timedout = 0;
1047 semqueue(s, addr, &phore);
1048 for(;;){
1049 phore.waiting = 1;
1050 coherence();
1051 if(canacquire(addr)){
1052 acquired = 1;
1053 break;
1054 }
1055 if(waserror())
1056 break;
1057 t = m->ticks;
1058 tsleep(&phore, semawoke, &phore, ms);
1059 elms = TK2MS(m->ticks - t);
1060 poperror();
1061 if(elms >= ms){
1062 timedout = 1;
1063 break;
1064 }
1065 ms -= elms;
1066 }
1067 semdequeue(s, &phore);
1068 coherence(); /* not strictly necessary due to lock in semdequeue */
1069 if(!phore.waiting)
1070 semwakeup(s, addr, 1);
1071 if(timedout)
1072 return 0;
1073 if(!acquired)
1074 nexterror();
1075 return 1;
1076 }
1077
1078 long
syssemacquire(ulong * arg)1079 syssemacquire(ulong *arg)
1080 {
1081 int block;
1082 long *addr;
1083 Segment *s;
1084
1085 validaddr(arg[0], sizeof(long), 1);
1086 validalign(arg[0], sizeof(long));
1087 addr = (long*)arg[0];
1088 block = arg[1];
1089
1090 if((s = seg(up, (ulong)addr, 0)) == nil)
1091 error(Ebadarg);
1092 if(*addr < 0)
1093 error(Ebadarg);
1094 return semacquire(s, addr, block);
1095 }
1096
1097 long
systsemacquire(ulong * arg)1098 systsemacquire(ulong *arg)
1099 {
1100 long *addr;
1101 ulong ms;
1102 Segment *s;
1103
1104 validaddr(arg[0], sizeof(long), 1);
1105 validalign(arg[0], sizeof(long));
1106 addr = (long*)arg[0];
1107 ms = arg[1];
1108
1109 if((s = seg(up, (ulong)addr, 0)) == nil)
1110 error(Ebadarg);
1111 if(*addr < 0)
1112 error(Ebadarg);
1113 return tsemacquire(s, addr, ms);
1114 }
1115
1116 long
syssemrelease(ulong * arg)1117 syssemrelease(ulong *arg)
1118 {
1119 long *addr, delta;
1120 Segment *s;
1121
1122 validaddr(arg[0], sizeof(long), 1);
1123 validalign(arg[0], sizeof(long));
1124 addr = (long*)arg[0];
1125 delta = arg[1];
1126
1127 if((s = seg(up, (ulong)addr, 0)) == nil)
1128 error(Ebadarg);
1129 /* delta == 0 is a no-op, not a release */
1130 if(delta < 0 || *addr < 0)
1131 error(Ebadarg);
1132 return semrelease(s, addr, delta);
1133 }
1134
1135 long
sysnsec(ulong * arg)1136 sysnsec(ulong *arg)
1137 {
1138 validaddr(arg[0], sizeof(vlong), 1);
1139 validalign(arg[0], sizeof(vlong));
1140
1141 *(vlong*)arg[0] = todget(nil);
1142
1143 return 0;
1144 }
1145