#include "u.h"
#include "tos.h"
#include "../port/lib.h"
#include "mem.h"
#include "dat.h"
#include "fns.h"
#include "../port/error.h"

#include "../port/edf.h"
#include <a.out.h>
#include <ptrace.h>

void
sysr1(Ar0* ar0, va_list list)
{
	USED(list);

	ar0->i = 0;
}

void
sysrfork(Ar0* ar0, va_list list)
{
	Proc *p;
	int flag, i, n, pid;
	Fgrp *ofg;
	Pgrp *opg;
	Rgrp *org;
	Egrp *oeg;
	Mach *wm;
	void (*pt)(Proc*, int, vlong, vlong);
	u64int ptarg;

	/*
	 * int rfork(int);
	 */
	flag = va_arg(list, int);

	/* Check flags before we commit */
	if((flag & (RFFDG|RFCFDG)) == (RFFDG|RFCFDG))
		error(Ebadarg);
	if((flag & (RFNAMEG|RFCNAMEG)) == (RFNAMEG|RFCNAMEG))
		error(Ebadarg);
	if((flag & (RFENVG|RFCENVG)) == (RFENVG|RFCENVG))
		error(Ebadarg);

	if((flag&RFPROC) == 0) {
		if(flag & (RFMEM|RFNOWAIT))
			error(Ebadarg);
		if(flag & (RFFDG|RFCFDG)) {
			ofg = up->fgrp;
			if(flag & RFFDG)
				up->fgrp = dupfgrp(ofg);
			else
				up->fgrp = dupfgrp(nil);
			closefgrp(ofg);
		}
		if(flag & (RFNAMEG|RFCNAMEG)) {
			opg = up->pgrp;
			up->pgrp = newpgrp();
			if(flag & RFNAMEG)
				pgrpcpy(up->pgrp, opg);
			/* inherit noattach */
			up->pgrp->noattach = opg->noattach;
			closepgrp(opg);
		}
		if(flag & RFNOMNT)
			up->pgrp->noattach = 1;
		if(flag & RFREND) {
			org = up->rgrp;
			up->rgrp = newrgrp();
			closergrp(org);
		}
		if(flag & (RFENVG|RFCENVG)) {
			oeg = up->egrp;
			up->egrp = smalloc(sizeof(Egrp));
			up->egrp->ref = 1;
			if(flag & RFENVG)
				envcpy(up->egrp, oeg);
			closeegrp(oeg);
		}
		if(flag & RFNOTEG)
			up->noteid = incref(&noteidalloc);

		ar0->i = 0;
		return;
	}

	p = newproc();

	p->trace = up->trace;
	p->scallnr = up->scallnr;
	memmove(p->arg, up->arg, sizeof(up->arg));
	p->nerrlab = 0;
	p->slash = up->slash;
	p->dot = up->dot;
	incref(p->dot);

	memmove(p->note, up->note, sizeof(p->note));
	p->privatemem = up->privatemem;
	p->nnote = up->nnote;
	p->notified = 0;
	p->lastnote = up->lastnote;
	p->notify = up->notify;
	p->ureg = up->ureg;
	p->dbgreg = 0;

	/* Make a new set of memory segments */
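	/* n is the RFMEM bit: when set, dupseg shares writable segments with the child rather than copying them */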
	n = flag & RFMEM;
	qlock(&p->seglock);
	if(waserror()){
		qunlock(&p->seglock);
		nexterror();
	}
	for(i = 0; i < NSEG; i++)
		if(up->seg[i] != nil)
			p->seg[i] = dupseg(up->seg, i, n);
	qunlock(&p->seglock);
	poperror();

	/* File descriptors */
	if(flag & (RFFDG|RFCFDG)) {
		if(flag & RFFDG)
			p->fgrp = dupfgrp(up->fgrp);
		else
			p->fgrp = dupfgrp(nil);
	}
	else {
		p->fgrp = up->fgrp;
		incref(p->fgrp);
	}

	/* Process groups */
	if(flag & (RFNAMEG|RFCNAMEG)) {
		p->pgrp = newpgrp();
		if(flag & RFNAMEG)
			pgrpcpy(p->pgrp, up->pgrp);
		/* inherit noattach */
		p->pgrp->noattach = up->pgrp->noattach;
	}
	else {
		p->pgrp = up->pgrp;
		incref(p->pgrp);
	}
	if(flag & RFNOMNT)
		p->pgrp->noattach = 1;

	if(flag & RFREND)
		p->rgrp = newrgrp();
	else {
		incref(up->rgrp);
		p->rgrp = up->rgrp;
	}

	/* Environment group */
	if(flag & (RFENVG|RFCENVG)) {
		p->egrp = smalloc(sizeof(Egrp));
		p->egrp->ref = 1;
		if(flag & RFENVG)
			envcpy(p->egrp, up->egrp);
	}
	else {
		p->egrp = up->egrp;
		incref(p->egrp);
	}
	p->hang = up->hang;
	p->procmode = up->procmode;

	/* Craft a return frame which will cause the child to pop out of
	 * the scheduler in user mode with the return register zero
	 */
	sysrforkchild(p, up);

	p->parent = up;
	p->parentpid = up->pid;
	if(flag&RFNOWAIT)
		p->parentpid = 0;
	else {
		lock(&up->exl);
		up->nchild++;
		unlock(&up->exl);
	}
	if((flag&RFNOTEG) == 0)
		p->noteid = up->noteid;

	pid = p->pid;
	memset(p->time, 0, sizeof(p->time));
	p->time[TReal] = sys->ticks;

	kstrdup(&p->text, up->text);
	kstrdup(&p->user, up->user);
	/*
	 * since the bss/data segments are now shareable,
	 * any mmu info about this process is now stale
	 * (i.e. has bad properties) and has to be discarded.
	 */
	mmuflush();
	p->basepri = up->basepri;
	p->priority = up->basepri;
	p->fixedpri = up->fixedpri;
	p->mp = up->mp;
	wm = up->wired;
	if(wm != nil)
		procwired(p, wm->machno);
	if(p->trace && (pt = proctrace) != nil){
		strncpy((char*)&ptarg, p->text, sizeof ptarg);
		pt(p, SName, 0, ptarg);
	}
	p->color = up->color;
	ready(p);
	sched();

	ar0->i = pid;
}

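/*
 * Interpret 8 bytes stored big-endian in memory (as in the extended
 * a.out header) as a uvlong in host order.
 */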
static uvlong
vl2be(uvlong v)
{
	uchar *p;

	p = (uchar*)&v;
	return ((uvlong)((p[0]<<24)|(p[1]<<16)|(p[2]<<8)|p[3])<<32)
	      |((uvlong)(p[4]<<24)|(p[5]<<16)|(p[6]<<8)|p[7]);
}

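/*
 * Likewise for a 4-byte big-endian quantity, e.g. the Exec header fields.
 */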
ulong
l2be(long l)
{
	uchar *cp;

	cp = (uchar*)&l;
	return (cp[0]<<24) | (cp[1]<<16) | (cp[2]<<8) | cp[3];
}

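/*
 * Exec header as stored in the file, plus one optional 8-byte field
 * holding the 64-bit entry point of extended (HDR_MAGIC) binaries.
 */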
typedef struct {
	Exec;
	uvlong hdr[1];
} Hdr;

void
sysexec(Ar0* ar0, va_list list)
{
	Hdr hdr;
	Fgrp *f;
	Tos *tos;
	Chan *chan;
	Image *img;
	Segment *s;
	int argc, i, n, nargs;
	char *a, *args, **argv, elem[sizeof(up->genbuf)], *file, *p;
	char line[sizeof(Exec)], *progarg[sizeof(Exec)/2+1];
	long hdrsz, magic, textsz, datasz, bsssz;
	uintptr textlim, textmin, datalim, bsslim, entry, stack;
	void (*pt)(Proc*, int, vlong, vlong);
	u64int ptarg;

	/*
	 * void* exec(char* name, char* argv[]);
	 */

	/*
	 * Remember the full name of the file,
	 * open it, and remember the final element of the
	 * name left in up->genbuf by namec.
	 */
	p = va_arg(list, char*);
	p = validaddr(p, 1, 0);
	file = validnamedup(p, 1);
	if(waserror()){
		free(file);
		nexterror();
	}
	chan = namec(file, Aopen, OEXEC, 0);
	if(waserror()){
		cclose(chan);
		nexterror();
	}
	strncpy(elem, up->genbuf, sizeof(elem));

	/*
	 * Read the header.
	 * If it's a #!, fill in progarg[] with info then read a new header
	 * from the file indicated by the #!.
	 * The #! line must be less than sizeof(Exec) in size,
	 * including the terminating \n.
	 */
	hdrsz = chan->dev->read(chan, &hdr, sizeof(Hdr), 0);
	if(hdrsz < 2)
		error(Ebadexec);
	p = (char*)&hdr;
	argc = 0;
	if(p[0] == '#' && p[1] == '!'){
		p = memccpy(line, (char*)&hdr, '\n', MIN(sizeof(Exec), hdrsz));
		if(p == nil)
			error(Ebadexec);
		*(p-1) = '\0';
		argc = tokenize(line+2, progarg, nelem(progarg));
		if(argc == 0)
			error(Ebadexec);

		/* The original file becomes an extra arg after #! line */
		progarg[argc++] = file;

		/*
		 * Take the #! $0 as a file to open, and replace
		 * $0 with the original path's name.
		 */
		p = progarg[0];
		progarg[0] = elem;
		poperror();	/* chan */
		cclose(chan);

		chan = namec(p, Aopen, OEXEC, 0);
		if(waserror()){
			cclose(chan);
			nexterror();
		}
		hdrsz = chan->dev->read(chan, &hdr, sizeof(Hdr), 0);
		if(hdrsz < 2)
			error(Ebadexec);
	}

	/*
	 * #! has had its chance, now we need a real binary.
	 */
	magic = l2be(hdr.magic);
	if(hdrsz != sizeof(Hdr) || magic != AOUT_MAGIC)
		error(Ebadexec);
	if(magic & HDR_MAGIC){
		entry = vl2be(hdr.hdr[0]);
		hdrsz = sizeof(Hdr);
	}
	else{
		entry = l2be(hdr.entry);
		hdrsz = sizeof(Exec);
	}

	textsz = l2be(hdr.text);
	datasz = l2be(hdr.data);
	bsssz = l2be(hdr.bss);

	textmin = ROUNDUP(UTZERO+hdrsz+textsz, PGSZ);
	textlim = UTROUND(textmin);
	datalim = ROUNDUP(textlim+datasz, PGSZ);
	bsslim = ROUNDUP(textlim+datasz+bsssz, PGSZ);

	/*
	 * Check the binary header for consistency,
	 * e.g. the entry point is within the text segment and
	 * the segments don't overlap each other.
	 */
	if(entry < UTZERO+hdrsz || entry >= UTZERO+hdrsz+textsz)
		error(Ebadexec);

	if(textsz >= textlim || datasz > datalim || bsssz > bsslim
	|| textlim >= USTKTOP || datalim >= USTKTOP || bsslim >= USTKTOP
	|| datalim < textlim || bsslim < datalim)
		error(Ebadexec);

	up->color = corecolor(m->machno);

	/*
	 * The new stack is created in ESEG, temporarily mapped elsewhere.
	 * The stack contains, in descending address order:
	 *	a structure containing housekeeping and profiling data (Tos);
	 *	argument strings;
	 *	array of vectors to the argument strings with a terminating
	 *	nil (argv).
	 * When the exec is committed, this temporary stack in ESEG will
	 * become SSEG.
	 * The architecture-dependent code which jumps to the new image
	 * will also push a count of the argument array onto the stack (argc).
	 */
	qlock(&up->seglock);
	if(waserror()){
		if(up->seg[ESEG] != nil){
			putseg(up->seg[ESEG]);
			up->seg[ESEG] = nil;
		}
		qunlock(&up->seglock);
		nexterror();
	}
	up->seg[ESEG] = newseg(SG_STACK, TSTKTOP-USTKSIZE, TSTKTOP);
	up->seg[ESEG]->color = up->color;

	/*
	 * Stack is a pointer into the temporary stack
	 * segment, and will move as items are pushed.
	 */
	stack = TSTKTOP-sizeof(Tos);

	/*
	 * First, the top-of-stack structure.
	 */
	tos = (Tos*)stack;
	tos->cyclefreq = m->cyclefreq;
	cycles((uvlong*)&tos->pcycles);
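	/* record minus the current cycle count as the starting point for the new image's cycle accounting */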
	tos->pcycles = -tos->pcycles;
	tos->kcycles = tos->pcycles;
	tos->clock = 0;

	/*
	 * As the pass is made over the arguments and they are pushed onto
	 * the temporary stack, make a good faith copy in args for up->args.
	 */
	args = smalloc(128);
	if(waserror()){
		free(args);
		nexterror();
	}
	nargs = 0;

	/*
	 * Next push any arguments found from a #! header.
	 */
	for(i = 0; i < argc; i++){
		n = strlen(progarg[i])+1;
		stack -= n;
		memmove(UINT2PTR(stack), progarg[i], n);

		if((n = MIN(n, 128-nargs)) <= 0)
			continue;
		memmove(&args[nargs], progarg[i], n);
		nargs += n;
	}

	/*
	 * Copy the strings pointed to by the syscall argument argv into
	 * the temporary stack segment, being careful to check both argv and
	 * the strings it points to are valid.
	 */
	argv = va_arg(list, char**);
	evenaddr(PTR2UINT(argv));
	for(i = 0;; i++, argv++){
		a = *(char**)validaddr(argv, sizeof(char**), 0);
		if(a == nil)
			break;
		a = validaddr(a, 1, 0);
		n = ((char*)vmemchr(a, 0, 0x7fffffff) - a) + 1;

		/*
		 * This futzing is so argv[0] gets validated even
		 * though it will be thrown away if this is a shell
		 * script.
		 */
		if(argc > 0 && i == 0)
			continue;

		/*
		 * Before copying the string into the temporary stack,
		 * which might involve a demand-page, check the string
		 * will not overflow the bottom of the stack.
		 */
		stack -= n;
		if(stack < TSTKTOP-USTKSIZE)
			error(Enovmem);
		p = UINT2PTR(stack);
		memmove(p, a, n);
		p[n-1] = 0;
		argc++;

		if((n = MIN(n, 128-nargs)) <= 0)
			continue;
		memmove(&args[nargs], p, n);
		nargs += n;
	}
	if(argc < 1)
		error(Ebadexec);

	/*
	 * Before pushing the argument pointers onto the temporary stack,
	 * which might involve a demand-page, check there is room for the
	 * terminating nil pointer, plus pointers, plus some slop for however
	 * argc might be passed on the stack by sysexecregs (give a page
	 * of slop, it is an overestimate, but why not).
	 * Sysexecstack does any architecture-dependent stack alignment.
	 * Keep a copy of the start of the argument strings before alignment
	 * so up->args can be created later.
	 * Although the argument vectors are being pushed onto the stack in
	 * the temporary segment, the values must be adjusted to reflect
	 * the segment address after it replaces the current SSEG.
	 */
	p = UINT2PTR(stack);
	stack = sysexecstack(stack, argc);
	if(stack-(argc+1)*sizeof(char**)-segpgsize(up->seg[ESEG]) < TSTKTOP-USTKSIZE)
		error(Ebadexec);

	argv = (char**)stack;
	*--argv = nil;
	for(i = 0; i < argc; i++){
		*--argv = p + (USTKTOP-TSTKTOP);
		p += strlen(p) + 1;
	}

	/*
	 * Fix up the up->args copy in args. The length must be > 0 as it
	 * includes the \0 on the last argument and argc was checked earlier
	 * to be > 0. Compensate for any UTF character boundary before
	 * placing the terminating \0.
	 */
	if(nargs <= 0)
		error(Egreg);

	while(nargs > 0 && (args[nargs-1] & 0xc0) == 0x80)
		nargs--;
	args[nargs-1] = '\0';

	/*
	 * All the argument processing is now done, ready to commit.
	 */
	kstrdup(&up->text, elem);
	free(up->args);
	up->args = args;
	up->nargs = nargs;
	poperror();	/* args */

	/*
	 * Close on exec
	 */
	f = up->fgrp;
	for(i=0; i<=f->maxfd; i++)
		fdclose(i, CCEXEC);

	/*
	 * Free old memory.
	 * Special segments maintained across exec.
	 */
	for(i = SSEG; i <= BSEG; i++) {
		putseg(up->seg[i]);
		up->seg[i] = nil;	/* in case of error */
	}
	for(i = BSEG+1; i< NSEG; i++) {
		s = up->seg[i];
		if(s && (s->type&SG_CEXEC)) {
			putseg(s);
			up->seg[i] = nil;
		}
	}

	if(up->trace && (pt = proctrace) != nil){
		strncpy((char*)&ptarg, elem, sizeof ptarg);
		pt(up, SName, 0, ptarg);
	}

	/* Text. Shared. Attaches to cache image if possible */
	/* attachimage returns a locked cache image */

	img = attachimage(SG_TEXT|SG_RONLY, chan, up->color, UTZERO, textmin);
	s = img->s;
	up->seg[TSEG] = s;
	s->flushme = 1;
	s->fstart = 0;
	s->flen = hdrsz+textsz;
	if(img->color != up->color){
		up->color = img->color;
	}
	unlock(img);

	/* Data. Shared. */
	s = newseg(SG_DATA, textlim, datalim);
	up->seg[DSEG] = s;
	s->color = up->color;

	/* Attached by hand */
	incref(img);
	s->image = img;
	s->fstart = hdrsz+textsz;
	s->flen = datasz;

	/* BSS. Zero fill on demand */
	up->seg[BSEG] = newseg(SG_BSS, datalim, bsslim);
	up->seg[BSEG]->color = up->color;

	/*
	 * Move the stack
	 */
	s = up->seg[ESEG];
	up->seg[ESEG] = nil;
	up->seg[SSEG] = s;
	qunlock(&up->seglock);
	poperror();	/* seglock */

	s->base = USTKTOP-USTKSIZE;
	s->top = USTKTOP;
	relocateseg(s, USTKTOP-TSTKTOP);

	/*
	 * '/' processes are higher priority.
	 */
	if(chan->dev->dc == L'/')
		up->basepri = PriRoot;
	up->priority = up->basepri;
	poperror();	/* chan */
	cclose(chan);
	poperror();	/* file */
	free(file);

	/*
	 * At this point, the mmu contains info about the old address
	 * space and needs to be flushed
	 */
	mmuflush();
	qlock(&up->debug);
	up->nnote = 0;
	up->notify = 0;
	up->notified = 0;
	up->privatemem = 0;
	sysprocsetup(up);
	qunlock(&up->debug);
	if(up->hang)
		up->procctl = Proc_stopme;

	ar0->v = sysexecregs(entry, TSTKTOP - PTR2UINT(argv), argc);
}

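/*
 * Sleep condition that never becomes true: tsleep with return0 waits
 * for the full timeout unless explicitly woken.
 */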
int
return0(void*)
{
	return 0;
}

void
syssleep(Ar0* ar0, va_list list)
{
	long ms;

	/*
	 * int sleep(long millisecs);
	 */
	ms = va_arg(list, long);

	ar0->i = 0;
	if(ms <= 0) {
		if (up->edf && (up->edf->flags & Admitted))
			edfyield();
		else
			yield();
		return;
	}
	if(ms < TK2MS(1))
		ms = TK2MS(1);
	tsleep(&up->sleep, return0, 0, ms);
}

void
sysalarm(Ar0* ar0, va_list list)
{
	unsigned long ms;

	/*
	 * long alarm(unsigned long millisecs);
	 * Odd argument type...
	 */
	ms = va_arg(list, unsigned long);

	ar0->l = procalarm(ms);
}

void
sysexits(Ar0*, va_list list)
{
	char *status;
	char *inval = "invalid exit string";
	char buf[ERRMAX];

	/*
	 * void exits(char *msg);
	 */
	status = va_arg(list, char*);

	if(status){
		if(waserror())
			status = inval;
		else{
			status = validaddr(status, 1, 0);
			if(vmemchr(status, 0, ERRMAX) == 0){
				memmove(buf, status, ERRMAX);
				buf[ERRMAX-1] = 0;
				status = buf;
			}
			poperror();
		}

	}
	pexit(status, 1);
}

void
sys_wait(Ar0* ar0, va_list list)
{
	int pid;
	Waitmsg w;
	OWaitmsg *ow;

	/*
	 * int wait(Waitmsg* w);
	 *
	 * Deprecated; backwards compatibility only.
	 */
	ow = va_arg(list, OWaitmsg*);
	if(ow == nil){
		ar0->i = pwait(nil);
		return;
	}

	ow = validaddr(ow, sizeof(OWaitmsg), 1);
	evenaddr(PTR2UINT(ow));
	pid = pwait(&w);
	if(pid >= 0){
		readnum(0, ow->pid, NUMSIZE, w.pid, NUMSIZE);
		readnum(0, ow->time+TUser*NUMSIZE, NUMSIZE, w.time[TUser], NUMSIZE);
		readnum(0, ow->time+TSys*NUMSIZE, NUMSIZE, w.time[TSys], NUMSIZE);
		readnum(0, ow->time+TReal*NUMSIZE, NUMSIZE, w.time[TReal], NUMSIZE);
		strncpy(ow->msg, w.msg, sizeof(ow->msg));
		ow->msg[sizeof(ow->msg)-1] = '\0';
	}

	ar0->i = pid;
}

void
sysawait(Ar0* ar0, va_list list)
{
	int i;
	int pid;
	Waitmsg w;
	usize n;
	char *p;

	/*
	 * int await(char* s, int n);
	 * should really be
	 * usize await(char* s, usize n);
	 */
	p = va_arg(list, char*);
	n = va_arg(list, long);
	p = validaddr(p, n, 1);

	pid = pwait(&w);
	if(pid < 0){
		ar0->i = -1;
		return;
	}
	i = snprint(p, n, "%d %lud %lud %lud %q",
		w.pid,
		w.time[TUser], w.time[TSys], w.time[TReal],
		w.msg);

	ar0->i = i;
}

void
werrstr(char *fmt, ...)
{
	va_list va;

	if(up == nil)
		return;

	va_start(va, fmt);
	vseprint(up->syserrstr, up->syserrstr+ERRMAX, fmt, va);
	va_end(va);
}

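/*
 * Exchange error strings: copy the current syserrstr out to the user's
 * buffer and install the buffer's previous contents as the new syserrstr.
 */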
static void
generrstr(char *buf, long n)
{
	char *p, tmp[ERRMAX];

	if(n <= 0)
		error(Ebadarg);
	p = validaddr(buf, n, 1);
	if(n > sizeof tmp)
		n = sizeof tmp;
	memmove(tmp, p, n);

	/* make sure it's NUL-terminated */
	tmp[n-1] = '\0';
	memmove(p, up->syserrstr, n);
	p[n-1] = '\0';
	memmove(up->syserrstr, tmp, n);
}

void
syserrstr(Ar0* ar0, va_list list)
{
	char *err;
	usize nerr;

	/*
	 * int errstr(char* err, uint nerr);
	 * should really be
	 * usize errstr(char* err, usize nerr);
	 * but errstr always returns 0.
	 */
	err = va_arg(list, char*);
	nerr = va_arg(list, usize);
	generrstr(err, nerr);

	ar0->i = 0;
}

void
sys_errstr(Ar0* ar0, va_list list)
{
	char *p;

	/*
	 * int errstr(char* err);
	 *
	 * Deprecated; backwards compatibility only.
	 */
	p = va_arg(list, char*);
	generrstr(p, 64);

	ar0->i = 0;
}

void
sysnotify(Ar0* ar0, va_list list)
{
	void (*f)(void*, char*);

	/*
	 * int notify(void (*f)(void*, char*));
	 */
	f = (void (*)(void*, char*))va_arg(list, void*);

	if(f != nil)
		validaddr(f, sizeof(void (*)(void*, char*)), 0);
	up->notify = f;

	ar0->i = 0;
}

void
sysnoted(Ar0* ar0, va_list list)
{
	int v;

	/*
	 * int noted(int v);
	 */
	v = va_arg(list, int);

	if(v != NRSTR && !up->notified)
		error(Egreg);

	ar0->i = 0;
}

void
sysrendezvous(Ar0* ar0, va_list list)
{
	Proc *p, **l;
	uintptr tag, val, pc;
	void (*pt)(Proc*, int, vlong, vlong);

	/*
	 * void* rendezvous(void*, void*);
	 */
	tag = PTR2UINT(va_arg(list, void*));

	l = &REND(up->rgrp, tag);
	up->rendval = ~0;

	lock(up->rgrp);
	for(p = *l; p; p = p->rendhash) {
		if(p->rendtag == tag) {
			*l = p->rendhash;
			val = p->rendval;
			p->rendval = PTR2UINT(va_arg(list, void*));

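			/* the peer may still be running on another processor; wait until it has switched out before readying it */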
			while(p->mach != 0)
				;
			ready(p);
			unlock(up->rgrp);

			ar0->v = UINT2PTR(val);
			return;
		}
		l = &p->rendhash;
	}

	/* Going to sleep here */
	up->rendtag = tag;
	up->rendval = PTR2UINT(va_arg(list, void*));
	up->rendhash = *l;
	*l = up;
	up->state = Rendezvous;
	if(up->trace && (pt = proctrace) != nil){
		pc = (uintptr)sysrendezvous;
		pt(up, SSleep, 0, Rendezvous|(pc<<8));
	}
	unlock(up->rgrp);

	sched();

	ar0->v = UINT2PTR(up->rendval);
}

/*
 * The implementation of semaphores is complicated by needing
 * to avoid rescheduling in syssemrelease, so that it is safe
 * to call from real-time processes. This means syssemrelease
 * cannot acquire any qlocks, only spin locks.
 *
 * Semacquire and semrelease must both manipulate the semaphore
 * wait list. Lock-free linked lists only exist in theory, not
 * in practice, so the wait list is protected by a spin lock.
 *
 * The semaphore value *addr is stored in user memory, so it
 * cannot be read or written while holding spin locks.
 *
 * Thus, we can access the list only when holding the lock, and
 * we can access the semaphore only when not holding the lock.
 * This makes things interesting. Note that sleep's condition function
 * is called while holding two locks - r and up->rlock - so it cannot
 * access the semaphore value either.
 *
 * An acquirer announces its intention to try for the semaphore
 * by putting a Sema structure onto the wait list and then
 * setting Sema.waiting. After one last check of the semaphore,
 * the acquirer sleeps until Sema.waiting==0. A releaser of n
 * must wake up n acquirers who have Sema.waiting set. It does
 * this by clearing Sema.waiting and then calling wakeup.
 *
 * There are three interesting races here.
 *
 * The first is that in this particular sleep/wakeup usage, a single
 * wakeup can rouse a process from two consecutive sleeps!
 * The ordering is:
 *
 *	(a) set Sema.waiting = 1
 *	(a) call sleep
 *	(b) set Sema.waiting = 0
 *	(a) check Sema.waiting inside sleep, return w/o sleeping
 *	(a) try for semaphore, fail
 *	(a) set Sema.waiting = 1
 *	(a) call sleep
 *	(b) call wakeup(a)
 *	(a) wake up again
 *
 * This is okay - semacquire will just go around the loop
 * again. It does mean that at the top of the for(;;) loop in
 * semacquire, phore.waiting might already be set to 1.
 *
 * The second is that a releaser might wake an acquirer who is
 * interrupted before he can acquire the lock. Since
 * release(n) issues only n wakeup calls -- only n can be used
 * anyway -- if the interrupted process is not going to use his
 * wakeup call he must pass it on to another acquirer.
 *
 * The third race is similar to the second but more subtle. An
 * acquirer sets waiting=1 and then does a final canacquire()
 * before going to sleep. The opposite order would result in
 * missing wakeups that happen between canacquire and
 * waiting=1. (In fact, the whole point of Sema.waiting is to
 * avoid missing wakeups between canacquire() and sleep().) But
 * there can be spurious wakeups between a successful
 * canacquire() and the following semdequeue(). This wakeup is
 * not useful to the acquirer, since he has already acquired
 * the semaphore. Like in the previous case, though, the
 * acquirer must pass the wakeup call along.
 *
 * This is all rather subtle. The code below has been verified
 * with the spin model /sys/src/9/port/semaphore.p. The
 * original code anticipated the second race but not the first
 * or third, which were caught only with spin. The first race
 * is mentioned in /sys/doc/sleep.ps, but I'd forgotten about it.
 * It was lucky that my abstract model of sleep/wakeup still managed
 * to preserve that behavior.
 *
 * I remain slightly concerned about memory coherence
 * outside of locks. The spin model does not take
 * queued processor writes into account so we have to
 * think hard. The only variables accessed outside locks
 * are the semaphore value itself and the boolean flag
 * Sema.waiting. The value is only accessed with CAS,
 * whose job description includes doing the right thing as
 * far as memory coherence across processors. That leaves
 * Sema.waiting. To handle it, we call coherence() before each
 * read and after each write. - rsc
 */
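
/*
 * Rough user-level usage, via the semacquire(2) libc wrappers (a sketch):
 *
 *	long sem = 0;
 *	semrelease(&sem, 1);		add one unit, waking a waiter if any
 *	if(semacquire(&sem, 1) < 0)	block for a unit; -1 if interrupted
 *		sysfatal("semacquire: %r");
 *
 * The count is an ordinary word in shared user memory, manipulated
 * with compare-and-swap; the kernel only queues, blocks and wakes
 * the waiters.
 */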

/* Add semaphore p with addr a to list in seg. */
static void
semqueue(Segment* s, int* addr, Sema* p)
{
	memset(p, 0, sizeof *p);
	p->addr = addr;

	lock(&s->sema);	/* uses s->sema.Rendez.Lock, but no one else is */
	p->next = &s->sema;
	p->prev = s->sema.prev;
	p->next->prev = p;
	p->prev->next = p;
	unlock(&s->sema);
}

/* Remove semaphore p from list in seg. */
static void
semdequeue(Segment* s, Sema* p)
{
	lock(&s->sema);
	p->next->prev = p->prev;
	p->prev->next = p->next;
	unlock(&s->sema);
}

/* Wake up n waiters with addr on list in seg. */
static void
semwakeup(Segment* s, int* addr, int n)
{
	Sema *p;

	lock(&s->sema);
	for(p = s->sema.next; p != &s->sema && n > 0; p = p->next){
		if(p->addr == addr && p->waiting){
			p->waiting = 0;
			coherence();
			wakeup(p);
			n--;
		}
	}
	unlock(&s->sema);
}

/* Add delta to semaphore and wake up waiters as appropriate. */
static int
semrelease(Segment* s, int* addr, int delta)
{
	int value;

	do
		value = *addr;
	while(!CASW(addr, value, value+delta));
	semwakeup(s, addr, delta);

	return value+delta;
}

/* Try to acquire semaphore using compare-and-swap */
static int
canacquire(int* addr)
{
	int value;

	while((value = *addr) > 0){
		if(CASW(addr, value, value-1))
			return 1;
	}

	return 0;
}

/* Should we wake up? */
static int
semawoke(void* p)
{
	coherence();
	return !((Sema*)p)->waiting;
}

/* Acquire semaphore (subtract 1). */
static int
semacquire(Segment* s, int* addr, int block)
{
	int acquired;
	Sema phore;

	if(canacquire(addr))
		return 1;
	if(!block)
		return 0;

	acquired = 0;
	semqueue(s, addr, &phore);
	for(;;){
		phore.waiting = 1;
		coherence();
		if(canacquire(addr)){
			acquired = 1;
			break;
		}
		if(waserror())
			break;
		sleep(&phore, semawoke, &phore);
		poperror();
	}
	semdequeue(s, &phore);
	coherence();	/* not strictly necessary due to lock in semdequeue */
	if(!phore.waiting)
		semwakeup(s, addr, 1);
	if(!acquired)
		nexterror();

	return 1;
}

/* Acquire semaphore or time-out */
static int
tsemacquire(Segment* s, int* addr, long ms)
{
	int acquired;
	ulong t;
	Sema phore;

	if(canacquire(addr))
		return 1;
	if(ms == 0)
		return 0;

	acquired = 0;
	semqueue(s, addr, &phore);
	for(;;){
		phore.waiting = 1;
		coherence();
		if(canacquire(addr)){
			acquired = 1;
			break;
		}
		if(waserror())
			break;
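		/* charge only the time actually slept against the remaining timeout */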
		t = m->ticks;
		tsleep(&phore, semawoke, &phore, ms);
		ms -= TK2MS(m->ticks-t);
		poperror();
		if(ms <= 0)
			break;
	}
	semdequeue(s, &phore);
	coherence();	/* not strictly necessary due to lock in semdequeue */
	if(!phore.waiting)
		semwakeup(s, addr, 1);
	if(ms <= 0)
		return 0;
	if(!acquired)
		nexterror();
	return 1;
}

void
syssemacquire(Ar0* ar0, va_list list)
{
	Segment *s;
	int *addr, block;

	/*
	 * int semacquire(long* addr, int block);
	 * should be (and will be implemented below as) perhaps
	 * int semacquire(int* addr, int block);
	 */
	addr = va_arg(list, int*);
	addr = validaddr(addr, sizeof(int), 1);
	evenaddr(PTR2UINT(addr));
	block = va_arg(list, int);

	if((s = seg(up, PTR2UINT(addr), 0)) == nil)
		error(Ebadarg);
	if(*addr < 0)
		error(Ebadarg);

	ar0->i = semacquire(s, addr, block);
}

void
systsemacquire(Ar0* ar0, va_list list)
{
	Segment *s;
	int *addr, ms;

	/*
	 * int tsemacquire(long* addr, ulong ms);
	 * should be (and will be implemented below as) perhaps
	 * int tsemacquire(int* addr, ulong ms);
	 */
	addr = va_arg(list, int*);
	addr = validaddr(addr, sizeof(int), 1);
	evenaddr(PTR2UINT(addr));
	ms = va_arg(list, ulong);

	if((s = seg(up, PTR2UINT(addr), 0)) == nil)
		error(Ebadarg);
	if(*addr < 0)
		error(Ebadarg);

	ar0->i = tsemacquire(s, addr, ms);
}

void
syssemrelease(Ar0* ar0, va_list list)
{
	Segment *s;
	int *addr, delta;

	/*
	 * long semrelease(long* addr, long count);
	 * should be (and will be implemented below as) perhaps
	 * int semrelease(int* addr, int count);
	 */
	addr = va_arg(list, int*);
	addr = validaddr(addr, sizeof(int), 1);
	evenaddr(PTR2UINT(addr));
	delta = va_arg(list, int);

	if((s = seg(up, PTR2UINT(addr), 0)) == nil)
		error(Ebadarg);
	if(delta < 0 || *addr < 0)
		error(Ebadarg);

	ar0->i = semrelease(s, addr, delta);
}

void
sysnsec(Ar0* ar0, va_list)
{
	ar0->vl = todget(nil);
}