xref: /inferno-os/os/port/exportfs.c (revision 50b0dbb170df61467e42c7ea4deb0b5692d15f4c)
1 #include	"u.h"
2 #include	"../port/lib.h"
3 #include	"mem.h"
4 #include	"dat.h"
5 #include	"fns.h"
6 #include	"../port/error.h"
7 #include	"kernel.h"
8 
/* short type names for the structures defined later in this file */
typedef	struct Fid	Fid;
typedef	struct Export	Export;
typedef	struct Exq	Exq;
typedef	struct Uqid Uqid;
13 
/* sizes of the fid/qid hash tables and of Styx message buffers */
enum
{
	Nfidhash	= 32,	/* buckets in Export.fid */
	Nqidhash = 32,	/* buckets in Export.qids; uqidhash masks with Nqidhash-1 */
	QIDMASK = ((vlong)1<<48)-1,	/* low 48 bits of a qid path; uqidalloc places pathgen above them */
	MAXFDATA	= 8192,
	MAXRPCDEF		= IOHDRSZ+MAXFDATA,	/* initial/default */
	MAXRPCMAX	= IOHDRSZ+64*1024,	/* most ever allowed */
	MSGHDRSZ	= BIT32SZ+BIT8SZ+BIT16SZ	/* presumably size[4] type[1] tag[2] -- confirm against the Styx header layout */
};
24 
/*
 * State of one exported name space served over a single connection.
 */
struct Export
{
	Lock;			/* taken with lock(fs) around accesses to the work list */
	Ref	r;		/* set to 1 by export(); incremented for each Exq that points here; dropped by exfree */
	Exq*	work;		/* requests that slaves have taken off the global queue */
	Lock	fidlock;	/* guards the fid hash table and the ref counts of its entries */
	Fid*	fid[Nfidhash];
	QLock	qidlock;	/* guards the qids table and pathgen */
	Uqid*	qids[Nqidhash];
	ulong	pathgen;	/* counter uqidalloc uses to make clashing qid paths distinct */
	Chan*	io;		/* channel that T-messages are read from and replies written to */
	Chan*	root;		/* directory being exported */
	Pgrp*	pgrp;		/* name space, environment and fd groups installed in a slave's env */
	Egrp*	egrp;
	Fgrp*	fgrp;
	int	async;		/* exportproc runs as its own kproc and must pexit when done */
	int	readonly;	/* if set, exportproc refuses requests that would modify files */
	int	msize;		/* message size fixed by Exversion; 0 until a Tversion is accepted */
	char*	user;		/* user name copied into a slave's env */
};
45 
/*
 * One client fid, kept on a hash chain of Export.fid.
 */
struct Fid
{
	Fid*	next;		/* next fid on the same hash chain */
	Fid**	last;		/* address of the pointer that refers to this fid (for unlinking) */
	Chan*	chan;
	int	fid;
	int	ref;		/* fcalls using the fid; locked by Export.fidlock */
	vlong	offset;	/* last offset used (within directory) */
	int	attached;	/* fid attached or cloned but not clunked */
	Uqid*	qid;	/* generated qid */
};
57 
/*
 * Mapping from a served file's (type, dev, qid.path) to the qid path
 * reported to the client; shared by reference count between fids.
 */
struct Uqid
{
	Ref;
	int	type;		/* c->type of the channel it was made for */
	int	dev;		/* c->dev of that channel */
	vlong	oldpath;	/* c->qid.path as seen locally */
	vlong	newpath;	/* path sent to the client (see Exrmtqid) */
	Uqid*	next;		/* next entry on the same hash chain of Export.qids */
};
67 
/*
 * One request read from the connection, together with its reply
 * and the buffer holding the wire form of the message.
 */
struct Exq
{
	Lock;			/* taken with lock(q) around busy, finished, shut and the flush queue */
	int	busy;	/* fcall in progress */
	int	finished;	/* will do no more work on this request or flushes */
	Exq*	next;		/* link on the global exq list, an Export.work list, or a flush queue */
	int	shut;		/* has been noted for shutdown */
	Exq*	flush;	/* queued flush requests */
	Exq*	flusht;	/* tail of flush queue */
	Export*	export;		/* export the request arrived on; holds a reference */
	Proc*	slave;		/* process executing the request */
	Fcall	in, out;	/* decoded T-message and the R-message being built */
	uchar*	buf;		/* message buffer; exportproc points it just past the Exq itself */
	int	bsize;		/* size of buf in bytes */
};
83 
/*
 * Global queue of requests, shared by all exports, from which
 * exslave processes take their work.
 */
struct
{
	Lock	l;		/* guards head and tail (and the one-time fcalls setup) */
	QLock	qwait;		/* held by the slave currently waiting for work */
	Rendez	rwait;		/* slaves sleep here until exwork() is true */
	Exq	*head;		/* work waiting for a slave */
	Exq	*tail;
}exq;
92 
/* queue management, slaves and qid mapping */
static void	exshutdown(Export*);
static int	exflushed(Export*, Exq*);
static void	exslave(void*);
static void	exfree(Export*);
static void	exfreeq(Exq*);
static void	exportproc(void*);
static void	exreply(Exq*, char*);
static int	exisroot(Export*, Chan*);
static Uqid*	uqidalloc(Export*, Chan*);
static void		freeuqid(Export*, Uqid*);

/*
 * handlers for each T-message: given the export, the request and
 * the reply to fill in, they return nil or an error string
 */
static char*	Exversion(Export*, Fcall*, Fcall*);
static char*	Exauth(Export*, Fcall*, Fcall*);
static char*	Exattach(Export*, Fcall*, Fcall*);
static char*	Exclunk(Export*, Fcall*, Fcall*);
static char*	Excreate(Export*, Fcall*, Fcall*);
static char*	Exopen(Export*, Fcall*, Fcall*);
static char*	Exread(Export*, Fcall*, Fcall*);
static char*	Exremove(Export*, Fcall*, Fcall*);
static char*	Exstat(Export*, Fcall*, Fcall*);
static char*	Exwalk(Export*, Fcall*, Fcall*);
static char*	Exwrite(Export*, Fcall*, Fcall*);
static char*	Exwstat(Export*, Fcall*, Fcall*);

/* dispatch table indexed by T-message type; filled in once by exportinit */
static char	*(*fcalls[Tmax])(Export*, Fcall*, Fcall*);
118 
/* error strings returned to the client in Rerror replies */
static char	Enofid[]   = "no such fid";
static char	Eseekdir[] = "can't seek on a directory";
static char	Eopen[]	= "walk of open fid";
static char	Emode[] = "open/create -- unknown mode";
static char	Edupfid[]	= "fid in use";
static char	Eaccess[] = "read/write -- not open in suitable mode";
static char	Ecount[] = "read/write -- count too big";
/* non-zero prints a trace of messages; values above 1 also trace slave dispatch */
int	exdebug = 0;
127 
128 int
129 export(int fd, char *dir, int async)
130 {
131 	Chan *c, *dc;
132 	Pgrp *pg;
133 	Egrp *eg;
134 	Export *fs;
135 
136 	if(waserror())
137 		return -1;
138 	c = fdtochan(up->env->fgrp, fd, ORDWR, 1, 1);
139 	poperror();
140 
141 	if(waserror()){
142 		cclose(c);
143 		return -1;
144 	}
145 	dc = namec(dir, Atodir, 0, 0);
146 	poperror();
147 
148 	fs = malloc(sizeof(Export));
149 	if(fs == nil){
150 		cclose(c);
151 		cclose(dc);
152 		error(Enomem);
153 	}
154 
155 	fs->r.ref = 1;
156 	pg = up->env->pgrp;
157 	fs->pgrp = pg;
158 	incref(pg);
159 	eg = up->env->egrp;
160 	fs->egrp = eg;
161 	if(eg != nil)
162 		incref(eg);
163 	fs->fgrp = newfgrp(nil);
164 	kstrdup(&fs->user, up->env->user);
165 	fs->root = dc;
166 	fs->io = c;
167 	fs->pathgen = 0;
168 	fs->msize = 0;
169 	c->flag |= CMSG;
170 	fs->async = async;
171 
172 	if(async){
173 		if(waserror())
174 			return -1;
175 		kproc("exportfs", exportproc, fs, 0);
176 		poperror();
177 	}else
178 		exportproc(fs);
179 
180 	return 0;
181 }
182 
183 static void
184 exportinit(void)
185 {
186 	lock(&exq.l);
187 	if(fcalls[Tversion] != nil) {
188 		unlock(&exq.l);
189 		return;
190 	}
191 	fcalls[Tversion] = Exversion;
192 	fcalls[Tauth] = Exauth;
193 	fcalls[Tattach] = Exattach;
194 	fcalls[Twalk] = Exwalk;
195 	fcalls[Topen] = Exopen;
196 	fcalls[Tcreate] = Excreate;
197 	fcalls[Tread] = Exread;
198 	fcalls[Twrite] = Exwrite;
199 	fcalls[Tclunk] = Exclunk;
200 	fcalls[Tremove] = Exremove;
201 	fcalls[Tstat] = Exstat;
202 	fcalls[Twstat] = Exwstat;
203 	unlock(&exq.l);
204 }
205 
206 static int
207 exisroot(Export *fs, Chan *c)
208 {
209 	return eqchan(fs->root, c, 1);
210 }
211 
212 static int
213 exreadn(Chan *c, void *buf, int n)
214 {
215 	int nr, t;
216 
217 	if(waserror())
218 		return -1;
219 	for(nr = 0; nr < n;){
220 		t = devtab[c->type]->read(c, (char*)buf+nr, n-nr, 0);
221 		if(t <= 0)
222 			break;
223 		nr += t;
224 	}
225 	poperror();
226 	return nr;
227 }
228 
229 static int
230 exreadmsg(Chan *c, void *a, uint n)
231 {
232 	int m, len;
233 	uchar *buf;
234 
235 	buf = a;
236 	m = exreadn(c, buf, BIT32SZ);
237 	if(m < BIT32SZ){
238 		if(m < 0)
239 			return -1;
240 		return 0;
241 	}
242 	len = GBIT32(buf);
243 	if(len <= BIT32SZ || len > n){
244 		kwerrstr("bad length in Styx message header");
245 		return -1;
246 	}
247 	len -= BIT32SZ;
248 	m = exreadn(c, buf+BIT32SZ, len);
249 	if(m < len){
250 		if(m < 0)
251 			return -1;
252 		return 0;
253 	}
254 	return BIT32SZ+m;
255 }
256 
/*
 * Service loop for one export (a is the Export*).
 *
 * Repeatedly reads a T-message from fs->io, validates it, and either
 * answers it directly (refused requests on a read-only export, and
 * flushes of requests that are not running) or appends it to the
 * global exq list for an exslave to execute.
 *
 * The loop ends when the connection reaches end of file, a read fails,
 * a message is malformed, or memory for a request cannot be had.
 * It then shuts the export down and drops its reference; when the
 * export is asynchronous this process exits.
 */
static void
exportproc(void *a)
{
	Exq *q;
	int async, msize;
	int n, type;
	Export *fs = a;

	exportinit();

	for(;;){

		/* until Tversion sets a message size, use the default */
		msize = fs->msize;
		if(msize == 0)
			msize = MAXRPCDEF;
		for(n=0;; n++){	/* we don't use smalloc, to avoid memset */
			q = mallocz(sizeof(*q)+msize, 0);
			if(q != nil || n > 6000)
				break;
			if(n%600 == 0)
				print("exportproc %ld: waiting for memory (%d) for request\n", up->pid, msize);
			tsleep(&up->sleep, return0, nil, 100);
		}
		if(q == nil){
			kwerrstr("out of memory: read request");
			n = -1;
			break;
		}
		/* clear only the header; the message buffer follows it in the same allocation */
		memset(q, 0, sizeof(*q));
		q->buf = (uchar*)q + sizeof(*q);
		q->bsize = msize;

		n = exreadmsg(fs->io, q->buf, msize);	/* TO DO: avoid copy */
		if(n <= 0)
			break;
		if(convM2S(q->buf, n, &q->in) != n){
			kwerrstr("bad T-message");
			n = -1;
			break;
		}
		type = q->in.type;
		/* the test on type&1 rejects odd-numbered types; replies are sent as type+1 */
		if(type < Tversion || type >= Tmax || type&1 || type == Terror){
			kwerrstr("invalid T-message type %d", type);
			n = -1;
			break;
		}

		if(exdebug)
			print("export %ld <- %F\n", up->pid, &q->in);

		q->out.type = type+1;
		q->out.tag = q->in.tag;

		/* from here on q holds a reference to fs, released by exfreeq */
		q->export = fs;
		incref(&fs->r);

		if(fs->readonly){
			switch(type){
			case Topen:
				/* a plain read-only open is still allowed */
				if((q->in.mode & (ORCLOSE|OTRUNC|3)) == OREAD)
					break;
				/* FALL THROUGH */
			case Tcreate:
			case Twrite:
			case Tremove:
			case Twstat:
				q->out.type = Rerror;
				q->out.ename = "file system read only";
				exreply(q, "exportproc");
				exfreeq(q);
				continue;
			}
		}

		if(q->in.type == Tflush){
			if(exflushed(fs, q)){
				/* not yet started or not found (flush arrived after reply); we reply */
				if(exdebug)
					print("export: flush %d\n", q->in.oldtag);
				exreply(q, "exportproc");
				exfreeq(q);
			}
			/* otherwise exflushed queued q on the running request, whose slave will reply */
			continue;
		}

		/* append the request to the global work list */
		lock(&exq.l);
		if(exq.head == nil)
			exq.head = q;
		else
			exq.tail->next = q;
		q->next = nil;
		exq.tail = q;
		unlock(&exq.l);
		/* start another slave when none appears to be waiting on qwait */
		if(exq.qwait.head == nil)
			kproc("exslave", exslave, nil, 0);
		wakeup(&exq.rwait);
	}

	if(exdebug){
		if(n < 0)
			print("exportproc %ld shut down: %s\n", up->pid, up->env->errstr);
		else
			print("exportproc %ld shut down\n", up->pid);
	}

	/* every break above leaves q either nil or allocated but not yet queued */
	free(q);
	exshutdown(fs);
	/* read async before exfree, which may free fs */
	async = fs->async;
	exfree(fs);

	if(async)
		pexit("mount shut down", 0);
}
370 
371 static int
372 exflushed(Export *fs, Exq *fq)
373 {
374 	Exq *q, **last;
375 	ulong pid;
376 
377 	/* not yet started? */
378 	lock(&exq.l);
379 	for(last = &exq.head; (q = *last) != nil; last = &q->next)
380 		if(q->export == fs && q->in.tag == fq->in.oldtag){
381 			*last = q->next;
382 			unlock(&exq.l);
383 			/* not yet started: discard, and Rflush */
384 			exfreeq(q);
385 			return 1;
386 		}
387 	unlock(&exq.l);
388 
389 	/* tricky case: in progress */
390 	lock(fs);
391 	for(q = fs->work; q != nil; q = q->next)
392 		if(q->in.tag == fq->in.oldtag){
393 			pid = 0;
394 			lock(q);
395 			if(q->finished){
396 				/* slave replied and emptied its flush queue; we can Rflush now */
397 				unlock(q);
398 				return 1;
399 			}
400 			/* append to slave's flush queue */
401 			fq->next = nil;
402 			if(q->flush != nil)
403 				q->flusht->next = fq;
404 			else
405 				q->flush = fq;
406 			q->flusht = fq;
407 			if(q->busy){
408 				pid = q->slave->pid;
409 				swiproc(q->slave, 0);
410 			}
411 			unlock(q);
412 			unlock(fs);
413 			if(exdebug && pid)
414 				print("export: swiproc %ld to flush %d\n", pid, fq->in.oldtag);
415 			return 0;
416 		}
417 	unlock(fs);
418 
419 	/* not found */
420 	return 1;
421 }
422 
423 static void
424 exfreeq(Exq *q)
425 {
426 	Exq *fq;
427 
428 	while((fq = q->flush) != nil){
429 		q->flush = fq->next;
430 		exfree(fq->export);
431 		free(fq);
432 	}
433 	exfree(q->export);
434 	free(q);
435 }
436 
437 static void
438 exshutdown(Export *fs)
439 {
440 	Exq *q, **last;
441 
442 	/* work not started */
443 	lock(&exq.l);
444 	for(last = &exq.head; (q = *last) != nil;)
445 		if(q->export == fs){
446 			*last = q->next;
447 			exfreeq(q);
448 		}else
449 			last = &q->next;
450 	unlock(&exq.l);
451 
452 	/* tell slaves to abandon work in progress */
453 	lock(fs);
454 	while((q = fs->work) != nil){
455 		fs->work = q->next;
456 		lock(q);
457 		q->shut = 1;
458 		swiproc(q->slave, 0);	/* whether busy or not */
459 		unlock(q);
460 	}
461 	unlock(fs);
462 }
463 
464 static void
465 exfreefids(Export *fs)
466 {
467 	Fid *f, *n;
468 	int i;
469 
470 	for(i = 0; i < Nfidhash; i++){
471 		for(f = fs->fid[i]; f != nil; f = n){
472 			n = f->next;
473 			f->attached = 0;
474 			if(f->ref == 0) {
475 				if(f->chan != nil)
476 					cclose(f->chan);
477 				freeuqid(fs, f->qid);
478 				free(f);
479 			} else
480 				print("exfreefids: busy fid\n");
481 		}
482 	}
483 }
484 
485 static void
486 exfree(Export *fs)
487 {
488 	if(exdebug)
489 		print("export p/s %ld free %p ref %ld\n", up->pid, fs, fs->r.ref);
490 	if(decref(&fs->r) != 0)
491 		return;
492 	closepgrp(fs->pgrp);
493 	closeegrp(fs->egrp);
494 	closefgrp(fs->fgrp);
495 	cclose(fs->root);
496 	cclose(fs->io);
497 	exfreefids(fs);
498 	free(fs->user);
499 	free(fs);
500 }
501 
502 static int
503 exwork(void*)
504 {
505 	return exq.head != nil;
506 }
507 
/*
 * Body of a slave kernel process; never returns.
 *
 * Each iteration waits for a request on the global exq list, moves it
 * to its export's work list, adopts the export's name space, groups
 * and user, runs the handler for the message, and then sends the reply
 * followed by replies to any flushes queued against the request.
 */
static void
exslave(void*)
{
	Export *fs;
	Exq *q, *t, *fq, **last;
	char *err;
	int nstat;

	for(;;){
		/* only the holder of qwait sleeps on rwait */
		qlock(&exq.qwait);
		if(waserror()){
			qunlock(&exq.qwait);
			continue;
		}
		sleep(&exq.rwait, exwork, nil);
		poperror();

		lock(&exq.l);
		q = exq.head;
		if(q == nil) {
			unlock(&exq.l);
			qunlock(&exq.qwait);
			continue;
		}
		exq.head = q->next;

		qunlock(&exq.qwait);

		/*
		 * put the job on the work queue before it's
		 * visible as off of the head queue, so it's always
		 * findable for flushes and shutdown
		 */
		notkilled();
		q->slave = up;
		q->busy = 1;	/* fcall in progress: interruptible */
		fs = q->export;
		lock(fs);
		q->next = fs->work;
		fs->work = q;
		unlock(fs);
		unlock(&exq.l);

		/* run the request with the export's groups and user */
		up->env->pgrp = q->export->pgrp;
		up->env->egrp = q->export->egrp;
		up->env->fgrp = q->export->fgrp;
		kstrdup(&up->env->user, q->export->user);

		if(exdebug > 1)
			print("exslave %ld dispatch %F\n", up->pid, &q->in);

		if(waserror()){
			print("exslave %ld err %s\n", up->pid, up->env->errstr);	/* shouldn't happen */
			err = up->env->errstr;
		}else{
			if(q->in.type >= Tmax || !fcalls[q->in.type]){
				snprint(up->genbuf, sizeof(up->genbuf), "unknown message: %F", &q->in);
				err = up->genbuf;
			}else{
				/* point the reply's data or stat area into the request's own buffer */
				switch(q->in.type){
				case Tread:
					q->out.data = (char*)q->buf + IOHDRSZ;
					break;
				case Tstat:
					q->out.stat = q->buf + MSGHDRSZ + BIT16SZ;	/* leaves it just where we want it */
					nstat = q->bsize;
					if(nstat > STATMAX)
						nstat = STATMAX;
					nstat -= MSGHDRSZ+BIT16SZ;
					q->out.nstat = nstat;
					break;
				}
				err = (*fcalls[q->in.type])(fs, &q->in, &q->out);
			}
			poperror();
		}

		/*
		 * if the fcall completed without error we must reply,
		 * even if a flush is pending (because the underlying server
		 * might have changed state), unless the export has shut down completely.
		 * must also reply to each flush in order, and only after the original reply (if sent).
		 */
		lock(q);
		notkilled();
		q->busy = 0;	/* operation complete */
		if(!q->shut){
			if(q->flush == nil || err == nil){
				/* q is unlocked while replying; exflushed may queue more flushes meanwhile */
				unlock(q);
				q->out.type = q->in.type+1;
				q->out.tag = q->in.tag;
				if(err){
					q->out.type = Rerror;
					q->out.ename = err;
				}
				exreply(q, "exslave");
				lock(q);
			}
			while((fq = q->flush) != nil && !q->shut){
				q->flush = fq->next;
				unlock(q);
				exreply(fq, "exslave");
				exfreeq(fq);
				lock(q);
			}
		}
		q->finished = 1;	/* promise not to send any more */
		unlock(q);

		/* take q off the export's work list, unless exshutdown already did */
		lock(fs);
		for(last = &fs->work; (t = *last) != nil; last = &t->next)
			if(t == q){
				*last = q->next;
				break;
			}
		unlock(fs);

		notkilled();
		exfreeq(q);
	}
}
629 
630 static void
631 exreply(Exq *q, char *who)
632 {
633 	Export *fs;
634 	Fcall *r;
635 	int n;
636 
637 	fs = q->export;
638 	r = &q->out;
639 
640 	n = convS2M(r, q->buf, q->bsize);
641 	if(n == 0){
642 		r->type = Rerror;
643 		if(fs->msize == 0)
644 			r->ename = "Tversion not seen";
645 		else
646 			r->ename = "failed to convert R-message";
647 		n = convS2M(r, q->buf, q->bsize);
648 	}
649 
650 	if(exdebug)
651 		print("%s %ld -> %F\n", who, up->pid, r);
652 
653 	if(!waserror()){
654 		devtab[fs->io->type]->write(fs->io, q->buf, n, 0);
655 		poperror();
656 	}
657 }
658 
659 static int
660 exiounit(Export *fs, Chan *c)
661 {
662 	int iounit;
663 
664 	iounit = fs->msize-IOHDRSZ;
665 	if(c->iounit != 0 && c->iounit < fs->msize)
666 		iounit = c->iounit;
667 	return iounit;
668 }
669 
670 static Qid
671 Exrmtqid(Chan *c, Uqid *qid)
672 {
673 	Qid q;
674 
675 	q.path = qid->newpath;
676 	q.vers = c->qid.vers;
677 	q.type = c->qid.type;
678 	return q;
679 }
680 
681 static Fid*
682 Exmkfid(Export *fs, ulong fid)
683 {
684 	ulong h;
685 	Fid *f, *nf;
686 
687 	nf = malloc(sizeof(Fid));
688 	if(nf == nil)
689 		return nil;
690 	lock(&fs->fidlock);
691 	h = fid % Nfidhash;
692 	for(f = fs->fid[h]; f != nil; f = f->next){
693 		if(f->fid == fid){
694 			unlock(&fs->fidlock);
695 			free(nf);
696 			return nil;
697 		}
698 	}
699 
700 	nf->next = fs->fid[h];
701 	if(nf->next != nil)
702 		nf->next->last = &nf->next;
703 	nf->last = &fs->fid[h];
704 	fs->fid[h] = nf;
705 
706 	nf->fid = fid;
707 	nf->ref = 1;
708 	nf->attached = 1;
709 	nf->offset = 0;
710 	nf->chan = nil;
711 	nf->qid = nil;
712 	unlock(&fs->fidlock);
713 	return nf;
714 }
715 
716 static Fid*
717 Exgetfid(Export *fs, ulong fid)
718 {
719 	Fid *f;
720 	ulong h;
721 
722 	lock(&fs->fidlock);
723 	h = fid % Nfidhash;
724 	for(f = fs->fid[h]; f; f = f->next) {
725 		if(f->fid == fid){
726 			if(f->attached == 0)
727 				break;
728 			f->ref++;
729 			unlock(&fs->fidlock);
730 			return f;
731 		}
732 	}
733 	unlock(&fs->fidlock);
734 	return nil;
735 }
736 
737 static void
738 Exputfid(Export *fs, Fid *f)
739 {
740 	Chan *c;
741 
742 	lock(&fs->fidlock);
743 	f->ref--;
744 	if(f->ref == 0 && f->attached == 0){
745 		c = f->chan;
746 		f->chan = nil;
747 		*f->last = f->next;
748 		if(f->next != nil)
749 			f->next->last = f->last;
750 		unlock(&fs->fidlock);
751 		if(c != nil)
752 			cclose(c);
753 		freeuqid(fs, f->qid);
754 		free(f);
755 		return;
756 	}
757 	unlock(&fs->fidlock);
758 }
759 
760 static Chan*
761 exmount(Chan *c, Mhead **mp, int doname)
762 {
763 	Chan *nc;
764 	Cname *oname;
765 
766 	nc = nil;
767 	if((c->flag & COPEN) == 0 && findmount(&nc, mp, c->type, c->dev, c->qid)){
768 		if(waserror()){
769 			cclose(nc);
770 			nexterror();
771 		}
772 		nc = cunique(nc);
773 		poperror();
774 		if(doname){
775 			oname = c->name;
776 			incref(oname);
777 			cnameclose(nc->name);
778 			nc->name = oname;
779 		}
780 		return nc;
781 	}
782 	incref(c);
783 	return c;
784 }
785 
786 static char*
787 Exversion(Export *fs, Fcall *t, Fcall *r)
788 {
789 	char *p;
790 	static char version[] = VERSION9P;
791 	int iounit;
792 
793 	r->msize = t->msize;
794 	if(r->msize > MAXRPCMAX)
795 		r->msize = MAXRPCMAX;
796 	iounit = fs->io->iounit;
797 	if(iounit != 0 && iounit > 64 && iounit < r->msize)
798 		r->msize = iounit;
799 	if(r->msize < 64)
800 		return "message size too small";
801 	if((p = strchr(t->version, '.')) != nil)
802 		*p = 0;
803 	if(strncmp(t->version, "9P", 2) ==0 && strcmp(version, t->version) <= 0){
804 		r->version = version;
805 		fs->msize = r->msize;
806 	}else
807 		r->version = "unknown";
808 	return nil;
809 }
810 
811 static char*
812 Exauth(Export *fs, Fcall *t, Fcall *r)
813 {
814 	USED(fs);
815 	USED(t);
816 	USED(r);
817 	return "authentication not required";
818 }
819 
820 static char*
821 Exattach(Export *fs, Fcall *t, Fcall *r)
822 {
823 	Fid *f;
824 
825 	f = Exmkfid(fs, t->fid);
826 	if(f == nil)
827 		return Edupfid;
828 	if(waserror()){
829 		f->attached = 0;
830 		Exputfid(fs, f);
831 		return up->env->errstr;
832 	}
833 	f->chan = cclone(fs->root);
834 	f->qid = uqidalloc(fs, f->chan);
835 	poperror();
836 	r->qid = Exrmtqid(f->chan, f->qid);
837 	Exputfid(fs, f);
838 	return nil;
839 }
840 
841 static char*
842 Exclunk(Export *fs, Fcall *t, Fcall *r)
843 {
844 	Fid *f;
845 
846 	USED(r);
847 	f = Exgetfid(fs, t->fid);
848 	if(f == nil)
849 		return Enofid;
850 	f->attached = 0;
851 	Exputfid(fs, f);
852 	return nil;
853 }
854 
855 static int
856 safewalk(Chan **cp, char **names, int nnames, int nomount, int *nerror)
857 {
858 	int r;
859 
860 	/* walk can raise error */
861 	if(waserror())
862 		return -1;
863 	r = walk(cp, names, nnames, nomount, nerror);
864 	poperror();
865 	return r;
866 }
867 
868 static char*
869 Exwalk(Export *fs, Fcall *t, Fcall *r)
870 {
871 	Fid *f, *nf;
872 	Chan *c;
873 	char *name;
874 	Uqid *qid;
875 	int i;
876 
877 	f = Exgetfid(fs, t->fid);
878 	if(f == nil)
879 		return Enofid;
880 	if(f->chan->flag & COPEN){
881 		Exputfid(fs, f);
882 		return Eopen;
883 	}
884 
885 	if(waserror())
886 		return up->env->errstr;
887 	c = cclone(f->chan);
888 	poperror();
889 	qid = f->qid;
890 	incref(qid);
891 	r->nwqid = 0;
892 	if(t->nwname > 0){
893 		for(i=0; i<t->nwname; i++){
894 			name = t->wname[i];
895 			if(!exisroot(fs, c) || *name != '\0' && strcmp(name, "..") != 0){
896 				if(safewalk(&c, &name, 1, 0, nil) < 0){
897 					/* leave the original state on error */
898 					cclose(c);
899 					freeuqid(fs, qid);
900 					Exputfid(fs, f);
901 					if(i == 0)
902 						return up->env->errstr;
903 					return nil;
904 				}
905 				freeuqid(fs, qid);
906 				qid = uqidalloc(fs, c);
907 			}
908 			r->wqid[r->nwqid++] = Exrmtqid(c, qid);
909 		}
910 	}
911 
912 	if(t->newfid != t->fid){
913 		nf = Exmkfid(fs, t->newfid);
914 		if(nf == nil){
915 			cclose(c);
916 			freeuqid(fs, qid);
917 			Exputfid(fs, f);
918 			return Edupfid;
919 		}
920 		nf->chan = c;
921 		nf->qid = qid;
922 		Exputfid(fs, nf);
923 	}else{
924 		cclose(f->chan);
925 		f->chan = c;
926 		freeuqid(fs, f->qid);
927 		f->qid = qid;
928 	}
929 	Exputfid(fs, f);
930 	return nil;
931 }
932 
933 static char*
934 Exopen(Export *fs, Fcall *t, Fcall *r)
935 {
936 	Fid *f;
937 	Chan *c;
938 	Uqid *qid;
939 	Mhead *m;
940 
941 	f = Exgetfid(fs, t->fid);
942 	if(f == nil)
943 		return Enofid;
944 	if(f->chan->flag & COPEN){
945 		Exputfid(fs, f);
946 		return Emode;
947 	}
948 	m = nil;
949 	c = exmount(f->chan, &m, 1);
950 	if(waserror()){
951 		cclose(c);
952 		Exputfid(fs, f);
953 		return up->env->errstr;
954 	}
955 
956 	/* only save the mount head if it's a multiple element union */
957 	if(m && m->mount && m->mount->next)
958 		c->umh = m;
959 	else
960 		putmhead(m);
961 
962 	c = devtab[c->type]->open(c, t->mode);
963 	if(t->mode & ORCLOSE)
964 		c->flag |= CRCLOSE;
965 
966 	qid = uqidalloc(fs, c);
967 	poperror();
968 	freeuqid(fs, f->qid);
969 	cclose(f->chan);
970 	f->chan = c;
971 	f->qid = qid;
972 	f->offset = 0;
973 	r->qid = Exrmtqid(c, f->qid);
974 	r->iounit = exiounit(fs, c);
975 	Exputfid(fs, f);
976 	return nil;
977 }
978 
/*
 * Tcreate: create t->name in the directory the fid refers to and make
 * the fid refer to the new, open file.  When the directory is a mount
 * point, the directory to create in is chosen by createdir.
 * The names "." and ".." are refused, as is a fid that is already open.
 *
 * Up to four error labels are stacked here; each one undoes only what
 * was acquired since the label before it and then passes the error on,
 * except the outermost, which releases the fid and returns the error
 * string.
 */
static char*
Excreate(Export *fs, Fcall *t, Fcall *r)
{
	Fid *f;
	/* wrapped in volatile structs because they change after waserror and are read in the error labels */
	volatile struct {Chan *c;} c, dc;
	Cname *oname;
	Uqid *qid;
	Mhead *m;

	f = Exgetfid(fs, t->fid);
	if(f == nil)
		return Enofid;
	if(f->chan->flag & COPEN){
		Exputfid(fs, f);
		return Emode;
	}
	/* label 1: release the fid */
	if(waserror()){
		Exputfid(fs, f);
		return up->env->errstr;
	}
	validname(t->name, 0);
	if(t->name[0] == '.' && (t->name[1] == '\0' || t->name[1] == '.' && t->name[2] == '\0'))
		error(Efilename);	/* underlying server should check, but stop it here */

	m = nil;
	c.c = exmount(f->chan, &m, 1);
	/* label 2: close the working channel and release the mount head */
	if(waserror()){
		cclose(c.c);
		if(m != nil)
			putmhead(m);
		nexterror();
	}
	if(m != nil){
		/* keep the original name so the new channel can carry it */
		oname = c.c->name;
		incref(oname);
		/* label 3: drop the extra name reference */
		if(waserror()){
			cnameclose(oname);
			nexterror();
		}
		dc.c = createdir(c.c, m);
		/* label 4: close the directory chosen for the create */
		if(waserror()){
			cclose(dc.c);
			nexterror();
		}
		c.c = cunique(dc.c);
		poperror();
		cnameclose(c.c->name);
		poperror();
		c.c->name = oname;
	}
	devtab[c.c->type]->create(c.c, t->name, t->mode, t->perm);
	c.c->name = addelem(c.c->name, t->name);
	if(t->mode & ORCLOSE)
		c.c->flag |= CRCLOSE;
	qid = uqidalloc(fs, c.c);
	poperror();
	if(m != nil)
		putmhead(m);

	poperror();
	/* success: the created channel replaces the fid's directory channel */
	cclose(f->chan);
	f->chan = c.c;
	freeuqid(fs, f->qid);
	f->qid = qid;
	r->qid = Exrmtqid(c.c, f->qid);
	r->iounit = exiounit(fs, c.c);
	Exputfid(fs, f);
	return nil;
}
1048 
1049 static char*
1050 Exread(Export *fs, Fcall *t, Fcall *r)
1051 {
1052 	Fid *f;
1053 	Chan *c;
1054 	vlong off;
1055 	int dir, n, seek;
1056 
1057 	f = Exgetfid(fs, t->fid);
1058 	if(f == nil)
1059 		return Enofid;
1060 
1061 	if(waserror()) {
1062 		Exputfid(fs, f);
1063 		return up->env->errstr;
1064 	}
1065 	c = f->chan;
1066 	if((c->flag & COPEN) == 0)
1067 		error(Emode);
1068 	if(c->mode != OREAD && c->mode != ORDWR)
1069 		error(Eaccess);
1070 	if(t->count < 0 || t->count > fs->msize-IOHDRSZ)
1071 		error(Ecount);
1072 	if(t->offset < 0)
1073 		error(Enegoff);
1074 	dir = c->qid.type & QTDIR;
1075 	if(dir && t->offset != f->offset){
1076 		if(t->offset != 0)
1077 			error(Eseekdir);
1078 		f->offset = 0;
1079 		c->uri = 0;
1080 		c->dri = 0;
1081 	}
1082 
1083 	for(;;){
1084 		n = t->count;
1085 		seek = 0;
1086 		off = t->offset;
1087 		if(dir && f->offset != off){
1088 			off = f->offset;
1089 			n = t->offset - off;
1090 			if(n > MAXFDATA)
1091 				n = MAXFDATA;
1092 			seek = 1;
1093 		}
1094 		if(dir && c->umh != nil){
1095 			if(0)
1096 				print("union read %d uri %d dri %d\n", seek, c->uri, c->dri);
1097 			n = unionread(c, r->data, n);
1098 		}
1099 		else{
1100 			c->offset = off;
1101 			n = devtab[c->type]->read(c, r->data, n, off);
1102 			lock(c);
1103 			c->offset += n;
1104 			unlock(c);
1105 		}
1106 		f->offset = off + n;
1107 		if(n == 0 || !seek)
1108 			break;
1109 	}
1110 	r->count = n;
1111 
1112 	poperror();
1113 	Exputfid(fs, f);
1114 	return nil;
1115 }
1116 
1117 static char*
1118 Exwrite(Export *fs, Fcall *t, Fcall *r)
1119 {
1120 	Fid *f;
1121 	Chan *c;
1122 
1123 	f = Exgetfid(fs, t->fid);
1124 	if(f == nil)
1125 		return Enofid;
1126 	if(waserror()){
1127 		Exputfid(fs, f);
1128 		return up->env->errstr;
1129 	}
1130 	c = f->chan;
1131 	if((c->flag & COPEN) == 0)
1132 		error(Emode);
1133 	if(c->mode != OWRITE && c->mode != ORDWR)
1134 		error(Eaccess);
1135 	if(c->qid.type & QTDIR)
1136 		error(Eisdir);
1137 	if(t->count < 0 || t->count > fs->msize-IOHDRSZ)
1138 		error(Ecount);
1139 	if(t->offset < 0)
1140 		error(Enegoff);
1141 	r->count = devtab[c->type]->write(c, t->data, t->count, t->offset);
1142 	poperror();
1143 	Exputfid(fs, f);
1144 	return nil;
1145 }
1146 
1147 static char*
1148 Exstat(Export *fs, Fcall *t, Fcall *r)
1149 {
1150 	Fid *f;
1151 	Chan *c;
1152 	int n;
1153 
1154 	f = Exgetfid(fs, t->fid);
1155 	if(f == nil)
1156 		return Enofid;
1157 	c = exmount(f->chan, nil, 1);
1158 	if(waserror()){
1159 		cclose(c);
1160 		Exputfid(fs, f);
1161 		return up->env->errstr;
1162 	}
1163 	n = devtab[c->type]->stat(c, r->stat, r->nstat);
1164 	if(n <= BIT16SZ)
1165 		error(Eshortstat);
1166 	r->nstat = n;
1167 	poperror();
1168 	cclose(c);
1169 	Exputfid(fs, f);
1170 	return nil;
1171 }
1172 
1173 static char*
1174 Exwstat(Export *fs, Fcall *t, Fcall *r)
1175 {
1176 	Fid *f;
1177 	Chan *c;
1178 
1179 	USED(r);
1180 	f = Exgetfid(fs, t->fid);
1181 	if(f == nil)
1182 		return Enofid;
1183 	if(waserror()){
1184 		Exputfid(fs, f);
1185 		return up->env->errstr;
1186 	}
1187 	validstat(t->stat, t->nstat);	/* check name */
1188 
1189 	c = exmount(f->chan, nil, 0);
1190 	if(waserror()){
1191 		cclose(c);
1192 		nexterror();
1193 	}
1194 	devtab[c->type]->wstat(c, t->stat, t->nstat);
1195 	poperror();
1196 
1197 	cclose(c);
1198 	poperror();
1199 	Exputfid(fs, f);
1200 	return nil;
1201 }
1202 
1203 static char*
1204 Exremove(Export *fs, Fcall *t, Fcall *r)
1205 {
1206 	Fid *f;
1207 	Chan *c;
1208 
1209 	USED(r);
1210 	f = Exgetfid(fs, t->fid);
1211 	if(f == nil)
1212 		return Enofid;
1213 	if(waserror()){
1214 		f->attached = 0;
1215 		Exputfid(fs, f);
1216 		return up->env->errstr;
1217 	}
1218 	c = exmount(f->chan, nil, 0);
1219 	if(waserror()){
1220 		c->type = 0;	/* see below */
1221 		cclose(c);
1222 		nexterror();
1223 	}
1224 	devtab[c->type]->remove(c);
1225 	poperror();
1226 	poperror();
1227 
1228 	/*
1229 	 * chan is already clunked by remove.
1230 	 * however, we need to recover the chan,
1231 	 * and follow sysremove's lead in making it point to root.
1232 	 */
1233 	c->type = 0;
1234 
1235 	cclose(c);
1236 	f->attached = 0;
1237 	Exputfid(fs, f);
1238 	return nil;
1239 }
1240 
1241 /*
1242  * unique path generation
1243  */
1244 
1245 static int
1246 uqidhash(vlong path)
1247 {
1248 	ulong p;
1249 	p = (ulong)path;
1250 	return ((p>>16) ^ (p>>8) ^ p) & (Nqidhash-1);
1251 }
1252 
1253 static Uqid **
1254 uqidlook(Uqid **tab, Chan *c, vlong path)
1255 {
1256 	Uqid **hp, *q;
1257 
1258 	for(hp = &tab[uqidhash(path)]; (q = *hp) != nil; hp = &q->next)
1259 		if(q->type == c->type && q->dev == c->dev && q->oldpath == path)
1260 			break;
1261 	return hp;
1262 }
1263 
1264 static int
1265 uqidexists(Uqid **tab, vlong path)
1266 {
1267 	int i;
1268 	Uqid *q;
1269 
1270 	for(i=0; i<Nqidhash; i++)
1271 		for(q = tab[i]; q != nil; q = q->next)
1272 			if(q->newpath == path)
1273 				return 1;
1274 	return 0;
1275 }
1276 
1277 static Uqid *
1278 uqidalloc(Export *fs, Chan *c)
1279 {
1280 	Uqid **hp, *q;
1281 
1282 	qlock(&fs->qidlock);
1283 	hp = uqidlook(fs->qids, c, c->qid.path);
1284 	if((q = *hp) != nil){
1285 		incref(q);
1286 		qunlock(&fs->qidlock);
1287 		return q;
1288 	}
1289 	q = mallocz(sizeof(*q), 1);
1290 	if(q == nil){
1291 		qunlock(&fs->qidlock);
1292 		error(Enomem);
1293 	}
1294 	q->ref = 1;
1295 	q->type = c->type;
1296 	q->dev = c->dev;
1297 	q->oldpath = c->qid.path;
1298 	q->newpath = c->qid.path;
1299 	while(uqidexists(fs->qids, q->newpath)){
1300 		if(++fs->pathgen >= (1<<16))
1301 			fs->pathgen = 1;
1302 		q->newpath = ((vlong)fs->pathgen<<48) | (q->newpath & QIDMASK);
1303 	}
1304 	q->next = nil;
1305 	*hp = q;
1306 	qunlock(&fs->qidlock);
1307 	return q;
1308 }
1309 
1310 static void
1311 freeuqid(Export *fs, Uqid *q)
1312 {
1313 	Uqid **hp;
1314 
1315 	if(q == nil)
1316 		return;
1317 	qlock(&fs->qidlock);
1318 	if(decref(q) == 0){
1319 		hp = &fs->qids[uqidhash(q->oldpath)];
1320 		for(; *hp != nil; hp = &(*hp)->next)
1321 			if(*hp == q){
1322 				*hp = q->next;
1323 				free(q);
1324 				break;
1325 			}
1326 	}
1327 	qunlock(&fs->qidlock);
1328 }
1329