xref: /plan9/sys/src/9/port/devfs.c (revision b65f1be64966e4c3e4f2be91509008e1b9a3a980)
1 /*
2  * File system devices.
3  * Follows device config in Ken's file server.
4  * Builds mirrors, concatenations, interleavings, and partitions
5  * of devices out of other (inner) devices.
6  * It is ok if inner devices are provided by this driver.
7  *
8  * Built files are grouped on different directories
9  * (called trees, and used to represent disks).
10  * The "#k/fs" tree is always available and never goes away.
11  * Configuration changes happen only while no I/O is in progress.
12  *
13  * Default sector size is one byte unless changed by the "disk" ctl.
14  */
15 
16 #include "u.h"
17 #include "../port/lib.h"
18 #include "mem.h"
19 #include "dat.h"
20 #include "fns.h"
21 #include "io.h"
22 #include "ureg.h"
23 #include "../port/error.h"
24 
/*
 * Constants for the driver: configuration commands (the first few
 * double as device types), default geometry, qid layout and limits.
 */
enum
{
	/* ctl commands; Fmirror..Fpart are also stored in Fsdev.type */
	Fnone,
	Fmirror,		/* mirror of others */
	Fcat,			/* catenation of others */
	Finter,			/* interleaving of others */
	Fpart,			/* part of other */
	Fclear,			/* start over */
	Fdel,			/* delete a configured device */
	Fdisk,			/* set default tree and sector size */

	Sectorsz = 1,		/* default sector size, in bytes */
	Blksize	= 8*1024,	/* interleave unit, for Finter only */

	Incr = 5,		/* growth step of a tree's devs array */

	/*
	 * Every qid path carries the tree number in its upper half.
	 * Tree 0 is #k/fs; it is added automatically, never goes
	 * away, and its first file qid is the ctl file.
	 */
	Qtop	= 0,		/* #k */
	Qdir,			/* directory (#k/fs) */
	Qctl,			/* ctl, only for #k/fs/ctl */
	Qfirst,			/* first qid assigned to a device */

	/* direction argument of the i/o helpers */
	Iswrite = 0,
	Isread,

	/* lookup policy for gettree and getdev */
	Optional = 0,
	Mustexist,

	/* tunable parameters */
	Maxconf	= 4*1024,	/* max length for config */
	Ndevs	= 32,		/* max. inner devs per command */
	Ntrees	= 128,		/* max. number of trees */
	Maxretries = 3,		/* max. retries of i/o errors */
	Retrypause = 5000,	/* ms. to pause between retries */
};
64 
65 typedef struct Inner Inner;
66 typedef struct Fsdev Fsdev;
67 typedef struct Tree Tree;
68 
69 struct Inner
70 {
71 	char	*iname;		/* inner device name */
72 	vlong	isize;		/* size of inner device */
73 	Chan	*idev;		/* inner device */
74 };
75 
76 struct Fsdev
77 {
78 	Ref;			/* one per Chan doing I/O */
79 	int	gone;		/* true if removed */
80 	int	vers;		/* qid version for this device */
81 	int	type;		/* Fnone, Fmirror, ... */
82 	char	*name;		/* name for this fsdev */
83 	Tree*	tree;		/* where the device is kept */
84 	vlong	size;		/* min(inner[X].isize) */
85 	vlong	start;		/* start address (for Fpart) */
86 	uint	ndevs;		/* number of inner devices */
87 	int	perm;		/* minimum of inner device perms */
88 	Inner	*inner[Ndevs];	/* inner devices */
89 };
90 
91 struct Tree
92 {
93 	char	*name;		/* name for #k/<name> */
94 	Fsdev	**devs;		/* devices in dir. */
95 	uint	ndevs;		/* number of devices */
96 	uint	nadevs;		/* number of allocated devices in devs */
97 };
98 
99 #define dprint if(debug)print
100 
101 extern Dev fsdevtab;		/* forward */
102 
103 static RWlock lck;		/* r: use devices; w: change config  */
104 static Tree fstree;		/* The main "fs" tree. Never goes away */
105 static Tree *trees[Ntrees];	/* internal representation of config */
106 static int ntrees;		/* max number of trees */
107 static int qidvers;
108 
109 static char *disk;		/* default tree name used */
110 static char *source;		/* default inner device used */
111 static int sectorsz = Sectorsz;	/* default sector size */
112 
113 static char confstr[Maxconf];	/* textual configuration */
114 
115 static int debug;
116 
117 static char cfgstr[] = "fsdev:\n";
118 
119 static Qid tqid = {Qtop, 0, QTDIR};
120 static Qid cqid = {Qctl, 0, 0};
121 
122 static char* tnames[] = {
123 	[Fmirror]	"mirror",
124 	[Fcat]		"cat",
125 	[Finter]	"inter",
126 	[Fpart]		"part",
127 };
128 
129 static Cmdtab configs[] = {
130 	Fmirror,"mirror",	0,
131 	Fcat,	"cat",		0,
132 	Finter,	"inter",	0,
133 	Fpart,	"part",		0,
134 	Fclear,	"clear",	1,
135 	Fdel,	"del",		2,
136 	Fdisk,	"disk",		0,
137 };
138 
139 static char Egone[] = "device is gone";		/* file has been removed */
140 
141 static char*
seprintdev(char * s,char * e,Fsdev * mp)142 seprintdev(char *s, char *e, Fsdev *mp)
143 {
144 	int i;
145 
146 	if(mp == nil)
147 		return seprint(s, e, "<null Fsdev>");
148 	if(mp->type < 0 || mp->type >= nelem(tnames) || tnames[mp->type] == nil)
149 		return seprint(s, e, "bad device type %d\n", mp->type);
150 
151 	s = strecpy(s, e, tnames[mp->type]);
152 	if(mp->tree != &fstree)
153 		s = seprint(s, e, " %s/%s", mp->tree->name, mp->name);
154 	else
155 		s = seprint(s, e, " %s", mp->name);
156 	for(i = 0; i < mp->ndevs; i++)
157 		s = seprint(s, e, " %s", mp->inner[i]->iname);
158 	switch(mp->type){
159 	case Fmirror:
160 	case Fcat:
161 	case Finter:
162 		s = strecpy(s, e, "\n");
163 		break;
164 	case Fpart:
165 		s = seprint(s, e, " %ulld %ulld\n", mp->start, mp->size);
166 		break;
167 	default:
168 		panic("#k: seprintdev bug");
169 	}
170 	return s;
171 }
172 
/*
 * Build a qid path: the low 16 bits of tree go in bits 16-31,
 * the low 16 bits of devno in bits 0-15.
 */
static vlong
mkpath(int tree, int devno)
{
	int hi, lo;

	hi = (tree & 0xFFFF) << 16;
	lo = devno & 0xFFFF;
	return hi | lo;
}
178 
/*
 * Extract the tree number (bits 16-31) from a qid path.
 */
static int
path2treeno(int q)
{
	int shifted;

	shifted = q >> 16;
	return shifted & 0xFFFF;
}
184 
/*
 * Extract the device number (bits 0-15) from a qid path.
 */
static int
path2devno(int q)
{
	int devno;

	devno = q & 0xFFFF;
	return devno;
}
190 
191 static Tree*
gettree(int i,int mustexist)192 gettree(int i, int mustexist)
193 {
194 	dprint("gettree %d\n", i);
195 	if(i < 0)
196 		panic("#k: bug: bad tree index %d in gettree", i);
197 	if(i >= ntrees || trees[i] == nil)
198 		if(mustexist)
199 			error(Enonexist);
200 		else
201 			return nil;
202 	return trees[i];
203 }
204 
205 static Fsdev*
getdev(Tree * t,int i,int mustexist)206 getdev(Tree *t, int i, int mustexist)
207 {
208 	dprint("getdev %d\n", i);
209 	if(i < 0)
210 		panic("#k: bug: bad dev index %d in getdev", i);
211 	if(i >= t->nadevs || t->devs[i] == nil)
212 		if(mustexist)
213 			error(Enonexist);
214 		else
215 			return nil;
216 	return t->devs[i];
217 }
218 
219 static Fsdev*
path2dev(int q)220 path2dev(int q)
221 {
222 	Tree	*t;
223 
224 	dprint("path2dev %ux\n", q);
225 	t = gettree(path2treeno(q), Mustexist);
226 	return getdev(t, path2devno(q) - Qfirst, Mustexist);
227 }
228 
229 static Tree*
treealloc(char * name)230 treealloc(char *name)
231 {
232 	int	i;
233 	Tree	*t;
234 
235 	dprint("treealloc %s\n", name);
236 	for(i = 0; i < nelem(trees); i++)
237 		if(trees[i] == nil)
238 			break;
239 	if(i == nelem(trees))
240 		return nil;
241 	t = trees[i] = mallocz(sizeof(Tree), 1);
242 	if(t == nil)
243 		return nil;
244 	if(i == ntrees)
245 		ntrees++;
246 	kstrdup(&t->name, name);
247 	return t;
248 }
249 
250 static Tree*
lookuptree(char * name)251 lookuptree(char *name)
252 {
253 	int i;
254 
255 	dprint("lookuptree %s\n", name);
256 	for(i = 0; i < ntrees; i++)
257 		if(trees[i] != nil && strcmp(trees[i]->name, name) == 0)
258 			return trees[i];
259 	return nil;
260 }
261 
262 static Fsdev*
devalloc(Tree * t,char * name)263 devalloc(Tree *t, char *name)
264 {
265 	int	i, ndevs;
266 	Fsdev	*mp, **devs;
267 
268 	dprint("devalloc %s %s\n", t->name, name);
269 	mp = mallocz(sizeof(Fsdev), 1);
270 	if(mp == nil)
271 		return nil;
272 	for(i = 0; i < t->nadevs; i++)
273 		if(t->devs[i] == nil)
274 			break;
275 	if(i >= t->nadevs){
276 		if(t->nadevs % Incr == 0){
277 			ndevs = t->nadevs + Incr;
278 			devs = realloc(t->devs, ndevs * sizeof(Fsdev*));
279 			if(devs == nil){
280 				free(mp);
281 				return nil;
282 			}
283 			t->devs = devs;
284 		}
285 		t->devs[t->nadevs] = nil;
286 		t->nadevs++;
287 	}
288 	kstrdup(&mp->name, name);
289 	mp->vers = ++qidvers;
290 	mp->tree = t;
291 	t->devs[i] = mp;
292 	t->ndevs++;
293 	return mp;
294 }
295 
296 static void
deltree(Tree * t)297 deltree(Tree *t)
298 {
299 	int i;
300 
301 	dprint("deltree %s\n", t->name);
302 	for(i = 0; i < ntrees; i++)
303 		if(trees[i] == t){
304 			if(i > 0){		/* "fs" never goes away */
305 				free(t->name);
306 				free(t->devs);
307 				free(t);
308 				trees[i] = nil;
309 			}
310 			return;
311 		}
312 	panic("#k: deltree: bug: tree not found");
313 }
314 
315 /*
316  * A device is gone and we know that all its users are gone.
317  * A tree is gone when all its devices are gone ("fs" is never gone).
318  * Must close devices outside locks, so we could nest our own devices.
319  */
320 static void
mdeldev(Fsdev * mp)321 mdeldev(Fsdev *mp)
322 {
323 	int	i;
324 	Inner	*in;
325 	Tree	*t;
326 
327 	dprint("deldev %s gone %d ref %uld\n", mp->name, mp->gone, mp->ref);
328 
329 	mp->gone = 1;
330 	mp->vers = ++qidvers;
331 
332 	wlock(&lck);
333 	t = mp->tree;
334 	for(i = 0; i < t->nadevs; i++)
335 		if(t->devs[i] == mp){
336 			t->devs[i] = nil;
337 			t->ndevs--;
338 			if(t->ndevs == 0)
339 				deltree(t);
340 			break;
341 		}
342 	wunlock(&lck);
343 
344 	free(mp->name);
345 	for(i = 0; i < mp->ndevs; i++){
346 		in = mp->inner[i];
347 		if(in->idev != nil)
348 			cclose(in->idev);
349 		free(in->iname);
350 		free(in);
351 	}
352 	if(debug)
353 		memset(mp, 9, sizeof *mp);	/* poison */
354 	free(mp);
355 }
356 
357 /*
358  * Delete one or all devices in one or all trees.
359  */
360 static void
mdelctl(char * tname,char * dname)361 mdelctl(char *tname, char *dname)
362 {
363 	int i, alldevs, alltrees, some;
364 	Fsdev *mp;
365 	Tree *t;
366 
367 	dprint("delctl %s\n", dname);
368 	alldevs = strcmp(dname, "*") == 0;
369 	alltrees = strcmp(tname, "*") == 0;
370 	some = 0;
371 Again:
372 	wlock(&lck);
373 	for(i = 0; i < ntrees; i++){
374 		t = trees[i];
375 		if(t == nil)
376 			continue;
377 		if(alltrees == 0 && strcmp(t->name, tname) != 0)
378 			continue;
379 		for(i = 0; i < t->nadevs; i++){
380 			mp = t->devs[i];
381 			if(t->devs[i] == nil)
382 				continue;
383 			if(alldevs == 0 && strcmp(mp->name, dname) != 0)
384 				continue;
385 			/*
386 			 * Careful: must close outside locks and that
387 			 * may change the file tree we are looking at.
388 			 */
389 			some++;
390 			mp->gone = 1;
391 			if(mp->ref == 0){
392 				incref(mp);	/* keep it there */
393 				wunlock(&lck);
394 				mdeldev(mp);
395 				goto Again;	/* tree can change */
396 			}
397 		}
398 	}
399 	wunlock(&lck);
400 	if(some == 0 && alltrees == 0)
401 		error(Enonexist);
402 }
403 
404 static void
setdsize(Fsdev * mp,vlong * ilen)405 setdsize(Fsdev* mp, vlong *ilen)
406 {
407 	int	i;
408 	vlong	inlen;
409 	Inner	*in;
410 
411 	dprint("setdsize %s\n", mp->name);
412 	for (i = 0; i < mp->ndevs; i++){
413 		in = mp->inner[i];
414 		in->isize = ilen[i];
415 		inlen = in->isize;
416 		switch(mp->type){
417 		case Finter:
418 			/* truncate to multiple of Blksize */
419 			inlen &= ~(Blksize-1);
420 			in->isize = inlen;
421 			/* fall through */
422 		case Fmirror:
423 			/* use size of smallest inner device */
424 			if (mp->size == 0 || mp->size > inlen)
425 				mp->size = inlen;
426 			break;
427 		case Fcat:
428 			mp->size += inlen;
429 			break;
430 		case Fpart:
431 			if(mp->start > inlen)
432 				error("partition starts after device end");
433 			if(inlen < mp->start + mp->size){
434 				print("#k: %s: partition truncated from "
435 					"%lld to %lld bytes\n", mp->name,
436 					mp->size, inlen - mp->start);
437 				mp->size = inlen - mp->start;
438 			}
439 			break;
440 		}
441 	}
442 	if(mp->type == Finter)
443 		mp->size *= mp->ndevs;
444 }
445 
446 static void
validdevname(Tree * t,char * dname)447 validdevname(Tree *t, char *dname)
448 {
449 	int i;
450 
451 	for(i = 0; i < t->nadevs; i++)
452 		if(t->devs[i] != nil && strcmp(t->devs[i]->name, dname) == 0)
453 			error(Eexist);
454 }
455 
456 static void
parseconfig(char * a,long n,Cmdbuf ** cbp,Cmdtab ** ctp)457 parseconfig(char *a, long n, Cmdbuf **cbp, Cmdtab **ctp)
458 {
459 	Cmdbuf	*cb;
460 	Cmdtab	*ct;
461 
462 	*cbp = cb = parsecmd(a, n);
463 	*ctp = ct = lookupcmd(cb, configs, nelem(configs));
464 
465 	cb->f++;			/* skip command */
466 	cb->nf--;
467 	switch(ct->index){
468 	case Fmirror:
469 	case Fcat:
470 	case Finter:
471 		if(cb->nf < 2)
472 			error("too few arguments for ctl");
473 		if(cb->nf - 1 > Ndevs)
474 			error("too many devices in ctl");
475 		break;
476 	case Fdisk:
477 		if(cb->nf < 1 || cb->nf > 3)
478 			error("ctl usage: disk name [sz dev]");
479 		break;
480 	case Fpart:
481 		if(cb->nf != 4 && (cb->nf != 3 || source == nil))
482 			error("ctl usage: part new [file] off len");
483 		break;
484 	}
485 }
486 
487 static void
parsename(char * name,char * disk,char ** tree,char ** dev)488 parsename(char *name, char *disk, char **tree, char **dev)
489 {
490 	char *slash;
491 
492 	slash = strchr(name, '/');
493 	if(slash == nil){
494 		if(disk != nil)
495 			*tree = disk;
496 		else
497 			*tree = "fs";
498 		*dev = name;
499 	}else{
500 		*tree = name;
501 		*slash++ = 0;
502 		*dev = slash;
503 	}
504 	validname(*tree, 0);
505 	validname(*dev, 0);
506 }
507 
508 static int
getattrs(Chan * c,vlong * lenp,int * permp)509 getattrs(Chan *c, vlong *lenp, int *permp)
510 {
511 	uchar	buf[128];	/* old DIRLEN plus a little should be plenty */
512 	Dir	d;
513 	long	l;
514 
515 	*lenp = 0;
516 	*permp = 0;
517 	l = devtab[c->type]->stat(c, buf, sizeof buf);
518 	if (l >= 0 && convM2D(buf, l, &d, nil) > 0) {
519 		*lenp = d.length;
520 		*permp = d.mode & 0777;
521 	}
522 	return l;
523 }
524 
525 /*
526  * Process a single line of configuration,
527  * often of the form "cmd newname idev0 idev1".
528  * locking is tricky, because we need a write lock to
529  * add/remove devices yet adding/removing them may lead
530  * to calls to this driver that require a read lock (when
531  * inner devices are also provided by us).
532  */
533 static void
mconfig(char * a,long n)534 mconfig(char* a, long n)
535 {
536 	int	i;
537 	int	*iperm;
538 	vlong	size, start;
539 	vlong	*ilen;
540 	char	*tname, *dname, *fakef[4];
541 	Chan	**idev;
542 	Cmdbuf	*cb;
543 	Cmdtab	*ct;
544 	Fsdev	*mp;
545 	Inner	*inprv;
546 	Tree	*t;
547 
548 	/* ignore comments & empty lines */
549 	if (*a == '\0' || *a == '#' || *a == '\n')
550 		return;
551 
552 	dprint("mconfig\n");
553 	size = 0;
554 	start = 0;
555 	mp = nil;
556 	cb = nil;
557 	idev = nil;
558 	ilen = nil;
559 	iperm = nil;
560 
561 	if(waserror()){
562 		free(cb);
563 		nexterror();
564 	}
565 
566 	parseconfig(a, n, &cb, &ct);
567 	switch (ct->index) {
568 	case Fdisk:
569 		kstrdup(&disk, cb->f[0]);
570 		if(cb->nf >= 2)
571 			sectorsz = strtoul(cb->f[1], 0, 0);
572 		else
573 			sectorsz = Sectorsz;
574 		if(cb->nf == 3)
575 			kstrdup(&source, cb->f[2]);
576 		else{
577 			free(source);
578 			source = nil;
579 		}
580 		poperror();
581 		free(cb);
582 		return;
583 	case Fclear:
584 		poperror();
585 		free(cb);
586 		mdelctl("*", "*");		/* del everything */
587 		return;
588 	case Fpart:
589 		if(cb->nf == 3){
590 			/*
591 			 * got a request in the format of sd(3),
592 			 * pretend we got one in our format.
593 			 * later we change end to be len.
594 			 */
595 			fakef[0] = cb->f[0];
596 			fakef[1] = source;
597 			fakef[2] = cb->f[1];
598 			fakef[3] = cb->f[2];
599 			cb->f = fakef;
600 			cb->nf = 4;
601 		}
602 		start = strtoll(cb->f[2], nil, 10);
603 		size =  strtoll(cb->f[3], nil, 10);
604 		if(cb->f == fakef)
605 			size -= start;		/* it was end */
606 		cb->nf -= 2;
607 		break;
608 	}
609 	parsename(cb->f[0], disk, &tname, &dname);
610 	for(i = 1; i < cb->nf; i++)
611 		validname(cb->f[i], 1);
612 
613 	if(ct->index == Fdel){
614 		mdelctl(tname, dname);
615 		poperror();
616 		free(cb);
617 		return;
618 	}
619 
620 	/*
621 	 * Open all inner devices while we have only a read lock.
622 	 */
623 	poperror();
624 	rlock(&lck);
625 	if(waserror()){
626 		runlock(&lck);
627 Fail:
628 		for(i = 1; i < cb->nf; i++)
629 			if(idev != nil && idev[i-1] != nil)
630 				cclose(idev[i]);
631 		if(mp != nil)
632 			mdeldev(mp);
633 		free(idev);
634 		free(ilen);
635 		free(iperm);
636 		free(cb);
637 		nexterror();
638 	}
639 	/* record names, lengths and perms of all named files */
640 	idev = smalloc(sizeof(Chan*) * Ndevs);
641 	ilen = smalloc(sizeof(vlong) * Ndevs);
642 	iperm = smalloc(sizeof(int) * Ndevs);
643 	for(i = 1; i < cb->nf; i++){
644 		idev[i-1] = namec(cb->f[i], Aopen, ORDWR, 0);
645 		getattrs(idev[i-1], &ilen[i-1], &iperm[i-1]);
646 	}
647 	poperror();
648 	runlock(&lck);
649 
650 	/*
651 	 * Get a write lock and add the device if we can.
652 	 */
653 	wlock(&lck);
654 	if(waserror()){
655 		wunlock(&lck);
656 		goto Fail;
657 	}
658 
659 	t = lookuptree(tname);
660 	if(t != nil)
661 		validdevname(t, dname);
662 	else
663 		t = treealloc(tname);
664 	if(t == nil)
665 		error("no more trees");
666 	mp = devalloc(t, dname);
667 	if(mp == nil){
668 		if(t->ndevs == 0)	/* it was created for us */
669 			deltree(t);	/* but we will not mdeldev() */
670 		error(Enomem);
671 	}
672 
673 	/* construct mp from iname, idev and iperm arrays */
674 	mp->type = ct->index;
675 	if(mp->type == Fpart){
676 		mp->start = start * sectorsz;
677 		mp->size = size * sectorsz;
678 	}
679 	mp->perm = 0666;
680 	for(i = 1; i < cb->nf; i++){
681 		inprv = mp->inner[i-1] = mallocz(sizeof(Inner), 1);
682 		if(inprv == nil)
683 			error(Enomem);
684 		mp->ndevs++;
685 		kstrdup(&inprv->iname, cb->f[i]);
686 		inprv->idev = idev[i-1];
687 		idev[i-1] = nil;
688 		/* use the most restrictive of the inner permissions */
689 		mp->perm &= iperm[i-1];
690 	}
691 	setdsize(mp, ilen);
692 
693 	poperror();
694 	wunlock(&lck);
695 	free(idev);
696 	free(ilen);
697 	free(iperm);
698 	free(cb);
699 }
700 
701 static void
rdconf(void)702 rdconf(void)
703 {
704 	int mustrd;
705 	char *c, *e, *p, *s;
706 	Chan *cc;
707 	static int configed;
708 
709 	/* only read config file once */
710 	if (configed)
711 		return;
712 	configed = 1;
713 
714 	dprint("rdconf\n");
715 	/* add the std "fs" tree */
716 	trees[0] = &fstree;
717 	ntrees++;
718 	fstree.name = "fs";
719 
720 	/* identify the config file */
721 	s = getconf("fsconfig");
722 	if (s == nil){
723 		mustrd = 0;
724 		s = "/dev/sdC0/fscfg";
725 	} else
726 		mustrd = 1;
727 
728 	/* read it */
729 	cc = nil;
730 	c = nil;
731 	if (waserror()){
732 		if (cc != nil)
733 			cclose(cc);
734 		if (c)
735 			free(c);
736 		if (!mustrd)
737 			return;
738 		nexterror();
739 	}
740 	cc = namec(s, Aopen, OREAD, 0);
741 	devtab[cc->type]->read(cc, confstr, sizeof confstr, 0);
742 	cclose(cc);
743 	cc = nil;
744 
745 	/* validate, copy and erase config; mconfig will repopulate confstr */
746 	if (strncmp(confstr, cfgstr, sizeof cfgstr - 1) != 0)
747 		error("bad #k config, first line must be: 'fsdev:\\n'");
748 	kstrdup(&c, confstr + sizeof cfgstr - 1);
749 	memset(confstr, 0, sizeof confstr);
750 
751 	/* process config copy one line at a time */
752 	for (p = c; p != nil && *p != '\0'; p = e){
753 		e = strchr(p, '\n');
754 		if (e == nil)
755 			e = p + strlen(p);
756 		else
757 			e++;
758 		mconfig(p, e - p);
759 	}
760 	USED(cc);		/* until now, can be used in waserror clause */
761 	poperror();
762 }
763 
764 static int
mgen(Chan * c,char *,Dirtab *,int,int i,Dir * dp)765 mgen(Chan *c, char*, Dirtab*, int, int i, Dir *dp)
766 {
767 	int	treeno;
768 	Fsdev	*mp;
769 	Qid	qid;
770 	Tree	*t;
771 
772 	dprint("mgen %#ullx %d\n", c->qid.path, i);
773 	qid.type = QTDIR;
774 	qid.vers = 0;
775 	if(c->qid.path == Qtop){
776 		if(i == DEVDOTDOT){
777 			devdir(c, tqid, "#k", 0, eve, DMDIR|0775, dp);
778 			return 1;
779 		}
780 		t = gettree(i, Optional);
781 		if(t == nil){
782 			dprint("no\n");
783 			return -1;
784 		}
785 		qid.path = mkpath(i, Qdir);
786 		devdir(c, qid, t->name, 0, eve, DMDIR|0775, dp);
787 		return 1;
788 	}
789 
790 	treeno = path2treeno(c->qid.path);
791 	t = gettree(treeno, Optional);
792 	if(t == nil){
793 		dprint("no\n");
794 		return -1;
795 	}
796 	if((c->qid.type & QTDIR) != 0){
797 		if(i == DEVDOTDOT){
798 			devdir(c, tqid, "#k", 0, eve, DMDIR|0775, dp);
799 			return 1;
800 		}
801 		if(treeno == 0){
802 			/* take care of #k/fs/ctl */
803 			if(i == 0){
804 				devdir(c, cqid, "ctl", 0, eve, 0664, dp);
805 				return 1;
806 			}
807 			i--;
808 		}
809 		mp = getdev(t, i, Optional);
810 		if(mp == nil){
811 			dprint("no\n");
812 			return -1;
813 		}
814 		qid.type = QTFILE;
815 		qid.vers = mp->vers;
816 		qid.path = mkpath(treeno, Qfirst+i);
817 		devdir(c, qid, mp->name, mp->size, eve, mp->perm, dp);
818 		return 1;
819 	}
820 
821 	if(i == DEVDOTDOT){
822 		qid.path = mkpath(treeno, Qdir);
823 		devdir(c, qid, t->name, 0, eve, DMDIR|0775, dp);
824 		return 1;
825 	}
826 	dprint("no\n");
827 	return -1;
828 }
829 
830 static Chan*
mattach(char * spec)831 mattach(char *spec)
832 {
833 	dprint("mattach\n");
834 	return devattach(fsdevtab.dc, spec);
835 }
836 
837 static Walkqid*
mwalk(Chan * c,Chan * nc,char ** name,int nname)838 mwalk(Chan *c, Chan *nc, char **name, int nname)
839 {
840 	Walkqid *wq;
841 
842 	rdconf();
843 
844 	dprint("mwalk %llux\n", c->qid.path);
845 	rlock(&lck);
846 	if(waserror()){
847 		runlock(&lck);
848 		nexterror();
849 	}
850 	wq = devwalk(c, nc, name, nname, 0, 0, mgen);
851 	poperror();
852 	runlock(&lck);
853 	return wq;
854 }
855 
856 static int
mstat(Chan * c,uchar * db,int n)857 mstat(Chan *c, uchar *db, int n)
858 {
859 	int	p;
860 	Dir	d;
861 	Fsdev	*mp;
862 	Qid	q;
863 	Tree	*t;
864 
865 	dprint("mstat %llux\n", c->qid.path);
866 	rlock(&lck);
867 	if(waserror()){
868 		runlock(&lck);
869 		nexterror();
870 	}
871 	p = c->qid.path;
872 	memset(&d, 0, sizeof d);
873 	switch(p){
874 	case Qtop:
875 		devdir(c, tqid, "#k", 0, eve, DMDIR|0775, &d);
876 		break;
877 	case Qctl:
878 		devdir(c, cqid, "ctl", 0, eve, 0664, &d);
879 		break;
880 	default:
881 		t = gettree(path2treeno(p), Mustexist);
882 		if(c->qid.type & QTDIR)
883 			devdir(c, c->qid, t->name, 0, eve, DMDIR|0775, &d);
884 		else{
885 			mp = getdev(t, path2devno(p) - Qfirst, Mustexist);
886 			q = c->qid;
887 			q.vers = mp->vers;
888 			devdir(c, q, mp->name, mp->size, eve, mp->perm, &d);
889 		}
890 	}
891 	n = convD2M(&d, db, n);
892 	if (n == 0)
893 		error(Ebadarg);
894 	poperror();
895 	runlock(&lck);
896 	return n;
897 }
898 
899 static Chan*
mopen(Chan * c,int omode)900 mopen(Chan *c, int omode)
901 {
902 	int	q;
903 	Fsdev	*mp;
904 
905 	dprint("mopen %llux\n", c->qid.path);
906 	if((c->qid.type & QTDIR) && omode != OREAD)
907 		error(Eperm);
908 	if(c->qid.path != Qctl && (c->qid.type&QTDIR) == 0){
909 		rlock(&lck);
910 		if(waserror()){
911 			runlock(&lck);
912 			nexterror();
913 		}
914 		q = c->qid.path;
915 		mp = path2dev(q);
916 		if(mp->gone)
917 			error(Egone);
918 		devpermcheck(eve, mp->perm, omode);
919 		incref(mp);
920 		poperror();
921 		runlock(&lck);
922 	}
923 	/*
924 	 * Our mgen does not return the info for the qid
925 	 * but only for its children. Don't use devopen here.
926 	 */
927 	c->offset = 0;
928 	c->mode = openmode(omode & ~OTRUNC);
929 	c->flag |= COPEN;
930 	return c;
931 }
932 
933 static void
mclose(Chan * c)934 mclose(Chan *c)
935 {
936 	int	mustdel, q;
937 	Fsdev	*mp;
938 
939 	dprint("mclose %llux\n", c->qid.path);
940 	if(c->qid.type & QTDIR || !(c->flag & COPEN))
941 		return;
942 	rlock(&lck);
943 	if(waserror()){
944 		runlock(&lck);
945 		nexterror();
946 	}
947 	mustdel = 0;
948 	mp = nil;
949 	q = c->qid.path;
950 	if(q == Qctl){
951 		free(disk);
952 		disk = nil;	/* restore defaults */
953 		free(source);
954 		source = nil;
955 		sectorsz = Sectorsz;
956 	}else{
957 		mp = path2dev(q);
958 		if(mp->gone != 0 && mp->ref == 1)
959 			mustdel = 1;
960 		else
961 			decref(mp);
962 	}
963 	poperror();
964 	runlock(&lck);
965 	if(mustdel)
966 		mdeldev(mp);
967 }
968 
969 static long
io(Fsdev * mp,Inner * in,int isread,void * a,long l,vlong off)970 io(Fsdev *mp, Inner *in, int isread, void *a, long l, vlong off)
971 {
972 	long wl;
973 	Chan	*mc;
974 
975 	mc = in->idev;
976 	if(mc == nil)
977 		error(Egone);
978 	if (waserror()) {
979 		print("#k: %s: byte %,lld count %ld (of #k/%s): %s error: %s\n",
980 			in->iname, off, l, mp->name, (isread? "read": "write"),
981 			(up && up->errstr? up->errstr: ""));
982 		nexterror();
983 	}
984 	if (isread)
985 		wl = devtab[mc->type]->read(mc, a, l, off);
986 	else
987 		wl = devtab[mc->type]->write(mc, a, l, off);
988 	poperror();
989 	return wl;
990 }
991 
992 /* NB: a transfer could span multiple inner devices */
993 static long
catio(Fsdev * mp,int isread,void * a,long n,vlong off)994 catio(Fsdev *mp, int isread, void *a, long n, vlong off)
995 {
996 	int	i;
997 	long	l, res;
998 	Inner	*in;
999 
1000 	if(debug)
1001 		print("catio %d %p %ld %lld\n", isread, a, n, off);
1002 	res = n;
1003 	for (i = 0; n > 0 && i < mp->ndevs; i++){
1004 		in = mp->inner[i];
1005 		if (off >= in->isize){
1006 			off -= in->isize;
1007 			continue;		/* not there yet */
1008 		}
1009 		if (off + n > in->isize)
1010 			l = in->isize - off;
1011 		else
1012 			l = n;
1013 		if(debug)
1014 			print("\tdev %d %p %ld %lld\n", i, a, l, off);
1015 
1016 		if (io(mp, in, isread, a, l, off) != l)
1017 			error(Eio);
1018 
1019 		a = (char*)a + l;
1020 		off = 0;
1021 		n -= l;
1022 	}
1023 	if(debug)
1024 		print("\tres %ld\n", res - n);
1025 	return res - n;
1026 }
1027 
1028 static long
interio(Fsdev * mp,int isread,void * a,long n,vlong off)1029 interio(Fsdev *mp, int isread, void *a, long n, vlong off)
1030 {
1031 	int	i;
1032 	long	boff, res, l, wl, wsz;
1033 	vlong	woff, blk, mblk;
1034 
1035 	blk  = off / Blksize;
1036 	boff = off % Blksize;
1037 	wsz  = Blksize - boff;
1038 	res = n;
1039 	while(n > 0){
1040 		mblk = blk / mp->ndevs;
1041 		i    = blk % mp->ndevs;
1042 		woff = mblk*Blksize + boff;
1043 		if (n > wsz)
1044 			l = wsz;
1045 		else
1046 			l = n;
1047 
1048 		wl = io(mp, mp->inner[i], isread, a, l, woff);
1049 		if (wl != l)
1050 			error(Eio);
1051 
1052 		blk++;
1053 		boff = 0;
1054 		wsz = Blksize;
1055 		a = (char*)a + l;
1056 		n -= l;
1057 	}
1058 	return res;
1059 }
1060 
1061 static char*
seprintconf(char * s,char * e)1062 seprintconf(char *s, char *e)
1063 {
1064 	int	i, j;
1065 	Tree	*t;
1066 
1067 	*s = 0;
1068 	for(i = 0; i < ntrees; i++){
1069 		t = trees[i];
1070 		if(t != nil)
1071 			for(j = 0; j < t->nadevs; j++)
1072 				if(t->devs[j] != nil)
1073 					s = seprintdev(s, e, t->devs[j]);
1074 	}
1075 	return s;
1076 }
1077 
1078 static long
mread(Chan * c,void * a,long n,vlong off)1079 mread(Chan *c, void *a, long n, vlong off)
1080 {
1081 	int	i, retry;
1082 	long	l, res;
1083 	Fsdev	*mp;
1084 	Tree	*t;
1085 
1086 	dprint("mread %llux\n", c->qid.path);
1087 	rlock(&lck);
1088 	if(waserror()){
1089 		runlock(&lck);
1090 		nexterror();
1091 	}
1092 	res = -1;
1093 	if(c->qid.type & QTDIR){
1094 		res = devdirread(c, a, n, 0, 0, mgen);
1095 		goto Done;
1096 	}
1097 	if(c->qid.path == Qctl){
1098 		seprintconf(confstr, confstr + sizeof(confstr));
1099 		res = readstr((long)off, a, n, confstr);
1100 		goto Done;
1101 	}
1102 
1103 	t = gettree(path2treeno(c->qid.path), Mustexist);
1104 	mp = getdev(t, path2devno(c->qid.path) - Qfirst, Mustexist);
1105 
1106 	if(off >= mp->size){
1107 		res = 0;
1108 		goto Done;
1109 	}
1110 	if(off + n > mp->size)
1111 		n = mp->size - off;
1112 	if(n == 0){
1113 		res = 0;
1114 		goto Done;
1115 	}
1116 
1117 	switch(mp->type){
1118 	case Fcat:
1119 		res = catio(mp, Isread, a, n, off);
1120 		break;
1121 	case Finter:
1122 		res = interio(mp, Isread, a, n, off);
1123 		break;
1124 	case Fpart:
1125 		res = io(mp, mp->inner[0], Isread, a, n, mp->start + off);
1126 		break;
1127 	case Fmirror:
1128 		retry = 0;
1129 		do {
1130 			if (retry > 0) {
1131 				print("#k/%s: retry %d read for byte %,lld "
1132 					"count %ld: %s\n", mp->name, retry, off,
1133 					n, (up && up->errstr? up->errstr: ""));
1134 				/*
1135 				 * pause before retrying in case it's due to
1136 				 * a transient bus or controller problem.
1137 				 */
1138 				tsleep(&up->sleep, return0, 0, Retrypause);
1139 			}
1140 			for (i = 0; i < mp->ndevs; i++){
1141 				if (waserror())
1142 					continue;
1143 				l = io(mp, mp->inner[i], Isread, a, n, off);
1144 				poperror();
1145 				if (l >= 0){
1146 					res = l;
1147 					break;		/* read a good copy */
1148 				}
1149 			}
1150 		} while (i == mp->ndevs && ++retry <= Maxretries);
1151 		if (retry > Maxretries) {
1152 			/* no mirror had a good copy of the block */
1153 			print("#k/%s: byte %,lld count %ld: CAN'T READ "
1154 				"from mirror: %s\n", mp->name, off, n,
1155 				(up && up->errstr? up->errstr: ""));
1156 			error(Eio);
1157 		} else if (retry > 0)
1158 			print("#k/%s: byte %,lld count %ld: retry read OK "
1159 				"from mirror: %s\n", mp->name, off, n,
1160 				(up && up->errstr? up->errstr: ""));
1161 		break;
1162 	}
1163 Done:
1164 	poperror();
1165 	runlock(&lck);
1166 	return res;
1167 }
1168 
1169 static long
mwrite(Chan * c,void * a,long n,vlong off)1170 mwrite(Chan *c, void *a, long n, vlong off)
1171 {
1172 	int	i, allbad, anybad, retry;
1173 	long	l, res;
1174 	Fsdev	*mp;
1175 	Tree	*t;
1176 
1177 	dprint("mwrite %llux\n", c->qid.path);
1178 	if (c->qid.type & QTDIR)
1179 		error(Eisdir);
1180 	if (c->qid.path == Qctl){
1181 		mconfig(a, n);
1182 		return n;
1183 	}
1184 
1185 	rlock(&lck);
1186 	if(waserror()){
1187 		runlock(&lck);
1188 		nexterror();
1189 	}
1190 
1191 	t = gettree(path2treeno(c->qid.path), Mustexist);
1192 	mp = getdev(t, path2devno(c->qid.path) - Qfirst, Mustexist);
1193 
1194 	if(off >= mp->size){
1195 		res = 0;
1196 		goto Done;
1197 	}
1198 	if(off + n > mp->size)
1199 		n = mp->size - off;
1200 	if(n == 0){
1201 		res = 0;
1202 		goto Done;
1203 	}
1204 	res = n;
1205 	switch(mp->type){
1206 	case Fcat:
1207 		res = catio(mp, Iswrite, a, n, off);
1208 		break;
1209 	case Finter:
1210 		res = interio(mp, Iswrite, a, n, off);
1211 		break;
1212 	case Fpart:
1213 		res = io(mp, mp->inner[0], Iswrite, a, n, mp->start + off);
1214 		if (res != n)
1215 			error(Eio);
1216 		break;
1217 	case Fmirror:
1218 		retry = 0;
1219 		do {
1220 			if (retry > 0) {
1221 				print("#k/%s: retry %d write for byte %,lld "
1222 					"count %ld: %s\n", mp->name, retry, off,
1223 					n, (up && up->errstr? up->errstr: ""));
1224 				/*
1225 				 * pause before retrying in case it's due to
1226 				 * a transient bus or controller problem.
1227 				 */
1228 				tsleep(&up->sleep, return0, 0, Retrypause);
1229 			}
1230 			allbad = 1;
1231 			anybad = 0;
1232 			for (i = mp->ndevs - 1; i >= 0; i--){
1233 				if (waserror()) {
1234 					anybad = 1;
1235 					continue;
1236 				}
1237 				l = io(mp, mp->inner[i], Iswrite, a, n, off);
1238 				poperror();
1239 				if (l == n)
1240 					allbad = 0;	/* wrote a good copy */
1241 				else
1242 					anybad = 1;
1243 			}
1244 		} while (anybad && ++retry <= Maxretries);
1245 		if (allbad) {
1246 			/* no mirror took a good copy of the block */
1247 			print("#k/%s: byte %,lld count %ld: CAN'T WRITE "
1248 				"to mirror: %s\n", mp->name, off, n,
1249 				(up && up->errstr? up->errstr: ""));
1250 			error(Eio);
1251 		} else if (retry > 0)
1252 			print("#k/%s: byte %,lld count %ld: retry wrote OK "
1253 				"to mirror: %s\n", mp->name, off, n,
1254 				(up && up->errstr? up->errstr: ""));
1255 
1256 		break;
1257 	}
1258 Done:
1259 	poperror();
1260 	runlock(&lck);
1261 	return res;
1262 }
1263 
1264 Dev fsdevtab = {
1265 	'k',
1266 	"fs",
1267 
1268 	devreset,
1269 	devinit,
1270 	devshutdown,
1271 	mattach,
1272 	mwalk,
1273 	mstat,
1274 	mopen,
1275 	devcreate,
1276 	mclose,
1277 	mread,
1278 	devbread,
1279 	mwrite,
1280 	devbwrite,
1281 	devremove,
1282 	devwstat,
1283 	devpower,
1284 	devconfig,
1285 };
1286