xref: /plan9/sys/src/9/port/devfs.c (revision 850dd0ca1bdf035e1410f3ad77ab360045f4571d)
1 /*
2  * File system devices.
3  * Follows device config in Ken's file server.
4  * Builds mirrors, concatenations, interleavings, and partitions
5  * of devices out of other (inner) devices.
6  */
7 
8 #include "u.h"
9 #include "../port/lib.h"
10 #include "mem.h"
11 #include "dat.h"
12 #include "fns.h"
13 #include "io.h"
14 #include "ureg.h"
15 #include "../port/error.h"
16 
/*
 * Constants used throughout this file: configuration command codes,
 * qid paths, I/O direction flags and table sizes.
 */
enum {
	/* configuration commands; the code is also stored in Fsdev.type */
	Fmirror	= 0,		/* mirror of others */
	Fcat	= 1,		/* catenation of others */
	Finter	= 2,		/* interleaving of others */
	Fpart	= 3,		/* part of others */
	Fclear	= 4,		/* start over */

	Blksize	= 8192,		/* interleave unit in bytes, for Finter only */

	/* qid paths */
	Qtop	= 0,		/* top dir (contains "fs") */
	Qdir	= 1,		/* actual dir */
	Qctl	= 2,		/* ctl file */
	Qfirst	= 3,		/* first fs file; fsdev[i] has path Qfirst+i */

	/* direction argument of io(), catio() and interio() */
	Iswrite	= 0,
	Isread	= 1,

	/* tunable parameters */
	Maxconf	= 4096,		/* max length for config */
	Ndevs	= 32,		/* max. inner devs per command */
	Nfsdevs	= 128,		/* max. created devs, total */
};
39 
40 #define	Cfgstr	"fsdev:\n"
41 
42 typedef struct Inner Inner;
43 struct Inner
44 {
45 	char	*iname;		/* inner device name */
46 	vlong	isize;		/* size of inner device */
47 	Chan	*idev;		/* inner device */
48 };
49 
50 typedef struct Fsdev Fsdev;
51 struct Fsdev
52 {
53 	int	type;
54 	char	*name;		/* name for this fsdev */
55 	vlong	size;		/* min(inner[X].isize) */
56 	vlong	start;		/* start address (for Fpart) */
57 	int	ndevs;		/* number of inner devices */
58 	Inner	inner[Ndevs];
59 };
60 
61 extern Dev fsdevtab;		/* forward */
62 
63 /*
64  * Once configured, a fsdev is never removed.  The name of those
65  * configured is never nil.  We have no locks here.
66  */
67 static Fsdev	fsdev[Nfsdevs];
68 
69 static Qid	tqid = {Qtop, 0, QTDIR};
70 static Qid	dqid = {Qdir, 0, QTDIR};
71 static Qid	cqid = {Qctl, 0, 0};
72 
73 static Cmdtab configs[] = {
74 	Fmirror,"mirror",	0,
75 	Fcat,	"cat",		0,
76 	Finter,	"inter",	0,
77 	Fpart,	"part",		5,
78 	Fclear,	"clear",	1,
79 };
80 
81 static char	confstr[Maxconf];
82 static int	configed;
83 
84 
85 static Fsdev*
86 path2dev(int i, int mustexist)
87 {
88 	if (i < 0 || i >= nelem(fsdev))
89 		error("bug: bad index in devfsdev");
90 	if (mustexist && fsdev[i].name == nil)
91 		error(Enonexist);
92 
93 	if (fsdev[i].name == nil)
94 		return nil;
95 	else
96 		return &fsdev[i];
97 }
98 
99 static Fsdev*
100 devalloc(void)
101 {
102 	int	i;
103 
104 	for (i = 0; i < nelem(fsdev); i++)
105 		if (fsdev[i].name == nil)
106 			break;
107 	if (i == nelem(fsdev))
108 		error(Enodev);
109 
110 	return &fsdev[i];
111 }
112 
113 static void
114 setdsize(Fsdev* mp)
115 {
116 	int	i;
117 	long	l;
118 	uchar	buf[128];	/* old DIRLEN plus a little should be plenty */
119 	Dir	d;
120 	Inner	*in;
121 
122 	if (mp->type != Fpart){
123 		mp->start= 0;
124 		mp->size = 0;
125 	}
126 	for (i = 0; i < mp->ndevs; i++){
127 		in = &mp->inner[i];
128 		l = devtab[in->idev->type]->stat(in->idev, buf, sizeof buf);
129 		convM2D(buf, l, &d, nil);
130 		in->isize = d.length;
131 		switch(mp->type){
132 		case Fmirror:
133 			if (mp->size == 0 || mp->size > d.length)
134 				mp->size = d.length;
135 			break;
136 		case Fcat:
137 			mp->size += d.length;
138 			break;
139 		case Finter:
140 			/* truncate to multiple of Blksize */
141 			d.length &= ~(Blksize-1);
142 			in->isize = d.length;
143 			mp->size += d.length;
144 			break;
145 		case Fpart:
146 			/* should raise errors here? */
147 			if (mp->start > d.length)
148 				mp->start = d.length;
149 			if (d.length < mp->start + mp->size)
150 				mp->size = d.length - mp->start;
151 			break;
152 		}
153 	}
154 }
155 
156 static void
157 mpshut(Fsdev *mp)
158 {
159 	int	i;
160 	char	*nm;
161 
162 	nm = mp->name;
163 	mp->name = nil;		/* prevent others from using this. */
164 	if (nm)
165 		free(nm);
166 	for (i = 0; i < mp->ndevs; i++){
167 		if (mp->inner[i].idev != nil)
168 			cclose(mp->inner[i].idev);
169 		if (mp->inner[i].iname)
170 			free(mp->inner[i].iname);
171 	}
172 	memset(mp, 0, sizeof *mp);
173 }
174 
175 
176 static void
177 mconfig(char* a, long n)	/* "name idev0 idev1" */
178 {
179 	int	i;
180 	vlong	size, start;
181 	char	*c, *oldc;
182 	Cmdbuf	*cb;
183 	Cmdtab	*ct;
184 	Fsdev	*mp;
185 	Inner	*inprv;
186 	static	QLock	lck;
187 
188 	size = 0;
189 	start = 0;
190 	if (confstr[0] == 0)
191 		seprint(confstr, confstr + sizeof confstr, Cfgstr);
192 	mp = nil;
193 	cb = nil;
194 	oldc = confstr + strlen(confstr);
195 	if (*a == '\0' || *a == '#' || *a == '\n')
196 		return;
197 
198 	qlock(&lck);
199 	if (waserror()){
200 		*oldc = 0;
201 		if (mp != nil)
202 			mpshut(mp);
203 		qunlock(&lck);
204 		if (cb)
205 			free(cb);
206 		nexterror();
207 	}
208 
209 	cb = parsecmd(a, n);
210 	c = oldc;
211 	for (i = 0; i < cb->nf; i++)
212 		c = seprint(c, confstr + sizeof confstr, "%s ", cb->f[i]);
213 	if (c > confstr)
214 		c[-1] = '\n';
215 	ct = lookupcmd(cb, configs, nelem(configs));
216 	cb->f++;			/* skip command */
217 	cb->nf--;
218 	if (cb->nf < 0)			/* nothing to see here, move along */
219 		ct->index = -1;
220 	switch (ct->index) {
221 	case Fpart:
222 		if (cb->nf < 4)
223 			error("too few fields in fs config");
224 		start = strtoll(cb->f[2], nil, 10);
225 		size =  strtoll(cb->f[3], nil, 10);
226 		cb->nf -= 2;
227 		break;
228 	case Fclear:
229 		for (mp = fsdev; mp < fsdev + nelem(fsdev); mp++)
230 			mpshut(mp);
231 		*confstr = '\0';
232 		/* FALL THROUGH */
233 	case -1:
234 		poperror();
235 		qunlock(&lck);
236 		free(cb);
237 		return;
238 	}
239 	if (cb->nf < 2)
240 		error("too few fields in fs config");
241 
242 	/* reject name if already in use */
243 	for (i = 0; i < nelem(fsdev); i++)
244 		if (fsdev[i].name != nil && strcmp(fsdev[i].name, cb->f[0])==0)
245 			error(Eexist);
246 
247 	if (cb->nf - 1 > Ndevs)
248 		error("too many devices; fix #k: increase Ndevs");
249 	for (i = 0; i < cb->nf; i++)
250 		validname(cb->f[i], (i != 0));
251 
252 	mp = devalloc();
253 	mp->type = ct->index;
254 	if (mp->type == Fpart){
255 		mp->start = start;
256 		mp->size = size;
257 	}
258 	kstrdup(&mp->name, cb->f[0]);
259 	for (i = 1; i < cb->nf; i++){
260 		inprv = &mp->inner[i-1];
261 		kstrdup(&inprv->iname, cb->f[i]);
262 		inprv->idev = namec(inprv->iname, Aopen, ORDWR, 0);
263 		if (inprv->idev == nil) {
264 			free(mp->name);
265 			mp->name = nil;		/* free mp */
266 			error(Egreg);
267 		}
268 		mp->ndevs++;
269 	}
270 	setdsize(mp);
271 	configed = 1;
272 
273 	poperror();
274 	qunlock(&lck);
275 	free(cb);
276 }
277 
278 static void
279 rdconf(void)
280 {
281 	int mustrd;
282 	char *c, *e, *p, *s;
283 	Chan *cc;
284 	Chan **ccp;
285 
286 	s = getconf("fsconfig");
287 	if (s == nil){
288 		mustrd = 0;
289 		s = "/dev/sdC0/fscfg";
290 	} else
291 		mustrd = 1;
292 	ccp = &cc;
293 	*ccp = nil;
294 	c = nil;
295 	if (waserror()){
296 		configed = 1;
297 		if (*ccp != nil)
298 			cclose(*ccp);
299 		if (c)
300 			free(c);
301 		if (!mustrd)
302 			return;
303 		nexterror();
304 	}
305 	*ccp = namec(s, Aopen, OREAD, 0);
306 	devtab[(*ccp)->type]->read(*ccp, confstr, sizeof confstr, 0);
307 	cclose(*ccp);
308 	*ccp = nil;
309 	if (strncmp(confstr, Cfgstr, strlen(Cfgstr)) != 0)
310 		error("bad #k config, first line must be: 'fsdev:\\n'");
311 	kstrdup(&c, confstr + strlen(Cfgstr));
312 	memset(confstr, 0, sizeof confstr);
313 	for (p = c; p != nil && *p != 0; p = e){
314 		e = strchr(p, '\n');
315 		if (e == nil)
316 			e = p + strlen(p);
317 		if (e == p) {
318 			e++;
319 			continue;
320 		}
321 		mconfig(p, e - p);
322 	}
323 	poperror();
324 }
325 
326 
327 static int
328 mgen(Chan *c, char*, Dirtab*, int, int i, Dir *dp)
329 {
330 	Qid	qid;
331 	Fsdev	*mp;
332 
333 	if (c->qid.path == Qtop)
334 		switch(i){
335 		case DEVDOTDOT:
336 			devdir(c, tqid, "#k", 0, eve, DMDIR|0775, dp);
337 			return 1;
338 		case 0:
339 			devdir(c, dqid, "fs", 0, eve, DMDIR|0775, dp);
340 			return 1;
341 		default:
342 			return -1;
343 		}
344 	if (c->qid.path != Qdir)
345 		switch(i){
346 		case DEVDOTDOT:
347 			devdir(c, dqid, "fs", 0, eve, DMDIR|0775, dp);
348 			return 1;
349 		default:
350 			return -1;
351 		}
352 	switch(i){
353 	case DEVDOTDOT:
354 		devdir(c, tqid, "#k", 0, eve, DMDIR|0775, dp);
355 		return 1;
356 	case 0:
357 		devdir(c, cqid, "ctl", 0, eve, 0664, dp);
358 		return 1;
359 	}
360 	i--;			/* for ctl */
361 	qid.path = Qfirst + i;
362 	qid.vers = 0;
363 	qid.type = 0;
364 	mp = path2dev(i, 0);
365 	if (mp == nil)
366 		return -1;
367 	kstrcpy(up->genbuf, mp->name, sizeof(up->genbuf));
368 	devdir(c, qid, up->genbuf, mp->size, eve, 0664, dp);
369 	return 1;
370 }
371 
372 static Chan*
373 mattach(char *spec)
374 {
375 	return devattach(fsdevtab.dc, spec);
376 }
377 
378 static Walkqid*
379 mwalk(Chan *c, Chan *nc, char **name, int nname)
380 {
381 	if (!configed)
382 		rdconf();
383 	return devwalk(c, nc, name, nname, 0, 0, mgen);
384 }
385 
386 static int
387 mstat(Chan *c, uchar *db, int n)
388 {
389 	Dir	d;
390 	Fsdev	*mp;
391 	int	p;
392 
393 	p = c->qid.path;
394 	memset(&d, 0, sizeof d);
395 	switch(p){
396 	case Qtop:
397 		devdir(c, tqid, "#k", 0, eve, DMDIR|0775, &d);
398 		break;
399 	case Qdir:
400 		devdir(c, dqid, "fs", 0, eve, DMDIR|0775, &d);
401 		break;
402 	case Qctl:
403 		devdir(c, cqid, "ctl", 0, eve, 0664, &d);
404 		break;
405 	default:
406 		mp = path2dev(p - Qfirst, 1);
407 		devdir(c, c->qid, mp->name, mp->size, eve, 0664, &d);
408 	}
409 	n = convD2M(&d, db, n);
410 	if (n == 0)
411 		error(Ebadarg);
412 	return n;
413 }
414 
415 static Chan*
416 mopen(Chan *c, int omode)
417 {
418 //	TODO: call devopen()?
419 	if((c->qid.type & QTDIR) && omode != OREAD)
420 		error(Eperm);
421 //	if (c->flag & COPEN)
422 //		return c;
423 	c->mode = openmode(omode & ~OTRUNC);
424 	c->flag |= COPEN;
425 	c->offset = 0;
426 	return c;
427 }
428 
429 static void
430 mclose(Chan*)
431 {
432 	/* that's easy */
433 }
434 
435 
436 static long
437 io(Fsdev *mp, Inner *in, int isread, void *a, long l, vlong off)
438 {
439 	long wl;
440 	Chan *mc = in->idev;
441 
442 	if (waserror()) {
443 		print("#k: %s: byte %,lld count %ld (of #k/%s): %s error: %s\n",
444 			in->iname, off, l, mp->name, (isread? "read": "write"),
445 			(up && up->errstr? up->errstr: ""));
446 		nexterror();
447 	}
448 	if (isread) {
449 		wl = devtab[mc->type]->read(mc, a, l, off);
450 		if (wl != l)
451 			error("#k: short read");
452 	} else {
453 		wl = devtab[mc->type]->write(mc, a, l, off);
454 		if (wl != l)
455 			error("#k: write error");
456 	}
457 	poperror();
458 	return wl;
459 }
460 
461 static long
462 catio(Fsdev *mp, int isread, void *a, long n, vlong off)
463 {
464 	int	i;
465 	long	l, wl, res;
466 	Inner	*in;
467 
468 	// print("catio %d %p %ld %lld\n", isread, a, n, off);
469 	res = n;
470 	for (i = 0; n >= 0 && i < mp->ndevs ; i++){
471 		in = &mp->inner[i];
472 		if (off > in->isize){
473 			off -= in->isize;
474 			continue;		/* not there yet */
475 		}
476 		if (off + n > in->isize)
477 			l = in->isize - off;
478 		else
479 			l = n;
480 		// print("\tdev %d %p %ld %lld\n", i, a, l, off);
481 
482 		wl = io(mp, in, isread, a, l, off);
483 		assert(wl == l);
484 
485 		a = (char*)a + l;
486 		off = 0;
487 		n -= l;
488 	}
489 	// print("\tres %ld\n", res - n);
490 	return res - n;
491 }
492 
493 static long
494 interio(Fsdev *mp, int isread, void *a, long n, vlong off)
495 {
496 	int	i;
497 	long	boff, res, l, wl, wsz;
498 	vlong	woff, blk, mblk;
499 	Inner	*in;
500 
501 	blk  = off / Blksize;
502 	boff = off % Blksize;
503 	wsz  = Blksize - boff;
504 	res = n;
505 	while(n > 0){
506 		mblk = blk / mp->ndevs;
507 		i    = blk % mp->ndevs;
508 		woff = mblk*Blksize + boff;
509 		if (n > wsz)
510 			l = wsz;
511 		else
512 			l = n;
513 
514 		in = &mp->inner[i];
515 		wl = io(mp, in, isread, a, l, woff);
516 		if (wl != l || l == 0)
517 			error(Eio);
518 
519 		a = (char*)a + l;
520 		n -= l;
521 		blk++;
522 		boff = 0;
523 		wsz = Blksize;
524 	}
525 	return res;
526 }
527 
528 static long
529 mread(Chan *c, void *a, long n, vlong off)
530 {
531 	int	i, retry;
532 	long	l, res;
533 	Fsdev	*mp;
534 	Inner	*in;
535 
536 	if (c->qid.type & QTDIR)
537 		return devdirread(c, a, n, 0, 0, mgen);
538 	if (c->qid.path == Qctl)
539 		return readstr((long)off, a, n, confstr + strlen(Cfgstr));
540 	i = c->qid.path - Qfirst;
541 	mp = path2dev(i, 1);
542 
543 	if (off >= mp->size)
544 		return 0;
545 	if (off + n > mp->size)
546 		n = mp->size - off;
547 	if (n == 0)
548 		return 0;
549 
550 	res = -1;
551 	switch(mp->type){
552 	case Fcat:
553 		res = catio(mp, Isread, a, n, off);
554 		break;
555 	case Finter:
556 		res = interio(mp, Isread, a, n, off);
557 		break;
558 	case Fpart:
559 		in = &mp->inner[0];
560 		res = io(mp, in, Isread, a, n, mp->start + off);
561 		assert(res == n);
562 		break;
563 	case Fmirror:
564 		retry = 0;
565 		do {
566 			if (retry > 0) {
567 				print("#k/%s: retry %d read for byte %,lld "
568 					"count %ld: %s\n", mp->name, retry, off,
569 					n, (up && up->errstr? up->errstr: ""));
570 				tsleep(&up->sleep, return0, 0, 2000);
571 			}
572 			for (i = 0; i < mp->ndevs; i++){
573 				if (waserror())
574 					continue;
575 				in = &mp->inner[i];
576 				l = io(mp, in, Isread, a, n, off);
577 				poperror();
578 				if (l >= 0){
579 					res = l;
580 					break;		/* read a good copy */
581 				}
582 			}
583 		} while (i == mp->ndevs && ++retry < 2);
584 		if (i == mp->ndevs) {
585 			/* no mirror had a good copy of the block */
586 			print("#k/%s: byte %,lld count %ld: CAN'T READ "
587 				"from mirror: %s\n", mp->name, off, n,
588 				(up && up->errstr? up->errstr: ""));
589 			error(Eio);
590 		} else if (retry > 0)
591 			print("#k/%s: byte %,lld count %ld: retry read OK "
592 				"from mirror: %s\n", mp->name, off, n,
593 				(up && up->errstr? up->errstr: ""));
594 		break;
595 	}
596 	return res;
597 }
598 
599 static long
600 mwrite(Chan *c, void *a, long n, vlong off)
601 {
602 	int	i, allbad, anybad, retry;
603 	long	l, res;
604 	Fsdev	*mp;
605 	Inner	*in;
606 
607 	if (c->qid.type & QTDIR)
608 		error(Eperm);
609 	if (c->qid.path == Qctl){
610 		mconfig(a, n);
611 		return n;
612 	}
613 	mp = path2dev(c->qid.path - Qfirst, 1);
614 
615 	if (off >= mp->size)
616 		return 0;
617 	if (off + n > mp->size)
618 		n = mp->size - off;
619 	if (n == 0)
620 		return 0;
621 	res = n;
622 	switch(mp->type){
623 	case Fcat:
624 		res = catio(mp, Iswrite, a, n, off);
625 		break;
626 	case Finter:
627 		res = interio(mp, Iswrite, a, n, off);
628 		break;
629 	case Fpart:
630 		in = &mp->inner[0];
631 		res = io(mp, in, Iswrite, a, n, mp->start + off);
632 		if (res > n)
633 			res = n;
634 		break;
635 	case Fmirror:
636 		retry = 0;
637 		do {
638 			if (retry > 0) {
639 				print("#k/%s: retry %d write for byte %,lld "
640 					"count %ld: %s\n", mp->name, retry, off,
641 					n, (up && up->errstr? up->errstr: ""));
642 				tsleep(&up->sleep, return0, 0, 2000);
643 			}
644 			allbad = 1;
645 			anybad = 0;
646 			for (i = mp->ndevs - 1; i >= 0; i--){
647 				if (waserror()) {
648 					anybad = 1;
649 					continue;
650 				}
651 				in = &mp->inner[i];
652 				l = io(mp, in, Iswrite, a, n, off);
653 				poperror();
654 				if (res > l)
655 					res = l;	/* shortest OK write */
656 				if (l == n)
657 					allbad = 0;	/* wrote a good copy */
658 				else
659 					anybad = 1;
660 			}
661 		} while (anybad && ++retry < 2);
662 		if (allbad) {
663 			/* no mirror took a good copy of the block */
664 			print("#k/%s: byte %,lld count %ld: CAN'T WRITE "
665 				"to mirror: %s\n", mp->name, off, n,
666 				(up && up->errstr? up->errstr: ""));
667 			error(Eio);
668 		} else if (retry > 0)
669 			print("#k/%s: byte %,lld count %ld: retry wrote OK "
670 				"to mirror: %s\n", mp->name, off, n,
671 				(up && up->errstr? up->errstr: ""));
672 
673 		break;
674 	}
675 	return res;
676 }
677 
678 Dev fsdevtab = {
679 	'k',
680 	"devfs",
681 
682 	devreset,
683 	devinit,
684 	devshutdown,
685 	mattach,
686 	mwalk,
687 	mstat,
688 	mopen,
689 	devcreate,
690 	mclose,
691 	mread,
692 	devbread,
693 	mwrite,
694 	devbwrite,
695 	devremove,
696 	devwstat,
697 	devpower,
698 	devconfig,
699 };
700