1 /*
2 * File system devices.
3 * Follows device config in Ken's file server.
4 * Builds mirrors, concatenations, interleavings, and partitions
5 * of devices out of other (inner) devices.
6 * It is ok if inner devices are provided by this driver.
7 *
 * Built files are grouped in different directories
 * (called trees, and used to represent disks).
10 * The "#k/fs" tree is always available and never goes away.
11 * Configuration changes happen only while no I/O is in progress.
12 *
13 * Default sector size is one byte unless changed by the "disk" ctl.
14 */
15
16 #include "u.h"
17 #include "../port/lib.h"
18 #include "mem.h"
19 #include "dat.h"
20 #include "fns.h"
21 #include "io.h"
22 #include "ureg.h"
23 #include "../port/error.h"
24
/*
 * Configuration commands.  The first four are also stored in
 * Fsdev.type to say how a built device combines its inner devices.
 */
enum
{
	Fnone,
	Fmirror,		/* mirror of others */
	Fcat,			/* catenation of others */
	Finter,			/* interleaving of others */
	Fpart,			/* part of other */
	Fclear,			/* start over */
	Fdel,			/* delete a configured device */
	Fdisk,			/* set default tree and sector size */
};

/* sizes */
enum
{
	Sectorsz	= 1,		/* default sector size */
	Blksize		= 8*1024,	/* for Finter only */
	Incr		= 5,		/* increments for a tree's devs array */
};

/*
 * All qids are decorated with the tree number.
 * #k/fs is tree number 0, is automatically added and
 * its first qid is for the ctl file.  It never goes away.
 */
enum
{
	Qtop	= 0,		/* #k */
	Qdir,			/* directory (#k/fs) */
	Qctl,			/* ctl, only for #k/fs/ctl */
	Qfirst,			/* first qid assigned for device */
};

/* direction argument for io(), catio() and interio() */
enum
{
	Iswrite	= 0,
	Isread,
};

/* lookup mode for gettree() and getdev() */
enum
{
	Optional	= 0,	/* a missing entry yields nil */
	Mustexist,		/* a missing entry raises Enonexist */
};

/* tunable parameters */
enum
{
	Maxconf		= 4*1024,	/* max length for config */
	Ndevs		= 32,		/* max. inner devs per command */
	Ntrees		= 128,		/* max. number of trees */
	Maxretries	= 3,		/* max. retries of i/o errors */
	Retrypause	= 5000,		/* ms. to pause between retries */
};
64
65 typedef struct Inner Inner;
66 typedef struct Fsdev Fsdev;
67 typedef struct Tree Tree;
68
69 struct Inner
70 {
71 char *iname; /* inner device name */
72 vlong isize; /* size of inner device */
73 Chan *idev; /* inner device */
74 };
75
76 struct Fsdev
77 {
78 Ref; /* one per Chan doing I/O */
79 int gone; /* true if removed */
80 int vers; /* qid version for this device */
81 int type; /* Fnone, Fmirror, ... */
82 char *name; /* name for this fsdev */
83 Tree* tree; /* where the device is kept */
84 vlong size; /* min(inner[X].isize) */
85 vlong start; /* start address (for Fpart) */
86 uint ndevs; /* number of inner devices */
87 int perm; /* minimum of inner device perms */
88 Inner *inner[Ndevs]; /* inner devices */
89 };
90
91 struct Tree
92 {
93 char *name; /* name for #k/<name> */
94 Fsdev **devs; /* devices in dir. */
95 uint ndevs; /* number of devices */
96 uint nadevs; /* number of allocated devices in devs */
97 };
98
99 #define dprint if(debug)print
100
101 extern Dev fsdevtab; /* forward */
102
103 static RWlock lck; /* r: use devices; w: change config */
104 static Tree fstree; /* The main "fs" tree. Never goes away */
105 static Tree *trees[Ntrees]; /* internal representation of config */
106 static int ntrees; /* max number of trees */
107 static int qidvers;
108
109 static char *disk; /* default tree name used */
110 static char *source; /* default inner device used */
111 static int sectorsz = Sectorsz; /* default sector size */
112
113 static char confstr[Maxconf]; /* textual configuration */
114
115 static int debug;
116
117 static char cfgstr[] = "fsdev:\n";
118
119 static Qid tqid = {Qtop, 0, QTDIR};
120 static Qid cqid = {Qctl, 0, 0};
121
122 static char* tnames[] = {
123 [Fmirror] "mirror",
124 [Fcat] "cat",
125 [Finter] "inter",
126 [Fpart] "part",
127 };
128
129 static Cmdtab configs[] = {
130 Fmirror,"mirror", 0,
131 Fcat, "cat", 0,
132 Finter, "inter", 0,
133 Fpart, "part", 0,
134 Fclear, "clear", 1,
135 Fdel, "del", 2,
136 Fdisk, "disk", 0,
137 };
138
139 static char Egone[] = "device is gone"; /* file has been removed */
140
141 static char*
seprintdev(char * s,char * e,Fsdev * mp)142 seprintdev(char *s, char *e, Fsdev *mp)
143 {
144 int i;
145
146 if(mp == nil)
147 return seprint(s, e, "<null Fsdev>");
148 if(mp->type < 0 || mp->type >= nelem(tnames) || tnames[mp->type] == nil)
149 return seprint(s, e, "bad device type %d\n", mp->type);
150
151 s = strecpy(s, e, tnames[mp->type]);
152 if(mp->tree != &fstree)
153 s = seprint(s, e, " %s/%s", mp->tree->name, mp->name);
154 else
155 s = seprint(s, e, " %s", mp->name);
156 for(i = 0; i < mp->ndevs; i++)
157 s = seprint(s, e, " %s", mp->inner[i]->iname);
158 switch(mp->type){
159 case Fmirror:
160 case Fcat:
161 case Finter:
162 s = strecpy(s, e, "\n");
163 break;
164 case Fpart:
165 s = seprint(s, e, " %ulld %ulld\n", mp->start, mp->size);
166 break;
167 default:
168 panic("#k: seprintdev bug");
169 }
170 return s;
171 }
172
173 static vlong
mkpath(int tree,int devno)174 mkpath(int tree, int devno)
175 {
176 return (tree&0xFFFF)<<16 | devno&0xFFFF;
177 }
178
/* Extract the tree number (bits 16-31) from a qid path. */
static int
path2treeno(int q)
{
	int hi;

	hi = q >> 16;
	return hi & 0xFFFF;
}
184
/* Extract the device number (bits 0-15) from a qid path. */
static int
path2devno(int q)
{
	int lo;

	lo = q & 0xFFFF;
	return lo;
}
190
191 static Tree*
gettree(int i,int mustexist)192 gettree(int i, int mustexist)
193 {
194 dprint("gettree %d\n", i);
195 if(i < 0)
196 panic("#k: bug: bad tree index %d in gettree", i);
197 if(i >= ntrees || trees[i] == nil)
198 if(mustexist)
199 error(Enonexist);
200 else
201 return nil;
202 return trees[i];
203 }
204
205 static Fsdev*
getdev(Tree * t,int i,int mustexist)206 getdev(Tree *t, int i, int mustexist)
207 {
208 dprint("getdev %d\n", i);
209 if(i < 0)
210 panic("#k: bug: bad dev index %d in getdev", i);
211 if(i >= t->nadevs || t->devs[i] == nil)
212 if(mustexist)
213 error(Enonexist);
214 else
215 return nil;
216 return t->devs[i];
217 }
218
219 static Fsdev*
path2dev(int q)220 path2dev(int q)
221 {
222 Tree *t;
223
224 dprint("path2dev %ux\n", q);
225 t = gettree(path2treeno(q), Mustexist);
226 return getdev(t, path2devno(q) - Qfirst, Mustexist);
227 }
228
229 static Tree*
treealloc(char * name)230 treealloc(char *name)
231 {
232 int i;
233 Tree *t;
234
235 dprint("treealloc %s\n", name);
236 for(i = 0; i < nelem(trees); i++)
237 if(trees[i] == nil)
238 break;
239 if(i == nelem(trees))
240 return nil;
241 t = trees[i] = mallocz(sizeof(Tree), 1);
242 if(t == nil)
243 return nil;
244 if(i == ntrees)
245 ntrees++;
246 kstrdup(&t->name, name);
247 return t;
248 }
249
250 static Tree*
lookuptree(char * name)251 lookuptree(char *name)
252 {
253 int i;
254
255 dprint("lookuptree %s\n", name);
256 for(i = 0; i < ntrees; i++)
257 if(trees[i] != nil && strcmp(trees[i]->name, name) == 0)
258 return trees[i];
259 return nil;
260 }
261
262 static Fsdev*
devalloc(Tree * t,char * name)263 devalloc(Tree *t, char *name)
264 {
265 int i, ndevs;
266 Fsdev *mp, **devs;
267
268 dprint("devalloc %s %s\n", t->name, name);
269 mp = mallocz(sizeof(Fsdev), 1);
270 if(mp == nil)
271 return nil;
272 for(i = 0; i < t->nadevs; i++)
273 if(t->devs[i] == nil)
274 break;
275 if(i >= t->nadevs){
276 if(t->nadevs % Incr == 0){
277 ndevs = t->nadevs + Incr;
278 devs = realloc(t->devs, ndevs * sizeof(Fsdev*));
279 if(devs == nil){
280 free(mp);
281 return nil;
282 }
283 t->devs = devs;
284 }
285 t->devs[t->nadevs] = nil;
286 t->nadevs++;
287 }
288 kstrdup(&mp->name, name);
289 mp->vers = ++qidvers;
290 mp->tree = t;
291 t->devs[i] = mp;
292 t->ndevs++;
293 return mp;
294 }
295
296 static void
deltree(Tree * t)297 deltree(Tree *t)
298 {
299 int i;
300
301 dprint("deltree %s\n", t->name);
302 for(i = 0; i < ntrees; i++)
303 if(trees[i] == t){
304 if(i > 0){ /* "fs" never goes away */
305 free(t->name);
306 free(t->devs);
307 free(t);
308 trees[i] = nil;
309 }
310 return;
311 }
312 panic("#k: deltree: bug: tree not found");
313 }
314
315 /*
316 * A device is gone and we know that all its users are gone.
317 * A tree is gone when all its devices are gone ("fs" is never gone).
318 * Must close devices outside locks, so we could nest our own devices.
319 */
320 static void
mdeldev(Fsdev * mp)321 mdeldev(Fsdev *mp)
322 {
323 int i;
324 Inner *in;
325 Tree *t;
326
327 dprint("deldev %s gone %d ref %uld\n", mp->name, mp->gone, mp->ref);
328
329 mp->gone = 1;
330 mp->vers = ++qidvers;
331
332 wlock(&lck);
333 t = mp->tree;
334 for(i = 0; i < t->nadevs; i++)
335 if(t->devs[i] == mp){
336 t->devs[i] = nil;
337 t->ndevs--;
338 if(t->ndevs == 0)
339 deltree(t);
340 break;
341 }
342 wunlock(&lck);
343
344 free(mp->name);
345 for(i = 0; i < mp->ndevs; i++){
346 in = mp->inner[i];
347 if(in->idev != nil)
348 cclose(in->idev);
349 free(in->iname);
350 free(in);
351 }
352 if(debug)
353 memset(mp, 9, sizeof *mp); /* poison */
354 free(mp);
355 }
356
357 /*
358 * Delete one or all devices in one or all trees.
359 */
360 static void
mdelctl(char * tname,char * dname)361 mdelctl(char *tname, char *dname)
362 {
363 int i, alldevs, alltrees, some;
364 Fsdev *mp;
365 Tree *t;
366
367 dprint("delctl %s\n", dname);
368 alldevs = strcmp(dname, "*") == 0;
369 alltrees = strcmp(tname, "*") == 0;
370 some = 0;
371 Again:
372 wlock(&lck);
373 for(i = 0; i < ntrees; i++){
374 t = trees[i];
375 if(t == nil)
376 continue;
377 if(alltrees == 0 && strcmp(t->name, tname) != 0)
378 continue;
379 for(i = 0; i < t->nadevs; i++){
380 mp = t->devs[i];
381 if(t->devs[i] == nil)
382 continue;
383 if(alldevs == 0 && strcmp(mp->name, dname) != 0)
384 continue;
385 /*
386 * Careful: must close outside locks and that
387 * may change the file tree we are looking at.
388 */
389 some++;
390 mp->gone = 1;
391 if(mp->ref == 0){
392 incref(mp); /* keep it there */
393 wunlock(&lck);
394 mdeldev(mp);
395 goto Again; /* tree can change */
396 }
397 }
398 }
399 wunlock(&lck);
400 if(some == 0 && alltrees == 0)
401 error(Enonexist);
402 }
403
404 static void
setdsize(Fsdev * mp,vlong * ilen)405 setdsize(Fsdev* mp, vlong *ilen)
406 {
407 int i;
408 vlong inlen;
409 Inner *in;
410
411 dprint("setdsize %s\n", mp->name);
412 for (i = 0; i < mp->ndevs; i++){
413 in = mp->inner[i];
414 in->isize = ilen[i];
415 inlen = in->isize;
416 switch(mp->type){
417 case Finter:
418 /* truncate to multiple of Blksize */
419 inlen &= ~(Blksize-1);
420 in->isize = inlen;
421 /* fall through */
422 case Fmirror:
423 /* use size of smallest inner device */
424 if (mp->size == 0 || mp->size > inlen)
425 mp->size = inlen;
426 break;
427 case Fcat:
428 mp->size += inlen;
429 break;
430 case Fpart:
431 if(mp->start > inlen)
432 error("partition starts after device end");
433 if(inlen < mp->start + mp->size){
434 print("#k: %s: partition truncated from "
435 "%lld to %lld bytes\n", mp->name,
436 mp->size, inlen - mp->start);
437 mp->size = inlen - mp->start;
438 }
439 break;
440 }
441 }
442 if(mp->type == Finter)
443 mp->size *= mp->ndevs;
444 }
445
446 static void
validdevname(Tree * t,char * dname)447 validdevname(Tree *t, char *dname)
448 {
449 int i;
450
451 for(i = 0; i < t->nadevs; i++)
452 if(t->devs[i] != nil && strcmp(t->devs[i]->name, dname) == 0)
453 error(Eexist);
454 }
455
456 static void
parseconfig(char * a,long n,Cmdbuf ** cbp,Cmdtab ** ctp)457 parseconfig(char *a, long n, Cmdbuf **cbp, Cmdtab **ctp)
458 {
459 Cmdbuf *cb;
460 Cmdtab *ct;
461
462 *cbp = cb = parsecmd(a, n);
463 *ctp = ct = lookupcmd(cb, configs, nelem(configs));
464
465 cb->f++; /* skip command */
466 cb->nf--;
467 switch(ct->index){
468 case Fmirror:
469 case Fcat:
470 case Finter:
471 if(cb->nf < 2)
472 error("too few arguments for ctl");
473 if(cb->nf - 1 > Ndevs)
474 error("too many devices in ctl");
475 break;
476 case Fdisk:
477 if(cb->nf < 1 || cb->nf > 3)
478 error("ctl usage: disk name [sz dev]");
479 break;
480 case Fpart:
481 if(cb->nf != 4 && (cb->nf != 3 || source == nil))
482 error("ctl usage: part new [file] off len");
483 break;
484 }
485 }
486
487 static void
parsename(char * name,char * disk,char ** tree,char ** dev)488 parsename(char *name, char *disk, char **tree, char **dev)
489 {
490 char *slash;
491
492 slash = strchr(name, '/');
493 if(slash == nil){
494 if(disk != nil)
495 *tree = disk;
496 else
497 *tree = "fs";
498 *dev = name;
499 }else{
500 *tree = name;
501 *slash++ = 0;
502 *dev = slash;
503 }
504 validname(*tree, 0);
505 validname(*dev, 0);
506 }
507
508 static int
getattrs(Chan * c,vlong * lenp,int * permp)509 getattrs(Chan *c, vlong *lenp, int *permp)
510 {
511 uchar buf[128]; /* old DIRLEN plus a little should be plenty */
512 Dir d;
513 long l;
514
515 *lenp = 0;
516 *permp = 0;
517 l = devtab[c->type]->stat(c, buf, sizeof buf);
518 if (l >= 0 && convM2D(buf, l, &d, nil) > 0) {
519 *lenp = d.length;
520 *permp = d.mode & 0777;
521 }
522 return l;
523 }
524
525 /*
526 * Process a single line of configuration,
527 * often of the form "cmd newname idev0 idev1".
528 * locking is tricky, because we need a write lock to
529 * add/remove devices yet adding/removing them may lead
530 * to calls to this driver that require a read lock (when
531 * inner devices are also provided by us).
532 */
533 static void
mconfig(char * a,long n)534 mconfig(char* a, long n)
535 {
536 int i;
537 int *iperm;
538 vlong size, start;
539 vlong *ilen;
540 char *tname, *dname, *fakef[4];
541 Chan **idev;
542 Cmdbuf *cb;
543 Cmdtab *ct;
544 Fsdev *mp;
545 Inner *inprv;
546 Tree *t;
547
548 /* ignore comments & empty lines */
549 if (*a == '\0' || *a == '#' || *a == '\n')
550 return;
551
552 dprint("mconfig\n");
553 size = 0;
554 start = 0;
555 mp = nil;
556 cb = nil;
557 idev = nil;
558 ilen = nil;
559 iperm = nil;
560
561 if(waserror()){
562 free(cb);
563 nexterror();
564 }
565
566 parseconfig(a, n, &cb, &ct);
567 switch (ct->index) {
568 case Fdisk:
569 kstrdup(&disk, cb->f[0]);
570 if(cb->nf >= 2)
571 sectorsz = strtoul(cb->f[1], 0, 0);
572 else
573 sectorsz = Sectorsz;
574 if(cb->nf == 3)
575 kstrdup(&source, cb->f[2]);
576 else{
577 free(source);
578 source = nil;
579 }
580 poperror();
581 free(cb);
582 return;
583 case Fclear:
584 poperror();
585 free(cb);
586 mdelctl("*", "*"); /* del everything */
587 return;
588 case Fpart:
589 if(cb->nf == 3){
590 /*
591 * got a request in the format of sd(3),
592 * pretend we got one in our format.
593 * later we change end to be len.
594 */
595 fakef[0] = cb->f[0];
596 fakef[1] = source;
597 fakef[2] = cb->f[1];
598 fakef[3] = cb->f[2];
599 cb->f = fakef;
600 cb->nf = 4;
601 }
602 start = strtoll(cb->f[2], nil, 10);
603 size = strtoll(cb->f[3], nil, 10);
604 if(cb->f == fakef)
605 size -= start; /* it was end */
606 cb->nf -= 2;
607 break;
608 }
609 parsename(cb->f[0], disk, &tname, &dname);
610 for(i = 1; i < cb->nf; i++)
611 validname(cb->f[i], 1);
612
613 if(ct->index == Fdel){
614 mdelctl(tname, dname);
615 poperror();
616 free(cb);
617 return;
618 }
619
620 /*
621 * Open all inner devices while we have only a read lock.
622 */
623 poperror();
624 rlock(&lck);
625 if(waserror()){
626 runlock(&lck);
627 Fail:
628 for(i = 1; i < cb->nf; i++)
629 if(idev != nil && idev[i-1] != nil)
630 cclose(idev[i]);
631 if(mp != nil)
632 mdeldev(mp);
633 free(idev);
634 free(ilen);
635 free(iperm);
636 free(cb);
637 nexterror();
638 }
639 /* record names, lengths and perms of all named files */
640 idev = smalloc(sizeof(Chan*) * Ndevs);
641 ilen = smalloc(sizeof(vlong) * Ndevs);
642 iperm = smalloc(sizeof(int) * Ndevs);
643 for(i = 1; i < cb->nf; i++){
644 idev[i-1] = namec(cb->f[i], Aopen, ORDWR, 0);
645 getattrs(idev[i-1], &ilen[i-1], &iperm[i-1]);
646 }
647 poperror();
648 runlock(&lck);
649
650 /*
651 * Get a write lock and add the device if we can.
652 */
653 wlock(&lck);
654 if(waserror()){
655 wunlock(&lck);
656 goto Fail;
657 }
658
659 t = lookuptree(tname);
660 if(t != nil)
661 validdevname(t, dname);
662 else
663 t = treealloc(tname);
664 if(t == nil)
665 error("no more trees");
666 mp = devalloc(t, dname);
667 if(mp == nil){
668 if(t->ndevs == 0) /* it was created for us */
669 deltree(t); /* but we will not mdeldev() */
670 error(Enomem);
671 }
672
673 /* construct mp from iname, idev and iperm arrays */
674 mp->type = ct->index;
675 if(mp->type == Fpart){
676 mp->start = start * sectorsz;
677 mp->size = size * sectorsz;
678 }
679 mp->perm = 0666;
680 for(i = 1; i < cb->nf; i++){
681 inprv = mp->inner[i-1] = mallocz(sizeof(Inner), 1);
682 if(inprv == nil)
683 error(Enomem);
684 mp->ndevs++;
685 kstrdup(&inprv->iname, cb->f[i]);
686 inprv->idev = idev[i-1];
687 idev[i-1] = nil;
688 /* use the most restrictive of the inner permissions */
689 mp->perm &= iperm[i-1];
690 }
691 setdsize(mp, ilen);
692
693 poperror();
694 wunlock(&lck);
695 free(idev);
696 free(ilen);
697 free(iperm);
698 free(cb);
699 }
700
701 static void
rdconf(void)702 rdconf(void)
703 {
704 int mustrd;
705 char *c, *e, *p, *s;
706 Chan *cc;
707 static int configed;
708
709 /* only read config file once */
710 if (configed)
711 return;
712 configed = 1;
713
714 dprint("rdconf\n");
715 /* add the std "fs" tree */
716 trees[0] = &fstree;
717 ntrees++;
718 fstree.name = "fs";
719
720 /* identify the config file */
721 s = getconf("fsconfig");
722 if (s == nil){
723 mustrd = 0;
724 s = "/dev/sdC0/fscfg";
725 } else
726 mustrd = 1;
727
728 /* read it */
729 cc = nil;
730 c = nil;
731 if (waserror()){
732 if (cc != nil)
733 cclose(cc);
734 if (c)
735 free(c);
736 if (!mustrd)
737 return;
738 nexterror();
739 }
740 cc = namec(s, Aopen, OREAD, 0);
741 devtab[cc->type]->read(cc, confstr, sizeof confstr, 0);
742 cclose(cc);
743 cc = nil;
744
745 /* validate, copy and erase config; mconfig will repopulate confstr */
746 if (strncmp(confstr, cfgstr, sizeof cfgstr - 1) != 0)
747 error("bad #k config, first line must be: 'fsdev:\\n'");
748 kstrdup(&c, confstr + sizeof cfgstr - 1);
749 memset(confstr, 0, sizeof confstr);
750
751 /* process config copy one line at a time */
752 for (p = c; p != nil && *p != '\0'; p = e){
753 e = strchr(p, '\n');
754 if (e == nil)
755 e = p + strlen(p);
756 else
757 e++;
758 mconfig(p, e - p);
759 }
760 USED(cc); /* until now, can be used in waserror clause */
761 poperror();
762 }
763
764 static int
mgen(Chan * c,char *,Dirtab *,int,int i,Dir * dp)765 mgen(Chan *c, char*, Dirtab*, int, int i, Dir *dp)
766 {
767 int treeno;
768 Fsdev *mp;
769 Qid qid;
770 Tree *t;
771
772 dprint("mgen %#ullx %d\n", c->qid.path, i);
773 qid.type = QTDIR;
774 qid.vers = 0;
775 if(c->qid.path == Qtop){
776 if(i == DEVDOTDOT){
777 devdir(c, tqid, "#k", 0, eve, DMDIR|0775, dp);
778 return 1;
779 }
780 t = gettree(i, Optional);
781 if(t == nil){
782 dprint("no\n");
783 return -1;
784 }
785 qid.path = mkpath(i, Qdir);
786 devdir(c, qid, t->name, 0, eve, DMDIR|0775, dp);
787 return 1;
788 }
789
790 treeno = path2treeno(c->qid.path);
791 t = gettree(treeno, Optional);
792 if(t == nil){
793 dprint("no\n");
794 return -1;
795 }
796 if((c->qid.type & QTDIR) != 0){
797 if(i == DEVDOTDOT){
798 devdir(c, tqid, "#k", 0, eve, DMDIR|0775, dp);
799 return 1;
800 }
801 if(treeno == 0){
802 /* take care of #k/fs/ctl */
803 if(i == 0){
804 devdir(c, cqid, "ctl", 0, eve, 0664, dp);
805 return 1;
806 }
807 i--;
808 }
809 mp = getdev(t, i, Optional);
810 if(mp == nil){
811 dprint("no\n");
812 return -1;
813 }
814 qid.type = QTFILE;
815 qid.vers = mp->vers;
816 qid.path = mkpath(treeno, Qfirst+i);
817 devdir(c, qid, mp->name, mp->size, eve, mp->perm, dp);
818 return 1;
819 }
820
821 if(i == DEVDOTDOT){
822 qid.path = mkpath(treeno, Qdir);
823 devdir(c, qid, t->name, 0, eve, DMDIR|0775, dp);
824 return 1;
825 }
826 dprint("no\n");
827 return -1;
828 }
829
830 static Chan*
mattach(char * spec)831 mattach(char *spec)
832 {
833 dprint("mattach\n");
834 return devattach(fsdevtab.dc, spec);
835 }
836
837 static Walkqid*
mwalk(Chan * c,Chan * nc,char ** name,int nname)838 mwalk(Chan *c, Chan *nc, char **name, int nname)
839 {
840 Walkqid *wq;
841
842 rdconf();
843
844 dprint("mwalk %llux\n", c->qid.path);
845 rlock(&lck);
846 if(waserror()){
847 runlock(&lck);
848 nexterror();
849 }
850 wq = devwalk(c, nc, name, nname, 0, 0, mgen);
851 poperror();
852 runlock(&lck);
853 return wq;
854 }
855
856 static int
mstat(Chan * c,uchar * db,int n)857 mstat(Chan *c, uchar *db, int n)
858 {
859 int p;
860 Dir d;
861 Fsdev *mp;
862 Qid q;
863 Tree *t;
864
865 dprint("mstat %llux\n", c->qid.path);
866 rlock(&lck);
867 if(waserror()){
868 runlock(&lck);
869 nexterror();
870 }
871 p = c->qid.path;
872 memset(&d, 0, sizeof d);
873 switch(p){
874 case Qtop:
875 devdir(c, tqid, "#k", 0, eve, DMDIR|0775, &d);
876 break;
877 case Qctl:
878 devdir(c, cqid, "ctl", 0, eve, 0664, &d);
879 break;
880 default:
881 t = gettree(path2treeno(p), Mustexist);
882 if(c->qid.type & QTDIR)
883 devdir(c, c->qid, t->name, 0, eve, DMDIR|0775, &d);
884 else{
885 mp = getdev(t, path2devno(p) - Qfirst, Mustexist);
886 q = c->qid;
887 q.vers = mp->vers;
888 devdir(c, q, mp->name, mp->size, eve, mp->perm, &d);
889 }
890 }
891 n = convD2M(&d, db, n);
892 if (n == 0)
893 error(Ebadarg);
894 poperror();
895 runlock(&lck);
896 return n;
897 }
898
899 static Chan*
mopen(Chan * c,int omode)900 mopen(Chan *c, int omode)
901 {
902 int q;
903 Fsdev *mp;
904
905 dprint("mopen %llux\n", c->qid.path);
906 if((c->qid.type & QTDIR) && omode != OREAD)
907 error(Eperm);
908 if(c->qid.path != Qctl && (c->qid.type&QTDIR) == 0){
909 rlock(&lck);
910 if(waserror()){
911 runlock(&lck);
912 nexterror();
913 }
914 q = c->qid.path;
915 mp = path2dev(q);
916 if(mp->gone)
917 error(Egone);
918 devpermcheck(eve, mp->perm, omode);
919 incref(mp);
920 poperror();
921 runlock(&lck);
922 }
923 /*
924 * Our mgen does not return the info for the qid
925 * but only for its children. Don't use devopen here.
926 */
927 c->offset = 0;
928 c->mode = openmode(omode & ~OTRUNC);
929 c->flag |= COPEN;
930 return c;
931 }
932
933 static void
mclose(Chan * c)934 mclose(Chan *c)
935 {
936 int mustdel, q;
937 Fsdev *mp;
938
939 dprint("mclose %llux\n", c->qid.path);
940 if(c->qid.type & QTDIR || !(c->flag & COPEN))
941 return;
942 rlock(&lck);
943 if(waserror()){
944 runlock(&lck);
945 nexterror();
946 }
947 mustdel = 0;
948 mp = nil;
949 q = c->qid.path;
950 if(q == Qctl){
951 free(disk);
952 disk = nil; /* restore defaults */
953 free(source);
954 source = nil;
955 sectorsz = Sectorsz;
956 }else{
957 mp = path2dev(q);
958 if(mp->gone != 0 && mp->ref == 1)
959 mustdel = 1;
960 else
961 decref(mp);
962 }
963 poperror();
964 runlock(&lck);
965 if(mustdel)
966 mdeldev(mp);
967 }
968
969 static long
io(Fsdev * mp,Inner * in,int isread,void * a,long l,vlong off)970 io(Fsdev *mp, Inner *in, int isread, void *a, long l, vlong off)
971 {
972 long wl;
973 Chan *mc;
974
975 mc = in->idev;
976 if(mc == nil)
977 error(Egone);
978 if (waserror()) {
979 print("#k: %s: byte %,lld count %ld (of #k/%s): %s error: %s\n",
980 in->iname, off, l, mp->name, (isread? "read": "write"),
981 (up && up->errstr? up->errstr: ""));
982 nexterror();
983 }
984 if (isread)
985 wl = devtab[mc->type]->read(mc, a, l, off);
986 else
987 wl = devtab[mc->type]->write(mc, a, l, off);
988 poperror();
989 return wl;
990 }
991
992 /* NB: a transfer could span multiple inner devices */
993 static long
catio(Fsdev * mp,int isread,void * a,long n,vlong off)994 catio(Fsdev *mp, int isread, void *a, long n, vlong off)
995 {
996 int i;
997 long l, res;
998 Inner *in;
999
1000 if(debug)
1001 print("catio %d %p %ld %lld\n", isread, a, n, off);
1002 res = n;
1003 for (i = 0; n > 0 && i < mp->ndevs; i++){
1004 in = mp->inner[i];
1005 if (off >= in->isize){
1006 off -= in->isize;
1007 continue; /* not there yet */
1008 }
1009 if (off + n > in->isize)
1010 l = in->isize - off;
1011 else
1012 l = n;
1013 if(debug)
1014 print("\tdev %d %p %ld %lld\n", i, a, l, off);
1015
1016 if (io(mp, in, isread, a, l, off) != l)
1017 error(Eio);
1018
1019 a = (char*)a + l;
1020 off = 0;
1021 n -= l;
1022 }
1023 if(debug)
1024 print("\tres %ld\n", res - n);
1025 return res - n;
1026 }
1027
1028 static long
interio(Fsdev * mp,int isread,void * a,long n,vlong off)1029 interio(Fsdev *mp, int isread, void *a, long n, vlong off)
1030 {
1031 int i;
1032 long boff, res, l, wl, wsz;
1033 vlong woff, blk, mblk;
1034
1035 blk = off / Blksize;
1036 boff = off % Blksize;
1037 wsz = Blksize - boff;
1038 res = n;
1039 while(n > 0){
1040 mblk = blk / mp->ndevs;
1041 i = blk % mp->ndevs;
1042 woff = mblk*Blksize + boff;
1043 if (n > wsz)
1044 l = wsz;
1045 else
1046 l = n;
1047
1048 wl = io(mp, mp->inner[i], isread, a, l, woff);
1049 if (wl != l)
1050 error(Eio);
1051
1052 blk++;
1053 boff = 0;
1054 wsz = Blksize;
1055 a = (char*)a + l;
1056 n -= l;
1057 }
1058 return res;
1059 }
1060
1061 static char*
seprintconf(char * s,char * e)1062 seprintconf(char *s, char *e)
1063 {
1064 int i, j;
1065 Tree *t;
1066
1067 *s = 0;
1068 for(i = 0; i < ntrees; i++){
1069 t = trees[i];
1070 if(t != nil)
1071 for(j = 0; j < t->nadevs; j++)
1072 if(t->devs[j] != nil)
1073 s = seprintdev(s, e, t->devs[j]);
1074 }
1075 return s;
1076 }
1077
/*
 * Read n bytes at offset off, holding the read lock throughout.
 * A directory is read with devdirread and mgen; ctl returns the
 * configuration text built by seprintconf; a device is read
 * according to its type.  Reads of a device at or past its end
 * return 0 and reads running past the end are shortened.
 * Raises Enonexist if the device no longer exists and Eio if a
 * mirror cannot be read after all retries.
 */
static long
mread(Chan *c, void *a, long n, vlong off)
{
	int i, retry;
	long l, res;
	Fsdev *mp;
	Tree *t;

	dprint("mread %llux\n", c->qid.path);
	rlock(&lck);
	if(waserror()){
		runlock(&lck);
		nexterror();
	}
	res = -1;
	if(c->qid.type & QTDIR){
		res = devdirread(c, a, n, 0, 0, mgen);
		goto Done;
	}
	if(c->qid.path == Qctl){
		/* confstr is reused as the buffer for the text */
		seprintconf(confstr, confstr + sizeof(confstr));
		res = readstr((long)off, a, n, confstr);
		goto Done;
	}

	t = gettree(path2treeno(c->qid.path), Mustexist);
	mp = getdev(t, path2devno(c->qid.path) - Qfirst, Mustexist);

	/* keep the transfer within the device */
	if(off >= mp->size){
		res = 0;
		goto Done;
	}
	if(off + n > mp->size)
		n = mp->size - off;
	if(n == 0){
		res = 0;
		goto Done;
	}

	switch(mp->type){
	case Fcat:
		res = catio(mp, Isread, a, n, off);
		break;
	case Finter:
		res = interio(mp, Isread, a, n, off);
		break;
	case Fpart:
		res = io(mp, mp->inner[0], Isread, a, n, mp->start + off);
		break;
	case Fmirror:
		/*
		 * Try the inner devices in order and take the first read
		 * that does not fail.  If they all fail, pause and try them
		 * all again, up to Maxretries times.
		 */
		retry = 0;
		do {
			if (retry > 0) {
				print("#k/%s: retry %d read for byte %,lld "
					"count %ld: %s\n", mp->name, retry, off,
					n, (up && up->errstr? up->errstr: ""));
				/*
				 * pause before retrying in case it's due to
				 * a transient bus or controller problem.
				 */
				tsleep(&up->sleep, return0, 0, Retrypause);
			}
			for (i = 0; i < mp->ndevs; i++){
				/* an error from io() lands here: go on with the next copy */
				if (waserror())
					continue;
				l = io(mp, mp->inner[i], Isread, a, n, off);
				poperror();
				if (l >= 0){
					res = l;
					break;		/* read a good copy */
				}
			}
			/* i == mp->ndevs means that no inner device could be read */
		} while (i == mp->ndevs && ++retry <= Maxretries);
		if (retry > Maxretries) {
			/* no mirror had a good copy of the block */
			print("#k/%s: byte %,lld count %ld: CAN'T READ "
				"from mirror: %s\n", mp->name, off, n,
				(up && up->errstr? up->errstr: ""));
			error(Eio);
		} else if (retry > 0)
			print("#k/%s: byte %,lld count %ld: retry read OK "
				"from mirror: %s\n", mp->name, off, n,
				(up && up->errstr? up->errstr: ""));
		break;
	}
Done:
	poperror();
	runlock(&lck);
	return res;
}
1168
/*
 * Write n bytes at offset off.  Directories cannot be written
 * (Eisdir).  A write to ctl is a configuration request and is
 * handed to mconfig without taking the lock here.  A write to a
 * device is done according to its type while holding the read
 * lock: writes at or past the end of the device return 0 and
 * writes running past the end are shortened.
 * Raises Enonexist if the device no longer exists and Eio if the
 * data could not be written.
 */
static long
mwrite(Chan *c, void *a, long n, vlong off)
{
	int i, allbad, anybad, retry;
	long l, res;
	Fsdev *mp;
	Tree *t;

	dprint("mwrite %llux\n", c->qid.path);
	if (c->qid.type & QTDIR)
		error(Eisdir);
	if (c->qid.path == Qctl){
		mconfig(a, n);
		return n;
	}

	rlock(&lck);
	if(waserror()){
		runlock(&lck);
		nexterror();
	}

	t = gettree(path2treeno(c->qid.path), Mustexist);
	mp = getdev(t, path2devno(c->qid.path) - Qfirst, Mustexist);

	/* keep the transfer within the device */
	if(off >= mp->size){
		res = 0;
		goto Done;
	}
	if(off + n > mp->size)
		n = mp->size - off;
	if(n == 0){
		res = 0;
		goto Done;
	}
	res = n;
	switch(mp->type){
	case Fcat:
		res = catio(mp, Iswrite, a, n, off);
		break;
	case Finter:
		res = interio(mp, Iswrite, a, n, off);
		break;
	case Fpart:
		res = io(mp, mp->inner[0], Iswrite, a, n, mp->start + off);
		if (res != n)
			error(Eio);
		break;
	case Fmirror:
		/*
		 * Write to every inner device, last one first.  As long as
		 * any of them fails, pause and write them all again, up to
		 * Maxretries times.  The write fails only if the last pass
		 * wrote no complete copy at all; res stays n otherwise.
		 */
		retry = 0;
		do {
			if (retry > 0) {
				print("#k/%s: retry %d write for byte %,lld "
					"count %ld: %s\n", mp->name, retry, off,
					n, (up && up->errstr? up->errstr: ""));
				/*
				 * pause before retrying in case it's due to
				 * a transient bus or controller problem.
				 */
				tsleep(&up->sleep, return0, 0, Retrypause);
			}
			allbad = 1;
			anybad = 0;
			for (i = mp->ndevs - 1; i >= 0; i--){
				/* an error from io() lands here: note it, go on with the next copy */
				if (waserror()) {
					anybad = 1;
					continue;
				}
				l = io(mp, mp->inner[i], Iswrite, a, n, off);
				poperror();
				if (l == n)
					allbad = 0;	/* wrote a good copy */
				else
					anybad = 1;
			}
		} while (anybad && ++retry <= Maxretries);
		if (allbad) {
			/* no mirror took a good copy of the block */
			print("#k/%s: byte %,lld count %ld: CAN'T WRITE "
				"to mirror: %s\n", mp->name, off, n,
				(up && up->errstr? up->errstr: ""));
			error(Eio);
		} else if (retry > 0)
			print("#k/%s: byte %,lld count %ld: retry wrote OK "
				"to mirror: %s\n", mp->name, off, n,
				(up && up->errstr? up->errstr: ""));

		break;
	}
Done:
	poperror();
	runlock(&lck);
	return res;
}
1263
/*
 * Device table entry for #k.  The initializer is positional.
 * The m* functions are the ones defined in this file; the dev*
 * entries are not defined here (presumably the kernel's generic
 * defaults -- confirm against the Dev declaration).
 */
Dev fsdevtab = {
	'k',			/* device character, as in #k */
	"fs",

	devreset,
	devinit,
	devshutdown,
	mattach,
	mwalk,
	mstat,
	mopen,
	devcreate,
	mclose,
	mread,
	devbread,
	mwrite,
	devbwrite,
	devremove,
	devwstat,
	devpower,
	devconfig,
};
1286