1 /* 2 * File system devices. 3 * Follows device config in Ken's file server. 4 * Builds mirrors, concatenations, interleavings, and partitions 5 * of devices out of other (inner) devices. 6 */ 7 8 #include "u.h" 9 #include "../port/lib.h" 10 #include "mem.h" 11 #include "dat.h" 12 #include "fns.h" 13 #include "io.h" 14 #include "ureg.h" 15 #include "../port/error.h" 16 17 enum { 18 Fmirror, /* mirror of others */ 19 Fcat, /* catenation of others */ 20 Finter, /* interleaving of others */ 21 Fpart, /* part of others */ 22 Fclear, /* start over */ 23 24 Blksize = 8*1024, /* for Finter only */ 25 26 Qtop = 0, /* top dir (contains "fs") */ 27 Qdir, /* actual dir */ 28 Qctl, /* ctl file */ 29 Qfirst, /* first fs file */ 30 31 Iswrite = 0, 32 Isread, 33 34 /* tunable parameters */ 35 Maxconf = 4*1024, /* max length for config */ 36 Ndevs = 32, /* max. inner devs per command */ 37 Nfsdevs = 128, /* max. created devs, total */ 38 }; 39 40 #define Cfgstr "fsdev:\n" 41 42 typedef struct Inner Inner; 43 struct Inner 44 { 45 char *iname; /* inner device name */ 46 vlong isize; /* size of inner device */ 47 Chan *idev; /* inner device */ 48 }; 49 50 typedef struct Fsdev Fsdev; 51 struct Fsdev 52 { 53 int type; 54 char *name; /* name for this fsdev */ 55 vlong size; /* min(inner[X].isize) */ 56 vlong start; /* start address (for Fpart) */ 57 int ndevs; /* number of inner devices */ 58 Inner inner[Ndevs]; 59 }; 60 61 extern Dev fsdevtab; /* forward */ 62 63 /* 64 * Once configured, a fsdev is never removed. The name of those 65 * configured is never nil. We have no locks here. 66 */ 67 static Fsdev fsdev[Nfsdevs]; 68 69 static Qid tqid = {Qtop, 0, QTDIR}; 70 static Qid dqid = {Qdir, 0, QTDIR}; 71 static Qid cqid = {Qctl, 0, 0}; 72 73 static Cmdtab configs[] = { 74 Fmirror,"mirror", 0, 75 Fcat, "cat", 0, 76 Finter, "inter", 0, 77 Fpart, "part", 5, 78 Fclear, "clear", 1, 79 }; 80 81 static char confstr[Maxconf]; 82 static int configed; 83 84 85 static Fsdev* 86 path2dev(int i, int mustexist) 87 { 88 if (i < 0 || i >= nelem(fsdev)) 89 error("bug: bad index in devfsdev"); 90 if (mustexist && fsdev[i].name == nil) 91 error(Enonexist); 92 93 if (fsdev[i].name == nil) 94 return nil; 95 else 96 return &fsdev[i]; 97 } 98 99 static Fsdev* 100 devalloc(void) 101 { 102 int i; 103 104 for (i = 0; i < nelem(fsdev); i++) 105 if (fsdev[i].name == nil) 106 break; 107 if (i == nelem(fsdev)) 108 error(Enodev); 109 110 return &fsdev[i]; 111 } 112 113 static void 114 setdsize(Fsdev* mp) 115 { 116 int i; 117 long l; 118 uchar buf[128]; /* old DIRLEN plus a little should be plenty */ 119 Dir d; 120 Inner *in; 121 122 if (mp->type != Fpart){ 123 mp->start= 0; 124 mp->size = 0; 125 } 126 for (i = 0; i < mp->ndevs; i++){ 127 in = &mp->inner[i]; 128 l = devtab[in->idev->type]->stat(in->idev, buf, sizeof buf); 129 convM2D(buf, l, &d, nil); 130 in->isize = d.length; 131 switch(mp->type){ 132 case Fmirror: 133 if (mp->size == 0 || mp->size > d.length) 134 mp->size = d.length; 135 break; 136 case Fcat: 137 mp->size += d.length; 138 break; 139 case Finter: 140 /* truncate to multiple of Blksize */ 141 d.length &= ~(Blksize-1); 142 in->isize = d.length; 143 mp->size += d.length; 144 break; 145 case Fpart: 146 /* should raise errors here? */ 147 if (mp->start > d.length) 148 mp->start = d.length; 149 if (d.length < mp->start + mp->size) 150 mp->size = d.length - mp->start; 151 break; 152 } 153 } 154 } 155 156 static void 157 mpshut(Fsdev *mp) 158 { 159 int i; 160 char *nm; 161 162 nm = mp->name; 163 mp->name = nil; /* prevent others from using this. */ 164 if (nm) 165 free(nm); 166 for (i = 0; i < mp->ndevs; i++){ 167 if (mp->inner[i].idev != nil) 168 cclose(mp->inner[i].idev); 169 if (mp->inner[i].iname) 170 free(mp->inner[i].iname); 171 } 172 memset(mp, 0, sizeof *mp); 173 } 174 175 176 static void 177 mconfig(char* a, long n) /* "name idev0 idev1" */ 178 { 179 int i; 180 vlong size, start; 181 char *c, *oldc; 182 Cmdbuf *cb; 183 Cmdtab *ct; 184 Fsdev *mp; 185 Inner *inprv; 186 static QLock lck; 187 188 size = 0; 189 start = 0; 190 if (confstr[0] == 0) 191 seprint(confstr, confstr + sizeof confstr, Cfgstr); 192 mp = nil; 193 cb = nil; 194 oldc = confstr + strlen(confstr); 195 if (*a == '\0' || *a == '#' || *a == '\n') 196 return; 197 198 qlock(&lck); 199 if (waserror()){ 200 *oldc = 0; 201 if (mp != nil) 202 mpshut(mp); 203 qunlock(&lck); 204 if (cb) 205 free(cb); 206 nexterror(); 207 } 208 209 cb = parsecmd(a, n); 210 c = oldc; 211 for (i = 0; i < cb->nf; i++) 212 c = seprint(c, confstr + sizeof confstr, "%s ", cb->f[i]); 213 if (c > confstr) 214 c[-1] = '\n'; 215 ct = lookupcmd(cb, configs, nelem(configs)); 216 cb->f++; /* skip command */ 217 cb->nf--; 218 if (cb->nf < 0) /* nothing to see here, move along */ 219 ct->index = -1; 220 switch (ct->index) { 221 case Fpart: 222 if (cb->nf < 4) 223 error("too few fields in fs config"); 224 start = strtoll(cb->f[2], nil, 10); 225 size = strtoll(cb->f[3], nil, 10); 226 cb->nf -= 2; 227 break; 228 case Fclear: 229 for (mp = fsdev; mp < fsdev + nelem(fsdev); mp++) 230 mpshut(mp); 231 *confstr = '\0'; 232 /* FALL THROUGH */ 233 case -1: 234 poperror(); 235 qunlock(&lck); 236 free(cb); 237 return; 238 } 239 if (cb->nf < 2) 240 error("too few fields in fs config"); 241 242 /* reject name if already in use */ 243 for (i = 0; i < nelem(fsdev); i++) 244 if (fsdev[i].name != nil && strcmp(fsdev[i].name, cb->f[0])==0) 245 error(Eexist); 246 247 if (cb->nf - 1 > Ndevs) 248 error("too many devices; fix #k: increase Ndevs"); 249 for (i = 0; i < cb->nf; i++) 250 validname(cb->f[i], (i != 0)); 251 252 mp = devalloc(); 253 mp->type = ct->index; 254 if (mp->type == Fpart){ 255 mp->start = start; 256 mp->size = size; 257 } 258 kstrdup(&mp->name, cb->f[0]); 259 for (i = 1; i < cb->nf; i++){ 260 inprv = &mp->inner[i-1]; 261 kstrdup(&inprv->iname, cb->f[i]); 262 inprv->idev = namec(inprv->iname, Aopen, ORDWR, 0); 263 if (inprv->idev == nil) { 264 free(mp->name); 265 mp->name = nil; /* free mp */ 266 error(Egreg); 267 } 268 mp->ndevs++; 269 } 270 setdsize(mp); 271 configed = 1; 272 273 poperror(); 274 qunlock(&lck); 275 free(cb); 276 } 277 278 static void 279 rdconf(void) 280 { 281 int mustrd; 282 char *c, *e, *p, *s; 283 Chan *cc; 284 Chan **ccp; 285 286 s = getconf("fsconfig"); 287 if (s == nil){ 288 mustrd = 0; 289 s = "/dev/sdC0/fscfg"; 290 } else 291 mustrd = 1; 292 ccp = &cc; 293 *ccp = nil; 294 c = nil; 295 if (waserror()){ 296 configed = 1; 297 if (*ccp != nil) 298 cclose(*ccp); 299 if (c) 300 free(c); 301 if (!mustrd) 302 return; 303 nexterror(); 304 } 305 *ccp = namec(s, Aopen, OREAD, 0); 306 devtab[(*ccp)->type]->read(*ccp, confstr, sizeof confstr, 0); 307 cclose(*ccp); 308 *ccp = nil; 309 if (strncmp(confstr, Cfgstr, strlen(Cfgstr)) != 0) 310 error("bad #k config, first line must be: 'fsdev:\\n'"); 311 kstrdup(&c, confstr + strlen(Cfgstr)); 312 memset(confstr, 0, sizeof confstr); 313 for (p = c; p != nil && *p != 0; p = e){ 314 e = strchr(p, '\n'); 315 if (e == nil) 316 e = p + strlen(p); 317 if (e == p) { 318 e++; 319 continue; 320 } 321 mconfig(p, e - p); 322 } 323 poperror(); 324 } 325 326 327 static int 328 mgen(Chan *c, char*, Dirtab*, int, int i, Dir *dp) 329 { 330 Qid qid; 331 Fsdev *mp; 332 333 if (c->qid.path == Qtop) 334 switch(i){ 335 case DEVDOTDOT: 336 devdir(c, tqid, "#k", 0, eve, DMDIR|0775, dp); 337 return 1; 338 case 0: 339 devdir(c, dqid, "fs", 0, eve, DMDIR|0775, dp); 340 return 1; 341 default: 342 return -1; 343 } 344 if (c->qid.path != Qdir) 345 switch(i){ 346 case DEVDOTDOT: 347 devdir(c, dqid, "fs", 0, eve, DMDIR|0775, dp); 348 return 1; 349 default: 350 return -1; 351 } 352 switch(i){ 353 case DEVDOTDOT: 354 devdir(c, tqid, "#k", 0, eve, DMDIR|0775, dp); 355 return 1; 356 case 0: 357 devdir(c, cqid, "ctl", 0, eve, 0664, dp); 358 return 1; 359 } 360 i--; /* for ctl */ 361 qid.path = Qfirst + i; 362 qid.vers = 0; 363 qid.type = 0; 364 mp = path2dev(i, 0); 365 if (mp == nil) 366 return -1; 367 kstrcpy(up->genbuf, mp->name, sizeof(up->genbuf)); 368 devdir(c, qid, up->genbuf, mp->size, eve, 0664, dp); 369 return 1; 370 } 371 372 static Chan* 373 mattach(char *spec) 374 { 375 return devattach(fsdevtab.dc, spec); 376 } 377 378 static Walkqid* 379 mwalk(Chan *c, Chan *nc, char **name, int nname) 380 { 381 if (!configed) 382 rdconf(); 383 return devwalk(c, nc, name, nname, 0, 0, mgen); 384 } 385 386 static int 387 mstat(Chan *c, uchar *db, int n) 388 { 389 Dir d; 390 Fsdev *mp; 391 int p; 392 393 p = c->qid.path; 394 memset(&d, 0, sizeof d); 395 switch(p){ 396 case Qtop: 397 devdir(c, tqid, "#k", 0, eve, DMDIR|0775, &d); 398 break; 399 case Qdir: 400 devdir(c, dqid, "fs", 0, eve, DMDIR|0775, &d); 401 break; 402 case Qctl: 403 devdir(c, cqid, "ctl", 0, eve, 0664, &d); 404 break; 405 default: 406 mp = path2dev(p - Qfirst, 1); 407 devdir(c, c->qid, mp->name, mp->size, eve, 0664, &d); 408 } 409 n = convD2M(&d, db, n); 410 if (n == 0) 411 error(Ebadarg); 412 return n; 413 } 414 415 static Chan* 416 mopen(Chan *c, int omode) 417 { 418 // TODO: call devopen()? 419 if((c->qid.type & QTDIR) && omode != OREAD) 420 error(Eperm); 421 // if (c->flag & COPEN) 422 // return c; 423 c->mode = openmode(omode & ~OTRUNC); 424 c->flag |= COPEN; 425 c->offset = 0; 426 return c; 427 } 428 429 static void 430 mclose(Chan*) 431 { 432 /* that's easy */ 433 } 434 435 436 static long 437 io(Fsdev *mp, Inner *in, int isread, void *a, long l, vlong off) 438 { 439 long wl; 440 Chan *mc = in->idev; 441 442 if (waserror()) { 443 print("#k: %s: byte %,lld count %ld (of #k/%s): %s error: %s\n", 444 in->iname, off, l, mp->name, (isread? "read": "write"), 445 (up && up->errstr? up->errstr: "")); 446 nexterror(); 447 } 448 if (isread) { 449 wl = devtab[mc->type]->read(mc, a, l, off); 450 if (wl != l) 451 error("#k: short read"); 452 } else { 453 wl = devtab[mc->type]->write(mc, a, l, off); 454 if (wl != l) 455 error("#k: write error"); 456 } 457 poperror(); 458 return wl; 459 } 460 461 static long 462 catio(Fsdev *mp, int isread, void *a, long n, vlong off) 463 { 464 int i; 465 long l, wl, res; 466 Inner *in; 467 468 // print("catio %d %p %ld %lld\n", isread, a, n, off); 469 res = n; 470 for (i = 0; n >= 0 && i < mp->ndevs ; i++){ 471 in = &mp->inner[i]; 472 if (off > in->isize){ 473 off -= in->isize; 474 continue; /* not there yet */ 475 } 476 if (off + n > in->isize) 477 l = in->isize - off; 478 else 479 l = n; 480 // print("\tdev %d %p %ld %lld\n", i, a, l, off); 481 482 wl = io(mp, in, isread, a, l, off); 483 assert(wl == l); 484 485 a = (char*)a + l; 486 off = 0; 487 n -= l; 488 } 489 // print("\tres %ld\n", res - n); 490 return res - n; 491 } 492 493 static long 494 interio(Fsdev *mp, int isread, void *a, long n, vlong off) 495 { 496 int i; 497 long boff, res, l, wl, wsz; 498 vlong woff, blk, mblk; 499 Inner *in; 500 501 blk = off / Blksize; 502 boff = off % Blksize; 503 wsz = Blksize - boff; 504 res = n; 505 while(n > 0){ 506 mblk = blk / mp->ndevs; 507 i = blk % mp->ndevs; 508 woff = mblk*Blksize + boff; 509 if (n > wsz) 510 l = wsz; 511 else 512 l = n; 513 514 in = &mp->inner[i]; 515 wl = io(mp, in, isread, a, l, woff); 516 if (wl != l || l == 0) 517 error(Eio); 518 519 a = (char*)a + l; 520 n -= l; 521 blk++; 522 boff = 0; 523 wsz = Blksize; 524 } 525 return res; 526 } 527 528 static long 529 mread(Chan *c, void *a, long n, vlong off) 530 { 531 int i, retry; 532 long l, res; 533 Fsdev *mp; 534 Inner *in; 535 536 if (c->qid.type & QTDIR) 537 return devdirread(c, a, n, 0, 0, mgen); 538 if (c->qid.path == Qctl) 539 return readstr((long)off, a, n, confstr + strlen(Cfgstr)); 540 i = c->qid.path - Qfirst; 541 mp = path2dev(i, 1); 542 543 if (off >= mp->size) 544 return 0; 545 if (off + n > mp->size) 546 n = mp->size - off; 547 if (n == 0) 548 return 0; 549 550 res = -1; 551 switch(mp->type){ 552 case Fcat: 553 res = catio(mp, Isread, a, n, off); 554 break; 555 case Finter: 556 res = interio(mp, Isread, a, n, off); 557 break; 558 case Fpart: 559 in = &mp->inner[0]; 560 res = io(mp, in, Isread, a, n, mp->start + off); 561 assert(res == n); 562 break; 563 case Fmirror: 564 retry = 0; 565 do { 566 if (retry > 0) { 567 print("#k/%s: retry %d read for byte %,lld " 568 "count %ld: %s\n", mp->name, retry, off, 569 n, (up && up->errstr? up->errstr: "")); 570 tsleep(&up->sleep, return0, 0, 2000); 571 } 572 for (i = 0; i < mp->ndevs; i++){ 573 if (waserror()) 574 continue; 575 in = &mp->inner[i]; 576 l = io(mp, in, Isread, a, n, off); 577 poperror(); 578 if (l >= 0){ 579 res = l; 580 break; /* read a good copy */ 581 } 582 } 583 } while (i == mp->ndevs && ++retry < 2); 584 if (i == mp->ndevs) { 585 /* no mirror had a good copy of the block */ 586 print("#k/%s: byte %,lld count %ld: CAN'T READ " 587 "from mirror: %s\n", mp->name, off, n, 588 (up && up->errstr? up->errstr: "")); 589 error(Eio); 590 } else if (retry > 0) 591 print("#k/%s: byte %,lld count %ld: retry read OK " 592 "from mirror: %s\n", mp->name, off, n, 593 (up && up->errstr? up->errstr: "")); 594 break; 595 } 596 return res; 597 } 598 599 static long 600 mwrite(Chan *c, void *a, long n, vlong off) 601 { 602 int i, allbad, anybad, retry; 603 long l, res; 604 Fsdev *mp; 605 Inner *in; 606 607 if (c->qid.type & QTDIR) 608 error(Eperm); 609 if (c->qid.path == Qctl){ 610 mconfig(a, n); 611 return n; 612 } 613 mp = path2dev(c->qid.path - Qfirst, 1); 614 615 if (off >= mp->size) 616 return 0; 617 if (off + n > mp->size) 618 n = mp->size - off; 619 if (n == 0) 620 return 0; 621 res = n; 622 switch(mp->type){ 623 case Fcat: 624 res = catio(mp, Iswrite, a, n, off); 625 break; 626 case Finter: 627 res = interio(mp, Iswrite, a, n, off); 628 break; 629 case Fpart: 630 in = &mp->inner[0]; 631 res = io(mp, in, Iswrite, a, n, mp->start + off); 632 if (res > n) 633 res = n; 634 break; 635 case Fmirror: 636 retry = 0; 637 do { 638 if (retry > 0) { 639 print("#k/%s: retry %d write for byte %,lld " 640 "count %ld: %s\n", mp->name, retry, off, 641 n, (up && up->errstr? up->errstr: "")); 642 tsleep(&up->sleep, return0, 0, 2000); 643 } 644 allbad = 1; 645 anybad = 0; 646 for (i = mp->ndevs - 1; i >= 0; i--){ 647 if (waserror()) { 648 anybad = 1; 649 continue; 650 } 651 in = &mp->inner[i]; 652 l = io(mp, in, Iswrite, a, n, off); 653 poperror(); 654 if (res > l) 655 res = l; /* shortest OK write */ 656 if (l == n) 657 allbad = 0; /* wrote a good copy */ 658 else 659 anybad = 1; 660 } 661 } while (anybad && ++retry < 2); 662 if (allbad) { 663 /* no mirror took a good copy of the block */ 664 print("#k/%s: byte %,lld count %ld: CAN'T WRITE " 665 "to mirror: %s\n", mp->name, off, n, 666 (up && up->errstr? up->errstr: "")); 667 error(Eio); 668 } else if (retry > 0) 669 print("#k/%s: byte %,lld count %ld: retry wrote OK " 670 "to mirror: %s\n", mp->name, off, n, 671 (up && up->errstr? up->errstr: "")); 672 673 break; 674 } 675 return res; 676 } 677 678 Dev fsdevtab = { 679 'k', 680 "devfs", 681 682 devreset, 683 devinit, 684 devshutdown, 685 mattach, 686 mwalk, 687 mstat, 688 mopen, 689 devcreate, 690 mclose, 691 mread, 692 devbread, 693 mwrite, 694 devbwrite, 695 devremove, 696 devwstat, 697 devpower, 698 devconfig, 699 }; 700