1 /* $NetBSD: rumpblk.c,v 1.55 2014/03/16 05:20:30 dholland Exp $ */ 2 3 /* 4 * Copyright (c) 2009 Antti Kantee. All Rights Reserved. 5 * 6 * Development of this software was supported by the 7 * Finnish Cultural Foundation. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 19 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 20 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 */ 30 31 /* 32 * Block device emulation. Presents a block device interface and 33 * uses rumpuser system calls to satisfy I/O requests. 34 * 35 * We provide fault injection. The driver can be made to fail 36 * I/O occasionally. 37 */ 38 39 #include <sys/cdefs.h> 40 __KERNEL_RCSID(0, "$NetBSD: rumpblk.c,v 1.55 2014/03/16 05:20:30 dholland Exp $"); 41 42 #include <sys/param.h> 43 #include <sys/buf.h> 44 #include <sys/conf.h> 45 #include <sys/condvar.h> 46 #include <sys/disklabel.h> 47 #include <sys/evcnt.h> 48 #include <sys/fcntl.h> 49 #include <sys/kmem.h> 50 #include <sys/malloc.h> 51 #include <sys/queue.h> 52 #include <sys/stat.h> 53 #include <sys/cprng.h> 54 55 #include <rump/rumpuser.h> 56 57 #include "rump_private.h" 58 #include "rump_vfs_private.h" 59 60 #if 0 61 #define DPRINTF(x) printf x 62 #else 63 #define DPRINTF(x) 64 #endif 65 66 #define RUMPBLK_SIZE 16 67 static struct rblkdev { 68 char *rblk_path; 69 int rblk_fd; 70 int rblk_mode; 71 72 uint64_t rblk_size; 73 uint64_t rblk_hostoffset; 74 uint64_t rblk_hostsize; 75 int rblk_ftype; 76 77 struct disklabel rblk_label; 78 } minors[RUMPBLK_SIZE]; 79 80 static struct evcnt ev_io_total; 81 static struct evcnt ev_io_async; 82 83 static struct evcnt ev_bwrite_total; 84 static struct evcnt ev_bwrite_async; 85 static struct evcnt ev_bread_total; 86 87 dev_type_open(rumpblk_open); 88 dev_type_close(rumpblk_close); 89 dev_type_read(rumpblk_read); 90 dev_type_write(rumpblk_write); 91 dev_type_ioctl(rumpblk_ioctl); 92 dev_type_strategy(rumpblk_strategy); 93 dev_type_strategy(rumpblk_strategy_fail); 94 dev_type_dump(rumpblk_dump); 95 dev_type_size(rumpblk_size); 96 97 static const struct bdevsw rumpblk_bdevsw = { 98 .d_open = rumpblk_open, 99 .d_close = rumpblk_close, 100 .d_strategy = rumpblk_strategy, 101 .d_ioctl = rumpblk_ioctl, 102 .d_dump = nodump, 103 .d_psize = nosize, 104 .d_flag = D_DISK 105 }; 106 107 static const struct bdevsw rumpblk_bdevsw_fail = { 108 .d_open = rumpblk_open, 109 .d_close = rumpblk_close, 110 .d_strategy = rumpblk_strategy_fail, 111 .d_ioctl = rumpblk_ioctl, 112 .d_dump = nodump, 113 .d_psize = nosize, 114 .d_flag = D_DISK 115 }; 116 117 static const struct cdevsw rumpblk_cdevsw = { 118 .d_open = rumpblk_open, 119 .d_close = rumpblk_close, 120 .d_read = rumpblk_read, 121 .d_write = rumpblk_write, 122 .d_ioctl = rumpblk_ioctl, 123 .d_stop = nostop, 124 .d_tty = notty, 125 .d_poll = nopoll, 126 .d_mmap = nommap, 127 .d_kqfilter = nokqfilter, 128 .d_flag = D_DISK 129 }; 130 131 static int backend_open(struct rblkdev *, const char *); 132 static int backend_close(struct rblkdev *); 133 134 /* fail every n out of BLKFAIL_MAX */ 135 #define BLKFAIL_MAX 10000 136 static int blkfail; 137 static unsigned randstate; 138 static kmutex_t rumpblk_lock; 139 static int sectshift = DEV_BSHIFT; 140 141 static void 142 makedefaultlabel(struct disklabel *lp, off_t size, int part) 143 { 144 int i; 145 146 memset(lp, 0, sizeof(*lp)); 147 148 lp->d_secperunit = size; 149 lp->d_secsize = 1 << sectshift; 150 lp->d_nsectors = size >> sectshift; 151 lp->d_ntracks = 1; 152 lp->d_ncylinders = 1; 153 lp->d_secpercyl = lp->d_nsectors; 154 155 /* oh dear oh dear */ 156 strncpy(lp->d_typename, "rumpd", sizeof(lp->d_typename)); 157 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname)); 158 159 lp->d_type = DTYPE_RUMPD; 160 lp->d_rpm = 11; 161 lp->d_interleave = 1; 162 lp->d_flags = 0; 163 164 /* XXX: RAW_PART handling? */ 165 for (i = 0; i < part; i++) { 166 lp->d_partitions[i].p_fstype = FS_UNUSED; 167 } 168 lp->d_partitions[part].p_size = size >> sectshift; 169 lp->d_npartitions = part+1; 170 /* XXX: file system type? */ 171 172 lp->d_magic = DISKMAGIC; 173 lp->d_magic2 = DISKMAGIC; 174 lp->d_checksum = 0; /* XXX */ 175 } 176 177 int 178 rumpblk_init(void) 179 { 180 char buf[64]; 181 devmajor_t rumpblkmaj = RUMPBLK_DEVMAJOR; 182 unsigned tmp; 183 int i; 184 185 mutex_init(&rumpblk_lock, MUTEX_DEFAULT, IPL_NONE); 186 187 if (rumpuser_getparam("RUMP_BLKFAIL", buf, sizeof(buf)) == 0) { 188 blkfail = strtoul(buf, NULL, 10); 189 /* fail everything */ 190 if (blkfail > BLKFAIL_MAX) 191 blkfail = BLKFAIL_MAX; 192 if (rumpuser_getparam("RUMP_BLKFAIL_SEED", 193 buf, sizeof(buf)) == 0) { 194 randstate = strtoul(buf, NULL, 10); 195 } else { 196 randstate = cprng_fast32(); 197 } 198 printf("rumpblk: FAULT INJECTION ACTIVE! fail %d/%d. " 199 "seed %u\n", blkfail, BLKFAIL_MAX, randstate); 200 } else { 201 blkfail = 0; 202 } 203 204 if (rumpuser_getparam("RUMP_BLKSECTSHIFT", buf, sizeof(buf)) == 0) { 205 printf("rumpblk: "); 206 tmp = strtoul(buf, NULL, 10); 207 if (tmp >= DEV_BSHIFT) 208 sectshift = tmp; 209 else 210 printf("RUMP_BLKSECTSHIFT must be least %d (now %d), ", 211 DEV_BSHIFT, tmp); 212 printf("using %d for sector shift (size %d)\n", 213 sectshift, 1<<sectshift); 214 } 215 216 memset(minors, 0, sizeof(minors)); 217 for (i = 0; i < RUMPBLK_SIZE; i++) { 218 minors[i].rblk_fd = -1; 219 } 220 221 evcnt_attach_dynamic(&ev_io_total, EVCNT_TYPE_MISC, NULL, 222 "rumpblk", "I/O reqs"); 223 evcnt_attach_dynamic(&ev_io_async, EVCNT_TYPE_MISC, NULL, 224 "rumpblk", "async I/O"); 225 226 evcnt_attach_dynamic(&ev_bread_total, EVCNT_TYPE_MISC, NULL, 227 "rumpblk", "bytes read"); 228 evcnt_attach_dynamic(&ev_bwrite_total, EVCNT_TYPE_MISC, NULL, 229 "rumpblk", "bytes written"); 230 evcnt_attach_dynamic(&ev_bwrite_async, EVCNT_TYPE_MISC, NULL, 231 "rumpblk", "bytes written async"); 232 233 if (blkfail) { 234 return devsw_attach("rumpblk", 235 &rumpblk_bdevsw_fail, &rumpblkmaj, 236 &rumpblk_cdevsw, &rumpblkmaj); 237 } else { 238 return devsw_attach("rumpblk", 239 &rumpblk_bdevsw, &rumpblkmaj, 240 &rumpblk_cdevsw, &rumpblkmaj); 241 } 242 } 243 244 int 245 rumpblk_register(const char *path, devminor_t *dmin, 246 uint64_t offset, uint64_t size) 247 { 248 struct rblkdev *rblk; 249 uint64_t flen; 250 size_t len; 251 int ftype, error, i; 252 253 /* devices might not report correct size unless they're open */ 254 if ((error = rumpuser_getfileinfo(path, &flen, &ftype)) != 0) 255 return error; 256 257 /* verify host file is of supported type */ 258 if (!(ftype == RUMPUSER_FT_REG 259 || ftype == RUMPUSER_FT_BLK 260 || ftype == RUMPUSER_FT_CHR)) 261 return EINVAL; 262 263 mutex_enter(&rumpblk_lock); 264 for (i = 0; i < RUMPBLK_SIZE; i++) { 265 if (minors[i].rblk_path&&strcmp(minors[i].rblk_path, path)==0) { 266 mutex_exit(&rumpblk_lock); 267 *dmin = i; 268 return 0; 269 } 270 } 271 272 for (i = 0; i < RUMPBLK_SIZE; i++) 273 if (minors[i].rblk_path == NULL) 274 break; 275 if (i == RUMPBLK_SIZE) { 276 mutex_exit(&rumpblk_lock); 277 return EBUSY; 278 } 279 280 rblk = &minors[i]; 281 rblk->rblk_path = __UNCONST("taken"); 282 mutex_exit(&rumpblk_lock); 283 284 len = strlen(path); 285 rblk->rblk_path = malloc(len + 1, M_TEMP, M_WAITOK); 286 strcpy(rblk->rblk_path, path); 287 rblk->rblk_hostoffset = offset; 288 if (size != RUMPBLK_SIZENOTSET) { 289 KASSERT(size + offset <= flen); 290 rblk->rblk_size = size; 291 } else { 292 KASSERT(offset < flen); 293 rblk->rblk_size = flen - offset; 294 } 295 rblk->rblk_hostsize = flen; 296 rblk->rblk_ftype = ftype; 297 makedefaultlabel(&rblk->rblk_label, rblk->rblk_size, i); 298 299 if ((error = backend_open(rblk, path)) != 0) { 300 memset(&rblk->rblk_label, 0, sizeof(rblk->rblk_label)); 301 free(rblk->rblk_path, M_TEMP); 302 rblk->rblk_path = NULL; 303 return error; 304 } 305 306 *dmin = i; 307 return 0; 308 } 309 310 /* 311 * Unregister rumpblk. It's the callers responsibility to make 312 * sure it's no longer in use. 313 */ 314 int 315 rumpblk_deregister(const char *path) 316 { 317 struct rblkdev *rblk; 318 int i; 319 320 mutex_enter(&rumpblk_lock); 321 for (i = 0; i < RUMPBLK_SIZE; i++) { 322 if (minors[i].rblk_path&&strcmp(minors[i].rblk_path, path)==0) { 323 break; 324 } 325 } 326 mutex_exit(&rumpblk_lock); 327 328 if (i == RUMPBLK_SIZE) 329 return ENOENT; 330 331 rblk = &minors[i]; 332 backend_close(rblk); 333 334 free(rblk->rblk_path, M_TEMP); 335 memset(&rblk->rblk_label, 0, sizeof(rblk->rblk_label)); 336 rblk->rblk_path = NULL; 337 338 return 0; 339 } 340 341 static int 342 backend_open(struct rblkdev *rblk, const char *path) 343 { 344 int error, fd; 345 346 KASSERT(rblk->rblk_fd == -1); 347 error = rumpuser_open(path, 348 RUMPUSER_OPEN_RDWR | RUMPUSER_OPEN_BIO, &fd); 349 if (error) { 350 error = rumpuser_open(path, 351 RUMPUSER_OPEN_RDONLY | RUMPUSER_OPEN_BIO, &fd); 352 if (error) 353 return error; 354 rblk->rblk_mode = FREAD; 355 } else { 356 rblk->rblk_mode = FREAD|FWRITE; 357 } 358 359 rblk->rblk_fd = fd; 360 KASSERT(rblk->rblk_fd != -1); 361 return 0; 362 } 363 364 static int 365 backend_close(struct rblkdev *rblk) 366 { 367 368 rumpuser_close(rblk->rblk_fd); 369 rblk->rblk_fd = -1; 370 371 return 0; 372 } 373 374 int 375 rumpblk_open(dev_t dev, int flag, int fmt, struct lwp *l) 376 { 377 struct rblkdev *rblk = &minors[minor(dev)]; 378 379 if (rblk->rblk_fd == -1) 380 return ENXIO; 381 382 if (((flag & (FREAD|FWRITE)) & ~rblk->rblk_mode) != 0) { 383 return EACCES; 384 } 385 386 return 0; 387 } 388 389 int 390 rumpblk_close(dev_t dev, int flag, int fmt, struct lwp *l) 391 { 392 393 return 0; 394 } 395 396 int 397 rumpblk_ioctl(dev_t dev, u_long xfer, void *addr, int flag, struct lwp *l) 398 { 399 devminor_t dmin = minor(dev); 400 struct rblkdev *rblk = &minors[dmin]; 401 struct partinfo *pi; 402 int error = 0; 403 404 /* well, me should support a few more, but we don't for now */ 405 switch (xfer) { 406 case DIOCGDINFO: 407 *(struct disklabel *)addr = rblk->rblk_label; 408 break; 409 410 case DIOCGPART: 411 pi = addr; 412 pi->part = &rblk->rblk_label.d_partitions[DISKPART(dmin)]; 413 pi->disklab = &rblk->rblk_label; 414 break; 415 416 /* it's synced enough along the write path */ 417 case DIOCCACHESYNC: 418 break; 419 420 default: 421 error = ENOTTY; 422 break; 423 } 424 425 return error; 426 } 427 428 static int 429 do_physio(dev_t dev, struct uio *uio, int which) 430 { 431 void (*strat)(struct buf *); 432 433 if (blkfail) 434 strat = rumpblk_strategy_fail; 435 else 436 strat = rumpblk_strategy; 437 438 return physio(strat, NULL, dev, which, minphys, uio); 439 } 440 441 int 442 rumpblk_read(dev_t dev, struct uio *uio, int flags) 443 { 444 445 return do_physio(dev, uio, B_READ); 446 } 447 448 int 449 rumpblk_write(dev_t dev, struct uio *uio, int flags) 450 { 451 452 return do_physio(dev, uio, B_WRITE); 453 } 454 455 static void 456 dostrategy(struct buf *bp) 457 { 458 struct rblkdev *rblk = &minors[minor(bp->b_dev)]; 459 off_t off; 460 int async = bp->b_flags & B_ASYNC; 461 int op; 462 463 if (bp->b_bcount % (1<<sectshift) != 0) { 464 rump_biodone(bp, 0, EINVAL); 465 return; 466 } 467 468 /* collect statistics */ 469 ev_io_total.ev_count++; 470 if (async) 471 ev_io_async.ev_count++; 472 if (BUF_ISWRITE(bp)) { 473 ev_bwrite_total.ev_count += bp->b_bcount; 474 if (async) 475 ev_bwrite_async.ev_count += bp->b_bcount; 476 } else { 477 ev_bread_total.ev_count++; 478 } 479 480 /* 481 * b_blkno is always in terms of DEV_BSIZE, and since we need 482 * to translate to a byte offset for the host read, this 483 * calculation does not need sectshift. 484 */ 485 off = bp->b_blkno << DEV_BSHIFT; 486 487 /* 488 * Do bounds checking if we're working on a file. Otherwise 489 * invalid file systems might attempt to read beyond EOF. This 490 * is bad(tm) especially on mmapped images. This is essentially 491 * the kernel bounds_check() routines. 492 */ 493 if (off + bp->b_bcount > rblk->rblk_size) { 494 int64_t sz = rblk->rblk_size - off; 495 496 /* EOF */ 497 if (sz == 0) { 498 rump_biodone(bp, 0, 0); 499 return; 500 } 501 /* beyond EOF ==> error */ 502 if (sz < 0) { 503 rump_biodone(bp, 0, EINVAL); 504 return; 505 } 506 507 /* truncate to device size */ 508 bp->b_bcount = sz; 509 } 510 511 off += rblk->rblk_hostoffset; 512 DPRINTF(("rumpblk_strategy: 0x%x bytes %s off 0x%" PRIx64 513 " (0x%" PRIx64 " - 0x%" PRIx64 "), %ssync\n", 514 bp->b_bcount, BUF_ISREAD(bp) ? "READ" : "WRITE", 515 off, off, (off + bp->b_bcount), async ? "a" : "")); 516 517 op = BUF_ISREAD(bp) ? RUMPUSER_BIO_READ : RUMPUSER_BIO_WRITE; 518 if (BUF_ISWRITE(bp) && !async) 519 op |= RUMPUSER_BIO_SYNC; 520 521 rumpuser_bio(rblk->rblk_fd, op, bp->b_data, bp->b_bcount, off, 522 rump_biodone, bp); 523 } 524 525 void 526 rumpblk_strategy(struct buf *bp) 527 { 528 529 dostrategy(bp); 530 } 531 532 /* 533 * Simple random number generator. This is private so that we can 534 * very repeatedly control which blocks will fail. 535 * 536 * <mlelstv> pooka, rand() 537 * <mlelstv> [paste] 538 */ 539 static unsigned 540 gimmerand(void) 541 { 542 543 return (randstate = randstate * 1103515245 + 12345) % (0x80000000L); 544 } 545 546 /* 547 * Block device with very simple fault injection. Fails every 548 * n out of BLKFAIL_MAX I/O with EIO. n is determined by the env 549 * variable RUMP_BLKFAIL. 550 */ 551 void 552 rumpblk_strategy_fail(struct buf *bp) 553 { 554 555 if (gimmerand() % BLKFAIL_MAX >= blkfail) { 556 dostrategy(bp); 557 } else { 558 printf("block fault injection: failing I/O on block %lld\n", 559 (long long)bp->b_blkno); 560 bp->b_error = EIO; 561 biodone(bp); 562 } 563 } 564