1 /* $NetBSD: rumpblk.c,v 1.25 2009/08/03 16:22:00 pooka Exp $ */ 2 3 /* 4 * Copyright (c) 2009 Antti Kantee. All Rights Reserved. 5 * 6 * Development of this software was supported by the 7 * Finnish Cultural Foundation. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 19 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 20 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 */ 30 31 /* 32 * Block device emulation. Presents a block device interface and 33 * uses rumpuser system calls to satisfy I/O requests. 34 * 35 * We provide fault injection. The driver can be made to fail 36 * I/O occasionally. 37 * 38 * The driver also provides an optimization for regular files by 39 * using memory-mapped I/O. This avoids kernel access for every 40 * I/O operation. It also gives finer-grained control of how to 41 * flush data. Additionally, in case the rump kernel dumps core, 42 * we get way less carnage. 43 */ 44 45 #include <sys/cdefs.h> 46 __KERNEL_RCSID(0, "$NetBSD: rumpblk.c,v 1.25 2009/08/03 16:22:00 pooka Exp $"); 47 48 #include <sys/param.h> 49 #include <sys/buf.h> 50 #include <sys/conf.h> 51 #include <sys/condvar.h> 52 #include <sys/disklabel.h> 53 #include <sys/evcnt.h> 54 #include <sys/fcntl.h> 55 #include <sys/kmem.h> 56 #include <sys/malloc.h> 57 #include <sys/queue.h> 58 #include <sys/stat.h> 59 60 #include <rump/rumpuser.h> 61 62 #include "rump_private.h" 63 #include "rump_vfs_private.h" 64 65 #if 0 66 #define DPRINTF(x) printf x 67 #else 68 #define DPRINTF(x) 69 #endif 70 71 /* Default: 16 x 1MB windows */ 72 unsigned memwinsize = (1<<20); 73 unsigned memwincnt = 16; 74 75 #define STARTWIN(off) ((off) & ~(memwinsize-1)) 76 #define INWIN(win,off) ((win)->win_off == STARTWIN(off)) 77 #define WINSIZE(rblk, win) (MIN((rblk->rblk_size-win->win_off),memwinsize)) 78 #define WINVALID(win) ((win)->win_off != (off_t)-1) 79 #define WINVALIDATE(win) ((win)->win_off = (off_t)-1) 80 struct blkwin { 81 off_t win_off; 82 void *win_mem; 83 int win_refcnt; 84 85 TAILQ_ENTRY(blkwin) win_lru; 86 }; 87 88 #define RUMPBLK_SIZE 16 89 static struct rblkdev { 90 char *rblk_path; 91 int rblk_fd; 92 int rblk_opencnt; 93 #ifdef HAS_ODIRECT 94 int rblk_dfd; 95 #endif 96 97 /* for mmap */ 98 int rblk_mmflags; 99 kmutex_t rblk_memmtx; 100 kcondvar_t rblk_memcv; 101 TAILQ_HEAD(winlru, blkwin) rblk_lruq; 102 size_t rblk_size; 103 bool rblk_waiting; 104 105 struct partition *rblk_curpi; 106 struct partition rblk_pi; 107 struct disklabel rblk_dl; 108 } minors[RUMPBLK_SIZE]; 109 110 static struct evcnt ev_io_total; 111 static struct evcnt ev_io_async; 112 113 static struct evcnt ev_memblk_hits; 114 static struct evcnt ev_memblk_busy; 115 116 static struct evcnt ev_bwrite_total; 117 static struct evcnt ev_bwrite_async; 118 static struct evcnt ev_bread_total; 119 120 dev_type_open(rumpblk_open); 121 dev_type_close(rumpblk_close); 122 dev_type_read(rumpblk_read); 123 dev_type_write(rumpblk_write); 124 dev_type_ioctl(rumpblk_ioctl); 125 dev_type_strategy(rumpblk_strategy); 126 dev_type_strategy(rumpblk_strategy_fail); 127 dev_type_dump(rumpblk_dump); 128 dev_type_size(rumpblk_size); 129 130 static const struct bdevsw rumpblk_bdevsw = { 131 rumpblk_open, rumpblk_close, rumpblk_strategy, rumpblk_ioctl, 132 nodump, nosize, D_DISK 133 }; 134 135 static const struct bdevsw rumpblk_bdevsw_fail = { 136 rumpblk_open, rumpblk_close, rumpblk_strategy_fail, rumpblk_ioctl, 137 nodump, nosize, D_DISK 138 }; 139 140 static const struct cdevsw rumpblk_cdevsw = { 141 rumpblk_open, rumpblk_close, rumpblk_read, rumpblk_write, 142 rumpblk_ioctl, nostop, notty, nopoll, nommap, nokqfilter, D_DISK 143 }; 144 145 /* fail every n out of BLKFAIL_MAX */ 146 #define BLKFAIL_MAX 10000 147 static int blkfail; 148 static unsigned randstate; 149 static kmutex_t rumpblk_lock; 150 151 static struct blkwin * 152 getwindow(struct rblkdev *rblk, off_t off, int *wsize, int *error) 153 { 154 struct blkwin *win; 155 156 mutex_enter(&rblk->rblk_memmtx); 157 retry: 158 /* search for window */ 159 TAILQ_FOREACH(win, &rblk->rblk_lruq, win_lru) { 160 if (INWIN(win, off) && WINVALID(win)) 161 break; 162 } 163 164 /* found? return */ 165 if (win) { 166 ev_memblk_hits.ev_count++; 167 TAILQ_REMOVE(&rblk->rblk_lruq, win, win_lru); 168 goto good; 169 } 170 171 /* 172 * Else, create new window. If the least recently used is not 173 * currently in use, reuse that. Otherwise we need to wait. 174 */ 175 win = TAILQ_LAST(&rblk->rblk_lruq, winlru); 176 if (win->win_refcnt == 0) { 177 TAILQ_REMOVE(&rblk->rblk_lruq, win, win_lru); 178 mutex_exit(&rblk->rblk_memmtx); 179 180 if (WINVALID(win)) { 181 DPRINTF(("win %p, unmap mem %p, off 0x%" PRIx64 "\n", 182 win, win->win_mem, win->win_off)); 183 rumpuser_unmap(win->win_mem, WINSIZE(rblk, win)); 184 WINVALIDATE(win); 185 } 186 187 win->win_off = STARTWIN(off); 188 win->win_mem = rumpuser_filemmap(rblk->rblk_fd, win->win_off, 189 WINSIZE(rblk, win), rblk->rblk_mmflags, error); 190 DPRINTF(("win %p, off 0x%" PRIx64 ", mem %p\n", 191 win, win->win_off, win->win_mem)); 192 193 mutex_enter(&rblk->rblk_memmtx); 194 if (win->win_mem == NULL) { 195 WINVALIDATE(win); 196 TAILQ_INSERT_TAIL(&rblk->rblk_lruq, win, win_lru); 197 mutex_exit(&rblk->rblk_memmtx); 198 return NULL; 199 } 200 } else { 201 DPRINTF(("memwin wait\n")); 202 ev_memblk_busy.ev_count++; 203 204 rblk->rblk_waiting = true; 205 cv_wait(&rblk->rblk_memcv, &rblk->rblk_memmtx); 206 goto retry; 207 } 208 209 good: 210 KASSERT(win); 211 win->win_refcnt++; 212 TAILQ_INSERT_HEAD(&rblk->rblk_lruq, win, win_lru); 213 mutex_exit(&rblk->rblk_memmtx); 214 *wsize = MIN(*wsize, memwinsize - (off-win->win_off)); 215 KASSERT(*wsize); 216 217 return win; 218 } 219 220 static void 221 putwindow(struct rblkdev *rblk, struct blkwin *win) 222 { 223 224 mutex_enter(&rblk->rblk_memmtx); 225 if (--win->win_refcnt == 0 && rblk->rblk_waiting) { 226 rblk->rblk_waiting = false; 227 cv_signal(&rblk->rblk_memcv); 228 } 229 KASSERT(win->win_refcnt >= 0); 230 mutex_exit(&rblk->rblk_memmtx); 231 } 232 233 static void 234 wincleanup(struct rblkdev *rblk) 235 { 236 struct blkwin *win; 237 238 while ((win = TAILQ_FIRST(&rblk->rblk_lruq)) != NULL) { 239 TAILQ_REMOVE(&rblk->rblk_lruq, win, win_lru); 240 if (WINVALID(win)) { 241 DPRINTF(("cleanup win %p addr %p\n", 242 win, win->win_mem)); 243 rumpuser_unmap(win->win_mem, WINSIZE(rblk, win)); 244 } 245 kmem_free(win, sizeof(*win)); 246 } 247 rblk->rblk_mmflags = 0; 248 } 249 250 int 251 rumpblk_init(void) 252 { 253 char buf[64]; 254 int rumpblk = RUMPBLK; 255 unsigned tmp; 256 int error, i; 257 258 mutex_init(&rumpblk_lock, MUTEX_DEFAULT, IPL_NONE); 259 260 if (rumpuser_getenv("RUMP_BLKFAIL", buf, sizeof(buf), &error) == 0) { 261 blkfail = strtoul(buf, NULL, 10); 262 /* fail everything */ 263 if (blkfail > BLKFAIL_MAX) 264 blkfail = BLKFAIL_MAX; 265 if (rumpuser_getenv("RUMP_BLKFAIL_SEED", buf, sizeof(buf), 266 &error) == 0) { 267 randstate = strtoul(buf, NULL, 10); 268 } else { 269 randstate = arc4random(); 270 } 271 printf("rumpblk: FAULT INJECTION ACTIVE! fail %d/%d. " 272 "seed %u\n", blkfail, BLKFAIL_MAX, randstate); 273 } else { 274 blkfail = 0; 275 } 276 277 if (rumpuser_getenv("RUMP_BLKWINSIZE", buf, sizeof(buf), &error) == 0) { 278 printf("rumpblk: "); 279 tmp = strtoul(buf, NULL, 10); 280 if (tmp && !(tmp & (tmp-1))) 281 memwinsize = tmp; 282 else 283 printf("invalid RUMP_BLKWINSIZE %d, ", tmp); 284 printf("using %d for memwinsize\n", memwinsize); 285 } 286 if (rumpuser_getenv("RUMP_BLKWINCOUNT", buf, sizeof(buf), &error) == 0){ 287 printf("rumpblk: "); 288 tmp = strtoul(buf, NULL, 10); 289 if (tmp) 290 memwincnt = tmp; 291 else 292 printf("invalid RUMP_BLKWINCOUNT %d, ", tmp); 293 printf("using %d for memwincount\n", memwincnt); 294 } 295 296 memset(minors, 0, sizeof(minors)); 297 for (i = 0; i < RUMPBLK_SIZE; i++) { 298 mutex_init(&minors[i].rblk_memmtx, MUTEX_DEFAULT, IPL_NONE); 299 cv_init(&minors[i].rblk_memcv, "rblkmcv"); 300 } 301 302 evcnt_attach_dynamic(&ev_io_total, EVCNT_TYPE_MISC, NULL, 303 "rumpblk", "rumpblk I/O reqs"); 304 evcnt_attach_dynamic(&ev_io_async, EVCNT_TYPE_MISC, NULL, 305 "rumpblk", "rumpblk async I/O"); 306 307 evcnt_attach_dynamic(&ev_bread_total, EVCNT_TYPE_MISC, NULL, 308 "rumpblk", "rumpblk bytes read"); 309 evcnt_attach_dynamic(&ev_bwrite_total, EVCNT_TYPE_MISC, NULL, 310 "rumpblk", "rumpblk bytes written"); 311 evcnt_attach_dynamic(&ev_bwrite_async, EVCNT_TYPE_MISC, NULL, 312 "rumpblk", "rumpblk bytes written async"); 313 314 evcnt_attach_dynamic(&ev_memblk_hits, EVCNT_TYPE_MISC, NULL, 315 "rumpblk", "memblk window hits"); 316 evcnt_attach_dynamic(&ev_memblk_busy, EVCNT_TYPE_MISC, NULL, 317 "rumpblk", "memblk all windows busy"); 318 319 if (blkfail) { 320 return devsw_attach("rumpblk", &rumpblk_bdevsw_fail, &rumpblk, 321 &rumpblk_cdevsw, &rumpblk); 322 } else { 323 return devsw_attach("rumpblk", &rumpblk_bdevsw, &rumpblk, 324 &rumpblk_cdevsw, &rumpblk); 325 } 326 } 327 328 /* XXX: no deregister */ 329 int 330 rumpblk_register(const char *path, devminor_t *dmin) 331 { 332 uint64_t flen; 333 size_t len; 334 int ftype, error, i; 335 336 if (rumpuser_getfileinfo(path, &flen, &ftype, &error)) 337 return error; 338 /* verify host file is of supported type */ 339 if (!(ftype == RUMPUSER_FT_REG 340 || ftype == RUMPUSER_FT_BLK 341 || ftype == RUMPUSER_FT_CHR)) 342 return EINVAL; 343 344 mutex_enter(&rumpblk_lock); 345 for (i = 0; i < RUMPBLK_SIZE; i++) { 346 if (minors[i].rblk_path&&strcmp(minors[i].rblk_path, path)==0) { 347 mutex_exit(&rumpblk_lock); 348 *dmin = i; 349 return 0; 350 } 351 } 352 353 for (i = 0; i < RUMPBLK_SIZE; i++) 354 if (minors[i].rblk_path == NULL) 355 break; 356 if (i == RUMPBLK_SIZE) { 357 mutex_exit(&rumpblk_lock); 358 return EBUSY; 359 } 360 361 len = strlen(path); 362 minors[i].rblk_path = malloc(len + 1, M_TEMP, M_WAITOK); 363 strcpy(minors[i].rblk_path, path); 364 minors[i].rblk_fd = -1; 365 mutex_exit(&rumpblk_lock); 366 367 *dmin = i; 368 return 0; 369 } 370 371 int 372 rumpblk_open(dev_t dev, int flag, int fmt, struct lwp *l) 373 { 374 struct rblkdev *rblk = &minors[minor(dev)]; 375 uint64_t fsize; 376 int ft, dummy; 377 int error, fd; 378 379 if (rblk->rblk_fd != -1) 380 return 0; /* XXX: refcount, open mode */ 381 fd = rumpuser_open(rblk->rblk_path, OFLAGS(flag), &error); 382 if (error) 383 return error; 384 385 if (rumpuser_getfileinfo(rblk->rblk_path, &fsize, &ft, &error) == -1) { 386 rumpuser_close(fd, &dummy); 387 return error; 388 } 389 390 #ifdef HAS_ODIRECT 391 rblk->rblk_dfd = rumpuser_open(rblk->rblk_path, 392 OFLAGS(flag) | O_DIRECT, &error); 393 if (error) 394 return error; 395 #endif 396 397 if (ft == RUMPUSER_FT_REG) { 398 struct blkwin *win; 399 int i, winsize; 400 401 /* 402 * Use mmap to access a regular file. Allocate and 403 * cache initial windows here. Failure to allocate one 404 * means fallback to read/write i/o. 405 */ 406 407 rblk->rblk_mmflags = 0; 408 if (flag & FREAD) 409 rblk->rblk_mmflags |= RUMPUSER_FILEMMAP_READ; 410 if (flag & FWRITE) { 411 rblk->rblk_mmflags |= RUMPUSER_FILEMMAP_WRITE; 412 rblk->rblk_mmflags |= RUMPUSER_FILEMMAP_SHARED; 413 } 414 415 TAILQ_INIT(&rblk->rblk_lruq); 416 rblk->rblk_size = fsize; 417 rblk->rblk_fd = fd; 418 419 for (i = 0; i < memwincnt && i * memwinsize < fsize; i++) { 420 win = kmem_zalloc(sizeof(*win), KM_SLEEP); 421 WINVALIDATE(win); 422 TAILQ_INSERT_TAIL(&rblk->rblk_lruq, win, win_lru); 423 424 /* 425 * Allocate first windows. Here we just generally 426 * make sure a) we can mmap at all b) we have the 427 * necessary VA available 428 */ 429 winsize = 1; 430 win = getwindow(rblk, i*memwinsize, &winsize, &error); 431 if (win) { 432 putwindow(rblk, win); 433 } else { 434 wincleanup(rblk); 435 break; 436 } 437 } 438 439 memset(&rblk->rblk_dl, 0, sizeof(rblk->rblk_dl)); 440 rblk->rblk_pi.p_size = fsize >> DEV_BSHIFT; 441 rblk->rblk_dl.d_secsize = DEV_BSIZE; 442 rblk->rblk_curpi = &rblk->rblk_pi; 443 } else { 444 if (rumpuser_ioctl(fd, DIOCGDINFO, &rblk->rblk_dl, 445 &error) == -1) { 446 KASSERT(error); 447 rumpuser_close(fd, &dummy); 448 return error; 449 } 450 451 rblk->rblk_fd = fd; 452 rblk->rblk_curpi = &rblk->rblk_dl.d_partitions[0]; 453 } 454 455 KASSERT(rblk->rblk_fd != -1); 456 return 0; 457 } 458 459 int 460 rumpblk_close(dev_t dev, int flag, int fmt, struct lwp *l) 461 { 462 struct rblkdev *rblk = &minors[minor(dev)]; 463 int dummy; 464 465 if (rblk->rblk_mmflags) 466 wincleanup(rblk); 467 rumpuser_fsync(rblk->rblk_fd, &dummy); 468 rumpuser_close(rblk->rblk_fd, &dummy); 469 rblk->rblk_fd = -1; 470 471 return 0; 472 } 473 474 int 475 rumpblk_ioctl(dev_t dev, u_long xfer, void *addr, int flag, struct lwp *l) 476 { 477 struct rblkdev *rblk = &minors[minor(dev)]; 478 int rv, error; 479 480 if (xfer == DIOCGPART) { 481 struct partinfo *pi = (struct partinfo *)addr; 482 483 pi->part = rblk->rblk_curpi; 484 pi->disklab = &rblk->rblk_dl; 485 486 return 0; 487 } 488 489 rv = rumpuser_ioctl(rblk->rblk_fd, xfer, addr, &error); 490 if (rv == -1) 491 return error; 492 493 return 0; 494 } 495 496 static int 497 do_physio(dev_t dev, struct uio *uio, int which) 498 { 499 void (*strat)(struct buf *); 500 501 if (blkfail) 502 strat = rumpblk_strategy_fail; 503 else 504 strat = rumpblk_strategy; 505 506 return physio(strat, NULL, dev, which, minphys, uio); 507 } 508 509 int 510 rumpblk_read(dev_t dev, struct uio *uio, int flags) 511 { 512 513 return do_physio(dev, uio, B_READ); 514 } 515 516 int 517 rumpblk_write(dev_t dev, struct uio *uio, int flags) 518 { 519 520 return do_physio(dev, uio, B_WRITE); 521 } 522 523 static void 524 dostrategy(struct buf *bp) 525 { 526 struct rblkdev *rblk = &minors[minor(bp->b_dev)]; 527 off_t off; 528 int async = bp->b_flags & B_ASYNC; 529 int error; 530 531 /* collect statistics */ 532 ev_io_total.ev_count++; 533 if (async) 534 ev_io_async.ev_count++; 535 if (BUF_ISWRITE(bp)) { 536 ev_bwrite_total.ev_count += bp->b_bcount; 537 if (async) 538 ev_bwrite_async.ev_count += bp->b_bcount; 539 } else { 540 ev_bread_total.ev_count++; 541 } 542 543 off = bp->b_blkno << DEV_BSHIFT; 544 /* 545 * Do bounds checking if we're working on a file. Otherwise 546 * invalid file systems might attempt to read beyond EOF. This 547 * is bad(tm) especially on mmapped images. This is essentially 548 * the kernel bounds_check() routines. 549 */ 550 if (rblk->rblk_size && off + bp->b_bcount > rblk->rblk_size) { 551 int64_t sz = rblk->rblk_size - off; 552 553 /* EOF */ 554 if (sz == 0) { 555 rump_biodone(bp, 0, 0); 556 return; 557 } 558 /* beyond EOF ==> error */ 559 if (sz < 0) { 560 rump_biodone(bp, 0, EINVAL); 561 return; 562 } 563 564 /* truncate to device size */ 565 bp->b_bcount = sz; 566 } 567 568 DPRINTF(("rumpblk_strategy: 0x%x bytes %s off 0x%" PRIx64 569 " (0x%" PRIx64 " - 0x%" PRIx64 "), %ssync\n", 570 bp->b_bcount, BUF_ISREAD(bp) ? "READ" : "WRITE", 571 off, off, (off + bp->b_bcount), async ? "a" : "")); 572 573 /* mmap? handle here and return */ 574 if (rblk->rblk_mmflags) { 575 struct blkwin *win; 576 int winsize, iodone; 577 uint8_t *ioaddr, *bufaddr; 578 579 for (iodone = 0; iodone < bp->b_bcount; 580 iodone += winsize, off += winsize) { 581 winsize = bp->b_bcount - iodone; 582 win = getwindow(rblk, off, &winsize, &error); 583 if (win == NULL) { 584 rump_biodone(bp, iodone, error); 585 return; 586 } 587 588 ioaddr = (uint8_t *)win->win_mem + (off-STARTWIN(off)); 589 bufaddr = (uint8_t *)bp->b_data + iodone; 590 591 DPRINTF(("strat: %p off 0x%" PRIx64 592 ", ioaddr %p (%p)/buf %p\n", win, 593 win->win_off, ioaddr, win->win_mem, bufaddr)); 594 if (BUF_ISREAD(bp)) { 595 memcpy(bufaddr, ioaddr, winsize); 596 } else { 597 memcpy(ioaddr, bufaddr, winsize); 598 } 599 600 /* synchronous write, sync bits back to disk */ 601 if (BUF_ISWRITE(bp) && !async) { 602 rumpuser_memsync(ioaddr, winsize, &error); 603 } 604 putwindow(rblk, win); 605 } 606 607 rump_biodone(bp, bp->b_bcount, 0); 608 return; 609 } 610 611 /* 612 * Do I/O. We have different paths for async and sync I/O. 613 * Async I/O is done by passing a request to rumpuser where 614 * it is executed. The rumpuser routine then calls 615 * biodone() to signal any waiters in the kernel. I/O's are 616 * executed in series. Technically executing them in parallel 617 * would produce better results, but then we'd need either 618 * more threads or posix aio. Maybe worth investigating 619 * this later. 620 * 621 * Using bufq here might be a good idea. 622 */ 623 624 if (rump_threads) { 625 struct rumpuser_aio *rua; 626 int op, fd; 627 628 fd = rblk->rblk_fd; 629 if (BUF_ISREAD(bp)) { 630 op = RUA_OP_READ; 631 } else { 632 op = RUA_OP_WRITE; 633 if (!async) { 634 /* O_DIRECT not fully automatic yet */ 635 #ifdef HAS_ODIRECT 636 if ((off & (DEV_BSIZE-1)) == 0 637 && ((intptr_t)bp->b_data&(DEV_BSIZE-1)) == 0 638 && (bp->b_bcount & (DEV_BSIZE-1)) == 0) 639 fd = rblk->rblk_dfd; 640 else 641 #endif 642 op |= RUA_OP_SYNC; 643 } 644 } 645 646 rumpuser_mutex_enter(&rumpuser_aio_mtx); 647 while ((rumpuser_aio_head+1) % N_AIOS == rumpuser_aio_tail) { 648 rumpuser_cv_wait(&rumpuser_aio_cv, &rumpuser_aio_mtx); 649 } 650 651 rua = &rumpuser_aios[rumpuser_aio_head]; 652 KASSERT(rua->rua_bp == NULL); 653 rua->rua_fd = fd; 654 rua->rua_data = bp->b_data; 655 rua->rua_dlen = bp->b_bcount; 656 rua->rua_off = off; 657 rua->rua_bp = bp; 658 rua->rua_op = op; 659 660 /* insert into queue & signal */ 661 rumpuser_aio_head = (rumpuser_aio_head+1) % N_AIOS; 662 rumpuser_cv_signal(&rumpuser_aio_cv); 663 rumpuser_mutex_exit(&rumpuser_aio_mtx); 664 } else { 665 if (BUF_ISREAD(bp)) { 666 rumpuser_read_bio(rblk->rblk_fd, bp->b_data, 667 bp->b_bcount, off, rump_biodone, bp); 668 } else { 669 rumpuser_write_bio(rblk->rblk_fd, bp->b_data, 670 bp->b_bcount, off, rump_biodone, bp); 671 } 672 if (BUF_ISWRITE(bp) && !async) 673 rumpuser_fsync(rblk->rblk_fd, &error); 674 } 675 } 676 677 void 678 rumpblk_strategy(struct buf *bp) 679 { 680 681 dostrategy(bp); 682 } 683 684 /* 685 * Simple random number generator. This is private so that we can 686 * very repeatedly control which blocks will fail. 687 * 688 * <mlelstv> pooka, rand() 689 * <mlelstv> [paste] 690 */ 691 static unsigned 692 gimmerand(void) 693 { 694 695 return (randstate = randstate * 1103515245 + 12345) % (0x80000000L); 696 } 697 698 /* 699 * Block device with very simple fault injection. Fails every 700 * n out of BLKFAIL_MAX I/O with EIO. n is determined by the env 701 * variable RUMP_BLKFAIL. 702 */ 703 void 704 rumpblk_strategy_fail(struct buf *bp) 705 { 706 707 if (gimmerand() % BLKFAIL_MAX >= blkfail) { 708 dostrategy(bp); 709 } else { 710 printf("block fault injection: failing I/O on block %lld\n", 711 (long long)bp->b_blkno); 712 bp->b_error = EIO; 713 biodone(bp); 714 } 715 } 716