/*	$OpenBSD: vioblk.c,v 1.21 2024/11/27 22:32:14 kirill Exp $	*/

/*
 * Copyright (c) 2023 Dave Voutila <dv@openbsd.org>
 * Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
#include <stdint.h>

#include <dev/pci/virtio_pcireg.h>
#include <dev/pv/vioblkreg.h>
#include <dev/pv/virtioreg.h>

#include <errno.h>
#include <event.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "atomicio.h"
#include "pci.h"
#include "virtio.h"
#include "vmd.h"

extern char *__progname;
extern struct vmd_vm *current_vm;
struct iovec io_v[VIOBLK_QUEUE_SIZE];

static const char *disk_type(int);
static uint32_t handle_io_read(struct viodev_msg *, struct virtio_dev *,
    int8_t *);
static int handle_io_write(struct viodev_msg *, struct virtio_dev *);

static void vioblk_update_qs(struct vioblk_dev *);
static void vioblk_update_qa(struct vioblk_dev *);
static int vioblk_notifyq(struct vioblk_dev *);
static ssize_t vioblk_rw(struct vioblk_dev *, int, off_t,
    struct vring_desc *, struct vring_desc **);

static void dev_dispatch_vm(int, short, void *);
static void handle_sync_io(int, short, void *);

static const char *
disk_type(int type)
{
	switch (type) {
	case VMDF_RAW: return "raw";
	case VMDF_QCOW2: return "qcow2";
	}
	return "unknown";
}

__dead void
vioblk_main(int fd, int fd_vmm)
{
	struct virtio_dev dev;
	struct vioblk_dev *vioblk = NULL;
	struct viodev_msg msg;
	struct vmd_vm vm;
	struct vm_create_params *vcp;
	ssize_t sz;
	off_t szp = 0;
	int i, ret, type;

	/*
	 * stdio - needed for read/write to disk fds and channels to the vm.
	 * vmm + proc - needed to create shared vm mappings.
	 */
	if (pledge("stdio vmm proc", NULL) == -1)
		fatal("pledge");

	/* Zero and initialize io work queue. */
	memset(io_v, 0, nitems(io_v)*sizeof(io_v[0]));

	/* Receive our virtio_dev, mostly preconfigured. */
	memset(&dev, 0, sizeof(dev));
	sz = atomicio(read, fd, &dev, sizeof(dev));
	if (sz != sizeof(dev)) {
		ret = errno;
		log_warn("failed to receive vioblk");
		goto fail;
	}
	if (dev.dev_type != VMD_DEVTYPE_DISK) {
		ret = EINVAL;
		log_warn("received invalid device type");
		goto fail;
	}
	dev.sync_fd = fd;
	vioblk = &dev.vioblk;

	log_debug("%s: got vioblk dev. num disk fds = %d, sync fd = %d, "
	    "async fd = %d, capacity = %lld seg_max = %u, vmm fd = %d",
	    __func__, vioblk->ndisk_fd, dev.sync_fd, dev.async_fd,
	    vioblk->capacity, vioblk->seg_max, fd_vmm);

	/* Receive our vm information from the vm process. */
	memset(&vm, 0, sizeof(vm));
	sz = atomicio(read, dev.sync_fd, &vm, sizeof(vm));
	if (sz != sizeof(vm)) {
		ret = EIO;
		log_warnx("failed to receive vm details");
		goto fail;
	}
	vcp = &vm.vm_params.vmc_params;
	current_vm = &vm;

	setproctitle("%s/vioblk%d", vcp->vcp_name, vioblk->idx);
	log_procinit("vm/%s/vioblk%d", vcp->vcp_name, vioblk->idx);

	/* Now that we have our vm information, we can remap memory. */
	ret = remap_guest_mem(&vm, fd_vmm);
	if (ret) {
		log_warnx("failed to remap guest memory");
		goto fail;
	}

	/*
	 * We no longer need /dev/vmm access.
	 */
	close_fd(fd_vmm);
	if (pledge("stdio", NULL) == -1)
		fatal("pledge2");

	/* Initialize the virtio block abstractions. */
	type = vm.vm_params.vmc_disktypes[vioblk->idx];
	switch (type) {
	case VMDF_RAW:
		ret = virtio_raw_init(&vioblk->file, &szp, vioblk->disk_fd,
		    vioblk->ndisk_fd);
		break;
	case VMDF_QCOW2:
		ret = virtio_qcow2_init(&vioblk->file, &szp, vioblk->disk_fd,
		    vioblk->ndisk_fd);
		break;
	default:
		log_warnx("invalid disk image type");
		goto fail;
	}
	if (ret || szp < 0) {
		log_warnx("failed to init disk %s image", disk_type(type));
		goto fail;
	}
	vioblk->capacity = szp / 512;
	log_debug("%s: initialized vioblk%d with %s image (capacity=%lld)",
	    __func__, vioblk->idx, disk_type(type), vioblk->capacity);

	/* If we're restoring hardware, reinitialize the virtqueue hva. */
	if (vm.vm_state & VM_STATE_RECEIVED)
		vioblk_update_qa(vioblk);

	/* Initialize libevent so we can start wiring event handlers. */
	event_init();

	/* Wire up an async imsg channel. */
	log_debug("%s: wiring in async vm event handler (fd=%d)", __func__,
	    dev.async_fd);
	if (vm_device_pipe(&dev, dev_dispatch_vm, NULL)) {
		ret = EIO;
		log_warnx("vm_device_pipe");
		goto fail;
	}

	/* Configure our sync channel event handler. */
	log_debug("%s: wiring in sync channel handler (fd=%d)", __func__,
	    dev.sync_fd);
	if (imsgbuf_init(&dev.sync_iev.ibuf, dev.sync_fd) == -1) {
		log_warn("imsgbuf_init");
		goto fail;
	}
	imsgbuf_allow_fdpass(&dev.sync_iev.ibuf);
	dev.sync_iev.handler = handle_sync_io;
	dev.sync_iev.data = &dev;
	dev.sync_iev.events = EV_READ;
	imsg_event_add(&dev.sync_iev);

	/* Send a ready message over the sync channel. */
	log_debug("%s: telling vm %s device is ready", __func__, vcp->vcp_name);
	memset(&msg, 0, sizeof(msg));
	msg.type = VIODEV_MSG_READY;
	imsg_compose_event(&dev.sync_iev, IMSG_DEVOP_MSG, 0, 0, -1, &msg,
	    sizeof(msg));

	/* Send a ready message over the async channel. */
	log_debug("%s: sending heartbeat", __func__);
	ret = imsg_compose_event(&dev.async_iev, IMSG_DEVOP_MSG, 0, 0, -1,
	    &msg, sizeof(msg));
	if (ret == -1) {
		log_warnx("%s: failed to send async ready message!", __func__);
		goto fail;
	}

	/* Engage the event loop! */
	ret = event_dispatch();

	if (ret == 0) {
		/* Clean shutdown. */
		close_fd(dev.sync_fd);
		close_fd(dev.async_fd);
		for (i = 0; i < vioblk->ndisk_fd; i++)
			close_fd(vioblk->disk_fd[i]);
		_exit(0);
		/* NOTREACHED */
	}

fail:
	/* Try letting the vm know we've failed something. */
	memset(&msg, 0, sizeof(msg));
	msg.type = VIODEV_MSG_ERROR;
	msg.data = ret;
	imsg_compose(&dev.sync_iev.ibuf, IMSG_DEVOP_MSG, 0, 0, -1, &msg,
	    sizeof(msg));
	imsgbuf_flush(&dev.sync_iev.ibuf);

	close_fd(dev.sync_fd);
	close_fd(dev.async_fd);
	if (vioblk != NULL) {
		for (i = 0; i < vioblk->ndisk_fd; i++)
			close_fd(vioblk->disk_fd[i]);
	}
	_exit(ret);
	/* NOTREACHED */
}

const char *
vioblk_cmd_name(uint32_t type)
{
	switch (type) {
	case VIRTIO_BLK_T_IN: return "read";
	case VIRTIO_BLK_T_OUT: return "write";
	case VIRTIO_BLK_T_SCSI_CMD: return "scsi read";
	case VIRTIO_BLK_T_SCSI_CMD_OUT: return "scsi write";
	case VIRTIO_BLK_T_FLUSH: return "flush";
	case VIRTIO_BLK_T_FLUSH_OUT: return "flush out";
	case VIRTIO_BLK_T_GET_ID: return "get id";
	default: return "unknown";
	}
}

static void
vioblk_update_qa(struct vioblk_dev *dev)
{
	struct virtio_vq_info *vq_info;
	void *hva = NULL;

	/* Invalid queue? */
	if (dev->cfg.queue_select > 0)
		return;

	vq_info = &dev->vq[dev->cfg.queue_select];
	vq_info->q_gpa = (uint64_t)dev->cfg.queue_pfn * VIRTIO_PAGE_SIZE;

	hva = hvaddr_mem(vq_info->q_gpa, vring_size(VIOBLK_QUEUE_SIZE));
	if (hva == NULL)
		fatal("vioblk_update_qa");
	vq_info->q_hva = hva;
}

static void
vioblk_update_qs(struct vioblk_dev *dev)
{
	struct virtio_vq_info *vq_info;

	/* Invalid queue? */
	if (dev->cfg.queue_select > 0) {
		dev->cfg.queue_size = 0;
		return;
	}

	vq_info = &dev->vq[dev->cfg.queue_select];

	/* Update queue pfn/size based on queue select */
	dev->cfg.queue_pfn = vq_info->q_gpa >> 12;
	dev->cfg.queue_size = vq_info->qs;
}

/*
 * Process virtqueue notifications. If an unrecoverable error occurs, puts
 * device into a "needs reset" state.
 *
 * Returns 1 if we need to assert an IRQ.
 */
static int
vioblk_notifyq(struct vioblk_dev *dev)
{
	uint32_t cmd_len;
	uint16_t idx, cmd_desc_idx;
	uint8_t ds;
	off_t offset;
	ssize_t sz;
	int is_write, notify = 0, i;
	char *vr;
	struct vring_desc *table, *desc;
	struct vring_avail *avail;
	struct vring_used *used;
	struct virtio_blk_req_hdr *cmd;
	struct virtio_vq_info *vq_info;

	/* Invalid queue? */
	if (dev->cfg.queue_notify > 0)
		return (0);

	vq_info = &dev->vq[dev->cfg.queue_notify];
	idx = vq_info->last_avail;
	vr = vq_info->q_hva;
	if (vr == NULL)
		fatalx("%s: null vring", __func__);

	/* Compute offsets in table of descriptors, avail ring, and used ring */
	table = (struct vring_desc *)(vr);
	avail = (struct vring_avail *)(vr + vq_info->vq_availoffset);
	used = (struct vring_used *)(vr + vq_info->vq_usedoffset);

	while (idx != avail->idx) {
		/* Retrieve Command descriptor. */
		cmd_desc_idx = avail->ring[idx & VIOBLK_QUEUE_MASK];
		desc = &table[cmd_desc_idx];
		cmd_len = desc->len;

		/*
		 * Validate Command descriptor. It should be chained to another
		 * descriptor and not be itself writable.
		 */
		if ((desc->flags & VRING_DESC_F_NEXT) == 0) {
			log_warnx("%s: unchained cmd descriptor", __func__);
			goto reset;
		}
		if (DESC_WRITABLE(desc)) {
			log_warnx("%s: invalid cmd descriptor state", __func__);
			goto reset;
		}

		/* Retrieve the vioblk command request. */
		cmd = hvaddr_mem(desc->addr, sizeof(*cmd));
		if (cmd == NULL)
			goto reset;

		/* Advance to the 2nd descriptor. */
		desc = &table[desc->next & VIOBLK_QUEUE_MASK];

		/* Process each available command & chain. */
		switch (cmd->type) {
		case VIRTIO_BLK_T_IN:
		case VIRTIO_BLK_T_OUT:
			/* Read (IN) & Write (OUT) */
			is_write = (cmd->type == VIRTIO_BLK_T_OUT) ? 1 : 0;
			offset = cmd->sector * VIRTIO_BLK_SECTOR_SIZE;
			sz = vioblk_rw(dev, is_write, offset, table, &desc);
			if (sz == -1)
				ds = VIRTIO_BLK_S_IOERR;
			else
				ds = VIRTIO_BLK_S_OK;
			break;
		case VIRTIO_BLK_T_GET_ID:
			/*
			 * We don't support this command yet. While it's not
			 * officially part of the virtio spec (will be in v1.2)
			 * there's no feature to negotiate. Linux drivers will
			 * often send this command regardless.
			 */
			ds = VIRTIO_BLK_S_UNSUPP;
			break;
		default:
			log_warnx("%s: unsupported vioblk command %d", __func__,
			    cmd->type);
			ds = VIRTIO_BLK_S_UNSUPP;
			break;
		}

		/* Advance to the end of the chain, if needed. */
		i = 0;
		while (desc->flags & VRING_DESC_F_NEXT) {
			desc = &table[desc->next & VIOBLK_QUEUE_MASK];
			if (++i >= VIOBLK_QUEUE_SIZE) {
				/*
				 * If we encounter an infinite/looping chain,
				 * not much we can do but say we need a reset.
				 */
				log_warnx("%s: descriptor chain overflow",
				    __func__);
				goto reset;
			}
		}

		/* Provide the status of our command processing. */
		if (!DESC_WRITABLE(desc)) {
			log_warnx("%s: status descriptor unwritable", __func__);
			goto reset;
		}
		/* Overkill as ds is 1 byte, but validates gpa. */
		if (write_mem(desc->addr, &ds, sizeof(ds)))
			log_warnx("%s: can't write device status data "
			    "@ 0x%llx", __func__, desc->addr);

		dev->cfg.isr_status |= 1;
		notify = 1;

		used->ring[used->idx & VIOBLK_QUEUE_MASK].id = cmd_desc_idx;
		used->ring[used->idx & VIOBLK_QUEUE_MASK].len = cmd_len;

		__sync_synchronize();
		used->idx++;
		idx++;
	}

	vq_info->last_avail = idx;
	return (notify);

reset:
	/*
	 * When setting the "needs reset" flag, the driver is notified
	 * via a configuration change interrupt.
	 */
	dev->cfg.device_status |= DEVICE_NEEDS_RESET;
	dev->cfg.isr_status |= VIRTIO_CONFIG_ISR_CONFIG_CHANGE;
	return (1);
}

static void
dev_dispatch_vm(int fd, short event, void *arg)
{
	struct virtio_dev *dev = (struct virtio_dev *)arg;
	struct imsgev *iev = &dev->async_iev;
	struct imsgbuf *ibuf = &iev->ibuf;
	struct imsg imsg;
	ssize_t n = 0;
	int verbose;

	if (event & EV_READ) {
		if ((n = imsgbuf_read(ibuf)) == -1)
			fatal("%s: imsgbuf_read", __func__);
		if (n == 0) {
			/* this pipe is dead, so remove the event handler */
			log_debug("%s: pipe dead (EV_READ)", __func__);
			event_del(&iev->ev);
			event_loopexit(NULL);
			return;
		}
	}

	if (event & EV_WRITE) {
		if (imsgbuf_write(ibuf) == -1) {
			if (errno == EPIPE) {
				/* this pipe is dead, remove the handler */
				log_debug("%s: pipe dead (EV_WRITE)", __func__);
				event_del(&iev->ev);
				event_loopexit(NULL);
				return;
			}
			fatal("%s: imsgbuf_write", __func__);
		}
	}

	for (;;) {
		if ((n = imsg_get(ibuf, &imsg)) == -1)
			fatal("%s: imsg_get", __func__);
		if (n == 0)
			break;

		switch (imsg.hdr.type) {
		case IMSG_VMDOP_PAUSE_VM:
			log_debug("%s: pausing", __func__);
			break;
		case IMSG_VMDOP_UNPAUSE_VM:
			log_debug("%s: unpausing", __func__);
			break;
		case IMSG_CTL_VERBOSE:
			IMSG_SIZE_CHECK(&imsg, &verbose);
			memcpy(&verbose, imsg.data, sizeof(verbose));
			log_setverbose(verbose);
			break;
		default:
			log_warnx("%s: unhandled imsg type %d", __func__,
			    imsg.hdr.type);
			break;
		}
		imsg_free(&imsg);
	}
	imsg_event_add(iev);
}

/*
 * Synchronous IO handler.
 */
static void
handle_sync_io(int fd, short event, void *arg)
{
	struct virtio_dev *dev = (struct virtio_dev *)arg;
	struct imsgev *iev = &dev->sync_iev;
	struct imsgbuf *ibuf = &iev->ibuf;
	struct viodev_msg msg;
	struct imsg imsg;
	ssize_t n;
	int8_t intr = INTR_STATE_NOOP;

	if (event & EV_READ) {
		if ((n = imsgbuf_read(ibuf)) == -1)
			fatal("%s: imsgbuf_read", __func__);
		if (n == 0) {
			/* this pipe is dead, so remove the event handler */
			log_debug("%s: vioblk pipe dead (EV_READ)", __func__);
			event_del(&iev->ev);
			event_loopexit(NULL);
			return;
		}
	}

	if (event & EV_WRITE) {
		if (imsgbuf_write(ibuf) == -1) {
			if (errno == EPIPE) {
				/* this pipe is dead, remove the handler */
				log_debug("%s: pipe dead (EV_WRITE)", __func__);
				event_del(&iev->ev);
				event_loopexit(NULL);
				return;
			}
			fatal("%s: imsgbuf_write", __func__);
		}
	}

	for (;;) {
		if ((n = imsg_get(ibuf, &imsg)) == -1)
			fatalx("%s: imsg_get (n=%ld)", __func__, n);
		if (n == 0)
			break;

		/* Unpack our message. They ALL should be dev messages! */
		IMSG_SIZE_CHECK(&imsg, &msg);
		memcpy(&msg, imsg.data, sizeof(msg));
		imsg_free(&imsg);

		switch (msg.type) {
		case VIODEV_MSG_DUMP:
			/* Dump device */
			n = atomicio(vwrite, dev->sync_fd, dev, sizeof(*dev));
			if (n != sizeof(*dev)) {
				log_warnx("%s: failed to dump vioblk device",
				    __func__);
				break;
			}
		case VIODEV_MSG_IO_READ:
			/* Read IO: make sure to send a reply */
			msg.data = handle_io_read(&msg, dev, &intr);
			msg.data_valid = 1;
			msg.state = intr;
			imsg_compose_event(iev, IMSG_DEVOP_MSG, 0, 0, -1, &msg,
			    sizeof(msg));
			break;
		case VIODEV_MSG_IO_WRITE:
			/* Write IO: no reply needed */
			if (handle_io_write(&msg, dev) == 1)
				virtio_assert_irq(dev, 0);
			break;
		case VIODEV_MSG_SHUTDOWN:
			event_del(&dev->sync_iev.ev);
			event_loopbreak();
			return;
		default:
			fatalx("%s: invalid msg type %d", __func__, msg.type);
		}
	}
	imsg_event_add(iev);
}

static int
handle_io_write(struct viodev_msg *msg, struct virtio_dev *dev)
{
	struct vioblk_dev *vioblk = &dev->vioblk;
	uint32_t data = msg->data;
	int intr = 0;

	switch (msg->reg) {
	case VIRTIO_CONFIG_DEVICE_FEATURES:
	case VIRTIO_CONFIG_QUEUE_SIZE:
	case VIRTIO_CONFIG_ISR_STATUS:
		log_warnx("%s: illegal write %x to %s", __progname, data,
		    virtio_reg_name(msg->reg));
		break;
	case VIRTIO_CONFIG_GUEST_FEATURES:
		vioblk->cfg.guest_feature = data;
		break;
	case VIRTIO_CONFIG_QUEUE_PFN:
		vioblk->cfg.queue_pfn = data;
		vioblk_update_qa(vioblk);
		break;
	case VIRTIO_CONFIG_QUEUE_SELECT:
		vioblk->cfg.queue_select = data;
		vioblk_update_qs(vioblk);
		break;
	case VIRTIO_CONFIG_QUEUE_NOTIFY:
		/* XXX We should be stricter about status checks. */
		if (!(vioblk->cfg.device_status & DEVICE_NEEDS_RESET)) {
			vioblk->cfg.queue_notify = data;
			if (vioblk_notifyq(vioblk))
				intr = 1;
		}
		break;
	case VIRTIO_CONFIG_DEVICE_STATUS:
		vioblk->cfg.device_status = data;
		if (vioblk->cfg.device_status == 0) {
			vioblk->cfg.guest_feature = 0;
			vioblk->cfg.queue_pfn = 0;
			vioblk_update_qa(vioblk);
			vioblk->cfg.queue_size = 0;
			vioblk_update_qs(vioblk);
			vioblk->cfg.queue_select = 0;
			vioblk->cfg.queue_notify = 0;
			vioblk->cfg.isr_status = 0;
			vioblk->vq[0].last_avail = 0;
			vioblk->vq[0].notified_avail = 0;
			virtio_deassert_irq(dev, msg->vcpu);
		}
		break;
	default:
		break;
	}
	return (intr);
}

static uint32_t
handle_io_read(struct viodev_msg *msg, struct virtio_dev *dev, int8_t *intr)
{
	struct vioblk_dev *vioblk = &dev->vioblk;
	uint8_t sz = msg->io_sz;
	uint32_t data;

	if (msg->data_valid)
		data = msg->data;
	else
		data = 0;

	switch (msg->reg) {
	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI:
		switch (sz) {
		case 4:
			data = (uint32_t)(vioblk->capacity);
			break;
		case 2:
			data &= 0xFFFF0000;
			data |= (uint32_t)(vioblk->capacity) & 0xFFFF;
			break;
		case 1:
			data &= 0xFFFFFF00;
			data |= (uint32_t)(vioblk->capacity) & 0xFF;
			break;
		}
		/* XXX handle invalid sz */
		break;
	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 1:
		if (sz == 1) {
			data &= 0xFFFFFF00;
			data |= (uint32_t)(vioblk->capacity >> 8) & 0xFF;
		}
		/* XXX handle invalid sz */
		break;
	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 2:
		if (sz == 1) {
			data &= 0xFFFFFF00;
			data |= (uint32_t)(vioblk->capacity >> 16) & 0xFF;
		} else if (sz == 2) {
			data &= 0xFFFF0000;
			data |= (uint32_t)(vioblk->capacity >> 16) & 0xFFFF;
		}
		/* XXX handle invalid sz */
		break;
	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 3:
		if (sz == 1) {
			data &= 0xFFFFFF00;
			data |= (uint32_t)(vioblk->capacity >> 24) & 0xFF;
		}
		/* XXX handle invalid sz */
		break;
	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 4:
		switch (sz) {
		case 4:
			data = (uint32_t)(vioblk->capacity >> 32);
			break;
		case 2:
			data &= 0xFFFF0000;
			data |= (uint32_t)(vioblk->capacity >> 32) & 0xFFFF;
			break;
		case 1:
			data &= 0xFFFFFF00;
			data |= (uint32_t)(vioblk->capacity >> 32) & 0xFF;
			break;
		}
		/* XXX handle invalid sz */
		break;
	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 5:
		if (sz == 1) {
			data &= 0xFFFFFF00;
			data |= (uint32_t)(vioblk->capacity >> 40) & 0xFF;
		}
		/* XXX handle invalid sz */
		break;
	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 6:
		if (sz == 1) {
			data &= 0xFFFFFF00;
			data |= (uint32_t)(vioblk->capacity >> 48) & 0xFF;
		} else if (sz == 2) {
			data &= 0xFFFF0000;
			data |= (uint32_t)(vioblk->capacity >> 48) & 0xFFFF;
		}
		/* XXX handle invalid sz */
		break;
	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 7:
		if (sz == 1) {
			data &= 0xFFFFFF00;
			data |= (uint32_t)(vioblk->capacity >> 56) & 0xFF;
		}
		/* XXX handle invalid sz */
		break;
	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 12:
		switch (sz) {
		case 4:
			data = (uint32_t)(vioblk->seg_max);
			break;
		case 2:
			data &= 0xFFFF0000;
			data |= (uint32_t)(vioblk->seg_max) & 0xFFFF;
			break;
		case 1:
			data &= 0xFFFFFF00;
			data |= (uint32_t)(vioblk->seg_max) & 0xFF;
			break;
		}
		/* XXX handle invalid sz */
		break;
	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 13:
		if (sz == 1) {
			data &= 0xFFFFFF00;
			data |= (uint32_t)(vioblk->seg_max >> 8) & 0xFF;
		}
		/* XXX handle invalid sz */
		break;
	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 14:
		if (sz == 1) {
			data &= 0xFFFFFF00;
			data |= (uint32_t)(vioblk->seg_max >> 16) & 0xFF;
		} else if (sz == 2) {
			data &= 0xFFFF0000;
			data |= (uint32_t)(vioblk->seg_max >> 16) & 0xFFFF;
		}
		/* XXX handle invalid sz */
		break;
	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 15:
		if (sz == 1) {
			data &= 0xFFFFFF00;
			data |= (uint32_t)(vioblk->seg_max >> 24) & 0xFF;
		}
		/* XXX handle invalid sz */
		break;
	case VIRTIO_CONFIG_DEVICE_FEATURES:
		data = vioblk->cfg.device_feature;
		break;
	case VIRTIO_CONFIG_GUEST_FEATURES:
		data = vioblk->cfg.guest_feature;
		break;
	case VIRTIO_CONFIG_QUEUE_PFN:
		data = vioblk->cfg.queue_pfn;
		break;
	case VIRTIO_CONFIG_QUEUE_SIZE:
		data = vioblk->cfg.queue_size;
		break;
	case VIRTIO_CONFIG_QUEUE_SELECT:
		data = vioblk->cfg.queue_select;
		break;
	case VIRTIO_CONFIG_QUEUE_NOTIFY:
		data = vioblk->cfg.queue_notify;
		break;
	case VIRTIO_CONFIG_DEVICE_STATUS:
		data = vioblk->cfg.device_status;
		break;
	case VIRTIO_CONFIG_ISR_STATUS:
		data = vioblk->cfg.isr_status;
		vioblk->cfg.isr_status = 0;
		if (intr != NULL)
			*intr = INTR_STATE_DEASSERT;
		break;
	default:
		return (0xFFFFFFFF);
	}

	return (data);
}

/*
 * Emulate read/write io. Walks the descriptor chain, collecting io work and
 * then emulates the read or write.
 *
 * On success, returns bytes read/written.
 * On error, returns -1 and descriptor (desc) remains at its current position.
 */
static ssize_t
vioblk_rw(struct vioblk_dev *dev, int is_write, off_t offset,
    struct vring_desc *desc_tbl, struct vring_desc **desc)
{
	struct iovec *iov = NULL;
	ssize_t sz = 0;
	size_t io_idx = 0;	/* Index into iovec workqueue. */
	size_t xfer_sz = 0;	/* Total accumulated io bytes. */

	do {
		iov = &io_v[io_idx];

		/*
		 * Reads require writable descriptors. Writes require
		 * non-writable descriptors.
		 */
		if ((!is_write) ^ DESC_WRITABLE(*desc)) {
			log_warnx("%s: invalid descriptor for %s command",
			    __func__, is_write ? "write" : "read");
			return (-1);
		}

		/* Collect the IO segment information. */
		iov->iov_len = (size_t)(*desc)->len;
		iov->iov_base = hvaddr_mem((*desc)->addr, iov->iov_len);
		if (iov->iov_base == NULL)
			return (-1);

		/* Move our counters. */
		xfer_sz += iov->iov_len;
		io_idx++;

		/* Guard against infinite chains */
		if (io_idx >= nitems(io_v)) {
			log_warnx("%s: descriptor table invalid", __func__);
			return (-1);
		}

		/* Advance to the next descriptor. */
		*desc = &desc_tbl[(*desc)->next & VIOBLK_QUEUE_MASK];
	} while ((*desc)->flags & VRING_DESC_F_NEXT);

	/*
	 * Validate the requested block io operation alignment and size.
	 * Checking offset is just an extra caution as it is derived from
	 * a disk sector and is done for completeness in bounds checking.
	 */
	if (offset % VIRTIO_BLK_SECTOR_SIZE != 0 &&
	    xfer_sz % VIRTIO_BLK_SECTOR_SIZE != 0) {
		log_warnx("%s: unaligned read", __func__);
		return (-1);
	}
	if (xfer_sz > SSIZE_MAX) {	/* iovec_copyin limit */
		log_warnx("%s: invalid %s size: %zu", __func__,
		    is_write ? "write" : "read", xfer_sz);
"write" : "read", xfer_sz); 862 return (-1); 863 } 864 865 /* Emulate the Read or Write operation. */ 866 if (is_write) 867 sz = dev->file.pwritev(dev->file.p, io_v, io_idx, offset); 868 else 869 sz = dev->file.preadv(dev->file.p, io_v, io_idx, offset); 870 if (sz != (ssize_t)xfer_sz) { 871 log_warnx("%s: %s failure at offset 0x%llx, xfer_sz=%zu, " 872 "sz=%ld", __func__, (is_write ? "write" : "read"), offset, 873 xfer_sz, sz); 874 return (-1); 875 } 876 877 return (sz); 878 } 879