1 /* $OpenBSD: vioblk.c,v 1.3 2023/05/13 23:15:28 dv Exp $ */ 2 3 /* 4 * Copyright (c) 2023 Dave Voutila <dv@openbsd.org> 5 * Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org> 6 * 7 * Permission to use, copy, modify, and distribute this software for any 8 * purpose with or without fee is hereby granted, provided that the above 9 * copyright notice and this permission notice appear in all copies. 10 * 11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 18 */ 19 #include <sys/mman.h> 20 #include <sys/param.h> /* PAGE_SIZE */ 21 22 #include <dev/pci/virtio_pcireg.h> 23 #include <dev/pv/vioblkreg.h> 24 #include <dev/pv/virtioreg.h> 25 26 #include <errno.h> 27 #include <event.h> 28 #include <fcntl.h> 29 #include <stdlib.h> 30 #include <string.h> 31 #include <unistd.h> 32 33 #include "atomicio.h" 34 #include "pci.h" 35 #include "virtio.h" 36 #include "vmd.h" 37 38 extern char *__progname; 39 extern struct vmd_vm *current_vm; 40 41 static const char *disk_type(int); 42 static uint32_t handle_io_read(struct viodev_msg *, struct virtio_dev *); 43 static int handle_io_write(struct viodev_msg *, struct virtio_dev *); 44 void vioblk_notify_rx(struct vioblk_dev *); 45 int vioblk_notifyq(struct vioblk_dev *); 46 47 static void dev_dispatch_vm(int, short, void *); 48 static void handle_sync_io(int, short, void *); 49 50 static const char * 51 disk_type(int type) 52 { 53 switch (type) { 54 case VMDF_RAW: return "raw"; 55 case VMDF_QCOW2: return "qcow2"; 56 } 57 return "unknown"; 58 } 59 60 __dead void 61 vioblk_main(int fd, int fd_vmm) 62 { 63 struct virtio_dev dev; 64 struct vioblk_dev *vioblk; 65 struct viodev_msg msg; 66 struct vmd_vm vm; 67 struct vm_create_params *vcp; 68 ssize_t sz; 69 off_t szp = 0; 70 int i, ret, type; 71 72 log_procinit("vioblk"); 73 74 /* 75 * stdio - needed for read/write to disk fds and channels to the vm. 76 * vmm + proc - needed to create shared vm mappings. 77 */ 78 if (pledge("stdio vmm proc", NULL) == -1) 79 fatal("pledge"); 80 81 /* Receive our virtio_dev, mostly preconfigured. */ 82 memset(&dev, 0, sizeof(dev)); 83 sz = atomicio(read, fd, &dev, sizeof(dev)); 84 if (sz != sizeof(dev)) { 85 ret = errno; 86 log_warn("failed to receive vionet"); 87 goto fail; 88 } 89 if (dev.dev_type != VMD_DEVTYPE_DISK) { 90 ret = EINVAL; 91 log_warn("received invalid device type"); 92 goto fail; 93 } 94 dev.sync_fd = fd; 95 vioblk = &dev.vioblk; 96 97 log_debug("%s: got viblk dev. num disk fds = %d, sync fd = %d, " 98 "async fd = %d, sz = %lld maxfer = %d, vmm fd = %d", __func__, 99 vioblk->ndisk_fd, dev.sync_fd, dev.async_fd, vioblk->sz, 100 vioblk->max_xfer, fd_vmm); 101 102 /* Receive our vm information from the vm process. */ 103 memset(&vm, 0, sizeof(vm)); 104 sz = atomicio(read, dev.sync_fd, &vm, sizeof(vm)); 105 if (sz != sizeof(vm)) { 106 ret = EIO; 107 log_warnx("failed to receive vm details"); 108 goto fail; 109 } 110 vcp = &vm.vm_params.vmc_params; 111 current_vm = &vm; 112 setproctitle("%s/vioblk[%d]", vcp->vcp_name, vioblk->idx); 113 114 /* Now that we have our vm information, we can remap memory. */ 115 ret = remap_guest_mem(&vm, fd_vmm); 116 if (ret) { 117 log_warnx("failed to remap guest memory"); 118 goto fail; 119 } 120 121 /* 122 * We no longer need /dev/vmm access. 123 */ 124 close_fd(fd_vmm); 125 if (pledge("stdio", NULL) == -1) 126 fatal("pledge2"); 127 128 /* Initialize the virtio block abstractions. */ 129 type = vm.vm_params.vmc_disktypes[vioblk->idx]; 130 switch (type) { 131 case VMDF_RAW: 132 ret = virtio_raw_init(&vioblk->file, &szp, vioblk->disk_fd, 133 vioblk->ndisk_fd); 134 break; 135 case VMDF_QCOW2: 136 ret = virtio_qcow2_init(&vioblk->file, &szp, vioblk->disk_fd, 137 vioblk->ndisk_fd); 138 break; 139 default: 140 log_warnx("invalid disk image type"); 141 goto fail; 142 } 143 if (ret || szp < 0) { 144 log_warnx("failed to init disk %s image", disk_type(type)); 145 goto fail; 146 } 147 vioblk->sz = szp; 148 log_debug("%s: initialized vioblk[%d] with %s image (sz=%lld)", 149 __func__, vioblk->idx, disk_type(type), vioblk->sz); 150 151 /* If we're restoring hardware, reinitialize the virtqueue hva. */ 152 if (vm.vm_state & VM_STATE_RECEIVED) 153 vioblk_update_qa(vioblk); 154 155 /* Initialize libevent so we can start wiring event handlers. */ 156 event_init(); 157 158 /* Wire up an async imsg channel. */ 159 log_debug("%s: wiring in async vm event handler (fd=%d)", __func__, 160 dev.async_fd); 161 if (vm_device_pipe(&dev, dev_dispatch_vm)) { 162 ret = EIO; 163 log_warnx("vm_device_pipe"); 164 goto fail; 165 } 166 167 /* Configure our sync channel event handler. */ 168 log_debug("%s: wiring in sync channel handler (fd=%d)", __func__, 169 dev.sync_fd); 170 if (fcntl(dev.sync_fd, F_SETFL, O_NONBLOCK) == -1) { 171 ret = errno; 172 log_warn("%s: fcntl", __func__); 173 goto fail; 174 } 175 imsg_init(&dev.sync_iev.ibuf, dev.sync_fd); 176 dev.sync_iev.handler = handle_sync_io; 177 dev.sync_iev.data = &dev; 178 dev.sync_iev.events = EV_READ; 179 imsg_event_add(&dev.sync_iev); 180 181 /* Send a ready message over the sync channel. */ 182 log_debug("%s: telling vm %s device is ready", __func__, vcp->vcp_name); 183 memset(&msg, 0, sizeof(msg)); 184 msg.type = VIODEV_MSG_READY; 185 imsg_compose_event(&dev.sync_iev, IMSG_DEVOP_MSG, 0, 0, -1, &msg, 186 sizeof(msg)); 187 188 /* Send a ready message over the async channel. */ 189 log_debug("%s: sending heartbeat", __func__); 190 ret = imsg_compose_event(&dev.async_iev, IMSG_DEVOP_MSG, 0, 0, -1, 191 &msg, sizeof(msg)); 192 if (ret == -1) { 193 log_warnx("%s: failed to send async ready message!", __func__); 194 goto fail; 195 } 196 197 /* Engage the event loop! */ 198 ret = event_dispatch(); 199 200 if (ret == 0) { 201 /* Clean shutdown. */ 202 close_fd(dev.sync_fd); 203 close_fd(dev.async_fd); 204 for (i = 0; i < (int)sizeof(vioblk->disk_fd); i++) 205 close_fd(vioblk->disk_fd[i]); 206 _exit(0); 207 /* NOTREACHED */ 208 } 209 210 fail: 211 /* Try letting the vm know we've failed something. */ 212 memset(&msg, 0, sizeof(msg)); 213 msg.type = VIODEV_MSG_ERROR; 214 msg.data = ret; 215 imsg_compose(&dev.sync_iev.ibuf, IMSG_DEVOP_MSG, 0, 0, -1, &msg, 216 sizeof(msg)); 217 imsg_flush(&dev.sync_iev.ibuf); 218 219 close_fd(dev.sync_fd); 220 close_fd(dev.async_fd); 221 for (i = 0; i < (int)sizeof(vioblk->disk_fd); i++) 222 close_fd(vioblk->disk_fd[i]); 223 _exit(ret); 224 /* NOTREACHED */ 225 } 226 227 const char * 228 vioblk_cmd_name(uint32_t type) 229 { 230 switch (type) { 231 case VIRTIO_BLK_T_IN: return "read"; 232 case VIRTIO_BLK_T_OUT: return "write"; 233 case VIRTIO_BLK_T_SCSI_CMD: return "scsi read"; 234 case VIRTIO_BLK_T_SCSI_CMD_OUT: return "scsi write"; 235 case VIRTIO_BLK_T_FLUSH: return "flush"; 236 case VIRTIO_BLK_T_FLUSH_OUT: return "flush out"; 237 case VIRTIO_BLK_T_GET_ID: return "get id"; 238 default: return "unknown"; 239 } 240 } 241 242 void 243 vioblk_update_qa(struct vioblk_dev *dev) 244 { 245 struct virtio_vq_info *vq_info; 246 void *hva = NULL; 247 248 /* Invalid queue? */ 249 if (dev->cfg.queue_select > 0) 250 return; 251 252 vq_info = &dev->vq[dev->cfg.queue_select]; 253 vq_info->q_gpa = (uint64_t)dev->cfg.queue_pfn * VIRTIO_PAGE_SIZE; 254 255 hva = hvaddr_mem(vq_info->q_gpa, vring_size(VIOBLK_QUEUE_SIZE)); 256 if (hva == NULL) 257 fatal("vioblk_update_qa"); 258 vq_info->q_hva = hva; 259 } 260 261 void 262 vioblk_update_qs(struct vioblk_dev *dev) 263 { 264 struct virtio_vq_info *vq_info; 265 266 /* Invalid queue? */ 267 if (dev->cfg.queue_select > 0) { 268 dev->cfg.queue_size = 0; 269 return; 270 } 271 272 vq_info = &dev->vq[dev->cfg.queue_select]; 273 274 /* Update queue pfn/size based on queue select */ 275 dev->cfg.queue_pfn = vq_info->q_gpa >> 12; 276 dev->cfg.queue_size = vq_info->qs; 277 } 278 279 static void 280 vioblk_free_info(struct ioinfo *info) 281 { 282 if (!info) 283 return; 284 free(info->buf); 285 free(info); 286 } 287 288 static struct ioinfo * 289 vioblk_start_read(struct vioblk_dev *dev, off_t sector, size_t sz) 290 { 291 struct ioinfo *info; 292 293 /* Limit to 64M for now */ 294 if (sz > (1 << 26)) { 295 log_warnx("%s: read size exceeded 64M", __func__); 296 return (NULL); 297 } 298 299 info = calloc(1, sizeof(*info)); 300 if (!info) 301 goto nomem; 302 info->buf = malloc(sz); 303 if (info->buf == NULL) 304 goto nomem; 305 info->len = sz; 306 info->offset = sector * VIRTIO_BLK_SECTOR_SIZE; 307 info->file = &dev->file; 308 return info; 309 310 nomem: 311 free(info); 312 log_warn("malloc error vioblk read"); 313 return (NULL); 314 } 315 316 317 static const uint8_t * 318 vioblk_finish_read(struct ioinfo *info) 319 { 320 struct virtio_backing *file; 321 322 file = info->file; 323 if (file == NULL || file->pread == NULL) { 324 log_warnx("%s: XXX null?!", __func__); 325 return NULL; 326 } 327 if (file->pread(file->p, info->buf, info->len, info->offset) != info->len) { 328 info->error = errno; 329 log_warn("vioblk read error"); 330 return NULL; 331 } 332 333 return info->buf; 334 } 335 336 static struct ioinfo * 337 vioblk_start_write(struct vioblk_dev *dev, off_t sector, 338 paddr_t addr, size_t len) 339 { 340 struct ioinfo *info; 341 342 /* Limit to 64M for now */ 343 if (len > (1 << 26)) { 344 log_warnx("%s: write size exceeded 64M", __func__); 345 return (NULL); 346 } 347 348 info = calloc(1, sizeof(*info)); 349 if (!info) 350 goto nomem; 351 352 info->buf = malloc(len); 353 if (info->buf == NULL) 354 goto nomem; 355 info->len = len; 356 info->offset = sector * VIRTIO_BLK_SECTOR_SIZE; 357 info->file = &dev->file; 358 359 if (read_mem(addr, info->buf, info->len)) { 360 vioblk_free_info(info); 361 return NULL; 362 } 363 364 return info; 365 366 nomem: 367 free(info); 368 log_warn("malloc error vioblk write"); 369 return (NULL); 370 } 371 372 static int 373 vioblk_finish_write(struct ioinfo *info) 374 { 375 struct virtio_backing *file; 376 377 file = info->file; 378 if (file->pwrite(file->p, info->buf, info->len, info->offset) != info->len) { 379 log_warn("vioblk write error"); 380 return EIO; 381 } 382 return 0; 383 } 384 385 /* 386 * XXX in various cases, ds should be set to VIRTIO_BLK_S_IOERR, if we can 387 */ 388 int 389 vioblk_notifyq(struct vioblk_dev *dev) 390 { 391 uint16_t idx, cmd_desc_idx, secdata_desc_idx, ds_desc_idx; 392 uint8_t ds; 393 int cnt; 394 off_t secbias; 395 char *vr; 396 struct vring_desc *desc, *cmd_desc, *secdata_desc, *ds_desc; 397 struct vring_avail *avail; 398 struct vring_used *used; 399 struct virtio_blk_req_hdr cmd; 400 struct virtio_vq_info *vq_info; 401 402 /* Invalid queue? */ 403 if (dev->cfg.queue_notify > 0) 404 return (0); 405 406 vq_info = &dev->vq[dev->cfg.queue_notify]; 407 vr = vq_info->q_hva; 408 if (vr == NULL) 409 fatalx("%s: null vring", __func__); 410 411 /* Compute offsets in ring of descriptors, avail ring, and used ring */ 412 desc = (struct vring_desc *)(vr); 413 avail = (struct vring_avail *)(vr + vq_info->vq_availoffset); 414 used = (struct vring_used *)(vr + vq_info->vq_usedoffset); 415 416 idx = vq_info->last_avail & VIOBLK_QUEUE_MASK; 417 418 if ((avail->idx & VIOBLK_QUEUE_MASK) == idx) { 419 log_debug("%s - nothing to do?", __func__); 420 return (0); 421 } 422 423 while (idx != (avail->idx & VIOBLK_QUEUE_MASK)) { 424 425 ds = VIRTIO_BLK_S_IOERR; 426 cmd_desc_idx = avail->ring[idx] & VIOBLK_QUEUE_MASK; 427 cmd_desc = &desc[cmd_desc_idx]; 428 429 if ((cmd_desc->flags & VRING_DESC_F_NEXT) == 0) { 430 log_warnx("unchained vioblk cmd descriptor received " 431 "(idx %d)", cmd_desc_idx); 432 goto out; 433 } 434 435 /* Read command from descriptor ring */ 436 if (cmd_desc->flags & VRING_DESC_F_WRITE) { 437 log_warnx("vioblk: unexpected writable cmd descriptor " 438 "%d", cmd_desc_idx); 439 goto out; 440 } 441 if (read_mem(cmd_desc->addr, &cmd, sizeof(cmd))) { 442 log_warnx("vioblk: command read_mem error @ 0x%llx", 443 cmd_desc->addr); 444 goto out; 445 } 446 447 switch (cmd.type) { 448 case VIRTIO_BLK_T_IN: 449 /* first descriptor */ 450 secdata_desc_idx = cmd_desc->next & VIOBLK_QUEUE_MASK; 451 secdata_desc = &desc[secdata_desc_idx]; 452 453 if ((secdata_desc->flags & VRING_DESC_F_NEXT) == 0) { 454 log_warnx("unchained vioblk data descriptor " 455 "received (idx %d)", cmd_desc_idx); 456 goto out; 457 } 458 459 cnt = 0; 460 secbias = 0; 461 do { 462 struct ioinfo *info; 463 const uint8_t *secdata; 464 465 if ((secdata_desc->flags & VRING_DESC_F_WRITE) 466 == 0) { 467 log_warnx("vioblk: unwritable data " 468 "descriptor %d", secdata_desc_idx); 469 goto out; 470 } 471 472 info = vioblk_start_read(dev, 473 cmd.sector + secbias, secdata_desc->len); 474 475 if (info == NULL) { 476 log_warnx("vioblk: can't start read"); 477 goto out; 478 } 479 480 /* read the data, use current data descriptor */ 481 secdata = vioblk_finish_read(info); 482 if (secdata == NULL) { 483 vioblk_free_info(info); 484 log_warnx("vioblk: block read error, " 485 "sector %lld", cmd.sector); 486 goto out; 487 } 488 489 if (write_mem(secdata_desc->addr, secdata, 490 secdata_desc->len)) { 491 log_warnx("can't write sector " 492 "data to gpa @ 0x%llx", 493 secdata_desc->addr); 494 vioblk_free_info(info); 495 goto out; 496 } 497 498 vioblk_free_info(info); 499 500 secbias += (secdata_desc->len / 501 VIRTIO_BLK_SECTOR_SIZE); 502 secdata_desc_idx = secdata_desc->next & 503 VIOBLK_QUEUE_MASK; 504 secdata_desc = &desc[secdata_desc_idx]; 505 506 /* Guard against infinite chains */ 507 if (++cnt >= VIOBLK_QUEUE_SIZE) { 508 log_warnx("%s: descriptor table " 509 "invalid", __func__); 510 goto out; 511 } 512 } while (secdata_desc->flags & VRING_DESC_F_NEXT); 513 514 ds_desc_idx = secdata_desc_idx; 515 ds_desc = secdata_desc; 516 517 ds = VIRTIO_BLK_S_OK; 518 break; 519 case VIRTIO_BLK_T_OUT: 520 secdata_desc_idx = cmd_desc->next & VIOBLK_QUEUE_MASK; 521 secdata_desc = &desc[secdata_desc_idx]; 522 523 if ((secdata_desc->flags & VRING_DESC_F_NEXT) == 0) { 524 log_warnx("wr vioblk: unchained vioblk data " 525 "descriptor received (idx %d)", 526 cmd_desc_idx); 527 goto out; 528 } 529 530 if (secdata_desc->len > dev->max_xfer) { 531 log_warnx("%s: invalid read size %d requested", 532 __func__, secdata_desc->len); 533 goto out; 534 } 535 536 cnt = 0; 537 secbias = 0; 538 do { 539 struct ioinfo *info; 540 541 if (secdata_desc->flags & VRING_DESC_F_WRITE) { 542 log_warnx("wr vioblk: unexpected " 543 "writable data descriptor %d", 544 secdata_desc_idx); 545 goto out; 546 } 547 548 info = vioblk_start_write(dev, 549 cmd.sector + secbias, 550 secdata_desc->addr, secdata_desc->len); 551 552 if (info == NULL) { 553 log_warnx("wr vioblk: can't read " 554 "sector data @ 0x%llx", 555 secdata_desc->addr); 556 goto out; 557 } 558 559 if (vioblk_finish_write(info)) { 560 log_warnx("wr vioblk: disk write " 561 "error"); 562 vioblk_free_info(info); 563 goto out; 564 } 565 566 vioblk_free_info(info); 567 568 secbias += secdata_desc->len / 569 VIRTIO_BLK_SECTOR_SIZE; 570 571 secdata_desc_idx = secdata_desc->next & 572 VIOBLK_QUEUE_MASK; 573 secdata_desc = &desc[secdata_desc_idx]; 574 575 /* Guard against infinite chains */ 576 if (++cnt >= VIOBLK_QUEUE_SIZE) { 577 log_warnx("%s: descriptor table " 578 "invalid", __func__); 579 goto out; 580 } 581 } while (secdata_desc->flags & VRING_DESC_F_NEXT); 582 583 ds_desc_idx = secdata_desc_idx; 584 ds_desc = secdata_desc; 585 586 ds = VIRTIO_BLK_S_OK; 587 break; 588 case VIRTIO_BLK_T_FLUSH: 589 case VIRTIO_BLK_T_FLUSH_OUT: 590 ds_desc_idx = cmd_desc->next & VIOBLK_QUEUE_MASK; 591 ds_desc = &desc[ds_desc_idx]; 592 593 ds = VIRTIO_BLK_S_UNSUPP; 594 break; 595 case VIRTIO_BLK_T_GET_ID: 596 secdata_desc_idx = cmd_desc->next & VIOBLK_QUEUE_MASK; 597 secdata_desc = &desc[secdata_desc_idx]; 598 599 /* 600 * We don't support this command yet. While it's not 601 * officially part of the virtio spec (will be in v1.2) 602 * there's no feature to negotiate. Linux drivers will 603 * often send this command regardless. 604 * 605 * When the command is received, it should appear as a 606 * chain of 3 descriptors, similar to the IN/OUT 607 * commands. The middle descriptor should have have a 608 * length of VIRTIO_BLK_ID_BYTES bytes. 609 */ 610 if ((secdata_desc->flags & VRING_DESC_F_NEXT) == 0) { 611 log_warnx("id vioblk: unchained vioblk data " 612 "descriptor received (idx %d)", 613 cmd_desc_idx); 614 goto out; 615 } 616 617 /* Skip the data descriptor. */ 618 ds_desc_idx = secdata_desc->next & VIOBLK_QUEUE_MASK; 619 ds_desc = &desc[ds_desc_idx]; 620 621 ds = VIRTIO_BLK_S_UNSUPP; 622 break; 623 default: 624 log_warnx("%s: unsupported command 0x%x", __func__, 625 cmd.type); 626 ds_desc_idx = cmd_desc->next & VIOBLK_QUEUE_MASK; 627 ds_desc = &desc[ds_desc_idx]; 628 629 ds = VIRTIO_BLK_S_UNSUPP; 630 break; 631 } 632 633 if ((ds_desc->flags & VRING_DESC_F_WRITE) == 0) { 634 log_warnx("%s: ds descriptor %d unwritable", __func__, 635 ds_desc_idx); 636 goto out; 637 } 638 if (write_mem(ds_desc->addr, &ds, sizeof(ds))) { 639 log_warnx("%s: can't write device status data @ 0x%llx", 640 __func__, ds_desc->addr); 641 goto out; 642 } 643 644 dev->cfg.isr_status = 1; 645 used->ring[used->idx & VIOBLK_QUEUE_MASK].id = cmd_desc_idx; 646 used->ring[used->idx & VIOBLK_QUEUE_MASK].len = cmd_desc->len; 647 __sync_synchronize(); 648 used->idx++; 649 650 vq_info->last_avail = avail->idx & VIOBLK_QUEUE_MASK; 651 idx = (idx + 1) & VIOBLK_QUEUE_MASK; 652 } 653 out: 654 return (1); 655 } 656 657 static void 658 dev_dispatch_vm(int fd, short event, void *arg) 659 { 660 struct virtio_dev *dev = (struct virtio_dev *)arg; 661 struct imsgev *iev = &dev->async_iev; 662 struct imsgbuf *ibuf = &iev->ibuf; 663 struct imsg imsg; 664 ssize_t n = 0; 665 666 if (event & EV_READ) { 667 if ((n = imsg_read(ibuf)) == -1 && errno != EAGAIN) 668 fatal("%s: imsg_read", __func__); 669 if (n == 0) { 670 /* this pipe is dead, so remove the event handler */ 671 log_debug("%s: pipe dead (EV_READ)", __func__); 672 event_del(&iev->ev); 673 event_loopexit(NULL); 674 return; 675 } 676 } 677 678 if (event & EV_WRITE) { 679 if ((n = msgbuf_write(&ibuf->w)) == -1 && errno != EAGAIN) 680 fatal("%s: msgbuf_write", __func__); 681 if (n == 0) { 682 /* this pipe is dead, so remove the event handler */ 683 log_debug("%s: pipe dead (EV_WRITE)", __func__); 684 event_del(&iev->ev); 685 event_loopbreak(); 686 return; 687 } 688 } 689 690 for (;;) { 691 if ((n = imsg_get(ibuf, &imsg)) == -1) 692 fatal("%s: imsg_get", __func__); 693 if (n == 0) 694 break; 695 696 switch (imsg.hdr.type) { 697 case IMSG_VMDOP_PAUSE_VM: 698 log_debug("%s: pausing", __func__); 699 break; 700 case IMSG_VMDOP_UNPAUSE_VM: 701 log_debug("%s: unpausing", __func__); 702 break; 703 default: 704 log_warnx("%s: unhandled imsg type %d", __func__, 705 imsg.hdr.type); 706 break; 707 } 708 imsg_free(&imsg); 709 } 710 imsg_event_add(iev); 711 } 712 713 /* 714 * Synchronous IO handler. 715 * 716 */ 717 static void 718 handle_sync_io(int fd, short event, void *arg) 719 { 720 struct virtio_dev *dev = (struct virtio_dev *)arg; 721 struct imsgev *iev = &dev->sync_iev; 722 struct imsgbuf *ibuf = &iev->ibuf; 723 struct viodev_msg msg; 724 struct imsg imsg; 725 ssize_t n; 726 727 if (event & EV_READ) { 728 if ((n = imsg_read(ibuf)) == -1 && errno != EAGAIN) 729 fatal("%s: imsg_read", __func__); 730 if (n == 0) { 731 /* this pipe is dead, so remove the event handler */ 732 log_debug("%s: vioblk pipe dead (EV_READ)", __func__); 733 event_del(&iev->ev); 734 event_loopexit(NULL); 735 return; 736 } 737 } 738 739 if (event & EV_WRITE) { 740 if ((n = msgbuf_write(&ibuf->w)) == -1 && errno != EAGAIN) 741 fatal("%s: msgbuf_write", __func__); 742 if (n == 0) { 743 /* this pipe is dead, so remove the event handler */ 744 log_debug("%s: vioblk pipe dead (EV_WRITE)", __func__); 745 event_del(&iev->ev); 746 event_loopexit(NULL); 747 return; 748 } 749 } 750 751 for (;;) { 752 if ((n = imsg_get(ibuf, &imsg)) == -1) 753 fatalx("%s: imsg_get (n=%ld)", __func__, n); 754 if (n == 0) 755 break; 756 757 /* Unpack our message. They ALL should be dev messeges! */ 758 IMSG_SIZE_CHECK(&imsg, &msg); 759 memcpy(&msg, imsg.data, sizeof(msg)); 760 imsg_free(&imsg); 761 762 switch (msg.type) { 763 case VIODEV_MSG_DUMP: 764 /* Dump device */ 765 n = atomicio(vwrite, dev->sync_fd, dev, sizeof(*dev)); 766 if (n != sizeof(*dev)) { 767 log_warnx("%s: failed to dump vioblk device", 768 __func__); 769 break; 770 } 771 case VIODEV_MSG_IO_READ: 772 /* Read IO: make sure to send a reply */ 773 msg.data = handle_io_read(&msg, dev); 774 msg.data_valid = 1; 775 imsg_compose_event(iev, IMSG_DEVOP_MSG, 0, 0, -1, &msg, 776 sizeof(msg)); 777 break; 778 case VIODEV_MSG_IO_WRITE: 779 /* Write IO: no reply needed */ 780 if (handle_io_write(&msg, dev) == 1) 781 virtio_assert_pic_irq(dev, 0); 782 break; 783 case VIODEV_MSG_SHUTDOWN: 784 event_del(&dev->sync_iev.ev); 785 event_loopbreak(); 786 return; 787 default: 788 fatalx("%s: invalid msg type %d", __func__, msg.type); 789 } 790 } 791 imsg_event_add(iev); 792 } 793 794 static int 795 handle_io_write(struct viodev_msg *msg, struct virtio_dev *dev) 796 { 797 struct vioblk_dev *vioblk = &dev->vioblk; 798 uint32_t data = msg->data; 799 int intr = 0; 800 801 switch (msg->reg) { 802 case VIRTIO_CONFIG_DEVICE_FEATURES: 803 case VIRTIO_CONFIG_QUEUE_SIZE: 804 case VIRTIO_CONFIG_ISR_STATUS: 805 log_warnx("%s: illegal write %x to %s", __progname, data, 806 virtio_reg_name(msg->reg)); 807 break; 808 case VIRTIO_CONFIG_GUEST_FEATURES: 809 vioblk->cfg.guest_feature = data; 810 break; 811 case VIRTIO_CONFIG_QUEUE_PFN: 812 vioblk->cfg.queue_pfn = data; 813 vioblk_update_qa(vioblk); 814 break; 815 case VIRTIO_CONFIG_QUEUE_SELECT: 816 vioblk->cfg.queue_select = data; 817 vioblk_update_qs(vioblk); 818 break; 819 case VIRTIO_CONFIG_QUEUE_NOTIFY: 820 vioblk->cfg.queue_notify = data; 821 if (vioblk_notifyq(vioblk)) 822 intr = 1; 823 break; 824 case VIRTIO_CONFIG_DEVICE_STATUS: 825 vioblk->cfg.device_status = data; 826 if (vioblk->cfg.device_status == 0) { 827 vioblk->cfg.guest_feature = 0; 828 vioblk->cfg.queue_pfn = 0; 829 vioblk_update_qa(vioblk); 830 vioblk->cfg.queue_size = 0; 831 vioblk_update_qs(vioblk); 832 vioblk->cfg.queue_select = 0; 833 vioblk->cfg.queue_notify = 0; 834 vioblk->cfg.isr_status = 0; 835 vioblk->vq[0].last_avail = 0; 836 vioblk->vq[0].notified_avail = 0; 837 virtio_deassert_pic_irq(dev, msg->vcpu); 838 } 839 break; 840 default: 841 break; 842 } 843 return (intr); 844 } 845 846 static uint32_t 847 handle_io_read(struct viodev_msg *msg, struct virtio_dev *dev) 848 { 849 struct vioblk_dev *vioblk = &dev->vioblk; 850 uint8_t sz = msg->io_sz; 851 uint32_t data; 852 853 if (msg->data_valid) 854 data = msg->data; 855 else 856 data = 0; 857 858 switch (msg->reg) { 859 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI: 860 switch (sz) { 861 case 4: 862 data = (uint32_t)(vioblk->sz); 863 break; 864 case 2: 865 data &= 0xFFFF0000; 866 data |= (uint32_t)(vioblk->sz) & 0xFFFF; 867 break; 868 case 1: 869 data &= 0xFFFFFF00; 870 data |= (uint32_t)(vioblk->sz) & 0xFF; 871 break; 872 } 873 /* XXX handle invalid sz */ 874 break; 875 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 1: 876 if (sz == 1) { 877 data &= 0xFFFFFF00; 878 data |= (uint32_t)(vioblk->sz >> 8) & 0xFF; 879 } 880 /* XXX handle invalid sz */ 881 break; 882 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 2: 883 if (sz == 1) { 884 data &= 0xFFFFFF00; 885 data |= (uint32_t)(vioblk->sz >> 16) & 0xFF; 886 } else if (sz == 2) { 887 data &= 0xFFFF0000; 888 data |= (uint32_t)(vioblk->sz >> 16) & 0xFFFF; 889 } 890 /* XXX handle invalid sz */ 891 break; 892 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 3: 893 if (sz == 1) { 894 data &= 0xFFFFFF00; 895 data |= (uint32_t)(vioblk->sz >> 24) & 0xFF; 896 } 897 /* XXX handle invalid sz */ 898 break; 899 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 4: 900 switch (sz) { 901 case 4: 902 data = (uint32_t)(vioblk->sz >> 32); 903 break; 904 case 2: 905 data &= 0xFFFF0000; 906 data |= (uint32_t)(vioblk->sz >> 32) & 0xFFFF; 907 break; 908 case 1: 909 data &= 0xFFFFFF00; 910 data |= (uint32_t)(vioblk->sz >> 32) & 0xFF; 911 break; 912 } 913 /* XXX handle invalid sz */ 914 break; 915 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 5: 916 if (sz == 1) { 917 data &= 0xFFFFFF00; 918 data |= (uint32_t)(vioblk->sz >> 40) & 0xFF; 919 } 920 /* XXX handle invalid sz */ 921 break; 922 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 6: 923 if (sz == 1) { 924 data &= 0xFFFFFF00; 925 data |= (uint32_t)(vioblk->sz >> 48) & 0xFF; 926 } else if (sz == 2) { 927 data &= 0xFFFF0000; 928 data |= (uint32_t)(vioblk->sz >> 48) & 0xFFFF; 929 } 930 /* XXX handle invalid sz */ 931 break; 932 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 7: 933 if (sz == 1) { 934 data &= 0xFFFFFF00; 935 data |= (uint32_t)(vioblk->sz >> 56) & 0xFF; 936 } 937 /* XXX handle invalid sz */ 938 break; 939 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 8: 940 switch (sz) { 941 case 4: 942 data = (uint32_t)(vioblk->max_xfer); 943 break; 944 case 2: 945 data &= 0xFFFF0000; 946 data |= (uint32_t)(vioblk->max_xfer) & 0xFFFF; 947 break; 948 case 1: 949 data &= 0xFFFFFF00; 950 data |= (uint32_t)(vioblk->max_xfer) & 0xFF; 951 break; 952 } 953 /* XXX handle invalid sz */ 954 break; 955 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 9: 956 if (sz == 1) { 957 data &= 0xFFFFFF00; 958 data |= (uint32_t)(vioblk->max_xfer >> 8) & 0xFF; 959 } 960 /* XXX handle invalid sz */ 961 break; 962 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 10: 963 if (sz == 1) { 964 data &= 0xFFFFFF00; 965 data |= (uint32_t)(vioblk->max_xfer >> 16) & 0xFF; 966 } else if (sz == 2) { 967 data &= 0xFFFF0000; 968 data |= (uint32_t)(vioblk->max_xfer >> 16) 969 & 0xFFFF; 970 } 971 /* XXX handle invalid sz */ 972 break; 973 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 11: 974 if (sz == 1) { 975 data &= 0xFFFFFF00; 976 data |= (uint32_t)(vioblk->max_xfer >> 24) & 0xFF; 977 } 978 /* XXX handle invalid sz */ 979 break; 980 case VIRTIO_CONFIG_DEVICE_FEATURES: 981 data = vioblk->cfg.device_feature; 982 break; 983 case VIRTIO_CONFIG_GUEST_FEATURES: 984 data = vioblk->cfg.guest_feature; 985 break; 986 case VIRTIO_CONFIG_QUEUE_PFN: 987 data = vioblk->cfg.queue_pfn; 988 break; 989 case VIRTIO_CONFIG_QUEUE_SIZE: 990 data = vioblk->cfg.queue_size; 991 break; 992 case VIRTIO_CONFIG_QUEUE_SELECT: 993 data = vioblk->cfg.queue_select; 994 break; 995 case VIRTIO_CONFIG_QUEUE_NOTIFY: 996 data = vioblk->cfg.queue_notify; 997 break; 998 case VIRTIO_CONFIG_DEVICE_STATUS: 999 data = vioblk->cfg.device_status; 1000 break; 1001 case VIRTIO_CONFIG_ISR_STATUS: 1002 data = vioblk->cfg.isr_status; 1003 vioblk->cfg.isr_status = 0; 1004 virtio_deassert_pic_irq(dev, 0); 1005 break; 1006 default: 1007 return (0xFFFFFFFF); 1008 } 1009 1010 return (data); 1011 } 1012