/*	$OpenBSD: vioblk.c,v 1.21 2024/11/27 22:32:14 kirill Exp $	*/

/*
 * Copyright (c) 2023 Dave Voutila <dv@openbsd.org>
 * Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
#include <stdint.h>

#include <dev/pci/virtio_pcireg.h>
#include <dev/pv/vioblkreg.h>
#include <dev/pv/virtioreg.h>

#include <errno.h>
#include <event.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "atomicio.h"
#include "pci.h"
#include "virtio.h"
#include "vmd.h"

extern char *__progname;
extern struct vmd_vm *current_vm;

struct iovec io_v[VIOBLK_QUEUE_SIZE];

static const char *disk_type(int);
static uint32_t handle_io_read(struct viodev_msg *, struct virtio_dev *,
    int8_t *);
static int handle_io_write(struct viodev_msg *, struct virtio_dev *);

static void vioblk_update_qs(struct vioblk_dev *);
static void vioblk_update_qa(struct vioblk_dev *);
static int vioblk_notifyq(struct vioblk_dev *);
static ssize_t vioblk_rw(struct vioblk_dev *, int, off_t,
    struct vring_desc *, struct vring_desc **);

static void dev_dispatch_vm(int, short, void *);
static void handle_sync_io(int, short, void *);

static const char *
disk_type(int type)
{
	switch (type) {
	case VMDF_RAW: return "raw";
	case VMDF_QCOW2: return "qcow2";
	}
	return "unknown";
}

__dead void
vioblk_main(int fd, int fd_vmm)
{
	struct virtio_dev dev;
	struct vioblk_dev *vioblk = NULL;
	struct viodev_msg msg;
	struct vmd_vm vm;
	struct vm_create_params *vcp;
	ssize_t sz;
	off_t szp = 0;
	int i, ret, type;

	/*
	 * stdio - needed for read/write to disk fds and channels to the vm.
	 * vmm + proc - needed to create shared vm mappings.
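	 * (Once guest memory is mapped below, we re-pledge to "stdio" only.)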
	 */
	if (pledge("stdio vmm proc", NULL) == -1)
		fatal("pledge");

	/* Zero and initialize io work queue. */
	memset(io_v, 0, nitems(io_v) * sizeof(io_v[0]));

	/* Receive our virtio_dev, mostly preconfigured. */
	memset(&dev, 0, sizeof(dev));
	sz = atomicio(read, fd, &dev, sizeof(dev));
	if (sz != sizeof(dev)) {
		ret = errno;
		log_warn("failed to receive vioblk");
		goto fail;
	}
	if (dev.dev_type != VMD_DEVTYPE_DISK) {
		ret = EINVAL;
		log_warn("received invalid device type");
		goto fail;
	}
	dev.sync_fd = fd;
	vioblk = &dev.vioblk;

	log_debug("%s: got vioblk dev. num disk fds = %d, sync fd = %d, "
	    "async fd = %d, capacity = %lld, seg_max = %u, vmm fd = %d",
	    __func__, vioblk->ndisk_fd, dev.sync_fd, dev.async_fd,
	    vioblk->capacity, vioblk->seg_max, fd_vmm);

	/* Receive our vm information from the vm process. */
	memset(&vm, 0, sizeof(vm));
	sz = atomicio(read, dev.sync_fd, &vm, sizeof(vm));
	if (sz != sizeof(vm)) {
		ret = EIO;
		log_warnx("failed to receive vm details");
		goto fail;
	}
	vcp = &vm.vm_params.vmc_params;
	current_vm = &vm;

	setproctitle("%s/vioblk%d", vcp->vcp_name, vioblk->idx);
	log_procinit("vm/%s/vioblk%d", vcp->vcp_name, vioblk->idx);

	/* Now that we have our vm information, we can remap memory. */
	ret = remap_guest_mem(&vm, fd_vmm);
	if (ret) {
		log_warnx("failed to remap guest memory");
		goto fail;
	}

	/*
	 * We no longer need /dev/vmm access.
	 */
	close_fd(fd_vmm);
	if (pledge("stdio", NULL) == -1)
		fatal("pledge2");

	/* Initialize the virtio block abstractions. */
	type = vm.vm_params.vmc_disktypes[vioblk->idx];
	switch (type) {
	case VMDF_RAW:
		ret = virtio_raw_init(&vioblk->file, &szp, vioblk->disk_fd,
		    vioblk->ndisk_fd);
		break;
	case VMDF_QCOW2:
		ret = virtio_qcow2_init(&vioblk->file, &szp, vioblk->disk_fd,
		    vioblk->ndisk_fd);
		break;
	default:
		log_warnx("invalid disk image type");
		goto fail;
	}
	if (ret || szp < 0) {
		log_warnx("failed to init %s disk image", disk_type(type));
		goto fail;
	}
	vioblk->capacity = szp / 512;
	log_debug("%s: initialized vioblk%d with %s image (capacity=%lld)",
	    __func__, vioblk->idx, disk_type(type), vioblk->capacity);

	/* If we're restoring hardware, reinitialize the virtqueue hva. */
	if (vm.vm_state & VM_STATE_RECEIVED)
		vioblk_update_qa(vioblk);

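	/*
	 * From here on the device is event driven: the async channel
	 * carries vm lifecycle messages (pause/unpause, verbosity) and
	 * the sync channel carries device register io from the vcpu.
	 */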
	/* Initialize libevent so we can start wiring event handlers. */
	event_init();

	/* Wire up an async imsg channel. */
	log_debug("%s: wiring in async vm event handler (fd=%d)", __func__,
	    dev.async_fd);
	if (vm_device_pipe(&dev, dev_dispatch_vm, NULL)) {
		ret = EIO;
		log_warnx("vm_device_pipe");
		goto fail;
	}

	/* Configure our sync channel event handler. */
	log_debug("%s: wiring in sync channel handler (fd=%d)", __func__,
	    dev.sync_fd);
	if (imsgbuf_init(&dev.sync_iev.ibuf, dev.sync_fd) == -1) {
		ret = EIO;
		log_warn("imsgbuf_init");
		goto fail;
	}
	imsgbuf_allow_fdpass(&dev.sync_iev.ibuf);
	dev.sync_iev.handler = handle_sync_io;
	dev.sync_iev.data = &dev;
	dev.sync_iev.events = EV_READ;
	imsg_event_add(&dev.sync_iev);

	/* Send a ready message over the sync channel. */
	log_debug("%s: telling vm %s device is ready", __func__, vcp->vcp_name);
	memset(&msg, 0, sizeof(msg));
	msg.type = VIODEV_MSG_READY;
	imsg_compose_event(&dev.sync_iev, IMSG_DEVOP_MSG, 0, 0, -1, &msg,
	    sizeof(msg));

	/* Send a ready message over the async channel. */
	log_debug("%s: sending heartbeat", __func__);
	ret = imsg_compose_event(&dev.async_iev, IMSG_DEVOP_MSG, 0, 0, -1,
	    &msg, sizeof(msg));
	if (ret == -1) {
		log_warnx("%s: failed to send async ready message!", __func__);
		goto fail;
	}

	/* Engage the event loop! */
	ret = event_dispatch();

	if (ret == 0) {
		/* Clean shutdown. */
		close_fd(dev.sync_fd);
		close_fd(dev.async_fd);
		for (i = 0; i < vioblk->ndisk_fd; i++)
			close_fd(vioblk->disk_fd[i]);
		_exit(0);
		/* NOTREACHED */
	}

fail:
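	/*
	 * Note: ret carries an errno-style code; it becomes both the
	 * payload of the error message below and our exit status.
	 */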
	/* Try letting the vm know we've failed something. */
	memset(&msg, 0, sizeof(msg));
	msg.type = VIODEV_MSG_ERROR;
	msg.data = ret;
	imsg_compose(&dev.sync_iev.ibuf, IMSG_DEVOP_MSG, 0, 0, -1, &msg,
	    sizeof(msg));
	imsgbuf_flush(&dev.sync_iev.ibuf);

	close_fd(dev.sync_fd);
	close_fd(dev.async_fd);
	if (vioblk != NULL) {
		for (i = 0; i < vioblk->ndisk_fd; i++)
			close_fd(vioblk->disk_fd[i]);
	}
	_exit(ret);
	/* NOTREACHED */
}

const char *
vioblk_cmd_name(uint32_t type)
{
	switch (type) {
	case VIRTIO_BLK_T_IN: return "read";
	case VIRTIO_BLK_T_OUT: return "write";
	case VIRTIO_BLK_T_SCSI_CMD: return "scsi read";
	case VIRTIO_BLK_T_SCSI_CMD_OUT: return "scsi write";
	case VIRTIO_BLK_T_FLUSH: return "flush";
	case VIRTIO_BLK_T_FLUSH_OUT: return "flush out";
	case VIRTIO_BLK_T_GET_ID: return "get id";
	default: return "unknown";
	}
}

static void
vioblk_update_qa(struct vioblk_dev *dev)
{
	struct virtio_vq_info *vq_info;
	void *hva = NULL;

	/* Invalid queue? */
	if (dev->cfg.queue_select > 0)
		return;

	vq_info = &dev->vq[dev->cfg.queue_select];
	vq_info->q_gpa = (uint64_t)dev->cfg.queue_pfn * VIRTIO_PAGE_SIZE;

	hva = hvaddr_mem(vq_info->q_gpa, vring_size(VIOBLK_QUEUE_SIZE));
	if (hva == NULL)
		fatal("vioblk_update_qa");
	vq_info->q_hva = hva;
}

static void
vioblk_update_qs(struct vioblk_dev *dev)
{
	struct virtio_vq_info *vq_info;

	/* Invalid queue? */
	if (dev->cfg.queue_select > 0) {
		dev->cfg.queue_size = 0;
		return;
	}

	vq_info = &dev->vq[dev->cfg.queue_select];

	/* Update queue pfn/size based on queue select */
	dev->cfg.queue_pfn = vq_info->q_gpa >> 12;
	dev->cfg.queue_size = vq_info->qs;
}

/*
 * Process virtqueue notifications. If an unrecoverable error occurs, puts
 * device into a "needs reset" state.
 *
 * Returns 1 if we need to assert an IRQ.
 */
static int
vioblk_notifyq(struct vioblk_dev *dev)
{
	uint32_t cmd_len;
	uint16_t idx, cmd_desc_idx;
	uint8_t ds;
	off_t offset;
	ssize_t sz;
	int is_write, notify = 0, i;
	char *vr;
	struct vring_desc *table, *desc;
	struct vring_avail *avail;
	struct vring_used *used;
	struct virtio_blk_req_hdr *cmd;
	struct virtio_vq_info *vq_info;

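	/*
	 * Layout note: the legacy split virtqueue is one contiguous guest
	 * allocation holding, in order, the descriptor table, the avail
	 * ring, and (page-aligned) the used ring:
	 *
	 *	struct vring_desc	desc[VIOBLK_QUEUE_SIZE];
	 *	struct vring_avail	avail;	(+ ring + padding)
	 *	struct vring_used	used;	(+ ring)
	 *
	 * vq_availoffset and vq_usedoffset below are precomputed byte
	 * offsets into that allocation.
	 */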
	/* Invalid queue? */
	if (dev->cfg.queue_notify > 0)
		return (0);

	vq_info = &dev->vq[dev->cfg.queue_notify];
	idx = vq_info->last_avail;
	vr = vq_info->q_hva;
	if (vr == NULL)
		fatalx("%s: null vring", __func__);

	/* Compute offsets in table of descriptors, avail ring, and used ring */
	table = (struct vring_desc *)(vr);
	avail = (struct vring_avail *)(vr + vq_info->vq_availoffset);
	used = (struct vring_used *)(vr + vq_info->vq_usedoffset);

	while (idx != avail->idx) {
		/* Retrieve Command descriptor. */
		cmd_desc_idx = avail->ring[idx & VIOBLK_QUEUE_MASK];
		desc = &table[cmd_desc_idx];
		cmd_len = desc->len;

		/*
		 * Validate Command descriptor. It should be chained to another
		 * descriptor and not be itself writable.
		 */
		if ((desc->flags & VRING_DESC_F_NEXT) == 0) {
			log_warnx("%s: unchained cmd descriptor", __func__);
			goto reset;
		}
		if (DESC_WRITABLE(desc)) {
			log_warnx("%s: invalid cmd descriptor state", __func__);
			goto reset;
		}

		/* Retrieve the vioblk command request. */
		cmd = hvaddr_mem(desc->addr, sizeof(*cmd));
		if (cmd == NULL)
			goto reset;

		/* Advance to the 2nd descriptor. */
		desc = &table[desc->next & VIOBLK_QUEUE_MASK];

		/* Process each available command & chain. */
		switch (cmd->type) {
		case VIRTIO_BLK_T_IN:
		case VIRTIO_BLK_T_OUT:
			/* Read (IN) & Write (OUT) */
			is_write = (cmd->type == VIRTIO_BLK_T_OUT) ? 1 : 0;
			offset = cmd->sector * VIRTIO_BLK_SECTOR_SIZE;
			sz = vioblk_rw(dev, is_write, offset, table, &desc);
			if (sz == -1)
				ds = VIRTIO_BLK_S_IOERR;
			else
				ds = VIRTIO_BLK_S_OK;
			break;
		case VIRTIO_BLK_T_GET_ID:
			/*
			 * We don't support this command yet. While it's not
			 * officially part of the virtio spec (will be in v1.2)
			 * there's no feature to negotiate. Linux drivers will
			 * often send this command regardless.
			 */
			ds = VIRTIO_BLK_S_UNSUPP;
			break;
		default:
			log_warnx("%s: unsupported vioblk command %d", __func__,
			    cmd->type);
			ds = VIRTIO_BLK_S_UNSUPP;
			break;
		}

		/* Advance to the end of the chain, if needed. */
		i = 0;
		while (desc->flags & VRING_DESC_F_NEXT) {
			desc = &table[desc->next & VIOBLK_QUEUE_MASK];
			if (++i >= VIOBLK_QUEUE_SIZE) {
				/*
				 * If we encounter an infinite/looping chain,
				 * not much we can do but say we need a reset.
				 */
				log_warnx("%s: descriptor chain overflow",
				    __func__);
				goto reset;
			}
		}

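		/*
		 * The descriptor we now sit on should be the chain's final
		 * one: per the virtio spec it is a single guest-writable
		 * status byte.
		 */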
		/* Provide the status of our command processing. */
		if (!DESC_WRITABLE(desc)) {
			log_warnx("%s: status descriptor unwritable", __func__);
			goto reset;
		}
		/* Overkill as ds is 1 byte, but validates gpa. */
		if (write_mem(desc->addr, &ds, sizeof(ds)))
			log_warnx("%s: can't write device status data "
			    "@ 0x%llx", __func__, desc->addr);

		dev->cfg.isr_status |= 1;
		notify = 1;

		used->ring[used->idx & VIOBLK_QUEUE_MASK].id = cmd_desc_idx;
		used->ring[used->idx & VIOBLK_QUEUE_MASK].len = cmd_len;

		__sync_synchronize();
		used->idx++;
		idx++;
	}

	vq_info->last_avail = idx;
	return (notify);

reset:
	/*
	 * When setting the "needs reset" flag, the driver is notified
	 * via a configuration change interrupt.
	 */
	dev->cfg.device_status |= DEVICE_NEEDS_RESET;
	dev->cfg.isr_status |= VIRTIO_CONFIG_ISR_CONFIG_CHANGE;
	return (1);
}

static void
dev_dispatch_vm(int fd, short event, void *arg)
{
	struct virtio_dev *dev = (struct virtio_dev *)arg;
	struct imsgev *iev = &dev->async_iev;
	struct imsgbuf *ibuf = &iev->ibuf;
	struct imsg imsg;
	ssize_t n = 0;
	int verbose;

	if (event & EV_READ) {
		if ((n = imsgbuf_read(ibuf)) == -1)
			fatal("%s: imsgbuf_read", __func__);
		if (n == 0) {
			/* this pipe is dead, so remove the event handler */
			log_debug("%s: pipe dead (EV_READ)", __func__);
			event_del(&iev->ev);
			event_loopexit(NULL);
			return;
		}
	}

	if (event & EV_WRITE) {
		if (imsgbuf_write(ibuf) == -1) {
			if (errno == EPIPE) {
				/* this pipe is dead, remove the handler */
				log_debug("%s: pipe dead (EV_WRITE)", __func__);
				event_del(&iev->ev);
				event_loopexit(NULL);
				return;
			}
			fatal("%s: imsgbuf_write", __func__);
		}
	}

	for (;;) {
		if ((n = imsg_get(ibuf, &imsg)) == -1)
			fatal("%s: imsg_get", __func__);
		if (n == 0)
			break;

		switch (imsg.hdr.type) {
		case IMSG_VMDOP_PAUSE_VM:
			log_debug("%s: pausing", __func__);
			break;
		case IMSG_VMDOP_UNPAUSE_VM:
			log_debug("%s: unpausing", __func__);
			break;
		case IMSG_CTL_VERBOSE:
			IMSG_SIZE_CHECK(&imsg, &verbose);
			memcpy(&verbose, imsg.data, sizeof(verbose));
			log_setverbose(verbose);
			break;
		default:
			log_warnx("%s: unhandled imsg type %d", __func__,
			    imsg.hdr.type);
			break;
		}
		imsg_free(&imsg);
	}
	imsg_event_add(iev);
}

/*
 * Synchronous IO handler. Answers device register reads and writes sent
 * by the vcpu thread over the sync channel.
 */
static void
handle_sync_io(int fd, short event, void *arg)
{
	struct virtio_dev *dev = (struct virtio_dev *)arg;
	struct imsgev *iev = &dev->sync_iev;
	struct imsgbuf *ibuf = &iev->ibuf;
	struct viodev_msg msg;
	struct imsg imsg;
	ssize_t n;
	int8_t intr = INTR_STATE_NOOP;

	if (event & EV_READ) {
		if ((n = imsgbuf_read(ibuf)) == -1)
			fatal("%s: imsgbuf_read", __func__);
		if (n == 0) {
			/* this pipe is dead, so remove the event handler */
			log_debug("%s: vioblk pipe dead (EV_READ)", __func__);
			event_del(&iev->ev);
			event_loopexit(NULL);
			return;
		}
	}

	if (event & EV_WRITE) {
		if (imsgbuf_write(ibuf) == -1) {
			if (errno == EPIPE) {
				/* this pipe is dead, remove the handler */
				log_debug("%s: pipe dead (EV_WRITE)", __func__);
				event_del(&iev->ev);
				event_loopexit(NULL);
				return;
			}
			fatal("%s: imsgbuf_write", __func__);
		}
	}

	for (;;) {
		if ((n = imsg_get(ibuf, &imsg)) == -1)
			fatalx("%s: imsg_get (n=%ld)", __func__, n);
		if (n == 0)
			break;

		/* Unpack our message. They ALL should be dev messages! */
		IMSG_SIZE_CHECK(&imsg, &msg);
		memcpy(&msg, imsg.data, sizeof(msg));
		imsg_free(&imsg);

		switch (msg.type) {
		case VIODEV_MSG_DUMP:
			/* Dump device */
			n = atomicio(vwrite, dev->sync_fd, dev, sizeof(*dev));
			if (n != sizeof(*dev)) {
				log_warnx("%s: failed to dump vioblk device",
				    __func__);
			}
			break;
		case VIODEV_MSG_IO_READ:
			/* Read IO: make sure to send a reply */
			msg.data = handle_io_read(&msg, dev, &intr);
			msg.data_valid = 1;
			msg.state = intr;
			imsg_compose_event(iev, IMSG_DEVOP_MSG, 0, 0, -1, &msg,
			    sizeof(msg));
			break;
		case VIODEV_MSG_IO_WRITE:
			/* Write IO: no reply needed */
			if (handle_io_write(&msg, dev) == 1)
				virtio_assert_irq(dev, 0);
			break;
		case VIODEV_MSG_SHUTDOWN:
			event_del(&dev->sync_iev.ev);
			event_loopbreak();
			return;
		default:
			fatalx("%s: invalid msg type %d", __func__, msg.type);
		}
	}
	imsg_event_add(iev);
}

static int
handle_io_write(struct viodev_msg *msg, struct virtio_dev *dev)
{
	struct vioblk_dev *vioblk = &dev->vioblk;
	uint32_t data = msg->data;
	int intr = 0;

	switch (msg->reg) {
	case VIRTIO_CONFIG_DEVICE_FEATURES:
	case VIRTIO_CONFIG_QUEUE_SIZE:
	case VIRTIO_CONFIG_ISR_STATUS:
		log_warnx("%s: illegal write %x to %s", __progname, data,
		    virtio_reg_name(msg->reg));
		break;
	case VIRTIO_CONFIG_GUEST_FEATURES:
		vioblk->cfg.guest_feature = data;
		break;
	case VIRTIO_CONFIG_QUEUE_PFN:
		vioblk->cfg.queue_pfn = data;
		vioblk_update_qa(vioblk);
		break;
	case VIRTIO_CONFIG_QUEUE_SELECT:
		vioblk->cfg.queue_select = data;
		vioblk_update_qs(vioblk);
		break;
	case VIRTIO_CONFIG_QUEUE_NOTIFY:
		/* XXX We should be stricter about status checks. */
		if (!(vioblk->cfg.device_status & DEVICE_NEEDS_RESET)) {
			vioblk->cfg.queue_notify = data;
			if (vioblk_notifyq(vioblk))
				intr = 1;
		}
		break;
	case VIRTIO_CONFIG_DEVICE_STATUS:
		vioblk->cfg.device_status = data;
		if (vioblk->cfg.device_status == 0) {
			vioblk->cfg.guest_feature = 0;
			vioblk->cfg.queue_pfn = 0;
			vioblk_update_qa(vioblk);
			vioblk->cfg.queue_size = 0;
			vioblk_update_qs(vioblk);
			vioblk->cfg.queue_select = 0;
			vioblk->cfg.queue_notify = 0;
			vioblk->cfg.isr_status = 0;
			vioblk->vq[0].last_avail = 0;
			vioblk->vq[0].notified_avail = 0;
			virtio_deassert_irq(dev, msg->vcpu);
		}
		break;
	default:
		break;
	}
	return (intr);
}

static uint32_t
handle_io_read(struct viodev_msg *msg, struct virtio_dev *dev, int8_t *intr)
{
	struct vioblk_dev *vioblk = &dev->vioblk;
	uint8_t sz = msg->io_sz;
	uint32_t data;

	if (msg->data_valid)
		data = msg->data;
	else
		data = 0;

	switch (msg->reg) {
	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI:
		switch (sz) {
		case 4:
			data = (uint32_t)(vioblk->capacity);
			break;
		case 2:
			data &= 0xFFFF0000;
			data |= (uint32_t)(vioblk->capacity) & 0xFFFF;
			break;
		case 1:
			data &= 0xFFFFFF00;
			data |= (uint32_t)(vioblk->capacity) & 0xFF;
			break;
		}
		/* XXX handle invalid sz */
		break;
	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 1:
		if (sz == 1) {
			data &= 0xFFFFFF00;
			data |= (uint32_t)(vioblk->capacity >> 8) & 0xFF;
		}
		/* XXX handle invalid sz */
		break;
	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 2:
		if (sz == 1) {
			data &= 0xFFFFFF00;
			data |= (uint32_t)(vioblk->capacity >> 16) & 0xFF;
		} else if (sz == 2) {
			data &= 0xFFFF0000;
			data |= (uint32_t)(vioblk->capacity >> 16) & 0xFFFF;
		}
		/* XXX handle invalid sz */
		break;
	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 3:
		if (sz == 1) {
			data &= 0xFFFFFF00;
			data |= (uint32_t)(vioblk->capacity >> 24) & 0xFF;
		}
		/* XXX handle invalid sz */
		break;
	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 4:
		switch (sz) {
		case 4:
			data = (uint32_t)(vioblk->capacity >> 32);
			break;
		case 2:
			data &= 0xFFFF0000;
			data |= (uint32_t)(vioblk->capacity >> 32) & 0xFFFF;
			break;
		case 1:
			data &= 0xFFFFFF00;
			data |= (uint32_t)(vioblk->capacity >> 32) & 0xFF;
			break;
		}
		/* XXX handle invalid sz */
		break;
	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 5:
		if (sz == 1) {
			data &= 0xFFFFFF00;
			data |= (uint32_t)(vioblk->capacity >> 40) & 0xFF;
		}
		/* XXX handle invalid sz */
		break;
	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 6:
		if (sz == 1) {
			data &= 0xFFFFFF00;
			data |= (uint32_t)(vioblk->capacity >> 48) & 0xFF;
		} else if (sz == 2) {
			data &= 0xFFFF0000;
			data |= (uint32_t)(vioblk->capacity >> 48) & 0xFFFF;
		}
		/* XXX handle invalid sz */
		break;
	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 7:
		if (sz == 1) {
			data &= 0xFFFFFF00;
			data |= (uint32_t)(vioblk->capacity >> 56) & 0xFF;
		}
		/* XXX handle invalid sz */
		break;
	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 12:
		switch (sz) {
		case 4:
			data = (uint32_t)(vioblk->seg_max);
			break;
		case 2:
			data &= 0xFFFF0000;
			data |= (uint32_t)(vioblk->seg_max) & 0xFFFF;
			break;
		case 1:
			data &= 0xFFFFFF00;
			data |= (uint32_t)(vioblk->seg_max) & 0xFF;
			break;
		}
		/* XXX handle invalid sz */
		break;
	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 13:
		if (sz == 1) {
			data &= 0xFFFFFF00;
			data |= (uint32_t)(vioblk->seg_max >> 8) & 0xFF;
		}
		/* XXX handle invalid sz */
		break;
	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 14:
		if (sz == 1) {
			data &= 0xFFFFFF00;
			data |= (uint32_t)(vioblk->seg_max >> 16) & 0xFF;
		} else if (sz == 2) {
			data &= 0xFFFF0000;
			data |= (uint32_t)(vioblk->seg_max >> 16)
			    & 0xFFFF;
		}
		/* XXX handle invalid sz */
		break;
	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 15:
		if (sz == 1) {
			data &= 0xFFFFFF00;
			data |= (uint32_t)(vioblk->seg_max >> 24) & 0xFF;
		}
		/* XXX handle invalid sz */
		break;
	case VIRTIO_CONFIG_DEVICE_FEATURES:
		data = vioblk->cfg.device_feature;
		break;
	case VIRTIO_CONFIG_GUEST_FEATURES:
		data = vioblk->cfg.guest_feature;
		break;
	case VIRTIO_CONFIG_QUEUE_PFN:
		data = vioblk->cfg.queue_pfn;
		break;
	case VIRTIO_CONFIG_QUEUE_SIZE:
		data = vioblk->cfg.queue_size;
		break;
	case VIRTIO_CONFIG_QUEUE_SELECT:
		data = vioblk->cfg.queue_select;
		break;
	case VIRTIO_CONFIG_QUEUE_NOTIFY:
		data = vioblk->cfg.queue_notify;
		break;
	case VIRTIO_CONFIG_DEVICE_STATUS:
		data = vioblk->cfg.device_status;
		break;
	case VIRTIO_CONFIG_ISR_STATUS:
		data = vioblk->cfg.isr_status;
		vioblk->cfg.isr_status = 0;
		if (intr != NULL)
			*intr = INTR_STATE_DEASSERT;
		break;
	default:
		return (0xFFFFFFFF);
	}

	return (data);
}

/*
 * Emulate read/write io. Walks the descriptor chain, collecting io work and
 * then emulates the read or write.
 *
 * On success, returns bytes read/written.
 * On error, returns -1 and descriptor (desc) remains at its current position.
 */
static ssize_t
vioblk_rw(struct vioblk_dev *dev, int is_write, off_t offset,
    struct vring_desc *desc_tbl, struct vring_desc **desc)
{
	struct iovec *iov = NULL;
	ssize_t sz = 0;
	size_t io_idx = 0;	/* Index into iovec workqueue. */
	size_t xfer_sz = 0;	/* Total accumulated io bytes. */

	do {
		iov = &io_v[io_idx];

		/*
		 * Reads require writable descriptors. Writes require
		 * non-writable descriptors.
		 */
		if ((!is_write) ^ DESC_WRITABLE(*desc)) {
			log_warnx("%s: invalid descriptor for %s command",
			    __func__, is_write ? "write" : "read");
			return (-1);
		}

		/* Collect the IO segment information. */
		iov->iov_len = (size_t)(*desc)->len;
		iov->iov_base = hvaddr_mem((*desc)->addr, iov->iov_len);
		if (iov->iov_base == NULL)
			return (-1);

		/* Move our counters. */
		xfer_sz += iov->iov_len;
		io_idx++;

		/* Guard against infinite chains */
		if (io_idx >= nitems(io_v)) {
			log_warnx("%s: descriptor table invalid", __func__);
			return (-1);
		}

		/* Advance to the next descriptor. */
		*desc = &desc_tbl[(*desc)->next & VIOBLK_QUEUE_MASK];
	} while ((*desc)->flags & VRING_DESC_F_NEXT);

	/*
	 * Validate the requested block io operation alignment and size.
	 * Checking offset is just an extra caution as it is derived from
	 * a disk sector and is done for completeness in bounds checking.
	 */
	if (offset % VIRTIO_BLK_SECTOR_SIZE != 0 ||
	    xfer_sz % VIRTIO_BLK_SECTOR_SIZE != 0) {
		log_warnx("%s: unaligned %s", __func__,
		    is_write ? "write" : "read");
		return (-1);
	}
	if (xfer_sz > SSIZE_MAX) {	/* iovec_copyin limit */
		log_warnx("%s: invalid %s size: %zu", __func__,
"write" : "read", xfer_sz); 86220e554f8Sdv return (-1); 86320e554f8Sdv } 86420e554f8Sdv 86520e554f8Sdv /* Emulate the Read or Write operation. */ 86620e554f8Sdv if (is_write) 86720e554f8Sdv sz = dev->file.pwritev(dev->file.p, io_v, io_idx, offset); 86820e554f8Sdv else 86920e554f8Sdv sz = dev->file.preadv(dev->file.p, io_v, io_idx, offset); 87020e554f8Sdv if (sz != (ssize_t)xfer_sz) { 87120e554f8Sdv log_warnx("%s: %s failure at offset 0x%llx, xfer_sz=%zu, " 87220e554f8Sdv "sz=%ld", __func__, (is_write ? "write" : "read"), offset, 87320e554f8Sdv xfer_sz, sz); 87420e554f8Sdv return (-1); 87520e554f8Sdv } 87620e554f8Sdv 87720e554f8Sdv return (sz); 87820e554f8Sdv } 879