/*	$OpenBSD: vionet.c,v 1.22 2024/11/21 13:39:34 claudio Exp $	*/

/*
 * Copyright (c) 2023 Dave Voutila <dv@openbsd.org>
 * Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/types.h>

#include <dev/pci/virtio_pcireg.h>
#include <dev/pv/virtioreg.h>

#include <net/if.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>

#include <errno.h>
#include <event.h>
#include <fcntl.h>
#include <pthread.h>
#include <pthread_np.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "atomicio.h"
#include "virtio.h"
#include "vmd.h"

#define VIRTIO_NET_F_MAC	(1 << 5)
#define RXQ	0
#define TXQ	1

extern char *__progname;
extern struct vmd_vm *current_vm;

struct packet {
	uint8_t	*buf;
	size_t	 len;
};

static void *rx_run_loop(void *);
static void *tx_run_loop(void *);
static int vionet_rx(struct vionet_dev *, int);
static ssize_t vionet_rx_copy(struct vionet_dev *, int, const struct iovec *,
    int, size_t);
static ssize_t vionet_rx_zerocopy(struct vionet_dev *, int,
    const struct iovec *, int);
static void vionet_rx_event(int, short, void *);
static uint32_t handle_io_read(struct viodev_msg *, struct virtio_dev *,
    int8_t *);
static void handle_io_write(struct viodev_msg *, struct virtio_dev *);
static int vionet_tx(struct virtio_dev *);
static void vionet_notifyq(struct virtio_dev *);
static void dev_dispatch_vm(int, short, void *);
static void handle_sync_io(int, short, void *);
static void read_pipe_main(int, short, void *);
static void read_pipe_rx(int, short, void *);
static void read_pipe_tx(int, short, void *);
static void vionet_assert_pic_irq(struct virtio_dev *);
static void vionet_deassert_pic_irq(struct virtio_dev *);
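
/*
 * Threading model: three event loops service the device. The main
 * thread owns the imsg channels to the vm process, while dedicated
 * rx and tx threads service the virtqueues. The threads communicate
 * over vm_dev_pipe channels, and shared device config state is
 * guarded by an rwlock.
 */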

/* Device Globals */
struct event ev_tap;
struct event ev_inject;
struct event_base *ev_base_main;
struct event_base *ev_base_rx;
struct event_base *ev_base_tx;
pthread_t rx_thread;
pthread_t tx_thread;
struct vm_dev_pipe pipe_main;
struct vm_dev_pipe pipe_rx;
struct vm_dev_pipe pipe_tx;
int pipe_inject[2];
#define READ	0
#define WRITE	1
struct iovec iov_rx[VIONET_QUEUE_SIZE];
struct iovec iov_tx[VIONET_QUEUE_SIZE];
pthread_rwlock_t lock = NULL;	/* Guards device config state. */
int resetting = 0;	/* Transient reset state used to coordinate reset. */
int rx_enabled = 0;	/* 1: we expect to read the tap, 0: wait for notify. */

__dead void
vionet_main(int fd, int fd_vmm)
{
	struct virtio_dev	 dev;
	struct vionet_dev	*vionet = NULL;
	struct viodev_msg	 msg;
	struct vmd_vm		 vm;
	struct vm_create_params	*vcp;
	ssize_t			 sz;
	int			 ret;

	/*
	 * stdio - needed for read/write to disk fds and channels to the vm.
	 * vmm + proc - needed to create shared vm mappings.
	 */
	if (pledge("stdio vmm proc", NULL) == -1)
		fatal("pledge");

	/* Initialize iovec arrays. */
	memset(iov_rx, 0, sizeof(iov_rx));
	memset(iov_tx, 0, sizeof(iov_tx));

	/* Receive our vionet_dev, mostly preconfigured. */
	sz = atomicio(read, fd, &dev, sizeof(dev));
	if (sz != sizeof(dev)) {
		ret = errno;
		log_warn("failed to receive vionet");
		goto fail;
	}
	if (dev.dev_type != VMD_DEVTYPE_NET) {
		ret = EINVAL;
		log_warn("received invalid device type");
		goto fail;
	}
	dev.sync_fd = fd;
	vionet = &dev.vionet;

	log_debug("%s: got vionet dev. tap fd = %d, syncfd = %d, asyncfd = %d"
	    ", vmm fd = %d", __func__, vionet->data_fd, dev.sync_fd,
	    dev.async_fd, fd_vmm);

	/* Receive our vm information from the vm process. */
	memset(&vm, 0, sizeof(vm));
	sz = atomicio(read, dev.sync_fd, &vm, sizeof(vm));
	if (sz != sizeof(vm)) {
		ret = EIO;
		log_warnx("failed to receive vm details");
		goto fail;
	}
	vcp = &vm.vm_params.vmc_params;
	current_vm = &vm;
	setproctitle("%s/vionet%d", vcp->vcp_name, vionet->idx);
	log_procinit("vm/%s/vionet%d", vcp->vcp_name, vionet->idx);

	/* Now that we have our vm information, we can remap memory. */
	ret = remap_guest_mem(&vm, fd_vmm);
	if (ret) {
		fatal("%s: failed to remap", __func__);
		goto fail;
	}

	/*
	 * We no longer need /dev/vmm access.
	 */
	close_fd(fd_vmm);
	if (pledge("stdio", NULL) == -1)
		fatal("pledge2");

	/* If we're restoring hardware, re-initialize virtqueue hva's. */
	if (vm.vm_state & VM_STATE_RECEIVED) {
		struct virtio_vq_info *vq_info;
		void *hva = NULL;

		vq_info = &dev.vionet.vq[TXQ];
		if (vq_info->q_gpa != 0) {
			log_debug("%s: restoring TX virtqueue for gpa 0x%llx",
			    __func__, vq_info->q_gpa);
			hva = hvaddr_mem(vq_info->q_gpa,
			    vring_size(VIONET_QUEUE_SIZE));
			if (hva == NULL)
				fatalx("%s: hva == NULL", __func__);
			vq_info->q_hva = hva;
		}

		vq_info = &dev.vionet.vq[RXQ];
		if (vq_info->q_gpa != 0) {
			log_debug("%s: restoring RX virtqueue for gpa 0x%llx",
			    __func__, vq_info->q_gpa);
			hva = hvaddr_mem(vq_info->q_gpa,
			    vring_size(VIONET_QUEUE_SIZE));
			if (hva == NULL)
				fatalx("%s: hva == NULL", __func__);
			vq_info->q_hva = hva;
		}
	}

	/* Initialize our packet injection pipe. */
	if (pipe2(pipe_inject, O_NONBLOCK) == -1) {
		log_warn("%s: injection pipe", __func__);
		goto fail;
	}

	/* Initialize inter-thread communication channels. */
	vm_pipe_init2(&pipe_main, read_pipe_main, &dev);
	vm_pipe_init2(&pipe_rx, read_pipe_rx, &dev);
	vm_pipe_init2(&pipe_tx, read_pipe_tx, &dev);

	/* Initialize RX and TX threads. */
	ret = pthread_create(&rx_thread, NULL, rx_run_loop, &dev);
	if (ret) {
		errno = ret;
		log_warn("%s: failed to initialize rx thread", __func__);
		goto fail;
	}
	pthread_set_name_np(rx_thread, "rx");
	ret = pthread_create(&tx_thread, NULL, tx_run_loop, &dev);
	if (ret) {
		errno = ret;
		log_warn("%s: failed to initialize tx thread", __func__);
		goto fail;
	}
	pthread_set_name_np(tx_thread, "tx");

	/* Initialize our rwlock for guarding shared device state. */
	ret = pthread_rwlock_init(&lock, NULL);
	if (ret) {
		errno = ret;
		log_warn("%s: failed to initialize rwlock", __func__);
		goto fail;
	}

	/* Initialize libevent so we can start wiring event handlers. */
	ev_base_main = event_base_new();

	/* Add our handler for receiving messages from the RX/TX threads. */
	event_base_set(ev_base_main, &pipe_main.read_ev);
	event_add(&pipe_main.read_ev, NULL);

	/* Wire up an async imsg channel. */
	log_debug("%s: wiring in async vm event handler (fd=%d)", __func__,
	    dev.async_fd);
	if (vm_device_pipe(&dev, dev_dispatch_vm, ev_base_main)) {
		ret = EIO;
		log_warnx("vm_device_pipe");
		goto fail;
	}
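
	/*
	 * Note: the device speaks imsg over two channels. The sync
	 * channel carries register IO (serviced by handle_sync_io
	 * below), which the vm process waits on when emulating
	 * register reads; the async channel carries control traffic
	 * like pause/unpause and irq kicks.
	 */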

	/* Configure our sync channel event handler. */
	log_debug("%s: wiring in sync channel handler (fd=%d)", __func__,
	    dev.sync_fd);
	if (imsgbuf_init(&dev.sync_iev.ibuf, dev.sync_fd) == -1) {
		log_warnx("imsgbuf_init");
		goto fail;
	}
	imsgbuf_allow_fdpass(&dev.sync_iev.ibuf);
	dev.sync_iev.handler = handle_sync_io;
	dev.sync_iev.data = &dev;
	dev.sync_iev.events = EV_READ;
	imsg_event_add2(&dev.sync_iev, ev_base_main);

	/* Send a ready message over the sync channel. */
	log_debug("%s: telling vm %s device is ready", __func__, vcp->vcp_name);
	memset(&msg, 0, sizeof(msg));
	msg.type = VIODEV_MSG_READY;
	imsg_compose_event2(&dev.sync_iev, IMSG_DEVOP_MSG, 0, 0, -1, &msg,
	    sizeof(msg), ev_base_main);

	/* Send a ready message over the async channel. */
	log_debug("%s: sending async ready message", __func__);
	ret = imsg_compose_event2(&dev.async_iev, IMSG_DEVOP_MSG, 0, 0, -1,
	    &msg, sizeof(msg), ev_base_main);
	if (ret == -1) {
		log_warnx("%s: failed to send async ready message!", __func__);
		goto fail;
	}

	/* Engage the event loop! */
	ret = event_base_dispatch(ev_base_main);
	event_base_free(ev_base_main);

	/* Try stopping the rx & tx threads cleanly by messaging them. */
	vm_pipe_send(&pipe_rx, VIRTIO_THREAD_STOP);
	vm_pipe_send(&pipe_tx, VIRTIO_THREAD_STOP);

	/* Wait for threads to stop. */
	pthread_join(rx_thread, NULL);
	pthread_join(tx_thread, NULL);
	pthread_rwlock_destroy(&lock);

	/* Cleanup */
	if (ret == 0) {
		close_fd(dev.sync_fd);
		close_fd(dev.async_fd);
		close_fd(vionet->data_fd);
		close_fd(pipe_main.read);
		close_fd(pipe_main.write);
		close_fd(pipe_rx.write);
		close_fd(pipe_tx.write);
		close_fd(pipe_inject[READ]);
		close_fd(pipe_inject[WRITE]);
		_exit(ret);
		/* NOTREACHED */
	}
fail:
	/* Try firing off a message to the vm saying we're dying. */
	memset(&msg, 0, sizeof(msg));
	msg.type = VIODEV_MSG_ERROR;
	msg.data = ret;
	imsg_compose(&dev.sync_iev.ibuf, IMSG_DEVOP_MSG, 0, 0, -1, &msg,
	    sizeof(msg));
	imsgbuf_flush(&dev.sync_iev.ibuf);

	close_fd(dev.sync_fd);
	close_fd(dev.async_fd);
	close_fd(pipe_inject[READ]);
	close_fd(pipe_inject[WRITE]);
	if (vionet != NULL)
		close_fd(vionet->data_fd);
	if (lock != NULL)
		pthread_rwlock_destroy(&lock);
	_exit(ret);
}
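
/*
 * Legacy virtio drivers place a ring by writing its guest page frame
 * number to the QUEUE_PFN register, so gpa = pfn * VIRTIO_PAGE_SIZE.
 * For example, a driver writing pfn 0x12345 puts the ring at gpa
 * 0x12345000. vionet_update_qa() below performs that translation.
 */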

/*
 * Update the gpa and hva of the virtqueue.
 */
static void
vionet_update_qa(struct vionet_dev *dev)
{
	struct virtio_vq_info *vq_info;
	void *hva = NULL;

	/* Invalid queue? */
	if (dev->cfg.queue_select > 1)
		return;

	vq_info = &dev->vq[dev->cfg.queue_select];
	vq_info->q_gpa = (uint64_t)dev->cfg.queue_pfn * VIRTIO_PAGE_SIZE;
	dev->cfg.queue_pfn = vq_info->q_gpa >> 12;

	if (vq_info->q_gpa == 0)
		vq_info->q_hva = NULL;

	hva = hvaddr_mem(vq_info->q_gpa, vring_size(VIONET_QUEUE_SIZE));
	if (hva == NULL)
		fatalx("%s: hva == NULL", __func__);

	vq_info->q_hva = hva;
}

/*
 * Update the queue size.
 */
static void
vionet_update_qs(struct vionet_dev *dev)
{
	struct virtio_vq_info *vq_info;

	/* Invalid queue? */
	if (dev->cfg.queue_select > 1) {
		log_warnx("%s: !!! invalid queue selector %d", __func__,
		    dev->cfg.queue_select);
		dev->cfg.queue_size = 0;
		return;
	}

	vq_info = &dev->vq[dev->cfg.queue_select];

	/* Update queue pfn/size based on queue select */
	dev->cfg.queue_pfn = vq_info->q_gpa >> 12;
	dev->cfg.queue_size = vq_info->qs;
}
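
/*
 * For reference, the legacy split-virtqueue layout behind q_hva, with
 * the offsets precomputed in vq_availoffset and vq_usedoffset:
 *
 *	struct vring_desc  desc[VIONET_QUEUE_SIZE];   descriptor table
 *	struct vring_avail avail;                     driver-owned ring
 *	  ...padding to the next VIRTIO_PAGE_SIZE boundary...
 *	struct vring_used  used;                      device-owned ring
 */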

/*
 * vionet_rx
 *
 * Pull packets from the provided fd and fill the receive-side virtqueue. We
 * selectively use zero-copy approaches when possible.
 *
 * Returns 1 if guest notification is needed. Otherwise, returns -1 on failure
 * or 0 if no notification is needed.
 */
static int
vionet_rx(struct vionet_dev *dev, int fd)
{
	uint16_t idx, hdr_idx;
	char *vr = NULL;
	size_t chain_len = 0, iov_cnt;
	struct vring_desc *desc, *table;
	struct vring_avail *avail;
	struct vring_used *used;
	struct virtio_vq_info *vq_info;
	struct iovec *iov;
	int notify = 0;
	ssize_t sz;
	uint8_t status = 0;

	status = dev->cfg.device_status & VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK;
	if (status != VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK) {
		log_warnx("%s: driver not ready", __func__);
		return (0);
	}

	vq_info = &dev->vq[RXQ];
	idx = vq_info->last_avail;
	vr = vq_info->q_hva;
	if (vr == NULL)
		fatalx("%s: vr == NULL", __func__);

	/* Compute offsets in ring of descriptors, avail ring, and used ring */
	table = (struct vring_desc *)(vr);
	avail = (struct vring_avail *)(vr + vq_info->vq_availoffset);
	used = (struct vring_used *)(vr + vq_info->vq_usedoffset);
	used->flags |= VRING_USED_F_NO_NOTIFY;

	while (idx != avail->idx) {
		hdr_idx = avail->ring[idx & VIONET_QUEUE_MASK];
		desc = &table[hdr_idx & VIONET_QUEUE_MASK];
		if (!DESC_WRITABLE(desc)) {
			log_warnx("%s: invalid descriptor state", __func__);
			goto reset;
		}

		iov = &iov_rx[0];
		iov_cnt = 1;

		/*
		 * First descriptor should be at least as large as the
		 * virtio_net_hdr. It's not technically required, but in
		 * legacy devices it should be safe to assume.
		 */
		iov->iov_len = desc->len;
		if (iov->iov_len < sizeof(struct virtio_net_hdr)) {
			log_warnx("%s: invalid descriptor length", __func__);
			goto reset;
		}

		/*
		 * Insert the virtio_net_hdr and adjust len/base. We do the
		 * pointer math here before it's a void*.
		 */
		iov->iov_base = hvaddr_mem(desc->addr, iov->iov_len);
		if (iov->iov_base == NULL)
			goto reset;
		memset(iov->iov_base, 0, sizeof(struct virtio_net_hdr));

		/* Tweak the iovec to account for the virtio_net_hdr. */
		iov->iov_len -= sizeof(struct virtio_net_hdr);
		iov->iov_base = hvaddr_mem(desc->addr +
		    sizeof(struct virtio_net_hdr), iov->iov_len);
		if (iov->iov_base == NULL)
			goto reset;
		chain_len = iov->iov_len;

		/*
		 * Walk the remaining chain and collect remaining addresses
		 * and lengths.
		 */
		while (desc->flags & VRING_DESC_F_NEXT) {
			desc = &table[desc->next & VIONET_QUEUE_MASK];
			if (!DESC_WRITABLE(desc)) {
				log_warnx("%s: invalid descriptor state",
				    __func__);
				goto reset;
			}

			/* Collect our IO information. Translate gpa's. */
			iov = &iov_rx[iov_cnt];
			iov->iov_len = desc->len;
			iov->iov_base = hvaddr_mem(desc->addr, iov->iov_len);
			if (iov->iov_base == NULL)
				goto reset;
			chain_len += iov->iov_len;

			/* Guard against infinitely looping chains. */
			if (++iov_cnt >= nitems(iov_rx)) {
				log_warnx("%s: infinite chain detected",
				    __func__);
				goto reset;
			}
		}

		/* Make sure the driver gave us the bare minimum buffers. */
		if (chain_len < VIONET_MIN_TXLEN) {
			log_warnx("%s: insufficient buffers provided",
			    __func__);
			goto reset;
		}

		/*
		 * If we're enforcing hardware address or handling an injected
		 * packet, we need to use a copy-based approach.
		 */
		if (dev->lockedmac || fd != dev->data_fd)
			sz = vionet_rx_copy(dev, fd, iov_rx, iov_cnt,
			    chain_len);
		else
			sz = vionet_rx_zerocopy(dev, fd, iov_rx, iov_cnt);
		if (sz == -1)
			goto reset;
		if (sz == 0)	/* No packets, so bail out for now. */
			break;

		/*
		 * Account for the prefixed header since it wasn't included
		 * in the copy or zerocopy operations.
		 */
		sz += sizeof(struct virtio_net_hdr);

		/* Mark our buffers as used. */
		used->ring[used->idx & VIONET_QUEUE_MASK].id = hdr_idx;
		used->ring[used->idx & VIONET_QUEUE_MASK].len = sz;
		/* Ensure the used element is visible before the new idx. */
		__sync_synchronize();
		used->idx++;
		idx++;
	}

	if (idx != vq_info->last_avail &&
	    !(avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) {
		notify = 1;
	}

	vq_info->last_avail = idx;
	return (notify);
reset:
	return (-1);
}
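
/*
 * Note on injected packets: the tx side hands locally generated frames
 * (e.g. dhcp replies) to the rx side by writing a struct packet into
 * pipe_inject. Both ends live in this process, so the buf pointer
 * stays valid across the pipe; vionet_rx_copy() frees it when done.
 */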

/*
 * vionet_rx_copy
 *
 * Read a packet off the provided file descriptor, validating packet
 * characteristics, and copy into the provided buffers in the iovec array.
 *
 * It's assumed that the provided iovec array contains validated host virtual
 * address translations and not guest physical addresses.
 *
 * Returns number of bytes copied on success, 0 if packet is dropped, and
 * -1 on an error.
 */
ssize_t
vionet_rx_copy(struct vionet_dev *dev, int fd, const struct iovec *iov,
    int iov_cnt, size_t chain_len)
{
	static uint8_t buf[VIONET_HARD_MTU];
	struct packet *pkt = NULL;
	struct ether_header *eh = NULL;
	uint8_t *payload = buf;
	size_t i, chunk, nbytes, copied = 0;
	ssize_t sz;

	/* If reading from the tap(4), try to right-size the read. */
	if (fd == dev->data_fd)
		nbytes = MIN(chain_len, VIONET_HARD_MTU);
	else if (fd == pipe_inject[READ])
		nbytes = sizeof(struct packet);
	else {
		log_warnx("%s: invalid fd: %d", __func__, fd);
		return (-1);
	}

	/*
	 * Try to pull a packet. The fd should be non-blocking and we don't
	 * care if we under-read (i.e. sz != nbytes) as we may not have a
	 * packet large enough to fill the buffer.
	 */
	sz = read(fd, buf, nbytes);
	if (sz == -1) {
		if (errno != EAGAIN) {
			log_warn("%s: error reading packet", __func__);
			return (-1);
		}
		return (0);
	} else if (fd == dev->data_fd && sz < VIONET_MIN_TXLEN) {
		/* If reading the tap(4), we should get valid ethernet. */
		log_warnx("%s: invalid packet size", __func__);
		return (0);
	} else if (fd == pipe_inject[READ] && sz != sizeof(struct packet)) {
		log_warnx("%s: invalid injected packet object (sz=%ld)",
		    __func__, sz);
		return (0);
	}

	/* Decompose an injected packet, if that's what we're working with. */
	if (fd == pipe_inject[READ]) {
		pkt = (struct packet *)buf;
		if (pkt->buf == NULL) {
			log_warnx("%s: invalid injected packet, no buffer",
			    __func__);
			return (0);
		}
		if (sz < VIONET_MIN_TXLEN || sz > VIONET_MAX_TXLEN) {
			log_warnx("%s: invalid injected packet size", __func__);
			goto drop;
		}
		payload = pkt->buf;
		sz = (ssize_t)pkt->len;
	}

	/* Validate the ethernet header, if required. */
	if (dev->lockedmac) {
		eh = (struct ether_header *)(payload);
		if (!ETHER_IS_MULTICAST(eh->ether_dhost) &&
		    memcmp(eh->ether_dhost, dev->mac,
		    sizeof(eh->ether_dhost)) != 0)
			goto drop;
	}

	/* Truncate one last time to the chain length, if shorter. */
	sz = MIN(chain_len, (size_t)sz);

	/*
	 * Copy the packet into the provided buffers. We can use memcpy(3)
	 * here as the gpa was validated and translated to an hva previously.
	 */
	for (i = 0; (int)i < iov_cnt && (size_t)sz > copied; i++) {
		chunk = MIN(iov[i].iov_len, (size_t)(sz - copied));
		memcpy(iov[i].iov_base, payload + copied, chunk);
		copied += chunk;
	}

drop:
	/* Free any injected packet buffer. */
	if (pkt != NULL)
		free(pkt->buf);

	return (copied);
}
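
/*
 * Zero-copy is only safe when nothing needs to inspect the frame
 * before it lands in guest memory: readv(2) scatters the packet
 * straight into the guest buffers, leaving no chance to validate the
 * ethernet header. Locked-lladdr and injected packets therefore take
 * the copy path above.
 */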

/*
 * vionet_rx_zerocopy
 *
 * Perform a vectorized read from the given fd into the guest physical memory
 * pointed to by iovecs.
 *
 * Returns number of bytes read on success, -1 on error, or 0 if EAGAIN was
 * returned by readv.
 */
static ssize_t
vionet_rx_zerocopy(struct vionet_dev *dev, int fd, const struct iovec *iov,
    int iov_cnt)
{
	ssize_t sz;

	if (dev->lockedmac) {
		log_warnx("%s: zerocopy not available for locked lladdr",
		    __func__);
		return (-1);
	}

	sz = readv(fd, iov, iov_cnt);
	if (sz == -1 && errno == EAGAIN)
		return (0);
	return (sz);
}

/*
 * vionet_rx_event
 *
 * Called when new data can be received on the tap fd of a vionet device.
 */
static void
vionet_rx_event(int fd, short event, void *arg)
{
	struct virtio_dev *dev = (struct virtio_dev *)arg;
	struct vionet_dev *vionet = &dev->vionet;
	int ret = 0;

	if (!(event & EV_READ))
		fatalx("%s: invalid event type", __func__);

	pthread_rwlock_rdlock(&lock);
	ret = vionet_rx(vionet, fd);
	pthread_rwlock_unlock(&lock);

	if (ret == 0) {
		/* Nothing to do. */
		return;
	}

	pthread_rwlock_wrlock(&lock);
	if (ret == 1) {
		/* Notify the driver. */
		vionet->cfg.isr_status |= 1;
	} else {
		/* Need a reset. Something went wrong. */
		log_warnx("%s: requesting device reset", __func__);
		vionet->cfg.device_status |= DEVICE_NEEDS_RESET;
		vionet->cfg.isr_status |= VIRTIO_CONFIG_ISR_CONFIG_CHANGE;
	}
	pthread_rwlock_unlock(&lock);

	vm_pipe_send(&pipe_main, VIRTIO_RAISE_IRQ);
}
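
/*
 * A queue notify is the guest's doorbell: the driver writes a queue id
 * to VIRTIO_CONFIG_QUEUE_NOTIFY and vionet_notifyq() forwards the kick
 * over a pipe to the thread owning that queue. Interrupts travel the
 * other way, since only the main thread may use the async imsg channel.
 */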

static void
vionet_notifyq(struct virtio_dev *dev)
{
	struct vionet_dev *vionet = &dev->vionet;

	switch (vionet->cfg.queue_notify) {
	case RXQ:
		rx_enabled = 1;
		vm_pipe_send(&pipe_rx, VIRTIO_NOTIFY);
		break;
	case TXQ:
		vm_pipe_send(&pipe_tx, VIRTIO_NOTIFY);
		break;
	default:
		/*
		 * Catch the unimplemented queue ID 2 (control queue) as
		 * well as any bogus queue IDs.
		 */
		log_debug("%s: notify for unimplemented queue ID %d",
		    __func__, vionet->cfg.queue_notify);
		break;
	}
}

static int
vionet_tx(struct virtio_dev *dev)
{
	uint16_t idx, hdr_idx;
	size_t chain_len, iov_cnt;
	ssize_t dhcpsz = 0, sz;
	int notify = 0;
	char *vr = NULL, *dhcppkt = NULL;
	struct vionet_dev *vionet = &dev->vionet;
	struct vring_desc *desc, *table;
	struct vring_avail *avail;
	struct vring_used *used;
	struct virtio_vq_info *vq_info;
	struct ether_header *eh;
	struct iovec *iov;
	struct packet pkt;
	uint8_t status = 0;

	status = vionet->cfg.device_status
	    & VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK;
	if (status != VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK) {
		log_warnx("%s: driver not ready", __func__);
		return (0);
	}

	vq_info = &vionet->vq[TXQ];
	idx = vq_info->last_avail;
	vr = vq_info->q_hva;
	if (vr == NULL)
		fatalx("%s: vr == NULL", __func__);

	/* Compute offsets in ring of descriptors, avail ring, and used ring */
	table = (struct vring_desc *)(vr);
	avail = (struct vring_avail *)(vr + vq_info->vq_availoffset);
	used = (struct vring_used *)(vr + vq_info->vq_usedoffset);

	while (idx != avail->idx) {
		hdr_idx = avail->ring[idx & VIONET_QUEUE_MASK];
		desc = &table[hdr_idx & VIONET_QUEUE_MASK];
		if (DESC_WRITABLE(desc)) {
			log_warnx("%s: invalid descriptor state", __func__);
			goto reset;
		}

		iov = &iov_tx[0];
		iov_cnt = 0;
		chain_len = 0;

		/*
		 * As a legacy device, we most likely will receive a lead
		 * descriptor sized to the virtio_net_hdr. However, the framing
		 * is not guaranteed, so check for packet data.
		 */
		iov->iov_len = desc->len;
		if (iov->iov_len < sizeof(struct virtio_net_hdr)) {
			log_warnx("%s: invalid descriptor length", __func__);
			goto reset;
		} else if (iov->iov_len > sizeof(struct virtio_net_hdr)) {
			/* Chop off the virtio header, leaving packet data. */
			iov->iov_len -= sizeof(struct virtio_net_hdr);
			chain_len += iov->iov_len;
			iov->iov_base = hvaddr_mem(desc->addr +
			    sizeof(struct virtio_net_hdr), iov->iov_len);
			if (iov->iov_base == NULL)
				goto reset;
			iov_cnt++;
		}

		/*
		 * Walk the chain and collect remaining addresses and lengths.
		 */
		while (desc->flags & VRING_DESC_F_NEXT) {
			desc = &table[desc->next & VIONET_QUEUE_MASK];
			if (DESC_WRITABLE(desc)) {
				log_warnx("%s: invalid descriptor state",
				    __func__);
				goto reset;
			}

			/* Collect our IO information, translating gpa's. */
			iov = &iov_tx[iov_cnt];
			iov->iov_len = desc->len;
			iov->iov_base = hvaddr_mem(desc->addr, iov->iov_len);
			if (iov->iov_base == NULL)
				goto reset;
			chain_len += iov->iov_len;

			/* Guard against infinitely looping chains. */
			if (++iov_cnt >= nitems(iov_tx)) {
				log_warnx("%s: infinite chain detected",
				    __func__);
				goto reset;
			}
		}

		/* Check if we've got a minimum viable amount of data. */
		if (chain_len < VIONET_MIN_TXLEN)
			goto drop;

		/*
		 * Packet inspection for ethernet header (if using a "local"
		 * interface) for possibility of a DHCP packet or (if using
		 * locked lladdr) for validating ethernet header.
		 *
		 * To help preserve zero-copy semantics, we require the first
		 * descriptor with packet data contains a large enough buffer
		 * for this inspection.
		 */
		iov = &iov_tx[0];
		if (vionet->lockedmac) {
			if (iov->iov_len < ETHER_HDR_LEN) {
				log_warnx("%s: insufficient header data",
				    __func__);
				goto drop;
			}
			eh = (struct ether_header *)iov->iov_base;
			if (memcmp(eh->ether_shost, vionet->mac,
			    sizeof(eh->ether_shost)) != 0) {
				log_warnx("%s: bad source address %s",
				    __func__, ether_ntoa((struct ether_addr *)
				    eh->ether_shost));
				goto drop;
			}
		}
		if (vionet->local) {
			dhcpsz = dhcp_request(dev, iov->iov_base, iov->iov_len,
			    &dhcppkt);
			if (dhcpsz > 0) {
				log_debug("%s: detected dhcp request of %zu bytes",
				    __func__, dhcpsz);
				goto drop;
			}
		}

		/* Write our packet to the tap(4). */
		sz = writev(vionet->data_fd, iov_tx, iov_cnt);
		if (sz == -1 && errno != ENOBUFS) {
			log_warn("%s", __func__);
			goto reset;
		}
		chain_len += sizeof(struct virtio_net_hdr);
drop:
		used->ring[used->idx & VIONET_QUEUE_MASK].id = hdr_idx;
		used->ring[used->idx & VIONET_QUEUE_MASK].len = chain_len;
		/* Ensure the used element is visible before the new idx. */
		__sync_synchronize();
		used->idx++;
		idx++;

		/* Facilitate DHCP reply injection, if needed. */
		if (dhcpsz > 0) {
			pkt.buf = dhcppkt;
			pkt.len = dhcpsz;
			sz = write(pipe_inject[WRITE], &pkt, sizeof(pkt));
			if (sz == -1 && errno != EAGAIN) {
				log_warn("%s: packet injection", __func__);
				free(pkt.buf);
			} else if (sz == -1 && errno == EAGAIN) {
				log_debug("%s: dropping dhcp reply", __func__);
				free(pkt.buf);
			} else if (sz != sizeof(pkt)) {
				log_warnx("%s: failed packet injection",
				    __func__);
				free(pkt.buf);
			}
			log_debug("%s: injected dhcp reply with %ld bytes",
			    __func__, sz);
		}
	}

	if (idx != vq_info->last_avail &&
	    !(avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
		notify = 1;

	vq_info->last_avail = idx;
	return (notify);
reset:
	return (-1);
}
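
/*
 * Handle control messages (hostmac updates, pause/unpause, verbosity
 * changes) arriving from the vm process on the async channel.
 */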

static void
dev_dispatch_vm(int fd, short event, void *arg)
{
	struct virtio_dev *dev = arg;
	struct vionet_dev *vionet = &dev->vionet;
	struct imsgev *iev = &dev->async_iev;
	struct imsgbuf *ibuf = &iev->ibuf;
	struct imsg imsg;
	ssize_t n = 0;
	int verbose;

	if (dev == NULL)
		fatalx("%s: missing vionet pointer", __func__);

	if (event & EV_READ) {
		if ((n = imsgbuf_read(ibuf)) == -1)
			fatal("%s: imsgbuf_read", __func__);
		if (n == 0) {
			/* this pipe is dead, so remove the event handler */
			log_debug("%s: pipe dead (EV_READ)", __func__);
			event_del(&iev->ev);
			event_base_loopexit(ev_base_main, NULL);
			return;
		}
	}

	if (event & EV_WRITE) {
		if (imsgbuf_write(ibuf) == -1) {
			if (errno == EPIPE) {
				/* this pipe is dead, remove the handler */
				log_debug("%s: pipe dead (EV_WRITE)", __func__);
				event_del(&iev->ev);
				event_loopexit(NULL);
				return;
			}
			fatal("%s: imsgbuf_write", __func__);
		}
	}

	for (;;) {
		if ((n = imsg_get(ibuf, &imsg)) == -1)
			fatal("%s: imsg_get", __func__);
		if (n == 0)
			break;

		switch (imsg.hdr.type) {
		case IMSG_DEVOP_HOSTMAC:
			IMSG_SIZE_CHECK(&imsg, vionet->hostmac);
			memcpy(vionet->hostmac, imsg.data,
			    sizeof(vionet->hostmac));
			log_debug("%s: set hostmac", __func__);
			break;
		case IMSG_VMDOP_PAUSE_VM:
			log_debug("%s: pausing", __func__);
			vm_pipe_send(&pipe_rx, VIRTIO_THREAD_PAUSE);
			break;
		case IMSG_VMDOP_UNPAUSE_VM:
			log_debug("%s: unpausing", __func__);
			if (rx_enabled)
				vm_pipe_send(&pipe_rx, VIRTIO_THREAD_START);
			break;
		case IMSG_CTL_VERBOSE:
			IMSG_SIZE_CHECK(&imsg, &verbose);
			memcpy(&verbose, imsg.data, sizeof(verbose));
			log_setverbose(verbose);
			break;
		}
		imsg_free(&imsg);
	}
	imsg_event_add2(iev, ev_base_main);
}

/*
 * Synchronous IO handler.
 */
static void
handle_sync_io(int fd, short event, void *arg)
{
	struct virtio_dev *dev = (struct virtio_dev *)arg;
	struct imsgev *iev = &dev->sync_iev;
	struct imsgbuf *ibuf = &iev->ibuf;
	struct viodev_msg msg;
	struct imsg imsg;
	ssize_t n;
	int8_t intr = INTR_STATE_NOOP;

	if (event & EV_READ) {
		if ((n = imsgbuf_read(ibuf)) == -1)
			fatal("%s: imsgbuf_read", __func__);
		if (n == 0) {
			/* this pipe is dead, so remove the event handler */
			log_debug("%s: pipe dead (EV_READ)", __func__);
			event_del(&iev->ev);
			event_base_loopexit(ev_base_main, NULL);
			return;
		}
	}

	if (event & EV_WRITE) {
		if (imsgbuf_write(ibuf) == -1) {
			if (errno == EPIPE) {
				/* this pipe is dead, remove the handler */
				log_debug("%s: pipe dead (EV_WRITE)", __func__);
				event_del(&iev->ev);
				event_loopexit(NULL);
				return;
			}
			fatal("%s: imsgbuf_write", __func__);
		}
	}

	for (;;) {
		if ((n = imsg_get(ibuf, &imsg)) == -1)
			fatalx("%s: imsg_get (n=%ld)", __func__, n);
		if (n == 0)
			break;

		/* Unpack our message. They ALL should be dev messages! */
		IMSG_SIZE_CHECK(&imsg, &msg);
		memcpy(&msg, imsg.data, sizeof(msg));
		imsg_free(&imsg);

		switch (msg.type) {
		case VIODEV_MSG_DUMP:
			/* Dump device */
			n = atomicio(vwrite, dev->sync_fd, dev, sizeof(*dev));
			if (n != sizeof(*dev)) {
				log_warnx("%s: failed to dump vionet device",
				    __func__);
				break;
			}
			/* FALLTHROUGH */
		case VIODEV_MSG_IO_READ:
			/* Read IO: make sure to send a reply */
			msg.data = handle_io_read(&msg, dev, &intr);
			msg.data_valid = 1;
			msg.state = intr;
			imsg_compose_event2(iev, IMSG_DEVOP_MSG, 0, 0, -1, &msg,
			    sizeof(msg), ev_base_main);
			break;
		case VIODEV_MSG_IO_WRITE:
			/* Write IO: no reply needed */
			handle_io_write(&msg, dev);
			break;
		case VIODEV_MSG_SHUTDOWN:
			event_del(&dev->sync_iev.ev);
			event_base_loopbreak(ev_base_main);
			return;
		default:
			fatalx("%s: invalid msg type %d", __func__, msg.type);
		}
	}
	imsg_event_add2(iev, ev_base_main);
}
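
/*
 * A guest writing 0 to DEVICE_STATUS requests a device reset: the
 * handler below pauses both worker threads, and read_pipe_main()
 * defers clearing config state until it has collected a
 * VIRTIO_THREAD_ACK from each of them (tracked by the resetting
 * counter).
 */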

static void
handle_io_write(struct viodev_msg *msg, struct virtio_dev *dev)
{
	struct vionet_dev *vionet = &dev->vionet;
	uint32_t data = msg->data;
	int pause_devices = 0;

	pthread_rwlock_wrlock(&lock);

	switch (msg->reg) {
	case VIRTIO_CONFIG_DEVICE_FEATURES:
	case VIRTIO_CONFIG_QUEUE_SIZE:
	case VIRTIO_CONFIG_ISR_STATUS:
		log_warnx("%s: illegal write %x to %s", __progname, data,
		    virtio_reg_name(msg->reg));
		break;
	case VIRTIO_CONFIG_GUEST_FEATURES:
		vionet->cfg.guest_feature = data;
		break;
	case VIRTIO_CONFIG_QUEUE_PFN:
		vionet->cfg.queue_pfn = data;
		vionet_update_qa(vionet);
		break;
	case VIRTIO_CONFIG_QUEUE_SELECT:
		vionet->cfg.queue_select = data;
		vionet_update_qs(vionet);
		break;
	case VIRTIO_CONFIG_QUEUE_NOTIFY:
		vionet->cfg.queue_notify = data;
		vionet_notifyq(dev);
		break;
	case VIRTIO_CONFIG_DEVICE_STATUS:
		if (data == 0) {
			resetting = 2;	/* Wait on two acks: rx & tx */
			pause_devices = 1;
		} else {
			// XXX is this correct?
			vionet->cfg.device_status = data;
		}
		break;
	}

	pthread_rwlock_unlock(&lock);
	if (pause_devices) {
		rx_enabled = 0;
		vionet_deassert_pic_irq(dev);
		vm_pipe_send(&pipe_rx, VIRTIO_THREAD_PAUSE);
		vm_pipe_send(&pipe_tx, VIRTIO_THREAD_PAUSE);
	}
}

static uint32_t
handle_io_read(struct viodev_msg *msg, struct virtio_dev *dev, int8_t *intr)
{
	struct vionet_dev *vionet = &dev->vionet;
	uint32_t data;

	pthread_rwlock_rdlock(&lock);

	switch (msg->reg) {
	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI:
	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 1:
	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 2:
	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 3:
	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 4:
	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 5:
		data = vionet->mac[msg->reg -
		    VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI];
		break;
	case VIRTIO_CONFIG_DEVICE_FEATURES:
		data = vionet->cfg.device_feature;
		break;
	case VIRTIO_CONFIG_GUEST_FEATURES:
		data = vionet->cfg.guest_feature;
		break;
	case VIRTIO_CONFIG_QUEUE_PFN:
		data = vionet->cfg.queue_pfn;
		break;
	case VIRTIO_CONFIG_QUEUE_SIZE:
		data = vionet->cfg.queue_size;
		break;
	case VIRTIO_CONFIG_QUEUE_SELECT:
		data = vionet->cfg.queue_select;
		break;
	case VIRTIO_CONFIG_QUEUE_NOTIFY:
		data = vionet->cfg.queue_notify;
		break;
	case VIRTIO_CONFIG_DEVICE_STATUS:
		data = vionet->cfg.device_status;
		break;
	case VIRTIO_CONFIG_ISR_STATUS:
		/* ISR is read-to-clear, so upgrade to a write lock. */
		pthread_rwlock_unlock(&lock);
		pthread_rwlock_wrlock(&lock);
		data = vionet->cfg.isr_status;
		vionet->cfg.isr_status = 0;
		if (intr != NULL)
			*intr = INTR_STATE_DEASSERT;
		break;
	default:
		data = 0xFFFFFFFF;
	}

	pthread_rwlock_unlock(&lock);
	return (data);
}
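
/*
 * Each worker thread below drives a private event_base, as a base
 * cannot safely be shared across threads. The tap fd and injection
 * pipe belong to the rx thread's base; the tx thread only watches
 * its inter-thread channel.
 */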

/*
 * Handle the rx side processing, communicating to the main thread via pipe.
 */
static void *
rx_run_loop(void *arg)
{
	struct virtio_dev *dev = (struct virtio_dev *)arg;
	struct vionet_dev *vionet = &dev->vionet;
	int ret;

	ev_base_rx = event_base_new();

	/* Wire up event handling for the tap fd. */
	event_set(&ev_tap, vionet->data_fd, EV_READ | EV_PERSIST,
	    vionet_rx_event, dev);
	event_base_set(ev_base_rx, &ev_tap);

	/* Wire up event handling for the packet injection pipe. */
	event_set(&ev_inject, pipe_inject[READ], EV_READ | EV_PERSIST,
	    vionet_rx_event, dev);
	event_base_set(ev_base_rx, &ev_inject);

	/* Wire up event handling for our inter-thread communication channel. */
	event_base_set(ev_base_rx, &pipe_rx.read_ev);
	event_add(&pipe_rx.read_ev, NULL);

	/* Begin our event loop with our channel event active. */
	ret = event_base_dispatch(ev_base_rx);
	event_base_free(ev_base_rx);

	log_debug("%s: exiting (%d)", __func__, ret);

	close_fd(pipe_rx.read);
	close_fd(pipe_inject[READ]);

	return (NULL);
}

/*
 * Handle the tx side processing, communicating to the main thread via pipe.
 */
static void *
tx_run_loop(void *arg)
{
	int ret;

	ev_base_tx = event_base_new();

	/* Wire up event handling for our inter-thread communication channel. */
	event_base_set(ev_base_tx, &pipe_tx.read_ev);
	event_add(&pipe_tx.read_ev, NULL);

	/* Begin our event loop with our channel event active. */
	ret = event_base_dispatch(ev_base_tx);
	event_base_free(ev_base_tx);

	log_debug("%s: exiting (%d)", __func__, ret);

	close_fd(pipe_tx.read);

	return (NULL);
}

/*
 * Read events sent by the main thread to the rx thread.
 */
static void
read_pipe_rx(int fd, short event, void *arg)
{
	enum pipe_msg_type msg;

	if (!(event & EV_READ))
		fatalx("%s: invalid event type", __func__);

	msg = vm_pipe_recv(&pipe_rx);

	switch (msg) {
	case VIRTIO_NOTIFY:
	case VIRTIO_THREAD_START:
		event_add(&ev_tap, NULL);
		event_add(&ev_inject, NULL);
		break;
	case VIRTIO_THREAD_PAUSE:
		event_del(&ev_tap);
		event_del(&ev_inject);
		vm_pipe_send(&pipe_main, VIRTIO_THREAD_ACK);
		break;
	case VIRTIO_THREAD_STOP:
		event_del(&ev_tap);
		event_del(&ev_inject);
		event_base_loopexit(ev_base_rx, NULL);
		break;
	default:
		fatalx("%s: invalid channel message: %d", __func__, msg);
	}
}

/*
 * Read events sent by the main thread to the tx thread.
 */
static void
read_pipe_tx(int fd, short event, void *arg)
{
	struct virtio_dev *dev = (struct virtio_dev *)arg;
	struct vionet_dev *vionet = &dev->vionet;
	enum pipe_msg_type msg;
	int ret = 0;

	if (!(event & EV_READ))
		fatalx("%s: invalid event type", __func__);

	msg = vm_pipe_recv(&pipe_tx);

	switch (msg) {
	case VIRTIO_NOTIFY:
		pthread_rwlock_rdlock(&lock);
		ret = vionet_tx(dev);
		pthread_rwlock_unlock(&lock);
		break;
	case VIRTIO_THREAD_START:
		/* Ignore Start messages. */
		break;
	case VIRTIO_THREAD_PAUSE:
		/*
		 * Nothing to do when pausing on the tx side, but ACK so main
		 * thread knows we're not transmitting.
		 */
		vm_pipe_send(&pipe_main, VIRTIO_THREAD_ACK);
		break;
	case VIRTIO_THREAD_STOP:
		event_base_loopexit(ev_base_tx, NULL);
		break;
	default:
		fatalx("%s: invalid channel message: %d", __func__, msg);
	}

	if (ret == 0) {
		/* No notification needed. Return early. */
		return;
	}

	pthread_rwlock_wrlock(&lock);
	if (ret == 1) {
		/* Notify the driver. */
		vionet->cfg.isr_status |= 1;
	} else {
		/* Need a reset. Something went wrong. */
		log_warnx("%s: requesting device reset", __func__);
		vionet->cfg.device_status |= DEVICE_NEEDS_RESET;
		vionet->cfg.isr_status |= VIRTIO_CONFIG_ISR_CONFIG_CHANGE;
	}
	pthread_rwlock_unlock(&lock);

	vm_pipe_send(&pipe_main, VIRTIO_RAISE_IRQ);
}

/*
 * Read events sent by the rx/tx threads to the main thread.
 */
static void
read_pipe_main(int fd, short event, void *arg)
{
	struct virtio_dev *dev = (struct virtio_dev *)arg;
	struct vionet_dev *vionet = &dev->vionet;
	enum pipe_msg_type msg;

	if (!(event & EV_READ))
		fatalx("%s: invalid event type", __func__);

	msg = vm_pipe_recv(&pipe_main);
	switch (msg) {
	case VIRTIO_RAISE_IRQ:
		vionet_assert_pic_irq(dev);
		break;
	case VIRTIO_THREAD_ACK:
		resetting--;
		if (resetting == 0) {
			log_debug("%s: resetting virtio network device %d",
			    __func__, vionet->idx);

			pthread_rwlock_wrlock(&lock);
			vionet->cfg.device_status = 0;
			vionet->cfg.guest_feature = 0;
			vionet->cfg.queue_pfn = 0;
			vionet_update_qa(vionet);
			vionet->cfg.queue_size = 0;
			vionet_update_qs(vionet);
			vionet->cfg.queue_select = 0;
			vionet->cfg.queue_notify = 0;
			vionet->cfg.isr_status = 0;
			vionet->vq[RXQ].last_avail = 0;
			vionet->vq[RXQ].notified_avail = 0;
			vionet->vq[TXQ].last_avail = 0;
			vionet->vq[TXQ].notified_avail = 0;
			pthread_rwlock_unlock(&lock);
		}
		break;
	default:
		fatalx("%s: invalid channel msg: %d", __func__, msg);
	}
}

/*
 * Message the vm process asking to raise the irq. Must be called from the main
 * thread.
 */
static void
vionet_assert_pic_irq(struct virtio_dev *dev)
{
	struct viodev_msg msg;
	int ret;

	memset(&msg, 0, sizeof(msg));
	msg.irq = dev->irq;
	msg.vcpu = 0;	// XXX
	msg.type = VIODEV_MSG_KICK;
	msg.state = INTR_STATE_ASSERT;

	ret = imsg_compose_event2(&dev->async_iev, IMSG_DEVOP_MSG, 0, 0, -1,
	    &msg, sizeof(msg), ev_base_main);
	if (ret == -1)
		log_warnx("%s: failed to assert irq %d", __func__, dev->irq);
}

/*
 * Message the vm process asking to lower the irq. Must be called from the main
 * thread.
 */
static void
vionet_deassert_pic_irq(struct virtio_dev *dev)
{
	struct viodev_msg msg;
	int ret;

	memset(&msg, 0, sizeof(msg));
	msg.irq = dev->irq;
	msg.vcpu = 0;	// XXX
	msg.type = VIODEV_MSG_KICK;
	msg.state = INTR_STATE_DEASSERT;

	ret = imsg_compose_event2(&dev->async_iev, IMSG_DEVOP_MSG, 0, 0, -1,
	    &msg, sizeof(msg), ev_base_main);
	if (ret == -1)
		log_warnx("%s: failed to deassert irq %d", __func__, dev->irq);
}