/* SPDX-License-Identifier: BSD-3-Clause * Copyright(c) 2010-2016 Intel Corporation */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "vhost.h" #include "virtio.h" #include "virtio_user_dev.h" #include "../virtio_ethdev.h" #define VIRTIO_USER_MEM_EVENT_CLB_NAME "virtio_user_mem_event_clb" const char * const virtio_user_backend_strings[] = { [VIRTIO_USER_BACKEND_UNKNOWN] = "VIRTIO_USER_BACKEND_UNKNOWN", [VIRTIO_USER_BACKEND_VHOST_USER] = "VHOST_USER", [VIRTIO_USER_BACKEND_VHOST_KERNEL] = "VHOST_NET", [VIRTIO_USER_BACKEND_VHOST_VDPA] = "VHOST_VDPA", }; static int virtio_user_uninit_notify_queue(struct virtio_user_dev *dev, uint32_t queue_sel) { if (dev->kickfds[queue_sel] >= 0) { close(dev->kickfds[queue_sel]); dev->kickfds[queue_sel] = -1; } if (dev->callfds[queue_sel] >= 0) { close(dev->callfds[queue_sel]); dev->callfds[queue_sel] = -1; } return 0; } static int virtio_user_init_notify_queue(struct virtio_user_dev *dev, uint32_t queue_sel) { /* May use invalid flag, but some backend uses kickfd and * callfd as criteria to judge if dev is alive. so finally we * use real event_fd. */ dev->callfds[queue_sel] = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK); if (dev->callfds[queue_sel] < 0) { PMD_DRV_LOG(ERR, "(%s) Failed to setup callfd for queue %u: %s", dev->path, queue_sel, strerror(errno)); return -1; } dev->kickfds[queue_sel] = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK); if (dev->kickfds[queue_sel] < 0) { PMD_DRV_LOG(ERR, "(%s) Failed to setup kickfd for queue %u: %s", dev->path, queue_sel, strerror(errno)); return -1; } return 0; } static int virtio_user_destroy_queue(struct virtio_user_dev *dev, uint32_t queue_sel) { struct vhost_vring_state state; int ret; state.index = queue_sel; ret = dev->ops->get_vring_base(dev, &state); if (ret < 0) { PMD_DRV_LOG(ERR, "(%s) Failed to destroy queue %u", dev->path, queue_sel); return -1; } return 0; } static int virtio_user_create_queue(struct virtio_user_dev *dev, uint32_t queue_sel) { /* Of all per virtqueue MSGs, make sure VHOST_SET_VRING_CALL come * firstly because vhost depends on this msg to allocate virtqueue * pair. */ struct vhost_vring_file file; int ret; file.index = queue_sel; file.fd = dev->callfds[queue_sel]; ret = dev->ops->set_vring_call(dev, &file); if (ret < 0) { PMD_INIT_LOG(ERR, "(%s) Failed to create queue %u", dev->path, queue_sel); return -1; } return 0; } static int virtio_user_kick_queue(struct virtio_user_dev *dev, uint32_t queue_sel) { int ret; struct vhost_vring_file file; struct vhost_vring_state state; struct vring *vring = &dev->vrings.split[queue_sel]; struct vring_packed *pq_vring = &dev->vrings.packed[queue_sel]; uint64_t desc_addr, avail_addr, used_addr; struct vhost_vring_addr addr = { .index = queue_sel, .log_guest_addr = 0, .flags = 0, /* disable log */ }; if (queue_sel == dev->max_queue_pairs * 2) { if (!dev->scvq) { PMD_INIT_LOG(ERR, "(%s) Shadow control queue expected but missing", dev->path); goto err; } /* Use shadow control queue information */ vring = &dev->scvq->vq_split.ring; pq_vring = &dev->scvq->vq_packed.ring; } if (dev->features & (1ULL << VIRTIO_F_RING_PACKED)) { desc_addr = pq_vring->desc_iova; avail_addr = desc_addr + pq_vring->num * sizeof(struct vring_packed_desc); used_addr = RTE_ALIGN_CEIL(avail_addr + sizeof(struct vring_packed_desc_event), VIRTIO_VRING_ALIGN); addr.desc_user_addr = desc_addr; addr.avail_user_addr = avail_addr; addr.used_user_addr = used_addr; } else { desc_addr = vring->desc_iova; avail_addr = desc_addr + vring->num * sizeof(struct vring_desc); used_addr = RTE_ALIGN_CEIL((uintptr_t)(&vring->avail->ring[vring->num]), VIRTIO_VRING_ALIGN); addr.desc_user_addr = desc_addr; addr.avail_user_addr = avail_addr; addr.used_user_addr = used_addr; } state.index = queue_sel; state.num = vring->num; ret = dev->ops->set_vring_num(dev, &state); if (ret < 0) goto err; state.index = queue_sel; state.num = 0; /* no reservation */ if (dev->features & (1ULL << VIRTIO_F_RING_PACKED)) state.num |= (1 << 15); ret = dev->ops->set_vring_base(dev, &state); if (ret < 0) goto err; ret = dev->ops->set_vring_addr(dev, &addr); if (ret < 0) goto err; /* Of all per virtqueue MSGs, make sure VHOST_USER_SET_VRING_KICK comes * lastly because vhost depends on this msg to judge if * virtio is ready. */ file.index = queue_sel; file.fd = dev->kickfds[queue_sel]; ret = dev->ops->set_vring_kick(dev, &file); if (ret < 0) goto err; return 0; err: PMD_INIT_LOG(ERR, "(%s) Failed to kick queue %u", dev->path, queue_sel); return -1; } static int virtio_user_foreach_queue(struct virtio_user_dev *dev, int (*fn)(struct virtio_user_dev *, uint32_t)) { uint32_t i, nr_vq; nr_vq = dev->max_queue_pairs * 2; if (dev->hw_cvq) nr_vq++; for (i = 0; i < nr_vq; i++) if (fn(dev, i) < 0) return -1; return 0; } int virtio_user_dev_set_features(struct virtio_user_dev *dev) { uint64_t features; int ret = -1; pthread_mutex_lock(&dev->mutex); /* Step 0: tell vhost to create queues */ if (virtio_user_foreach_queue(dev, virtio_user_create_queue) < 0) goto error; features = dev->features; /* Strip VIRTIO_NET_F_MAC, as MAC address is handled in vdev init */ features &= ~(1ull << VIRTIO_NET_F_MAC); /* Strip VIRTIO_NET_F_CTRL_VQ if the devices does not really support control VQ */ if (!dev->hw_cvq) features &= ~(1ull << VIRTIO_NET_F_CTRL_VQ); features &= ~(1ull << VIRTIO_NET_F_STATUS); ret = dev->ops->set_features(dev, features); if (ret < 0) goto error; PMD_DRV_LOG(INFO, "(%s) set features: 0x%" PRIx64, dev->path, features); error: pthread_mutex_unlock(&dev->mutex); return ret; } int virtio_user_start_device(struct virtio_user_dev *dev) { int ret; /* * XXX workaround! * * We need to make sure that the locks will be * taken in the correct order to avoid deadlocks. * * Before releasing this lock, this thread should * not trigger any memory hotplug events. * * This is a temporary workaround, and should be * replaced when we get proper supports from the * memory subsystem in the future. */ rte_mcfg_mem_read_lock(); pthread_mutex_lock(&dev->mutex); /* Step 2: share memory regions */ ret = dev->ops->set_memory_table(dev); if (ret < 0) goto error; /* Step 3: kick queues */ ret = virtio_user_foreach_queue(dev, virtio_user_kick_queue); if (ret < 0) goto error; /* Step 4: enable queues * we enable the 1st queue pair by default. */ ret = dev->ops->enable_qp(dev, 0, 1); if (ret < 0) goto error; if (dev->scvq) { ret = dev->ops->cvq_enable(dev, 1); if (ret < 0) goto error; } dev->started = true; pthread_mutex_unlock(&dev->mutex); rte_mcfg_mem_read_unlock(); return 0; error: pthread_mutex_unlock(&dev->mutex); rte_mcfg_mem_read_unlock(); PMD_INIT_LOG(ERR, "(%s) Failed to start device", dev->path); /* TODO: free resource here or caller to check */ return -1; } int virtio_user_stop_device(struct virtio_user_dev *dev) { uint32_t i; int ret; pthread_mutex_lock(&dev->mutex); if (!dev->started) goto out; for (i = 0; i < dev->max_queue_pairs; ++i) { ret = dev->ops->enable_qp(dev, i, 0); if (ret < 0) goto err; } if (dev->scvq) { ret = dev->ops->cvq_enable(dev, 0); if (ret < 0) goto err; } /* Stop the backend. */ if (virtio_user_foreach_queue(dev, virtio_user_destroy_queue) < 0) goto err; dev->started = false; out: pthread_mutex_unlock(&dev->mutex); return 0; err: pthread_mutex_unlock(&dev->mutex); PMD_INIT_LOG(ERR, "(%s) Failed to stop device", dev->path); return -1; } static int virtio_user_dev_init_max_queue_pairs(struct virtio_user_dev *dev, uint32_t user_max_qp) { int ret; if (!(dev->device_features & (1ULL << VIRTIO_NET_F_MQ))) { dev->max_queue_pairs = 1; return 0; } if (!dev->ops->get_config) { dev->max_queue_pairs = user_max_qp; return 0; } ret = dev->ops->get_config(dev, (uint8_t *)&dev->max_queue_pairs, offsetof(struct virtio_net_config, max_virtqueue_pairs), sizeof(uint16_t)); if (ret) { /* * We need to know the max queue pair from the device so that * the control queue gets the right index. */ dev->max_queue_pairs = 1; PMD_DRV_LOG(ERR, "(%s) Failed to get max queue pairs from device", dev->path); return ret; } return 0; } int virtio_user_dev_get_rss_config(struct virtio_user_dev *dev, void *dst, size_t offset, int length) { int ret = 0; if (!(dev->device_features & (1ULL << VIRTIO_NET_F_RSS))) return -ENOTSUP; if (!dev->ops->get_config) return -ENOTSUP; ret = dev->ops->get_config(dev, dst, offset, length); if (ret) PMD_DRV_LOG(ERR, "(%s) Failed to get rss config in device", dev->path); return ret; } int virtio_user_dev_set_mac(struct virtio_user_dev *dev) { int ret = 0; if (!(dev->device_features & (1ULL << VIRTIO_NET_F_MAC))) return -ENOTSUP; if (!dev->ops->set_config) return -ENOTSUP; ret = dev->ops->set_config(dev, dev->mac_addr, offsetof(struct virtio_net_config, mac), RTE_ETHER_ADDR_LEN); if (ret) PMD_DRV_LOG(ERR, "(%s) Failed to set MAC address in device", dev->path); return ret; } int virtio_user_dev_get_mac(struct virtio_user_dev *dev) { int ret = 0; if (!(dev->device_features & (1ULL << VIRTIO_NET_F_MAC))) return -ENOTSUP; if (!dev->ops->get_config) return -ENOTSUP; ret = dev->ops->get_config(dev, dev->mac_addr, offsetof(struct virtio_net_config, mac), RTE_ETHER_ADDR_LEN); if (ret) PMD_DRV_LOG(ERR, "(%s) Failed to get MAC address from device", dev->path); return ret; } static void virtio_user_dev_init_mac(struct virtio_user_dev *dev, const char *mac) { struct rte_ether_addr cmdline_mac; char buf[RTE_ETHER_ADDR_FMT_SIZE]; int ret; if (mac && rte_ether_unformat_addr(mac, &cmdline_mac) == 0) { /* * MAC address was passed from command-line, try to store * it in the device if it supports it. Otherwise try to use * the device one. */ memcpy(dev->mac_addr, &cmdline_mac, RTE_ETHER_ADDR_LEN); dev->mac_specified = 1; /* Setting MAC may fail, continue to get the device one in this case */ virtio_user_dev_set_mac(dev); ret = virtio_user_dev_get_mac(dev); if (ret == -ENOTSUP) goto out; if (memcmp(&cmdline_mac, dev->mac_addr, RTE_ETHER_ADDR_LEN)) PMD_DRV_LOG(INFO, "(%s) Device MAC update failed", dev->path); } else { ret = virtio_user_dev_get_mac(dev); if (ret) { PMD_DRV_LOG(ERR, "(%s) No valid MAC in devargs or device, use random", dev->path); return; } dev->mac_specified = 1; } out: rte_ether_format_addr(buf, RTE_ETHER_ADDR_FMT_SIZE, (struct rte_ether_addr *)dev->mac_addr); PMD_DRV_LOG(INFO, "(%s) MAC %s specified", dev->path, buf); } static int virtio_user_dev_init_notify(struct virtio_user_dev *dev) { if (virtio_user_foreach_queue(dev, virtio_user_init_notify_queue) < 0) goto err; if (dev->device_features & (1ULL << VIRTIO_F_NOTIFICATION_DATA)) if (dev->ops->map_notification_area && dev->ops->map_notification_area(dev)) goto err; return 0; err: virtio_user_foreach_queue(dev, virtio_user_uninit_notify_queue); return -1; } static void virtio_user_dev_uninit_notify(struct virtio_user_dev *dev) { virtio_user_foreach_queue(dev, virtio_user_uninit_notify_queue); if (dev->ops->unmap_notification_area && dev->notify_area) dev->ops->unmap_notification_area(dev); } static int virtio_user_fill_intr_handle(struct virtio_user_dev *dev) { uint32_t i; struct rte_eth_dev *eth_dev = &rte_eth_devices[dev->hw.port_id]; if (eth_dev->intr_handle == NULL) { eth_dev->intr_handle = rte_intr_instance_alloc(RTE_INTR_INSTANCE_F_PRIVATE); if (eth_dev->intr_handle == NULL) { PMD_DRV_LOG(ERR, "(%s) failed to allocate intr_handle", dev->path); return -1; } } for (i = 0; i < dev->max_queue_pairs; ++i) { if (rte_intr_efds_index_set(eth_dev->intr_handle, i, dev->callfds[2 * i + VTNET_SQ_RQ_QUEUE_IDX])) return -rte_errno; } if (rte_intr_nb_efd_set(eth_dev->intr_handle, dev->max_queue_pairs)) return -rte_errno; if (rte_intr_max_intr_set(eth_dev->intr_handle, dev->max_queue_pairs + 1)) return -rte_errno; if (rte_intr_type_set(eth_dev->intr_handle, RTE_INTR_HANDLE_VDEV)) return -rte_errno; /* For virtio vdev, no need to read counter for clean */ if (rte_intr_efd_counter_size_set(eth_dev->intr_handle, 0)) return -rte_errno; if (rte_intr_fd_set(eth_dev->intr_handle, dev->ops->get_intr_fd(dev))) return -rte_errno; return 0; } static void virtio_user_mem_event_cb(enum rte_mem_event type __rte_unused, const void *addr, size_t len __rte_unused, void *arg) { struct virtio_user_dev *dev = arg; struct rte_memseg_list *msl; uint16_t i; int ret = 0; /* ignore externally allocated memory */ msl = rte_mem_virt2memseg_list(addr); if (msl->external) return; pthread_mutex_lock(&dev->mutex); if (dev->started == false) goto exit; /* Step 1: pause the active queues */ for (i = 0; i < dev->queue_pairs; i++) { ret = dev->ops->enable_qp(dev, i, 0); if (ret < 0) goto exit; } /* Step 2: update memory regions */ ret = dev->ops->set_memory_table(dev); if (ret < 0) goto exit; /* Step 3: resume the active queues */ for (i = 0; i < dev->queue_pairs; i++) { ret = dev->ops->enable_qp(dev, i, 1); if (ret < 0) goto exit; } exit: pthread_mutex_unlock(&dev->mutex); if (ret < 0) PMD_DRV_LOG(ERR, "(%s) Failed to update memory table", dev->path); } static int virtio_user_dev_setup(struct virtio_user_dev *dev) { if (dev->is_server) { if (dev->backend_type != VIRTIO_USER_BACKEND_VHOST_USER) { PMD_DRV_LOG(ERR, "Server mode only supports vhost-user!"); return -1; } } switch (dev->backend_type) { case VIRTIO_USER_BACKEND_VHOST_USER: dev->ops = &virtio_ops_user; break; case VIRTIO_USER_BACKEND_VHOST_KERNEL: dev->ops = &virtio_ops_kernel; break; case VIRTIO_USER_BACKEND_VHOST_VDPA: dev->ops = &virtio_ops_vdpa; break; default: PMD_DRV_LOG(ERR, "(%s) Unknown backend type", dev->path); return -1; } if (dev->ops->setup(dev) < 0) { PMD_INIT_LOG(ERR, "(%s) Failed to setup backend", dev->path); return -1; } return 0; } static int virtio_user_alloc_vrings(struct virtio_user_dev *dev) { int i, size, nr_vrings; bool packed_ring = !!(dev->device_features & (1ull << VIRTIO_F_RING_PACKED)); nr_vrings = dev->max_queue_pairs * 2; if (dev->frontend_features & (1ull << VIRTIO_NET_F_CTRL_VQ)) nr_vrings++; dev->callfds = rte_zmalloc("virtio_user_dev", nr_vrings * sizeof(*dev->callfds), 0); if (!dev->callfds) { PMD_INIT_LOG(ERR, "(%s) Failed to alloc callfds", dev->path); return -1; } dev->kickfds = rte_zmalloc("virtio_user_dev", nr_vrings * sizeof(*dev->kickfds), 0); if (!dev->kickfds) { PMD_INIT_LOG(ERR, "(%s) Failed to alloc kickfds", dev->path); goto free_callfds; } for (i = 0; i < nr_vrings; i++) { dev->callfds[i] = -1; dev->kickfds[i] = -1; } if (packed_ring) size = sizeof(*dev->vrings.packed); else size = sizeof(*dev->vrings.split); dev->vrings.ptr = rte_zmalloc("virtio_user_dev", nr_vrings * size, 0); if (!dev->vrings.ptr) { PMD_INIT_LOG(ERR, "(%s) Failed to alloc vrings metadata", dev->path); goto free_kickfds; } if (packed_ring) { dev->packed_queues = rte_zmalloc("virtio_user_dev", nr_vrings * sizeof(*dev->packed_queues), 0); if (!dev->packed_queues) { PMD_INIT_LOG(ERR, "(%s) Failed to alloc packed queues metadata", dev->path); goto free_vrings; } } dev->qp_enabled = rte_zmalloc("virtio_user_dev", dev->max_queue_pairs * sizeof(*dev->qp_enabled), 0); if (!dev->qp_enabled) { PMD_INIT_LOG(ERR, "(%s) Failed to alloc QP enable states", dev->path); goto free_packed_queues; } return 0; free_packed_queues: rte_free(dev->packed_queues); dev->packed_queues = NULL; free_vrings: rte_free(dev->vrings.ptr); dev->vrings.ptr = NULL; free_kickfds: rte_free(dev->kickfds); dev->kickfds = NULL; free_callfds: rte_free(dev->callfds); dev->callfds = NULL; return -1; } static void virtio_user_free_vrings(struct virtio_user_dev *dev) { rte_free(dev->qp_enabled); dev->qp_enabled = NULL; rte_free(dev->packed_queues); dev->packed_queues = NULL; rte_free(dev->vrings.ptr); dev->vrings.ptr = NULL; rte_free(dev->kickfds); dev->kickfds = NULL; rte_free(dev->callfds); dev->callfds = NULL; } /* Use below macro to filter features from vhost backend */ #define VIRTIO_USER_SUPPORTED_FEATURES \ (1ULL << VIRTIO_NET_F_MAC | \ 1ULL << VIRTIO_NET_F_STATUS | \ 1ULL << VIRTIO_NET_F_MQ | \ 1ULL << VIRTIO_NET_F_CTRL_MAC_ADDR | \ 1ULL << VIRTIO_NET_F_CTRL_VQ | \ 1ULL << VIRTIO_NET_F_CTRL_RX | \ 1ULL << VIRTIO_NET_F_CTRL_VLAN | \ 1ULL << VIRTIO_NET_F_CSUM | \ 1ULL << VIRTIO_NET_F_HOST_TSO4 | \ 1ULL << VIRTIO_NET_F_HOST_TSO6 | \ 1ULL << VIRTIO_NET_F_MRG_RXBUF | \ 1ULL << VIRTIO_RING_F_INDIRECT_DESC | \ 1ULL << VIRTIO_NET_F_GUEST_CSUM | \ 1ULL << VIRTIO_NET_F_GUEST_TSO4 | \ 1ULL << VIRTIO_NET_F_GUEST_TSO6 | \ 1ULL << VIRTIO_F_IN_ORDER | \ 1ULL << VIRTIO_F_VERSION_1 | \ 1ULL << VIRTIO_F_RING_PACKED | \ 1ULL << VIRTIO_F_NOTIFICATION_DATA | \ 1ULL << VIRTIO_F_ORDER_PLATFORM | \ 1ULL << VIRTIO_NET_F_RSS) int virtio_user_dev_init(struct virtio_user_dev *dev, char *path, uint16_t queues, int cq, int queue_size, const char *mac, char **ifname, int server, int mrg_rxbuf, int in_order, int packed_vq, enum virtio_user_backend_type backend_type) { uint64_t backend_features; pthread_mutex_init(&dev->mutex, NULL); strlcpy(dev->path, path, PATH_MAX); dev->started = 0; dev->queue_pairs = 1; /* mq disabled by default */ dev->max_queue_pairs = queues; /* initialize to user requested value for kernel backend */ dev->queue_size = queue_size; dev->is_server = server; dev->mac_specified = 0; dev->frontend_features = 0; dev->unsupported_features = 0; dev->backend_type = backend_type; dev->ifname = *ifname; if (virtio_user_dev_setup(dev) < 0) { PMD_INIT_LOG(ERR, "(%s) backend set up fails", dev->path); return -1; } if (dev->ops->set_owner(dev) < 0) { PMD_INIT_LOG(ERR, "(%s) Failed to set backend owner", dev->path); goto destroy; } if (dev->ops->get_backend_features(&backend_features) < 0) { PMD_INIT_LOG(ERR, "(%s) Failed to get backend features", dev->path); goto destroy; } dev->unsupported_features = ~(VIRTIO_USER_SUPPORTED_FEATURES | backend_features); if (dev->ops->get_features(dev, &dev->device_features) < 0) { PMD_INIT_LOG(ERR, "(%s) Failed to get device features", dev->path); goto destroy; } virtio_user_dev_init_mac(dev, mac); if (virtio_user_dev_init_max_queue_pairs(dev, queues)) dev->unsupported_features |= (1ull << VIRTIO_NET_F_MQ); if (dev->max_queue_pairs > 1 || dev->hw_cvq) cq = 1; if (!mrg_rxbuf) dev->unsupported_features |= (1ull << VIRTIO_NET_F_MRG_RXBUF); if (!in_order) dev->unsupported_features |= (1ull << VIRTIO_F_IN_ORDER); if (!packed_vq) dev->unsupported_features |= (1ull << VIRTIO_F_RING_PACKED); if (dev->mac_specified) dev->frontend_features |= (1ull << VIRTIO_NET_F_MAC); else dev->unsupported_features |= (1ull << VIRTIO_NET_F_MAC); if (cq) { /* Except for vDPA, the device does not really need to know * anything about CQ, so if necessary, we just claim to support * control queue. */ dev->frontend_features |= (1ull << VIRTIO_NET_F_CTRL_VQ); } else { dev->unsupported_features |= (1ull << VIRTIO_NET_F_CTRL_VQ); /* Also disable features that depend on VIRTIO_NET_F_CTRL_VQ */ dev->unsupported_features |= (1ull << VIRTIO_NET_F_CTRL_RX); dev->unsupported_features |= (1ull << VIRTIO_NET_F_CTRL_VLAN); dev->unsupported_features |= (1ull << VIRTIO_NET_F_GUEST_ANNOUNCE); dev->unsupported_features |= (1ull << VIRTIO_NET_F_MQ); dev->unsupported_features |= (1ull << VIRTIO_NET_F_CTRL_MAC_ADDR); } /* The backend will not report this feature, we add it explicitly */ if (dev->backend_type == VIRTIO_USER_BACKEND_VHOST_USER) dev->frontend_features |= (1ull << VIRTIO_NET_F_STATUS); dev->frontend_features &= ~dev->unsupported_features; dev->device_features &= ~dev->unsupported_features; if (virtio_user_alloc_vrings(dev) < 0) { PMD_INIT_LOG(ERR, "(%s) Failed to allocate vring metadata", dev->path); goto destroy; } if (virtio_user_dev_init_notify(dev) < 0) { PMD_INIT_LOG(ERR, "(%s) Failed to init notifiers", dev->path); goto free_vrings; } if (virtio_user_fill_intr_handle(dev) < 0) { PMD_INIT_LOG(ERR, "(%s) Failed to init interrupt handler", dev->path); goto notify_uninit; } if (rte_mem_event_callback_register(VIRTIO_USER_MEM_EVENT_CLB_NAME, virtio_user_mem_event_cb, dev)) { if (rte_errno != ENOTSUP) { PMD_INIT_LOG(ERR, "(%s) Failed to register mem event callback", dev->path); goto notify_uninit; } } *ifname = NULL; return 0; notify_uninit: virtio_user_dev_uninit_notify(dev); free_vrings: virtio_user_free_vrings(dev); destroy: dev->ops->destroy(dev); return -1; } void virtio_user_dev_uninit(struct virtio_user_dev *dev) { struct rte_eth_dev *eth_dev = &rte_eth_devices[dev->hw.port_id]; rte_intr_instance_free(eth_dev->intr_handle); eth_dev->intr_handle = NULL; virtio_user_stop_device(dev); rte_mem_event_callback_unregister(VIRTIO_USER_MEM_EVENT_CLB_NAME, dev); virtio_user_dev_uninit_notify(dev); virtio_user_free_vrings(dev); free(dev->ifname); if (dev->is_server) unlink(dev->path); dev->ops->destroy(dev); } static uint8_t virtio_user_handle_mq(struct virtio_user_dev *dev, uint16_t q_pairs) { uint16_t i; uint8_t ret = 0; if (q_pairs > dev->max_queue_pairs) { PMD_INIT_LOG(ERR, "(%s) multi-q config %u, but only %u supported", dev->path, q_pairs, dev->max_queue_pairs); return -1; } for (i = 0; i < q_pairs; ++i) ret |= dev->ops->enable_qp(dev, i, 1); for (i = q_pairs; i < dev->max_queue_pairs; ++i) ret |= dev->ops->enable_qp(dev, i, 0); dev->queue_pairs = q_pairs; return ret; } #define CVQ_MAX_DATA_DESCS 32 static inline void * virtio_user_iova2virt(struct virtio_user_dev *dev, rte_iova_t iova) { if (rte_eal_iova_mode() == RTE_IOVA_VA || dev->hw.use_va) return (void *)(uintptr_t)iova; else return rte_mem_iova2virt(iova); } static uint32_t virtio_user_handle_ctrl_msg_split(struct virtio_user_dev *dev, struct vring *vring, uint16_t idx_hdr) { struct virtio_net_ctrl_hdr *hdr; virtio_net_ctrl_ack status = ~0; uint16_t i, idx_data, idx_status; uint32_t n_descs = 0; int dlen[CVQ_MAX_DATA_DESCS], nb_dlen = 0; /* locate desc for header, data, and status */ idx_data = vring->desc[idx_hdr].next; n_descs++; i = idx_data; while (vring->desc[i].flags == VRING_DESC_F_NEXT) { dlen[nb_dlen++] = vring->desc[i].len; i = vring->desc[i].next; n_descs++; } /* locate desc for status */ idx_status = i; n_descs++; hdr = virtio_user_iova2virt(dev, vring->desc[idx_hdr].addr); if (hdr->class == VIRTIO_NET_CTRL_MQ && hdr->cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) { uint16_t queues, *addr; addr = virtio_user_iova2virt(dev, vring->desc[idx_data].addr); queues = *addr; status = virtio_user_handle_mq(dev, queues); } else if (hdr->class == VIRTIO_NET_CTRL_MQ && hdr->cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) { struct virtio_net_ctrl_rss *rss; rss = virtio_user_iova2virt(dev, vring->desc[idx_data].addr); status = virtio_user_handle_mq(dev, rss->max_tx_vq); } else if (hdr->class == VIRTIO_NET_CTRL_RX || hdr->class == VIRTIO_NET_CTRL_MAC || hdr->class == VIRTIO_NET_CTRL_VLAN) { status = 0; } if (!status && dev->scvq) status = virtio_send_command(&dev->scvq->cq, (struct virtio_pmd_ctrl *)hdr, dlen, nb_dlen); /* Update status */ *(virtio_net_ctrl_ack *)virtio_user_iova2virt(dev, vring->desc[idx_status].addr) = status; return n_descs; } static inline int desc_is_avail(struct vring_packed_desc *desc, bool wrap_counter) { uint16_t flags = rte_atomic_load_explicit(&desc->flags, rte_memory_order_acquire); return wrap_counter == !!(flags & VRING_PACKED_DESC_F_AVAIL) && wrap_counter != !!(flags & VRING_PACKED_DESC_F_USED); } static uint32_t virtio_user_handle_ctrl_msg_packed(struct virtio_user_dev *dev, struct vring_packed *vring, uint16_t idx_hdr) { struct virtio_net_ctrl_hdr *hdr; virtio_net_ctrl_ack status = ~0; uint16_t idx_data, idx_status; /* initialize to one, header is first */ uint32_t n_descs = 1; int dlen[CVQ_MAX_DATA_DESCS], nb_dlen = 0; /* locate desc for header, data, and status */ idx_data = idx_hdr + 1; if (idx_data >= dev->queue_size) idx_data -= dev->queue_size; n_descs++; idx_status = idx_data; while (vring->desc[idx_status].flags & VRING_DESC_F_NEXT) { dlen[nb_dlen++] = vring->desc[idx_status].len; idx_status++; if (idx_status >= dev->queue_size) idx_status -= dev->queue_size; n_descs++; } hdr = virtio_user_iova2virt(dev, vring->desc[idx_hdr].addr); if (hdr->class == VIRTIO_NET_CTRL_MQ && hdr->cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) { uint16_t queues, *addr; addr = virtio_user_iova2virt(dev, vring->desc[idx_data].addr); queues = *addr; status = virtio_user_handle_mq(dev, queues); } else if (hdr->class == VIRTIO_NET_CTRL_MQ && hdr->cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) { struct virtio_net_ctrl_rss *rss; rss = virtio_user_iova2virt(dev, vring->desc[idx_data].addr); status = virtio_user_handle_mq(dev, rss->max_tx_vq); } else if (hdr->class == VIRTIO_NET_CTRL_RX || hdr->class == VIRTIO_NET_CTRL_MAC || hdr->class == VIRTIO_NET_CTRL_VLAN) { status = 0; } if (!status && dev->scvq) status = virtio_send_command(&dev->scvq->cq, (struct virtio_pmd_ctrl *)hdr, dlen, nb_dlen); /* Update status */ *(virtio_net_ctrl_ack *)virtio_user_iova2virt(dev, vring->desc[idx_status].addr) = status; /* Update used descriptor */ vring->desc[idx_hdr].id = vring->desc[idx_status].id; vring->desc[idx_hdr].len = sizeof(status); return n_descs; } static void virtio_user_handle_cq_packed(struct virtio_user_dev *dev, uint16_t queue_idx) { struct virtio_user_queue *vq = &dev->packed_queues[queue_idx]; struct vring_packed *vring = &dev->vrings.packed[queue_idx]; uint16_t n_descs, flags; /* Perform a load-acquire barrier in desc_is_avail to * enforce the ordering between desc flags and desc * content. */ while (desc_is_avail(&vring->desc[vq->used_idx], vq->used_wrap_counter)) { n_descs = virtio_user_handle_ctrl_msg_packed(dev, vring, vq->used_idx); flags = VRING_DESC_F_WRITE; if (vq->used_wrap_counter) flags |= VRING_PACKED_DESC_F_AVAIL_USED; rte_atomic_store_explicit(&vring->desc[vq->used_idx].flags, flags, rte_memory_order_release); vq->used_idx += n_descs; if (vq->used_idx >= dev->queue_size) { vq->used_idx -= dev->queue_size; vq->used_wrap_counter ^= 1; } } } static void virtio_user_handle_cq_split(struct virtio_user_dev *dev, uint16_t queue_idx) { uint16_t avail_idx, desc_idx; struct vring_used_elem *uep; uint32_t n_descs; struct vring *vring = &dev->vrings.split[queue_idx]; /* Consume avail ring, using used ring idx as first one */ while (rte_atomic_load_explicit(&vring->used->idx, rte_memory_order_relaxed) != vring->avail->idx) { avail_idx = rte_atomic_load_explicit(&vring->used->idx, rte_memory_order_relaxed) & (vring->num - 1); desc_idx = vring->avail->ring[avail_idx]; n_descs = virtio_user_handle_ctrl_msg_split(dev, vring, desc_idx); /* Update used ring */ uep = &vring->used->ring[avail_idx]; uep->id = desc_idx; uep->len = n_descs; rte_atomic_fetch_add_explicit(&vring->used->idx, 1, rte_memory_order_relaxed); } } void virtio_user_handle_cq(struct virtio_user_dev *dev, uint16_t queue_idx) { if (virtio_with_packed_queue(&dev->hw)) virtio_user_handle_cq_packed(dev, queue_idx); else virtio_user_handle_cq_split(dev, queue_idx); } static void virtio_user_control_queue_notify(struct virtqueue *vq, void *cookie) { struct virtio_user_dev *dev = cookie; uint64_t notify_data = 1; if (!dev->notify_area) { if (write(dev->kickfds[vq->vq_queue_index], ¬ify_data, sizeof(notify_data)) < 0) PMD_DRV_LOG(ERR, "failed to kick backend: %s", strerror(errno)); return; } else if (!virtio_with_feature(&dev->hw, VIRTIO_F_NOTIFICATION_DATA)) { rte_write16(vq->vq_queue_index, vq->notify_addr); return; } if (virtio_with_packed_queue(&dev->hw)) { /* Bit[0:15]: vq queue index * Bit[16:30]: avail index * Bit[31]: avail wrap counter */ notify_data = ((uint32_t)(!!(vq->vq_packed.cached_flags & VRING_PACKED_DESC_F_AVAIL)) << 31) | ((uint32_t)vq->vq_avail_idx << 16) | vq->vq_queue_index; } else { /* Bit[0:15]: vq queue index * Bit[16:31]: avail index */ notify_data = ((uint32_t)vq->vq_avail_idx << 16) | vq->vq_queue_index; } rte_write32(notify_data, vq->notify_addr); } int virtio_user_dev_create_shadow_cvq(struct virtio_user_dev *dev, struct virtqueue *vq) { char name[VIRTQUEUE_MAX_NAME_SZ]; struct virtqueue *scvq; snprintf(name, sizeof(name), "port%d_shadow_cvq", vq->hw->port_id); scvq = virtqueue_alloc(&dev->hw, vq->vq_queue_index, vq->vq_nentries, VTNET_CQ, SOCKET_ID_ANY, name); if (!scvq) { PMD_INIT_LOG(ERR, "(%s) Failed to alloc shadow control vq", dev->path); return -ENOMEM; } scvq->cq.notify_queue = &virtio_user_control_queue_notify; scvq->cq.notify_cookie = dev; scvq->notify_addr = vq->notify_addr; dev->scvq = scvq; return 0; } void virtio_user_dev_destroy_shadow_cvq(struct virtio_user_dev *dev) { if (!dev->scvq) return; virtqueue_free(dev->scvq); dev->scvq = NULL; } int virtio_user_dev_set_status(struct virtio_user_dev *dev, uint8_t status) { int ret; pthread_mutex_lock(&dev->mutex); dev->status = status; ret = dev->ops->set_status(dev, status); if (ret && ret != -ENOTSUP) PMD_INIT_LOG(ERR, "(%s) Failed to set backend status", dev->path); pthread_mutex_unlock(&dev->mutex); return ret; } int virtio_user_dev_update_status(struct virtio_user_dev *dev) { int ret; uint8_t status; pthread_mutex_lock(&dev->mutex); ret = dev->ops->get_status(dev, &status); if (!ret) { dev->status = status; PMD_INIT_LOG(DEBUG, "Updated Device Status(0x%08x):", dev->status); PMD_INIT_LOG(DEBUG, "\t-RESET: %u", (dev->status == VIRTIO_CONFIG_STATUS_RESET)); PMD_INIT_LOG(DEBUG, "\t-ACKNOWLEDGE: %u", !!(dev->status & VIRTIO_CONFIG_STATUS_ACK)); PMD_INIT_LOG(DEBUG, "\t-DRIVER: %u", !!(dev->status & VIRTIO_CONFIG_STATUS_DRIVER)); PMD_INIT_LOG(DEBUG, "\t-DRIVER_OK: %u", !!(dev->status & VIRTIO_CONFIG_STATUS_DRIVER_OK)); PMD_INIT_LOG(DEBUG, "\t-FEATURES_OK: %u", !!(dev->status & VIRTIO_CONFIG_STATUS_FEATURES_OK)); PMD_INIT_LOG(DEBUG, "\t-DEVICE_NEED_RESET: %u", !!(dev->status & VIRTIO_CONFIG_STATUS_DEV_NEED_RESET)); PMD_INIT_LOG(DEBUG, "\t-FAILED: %u", !!(dev->status & VIRTIO_CONFIG_STATUS_FAILED)); } else if (ret != -ENOTSUP) { PMD_INIT_LOG(ERR, "(%s) Failed to get backend status", dev->path); } pthread_mutex_unlock(&dev->mutex); return ret; } int virtio_user_dev_update_link_state(struct virtio_user_dev *dev) { if (dev->ops->update_link_state) return dev->ops->update_link_state(dev); return 0; } static void virtio_user_dev_reset_queues_packed(struct rte_eth_dev *eth_dev) { struct virtio_user_dev *dev = eth_dev->data->dev_private; struct virtio_hw *hw = &dev->hw; struct virtnet_rx *rxvq; struct virtnet_tx *txvq; uint16_t i; /* Add lock to avoid queue contention. */ rte_spinlock_lock(&hw->state_lock); hw->started = 0; /* * Waiting for datapath to complete before resetting queues. * 1 ms should be enough for the ongoing Tx/Rx function to finish. */ rte_delay_ms(1); /* Vring reset for each Tx queue and Rx queue. */ for (i = 0; i < eth_dev->data->nb_rx_queues; i++) { rxvq = eth_dev->data->rx_queues[i]; virtqueue_rxvq_reset_packed(virtnet_rxq_to_vq(rxvq)); virtio_dev_rx_queue_setup_finish(eth_dev, i); } for (i = 0; i < eth_dev->data->nb_tx_queues; i++) { txvq = eth_dev->data->tx_queues[i]; virtqueue_txvq_reset_packed(virtnet_txq_to_vq(txvq)); } hw->started = 1; rte_spinlock_unlock(&hw->state_lock); } void virtio_user_dev_delayed_disconnect_handler(void *param) { struct virtio_user_dev *dev = param; struct rte_eth_dev *eth_dev = &rte_eth_devices[dev->hw.port_id]; if (rte_intr_disable(eth_dev->intr_handle) < 0) { PMD_DRV_LOG(ERR, "interrupt disable failed"); return; } PMD_DRV_LOG(DEBUG, "Unregistering intr fd: %d", rte_intr_fd_get(eth_dev->intr_handle)); if (rte_intr_callback_unregister(eth_dev->intr_handle, virtio_interrupt_handler, eth_dev) != 1) PMD_DRV_LOG(ERR, "interrupt unregister failed"); if (dev->is_server) { if (dev->ops->server_disconnect) dev->ops->server_disconnect(dev); rte_intr_fd_set(eth_dev->intr_handle, dev->ops->get_intr_fd(dev)); PMD_DRV_LOG(DEBUG, "Registering intr fd: %d", rte_intr_fd_get(eth_dev->intr_handle)); if (rte_intr_callback_register(eth_dev->intr_handle, virtio_interrupt_handler, eth_dev)) PMD_DRV_LOG(ERR, "interrupt register failed"); if (rte_intr_enable(eth_dev->intr_handle) < 0) { PMD_DRV_LOG(ERR, "interrupt enable failed"); return; } } } static void virtio_user_dev_delayed_intr_reconfig_handler(void *param) { struct virtio_user_dev *dev = param; struct rte_eth_dev *eth_dev = &rte_eth_devices[dev->hw.port_id]; PMD_DRV_LOG(DEBUG, "Unregistering intr fd: %d", rte_intr_fd_get(eth_dev->intr_handle)); if (rte_intr_callback_unregister(eth_dev->intr_handle, virtio_interrupt_handler, eth_dev) != 1) PMD_DRV_LOG(ERR, "interrupt unregister failed"); rte_intr_fd_set(eth_dev->intr_handle, dev->ops->get_intr_fd(dev)); PMD_DRV_LOG(DEBUG, "Registering intr fd: %d", rte_intr_fd_get(eth_dev->intr_handle)); if (rte_intr_callback_register(eth_dev->intr_handle, virtio_interrupt_handler, eth_dev)) PMD_DRV_LOG(ERR, "interrupt register failed"); if (rte_intr_enable(eth_dev->intr_handle) < 0) PMD_DRV_LOG(ERR, "interrupt enable failed"); } int virtio_user_dev_server_reconnect(struct virtio_user_dev *dev) { int ret, old_status; struct rte_eth_dev *eth_dev = &rte_eth_devices[dev->hw.port_id]; struct virtio_hw *hw = &dev->hw; if (!dev->ops->server_reconnect) { PMD_DRV_LOG(ERR, "(%s) Missing server reconnect callback", dev->path); return -1; } if (dev->ops->server_reconnect(dev)) { PMD_DRV_LOG(ERR, "(%s) Reconnect callback call failed", dev->path); return -1; } old_status = dev->status; virtio_reset(hw); virtio_set_status(hw, VIRTIO_CONFIG_STATUS_ACK); virtio_set_status(hw, VIRTIO_CONFIG_STATUS_DRIVER); if (dev->ops->get_features(dev, &dev->device_features) < 0) { PMD_INIT_LOG(ERR, "get_features failed: %s", strerror(errno)); return -1; } /* unmask vhost-user unsupported features */ dev->device_features &= ~(dev->unsupported_features); dev->features &= (dev->device_features | dev->frontend_features); /* For packed ring, resetting queues is required in reconnection. */ if (virtio_with_packed_queue(hw) && (old_status & VIRTIO_CONFIG_STATUS_DRIVER_OK)) { PMD_INIT_LOG(NOTICE, "Packets on the fly will be dropped" " when packed ring reconnecting."); virtio_user_dev_reset_queues_packed(eth_dev); } virtio_set_status(hw, VIRTIO_CONFIG_STATUS_FEATURES_OK); /* Start the device */ virtio_set_status(hw, VIRTIO_CONFIG_STATUS_DRIVER_OK); if (!dev->started) return -1; if (dev->queue_pairs > 1) { ret = virtio_user_handle_mq(dev, dev->queue_pairs); if (ret != 0) { PMD_INIT_LOG(ERR, "Fails to enable multi-queue pairs!"); return -1; } } if (eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) { if (rte_intr_disable(eth_dev->intr_handle) < 0) { PMD_DRV_LOG(ERR, "interrupt disable failed"); return -1; } /* * This function can be called from the interrupt handler, so * we can't unregister interrupt handler here. Setting * alarm to do that later. */ rte_eal_alarm_set(1, virtio_user_dev_delayed_intr_reconfig_handler, (void *)dev); } PMD_INIT_LOG(NOTICE, "server mode virtio-user reconnection succeeds!"); return 0; }