145ef232aSNipun Gupta /* SPDX-License-Identifier: BSD-3-Clause 245ef232aSNipun Gupta * Copyright (C) 2022-2023, Advanced Micro Devices, Inc. 345ef232aSNipun Gupta */ 445ef232aSNipun Gupta 545ef232aSNipun Gupta /** 645ef232aSNipun Gupta * @file 745ef232aSNipun Gupta * CDX probing using Linux VFIO. 845ef232aSNipun Gupta * 945ef232aSNipun Gupta * This code tries to determine if the CDX device is bound to VFIO driver, 1045ef232aSNipun Gupta * and initialize it (map MMIO regions, set up interrupts) if that's the case. 1145ef232aSNipun Gupta * 1245ef232aSNipun Gupta */ 1345ef232aSNipun Gupta 1445ef232aSNipun Gupta #include <fcntl.h> 1545ef232aSNipun Gupta #include <unistd.h> 1645ef232aSNipun Gupta #include <sys/eventfd.h> 1745ef232aSNipun Gupta #include <sys/socket.h> 1845ef232aSNipun Gupta #include <sys/ioctl.h> 1945ef232aSNipun Gupta #include <sys/mman.h> 2045ef232aSNipun Gupta #include <rte_eal_paging.h> 2145ef232aSNipun Gupta #include <rte_malloc.h> 2245ef232aSNipun Gupta #include <rte_vfio.h> 2345ef232aSNipun Gupta 2445ef232aSNipun Gupta #include "bus_cdx_driver.h" 2545ef232aSNipun Gupta #include "cdx_logs.h" 2645ef232aSNipun Gupta #include "private.h" 2745ef232aSNipun Gupta 2845ef232aSNipun Gupta /** 2945ef232aSNipun Gupta * A structure describing a CDX mapping. 3045ef232aSNipun Gupta */ 3145ef232aSNipun Gupta struct cdx_map { 3245ef232aSNipun Gupta void *addr; 3345ef232aSNipun Gupta char *path; 3445ef232aSNipun Gupta uint64_t offset; 3545ef232aSNipun Gupta uint64_t size; 3645ef232aSNipun Gupta }; 3745ef232aSNipun Gupta 3845ef232aSNipun Gupta /** 3945ef232aSNipun Gupta * A structure describing a mapped CDX resource. 4045ef232aSNipun Gupta * For multi-process we need to reproduce all CDX mappings in secondary 4145ef232aSNipun Gupta * processes, so save them in a tailq. 4245ef232aSNipun Gupta */ 4345ef232aSNipun Gupta struct mapped_cdx_resource { 4445ef232aSNipun Gupta TAILQ_ENTRY(mapped_cdx_resource) next; 4545ef232aSNipun Gupta char name[RTE_DEV_NAME_MAX_LEN]; /**< CDX device name */ 4645ef232aSNipun Gupta char path[PATH_MAX]; 4745ef232aSNipun Gupta int nb_maps; 4845ef232aSNipun Gupta struct cdx_map maps[RTE_CDX_MAX_RESOURCE]; 4945ef232aSNipun Gupta }; 5045ef232aSNipun Gupta 5145ef232aSNipun Gupta /** mapped cdx device list */ 5245ef232aSNipun Gupta TAILQ_HEAD(mapped_cdx_res_list, mapped_cdx_resource); 5345ef232aSNipun Gupta 54f29fb5caSNipun Gupta /* IRQ set buffer length for MSI interrupts */ 55f29fb5caSNipun Gupta #define MSI_IRQ_SET_BUF_LEN (sizeof(struct vfio_irq_set) + \ 56f29fb5caSNipun Gupta sizeof(int) * (RTE_MAX_RXTX_INTR_VEC_ID + 1)) 57f29fb5caSNipun Gupta 5845ef232aSNipun Gupta static struct rte_tailq_elem cdx_vfio_tailq = { 5945ef232aSNipun Gupta .name = "VFIO_CDX_RESOURCE_LIST", 6045ef232aSNipun Gupta }; 6145ef232aSNipun Gupta EAL_REGISTER_TAILQ(cdx_vfio_tailq) 6245ef232aSNipun Gupta 6345ef232aSNipun Gupta static struct mapped_cdx_resource * 6445ef232aSNipun Gupta cdx_vfio_find_and_unmap_resource(struct mapped_cdx_res_list *vfio_res_list, 6545ef232aSNipun Gupta struct rte_cdx_device *dev) 6645ef232aSNipun Gupta { 6745ef232aSNipun Gupta struct mapped_cdx_resource *vfio_res = NULL; 6845ef232aSNipun Gupta const char *dev_name = dev->device.name; 6945ef232aSNipun Gupta struct cdx_map *maps; 7045ef232aSNipun Gupta int i; 7145ef232aSNipun Gupta 7245ef232aSNipun Gupta /* Get vfio_res */ 7345ef232aSNipun Gupta TAILQ_FOREACH(vfio_res, vfio_res_list, next) { 7445ef232aSNipun Gupta if (strcmp(vfio_res->name, dev_name)) 7545ef232aSNipun Gupta continue; 7645ef232aSNipun Gupta break; 7745ef232aSNipun Gupta } 7845ef232aSNipun Gupta 7945ef232aSNipun Gupta if (vfio_res == NULL) 8045ef232aSNipun Gupta return vfio_res; 8145ef232aSNipun Gupta 8245ef232aSNipun Gupta CDX_BUS_INFO("Releasing CDX mapped resource for %s", dev_name); 8345ef232aSNipun Gupta 8445ef232aSNipun Gupta maps = vfio_res->maps; 8545ef232aSNipun Gupta for (i = 0; i < vfio_res->nb_maps; i++) { 8645ef232aSNipun Gupta if (maps[i].addr) { 8745ef232aSNipun Gupta CDX_BUS_DEBUG("Calling cdx_unmap_resource for %s at %p", 8845ef232aSNipun Gupta dev_name, maps[i].addr); 8945ef232aSNipun Gupta cdx_unmap_resource(maps[i].addr, maps[i].size); 9045ef232aSNipun Gupta } 9145ef232aSNipun Gupta } 9245ef232aSNipun Gupta 9345ef232aSNipun Gupta return vfio_res; 9445ef232aSNipun Gupta } 9545ef232aSNipun Gupta 9645ef232aSNipun Gupta static int 9745ef232aSNipun Gupta cdx_vfio_unmap_resource_primary(struct rte_cdx_device *dev) 9845ef232aSNipun Gupta { 9945ef232aSNipun Gupta char cdx_addr[PATH_MAX] = {0}; 10045ef232aSNipun Gupta struct mapped_cdx_resource *vfio_res = NULL; 10145ef232aSNipun Gupta struct mapped_cdx_res_list *vfio_res_list; 102f29fb5caSNipun Gupta int ret, vfio_dev_fd; 103f29fb5caSNipun Gupta 104250b2b38SNikhil Agarwal if (rte_intr_fd_get(dev->intr_handle) >= 0) { 1052284d672SShubham Rohila if (rte_cdx_vfio_bm_disable(dev) < 0) 1062284d672SShubham Rohila CDX_BUS_ERR("Error when disabling bus master for %s", 1072284d672SShubham Rohila dev->device.name); 1082284d672SShubham Rohila 109f29fb5caSNipun Gupta if (close(rte_intr_fd_get(dev->intr_handle)) < 0) { 110f29fb5caSNipun Gupta CDX_BUS_ERR("Error when closing eventfd file descriptor for %s", 111f29fb5caSNipun Gupta dev->device.name); 112f29fb5caSNipun Gupta return -1; 113f29fb5caSNipun Gupta } 114250b2b38SNikhil Agarwal } 115f29fb5caSNipun Gupta 116f29fb5caSNipun Gupta vfio_dev_fd = rte_intr_dev_fd_get(dev->intr_handle); 117f29fb5caSNipun Gupta if (vfio_dev_fd < 0) 118f29fb5caSNipun Gupta return -1; 119f29fb5caSNipun Gupta 120f29fb5caSNipun Gupta ret = rte_vfio_release_device(RTE_CDX_BUS_DEVICES_PATH, dev->device.name, 121f29fb5caSNipun Gupta vfio_dev_fd); 122f29fb5caSNipun Gupta if (ret < 0) { 123f29fb5caSNipun Gupta CDX_BUS_ERR("Cannot release VFIO device"); 124f29fb5caSNipun Gupta return ret; 125f29fb5caSNipun Gupta } 12645ef232aSNipun Gupta 12745ef232aSNipun Gupta vfio_res_list = 12845ef232aSNipun Gupta RTE_TAILQ_CAST(cdx_vfio_tailq.head, mapped_cdx_res_list); 12945ef232aSNipun Gupta vfio_res = cdx_vfio_find_and_unmap_resource(vfio_res_list, dev); 13045ef232aSNipun Gupta 13145ef232aSNipun Gupta /* if we haven't found our tailq entry, something's wrong */ 13245ef232aSNipun Gupta if (vfio_res == NULL) { 13345ef232aSNipun Gupta CDX_BUS_ERR("%s cannot find TAILQ entry for cdx device!", 13445ef232aSNipun Gupta cdx_addr); 13545ef232aSNipun Gupta return -1; 13645ef232aSNipun Gupta } 13745ef232aSNipun Gupta 13845ef232aSNipun Gupta TAILQ_REMOVE(vfio_res_list, vfio_res, next); 13945ef232aSNipun Gupta rte_free(vfio_res); 14045ef232aSNipun Gupta return 0; 14145ef232aSNipun Gupta } 14245ef232aSNipun Gupta 14345ef232aSNipun Gupta static int 14445ef232aSNipun Gupta cdx_vfio_unmap_resource_secondary(struct rte_cdx_device *dev) 14545ef232aSNipun Gupta { 14645ef232aSNipun Gupta struct mapped_cdx_resource *vfio_res = NULL; 14745ef232aSNipun Gupta struct mapped_cdx_res_list *vfio_res_list; 148f29fb5caSNipun Gupta int ret, vfio_dev_fd; 149f29fb5caSNipun Gupta 150f29fb5caSNipun Gupta vfio_dev_fd = rte_intr_dev_fd_get(dev->intr_handle); 151f29fb5caSNipun Gupta if (vfio_dev_fd < 0) 152f29fb5caSNipun Gupta return -1; 153f29fb5caSNipun Gupta 154f29fb5caSNipun Gupta ret = rte_vfio_release_device(RTE_CDX_BUS_DEVICES_PATH, dev->device.name, 155f29fb5caSNipun Gupta vfio_dev_fd); 156f29fb5caSNipun Gupta if (ret < 0) { 157f29fb5caSNipun Gupta CDX_BUS_ERR("Cannot release VFIO device"); 158f29fb5caSNipun Gupta return ret; 159f29fb5caSNipun Gupta } 16045ef232aSNipun Gupta 16145ef232aSNipun Gupta vfio_res_list = 16245ef232aSNipun Gupta RTE_TAILQ_CAST(cdx_vfio_tailq.head, mapped_cdx_res_list); 16345ef232aSNipun Gupta vfio_res = cdx_vfio_find_and_unmap_resource(vfio_res_list, dev); 16445ef232aSNipun Gupta 16545ef232aSNipun Gupta /* if we haven't found our tailq entry, something's wrong */ 16645ef232aSNipun Gupta if (vfio_res == NULL) { 16745ef232aSNipun Gupta CDX_BUS_ERR("%s cannot find TAILQ entry for CDX device!", 16845ef232aSNipun Gupta dev->device.name); 16945ef232aSNipun Gupta return -1; 17045ef232aSNipun Gupta } 17145ef232aSNipun Gupta 17245ef232aSNipun Gupta return 0; 17345ef232aSNipun Gupta } 17445ef232aSNipun Gupta 17545ef232aSNipun Gupta int 17645ef232aSNipun Gupta cdx_vfio_unmap_resource(struct rte_cdx_device *dev) 17745ef232aSNipun Gupta { 17845ef232aSNipun Gupta if (rte_eal_process_type() == RTE_PROC_PRIMARY) 17945ef232aSNipun Gupta return cdx_vfio_unmap_resource_primary(dev); 18045ef232aSNipun Gupta else 18145ef232aSNipun Gupta return cdx_vfio_unmap_resource_secondary(dev); 18245ef232aSNipun Gupta } 18345ef232aSNipun Gupta 184f29fb5caSNipun Gupta /* set up interrupt support (but not enable interrupts) */ 18545ef232aSNipun Gupta static int 186f29fb5caSNipun Gupta cdx_vfio_setup_interrupts(struct rte_cdx_device *dev, int vfio_dev_fd, 187f29fb5caSNipun Gupta int num_irqs) 18845ef232aSNipun Gupta { 189f29fb5caSNipun Gupta int i, ret; 190f29fb5caSNipun Gupta 191250b2b38SNikhil Agarwal if (rte_intr_dev_fd_set(dev->intr_handle, vfio_dev_fd)) 192250b2b38SNikhil Agarwal return -1; 193250b2b38SNikhil Agarwal 194f29fb5caSNipun Gupta if (num_irqs == 0) 195f29fb5caSNipun Gupta return 0; 196f29fb5caSNipun Gupta 197f29fb5caSNipun Gupta /* start from MSI interrupt type */ 198f29fb5caSNipun Gupta for (i = 0; i < num_irqs; i++) { 199f29fb5caSNipun Gupta struct vfio_irq_info irq = { .argsz = sizeof(irq) }; 200f29fb5caSNipun Gupta int fd = -1; 201f29fb5caSNipun Gupta 202f29fb5caSNipun Gupta irq.index = i; 203f29fb5caSNipun Gupta 204f29fb5caSNipun Gupta ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_IRQ_INFO, &irq); 205f29fb5caSNipun Gupta if (ret < 0) { 206f29fb5caSNipun Gupta CDX_BUS_ERR("Cannot get VFIO IRQ info, error %i (%s)", 207f29fb5caSNipun Gupta errno, strerror(errno)); 208f29fb5caSNipun Gupta return -1; 209f29fb5caSNipun Gupta } 210f29fb5caSNipun Gupta 211f29fb5caSNipun Gupta /* if this vector cannot be used with eventfd, fail if we explicitly 212f29fb5caSNipun Gupta * specified interrupt type, otherwise continue 213f29fb5caSNipun Gupta */ 214f29fb5caSNipun Gupta if ((irq.flags & VFIO_IRQ_INFO_EVENTFD) == 0) 215f29fb5caSNipun Gupta continue; 216f29fb5caSNipun Gupta 217f29fb5caSNipun Gupta /* Set nb_intr to the total number of interrupts */ 218f29fb5caSNipun Gupta if (rte_intr_event_list_update(dev->intr_handle, irq.count)) 219f29fb5caSNipun Gupta return -1; 220f29fb5caSNipun Gupta 221f29fb5caSNipun Gupta /* set up an eventfd for interrupts */ 222f29fb5caSNipun Gupta fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); 223f29fb5caSNipun Gupta if (fd < 0) { 224f29fb5caSNipun Gupta CDX_BUS_ERR("Cannot set up eventfd, error %i (%s)", 225f29fb5caSNipun Gupta errno, strerror(errno)); 226f29fb5caSNipun Gupta return -1; 227f29fb5caSNipun Gupta } 228f29fb5caSNipun Gupta 229f29fb5caSNipun Gupta if (rte_intr_fd_set(dev->intr_handle, fd)) 230f29fb5caSNipun Gupta return -1; 231f29fb5caSNipun Gupta 232f29fb5caSNipun Gupta /* DPDK CDX bus currently supports only MSI-X */ 233f29fb5caSNipun Gupta if (rte_intr_type_set(dev->intr_handle, RTE_INTR_HANDLE_VFIO_MSIX)) 234f29fb5caSNipun Gupta return -1; 235f29fb5caSNipun Gupta 236f29fb5caSNipun Gupta return 0; 237f29fb5caSNipun Gupta } 238f29fb5caSNipun Gupta 239f29fb5caSNipun Gupta /* if we're here, we haven't found a suitable interrupt vector */ 240f29fb5caSNipun Gupta return -1; 241f29fb5caSNipun Gupta } 242f29fb5caSNipun Gupta 243f29fb5caSNipun Gupta static int 244f29fb5caSNipun Gupta cdx_vfio_setup_device(struct rte_cdx_device *dev, int vfio_dev_fd, 245f29fb5caSNipun Gupta int num_irqs) 246f29fb5caSNipun Gupta { 247f29fb5caSNipun Gupta if (cdx_vfio_setup_interrupts(dev, vfio_dev_fd, num_irqs) != 0) { 248f29fb5caSNipun Gupta CDX_BUS_ERR("Error setting up interrupts!"); 249f29fb5caSNipun Gupta return -1; 250f29fb5caSNipun Gupta } 251f29fb5caSNipun Gupta 25245ef232aSNipun Gupta /* 25345ef232aSNipun Gupta * Reset the device. If the device is not capable of resetting, 25445ef232aSNipun Gupta * then it updates errno as EINVAL. 25545ef232aSNipun Gupta */ 25645ef232aSNipun Gupta if (ioctl(vfio_dev_fd, VFIO_DEVICE_RESET) && errno != EINVAL) { 25745ef232aSNipun Gupta CDX_BUS_ERR("Unable to reset device! Error: %d (%s)", errno, 25845ef232aSNipun Gupta strerror(errno)); 25945ef232aSNipun Gupta return -1; 26045ef232aSNipun Gupta } 26145ef232aSNipun Gupta 2622284d672SShubham Rohila /* 2632284d672SShubham Rohila * Enable Bus mastering for the device. errno is set as ENOTTY if 2642284d672SShubham Rohila * device does not support configuring bus master. 2652284d672SShubham Rohila */ 2662284d672SShubham Rohila if (rte_cdx_vfio_bm_enable(dev) && (errno != -ENOTTY)) { 2672284d672SShubham Rohila CDX_BUS_ERR("Bus master enable failure! Error: %d (%s)", errno, 2682284d672SShubham Rohila strerror(errno)); 2692284d672SShubham Rohila return -1; 2702284d672SShubham Rohila } 2712284d672SShubham Rohila 27245ef232aSNipun Gupta return 0; 27345ef232aSNipun Gupta } 27445ef232aSNipun Gupta 27545ef232aSNipun Gupta static int 27645ef232aSNipun Gupta cdx_vfio_mmap_resource(int vfio_dev_fd, struct mapped_cdx_resource *vfio_res, 27745ef232aSNipun Gupta int index, int additional_flags) 27845ef232aSNipun Gupta { 27945ef232aSNipun Gupta struct cdx_map *map = &vfio_res->maps[index]; 28045ef232aSNipun Gupta void *vaddr; 28145ef232aSNipun Gupta 28245ef232aSNipun Gupta if (map->size == 0) { 28345ef232aSNipun Gupta CDX_BUS_DEBUG("map size is 0, skip region %d", index); 28445ef232aSNipun Gupta return 0; 28545ef232aSNipun Gupta } 28645ef232aSNipun Gupta 28745ef232aSNipun Gupta /* reserve the address using an inaccessible mapping */ 28845ef232aSNipun Gupta vaddr = mmap(map->addr, map->size, 0, MAP_PRIVATE | 28945ef232aSNipun Gupta MAP_ANONYMOUS | additional_flags, -1, 0); 29045ef232aSNipun Gupta if (vaddr != MAP_FAILED) { 29145ef232aSNipun Gupta void *map_addr = NULL; 29245ef232aSNipun Gupta 29345ef232aSNipun Gupta if (map->size) { 29445ef232aSNipun Gupta /* actual map of first part */ 29545ef232aSNipun Gupta map_addr = cdx_map_resource(vaddr, vfio_dev_fd, 29645ef232aSNipun Gupta map->offset, map->size, 29745ef232aSNipun Gupta RTE_MAP_FORCE_ADDRESS); 29845ef232aSNipun Gupta } 29945ef232aSNipun Gupta 30045ef232aSNipun Gupta if (map_addr == NULL) { 30145ef232aSNipun Gupta munmap(vaddr, map->size); 30245ef232aSNipun Gupta vaddr = MAP_FAILED; 30345ef232aSNipun Gupta CDX_BUS_ERR("Failed to map cdx MMIO region %d", index); 30445ef232aSNipun Gupta return -1; 30545ef232aSNipun Gupta } 30645ef232aSNipun Gupta } else { 30745ef232aSNipun Gupta CDX_BUS_ERR("Failed to create inaccessible mapping for MMIO region %d", 30845ef232aSNipun Gupta index); 30945ef232aSNipun Gupta return -1; 31045ef232aSNipun Gupta } 31145ef232aSNipun Gupta 31245ef232aSNipun Gupta map->addr = vaddr; 31345ef232aSNipun Gupta return 0; 31445ef232aSNipun Gupta } 31545ef232aSNipun Gupta 31645ef232aSNipun Gupta /* 31745ef232aSNipun Gupta * region info may contain capability headers, so we need to keep reallocating 31845ef232aSNipun Gupta * the memory until we match allocated memory size with argsz. 31945ef232aSNipun Gupta */ 32045ef232aSNipun Gupta static int 32145ef232aSNipun Gupta cdx_vfio_get_region_info(int vfio_dev_fd, struct vfio_region_info **info, 32245ef232aSNipun Gupta int region) 32345ef232aSNipun Gupta { 32445ef232aSNipun Gupta struct vfio_region_info *ri; 32545ef232aSNipun Gupta size_t argsz = sizeof(*ri); 32645ef232aSNipun Gupta int ret; 32745ef232aSNipun Gupta 32845ef232aSNipun Gupta ri = malloc(sizeof(*ri)); 32945ef232aSNipun Gupta if (ri == NULL) { 33045ef232aSNipun Gupta CDX_BUS_ERR("Cannot allocate memory for VFIO region info"); 33145ef232aSNipun Gupta return -1; 33245ef232aSNipun Gupta } 33345ef232aSNipun Gupta again: 33445ef232aSNipun Gupta memset(ri, 0, argsz); 33545ef232aSNipun Gupta ri->argsz = argsz; 33645ef232aSNipun Gupta ri->index = region; 33745ef232aSNipun Gupta 33845ef232aSNipun Gupta ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_REGION_INFO, ri); 33945ef232aSNipun Gupta if (ret < 0) { 34045ef232aSNipun Gupta free(ri); 34145ef232aSNipun Gupta return ret; 34245ef232aSNipun Gupta } 34345ef232aSNipun Gupta if (ri->argsz != argsz) { 34445ef232aSNipun Gupta struct vfio_region_info *tmp; 34545ef232aSNipun Gupta 34645ef232aSNipun Gupta argsz = ri->argsz; 34745ef232aSNipun Gupta tmp = realloc(ri, argsz); 34845ef232aSNipun Gupta 34945ef232aSNipun Gupta if (tmp == NULL) { 35045ef232aSNipun Gupta /* realloc failed but the ri is still there */ 35145ef232aSNipun Gupta free(ri); 35245ef232aSNipun Gupta CDX_BUS_ERR("Cannot reallocate memory for VFIO region info"); 35345ef232aSNipun Gupta return -1; 35445ef232aSNipun Gupta } 35545ef232aSNipun Gupta ri = tmp; 35645ef232aSNipun Gupta goto again; 35745ef232aSNipun Gupta } 35845ef232aSNipun Gupta *info = ri; 35945ef232aSNipun Gupta 36045ef232aSNipun Gupta return 0; 36145ef232aSNipun Gupta } 36245ef232aSNipun Gupta 36345ef232aSNipun Gupta static int 36445ef232aSNipun Gupta find_max_end_va(const struct rte_memseg_list *msl, void *arg) 36545ef232aSNipun Gupta { 36645ef232aSNipun Gupta size_t sz = msl->len; 36745ef232aSNipun Gupta void *end_va = RTE_PTR_ADD(msl->base_va, sz); 36845ef232aSNipun Gupta void **max_va = arg; 36945ef232aSNipun Gupta 37045ef232aSNipun Gupta if (*max_va < end_va) 37145ef232aSNipun Gupta *max_va = end_va; 37245ef232aSNipun Gupta return 0; 37345ef232aSNipun Gupta } 37445ef232aSNipun Gupta 37545ef232aSNipun Gupta static void * 37645ef232aSNipun Gupta cdx_find_max_end_va(void) 37745ef232aSNipun Gupta { 37845ef232aSNipun Gupta void *va = NULL; 37945ef232aSNipun Gupta 38045ef232aSNipun Gupta rte_memseg_list_walk(find_max_end_va, &va); 38145ef232aSNipun Gupta return va; 38245ef232aSNipun Gupta } 38345ef232aSNipun Gupta 38445ef232aSNipun Gupta static int 38545ef232aSNipun Gupta cdx_vfio_map_resource_primary(struct rte_cdx_device *dev) 38645ef232aSNipun Gupta { 38745ef232aSNipun Gupta struct vfio_device_info device_info = { .argsz = sizeof(device_info) }; 38845ef232aSNipun Gupta char cdx_addr[PATH_MAX] = {0}; 38945ef232aSNipun Gupta static void *cdx_map_addr; 39045ef232aSNipun Gupta struct mapped_cdx_resource *vfio_res = NULL; 39145ef232aSNipun Gupta struct mapped_cdx_res_list *vfio_res_list = 39245ef232aSNipun Gupta RTE_TAILQ_CAST(cdx_vfio_tailq.head, mapped_cdx_res_list); 39345ef232aSNipun Gupta const char *dev_name = dev->device.name; 39445ef232aSNipun Gupta struct cdx_map *maps; 39545ef232aSNipun Gupta int vfio_dev_fd, i, ret; 39645ef232aSNipun Gupta 397f29fb5caSNipun Gupta if (rte_intr_fd_set(dev->intr_handle, -1)) 398f29fb5caSNipun Gupta return -1; 399f29fb5caSNipun Gupta 40045ef232aSNipun Gupta ret = rte_vfio_setup_device(RTE_CDX_BUS_DEVICES_PATH, dev_name, 40145ef232aSNipun Gupta &vfio_dev_fd, &device_info); 40245ef232aSNipun Gupta if (ret) 40345ef232aSNipun Gupta return ret; 40445ef232aSNipun Gupta 40545ef232aSNipun Gupta /* allocate vfio_res and get region info */ 40645ef232aSNipun Gupta vfio_res = rte_zmalloc("VFIO_RES", sizeof(*vfio_res), 0); 40745ef232aSNipun Gupta if (vfio_res == NULL) { 40845ef232aSNipun Gupta CDX_BUS_ERR("Cannot store VFIO mmap details"); 40945ef232aSNipun Gupta goto err_vfio_dev_fd; 41045ef232aSNipun Gupta } 41145ef232aSNipun Gupta memcpy(vfio_res->name, dev_name, RTE_DEV_NAME_MAX_LEN); 41245ef232aSNipun Gupta 41345ef232aSNipun Gupta /* get number of registers */ 41445ef232aSNipun Gupta vfio_res->nb_maps = device_info.num_regions; 41545ef232aSNipun Gupta 41645ef232aSNipun Gupta /* map memory regions */ 41745ef232aSNipun Gupta maps = vfio_res->maps; 41845ef232aSNipun Gupta 41945ef232aSNipun Gupta for (i = 0; i < vfio_res->nb_maps; i++) { 42045ef232aSNipun Gupta struct vfio_region_info *reg = NULL; 42145ef232aSNipun Gupta void *vaddr; 42245ef232aSNipun Gupta 42345ef232aSNipun Gupta ret = cdx_vfio_get_region_info(vfio_dev_fd, ®, i); 42445ef232aSNipun Gupta if (ret < 0) { 42545ef232aSNipun Gupta CDX_BUS_ERR("%s cannot get device region info error %i (%s)", 42645ef232aSNipun Gupta dev_name, errno, strerror(errno)); 42745ef232aSNipun Gupta goto err_vfio_res; 42845ef232aSNipun Gupta } 42945ef232aSNipun Gupta 43045ef232aSNipun Gupta /* skip non-mmappable regions */ 43145ef232aSNipun Gupta if ((reg->flags & VFIO_REGION_INFO_FLAG_MMAP) == 0) { 43245ef232aSNipun Gupta free(reg); 43345ef232aSNipun Gupta continue; 43445ef232aSNipun Gupta } 43545ef232aSNipun Gupta 43645ef232aSNipun Gupta /* try mapping somewhere close to the end of hugepages */ 43745ef232aSNipun Gupta if (cdx_map_addr == NULL) 43845ef232aSNipun Gupta cdx_map_addr = cdx_find_max_end_va(); 43945ef232aSNipun Gupta 44045ef232aSNipun Gupta vaddr = cdx_map_addr; 44145ef232aSNipun Gupta cdx_map_addr = RTE_PTR_ADD(vaddr, (size_t)reg->size); 44245ef232aSNipun Gupta 44345ef232aSNipun Gupta cdx_map_addr = RTE_PTR_ALIGN(cdx_map_addr, 44445ef232aSNipun Gupta sysconf(_SC_PAGE_SIZE)); 44545ef232aSNipun Gupta 44645ef232aSNipun Gupta maps[i].addr = vaddr; 44745ef232aSNipun Gupta maps[i].offset = reg->offset; 44845ef232aSNipun Gupta maps[i].size = reg->size; 44945ef232aSNipun Gupta maps[i].path = NULL; /* vfio doesn't have per-resource paths */ 45045ef232aSNipun Gupta 45145ef232aSNipun Gupta ret = cdx_vfio_mmap_resource(vfio_dev_fd, vfio_res, i, 0); 45245ef232aSNipun Gupta if (ret < 0) { 45345ef232aSNipun Gupta CDX_BUS_ERR("%s mapping region %i failed: %s", 45445ef232aSNipun Gupta cdx_addr, i, strerror(errno)); 45545ef232aSNipun Gupta free(reg); 45645ef232aSNipun Gupta goto err_vfio_res; 45745ef232aSNipun Gupta } 45845ef232aSNipun Gupta 45945ef232aSNipun Gupta dev->mem_resource[i].addr = maps[i].addr; 46045ef232aSNipun Gupta dev->mem_resource[i].len = maps[i].size; 46145ef232aSNipun Gupta 46245ef232aSNipun Gupta free(reg); 46345ef232aSNipun Gupta } 46445ef232aSNipun Gupta 465f29fb5caSNipun Gupta if (cdx_vfio_setup_device(dev, vfio_dev_fd, device_info.num_irqs) < 0) { 46645ef232aSNipun Gupta CDX_BUS_ERR("%s setup device failed", dev_name); 46745ef232aSNipun Gupta goto err_vfio_res; 46845ef232aSNipun Gupta } 46945ef232aSNipun Gupta 47045ef232aSNipun Gupta TAILQ_INSERT_TAIL(vfio_res_list, vfio_res, next); 47145ef232aSNipun Gupta 47245ef232aSNipun Gupta return 0; 47345ef232aSNipun Gupta err_vfio_res: 47445ef232aSNipun Gupta cdx_vfio_find_and_unmap_resource(vfio_res_list, dev); 47545ef232aSNipun Gupta rte_free(vfio_res); 47645ef232aSNipun Gupta err_vfio_dev_fd: 47745ef232aSNipun Gupta rte_vfio_release_device(RTE_CDX_BUS_DEVICES_PATH, dev_name, vfio_dev_fd); 47845ef232aSNipun Gupta return -1; 47945ef232aSNipun Gupta } 48045ef232aSNipun Gupta 48145ef232aSNipun Gupta static int 48245ef232aSNipun Gupta cdx_vfio_map_resource_secondary(struct rte_cdx_device *dev) 48345ef232aSNipun Gupta { 48445ef232aSNipun Gupta struct vfio_device_info device_info = { .argsz = sizeof(device_info) }; 48545ef232aSNipun Gupta char cdx_addr[PATH_MAX] = {0}; 48645ef232aSNipun Gupta int vfio_dev_fd; 48745ef232aSNipun Gupta int i, ret; 48845ef232aSNipun Gupta struct mapped_cdx_resource *vfio_res = NULL; 48945ef232aSNipun Gupta struct mapped_cdx_res_list *vfio_res_list = 49045ef232aSNipun Gupta RTE_TAILQ_CAST(cdx_vfio_tailq.head, mapped_cdx_res_list); 49145ef232aSNipun Gupta const char *dev_name = dev->device.name; 49245ef232aSNipun Gupta struct cdx_map *maps; 49345ef232aSNipun Gupta 494f29fb5caSNipun Gupta if (rte_intr_fd_set(dev->intr_handle, -1)) 495f29fb5caSNipun Gupta return -1; 496f29fb5caSNipun Gupta 49745ef232aSNipun Gupta /* if we're in a secondary process, just find our tailq entry */ 49845ef232aSNipun Gupta TAILQ_FOREACH(vfio_res, vfio_res_list, next) { 49945ef232aSNipun Gupta if (strcmp(vfio_res->name, dev_name)) 50045ef232aSNipun Gupta continue; 50145ef232aSNipun Gupta break; 50245ef232aSNipun Gupta } 50345ef232aSNipun Gupta /* if we haven't found our tailq entry, something's wrong */ 50445ef232aSNipun Gupta if (vfio_res == NULL) { 50545ef232aSNipun Gupta CDX_BUS_ERR("%s cannot find TAILQ entry for cdx device!", 50645ef232aSNipun Gupta dev_name); 50745ef232aSNipun Gupta return -1; 50845ef232aSNipun Gupta } 50945ef232aSNipun Gupta 51045ef232aSNipun Gupta ret = rte_vfio_setup_device(RTE_CDX_BUS_DEVICES_PATH, dev_name, 51145ef232aSNipun Gupta &vfio_dev_fd, &device_info); 51245ef232aSNipun Gupta if (ret) 51345ef232aSNipun Gupta return ret; 51445ef232aSNipun Gupta 51545ef232aSNipun Gupta /* map MMIO regions */ 51645ef232aSNipun Gupta maps = vfio_res->maps; 51745ef232aSNipun Gupta 51845ef232aSNipun Gupta for (i = 0; i < vfio_res->nb_maps; i++) { 51945ef232aSNipun Gupta ret = cdx_vfio_mmap_resource(vfio_dev_fd, vfio_res, i, MAP_FIXED); 52045ef232aSNipun Gupta if (ret < 0) { 52145ef232aSNipun Gupta CDX_BUS_ERR("%s mapping MMIO region %i failed: %s", 52245ef232aSNipun Gupta dev_name, i, strerror(errno)); 52345ef232aSNipun Gupta goto err_vfio_dev_fd; 52445ef232aSNipun Gupta } 52545ef232aSNipun Gupta 52645ef232aSNipun Gupta dev->mem_resource[i].addr = maps[i].addr; 52745ef232aSNipun Gupta dev->mem_resource[i].len = maps[i].size; 52845ef232aSNipun Gupta } 52945ef232aSNipun Gupta 530f29fb5caSNipun Gupta /* we need save vfio_dev_fd, so it can be used during release */ 531f29fb5caSNipun Gupta if (rte_intr_dev_fd_set(dev->intr_handle, vfio_dev_fd)) 532f29fb5caSNipun Gupta goto err_vfio_dev_fd; 533f29fb5caSNipun Gupta 53445ef232aSNipun Gupta return 0; 53545ef232aSNipun Gupta err_vfio_dev_fd: 53645ef232aSNipun Gupta rte_vfio_release_device(RTE_CDX_BUS_DEVICES_PATH, cdx_addr, vfio_dev_fd); 53745ef232aSNipun Gupta return -1; 53845ef232aSNipun Gupta } 53945ef232aSNipun Gupta 54045ef232aSNipun Gupta /* 54145ef232aSNipun Gupta * map the CDX resources of a CDX device in virtual memory (VFIO version). 54245ef232aSNipun Gupta * primary and secondary processes follow almost exactly the same path 54345ef232aSNipun Gupta */ 54445ef232aSNipun Gupta int 54545ef232aSNipun Gupta cdx_vfio_map_resource(struct rte_cdx_device *dev) 54645ef232aSNipun Gupta { 54745ef232aSNipun Gupta if (rte_eal_process_type() == RTE_PROC_PRIMARY) 54845ef232aSNipun Gupta return cdx_vfio_map_resource_primary(dev); 54945ef232aSNipun Gupta else 55045ef232aSNipun Gupta return cdx_vfio_map_resource_secondary(dev); 55145ef232aSNipun Gupta } 552f29fb5caSNipun Gupta 553f29fb5caSNipun Gupta int 554f29fb5caSNipun Gupta rte_cdx_vfio_intr_enable(const struct rte_intr_handle *intr_handle) 555f29fb5caSNipun Gupta { 556f29fb5caSNipun Gupta char irq_set_buf[MSI_IRQ_SET_BUF_LEN]; 557f29fb5caSNipun Gupta struct vfio_irq_set *irq_set; 558f29fb5caSNipun Gupta int *fd_ptr, vfio_dev_fd, i; 559f29fb5caSNipun Gupta int ret; 560f29fb5caSNipun Gupta 561f29fb5caSNipun Gupta irq_set = (struct vfio_irq_set *) irq_set_buf; 562f29fb5caSNipun Gupta irq_set->count = rte_intr_nb_intr_get(intr_handle); 563f29fb5caSNipun Gupta irq_set->argsz = sizeof(struct vfio_irq_set) + 564f29fb5caSNipun Gupta (sizeof(int) * irq_set->count); 565f29fb5caSNipun Gupta 566f29fb5caSNipun Gupta irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER; 567f29fb5caSNipun Gupta irq_set->index = 0; 568f29fb5caSNipun Gupta irq_set->start = 0; 569f29fb5caSNipun Gupta fd_ptr = (int *) &irq_set->data; 570f29fb5caSNipun Gupta 571f29fb5caSNipun Gupta for (i = 0; i < rte_intr_nb_efd_get(intr_handle); i++) 572f29fb5caSNipun Gupta fd_ptr[i] = rte_intr_efds_index_get(intr_handle, i); 573f29fb5caSNipun Gupta 574f29fb5caSNipun Gupta vfio_dev_fd = rte_intr_dev_fd_get(intr_handle); 575f29fb5caSNipun Gupta ret = ioctl(vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); 576f29fb5caSNipun Gupta 577f29fb5caSNipun Gupta if (ret) { 578f29fb5caSNipun Gupta CDX_BUS_ERR("Error enabling MSI interrupts for fd %d", 579f29fb5caSNipun Gupta rte_intr_fd_get(intr_handle)); 580f29fb5caSNipun Gupta return -1; 581f29fb5caSNipun Gupta } 582f29fb5caSNipun Gupta 583f29fb5caSNipun Gupta return 0; 584f29fb5caSNipun Gupta } 585f29fb5caSNipun Gupta 586f29fb5caSNipun Gupta /* disable MSI interrupts */ 587f29fb5caSNipun Gupta int 588f29fb5caSNipun Gupta rte_cdx_vfio_intr_disable(const struct rte_intr_handle *intr_handle) 589f29fb5caSNipun Gupta { 590f29fb5caSNipun Gupta struct vfio_irq_set *irq_set; 591f29fb5caSNipun Gupta char irq_set_buf[MSI_IRQ_SET_BUF_LEN]; 592f29fb5caSNipun Gupta int len, ret, vfio_dev_fd; 593f29fb5caSNipun Gupta 594f29fb5caSNipun Gupta len = sizeof(struct vfio_irq_set); 595f29fb5caSNipun Gupta 596f29fb5caSNipun Gupta irq_set = (struct vfio_irq_set *) irq_set_buf; 597f29fb5caSNipun Gupta irq_set->argsz = len; 598f29fb5caSNipun Gupta irq_set->count = 0; 599f29fb5caSNipun Gupta irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER; 600f29fb5caSNipun Gupta irq_set->index = 0; 601f29fb5caSNipun Gupta irq_set->start = 0; 602f29fb5caSNipun Gupta 603f29fb5caSNipun Gupta vfio_dev_fd = rte_intr_dev_fd_get(intr_handle); 604f29fb5caSNipun Gupta ret = ioctl(vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); 605f29fb5caSNipun Gupta 606f29fb5caSNipun Gupta if (ret) 607f29fb5caSNipun Gupta CDX_BUS_ERR("Error disabling MSI interrupts for fd %d", 608f29fb5caSNipun Gupta rte_intr_fd_get(intr_handle)); 609f29fb5caSNipun Gupta 610f29fb5caSNipun Gupta return ret; 611f29fb5caSNipun Gupta } 6122284d672SShubham Rohila 6132284d672SShubham Rohila /* Enable Bus Mastering */ 6142284d672SShubham Rohila int 6152284d672SShubham Rohila rte_cdx_vfio_bm_enable(struct rte_cdx_device *dev) 6162284d672SShubham Rohila { 6172284d672SShubham Rohila struct vfio_device_info device_info = { .argsz = sizeof(device_info) }; 6182284d672SShubham Rohila struct vfio_device_feature_bus_master *vfio_bm_feature; 6192284d672SShubham Rohila struct vfio_device_feature *feature; 6202284d672SShubham Rohila int vfio_dev_fd, ret; 6212284d672SShubham Rohila size_t argsz; 6222284d672SShubham Rohila 6232284d672SShubham Rohila vfio_dev_fd = rte_intr_dev_fd_get(dev->intr_handle); 6242284d672SShubham Rohila if (vfio_dev_fd < 0) 6252284d672SShubham Rohila return -1; 6262284d672SShubham Rohila 6272284d672SShubham Rohila argsz = sizeof(struct vfio_device_feature) + sizeof(struct vfio_device_feature_bus_master); 6282284d672SShubham Rohila 6292284d672SShubham Rohila feature = (struct vfio_device_feature *)malloc(argsz); 6302284d672SShubham Rohila if (!feature) 6312284d672SShubham Rohila return -ENOMEM; 6322284d672SShubham Rohila 6332284d672SShubham Rohila vfio_bm_feature = (struct vfio_device_feature_bus_master *) feature->data; 6342284d672SShubham Rohila 6352284d672SShubham Rohila feature->argsz = argsz; 6362284d672SShubham Rohila 6372284d672SShubham Rohila feature->flags = RTE_VFIO_DEVICE_FEATURE_BUS_MASTER | VFIO_DEVICE_FEATURE_PROBE; 6382284d672SShubham Rohila feature->flags |= VFIO_DEVICE_FEATURE_SET; 6392284d672SShubham Rohila ret = ioctl(vfio_dev_fd, RTE_VFIO_DEVICE_FEATURE, feature); 6402284d672SShubham Rohila if (ret) { 641*f665790aSDavid Marchand CDX_BUS_ERR("Bus Master configuring not supported for device: %s, error: %d (%s)", 6422284d672SShubham Rohila dev->name, errno, strerror(errno)); 6432284d672SShubham Rohila free(feature); 6442284d672SShubham Rohila return ret; 6452284d672SShubham Rohila } 6462284d672SShubham Rohila 6472284d672SShubham Rohila feature->flags = RTE_VFIO_DEVICE_FEATURE_BUS_MASTER | VFIO_DEVICE_FEATURE_SET; 6482284d672SShubham Rohila vfio_bm_feature->op = VFIO_DEVICE_FEATURE_SET_MASTER; 6492284d672SShubham Rohila ret = ioctl(vfio_dev_fd, RTE_VFIO_DEVICE_FEATURE, feature); 6502284d672SShubham Rohila if (ret < 0) 651*f665790aSDavid Marchand CDX_BUS_ERR("BM Enable Error for device: %s, Error: %d (%s)", 6522284d672SShubham Rohila dev->name, errno, strerror(errno)); 6532284d672SShubham Rohila 6542284d672SShubham Rohila free(feature); 6552284d672SShubham Rohila return ret; 6562284d672SShubham Rohila } 6572284d672SShubham Rohila 6582284d672SShubham Rohila /* Disable Bus Mastering */ 6592284d672SShubham Rohila int 6602284d672SShubham Rohila rte_cdx_vfio_bm_disable(struct rte_cdx_device *dev) 6612284d672SShubham Rohila { 6622284d672SShubham Rohila struct vfio_device_feature_bus_master *vfio_bm_feature; 6632284d672SShubham Rohila struct vfio_device_feature *feature; 6642284d672SShubham Rohila int vfio_dev_fd, ret; 6652284d672SShubham Rohila size_t argsz; 6662284d672SShubham Rohila 6672284d672SShubham Rohila vfio_dev_fd = rte_intr_dev_fd_get(dev->intr_handle); 6682284d672SShubham Rohila if (vfio_dev_fd < 0) 6692284d672SShubham Rohila return -1; 6702284d672SShubham Rohila 6712284d672SShubham Rohila argsz = sizeof(struct vfio_device_feature) + sizeof(struct vfio_device_feature_bus_master); 6722284d672SShubham Rohila 6732284d672SShubham Rohila feature = (struct vfio_device_feature *)malloc(argsz); 6742284d672SShubham Rohila if (!feature) 6752284d672SShubham Rohila return -ENOMEM; 6762284d672SShubham Rohila 6772284d672SShubham Rohila vfio_bm_feature = (struct vfio_device_feature_bus_master *) feature->data; 6782284d672SShubham Rohila 6792284d672SShubham Rohila feature->argsz = argsz; 6802284d672SShubham Rohila 6812284d672SShubham Rohila feature->flags = RTE_VFIO_DEVICE_FEATURE_BUS_MASTER | VFIO_DEVICE_FEATURE_PROBE; 6822284d672SShubham Rohila feature->flags |= VFIO_DEVICE_FEATURE_SET; 6832284d672SShubham Rohila ret = ioctl(vfio_dev_fd, RTE_VFIO_DEVICE_FEATURE, feature); 6842284d672SShubham Rohila if (ret) { 685*f665790aSDavid Marchand CDX_BUS_ERR("Bus Master configuring not supported for device: %s, Error: %d (%s)", 6862284d672SShubham Rohila dev->name, errno, strerror(errno)); 6872284d672SShubham Rohila free(feature); 6882284d672SShubham Rohila return ret; 6892284d672SShubham Rohila } 6902284d672SShubham Rohila 6912284d672SShubham Rohila feature->flags = RTE_VFIO_DEVICE_FEATURE_BUS_MASTER | VFIO_DEVICE_FEATURE_SET; 6922284d672SShubham Rohila vfio_bm_feature->op = VFIO_DEVICE_FEATURE_CLEAR_MASTER; 6932284d672SShubham Rohila ret = ioctl(vfio_dev_fd, RTE_VFIO_DEVICE_FEATURE, feature); 6942284d672SShubham Rohila if (ret < 0) 695*f665790aSDavid Marchand CDX_BUS_ERR("BM Disable Error for device: %s, Error: %d (%s)", 6962284d672SShubham Rohila dev->name, errno, strerror(errno)); 6972284d672SShubham Rohila 6982284d672SShubham Rohila free(feature); 6992284d672SShubham Rohila return ret; 7002284d672SShubham Rohila } 701