/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */

#include <unistd.h>
#include <string.h>
#include <fcntl.h>
#include <sys/eventfd.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <stdbool.h>

#include <rte_log.h>
#include <rte_pci.h>
#include <rte_bus_pci.h>
#include <rte_eal_paging.h>
#include <rte_malloc.h>
#include <rte_vfio.h>
#include <rte_eal.h>
#include <bus_driver.h>
#include <rte_spinlock.h>
#include <rte_tailq.h>

#include "eal_filesystem.h"

#include "pci_init.h"
#include "private.h"

/**
 * @file
 * PCI probing using Linux VFIO.
 *
 * This code tries to determine if the PCI device is bound to VFIO driver,
 * and initialize it (map BARs, set up interrupts) if that's the case.
 */

#ifdef VFIO_PRESENT

static struct rte_tailq_elem rte_vfio_tailq = {
	.name = "VFIO_RESOURCE_LIST",
};
EAL_REGISTER_TAILQ(rte_vfio_tailq)

static int
pci_vfio_get_region(const struct rte_pci_device *dev, int index,
		    uint64_t *size, uint64_t *offset)
{
	const struct rte_pci_device_internal *pdev =
		RTE_PCI_DEVICE_INTERNAL_CONST(dev);

	if (index >= VFIO_PCI_NUM_REGIONS || index >= RTE_MAX_PCI_REGIONS)
		return -1;

	if (pdev->region[index].size == 0 && pdev->region[index].offset == 0)
		return -1;

	*size = pdev->region[index].size;
	*offset = pdev->region[index].offset;

	return 0;
}

int
pci_vfio_read_config(const struct rte_pci_device *dev,
		     void *buf, size_t len, off_t offs)
{
	uint64_t size, offset;
	int fd;

	fd = rte_intr_dev_fd_get(dev->intr_handle);
	if (fd < 0)
		return -1;

	if (pci_vfio_get_region(dev, VFIO_PCI_CONFIG_REGION_INDEX,
			&size, &offset) != 0)
		return -1;

	if ((uint64_t)len + offs > size)
		return -1;

	return pread(fd, buf, len, offset + offs);
}
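
/*
 * Illustrative usage sketch (not part of the upstream file): drivers
 * normally reach this helper through the generic rte_pci_read_config()
 * wrapper, which dispatches here for VFIO-bound devices. For example,
 * reading the 16-bit PCI command register:
 *
 *	uint16_t cmd;
 *
 *	if (rte_pci_read_config(dev, &cmd, sizeof(cmd),
 *			RTE_PCI_COMMAND) != sizeof(cmd))
 *		return -1;
 */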

int
pci_vfio_write_config(const struct rte_pci_device *dev,
		      const void *buf, size_t len, off_t offs)
{
	uint64_t size, offset;
	int fd;

	fd = rte_intr_dev_fd_get(dev->intr_handle);
	if (fd < 0)
		return -1;

	if (pci_vfio_get_region(dev, VFIO_PCI_CONFIG_REGION_INDEX,
			&size, &offset) != 0)
		return -1;

	if ((uint64_t)len + offs > size)
		return -1;

	return pwrite(fd, buf, len, offset + offs);
}

/* get PCI BAR number where MSI-X interrupts are */
static int
pci_vfio_get_msix_bar(const struct rte_pci_device *dev,
	struct pci_msix_table *msix_table)
{
	off_t cap_offset;

	cap_offset = rte_pci_find_capability(dev, RTE_PCI_CAP_ID_MSIX);
	if (cap_offset < 0)
		return -1;

	if (cap_offset != 0) {
		uint16_t flags;
		uint32_t reg;

		if (rte_pci_read_config(dev, &reg, sizeof(reg), cap_offset +
				RTE_PCI_MSIX_TABLE) < 0) {
			PCI_LOG(ERR, "Cannot read MSIX table from PCI config space!");
			return -1;
		}

		if (rte_pci_read_config(dev, &flags, sizeof(flags), cap_offset +
				RTE_PCI_MSIX_FLAGS) < 0) {
			PCI_LOG(ERR, "Cannot read MSIX flags from PCI config space!");
			return -1;
		}

		msix_table->bar_index = reg & RTE_PCI_MSIX_TABLE_BIR;
		msix_table->offset = reg & RTE_PCI_MSIX_TABLE_OFFSET;
		msix_table->size = 16 * (1 + (flags & RTE_PCI_MSIX_FLAGS_QSIZE));
	}

	return 0;
}
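
/*
 * Illustrative note: the PCI MSI-X capability encodes the table size as
 * (number of vectors - 1) in the RTE_PCI_MSIX_FLAGS_QSIZE field, and each
 * table entry is 16 bytes, hence the "16 * (1 + qsize)" computation above.
 * For example, a device advertising a QSIZE field of 7 has 8 vectors and a
 * 128-byte MSI-X table.
 */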

/* enable PCI bus memory space */
static int
pci_vfio_enable_bus_memory(struct rte_pci_device *dev, int dev_fd)
{
	uint64_t size, offset;
	uint16_t cmd;
	int ret;

	if (pci_vfio_get_region(dev, VFIO_PCI_CONFIG_REGION_INDEX,
			&size, &offset) != 0) {
		PCI_LOG(ERR, "Cannot get offset of CONFIG region.");
		return -1;
	}

	ret = pread(dev_fd, &cmd, sizeof(cmd), offset + RTE_PCI_COMMAND);

	if (ret != sizeof(cmd)) {
		PCI_LOG(ERR, "Cannot read command from PCI config space!");
		return -1;
	}

	if (cmd & RTE_PCI_COMMAND_MEMORY)
		return 0;

	cmd |= RTE_PCI_COMMAND_MEMORY;
	ret = pwrite(dev_fd, &cmd, sizeof(cmd), offset + RTE_PCI_COMMAND);

	if (ret != sizeof(cmd)) {
		PCI_LOG(ERR, "Cannot write command to PCI config space!");
		return -1;
	}

	return 0;
}
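
/*
 * Illustrative note: the read-modify-write above only touches the memory
 * space enable bit of the PCI command register. For example, a device that
 * reads back a command value of 0x0000 is written back with
 * RTE_PCI_COMMAND_MEMORY set so that its BAR accesses are decoded again;
 * the kernel does not always leave this bit set (e.g. after a bus reset or
 * when firmware left it clear).
 */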

/* set up interrupt support (but not enable interrupts) */
static int
pci_vfio_setup_interrupts(struct rte_pci_device *dev, int vfio_dev_fd)
{
	int i, ret, intr_idx;
	enum rte_intr_mode intr_mode;

	/* default to invalid index */
	intr_idx = VFIO_PCI_NUM_IRQS;

	/* Get default / configured intr_mode */
	intr_mode = rte_eal_vfio_intr_mode();

	/* get interrupt type from internal config (MSI-X by default, can be
	 * overridden from the command line)
	 */
	switch (intr_mode) {
	case RTE_INTR_MODE_MSIX:
		intr_idx = VFIO_PCI_MSIX_IRQ_INDEX;
		break;
	case RTE_INTR_MODE_MSI:
		intr_idx = VFIO_PCI_MSI_IRQ_INDEX;
		break;
	case RTE_INTR_MODE_LEGACY:
		intr_idx = VFIO_PCI_INTX_IRQ_INDEX;
		break;
	/* don't do anything if we want to automatically determine interrupt type */
	case RTE_INTR_MODE_NONE:
		break;
	default:
		PCI_LOG(ERR, "Unknown default interrupt type!");
		return -1;
	}

	/* start from MSI-X interrupt type */
	for (i = VFIO_PCI_MSIX_IRQ_INDEX; i >= 0; i--) {
		struct vfio_irq_info irq = { .argsz = sizeof(irq) };
		int fd = -1;

		/* skip interrupt modes we don't want */
		if (intr_mode != RTE_INTR_MODE_NONE &&
				i != intr_idx)
			continue;

		irq.index = i;

		ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_IRQ_INFO, &irq);
		if (ret < 0) {
			PCI_LOG(ERR, "Cannot get VFIO IRQ info, error %i (%s)",
				errno, strerror(errno));
			return -1;
		}

		/* if this vector cannot be used with eventfd, fail if we explicitly
		 * specified interrupt type, otherwise continue */
		if ((irq.flags & VFIO_IRQ_INFO_EVENTFD) == 0) {
			if (intr_mode != RTE_INTR_MODE_NONE) {
				PCI_LOG(ERR, "Interrupt vector does not support eventfd!");
				return -1;
			} else
				continue;
		}

		/* Reallocate the efds and elist fields of intr_handle based
		 * on PCI device MSIX size.
		 */
		if (i == VFIO_PCI_MSIX_IRQ_INDEX &&
				(uint32_t)rte_intr_nb_intr_get(dev->intr_handle) < irq.count &&
				rte_intr_event_list_update(dev->intr_handle, irq.count))
			return -1;

		/* set up an eventfd for interrupts */
		fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
		if (fd < 0) {
			PCI_LOG(ERR, "Cannot set up eventfd, error %i (%s)",
				errno, strerror(errno));
			return -1;
		}

		if (rte_intr_fd_set(dev->intr_handle, fd))
			return -1;

		switch (i) {
		case VFIO_PCI_MSIX_IRQ_INDEX:
			intr_mode = RTE_INTR_MODE_MSIX;
			rte_intr_type_set(dev->intr_handle,
				RTE_INTR_HANDLE_VFIO_MSIX);
			break;
		case VFIO_PCI_MSI_IRQ_INDEX:
			intr_mode = RTE_INTR_MODE_MSI;
			rte_intr_type_set(dev->intr_handle,
				RTE_INTR_HANDLE_VFIO_MSI);
			break;
		case VFIO_PCI_INTX_IRQ_INDEX:
			intr_mode = RTE_INTR_MODE_LEGACY;
			rte_intr_type_set(dev->intr_handle,
				RTE_INTR_HANDLE_VFIO_LEGACY);
			break;
		default:
			PCI_LOG(ERR, "Unknown interrupt type!");
			return -1;
		}

		return 0;
	}

	/* if we're here, we haven't found a suitable interrupt vector */
	return -1;
}
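
/*
 * Usage note: the mode consumed above via rte_eal_vfio_intr_mode() comes
 * from the EAL command line. For example, an application can force legacy
 * INTx instead of the MSI-X default with:
 *
 *	./app -a 0000:01:00.0 --vfio-intr=legacy
 *
 * With no --vfio-intr option, the loop above probes MSI-X, then MSI, then
 * INTx, and settles on the first vector type that supports eventfd.
 */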

#ifdef HAVE_VFIO_DEV_REQ_INTERFACE
/*
 * Spinlock for device hot-unplug failure handling.
 * Any code in the failure path that accesses the bus or a device, such as
 * handling SIGBUS on the bus or handling a memory failure for a device,
 * just needs to take this lock. It protects the bus and the device from
 * race conditions.
 */
static rte_spinlock_t failure_handle_lock = RTE_SPINLOCK_INITIALIZER;

static void
pci_vfio_req_handler(void *param)
{
	struct rte_bus *bus;
	int ret;
	struct rte_device *device = (struct rte_device *)param;

	rte_spinlock_lock(&failure_handle_lock);
	bus = rte_bus_find_by_device(device);
	if (bus == NULL) {
		PCI_LOG(ERR, "Cannot find bus for device (%s)", device->name);
		goto handle_end;
	}

	/*
	 * The vfio kernel module requests user space to release allocated
	 * resources before the device is deleted in the kernel, so we can
	 * directly call the vfio bus hot-unplug handler to process it.
	 */
	ret = bus->hot_unplug_handler(device);
	if (ret)
		PCI_LOG(ERR, "Can not handle hot-unplug for device (%s)", device->name);
handle_end:
	rte_spinlock_unlock(&failure_handle_lock);
}

/* enable notifier (only enable req now) */
static int
pci_vfio_enable_notifier(struct rte_pci_device *dev, int vfio_dev_fd)
{
	int ret;
	int fd = -1;

	/* set up an eventfd for req notifier */
	fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
	if (fd < 0) {
		PCI_LOG(ERR, "Cannot set up eventfd, error %i (%s)",
			errno, strerror(errno));
		return -1;
	}

	if (rte_intr_fd_set(dev->vfio_req_intr_handle, fd))
		return -1;

	if (rte_intr_type_set(dev->vfio_req_intr_handle, RTE_INTR_HANDLE_VFIO_REQ))
		return -1;

	if (rte_intr_dev_fd_set(dev->vfio_req_intr_handle, vfio_dev_fd))
		return -1;

	ret = rte_intr_callback_register(dev->vfio_req_intr_handle,
					 pci_vfio_req_handler,
					 (void *)&dev->device);
	if (ret) {
		PCI_LOG(ERR, "Fail to register req notifier handler.");
		goto error;
	}

	ret = rte_intr_enable(dev->vfio_req_intr_handle);
	if (ret) {
		PCI_LOG(ERR, "Fail to enable req notifier.");
		ret = rte_intr_callback_unregister(dev->vfio_req_intr_handle,
						   pci_vfio_req_handler,
						   (void *)&dev->device);
		if (ret < 0)
			PCI_LOG(ERR, "Fail to unregister req notifier handler.");
		goto error;
	}

	return 0;
error:
	close(fd);

	rte_intr_fd_set(dev->vfio_req_intr_handle, -1);
	rte_intr_type_set(dev->vfio_req_intr_handle, RTE_INTR_HANDLE_UNKNOWN);
	rte_intr_dev_fd_set(dev->vfio_req_intr_handle, -1);

	return -1;
}
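
/*
 * Illustrative note: once rte_intr_enable() above succeeds, the kernel
 * signals the VFIO "req" interrupt through the registered eventfd whenever
 * it wants the device back (e.g. on hot-unplug), which ends up invoking
 * pci_vfio_req_handler() from the EAL interrupt thread.
 */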

/* disable notifier (only disable req now) */
static int
pci_vfio_disable_notifier(struct rte_pci_device *dev)
{
	int ret;

	ret = rte_intr_disable(dev->vfio_req_intr_handle);
	if (ret) {
		PCI_LOG(ERR, "fail to disable req notifier.");
		return -1;
	}

	ret = rte_intr_callback_unregister_sync(dev->vfio_req_intr_handle,
						pci_vfio_req_handler,
						(void *)&dev->device);
	if (ret < 0) {
		PCI_LOG(ERR, "fail to unregister req notifier handler.");
		return -1;
	}

	close(rte_intr_fd_get(dev->vfio_req_intr_handle));

	rte_intr_fd_set(dev->vfio_req_intr_handle, -1);
	rte_intr_type_set(dev->vfio_req_intr_handle, RTE_INTR_HANDLE_UNKNOWN);
	rte_intr_dev_fd_set(dev->vfio_req_intr_handle, -1);

	return 0;
}
#endif

static int
pci_vfio_is_ioport_bar(const struct rte_pci_device *dev, int vfio_dev_fd,
	int bar_index)
{
	uint64_t size, offset;
	uint32_t ioport_bar;
	int ret;

	if (pci_vfio_get_region(dev, VFIO_PCI_CONFIG_REGION_INDEX,
			&size, &offset) != 0) {
		PCI_LOG(ERR, "Cannot get offset of CONFIG region.");
		return -1;
	}

	ret = pread(vfio_dev_fd, &ioport_bar, sizeof(ioport_bar),
		    offset + RTE_PCI_BASE_ADDRESS_0 + bar_index * 4);
	if (ret != sizeof(ioport_bar)) {
		PCI_LOG(ERR, "Cannot read command (%x) from config space!",
			RTE_PCI_BASE_ADDRESS_0 + bar_index * 4);
		return -1;
	}

	return (ioport_bar & RTE_PCI_BASE_ADDRESS_SPACE_IO) != 0;
}
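
/*
 * Illustrative note: per the PCI specification, bit 0 of a BAR
 * distinguishes I/O space (1) from memory space (0), which is what
 * RTE_PCI_BASE_ADDRESS_SPACE_IO tests above. For example, a BAR reading
 * back as 0x0000e801 is an I/O port range based at 0xe800.
 */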

static int
pci_rte_vfio_setup_device(struct rte_pci_device *dev, int vfio_dev_fd)
{
	if (pci_vfio_setup_interrupts(dev, vfio_dev_fd) != 0) {
		PCI_LOG(ERR, "Error setting up interrupts!");
		return -1;
	}

	if (pci_vfio_enable_bus_memory(dev, vfio_dev_fd)) {
		PCI_LOG(ERR, "Cannot enable bus memory!");
		return -1;
	}

	if (rte_pci_set_bus_master(dev, true)) {
		PCI_LOG(ERR, "Cannot set up bus mastering!");
		return -1;
	}

	/*
	 * Reset the device. If the device is not capable of resetting,
	 * then it updates errno as EINVAL.
	 */
	if (ioctl(vfio_dev_fd, VFIO_DEVICE_RESET) && errno != EINVAL) {
		PCI_LOG(ERR, "Unable to reset device! Error: %d (%s)", errno, strerror(errno));
		return -1;
	}

	return 0;
}

static int
pci_vfio_mmap_bar(int vfio_dev_fd, struct mapped_pci_resource *vfio_res,
		int bar_index, int additional_flags)
{
	struct memreg {
		uint64_t offset;
		size_t size;
	} memreg[2] = {};
	void *bar_addr;
	struct pci_msix_table *msix_table = &vfio_res->msix_table;
	struct pci_map *bar = &vfio_res->maps[bar_index];

	if (bar->size == 0) {
		PCI_LOG(DEBUG, "Bar size is 0, skip BAR%d", bar_index);
		return 0;
	}

	if (msix_table->bar_index == bar_index) {
		/*
		 * VFIO will not let us map the MSI-X table,
		 * but we can map around it.
		 */
		uint32_t table_start = msix_table->offset;
		uint32_t table_end = table_start + msix_table->size;
		table_end = RTE_ALIGN(table_end, rte_mem_page_size());
		table_start = RTE_ALIGN_FLOOR(table_start, rte_mem_page_size());

		/* If page-aligned start of MSI-X table is less than the
		 * actual MSI-X table start address, reassign to the actual
		 * start address.
		 */
		if (table_start < msix_table->offset)
			table_start = msix_table->offset;

		if (table_start == 0 && table_end >= bar->size) {
			/* Cannot map this BAR */
			PCI_LOG(DEBUG, "Skipping BAR%d", bar_index);
			bar->size = 0;
			bar->addr = 0;
			return 0;
		}

		memreg[0].offset = bar->offset;
		memreg[0].size = table_start;
		if (bar->size < table_end) {
			/*
			 * If MSI-X table end is beyond BAR end, don't attempt
			 * to perform second mapping.
			 */
			memreg[1].offset = 0;
			memreg[1].size = 0;
		} else {
			memreg[1].offset = bar->offset + table_end;
			memreg[1].size = bar->size - table_end;
		}

		PCI_LOG(DEBUG, "Trying to map BAR%d that contains the MSI-X table. "
			"Trying offsets: 0x%04" PRIx64 ":0x%04zx, 0x%04" PRIx64 ":0x%04zx",
			bar_index,
			memreg[0].offset, memreg[0].size,
			memreg[1].offset, memreg[1].size);
	} else {
		memreg[0].offset = bar->offset;
		memreg[0].size = bar->size;
	}

	/* reserve the address using an inaccessible mapping */
	bar_addr = mmap(bar->addr, bar->size, 0, MAP_PRIVATE |
			MAP_ANONYMOUS | additional_flags, -1, 0);
	if (bar_addr != MAP_FAILED) {
		void *map_addr = NULL;
		if (memreg[0].size) {
			/* actual map of first part */
			map_addr = pci_map_resource(bar_addr, vfio_dev_fd,
						    memreg[0].offset,
						    memreg[0].size,
						    RTE_MAP_FORCE_ADDRESS);
		}

		/*
		 * Regarding "memreg[0].size == 0":
		 * If this BAR has MSI-X table, memreg[0].size (the
		 * first part or the part before the table) can
		 * legitimately be 0 for hardware using vector table
		 * offset 0 (i.e. first part does not exist).
		 *
		 * When memreg[0].size is 0, "mapping the first part"
		 * never happens, and map_addr is NULL at this
		 * point. So check that mapping has been actually
		 * attempted.
		 */
		/* if there's a second part, try to map it */
		if ((map_addr != NULL || memreg[0].size == 0)
			&& memreg[1].offset && memreg[1].size) {
			void *second_addr = RTE_PTR_ADD(bar_addr,
						(uintptr_t)(memreg[1].offset -
						bar->offset));
			map_addr = pci_map_resource(second_addr,
						    vfio_dev_fd,
						    memreg[1].offset,
						    memreg[1].size,
						    RTE_MAP_FORCE_ADDRESS);
		}

		if (map_addr == NULL) {
			munmap(bar_addr, bar->size);
			bar_addr = MAP_FAILED;
			PCI_LOG(ERR, "Failed to map pci BAR%d", bar_index);
			return -1;
		}
	} else {
		PCI_LOG(ERR, "Failed to create inaccessible mapping for BAR%d", bar_index);
		return -1;
	}

	bar->addr = bar_addr;
	return 0;
}
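
/*
 * Illustrative layout of the split mapping built above for a BAR whose
 * MSI-X table sits in the middle (offsets are BAR-relative):
 *
 *	[0 ........... table_start) -> memreg[0], mapped over the reservation
 *	[table_start ... table_end) -> hole, stays part of the anonymous
 *	                               PROT_NONE reservation (the kernel
 *	                               refuses to mmap the table itself)
 *	[table_end ..... bar->size) -> memreg[1], mapped over the reservation
 */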

static int
pci_vfio_sparse_mmap_bar(int vfio_dev_fd, struct mapped_pci_resource *vfio_res,
		int bar_index, int additional_flags)
{
	struct pci_map *bar = &vfio_res->maps[bar_index];
	struct vfio_region_sparse_mmap_area *sparse;
	void *bar_addr;
	uint32_t i;

	if (bar->size == 0) {
		PCI_LOG(DEBUG, "Bar size is 0, skip BAR%d", bar_index);
		return 0;
	}

	/* reserve the address using an inaccessible mapping */
	bar_addr = mmap(bar->addr, bar->size, 0, MAP_PRIVATE |
			MAP_ANONYMOUS | additional_flags, -1, 0);
	if (bar_addr != MAP_FAILED) {
		void *map_addr = NULL;
		for (i = 0; i < bar->nr_areas; i++) {
			sparse = &bar->areas[i];
			if (sparse->size) {
				void *addr = RTE_PTR_ADD(bar_addr, (uintptr_t)sparse->offset);
				map_addr = pci_map_resource(addr, vfio_dev_fd,
					bar->offset + sparse->offset, sparse->size,
					RTE_MAP_FORCE_ADDRESS);
				if (map_addr == NULL) {
					munmap(bar_addr, bar->size);
					PCI_LOG(ERR, "Failed to map pci BAR%d", bar_index);
					goto err_map;
				}
			}
		}
	} else {
		PCI_LOG(ERR, "Failed to create inaccessible mapping for BAR%d", bar_index);
		goto err_map;
	}

	bar->addr = bar_addr;
	return 0;

err_map:
	bar->nr_areas = 0;
	return -1;
}

/*
 * region info may contain capability headers, so we need to keep reallocating
 * the memory until we match allocated memory size with argsz.
 */
static int
pci_vfio_get_region_info(int vfio_dev_fd, struct vfio_region_info **info,
		int region)
{
	struct vfio_region_info *ri;
	size_t argsz = sizeof(*ri);
	int ret;

	ri = malloc(sizeof(*ri));
	if (ri == NULL) {
		PCI_LOG(ERR, "Cannot allocate memory for VFIO region info");
		return -1;
	}
again:
	memset(ri, 0, argsz);
	ri->argsz = argsz;
	ri->index = region;

	ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_REGION_INFO, ri);
	if (ret < 0) {
		free(ri);
		return ret;
	}
	if (ri->argsz != argsz) {
		struct vfio_region_info *tmp;

		argsz = ri->argsz;
		tmp = realloc(ri, argsz);

		if (tmp == NULL) {
			/* realloc failed but the ri is still there */
			free(ri);
			PCI_LOG(ERR, "Cannot reallocate memory for VFIO region info");
			return -1;
		}
		ri = tmp;
		goto again;
	}
	*info = ri;

	return 0;
}

static struct vfio_info_cap_header *
pci_vfio_info_cap(struct vfio_region_info *info, int cap)
{
	struct vfio_info_cap_header *h;
	size_t offset;

	if ((info->flags & RTE_VFIO_INFO_FLAG_CAPS) == 0) {
		/* VFIO info does not advertise capabilities */
		return NULL;
	}

	offset = VFIO_CAP_OFFSET(info);
	while (offset != 0) {
		h = RTE_PTR_ADD(info, offset);
		if (h->id == cap)
			return h;
		offset = h->next;
	}
	return NULL;
}
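
/*
 * Illustrative caller pattern for the two helpers above (this is how the
 * mapping code below uses them):
 *
 *	struct vfio_region_info *info = NULL;
 *
 *	if (pci_vfio_get_region_info(vfio_dev_fd, &info, index) == 0) {
 *		... inspect info->size, info->offset, info->flags,
 *		    or walk capabilities with pci_vfio_info_cap() ...
 *		free(info);
 *	}
 */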

static int
pci_vfio_msix_is_mappable(int vfio_dev_fd, int msix_region)
{
	struct vfio_region_info *info = NULL;
	int ret;

	ret = pci_vfio_get_region_info(vfio_dev_fd, &info, msix_region);
	if (ret < 0)
		return -1;

	ret = pci_vfio_info_cap(info, RTE_VFIO_CAP_MSIX_MAPPABLE) != NULL;

	/* cleanup */
	free(info);

	return ret;
}

static int
pci_vfio_fill_regions(struct rte_pci_device *dev, int vfio_dev_fd,
		      struct vfio_device_info *device_info)
{
	struct rte_pci_device_internal *pdev = RTE_PCI_DEVICE_INTERNAL(dev);
	struct vfio_region_info *reg = NULL;
	int nb_maps, i, ret;

	nb_maps = RTE_MIN((int)device_info->num_regions,
			VFIO_PCI_CONFIG_REGION_INDEX + 1);

	for (i = 0; i < nb_maps; i++) {
		ret = pci_vfio_get_region_info(vfio_dev_fd, &reg, i);
		if (ret < 0) {
			PCI_LOG(DEBUG, "%s cannot get device region info error %i (%s)",
				dev->name, errno, strerror(errno));
			return -1;
		}

		pdev->region[i].size = reg->size;
		pdev->region[i].offset = reg->offset;

		free(reg);
	}

	return 0;
}

static int
pci_vfio_map_resource_primary(struct rte_pci_device *dev)
{
	struct rte_pci_device_internal *pdev = RTE_PCI_DEVICE_INTERNAL(dev);
	struct vfio_device_info device_info = { .argsz = sizeof(device_info) };
	struct vfio_region_info *reg = NULL;
	char pci_addr[PATH_MAX] = {0};
	int vfio_dev_fd;
	struct rte_pci_addr *loc = &dev->addr;
	int i, j, ret;
	struct mapped_pci_resource *vfio_res = NULL;
	struct mapped_pci_res_list *vfio_res_list =
		RTE_TAILQ_CAST(rte_vfio_tailq.head, mapped_pci_res_list);

	struct pci_map *maps;

	if (rte_intr_fd_set(dev->intr_handle, -1))
		return -1;

#ifdef HAVE_VFIO_DEV_REQ_INTERFACE
	if (rte_intr_fd_set(dev->vfio_req_intr_handle, -1))
		return -1;
#endif

	/* store PCI address string */
	snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT,
			loc->domain, loc->bus, loc->devid, loc->function);

	ret = rte_vfio_setup_device(rte_pci_get_sysfs_path(), pci_addr,
					&vfio_dev_fd, &device_info);
	if (ret)
		return ret;

	if (rte_intr_dev_fd_set(dev->intr_handle, vfio_dev_fd))
		goto err_vfio_dev_fd;

	/* allocate vfio_res and get region info */
	vfio_res = rte_zmalloc("VFIO_RES", sizeof(*vfio_res), 0);
	if (vfio_res == NULL) {
		PCI_LOG(ERR, "Cannot store VFIO mmap details");
		goto err_vfio_dev_fd;
	}
	memcpy(&vfio_res->pci_addr, &dev->addr, sizeof(vfio_res->pci_addr));

	/* get number of registers (up to BAR5) */
	vfio_res->nb_maps = RTE_MIN((int)device_info.num_regions,
			VFIO_PCI_BAR5_REGION_INDEX + 1);

	/* map BARs */
	maps = vfio_res->maps;

	ret = pci_vfio_get_region_info(vfio_dev_fd, &reg,
		VFIO_PCI_CONFIG_REGION_INDEX);
	if (ret < 0) {
		PCI_LOG(ERR, "%s cannot get device region info error %i (%s)",
			dev->name, errno, strerror(errno));
		goto err_vfio_res;
	}
	pdev->region[VFIO_PCI_CONFIG_REGION_INDEX].size = reg->size;
	pdev->region[VFIO_PCI_CONFIG_REGION_INDEX].offset = reg->offset;
	free(reg);

	vfio_res->msix_table.bar_index = -1;
	/* get MSI-X BAR, if any (we have to know where it is because we can't
	 * easily mmap it when using VFIO)
	 */
	ret = pci_vfio_get_msix_bar(dev, &vfio_res->msix_table);
	if (ret < 0) {
		PCI_LOG(ERR, "%s cannot get MSI-X BAR number!", pci_addr);
		goto err_vfio_res;
	}
	/* if we found our MSI-X BAR region, check if we can mmap it */
	if (vfio_res->msix_table.bar_index != -1) {
		int ret = pci_vfio_msix_is_mappable(vfio_dev_fd,
				vfio_res->msix_table.bar_index);
		if (ret < 0) {
			PCI_LOG(ERR, "Couldn't check if MSI-X BAR is mappable");
			goto err_vfio_res;
		} else if (ret != 0) {
			/* we can map it, so we don't care where it is */
			PCI_LOG(DEBUG, "VFIO reports MSI-X BAR as mappable");
			vfio_res->msix_table.bar_index = -1;
		}
	}

	for (i = 0; i < vfio_res->nb_maps; i++) {
		void *bar_addr;
		struct vfio_info_cap_header *hdr;
		struct vfio_region_info_cap_sparse_mmap *sparse;

		ret = pci_vfio_get_region_info(vfio_dev_fd, &reg, i);
		if (ret < 0) {
			PCI_LOG(ERR, "%s cannot get device region info error %i (%s)",
				pci_addr, errno, strerror(errno));
			goto err_map;
		}

		pdev->region[i].size = reg->size;
		pdev->region[i].offset = reg->offset;

		/* check for I/O port region */
		ret = pci_vfio_is_ioport_bar(dev, vfio_dev_fd, i);
		if (ret < 0) {
			free(reg);
			goto err_map;
		} else if (ret) {
			PCI_LOG(INFO, "Ignore mapping IO port bar(%d)", i);
			free(reg);
			continue;
		}

		/* skip non-mmappable BARs */
		if ((reg->flags & VFIO_REGION_INFO_FLAG_MMAP) == 0) {
			free(reg);
			continue;
		}

		/* try mapping somewhere close to the end of hugepages */
		if (pci_map_addr == NULL)
			pci_map_addr = pci_find_max_end_va();

		bar_addr = pci_map_addr;
		pci_map_addr = RTE_PTR_ADD(bar_addr, (size_t)reg->size);

		pci_map_addr = RTE_PTR_ALIGN(pci_map_addr,
					sysconf(_SC_PAGE_SIZE));

		maps[i].addr = bar_addr;
		maps[i].offset = reg->offset;
		maps[i].size = reg->size;
		maps[i].path = NULL; /* vfio doesn't have per-resource paths */

		hdr = pci_vfio_info_cap(reg, VFIO_REGION_INFO_CAP_SPARSE_MMAP);

		if (hdr != NULL) {
			sparse = container_of(hdr,
				struct vfio_region_info_cap_sparse_mmap, header);
			if (sparse->nr_areas > 0) {
				maps[i].nr_areas = sparse->nr_areas;
				maps[i].areas = rte_zmalloc(NULL,
					sizeof(*maps[i].areas) * maps[i].nr_areas, 0);
				if (maps[i].areas == NULL) {
					PCI_LOG(ERR, "Cannot alloc memory for sparse map areas");
					goto err_map;
				}
				memcpy(maps[i].areas, sparse->areas,
					sizeof(*maps[i].areas) * maps[i].nr_areas);
			}
		}

		if (maps[i].nr_areas > 0) {
			ret = pci_vfio_sparse_mmap_bar(vfio_dev_fd, vfio_res, i, 0);
			if (ret < 0) {
				PCI_LOG(ERR, "%s sparse mapping BAR%i failed: %s",
					pci_addr, i, strerror(errno));
				free(reg);
				goto err_map;
			}
		} else {
			ret = pci_vfio_mmap_bar(vfio_dev_fd, vfio_res, i, 0);
			if (ret < 0) {
				PCI_LOG(ERR, "%s mapping BAR%i failed: %s",
					pci_addr, i, strerror(errno));
				free(reg);
				goto err_map;
			}
		}

		dev->mem_resource[i].addr = maps[i].addr;

		free(reg);
	}

	if (pci_rte_vfio_setup_device(dev, vfio_dev_fd) < 0) {
		PCI_LOG(ERR, "%s setup device failed", pci_addr);
		goto err_map;
	}

#ifdef HAVE_VFIO_DEV_REQ_INTERFACE
	if (pci_vfio_enable_notifier(dev, vfio_dev_fd) != 0) {
		PCI_LOG(ERR, "Error setting up notifier!");
		goto err_map;
	}

#endif
	TAILQ_INSERT_TAIL(vfio_res_list, vfio_res, next);

	return 0;
err_map:
	for (j = 0; j < i; j++) {
		if (maps[j].addr)
			pci_unmap_resource(maps[j].addr, maps[j].size);
		if (maps[j].nr_areas > 0)
			rte_free(maps[j].areas);
	}
err_vfio_res:
	rte_free(vfio_res);
err_vfio_dev_fd:
	rte_vfio_release_device(rte_pci_get_sysfs_path(),
				pci_addr, vfio_dev_fd);
	return -1;
}
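
/*
 * Note on the secondary-process path below: a secondary process does not
 * rediscover regions from scratch. It looks up the tailq entry that the
 * primary published in shared memory and replays the same mappings with
 * MAP_FIXED, so every process sees each BAR at the same virtual address.
 */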

static int
pci_vfio_map_resource_secondary(struct rte_pci_device *dev)
{
	struct vfio_device_info device_info = { .argsz = sizeof(device_info) };
	char pci_addr[PATH_MAX] = {0};
	int vfio_dev_fd;
	struct rte_pci_addr *loc = &dev->addr;
	int i, j, ret;
	struct mapped_pci_resource *vfio_res = NULL;
	struct mapped_pci_res_list *vfio_res_list =
		RTE_TAILQ_CAST(rte_vfio_tailq.head, mapped_pci_res_list);

	struct pci_map *maps;

	if (rte_intr_fd_set(dev->intr_handle, -1))
		return -1;
#ifdef HAVE_VFIO_DEV_REQ_INTERFACE
	if (rte_intr_fd_set(dev->vfio_req_intr_handle, -1))
		return -1;
#endif

	/* store PCI address string */
	snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT,
			loc->domain, loc->bus, loc->devid, loc->function);

	/* if we're in a secondary process, just find our tailq entry */
	TAILQ_FOREACH(vfio_res, vfio_res_list, next) {
		if (rte_pci_addr_cmp(&vfio_res->pci_addr,
						 &dev->addr))
			continue;
		break;
	}
	/* if we haven't found our tailq entry, something's wrong */
	if (vfio_res == NULL) {
		PCI_LOG(ERR, "%s cannot find TAILQ entry for PCI device!", pci_addr);
		return -1;
	}

	ret = rte_vfio_setup_device(rte_pci_get_sysfs_path(), pci_addr,
					&vfio_dev_fd, &device_info);
	if (ret)
		return ret;

	ret = pci_vfio_fill_regions(dev, vfio_dev_fd, &device_info);
	if (ret)
		return ret;

	/* map BARs */
	maps = vfio_res->maps;

	for (i = 0; i < vfio_res->nb_maps; i++) {
		if (maps[i].nr_areas > 0) {
			ret = pci_vfio_sparse_mmap_bar(vfio_dev_fd, vfio_res, i, MAP_FIXED);
			if (ret < 0) {
				PCI_LOG(ERR, "%s sparse mapping BAR%i failed: %s",
					pci_addr, i, strerror(errno));
				goto err_vfio_dev_fd;
			}
		} else {
			ret = pci_vfio_mmap_bar(vfio_dev_fd, vfio_res, i, MAP_FIXED);
			if (ret < 0) {
				PCI_LOG(ERR, "%s mapping BAR%i failed: %s",
					pci_addr, i, strerror(errno));
				goto err_vfio_dev_fd;
			}
		}

		dev->mem_resource[i].addr = maps[i].addr;
	}

	/* we need to save vfio_dev_fd, so it can be used during release */
	if (rte_intr_dev_fd_set(dev->intr_handle, vfio_dev_fd))
		goto err_vfio_dev_fd;
#ifdef HAVE_VFIO_DEV_REQ_INTERFACE
	if (rte_intr_dev_fd_set(dev->vfio_req_intr_handle, vfio_dev_fd))
		goto err_vfio_dev_fd;
#endif

	return 0;
err_vfio_dev_fd:
	for (j = 0; j < i; j++) {
		if (maps[j].addr)
			pci_unmap_resource(maps[j].addr, maps[j].size);
	}
	rte_vfio_release_device(rte_pci_get_sysfs_path(),
				pci_addr, vfio_dev_fd);
	return -1;
}

/*
 * Map the PCI resources of a PCI device in virtual memory (VFIO version).
 * Primary and secondary processes follow almost exactly the same path.
 */
int
pci_vfio_map_resource(struct rte_pci_device *dev)
{
	if (rte_eal_process_type() == RTE_PROC_PRIMARY)
		return pci_vfio_map_resource_primary(dev);
	else
		return pci_vfio_map_resource_secondary(dev);
}

static struct mapped_pci_resource *
find_and_unmap_vfio_resource(struct mapped_pci_res_list *vfio_res_list,
			struct rte_pci_device *dev,
			const char *pci_addr)
{
	struct mapped_pci_resource *vfio_res = NULL;
	struct pci_map *maps;
	int i;

	/* Get vfio_res */
	TAILQ_FOREACH(vfio_res, vfio_res_list, next) {
		if (rte_pci_addr_cmp(&vfio_res->pci_addr, &dev->addr))
			continue;
		break;
	}

	if (vfio_res == NULL)
		return vfio_res;

	PCI_LOG(INFO, "Releasing PCI mapped resource for %s", pci_addr);

	maps = vfio_res->maps;
	for (i = 0; i < vfio_res->nb_maps; i++) {

		/*
		 * We do not need to be aware of MSI-X table BAR mappings,
		 * as we were when mapping: just using the current maps
		 * array is enough.
		 */
		if (maps[i].addr) {
			PCI_LOG(INFO, "Calling pci_unmap_resource for %s at %p",
				pci_addr, maps[i].addr);
			pci_unmap_resource(maps[i].addr, maps[i].size);
		}

		if (maps[i].nr_areas > 0)
			rte_free(maps[i].areas);
	}

	return vfio_res;
}

static int
pci_vfio_unmap_resource_primary(struct rte_pci_device *dev)
{
	char pci_addr[PATH_MAX] = {0};
	struct rte_pci_addr *loc = &dev->addr;
	struct mapped_pci_resource *vfio_res = NULL;
	struct mapped_pci_res_list *vfio_res_list;
	int ret, vfio_dev_fd;

	/* store PCI address string */
	snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT,
			loc->domain, loc->bus, loc->devid, loc->function);

#ifdef HAVE_VFIO_DEV_REQ_INTERFACE
	ret = pci_vfio_disable_notifier(dev);
	if (ret) {
		PCI_LOG(ERR, "fail to disable req notifier.");
		return -1;
	}

#endif
	if (rte_intr_fd_get(dev->intr_handle) < 0)
		return -1;

	if (close(rte_intr_fd_get(dev->intr_handle)) < 0) {
		PCI_LOG(INFO, "Error when closing eventfd file descriptor for %s", pci_addr);
		return -1;
	}

	vfio_dev_fd = rte_intr_dev_fd_get(dev->intr_handle);
	if (vfio_dev_fd < 0)
		return -1;

	if (rte_pci_set_bus_master(dev, false)) {
		PCI_LOG(ERR, "%s cannot unset bus mastering for PCI device!", pci_addr);
		return -1;
	}

	ret = rte_vfio_release_device(rte_pci_get_sysfs_path(), pci_addr,
				      vfio_dev_fd);
	if (ret < 0) {
		PCI_LOG(ERR, "Cannot release VFIO device");
		return ret;
	}

	vfio_res_list =
		RTE_TAILQ_CAST(rte_vfio_tailq.head, mapped_pci_res_list);
	vfio_res = find_and_unmap_vfio_resource(vfio_res_list, dev, pci_addr);

	/* if we haven't found our tailq entry, something's wrong */
	if (vfio_res == NULL) {
		PCI_LOG(ERR, "%s cannot find TAILQ entry for PCI device!", pci_addr);
		return -1;
	}

	TAILQ_REMOVE(vfio_res_list, vfio_res, next);
	rte_free(vfio_res);
	return 0;
}

static int
pci_vfio_unmap_resource_secondary(struct rte_pci_device *dev)
{
	char pci_addr[PATH_MAX] = {0};
	struct rte_pci_addr *loc = &dev->addr;
	struct mapped_pci_resource *vfio_res = NULL;
	struct mapped_pci_res_list *vfio_res_list;
	int ret, vfio_dev_fd;

	/* store PCI address string */
	snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT,
			loc->domain, loc->bus, loc->devid, loc->function);

	vfio_dev_fd = rte_intr_dev_fd_get(dev->intr_handle);
	if (vfio_dev_fd < 0)
		return -1;

	ret = rte_vfio_release_device(rte_pci_get_sysfs_path(), pci_addr,
				      vfio_dev_fd);
	if (ret < 0) {
		PCI_LOG(ERR, "Cannot release VFIO device");
		return ret;
	}

	vfio_res_list =
		RTE_TAILQ_CAST(rte_vfio_tailq.head, mapped_pci_res_list);
	vfio_res = find_and_unmap_vfio_resource(vfio_res_list, dev, pci_addr);

	/* if we haven't found our tailq entry, something's wrong */
	if (vfio_res == NULL) {
		PCI_LOG(ERR, "%s cannot find TAILQ entry for PCI device!", pci_addr);
		return -1;
	}

	return 0;
}

int
pci_vfio_unmap_resource(struct rte_pci_device *dev)
{
	if (rte_eal_process_type() == RTE_PROC_PRIMARY)
		return pci_vfio_unmap_resource_primary(dev);
	else
		return pci_vfio_unmap_resource_secondary(dev);
}

int
pci_vfio_ioport_map(struct rte_pci_device *dev, int bar,
		    struct rte_pci_ioport *p)
{
	uint64_t size, offset;

	if (bar < VFIO_PCI_BAR0_REGION_INDEX ||
	    bar > VFIO_PCI_BAR5_REGION_INDEX) {
		PCI_LOG(ERR, "invalid bar (%d)!", bar);
		return -1;
	}

	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
		struct vfio_device_info device_info = { .argsz = sizeof(device_info) };
		char pci_addr[PATH_MAX];
		int vfio_dev_fd;
		struct rte_pci_addr *loc = &dev->addr;

		/* store PCI address string */
		snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT,
				loc->domain, loc->bus, loc->devid, loc->function);

		vfio_dev_fd = rte_intr_dev_fd_get(dev->intr_handle);
		if (vfio_dev_fd < 0) {
			return -1;
		} else if (vfio_dev_fd == 0) {
			if (rte_vfio_get_device_info(rte_pci_get_sysfs_path(), pci_addr,
					&vfio_dev_fd, &device_info) != 0)
				return -1;
			/* save vfio_dev_fd so it can be used during release */
			if (rte_intr_dev_fd_set(dev->intr_handle, vfio_dev_fd) != 0)
				return -1;

			if (pci_vfio_fill_regions(dev, vfio_dev_fd, &device_info) != 0)
				return -1;
		}
	}

	if (pci_vfio_get_region(dev, bar, &size, &offset) != 0) {
		PCI_LOG(ERR, "Cannot get offset of region %d.", bar);
		return -1;
	}

	p->dev = dev;
	p->base = offset;
	return 0;
}
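
/*
 * Illustrative note: p->base set above is the VFIO region offset of the
 * chosen BAR, so the helpers below can turn an I/O port access into a
 * pread()/pwrite() on the device fd. A driver typically goes through the
 * generic wrappers, e.g.:
 *
 *	struct rte_pci_ioport p;
 *	uint8_t val;
 *
 *	if (rte_pci_ioport_map(dev, 0, &p) == 0) {
 *		rte_pci_ioport_read(&p, &val, sizeof(val), 0);
 *		rte_pci_ioport_unmap(&p);
 *	}
 */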

int
pci_vfio_ioport_map(struct rte_pci_device *dev, int bar,
		struct rte_pci_ioport *p)
{
	uint64_t size, offset;

	if (bar < VFIO_PCI_BAR0_REGION_INDEX ||
			bar > VFIO_PCI_BAR5_REGION_INDEX) {
		PCI_LOG(ERR, "invalid bar (%d)!", bar);
		return -1;
	}

	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
		struct vfio_device_info device_info = { .argsz = sizeof(device_info) };
		char pci_addr[PATH_MAX];
		int vfio_dev_fd;
		struct rte_pci_addr *loc = &dev->addr;

		/* store PCI address string */
		snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT,
				loc->domain, loc->bus, loc->devid, loc->function);

		vfio_dev_fd = rte_intr_dev_fd_get(dev->intr_handle);
		if (vfio_dev_fd < 0) {
			return -1;
		} else if (vfio_dev_fd == 0) {
			if (rte_vfio_get_device_info(rte_pci_get_sysfs_path(), pci_addr,
					&vfio_dev_fd, &device_info) != 0)
				return -1;
			/* save vfio_dev_fd so it can be used during release */
			if (rte_intr_dev_fd_set(dev->intr_handle, vfio_dev_fd) != 0)
				return -1;

			if (pci_vfio_fill_regions(dev, vfio_dev_fd, &device_info) != 0)
				return -1;
		}
	}

	if (pci_vfio_get_region(dev, bar, &size, &offset) != 0) {
		PCI_LOG(ERR, "Cannot get offset of region %d.", bar);
		return -1;
	}

	p->dev = dev;
	p->base = offset;
	return 0;
}

void
pci_vfio_ioport_read(struct rte_pci_ioport *p,
		void *data, size_t len, off_t offset)
{
	const struct rte_intr_handle *intr_handle = p->dev->intr_handle;
	int vfio_dev_fd = rte_intr_dev_fd_get(intr_handle);

	if (vfio_dev_fd < 0)
		return;

	if (pread(vfio_dev_fd, data, len, p->base + offset) <= 0)
		PCI_LOG(ERR, "Can't read from PCI bar (%" PRIu64 ") : offset (%x)",
			VFIO_GET_REGION_IDX(p->base), (int)offset);
}

void
pci_vfio_ioport_write(struct rte_pci_ioport *p,
		const void *data, size_t len, off_t offset)
{
	const struct rte_intr_handle *intr_handle = p->dev->intr_handle;
	int vfio_dev_fd = rte_intr_dev_fd_get(intr_handle);

	if (vfio_dev_fd < 0)
		return;

	if (pwrite(vfio_dev_fd, data, len, p->base + offset) <= 0)
		PCI_LOG(ERR, "Can't write to PCI bar (%" PRIu64 ") : offset (%x)",
			VFIO_GET_REGION_IDX(p->base), (int)offset);
}

int
pci_vfio_ioport_unmap(struct rte_pci_ioport *p)
{
	RTE_SET_USED(p);
	return -1;
}
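
/*
 * For illustration only (not part of the original file): the MMIO
 * helpers below back the generic rte_pci_mmio_read()/rte_pci_mmio_write()
 * API from rte_bus_pci.h, which returns the number of bytes transferred
 * on success. A minimal sketch, assuming a device "dev" with a 32-bit
 * register at offset 0x10 of BAR 0:
 *
 *	uint32_t reg;
 *
 *	if (rte_pci_mmio_read(dev, 0, &reg, sizeof(reg), 0x10) == sizeof(reg)) {
 *		reg &= ~0x1u;
 *		rte_pci_mmio_write(dev, 0, &reg, sizeof(reg), 0x10);
 *	}
 */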

int
pci_vfio_mmio_read(const struct rte_pci_device *dev, int bar,
		void *buf, size_t len, off_t offs)
{
	uint64_t size, offset;
	int fd;

	fd = rte_intr_dev_fd_get(dev->intr_handle);
	if (fd < 0)
		return -1;

	if (pci_vfio_get_region(dev, bar, &size, &offset) != 0)
		return -1;

	/* reject accesses that would run past the end of the region */
	if ((uint64_t)len + offs > size)
		return -1;

	return pread(fd, buf, len, offset + offs);
}

int
pci_vfio_mmio_write(const struct rte_pci_device *dev, int bar,
		const void *buf, size_t len, off_t offs)
{
	uint64_t size, offset;
	int fd;

	fd = rte_intr_dev_fd_get(dev->intr_handle);
	if (fd < 0)
		return -1;

	if (pci_vfio_get_region(dev, bar, &size, &offset) != 0)
		return -1;

	/* reject accesses that would run past the end of the region */
	if ((uint64_t)len + offs > size)
		return -1;

	return pwrite(fd, buf, len, offset + offs);
}

int
pci_vfio_is_enabled(void)
{
	int status = rte_vfio_is_enabled("vfio_pci");

	/* if VFIO was not enabled earlier, try enabling it now and re-check */
	if (!status) {
		rte_vfio_enable("vfio");
		status = rte_vfio_is_enabled("vfio_pci");
	}
	return status;
}
#endif
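
/*
 * For illustration only (not part of the original file): the Linux PCI
 * bus code gates VFIO resource mapping on the check above, along the
 * lines of:
 *
 *	if (pci_vfio_is_enabled())
 *		ret = pci_vfio_map_resource(dev);
 */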