17f11d166SChaoyong He /* SPDX-License-Identifier: BSD-3-Clause 27f11d166SChaoyong He * Copyright (c) 2023 Corigine, Inc. 37f11d166SChaoyong He * All rights reserved. 47f11d166SChaoyong He */ 57f11d166SChaoyong He 67f11d166SChaoyong He #include <pthread.h> 776ea5ebeSChaoyong He #include <sys/epoll.h> 8b47a0373SChaoyong He #include <sys/ioctl.h> 976ea5ebeSChaoyong He #include <unistd.h> 107f11d166SChaoyong He 117f11d166SChaoyong He #include <nfp_common_pci.h> 127f11d166SChaoyong He #include <nfp_dev.h> 137b2a1228SChaoyong He #include <rte_vfio.h> 14e6ac31e0SXinying Yu #include <rte_eal_paging.h> 15e6ac31e0SXinying Yu #include <rte_malloc.h> 167f11d166SChaoyong He #include <vdpa_driver.h> 177f11d166SChaoyong He 18d89f4990SChaoyong He #include "nfp_vdpa_core.h" 197f11d166SChaoyong He #include "nfp_vdpa_log.h" 207f11d166SChaoyong He 217f11d166SChaoyong He #define NFP_VDPA_DRIVER_NAME nfp_vdpa 227f11d166SChaoyong He 23b47a0373SChaoyong He #define MSIX_IRQ_SET_BUF_LEN (sizeof(struct vfio_irq_set) + \ 24b47a0373SChaoyong He sizeof(int) * (NFP_VDPA_MAX_QUEUES * 2 + 1)) 25b47a0373SChaoyong He 26e6ac31e0SXinying Yu #define NFP_VDPA_USED_RING_LEN(size) \ 27e6ac31e0SXinying Yu ((size) * sizeof(struct vring_used_elem) + sizeof(struct vring_used)) 28e6ac31e0SXinying Yu 2902fe8366SXinying Yu #define EPOLL_DATA_INTR 1 3002fe8366SXinying Yu 317f11d166SChaoyong He struct nfp_vdpa_dev { 327f11d166SChaoyong He struct rte_pci_device *pci_dev; 337f11d166SChaoyong He struct rte_vdpa_device *vdev; 34d89f4990SChaoyong He struct nfp_vdpa_hw hw; 357b2a1228SChaoyong He 367b2a1228SChaoyong He int vfio_container_fd; 377b2a1228SChaoyong He int vfio_group_fd; 387b2a1228SChaoyong He int vfio_dev_fd; 397b2a1228SChaoyong He int iommu_group; 40d89f4990SChaoyong He 4176ea5ebeSChaoyong He rte_thread_t tid; /**< Thread for notify relay */ 4276ea5ebeSChaoyong He int epoll_fd; 4376ea5ebeSChaoyong He 44b47a0373SChaoyong He int vid; 45d89f4990SChaoyong He uint16_t max_queues; 46b47a0373SChaoyong He RTE_ATOMIC(uint32_t) started; 47b47a0373SChaoyong He RTE_ATOMIC(uint32_t) dev_attached; 48b47a0373SChaoyong He RTE_ATOMIC(uint32_t) running; 49b47a0373SChaoyong He rte_spinlock_t lock; 50b47a0373SChaoyong He 51b47a0373SChaoyong He /** Eventfd for used ring interrupt */ 52b47a0373SChaoyong He int intr_fd[NFP_VDPA_MAX_QUEUES * 2]; 537f11d166SChaoyong He }; 547f11d166SChaoyong He 557f11d166SChaoyong He struct nfp_vdpa_dev_node { 567f11d166SChaoyong He TAILQ_ENTRY(nfp_vdpa_dev_node) next; 577f11d166SChaoyong He struct nfp_vdpa_dev *device; 587f11d166SChaoyong He }; 597f11d166SChaoyong He 607f11d166SChaoyong He TAILQ_HEAD(vdpa_dev_list_head, nfp_vdpa_dev_node); 617f11d166SChaoyong He 627f11d166SChaoyong He static struct vdpa_dev_list_head vdpa_dev_list = 637f11d166SChaoyong He TAILQ_HEAD_INITIALIZER(vdpa_dev_list); 647f11d166SChaoyong He 657f11d166SChaoyong He static pthread_mutex_t vdpa_list_lock = PTHREAD_MUTEX_INITIALIZER; 667f11d166SChaoyong He 677f11d166SChaoyong He static struct nfp_vdpa_dev_node * 680141f545SChaoyong He nfp_vdpa_find_node_by_vdev(struct rte_vdpa_device *vdev) 690141f545SChaoyong He { 700141f545SChaoyong He bool found = false; 710141f545SChaoyong He struct nfp_vdpa_dev_node *node; 720141f545SChaoyong He 730141f545SChaoyong He pthread_mutex_lock(&vdpa_list_lock); 740141f545SChaoyong He 750141f545SChaoyong He TAILQ_FOREACH(node, &vdpa_dev_list, next) { 760141f545SChaoyong He if (vdev == node->device->vdev) { 770141f545SChaoyong He found = true; 780141f545SChaoyong He break; 790141f545SChaoyong He } 800141f545SChaoyong He } 810141f545SChaoyong He 820141f545SChaoyong He pthread_mutex_unlock(&vdpa_list_lock); 830141f545SChaoyong He 840141f545SChaoyong He if (found) 850141f545SChaoyong He return node; 860141f545SChaoyong He 870141f545SChaoyong He return NULL; 880141f545SChaoyong He } 890141f545SChaoyong He 900141f545SChaoyong He static struct nfp_vdpa_dev_node * 917f11d166SChaoyong He nfp_vdpa_find_node_by_pdev(struct rte_pci_device *pdev) 927f11d166SChaoyong He { 937f11d166SChaoyong He bool found = false; 947f11d166SChaoyong He struct nfp_vdpa_dev_node *node; 957f11d166SChaoyong He 967f11d166SChaoyong He pthread_mutex_lock(&vdpa_list_lock); 977f11d166SChaoyong He 987f11d166SChaoyong He TAILQ_FOREACH(node, &vdpa_dev_list, next) { 997f11d166SChaoyong He if (pdev == node->device->pci_dev) { 1007f11d166SChaoyong He found = true; 1017f11d166SChaoyong He break; 1027f11d166SChaoyong He } 1037f11d166SChaoyong He } 1047f11d166SChaoyong He 1057f11d166SChaoyong He pthread_mutex_unlock(&vdpa_list_lock); 1067f11d166SChaoyong He 1077f11d166SChaoyong He if (found) 1087f11d166SChaoyong He return node; 1097f11d166SChaoyong He 1107f11d166SChaoyong He return NULL; 1117f11d166SChaoyong He } 1127f11d166SChaoyong He 1137b2a1228SChaoyong He static int 1147b2a1228SChaoyong He nfp_vdpa_vfio_setup(struct nfp_vdpa_dev *device) 1157b2a1228SChaoyong He { 1167b2a1228SChaoyong He int ret; 1177b2a1228SChaoyong He char dev_name[RTE_DEV_NAME_MAX_LEN] = {0}; 1187b2a1228SChaoyong He struct rte_pci_device *pci_dev = device->pci_dev; 1197b2a1228SChaoyong He 1207b2a1228SChaoyong He rte_pci_unmap_device(pci_dev); 1217b2a1228SChaoyong He 1227b2a1228SChaoyong He rte_pci_device_name(&pci_dev->addr, dev_name, RTE_DEV_NAME_MAX_LEN); 1231a2bb56aSChaoyong He ret = rte_vfio_get_group_num(rte_pci_get_sysfs_path(), dev_name, 1247b2a1228SChaoyong He &device->iommu_group); 1251a2bb56aSChaoyong He if (ret <= 0) 1261a2bb56aSChaoyong He return -1; 1277b2a1228SChaoyong He 1287b2a1228SChaoyong He device->vfio_container_fd = rte_vfio_container_create(); 1297b2a1228SChaoyong He if (device->vfio_container_fd < 0) 1307b2a1228SChaoyong He return -1; 1317b2a1228SChaoyong He 1327b2a1228SChaoyong He device->vfio_group_fd = rte_vfio_container_group_bind( 1337b2a1228SChaoyong He device->vfio_container_fd, device->iommu_group); 1347b2a1228SChaoyong He if (device->vfio_group_fd < 0) 1357b2a1228SChaoyong He goto container_destroy; 1367b2a1228SChaoyong He 137*b6de4353SZerun Fu DRV_VDPA_LOG(DEBUG, "The container_fd=%d, group_fd=%d.", 1387b2a1228SChaoyong He device->vfio_container_fd, device->vfio_group_fd); 1397b2a1228SChaoyong He 1407b2a1228SChaoyong He ret = rte_pci_map_device(pci_dev); 1417b2a1228SChaoyong He if (ret != 0) 1427b2a1228SChaoyong He goto group_unbind; 1437b2a1228SChaoyong He 1447b2a1228SChaoyong He device->vfio_dev_fd = rte_intr_dev_fd_get(pci_dev->intr_handle); 1457b2a1228SChaoyong He 1467b2a1228SChaoyong He return 0; 1477b2a1228SChaoyong He 1487b2a1228SChaoyong He group_unbind: 1497b2a1228SChaoyong He rte_vfio_container_group_unbind(device->vfio_container_fd, device->iommu_group); 1507b2a1228SChaoyong He container_destroy: 1517b2a1228SChaoyong He rte_vfio_container_destroy(device->vfio_container_fd); 1527b2a1228SChaoyong He 1537b2a1228SChaoyong He return -1; 1547b2a1228SChaoyong He } 1557b2a1228SChaoyong He 1567b2a1228SChaoyong He static void 1577b2a1228SChaoyong He nfp_vdpa_vfio_teardown(struct nfp_vdpa_dev *device) 1587b2a1228SChaoyong He { 1597b2a1228SChaoyong He rte_pci_unmap_device(device->pci_dev); 1607b2a1228SChaoyong He rte_vfio_container_group_unbind(device->vfio_container_fd, device->iommu_group); 1617b2a1228SChaoyong He rte_vfio_container_destroy(device->vfio_container_fd); 1627b2a1228SChaoyong He } 1637b2a1228SChaoyong He 164b47a0373SChaoyong He static int 165b47a0373SChaoyong He nfp_vdpa_dma_do_unmap(struct rte_vhost_memory *mem, 166b47a0373SChaoyong He uint32_t times, 167b47a0373SChaoyong He int vfio_container_fd) 168b47a0373SChaoyong He { 169b47a0373SChaoyong He uint32_t i; 170b47a0373SChaoyong He int ret = 0; 171b47a0373SChaoyong He struct rte_vhost_mem_region *region; 172b47a0373SChaoyong He 173b47a0373SChaoyong He for (i = 0; i < times; i++) { 174b47a0373SChaoyong He region = &mem->regions[i]; 175b47a0373SChaoyong He 176b47a0373SChaoyong He ret = rte_vfio_container_dma_unmap(vfio_container_fd, 177b47a0373SChaoyong He region->host_user_addr, region->guest_phys_addr, 178b47a0373SChaoyong He region->size); 179b47a0373SChaoyong He if (ret < 0) { 180b47a0373SChaoyong He /* Here should not return, even error happened. */ 181*b6de4353SZerun Fu DRV_VDPA_LOG(ERR, "DMA unmap failed. Times: %u.", i); 182b47a0373SChaoyong He } 183b47a0373SChaoyong He } 184b47a0373SChaoyong He 185b47a0373SChaoyong He return ret; 186b47a0373SChaoyong He } 187b47a0373SChaoyong He 188b47a0373SChaoyong He static int 189b47a0373SChaoyong He nfp_vdpa_dma_do_map(struct rte_vhost_memory *mem, 190b47a0373SChaoyong He uint32_t times, 191b47a0373SChaoyong He int vfio_container_fd) 192b47a0373SChaoyong He { 193b47a0373SChaoyong He int ret; 194b47a0373SChaoyong He uint32_t i; 195b47a0373SChaoyong He struct rte_vhost_mem_region *region; 196b47a0373SChaoyong He 197b47a0373SChaoyong He for (i = 0; i < times; i++) { 198b47a0373SChaoyong He region = &mem->regions[i]; 199b47a0373SChaoyong He 200b47a0373SChaoyong He ret = rte_vfio_container_dma_map(vfio_container_fd, 201b47a0373SChaoyong He region->host_user_addr, region->guest_phys_addr, 202b47a0373SChaoyong He region->size); 203b47a0373SChaoyong He if (ret < 0) { 204b47a0373SChaoyong He DRV_VDPA_LOG(ERR, "DMA map failed."); 205b47a0373SChaoyong He nfp_vdpa_dma_do_unmap(mem, i, vfio_container_fd); 206b47a0373SChaoyong He return ret; 207b47a0373SChaoyong He } 208b47a0373SChaoyong He } 209b47a0373SChaoyong He 210b47a0373SChaoyong He return 0; 211b47a0373SChaoyong He } 212b47a0373SChaoyong He 213b47a0373SChaoyong He static int 214b47a0373SChaoyong He nfp_vdpa_dma_map(struct nfp_vdpa_dev *device, 215b47a0373SChaoyong He bool do_map) 216b47a0373SChaoyong He { 217b47a0373SChaoyong He int ret; 218b47a0373SChaoyong He int vfio_container_fd; 219b47a0373SChaoyong He struct rte_vhost_memory *mem = NULL; 220b47a0373SChaoyong He 221b47a0373SChaoyong He ret = rte_vhost_get_mem_table(device->vid, &mem); 222b47a0373SChaoyong He if (ret < 0) { 223b47a0373SChaoyong He DRV_VDPA_LOG(ERR, "Failed to get memory layout."); 224b47a0373SChaoyong He return ret; 225b47a0373SChaoyong He } 226b47a0373SChaoyong He 227b47a0373SChaoyong He vfio_container_fd = device->vfio_container_fd; 228*b6de4353SZerun Fu DRV_VDPA_LOG(DEBUG, "The vfio_container_fd %d.", vfio_container_fd); 229b47a0373SChaoyong He 230b47a0373SChaoyong He if (do_map) 231b47a0373SChaoyong He ret = nfp_vdpa_dma_do_map(mem, mem->nregions, vfio_container_fd); 232b47a0373SChaoyong He else 233b47a0373SChaoyong He ret = nfp_vdpa_dma_do_unmap(mem, mem->nregions, vfio_container_fd); 234b47a0373SChaoyong He 235b47a0373SChaoyong He free(mem); 236b47a0373SChaoyong He 237b47a0373SChaoyong He return ret; 238b47a0373SChaoyong He } 239b47a0373SChaoyong He 240b47a0373SChaoyong He static uint64_t 241b47a0373SChaoyong He nfp_vdpa_qva_to_gpa(int vid, 242b47a0373SChaoyong He uint64_t qva) 243b47a0373SChaoyong He { 244b47a0373SChaoyong He int ret; 245b47a0373SChaoyong He uint32_t i; 246b47a0373SChaoyong He uint64_t gpa = 0; 247b47a0373SChaoyong He struct rte_vhost_memory *mem = NULL; 248b47a0373SChaoyong He struct rte_vhost_mem_region *region; 249b47a0373SChaoyong He 250b47a0373SChaoyong He ret = rte_vhost_get_mem_table(vid, &mem); 251b47a0373SChaoyong He if (ret < 0) { 252b47a0373SChaoyong He DRV_VDPA_LOG(ERR, "Failed to get memory layout."); 253b47a0373SChaoyong He return gpa; 254b47a0373SChaoyong He } 255b47a0373SChaoyong He 256b47a0373SChaoyong He for (i = 0; i < mem->nregions; i++) { 257b47a0373SChaoyong He region = &mem->regions[i]; 258b47a0373SChaoyong He 259b47a0373SChaoyong He if (qva >= region->host_user_addr && 260b47a0373SChaoyong He qva < region->host_user_addr + region->size) { 261b47a0373SChaoyong He gpa = qva - region->host_user_addr + region->guest_phys_addr; 262b47a0373SChaoyong He break; 263b47a0373SChaoyong He } 264b47a0373SChaoyong He } 265b47a0373SChaoyong He 266b47a0373SChaoyong He free(mem); 267b47a0373SChaoyong He 268b47a0373SChaoyong He return gpa; 269b47a0373SChaoyong He } 270b47a0373SChaoyong He 271e6ac31e0SXinying Yu static void 272e6ac31e0SXinying Yu nfp_vdpa_relay_vring_free(struct nfp_vdpa_dev *device, 273e6ac31e0SXinying Yu uint16_t vring_index) 274e6ac31e0SXinying Yu { 275e6ac31e0SXinying Yu uint16_t i; 276e6ac31e0SXinying Yu uint64_t size; 277e6ac31e0SXinying Yu struct rte_vhost_vring vring; 278e6ac31e0SXinying Yu uint64_t m_vring_iova = NFP_VDPA_RELAY_VRING; 279e6ac31e0SXinying Yu 280e6ac31e0SXinying Yu for (i = 0; i < vring_index; i++) { 281e6ac31e0SXinying Yu rte_vhost_get_vhost_vring(device->vid, i, &vring); 282e6ac31e0SXinying Yu 283e6ac31e0SXinying Yu size = RTE_ALIGN_CEIL(vring_size(vring.size, rte_mem_page_size()), 284e6ac31e0SXinying Yu rte_mem_page_size()); 285e6ac31e0SXinying Yu rte_vfio_container_dma_unmap(device->vfio_container_fd, 286e6ac31e0SXinying Yu (uint64_t)(uintptr_t)device->hw.m_vring[i].desc, 287e6ac31e0SXinying Yu m_vring_iova, size); 288e6ac31e0SXinying Yu 289e6ac31e0SXinying Yu rte_free(device->hw.m_vring[i].desc); 290e6ac31e0SXinying Yu m_vring_iova += size; 291e6ac31e0SXinying Yu } 292e6ac31e0SXinying Yu } 293e6ac31e0SXinying Yu 294b47a0373SChaoyong He static int 295e6ac31e0SXinying Yu nfp_vdpa_relay_vring_alloc(struct nfp_vdpa_dev *device) 296e6ac31e0SXinying Yu { 297e6ac31e0SXinying Yu int ret; 298e6ac31e0SXinying Yu uint16_t i; 299e6ac31e0SXinying Yu uint64_t size; 300e6ac31e0SXinying Yu void *vring_buf; 301e6ac31e0SXinying Yu uint64_t page_size; 302e6ac31e0SXinying Yu struct rte_vhost_vring vring; 303e6ac31e0SXinying Yu struct nfp_vdpa_hw *vdpa_hw = &device->hw; 304e6ac31e0SXinying Yu uint64_t m_vring_iova = NFP_VDPA_RELAY_VRING; 305e6ac31e0SXinying Yu 306e6ac31e0SXinying Yu page_size = rte_mem_page_size(); 307e6ac31e0SXinying Yu 308e6ac31e0SXinying Yu for (i = 0; i < vdpa_hw->nr_vring; i++) { 309e6ac31e0SXinying Yu rte_vhost_get_vhost_vring(device->vid, i, &vring); 310e6ac31e0SXinying Yu 311e6ac31e0SXinying Yu size = RTE_ALIGN_CEIL(vring_size(vring.size, page_size), page_size); 312e6ac31e0SXinying Yu vring_buf = rte_zmalloc("nfp_vdpa_relay", size, page_size); 313e6ac31e0SXinying Yu if (vring_buf == NULL) 314e6ac31e0SXinying Yu goto vring_free_all; 315e6ac31e0SXinying Yu 316e6ac31e0SXinying Yu vring_init(&vdpa_hw->m_vring[i], vring.size, vring_buf, page_size); 317e6ac31e0SXinying Yu 318e6ac31e0SXinying Yu ret = rte_vfio_container_dma_map(device->vfio_container_fd, 319e6ac31e0SXinying Yu (uint64_t)(uintptr_t)vring_buf, m_vring_iova, size); 320e6ac31e0SXinying Yu if (ret != 0) { 321e6ac31e0SXinying Yu DRV_VDPA_LOG(ERR, "vDPA vring relay dma map failed."); 322e6ac31e0SXinying Yu goto vring_free_one; 323e6ac31e0SXinying Yu } 324e6ac31e0SXinying Yu 325e6ac31e0SXinying Yu m_vring_iova += size; 326e6ac31e0SXinying Yu } 327e6ac31e0SXinying Yu 328e6ac31e0SXinying Yu return 0; 329e6ac31e0SXinying Yu 330e6ac31e0SXinying Yu vring_free_one: 331e6ac31e0SXinying Yu rte_free(device->hw.m_vring[i].desc); 332e6ac31e0SXinying Yu vring_free_all: 333e6ac31e0SXinying Yu nfp_vdpa_relay_vring_free(device, i); 334e6ac31e0SXinying Yu 335e6ac31e0SXinying Yu return -ENOSPC; 336e6ac31e0SXinying Yu } 337e6ac31e0SXinying Yu 338e6ac31e0SXinying Yu static int 339e6ac31e0SXinying Yu nfp_vdpa_start(struct nfp_vdpa_dev *device, 340e6ac31e0SXinying Yu bool relay) 341b47a0373SChaoyong He { 342b47a0373SChaoyong He int ret; 343b47a0373SChaoyong He int vid; 344b47a0373SChaoyong He uint16_t i; 345b47a0373SChaoyong He uint64_t gpa; 346e6ac31e0SXinying Yu uint16_t size; 347b47a0373SChaoyong He struct rte_vhost_vring vring; 348b47a0373SChaoyong He struct nfp_vdpa_hw *vdpa_hw = &device->hw; 349e6ac31e0SXinying Yu uint64_t m_vring_iova = NFP_VDPA_RELAY_VRING; 350b47a0373SChaoyong He 351b47a0373SChaoyong He vid = device->vid; 352b47a0373SChaoyong He vdpa_hw->nr_vring = rte_vhost_get_vring_num(vid); 353b47a0373SChaoyong He 354b47a0373SChaoyong He ret = rte_vhost_get_negotiated_features(vid, &vdpa_hw->req_features); 355b47a0373SChaoyong He if (ret != 0) 356b47a0373SChaoyong He return ret; 357b47a0373SChaoyong He 358e6ac31e0SXinying Yu if (relay) { 359e6ac31e0SXinying Yu ret = nfp_vdpa_relay_vring_alloc(device); 360e6ac31e0SXinying Yu if (ret != 0) 361e6ac31e0SXinying Yu return ret; 362e6ac31e0SXinying Yu } 363e6ac31e0SXinying Yu 364b47a0373SChaoyong He for (i = 0; i < vdpa_hw->nr_vring; i++) { 365b47a0373SChaoyong He ret = rte_vhost_get_vhost_vring(vid, i, &vring); 366b47a0373SChaoyong He if (ret != 0) 367e6ac31e0SXinying Yu goto relay_vring_free; 368b47a0373SChaoyong He 369b47a0373SChaoyong He gpa = nfp_vdpa_qva_to_gpa(vid, (uint64_t)(uintptr_t)vring.desc); 370b47a0373SChaoyong He if (gpa == 0) { 371b47a0373SChaoyong He DRV_VDPA_LOG(ERR, "Fail to get GPA for descriptor ring."); 372e6ac31e0SXinying Yu goto relay_vring_free; 373b47a0373SChaoyong He } 374b47a0373SChaoyong He 375b47a0373SChaoyong He vdpa_hw->vring[i].desc = gpa; 376b47a0373SChaoyong He 377b47a0373SChaoyong He gpa = nfp_vdpa_qva_to_gpa(vid, (uint64_t)(uintptr_t)vring.avail); 378b47a0373SChaoyong He if (gpa == 0) { 379b47a0373SChaoyong He DRV_VDPA_LOG(ERR, "Fail to get GPA for available ring."); 380e6ac31e0SXinying Yu goto relay_vring_free; 381b47a0373SChaoyong He } 382b47a0373SChaoyong He 383b47a0373SChaoyong He vdpa_hw->vring[i].avail = gpa; 384b47a0373SChaoyong He 385e6ac31e0SXinying Yu /* Direct I/O for Tx queue, relay for Rx queue */ 386e6ac31e0SXinying Yu if (relay && ((i & 1) == 0)) { 387e6ac31e0SXinying Yu vdpa_hw->vring[i].used = m_vring_iova + 388e6ac31e0SXinying Yu (char *)vdpa_hw->m_vring[i].used - 389e6ac31e0SXinying Yu (char *)vdpa_hw->m_vring[i].desc; 390e6ac31e0SXinying Yu 391e6ac31e0SXinying Yu ret = rte_vhost_get_vring_base(vid, i, 392e6ac31e0SXinying Yu &vdpa_hw->m_vring[i].avail->idx, 393e6ac31e0SXinying Yu &vdpa_hw->m_vring[i].used->idx); 394e6ac31e0SXinying Yu if (ret != 0) 395e6ac31e0SXinying Yu goto relay_vring_free; 396e6ac31e0SXinying Yu } else { 397b47a0373SChaoyong He gpa = nfp_vdpa_qva_to_gpa(vid, (uint64_t)(uintptr_t)vring.used); 398b47a0373SChaoyong He if (gpa == 0) { 399b47a0373SChaoyong He DRV_VDPA_LOG(ERR, "Fail to get GPA for used ring."); 400e6ac31e0SXinying Yu goto relay_vring_free; 401b47a0373SChaoyong He } 402b47a0373SChaoyong He 403b47a0373SChaoyong He vdpa_hw->vring[i].used = gpa; 404e6ac31e0SXinying Yu } 405b47a0373SChaoyong He 406b47a0373SChaoyong He vdpa_hw->vring[i].size = vring.size; 407b47a0373SChaoyong He 408e6ac31e0SXinying Yu if (relay) { 409e6ac31e0SXinying Yu size = RTE_ALIGN_CEIL(vring_size(vring.size, 410e6ac31e0SXinying Yu rte_mem_page_size()), rte_mem_page_size()); 411e6ac31e0SXinying Yu m_vring_iova += size; 412e6ac31e0SXinying Yu } 413e6ac31e0SXinying Yu 414b47a0373SChaoyong He ret = rte_vhost_get_vring_base(vid, i, 415b47a0373SChaoyong He &vdpa_hw->vring[i].last_avail_idx, 416b47a0373SChaoyong He &vdpa_hw->vring[i].last_used_idx); 417b47a0373SChaoyong He if (ret != 0) 418e6ac31e0SXinying Yu goto relay_vring_free; 419b47a0373SChaoyong He } 420b47a0373SChaoyong He 421e6ac31e0SXinying Yu if (relay) 422e6ac31e0SXinying Yu return nfp_vdpa_relay_hw_start(&device->hw, vid); 423e6ac31e0SXinying Yu else 424b47a0373SChaoyong He return nfp_vdpa_hw_start(&device->hw, vid); 425e6ac31e0SXinying Yu 426e6ac31e0SXinying Yu relay_vring_free: 427e6ac31e0SXinying Yu if (relay) 428e6ac31e0SXinying Yu nfp_vdpa_relay_vring_free(device, vdpa_hw->nr_vring); 429e6ac31e0SXinying Yu 430e6ac31e0SXinying Yu return -EFAULT; 431b47a0373SChaoyong He } 432b47a0373SChaoyong He 433b47a0373SChaoyong He static void 434e6ac31e0SXinying Yu nfp_vdpa_update_used_ring(struct nfp_vdpa_dev *dev, 435e6ac31e0SXinying Yu uint16_t qid) 436e6ac31e0SXinying Yu { 437e6ac31e0SXinying Yu rte_vdpa_relay_vring_used(dev->vid, qid, &dev->hw.m_vring[qid]); 438e6ac31e0SXinying Yu rte_vhost_vring_call(dev->vid, qid); 439e6ac31e0SXinying Yu } 440e6ac31e0SXinying Yu 441e6ac31e0SXinying Yu static void 442e6ac31e0SXinying Yu nfp_vdpa_relay_stop(struct nfp_vdpa_dev *device) 443e6ac31e0SXinying Yu { 444e6ac31e0SXinying Yu int vid; 445e6ac31e0SXinying Yu uint32_t i; 446e6ac31e0SXinying Yu uint64_t len; 447e6ac31e0SXinying Yu struct rte_vhost_vring vring; 448e6ac31e0SXinying Yu struct nfp_vdpa_hw *vdpa_hw = &device->hw; 449e6ac31e0SXinying Yu 450e6ac31e0SXinying Yu nfp_vdpa_hw_stop(vdpa_hw); 451e6ac31e0SXinying Yu 452e6ac31e0SXinying Yu vid = device->vid; 453e6ac31e0SXinying Yu for (i = 0; i < vdpa_hw->nr_vring; i++) { 454e6ac31e0SXinying Yu /* Synchronize remaining new used entries if any */ 455e6ac31e0SXinying Yu if ((i & 1) == 0) 456e6ac31e0SXinying Yu nfp_vdpa_update_used_ring(device, i); 457e6ac31e0SXinying Yu 458e6ac31e0SXinying Yu rte_vhost_get_vhost_vring(vid, i, &vring); 459e6ac31e0SXinying Yu len = NFP_VDPA_USED_RING_LEN(vring.size); 460e6ac31e0SXinying Yu vdpa_hw->vring[i].last_avail_idx = vring.avail->idx; 461e6ac31e0SXinying Yu vdpa_hw->vring[i].last_used_idx = vring.used->idx; 462e6ac31e0SXinying Yu 463e6ac31e0SXinying Yu rte_vhost_set_vring_base(vid, i, 464e6ac31e0SXinying Yu vdpa_hw->vring[i].last_avail_idx, 465e6ac31e0SXinying Yu vdpa_hw->vring[i].last_used_idx); 466e6ac31e0SXinying Yu 467e6ac31e0SXinying Yu rte_vhost_log_used_vring(vid, i, 0, len); 468e6ac31e0SXinying Yu 469e6ac31e0SXinying Yu if (vring.used->idx != vring.avail->idx) 470e6ac31e0SXinying Yu rte_atomic_store_explicit( 471e6ac31e0SXinying Yu (unsigned short __rte_atomic *)&vring.used->idx, 472e6ac31e0SXinying Yu vring.avail->idx, rte_memory_order_release); 473e6ac31e0SXinying Yu } 474e6ac31e0SXinying Yu 475e6ac31e0SXinying Yu nfp_vdpa_relay_vring_free(device, vdpa_hw->nr_vring); 476e6ac31e0SXinying Yu } 477e6ac31e0SXinying Yu 478e6ac31e0SXinying Yu static void 479e6ac31e0SXinying Yu nfp_vdpa_stop(struct nfp_vdpa_dev *device, 480e6ac31e0SXinying Yu bool relay) 481b47a0373SChaoyong He { 482b47a0373SChaoyong He int vid; 483b47a0373SChaoyong He uint32_t i; 484b47a0373SChaoyong He struct nfp_vdpa_hw *vdpa_hw = &device->hw; 485b47a0373SChaoyong He 486b47a0373SChaoyong He nfp_vdpa_hw_stop(vdpa_hw); 487b47a0373SChaoyong He 488b47a0373SChaoyong He vid = device->vid; 489e6ac31e0SXinying Yu if (relay) 490e6ac31e0SXinying Yu nfp_vdpa_relay_stop(device); 491e6ac31e0SXinying Yu else 492b47a0373SChaoyong He for (i = 0; i < vdpa_hw->nr_vring; i++) 493b47a0373SChaoyong He rte_vhost_set_vring_base(vid, i, 494b47a0373SChaoyong He vdpa_hw->vring[i].last_avail_idx, 495b47a0373SChaoyong He vdpa_hw->vring[i].last_used_idx); 496e6ac31e0SXinying Yu 497b47a0373SChaoyong He } 498b47a0373SChaoyong He 499b47a0373SChaoyong He static int 50010421b0dSXinying Yu nfp_vdpa_enable_vfio_intr(struct nfp_vdpa_dev *device, 50110421b0dSXinying Yu bool relay) 502b47a0373SChaoyong He { 50310421b0dSXinying Yu int fd; 504b47a0373SChaoyong He int ret; 505b47a0373SChaoyong He uint16_t i; 506b47a0373SChaoyong He int *fd_ptr; 507b47a0373SChaoyong He uint16_t nr_vring; 508b47a0373SChaoyong He struct vfio_irq_set *irq_set; 509b47a0373SChaoyong He struct rte_vhost_vring vring; 510b47a0373SChaoyong He char irq_set_buf[MSIX_IRQ_SET_BUF_LEN]; 511b47a0373SChaoyong He 512b47a0373SChaoyong He nr_vring = rte_vhost_get_vring_num(device->vid); 513b47a0373SChaoyong He 514b47a0373SChaoyong He irq_set = (struct vfio_irq_set *)irq_set_buf; 515b47a0373SChaoyong He irq_set->argsz = sizeof(irq_set_buf); 516b47a0373SChaoyong He irq_set->count = nr_vring + 1; 517b47a0373SChaoyong He irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER; 518b47a0373SChaoyong He irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX; 519b47a0373SChaoyong He irq_set->start = 0; 520b47a0373SChaoyong He 521b47a0373SChaoyong He fd_ptr = (int *)&irq_set->data; 522b47a0373SChaoyong He fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] = rte_intr_fd_get(device->pci_dev->intr_handle); 523b47a0373SChaoyong He 524b47a0373SChaoyong He for (i = 0; i < nr_vring; i++) 525b47a0373SChaoyong He device->intr_fd[i] = -1; 526b47a0373SChaoyong He 527b47a0373SChaoyong He for (i = 0; i < nr_vring; i++) { 528b47a0373SChaoyong He rte_vhost_get_vhost_vring(device->vid, i, &vring); 529b47a0373SChaoyong He fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd; 530b47a0373SChaoyong He } 531b47a0373SChaoyong He 53210421b0dSXinying Yu if (relay) { 53310421b0dSXinying Yu for (i = 0; i < nr_vring; i += 2) { 53410421b0dSXinying Yu fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); 53510421b0dSXinying Yu if (fd < 0) { 536*b6de4353SZerun Fu DRV_VDPA_LOG(ERR, "Can't setup eventfd."); 53710421b0dSXinying Yu return -EINVAL; 53810421b0dSXinying Yu } 53910421b0dSXinying Yu 54010421b0dSXinying Yu device->intr_fd[i] = fd; 54110421b0dSXinying Yu fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd; 54210421b0dSXinying Yu } 54310421b0dSXinying Yu } 54410421b0dSXinying Yu 545b47a0373SChaoyong He ret = ioctl(device->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); 546b47a0373SChaoyong He if (ret != 0) { 547b47a0373SChaoyong He DRV_VDPA_LOG(ERR, "Error enabling MSI-X interrupts."); 548b47a0373SChaoyong He return -EIO; 549b47a0373SChaoyong He } 550b47a0373SChaoyong He 551b47a0373SChaoyong He return 0; 552b47a0373SChaoyong He } 553b47a0373SChaoyong He 554b47a0373SChaoyong He static int 555b47a0373SChaoyong He nfp_vdpa_disable_vfio_intr(struct nfp_vdpa_dev *device) 556b47a0373SChaoyong He { 557b47a0373SChaoyong He int ret; 558b47a0373SChaoyong He struct vfio_irq_set *irq_set; 559b47a0373SChaoyong He char irq_set_buf[MSIX_IRQ_SET_BUF_LEN]; 560b47a0373SChaoyong He 561b47a0373SChaoyong He irq_set = (struct vfio_irq_set *)irq_set_buf; 562b47a0373SChaoyong He irq_set->argsz = sizeof(irq_set_buf); 563b47a0373SChaoyong He irq_set->count = 0; 564b47a0373SChaoyong He irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER; 565b47a0373SChaoyong He irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX; 566b47a0373SChaoyong He irq_set->start = 0; 567b47a0373SChaoyong He 568b47a0373SChaoyong He ret = ioctl(device->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); 569b47a0373SChaoyong He if (ret != 0) { 570b47a0373SChaoyong He DRV_VDPA_LOG(ERR, "Error disabling MSI-X interrupts."); 571b47a0373SChaoyong He return -EIO; 572b47a0373SChaoyong He } 573b47a0373SChaoyong He 574b47a0373SChaoyong He return 0; 575b47a0373SChaoyong He } 576b47a0373SChaoyong He 57776ea5ebeSChaoyong He static void 57876ea5ebeSChaoyong He nfp_vdpa_read_kickfd(int kickfd) 57976ea5ebeSChaoyong He { 58076ea5ebeSChaoyong He int bytes; 58176ea5ebeSChaoyong He uint64_t buf; 58276ea5ebeSChaoyong He 58376ea5ebeSChaoyong He for (;;) { 58476ea5ebeSChaoyong He bytes = read(kickfd, &buf, 8); 58576ea5ebeSChaoyong He if (bytes >= 0) 58676ea5ebeSChaoyong He break; 58776ea5ebeSChaoyong He 58876ea5ebeSChaoyong He if (errno != EINTR && errno != EWOULDBLOCK && 58976ea5ebeSChaoyong He errno != EAGAIN) { 590*b6de4353SZerun Fu DRV_VDPA_LOG(ERR, "Error reading kickfd."); 59176ea5ebeSChaoyong He break; 59276ea5ebeSChaoyong He } 59376ea5ebeSChaoyong He } 59476ea5ebeSChaoyong He } 59576ea5ebeSChaoyong He 59676ea5ebeSChaoyong He static int 59776ea5ebeSChaoyong He nfp_vdpa_notify_epoll_ctl(uint32_t queue_num, 59876ea5ebeSChaoyong He struct nfp_vdpa_dev *device) 59976ea5ebeSChaoyong He { 60076ea5ebeSChaoyong He int ret; 60176ea5ebeSChaoyong He uint32_t qid; 60276ea5ebeSChaoyong He 60376ea5ebeSChaoyong He for (qid = 0; qid < queue_num; qid++) { 60476ea5ebeSChaoyong He struct epoll_event ev; 60576ea5ebeSChaoyong He struct rte_vhost_vring vring; 60676ea5ebeSChaoyong He 60776ea5ebeSChaoyong He ev.events = EPOLLIN | EPOLLPRI; 60876ea5ebeSChaoyong He rte_vhost_get_vhost_vring(device->vid, qid, &vring); 60976ea5ebeSChaoyong He ev.data.u64 = qid | (uint64_t)vring.kickfd << 32; 61076ea5ebeSChaoyong He ret = epoll_ctl(device->epoll_fd, EPOLL_CTL_ADD, vring.kickfd, &ev); 61176ea5ebeSChaoyong He if (ret < 0) { 612*b6de4353SZerun Fu DRV_VDPA_LOG(ERR, "Epoll add error for queue %d.", qid); 61376ea5ebeSChaoyong He return ret; 61476ea5ebeSChaoyong He } 61576ea5ebeSChaoyong He } 61676ea5ebeSChaoyong He 61776ea5ebeSChaoyong He return 0; 61876ea5ebeSChaoyong He } 61976ea5ebeSChaoyong He 62076ea5ebeSChaoyong He static int 62176ea5ebeSChaoyong He nfp_vdpa_notify_epoll_wait(uint32_t queue_num, 62276ea5ebeSChaoyong He struct nfp_vdpa_dev *device) 62376ea5ebeSChaoyong He { 62476ea5ebeSChaoyong He int i; 62576ea5ebeSChaoyong He int fds; 62676ea5ebeSChaoyong He int kickfd; 62776ea5ebeSChaoyong He uint32_t qid; 62876ea5ebeSChaoyong He struct epoll_event events[NFP_VDPA_MAX_QUEUES * 2]; 62976ea5ebeSChaoyong He 63076ea5ebeSChaoyong He for (;;) { 63176ea5ebeSChaoyong He fds = epoll_wait(device->epoll_fd, events, queue_num, -1); 63276ea5ebeSChaoyong He if (fds < 0) { 63376ea5ebeSChaoyong He if (errno == EINTR) 63476ea5ebeSChaoyong He continue; 63576ea5ebeSChaoyong He 636*b6de4353SZerun Fu DRV_VDPA_LOG(ERR, "Epoll wait fail."); 63776ea5ebeSChaoyong He return -EACCES; 63876ea5ebeSChaoyong He } 63976ea5ebeSChaoyong He 64076ea5ebeSChaoyong He for (i = 0; i < fds; i++) { 64176ea5ebeSChaoyong He qid = events[i].data.u32; 64276ea5ebeSChaoyong He kickfd = (uint32_t)(events[i].data.u64 >> 32); 64376ea5ebeSChaoyong He 64476ea5ebeSChaoyong He nfp_vdpa_read_kickfd(kickfd); 64576ea5ebeSChaoyong He nfp_vdpa_notify_queue(&device->hw, qid); 64676ea5ebeSChaoyong He } 64776ea5ebeSChaoyong He } 64876ea5ebeSChaoyong He 64976ea5ebeSChaoyong He return 0; 65076ea5ebeSChaoyong He } 65176ea5ebeSChaoyong He 65276ea5ebeSChaoyong He static uint32_t 65376ea5ebeSChaoyong He nfp_vdpa_notify_relay(void *arg) 65476ea5ebeSChaoyong He { 65576ea5ebeSChaoyong He int ret; 65676ea5ebeSChaoyong He int epoll_fd; 65776ea5ebeSChaoyong He uint32_t queue_num; 65876ea5ebeSChaoyong He struct nfp_vdpa_dev *device = arg; 65976ea5ebeSChaoyong He 66076ea5ebeSChaoyong He epoll_fd = epoll_create(NFP_VDPA_MAX_QUEUES * 2); 66176ea5ebeSChaoyong He if (epoll_fd < 0) { 662f6272c7aSZerun Fu DRV_VDPA_LOG(ERR, "Failed to create epoll instance."); 66376ea5ebeSChaoyong He return 1; 66476ea5ebeSChaoyong He } 66576ea5ebeSChaoyong He 66676ea5ebeSChaoyong He device->epoll_fd = epoll_fd; 66776ea5ebeSChaoyong He 66876ea5ebeSChaoyong He queue_num = rte_vhost_get_vring_num(device->vid); 66976ea5ebeSChaoyong He 67076ea5ebeSChaoyong He ret = nfp_vdpa_notify_epoll_ctl(queue_num, device); 67176ea5ebeSChaoyong He if (ret != 0) 67276ea5ebeSChaoyong He goto notify_exit; 67376ea5ebeSChaoyong He 67476ea5ebeSChaoyong He ret = nfp_vdpa_notify_epoll_wait(queue_num, device); 67576ea5ebeSChaoyong He if (ret != 0) 67676ea5ebeSChaoyong He goto notify_exit; 67776ea5ebeSChaoyong He 67876ea5ebeSChaoyong He return 0; 67976ea5ebeSChaoyong He 68076ea5ebeSChaoyong He notify_exit: 68176ea5ebeSChaoyong He close(device->epoll_fd); 68276ea5ebeSChaoyong He device->epoll_fd = -1; 68376ea5ebeSChaoyong He 68476ea5ebeSChaoyong He return 1; 68576ea5ebeSChaoyong He } 68676ea5ebeSChaoyong He 68776ea5ebeSChaoyong He static int 68876ea5ebeSChaoyong He nfp_vdpa_setup_notify_relay(struct nfp_vdpa_dev *device) 68976ea5ebeSChaoyong He { 69076ea5ebeSChaoyong He int ret; 69176ea5ebeSChaoyong He char name[RTE_THREAD_INTERNAL_NAME_SIZE]; 69276ea5ebeSChaoyong He 69376ea5ebeSChaoyong He snprintf(name, sizeof(name), "nfp-noti%d", device->vid); 69476ea5ebeSChaoyong He ret = rte_thread_create_internal_control(&device->tid, name, 69576ea5ebeSChaoyong He nfp_vdpa_notify_relay, (void *)device); 69676ea5ebeSChaoyong He if (ret != 0) { 69776ea5ebeSChaoyong He DRV_VDPA_LOG(ERR, "Failed to create notify relay pthread."); 69876ea5ebeSChaoyong He return -1; 69976ea5ebeSChaoyong He } 70076ea5ebeSChaoyong He 70176ea5ebeSChaoyong He return 0; 70276ea5ebeSChaoyong He } 70376ea5ebeSChaoyong He 70476ea5ebeSChaoyong He static void 70576ea5ebeSChaoyong He nfp_vdpa_unset_notify_relay(struct nfp_vdpa_dev *device) 70676ea5ebeSChaoyong He { 70776ea5ebeSChaoyong He if (device->tid.opaque_id != 0) { 70876ea5ebeSChaoyong He pthread_cancel((pthread_t)device->tid.opaque_id); 70976ea5ebeSChaoyong He rte_thread_join(device->tid, NULL); 71076ea5ebeSChaoyong He device->tid.opaque_id = 0; 71176ea5ebeSChaoyong He } 71276ea5ebeSChaoyong He 71376ea5ebeSChaoyong He if (device->epoll_fd >= 0) { 71476ea5ebeSChaoyong He close(device->epoll_fd); 71576ea5ebeSChaoyong He device->epoll_fd = -1; 71676ea5ebeSChaoyong He } 71776ea5ebeSChaoyong He } 71876ea5ebeSChaoyong He 719b47a0373SChaoyong He static int 720b47a0373SChaoyong He update_datapath(struct nfp_vdpa_dev *device) 721b47a0373SChaoyong He { 722b47a0373SChaoyong He int ret; 723b47a0373SChaoyong He 724b47a0373SChaoyong He rte_spinlock_lock(&device->lock); 725b47a0373SChaoyong He 726b47a0373SChaoyong He if ((rte_atomic_load_explicit(&device->running, rte_memory_order_relaxed) == 0) && 727b47a0373SChaoyong He (rte_atomic_load_explicit(&device->started, 728b47a0373SChaoyong He rte_memory_order_relaxed) != 0) && 729b47a0373SChaoyong He (rte_atomic_load_explicit(&device->dev_attached, 730b47a0373SChaoyong He rte_memory_order_relaxed) != 0)) { 731b47a0373SChaoyong He ret = nfp_vdpa_dma_map(device, true); 732b47a0373SChaoyong He if (ret != 0) 733b47a0373SChaoyong He goto unlock_exit; 734b47a0373SChaoyong He 73510421b0dSXinying Yu ret = nfp_vdpa_enable_vfio_intr(device, false); 736b47a0373SChaoyong He if (ret != 0) 737b47a0373SChaoyong He goto dma_map_rollback; 738b47a0373SChaoyong He 739e6ac31e0SXinying Yu ret = nfp_vdpa_start(device, false); 740b47a0373SChaoyong He if (ret != 0) 741b47a0373SChaoyong He goto disable_vfio_intr; 742b47a0373SChaoyong He 74376ea5ebeSChaoyong He ret = nfp_vdpa_setup_notify_relay(device); 74476ea5ebeSChaoyong He if (ret != 0) 74576ea5ebeSChaoyong He goto vdpa_stop; 74676ea5ebeSChaoyong He 747b47a0373SChaoyong He rte_atomic_store_explicit(&device->running, 1, rte_memory_order_relaxed); 748b47a0373SChaoyong He } else if ((rte_atomic_load_explicit(&device->running, rte_memory_order_relaxed) != 0) && 749b47a0373SChaoyong He ((rte_atomic_load_explicit(&device->started, 750b47a0373SChaoyong He rte_memory_order_relaxed) != 0) || 751b47a0373SChaoyong He (rte_atomic_load_explicit(&device->dev_attached, 752b47a0373SChaoyong He rte_memory_order_relaxed) != 0))) { 75376ea5ebeSChaoyong He nfp_vdpa_unset_notify_relay(device); 75476ea5ebeSChaoyong He 755e6ac31e0SXinying Yu nfp_vdpa_stop(device, false); 756b47a0373SChaoyong He 757b47a0373SChaoyong He ret = nfp_vdpa_disable_vfio_intr(device); 758b47a0373SChaoyong He if (ret != 0) 759b47a0373SChaoyong He goto unlock_exit; 760b47a0373SChaoyong He 761b47a0373SChaoyong He ret = nfp_vdpa_dma_map(device, false); 762b47a0373SChaoyong He if (ret != 0) 763b47a0373SChaoyong He goto unlock_exit; 764b47a0373SChaoyong He 765b47a0373SChaoyong He rte_atomic_store_explicit(&device->running, 0, rte_memory_order_relaxed); 766b47a0373SChaoyong He } 767b47a0373SChaoyong He 768b47a0373SChaoyong He rte_spinlock_unlock(&device->lock); 769b47a0373SChaoyong He return 0; 770b47a0373SChaoyong He 77176ea5ebeSChaoyong He vdpa_stop: 772e6ac31e0SXinying Yu nfp_vdpa_stop(device, false); 773b47a0373SChaoyong He disable_vfio_intr: 774b47a0373SChaoyong He nfp_vdpa_disable_vfio_intr(device); 775b47a0373SChaoyong He dma_map_rollback: 776b47a0373SChaoyong He nfp_vdpa_dma_map(device, false); 777b47a0373SChaoyong He unlock_exit: 778b47a0373SChaoyong He rte_spinlock_unlock(&device->lock); 779b47a0373SChaoyong He return ret; 780b47a0373SChaoyong He } 781b47a0373SChaoyong He 7820141f545SChaoyong He static int 78302fe8366SXinying Yu nfp_vdpa_vring_epoll_ctl(uint32_t queue_num, 78402fe8366SXinying Yu struct nfp_vdpa_dev *device) 78502fe8366SXinying Yu { 78602fe8366SXinying Yu int ret; 78702fe8366SXinying Yu uint32_t qid; 78802fe8366SXinying Yu struct epoll_event ev; 78902fe8366SXinying Yu struct rte_vhost_vring vring; 79002fe8366SXinying Yu 79102fe8366SXinying Yu for (qid = 0; qid < queue_num; qid++) { 79202fe8366SXinying Yu ev.events = EPOLLIN | EPOLLPRI; 79302fe8366SXinying Yu rte_vhost_get_vhost_vring(device->vid, qid, &vring); 79402fe8366SXinying Yu ev.data.u64 = qid << 1 | (uint64_t)vring.kickfd << 32; 79502fe8366SXinying Yu ret = epoll_ctl(device->epoll_fd, EPOLL_CTL_ADD, vring.kickfd, &ev); 79602fe8366SXinying Yu if (ret < 0) { 797*b6de4353SZerun Fu DRV_VDPA_LOG(ERR, "Epoll add error for queue %u.", qid); 79802fe8366SXinying Yu return ret; 79902fe8366SXinying Yu } 80002fe8366SXinying Yu } 80102fe8366SXinying Yu 80202fe8366SXinying Yu /* vDPA driver interrupt */ 80302fe8366SXinying Yu for (qid = 0; qid < queue_num; qid += 2) { 80402fe8366SXinying Yu ev.events = EPOLLIN | EPOLLPRI; 80502fe8366SXinying Yu /* Leave a flag to mark it's for interrupt */ 80602fe8366SXinying Yu ev.data.u64 = EPOLL_DATA_INTR | qid << 1 | 80702fe8366SXinying Yu (uint64_t)device->intr_fd[qid] << 32; 80802fe8366SXinying Yu ret = epoll_ctl(device->epoll_fd, EPOLL_CTL_ADD, 80902fe8366SXinying Yu device->intr_fd[qid], &ev); 81002fe8366SXinying Yu if (ret < 0) { 811*b6de4353SZerun Fu DRV_VDPA_LOG(ERR, "Epoll add error for queue %u.", qid); 81202fe8366SXinying Yu return ret; 81302fe8366SXinying Yu } 81402fe8366SXinying Yu 81502fe8366SXinying Yu nfp_vdpa_update_used_ring(device, qid); 81602fe8366SXinying Yu } 81702fe8366SXinying Yu 81802fe8366SXinying Yu return 0; 81902fe8366SXinying Yu } 82002fe8366SXinying Yu 82102fe8366SXinying Yu static int 82202fe8366SXinying Yu nfp_vdpa_vring_epoll_wait(uint32_t queue_num, 82302fe8366SXinying Yu struct nfp_vdpa_dev *device) 82402fe8366SXinying Yu { 82502fe8366SXinying Yu int i; 82602fe8366SXinying Yu int fds; 82702fe8366SXinying Yu int kickfd; 82802fe8366SXinying Yu uint32_t qid; 82902fe8366SXinying Yu struct epoll_event events[NFP_VDPA_MAX_QUEUES * 2]; 83002fe8366SXinying Yu 83102fe8366SXinying Yu for (;;) { 83202fe8366SXinying Yu fds = epoll_wait(device->epoll_fd, events, queue_num * 2, -1); 83302fe8366SXinying Yu if (fds < 0) { 83402fe8366SXinying Yu if (errno == EINTR) 83502fe8366SXinying Yu continue; 83602fe8366SXinying Yu 837*b6de4353SZerun Fu DRV_VDPA_LOG(ERR, "Epoll wait fail."); 83802fe8366SXinying Yu return -EACCES; 83902fe8366SXinying Yu } 84002fe8366SXinying Yu 84102fe8366SXinying Yu for (i = 0; i < fds; i++) { 84202fe8366SXinying Yu qid = events[i].data.u32 >> 1; 84302fe8366SXinying Yu kickfd = (uint32_t)(events[i].data.u64 >> 32); 84402fe8366SXinying Yu 84502fe8366SXinying Yu nfp_vdpa_read_kickfd(kickfd); 84602fe8366SXinying Yu if ((events[i].data.u32 & EPOLL_DATA_INTR) != 0) { 84702fe8366SXinying Yu nfp_vdpa_update_used_ring(device, qid); 84802fe8366SXinying Yu nfp_vdpa_irq_unmask(&device->hw); 84902fe8366SXinying Yu } else { 85002fe8366SXinying Yu nfp_vdpa_notify_queue(&device->hw, qid); 85102fe8366SXinying Yu } 85202fe8366SXinying Yu } 85302fe8366SXinying Yu } 85402fe8366SXinying Yu 85502fe8366SXinying Yu return 0; 85602fe8366SXinying Yu } 85702fe8366SXinying Yu 85802fe8366SXinying Yu static uint32_t 85902fe8366SXinying Yu nfp_vdpa_vring_relay(void *arg) 86002fe8366SXinying Yu { 86102fe8366SXinying Yu int ret; 86202fe8366SXinying Yu int epoll_fd; 86302fe8366SXinying Yu uint16_t queue_id; 86402fe8366SXinying Yu uint32_t queue_num; 86502fe8366SXinying Yu struct nfp_vdpa_dev *device = arg; 86602fe8366SXinying Yu 86702fe8366SXinying Yu epoll_fd = epoll_create(NFP_VDPA_MAX_QUEUES * 2); 86802fe8366SXinying Yu if (epoll_fd < 0) { 86902fe8366SXinying Yu DRV_VDPA_LOG(ERR, "failed to create epoll instance."); 87002fe8366SXinying Yu return 1; 87102fe8366SXinying Yu } 87202fe8366SXinying Yu 87302fe8366SXinying Yu device->epoll_fd = epoll_fd; 87402fe8366SXinying Yu 87502fe8366SXinying Yu queue_num = rte_vhost_get_vring_num(device->vid); 87602fe8366SXinying Yu 87702fe8366SXinying Yu ret = nfp_vdpa_vring_epoll_ctl(queue_num, device); 87802fe8366SXinying Yu if (ret != 0) 87902fe8366SXinying Yu goto notify_exit; 88002fe8366SXinying Yu 88102fe8366SXinying Yu /* Start relay with a first kick */ 88202fe8366SXinying Yu for (queue_id = 0; queue_id < queue_num; queue_id++) 88302fe8366SXinying Yu nfp_vdpa_notify_queue(&device->hw, queue_id); 88402fe8366SXinying Yu 88502fe8366SXinying Yu ret = nfp_vdpa_vring_epoll_wait(queue_num, device); 88602fe8366SXinying Yu if (ret != 0) 88702fe8366SXinying Yu goto notify_exit; 88802fe8366SXinying Yu 88902fe8366SXinying Yu return 0; 89002fe8366SXinying Yu 89102fe8366SXinying Yu notify_exit: 89202fe8366SXinying Yu close(device->epoll_fd); 89302fe8366SXinying Yu device->epoll_fd = -1; 89402fe8366SXinying Yu 89502fe8366SXinying Yu return 1; 89602fe8366SXinying Yu } 89702fe8366SXinying Yu 89802fe8366SXinying Yu static int 89902fe8366SXinying Yu nfp_vdpa_setup_vring_relay(struct nfp_vdpa_dev *device) 90002fe8366SXinying Yu { 90102fe8366SXinying Yu int ret; 90202fe8366SXinying Yu char name[RTE_THREAD_INTERNAL_NAME_SIZE]; 90302fe8366SXinying Yu 90402fe8366SXinying Yu snprintf(name, sizeof(name), "nfp_vring%d", device->vid); 90502fe8366SXinying Yu ret = rte_thread_create_internal_control(&device->tid, name, 90602fe8366SXinying Yu nfp_vdpa_vring_relay, (void *)device); 90702fe8366SXinying Yu if (ret != 0) { 90802fe8366SXinying Yu DRV_VDPA_LOG(ERR, "Failed to create vring relay pthread."); 90902fe8366SXinying Yu return -EPERM; 91002fe8366SXinying Yu } 91102fe8366SXinying Yu 91202fe8366SXinying Yu return 0; 91302fe8366SXinying Yu } 91402fe8366SXinying Yu 91502fe8366SXinying Yu static int 91694fde3a7SXinying Yu nfp_vdpa_sw_fallback(struct nfp_vdpa_dev *device) 91794fde3a7SXinying Yu { 91894fde3a7SXinying Yu int ret; 91994fde3a7SXinying Yu int vid = device->vid; 92094fde3a7SXinying Yu 92194fde3a7SXinying Yu /* Stop the direct IO data path */ 92294fde3a7SXinying Yu nfp_vdpa_unset_notify_relay(device); 92394fde3a7SXinying Yu nfp_vdpa_disable_vfio_intr(device); 92494fde3a7SXinying Yu 92594fde3a7SXinying Yu ret = rte_vhost_host_notifier_ctrl(vid, RTE_VHOST_QUEUE_ALL, false); 92694fde3a7SXinying Yu if ((ret != 0) && (ret != -ENOTSUP)) { 92794fde3a7SXinying Yu DRV_VDPA_LOG(ERR, "Unset the host notifier failed."); 92894fde3a7SXinying Yu goto error; 92994fde3a7SXinying Yu } 93094fde3a7SXinying Yu 93110421b0dSXinying Yu /* Setup interrupt for vring relay */ 93210421b0dSXinying Yu ret = nfp_vdpa_enable_vfio_intr(device, true); 93310421b0dSXinying Yu if (ret != 0) 93410421b0dSXinying Yu goto error; 93510421b0dSXinying Yu 936e6ac31e0SXinying Yu /* Config the VF */ 937e6ac31e0SXinying Yu ret = nfp_vdpa_start(device, true); 938e6ac31e0SXinying Yu if (ret != 0) 939e6ac31e0SXinying Yu goto unset_intr; 940e6ac31e0SXinying Yu 94102fe8366SXinying Yu /* Setup vring relay thread */ 94202fe8366SXinying Yu ret = nfp_vdpa_setup_vring_relay(device); 94302fe8366SXinying Yu if (ret != 0) 94402fe8366SXinying Yu goto stop_vf; 94502fe8366SXinying Yu 94694fde3a7SXinying Yu device->hw.sw_fallback_running = true; 94794fde3a7SXinying Yu 94894fde3a7SXinying Yu return 0; 94994fde3a7SXinying Yu 95002fe8366SXinying Yu stop_vf: 95102fe8366SXinying Yu nfp_vdpa_stop(device, true); 952e6ac31e0SXinying Yu unset_intr: 953e6ac31e0SXinying Yu nfp_vdpa_disable_vfio_intr(device); 95494fde3a7SXinying Yu error: 95594fde3a7SXinying Yu return ret; 95694fde3a7SXinying Yu } 95794fde3a7SXinying Yu 95894fde3a7SXinying Yu static int 9590141f545SChaoyong He nfp_vdpa_dev_config(int vid) 9600141f545SChaoyong He { 9610141f545SChaoyong He int ret; 9620141f545SChaoyong He struct nfp_vdpa_dev *device; 9630141f545SChaoyong He struct rte_vdpa_device *vdev; 9640141f545SChaoyong He struct nfp_vdpa_dev_node *node; 9650141f545SChaoyong He 9660141f545SChaoyong He vdev = rte_vhost_get_vdpa_device(vid); 9670141f545SChaoyong He node = nfp_vdpa_find_node_by_vdev(vdev); 9680141f545SChaoyong He if (node == NULL) { 969*b6de4353SZerun Fu DRV_VDPA_LOG(ERR, "Invalid vDPA device: %p.", vdev); 9700141f545SChaoyong He return -ENODEV; 9710141f545SChaoyong He } 9720141f545SChaoyong He 9730141f545SChaoyong He device = node->device; 9740141f545SChaoyong He device->vid = vid; 9750141f545SChaoyong He rte_atomic_store_explicit(&device->dev_attached, 1, rte_memory_order_relaxed); 9760141f545SChaoyong He update_datapath(device); 9770141f545SChaoyong He 9780141f545SChaoyong He ret = rte_vhost_host_notifier_ctrl(vid, RTE_VHOST_QUEUE_ALL, true); 9790141f545SChaoyong He if (ret != 0) 9800141f545SChaoyong He DRV_VDPA_LOG(INFO, "vDPA (%s): software relay is used.", 9810141f545SChaoyong He vdev->device->name); 9820141f545SChaoyong He 9830141f545SChaoyong He return 0; 9840141f545SChaoyong He } 9850141f545SChaoyong He 9860141f545SChaoyong He static int 9870141f545SChaoyong He nfp_vdpa_dev_close(int vid) 9880141f545SChaoyong He { 9890141f545SChaoyong He struct nfp_vdpa_dev *device; 9900141f545SChaoyong He struct rte_vdpa_device *vdev; 9910141f545SChaoyong He struct nfp_vdpa_dev_node *node; 9920141f545SChaoyong He 9930141f545SChaoyong He vdev = rte_vhost_get_vdpa_device(vid); 9940141f545SChaoyong He node = nfp_vdpa_find_node_by_vdev(vdev); 9950141f545SChaoyong He if (node == NULL) { 996*b6de4353SZerun Fu DRV_VDPA_LOG(ERR, "Invalid vDPA device: %p.", vdev); 9970141f545SChaoyong He return -ENODEV; 9980141f545SChaoyong He } 9990141f545SChaoyong He 10000141f545SChaoyong He device = node->device; 100194fde3a7SXinying Yu if (device->hw.sw_fallback_running) { 1002e6ac31e0SXinying Yu /* Reset VF */ 1003e6ac31e0SXinying Yu nfp_vdpa_stop(device, true); 1004e6ac31e0SXinying Yu 100502fe8366SXinying Yu /* Remove interrupt setting */ 100602fe8366SXinying Yu nfp_vdpa_disable_vfio_intr(device); 100702fe8366SXinying Yu 100802fe8366SXinying Yu /* Unset DMA map for guest memory */ 100902fe8366SXinying Yu nfp_vdpa_dma_map(device, false); 101002fe8366SXinying Yu 101194fde3a7SXinying Yu device->hw.sw_fallback_running = false; 101294fde3a7SXinying Yu 101394fde3a7SXinying Yu rte_atomic_store_explicit(&device->dev_attached, 0, 101494fde3a7SXinying Yu rte_memory_order_relaxed); 101594fde3a7SXinying Yu rte_atomic_store_explicit(&device->running, 0, 101694fde3a7SXinying Yu rte_memory_order_relaxed); 101794fde3a7SXinying Yu } else { 101894fde3a7SXinying Yu rte_atomic_store_explicit(&device->dev_attached, 0, 101994fde3a7SXinying Yu rte_memory_order_relaxed); 10200141f545SChaoyong He update_datapath(device); 102194fde3a7SXinying Yu } 10220141f545SChaoyong He 10230141f545SChaoyong He return 0; 10240141f545SChaoyong He } 10250141f545SChaoyong He 10260141f545SChaoyong He static int 10270141f545SChaoyong He nfp_vdpa_get_vfio_group_fd(int vid) 10280141f545SChaoyong He { 10290141f545SChaoyong He struct rte_vdpa_device *vdev; 10300141f545SChaoyong He struct nfp_vdpa_dev_node *node; 10310141f545SChaoyong He 10320141f545SChaoyong He vdev = rte_vhost_get_vdpa_device(vid); 10330141f545SChaoyong He node = nfp_vdpa_find_node_by_vdev(vdev); 10340141f545SChaoyong He if (node == NULL) { 1035*b6de4353SZerun Fu DRV_VDPA_LOG(ERR, "Invalid vDPA device: %p.", vdev); 10360141f545SChaoyong He return -ENODEV; 10370141f545SChaoyong He } 10380141f545SChaoyong He 10390141f545SChaoyong He return node->device->vfio_group_fd; 10400141f545SChaoyong He } 10410141f545SChaoyong He 10420141f545SChaoyong He static int 10430141f545SChaoyong He nfp_vdpa_get_vfio_device_fd(int vid) 10440141f545SChaoyong He { 10450141f545SChaoyong He struct rte_vdpa_device *vdev; 10460141f545SChaoyong He struct nfp_vdpa_dev_node *node; 10470141f545SChaoyong He 10480141f545SChaoyong He vdev = rte_vhost_get_vdpa_device(vid); 10490141f545SChaoyong He node = nfp_vdpa_find_node_by_vdev(vdev); 10500141f545SChaoyong He if (node == NULL) { 1051*b6de4353SZerun Fu DRV_VDPA_LOG(ERR, "Invalid vDPA device: %p.", vdev); 10520141f545SChaoyong He return -ENODEV; 10530141f545SChaoyong He } 10540141f545SChaoyong He 10550141f545SChaoyong He return node->device->vfio_dev_fd; 10560141f545SChaoyong He } 10570141f545SChaoyong He 10580141f545SChaoyong He static int 10590141f545SChaoyong He nfp_vdpa_get_notify_area(int vid, 10600141f545SChaoyong He int qid, 10610141f545SChaoyong He uint64_t *offset, 10620141f545SChaoyong He uint64_t *size) 10630141f545SChaoyong He { 10640141f545SChaoyong He int ret; 10650141f545SChaoyong He struct nfp_vdpa_dev *device; 10660141f545SChaoyong He struct rte_vdpa_device *vdev; 10670141f545SChaoyong He struct nfp_vdpa_dev_node *node; 10680141f545SChaoyong He struct vfio_region_info region = { 10690141f545SChaoyong He .argsz = sizeof(region) 10700141f545SChaoyong He }; 10710141f545SChaoyong He 10720141f545SChaoyong He vdev = rte_vhost_get_vdpa_device(vid); 10730141f545SChaoyong He node = nfp_vdpa_find_node_by_vdev(vdev); 10740141f545SChaoyong He if (node == NULL) { 10750141f545SChaoyong He DRV_VDPA_LOG(ERR, "Invalid vDPA device: %p", vdev); 10760141f545SChaoyong He return -ENODEV; 10770141f545SChaoyong He } 10780141f545SChaoyong He 10790141f545SChaoyong He device = node->device; 10800141f545SChaoyong He region.index = device->hw.notify_region; 10810141f545SChaoyong He 10820141f545SChaoyong He ret = ioctl(device->vfio_dev_fd, VFIO_DEVICE_GET_REGION_INFO, ®ion); 10830141f545SChaoyong He if (ret != 0) { 10840141f545SChaoyong He DRV_VDPA_LOG(ERR, "Get not get device region info."); 10850141f545SChaoyong He return -EIO; 10860141f545SChaoyong He } 10870141f545SChaoyong He 10880141f545SChaoyong He *offset = nfp_vdpa_get_queue_notify_offset(&device->hw, qid) + region.offset; 10890141f545SChaoyong He *size = NFP_VDPA_NOTIFY_ADDR_INTERVAL; 10900141f545SChaoyong He 10910141f545SChaoyong He return 0; 10920141f545SChaoyong He } 10930141f545SChaoyong He 10940141f545SChaoyong He static int 10950141f545SChaoyong He nfp_vdpa_get_queue_num(struct rte_vdpa_device *vdev, 10960141f545SChaoyong He uint32_t *queue_num) 10970141f545SChaoyong He { 10980141f545SChaoyong He struct nfp_vdpa_dev_node *node; 10990141f545SChaoyong He 11000141f545SChaoyong He node = nfp_vdpa_find_node_by_vdev(vdev); 11010141f545SChaoyong He if (node == NULL) { 1102*b6de4353SZerun Fu DRV_VDPA_LOG(ERR, "Invalid vDPA device: %p.", vdev); 11030141f545SChaoyong He return -ENODEV; 11040141f545SChaoyong He } 11050141f545SChaoyong He 11060141f545SChaoyong He *queue_num = node->device->max_queues; 11070141f545SChaoyong He 11080141f545SChaoyong He return 0; 11090141f545SChaoyong He } 11100141f545SChaoyong He 11110141f545SChaoyong He static int 11120141f545SChaoyong He nfp_vdpa_get_vdpa_features(struct rte_vdpa_device *vdev, 11130141f545SChaoyong He uint64_t *features) 11140141f545SChaoyong He { 11150141f545SChaoyong He struct nfp_vdpa_dev_node *node; 11160141f545SChaoyong He 11170141f545SChaoyong He node = nfp_vdpa_find_node_by_vdev(vdev); 11180141f545SChaoyong He if (node == NULL) { 11190141f545SChaoyong He DRV_VDPA_LOG(ERR, "Invalid vDPA device: %p", vdev); 11200141f545SChaoyong He return -ENODEV; 11210141f545SChaoyong He } 11220141f545SChaoyong He 11230141f545SChaoyong He *features = node->device->hw.features; 11240141f545SChaoyong He 11250141f545SChaoyong He return 0; 11260141f545SChaoyong He } 11270141f545SChaoyong He 11280141f545SChaoyong He static int 11290141f545SChaoyong He nfp_vdpa_get_protocol_features(struct rte_vdpa_device *vdev __rte_unused, 11300141f545SChaoyong He uint64_t *features) 11310141f545SChaoyong He { 11320141f545SChaoyong He *features = 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD | 11330141f545SChaoyong He 1ULL << VHOST_USER_PROTOCOL_F_REPLY_ACK | 11340141f545SChaoyong He 1ULL << VHOST_USER_PROTOCOL_F_BACKEND_REQ | 11350141f545SChaoyong He 1ULL << VHOST_USER_PROTOCOL_F_BACKEND_SEND_FD | 11360141f545SChaoyong He 1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER; 11370141f545SChaoyong He 11380141f545SChaoyong He return 0; 11390141f545SChaoyong He } 11400141f545SChaoyong He 11410141f545SChaoyong He static int 11420141f545SChaoyong He nfp_vdpa_set_features(int32_t vid) 11430141f545SChaoyong He { 114494fde3a7SXinying Yu int ret; 114594fde3a7SXinying Yu uint64_t features = 0; 114694fde3a7SXinying Yu struct nfp_vdpa_dev *device; 114794fde3a7SXinying Yu struct rte_vdpa_device *vdev; 114894fde3a7SXinying Yu struct nfp_vdpa_dev_node *node; 114994fde3a7SXinying Yu 1150*b6de4353SZerun Fu DRV_VDPA_LOG(DEBUG, "Start vid=%d.", vid); 115194fde3a7SXinying Yu 115294fde3a7SXinying Yu vdev = rte_vhost_get_vdpa_device(vid); 115394fde3a7SXinying Yu node = nfp_vdpa_find_node_by_vdev(vdev); 115494fde3a7SXinying Yu if (node == NULL) { 1155*b6de4353SZerun Fu DRV_VDPA_LOG(ERR, "Invalid vDPA device: %p.", vdev); 115694fde3a7SXinying Yu return -ENODEV; 115794fde3a7SXinying Yu } 115894fde3a7SXinying Yu 115994fde3a7SXinying Yu rte_vhost_get_negotiated_features(vid, &features); 116094fde3a7SXinying Yu 116194fde3a7SXinying Yu if (RTE_VHOST_NEED_LOG(features) == 0) 116294fde3a7SXinying Yu return 0; 116394fde3a7SXinying Yu 116494fde3a7SXinying Yu device = node->device; 116594fde3a7SXinying Yu if (device->hw.sw_lm) { 116694fde3a7SXinying Yu ret = nfp_vdpa_sw_fallback(device); 116794fde3a7SXinying Yu if (ret != 0) { 1168*b6de4353SZerun Fu DRV_VDPA_LOG(ERR, "Software fallback start failed."); 116994fde3a7SXinying Yu return -1; 117094fde3a7SXinying Yu } 117194fde3a7SXinying Yu } 117294fde3a7SXinying Yu 11730141f545SChaoyong He return 0; 11740141f545SChaoyong He } 11750141f545SChaoyong He 11760141f545SChaoyong He static int 11770141f545SChaoyong He nfp_vdpa_set_vring_state(int vid, 11780141f545SChaoyong He int vring, 11790141f545SChaoyong He int state) 11800141f545SChaoyong He { 1181*b6de4353SZerun Fu DRV_VDPA_LOG(DEBUG, "Start vid=%d, vring=%d, state=%d.", vid, vring, state); 11820141f545SChaoyong He return 0; 11830141f545SChaoyong He } 11840141f545SChaoyong He 11857f11d166SChaoyong He struct rte_vdpa_dev_ops nfp_vdpa_ops = { 11860141f545SChaoyong He .get_queue_num = nfp_vdpa_get_queue_num, 11870141f545SChaoyong He .get_features = nfp_vdpa_get_vdpa_features, 11880141f545SChaoyong He .get_protocol_features = nfp_vdpa_get_protocol_features, 11890141f545SChaoyong He .dev_conf = nfp_vdpa_dev_config, 11900141f545SChaoyong He .dev_close = nfp_vdpa_dev_close, 11910141f545SChaoyong He .set_vring_state = nfp_vdpa_set_vring_state, 11920141f545SChaoyong He .set_features = nfp_vdpa_set_features, 11930141f545SChaoyong He .get_vfio_group_fd = nfp_vdpa_get_vfio_group_fd, 11940141f545SChaoyong He .get_vfio_device_fd = nfp_vdpa_get_vfio_device_fd, 11950141f545SChaoyong He .get_notify_area = nfp_vdpa_get_notify_area, 11967f11d166SChaoyong He }; 11977f11d166SChaoyong He 11987f11d166SChaoyong He static int 11997f11d166SChaoyong He nfp_vdpa_pci_probe(struct rte_pci_device *pci_dev) 12007f11d166SChaoyong He { 12017b2a1228SChaoyong He int ret; 12027f11d166SChaoyong He struct nfp_vdpa_dev *device; 12037f11d166SChaoyong He struct nfp_vdpa_dev_node *node; 12047f11d166SChaoyong He 12057f11d166SChaoyong He if (rte_eal_process_type() != RTE_PROC_PRIMARY) 12067f11d166SChaoyong He return 0; 12077f11d166SChaoyong He 12087f11d166SChaoyong He node = calloc(1, sizeof(*node)); 12097f11d166SChaoyong He if (node == NULL) 12107f11d166SChaoyong He return -ENOMEM; 12117f11d166SChaoyong He 12127f11d166SChaoyong He device = calloc(1, sizeof(*device)); 12137f11d166SChaoyong He if (device == NULL) 12147f11d166SChaoyong He goto free_node; 12157f11d166SChaoyong He 12167f11d166SChaoyong He device->pci_dev = pci_dev; 12177f11d166SChaoyong He 12187b2a1228SChaoyong He ret = nfp_vdpa_vfio_setup(device); 12197b2a1228SChaoyong He if (ret != 0) 12207b2a1228SChaoyong He goto free_device; 12217b2a1228SChaoyong He 1222d89f4990SChaoyong He ret = nfp_vdpa_hw_init(&device->hw, pci_dev); 1223d89f4990SChaoyong He if (ret != 0) 1224d89f4990SChaoyong He goto vfio_teardown; 1225d89f4990SChaoyong He 1226d89f4990SChaoyong He device->max_queues = NFP_VDPA_MAX_QUEUES; 1227d89f4990SChaoyong He 12287f11d166SChaoyong He device->vdev = rte_vdpa_register_device(&pci_dev->device, &nfp_vdpa_ops); 12297f11d166SChaoyong He if (device->vdev == NULL) { 1230*b6de4353SZerun Fu DRV_VDPA_LOG(ERR, "Failed to register device %s.", pci_dev->name); 12317b2a1228SChaoyong He goto vfio_teardown; 12327f11d166SChaoyong He } 12337f11d166SChaoyong He 12347f11d166SChaoyong He node->device = device; 12357f11d166SChaoyong He pthread_mutex_lock(&vdpa_list_lock); 12367f11d166SChaoyong He TAILQ_INSERT_TAIL(&vdpa_dev_list, node, next); 12377f11d166SChaoyong He pthread_mutex_unlock(&vdpa_list_lock); 12387f11d166SChaoyong He 1239b47a0373SChaoyong He rte_spinlock_init(&device->lock); 1240b47a0373SChaoyong He rte_atomic_store_explicit(&device->started, 1, rte_memory_order_relaxed); 1241b47a0373SChaoyong He update_datapath(device); 1242b47a0373SChaoyong He 12437f11d166SChaoyong He return 0; 12447f11d166SChaoyong He 12457b2a1228SChaoyong He vfio_teardown: 12467b2a1228SChaoyong He nfp_vdpa_vfio_teardown(device); 12477f11d166SChaoyong He free_device: 12487f11d166SChaoyong He free(device); 12497f11d166SChaoyong He free_node: 12507f11d166SChaoyong He free(node); 12517f11d166SChaoyong He 12527f11d166SChaoyong He return -1; 12537f11d166SChaoyong He } 12547f11d166SChaoyong He 12557f11d166SChaoyong He static int 12567f11d166SChaoyong He nfp_vdpa_pci_remove(struct rte_pci_device *pci_dev) 12577f11d166SChaoyong He { 12587f11d166SChaoyong He struct nfp_vdpa_dev *device; 12597f11d166SChaoyong He struct nfp_vdpa_dev_node *node; 12607f11d166SChaoyong He 12617f11d166SChaoyong He if (rte_eal_process_type() != RTE_PROC_PRIMARY) 12627f11d166SChaoyong He return 0; 12637f11d166SChaoyong He 12647f11d166SChaoyong He node = nfp_vdpa_find_node_by_pdev(pci_dev); 12657f11d166SChaoyong He if (node == NULL) { 1266*b6de4353SZerun Fu DRV_VDPA_LOG(ERR, "Invalid device: %s.", pci_dev->name); 12677f11d166SChaoyong He return -ENODEV; 12687f11d166SChaoyong He } 12697f11d166SChaoyong He 12707f11d166SChaoyong He device = node->device; 12717f11d166SChaoyong He 1272b47a0373SChaoyong He rte_atomic_store_explicit(&device->started, 0, rte_memory_order_relaxed); 1273b47a0373SChaoyong He update_datapath(device); 1274b47a0373SChaoyong He 12757f11d166SChaoyong He pthread_mutex_lock(&vdpa_list_lock); 12767f11d166SChaoyong He TAILQ_REMOVE(&vdpa_dev_list, node, next); 12777f11d166SChaoyong He pthread_mutex_unlock(&vdpa_list_lock); 12787f11d166SChaoyong He 12797f11d166SChaoyong He rte_vdpa_unregister_device(device->vdev); 12807b2a1228SChaoyong He nfp_vdpa_vfio_teardown(device); 12817f11d166SChaoyong He 12827f11d166SChaoyong He free(device); 12837f11d166SChaoyong He free(node); 12847f11d166SChaoyong He 12857f11d166SChaoyong He return 0; 12867f11d166SChaoyong He } 12877f11d166SChaoyong He 12887f11d166SChaoyong He static const struct rte_pci_id pci_id_nfp_vdpa_map[] = { 12897f11d166SChaoyong He { 12907f11d166SChaoyong He RTE_PCI_DEVICE(PCI_VENDOR_ID_NETRONOME, 12917f11d166SChaoyong He PCI_DEVICE_ID_NFP6000_VF_NIC) 12927f11d166SChaoyong He }, 12937f11d166SChaoyong He { 12947f11d166SChaoyong He .vendor_id = 0, 12957f11d166SChaoyong He }, 12967f11d166SChaoyong He }; 12977f11d166SChaoyong He 12987f11d166SChaoyong He static struct nfp_class_driver nfp_vdpa = { 12997f11d166SChaoyong He .drv_class = NFP_CLASS_VDPA, 13007f11d166SChaoyong He .name = RTE_STR(NFP_VDPA_DRIVER_NAME), 13017f11d166SChaoyong He .id_table = pci_id_nfp_vdpa_map, 13027f11d166SChaoyong He .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC, 13037f11d166SChaoyong He .probe = nfp_vdpa_pci_probe, 13047f11d166SChaoyong He .remove = nfp_vdpa_pci_remove, 13057f11d166SChaoyong He }; 13067f11d166SChaoyong He 13077f11d166SChaoyong He RTE_INIT(nfp_vdpa_init) 13087f11d166SChaoyong He { 13097f11d166SChaoyong He nfp_class_driver_register(&nfp_vdpa); 13107f11d166SChaoyong He } 13117f11d166SChaoyong He 13127f11d166SChaoyong He RTE_PMD_REGISTER_PCI_TABLE(NFP_VDPA_DRIVER_NAME, pci_id_nfp_vdpa_map); 13137f11d166SChaoyong He RTE_PMD_REGISTER_KMOD_DEP(NFP_VDPA_DRIVER_NAME, "* vfio-pci"); 1314