xref: /dpdk/drivers/vdpa/nfp/nfp_vdpa.c (revision b6de43530dfa30cbf6b70857e3835099701063d4)
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) 2023 Corigine, Inc.
 * All rights reserved.
 */
57f11d166SChaoyong He 
67f11d166SChaoyong He #include <pthread.h>
776ea5ebeSChaoyong He #include <sys/epoll.h>
8b47a0373SChaoyong He #include <sys/ioctl.h>
976ea5ebeSChaoyong He #include <unistd.h>
107f11d166SChaoyong He 
117f11d166SChaoyong He #include <nfp_common_pci.h>
127f11d166SChaoyong He #include <nfp_dev.h>
137b2a1228SChaoyong He #include <rte_vfio.h>
14e6ac31e0SXinying Yu #include <rte_eal_paging.h>
15e6ac31e0SXinying Yu #include <rte_malloc.h>
167f11d166SChaoyong He #include <vdpa_driver.h>
177f11d166SChaoyong He 
18d89f4990SChaoyong He #include "nfp_vdpa_core.h"
197f11d166SChaoyong He #include "nfp_vdpa_log.h"
207f11d166SChaoyong He 
/** Name token of this driver. */
#define NFP_VDPA_DRIVER_NAME nfp_vdpa

/**
 * Size in bytes of a buffer that holds a struct vfio_irq_set header followed
 * by one int per vring (two vrings per queue) plus one extra entry.
 */
#define MSIX_IRQ_SET_BUF_LEN \
		(sizeof(struct vfio_irq_set) + \
		sizeof(int) * (NFP_VDPA_MAX_QUEUES * 2 + 1))

/** Size in bytes of a used ring that has @p size elements. */
#define NFP_VDPA_USED_RING_LEN(size) \
		(sizeof(struct vring_used) + (size) * sizeof(struct vring_used_elem))

#define EPOLL_DATA_INTR        1
3002fe8366SXinying Yu 
317f11d166SChaoyong He struct nfp_vdpa_dev {
327f11d166SChaoyong He 	struct rte_pci_device *pci_dev;
337f11d166SChaoyong He 	struct rte_vdpa_device *vdev;
34d89f4990SChaoyong He 	struct nfp_vdpa_hw hw;
357b2a1228SChaoyong He 
367b2a1228SChaoyong He 	int vfio_container_fd;
377b2a1228SChaoyong He 	int vfio_group_fd;
387b2a1228SChaoyong He 	int vfio_dev_fd;
397b2a1228SChaoyong He 	int iommu_group;
40d89f4990SChaoyong He 
4176ea5ebeSChaoyong He 	rte_thread_t tid;    /**< Thread for notify relay */
4276ea5ebeSChaoyong He 	int epoll_fd;
4376ea5ebeSChaoyong He 
44b47a0373SChaoyong He 	int vid;
45d89f4990SChaoyong He 	uint16_t max_queues;
46b47a0373SChaoyong He 	RTE_ATOMIC(uint32_t) started;
47b47a0373SChaoyong He 	RTE_ATOMIC(uint32_t) dev_attached;
48b47a0373SChaoyong He 	RTE_ATOMIC(uint32_t) running;
49b47a0373SChaoyong He 	rte_spinlock_t lock;
50b47a0373SChaoyong He 
51b47a0373SChaoyong He 	/** Eventfd for used ring interrupt */
52b47a0373SChaoyong He 	int intr_fd[NFP_VDPA_MAX_QUEUES * 2];
537f11d166SChaoyong He };
547f11d166SChaoyong He 
/** Entry of the driver's device list; wraps one vDPA device. */
struct nfp_vdpa_dev_node {
	TAILQ_ENTRY(nfp_vdpa_dev_node) next;    /**< List linkage */
	struct nfp_vdpa_dev *device;            /**< Device carried by this entry */
};
597f11d166SChaoyong He 
TAILQ_HEAD(vdpa_dev_list_head, nfp_vdpa_dev_node);

/** List of device nodes; the lookup helpers walk it under vdpa_list_lock. */
static struct vdpa_dev_list_head vdpa_dev_list =
	TAILQ_HEAD_INITIALIZER(vdpa_dev_list);

/** Mutex protecting vdpa_dev_list. */
static pthread_mutex_t vdpa_list_lock = PTHREAD_MUTEX_INITIALIZER;
667f11d166SChaoyong He 
677f11d166SChaoyong He static struct nfp_vdpa_dev_node *
680141f545SChaoyong He nfp_vdpa_find_node_by_vdev(struct rte_vdpa_device *vdev)
690141f545SChaoyong He {
700141f545SChaoyong He 	bool found = false;
710141f545SChaoyong He 	struct nfp_vdpa_dev_node *node;
720141f545SChaoyong He 
730141f545SChaoyong He 	pthread_mutex_lock(&vdpa_list_lock);
740141f545SChaoyong He 
750141f545SChaoyong He 	TAILQ_FOREACH(node, &vdpa_dev_list, next) {
760141f545SChaoyong He 		if (vdev == node->device->vdev) {
770141f545SChaoyong He 			found = true;
780141f545SChaoyong He 			break;
790141f545SChaoyong He 		}
800141f545SChaoyong He 	}
810141f545SChaoyong He 
820141f545SChaoyong He 	pthread_mutex_unlock(&vdpa_list_lock);
830141f545SChaoyong He 
840141f545SChaoyong He 	if (found)
850141f545SChaoyong He 		return node;
860141f545SChaoyong He 
870141f545SChaoyong He 	return NULL;
880141f545SChaoyong He }
890141f545SChaoyong He 
900141f545SChaoyong He static struct nfp_vdpa_dev_node *
917f11d166SChaoyong He nfp_vdpa_find_node_by_pdev(struct rte_pci_device *pdev)
927f11d166SChaoyong He {
937f11d166SChaoyong He 	bool found = false;
947f11d166SChaoyong He 	struct nfp_vdpa_dev_node *node;
957f11d166SChaoyong He 
967f11d166SChaoyong He 	pthread_mutex_lock(&vdpa_list_lock);
977f11d166SChaoyong He 
987f11d166SChaoyong He 	TAILQ_FOREACH(node, &vdpa_dev_list, next) {
997f11d166SChaoyong He 		if (pdev == node->device->pci_dev) {
1007f11d166SChaoyong He 			found = true;
1017f11d166SChaoyong He 			break;
1027f11d166SChaoyong He 		}
1037f11d166SChaoyong He 	}
1047f11d166SChaoyong He 
1057f11d166SChaoyong He 	pthread_mutex_unlock(&vdpa_list_lock);
1067f11d166SChaoyong He 
1077f11d166SChaoyong He 	if (found)
1087f11d166SChaoyong He 		return node;
1097f11d166SChaoyong He 
1107f11d166SChaoyong He 	return NULL;
1117f11d166SChaoyong He }
1127f11d166SChaoyong He 
1137b2a1228SChaoyong He static int
1147b2a1228SChaoyong He nfp_vdpa_vfio_setup(struct nfp_vdpa_dev *device)
1157b2a1228SChaoyong He {
1167b2a1228SChaoyong He 	int ret;
1177b2a1228SChaoyong He 	char dev_name[RTE_DEV_NAME_MAX_LEN] = {0};
1187b2a1228SChaoyong He 	struct rte_pci_device *pci_dev = device->pci_dev;
1197b2a1228SChaoyong He 
1207b2a1228SChaoyong He 	rte_pci_unmap_device(pci_dev);
1217b2a1228SChaoyong He 
1227b2a1228SChaoyong He 	rte_pci_device_name(&pci_dev->addr, dev_name, RTE_DEV_NAME_MAX_LEN);
1231a2bb56aSChaoyong He 	ret = rte_vfio_get_group_num(rte_pci_get_sysfs_path(), dev_name,
1247b2a1228SChaoyong He 			&device->iommu_group);
1251a2bb56aSChaoyong He 	if (ret <= 0)
1261a2bb56aSChaoyong He 		return -1;
1277b2a1228SChaoyong He 
1287b2a1228SChaoyong He 	device->vfio_container_fd = rte_vfio_container_create();
1297b2a1228SChaoyong He 	if (device->vfio_container_fd < 0)
1307b2a1228SChaoyong He 		return -1;
1317b2a1228SChaoyong He 
1327b2a1228SChaoyong He 	device->vfio_group_fd = rte_vfio_container_group_bind(
1337b2a1228SChaoyong He 			device->vfio_container_fd, device->iommu_group);
1347b2a1228SChaoyong He 	if (device->vfio_group_fd < 0)
1357b2a1228SChaoyong He 		goto container_destroy;
1367b2a1228SChaoyong He 
137*b6de4353SZerun Fu 	DRV_VDPA_LOG(DEBUG, "The container_fd=%d, group_fd=%d.",
1387b2a1228SChaoyong He 			device->vfio_container_fd, device->vfio_group_fd);
1397b2a1228SChaoyong He 
1407b2a1228SChaoyong He 	ret = rte_pci_map_device(pci_dev);
1417b2a1228SChaoyong He 	if (ret != 0)
1427b2a1228SChaoyong He 		goto group_unbind;
1437b2a1228SChaoyong He 
1447b2a1228SChaoyong He 	device->vfio_dev_fd = rte_intr_dev_fd_get(pci_dev->intr_handle);
1457b2a1228SChaoyong He 
1467b2a1228SChaoyong He 	return 0;
1477b2a1228SChaoyong He 
1487b2a1228SChaoyong He group_unbind:
1497b2a1228SChaoyong He 	rte_vfio_container_group_unbind(device->vfio_container_fd, device->iommu_group);
1507b2a1228SChaoyong He container_destroy:
1517b2a1228SChaoyong He 	rte_vfio_container_destroy(device->vfio_container_fd);
1527b2a1228SChaoyong He 
1537b2a1228SChaoyong He 	return -1;
1547b2a1228SChaoyong He }
1557b2a1228SChaoyong He 
1567b2a1228SChaoyong He static void
1577b2a1228SChaoyong He nfp_vdpa_vfio_teardown(struct nfp_vdpa_dev *device)
1587b2a1228SChaoyong He {
1597b2a1228SChaoyong He 	rte_pci_unmap_device(device->pci_dev);
1607b2a1228SChaoyong He 	rte_vfio_container_group_unbind(device->vfio_container_fd, device->iommu_group);
1617b2a1228SChaoyong He 	rte_vfio_container_destroy(device->vfio_container_fd);
1627b2a1228SChaoyong He }
1637b2a1228SChaoyong He 
164b47a0373SChaoyong He static int
165b47a0373SChaoyong He nfp_vdpa_dma_do_unmap(struct rte_vhost_memory *mem,
166b47a0373SChaoyong He 		uint32_t times,
167b47a0373SChaoyong He 		int vfio_container_fd)
168b47a0373SChaoyong He {
169b47a0373SChaoyong He 	uint32_t i;
170b47a0373SChaoyong He 	int ret = 0;
171b47a0373SChaoyong He 	struct rte_vhost_mem_region *region;
172b47a0373SChaoyong He 
173b47a0373SChaoyong He 	for (i = 0; i < times; i++) {
174b47a0373SChaoyong He 		region = &mem->regions[i];
175b47a0373SChaoyong He 
176b47a0373SChaoyong He 		ret = rte_vfio_container_dma_unmap(vfio_container_fd,
177b47a0373SChaoyong He 				region->host_user_addr, region->guest_phys_addr,
178b47a0373SChaoyong He 				region->size);
179b47a0373SChaoyong He 		if (ret < 0) {
180b47a0373SChaoyong He 			/* Here should not return, even error happened. */
181*b6de4353SZerun Fu 			DRV_VDPA_LOG(ERR, "DMA unmap failed. Times: %u.", i);
182b47a0373SChaoyong He 		}
183b47a0373SChaoyong He 	}
184b47a0373SChaoyong He 
185b47a0373SChaoyong He 	return ret;
186b47a0373SChaoyong He }
187b47a0373SChaoyong He 
188b47a0373SChaoyong He static int
189b47a0373SChaoyong He nfp_vdpa_dma_do_map(struct rte_vhost_memory *mem,
190b47a0373SChaoyong He 		uint32_t times,
191b47a0373SChaoyong He 		int vfio_container_fd)
192b47a0373SChaoyong He {
193b47a0373SChaoyong He 	int ret;
194b47a0373SChaoyong He 	uint32_t i;
195b47a0373SChaoyong He 	struct rte_vhost_mem_region *region;
196b47a0373SChaoyong He 
197b47a0373SChaoyong He 	for (i = 0; i < times; i++) {
198b47a0373SChaoyong He 		region = &mem->regions[i];
199b47a0373SChaoyong He 
200b47a0373SChaoyong He 		ret = rte_vfio_container_dma_map(vfio_container_fd,
201b47a0373SChaoyong He 				region->host_user_addr, region->guest_phys_addr,
202b47a0373SChaoyong He 				region->size);
203b47a0373SChaoyong He 		if (ret < 0) {
204b47a0373SChaoyong He 			DRV_VDPA_LOG(ERR, "DMA map failed.");
205b47a0373SChaoyong He 			nfp_vdpa_dma_do_unmap(mem, i, vfio_container_fd);
206b47a0373SChaoyong He 			return ret;
207b47a0373SChaoyong He 		}
208b47a0373SChaoyong He 	}
209b47a0373SChaoyong He 
210b47a0373SChaoyong He 	return 0;
211b47a0373SChaoyong He }
212b47a0373SChaoyong He 
213b47a0373SChaoyong He static int
214b47a0373SChaoyong He nfp_vdpa_dma_map(struct nfp_vdpa_dev *device,
215b47a0373SChaoyong He 		bool do_map)
216b47a0373SChaoyong He {
217b47a0373SChaoyong He 	int ret;
218b47a0373SChaoyong He 	int vfio_container_fd;
219b47a0373SChaoyong He 	struct rte_vhost_memory *mem = NULL;
220b47a0373SChaoyong He 
221b47a0373SChaoyong He 	ret = rte_vhost_get_mem_table(device->vid, &mem);
222b47a0373SChaoyong He 	if (ret < 0) {
223b47a0373SChaoyong He 		DRV_VDPA_LOG(ERR, "Failed to get memory layout.");
224b47a0373SChaoyong He 		return ret;
225b47a0373SChaoyong He 	}
226b47a0373SChaoyong He 
227b47a0373SChaoyong He 	vfio_container_fd = device->vfio_container_fd;
228*b6de4353SZerun Fu 	DRV_VDPA_LOG(DEBUG, "The vfio_container_fd %d.", vfio_container_fd);
229b47a0373SChaoyong He 
230b47a0373SChaoyong He 	if (do_map)
231b47a0373SChaoyong He 		ret = nfp_vdpa_dma_do_map(mem, mem->nregions, vfio_container_fd);
232b47a0373SChaoyong He 	else
233b47a0373SChaoyong He 		ret = nfp_vdpa_dma_do_unmap(mem, mem->nregions, vfio_container_fd);
234b47a0373SChaoyong He 
235b47a0373SChaoyong He 	free(mem);
236b47a0373SChaoyong He 
237b47a0373SChaoyong He 	return ret;
238b47a0373SChaoyong He }
239b47a0373SChaoyong He 
240b47a0373SChaoyong He static uint64_t
241b47a0373SChaoyong He nfp_vdpa_qva_to_gpa(int vid,
242b47a0373SChaoyong He 		uint64_t qva)
243b47a0373SChaoyong He {
244b47a0373SChaoyong He 	int ret;
245b47a0373SChaoyong He 	uint32_t i;
246b47a0373SChaoyong He 	uint64_t gpa = 0;
247b47a0373SChaoyong He 	struct rte_vhost_memory *mem = NULL;
248b47a0373SChaoyong He 	struct rte_vhost_mem_region *region;
249b47a0373SChaoyong He 
250b47a0373SChaoyong He 	ret = rte_vhost_get_mem_table(vid, &mem);
251b47a0373SChaoyong He 	if (ret < 0) {
252b47a0373SChaoyong He 		DRV_VDPA_LOG(ERR, "Failed to get memory layout.");
253b47a0373SChaoyong He 		return gpa;
254b47a0373SChaoyong He 	}
255b47a0373SChaoyong He 
256b47a0373SChaoyong He 	for (i = 0; i < mem->nregions; i++) {
257b47a0373SChaoyong He 		region = &mem->regions[i];
258b47a0373SChaoyong He 
259b47a0373SChaoyong He 		if (qva >= region->host_user_addr &&
260b47a0373SChaoyong He 				qva < region->host_user_addr + region->size) {
261b47a0373SChaoyong He 			gpa = qva - region->host_user_addr + region->guest_phys_addr;
262b47a0373SChaoyong He 			break;
263b47a0373SChaoyong He 		}
264b47a0373SChaoyong He 	}
265b47a0373SChaoyong He 
266b47a0373SChaoyong He 	free(mem);
267b47a0373SChaoyong He 
268b47a0373SChaoyong He 	return gpa;
269b47a0373SChaoyong He }
270b47a0373SChaoyong He 
271e6ac31e0SXinying Yu static void
272e6ac31e0SXinying Yu nfp_vdpa_relay_vring_free(struct nfp_vdpa_dev *device,
273e6ac31e0SXinying Yu 		uint16_t vring_index)
274e6ac31e0SXinying Yu {
275e6ac31e0SXinying Yu 	uint16_t i;
276e6ac31e0SXinying Yu 	uint64_t size;
277e6ac31e0SXinying Yu 	struct rte_vhost_vring vring;
278e6ac31e0SXinying Yu 	uint64_t m_vring_iova = NFP_VDPA_RELAY_VRING;
279e6ac31e0SXinying Yu 
280e6ac31e0SXinying Yu 	for (i = 0; i < vring_index; i++) {
281e6ac31e0SXinying Yu 		rte_vhost_get_vhost_vring(device->vid, i, &vring);
282e6ac31e0SXinying Yu 
283e6ac31e0SXinying Yu 		size = RTE_ALIGN_CEIL(vring_size(vring.size, rte_mem_page_size()),
284e6ac31e0SXinying Yu 				rte_mem_page_size());
285e6ac31e0SXinying Yu 		rte_vfio_container_dma_unmap(device->vfio_container_fd,
286e6ac31e0SXinying Yu 				(uint64_t)(uintptr_t)device->hw.m_vring[i].desc,
287e6ac31e0SXinying Yu 				m_vring_iova, size);
288e6ac31e0SXinying Yu 
289e6ac31e0SXinying Yu 		rte_free(device->hw.m_vring[i].desc);
290e6ac31e0SXinying Yu 		m_vring_iova += size;
291e6ac31e0SXinying Yu 	}
292e6ac31e0SXinying Yu }
293e6ac31e0SXinying Yu 
294b47a0373SChaoyong He static int
295e6ac31e0SXinying Yu nfp_vdpa_relay_vring_alloc(struct nfp_vdpa_dev *device)
296e6ac31e0SXinying Yu {
297e6ac31e0SXinying Yu 	int ret;
298e6ac31e0SXinying Yu 	uint16_t i;
299e6ac31e0SXinying Yu 	uint64_t size;
300e6ac31e0SXinying Yu 	void *vring_buf;
301e6ac31e0SXinying Yu 	uint64_t page_size;
302e6ac31e0SXinying Yu 	struct rte_vhost_vring vring;
303e6ac31e0SXinying Yu 	struct nfp_vdpa_hw *vdpa_hw = &device->hw;
304e6ac31e0SXinying Yu 	uint64_t m_vring_iova = NFP_VDPA_RELAY_VRING;
305e6ac31e0SXinying Yu 
306e6ac31e0SXinying Yu 	page_size = rte_mem_page_size();
307e6ac31e0SXinying Yu 
308e6ac31e0SXinying Yu 	for (i = 0; i < vdpa_hw->nr_vring; i++) {
309e6ac31e0SXinying Yu 		rte_vhost_get_vhost_vring(device->vid, i, &vring);
310e6ac31e0SXinying Yu 
311e6ac31e0SXinying Yu 		size = RTE_ALIGN_CEIL(vring_size(vring.size, page_size), page_size);
312e6ac31e0SXinying Yu 		vring_buf = rte_zmalloc("nfp_vdpa_relay", size, page_size);
313e6ac31e0SXinying Yu 		if (vring_buf == NULL)
314e6ac31e0SXinying Yu 			goto vring_free_all;
315e6ac31e0SXinying Yu 
316e6ac31e0SXinying Yu 		vring_init(&vdpa_hw->m_vring[i], vring.size, vring_buf, page_size);
317e6ac31e0SXinying Yu 
318e6ac31e0SXinying Yu 		ret = rte_vfio_container_dma_map(device->vfio_container_fd,
319e6ac31e0SXinying Yu 				(uint64_t)(uintptr_t)vring_buf, m_vring_iova, size);
320e6ac31e0SXinying Yu 		if (ret != 0) {
321e6ac31e0SXinying Yu 			DRV_VDPA_LOG(ERR, "vDPA vring relay dma map failed.");
322e6ac31e0SXinying Yu 			goto vring_free_one;
323e6ac31e0SXinying Yu 		}
324e6ac31e0SXinying Yu 
325e6ac31e0SXinying Yu 		m_vring_iova += size;
326e6ac31e0SXinying Yu 	}
327e6ac31e0SXinying Yu 
328e6ac31e0SXinying Yu 	return 0;
329e6ac31e0SXinying Yu 
330e6ac31e0SXinying Yu vring_free_one:
331e6ac31e0SXinying Yu 	rte_free(device->hw.m_vring[i].desc);
332e6ac31e0SXinying Yu vring_free_all:
333e6ac31e0SXinying Yu 	nfp_vdpa_relay_vring_free(device, i);
334e6ac31e0SXinying Yu 
335e6ac31e0SXinying Yu 	return -ENOSPC;
336e6ac31e0SXinying Yu }
337e6ac31e0SXinying Yu 
338e6ac31e0SXinying Yu static int
339e6ac31e0SXinying Yu nfp_vdpa_start(struct nfp_vdpa_dev *device,
340e6ac31e0SXinying Yu 		bool relay)
341b47a0373SChaoyong He {
342b47a0373SChaoyong He 	int ret;
343b47a0373SChaoyong He 	int vid;
344b47a0373SChaoyong He 	uint16_t i;
345b47a0373SChaoyong He 	uint64_t gpa;
346e6ac31e0SXinying Yu 	uint16_t size;
347b47a0373SChaoyong He 	struct rte_vhost_vring vring;
348b47a0373SChaoyong He 	struct nfp_vdpa_hw *vdpa_hw = &device->hw;
349e6ac31e0SXinying Yu 	uint64_t m_vring_iova = NFP_VDPA_RELAY_VRING;
350b47a0373SChaoyong He 
351b47a0373SChaoyong He 	vid = device->vid;
352b47a0373SChaoyong He 	vdpa_hw->nr_vring = rte_vhost_get_vring_num(vid);
353b47a0373SChaoyong He 
354b47a0373SChaoyong He 	ret = rte_vhost_get_negotiated_features(vid, &vdpa_hw->req_features);
355b47a0373SChaoyong He 	if (ret != 0)
356b47a0373SChaoyong He 		return ret;
357b47a0373SChaoyong He 
358e6ac31e0SXinying Yu 	if (relay) {
359e6ac31e0SXinying Yu 		ret = nfp_vdpa_relay_vring_alloc(device);
360e6ac31e0SXinying Yu 		if (ret != 0)
361e6ac31e0SXinying Yu 			return ret;
362e6ac31e0SXinying Yu 	}
363e6ac31e0SXinying Yu 
364b47a0373SChaoyong He 	for (i = 0; i < vdpa_hw->nr_vring; i++) {
365b47a0373SChaoyong He 		ret = rte_vhost_get_vhost_vring(vid, i, &vring);
366b47a0373SChaoyong He 		if (ret != 0)
367e6ac31e0SXinying Yu 			goto relay_vring_free;
368b47a0373SChaoyong He 
369b47a0373SChaoyong He 		gpa = nfp_vdpa_qva_to_gpa(vid, (uint64_t)(uintptr_t)vring.desc);
370b47a0373SChaoyong He 		if (gpa == 0) {
371b47a0373SChaoyong He 			DRV_VDPA_LOG(ERR, "Fail to get GPA for descriptor ring.");
372e6ac31e0SXinying Yu 			goto relay_vring_free;
373b47a0373SChaoyong He 		}
374b47a0373SChaoyong He 
375b47a0373SChaoyong He 		vdpa_hw->vring[i].desc = gpa;
376b47a0373SChaoyong He 
377b47a0373SChaoyong He 		gpa = nfp_vdpa_qva_to_gpa(vid, (uint64_t)(uintptr_t)vring.avail);
378b47a0373SChaoyong He 		if (gpa == 0) {
379b47a0373SChaoyong He 			DRV_VDPA_LOG(ERR, "Fail to get GPA for available ring.");
380e6ac31e0SXinying Yu 			goto relay_vring_free;
381b47a0373SChaoyong He 		}
382b47a0373SChaoyong He 
383b47a0373SChaoyong He 		vdpa_hw->vring[i].avail = gpa;
384b47a0373SChaoyong He 
385e6ac31e0SXinying Yu 		/* Direct I/O for Tx queue, relay for Rx queue */
386e6ac31e0SXinying Yu 		if (relay && ((i & 1) == 0)) {
387e6ac31e0SXinying Yu 			vdpa_hw->vring[i].used = m_vring_iova +
388e6ac31e0SXinying Yu 					(char *)vdpa_hw->m_vring[i].used -
389e6ac31e0SXinying Yu 					(char *)vdpa_hw->m_vring[i].desc;
390e6ac31e0SXinying Yu 
391e6ac31e0SXinying Yu 			ret = rte_vhost_get_vring_base(vid, i,
392e6ac31e0SXinying Yu 					&vdpa_hw->m_vring[i].avail->idx,
393e6ac31e0SXinying Yu 					&vdpa_hw->m_vring[i].used->idx);
394e6ac31e0SXinying Yu 			if (ret != 0)
395e6ac31e0SXinying Yu 				goto relay_vring_free;
396e6ac31e0SXinying Yu 		} else {
397b47a0373SChaoyong He 			gpa = nfp_vdpa_qva_to_gpa(vid, (uint64_t)(uintptr_t)vring.used);
398b47a0373SChaoyong He 			if (gpa == 0) {
399b47a0373SChaoyong He 				DRV_VDPA_LOG(ERR, "Fail to get GPA for used ring.");
400e6ac31e0SXinying Yu 				goto relay_vring_free;
401b47a0373SChaoyong He 			}
402b47a0373SChaoyong He 
403b47a0373SChaoyong He 			vdpa_hw->vring[i].used = gpa;
404e6ac31e0SXinying Yu 		}
405b47a0373SChaoyong He 
406b47a0373SChaoyong He 		vdpa_hw->vring[i].size = vring.size;
407b47a0373SChaoyong He 
408e6ac31e0SXinying Yu 		if (relay) {
409e6ac31e0SXinying Yu 			size = RTE_ALIGN_CEIL(vring_size(vring.size,
410e6ac31e0SXinying Yu 					rte_mem_page_size()), rte_mem_page_size());
411e6ac31e0SXinying Yu 			m_vring_iova += size;
412e6ac31e0SXinying Yu 		}
413e6ac31e0SXinying Yu 
414b47a0373SChaoyong He 		ret = rte_vhost_get_vring_base(vid, i,
415b47a0373SChaoyong He 				&vdpa_hw->vring[i].last_avail_idx,
416b47a0373SChaoyong He 				&vdpa_hw->vring[i].last_used_idx);
417b47a0373SChaoyong He 		if (ret != 0)
418e6ac31e0SXinying Yu 			goto relay_vring_free;
419b47a0373SChaoyong He 	}
420b47a0373SChaoyong He 
421e6ac31e0SXinying Yu 	if (relay)
422e6ac31e0SXinying Yu 		return nfp_vdpa_relay_hw_start(&device->hw, vid);
423e6ac31e0SXinying Yu 	else
424b47a0373SChaoyong He 		return nfp_vdpa_hw_start(&device->hw, vid);
425e6ac31e0SXinying Yu 
426e6ac31e0SXinying Yu relay_vring_free:
427e6ac31e0SXinying Yu 	if (relay)
428e6ac31e0SXinying Yu 		nfp_vdpa_relay_vring_free(device, vdpa_hw->nr_vring);
429e6ac31e0SXinying Yu 
430e6ac31e0SXinying Yu 	return -EFAULT;
431b47a0373SChaoyong He }
432b47a0373SChaoyong He 
433b47a0373SChaoyong He static void
434e6ac31e0SXinying Yu nfp_vdpa_update_used_ring(struct nfp_vdpa_dev *dev,
435e6ac31e0SXinying Yu 		uint16_t qid)
436e6ac31e0SXinying Yu {
437e6ac31e0SXinying Yu 	rte_vdpa_relay_vring_used(dev->vid, qid, &dev->hw.m_vring[qid]);
438e6ac31e0SXinying Yu 	rte_vhost_vring_call(dev->vid, qid);
439e6ac31e0SXinying Yu }
440e6ac31e0SXinying Yu 
441e6ac31e0SXinying Yu static void
442e6ac31e0SXinying Yu nfp_vdpa_relay_stop(struct nfp_vdpa_dev *device)
443e6ac31e0SXinying Yu {
444e6ac31e0SXinying Yu 	int vid;
445e6ac31e0SXinying Yu 	uint32_t i;
446e6ac31e0SXinying Yu 	uint64_t len;
447e6ac31e0SXinying Yu 	struct rte_vhost_vring vring;
448e6ac31e0SXinying Yu 	struct nfp_vdpa_hw *vdpa_hw = &device->hw;
449e6ac31e0SXinying Yu 
450e6ac31e0SXinying Yu 	nfp_vdpa_hw_stop(vdpa_hw);
451e6ac31e0SXinying Yu 
452e6ac31e0SXinying Yu 	vid = device->vid;
453e6ac31e0SXinying Yu 	for (i = 0; i < vdpa_hw->nr_vring; i++) {
454e6ac31e0SXinying Yu 		/* Synchronize remaining new used entries if any */
455e6ac31e0SXinying Yu 		if ((i & 1) == 0)
456e6ac31e0SXinying Yu 			nfp_vdpa_update_used_ring(device, i);
457e6ac31e0SXinying Yu 
458e6ac31e0SXinying Yu 		rte_vhost_get_vhost_vring(vid, i, &vring);
459e6ac31e0SXinying Yu 		len = NFP_VDPA_USED_RING_LEN(vring.size);
460e6ac31e0SXinying Yu 		vdpa_hw->vring[i].last_avail_idx = vring.avail->idx;
461e6ac31e0SXinying Yu 		vdpa_hw->vring[i].last_used_idx = vring.used->idx;
462e6ac31e0SXinying Yu 
463e6ac31e0SXinying Yu 		rte_vhost_set_vring_base(vid, i,
464e6ac31e0SXinying Yu 				vdpa_hw->vring[i].last_avail_idx,
465e6ac31e0SXinying Yu 				vdpa_hw->vring[i].last_used_idx);
466e6ac31e0SXinying Yu 
467e6ac31e0SXinying Yu 		rte_vhost_log_used_vring(vid, i, 0, len);
468e6ac31e0SXinying Yu 
469e6ac31e0SXinying Yu 		if (vring.used->idx != vring.avail->idx)
470e6ac31e0SXinying Yu 			rte_atomic_store_explicit(
471e6ac31e0SXinying Yu 					(unsigned short __rte_atomic *)&vring.used->idx,
472e6ac31e0SXinying Yu 					vring.avail->idx, rte_memory_order_release);
473e6ac31e0SXinying Yu 	}
474e6ac31e0SXinying Yu 
475e6ac31e0SXinying Yu 	nfp_vdpa_relay_vring_free(device, vdpa_hw->nr_vring);
476e6ac31e0SXinying Yu }
477e6ac31e0SXinying Yu 
478e6ac31e0SXinying Yu static void
479e6ac31e0SXinying Yu nfp_vdpa_stop(struct nfp_vdpa_dev *device,
480e6ac31e0SXinying Yu 		bool relay)
481b47a0373SChaoyong He {
482b47a0373SChaoyong He 	int vid;
483b47a0373SChaoyong He 	uint32_t i;
484b47a0373SChaoyong He 	struct nfp_vdpa_hw *vdpa_hw = &device->hw;
485b47a0373SChaoyong He 
486b47a0373SChaoyong He 	nfp_vdpa_hw_stop(vdpa_hw);
487b47a0373SChaoyong He 
488b47a0373SChaoyong He 	vid = device->vid;
489e6ac31e0SXinying Yu 	if (relay)
490e6ac31e0SXinying Yu 		nfp_vdpa_relay_stop(device);
491e6ac31e0SXinying Yu 	else
492b47a0373SChaoyong He 		for (i = 0; i < vdpa_hw->nr_vring; i++)
493b47a0373SChaoyong He 			rte_vhost_set_vring_base(vid, i,
494b47a0373SChaoyong He 					vdpa_hw->vring[i].last_avail_idx,
495b47a0373SChaoyong He 					vdpa_hw->vring[i].last_used_idx);
496e6ac31e0SXinying Yu 
497b47a0373SChaoyong He }
498b47a0373SChaoyong He 
499b47a0373SChaoyong He static int
50010421b0dSXinying Yu nfp_vdpa_enable_vfio_intr(struct nfp_vdpa_dev *device,
50110421b0dSXinying Yu 		bool relay)
502b47a0373SChaoyong He {
50310421b0dSXinying Yu 	int fd;
504b47a0373SChaoyong He 	int ret;
505b47a0373SChaoyong He 	uint16_t i;
506b47a0373SChaoyong He 	int *fd_ptr;
507b47a0373SChaoyong He 	uint16_t nr_vring;
508b47a0373SChaoyong He 	struct vfio_irq_set *irq_set;
509b47a0373SChaoyong He 	struct rte_vhost_vring vring;
510b47a0373SChaoyong He 	char irq_set_buf[MSIX_IRQ_SET_BUF_LEN];
511b47a0373SChaoyong He 
512b47a0373SChaoyong He 	nr_vring = rte_vhost_get_vring_num(device->vid);
513b47a0373SChaoyong He 
514b47a0373SChaoyong He 	irq_set = (struct vfio_irq_set *)irq_set_buf;
515b47a0373SChaoyong He 	irq_set->argsz = sizeof(irq_set_buf);
516b47a0373SChaoyong He 	irq_set->count = nr_vring + 1;
517b47a0373SChaoyong He 	irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
518b47a0373SChaoyong He 	irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
519b47a0373SChaoyong He 	irq_set->start = 0;
520b47a0373SChaoyong He 
521b47a0373SChaoyong He 	fd_ptr = (int *)&irq_set->data;
522b47a0373SChaoyong He 	fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] = rte_intr_fd_get(device->pci_dev->intr_handle);
523b47a0373SChaoyong He 
524b47a0373SChaoyong He 	for (i = 0; i < nr_vring; i++)
525b47a0373SChaoyong He 		device->intr_fd[i] = -1;
526b47a0373SChaoyong He 
527b47a0373SChaoyong He 	for (i = 0; i < nr_vring; i++) {
528b47a0373SChaoyong He 		rte_vhost_get_vhost_vring(device->vid, i, &vring);
529b47a0373SChaoyong He 		fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
530b47a0373SChaoyong He 	}
531b47a0373SChaoyong He 
53210421b0dSXinying Yu 	if (relay) {
53310421b0dSXinying Yu 		for (i = 0; i < nr_vring; i += 2) {
53410421b0dSXinying Yu 			fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
53510421b0dSXinying Yu 			if (fd < 0) {
536*b6de4353SZerun Fu 				DRV_VDPA_LOG(ERR, "Can't setup eventfd.");
53710421b0dSXinying Yu 				return -EINVAL;
53810421b0dSXinying Yu 			}
53910421b0dSXinying Yu 
54010421b0dSXinying Yu 			device->intr_fd[i] = fd;
54110421b0dSXinying Yu 			fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd;
54210421b0dSXinying Yu 		}
54310421b0dSXinying Yu 	}
54410421b0dSXinying Yu 
545b47a0373SChaoyong He 	ret = ioctl(device->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
546b47a0373SChaoyong He 	if (ret != 0) {
547b47a0373SChaoyong He 		DRV_VDPA_LOG(ERR, "Error enabling MSI-X interrupts.");
548b47a0373SChaoyong He 		return -EIO;
549b47a0373SChaoyong He 	}
550b47a0373SChaoyong He 
551b47a0373SChaoyong He 	return 0;
552b47a0373SChaoyong He }
553b47a0373SChaoyong He 
554b47a0373SChaoyong He static int
555b47a0373SChaoyong He nfp_vdpa_disable_vfio_intr(struct nfp_vdpa_dev *device)
556b47a0373SChaoyong He {
557b47a0373SChaoyong He 	int ret;
558b47a0373SChaoyong He 	struct vfio_irq_set *irq_set;
559b47a0373SChaoyong He 	char irq_set_buf[MSIX_IRQ_SET_BUF_LEN];
560b47a0373SChaoyong He 
561b47a0373SChaoyong He 	irq_set = (struct vfio_irq_set *)irq_set_buf;
562b47a0373SChaoyong He 	irq_set->argsz = sizeof(irq_set_buf);
563b47a0373SChaoyong He 	irq_set->count = 0;
564b47a0373SChaoyong He 	irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
565b47a0373SChaoyong He 	irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
566b47a0373SChaoyong He 	irq_set->start = 0;
567b47a0373SChaoyong He 
568b47a0373SChaoyong He 	ret = ioctl(device->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
569b47a0373SChaoyong He 	if (ret != 0) {
570b47a0373SChaoyong He 		DRV_VDPA_LOG(ERR, "Error disabling MSI-X interrupts.");
571b47a0373SChaoyong He 		return -EIO;
572b47a0373SChaoyong He 	}
573b47a0373SChaoyong He 
574b47a0373SChaoyong He 	return 0;
575b47a0373SChaoyong He }
576b47a0373SChaoyong He 
57776ea5ebeSChaoyong He static void
57876ea5ebeSChaoyong He nfp_vdpa_read_kickfd(int kickfd)
57976ea5ebeSChaoyong He {
58076ea5ebeSChaoyong He 	int bytes;
58176ea5ebeSChaoyong He 	uint64_t buf;
58276ea5ebeSChaoyong He 
58376ea5ebeSChaoyong He 	for (;;) {
58476ea5ebeSChaoyong He 		bytes = read(kickfd, &buf, 8);
58576ea5ebeSChaoyong He 		if (bytes >= 0)
58676ea5ebeSChaoyong He 			break;
58776ea5ebeSChaoyong He 
58876ea5ebeSChaoyong He 		if (errno != EINTR && errno != EWOULDBLOCK &&
58976ea5ebeSChaoyong He 				errno != EAGAIN) {
590*b6de4353SZerun Fu 			DRV_VDPA_LOG(ERR, "Error reading kickfd.");
59176ea5ebeSChaoyong He 			break;
59276ea5ebeSChaoyong He 		}
59376ea5ebeSChaoyong He 	}
59476ea5ebeSChaoyong He }
59576ea5ebeSChaoyong He 
59676ea5ebeSChaoyong He static int
59776ea5ebeSChaoyong He nfp_vdpa_notify_epoll_ctl(uint32_t queue_num,
59876ea5ebeSChaoyong He 		struct nfp_vdpa_dev *device)
59976ea5ebeSChaoyong He {
60076ea5ebeSChaoyong He 	int ret;
60176ea5ebeSChaoyong He 	uint32_t qid;
60276ea5ebeSChaoyong He 
60376ea5ebeSChaoyong He 	for (qid = 0; qid < queue_num; qid++) {
60476ea5ebeSChaoyong He 		struct epoll_event ev;
60576ea5ebeSChaoyong He 		struct rte_vhost_vring vring;
60676ea5ebeSChaoyong He 
60776ea5ebeSChaoyong He 		ev.events = EPOLLIN | EPOLLPRI;
60876ea5ebeSChaoyong He 		rte_vhost_get_vhost_vring(device->vid, qid, &vring);
60976ea5ebeSChaoyong He 		ev.data.u64 = qid | (uint64_t)vring.kickfd << 32;
61076ea5ebeSChaoyong He 		ret = epoll_ctl(device->epoll_fd, EPOLL_CTL_ADD, vring.kickfd, &ev);
61176ea5ebeSChaoyong He 		if (ret < 0) {
612*b6de4353SZerun Fu 			DRV_VDPA_LOG(ERR, "Epoll add error for queue %d.", qid);
61376ea5ebeSChaoyong He 			return ret;
61476ea5ebeSChaoyong He 		}
61576ea5ebeSChaoyong He 	}
61676ea5ebeSChaoyong He 
61776ea5ebeSChaoyong He 	return 0;
61876ea5ebeSChaoyong He }
61976ea5ebeSChaoyong He 
62076ea5ebeSChaoyong He static int
62176ea5ebeSChaoyong He nfp_vdpa_notify_epoll_wait(uint32_t queue_num,
62276ea5ebeSChaoyong He 		struct nfp_vdpa_dev *device)
62376ea5ebeSChaoyong He {
62476ea5ebeSChaoyong He 	int i;
62576ea5ebeSChaoyong He 	int fds;
62676ea5ebeSChaoyong He 	int kickfd;
62776ea5ebeSChaoyong He 	uint32_t qid;
62876ea5ebeSChaoyong He 	struct epoll_event events[NFP_VDPA_MAX_QUEUES * 2];
62976ea5ebeSChaoyong He 
63076ea5ebeSChaoyong He 	for (;;) {
63176ea5ebeSChaoyong He 		fds = epoll_wait(device->epoll_fd, events, queue_num, -1);
63276ea5ebeSChaoyong He 		if (fds < 0) {
63376ea5ebeSChaoyong He 			if (errno == EINTR)
63476ea5ebeSChaoyong He 				continue;
63576ea5ebeSChaoyong He 
636*b6de4353SZerun Fu 			DRV_VDPA_LOG(ERR, "Epoll wait fail.");
63776ea5ebeSChaoyong He 			return -EACCES;
63876ea5ebeSChaoyong He 		}
63976ea5ebeSChaoyong He 
64076ea5ebeSChaoyong He 		for (i = 0; i < fds; i++) {
64176ea5ebeSChaoyong He 			qid = events[i].data.u32;
64276ea5ebeSChaoyong He 			kickfd = (uint32_t)(events[i].data.u64 >> 32);
64376ea5ebeSChaoyong He 
64476ea5ebeSChaoyong He 			nfp_vdpa_read_kickfd(kickfd);
64576ea5ebeSChaoyong He 			nfp_vdpa_notify_queue(&device->hw, qid);
64676ea5ebeSChaoyong He 		}
64776ea5ebeSChaoyong He 	}
64876ea5ebeSChaoyong He 
64976ea5ebeSChaoyong He 	return 0;
65076ea5ebeSChaoyong He }
65176ea5ebeSChaoyong He 
65276ea5ebeSChaoyong He static uint32_t
65376ea5ebeSChaoyong He nfp_vdpa_notify_relay(void *arg)
65476ea5ebeSChaoyong He {
65576ea5ebeSChaoyong He 	int ret;
65676ea5ebeSChaoyong He 	int epoll_fd;
65776ea5ebeSChaoyong He 	uint32_t queue_num;
65876ea5ebeSChaoyong He 	struct nfp_vdpa_dev *device = arg;
65976ea5ebeSChaoyong He 
66076ea5ebeSChaoyong He 	epoll_fd = epoll_create(NFP_VDPA_MAX_QUEUES * 2);
66176ea5ebeSChaoyong He 	if (epoll_fd < 0) {
662f6272c7aSZerun Fu 		DRV_VDPA_LOG(ERR, "Failed to create epoll instance.");
66376ea5ebeSChaoyong He 		return 1;
66476ea5ebeSChaoyong He 	}
66576ea5ebeSChaoyong He 
66676ea5ebeSChaoyong He 	device->epoll_fd = epoll_fd;
66776ea5ebeSChaoyong He 
66876ea5ebeSChaoyong He 	queue_num = rte_vhost_get_vring_num(device->vid);
66976ea5ebeSChaoyong He 
67076ea5ebeSChaoyong He 	ret = nfp_vdpa_notify_epoll_ctl(queue_num, device);
67176ea5ebeSChaoyong He 	if (ret != 0)
67276ea5ebeSChaoyong He 		goto notify_exit;
67376ea5ebeSChaoyong He 
67476ea5ebeSChaoyong He 	ret = nfp_vdpa_notify_epoll_wait(queue_num, device);
67576ea5ebeSChaoyong He 	if (ret != 0)
67676ea5ebeSChaoyong He 		goto notify_exit;
67776ea5ebeSChaoyong He 
67876ea5ebeSChaoyong He 	return 0;
67976ea5ebeSChaoyong He 
68076ea5ebeSChaoyong He notify_exit:
68176ea5ebeSChaoyong He 	close(device->epoll_fd);
68276ea5ebeSChaoyong He 	device->epoll_fd = -1;
68376ea5ebeSChaoyong He 
68476ea5ebeSChaoyong He 	return 1;
68576ea5ebeSChaoyong He }
68676ea5ebeSChaoyong He 
68776ea5ebeSChaoyong He static int
68876ea5ebeSChaoyong He nfp_vdpa_setup_notify_relay(struct nfp_vdpa_dev *device)
68976ea5ebeSChaoyong He {
69076ea5ebeSChaoyong He 	int ret;
69176ea5ebeSChaoyong He 	char name[RTE_THREAD_INTERNAL_NAME_SIZE];
69276ea5ebeSChaoyong He 
69376ea5ebeSChaoyong He 	snprintf(name, sizeof(name), "nfp-noti%d", device->vid);
69476ea5ebeSChaoyong He 	ret = rte_thread_create_internal_control(&device->tid, name,
69576ea5ebeSChaoyong He 			nfp_vdpa_notify_relay, (void *)device);
69676ea5ebeSChaoyong He 	if (ret != 0) {
69776ea5ebeSChaoyong He 		DRV_VDPA_LOG(ERR, "Failed to create notify relay pthread.");
69876ea5ebeSChaoyong He 		return -1;
69976ea5ebeSChaoyong He 	}
70076ea5ebeSChaoyong He 
70176ea5ebeSChaoyong He 	return 0;
70276ea5ebeSChaoyong He }
70376ea5ebeSChaoyong He 
70476ea5ebeSChaoyong He static void
70576ea5ebeSChaoyong He nfp_vdpa_unset_notify_relay(struct nfp_vdpa_dev *device)
70676ea5ebeSChaoyong He {
70776ea5ebeSChaoyong He 	if (device->tid.opaque_id != 0) {
70876ea5ebeSChaoyong He 		pthread_cancel((pthread_t)device->tid.opaque_id);
70976ea5ebeSChaoyong He 		rte_thread_join(device->tid, NULL);
71076ea5ebeSChaoyong He 		device->tid.opaque_id = 0;
71176ea5ebeSChaoyong He 	}
71276ea5ebeSChaoyong He 
71376ea5ebeSChaoyong He 	if (device->epoll_fd >= 0) {
71476ea5ebeSChaoyong He 		close(device->epoll_fd);
71576ea5ebeSChaoyong He 		device->epoll_fd = -1;
71676ea5ebeSChaoyong He 	}
71776ea5ebeSChaoyong He }
71876ea5ebeSChaoyong He 
719b47a0373SChaoyong He static int
720b47a0373SChaoyong He update_datapath(struct nfp_vdpa_dev *device)
721b47a0373SChaoyong He {
722b47a0373SChaoyong He 	int ret;
723b47a0373SChaoyong He 
724b47a0373SChaoyong He 	rte_spinlock_lock(&device->lock);
725b47a0373SChaoyong He 
726b47a0373SChaoyong He 	if ((rte_atomic_load_explicit(&device->running, rte_memory_order_relaxed) == 0) &&
727b47a0373SChaoyong He 			(rte_atomic_load_explicit(&device->started,
728b47a0373SChaoyong He 					rte_memory_order_relaxed) != 0) &&
729b47a0373SChaoyong He 			(rte_atomic_load_explicit(&device->dev_attached,
730b47a0373SChaoyong He 					rte_memory_order_relaxed) != 0)) {
731b47a0373SChaoyong He 		ret = nfp_vdpa_dma_map(device, true);
732b47a0373SChaoyong He 		if (ret != 0)
733b47a0373SChaoyong He 			goto unlock_exit;
734b47a0373SChaoyong He 
73510421b0dSXinying Yu 		ret = nfp_vdpa_enable_vfio_intr(device, false);
736b47a0373SChaoyong He 		if (ret != 0)
737b47a0373SChaoyong He 			goto dma_map_rollback;
738b47a0373SChaoyong He 
739e6ac31e0SXinying Yu 		ret = nfp_vdpa_start(device, false);
740b47a0373SChaoyong He 		if (ret != 0)
741b47a0373SChaoyong He 			goto disable_vfio_intr;
742b47a0373SChaoyong He 
74376ea5ebeSChaoyong He 		ret = nfp_vdpa_setup_notify_relay(device);
74476ea5ebeSChaoyong He 		if (ret != 0)
74576ea5ebeSChaoyong He 			goto vdpa_stop;
74676ea5ebeSChaoyong He 
747b47a0373SChaoyong He 		rte_atomic_store_explicit(&device->running, 1, rte_memory_order_relaxed);
748b47a0373SChaoyong He 	} else if ((rte_atomic_load_explicit(&device->running, rte_memory_order_relaxed) != 0) &&
749b47a0373SChaoyong He 			((rte_atomic_load_explicit(&device->started,
750b47a0373SChaoyong He 					rte_memory_order_relaxed) != 0) ||
751b47a0373SChaoyong He 			(rte_atomic_load_explicit(&device->dev_attached,
752b47a0373SChaoyong He 					rte_memory_order_relaxed) != 0))) {
75376ea5ebeSChaoyong He 		nfp_vdpa_unset_notify_relay(device);
75476ea5ebeSChaoyong He 
755e6ac31e0SXinying Yu 		nfp_vdpa_stop(device, false);
756b47a0373SChaoyong He 
757b47a0373SChaoyong He 		ret = nfp_vdpa_disable_vfio_intr(device);
758b47a0373SChaoyong He 		if (ret != 0)
759b47a0373SChaoyong He 			goto unlock_exit;
760b47a0373SChaoyong He 
761b47a0373SChaoyong He 		ret = nfp_vdpa_dma_map(device, false);
762b47a0373SChaoyong He 		if (ret != 0)
763b47a0373SChaoyong He 			goto unlock_exit;
764b47a0373SChaoyong He 
765b47a0373SChaoyong He 		rte_atomic_store_explicit(&device->running, 0, rte_memory_order_relaxed);
766b47a0373SChaoyong He 	}
767b47a0373SChaoyong He 
768b47a0373SChaoyong He 	rte_spinlock_unlock(&device->lock);
769b47a0373SChaoyong He 	return 0;
770b47a0373SChaoyong He 
77176ea5ebeSChaoyong He vdpa_stop:
772e6ac31e0SXinying Yu 	nfp_vdpa_stop(device, false);
773b47a0373SChaoyong He disable_vfio_intr:
774b47a0373SChaoyong He 	nfp_vdpa_disable_vfio_intr(device);
775b47a0373SChaoyong He dma_map_rollback:
776b47a0373SChaoyong He 	nfp_vdpa_dma_map(device, false);
777b47a0373SChaoyong He unlock_exit:
778b47a0373SChaoyong He 	rte_spinlock_unlock(&device->lock);
779b47a0373SChaoyong He 	return ret;
780b47a0373SChaoyong He }
781b47a0373SChaoyong He 
7820141f545SChaoyong He static int
78302fe8366SXinying Yu nfp_vdpa_vring_epoll_ctl(uint32_t queue_num,
78402fe8366SXinying Yu 		struct nfp_vdpa_dev *device)
78502fe8366SXinying Yu {
78602fe8366SXinying Yu 	int ret;
78702fe8366SXinying Yu 	uint32_t qid;
78802fe8366SXinying Yu 	struct epoll_event ev;
78902fe8366SXinying Yu 	struct rte_vhost_vring vring;
79002fe8366SXinying Yu 
79102fe8366SXinying Yu 	for (qid = 0; qid < queue_num; qid++) {
79202fe8366SXinying Yu 		ev.events = EPOLLIN | EPOLLPRI;
79302fe8366SXinying Yu 		rte_vhost_get_vhost_vring(device->vid, qid, &vring);
79402fe8366SXinying Yu 		ev.data.u64 = qid << 1 | (uint64_t)vring.kickfd << 32;
79502fe8366SXinying Yu 		ret = epoll_ctl(device->epoll_fd, EPOLL_CTL_ADD, vring.kickfd, &ev);
79602fe8366SXinying Yu 		if (ret < 0) {
797*b6de4353SZerun Fu 			DRV_VDPA_LOG(ERR, "Epoll add error for queue %u.", qid);
79802fe8366SXinying Yu 			return ret;
79902fe8366SXinying Yu 		}
80002fe8366SXinying Yu 	}
80102fe8366SXinying Yu 
80202fe8366SXinying Yu 	/* vDPA driver interrupt */
80302fe8366SXinying Yu 	for (qid = 0; qid < queue_num; qid += 2) {
80402fe8366SXinying Yu 		ev.events = EPOLLIN | EPOLLPRI;
80502fe8366SXinying Yu 		/* Leave a flag to mark it's for interrupt */
80602fe8366SXinying Yu 		ev.data.u64 = EPOLL_DATA_INTR | qid << 1 |
80702fe8366SXinying Yu 				(uint64_t)device->intr_fd[qid] << 32;
80802fe8366SXinying Yu 		ret = epoll_ctl(device->epoll_fd, EPOLL_CTL_ADD,
80902fe8366SXinying Yu 				device->intr_fd[qid], &ev);
81002fe8366SXinying Yu 		if (ret < 0) {
811*b6de4353SZerun Fu 			DRV_VDPA_LOG(ERR, "Epoll add error for queue %u.", qid);
81202fe8366SXinying Yu 			return ret;
81302fe8366SXinying Yu 		}
81402fe8366SXinying Yu 
81502fe8366SXinying Yu 		nfp_vdpa_update_used_ring(device, qid);
81602fe8366SXinying Yu 	}
81702fe8366SXinying Yu 
81802fe8366SXinying Yu 	return 0;
81902fe8366SXinying Yu }
82002fe8366SXinying Yu 
82102fe8366SXinying Yu static int
82202fe8366SXinying Yu nfp_vdpa_vring_epoll_wait(uint32_t queue_num,
82302fe8366SXinying Yu 		struct nfp_vdpa_dev *device)
82402fe8366SXinying Yu {
82502fe8366SXinying Yu 	int i;
82602fe8366SXinying Yu 	int fds;
82702fe8366SXinying Yu 	int kickfd;
82802fe8366SXinying Yu 	uint32_t qid;
82902fe8366SXinying Yu 	struct epoll_event events[NFP_VDPA_MAX_QUEUES * 2];
83002fe8366SXinying Yu 
83102fe8366SXinying Yu 	for (;;) {
83202fe8366SXinying Yu 		fds = epoll_wait(device->epoll_fd, events, queue_num * 2, -1);
83302fe8366SXinying Yu 		if (fds < 0) {
83402fe8366SXinying Yu 			if (errno == EINTR)
83502fe8366SXinying Yu 				continue;
83602fe8366SXinying Yu 
837*b6de4353SZerun Fu 			DRV_VDPA_LOG(ERR, "Epoll wait fail.");
83802fe8366SXinying Yu 			return -EACCES;
83902fe8366SXinying Yu 		}
84002fe8366SXinying Yu 
84102fe8366SXinying Yu 		for (i = 0; i < fds; i++) {
84202fe8366SXinying Yu 			qid = events[i].data.u32 >> 1;
84302fe8366SXinying Yu 			kickfd = (uint32_t)(events[i].data.u64 >> 32);
84402fe8366SXinying Yu 
84502fe8366SXinying Yu 			nfp_vdpa_read_kickfd(kickfd);
84602fe8366SXinying Yu 			if ((events[i].data.u32 & EPOLL_DATA_INTR) != 0) {
84702fe8366SXinying Yu 				nfp_vdpa_update_used_ring(device, qid);
84802fe8366SXinying Yu 				nfp_vdpa_irq_unmask(&device->hw);
84902fe8366SXinying Yu 			} else {
85002fe8366SXinying Yu 				nfp_vdpa_notify_queue(&device->hw, qid);
85102fe8366SXinying Yu 			}
85202fe8366SXinying Yu 		}
85302fe8366SXinying Yu 	}
85402fe8366SXinying Yu 
85502fe8366SXinying Yu 	return 0;
85602fe8366SXinying Yu }
85702fe8366SXinying Yu 
85802fe8366SXinying Yu static uint32_t
85902fe8366SXinying Yu nfp_vdpa_vring_relay(void *arg)
86002fe8366SXinying Yu {
86102fe8366SXinying Yu 	int ret;
86202fe8366SXinying Yu 	int epoll_fd;
86302fe8366SXinying Yu 	uint16_t queue_id;
86402fe8366SXinying Yu 	uint32_t queue_num;
86502fe8366SXinying Yu 	struct nfp_vdpa_dev *device = arg;
86602fe8366SXinying Yu 
86702fe8366SXinying Yu 	epoll_fd = epoll_create(NFP_VDPA_MAX_QUEUES * 2);
86802fe8366SXinying Yu 	if (epoll_fd < 0) {
86902fe8366SXinying Yu 		DRV_VDPA_LOG(ERR, "failed to create epoll instance.");
87002fe8366SXinying Yu 		return 1;
87102fe8366SXinying Yu 	}
87202fe8366SXinying Yu 
87302fe8366SXinying Yu 	device->epoll_fd = epoll_fd;
87402fe8366SXinying Yu 
87502fe8366SXinying Yu 	queue_num = rte_vhost_get_vring_num(device->vid);
87602fe8366SXinying Yu 
87702fe8366SXinying Yu 	ret = nfp_vdpa_vring_epoll_ctl(queue_num, device);
87802fe8366SXinying Yu 	if (ret != 0)
87902fe8366SXinying Yu 		goto notify_exit;
88002fe8366SXinying Yu 
88102fe8366SXinying Yu 	/* Start relay with a first kick */
88202fe8366SXinying Yu 	for (queue_id = 0; queue_id < queue_num; queue_id++)
88302fe8366SXinying Yu 		nfp_vdpa_notify_queue(&device->hw, queue_id);
88402fe8366SXinying Yu 
88502fe8366SXinying Yu 	ret = nfp_vdpa_vring_epoll_wait(queue_num, device);
88602fe8366SXinying Yu 	if (ret != 0)
88702fe8366SXinying Yu 		goto notify_exit;
88802fe8366SXinying Yu 
88902fe8366SXinying Yu 	return 0;
89002fe8366SXinying Yu 
89102fe8366SXinying Yu notify_exit:
89202fe8366SXinying Yu 	close(device->epoll_fd);
89302fe8366SXinying Yu 	device->epoll_fd = -1;
89402fe8366SXinying Yu 
89502fe8366SXinying Yu 	return 1;
89602fe8366SXinying Yu }
89702fe8366SXinying Yu 
89802fe8366SXinying Yu static int
89902fe8366SXinying Yu nfp_vdpa_setup_vring_relay(struct nfp_vdpa_dev *device)
90002fe8366SXinying Yu {
90102fe8366SXinying Yu 	int ret;
90202fe8366SXinying Yu 	char name[RTE_THREAD_INTERNAL_NAME_SIZE];
90302fe8366SXinying Yu 
90402fe8366SXinying Yu 	snprintf(name, sizeof(name), "nfp_vring%d", device->vid);
90502fe8366SXinying Yu 	ret = rte_thread_create_internal_control(&device->tid, name,
90602fe8366SXinying Yu 			nfp_vdpa_vring_relay, (void *)device);
90702fe8366SXinying Yu 	if (ret != 0) {
90802fe8366SXinying Yu 		DRV_VDPA_LOG(ERR, "Failed to create vring relay pthread.");
90902fe8366SXinying Yu 		return -EPERM;
91002fe8366SXinying Yu 	}
91102fe8366SXinying Yu 
91202fe8366SXinying Yu 	return 0;
91302fe8366SXinying Yu }
91402fe8366SXinying Yu 
91502fe8366SXinying Yu static int
91694fde3a7SXinying Yu nfp_vdpa_sw_fallback(struct nfp_vdpa_dev *device)
91794fde3a7SXinying Yu {
91894fde3a7SXinying Yu 	int ret;
91994fde3a7SXinying Yu 	int vid = device->vid;
92094fde3a7SXinying Yu 
92194fde3a7SXinying Yu 	/* Stop the direct IO data path */
92294fde3a7SXinying Yu 	nfp_vdpa_unset_notify_relay(device);
92394fde3a7SXinying Yu 	nfp_vdpa_disable_vfio_intr(device);
92494fde3a7SXinying Yu 
92594fde3a7SXinying Yu 	ret = rte_vhost_host_notifier_ctrl(vid, RTE_VHOST_QUEUE_ALL, false);
92694fde3a7SXinying Yu 	if ((ret != 0) && (ret != -ENOTSUP)) {
92794fde3a7SXinying Yu 		DRV_VDPA_LOG(ERR, "Unset the host notifier failed.");
92894fde3a7SXinying Yu 		goto error;
92994fde3a7SXinying Yu 	}
93094fde3a7SXinying Yu 
93110421b0dSXinying Yu 	/* Setup interrupt for vring relay */
93210421b0dSXinying Yu 	ret = nfp_vdpa_enable_vfio_intr(device, true);
93310421b0dSXinying Yu 	if (ret != 0)
93410421b0dSXinying Yu 		goto error;
93510421b0dSXinying Yu 
936e6ac31e0SXinying Yu 	/* Config the VF */
937e6ac31e0SXinying Yu 	ret = nfp_vdpa_start(device, true);
938e6ac31e0SXinying Yu 	if (ret != 0)
939e6ac31e0SXinying Yu 		goto unset_intr;
940e6ac31e0SXinying Yu 
94102fe8366SXinying Yu 	/* Setup vring relay thread */
94202fe8366SXinying Yu 	ret = nfp_vdpa_setup_vring_relay(device);
94302fe8366SXinying Yu 	if (ret != 0)
94402fe8366SXinying Yu 		goto stop_vf;
94502fe8366SXinying Yu 
94694fde3a7SXinying Yu 	device->hw.sw_fallback_running = true;
94794fde3a7SXinying Yu 
94894fde3a7SXinying Yu 	return 0;
94994fde3a7SXinying Yu 
95002fe8366SXinying Yu stop_vf:
95102fe8366SXinying Yu 	nfp_vdpa_stop(device, true);
952e6ac31e0SXinying Yu unset_intr:
953e6ac31e0SXinying Yu 	nfp_vdpa_disable_vfio_intr(device);
95494fde3a7SXinying Yu error:
95594fde3a7SXinying Yu 	return ret;
95694fde3a7SXinying Yu }
95794fde3a7SXinying Yu 
95894fde3a7SXinying Yu static int
9590141f545SChaoyong He nfp_vdpa_dev_config(int vid)
9600141f545SChaoyong He {
9610141f545SChaoyong He 	int ret;
9620141f545SChaoyong He 	struct nfp_vdpa_dev *device;
9630141f545SChaoyong He 	struct rte_vdpa_device *vdev;
9640141f545SChaoyong He 	struct nfp_vdpa_dev_node *node;
9650141f545SChaoyong He 
9660141f545SChaoyong He 	vdev = rte_vhost_get_vdpa_device(vid);
9670141f545SChaoyong He 	node = nfp_vdpa_find_node_by_vdev(vdev);
9680141f545SChaoyong He 	if (node == NULL) {
969*b6de4353SZerun Fu 		DRV_VDPA_LOG(ERR, "Invalid vDPA device: %p.", vdev);
9700141f545SChaoyong He 		return -ENODEV;
9710141f545SChaoyong He 	}
9720141f545SChaoyong He 
9730141f545SChaoyong He 	device = node->device;
9740141f545SChaoyong He 	device->vid = vid;
9750141f545SChaoyong He 	rte_atomic_store_explicit(&device->dev_attached, 1, rte_memory_order_relaxed);
9760141f545SChaoyong He 	update_datapath(device);
9770141f545SChaoyong He 
9780141f545SChaoyong He 	ret = rte_vhost_host_notifier_ctrl(vid, RTE_VHOST_QUEUE_ALL, true);
9790141f545SChaoyong He 	if (ret != 0)
9800141f545SChaoyong He 		DRV_VDPA_LOG(INFO, "vDPA (%s): software relay is used.",
9810141f545SChaoyong He 				vdev->device->name);
9820141f545SChaoyong He 
9830141f545SChaoyong He 	return 0;
9840141f545SChaoyong He }
9850141f545SChaoyong He 
9860141f545SChaoyong He static int
9870141f545SChaoyong He nfp_vdpa_dev_close(int vid)
9880141f545SChaoyong He {
9890141f545SChaoyong He 	struct nfp_vdpa_dev *device;
9900141f545SChaoyong He 	struct rte_vdpa_device *vdev;
9910141f545SChaoyong He 	struct nfp_vdpa_dev_node *node;
9920141f545SChaoyong He 
9930141f545SChaoyong He 	vdev = rte_vhost_get_vdpa_device(vid);
9940141f545SChaoyong He 	node = nfp_vdpa_find_node_by_vdev(vdev);
9950141f545SChaoyong He 	if (node == NULL) {
996*b6de4353SZerun Fu 		DRV_VDPA_LOG(ERR, "Invalid vDPA device: %p.", vdev);
9970141f545SChaoyong He 		return -ENODEV;
9980141f545SChaoyong He 	}
9990141f545SChaoyong He 
10000141f545SChaoyong He 	device = node->device;
100194fde3a7SXinying Yu 	if (device->hw.sw_fallback_running) {
1002e6ac31e0SXinying Yu 		/* Reset VF */
1003e6ac31e0SXinying Yu 		nfp_vdpa_stop(device, true);
1004e6ac31e0SXinying Yu 
100502fe8366SXinying Yu 		/* Remove interrupt setting */
100602fe8366SXinying Yu 		nfp_vdpa_disable_vfio_intr(device);
100702fe8366SXinying Yu 
100802fe8366SXinying Yu 		/* Unset DMA map for guest memory */
100902fe8366SXinying Yu 		nfp_vdpa_dma_map(device, false);
101002fe8366SXinying Yu 
101194fde3a7SXinying Yu 		device->hw.sw_fallback_running = false;
101294fde3a7SXinying Yu 
101394fde3a7SXinying Yu 		rte_atomic_store_explicit(&device->dev_attached, 0,
101494fde3a7SXinying Yu 				rte_memory_order_relaxed);
101594fde3a7SXinying Yu 		rte_atomic_store_explicit(&device->running, 0,
101694fde3a7SXinying Yu 				rte_memory_order_relaxed);
101794fde3a7SXinying Yu 	} else {
101894fde3a7SXinying Yu 		rte_atomic_store_explicit(&device->dev_attached, 0,
101994fde3a7SXinying Yu 				rte_memory_order_relaxed);
10200141f545SChaoyong He 		update_datapath(device);
102194fde3a7SXinying Yu 	}
10220141f545SChaoyong He 
10230141f545SChaoyong He 	return 0;
10240141f545SChaoyong He }
10250141f545SChaoyong He 
10260141f545SChaoyong He static int
10270141f545SChaoyong He nfp_vdpa_get_vfio_group_fd(int vid)
10280141f545SChaoyong He {
10290141f545SChaoyong He 	struct rte_vdpa_device *vdev;
10300141f545SChaoyong He 	struct nfp_vdpa_dev_node *node;
10310141f545SChaoyong He 
10320141f545SChaoyong He 	vdev = rte_vhost_get_vdpa_device(vid);
10330141f545SChaoyong He 	node = nfp_vdpa_find_node_by_vdev(vdev);
10340141f545SChaoyong He 	if (node == NULL) {
1035*b6de4353SZerun Fu 		DRV_VDPA_LOG(ERR, "Invalid vDPA device: %p.", vdev);
10360141f545SChaoyong He 		return -ENODEV;
10370141f545SChaoyong He 	}
10380141f545SChaoyong He 
10390141f545SChaoyong He 	return node->device->vfio_group_fd;
10400141f545SChaoyong He }
10410141f545SChaoyong He 
10420141f545SChaoyong He static int
10430141f545SChaoyong He nfp_vdpa_get_vfio_device_fd(int vid)
10440141f545SChaoyong He {
10450141f545SChaoyong He 	struct rte_vdpa_device *vdev;
10460141f545SChaoyong He 	struct nfp_vdpa_dev_node *node;
10470141f545SChaoyong He 
10480141f545SChaoyong He 	vdev = rte_vhost_get_vdpa_device(vid);
10490141f545SChaoyong He 	node = nfp_vdpa_find_node_by_vdev(vdev);
10500141f545SChaoyong He 	if (node == NULL) {
1051*b6de4353SZerun Fu 		DRV_VDPA_LOG(ERR, "Invalid vDPA device: %p.", vdev);
10520141f545SChaoyong He 		return -ENODEV;
10530141f545SChaoyong He 	}
10540141f545SChaoyong He 
10550141f545SChaoyong He 	return node->device->vfio_dev_fd;
10560141f545SChaoyong He }
10570141f545SChaoyong He 
10580141f545SChaoyong He static int
10590141f545SChaoyong He nfp_vdpa_get_notify_area(int vid,
10600141f545SChaoyong He 		int qid,
10610141f545SChaoyong He 		uint64_t *offset,
10620141f545SChaoyong He 		uint64_t *size)
10630141f545SChaoyong He {
10640141f545SChaoyong He 	int ret;
10650141f545SChaoyong He 	struct nfp_vdpa_dev *device;
10660141f545SChaoyong He 	struct rte_vdpa_device *vdev;
10670141f545SChaoyong He 	struct nfp_vdpa_dev_node *node;
10680141f545SChaoyong He 	struct vfio_region_info region = {
10690141f545SChaoyong He 		.argsz = sizeof(region)
10700141f545SChaoyong He 	};
10710141f545SChaoyong He 
10720141f545SChaoyong He 	vdev = rte_vhost_get_vdpa_device(vid);
10730141f545SChaoyong He 	node = nfp_vdpa_find_node_by_vdev(vdev);
10740141f545SChaoyong He 	if (node == NULL) {
10750141f545SChaoyong He 		DRV_VDPA_LOG(ERR,  "Invalid vDPA device: %p", vdev);
10760141f545SChaoyong He 		return -ENODEV;
10770141f545SChaoyong He 	}
10780141f545SChaoyong He 
10790141f545SChaoyong He 	device = node->device;
10800141f545SChaoyong He 	region.index = device->hw.notify_region;
10810141f545SChaoyong He 
10820141f545SChaoyong He 	ret = ioctl(device->vfio_dev_fd, VFIO_DEVICE_GET_REGION_INFO, &region);
10830141f545SChaoyong He 	if (ret != 0) {
10840141f545SChaoyong He 		DRV_VDPA_LOG(ERR, "Get not get device region info.");
10850141f545SChaoyong He 		return -EIO;
10860141f545SChaoyong He 	}
10870141f545SChaoyong He 
10880141f545SChaoyong He 	*offset = nfp_vdpa_get_queue_notify_offset(&device->hw, qid) + region.offset;
10890141f545SChaoyong He 	*size = NFP_VDPA_NOTIFY_ADDR_INTERVAL;
10900141f545SChaoyong He 
10910141f545SChaoyong He 	return 0;
10920141f545SChaoyong He }
10930141f545SChaoyong He 
10940141f545SChaoyong He static int
10950141f545SChaoyong He nfp_vdpa_get_queue_num(struct rte_vdpa_device *vdev,
10960141f545SChaoyong He 		uint32_t *queue_num)
10970141f545SChaoyong He {
10980141f545SChaoyong He 	struct nfp_vdpa_dev_node *node;
10990141f545SChaoyong He 
11000141f545SChaoyong He 	node = nfp_vdpa_find_node_by_vdev(vdev);
11010141f545SChaoyong He 	if (node == NULL) {
1102*b6de4353SZerun Fu 		DRV_VDPA_LOG(ERR, "Invalid vDPA device: %p.", vdev);
11030141f545SChaoyong He 		return -ENODEV;
11040141f545SChaoyong He 	}
11050141f545SChaoyong He 
11060141f545SChaoyong He 	*queue_num = node->device->max_queues;
11070141f545SChaoyong He 
11080141f545SChaoyong He 	return 0;
11090141f545SChaoyong He }
11100141f545SChaoyong He 
11110141f545SChaoyong He static int
11120141f545SChaoyong He nfp_vdpa_get_vdpa_features(struct rte_vdpa_device *vdev,
11130141f545SChaoyong He 		uint64_t *features)
11140141f545SChaoyong He {
11150141f545SChaoyong He 	struct nfp_vdpa_dev_node *node;
11160141f545SChaoyong He 
11170141f545SChaoyong He 	node = nfp_vdpa_find_node_by_vdev(vdev);
11180141f545SChaoyong He 	if (node == NULL) {
11190141f545SChaoyong He 		DRV_VDPA_LOG(ERR,  "Invalid vDPA device: %p", vdev);
11200141f545SChaoyong He 		return -ENODEV;
11210141f545SChaoyong He 	}
11220141f545SChaoyong He 
11230141f545SChaoyong He 	*features = node->device->hw.features;
11240141f545SChaoyong He 
11250141f545SChaoyong He 	return 0;
11260141f545SChaoyong He }
11270141f545SChaoyong He 
11280141f545SChaoyong He static int
11290141f545SChaoyong He nfp_vdpa_get_protocol_features(struct rte_vdpa_device *vdev __rte_unused,
11300141f545SChaoyong He 		uint64_t *features)
11310141f545SChaoyong He {
11320141f545SChaoyong He 	*features = 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD |
11330141f545SChaoyong He 			1ULL << VHOST_USER_PROTOCOL_F_REPLY_ACK |
11340141f545SChaoyong He 			1ULL << VHOST_USER_PROTOCOL_F_BACKEND_REQ |
11350141f545SChaoyong He 			1ULL << VHOST_USER_PROTOCOL_F_BACKEND_SEND_FD |
11360141f545SChaoyong He 			1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER;
11370141f545SChaoyong He 
11380141f545SChaoyong He 	return 0;
11390141f545SChaoyong He }
11400141f545SChaoyong He 
11410141f545SChaoyong He static int
11420141f545SChaoyong He nfp_vdpa_set_features(int32_t vid)
11430141f545SChaoyong He {
114494fde3a7SXinying Yu 	int ret;
114594fde3a7SXinying Yu 	uint64_t features = 0;
114694fde3a7SXinying Yu 	struct nfp_vdpa_dev *device;
114794fde3a7SXinying Yu 	struct rte_vdpa_device *vdev;
114894fde3a7SXinying Yu 	struct nfp_vdpa_dev_node *node;
114994fde3a7SXinying Yu 
1150*b6de4353SZerun Fu 	DRV_VDPA_LOG(DEBUG, "Start vid=%d.", vid);
115194fde3a7SXinying Yu 
115294fde3a7SXinying Yu 	vdev = rte_vhost_get_vdpa_device(vid);
115394fde3a7SXinying Yu 	node = nfp_vdpa_find_node_by_vdev(vdev);
115494fde3a7SXinying Yu 	if (node == NULL) {
1155*b6de4353SZerun Fu 		DRV_VDPA_LOG(ERR, "Invalid vDPA device: %p.", vdev);
115694fde3a7SXinying Yu 		return -ENODEV;
115794fde3a7SXinying Yu 	}
115894fde3a7SXinying Yu 
115994fde3a7SXinying Yu 	rte_vhost_get_negotiated_features(vid, &features);
116094fde3a7SXinying Yu 
116194fde3a7SXinying Yu 	if (RTE_VHOST_NEED_LOG(features) == 0)
116294fde3a7SXinying Yu 		return 0;
116394fde3a7SXinying Yu 
116494fde3a7SXinying Yu 	device = node->device;
116594fde3a7SXinying Yu 	if (device->hw.sw_lm) {
116694fde3a7SXinying Yu 		ret = nfp_vdpa_sw_fallback(device);
116794fde3a7SXinying Yu 		if (ret != 0) {
1168*b6de4353SZerun Fu 			DRV_VDPA_LOG(ERR, "Software fallback start failed.");
116994fde3a7SXinying Yu 			return -1;
117094fde3a7SXinying Yu 		}
117194fde3a7SXinying Yu 	}
117294fde3a7SXinying Yu 
11730141f545SChaoyong He 	return 0;
11740141f545SChaoyong He }
11750141f545SChaoyong He 
11760141f545SChaoyong He static int
11770141f545SChaoyong He nfp_vdpa_set_vring_state(int vid,
11780141f545SChaoyong He 		int vring,
11790141f545SChaoyong He 		int state)
11800141f545SChaoyong He {
1181*b6de4353SZerun Fu 	DRV_VDPA_LOG(DEBUG, "Start vid=%d, vring=%d, state=%d.", vid, vring, state);
11820141f545SChaoyong He 	return 0;
11830141f545SChaoyong He }
11840141f545SChaoyong He 
11857f11d166SChaoyong He struct rte_vdpa_dev_ops nfp_vdpa_ops = {
11860141f545SChaoyong He 	.get_queue_num = nfp_vdpa_get_queue_num,
11870141f545SChaoyong He 	.get_features = nfp_vdpa_get_vdpa_features,
11880141f545SChaoyong He 	.get_protocol_features = nfp_vdpa_get_protocol_features,
11890141f545SChaoyong He 	.dev_conf = nfp_vdpa_dev_config,
11900141f545SChaoyong He 	.dev_close = nfp_vdpa_dev_close,
11910141f545SChaoyong He 	.set_vring_state = nfp_vdpa_set_vring_state,
11920141f545SChaoyong He 	.set_features = nfp_vdpa_set_features,
11930141f545SChaoyong He 	.get_vfio_group_fd = nfp_vdpa_get_vfio_group_fd,
11940141f545SChaoyong He 	.get_vfio_device_fd = nfp_vdpa_get_vfio_device_fd,
11950141f545SChaoyong He 	.get_notify_area = nfp_vdpa_get_notify_area,
11967f11d166SChaoyong He };
11977f11d166SChaoyong He 
11987f11d166SChaoyong He static int
11997f11d166SChaoyong He nfp_vdpa_pci_probe(struct rte_pci_device *pci_dev)
12007f11d166SChaoyong He {
12017b2a1228SChaoyong He 	int ret;
12027f11d166SChaoyong He 	struct nfp_vdpa_dev *device;
12037f11d166SChaoyong He 	struct nfp_vdpa_dev_node *node;
12047f11d166SChaoyong He 
12057f11d166SChaoyong He 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
12067f11d166SChaoyong He 		return 0;
12077f11d166SChaoyong He 
12087f11d166SChaoyong He 	node = calloc(1, sizeof(*node));
12097f11d166SChaoyong He 	if (node == NULL)
12107f11d166SChaoyong He 		return -ENOMEM;
12117f11d166SChaoyong He 
12127f11d166SChaoyong He 	device = calloc(1, sizeof(*device));
12137f11d166SChaoyong He 	if (device == NULL)
12147f11d166SChaoyong He 		goto free_node;
12157f11d166SChaoyong He 
12167f11d166SChaoyong He 	device->pci_dev = pci_dev;
12177f11d166SChaoyong He 
12187b2a1228SChaoyong He 	ret = nfp_vdpa_vfio_setup(device);
12197b2a1228SChaoyong He 	if (ret != 0)
12207b2a1228SChaoyong He 		goto free_device;
12217b2a1228SChaoyong He 
1222d89f4990SChaoyong He 	ret = nfp_vdpa_hw_init(&device->hw, pci_dev);
1223d89f4990SChaoyong He 	if (ret != 0)
1224d89f4990SChaoyong He 		goto vfio_teardown;
1225d89f4990SChaoyong He 
1226d89f4990SChaoyong He 	device->max_queues = NFP_VDPA_MAX_QUEUES;
1227d89f4990SChaoyong He 
12287f11d166SChaoyong He 	device->vdev = rte_vdpa_register_device(&pci_dev->device, &nfp_vdpa_ops);
12297f11d166SChaoyong He 	if (device->vdev == NULL) {
1230*b6de4353SZerun Fu 		DRV_VDPA_LOG(ERR, "Failed to register device %s.", pci_dev->name);
12317b2a1228SChaoyong He 		goto vfio_teardown;
12327f11d166SChaoyong He 	}
12337f11d166SChaoyong He 
12347f11d166SChaoyong He 	node->device = device;
12357f11d166SChaoyong He 	pthread_mutex_lock(&vdpa_list_lock);
12367f11d166SChaoyong He 	TAILQ_INSERT_TAIL(&vdpa_dev_list, node, next);
12377f11d166SChaoyong He 	pthread_mutex_unlock(&vdpa_list_lock);
12387f11d166SChaoyong He 
1239b47a0373SChaoyong He 	rte_spinlock_init(&device->lock);
1240b47a0373SChaoyong He 	rte_atomic_store_explicit(&device->started, 1, rte_memory_order_relaxed);
1241b47a0373SChaoyong He 	update_datapath(device);
1242b47a0373SChaoyong He 
12437f11d166SChaoyong He 	return 0;
12447f11d166SChaoyong He 
12457b2a1228SChaoyong He vfio_teardown:
12467b2a1228SChaoyong He 	nfp_vdpa_vfio_teardown(device);
12477f11d166SChaoyong He free_device:
12487f11d166SChaoyong He 	free(device);
12497f11d166SChaoyong He free_node:
12507f11d166SChaoyong He 	free(node);
12517f11d166SChaoyong He 
12527f11d166SChaoyong He 	return -1;
12537f11d166SChaoyong He }
12547f11d166SChaoyong He 
12557f11d166SChaoyong He static int
12567f11d166SChaoyong He nfp_vdpa_pci_remove(struct rte_pci_device *pci_dev)
12577f11d166SChaoyong He {
12587f11d166SChaoyong He 	struct nfp_vdpa_dev *device;
12597f11d166SChaoyong He 	struct nfp_vdpa_dev_node *node;
12607f11d166SChaoyong He 
12617f11d166SChaoyong He 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
12627f11d166SChaoyong He 		return 0;
12637f11d166SChaoyong He 
12647f11d166SChaoyong He 	node = nfp_vdpa_find_node_by_pdev(pci_dev);
12657f11d166SChaoyong He 	if (node == NULL) {
1266*b6de4353SZerun Fu 		DRV_VDPA_LOG(ERR, "Invalid device: %s.", pci_dev->name);
12677f11d166SChaoyong He 		return -ENODEV;
12687f11d166SChaoyong He 	}
12697f11d166SChaoyong He 
12707f11d166SChaoyong He 	device = node->device;
12717f11d166SChaoyong He 
1272b47a0373SChaoyong He 	rte_atomic_store_explicit(&device->started, 0, rte_memory_order_relaxed);
1273b47a0373SChaoyong He 	update_datapath(device);
1274b47a0373SChaoyong He 
12757f11d166SChaoyong He 	pthread_mutex_lock(&vdpa_list_lock);
12767f11d166SChaoyong He 	TAILQ_REMOVE(&vdpa_dev_list, node, next);
12777f11d166SChaoyong He 	pthread_mutex_unlock(&vdpa_list_lock);
12787f11d166SChaoyong He 
12797f11d166SChaoyong He 	rte_vdpa_unregister_device(device->vdev);
12807b2a1228SChaoyong He 	nfp_vdpa_vfio_teardown(device);
12817f11d166SChaoyong He 
12827f11d166SChaoyong He 	free(device);
12837f11d166SChaoyong He 	free(node);
12847f11d166SChaoyong He 
12857f11d166SChaoyong He 	return 0;
12867f11d166SChaoyong He }
12877f11d166SChaoyong He 
12887f11d166SChaoyong He static const struct rte_pci_id pci_id_nfp_vdpa_map[] = {
12897f11d166SChaoyong He 	{
12907f11d166SChaoyong He 		RTE_PCI_DEVICE(PCI_VENDOR_ID_NETRONOME,
12917f11d166SChaoyong He 				PCI_DEVICE_ID_NFP6000_VF_NIC)
12927f11d166SChaoyong He 	},
12937f11d166SChaoyong He 	{
12947f11d166SChaoyong He 		.vendor_id = 0,
12957f11d166SChaoyong He 	},
12967f11d166SChaoyong He };
12977f11d166SChaoyong He 
12987f11d166SChaoyong He static struct nfp_class_driver nfp_vdpa = {
12997f11d166SChaoyong He 	.drv_class = NFP_CLASS_VDPA,
13007f11d166SChaoyong He 	.name = RTE_STR(NFP_VDPA_DRIVER_NAME),
13017f11d166SChaoyong He 	.id_table = pci_id_nfp_vdpa_map,
13027f11d166SChaoyong He 	.drv_flags =  RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC,
13037f11d166SChaoyong He 	.probe = nfp_vdpa_pci_probe,
13047f11d166SChaoyong He 	.remove = nfp_vdpa_pci_remove,
13057f11d166SChaoyong He };
13067f11d166SChaoyong He 
13077f11d166SChaoyong He RTE_INIT(nfp_vdpa_init)
13087f11d166SChaoyong He {
13097f11d166SChaoyong He 	nfp_class_driver_register(&nfp_vdpa);
13107f11d166SChaoyong He }
13117f11d166SChaoyong He 
13127f11d166SChaoyong He RTE_PMD_REGISTER_PCI_TABLE(NFP_VDPA_DRIVER_NAME, pci_id_nfp_vdpa_map);
13137f11d166SChaoyong He RTE_PMD_REGISTER_KMOD_DEP(NFP_VDPA_DRIVER_NAME, "* vfio-pci");
1314