xref: /dpdk/drivers/vdpa/ifc/ifcvf_vdpa.c (revision 2b843cac232eb3f2fa79e4254e21766817e2019f)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2018 Intel Corporation
3  */
4 
5 #include <unistd.h>
6 #include <pthread.h>
7 #include <fcntl.h>
8 #include <string.h>
9 #include <sys/ioctl.h>
10 #include <sys/epoll.h>
11 #include <linux/virtio_net.h>
12 #include <stdbool.h>
13 
14 #include <rte_eal_paging.h>
15 #include <rte_malloc.h>
16 #include <rte_memory.h>
17 #include <bus_pci_driver.h>
18 #include <rte_vhost.h>
19 #include <rte_vdpa.h>
20 #include <vdpa_driver.h>
21 #include <rte_vfio.h>
22 #include <rte_spinlock.h>
23 #include <rte_log.h>
24 #include <rte_kvargs.h>
25 #include <rte_devargs.h>
26 
27 #include "base/ifcvf.h"
28 
29 /*
30  * RTE_MIN() cannot be used here since a braced-group within an expression
31  * is allowed only inside a function.
32  */
33 #define MIN(v1, v2)	((v1) < (v2) ? (v1) : (v2))
34 
35 RTE_LOG_REGISTER(ifcvf_vdpa_logtype, pmd.vdpa.ifcvf, NOTICE);
36 #define RTE_LOGTYPE_IFCVF_VDPA ifcvf_vdpa_logtype
37 #define DRV_LOG(level, ...) \
38 	RTE_LOG_LINE_PREFIX(level, IFCVF_VDPA, "%s(): ", __func__, __VA_ARGS__)
39 
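/*
 * Length of the split-ring used ring: one vring_used_elem per descriptor
 * plus three 16-bit fields (flags, idx and the trailing avail_event).
 */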
40 #define IFCVF_USED_RING_LEN(size) \
41 	((size) * sizeof(struct vring_used_elem) + sizeof(uint16_t) * 3)
42 
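/*
 * Devargs example (illustrative): -a 0000:06:00.3,vdpa=1,sw-live-migration=1
 * selects vDPA mode and enables the software live-migration fallback.
 */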
43 #define IFCVF_VDPA_MODE		"vdpa"
44 #define IFCVF_SW_FALLBACK_LM	"sw-live-migration"
45 
46 #define THREAD_NAME_LEN	16
47 
48 static const char * const ifcvf_valid_arguments[] = {
49 	IFCVF_VDPA_MODE,
50 	IFCVF_SW_FALLBACK_LM,
51 	NULL
52 };
53 
54 struct ifcvf_internal {
55 	struct rte_pci_device *pdev;
56 	struct ifcvf_hw hw;
57 	int configured;
58 	int vfio_container_fd;
59 	int vfio_group_fd;
60 	int vfio_dev_fd;
61 	rte_thread_t tid; /* thread for notify relay */
62 	rte_thread_t intr_tid; /* thread for config space change interrupt relay */
63 	int epfd;
64 	int csc_epfd;
65 	int vid;
66 	struct rte_vdpa_device *vdev;
67 	uint16_t max_queues;
68 	uint64_t features;
69 	rte_atomic32_t started;
70 	rte_atomic32_t dev_attached;
71 	rte_atomic32_t running;
72 	rte_spinlock_t lock;
73 	bool sw_lm;
74 	bool sw_fallback_running;
75 	/* mediated vring for sw fallback */
76 	struct vring m_vring[IFCVF_MAX_QUEUES * 2];
77 	/* eventfd for used ring interrupt */
78 	int intr_fd[IFCVF_MAX_QUEUES * 2];
79 };
80 
81 struct internal_list {
82 	TAILQ_ENTRY(internal_list) next;
83 	struct ifcvf_internal *internal;
84 };
85 
86 /* vDPA device info includes device features and device operations. */
87 struct rte_vdpa_dev_info {
88 	uint64_t features;
89 	struct rte_vdpa_dev_ops *ops;
90 };
91 
92 TAILQ_HEAD(internal_list_head, internal_list);
93 static struct internal_list_head internal_list =
94 	TAILQ_HEAD_INITIALIZER(internal_list);
95 
96 static pthread_mutex_t internal_list_lock = PTHREAD_MUTEX_INITIALIZER;
97 
98 static void update_used_ring(struct ifcvf_internal *internal, uint16_t qid);
99 
100 static struct internal_list *
101 find_internal_resource_by_vdev(struct rte_vdpa_device *vdev)
102 {
103 	int found = 0;
104 	struct internal_list *list;
105 
106 	pthread_mutex_lock(&internal_list_lock);
107 
108 	TAILQ_FOREACH(list, &internal_list, next) {
109 		if (vdev == list->internal->vdev) {
110 			found = 1;
111 			break;
112 		}
113 	}
114 
115 	pthread_mutex_unlock(&internal_list_lock);
116 
117 	if (!found)
118 		return NULL;
119 
120 	return list;
121 }
122 
123 static struct internal_list *
124 find_internal_resource_by_pci_dev(struct rte_pci_device *pdev)
125 {
126 	int found = 0;
127 	struct internal_list *list;
128 
129 	pthread_mutex_lock(&internal_list_lock);
130 
131 	TAILQ_FOREACH(list, &internal_list, next) {
132 		if (!rte_pci_addr_cmp(&pdev->addr,
133 					&list->internal->pdev->addr)) {
134 			found = 1;
135 			break;
136 		}
137 	}
138 
139 	pthread_mutex_unlock(&internal_list_lock);
140 
141 	if (!found)
142 		return NULL;
143 
144 	return list;
145 }
146 
147 static struct internal_list *
148 find_internal_resource_by_rte_dev(struct rte_device *rte_dev)
149 {
150 	int found = 0;
151 	struct internal_list *list;
152 
153 	pthread_mutex_lock(&internal_list_lock);
154 
155 	TAILQ_FOREACH(list, &internal_list, next) {
156 		if (rte_dev == &list->internal->pdev->device) {
157 			found = 1;
158 			break;
159 		}
160 	}
161 
162 	pthread_mutex_unlock(&internal_list_lock);
163 
164 	if (!found)
165 		return NULL;
166 
167 	return list;
168 }
169 
170 static int
171 ifcvf_vfio_setup(struct ifcvf_internal *internal)
172 {
173 	struct rte_pci_device *dev = internal->pdev;
174 	char devname[RTE_DEV_NAME_MAX_LEN] = {0};
175 	int iommu_group_num;
176 	int i, ret;
177 
178 	internal->vfio_dev_fd = -1;
179 	internal->vfio_group_fd = -1;
180 	internal->vfio_container_fd = -1;
181 
182 	rte_pci_device_name(&dev->addr, devname, RTE_DEV_NAME_MAX_LEN);
183 	ret = rte_vfio_get_group_num(rte_pci_get_sysfs_path(), devname,
184 			&iommu_group_num);
185 	if (ret <= 0) {
186 		DRV_LOG(ERR, "%s failed to get IOMMU group", devname);
187 		return -1;
188 	}
189 
190 	internal->vfio_container_fd = rte_vfio_container_create();
191 	if (internal->vfio_container_fd < 0)
192 		return -1;
193 
194 	internal->vfio_group_fd = rte_vfio_container_group_bind(
195 			internal->vfio_container_fd, iommu_group_num);
196 	if (internal->vfio_group_fd < 0)
197 		goto err;
198 
199 	if (rte_pci_map_device(dev))
200 		goto err;
201 
202 	internal->vfio_dev_fd = rte_intr_dev_fd_get(dev->intr_handle);
203 
204 	for (i = 0; i < RTE_MIN(PCI_MAX_RESOURCE, IFCVF_PCI_MAX_RESOURCE);
205 			i++) {
206 		internal->hw.mem_resource[i].addr =
207 			internal->pdev->mem_resource[i].addr;
208 		internal->hw.mem_resource[i].phys_addr =
209 			internal->pdev->mem_resource[i].phys_addr;
210 		internal->hw.mem_resource[i].len =
211 			internal->pdev->mem_resource[i].len;
212 	}
213 
214 	return 0;
215 
216 err:
217 	rte_vfio_container_destroy(internal->vfio_container_fd);
218 	return -1;
219 }
220 
221 static int
222 ifcvf_dma_map(struct ifcvf_internal *internal, bool do_map)
223 {
224 	uint32_t i;
225 	int ret;
226 	struct rte_vhost_memory *mem = NULL;
227 	int vfio_container_fd;
228 
229 	ret = rte_vhost_get_mem_table(internal->vid, &mem);
230 	if (ret < 0) {
231 		DRV_LOG(ERR, "failed to get VM memory layout.");
232 		goto exit;
233 	}
234 
235 	vfio_container_fd = internal->vfio_container_fd;
236 
237 	for (i = 0; i < mem->nregions; i++) {
238 		struct rte_vhost_mem_region *reg;
239 
240 		reg = &mem->regions[i];
241 		DRV_LOG(INFO, "%s, region %u: HVA 0x%" PRIx64 ", "
242 			"GPA 0x%" PRIx64 ", size 0x%" PRIx64 ".",
243 			do_map ? "DMA map" : "DMA unmap", i,
244 			reg->host_user_addr, reg->guest_phys_addr, reg->size);
245 
246 		if (do_map) {
247 			ret = rte_vfio_container_dma_map(vfio_container_fd,
248 				reg->host_user_addr, reg->guest_phys_addr,
249 				reg->size);
250 			if (ret < 0) {
251 				DRV_LOG(ERR, "DMA map failed.");
252 				goto exit;
253 			}
254 		} else {
255 			ret = rte_vfio_container_dma_unmap(vfio_container_fd,
256 				reg->host_user_addr, reg->guest_phys_addr,
257 				reg->size);
258 			if (ret < 0) {
259 				DRV_LOG(ERR, "DMA unmap failed.");
260 				goto exit;
261 			}
262 		}
263 	}
264 
265 exit:
266 	free(mem);
267 	return ret;
268 }
269 
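/*
 * Translate a host virtual address to a guest physical address using the
 * vhost memory table of the given vid; returns 0 if no region covers it.
 */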
270 static uint64_t
271 hva_to_gpa(int vid, uint64_t hva)
272 {
273 	struct rte_vhost_memory *mem = NULL;
274 	struct rte_vhost_mem_region *reg;
275 	uint32_t i;
276 	uint64_t gpa = 0;
277 
278 	if (rte_vhost_get_mem_table(vid, &mem) < 0)
279 		goto exit;
280 
281 	for (i = 0; i < mem->nregions; i++) {
282 		reg = &mem->regions[i];
283 
284 		if (hva >= reg->host_user_addr &&
285 				hva < reg->host_user_addr + reg->size) {
286 			gpa = hva - reg->host_user_addr + reg->guest_phys_addr;
287 			break;
288 		}
289 	}
290 
291 exit:
292 	free(mem);
293 	return gpa;
294 }
295 
296 static int
297 vdpa_ifcvf_start(struct ifcvf_internal *internal)
298 {
299 	struct ifcvf_hw *hw = &internal->hw;
300 	int i, nr_vring;
301 	int vid;
302 	struct rte_vhost_vring vq;
303 	uint64_t gpa;
304 
305 	vid = internal->vid;
306 	nr_vring = rte_vhost_get_vring_num(vid);
307 	rte_vhost_get_negotiated_features(vid, &hw->req_features);
308 
309 	for (i = 0; i < nr_vring; i++) {
310 		if (!hw->vring[i].enable)
311 			continue;
312 		rte_vhost_get_vhost_vring(vid, i, &vq);
313 		gpa = hva_to_gpa(vid, (uint64_t)(uintptr_t)vq.desc);
314 		if (gpa == 0) {
315 			DRV_LOG(ERR, "Fail to get GPA for descriptor ring.");
316 			return -1;
317 		}
318 		hw->vring[i].desc = gpa;
319 
320 		gpa = hva_to_gpa(vid, (uint64_t)(uintptr_t)vq.avail);
321 		if (gpa == 0) {
322 			DRV_LOG(ERR, "Fail to get GPA for available ring.");
323 			return -1;
324 		}
325 		hw->vring[i].avail = gpa;
326 
327 		gpa = hva_to_gpa(vid, (uint64_t)(uintptr_t)vq.used);
328 		if (gpa == 0) {
329 			DRV_LOG(ERR, "Fail to get GPA for used ring.");
330 			return -1;
331 		}
332 		hw->vring[i].used = gpa;
333 
334 		hw->vring[i].size = vq.size;
335 		rte_vhost_get_vring_base(vid, i, &hw->vring[i].last_avail_idx,
336 				&hw->vring[i].last_used_idx);
337 	}
338 	hw->nr_vring = i;
339 
340 	return ifcvf_start_hw(&internal->hw);
341 }
342 
343 static void
344 vdpa_ifcvf_stop(struct ifcvf_internal *internal)
345 {
346 	struct ifcvf_hw *hw = &internal->hw;
347 	uint32_t i;
348 	int vid;
349 	uint64_t features = 0;
350 	uint64_t log_base = 0, log_size = 0;
351 	uint64_t len;
352 	u32 ring_state = 0;
353 
354 	vid = internal->vid;
355 
356 	/* To make sure no request is lost for the blk device,
357 	 * do not stop until last_avail_idx == last_used_idx.
358 	 */
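	/*
	 * The LM config region reports each ring state as a packed 32-bit
	 * word: bits 0-15 hold last_avail_idx and bits 16-31 hold
	 * last_used_idx, as extracted by the mask and shift below.
	 */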
359 	if (internal->hw.device_type == IFCVF_BLK) {
360 		for (i = 0; i < hw->nr_vring; i++) {
361 			do {
362 				if (hw->lm_cfg != NULL)
363 					ring_state = *(u32 *)(hw->lm_cfg +
364 						IFCVF_LM_RING_STATE_OFFSET +
365 						i * IFCVF_LM_CFG_SIZE);
366 				hw->vring[i].last_avail_idx =
367 					(u16)(ring_state & IFCVF_16_BIT_MASK);
368 				hw->vring[i].last_used_idx =
369 					(u16)(ring_state >> 16);
370 				usleep(10);
371 			} while (hw->vring[i].last_avail_idx !=
372 				hw->vring[i].last_used_idx);
373 		}
374 	}
375 
376 	ifcvf_stop_hw(hw);
377 
378 	for (i = 0; i < hw->nr_vring; i++)
379 		rte_vhost_set_vring_base(vid, i, hw->vring[i].last_avail_idx,
380 				hw->vring[i].last_used_idx);
381 
382 	if (internal->sw_lm)
383 		return;
384 
385 	rte_vhost_get_negotiated_features(vid, &features);
386 	if (RTE_VHOST_NEED_LOG(features)) {
387 		ifcvf_disable_logging(hw);
388 		rte_vhost_get_log_base(internal->vid, &log_base, &log_size);
389 		rte_vfio_container_dma_unmap(internal->vfio_container_fd,
390 				log_base, IFCVF_LOG_BASE, log_size);
391 		/*
392 		 * IFCVF marks dirty memory pages only for packet buffers,
393 		 * so SW marks the used rings as dirty after the device stops.
394 		 */
395 		for (i = 0; i < hw->nr_vring; i++) {
396 			len = IFCVF_USED_RING_LEN(hw->vring[i].size);
397 			rte_vhost_log_used_vring(vid, i, 0, len);
398 		}
399 	}
400 }
401 
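/*
 * Buffer sized for the vfio_irq_set header plus one eventfd per data-path
 * queue and one for the config space change vector.
 */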
402 #define MSIX_IRQ_SET_BUF_LEN (sizeof(struct vfio_irq_set) + \
403 		sizeof(int) * (IFCVF_MAX_QUEUES * 2 + 1))
404 static int
405 vdpa_enable_vfio_intr(struct ifcvf_internal *internal, bool m_rx)
406 {
407 	int ret;
408 	uint32_t i, nr_vring;
409 	char irq_set_buf[MSIX_IRQ_SET_BUF_LEN];
410 	struct vfio_irq_set *irq_set;
411 	int *fd_ptr;
412 	struct rte_vhost_vring vring;
413 	int fd;
414 
415 	vring.callfd = -1;
416 
417 	nr_vring = rte_vhost_get_vring_num(internal->vid);
418 	if (nr_vring > IFCVF_MAX_QUEUES * 2)
419 		return -1;
420 
421 	irq_set = (struct vfio_irq_set *)irq_set_buf;
422 	irq_set->argsz = sizeof(irq_set_buf);
423 	irq_set->count = nr_vring + 1;
424 	irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
425 			 VFIO_IRQ_SET_ACTION_TRIGGER;
426 	irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
427 	irq_set->start = 0;
428 	fd_ptr = (int *)&irq_set->data;
429 	/* The first interrupt is for the config space change notification */
430 	fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] =
431 		rte_intr_fd_get(internal->pdev->intr_handle);
432 
433 	for (i = 0; i < nr_vring; i++)
434 		internal->intr_fd[i] = -1;
435 
436 	for (i = 0; i < nr_vring; i++) {
437 		rte_vhost_get_vhost_vring(internal->vid, i, &vring);
438 		fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
439 		if (m_rx == true &&
440 			((i & 1) == 0 || internal->hw.device_type == IFCVF_BLK)) {
441 			/* For net devices, only the RX queues need to be
442 			 * relayed, since they modify the VM's memory.
443 			 * For blk devices, every queue must be relayed to
444 			 * catch the read commands.
445 			 */
446 			fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
447 			if (fd < 0) {
448 				DRV_LOG(ERR, "can't setup eventfd: %s",
449 					strerror(errno));
450 				return -1;
451 			}
452 			internal->intr_fd[i] = fd;
453 			fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd;
454 		}
455 	}
456 
457 	ret = ioctl(internal->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
458 	if (ret) {
459 		DRV_LOG(ERR, "Error enabling MSI-X interrupts: %s",
460 				strerror(errno));
461 		return -1;
462 	}
463 
464 	return 0;
465 }
466 
467 static int
468 vdpa_disable_vfio_intr(struct ifcvf_internal *internal)
469 {
470 	int ret;
471 	uint32_t i, nr_vring;
472 	char irq_set_buf[MSIX_IRQ_SET_BUF_LEN];
473 	struct vfio_irq_set *irq_set;
474 
475 	irq_set = (struct vfio_irq_set *)irq_set_buf;
476 	irq_set->argsz = sizeof(irq_set_buf);
477 	irq_set->count = 0;
478 	irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
479 	irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
480 	irq_set->start = 0;
481 
482 	nr_vring = rte_vhost_get_vring_num(internal->vid);
483 	for (i = 0; i < nr_vring; i++) {
484 		if (internal->intr_fd[i] >= 0)
485 			close(internal->intr_fd[i]);
486 		internal->intr_fd[i] = -1;
487 	}
488 
489 	ret = ioctl(internal->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
490 	if (ret) {
491 		DRV_LOG(ERR, "Error disabling MSI-X interrupts: %s",
492 				strerror(errno));
493 		return -1;
494 	}
495 
496 	return 0;
497 }
498 
499 static uint32_t
500 notify_relay(void *arg)
501 {
502 	int i, kickfd, epfd, nfds = 0;
503 	uint32_t qid, q_num;
504 	struct epoll_event events[IFCVF_MAX_QUEUES * 2];
505 	struct epoll_event ev;
506 	uint64_t buf;
507 	int nbytes;
508 	struct rte_vhost_vring vring;
509 	struct ifcvf_internal *internal = (struct ifcvf_internal *)arg;
510 	struct ifcvf_hw *hw = &internal->hw;
511 
512 	q_num = rte_vhost_get_vring_num(internal->vid);
513 
514 	epfd = epoll_create(IFCVF_MAX_QUEUES * 2);
515 	if (epfd < 0) {
516 		DRV_LOG(ERR, "failed to create epoll instance.");
517 		return 1;
518 	}
519 	internal->epfd = epfd;
520 
521 	vring.kickfd = -1;
522 	for (qid = 0; qid < q_num; qid++) {
523 		if (!hw->vring[qid].enable)
524 			continue;
525 		ev.events = EPOLLIN | EPOLLPRI;
526 		rte_vhost_get_vhost_vring(internal->vid, qid, &vring);
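		/* Pack the queue id in the low 32 bits and the kickfd in the
		 * high 32 bits; epoll_wait() below unpacks the same layout.
		 */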
527 		ev.data.u64 = qid | (uint64_t)vring.kickfd << 32;
528 		if (epoll_ctl(epfd, EPOLL_CTL_ADD, vring.kickfd, &ev) < 0) {
529 			DRV_LOG(ERR, "epoll add error: %s", strerror(errno));
530 			return 1;
531 		}
532 	}
533 
534 	for (;;) {
535 		nfds = epoll_wait(epfd, events, q_num, -1);
536 		if (nfds < 0) {
537 			if (errno == EINTR)
538 				continue;
539 			DRV_LOG(ERR, "epoll_wait failed");
540 			return 1;
541 		}
542 
543 		for (i = 0; i < nfds; i++) {
544 			qid = events[i].data.u32;
545 			kickfd = (uint32_t)(events[i].data.u64 >> 32);
546 			do {
547 				nbytes = read(kickfd, &buf, 8);
548 				if (nbytes < 0) {
549 					if (errno == EINTR ||
550 					    errno == EWOULDBLOCK ||
551 					    errno == EAGAIN)
552 						continue;
553 					DRV_LOG(INFO, "Error reading "
554 						"kickfd: %s",
555 						strerror(errno));
556 				}
557 				break;
558 			} while (1);
559 
560 			ifcvf_notify_queue(hw, qid);
561 		}
562 	}
563 
564 	return 0;
565 }
566 
567 static int
568 setup_notify_relay(struct ifcvf_internal *internal)
569 {
570 	char name[RTE_THREAD_INTERNAL_NAME_SIZE];
571 	int ret;
572 
573 	snprintf(name, sizeof(name), "ifc-noti%d", internal->vid);
574 	ret = rte_thread_create_internal_control(&internal->tid, name,
575 			notify_relay, internal);
576 	if (ret != 0) {
577 		DRV_LOG(ERR, "failed to create notify relay pthread.");
578 		return -1;
579 	}
580 
581 	return 0;
582 }
583 
584 static int
585 unset_notify_relay(struct ifcvf_internal *internal)
586 {
587 	if (internal->tid.opaque_id != 0) {
588 		pthread_cancel((pthread_t)internal->tid.opaque_id);
589 		rte_thread_join(internal->tid, NULL);
590 	}
591 	internal->tid.opaque_id = 0;
592 
593 	if (internal->epfd >= 0)
594 		close(internal->epfd);
595 	internal->epfd = -1;
596 
597 	return 0;
598 }
599 
600 static void
601 virtio_interrupt_handler(struct ifcvf_internal *internal)
602 {
603 	int vid = internal->vid;
604 	int ret;
605 
606 	ret = rte_vhost_backend_config_change(vid, 1);
607 	if (ret)
608 		DRV_LOG(ERR, "failed to notify the guest about configuration space change.");
609 }
610 
611 static uint32_t
612 intr_relay(void *arg)
613 {
614 	struct ifcvf_internal *internal = (struct ifcvf_internal *)arg;
615 	struct epoll_event csc_event;
616 	struct epoll_event ev;
617 	uint64_t buf;
618 	int nbytes;
619 	int csc_epfd, csc_val = 0;
620 
621 	csc_epfd = epoll_create(1);
622 	if (csc_epfd < 0) {
623 		DRV_LOG(ERR, "failed to create epoll for config space change.");
624 		return 1;
625 	}
626 
627 	ev.events = EPOLLIN | EPOLLPRI | EPOLLRDHUP | EPOLLHUP;
628 	ev.data.fd = rte_intr_fd_get(internal->pdev->intr_handle);
629 	if (epoll_ctl(csc_epfd, EPOLL_CTL_ADD,
630 		rte_intr_fd_get(internal->pdev->intr_handle), &ev) < 0) {
631 		DRV_LOG(ERR, "epoll add error: %s", strerror(errno));
632 		goto out;
633 	}
634 
635 	internal->csc_epfd = csc_epfd;
636 
637 	for (;;) {
638 		csc_val = epoll_wait(csc_epfd, &csc_event, 1, -1);
639 		if (csc_val < 0) {
640 			if (errno == EINTR)
641 				continue;
642 			DRV_LOG(ERR, "epoll_wait failed.");
643 			goto out;
644 		} else if (csc_val == 0) {
645 			continue;
646 		} else {
647 			/* csc_val > 0 */
648 			nbytes = read(csc_event.data.fd, &buf, 8);
649 			if (nbytes < 0) {
650 				if (errno == EINTR ||
651 				    errno == EWOULDBLOCK ||
652 				    errno == EAGAIN)
653 					continue;
654 				DRV_LOG(ERR, "Error reading from file descriptor %d: %s",
655 					csc_event.data.fd,
656 					strerror(errno));
657 				goto out;
658 			} else if (nbytes == 0) {
659 				DRV_LOG(ERR, "Read nothing from file descriptor %d",
660 					csc_event.data.fd);
661 				continue;
662 			} else {
663 				virtio_interrupt_handler(internal);
664 			}
665 		}
666 	}
667 
668 out:
669 	if (csc_epfd >= 0)
670 		close(csc_epfd);
671 	internal->csc_epfd = -1;
672 
673 	return 0;
674 }
675 
676 static int
677 setup_intr_relay(struct ifcvf_internal *internal)
678 {
679 	char name[RTE_THREAD_INTERNAL_NAME_SIZE];
680 	int ret;
681 
682 	snprintf(name, sizeof(name), "ifc-int%d", internal->vid);
683 	ret = rte_thread_create_internal_control(&internal->intr_tid, name,
684 			intr_relay, (void *)internal);
685 	if (ret) {
686 		DRV_LOG(ERR, "failed to create interrupt relay pthread.");
687 		return -1;
688 	}
689 	return 0;
690 }
691 
692 static void
693 unset_intr_relay(struct ifcvf_internal *internal)
694 {
695 	if (internal->intr_tid.opaque_id != 0) {
696 		pthread_cancel((pthread_t)internal->intr_tid.opaque_id);
697 		rte_thread_join(internal->intr_tid, NULL);
698 	}
699 	internal->intr_tid.opaque_id = 0;
700 
701 	if (internal->csc_epfd >= 0)
702 		close(internal->csc_epfd);
703 	internal->csc_epfd = -1;
704 }
705 
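/*
 * Start the datapath once both 'started' and 'dev_attached' are set;
 * stop it when either is cleared. 'running' reflects the current state.
 */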
706 static int
707 update_datapath(struct ifcvf_internal *internal)
708 {
709 	int ret;
710 
711 	rte_spinlock_lock(&internal->lock);
712 
713 	if (!rte_atomic32_read(&internal->running) &&
714 	    (rte_atomic32_read(&internal->started) &&
715 	     rte_atomic32_read(&internal->dev_attached))) {
716 		ret = ifcvf_dma_map(internal, true);
717 		if (ret)
718 			goto err;
719 
720 		ret = vdpa_enable_vfio_intr(internal, false);
721 		if (ret)
722 			goto err;
723 
724 		ret = vdpa_ifcvf_start(internal);
725 		if (ret)
726 			goto err;
727 
728 		ret = setup_notify_relay(internal);
729 		if (ret)
730 			goto err;
731 
732 		ret = setup_intr_relay(internal);
733 		if (ret)
734 			goto err;
735 
736 		rte_atomic32_set(&internal->running, 1);
737 	} else if (rte_atomic32_read(&internal->running) &&
738 		   (!rte_atomic32_read(&internal->started) ||
739 		    !rte_atomic32_read(&internal->dev_attached))) {
740 		unset_intr_relay(internal);
741 
742 		ret = unset_notify_relay(internal);
743 		if (ret)
744 			goto err;
745 
746 		vdpa_ifcvf_stop(internal);
747 
748 		ret = vdpa_disable_vfio_intr(internal);
749 		if (ret)
750 			goto err;
751 
752 		ret = ifcvf_dma_map(internal, false);
753 		if (ret)
754 			goto err;
755 
756 		rte_atomic32_set(&internal->running, 0);
757 	}
758 
759 	rte_spinlock_unlock(&internal->lock);
760 	return 0;
761 err:
762 	rte_spinlock_unlock(&internal->lock);
763 	return ret;
764 }
765 
766 static int
767 m_ifcvf_start(struct ifcvf_internal *internal)
768 {
769 	struct ifcvf_hw *hw = &internal->hw;
770 	uint32_t i, nr_vring;
771 	int vid, ret;
772 	struct rte_vhost_vring vq;
773 	void *vring_buf;
774 	uint64_t m_vring_iova = IFCVF_MEDIATED_VRING;
775 	uint64_t size;
776 	uint64_t gpa;
777 
778 	memset(&vq, 0, sizeof(vq));
779 	vid = internal->vid;
780 	nr_vring = rte_vhost_get_vring_num(vid);
781 	rte_vhost_get_negotiated_features(vid, &hw->req_features);
782 
783 	for (i = 0; i < nr_vring; i++) {
784 		rte_vhost_get_vhost_vring(vid, i, &vq);
785 
786 		size = RTE_ALIGN_CEIL(vring_size(vq.size, rte_mem_page_size()),
787 				rte_mem_page_size());
788 		vring_buf = rte_zmalloc("ifcvf", size, rte_mem_page_size());
789 		vring_init(&internal->m_vring[i], vq.size, vring_buf,
790 				rte_mem_page_size());
791 
792 		ret = rte_vfio_container_dma_map(internal->vfio_container_fd,
793 			(uint64_t)(uintptr_t)vring_buf, m_vring_iova, size);
794 		if (ret < 0) {
795 			DRV_LOG(ERR, "mediated vring DMA map failed.");
796 			goto error;
797 		}
798 
799 		gpa = hva_to_gpa(vid, (uint64_t)(uintptr_t)vq.desc);
800 		if (gpa == 0) {
801 			DRV_LOG(ERR, "Fail to get GPA for descriptor ring.");
802 			return -1;
803 		}
804 		hw->vring[i].desc = gpa;
805 
806 		gpa = hva_to_gpa(vid, (uint64_t)(uintptr_t)vq.avail);
807 		if (gpa == 0) {
808 			DRV_LOG(ERR, "Fail to get GPA for available ring.");
809 			return -1;
810 		}
811 		hw->vring[i].avail = gpa;
812 
813 		/* NET: Direct I/O for Tx queue, relay for Rx queue
814 		 * BLK: relay every queue
815 		 */
816 		if ((internal->hw.device_type == IFCVF_NET) && (i & 1)) {
817 			gpa = hva_to_gpa(vid, (uint64_t)(uintptr_t)vq.used);
818 			if (gpa == 0) {
819 				DRV_LOG(ERR, "Fail to get GPA for used ring.");
820 				return -1;
821 			}
822 			hw->vring[i].used = gpa;
823 		} else {
824 			hw->vring[i].used = m_vring_iova +
825 				(char *)internal->m_vring[i].used -
826 				(char *)internal->m_vring[i].desc;
827 		}
828 
829 		hw->vring[i].size = vq.size;
830 
831 		rte_vhost_get_vring_base(vid, i,
832 				&internal->m_vring[i].avail->idx,
833 				&internal->m_vring[i].used->idx);
834 
835 		rte_vhost_get_vring_base(vid, i, &hw->vring[i].last_avail_idx,
836 				&hw->vring[i].last_used_idx);
837 
838 		m_vring_iova += size;
839 	}
840 	hw->nr_vring = nr_vring;
841 
842 	return ifcvf_start_hw(&internal->hw);
843 
844 error:
845 	for (i = 0; i < nr_vring; i++)
846 		rte_free(internal->m_vring[i].desc);
847 
848 	return -1;
849 }
850 
851 static int
852 m_ifcvf_stop(struct ifcvf_internal *internal)
853 {
854 	int vid;
855 	uint32_t i;
856 	struct rte_vhost_vring vq;
857 	struct ifcvf_hw *hw = &internal->hw;
858 	uint64_t m_vring_iova = IFCVF_MEDIATED_VRING;
859 	uint64_t size, len;
860 	u32 ring_state = 0;
861 
862 	vid = internal->vid;
863 
864 	/* To make sure no request is lost for the blk device,
865 	 * do not stop until last_avail_idx == last_used_idx.
866 	 */
867 	if (internal->hw.device_type == IFCVF_BLK) {
868 		for (i = 0; i < hw->nr_vring; i++) {
869 			do {
870 				if (hw->lm_cfg != NULL)
871 					ring_state = *(u32 *)(hw->lm_cfg +
872 						IFCVF_LM_RING_STATE_OFFSET +
873 						i * IFCVF_LM_CFG_SIZE);
874 				hw->vring[i].last_avail_idx =
875 					(u16)(ring_state & IFCVF_16_BIT_MASK);
876 				hw->vring[i].last_used_idx =
877 					(u16)(ring_state >> 16);
878 				usleep(10);
879 			} while (hw->vring[i].last_avail_idx !=
880 				hw->vring[i].last_used_idx);
881 		}
882 	}
883 
884 	ifcvf_stop_hw(hw);
885 
886 	for (i = 0; i < hw->nr_vring; i++) {
887 		/* synchronize remaining new used entries if any */
888 		if (internal->hw.device_type == IFCVF_NET) {
889 			if ((i & 1) == 0)
890 				update_used_ring(internal, i);
891 		} else if (internal->hw.device_type == IFCVF_BLK) {
892 			update_used_ring(internal, i);
893 		}
894 
895 		rte_vhost_get_vhost_vring(vid, i, &vq);
896 		len = IFCVF_USED_RING_LEN(vq.size);
897 		rte_vhost_log_used_vring(vid, i, 0, len);
898 
899 		size = RTE_ALIGN_CEIL(vring_size(vq.size, rte_mem_page_size()),
900 				rte_mem_page_size());
901 		rte_vfio_container_dma_unmap(internal->vfio_container_fd,
902 			(uint64_t)(uintptr_t)internal->m_vring[i].desc,
903 			m_vring_iova, size);
904 
905 		rte_vhost_set_vring_base(vid, i, hw->vring[i].last_avail_idx,
906 				hw->vring[i].last_used_idx);
907 		rte_free(internal->m_vring[i].desc);
908 		m_vring_iova += size;
909 	}
910 
911 	return 0;
912 }
913 
914 static void
915 update_used_ring(struct ifcvf_internal *internal, uint16_t qid)
916 {
917 	rte_vdpa_relay_vring_used(internal->vid, qid, &internal->m_vring[qid]);
918 	rte_vhost_vring_call(internal->vid, qid);
919 }
920 
921 static uint32_t
922 vring_relay(void *arg)
923 {
924 	int i, vid, epfd, fd, nfds;
925 	struct ifcvf_internal *internal = (struct ifcvf_internal *)arg;
926 	struct rte_vhost_vring vring;
927 	uint16_t qid, q_num;
928 	struct epoll_event events[IFCVF_MAX_QUEUES * 4];
929 	struct epoll_event ev;
930 	int nbytes;
931 	uint64_t buf;
932 
933 	vid = internal->vid;
934 	q_num = rte_vhost_get_vring_num(vid);
935 
936 	/* add notify fd and interrupt fd to epoll */
937 	epfd = epoll_create(IFCVF_MAX_QUEUES * 2);
938 	if (epfd < 0) {
939 		DRV_LOG(ERR, "failed to create epoll instance.");
940 		return 1;
941 	}
942 	internal->epfd = epfd;
943 
944 	vring.kickfd = -1;
945 	for (qid = 0; qid < q_num; qid++) {
946 		ev.events = EPOLLIN | EPOLLPRI;
947 		rte_vhost_get_vhost_vring(vid, qid, &vring);
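		/* Kick events keep bit 0 clear; the queue id goes in the
		 * remaining low bits and the kickfd in the high 32 bits.
		 */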
948 		ev.data.u64 = qid << 1 | (uint64_t)vring.kickfd << 32;
949 		if (epoll_ctl(epfd, EPOLL_CTL_ADD, vring.kickfd, &ev) < 0) {
950 			DRV_LOG(ERR, "epoll add error: %s", strerror(errno));
951 			return 1;
952 		}
953 	}
954 
955 	for (qid = 0; qid < q_num; qid += 1) {
956 		if ((internal->hw.device_type == IFCVF_NET) && (qid & 1))
957 			continue;
958 		ev.events = EPOLLIN | EPOLLPRI;
959 		/* set bit 0 as a flag to mark this event as an interrupt */
960 		ev.data.u64 = 1 | qid << 1 |
961 			(uint64_t)internal->intr_fd[qid] << 32;
962 		if (epoll_ctl(epfd, EPOLL_CTL_ADD, internal->intr_fd[qid], &ev)
963 				< 0) {
964 			DRV_LOG(ERR, "epoll add error: %s", strerror(errno));
965 			return 1;
966 		}
967 		update_used_ring(internal, qid);
968 	}
969 
970 	/* start relay with a first kick */
971 	for (qid = 0; qid < q_num; qid++)
972 		ifcvf_notify_queue(&internal->hw, qid);
973 
974 	/* listen to the events and react accordingly */
975 	for (;;) {
976 		nfds = epoll_wait(epfd, events, q_num * 2, -1);
977 		if (nfds < 0) {
978 			if (errno == EINTR)
979 				continue;
980 			DRV_LOG(ERR, "epoll_wait failed.");
981 			return 1;
982 		}
983 
984 		for (i = 0; i < nfds; i++) {
985 			fd = (uint32_t)(events[i].data.u64 >> 32);
986 			do {
987 				nbytes = read(fd, &buf, 8);
988 				if (nbytes < 0) {
989 					if (errno == EINTR ||
990 					    errno == EWOULDBLOCK ||
991 					    errno == EAGAIN)
992 						continue;
993 					DRV_LOG(INFO, "Error reading "
994 						"kickfd: %s",
995 						strerror(errno));
996 				}
997 				break;
998 			} while (1);
999 
1000 			qid = events[i].data.u32 >> 1;
1001 
1002 			if (events[i].data.u32 & 1)
1003 				update_used_ring(internal, qid);
1004 			else
1005 				ifcvf_notify_queue(&internal->hw, qid);
1006 		}
1007 	}
1008 
1009 	return 0;
1010 }
1011 
1012 static int
1013 setup_vring_relay(struct ifcvf_internal *internal)
1014 {
1015 	char name[RTE_THREAD_INTERNAL_NAME_SIZE];
1016 	int ret;
1017 
1018 	snprintf(name, sizeof(name), "ifc-ring%d", internal->vid);
1019 	ret = rte_thread_create_internal_control(&internal->tid, name,
1020 			vring_relay, internal);
1021 	if (ret != 0) {
1022 		DRV_LOG(ERR, "failed to create ring relay pthread.");
1023 		return -1;
1024 	}
1025 
1026 	return 0;
1027 }
1028 
1029 static int
1030 unset_vring_relay(struct ifcvf_internal *internal)
1031 {
1032 	if (internal->tid.opaque_id != 0) {
1033 		pthread_cancel((pthread_t)internal->tid.opaque_id);
1034 		rte_thread_join(internal->tid, NULL);
1035 	}
1036 	internal->tid.opaque_id = 0;
1037 
1038 	if (internal->epfd >= 0)
1039 		close(internal->epfd);
1040 	internal->epfd = -1;
1041 
1042 	return 0;
1043 }
1044 
1045 static int
1046 ifcvf_sw_fallback_switchover(struct ifcvf_internal *internal)
1047 {
1048 	int ret;
1049 	int vid = internal->vid;
1050 
1051 	/* stop the direct IO data path */
1052 	unset_notify_relay(internal);
1053 	vdpa_ifcvf_stop(internal);
1054 
1055 	unset_intr_relay(internal);
1056 
1057 	vdpa_disable_vfio_intr(internal);
1058 
1059 	rte_atomic32_set(&internal->running, 0);
1060 
1061 	ret = rte_vhost_host_notifier_ctrl(vid, RTE_VHOST_QUEUE_ALL, false);
1062 	if (ret && ret != -ENOTSUP)
1063 		goto error;
1064 
1065 	/* set up interrupt for interrupt relay */
1066 	ret = vdpa_enable_vfio_intr(internal, true);
1067 	if (ret)
1068 		goto unmap;
1069 
1070 	/* config the VF */
1071 	ret = m_ifcvf_start(internal);
1072 	if (ret)
1073 		goto unset_intr;
1074 
1075 	/* set up vring relay thread */
1076 	ret = setup_vring_relay(internal);
1077 	if (ret)
1078 		goto stop_vf;
1079 
1080 	rte_vhost_host_notifier_ctrl(vid, RTE_VHOST_QUEUE_ALL, true);
1081 
1082 	internal->sw_fallback_running = true;
1083 
1084 	return 0;
1085 
1086 stop_vf:
1087 	m_ifcvf_stop(internal);
1088 unset_intr:
1089 	vdpa_disable_vfio_intr(internal);
1090 unmap:
1091 	ifcvf_dma_map(internal, false);
1092 error:
1093 	return -1;
1094 }
1095 
1096 static int
1097 ifcvf_dev_config(int vid)
1098 {
1099 	struct rte_vdpa_device *vdev;
1100 	struct internal_list *list;
1101 	struct ifcvf_internal *internal;
1102 	struct ifcvf_hw *hw;
1103 	uint16_t i;
1104 
1105 	vdev = rte_vhost_get_vdpa_device(vid);
1106 	list = find_internal_resource_by_vdev(vdev);
1107 	if (list == NULL) {
1108 		DRV_LOG(ERR, "Invalid vDPA device: %p", vdev);
1109 		return -1;
1110 	}
1111 
1112 	internal = list->internal;
1113 	internal->vid = vid;
1114 	rte_atomic32_set(&internal->dev_attached, 1);
1115 	if (update_datapath(internal) < 0) {
1116 		DRV_LOG(ERR, "failed to update datapath for vDPA device %s",
1117 			vdev->device->name);
1118 		rte_atomic32_set(&internal->dev_attached, 0);
1119 		return -1;
1120 	}
1121 
1122 	hw = &internal->hw;
1123 	for (i = 0; i < hw->nr_vring; i++) {
1124 		if (!hw->vring[i].enable)
1125 			continue;
1126 		if (rte_vhost_host_notifier_ctrl(vid, i, true) != 0)
1127 			DRV_LOG(NOTICE, "vDPA (%s): software relay is used.",
1128 				vdev->device->name);
1129 	}
1130 
1131 	internal->configured = 1;
1132 	DRV_LOG(INFO, "vDPA device %s is configured", vdev->device->name);
1133 	return 0;
1134 }
1135 
1136 static int
1137 ifcvf_dev_close(int vid)
1138 {
1139 	struct rte_vdpa_device *vdev;
1140 	struct internal_list *list;
1141 	struct ifcvf_internal *internal;
1142 
1143 	vdev = rte_vhost_get_vdpa_device(vid);
1144 	list = find_internal_resource_by_vdev(vdev);
1145 	if (list == NULL) {
1146 		DRV_LOG(ERR, "Invalid vDPA device: %p", vdev);
1147 		return -1;
1148 	}
1149 
1150 	internal = list->internal;
1151 
1152 	if (internal->sw_fallback_running) {
1153 		/* unset ring relay */
1154 		unset_vring_relay(internal);
1155 
1156 		/* reset VF */
1157 		m_ifcvf_stop(internal);
1158 
1159 		/* remove interrupt setting */
1160 		vdpa_disable_vfio_intr(internal);
1161 
1162 		/* unset DMA map for guest memory */
1163 		ifcvf_dma_map(internal, false);
1164 
1165 		internal->sw_fallback_running = false;
1166 	} else {
1167 		rte_atomic32_set(&internal->dev_attached, 0);
1168 		if (update_datapath(internal) < 0) {
1169 			DRV_LOG(ERR, "failed to update datapath for vDPA device %s",
1170 				vdev->device->name);
1171 			internal->configured = 0;
1172 			return -1;
1173 		}
1174 	}
1175 
1176 	internal->configured = 0;
1177 	return 0;
1178 }
1179 
1180 static int
1181 ifcvf_set_features(int vid)
1182 {
1183 	uint64_t features = 0;
1184 	struct rte_vdpa_device *vdev;
1185 	struct internal_list *list;
1186 	struct ifcvf_internal *internal;
1187 	uint64_t log_base = 0, log_size = 0;
1188 
1189 	vdev = rte_vhost_get_vdpa_device(vid);
1190 	list = find_internal_resource_by_vdev(vdev);
1191 	if (list == NULL) {
1192 		DRV_LOG(ERR, "Invalid vDPA device: %p", vdev);
1193 		return -1;
1194 	}
1195 
1196 	internal = list->internal;
1197 	rte_vhost_get_negotiated_features(vid, &features);
1198 
1199 	if (!RTE_VHOST_NEED_LOG(features))
1200 		return 0;
1201 
1202 	if (internal->sw_lm) {
1203 		ifcvf_sw_fallback_switchover(internal);
1204 	} else {
1205 		rte_vhost_get_log_base(vid, &log_base, &log_size);
1206 		rte_vfio_container_dma_map(internal->vfio_container_fd,
1207 				log_base, IFCVF_LOG_BASE, log_size);
1208 		ifcvf_enable_logging(&internal->hw, IFCVF_LOG_BASE, log_size);
1209 	}
1210 
1211 	return 0;
1212 }
1213 
1214 static int
1215 ifcvf_get_vfio_group_fd(int vid)
1216 {
1217 	struct rte_vdpa_device *vdev;
1218 	struct internal_list *list;
1219 
1220 	vdev = rte_vhost_get_vdpa_device(vid);
1221 	list = find_internal_resource_by_vdev(vdev);
1222 	if (list == NULL) {
1223 		DRV_LOG(ERR, "Invalid vDPA device: %p", vdev);
1224 		return -1;
1225 	}
1226 
1227 	return list->internal->vfio_group_fd;
1228 }
1229 
1230 static int
1231 ifcvf_get_vfio_device_fd(int vid)
1232 {
1233 	struct rte_vdpa_device *vdev;
1234 	struct internal_list *list;
1235 
1236 	vdev = rte_vhost_get_vdpa_device(vid);
1237 	list = find_internal_resource_by_vdev(vdev);
1238 	if (list == NULL) {
1239 		DRV_LOG(ERR, "Invalid vDPA device: %p", vdev);
1240 		return -1;
1241 	}
1242 
1243 	return list->internal->vfio_dev_fd;
1244 }
1245 
1246 static int
1247 ifcvf_get_notify_area(int vid, int qid, uint64_t *offset, uint64_t *size)
1248 {
1249 	struct rte_vdpa_device *vdev;
1250 	struct internal_list *list;
1251 	struct ifcvf_internal *internal;
1252 	struct vfio_region_info reg = { .argsz = sizeof(reg) };
1253 	int ret;
1254 
1255 	vdev = rte_vhost_get_vdpa_device(vid);
1256 	list = find_internal_resource_by_vdev(vdev);
1257 	if (list == NULL) {
1258 		DRV_LOG(ERR, "Invalid vDPA device: %p", vdev);
1259 		return -1;
1260 	}
1261 
1262 	internal = list->internal;
1263 
1264 	reg.index = ifcvf_get_notify_region(&internal->hw);
1265 	ret = ioctl(internal->vfio_dev_fd, VFIO_DEVICE_GET_REGION_INFO, &reg);
1266 	if (ret) {
1267 		DRV_LOG(ERR, "Failed to get device region info: %s",
1268 				strerror(errno));
1269 		return -1;
1270 	}
1271 
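	/* Report a one-page (0x1000 byte) window at the queue notify address,
	 * which the vhost library maps to set up the host notifier.
	 */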
1272 	*offset = ifcvf_get_queue_notify_off(&internal->hw, qid) + reg.offset;
1273 	*size = 0x1000;
1274 
1275 	return 0;
1276 }
1277 
1278 static int
1279 ifcvf_get_queue_num(struct rte_vdpa_device *vdev, uint32_t *queue_num)
1280 {
1281 	struct internal_list *list;
1282 
1283 	list = find_internal_resource_by_vdev(vdev);
1284 	if (list == NULL) {
1285 		DRV_LOG(ERR, "Invalid vDPA device: %p", vdev);
1286 		return -1;
1287 	}
1288 
1289 	*queue_num = list->internal->max_queues;
1290 
1291 	return 0;
1292 }
1293 
1294 static int
1295 ifcvf_get_vdpa_features(struct rte_vdpa_device *vdev, uint64_t *features)
1296 {
1297 	struct internal_list *list;
1298 
1299 	list = find_internal_resource_by_vdev(vdev);
1300 	if (list == NULL) {
1301 		DRV_LOG(ERR, "Invalid vDPA device: %p", vdev);
1302 		return -1;
1303 	}
1304 
1305 	*features = list->internal->features;
1306 
1307 	return 0;
1308 }
1309 
1310 #define VDPA_SUPPORTED_PROTOCOL_FEATURES \
1311 		(1ULL << VHOST_USER_PROTOCOL_F_REPLY_ACK | \
1312 		 1ULL << VHOST_USER_PROTOCOL_F_BACKEND_REQ | \
1313 		 1ULL << VHOST_USER_PROTOCOL_F_BACKEND_SEND_FD | \
1314 		 1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER | \
1315 		 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD | \
1316 		 1ULL << VHOST_USER_PROTOCOL_F_MQ | \
1317 		 1ULL << VHOST_USER_PROTOCOL_F_STATUS)
1318 
1319 #define VDPA_BLK_PROTOCOL_FEATURES \
1320 		(1ULL << VHOST_USER_PROTOCOL_F_CONFIG)
1321 
1322 static int
1323 ifcvf_get_protocol_features(struct rte_vdpa_device *vdev, uint64_t *features)
1324 {
1325 	RTE_SET_USED(vdev);
1326 
1327 	*features = VDPA_SUPPORTED_PROTOCOL_FEATURES;
1328 	return 0;
1329 }
1330 
1331 static int
1332 ifcvf_config_vring(struct ifcvf_internal *internal, int vring)
1333 {
1334 	struct ifcvf_hw *hw = &internal->hw;
1335 	int vid = internal->vid;
1336 	struct rte_vhost_vring vq;
1337 	uint64_t gpa;
1338 
1339 	if (hw->vring[vring].enable) {
1340 		rte_vhost_get_vhost_vring(vid, vring, &vq);
1341 		gpa = hva_to_gpa(vid, (uint64_t)(uintptr_t)vq.desc);
1342 		if (gpa == 0) {
1343 			DRV_LOG(ERR, "Fail to get GPA for descriptor ring.");
1344 			return -1;
1345 		}
1346 		hw->vring[vring].desc = gpa;
1347 
1348 		gpa = hva_to_gpa(vid, (uint64_t)(uintptr_t)vq.avail);
1349 		if (gpa == 0) {
1350 			DRV_LOG(ERR, "Fail to get GPA for available ring.");
1351 			return -1;
1352 		}
1353 		hw->vring[vring].avail = gpa;
1354 
1355 		gpa = hva_to_gpa(vid, (uint64_t)(uintptr_t)vq.used);
1356 		if (gpa == 0) {
1357 			DRV_LOG(ERR, "Fail to get GPA for used ring.");
1358 			return -1;
1359 		}
1360 		hw->vring[vring].used = gpa;
1361 
1362 		hw->vring[vring].size = vq.size;
1363 		rte_vhost_get_vring_base(vid, vring,
1364 				&hw->vring[vring].last_avail_idx,
1365 				&hw->vring[vring].last_used_idx);
1366 		ifcvf_enable_vring_hw(&internal->hw, vring);
1367 	} else {
1368 		ifcvf_disable_vring_hw(&internal->hw, vring);
1369 		rte_vhost_set_vring_base(vid, vring,
1370 				hw->vring[vring].last_avail_idx,
1371 				hw->vring[vring].last_used_idx);
1372 	}
1373 
1374 	return 0;
1375 }
1376 
1377 static int
1378 ifcvf_set_vring_state(int vid, int vring, int state)
1379 {
1380 	struct rte_vdpa_device *vdev;
1381 	struct internal_list *list;
1382 	struct ifcvf_internal *internal;
1383 	struct ifcvf_hw *hw;
1384 	bool enable = !!state;
1385 	int ret = 0;
1386 
1387 	vdev = rte_vhost_get_vdpa_device(vid);
1388 	list = find_internal_resource_by_vdev(vdev);
1389 	if (list == NULL) {
1390 		DRV_LOG(ERR, "Invalid vDPA device: %p", vdev);
1391 		return -1;
1392 	}
1393 
1394 	DRV_LOG(INFO, "%s queue %d of vDPA device %s",
1395 		enable ? "enable" : "disable", vring, vdev->device->name);
1396 
1397 	internal = list->internal;
1398 	if (vring < 0 || vring >= internal->max_queues * 2) {
1399 		DRV_LOG(ERR, "Vring index %d not correct", vring);
1400 		return -1;
1401 	}
1402 
1403 	hw = &internal->hw;
1404 	hw->vring[vring].enable = enable;
1405 
1406 	if (!internal->configured)
1407 		return 0;
1408 
1409 	unset_notify_relay(internal);
1410 
1411 	ret = vdpa_enable_vfio_intr(internal, false);
1412 	if (ret) {
1413 		DRV_LOG(ERR, "failed to set vfio interrupt of vDPA device %s",
1414 			vdev->device->name);
1415 		return ret;
1416 	}
1417 
1418 	ret = ifcvf_config_vring(internal, vring);
1419 	if (ret) {
1420 		DRV_LOG(ERR, "failed to configure queue %d of vDPA device %s",
1421 			vring, vdev->device->name);
1422 		return ret;
1423 	}
1424 
1425 	ret = setup_notify_relay(internal);
1426 	if (ret) {
1427 		DRV_LOG(ERR, "failed to setup notify relay of vDPA device %s",
1428 			vdev->device->name);
1429 		return ret;
1430 	}
1431 
1432 	ret = rte_vhost_host_notifier_ctrl(vid, vring, enable);
1433 	if (ret) {
1434 		DRV_LOG(ERR, "vDPA device %s queue %d host notifier ctrl fail",
1435 			vdev->device->name, vring);
1436 		return ret;
1437 	}
1438 
1439 	return 0;
1440 }
1441 
1442 static int
1443 ifcvf_get_device_type(struct rte_vdpa_device *vdev,
1444 	uint32_t *type)
1445 {
1446 	struct ifcvf_internal *internal;
1447 	struct internal_list *list;
1448 	struct rte_device *rte_dev = vdev->device;
1449 
1450 	list = find_internal_resource_by_rte_dev(rte_dev);
1451 	if (list == NULL) {
1452 		DRV_LOG(ERR, "Invalid rte device: %p", rte_dev);
1453 		return -1;
1454 	}
1455 
1456 	internal = list->internal;
1457 
1458 	if (internal->hw.device_type == IFCVF_BLK)
1459 		*type = RTE_VHOST_VDPA_DEVICE_TYPE_BLK;
1460 	else
1461 		*type = RTE_VHOST_VDPA_DEVICE_TYPE_NET;
1462 
1463 	return 0;
1464 }
1465 
1466 static struct rte_vdpa_dev_ops ifcvf_net_ops = {
1467 	.get_queue_num = ifcvf_get_queue_num,
1468 	.get_features = ifcvf_get_vdpa_features,
1469 	.get_protocol_features = ifcvf_get_protocol_features,
1470 	.dev_conf = ifcvf_dev_config,
1471 	.dev_close = ifcvf_dev_close,
1472 	.set_vring_state = ifcvf_set_vring_state,
1473 	.set_features = ifcvf_set_features,
1474 	.migration_done = NULL,
1475 	.get_vfio_group_fd = ifcvf_get_vfio_group_fd,
1476 	.get_vfio_device_fd = ifcvf_get_vfio_device_fd,
1477 	.get_notify_area = ifcvf_get_notify_area,
1478 	.get_dev_type = ifcvf_get_device_type,
1479 };
1480 
1481 static inline int
1482 open_int(const char *key __rte_unused, const char *value, void *extra_args)
1483 {
1484 	uint16_t *n = extra_args;
1485 
1486 	if (value == NULL || extra_args == NULL)
1487 		return -EINVAL;
1488 
1489 	*n = (uint16_t)strtoul(value, NULL, 0);
1490 	if (*n == USHRT_MAX && errno == ERANGE)
1491 		return -1;
1492 
1493 	return 0;
1494 }
1495 
1496 static int16_t
1497 ifcvf_pci_get_device_type(struct rte_pci_device *pci_dev)
1498 {
1499 	uint16_t pci_device_id = pci_dev->id.device_id;
1500 	uint16_t device_id;
1501 
1502 	if (pci_device_id < 0x1000 || pci_device_id > 0x107f) {
1503 		DRV_LOG(ERR, "Probe device is not a virtio device");
1504 		return -1;
1505 	}
1506 
1507 	if (pci_device_id < 0x1040) {
1508 		/* Transitional devices: use the PCI subsystem device id as
1509 		 * virtio device id, same as legacy driver always did.
1510 		 */
1511 		device_id = pci_dev->id.subsystem_device_id;
1512 	} else {
1513 		/* Modern devices: simply use PCI device id,
1514 		 * but start from 0x1040.
1515 		 */
1516 		device_id = pci_device_id - 0x1040;
1517 	}
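	/*
	 * Illustrative example: a modern virtio-net VF with PCI device id
	 * 0x1041 maps to virtio device id 1 (VIRTIO_ID_NET), while a
	 * transitional id such as 0x1000 falls back to the subsystem id.
	 */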
1518 
1519 	return device_id;
1520 }
1521 
1522 static int
1523 ifcvf_blk_get_config(int vid, uint8_t *config, uint32_t size)
1524 {
1525 	struct virtio_blk_config *dev_cfg;
1526 	struct ifcvf_internal *internal;
1527 	struct rte_vdpa_device *vdev;
1528 	struct internal_list *list;
1529 	uint32_t i;
1530 	uint64_t capacity = 0;
1531 	uint8_t *byte;
1532 
1533 	if (size < sizeof(struct virtio_blk_config)) {
1534 		DRV_LOG(ERR, "Invalid len: %u, required: %u",
1535 			size, (uint32_t)sizeof(struct virtio_blk_config));
1536 		return -1;
1537 	}
1538 
1539 	vdev = rte_vhost_get_vdpa_device(vid);
1540 	if (vdev == NULL) {
1541 		DRV_LOG(ERR, "Invalid vDPA device vid: %d", vid);
1542 		return -1;
1543 	}
1544 
1545 	list = find_internal_resource_by_vdev(vdev);
1546 	if (list == NULL) {
1547 		DRV_LOG(ERR, "Invalid vDPA device: %p", vdev);
1548 		return -1;
1549 	}
1550 
1551 	internal = list->internal;
1552 
1553 	for (i = 0; i < sizeof(struct virtio_blk_config); i++)
1554 		config[i] = *((u8 *)internal->hw.blk_cfg + i);
1555 
1556 	dev_cfg = (struct virtio_blk_config *)internal->hw.blk_cfg;
1557 
1558 	/* cannot read 64-bit register in one attempt, so read byte by byte. */
1559 	for (i = 0; i < sizeof(internal->hw.blk_cfg->capacity); i++) {
1560 		byte = (uint8_t *)&internal->hw.blk_cfg->capacity + i;
1561 		capacity |= (uint64_t)*byte << (i * 8);
1562 	}
1563 	/* The capacity is a number of 512-byte sectors.
1564 	 * Right shift 1 bit to get the size in KiB,
1565 	 * another 10 bits to get it in MiB,
1566 	 * and 10 more bits to get it in GiB.
1567 	 * To show the capacity in GiB, right shift 21 bits in total.
1568 	 */
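	/* Example (illustrative): a capacity of 0x4000000 sectors is
	 * 0x4000000 * 512 B = 32 GiB, and 0x4000000 >> 21 == 32.
	 */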
1569 	DRV_LOG(DEBUG, "capacity  : %"PRIu64"G", capacity >> 21);
1570 
1571 	DRV_LOG(DEBUG, "size_max  : 0x%08x", dev_cfg->size_max);
1572 	DRV_LOG(DEBUG, "seg_max   : 0x%08x", dev_cfg->seg_max);
1573 	DRV_LOG(DEBUG, "blk_size  : 0x%08x", dev_cfg->blk_size);
1574 	DRV_LOG(DEBUG, "geometry");
1575 	DRV_LOG(DEBUG, "      cylinders: %u", dev_cfg->geometry.cylinders);
1576 	DRV_LOG(DEBUG, "      heads    : %u", dev_cfg->geometry.heads);
1577 	DRV_LOG(DEBUG, "      sectors  : %u", dev_cfg->geometry.sectors);
1578 	DRV_LOG(DEBUG, "num_queues: 0x%08x", dev_cfg->num_queues);
1579 
1580 	DRV_LOG(DEBUG, "config: [%x] [%x] [%x] [%x] [%x] [%x] [%x] [%x]",
1581 		config[0], config[1], config[2], config[3], config[4],
1582 		config[5], config[6], config[7]);
1583 	return 0;
1584 }
1585 
1586 static int
1587 ifcvf_blk_get_protocol_features(struct rte_vdpa_device *vdev,
1588 	uint64_t *features)
1589 {
1590 	RTE_SET_USED(vdev);
1591 
1592 	*features = VDPA_SUPPORTED_PROTOCOL_FEATURES;
1593 	*features |= VDPA_BLK_PROTOCOL_FEATURES;
1594 	return 0;
1595 }
1596 
1597 static struct rte_vdpa_dev_ops ifcvf_blk_ops = {
1598 	.get_queue_num = ifcvf_get_queue_num,
1599 	.get_features = ifcvf_get_vdpa_features,
1600 	.set_features = ifcvf_set_features,
1601 	.get_protocol_features = ifcvf_blk_get_protocol_features,
1602 	.dev_conf = ifcvf_dev_config,
1603 	.dev_close = ifcvf_dev_close,
1604 	.set_vring_state = ifcvf_set_vring_state,
1605 	.migration_done = NULL,
1606 	.get_vfio_group_fd = ifcvf_get_vfio_group_fd,
1607 	.get_vfio_device_fd = ifcvf_get_vfio_device_fd,
1608 	.get_notify_area = ifcvf_get_notify_area,
1609 	.get_config = ifcvf_blk_get_config,
1610 	.get_dev_type = ifcvf_get_device_type,
1611 };
1612 
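/*
 * The entries below must line up with the IFCVF_NET and IFCVF_BLK device
 * types, which are used as indexes into this array.
 */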
1613 struct rte_vdpa_dev_info dev_info[] = {
1614 	{
1615 		.features = (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
1616 			    (1ULL << VIRTIO_NET_F_CTRL_VQ) |
1617 			    (1ULL << VIRTIO_NET_F_STATUS) |
1618 			    (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
1619 			    (1ULL << VHOST_F_LOG_ALL),
1620 		.ops = &ifcvf_net_ops,
1621 	},
1622 	{
1623 		.features = (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
1624 			    (1ULL << VHOST_F_LOG_ALL),
1625 		.ops = &ifcvf_blk_ops,
1626 	},
1627 };
1628 
1629 static int
1630 ifcvf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
1631 		struct rte_pci_device *pci_dev)
1632 {
1633 	uint64_t features;
1634 	struct ifcvf_internal *internal = NULL;
1635 	struct internal_list *list = NULL;
1636 	int vdpa_mode = 0;
1637 	int sw_fallback_lm = 0;
1638 	struct rte_kvargs *kvlist = NULL;
1639 	int ret = 0;
1640 	int16_t device_id;
1641 	uint64_t capacity = 0;
1642 	uint8_t *byte;
1643 	uint32_t i;
1644 	uint16_t queue_pairs;
1645 
1646 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1647 		return 0;
1648 
1649 	if (!pci_dev->device.devargs)
1650 		return 1;
1651 
1652 	kvlist = rte_kvargs_parse(pci_dev->device.devargs->args,
1653 			ifcvf_valid_arguments);
1654 	if (kvlist == NULL)
1655 		return 1;
1656 
1657 	/* probe only when vdpa mode is specified */
1658 	if (rte_kvargs_count(kvlist, IFCVF_VDPA_MODE) == 0) {
1659 		rte_kvargs_free(kvlist);
1660 		return 1;
1661 	}
1662 
1663 	ret = rte_kvargs_process(kvlist, IFCVF_VDPA_MODE, &open_int,
1664 			&vdpa_mode);
1665 	if (ret < 0 || vdpa_mode == 0) {
1666 		rte_kvargs_free(kvlist);
1667 		return 1;
1668 	}
1669 
1670 	list = rte_zmalloc("ifcvf", sizeof(*list), 0);
1671 	if (list == NULL)
1672 		goto error;
1673 
1674 	internal = rte_zmalloc("ifcvf", sizeof(*internal), 0);
1675 	if (internal == NULL)
1676 		goto error;
1677 
1678 	internal->pdev = pci_dev;
1679 	rte_spinlock_init(&internal->lock);
1680 
1681 	if (ifcvf_vfio_setup(internal) < 0) {
1682 		DRV_LOG(ERR, "failed to setup device %s", pci_dev->name);
1683 		goto error;
1684 	}
1685 
1686 	if (ifcvf_init_hw(&internal->hw, internal->pdev) < 0) {
1687 		DRV_LOG(ERR, "failed to init device %s", pci_dev->name);
1688 		goto error;
1689 	}
1690 
1691 	internal->configured = 0;
1692 	features = ifcvf_get_features(&internal->hw);
1693 
1694 	device_id = ifcvf_pci_get_device_type(pci_dev);
1695 	if (device_id < 0) {
1696 		DRV_LOG(ERR, "failed to get device %s type", pci_dev->name);
1697 		goto error;
1698 	}
1699 
1700 	if (device_id == VIRTIO_ID_NET) {
1701 		internal->hw.device_type = IFCVF_NET;
1702 		/*
1703 		 * ifc device always has CTRL_VQ,
1704 		 * The IFC device always has a CTRL_VQ
1705 		 * and supports the VIRTIO_NET_F_CTRL_VQ feature.
1706 		queue_pairs = (internal->hw.common_cfg->num_queues - 1) / 2;
1707 		DRV_LOG(INFO, "%s support %u queue pairs", pci_dev->name,
1708 			queue_pairs);
1709 		internal->max_queues = MIN(IFCVF_MAX_QUEUES, queue_pairs);
1710 		internal->features = features &
1711 					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
1712 		internal->features |= dev_info[IFCVF_NET].features;
1713 	} else if (device_id == VIRTIO_ID_BLOCK) {
1714 		internal->hw.device_type = IFCVF_BLK;
1715 		internal->features = features &
1716 					~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
1717 		internal->features |= dev_info[IFCVF_BLK].features;
1718 
1719 		/* cannot read 64-bit register in one attempt,
1720 		 * so read byte by byte.
1721 		 */
1722 		for (i = 0; i < sizeof(internal->hw.blk_cfg->capacity); i++) {
1723 			byte = (uint8_t *)&internal->hw.blk_cfg->capacity + i;
1724 			capacity |= (uint64_t)*byte << (i * 8);
1725 		}
1726 		/* The capacity is a number of 512-byte sectors.
1727 		 * Right shift 1 bit to get the size in KiB,
1728 		 * another 10 bits to get it in MiB,
1729 		 * and 10 more bits to get it in GiB.
1730 		 * To show the capacity in GiB, right shift 21 bits in total.
1731 		 */
1732 		DRV_LOG(DEBUG, "capacity  : %"PRIu64"G", capacity >> 21);
1733 
1734 		DRV_LOG(DEBUG, "size_max  : 0x%08x",
1735 			internal->hw.blk_cfg->size_max);
1736 		DRV_LOG(DEBUG, "seg_max   : 0x%08x",
1737 			internal->hw.blk_cfg->seg_max);
1738 		DRV_LOG(DEBUG, "blk_size  : 0x%08x",
1739 			internal->hw.blk_cfg->blk_size);
1740 		DRV_LOG(DEBUG, "geometry");
1741 		DRV_LOG(DEBUG, "    cylinders: %u",
1742 			internal->hw.blk_cfg->geometry.cylinders);
1743 		DRV_LOG(DEBUG, "    heads    : %u",
1744 			internal->hw.blk_cfg->geometry.heads);
1745 		DRV_LOG(DEBUG, "    sectors  : %u",
1746 			internal->hw.blk_cfg->geometry.sectors);
1747 		DRV_LOG(DEBUG, "num_queues: 0x%08x",
1748 			internal->hw.blk_cfg->num_queues);
1749 
1750 		internal->max_queues = MIN(IFCVF_MAX_QUEUES,
1751 			internal->hw.blk_cfg->num_queues);
1752 	}
1753 
1754 	list->internal = internal;
1755 
1756 	if (rte_kvargs_count(kvlist, IFCVF_SW_FALLBACK_LM)) {
1757 		ret = rte_kvargs_process(kvlist, IFCVF_SW_FALLBACK_LM,
1758 				&open_int, &sw_fallback_lm);
1759 		if (ret < 0)
1760 			goto error;
1761 	}
1762 	internal->sw_lm = sw_fallback_lm;
1763 	if (!internal->sw_lm && !internal->hw.lm_cfg) {
1764 		DRV_LOG(ERR, "Device %s does not support HW-assisted live migration, please enable sw-live-migration!",
1765 			pci_dev->name);
1766 		goto error;
1767 	}
1768 
1769 	pthread_mutex_lock(&internal_list_lock);
1770 	TAILQ_INSERT_TAIL(&internal_list, list, next);
1771 	pthread_mutex_unlock(&internal_list_lock);
1772 
1773 	internal->vdev = rte_vdpa_register_device(&pci_dev->device,
1774 				dev_info[internal->hw.device_type].ops);
1775 	if (internal->vdev == NULL) {
1776 		DRV_LOG(ERR, "failed to register device %s", pci_dev->name);
1777 		pthread_mutex_lock(&internal_list_lock);
1778 		TAILQ_REMOVE(&internal_list, list, next);
1779 		pthread_mutex_unlock(&internal_list_lock);
1780 		goto error;
1781 	}
1782 
1783 	rte_atomic32_set(&internal->started, 1);
1784 	if (update_datapath(internal) < 0) {
1785 		DRV_LOG(ERR, "failed to update datapath %s", pci_dev->name);
1786 		rte_atomic32_set(&internal->started, 0);
1787 		rte_vdpa_unregister_device(internal->vdev);
1788 		pthread_mutex_lock(&internal_list_lock);
1789 		TAILQ_REMOVE(&internal_list, list, next);
1790 		pthread_mutex_unlock(&internal_list_lock);
1791 		goto error;
1792 	}
1793 
1794 	rte_kvargs_free(kvlist);
1795 	return 0;
1796 
1797 error:
1798 	rte_kvargs_free(kvlist);
1799 	rte_free(list);
1800 	rte_free(internal);
1801 	return -1;
1802 }
1803 
1804 static int
1805 ifcvf_pci_remove(struct rte_pci_device *pci_dev)
1806 {
1807 	struct ifcvf_internal *internal;
1808 	struct internal_list *list;
1809 
1810 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1811 		return 0;
1812 
1813 	list = find_internal_resource_by_pci_dev(pci_dev);
1814 	if (list == NULL) {
1815 		DRV_LOG(ERR, "Invalid device: %s", pci_dev->name);
1816 		return -1;
1817 	}
1818 
1819 	internal = list->internal;
1820 	rte_atomic32_set(&internal->started, 0);
1821 	if (update_datapath(internal) < 0)
1822 		DRV_LOG(ERR, "failed to update datapath %s", pci_dev->name);
1823 
1824 	rte_pci_unmap_device(internal->pdev);
1825 	rte_vfio_container_destroy(internal->vfio_container_fd);
1826 	rte_vdpa_unregister_device(internal->vdev);
1827 
1828 	pthread_mutex_lock(&internal_list_lock);
1829 	TAILQ_REMOVE(&internal_list, list, next);
1830 	pthread_mutex_unlock(&internal_list_lock);
1831 
1832 	rte_free(list);
1833 	rte_free(internal);
1834 
1835 	return 0;
1836 }
1837 
1838 /*
1839  * IFCVF has the same vendor ID and device ID as virtio net PCI
1840  * device, with its specific subsystem vendor ID and device ID.
1841  */
1842 static const struct rte_pci_id pci_id_ifcvf_map[] = {
1843 	{ .class_id = RTE_CLASS_ANY_ID,
1844 	  .vendor_id = IFCVF_VENDOR_ID,
1845 	  .device_id = IFCVF_NET_MODERN_DEVICE_ID,
1846 	  .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID,
1847 	  .subsystem_device_id = IFCVF_SUBSYS_DEVICE_ID,
1848 	},
1849 
1850 	{ .class_id = RTE_CLASS_ANY_ID,
1851 	  .vendor_id = IFCVF_VENDOR_ID,
1852 	  .device_id = IFCVF_NET_TRANSITIONAL_DEVICE_ID,
1853 	  .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID,
1854 	  .subsystem_device_id = IFCVF_SUBSYS_NET_DEVICE_ID,
1855 	},
1856 
1857 	{ .class_id = RTE_CLASS_ANY_ID,
1858 	  .vendor_id = IFCVF_VENDOR_ID,
1859 	  .device_id = IFCVF_BLK_TRANSITIONAL_DEVICE_ID,
1860 	  .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID,
1861 	  .subsystem_device_id = IFCVF_SUBSYS_BLK_DEVICE_ID,
1862 	},
1863 
1864 	{ .class_id = RTE_CLASS_ANY_ID,
1865 	  .vendor_id = IFCVF_VENDOR_ID,
1866 	  .device_id = IFCVF_BLK_MODERN_DEVICE_ID,
1867 	  .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID,
1868 	  .subsystem_device_id = IFCVF_SUBSYS_BLK_DEVICE_ID,
1869 	},
1870 
1871 	{ .class_id = RTE_CLASS_ANY_ID,
1872 	  .vendor_id = IFCVF_VENDOR_ID,
1873 	  .device_id = IFCVF_BLK_MODERN_DEVICE_ID,
1874 	  .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID,
1875 	  .subsystem_device_id = IFCVF_SUBSYS_DEFAULT_DEVICE_ID,
1876 	}, /* virtio-blk devices with default subsystem IDs */
1877 
1878 	{ .vendor_id = 0, /* sentinel */
1879 	},
1880 };
1881 
1882 static struct rte_pci_driver rte_ifcvf_vdpa = {
1883 	.id_table = pci_id_ifcvf_map,
1884 	.drv_flags = 0,
1885 	.probe = ifcvf_pci_probe,
1886 	.remove = ifcvf_pci_remove,
1887 };
1888 
1889 RTE_PMD_REGISTER_PCI(net_ifcvf, rte_ifcvf_vdpa);
1890 RTE_PMD_REGISTER_PCI_TABLE(net_ifcvf, pci_id_ifcvf_map);
1891 RTE_PMD_REGISTER_KMOD_DEP(net_ifcvf, "* vfio-pci");
1892