xref: /dpdk/lib/vhost/vhost.c (revision 99f9d799ce21ab22e922ffec8aad51d56e24d04d)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2017 Intel Corporation
3  */
4 
5 #include <linux/vhost.h>
6 #include <linux/virtio_net.h>
7 #include <stddef.h>
8 #include <stdint.h>
9 #include <stdlib.h>
10 #ifdef RTE_LIBRTE_VHOST_NUMA
11 #include <numa.h>
12 #include <numaif.h>
13 #endif
14 
15 #include <rte_errno.h>
16 #include <rte_ethdev.h>
17 #include <rte_log.h>
18 #include <rte_string_fns.h>
19 #include <rte_memory.h>
20 #include <rte_malloc.h>
21 #include <rte_vhost.h>
22 #include <rte_rwlock.h>
23 
24 #include "iotlb.h"
25 #include "vhost.h"
26 #include "vhost_user.h"
27 
28 struct virtio_net *vhost_devices[MAX_VHOST_DEVICE];
29 pthread_mutex_t vhost_dev_lock = PTHREAD_MUTEX_INITIALIZER;
30 
31 /* Called with iotlb_lock read-locked */
32 uint64_t
33 __vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq,
34 		    uint64_t iova, uint64_t *size, uint8_t perm)
35 {
36 	uint64_t vva, tmp_size;
37 
38 	if (unlikely(!*size))
39 		return 0;
40 
41 	tmp_size = *size;
42 
43 	vva = vhost_user_iotlb_cache_find(vq, iova, &tmp_size, perm);
44 	if (tmp_size == *size)
45 		return vva;
46 
47 	iova += tmp_size;
48 
49 	if (!vhost_user_iotlb_pending_miss(vq, iova, perm)) {
50 		/*
51 		 * iotlb_lock is read-locked for a full burst,
52 		 * but it only protects the iotlb cache.
53 		 * In case of IOTLB miss, we might block on the socket,
54 		 * which could cause a deadlock with QEMU if an IOTLB update
55 		 * is being handled. We can safely unlock here to avoid it.
56 		 */
57 		vhost_user_iotlb_rd_unlock(vq);
58 
59 		vhost_user_iotlb_pending_insert(vq, iova, perm);
60 		if (vhost_user_iotlb_miss(dev, iova, perm)) {
61 			VHOST_LOG_CONFIG(ERR,
62 				"IOTLB miss req failed for IOVA 0x%" PRIx64 "\n",
63 				iova);
64 			vhost_user_iotlb_pending_remove(vq, iova, 1, perm);
65 		}
66 
67 		vhost_user_iotlb_rd_lock(vq);
68 	}
69 
70 	return 0;
71 }
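
/*
 * Illustrative sketch, not part of vhost.c: the typical calling pattern for
 * the translation helper above. On an IOTLB miss it returns 0 after posting
 * a miss request, so callers bail out and retry once the update arrives.
 * "example_translate" is a hypothetical helper shown only for clarity.
 */
static __rte_always_inline void *
example_translate(struct virtio_net *dev, struct vhost_virtqueue *vq,
		uint64_t iova, uint64_t expected_len)
{
	uint64_t len = expected_len;
	uint64_t vva;

	/* Must be called with iotlb_lock read-locked, as noted above */
	vva = vhost_iova_to_vva(dev, vq, iova, &len, VHOST_ACCESS_RO);

	/* Either nothing is mapped, or only part of the range is mapped */
	if (!vva || len != expected_len)
		return NULL;

	return (void *)(uintptr_t)vva;
}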
72 
73 #define VHOST_LOG_PAGE	4096
74 
75 /*
76  * Atomically set a bit in memory.
77  */
78 static __rte_always_inline void
79 vhost_set_bit(unsigned int nr, volatile uint8_t *addr)
80 {
81 #if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100)
82 	/*
83 	 * __sync_ built-ins are deprecated, but __atomic_ ones
84 	 * are not as well optimized in older GCC versions.
85 	 */
86 	__sync_fetch_and_or_1(addr, (1U << nr));
87 #else
88 	__atomic_fetch_or(addr, (1U << nr), __ATOMIC_RELAXED);
89 #endif
90 }
91 
92 static __rte_always_inline void
93 vhost_log_page(uint8_t *log_base, uint64_t page)
94 {
95 	vhost_set_bit(page % 8, &log_base[page / 8]);
96 }
97 
98 void
99 __vhost_log_write(struct virtio_net *dev, uint64_t addr, uint64_t len)
100 {
101 	uint64_t page;
102 
103 	if (unlikely(!dev->log_base || !len))
104 		return;
105 
106 	if (unlikely(dev->log_size <= ((addr + len - 1) / VHOST_LOG_PAGE / 8)))
107 		return;
108 
109 	/* To make sure guest memory updates are committed before logging */
110 	rte_atomic_thread_fence(__ATOMIC_RELEASE);
111 
112 	page = addr / VHOST_LOG_PAGE;
113 	while (page * VHOST_LOG_PAGE < addr + len) {
114 		vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);
115 		page += 1;
116 	}
117 }
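
/*
 * Illustrative sketch, not part of vhost.c: how a guest physical address
 * maps into the dirty log bitmap written by __vhost_log_write() above.
 * One bit tracks one VHOST_LOG_PAGE (4 KiB) page, eight pages per byte.
 * "example_log_bitmap_position" is a hypothetical helper for clarity.
 */
static __rte_always_inline void
example_log_bitmap_position(uint64_t gpa, uint64_t *byte, uint8_t *bit)
{
	uint64_t page = gpa / VHOST_LOG_PAGE;	/* e.g. GPA 0x12345 -> page 0x12 */

	*byte = page / 8;	/* byte offset into dev->log_base */
	*bit = page % 8;	/* bit within that byte, as in vhost_log_page() */
}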
118 
119 void
120 __vhost_log_write_iova(struct virtio_net *dev, struct vhost_virtqueue *vq,
121 			     uint64_t iova, uint64_t len)
122 {
123 	uint64_t hva, gpa, map_len;
124 	map_len = len;
125 
126 	hva = __vhost_iova_to_vva(dev, vq, iova, &map_len, VHOST_ACCESS_RW);
127 	if (map_len != len) {
128 		VHOST_LOG_DATA(ERR,
129 			"Failed to write log for IOVA 0x%" PRIx64 ". No IOTLB entry found\n",
130 			iova);
131 		return;
132 	}
133 
134 	gpa = hva_to_gpa(dev, hva, len);
135 	if (gpa)
136 		__vhost_log_write(dev, gpa, len);
137 }
138 
139 void
140 __vhost_log_cache_sync(struct virtio_net *dev, struct vhost_virtqueue *vq)
141 {
142 	unsigned long *log_base;
143 	int i;
144 
145 	if (unlikely(!dev->log_base))
146 		return;
147 
148 	/* No cache, nothing to sync */
149 	if (unlikely(!vq->log_cache))
150 		return;
151 
152 	rte_atomic_thread_fence(__ATOMIC_RELEASE);
153 
154 	log_base = (unsigned long *)(uintptr_t)dev->log_base;
155 
156 	for (i = 0; i < vq->log_cache_nb_elem; i++) {
157 		struct log_cache_entry *elem = vq->log_cache + i;
158 
159 #if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100)
160 		/*
161 		 * '__sync' builtins are deprecated, but '__atomic' ones
162 		 * are not as well optimized in older GCC versions.
163 		 */
164 		__sync_fetch_and_or(log_base + elem->offset, elem->val);
165 #else
166 		__atomic_fetch_or(log_base + elem->offset, elem->val,
167 				__ATOMIC_RELAXED);
168 #endif
169 	}
170 
171 	rte_atomic_thread_fence(__ATOMIC_RELEASE);
172 
173 	vq->log_cache_nb_elem = 0;
174 }
175 
176 static __rte_always_inline void
177 vhost_log_cache_page(struct virtio_net *dev, struct vhost_virtqueue *vq,
178 			uint64_t page)
179 {
180 	uint32_t bit_nr = page % (sizeof(unsigned long) << 3);
181 	uint32_t offset = page / (sizeof(unsigned long) << 3);
182 	int i;
183 
184 	if (unlikely(!vq->log_cache)) {
185 		/* No logging cache allocated, write dirty log map directly */
186 		rte_atomic_thread_fence(__ATOMIC_RELEASE);
187 		vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);
188 
189 		return;
190 	}
191 
192 	for (i = 0; i < vq->log_cache_nb_elem; i++) {
193 		struct log_cache_entry *elem = vq->log_cache + i;
194 
195 		if (elem->offset == offset) {
196 			elem->val |= (1UL << bit_nr);
197 			return;
198 		}
199 	}
200 
201 	if (unlikely(i >= VHOST_LOG_CACHE_NR)) {
202 		/*
203 		 * No more room for a new log cache entry,
204 		 * so write the dirty log map directly.
205 		 */
206 		rte_atomic_thread_fence(__ATOMIC_RELEASE);
207 		vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);
208 
209 		return;
210 	}
211 
212 	vq->log_cache[i].offset = offset;
213 	vq->log_cache[i].val = (1UL << bit_nr);
214 	vq->log_cache_nb_elem++;
215 }
216 
217 void
218 __vhost_log_cache_write(struct virtio_net *dev, struct vhost_virtqueue *vq,
219 			uint64_t addr, uint64_t len)
220 {
221 	uint64_t page;
222 
223 	if (unlikely(!dev->log_base || !len))
224 		return;
225 
226 	if (unlikely(dev->log_size <= ((addr + len - 1) / VHOST_LOG_PAGE / 8)))
227 		return;
228 
229 	page = addr / VHOST_LOG_PAGE;
230 	while (page * VHOST_LOG_PAGE < addr + len) {
231 		vhost_log_cache_page(dev, vq, page);
232 		page += 1;
233 	}
234 }
235 
236 void
237 __vhost_log_cache_write_iova(struct virtio_net *dev, struct vhost_virtqueue *vq,
238 			     uint64_t iova, uint64_t len)
239 {
240 	uint64_t hva, gpa, map_len;
241 	map_len = len;
242 
243 	hva = __vhost_iova_to_vva(dev, vq, iova, &map_len, VHOST_ACCESS_RW);
244 	if (map_len != len) {
245 		VHOST_LOG_DATA(ERR,
246 			"Failed to write log for IOVA 0x%" PRIx64 ". No IOTLB entry found\n",
247 			iova);
248 		return;
249 	}
250 
251 	gpa = hva_to_gpa(dev, hva, len);
252 	if (gpa)
253 		__vhost_log_cache_write(dev, vq, gpa, len);
254 }
255 
256 void *
257 vhost_alloc_copy_ind_table(struct virtio_net *dev, struct vhost_virtqueue *vq,
258 		uint64_t desc_addr, uint64_t desc_len)
259 {
260 	void *idesc;
261 	uint64_t src, dst;
262 	uint64_t len, remain = desc_len;
263 
264 	idesc = rte_malloc(__func__, desc_len, 0);
265 	if (unlikely(!idesc))
266 		return NULL;
267 
268 	dst = (uint64_t)(uintptr_t)idesc;
269 
270 	while (remain) {
271 		len = remain;
272 		src = vhost_iova_to_vva(dev, vq, desc_addr, &len,
273 				VHOST_ACCESS_RO);
274 		if (unlikely(!src || !len)) {
275 			rte_free(idesc);
276 			return NULL;
277 		}
278 
279 		rte_memcpy((void *)(uintptr_t)dst, (void *)(uintptr_t)src, len);
280 
281 		remain -= len;
282 		dst += len;
283 		desc_addr += len;
284 	}
285 
286 	return idesc;
287 }
288 
289 void
290 cleanup_vq(struct vhost_virtqueue *vq, int destroy)
291 {
292 	if ((vq->callfd >= 0) && (destroy != 0))
293 		close(vq->callfd);
294 	if (vq->kickfd >= 0)
295 		close(vq->kickfd);
296 }
297 
298 void
299 cleanup_vq_inflight(struct virtio_net *dev, struct vhost_virtqueue *vq)
300 {
301 	if (!(dev->protocol_features &
302 	    (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)))
303 		return;
304 
305 	if (vq_is_packed(dev)) {
306 		if (vq->inflight_packed)
307 			vq->inflight_packed = NULL;
308 	} else {
309 		if (vq->inflight_split)
310 			vq->inflight_split = NULL;
311 	}
312 
313 	if (vq->resubmit_inflight) {
314 		if (vq->resubmit_inflight->resubmit_list) {
315 			free(vq->resubmit_inflight->resubmit_list);
316 			vq->resubmit_inflight->resubmit_list = NULL;
317 		}
318 		free(vq->resubmit_inflight);
319 		vq->resubmit_inflight = NULL;
320 	}
321 }
322 
323 /*
324  * Unmap any memory, close any file descriptors and
325  * free any memory owned by a device.
326  */
327 void
328 cleanup_device(struct virtio_net *dev, int destroy)
329 {
330 	uint32_t i;
331 
332 	vhost_backend_cleanup(dev);
333 
334 	for (i = 0; i < dev->nr_vring; i++) {
335 		cleanup_vq(dev->virtqueue[i], destroy);
336 		cleanup_vq_inflight(dev, dev->virtqueue[i]);
337 	}
338 }
339 
340 static void
341 vhost_free_async_mem(struct vhost_virtqueue *vq)
342 {
343 	rte_free(vq->async_pkts_info);
344 
345 	rte_free(vq->async_buffers_packed);
346 	vq->async_buffers_packed = NULL;
347 	rte_free(vq->async_descs_split);
348 	vq->async_descs_split = NULL;
349 
350 	rte_free(vq->it_pool);
351 	rte_free(vq->vec_pool);
352 
353 	vq->async_pkts_info = NULL;
354 	vq->it_pool = NULL;
355 	vq->vec_pool = NULL;
356 }
357 
358 void
359 free_vq(struct virtio_net *dev, struct vhost_virtqueue *vq)
360 {
361 	if (vq_is_packed(dev))
362 		rte_free(vq->shadow_used_packed);
363 	else
364 		rte_free(vq->shadow_used_split);
365 
366 	vhost_free_async_mem(vq);
367 	rte_free(vq->batch_copy_elems);
368 	if (vq->iotlb_pool)
369 		rte_mempool_free(vq->iotlb_pool);
370 	rte_free(vq->log_cache);
371 	rte_free(vq);
372 }
373 
374 /*
375  * Release virtqueues and device memory.
376  */
377 static void
378 free_device(struct virtio_net *dev)
379 {
380 	uint32_t i;
381 
382 	for (i = 0; i < dev->nr_vring; i++)
383 		free_vq(dev, dev->virtqueue[i]);
384 
385 	rte_free(dev);
386 }
387 
388 static __rte_always_inline int
389 log_translate(struct virtio_net *dev, struct vhost_virtqueue *vq)
390 {
391 	if (likely(!(vq->ring_addrs.flags & (1 << VHOST_VRING_F_LOG))))
392 		return 0;
393 
394 	vq->log_guest_addr = translate_log_addr(dev, vq,
395 						vq->ring_addrs.log_guest_addr);
396 	if (vq->log_guest_addr == 0)
397 		return -1;
398 
399 	return 0;
400 }
401 
402 /*
403  * Converts vring log address to GPA
404  * If IOMMU is enabled, the log address is IOVA
405  * If IOMMU is not enabled, the log address is already GPA
406  *
407  * Caller should have iotlb_lock read-locked
408  */
409 uint64_t
410 translate_log_addr(struct virtio_net *dev, struct vhost_virtqueue *vq,
411 		uint64_t log_addr)
412 {
413 	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) {
414 		const uint64_t exp_size = sizeof(uint64_t);
415 		uint64_t hva, gpa;
416 		uint64_t size = exp_size;
417 
418 		hva = vhost_iova_to_vva(dev, vq, log_addr,
419 					&size, VHOST_ACCESS_RW);
420 
421 		if (size != exp_size)
422 			return 0;
423 
424 		gpa = hva_to_gpa(dev, hva, exp_size);
425 		if (!gpa) {
426 			VHOST_LOG_CONFIG(ERR,
427 				"VQ: Failed to find GPA for log_addr: 0x%"
428 				PRIx64 " hva: 0x%" PRIx64 "\n",
429 				log_addr, hva);
430 			return 0;
431 		}
432 		return gpa;
433 
434 	} else
435 		return log_addr;
436 }
437 
438 /* Caller should have iotlb_lock read-locked */
439 static int
440 vring_translate_split(struct virtio_net *dev, struct vhost_virtqueue *vq)
441 {
442 	uint64_t req_size, size;
443 
444 	req_size = sizeof(struct vring_desc) * vq->size;
445 	size = req_size;
446 	vq->desc = (struct vring_desc *)(uintptr_t)vhost_iova_to_vva(dev, vq,
447 						vq->ring_addrs.desc_user_addr,
448 						&size, VHOST_ACCESS_RW);
449 	if (!vq->desc || size != req_size)
450 		return -1;
451 
452 	req_size = sizeof(struct vring_avail);
453 	req_size += sizeof(uint16_t) * vq->size;
454 	if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))
455 		req_size += sizeof(uint16_t);
456 	size = req_size;
457 	vq->avail = (struct vring_avail *)(uintptr_t)vhost_iova_to_vva(dev, vq,
458 						vq->ring_addrs.avail_user_addr,
459 						&size, VHOST_ACCESS_RW);
460 	if (!vq->avail || size != req_size)
461 		return -1;
462 
463 	req_size = sizeof(struct vring_used);
464 	req_size += sizeof(struct vring_used_elem) * vq->size;
465 	if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))
466 		req_size += sizeof(uint16_t);
467 	size = req_size;
468 	vq->used = (struct vring_used *)(uintptr_t)vhost_iova_to_vva(dev, vq,
469 						vq->ring_addrs.used_user_addr,
470 						&size, VHOST_ACCESS_RW);
471 	if (!vq->used || size != req_size)
472 		return -1;
473 
474 	return 0;
475 }
476 
477 /* Caller should have iotlb_lock read-locked */
478 static int
479 vring_translate_packed(struct virtio_net *dev, struct vhost_virtqueue *vq)
480 {
481 	uint64_t req_size, size;
482 
483 	req_size = sizeof(struct vring_packed_desc) * vq->size;
484 	size = req_size;
485 	vq->desc_packed = (struct vring_packed_desc *)(uintptr_t)
486 		vhost_iova_to_vva(dev, vq, vq->ring_addrs.desc_user_addr,
487 				&size, VHOST_ACCESS_RW);
488 	if (!vq->desc_packed || size != req_size)
489 		return -1;
490 
491 	req_size = sizeof(struct vring_packed_desc_event);
492 	size = req_size;
493 	vq->driver_event = (struct vring_packed_desc_event *)(uintptr_t)
494 		vhost_iova_to_vva(dev, vq, vq->ring_addrs.avail_user_addr,
495 				&size, VHOST_ACCESS_RW);
496 	if (!vq->driver_event || size != req_size)
497 		return -1;
498 
499 	req_size = sizeof(struct vring_packed_desc_event);
500 	size = req_size;
501 	vq->device_event = (struct vring_packed_desc_event *)(uintptr_t)
502 		vhost_iova_to_vva(dev, vq, vq->ring_addrs.used_user_addr,
503 				&size, VHOST_ACCESS_RW);
504 	if (!vq->device_event || size != req_size)
505 		return -1;
506 
507 	return 0;
508 }
509 
510 int
511 vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq)
512 {
513 
514 	if (!(dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)))
515 		goto out;
516 
517 	if (vq_is_packed(dev)) {
518 		if (vring_translate_packed(dev, vq) < 0)
519 			return -1;
520 	} else {
521 		if (vring_translate_split(dev, vq) < 0)
522 			return -1;
523 	}
524 
525 	if (log_translate(dev, vq) < 0)
526 		return -1;
527 out:
528 	vq->access_ok = true;
529 
530 	return 0;
531 }
532 
533 void
534 vring_invalidate(struct virtio_net *dev, struct vhost_virtqueue *vq)
535 {
536 	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
537 		vhost_user_iotlb_wr_lock(vq);
538 
539 	vq->access_ok = false;
540 	vq->desc = NULL;
541 	vq->avail = NULL;
542 	vq->used = NULL;
543 	vq->log_guest_addr = 0;
544 
545 	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
546 		vhost_user_iotlb_wr_unlock(vq);
547 }
548 
549 static void
550 init_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
551 {
552 	struct vhost_virtqueue *vq;
553 
554 	if (vring_idx >= VHOST_MAX_VRING) {
555 		VHOST_LOG_CONFIG(ERR,
556 				"Failed not init vring, out of bound (%d)\n",
557 				vring_idx);
558 		return;
559 	}
560 
561 	vq = dev->virtqueue[vring_idx];
562 	if (!vq) {
563 		VHOST_LOG_CONFIG(ERR, "Virtqueue not allocated (%d)\n",
564 				vring_idx);
565 		return;
566 	}
567 
568 	memset(vq, 0, sizeof(struct vhost_virtqueue));
569 
570 	vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD;
571 	vq->callfd = VIRTIO_UNINITIALIZED_EVENTFD;
572 	vq->notif_enable = VIRTIO_UNINITIALIZED_NOTIF;
573 }
574 
575 static void
576 reset_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
577 {
578 	struct vhost_virtqueue *vq;
579 	int callfd;
580 
581 	if (vring_idx >= VHOST_MAX_VRING) {
582 		VHOST_LOG_CONFIG(ERR,
583 				"Failed not init vring, out of bound (%d)\n",
584 				vring_idx);
585 		return;
586 	}
587 
588 	vq = dev->virtqueue[vring_idx];
589 	if (!vq) {
590 		VHOST_LOG_CONFIG(ERR, "Virtqueue not allocated (%d)\n",
591 				vring_idx);
592 		return;
593 	}
594 
595 	callfd = vq->callfd;
596 	init_vring_queue(dev, vring_idx);
597 	vq->callfd = callfd;
598 }
599 
600 int
601 alloc_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
602 {
603 	struct vhost_virtqueue *vq;
604 	uint32_t i;
605 
606 	/* Also allocate holes, if any, up to requested vring index. */
607 	for (i = 0; i <= vring_idx; i++) {
608 		if (dev->virtqueue[i])
609 			continue;
610 
611 		vq = rte_zmalloc(NULL, sizeof(struct vhost_virtqueue), 0);
612 		if (vq == NULL) {
613 			VHOST_LOG_CONFIG(ERR,
614 				"Failed to allocate memory for vring:%u.\n", i);
615 			return -1;
616 		}
617 
618 		dev->virtqueue[i] = vq;
619 		init_vring_queue(dev, i);
620 		rte_spinlock_init(&vq->access_lock);
621 		vq->avail_wrap_counter = 1;
622 		vq->used_wrap_counter = 1;
623 		vq->signalled_used_valid = false;
624 	}
625 
626 	dev->nr_vring = RTE_MAX(dev->nr_vring, vring_idx + 1);
627 
628 	return 0;
629 }
630 
631 /*
632  * Reset some variables in the device structure, while keeping a few
633  * others untouched, such as vid, ifname and nr_vring: they
634  * should remain the same unless the device is removed.
635  */
636 void
637 reset_device(struct virtio_net *dev)
638 {
639 	uint32_t i;
640 
641 	dev->features = 0;
642 	dev->protocol_features = 0;
643 	dev->flags &= VIRTIO_DEV_BUILTIN_VIRTIO_NET;
644 
645 	for (i = 0; i < dev->nr_vring; i++)
646 		reset_vring_queue(dev, i);
647 }
648 
649 /*
650  * Invoked when a new vhost-user connection is established (i.e. when
651  * a new virtio device is being attached).
652  */
653 int
654 vhost_new_device(void)
655 {
656 	struct virtio_net *dev;
657 	int i;
658 
659 	pthread_mutex_lock(&vhost_dev_lock);
660 	for (i = 0; i < MAX_VHOST_DEVICE; i++) {
661 		if (vhost_devices[i] == NULL)
662 			break;
663 	}
664 
665 	if (i == MAX_VHOST_DEVICE) {
666 		VHOST_LOG_CONFIG(ERR,
667 			"Failed to find a free slot for new device.\n");
668 		pthread_mutex_unlock(&vhost_dev_lock);
669 		return -1;
670 	}
671 
672 	dev = rte_zmalloc(NULL, sizeof(struct virtio_net), 0);
673 	if (dev == NULL) {
674 		VHOST_LOG_CONFIG(ERR,
675 			"Failed to allocate memory for new dev.\n");
676 		pthread_mutex_unlock(&vhost_dev_lock);
677 		return -1;
678 	}
679 
680 	vhost_devices[i] = dev;
681 	pthread_mutex_unlock(&vhost_dev_lock);
682 
683 	dev->vid = i;
684 	dev->flags = VIRTIO_DEV_BUILTIN_VIRTIO_NET;
685 	dev->slave_req_fd = -1;
686 	dev->postcopy_ufd = -1;
687 	rte_spinlock_init(&dev->slave_req_lock);
688 
689 	return i;
690 }
691 
692 void
693 vhost_destroy_device_notify(struct virtio_net *dev)
694 {
695 	struct rte_vdpa_device *vdpa_dev;
696 
697 	if (dev->flags & VIRTIO_DEV_RUNNING) {
698 		vdpa_dev = dev->vdpa_dev;
699 		if (vdpa_dev)
700 			vdpa_dev->ops->dev_close(dev->vid);
701 		dev->flags &= ~VIRTIO_DEV_RUNNING;
702 		dev->notify_ops->destroy_device(dev->vid);
703 	}
704 }
705 
706 /*
707  * Invoked when the vhost-user connection is broken (i.e. when
708  * the virtio device is being detached).
709  */
710 void
711 vhost_destroy_device(int vid)
712 {
713 	struct virtio_net *dev = get_device(vid);
714 
715 	if (dev == NULL)
716 		return;
717 
718 	vhost_destroy_device_notify(dev);
719 
720 	cleanup_device(dev, 1);
721 	free_device(dev);
722 
723 	vhost_devices[vid] = NULL;
724 }
725 
726 void
727 vhost_attach_vdpa_device(int vid, struct rte_vdpa_device *vdpa_dev)
728 {
729 	struct virtio_net *dev = get_device(vid);
730 
731 	if (dev == NULL)
732 		return;
733 
734 	dev->vdpa_dev = vdpa_dev;
735 }
736 
737 void
738 vhost_set_ifname(int vid, const char *if_name, unsigned int if_len)
739 {
740 	struct virtio_net *dev;
741 	unsigned int len;
742 
743 	dev = get_device(vid);
744 	if (dev == NULL)
745 		return;
746 
747 	len = if_len > sizeof(dev->ifname) ?
748 		sizeof(dev->ifname) : if_len;
749 
750 	strncpy(dev->ifname, if_name, len);
751 	dev->ifname[sizeof(dev->ifname) - 1] = '\0';
752 }
753 
754 void
755 vhost_set_builtin_virtio_net(int vid, bool enable)
756 {
757 	struct virtio_net *dev = get_device(vid);
758 
759 	if (dev == NULL)
760 		return;
761 
762 	if (enable)
763 		dev->flags |= VIRTIO_DEV_BUILTIN_VIRTIO_NET;
764 	else
765 		dev->flags &= ~VIRTIO_DEV_BUILTIN_VIRTIO_NET;
766 }
767 
768 void
769 vhost_enable_extbuf(int vid)
770 {
771 	struct virtio_net *dev = get_device(vid);
772 
773 	if (dev == NULL)
774 		return;
775 
776 	dev->extbuf = 1;
777 }
778 
779 void
780 vhost_enable_linearbuf(int vid)
781 {
782 	struct virtio_net *dev = get_device(vid);
783 
784 	if (dev == NULL)
785 		return;
786 
787 	dev->linearbuf = 1;
788 }
789 
790 int
791 rte_vhost_get_mtu(int vid, uint16_t *mtu)
792 {
793 	struct virtio_net *dev = get_device(vid);
794 
795 	if (dev == NULL || mtu == NULL)
796 		return -ENODEV;
797 
798 	if (!(dev->flags & VIRTIO_DEV_READY))
799 		return -EAGAIN;
800 
801 	if (!(dev->features & (1ULL << VIRTIO_NET_F_MTU)))
802 		return -ENOTSUP;
803 
804 	*mtu = dev->mtu;
805 
806 	return 0;
807 }
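
/*
 * Illustrative usage sketch, not part of vhost.c: rte_vhost_get_mtu() can
 * return -EAGAIN until the device is ready, so applications typically query
 * it from the "new_device" callback. "example_query_mtu" is hypothetical.
 */
static __rte_unused void
example_query_mtu(int vid)
{
	uint16_t mtu = 0;
	int ret = rte_vhost_get_mtu(vid, &mtu);

	if (ret == 0)
		VHOST_LOG_CONFIG(INFO, "vid %d negotiated MTU %u\n", vid, mtu);
	else if (ret == -ENOTSUP)
		VHOST_LOG_CONFIG(INFO,
			"vid %d did not negotiate VIRTIO_NET_F_MTU\n", vid);
}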
808 
809 int
810 rte_vhost_get_numa_node(int vid)
811 {
812 #ifdef RTE_LIBRTE_VHOST_NUMA
813 	struct virtio_net *dev = get_device(vid);
814 	int numa_node;
815 	int ret;
816 
817 	if (dev == NULL || numa_available() != 0)
818 		return -1;
819 
820 	ret = get_mempolicy(&numa_node, NULL, 0, dev,
821 			    MPOL_F_NODE | MPOL_F_ADDR);
822 	if (ret < 0) {
823 		VHOST_LOG_CONFIG(ERR,
824 			"(%d) failed to query numa node: %s\n",
825 			vid, rte_strerror(errno));
826 		return -1;
827 	}
828 
829 	return numa_node;
830 #else
831 	RTE_SET_USED(vid);
832 	return -1;
833 #endif
834 }
835 
836 uint32_t
837 rte_vhost_get_queue_num(int vid)
838 {
839 	struct virtio_net *dev = get_device(vid);
840 
841 	if (dev == NULL)
842 		return 0;
843 
844 	return dev->nr_vring / 2;
845 }
846 
847 uint16_t
848 rte_vhost_get_vring_num(int vid)
849 {
850 	struct virtio_net *dev = get_device(vid);
851 
852 	if (dev == NULL)
853 		return 0;
854 
855 	return dev->nr_vring;
856 }
857 
858 int
859 rte_vhost_get_ifname(int vid, char *buf, size_t len)
860 {
861 	struct virtio_net *dev = get_device(vid);
862 
863 	if (dev == NULL || buf == NULL)
864 		return -1;
865 
866 	len = RTE_MIN(len, sizeof(dev->ifname));
867 
868 	strncpy(buf, dev->ifname, len);
869 	buf[len - 1] = '\0';
870 
871 	return 0;
872 }
873 
874 int
875 rte_vhost_get_negotiated_features(int vid, uint64_t *features)
876 {
877 	struct virtio_net *dev;
878 
879 	dev = get_device(vid);
880 	if (dev == NULL || features == NULL)
881 		return -1;
882 
883 	*features = dev->features;
884 	return 0;
885 }
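
/*
 * Illustrative usage sketch, not part of vhost.c: checking a single bit of
 * the negotiated feature set returned above, here whether the packed ring
 * layout was negotiated. "example_uses_packed_ring" is hypothetical.
 */
static __rte_unused int
example_uses_packed_ring(int vid)
{
	uint64_t features = 0;

	if (rte_vhost_get_negotiated_features(vid, &features) < 0)
		return 0;

	return !!(features & (1ULL << VIRTIO_F_RING_PACKED));
}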
886 
887 int
888 rte_vhost_get_negotiated_protocol_features(int vid,
889 					   uint64_t *protocol_features)
890 {
891 	struct virtio_net *dev;
892 
893 	dev = get_device(vid);
894 	if (dev == NULL || protocol_features == NULL)
895 		return -1;
896 
897 	*protocol_features = dev->protocol_features;
898 	return 0;
899 }
900 
901 int
902 rte_vhost_get_mem_table(int vid, struct rte_vhost_memory **mem)
903 {
904 	struct virtio_net *dev;
905 	struct rte_vhost_memory *m;
906 	size_t size;
907 
908 	dev = get_device(vid);
909 	if (dev == NULL || mem == NULL)
910 		return -1;
911 
912 	size = dev->mem->nregions * sizeof(struct rte_vhost_mem_region);
913 	m = malloc(sizeof(struct rte_vhost_memory) + size);
914 	if (!m)
915 		return -1;
916 
917 	m->nregions = dev->mem->nregions;
918 	memcpy(m->regions, dev->mem->regions, size);
919 	*mem = m;
920 
921 	return 0;
922 }
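
/*
 * Illustrative usage sketch, not part of vhost.c: the snapshot returned by
 * rte_vhost_get_mem_table() is allocated with malloc() above, so the caller
 * is expected to free() it when done. "example_dump_mem_table" is
 * hypothetical.
 */
static __rte_unused void
example_dump_mem_table(int vid)
{
	struct rte_vhost_memory *mem = NULL;
	uint32_t i;

	if (rte_vhost_get_mem_table(vid, &mem) < 0)
		return;

	for (i = 0; i < mem->nregions; i++)
		VHOST_LOG_CONFIG(INFO,
			"region %u: GPA 0x%" PRIx64 " size 0x%" PRIx64 "\n",
			i, mem->regions[i].guest_phys_addr,
			mem->regions[i].size);

	free(mem);
}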
923 
924 int
925 rte_vhost_get_vhost_vring(int vid, uint16_t vring_idx,
926 			  struct rte_vhost_vring *vring)
927 {
928 	struct virtio_net *dev;
929 	struct vhost_virtqueue *vq;
930 
931 	dev = get_device(vid);
932 	if (dev == NULL || vring == NULL)
933 		return -1;
934 
935 	if (vring_idx >= VHOST_MAX_VRING)
936 		return -1;
937 
938 	vq = dev->virtqueue[vring_idx];
939 	if (!vq)
940 		return -1;
941 
942 	if (vq_is_packed(dev)) {
943 		vring->desc_packed = vq->desc_packed;
944 		vring->driver_event = vq->driver_event;
945 		vring->device_event = vq->device_event;
946 	} else {
947 		vring->desc = vq->desc;
948 		vring->avail = vq->avail;
949 		vring->used = vq->used;
950 	}
951 	vring->log_guest_addr  = vq->log_guest_addr;
952 
953 	vring->callfd  = vq->callfd;
954 	vring->kickfd  = vq->kickfd;
955 	vring->size    = vq->size;
956 
957 	return 0;
958 }
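
/*
 * Illustrative usage sketch, not part of vhost.c: external backends such as
 * vDPA drivers can use the addresses exposed above. This assumes a split
 * ring; for packed rings use desc_packed/driver_event/device_event instead.
 * "example_read_avail_idx" is a hypothetical helper.
 */
static __rte_unused uint16_t
example_read_avail_idx(int vid, uint16_t vring_idx)
{
	struct rte_vhost_vring vring;

	if (rte_vhost_get_vhost_vring(vid, vring_idx, &vring) < 0)
		return 0;

	/* avail->idx is written by the guest, so read it volatile */
	return *(volatile uint16_t *)&vring.avail->idx;
}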
959 
960 int
961 rte_vhost_get_vhost_ring_inflight(int vid, uint16_t vring_idx,
962 				  struct rte_vhost_ring_inflight *vring)
963 {
964 	struct virtio_net *dev;
965 	struct vhost_virtqueue *vq;
966 
967 	dev = get_device(vid);
968 	if (unlikely(!dev))
969 		return -1;
970 
971 	if (vring_idx >= VHOST_MAX_VRING)
972 		return -1;
973 
974 	vq = dev->virtqueue[vring_idx];
975 	if (unlikely(!vq))
976 		return -1;
977 
978 	if (vq_is_packed(dev)) {
979 		if (unlikely(!vq->inflight_packed))
980 			return -1;
981 
982 		vring->inflight_packed = vq->inflight_packed;
983 	} else {
984 		if (unlikely(!vq->inflight_split))
985 			return -1;
986 
987 		vring->inflight_split = vq->inflight_split;
988 	}
989 
990 	vring->resubmit_inflight = vq->resubmit_inflight;
991 
992 	return 0;
993 }
994 
995 int
996 rte_vhost_set_inflight_desc_split(int vid, uint16_t vring_idx,
997 				  uint16_t idx)
998 {
999 	struct vhost_virtqueue *vq;
1000 	struct virtio_net *dev;
1001 
1002 	dev = get_device(vid);
1003 	if (unlikely(!dev))
1004 		return -1;
1005 
1006 	if (unlikely(!(dev->protocol_features &
1007 	    (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1008 		return 0;
1009 
1010 	if (unlikely(vq_is_packed(dev)))
1011 		return -1;
1012 
1013 	if (unlikely(vring_idx >= VHOST_MAX_VRING))
1014 		return -1;
1015 
1016 	vq = dev->virtqueue[vring_idx];
1017 	if (unlikely(!vq))
1018 		return -1;
1019 
1020 	if (unlikely(!vq->inflight_split))
1021 		return -1;
1022 
1023 	if (unlikely(idx >= vq->size))
1024 		return -1;
1025 
1026 	vq->inflight_split->desc[idx].counter = vq->global_counter++;
1027 	vq->inflight_split->desc[idx].inflight = 1;
1028 	return 0;
1029 }
1030 
1031 int
1032 rte_vhost_set_inflight_desc_packed(int vid, uint16_t vring_idx,
1033 				   uint16_t head, uint16_t last,
1034 				   uint16_t *inflight_entry)
1035 {
1036 	struct rte_vhost_inflight_info_packed *inflight_info;
1037 	struct virtio_net *dev;
1038 	struct vhost_virtqueue *vq;
1039 	struct vring_packed_desc *desc;
1040 	uint16_t old_free_head, free_head;
1041 
1042 	dev = get_device(vid);
1043 	if (unlikely(!dev))
1044 		return -1;
1045 
1046 	if (unlikely(!(dev->protocol_features &
1047 	    (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1048 		return 0;
1049 
1050 	if (unlikely(!vq_is_packed(dev)))
1051 		return -1;
1052 
1053 	if (unlikely(vring_idx >= VHOST_MAX_VRING))
1054 		return -1;
1055 
1056 	vq = dev->virtqueue[vring_idx];
1057 	if (unlikely(!vq))
1058 		return -1;
1059 
1060 	inflight_info = vq->inflight_packed;
1061 	if (unlikely(!inflight_info))
1062 		return -1;
1063 
1064 	if (unlikely(head >= vq->size))
1065 		return -1;
1066 
1067 	desc = vq->desc_packed;
1068 	old_free_head = inflight_info->old_free_head;
1069 	if (unlikely(old_free_head >= vq->size))
1070 		return -1;
1071 
1072 	free_head = old_free_head;
1073 
1074 	/* init header descriptor */
1075 	inflight_info->desc[old_free_head].num = 0;
1076 	inflight_info->desc[old_free_head].counter = vq->global_counter++;
1077 	inflight_info->desc[old_free_head].inflight = 1;
1078 
1079 	/* save desc entries into the inflight entry */
1080 	while (head != ((last + 1) % vq->size)) {
1081 		inflight_info->desc[old_free_head].num++;
1082 		inflight_info->desc[free_head].addr = desc[head].addr;
1083 		inflight_info->desc[free_head].len = desc[head].len;
1084 		inflight_info->desc[free_head].flags = desc[head].flags;
1085 		inflight_info->desc[free_head].id = desc[head].id;
1086 
1087 		inflight_info->desc[old_free_head].last = free_head;
1088 		free_head = inflight_info->desc[free_head].next;
1089 		inflight_info->free_head = free_head;
1090 		head = (head + 1) % vq->size;
1091 	}
1092 
1093 	inflight_info->old_free_head = free_head;
1094 	*inflight_entry = old_free_head;
1095 
1096 	return 0;
1097 }
1098 
1099 int
1100 rte_vhost_clr_inflight_desc_split(int vid, uint16_t vring_idx,
1101 				  uint16_t last_used_idx, uint16_t idx)
1102 {
1103 	struct virtio_net *dev;
1104 	struct vhost_virtqueue *vq;
1105 
1106 	dev = get_device(vid);
1107 	if (unlikely(!dev))
1108 		return -1;
1109 
1110 	if (unlikely(!(dev->protocol_features &
1111 	    (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1112 		return 0;
1113 
1114 	if (unlikely(vq_is_packed(dev)))
1115 		return -1;
1116 
1117 	if (unlikely(vring_idx >= VHOST_MAX_VRING))
1118 		return -1;
1119 
1120 	vq = dev->virtqueue[vring_idx];
1121 	if (unlikely(!vq))
1122 		return -1;
1123 
1124 	if (unlikely(!vq->inflight_split))
1125 		return -1;
1126 
1127 	if (unlikely(idx >= vq->size))
1128 		return -1;
1129 
1130 	rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
1131 
1132 	vq->inflight_split->desc[idx].inflight = 0;
1133 
1134 	rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
1135 
1136 	vq->inflight_split->used_idx = last_used_idx;
1137 	return 0;
1138 }
1139 
1140 int
1141 rte_vhost_clr_inflight_desc_packed(int vid, uint16_t vring_idx,
1142 				   uint16_t head)
1143 {
1144 	struct rte_vhost_inflight_info_packed *inflight_info;
1145 	struct virtio_net *dev;
1146 	struct vhost_virtqueue *vq;
1147 
1148 	dev = get_device(vid);
1149 	if (unlikely(!dev))
1150 		return -1;
1151 
1152 	if (unlikely(!(dev->protocol_features &
1153 	    (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1154 		return 0;
1155 
1156 	if (unlikely(!vq_is_packed(dev)))
1157 		return -1;
1158 
1159 	if (unlikely(vring_idx >= VHOST_MAX_VRING))
1160 		return -1;
1161 
1162 	vq = dev->virtqueue[vring_idx];
1163 	if (unlikely(!vq))
1164 		return -1;
1165 
1166 	inflight_info = vq->inflight_packed;
1167 	if (unlikely(!inflight_info))
1168 		return -1;
1169 
1170 	if (unlikely(head >= vq->size))
1171 		return -1;
1172 
1173 	rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
1174 
1175 	inflight_info->desc[head].inflight = 0;
1176 
1177 	rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
1178 
1179 	inflight_info->old_free_head = inflight_info->free_head;
1180 	inflight_info->old_used_idx = inflight_info->used_idx;
1181 	inflight_info->old_used_wrap_counter = inflight_info->used_wrap_counter;
1182 
1183 	return 0;
1184 }
1185 
1186 int
1187 rte_vhost_set_last_inflight_io_split(int vid, uint16_t vring_idx,
1188 				     uint16_t idx)
1189 {
1190 	struct virtio_net *dev;
1191 	struct vhost_virtqueue *vq;
1192 
1193 	dev = get_device(vid);
1194 	if (unlikely(!dev))
1195 		return -1;
1196 
1197 	if (unlikely(!(dev->protocol_features &
1198 	    (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1199 		return 0;
1200 
1201 	if (unlikely(vq_is_packed(dev)))
1202 		return -1;
1203 
1204 	if (unlikely(vring_idx >= VHOST_MAX_VRING))
1205 		return -1;
1206 
1207 	vq = dev->virtqueue[vring_idx];
1208 	if (unlikely(!vq))
1209 		return -1;
1210 
1211 	if (unlikely(!vq->inflight_split))
1212 		return -1;
1213 
1214 	vq->inflight_split->last_inflight_io = idx;
1215 	return 0;
1216 }
1217 
1218 int
1219 rte_vhost_set_last_inflight_io_packed(int vid, uint16_t vring_idx,
1220 				      uint16_t head)
1221 {
1222 	struct rte_vhost_inflight_info_packed *inflight_info;
1223 	struct virtio_net *dev;
1224 	struct vhost_virtqueue *vq;
1225 	uint16_t last;
1226 
1227 	dev = get_device(vid);
1228 	if (unlikely(!dev))
1229 		return -1;
1230 
1231 	if (unlikely(!(dev->protocol_features &
1232 	    (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1233 		return 0;
1234 
1235 	if (unlikely(!vq_is_packed(dev)))
1236 		return -1;
1237 
1238 	if (unlikely(vring_idx >= VHOST_MAX_VRING))
1239 		return -1;
1240 
1241 	vq = dev->virtqueue[vring_idx];
1242 	if (unlikely(!vq))
1243 		return -1;
1244 
1245 	inflight_info = vq->inflight_packed;
1246 	if (unlikely(!inflight_info))
1247 		return -1;
1248 
1249 	if (unlikely(head >= vq->size))
1250 		return -1;
1251 
1252 	last = inflight_info->desc[head].last;
1253 	if (unlikely(last >= vq->size))
1254 		return -1;
1255 
1256 	inflight_info->desc[last].next = inflight_info->free_head;
1257 	inflight_info->free_head = head;
1258 	inflight_info->used_idx += inflight_info->desc[head].num;
1259 	if (inflight_info->used_idx >= inflight_info->desc_num) {
1260 		inflight_info->used_idx -= inflight_info->desc_num;
1261 		inflight_info->used_wrap_counter =
1262 			!inflight_info->used_wrap_counter;
1263 	}
1264 
1265 	return 0;
1266 }
1267 
1268 int
1269 rte_vhost_vring_call(int vid, uint16_t vring_idx)
1270 {
1271 	struct virtio_net *dev;
1272 	struct vhost_virtqueue *vq;
1273 
1274 	dev = get_device(vid);
1275 	if (!dev)
1276 		return -1;
1277 
1278 	if (vring_idx >= VHOST_MAX_VRING)
1279 		return -1;
1280 
1281 	vq = dev->virtqueue[vring_idx];
1282 	if (!vq)
1283 		return -1;
1284 
1285 	if (vq_is_packed(dev))
1286 		vhost_vring_call_packed(dev, vq);
1287 	else
1288 		vhost_vring_call_split(dev, vq);
1289 
1290 	return 0;
1291 }
1292 
1293 uint16_t
1294 rte_vhost_avail_entries(int vid, uint16_t queue_id)
1295 {
1296 	struct virtio_net *dev;
1297 	struct vhost_virtqueue *vq;
1298 	uint16_t ret = 0;
1299 
1300 	dev = get_device(vid);
1301 	if (!dev)
1302 		return 0;
1303 
1304 	if (queue_id >= VHOST_MAX_VRING)
1305 		return 0;
1306 
1307 	vq = dev->virtqueue[queue_id];
1308 	if (!vq)
1309 		return 0;
1310 
1311 	rte_spinlock_lock(&vq->access_lock);
1312 
1313 	if (unlikely(!vq->enabled || vq->avail == NULL))
1314 		goto out;
1315 
1316 	ret = *(volatile uint16_t *)&vq->avail->idx - vq->last_used_idx;
1317 
1318 out:
1319 	rte_spinlock_unlock(&vq->access_lock);
1320 	return ret;
1321 }
1322 
1323 static inline int
1324 vhost_enable_notify_split(struct virtio_net *dev,
1325 		struct vhost_virtqueue *vq, int enable)
1326 {
1327 	if (vq->used == NULL)
1328 		return -1;
1329 
1330 	if (!(dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))) {
1331 		if (enable)
1332 			vq->used->flags &= ~VRING_USED_F_NO_NOTIFY;
1333 		else
1334 			vq->used->flags |= VRING_USED_F_NO_NOTIFY;
1335 	} else {
1336 		if (enable)
1337 			vhost_avail_event(vq) = vq->last_avail_idx;
1338 	}
1339 	return 0;
1340 }
1341 
1342 static inline int
1343 vhost_enable_notify_packed(struct virtio_net *dev,
1344 		struct vhost_virtqueue *vq, int enable)
1345 {
1346 	uint16_t flags;
1347 
1348 	if (vq->device_event == NULL)
1349 		return -1;
1350 
1351 	if (!enable) {
1352 		vq->device_event->flags = VRING_EVENT_F_DISABLE;
1353 		return 0;
1354 	}
1355 
1356 	flags = VRING_EVENT_F_ENABLE;
1357 	if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX)) {
1358 		flags = VRING_EVENT_F_DESC;
1359 		vq->device_event->off_wrap = vq->last_avail_idx |
1360 			vq->avail_wrap_counter << 15;
1361 	}
1362 
1363 	rte_atomic_thread_fence(__ATOMIC_RELEASE);
1364 
1365 	vq->device_event->flags = flags;
1366 	return 0;
1367 }
1368 
1369 int
1370 vhost_enable_guest_notification(struct virtio_net *dev,
1371 		struct vhost_virtqueue *vq, int enable)
1372 {
1373 	/*
1374 	 * If the virtqueue is not ready yet, the setting will be
1375 	 * applied when it becomes ready.
1376 	 */
1377 	if (!vq->ready)
1378 		return 0;
1379 
1380 	if (vq_is_packed(dev))
1381 		return vhost_enable_notify_packed(dev, vq, enable);
1382 	else
1383 		return vhost_enable_notify_split(dev, vq, enable);
1384 }
1385 
1386 int
1387 rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable)
1388 {
1389 	struct virtio_net *dev = get_device(vid);
1390 	struct vhost_virtqueue *vq;
1391 	int ret;
1392 
1393 	if (!dev)
1394 		return -1;
1395 
1396 	if (queue_id >= VHOST_MAX_VRING)
1397 		return -1;
1398 
1399 	vq = dev->virtqueue[queue_id];
1400 	if (!vq)
1401 		return -1;
1402 
1403 	rte_spinlock_lock(&vq->access_lock);
1404 
1405 	vq->notif_enable = enable;
1406 	ret = vhost_enable_guest_notification(dev, vq, enable);
1407 
1408 	rte_spinlock_unlock(&vq->access_lock);
1409 
1410 	return ret;
1411 }
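
/*
 * Illustrative usage sketch, not part of vhost.c: a busy-polling backend
 * usually disables guest notifications (kicks) while it polls and re-enables
 * them before waiting. "example_set_polling" and its "polling" flag are
 * hypothetical.
 */
static __rte_unused void
example_set_polling(int vid, uint16_t queue_id, bool polling)
{
	/* enable = 0: guest need not kick us; enable = 1: ask for kicks again */
	if (rte_vhost_enable_guest_notification(vid, queue_id,
						polling ? 0 : 1) < 0)
		VHOST_LOG_CONFIG(ERR,
			"Failed to toggle notifications (vid %d, qid: %u)\n",
			vid, queue_id);
}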
1412 
1413 void
1414 rte_vhost_log_write(int vid, uint64_t addr, uint64_t len)
1415 {
1416 	struct virtio_net *dev = get_device(vid);
1417 
1418 	if (dev == NULL)
1419 		return;
1420 
1421 	vhost_log_write(dev, addr, len);
1422 }
1423 
1424 void
1425 rte_vhost_log_used_vring(int vid, uint16_t vring_idx,
1426 			 uint64_t offset, uint64_t len)
1427 {
1428 	struct virtio_net *dev;
1429 	struct vhost_virtqueue *vq;
1430 
1431 	dev = get_device(vid);
1432 	if (dev == NULL)
1433 		return;
1434 
1435 	if (vring_idx >= VHOST_MAX_VRING)
1436 		return;
1437 	vq = dev->virtqueue[vring_idx];
1438 	if (!vq)
1439 		return;
1440 
1441 	vhost_log_used_vring(dev, vq, offset, len);
1442 }
1443 
1444 uint32_t
1445 rte_vhost_rx_queue_count(int vid, uint16_t qid)
1446 {
1447 	struct virtio_net *dev;
1448 	struct vhost_virtqueue *vq;
1449 	uint32_t ret = 0;
1450 
1451 	dev = get_device(vid);
1452 	if (dev == NULL)
1453 		return 0;
1454 
1455 	if (unlikely(qid >= dev->nr_vring || (qid & 1) == 0)) {
1456 		VHOST_LOG_DATA(ERR, "(%d) %s: invalid virtqueue idx %d.\n",
1457 			dev->vid, __func__, qid);
1458 		return 0;
1459 	}
1460 
1461 	vq = dev->virtqueue[qid];
1462 	if (vq == NULL)
1463 		return 0;
1464 
1465 	rte_spinlock_lock(&vq->access_lock);
1466 
1467 	if (unlikely(!vq->enabled || vq->avail == NULL))
1468 		goto out;
1469 
1470 	ret = *((volatile uint16_t *)&vq->avail->idx) - vq->last_avail_idx;
1471 
1472 out:
1473 	rte_spinlock_unlock(&vq->access_lock);
1474 	return ret;
1475 }
1476 
1477 struct rte_vdpa_device *
1478 rte_vhost_get_vdpa_device(int vid)
1479 {
1480 	struct virtio_net *dev = get_device(vid);
1481 
1482 	if (dev == NULL)
1483 		return NULL;
1484 
1485 	return dev->vdpa_dev;
1486 }
1487 
1488 int rte_vhost_get_log_base(int vid, uint64_t *log_base,
1489 		uint64_t *log_size)
1490 {
1491 	struct virtio_net *dev = get_device(vid);
1492 
1493 	if (dev == NULL || log_base == NULL || log_size == NULL)
1494 		return -1;
1495 
1496 	*log_base = dev->log_base;
1497 	*log_size = dev->log_size;
1498 
1499 	return 0;
1500 }
1501 
1502 int rte_vhost_get_vring_base(int vid, uint16_t queue_id,
1503 		uint16_t *last_avail_idx, uint16_t *last_used_idx)
1504 {
1505 	struct vhost_virtqueue *vq;
1506 	struct virtio_net *dev = get_device(vid);
1507 
1508 	if (dev == NULL || last_avail_idx == NULL || last_used_idx == NULL)
1509 		return -1;
1510 
1511 	if (queue_id >= VHOST_MAX_VRING)
1512 		return -1;
1513 
1514 	vq = dev->virtqueue[queue_id];
1515 	if (!vq)
1516 		return -1;
1517 
1518 	if (vq_is_packed(dev)) {
1519 		*last_avail_idx = (vq->avail_wrap_counter << 15) |
1520 				  vq->last_avail_idx;
1521 		*last_used_idx = (vq->used_wrap_counter << 15) |
1522 				 vq->last_used_idx;
1523 	} else {
1524 		*last_avail_idx = vq->last_avail_idx;
1525 		*last_used_idx = vq->last_used_idx;
1526 	}
1527 
1528 	return 0;
1529 }
1530 
1531 int rte_vhost_set_vring_base(int vid, uint16_t queue_id,
1532 		uint16_t last_avail_idx, uint16_t last_used_idx)
1533 {
1534 	struct vhost_virtqueue *vq;
1535 	struct virtio_net *dev = get_device(vid);
1536 
1537 	if (!dev)
1538 		return -1;
1539 
1540 	if (queue_id >= VHOST_MAX_VRING)
1541 		return -1;
1542 
1543 	vq = dev->virtqueue[queue_id];
1544 	if (!vq)
1545 		return -1;
1546 
1547 	if (vq_is_packed(dev)) {
1548 		vq->last_avail_idx = last_avail_idx & 0x7fff;
1549 		vq->avail_wrap_counter = !!(last_avail_idx & (1 << 15));
1550 		vq->last_used_idx = last_used_idx & 0x7fff;
1551 		vq->used_wrap_counter = !!(last_used_idx & (1 << 15));
1552 	} else {
1553 		vq->last_avail_idx = last_avail_idx;
1554 		vq->last_used_idx = last_used_idx;
1555 	}
1556 
1557 	return 0;
1558 }
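
/*
 * Illustrative sketch, not part of vhost.c: for packed rings, the "base"
 * values handled by the two functions above carry the wrap counter in
 * bit 15 and the ring index in the low 15 bits. This hypothetical helper
 * splits one such value apart, mirroring rte_vhost_set_vring_base().
 */
static inline void
example_split_packed_base(uint16_t base, uint16_t *idx, bool *wrap_counter)
{
	*idx = base & 0x7fff;			/* descriptor index */
	*wrap_counter = !!(base & (1 << 15));	/* wrap counter bit */
}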
1559 
1560 int
1561 rte_vhost_get_vring_base_from_inflight(int vid,
1562 				       uint16_t queue_id,
1563 				       uint16_t *last_avail_idx,
1564 				       uint16_t *last_used_idx)
1565 {
1566 	struct rte_vhost_inflight_info_packed *inflight_info;
1567 	struct vhost_virtqueue *vq;
1568 	struct virtio_net *dev = get_device(vid);
1569 
1570 	if (dev == NULL || last_avail_idx == NULL || last_used_idx == NULL)
1571 		return -1;
1572 
1573 	if (queue_id >= VHOST_MAX_VRING)
1574 		return -1;
1575 
1576 	vq = dev->virtqueue[queue_id];
1577 	if (!vq)
1578 		return -1;
1579 
1580 	if (!vq_is_packed(dev))
1581 		return -1;
1582 
1583 	inflight_info = vq->inflight_packed;
1584 	if (!inflight_info)
1585 		return -1;
1586 
1587 	*last_avail_idx = (inflight_info->old_used_wrap_counter << 15) |
1588 			  inflight_info->old_used_idx;
1589 	*last_used_idx = *last_avail_idx;
1590 
1591 	return 0;
1592 }
1593 
1594 int rte_vhost_extern_callback_register(int vid,
1595 		struct rte_vhost_user_extern_ops const * const ops, void *ctx)
1596 {
1597 	struct virtio_net *dev = get_device(vid);
1598 
1599 	if (dev == NULL || ops == NULL)
1600 		return -1;
1601 
1602 	dev->extern_ops = *ops;
1603 	dev->extern_data = ctx;
1604 	return 0;
1605 }
1606 
1607 int rte_vhost_async_channel_register(int vid, uint16_t queue_id,
1608 					uint32_t features,
1609 					struct rte_vhost_async_channel_ops *ops)
1610 {
1611 	struct vhost_virtqueue *vq;
1612 	struct virtio_net *dev = get_device(vid);
1613 	struct rte_vhost_async_features f;
1614 	int node;
1615 
1616 	if (dev == NULL || ops == NULL)
1617 		return -1;
1618 
1619 	f.intval = features;
1620 
1621 	if (queue_id >= VHOST_MAX_VRING)
1622 		return -1;
1623 
1624 	vq = dev->virtqueue[queue_id];
1625 
1626 	if (unlikely(vq == NULL || !dev->async_copy))
1627 		return -1;
1628 
1629 	if (unlikely(!f.async_inorder)) {
1630 		VHOST_LOG_CONFIG(ERR,
1631 			"async copy is not supported on non-inorder mode "
1632 			"(vid %d, qid: %d)\n", vid, queue_id);
1633 		return -1;
1634 	}
1635 
1636 	if (unlikely(ops->check_completed_copies == NULL ||
1637 		ops->transfer_data == NULL))
1638 		return -1;
1639 
1640 	rte_spinlock_lock(&vq->access_lock);
1641 
1642 	if (unlikely(vq->async_registered)) {
1643 		VHOST_LOG_CONFIG(ERR,
1644 			"async register failed: channel already registered "
1645 			"(vid %d, qid: %d)\n", vid, queue_id);
1646 		goto reg_out;
1647 	}
1648 
1649 #ifdef RTE_LIBRTE_VHOST_NUMA
1650 	if (get_mempolicy(&node, NULL, 0, vq, MPOL_F_NODE | MPOL_F_ADDR)) {
1651 		VHOST_LOG_CONFIG(ERR,
1652 			"unable to get numa information in async register. "
1653 			"allocating async buffer memory on the caller thread node\n");
1654 		node = SOCKET_ID_ANY;
1655 	}
1656 #else
1657 	node = SOCKET_ID_ANY;
1658 #endif
1659 
1660 	vq->async_pkts_info = rte_malloc_socket(NULL,
1661 			vq->size * sizeof(struct async_inflight_info),
1662 			RTE_CACHE_LINE_SIZE, node);
1663 	if (!vq->async_pkts_info) {
1664 		vhost_free_async_mem(vq);
1665 		VHOST_LOG_CONFIG(ERR,
1666 			"async register failed: cannot allocate memory for async_pkts_info "
1667 			"(vid %d, qid: %d)\n", vid, queue_id);
1668 		goto reg_out;
1669 	}
1670 
1671 	vq->it_pool = rte_malloc_socket(NULL,
1672 			VHOST_MAX_ASYNC_IT * sizeof(struct rte_vhost_iov_iter),
1673 			RTE_CACHE_LINE_SIZE, node);
1674 	if (!vq->it_pool) {
1675 		vhost_free_async_mem(vq);
1676 		VHOST_LOG_CONFIG(ERR,
1677 			"async register failed: cannot allocate memory for it_pool "
1678 			"(vid %d, qid: %d)\n", vid, queue_id);
1679 		goto reg_out;
1680 	}
1681 
1682 	vq->vec_pool = rte_malloc_socket(NULL,
1683 			VHOST_MAX_ASYNC_VEC * sizeof(struct iovec),
1684 			RTE_CACHE_LINE_SIZE, node);
1685 	if (!vq->vec_pool) {
1686 		vhost_free_async_mem(vq);
1687 		VHOST_LOG_CONFIG(ERR,
1688 			"async register failed: cannot allocate memory for vec_pool "
1689 			"(vid %d, qid: %d)\n", vid, queue_id);
1690 		goto reg_out;
1691 	}
1692 
1693 	if (vq_is_packed(dev)) {
1694 		vq->async_buffers_packed = rte_malloc_socket(NULL,
1695 			vq->size * sizeof(struct vring_used_elem_packed),
1696 			RTE_CACHE_LINE_SIZE, node);
1697 		if (!vq->async_buffers_packed) {
1698 			vhost_free_async_mem(vq);
1699 			VHOST_LOG_CONFIG(ERR,
1700 				"async register failed: cannot allocate memory for async buffers "
1701 				"(vid %d, qid: %d)\n", vid, queue_id);
1702 			goto reg_out;
1703 		}
1704 	} else {
1705 		vq->async_descs_split = rte_malloc_socket(NULL,
1706 			vq->size * sizeof(struct vring_used_elem),
1707 			RTE_CACHE_LINE_SIZE, node);
1708 		if (!vq->async_descs_split) {
1709 			vhost_free_async_mem(vq);
1710 			VHOST_LOG_CONFIG(ERR,
1711 				"async register failed: cannot allocate memory for async descs "
1712 				"(vid %d, qid: %d)\n", vid, queue_id);
1713 			goto reg_out;
1714 		}
1715 	}
1716 
1717 	vq->async_ops.check_completed_copies = ops->check_completed_copies;
1718 	vq->async_ops.transfer_data = ops->transfer_data;
1719 
1720 	vq->async_inorder = f.async_inorder;
1721 	vq->async_threshold = f.async_threshold;
1722 
1723 	vq->async_registered = true;
1724 
1725 reg_out:
1726 	rte_spinlock_unlock(&vq->access_lock);
1727 
1728 	return 0;
1729 }
1730 
1731 int rte_vhost_async_channel_unregister(int vid, uint16_t queue_id)
1732 {
1733 	struct vhost_virtqueue *vq;
1734 	struct virtio_net *dev = get_device(vid);
1735 	int ret = -1;
1736 
1737 	if (dev == NULL)
1738 		return ret;
1739 
1740 	if (queue_id >= VHOST_MAX_VRING)
1741 		return ret;
1742 
1743 	vq = dev->virtqueue[queue_id];
1744 
1745 	if (vq == NULL)
1746 		return ret;
1747 
1748 	ret = 0;
1749 
1750 	if (!vq->async_registered)
1751 		return ret;
1752 
1753 	if (!rte_spinlock_trylock(&vq->access_lock)) {
1754 		VHOST_LOG_CONFIG(ERR, "Failed to unregister async channel. "
1755 			"virt queue busy.\n");
1756 		return -1;
1757 	}
1758 
1759 	if (vq->async_pkts_inflight_n) {
1760 		VHOST_LOG_CONFIG(ERR, "Failed to unregister async channel. "
1761 			"async inflight packets must be completed before unregistration.\n");
1762 		ret = -1;
1763 		goto out;
1764 	}
1765 
1766 	vhost_free_async_mem(vq);
1767 
1768 	vq->async_ops.transfer_data = NULL;
1769 	vq->async_ops.check_completed_copies = NULL;
1770 	vq->async_registered = false;
1771 
1772 out:
1773 	rte_spinlock_unlock(&vq->access_lock);
1774 
1775 	return ret;
1776 }
1777 
1778 RTE_LOG_REGISTER(vhost_config_log_level, lib.vhost.config, INFO);
1779 RTE_LOG_REGISTER(vhost_data_log_level, lib.vhost.data, WARNING);
1780