xref: /dpdk/lib/vhost/vhost.c (revision 15677ca2c751b3be2f02429bb006d859dccae0c0)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2017 Intel Corporation
3  */
4 
5 #include <linux/vhost.h>
6 #include <linux/virtio_net.h>
7 #include <stdint.h>
8 #include <stdlib.h>
9 #include <pthread.h>
10 #ifdef RTE_LIBRTE_VHOST_NUMA
11 #include <numa.h>
12 #include <numaif.h>
13 #endif
14 
15 #include <rte_errno.h>
16 #include <rte_log.h>
17 #include <rte_memory.h>
18 #include <rte_malloc.h>
19 #include <rte_vhost.h>
20 
21 #include "iotlb.h"
22 #include "vhost.h"
23 #include "vhost_user.h"
24 
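/*
 * Table of all vhost devices, indexed by the vid returned by
 * vhost_new_device().  Slot allocation and release are protected by
 * vhost_dev_lock; vhost_dma_lock serializes the async DMA
 * (un)configuration routines further below.
 */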
25 struct virtio_net *vhost_devices[RTE_MAX_VHOST_DEVICE];
26 pthread_mutex_t vhost_dev_lock = PTHREAD_MUTEX_INITIALIZER;
27 pthread_mutex_t vhost_dma_lock = PTHREAD_MUTEX_INITIALIZER;
28 
29 struct vhost_vq_stats_name_off {
30 	char name[RTE_VHOST_STATS_NAME_SIZE];
31 	unsigned int offset;
32 };
33 
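/*
 * Per-virtqueue statistics exported through rte_vhost_vring_stats_get():
 * each entry maps an xstat name to the offset of the matching counter
 * inside struct vhost_virtqueue, so the getter below can read it with a
 * single offsetof()-based access.
 */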
34 static const struct vhost_vq_stats_name_off vhost_vq_stat_strings[] = {
35 	{"good_packets",           offsetof(struct vhost_virtqueue, stats.packets)},
36 	{"good_bytes",             offsetof(struct vhost_virtqueue, stats.bytes)},
37 	{"multicast_packets",      offsetof(struct vhost_virtqueue, stats.multicast)},
38 	{"broadcast_packets",      offsetof(struct vhost_virtqueue, stats.broadcast)},
39 	{"undersize_packets",      offsetof(struct vhost_virtqueue, stats.size_bins[0])},
40 	{"size_64_packets",        offsetof(struct vhost_virtqueue, stats.size_bins[1])},
41 	{"size_65_127_packets",    offsetof(struct vhost_virtqueue, stats.size_bins[2])},
42 	{"size_128_255_packets",   offsetof(struct vhost_virtqueue, stats.size_bins[3])},
43 	{"size_256_511_packets",   offsetof(struct vhost_virtqueue, stats.size_bins[4])},
44 	{"size_512_1023_packets",  offsetof(struct vhost_virtqueue, stats.size_bins[5])},
45 	{"size_1024_1518_packets", offsetof(struct vhost_virtqueue, stats.size_bins[6])},
46 	{"size_1519_max_packets",  offsetof(struct vhost_virtqueue, stats.size_bins[7])},
47 	{"guest_notifications",    offsetof(struct vhost_virtqueue, stats.guest_notifications)},
48 	{"guest_notifications_offloaded", offsetof(struct vhost_virtqueue,
49 		stats.guest_notifications_offloaded)},
50 	{"guest_notifications_error", offsetof(struct vhost_virtqueue,
51 		stats.guest_notifications_error)},
52 	{"guest_notifications_suppressed", offsetof(struct vhost_virtqueue,
53 		stats.guest_notifications_suppressed)},
54 	{"iotlb_hits",             offsetof(struct vhost_virtqueue, stats.iotlb_hits)},
55 	{"iotlb_misses",           offsetof(struct vhost_virtqueue, stats.iotlb_misses)},
56 	{"inflight_submitted",     offsetof(struct vhost_virtqueue, stats.inflight_submitted)},
57 	{"inflight_completed",     offsetof(struct vhost_virtqueue, stats.inflight_completed)},
58 	{"mbuf_alloc_failed",      offsetof(struct vhost_virtqueue, stats.mbuf_alloc_failed)},
59 };
60 
61 #define VHOST_NB_VQ_STATS RTE_DIM(vhost_vq_stat_strings)
62 
63 static int
64 vhost_iotlb_miss(struct virtio_net *dev, uint64_t iova, uint8_t perm)
65 {
66 	return dev->backend_ops->iotlb_miss(dev, iova, perm);
67 }
68 
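/*
 * Translate a guest IOVA to a host virtual address using the IOTLB cache.
 * On a cache miss, the iotlb read lock is temporarily released and an
 * IOTLB miss request is issued through the backend ops; the lookup is then
 * retried, which only succeeds right away for synchronous backends such as
 * VDUSE.  Returns 0 if no complete mapping covering *size bytes is found.
 */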
69 uint64_t
70 __vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq,
71 		    uint64_t iova, uint64_t *size, uint8_t perm)
72 {
73 	uint64_t vva, tmp_size;
74 
75 	if (unlikely(!*size))
76 		return 0;
77 
78 	tmp_size = *size;
79 
80 	vva = vhost_user_iotlb_cache_find(dev, iova, &tmp_size, perm);
81 	if (tmp_size == *size) {
82 		if (dev->flags & VIRTIO_DEV_STATS_ENABLED)
83 			vq->stats.iotlb_hits++;
84 		return vva;
85 	}
86 
87 	if (dev->flags & VIRTIO_DEV_STATS_ENABLED)
88 		vq->stats.iotlb_misses++;
89 
90 	iova += tmp_size;
91 
92 	if (!vhost_user_iotlb_pending_miss(dev, iova, perm)) {
93 		/*
94 		 * iotlb_lock is read-locked for a full burst,
95 		 * but it only protects the iotlb cache.
96 		 * In case of IOTLB miss, we might block on the socket,
97 		 * which could cause a deadlock with QEMU if an IOTLB update
98 		 * is being handled. We can safely unlock here to avoid it.
99 		 */
100 		vhost_user_iotlb_rd_unlock(vq);
101 
102 		vhost_user_iotlb_pending_insert(dev, iova, perm);
103 		if (vhost_iotlb_miss(dev, iova, perm)) {
104 			VHOST_DATA_LOG(dev->ifname, ERR,
105 				"IOTLB miss req failed for IOVA 0x%" PRIx64,
106 				iova);
107 			vhost_user_iotlb_pending_remove(dev, iova, 1, perm);
108 		}
109 
110 		vhost_user_iotlb_rd_lock(vq);
111 	}
112 
113 	tmp_size = *size;
114 	/* Retry in case of VDUSE, as it is synchronous */
115 	vva = vhost_user_iotlb_cache_find(dev, iova, &tmp_size, perm);
116 	if (tmp_size == *size)
117 		return vva;
118 
119 	return 0;
120 }
121 
122 #define VHOST_LOG_PAGE	4096
123 
124 /*
125  * Atomically set a bit in memory.
126  */
127 static __rte_always_inline void
128 vhost_set_bit(unsigned int nr, volatile uint8_t *addr)
129 {
130 #if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100)
131 	/*
132 	 * __sync_ built-ins are deprecated, but rte_atomic_ ones
133 	 * are poorly optimized in older GCC versions.
134 	 */
135 	__sync_fetch_and_or_1(addr, (1U << nr));
136 #else
137 	rte_atomic_fetch_or_explicit((volatile uint8_t __rte_atomic *)addr, (1U << nr),
138 		rte_memory_order_relaxed);
139 #endif
140 }
141 
142 static __rte_always_inline void
143 vhost_log_page(uint8_t *log_base, uint64_t page)
144 {
145 	vhost_set_bit(page % 8, &log_base[page / 8]);
146 }
147 
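/*
 * Dirty page logging uses one bit per VHOST_LOG_PAGE (4 KiB) page of guest
 * memory.  Illustrative example: logging addr = 0x3000 with len = 0x100
 * marks page 3 dirty, i.e. bit 3 of log_base[0]; a 0x100-byte write
 * starting at 0x3F80 would mark both page 3 and page 4.
 */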
148 void
149 __vhost_log_write(struct virtio_net *dev, uint64_t addr, uint64_t len)
150 {
151 	uint64_t page;
152 
153 	if (unlikely(!dev->log_base || !len))
154 		return;
155 
156 	if (unlikely(dev->log_size <= ((addr + len - 1) / VHOST_LOG_PAGE / 8)))
157 		return;
158 
159 	/* To make sure guest memory updates are committed before logging */
160 	rte_atomic_thread_fence(rte_memory_order_release);
161 
162 	page = addr / VHOST_LOG_PAGE;
163 	while (page * VHOST_LOG_PAGE < addr + len) {
164 		vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);
165 		page += 1;
166 	}
167 }
168 
169 void
170 __vhost_log_write_iova(struct virtio_net *dev, struct vhost_virtqueue *vq,
171 			     uint64_t iova, uint64_t len)
172 {
173 	uint64_t hva, gpa, map_len;
174 	map_len = len;
175 
176 	hva = __vhost_iova_to_vva(dev, vq, iova, &map_len, VHOST_ACCESS_RW);
177 	if (map_len != len) {
178 		VHOST_DATA_LOG(dev->ifname, ERR,
179 			"failed to write log for IOVA 0x%" PRIx64 ". No IOTLB entry found",
180 			iova);
181 		return;
182 	}
183 
184 	gpa = hva_to_gpa(dev, hva, len);
185 	if (gpa)
186 		__vhost_log_write(dev, gpa, len);
187 }
188 
189 void
190 __vhost_log_cache_sync(struct virtio_net *dev, struct vhost_virtqueue *vq)
191 {
192 	unsigned long *log_base;
193 	int i;
194 
195 	if (unlikely(!dev->log_base))
196 		return;
197 
198 	/* No cache, nothing to sync */
199 	if (unlikely(!vq->log_cache))
200 		return;
201 
202 	rte_atomic_thread_fence(rte_memory_order_release);
203 
204 	log_base = (unsigned long *)(uintptr_t)dev->log_base;
205 
206 	for (i = 0; i < vq->log_cache_nb_elem; i++) {
207 		struct log_cache_entry *elem = vq->log_cache + i;
208 
209 #if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100)
210 		/*
211 		 * '__sync' builtins are deprecated, but 'rte_atomic' ones
212 		 * are poorly optimized in older GCC versions.
213 		 */
214 		__sync_fetch_and_or(log_base + elem->offset, elem->val);
215 #else
216 		rte_atomic_fetch_or_explicit(
217 			(unsigned long __rte_atomic *)(log_base + elem->offset),
218 			elem->val, rte_memory_order_relaxed);
219 #endif
220 	}
221 
222 	rte_atomic_thread_fence(rte_memory_order_release);
223 
224 	vq->log_cache_nb_elem = 0;
225 }
226 
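/*
 * The log cache batches dirty bits in units of one unsigned long of the
 * bitmap: with 64-bit longs, page 130 maps to cache offset 2, bit 2
 * (130 / 64 and 130 % 64).  When no cache is allocated or all
 * VHOST_LOG_CACHE_NR entries are in use, the bit is written to the dirty
 * log map directly.
 */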
227 static __rte_always_inline void
228 vhost_log_cache_page(struct virtio_net *dev, struct vhost_virtqueue *vq,
229 			uint64_t page)
230 {
231 	uint32_t bit_nr = page % (sizeof(unsigned long) << 3);
232 	uint32_t offset = page / (sizeof(unsigned long) << 3);
233 	int i;
234 
235 	if (unlikely(!vq->log_cache)) {
236 		/* No logging cache allocated, write dirty log map directly */
237 		rte_atomic_thread_fence(rte_memory_order_release);
238 		vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);
239 
240 		return;
241 	}
242 
243 	for (i = 0; i < vq->log_cache_nb_elem; i++) {
244 		struct log_cache_entry *elem = vq->log_cache + i;
245 
246 		if (elem->offset == offset) {
247 			elem->val |= (1UL << bit_nr);
248 			return;
249 		}
250 	}
251 
252 	if (unlikely(i >= VHOST_LOG_CACHE_NR)) {
253 		/*
254 		 * No more room for a new log cache entry,
255 		 * so write the dirty log map directly.
256 		 */
257 		rte_atomic_thread_fence(rte_memory_order_release);
258 		vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);
259 
260 		return;
261 	}
262 
263 	vq->log_cache[i].offset = offset;
264 	vq->log_cache[i].val = (1UL << bit_nr);
265 	vq->log_cache_nb_elem++;
266 }
267 
268 void
269 __vhost_log_cache_write(struct virtio_net *dev, struct vhost_virtqueue *vq,
270 			uint64_t addr, uint64_t len)
271 {
272 	uint64_t page;
273 
274 	if (unlikely(!dev->log_base || !len))
275 		return;
276 
277 	if (unlikely(dev->log_size <= ((addr + len - 1) / VHOST_LOG_PAGE / 8)))
278 		return;
279 
280 	page = addr / VHOST_LOG_PAGE;
281 	while (page * VHOST_LOG_PAGE < addr + len) {
282 		vhost_log_cache_page(dev, vq, page);
283 		page += 1;
284 	}
285 }
286 
287 void
288 __vhost_log_cache_write_iova(struct virtio_net *dev, struct vhost_virtqueue *vq,
289 			     uint64_t iova, uint64_t len)
290 {
291 	uint64_t hva, gpa, map_len;
292 	map_len = len;
293 
294 	hva = __vhost_iova_to_vva(dev, vq, iova, &map_len, VHOST_ACCESS_RW);
295 	if (map_len != len) {
296 		VHOST_DATA_LOG(dev->ifname, ERR,
297 			"failed to write log for IOVA 0x%" PRIx64 ". No IOTLB entry found",
298 			iova);
299 		return;
300 	}
301 
302 	gpa = hva_to_gpa(dev, hva, len);
303 	if (gpa)
304 		__vhost_log_cache_write(dev, vq, gpa, len);
305 }
306 
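/*
 * Allocate a linear copy of an indirect descriptor table whose guest
 * mapping may be split across several IOTLB entries, copying it chunk by
 * chunk.  The buffer is allocated with rte_malloc_socket() on the
 * virtqueue's NUMA node, so the caller releases it with rte_free().
 */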
307 void *
308 vhost_alloc_copy_ind_table(struct virtio_net *dev, struct vhost_virtqueue *vq,
309 		uint64_t desc_addr, uint64_t desc_len)
310 {
311 	void *idesc;
312 	uint64_t src, dst;
313 	uint64_t len, remain = desc_len;
314 
315 	idesc = rte_malloc_socket(__func__, desc_len, 0, vq->numa_node);
316 	if (unlikely(!idesc))
317 		return NULL;
318 
319 	dst = (uint64_t)(uintptr_t)idesc;
320 
321 	while (remain) {
322 		len = remain;
323 		src = vhost_iova_to_vva(dev, vq, desc_addr, &len,
324 				VHOST_ACCESS_RO);
325 		if (unlikely(!src || !len)) {
326 			rte_free(idesc);
327 			return NULL;
328 		}
329 
330 		rte_memcpy((void *)(uintptr_t)dst, (void *)(uintptr_t)src, len);
331 
332 		remain -= len;
333 		dst += len;
334 		desc_addr += len;
335 	}
336 
337 	return idesc;
338 }
339 
340 void
341 cleanup_vq(struct vhost_virtqueue *vq, int destroy)
342 {
343 	if ((vq->callfd >= 0) && (destroy != 0))
344 		close(vq->callfd);
345 	if (vq->kickfd >= 0)
346 		close(vq->kickfd);
347 }
348 
349 void
350 cleanup_vq_inflight(struct virtio_net *dev, struct vhost_virtqueue *vq)
351 {
352 	if (!(dev->protocol_features &
353 	    (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)))
354 		return;
355 
356 	if (vq_is_packed(dev)) {
357 		if (vq->inflight_packed)
358 			vq->inflight_packed = NULL;
359 	} else {
360 		if (vq->inflight_split)
361 			vq->inflight_split = NULL;
362 	}
363 
364 	if (vq->resubmit_inflight) {
365 		if (vq->resubmit_inflight->resubmit_list) {
366 			rte_free(vq->resubmit_inflight->resubmit_list);
367 			vq->resubmit_inflight->resubmit_list = NULL;
368 		}
369 		rte_free(vq->resubmit_inflight);
370 		vq->resubmit_inflight = NULL;
371 	}
372 }
373 
374 /*
375  * Unmap any memory, close any file descriptors and
376  * free any memory owned by a device.
377  */
378 void
379 cleanup_device(struct virtio_net *dev, int destroy)
380 {
381 	uint32_t i;
382 
383 	vhost_backend_cleanup(dev);
384 
385 	for (i = 0; i < dev->nr_vring; i++) {
386 		cleanup_vq(dev->virtqueue[i], destroy);
387 		cleanup_vq_inflight(dev, dev->virtqueue[i]);
388 	}
389 }
390 
391 static void
392 vhost_free_async_mem(struct vhost_virtqueue *vq)
393 	__rte_exclusive_locks_required(&vq->access_lock)
394 {
395 	if (!vq->async)
396 		return;
397 
398 	rte_free(vq->async->pkts_info);
399 	rte_free(vq->async->pkts_cmpl_flag);
400 
401 	rte_free(vq->async->buffers_packed);
402 	vq->async->buffers_packed = NULL;
403 	rte_free(vq->async->descs_split);
404 	vq->async->descs_split = NULL;
405 
406 	rte_free(vq->async);
407 	vq->async = NULL;
408 }
409 
410 void
411 free_vq(struct virtio_net *dev, struct vhost_virtqueue *vq)
412 {
413 	if (vq_is_packed(dev))
414 		rte_free(vq->shadow_used_packed);
415 	else
416 		rte_free(vq->shadow_used_split);
417 
418 	rte_rwlock_write_lock(&vq->access_lock);
419 	vhost_free_async_mem(vq);
420 	rte_rwlock_write_unlock(&vq->access_lock);
421 	rte_free(vq->batch_copy_elems);
422 	rte_free(vq->log_cache);
423 	rte_free(vq);
424 }
425 
426 /*
427  * Release virtqueues and device memory.
428  */
429 static void
430 free_device(struct virtio_net *dev)
431 {
432 	uint32_t i;
433 
434 	for (i = 0; i < dev->nr_vring; i++)
435 		free_vq(dev, dev->virtqueue[i]);
436 
437 	rte_free(dev);
438 }
439 
440 static __rte_always_inline int
441 log_translate(struct virtio_net *dev, struct vhost_virtqueue *vq)
442 	__rte_shared_locks_required(&vq->iotlb_lock)
443 {
444 	if (likely(!(vq->ring_addrs.flags & (1 << VHOST_VRING_F_LOG))))
445 		return 0;
446 
447 	vq->log_guest_addr = translate_log_addr(dev, vq,
448 						vq->ring_addrs.log_guest_addr);
449 	if (vq->log_guest_addr == 0)
450 		return -1;
451 
452 	return 0;
453 }
454 
455 /*
456  * Convert a vring log address to a GPA.
457  * If an IOMMU is enabled, the log address is an IOVA.
458  * If no IOMMU is enabled, the log address is already a GPA.
459  */
460 uint64_t
461 translate_log_addr(struct virtio_net *dev, struct vhost_virtqueue *vq,
462 		uint64_t log_addr)
463 {
464 	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) {
465 		const uint64_t exp_size = sizeof(uint64_t);
466 		uint64_t hva, gpa;
467 		uint64_t size = exp_size;
468 
469 		hva = vhost_iova_to_vva(dev, vq, log_addr,
470 					&size, VHOST_ACCESS_RW);
471 
472 		if (size != exp_size)
473 			return 0;
474 
475 		gpa = hva_to_gpa(dev, hva, exp_size);
476 		if (!gpa) {
477 			VHOST_DATA_LOG(dev->ifname, ERR,
478 				"failed to find GPA for log_addr: 0x%"
479 				PRIx64 " hva: 0x%" PRIx64,
480 				log_addr, hva);
481 			return 0;
482 		}
483 		return gpa;
484 
485 	} else
486 		return log_addr;
487 }
488 
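/*
 * Translate the split ring addresses (descriptor table, avail and used
 * rings) through the IOTLB.  The avail/used sizes include the trailing
 * used_event/avail_event field when VIRTIO_RING_F_EVENT_IDX has been
 * negotiated; a partial translation is treated as a failure.
 */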
489 static int
490 vring_translate_split(struct virtio_net *dev, struct vhost_virtqueue *vq)
491 	__rte_shared_locks_required(&vq->iotlb_lock)
492 {
493 	uint64_t req_size, size;
494 
495 	req_size = sizeof(struct vring_desc) * vq->size;
496 	size = req_size;
497 	vq->desc = (struct vring_desc *)(uintptr_t)vhost_iova_to_vva(dev, vq,
498 						vq->ring_addrs.desc_user_addr,
499 						&size, VHOST_ACCESS_RW);
500 	if (!vq->desc || size != req_size)
501 		return -1;
502 
503 	req_size = sizeof(struct vring_avail);
504 	req_size += sizeof(uint16_t) * vq->size;
505 	if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))
506 		req_size += sizeof(uint16_t);
507 	size = req_size;
508 	vq->avail = (struct vring_avail *)(uintptr_t)vhost_iova_to_vva(dev, vq,
509 						vq->ring_addrs.avail_user_addr,
510 						&size, VHOST_ACCESS_RW);
511 	if (!vq->avail || size != req_size)
512 		return -1;
513 
514 	req_size = sizeof(struct vring_used);
515 	req_size += sizeof(struct vring_used_elem) * vq->size;
516 	if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))
517 		req_size += sizeof(uint16_t);
518 	size = req_size;
519 	vq->used = (struct vring_used *)(uintptr_t)vhost_iova_to_vva(dev, vq,
520 						vq->ring_addrs.used_user_addr,
521 						&size, VHOST_ACCESS_RW);
522 	if (!vq->used || size != req_size)
523 		return -1;
524 
525 	return 0;
526 }
527 
528 static int
529 vring_translate_packed(struct virtio_net *dev, struct vhost_virtqueue *vq)
530 	__rte_shared_locks_required(&vq->iotlb_lock)
531 {
532 	uint64_t req_size, size;
533 
534 	req_size = sizeof(struct vring_packed_desc) * vq->size;
535 	size = req_size;
536 	vq->desc_packed = (struct vring_packed_desc *)(uintptr_t)
537 		vhost_iova_to_vva(dev, vq, vq->ring_addrs.desc_user_addr,
538 				&size, VHOST_ACCESS_RW);
539 	if (!vq->desc_packed || size != req_size)
540 		return -1;
541 
542 	req_size = sizeof(struct vring_packed_desc_event);
543 	size = req_size;
544 	vq->driver_event = (struct vring_packed_desc_event *)(uintptr_t)
545 		vhost_iova_to_vva(dev, vq, vq->ring_addrs.avail_user_addr,
546 				&size, VHOST_ACCESS_RW);
547 	if (!vq->driver_event || size != req_size)
548 		return -1;
549 
550 	req_size = sizeof(struct vring_packed_desc_event);
551 	size = req_size;
552 	vq->device_event = (struct vring_packed_desc_event *)(uintptr_t)
553 		vhost_iova_to_vva(dev, vq, vq->ring_addrs.used_user_addr,
554 				&size, VHOST_ACCESS_RW);
555 	if (!vq->device_event || size != req_size)
556 		return -1;
557 
558 	return 0;
559 }
560 
561 int
562 vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq)
563 {
564 
565 	if (!(dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)))
566 		return -1;
567 
568 	if (vq_is_packed(dev)) {
569 		if (vring_translate_packed(dev, vq) < 0)
570 			return -1;
571 	} else {
572 		if (vring_translate_split(dev, vq) < 0)
573 			return -1;
574 	}
575 
576 	if (log_translate(dev, vq) < 0)
577 		return -1;
578 
579 	vq->access_ok = true;
580 
581 	return 0;
582 }
583 
584 void
585 vring_invalidate(struct virtio_net *dev __rte_unused, struct vhost_virtqueue *vq)
586 {
587 	vhost_user_iotlb_wr_lock(vq);
588 
589 	vq->access_ok = false;
590 	vq->desc = NULL;
591 	vq->avail = NULL;
592 	vq->used = NULL;
593 	vq->log_guest_addr = 0;
594 
595 	vhost_user_iotlb_wr_unlock(vq);
596 }
597 
598 static void
599 init_vring_queue(struct virtio_net *dev __rte_unused, struct vhost_virtqueue *vq,
600 	uint32_t vring_idx)
601 {
602 	int numa_node = SOCKET_ID_ANY;
603 
604 	memset(vq, 0, sizeof(struct vhost_virtqueue));
605 
606 	vq->index = vring_idx;
607 	vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD;
608 	vq->callfd = VIRTIO_UNINITIALIZED_EVENTFD;
609 	vq->notif_enable = VIRTIO_UNINITIALIZED_NOTIF;
610 
611 #ifdef RTE_LIBRTE_VHOST_NUMA
612 	if (get_mempolicy(&numa_node, NULL, 0, vq, MPOL_F_NODE | MPOL_F_ADDR)) {
613 		VHOST_CONFIG_LOG(dev->ifname, ERR, "failed to query numa node: %s",
614 			rte_strerror(errno));
615 		numa_node = SOCKET_ID_ANY;
616 	}
617 #endif
618 	vq->numa_node = numa_node;
619 }
620 
621 static void
622 reset_vring_queue(struct virtio_net *dev, struct vhost_virtqueue *vq)
623 {
624 	int callfd;
625 
626 	callfd = vq->callfd;
627 	init_vring_queue(dev, vq, vq->index);
628 	vq->callfd = callfd;
629 }
630 
631 int
632 alloc_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
633 {
634 	struct vhost_virtqueue *vq;
635 	uint32_t i;
636 
637 	/* Also allocate holes, if any, up to requested vring index. */
638 	for (i = 0; i <= vring_idx; i++) {
639 		if (dev->virtqueue[i])
640 			continue;
641 
642 		vq = rte_zmalloc(NULL, sizeof(struct vhost_virtqueue), 0);
643 		if (vq == NULL) {
644 			VHOST_CONFIG_LOG(dev->ifname, ERR,
645 				"failed to allocate memory for vring %u.",
646 				i);
647 			return -1;
648 		}
649 
650 		dev->virtqueue[i] = vq;
651 		init_vring_queue(dev, vq, i);
652 		rte_rwlock_init(&vq->access_lock);
653 		rte_rwlock_init(&vq->iotlb_lock);
654 		vq->avail_wrap_counter = 1;
655 		vq->used_wrap_counter = 1;
656 		vq->signalled_used_valid = false;
657 	}
658 
659 	dev->nr_vring = RTE_MAX(dev->nr_vring, vring_idx + 1);
660 
661 	return 0;
662 }
663 
664 /*
665  * Reset some variables in the device structure, while keeping a few
666  * others untouched, such as vid, ifname and nr_vring: they
667  * should remain the same unless the device is removed.
668  */
669 void
670 reset_device(struct virtio_net *dev)
671 {
672 	uint32_t i;
673 
674 	dev->features = 0;
675 	dev->protocol_features = 0;
676 	dev->flags &= VIRTIO_DEV_BUILTIN_VIRTIO_NET;
677 
678 	for (i = 0; i < dev->nr_vring; i++) {
679 		struct vhost_virtqueue *vq = dev->virtqueue[i];
680 
681 		if (!vq) {
682 			VHOST_CONFIG_LOG(dev->ifname, ERR,
683 				"failed to reset vring, virtqueue not allocated (%d)", i);
684 			continue;
685 		}
686 		reset_vring_queue(dev, vq);
687 	}
688 }
689 
690 /*
691  * Invoked when a new vhost-user connection is established (i.e. when
692  * a new virtio device is being attached).
693  */
694 int
695 vhost_new_device(struct vhost_backend_ops *ops)
696 {
697 	struct virtio_net *dev;
698 	int i;
699 
700 	if (ops == NULL) {
701 		VHOST_CONFIG_LOG("device", ERR, "missing backend ops.");
702 		return -1;
703 	}
704 
705 	if (ops->iotlb_miss == NULL) {
706 		VHOST_CONFIG_LOG("device", ERR, "missing IOTLB miss backend op.");
707 		return -1;
708 	}
709 
710 	if (ops->inject_irq == NULL) {
711 		VHOST_CONFIG_LOG("device", ERR, "missing IRQ injection backend op.");
712 		return -1;
713 	}
714 
715 	pthread_mutex_lock(&vhost_dev_lock);
716 	for (i = 0; i < RTE_MAX_VHOST_DEVICE; i++) {
717 		if (vhost_devices[i] == NULL)
718 			break;
719 	}
720 
721 	if (i == RTE_MAX_VHOST_DEVICE) {
722 		VHOST_CONFIG_LOG("device", ERR, "failed to find a free slot for new device.");
723 		pthread_mutex_unlock(&vhost_dev_lock);
724 		return -1;
725 	}
726 
727 	dev = rte_zmalloc(NULL, sizeof(struct virtio_net), 0);
728 	if (dev == NULL) {
729 		VHOST_CONFIG_LOG("device", ERR, "failed to allocate memory for new device.");
730 		pthread_mutex_unlock(&vhost_dev_lock);
731 		return -1;
732 	}
733 
734 	vhost_devices[i] = dev;
735 	pthread_mutex_unlock(&vhost_dev_lock);
736 
737 	dev->vid = i;
738 	dev->flags = VIRTIO_DEV_BUILTIN_VIRTIO_NET;
739 	dev->backend_req_fd = -1;
740 	dev->postcopy_ufd = -1;
741 	rte_spinlock_init(&dev->backend_req_lock);
742 	dev->backend_ops = ops;
743 
744 	return i;
745 }
746 
747 void
748 vhost_destroy_device_notify(struct virtio_net *dev)
749 {
750 	struct rte_vdpa_device *vdpa_dev;
751 
752 	if (dev->flags & VIRTIO_DEV_RUNNING) {
753 		vdpa_dev = dev->vdpa_dev;
754 		if (vdpa_dev)
755 			vdpa_dev->ops->dev_close(dev->vid);
756 		dev->flags &= ~VIRTIO_DEV_RUNNING;
757 		dev->notify_ops->destroy_device(dev->vid);
758 	}
759 }
760 
761 /*
762  * Invoked when the vhost-user connection is broken (i.e. when
763  * the virtio device is being detached).
764  */
765 void
766 vhost_destroy_device(int vid)
767 {
768 	struct virtio_net *dev = get_device(vid);
769 
770 	if (dev == NULL)
771 		return;
772 
773 	vhost_destroy_device_notify(dev);
774 
775 	cleanup_device(dev, 1);
776 	free_device(dev);
777 
778 	vhost_devices[vid] = NULL;
779 }
780 
781 void
782 vhost_attach_vdpa_device(int vid, struct rte_vdpa_device *vdpa_dev)
783 {
784 	struct virtio_net *dev = get_device(vid);
785 
786 	if (dev == NULL)
787 		return;
788 
789 	dev->vdpa_dev = vdpa_dev;
790 }
791 
792 void
793 vhost_set_ifname(int vid, const char *if_name, unsigned int if_len)
794 {
795 	struct virtio_net *dev;
796 	unsigned int len;
797 
798 	dev = get_device(vid);
799 	if (dev == NULL)
800 		return;
801 
802 	len = if_len > sizeof(dev->ifname) ?
803 		sizeof(dev->ifname) : if_len;
804 
805 	strncpy(dev->ifname, if_name, len);
806 	dev->ifname[sizeof(dev->ifname) - 1] = '\0';
807 }
808 
809 void
810 vhost_setup_virtio_net(int vid, bool enable, bool compliant_ol_flags, bool stats_enabled,
811 	bool support_iommu)
812 {
813 	struct virtio_net *dev = get_device(vid);
814 
815 	if (dev == NULL)
816 		return;
817 
818 	if (enable)
819 		dev->flags |= VIRTIO_DEV_BUILTIN_VIRTIO_NET;
820 	else
821 		dev->flags &= ~VIRTIO_DEV_BUILTIN_VIRTIO_NET;
822 	if (!compliant_ol_flags)
823 		dev->flags |= VIRTIO_DEV_LEGACY_OL_FLAGS;
824 	else
825 		dev->flags &= ~VIRTIO_DEV_LEGACY_OL_FLAGS;
826 	if (stats_enabled)
827 		dev->flags |= VIRTIO_DEV_STATS_ENABLED;
828 	else
829 		dev->flags &= ~VIRTIO_DEV_STATS_ENABLED;
830 	if (support_iommu)
831 		dev->flags |= VIRTIO_DEV_SUPPORT_IOMMU;
832 	else
833 		dev->flags &= ~VIRTIO_DEV_SUPPORT_IOMMU;
834 
835 	if (vhost_user_iotlb_init(dev) < 0)
836 		VHOST_CONFIG_LOG("device", ERR, "failed to init IOTLB");
837 
838 }
839 
840 void
841 vhost_enable_extbuf(int vid)
842 {
843 	struct virtio_net *dev = get_device(vid);
844 
845 	if (dev == NULL)
846 		return;
847 
848 	dev->extbuf = 1;
849 }
850 
851 void
852 vhost_enable_linearbuf(int vid)
853 {
854 	struct virtio_net *dev = get_device(vid);
855 
856 	if (dev == NULL)
857 		return;
858 
859 	dev->linearbuf = 1;
860 }
861 
862 int
863 rte_vhost_get_mtu(int vid, uint16_t *mtu)
864 {
865 	struct virtio_net *dev = get_device(vid);
866 
867 	if (dev == NULL || mtu == NULL)
868 		return -ENODEV;
869 
870 	if (!(dev->flags & VIRTIO_DEV_READY))
871 		return -EAGAIN;
872 
873 	if (!(dev->features & (1ULL << VIRTIO_NET_F_MTU)))
874 		return -ENOTSUP;
875 
876 	*mtu = dev->mtu;
877 
878 	return 0;
879 }
880 
881 int
882 rte_vhost_get_numa_node(int vid)
883 {
884 #ifdef RTE_LIBRTE_VHOST_NUMA
885 	struct virtio_net *dev = get_device(vid);
886 	int numa_node;
887 	int ret;
888 
889 	if (dev == NULL || numa_available() != 0)
890 		return -1;
891 
892 	ret = get_mempolicy(&numa_node, NULL, 0, dev,
893 			    MPOL_F_NODE | MPOL_F_ADDR);
894 	if (ret < 0) {
895 		VHOST_CONFIG_LOG(dev->ifname, ERR, "failed to query numa node: %s",
896 			rte_strerror(errno));
897 		return -1;
898 	}
899 
900 	return numa_node;
901 #else
902 	RTE_SET_USED(vid);
903 	return -1;
904 #endif
905 }
906 
907 uint16_t
908 rte_vhost_get_vring_num(int vid)
909 {
910 	struct virtio_net *dev = get_device(vid);
911 
912 	if (dev == NULL)
913 		return 0;
914 
915 	return dev->nr_vring;
916 }
917 
918 int
919 rte_vhost_get_ifname(int vid, char *buf, size_t len)
920 {
921 	struct virtio_net *dev = get_device(vid);
922 
923 	if (dev == NULL || buf == NULL)
924 		return -1;
925 
926 	len = RTE_MIN(len, sizeof(dev->ifname));
927 
928 	strncpy(buf, dev->ifname, len);
929 	buf[len - 1] = '\0';
930 
931 	return 0;
932 }
933 
934 int
935 rte_vhost_get_negotiated_features(int vid, uint64_t *features)
936 {
937 	struct virtio_net *dev;
938 
939 	dev = get_device(vid);
940 	if (dev == NULL || features == NULL)
941 		return -1;
942 
943 	*features = dev->features;
944 	return 0;
945 }
946 
947 int
948 rte_vhost_get_negotiated_protocol_features(int vid,
949 					   uint64_t *protocol_features)
950 {
951 	struct virtio_net *dev;
952 
953 	dev = get_device(vid);
954 	if (dev == NULL || protocol_features == NULL)
955 		return -1;
956 
957 	*protocol_features = dev->protocol_features;
958 	return 0;
959 }
960 
961 int
962 rte_vhost_get_mem_table(int vid, struct rte_vhost_memory **mem)
963 {
964 	struct virtio_net *dev;
965 	struct rte_vhost_memory *m;
966 	size_t size;
967 
968 	dev = get_device(vid);
969 	if (dev == NULL || mem == NULL)
970 		return -1;
971 
972 	size = dev->mem->nregions * sizeof(struct rte_vhost_mem_region);
973 	m = malloc(sizeof(struct rte_vhost_memory) + size);
974 	if (!m)
975 		return -1;
976 
977 	m->nregions = dev->mem->nregions;
978 	memcpy(m->regions, dev->mem->regions, size);
979 	*mem = m;
980 
981 	return 0;
982 }
983 
984 int
985 rte_vhost_get_vhost_vring(int vid, uint16_t vring_idx,
986 			  struct rte_vhost_vring *vring)
987 {
988 	struct virtio_net *dev;
989 	struct vhost_virtqueue *vq;
990 
991 	dev = get_device(vid);
992 	if (dev == NULL || vring == NULL)
993 		return -1;
994 
995 	if (vring_idx >= VHOST_MAX_VRING)
996 		return -1;
997 
998 	vq = dev->virtqueue[vring_idx];
999 	if (!vq)
1000 		return -1;
1001 
1002 	if (vq_is_packed(dev)) {
1003 		vring->desc_packed = vq->desc_packed;
1004 		vring->driver_event = vq->driver_event;
1005 		vring->device_event = vq->device_event;
1006 	} else {
1007 		vring->desc = vq->desc;
1008 		vring->avail = vq->avail;
1009 		vring->used = vq->used;
1010 	}
1011 	vring->log_guest_addr  = vq->log_guest_addr;
1012 
1013 	vring->callfd  = vq->callfd;
1014 	vring->kickfd  = vq->kickfd;
1015 	vring->size    = vq->size;
1016 
1017 	return 0;
1018 }
1019 
1020 int
1021 rte_vhost_get_vhost_ring_inflight(int vid, uint16_t vring_idx,
1022 				  struct rte_vhost_ring_inflight *vring)
1023 {
1024 	struct virtio_net *dev;
1025 	struct vhost_virtqueue *vq;
1026 
1027 	dev = get_device(vid);
1028 	if (unlikely(!dev))
1029 		return -1;
1030 
1031 	if (vring_idx >= VHOST_MAX_VRING)
1032 		return -1;
1033 
1034 	vq = dev->virtqueue[vring_idx];
1035 	if (unlikely(!vq))
1036 		return -1;
1037 
1038 	if (vq_is_packed(dev)) {
1039 		if (unlikely(!vq->inflight_packed))
1040 			return -1;
1041 
1042 		vring->inflight_packed = vq->inflight_packed;
1043 	} else {
1044 		if (unlikely(!vq->inflight_split))
1045 			return -1;
1046 
1047 		vring->inflight_split = vq->inflight_split;
1048 	}
1049 
1050 	vring->resubmit_inflight = vq->resubmit_inflight;
1051 
1052 	return 0;
1053 }
1054 
1055 int
1056 rte_vhost_set_inflight_desc_split(int vid, uint16_t vring_idx,
1057 				  uint16_t idx)
1058 {
1059 	struct vhost_virtqueue *vq;
1060 	struct virtio_net *dev;
1061 
1062 	dev = get_device(vid);
1063 	if (unlikely(!dev))
1064 		return -1;
1065 
1066 	if (unlikely(!(dev->protocol_features &
1067 	    (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1068 		return 0;
1069 
1070 	if (unlikely(vq_is_packed(dev)))
1071 		return -1;
1072 
1073 	if (unlikely(vring_idx >= VHOST_MAX_VRING))
1074 		return -1;
1075 
1076 	vq = dev->virtqueue[vring_idx];
1077 	if (unlikely(!vq))
1078 		return -1;
1079 
1080 	if (unlikely(!vq->inflight_split))
1081 		return -1;
1082 
1083 	if (unlikely(idx >= vq->size))
1084 		return -1;
1085 
1086 	vq->inflight_split->desc[idx].counter = vq->global_counter++;
1087 	vq->inflight_split->desc[idx].inflight = 1;
1088 	return 0;
1089 }
1090 
1091 int
1092 rte_vhost_set_inflight_desc_packed(int vid, uint16_t vring_idx,
1093 				   uint16_t head, uint16_t last,
1094 				   uint16_t *inflight_entry)
1095 {
1096 	struct rte_vhost_inflight_info_packed *inflight_info;
1097 	struct virtio_net *dev;
1098 	struct vhost_virtqueue *vq;
1099 	struct vring_packed_desc *desc;
1100 	uint16_t old_free_head, free_head;
1101 
1102 	dev = get_device(vid);
1103 	if (unlikely(!dev))
1104 		return -1;
1105 
1106 	if (unlikely(!(dev->protocol_features &
1107 	    (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1108 		return 0;
1109 
1110 	if (unlikely(!vq_is_packed(dev)))
1111 		return -1;
1112 
1113 	if (unlikely(vring_idx >= VHOST_MAX_VRING))
1114 		return -1;
1115 
1116 	vq = dev->virtqueue[vring_idx];
1117 	if (unlikely(!vq))
1118 		return -1;
1119 
1120 	inflight_info = vq->inflight_packed;
1121 	if (unlikely(!inflight_info))
1122 		return -1;
1123 
1124 	if (unlikely(head >= vq->size))
1125 		return -1;
1126 
1127 	desc = vq->desc_packed;
1128 	old_free_head = inflight_info->old_free_head;
1129 	if (unlikely(old_free_head >= vq->size))
1130 		return -1;
1131 
1132 	free_head = old_free_head;
1133 
1134 	/* init header descriptor */
1135 	inflight_info->desc[old_free_head].num = 0;
1136 	inflight_info->desc[old_free_head].counter = vq->global_counter++;
1137 	inflight_info->desc[old_free_head].inflight = 1;
1138 
1139 	/* save desc entry in flight entry */
1140 	while (head != ((last + 1) % vq->size)) {
1141 		inflight_info->desc[old_free_head].num++;
1142 		inflight_info->desc[free_head].addr = desc[head].addr;
1143 		inflight_info->desc[free_head].len = desc[head].len;
1144 		inflight_info->desc[free_head].flags = desc[head].flags;
1145 		inflight_info->desc[free_head].id = desc[head].id;
1146 
1147 		inflight_info->desc[old_free_head].last = free_head;
1148 		free_head = inflight_info->desc[free_head].next;
1149 		inflight_info->free_head = free_head;
1150 		head = (head + 1) % vq->size;
1151 	}
1152 
1153 	inflight_info->old_free_head = free_head;
1154 	*inflight_entry = old_free_head;
1155 
1156 	return 0;
1157 }
1158 
1159 int
1160 rte_vhost_clr_inflight_desc_split(int vid, uint16_t vring_idx,
1161 				  uint16_t last_used_idx, uint16_t idx)
1162 {
1163 	struct virtio_net *dev;
1164 	struct vhost_virtqueue *vq;
1165 
1166 	dev = get_device(vid);
1167 	if (unlikely(!dev))
1168 		return -1;
1169 
1170 	if (unlikely(!(dev->protocol_features &
1171 	    (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1172 		return 0;
1173 
1174 	if (unlikely(vq_is_packed(dev)))
1175 		return -1;
1176 
1177 	if (unlikely(vring_idx >= VHOST_MAX_VRING))
1178 		return -1;
1179 
1180 	vq = dev->virtqueue[vring_idx];
1181 	if (unlikely(!vq))
1182 		return -1;
1183 
1184 	if (unlikely(!vq->inflight_split))
1185 		return -1;
1186 
1187 	if (unlikely(idx >= vq->size))
1188 		return -1;
1189 
1190 	rte_atomic_thread_fence(rte_memory_order_seq_cst);
1191 
1192 	vq->inflight_split->desc[idx].inflight = 0;
1193 
1194 	rte_atomic_thread_fence(rte_memory_order_seq_cst);
1195 
1196 	vq->inflight_split->used_idx = last_used_idx;
1197 	return 0;
1198 }
1199 
1200 int
1201 rte_vhost_clr_inflight_desc_packed(int vid, uint16_t vring_idx,
1202 				   uint16_t head)
1203 {
1204 	struct rte_vhost_inflight_info_packed *inflight_info;
1205 	struct virtio_net *dev;
1206 	struct vhost_virtqueue *vq;
1207 
1208 	dev = get_device(vid);
1209 	if (unlikely(!dev))
1210 		return -1;
1211 
1212 	if (unlikely(!(dev->protocol_features &
1213 	    (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1214 		return 0;
1215 
1216 	if (unlikely(!vq_is_packed(dev)))
1217 		return -1;
1218 
1219 	if (unlikely(vring_idx >= VHOST_MAX_VRING))
1220 		return -1;
1221 
1222 	vq = dev->virtqueue[vring_idx];
1223 	if (unlikely(!vq))
1224 		return -1;
1225 
1226 	inflight_info = vq->inflight_packed;
1227 	if (unlikely(!inflight_info))
1228 		return -1;
1229 
1230 	if (unlikely(head >= vq->size))
1231 		return -1;
1232 
1233 	rte_atomic_thread_fence(rte_memory_order_seq_cst);
1234 
1235 	inflight_info->desc[head].inflight = 0;
1236 
1237 	rte_atomic_thread_fence(rte_memory_order_seq_cst);
1238 
1239 	inflight_info->old_free_head = inflight_info->free_head;
1240 	inflight_info->old_used_idx = inflight_info->used_idx;
1241 	inflight_info->old_used_wrap_counter = inflight_info->used_wrap_counter;
1242 
1243 	return 0;
1244 }
1245 
1246 int
1247 rte_vhost_set_last_inflight_io_split(int vid, uint16_t vring_idx,
1248 				     uint16_t idx)
1249 {
1250 	struct virtio_net *dev;
1251 	struct vhost_virtqueue *vq;
1252 
1253 	dev = get_device(vid);
1254 	if (unlikely(!dev))
1255 		return -1;
1256 
1257 	if (unlikely(!(dev->protocol_features &
1258 	    (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1259 		return 0;
1260 
1261 	if (unlikely(vq_is_packed(dev)))
1262 		return -1;
1263 
1264 	if (unlikely(vring_idx >= VHOST_MAX_VRING))
1265 		return -1;
1266 
1267 	vq = dev->virtqueue[vring_idx];
1268 	if (unlikely(!vq))
1269 		return -1;
1270 
1271 	if (unlikely(!vq->inflight_split))
1272 		return -1;
1273 
1274 	if (unlikely(idx >= vq->size))
1275 		return -1;
1276 
1277 	vq->inflight_split->last_inflight_io = idx;
1278 	return 0;
1279 }
1280 
1281 int
1282 rte_vhost_set_last_inflight_io_packed(int vid, uint16_t vring_idx,
1283 				      uint16_t head)
1284 {
1285 	struct rte_vhost_inflight_info_packed *inflight_info;
1286 	struct virtio_net *dev;
1287 	struct vhost_virtqueue *vq;
1288 	uint16_t last;
1289 
1290 	dev = get_device(vid);
1291 	if (unlikely(!dev))
1292 		return -1;
1293 
1294 	if (unlikely(!(dev->protocol_features &
1295 	    (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1296 		return 0;
1297 
1298 	if (unlikely(!vq_is_packed(dev)))
1299 		return -1;
1300 
1301 	if (unlikely(vring_idx >= VHOST_MAX_VRING))
1302 		return -1;
1303 
1304 	vq = dev->virtqueue[vring_idx];
1305 	if (unlikely(!vq))
1306 		return -1;
1307 
1308 	inflight_info = vq->inflight_packed;
1309 	if (unlikely(!inflight_info))
1310 		return -1;
1311 
1312 	if (unlikely(head >= vq->size))
1313 		return -1;
1314 
1315 	last = inflight_info->desc[head].last;
1316 	if (unlikely(last >= vq->size))
1317 		return -1;
1318 
1319 	inflight_info->desc[last].next = inflight_info->free_head;
1320 	inflight_info->free_head = head;
1321 	inflight_info->used_idx += inflight_info->desc[head].num;
1322 	if (inflight_info->used_idx >= inflight_info->desc_num) {
1323 		inflight_info->used_idx -= inflight_info->desc_num;
1324 		inflight_info->used_wrap_counter =
1325 			!inflight_info->used_wrap_counter;
1326 	}
1327 
1328 	return 0;
1329 }
1330 
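/*
 * Kick the guest for a given vring: the access_lock is taken in read mode
 * and the split or packed call helper is used depending on the negotiated
 * ring layout.  rte_vhost_vring_call_nonblock() below is identical except
 * that it returns -EAGAIN instead of blocking when the lock is contended.
 */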
1331 int
1332 rte_vhost_vring_call(int vid, uint16_t vring_idx)
1333 {
1334 	struct virtio_net *dev;
1335 	struct vhost_virtqueue *vq;
1336 	int ret = 0;
1337 
1338 	dev = get_device(vid);
1339 	if (!dev)
1340 		return -1;
1341 
1342 	if (vring_idx >= VHOST_MAX_VRING)
1343 		return -1;
1344 
1345 	vq = dev->virtqueue[vring_idx];
1346 	if (!vq)
1347 		return -1;
1348 
1349 	rte_rwlock_read_lock(&vq->access_lock);
1350 
1351 	if (unlikely(!vq->access_ok)) {
1352 		ret = -1;
1353 		goto out_unlock;
1354 	}
1355 
1356 	if (vq_is_packed(dev))
1357 		vhost_vring_call_packed(dev, vq);
1358 	else
1359 		vhost_vring_call_split(dev, vq);
1360 
1361 out_unlock:
1362 	rte_rwlock_read_unlock(&vq->access_lock);
1363 
1364 	return ret;
1365 }
1366 
1367 int
1368 rte_vhost_vring_call_nonblock(int vid, uint16_t vring_idx)
1369 {
1370 	struct virtio_net *dev;
1371 	struct vhost_virtqueue *vq;
1372 	int ret = 0;
1373 
1374 	dev = get_device(vid);
1375 	if (!dev)
1376 		return -1;
1377 
1378 	if (vring_idx >= VHOST_MAX_VRING)
1379 		return -1;
1380 
1381 	vq = dev->virtqueue[vring_idx];
1382 	if (!vq)
1383 		return -1;
1384 
1385 	if (rte_rwlock_read_trylock(&vq->access_lock))
1386 		return -EAGAIN;
1387 
1388 	if (unlikely(!vq->access_ok)) {
1389 		ret = -1;
1390 		goto out_unlock;
1391 	}
1392 
1393 	if (vq_is_packed(dev))
1394 		vhost_vring_call_packed(dev, vq);
1395 	else
1396 		vhost_vring_call_split(dev, vq);
1397 
1398 out_unlock:
1399 	rte_rwlock_read_unlock(&vq->access_lock);
1400 
1401 	return ret;
1402 }
1403 
1404 uint16_t
1405 rte_vhost_avail_entries(int vid, uint16_t queue_id)
1406 {
1407 	struct virtio_net *dev;
1408 	struct vhost_virtqueue *vq;
1409 	uint16_t ret = 0;
1410 
1411 	dev = get_device(vid);
1412 	if (!dev)
1413 		return 0;
1414 
1415 	if (queue_id >= VHOST_MAX_VRING)
1416 		return 0;
1417 
1418 	vq = dev->virtqueue[queue_id];
1419 	if (!vq)
1420 		return 0;
1421 
1422 	rte_rwlock_write_lock(&vq->access_lock);
1423 
1424 	if (unlikely(!vq->access_ok))
1425 		goto out;
1426 
1427 	if (unlikely(!vq->enabled))
1428 		goto out;
1429 
1430 	ret = *(volatile uint16_t *)&vq->avail->idx - vq->last_used_idx;
1431 
1432 out:
1433 	rte_rwlock_write_unlock(&vq->access_lock);
1434 	return ret;
1435 }
1436 
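/*
 * Guest notification control: split rings either toggle
 * VRING_USED_F_NO_NOTIFY or, when VIRTIO_RING_F_EVENT_IDX was negotiated,
 * update the avail event index; packed rings program the device event
 * area with VRING_EVENT_F_ENABLE/DISABLE or VRING_EVENT_F_DESC plus the
 * next expected descriptor offset and wrap bit.
 */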
1437 static inline int
1438 vhost_enable_notify_split(struct virtio_net *dev,
1439 		struct vhost_virtqueue *vq, int enable)
1440 {
1441 	if (vq->used == NULL)
1442 		return -1;
1443 
1444 	if (!(dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))) {
1445 		if (enable)
1446 			vq->used->flags &= ~VRING_USED_F_NO_NOTIFY;
1447 		else
1448 			vq->used->flags |= VRING_USED_F_NO_NOTIFY;
1449 	} else {
1450 		if (enable)
1451 			vhost_avail_event(vq) = vq->last_avail_idx;
1452 	}
1453 	return 0;
1454 }
1455 
1456 static inline int
1457 vhost_enable_notify_packed(struct virtio_net *dev,
1458 		struct vhost_virtqueue *vq, int enable)
1459 {
1460 	uint16_t flags;
1461 
1462 	if (vq->device_event == NULL)
1463 		return -1;
1464 
1465 	if (!enable) {
1466 		vq->device_event->flags = VRING_EVENT_F_DISABLE;
1467 		return 0;
1468 	}
1469 
1470 	flags = VRING_EVENT_F_ENABLE;
1471 	if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX)) {
1472 		flags = VRING_EVENT_F_DESC;
1473 		vq->device_event->off_wrap = vq->last_avail_idx |
1474 			vq->avail_wrap_counter << 15;
1475 	}
1476 
1477 	rte_atomic_thread_fence(rte_memory_order_release);
1478 
1479 	vq->device_event->flags = flags;
1480 	return 0;
1481 }
1482 
1483 int
1484 vhost_enable_guest_notification(struct virtio_net *dev,
1485 		struct vhost_virtqueue *vq, int enable)
1486 {
1487 	/*
1488 	 * If the virtqueue is not ready yet, the setting will be
1489 	 * applied when it becomes ready.
1490 	 */
1491 	if (!vq->ready)
1492 		return 0;
1493 
1494 	if (vq_is_packed(dev))
1495 		return vhost_enable_notify_packed(dev, vq, enable);
1496 	else
1497 		return vhost_enable_notify_split(dev, vq, enable);
1498 }
1499 
1500 int
1501 rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable)
1502 {
1503 	struct virtio_net *dev = get_device(vid);
1504 	struct vhost_virtqueue *vq;
1505 	int ret;
1506 
1507 	if (!dev)
1508 		return -1;
1509 
1510 	if (queue_id >= VHOST_MAX_VRING)
1511 		return -1;
1512 
1513 	vq = dev->virtqueue[queue_id];
1514 	if (!vq)
1515 		return -1;
1516 
1517 	rte_rwlock_write_lock(&vq->access_lock);
1518 
1519 	if (unlikely(!vq->access_ok)) {
1520 		ret = -1;
1521 		goto out_unlock;
1522 	}
1523 
1524 	vq->notif_enable = enable;
1525 	ret = vhost_enable_guest_notification(dev, vq, enable);
1526 
1527 out_unlock:
1528 	rte_rwlock_write_unlock(&vq->access_lock);
1529 
1530 	return ret;
1531 }
1532 
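/*
 * Inject a guest notification from application context: clear the
 * pending-interrupt flag, inject the interrupt through the backend ops,
 * and account the result in the virtqueue statistics.
 */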
1533 void
1534 rte_vhost_notify_guest(int vid, uint16_t queue_id)
1535 {
1536 	struct virtio_net *dev = get_device(vid);
1537 	struct vhost_virtqueue *vq;
1538 
1539 	if (!dev || queue_id >= VHOST_MAX_VRING)
1540 		return;
1541 
1542 	vq = dev->virtqueue[queue_id];
1543 	if (!vq)
1544 		return;
1545 
1546 	rte_rwlock_read_lock(&vq->access_lock);
1547 
1548 	if (unlikely(!vq->access_ok))
1549 		goto out_unlock;
1550 
1551 	rte_atomic_store_explicit(&vq->irq_pending, false, rte_memory_order_release);
1552 
1553 	if (dev->backend_ops->inject_irq(dev, vq)) {
1554 		if (dev->flags & VIRTIO_DEV_STATS_ENABLED)
1555 			rte_atomic_fetch_add_explicit(&vq->stats.guest_notifications_error,
1556 					1, rte_memory_order_relaxed);
1557 	} else {
1558 		if (dev->flags & VIRTIO_DEV_STATS_ENABLED)
1559 			rte_atomic_fetch_add_explicit(&vq->stats.guest_notifications,
1560 					1, rte_memory_order_relaxed);
1561 		if (dev->notify_ops->guest_notified)
1562 			dev->notify_ops->guest_notified(dev->vid);
1563 	}
1564 
1565 out_unlock:
1566 	rte_rwlock_read_unlock(&vq->access_lock);
1567 }
1568 
1569 void
1570 rte_vhost_log_write(int vid, uint64_t addr, uint64_t len)
1571 {
1572 	struct virtio_net *dev = get_device(vid);
1573 
1574 	if (dev == NULL)
1575 		return;
1576 
1577 	vhost_log_write(dev, addr, len);
1578 }
1579 
1580 void
1581 rte_vhost_log_used_vring(int vid, uint16_t vring_idx,
1582 			 uint64_t offset, uint64_t len)
1583 {
1584 	struct virtio_net *dev;
1585 	struct vhost_virtqueue *vq;
1586 
1587 	dev = get_device(vid);
1588 	if (dev == NULL)
1589 		return;
1590 
1591 	if (vring_idx >= VHOST_MAX_VRING)
1592 		return;
1593 	vq = dev->virtqueue[vring_idx];
1594 	if (!vq)
1595 		return;
1596 
1597 	vhost_log_used_vring(dev, vq, offset, len);
1598 }
1599 
1600 uint32_t
1601 rte_vhost_rx_queue_count(int vid, uint16_t qid)
1602 {
1603 	struct virtio_net *dev;
1604 	struct vhost_virtqueue *vq;
1605 	uint32_t ret = 0;
1606 
1607 	dev = get_device(vid);
1608 	if (dev == NULL)
1609 		return 0;
1610 
1611 	if (unlikely(qid >= dev->nr_vring || (qid & 1) == 0)) {
1612 		VHOST_DATA_LOG(dev->ifname, ERR,
1613 			"%s: invalid virtqueue idx %d.",
1614 			__func__, qid);
1615 		return 0;
1616 	}
1617 
1618 	vq = dev->virtqueue[qid];
1619 	if (vq == NULL)
1620 		return 0;
1621 
1622 	rte_rwlock_write_lock(&vq->access_lock);
1623 
1624 	if (unlikely(!vq->access_ok))
1625 		goto out;
1626 
1627 	if (unlikely(!vq->enabled))
1628 		goto out;
1629 
1630 	ret = *((volatile uint16_t *)&vq->avail->idx) - vq->last_avail_idx;
1631 
1632 out:
1633 	rte_rwlock_write_unlock(&vq->access_lock);
1634 	return ret;
1635 }
1636 
1637 struct rte_vdpa_device *
1638 rte_vhost_get_vdpa_device(int vid)
1639 {
1640 	struct virtio_net *dev = get_device(vid);
1641 
1642 	if (dev == NULL)
1643 		return NULL;
1644 
1645 	return dev->vdpa_dev;
1646 }
1647 
1648 int
1649 rte_vhost_get_log_base(int vid, uint64_t *log_base,
1650 		uint64_t *log_size)
1651 {
1652 	struct virtio_net *dev = get_device(vid);
1653 
1654 	if (dev == NULL || log_base == NULL || log_size == NULL)
1655 		return -1;
1656 
1657 	*log_base = dev->log_base;
1658 	*log_size = dev->log_size;
1659 
1660 	return 0;
1661 }
1662 
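/*
 * For packed rings, the last avail/used indexes are exchanged with bit 15
 * carrying the wrap counter and bits 0-14 the ring index: e.g. a value of
 * 0x8005 encodes index 5 with the wrap counter set.  Split rings use the
 * plain 16-bit indexes.  The same encoding is expected by
 * rte_vhost_set_vring_base() below.
 */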
1663 int
1664 rte_vhost_get_vring_base(int vid, uint16_t queue_id,
1665 		uint16_t *last_avail_idx, uint16_t *last_used_idx)
1666 {
1667 	struct vhost_virtqueue *vq;
1668 	struct virtio_net *dev = get_device(vid);
1669 
1670 	if (dev == NULL || last_avail_idx == NULL || last_used_idx == NULL)
1671 		return -1;
1672 
1673 	if (queue_id >= VHOST_MAX_VRING)
1674 		return -1;
1675 
1676 	vq = dev->virtqueue[queue_id];
1677 	if (!vq)
1678 		return -1;
1679 
1680 	if (vq_is_packed(dev)) {
1681 		*last_avail_idx = (vq->avail_wrap_counter << 15) |
1682 				  vq->last_avail_idx;
1683 		*last_used_idx = (vq->used_wrap_counter << 15) |
1684 				 vq->last_used_idx;
1685 	} else {
1686 		*last_avail_idx = vq->last_avail_idx;
1687 		*last_used_idx = vq->last_used_idx;
1688 	}
1689 
1690 	return 0;
1691 }
1692 
1693 int
1694 rte_vhost_set_vring_base(int vid, uint16_t queue_id,
1695 		uint16_t last_avail_idx, uint16_t last_used_idx)
1696 {
1697 	struct vhost_virtqueue *vq;
1698 	struct virtio_net *dev = get_device(vid);
1699 
1700 	if (!dev)
1701 		return -1;
1702 
1703 	if (queue_id >= VHOST_MAX_VRING)
1704 		return -1;
1705 
1706 	vq = dev->virtqueue[queue_id];
1707 	if (!vq)
1708 		return -1;
1709 
1710 	if (vq_is_packed(dev)) {
1711 		vq->last_avail_idx = last_avail_idx & 0x7fff;
1712 		vq->avail_wrap_counter = !!(last_avail_idx & (1 << 15));
1713 		vq->last_used_idx = last_used_idx & 0x7fff;
1714 		vq->used_wrap_counter = !!(last_used_idx & (1 << 15));
1715 		vhost_virtqueue_reconnect_log_packed(vq);
1716 	} else {
1717 		vq->last_avail_idx = last_avail_idx;
1718 		vq->last_used_idx = last_used_idx;
1719 		vhost_virtqueue_reconnect_log_split(vq);
1720 	}
1721 
1722 	return 0;
1723 }
1724 
1725 int
1726 rte_vhost_get_vring_base_from_inflight(int vid,
1727 				       uint16_t queue_id,
1728 				       uint16_t *last_avail_idx,
1729 				       uint16_t *last_used_idx)
1730 {
1731 	struct rte_vhost_inflight_info_packed *inflight_info;
1732 	struct vhost_virtqueue *vq;
1733 	struct virtio_net *dev = get_device(vid);
1734 
1735 	if (dev == NULL || last_avail_idx == NULL || last_used_idx == NULL)
1736 		return -1;
1737 
1738 	if (queue_id >= VHOST_MAX_VRING)
1739 		return -1;
1740 
1741 	vq = dev->virtqueue[queue_id];
1742 	if (!vq)
1743 		return -1;
1744 
1745 	if (!vq_is_packed(dev))
1746 		return -1;
1747 
1748 	inflight_info = vq->inflight_packed;
1749 	if (!inflight_info)
1750 		return -1;
1751 
1752 	*last_avail_idx = (inflight_info->old_used_wrap_counter << 15) |
1753 			  inflight_info->old_used_idx;
1754 	*last_used_idx = *last_avail_idx;
1755 
1756 	return 0;
1757 }
1758 
1759 int
1760 rte_vhost_extern_callback_register(int vid,
1761 		struct rte_vhost_user_extern_ops const * const ops, void *ctx)
1762 {
1763 	struct virtio_net *dev = get_device(vid);
1764 
1765 	if (dev == NULL || ops == NULL)
1766 		return -1;
1767 
1768 	dev->extern_ops = *ops;
1769 	dev->extern_data = ctx;
1770 	return 0;
1771 }
1772 
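/*
 * Allocate the per-virtqueue async metadata on the virtqueue's NUMA node:
 * the packet tracking array, the completion flags, and either the packed
 * shadow buffers or the split shadow descriptors depending on the
 * negotiated ring layout.  Called with the virtqueue access_lock held.
 */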
1773 static __rte_always_inline int
1774 async_channel_register(struct virtio_net *dev, struct vhost_virtqueue *vq)
1775 	__rte_exclusive_locks_required(&vq->access_lock)
1776 {
1777 	struct vhost_async *async;
1778 	int node = vq->numa_node;
1779 
1780 	if (unlikely(vq->async)) {
1781 		VHOST_CONFIG_LOG(dev->ifname, ERR,
1782 			"async register failed: already registered (qid: %d)",
1783 			vq->index);
1784 		return -1;
1785 	}
1786 
1787 	async = rte_zmalloc_socket(NULL, sizeof(struct vhost_async), 0, node);
1788 	if (!async) {
1789 		VHOST_CONFIG_LOG(dev->ifname, ERR,
1790 			"failed to allocate async metadata (qid: %d)",
1791 			vq->index);
1792 		return -1;
1793 	}
1794 
1795 	async->pkts_info = rte_malloc_socket(NULL, vq->size * sizeof(struct async_inflight_info),
1796 			RTE_CACHE_LINE_SIZE, node);
1797 	if (!async->pkts_info) {
1798 		VHOST_CONFIG_LOG(dev->ifname, ERR,
1799 			"failed to allocate async_pkts_info (qid: %d)",
1800 			vq->index);
1801 		goto out_free_async;
1802 	}
1803 
1804 	async->pkts_cmpl_flag = rte_zmalloc_socket(NULL, vq->size * sizeof(bool),
1805 			RTE_CACHE_LINE_SIZE, node);
1806 	if (!async->pkts_cmpl_flag) {
1807 		VHOST_CONFIG_LOG(dev->ifname, ERR,
1808 			"failed to allocate async pkts_cmpl_flag (qid: %d)",
1809 			vq->index);
1810 		goto out_free_async;
1811 	}
1812 
1813 	if (vq_is_packed(dev)) {
1814 		async->buffers_packed = rte_malloc_socket(NULL,
1815 				vq->size * sizeof(struct vring_used_elem_packed),
1816 				RTE_CACHE_LINE_SIZE, node);
1817 		if (!async->buffers_packed) {
1818 			VHOST_CONFIG_LOG(dev->ifname, ERR,
1819 				"failed to allocate async buffers (qid: %d)",
1820 				vq->index);
1821 			goto out_free_inflight;
1822 		}
1823 	} else {
1824 		async->descs_split = rte_malloc_socket(NULL,
1825 				vq->size * sizeof(struct vring_used_elem),
1826 				RTE_CACHE_LINE_SIZE, node);
1827 		if (!async->descs_split) {
1828 			VHOST_CONFIG_LOG(dev->ifname, ERR,
1829 				"failed to allocate async descs (qid: %d)",
1830 				vq->index);
1831 			goto out_free_inflight;
1832 		}
1833 	}
1834 
1835 	vq->async = async;
1836 
1837 	return 0;
1838 out_free_inflight:
1839 	rte_free(async->pkts_info);
1840 out_free_async:
1841 	rte_free(async);
1842 
1843 	return -1;
1844 }
1845 
1846 int
1847 rte_vhost_async_channel_register(int vid, uint16_t queue_id)
1848 {
1849 	struct vhost_virtqueue *vq;
1850 	struct virtio_net *dev = get_device(vid);
1851 	int ret;
1852 
1853 	if (dev == NULL)
1854 		return -1;
1855 
1856 	if (queue_id >= VHOST_MAX_VRING)
1857 		return -1;
1858 
1859 	vq = dev->virtqueue[queue_id];
1860 
1861 	if (unlikely(vq == NULL || !dev->async_copy || dev->vdpa_dev != NULL))
1862 		return -1;
1863 
1864 	rte_rwlock_write_lock(&vq->access_lock);
1865 
1866 	if (unlikely(!vq->access_ok)) {
1867 		ret = -1;
1868 		goto out_unlock;
1869 	}
1870 
1871 	ret = async_channel_register(dev, vq);
1872 
1873 out_unlock:
1874 	rte_rwlock_write_unlock(&vq->access_lock);
1875 
1876 	return ret;
1877 }
1878 
1879 int
1880 rte_vhost_async_channel_register_thread_unsafe(int vid, uint16_t queue_id)
1881 {
1882 	struct vhost_virtqueue *vq;
1883 	struct virtio_net *dev = get_device(vid);
1884 
1885 	if (dev == NULL)
1886 		return -1;
1887 
1888 	if (queue_id >= VHOST_MAX_VRING)
1889 		return -1;
1890 
1891 	vq = dev->virtqueue[queue_id];
1892 
1893 	if (unlikely(vq == NULL || !dev->async_copy || dev->vdpa_dev != NULL))
1894 		return -1;
1895 
1896 	vq_assert_lock(dev, vq);
1897 
1898 	return async_channel_register(dev, vq);
1899 }
1900 
1901 int
1902 rte_vhost_async_channel_unregister(int vid, uint16_t queue_id)
1903 {
1904 	struct vhost_virtqueue *vq;
1905 	struct virtio_net *dev = get_device(vid);
1906 	int ret = -1;
1907 
1908 	if (dev == NULL)
1909 		return ret;
1910 
1911 	if (queue_id >= VHOST_MAX_VRING)
1912 		return ret;
1913 
1914 	vq = dev->virtqueue[queue_id];
1915 
1916 	if (vq == NULL)
1917 		return ret;
1918 
1919 	if (rte_rwlock_write_trylock(&vq->access_lock)) {
1920 		VHOST_CONFIG_LOG(dev->ifname, ERR,
1921 			"failed to unregister async channel, virtqueue busy.");
1922 		return ret;
1923 	}
1924 
1925 	if (unlikely(!vq->access_ok)) {
1926 		ret = -1;
1927 		goto out_unlock;
1928 	}
1929 
1930 	if (!vq->async) {
1931 		ret = 0;
1932 	} else if (vq->async->pkts_inflight_n) {
1933 		VHOST_CONFIG_LOG(dev->ifname, ERR, "failed to unregister async channel.");
1934 		VHOST_CONFIG_LOG(dev->ifname, ERR,
1935 			"inflight packets must be completed before unregistration.");
1936 	} else {
1937 		vhost_free_async_mem(vq);
1938 		ret = 0;
1939 	}
1940 
1941 out_unlock:
1942 	rte_rwlock_write_unlock(&vq->access_lock);
1943 
1944 	return ret;
1945 }
1946 
1947 int
1948 rte_vhost_async_channel_unregister_thread_unsafe(int vid, uint16_t queue_id)
1949 {
1950 	struct vhost_virtqueue *vq;
1951 	struct virtio_net *dev = get_device(vid);
1952 
1953 	if (dev == NULL)
1954 		return -1;
1955 
1956 	if (queue_id >= VHOST_MAX_VRING)
1957 		return -1;
1958 
1959 	vq = dev->virtqueue[queue_id];
1960 
1961 	if (vq == NULL)
1962 		return -1;
1963 
1964 	vq_assert_lock(dev, vq);
1965 
1966 	if (!vq->async)
1967 		return 0;
1968 
1969 	if (vq->async->pkts_inflight_n) {
1970 		VHOST_CONFIG_LOG(dev->ifname, ERR, "failed to unregister async channel.");
1971 		VHOST_CONFIG_LOG(dev->ifname, ERR,
1972 			"inflight packets must be completed before unregistration.");
1973 		return -1;
1974 	}
1975 
1976 	vhost_free_async_mem(vq);
1977 
1978 	return 0;
1979 }
1980 
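/*
 * Register a DMA device vchannel for use by the vhost async data path.
 * The per-vchannel completion-flag table is sized to the device's
 * max_desc rounded up to a power of two, so that ring_mask can be used
 * for cheap index wrapping.  A minimal usage sketch (dma_id and vchan_id
 * are hypothetical, application-provided values):
 *
 *	if (rte_vhost_async_dma_configure(dma_id, vchan_id) != 0)
 *		rte_exit(EXIT_FAILURE, "vhost async DMA configure failed\n");
 */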
1981 int
1982 rte_vhost_async_dma_configure(int16_t dma_id, uint16_t vchan_id)
1983 {
1984 	struct rte_dma_info info;
1985 	void *pkts_cmpl_flag_addr;
1986 	uint16_t max_desc;
1987 
1988 	pthread_mutex_lock(&vhost_dma_lock);
1989 
1990 	if (!rte_dma_is_valid(dma_id)) {
1991 		VHOST_CONFIG_LOG("dma", ERR, "DMA %d is not found.", dma_id);
1992 		goto error;
1993 	}
1994 
1995 	if (rte_dma_info_get(dma_id, &info) != 0) {
1996 		VHOST_CONFIG_LOG("dma", ERR, "Fail to get DMA %d information.", dma_id);
1997 		goto error;
1998 	}
1999 
2000 	if (vchan_id >= info.max_vchans) {
2001 		VHOST_CONFIG_LOG("dma", ERR, "Invalid DMA %d vChannel %u.", dma_id, vchan_id);
2002 		goto error;
2003 	}
2004 
2005 	if (!dma_copy_track[dma_id].vchans) {
2006 		struct async_dma_vchan_info *vchans;
2007 
2008 		vchans = rte_zmalloc(NULL, sizeof(struct async_dma_vchan_info) * info.max_vchans,
2009 				RTE_CACHE_LINE_SIZE);
2010 		if (vchans == NULL) {
2011 			VHOST_CONFIG_LOG("dma", ERR,
2012 				"Failed to allocate vchans for DMA %d vChannel %u.",
2013 				dma_id, vchan_id);
2014 			goto error;
2015 		}
2016 
2017 		dma_copy_track[dma_id].vchans = vchans;
2018 	}
2019 
2020 	if (dma_copy_track[dma_id].vchans[vchan_id].pkts_cmpl_flag_addr) {
2021 		VHOST_CONFIG_LOG("dma", INFO, "DMA %d vChannel %u already registered.",
2022 			dma_id, vchan_id);
2023 		pthread_mutex_unlock(&vhost_dma_lock);
2024 		return 0;
2025 	}
2026 
2027 	max_desc = info.max_desc;
2028 	if (!rte_is_power_of_2(max_desc))
2029 		max_desc = rte_align32pow2(max_desc);
2030 
2031 	pkts_cmpl_flag_addr = rte_zmalloc(NULL, sizeof(bool *) * max_desc, RTE_CACHE_LINE_SIZE);
2032 	if (!pkts_cmpl_flag_addr) {
2033 		VHOST_CONFIG_LOG("dma", ERR,
2034 			"Failed to allocate pkts_cmpl_flag_addr for DMA %d vChannel %u.",
2035 			dma_id, vchan_id);
2036 
2037 		if (dma_copy_track[dma_id].nr_vchans == 0) {
2038 			rte_free(dma_copy_track[dma_id].vchans);
2039 			dma_copy_track[dma_id].vchans = NULL;
2040 		}
2041 		goto error;
2042 	}
2043 
2044 	dma_copy_track[dma_id].vchans[vchan_id].pkts_cmpl_flag_addr = pkts_cmpl_flag_addr;
2045 	dma_copy_track[dma_id].vchans[vchan_id].ring_size = max_desc;
2046 	dma_copy_track[dma_id].vchans[vchan_id].ring_mask = max_desc - 1;
2047 	dma_copy_track[dma_id].nr_vchans++;
2048 
2049 	pthread_mutex_unlock(&vhost_dma_lock);
2050 	return 0;
2051 
2052 error:
2053 	pthread_mutex_unlock(&vhost_dma_lock);
2054 	return -1;
2055 }
2056 
2057 int
2058 rte_vhost_async_get_inflight(int vid, uint16_t queue_id)
2059 {
2060 	struct vhost_virtqueue *vq;
2061 	struct virtio_net *dev = get_device(vid);
2062 	int ret = -1;
2063 
2064 	if (dev == NULL)
2065 		return ret;
2066 
2067 	if (queue_id >= VHOST_MAX_VRING)
2068 		return ret;
2069 
2070 	vq = dev->virtqueue[queue_id];
2071 
2072 	if (vq == NULL)
2073 		return ret;
2074 
2075 	if (rte_rwlock_write_trylock(&vq->access_lock)) {
2076 		VHOST_CONFIG_LOG(dev->ifname, DEBUG,
2077 			"failed to check in-flight packets. virtqueue busy.");
2078 		return ret;
2079 	}
2080 
2081 	if (unlikely(!vq->access_ok)) {
2082 		ret = -1;
2083 		goto out_unlock;
2084 	}
2085 
2086 	if (vq->async)
2087 		ret = vq->async->pkts_inflight_n;
2088 
2089 out_unlock:
2090 	rte_rwlock_write_unlock(&vq->access_lock);
2091 
2092 	return ret;
2093 }
2094 
2095 int
2096 rte_vhost_async_get_inflight_thread_unsafe(int vid, uint16_t queue_id)
2097 {
2098 	struct vhost_virtqueue *vq;
2099 	struct virtio_net *dev = get_device(vid);
2100 	int ret = -1;
2101 
2102 	if (dev == NULL)
2103 		return ret;
2104 
2105 	if (queue_id >= VHOST_MAX_VRING)
2106 		return ret;
2107 
2108 	vq = dev->virtqueue[queue_id];
2109 
2110 	if (vq == NULL)
2111 		return ret;
2112 
2113 	vq_assert_lock(dev, vq);
2114 
2115 	if (!vq->async)
2116 		return ret;
2117 
2118 	ret = vq->async->pkts_inflight_n;
2119 
2120 	return ret;
2121 }
2122 
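/*
 * Fill a power-monitoring condition for the ring: for packed rings the
 * address to watch is the flags field of the next avail descriptor, with
 * the AVAIL/USED bits as mask and the value expected for the current wrap
 * state; for split rings it is avail->idx, which changes (modulo the ring
 * size) once the guest makes new descriptors available.
 */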
2123 int
2124 rte_vhost_get_monitor_addr(int vid, uint16_t queue_id,
2125 		struct rte_vhost_power_monitor_cond *pmc)
2126 {
2127 	struct virtio_net *dev = get_device(vid);
2128 	struct vhost_virtqueue *vq;
2129 	int ret = 0;
2130 
2131 	if (dev == NULL)
2132 		return -1;
2133 	if (queue_id >= VHOST_MAX_VRING)
2134 		return -1;
2135 
2136 	vq = dev->virtqueue[queue_id];
2137 	if (vq == NULL)
2138 		return -1;
2139 
2140 	rte_rwlock_read_lock(&vq->access_lock);
2141 
2142 	if (unlikely(!vq->access_ok)) {
2143 		ret = -1;
2144 		goto out_unlock;
2145 	}
2146 
2147 	if (vq_is_packed(dev)) {
2148 		struct vring_packed_desc *desc;
2149 		desc = vq->desc_packed;
2150 		pmc->addr = &desc[vq->last_avail_idx].flags;
2151 		if (vq->avail_wrap_counter)
2152 			pmc->val = VRING_DESC_F_AVAIL;
2153 		else
2154 			pmc->val = VRING_DESC_F_USED;
2155 		pmc->mask = VRING_DESC_F_AVAIL | VRING_DESC_F_USED;
2156 		pmc->size = sizeof(desc[vq->last_avail_idx].flags);
2157 		pmc->match = 1;
2158 	} else {
2159 		pmc->addr = &vq->avail->idx;
2160 		pmc->val = vq->last_avail_idx & (vq->size - 1);
2161 		pmc->mask = vq->size - 1;
2162 		pmc->size = sizeof(vq->avail->idx);
2163 		pmc->match = 0;
2164 	}
2165 
2166 out_unlock:
2167 	rte_rwlock_read_unlock(&vq->access_lock);
2168 
2169 	return ret;
2170 }
2171 
2172 
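/*
 * Virtqueue statistics are exposed with names of the form
 * "<rx|tx>_q<N>_<counter>", where odd queue ids map to "rx" and even ones
 * to "tx", and N is queue_id / 2: e.g. queue_id 1 yields
 * "rx_q0_good_packets".  Statistics must have been enabled when the device
 * was set up (VIRTIO_DEV_STATS_ENABLED), otherwise -1 is returned.
 */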
2173 int
2174 rte_vhost_vring_stats_get_names(int vid, uint16_t queue_id,
2175 		struct rte_vhost_stat_name *name, unsigned int size)
2176 {
2177 	struct virtio_net *dev = get_device(vid);
2178 	unsigned int i;
2179 
2180 	if (dev == NULL)
2181 		return -1;
2182 
2183 	if (queue_id >= dev->nr_vring)
2184 		return -1;
2185 
2186 	if (!(dev->flags & VIRTIO_DEV_STATS_ENABLED))
2187 		return -1;
2188 
2189 	if (name == NULL || size < VHOST_NB_VQ_STATS)
2190 		return VHOST_NB_VQ_STATS;
2191 
2192 	for (i = 0; i < VHOST_NB_VQ_STATS; i++)
2193 		snprintf(name[i].name, sizeof(name[i].name), "%s_q%u_%s",
2194 				(queue_id & 1) ? "rx" : "tx",
2195 				queue_id / 2, vhost_vq_stat_strings[i].name);
2196 
2197 	return VHOST_NB_VQ_STATS;
2198 }
2199 
2200 int
2201 rte_vhost_vring_stats_get(int vid, uint16_t queue_id,
2202 		struct rte_vhost_stat *stats, unsigned int n)
2203 {
2204 	struct virtio_net *dev = get_device(vid);
2205 	struct vhost_virtqueue *vq;
2206 	unsigned int i;
2207 	int ret = VHOST_NB_VQ_STATS;
2208 
2209 	if (dev == NULL)
2210 		return -1;
2211 
2212 	if (queue_id >= dev->nr_vring)
2213 		return -1;
2214 
2215 	if (!(dev->flags & VIRTIO_DEV_STATS_ENABLED))
2216 		return -1;
2217 
2218 	if (stats == NULL || n < VHOST_NB_VQ_STATS)
2219 		return VHOST_NB_VQ_STATS;
2220 
2221 	vq = dev->virtqueue[queue_id];
2222 
2223 	rte_rwlock_write_lock(&vq->access_lock);
2224 
2225 	if (unlikely(!vq->access_ok)) {
2226 		ret = -1;
2227 		goto out_unlock;
2228 	}
2229 
2230 	for (i = 0; i < VHOST_NB_VQ_STATS; i++) {
2231 		/*
2232 		 * No need to read the counters atomically here, as the
2233 		 * write access_lock taken above prevents them from being updated.
2234 		 */
2235 		stats[i].value =
2236 			*(uint64_t *)(((char *)vq) + vhost_vq_stat_strings[i].offset);
2237 		stats[i].id = i;
2238 	}
2239 
2240 out_unlock:
2241 	rte_rwlock_write_unlock(&vq->access_lock);
2242 
2243 	return ret;
2244 }
2245 
2246 int rte_vhost_vring_stats_reset(int vid, uint16_t queue_id)
2247 {
2248 	struct virtio_net *dev = get_device(vid);
2249 	struct vhost_virtqueue *vq;
2250 	int ret = 0;
2251 
2252 	if (dev == NULL)
2253 		return -1;
2254 
2255 	if (queue_id >= dev->nr_vring)
2256 		return -1;
2257 
2258 	if (!(dev->flags & VIRTIO_DEV_STATS_ENABLED))
2259 		return -1;
2260 
2261 	vq = dev->virtqueue[queue_id];
2262 
2263 	rte_rwlock_write_lock(&vq->access_lock);
2264 
2265 	if (unlikely(!vq->access_ok)) {
2266 		ret = -1;
2267 		goto out_unlock;
2268 	}
2269 	/*
2270 	 * No need to reset the counters atomically here, as the
2271 	 * write access_lock taken above prevents them from being updated.
2272 	 */
2273 	memset(&vq->stats, 0, sizeof(vq->stats));
2274 
2275 out_unlock:
2276 	rte_rwlock_write_unlock(&vq->access_lock);
2277 
2278 	return ret;
2279 }
2280 
2281 int
2282 rte_vhost_async_dma_unconfigure(int16_t dma_id, uint16_t vchan_id)
2283 {
2284 	struct rte_dma_info info;
2285 	struct rte_dma_stats stats = { 0 };
2286 
2287 	pthread_mutex_lock(&vhost_dma_lock);
2288 
2289 	if (!rte_dma_is_valid(dma_id)) {
2290 		VHOST_CONFIG_LOG("dma", ERR, "DMA %d is not found.", dma_id);
2291 		goto error;
2292 	}
2293 
2294 	if (rte_dma_info_get(dma_id, &info) != 0) {
2295 		VHOST_CONFIG_LOG("dma", ERR, "Fail to get DMA %d information.", dma_id);
2296 		goto error;
2297 	}
2298 
2299 	if (vchan_id >= info.max_vchans || !dma_copy_track[dma_id].vchans ||
2300 		!dma_copy_track[dma_id].vchans[vchan_id].pkts_cmpl_flag_addr) {
2301 		VHOST_CONFIG_LOG("dma", ERR, "Invalid channel %d:%u.", dma_id, vchan_id);
2302 		goto error;
2303 	}
2304 
2305 	if (rte_dma_stats_get(dma_id, vchan_id, &stats) != 0) {
2306 		VHOST_CONFIG_LOG("dma", ERR,
2307 				 "Failed to get stats for DMA %d vChannel %u.", dma_id, vchan_id);
2308 		goto error;
2309 	}
2310 
2311 	if (stats.submitted - stats.completed != 0) {
2312 		VHOST_CONFIG_LOG("dma", ERR,
2313 				 "Do not unconfigure when there are inflight packets.");
2314 		goto error;
2315 	}
2316 
2317 	rte_free(dma_copy_track[dma_id].vchans[vchan_id].pkts_cmpl_flag_addr);
2318 	dma_copy_track[dma_id].vchans[vchan_id].pkts_cmpl_flag_addr = NULL;
2319 	dma_copy_track[dma_id].nr_vchans--;
2320 
2321 	if (dma_copy_track[dma_id].nr_vchans == 0) {
2322 		rte_free(dma_copy_track[dma_id].vchans);
2323 		dma_copy_track[dma_id].vchans = NULL;
2324 	}
2325 
2326 	pthread_mutex_unlock(&vhost_dma_lock);
2327 	return 0;
2328 
2329 error:
2330 	pthread_mutex_unlock(&vhost_dma_lock);
2331 	return -1;
2332 }
2333 
2334 RTE_LOG_REGISTER_SUFFIX(vhost_config_log_level, config, INFO);
2335 RTE_LOG_REGISTER_SUFFIX(vhost_data_log_level, data, WARNING);
2336