xref: /dpdk/lib/vhost/vhost.c (revision c56185fc183fc0532d2f03aaf04bbf0989ea91a5)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2017 Intel Corporation
3  */
4 
5 #include <linux/vhost.h>
6 #include <linux/virtio_net.h>
7 #include <stdint.h>
8 #include <stdlib.h>
9 #include <pthread.h>
10 #ifdef RTE_LIBRTE_VHOST_NUMA
11 #include <numa.h>
12 #include <numaif.h>
13 #endif
14 
15 #include <rte_errno.h>
16 #include <rte_log.h>
17 #include <rte_memory.h>
18 #include <rte_malloc.h>
19 #include <rte_vhost.h>
20 
21 #include "iotlb.h"
22 #include "vhost.h"
23 #include "vhost_user.h"
24 
25 struct virtio_net *vhost_devices[RTE_MAX_VHOST_DEVICE];
26 pthread_mutex_t vhost_dev_lock = PTHREAD_MUTEX_INITIALIZER;
27 pthread_mutex_t vhost_dma_lock = PTHREAD_MUTEX_INITIALIZER;
28 
29 struct vhost_vq_stats_name_off {
30 	char name[RTE_VHOST_STATS_NAME_SIZE];
31 	unsigned int offset;
32 };
33 
34 static const struct vhost_vq_stats_name_off vhost_vq_stat_strings[] = {
35 	{"good_packets",           offsetof(struct vhost_virtqueue, stats.packets)},
36 	{"good_bytes",             offsetof(struct vhost_virtqueue, stats.bytes)},
37 	{"multicast_packets",      offsetof(struct vhost_virtqueue, stats.multicast)},
38 	{"broadcast_packets",      offsetof(struct vhost_virtqueue, stats.broadcast)},
39 	{"undersize_packets",      offsetof(struct vhost_virtqueue, stats.size_bins[0])},
40 	{"size_64_packets",        offsetof(struct vhost_virtqueue, stats.size_bins[1])},
41 	{"size_65_127_packets",    offsetof(struct vhost_virtqueue, stats.size_bins[2])},
42 	{"size_128_255_packets",   offsetof(struct vhost_virtqueue, stats.size_bins[3])},
43 	{"size_256_511_packets",   offsetof(struct vhost_virtqueue, stats.size_bins[4])},
44 	{"size_512_1023_packets",  offsetof(struct vhost_virtqueue, stats.size_bins[5])},
45 	{"size_1024_1518_packets", offsetof(struct vhost_virtqueue, stats.size_bins[6])},
46 	{"size_1519_max_packets",  offsetof(struct vhost_virtqueue, stats.size_bins[7])},
47 	{"guest_notifications",    offsetof(struct vhost_virtqueue, stats.guest_notifications)},
48 	{"guest_notifications_offloaded", offsetof(struct vhost_virtqueue,
49 		stats.guest_notifications_offloaded)},
50 	{"guest_notifications_error", offsetof(struct vhost_virtqueue,
51 		stats.guest_notifications_error)},
52 	{"iotlb_hits",             offsetof(struct vhost_virtqueue, stats.iotlb_hits)},
53 	{"iotlb_misses",           offsetof(struct vhost_virtqueue, stats.iotlb_misses)},
54 	{"inflight_submitted",     offsetof(struct vhost_virtqueue, stats.inflight_submitted)},
55 	{"inflight_completed",     offsetof(struct vhost_virtqueue, stats.inflight_completed)},
56 };
57 
58 #define VHOST_NB_VQ_STATS RTE_DIM(vhost_vq_stat_strings)
59 
60 static int
61 vhost_iotlb_miss(struct virtio_net *dev, uint64_t iova, uint8_t perm)
62 {
63 	return dev->backend_ops->iotlb_miss(dev, iova, perm);
64 }
65 
66 uint64_t
67 __vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq,
68 		    uint64_t iova, uint64_t *size, uint8_t perm)
69 {
70 	uint64_t vva, tmp_size;
71 
72 	if (unlikely(!*size))
73 		return 0;
74 
75 	tmp_size = *size;
76 
77 	vva = vhost_user_iotlb_cache_find(dev, iova, &tmp_size, perm);
78 	if (tmp_size == *size) {
79 		if (dev->flags & VIRTIO_DEV_STATS_ENABLED)
80 			vq->stats.iotlb_hits++;
81 		return vva;
82 	}
83 
84 	if (dev->flags & VIRTIO_DEV_STATS_ENABLED)
85 		vq->stats.iotlb_misses++;
86 
87 	iova += tmp_size;
88 
89 	if (!vhost_user_iotlb_pending_miss(dev, iova, perm)) {
90 		/*
91 		 * iotlb_lock is read-locked for a full burst,
92 		 * but it only protects the iotlb cache.
93 		 * In case of IOTLB miss, we might block on the socket,
94 		 * which could cause a deadlock with QEMU if an IOTLB update
95 		 * is being handled. We can safely unlock here to avoid it.
96 		 */
97 		vhost_user_iotlb_rd_unlock(vq);
98 
99 		vhost_user_iotlb_pending_insert(dev, iova, perm);
100 		if (vhost_iotlb_miss(dev, iova, perm)) {
101 			VHOST_LOG_DATA(dev->ifname, ERR,
102 				"IOTLB miss req failed for IOVA 0x%" PRIx64 "\n",
103 				iova);
104 			vhost_user_iotlb_pending_remove(dev, iova, 1, perm);
105 		}
106 
107 		vhost_user_iotlb_rd_lock(vq);
108 	}
109 
110 	tmp_size = *size;
111 	/* Retry in case of VDUSE, as it is synchronous */
112 	vva = vhost_user_iotlb_cache_find(dev, iova, &tmp_size, perm);
113 	if (tmp_size == *size)
114 		return vva;
115 
116 	return 0;
117 }
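/*
 * Illustrative caller sketch (informative only, not library code): data-path
 * helpers are expected to hold the IOTLB read lock around the translation,
 * and a returned length shorter than requested means the region is not
 * contiguous in host memory; callers either fail or copy chunk by chunk as
 * vhost_alloc_copy_ind_table() does below. "needed" is a hypothetical size.
 *
 *	uint64_t len = needed;
 *	uint64_t vva;
 *
 *	vhost_user_iotlb_rd_lock(vq);
 *	vva = vhost_iova_to_vva(dev, vq, iova, &len, VHOST_ACCESS_RO);
 *	vhost_user_iotlb_rd_unlock(vq);
 *	if (unlikely(!vva || len != needed))
 *		return -1;
 */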
118 
119 #define VHOST_LOG_PAGE	4096
120 
121 /*
122  * Atomically set a bit in memory.
123  */
124 static __rte_always_inline void
125 vhost_set_bit(unsigned int nr, volatile uint8_t *addr)
126 {
127 #if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100)
128 	/*
129 	 * __sync_ built-ins are deprecated, but __atomic_ ones
130 	 * are poorly optimized in older GCC versions.
131 	 */
132 	__sync_fetch_and_or_1(addr, (1U << nr));
133 #else
134 	__atomic_fetch_or(addr, (1U << nr), __ATOMIC_RELAXED);
135 #endif
136 }
137 
138 static __rte_always_inline void
139 vhost_log_page(uint8_t *log_base, uint64_t page)
140 {
141 	vhost_set_bit(page % 8, &log_base[page / 8]);
142 }
143 
144 void
145 __vhost_log_write(struct virtio_net *dev, uint64_t addr, uint64_t len)
146 {
147 	uint64_t page;
148 
149 	if (unlikely(!dev->log_base || !len))
150 		return;
151 
152 	if (unlikely(dev->log_size <= ((addr + len - 1) / VHOST_LOG_PAGE / 8)))
153 		return;
154 
155 	/* To make sure guest memory updates are committed before logging */
156 	rte_atomic_thread_fence(__ATOMIC_RELEASE);
157 
158 	page = addr / VHOST_LOG_PAGE;
159 	while (page * VHOST_LOG_PAGE < addr + len) {
160 		vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);
161 		page += 1;
162 	}
163 }
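/*
 * A worked example of the dirty-log layout (informative only): the log is a
 * bitmap with one bit per VHOST_LOG_PAGE (4 KiB) page of guest memory, eight
 * pages per byte. Logging a write of len = 0x2000 bytes at addr = 0x5000
 * touches pages 5 and 6, i.e. sets bits 5 and 6 of byte 0 of the bitmap:
 *
 *	page index  = addr / VHOST_LOG_PAGE	-> 5, then 6
 *	byte offset = page / 8			-> 0
 *	bit         = page % 8			-> 5, then 6
 *
 * The dev->log_size check above rejects addresses whose byte offset,
 * (addr + len - 1) / VHOST_LOG_PAGE / 8, would fall outside the shared log.
 */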
164 
165 void
166 __vhost_log_write_iova(struct virtio_net *dev, struct vhost_virtqueue *vq,
167 			     uint64_t iova, uint64_t len)
168 {
169 	uint64_t hva, gpa, map_len;
170 	map_len = len;
171 
172 	hva = __vhost_iova_to_vva(dev, vq, iova, &map_len, VHOST_ACCESS_RW);
173 	if (map_len != len) {
174 		VHOST_LOG_DATA(dev->ifname, ERR,
175 			"failed to write log for IOVA 0x%" PRIx64 ". No IOTLB entry found\n",
176 			iova);
177 		return;
178 	}
179 
180 	gpa = hva_to_gpa(dev, hva, len);
181 	if (gpa)
182 		__vhost_log_write(dev, gpa, len);
183 }
184 
185 void
186 __vhost_log_cache_sync(struct virtio_net *dev, struct vhost_virtqueue *vq)
187 {
188 	unsigned long *log_base;
189 	int i;
190 
191 	if (unlikely(!dev->log_base))
192 		return;
193 
194 	/* No cache, nothing to sync */
195 	if (unlikely(!vq->log_cache))
196 		return;
197 
198 	rte_atomic_thread_fence(__ATOMIC_RELEASE);
199 
200 	log_base = (unsigned long *)(uintptr_t)dev->log_base;
201 
202 	for (i = 0; i < vq->log_cache_nb_elem; i++) {
203 		struct log_cache_entry *elem = vq->log_cache + i;
204 
205 #if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100)
206 		/*
207 		 * '__sync' builtins are deprecated, but '__atomic' ones
208 		 * are poorly optimized in older GCC versions.
209 		 */
210 		__sync_fetch_and_or(log_base + elem->offset, elem->val);
211 #else
212 		__atomic_fetch_or(log_base + elem->offset, elem->val,
213 				__ATOMIC_RELAXED);
214 #endif
215 	}
216 
217 	rte_atomic_thread_fence(__ATOMIC_RELEASE);
218 
219 	vq->log_cache_nb_elem = 0;
220 }
221 
222 static __rte_always_inline void
223 vhost_log_cache_page(struct virtio_net *dev, struct vhost_virtqueue *vq,
224 			uint64_t page)
225 {
226 	uint32_t bit_nr = page % (sizeof(unsigned long) << 3);
227 	uint32_t offset = page / (sizeof(unsigned long) << 3);
228 	int i;
229 
230 	if (unlikely(!vq->log_cache)) {
231 		/* No logging cache allocated, write dirty log map directly */
232 		rte_atomic_thread_fence(__ATOMIC_RELEASE);
233 		vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);
234 
235 		return;
236 	}
237 
238 	for (i = 0; i < vq->log_cache_nb_elem; i++) {
239 		struct log_cache_entry *elem = vq->log_cache + i;
240 
241 		if (elem->offset == offset) {
242 			elem->val |= (1UL << bit_nr);
243 			return;
244 		}
245 	}
246 
247 	if (unlikely(i >= VHOST_LOG_CACHE_NR)) {
248 		/*
249 		 * No more room for a new log cache entry,
250 		 * so write the dirty log map directly.
251 		 */
252 		rte_atomic_thread_fence(__ATOMIC_RELEASE);
253 		vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);
254 
255 		return;
256 	}
257 
258 	vq->log_cache[i].offset = offset;
259 	vq->log_cache[i].val = (1UL << bit_nr);
260 	vq->log_cache_nb_elem++;
261 }
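/*
 * Informative example of the cache indexing above, assuming a 64-bit
 * unsigned long: each cache entry shadows one word of the dirty bitmap,
 * i.e. 64 pages (256 KiB of guest memory). Caching page 130 gives:
 *
 *	offset = 130 / 64 = 2	(third word of the bitmap)
 *	bit_nr = 130 % 64 = 2	(val |= 1UL << 2)
 *
 * The cached words are only merged into the shared log by
 * __vhost_log_cache_sync(), or written directly when no cache is allocated
 * or all VHOST_LOG_CACHE_NR entries are in use.
 */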
262 
263 void
264 __vhost_log_cache_write(struct virtio_net *dev, struct vhost_virtqueue *vq,
265 			uint64_t addr, uint64_t len)
266 {
267 	uint64_t page;
268 
269 	if (unlikely(!dev->log_base || !len))
270 		return;
271 
272 	if (unlikely(dev->log_size <= ((addr + len - 1) / VHOST_LOG_PAGE / 8)))
273 		return;
274 
275 	page = addr / VHOST_LOG_PAGE;
276 	while (page * VHOST_LOG_PAGE < addr + len) {
277 		vhost_log_cache_page(dev, vq, page);
278 		page += 1;
279 	}
280 }
281 
282 void
283 __vhost_log_cache_write_iova(struct virtio_net *dev, struct vhost_virtqueue *vq,
284 			     uint64_t iova, uint64_t len)
285 {
286 	uint64_t hva, gpa, map_len;
287 	map_len = len;
288 
289 	hva = __vhost_iova_to_vva(dev, vq, iova, &map_len, VHOST_ACCESS_RW);
290 	if (map_len != len) {
291 		VHOST_LOG_DATA(dev->ifname, ERR,
292 			"failed to write log for IOVA 0x%" PRIx64 ". No IOTLB entry found\n",
293 			iova);
294 		return;
295 	}
296 
297 	gpa = hva_to_gpa(dev, hva, len);
298 	if (gpa)
299 		__vhost_log_cache_write(dev, vq, gpa, len);
300 }
301 
302 void *
303 vhost_alloc_copy_ind_table(struct virtio_net *dev, struct vhost_virtqueue *vq,
304 		uint64_t desc_addr, uint64_t desc_len)
305 {
306 	void *idesc;
307 	uint64_t src, dst;
308 	uint64_t len, remain = desc_len;
309 
310 	idesc = rte_malloc_socket(__func__, desc_len, 0, vq->numa_node);
311 	if (unlikely(!idesc))
312 		return NULL;
313 
314 	dst = (uint64_t)(uintptr_t)idesc;
315 
316 	while (remain) {
317 		len = remain;
318 		src = vhost_iova_to_vva(dev, vq, desc_addr, &len,
319 				VHOST_ACCESS_RO);
320 		if (unlikely(!src || !len)) {
321 			rte_free(idesc);
322 			return NULL;
323 		}
324 
325 		rte_memcpy((void *)(uintptr_t)dst, (void *)(uintptr_t)src, len);
326 
327 		remain -= len;
328 		dst += len;
329 		desc_addr += len;
330 	}
331 
332 	return idesc;
333 }
334 
335 void
336 cleanup_vq(struct vhost_virtqueue *vq, int destroy)
337 {
338 	if ((vq->callfd >= 0) && (destroy != 0))
339 		close(vq->callfd);
340 	if (vq->kickfd >= 0)
341 		close(vq->kickfd);
342 }
343 
344 void
345 cleanup_vq_inflight(struct virtio_net *dev, struct vhost_virtqueue *vq)
346 {
347 	if (!(dev->protocol_features &
348 	    (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)))
349 		return;
350 
351 	if (vq_is_packed(dev)) {
352 		if (vq->inflight_packed)
353 			vq->inflight_packed = NULL;
354 	} else {
355 		if (vq->inflight_split)
356 			vq->inflight_split = NULL;
357 	}
358 
359 	if (vq->resubmit_inflight) {
360 		if (vq->resubmit_inflight->resubmit_list) {
361 			rte_free(vq->resubmit_inflight->resubmit_list);
362 			vq->resubmit_inflight->resubmit_list = NULL;
363 		}
364 		rte_free(vq->resubmit_inflight);
365 		vq->resubmit_inflight = NULL;
366 	}
367 }
368 
369 /*
370  * Unmap any memory, close any file descriptors and
371  * free any memory owned by a device.
372  */
373 void
374 cleanup_device(struct virtio_net *dev, int destroy)
375 {
376 	uint32_t i;
377 
378 	vhost_backend_cleanup(dev);
379 
380 	for (i = 0; i < dev->nr_vring; i++) {
381 		cleanup_vq(dev->virtqueue[i], destroy);
382 		cleanup_vq_inflight(dev, dev->virtqueue[i]);
383 	}
384 }
385 
386 static void
387 vhost_free_async_mem(struct vhost_virtqueue *vq)
388 	__rte_exclusive_locks_required(&vq->access_lock)
389 {
390 	if (!vq->async)
391 		return;
392 
393 	rte_free(vq->async->pkts_info);
394 	rte_free(vq->async->pkts_cmpl_flag);
395 
396 	rte_free(vq->async->buffers_packed);
397 	vq->async->buffers_packed = NULL;
398 	rte_free(vq->async->descs_split);
399 	vq->async->descs_split = NULL;
400 
401 	rte_free(vq->async);
402 	vq->async = NULL;
403 }
404 
405 void
406 free_vq(struct virtio_net *dev, struct vhost_virtqueue *vq)
407 {
408 	if (vq_is_packed(dev))
409 		rte_free(vq->shadow_used_packed);
410 	else
411 		rte_free(vq->shadow_used_split);
412 
413 	rte_rwlock_write_lock(&vq->access_lock);
414 	vhost_free_async_mem(vq);
415 	rte_rwlock_write_unlock(&vq->access_lock);
416 	rte_free(vq->batch_copy_elems);
417 	rte_free(vq->log_cache);
418 	rte_free(vq);
419 }
420 
421 /*
422  * Release virtqueues and device memory.
423  */
424 static void
425 free_device(struct virtio_net *dev)
426 {
427 	uint32_t i;
428 
429 	for (i = 0; i < dev->nr_vring; i++)
430 		free_vq(dev, dev->virtqueue[i]);
431 
432 	rte_free(dev);
433 }
434 
435 static __rte_always_inline int
436 log_translate(struct virtio_net *dev, struct vhost_virtqueue *vq)
437 	__rte_shared_locks_required(&vq->iotlb_lock)
438 {
439 	if (likely(!(vq->ring_addrs.flags & (1 << VHOST_VRING_F_LOG))))
440 		return 0;
441 
442 	vq->log_guest_addr = translate_log_addr(dev, vq,
443 						vq->ring_addrs.log_guest_addr);
444 	if (vq->log_guest_addr == 0)
445 		return -1;
446 
447 	return 0;
448 }
449 
450 /*
451  * Converts a vring log address to a GPA.
452  * If IOMMU is enabled, the log address is an IOVA.
453  * If IOMMU is not enabled, the log address is already a GPA.
454  */
455 uint64_t
456 translate_log_addr(struct virtio_net *dev, struct vhost_virtqueue *vq,
457 		uint64_t log_addr)
458 {
459 	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) {
460 		const uint64_t exp_size = sizeof(uint64_t);
461 		uint64_t hva, gpa;
462 		uint64_t size = exp_size;
463 
464 		hva = vhost_iova_to_vva(dev, vq, log_addr,
465 					&size, VHOST_ACCESS_RW);
466 
467 		if (size != exp_size)
468 			return 0;
469 
470 		gpa = hva_to_gpa(dev, hva, exp_size);
471 		if (!gpa) {
472 			VHOST_LOG_DATA(dev->ifname, ERR,
473 				"failed to find GPA for log_addr: 0x%"
474 				PRIx64 " hva: 0x%" PRIx64 "\n",
475 				log_addr, hva);
476 			return 0;
477 		}
478 		return gpa;
479 
480 	} else
481 		return log_addr;
482 }
483 
484 static int
485 vring_translate_split(struct virtio_net *dev, struct vhost_virtqueue *vq)
486 	__rte_shared_locks_required(&vq->iotlb_lock)
487 {
488 	uint64_t req_size, size;
489 
490 	req_size = sizeof(struct vring_desc) * vq->size;
491 	size = req_size;
492 	vq->desc = (struct vring_desc *)(uintptr_t)vhost_iova_to_vva(dev, vq,
493 						vq->ring_addrs.desc_user_addr,
494 						&size, VHOST_ACCESS_RW);
495 	if (!vq->desc || size != req_size)
496 		return -1;
497 
498 	req_size = sizeof(struct vring_avail);
499 	req_size += sizeof(uint16_t) * vq->size;
500 	if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))
501 		req_size += sizeof(uint16_t);
502 	size = req_size;
503 	vq->avail = (struct vring_avail *)(uintptr_t)vhost_iova_to_vva(dev, vq,
504 						vq->ring_addrs.avail_user_addr,
505 						&size, VHOST_ACCESS_RW);
506 	if (!vq->avail || size != req_size)
507 		return -1;
508 
509 	req_size = sizeof(struct vring_used);
510 	req_size += sizeof(struct vring_used_elem) * vq->size;
511 	if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))
512 		req_size += sizeof(uint16_t);
513 	size = req_size;
514 	vq->used = (struct vring_used *)(uintptr_t)vhost_iova_to_vva(dev, vq,
515 						vq->ring_addrs.used_user_addr,
516 						&size, VHOST_ACCESS_RW);
517 	if (!vq->used || size != req_size)
518 		return -1;
519 
520 	return 0;
521 }
522 
523 static int
524 vring_translate_packed(struct virtio_net *dev, struct vhost_virtqueue *vq)
525 	__rte_shared_locks_required(&vq->iotlb_lock)
526 {
527 	uint64_t req_size, size;
528 
529 	req_size = sizeof(struct vring_packed_desc) * vq->size;
530 	size = req_size;
531 	vq->desc_packed = (struct vring_packed_desc *)(uintptr_t)
532 		vhost_iova_to_vva(dev, vq, vq->ring_addrs.desc_user_addr,
533 				&size, VHOST_ACCESS_RW);
534 	if (!vq->desc_packed || size != req_size)
535 		return -1;
536 
537 	req_size = sizeof(struct vring_packed_desc_event);
538 	size = req_size;
539 	vq->driver_event = (struct vring_packed_desc_event *)(uintptr_t)
540 		vhost_iova_to_vva(dev, vq, vq->ring_addrs.avail_user_addr,
541 				&size, VHOST_ACCESS_RW);
542 	if (!vq->driver_event || size != req_size)
543 		return -1;
544 
545 	req_size = sizeof(struct vring_packed_desc_event);
546 	size = req_size;
547 	vq->device_event = (struct vring_packed_desc_event *)(uintptr_t)
548 		vhost_iova_to_vva(dev, vq, vq->ring_addrs.used_user_addr,
549 				&size, VHOST_ACCESS_RW);
550 	if (!vq->device_event || size != req_size)
551 		return -1;
552 
553 	return 0;
554 }
555 
556 int
557 vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq)
558 {
559 
560 	if (!(dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)))
561 		return -1;
562 
563 	if (vq_is_packed(dev)) {
564 		if (vring_translate_packed(dev, vq) < 0)
565 			return -1;
566 	} else {
567 		if (vring_translate_split(dev, vq) < 0)
568 			return -1;
569 	}
570 
571 	if (log_translate(dev, vq) < 0)
572 		return -1;
573 
574 	vq->access_ok = true;
575 
576 	return 0;
577 }
578 
579 void
580 vring_invalidate(struct virtio_net *dev __rte_unused, struct vhost_virtqueue *vq)
581 {
582 	vhost_user_iotlb_wr_lock(vq);
583 
584 	vq->access_ok = false;
585 	vq->desc = NULL;
586 	vq->avail = NULL;
587 	vq->used = NULL;
588 	vq->log_guest_addr = 0;
589 
590 	vhost_user_iotlb_wr_unlock(vq);
591 }
592 
593 static void
594 init_vring_queue(struct virtio_net *dev __rte_unused, struct vhost_virtqueue *vq,
595 	uint32_t vring_idx)
596 {
597 	int numa_node = SOCKET_ID_ANY;
598 
599 	memset(vq, 0, sizeof(struct vhost_virtqueue));
600 
601 	vq->index = vring_idx;
602 	vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD;
603 	vq->callfd = VIRTIO_UNINITIALIZED_EVENTFD;
604 	vq->notif_enable = VIRTIO_UNINITIALIZED_NOTIF;
605 
606 #ifdef RTE_LIBRTE_VHOST_NUMA
607 	if (get_mempolicy(&numa_node, NULL, 0, vq, MPOL_F_NODE | MPOL_F_ADDR)) {
608 		VHOST_LOG_CONFIG(dev->ifname, ERR, "failed to query numa node: %s\n",
609 			rte_strerror(errno));
610 		numa_node = SOCKET_ID_ANY;
611 	}
612 #endif
613 	vq->numa_node = numa_node;
614 }
615 
616 static void
617 reset_vring_queue(struct virtio_net *dev, struct vhost_virtqueue *vq)
618 {
619 	int callfd;
620 
621 	callfd = vq->callfd;
622 	init_vring_queue(dev, vq, vq->index);
623 	vq->callfd = callfd;
624 }
625 
626 int
627 alloc_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
628 {
629 	struct vhost_virtqueue *vq;
630 	uint32_t i;
631 
632 	/* Also allocate holes, if any, up to requested vring index. */
633 	for (i = 0; i <= vring_idx; i++) {
634 		if (dev->virtqueue[i])
635 			continue;
636 
637 		vq = rte_zmalloc(NULL, sizeof(struct vhost_virtqueue), 0);
638 		if (vq == NULL) {
639 			VHOST_LOG_CONFIG(dev->ifname, ERR,
640 				"failed to allocate memory for vring %u.\n",
641 				i);
642 			return -1;
643 		}
644 
645 		dev->virtqueue[i] = vq;
646 		init_vring_queue(dev, vq, i);
647 		rte_rwlock_init(&vq->access_lock);
648 		rte_rwlock_init(&vq->iotlb_lock);
649 		vq->avail_wrap_counter = 1;
650 		vq->used_wrap_counter = 1;
651 		vq->signalled_used_valid = false;
652 	}
653 
654 	dev->nr_vring = RTE_MAX(dev->nr_vring, vring_idx + 1);
655 
656 	return 0;
657 }
658 
659 /*
660  * Reset some variables in the device structure, while keeping a few
661  * others untouched, such as vid, ifname and nr_vring: they
662  * should remain the same unless the device is removed.
663  */
664 void
665 reset_device(struct virtio_net *dev)
666 {
667 	uint32_t i;
668 
669 	dev->features = 0;
670 	dev->protocol_features = 0;
671 	dev->flags &= VIRTIO_DEV_BUILTIN_VIRTIO_NET;
672 
673 	for (i = 0; i < dev->nr_vring; i++) {
674 		struct vhost_virtqueue *vq = dev->virtqueue[i];
675 
676 		if (!vq) {
677 			VHOST_LOG_CONFIG(dev->ifname, ERR,
678 				"failed to reset vring, virtqueue not allocated (%d)\n", i);
679 			continue;
680 		}
681 		reset_vring_queue(dev, vq);
682 	}
683 }
684 
685 /*
686  * Invoked when a new vhost-user connection is established (i.e. when
687  * a new virtio device is being attached).
688  */
689 int
690 vhost_new_device(struct vhost_backend_ops *ops)
691 {
692 	struct virtio_net *dev;
693 	int i;
694 
695 	if (ops == NULL) {
696 		VHOST_LOG_CONFIG("device", ERR, "missing backend ops.\n");
697 		return -1;
698 	}
699 
700 	if (ops->iotlb_miss == NULL) {
701 		VHOST_LOG_CONFIG("device", ERR, "missing IOTLB miss backend op.\n");
702 		return -1;
703 	}
704 
705 	if (ops->inject_irq == NULL) {
706 		VHOST_LOG_CONFIG("device", ERR, "missing IRQ injection backend op.\n");
707 		return -1;
708 	}
709 
710 	pthread_mutex_lock(&vhost_dev_lock);
711 	for (i = 0; i < RTE_MAX_VHOST_DEVICE; i++) {
712 		if (vhost_devices[i] == NULL)
713 			break;
714 	}
715 
716 	if (i == RTE_MAX_VHOST_DEVICE) {
717 		VHOST_LOG_CONFIG("device", ERR, "failed to find a free slot for new device.\n");
718 		pthread_mutex_unlock(&vhost_dev_lock);
719 		return -1;
720 	}
721 
722 	dev = rte_zmalloc(NULL, sizeof(struct virtio_net), 0);
723 	if (dev == NULL) {
724 		VHOST_LOG_CONFIG("device", ERR, "failed to allocate memory for new device.\n");
725 		pthread_mutex_unlock(&vhost_dev_lock);
726 		return -1;
727 	}
728 
729 	vhost_devices[i] = dev;
730 	pthread_mutex_unlock(&vhost_dev_lock);
731 
732 	dev->vid = i;
733 	dev->flags = VIRTIO_DEV_BUILTIN_VIRTIO_NET;
734 	dev->backend_req_fd = -1;
735 	dev->postcopy_ufd = -1;
736 	rte_spinlock_init(&dev->backend_req_lock);
737 	dev->backend_ops = ops;
738 
739 	return i;
740 }
741 
742 void
743 vhost_destroy_device_notify(struct virtio_net *dev)
744 {
745 	struct rte_vdpa_device *vdpa_dev;
746 
747 	if (dev->flags & VIRTIO_DEV_RUNNING) {
748 		vdpa_dev = dev->vdpa_dev;
749 		if (vdpa_dev)
750 			vdpa_dev->ops->dev_close(dev->vid);
751 		dev->flags &= ~VIRTIO_DEV_RUNNING;
752 		dev->notify_ops->destroy_device(dev->vid);
753 	}
754 }
755 
756 /*
757  * Invoked when the vhost-user connection is broken (i.e. when
758  * the virtio device is being detached).
759  */
760 void
761 vhost_destroy_device(int vid)
762 {
763 	struct virtio_net *dev = get_device(vid);
764 
765 	if (dev == NULL)
766 		return;
767 
768 	vhost_destroy_device_notify(dev);
769 
770 	cleanup_device(dev, 1);
771 	free_device(dev);
772 
773 	vhost_devices[vid] = NULL;
774 }
775 
776 void
777 vhost_attach_vdpa_device(int vid, struct rte_vdpa_device *vdpa_dev)
778 {
779 	struct virtio_net *dev = get_device(vid);
780 
781 	if (dev == NULL)
782 		return;
783 
784 	dev->vdpa_dev = vdpa_dev;
785 }
786 
787 void
788 vhost_set_ifname(int vid, const char *if_name, unsigned int if_len)
789 {
790 	struct virtio_net *dev;
791 	unsigned int len;
792 
793 	dev = get_device(vid);
794 	if (dev == NULL)
795 		return;
796 
797 	len = if_len > sizeof(dev->ifname) ?
798 		sizeof(dev->ifname) : if_len;
799 
800 	strncpy(dev->ifname, if_name, len);
801 	dev->ifname[sizeof(dev->ifname) - 1] = '\0';
802 }
803 
804 void
805 vhost_setup_virtio_net(int vid, bool enable, bool compliant_ol_flags, bool stats_enabled,
806 	bool support_iommu)
807 {
808 	struct virtio_net *dev = get_device(vid);
809 
810 	if (dev == NULL)
811 		return;
812 
813 	if (enable)
814 		dev->flags |= VIRTIO_DEV_BUILTIN_VIRTIO_NET;
815 	else
816 		dev->flags &= ~VIRTIO_DEV_BUILTIN_VIRTIO_NET;
817 	if (!compliant_ol_flags)
818 		dev->flags |= VIRTIO_DEV_LEGACY_OL_FLAGS;
819 	else
820 		dev->flags &= ~VIRTIO_DEV_LEGACY_OL_FLAGS;
821 	if (stats_enabled)
822 		dev->flags |= VIRTIO_DEV_STATS_ENABLED;
823 	else
824 		dev->flags &= ~VIRTIO_DEV_STATS_ENABLED;
825 	if (support_iommu)
826 		dev->flags |= VIRTIO_DEV_SUPPORT_IOMMU;
827 	else
828 		dev->flags &= ~VIRTIO_DEV_SUPPORT_IOMMU;
829 
830 	if (vhost_user_iotlb_init(dev) < 0)
831 		VHOST_LOG_CONFIG("device", ERR, "failed to init IOTLB\n");
832 
833 }
834 
835 void
836 vhost_enable_extbuf(int vid)
837 {
838 	struct virtio_net *dev = get_device(vid);
839 
840 	if (dev == NULL)
841 		return;
842 
843 	dev->extbuf = 1;
844 }
845 
846 void
847 vhost_enable_linearbuf(int vid)
848 {
849 	struct virtio_net *dev = get_device(vid);
850 
851 	if (dev == NULL)
852 		return;
853 
854 	dev->linearbuf = 1;
855 }
856 
857 int
858 rte_vhost_get_mtu(int vid, uint16_t *mtu)
859 {
860 	struct virtio_net *dev = get_device(vid);
861 
862 	if (dev == NULL || mtu == NULL)
863 		return -ENODEV;
864 
865 	if (!(dev->flags & VIRTIO_DEV_READY))
866 		return -EAGAIN;
867 
868 	if (!(dev->features & (1ULL << VIRTIO_NET_F_MTU)))
869 		return -ENOTSUP;
870 
871 	*mtu = dev->mtu;
872 
873 	return 0;
874 }
875 
876 int
877 rte_vhost_get_numa_node(int vid)
878 {
879 #ifdef RTE_LIBRTE_VHOST_NUMA
880 	struct virtio_net *dev = get_device(vid);
881 	int numa_node;
882 	int ret;
883 
884 	if (dev == NULL || numa_available() != 0)
885 		return -1;
886 
887 	ret = get_mempolicy(&numa_node, NULL, 0, dev,
888 			    MPOL_F_NODE | MPOL_F_ADDR);
889 	if (ret < 0) {
890 		VHOST_LOG_CONFIG(dev->ifname, ERR, "failed to query numa node: %s\n",
891 			rte_strerror(errno));
892 		return -1;
893 	}
894 
895 	return numa_node;
896 #else
897 	RTE_SET_USED(vid);
898 	return -1;
899 #endif
900 }
901 
902 uint16_t
903 rte_vhost_get_vring_num(int vid)
904 {
905 	struct virtio_net *dev = get_device(vid);
906 
907 	if (dev == NULL)
908 		return 0;
909 
910 	return dev->nr_vring;
911 }
912 
913 int
914 rte_vhost_get_ifname(int vid, char *buf, size_t len)
915 {
916 	struct virtio_net *dev = get_device(vid);
917 
918 	if (dev == NULL || buf == NULL)
919 		return -1;
920 
921 	len = RTE_MIN(len, sizeof(dev->ifname));
922 
923 	strncpy(buf, dev->ifname, len);
924 	buf[len - 1] = '\0';
925 
926 	return 0;
927 }
928 
929 int
930 rte_vhost_get_negotiated_features(int vid, uint64_t *features)
931 {
932 	struct virtio_net *dev;
933 
934 	dev = get_device(vid);
935 	if (dev == NULL || features == NULL)
936 		return -1;
937 
938 	*features = dev->features;
939 	return 0;
940 }
941 
942 int
943 rte_vhost_get_negotiated_protocol_features(int vid,
944 					   uint64_t *protocol_features)
945 {
946 	struct virtio_net *dev;
947 
948 	dev = get_device(vid);
949 	if (dev == NULL || protocol_features == NULL)
950 		return -1;
951 
952 	*protocol_features = dev->protocol_features;
953 	return 0;
954 }
955 
956 int
957 rte_vhost_get_mem_table(int vid, struct rte_vhost_memory **mem)
958 {
959 	struct virtio_net *dev;
960 	struct rte_vhost_memory *m;
961 	size_t size;
962 
963 	dev = get_device(vid);
964 	if (dev == NULL || mem == NULL)
965 		return -1;
966 
967 	size = dev->mem->nregions * sizeof(struct rte_vhost_mem_region);
968 	m = malloc(sizeof(struct rte_vhost_memory) + size);
969 	if (!m)
970 		return -1;
971 
972 	m->nregions = dev->mem->nregions;
973 	memcpy(m->regions, dev->mem->regions, size);
974 	*mem = m;
975 
976 	return 0;
977 }
978 
979 int
980 rte_vhost_get_vhost_vring(int vid, uint16_t vring_idx,
981 			  struct rte_vhost_vring *vring)
982 {
983 	struct virtio_net *dev;
984 	struct vhost_virtqueue *vq;
985 
986 	dev = get_device(vid);
987 	if (dev == NULL || vring == NULL)
988 		return -1;
989 
990 	if (vring_idx >= VHOST_MAX_VRING)
991 		return -1;
992 
993 	vq = dev->virtqueue[vring_idx];
994 	if (!vq)
995 		return -1;
996 
997 	if (vq_is_packed(dev)) {
998 		vring->desc_packed = vq->desc_packed;
999 		vring->driver_event = vq->driver_event;
1000 		vring->device_event = vq->device_event;
1001 	} else {
1002 		vring->desc = vq->desc;
1003 		vring->avail = vq->avail;
1004 		vring->used = vq->used;
1005 	}
1006 	vring->log_guest_addr  = vq->log_guest_addr;
1007 
1008 	vring->callfd  = vq->callfd;
1009 	vring->kickfd  = vq->kickfd;
1010 	vring->size    = vq->size;
1011 
1012 	return 0;
1013 }
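/*
 * Illustrative usage sketch (informative only, not library code): an
 * application driving the ring itself can snapshot the vring layout and
 * eventfds. "vid" and "qid" are assumed to come from that application's
 * vhost-user callbacks; error handling is omitted for brevity.
 *
 *	struct rte_vhost_vring vring;
 *
 *	if (rte_vhost_get_vhost_vring(vid, qid, &vring) != 0)
 *		return -1;
 *
 * On success, vring.size, vring.kickfd and vring.callfd are valid, as are
 * either the split (desc/avail/used) or the packed descriptor pointers,
 * depending on the negotiated ring layout.
 */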
1014 
1015 int
1016 rte_vhost_get_vhost_ring_inflight(int vid, uint16_t vring_idx,
1017 				  struct rte_vhost_ring_inflight *vring)
1018 {
1019 	struct virtio_net *dev;
1020 	struct vhost_virtqueue *vq;
1021 
1022 	dev = get_device(vid);
1023 	if (unlikely(!dev))
1024 		return -1;
1025 
1026 	if (vring_idx >= VHOST_MAX_VRING)
1027 		return -1;
1028 
1029 	vq = dev->virtqueue[vring_idx];
1030 	if (unlikely(!vq))
1031 		return -1;
1032 
1033 	if (vq_is_packed(dev)) {
1034 		if (unlikely(!vq->inflight_packed))
1035 			return -1;
1036 
1037 		vring->inflight_packed = vq->inflight_packed;
1038 	} else {
1039 		if (unlikely(!vq->inflight_split))
1040 			return -1;
1041 
1042 		vring->inflight_split = vq->inflight_split;
1043 	}
1044 
1045 	vring->resubmit_inflight = vq->resubmit_inflight;
1046 
1047 	return 0;
1048 }
1049 
1050 int
1051 rte_vhost_set_inflight_desc_split(int vid, uint16_t vring_idx,
1052 				  uint16_t idx)
1053 {
1054 	struct vhost_virtqueue *vq;
1055 	struct virtio_net *dev;
1056 
1057 	dev = get_device(vid);
1058 	if (unlikely(!dev))
1059 		return -1;
1060 
1061 	if (unlikely(!(dev->protocol_features &
1062 	    (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1063 		return 0;
1064 
1065 	if (unlikely(vq_is_packed(dev)))
1066 		return -1;
1067 
1068 	if (unlikely(vring_idx >= VHOST_MAX_VRING))
1069 		return -1;
1070 
1071 	vq = dev->virtqueue[vring_idx];
1072 	if (unlikely(!vq))
1073 		return -1;
1074 
1075 	if (unlikely(!vq->inflight_split))
1076 		return -1;
1077 
1078 	if (unlikely(idx >= vq->size))
1079 		return -1;
1080 
1081 	vq->inflight_split->desc[idx].counter = vq->global_counter++;
1082 	vq->inflight_split->desc[idx].inflight = 1;
1083 	return 0;
1084 }
1085 
1086 int
1087 rte_vhost_set_inflight_desc_packed(int vid, uint16_t vring_idx,
1088 				   uint16_t head, uint16_t last,
1089 				   uint16_t *inflight_entry)
1090 {
1091 	struct rte_vhost_inflight_info_packed *inflight_info;
1092 	struct virtio_net *dev;
1093 	struct vhost_virtqueue *vq;
1094 	struct vring_packed_desc *desc;
1095 	uint16_t old_free_head, free_head;
1096 
1097 	dev = get_device(vid);
1098 	if (unlikely(!dev))
1099 		return -1;
1100 
1101 	if (unlikely(!(dev->protocol_features &
1102 	    (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1103 		return 0;
1104 
1105 	if (unlikely(!vq_is_packed(dev)))
1106 		return -1;
1107 
1108 	if (unlikely(vring_idx >= VHOST_MAX_VRING))
1109 		return -1;
1110 
1111 	vq = dev->virtqueue[vring_idx];
1112 	if (unlikely(!vq))
1113 		return -1;
1114 
1115 	inflight_info = vq->inflight_packed;
1116 	if (unlikely(!inflight_info))
1117 		return -1;
1118 
1119 	if (unlikely(head >= vq->size))
1120 		return -1;
1121 
1122 	desc = vq->desc_packed;
1123 	old_free_head = inflight_info->old_free_head;
1124 	if (unlikely(old_free_head >= vq->size))
1125 		return -1;
1126 
1127 	free_head = old_free_head;
1128 
1129 	/* init header descriptor */
1130 	inflight_info->desc[old_free_head].num = 0;
1131 	inflight_info->desc[old_free_head].counter = vq->global_counter++;
1132 	inflight_info->desc[old_free_head].inflight = 1;
1133 
1134 	/* save desc entry in flight entry */
1135 	while (head != ((last + 1) % vq->size)) {
1136 		inflight_info->desc[old_free_head].num++;
1137 		inflight_info->desc[free_head].addr = desc[head].addr;
1138 		inflight_info->desc[free_head].len = desc[head].len;
1139 		inflight_info->desc[free_head].flags = desc[head].flags;
1140 		inflight_info->desc[free_head].id = desc[head].id;
1141 
1142 		inflight_info->desc[old_free_head].last = free_head;
1143 		free_head = inflight_info->desc[free_head].next;
1144 		inflight_info->free_head = free_head;
1145 		head = (head + 1) % vq->size;
1146 	}
1147 
1148 	inflight_info->old_free_head = free_head;
1149 	*inflight_entry = old_free_head;
1150 
1151 	return 0;
1152 }
1153 
1154 int
1155 rte_vhost_clr_inflight_desc_split(int vid, uint16_t vring_idx,
1156 				  uint16_t last_used_idx, uint16_t idx)
1157 {
1158 	struct virtio_net *dev;
1159 	struct vhost_virtqueue *vq;
1160 
1161 	dev = get_device(vid);
1162 	if (unlikely(!dev))
1163 		return -1;
1164 
1165 	if (unlikely(!(dev->protocol_features &
1166 	    (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1167 		return 0;
1168 
1169 	if (unlikely(vq_is_packed(dev)))
1170 		return -1;
1171 
1172 	if (unlikely(vring_idx >= VHOST_MAX_VRING))
1173 		return -1;
1174 
1175 	vq = dev->virtqueue[vring_idx];
1176 	if (unlikely(!vq))
1177 		return -1;
1178 
1179 	if (unlikely(!vq->inflight_split))
1180 		return -1;
1181 
1182 	if (unlikely(idx >= vq->size))
1183 		return -1;
1184 
1185 	rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
1186 
1187 	vq->inflight_split->desc[idx].inflight = 0;
1188 
1189 	rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
1190 
1191 	vq->inflight_split->used_idx = last_used_idx;
1192 	return 0;
1193 }
1194 
1195 int
1196 rte_vhost_clr_inflight_desc_packed(int vid, uint16_t vring_idx,
1197 				   uint16_t head)
1198 {
1199 	struct rte_vhost_inflight_info_packed *inflight_info;
1200 	struct virtio_net *dev;
1201 	struct vhost_virtqueue *vq;
1202 
1203 	dev = get_device(vid);
1204 	if (unlikely(!dev))
1205 		return -1;
1206 
1207 	if (unlikely(!(dev->protocol_features &
1208 	    (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1209 		return 0;
1210 
1211 	if (unlikely(!vq_is_packed(dev)))
1212 		return -1;
1213 
1214 	if (unlikely(vring_idx >= VHOST_MAX_VRING))
1215 		return -1;
1216 
1217 	vq = dev->virtqueue[vring_idx];
1218 	if (unlikely(!vq))
1219 		return -1;
1220 
1221 	inflight_info = vq->inflight_packed;
1222 	if (unlikely(!inflight_info))
1223 		return -1;
1224 
1225 	if (unlikely(head >= vq->size))
1226 		return -1;
1227 
1228 	rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
1229 
1230 	inflight_info->desc[head].inflight = 0;
1231 
1232 	rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
1233 
1234 	inflight_info->old_free_head = inflight_info->free_head;
1235 	inflight_info->old_used_idx = inflight_info->used_idx;
1236 	inflight_info->old_used_wrap_counter = inflight_info->used_wrap_counter;
1237 
1238 	return 0;
1239 }
1240 
1241 int
1242 rte_vhost_set_last_inflight_io_split(int vid, uint16_t vring_idx,
1243 				     uint16_t idx)
1244 {
1245 	struct virtio_net *dev;
1246 	struct vhost_virtqueue *vq;
1247 
1248 	dev = get_device(vid);
1249 	if (unlikely(!dev))
1250 		return -1;
1251 
1252 	if (unlikely(!(dev->protocol_features &
1253 	    (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1254 		return 0;
1255 
1256 	if (unlikely(vq_is_packed(dev)))
1257 		return -1;
1258 
1259 	if (unlikely(vring_idx >= VHOST_MAX_VRING))
1260 		return -1;
1261 
1262 	vq = dev->virtqueue[vring_idx];
1263 	if (unlikely(!vq))
1264 		return -1;
1265 
1266 	if (unlikely(!vq->inflight_split))
1267 		return -1;
1268 
1269 	if (unlikely(idx >= vq->size))
1270 		return -1;
1271 
1272 	vq->inflight_split->last_inflight_io = idx;
1273 	return 0;
1274 }
1275 
1276 int
1277 rte_vhost_set_last_inflight_io_packed(int vid, uint16_t vring_idx,
1278 				      uint16_t head)
1279 {
1280 	struct rte_vhost_inflight_info_packed *inflight_info;
1281 	struct virtio_net *dev;
1282 	struct vhost_virtqueue *vq;
1283 	uint16_t last;
1284 
1285 	dev = get_device(vid);
1286 	if (unlikely(!dev))
1287 		return -1;
1288 
1289 	if (unlikely(!(dev->protocol_features &
1290 	    (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1291 		return 0;
1292 
1293 	if (unlikely(!vq_is_packed(dev)))
1294 		return -1;
1295 
1296 	if (unlikely(vring_idx >= VHOST_MAX_VRING))
1297 		return -1;
1298 
1299 	vq = dev->virtqueue[vring_idx];
1300 	if (unlikely(!vq))
1301 		return -1;
1302 
1303 	inflight_info = vq->inflight_packed;
1304 	if (unlikely(!inflight_info))
1305 		return -1;
1306 
1307 	if (unlikely(head >= vq->size))
1308 		return -1;
1309 
1310 	last = inflight_info->desc[head].last;
1311 	if (unlikely(last >= vq->size))
1312 		return -1;
1313 
1314 	inflight_info->desc[last].next = inflight_info->free_head;
1315 	inflight_info->free_head = head;
1316 	inflight_info->used_idx += inflight_info->desc[head].num;
1317 	if (inflight_info->used_idx >= inflight_info->desc_num) {
1318 		inflight_info->used_idx -= inflight_info->desc_num;
1319 		inflight_info->used_wrap_counter =
1320 			!inflight_info->used_wrap_counter;
1321 	}
1322 
1323 	return 0;
1324 }
1325 
1326 int
1327 rte_vhost_vring_call(int vid, uint16_t vring_idx)
1328 {
1329 	struct virtio_net *dev;
1330 	struct vhost_virtqueue *vq;
1331 
1332 	dev = get_device(vid);
1333 	if (!dev)
1334 		return -1;
1335 
1336 	if (vring_idx >= VHOST_MAX_VRING)
1337 		return -1;
1338 
1339 	vq = dev->virtqueue[vring_idx];
1340 	if (!vq)
1341 		return -1;
1342 
1343 	rte_rwlock_read_lock(&vq->access_lock);
1344 
1345 	if (vq_is_packed(dev))
1346 		vhost_vring_call_packed(dev, vq);
1347 	else
1348 		vhost_vring_call_split(dev, vq);
1349 
1350 	rte_rwlock_read_unlock(&vq->access_lock);
1351 
1352 	return 0;
1353 }
1354 
1355 int
1356 rte_vhost_vring_call_nonblock(int vid, uint16_t vring_idx)
1357 {
1358 	struct virtio_net *dev;
1359 	struct vhost_virtqueue *vq;
1360 
1361 	dev = get_device(vid);
1362 	if (!dev)
1363 		return -1;
1364 
1365 	if (vring_idx >= VHOST_MAX_VRING)
1366 		return -1;
1367 
1368 	vq = dev->virtqueue[vring_idx];
1369 	if (!vq)
1370 		return -1;
1371 
1372 	if (rte_rwlock_read_trylock(&vq->access_lock))
1373 		return -EAGAIN;
1374 
1375 	if (vq_is_packed(dev))
1376 		vhost_vring_call_packed(dev, vq);
1377 	else
1378 		vhost_vring_call_split(dev, vq);
1379 
1380 	rte_rwlock_read_unlock(&vq->access_lock);
1381 
1382 	return 0;
1383 }
1384 
1385 uint16_t
1386 rte_vhost_avail_entries(int vid, uint16_t queue_id)
1387 {
1388 	struct virtio_net *dev;
1389 	struct vhost_virtqueue *vq;
1390 	uint16_t ret = 0;
1391 
1392 	dev = get_device(vid);
1393 	if (!dev)
1394 		return 0;
1395 
1396 	if (queue_id >= VHOST_MAX_VRING)
1397 		return 0;
1398 
1399 	vq = dev->virtqueue[queue_id];
1400 	if (!vq)
1401 		return 0;
1402 
1403 	rte_rwlock_write_lock(&vq->access_lock);
1404 
1405 	if (unlikely(!vq->enabled || vq->avail == NULL))
1406 		goto out;
1407 
1408 	ret = *(volatile uint16_t *)&vq->avail->idx - vq->last_used_idx;
1409 
1410 out:
1411 	rte_rwlock_write_unlock(&vq->access_lock);
1412 	return ret;
1413 }
1414 
1415 static inline int
1416 vhost_enable_notify_split(struct virtio_net *dev,
1417 		struct vhost_virtqueue *vq, int enable)
1418 {
1419 	if (vq->used == NULL)
1420 		return -1;
1421 
1422 	if (!(dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))) {
1423 		if (enable)
1424 			vq->used->flags &= ~VRING_USED_F_NO_NOTIFY;
1425 		else
1426 			vq->used->flags |= VRING_USED_F_NO_NOTIFY;
1427 	} else {
1428 		if (enable)
1429 			vhost_avail_event(vq) = vq->last_avail_idx;
1430 	}
1431 	return 0;
1432 }
1433 
1434 static inline int
1435 vhost_enable_notify_packed(struct virtio_net *dev,
1436 		struct vhost_virtqueue *vq, int enable)
1437 {
1438 	uint16_t flags;
1439 
1440 	if (vq->device_event == NULL)
1441 		return -1;
1442 
1443 	if (!enable) {
1444 		vq->device_event->flags = VRING_EVENT_F_DISABLE;
1445 		return 0;
1446 	}
1447 
1448 	flags = VRING_EVENT_F_ENABLE;
1449 	if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX)) {
1450 		flags = VRING_EVENT_F_DESC;
1451 		vq->device_event->off_wrap = vq->last_avail_idx |
1452 			vq->avail_wrap_counter << 15;
1453 	}
1454 
1455 	rte_atomic_thread_fence(__ATOMIC_RELEASE);
1456 
1457 	vq->device_event->flags = flags;
1458 	return 0;
1459 }
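/*
 * Informative example of the packed-ring encoding used above: with
 * VIRTIO_RING_F_EVENT_IDX negotiated, the device event area asks for a
 * notification at a specific descriptor. For last_avail_idx = 0x12 and
 * avail_wrap_counter = 1, the fields written are:
 *
 *	off_wrap = 0x12 | (1 << 15) = 0x8012
 *	flags    = VRING_EVENT_F_DESC
 *
 * Without EVENT_IDX, flags is simply VRING_EVENT_F_ENABLE or
 * VRING_EVENT_F_DISABLE. The release fence ensures off_wrap is visible to
 * the driver before the flags update enables notifications.
 */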
1460 
1461 int
1462 vhost_enable_guest_notification(struct virtio_net *dev,
1463 		struct vhost_virtqueue *vq, int enable)
1464 {
1465 	/*
1466 	 * If the virtqueue is not ready yet, the setting will be
1467 	 * applied when it becomes ready.
1468 	 */
1469 	if (!vq->ready)
1470 		return 0;
1471 
1472 	if (vq_is_packed(dev))
1473 		return vhost_enable_notify_packed(dev, vq, enable);
1474 	else
1475 		return vhost_enable_notify_split(dev, vq, enable);
1476 }
1477 
1478 int
1479 rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable)
1480 {
1481 	struct virtio_net *dev = get_device(vid);
1482 	struct vhost_virtqueue *vq;
1483 	int ret;
1484 
1485 	if (!dev)
1486 		return -1;
1487 
1488 	if (queue_id >= VHOST_MAX_VRING)
1489 		return -1;
1490 
1491 	vq = dev->virtqueue[queue_id];
1492 	if (!vq)
1493 		return -1;
1494 
1495 	rte_rwlock_write_lock(&vq->access_lock);
1496 
1497 	vq->notif_enable = enable;
1498 	ret = vhost_enable_guest_notification(dev, vq, enable);
1499 
1500 	rte_rwlock_write_unlock(&vq->access_lock);
1501 
1502 	return ret;
1503 }
1504 
1505 void
1506 rte_vhost_notify_guest(int vid, uint16_t queue_id)
1507 {
1508 	struct virtio_net *dev = get_device(vid);
1509 	struct vhost_virtqueue *vq;
1510 
1511 	if (!dev || queue_id >= VHOST_MAX_VRING)
1512 		return;
1513 
1514 	vq = dev->virtqueue[queue_id];
1515 	if (!vq)
1516 		return;
1517 
1518 	rte_rwlock_read_lock(&vq->access_lock);
1519 
1520 	if (dev->backend_ops->inject_irq(dev, vq)) {
1521 		if (dev->flags & VIRTIO_DEV_STATS_ENABLED)
1522 			__atomic_fetch_add(&vq->stats.guest_notifications_error,
1523 					1, __ATOMIC_RELAXED);
1524 	} else {
1525 		if (dev->flags & VIRTIO_DEV_STATS_ENABLED)
1526 			__atomic_fetch_add(&vq->stats.guest_notifications,
1527 					1, __ATOMIC_RELAXED);
1528 		if (dev->notify_ops->guest_notified)
1529 			dev->notify_ops->guest_notified(dev->vid);
1530 	}
1531 
1532 	rte_rwlock_read_unlock(&vq->access_lock);
1533 }
1534 
1535 void
1536 rte_vhost_log_write(int vid, uint64_t addr, uint64_t len)
1537 {
1538 	struct virtio_net *dev = get_device(vid);
1539 
1540 	if (dev == NULL)
1541 		return;
1542 
1543 	vhost_log_write(dev, addr, len);
1544 }
1545 
1546 void
1547 rte_vhost_log_used_vring(int vid, uint16_t vring_idx,
1548 			 uint64_t offset, uint64_t len)
1549 {
1550 	struct virtio_net *dev;
1551 	struct vhost_virtqueue *vq;
1552 
1553 	dev = get_device(vid);
1554 	if (dev == NULL)
1555 		return;
1556 
1557 	if (vring_idx >= VHOST_MAX_VRING)
1558 		return;
1559 	vq = dev->virtqueue[vring_idx];
1560 	if (!vq)
1561 		return;
1562 
1563 	vhost_log_used_vring(dev, vq, offset, len);
1564 }
1565 
1566 uint32_t
1567 rte_vhost_rx_queue_count(int vid, uint16_t qid)
1568 {
1569 	struct virtio_net *dev;
1570 	struct vhost_virtqueue *vq;
1571 	uint32_t ret = 0;
1572 
1573 	dev = get_device(vid);
1574 	if (dev == NULL)
1575 		return 0;
1576 
1577 	if (unlikely(qid >= dev->nr_vring || (qid & 1) == 0)) {
1578 		VHOST_LOG_DATA(dev->ifname, ERR,
1579 			"%s: invalid virtqueue idx %d.\n",
1580 			__func__, qid);
1581 		return 0;
1582 	}
1583 
1584 	vq = dev->virtqueue[qid];
1585 	if (vq == NULL)
1586 		return 0;
1587 
1588 	rte_rwlock_write_lock(&vq->access_lock);
1589 
1590 	if (unlikely(!vq->enabled || vq->avail == NULL))
1591 		goto out;
1592 
1593 	ret = *((volatile uint16_t *)&vq->avail->idx) - vq->last_avail_idx;
1594 
1595 out:
1596 	rte_rwlock_write_unlock(&vq->access_lock);
1597 	return ret;
1598 }
1599 
1600 struct rte_vdpa_device *
1601 rte_vhost_get_vdpa_device(int vid)
1602 {
1603 	struct virtio_net *dev = get_device(vid);
1604 
1605 	if (dev == NULL)
1606 		return NULL;
1607 
1608 	return dev->vdpa_dev;
1609 }
1610 
1611 int
1612 rte_vhost_get_log_base(int vid, uint64_t *log_base,
1613 		uint64_t *log_size)
1614 {
1615 	struct virtio_net *dev = get_device(vid);
1616 
1617 	if (dev == NULL || log_base == NULL || log_size == NULL)
1618 		return -1;
1619 
1620 	*log_base = dev->log_base;
1621 	*log_size = dev->log_size;
1622 
1623 	return 0;
1624 }
1625 
1626 int
1627 rte_vhost_get_vring_base(int vid, uint16_t queue_id,
1628 		uint16_t *last_avail_idx, uint16_t *last_used_idx)
1629 {
1630 	struct vhost_virtqueue *vq;
1631 	struct virtio_net *dev = get_device(vid);
1632 
1633 	if (dev == NULL || last_avail_idx == NULL || last_used_idx == NULL)
1634 		return -1;
1635 
1636 	if (queue_id >= VHOST_MAX_VRING)
1637 		return -1;
1638 
1639 	vq = dev->virtqueue[queue_id];
1640 	if (!vq)
1641 		return -1;
1642 
1643 	if (vq_is_packed(dev)) {
1644 		*last_avail_idx = (vq->avail_wrap_counter << 15) |
1645 				  vq->last_avail_idx;
1646 		*last_used_idx = (vq->used_wrap_counter << 15) |
1647 				 vq->last_used_idx;
1648 	} else {
1649 		*last_avail_idx = vq->last_avail_idx;
1650 		*last_used_idx = vq->last_used_idx;
1651 	}
1652 
1653 	return 0;
1654 }
1655 
1656 int
1657 rte_vhost_set_vring_base(int vid, uint16_t queue_id,
1658 		uint16_t last_avail_idx, uint16_t last_used_idx)
1659 {
1660 	struct vhost_virtqueue *vq;
1661 	struct virtio_net *dev = get_device(vid);
1662 
1663 	if (!dev)
1664 		return -1;
1665 
1666 	if (queue_id >= VHOST_MAX_VRING)
1667 		return -1;
1668 
1669 	vq = dev->virtqueue[queue_id];
1670 	if (!vq)
1671 		return -1;
1672 
1673 	if (vq_is_packed(dev)) {
1674 		vq->last_avail_idx = last_avail_idx & 0x7fff;
1675 		vq->avail_wrap_counter = !!(last_avail_idx & (1 << 15));
1676 		vq->last_used_idx = last_used_idx & 0x7fff;
1677 		vq->used_wrap_counter = !!(last_used_idx & (1 << 15));
1678 	} else {
1679 		vq->last_avail_idx = last_avail_idx;
1680 		vq->last_used_idx = last_used_idx;
1681 	}
1682 
1683 	return 0;
1684 }
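/*
 * Informative example of the packed-ring index encoding shared by
 * rte_vhost_get_vring_base() and rte_vhost_set_vring_base(): bit 15 carries
 * the wrap counter and bits 0-14 the ring index. For last_avail_idx = 0x0010
 * with avail_wrap_counter = 1:
 *
 *	encoded       = (1 << 15) | 0x0010       = 0x8010
 *	decoded index = 0x8010 & 0x7fff          = 0x0010
 *	decoded wrap  = !!(0x8010 & (1 << 15))   = 1
 *
 * Split rings use the raw 16-bit indexes unchanged.
 */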
1685 
1686 int
1687 rte_vhost_get_vring_base_from_inflight(int vid,
1688 				       uint16_t queue_id,
1689 				       uint16_t *last_avail_idx,
1690 				       uint16_t *last_used_idx)
1691 {
1692 	struct rte_vhost_inflight_info_packed *inflight_info;
1693 	struct vhost_virtqueue *vq;
1694 	struct virtio_net *dev = get_device(vid);
1695 
1696 	if (dev == NULL || last_avail_idx == NULL || last_used_idx == NULL)
1697 		return -1;
1698 
1699 	if (queue_id >= VHOST_MAX_VRING)
1700 		return -1;
1701 
1702 	vq = dev->virtqueue[queue_id];
1703 	if (!vq)
1704 		return -1;
1705 
1706 	if (!vq_is_packed(dev))
1707 		return -1;
1708 
1709 	inflight_info = vq->inflight_packed;
1710 	if (!inflight_info)
1711 		return -1;
1712 
1713 	*last_avail_idx = (inflight_info->old_used_wrap_counter << 15) |
1714 			  inflight_info->old_used_idx;
1715 	*last_used_idx = *last_avail_idx;
1716 
1717 	return 0;
1718 }
1719 
1720 int
1721 rte_vhost_extern_callback_register(int vid,
1722 		struct rte_vhost_user_extern_ops const * const ops, void *ctx)
1723 {
1724 	struct virtio_net *dev = get_device(vid);
1725 
1726 	if (dev == NULL || ops == NULL)
1727 		return -1;
1728 
1729 	dev->extern_ops = *ops;
1730 	dev->extern_data = ctx;
1731 	return 0;
1732 }
1733 
1734 static __rte_always_inline int
1735 async_channel_register(struct virtio_net *dev, struct vhost_virtqueue *vq)
1736 	__rte_exclusive_locks_required(&vq->access_lock)
1737 {
1738 	struct vhost_async *async;
1739 	int node = vq->numa_node;
1740 
1741 	if (unlikely(vq->async)) {
1742 		VHOST_LOG_CONFIG(dev->ifname, ERR,
1743 			"async register failed: already registered (qid: %d)\n",
1744 			vq->index);
1745 		return -1;
1746 	}
1747 
1748 	async = rte_zmalloc_socket(NULL, sizeof(struct vhost_async), 0, node);
1749 	if (!async) {
1750 		VHOST_LOG_CONFIG(dev->ifname, ERR,
1751 			"failed to allocate async metadata (qid: %d)\n",
1752 			vq->index);
1753 		return -1;
1754 	}
1755 
1756 	async->pkts_info = rte_malloc_socket(NULL, vq->size * sizeof(struct async_inflight_info),
1757 			RTE_CACHE_LINE_SIZE, node);
1758 	if (!async->pkts_info) {
1759 		VHOST_LOG_CONFIG(dev->ifname, ERR,
1760 			"failed to allocate async_pkts_info (qid: %d)\n",
1761 			vq->index);
1762 		goto out_free_async;
1763 	}
1764 
1765 	async->pkts_cmpl_flag = rte_zmalloc_socket(NULL, vq->size * sizeof(bool),
1766 			RTE_CACHE_LINE_SIZE, node);
1767 	if (!async->pkts_cmpl_flag) {
1768 		VHOST_LOG_CONFIG(dev->ifname, ERR,
1769 			"failed to allocate async pkts_cmpl_flag (qid: %d)\n",
1770 			vq->index);
1771 		goto out_free_async;
1772 	}
1773 
1774 	if (vq_is_packed(dev)) {
1775 		async->buffers_packed = rte_malloc_socket(NULL,
1776 				vq->size * sizeof(struct vring_used_elem_packed),
1777 				RTE_CACHE_LINE_SIZE, node);
1778 		if (!async->buffers_packed) {
1779 			VHOST_LOG_CONFIG(dev->ifname, ERR,
1780 				"failed to allocate async buffers (qid: %d)\n",
1781 				vq->index);
1782 			goto out_free_inflight;
1783 		}
1784 	} else {
1785 		async->descs_split = rte_malloc_socket(NULL,
1786 				vq->size * sizeof(struct vring_used_elem),
1787 				RTE_CACHE_LINE_SIZE, node);
1788 		if (!async->descs_split) {
1789 			VHOST_LOG_CONFIG(dev->ifname, ERR,
1790 				"failed to allocate async descs (qid: %d)\n",
1791 				vq->index);
1792 			goto out_free_inflight;
1793 		}
1794 	}
1795 
1796 	vq->async = async;
1797 
1798 	return 0;
1799 out_free_inflight:
1800 	rte_free(async->pkts_info);
1801 out_free_async:
1802 	rte_free(async);
1803 
1804 	return -1;
1805 }
1806 
1807 int
1808 rte_vhost_async_channel_register(int vid, uint16_t queue_id)
1809 {
1810 	struct vhost_virtqueue *vq;
1811 	struct virtio_net *dev = get_device(vid);
1812 	int ret;
1813 
1814 	if (dev == NULL)
1815 		return -1;
1816 
1817 	if (queue_id >= VHOST_MAX_VRING)
1818 		return -1;
1819 
1820 	vq = dev->virtqueue[queue_id];
1821 
1822 	if (unlikely(vq == NULL || !dev->async_copy || dev->vdpa_dev != NULL))
1823 		return -1;
1824 
1825 	rte_rwlock_write_lock(&vq->access_lock);
1826 	ret = async_channel_register(dev, vq);
1827 	rte_rwlock_write_unlock(&vq->access_lock);
1828 
1829 	return ret;
1830 }
1831 
1832 int
1833 rte_vhost_async_channel_register_thread_unsafe(int vid, uint16_t queue_id)
1834 {
1835 	struct vhost_virtqueue *vq;
1836 	struct virtio_net *dev = get_device(vid);
1837 
1838 	if (dev == NULL)
1839 		return -1;
1840 
1841 	if (queue_id >= VHOST_MAX_VRING)
1842 		return -1;
1843 
1844 	vq = dev->virtqueue[queue_id];
1845 
1846 	if (unlikely(vq == NULL || !dev->async_copy || dev->vdpa_dev != NULL))
1847 		return -1;
1848 
1849 	vq_assert_lock(dev, vq);
1850 
1851 	return async_channel_register(dev, vq);
1852 }
1853 
1854 int
1855 rte_vhost_async_channel_unregister(int vid, uint16_t queue_id)
1856 {
1857 	struct vhost_virtqueue *vq;
1858 	struct virtio_net *dev = get_device(vid);
1859 	int ret = -1;
1860 
1861 	if (dev == NULL)
1862 		return ret;
1863 
1864 	if (queue_id >= VHOST_MAX_VRING)
1865 		return ret;
1866 
1867 	vq = dev->virtqueue[queue_id];
1868 
1869 	if (vq == NULL)
1870 		return ret;
1871 
1872 	if (rte_rwlock_write_trylock(&vq->access_lock)) {
1873 		VHOST_LOG_CONFIG(dev->ifname, ERR,
1874 			"failed to unregister async channel, virtqueue busy.\n");
1875 		return ret;
1876 	}
1877 
1878 	if (!vq->async) {
1879 		ret = 0;
1880 	} else if (vq->async->pkts_inflight_n) {
1881 		VHOST_LOG_CONFIG(dev->ifname, ERR, "failed to unregister async channel.\n");
1882 		VHOST_LOG_CONFIG(dev->ifname, ERR,
1883 			"inflight packets must be completed before unregistration.\n");
1884 	} else {
1885 		vhost_free_async_mem(vq);
1886 		ret = 0;
1887 	}
1888 
1889 	rte_rwlock_write_unlock(&vq->access_lock);
1890 
1891 	return ret;
1892 }
1893 
1894 int
1895 rte_vhost_async_channel_unregister_thread_unsafe(int vid, uint16_t queue_id)
1896 {
1897 	struct vhost_virtqueue *vq;
1898 	struct virtio_net *dev = get_device(vid);
1899 
1900 	if (dev == NULL)
1901 		return -1;
1902 
1903 	if (queue_id >= VHOST_MAX_VRING)
1904 		return -1;
1905 
1906 	vq = dev->virtqueue[queue_id];
1907 
1908 	if (vq == NULL)
1909 		return -1;
1910 
1911 	vq_assert_lock(dev, vq);
1912 
1913 	if (!vq->async)
1914 		return 0;
1915 
1916 	if (vq->async->pkts_inflight_n) {
1917 		VHOST_LOG_CONFIG(dev->ifname, ERR, "failed to unregister async channel.\n");
1918 		VHOST_LOG_CONFIG(dev->ifname, ERR,
1919 			"inflight packets must be completed before unregistration.\n");
1920 		return -1;
1921 	}
1922 
1923 	vhost_free_async_mem(vq);
1924 
1925 	return 0;
1926 }
1927 
1928 int
1929 rte_vhost_async_dma_configure(int16_t dma_id, uint16_t vchan_id)
1930 {
1931 	struct rte_dma_info info;
1932 	void *pkts_cmpl_flag_addr;
1933 	uint16_t max_desc;
1934 
1935 	pthread_mutex_lock(&vhost_dma_lock);
1936 
1937 	if (!rte_dma_is_valid(dma_id)) {
1938 		VHOST_LOG_CONFIG("dma", ERR, "DMA %d is not found.\n", dma_id);
1939 		goto error;
1940 	}
1941 
1942 	if (rte_dma_info_get(dma_id, &info) != 0) {
1943 		VHOST_LOG_CONFIG("dma", ERR, "Fail to get DMA %d information.\n", dma_id);
1944 		goto error;
1945 	}
1946 
1947 	if (vchan_id >= info.max_vchans) {
1948 		VHOST_LOG_CONFIG("dma", ERR, "Invalid DMA %d vChannel %u.\n", dma_id, vchan_id);
1949 		goto error;
1950 	}
1951 
1952 	if (!dma_copy_track[dma_id].vchans) {
1953 		struct async_dma_vchan_info *vchans;
1954 
1955 		vchans = rte_zmalloc(NULL, sizeof(struct async_dma_vchan_info) * info.max_vchans,
1956 				RTE_CACHE_LINE_SIZE);
1957 		if (vchans == NULL) {
1958 			VHOST_LOG_CONFIG("dma", ERR,
1959 				"Failed to allocate vchans for DMA %d vChannel %u.\n",
1960 				dma_id, vchan_id);
1961 			goto error;
1962 		}
1963 
1964 		dma_copy_track[dma_id].vchans = vchans;
1965 	}
1966 
1967 	if (dma_copy_track[dma_id].vchans[vchan_id].pkts_cmpl_flag_addr) {
1968 		VHOST_LOG_CONFIG("dma", INFO, "DMA %d vChannel %u already registered.\n",
1969 			dma_id, vchan_id);
1970 		pthread_mutex_unlock(&vhost_dma_lock);
1971 		return 0;
1972 	}
1973 
1974 	max_desc = info.max_desc;
1975 	if (!rte_is_power_of_2(max_desc))
1976 		max_desc = rte_align32pow2(max_desc);
1977 
1978 	pkts_cmpl_flag_addr = rte_zmalloc(NULL, sizeof(bool *) * max_desc, RTE_CACHE_LINE_SIZE);
1979 	if (!pkts_cmpl_flag_addr) {
1980 		VHOST_LOG_CONFIG("dma", ERR,
1981 			"Failed to allocate pkts_cmpl_flag_addr for DMA %d vChannel %u.\n",
1982 			dma_id, vchan_id);
1983 
1984 		if (dma_copy_track[dma_id].nr_vchans == 0) {
1985 			rte_free(dma_copy_track[dma_id].vchans);
1986 			dma_copy_track[dma_id].vchans = NULL;
1987 		}
1988 		goto error;
1989 	}
1990 
1991 	dma_copy_track[dma_id].vchans[vchan_id].pkts_cmpl_flag_addr = pkts_cmpl_flag_addr;
1992 	dma_copy_track[dma_id].vchans[vchan_id].ring_size = max_desc;
1993 	dma_copy_track[dma_id].vchans[vchan_id].ring_mask = max_desc - 1;
1994 	dma_copy_track[dma_id].nr_vchans++;
1995 
1996 	pthread_mutex_unlock(&vhost_dma_lock);
1997 	return 0;
1998 
1999 error:
2000 	pthread_mutex_unlock(&vhost_dma_lock);
2001 	return -1;
2002 }
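/*
 * Illustrative setup sketch (informative only, not library code): an
 * application enabling asynchronous data path typically configures the DMA
 * vchannel before registering the async channel on a virtqueue. "dma_id",
 * "vchan_id", "vid" and "queue_id" are hypothetical application values.
 *
 *	if (rte_vhost_async_dma_configure(dma_id, vchan_id) < 0)
 *		goto err;
 *	if (rte_vhost_async_channel_register(vid, queue_id) < 0)
 *		goto err;
 *
 * Registration requires the port to have been created with the
 * RTE_VHOST_USER_ASYNC_COPY flag and no vDPA device attached, as checked in
 * rte_vhost_async_channel_register().
 */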
2003 
2004 int
2005 rte_vhost_async_get_inflight(int vid, uint16_t queue_id)
2006 {
2007 	struct vhost_virtqueue *vq;
2008 	struct virtio_net *dev = get_device(vid);
2009 	int ret = -1;
2010 
2011 	if (dev == NULL)
2012 		return ret;
2013 
2014 	if (queue_id >= VHOST_MAX_VRING)
2015 		return ret;
2016 
2017 	vq = dev->virtqueue[queue_id];
2018 
2019 	if (vq == NULL)
2020 		return ret;
2021 
2022 	if (rte_rwlock_write_trylock(&vq->access_lock)) {
2023 		VHOST_LOG_CONFIG(dev->ifname, DEBUG,
2024 			"failed to check in-flight packets. virtqueue busy.\n");
2025 		return ret;
2026 	}
2027 
2028 	if (vq->async)
2029 		ret = vq->async->pkts_inflight_n;
2030 
2031 	rte_rwlock_write_unlock(&vq->access_lock);
2032 
2033 	return ret;
2034 }
2035 
2036 int
2037 rte_vhost_async_get_inflight_thread_unsafe(int vid, uint16_t queue_id)
2038 {
2039 	struct vhost_virtqueue *vq;
2040 	struct virtio_net *dev = get_device(vid);
2041 	int ret = -1;
2042 
2043 	if (dev == NULL)
2044 		return ret;
2045 
2046 	if (queue_id >= VHOST_MAX_VRING)
2047 		return ret;
2048 
2049 	vq = dev->virtqueue[queue_id];
2050 
2051 	if (vq == NULL)
2052 		return ret;
2053 
2054 	vq_assert_lock(dev, vq);
2055 
2056 	if (!vq->async)
2057 		return ret;
2058 
2059 	ret = vq->async->pkts_inflight_n;
2060 
2061 	return ret;
2062 }
2063 
2064 int
2065 rte_vhost_get_monitor_addr(int vid, uint16_t queue_id,
2066 		struct rte_vhost_power_monitor_cond *pmc)
2067 {
2068 	struct virtio_net *dev = get_device(vid);
2069 	struct vhost_virtqueue *vq;
2070 
2071 	if (dev == NULL)
2072 		return -1;
2073 	if (queue_id >= VHOST_MAX_VRING)
2074 		return -1;
2075 
2076 	vq = dev->virtqueue[queue_id];
2077 	if (vq == NULL)
2078 		return -1;
2079 
2080 	if (vq_is_packed(dev)) {
2081 		struct vring_packed_desc *desc;
2082 		desc = vq->desc_packed;
2083 		pmc->addr = &desc[vq->last_avail_idx].flags;
2084 		if (vq->avail_wrap_counter)
2085 			pmc->val = VRING_DESC_F_AVAIL;
2086 		else
2087 			pmc->val = VRING_DESC_F_USED;
2088 		pmc->mask = VRING_DESC_F_AVAIL | VRING_DESC_F_USED;
2089 		pmc->size = sizeof(desc[vq->last_avail_idx].flags);
2090 		pmc->match = 1;
2091 	} else {
2092 		pmc->addr = &vq->avail->idx;
2093 		pmc->val = vq->last_avail_idx & (vq->size - 1);
2094 		pmc->mask = vq->size - 1;
2095 		pmc->size = sizeof(vq->avail->idx);
2096 		pmc->match = 0;
2097 	}
2098 
2099 	return 0;
2100 }
2101 
2102 
2103 int
2104 rte_vhost_vring_stats_get_names(int vid, uint16_t queue_id,
2105 		struct rte_vhost_stat_name *name, unsigned int size)
2106 {
2107 	struct virtio_net *dev = get_device(vid);
2108 	unsigned int i;
2109 
2110 	if (dev == NULL)
2111 		return -1;
2112 
2113 	if (queue_id >= dev->nr_vring)
2114 		return -1;
2115 
2116 	if (!(dev->flags & VIRTIO_DEV_STATS_ENABLED))
2117 		return -1;
2118 
2119 	if (name == NULL || size < VHOST_NB_VQ_STATS)
2120 		return VHOST_NB_VQ_STATS;
2121 
2122 	for (i = 0; i < VHOST_NB_VQ_STATS; i++)
2123 		snprintf(name[i].name, sizeof(name[i].name), "%s_q%u_%s",
2124 				(queue_id & 1) ? "rx" : "tx",
2125 				queue_id / 2, vhost_vq_stat_strings[i].name);
2126 
2127 	return VHOST_NB_VQ_STATS;
2128 }
2129 
2130 int
2131 rte_vhost_vring_stats_get(int vid, uint16_t queue_id,
2132 		struct rte_vhost_stat *stats, unsigned int n)
2133 {
2134 	struct virtio_net *dev = get_device(vid);
2135 	struct vhost_virtqueue *vq;
2136 	unsigned int i;
2137 
2138 	if (dev == NULL)
2139 		return -1;
2140 
2141 	if (queue_id >= dev->nr_vring)
2142 		return -1;
2143 
2144 	if (!(dev->flags & VIRTIO_DEV_STATS_ENABLED))
2145 		return -1;
2146 
2147 	if (stats == NULL || n < VHOST_NB_VQ_STATS)
2148 		return VHOST_NB_VQ_STATS;
2149 
2150 	vq = dev->virtqueue[queue_id];
2151 
2152 	rte_rwlock_write_lock(&vq->access_lock);
2153 	for (i = 0; i < VHOST_NB_VQ_STATS; i++) {
2154 		/*
2155 		 * No need to read the counters atomically here: the write
2156 		 * access_lock taken above prevents them from being updated.
2157 		 */
2158 		stats[i].value =
2159 			*(uint64_t *)(((char *)vq) + vhost_vq_stat_strings[i].offset);
2160 		stats[i].id = i;
2161 	}
2162 	rte_rwlock_write_unlock(&vq->access_lock);
2163 
2164 	return VHOST_NB_VQ_STATS;
2165 }
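/*
 * Illustrative retrieval sketch (informative only, not library code),
 * following the xstats-style contract implemented above: calling with a NULL
 * array (or a too-small one) returns the required count, so an application
 * can size its buffers first. Error handling is omitted for brevity.
 *
 *	int n = rte_vhost_vring_stats_get_names(vid, queue_id, NULL, 0);
 *	struct rte_vhost_stat_name *names = calloc(n, sizeof(*names));
 *	struct rte_vhost_stat *stats = calloc(n, sizeof(*stats));
 *
 *	rte_vhost_vring_stats_get_names(vid, queue_id, names, n);
 *	rte_vhost_vring_stats_get(vid, queue_id, stats, n);
 *
 * Per-virtqueue statistics are only available when the port was registered
 * with the RTE_VHOST_USER_NET_STATS_ENABLE flag.
 */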
2166 
2167 int rte_vhost_vring_stats_reset(int vid, uint16_t queue_id)
2168 {
2169 	struct virtio_net *dev = get_device(vid);
2170 	struct vhost_virtqueue *vq;
2171 
2172 	if (dev == NULL)
2173 		return -1;
2174 
2175 	if (queue_id >= dev->nr_vring)
2176 		return -1;
2177 
2178 	if (!(dev->flags & VIRTIO_DEV_STATS_ENABLED))
2179 		return -1;
2180 
2181 	vq = dev->virtqueue[queue_id];
2182 
2183 	rte_rwlock_write_lock(&vq->access_lock);
2184 	/*
2185 	 * No need to reset the counters atomically here: the write
2186 	 * access_lock taken above prevents them from being updated.
2187 	 */
2188 	memset(&vq->stats, 0, sizeof(vq->stats));
2189 	rte_rwlock_write_unlock(&vq->access_lock);
2190 
2191 	return 0;
2192 }
2193 
2194 int
2195 rte_vhost_async_dma_unconfigure(int16_t dma_id, uint16_t vchan_id)
2196 {
2197 	struct rte_dma_info info;
2198 	struct rte_dma_stats stats = { 0 };
2199 
2200 	pthread_mutex_lock(&vhost_dma_lock);
2201 
2202 	if (!rte_dma_is_valid(dma_id)) {
2203 		VHOST_LOG_CONFIG("dma", ERR, "DMA %d is not found.\n", dma_id);
2204 		goto error;
2205 	}
2206 
2207 	if (rte_dma_info_get(dma_id, &info) != 0) {
2208 		VHOST_LOG_CONFIG("dma", ERR, "Fail to get DMA %d information.\n", dma_id);
2209 		goto error;
2210 	}
2211 
2212 	if (vchan_id >= info.max_vchans || !dma_copy_track[dma_id].vchans ||
2213 		!dma_copy_track[dma_id].vchans[vchan_id].pkts_cmpl_flag_addr) {
2214 		VHOST_LOG_CONFIG("dma", ERR, "Invalid channel %d:%u.\n", dma_id, vchan_id);
2215 		goto error;
2216 	}
2217 
2218 	if (rte_dma_stats_get(dma_id, vchan_id, &stats) != 0) {
2219 		VHOST_LOG_CONFIG("dma", ERR,
2220 				 "Failed to get stats for DMA %d vChannel %u.\n", dma_id, vchan_id);
2221 		goto error;
2222 	}
2223 
2224 	if (stats.submitted - stats.completed != 0) {
2225 		VHOST_LOG_CONFIG("dma", ERR,
2226 				 "Do not unconfigure when there are inflight packets.\n");
2227 		goto error;
2228 	}
2229 
2230 	rte_free(dma_copy_track[dma_id].vchans[vchan_id].pkts_cmpl_flag_addr);
2231 	dma_copy_track[dma_id].vchans[vchan_id].pkts_cmpl_flag_addr = NULL;
2232 	dma_copy_track[dma_id].nr_vchans--;
2233 
2234 	if (dma_copy_track[dma_id].nr_vchans == 0) {
2235 		rte_free(dma_copy_track[dma_id].vchans);
2236 		dma_copy_track[dma_id].vchans = NULL;
2237 	}
2238 
2239 	pthread_mutex_unlock(&vhost_dma_lock);
2240 	return 0;
2241 
2242 error:
2243 	pthread_mutex_unlock(&vhost_dma_lock);
2244 	return -1;
2245 }
2246 
2247 RTE_LOG_REGISTER_SUFFIX(vhost_config_log_level, config, INFO);
2248 RTE_LOG_REGISTER_SUFFIX(vhost_data_log_level, data, WARNING);
2249