xref: /dpdk/lib/vhost/vhost.c (revision f8dbaebbf1c9efcbb2e2354b341ed62175466a57)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2017 Intel Corporation
3  */
4 
5 #include <linux/vhost.h>
6 #include <linux/virtio_net.h>
7 #include <stddef.h>
8 #include <stdint.h>
9 #include <stdlib.h>
10 #ifdef RTE_LIBRTE_VHOST_NUMA
11 #include <numa.h>
12 #include <numaif.h>
13 #endif
14 
15 #include <rte_errno.h>
16 #include <rte_ethdev.h>
17 #include <rte_log.h>
18 #include <rte_string_fns.h>
19 #include <rte_memory.h>
20 #include <rte_malloc.h>
21 #include <rte_vhost.h>
22 #include <rte_rwlock.h>
23 
24 #include "iotlb.h"
25 #include "vhost.h"
26 #include "vhost_user.h"
27 
28 struct virtio_net *vhost_devices[MAX_VHOST_DEVICE];
29 pthread_mutex_t vhost_dev_lock = PTHREAD_MUTEX_INITIALIZER;
30 
31 /* Called with iotlb_lock read-locked */
32 uint64_t
33 __vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq,
34 		    uint64_t iova, uint64_t *size, uint8_t perm)
35 {
36 	uint64_t vva, tmp_size;
37 
38 	if (unlikely(!*size))
39 		return 0;
40 
41 	tmp_size = *size;
42 
43 	vva = vhost_user_iotlb_cache_find(vq, iova, &tmp_size, perm);
44 	if (tmp_size == *size)
45 		return vva;
46 
47 	iova += tmp_size;
48 
49 	if (!vhost_user_iotlb_pending_miss(vq, iova, perm)) {
50 		/*
51 		 * iotlb_lock is read-locked for a full burst,
52 		 * but it only protects the iotlb cache.
53 		 * In case of IOTLB miss, we might block on the socket,
54 		 * which could cause a deadlock with QEMU if an IOTLB update
55 		 * is being handled. We can safely unlock here to avoid it.
56 		 */
57 		vhost_user_iotlb_rd_unlock(vq);
58 
59 		vhost_user_iotlb_pending_insert(vq, iova, perm);
60 		if (vhost_user_iotlb_miss(dev, iova, perm)) {
61 			VHOST_LOG_CONFIG(ERR,
62 				"IOTLB miss req failed for IOVA 0x%" PRIx64 "\n",
63 				iova);
64 			vhost_user_iotlb_pending_remove(vq, iova, 1, perm);
65 		}
66 
67 		vhost_user_iotlb_rd_lock(vq);
68 	}
69 
70 	return 0;
71 }
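
/*
 * Illustrative sketch (not part of the build; req_len is a placeholder):
 * callers of the vhost_iova_to_vva() wrapper pass the length they need in
 * *size and must check it on return, as the translation may cover only
 * part of the requested range (see vring_translate_split() below):
 *
 *	uint64_t len = req_len;
 *	uint64_t vva = vhost_iova_to_vva(dev, vq, iova, &len, VHOST_ACCESS_RO);
 *
 *	if (!vva || len != req_len)
 *		return -1;
 */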
72 
73 #define VHOST_LOG_PAGE	4096
74 
75 /*
76  * Atomically set a bit in memory.
77  */
78 static __rte_always_inline void
79 vhost_set_bit(unsigned int nr, volatile uint8_t *addr)
80 {
81 #if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100)
82 	/*
83 	 * __sync_ built-ins are deprecated, but __atomic_ ones
84 	 * are not as well optimized in older GCC versions.
85 	 */
86 	__sync_fetch_and_or_1(addr, (1U << nr));
87 #else
88 	__atomic_fetch_or(addr, (1U << nr), __ATOMIC_RELAXED);
89 #endif
90 }
91 
92 static __rte_always_inline void
93 vhost_log_page(uint8_t *log_base, uint64_t page)
94 {
95 	vhost_set_bit(page % 8, &log_base[page / 8]);
96 }
97 
98 void
99 __vhost_log_write(struct virtio_net *dev, uint64_t addr, uint64_t len)
100 {
101 	uint64_t page;
102 
103 	if (unlikely(!dev->log_base || !len))
104 		return;
105 
106 	if (unlikely(dev->log_size <= ((addr + len - 1) / VHOST_LOG_PAGE / 8)))
107 		return;
108 
109 	/* To make sure guest memory updates are committed before logging */
110 	rte_atomic_thread_fence(__ATOMIC_RELEASE);
111 
112 	page = addr / VHOST_LOG_PAGE;
113 	while (page * VHOST_LOG_PAGE < addr + len) {
114 		vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);
115 		page += 1;
116 	}
117 }
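
/*
 * Worked example (informative): the dirty log is a bitmap with one bit per
 * 4 KiB guest page and eight pages per byte.  Logging addr = 0x3000,
 * len = 0x2000 touches pages 3 and 4, i.e. bits 3 and 4 of log_base[0]:
 *
 *	page 3: vhost_set_bit(3 % 8, &log_base[3 / 8]);
 *	page 4: vhost_set_bit(4 % 8, &log_base[4 / 8]);
 */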
118 
119 void
120 __vhost_log_write_iova(struct virtio_net *dev, struct vhost_virtqueue *vq,
121 			     uint64_t iova, uint64_t len)
122 {
123 	uint64_t hva, gpa, map_len;
124 	map_len = len;
125 
126 	hva = __vhost_iova_to_vva(dev, vq, iova, &map_len, VHOST_ACCESS_RW);
127 	if (map_len != len) {
128 		VHOST_LOG_DATA(ERR,
129 			"Failed to write log for IOVA 0x%" PRIx64 ". No IOTLB entry found\n",
130 			iova);
131 		return;
132 	}
133 
134 	gpa = hva_to_gpa(dev, hva, len);
135 	if (gpa)
136 		__vhost_log_write(dev, gpa, len);
137 }
138 
139 void
140 __vhost_log_cache_sync(struct virtio_net *dev, struct vhost_virtqueue *vq)
141 {
142 	unsigned long *log_base;
143 	int i;
144 
145 	if (unlikely(!dev->log_base))
146 		return;
147 
148 	/* No cache, nothing to sync */
149 	if (unlikely(!vq->log_cache))
150 		return;
151 
152 	rte_atomic_thread_fence(__ATOMIC_RELEASE);
153 
154 	log_base = (unsigned long *)(uintptr_t)dev->log_base;
155 
156 	for (i = 0; i < vq->log_cache_nb_elem; i++) {
157 		struct log_cache_entry *elem = vq->log_cache + i;
158 
159 #if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100)
160 		/*
161 		 * '__sync' builtins are deprecated, but '__atomic' ones
162 		 * are sub-optimized in older GCC versions.
163 		 */
164 		__sync_fetch_and_or(log_base + elem->offset, elem->val);
165 #else
166 		__atomic_fetch_or(log_base + elem->offset, elem->val,
167 				__ATOMIC_RELAXED);
168 #endif
169 	}
170 
171 	rte_atomic_thread_fence(__ATOMIC_RELEASE);
172 
173 	vq->log_cache_nb_elem = 0;
174 }
175 
176 static __rte_always_inline void
177 vhost_log_cache_page(struct virtio_net *dev, struct vhost_virtqueue *vq,
178 			uint64_t page)
179 {
180 	uint32_t bit_nr = page % (sizeof(unsigned long) << 3);
181 	uint32_t offset = page / (sizeof(unsigned long) << 3);
182 	int i;
183 
184 	if (unlikely(!vq->log_cache)) {
185 		/* No logging cache allocated, write dirty log map directly */
186 		rte_atomic_thread_fence(__ATOMIC_RELEASE);
187 		vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);
188 
189 		return;
190 	}
191 
192 	for (i = 0; i < vq->log_cache_nb_elem; i++) {
193 		struct log_cache_entry *elem = vq->log_cache + i;
194 
195 		if (elem->offset == offset) {
196 			elem->val |= (1UL << bit_nr);
197 			return;
198 		}
199 	}
200 
201 	if (unlikely(i >= VHOST_LOG_CACHE_NR)) {
202 		/*
203 		 * No more room for a new log cache entry,
204 		 * so write the dirty log map directly.
205 		 */
206 		rte_atomic_thread_fence(__ATOMIC_RELEASE);
207 		vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);
208 
209 		return;
210 	}
211 
212 	vq->log_cache[i].offset = offset;
213 	vq->log_cache[i].val = (1UL << bit_nr);
214 	vq->log_cache_nb_elem++;
215 }
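
/*
 * Worked example (informative, assuming a 64-bit host where
 * sizeof(unsigned long) == 8): each log cache entry shadows one unsigned
 * long worth of bitmap, i.e. 64 pages (256 KiB of guest memory).  For
 * page = 130:
 *
 *	offset = 130 / 64 = 2,  bit_nr = 130 % 64 = 2
 *
 * so the cached word for log_base word 2 gets bit 2 set, and it is OR-ed
 * into the shared log on the next __vhost_log_cache_sync() call.
 */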
216 
217 void
218 __vhost_log_cache_write(struct virtio_net *dev, struct vhost_virtqueue *vq,
219 			uint64_t addr, uint64_t len)
220 {
221 	uint64_t page;
222 
223 	if (unlikely(!dev->log_base || !len))
224 		return;
225 
226 	if (unlikely(dev->log_size <= ((addr + len - 1) / VHOST_LOG_PAGE / 8)))
227 		return;
228 
229 	page = addr / VHOST_LOG_PAGE;
230 	while (page * VHOST_LOG_PAGE < addr + len) {
231 		vhost_log_cache_page(dev, vq, page);
232 		page += 1;
233 	}
234 }
235 
236 void
237 __vhost_log_cache_write_iova(struct virtio_net *dev, struct vhost_virtqueue *vq,
238 			     uint64_t iova, uint64_t len)
239 {
240 	uint64_t hva, gpa, map_len;
241 	map_len = len;
242 
243 	hva = __vhost_iova_to_vva(dev, vq, iova, &map_len, VHOST_ACCESS_RW);
244 	if (map_len != len) {
245 		VHOST_LOG_DATA(ERR,
246 			"Failed to write log for IOVA 0x%" PRIx64 ". No IOTLB entry found\n",
247 			iova);
248 		return;
249 	}
250 
251 	gpa = hva_to_gpa(dev, hva, len);
252 	if (gpa)
253 		__vhost_log_cache_write(dev, vq, gpa, len);
254 }
255 
256 void *
257 vhost_alloc_copy_ind_table(struct virtio_net *dev, struct vhost_virtqueue *vq,
258 		uint64_t desc_addr, uint64_t desc_len)
259 {
260 	void *idesc;
261 	uint64_t src, dst;
262 	uint64_t len, remain = desc_len;
263 
264 	idesc = rte_malloc_socket(__func__, desc_len, 0, vq->numa_node);
265 	if (unlikely(!idesc))
266 		return NULL;
267 
268 	dst = (uint64_t)(uintptr_t)idesc;
269 
270 	while (remain) {
271 		len = remain;
272 		src = vhost_iova_to_vva(dev, vq, desc_addr, &len,
273 				VHOST_ACCESS_RO);
274 		if (unlikely(!src || !len)) {
275 			rte_free(idesc);
276 			return NULL;
277 		}
278 
279 		rte_memcpy((void *)(uintptr_t)dst, (void *)(uintptr_t)src, len);
280 
281 		remain -= len;
282 		dst += len;
283 		desc_addr += len;
284 	}
285 
286 	return idesc;
287 }
288 
289 void
290 cleanup_vq(struct vhost_virtqueue *vq, int destroy)
291 {
292 	if ((vq->callfd >= 0) && (destroy != 0))
293 		close(vq->callfd);
294 	if (vq->kickfd >= 0)
295 		close(vq->kickfd);
296 }
297 
298 void
299 cleanup_vq_inflight(struct virtio_net *dev, struct vhost_virtqueue *vq)
300 {
301 	if (!(dev->protocol_features &
302 	    (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)))
303 		return;
304 
305 	if (vq_is_packed(dev)) {
306 		if (vq->inflight_packed)
307 			vq->inflight_packed = NULL;
308 	} else {
309 		if (vq->inflight_split)
310 			vq->inflight_split = NULL;
311 	}
312 
313 	if (vq->resubmit_inflight) {
314 		if (vq->resubmit_inflight->resubmit_list) {
315 			rte_free(vq->resubmit_inflight->resubmit_list);
316 			vq->resubmit_inflight->resubmit_list = NULL;
317 		}
318 		rte_free(vq->resubmit_inflight);
319 		vq->resubmit_inflight = NULL;
320 	}
321 }
322 
323 /*
324  * Unmap any memory, close any file descriptors and
325  * free any memory owned by a device.
326  */
327 void
328 cleanup_device(struct virtio_net *dev, int destroy)
329 {
330 	uint32_t i;
331 
332 	vhost_backend_cleanup(dev);
333 
334 	for (i = 0; i < dev->nr_vring; i++) {
335 		cleanup_vq(dev->virtqueue[i], destroy);
336 		cleanup_vq_inflight(dev, dev->virtqueue[i]);
337 	}
338 }
339 
340 static void
341 vhost_free_async_mem(struct vhost_virtqueue *vq)
342 {
343 	if (!vq->async)
344 		return;
345 
346 	rte_free(vq->async->pkts_info);
347 
348 	rte_free(vq->async->buffers_packed);
349 	vq->async->buffers_packed = NULL;
350 	rte_free(vq->async->descs_split);
351 	vq->async->descs_split = NULL;
352 
353 	rte_free(vq->async);
354 	vq->async = NULL;
355 }
356 
357 void
358 free_vq(struct virtio_net *dev, struct vhost_virtqueue *vq)
359 {
360 	if (vq_is_packed(dev))
361 		rte_free(vq->shadow_used_packed);
362 	else
363 		rte_free(vq->shadow_used_split);
364 
365 	vhost_free_async_mem(vq);
366 	rte_free(vq->batch_copy_elems);
367 	rte_mempool_free(vq->iotlb_pool);
368 	rte_free(vq->log_cache);
369 	rte_free(vq);
370 }
371 
372 /*
373  * Release virtqueues and device memory.
374  */
375 static void
376 free_device(struct virtio_net *dev)
377 {
378 	uint32_t i;
379 
380 	for (i = 0; i < dev->nr_vring; i++)
381 		free_vq(dev, dev->virtqueue[i]);
382 
383 	rte_free(dev);
384 }
385 
386 static __rte_always_inline int
387 log_translate(struct virtio_net *dev, struct vhost_virtqueue *vq)
388 {
389 	if (likely(!(vq->ring_addrs.flags & (1 << VHOST_VRING_F_LOG))))
390 		return 0;
391 
392 	vq->log_guest_addr = translate_log_addr(dev, vq,
393 						vq->ring_addrs.log_guest_addr);
394 	if (vq->log_guest_addr == 0)
395 		return -1;
396 
397 	return 0;
398 }
399 
400 /*
401  * Converts a vring log address to a GPA.
402  * If IOMMU is enabled, the log address is an IOVA.
403  * If IOMMU is not enabled, the log address is already a GPA.
404  *
405  * Caller should have iotlb_lock read-locked
406  */
407 uint64_t
408 translate_log_addr(struct virtio_net *dev, struct vhost_virtqueue *vq,
409 		uint64_t log_addr)
410 {
411 	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) {
412 		const uint64_t exp_size = sizeof(uint64_t);
413 		uint64_t hva, gpa;
414 		uint64_t size = exp_size;
415 
416 		hva = vhost_iova_to_vva(dev, vq, log_addr,
417 					&size, VHOST_ACCESS_RW);
418 
419 		if (size != exp_size)
420 			return 0;
421 
422 		gpa = hva_to_gpa(dev, hva, exp_size);
423 		if (!gpa) {
424 			VHOST_LOG_CONFIG(ERR,
425 				"VQ: Failed to find GPA for log_addr: 0x%"
426 				PRIx64 " hva: 0x%" PRIx64 "\n",
427 				log_addr, hva);
428 			return 0;
429 		}
430 		return gpa;
431 
432 	} else
433 		return log_addr;
434 }
435 
436 /* Caller should have iotlb_lock read-locked */
437 static int
438 vring_translate_split(struct virtio_net *dev, struct vhost_virtqueue *vq)
439 {
440 	uint64_t req_size, size;
441 
442 	req_size = sizeof(struct vring_desc) * vq->size;
443 	size = req_size;
444 	vq->desc = (struct vring_desc *)(uintptr_t)vhost_iova_to_vva(dev, vq,
445 						vq->ring_addrs.desc_user_addr,
446 						&size, VHOST_ACCESS_RW);
447 	if (!vq->desc || size != req_size)
448 		return -1;
449 
450 	req_size = sizeof(struct vring_avail);
451 	req_size += sizeof(uint16_t) * vq->size;
452 	if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))
453 		req_size += sizeof(uint16_t);
454 	size = req_size;
455 	vq->avail = (struct vring_avail *)(uintptr_t)vhost_iova_to_vva(dev, vq,
456 						vq->ring_addrs.avail_user_addr,
457 						&size, VHOST_ACCESS_RW);
458 	if (!vq->avail || size != req_size)
459 		return -1;
460 
461 	req_size = sizeof(struct vring_used);
462 	req_size += sizeof(struct vring_used_elem) * vq->size;
463 	if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))
464 		req_size += sizeof(uint16_t);
465 	size = req_size;
466 	vq->used = (struct vring_used *)(uintptr_t)vhost_iova_to_vva(dev, vq,
467 						vq->ring_addrs.used_user_addr,
468 						&size, VHOST_ACCESS_RW);
469 	if (!vq->used || size != req_size)
470 		return -1;
471 
472 	return 0;
473 }
474 
475 /* Caller should have iotlb_lock read-locked */
476 static int
477 vring_translate_packed(struct virtio_net *dev, struct vhost_virtqueue *vq)
478 {
479 	uint64_t req_size, size;
480 
481 	req_size = sizeof(struct vring_packed_desc) * vq->size;
482 	size = req_size;
483 	vq->desc_packed = (struct vring_packed_desc *)(uintptr_t)
484 		vhost_iova_to_vva(dev, vq, vq->ring_addrs.desc_user_addr,
485 				&size, VHOST_ACCESS_RW);
486 	if (!vq->desc_packed || size != req_size)
487 		return -1;
488 
489 	req_size = sizeof(struct vring_packed_desc_event);
490 	size = req_size;
491 	vq->driver_event = (struct vring_packed_desc_event *)(uintptr_t)
492 		vhost_iova_to_vva(dev, vq, vq->ring_addrs.avail_user_addr,
493 				&size, VHOST_ACCESS_RW);
494 	if (!vq->driver_event || size != req_size)
495 		return -1;
496 
497 	req_size = sizeof(struct vring_packed_desc_event);
498 	size = req_size;
499 	vq->device_event = (struct vring_packed_desc_event *)(uintptr_t)
500 		vhost_iova_to_vva(dev, vq, vq->ring_addrs.used_user_addr,
501 				&size, VHOST_ACCESS_RW);
502 	if (!vq->device_event || size != req_size)
503 		return -1;
504 
505 	return 0;
506 }
507 
508 int
509 vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq)
510 {
511 
512 	if (!(dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)))
513 		return -1;
514 
515 	if (vq_is_packed(dev)) {
516 		if (vring_translate_packed(dev, vq) < 0)
517 			return -1;
518 	} else {
519 		if (vring_translate_split(dev, vq) < 0)
520 			return -1;
521 	}
522 
523 	if (log_translate(dev, vq) < 0)
524 		return -1;
525 
526 	vq->access_ok = true;
527 
528 	return 0;
529 }
530 
531 void
532 vring_invalidate(struct virtio_net *dev, struct vhost_virtqueue *vq)
533 {
534 	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
535 		vhost_user_iotlb_wr_lock(vq);
536 
537 	vq->access_ok = false;
538 	vq->desc = NULL;
539 	vq->avail = NULL;
540 	vq->used = NULL;
541 	vq->log_guest_addr = 0;
542 
543 	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
544 		vhost_user_iotlb_wr_unlock(vq);
545 }
546 
547 static void
548 init_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
549 {
550 	struct vhost_virtqueue *vq;
551 	int numa_node = SOCKET_ID_ANY;
552 
553 	if (vring_idx >= VHOST_MAX_VRING) {
554 		VHOST_LOG_CONFIG(ERR,
555 				"Failed not init vring, out of bound (%d)\n",
556 				vring_idx);
557 		return;
558 	}
559 
560 	vq = dev->virtqueue[vring_idx];
561 	if (!vq) {
562 		VHOST_LOG_CONFIG(ERR, "Virtqueue not allocated (%d)\n",
563 				vring_idx);
564 		return;
565 	}
566 
567 	memset(vq, 0, sizeof(struct vhost_virtqueue));
568 
569 	vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD;
570 	vq->callfd = VIRTIO_UNINITIALIZED_EVENTFD;
571 	vq->notif_enable = VIRTIO_UNINITIALIZED_NOTIF;
572 
573 #ifdef RTE_LIBRTE_VHOST_NUMA
574 	if (get_mempolicy(&numa_node, NULL, 0, vq, MPOL_F_NODE | MPOL_F_ADDR)) {
575 		VHOST_LOG_CONFIG(ERR, "(%d) failed to query numa node: %s\n",
576 			dev->vid, rte_strerror(errno));
577 		numa_node = SOCKET_ID_ANY;
578 	}
579 #endif
580 	vq->numa_node = numa_node;
581 
582 	vhost_user_iotlb_init(dev, vring_idx);
583 }
584 
585 static void
586 reset_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
587 {
588 	struct vhost_virtqueue *vq;
589 	int callfd;
590 
591 	if (vring_idx >= VHOST_MAX_VRING) {
592 		VHOST_LOG_CONFIG(ERR,
593 				"Failed not init vring, out of bound (%d)\n",
594 				vring_idx);
595 		return;
596 	}
597 
598 	vq = dev->virtqueue[vring_idx];
599 	if (!vq) {
600 		VHOST_LOG_CONFIG(ERR, "Virtqueue not allocated (%d)\n",
601 				vring_idx);
602 		return;
603 	}
604 
605 	callfd = vq->callfd;
606 	init_vring_queue(dev, vring_idx);
607 	vq->callfd = callfd;
608 }
609 
610 int
611 alloc_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
612 {
613 	struct vhost_virtqueue *vq;
614 	uint32_t i;
615 
616 	/* Also allocate holes, if any, up to requested vring index. */
617 	for (i = 0; i <= vring_idx; i++) {
618 		if (dev->virtqueue[i])
619 			continue;
620 
621 		vq = rte_zmalloc(NULL, sizeof(struct vhost_virtqueue), 0);
622 		if (vq == NULL) {
623 			VHOST_LOG_CONFIG(ERR,
624 				"Failed to allocate memory for vring:%u.\n", i);
625 			return -1;
626 		}
627 
628 		dev->virtqueue[i] = vq;
629 		init_vring_queue(dev, i);
630 		rte_spinlock_init(&vq->access_lock);
631 		vq->avail_wrap_counter = 1;
632 		vq->used_wrap_counter = 1;
633 		vq->signalled_used_valid = false;
634 	}
635 
636 	dev->nr_vring = RTE_MAX(dev->nr_vring, vring_idx + 1);
637 
638 	return 0;
639 }
640 
641 /*
642  * Reset some variables in the device structure, while keeping a few
643  * others untouched, such as vid, ifname and nr_vring: they
644  * should remain the same unless the device is removed.
645  */
646 void
647 reset_device(struct virtio_net *dev)
648 {
649 	uint32_t i;
650 
651 	dev->features = 0;
652 	dev->protocol_features = 0;
653 	dev->flags &= VIRTIO_DEV_BUILTIN_VIRTIO_NET;
654 
655 	for (i = 0; i < dev->nr_vring; i++)
656 		reset_vring_queue(dev, i);
657 }
658 
659 /*
660  * Invoked when a new vhost-user connection is established (when
661  * a new virtio device is being attached).
662  */
663 int
664 vhost_new_device(void)
665 {
666 	struct virtio_net *dev;
667 	int i;
668 
669 	pthread_mutex_lock(&vhost_dev_lock);
670 	for (i = 0; i < MAX_VHOST_DEVICE; i++) {
671 		if (vhost_devices[i] == NULL)
672 			break;
673 	}
674 
675 	if (i == MAX_VHOST_DEVICE) {
676 		VHOST_LOG_CONFIG(ERR,
677 			"Failed to find a free slot for new device.\n");
678 		pthread_mutex_unlock(&vhost_dev_lock);
679 		return -1;
680 	}
681 
682 	dev = rte_zmalloc(NULL, sizeof(struct virtio_net), 0);
683 	if (dev == NULL) {
684 		VHOST_LOG_CONFIG(ERR,
685 			"Failed to allocate memory for new dev.\n");
686 		pthread_mutex_unlock(&vhost_dev_lock);
687 		return -1;
688 	}
689 
690 	vhost_devices[i] = dev;
691 	pthread_mutex_unlock(&vhost_dev_lock);
692 
693 	dev->vid = i;
694 	dev->flags = VIRTIO_DEV_BUILTIN_VIRTIO_NET;
695 	dev->slave_req_fd = -1;
696 	dev->postcopy_ufd = -1;
697 	rte_spinlock_init(&dev->slave_req_lock);
698 
699 	return i;
700 }
701 
702 void
703 vhost_destroy_device_notify(struct virtio_net *dev)
704 {
705 	struct rte_vdpa_device *vdpa_dev;
706 
707 	if (dev->flags & VIRTIO_DEV_RUNNING) {
708 		vdpa_dev = dev->vdpa_dev;
709 		if (vdpa_dev)
710 			vdpa_dev->ops->dev_close(dev->vid);
711 		dev->flags &= ~VIRTIO_DEV_RUNNING;
712 		dev->notify_ops->destroy_device(dev->vid);
713 	}
714 }
715 
716 /*
717  * Invoked when the vhost-user connection is broken (when
718  * the virtio device is being detached).
719  */
720 void
721 vhost_destroy_device(int vid)
722 {
723 	struct virtio_net *dev = get_device(vid);
724 
725 	if (dev == NULL)
726 		return;
727 
728 	vhost_destroy_device_notify(dev);
729 
730 	cleanup_device(dev, 1);
731 	free_device(dev);
732 
733 	vhost_devices[vid] = NULL;
734 }
735 
736 void
737 vhost_attach_vdpa_device(int vid, struct rte_vdpa_device *vdpa_dev)
738 {
739 	struct virtio_net *dev = get_device(vid);
740 
741 	if (dev == NULL)
742 		return;
743 
744 	dev->vdpa_dev = vdpa_dev;
745 }
746 
747 void
748 vhost_set_ifname(int vid, const char *if_name, unsigned int if_len)
749 {
750 	struct virtio_net *dev;
751 	unsigned int len;
752 
753 	dev = get_device(vid);
754 	if (dev == NULL)
755 		return;
756 
757 	len = if_len > sizeof(dev->ifname) ?
758 		sizeof(dev->ifname) : if_len;
759 
760 	strncpy(dev->ifname, if_name, len);
761 	dev->ifname[sizeof(dev->ifname) - 1] = '\0';
762 }
763 
764 void
765 vhost_setup_virtio_net(int vid, bool enable, bool compliant_ol_flags)
766 {
767 	struct virtio_net *dev = get_device(vid);
768 
769 	if (dev == NULL)
770 		return;
771 
772 	if (enable)
773 		dev->flags |= VIRTIO_DEV_BUILTIN_VIRTIO_NET;
774 	else
775 		dev->flags &= ~VIRTIO_DEV_BUILTIN_VIRTIO_NET;
776 	if (!compliant_ol_flags)
777 		dev->flags |= VIRTIO_DEV_LEGACY_OL_FLAGS;
778 	else
779 		dev->flags &= ~VIRTIO_DEV_LEGACY_OL_FLAGS;
780 }
781 
782 void
783 vhost_enable_extbuf(int vid)
784 {
785 	struct virtio_net *dev = get_device(vid);
786 
787 	if (dev == NULL)
788 		return;
789 
790 	dev->extbuf = 1;
791 }
792 
793 void
794 vhost_enable_linearbuf(int vid)
795 {
796 	struct virtio_net *dev = get_device(vid);
797 
798 	if (dev == NULL)
799 		return;
800 
801 	dev->linearbuf = 1;
802 }
803 
804 int
805 rte_vhost_get_mtu(int vid, uint16_t *mtu)
806 {
807 	struct virtio_net *dev = get_device(vid);
808 
809 	if (dev == NULL || mtu == NULL)
810 		return -ENODEV;
811 
812 	if (!(dev->flags & VIRTIO_DEV_READY))
813 		return -EAGAIN;
814 
815 	if (!(dev->features & (1ULL << VIRTIO_NET_F_MTU)))
816 		return -ENOTSUP;
817 
818 	*mtu = dev->mtu;
819 
820 	return 0;
821 }
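
/*
 * Usage sketch (not part of the build): the return codes above let callers
 * distinguish "device not ready yet" (-EAGAIN, retry later) from "MTU not
 * negotiated" (-ENOTSUP, keep a default):
 *
 *	uint16_t mtu = RTE_ETHER_MTU;
 *	int ret = rte_vhost_get_mtu(vid, &mtu);
 *
 *	if (ret == -EAGAIN)
 *		... device not ready, try again later ...
 *	else if (ret != 0)
 *		... keep the default ...
 */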
822 
823 int
824 rte_vhost_get_numa_node(int vid)
825 {
826 #ifdef RTE_LIBRTE_VHOST_NUMA
827 	struct virtio_net *dev = get_device(vid);
828 	int numa_node;
829 	int ret;
830 
831 	if (dev == NULL || numa_available() != 0)
832 		return -1;
833 
834 	ret = get_mempolicy(&numa_node, NULL, 0, dev,
835 			    MPOL_F_NODE | MPOL_F_ADDR);
836 	if (ret < 0) {
837 		VHOST_LOG_CONFIG(ERR,
838 			"(%d) failed to query numa node: %s\n",
839 			vid, rte_strerror(errno));
840 		return -1;
841 	}
842 
843 	return numa_node;
844 #else
845 	RTE_SET_USED(vid);
846 	return -1;
847 #endif
848 }
849 
850 uint32_t
851 rte_vhost_get_queue_num(int vid)
852 {
853 	struct virtio_net *dev = get_device(vid);
854 
855 	if (dev == NULL)
856 		return 0;
857 
858 	return dev->nr_vring / 2;
859 }
860 
861 uint16_t
862 rte_vhost_get_vring_num(int vid)
863 {
864 	struct virtio_net *dev = get_device(vid);
865 
866 	if (dev == NULL)
867 		return 0;
868 
869 	return dev->nr_vring;
870 }
871 
872 int
873 rte_vhost_get_ifname(int vid, char *buf, size_t len)
874 {
875 	struct virtio_net *dev = get_device(vid);
876 
877 	if (dev == NULL || buf == NULL)
878 		return -1;
879 
880 	len = RTE_MIN(len, sizeof(dev->ifname));
881 
882 	strncpy(buf, dev->ifname, len);
883 	buf[len - 1] = '\0';
884 
885 	return 0;
886 }
887 
888 int
889 rte_vhost_get_negotiated_features(int vid, uint64_t *features)
890 {
891 	struct virtio_net *dev;
892 
893 	dev = get_device(vid);
894 	if (dev == NULL || features == NULL)
895 		return -1;
896 
897 	*features = dev->features;
898 	return 0;
899 }
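
/*
 * Usage sketch (not part of the build): applications typically test single
 * bits of the negotiated feature set, using the same (1ULL << bit) pattern
 * as this file, e.g. to check whether the device sits behind a vIOMMU:
 *
 *	uint64_t features = 0;
 *
 *	if (rte_vhost_get_negotiated_features(vid, &features) == 0 &&
 *	    (features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)))
 *		... ring addresses are IOVAs, not GPAs ...
 */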
900 
901 int
902 rte_vhost_get_negotiated_protocol_features(int vid,
903 					   uint64_t *protocol_features)
904 {
905 	struct virtio_net *dev;
906 
907 	dev = get_device(vid);
908 	if (dev == NULL || protocol_features == NULL)
909 		return -1;
910 
911 	*protocol_features = dev->protocol_features;
912 	return 0;
913 }
914 
915 int
916 rte_vhost_get_mem_table(int vid, struct rte_vhost_memory **mem)
917 {
918 	struct virtio_net *dev;
919 	struct rte_vhost_memory *m;
920 	size_t size;
921 
922 	dev = get_device(vid);
923 	if (dev == NULL || mem == NULL)
924 		return -1;
925 
926 	size = dev->mem->nregions * sizeof(struct rte_vhost_mem_region);
927 	m = malloc(sizeof(struct rte_vhost_memory) + size);
928 	if (!m)
929 		return -1;
930 
931 	m->nregions = dev->mem->nregions;
932 	memcpy(m->regions, dev->mem->regions, size);
933 	*mem = m;
934 
935 	return 0;
936 }
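
/*
 * Usage sketch (not part of the build): the table returned above is
 * allocated with malloc(), so ownership passes to the caller, which must
 * free() it:
 *
 *	struct rte_vhost_memory *mem = NULL;
 *	uint32_t i;
 *
 *	if (rte_vhost_get_mem_table(vid, &mem) == 0) {
 *		for (i = 0; i < mem->nregions; i++)
 *			... inspect mem->regions[i] ...
 *		free(mem);
 *	}
 */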
937 
938 int
939 rte_vhost_get_vhost_vring(int vid, uint16_t vring_idx,
940 			  struct rte_vhost_vring *vring)
941 {
942 	struct virtio_net *dev;
943 	struct vhost_virtqueue *vq;
944 
945 	dev = get_device(vid);
946 	if (dev == NULL || vring == NULL)
947 		return -1;
948 
949 	if (vring_idx >= VHOST_MAX_VRING)
950 		return -1;
951 
952 	vq = dev->virtqueue[vring_idx];
953 	if (!vq)
954 		return -1;
955 
956 	if (vq_is_packed(dev)) {
957 		vring->desc_packed = vq->desc_packed;
958 		vring->driver_event = vq->driver_event;
959 		vring->device_event = vq->device_event;
960 	} else {
961 		vring->desc = vq->desc;
962 		vring->avail = vq->avail;
963 		vring->used = vq->used;
964 	}
965 	vring->log_guest_addr  = vq->log_guest_addr;
966 
967 	vring->callfd  = vq->callfd;
968 	vring->kickfd  = vq->kickfd;
969 	vring->size    = vq->size;
970 
971 	return 0;
972 }
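
/*
 * Usage sketch (not part of the build): external backends use this to fetch
 * the raw ring pointers and the kick/call eventfds of one virtqueue:
 *
 *	struct rte_vhost_vring vring;
 *
 *	if (rte_vhost_get_vhost_vring(vid, vring_idx, &vring) == 0) {
 *		... vring.size entries at vring.desc (split layout) or
 *		    vring.desc_packed (packed layout), guest kicks arrive on
 *		    vring.kickfd, interrupts are injected via vring.callfd ...
 *	}
 */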
973 
974 int
975 rte_vhost_get_vhost_ring_inflight(int vid, uint16_t vring_idx,
976 				  struct rte_vhost_ring_inflight *vring)
977 {
978 	struct virtio_net *dev;
979 	struct vhost_virtqueue *vq;
980 
981 	dev = get_device(vid);
982 	if (unlikely(!dev))
983 		return -1;
984 
985 	if (vring_idx >= VHOST_MAX_VRING)
986 		return -1;
987 
988 	vq = dev->virtqueue[vring_idx];
989 	if (unlikely(!vq))
990 		return -1;
991 
992 	if (vq_is_packed(dev)) {
993 		if (unlikely(!vq->inflight_packed))
994 			return -1;
995 
996 		vring->inflight_packed = vq->inflight_packed;
997 	} else {
998 		if (unlikely(!vq->inflight_split))
999 			return -1;
1000 
1001 		vring->inflight_split = vq->inflight_split;
1002 	}
1003 
1004 	vring->resubmit_inflight = vq->resubmit_inflight;
1005 
1006 	return 0;
1007 }
1008 
1009 int
1010 rte_vhost_set_inflight_desc_split(int vid, uint16_t vring_idx,
1011 				  uint16_t idx)
1012 {
1013 	struct vhost_virtqueue *vq;
1014 	struct virtio_net *dev;
1015 
1016 	dev = get_device(vid);
1017 	if (unlikely(!dev))
1018 		return -1;
1019 
1020 	if (unlikely(!(dev->protocol_features &
1021 	    (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1022 		return 0;
1023 
1024 	if (unlikely(vq_is_packed(dev)))
1025 		return -1;
1026 
1027 	if (unlikely(vring_idx >= VHOST_MAX_VRING))
1028 		return -1;
1029 
1030 	vq = dev->virtqueue[vring_idx];
1031 	if (unlikely(!vq))
1032 		return -1;
1033 
1034 	if (unlikely(!vq->inflight_split))
1035 		return -1;
1036 
1037 	if (unlikely(idx >= vq->size))
1038 		return -1;
1039 
1040 	vq->inflight_split->desc[idx].counter = vq->global_counter++;
1041 	vq->inflight_split->desc[idx].inflight = 1;
1042 	return 0;
1043 }
1044 
1045 int
1046 rte_vhost_set_inflight_desc_packed(int vid, uint16_t vring_idx,
1047 				   uint16_t head, uint16_t last,
1048 				   uint16_t *inflight_entry)
1049 {
1050 	struct rte_vhost_inflight_info_packed *inflight_info;
1051 	struct virtio_net *dev;
1052 	struct vhost_virtqueue *vq;
1053 	struct vring_packed_desc *desc;
1054 	uint16_t old_free_head, free_head;
1055 
1056 	dev = get_device(vid);
1057 	if (unlikely(!dev))
1058 		return -1;
1059 
1060 	if (unlikely(!(dev->protocol_features &
1061 	    (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1062 		return 0;
1063 
1064 	if (unlikely(!vq_is_packed(dev)))
1065 		return -1;
1066 
1067 	if (unlikely(vring_idx >= VHOST_MAX_VRING))
1068 		return -1;
1069 
1070 	vq = dev->virtqueue[vring_idx];
1071 	if (unlikely(!vq))
1072 		return -1;
1073 
1074 	inflight_info = vq->inflight_packed;
1075 	if (unlikely(!inflight_info))
1076 		return -1;
1077 
1078 	if (unlikely(head >= vq->size))
1079 		return -1;
1080 
1081 	desc = vq->desc_packed;
1082 	old_free_head = inflight_info->old_free_head;
1083 	if (unlikely(old_free_head >= vq->size))
1084 		return -1;
1085 
1086 	free_head = old_free_head;
1087 
1088 	/* init header descriptor */
1089 	inflight_info->desc[old_free_head].num = 0;
1090 	inflight_info->desc[old_free_head].counter = vq->global_counter++;
1091 	inflight_info->desc[old_free_head].inflight = 1;
1092 
1093 	/* save desc entry in flight entry */
1094 	while (head != ((last + 1) % vq->size)) {
1095 		inflight_info->desc[old_free_head].num++;
1096 		inflight_info->desc[free_head].addr = desc[head].addr;
1097 		inflight_info->desc[free_head].len = desc[head].len;
1098 		inflight_info->desc[free_head].flags = desc[head].flags;
1099 		inflight_info->desc[free_head].id = desc[head].id;
1100 
1101 		inflight_info->desc[old_free_head].last = free_head;
1102 		free_head = inflight_info->desc[free_head].next;
1103 		inflight_info->free_head = free_head;
1104 		head = (head + 1) % vq->size;
1105 	}
1106 
1107 	inflight_info->old_free_head = free_head;
1108 	*inflight_entry = old_free_head;
1109 
1110 	return 0;
1111 }
1112 
1113 int
1114 rte_vhost_clr_inflight_desc_split(int vid, uint16_t vring_idx,
1115 				  uint16_t last_used_idx, uint16_t idx)
1116 {
1117 	struct virtio_net *dev;
1118 	struct vhost_virtqueue *vq;
1119 
1120 	dev = get_device(vid);
1121 	if (unlikely(!dev))
1122 		return -1;
1123 
1124 	if (unlikely(!(dev->protocol_features &
1125 	    (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1126 		return 0;
1127 
1128 	if (unlikely(vq_is_packed(dev)))
1129 		return -1;
1130 
1131 	if (unlikely(vring_idx >= VHOST_MAX_VRING))
1132 		return -1;
1133 
1134 	vq = dev->virtqueue[vring_idx];
1135 	if (unlikely(!vq))
1136 		return -1;
1137 
1138 	if (unlikely(!vq->inflight_split))
1139 		return -1;
1140 
1141 	if (unlikely(idx >= vq->size))
1142 		return -1;
1143 
1144 	rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
1145 
1146 	vq->inflight_split->desc[idx].inflight = 0;
1147 
1148 	rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
1149 
1150 	vq->inflight_split->used_idx = last_used_idx;
1151 	return 0;
1152 }
1153 
1154 int
1155 rte_vhost_clr_inflight_desc_packed(int vid, uint16_t vring_idx,
1156 				   uint16_t head)
1157 {
1158 	struct rte_vhost_inflight_info_packed *inflight_info;
1159 	struct virtio_net *dev;
1160 	struct vhost_virtqueue *vq;
1161 
1162 	dev = get_device(vid);
1163 	if (unlikely(!dev))
1164 		return -1;
1165 
1166 	if (unlikely(!(dev->protocol_features &
1167 	    (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1168 		return 0;
1169 
1170 	if (unlikely(!vq_is_packed(dev)))
1171 		return -1;
1172 
1173 	if (unlikely(vring_idx >= VHOST_MAX_VRING))
1174 		return -1;
1175 
1176 	vq = dev->virtqueue[vring_idx];
1177 	if (unlikely(!vq))
1178 		return -1;
1179 
1180 	inflight_info = vq->inflight_packed;
1181 	if (unlikely(!inflight_info))
1182 		return -1;
1183 
1184 	if (unlikely(head >= vq->size))
1185 		return -1;
1186 
1187 	rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
1188 
1189 	inflight_info->desc[head].inflight = 0;
1190 
1191 	rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
1192 
1193 	inflight_info->old_free_head = inflight_info->free_head;
1194 	inflight_info->old_used_idx = inflight_info->used_idx;
1195 	inflight_info->old_used_wrap_counter = inflight_info->used_wrap_counter;
1196 
1197 	return 0;
1198 }
1199 
1200 int
1201 rte_vhost_set_last_inflight_io_split(int vid, uint16_t vring_idx,
1202 				     uint16_t idx)
1203 {
1204 	struct virtio_net *dev;
1205 	struct vhost_virtqueue *vq;
1206 
1207 	dev = get_device(vid);
1208 	if (unlikely(!dev))
1209 		return -1;
1210 
1211 	if (unlikely(!(dev->protocol_features &
1212 	    (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1213 		return 0;
1214 
1215 	if (unlikely(vq_is_packed(dev)))
1216 		return -1;
1217 
1218 	if (unlikely(vring_idx >= VHOST_MAX_VRING))
1219 		return -1;
1220 
1221 	vq = dev->virtqueue[vring_idx];
1222 	if (unlikely(!vq))
1223 		return -1;
1224 
1225 	if (unlikely(!vq->inflight_split))
1226 		return -1;
1227 
1228 	if (unlikely(idx >= vq->size))
1229 		return -1;
1230 
1231 	vq->inflight_split->last_inflight_io = idx;
1232 	return 0;
1233 }
1234 
1235 int
1236 rte_vhost_set_last_inflight_io_packed(int vid, uint16_t vring_idx,
1237 				      uint16_t head)
1238 {
1239 	struct rte_vhost_inflight_info_packed *inflight_info;
1240 	struct virtio_net *dev;
1241 	struct vhost_virtqueue *vq;
1242 	uint16_t last;
1243 
1244 	dev = get_device(vid);
1245 	if (unlikely(!dev))
1246 		return -1;
1247 
1248 	if (unlikely(!(dev->protocol_features &
1249 	    (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1250 		return 0;
1251 
1252 	if (unlikely(!vq_is_packed(dev)))
1253 		return -1;
1254 
1255 	if (unlikely(vring_idx >= VHOST_MAX_VRING))
1256 		return -1;
1257 
1258 	vq = dev->virtqueue[vring_idx];
1259 	if (unlikely(!vq))
1260 		return -1;
1261 
1262 	inflight_info = vq->inflight_packed;
1263 	if (unlikely(!inflight_info))
1264 		return -1;
1265 
1266 	if (unlikely(head >= vq->size))
1267 		return -1;
1268 
1269 	last = inflight_info->desc[head].last;
1270 	if (unlikely(last >= vq->size))
1271 		return -1;
1272 
1273 	inflight_info->desc[last].next = inflight_info->free_head;
1274 	inflight_info->free_head = head;
1275 	inflight_info->used_idx += inflight_info->desc[head].num;
1276 	if (inflight_info->used_idx >= inflight_info->desc_num) {
1277 		inflight_info->used_idx -= inflight_info->desc_num;
1278 		inflight_info->used_wrap_counter =
1279 			!inflight_info->used_wrap_counter;
1280 	}
1281 
1282 	return 0;
1283 }
1284 
1285 int
1286 rte_vhost_vring_call(int vid, uint16_t vring_idx)
1287 {
1288 	struct virtio_net *dev;
1289 	struct vhost_virtqueue *vq;
1290 
1291 	dev = get_device(vid);
1292 	if (!dev)
1293 		return -1;
1294 
1295 	if (vring_idx >= VHOST_MAX_VRING)
1296 		return -1;
1297 
1298 	vq = dev->virtqueue[vring_idx];
1299 	if (!vq)
1300 		return -1;
1301 
1302 	if (vq_is_packed(dev))
1303 		vhost_vring_call_packed(dev, vq);
1304 	else
1305 		vhost_vring_call_split(dev, vq);
1306 
1307 	return 0;
1308 }
1309 
1310 uint16_t
1311 rte_vhost_avail_entries(int vid, uint16_t queue_id)
1312 {
1313 	struct virtio_net *dev;
1314 	struct vhost_virtqueue *vq;
1315 	uint16_t ret = 0;
1316 
1317 	dev = get_device(vid);
1318 	if (!dev)
1319 		return 0;
1320 
1321 	if (queue_id >= VHOST_MAX_VRING)
1322 		return 0;
1323 
1324 	vq = dev->virtqueue[queue_id];
1325 	if (!vq)
1326 		return 0;
1327 
1328 	rte_spinlock_lock(&vq->access_lock);
1329 
1330 	if (unlikely(!vq->enabled || vq->avail == NULL))
1331 		goto out;
1332 
1333 	ret = *(volatile uint16_t *)&vq->avail->idx - vq->last_used_idx;
1334 
1335 out:
1336 	rte_spinlock_unlock(&vq->access_lock);
1337 	return ret;
1338 }
1339 
1340 static inline int
1341 vhost_enable_notify_split(struct virtio_net *dev,
1342 		struct vhost_virtqueue *vq, int enable)
1343 {
1344 	if (vq->used == NULL)
1345 		return -1;
1346 
1347 	if (!(dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))) {
1348 		if (enable)
1349 			vq->used->flags &= ~VRING_USED_F_NO_NOTIFY;
1350 		else
1351 			vq->used->flags |= VRING_USED_F_NO_NOTIFY;
1352 	} else {
1353 		if (enable)
1354 			vhost_avail_event(vq) = vq->last_avail_idx;
1355 	}
1356 	return 0;
1357 }
1358 
1359 static inline int
1360 vhost_enable_notify_packed(struct virtio_net *dev,
1361 		struct vhost_virtqueue *vq, int enable)
1362 {
1363 	uint16_t flags;
1364 
1365 	if (vq->device_event == NULL)
1366 		return -1;
1367 
1368 	if (!enable) {
1369 		vq->device_event->flags = VRING_EVENT_F_DISABLE;
1370 		return 0;
1371 	}
1372 
1373 	flags = VRING_EVENT_F_ENABLE;
1374 	if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX)) {
1375 		flags = VRING_EVENT_F_DESC;
1376 		vq->device_event->off_wrap = vq->last_avail_idx |
1377 			vq->avail_wrap_counter << 15;
1378 	}
1379 
1380 	rte_atomic_thread_fence(__ATOMIC_RELEASE);
1381 
1382 	vq->device_event->flags = flags;
1383 	return 0;
1384 }
1385 
1386 int
1387 vhost_enable_guest_notification(struct virtio_net *dev,
1388 		struct vhost_virtqueue *vq, int enable)
1389 {
1390 	/*
1391 	 * If the virtqueue is not ready yet, the change will be
1392 	 * applied when it becomes ready.
1393 	 */
1394 	if (!vq->ready)
1395 		return 0;
1396 
1397 	if (vq_is_packed(dev))
1398 		return vhost_enable_notify_packed(dev, vq, enable);
1399 	else
1400 		return vhost_enable_notify_split(dev, vq, enable);
1401 }
1402 
1403 int
1404 rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable)
1405 {
1406 	struct virtio_net *dev = get_device(vid);
1407 	struct vhost_virtqueue *vq;
1408 	int ret;
1409 
1410 	if (!dev)
1411 		return -1;
1412 
1413 	if (queue_id >= VHOST_MAX_VRING)
1414 		return -1;
1415 
1416 	vq = dev->virtqueue[queue_id];
1417 	if (!vq)
1418 		return -1;
1419 
1420 	rte_spinlock_lock(&vq->access_lock);
1421 
1422 	vq->notif_enable = enable;
1423 	ret = vhost_enable_guest_notification(dev, vq, enable);
1424 
1425 	rte_spinlock_unlock(&vq->access_lock);
1426 
1427 	return ret;
1428 }
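
/*
 * Usage sketch (not part of the build): a polling backend typically keeps
 * guest notifications disabled while it is actively polling the ring and
 * re-enables them just before it stops polling:
 *
 *	rte_vhost_enable_guest_notification(vid, queue_id, 0);
 *	... poll with rte_vhost_dequeue_burst()/rte_vhost_enqueue_burst() ...
 *	rte_vhost_enable_guest_notification(vid, queue_id, 1);
 */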
1429 
1430 void
1431 rte_vhost_log_write(int vid, uint64_t addr, uint64_t len)
1432 {
1433 	struct virtio_net *dev = get_device(vid);
1434 
1435 	if (dev == NULL)
1436 		return;
1437 
1438 	vhost_log_write(dev, addr, len);
1439 }
1440 
1441 void
1442 rte_vhost_log_used_vring(int vid, uint16_t vring_idx,
1443 			 uint64_t offset, uint64_t len)
1444 {
1445 	struct virtio_net *dev;
1446 	struct vhost_virtqueue *vq;
1447 
1448 	dev = get_device(vid);
1449 	if (dev == NULL)
1450 		return;
1451 
1452 	if (vring_idx >= VHOST_MAX_VRING)
1453 		return;
1454 	vq = dev->virtqueue[vring_idx];
1455 	if (!vq)
1456 		return;
1457 
1458 	vhost_log_used_vring(dev, vq, offset, len);
1459 }
1460 
1461 uint32_t
1462 rte_vhost_rx_queue_count(int vid, uint16_t qid)
1463 {
1464 	struct virtio_net *dev;
1465 	struct vhost_virtqueue *vq;
1466 	uint32_t ret = 0;
1467 
1468 	dev = get_device(vid);
1469 	if (dev == NULL)
1470 		return 0;
1471 
1472 	if (unlikely(qid >= dev->nr_vring || (qid & 1) == 0)) {
1473 		VHOST_LOG_DATA(ERR, "(%d) %s: invalid virtqueue idx %d.\n",
1474 			dev->vid, __func__, qid);
1475 		return 0;
1476 	}
1477 
1478 	vq = dev->virtqueue[qid];
1479 	if (vq == NULL)
1480 		return 0;
1481 
1482 	rte_spinlock_lock(&vq->access_lock);
1483 
1484 	if (unlikely(!vq->enabled || vq->avail == NULL))
1485 		goto out;
1486 
1487 	ret = *((volatile uint16_t *)&vq->avail->idx) - vq->last_avail_idx;
1488 
1489 out:
1490 	rte_spinlock_unlock(&vq->access_lock);
1491 	return ret;
1492 }
1493 
1494 struct rte_vdpa_device *
1495 rte_vhost_get_vdpa_device(int vid)
1496 {
1497 	struct virtio_net *dev = get_device(vid);
1498 
1499 	if (dev == NULL)
1500 		return NULL;
1501 
1502 	return dev->vdpa_dev;
1503 }
1504 
1505 int
1506 rte_vhost_get_log_base(int vid, uint64_t *log_base,
1507 		uint64_t *log_size)
1508 {
1509 	struct virtio_net *dev = get_device(vid);
1510 
1511 	if (dev == NULL || log_base == NULL || log_size == NULL)
1512 		return -1;
1513 
1514 	*log_base = dev->log_base;
1515 	*log_size = dev->log_size;
1516 
1517 	return 0;
1518 }
1519 
1520 int
1521 rte_vhost_get_vring_base(int vid, uint16_t queue_id,
1522 		uint16_t *last_avail_idx, uint16_t *last_used_idx)
1523 {
1524 	struct vhost_virtqueue *vq;
1525 	struct virtio_net *dev = get_device(vid);
1526 
1527 	if (dev == NULL || last_avail_idx == NULL || last_used_idx == NULL)
1528 		return -1;
1529 
1530 	if (queue_id >= VHOST_MAX_VRING)
1531 		return -1;
1532 
1533 	vq = dev->virtqueue[queue_id];
1534 	if (!vq)
1535 		return -1;
1536 
1537 	if (vq_is_packed(dev)) {
1538 		*last_avail_idx = (vq->avail_wrap_counter << 15) |
1539 				  vq->last_avail_idx;
1540 		*last_used_idx = (vq->used_wrap_counter << 15) |
1541 				 vq->last_used_idx;
1542 	} else {
1543 		*last_avail_idx = vq->last_avail_idx;
1544 		*last_used_idx = vq->last_used_idx;
1545 	}
1546 
1547 	return 0;
1548 }
1549 
1550 int
1551 rte_vhost_set_vring_base(int vid, uint16_t queue_id,
1552 		uint16_t last_avail_idx, uint16_t last_used_idx)
1553 {
1554 	struct vhost_virtqueue *vq;
1555 	struct virtio_net *dev = get_device(vid);
1556 
1557 	if (!dev)
1558 		return -1;
1559 
1560 	if (queue_id >= VHOST_MAX_VRING)
1561 		return -1;
1562 
1563 	vq = dev->virtqueue[queue_id];
1564 	if (!vq)
1565 		return -1;
1566 
1567 	if (vq_is_packed(dev)) {
1568 		vq->last_avail_idx = last_avail_idx & 0x7fff;
1569 		vq->avail_wrap_counter = !!(last_avail_idx & (1 << 15));
1570 		vq->last_used_idx = last_used_idx & 0x7fff;
1571 		vq->used_wrap_counter = !!(last_used_idx & (1 << 15));
1572 	} else {
1573 		vq->last_avail_idx = last_avail_idx;
1574 		vq->last_used_idx = last_used_idx;
1575 	}
1576 
1577 	return 0;
1578 }
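
/*
 * Usage sketch (not part of the build): rte_vhost_get_vring_base() and
 * rte_vhost_set_vring_base() are used in pairs to save and restore a ring's
 * position, e.g. across a datapath switch.  For packed rings, bit 15 of
 * each index carries the wrap counter, as encoded above:
 *
 *	uint16_t last_avail, last_used;
 *
 *	if (rte_vhost_get_vring_base(vid, queue_id, &last_avail, &last_used) == 0)
 *		rte_vhost_set_vring_base(vid, queue_id, last_avail, last_used);
 */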
1579 
1580 int
1581 rte_vhost_get_vring_base_from_inflight(int vid,
1582 				       uint16_t queue_id,
1583 				       uint16_t *last_avail_idx,
1584 				       uint16_t *last_used_idx)
1585 {
1586 	struct rte_vhost_inflight_info_packed *inflight_info;
1587 	struct vhost_virtqueue *vq;
1588 	struct virtio_net *dev = get_device(vid);
1589 
1590 	if (dev == NULL || last_avail_idx == NULL || last_used_idx == NULL)
1591 		return -1;
1592 
1593 	if (queue_id >= VHOST_MAX_VRING)
1594 		return -1;
1595 
1596 	vq = dev->virtqueue[queue_id];
1597 	if (!vq)
1598 		return -1;
1599 
1600 	if (!vq_is_packed(dev))
1601 		return -1;
1602 
1603 	inflight_info = vq->inflight_packed;
1604 	if (!inflight_info)
1605 		return -1;
1606 
1607 	*last_avail_idx = (inflight_info->old_used_wrap_counter << 15) |
1608 			  inflight_info->old_used_idx;
1609 	*last_used_idx = *last_avail_idx;
1610 
1611 	return 0;
1612 }
1613 
1614 int
1615 rte_vhost_extern_callback_register(int vid,
1616 		struct rte_vhost_user_extern_ops const * const ops, void *ctx)
1617 {
1618 	struct virtio_net *dev = get_device(vid);
1619 
1620 	if (dev == NULL || ops == NULL)
1621 		return -1;
1622 
1623 	dev->extern_ops = *ops;
1624 	dev->extern_data = ctx;
1625 	return 0;
1626 }
1627 
1628 static __rte_always_inline int
1629 async_channel_register(int vid, uint16_t queue_id,
1630 		struct rte_vhost_async_channel_ops *ops)
1631 {
1632 	struct virtio_net *dev = get_device(vid);
1633 	struct vhost_virtqueue *vq = dev->virtqueue[queue_id];
1634 	struct vhost_async *async;
1635 	int node = vq->numa_node;
1636 
1637 	if (unlikely(vq->async)) {
1638 		VHOST_LOG_CONFIG(ERR,
1639 				"async register failed: already registered (vid %d, qid: %d)\n",
1640 				vid, queue_id);
1641 		return -1;
1642 	}
1643 
1644 	async = rte_zmalloc_socket(NULL, sizeof(struct vhost_async), 0, node);
1645 	if (!async) {
1646 		VHOST_LOG_CONFIG(ERR, "failed to allocate async metadata (vid %d, qid: %d)\n",
1647 				vid, queue_id);
1648 		return -1;
1649 	}
1650 
1651 	async->pkts_info = rte_malloc_socket(NULL, vq->size * sizeof(struct async_inflight_info),
1652 			RTE_CACHE_LINE_SIZE, node);
1653 	if (!async->pkts_info) {
1654 		VHOST_LOG_CONFIG(ERR, "failed to allocate async_pkts_info (vid %d, qid: %d)\n",
1655 				vid, queue_id);
1656 		goto out_free_async;
1657 	}
1658 
1659 	if (vq_is_packed(dev)) {
1660 		async->buffers_packed = rte_malloc_socket(NULL,
1661 				vq->size * sizeof(struct vring_used_elem_packed),
1662 				RTE_CACHE_LINE_SIZE, node);
1663 		if (!async->buffers_packed) {
1664 			VHOST_LOG_CONFIG(ERR, "failed to allocate async buffers (vid %d, qid: %d)\n",
1665 					vid, queue_id);
1666 			goto out_free_inflight;
1667 		}
1668 	} else {
1669 		async->descs_split = rte_malloc_socket(NULL,
1670 				vq->size * sizeof(struct vring_used_elem),
1671 				RTE_CACHE_LINE_SIZE, node);
1672 		if (!async->descs_split) {
1673 			VHOST_LOG_CONFIG(ERR, "failed to allocate async descs (vid %d, qid: %d)\n",
1674 					vid, queue_id);
1675 			goto out_free_inflight;
1676 		}
1677 	}
1678 
1679 	async->ops.check_completed_copies = ops->check_completed_copies;
1680 	async->ops.transfer_data = ops->transfer_data;
1681 
1682 	vq->async = async;
1683 
1684 	return 0;
1685 out_free_inflight:
1686 	rte_free(async->pkts_info);
1687 out_free_async:
1688 	rte_free(async);
1689 
1690 	return -1;
1691 }
1692 
1693 int
1694 rte_vhost_async_channel_register(int vid, uint16_t queue_id,
1695 		struct rte_vhost_async_config config,
1696 		struct rte_vhost_async_channel_ops *ops)
1697 {
1698 	struct vhost_virtqueue *vq;
1699 	struct virtio_net *dev = get_device(vid);
1700 	int ret;
1701 
1702 	if (dev == NULL || ops == NULL)
1703 		return -1;
1704 
1705 	if (queue_id >= VHOST_MAX_VRING)
1706 		return -1;
1707 
1708 	vq = dev->virtqueue[queue_id];
1709 
1710 	if (unlikely(vq == NULL || !dev->async_copy))
1711 		return -1;
1712 
1713 	if (unlikely(!(config.features & RTE_VHOST_ASYNC_INORDER))) {
1714 		VHOST_LOG_CONFIG(ERR,
1715 			"async copy is not supported in non-inorder mode "
1716 			"(vid %d, qid: %d)\n", vid, queue_id);
1717 		return -1;
1718 	}
1719 
1720 	if (unlikely(ops->check_completed_copies == NULL ||
1721 		ops->transfer_data == NULL))
1722 		return -1;
1723 
1724 	rte_spinlock_lock(&vq->access_lock);
1725 	ret = async_channel_register(vid, queue_id, ops);
1726 	rte_spinlock_unlock(&vq->access_lock);
1727 
1728 	return ret;
1729 }
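
/*
 * Usage sketch (not part of the build): app_transfer_data() and
 * app_check_completed() stand for application-provided callbacks matching
 * the prototypes of the vhost async API; their implementation is omitted:
 *
 *	struct rte_vhost_async_config config = {
 *		.features = RTE_VHOST_ASYNC_INORDER,
 *	};
 *	struct rte_vhost_async_channel_ops ops = {
 *		.transfer_data = app_transfer_data,
 *		.check_completed_copies = app_check_completed,
 *	};
 *
 *	if (rte_vhost_async_channel_register(vid, queue_id, config, &ops) != 0)
 *		... registration failed or queue not ready ...
 */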
1730 
1731 int
1732 rte_vhost_async_channel_register_thread_unsafe(int vid, uint16_t queue_id,
1733 		struct rte_vhost_async_config config,
1734 		struct rte_vhost_async_channel_ops *ops)
1735 {
1736 	struct vhost_virtqueue *vq;
1737 	struct virtio_net *dev = get_device(vid);
1738 
1739 	if (dev == NULL || ops == NULL)
1740 		return -1;
1741 
1742 	if (queue_id >= VHOST_MAX_VRING)
1743 		return -1;
1744 
1745 	vq = dev->virtqueue[queue_id];
1746 
1747 	if (unlikely(vq == NULL || !dev->async_copy))
1748 		return -1;
1749 
1750 	if (unlikely(!(config.features & RTE_VHOST_ASYNC_INORDER))) {
1751 		VHOST_LOG_CONFIG(ERR,
1752 			"async copy is not supported in non-inorder mode "
1753 			"(vid %d, qid: %d)\n", vid, queue_id);
1754 		return -1;
1755 	}
1756 
1757 	if (unlikely(ops->check_completed_copies == NULL ||
1758 		ops->transfer_data == NULL))
1759 		return -1;
1760 
1761 	return async_channel_register(vid, queue_id, ops);
1762 }
1763 
1764 int
1765 rte_vhost_async_channel_unregister(int vid, uint16_t queue_id)
1766 {
1767 	struct vhost_virtqueue *vq;
1768 	struct virtio_net *dev = get_device(vid);
1769 	int ret = -1;
1770 
1771 	if (dev == NULL)
1772 		return ret;
1773 
1774 	if (queue_id >= VHOST_MAX_VRING)
1775 		return ret;
1776 
1777 	vq = dev->virtqueue[queue_id];
1778 
1779 	if (vq == NULL)
1780 		return ret;
1781 
1782 	ret = 0;
1783 
1784 	if (!vq->async)
1785 		return ret;
1786 
1787 	if (!rte_spinlock_trylock(&vq->access_lock)) {
1788 		VHOST_LOG_CONFIG(ERR, "Failed to unregister async channel. "
1789 			"virt queue busy.\n");
1790 		return -1;
1791 	}
1792 
1793 	if (vq->async->pkts_inflight_n) {
1794 		VHOST_LOG_CONFIG(ERR, "Failed to unregister async channel. "
1795 			"async inflight packets must be completed before unregistration.\n");
1796 		ret = -1;
1797 		goto out;
1798 	}
1799 
1800 	vhost_free_async_mem(vq);
1801 out:
1802 	rte_spinlock_unlock(&vq->access_lock);
1803 
1804 	return ret;
1805 }
1806 
1807 int
1808 rte_vhost_async_channel_unregister_thread_unsafe(int vid, uint16_t queue_id)
1809 {
1810 	struct vhost_virtqueue *vq;
1811 	struct virtio_net *dev = get_device(vid);
1812 
1813 	if (dev == NULL)
1814 		return -1;
1815 
1816 	if (queue_id >= VHOST_MAX_VRING)
1817 		return -1;
1818 
1819 	vq = dev->virtqueue[queue_id];
1820 
1821 	if (vq == NULL)
1822 		return -1;
1823 
1824 	if (!vq->async)
1825 		return 0;
1826 
1827 	if (vq->async->pkts_inflight_n) {
1828 		VHOST_LOG_CONFIG(ERR, "Failed to unregister async channel. "
1829 			"async inflight packets must be completed before unregistration.\n");
1830 		return -1;
1831 	}
1832 
1833 	vhost_free_async_mem(vq);
1834 
1835 	return 0;
1836 }
1837 
1838 int
1839 rte_vhost_async_get_inflight(int vid, uint16_t queue_id)
1840 {
1841 	struct vhost_virtqueue *vq;
1842 	struct virtio_net *dev = get_device(vid);
1843 	int ret = -1;
1844 
1845 	if (dev == NULL)
1846 		return ret;
1847 
1848 	if (queue_id >= VHOST_MAX_VRING)
1849 		return ret;
1850 
1851 	vq = dev->virtqueue[queue_id];
1852 
1853 	if (vq == NULL)
1854 		return ret;
1855 
1856 	if (!vq->async)
1857 		return ret;
1858 
1859 	if (!rte_spinlock_trylock(&vq->access_lock)) {
1860 		VHOST_LOG_CONFIG(DEBUG, "Failed to check in-flight packets. "
1861 			"virt queue busy.\n");
1862 		return ret;
1863 	}
1864 
1865 	ret = vq->async->pkts_inflight_n;
1866 	rte_spinlock_unlock(&vq->access_lock);
1867 
1868 	return ret;
1869 }
1870 
1871 int
1872 rte_vhost_get_monitor_addr(int vid, uint16_t queue_id,
1873 		struct rte_vhost_power_monitor_cond *pmc)
1874 {
1875 	struct virtio_net *dev = get_device(vid);
1876 	struct vhost_virtqueue *vq;
1877 
1878 	if (dev == NULL)
1879 		return -1;
1880 	if (queue_id >= VHOST_MAX_VRING)
1881 		return -1;
1882 
1883 	vq = dev->virtqueue[queue_id];
1884 	if (vq == NULL)
1885 		return -1;
1886 
1887 	if (vq_is_packed(dev)) {
1888 		struct vring_packed_desc *desc;
1889 		desc = vq->desc_packed;
1890 		pmc->addr = &desc[vq->last_avail_idx].flags;
1891 		if (vq->avail_wrap_counter)
1892 			pmc->val = VRING_DESC_F_AVAIL;
1893 		else
1894 			pmc->val = VRING_DESC_F_USED;
1895 		pmc->mask = VRING_DESC_F_AVAIL | VRING_DESC_F_USED;
1896 		pmc->size = sizeof(desc[vq->last_avail_idx].flags);
1897 		pmc->match = 1;
1898 	} else {
1899 		pmc->addr = &vq->avail->idx;
1900 		pmc->val = vq->last_avail_idx & (vq->size - 1);
1901 		pmc->mask = vq->size - 1;
1902 		pmc->size = sizeof(vq->avail->idx);
1903 		pmc->match = 0;
1904 	}
1905 
1906 	return 0;
1907 }
1908 
1909 RTE_LOG_REGISTER_SUFFIX(vhost_config_log_level, config, INFO);
1910 RTE_LOG_REGISTER_SUFFIX(vhost_data_log_level, data, WARNING);
1911