xref: /dpdk/lib/vhost/iotlb.c (revision 09442498ef736d0a96632cf8b8c15d8ca78a6468)
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) 2017 Red Hat, Inc.
 */

#ifdef RTE_LIBRTE_VHOST_NUMA
#include <numaif.h>
#endif

#include <rte_tailq.h>

#include "iotlb.h"
#include "vhost.h"

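/*
 * A single IOTLB entry describing one guest IOVA range: its host user-space
 * virtual address (uaddr), its size and the access permissions granted by the
 * front-end. Entries live either on one of the per-virtqueue lists (cache or
 * pending) or on the free pool, hence the two link members.
 */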
struct vhost_iotlb_entry {
	TAILQ_ENTRY(vhost_iotlb_entry) next;
	SLIST_ENTRY(vhost_iotlb_entry) next_free;

	uint64_t iova;
	uint64_t uaddr;
	uint64_t size;
	uint8_t perm;
};

#define IOTLB_CACHE_SIZE 2048

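/*
 * Pop a free entry from the per-virtqueue pre-allocated pool.
 * Returns NULL when the pool is exhausted; callers are expected to
 * evict cached or pending entries and retry.
 */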
static struct vhost_iotlb_entry *
vhost_user_iotlb_pool_get(struct vhost_virtqueue *vq)
{
	struct vhost_iotlb_entry *node;

	rte_spinlock_lock(&vq->iotlb_free_lock);
	node = SLIST_FIRST(&vq->iotlb_free_list);
	if (node != NULL)
		SLIST_REMOVE_HEAD(&vq->iotlb_free_list, next_free);
	rte_spinlock_unlock(&vq->iotlb_free_lock);
	return node;
}

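/* Return an entry to the per-virtqueue free pool. */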
static void
vhost_user_iotlb_pool_put(struct vhost_virtqueue *vq,
	struct vhost_iotlb_entry *node)
{
	rte_spinlock_lock(&vq->iotlb_free_lock);
	SLIST_INSERT_HEAD(&vq->iotlb_free_list, node, next_free);
	rte_spinlock_unlock(&vq->iotlb_free_lock);
}

static void
vhost_user_iotlb_cache_random_evict(struct vhost_virtqueue *vq);

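/* Flush the whole pending-miss list and recycle its entries into the pool. */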
static void
vhost_user_iotlb_pending_remove_all(struct vhost_virtqueue *vq)
{
	struct vhost_iotlb_entry *node, *temp_node;

	rte_rwlock_write_lock(&vq->iotlb_pending_lock);

	RTE_TAILQ_FOREACH_SAFE(node, &vq->iotlb_pending_list, next, temp_node) {
		TAILQ_REMOVE(&vq->iotlb_pending_list, node, next);
		vhost_user_iotlb_pool_put(vq, node);
	}

	rte_rwlock_write_unlock(&vq->iotlb_pending_lock);
}

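/*
 * Check whether an IOTLB miss for this IOVA and permission has already been
 * recorded, so the front-end is not asked twice for the same translation.
 */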
bool
vhost_user_iotlb_pending_miss(struct vhost_virtqueue *vq, uint64_t iova,
				uint8_t perm)
{
	struct vhost_iotlb_entry *node;
	bool found = false;

	rte_rwlock_read_lock(&vq->iotlb_pending_lock);

	TAILQ_FOREACH(node, &vq->iotlb_pending_list, next) {
		if ((node->iova == iova) && (node->perm == perm)) {
			found = true;
			break;
		}
	}

	rte_rwlock_read_unlock(&vq->iotlb_pending_lock);

	return found;
}

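/*
 * Record that an IOTLB miss has been reported for this IOVA and permission.
 * If the entry pool is exhausted, make room by dropping either all pending
 * entries or, when none are pending, one random cached entry before retrying.
 */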
void
vhost_user_iotlb_pending_insert(struct virtio_net *dev, struct vhost_virtqueue *vq,
				uint64_t iova, uint8_t perm)
{
	struct vhost_iotlb_entry *node;

	node = vhost_user_iotlb_pool_get(vq);
	if (node == NULL) {
		VHOST_LOG_CONFIG(dev->ifname, DEBUG,
			"IOTLB pool for vq %"PRIu32" empty, clear entries for pending insertion\n",
			vq->index);
		if (!TAILQ_EMPTY(&vq->iotlb_pending_list))
			vhost_user_iotlb_pending_remove_all(vq);
		else
			vhost_user_iotlb_cache_random_evict(vq);
		node = vhost_user_iotlb_pool_get(vq);
		if (node == NULL) {
			VHOST_LOG_CONFIG(dev->ifname, ERR,
				"IOTLB pool vq %"PRIu32" still empty, pending insertion failure\n",
				vq->index);
			return;
		}
	}

	node->iova = iova;
	node->perm = perm;

	rte_rwlock_write_lock(&vq->iotlb_pending_lock);

	TAILQ_INSERT_TAIL(&vq->iotlb_pending_list, node, next);

	rte_rwlock_write_unlock(&vq->iotlb_pending_lock);
}

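/*
 * Drop pending misses that fall within [iova, iova + size) and whose
 * permissions are satisfied by perm, typically once the corresponding
 * IOTLB update has been received and cached.
 */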
void
vhost_user_iotlb_pending_remove(struct vhost_virtqueue *vq,
				uint64_t iova, uint64_t size, uint8_t perm)
{
	struct vhost_iotlb_entry *node, *temp_node;

	rte_rwlock_write_lock(&vq->iotlb_pending_lock);

	RTE_TAILQ_FOREACH_SAFE(node, &vq->iotlb_pending_list, next,
				temp_node) {
		if (node->iova < iova)
			continue;
		if (node->iova >= iova + size)
			continue;
		if ((node->perm & perm) != node->perm)
			continue;
		TAILQ_REMOVE(&vq->iotlb_pending_list, node, next);
		vhost_user_iotlb_pool_put(vq, node);
	}

	rte_rwlock_write_unlock(&vq->iotlb_pending_lock);
}

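/* Empty the IOTLB cache and return all its entries to the pool. */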
static void
vhost_user_iotlb_cache_remove_all(struct vhost_virtqueue *vq)
{
	struct vhost_iotlb_entry *node, *temp_node;

	rte_rwlock_write_lock(&vq->iotlb_lock);

	RTE_TAILQ_FOREACH_SAFE(node, &vq->iotlb_list, next, temp_node) {
		mem_set_dump((void *)(uintptr_t)node->uaddr, node->size, true);
		TAILQ_REMOVE(&vq->iotlb_list, node, next);
		vhost_user_iotlb_pool_put(vq, node);
	}

	vq->iotlb_cache_nr = 0;

	rte_rwlock_write_unlock(&vq->iotlb_lock);
}

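/*
 * Evict a single, randomly chosen entry from the IOTLB cache to make room
 * when the entry pool is exhausted.
 */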
static void
vhost_user_iotlb_cache_random_evict(struct vhost_virtqueue *vq)
{
	struct vhost_iotlb_entry *node, *temp_node;
	int entry_idx;

	rte_rwlock_write_lock(&vq->iotlb_lock);

	entry_idx = rte_rand() % vq->iotlb_cache_nr;

	RTE_TAILQ_FOREACH_SAFE(node, &vq->iotlb_list, next, temp_node) {
		if (!entry_idx) {
			mem_set_dump((void *)(uintptr_t)node->uaddr, node->size, true);
			TAILQ_REMOVE(&vq->iotlb_list, node, next);
			vhost_user_iotlb_pool_put(vq, node);
			vq->iotlb_cache_nr--;
			break;
		}
		entry_idx--;
	}

	rte_rwlock_write_unlock(&vq->iotlb_lock);
}

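/*
 * Insert a translation into the IOTLB cache, keeping the list sorted by IOVA
 * so that lookups and invalidations can stop early. If the IOVA is already
 * cached the new entry is dropped, since entries must be invalidated before
 * being updated. The mem_set_dump() calls flag the pages backing the new
 * mapping (for coredump inclusion). Any pending miss covered by the new
 * entry is removed.
 */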
void
vhost_user_iotlb_cache_insert(struct virtio_net *dev, struct vhost_virtqueue *vq,
				uint64_t iova, uint64_t uaddr,
				uint64_t size, uint8_t perm)
{
	struct vhost_iotlb_entry *node, *new_node;

	new_node = vhost_user_iotlb_pool_get(vq);
	if (new_node == NULL) {
		VHOST_LOG_CONFIG(dev->ifname, DEBUG,
			"IOTLB pool vq %"PRIu32" empty, clear entries for cache insertion\n",
			vq->index);
		if (!TAILQ_EMPTY(&vq->iotlb_list))
			vhost_user_iotlb_cache_random_evict(vq);
		else
			vhost_user_iotlb_pending_remove_all(vq);
		new_node = vhost_user_iotlb_pool_get(vq);
		if (new_node == NULL) {
			VHOST_LOG_CONFIG(dev->ifname, ERR,
				"IOTLB pool vq %"PRIu32" still empty, cache insertion failed\n",
				vq->index);
			return;
		}
	}

	new_node->iova = iova;
	new_node->uaddr = uaddr;
	new_node->size = size;
	new_node->perm = perm;

	rte_rwlock_write_lock(&vq->iotlb_lock);

	TAILQ_FOREACH(node, &vq->iotlb_list, next) {
		/*
		 * Entries must be invalidated before being updated.
		 * So if iova already in list, assume identical.
		 */
		if (node->iova == new_node->iova) {
			vhost_user_iotlb_pool_put(vq, new_node);
			goto unlock;
		} else if (node->iova > new_node->iova) {
			mem_set_dump((void *)(uintptr_t)new_node->uaddr, new_node->size, true);
			TAILQ_INSERT_BEFORE(node, new_node, next);
			vq->iotlb_cache_nr++;
			goto unlock;
		}
	}

	mem_set_dump((void *)(uintptr_t)new_node->uaddr, new_node->size, true);
	TAILQ_INSERT_TAIL(&vq->iotlb_list, new_node, next);
	vq->iotlb_cache_nr++;

unlock:
	vhost_user_iotlb_pending_remove(vq, iova, size, perm);

	rte_rwlock_write_unlock(&vq->iotlb_lock);
}

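/*
 * Remove cached translations that fall within or overlap the invalidated
 * range [iova, iova + size), typically in response to an IOTLB invalidation
 * from the front-end.
 */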
void
vhost_user_iotlb_cache_remove(struct vhost_virtqueue *vq,
					uint64_t iova, uint64_t size)
{
	struct vhost_iotlb_entry *node, *temp_node;

	if (unlikely(!size))
		return;

	rte_rwlock_write_lock(&vq->iotlb_lock);

	RTE_TAILQ_FOREACH_SAFE(node, &vq->iotlb_list, next, temp_node) {
		/* Sorted list */
		if (unlikely(iova + size < node->iova))
			break;

		if (iova < node->iova + node->size) {
			mem_set_dump((void *)(uintptr_t)node->uaddr, node->size, true);
			TAILQ_REMOVE(&vq->iotlb_list, node, next);
			vhost_user_iotlb_pool_put(vq, node);
			vq->iotlb_cache_nr--;
		}
	}

	rte_rwlock_write_unlock(&vq->iotlb_lock);
}

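/*
 * Translate a guest IOVA into a host virtual address for an access of *size
 * bytes with permissions perm. The cache list is sorted by IOVA, so the walk
 * aggregates contiguous entries and stops as soon as the requested range is
 * covered. On return, *size is shrunk to the length actually mapped, and 0 is
 * returned when nothing matches or a matching entry lacks the permissions.
 *
 * The function takes no lock itself; the caller is expected to hold
 * vq->iotlb_lock for reading. A minimal, hypothetical caller sketch
 * (expected_len and the retry path are illustrative, not part of this file):
 *
 *	uint64_t len = expected_len;
 *	uint64_t vva = vhost_user_iotlb_cache_find(vq, iova, &len, VHOST_ACCESS_RO);
 *	if (!vva || len < expected_len)
 *		report the miss to the front-end and retry once it is resolved
 */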
uint64_t
vhost_user_iotlb_cache_find(struct vhost_virtqueue *vq, uint64_t iova,
						uint64_t *size, uint8_t perm)
{
	struct vhost_iotlb_entry *node;
	uint64_t offset, vva = 0, mapped = 0;

	if (unlikely(!*size))
		goto out;

	TAILQ_FOREACH(node, &vq->iotlb_list, next) {
		/* List sorted by iova */
		if (unlikely(iova < node->iova))
			break;

		if (iova >= node->iova + node->size)
			continue;

		if (unlikely((perm & node->perm) != perm)) {
			vva = 0;
			break;
		}

		offset = iova - node->iova;
		if (!vva)
			vva = node->uaddr + offset;

		mapped += node->size - offset;
		iova = node->iova + node->size;

		if (mapped >= *size)
			break;
	}

out:
	/* Only part of the requested chunk is mapped */
	if (unlikely(mapped < *size))
		*size = mapped;

	return vva;
}

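/* Drop both cached translations and pending misses for this virtqueue. */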
void
vhost_user_iotlb_flush_all(struct vhost_virtqueue *vq)
{
	vhost_user_iotlb_cache_remove_all(vq);
	vhost_user_iotlb_pending_remove_all(vq);
}

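/*
 * (Re)initialize the per-virtqueue IOTLB: locks, lists and, when the device
 * supports an IOMMU, a pool of IOTLB_CACHE_SIZE entries allocated on the
 * virtqueue's NUMA node when it can be determined. Re-initializing an
 * existing cache flushes it and frees the previous pool first.
 */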
int
vhost_user_iotlb_init(struct virtio_net *dev, struct vhost_virtqueue *vq)
{
	unsigned int i;
	int socket = 0;

	if (vq->iotlb_pool) {
		/*
		 * The cache has already been initialized,
		 * just drop all cached and pending entries.
		 */
		vhost_user_iotlb_flush_all(vq);
		rte_free(vq->iotlb_pool);
	}

#ifdef RTE_LIBRTE_VHOST_NUMA
	if (get_mempolicy(&socket, NULL, 0, vq, MPOL_F_NODE | MPOL_F_ADDR) != 0)
		socket = 0;
#endif

	rte_spinlock_init(&vq->iotlb_free_lock);
	rte_rwlock_init(&vq->iotlb_lock);
	rte_rwlock_init(&vq->iotlb_pending_lock);

	SLIST_INIT(&vq->iotlb_free_list);
	TAILQ_INIT(&vq->iotlb_list);
	TAILQ_INIT(&vq->iotlb_pending_list);

	if (dev->flags & VIRTIO_DEV_SUPPORT_IOMMU) {
		vq->iotlb_pool = rte_calloc_socket("iotlb", IOTLB_CACHE_SIZE,
			sizeof(struct vhost_iotlb_entry), 0, socket);
		if (!vq->iotlb_pool) {
			VHOST_LOG_CONFIG(dev->ifname, ERR,
				"Failed to create IOTLB cache pool for vq %"PRIu32"\n",
				vq->index);
			return -1;
		}
		for (i = 0; i < IOTLB_CACHE_SIZE; i++)
			vhost_user_iotlb_pool_put(vq, &vq->iotlb_pool[i]);
	}

	vq->iotlb_cache_nr = 0;

	return 0;
}

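/* Free the IOTLB entry pool allocated by vhost_user_iotlb_init(). */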
void
vhost_user_iotlb_destroy(struct vhost_virtqueue *vq)
{
	rte_free(vq->iotlb_pool);
}