/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) 2017 Red Hat, Inc.
 */

#ifdef RTE_LIBRTE_VHOST_NUMA
#include <numaif.h>
#endif

#include <rte_tailq.h>

#include "iotlb.h"
#include "vhost.h"

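/*
 * An IOTLB entry maps an I/O virtual address range to a host user virtual
 * address. Entries live either in the translation cache (iotlb_list, sorted
 * by IOVA), in the pending miss list, or in the pre-allocated free pool.
 */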
struct vhost_iotlb_entry {
	TAILQ_ENTRY(vhost_iotlb_entry) next;
	SLIST_ENTRY(vhost_iotlb_entry) next_free;

	uint64_t iova;
	uint64_t uaddr;
	uint64_t uoffset;
	uint64_t size;
	uint8_t page_shift;
	uint8_t perm;
};

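/* Number of IOTLB entries pre-allocated per vhost device. */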
#define IOTLB_CACHE_SIZE 2048

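/* Notify the backend that an entry is removed, if it registered a callback. */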
static void
vhost_user_iotlb_remove_notify(struct virtio_net *dev, struct vhost_iotlb_entry *entry)
{
	if (dev->backend_ops->iotlb_remove_notify == NULL)
		return;

	dev->backend_ops->iotlb_remove_notify(entry->uaddr, entry->uoffset, entry->size);
}

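/*
 * Return true if entries 'a' and 'b' share a host page, i.e. if the last page
 * backing 'a' overlaps the first page backing 'b'. Either pointer may be NULL;
 * entry 'a' is expected to start below entry 'b'.
 */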
static bool
vhost_user_iotlb_share_page(struct vhost_iotlb_entry *a, struct vhost_iotlb_entry *b)
{
	uint64_t a_start, a_end, b_start;

	if (a == NULL || b == NULL)
		return false;

	a_start = a->uaddr + a->uoffset;
	b_start = b->uaddr + b->uoffset;

	/* Assumes entry a is lower than entry b */
	RTE_ASSERT(a_start < b_start);
	a_end = RTE_ALIGN_CEIL(a_start + a->size, RTE_BIT64(a->page_shift));
	b_start = RTE_ALIGN_FLOOR(b_start, RTE_BIT64(b->page_shift));

	return a_end > b_start;
}

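/* Mark the host pages backing the entry as dumpable. */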
static void
vhost_user_iotlb_set_dump(struct vhost_iotlb_entry *node)
{
	uint64_t start;

	start = node->uaddr + node->uoffset;
	mem_set_dump((void *)(uintptr_t)start, node->size, true, RTE_BIT64(node->page_shift));
}

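/*
 * Clear the dumpable attribute of the host pages backing the entry, skipping
 * the first or last page when it is shared with the previous or next entry.
 */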
static void
vhost_user_iotlb_clear_dump(struct vhost_iotlb_entry *node,
		struct vhost_iotlb_entry *prev, struct vhost_iotlb_entry *next)
{
	uint64_t start, end;

	start = node->uaddr + node->uoffset;
	end = start + node->size;

	/* Skip first page if shared with previous entry. */
	if (vhost_user_iotlb_share_page(prev, node))
		start = RTE_ALIGN_CEIL(start, RTE_BIT64(node->page_shift));

	/* Skip last page if shared with next entry. */
	if (vhost_user_iotlb_share_page(node, next))
		end = RTE_ALIGN_FLOOR(end, RTE_BIT64(node->page_shift));

	if (end > start)
		mem_set_dump((void *)(uintptr_t)start, end - start, false,
			RTE_BIT64(node->page_shift));
}

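/* Get a free entry from the pre-allocated pool, or NULL if the pool is exhausted. */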
static struct vhost_iotlb_entry *
vhost_user_iotlb_pool_get(struct virtio_net *dev)
{
	struct vhost_iotlb_entry *node;

	rte_spinlock_lock(&dev->iotlb_free_lock);
	node = SLIST_FIRST(&dev->iotlb_free_list);
	if (node != NULL)
		SLIST_REMOVE_HEAD(&dev->iotlb_free_list, next_free);
	rte_spinlock_unlock(&dev->iotlb_free_lock);
	return node;
}

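/* Return an entry to the free pool. */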
static void
vhost_user_iotlb_pool_put(struct virtio_net *dev, struct vhost_iotlb_entry *node)
{
	rte_spinlock_lock(&dev->iotlb_free_lock);
	SLIST_INSERT_HEAD(&dev->iotlb_free_list, node, next_free);
	rte_spinlock_unlock(&dev->iotlb_free_lock);
}

static void
vhost_user_iotlb_cache_random_evict(struct virtio_net *dev);

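/* Drop all pending IOTLB misses and return their entries to the pool. */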
static void
vhost_user_iotlb_pending_remove_all(struct virtio_net *dev)
{
	struct vhost_iotlb_entry *node, *temp_node;

	rte_rwlock_write_lock(&dev->iotlb_pending_lock);

	RTE_TAILQ_FOREACH_SAFE(node, &dev->iotlb_pending_list, next, temp_node) {
		TAILQ_REMOVE(&dev->iotlb_pending_list, node, next);
		vhost_user_iotlb_pool_put(dev, node);
	}

	rte_rwlock_write_unlock(&dev->iotlb_pending_lock);
}

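/* Return true if a miss for this IOVA and permission is already pending. */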
bool
vhost_user_iotlb_pending_miss(struct virtio_net *dev, uint64_t iova, uint8_t perm)
{
	struct vhost_iotlb_entry *node;
	bool found = false;

	rte_rwlock_read_lock(&dev->iotlb_pending_lock);

	TAILQ_FOREACH(node, &dev->iotlb_pending_list, next) {
		if ((node->iova == iova) && (node->perm == perm)) {
			found = true;
			break;
		}
	}

	rte_rwlock_read_unlock(&dev->iotlb_pending_lock);

	return found;
}

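/*
 * Record a pending IOTLB miss for the given IOVA and permission. If the entry
 * pool is exhausted, either all pending entries are dropped or a random cached
 * entry is evicted to free one.
 */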
void
vhost_user_iotlb_pending_insert(struct virtio_net *dev, uint64_t iova, uint8_t perm)
{
	struct vhost_iotlb_entry *node;

	node = vhost_user_iotlb_pool_get(dev);
	if (node == NULL) {
		VHOST_LOG_CONFIG(dev->ifname, DEBUG,
			"IOTLB pool empty, clear entries for pending insertion\n");
		if (!TAILQ_EMPTY(&dev->iotlb_pending_list))
			vhost_user_iotlb_pending_remove_all(dev);
		else
			vhost_user_iotlb_cache_random_evict(dev);
		node = vhost_user_iotlb_pool_get(dev);
		if (node == NULL) {
			VHOST_LOG_CONFIG(dev->ifname, ERR,
				"IOTLB pool still empty, pending insertion failure\n");
			return;
		}
	}

	node->iova = iova;
	node->perm = perm;

	rte_rwlock_write_lock(&dev->iotlb_pending_lock);

	TAILQ_INSERT_TAIL(&dev->iotlb_pending_list, node, next);

	rte_rwlock_write_unlock(&dev->iotlb_pending_lock);
}

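/*
 * Drop pending misses whose IOVA falls in the given range and whose
 * permissions are covered by 'perm'.
 */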
void
vhost_user_iotlb_pending_remove(struct virtio_net *dev, uint64_t iova, uint64_t size, uint8_t perm)
{
	struct vhost_iotlb_entry *node, *temp_node;

	rte_rwlock_write_lock(&dev->iotlb_pending_lock);

	RTE_TAILQ_FOREACH_SAFE(node, &dev->iotlb_pending_list, next,
				temp_node) {
		if (node->iova < iova)
			continue;
		if (node->iova >= iova + size)
			continue;
		if ((node->perm & perm) != node->perm)
			continue;
		TAILQ_REMOVE(&dev->iotlb_pending_list, node, next);
		vhost_user_iotlb_pool_put(dev, node);
	}

	rte_rwlock_write_unlock(&dev->iotlb_pending_lock);
}

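/* Flush the whole translation cache and return its entries to the pool. */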
static void
vhost_user_iotlb_cache_remove_all(struct virtio_net *dev)
{
	struct vhost_iotlb_entry *node, *temp_node;

	vhost_user_iotlb_wr_lock_all(dev);

	RTE_TAILQ_FOREACH_SAFE(node, &dev->iotlb_list, next, temp_node) {
		vhost_user_iotlb_clear_dump(node, NULL, NULL);

		TAILQ_REMOVE(&dev->iotlb_list, node, next);
		vhost_user_iotlb_remove_notify(dev, node);
		vhost_user_iotlb_pool_put(dev, node);
	}

	dev->iotlb_cache_nr = 0;

	vhost_user_iotlb_wr_unlock_all(dev);
}

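/* Evict one randomly selected entry from the translation cache. */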
static void
vhost_user_iotlb_cache_random_evict(struct virtio_net *dev)
{
	struct vhost_iotlb_entry *node, *temp_node, *prev_node = NULL;
	int entry_idx;

	vhost_user_iotlb_wr_lock_all(dev);

	entry_idx = rte_rand() % dev->iotlb_cache_nr;

	RTE_TAILQ_FOREACH_SAFE(node, &dev->iotlb_list, next, temp_node) {
		if (!entry_idx) {
			struct vhost_iotlb_entry *next_node = RTE_TAILQ_NEXT(node, next);

			vhost_user_iotlb_clear_dump(node, prev_node, next_node);

			TAILQ_REMOVE(&dev->iotlb_list, node, next);
			vhost_user_iotlb_remove_notify(dev, node);
			vhost_user_iotlb_pool_put(dev, node);
			dev->iotlb_cache_nr--;
			break;
		}
		prev_node = node;
		entry_idx--;
	}

	vhost_user_iotlb_wr_unlock_all(dev);
}

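/*
 * Insert a new translation into the cache, keeping the list sorted by IOVA,
 * and drop any pending miss it resolves.
 */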
void
vhost_user_iotlb_cache_insert(struct virtio_net *dev, uint64_t iova, uint64_t uaddr,
				uint64_t uoffset, uint64_t size, uint64_t page_size, uint8_t perm)
{
	struct vhost_iotlb_entry *node, *new_node;

	new_node = vhost_user_iotlb_pool_get(dev);
	if (new_node == NULL) {
		VHOST_LOG_CONFIG(dev->ifname, DEBUG,
			"IOTLB pool empty, clear entries for cache insertion\n");
		if (!TAILQ_EMPTY(&dev->iotlb_list))
			vhost_user_iotlb_cache_random_evict(dev);
		else
			vhost_user_iotlb_pending_remove_all(dev);
		new_node = vhost_user_iotlb_pool_get(dev);
		if (new_node == NULL) {
			VHOST_LOG_CONFIG(dev->ifname, ERR,
				"IOTLB pool still empty, cache insertion failed\n");
			return;
		}
	}

	new_node->iova = iova;
	new_node->uaddr = uaddr;
	new_node->uoffset = uoffset;
	new_node->size = size;
	new_node->page_shift = rte_ctz64(page_size);
	new_node->perm = perm;

	vhost_user_iotlb_wr_lock_all(dev);

	TAILQ_FOREACH(node, &dev->iotlb_list, next) {
		/*
		 * Entries must be invalidated before being updated,
		 * so if the iova is already in the list, assume the
		 * entries are identical.
		 */
		if (node->iova == new_node->iova) {
			vhost_user_iotlb_pool_put(dev, new_node);
			goto unlock;
		} else if (node->iova > new_node->iova) {
			vhost_user_iotlb_set_dump(new_node);

			TAILQ_INSERT_BEFORE(node, new_node, next);
			dev->iotlb_cache_nr++;
			goto unlock;
		}
	}

	vhost_user_iotlb_set_dump(new_node);

	TAILQ_INSERT_TAIL(&dev->iotlb_list, new_node, next);
	dev->iotlb_cache_nr++;

unlock:
	vhost_user_iotlb_pending_remove(dev, iova, size, perm);

	vhost_user_iotlb_wr_unlock_all(dev);
}

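/* Invalidate cached translations overlapping the given IOVA range. */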
void
vhost_user_iotlb_cache_remove(struct virtio_net *dev, uint64_t iova, uint64_t size)
{
	struct vhost_iotlb_entry *node, *temp_node, *prev_node = NULL;

	if (unlikely(!size))
		return;

	vhost_user_iotlb_wr_lock_all(dev);

	RTE_TAILQ_FOREACH_SAFE(node, &dev->iotlb_list, next, temp_node) {
		/* Sorted list */
		if (unlikely(iova + size < node->iova))
			break;

		if (iova < node->iova + node->size) {
			struct vhost_iotlb_entry *next_node = RTE_TAILQ_NEXT(node, next);

			vhost_user_iotlb_clear_dump(node, prev_node, next_node);

			TAILQ_REMOVE(&dev->iotlb_list, node, next);
			vhost_user_iotlb_remove_notify(dev, node);
			vhost_user_iotlb_pool_put(dev, node);
			dev->iotlb_cache_nr--;
		} else {
			prev_node = node;
		}
	}

	vhost_user_iotlb_wr_unlock_all(dev);
}

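/*
 * Translate an IOVA range into a host virtual address. On return, *size is
 * shrunk to the length contiguously mapped with the requested permissions.
 * Returns 0 if no matching translation is found.
 */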
uint64_t
vhost_user_iotlb_cache_find(struct virtio_net *dev, uint64_t iova, uint64_t *size, uint8_t perm)
{
	struct vhost_iotlb_entry *node;
	uint64_t offset, vva = 0, mapped = 0;

	if (unlikely(!*size))
		goto out;

	TAILQ_FOREACH(node, &dev->iotlb_list, next) {
		/* List sorted by iova */
		if (unlikely(iova < node->iova))
			break;

		if (iova >= node->iova + node->size)
			continue;

		if (unlikely((perm & node->perm) != perm)) {
			vva = 0;
			break;
		}

		offset = iova - node->iova;
		if (!vva)
			vva = node->uaddr + node->uoffset + offset;

		mapped += node->size - offset;
		iova = node->iova + node->size;

		if (mapped >= *size)
			break;
	}

out:
	/* Only part of the requested chunk is mapped */
	if (unlikely(mapped < *size))
		*size = mapped;

	return vva;
}

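/* Flush both the translation cache and the pending miss list. */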
void
vhost_user_iotlb_flush_all(struct virtio_net *dev)
{
	vhost_user_iotlb_cache_remove_all(dev);
	vhost_user_iotlb_pending_remove_all(dev);
}

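/*
 * Initialize (or reinitialize) the device's IOTLB: reset the lists and, if the
 * device supports an IOMMU, allocate the entry pool on the NUMA node where the
 * device structure resides.
 */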
int
vhost_user_iotlb_init(struct virtio_net *dev)
{
	unsigned int i;
	int socket = 0;

	if (dev->iotlb_pool) {
		/*
		 * The cache has already been initialized,
		 * just drop all cached and pending entries.
		 */
		vhost_user_iotlb_flush_all(dev);
		rte_free(dev->iotlb_pool);
	}

#ifdef RTE_LIBRTE_VHOST_NUMA
	if (get_mempolicy(&socket, NULL, 0, dev, MPOL_F_NODE | MPOL_F_ADDR) != 0)
		socket = 0;
#endif

	rte_spinlock_init(&dev->iotlb_free_lock);
	rte_rwlock_init(&dev->iotlb_pending_lock);

	SLIST_INIT(&dev->iotlb_free_list);
	TAILQ_INIT(&dev->iotlb_list);
	TAILQ_INIT(&dev->iotlb_pending_list);

	if (dev->flags & VIRTIO_DEV_SUPPORT_IOMMU) {
		dev->iotlb_pool = rte_calloc_socket("iotlb", IOTLB_CACHE_SIZE,
			sizeof(struct vhost_iotlb_entry), 0, socket);
		if (!dev->iotlb_pool) {
			VHOST_LOG_CONFIG(dev->ifname, ERR, "Failed to create IOTLB cache pool\n");
			return -1;
		}
		for (i = 0; i < IOTLB_CACHE_SIZE; i++)
			vhost_user_iotlb_pool_put(dev, &dev->iotlb_pool[i]);
	}

	dev->iotlb_cache_nr = 0;

	return 0;
}

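/* Release the IOTLB entry pool. */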
void
vhost_user_iotlb_destroy(struct virtio_net *dev)
{
	rte_free(dev->iotlb_pool);
}