xref: /dpdk/lib/vhost/iotlb.c (revision cd8e4bee68bd31672ff1f766172e89d25f55189d)
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) 2017 Red Hat, Inc.
 */

#ifdef RTE_LIBRTE_VHOST_NUMA
#include <numaif.h>
#endif

#include <rte_tailq.h>

#include "iotlb.h"
#include "vhost.h"

struct vhost_iotlb_entry {
	TAILQ_ENTRY(vhost_iotlb_entry) next;
	SLIST_ENTRY(vhost_iotlb_entry) next_free;

	uint64_t iova;
	uint64_t uaddr;
	uint64_t size;
	uint8_t perm;
};

#define IOTLB_CACHE_SIZE 2048

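/*
 * Check whether two cache entries ('a' below 'b' in the host address space)
 * share at least one host page of the given alignment.
 */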
static bool
vhost_user_iotlb_share_page(struct vhost_iotlb_entry *a, struct vhost_iotlb_entry *b,
		uint64_t align)
{
	uint64_t a_end, b_start;

	if (a == NULL || b == NULL)
		return false;

	/* Assumes entry 'a' is lower than entry 'b' */
	RTE_ASSERT(a->uaddr < b->uaddr);
	a_end = RTE_ALIGN_CEIL(a->uaddr + a->size, align);
	b_start = RTE_ALIGN_FLOOR(b->uaddr, align);

	return a_end > b_start;
}

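/* Enable dumping of the host pages backing a cache entry. */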
static void
vhost_user_iotlb_set_dump(struct virtio_net *dev, struct vhost_iotlb_entry *node)
{
	uint64_t align;

	align = hua_to_alignment(dev->mem, (void *)(uintptr_t)node->uaddr);

	mem_set_dump((void *)(uintptr_t)node->uaddr, node->size, true, align);
}

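/*
 * Disable dumping of the host pages backing an entry, skipping the first
 * and/or last page when it is shared with a neighbouring entry.
 */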
static void
vhost_user_iotlb_clear_dump(struct virtio_net *dev, struct vhost_iotlb_entry *node,
		struct vhost_iotlb_entry *prev, struct vhost_iotlb_entry *next)
{
	uint64_t align, start, end;

	start = node->uaddr;
	end = node->uaddr + node->size;

	align = hua_to_alignment(dev->mem, (void *)(uintptr_t)node->uaddr);

	/* Skip first page if shared with previous entry. */
	if (vhost_user_iotlb_share_page(prev, node, align))
		start = RTE_ALIGN_CEIL(start, align);

	/* Skip last page if shared with next entry. */
	if (vhost_user_iotlb_share_page(node, next, align))
		end = RTE_ALIGN_FLOOR(end, align);

	if (end > start)
		mem_set_dump((void *)(uintptr_t)start, end - start, false, align);
}

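/* Get a free entry from the per-virtqueue IOTLB entry pool. */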
static struct vhost_iotlb_entry *
vhost_user_iotlb_pool_get(struct vhost_virtqueue *vq)
{
	struct vhost_iotlb_entry *node;

	rte_spinlock_lock(&vq->iotlb_free_lock);
	node = SLIST_FIRST(&vq->iotlb_free_list);
	if (node != NULL)
		SLIST_REMOVE_HEAD(&vq->iotlb_free_list, next_free);
	rte_spinlock_unlock(&vq->iotlb_free_lock);
	return node;
}

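/* Return an entry to the per-virtqueue IOTLB entry pool. */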
static void
vhost_user_iotlb_pool_put(struct vhost_virtqueue *vq,
	struct vhost_iotlb_entry *node)
{
	rte_spinlock_lock(&vq->iotlb_free_lock);
	SLIST_INSERT_HEAD(&vq->iotlb_free_list, node, next_free);
	rte_spinlock_unlock(&vq->iotlb_free_lock);
}

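/* Forward declaration, needed by vhost_user_iotlb_pending_insert() below. */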
static void
vhost_user_iotlb_cache_random_evict(struct virtio_net *dev, struct vhost_virtqueue *vq);

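/* Drop all entries from the pending-miss list and return them to the pool. */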
static void
vhost_user_iotlb_pending_remove_all(struct vhost_virtqueue *vq)
{
	struct vhost_iotlb_entry *node, *temp_node;

	rte_rwlock_write_lock(&vq->iotlb_pending_lock);

	RTE_TAILQ_FOREACH_SAFE(node, &vq->iotlb_pending_list, next, temp_node) {
		TAILQ_REMOVE(&vq->iotlb_pending_list, node, next);
		vhost_user_iotlb_pool_put(vq, node);
	}

	rte_rwlock_write_unlock(&vq->iotlb_pending_lock);
}

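/*
 * Check whether an IOTLB miss with the same IOVA and permission has
 * already been recorded in the pending list.
 */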
bool
vhost_user_iotlb_pending_miss(struct vhost_virtqueue *vq, uint64_t iova,
				uint8_t perm)
{
	struct vhost_iotlb_entry *node;
	bool found = false;

	rte_rwlock_read_lock(&vq->iotlb_pending_lock);

	TAILQ_FOREACH(node, &vq->iotlb_pending_list, next) {
		if ((node->iova == iova) && (node->perm == perm)) {
			found = true;
			break;
		}
	}

	rte_rwlock_read_unlock(&vq->iotlb_pending_lock);

	return found;
}

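/*
 * Record an IOTLB miss in the pending list, evicting pending or cached
 * entries first if the entry pool is exhausted.
 */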
void
vhost_user_iotlb_pending_insert(struct virtio_net *dev, struct vhost_virtqueue *vq,
				uint64_t iova, uint8_t perm)
{
	struct vhost_iotlb_entry *node;

	node = vhost_user_iotlb_pool_get(vq);
	if (node == NULL) {
		VHOST_LOG_CONFIG(dev->ifname, DEBUG,
			"IOTLB pool for vq %"PRIu32" empty, clear entries for pending insertion\n",
			vq->index);
		if (!TAILQ_EMPTY(&vq->iotlb_pending_list))
			vhost_user_iotlb_pending_remove_all(vq);
		else
			vhost_user_iotlb_cache_random_evict(dev, vq);
		node = vhost_user_iotlb_pool_get(vq);
		if (node == NULL) {
			VHOST_LOG_CONFIG(dev->ifname, ERR,
				"IOTLB pool vq %"PRIu32" still empty, pending insertion failure\n",
				vq->index);
			return;
		}
	}

	node->iova = iova;
	node->perm = perm;

	rte_rwlock_write_lock(&vq->iotlb_pending_lock);

	TAILQ_INSERT_TAIL(&vq->iotlb_pending_list, node, next);

	rte_rwlock_write_unlock(&vq->iotlb_pending_lock);
}

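/*
 * Remove pending-miss entries covered by [iova, iova + size) whose
 * permissions are satisfied by the given permission mask.
 */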
void
vhost_user_iotlb_pending_remove(struct vhost_virtqueue *vq,
				uint64_t iova, uint64_t size, uint8_t perm)
{
	struct vhost_iotlb_entry *node, *temp_node;

	rte_rwlock_write_lock(&vq->iotlb_pending_lock);

	RTE_TAILQ_FOREACH_SAFE(node, &vq->iotlb_pending_list, next,
				temp_node) {
		if (node->iova < iova)
			continue;
		if (node->iova >= iova + size)
			continue;
		if ((node->perm & perm) != node->perm)
			continue;
		TAILQ_REMOVE(&vq->iotlb_pending_list, node, next);
		vhost_user_iotlb_pool_put(vq, node);
	}

	rte_rwlock_write_unlock(&vq->iotlb_pending_lock);
}

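/* Remove all entries from the IOTLB cache and return them to the pool. */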
static void
vhost_user_iotlb_cache_remove_all(struct virtio_net *dev, struct vhost_virtqueue *vq)
{
	struct vhost_iotlb_entry *node, *temp_node;

	rte_rwlock_write_lock(&vq->iotlb_lock);

	RTE_TAILQ_FOREACH_SAFE(node, &vq->iotlb_list, next, temp_node) {
		vhost_user_iotlb_clear_dump(dev, node, NULL, NULL);

		TAILQ_REMOVE(&vq->iotlb_list, node, next);
		vhost_user_iotlb_pool_put(vq, node);
	}

	vq->iotlb_cache_nr = 0;

	rte_rwlock_write_unlock(&vq->iotlb_lock);
}

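/* Evict a single, randomly selected entry from the IOTLB cache. */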
static void
vhost_user_iotlb_cache_random_evict(struct virtio_net *dev, struct vhost_virtqueue *vq)
{
	struct vhost_iotlb_entry *node, *temp_node, *prev_node = NULL;
	int entry_idx;

	rte_rwlock_write_lock(&vq->iotlb_lock);

	entry_idx = rte_rand() % vq->iotlb_cache_nr;

	RTE_TAILQ_FOREACH_SAFE(node, &vq->iotlb_list, next, temp_node) {
		if (!entry_idx) {
			struct vhost_iotlb_entry *next_node = RTE_TAILQ_NEXT(node, next);

			vhost_user_iotlb_clear_dump(dev, node, prev_node, next_node);

			TAILQ_REMOVE(&vq->iotlb_list, node, next);
			vhost_user_iotlb_pool_put(vq, node);
			vq->iotlb_cache_nr--;
			break;
		}
		prev_node = node;
		entry_idx--;
	}

	rte_rwlock_write_unlock(&vq->iotlb_lock);
}

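/*
 * Insert an IOVA-to-user-address translation into the IOTLB cache,
 * keeping the list sorted by IOVA, and remove the corresponding
 * pending-miss entries.
 */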
void
vhost_user_iotlb_cache_insert(struct virtio_net *dev, struct vhost_virtqueue *vq,
				uint64_t iova, uint64_t uaddr,
				uint64_t size, uint8_t perm)
{
	struct vhost_iotlb_entry *node, *new_node;

	new_node = vhost_user_iotlb_pool_get(vq);
	if (new_node == NULL) {
		VHOST_LOG_CONFIG(dev->ifname, DEBUG,
			"IOTLB pool vq %"PRIu32" empty, clear entries for cache insertion\n",
			vq->index);
		if (!TAILQ_EMPTY(&vq->iotlb_list))
			vhost_user_iotlb_cache_random_evict(dev, vq);
		else
			vhost_user_iotlb_pending_remove_all(vq);
		new_node = vhost_user_iotlb_pool_get(vq);
		if (new_node == NULL) {
			VHOST_LOG_CONFIG(dev->ifname, ERR,
				"IOTLB pool vq %"PRIu32" still empty, cache insertion failed\n",
				vq->index);
			return;
		}
	}

	new_node->iova = iova;
	new_node->uaddr = uaddr;
	new_node->size = size;
	new_node->perm = perm;

	rte_rwlock_write_lock(&vq->iotlb_lock);

	TAILQ_FOREACH(node, &vq->iotlb_list, next) {
		/*
		 * Entries must be invalidated before being updated.
		 * So if iova already in list, assume identical.
		 */
		if (node->iova == new_node->iova) {
			vhost_user_iotlb_pool_put(vq, new_node);
			goto unlock;
		} else if (node->iova > new_node->iova) {
			vhost_user_iotlb_set_dump(dev, new_node);

			TAILQ_INSERT_BEFORE(node, new_node, next);
			vq->iotlb_cache_nr++;
			goto unlock;
		}
	}

	vhost_user_iotlb_set_dump(dev, new_node);

	TAILQ_INSERT_TAIL(&vq->iotlb_list, new_node, next);
	vq->iotlb_cache_nr++;

unlock:
	vhost_user_iotlb_pending_remove(vq, iova, size, perm);

	rte_rwlock_write_unlock(&vq->iotlb_lock);
}

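/* Invalidate cache entries overlapping the range [iova, iova + size). */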
void
vhost_user_iotlb_cache_remove(struct virtio_net *dev, struct vhost_virtqueue *vq,
					uint64_t iova, uint64_t size)
{
	struct vhost_iotlb_entry *node, *temp_node, *prev_node = NULL;

	if (unlikely(!size))
		return;

	rte_rwlock_write_lock(&vq->iotlb_lock);

	RTE_TAILQ_FOREACH_SAFE(node, &vq->iotlb_list, next, temp_node) {
		/* Sorted list */
		if (unlikely(iova + size < node->iova))
			break;

		if (iova < node->iova + node->size) {
			struct vhost_iotlb_entry *next_node = RTE_TAILQ_NEXT(node, next);

			vhost_user_iotlb_clear_dump(dev, node, prev_node, next_node);

			TAILQ_REMOVE(&vq->iotlb_list, node, next);
			vhost_user_iotlb_pool_put(vq, node);
			vq->iotlb_cache_nr--;
		} else
			prev_node = node;
	}

	rte_rwlock_write_unlock(&vq->iotlb_lock);
}

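/*
 * Translate an IOVA into a user virtual address with the requested
 * permissions. Returns 0 on miss or permission mismatch; *size is reduced
 * if only part of the requested range is contiguously mapped.
 */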
uint64_t
vhost_user_iotlb_cache_find(struct vhost_virtqueue *vq, uint64_t iova,
						uint64_t *size, uint8_t perm)
{
	struct vhost_iotlb_entry *node;
	uint64_t offset, vva = 0, mapped = 0;

	if (unlikely(!*size))
		goto out;

	TAILQ_FOREACH(node, &vq->iotlb_list, next) {
		/* List sorted by iova */
		if (unlikely(iova < node->iova))
			break;

		if (iova >= node->iova + node->size)
			continue;

		if (unlikely((perm & node->perm) != perm)) {
			vva = 0;
			break;
		}

		offset = iova - node->iova;
		if (!vva)
			vva = node->uaddr + offset;

		mapped += node->size - offset;
		iova = node->iova + node->size;

		if (mapped >= *size)
			break;
	}

out:
	/* Only part of the requested chunk is mapped */
	if (unlikely(mapped < *size))
		*size = mapped;

	return vva;
}

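/* Flush both the IOTLB cache and the pending-miss list. */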
void
vhost_user_iotlb_flush_all(struct virtio_net *dev, struct vhost_virtqueue *vq)
{
	vhost_user_iotlb_cache_remove_all(dev, vq);
	vhost_user_iotlb_pending_remove_all(vq);
}

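/*
 * Initialize (or re-initialize) the per-virtqueue IOTLB: locks, lists and,
 * when the device supports an IOMMU, the entry pool allocated on the
 * virtqueue's NUMA node.
 */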
int
vhost_user_iotlb_init(struct virtio_net *dev, struct vhost_virtqueue *vq)
{
	unsigned int i;
	int socket = 0;

	if (vq->iotlb_pool) {
		/*
		 * The cache has already been initialized,
		 * just drop all cached and pending entries.
		 */
		vhost_user_iotlb_flush_all(dev, vq);
		rte_free(vq->iotlb_pool);
	}

#ifdef RTE_LIBRTE_VHOST_NUMA
	if (get_mempolicy(&socket, NULL, 0, vq, MPOL_F_NODE | MPOL_F_ADDR) != 0)
		socket = 0;
#endif

	rte_spinlock_init(&vq->iotlb_free_lock);
	rte_rwlock_init(&vq->iotlb_lock);
	rte_rwlock_init(&vq->iotlb_pending_lock);

	SLIST_INIT(&vq->iotlb_free_list);
	TAILQ_INIT(&vq->iotlb_list);
	TAILQ_INIT(&vq->iotlb_pending_list);

	if (dev->flags & VIRTIO_DEV_SUPPORT_IOMMU) {
		vq->iotlb_pool = rte_calloc_socket("iotlb", IOTLB_CACHE_SIZE,
			sizeof(struct vhost_iotlb_entry), 0, socket);
		if (!vq->iotlb_pool) {
			VHOST_LOG_CONFIG(dev->ifname, ERR,
				"Failed to create IOTLB cache pool for vq %"PRIu32"\n",
				vq->index);
			return -1;
		}
		for (i = 0; i < IOTLB_CACHE_SIZE; i++)
			vhost_user_iotlb_pool_put(vq, &vq->iotlb_pool[i]);
	}

	vq->iotlb_cache_nr = 0;

	return 0;
}

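/* Free the IOTLB entry pool allocated by vhost_user_iotlb_init(). */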
void
vhost_user_iotlb_destroy(struct vhost_virtqueue *vq)
{
	rte_free(vq->iotlb_pool);
}