xref: /dpdk/lib/vhost/iotlb.c (revision 2bf48044dca1892e571fd4964eecaacf6cb0c1c2)
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) 2017 Red Hat, Inc.
 */

#ifdef RTE_LIBRTE_VHOST_NUMA
#include <numaif.h>
#endif

#include <rte_tailq.h>

#include "iotlb.h"
#include "vhost.h"

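/*
 * One IOTLB entry describes a guest I/O virtual address (IOVA) to
 * backend host virtual address translation, with the size of the
 * mapping and the access permissions granted by the guest IOMMU.
 */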
struct vhost_iotlb_entry {
	TAILQ_ENTRY(vhost_iotlb_entry) next;
	SLIST_ENTRY(vhost_iotlb_entry) next_free;

	uint64_t iova;
	uint64_t uaddr;
	uint64_t size;
	uint8_t perm;
};

#define IOTLB_CACHE_SIZE 2048

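/*
 * Get a pre-allocated entry from the per-virtqueue free list.
 * Returns NULL if the pool is exhausted.
 */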
static struct vhost_iotlb_entry *
vhost_user_iotlb_pool_get(struct vhost_virtqueue *vq)
{
	struct vhost_iotlb_entry *node;

	rte_spinlock_lock(&vq->iotlb_free_lock);
	node = SLIST_FIRST(&vq->iotlb_free_list);
	if (node != NULL)
		SLIST_REMOVE_HEAD(&vq->iotlb_free_list, next_free);
	rte_spinlock_unlock(&vq->iotlb_free_lock);
	return node;
}

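/* Return an entry to the per-virtqueue free list. */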
static void
vhost_user_iotlb_pool_put(struct vhost_virtqueue *vq,
	struct vhost_iotlb_entry *node)
{
	rte_spinlock_lock(&vq->iotlb_free_lock);
	SLIST_INSERT_HEAD(&vq->iotlb_free_list, node, next_free);
	rte_spinlock_unlock(&vq->iotlb_free_lock);
}

static void
vhost_user_iotlb_cache_random_evict(struct virtio_net *dev, struct vhost_virtqueue *vq);

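/* Flush all entries from the pending list and return them to the pool. */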
static void
vhost_user_iotlb_pending_remove_all(struct vhost_virtqueue *vq)
{
	struct vhost_iotlb_entry *node, *temp_node;

	rte_rwlock_write_lock(&vq->iotlb_pending_lock);

	RTE_TAILQ_FOREACH_SAFE(node, &vq->iotlb_pending_list, next, temp_node) {
		TAILQ_REMOVE(&vq->iotlb_pending_list, node, next);
		vhost_user_iotlb_pool_put(vq, node);
	}

	rte_rwlock_write_unlock(&vq->iotlb_pending_lock);
}

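/*
 * Check whether an IOTLB miss for this IOVA and permission has already
 * been reported to the frontend and is still awaiting an update.
 */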
bool
vhost_user_iotlb_pending_miss(struct vhost_virtqueue *vq, uint64_t iova,
				uint8_t perm)
{
	struct vhost_iotlb_entry *node;
	bool found = false;

	rte_rwlock_read_lock(&vq->iotlb_pending_lock);

	TAILQ_FOREACH(node, &vq->iotlb_pending_list, next) {
		if ((node->iova == iova) && (node->perm == perm)) {
			found = true;
			break;
		}
	}

	rte_rwlock_read_unlock(&vq->iotlb_pending_lock);

	return found;
}

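/*
 * Record that an IOTLB miss has been sent for this IOVA/permission, so
 * that duplicate miss requests are not issued before the update arrives.
 * If the entry pool is exhausted, all pending entries are dropped or,
 * if there are none, a random cache entry is evicted to make room.
 */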
void
vhost_user_iotlb_pending_insert(struct virtio_net *dev, struct vhost_virtqueue *vq,
				uint64_t iova, uint8_t perm)
{
	struct vhost_iotlb_entry *node;

	node = vhost_user_iotlb_pool_get(vq);
	if (node == NULL) {
		VHOST_LOG_CONFIG(dev->ifname, DEBUG,
			"IOTLB pool for vq %"PRIu32" empty, clear entries for pending insertion\n",
			vq->index);
		if (!TAILQ_EMPTY(&vq->iotlb_pending_list))
			vhost_user_iotlb_pending_remove_all(vq);
		else
			vhost_user_iotlb_cache_random_evict(dev, vq);
		node = vhost_user_iotlb_pool_get(vq);
		if (node == NULL) {
			VHOST_LOG_CONFIG(dev->ifname, ERR,
				"IOTLB pool vq %"PRIu32" still empty, pending insertion failure\n",
				vq->index);
			return;
		}
	}

	node->iova = iova;
	node->perm = perm;

	rte_rwlock_write_lock(&vq->iotlb_pending_lock);

	TAILQ_INSERT_TAIL(&vq->iotlb_pending_list, node, next);

	rte_rwlock_write_unlock(&vq->iotlb_pending_lock);
}

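/*
 * Drop pending miss entries covered by [iova, iova + size) whose
 * requested permissions are satisfied by 'perm', typically once the
 * corresponding IOTLB update has been received and cached.
 */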
void
vhost_user_iotlb_pending_remove(struct vhost_virtqueue *vq,
				uint64_t iova, uint64_t size, uint8_t perm)
{
	struct vhost_iotlb_entry *node, *temp_node;

	rte_rwlock_write_lock(&vq->iotlb_pending_lock);

	RTE_TAILQ_FOREACH_SAFE(node, &vq->iotlb_pending_list, next,
				temp_node) {
		if (node->iova < iova)
			continue;
		if (node->iova >= iova + size)
			continue;
		if ((node->perm & perm) != node->perm)
			continue;
		TAILQ_REMOVE(&vq->iotlb_pending_list, node, next);
		vhost_user_iotlb_pool_put(vq, node);
	}

	rte_rwlock_write_unlock(&vq->iotlb_pending_lock);
}

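/*
 * Empty the IOTLB cache: mark the backing pages of every entry as
 * excluded from core dumps again and return all entries to the pool.
 */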
static void
vhost_user_iotlb_cache_remove_all(struct virtio_net *dev, struct vhost_virtqueue *vq)
{
	struct vhost_iotlb_entry *node, *temp_node;

	rte_rwlock_write_lock(&vq->iotlb_lock);

	RTE_TAILQ_FOREACH_SAFE(node, &vq->iotlb_list, next, temp_node) {
		mem_set_dump((void *)(uintptr_t)node->uaddr, node->size, false,
			hua_to_alignment(dev->mem, (void *)(uintptr_t)node->uaddr));
		TAILQ_REMOVE(&vq->iotlb_list, node, next);
		vhost_user_iotlb_pool_put(vq, node);
	}

	vq->iotlb_cache_nr = 0;

	rte_rwlock_write_unlock(&vq->iotlb_lock);
}

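/*
 * Evict one randomly chosen entry from the IOTLB cache to make room for
 * a new insertion. Core dump exclusion is only applied to the evicted
 * entry's pages if neither neighbouring entry shares them.
 */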
static void
vhost_user_iotlb_cache_random_evict(struct virtio_net *dev, struct vhost_virtqueue *vq)
{
	struct vhost_iotlb_entry *node, *temp_node, *prev_node = NULL;
	uint64_t alignment, mask;
	int entry_idx;

	rte_rwlock_write_lock(&vq->iotlb_lock);

	entry_idx = rte_rand() % vq->iotlb_cache_nr;

	RTE_TAILQ_FOREACH_SAFE(node, &vq->iotlb_list, next, temp_node) {
		if (!entry_idx) {
			struct vhost_iotlb_entry *next_node;
			alignment = hua_to_alignment(dev->mem, (void *)(uintptr_t)node->uaddr);
			mask = ~(alignment - 1);

			/* Don't disable coredump if the previous node is in the same page */
			if (prev_node == NULL ||
					(node->uaddr & mask) != (prev_node->uaddr & mask)) {
				next_node = RTE_TAILQ_NEXT(node, next);
				/* Don't disable coredump if the next node is in the same page */
				if (next_node == NULL || ((node->uaddr + node->size - 1) & mask) !=
						(next_node->uaddr & mask))
					mem_set_dump((void *)(uintptr_t)node->uaddr, node->size,
							false, alignment);
			}
			TAILQ_REMOVE(&vq->iotlb_list, node, next);
			vhost_user_iotlb_pool_put(vq, node);
			vq->iotlb_cache_nr--;
			break;
		}
		prev_node = node;
		entry_idx--;
	}

	rte_rwlock_write_unlock(&vq->iotlb_lock);
}

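/*
 * Insert a translation into the IOTLB cache, keeping the list sorted by
 * IOVA, and clear any matching pending miss entries. Entries must be
 * invalidated before being updated, so an existing entry with the same
 * IOVA is assumed identical and the new one is discarded. The mapped
 * pages are marked as dumpable so they show up in core dumps.
 */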
void
vhost_user_iotlb_cache_insert(struct virtio_net *dev, struct vhost_virtqueue *vq,
				uint64_t iova, uint64_t uaddr,
				uint64_t size, uint8_t perm)
{
	struct vhost_iotlb_entry *node, *new_node;

	new_node = vhost_user_iotlb_pool_get(vq);
	if (new_node == NULL) {
		VHOST_LOG_CONFIG(dev->ifname, DEBUG,
			"IOTLB pool vq %"PRIu32" empty, clear entries for cache insertion\n",
			vq->index);
		if (!TAILQ_EMPTY(&vq->iotlb_list))
			vhost_user_iotlb_cache_random_evict(dev, vq);
		else
			vhost_user_iotlb_pending_remove_all(vq);
		new_node = vhost_user_iotlb_pool_get(vq);
		if (new_node == NULL) {
			VHOST_LOG_CONFIG(dev->ifname, ERR,
				"IOTLB pool vq %"PRIu32" still empty, cache insertion failed\n",
				vq->index);
			return;
		}
	}

	new_node->iova = iova;
	new_node->uaddr = uaddr;
	new_node->size = size;
	new_node->perm = perm;

	rte_rwlock_write_lock(&vq->iotlb_lock);

	TAILQ_FOREACH(node, &vq->iotlb_list, next) {
		/*
		 * Entries must be invalidated before being updated.
		 * So if iova already in list, assume identical.
		 */
		if (node->iova == new_node->iova) {
			vhost_user_iotlb_pool_put(vq, new_node);
			goto unlock;
		} else if (node->iova > new_node->iova) {
			mem_set_dump((void *)(uintptr_t)new_node->uaddr, new_node->size, true,
				hua_to_alignment(dev->mem, (void *)(uintptr_t)new_node->uaddr));
			TAILQ_INSERT_BEFORE(node, new_node, next);
			vq->iotlb_cache_nr++;
			goto unlock;
		}
	}

	mem_set_dump((void *)(uintptr_t)new_node->uaddr, new_node->size, true,
		hua_to_alignment(dev->mem, (void *)(uintptr_t)new_node->uaddr));
	TAILQ_INSERT_TAIL(&vq->iotlb_list, new_node, next);
	vq->iotlb_cache_nr++;

unlock:
	vhost_user_iotlb_pending_remove(vq, iova, size, perm);

	rte_rwlock_write_unlock(&vq->iotlb_lock);
}

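/*
 * Invalidate all cached translations overlapping [iova, iova + size).
 * As with eviction, core dump exclusion is skipped when an adjacent
 * entry shares the removed entry's pages.
 */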
void
vhost_user_iotlb_cache_remove(struct virtio_net *dev, struct vhost_virtqueue *vq,
					uint64_t iova, uint64_t size)
{
	struct vhost_iotlb_entry *node, *temp_node, *prev_node = NULL;
	uint64_t alignment, mask;

	if (unlikely(!size))
		return;

	rte_rwlock_write_lock(&vq->iotlb_lock);

	RTE_TAILQ_FOREACH_SAFE(node, &vq->iotlb_list, next, temp_node) {
		/* Sorted list */
		if (unlikely(iova + size < node->iova))
			break;

		if (iova < node->iova + node->size) {
			struct vhost_iotlb_entry *next_node;
			alignment = hua_to_alignment(dev->mem, (void *)(uintptr_t)node->uaddr);
			mask = ~(alignment - 1);

			/* Don't disable coredump if the previous node is in the same page */
			if (prev_node == NULL ||
					(node->uaddr & mask) != (prev_node->uaddr & mask)) {
				next_node = RTE_TAILQ_NEXT(node, next);
				/* Don't disable coredump if the next node is in the same page */
				if (next_node == NULL || ((node->uaddr + node->size - 1) & mask) !=
						(next_node->uaddr & mask))
					mem_set_dump((void *)(uintptr_t)node->uaddr, node->size,
							false, alignment);
			}

			TAILQ_REMOVE(&vq->iotlb_list, node, next);
			vhost_user_iotlb_pool_put(vq, node);
			vq->iotlb_cache_nr--;
		} else
			prev_node = node;
	}

	rte_rwlock_write_unlock(&vq->iotlb_lock);
}

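/*
 * Look up the host virtual address for a guest IOVA and the requested
 * permissions. *size is reduced to the length that is contiguously
 * mapped starting at iova; 0 is returned if no translation exists or
 * the cached permissions are insufficient. The caller is expected to
 * hold vq->iotlb_lock for reading, as the list is walked unlocked here.
 */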
uint64_t
vhost_user_iotlb_cache_find(struct vhost_virtqueue *vq, uint64_t iova,
						uint64_t *size, uint8_t perm)
{
	struct vhost_iotlb_entry *node;
	uint64_t offset, vva = 0, mapped = 0;

	if (unlikely(!*size))
		goto out;

	TAILQ_FOREACH(node, &vq->iotlb_list, next) {
		/* List sorted by iova */
		if (unlikely(iova < node->iova))
			break;

		if (iova >= node->iova + node->size)
			continue;

		if (unlikely((perm & node->perm) != perm)) {
			vva = 0;
			break;
		}

		offset = iova - node->iova;
		if (!vva)
			vva = node->uaddr + offset;

		mapped += node->size - offset;
		iova = node->iova + node->size;

		if (mapped >= *size)
			break;
	}

out:
	/* Only part of the requested chunk is mapped */
	if (unlikely(mapped < *size))
		*size = mapped;

	return vva;
}

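/* Drop both the cached translations and the pending miss entries. */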
void
vhost_user_iotlb_flush_all(struct virtio_net *dev, struct vhost_virtqueue *vq)
{
	vhost_user_iotlb_cache_remove_all(dev, vq);
	vhost_user_iotlb_pending_remove_all(vq);
}

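/*
 * Initialize the per-virtqueue IOTLB: locks, lists and, when the device
 * supports an IOMMU, a pool of IOTLB_CACHE_SIZE entries allocated on the
 * virtqueue's NUMA node. Safe to call again on an already initialized
 * virtqueue, in which case existing entries are flushed first.
 */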
int
vhost_user_iotlb_init(struct virtio_net *dev, struct vhost_virtqueue *vq)
{
	unsigned int i;
	int socket = 0;

	if (vq->iotlb_pool) {
		/*
		 * The cache has already been initialized,
		 * just drop all cached and pending entries.
		 */
		vhost_user_iotlb_flush_all(dev, vq);
		rte_free(vq->iotlb_pool);
	}

#ifdef RTE_LIBRTE_VHOST_NUMA
	if (get_mempolicy(&socket, NULL, 0, vq, MPOL_F_NODE | MPOL_F_ADDR) != 0)
		socket = 0;
#endif

	rte_spinlock_init(&vq->iotlb_free_lock);
	rte_rwlock_init(&vq->iotlb_lock);
	rte_rwlock_init(&vq->iotlb_pending_lock);

	SLIST_INIT(&vq->iotlb_free_list);
	TAILQ_INIT(&vq->iotlb_list);
	TAILQ_INIT(&vq->iotlb_pending_list);

	if (dev->flags & VIRTIO_DEV_SUPPORT_IOMMU) {
		vq->iotlb_pool = rte_calloc_socket("iotlb", IOTLB_CACHE_SIZE,
			sizeof(struct vhost_iotlb_entry), 0, socket);
		if (!vq->iotlb_pool) {
			VHOST_LOG_CONFIG(dev->ifname, ERR,
				"Failed to create IOTLB cache pool for vq %"PRIu32"\n",
				vq->index);
			return -1;
		}
		for (i = 0; i < IOTLB_CACHE_SIZE; i++)
			vhost_user_iotlb_pool_put(vq, &vq->iotlb_pool[i]);
	}

	vq->iotlb_cache_nr = 0;

	return 0;
}

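/* Free the IOTLB entry pool allocated by vhost_user_iotlb_init(). */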
void
vhost_user_iotlb_destroy(struct vhost_virtqueue *vq)
{
	rte_free(vq->iotlb_pool);
}