xref: /dpdk/lib/vhost/iotlb.c (revision f4eac3a09c51a1a2dab1f2fd3a10fe0619286a0d)
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) 2017 Red Hat, Inc.
 */

#ifdef RTE_LIBRTE_VHOST_NUMA
#include <numaif.h>
#endif

#include <rte_tailq.h>

#include "iotlb.h"
#include "vhost.h"

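/*
 * An IOTLB entry maps a guest IOVA range onto the host virtual address
 * space. Entries live either in the per-virtqueue translation cache
 * (iotlb_list, kept sorted by IOVA) or, while a miss is outstanding,
 * in the pending list (iotlb_pending_list).
 */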
struct vhost_iotlb_entry {
	TAILQ_ENTRY(vhost_iotlb_entry) next;

	uint64_t iova;
	uint64_t uaddr;
	uint64_t size;
	uint8_t perm;
};

#define IOTLB_CACHE_SIZE 2048

static void
vhost_user_iotlb_cache_random_evict(struct vhost_virtqueue *vq);

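/* Return all pending entries to the mempool and empty the pending list. */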
static void
vhost_user_iotlb_pending_remove_all(struct vhost_virtqueue *vq)
{
	struct vhost_iotlb_entry *node, *temp_node;

	rte_rwlock_write_lock(&vq->iotlb_pending_lock);

	RTE_TAILQ_FOREACH_SAFE(node, &vq->iotlb_pending_list, next, temp_node) {
		TAILQ_REMOVE(&vq->iotlb_pending_list, node, next);
		rte_mempool_put(vq->iotlb_pool, node);
	}

	rte_rwlock_write_unlock(&vq->iotlb_pending_lock);
}

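/*
 * Check whether a miss for this IOVA and permission is already recorded
 * in the pending list.
 */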
bool
vhost_user_iotlb_pending_miss(struct vhost_virtqueue *vq, uint64_t iova,
				uint8_t perm)
{
	struct vhost_iotlb_entry *node;
	bool found = false;

	rte_rwlock_read_lock(&vq->iotlb_pending_lock);

	TAILQ_FOREACH(node, &vq->iotlb_pending_list, next) {
		if ((node->iova == iova) && (node->perm == perm)) {
			found = true;
			break;
		}
	}

	rte_rwlock_read_unlock(&vq->iotlb_pending_lock);

	return found;
}

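/*
 * Record an IOTLB miss in the pending list. If the mempool is exhausted,
 * make room by flushing the pending list, or by evicting a random cache
 * entry when the pending list is already empty.
 */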
void
vhost_user_iotlb_pending_insert(struct virtio_net *dev, struct vhost_virtqueue *vq,
				uint64_t iova, uint8_t perm)
{
	struct vhost_iotlb_entry *node;
	int ret;

	ret = rte_mempool_get(vq->iotlb_pool, (void **)&node);
	if (ret) {
		VHOST_LOG_CONFIG(DEBUG,
				"(%s) IOTLB pool %s empty, clear entries for pending insertion\n",
				dev->ifname, vq->iotlb_pool->name);
		if (!TAILQ_EMPTY(&vq->iotlb_pending_list))
			vhost_user_iotlb_pending_remove_all(vq);
		else
			vhost_user_iotlb_cache_random_evict(vq);
		ret = rte_mempool_get(vq->iotlb_pool, (void **)&node);
		if (ret) {
			VHOST_LOG_CONFIG(ERR,
					"(%s) IOTLB pool %s still empty, pending insertion failure\n",
					dev->ifname, vq->iotlb_pool->name);
			return;
		}
	}

	node->iova = iova;
	node->perm = perm;

	rte_rwlock_write_lock(&vq->iotlb_pending_lock);

	TAILQ_INSERT_TAIL(&vq->iotlb_pending_list, node, next);

	rte_rwlock_write_unlock(&vq->iotlb_pending_lock);
}

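/*
 * Remove from the pending list all entries whose IOVA falls in
 * [iova, iova + size) and whose permissions are covered by 'perm'.
 */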
void
vhost_user_iotlb_pending_remove(struct vhost_virtqueue *vq,
				uint64_t iova, uint64_t size, uint8_t perm)
{
	struct vhost_iotlb_entry *node, *temp_node;

	rte_rwlock_write_lock(&vq->iotlb_pending_lock);

	RTE_TAILQ_FOREACH_SAFE(node, &vq->iotlb_pending_list, next,
				temp_node) {
		if (node->iova < iova)
			continue;
		if (node->iova >= iova + size)
			continue;
		if ((node->perm & perm) != node->perm)
			continue;
		TAILQ_REMOVE(&vq->iotlb_pending_list, node, next);
		rte_mempool_put(vq->iotlb_pool, node);
	}

	rte_rwlock_write_unlock(&vq->iotlb_pending_lock);
}

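/* Return all cached entries to the mempool and empty the translation cache. */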
static void
vhost_user_iotlb_cache_remove_all(struct vhost_virtqueue *vq)
{
	struct vhost_iotlb_entry *node, *temp_node;

	rte_rwlock_write_lock(&vq->iotlb_lock);

	RTE_TAILQ_FOREACH_SAFE(node, &vq->iotlb_list, next, temp_node) {
		TAILQ_REMOVE(&vq->iotlb_list, node, next);
		rte_mempool_put(vq->iotlb_pool, node);
	}

	vq->iotlb_cache_nr = 0;

	rte_rwlock_write_unlock(&vq->iotlb_lock);
}

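/* Evict one randomly chosen entry from the translation cache. */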
static void
vhost_user_iotlb_cache_random_evict(struct vhost_virtqueue *vq)
{
	struct vhost_iotlb_entry *node, *temp_node;
	int entry_idx;

	rte_rwlock_write_lock(&vq->iotlb_lock);

	entry_idx = rte_rand() % vq->iotlb_cache_nr;

	RTE_TAILQ_FOREACH_SAFE(node, &vq->iotlb_list, next, temp_node) {
		if (!entry_idx) {
			TAILQ_REMOVE(&vq->iotlb_list, node, next);
			rte_mempool_put(vq->iotlb_pool, node);
			vq->iotlb_cache_nr--;
			break;
		}
		entry_idx--;
	}

	rte_rwlock_write_unlock(&vq->iotlb_lock);
}

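/*
 * Insert a translation in the cache, keeping the list sorted by IOVA.
 * An already cached IOVA is assumed identical and skipped, since entries
 * must be invalidated before being updated. Any pending miss covered by
 * the new entry is removed from the pending list.
 */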
void
vhost_user_iotlb_cache_insert(struct virtio_net *dev, struct vhost_virtqueue *vq,
				uint64_t iova, uint64_t uaddr,
				uint64_t size, uint8_t perm)
{
	struct vhost_iotlb_entry *node, *new_node;
	int ret;

	ret = rte_mempool_get(vq->iotlb_pool, (void **)&new_node);
	if (ret) {
		VHOST_LOG_CONFIG(DEBUG,
				"(%s) IOTLB pool %s empty, clear entries for cache insertion\n",
				dev->ifname, vq->iotlb_pool->name);
		if (!TAILQ_EMPTY(&vq->iotlb_list))
			vhost_user_iotlb_cache_random_evict(vq);
		else
			vhost_user_iotlb_pending_remove_all(vq);
		ret = rte_mempool_get(vq->iotlb_pool, (void **)&new_node);
		if (ret) {
			VHOST_LOG_CONFIG(ERR,
					"(%s) IOTLB pool %s still empty, cache insertion failed\n",
					dev->ifname, vq->iotlb_pool->name);
			return;
		}
	}

	new_node->iova = iova;
	new_node->uaddr = uaddr;
	new_node->size = size;
	new_node->perm = perm;

	rte_rwlock_write_lock(&vq->iotlb_lock);

	TAILQ_FOREACH(node, &vq->iotlb_list, next) {
		/*
		 * Entries must be invalidated before being updated.
		 * So if iova already in list, assume identical.
		 */
		if (node->iova == new_node->iova) {
			rte_mempool_put(vq->iotlb_pool, new_node);
			goto unlock;
		} else if (node->iova > new_node->iova) {
			TAILQ_INSERT_BEFORE(node, new_node, next);
			vq->iotlb_cache_nr++;
			goto unlock;
		}
	}

	TAILQ_INSERT_TAIL(&vq->iotlb_list, new_node, next);
	vq->iotlb_cache_nr++;

unlock:
	vhost_user_iotlb_pending_remove(vq, iova, size, perm);

	rte_rwlock_write_unlock(&vq->iotlb_lock);
}

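/* Invalidate cached translations for the range starting at 'iova' of length 'size'. */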
void
vhost_user_iotlb_cache_remove(struct vhost_virtqueue *vq,
					uint64_t iova, uint64_t size)
{
	struct vhost_iotlb_entry *node, *temp_node;

	if (unlikely(!size))
		return;

	rte_rwlock_write_lock(&vq->iotlb_lock);

	RTE_TAILQ_FOREACH_SAFE(node, &vq->iotlb_list, next, temp_node) {
		/* Sorted list */
		if (unlikely(iova + size < node->iova))
			break;

		if (iova < node->iova + node->size) {
			TAILQ_REMOVE(&vq->iotlb_list, node, next);
			rte_mempool_put(vq->iotlb_pool, node);
			vq->iotlb_cache_nr--;
		}
	}

	rte_rwlock_write_unlock(&vq->iotlb_lock);
}

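/*
 * Translate 'iova' into a host virtual address using the cache. On return,
 * '*size' is reduced to the number of bytes actually covered when the full
 * range is not mapped; 0 is returned when no usable translation is found
 * for the requested permissions.
 */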
uint64_t
vhost_user_iotlb_cache_find(struct vhost_virtqueue *vq, uint64_t iova,
						uint64_t *size, uint8_t perm)
{
	struct vhost_iotlb_entry *node;
	uint64_t offset, vva = 0, mapped = 0;

	if (unlikely(!*size))
		goto out;

	TAILQ_FOREACH(node, &vq->iotlb_list, next) {
		/* List sorted by iova */
		if (unlikely(iova < node->iova))
			break;

		if (iova >= node->iova + node->size)
			continue;

		if (unlikely((perm & node->perm) != perm)) {
			vva = 0;
			break;
		}

		offset = iova - node->iova;
		if (!vva)
			vva = node->uaddr + offset;

		mapped += node->size - offset;
		iova = node->iova + node->size;

		if (mapped >= *size)
			break;
	}

out:
	/* Only part of the requested chunk is mapped */
	if (unlikely(mapped < *size))
		*size = mapped;

	return vva;
}

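/* Drop both the translation cache and the pending miss list. */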
void
vhost_user_iotlb_flush_all(struct vhost_virtqueue *vq)
{
	vhost_user_iotlb_cache_remove_all(vq);
	vhost_user_iotlb_pending_remove_all(vq);
}

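/*
 * (Re)initialize the IOTLB of a virtqueue: flush existing entries if it was
 * already set up, then (re)create the per-virtqueue mempool of IOTLB entries
 * on the virtqueue's NUMA node.
 */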
int
vhost_user_iotlb_init(struct virtio_net *dev, int vq_index)
{
	char pool_name[RTE_MEMPOOL_NAMESIZE];
	struct vhost_virtqueue *vq = dev->virtqueue[vq_index];
	int socket = 0;

	if (vq->iotlb_pool) {
		/*
		 * The cache has already been initialized,
		 * just drop all cached and pending entries.
		 */
		vhost_user_iotlb_flush_all(vq);
	}

#ifdef RTE_LIBRTE_VHOST_NUMA
	if (get_mempolicy(&socket, NULL, 0, vq, MPOL_F_NODE | MPOL_F_ADDR) != 0)
		socket = 0;
#endif

	rte_rwlock_init(&vq->iotlb_lock);
	rte_rwlock_init(&vq->iotlb_pending_lock);

	TAILQ_INIT(&vq->iotlb_list);
	TAILQ_INIT(&vq->iotlb_pending_list);

	snprintf(pool_name, sizeof(pool_name), "iotlb_%u_%d_%d",
			getpid(), dev->vid, vq_index);
	VHOST_LOG_CONFIG(DEBUG, "(%s) IOTLB cache name: %s\n", dev->ifname, pool_name);

	/* If already created, free it and recreate */
	vq->iotlb_pool = rte_mempool_lookup(pool_name);
	rte_mempool_free(vq->iotlb_pool);

	vq->iotlb_pool = rte_mempool_create(pool_name,
			IOTLB_CACHE_SIZE, sizeof(struct vhost_iotlb_entry), 0,
			0, 0, NULL, NULL, NULL, socket,
			RTE_MEMPOOL_F_NO_CACHE_ALIGN |
			RTE_MEMPOOL_F_SP_PUT);
	if (!vq->iotlb_pool) {
		VHOST_LOG_CONFIG(ERR, "(%s) Failed to create IOTLB cache pool %s\n",
				dev->ifname, pool_name);
		return -1;
	}

	vq->iotlb_cache_nr = 0;

	return 0;
}