/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) 2017 Red Hat, Inc.
 */

#ifdef RTE_LIBRTE_VHOST_NUMA
#include <numaif.h>
#endif

#include <rte_tailq.h>

#include "iotlb.h"
#include "vhost.h"

struct vhost_iotlb_entry {
	TAILQ_ENTRY(vhost_iotlb_entry) next;
	SLIST_ENTRY(vhost_iotlb_entry) next_free;

	uint64_t iova;
	uint64_t uaddr;
	uint64_t uoffset;
	uint64_t size;
	uint8_t page_shift;
	uint8_t perm;
};

#define IOTLB_CACHE_SIZE 2048

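/*
 * Notify the backend that an IOTLB entry is being removed,
 * if it registered a removal callback.
 */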
static void
vhost_user_iotlb_remove_notify(struct virtio_net *dev, struct vhost_iotlb_entry *entry)
{
	if (dev->backend_ops->iotlb_remove_notify == NULL)
		return;

	dev->backend_ops->iotlb_remove_notify(entry->uaddr, entry->uoffset, entry->size);
}

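/*
 * Check whether entries 'a' and 'b' share a host page, i.e. whether the
 * last page covered by 'a' overlaps the first page covered by 'b'.
 * Assumes 'a' starts at a lower address than 'b'; either pointer may be NULL.
 */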
static bool
vhost_user_iotlb_share_page(struct vhost_iotlb_entry *a, struct vhost_iotlb_entry *b)
{
	uint64_t a_start, a_end, b_start;

	if (a == NULL || b == NULL)
		return false;

	a_start = a->uaddr + a->uoffset;
	b_start = b->uaddr + b->uoffset;

	/* Assumes entry a lower than entry b */
	RTE_ASSERT(a_start < b_start);
	a_end = RTE_ALIGN_CEIL(a_start + a->size, RTE_BIT64(a->page_shift));
	b_start = RTE_ALIGN_FLOOR(b_start, RTE_BIT64(b->page_shift));

	return a_end > b_start;
}

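/* Set the dump flag on the pages backing this entry so they can be included in core dumps. */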
static void
vhost_user_iotlb_set_dump(struct virtio_net *dev, struct vhost_iotlb_entry *node)
{
	uint64_t start;

	start = node->uaddr + node->uoffset;
	mem_set_dump(dev, (void *)(uintptr_t)start, node->size, true, RTE_BIT64(node->page_shift));
}

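/*
 * Clear the dump flag on the pages backing this entry, skipping the first
 * or last page when it is shared with the previous or next entry so that
 * their dump setting is preserved.
 */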
static void
vhost_user_iotlb_clear_dump(struct virtio_net *dev, struct vhost_iotlb_entry *node,
		struct vhost_iotlb_entry *prev, struct vhost_iotlb_entry *next)
{
	uint64_t start, end;

	start = node->uaddr + node->uoffset;
	end = start + node->size;

	/* Skip first page if shared with previous entry. */
	if (vhost_user_iotlb_share_page(prev, node))
		start = RTE_ALIGN_CEIL(start, RTE_BIT64(node->page_shift));

	/* Skip last page if shared with next entry. */
	if (vhost_user_iotlb_share_page(node, next))
		end = RTE_ALIGN_FLOOR(end, RTE_BIT64(node->page_shift));

	if (end > start)
		mem_set_dump(dev, (void *)(uintptr_t)start, end - start, false,
			RTE_BIT64(node->page_shift));
}

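/* Get a free entry from the pre-allocated pool, or NULL if it is exhausted. */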
static struct vhost_iotlb_entry *
vhost_user_iotlb_pool_get(struct virtio_net *dev)
{
	struct vhost_iotlb_entry *node;

	rte_spinlock_lock(&dev->iotlb_free_lock);
	node = SLIST_FIRST(&dev->iotlb_free_list);
	if (node != NULL)
		SLIST_REMOVE_HEAD(&dev->iotlb_free_list, next_free);
	rte_spinlock_unlock(&dev->iotlb_free_lock);
	return node;
}

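/* Return an entry to the free pool. */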
static void
vhost_user_iotlb_pool_put(struct virtio_net *dev, struct vhost_iotlb_entry *node)
{
	rte_spinlock_lock(&dev->iotlb_free_lock);
	SLIST_INSERT_HEAD(&dev->iotlb_free_list, node, next_free);
	rte_spinlock_unlock(&dev->iotlb_free_lock);
}

static void
vhost_user_iotlb_cache_random_evict(struct virtio_net *dev);

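/* Flush all pending IOTLB miss requests and recycle their entries. */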
static void
vhost_user_iotlb_pending_remove_all(struct virtio_net *dev)
{
	struct vhost_iotlb_entry *node, *temp_node;

	rte_rwlock_write_lock(&dev->iotlb_pending_lock);

	RTE_TAILQ_FOREACH_SAFE(node, &dev->iotlb_pending_list, next, temp_node) {
		TAILQ_REMOVE(&dev->iotlb_pending_list, node, next);
		vhost_user_iotlb_pool_put(dev, node);
	}

	rte_rwlock_write_unlock(&dev->iotlb_pending_lock);
}

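/*
 * Check whether an IOTLB miss for this IOVA and permission is already
 * pending, to avoid sending duplicate miss requests.
 */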
bool
vhost_user_iotlb_pending_miss(struct virtio_net *dev, uint64_t iova, uint8_t perm)
{
	struct vhost_iotlb_entry *node;
	bool found = false;

	rte_rwlock_read_lock(&dev->iotlb_pending_lock);

	TAILQ_FOREACH(node, &dev->iotlb_pending_list, next) {
		if ((node->iova == iova) && (node->perm == perm)) {
			found = true;
			break;
		}
	}

	rte_rwlock_read_unlock(&dev->iotlb_pending_lock);

	return found;
}

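/*
 * Record a pending IOTLB miss for this IOVA and permission.
 * If the entry pool is exhausted, make room by flushing the pending list,
 * or by evicting a random cache entry when the pending list is empty.
 */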
void
vhost_user_iotlb_pending_insert(struct virtio_net *dev, uint64_t iova, uint8_t perm)
{
	struct vhost_iotlb_entry *node;

	node = vhost_user_iotlb_pool_get(dev);
	if (node == NULL) {
		VHOST_CONFIG_LOG(dev->ifname, DEBUG,
			"IOTLB pool empty, clear entries for pending insertion");
		if (!TAILQ_EMPTY(&dev->iotlb_pending_list))
			vhost_user_iotlb_pending_remove_all(dev);
		else
			vhost_user_iotlb_cache_random_evict(dev);
		node = vhost_user_iotlb_pool_get(dev);
		if (node == NULL) {
			VHOST_CONFIG_LOG(dev->ifname, ERR,
				"IOTLB pool still empty, pending insertion failure");
			return;
		}
	}

	node->iova = iova;
	node->perm = perm;

	rte_rwlock_write_lock(&dev->iotlb_pending_lock);

	TAILQ_INSERT_TAIL(&dev->iotlb_pending_list, node, next);

	rte_rwlock_write_unlock(&dev->iotlb_pending_lock);
}

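/*
 * Remove pending miss entries covered by [iova, iova + size) whose
 * permissions are satisfied by 'perm'.
 */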
void
vhost_user_iotlb_pending_remove(struct virtio_net *dev, uint64_t iova, uint64_t size, uint8_t perm)
{
	struct vhost_iotlb_entry *node, *temp_node;

	rte_rwlock_write_lock(&dev->iotlb_pending_lock);

	RTE_TAILQ_FOREACH_SAFE(node, &dev->iotlb_pending_list, next, temp_node) {
		if (node->iova < iova)
			continue;
		if (node->iova >= iova + size)
			continue;
		if ((node->perm & perm) != node->perm)
			continue;
		TAILQ_REMOVE(&dev->iotlb_pending_list, node, next);
		vhost_user_iotlb_pool_put(dev, node);
	}

	rte_rwlock_write_unlock(&dev->iotlb_pending_lock);
}

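/* Remove all entries from the IOTLB cache. */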
static void
vhost_user_iotlb_cache_remove_all(struct virtio_net *dev)
{
	struct vhost_iotlb_entry *node, *temp_node;

	vhost_user_iotlb_wr_lock_all(dev);

	RTE_TAILQ_FOREACH_SAFE(node, &dev->iotlb_list, next, temp_node) {
		vhost_user_iotlb_clear_dump(dev, node, NULL, NULL);

		TAILQ_REMOVE(&dev->iotlb_list, node, next);
		vhost_user_iotlb_remove_notify(dev, node);
		vhost_user_iotlb_pool_put(dev, node);
	}

	dev->iotlb_cache_nr = 0;

	vhost_user_iotlb_wr_unlock_all(dev);
}

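/*
 * Evict one randomly-selected entry from the IOTLB cache, used to make
 * room when the entry pool is exhausted.
 */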
static void
vhost_user_iotlb_cache_random_evict(struct virtio_net *dev)
{
	struct vhost_iotlb_entry *node, *temp_node, *prev_node = NULL;
	int entry_idx;

	vhost_user_iotlb_wr_lock_all(dev);

	entry_idx = rte_rand() % dev->iotlb_cache_nr;

	RTE_TAILQ_FOREACH_SAFE(node, &dev->iotlb_list, next, temp_node) {
		if (!entry_idx) {
			struct vhost_iotlb_entry *next_node = RTE_TAILQ_NEXT(node, next);

			vhost_user_iotlb_clear_dump(dev, node, prev_node, next_node);

			TAILQ_REMOVE(&dev->iotlb_list, node, next);
			vhost_user_iotlb_remove_notify(dev, node);
			vhost_user_iotlb_pool_put(dev, node);
			dev->iotlb_cache_nr--;
			break;
		}
		prev_node = node;
		entry_idx--;
	}

	vhost_user_iotlb_wr_unlock_all(dev);
}

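/*
 * Insert a new translation into the IOTLB cache, keeping the list sorted
 * by IOVA, and remove any pending miss it satisfies.
 */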
void
vhost_user_iotlb_cache_insert(struct virtio_net *dev, uint64_t iova, uint64_t uaddr,
		uint64_t uoffset, uint64_t size, uint64_t page_size, uint8_t perm)
{
	struct vhost_iotlb_entry *node, *new_node;

	new_node = vhost_user_iotlb_pool_get(dev);
	if (new_node == NULL) {
		VHOST_CONFIG_LOG(dev->ifname, DEBUG,
			"IOTLB pool empty, clear entries for cache insertion");
		if (!TAILQ_EMPTY(&dev->iotlb_list))
			vhost_user_iotlb_cache_random_evict(dev);
		else
			vhost_user_iotlb_pending_remove_all(dev);
		new_node = vhost_user_iotlb_pool_get(dev);
		if (new_node == NULL) {
			VHOST_CONFIG_LOG(dev->ifname, ERR,
				"IOTLB pool still empty, cache insertion failed");
			return;
		}
	}

	new_node->iova = iova;
	new_node->uaddr = uaddr;
	new_node->uoffset = uoffset;
	new_node->size = size;
	new_node->page_shift = rte_ctz64(page_size);
	new_node->perm = perm;

	vhost_user_iotlb_wr_lock_all(dev);

	TAILQ_FOREACH(node, &dev->iotlb_list, next) {
		/*
		 * Entries must be invalidated before being updated.
		 * So if iova already in list, assume identical.
		 */
		if (node->iova == new_node->iova) {
			vhost_user_iotlb_pool_put(dev, new_node);
			goto unlock;
		} else if (node->iova > new_node->iova) {
			vhost_user_iotlb_set_dump(dev, new_node);

			TAILQ_INSERT_BEFORE(node, new_node, next);
			dev->iotlb_cache_nr++;
			goto unlock;
		}
	}

	vhost_user_iotlb_set_dump(dev, new_node);

	TAILQ_INSERT_TAIL(&dev->iotlb_list, new_node, next);
	dev->iotlb_cache_nr++;

unlock:
	vhost_user_iotlb_pending_remove(dev, iova, size, perm);

	vhost_user_iotlb_wr_unlock_all(dev);
}

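/* Invalidate all cache entries overlapping [iova, iova + size). */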
void
vhost_user_iotlb_cache_remove(struct virtio_net *dev, uint64_t iova, uint64_t size)
{
	struct vhost_iotlb_entry *node, *temp_node, *prev_node = NULL;

	if (unlikely(!size))
		return;

	vhost_user_iotlb_wr_lock_all(dev);

	RTE_TAILQ_FOREACH_SAFE(node, &dev->iotlb_list, next, temp_node) {
		/* Sorted list */
		if (unlikely(iova + size < node->iova))
			break;

		if (iova < node->iova + node->size) {
			struct vhost_iotlb_entry *next_node = RTE_TAILQ_NEXT(node, next);

			vhost_user_iotlb_clear_dump(dev, node, prev_node, next_node);

			TAILQ_REMOVE(&dev->iotlb_list, node, next);
			vhost_user_iotlb_remove_notify(dev, node);
			vhost_user_iotlb_pool_put(dev, node);
			dev->iotlb_cache_nr--;
		} else {
			prev_node = node;
		}
	}

	vhost_user_iotlb_wr_unlock_all(dev);
}

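/*
 * Translate an IOVA into a host virtual address with the requested
 * permissions. On return, *size is shrunk to the contiguously mapped
 * length if only part of the requested range is covered.
 */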
uint64_t
vhost_user_iotlb_cache_find(struct virtio_net *dev, uint64_t iova, uint64_t *size, uint8_t perm)
{
	struct vhost_iotlb_entry *node;
	uint64_t offset, vva = 0, mapped = 0;

	if (unlikely(!*size))
		goto out;

	TAILQ_FOREACH(node, &dev->iotlb_list, next) {
		/* List sorted by iova */
		if (unlikely(iova < node->iova))
			break;

		if (iova >= node->iova + node->size)
			continue;

		if (unlikely((perm & node->perm) != perm)) {
			vva = 0;
			break;
		}

		offset = iova - node->iova;
		if (!vva)
			vva = node->uaddr + node->uoffset + offset;

		mapped += node->size - offset;
		iova = node->iova + node->size;

		if (mapped >= *size)
			break;
	}

out:
	/* Only part of the requested chunk is mapped */
	if (unlikely(mapped < *size))
		*size = mapped;

	return vva;
}

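/* Flush both the translation cache and the pending miss list. */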
void
vhost_user_iotlb_flush_all(struct virtio_net *dev)
{
	vhost_user_iotlb_cache_remove_all(dev);
	vhost_user_iotlb_pending_remove_all(dev);
}

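/*
 * Allocate (or re-allocate) the IOTLB entry pool on the device's NUMA node
 * and reset the cache, pending and free lists. The pool is only allocated
 * for devices with IOMMU support.
 */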
int
vhost_user_iotlb_init(struct virtio_net *dev)
{
	unsigned int i;
	int socket = 0;

	if (dev->iotlb_pool) {
		/*
		 * The cache has already been initialized,
		 * just drop all cached and pending entries.
		 */
		vhost_user_iotlb_flush_all(dev);
		rte_free(dev->iotlb_pool);
	}

#ifdef RTE_LIBRTE_VHOST_NUMA
	if (get_mempolicy(&socket, NULL, 0, dev, MPOL_F_NODE | MPOL_F_ADDR) != 0)
		socket = 0;
#endif

	rte_spinlock_init(&dev->iotlb_free_lock);
	rte_rwlock_init(&dev->iotlb_pending_lock);

	SLIST_INIT(&dev->iotlb_free_list);
	TAILQ_INIT(&dev->iotlb_list);
	TAILQ_INIT(&dev->iotlb_pending_list);

	if (dev->flags & VIRTIO_DEV_SUPPORT_IOMMU) {
		dev->iotlb_pool = rte_calloc_socket("iotlb", IOTLB_CACHE_SIZE,
			sizeof(struct vhost_iotlb_entry), 0, socket);
		if (!dev->iotlb_pool) {
			VHOST_CONFIG_LOG(dev->ifname, ERR, "Failed to create IOTLB cache pool");
			return -1;
		}
		for (i = 0; i < IOTLB_CACHE_SIZE; i++)
			vhost_user_iotlb_pool_put(dev, &dev->iotlb_pool[i]);
	}

	dev->iotlb_cache_nr = 0;

	return 0;
}

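/* Free the IOTLB entry pool. */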
void
vhost_user_iotlb_destroy(struct virtio_net *dev)
{
	rte_free(dev->iotlb_pool);
}