/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) 2017 Red Hat, Inc.
 */

#ifdef RTE_LIBRTE_VHOST_NUMA
#include <numaif.h>
#endif

#include <rte_tailq.h>

#include "iotlb.h"
#include "vhost.h"

struct vhost_iotlb_entry {
	TAILQ_ENTRY(vhost_iotlb_entry) next;
	SLIST_ENTRY(vhost_iotlb_entry) next_free;

	uint64_t iova;
	uint64_t uaddr;
	uint64_t size;
	uint8_t perm;
};

#define IOTLB_CACHE_SIZE 2048

/*
 * Check whether the last host page used by entry 'a' overlaps the first host
 * page used by entry 'b', entry 'a' being mapped at a lower user address.
 */
static bool
vhost_user_iotlb_share_page(struct vhost_iotlb_entry *a, struct vhost_iotlb_entry *b,
		uint64_t align)
{
	uint64_t a_end, b_start;

	if (a == NULL || b == NULL)
		return false;

	/* Assumes entry a lower than entry b */
	RTE_ASSERT(a->uaddr < b->uaddr);
	a_end = RTE_ALIGN_CEIL(a->uaddr + a->size, align);
	b_start = RTE_ALIGN_FLOOR(b->uaddr, align);

	return a_end > b_start;
}

/* Enable dumping of the host pages backing this IOTLB entry. */
static void
vhost_user_iotlb_set_dump(struct virtio_net *dev, struct vhost_iotlb_entry *node)
{
	uint64_t align;

	align = hua_to_alignment(dev->mem, (void *)(uintptr_t)node->uaddr);

	mem_set_dump((void *)(uintptr_t)node->uaddr, node->size, true, align);
}

/*
 * Disable dumping of the host pages backing this IOTLB entry, except for
 * boundary pages still shared with the previous or next entry.
 */
static void
vhost_user_iotlb_clear_dump(struct virtio_net *dev, struct vhost_iotlb_entry *node,
		struct vhost_iotlb_entry *prev, struct vhost_iotlb_entry *next)
{
	uint64_t align, start, end;

	start = node->uaddr;
	end = node->uaddr + node->size;

	align = hua_to_alignment(dev->mem, (void *)(uintptr_t)node->uaddr);

	/* Skip first page if shared with previous entry. */
	if (vhost_user_iotlb_share_page(prev, node, align))
		start = RTE_ALIGN_CEIL(start, align);

	/* Skip last page if shared with next entry. */
	if (vhost_user_iotlb_share_page(node, next, align))
		end = RTE_ALIGN_FLOOR(end, align);

	if (end > start)
		mem_set_dump((void *)(uintptr_t)start, end - start, false, align);
}

/* Take an entry from the per-virtqueue free pool, or return NULL if it is empty. */
static struct vhost_iotlb_entry *
vhost_user_iotlb_pool_get(struct vhost_virtqueue *vq)
{
	struct vhost_iotlb_entry *node;

	rte_spinlock_lock(&vq->iotlb_free_lock);
	node = SLIST_FIRST(&vq->iotlb_free_list);
	if (node != NULL)
		SLIST_REMOVE_HEAD(&vq->iotlb_free_list, next_free);
	rte_spinlock_unlock(&vq->iotlb_free_lock);
	return node;
}

/* Return an entry to the per-virtqueue free pool. */
static void
vhost_user_iotlb_pool_put(struct vhost_virtqueue *vq,
		struct vhost_iotlb_entry *node)
{
	rte_spinlock_lock(&vq->iotlb_free_lock);
	SLIST_INSERT_HEAD(&vq->iotlb_free_list, node, next_free);
	rte_spinlock_unlock(&vq->iotlb_free_lock);
}

static void
vhost_user_iotlb_cache_random_evict(struct virtio_net *dev, struct vhost_virtqueue *vq);

static void
vhost_user_iotlb_pending_remove_all(struct vhost_virtqueue *vq)
{
	struct vhost_iotlb_entry *node, *temp_node;

	rte_rwlock_write_lock(&vq->iotlb_pending_lock);

	RTE_TAILQ_FOREACH_SAFE(node, &vq->iotlb_pending_list, next, temp_node) {
		TAILQ_REMOVE(&vq->iotlb_pending_list, node, next);
		vhost_user_iotlb_pool_put(vq, node);
	}

	rte_rwlock_write_unlock(&vq->iotlb_pending_lock);
}

bool
vhost_user_iotlb_pending_miss(struct vhost_virtqueue *vq, uint64_t iova,
				uint8_t perm)
{
	struct vhost_iotlb_entry *node;
	bool found = false;

	rte_rwlock_read_lock(&vq->iotlb_pending_lock);

	TAILQ_FOREACH(node, &vq->iotlb_pending_list, next) {
		if ((node->iova == iova) && (node->perm == perm)) {
			found = true;
			break;
		}
	}

	rte_rwlock_read_unlock(&vq->iotlb_pending_lock);

	return found;
}
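
/*
 * Illustrative sketch (not part of the original file): how the cache lookup
 * and pending-list helpers in this file are typically combined on the data
 * path, along the lines of __vhost_iova_to_vva() in vhost.c. Variable names
 * and error handling are simplified assumptions.
 *
 *	vva = vhost_user_iotlb_cache_find(vq, iova, &tmp_size, perm);
 *	if (tmp_size == *len)
 *		return vva;	// whole range resolved from the cache
 *
 *	iova += tmp_size;	// first IOVA not covered by the cache
 *	if (!vhost_user_iotlb_pending_miss(vq, iova, perm)) {
 *		// Record the miss so it is only requested once, then ask the
 *		// front-end for an IOTLB update covering this IOVA.
 *		vhost_user_iotlb_pending_insert(dev, vq, iova, perm);
 *		if (vhost_user_iotlb_miss(dev, iova, perm))
 *			vhost_user_iotlb_pending_remove(vq, iova, 1, perm);
 *	}
 *	return 0;
 */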

void
vhost_user_iotlb_pending_insert(struct virtio_net *dev, struct vhost_virtqueue *vq,
				uint64_t iova, uint8_t perm)
{
	struct vhost_iotlb_entry *node;

	node = vhost_user_iotlb_pool_get(vq);
	if (node == NULL) {
		VHOST_LOG_CONFIG(dev->ifname, DEBUG,
			"IOTLB pool for vq %"PRIu32" empty, clear entries for pending insertion\n",
			vq->index);
		if (!TAILQ_EMPTY(&vq->iotlb_pending_list))
			vhost_user_iotlb_pending_remove_all(vq);
		else
			vhost_user_iotlb_cache_random_evict(dev, vq);
		node = vhost_user_iotlb_pool_get(vq);
		if (node == NULL) {
			VHOST_LOG_CONFIG(dev->ifname, ERR,
				"IOTLB pool vq %"PRIu32" still empty, pending insertion failure\n",
				vq->index);
			return;
		}
	}

	node->iova = iova;
	node->perm = perm;

	rte_rwlock_write_lock(&vq->iotlb_pending_lock);

	TAILQ_INSERT_TAIL(&vq->iotlb_pending_list, node, next);

	rte_rwlock_write_unlock(&vq->iotlb_pending_lock);
}

void
vhost_user_iotlb_pending_remove(struct vhost_virtqueue *vq,
				uint64_t iova, uint64_t size, uint8_t perm)
{
	struct vhost_iotlb_entry *node, *temp_node;

	rte_rwlock_write_lock(&vq->iotlb_pending_lock);

	RTE_TAILQ_FOREACH_SAFE(node, &vq->iotlb_pending_list, next,
				temp_node) {
		if (node->iova < iova)
			continue;
		if (node->iova >= iova + size)
			continue;
		if ((node->perm & perm) != node->perm)
			continue;
		TAILQ_REMOVE(&vq->iotlb_pending_list, node, next);
		vhost_user_iotlb_pool_put(vq, node);
	}

	rte_rwlock_write_unlock(&vq->iotlb_pending_lock);
}

static void
vhost_user_iotlb_cache_remove_all(struct virtio_net *dev, struct vhost_virtqueue *vq)
{
	struct vhost_iotlb_entry *node, *temp_node;

	rte_rwlock_write_lock(&vq->iotlb_lock);

	RTE_TAILQ_FOREACH_SAFE(node, &vq->iotlb_list, next, temp_node) {
		vhost_user_iotlb_clear_dump(dev, node, NULL, NULL);

		TAILQ_REMOVE(&vq->iotlb_list, node, next);
		vhost_user_iotlb_pool_put(vq, node);
	}

	vq->iotlb_cache_nr = 0;

	rte_rwlock_write_unlock(&vq->iotlb_lock);
}

static void
vhost_user_iotlb_cache_random_evict(struct virtio_net *dev, struct vhost_virtqueue *vq)
{
	struct vhost_iotlb_entry *node, *temp_node, *prev_node = NULL;
	int entry_idx;

	rte_rwlock_write_lock(&vq->iotlb_lock);

	entry_idx = rte_rand() % vq->iotlb_cache_nr;

	RTE_TAILQ_FOREACH_SAFE(node, &vq->iotlb_list, next, temp_node) {
		if (!entry_idx) {
			struct vhost_iotlb_entry *next_node = RTE_TAILQ_NEXT(node, next);

			vhost_user_iotlb_clear_dump(dev, node, prev_node, next_node);

			TAILQ_REMOVE(&vq->iotlb_list, node, next);
			vhost_user_iotlb_pool_put(vq, node);
			vq->iotlb_cache_nr--;
			break;
		}
		prev_node = node;
		entry_idx--;
	}

	rte_rwlock_write_unlock(&vq->iotlb_lock);
}
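
/*
 * Illustrative sketch (not part of the original file): how the control path
 * is expected to feed this cache when the front-end sends an IOTLB message,
 * roughly along the lines of the VHOST_USER_IOTLB_MSG handler in
 * vhost_user.c. The 'imsg' and 'uaddr' names are assumptions; 'uaddr' stands
 * for the entry's guest address already translated into our address space.
 *
 *	switch (imsg->type) {
 *	case VHOST_IOTLB_UPDATE:
 *		vhost_user_iotlb_cache_insert(dev, vq, imsg->iova, uaddr,
 *				imsg->size, imsg->perm);
 *		break;
 *	case VHOST_IOTLB_INVALIDATE:
 *		vhost_user_iotlb_cache_remove(dev, vq, imsg->iova, imsg->size);
 *		break;
 *	}
 */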

void
vhost_user_iotlb_cache_insert(struct virtio_net *dev, struct vhost_virtqueue *vq,
				uint64_t iova, uint64_t uaddr,
				uint64_t size, uint8_t perm)
{
	struct vhost_iotlb_entry *node, *new_node;

	new_node = vhost_user_iotlb_pool_get(vq);
	if (new_node == NULL) {
		VHOST_LOG_CONFIG(dev->ifname, DEBUG,
			"IOTLB pool vq %"PRIu32" empty, clear entries for cache insertion\n",
			vq->index);
		if (!TAILQ_EMPTY(&vq->iotlb_list))
			vhost_user_iotlb_cache_random_evict(dev, vq);
		else
			vhost_user_iotlb_pending_remove_all(vq);
		new_node = vhost_user_iotlb_pool_get(vq);
		if (new_node == NULL) {
			VHOST_LOG_CONFIG(dev->ifname, ERR,
				"IOTLB pool vq %"PRIu32" still empty, cache insertion failed\n",
				vq->index);
			return;
		}
	}

	new_node->iova = iova;
	new_node->uaddr = uaddr;
	new_node->size = size;
	new_node->perm = perm;

	rte_rwlock_write_lock(&vq->iotlb_lock);

	TAILQ_FOREACH(node, &vq->iotlb_list, next) {
		/*
		 * Entries must be invalidated before being updated.
		 * So if iova already in list, assume identical.
		 */
		if (node->iova == new_node->iova) {
			vhost_user_iotlb_pool_put(vq, new_node);
			goto unlock;
		} else if (node->iova > new_node->iova) {
			vhost_user_iotlb_set_dump(dev, new_node);

			TAILQ_INSERT_BEFORE(node, new_node, next);
			vq->iotlb_cache_nr++;
			goto unlock;
		}
	}

	vhost_user_iotlb_set_dump(dev, new_node);

	TAILQ_INSERT_TAIL(&vq->iotlb_list, new_node, next);
	vq->iotlb_cache_nr++;

unlock:
	vhost_user_iotlb_pending_remove(vq, iova, size, perm);

	rte_rwlock_write_unlock(&vq->iotlb_lock);
}

void
vhost_user_iotlb_cache_remove(struct virtio_net *dev, struct vhost_virtqueue *vq,
				uint64_t iova, uint64_t size)
{
	struct vhost_iotlb_entry *node, *temp_node, *prev_node = NULL;

	if (unlikely(!size))
		return;

	rte_rwlock_write_lock(&vq->iotlb_lock);

	RTE_TAILQ_FOREACH_SAFE(node, &vq->iotlb_list, next, temp_node) {
		/* Sorted list */
		if (unlikely(iova + size < node->iova))
			break;

		if (iova < node->iova + node->size) {
			struct vhost_iotlb_entry *next_node = RTE_TAILQ_NEXT(node, next);

			vhost_user_iotlb_clear_dump(dev, node, prev_node, next_node);

			TAILQ_REMOVE(&vq->iotlb_list, node, next);
			vhost_user_iotlb_pool_put(vq, node);
			vq->iotlb_cache_nr--;
		} else
			prev_node = node;
	}

	rte_rwlock_write_unlock(&vq->iotlb_lock);
}

/*
 * Translate an IOVA range into a host user virtual address using the cache.
 * On return, '*size' holds the length covered by cached entries starting at
 * 'iova', which may be smaller than requested. Returns 0 on miss or if the
 * requested permissions are not granted.
 */
uint64_t
vhost_user_iotlb_cache_find(struct vhost_virtqueue *vq, uint64_t iova,
				uint64_t *size, uint8_t perm)
{
	struct vhost_iotlb_entry *node;
	uint64_t offset, vva = 0, mapped = 0;

	if (unlikely(!*size))
		goto out;

	TAILQ_FOREACH(node, &vq->iotlb_list, next) {
		/* List sorted by iova */
		if (unlikely(iova < node->iova))
			break;

		if (iova >= node->iova + node->size)
			continue;

		if (unlikely((perm & node->perm) != perm)) {
			vva = 0;
			break;
		}

		offset = iova - node->iova;
		if (!vva)
			vva = node->uaddr + offset;

		mapped += node->size - offset;
		iova = node->iova + node->size;

		if (mapped >= *size)
			break;
	}

out:
	/* Only part of the requested chunk is mapped */
	if (unlikely(mapped < *size))
		*size = mapped;

	return vva;
}

void
vhost_user_iotlb_flush_all(struct virtio_net *dev, struct vhost_virtqueue *vq)
{
	vhost_user_iotlb_cache_remove_all(dev, vq);
	vhost_user_iotlb_pending_remove_all(vq);
}

int
vhost_user_iotlb_init(struct virtio_net *dev, struct vhost_virtqueue *vq)
{
	unsigned int i;
	int socket = 0;

	if (vq->iotlb_pool) {
		/*
		 * The cache has already been initialized,
		 * just drop all cached and pending entries.
		 */
		vhost_user_iotlb_flush_all(dev, vq);
		rte_free(vq->iotlb_pool);
	}

#ifdef RTE_LIBRTE_VHOST_NUMA
	if (get_mempolicy(&socket, NULL, 0, vq, MPOL_F_NODE | MPOL_F_ADDR) != 0)
		socket = 0;
#endif

	rte_spinlock_init(&vq->iotlb_free_lock);
	rte_rwlock_init(&vq->iotlb_lock);
	rte_rwlock_init(&vq->iotlb_pending_lock);

	SLIST_INIT(&vq->iotlb_free_list);
	TAILQ_INIT(&vq->iotlb_list);
	TAILQ_INIT(&vq->iotlb_pending_list);

	if (dev->flags & VIRTIO_DEV_SUPPORT_IOMMU) {
		vq->iotlb_pool = rte_calloc_socket("iotlb", IOTLB_CACHE_SIZE,
			sizeof(struct vhost_iotlb_entry), 0, socket);
		if (!vq->iotlb_pool) {
			VHOST_LOG_CONFIG(dev->ifname, ERR,
				"Failed to create IOTLB cache pool for vq %"PRIu32"\n",
				vq->index);
			return -1;
		}
		for (i = 0; i < IOTLB_CACHE_SIZE; i++)
			vhost_user_iotlb_pool_put(vq, &vq->iotlb_pool[i]);
	}

	vq->iotlb_cache_nr = 0;

	return 0;
}

void
vhost_user_iotlb_destroy(struct vhost_virtqueue *vq)
{
	rte_free(vq->iotlb_pool);
}
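
/*
 * Illustrative sketch (not part of the original file): expected lifecycle of
 * the per-virtqueue IOTLB, assuming a setup/teardown path similar to the
 * virtqueue allocation and free helpers in vhost.c.
 *
 *	if (vhost_user_iotlb_init(dev, vq) != 0)
 *		return -1;			// pool allocation failed
 *	...
 *	vhost_user_iotlb_flush_all(dev, vq);	// e.g. when the rings are reset
 *	...
 *	vhost_user_iotlb_destroy(vq);		// before freeing the virtqueue
 */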