/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) 2017 Red Hat, Inc.
 */

#ifdef RTE_LIBRTE_VHOST_NUMA
#include <numaif.h>
#endif

#include <rte_tailq.h>

#include "iotlb.h"
#include "vhost.h"

struct vhost_iotlb_entry {
	TAILQ_ENTRY(vhost_iotlb_entry) next;
	SLIST_ENTRY(vhost_iotlb_entry) next_free;

	uint64_t iova;
	uint64_t uaddr;
	uint64_t uoffset;
	uint64_t size;
	uint8_t page_shift;
	uint8_t perm;
};

#define IOTLB_CACHE_SIZE 2048

static void
vhost_user_iotlb_remove_notify(struct virtio_net *dev, struct vhost_iotlb_entry *entry)
{
	if (dev->backend_ops->iotlb_remove_notify == NULL)
		return;

	dev->backend_ops->iotlb_remove_notify(entry->uaddr, entry->uoffset, entry->size);
}

static bool
vhost_user_iotlb_share_page(struct vhost_iotlb_entry *a, struct vhost_iotlb_entry *b)
{
	uint64_t a_start, a_end, b_start;

	if (a == NULL || b == NULL)
		return false;

	a_start = a->uaddr + a->uoffset;
	b_start = b->uaddr + b->uoffset;

	/* Assumes entry a lower than entry b */
	RTE_ASSERT(a_start < b_start);
	a_end = RTE_ALIGN_CEIL(a_start + a->size, RTE_BIT64(a->page_shift));
	b_start = RTE_ALIGN_FLOOR(b_start, RTE_BIT64(b->page_shift));

	return a_end > b_start;
}

static void
vhost_user_iotlb_set_dump(struct virtio_net *dev, struct vhost_iotlb_entry *node)
{
	uint64_t start;

	start = node->uaddr + node->uoffset;
	mem_set_dump(dev, (void *)(uintptr_t)start, node->size, true, RTE_BIT64(node->page_shift));
}

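/*
 * Clear the mem_set_dump() marking on the pages backing an entry. Boundary
 * pages still shared with the previous or next cache entry are skipped, so
 * their dump setting is preserved.
 */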
static void
vhost_user_iotlb_clear_dump(struct virtio_net *dev, struct vhost_iotlb_entry *node,
		struct vhost_iotlb_entry *prev, struct vhost_iotlb_entry *next)
{
	uint64_t start, end;

	start = node->uaddr + node->uoffset;
	end = start + node->size;

	/* Skip first page if shared with previous entry. */
	if (vhost_user_iotlb_share_page(prev, node))
		start = RTE_ALIGN_CEIL(start, RTE_BIT64(node->page_shift));

	/* Skip last page if shared with next entry. */
	if (vhost_user_iotlb_share_page(node, next))
		end = RTE_ALIGN_FLOOR(end, RTE_BIT64(node->page_shift));

	if (end > start)
		mem_set_dump(dev, (void *)(uintptr_t)start, end - start, false,
				RTE_BIT64(node->page_shift));
}

static struct vhost_iotlb_entry *
vhost_user_iotlb_pool_get(struct virtio_net *dev)
{
	struct vhost_iotlb_entry *node;

	rte_spinlock_lock(&dev->iotlb_free_lock);
	node = SLIST_FIRST(&dev->iotlb_free_list);
	if (node != NULL)
		SLIST_REMOVE_HEAD(&dev->iotlb_free_list, next_free);
	rte_spinlock_unlock(&dev->iotlb_free_lock);
	return node;
}

static void
vhost_user_iotlb_pool_put(struct virtio_net *dev, struct vhost_iotlb_entry *node)
{
	rte_spinlock_lock(&dev->iotlb_free_lock);
	SLIST_INSERT_HEAD(&dev->iotlb_free_list, node, next_free);
	rte_spinlock_unlock(&dev->iotlb_free_lock);
}

static void
vhost_user_iotlb_cache_random_evict(struct virtio_net *dev);

static void
vhost_user_iotlb_pending_remove_all(struct virtio_net *dev)
{
	struct vhost_iotlb_entry *node, *temp_node;

	rte_rwlock_write_lock(&dev->iotlb_pending_lock);

	RTE_TAILQ_FOREACH_SAFE(node, &dev->iotlb_pending_list, next, temp_node) {
		TAILQ_REMOVE(&dev->iotlb_pending_list, node, next);
		vhost_user_iotlb_pool_put(dev, node);
	}

	rte_rwlock_write_unlock(&dev->iotlb_pending_lock);
}

bool
vhost_user_iotlb_pending_miss(struct virtio_net *dev, uint64_t iova, uint8_t perm)
{
	struct vhost_iotlb_entry *node;
	bool found = false;

	rte_rwlock_read_lock(&dev->iotlb_pending_lock);

	TAILQ_FOREACH(node, &dev->iotlb_pending_list, next) {
		if ((node->iova == iova) && (node->perm == perm)) {
			found = true;
			break;
		}
	}

	rte_rwlock_read_unlock(&dev->iotlb_pending_lock);

	return found;
}

void
vhost_user_iotlb_pending_insert(struct virtio_net *dev, uint64_t iova, uint8_t perm)
{
	struct vhost_iotlb_entry *node;

	node = vhost_user_iotlb_pool_get(dev);
	if (node == NULL) {
		VHOST_CONFIG_LOG(dev->ifname, DEBUG,
			"IOTLB pool empty, clear entries for pending insertion");
		if (!TAILQ_EMPTY(&dev->iotlb_pending_list))
			vhost_user_iotlb_pending_remove_all(dev);
		else
			vhost_user_iotlb_cache_random_evict(dev);
		node = vhost_user_iotlb_pool_get(dev);
		if (node == NULL) {
			VHOST_CONFIG_LOG(dev->ifname, ERR,
				"IOTLB pool still empty, pending insertion failure");
			return;
		}
	}

	node->iova = iova;
	node->perm = perm;

	rte_rwlock_write_lock(&dev->iotlb_pending_lock);

	TAILQ_INSERT_TAIL(&dev->iotlb_pending_list, node, next);

	rte_rwlock_write_unlock(&dev->iotlb_pending_lock);
}

void
vhost_user_iotlb_pending_remove(struct virtio_net *dev, uint64_t iova, uint64_t size, uint8_t perm)
{
	struct vhost_iotlb_entry *node, *temp_node;

	rte_rwlock_write_lock(&dev->iotlb_pending_lock);

	RTE_TAILQ_FOREACH_SAFE(node, &dev->iotlb_pending_list, next,
			temp_node) {
		if (node->iova < iova)
			continue;
		if (node->iova >= iova + size)
			continue;
		if ((node->perm & perm) != node->perm)
			continue;
		TAILQ_REMOVE(&dev->iotlb_pending_list, node, next);
		vhost_user_iotlb_pool_put(dev, node);
	}

	rte_rwlock_write_unlock(&dev->iotlb_pending_lock);
}

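/*
 * Drop all cached translations: clear the dump marking on each entry, notify
 * the backend of the removal and return the entries to the free pool. The
 * IOTLB write locks are taken via vhost_user_iotlb_wr_lock_all().
 */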
static void
vhost_user_iotlb_cache_remove_all(struct virtio_net *dev)
{
	struct vhost_iotlb_entry *node, *temp_node;

	vhost_user_iotlb_wr_lock_all(dev);

	RTE_TAILQ_FOREACH_SAFE(node, &dev->iotlb_list, next, temp_node) {
		vhost_user_iotlb_clear_dump(dev, node, NULL, NULL);

		TAILQ_REMOVE(&dev->iotlb_list, node, next);
		vhost_user_iotlb_remove_notify(dev, node);
		vhost_user_iotlb_pool_put(dev, node);
	}

	dev->iotlb_cache_nr = 0;

	vhost_user_iotlb_wr_unlock_all(dev);
}

static void
vhost_user_iotlb_cache_random_evict(struct virtio_net *dev)
{
	struct vhost_iotlb_entry *node, *temp_node, *prev_node = NULL;
	int entry_idx;

	vhost_user_iotlb_wr_lock_all(dev);

	entry_idx = rte_rand() % dev->iotlb_cache_nr;

	RTE_TAILQ_FOREACH_SAFE(node, &dev->iotlb_list, next, temp_node) {
		if (!entry_idx) {
			struct vhost_iotlb_entry *next_node = RTE_TAILQ_NEXT(node, next);

			vhost_user_iotlb_clear_dump(dev, node, prev_node, next_node);

			TAILQ_REMOVE(&dev->iotlb_list, node, next);
			vhost_user_iotlb_remove_notify(dev, node);
			vhost_user_iotlb_pool_put(dev, node);
			dev->iotlb_cache_nr--;
			break;
		}
		prev_node = node;
		entry_idx--;
	}

	vhost_user_iotlb_wr_unlock_all(dev);
}

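/*
 * Insert a translation in the IOTLB cache, keeping the list sorted by iova.
 * If the entry pool is exhausted, a random cache entry is evicted or, if the
 * cache is empty, the pending list is flushed to make room. An iova already
 * present in the cache is assumed to map identically, so the new node is
 * simply returned to the pool. Any pending miss covered by the new mapping is
 * removed before the locks are released.
 */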
void
vhost_user_iotlb_cache_insert(struct virtio_net *dev, uint64_t iova, uint64_t uaddr,
		uint64_t uoffset, uint64_t size, uint64_t page_size, uint8_t perm)
{
	struct vhost_iotlb_entry *node, *new_node;

	new_node = vhost_user_iotlb_pool_get(dev);
	if (new_node == NULL) {
		VHOST_CONFIG_LOG(dev->ifname, DEBUG,
			"IOTLB pool empty, clear entries for cache insertion");
		if (!TAILQ_EMPTY(&dev->iotlb_list))
			vhost_user_iotlb_cache_random_evict(dev);
		else
			vhost_user_iotlb_pending_remove_all(dev);
		new_node = vhost_user_iotlb_pool_get(dev);
		if (new_node == NULL) {
			VHOST_CONFIG_LOG(dev->ifname, ERR,
				"IOTLB pool still empty, cache insertion failed");
			return;
		}
	}

	new_node->iova = iova;
	new_node->uaddr = uaddr;
	new_node->uoffset = uoffset;
	new_node->size = size;
	new_node->page_shift = rte_ctz64(page_size);
	new_node->perm = perm;

	vhost_user_iotlb_wr_lock_all(dev);

	TAILQ_FOREACH(node, &dev->iotlb_list, next) {
		/*
		 * Entries must be invalidated before being updated.
		 * So if iova already in list, assume identical.
		 */
		if (node->iova == new_node->iova) {
			vhost_user_iotlb_pool_put(dev, new_node);
			goto unlock;
		} else if (node->iova > new_node->iova) {
			vhost_user_iotlb_set_dump(dev, new_node);

			TAILQ_INSERT_BEFORE(node, new_node, next);
			dev->iotlb_cache_nr++;
			goto unlock;
		}
	}

	vhost_user_iotlb_set_dump(dev, new_node);

	TAILQ_INSERT_TAIL(&dev->iotlb_list, new_node, next);
	dev->iotlb_cache_nr++;

unlock:
	vhost_user_iotlb_pending_remove(dev, iova, size, perm);

	vhost_user_iotlb_wr_unlock_all(dev);
}

void
vhost_user_iotlb_cache_remove(struct virtio_net *dev, uint64_t iova, uint64_t size)
{
	struct vhost_iotlb_entry *node, *temp_node, *prev_node = NULL;

	if (unlikely(!size))
		return;

	vhost_user_iotlb_wr_lock_all(dev);

	RTE_TAILQ_FOREACH_SAFE(node, &dev->iotlb_list, next, temp_node) {
		/* Sorted list */
		if (unlikely(iova + size < node->iova))
			break;

		if (iova < node->iova + node->size) {
			struct vhost_iotlb_entry *next_node = RTE_TAILQ_NEXT(node, next);

			vhost_user_iotlb_clear_dump(dev, node, prev_node, next_node);

			TAILQ_REMOVE(&dev->iotlb_list, node, next);
			vhost_user_iotlb_remove_notify(dev, node);
			vhost_user_iotlb_pool_put(dev, node);
			dev->iotlb_cache_nr--;
		} else {
			prev_node = node;
		}
	}

	vhost_user_iotlb_wr_unlock_all(dev);
}

uint64_t
vhost_user_iotlb_cache_find(struct virtio_net *dev, uint64_t iova, uint64_t *size, uint8_t perm)
{
	struct vhost_iotlb_entry *node;
	uint64_t offset, vva = 0, mapped = 0;

	if (unlikely(!*size))
		goto out;

	TAILQ_FOREACH(node, &dev->iotlb_list, next) {
		/* List sorted by iova */
		if (unlikely(iova < node->iova))
			break;

		if (iova >= node->iova + node->size)
			continue;

		if (unlikely((perm & node->perm) != perm)) {
			vva = 0;
			break;
		}

		offset = iova - node->iova;
		if (!vva)
			vva = node->uaddr + node->uoffset + offset;

		mapped += node->size - offset;
		iova = node->iova + node->size;

		if (mapped >= *size)
			break;
	}

out:
	/* Only part of the requested chunk is mapped */
	if (unlikely(mapped < *size))
		*size = mapped;

	return vva;
}

void
vhost_user_iotlb_flush_all(struct virtio_net *dev)
{
	vhost_user_iotlb_cache_remove_all(dev);
	vhost_user_iotlb_pending_remove_all(dev);
}

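/*
 * Allocate and initialize the per-device IOTLB cache. The entry pool is only
 * allocated when the device supports an IOMMU. On re-initialization, cached
 * and pending entries are flushed and the previous pool is freed first.
 */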
int
vhost_user_iotlb_init(struct virtio_net *dev)
{
	unsigned int i;
	int socket = 0;

	if (dev->iotlb_pool) {
		/*
		 * The cache has already been initialized,
		 * just drop all cached and pending entries.
		 */
		vhost_user_iotlb_flush_all(dev);
		rte_free(dev->iotlb_pool);
	}

#ifdef RTE_LIBRTE_VHOST_NUMA
	if (get_mempolicy(&socket, NULL, 0, dev, MPOL_F_NODE | MPOL_F_ADDR) != 0)
		socket = 0;
#endif

	rte_spinlock_init(&dev->iotlb_free_lock);
	rte_rwlock_init(&dev->iotlb_pending_lock);

	SLIST_INIT(&dev->iotlb_free_list);
	TAILQ_INIT(&dev->iotlb_list);
	TAILQ_INIT(&dev->iotlb_pending_list);

	if (dev->flags & VIRTIO_DEV_SUPPORT_IOMMU) {
		dev->iotlb_pool = rte_calloc_socket("iotlb", IOTLB_CACHE_SIZE,
			sizeof(struct vhost_iotlb_entry), 0, socket);
		if (!dev->iotlb_pool) {
			VHOST_CONFIG_LOG(dev->ifname, ERR, "Failed to create IOTLB cache pool");
			return -1;
		}
		for (i = 0; i < IOTLB_CACHE_SIZE; i++)
			vhost_user_iotlb_pool_put(dev, &dev->iotlb_pool[i]);
	}

	dev->iotlb_cache_nr = 0;

	return 0;
}

void
vhost_user_iotlb_destroy(struct virtio_net *dev)
{
	rte_free(dev->iotlb_pool);
}
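
/*
 * Illustrative sketch (not part of this file's code): a typical lookup path
 * first queries the cache and, when the requested range is not fully mapped,
 * records the miss before asking the front-end for the missing translation.
 * The miss-request step itself is an assumption about the caller and lives
 * outside this file.
 *
 *	uint64_t len = size;
 *	uint64_t vva = vhost_user_iotlb_cache_find(dev, iova, &len, perm);
 *
 *	if (len < size &&
 *			!vhost_user_iotlb_pending_miss(dev, iova + len, perm)) {
 *		vhost_user_iotlb_pending_insert(dev, iova + len, perm);
 *		// then send an IOTLB miss request for iova + len to the front-end
 *	}
 */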