/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) 2017 Red Hat, Inc.
 */

#ifdef RTE_LIBRTE_VHOST_NUMA
#include <numaif.h>
#endif

#include <rte_tailq.h>

#include "iotlb.h"
#include "vhost.h"

struct vhost_iotlb_entry {
	TAILQ_ENTRY(vhost_iotlb_entry) next;
	SLIST_ENTRY(vhost_iotlb_entry) next_free;

	uint64_t iova;
	uint64_t uaddr;
	uint64_t size;
	uint8_t perm;
};

#define IOTLB_CACHE_SIZE 2048

static struct vhost_iotlb_entry *
vhost_user_iotlb_pool_get(struct vhost_virtqueue *vq)
{
	struct vhost_iotlb_entry *node;

	rte_spinlock_lock(&vq->iotlb_free_lock);
	node = SLIST_FIRST(&vq->iotlb_free_list);
	if (node != NULL)
		SLIST_REMOVE_HEAD(&vq->iotlb_free_list, next_free);
	rte_spinlock_unlock(&vq->iotlb_free_lock);
	return node;
}

static void
vhost_user_iotlb_pool_put(struct vhost_virtqueue *vq,
	struct vhost_iotlb_entry *node)
{
	rte_spinlock_lock(&vq->iotlb_free_lock);
	SLIST_INSERT_HEAD(&vq->iotlb_free_list, node, next_free);
	rte_spinlock_unlock(&vq->iotlb_free_lock);
}

static void
vhost_user_iotlb_cache_random_evict(struct virtio_net *dev, struct vhost_virtqueue *vq);

static void
vhost_user_iotlb_pending_remove_all(struct vhost_virtqueue *vq)
{
	struct vhost_iotlb_entry *node, *temp_node;

	rte_rwlock_write_lock(&vq->iotlb_pending_lock);

	RTE_TAILQ_FOREACH_SAFE(node, &vq->iotlb_pending_list, next, temp_node) {
		TAILQ_REMOVE(&vq->iotlb_pending_list, node, next);
		vhost_user_iotlb_pool_put(vq, node);
	}

	rte_rwlock_write_unlock(&vq->iotlb_pending_lock);
}

bool
vhost_user_iotlb_pending_miss(struct vhost_virtqueue *vq, uint64_t iova,
				uint8_t perm)
{
	struct vhost_iotlb_entry *node;
	bool found = false;

	rte_rwlock_read_lock(&vq->iotlb_pending_lock);

	TAILQ_FOREACH(node, &vq->iotlb_pending_list, next) {
		if ((node->iova == iova) && (node->perm == perm)) {
			found = true;
			break;
		}
	}

	rte_rwlock_read_unlock(&vq->iotlb_pending_lock);

	return found;
}

void
vhost_user_iotlb_pending_insert(struct virtio_net *dev, struct vhost_virtqueue *vq,
				uint64_t iova, uint8_t perm)
{
	struct vhost_iotlb_entry *node;

	node = vhost_user_iotlb_pool_get(vq);
	if (node == NULL) {
		VHOST_LOG_CONFIG(dev->ifname, DEBUG,
			"IOTLB pool for vq %"PRIu32" empty, clear entries for pending insertion\n",
			vq->index);
		if (!TAILQ_EMPTY(&vq->iotlb_pending_list))
			vhost_user_iotlb_pending_remove_all(vq);
		else
			vhost_user_iotlb_cache_random_evict(dev, vq);
		node = vhost_user_iotlb_pool_get(vq);
		if (node == NULL) {
			VHOST_LOG_CONFIG(dev->ifname, ERR,
				"IOTLB pool vq %"PRIu32" still empty, pending insertion failure\n",
				vq->index);
			return;
		}
	}

	node->iova = iova;
	node->perm = perm;

	rte_rwlock_write_lock(&vq->iotlb_pending_lock);

	TAILQ_INSERT_TAIL(&vq->iotlb_pending_list, node, next);

	rte_rwlock_write_unlock(&vq->iotlb_pending_lock);
}

void
vhost_user_iotlb_pending_remove(struct vhost_virtqueue *vq,
				uint64_t iova, uint64_t size, uint8_t perm)
{
	struct vhost_iotlb_entry *node, *temp_node;

	rte_rwlock_write_lock(&vq->iotlb_pending_lock);

	RTE_TAILQ_FOREACH_SAFE(node, &vq->iotlb_pending_list, next,
				temp_node) {
		if (node->iova < iova)
			continue;
		if (node->iova >= iova + size)
			continue;
		if ((node->perm & perm) != node->perm)
			continue;
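		/* Entry lies within [iova, iova + size) and its permissions
		 * are covered by the requested ones: return it to the pool.
		 */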
		TAILQ_REMOVE(&vq->iotlb_pending_list, node, next);
		vhost_user_iotlb_pool_put(vq, node);
	}

	rte_rwlock_write_unlock(&vq->iotlb_pending_lock);
}

static void
vhost_user_iotlb_cache_remove_all(struct virtio_net *dev, struct vhost_virtqueue *vq)
{
	struct vhost_iotlb_entry *node, *temp_node;

	rte_rwlock_write_lock(&vq->iotlb_lock);

	RTE_TAILQ_FOREACH_SAFE(node, &vq->iotlb_list, next, temp_node) {
		mem_set_dump((void *)(uintptr_t)node->uaddr, node->size, false,
			hua_to_alignment(dev->mem, (void *)(uintptr_t)node->uaddr));
		TAILQ_REMOVE(&vq->iotlb_list, node, next);
		vhost_user_iotlb_pool_put(vq, node);
	}

	vq->iotlb_cache_nr = 0;

	rte_rwlock_write_unlock(&vq->iotlb_lock);
}

static void
vhost_user_iotlb_cache_random_evict(struct virtio_net *dev, struct vhost_virtqueue *vq)
{
	struct vhost_iotlb_entry *node, *temp_node, *prev_node = NULL;
	uint64_t alignment, mask;
	int entry_idx;

	rte_rwlock_write_lock(&vq->iotlb_lock);

	entry_idx = rte_rand() % vq->iotlb_cache_nr;

	RTE_TAILQ_FOREACH_SAFE(node, &vq->iotlb_list, next, temp_node) {
		if (!entry_idx) {
			struct vhost_iotlb_entry *next_node;
			alignment = hua_to_alignment(dev->mem, (void *)(uintptr_t)node->uaddr);
			mask = ~(alignment - 1);

			/* Don't disable coredump if the previous node is in the same page */
			if (prev_node == NULL ||
					(node->uaddr & mask) != (prev_node->uaddr & mask)) {
				next_node = RTE_TAILQ_NEXT(node, next);
				/* Don't disable coredump if the next node is in the same page */
				if (next_node == NULL || ((node->uaddr + node->size - 1) & mask) !=
						(next_node->uaddr & mask))
					mem_set_dump((void *)(uintptr_t)node->uaddr, node->size,
						false, alignment);
			}
			TAILQ_REMOVE(&vq->iotlb_list, node, next);
			vhost_user_iotlb_pool_put(vq, node);
			vq->iotlb_cache_nr--;
			break;
		}
		prev_node = node;
		entry_idx--;
	}

	rte_rwlock_write_unlock(&vq->iotlb_lock);
}

void
vhost_user_iotlb_cache_insert(struct virtio_net *dev, struct vhost_virtqueue *vq,
				uint64_t iova, uint64_t uaddr,
				uint64_t size, uint8_t perm)
{
	struct vhost_iotlb_entry *node, *new_node;

	new_node = vhost_user_iotlb_pool_get(vq);
	if (new_node == NULL) {
		VHOST_LOG_CONFIG(dev->ifname, DEBUG,
			"IOTLB pool vq %"PRIu32" empty, clear entries for cache insertion\n",
			vq->index);
		if (!TAILQ_EMPTY(&vq->iotlb_list))
			vhost_user_iotlb_cache_random_evict(dev, vq);
		else
			vhost_user_iotlb_pending_remove_all(vq);
		new_node = vhost_user_iotlb_pool_get(vq);
		if (new_node == NULL) {
			VHOST_LOG_CONFIG(dev->ifname, ERR,
				"IOTLB pool vq %"PRIu32" still empty, cache insertion failed\n",
				vq->index);
			return;
		}
	}

	new_node->iova = iova;
	new_node->uaddr = uaddr;
	new_node->size = size;
	new_node->perm = perm;

	rte_rwlock_write_lock(&vq->iotlb_lock);

	TAILQ_FOREACH(node, &vq->iotlb_list, next) {
		/*
		 * Entries must be invalidated before being updated.
		 * So if iova already in list, assume identical.
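		 * In that case the new node is not needed, so it is returned
		 * to the free pool instead of inserting a duplicate IOVA.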
		 */
		if (node->iova == new_node->iova) {
			vhost_user_iotlb_pool_put(vq, new_node);
			goto unlock;
		} else if (node->iova > new_node->iova) {
			mem_set_dump((void *)(uintptr_t)new_node->uaddr, new_node->size, true,
				hua_to_alignment(dev->mem, (void *)(uintptr_t)new_node->uaddr));
			TAILQ_INSERT_BEFORE(node, new_node, next);
			vq->iotlb_cache_nr++;
			goto unlock;
		}
	}

	mem_set_dump((void *)(uintptr_t)new_node->uaddr, new_node->size, true,
		hua_to_alignment(dev->mem, (void *)(uintptr_t)new_node->uaddr));
	TAILQ_INSERT_TAIL(&vq->iotlb_list, new_node, next);
	vq->iotlb_cache_nr++;

unlock:
	vhost_user_iotlb_pending_remove(vq, iova, size, perm);

	rte_rwlock_write_unlock(&vq->iotlb_lock);
}

void
vhost_user_iotlb_cache_remove(struct virtio_net *dev, struct vhost_virtqueue *vq,
				uint64_t iova, uint64_t size)
{
	struct vhost_iotlb_entry *node, *temp_node, *prev_node = NULL;
	uint64_t alignment, mask;

	if (unlikely(!size))
		return;

	rte_rwlock_write_lock(&vq->iotlb_lock);

	RTE_TAILQ_FOREACH_SAFE(node, &vq->iotlb_list, next, temp_node) {
		/* Sorted list */
		if (unlikely(iova + size < node->iova))
			break;

		if (iova < node->iova + node->size) {
			struct vhost_iotlb_entry *next_node;
			alignment = hua_to_alignment(dev->mem, (void *)(uintptr_t)node->uaddr);
			mask = ~(alignment - 1);

			/* Don't disable coredump if the previous node is in the same page */
			if (prev_node == NULL ||
					(node->uaddr & mask) != (prev_node->uaddr & mask)) {
				next_node = RTE_TAILQ_NEXT(node, next);
				/* Don't disable coredump if the next node is in the same page */
				if (next_node == NULL || ((node->uaddr + node->size - 1) & mask) !=
						(next_node->uaddr & mask))
					mem_set_dump((void *)(uintptr_t)node->uaddr, node->size,
						false, alignment);
			}

			TAILQ_REMOVE(&vq->iotlb_list, node, next);
			vhost_user_iotlb_pool_put(vq, node);
			vq->iotlb_cache_nr--;
		} else
			prev_node = node;
	}

	rte_rwlock_write_unlock(&vq->iotlb_lock);
}

uint64_t
vhost_user_iotlb_cache_find(struct vhost_virtqueue *vq, uint64_t iova,
				uint64_t *size, uint8_t perm)
{
	struct vhost_iotlb_entry *node;
	uint64_t offset, vva = 0, mapped = 0;

	if (unlikely(!*size))
		goto out;

	TAILQ_FOREACH(node, &vq->iotlb_list, next) {
		/* List sorted by iova */
		if (unlikely(iova < node->iova))
			break;

		if (iova >= node->iova + node->size)
			continue;

		if (unlikely((perm & node->perm) != perm)) {
			vva = 0;
			break;
		}

		offset = iova - node->iova;
		if (!vva)
			vva = node->uaddr + offset;

		mapped += node->size - offset;
		iova = node->iova + node->size;

		if (mapped >= *size)
			break;
	}

out:
	/* Only part of the requested chunk is mapped */
	if (unlikely(mapped < *size))
		*size = mapped;

	return vva;
}

void
vhost_user_iotlb_flush_all(struct virtio_net *dev, struct vhost_virtqueue *vq)
{
	vhost_user_iotlb_cache_remove_all(dev, vq);
	vhost_user_iotlb_pending_remove_all(vq);
}

int
vhost_user_iotlb_init(struct virtio_net *dev, struct vhost_virtqueue *vq)
{
	unsigned int i;
	int socket = 0;

	if (vq->iotlb_pool) {
		/*
		 * The cache has already been initialized,
		 * just drop all cached and pending entries.
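		 * The pool itself is freed and reallocated below, on the
		 * virtqueue's NUMA node when RTE_LIBRTE_VHOST_NUMA is enabled.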
		 */
		vhost_user_iotlb_flush_all(dev, vq);
		rte_free(vq->iotlb_pool);
	}

#ifdef RTE_LIBRTE_VHOST_NUMA
	if (get_mempolicy(&socket, NULL, 0, vq, MPOL_F_NODE | MPOL_F_ADDR) != 0)
		socket = 0;
#endif

	rte_spinlock_init(&vq->iotlb_free_lock);
	rte_rwlock_init(&vq->iotlb_lock);
	rte_rwlock_init(&vq->iotlb_pending_lock);

	SLIST_INIT(&vq->iotlb_free_list);
	TAILQ_INIT(&vq->iotlb_list);
	TAILQ_INIT(&vq->iotlb_pending_list);

	if (dev->flags & VIRTIO_DEV_SUPPORT_IOMMU) {
		vq->iotlb_pool = rte_calloc_socket("iotlb", IOTLB_CACHE_SIZE,
			sizeof(struct vhost_iotlb_entry), 0, socket);
		if (!vq->iotlb_pool) {
			VHOST_LOG_CONFIG(dev->ifname, ERR,
				"Failed to create IOTLB cache pool for vq %"PRIu32"\n",
				vq->index);
			return -1;
		}
		for (i = 0; i < IOTLB_CACHE_SIZE; i++)
			vhost_user_iotlb_pool_put(vq, &vq->iotlb_pool[i]);
	}

	vq->iotlb_cache_nr = 0;

	return 0;
}

void
vhost_user_iotlb_destroy(struct vhost_virtqueue *vq)
{
	rte_free(vq->iotlb_pool);
}