/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2016 6WIND S.A.
 * Copyright 2020 Mellanox Technologies, Ltd
 */
#include <stddef.h>

#include <rte_eal_memconfig.h>
#include <rte_eal_paging.h>
#include <rte_errno.h>
#include <rte_mempool.h>
#include <rte_malloc.h>
#include <rte_rwlock.h>

#include "mlx5_glue.h"
#include "mlx5_common.h"
#include "mlx5_common_mp.h"
#include "mlx5_common_mr.h"
#include "mlx5_common_os.h"
#include "mlx5_common_log.h"
#include "mlx5_malloc.h"

struct mr_find_contig_memsegs_data {
	uintptr_t addr;
	uintptr_t start;
	uintptr_t end;
	const struct rte_memseg_list *msl;
};

/* Virtual memory range. */
struct mlx5_range {
	uintptr_t start;
	uintptr_t end;
};

/** Memory region for a mempool. */
struct mlx5_mempool_mr {
	struct mlx5_pmd_mr pmd_mr;
	uint32_t refcnt; /**< Number of mempools sharing this MR. */
};

/* Mempool registration. */
struct mlx5_mempool_reg {
	LIST_ENTRY(mlx5_mempool_reg) next;
	/** Registered mempool, used to designate registrations. */
	struct rte_mempool *mp;
	/** Memory regions for the address ranges of the mempool. */
	struct mlx5_mempool_mr *mrs;
	/** Number of memory regions. */
	unsigned int mrs_n;
};

void
mlx5_mprq_buf_free_cb(void *addr __rte_unused, void *opaque)
{
	struct mlx5_mprq_buf *buf = opaque;

	if (__atomic_load_n(&buf->refcnt, __ATOMIC_RELAXED) == 1) {
		rte_mempool_put(buf->mp, buf);
	} else if (unlikely(__atomic_sub_fetch(&buf->refcnt, 1,
					       __ATOMIC_RELAXED) == 0)) {
		__atomic_store_n(&buf->refcnt, 1, __ATOMIC_RELAXED);
		rte_mempool_put(buf->mp, buf);
	}
}

/**
 * Expand the B-tree table to a given size. Must not be called while holding
 * memory_hotplug_lock or share_cache.rwlock due to rte_realloc().
 *
 * @param bt
 *   Pointer to B-tree structure.
 * @param n
 *   Number of entries for expansion.
 *
 * @return
 *   0 on success, -1 on failure.
 */
static int
mr_btree_expand(struct mlx5_mr_btree *bt, int n)
{
	void *mem;
	int ret = 0;

	if (n <= bt->size)
		return ret;
	/*
	 * The downside of directly using rte_realloc() is that SOCKET_ID_ANY
	 * is used internally if there is no room to expand. Because this is
	 * a rare case and part of a very slow path, it is acceptable.
	 * Initially cache_bh[] is given practically enough space, and once
	 * expanded, expansion should not be needed again.
	 */
	mem = mlx5_realloc(bt->table, MLX5_MEM_RTE | MLX5_MEM_ZERO,
			   n * sizeof(struct mr_cache_entry), 0, SOCKET_ID_ANY);
	if (mem == NULL) {
		/* Not an error, B-tree search will be skipped. */
		DRV_LOG(WARNING, "failed to expand MR B-tree (%p) table",
			(void *)bt);
		ret = -1;
	} else {
		DRV_LOG(DEBUG, "expanded MR B-tree table (size=%u)", n);
		bt->table = mem;
		bt->size = n;
	}
	return ret;
}

/**
 * Look up LKey from given B-tree lookup table, store the last index and return
 * searched LKey.
 *
 * @param bt
 *   Pointer to B-tree structure.
 * @param[out] idx
 *   Pointer to index. Even on search failure, returns index where it stops
 *   searching so that index can be used when inserting a new entry.
 * @param addr
 *   Search key.
 *
 * @return
 *   Searched LKey on success, UINT32_MAX on no match.
 */
static uint32_t
mr_btree_lookup(struct mlx5_mr_btree *bt, uint16_t *idx, uintptr_t addr)
{
	struct mr_cache_entry *lkp_tbl;
	uint16_t n;
	uint16_t base = 0;

	MLX5_ASSERT(bt != NULL);
	lkp_tbl = *bt->table;
	n = bt->len;
	/* First entry must be NULL for comparison. */
	MLX5_ASSERT(bt->len > 0 || (lkp_tbl[0].start == 0 &&
				    lkp_tbl[0].lkey == UINT32_MAX));
	/* Binary search. */
	do {
		register uint16_t delta = n >> 1;

		if (addr < lkp_tbl[base + delta].start) {
			n = delta;
		} else {
			base += delta;
			n -= delta;
		}
	} while (n > 1);
	MLX5_ASSERT(addr >= lkp_tbl[base].start);
	*idx = base;
	if (addr < lkp_tbl[base].end)
		return lkp_tbl[base].lkey;
	/* Not found. */
	return UINT32_MAX;
}

/**
 * Insert an entry to B-tree lookup table.
 *
 * @param bt
 *   Pointer to B-tree structure.
 * @param entry
 *   Pointer to new entry to insert.
 *
 * @return
 *   0 on success, -1 on failure.
 */
static int
mr_btree_insert(struct mlx5_mr_btree *bt, struct mr_cache_entry *entry)
{
	struct mr_cache_entry *lkp_tbl;
	uint16_t idx = 0;
	size_t shift;

	MLX5_ASSERT(bt != NULL);
	MLX5_ASSERT(bt->len <= bt->size);
	MLX5_ASSERT(bt->len > 0);
	lkp_tbl = *bt->table;
	/* Find out the slot for insertion. */
	if (mr_btree_lookup(bt, &idx, entry->start) != UINT32_MAX) {
		DRV_LOG(DEBUG,
			"abort insertion to B-tree(%p): already exist at"
			" idx=%u [0x%" PRIxPTR ", 0x%" PRIxPTR ") lkey=0x%x",
			(void *)bt, idx, entry->start, entry->end, entry->lkey);
		/* Already exist, return. */
		return 0;
	}
	/* If table is full, return error. */
	if (unlikely(bt->len == bt->size)) {
		bt->overflow = 1;
		return -1;
	}
	/* Insert entry. */
	++idx;
	shift = (bt->len - idx) * sizeof(struct mr_cache_entry);
	if (shift)
		memmove(&lkp_tbl[idx + 1], &lkp_tbl[idx], shift);
	lkp_tbl[idx] = *entry;
	bt->len++;
	DRV_LOG(DEBUG,
		"inserted B-tree(%p)[%u],"
		" [0x%" PRIxPTR ", 0x%" PRIxPTR ") lkey=0x%x",
		(void *)bt, idx, entry->start, entry->end, entry->lkey);
	return 0;
}

/**
 * Initialize B-tree and allocate memory for lookup table.
 *
 * @param bt
 *   Pointer to B-tree structure.
 * @param n
 *   Number of entries to allocate.
 * @param socket
 *   NUMA socket on which memory must be allocated.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_mr_btree_init(struct mlx5_mr_btree *bt, int n, int socket)
{
	if (bt == NULL) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	MLX5_ASSERT(!bt->table && !bt->size);
	memset(bt, 0, sizeof(*bt));
	bt->table = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
				sizeof(struct mr_cache_entry) * n,
				0, socket);
	if (bt->table == NULL) {
		rte_errno = ENOMEM;
		DRV_LOG(DEBUG,
			"failed to allocate memory for btree cache on socket "
			"%d", socket);
		return -rte_errno;
	}
	bt->size = n;
	/* First entry must be NULL for binary search. */
	(*bt->table)[bt->len++] = (struct mr_cache_entry) {
		.lkey = UINT32_MAX,
	};
	DRV_LOG(DEBUG, "initialized B-tree %p with table %p",
		(void *)bt, (void *)bt->table);
	return 0;
}
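
/*
 * Illustrative sketch, not part of the upstream driver: shows how the B-tree
 * cache above fits together - init, insert, lookup, free. The function name
 * and the sample address range/LKey are hypothetical; real callers go through
 * the per-queue and global cache paths implemented below.
 */
static __rte_unused int
mlx5_mr_btree_usage_sketch(int socket)
{
	struct mlx5_mr_btree bt;
	struct mr_cache_entry entry = {
		.start = 0x100000,                /* Sample range start. */
		.end = 0x200000,                  /* Sample range end. */
		.lkey = rte_cpu_to_be_32(0x1234), /* Sample LKey, kept in BE. */
	};
	uint16_t idx = 0;
	uint32_t lkey;

	if (mlx5_mr_btree_init(&bt, MLX5_MR_BTREE_CACHE_N, socket) < 0)
		return -rte_errno;
	/* Insertion keeps the table sorted by start address. */
	if (mr_btree_insert(&bt, &entry) < 0)
		DRV_LOG(DEBUG, "sketch: B-tree table is full");
	/* Lookup returns the LKey of the entry covering the address. */
	lkey = mr_btree_lookup(&bt, &idx, entry.start + 0x100);
	mlx5_mr_btree_free(&bt);
	return lkey == entry.lkey ? 0 : -1;
}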

/**
 * Free B-tree resources.
 *
 * @param bt
 *   Pointer to B-tree structure.
 */
void
mlx5_mr_btree_free(struct mlx5_mr_btree *bt)
{
	if (bt == NULL)
		return;
	DRV_LOG(DEBUG, "freeing B-tree %p with table %p",
		(void *)bt, (void *)bt->table);
	mlx5_free(bt->table);
	memset(bt, 0, sizeof(*bt));
}

/**
 * Dump all the entries in a B-tree.
 *
 * @param bt
 *   Pointer to B-tree structure.
 */
void
mlx5_mr_btree_dump(struct mlx5_mr_btree *bt __rte_unused)
{
#ifdef RTE_LIBRTE_MLX5_DEBUG
	int idx;
	struct mr_cache_entry *lkp_tbl;

	if (bt == NULL)
		return;
	lkp_tbl = *bt->table;
	for (idx = 0; idx < bt->len; ++idx) {
		struct mr_cache_entry *entry = &lkp_tbl[idx];

		DRV_LOG(DEBUG, "B-tree(%p)[%u],"
			" [0x%" PRIxPTR ", 0x%" PRIxPTR ") lkey=0x%x",
			(void *)bt, idx, entry->start, entry->end, entry->lkey);
	}
#endif
}

/**
 * Initialize per-queue MR control descriptor.
 *
 * @param mr_ctrl
 *   Pointer to MR control structure.
 * @param dev_gen_ptr
 *   Pointer to generation number of global cache.
 * @param socket
 *   NUMA socket on which memory must be allocated.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_mr_ctrl_init(struct mlx5_mr_ctrl *mr_ctrl, uint32_t *dev_gen_ptr,
		  int socket)
{
	if (mr_ctrl == NULL) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	/* Save pointer of global generation number to check memory event. */
	mr_ctrl->dev_gen_ptr = dev_gen_ptr;
	/* Initialize B-tree and allocate memory for bottom-half cache table. */
	return mlx5_mr_btree_init(&mr_ctrl->cache_bh, MLX5_MR_BTREE_CACHE_N,
				  socket);
}

/**
 * Find a virtually contiguous memory chunk in a given MR.
 *
 * @param mr
 *   Pointer to the MR structure.
 * @param[out] entry
 *   Pointer to returning MR cache entry. If not found, this will not be
 *   updated.
 * @param base_idx
 *   Start index of the memseg bitmap.
 *
 * @return
 *   Next index to go on lookup.
 */
static int
mr_find_next_chunk(struct mlx5_mr *mr, struct mr_cache_entry *entry,
		   int base_idx)
{
	uintptr_t start = 0;
	uintptr_t end = 0;
	uint32_t idx = 0;

	/* MR for external memory doesn't have memseg list. */
	if (mr->msl == NULL) {
		MLX5_ASSERT(mr->ms_bmp_n == 1);
		MLX5_ASSERT(mr->ms_n == 1);
		MLX5_ASSERT(base_idx == 0);
		/*
		 * Can't search it from memseg list but get it directly from
		 * pmd_mr as there's only one chunk.
		 */
		entry->start = (uintptr_t)mr->pmd_mr.addr;
		entry->end = (uintptr_t)mr->pmd_mr.addr + mr->pmd_mr.len;
		entry->lkey = rte_cpu_to_be_32(mr->pmd_mr.lkey);
		/* Returning 1 ends iteration. */
		return 1;
	}
	for (idx = base_idx; idx < mr->ms_bmp_n; ++idx) {
		if (rte_bitmap_get(mr->ms_bmp, idx)) {
			const struct rte_memseg_list *msl;
			const struct rte_memseg *ms;

			msl = mr->msl;
			ms = rte_fbarray_get(&msl->memseg_arr,
					     mr->ms_base_idx + idx);
			MLX5_ASSERT(msl->page_sz == ms->hugepage_sz);
			if (!start)
				start = ms->addr_64;
			end = ms->addr_64 + ms->hugepage_sz;
		} else if (start) {
			/* Passed the end of a fragment. */
			break;
		}
	}
	if (start) {
		/* Found one chunk. */
		entry->start = start;
		entry->end = end;
		entry->lkey = rte_cpu_to_be_32(mr->pmd_mr.lkey);
	}
	return idx;
}

/**
 * Insert an MR to the global B-tree cache. It may fail when short of memory.
 * Then, this entry will have to be searched by mr_lookup_list() in
 * mlx5_mr_create() on miss.
 *
 * @param share_cache
 *   Pointer to a global shared MR cache.
 * @param mr
 *   Pointer to MR to insert.
 *
 * @return
 *   0 on success, -1 on failure.
 */
int
mlx5_mr_insert_cache(struct mlx5_mr_share_cache *share_cache,
		     struct mlx5_mr *mr)
{
	unsigned int n;

	DRV_LOG(DEBUG, "Inserting MR(%p) to global cache(%p)",
		(void *)mr, (void *)share_cache);
	for (n = 0; n < mr->ms_bmp_n; ) {
		struct mr_cache_entry entry;

		memset(&entry, 0, sizeof(entry));
		/* Find a contiguous chunk and advance the index. */
		n = mr_find_next_chunk(mr, &entry, n);
		if (!entry.end)
			break;
		if (mr_btree_insert(&share_cache->cache, &entry) < 0) {
			/*
			 * Overflowed, but the global table cannot be expanded
			 * here because that could cause a deadlock.
			 */
			return -1;
		}
	}
	return 0;
}

/**
 * Look up an address in the original global MR list.
 *
 * @param share_cache
 *   Pointer to a global shared MR cache.
 * @param[out] entry
 *   Pointer to returning MR cache entry. If no match, this will not be updated.
 * @param addr
 *   Search key.
 *
 * @return
 *   Found MR on match, NULL otherwise.
 */
struct mlx5_mr *
mlx5_mr_lookup_list(struct mlx5_mr_share_cache *share_cache,
		    struct mr_cache_entry *entry, uintptr_t addr)
{
	struct mlx5_mr *mr;

	/* Iterate all the existing MRs. */
	LIST_FOREACH(mr, &share_cache->mr_list, mr) {
		unsigned int n;

		if (mr->ms_n == 0)
			continue;
		for (n = 0; n < mr->ms_bmp_n; ) {
			struct mr_cache_entry ret;

			memset(&ret, 0, sizeof(ret));
			n = mr_find_next_chunk(mr, &ret, n);
			if (addr >= ret.start && addr < ret.end) {
				/* Found. */
				*entry = ret;
				return mr;
			}
		}
	}
	return NULL;
}

/**
 * Look up an address in the global MR cache.
 *
 * @param share_cache
 *   Pointer to a global shared MR cache.
 * @param[out] entry
 *   Pointer to returning MR cache entry. If no match, this will not be updated.
 * @param addr
 *   Search key.
 *
 * @return
 *   Searched LKey on success, UINT32_MAX on failure and rte_errno is set.
 */
static uint32_t
mlx5_mr_lookup_cache(struct mlx5_mr_share_cache *share_cache,
		     struct mr_cache_entry *entry, uintptr_t addr)
{
	uint16_t idx;
	uint32_t lkey = UINT32_MAX;
	struct mlx5_mr *mr;

	/*
	 * If the global cache has overflowed since it failed to expand the
	 * B-tree table, it can't have all the existing MRs. Then, the address
	 * has to be searched by traversing the original MR list instead, which
	 * is a very slow path. Otherwise, the global cache is all inclusive.
	 */
	if (!unlikely(share_cache->cache.overflow)) {
		lkey = mr_btree_lookup(&share_cache->cache, &idx, addr);
		if (lkey != UINT32_MAX)
			*entry = (*share_cache->cache.table)[idx];
	} else {
		/* Falling back to the slowest path. */
		mr = mlx5_mr_lookup_list(share_cache, entry, addr);
		if (mr != NULL)
			lkey = entry->lkey;
	}
	MLX5_ASSERT(lkey == UINT32_MAX || (addr >= entry->start &&
					   addr < entry->end));
	return lkey;
}

/**
 * Free MR resources. The MR lock must not be held to avoid a deadlock:
 * rte_free() can raise a memory free event and the callback function will
 * spin on the lock.
 *
 * @param mr
 *   Pointer to MR to free.
 */
void
mlx5_mr_free(struct mlx5_mr *mr, mlx5_dereg_mr_t dereg_mr_cb)
{
	if (mr == NULL)
		return;
	DRV_LOG(DEBUG, "freeing MR(%p):", (void *)mr);
	dereg_mr_cb(&mr->pmd_mr);
	if (mr->ms_bmp != NULL)
		rte_bitmap_free(mr->ms_bmp);
	mlx5_free(mr);
}

void
mlx5_mr_rebuild_cache(struct mlx5_mr_share_cache *share_cache)
{
	struct mlx5_mr *mr;

	DRV_LOG(DEBUG, "Rebuild dev cache[] %p", (void *)share_cache);
	/* Flush cache to rebuild. */
	share_cache->cache.len = 1;
	share_cache->cache.overflow = 0;
	/* Iterate all the existing MRs. */
	LIST_FOREACH(mr, &share_cache->mr_list, mr)
		if (mlx5_mr_insert_cache(share_cache, mr) < 0)
			return;
}

/**
 * Release resources of detached MRs having no online entry.
 *
 * @param share_cache
 *   Pointer to a global shared MR cache.
 */
static void
mlx5_mr_garbage_collect(struct mlx5_mr_share_cache *share_cache)
{
	struct mlx5_mr *mr_next;
	struct mlx5_mr_list free_list = LIST_HEAD_INITIALIZER(free_list);

	/* Must be called from the primary process. */
	MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
	/*
	 * MRs can't be freed while holding the lock because rte_free() could
	 * call the memory free callback function, which would deadlock.
	 */
	rte_rwlock_write_lock(&share_cache->rwlock);
	/* Detach the whole free list and release it after unlocking. */
	free_list = share_cache->mr_free_list;
	LIST_INIT(&share_cache->mr_free_list);
	rte_rwlock_write_unlock(&share_cache->rwlock);
	/* Release resources. */
	mr_next = LIST_FIRST(&free_list);
	while (mr_next != NULL) {
		struct mlx5_mr *mr = mr_next;

		mr_next = LIST_NEXT(mr, mr);
		mlx5_mr_free(mr, share_cache->dereg_mr_cb);
	}
}

/* Called during rte_memseg_contig_walk() by mlx5_mr_create(). */
static int
mr_find_contig_memsegs_cb(const struct rte_memseg_list *msl,
			  const struct rte_memseg *ms, size_t len, void *arg)
{
	struct mr_find_contig_memsegs_data *data = arg;

	if (data->addr < ms->addr_64 || data->addr >= ms->addr_64 + len)
		return 0;
	/* Found, save it and stop walking. */
	data->start = ms->addr_64;
	data->end = ms->addr_64 + len;
	data->msl = msl;
	return 1;
}
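
/*
 * Illustrative sketch, not part of the upstream driver: how the callback
 * above is combined with rte_memseg_contig_walk() to find the virtually
 * contiguous DPDK memory range containing a given address, in the same way
 * mlx5_mr_create_primary() does below. The helper name is hypothetical.
 */
static __rte_unused int
mlx5_mr_find_contig_range_sketch(uintptr_t addr, uintptr_t *start,
				 uintptr_t *end)
{
	struct mr_find_contig_memsegs_data data = { .addr = addr, };

	/*
	 * The walk returns non-zero once the callback reports a hit;
	 * zero means no memseg contains the address.
	 */
	if (!rte_memseg_contig_walk(mr_find_contig_memsegs_cb, &data))
		return -1;
	*start = data.start;
	*end = data.end;
	return 0;
}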

/**
 * Create a new global Memory Region (MR) for a missing virtual address.
 * This API should be called from a secondary process; a request is then sent
 * to the primary process in order to create an MR for the address. As the
 * global MR list is in shared memory, the following LKey lookup should succeed
 * unless the request fails.
 *
 * @param cdev
 *   Pointer to the mlx5 common device.
 * @param share_cache
 *   Pointer to a global shared MR cache.
 * @param[out] entry
 *   Pointer to returning MR cache entry, found in the global cache or newly
 *   created. If failed to create one, this will not be updated.
 * @param addr
 *   Target virtual address to register.
 *
 * @return
 *   Searched LKey on success, UINT32_MAX on failure and rte_errno is set.
 */
static uint32_t
mlx5_mr_create_secondary(struct mlx5_common_device *cdev,
			 struct mlx5_mr_share_cache *share_cache,
			 struct mr_cache_entry *entry, uintptr_t addr)
{
	int ret;

	DRV_LOG(DEBUG, "Requesting MR creation for address (%p)", (void *)addr);
	ret = mlx5_mp_req_mr_create(cdev, addr);
	if (ret) {
		DRV_LOG(DEBUG, "Fail to request MR creation for address (%p)",
			(void *)addr);
		return UINT32_MAX;
	}
	rte_rwlock_read_lock(&share_cache->rwlock);
	/* Fill in output data. */
	mlx5_mr_lookup_cache(share_cache, entry, addr);
	/* Lookup can't fail. */
	MLX5_ASSERT(entry->lkey != UINT32_MAX);
	rte_rwlock_read_unlock(&share_cache->rwlock);
	DRV_LOG(DEBUG, "MR CREATED by primary process for %p:\n"
		" [0x%" PRIxPTR ", 0x%" PRIxPTR "), lkey=0x%x",
		(void *)addr, entry->start, entry->end, entry->lkey);
	return entry->lkey;
}

/**
 * Create a new global Memory Region (MR) for a missing virtual address.
 * Register the entire virtually contiguous memory chunk around the address.
 *
 * @param pd
 *   Pointer to pd of a device (net, regex, vdpa,...).
 * @param share_cache
 *   Pointer to a global shared MR cache.
 * @param[out] entry
 *   Pointer to returning MR cache entry, found in the global cache or newly
 *   created. If failed to create one, this will not be updated.
 * @param addr
 *   Target virtual address to register.
 * @param mr_ext_memseg_en
 *   Configurable flag about external memory segment enable or not.
 *
 * @return
 *   Searched LKey on success, UINT32_MAX on failure and rte_errno is set.
 */
static uint32_t
mlx5_mr_create_primary(void *pd,
		       struct mlx5_mr_share_cache *share_cache,
		       struct mr_cache_entry *entry, uintptr_t addr,
		       unsigned int mr_ext_memseg_en)
{
	struct mr_find_contig_memsegs_data data = {.addr = addr, };
	struct mr_find_contig_memsegs_data data_re;
	const struct rte_memseg_list *msl;
	const struct rte_memseg *ms;
	struct mlx5_mr *mr = NULL;
	int ms_idx_shift = -1;
	uint32_t bmp_size;
	void *bmp_mem;
	uint32_t ms_n;
	uint32_t n;
	size_t len;

	DRV_LOG(DEBUG, "Creating a MR using address (%p)", (void *)addr);
	/*
	 * Release detached MRs if any. This can't be called while holding
	 * either memory_hotplug_lock or share_cache->rwlock. MRs on the free
	 * list have been detached by the memory free event but couldn't be
	 * released inside the callback due to the deadlock risk. As a result,
	 * releasing resources is quite opportunistic.
	 */
	mlx5_mr_garbage_collect(share_cache);
	/*
	 * If enabled, find out a contiguous virtual address chunk in use, to
	 * which the given address belongs, in order to register the maximum
	 * range. In the best case where mempools are not dynamically recreated
	 * and '--socket-mem' is specified as an EAL option, it is very likely
	 * to have only one MR (LKey) per socket and per hugepage size even
	 * though the system memory is highly fragmented. As the whole memory
	 * chunk will be pinned by the kernel, it can't be reused unless the
	 * entire chunk is freed from EAL.
	 *
	 * If disabled, just register one memseg (page). Then, memory
	 * consumption will be minimized but it may drop performance if there
	 * are many MRs to lookup on the datapath.
	 */
	if (!mr_ext_memseg_en) {
		data.msl = rte_mem_virt2memseg_list((void *)addr);
		data.start = RTE_ALIGN_FLOOR(addr, data.msl->page_sz);
		data.end = data.start + data.msl->page_sz;
	} else if (!rte_memseg_contig_walk(mr_find_contig_memsegs_cb, &data)) {
		DRV_LOG(WARNING,
			"Unable to find virtually contiguous"
			" chunk for address (%p)."
			" rte_memseg_contig_walk() failed.", (void *)addr);
		rte_errno = ENXIO;
		goto err_nolock;
	}
alloc_resources:
	/* Addresses must be page-aligned. */
	MLX5_ASSERT(data.msl);
	MLX5_ASSERT(rte_is_aligned((void *)data.start, data.msl->page_sz));
	MLX5_ASSERT(rte_is_aligned((void *)data.end, data.msl->page_sz));
	msl = data.msl;
	ms = rte_mem_virt2memseg((void *)data.start, msl);
	len = data.end - data.start;
	MLX5_ASSERT(ms);
	MLX5_ASSERT(msl->page_sz == ms->hugepage_sz);
	/* Number of memsegs in the range. */
	ms_n = len / msl->page_sz;
	DRV_LOG(DEBUG, "Extending %p to [0x%" PRIxPTR ", 0x%" PRIxPTR "),"
		" page_sz=0x%" PRIx64 ", ms_n=%u",
		(void *)addr, data.start, data.end, msl->page_sz, ms_n);
	/* Size of memory for bitmap. */
	bmp_size = rte_bitmap_get_memory_footprint(ms_n);
	mr = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
			 RTE_ALIGN_CEIL(sizeof(*mr), RTE_CACHE_LINE_SIZE) +
			 bmp_size, RTE_CACHE_LINE_SIZE, msl->socket_id);
	if (mr == NULL) {
		DRV_LOG(DEBUG, "Unable to allocate memory for a new MR of"
			" address (%p).", (void *)addr);
		rte_errno = ENOMEM;
		goto err_nolock;
	}
	mr->msl = msl;
	/*
	 * Save the index of the first memseg and initialize the memseg bitmap.
	 * To see if a memseg of ms_idx in the memseg-list is still valid,
	 * check:
	 *	rte_bitmap_get(mr->ms_bmp, ms_idx - mr->ms_base_idx)
	 */
	mr->ms_base_idx = rte_fbarray_find_idx(&msl->memseg_arr, ms);
	bmp_mem = RTE_PTR_ALIGN_CEIL(mr + 1, RTE_CACHE_LINE_SIZE);
	mr->ms_bmp = rte_bitmap_init(ms_n, bmp_mem, bmp_size);
	if (mr->ms_bmp == NULL) {
		DRV_LOG(DEBUG, "Unable to initialize bitmap for a new MR of"
			" address (%p).", (void *)addr);
		rte_errno = EINVAL;
		goto err_nolock;
	}
	/*
	 * Should recheck whether the extended contiguous chunk is still valid.
	 * Because memory_hotplug_lock can't be held when there are memory
	 * related calls in a critical path, the resource allocation above
	 * can't be done under the lock. If the memory has been changed at this
	 * point, try again with just a single page. If not, go on with the big
	 * chunk atomically from here.
	 */
	rte_mcfg_mem_read_lock();
	data_re = data;
	if (len > msl->page_sz &&
	    !rte_memseg_contig_walk(mr_find_contig_memsegs_cb, &data_re)) {
		DRV_LOG(DEBUG,
			"Unable to find virtually contiguous chunk for address "
			"(%p). rte_memseg_contig_walk() failed.", (void *)addr);
		rte_errno = ENXIO;
		goto err_memlock;
	}
	if (data.start != data_re.start || data.end != data_re.end) {
		/*
		 * The extended contiguous chunk has been changed. Try again
		 * with a single memseg instead.
		 */
		data.start = RTE_ALIGN_FLOOR(addr, msl->page_sz);
		data.end = data.start + msl->page_sz;
		rte_mcfg_mem_read_unlock();
		mlx5_mr_free(mr, share_cache->dereg_mr_cb);
		goto alloc_resources;
	}
	MLX5_ASSERT(data.msl == data_re.msl);
	rte_rwlock_write_lock(&share_cache->rwlock);
	/*
	 * Check that the address is really missing. If another thread already
	 * created one, or it was missed earlier due to an overflow, abort the
	 * creation and return.
	 */
	if (mlx5_mr_lookup_cache(share_cache, entry, addr) != UINT32_MAX) {
		/*
		 * Insert to the global cache table. It may fail due to
		 * low-on-memory. Then, this entry will have to be searched
		 * here again.
		 */
		mr_btree_insert(&share_cache->cache, entry);
		DRV_LOG(DEBUG, "Found MR for %p on final lookup, abort",
			(void *)addr);
		rte_rwlock_write_unlock(&share_cache->rwlock);
		rte_mcfg_mem_read_unlock();
		/*
		 * Must be unlocked before calling rte_free() because
		 * mlx5_mr_mem_event_free_cb() can be called inside.
		 */
		mlx5_mr_free(mr, share_cache->dereg_mr_cb);
		return entry->lkey;
	}
	/*
	 * Trim start and end addresses for verbs MR. Set bits for registering
	 * memsegs but exclude already registered ones. Bitmap can be
	 * fragmented.
	 */
	for (n = 0; n < ms_n; ++n) {
		uintptr_t start;
		struct mr_cache_entry ret;

		memset(&ret, 0, sizeof(ret));
		start = data_re.start + n * msl->page_sz;
		/* Exclude memsegs already registered by other MRs. */
		if (mlx5_mr_lookup_cache(share_cache, &ret, start) ==
		    UINT32_MAX) {
			/*
			 * Start from the first unregistered memseg in the
			 * extended range.
			 */
			if (ms_idx_shift == -1) {
				mr->ms_base_idx += n;
				data.start = start;
				ms_idx_shift = n;
			}
			data.end = start + msl->page_sz;
			rte_bitmap_set(mr->ms_bmp, n - ms_idx_shift);
			++mr->ms_n;
		}
	}
	len = data.end - data.start;
	mr->ms_bmp_n = len / msl->page_sz;
	MLX5_ASSERT(ms_idx_shift + mr->ms_bmp_n <= ms_n);
	/*
	 * Finally create an MR for the memory chunk. Verbs: ibv_reg_mr() can
	 * be called while holding the memory lock because it doesn't use
	 * mlx5_alloc_buf_extern() which eventually calls rte_malloc_socket()
	 * through mlx5_alloc_verbs_buf().
	 */
	share_cache->reg_mr_cb(pd, (void *)data.start, len, &mr->pmd_mr);
	if (mr->pmd_mr.obj == NULL) {
		DRV_LOG(DEBUG, "Fail to create an MR for address (%p)",
			(void *)addr);
		rte_errno = EINVAL;
		goto err_mrlock;
	}
	MLX5_ASSERT((uintptr_t)mr->pmd_mr.addr == data.start);
	MLX5_ASSERT(mr->pmd_mr.len);
	LIST_INSERT_HEAD(&share_cache->mr_list, mr, mr);
	DRV_LOG(DEBUG, "MR CREATED (%p) for %p:\n"
		" [0x%" PRIxPTR ", 0x%" PRIxPTR "),"
		" lkey=0x%x base_idx=%u ms_n=%u, ms_bmp_n=%u",
		(void *)mr, (void *)addr, data.start, data.end,
		rte_cpu_to_be_32(mr->pmd_mr.lkey),
		mr->ms_base_idx, mr->ms_n, mr->ms_bmp_n);
	/* Insert to the global cache table. */
	mlx5_mr_insert_cache(share_cache, mr);
	/* Fill in output data. */
	mlx5_mr_lookup_cache(share_cache, entry, addr);
	/* Lookup can't fail. */
	MLX5_ASSERT(entry->lkey != UINT32_MAX);
	rte_rwlock_write_unlock(&share_cache->rwlock);
	rte_mcfg_mem_read_unlock();
	return entry->lkey;
err_mrlock:
	rte_rwlock_write_unlock(&share_cache->rwlock);
err_memlock:
	rte_mcfg_mem_read_unlock();
err_nolock:
	/*
	 * In case of error, as this can be called from the datapath, a warning
	 * message per error is preferable. Must be unlocked before calling
	 * rte_free() because mlx5_mr_mem_event_free_cb() can be called inside.
	 */
	mlx5_mr_free(mr, share_cache->dereg_mr_cb);
	return UINT32_MAX;
}
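
/*
 * Illustrative sketch, not part of the upstream driver: the memseg bitmap
 * check described in the comment inside mlx5_mr_create_primary() above,
 * wrapped as a helper. It tells whether a given memseg index of the MR's
 * memseg list is still covered by the MR. The helper name is hypothetical.
 */
static __rte_unused bool
mlx5_mr_covers_memseg_sketch(const struct mlx5_mr *mr, int ms_idx)
{
	uint32_t pos;

	/* MRs for external memory have no memseg list and no bitmap. */
	if (mr->msl == NULL)
		return false;
	pos = ms_idx - mr->ms_base_idx;
	/* Out-of-range indexes (including ms_idx < ms_base_idx) miss here. */
	if (pos >= mr->ms_bmp_n)
		return false;
	return rte_bitmap_get(mr->ms_bmp, pos) != 0;
}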

/**
 * Create a new global Memory Region (MR) for a missing virtual address.
 * This can be called from the primary and the secondary process.
 *
 * @param cdev
 *   Pointer to the mlx5 common device.
 * @param share_cache
 *   Pointer to a global shared MR cache.
 * @param[out] entry
 *   Pointer to returning MR cache entry, found in the global cache or newly
 *   created. If failed to create one, this will not be updated.
 * @param addr
 *   Target virtual address to register.
 *
 * @return
 *   Searched LKey on success, UINT32_MAX on failure and rte_errno is set.
 */
uint32_t
mlx5_mr_create(struct mlx5_common_device *cdev,
	       struct mlx5_mr_share_cache *share_cache,
	       struct mr_cache_entry *entry, uintptr_t addr)
{
	uint32_t ret = 0;

	switch (rte_eal_process_type()) {
	case RTE_PROC_PRIMARY:
		ret = mlx5_mr_create_primary(cdev->pd, share_cache, entry, addr,
					     cdev->config.mr_ext_memseg_en);
		break;
	case RTE_PROC_SECONDARY:
		ret = mlx5_mr_create_secondary(cdev, share_cache, entry, addr);
		break;
	default:
		break;
	}
	return ret;
}

/**
 * Look up an address in the global MR cache table. If not found, create a new
 * MR. Insert the found/created entry to the local bottom-half cache table.
 *
 * @param mr_ctrl
 *   Pointer to per-queue MR control structure.
 * @param[out] entry
 *   Pointer to returning MR cache entry, found in the global cache or newly
 *   created. If failed to create one, this is not written.
 * @param addr
 *   Search key.
 *
 * @return
 *   Searched LKey on success, UINT32_MAX on no match.
 */
static uint32_t
mr_lookup_caches(struct mlx5_mr_ctrl *mr_ctrl,
		 struct mr_cache_entry *entry, uintptr_t addr)
{
	struct mlx5_mr_share_cache *share_cache =
		container_of(mr_ctrl->dev_gen_ptr, struct mlx5_mr_share_cache,
			     dev_gen);
	struct mlx5_common_device *cdev =
		container_of(share_cache, struct mlx5_common_device, mr_scache);
	struct mlx5_mr_btree *bt = &mr_ctrl->cache_bh;
	uint32_t lkey;
	uint16_t idx;

	/* If local cache table is full, try to double it. */
	if (unlikely(bt->len == bt->size))
		mr_btree_expand(bt, bt->size << 1);
	/* Look up in the global cache. */
	rte_rwlock_read_lock(&share_cache->rwlock);
	lkey = mr_btree_lookup(&share_cache->cache, &idx, addr);
	if (lkey != UINT32_MAX) {
		/* Found. */
		*entry = (*share_cache->cache.table)[idx];
		rte_rwlock_read_unlock(&share_cache->rwlock);
		/*
		 * Update local cache. Even if it fails, return the found entry
		 * to update the top-half cache. Next time, this entry will be
		 * found in the global cache.
		 */
		mr_btree_insert(bt, entry);
		return lkey;
	}
	rte_rwlock_read_unlock(&share_cache->rwlock);
	/* First time to see the address? Create a new MR. */
	lkey = mlx5_mr_create(cdev, share_cache, entry, addr);
	/*
	 * Update the local cache if a new global MR was successfully created.
	 * Even if it failed to create one, there's no action to take in this
	 * datapath code. As the returned LKey is invalid, this will eventually
	 * make the HW fail.
	 */
	if (lkey != UINT32_MAX)
		mr_btree_insert(bt, entry);
	return lkey;
}

/**
 * Bottom-half of LKey search on datapath. First search in cache_bh[] and if
 * it misses, search in the global MR cache table and update the new entry to
 * the per-queue local caches.
 *
 * @param mr_ctrl
 *   Pointer to per-queue MR control structure.
 * @param addr
 *   Search key.
 *
 * @return
 *   Searched LKey on success, UINT32_MAX on no match.
 */
static uint32_t
mlx5_mr_addr2mr_bh(struct mlx5_mr_ctrl *mr_ctrl, uintptr_t addr)
{
	uint32_t lkey;
	uint16_t bh_idx = 0;
	/* Victim in top-half cache to replace with new entry. */
	struct mr_cache_entry *repl = &mr_ctrl->cache[mr_ctrl->head];

	/* Binary-search MR translation table. */
	lkey = mr_btree_lookup(&mr_ctrl->cache_bh, &bh_idx, addr);
	/* Update top-half cache. */
	if (likely(lkey != UINT32_MAX)) {
		*repl = (*mr_ctrl->cache_bh.table)[bh_idx];
	} else {
		/*
		 * If missed in the local lookup table, search in the global
		 * cache; the local cache_bh[] will be updated inside if
		 * possible. The top-half cache entry will also be updated.
		 */
		lkey = mr_lookup_caches(mr_ctrl, repl, addr);
		if (unlikely(lkey == UINT32_MAX))
			return UINT32_MAX;
	}
	/* Update the most recently used entry. */
	mr_ctrl->mru = mr_ctrl->head;
	/* Point to the next victim, the oldest. */
	mr_ctrl->head = (mr_ctrl->head + 1) % MLX5_MR_CACHE_N;
	return lkey;
}

/**
 * Release all the created MRs and resources of the global MR cache of a
 * device.
 *
 * @param share_cache
 *   Pointer to a global shared MR cache.
 */
void
mlx5_mr_release_cache(struct mlx5_mr_share_cache *share_cache)
{
	struct mlx5_mr *mr_next;

	rte_rwlock_write_lock(&share_cache->rwlock);
	/* Detach from MR list and move to free list. */
	mr_next = LIST_FIRST(&share_cache->mr_list);
	while (mr_next != NULL) {
		struct mlx5_mr *mr = mr_next;

		mr_next = LIST_NEXT(mr, mr);
		LIST_REMOVE(mr, mr);
		LIST_INSERT_HEAD(&share_cache->mr_free_list, mr, mr);
	}
	LIST_INIT(&share_cache->mr_list);
	/* Free global cache. */
	mlx5_mr_btree_free(&share_cache->cache);
	rte_rwlock_write_unlock(&share_cache->rwlock);
	/* Free all remaining MRs. */
	mlx5_mr_garbage_collect(share_cache);
}

/**
 * Initialize the global MR cache of a device.
 *
 * @param share_cache
 *   Pointer to a global shared MR cache.
 * @param socket
 *   NUMA socket on which memory must be allocated.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_mr_create_cache(struct mlx5_mr_share_cache *share_cache, int socket)
{
	/* Set the reg_mr and dereg_mr callback functions. */
	mlx5_os_set_reg_mr_cb(&share_cache->reg_mr_cb,
			      &share_cache->dereg_mr_cb);
	rte_rwlock_init(&share_cache->rwlock);
	rte_rwlock_init(&share_cache->mprwlock);
	share_cache->mp_cb_registered = 0;
	/* Initialize B-tree and allocate memory for global MR cache table. */
	return mlx5_mr_btree_init(&share_cache->cache,
				  MLX5_MR_BTREE_CACHE_N * 2, socket);
}
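
/*
 * Illustrative sketch, not part of the upstream driver: the top half of the
 * per-queue lookup is a small linear array (mr_ctrl->cache[]) scanned before
 * falling back to mlx5_mr_addr2mr_bh() above. The real datapath helpers are
 * inline in the header and additionally start from the MRU slot and check the
 * generation number; this simplified helper and its name are hypothetical.
 */
static __rte_unused uint32_t
mlx5_mr_linear_lookup_sketch(struct mlx5_mr_ctrl *mr_ctrl, uintptr_t addr)
{
	unsigned int i;

	/* Top half: linear scan of the small per-queue cache array. */
	for (i = 0; i < MLX5_MR_CACHE_N; i++) {
		const struct mr_cache_entry *entry = &mr_ctrl->cache[i];

		if (addr >= entry->start && addr < entry->end)
			return entry->lkey;
	}
	/* Bottom half: B-tree search, global cache and MR creation on miss. */
	return mlx5_mr_addr2mr_bh(mr_ctrl, addr);
}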

/**
 * Flush all of the local cache entries.
 *
 * @param mr_ctrl
 *   Pointer to per-queue MR local cache.
 */
void
mlx5_mr_flush_local_cache(struct mlx5_mr_ctrl *mr_ctrl)
{
	/* Reset the most-recently-used index. */
	mr_ctrl->mru = 0;
	/* Reset the linear search array. */
	mr_ctrl->head = 0;
	memset(mr_ctrl->cache, 0, sizeof(mr_ctrl->cache));
	/* Reset the B-tree table. */
	mr_ctrl->cache_bh.len = 1;
	mr_ctrl->cache_bh.overflow = 0;
	/* Update the generation number. */
	mr_ctrl->cur_gen = *mr_ctrl->dev_gen_ptr;
	DRV_LOG(DEBUG, "mr_ctrl(%p): flushed, cur_gen=%d",
		(void *)mr_ctrl, mr_ctrl->cur_gen);
}

/**
 * Create a memory region for external memory, that is, memory which is not
 * part of the DPDK memory segments.
 *
 * @param pd
 *   Pointer to pd of a device (net, regex, vdpa,...).
 * @param addr
 *   Starting virtual address of memory.
 * @param len
 *   Length of memory segment being mapped.
 * @param socket_id
 *   Socket to allocate heap memory for the control structures.
 *
 * @return
 *   Pointer to MR structure on success, NULL otherwise.
 */
struct mlx5_mr *
mlx5_create_mr_ext(void *pd, uintptr_t addr, size_t len, int socket_id,
		   mlx5_reg_mr_t reg_mr_cb)
{
	struct mlx5_mr *mr = NULL;

	mr = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
			 RTE_ALIGN_CEIL(sizeof(*mr), RTE_CACHE_LINE_SIZE),
			 RTE_CACHE_LINE_SIZE, socket_id);
	if (mr == NULL)
		return NULL;
	reg_mr_cb(pd, (void *)addr, len, &mr->pmd_mr);
	if (mr->pmd_mr.obj == NULL) {
		DRV_LOG(WARNING,
			"Fail to create MR for address (%p)",
			(void *)addr);
		mlx5_free(mr);
		return NULL;
	}
	mr->msl = NULL; /* Mark it as external memory. */
	mr->ms_bmp = NULL;
	mr->ms_n = 1;
	mr->ms_bmp_n = 1;
	DRV_LOG(DEBUG,
		"MR CREATED (%p) for external memory %p:\n"
		" [0x%" PRIxPTR ", 0x%" PRIxPTR "),"
		" lkey=0x%x base_idx=%u ms_n=%u, ms_bmp_n=%u",
		(void *)mr, (void *)addr,
		addr, addr + len, rte_cpu_to_be_32(mr->pmd_mr.lkey),
		mr->ms_base_idx, mr->ms_n, mr->ms_bmp_n);
	return mr;
}

/**
 * Callback for memory free event. Iterate freed memsegs and check whether they
 * belong to an existing MR. If found, clear the bit from the bitmap of the MR.
 * As a result, the MR would be fragmented. If it becomes empty, the MR will be
 * freed later by mlx5_mr_garbage_collect(). Even if this callback is called
 * from a secondary process, the garbage collector will be called in the
 * primary process as the secondary process can't call mlx5_mr_create().
 *
 * The global cache must be rebuilt if there's any change and this event has to
 * be propagated to dataplane threads to flush the local caches.
 *
 * @param share_cache
 *   Pointer to a global shared MR cache.
 * @param ibdev_name
 *   Name of ibv device.
 * @param addr
 *   Address of freed memory.
 * @param len
 *   Size of freed memory.
 */
void
mlx5_free_mr_by_addr(struct mlx5_mr_share_cache *share_cache,
		     const char *ibdev_name, const void *addr, size_t len)
{
	const struct rte_memseg_list *msl;
	struct mlx5_mr *mr;
	int ms_n;
	int i;
	int rebuild = 0;

	DRV_LOG(DEBUG, "device %s free callback: addr=%p, len=%zu",
		ibdev_name, addr, len);
	msl = rte_mem_virt2memseg_list(addr);
	/* addr and len must be page-aligned. */
	MLX5_ASSERT((uintptr_t)addr ==
		    RTE_ALIGN((uintptr_t)addr, msl->page_sz));
	MLX5_ASSERT(len == RTE_ALIGN(len, msl->page_sz));
	ms_n = len / msl->page_sz;
	rte_rwlock_write_lock(&share_cache->rwlock);
	/* Clear bits of freed memsegs from MR. */
	for (i = 0; i < ms_n; ++i) {
		const struct rte_memseg *ms;
		struct mr_cache_entry entry;
		uintptr_t start;
		int ms_idx;
		uint32_t pos;

		/* Find MR having this memseg. */
		start = (uintptr_t)addr + i * msl->page_sz;
		mr = mlx5_mr_lookup_list(share_cache, &entry, start);
		if (mr == NULL)
			continue;
		MLX5_ASSERT(mr->msl); /* Can't be external memory. */
		ms = rte_mem_virt2memseg((void *)start, msl);
		MLX5_ASSERT(ms != NULL);
		MLX5_ASSERT(msl->page_sz == ms->hugepage_sz);
		ms_idx = rte_fbarray_find_idx(&msl->memseg_arr, ms);
		pos = ms_idx - mr->ms_base_idx;
		MLX5_ASSERT(rte_bitmap_get(mr->ms_bmp, pos));
		MLX5_ASSERT(pos < mr->ms_bmp_n);
		DRV_LOG(DEBUG, "device %s MR(%p): clear bitmap[%u] for addr %p",
			ibdev_name, (void *)mr, pos, (void *)start);
		rte_bitmap_clear(mr->ms_bmp, pos);
		if (--mr->ms_n == 0) {
			LIST_REMOVE(mr, mr);
			LIST_INSERT_HEAD(&share_cache->mr_free_list, mr, mr);
			DRV_LOG(DEBUG, "device %s remove MR(%p) from list",
				ibdev_name, (void *)mr);
		}
		/*
		 * The MR is fragmented or will be freed; the global cache must
		 * be rebuilt.
		 */
		rebuild = 1;
	}
	if (rebuild) {
		mlx5_mr_rebuild_cache(share_cache);
		/*
		 * No explicit wmb is needed after updating dev_gen due to
		 * store-release ordering in unlock that provides the
		 * implicit barrier at the software visible level.
		 */
		++share_cache->dev_gen;
		DRV_LOG(DEBUG, "broadcasting local cache flush, gen=%d",
			share_cache->dev_gen);
	}
	rte_rwlock_write_unlock(&share_cache->rwlock);
}

/**
 * Dump all the created MRs and the global cache entries.
 *
 * @param share_cache
 *   Pointer to a global shared MR cache.
 */
void
mlx5_mr_dump_cache(struct mlx5_mr_share_cache *share_cache __rte_unused)
{
#ifdef RTE_LIBRTE_MLX5_DEBUG
	struct mlx5_mr *mr;
	int mr_n = 0;
	int chunk_n = 0;

	rte_rwlock_read_lock(&share_cache->rwlock);
	/* Iterate all the existing MRs. */
	LIST_FOREACH(mr, &share_cache->mr_list, mr) {
		unsigned int n;

		DRV_LOG(DEBUG, "MR[%u], LKey = 0x%x, ms_n = %u, ms_bmp_n = %u",
			mr_n++, rte_cpu_to_be_32(mr->pmd_mr.lkey),
			mr->ms_n, mr->ms_bmp_n);
		if (mr->ms_n == 0)
			continue;
		for (n = 0; n < mr->ms_bmp_n; ) {
			struct mr_cache_entry ret = { 0, };

			n = mr_find_next_chunk(mr, &ret, n);
			if (!ret.end)
				break;
			DRV_LOG(DEBUG,
				" chunk[%u], [0x%" PRIxPTR ", 0x%" PRIxPTR ")",
				chunk_n++, ret.start, ret.end);
		}
	}
	DRV_LOG(DEBUG, "Dumping global cache %p", (void *)share_cache);
	mlx5_mr_btree_dump(&share_cache->cache);
	rte_rwlock_read_unlock(&share_cache->rwlock);
#endif
}

static int
mlx5_range_compare_start(const void *lhs, const void *rhs)
{
	const struct mlx5_range *r1 = lhs, *r2 = rhs;

	if (r1->start > r2->start)
		return 1;
	else if (r1->start < r2->start)
		return -1;
	return 0;
}

static void
mlx5_range_from_mempool_chunk(struct rte_mempool *mp, void *opaque,
			      struct rte_mempool_memhdr *memhdr,
			      unsigned int idx)
{
	struct mlx5_range *ranges = opaque, *range = &ranges[idx];
	uint64_t page_size = rte_mem_page_size();

	RTE_SET_USED(mp);
	range->start = RTE_ALIGN_FLOOR((uintptr_t)memhdr->addr, page_size);
	range->end = RTE_ALIGN_CEIL(range->start + memhdr->len, page_size);
}

/**
 * Collect page-aligned memory ranges of the mempool.
 */
static int
mlx5_mempool_get_chunks(struct rte_mempool *mp, struct mlx5_range **out,
			unsigned int *out_n)
{
	struct mlx5_range *chunks;
	unsigned int n;

	DRV_LOG(DEBUG, "Collecting chunks of regular mempool %s", mp->name);
	n = mp->nb_mem_chunks;
	chunks = calloc(sizeof(chunks[0]), n);
	if (chunks == NULL)
		return -1;
	rte_mempool_mem_iter(mp, mlx5_range_from_mempool_chunk, chunks);
	*out = chunks;
	*out_n = n;
	return 0;
}

struct mlx5_mempool_get_extmem_data {
	struct mlx5_range *heap;
	unsigned int heap_size;
	int ret;
};

static void
mlx5_mempool_get_extmem_cb(struct rte_mempool *mp, void *opaque,
			   void *obj, unsigned int obj_idx)
{
	struct mlx5_mempool_get_extmem_data *data = opaque;
	struct rte_mbuf *mbuf = obj;
	uintptr_t addr = (uintptr_t)mbuf->buf_addr;
	struct mlx5_range *seg, *heap;
	struct rte_memseg_list *msl;
	size_t page_size;
	uintptr_t page_start;
	unsigned int pos = 0, len = data->heap_size, delta;

	RTE_SET_USED(mp);
	RTE_SET_USED(obj_idx);
	if (data->ret < 0)
		return;
	/* Binary search for an already visited page. */
	while (len > 1) {
		delta = len / 2;
		if (addr < data->heap[pos + delta].start) {
			len = delta;
		} else {
			pos += delta;
			len -= delta;
		}
	}
	if (data->heap != NULL) {
		seg = &data->heap[pos];
		if (seg->start <= addr && addr < seg->end)
			return;
	}
	/* Determine the page boundaries and remember them. */
	heap = realloc(data->heap, sizeof(heap[0]) * (data->heap_size + 1));
	if (heap == NULL) {
		free(data->heap);
		data->heap = NULL;
		data->ret = -1;
		return;
	}
	data->heap = heap;
	data->heap_size++;
	seg = &heap[data->heap_size - 1];
	msl = rte_mem_virt2memseg_list((void *)addr);
	page_size = msl != NULL ? msl->page_sz : rte_mem_page_size();
	page_start = RTE_PTR_ALIGN_FLOOR(addr, page_size);
	seg->start = page_start;
	seg->end = page_start + page_size;
	/* Maintain the heap order. */
	qsort(data->heap, data->heap_size, sizeof(heap[0]),
	      mlx5_range_compare_start);
}

/**
 * Recover pages of external memory as close as possible
 * for a mempool with RTE_PKTMBUF_POOL_PINNED_EXT_BUF.
 * Pages are stored in a heap for efficient search, because mbufs can be many.
 */
static int
mlx5_mempool_get_extmem(struct rte_mempool *mp, struct mlx5_range **out,
			unsigned int *out_n)
{
	struct mlx5_mempool_get_extmem_data data;

	DRV_LOG(DEBUG, "Recovering external pinned pages of mempool %s",
		mp->name);
	memset(&data, 0, sizeof(data));
	rte_mempool_obj_iter(mp, mlx5_mempool_get_extmem_cb, &data);
	if (data.ret < 0)
		return -1;
	*out = data.heap;
	*out_n = data.heap_size;
	return 0;
}

/**
 * Get VA-contiguous ranges of the mempool memory.
 * Each range start and end is aligned to the system page size.
 *
 * @param[in] mp
 *   Analyzed mempool.
 * @param[out] out
 *   Receives the ranges, caller must release it with free().
 * @param[out] out_n
 *   Receives the number of @p out elements.
 *
 * @return
 *   0 on success, (-1) on failure.
 */
static int
mlx5_get_mempool_ranges(struct rte_mempool *mp, struct mlx5_range **out,
			unsigned int *out_n)
{
	struct mlx5_range *chunks;
	unsigned int chunks_n, contig_n, i;
	int ret;

	/* Collect the pool underlying memory. */
	ret = mlx5_mempool_is_extmem(mp) ?
	      mlx5_mempool_get_extmem(mp, &chunks, &chunks_n) :
	      mlx5_mempool_get_chunks(mp, &chunks, &chunks_n);
	if (ret < 0)
		return ret;
	/* Merge adjacent chunks and place them at the beginning. */
	qsort(chunks, chunks_n, sizeof(chunks[0]), mlx5_range_compare_start);
	contig_n = 1;
	for (i = 1; i < chunks_n; i++)
		if (chunks[i - 1].end != chunks[i].start) {
			chunks[contig_n - 1].end = chunks[i - 1].end;
			chunks[contig_n] = chunks[i];
			contig_n++;
		}
	/* Extend the last contiguous chunk to the end of the mempool. */
	chunks[contig_n - 1].end = chunks[i - 1].end;
	*out = chunks;
	*out_n = contig_n;
	return 0;
}

/**
 * Analyze mempool memory to select memory ranges to register.
 *
 * @param[in] mp
 *   Mempool to analyze.
 * @param[out] out
 *   Receives memory ranges to register, aligned to the system page size.
 *   The caller must release them with free().
 * @param[out] out_n
 *   Receives the number of @p out items.
 * @param[out] share_hugepage
 *   Receives True if the entire pool resides within a single hugepage.
 *
 * @return
 *   0 on success, (-1) on failure.
 */
static int
mlx5_mempool_reg_analyze(struct rte_mempool *mp, struct mlx5_range **out,
			 unsigned int *out_n, bool *share_hugepage)
{
	struct mlx5_range *ranges = NULL;
	unsigned int i, ranges_n = 0;
	struct rte_memseg_list *msl;

	if (mlx5_get_mempool_ranges(mp, &ranges, &ranges_n) < 0) {
		DRV_LOG(ERR, "Cannot get address ranges for mempool %s",
			mp->name);
		return -1;
	}
	/* Check if the hugepage of the pool can be shared. */
	*share_hugepage = false;
	msl = rte_mem_virt2memseg_list((void *)ranges[0].start);
	if (msl != NULL) {
		uint64_t hugepage_sz = 0;

		/* Check that all ranges are on pages of the same size. */
		for (i = 0; i < ranges_n; i++) {
			if (hugepage_sz != 0 && hugepage_sz != msl->page_sz)
				break;
			hugepage_sz = msl->page_sz;
		}
		if (i == ranges_n) {
			/*
			 * If the entire pool is within one hugepage,
			 * combine all ranges into one of the hugepage size.
			 */
			uintptr_t reg_start = ranges[0].start;
			uintptr_t reg_end = ranges[ranges_n - 1].end;
			uintptr_t hugepage_start =
				RTE_ALIGN_FLOOR(reg_start, hugepage_sz);
			uintptr_t hugepage_end = hugepage_start + hugepage_sz;
			if (reg_end < hugepage_end) {
				ranges[0].start = hugepage_start;
				ranges[0].end = hugepage_end;
				ranges_n = 1;
				*share_hugepage = true;
			}
		}
	}
	*out = ranges;
	*out_n = ranges_n;
	return 0;
}
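
/*
 * Illustrative sketch, not part of the upstream driver: the "single hugepage"
 * test performed by mlx5_mempool_reg_analyze() above, expressed as a helper.
 * A pool whose ranges end before the hugepage containing its first byte ends
 * gets one hugepage-sized range so the resulting MR can be shared. The helper
 * name is hypothetical.
 */
static __rte_unused bool
mlx5_mempool_fits_one_hugepage_sketch(uintptr_t reg_start, uintptr_t reg_end,
				      uint64_t hugepage_sz)
{
	uintptr_t hugepage_start = RTE_ALIGN_FLOOR(reg_start, hugepage_sz);

	/* E.g. [0x40001000, 0x401ff000) fits in the 2 MB page at 0x40000000. */
	return reg_end < hugepage_start + hugepage_sz;
}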

/** Create a registration object for the mempool. */
static struct mlx5_mempool_reg *
mlx5_mempool_reg_create(struct rte_mempool *mp, unsigned int mrs_n)
{
	struct mlx5_mempool_reg *mpr = NULL;

	mpr = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
			  sizeof(*mpr) + mrs_n * sizeof(mpr->mrs[0]),
			  RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
	if (mpr == NULL) {
		DRV_LOG(ERR, "Cannot allocate mempool %s registration object",
			mp->name);
		return NULL;
	}
	mpr->mp = mp;
	mpr->mrs = (struct mlx5_mempool_mr *)(mpr + 1);
	mpr->mrs_n = mrs_n;
	return mpr;
}

/**
 * Destroy a mempool registration object.
 *
 * @param standalone
 *   Whether @p mpr owns its MRs exclusively, i.e. they are not shared.
 */
static void
mlx5_mempool_reg_destroy(struct mlx5_mr_share_cache *share_cache,
			 struct mlx5_mempool_reg *mpr, bool standalone)
{
	if (standalone) {
		unsigned int i;

		for (i = 0; i < mpr->mrs_n; i++)
			share_cache->dereg_mr_cb(&mpr->mrs[i].pmd_mr);
	}
	mlx5_free(mpr);
}

/** Find registration object of a mempool. */
static struct mlx5_mempool_reg *
mlx5_mempool_reg_lookup(struct mlx5_mr_share_cache *share_cache,
			struct rte_mempool *mp)
{
	struct mlx5_mempool_reg *mpr;

	LIST_FOREACH(mpr, &share_cache->mempool_reg_list, next)
		if (mpr->mp == mp)
			break;
	return mpr;
}

/** Increment reference counters of MRs used in the registration. */
static void
mlx5_mempool_reg_attach(struct mlx5_mempool_reg *mpr)
{
	unsigned int i;

	for (i = 0; i < mpr->mrs_n; i++)
		__atomic_add_fetch(&mpr->mrs[i].refcnt, 1, __ATOMIC_RELAXED);
}

/**
 * Decrement reference counters of MRs used in the registration.
 *
 * @return True if no more references to @p mpr MRs exist, False otherwise.
 */
static bool
mlx5_mempool_reg_detach(struct mlx5_mempool_reg *mpr)
{
	unsigned int i;
	bool ret = false;

	for (i = 0; i < mpr->mrs_n; i++)
		ret |= __atomic_sub_fetch(&mpr->mrs[i].refcnt, 1,
					  __ATOMIC_RELAXED) == 0;
	return ret;
}

static int
mlx5_mr_mempool_register_primary(struct mlx5_mr_share_cache *share_cache,
				 void *pd, struct rte_mempool *mp)
{
	struct mlx5_range *ranges = NULL;
	struct mlx5_mempool_reg *mpr, *new_mpr;
	unsigned int i, ranges_n;
	bool share_hugepage;
	int ret = -1;

	/* Early check to avoid unnecessary creation of MRs. */
	rte_rwlock_read_lock(&share_cache->rwlock);
	mpr = mlx5_mempool_reg_lookup(share_cache, mp);
	rte_rwlock_read_unlock(&share_cache->rwlock);
	if (mpr != NULL) {
		DRV_LOG(DEBUG, "Mempool %s is already registered for PD %p",
			mp->name, pd);
		rte_errno = EEXIST;
		goto exit;
	}
	if (mlx5_mempool_reg_analyze(mp, &ranges, &ranges_n,
				     &share_hugepage) < 0) {
		DRV_LOG(ERR, "Cannot get mempool %s memory ranges", mp->name);
		rte_errno = ENOMEM;
		goto exit;
	}
	new_mpr = mlx5_mempool_reg_create(mp, ranges_n);
	if (new_mpr == NULL) {
		DRV_LOG(ERR,
			"Cannot create a registration object for mempool %s in PD %p",
			mp->name, pd);
		rte_errno = ENOMEM;
		goto exit;
	}
	/*
	 * If the entire mempool fits in a single hugepage, the MR for this
	 * hugepage can be shared across mempools that also fit in it.
	 */
	if (share_hugepage) {
		rte_rwlock_write_lock(&share_cache->rwlock);
		LIST_FOREACH(mpr, &share_cache->mempool_reg_list, next) {
			if (mpr->mrs[0].pmd_mr.addr == (void *)ranges[0].start)
				break;
		}
		if (mpr != NULL) {
			new_mpr->mrs = mpr->mrs;
			mlx5_mempool_reg_attach(new_mpr);
			LIST_INSERT_HEAD(&share_cache->mempool_reg_list,
					 new_mpr, next);
		}
		rte_rwlock_write_unlock(&share_cache->rwlock);
		if (mpr != NULL) {
			DRV_LOG(DEBUG, "Shared MR %#x in PD %p for mempool %s with mempool %s",
				mpr->mrs[0].pmd_mr.lkey, pd, mp->name,
				mpr->mp->name);
			ret = 0;
			goto exit;
		}
	}
	for (i = 0; i < ranges_n; i++) {
		struct mlx5_mempool_mr *mr = &new_mpr->mrs[i];
		const struct mlx5_range *range = &ranges[i];
		size_t len = range->end - range->start;

		if (share_cache->reg_mr_cb(pd, (void *)range->start, len,
		    &mr->pmd_mr) < 0) {
			DRV_LOG(ERR,
				"Failed to create an MR in PD %p for address range "
				"[0x%" PRIxPTR ", 0x%" PRIxPTR "] (%zu bytes) for mempool %s",
				pd, range->start, range->end, len, mp->name);
			break;
		}
		DRV_LOG(DEBUG,
			"Created a new MR %#x in PD %p for address range "
			"[0x%" PRIxPTR ", 0x%" PRIxPTR "] (%zu bytes) for mempool %s",
			mr->pmd_mr.lkey, pd, range->start, range->end, len,
			mp->name);
	}
	if (i != ranges_n) {
		mlx5_mempool_reg_destroy(share_cache, new_mpr, true);
		rte_errno = EINVAL;
		goto exit;
	}
	/* Concurrent registration is not supposed to happen. */
	rte_rwlock_write_lock(&share_cache->rwlock);
	mpr = mlx5_mempool_reg_lookup(share_cache, mp);
	if (mpr == NULL) {
		mlx5_mempool_reg_attach(new_mpr);
		LIST_INSERT_HEAD(&share_cache->mempool_reg_list, new_mpr, next);
		ret = 0;
	}
	rte_rwlock_write_unlock(&share_cache->rwlock);
	if (mpr != NULL) {
		DRV_LOG(DEBUG, "Mempool %s is already registered for PD %p",
			mp->name, pd);
		mlx5_mempool_reg_destroy(share_cache, new_mpr, true);
		rte_errno = EEXIST;
		goto exit;
	}
exit:
	free(ranges);
	return ret;
}

static int
mlx5_mr_mempool_register_secondary(struct mlx5_common_device *cdev,
				   struct rte_mempool *mp)
{
	return mlx5_mp_req_mempool_reg(cdev, mp, true);
}
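
/*
 * Illustrative sketch, not part of the upstream driver: how a PMD might
 * register the mempool of a receive queue through mlx5_mr_mempool_register()
 * defined below. Treating EEXIST as success is an assumption made for this
 * example; the helper name is hypothetical.
 */
static __rte_unused int
mlx5_mempool_register_sketch(struct mlx5_common_device *cdev,
			     struct rte_mempool *mp)
{
	if (mlx5_mr_mempool_register(cdev, mp) == 0)
		return 0;
	/* Another queue of the same device may have registered it already. */
	if (rte_errno == EEXIST)
		return 0;
	DRV_LOG(ERR, "Cannot register mempool %s: %s",
		mp->name, rte_strerror(rte_errno));
	return -rte_errno;
}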

/**
 * Register the memory of a mempool in the protection domain.
 *
 * @param cdev
 *   Pointer to the mlx5 common device.
 * @param mp
 *   Mempool to register.
 *
 * @return
 *   0 on success, (-1) on failure and rte_errno is set.
 */
int
mlx5_mr_mempool_register(struct mlx5_common_device *cdev,
			 struct rte_mempool *mp)
{
	if (mp->flags & RTE_MEMPOOL_F_NON_IO)
		return 0;
	switch (rte_eal_process_type()) {
	case RTE_PROC_PRIMARY:
		return mlx5_mr_mempool_register_primary(&cdev->mr_scache,
							cdev->pd, mp);
	case RTE_PROC_SECONDARY:
		return mlx5_mr_mempool_register_secondary(cdev, mp);
	default:
		return -1;
	}
}

static int
mlx5_mr_mempool_unregister_primary(struct mlx5_mr_share_cache *share_cache,
				   struct rte_mempool *mp)
{
	struct mlx5_mempool_reg *mpr;
	bool standalone = false;

	rte_rwlock_write_lock(&share_cache->rwlock);
	LIST_FOREACH(mpr, &share_cache->mempool_reg_list, next)
		if (mpr->mp == mp) {
			LIST_REMOVE(mpr, next);
			standalone = mlx5_mempool_reg_detach(mpr);
			if (standalone)
				/*
				 * The unlock operation below provides a memory
				 * barrier due to its store-release semantics.
				 */
				++share_cache->dev_gen;
			break;
		}
	rte_rwlock_write_unlock(&share_cache->rwlock);
	if (mpr == NULL) {
		rte_errno = ENOENT;
		return -1;
	}
	mlx5_mempool_reg_destroy(share_cache, mpr, standalone);
	return 0;
}

static int
mlx5_mr_mempool_unregister_secondary(struct mlx5_common_device *cdev,
				     struct rte_mempool *mp)
{
	return mlx5_mp_req_mempool_reg(cdev, mp, false);
}

/**
 * Unregister the memory of a mempool from the protection domain.
 *
 * @param cdev
 *   Pointer to the mlx5 common device.
 * @param mp
 *   Mempool to unregister.
 *
 * @return
 *   0 on success, (-1) on failure and rte_errno is set.
 */
int
mlx5_mr_mempool_unregister(struct mlx5_common_device *cdev,
			   struct rte_mempool *mp)
{
	if (mp->flags & RTE_MEMPOOL_F_NON_IO)
		return 0;
	switch (rte_eal_process_type()) {
	case RTE_PROC_PRIMARY:
		return mlx5_mr_mempool_unregister_primary(&cdev->mr_scache, mp);
	case RTE_PROC_SECONDARY:
		return mlx5_mr_mempool_unregister_secondary(cdev, mp);
	default:
		return -1;
	}
}

/**
 * Look up an MR key by address in a registered mempool.
 *
 * @param mpr
 *   Mempool registration object.
 * @param addr
 *   Address within the mempool.
 * @param entry
 *   Bottom-half cache entry to fill.
 *
 * @return
 *   MR key or UINT32_MAX on failure, which can only happen
 *   if the address is not from within the mempool.
 */
static uint32_t
mlx5_mempool_reg_addr2mr(struct mlx5_mempool_reg *mpr, uintptr_t addr,
			 struct mr_cache_entry *entry)
{
	uint32_t lkey = UINT32_MAX;
	unsigned int i;

	for (i = 0; i < mpr->mrs_n; i++) {
		const struct mlx5_pmd_mr *mr = &mpr->mrs[i].pmd_mr;
		uintptr_t mr_addr = (uintptr_t)mr->addr;

		if (mr_addr <= addr) {
			lkey = rte_cpu_to_be_32(mr->lkey);
			entry->start = mr_addr;
			entry->end = mr_addr + mr->len;
			entry->lkey = lkey;
			break;
		}
	}
	return lkey;
}

/**
 * Update the bottom-half cache from the list of mempool registrations.
 *
 * @param mr_ctrl
 *   Per-queue MR control handle.
 * @param entry
 *   Pointer to an entry in the bottom-half cache to update
 *   with the MR lkey looked up.
 * @param mp
 *   Mempool containing the address.
 * @param addr
 *   Address to lookup.
 * @return
 *   MR lkey on success, UINT32_MAX on failure.
 */
static uint32_t
mlx5_lookup_mempool_regs(struct mlx5_mr_ctrl *mr_ctrl,
			 struct mr_cache_entry *entry,
			 struct rte_mempool *mp, uintptr_t addr)
{
	struct mlx5_mr_share_cache *share_cache =
		container_of(mr_ctrl->dev_gen_ptr, struct mlx5_mr_share_cache,
			     dev_gen);
	struct mlx5_mr_btree *bt = &mr_ctrl->cache_bh;
	struct mlx5_mempool_reg *mpr;
	uint32_t lkey = UINT32_MAX;

	/* If local cache table is full, try to double it. */
	if (unlikely(bt->len == bt->size))
		mr_btree_expand(bt, bt->size << 1);
	/* Look up in mempool registrations. */
	rte_rwlock_read_lock(&share_cache->rwlock);
	mpr = mlx5_mempool_reg_lookup(share_cache, mp);
	if (mpr != NULL)
		lkey = mlx5_mempool_reg_addr2mr(mpr, addr, entry);
	rte_rwlock_read_unlock(&share_cache->rwlock);
	/*
	 * Update local cache. Even if it fails, return the found entry
	 * to update top-half cache. Next time, this entry will be found
	 * in the global cache.
	 */
	if (lkey != UINT32_MAX)
		mr_btree_insert(bt, entry);
	return lkey;
}

/**
 * Bottom-half lookup for the address from the mempool.
 *
 * @param mr_ctrl
 *   Per-queue MR control handle.
 * @param mp
 *   Mempool containing the address.
 * @param addr
 *   Address to lookup.
 * @return
 *   MR lkey on success, UINT32_MAX on failure.
 */
uint32_t
mlx5_mr_mempool2mr_bh(struct mlx5_mr_ctrl *mr_ctrl,
		      struct rte_mempool *mp, uintptr_t addr)
{
	struct mr_cache_entry *repl = &mr_ctrl->cache[mr_ctrl->head];
	uint32_t lkey;
	uint16_t bh_idx = 0;

	/* Binary-search MR translation table. */
	lkey = mr_btree_lookup(&mr_ctrl->cache_bh, &bh_idx, addr);
	/* Update top-half cache. */
	if (likely(lkey != UINT32_MAX)) {
		*repl = (*mr_ctrl->cache_bh.table)[bh_idx];
	} else {
		lkey = mlx5_lookup_mempool_regs(mr_ctrl, repl, mp, addr);
		/* Can only fail if the address is not from the mempool. */
		if (unlikely(lkey == UINT32_MAX))
			return UINT32_MAX;
	}
	/* Update the most recently used entry. */
	mr_ctrl->mru = mr_ctrl->head;
	/* Point to the next victim, the oldest. */
	mr_ctrl->head = (mr_ctrl->head + 1) % MLX5_MR_CACHE_N;
	return lkey;
}

uint32_t
mlx5_mr_mb2mr_bh(struct mlx5_mr_ctrl *mr_ctrl, struct rte_mbuf *mb)
{
	uint32_t lkey;
	uintptr_t addr = (uintptr_t)mb->buf_addr;
	struct mlx5_mr_share_cache *share_cache =
		container_of(mr_ctrl->dev_gen_ptr, struct mlx5_mr_share_cache,
			     dev_gen);
	struct mlx5_common_device *cdev =
		container_of(share_cache, struct mlx5_common_device, mr_scache);

	if (cdev->config.mr_mempool_reg_en) {
		struct rte_mempool *mp = NULL;
		struct mlx5_mprq_buf *buf;

		if (!RTE_MBUF_HAS_EXTBUF(mb)) {
			mp = mlx5_mb2mp(mb);
		} else if (mb->shinfo->free_cb == mlx5_mprq_buf_free_cb) {
			/* Recover MPRQ mempool. */
			buf = mb->shinfo->fcb_opaque;
			mp = buf->mp;
		}
		if (mp != NULL) {
			lkey = mlx5_mr_mempool2mr_bh(mr_ctrl, mp, addr);
			/*
			 * Lookup can only fail on invalid input, e.g. "addr"
			 * is not from "mp" or "mp" has MEMPOOL_F_NON_IO set.
			 */
			if (lkey != UINT32_MAX)
				return lkey;
		}
		/* Fallback to the generic mechanism in corner cases. */
	}
	return mlx5_mr_addr2mr_bh(mr_ctrl, addr);
}
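
/*
 * Illustrative sketch, not part of the upstream driver: a datapath wrapper
 * that flushes a stale per-queue cache before resolving an mbuf to an LKey.
 * The generation check mirrors what the inline mb-to-LKey helpers in the
 * header do; the function name is hypothetical.
 */
static __rte_unused uint32_t
mlx5_mr_mb2lkey_sketch(struct mlx5_mr_ctrl *mr_ctrl, struct rte_mbuf *mb)
{
	/*
	 * A memory free event bumps the global generation number; a local
	 * cache with an older number may hold stale LKeys and must be flushed.
	 */
	if (unlikely(*mr_ctrl->dev_gen_ptr != mr_ctrl->cur_gen))
		mlx5_mr_flush_local_cache(mr_ctrl);
	return mlx5_mr_mb2mr_bh(mr_ctrl, mb);
}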