/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2016 6WIND S.A.
 * Copyright 2020 Mellanox Technologies, Ltd
 */
#include <stddef.h>

#include <rte_eal_memconfig.h>
#include <rte_eal_paging.h>
#include <rte_errno.h>
#include <rte_mempool.h>
#include <rte_malloc.h>
#include <rte_rwlock.h>

#include "mlx5_glue.h"
#include "mlx5_common_mp.h"
#include "mlx5_common_mr.h"
#include "mlx5_common_log.h"
#include "mlx5_malloc.h"

struct mr_find_contig_memsegs_data {
	uintptr_t addr;
	uintptr_t start;
	uintptr_t end;
	const struct rte_memseg_list *msl;
};

/* Virtual memory range. */
struct mlx5_range {
	uintptr_t start;
	uintptr_t end;
};

/** Memory region for a mempool. */
struct mlx5_mempool_mr {
	struct mlx5_pmd_mr pmd_mr;
	uint32_t refcnt; /**< Number of mempools sharing this MR. */
};

/* Mempool registration. */
struct mlx5_mempool_reg {
	LIST_ENTRY(mlx5_mempool_reg) next;
	/** Registered mempool, used to designate registrations. */
	struct rte_mempool *mp;
	/** Memory regions for the address ranges of the mempool. */
	struct mlx5_mempool_mr *mrs;
	/** Number of memory regions. */
	unsigned int mrs_n;
};

/**
 * Expand B-tree table to a given size. Must not be called while holding
 * memory_hotplug_lock or share_cache.rwlock because rte_realloc() is used.
 *
 * @param bt
 *   Pointer to B-tree structure.
 * @param n
 *   Number of entries for expansion.
 *
 * @return
 *   0 on success, -1 on failure.
 */
static int
mr_btree_expand(struct mlx5_mr_btree *bt, int n)
{
	void *mem;
	int ret = 0;

	if (n <= bt->size)
		return ret;
	/*
	 * Downside of directly using rte_realloc() is that SOCKET_ID_ANY is
	 * used inside if there's no room to expand. Because this is a quite
	 * rare case and a part of a very slow path, it is acceptable.
	 * Initially cache_bh[] will be given practically enough space and once
	 * it is expanded, expansion wouldn't be needed again ever.
	 */
	mem = mlx5_realloc(bt->table, MLX5_MEM_RTE | MLX5_MEM_ZERO,
			   n * sizeof(struct mr_cache_entry), 0, SOCKET_ID_ANY);
	if (mem == NULL) {
		/* Not an error, B-tree search will be skipped. */
		DRV_LOG(WARNING, "failed to expand MR B-tree (%p) table",
			(void *)bt);
		ret = -1;
	} else {
		DRV_LOG(DEBUG, "expanded MR B-tree table (size=%u)", n);
		bt->table = mem;
		bt->size = n;
	}
	return ret;
}

/**
 * Look up LKey from given B-tree lookup table, store the last index and return
 * searched LKey.
 *
 * @param bt
 *   Pointer to B-tree structure.
 * @param[out] idx
 *   Pointer to index. Even on search failure, returns the index where the
 *   search stopped so that it can be used when inserting a new entry.
 * @param addr
 *   Search key.
 *
 * @return
 *   Searched LKey on success, UINT32_MAX on no match.
 */
static uint32_t
mr_btree_lookup(struct mlx5_mr_btree *bt, uint16_t *idx, uintptr_t addr)
{
	struct mr_cache_entry *lkp_tbl;
	uint16_t n;
	uint16_t base = 0;

	MLX5_ASSERT(bt != NULL);
	lkp_tbl = *bt->table;
	n = bt->len;
	/* First entry must be NULL for comparison. */
	MLX5_ASSERT(bt->len > 0 || (lkp_tbl[0].start == 0 &&
				    lkp_tbl[0].lkey == UINT32_MAX));
	/* Binary search. */
	do {
		register uint16_t delta = n >> 1;

		if (addr < lkp_tbl[base + delta].start) {
			n = delta;
		} else {
			base += delta;
			n -= delta;
		}
	} while (n > 1);
	MLX5_ASSERT(addr >= lkp_tbl[base].start);
	*idx = base;
	if (addr < lkp_tbl[base].end)
		return lkp_tbl[base].lkey;
	/* Not found. */
	return UINT32_MAX;
}

/**
 * Insert an entry to B-tree lookup table.
 *
 * @param bt
 *   Pointer to B-tree structure.
 * @param entry
 *   Pointer to new entry to insert.
 *
 * @return
 *   0 on success, -1 on failure.
 */
static int
mr_btree_insert(struct mlx5_mr_btree *bt, struct mr_cache_entry *entry)
{
	struct mr_cache_entry *lkp_tbl;
	uint16_t idx = 0;
	size_t shift;

	MLX5_ASSERT(bt != NULL);
	MLX5_ASSERT(bt->len <= bt->size);
	MLX5_ASSERT(bt->len > 0);
	lkp_tbl = *bt->table;
	/* Find out the slot for insertion. */
	if (mr_btree_lookup(bt, &idx, entry->start) != UINT32_MAX) {
		DRV_LOG(DEBUG,
			"abort insertion to B-tree(%p): already exist at"
			" idx=%u [0x%" PRIxPTR ", 0x%" PRIxPTR ") lkey=0x%x",
			(void *)bt, idx, entry->start, entry->end, entry->lkey);
		/* Already exist, return. */
		return 0;
	}
	/* If table is full, return error. */
	if (unlikely(bt->len == bt->size)) {
		bt->overflow = 1;
		return -1;
	}
	/* Insert entry. */
	++idx;
	shift = (bt->len - idx) * sizeof(struct mr_cache_entry);
	if (shift)
		memmove(&lkp_tbl[idx + 1], &lkp_tbl[idx], shift);
	lkp_tbl[idx] = *entry;
	bt->len++;
	DRV_LOG(DEBUG,
		"inserted B-tree(%p)[%u],"
		" [0x%" PRIxPTR ", 0x%" PRIxPTR ") lkey=0x%x",
		(void *)bt, idx, entry->start, entry->end, entry->lkey);
	return 0;
}

/**
 * Initialize B-tree and allocate memory for lookup table.
 *
 * @param bt
 *   Pointer to B-tree structure.
 * @param n
 *   Number of entries to allocate.
 * @param socket
 *   NUMA socket on which memory must be allocated.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_mr_btree_init(struct mlx5_mr_btree *bt, int n, int socket)
{
	if (bt == NULL) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	MLX5_ASSERT(!bt->table && !bt->size);
	memset(bt, 0, sizeof(*bt));
	bt->table = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
				sizeof(struct mr_cache_entry) * n,
				0, socket);
	if (bt->table == NULL) {
		rte_errno = ENOMEM;
		DRV_LOG(DEBUG,
			"failed to allocate memory for btree cache on socket "
			"%d", socket);
		return -rte_errno;
	}
	bt->size = n;
	/* First entry must be NULL for binary search. */
	(*bt->table)[bt->len++] = (struct mr_cache_entry) {
		.lkey = UINT32_MAX,
	};
	DRV_LOG(DEBUG, "initialized B-tree %p with table %p",
		(void *)bt, (void *)bt->table);
	return 0;
}

/**
 * Free B-tree resources.
 *
 * @param bt
 *   Pointer to B-tree structure.
 */
void
mlx5_mr_btree_free(struct mlx5_mr_btree *bt)
{
	if (bt == NULL)
		return;
	DRV_LOG(DEBUG, "freeing B-tree %p with table %p",
		(void *)bt, (void *)bt->table);
	mlx5_free(bt->table);
	memset(bt, 0, sizeof(*bt));
}
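
/*
 * Illustrative sketch (not used by the driver) of how the B-tree helpers
 * above fit together for a private lookup table; "addr", "start", "end" and
 * "lkey" are placeholders that a caller would supply:
 *
 *	struct mlx5_mr_btree bt;
 *	struct mr_cache_entry e = { .start = start, .end = end, .lkey = lkey };
 *	uint16_t idx = 0;
 *
 *	if (mlx5_mr_btree_init(&bt, MLX5_MR_BTREE_CACHE_N, SOCKET_ID_ANY) < 0)
 *		return;
 *	(void)mr_btree_insert(&bt, &e);
 *	if (mr_btree_lookup(&bt, &idx, addr) == UINT32_MAX)
 *		... fall back to a slower lookup ...
 *	mlx5_mr_btree_free(&bt);
 *
 * mr_btree_insert() keeps the entries sorted by start address and refuses
 * duplicates, which is what keeps the lookup a plain binary search.
 */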

/**
 * Dump all the entries in a B-tree.
 *
 * @param bt
 *   Pointer to B-tree structure.
 */
void
mlx5_mr_btree_dump(struct mlx5_mr_btree *bt __rte_unused)
{
#ifdef RTE_LIBRTE_MLX5_DEBUG
	int idx;
	struct mr_cache_entry *lkp_tbl;

	if (bt == NULL)
		return;
	lkp_tbl = *bt->table;
	for (idx = 0; idx < bt->len; ++idx) {
		struct mr_cache_entry *entry = &lkp_tbl[idx];

		DRV_LOG(DEBUG, "B-tree(%p)[%u],"
			" [0x%" PRIxPTR ", 0x%" PRIxPTR ") lkey=0x%x",
			(void *)bt, idx, entry->start, entry->end, entry->lkey);
	}
#endif
}

/**
 * Initialize per-queue MR control descriptor.
 *
 * @param mr_ctrl
 *   Pointer to MR control structure.
 * @param dev_gen_ptr
 *   Pointer to generation number of global cache.
 * @param socket
 *   NUMA socket on which memory must be allocated.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_mr_ctrl_init(struct mlx5_mr_ctrl *mr_ctrl, uint32_t *dev_gen_ptr,
		  int socket)
{
	if (mr_ctrl == NULL) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	/* Save pointer of global generation number to check memory event. */
	mr_ctrl->dev_gen_ptr = dev_gen_ptr;
	/* Initialize B-tree and allocate memory for bottom-half cache table. */
	return mlx5_mr_btree_init(&mr_ctrl->cache_bh, MLX5_MR_BTREE_CACHE_N,
				  socket);
}

/**
 * Find virtually contiguous memory chunk in a given MR.
 *
 * @param mr
 *   Pointer to MR structure.
 * @param[out] entry
 *   Pointer to returning MR cache entry. If not found, this will not be
 *   updated.
 * @param base_idx
 *   Start index of the memseg bitmap.
 *
 * @return
 *   Next index to go on lookup.
 */
static int
mr_find_next_chunk(struct mlx5_mr *mr, struct mr_cache_entry *entry,
		   int base_idx)
{
	uintptr_t start = 0;
	uintptr_t end = 0;
	uint32_t idx = 0;

	/* MR for external memory doesn't have memseg list. */
	if (mr->msl == NULL) {
		MLX5_ASSERT(mr->ms_bmp_n == 1);
		MLX5_ASSERT(mr->ms_n == 1);
		MLX5_ASSERT(base_idx == 0);
		/*
		 * Can't search it from memseg list but get it directly from
		 * pmd_mr as there's only one chunk.
		 */
		entry->start = (uintptr_t)mr->pmd_mr.addr;
		entry->end = (uintptr_t)mr->pmd_mr.addr + mr->pmd_mr.len;
		entry->lkey = rte_cpu_to_be_32(mr->pmd_mr.lkey);
		/* Returning 1 ends iteration. */
		return 1;
	}
	for (idx = base_idx; idx < mr->ms_bmp_n; ++idx) {
		if (rte_bitmap_get(mr->ms_bmp, idx)) {
			const struct rte_memseg_list *msl;
			const struct rte_memseg *ms;

			msl = mr->msl;
			ms = rte_fbarray_get(&msl->memseg_arr,
					     mr->ms_base_idx + idx);
			MLX5_ASSERT(msl->page_sz == ms->hugepage_sz);
			if (!start)
				start = ms->addr_64;
			end = ms->addr_64 + ms->hugepage_sz;
		} else if (start) {
			/* Passed the end of a fragment. */
			break;
		}
	}
	if (start) {
		/* Found one chunk. */
		entry->start = start;
		entry->end = end;
		entry->lkey = rte_cpu_to_be_32(mr->pmd_mr.lkey);
	}
	return idx;
}

/**
 * Insert an MR into the global B-tree cache. It may fail due to a lack of
 * memory; in that case, the entry will have to be searched again via
 * mlx5_mr_lookup_list() in mlx5_mr_create() on a cache miss.
 *
 * @param share_cache
 *   Pointer to a global shared MR cache.
 * @param mr
 *   Pointer to MR to insert.
 *
 * @return
 *   0 on success, -1 on failure.
 */
int
mlx5_mr_insert_cache(struct mlx5_mr_share_cache *share_cache,
		     struct mlx5_mr *mr)
{
	unsigned int n;

	DRV_LOG(DEBUG, "Inserting MR(%p) to global cache(%p)",
		(void *)mr, (void *)share_cache);
	for (n = 0; n < mr->ms_bmp_n; ) {
		struct mr_cache_entry entry;

		memset(&entry, 0, sizeof(entry));
		/* Find a contiguous chunk and advance the index. */
		n = mr_find_next_chunk(mr, &entry, n);
		if (!entry.end)
			break;
		if (mr_btree_insert(&share_cache->cache, &entry) < 0) {
			/*
			 * Overflowed, but the global table cannot be expanded
			 * because of deadlock.
			 */
			return -1;
		}
	}
	return 0;
}

/**
 * Look up address in the original global MR list.
 *
 * @param share_cache
 *   Pointer to a global shared MR cache.
 * @param[out] entry
 *   Pointer to returning MR cache entry. If no match, this will not be updated.
 * @param addr
 *   Search key.
 *
 * @return
 *   Found MR on match, NULL otherwise.
 */
struct mlx5_mr *
mlx5_mr_lookup_list(struct mlx5_mr_share_cache *share_cache,
		    struct mr_cache_entry *entry, uintptr_t addr)
{
	struct mlx5_mr *mr;

	/* Iterate all the existing MRs. */
	LIST_FOREACH(mr, &share_cache->mr_list, mr) {
		unsigned int n;

		if (mr->ms_n == 0)
			continue;
		for (n = 0; n < mr->ms_bmp_n; ) {
			struct mr_cache_entry ret;

			memset(&ret, 0, sizeof(ret));
			n = mr_find_next_chunk(mr, &ret, n);
			if (addr >= ret.start && addr < ret.end) {
				/* Found. */
				*entry = ret;
				return mr;
			}
		}
	}
	return NULL;
}

/**
 * Look up address in the global MR cache.
 *
 * @param share_cache
 *   Pointer to a global shared MR cache.
 * @param[out] entry
 *   Pointer to returning MR cache entry. If no match, this will not be updated.
 * @param addr
 *   Search key.
 *
 * @return
 *   Searched LKey on success, UINT32_MAX on failure and rte_errno is set.
 */
uint32_t
mlx5_mr_lookup_cache(struct mlx5_mr_share_cache *share_cache,
		     struct mr_cache_entry *entry, uintptr_t addr)
{
	uint16_t idx;
	uint32_t lkey = UINT32_MAX;
	struct mlx5_mr *mr;

	/*
	 * If the global cache has overflowed since it failed to expand the
	 * B-tree table, it can't have all the existing MRs. Then, the address
	 * has to be searched by traversing the original MR list instead, which
	 * is a very slow path. Otherwise, the global cache is all inclusive.
	 */
	if (!unlikely(share_cache->cache.overflow)) {
		lkey = mr_btree_lookup(&share_cache->cache, &idx, addr);
		if (lkey != UINT32_MAX)
			*entry = (*share_cache->cache.table)[idx];
	} else {
		/* Falling back to the slowest path. */
		mr = mlx5_mr_lookup_list(share_cache, entry, addr);
		if (mr != NULL)
			lkey = entry->lkey;
	}
	MLX5_ASSERT(lkey == UINT32_MAX || (addr >= entry->start &&
					   addr < entry->end));
	return lkey;
}

/**
 * Free MR resources. The MR lock must not be held to avoid a deadlock:
 * rte_free() can raise a memory free event and the callback function
 * would spin on the lock.
 *
 * @param mr
 *   Pointer to MR to free.
 */
void
mlx5_mr_free(struct mlx5_mr *mr, mlx5_dereg_mr_t dereg_mr_cb)
{
	if (mr == NULL)
		return;
	DRV_LOG(DEBUG, "freeing MR(%p):", (void *)mr);
	dereg_mr_cb(&mr->pmd_mr);
	if (mr->ms_bmp != NULL)
		rte_bitmap_free(mr->ms_bmp);
	mlx5_free(mr);
}

void
mlx5_mr_rebuild_cache(struct mlx5_mr_share_cache *share_cache)
{
	struct mlx5_mr *mr;

	DRV_LOG(DEBUG, "Rebuild dev cache[] %p", (void *)share_cache);
	/* Flush cache to rebuild. */
	share_cache->cache.len = 1;
	share_cache->cache.overflow = 0;
	/* Iterate all the existing MRs. */
	LIST_FOREACH(mr, &share_cache->mr_list, mr)
		if (mlx5_mr_insert_cache(share_cache, mr) < 0)
			return;
}

/**
 * Release resources of detached MRs having no online entry.
 *
 * @param share_cache
 *   Pointer to a global shared MR cache.
 */
static void
mlx5_mr_garbage_collect(struct mlx5_mr_share_cache *share_cache)
{
	struct mlx5_mr *mr_next;
	struct mlx5_mr_list free_list = LIST_HEAD_INITIALIZER(free_list);

	/* Must be called from the primary process. */
	MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
	/*
	 * MRs can't be freed while holding the lock because rte_free() could
	 * call the memory free callback function, which would deadlock.
	 */
	rte_rwlock_write_lock(&share_cache->rwlock);
	/* Detach the whole free list and release it after unlocking. */
	free_list = share_cache->mr_free_list;
	LIST_INIT(&share_cache->mr_free_list);
	rte_rwlock_write_unlock(&share_cache->rwlock);
	/* Release resources. */
	mr_next = LIST_FIRST(&free_list);
	while (mr_next != NULL) {
		struct mlx5_mr *mr = mr_next;

		mr_next = LIST_NEXT(mr, mr);
		mlx5_mr_free(mr, share_cache->dereg_mr_cb);
	}
}

/* Called during rte_memseg_contig_walk() by mlx5_mr_create(). */
static int
mr_find_contig_memsegs_cb(const struct rte_memseg_list *msl,
			  const struct rte_memseg *ms, size_t len, void *arg)
{
	struct mr_find_contig_memsegs_data *data = arg;

	if (data->addr < ms->addr_64 || data->addr >= ms->addr_64 + len)
		return 0;
	/* Found, save it and stop walking. */
	data->start = ms->addr_64;
	data->end = ms->addr_64 + len;
	data->msl = msl;
	return 1;
}

/**
 * Create a new global Memory Region (MR) for a missing virtual address.
 * This API is called from a secondary process; it sends a request to the
 * primary process in order to create an MR for the address. As the global MR
 * list is in shared memory, the following LKey lookup should succeed unless
 * the request fails.
 *
 * @param pd
 *   Pointer to pd of a device (net, regex, vdpa,...).
 * @param mp_id
 *   Multi-process identifier, may be NULL for the primary process.
 * @param share_cache
 *   Pointer to a global shared MR cache.
 * @param[out] entry
 *   Pointer to returning MR cache entry, found in the global cache or newly
 *   created. If failed to create one, this will not be updated.
 * @param addr
 *   Target virtual address to register.
 * @param mr_ext_memseg_en
 *   Configurable flag about external memory segment enable or not.
 *
 * @return
 *   Searched LKey on success, UINT32_MAX on failure and rte_errno is set.
 */
static uint32_t
mlx5_mr_create_secondary(void *pd __rte_unused,
			 struct mlx5_mp_id *mp_id,
			 struct mlx5_mr_share_cache *share_cache,
			 struct mr_cache_entry *entry, uintptr_t addr,
			 unsigned int mr_ext_memseg_en __rte_unused)
{
	int ret;

	DRV_LOG(DEBUG, "port %u requesting MR creation for address (%p)",
		mp_id->port_id, (void *)addr);
	ret = mlx5_mp_req_mr_create(mp_id, addr);
	if (ret) {
		DRV_LOG(DEBUG, "Fail to request MR creation for address (%p)",
			(void *)addr);
		return UINT32_MAX;
	}
	rte_rwlock_read_lock(&share_cache->rwlock);
	/* Fill in output data. */
	mlx5_mr_lookup_cache(share_cache, entry, addr);
	/* Lookup can't fail. */
	MLX5_ASSERT(entry->lkey != UINT32_MAX);
	rte_rwlock_read_unlock(&share_cache->rwlock);
	DRV_LOG(DEBUG, "MR CREATED by primary process for %p:\n"
		" [0x%" PRIxPTR ", 0x%" PRIxPTR "), lkey=0x%x",
		(void *)addr, entry->start, entry->end, entry->lkey);
	return entry->lkey;
}

/**
 * Create a new global Memory Region (MR) for a missing virtual address.
 * Register the entire virtually contiguous memory chunk around the address.
 *
 * @param pd
 *   Pointer to pd of a device (net, regex, vdpa,...).
 * @param share_cache
 *   Pointer to a global shared MR cache.
 * @param[out] entry
 *   Pointer to returning MR cache entry, found in the global cache or newly
 *   created. If failed to create one, this will not be updated.
 * @param addr
 *   Target virtual address to register.
 * @param mr_ext_memseg_en
 *   Configurable flag about external memory segment enable or not.
 *
 * @return
 *   Searched LKey on success, UINT32_MAX on failure and rte_errno is set.
 */
uint32_t
mlx5_mr_create_primary(void *pd,
		       struct mlx5_mr_share_cache *share_cache,
		       struct mr_cache_entry *entry, uintptr_t addr,
		       unsigned int mr_ext_memseg_en)
{
	struct mr_find_contig_memsegs_data data = {.addr = addr, };
	struct mr_find_contig_memsegs_data data_re;
	const struct rte_memseg_list *msl;
	const struct rte_memseg *ms;
	struct mlx5_mr *mr = NULL;
	int ms_idx_shift = -1;
	uint32_t bmp_size;
	void *bmp_mem;
	uint32_t ms_n;
	uint32_t n;
	size_t len;

	DRV_LOG(DEBUG, "Creating a MR using address (%p)", (void *)addr);
	/*
	 * Release detached MRs if any. This can't be called while holding
	 * either memory_hotplug_lock or share_cache->rwlock. MRs on the free
	 * list have been detached by the memory free event but they couldn't
	 * be released inside the callback due to deadlock. As a result,
	 * releasing resources is quite opportunistic.
	 */
	mlx5_mr_garbage_collect(share_cache);
	/*
	 * If enabled, find out a contiguous virtual address chunk in use, to
	 * which the given address belongs, in order to register maximum range.
	 * In the best case where mempools are not dynamically recreated and
	 * '--socket-mem' is specified as an EAL option, it is very likely to
	 * have only one MR(LKey) per socket and per hugepage size even though
	 * the system memory is highly fragmented. As the whole memory chunk
	 * will be pinned by kernel, it can't be reused unless the entire chunk
	 * is freed from EAL.
	 *
	 * If disabled, just register one memseg (page). Then, memory
	 * consumption will be minimized but it may drop performance if there
	 * are many MRs to lookup on the datapath.
	 */
	if (!mr_ext_memseg_en) {
		data.msl = rte_mem_virt2memseg_list((void *)addr);
		data.start = RTE_ALIGN_FLOOR(addr, data.msl->page_sz);
		data.end = data.start + data.msl->page_sz;
	} else if (!rte_memseg_contig_walk(mr_find_contig_memsegs_cb, &data)) {
		DRV_LOG(WARNING,
			"Unable to find virtually contiguous"
			" chunk for address (%p)."
			" rte_memseg_contig_walk() failed.", (void *)addr);
		rte_errno = ENXIO;
		goto err_nolock;
	}
alloc_resources:
	/* Addresses must be page-aligned. */
	MLX5_ASSERT(data.msl);
	MLX5_ASSERT(rte_is_aligned((void *)data.start, data.msl->page_sz));
	MLX5_ASSERT(rte_is_aligned((void *)data.end, data.msl->page_sz));
	msl = data.msl;
	ms = rte_mem_virt2memseg((void *)data.start, msl);
	len = data.end - data.start;
	MLX5_ASSERT(ms);
	MLX5_ASSERT(msl->page_sz == ms->hugepage_sz);
	/* Number of memsegs in the range. */
	ms_n = len / msl->page_sz;
	DRV_LOG(DEBUG, "Extending %p to [0x%" PRIxPTR ", 0x%" PRIxPTR "),"
		" page_sz=0x%" PRIx64 ", ms_n=%u",
		(void *)addr, data.start, data.end, msl->page_sz, ms_n);
	/* Size of memory for bitmap. */
	bmp_size = rte_bitmap_get_memory_footprint(ms_n);
	mr = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
			 RTE_ALIGN_CEIL(sizeof(*mr), RTE_CACHE_LINE_SIZE) +
			 bmp_size, RTE_CACHE_LINE_SIZE, msl->socket_id);
	if (mr == NULL) {
		DRV_LOG(DEBUG, "Unable to allocate memory for a new MR of"
			" address (%p).", (void *)addr);
		rte_errno = ENOMEM;
		goto err_nolock;
	}
	mr->msl = msl;
	/*
	 * Save the index of the first memseg and initialize memseg bitmap. To
	 * see if a memseg of ms_idx in the memseg-list is still valid, check:
	 *	rte_bitmap_get(mr->bmp, ms_idx - mr->ms_base_idx)
	 */
	mr->ms_base_idx = rte_fbarray_find_idx(&msl->memseg_arr, ms);
	bmp_mem = RTE_PTR_ALIGN_CEIL(mr + 1, RTE_CACHE_LINE_SIZE);
	mr->ms_bmp = rte_bitmap_init(ms_n, bmp_mem, bmp_size);
	if (mr->ms_bmp == NULL) {
		DRV_LOG(DEBUG, "Unable to initialize bitmap for a new MR of"
			" address (%p).", (void *)addr);
		rte_errno = EINVAL;
		goto err_nolock;
	}
	/*
	 * Should recheck whether the extended contiguous chunk is still valid.
	 * Because memory_hotplug_lock can't be held if there are any memory
	 * related calls in a critical path, resource allocation above can't be
	 * locked. If the memory has been changed at this point, try again with
	 * just a single page. If not, go on with the big chunk atomically from
	 * here.
	 */
	rte_mcfg_mem_read_lock();
	data_re = data;
	if (len > msl->page_sz &&
	    !rte_memseg_contig_walk(mr_find_contig_memsegs_cb, &data_re)) {
		DRV_LOG(DEBUG,
			"Unable to find virtually contiguous chunk for address "
			"(%p). rte_memseg_contig_walk() failed.", (void *)addr);
		rte_errno = ENXIO;
		goto err_memlock;
	}
	if (data.start != data_re.start || data.end != data_re.end) {
		/*
		 * The extended contiguous chunk has been changed. Try again
		 * with single memseg instead.
		 */
		data.start = RTE_ALIGN_FLOOR(addr, msl->page_sz);
		data.end = data.start + msl->page_sz;
		rte_mcfg_mem_read_unlock();
		mlx5_mr_free(mr, share_cache->dereg_mr_cb);
		goto alloc_resources;
	}
	MLX5_ASSERT(data.msl == data_re.msl);
	rte_rwlock_write_lock(&share_cache->rwlock);
	/*
	 * Check the address is really missing. If another thread already
	 * created one or it is not found due to overflow, abort and return.
	 */
	if (mlx5_mr_lookup_cache(share_cache, entry, addr) != UINT32_MAX) {
		/*
		 * Insert to the global cache table. It may fail due to a lack
		 * of memory. Then, this entry will have to be searched here
		 * again.
		 */
		mr_btree_insert(&share_cache->cache, entry);
		DRV_LOG(DEBUG, "Found MR for %p on final lookup, abort",
			(void *)addr);
		rte_rwlock_write_unlock(&share_cache->rwlock);
		rte_mcfg_mem_read_unlock();
		/*
		 * Must be unlocked before calling rte_free() because
		 * mlx5_mr_mem_event_free_cb() can be called inside.
		 */
		mlx5_mr_free(mr, share_cache->dereg_mr_cb);
		return entry->lkey;
	}
	/*
	 * Trim start and end addresses for verbs MR. Set bits for registering
	 * memsegs but exclude already registered ones. Bitmap can be
	 * fragmented.
	 */
	for (n = 0; n < ms_n; ++n) {
		uintptr_t start;
		struct mr_cache_entry ret;

		memset(&ret, 0, sizeof(ret));
		start = data_re.start + n * msl->page_sz;
		/* Exclude memsegs already registered by other MRs. */
		if (mlx5_mr_lookup_cache(share_cache, &ret, start) ==
		    UINT32_MAX) {
			/*
			 * Start from the first unregistered memseg in the
			 * extended range.
			 */
			if (ms_idx_shift == -1) {
				mr->ms_base_idx += n;
				data.start = start;
				ms_idx_shift = n;
			}
			data.end = start + msl->page_sz;
			rte_bitmap_set(mr->ms_bmp, n - ms_idx_shift);
			++mr->ms_n;
		}
	}
	len = data.end - data.start;
	mr->ms_bmp_n = len / msl->page_sz;
	MLX5_ASSERT(ms_idx_shift + mr->ms_bmp_n <= ms_n);
	/*
	 * Finally create an MR for the memory chunk. Verbs: ibv_reg_mr() can
	 * be called while holding the memory lock because it doesn't use
	 * mlx5_alloc_buf_extern() which eventually calls rte_malloc_socket()
	 * through mlx5_alloc_verbs_buf().
	 */
	share_cache->reg_mr_cb(pd, (void *)data.start, len, &mr->pmd_mr);
	if (mr->pmd_mr.obj == NULL) {
		DRV_LOG(DEBUG, "Fail to create an MR for address (%p)",
			(void *)addr);
		rte_errno = EINVAL;
		goto err_mrlock;
	}
	MLX5_ASSERT((uintptr_t)mr->pmd_mr.addr == data.start);
	MLX5_ASSERT(mr->pmd_mr.len);
	LIST_INSERT_HEAD(&share_cache->mr_list, mr, mr);
	DRV_LOG(DEBUG, "MR CREATED (%p) for %p:\n"
		" [0x%" PRIxPTR ", 0x%" PRIxPTR "),"
		" lkey=0x%x base_idx=%u ms_n=%u, ms_bmp_n=%u",
		(void *)mr, (void *)addr, data.start, data.end,
		rte_cpu_to_be_32(mr->pmd_mr.lkey),
		mr->ms_base_idx, mr->ms_n, mr->ms_bmp_n);
	/* Insert to the global cache table. */
	mlx5_mr_insert_cache(share_cache, mr);
	/* Fill in output data. */
	mlx5_mr_lookup_cache(share_cache, entry, addr);
	/* Lookup can't fail. */
	MLX5_ASSERT(entry->lkey != UINT32_MAX);
	rte_rwlock_write_unlock(&share_cache->rwlock);
	rte_mcfg_mem_read_unlock();
	return entry->lkey;
err_mrlock:
	rte_rwlock_write_unlock(&share_cache->rwlock);
err_memlock:
	rte_mcfg_mem_read_unlock();
err_nolock:
	/*
	 * In case of error, as this can be called from a datapath, a warning
	 * message per error is preferable. Must be unlocked before calling
	 * rte_free() because mlx5_mr_mem_event_free_cb() can be called inside.
	 */
	mlx5_mr_free(mr, share_cache->dereg_mr_cb);
	return UINT32_MAX;
}

/**
 * Create a new global Memory Region (MR) for a missing virtual address.
 * This can be called from primary and secondary process.
 *
 * @param pd
 *   Pointer to pd handle of a device (net, regex, vdpa,...).
 * @param mp_id
 *   Multi-process identifier, may be NULL for the primary process.
 * @param share_cache
 *   Pointer to a global shared MR cache.
 * @param[out] entry
 *   Pointer to returning MR cache entry, found in the global cache or newly
 *   created. If failed to create one, this will not be updated.
 * @param addr
 *   Target virtual address to register.
 * @param mr_ext_memseg_en
 *   Configurable flag about external memory segment enable or not.
 *
 * @return
 *   Searched LKey on success, UINT32_MAX on failure and rte_errno is set.
 */
static uint32_t
mlx5_mr_create(void *pd, struct mlx5_mp_id *mp_id,
	       struct mlx5_mr_share_cache *share_cache,
	       struct mr_cache_entry *entry, uintptr_t addr,
	       unsigned int mr_ext_memseg_en)
{
	uint32_t ret = 0;

	switch (rte_eal_process_type()) {
	case RTE_PROC_PRIMARY:
		ret = mlx5_mr_create_primary(pd, share_cache, entry,
					     addr, mr_ext_memseg_en);
		break;
	case RTE_PROC_SECONDARY:
		ret = mlx5_mr_create_secondary(pd, mp_id, share_cache, entry,
					       addr, mr_ext_memseg_en);
		break;
	default:
		break;
	}
	return ret;
}

/**
 * Look up address in the global MR cache table. If not found, create a new MR.
 * Insert the found/created entry to the local bottom-half cache table.
 *
 * @param pd
 *   Pointer to pd of a device (net, regex, vdpa,...).
 * @param mp_id
 *   Multi-process identifier, may be NULL for the primary process.
 * @param share_cache
 *   Pointer to a global shared MR cache.
 * @param mr_ctrl
 *   Pointer to per-queue MR control structure.
 * @param[out] entry
 *   Pointer to returning MR cache entry, found in the global cache or newly
 *   created. If failed to create one, this is not written.
 * @param addr
 *   Search key.
 * @param mr_ext_memseg_en
 *   Configurable flag about external memory segment enable or not.
 *
 * @return
 *   Searched LKey on success, UINT32_MAX on no match.
 */
static uint32_t
mr_lookup_caches(void *pd, struct mlx5_mp_id *mp_id,
		 struct mlx5_mr_share_cache *share_cache,
		 struct mlx5_mr_ctrl *mr_ctrl,
		 struct mr_cache_entry *entry, uintptr_t addr,
		 unsigned int mr_ext_memseg_en)
{
	struct mlx5_mr_btree *bt = &mr_ctrl->cache_bh;
	uint32_t lkey;
	uint16_t idx;

	/* If local cache table is full, try to double it. */
	if (unlikely(bt->len == bt->size))
		mr_btree_expand(bt, bt->size << 1);
	/* Look up in the global cache. */
	rte_rwlock_read_lock(&share_cache->rwlock);
	lkey = mr_btree_lookup(&share_cache->cache, &idx, addr);
	if (lkey != UINT32_MAX) {
		/* Found. */
		*entry = (*share_cache->cache.table)[idx];
		rte_rwlock_read_unlock(&share_cache->rwlock);
		/*
		 * Update local cache. Even if it fails, return the found entry
		 * to update top-half cache. Next time, this entry will be found
		 * in the global cache.
		 */
		mr_btree_insert(bt, entry);
		return lkey;
	}
	rte_rwlock_read_unlock(&share_cache->rwlock);
	/* First time to see the address? Create a new MR. */
	lkey = mlx5_mr_create(pd, mp_id, share_cache, entry, addr,
			      mr_ext_memseg_en);
	/*
	 * Update the local cache if a new global MR was successfully created.
	 * If one could not be created, there is no action to take in this
	 * datapath code: the returned LKey is invalid and will eventually make
	 * the HW fail.
	 */
	if (lkey != UINT32_MAX)
		mr_btree_insert(bt, entry);
	return lkey;
}

/**
 * Bottom-half of LKey search on datapath. First search in cache_bh[] and if
 * it misses, search in the global MR cache table and update the new entry to
 * per-queue local caches.
 *
 * @param pd
 *   Pointer to pd of a device (net, regex, vdpa,...).
 * @param mp_id
 *   Multi-process identifier, may be NULL for the primary process.
 * @param share_cache
 *   Pointer to a global shared MR cache.
 * @param mr_ctrl
 *   Pointer to per-queue MR control structure.
 * @param addr
 *   Search key.
 * @param mr_ext_memseg_en
 *   Configurable flag about external memory segment enable or not.
 *
 * @return
 *   Searched LKey on success, UINT32_MAX on no match.
 */
uint32_t mlx5_mr_addr2mr_bh(void *pd, struct mlx5_mp_id *mp_id,
			    struct mlx5_mr_share_cache *share_cache,
			    struct mlx5_mr_ctrl *mr_ctrl,
			    uintptr_t addr, unsigned int mr_ext_memseg_en)
{
	uint32_t lkey;
	uint16_t bh_idx = 0;
	/* Victim in top-half cache to replace with new entry. */
	struct mr_cache_entry *repl = &mr_ctrl->cache[mr_ctrl->head];

	/* Binary-search MR translation table. */
	lkey = mr_btree_lookup(&mr_ctrl->cache_bh, &bh_idx, addr);
	/* Update top-half cache. */
	if (likely(lkey != UINT32_MAX)) {
		*repl = (*mr_ctrl->cache_bh.table)[bh_idx];
	} else {
		/*
		 * If missed in local lookup table, search in the global cache
		 * and local cache_bh[] will be updated inside if possible.
		 * Top-half cache entry will also be updated.
		 */
		lkey = mr_lookup_caches(pd, mp_id, share_cache, mr_ctrl,
					repl, addr, mr_ext_memseg_en);
		if (unlikely(lkey == UINT32_MAX))
			return UINT32_MAX;
	}
	/* Update the most recently used entry. */
	mr_ctrl->mru = mr_ctrl->head;
	/* Point to the next victim, the oldest. */
	mr_ctrl->head = (mr_ctrl->head + 1) % MLX5_MR_CACHE_N;
	return lkey;
}
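
/*
 * Illustrative sketch (not part of the driver) of the intended split between
 * the linear top-half cache and the bottom-half B-tree above for one queue;
 * "sh", "mp_id", "socket" and "mr_ext_memseg_en" are hypothetical caller
 * state, everything else is defined in this file or its header:
 *
 *	struct mlx5_mr_ctrl mr_ctrl;
 *	uint32_t lkey;
 *
 *	... control path: bind the per-queue cache to the global generation ...
 *	mlx5_mr_ctrl_init(&mr_ctrl, &sh->share_cache.dev_gen, socket);
 *
 *	... datapath: linear search first, bottom-half only on a miss ...
 *	lkey = mlx5_mr_lookup_lkey(mr_ctrl.cache, &mr_ctrl.mru,
 *				   MLX5_MR_CACHE_N, addr);
 *	if (lkey == UINT32_MAX)
 *		lkey = mlx5_mr_addr2mr_bh(sh->pd, &mp_id, &sh->share_cache,
 *					  &mr_ctrl, addr, mr_ext_memseg_en);
 */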

/**
 * Release all the created MRs and resources of the global MR cache of a device.
 *
 * @param share_cache
 *   Pointer to a global shared MR cache.
 */
void
mlx5_mr_release_cache(struct mlx5_mr_share_cache *share_cache)
{
	struct mlx5_mr *mr_next;

	rte_rwlock_write_lock(&share_cache->rwlock);
	/* Detach from MR list and move to free list. */
	mr_next = LIST_FIRST(&share_cache->mr_list);
	while (mr_next != NULL) {
		struct mlx5_mr *mr = mr_next;

		mr_next = LIST_NEXT(mr, mr);
		LIST_REMOVE(mr, mr);
		LIST_INSERT_HEAD(&share_cache->mr_free_list, mr, mr);
	}
	LIST_INIT(&share_cache->mr_list);
	/* Free global cache. */
	mlx5_mr_btree_free(&share_cache->cache);
	rte_rwlock_write_unlock(&share_cache->rwlock);
	/* Free all remaining MRs. */
	mlx5_mr_garbage_collect(share_cache);
}

/**
 * Initialize global MR cache of a device.
 *
 * @param share_cache
 *   Pointer to a global shared MR cache.
 * @param socket
 *   NUMA socket on which memory must be allocated.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_mr_create_cache(struct mlx5_mr_share_cache *share_cache, int socket)
{
	/* Set the reg_mr and dereg_mr callback functions. */
	mlx5_os_set_reg_mr_cb(&share_cache->reg_mr_cb,
			      &share_cache->dereg_mr_cb);
	rte_rwlock_init(&share_cache->rwlock);
	/* Initialize B-tree and allocate memory for global MR cache table. */
	return mlx5_mr_btree_init(&share_cache->cache,
				  MLX5_MR_BTREE_CACHE_N * 2, socket);
}

/**
 * Flush all of the local cache entries.
 *
 * @param mr_ctrl
 *   Pointer to per-queue MR local cache.
 */
void
mlx5_mr_flush_local_cache(struct mlx5_mr_ctrl *mr_ctrl)
{
	/* Reset the most-recently-used index. */
	mr_ctrl->mru = 0;
	/* Reset the linear search array. */
	mr_ctrl->head = 0;
	memset(mr_ctrl->cache, 0, sizeof(mr_ctrl->cache));
	/* Reset the B-tree table. */
	mr_ctrl->cache_bh.len = 1;
	mr_ctrl->cache_bh.overflow = 0;
	/* Update the generation number. */
	mr_ctrl->cur_gen = *mr_ctrl->dev_gen_ptr;
	DRV_LOG(DEBUG, "mr_ctrl(%p): flushed, cur_gen=%d",
		(void *)mr_ctrl, mr_ctrl->cur_gen);
}

/**
 * Create a memory region for external memory, that is, memory which is not
 * part of the DPDK memory segments.
 *
 * @param pd
 *   Pointer to pd of a device (net, regex, vdpa,...).
 * @param addr
 *   Starting virtual address of memory.
 * @param len
 *   Length of memory segment being mapped.
 * @param socket_id
 *   Socket to allocate heap memory for the control structures.
 * @param reg_mr_cb
 *   Callback to register the memory region with the device.
 *
 * @return
 *   Pointer to MR structure on success, NULL otherwise.
 */
struct mlx5_mr *
mlx5_create_mr_ext(void *pd, uintptr_t addr, size_t len, int socket_id,
		   mlx5_reg_mr_t reg_mr_cb)
{
	struct mlx5_mr *mr = NULL;

	mr = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
			 RTE_ALIGN_CEIL(sizeof(*mr), RTE_CACHE_LINE_SIZE),
			 RTE_CACHE_LINE_SIZE, socket_id);
	if (mr == NULL)
		return NULL;
	reg_mr_cb(pd, (void *)addr, len, &mr->pmd_mr);
	if (mr->pmd_mr.obj == NULL) {
		DRV_LOG(WARNING,
			"Fail to create MR for address (%p)",
			(void *)addr);
		mlx5_free(mr);
		return NULL;
	}
	mr->msl = NULL; /* Mark it as external memory. */
	mr->ms_bmp = NULL;
	mr->ms_n = 1;
	mr->ms_bmp_n = 1;
	DRV_LOG(DEBUG,
		"MR CREATED (%p) for external memory %p:\n"
		" [0x%" PRIxPTR ", 0x%" PRIxPTR "),"
		" lkey=0x%x base_idx=%u ms_n=%u, ms_bmp_n=%u",
		(void *)mr, (void *)addr,
		addr, addr + len, rte_cpu_to_be_32(mr->pmd_mr.lkey),
		mr->ms_base_idx, mr->ms_n, mr->ms_bmp_n);
	return mr;
}

/**
 * Callback for memory free event. Iterate over freed memsegs and check whether
 * each belongs to an existing MR. If found, clear the bit in the MR's bitmap;
 * as a result, the MR becomes fragmented. If it becomes empty, the MR will be
 * freed later by mlx5_mr_garbage_collect(). Even if this callback is called
 * from a secondary process, the garbage collector will be called in the
 * primary process as the secondary process can't call mlx5_mr_create().
 *
 * The global cache must be rebuilt if there's any change and this event has to
 * be propagated to dataplane threads to flush the local caches.
 *
 * @param share_cache
 *   Pointer to a global shared MR cache.
 * @param ibdev_name
 *   Name of ibv device.
 * @param addr
 *   Address of freed memory.
 * @param len
 *   Size of freed memory.
 */
void
mlx5_free_mr_by_addr(struct mlx5_mr_share_cache *share_cache,
		     const char *ibdev_name, const void *addr, size_t len)
{
	const struct rte_memseg_list *msl;
	struct mlx5_mr *mr;
	int ms_n;
	int i;
	int rebuild = 0;

	DRV_LOG(DEBUG, "device %s free callback: addr=%p, len=%zu",
		ibdev_name, addr, len);
	msl = rte_mem_virt2memseg_list(addr);
	/* addr and len must be page-aligned. */
	MLX5_ASSERT((uintptr_t)addr ==
		    RTE_ALIGN((uintptr_t)addr, msl->page_sz));
	MLX5_ASSERT(len == RTE_ALIGN(len, msl->page_sz));
	ms_n = len / msl->page_sz;
	rte_rwlock_write_lock(&share_cache->rwlock);
	/* Clear bits of freed memsegs from MR. */
	for (i = 0; i < ms_n; ++i) {
		const struct rte_memseg *ms;
		struct mr_cache_entry entry;
		uintptr_t start;
		int ms_idx;
		uint32_t pos;

		/* Find MR having this memseg. */
		start = (uintptr_t)addr + i * msl->page_sz;
		mr = mlx5_mr_lookup_list(share_cache, &entry, start);
		if (mr == NULL)
			continue;
		MLX5_ASSERT(mr->msl); /* Can't be external memory. */
		ms = rte_mem_virt2memseg((void *)start, msl);
		MLX5_ASSERT(ms != NULL);
		MLX5_ASSERT(msl->page_sz == ms->hugepage_sz);
		ms_idx = rte_fbarray_find_idx(&msl->memseg_arr, ms);
		pos = ms_idx - mr->ms_base_idx;
		MLX5_ASSERT(rte_bitmap_get(mr->ms_bmp, pos));
		MLX5_ASSERT(pos < mr->ms_bmp_n);
		DRV_LOG(DEBUG, "device %s MR(%p): clear bitmap[%u] for addr %p",
			ibdev_name, (void *)mr, pos, (void *)start);
		rte_bitmap_clear(mr->ms_bmp, pos);
		if (--mr->ms_n == 0) {
			LIST_REMOVE(mr, mr);
			LIST_INSERT_HEAD(&share_cache->mr_free_list, mr, mr);
			DRV_LOG(DEBUG, "device %s remove MR(%p) from list",
				ibdev_name, (void *)mr);
		}
		/*
		 * MR is fragmented or will be freed. The global cache must be
		 * rebuilt.
		 */
		rebuild = 1;
	}
	if (rebuild) {
		mlx5_mr_rebuild_cache(share_cache);
		/*
		 * No explicit wmb is needed after updating dev_gen due to
		 * store-release ordering in unlock that provides the
		 * implicit barrier at the software visible level.
		 */
		++share_cache->dev_gen;
		DRV_LOG(DEBUG, "broadcasting local cache flush, gen=%d",
			share_cache->dev_gen);
	}
	rte_rwlock_write_unlock(&share_cache->rwlock);
}

/**
 * Dump all the created MRs and the global cache entries.
 *
 * @param share_cache
 *   Pointer to a global shared MR cache.
 */
void
mlx5_mr_dump_cache(struct mlx5_mr_share_cache *share_cache __rte_unused)
{
#ifdef RTE_LIBRTE_MLX5_DEBUG
	struct mlx5_mr *mr;
	int mr_n = 0;
	int chunk_n = 0;

	rte_rwlock_read_lock(&share_cache->rwlock);
	/* Iterate all the existing MRs. */
	LIST_FOREACH(mr, &share_cache->mr_list, mr) {
		unsigned int n;

		DRV_LOG(DEBUG, "MR[%u], LKey = 0x%x, ms_n = %u, ms_bmp_n = %u",
			mr_n++, rte_cpu_to_be_32(mr->pmd_mr.lkey),
			mr->ms_n, mr->ms_bmp_n);
		if (mr->ms_n == 0)
			continue;
		for (n = 0; n < mr->ms_bmp_n; ) {
			struct mr_cache_entry ret = { 0, };

			n = mr_find_next_chunk(mr, &ret, n);
			if (!ret.end)
				break;
			DRV_LOG(DEBUG,
				" chunk[%u], [0x%" PRIxPTR ", 0x%" PRIxPTR ")",
				chunk_n++, ret.start, ret.end);
		}
	}
	DRV_LOG(DEBUG, "Dumping global cache %p", (void *)share_cache);
	mlx5_mr_btree_dump(&share_cache->cache);
	rte_rwlock_read_unlock(&share_cache->rwlock);
#endif
}

static int
mlx5_range_compare_start(const void *lhs, const void *rhs)
{
	const struct mlx5_range *r1 = lhs, *r2 = rhs;

	if (r1->start > r2->start)
		return 1;
	else if (r1->start < r2->start)
		return -1;
	return 0;
}

static void
mlx5_range_from_mempool_chunk(struct rte_mempool *mp, void *opaque,
			      struct rte_mempool_memhdr *memhdr,
			      unsigned int idx)
{
	struct mlx5_range *ranges = opaque, *range = &ranges[idx];
	uint64_t page_size = rte_mem_page_size();

	RTE_SET_USED(mp);
	range->start = RTE_ALIGN_FLOOR((uintptr_t)memhdr->addr, page_size);
	range->end = RTE_ALIGN_CEIL(range->start + memhdr->len, page_size);
}

/**
 * Get VA-contiguous ranges of the mempool memory.
 * Each range start and end is aligned to the system page size.
 *
 * @param[in] mp
 *   Analyzed mempool.
 * @param[out] out
 *   Receives the ranges, caller must release it with free().
 * @param[out] out_n
 *   Receives the number of @p out elements.
 *
 * @return
 *   0 on success, (-1) on failure.
 */
static int
mlx5_get_mempool_ranges(struct rte_mempool *mp, struct mlx5_range **out,
			unsigned int *out_n)
{
	struct mlx5_range *chunks;
	unsigned int chunks_n = mp->nb_mem_chunks, contig_n, i;

	/* Collect page-aligned memory ranges of the mempool. */
	chunks = calloc(sizeof(chunks[0]), chunks_n);
	if (chunks == NULL)
		return -1;
	rte_mempool_mem_iter(mp, mlx5_range_from_mempool_chunk, chunks);
	/* Merge adjacent chunks and place them at the beginning. */
	qsort(chunks, chunks_n, sizeof(chunks[0]), mlx5_range_compare_start);
	contig_n = 1;
	for (i = 1; i < chunks_n; i++)
		if (chunks[i - 1].end != chunks[i].start) {
			chunks[contig_n - 1].end = chunks[i - 1].end;
			chunks[contig_n] = chunks[i];
			contig_n++;
		}
	/* Extend the last contiguous chunk to the end of the mempool. */
	chunks[contig_n - 1].end = chunks[i - 1].end;
	*out = chunks;
	*out_n = contig_n;
	return 0;
}
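
/*
 * Worked example for the merge above, assuming a 4 KiB page size: sorted
 * chunks [0x1000, 0x3000), [0x3000, 0x5000) and [0x8000, 0x9000) collapse
 * into [0x1000, 0x5000) and [0x8000, 0x9000). Adjacent chunks are fused and a
 * gap starts a new range, so contig_n never exceeds nb_mem_chunks.
 */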

/**
 * Analyze mempool memory to select memory ranges to register.
 *
 * @param[in] mp
 *   Mempool to analyze.
 * @param[out] out
 *   Receives memory ranges to register, aligned to the system page size.
 *   The caller must release them with free().
 * @param[out] out_n
 *   Receives the number of @p out items.
 * @param[out] share_hugepage
 *   Receives True if the entire pool resides within a single hugepage.
 *
 * @return
 *   0 on success, (-1) on failure.
 */
static int
mlx5_mempool_reg_analyze(struct rte_mempool *mp, struct mlx5_range **out,
			 unsigned int *out_n, bool *share_hugepage)
{
	struct mlx5_range *ranges = NULL;
	unsigned int i, ranges_n = 0;
	struct rte_memseg_list *msl;

	if (mlx5_get_mempool_ranges(mp, &ranges, &ranges_n) < 0) {
		DRV_LOG(ERR, "Cannot get address ranges for mempool %s",
			mp->name);
		return -1;
	}
	/* Check if the hugepage of the pool can be shared. */
	*share_hugepage = false;
	msl = rte_mem_virt2memseg_list((void *)ranges[0].start);
	if (msl != NULL) {
		uint64_t hugepage_sz = 0;

		/* Check that all ranges are on pages of the same size. */
		for (i = 0; i < ranges_n; i++) {
			if (hugepage_sz != 0 && hugepage_sz != msl->page_sz)
				break;
			hugepage_sz = msl->page_sz;
		}
		if (i == ranges_n) {
			/*
			 * If the entire pool is within one hugepage,
			 * combine all ranges into one of the hugepage size.
			 */
			uintptr_t reg_start = ranges[0].start;
			uintptr_t reg_end = ranges[ranges_n - 1].end;
			uintptr_t hugepage_start =
				RTE_ALIGN_FLOOR(reg_start, hugepage_sz);
			uintptr_t hugepage_end = hugepage_start + hugepage_sz;
			if (reg_end < hugepage_end) {
				ranges[0].start = hugepage_start;
				ranges[0].end = hugepage_end;
				ranges_n = 1;
				*share_hugepage = true;
			}
		}
	}
	*out = ranges;
	*out_n = ranges_n;
	return 0;
}

/** Create a registration object for the mempool. */
static struct mlx5_mempool_reg *
mlx5_mempool_reg_create(struct rte_mempool *mp, unsigned int mrs_n)
{
	struct mlx5_mempool_reg *mpr = NULL;

	mpr = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
			  sizeof(*mpr) + mrs_n * sizeof(mpr->mrs[0]),
			  RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
	if (mpr == NULL) {
		DRV_LOG(ERR, "Cannot allocate mempool %s registration object",
			mp->name);
		return NULL;
	}
	mpr->mp = mp;
	mpr->mrs = (struct mlx5_mempool_mr *)(mpr + 1);
	mpr->mrs_n = mrs_n;
	return mpr;
}

/**
 * Destroy a mempool registration object.
 *
 * @param standalone
 *   Whether @p mpr owns its MRs exclusively, i.e. they are not shared.
 */
static void
mlx5_mempool_reg_destroy(struct mlx5_mr_share_cache *share_cache,
			 struct mlx5_mempool_reg *mpr, bool standalone)
{
	if (standalone) {
		unsigned int i;

		for (i = 0; i < mpr->mrs_n; i++)
			share_cache->dereg_mr_cb(&mpr->mrs[i].pmd_mr);
	}
	mlx5_free(mpr);
}

/** Find registration object of a mempool. */
static struct mlx5_mempool_reg *
mlx5_mempool_reg_lookup(struct mlx5_mr_share_cache *share_cache,
			struct rte_mempool *mp)
{
	struct mlx5_mempool_reg *mpr;

	LIST_FOREACH(mpr, &share_cache->mempool_reg_list, next)
		if (mpr->mp == mp)
			break;
	return mpr;
}

/** Increment reference counters of MRs used in the registration. */
static void
mlx5_mempool_reg_attach(struct mlx5_mempool_reg *mpr)
{
	unsigned int i;

	for (i = 0; i < mpr->mrs_n; i++)
		__atomic_add_fetch(&mpr->mrs[i].refcnt, 1, __ATOMIC_RELAXED);
}

/**
 * Decrement reference counters of MRs used in the registration.
 *
 * @return True if no more references to @p mpr MRs exist, False otherwise.
 */
static bool
mlx5_mempool_reg_detach(struct mlx5_mempool_reg *mpr)
{
	unsigned int i;
	bool ret = false;

	for (i = 0; i < mpr->mrs_n; i++)
		ret |= __atomic_sub_fetch(&mpr->mrs[i].refcnt, 1,
					  __ATOMIC_RELAXED) == 0;
	return ret;
}

static int
mlx5_mr_mempool_register_primary(struct mlx5_mr_share_cache *share_cache,
				 void *pd, struct rte_mempool *mp)
{
	struct mlx5_range *ranges = NULL;
	struct mlx5_mempool_reg *mpr, *new_mpr;
	unsigned int i, ranges_n;
	bool share_hugepage;
	int ret = -1;

	/* Early check to avoid unnecessary creation of MRs. */
	rte_rwlock_read_lock(&share_cache->rwlock);
	mpr = mlx5_mempool_reg_lookup(share_cache, mp);
	rte_rwlock_read_unlock(&share_cache->rwlock);
	if (mpr != NULL) {
		DRV_LOG(DEBUG, "Mempool %s is already registered for PD %p",
			mp->name, pd);
		rte_errno = EEXIST;
		goto exit;
	}
	if (mlx5_mempool_reg_analyze(mp, &ranges, &ranges_n,
				     &share_hugepage) < 0) {
		DRV_LOG(ERR, "Cannot get mempool %s memory ranges", mp->name);
		rte_errno = ENOMEM;
		goto exit;
	}
	new_mpr = mlx5_mempool_reg_create(mp, ranges_n);
	if (new_mpr == NULL) {
		DRV_LOG(ERR,
			"Cannot create a registration object for mempool %s in PD %p",
			mp->name, pd);
		rte_errno = ENOMEM;
		goto exit;
	}
	/*
	 * If the entire mempool fits in a single hugepage, the MR for this
	 * hugepage can be shared across mempools that also fit in it.
	 */
	if (share_hugepage) {
		rte_rwlock_write_lock(&share_cache->rwlock);
		LIST_FOREACH(mpr, &share_cache->mempool_reg_list, next) {
			if (mpr->mrs[0].pmd_mr.addr == (void *)ranges[0].start)
				break;
		}
		if (mpr != NULL) {
			new_mpr->mrs = mpr->mrs;
			mlx5_mempool_reg_attach(new_mpr);
			LIST_INSERT_HEAD(&share_cache->mempool_reg_list,
					 new_mpr, next);
		}
		rte_rwlock_write_unlock(&share_cache->rwlock);
		if (mpr != NULL) {
			DRV_LOG(DEBUG, "Shared MR %#x in PD %p for mempool %s with mempool %s",
				mpr->mrs[0].pmd_mr.lkey, pd, mp->name,
				mpr->mp->name);
			ret = 0;
			goto exit;
		}
	}
	for (i = 0; i < ranges_n; i++) {
		struct mlx5_mempool_mr *mr = &new_mpr->mrs[i];
		const struct mlx5_range *range = &ranges[i];
		size_t len = range->end - range->start;

		if (share_cache->reg_mr_cb(pd, (void *)range->start, len,
		    &mr->pmd_mr) < 0) {
			DRV_LOG(ERR,
				"Failed to create an MR in PD %p for address range "
				"[0x%" PRIxPTR ", 0x%" PRIxPTR "] (%zu bytes) for mempool %s",
				pd, range->start, range->end, len, mp->name);
			break;
		}
		DRV_LOG(DEBUG,
			"Created a new MR %#x in PD %p for address range "
			"[0x%" PRIxPTR ", 0x%" PRIxPTR "] (%zu bytes) for mempool %s",
			mr->pmd_mr.lkey, pd, range->start, range->end, len,
			mp->name);
	}
	if (i != ranges_n) {
		mlx5_mempool_reg_destroy(share_cache, new_mpr, true);
		rte_errno = EINVAL;
		goto exit;
	}
	/* Concurrent registration is not supposed to happen. */
	rte_rwlock_write_lock(&share_cache->rwlock);
	mpr = mlx5_mempool_reg_lookup(share_cache, mp);
	if (mpr == NULL) {
		mlx5_mempool_reg_attach(new_mpr);
		LIST_INSERT_HEAD(&share_cache->mempool_reg_list,
				 new_mpr, next);
		ret = 0;
	}
	rte_rwlock_write_unlock(&share_cache->rwlock);
	if (mpr != NULL) {
		DRV_LOG(DEBUG, "Mempool %s is already registered for PD %p",
			mp->name, pd);
		mlx5_mempool_reg_destroy(share_cache, new_mpr, true);
		rte_errno = EEXIST;
		goto exit;
	}
exit:
	free(ranges);
	return ret;
}

static int
mlx5_mr_mempool_register_secondary(struct mlx5_mr_share_cache *share_cache,
				   void *pd, struct rte_mempool *mp,
				   struct mlx5_mp_id *mp_id)
{
	if (mp_id == NULL) {
		rte_errno = EINVAL;
		return -1;
	}
	return mlx5_mp_req_mempool_reg(mp_id, share_cache, pd, mp, true);
}

/**
 * Register the memory of a mempool in the protection domain.
 *
 * @param share_cache
 *   Shared MR cache of the protection domain.
 * @param pd
 *   Protection domain object.
 * @param mp
 *   Mempool to register.
 * @param mp_id
 *   Multi-process identifier, may be NULL for the primary process.
 *
 * @return
 *   0 on success, (-1) on failure and rte_errno is set.
 */
int
mlx5_mr_mempool_register(struct mlx5_mr_share_cache *share_cache, void *pd,
			 struct rte_mempool *mp, struct mlx5_mp_id *mp_id)
{
	if (mp->flags & RTE_MEMPOOL_F_NON_IO)
		return 0;
	switch (rte_eal_process_type()) {
	case RTE_PROC_PRIMARY:
		return mlx5_mr_mempool_register_primary(share_cache, pd, mp);
	case RTE_PROC_SECONDARY:
		return mlx5_mr_mempool_register_secondary(share_cache, pd, mp,
							  mp_id);
	default:
		return -1;
	}
}
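
/*
 * Illustrative sketch (not part of the driver): a PMD typically pairs the
 * calls below around queue setup and teardown for every Rx/Tx mempool.
 * "cdev" stands for a hypothetical mlx5_common_device and "mp" for the
 * mempool in use; treating EEXIST as success is the caller's choice:
 *
 *	if (mlx5_mr_mempool_register(&cdev->mr_scache, cdev->pd, mp,
 *				     &mp_id) < 0 && rte_errno != EEXIST)
 *		return -rte_errno;
 *	...
 *	(void)mlx5_mr_mempool_unregister(&cdev->mr_scache, mp, &mp_id);
 */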

static int
mlx5_mr_mempool_unregister_primary(struct mlx5_mr_share_cache *share_cache,
				   struct rte_mempool *mp)
{
	struct mlx5_mempool_reg *mpr;
	bool standalone = false;

	rte_rwlock_write_lock(&share_cache->rwlock);
	LIST_FOREACH(mpr, &share_cache->mempool_reg_list, next)
		if (mpr->mp == mp) {
			LIST_REMOVE(mpr, next);
			standalone = mlx5_mempool_reg_detach(mpr);
			if (standalone)
				/*
				 * The unlock operation below provides a memory
				 * barrier due to its store-release semantics.
				 */
				++share_cache->dev_gen;
			break;
		}
	rte_rwlock_write_unlock(&share_cache->rwlock);
	if (mpr == NULL) {
		rte_errno = ENOENT;
		return -1;
	}
	mlx5_mempool_reg_destroy(share_cache, mpr, standalone);
	return 0;
}

static int
mlx5_mr_mempool_unregister_secondary(struct mlx5_mr_share_cache *share_cache,
				     struct rte_mempool *mp,
				     struct mlx5_mp_id *mp_id)
{
	if (mp_id == NULL) {
		rte_errno = EINVAL;
		return -1;
	}
	return mlx5_mp_req_mempool_reg(mp_id, share_cache, NULL, mp, false);
}

/**
 * Unregister the memory of a mempool from the protection domain.
 *
 * @param share_cache
 *   Shared MR cache of the protection domain.
 * @param mp
 *   Mempool to unregister.
 * @param mp_id
 *   Multi-process identifier, may be NULL for the primary process.
 *
 * @return
 *   0 on success, (-1) on failure and rte_errno is set.
 */
int
mlx5_mr_mempool_unregister(struct mlx5_mr_share_cache *share_cache,
			   struct rte_mempool *mp, struct mlx5_mp_id *mp_id)
{
	if (mp->flags & RTE_MEMPOOL_F_NON_IO)
		return 0;
	switch (rte_eal_process_type()) {
	case RTE_PROC_PRIMARY:
		return mlx5_mr_mempool_unregister_primary(share_cache, mp);
	case RTE_PROC_SECONDARY:
		return mlx5_mr_mempool_unregister_secondary(share_cache, mp,
							    mp_id);
	default:
		return -1;
	}
}

/**
 * Look up an MR key by an address in a registered mempool.
 *
 * @param mpr
 *   Mempool registration object.
 * @param addr
 *   Address within the mempool.
 * @param entry
 *   Bottom-half cache entry to fill.
 *
 * @return
 *   MR key or UINT32_MAX on failure, which can only happen
 *   if the address is not from within the mempool.
 */
static uint32_t
mlx5_mempool_reg_addr2mr(struct mlx5_mempool_reg *mpr, uintptr_t addr,
			 struct mr_cache_entry *entry)
{
	uint32_t lkey = UINT32_MAX;
	unsigned int i;

	for (i = 0; i < mpr->mrs_n; i++) {
		const struct mlx5_pmd_mr *mr = &mpr->mrs[i].pmd_mr;
		uintptr_t mr_start = (uintptr_t)mr->addr;
		uintptr_t mr_end = mr_start + mr->len;

		/* Check both bounds: a registration may hold several MRs. */
		if (mr_start <= addr && addr < mr_end) {
			lkey = rte_cpu_to_be_32(mr->lkey);
			entry->start = mr_start;
			entry->end = mr_end;
			entry->lkey = lkey;
			break;
		}
	}
	return lkey;
}

/**
 * Update bottom-half cache from the list of mempool registrations.
 *
 * @param share_cache
 *   Pointer to a global shared MR cache.
 * @param mr_ctrl
 *   Per-queue MR control handle.
 * @param entry
 *   Pointer to an entry in the bottom-half cache to update
 *   with the MR lkey looked up.
 * @param mp
 *   Mempool containing the address.
 * @param addr
 *   Address to lookup.
 * @return
 *   MR lkey on success, UINT32_MAX on failure.
 */
static uint32_t
mlx5_lookup_mempool_regs(struct mlx5_mr_share_cache *share_cache,
			 struct mlx5_mr_ctrl *mr_ctrl,
			 struct mr_cache_entry *entry,
			 struct rte_mempool *mp, uintptr_t addr)
{
	struct mlx5_mr_btree *bt = &mr_ctrl->cache_bh;
	struct mlx5_mempool_reg *mpr;
	uint32_t lkey = UINT32_MAX;

	/* If local cache table is full, try to double it. */
	if (unlikely(bt->len == bt->size))
		mr_btree_expand(bt, bt->size << 1);
	/* Look up in mempool registrations. */
	rte_rwlock_read_lock(&share_cache->rwlock);
	mpr = mlx5_mempool_reg_lookup(share_cache, mp);
	if (mpr != NULL)
		lkey = mlx5_mempool_reg_addr2mr(mpr, addr, entry);
	rte_rwlock_read_unlock(&share_cache->rwlock);
	/*
	 * Update local cache. Even if it fails, return the found entry
	 * to update top-half cache. Next time, this entry will be found
	 * in the global cache.
	 */
	if (lkey != UINT32_MAX)
		mr_btree_insert(bt, entry);
	return lkey;
}

/**
 * Bottom-half lookup for the address from the mempool.
 *
 * @param share_cache
 *   Pointer to a global shared MR cache.
 * @param mr_ctrl
 *   Per-queue MR control handle.
 * @param mp
 *   Mempool containing the address.
 * @param addr
 *   Address to lookup.
 * @return
 *   MR lkey on success, UINT32_MAX on failure.
 */
uint32_t
mlx5_mr_mempool2mr_bh(struct mlx5_mr_share_cache *share_cache,
		      struct mlx5_mr_ctrl *mr_ctrl,
		      struct rte_mempool *mp, uintptr_t addr)
{
	struct mr_cache_entry *repl = &mr_ctrl->cache[mr_ctrl->head];
	uint32_t lkey;
	uint16_t bh_idx = 0;

	/* Binary-search MR translation table. */
	lkey = mr_btree_lookup(&mr_ctrl->cache_bh, &bh_idx, addr);
	/* Update top-half cache. */
	if (likely(lkey != UINT32_MAX)) {
		*repl = (*mr_ctrl->cache_bh.table)[bh_idx];
	} else {
		lkey = mlx5_lookup_mempool_regs(share_cache, mr_ctrl, repl,
						mp, addr);
		/* Can only fail if the address is not from the mempool. */
		if (unlikely(lkey == UINT32_MAX))
			return UINT32_MAX;
	}
	/* Update the most recently used entry. */
	mr_ctrl->mru = mr_ctrl->head;
	/* Point to the next victim, the oldest. */
	mr_ctrl->head = (mr_ctrl->head + 1) % MLX5_MR_CACHE_N;
	return lkey;
}

/**
 * Query LKey from a packet buffer.
 *
 * @param cdev
 *   Pointer to the mlx5 device structure.
 * @param mp_id
 *   Multi-process identifier, may be NULL for the primary process.
 * @param mr_ctrl
 *   Pointer to per-queue MR control structure.
 * @param mbuf
 *   Pointer to mbuf.
 *
 * @return
 *   Searched LKey on success, UINT32_MAX on no match.
 */
uint32_t
mlx5_mr_mb2mr(struct mlx5_common_device *cdev, struct mlx5_mp_id *mp_id,
	      struct mlx5_mr_ctrl *mr_ctrl, struct rte_mbuf *mbuf)
{
	uint32_t lkey;
	uintptr_t addr = (uintptr_t)mbuf->buf_addr;

	/* Check generation bit to see if there's any change on existing MRs. */
	if (unlikely(*mr_ctrl->dev_gen_ptr != mr_ctrl->cur_gen))
		mlx5_mr_flush_local_cache(mr_ctrl);
	/* Linear search on MR cache array. */
	lkey = mlx5_mr_lookup_lkey(mr_ctrl->cache, &mr_ctrl->mru,
				   MLX5_MR_CACHE_N, addr);
	if (likely(lkey != UINT32_MAX))
		return lkey;
	/* Take slower bottom-half on miss. */
	return mlx5_mr_addr2mr_bh(cdev->pd, mp_id, &cdev->mr_scache, mr_ctrl,
				  addr, cdev->config.mr_ext_memseg_en);
}
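
/*
 * Illustrative sketch (not part of the driver): resolving the LKey of each
 * mbuf in a burst before building WQEs. "txq" and its members are
 * hypothetical; only mlx5_mr_mb2mr() above is real:
 *
 *	for (i = 0; i < pkts_n; ++i) {
 *		uint32_t lkey = mlx5_mr_mb2mr(txq->cdev, &txq->mp_id,
 *					      &txq->mr_ctrl, pkts[i]);
 *
 *		if (unlikely(lkey == UINT32_MAX))
 *			break;
 *		...
 *	}
 */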