/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2016 6WIND S.A.
 * Copyright 2020 Mellanox Technologies, Ltd
 */
#include <stddef.h>

#include <rte_eal_memconfig.h>
#include <rte_eal_paging.h>
#include <rte_errno.h>
#include <rte_mempool.h>
#include <rte_malloc.h>
#include <rte_rwlock.h>

#include "mlx5_glue.h"
#include "mlx5_common.h"
#include "mlx5_common_mp.h"
#include "mlx5_common_mr.h"
#include "mlx5_common_os.h"
#include "mlx5_common_log.h"
#include "mlx5_malloc.h"

struct mr_find_contig_memsegs_data {
	uintptr_t addr;
	uintptr_t start;
	uintptr_t end;
	const struct rte_memseg_list *msl;
};

/* Virtual memory range. */
struct mlx5_range {
	uintptr_t start;
	uintptr_t end;
};

/** Memory region for a mempool. */
struct mlx5_mempool_mr {
	struct mlx5_pmd_mr pmd_mr;
	uint32_t refcnt; /**< Number of mempools sharing this MR. */
};

/* Mempool registration. */
struct mlx5_mempool_reg {
	LIST_ENTRY(mlx5_mempool_reg) next;
	/** Registered mempool, used to designate registrations. */
	struct rte_mempool *mp;
	/** Memory regions for the address ranges of the mempool. */
	struct mlx5_mempool_mr *mrs;
	/** Number of memory regions. */
	unsigned int mrs_n;
};

void
mlx5_mprq_buf_free_cb(void *addr __rte_unused, void *opaque)
{
	struct mlx5_mprq_buf *buf = opaque;

	if (__atomic_load_n(&buf->refcnt, __ATOMIC_RELAXED) == 1) {
		rte_mempool_put(buf->mp, buf);
	} else if (unlikely(__atomic_sub_fetch(&buf->refcnt, 1,
					       __ATOMIC_RELAXED) == 0)) {
		__atomic_store_n(&buf->refcnt, 1, __ATOMIC_RELAXED);
		rte_mempool_put(buf->mp, buf);
	}
}

/**
 * Expand B-tree table to a given size. Can't be called while holding
 * memory_hotplug_lock or share_cache.rwlock due to rte_realloc().
 *
 * @param bt
 *   Pointer to B-tree structure.
 * @param n
 *   Number of entries for expansion.
 *
 * @return
 *   0 on success, -1 on failure.
 */
static int
mr_btree_expand(struct mlx5_mr_btree *bt, int n)
{
	void *mem;
	int ret = 0;

	if (n <= bt->size)
		return ret;
	/*
	 * Downside of directly using rte_realloc() is that SOCKET_ID_ANY is
	 * used inside if there's no room to expand. Because this is a quite
	 * rare case and a part of a very slow path, it is acceptable.
	 * Initially cache_bh[] will be given practically enough space and,
	 * once it is expanded, expansion wouldn't be needed again ever.
	 */
	mem = mlx5_realloc(bt->table, MLX5_MEM_RTE | MLX5_MEM_ZERO,
			   n * sizeof(struct mr_cache_entry), 0, SOCKET_ID_ANY);
	if (mem == NULL) {
		/* Not an error, B-tree search will be skipped. */
		DRV_LOG(WARNING, "failed to expand MR B-tree (%p) table",
			(void *)bt);
		ret = -1;
	} else {
		DRV_LOG(DEBUG, "expanded MR B-tree table (size=%u)", n);
		bt->table = mem;
		bt->size = n;
	}
	return ret;
}

/**
 * Look up LKey from given B-tree lookup table, store the last index and return
 * searched LKey.
 *
 * @param bt
 *   Pointer to B-tree structure.
 * @param[out] idx
 *   Pointer to index. Even on search failure, returns index where it stops
 *   searching so that index can be used when inserting a new entry.
 * @param addr
 *   Search key.
 *
 * @return
 *   Searched LKey on success, UINT32_MAX on no match.
 */
static uint32_t
mr_btree_lookup(struct mlx5_mr_btree *bt, uint16_t *idx, uintptr_t addr)
{
	struct mr_cache_entry *lkp_tbl;
	uint16_t n;
	uint16_t base = 0;

	MLX5_ASSERT(bt != NULL);
	lkp_tbl = *bt->table;
	n = bt->len;
	/* First entry must be NULL for comparison. */
	MLX5_ASSERT(bt->len > 0 || (lkp_tbl[0].start == 0 &&
				    lkp_tbl[0].lkey == UINT32_MAX));
	/* Binary search. */
	do {
		register uint16_t delta = n >> 1;

		if (addr < lkp_tbl[base + delta].start) {
			n = delta;
		} else {
			base += delta;
			n -= delta;
		}
	} while (n > 1);
	MLX5_ASSERT(addr >= lkp_tbl[base].start);
	*idx = base;
	if (addr < lkp_tbl[base].end)
		return lkp_tbl[base].lkey;
	/* Not found. */
	return UINT32_MAX;
}

/**
 * Insert an entry to B-tree lookup table.
 *
 * @param bt
 *   Pointer to B-tree structure.
 * @param entry
 *   Pointer to new entry to insert.
 *
 * @return
 *   0 on success, -1 on failure.
 */
static int
mr_btree_insert(struct mlx5_mr_btree *bt, struct mr_cache_entry *entry)
{
	struct mr_cache_entry *lkp_tbl;
	uint16_t idx = 0;
	size_t shift;

	MLX5_ASSERT(bt != NULL);
	MLX5_ASSERT(bt->len <= bt->size);
	MLX5_ASSERT(bt->len > 0);
	lkp_tbl = *bt->table;
	/* Find out the slot for insertion. */
	if (mr_btree_lookup(bt, &idx, entry->start) != UINT32_MAX) {
		DRV_LOG(DEBUG,
			"abort insertion to B-tree(%p): already exist at"
			" idx=%u [0x%" PRIxPTR ", 0x%" PRIxPTR ") lkey=0x%x",
			(void *)bt, idx, entry->start, entry->end, entry->lkey);
		/* Already exist, return. */
		return 0;
	}
	/* If table is full, return error. */
	if (unlikely(bt->len == bt->size)) {
		bt->overflow = 1;
		return -1;
	}
	/* Insert entry. */
	++idx;
	shift = (bt->len - idx) * sizeof(struct mr_cache_entry);
	if (shift)
		memmove(&lkp_tbl[idx + 1], &lkp_tbl[idx], shift);
	lkp_tbl[idx] = *entry;
	bt->len++;
	DRV_LOG(DEBUG,
		"inserted B-tree(%p)[%u],"
		" [0x%" PRIxPTR ", 0x%" PRIxPTR ") lkey=0x%x",
		(void *)bt, idx, entry->start, entry->end, entry->lkey);
	return 0;
}

/**
 * Initialize B-tree and allocate memory for lookup table.
 *
 * @param bt
 *   Pointer to B-tree structure.
 * @param n
 *   Number of entries to allocate.
 * @param socket
 *   NUMA socket on which memory must be allocated.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_mr_btree_init(struct mlx5_mr_btree *bt, int n, int socket)
{
	if (bt == NULL) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	MLX5_ASSERT(!bt->table && !bt->size);
	memset(bt, 0, sizeof(*bt));
	bt->table = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
				sizeof(struct mr_cache_entry) * n,
				0, socket);
	if (bt->table == NULL) {
		rte_errno = ENOMEM;
		DRV_LOG(DEBUG,
			"failed to allocate memory for btree cache on socket "
			"%d", socket);
		return -rte_errno;
	}
	bt->size = n;
	/* First entry must be NULL for binary search. */
	(*bt->table)[bt->len++] = (struct mr_cache_entry) {
		.lkey = UINT32_MAX,
	};
	DRV_LOG(DEBUG, "initialized B-tree %p with table %p",
		(void *)bt, (void *)bt->table);
	return 0;
}

/**
 * Free B-tree resources.
 *
 * @param bt
 *   Pointer to B-tree structure.
 */
void
mlx5_mr_btree_free(struct mlx5_mr_btree *bt)
{
	if (bt == NULL)
		return;
	DRV_LOG(DEBUG, "freeing B-tree %p with table %p",
		(void *)bt, (void *)bt->table);
	mlx5_free(bt->table);
	memset(bt, 0, sizeof(*bt));
}

/**
 * Dump all the entries in a B-tree.
 *
 * @param bt
 *   Pointer to B-tree structure.
 */
void
mlx5_mr_btree_dump(struct mlx5_mr_btree *bt __rte_unused)
{
#ifdef RTE_LIBRTE_MLX5_DEBUG
	int idx;
	struct mr_cache_entry *lkp_tbl;

	if (bt == NULL)
		return;
	lkp_tbl = *bt->table;
	for (idx = 0; idx < bt->len; ++idx) {
		struct mr_cache_entry *entry = &lkp_tbl[idx];

		DRV_LOG(DEBUG, "B-tree(%p)[%u],"
			" [0x%" PRIxPTR ", 0x%" PRIxPTR ") lkey=0x%x",
			(void *)bt, idx, entry->start, entry->end, entry->lkey);
	}
#endif
}

/**
 * Initialize per-queue MR control descriptor.
 *
 * @param mr_ctrl
 *   Pointer to MR control structure.
 * @param dev_gen_ptr
 *   Pointer to generation number of global cache.
 * @param socket
 *   NUMA socket on which memory must be allocated.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_mr_ctrl_init(struct mlx5_mr_ctrl *mr_ctrl, uint32_t *dev_gen_ptr,
		  int socket)
{
	if (mr_ctrl == NULL) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	/* Save pointer of global generation number to check memory event. */
	mr_ctrl->dev_gen_ptr = dev_gen_ptr;
	/* Initialize B-tree and allocate memory for bottom-half cache table. */
	return mlx5_mr_btree_init(&mr_ctrl->cache_bh, MLX5_MR_BTREE_CACHE_N,
				  socket);
}

/**
 * Find a virtually contiguous memory chunk in a given MR.
 *
 * @param mr
 *   Pointer to MR structure.
 * @param[out] entry
 *   Pointer to returning MR cache entry. If not found, this will not be
 *   updated.
 * @param base_idx
 *   Start index of the memseg bitmap.
 *
 * @return
 *   Next index to go on lookup.
 */
static int
mr_find_next_chunk(struct mlx5_mr *mr, struct mr_cache_entry *entry,
		   int base_idx)
{
	uintptr_t start = 0;
	uintptr_t end = 0;
	uint32_t idx = 0;

	/* MR for external memory doesn't have memseg list. */
	if (mr->msl == NULL) {
		MLX5_ASSERT(mr->ms_bmp_n == 1);
		MLX5_ASSERT(mr->ms_n == 1);
		MLX5_ASSERT(base_idx == 0);
		/*
		 * Can't search it from memseg list but get it directly from
		 * pmd_mr as there's only one chunk.
		 */
		entry->start = (uintptr_t)mr->pmd_mr.addr;
		entry->end = (uintptr_t)mr->pmd_mr.addr + mr->pmd_mr.len;
		entry->lkey = rte_cpu_to_be_32(mr->pmd_mr.lkey);
		/* Returning 1 ends iteration. */
		return 1;
	}
	for (idx = base_idx; idx < mr->ms_bmp_n; ++idx) {
		if (rte_bitmap_get(mr->ms_bmp, idx)) {
			const struct rte_memseg_list *msl;
			const struct rte_memseg *ms;

			msl = mr->msl;
			ms = rte_fbarray_get(&msl->memseg_arr,
					     mr->ms_base_idx + idx);
			MLX5_ASSERT(msl->page_sz == ms->hugepage_sz);
			if (!start)
				start = ms->addr_64;
			end = ms->addr_64 + ms->hugepage_sz;
		} else if (start) {
			/* Passed the end of a fragment. */
			break;
		}
	}
	if (start) {
		/* Found one chunk. */
		entry->start = start;
		entry->end = end;
		entry->lkey = rte_cpu_to_be_32(mr->pmd_mr.lkey);
	}
	return idx;
}

/**
 * Insert an MR into the global B-tree cache. It may fail due to low-on-memory.
 * Then, this entry will have to be searched by mr_lookup_list() in
 * mlx5_mr_create() on miss.
 *
 * @param share_cache
 *   Pointer to a global shared MR cache.
 * @param mr
 *   Pointer to MR to insert.
 *
 * @return
 *   0 on success, -1 on failure.
 */
int
mlx5_mr_insert_cache(struct mlx5_mr_share_cache *share_cache,
		     struct mlx5_mr *mr)
{
	unsigned int n;

	DRV_LOG(DEBUG, "Inserting MR(%p) to global cache(%p)",
		(void *)mr, (void *)share_cache);
	for (n = 0; n < mr->ms_bmp_n; ) {
		struct mr_cache_entry entry;

		memset(&entry, 0, sizeof(entry));
		/* Find a contiguous chunk and advance the index. */
		n = mr_find_next_chunk(mr, &entry, n);
		if (!entry.end)
			break;
		if (mr_btree_insert(&share_cache->cache, &entry) < 0) {
			/*
			 * Overflowed, but the global table cannot be expanded
			 * because of deadlock.
			 */
			return -1;
		}
	}
	return 0;
}

/**
 * Look up address in the original global MR list.
 *
 * @param share_cache
 *   Pointer to a global shared MR cache.
 * @param[out] entry
 *   Pointer to returning MR cache entry. If no match, this will not be updated.
 * @param addr
 *   Search key.
 *
 * @return
 *   Found MR on match, NULL otherwise.
 */
struct mlx5_mr *
mlx5_mr_lookup_list(struct mlx5_mr_share_cache *share_cache,
		    struct mr_cache_entry *entry, uintptr_t addr)
{
	struct mlx5_mr *mr;

	/* Iterate all the existing MRs. */
	LIST_FOREACH(mr, &share_cache->mr_list, mr) {
		unsigned int n;

		if (mr->ms_n == 0)
			continue;
		for (n = 0; n < mr->ms_bmp_n; ) {
			struct mr_cache_entry ret;

			memset(&ret, 0, sizeof(ret));
			n = mr_find_next_chunk(mr, &ret, n);
			if (addr >= ret.start && addr < ret.end) {
				/* Found. */
				*entry = ret;
				return mr;
			}
		}
	}
	return NULL;
}

/**
 * Look up address in the global MR cache.
 *
 * @param share_cache
 *   Pointer to a global shared MR cache.
 * @param[out] entry
 *   Pointer to returning MR cache entry. If no match, this will not be updated.
 * @param addr
 *   Search key.
 *
 * @return
 *   Searched LKey on success, UINT32_MAX on failure and rte_errno is set.
 */
static uint32_t
mlx5_mr_lookup_cache(struct mlx5_mr_share_cache *share_cache,
		     struct mr_cache_entry *entry, uintptr_t addr)
{
	uint16_t idx;
	uint32_t lkey = UINT32_MAX;
	struct mlx5_mr *mr;

	/*
	 * If the global cache has overflowed since it failed to expand the
	 * B-tree table, it can't have all the existing MRs. Then, the address
	 * has to be searched by traversing the original MR list instead, which
	 * is a very slow path. Otherwise, the global cache is all inclusive.
	 */
	if (!unlikely(share_cache->cache.overflow)) {
		lkey = mr_btree_lookup(&share_cache->cache, &idx, addr);
		if (lkey != UINT32_MAX)
			*entry = (*share_cache->cache.table)[idx];
	} else {
		/* Falling back to the slowest path. */
		mr = mlx5_mr_lookup_list(share_cache, entry, addr);
		if (mr != NULL)
			lkey = entry->lkey;
	}
	MLX5_ASSERT(lkey == UINT32_MAX || (addr >= entry->start &&
					   addr < entry->end));
	return lkey;
}

/**
 * Free MR resources. MR lock must not be held to avoid a deadlock: rte_free()
 * can raise a memory free event and the callback function will spin on the lock.
 *
 * @param mr
 *   Pointer to MR to free.
 */
void
mlx5_mr_free(struct mlx5_mr *mr, mlx5_dereg_mr_t dereg_mr_cb)
{
	if (mr == NULL)
		return;
	DRV_LOG(DEBUG, "freeing MR(%p):", (void *)mr);
	dereg_mr_cb(&mr->pmd_mr);
	if (mr->ms_bmp != NULL)
		rte_bitmap_free(mr->ms_bmp);
	mlx5_free(mr);
}

void
mlx5_mr_rebuild_cache(struct mlx5_mr_share_cache *share_cache)
{
	struct mlx5_mr *mr;

	DRV_LOG(DEBUG, "Rebuild dev cache[] %p", (void *)share_cache);
	/* Flush cache to rebuild. */
	share_cache->cache.len = 1;
	share_cache->cache.overflow = 0;
	/* Iterate all the existing MRs. */
	LIST_FOREACH(mr, &share_cache->mr_list, mr)
		if (mlx5_mr_insert_cache(share_cache, mr) < 0)
			return;
}

/**
 * Release resources of detached MRs having no online entry.
 *
 * @param share_cache
 *   Pointer to a global shared MR cache.
 */
static void
mlx5_mr_garbage_collect(struct mlx5_mr_share_cache *share_cache)
{
	struct mlx5_mr *mr_next;
	struct mlx5_mr_list free_list = LIST_HEAD_INITIALIZER(free_list);

	/* Must be called from the primary process. */
	MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
	/*
	 * MRs can't be freed while holding the lock because rte_free() could
	 * call the memory free callback function. This would be a deadlock
	 * situation.
	 */
	rte_rwlock_write_lock(&share_cache->rwlock);
	/* Detach the whole free list and release it after unlocking. */
	free_list = share_cache->mr_free_list;
	LIST_INIT(&share_cache->mr_free_list);
	rte_rwlock_write_unlock(&share_cache->rwlock);
	/* Release resources. */
	mr_next = LIST_FIRST(&free_list);
	while (mr_next != NULL) {
		struct mlx5_mr *mr = mr_next;

		mr_next = LIST_NEXT(mr, mr);
		mlx5_mr_free(mr, share_cache->dereg_mr_cb);
	}
}

/* Called during rte_memseg_contig_walk() by mlx5_mr_create(). */
static int
mr_find_contig_memsegs_cb(const struct rte_memseg_list *msl,
			  const struct rte_memseg *ms, size_t len, void *arg)
{
	struct mr_find_contig_memsegs_data *data = arg;

	if (data->addr < ms->addr_64 || data->addr >= ms->addr_64 + len)
		return 0;
	/* Found, save it and stop walking. */
	data->start = ms->addr_64;
	data->end = ms->addr_64 + len;
	data->msl = msl;
	return 1;
}

/**
 * Create a new global Memory Region (MR) for a missing virtual address.
 * This API should be called on a secondary process, then a request is sent to
 * the primary process in order to create an MR for the address. As the global
 * MR list is on the shared memory, the following LKey lookup should succeed
 * unless the request fails.
 *
 * @param cdev
 *   Pointer to the mlx5 common device.
 * @param share_cache
 *   Pointer to a global shared MR cache.
 * @param[out] entry
 *   Pointer to returning MR cache entry, found in the global cache or newly
 *   created. If failed to create one, this will not be updated.
 * @param addr
 *   Target virtual address to register.
 *
 * @return
 *   Searched LKey on success, UINT32_MAX on failure and rte_errno is set.
 */
static uint32_t
mlx5_mr_create_secondary(struct mlx5_common_device *cdev,
			 struct mlx5_mr_share_cache *share_cache,
			 struct mr_cache_entry *entry, uintptr_t addr)
{
	int ret;

	DRV_LOG(DEBUG, "Requesting MR creation for address (%p)", (void *)addr);
	ret = mlx5_mp_req_mr_create(cdev, addr);
	if (ret) {
		DRV_LOG(DEBUG, "Fail to request MR creation for address (%p)",
			(void *)addr);
		return UINT32_MAX;
	}
	rte_rwlock_read_lock(&share_cache->rwlock);
	/* Fill in output data. */
	mlx5_mr_lookup_cache(share_cache, entry, addr);
	/* Lookup can't fail. */
	MLX5_ASSERT(entry->lkey != UINT32_MAX);
	rte_rwlock_read_unlock(&share_cache->rwlock);
	DRV_LOG(DEBUG, "MR CREATED by primary process for %p:\n"
		" [0x%" PRIxPTR ", 0x%" PRIxPTR "), lkey=0x%x",
		(void *)addr, entry->start, entry->end, entry->lkey);
	return entry->lkey;
}

/**
 * Create a new global Memory Region (MR) for a missing virtual address.
 * Register the entire virtually contiguous memory chunk around the address.
 *
 * @param pd
 *   Pointer to pd of a device (net, regex, vdpa,...).
 * @param share_cache
 *   Pointer to a global shared MR cache.
 * @param[out] entry
 *   Pointer to returning MR cache entry, found in the global cache or newly
 *   created. If failed to create one, this will not be updated.
 * @param addr
 *   Target virtual address to register.
 * @param mr_ext_memseg_en
 *   Configurable flag to enable or disable extending the registration to the
 *   whole contiguous memseg chunk.
 *
 * @return
 *   Searched LKey on success, UINT32_MAX on failure and rte_errno is set.
 */
static uint32_t
mlx5_mr_create_primary(void *pd,
		       struct mlx5_mr_share_cache *share_cache,
		       struct mr_cache_entry *entry, uintptr_t addr,
		       unsigned int mr_ext_memseg_en)
{
	struct mr_find_contig_memsegs_data data = {.addr = addr, };
	struct mr_find_contig_memsegs_data data_re;
	const struct rte_memseg_list *msl;
	const struct rte_memseg *ms;
	struct mlx5_mr *mr = NULL;
	int ms_idx_shift = -1;
	uint32_t bmp_size;
	void *bmp_mem;
	uint32_t ms_n;
	uint32_t n;
	size_t len;

	DRV_LOG(DEBUG, "Creating a MR using address (%p)", (void *)addr);
	/*
	 * Release detached MRs if any. This can't be called while holding
	 * either memory_hotplug_lock or share_cache->rwlock. MRs on the free
	 * list have been detached by the memory free event but couldn't be
	 * released inside the callback due to deadlock. As a result, releasing
	 * resources is quite opportunistic.
	 */
	mlx5_mr_garbage_collect(share_cache);
	/*
	 * If enabled, find out a contiguous virtual address chunk in use, to
	 * which the given address belongs, in order to register maximum range.
	 * In the best case where mempools are not dynamically recreated and
	 * '--socket-mem' is specified as an EAL option, it is very likely to
	 * have only one MR(LKey) per a socket and per a hugepage-size even
	 * though the system memory is highly fragmented. As the whole memory
	 * chunk will be pinned by kernel, it can't be reused unless entire
	 * chunk is freed from EAL.
	 *
	 * If disabled, just register one memseg (page). Then, memory
	 * consumption will be minimized but it may drop performance if there
	 * are many MRs to lookup on the datapath.
	 */
	if (!mr_ext_memseg_en) {
		data.msl = rte_mem_virt2memseg_list((void *)addr);
		data.start = RTE_ALIGN_FLOOR(addr, data.msl->page_sz);
		data.end = data.start + data.msl->page_sz;
	} else if (!rte_memseg_contig_walk(mr_find_contig_memsegs_cb, &data)) {
		DRV_LOG(WARNING,
			"Unable to find virtually contiguous"
			" chunk for address (%p)."
			" rte_memseg_contig_walk() failed.", (void *)addr);
		rte_errno = ENXIO;
		goto err_nolock;
	}
alloc_resources:
	/* Addresses must be page-aligned. */
	MLX5_ASSERT(data.msl);
	MLX5_ASSERT(rte_is_aligned((void *)data.start, data.msl->page_sz));
	MLX5_ASSERT(rte_is_aligned((void *)data.end, data.msl->page_sz));
	msl = data.msl;
	ms = rte_mem_virt2memseg((void *)data.start, msl);
	len = data.end - data.start;
	MLX5_ASSERT(ms);
	MLX5_ASSERT(msl->page_sz == ms->hugepage_sz);
	/* Number of memsegs in the range. */
	ms_n = len / msl->page_sz;
	DRV_LOG(DEBUG, "Extending %p to [0x%" PRIxPTR ", 0x%" PRIxPTR "),"
		" page_sz=0x%" PRIx64 ", ms_n=%u",
		(void *)addr, data.start, data.end, msl->page_sz, ms_n);
	/* Size of memory for bitmap. */
	bmp_size = rte_bitmap_get_memory_footprint(ms_n);
	mr = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
			 RTE_ALIGN_CEIL(sizeof(*mr), RTE_CACHE_LINE_SIZE) +
			 bmp_size, RTE_CACHE_LINE_SIZE, msl->socket_id);
	if (mr == NULL) {
		DRV_LOG(DEBUG, "Unable to allocate memory for a new MR of"
			" address (%p).", (void *)addr);
		rte_errno = ENOMEM;
		goto err_nolock;
	}
	mr->msl = msl;
	/*
	 * Save the index of the first memseg and initialize memseg bitmap. To
	 * see if a memseg of ms_idx in the memseg-list is still valid, check:
	 *	rte_bitmap_get(mr->bmp, ms_idx - mr->ms_base_idx)
	 */
	mr->ms_base_idx = rte_fbarray_find_idx(&msl->memseg_arr, ms);
	bmp_mem = RTE_PTR_ALIGN_CEIL(mr + 1, RTE_CACHE_LINE_SIZE);
	mr->ms_bmp = rte_bitmap_init(ms_n, bmp_mem, bmp_size);
	if (mr->ms_bmp == NULL) {
		DRV_LOG(DEBUG, "Unable to initialize bitmap for a new MR of"
			" address (%p).", (void *)addr);
		rte_errno = EINVAL;
		goto err_nolock;
	}
	/*
	 * Should recheck whether the extended contiguous chunk is still valid.
	 * Because memory_hotplug_lock can't be held if there's any memory
	 * related calls in a critical path, resource allocation above can't be
	 * locked. If the memory has been changed at this point, try again with
	 * just single page. If not, go on with the big chunk atomically from
	 * here.
	 */
	rte_mcfg_mem_read_lock();
	data_re = data;
	if (len > msl->page_sz &&
	    !rte_memseg_contig_walk(mr_find_contig_memsegs_cb, &data_re)) {
		DRV_LOG(DEBUG,
			"Unable to find virtually contiguous chunk for address "
			"(%p). rte_memseg_contig_walk() failed.", (void *)addr);
		rte_errno = ENXIO;
		goto err_memlock;
	}
	if (data.start != data_re.start || data.end != data_re.end) {
		/*
		 * The extended contiguous chunk has been changed. Try again
		 * with single memseg instead.
		 */
		data.start = RTE_ALIGN_FLOOR(addr, msl->page_sz);
		data.end = data.start + msl->page_sz;
		rte_mcfg_mem_read_unlock();
		mlx5_mr_free(mr, share_cache->dereg_mr_cb);
		goto alloc_resources;
	}
	MLX5_ASSERT(data.msl == data_re.msl);
	rte_rwlock_write_lock(&share_cache->rwlock);
	/*
	 * Check the address is really missing. If other thread already created
	 * one or it is not found due to overflow, abort and return.
	 */
	if (mlx5_mr_lookup_cache(share_cache, entry, addr) != UINT32_MAX) {
		/*
		 * Insert to the global cache table. It may fail due to
		 * low-on-memory. Then, this entry will have to be searched
		 * here again.
		 */
		mr_btree_insert(&share_cache->cache, entry);
		DRV_LOG(DEBUG, "Found MR for %p on final lookup, abort",
			(void *)addr);
		rte_rwlock_write_unlock(&share_cache->rwlock);
		rte_mcfg_mem_read_unlock();
		/*
		 * Must be unlocked before calling rte_free() because
		 * mlx5_mr_mem_event_free_cb() can be called inside.
		 */
		mlx5_mr_free(mr, share_cache->dereg_mr_cb);
		return entry->lkey;
	}
	/*
	 * Trim start and end addresses for verbs MR. Set bits for registering
	 * memsegs but exclude already registered ones. Bitmap can be
	 * fragmented.
	 */
	for (n = 0; n < ms_n; ++n) {
		uintptr_t start;
		struct mr_cache_entry ret;

		memset(&ret, 0, sizeof(ret));
		start = data_re.start + n * msl->page_sz;
		/* Exclude memsegs already registered by other MRs. */
		if (mlx5_mr_lookup_cache(share_cache, &ret, start) ==
		    UINT32_MAX) {
			/*
			 * Start from the first unregistered memseg in the
			 * extended range.
			 */
			if (ms_idx_shift == -1) {
				mr->ms_base_idx += n;
				data.start = start;
				ms_idx_shift = n;
			}
			data.end = start + msl->page_sz;
			rte_bitmap_set(mr->ms_bmp, n - ms_idx_shift);
			++mr->ms_n;
		}
	}
	len = data.end - data.start;
	mr->ms_bmp_n = len / msl->page_sz;
	MLX5_ASSERT(ms_idx_shift + mr->ms_bmp_n <= ms_n);
	/*
	 * Finally create an MR for the memory chunk. Verbs: ibv_reg_mr() can
	 * be called while holding the memory lock because it doesn't use
	 * mlx5_alloc_buf_extern() which eventually calls rte_malloc_socket()
	 * through mlx5_alloc_verbs_buf().
	 */
	share_cache->reg_mr_cb(pd, (void *)data.start, len, &mr->pmd_mr);
	if (mr->pmd_mr.obj == NULL) {
		DRV_LOG(DEBUG, "Fail to create an MR for address (%p)",
			(void *)addr);
		rte_errno = EINVAL;
		goto err_mrlock;
	}
	MLX5_ASSERT((uintptr_t)mr->pmd_mr.addr == data.start);
	MLX5_ASSERT(mr->pmd_mr.len);
	LIST_INSERT_HEAD(&share_cache->mr_list, mr, mr);
	DRV_LOG(DEBUG, "MR CREATED (%p) for %p:\n"
		" [0x%" PRIxPTR ", 0x%" PRIxPTR "),"
		" lkey=0x%x base_idx=%u ms_n=%u, ms_bmp_n=%u",
		(void *)mr, (void *)addr, data.start, data.end,
		rte_cpu_to_be_32(mr->pmd_mr.lkey),
		mr->ms_base_idx, mr->ms_n, mr->ms_bmp_n);
	/* Insert to the global cache table. */
	mlx5_mr_insert_cache(share_cache, mr);
	/* Fill in output data. */
	mlx5_mr_lookup_cache(share_cache, entry, addr);
	/* Lookup can't fail. */
	MLX5_ASSERT(entry->lkey != UINT32_MAX);
	rte_rwlock_write_unlock(&share_cache->rwlock);
	rte_mcfg_mem_read_unlock();
	return entry->lkey;
err_mrlock:
	rte_rwlock_write_unlock(&share_cache->rwlock);
err_memlock:
	rte_mcfg_mem_read_unlock();
err_nolock:
	/*
	 * In case of error, as this can be called in a datapath, a warning
	 * message per error is preferable instead. Must be unlocked before
	 * calling rte_free() because mlx5_mr_mem_event_free_cb() can be called
	 * inside.
	 */
	mlx5_mr_free(mr, share_cache->dereg_mr_cb);
	return UINT32_MAX;
}

/**
 * Create a new global Memory Region (MR) for a missing virtual address.
 * This can be called from primary and secondary processes.
 *
 * @param cdev
 *   Pointer to the mlx5 common device.
 * @param share_cache
 *   Pointer to a global shared MR cache.
 * @param[out] entry
 *   Pointer to returning MR cache entry, found in the global cache or newly
 *   created. If failed to create one, this will not be updated.
 * @param addr
 *   Target virtual address to register.
 *
 * @return
 *   Searched LKey on success, UINT32_MAX on failure and rte_errno is set.
 */
uint32_t
mlx5_mr_create(struct mlx5_common_device *cdev,
	       struct mlx5_mr_share_cache *share_cache,
	       struct mr_cache_entry *entry, uintptr_t addr)
{
	uint32_t ret = 0;

	switch (rte_eal_process_type()) {
	case RTE_PROC_PRIMARY:
		ret = mlx5_mr_create_primary(cdev->pd, share_cache, entry, addr,
					     cdev->config.mr_ext_memseg_en);
		break;
	case RTE_PROC_SECONDARY:
		ret = mlx5_mr_create_secondary(cdev, share_cache, entry, addr);
		break;
	default:
		break;
	}
	return ret;
}

/**
 * Look up address in the global MR cache table. If not found, create a new MR.
 * Insert the found/created entry to local bottom-half cache table.
 *
 * @param mr_ctrl
 *   Pointer to per-queue MR control structure.
 * @param[out] entry
 *   Pointer to returning MR cache entry, found in the global cache or newly
 *   created. If failed to create one, this is not written.
 * @param addr
 *   Search key.
 *
 * @return
 *   Searched LKey on success, UINT32_MAX on no match.
 */
static uint32_t
mr_lookup_caches(struct mlx5_mr_ctrl *mr_ctrl,
		 struct mr_cache_entry *entry, uintptr_t addr)
{
	struct mlx5_mr_share_cache *share_cache =
		container_of(mr_ctrl->dev_gen_ptr, struct mlx5_mr_share_cache,
			     dev_gen);
	struct mlx5_common_device *cdev =
		container_of(share_cache, struct mlx5_common_device, mr_scache);
	struct mlx5_mr_btree *bt = &mr_ctrl->cache_bh;
	uint32_t lkey;
	uint16_t idx;

	/* If local cache table is full, try to double it. */
	if (unlikely(bt->len == bt->size))
		mr_btree_expand(bt, bt->size << 1);
	/* Look up in the global cache. */
	rte_rwlock_read_lock(&share_cache->rwlock);
	lkey = mr_btree_lookup(&share_cache->cache, &idx, addr);
	if (lkey != UINT32_MAX) {
		/* Found. */
		*entry = (*share_cache->cache.table)[idx];
		rte_rwlock_read_unlock(&share_cache->rwlock);
		/*
		 * Update local cache. Even if it fails, return the found entry
		 * to update top-half cache. Next time, this entry will be found
		 * in the global cache.
		 */
		mr_btree_insert(bt, entry);
		return lkey;
	}
	rte_rwlock_read_unlock(&share_cache->rwlock);
	/* First time to see the address? Create a new MR. */
	lkey = mlx5_mr_create(cdev, share_cache, entry, addr);
	/*
	 * Update the local cache if successfully created a new global MR. Even
	 * if failed to create one, there's no action to take in this datapath
	 * code. As returning LKey is invalid, this will eventually make HW
	 * fail.
	 */
	if (lkey != UINT32_MAX)
		mr_btree_insert(bt, entry);
	return lkey;
}

/**
 * Bottom-half of LKey search on datapath. First search in cache_bh[] and if
 * misses, search in the global MR cache table and update the new entry to
 * per-queue local caches.
 *
 * @param mr_ctrl
 *   Pointer to per-queue MR control structure.
 * @param addr
 *   Search key.
 *
 * @return
 *   Searched LKey on success, UINT32_MAX on no match.
 */
static uint32_t
mlx5_mr_addr2mr_bh(struct mlx5_mr_ctrl *mr_ctrl, uintptr_t addr)
{
	uint32_t lkey;
	uint16_t bh_idx = 0;
	/* Victim in top-half cache to replace with new entry. */
	struct mr_cache_entry *repl = &mr_ctrl->cache[mr_ctrl->head];

	/* Binary-search MR translation table. */
	lkey = mr_btree_lookup(&mr_ctrl->cache_bh, &bh_idx, addr);
	/* Update top-half cache. */
	if (likely(lkey != UINT32_MAX)) {
		*repl = (*mr_ctrl->cache_bh.table)[bh_idx];
	} else {
		/*
		 * If missed in local lookup table, search in the global cache
		 * and local cache_bh[] will be updated inside if possible.
		 * Top-half cache entry will also be updated.
		 */
		lkey = mr_lookup_caches(mr_ctrl, repl, addr);
		if (unlikely(lkey == UINT32_MAX))
			return UINT32_MAX;
	}
	/* Update the most recently used entry. */
	mr_ctrl->mru = mr_ctrl->head;
	/* Point to the next victim, the oldest. */
	mr_ctrl->head = (mr_ctrl->head + 1) % MLX5_MR_CACHE_N;
	return lkey;
}

/**
 * Release all the created MRs and resources on global MR cache of a device
 * list.
 *
 * @param share_cache
 *   Pointer to a global shared MR cache.
 */
void
mlx5_mr_release_cache(struct mlx5_mr_share_cache *share_cache)
{
	struct mlx5_mr *mr_next;

	rte_rwlock_write_lock(&share_cache->rwlock);
	/* Detach from MR list and move to free list. */
	mr_next = LIST_FIRST(&share_cache->mr_list);
	while (mr_next != NULL) {
		struct mlx5_mr *mr = mr_next;

		mr_next = LIST_NEXT(mr, mr);
		LIST_REMOVE(mr, mr);
		LIST_INSERT_HEAD(&share_cache->mr_free_list, mr, mr);
	}
	LIST_INIT(&share_cache->mr_list);
	/* Free global cache. */
	mlx5_mr_btree_free(&share_cache->cache);
	rte_rwlock_write_unlock(&share_cache->rwlock);
	/* Free all remaining MRs. */
	mlx5_mr_garbage_collect(share_cache);
}

/**
 * Initialize global MR cache of a device.
 *
 * @param share_cache
 *   Pointer to a global shared MR cache.
 * @param socket
 *   NUMA socket on which memory must be allocated.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_mr_create_cache(struct mlx5_mr_share_cache *share_cache, int socket)
{
	/* Set the reg_mr and dereg_mr callback functions */
	mlx5_os_set_reg_mr_cb(&share_cache->reg_mr_cb,
			      &share_cache->dereg_mr_cb);
	rte_rwlock_init(&share_cache->rwlock);
	rte_rwlock_init(&share_cache->mprwlock);
	share_cache->mp_cb_registered = 0;
	/* Initialize B-tree and allocate memory for global MR cache table. */
	return mlx5_mr_btree_init(&share_cache->cache,
				  MLX5_MR_BTREE_CACHE_N * 2, socket);
}

/**
 * Flush all of the local cache entries.
 *
 * @param mr_ctrl
 *   Pointer to per-queue MR local cache.
 */
void
mlx5_mr_flush_local_cache(struct mlx5_mr_ctrl *mr_ctrl)
{
	/* Reset the most-recently-used index. */
	mr_ctrl->mru = 0;
	/* Reset the linear search array. */
	mr_ctrl->head = 0;
	memset(mr_ctrl->cache, 0, sizeof(mr_ctrl->cache));
	/* Reset the B-tree table. */
	mr_ctrl->cache_bh.len = 1;
	mr_ctrl->cache_bh.overflow = 0;
	/* Update the generation number. */
	mr_ctrl->cur_gen = *mr_ctrl->dev_gen_ptr;
	DRV_LOG(DEBUG, "mr_ctrl(%p): flushed, cur_gen=%d",
		(void *)mr_ctrl, mr_ctrl->cur_gen);
}

/**
 * Creates a memory region for external memory, that is memory which is not
 * part of the DPDK memory segments.
 *
 * @param pd
 *   Pointer to pd of a device (net, regex, vdpa,...).
 * @param addr
 *   Starting virtual address of memory.
 * @param len
 *   Length of memory segment being mapped.
 * @param socket_id
 *   Socket to allocate heap memory for the control structures.
 *
 * @return
 *   Pointer to MR structure on success, NULL otherwise.
 */
struct mlx5_mr *
mlx5_create_mr_ext(void *pd, uintptr_t addr, size_t len, int socket_id,
		   mlx5_reg_mr_t reg_mr_cb)
{
	struct mlx5_mr *mr = NULL;

	mr = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
			 RTE_ALIGN_CEIL(sizeof(*mr), RTE_CACHE_LINE_SIZE),
			 RTE_CACHE_LINE_SIZE, socket_id);
	if (mr == NULL)
		return NULL;
	reg_mr_cb(pd, (void *)addr, len, &mr->pmd_mr);
	if (mr->pmd_mr.obj == NULL) {
		DRV_LOG(WARNING,
			"Fail to create MR for address (%p)",
			(void *)addr);
		mlx5_free(mr);
		return NULL;
	}
	mr->msl = NULL; /* Mark it as external memory. */
	mr->ms_bmp = NULL;
	mr->ms_n = 1;
	mr->ms_bmp_n = 1;
	DRV_LOG(DEBUG,
		"MR CREATED (%p) for external memory %p:\n"
		" [0x%" PRIxPTR ", 0x%" PRIxPTR "),"
		" lkey=0x%x base_idx=%u ms_n=%u, ms_bmp_n=%u",
		(void *)mr, (void *)addr,
		addr, addr + len, rte_cpu_to_be_32(mr->pmd_mr.lkey),
		mr->ms_base_idx, mr->ms_n, mr->ms_bmp_n);
	return mr;
}

/**
 * Callback for memory free event. Iterate over freed memsegs and check whether
 * each belongs to an existing MR. If so, clear the bit in the MR's bitmap. As a
 * result, the MR would be fragmented. If it becomes empty, the MR will be freed
 * later by mlx5_mr_garbage_collect(). Even if this callback is called from a
 * secondary process, the garbage collector will be called in the primary
 * process as the secondary process can't call mlx5_mr_create().
 *
 * The global cache must be rebuilt if there's any change and this event has to
 * be propagated to dataplane threads to flush the local caches.
 *
 * @param share_cache
 *   Pointer to a global shared MR cache.
 * @param ibdev_name
 *   Name of ibv device.
 * @param addr
 *   Address of freed memory.
 * @param len
 *   Size of freed memory.
 */
void
mlx5_free_mr_by_addr(struct mlx5_mr_share_cache *share_cache,
		     const char *ibdev_name, const void *addr, size_t len)
{
	const struct rte_memseg_list *msl;
	struct mlx5_mr *mr;
	int ms_n;
	int i;
	int rebuild = 0;

	DRV_LOG(DEBUG, "device %s free callback: addr=%p, len=%zu",
		ibdev_name, addr, len);
	msl = rte_mem_virt2memseg_list(addr);
	/* addr and len must be page-aligned. */
	MLX5_ASSERT((uintptr_t)addr ==
		    RTE_ALIGN((uintptr_t)addr, msl->page_sz));
	MLX5_ASSERT(len == RTE_ALIGN(len, msl->page_sz));
	ms_n = len / msl->page_sz;
	rte_rwlock_write_lock(&share_cache->rwlock);
	/* Clear bits of freed memsegs from MR. */
	for (i = 0; i < ms_n; ++i) {
		const struct rte_memseg *ms;
		struct mr_cache_entry entry;
		uintptr_t start;
		int ms_idx;
		uint32_t pos;

		/* Find MR having this memseg. */
		start = (uintptr_t)addr + i * msl->page_sz;
		mr = mlx5_mr_lookup_list(share_cache, &entry, start);
		if (mr == NULL)
			continue;
		MLX5_ASSERT(mr->msl); /* Can't be external memory. */
		ms = rte_mem_virt2memseg((void *)start, msl);
		MLX5_ASSERT(ms != NULL);
		MLX5_ASSERT(msl->page_sz == ms->hugepage_sz);
		ms_idx = rte_fbarray_find_idx(&msl->memseg_arr, ms);
		pos = ms_idx - mr->ms_base_idx;
		MLX5_ASSERT(rte_bitmap_get(mr->ms_bmp, pos));
		MLX5_ASSERT(pos < mr->ms_bmp_n);
		DRV_LOG(DEBUG, "device %s MR(%p): clear bitmap[%u] for addr %p",
			ibdev_name, (void *)mr, pos, (void *)start);
		rte_bitmap_clear(mr->ms_bmp, pos);
		if (--mr->ms_n == 0) {
			LIST_REMOVE(mr, mr);
			LIST_INSERT_HEAD(&share_cache->mr_free_list, mr, mr);
			DRV_LOG(DEBUG, "device %s remove MR(%p) from list",
				ibdev_name, (void *)mr);
		}
		/*
		 * MR is fragmented or will be freed. The global cache must be
		 * rebuilt.
		 */
		rebuild = 1;
	}
	if (rebuild) {
		mlx5_mr_rebuild_cache(share_cache);
		/*
		 * No explicit wmb is needed after updating dev_gen due to
		 * store-release ordering in unlock that provides the
		 * implicit barrier at the software visible level.
		 */
		++share_cache->dev_gen;
		DRV_LOG(DEBUG, "broadcasting local cache flush, gen=%d",
			share_cache->dev_gen);
	}
	rte_rwlock_write_unlock(&share_cache->rwlock);
}

/**
 * Dump all the created MRs and the global cache entries.
 *
 * @param share_cache
 *   Pointer to a global shared MR cache.
 */
void
mlx5_mr_dump_cache(struct mlx5_mr_share_cache *share_cache __rte_unused)
{
#ifdef RTE_LIBRTE_MLX5_DEBUG
	struct mlx5_mr *mr;
	int mr_n = 0;
	int chunk_n = 0;

	rte_rwlock_read_lock(&share_cache->rwlock);
	/* Iterate all the existing MRs. */
	LIST_FOREACH(mr, &share_cache->mr_list, mr) {
		unsigned int n;

		DRV_LOG(DEBUG, "MR[%u], LKey = 0x%x, ms_n = %u, ms_bmp_n = %u",
			mr_n++, rte_cpu_to_be_32(mr->pmd_mr.lkey),
			mr->ms_n, mr->ms_bmp_n);
		if (mr->ms_n == 0)
			continue;
		for (n = 0; n < mr->ms_bmp_n; ) {
			struct mr_cache_entry ret = { 0, };

			n = mr_find_next_chunk(mr, &ret, n);
			if (!ret.end)
				break;
			DRV_LOG(DEBUG,
				" chunk[%u], [0x%" PRIxPTR ", 0x%" PRIxPTR ")",
				chunk_n++, ret.start, ret.end);
		}
	}
	DRV_LOG(DEBUG, "Dumping global cache %p", (void *)share_cache);
	mlx5_mr_btree_dump(&share_cache->cache);
	rte_rwlock_read_unlock(&share_cache->rwlock);
#endif
}

static int
mlx5_range_compare_start(const void *lhs, const void *rhs)
{
	const struct mlx5_range *r1 = lhs, *r2 = rhs;

	if (r1->start > r2->start)
		return 1;
	else if (r1->start < r2->start)
		return -1;
	return 0;
}

static void
mlx5_range_from_mempool_chunk(struct rte_mempool *mp, void *opaque,
			      struct rte_mempool_memhdr *memhdr,
			      unsigned int idx)
{
	struct mlx5_range *ranges = opaque, *range = &ranges[idx];
	uint64_t page_size = rte_mem_page_size();

	RTE_SET_USED(mp);
	range->start = RTE_ALIGN_FLOOR((uintptr_t)memhdr->addr, page_size);
	range->end = RTE_ALIGN_CEIL(range->start + memhdr->len, page_size);
}

/**
 * Collect page-aligned memory ranges of the mempool.
 */
static int
mlx5_mempool_get_chunks(struct rte_mempool *mp, struct mlx5_range **out,
			unsigned int *out_n)
{
	struct mlx5_range *chunks;
	unsigned int n;

	n = mp->nb_mem_chunks;
	chunks = calloc(n, sizeof(chunks[0]));
	if (chunks == NULL)
		return -1;
	rte_mempool_mem_iter(mp, mlx5_range_from_mempool_chunk, chunks);
	*out = chunks;
	*out_n = n;
	return 0;
}

struct mlx5_mempool_get_extmem_data {
	struct mlx5_range *heap;
	unsigned int heap_size;
	int ret;
};

static void
mlx5_mempool_get_extmem_cb(struct rte_mempool *mp, void *opaque,
			   void *obj, unsigned int obj_idx)
{
	struct mlx5_mempool_get_extmem_data *data = opaque;
	struct rte_mbuf *mbuf = obj;
	uintptr_t addr = (uintptr_t)mbuf->buf_addr;
	struct mlx5_range *seg, *heap;
	struct rte_memseg_list *msl;
	size_t page_size;
	uintptr_t page_start;
	unsigned int pos = 0, len = data->heap_size, delta;

	RTE_SET_USED(mp);
	RTE_SET_USED(obj_idx);
	if (data->ret < 0)
		return;
	/* Binary search for an already visited page. */
	while (len > 1) {
		delta = len / 2;
		if (addr < data->heap[pos + delta].start) {
			len = delta;
		} else {
			pos += delta;
			len -= delta;
		}
	}
	if (data->heap != NULL) {
		seg = &data->heap[pos];
		if (seg->start <= addr && addr < seg->end)
			return;
	}
	/* Determine the page boundaries and remember them. */
	heap = realloc(data->heap, sizeof(heap[0]) * (data->heap_size + 1));
	if (heap == NULL) {
		free(data->heap);
		data->heap = NULL;
		data->ret = -1;
		return;
	}
	data->heap = heap;
	data->heap_size++;
	seg = &heap[data->heap_size - 1];
	msl = rte_mem_virt2memseg_list((void *)addr);
	page_size = msl != NULL ? msl->page_sz : rte_mem_page_size();
	page_start = RTE_PTR_ALIGN_FLOOR(addr, page_size);
	seg->start = page_start;
	seg->end = page_start + page_size;
	/* Maintain the heap order. */
	qsort(data->heap, data->heap_size, sizeof(heap[0]),
	      mlx5_range_compare_start);
}

/**
 * Recover pages of external memory as close as possible
 * for a mempool with RTE_PKTMBUF_POOL_F_PINNED_EXT_BUF.
 * Pages are stored in a heap for efficient search, because mbufs are many.
 */
static int
mlx5_mempool_get_extmem(struct rte_mempool *mp, struct mlx5_range **out,
			unsigned int *out_n)
{
	struct mlx5_mempool_get_extmem_data data;

	memset(&data, 0, sizeof(data));
	rte_mempool_obj_iter(mp, mlx5_mempool_get_extmem_cb, &data);
	if (data.ret < 0)
		return -1;
	*out = data.heap;
	*out_n = data.heap_size;
	return 0;
}

/**
 * Get VA-contiguous ranges of the mempool memory.
 * Each range start and end is aligned to the system page size.
 *
 * @param[in] mp
 *   Analyzed mempool.
 * @param[out] out
 *   Receives the ranges, caller must release it with free().
 * @param[out] out_n
 *   Receives the number of @p out elements.
 *
 * @return
 *   0 on success, (-1) on failure.
 */
static int
mlx5_get_mempool_ranges(struct rte_mempool *mp, struct mlx5_range **out,
			unsigned int *out_n)
{
	struct mlx5_range *chunks;
	unsigned int chunks_n, contig_n, i;
	int ret;

	/* Collect the pool underlying memory. */
	ret = (rte_pktmbuf_priv_flags(mp) & RTE_PKTMBUF_POOL_F_PINNED_EXT_BUF) ?
	      mlx5_mempool_get_extmem(mp, &chunks, &chunks_n) :
	      mlx5_mempool_get_chunks(mp, &chunks, &chunks_n);
	if (ret < 0)
		return ret;
	/* Merge adjacent chunks and place them at the beginning. */
	qsort(chunks, chunks_n, sizeof(chunks[0]), mlx5_range_compare_start);
	contig_n = 1;
	for (i = 1; i < chunks_n; i++)
		if (chunks[i - 1].end != chunks[i].start) {
			chunks[contig_n - 1].end = chunks[i - 1].end;
			chunks[contig_n] = chunks[i];
			contig_n++;
		}
	/* Extend the last contiguous chunk to the end of the mempool. */
	chunks[contig_n - 1].end = chunks[i - 1].end;
	*out = chunks;
	*out_n = contig_n;
	return 0;
}

/**
 * Analyze mempool memory to select memory ranges to register.
 *
 * @param[in] mp
 *   Mempool to analyze.
 * @param[out] out
 *   Receives memory ranges to register, aligned to the system page size.
 *   The caller must release them with free().
 * @param[out] out_n
 *   Receives the number of @p out items.
 * @param[out] share_hugepage
 *   Receives True if the entire pool resides within a single hugepage.
 *
 * @return
 *   0 on success, (-1) on failure.
 */
static int
mlx5_mempool_reg_analyze(struct rte_mempool *mp, struct mlx5_range **out,
			 unsigned int *out_n, bool *share_hugepage)
{
	struct mlx5_range *ranges = NULL;
	unsigned int i, ranges_n = 0;
	struct rte_memseg_list *msl;

	if (mlx5_get_mempool_ranges(mp, &ranges, &ranges_n) < 0) {
		DRV_LOG(ERR, "Cannot get address ranges for mempool %s",
			mp->name);
		return -1;
	}
	/* Check if the hugepage of the pool can be shared. */
	*share_hugepage = false;
	msl = rte_mem_virt2memseg_list((void *)ranges[0].start);
	if (msl != NULL) {
		uint64_t hugepage_sz = 0;

		/* Check that all ranges are on pages of the same size. */
		for (i = 0; i < ranges_n; i++) {
			if (hugepage_sz != 0 && hugepage_sz != msl->page_sz)
				break;
			hugepage_sz = msl->page_sz;
		}
		if (i == ranges_n) {
			/*
			 * If the entire pool is within one hugepage,
			 * combine all ranges into one of the hugepage size.
			 */
			uintptr_t reg_start = ranges[0].start;
			uintptr_t reg_end = ranges[ranges_n - 1].end;
			uintptr_t hugepage_start =
				RTE_ALIGN_FLOOR(reg_start, hugepage_sz);
			uintptr_t hugepage_end = hugepage_start + hugepage_sz;
			if (reg_end < hugepage_end) {
				ranges[0].start = hugepage_start;
				ranges[0].end = hugepage_end;
				ranges_n = 1;
				*share_hugepage = true;
			}
		}
	}
	*out = ranges;
	*out_n = ranges_n;
	return 0;
}

/** Create a registration object for the mempool. */
static struct mlx5_mempool_reg *
mlx5_mempool_reg_create(struct rte_mempool *mp, unsigned int mrs_n)
{
	struct mlx5_mempool_reg *mpr = NULL;

	mpr = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
			  sizeof(*mpr) + mrs_n * sizeof(mpr->mrs[0]),
			  RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
	if (mpr == NULL) {
		DRV_LOG(ERR, "Cannot allocate mempool %s registration object",
			mp->name);
		return NULL;
	}
	mpr->mp = mp;
	mpr->mrs = (struct mlx5_mempool_mr *)(mpr + 1);
	mpr->mrs_n = mrs_n;
	return mpr;
}

/**
 * Destroy a mempool registration object.
 *
 * @param standalone
 *   Whether @p mpr owns its MRs exclusively, i.e. they are not shared.
 */
static void
mlx5_mempool_reg_destroy(struct mlx5_mr_share_cache *share_cache,
			 struct mlx5_mempool_reg *mpr, bool standalone)
{
	if (standalone) {
		unsigned int i;

		for (i = 0; i < mpr->mrs_n; i++)
			share_cache->dereg_mr_cb(&mpr->mrs[i].pmd_mr);
	}
	mlx5_free(mpr);
}

/** Find registration object of a mempool. */
static struct mlx5_mempool_reg *
mlx5_mempool_reg_lookup(struct mlx5_mr_share_cache *share_cache,
			struct rte_mempool *mp)
{
	struct mlx5_mempool_reg *mpr;

	LIST_FOREACH(mpr, &share_cache->mempool_reg_list, next)
		if (mpr->mp == mp)
			break;
	return mpr;
}

/** Increment reference counters of MRs used in the registration. */
static void
mlx5_mempool_reg_attach(struct mlx5_mempool_reg *mpr)
{
	unsigned int i;

	for (i = 0; i < mpr->mrs_n; i++)
		__atomic_add_fetch(&mpr->mrs[i].refcnt, 1, __ATOMIC_RELAXED);
}

/**
 * Decrement reference counters of MRs used in the registration.
 *
 * @return True if no more references to @p mpr MRs exist, False otherwise.
 */
static bool
mlx5_mempool_reg_detach(struct mlx5_mempool_reg *mpr)
{
	unsigned int i;
	bool ret = false;

	for (i = 0; i < mpr->mrs_n; i++)
		ret |= __atomic_sub_fetch(&mpr->mrs[i].refcnt, 1,
					  __ATOMIC_RELAXED) == 0;
	return ret;
}

static int
mlx5_mr_mempool_register_primary(struct mlx5_mr_share_cache *share_cache,
				 void *pd, struct rte_mempool *mp)
{
	struct mlx5_range *ranges = NULL;
	struct mlx5_mempool_reg *mpr, *new_mpr;
	unsigned int i, ranges_n;
	bool share_hugepage;
	int ret = -1;

	/* Early check to avoid unnecessary creation of MRs. */
	rte_rwlock_read_lock(&share_cache->rwlock);
	mpr = mlx5_mempool_reg_lookup(share_cache, mp);
	rte_rwlock_read_unlock(&share_cache->rwlock);
	if (mpr != NULL) {
		DRV_LOG(DEBUG, "Mempool %s is already registered for PD %p",
			mp->name, pd);
		rte_errno = EEXIST;
		goto exit;
	}
	if (mlx5_mempool_reg_analyze(mp, &ranges, &ranges_n,
				     &share_hugepage) < 0) {
		DRV_LOG(ERR, "Cannot get mempool %s memory ranges", mp->name);
		rte_errno = ENOMEM;
		goto exit;
	}
	new_mpr = mlx5_mempool_reg_create(mp, ranges_n);
	if (new_mpr == NULL) {
		DRV_LOG(ERR,
			"Cannot create a registration object for mempool %s in PD %p",
			mp->name, pd);
		rte_errno = ENOMEM;
		goto exit;
	}
	/*
	 * If the entire mempool fits in a single hugepage, the MR for this
	 * hugepage can be shared across mempools that also fit in it.
	 */
	if (share_hugepage) {
		rte_rwlock_write_lock(&share_cache->rwlock);
		LIST_FOREACH(mpr, &share_cache->mempool_reg_list, next) {
			if (mpr->mrs[0].pmd_mr.addr == (void *)ranges[0].start)
				break;
		}
		if (mpr != NULL) {
			new_mpr->mrs = mpr->mrs;
			mlx5_mempool_reg_attach(new_mpr);
			LIST_INSERT_HEAD(&share_cache->mempool_reg_list,
					 new_mpr, next);
		}
		rte_rwlock_write_unlock(&share_cache->rwlock);
		if (mpr != NULL) {
			DRV_LOG(DEBUG, "Shared MR %#x in PD %p for mempool %s with mempool %s",
				mpr->mrs[0].pmd_mr.lkey, pd, mp->name,
				mpr->mp->name);
			ret = 0;
			goto exit;
		}
	}
	for (i = 0; i < ranges_n; i++) {
		struct mlx5_mempool_mr *mr = &new_mpr->mrs[i];
		const struct mlx5_range *range = &ranges[i];
		size_t len = range->end - range->start;

		if (share_cache->reg_mr_cb(pd, (void *)range->start, len,
					   &mr->pmd_mr) < 0) {
			DRV_LOG(ERR,
				"Failed to create an MR in PD %p for address range "
				"[0x%" PRIxPTR ", 0x%" PRIxPTR "] (%zu bytes) for mempool %s",
				pd, range->start, range->end, len, mp->name);
			break;
		}
		DRV_LOG(DEBUG,
			"Created a new MR %#x in PD %p for address range "
			"[0x%" PRIxPTR ", 0x%" PRIxPTR "] (%zu bytes) for mempool %s",
			mr->pmd_mr.lkey, pd, range->start, range->end, len,
			mp->name);
	}
	if (i != ranges_n) {
		mlx5_mempool_reg_destroy(share_cache, new_mpr, true);
		rte_errno = EINVAL;
		goto exit;
	}
	/* Concurrent registration is not supposed to happen. */
	rte_rwlock_write_lock(&share_cache->rwlock);
	mpr = mlx5_mempool_reg_lookup(share_cache, mp);
	if (mpr == NULL) {
		mlx5_mempool_reg_attach(new_mpr);
		LIST_INSERT_HEAD(&share_cache->mempool_reg_list, new_mpr, next);
		ret = 0;
	}
	rte_rwlock_write_unlock(&share_cache->rwlock);
	if (mpr != NULL) {
		DRV_LOG(DEBUG, "Mempool %s is already registered for PD %p",
			mp->name, pd);
		mlx5_mempool_reg_destroy(share_cache, new_mpr, true);
		rte_errno = EEXIST;
		goto exit;
	}
exit:
	free(ranges);
	return ret;
}

static int
mlx5_mr_mempool_register_secondary(struct mlx5_common_device *cdev,
				   struct rte_mempool *mp)
{
	return mlx5_mp_req_mempool_reg(cdev, mp, true);
}

/**
 * Register the memory of a mempool in the protection domain.
 *
 * @param cdev
 *   Pointer to the mlx5 common device.
 * @param mp
 *   Mempool to register.
 *
 * @return
 *   0 on success, (-1) on failure and rte_errno is set.
 */
int
mlx5_mr_mempool_register(struct mlx5_common_device *cdev,
			 struct rte_mempool *mp)
{
	if (mp->flags & RTE_MEMPOOL_F_NON_IO)
		return 0;
	switch (rte_eal_process_type()) {
	case RTE_PROC_PRIMARY:
		return mlx5_mr_mempool_register_primary(&cdev->mr_scache,
							cdev->pd, mp);
	case RTE_PROC_SECONDARY:
		return mlx5_mr_mempool_register_secondary(cdev, mp);
	default:
		return -1;
	}
}

static int
mlx5_mr_mempool_unregister_primary(struct mlx5_mr_share_cache *share_cache,
				   struct rte_mempool *mp)
{
	struct mlx5_mempool_reg *mpr;
	bool standalone = false;

	rte_rwlock_write_lock(&share_cache->rwlock);
	LIST_FOREACH(mpr, &share_cache->mempool_reg_list, next)
		if (mpr->mp == mp) {
			LIST_REMOVE(mpr, next);
			standalone = mlx5_mempool_reg_detach(mpr);
			if (standalone)
				/*
				 * The unlock operation below provides a memory
				 * barrier due to its store-release semantics.
				 */
				++share_cache->dev_gen;
			break;
		}
	rte_rwlock_write_unlock(&share_cache->rwlock);
	if (mpr == NULL) {
		rte_errno = ENOENT;
		return -1;
	}
	mlx5_mempool_reg_destroy(share_cache, mpr, standalone);
	return 0;
}

static int
mlx5_mr_mempool_unregister_secondary(struct mlx5_common_device *cdev,
				     struct rte_mempool *mp)
{
	return mlx5_mp_req_mempool_reg(cdev, mp, false);
}

/**
 * Unregister the memory of a mempool from the protection domain.
 *
 * @param cdev
 *   Pointer to the mlx5 common device.
 * @param mp
 *   Mempool to unregister.
 *
 * @return
 *   0 on success, (-1) on failure and rte_errno is set.
 */
int
mlx5_mr_mempool_unregister(struct mlx5_common_device *cdev,
			   struct rte_mempool *mp)
{
	if (mp->flags & RTE_MEMPOOL_F_NON_IO)
		return 0;
	switch (rte_eal_process_type()) {
	case RTE_PROC_PRIMARY:
		return mlx5_mr_mempool_unregister_primary(&cdev->mr_scache, mp);
	case RTE_PROC_SECONDARY:
		return mlx5_mr_mempool_unregister_secondary(cdev, mp);
	default:
		return -1;
	}
}

/**
 * Look up an MR key by an address in a registered mempool.
 *
 * @param mpr
 *   Mempool registration object.
 * @param addr
 *   Address within the mempool.
 * @param entry
 *   Bottom-half cache entry to fill.
 *
 * @return
 *   MR key or UINT32_MAX on failure, which can only happen
 *   if the address is not from within the mempool.
 */
static uint32_t
mlx5_mempool_reg_addr2mr(struct mlx5_mempool_reg *mpr, uintptr_t addr,
			 struct mr_cache_entry *entry)
{
	uint32_t lkey = UINT32_MAX;
	unsigned int i;

	for (i = 0; i < mpr->mrs_n; i++) {
		const struct mlx5_pmd_mr *mr = &mpr->mrs[i].pmd_mr;
		uintptr_t mr_addr = (uintptr_t)mr->addr;

		if (mr_addr <= addr) {
			lkey = rte_cpu_to_be_32(mr->lkey);
			entry->start = mr_addr;
			entry->end = mr_addr + mr->len;
			entry->lkey = lkey;
			break;
		}
	}
	return lkey;
}

/**
 * Update bottom-half cache from the list of mempool registrations.
 *
 * @param mr_ctrl
 *   Per-queue MR control handle.
 * @param entry
 *   Pointer to an entry in the bottom-half cache to update
 *   with the MR lkey looked up.
 * @param mp
 *   Mempool containing the address.
 * @param addr
 *   Address to lookup.
 * @return
 *   MR lkey on success, UINT32_MAX on failure.
 */
static uint32_t
mlx5_lookup_mempool_regs(struct mlx5_mr_ctrl *mr_ctrl,
			 struct mr_cache_entry *entry,
			 struct rte_mempool *mp, uintptr_t addr)
{
	struct mlx5_mr_share_cache *share_cache =
		container_of(mr_ctrl->dev_gen_ptr, struct mlx5_mr_share_cache,
			     dev_gen);
	struct mlx5_mr_btree *bt = &mr_ctrl->cache_bh;
	struct mlx5_mempool_reg *mpr;
	uint32_t lkey = UINT32_MAX;

	/* If local cache table is full, try to double it. */
	if (unlikely(bt->len == bt->size))
		mr_btree_expand(bt, bt->size << 1);
	/* Look up in mempool registrations. */
	rte_rwlock_read_lock(&share_cache->rwlock);
	mpr = mlx5_mempool_reg_lookup(share_cache, mp);
	if (mpr != NULL)
		lkey = mlx5_mempool_reg_addr2mr(mpr, addr, entry);
	rte_rwlock_read_unlock(&share_cache->rwlock);
	/*
	 * Update local cache. Even if it fails, return the found entry
	 * to update top-half cache. Next time, this entry will be found
	 * in the global cache.
	 */
	if (lkey != UINT32_MAX)
		mr_btree_insert(bt, entry);
	return lkey;
}

/**
 * Bottom-half lookup for the address from the mempool.
 *
 * @param mr_ctrl
 *   Per-queue MR control handle.
 * @param mp
 *   Mempool containing the address.
 * @param addr
 *   Address to lookup.
 * @return
 *   MR lkey on success, UINT32_MAX on failure.
 */
uint32_t
mlx5_mr_mempool2mr_bh(struct mlx5_mr_ctrl *mr_ctrl,
		      struct rte_mempool *mp, uintptr_t addr)
{
	struct mr_cache_entry *repl = &mr_ctrl->cache[mr_ctrl->head];
	uint32_t lkey;
	uint16_t bh_idx = 0;

	/* Binary-search MR translation table. */
	lkey = mr_btree_lookup(&mr_ctrl->cache_bh, &bh_idx, addr);
	/* Update top-half cache. */
	if (likely(lkey != UINT32_MAX)) {
		*repl = (*mr_ctrl->cache_bh.table)[bh_idx];
	} else {
		lkey = mlx5_lookup_mempool_regs(mr_ctrl, repl, mp, addr);
		/* Can only fail if the address is not from the mempool. */
		if (unlikely(lkey == UINT32_MAX))
			return UINT32_MAX;
	}
	/* Update the most recently used entry. */
	mr_ctrl->mru = mr_ctrl->head;
	/* Point to the next victim, the oldest. */
	mr_ctrl->head = (mr_ctrl->head + 1) % MLX5_MR_CACHE_N;
	return lkey;
}

uint32_t
mlx5_mr_mb2mr_bh(struct mlx5_mr_ctrl *mr_ctrl, struct rte_mbuf *mb)
{
	uint32_t lkey;
	uintptr_t addr = (uintptr_t)mb->buf_addr;
	struct mlx5_mr_share_cache *share_cache =
		container_of(mr_ctrl->dev_gen_ptr, struct mlx5_mr_share_cache,
			     dev_gen);
	struct mlx5_common_device *cdev =
		container_of(share_cache, struct mlx5_common_device, mr_scache);

	if (cdev->config.mr_mempool_reg_en) {
		struct rte_mempool *mp = NULL;
		struct mlx5_mprq_buf *buf;

		if (!RTE_MBUF_HAS_EXTBUF(mb)) {
			mp = mlx5_mb2mp(mb);
		} else if (mb->shinfo->free_cb == mlx5_mprq_buf_free_cb) {
			/* Recover MPRQ mempool. */
			buf = mb->shinfo->fcb_opaque;
			mp = buf->mp;
		}
		if (mp != NULL) {
			lkey = mlx5_mr_mempool2mr_bh(mr_ctrl, mp, addr);
			/*
			 * Lookup can only fail on invalid input, e.g. "addr"
			 * is not from "mp" or "mp" has MEMPOOL_F_NON_IO set.
			 */
			if (lkey != UINT32_MAX)
				return lkey;
		}
		/* Fallback for generic mechanism in corner cases. */
	}
	return mlx5_mr_addr2mr_bh(mr_ctrl, addr);
}
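/*
 * Usage sketch (illustration only, not compiled): how a datapath caller is
 * expected to consume the per-queue cache above. This is an assumption drawn
 * from the functions in this file, not a copy of the mlx5 Rx/Tx inline
 * helpers: check the shared memory-event generation number, flush the local
 * caches if it changed, then take the bottom-half lookup. The function name
 * and the MLX5_MR_USAGE_SKETCH guard are hypothetical.
 */
#ifdef MLX5_MR_USAGE_SKETCH /* hypothetical guard, never defined */
static uint32_t
example_mb2lkey(struct mlx5_mr_ctrl *mr_ctrl, struct rte_mbuf *mb)
{
	/* A memory free event bumped dev_gen: drop the stale local caches. */
	if (unlikely(*mr_ctrl->dev_gen_ptr != mr_ctrl->cur_gen))
		mlx5_mr_flush_local_cache(mr_ctrl);
	/* Bottom-half search: per-queue B-tree, then mempool/global cache. */
	return mlx5_mr_mb2mr_bh(mr_ctrl, mb);
}
#endif /* MLX5_MR_USAGE_SKETCH */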