1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2016 6WIND S.A. 3 * Copyright 2020 Mellanox Technologies, Ltd 4 */ 5 #include <stddef.h> 6 7 #include <rte_eal_memconfig.h> 8 #include <rte_eal_paging.h> 9 #include <rte_errno.h> 10 #include <rte_mempool.h> 11 #include <rte_malloc.h> 12 #include <rte_rwlock.h> 13 14 #include "mlx5_glue.h" 15 #include "mlx5_common.h" 16 #include "mlx5_common_mp.h" 17 #include "mlx5_common_mr.h" 18 #include "mlx5_common_os.h" 19 #include "mlx5_common_log.h" 20 #include "mlx5_malloc.h" 21 22 struct mr_find_contig_memsegs_data { 23 uintptr_t addr; 24 uintptr_t start; 25 uintptr_t end; 26 const struct rte_memseg_list *msl; 27 }; 28 29 /* Virtual memory range. */ 30 struct mlx5_range { 31 uintptr_t start; 32 uintptr_t end; 33 }; 34 35 /** Memory region for a mempool. */ 36 struct mlx5_mempool_mr { 37 struct mlx5_pmd_mr pmd_mr; 38 uint32_t refcnt; /**< Number of mempools sharing this MR. */ 39 }; 40 41 /* Mempool registration. */ 42 struct mlx5_mempool_reg { 43 LIST_ENTRY(mlx5_mempool_reg) next; 44 /** Registered mempool, used to designate registrations. */ 45 struct rte_mempool *mp; 46 /** Memory regions for the address ranges of the mempool. */ 47 struct mlx5_mempool_mr *mrs; 48 /** Number of memory regions. */ 49 unsigned int mrs_n; 50 }; 51 52 void 53 mlx5_mprq_buf_free_cb(void *addr __rte_unused, void *opaque) 54 { 55 struct mlx5_mprq_buf *buf = opaque; 56 57 if (__atomic_load_n(&buf->refcnt, __ATOMIC_RELAXED) == 1) { 58 rte_mempool_put(buf->mp, buf); 59 } else if (unlikely(__atomic_sub_fetch(&buf->refcnt, 1, 60 __ATOMIC_RELAXED) == 0)) { 61 __atomic_store_n(&buf->refcnt, 1, __ATOMIC_RELAXED); 62 rte_mempool_put(buf->mp, buf); 63 } 64 } 65 66 /** 67 * Expand B-tree table to a given size. Can't be called with holding 68 * memory_hotplug_lock or share_cache.rwlock due to rte_realloc(). 69 * 70 * @param bt 71 * Pointer to B-tree structure. 72 * @param n 73 * Number of entries for expansion. 74 * 75 * @return 76 * 0 on success, -1 on failure. 77 */ 78 static int 79 mr_btree_expand(struct mlx5_mr_btree *bt, int n) 80 { 81 void *mem; 82 int ret = 0; 83 84 if (n <= bt->size) 85 return ret; 86 /* 87 * Downside of directly using rte_realloc() is that SOCKET_ID_ANY is 88 * used inside if there's no room to expand. Because this is a quite 89 * rare case and a part of very slow path, it is very acceptable. 90 * Initially cache_bh[] will be given practically enough space and once 91 * it is expanded, expansion wouldn't be needed again ever. 92 */ 93 mem = mlx5_realloc(bt->table, MLX5_MEM_RTE | MLX5_MEM_ZERO, 94 n * sizeof(struct mr_cache_entry), 0, SOCKET_ID_ANY); 95 if (mem == NULL) { 96 /* Not an error, B-tree search will be skipped. */ 97 DRV_LOG(WARNING, "failed to expand MR B-tree (%p) table", 98 (void *)bt); 99 ret = -1; 100 } else { 101 DRV_LOG(DEBUG, "expanded MR B-tree table (size=%u)", n); 102 bt->table = mem; 103 bt->size = n; 104 } 105 return ret; 106 } 107 108 /** 109 * Look up LKey from given B-tree lookup table, store the last index and return 110 * searched LKey. 111 * 112 * @param bt 113 * Pointer to B-tree structure. 114 * @param[out] idx 115 * Pointer to index. Even on search failure, returns index where it stops 116 * searching so that index can be used when inserting a new entry. 117 * @param addr 118 * Search key. 119 * 120 * @return 121 * Searched LKey on success, UINT32_MAX on no match. 
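 *
 * A minimal lookup sketch (illustrative only; "bt" and "addr" are assumed to
 * come from the caller's context):
 *
 * @code
 * uint16_t idx = 0;
 * uint32_t lkey = mr_btree_lookup(bt, &idx, addr);
 *
 * if (lkey == UINT32_MAX) {
 *         // Miss: "idx" still holds the slot after which a new entry
 *         // would be inserted by mr_btree_insert().
 * }
 * @endcode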
122 */ 123 static uint32_t 124 mr_btree_lookup(struct mlx5_mr_btree *bt, uint16_t *idx, uintptr_t addr) 125 { 126 struct mr_cache_entry *lkp_tbl; 127 uint16_t n; 128 uint16_t base = 0; 129 130 MLX5_ASSERT(bt != NULL); 131 lkp_tbl = *bt->table; 132 n = bt->len; 133 /* First entry must be NULL for comparison. */ 134 MLX5_ASSERT(bt->len > 0 || (lkp_tbl[0].start == 0 && 135 lkp_tbl[0].lkey == UINT32_MAX)); 136 /* Binary search. */ 137 do { 138 register uint16_t delta = n >> 1; 139 140 if (addr < lkp_tbl[base + delta].start) { 141 n = delta; 142 } else { 143 base += delta; 144 n -= delta; 145 } 146 } while (n > 1); 147 MLX5_ASSERT(addr >= lkp_tbl[base].start); 148 *idx = base; 149 if (addr < lkp_tbl[base].end) 150 return lkp_tbl[base].lkey; 151 /* Not found. */ 152 return UINT32_MAX; 153 } 154 155 /** 156 * Insert an entry to B-tree lookup table. 157 * 158 * @param bt 159 * Pointer to B-tree structure. 160 * @param entry 161 * Pointer to new entry to insert. 162 * 163 * @return 164 * 0 on success, -1 on failure. 165 */ 166 static int 167 mr_btree_insert(struct mlx5_mr_btree *bt, struct mr_cache_entry *entry) 168 { 169 struct mr_cache_entry *lkp_tbl; 170 uint16_t idx = 0; 171 size_t shift; 172 173 MLX5_ASSERT(bt != NULL); 174 MLX5_ASSERT(bt->len <= bt->size); 175 MLX5_ASSERT(bt->len > 0); 176 lkp_tbl = *bt->table; 177 /* Find out the slot for insertion. */ 178 if (mr_btree_lookup(bt, &idx, entry->start) != UINT32_MAX) { 179 DRV_LOG(DEBUG, 180 "abort insertion to B-tree(%p): already exist at" 181 " idx=%u [0x%" PRIxPTR ", 0x%" PRIxPTR ") lkey=0x%x", 182 (void *)bt, idx, entry->start, entry->end, entry->lkey); 183 /* Already exist, return. */ 184 return 0; 185 } 186 /* If table is full, return error. */ 187 if (unlikely(bt->len == bt->size)) { 188 bt->overflow = 1; 189 return -1; 190 } 191 /* Insert entry. */ 192 ++idx; 193 shift = (bt->len - idx) * sizeof(struct mr_cache_entry); 194 if (shift) 195 memmove(&lkp_tbl[idx + 1], &lkp_tbl[idx], shift); 196 lkp_tbl[idx] = *entry; 197 bt->len++; 198 DRV_LOG(DEBUG, 199 "inserted B-tree(%p)[%u]," 200 " [0x%" PRIxPTR ", 0x%" PRIxPTR ") lkey=0x%x", 201 (void *)bt, idx, entry->start, entry->end, entry->lkey); 202 return 0; 203 } 204 205 /** 206 * Initialize B-tree and allocate memory for lookup table. 207 * 208 * @param bt 209 * Pointer to B-tree structure. 210 * @param n 211 * Number of entries to allocate. 212 * @param socket 213 * NUMA socket on which memory must be allocated. 214 * 215 * @return 216 * 0 on success, a negative errno value otherwise and rte_errno is set. 217 */ 218 static int 219 mlx5_mr_btree_init(struct mlx5_mr_btree *bt, int n, int socket) 220 { 221 if (bt == NULL) { 222 rte_errno = EINVAL; 223 return -rte_errno; 224 } 225 MLX5_ASSERT(!bt->table && !bt->size); 226 memset(bt, 0, sizeof(*bt)); 227 bt->table = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, 228 sizeof(struct mr_cache_entry) * n, 229 0, socket); 230 if (bt->table == NULL) { 231 rte_errno = ENOMEM; 232 DRV_LOG(DEBUG, 233 "failed to allocate memory for btree cache on socket " 234 "%d", socket); 235 return -rte_errno; 236 } 237 bt->size = n; 238 /* First entry must be NULL for binary search. */ 239 (*bt->table)[bt->len++] = (struct mr_cache_entry) { 240 .lkey = UINT32_MAX, 241 }; 242 DRV_LOG(DEBUG, "initialized B-tree %p with table %p", 243 (void *)bt, (void *)bt->table); 244 return 0; 245 } 246 247 /** 248 * Free B-tree resources. 249 * 250 * @param bt 251 * Pointer to B-tree structure. 
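 *
 * A minimal sketch of the init/free pairing used inside this file (the
 * arguments are examples only):
 *
 * @code
 * struct mlx5_mr_btree bt = { 0 };
 *
 * if (mlx5_mr_btree_init(&bt, MLX5_MR_BTREE_CACHE_N, SOCKET_ID_ANY) == 0) {
 *         // ... lookups and insertions on the B-tree cache ...
 *         mlx5_mr_btree_free(&bt);
 * }
 * @endcode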
252 */ 253 void 254 mlx5_mr_btree_free(struct mlx5_mr_btree *bt) 255 { 256 if (bt == NULL) 257 return; 258 DRV_LOG(DEBUG, "freeing B-tree %p with table %p", 259 (void *)bt, (void *)bt->table); 260 mlx5_free(bt->table); 261 memset(bt, 0, sizeof(*bt)); 262 } 263 264 /** 265 * Dump all the entries in a B-tree 266 * 267 * @param bt 268 * Pointer to B-tree structure. 269 */ 270 void 271 mlx5_mr_btree_dump(struct mlx5_mr_btree *bt __rte_unused) 272 { 273 #ifdef RTE_LIBRTE_MLX5_DEBUG 274 int idx; 275 struct mr_cache_entry *lkp_tbl; 276 277 if (bt == NULL) 278 return; 279 lkp_tbl = *bt->table; 280 for (idx = 0; idx < bt->len; ++idx) { 281 struct mr_cache_entry *entry = &lkp_tbl[idx]; 282 283 DRV_LOG(DEBUG, "B-tree(%p)[%u]," 284 " [0x%" PRIxPTR ", 0x%" PRIxPTR ") lkey=0x%x", 285 (void *)bt, idx, entry->start, entry->end, entry->lkey); 286 } 287 #endif 288 } 289 290 /** 291 * Initialize per-queue MR control descriptor. 292 * 293 * @param mr_ctrl 294 * Pointer to MR control structure. 295 * @param dev_gen_ptr 296 * Pointer to generation number of global cache. 297 * @param socket 298 * NUMA socket on which memory must be allocated. 299 * 300 * @return 301 * 0 on success, a negative errno value otherwise and rte_errno is set. 302 */ 303 int 304 mlx5_mr_ctrl_init(struct mlx5_mr_ctrl *mr_ctrl, uint32_t *dev_gen_ptr, 305 int socket) 306 { 307 if (mr_ctrl == NULL) { 308 rte_errno = EINVAL; 309 return -rte_errno; 310 } 311 /* Save pointer of global generation number to check memory event. */ 312 mr_ctrl->dev_gen_ptr = dev_gen_ptr; 313 /* Initialize B-tree and allocate memory for bottom-half cache table. */ 314 return mlx5_mr_btree_init(&mr_ctrl->cache_bh, MLX5_MR_BTREE_CACHE_N, 315 socket); 316 } 317 318 /** 319 * Find virtually contiguous memory chunk in a given MR. 320 * 321 * @param dev 322 * Pointer to MR structure. 323 * @param[out] entry 324 * Pointer to returning MR cache entry. If not found, this will not be 325 * updated. 326 * @param start_idx 327 * Start index of the memseg bitmap. 328 * 329 * @return 330 * Next index to go on lookup. 331 */ 332 static int 333 mr_find_next_chunk(struct mlx5_mr *mr, struct mr_cache_entry *entry, 334 int base_idx) 335 { 336 uintptr_t start = 0; 337 uintptr_t end = 0; 338 uint32_t idx = 0; 339 340 /* MR for external memory doesn't have memseg list. */ 341 if (mr->msl == NULL) { 342 MLX5_ASSERT(mr->ms_bmp_n == 1); 343 MLX5_ASSERT(mr->ms_n == 1); 344 MLX5_ASSERT(base_idx == 0); 345 /* 346 * Can't search it from memseg list but get it directly from 347 * pmd_mr as there's only one chunk. 348 */ 349 entry->start = (uintptr_t)mr->pmd_mr.addr; 350 entry->end = (uintptr_t)mr->pmd_mr.addr + mr->pmd_mr.len; 351 entry->lkey = rte_cpu_to_be_32(mr->pmd_mr.lkey); 352 /* Returning 1 ends iteration. */ 353 return 1; 354 } 355 for (idx = base_idx; idx < mr->ms_bmp_n; ++idx) { 356 if (rte_bitmap_get(mr->ms_bmp, idx)) { 357 const struct rte_memseg_list *msl; 358 const struct rte_memseg *ms; 359 360 msl = mr->msl; 361 ms = rte_fbarray_get(&msl->memseg_arr, 362 mr->ms_base_idx + idx); 363 MLX5_ASSERT(msl->page_sz == ms->hugepage_sz); 364 if (!start) 365 start = ms->addr_64; 366 end = ms->addr_64 + ms->hugepage_sz; 367 } else if (start) { 368 /* Passed the end of a fragment. */ 369 break; 370 } 371 } 372 if (start) { 373 /* Found one chunk. */ 374 entry->start = start; 375 entry->end = end; 376 entry->lkey = rte_cpu_to_be_32(mr->pmd_mr.lkey); 377 } 378 return idx; 379 } 380 381 /** 382 * Insert a MR to the global B-tree cache. It may fail due to low-on-memory. 
383 * Then, this entry will have to be searched by mr_lookup_list() in 384 * mlx5_mr_create() on miss. 385 * 386 * @param share_cache 387 * Pointer to a global shared MR cache. 388 * @param mr 389 * Pointer to MR to insert. 390 * 391 * @return 392 * 0 on success, -1 on failure. 393 */ 394 int 395 mlx5_mr_insert_cache(struct mlx5_mr_share_cache *share_cache, 396 struct mlx5_mr *mr) 397 { 398 unsigned int n; 399 400 DRV_LOG(DEBUG, "Inserting MR(%p) to global cache(%p)", 401 (void *)mr, (void *)share_cache); 402 for (n = 0; n < mr->ms_bmp_n; ) { 403 struct mr_cache_entry entry; 404 405 memset(&entry, 0, sizeof(entry)); 406 /* Find a contiguous chunk and advance the index. */ 407 n = mr_find_next_chunk(mr, &entry, n); 408 if (!entry.end) 409 break; 410 if (mr_btree_insert(&share_cache->cache, &entry) < 0) { 411 /* 412 * Overflowed, but the global table cannot be expanded 413 * because of deadlock. 414 */ 415 return -1; 416 } 417 } 418 return 0; 419 } 420 421 /** 422 * Look up address in the original global MR list. 423 * 424 * @param share_cache 425 * Pointer to a global shared MR cache. 426 * @param[out] entry 427 * Pointer to returning MR cache entry. If no match, this will not be updated. 428 * @param addr 429 * Search key. 430 * 431 * @return 432 * Found MR on match, NULL otherwise. 433 */ 434 struct mlx5_mr * 435 mlx5_mr_lookup_list(struct mlx5_mr_share_cache *share_cache, 436 struct mr_cache_entry *entry, uintptr_t addr) 437 { 438 struct mlx5_mr *mr; 439 440 /* Iterate all the existing MRs. */ 441 LIST_FOREACH(mr, &share_cache->mr_list, mr) { 442 unsigned int n; 443 444 if (mr->ms_n == 0) 445 continue; 446 for (n = 0; n < mr->ms_bmp_n; ) { 447 struct mr_cache_entry ret; 448 449 memset(&ret, 0, sizeof(ret)); 450 n = mr_find_next_chunk(mr, &ret, n); 451 if (addr >= ret.start && addr < ret.end) { 452 /* Found. */ 453 *entry = ret; 454 return mr; 455 } 456 } 457 } 458 return NULL; 459 } 460 461 /** 462 * Look up address on global MR cache. 463 * 464 * @param share_cache 465 * Pointer to a global shared MR cache. 466 * @param[out] entry 467 * Pointer to returning MR cache entry. If no match, this will not be updated. 468 * @param addr 469 * Search key. 470 * 471 * @return 472 * Searched LKey on success, UINT32_MAX on failure and rte_errno is set. 473 */ 474 static uint32_t 475 mlx5_mr_lookup_cache(struct mlx5_mr_share_cache *share_cache, 476 struct mr_cache_entry *entry, uintptr_t addr) 477 { 478 uint16_t idx; 479 uint32_t lkey = UINT32_MAX; 480 struct mlx5_mr *mr; 481 482 /* 483 * If the global cache has overflowed since it failed to expand the 484 * B-tree table, it can't have all the existing MRs. Then, the address 485 * has to be searched by traversing the original MR list instead, which 486 * is very slow path. Otherwise, the global cache is all inclusive. 487 */ 488 if (!unlikely(share_cache->cache.overflow)) { 489 lkey = mr_btree_lookup(&share_cache->cache, &idx, addr); 490 if (lkey != UINT32_MAX) 491 *entry = (*share_cache->cache.table)[idx]; 492 } else { 493 /* Falling back to the slowest path. */ 494 mr = mlx5_mr_lookup_list(share_cache, entry, addr); 495 if (mr != NULL) 496 lkey = entry->lkey; 497 } 498 MLX5_ASSERT(lkey == UINT32_MAX || (addr >= entry->start && 499 addr < entry->end)); 500 return lkey; 501 } 502 503 /** 504 * Free MR resources. MR lock must not be held to avoid a deadlock. rte_free() 505 * can raise memory free event and the callback function will spin on the lock. 506 * 507 * @param mr 508 * Pointer to MR to free. 
509 */ 510 void 511 mlx5_mr_free(struct mlx5_mr *mr, mlx5_dereg_mr_t dereg_mr_cb) 512 { 513 if (mr == NULL) 514 return; 515 DRV_LOG(DEBUG, "freeing MR(%p):", (void *)mr); 516 dereg_mr_cb(&mr->pmd_mr); 517 if (mr->ms_bmp != NULL) 518 rte_bitmap_free(mr->ms_bmp); 519 mlx5_free(mr); 520 } 521 522 void 523 mlx5_mr_rebuild_cache(struct mlx5_mr_share_cache *share_cache) 524 { 525 struct mlx5_mr *mr; 526 527 DRV_LOG(DEBUG, "Rebuild dev cache[] %p", (void *)share_cache); 528 /* Flush cache to rebuild. */ 529 share_cache->cache.len = 1; 530 share_cache->cache.overflow = 0; 531 /* Iterate all the existing MRs. */ 532 LIST_FOREACH(mr, &share_cache->mr_list, mr) 533 if (mlx5_mr_insert_cache(share_cache, mr) < 0) 534 return; 535 } 536 537 /** 538 * Release resources of detached MR having no online entry. 539 * 540 * @param share_cache 541 * Pointer to a global shared MR cache. 542 */ 543 static void 544 mlx5_mr_garbage_collect(struct mlx5_mr_share_cache *share_cache) 545 { 546 struct mlx5_mr *mr_next; 547 struct mlx5_mr_list free_list = LIST_HEAD_INITIALIZER(free_list); 548 549 /* Must be called from the primary process. */ 550 MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY); 551 /* 552 * MR can't be freed with holding the lock because rte_free() could call 553 * memory free callback function. This will be a deadlock situation. 554 */ 555 rte_rwlock_write_lock(&share_cache->rwlock); 556 /* Detach the whole free list and release it after unlocking. */ 557 free_list = share_cache->mr_free_list; 558 LIST_INIT(&share_cache->mr_free_list); 559 rte_rwlock_write_unlock(&share_cache->rwlock); 560 /* Release resources. */ 561 mr_next = LIST_FIRST(&free_list); 562 while (mr_next != NULL) { 563 struct mlx5_mr *mr = mr_next; 564 565 mr_next = LIST_NEXT(mr, mr); 566 mlx5_mr_free(mr, share_cache->dereg_mr_cb); 567 } 568 } 569 570 /* Called during rte_memseg_contig_walk() by mlx5_mr_create(). */ 571 static int 572 mr_find_contig_memsegs_cb(const struct rte_memseg_list *msl, 573 const struct rte_memseg *ms, size_t len, void *arg) 574 { 575 struct mr_find_contig_memsegs_data *data = arg; 576 577 if (data->addr < ms->addr_64 || data->addr >= ms->addr_64 + len) 578 return 0; 579 /* Found, save it and stop walking. */ 580 data->start = ms->addr_64; 581 data->end = ms->addr_64 + len; 582 data->msl = msl; 583 return 1; 584 } 585 586 /** 587 * Create a new global Memory Region (MR) for a missing virtual address. 588 * This API should be called on a secondary process, then a request is sent to 589 * the primary process in order to create a MR for the address. As the global MR 590 * list is on the shared memory, following LKey lookup should succeed unless the 591 * request fails. 592 * 593 * @param pd 594 * Pointer to pd of a device (net, regex, vdpa,...). 595 * @param mp_id 596 * Multi-process identifier, may be NULL for the primary process. 597 * @param share_cache 598 * Pointer to a global shared MR cache. 599 * @param[out] entry 600 * Pointer to returning MR cache entry, found in the global cache or newly 601 * created. If failed to create one, this will not be updated. 602 * @param addr 603 * Target virtual address to register. 604 * @param mr_ext_memseg_en 605 * Configurable flag about external memory segment enable or not. 606 * 607 * @return 608 * Searched LKey on success, UINT32_MAX on failure and rte_errno is set. 
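 *
 * The flow is roughly the following (sketch only, error handling elided):
 *
 * @code
 * // 1. Ask the primary process to create the missing MR.
 * mlx5_mp_req_mr_create(mp_id, addr);
 * // 2. The MR is now in the shared global cache; fetch it locally.
 * rte_rwlock_read_lock(&share_cache->rwlock);
 * mlx5_mr_lookup_cache(share_cache, entry, addr);
 * rte_rwlock_read_unlock(&share_cache->rwlock);
 * @endcode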
609 */ 610 static uint32_t 611 mlx5_mr_create_secondary(void *pd __rte_unused, 612 struct mlx5_mp_id *mp_id, 613 struct mlx5_mr_share_cache *share_cache, 614 struct mr_cache_entry *entry, uintptr_t addr, 615 unsigned int mr_ext_memseg_en __rte_unused) 616 { 617 int ret; 618 619 if (mp_id == NULL) { 620 rte_errno = EINVAL; 621 return UINT32_MAX; 622 } 623 DRV_LOG(DEBUG, "port %u requesting MR creation for address (%p)", 624 mp_id->port_id, (void *)addr); 625 ret = mlx5_mp_req_mr_create(mp_id, addr); 626 if (ret) { 627 DRV_LOG(DEBUG, "Fail to request MR creation for address (%p)", 628 (void *)addr); 629 return UINT32_MAX; 630 } 631 rte_rwlock_read_lock(&share_cache->rwlock); 632 /* Fill in output data. */ 633 mlx5_mr_lookup_cache(share_cache, entry, addr); 634 /* Lookup can't fail. */ 635 MLX5_ASSERT(entry->lkey != UINT32_MAX); 636 rte_rwlock_read_unlock(&share_cache->rwlock); 637 DRV_LOG(DEBUG, "MR CREATED by primary process for %p:\n" 638 " [0x%" PRIxPTR ", 0x%" PRIxPTR "), lkey=0x%x", 639 (void *)addr, entry->start, entry->end, entry->lkey); 640 return entry->lkey; 641 } 642 643 /** 644 * Create a new global Memory Region (MR) for a missing virtual address. 645 * Register entire virtually contiguous memory chunk around the address. 646 * 647 * @param pd 648 * Pointer to pd of a device (net, regex, vdpa,...). 649 * @param share_cache 650 * Pointer to a global shared MR cache. 651 * @param[out] entry 652 * Pointer to returning MR cache entry, found in the global cache or newly 653 * created. If failed to create one, this will not be updated. 654 * @param addr 655 * Target virtual address to register. 656 * @param mr_ext_memseg_en 657 * Configurable flag about external memory segment enable or not. 658 * 659 * @return 660 * Searched LKey on success, UINT32_MAX on failure and rte_errno is set. 661 */ 662 uint32_t 663 mlx5_mr_create_primary(void *pd, 664 struct mlx5_mr_share_cache *share_cache, 665 struct mr_cache_entry *entry, uintptr_t addr, 666 unsigned int mr_ext_memseg_en) 667 { 668 struct mr_find_contig_memsegs_data data = {.addr = addr, }; 669 struct mr_find_contig_memsegs_data data_re; 670 const struct rte_memseg_list *msl; 671 const struct rte_memseg *ms; 672 struct mlx5_mr *mr = NULL; 673 int ms_idx_shift = -1; 674 uint32_t bmp_size; 675 void *bmp_mem; 676 uint32_t ms_n; 677 uint32_t n; 678 size_t len; 679 680 DRV_LOG(DEBUG, "Creating a MR using address (%p)", (void *)addr); 681 /* 682 * Release detached MRs if any. This can't be called with holding either 683 * memory_hotplug_lock or share_cache->rwlock. MRs on the free list have 684 * been detached by the memory free event but it couldn't be released 685 * inside the callback due to deadlock. As a result, releasing resources 686 * is quite opportunistic. 687 */ 688 mlx5_mr_garbage_collect(share_cache); 689 /* 690 * If enabled, find out a contiguous virtual address chunk in use, to 691 * which the given address belongs, in order to register maximum range. 692 * In the best case where mempools are not dynamically recreated and 693 * '--socket-mem' is specified as an EAL option, it is very likely to 694 * have only one MR(LKey) per a socket and per a hugepage-size even 695 * though the system memory is highly fragmented. As the whole memory 696 * chunk will be pinned by kernel, it can't be reused unless entire 697 * chunk is freed from EAL. 698 * 699 * If disabled, just register one memseg (page). Then, memory 700 * consumption will be minimized but it may drop performance if there 701 * are many MRs to lookup on the datapath. 
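	 *
	 * For example (illustrative numbers only): with 2 MB hugepages and a
	 * contiguous chunk [0x7f0000000000, 0x7f0000a00000) in use, a miss on
	 * any address inside it registers one 10 MB MR covering all five
	 * pages, whereas with the flag disabled only the single 2 MB page
	 * holding the address is registered.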
702 */ 703 if (!mr_ext_memseg_en) { 704 data.msl = rte_mem_virt2memseg_list((void *)addr); 705 data.start = RTE_ALIGN_FLOOR(addr, data.msl->page_sz); 706 data.end = data.start + data.msl->page_sz; 707 } else if (!rte_memseg_contig_walk(mr_find_contig_memsegs_cb, &data)) { 708 DRV_LOG(WARNING, 709 "Unable to find virtually contiguous" 710 " chunk for address (%p)." 711 " rte_memseg_contig_walk() failed.", (void *)addr); 712 rte_errno = ENXIO; 713 goto err_nolock; 714 } 715 alloc_resources: 716 /* Addresses must be page-aligned. */ 717 MLX5_ASSERT(data.msl); 718 MLX5_ASSERT(rte_is_aligned((void *)data.start, data.msl->page_sz)); 719 MLX5_ASSERT(rte_is_aligned((void *)data.end, data.msl->page_sz)); 720 msl = data.msl; 721 ms = rte_mem_virt2memseg((void *)data.start, msl); 722 len = data.end - data.start; 723 MLX5_ASSERT(ms); 724 MLX5_ASSERT(msl->page_sz == ms->hugepage_sz); 725 /* Number of memsegs in the range. */ 726 ms_n = len / msl->page_sz; 727 DRV_LOG(DEBUG, "Extending %p to [0x%" PRIxPTR ", 0x%" PRIxPTR ")," 728 " page_sz=0x%" PRIx64 ", ms_n=%u", 729 (void *)addr, data.start, data.end, msl->page_sz, ms_n); 730 /* Size of memory for bitmap. */ 731 bmp_size = rte_bitmap_get_memory_footprint(ms_n); 732 mr = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, 733 RTE_ALIGN_CEIL(sizeof(*mr), RTE_CACHE_LINE_SIZE) + 734 bmp_size, RTE_CACHE_LINE_SIZE, msl->socket_id); 735 if (mr == NULL) { 736 DRV_LOG(DEBUG, "Unable to allocate memory for a new MR of" 737 " address (%p).", (void *)addr); 738 rte_errno = ENOMEM; 739 goto err_nolock; 740 } 741 mr->msl = msl; 742 /* 743 * Save the index of the first memseg and initialize memseg bitmap. To 744 * see if a memseg of ms_idx in the memseg-list is still valid, check: 745 * rte_bitmap_get(mr->bmp, ms_idx - mr->ms_base_idx) 746 */ 747 mr->ms_base_idx = rte_fbarray_find_idx(&msl->memseg_arr, ms); 748 bmp_mem = RTE_PTR_ALIGN_CEIL(mr + 1, RTE_CACHE_LINE_SIZE); 749 mr->ms_bmp = rte_bitmap_init(ms_n, bmp_mem, bmp_size); 750 if (mr->ms_bmp == NULL) { 751 DRV_LOG(DEBUG, "Unable to initialize bitmap for a new MR of" 752 " address (%p).", (void *)addr); 753 rte_errno = EINVAL; 754 goto err_nolock; 755 } 756 /* 757 * Should recheck whether the extended contiguous chunk is still valid. 758 * Because memory_hotplug_lock can't be held if there's any memory 759 * related calls in a critical path, resource allocation above can't be 760 * locked. If the memory has been changed at this point, try again with 761 * just single page. If not, go on with the big chunk atomically from 762 * here. 763 */ 764 rte_mcfg_mem_read_lock(); 765 data_re = data; 766 if (len > msl->page_sz && 767 !rte_memseg_contig_walk(mr_find_contig_memsegs_cb, &data_re)) { 768 DRV_LOG(DEBUG, 769 "Unable to find virtually contiguous chunk for address " 770 "(%p). rte_memseg_contig_walk() failed.", (void *)addr); 771 rte_errno = ENXIO; 772 goto err_memlock; 773 } 774 if (data.start != data_re.start || data.end != data_re.end) { 775 /* 776 * The extended contiguous chunk has been changed. Try again 777 * with single memseg instead. 778 */ 779 data.start = RTE_ALIGN_FLOOR(addr, msl->page_sz); 780 data.end = data.start + msl->page_sz; 781 rte_mcfg_mem_read_unlock(); 782 mlx5_mr_free(mr, share_cache->dereg_mr_cb); 783 goto alloc_resources; 784 } 785 MLX5_ASSERT(data.msl == data_re.msl); 786 rte_rwlock_write_lock(&share_cache->rwlock); 787 /* 788 * Check the address is really missing. If other thread already created 789 * one or it is not found due to overflow, abort and return. 
790 */ 791 if (mlx5_mr_lookup_cache(share_cache, entry, addr) != UINT32_MAX) { 792 /* 793 * Insert to the global cache table. It may fail due to 794 * low-on-memory. Then, this entry will have to be searched 795 * here again. 796 */ 797 mr_btree_insert(&share_cache->cache, entry); 798 DRV_LOG(DEBUG, "Found MR for %p on final lookup, abort", 799 (void *)addr); 800 rte_rwlock_write_unlock(&share_cache->rwlock); 801 rte_mcfg_mem_read_unlock(); 802 /* 803 * Must be unlocked before calling rte_free() because 804 * mlx5_mr_mem_event_free_cb() can be called inside. 805 */ 806 mlx5_mr_free(mr, share_cache->dereg_mr_cb); 807 return entry->lkey; 808 } 809 /* 810 * Trim start and end addresses for verbs MR. Set bits for registering 811 * memsegs but exclude already registered ones. Bitmap can be 812 * fragmented. 813 */ 814 for (n = 0; n < ms_n; ++n) { 815 uintptr_t start; 816 struct mr_cache_entry ret; 817 818 memset(&ret, 0, sizeof(ret)); 819 start = data_re.start + n * msl->page_sz; 820 /* Exclude memsegs already registered by other MRs. */ 821 if (mlx5_mr_lookup_cache(share_cache, &ret, start) == 822 UINT32_MAX) { 823 /* 824 * Start from the first unregistered memseg in the 825 * extended range. 826 */ 827 if (ms_idx_shift == -1) { 828 mr->ms_base_idx += n; 829 data.start = start; 830 ms_idx_shift = n; 831 } 832 data.end = start + msl->page_sz; 833 rte_bitmap_set(mr->ms_bmp, n - ms_idx_shift); 834 ++mr->ms_n; 835 } 836 } 837 len = data.end - data.start; 838 mr->ms_bmp_n = len / msl->page_sz; 839 MLX5_ASSERT(ms_idx_shift + mr->ms_bmp_n <= ms_n); 840 /* 841 * Finally create an MR for the memory chunk. Verbs: ibv_reg_mr() can 842 * be called with holding the memory lock because it doesn't use 843 * mlx5_alloc_buf_extern() which eventually calls rte_malloc_socket() 844 * through mlx5_alloc_verbs_buf(). 845 */ 846 share_cache->reg_mr_cb(pd, (void *)data.start, len, &mr->pmd_mr); 847 if (mr->pmd_mr.obj == NULL) { 848 DRV_LOG(DEBUG, "Fail to create an MR for address (%p)", 849 (void *)addr); 850 rte_errno = EINVAL; 851 goto err_mrlock; 852 } 853 MLX5_ASSERT((uintptr_t)mr->pmd_mr.addr == data.start); 854 MLX5_ASSERT(mr->pmd_mr.len); 855 LIST_INSERT_HEAD(&share_cache->mr_list, mr, mr); 856 DRV_LOG(DEBUG, "MR CREATED (%p) for %p:\n" 857 " [0x%" PRIxPTR ", 0x%" PRIxPTR ")," 858 " lkey=0x%x base_idx=%u ms_n=%u, ms_bmp_n=%u", 859 (void *)mr, (void *)addr, data.start, data.end, 860 rte_cpu_to_be_32(mr->pmd_mr.lkey), 861 mr->ms_base_idx, mr->ms_n, mr->ms_bmp_n); 862 /* Insert to the global cache table. */ 863 mlx5_mr_insert_cache(share_cache, mr); 864 /* Fill in output data. */ 865 mlx5_mr_lookup_cache(share_cache, entry, addr); 866 /* Lookup can't fail. */ 867 MLX5_ASSERT(entry->lkey != UINT32_MAX); 868 rte_rwlock_write_unlock(&share_cache->rwlock); 869 rte_mcfg_mem_read_unlock(); 870 return entry->lkey; 871 err_mrlock: 872 rte_rwlock_write_unlock(&share_cache->rwlock); 873 err_memlock: 874 rte_mcfg_mem_read_unlock(); 875 err_nolock: 876 /* 877 * In case of error, as this can be called in a datapath, a warning 878 * message per an error is preferable instead. Must be unlocked before 879 * calling rte_free() because mlx5_mr_mem_event_free_cb() can be called 880 * inside. 881 */ 882 mlx5_mr_free(mr, share_cache->dereg_mr_cb); 883 return UINT32_MAX; 884 } 885 886 /** 887 * Create a new global Memory Region (MR) for a missing virtual address. 888 * This can be called from primary and secondary process. 889 * 890 * @param pd 891 * Pointer to pd handle of a device (net, regex, vdpa,...). 
892 * @param mp_id 893 * Multi-process identifier, may be NULL for the primary process. 894 * @param share_cache 895 * Pointer to a global shared MR cache. 896 * @param[out] entry 897 * Pointer to returning MR cache entry, found in the global cache or newly 898 * created. If failed to create one, this will not be updated. 899 * @param addr 900 * Target virtual address to register. 901 * @param mr_ext_memseg_en 902 * Configurable flag about external memory segment enable or not. 903 * 904 * @return 905 * Searched LKey on success, UINT32_MAX on failure and rte_errno is set. 906 */ 907 static uint32_t 908 mlx5_mr_create(void *pd, struct mlx5_mp_id *mp_id, 909 struct mlx5_mr_share_cache *share_cache, 910 struct mr_cache_entry *entry, uintptr_t addr, 911 unsigned int mr_ext_memseg_en) 912 { 913 uint32_t ret = 0; 914 915 switch (rte_eal_process_type()) { 916 case RTE_PROC_PRIMARY: 917 ret = mlx5_mr_create_primary(pd, share_cache, entry, 918 addr, mr_ext_memseg_en); 919 break; 920 case RTE_PROC_SECONDARY: 921 ret = mlx5_mr_create_secondary(pd, mp_id, share_cache, entry, 922 addr, mr_ext_memseg_en); 923 break; 924 default: 925 break; 926 } 927 return ret; 928 } 929 930 /** 931 * Look up address in the global MR cache table. If not found, create a new MR. 932 * Insert the found/created entry to local bottom-half cache table. 933 * 934 * @param pd 935 * Pointer to pd of a device (net, regex, vdpa,...). 936 * @param mp_id 937 * Multi-process identifier, may be NULL for the primary process. 938 * @param share_cache 939 * Pointer to a global shared MR cache. 940 * @param mr_ctrl 941 * Pointer to per-queue MR control structure. 942 * @param[out] entry 943 * Pointer to returning MR cache entry, found in the global cache or newly 944 * created. If failed to create one, this is not written. 945 * @param addr 946 * Search key. 947 * @param mr_ext_memseg_en 948 * Configurable flag about external memory segment enable or not. 949 * 950 * @return 951 * Searched LKey on success, UINT32_MAX on no match. 952 */ 953 static uint32_t 954 mr_lookup_caches(void *pd, struct mlx5_mp_id *mp_id, 955 struct mlx5_mr_share_cache *share_cache, 956 struct mlx5_mr_ctrl *mr_ctrl, 957 struct mr_cache_entry *entry, uintptr_t addr, 958 unsigned int mr_ext_memseg_en) 959 { 960 struct mlx5_mr_btree *bt = &mr_ctrl->cache_bh; 961 uint32_t lkey; 962 uint16_t idx; 963 964 /* If local cache table is full, try to double it. */ 965 if (unlikely(bt->len == bt->size)) 966 mr_btree_expand(bt, bt->size << 1); 967 /* Look up in the global cache. */ 968 rte_rwlock_read_lock(&share_cache->rwlock); 969 lkey = mr_btree_lookup(&share_cache->cache, &idx, addr); 970 if (lkey != UINT32_MAX) { 971 /* Found. */ 972 *entry = (*share_cache->cache.table)[idx]; 973 rte_rwlock_read_unlock(&share_cache->rwlock); 974 /* 975 * Update local cache. Even if it fails, return the found entry 976 * to update top-half cache. Next time, this entry will be found 977 * in the global cache. 978 */ 979 mr_btree_insert(bt, entry); 980 return lkey; 981 } 982 rte_rwlock_read_unlock(&share_cache->rwlock); 983 /* First time to see the address? Create a new MR. */ 984 lkey = mlx5_mr_create(pd, mp_id, share_cache, entry, addr, 985 mr_ext_memseg_en); 986 /* 987 * Update the local cache if successfully created a new global MR. Even 988 * if failed to create one, there's no action to take in this datapath 989 * code. As returning LKey is invalid, this will eventually make HW 990 * fail. 
991 */ 992 if (lkey != UINT32_MAX) 993 mr_btree_insert(bt, entry); 994 return lkey; 995 } 996 997 /** 998 * Bottom-half of LKey search on datapath. First search in cache_bh[] and if 999 * misses, search in the global MR cache table and update the new entry to 1000 * per-queue local caches. 1001 * 1002 * @param pd 1003 * Pointer to pd of a device (net, regex, vdpa,...). 1004 * @param mp_id 1005 * Multi-process identifier, may be NULL for the primary process. 1006 * @param share_cache 1007 * Pointer to a global shared MR cache. 1008 * @param mr_ctrl 1009 * Pointer to per-queue MR control structure. 1010 * @param addr 1011 * Search key. 1012 * @param mr_ext_memseg_en 1013 * Configurable flag about external memory segment enable or not. 1014 * 1015 * @return 1016 * Searched LKey on success, UINT32_MAX on no match. 1017 */ 1018 static uint32_t 1019 mlx5_mr_addr2mr_bh(void *pd, struct mlx5_mp_id *mp_id, 1020 struct mlx5_mr_share_cache *share_cache, 1021 struct mlx5_mr_ctrl *mr_ctrl, uintptr_t addr, 1022 unsigned int mr_ext_memseg_en) 1023 { 1024 uint32_t lkey; 1025 uint16_t bh_idx = 0; 1026 /* Victim in top-half cache to replace with new entry. */ 1027 struct mr_cache_entry *repl = &mr_ctrl->cache[mr_ctrl->head]; 1028 1029 /* Binary-search MR translation table. */ 1030 lkey = mr_btree_lookup(&mr_ctrl->cache_bh, &bh_idx, addr); 1031 /* Update top-half cache. */ 1032 if (likely(lkey != UINT32_MAX)) { 1033 *repl = (*mr_ctrl->cache_bh.table)[bh_idx]; 1034 } else { 1035 /* 1036 * If missed in local lookup table, search in the global cache 1037 * and local cache_bh[] will be updated inside if possible. 1038 * Top-half cache entry will also be updated. 1039 */ 1040 lkey = mr_lookup_caches(pd, mp_id, share_cache, mr_ctrl, 1041 repl, addr, mr_ext_memseg_en); 1042 if (unlikely(lkey == UINT32_MAX)) 1043 return UINT32_MAX; 1044 } 1045 /* Update the most recently used entry. */ 1046 mr_ctrl->mru = mr_ctrl->head; 1047 /* Point to the next victim, the oldest. */ 1048 mr_ctrl->head = (mr_ctrl->head + 1) % MLX5_MR_CACHE_N; 1049 return lkey; 1050 } 1051 1052 /** 1053 * Release all the created MRs and resources on global MR cache of a device 1054 * list. 1055 * 1056 * @param share_cache 1057 * Pointer to a global shared MR cache. 1058 */ 1059 void 1060 mlx5_mr_release_cache(struct mlx5_mr_share_cache *share_cache) 1061 { 1062 struct mlx5_mr *mr_next; 1063 1064 rte_rwlock_write_lock(&share_cache->rwlock); 1065 /* Detach from MR list and move to free list. */ 1066 mr_next = LIST_FIRST(&share_cache->mr_list); 1067 while (mr_next != NULL) { 1068 struct mlx5_mr *mr = mr_next; 1069 1070 mr_next = LIST_NEXT(mr, mr); 1071 LIST_REMOVE(mr, mr); 1072 LIST_INSERT_HEAD(&share_cache->mr_free_list, mr, mr); 1073 } 1074 LIST_INIT(&share_cache->mr_list); 1075 /* Free global cache. */ 1076 mlx5_mr_btree_free(&share_cache->cache); 1077 rte_rwlock_write_unlock(&share_cache->rwlock); 1078 /* Free all remaining MRs. */ 1079 mlx5_mr_garbage_collect(share_cache); 1080 } 1081 1082 /** 1083 * Initialize global MR cache of a device. 1084 * 1085 * @param share_cache 1086 * Pointer to a global shared MR cache. 1087 * @param socket 1088 * NUMA socket on which memory must be allocated. 1089 * 1090 * @return 1091 * 0 on success, a negative errno value otherwise and rte_errno is set. 
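 *
 * A minimal bring-up sketch (the surrounding init code and error handling are
 * assumptions of the example):
 *
 * @code
 * struct mlx5_mr_share_cache scache = { 0 };
 * struct mlx5_mr_ctrl ctrl;
 *
 * if (mlx5_mr_create_cache(&scache, SOCKET_ID_ANY) < 0)
 *         return -rte_errno;
 * // Each queue hooks its local cache to the global generation counter.
 * if (mlx5_mr_ctrl_init(&ctrl, &scache.dev_gen, SOCKET_ID_ANY) < 0)
 *         return -rte_errno;
 * @endcode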
 */
int
mlx5_mr_create_cache(struct mlx5_mr_share_cache *share_cache, int socket)
{
	/* Set the reg_mr and dereg_mr callback functions */
	mlx5_os_set_reg_mr_cb(&share_cache->reg_mr_cb,
			      &share_cache->dereg_mr_cb);
	rte_rwlock_init(&share_cache->rwlock);
	rte_rwlock_init(&share_cache->mprwlock);
	share_cache->mp_cb_registered = 0;
	/* Initialize B-tree and allocate memory for global MR cache table. */
	return mlx5_mr_btree_init(&share_cache->cache,
				  MLX5_MR_BTREE_CACHE_N * 2, socket);
}

/**
 * Flush all of the local cache entries.
 *
 * @param mr_ctrl
 *   Pointer to per-queue MR local cache.
 */
void
mlx5_mr_flush_local_cache(struct mlx5_mr_ctrl *mr_ctrl)
{
	/* Reset the most-recently-used index. */
	mr_ctrl->mru = 0;
	/* Reset the linear search array. */
	mr_ctrl->head = 0;
	memset(mr_ctrl->cache, 0, sizeof(mr_ctrl->cache));
	/* Reset the B-tree table. */
	mr_ctrl->cache_bh.len = 1;
	mr_ctrl->cache_bh.overflow = 0;
	/* Update the generation number. */
	mr_ctrl->cur_gen = *mr_ctrl->dev_gen_ptr;
	DRV_LOG(DEBUG, "mr_ctrl(%p): flushed, cur_gen=%d",
		(void *)mr_ctrl, mr_ctrl->cur_gen);
}

/**
 * Create a memory region for external memory, that is, memory which is not
 * part of the DPDK memory segments.
 *
 * @param pd
 *   Pointer to pd of a device (net, regex, vdpa,...).
 * @param addr
 *   Starting virtual address of memory.
 * @param len
 *   Length of memory segment being mapped.
 * @param socket_id
 *   Socket to allocate heap memory for the control structures.
 *
 * @return
 *   Pointer to MR structure on success, NULL otherwise.
 */
struct mlx5_mr *
mlx5_create_mr_ext(void *pd, uintptr_t addr, size_t len, int socket_id,
		   mlx5_reg_mr_t reg_mr_cb)
{
	struct mlx5_mr *mr = NULL;

	mr = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
			 RTE_ALIGN_CEIL(sizeof(*mr), RTE_CACHE_LINE_SIZE),
			 RTE_CACHE_LINE_SIZE, socket_id);
	if (mr == NULL)
		return NULL;
	reg_mr_cb(pd, (void *)addr, len, &mr->pmd_mr);
	if (mr->pmd_mr.obj == NULL) {
		DRV_LOG(WARNING,
			"Fail to create MR for address (%p)",
			(void *)addr);
		mlx5_free(mr);
		return NULL;
	}
	mr->msl = NULL; /* Mark it as external memory. */
	mr->ms_bmp = NULL;
	mr->ms_n = 1;
	mr->ms_bmp_n = 1;
	DRV_LOG(DEBUG,
		"MR CREATED (%p) for external memory %p:\n"
		"  [0x%" PRIxPTR ", 0x%" PRIxPTR "),"
		" lkey=0x%x base_idx=%u ms_n=%u, ms_bmp_n=%u",
		(void *)mr, (void *)addr,
		addr, addr + len, rte_cpu_to_be_32(mr->pmd_mr.lkey),
		mr->ms_base_idx, mr->ms_n, mr->ms_bmp_n);
	return mr;
}

/**
 * Callback for memory free event. Iterate over freed memsegs and check whether
 * each belongs to an existing MR. If found, clear the corresponding bit in the
 * MR bitmap; as a result the MR becomes fragmented. If the MR becomes empty,
 * it will be freed later by mlx5_mr_garbage_collect(). Even if this callback
 * is called from a secondary process, the garbage collector runs in the
 * primary process, as the secondary process cannot call mlx5_mr_create().
 *
 * The global cache must be rebuilt if there's any change and this event has to
 * be propagated to dataplane threads to flush the local caches.
 *
 * @param share_cache
 *   Pointer to a global shared MR cache.
1192 * @param ibdev_name 1193 * Name of ibv device. 1194 * @param addr 1195 * Address of freed memory. 1196 * @param len 1197 * Size of freed memory. 1198 */ 1199 void 1200 mlx5_free_mr_by_addr(struct mlx5_mr_share_cache *share_cache, 1201 const char *ibdev_name, const void *addr, size_t len) 1202 { 1203 const struct rte_memseg_list *msl; 1204 struct mlx5_mr *mr; 1205 int ms_n; 1206 int i; 1207 int rebuild = 0; 1208 1209 DRV_LOG(DEBUG, "device %s free callback: addr=%p, len=%zu", 1210 ibdev_name, addr, len); 1211 msl = rte_mem_virt2memseg_list(addr); 1212 /* addr and len must be page-aligned. */ 1213 MLX5_ASSERT((uintptr_t)addr == 1214 RTE_ALIGN((uintptr_t)addr, msl->page_sz)); 1215 MLX5_ASSERT(len == RTE_ALIGN(len, msl->page_sz)); 1216 ms_n = len / msl->page_sz; 1217 rte_rwlock_write_lock(&share_cache->rwlock); 1218 /* Clear bits of freed memsegs from MR. */ 1219 for (i = 0; i < ms_n; ++i) { 1220 const struct rte_memseg *ms; 1221 struct mr_cache_entry entry; 1222 uintptr_t start; 1223 int ms_idx; 1224 uint32_t pos; 1225 1226 /* Find MR having this memseg. */ 1227 start = (uintptr_t)addr + i * msl->page_sz; 1228 mr = mlx5_mr_lookup_list(share_cache, &entry, start); 1229 if (mr == NULL) 1230 continue; 1231 MLX5_ASSERT(mr->msl); /* Can't be external memory. */ 1232 ms = rte_mem_virt2memseg((void *)start, msl); 1233 MLX5_ASSERT(ms != NULL); 1234 MLX5_ASSERT(msl->page_sz == ms->hugepage_sz); 1235 ms_idx = rte_fbarray_find_idx(&msl->memseg_arr, ms); 1236 pos = ms_idx - mr->ms_base_idx; 1237 MLX5_ASSERT(rte_bitmap_get(mr->ms_bmp, pos)); 1238 MLX5_ASSERT(pos < mr->ms_bmp_n); 1239 DRV_LOG(DEBUG, "device %s MR(%p): clear bitmap[%u] for addr %p", 1240 ibdev_name, (void *)mr, pos, (void *)start); 1241 rte_bitmap_clear(mr->ms_bmp, pos); 1242 if (--mr->ms_n == 0) { 1243 LIST_REMOVE(mr, mr); 1244 LIST_INSERT_HEAD(&share_cache->mr_free_list, mr, mr); 1245 DRV_LOG(DEBUG, "device %s remove MR(%p) from list", 1246 ibdev_name, (void *)mr); 1247 } 1248 /* 1249 * MR is fragmented or will be freed. the global cache must be 1250 * rebuilt. 1251 */ 1252 rebuild = 1; 1253 } 1254 if (rebuild) { 1255 mlx5_mr_rebuild_cache(share_cache); 1256 /* 1257 * No explicit wmb is needed after updating dev_gen due to 1258 * store-release ordering in unlock that provides the 1259 * implicit barrier at the software visible level. 1260 */ 1261 ++share_cache->dev_gen; 1262 DRV_LOG(DEBUG, "broadcasting local cache flush, gen=%d", 1263 share_cache->dev_gen); 1264 } 1265 rte_rwlock_write_unlock(&share_cache->rwlock); 1266 } 1267 1268 /** 1269 * Dump all the created MRs and the global cache entries. 1270 * 1271 * @param share_cache 1272 * Pointer to a global shared MR cache. 1273 */ 1274 void 1275 mlx5_mr_dump_cache(struct mlx5_mr_share_cache *share_cache __rte_unused) 1276 { 1277 #ifdef RTE_LIBRTE_MLX5_DEBUG 1278 struct mlx5_mr *mr; 1279 int mr_n = 0; 1280 int chunk_n = 0; 1281 1282 rte_rwlock_read_lock(&share_cache->rwlock); 1283 /* Iterate all the existing MRs. 
 */
	LIST_FOREACH(mr, &share_cache->mr_list, mr) {
		unsigned int n;

		DRV_LOG(DEBUG, "MR[%u], LKey = 0x%x, ms_n = %u, ms_bmp_n = %u",
			mr_n++, rte_cpu_to_be_32(mr->pmd_mr.lkey),
			mr->ms_n, mr->ms_bmp_n);
		if (mr->ms_n == 0)
			continue;
		for (n = 0; n < mr->ms_bmp_n; ) {
			struct mr_cache_entry ret = { 0, };

			n = mr_find_next_chunk(mr, &ret, n);
			if (!ret.end)
				break;
			DRV_LOG(DEBUG,
				" chunk[%u], [0x%" PRIxPTR ", 0x%" PRIxPTR ")",
				chunk_n++, ret.start, ret.end);
		}
	}
	DRV_LOG(DEBUG, "Dumping global cache %p", (void *)share_cache);
	mlx5_mr_btree_dump(&share_cache->cache);
	rte_rwlock_read_unlock(&share_cache->rwlock);
#endif
}

static int
mlx5_range_compare_start(const void *lhs, const void *rhs)
{
	const struct mlx5_range *r1 = lhs, *r2 = rhs;

	if (r1->start > r2->start)
		return 1;
	else if (r1->start < r2->start)
		return -1;
	return 0;
}

static void
mlx5_range_from_mempool_chunk(struct rte_mempool *mp, void *opaque,
			      struct rte_mempool_memhdr *memhdr,
			      unsigned int idx)
{
	struct mlx5_range *ranges = opaque, *range = &ranges[idx];
	uint64_t page_size = rte_mem_page_size();

	RTE_SET_USED(mp);
	range->start = RTE_ALIGN_FLOOR((uintptr_t)memhdr->addr, page_size);
	range->end = RTE_ALIGN_CEIL(range->start + memhdr->len, page_size);
}

/**
 * Get VA-contiguous ranges of the mempool memory.
 * Each range start and end is aligned to the system page size.
 *
 * @param[in] mp
 *   Analyzed mempool.
 * @param[out] out
 *   Receives the ranges, caller must release it with free().
 * @param[out] out_n
 *   Receives the number of @p out elements.
 *
 * @return
 *   0 on success, (-1) on failure.
 */
static int
mlx5_get_mempool_ranges(struct rte_mempool *mp, struct mlx5_range **out,
			unsigned int *out_n)
{
	struct mlx5_range *chunks;
	unsigned int chunks_n = mp->nb_mem_chunks, contig_n, i;

	/* Collect page-aligned memory ranges of the mempool. */
	chunks = calloc(sizeof(chunks[0]), chunks_n);
	if (chunks == NULL)
		return -1;
	rte_mempool_mem_iter(mp, mlx5_range_from_mempool_chunk, chunks);
	/* Merge adjacent chunks and place them at the beginning. */
	qsort(chunks, chunks_n, sizeof(chunks[0]), mlx5_range_compare_start);
	contig_n = 1;
	for (i = 1; i < chunks_n; i++)
		if (chunks[i - 1].end != chunks[i].start) {
			chunks[contig_n - 1].end = chunks[i - 1].end;
			chunks[contig_n] = chunks[i];
			contig_n++;
		}
	/* Extend the last contiguous chunk to the end of the mempool. */
	chunks[contig_n - 1].end = chunks[i - 1].end;
	*out = chunks;
	*out_n = contig_n;
	return 0;
}

/**
 * Analyze mempool memory to select memory ranges to register.
 *
 * @param[in] mp
 *   Mempool to analyze.
 * @param[out] out
 *   Receives memory ranges to register, aligned to the system page size.
 *   The caller must release them with free().
 * @param[out] out_n
 *   Receives the number of @p out items.
 * @param[out] share_hugepage
 *   Receives True if the entire pool resides within a single hugepage.
 *
 * @return
 *   0 on success, (-1) on failure.
 */
static int
mlx5_mempool_reg_analyze(struct rte_mempool *mp, struct mlx5_range **out,
			 unsigned int *out_n, bool *share_hugepage)
{
	struct mlx5_range *ranges = NULL;
	unsigned int i, ranges_n = 0;
	struct rte_memseg_list *msl;

	if (mlx5_get_mempool_ranges(mp, &ranges, &ranges_n) < 0) {
		DRV_LOG(ERR, "Cannot get address ranges for mempool %s",
			mp->name);
		return -1;
	}
	/* Check if the hugepage of the pool can be shared. */
	*share_hugepage = false;
	msl = rte_mem_virt2memseg_list((void *)ranges[0].start);
	if (msl != NULL) {
		uint64_t hugepage_sz = 0;

		/* Check that all ranges are on pages of the same size. */
		for (i = 0; i < ranges_n; i++) {
			if (hugepage_sz != 0 && hugepage_sz != msl->page_sz)
				break;
			hugepage_sz = msl->page_sz;
		}
		if (i == ranges_n) {
			/*
			 * If the entire pool is within one hugepage,
			 * combine all ranges into one of the hugepage size.
			 */
			uintptr_t reg_start = ranges[0].start;
			uintptr_t reg_end = ranges[ranges_n - 1].end;
			uintptr_t hugepage_start =
				RTE_ALIGN_FLOOR(reg_start, hugepage_sz);
			uintptr_t hugepage_end = hugepage_start + hugepage_sz;
			if (reg_end < hugepage_end) {
				ranges[0].start = hugepage_start;
				ranges[0].end = hugepage_end;
				ranges_n = 1;
				*share_hugepage = true;
			}
		}
	}
	*out = ranges;
	*out_n = ranges_n;
	return 0;
}

/** Create a registration object for the mempool. */
static struct mlx5_mempool_reg *
mlx5_mempool_reg_create(struct rte_mempool *mp, unsigned int mrs_n)
{
	struct mlx5_mempool_reg *mpr = NULL;

	mpr = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
			  sizeof(*mpr) + mrs_n * sizeof(mpr->mrs[0]),
			  RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
	if (mpr == NULL) {
		DRV_LOG(ERR, "Cannot allocate mempool %s registration object",
			mp->name);
		return NULL;
	}
	mpr->mp = mp;
	mpr->mrs = (struct mlx5_mempool_mr *)(mpr + 1);
	mpr->mrs_n = mrs_n;
	return mpr;
}

/**
 * Destroy a mempool registration object.
 *
 * @param standalone
 *   Whether @p mpr owns its MRs exclusively, i.e. they are not shared.
 */
static void
mlx5_mempool_reg_destroy(struct mlx5_mr_share_cache *share_cache,
			 struct mlx5_mempool_reg *mpr, bool standalone)
{
	if (standalone) {
		unsigned int i;

		for (i = 0; i < mpr->mrs_n; i++)
			share_cache->dereg_mr_cb(&mpr->mrs[i].pmd_mr);
	}
	mlx5_free(mpr);
}

/** Find registration object of a mempool. */
static struct mlx5_mempool_reg *
mlx5_mempool_reg_lookup(struct mlx5_mr_share_cache *share_cache,
			struct rte_mempool *mp)
{
	struct mlx5_mempool_reg *mpr;

	LIST_FOREACH(mpr, &share_cache->mempool_reg_list, next)
		if (mpr->mp == mp)
			break;
	return mpr;
}

/** Increment reference counters of MRs used in the registration. */
static void
mlx5_mempool_reg_attach(struct mlx5_mempool_reg *mpr)
{
	unsigned int i;

	for (i = 0; i < mpr->mrs_n; i++)
		__atomic_add_fetch(&mpr->mrs[i].refcnt, 1, __ATOMIC_RELAXED);
}

/**
 * Decrement reference counters of MRs used in the registration.
 *
 * @return True if no more references to @p mpr MRs exist, False otherwise.
1506 */ 1507 static bool 1508 mlx5_mempool_reg_detach(struct mlx5_mempool_reg *mpr) 1509 { 1510 unsigned int i; 1511 bool ret = false; 1512 1513 for (i = 0; i < mpr->mrs_n; i++) 1514 ret |= __atomic_sub_fetch(&mpr->mrs[i].refcnt, 1, 1515 __ATOMIC_RELAXED) == 0; 1516 return ret; 1517 } 1518 1519 static int 1520 mlx5_mr_mempool_register_primary(struct mlx5_mr_share_cache *share_cache, 1521 void *pd, struct rte_mempool *mp) 1522 { 1523 struct mlx5_range *ranges = NULL; 1524 struct mlx5_mempool_reg *mpr, *new_mpr; 1525 unsigned int i, ranges_n; 1526 bool share_hugepage; 1527 int ret = -1; 1528 1529 /* Early check to avoid unnecessary creation of MRs. */ 1530 rte_rwlock_read_lock(&share_cache->rwlock); 1531 mpr = mlx5_mempool_reg_lookup(share_cache, mp); 1532 rte_rwlock_read_unlock(&share_cache->rwlock); 1533 if (mpr != NULL) { 1534 DRV_LOG(DEBUG, "Mempool %s is already registered for PD %p", 1535 mp->name, pd); 1536 rte_errno = EEXIST; 1537 goto exit; 1538 } 1539 if (mlx5_mempool_reg_analyze(mp, &ranges, &ranges_n, 1540 &share_hugepage) < 0) { 1541 DRV_LOG(ERR, "Cannot get mempool %s memory ranges", mp->name); 1542 rte_errno = ENOMEM; 1543 goto exit; 1544 } 1545 new_mpr = mlx5_mempool_reg_create(mp, ranges_n); 1546 if (new_mpr == NULL) { 1547 DRV_LOG(ERR, 1548 "Cannot create a registration object for mempool %s in PD %p", 1549 mp->name, pd); 1550 rte_errno = ENOMEM; 1551 goto exit; 1552 } 1553 /* 1554 * If the entire mempool fits in a single hugepage, the MR for this 1555 * hugepage can be shared across mempools that also fit in it. 1556 */ 1557 if (share_hugepage) { 1558 rte_rwlock_write_lock(&share_cache->rwlock); 1559 LIST_FOREACH(mpr, &share_cache->mempool_reg_list, next) { 1560 if (mpr->mrs[0].pmd_mr.addr == (void *)ranges[0].start) 1561 break; 1562 } 1563 if (mpr != NULL) { 1564 new_mpr->mrs = mpr->mrs; 1565 mlx5_mempool_reg_attach(new_mpr); 1566 LIST_INSERT_HEAD(&share_cache->mempool_reg_list, 1567 new_mpr, next); 1568 } 1569 rte_rwlock_write_unlock(&share_cache->rwlock); 1570 if (mpr != NULL) { 1571 DRV_LOG(DEBUG, "Shared MR %#x in PD %p for mempool %s with mempool %s", 1572 mpr->mrs[0].pmd_mr.lkey, pd, mp->name, 1573 mpr->mp->name); 1574 ret = 0; 1575 goto exit; 1576 } 1577 } 1578 for (i = 0; i < ranges_n; i++) { 1579 struct mlx5_mempool_mr *mr = &new_mpr->mrs[i]; 1580 const struct mlx5_range *range = &ranges[i]; 1581 size_t len = range->end - range->start; 1582 1583 if (share_cache->reg_mr_cb(pd, (void *)range->start, len, 1584 &mr->pmd_mr) < 0) { 1585 DRV_LOG(ERR, 1586 "Failed to create an MR in PD %p for address range " 1587 "[0x%" PRIxPTR ", 0x%" PRIxPTR "] (%zu bytes) for mempool %s", 1588 pd, range->start, range->end, len, mp->name); 1589 break; 1590 } 1591 DRV_LOG(DEBUG, 1592 "Created a new MR %#x in PD %p for address range " 1593 "[0x%" PRIxPTR ", 0x%" PRIxPTR "] (%zu bytes) for mempool %s", 1594 mr->pmd_mr.lkey, pd, range->start, range->end, len, 1595 mp->name); 1596 } 1597 if (i != ranges_n) { 1598 mlx5_mempool_reg_destroy(share_cache, new_mpr, true); 1599 rte_errno = EINVAL; 1600 goto exit; 1601 } 1602 /* Concurrent registration is not supposed to happen. 
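	 * Still, re-check under the write lock: if another thread registered
	 * the mempool while the MRs were being created above, back out and
	 * report EEXIST instead of inserting a duplicate registration.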
*/ 1603 rte_rwlock_write_lock(&share_cache->rwlock); 1604 mpr = mlx5_mempool_reg_lookup(share_cache, mp); 1605 if (mpr == NULL) { 1606 mlx5_mempool_reg_attach(new_mpr); 1607 LIST_INSERT_HEAD(&share_cache->mempool_reg_list, new_mpr, next); 1608 ret = 0; 1609 } 1610 rte_rwlock_write_unlock(&share_cache->rwlock); 1611 if (mpr != NULL) { 1612 DRV_LOG(DEBUG, "Mempool %s is already registered for PD %p", 1613 mp->name, pd); 1614 mlx5_mempool_reg_destroy(share_cache, new_mpr, true); 1615 rte_errno = EEXIST; 1616 goto exit; 1617 } 1618 exit: 1619 free(ranges); 1620 return ret; 1621 } 1622 1623 static int 1624 mlx5_mr_mempool_register_secondary(struct mlx5_mr_share_cache *share_cache, 1625 void *pd, struct rte_mempool *mp, 1626 struct mlx5_mp_id *mp_id) 1627 { 1628 if (mp_id == NULL) { 1629 rte_errno = EINVAL; 1630 return -1; 1631 } 1632 return mlx5_mp_req_mempool_reg(mp_id, share_cache, pd, mp, true); 1633 } 1634 1635 /** 1636 * Register the memory of a mempool in the protection domain. 1637 * 1638 * @param share_cache 1639 * Shared MR cache of the protection domain. 1640 * @param pd 1641 * Protection domain object. 1642 * @param mp 1643 * Mempool to register. 1644 * @param mp_id 1645 * Multi-process identifier, may be NULL for the primary process. 1646 * 1647 * @return 1648 * 0 on success, (-1) on failure and rte_errno is set. 1649 */ 1650 int 1651 mlx5_mr_mempool_register(struct mlx5_mr_share_cache *share_cache, void *pd, 1652 struct rte_mempool *mp, struct mlx5_mp_id *mp_id) 1653 { 1654 if (mp->flags & RTE_MEMPOOL_F_NON_IO) 1655 return 0; 1656 switch (rte_eal_process_type()) { 1657 case RTE_PROC_PRIMARY: 1658 return mlx5_mr_mempool_register_primary(share_cache, pd, mp); 1659 case RTE_PROC_SECONDARY: 1660 return mlx5_mr_mempool_register_secondary(share_cache, pd, mp, 1661 mp_id); 1662 default: 1663 return -1; 1664 } 1665 } 1666 1667 static int 1668 mlx5_mr_mempool_unregister_primary(struct mlx5_mr_share_cache *share_cache, 1669 struct rte_mempool *mp) 1670 { 1671 struct mlx5_mempool_reg *mpr; 1672 bool standalone = false; 1673 1674 rte_rwlock_write_lock(&share_cache->rwlock); 1675 LIST_FOREACH(mpr, &share_cache->mempool_reg_list, next) 1676 if (mpr->mp == mp) { 1677 LIST_REMOVE(mpr, next); 1678 standalone = mlx5_mempool_reg_detach(mpr); 1679 if (standalone) 1680 /* 1681 * The unlock operation below provides a memory 1682 * barrier due to its store-release semantics. 1683 */ 1684 ++share_cache->dev_gen; 1685 break; 1686 } 1687 rte_rwlock_write_unlock(&share_cache->rwlock); 1688 if (mpr == NULL) { 1689 rte_errno = ENOENT; 1690 return -1; 1691 } 1692 mlx5_mempool_reg_destroy(share_cache, mpr, standalone); 1693 return 0; 1694 } 1695 1696 static int 1697 mlx5_mr_mempool_unregister_secondary(struct mlx5_mr_share_cache *share_cache, 1698 struct rte_mempool *mp, 1699 struct mlx5_mp_id *mp_id) 1700 { 1701 if (mp_id == NULL) { 1702 rte_errno = EINVAL; 1703 return -1; 1704 } 1705 return mlx5_mp_req_mempool_reg(mp_id, share_cache, NULL, mp, false); 1706 } 1707 1708 /** 1709 * Unregister the memory of a mempool from the protection domain. 1710 * 1711 * @param share_cache 1712 * Shared MR cache of the protection domain. 1713 * @param mp 1714 * Mempool to unregister. 1715 * @param mp_id 1716 * Multi-process identifier, may be NULL for the primary process. 1717 * 1718 * @return 1719 * 0 on success, (-1) on failure and rte_errno is set. 
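 *
 * Sketch of the intended pairing with mlx5_mr_mempool_register() ("scache",
 * "pd" and "mp" are assumptions of the example; a NULL mp_id implies the
 * primary process):
 *
 * @code
 * if (mlx5_mr_mempool_register(&scache, pd, mp, NULL) < 0 &&
 *     rte_errno != EEXIST)
 *         return -rte_errno;
 * // ... datapath resolves LKeys via mlx5_mr_mempool2mr_bh() ...
 * mlx5_mr_mempool_unregister(&scache, mp, NULL);
 * @endcode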
 */
int
mlx5_mr_mempool_unregister(struct mlx5_mr_share_cache *share_cache,
			   struct rte_mempool *mp, struct mlx5_mp_id *mp_id)
{
	if (mp->flags & RTE_MEMPOOL_F_NON_IO)
		return 0;
	switch (rte_eal_process_type()) {
	case RTE_PROC_PRIMARY:
		return mlx5_mr_mempool_unregister_primary(share_cache, mp);
	case RTE_PROC_SECONDARY:
		return mlx5_mr_mempool_unregister_secondary(share_cache, mp,
							    mp_id);
	default:
		return -1;
	}
}

/**
 * Look up an MR key by an address in a registered mempool.
 *
 * @param mpr
 *   Mempool registration object.
 * @param addr
 *   Address within the mempool.
 * @param entry
 *   Bottom-half cache entry to fill.
 *
 * @return
 *   MR key or UINT32_MAX on failure, which can only happen
 *   if the address is not from within the mempool.
 */
static uint32_t
mlx5_mempool_reg_addr2mr(struct mlx5_mempool_reg *mpr, uintptr_t addr,
			 struct mr_cache_entry *entry)
{
	uint32_t lkey = UINT32_MAX;
	unsigned int i;

	for (i = 0; i < mpr->mrs_n; i++) {
		const struct mlx5_pmd_mr *mr = &mpr->mrs[i].pmd_mr;
		uintptr_t mr_addr = (uintptr_t)mr->addr;

		if (mr_addr <= addr) {
			lkey = rte_cpu_to_be_32(mr->lkey);
			entry->start = mr_addr;
			entry->end = mr_addr + mr->len;
			entry->lkey = lkey;
			break;
		}
	}
	return lkey;
}

/**
 * Update bottom-half cache from the list of mempool registrations.
 *
 * @param share_cache
 *   Pointer to a global shared MR cache.
 * @param mr_ctrl
 *   Per-queue MR control handle.
 * @param entry
 *   Pointer to an entry in the bottom-half cache to update
 *   with the MR lkey looked up.
 * @param mp
 *   Mempool containing the address.
 * @param addr
 *   Address to lookup.
 * @return
 *   MR lkey on success, UINT32_MAX on failure.
 */
static uint32_t
mlx5_lookup_mempool_regs(struct mlx5_mr_share_cache *share_cache,
			 struct mlx5_mr_ctrl *mr_ctrl,
			 struct mr_cache_entry *entry,
			 struct rte_mempool *mp, uintptr_t addr)
{
	struct mlx5_mr_btree *bt = &mr_ctrl->cache_bh;
	struct mlx5_mempool_reg *mpr;
	uint32_t lkey = UINT32_MAX;

	/* If local cache table is full, try to double it. */
	if (unlikely(bt->len == bt->size))
		mr_btree_expand(bt, bt->size << 1);
	/* Look up in mempool registrations. */
	rte_rwlock_read_lock(&share_cache->rwlock);
	mpr = mlx5_mempool_reg_lookup(share_cache, mp);
	if (mpr != NULL)
		lkey = mlx5_mempool_reg_addr2mr(mpr, addr, entry);
	rte_rwlock_read_unlock(&share_cache->rwlock);
	/*
	 * Update local cache. Even if it fails, return the found entry
	 * to update top-half cache. Next time, this entry will be found
	 * in the global cache.
	 */
	if (lkey != UINT32_MAX)
		mr_btree_insert(bt, entry);
	return lkey;
}

/**
 * Bottom-half lookup for the address from the mempool.
 *
 * @param share_cache
 *   Pointer to a global shared MR cache.
 * @param mr_ctrl
 *   Per-queue MR control handle.
 * @param mp
 *   Mempool containing the address.
 * @param addr
 *   Address to lookup.
 * @return
 *   MR lkey on success, UINT32_MAX on failure.
 */
uint32_t
mlx5_mr_mempool2mr_bh(struct mlx5_mr_share_cache *share_cache,
		      struct mlx5_mr_ctrl *mr_ctrl,
		      struct rte_mempool *mp, uintptr_t addr)
{
	struct mr_cache_entry *repl = &mr_ctrl->cache[mr_ctrl->head];
	uint32_t lkey;
	uint16_t bh_idx = 0;

	/* Binary-search MR translation table. */
	lkey = mr_btree_lookup(&mr_ctrl->cache_bh, &bh_idx, addr);
	/* Update top-half cache. */
	if (likely(lkey != UINT32_MAX)) {
		*repl = (*mr_ctrl->cache_bh.table)[bh_idx];
	} else {
		lkey = mlx5_lookup_mempool_regs(share_cache, mr_ctrl, repl,
						mp, addr);
		/* Can only fail if the address is not from the mempool. */
		if (unlikely(lkey == UINT32_MAX))
			return UINT32_MAX;
	}
	/* Update the most recently used entry. */
	mr_ctrl->mru = mr_ctrl->head;
	/* Point to the next victim, the oldest. */
	mr_ctrl->head = (mr_ctrl->head + 1) % MLX5_MR_CACHE_N;
	return lkey;
}

/**
 * Bottom-half of LKey search on datapath. If supported, look up the address
 * in the mempool registrations; otherwise, fall back to the generic
 * memseg-based caches.
 *
 * @param cdev
 *   Pointer to mlx5 device.
 * @param mp_id
 *   Multi-process identifier, may be NULL for the primary process.
 * @param mr_ctrl
 *   Pointer to per-queue MR control structure.
 * @param mb
 *   Pointer to mbuf.
 *
 * @return
 *   Searched LKey on success, UINT32_MAX on no match.
 */
static uint32_t
mlx5_mr_mb2mr_bh(struct mlx5_common_device *cdev, struct mlx5_mp_id *mp_id,
		 struct mlx5_mr_ctrl *mr_ctrl, struct rte_mbuf *mb)
{
	uint32_t lkey;
	uintptr_t addr = (uintptr_t)mb->buf_addr;

	if (cdev->config.mr_mempool_reg_en) {
		struct rte_mempool *mp = NULL;
		struct mlx5_mprq_buf *buf;

		if (!RTE_MBUF_HAS_EXTBUF(mb)) {
			mp = mlx5_mb2mp(mb);
		} else if (mb->shinfo->free_cb == mlx5_mprq_buf_free_cb) {
			/* Recover MPRQ mempool. */
			buf = mb->shinfo->fcb_opaque;
			mp = buf->mp;
		}
		if (mp != NULL) {
			lkey = mlx5_mr_mempool2mr_bh(&cdev->mr_scache,
						     mr_ctrl, mp, addr);
			/*
			 * Lookup can only fail on invalid input, e.g. "addr"
			 * is not from "mp" or "mp" has MEMPOOL_F_NON_IO set.
			 */
			if (lkey != UINT32_MAX)
				return lkey;
		}
		/* Fallback for generic mechanism in corner cases. */
	}
	return mlx5_mr_addr2mr_bh(cdev->pd, mp_id, &cdev->mr_scache, mr_ctrl,
				  addr, cdev->config.mr_ext_memseg_en);
}

/**
 * Query LKey from a packet buffer.
 *
 * @param cdev
 *   Pointer to the mlx5 device structure.
 * @param mp_id
 *   Multi-process identifier, may be NULL for the primary process.
 * @param mr_ctrl
 *   Pointer to per-queue MR control structure.
 * @param mbuf
 *   Pointer to mbuf.
 *
 * @return
 *   Searched LKey on success, UINT32_MAX on no match.
 */
uint32_t
mlx5_mr_mb2mr(struct mlx5_common_device *cdev, struct mlx5_mp_id *mp_id,
	      struct mlx5_mr_ctrl *mr_ctrl, struct rte_mbuf *mbuf)
{
	uint32_t lkey;

	/* Check generation bit to see if there's any change on existing MRs. */
	if (unlikely(*mr_ctrl->dev_gen_ptr != mr_ctrl->cur_gen))
		mlx5_mr_flush_local_cache(mr_ctrl);
	/* Linear search on MR cache array. */
	lkey = mlx5_mr_lookup_lkey(mr_ctrl->cache, &mr_ctrl->mru,
				   MLX5_MR_CACHE_N, (uintptr_t)mbuf->buf_addr);
	if (likely(lkey != UINT32_MAX))
		return lkey;
	/* Take slower bottom-half on miss. */
	return mlx5_mr_mb2mr_bh(cdev, mp_id, mr_ctrl, mbuf);
}