/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2016 6WIND S.A.
 * Copyright 2020 Mellanox Technologies, Ltd
 */
#include <rte_eal_memconfig.h>
#include <rte_errno.h>
#include <rte_mempool.h>
#include <rte_malloc.h>
#include <rte_rwlock.h>

#include "mlx5_glue.h"
#include "mlx5_common_mp.h"
#include "mlx5_common_mr.h"
#include "mlx5_common_utils.h"

struct mr_find_contig_memsegs_data {
	uintptr_t addr;
	uintptr_t start;
	uintptr_t end;
	const struct rte_memseg_list *msl;
};

/**
 * Expand B-tree table to a given size. Can't be called while holding
 * memory_hotplug_lock or share_cache.rwlock due to rte_realloc().
 *
 * @param bt
 *   Pointer to B-tree structure.
 * @param n
 *   Number of entries for expansion.
 *
 * @return
 *   0 on success, -1 on failure.
 */
static int
mr_btree_expand(struct mlx5_mr_btree *bt, int n)
{
	void *mem;
	int ret = 0;

	if (n <= bt->size)
		return ret;
	/*
	 * Downside of directly using rte_realloc() is that SOCKET_ID_ANY is
	 * used inside if there's no room to expand. Because this is quite a
	 * rare case and part of a very slow path, it is acceptable.
	 * Initially cache_bh[] is given practically enough space and, once
	 * expanded, further expansion should never be needed again.
	 */
	mem = rte_realloc(bt->table, n * sizeof(struct mr_cache_entry), 0);
	if (mem == NULL) {
		/* Not an error, B-tree search will be skipped. */
		DRV_LOG(WARNING, "failed to expand MR B-tree (%p) table",
			(void *)bt);
		ret = -1;
	} else {
		DRV_LOG(DEBUG, "expanded MR B-tree table (size=%u)", n);
		bt->table = mem;
		bt->size = n;
	}
	return ret;
}

/**
 * Look up LKey from given B-tree lookup table, store the last index and return
 * searched LKey.
 *
 * @param bt
 *   Pointer to B-tree structure.
 * @param[out] idx
 *   Pointer to index. Even on search failure, returns index where it stops
 *   searching so that index can be used when inserting a new entry.
 * @param addr
 *   Search key.
 *
 * @return
 *   Searched LKey on success, UINT32_MAX on no match.
 */
static uint32_t
mr_btree_lookup(struct mlx5_mr_btree *bt, uint16_t *idx, uintptr_t addr)
{
	struct mr_cache_entry *lkp_tbl;
	uint16_t n;
	uint16_t base = 0;

	MLX5_ASSERT(bt != NULL);
	lkp_tbl = *bt->table;
	n = bt->len;
	/* First entry must be NULL for comparison. */
	MLX5_ASSERT(bt->len > 0 || (lkp_tbl[0].start == 0 &&
				    lkp_tbl[0].lkey == UINT32_MAX));
	/* Binary search. */
	do {
		register uint16_t delta = n >> 1;

		if (addr < lkp_tbl[base + delta].start) {
			n = delta;
		} else {
			base += delta;
			n -= delta;
		}
	} while (n > 1);
	MLX5_ASSERT(addr >= lkp_tbl[base].start);
	*idx = base;
	if (addr < lkp_tbl[base].end)
		return lkp_tbl[base].lkey;
	/* Not found. */
	return UINT32_MAX;
}

/**
 * Insert an entry to B-tree lookup table.
 *
 * @param bt
 *   Pointer to B-tree structure.
 * @param entry
 *   Pointer to new entry to insert.
 *
 * @return
 *   0 on success, -1 on failure.
 */
static int
mr_btree_insert(struct mlx5_mr_btree *bt, struct mr_cache_entry *entry)
{
	struct mr_cache_entry *lkp_tbl;
	uint16_t idx = 0;
	size_t shift;

	MLX5_ASSERT(bt != NULL);
	MLX5_ASSERT(bt->len <= bt->size);
	MLX5_ASSERT(bt->len > 0);
	lkp_tbl = *bt->table;
	/* Find out the slot for insertion. */
	if (mr_btree_lookup(bt, &idx, entry->start) != UINT32_MAX) {
		DRV_LOG(DEBUG,
			"abort insertion to B-tree(%p): already exist at"
			" idx=%u [0x%" PRIxPTR ", 0x%" PRIxPTR ") lkey=0x%x",
			(void *)bt, idx, entry->start, entry->end, entry->lkey);
		/* Already exist, return. */
		return 0;
	}
	/* If table is full, return error. */
	if (unlikely(bt->len == bt->size)) {
		bt->overflow = 1;
		return -1;
	}
	/* Insert entry. */
	++idx;
	shift = (bt->len - idx) * sizeof(struct mr_cache_entry);
	if (shift)
		memmove(&lkp_tbl[idx + 1], &lkp_tbl[idx], shift);
	lkp_tbl[idx] = *entry;
	bt->len++;
	DRV_LOG(DEBUG,
		"inserted B-tree(%p)[%u],"
		" [0x%" PRIxPTR ", 0x%" PRIxPTR ") lkey=0x%x",
		(void *)bt, idx, entry->start, entry->end, entry->lkey);
	return 0;
}

/**
 * Initialize B-tree and allocate memory for lookup table.
 *
 * @param bt
 *   Pointer to B-tree structure.
 * @param n
 *   Number of entries to allocate.
 * @param socket
 *   NUMA socket on which memory must be allocated.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_mr_btree_init(struct mlx5_mr_btree *bt, int n, int socket)
{
	if (bt == NULL) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	MLX5_ASSERT(!bt->table && !bt->size);
	memset(bt, 0, sizeof(*bt));
	bt->table = rte_calloc_socket("B-tree table",
				      n, sizeof(struct mr_cache_entry),
				      0, socket);
	if (bt->table == NULL) {
		rte_errno = ENOMEM;
		DEBUG("failed to allocate memory for btree cache on socket %d",
		      socket);
		return -rte_errno;
	}
	bt->size = n;
	/* First entry must be NULL for binary search. */
	(*bt->table)[bt->len++] = (struct mr_cache_entry) {
		.lkey = UINT32_MAX,
	};
	DEBUG("initialized B-tree %p with table %p",
	      (void *)bt, (void *)bt->table);
	return 0;
}

/**
 * Free B-tree resources.
 *
 * @param bt
 *   Pointer to B-tree structure.
 */
void
mlx5_mr_btree_free(struct mlx5_mr_btree *bt)
{
	if (bt == NULL)
		return;
	DEBUG("freeing B-tree %p with table %p",
	      (void *)bt, (void *)bt->table);
	rte_free(bt->table);
	memset(bt, 0, sizeof(*bt));
}

/**
 * Dump all the entries in a B-tree
 *
 * @param bt
 *   Pointer to B-tree structure.
 */
void
mlx5_mr_btree_dump(struct mlx5_mr_btree *bt __rte_unused)
{
#ifdef RTE_LIBRTE_MLX5_DEBUG
	int idx;
	struct mr_cache_entry *lkp_tbl;

	if (bt == NULL)
		return;
	lkp_tbl = *bt->table;
	for (idx = 0; idx < bt->len; ++idx) {
		struct mr_cache_entry *entry = &lkp_tbl[idx];

		DEBUG("B-tree(%p)[%u],"
		      " [0x%" PRIxPTR ", 0x%" PRIxPTR ") lkey=0x%x",
		      (void *)bt, idx, entry->start, entry->end, entry->lkey);
	}
#endif
}

/**
 * Find virtually contiguous memory chunk in a given MR.
 *
 * @param mr
 *   Pointer to MR structure.
 * @param[out] entry
 *   Pointer to returning MR cache entry. If not found, this will not be
 *   updated.
 * @param base_idx
 *   Start index of the memseg bitmap.
 *
 * @return
 *   Next index to go on lookup.
 */
static int
mr_find_next_chunk(struct mlx5_mr *mr, struct mr_cache_entry *entry,
		   int base_idx)
{
	uintptr_t start = 0;
	uintptr_t end = 0;
	uint32_t idx = 0;

	/* MR for external memory doesn't have memseg list. */
	if (mr->msl == NULL) {
		struct ibv_mr *ibv_mr = mr->ibv_mr;

		MLX5_ASSERT(mr->ms_bmp_n == 1);
		MLX5_ASSERT(mr->ms_n == 1);
		MLX5_ASSERT(base_idx == 0);
		/*
		 * Can't search it from memseg list but get it directly from
		 * verbs MR as there's only one chunk.
		 */
		entry->start = (uintptr_t)ibv_mr->addr;
		entry->end = (uintptr_t)ibv_mr->addr + mr->ibv_mr->length;
		entry->lkey = rte_cpu_to_be_32(mr->ibv_mr->lkey);
		/* Returning 1 ends iteration. */
		return 1;
	}
	for (idx = base_idx; idx < mr->ms_bmp_n; ++idx) {
		if (rte_bitmap_get(mr->ms_bmp, idx)) {
			const struct rte_memseg_list *msl;
			const struct rte_memseg *ms;

			msl = mr->msl;
			ms = rte_fbarray_get(&msl->memseg_arr,
					     mr->ms_base_idx + idx);
			MLX5_ASSERT(msl->page_sz == ms->hugepage_sz);
			if (!start)
				start = ms->addr_64;
			end = ms->addr_64 + ms->hugepage_sz;
		} else if (start) {
			/* Passed the end of a fragment. */
			break;
		}
	}
	if (start) {
		/* Found one chunk. */
		entry->start = start;
		entry->end = end;
		entry->lkey = rte_cpu_to_be_32(mr->ibv_mr->lkey);
	}
	return idx;
}

/**
 * Insert an MR to the global B-tree cache. It may fail due to a lack of
 * memory. Then, this entry will have to be searched by mr_lookup_list() in
 * mlx5_mr_create() on miss.
 *
 * @param share_cache
 *   Pointer to a global shared MR cache.
 * @param mr
 *   Pointer to MR to insert.
 *
 * @return
 *   0 on success, -1 on failure.
 */
int
mlx5_mr_insert_cache(struct mlx5_mr_share_cache *share_cache,
		     struct mlx5_mr *mr)
{
	unsigned int n;

	DRV_LOG(DEBUG, "Inserting MR(%p) to global cache(%p)",
		(void *)mr, (void *)share_cache);
	for (n = 0; n < mr->ms_bmp_n; ) {
		struct mr_cache_entry entry;

		memset(&entry, 0, sizeof(entry));
		/* Find a contiguous chunk and advance the index. */
		n = mr_find_next_chunk(mr, &entry, n);
		if (!entry.end)
			break;
		if (mr_btree_insert(&share_cache->cache, &entry) < 0) {
			/*
			 * Overflowed, but the global table cannot be expanded
			 * because of deadlock.
			 */
			return -1;
		}
	}
	return 0;
}

/**
 * Look up address in the original global MR list.
 *
 * @param share_cache
 *   Pointer to a global shared MR cache.
 * @param[out] entry
 *   Pointer to returning MR cache entry. If no match, this will not be updated.
 * @param addr
 *   Search key.
 *
 * @return
 *   Found MR on match, NULL otherwise.
 */
struct mlx5_mr *
mlx5_mr_lookup_list(struct mlx5_mr_share_cache *share_cache,
		    struct mr_cache_entry *entry, uintptr_t addr)
{
	struct mlx5_mr *mr;

	/* Iterate all the existing MRs. */
	LIST_FOREACH(mr, &share_cache->mr_list, mr) {
		unsigned int n;

		if (mr->ms_n == 0)
			continue;
		for (n = 0; n < mr->ms_bmp_n; ) {
			struct mr_cache_entry ret;

			memset(&ret, 0, sizeof(ret));
			n = mr_find_next_chunk(mr, &ret, n);
			if (addr >= ret.start && addr < ret.end) {
				/* Found. */
				*entry = ret;
				return mr;
			}
		}
	}
	return NULL;
}

/**
 * Look up address on global MR cache.
 *
 * @param share_cache
 *   Pointer to a global shared MR cache.
 * @param[out] entry
 *   Pointer to returning MR cache entry. If no match, this will not be updated.
 * @param addr
 *   Search key.
 *
 * @return
 *   Searched LKey on success, UINT32_MAX on failure and rte_errno is set.
 */
uint32_t
mlx5_mr_lookup_cache(struct mlx5_mr_share_cache *share_cache,
		     struct mr_cache_entry *entry, uintptr_t addr)
{
	uint16_t idx;
	uint32_t lkey = UINT32_MAX;
	struct mlx5_mr *mr;

	/*
	 * If the global cache has overflowed since it failed to expand the
	 * B-tree table, it can't have all the existing MRs. Then, the address
	 * has to be searched by traversing the original MR list instead, which
	 * is a very slow path. Otherwise, the global cache is all inclusive.
	 */
	if (!unlikely(share_cache->cache.overflow)) {
		lkey = mr_btree_lookup(&share_cache->cache, &idx, addr);
		if (lkey != UINT32_MAX)
			*entry = (*share_cache->cache.table)[idx];
	} else {
		/* Falling back to the slowest path. */
		mr = mlx5_mr_lookup_list(share_cache, entry, addr);
		if (mr != NULL)
			lkey = entry->lkey;
	}
	MLX5_ASSERT(lkey == UINT32_MAX || (addr >= entry->start &&
					   addr < entry->end));
	return lkey;
}

/**
 * Free MR resources. MR lock must not be held to avoid a deadlock. rte_free()
 * can raise a memory free event and the callback function will spin on the
 * lock.
 *
 * @param mr
 *   Pointer to MR to free.
 */
static void
mr_free(struct mlx5_mr *mr)
{
	if (mr == NULL)
		return;
	DRV_LOG(DEBUG, "freeing MR(%p):", (void *)mr);
	if (mr->ibv_mr != NULL)
		claim_zero(mlx5_glue->dereg_mr(mr->ibv_mr));
	if (mr->ms_bmp != NULL)
		rte_bitmap_free(mr->ms_bmp);
	rte_free(mr);
}

void
mlx5_mr_rebuild_cache(struct mlx5_mr_share_cache *share_cache)
{
	struct mlx5_mr *mr;

	DRV_LOG(DEBUG, "Rebuild dev cache[] %p", (void *)share_cache);
	/* Flush cache to rebuild. */
	share_cache->cache.len = 1;
	share_cache->cache.overflow = 0;
	/* Iterate all the existing MRs. */
	LIST_FOREACH(mr, &share_cache->mr_list, mr)
		if (mlx5_mr_insert_cache(share_cache, mr) < 0)
			return;
}

/**
 * Release resources of detached MR having no online entry.
 *
 * @param share_cache
 *   Pointer to a global shared MR cache.
 */
static void
mlx5_mr_garbage_collect(struct mlx5_mr_share_cache *share_cache)
{
	struct mlx5_mr *mr_next;
	struct mlx5_mr_list free_list = LIST_HEAD_INITIALIZER(free_list);

	/* Must be called from the primary process. */
	MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
	/*
	 * MR can't be freed while holding the lock because rte_free() could
	 * call the memory free callback function. This would be a deadlock
	 * situation.
	 */
	rte_rwlock_write_lock(&share_cache->rwlock);
	/* Detach the whole free list and release it after unlocking. */
	free_list = share_cache->mr_free_list;
	LIST_INIT(&share_cache->mr_free_list);
	rte_rwlock_write_unlock(&share_cache->rwlock);
	/* Release resources. */
	mr_next = LIST_FIRST(&free_list);
	while (mr_next != NULL) {
		struct mlx5_mr *mr = mr_next;

		mr_next = LIST_NEXT(mr, mr);
		mr_free(mr);
	}
}

/* Called during rte_memseg_contig_walk() by mlx5_mr_create(). */
static int
mr_find_contig_memsegs_cb(const struct rte_memseg_list *msl,
			  const struct rte_memseg *ms, size_t len, void *arg)
{
	struct mr_find_contig_memsegs_data *data = arg;

	if (data->addr < ms->addr_64 || data->addr >= ms->addr_64 + len)
		return 0;
	/* Found, save it and stop walking. */
	data->start = ms->addr_64;
	data->end = ms->addr_64 + len;
	data->msl = msl;
	return 1;
}

/**
 * Create a new global Memory Region (MR) for a missing virtual address.
 * This API should be called on a secondary process, then a request is sent to
 * the primary process in order to create an MR for the address. As the global
 * MR list is in shared memory, the following LKey lookup should succeed unless
 * the request fails.
 *
 * @param pd
 *   Pointer to ibv_pd of a device (net, regex, vdpa,...).
 * @param share_cache
 *   Pointer to a global shared MR cache.
 * @param[out] entry
 *   Pointer to returning MR cache entry, found in the global cache or newly
 *   created. If failed to create one, this will not be updated.
 * @param addr
 *   Target virtual address to register.
 * @param mr_ext_memseg_en
 *   Configurable flag indicating whether external memory segment extension is
 *   enabled or not.
 *
 * @return
 *   Searched LKey on success, UINT32_MAX on failure and rte_errno is set.
 */
static uint32_t
mlx5_mr_create_secondary(struct ibv_pd *pd __rte_unused,
			 struct mlx5_mp_id *mp_id,
			 struct mlx5_mr_share_cache *share_cache,
			 struct mr_cache_entry *entry, uintptr_t addr,
			 unsigned int mr_ext_memseg_en __rte_unused)
{
	int ret;

	DEBUG("port %u requesting MR creation for address (%p)",
	      mp_id->port_id, (void *)addr);
	ret = mlx5_mp_req_mr_create(mp_id, addr);
	if (ret) {
		DEBUG("Fail to request MR creation for address (%p)",
		      (void *)addr);
		return UINT32_MAX;
	}
	rte_rwlock_read_lock(&share_cache->rwlock);
	/* Fill in output data. */
	mlx5_mr_lookup_cache(share_cache, entry, addr);
	/* Lookup can't fail. */
	MLX5_ASSERT(entry->lkey != UINT32_MAX);
	rte_rwlock_read_unlock(&share_cache->rwlock);
	DEBUG("MR CREATED by primary process for %p:\n"
	      " [0x%" PRIxPTR ", 0x%" PRIxPTR "), lkey=0x%x",
	      (void *)addr, entry->start, entry->end, entry->lkey);
	return entry->lkey;
}

/**
 * Create a new global Memory Region (MR) for a missing virtual address.
 * Register entire virtually contiguous memory chunk around the address.
 *
 * @param pd
 *   Pointer to ibv_pd of a device (net, regex, vdpa,...).
 * @param share_cache
 *   Pointer to a global shared MR cache.
 * @param[out] entry
 *   Pointer to returning MR cache entry, found in the global cache or newly
 *   created. If failed to create one, this will not be updated.
 * @param addr
 *   Target virtual address to register.
 * @param mr_ext_memseg_en
 *   Configurable flag indicating whether external memory segment extension is
 *   enabled or not.
 *
 * @return
 *   Searched LKey on success, UINT32_MAX on failure and rte_errno is set.
 */
uint32_t
mlx5_mr_create_primary(struct ibv_pd *pd,
		       struct mlx5_mr_share_cache *share_cache,
		       struct mr_cache_entry *entry, uintptr_t addr,
		       unsigned int mr_ext_memseg_en)
{
	struct mr_find_contig_memsegs_data data = {.addr = addr, };
	struct mr_find_contig_memsegs_data data_re;
	const struct rte_memseg_list *msl;
	const struct rte_memseg *ms;
	struct mlx5_mr *mr = NULL;
	int ms_idx_shift = -1;
	uint32_t bmp_size;
	void *bmp_mem;
	uint32_t ms_n;
	uint32_t n;
	size_t len;

	DRV_LOG(DEBUG, "Creating a MR using address (%p)", (void *)addr);
	/*
	 * Release detached MRs if any. This can't be called while holding
	 * either memory_hotplug_lock or share_cache->rwlock. MRs on the free
	 * list have been detached by the memory free event but couldn't be
	 * released inside the callback due to deadlock. As a result, releasing
	 * resources is quite opportunistic.
	 */
	mlx5_mr_garbage_collect(share_cache);
	/*
	 * If enabled, find out a contiguous virtual address chunk in use, to
	 * which the given address belongs, in order to register maximum range.
	 * In the best case where mempools are not dynamically recreated and
	 * '--socket-mem' is specified as an EAL option, it is very likely to
	 * have only one MR(LKey) per socket and per hugepage size even though
	 * the system memory is highly fragmented. As the whole memory chunk
	 * will be pinned by kernel, it can't be reused unless the entire chunk
	 * is freed from EAL.
	 *
	 * If disabled, just register one memseg (page). Then, memory
	 * consumption will be minimized but it may drop performance if there
	 * are many MRs to lookup on the datapath.
	 */
	if (!mr_ext_memseg_en) {
		data.msl = rte_mem_virt2memseg_list((void *)addr);
		data.start = RTE_ALIGN_FLOOR(addr, data.msl->page_sz);
		data.end = data.start + data.msl->page_sz;
	} else if (!rte_memseg_contig_walk(mr_find_contig_memsegs_cb, &data)) {
		DRV_LOG(WARNING,
			"Unable to find virtually contiguous"
			" chunk for address (%p)."
			" rte_memseg_contig_walk() failed.", (void *)addr);
		rte_errno = ENXIO;
		goto err_nolock;
	}
alloc_resources:
	/* Addresses must be page-aligned. */
	MLX5_ASSERT(data.msl);
	MLX5_ASSERT(rte_is_aligned((void *)data.start, data.msl->page_sz));
	MLX5_ASSERT(rte_is_aligned((void *)data.end, data.msl->page_sz));
	msl = data.msl;
	ms = rte_mem_virt2memseg((void *)data.start, msl);
	len = data.end - data.start;
	MLX5_ASSERT(ms);
	MLX5_ASSERT(msl->page_sz == ms->hugepage_sz);
	/* Number of memsegs in the range. */
	ms_n = len / msl->page_sz;
	DEBUG("Extending %p to [0x%" PRIxPTR ", 0x%" PRIxPTR "),"
	      " page_sz=0x%" PRIx64 ", ms_n=%u",
	      (void *)addr, data.start, data.end, msl->page_sz, ms_n);
	/* Size of memory for bitmap. */
	bmp_size = rte_bitmap_get_memory_footprint(ms_n);
	mr = rte_zmalloc_socket(NULL,
				RTE_ALIGN_CEIL(sizeof(*mr),
					       RTE_CACHE_LINE_SIZE) +
				bmp_size,
				RTE_CACHE_LINE_SIZE, msl->socket_id);
	if (mr == NULL) {
		DEBUG("Unable to allocate memory for a new MR of"
		      " address (%p).", (void *)addr);
		rte_errno = ENOMEM;
		goto err_nolock;
	}
	mr->msl = msl;
	/*
	 * Save the index of the first memseg and initialize memseg bitmap. To
	 * see if a memseg of ms_idx in the memseg-list is still valid, check:
	 *	rte_bitmap_get(mr->ms_bmp, ms_idx - mr->ms_base_idx)
	 */
	mr->ms_base_idx = rte_fbarray_find_idx(&msl->memseg_arr, ms);
	bmp_mem = RTE_PTR_ALIGN_CEIL(mr + 1, RTE_CACHE_LINE_SIZE);
	mr->ms_bmp = rte_bitmap_init(ms_n, bmp_mem, bmp_size);
	if (mr->ms_bmp == NULL) {
		DEBUG("Unable to initialize bitmap for a new MR of"
		      " address (%p).", (void *)addr);
		rte_errno = EINVAL;
		goto err_nolock;
	}
	/*
	 * Should recheck whether the extended contiguous chunk is still valid.
	 * Because memory_hotplug_lock can't be held if there are any memory
	 * related calls in a critical path, the resource allocation above
	 * couldn't be locked. If the memory has been changed at this point,
	 * try again with just a single page. If not, go on with the big chunk
	 * atomically from here.
	 */
	rte_mcfg_mem_read_lock();
	data_re = data;
	if (len > msl->page_sz &&
	    !rte_memseg_contig_walk(mr_find_contig_memsegs_cb, &data_re)) {
		DEBUG("Unable to find virtually contiguous"
		      " chunk for address (%p)."
		      " rte_memseg_contig_walk() failed.", (void *)addr);
		rte_errno = ENXIO;
		goto err_memlock;
	}
	if (data.start != data_re.start || data.end != data_re.end) {
		/*
		 * The extended contiguous chunk has been changed. Try again
		 * with a single memseg instead.
		 */
		data.start = RTE_ALIGN_FLOOR(addr, msl->page_sz);
		data.end = data.start + msl->page_sz;
		rte_mcfg_mem_read_unlock();
		mr_free(mr);
		goto alloc_resources;
	}
	MLX5_ASSERT(data.msl == data_re.msl);
	rte_rwlock_write_lock(&share_cache->rwlock);
	/*
	 * Check that the address is really missing. If another thread has
	 * already created one or it is not found due to overflow, abort
	 * and return.
	 */
	if (mlx5_mr_lookup_cache(share_cache, entry, addr) != UINT32_MAX) {
		/*
		 * Insert to the global cache table. It may fail due to a lack
		 * of memory. Then, this entry will have to be searched here
		 * again.
		 */
		mr_btree_insert(&share_cache->cache, entry);
		DEBUG("Found MR for %p on final lookup, abort", (void *)addr);
		rte_rwlock_write_unlock(&share_cache->rwlock);
		rte_mcfg_mem_read_unlock();
		/*
		 * Must be unlocked before calling rte_free() because
		 * mlx5_mr_mem_event_free_cb() can be called inside.
		 */
		mr_free(mr);
		return entry->lkey;
	}
	/*
	 * Trim start and end addresses for verbs MR. Set bits for registering
	 * memsegs but exclude already registered ones. Bitmap can be
	 * fragmented.
	 */
	for (n = 0; n < ms_n; ++n) {
		uintptr_t start;
		struct mr_cache_entry ret;

		memset(&ret, 0, sizeof(ret));
		start = data_re.start + n * msl->page_sz;
		/* Exclude memsegs already registered by other MRs. */
		if (mlx5_mr_lookup_cache(share_cache, &ret, start) ==
		    UINT32_MAX) {
			/*
			 * Start from the first unregistered memseg in the
			 * extended range.
			 */
			if (ms_idx_shift == -1) {
				mr->ms_base_idx += n;
				data.start = start;
				ms_idx_shift = n;
			}
			data.end = start + msl->page_sz;
			rte_bitmap_set(mr->ms_bmp, n - ms_idx_shift);
			++mr->ms_n;
		}
	}
	len = data.end - data.start;
	mr->ms_bmp_n = len / msl->page_sz;
	MLX5_ASSERT(ms_idx_shift + mr->ms_bmp_n <= ms_n);
	/*
	 * Finally create a verbs MR for the memory chunk. ibv_reg_mr() can be
	 * called while holding the memory lock because it doesn't use
	 * mlx5_alloc_buf_extern() which eventually calls rte_malloc_socket()
	 * through mlx5_alloc_verbs_buf().
	 */
	mr->ibv_mr = mlx5_glue->reg_mr(pd, (void *)data.start, len,
				       IBV_ACCESS_LOCAL_WRITE |
				       IBV_ACCESS_RELAXED_ORDERING);
	if (mr->ibv_mr == NULL) {
		DEBUG("Fail to create a verbs MR for address (%p)",
		      (void *)addr);
		rte_errno = EINVAL;
		goto err_mrlock;
	}
	MLX5_ASSERT((uintptr_t)mr->ibv_mr->addr == data.start);
	MLX5_ASSERT(mr->ibv_mr->length == len);
	LIST_INSERT_HEAD(&share_cache->mr_list, mr, mr);
	DEBUG("MR CREATED (%p) for %p:\n"
	      " [0x%" PRIxPTR ", 0x%" PRIxPTR "),"
	      " lkey=0x%x base_idx=%u ms_n=%u, ms_bmp_n=%u",
	      (void *)mr, (void *)addr, data.start, data.end,
	      rte_cpu_to_be_32(mr->ibv_mr->lkey),
	      mr->ms_base_idx, mr->ms_n, mr->ms_bmp_n);
	/* Insert to the global cache table. */
	mlx5_mr_insert_cache(share_cache, mr);
	/* Fill in output data. */
	mlx5_mr_lookup_cache(share_cache, entry, addr);
	/* Lookup can't fail. */
	MLX5_ASSERT(entry->lkey != UINT32_MAX);
	rte_rwlock_write_unlock(&share_cache->rwlock);
	rte_mcfg_mem_read_unlock();
	return entry->lkey;
err_mrlock:
	rte_rwlock_write_unlock(&share_cache->rwlock);
err_memlock:
	rte_mcfg_mem_read_unlock();
err_nolock:
	/*
	 * In case of error, as this can be called in a datapath, a warning
	 * message per error is preferable instead. Must be unlocked before
	 * calling rte_free() because mlx5_mr_mem_event_free_cb() can be called
	 * inside.
	 */
	mr_free(mr);
	return UINT32_MAX;
}

/**
 * Create a new global Memory Region (MR) for a missing virtual address.
 * This can be called from primary and secondary processes.
 *
 * @param pd
 *   Pointer to ibv_pd of a device (net, regex, vdpa,...).
 * @param share_cache
 *   Pointer to a global shared MR cache.
 * @param[out] entry
 *   Pointer to returning MR cache entry, found in the global cache or newly
 *   created. If failed to create one, this will not be updated.
 * @param addr
 *   Target virtual address to register.
 *
 * @return
 *   Searched LKey on success, UINT32_MAX on failure and rte_errno is set.
 */
static uint32_t
mlx5_mr_create(struct ibv_pd *pd, struct mlx5_mp_id *mp_id,
	       struct mlx5_mr_share_cache *share_cache,
	       struct mr_cache_entry *entry, uintptr_t addr,
	       unsigned int mr_ext_memseg_en)
{
	uint32_t ret = 0;

	switch (rte_eal_process_type()) {
	case RTE_PROC_PRIMARY:
		ret = mlx5_mr_create_primary(pd, share_cache, entry,
					     addr, mr_ext_memseg_en);
		break;
	case RTE_PROC_SECONDARY:
		ret = mlx5_mr_create_secondary(pd, mp_id, share_cache, entry,
					       addr, mr_ext_memseg_en);
		break;
	default:
		break;
	}
	return ret;
}

/**
 * Look up address in the global MR cache table. If not found, create a new MR.
 * Insert the found/created entry to local bottom-half cache table.
 *
 * @param pd
 *   Pointer to ibv_pd of a device (net, regex, vdpa,...).
 * @param share_cache
 *   Pointer to a global shared MR cache.
 * @param mr_ctrl
 *   Pointer to per-queue MR control structure.
 * @param[out] entry
 *   Pointer to returning MR cache entry, found in the global cache or newly
 *   created. If failed to create one, this is not written.
 * @param addr
 *   Search key.
 *
 * @return
 *   Searched LKey on success, UINT32_MAX on no match.
 */
static uint32_t
mr_lookup_caches(struct ibv_pd *pd, struct mlx5_mp_id *mp_id,
		 struct mlx5_mr_share_cache *share_cache,
		 struct mlx5_mr_ctrl *mr_ctrl,
		 struct mr_cache_entry *entry, uintptr_t addr,
		 unsigned int mr_ext_memseg_en)
{
	struct mlx5_mr_btree *bt = &mr_ctrl->cache_bh;
	uint32_t lkey;
	uint16_t idx;

	/* If local cache table is full, try to double it. */
	if (unlikely(bt->len == bt->size))
		mr_btree_expand(bt, bt->size << 1);
	/* Look up in the global cache. */
	rte_rwlock_read_lock(&share_cache->rwlock);
	lkey = mr_btree_lookup(&share_cache->cache, &idx, addr);
	if (lkey != UINT32_MAX) {
		/* Found. */
		*entry = (*share_cache->cache.table)[idx];
		rte_rwlock_read_unlock(&share_cache->rwlock);
		/*
		 * Update local cache. Even if it fails, return the found entry
		 * to update top-half cache. Next time, this entry will be
		 * found in the local cache.
		 */
		mr_btree_insert(bt, entry);
		return lkey;
	}
	rte_rwlock_read_unlock(&share_cache->rwlock);
	/* First time to see the address? Create a new MR. */
	lkey = mlx5_mr_create(pd, mp_id, share_cache, entry, addr,
			      mr_ext_memseg_en);
	/*
	 * Update the local cache if successfully created a new global MR. Even
	 * if failed to create one, there's no action to take in this datapath
	 * code. As the returned LKey is invalid, this will eventually make the
	 * HW fail.
	 */
	if (lkey != UINT32_MAX)
		mr_btree_insert(bt, entry);
	return lkey;
}

/**
 * Bottom-half of LKey search on datapath. First search in cache_bh[] and if
 * it misses, search in the global MR cache table and update the new entry to
 * per-queue local caches.
 *
 * @param pd
 *   Pointer to ibv_pd of a device (net, regex, vdpa,...).
 * @param share_cache
 *   Pointer to a global shared MR cache.
 * @param mr_ctrl
 *   Pointer to per-queue MR control structure.
 * @param addr
 *   Search key.
 *
 * @return
 *   Searched LKey on success, UINT32_MAX on no match.
 */
uint32_t mlx5_mr_addr2mr_bh(struct ibv_pd *pd, struct mlx5_mp_id *mp_id,
			    struct mlx5_mr_share_cache *share_cache,
			    struct mlx5_mr_ctrl *mr_ctrl,
			    uintptr_t addr, unsigned int mr_ext_memseg_en)
{
	uint32_t lkey;
	uint16_t bh_idx = 0;
	/* Victim in top-half cache to replace with new entry. */
	struct mr_cache_entry *repl = &mr_ctrl->cache[mr_ctrl->head];

	/* Binary-search MR translation table. */
	lkey = mr_btree_lookup(&mr_ctrl->cache_bh, &bh_idx, addr);
	/* Update top-half cache. */
	if (likely(lkey != UINT32_MAX)) {
		*repl = (*mr_ctrl->cache_bh.table)[bh_idx];
	} else {
		/*
		 * If missed in the local lookup table, search in the global
		 * cache; local cache_bh[] will be updated inside if possible.
		 * Top-half cache entry will also be updated.
		 */
		lkey = mr_lookup_caches(pd, mp_id, share_cache, mr_ctrl,
					repl, addr, mr_ext_memseg_en);
		if (unlikely(lkey == UINT32_MAX))
			return UINT32_MAX;
	}
	/* Update the most recently used entry. */
	mr_ctrl->mru = mr_ctrl->head;
	/* Point to the next victim, the oldest. */
	mr_ctrl->head = (mr_ctrl->head + 1) % MLX5_MR_CACHE_N;
	return lkey;
}

/**
 * Release all the created MRs and resources on the global MR cache of a
 * device list.
 *
 * @param share_cache
 *   Pointer to a global shared MR cache.
 */
void
mlx5_mr_release_cache(struct mlx5_mr_share_cache *share_cache)
{
	struct mlx5_mr *mr_next;

	rte_rwlock_write_lock(&share_cache->rwlock);
	/* Detach from MR list and move to free list. */
	mr_next = LIST_FIRST(&share_cache->mr_list);
	while (mr_next != NULL) {
		struct mlx5_mr *mr = mr_next;

		mr_next = LIST_NEXT(mr, mr);
		LIST_REMOVE(mr, mr);
		LIST_INSERT_HEAD(&share_cache->mr_free_list, mr, mr);
	}
	LIST_INIT(&share_cache->mr_list);
	/* Free global cache. */
	mlx5_mr_btree_free(&share_cache->cache);
	rte_rwlock_write_unlock(&share_cache->rwlock);
	/* Free all remaining MRs. */
	mlx5_mr_garbage_collect(share_cache);
}

/**
 * Flush all of the local cache entries.
 *
 * @param mr_ctrl
 *   Pointer to per-queue MR local cache.
 */
void
mlx5_mr_flush_local_cache(struct mlx5_mr_ctrl *mr_ctrl)
{
	/* Reset the most-recently-used index. */
	mr_ctrl->mru = 0;
	/* Reset the linear search array. */
	mr_ctrl->head = 0;
	memset(mr_ctrl->cache, 0, sizeof(mr_ctrl->cache));
	/* Reset the B-tree table. */
	mr_ctrl->cache_bh.len = 1;
	mr_ctrl->cache_bh.overflow = 0;
	/* Update the generation number. */
	mr_ctrl->cur_gen = *mr_ctrl->dev_gen_ptr;
	DRV_LOG(DEBUG, "mr_ctrl(%p): flushed, cur_gen=%d",
		(void *)mr_ctrl, mr_ctrl->cur_gen);
}

/**
 * Creates a memory region for external memory, that is memory which is not
 * part of the DPDK memory segments.
 *
 * @param pd
 *   Pointer to ibv_pd of a device (net, regex, vdpa,...).
 * @param addr
 *   Starting virtual address of memory.
 * @param len
 *   Length of memory segment being mapped.
 * @param socket_id
 *   Socket to allocate heap memory for the control structures.
 *
 * @return
 *   Pointer to MR structure on success, NULL otherwise.
 */
struct mlx5_mr *
mlx5_create_mr_ext(struct ibv_pd *pd, uintptr_t addr, size_t len, int socket_id)
{
	struct mlx5_mr *mr = NULL;

	mr = rte_zmalloc_socket(NULL,
				RTE_ALIGN_CEIL(sizeof(*mr),
					       RTE_CACHE_LINE_SIZE),
				RTE_CACHE_LINE_SIZE, socket_id);
	if (mr == NULL)
		return NULL;
	mr->ibv_mr = mlx5_glue->reg_mr(pd, (void *)addr, len,
				       IBV_ACCESS_LOCAL_WRITE |
				       IBV_ACCESS_RELAXED_ORDERING);
	if (mr->ibv_mr == NULL) {
		DRV_LOG(WARNING,
			"Fail to create a verbs MR for address (%p)",
			(void *)addr);
		rte_free(mr);
		return NULL;
	}
	mr->msl = NULL; /* Mark it as external memory. */
	mr->ms_bmp = NULL;
	mr->ms_n = 1;
	mr->ms_bmp_n = 1;
	DRV_LOG(DEBUG,
		"MR CREATED (%p) for external memory %p:\n"
		" [0x%" PRIxPTR ", 0x%" PRIxPTR "),"
		" lkey=0x%x base_idx=%u ms_n=%u, ms_bmp_n=%u",
		(void *)mr, (void *)addr,
		addr, addr + len, rte_cpu_to_be_32(mr->ibv_mr->lkey),
		mr->ms_base_idx, mr->ms_n, mr->ms_bmp_n);
	return mr;
}

/**
 * Dump all the created MRs and the global cache entries.
 *
 * @param share_cache
 *   Pointer to a global shared MR cache.
 */
void
mlx5_mr_dump_cache(struct mlx5_mr_share_cache *share_cache __rte_unused)
{
#ifdef RTE_LIBRTE_MLX5_DEBUG
	struct mlx5_mr *mr;
	int mr_n = 0;
	int chunk_n = 0;

	rte_rwlock_read_lock(&share_cache->rwlock);
	/* Iterate all the existing MRs. */
	LIST_FOREACH(mr, &share_cache->mr_list, mr) {
		unsigned int n;

		DEBUG("MR[%u], LKey = 0x%x, ms_n = %u, ms_bmp_n = %u",
		      mr_n++, rte_cpu_to_be_32(mr->ibv_mr->lkey),
		      mr->ms_n, mr->ms_bmp_n);
		if (mr->ms_n == 0)
			continue;
		for (n = 0; n < mr->ms_bmp_n; ) {
			struct mr_cache_entry ret = { 0, };

			n = mr_find_next_chunk(mr, &ret, n);
			if (!ret.end)
				break;
			DEBUG(" chunk[%u], [0x%" PRIxPTR ", 0x%" PRIxPTR ")",
			      chunk_n++, ret.start, ret.end);
		}
	}
	DEBUG("Dumping global cache %p", (void *)share_cache);
	mlx5_mr_btree_dump(&share_cache->cache);
	rte_rwlock_read_unlock(&share_cache->rwlock);
#endif
}
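
/*
 * Illustrative usage sketch (editor's note, kept inside a comment so it is
 * not part of the driver build): how a per-queue control structure might be
 * wired to the helpers above. "priv", "mp_id", "CACHE_BH_N", "socket" and
 * "buf_addr" are hypothetical placeholders; only the mlx5_mr_* calls below
 * come from this file (and their prototypes in mlx5_common_mr.h).
 *
 *	// At queue setup: allocate the per-queue bottom-half B-tree
 *	// (mr_ctrl is assumed to be zero-initialized and wired to the
 *	// device generation counter elsewhere).
 *	if (mlx5_mr_btree_init(&mr_ctrl.cache_bh, CACHE_BH_N, socket) < 0)
 *		return -rte_errno;
 *
 *	// On a datapath miss in the top-half array: resolve the LKey via
 *	// the bottom-half, which may create a new MR (primary process) or
 *	// request one over the MP channel (secondary process).
 *	uint32_t lkey = mlx5_mr_addr2mr_bh(pd, &mp_id, &priv->share_cache,
 *					   &mr_ctrl, (uintptr_t)buf_addr,
 *					   mr_ext_memseg_en);
 *	if (unlikely(lkey == UINT32_MAX))
 *		return -ENOMEM; // No MR could be found or created.
 */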