/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2017 6WIND S.A.
 * Copyright 2017 Mellanox Technologies, Ltd
 */

/**
 * @file
 * Memory management functions for mlx4 driver.
 */

#include <errno.h>
#include <inttypes.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Verbs headers do not support -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif

#include <rte_branch_prediction.h>
#include <rte_common.h>
#include <rte_eal_memconfig.h>
#include <rte_errno.h>
#include <rte_malloc.h>
#include <rte_memory.h>
#include <rte_mempool.h>
#include <rte_rwlock.h>

#include "mlx4_glue.h"
#include "mlx4_mr.h"
#include "mlx4_rxtx.h"
#include "mlx4_utils.h"

struct mr_find_contig_memsegs_data {
	uintptr_t addr;
	uintptr_t start;
	uintptr_t end;
	const struct rte_memseg_list *msl;
};

struct mr_update_mp_data {
	struct rte_eth_dev *dev;
	struct mlx4_mr_ctrl *mr_ctrl;
	int ret;
};

/**
 * Expand B-tree table to a given size. Must not be called while holding
 * memory_hotplug_lock or priv->mr.rwlock due to rte_realloc().
 *
 * @param bt
 *   Pointer to B-tree structure.
 * @param n
 *   Number of entries for expansion.
 *
 * @return
 *   0 on success, -1 on failure.
 */
static int
mr_btree_expand(struct mlx4_mr_btree *bt, int n)
{
	void *mem;
	int ret = 0;

	if (n <= bt->size)
		return ret;
	/*
	 * The downside of directly using rte_realloc() is that SOCKET_ID_ANY
	 * is used inside if there's no room to expand. Because this is a rare
	 * case and part of a very slow path, it is acceptable.
	 * Initially, cache_bh[] is given practically enough space, and once
	 * it has been expanded, expansion shouldn't be needed again.
	 */
	mem = rte_realloc(bt->table, n * sizeof(struct mlx4_mr_cache), 0);
	if (mem == NULL) {
		/* Not an error, B-tree search will be skipped. */
		WARN("failed to expand MR B-tree (%p) table", (void *)bt);
		ret = -1;
	} else {
		DEBUG("expanded MR B-tree table (size=%u)", n);
		bt->table = mem;
		bt->size = n;
	}
	return ret;
}
/**
 * Look up LKey from given B-tree lookup table, store the last index and return
 * searched LKey.
 *
 * @param bt
 *   Pointer to B-tree structure.
 * @param[out] idx
 *   Pointer to index. Even on search failure, returns the index where the
 *   search stopped so that it can be used when inserting a new entry.
 * @param addr
 *   Search key.
 *
 * @return
 *   Searched LKey on success, UINT32_MAX on no match.
 */
static uint32_t
mr_btree_lookup(struct mlx4_mr_btree *bt, uint16_t *idx, uintptr_t addr)
{
	struct mlx4_mr_cache *lkp_tbl;
	uint16_t n;
	uint16_t base = 0;

	MLX4_ASSERT(bt != NULL);
	lkp_tbl = *bt->table;
	n = bt->len;
	/* First entry must be a NULL sentinel for comparison. */
	MLX4_ASSERT(bt->len > 0 || (lkp_tbl[0].start == 0 &&
				    lkp_tbl[0].lkey == UINT32_MAX));
	/* Binary search. */
	do {
		register uint16_t delta = n >> 1;

		if (addr < lkp_tbl[base + delta].start) {
			n = delta;
		} else {
			base += delta;
			n -= delta;
		}
	} while (n > 1);
	MLX4_ASSERT(addr >= lkp_tbl[base].start);
	*idx = base;
	if (addr < lkp_tbl[base].end)
		return lkp_tbl[base].lkey;
	/* Not found. */
	return UINT32_MAX;
}
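/*
 * Worked example for the lookup above (illustrative only; addresses and
 * LKey values are made up). With a table of
 *   [0] { start=0x0,    end=0x0,    lkey=UINT32_MAX }  <- NULL sentinel
 *   [1] { start=0x1000, end=0x5000, lkey=0xaa }
 *   [2] { start=0x7000, end=0x9000, lkey=0xbb }
 * looking up addr=0x2000 narrows down to base=1 and returns 0xaa, while
 * addr=0x6000 also lands on base=1 but fails the end-bound check and
 * returns UINT32_MAX with *idx == 1, the slot after which a new entry
 * covering 0x6000 would be inserted.
 */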
/**
 * Insert an entry into B-tree lookup table.
 *
 * @param bt
 *   Pointer to B-tree structure.
 * @param entry
 *   Pointer to new entry to insert.
 *
 * @return
 *   0 on success, -1 on failure.
 */
static int
mr_btree_insert(struct mlx4_mr_btree *bt, struct mlx4_mr_cache *entry)
{
	struct mlx4_mr_cache *lkp_tbl;
	uint16_t idx = 0;
	size_t shift;

	MLX4_ASSERT(bt != NULL);
	MLX4_ASSERT(bt->len <= bt->size);
	MLX4_ASSERT(bt->len > 0);
	lkp_tbl = *bt->table;
	/* Find out the slot for insertion. */
	if (mr_btree_lookup(bt, &idx, entry->start) != UINT32_MAX) {
		DEBUG("abort insertion to B-tree(%p): already exists at"
		      " idx=%u [0x%" PRIxPTR ", 0x%" PRIxPTR ") lkey=0x%x",
		      (void *)bt, idx, entry->start, entry->end, entry->lkey);
		/* Already exists, return. */
		return 0;
	}
	/* If table is full, return error. */
	if (unlikely(bt->len == bt->size)) {
		bt->overflow = 1;
		return -1;
	}
	/* Insert entry. */
	++idx;
	shift = (bt->len - idx) * sizeof(struct mlx4_mr_cache);
	if (shift)
		memmove(&lkp_tbl[idx + 1], &lkp_tbl[idx], shift);
	lkp_tbl[idx] = *entry;
	bt->len++;
	DEBUG("inserted B-tree(%p)[%u],"
	      " [0x%" PRIxPTR ", 0x%" PRIxPTR ") lkey=0x%x",
	      (void *)bt, idx, entry->start, entry->end, entry->lkey);
	return 0;
}
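/*
 * Insertion sketch (illustrative only): continuing the example above,
 * inserting { start=0x5000, end=0x7000, lkey=0xcc } first looks up
 * 0x5000, which misses with idx == 1. The entry is then placed at
 * idx + 1 == 2 after memmove() shifts the old [2] entry one slot up:
 *   [0] sentinel, [1] 0x1000-0x5000, [2] 0x5000-0x7000, [3] 0x7000-0x9000
 * keeping the table sorted by start address for the binary search.
 */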
/**
 * Initialize B-tree and allocate memory for lookup table.
 *
 * @param bt
 *   Pointer to B-tree structure.
 * @param n
 *   Number of entries to allocate.
 * @param socket
 *   NUMA socket on which memory must be allocated.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx4_mr_btree_init(struct mlx4_mr_btree *bt, int n, int socket)
{
	if (bt == NULL) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	memset(bt, 0, sizeof(*bt));
	bt->table = rte_calloc_socket("B-tree table",
				      n, sizeof(struct mlx4_mr_cache),
				      0, socket);
	if (bt->table == NULL) {
		rte_errno = ENOMEM;
		ERROR("failed to allocate memory for btree cache on socket %d",
		      socket);
		return -rte_errno;
	}
	bt->size = n;
	/* First entry must be a NULL sentinel for binary search. */
	(*bt->table)[bt->len++] = (struct mlx4_mr_cache) {
		.lkey = UINT32_MAX,
	};
	DEBUG("initialized B-tree %p with table %p",
	      (void *)bt, (void *)bt->table);
	return 0;
}

/**
 * Free B-tree resources.
 *
 * @param bt
 *   Pointer to B-tree structure.
 */
void
mlx4_mr_btree_free(struct mlx4_mr_btree *bt)
{
	if (bt == NULL)
		return;
	DEBUG("freeing B-tree %p with table %p", (void *)bt, (void *)bt->table);
	rte_free(bt->table);
	memset(bt, 0, sizeof(*bt));
}

#ifdef RTE_LIBRTE_MLX4_DEBUG
/**
 * Dump all the entries in a B-tree.
 *
 * @param bt
 *   Pointer to B-tree structure.
 */
void
mlx4_mr_btree_dump(struct mlx4_mr_btree *bt)
{
	int idx;
	struct mlx4_mr_cache *lkp_tbl;

	if (bt == NULL)
		return;
	lkp_tbl = *bt->table;
	for (idx = 0; idx < bt->len; ++idx) {
		struct mlx4_mr_cache *entry = &lkp_tbl[idx];

		DEBUG("B-tree(%p)[%u],"
		      " [0x%" PRIxPTR ", 0x%" PRIxPTR ") lkey=0x%x",
		      (void *)bt, idx, entry->start, entry->end, entry->lkey);
	}
}
#endif
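/*
 * Typical lifecycle of a lookup table (a sketch; error handling omitted
 * and the size is illustrative):
 *
 *	struct mlx4_mr_btree bt;
 *
 *	mlx4_mr_btree_init(&bt, 256, SOCKET_ID_ANY);
 *	... mr_btree_insert(&bt, &entry) / mr_btree_lookup(&bt, &idx, addr) ...
 *	mlx4_mr_btree_free(&bt);
 *
 * Index 0 always holds the NULL sentinel, so bt.len starts at 1.
 */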
/**
 * Find virtually contiguous memory chunk in a given MR.
 *
 * @param mr
 *   Pointer to MR structure.
 * @param[out] entry
 *   Pointer to returning MR cache entry. If not found, this will not be
 *   updated.
 * @param base_idx
 *   Start index of the memseg bitmap.
 *
 * @return
 *   Next index to go on lookup.
 */
static int
mr_find_next_chunk(struct mlx4_mr *mr, struct mlx4_mr_cache *entry,
		   int base_idx)
{
	uintptr_t start = 0;
	uintptr_t end = 0;
	uint32_t idx = 0;

	/* MR for external memory doesn't have a memseg list. */
	if (mr->msl == NULL) {
		struct ibv_mr *ibv_mr = mr->ibv_mr;

		MLX4_ASSERT(mr->ms_bmp_n == 1);
		MLX4_ASSERT(mr->ms_n == 1);
		MLX4_ASSERT(base_idx == 0);
		/*
		 * Can't search it from the memseg list but get it directly
		 * from the verbs MR as there's only one chunk.
		 */
		entry->start = (uintptr_t)ibv_mr->addr;
		entry->end = (uintptr_t)ibv_mr->addr + mr->ibv_mr->length;
		entry->lkey = rte_cpu_to_be_32(mr->ibv_mr->lkey);
		/* Returning 1 ends iteration. */
		return 1;
	}
	for (idx = base_idx; idx < mr->ms_bmp_n; ++idx) {
		if (rte_bitmap_get(mr->ms_bmp, idx)) {
			const struct rte_memseg_list *msl;
			const struct rte_memseg *ms;

			msl = mr->msl;
			ms = rte_fbarray_get(&msl->memseg_arr,
					     mr->ms_base_idx + idx);
			MLX4_ASSERT(msl->page_sz == ms->hugepage_sz);
			if (!start)
				start = ms->addr_64;
			end = ms->addr_64 + ms->hugepage_sz;
		} else if (start) {
			/* Passed the end of a fragment. */
			break;
		}
	}
	if (start) {
		/* Found one chunk. */
		entry->start = start;
		entry->end = end;
		entry->lkey = rte_cpu_to_be_32(mr->ibv_mr->lkey);
	}
	return idx;
}
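/*
 * Example of chunk iteration (illustrative only): with a memseg bitmap of
 * 1 1 0 1 (two fragments), the first call with base_idx=0 reports the
 * chunk covering memsegs [0,1] and returns 2; calling again with that
 * index reports the chunk covering memseg [3] and returns 4, ending the
 * iteration. Callers loop until entry->end stays 0 or the returned index
 * reaches ms_bmp_n.
 */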
/**
 * Insert an MR to the global B-tree cache. It may fail when out of memory;
 * the entry will then have to be found by mr_lookup_dev_list() in
 * mlx4_mr_create() on miss.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param mr
 *   Pointer to MR to insert.
 *
 * @return
 *   0 on success, -1 on failure.
 */
static int
mr_insert_dev_cache(struct rte_eth_dev *dev, struct mlx4_mr *mr)
{
	struct mlx4_priv *priv = dev->data->dev_private;
	unsigned int n;

	DEBUG("port %u inserting MR(%p) to global cache",
	      dev->data->port_id, (void *)mr);
	for (n = 0; n < mr->ms_bmp_n; ) {
		struct mlx4_mr_cache entry;

		memset(&entry, 0, sizeof(entry));
		/* Find a contiguous chunk and advance the index. */
		n = mr_find_next_chunk(mr, &entry, n);
		if (!entry.end)
			break;
		if (mr_btree_insert(&priv->mr.cache, &entry) < 0) {
			/*
			 * Overflowed, but the global table cannot be expanded
			 * because of deadlock.
			 */
			return -1;
		}
	}
	return 0;
}

/**
 * Look up address in the original global MR list.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[out] entry
 *   Pointer to returning MR cache entry. If no match, this will not be updated.
 * @param addr
 *   Search key.
 *
 * @return
 *   Found MR on match, NULL otherwise.
 */
static struct mlx4_mr *
mr_lookup_dev_list(struct rte_eth_dev *dev, struct mlx4_mr_cache *entry,
		   uintptr_t addr)
{
	struct mlx4_priv *priv = dev->data->dev_private;
	struct mlx4_mr *mr;

	/* Iterate all the existing MRs. */
	LIST_FOREACH(mr, &priv->mr.mr_list, mr) {
		unsigned int n;

		if (mr->ms_n == 0)
			continue;
		for (n = 0; n < mr->ms_bmp_n; ) {
			struct mlx4_mr_cache ret;

			memset(&ret, 0, sizeof(ret));
			n = mr_find_next_chunk(mr, &ret, n);
			if (addr >= ret.start && addr < ret.end) {
				/* Found. */
				*entry = ret;
				return mr;
			}
		}
	}
	return NULL;
}
/**
 * Look up address on device.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[out] entry
 *   Pointer to returning MR cache entry. If no match, this will not be updated.
 * @param addr
 *   Search key.
 *
 * @return
 *   Searched LKey on success, UINT32_MAX on failure and rte_errno is set.
 */
static uint32_t
mr_lookup_dev(struct rte_eth_dev *dev, struct mlx4_mr_cache *entry,
	      uintptr_t addr)
{
	struct mlx4_priv *priv = dev->data->dev_private;
	uint16_t idx;
	uint32_t lkey = UINT32_MAX;
	struct mlx4_mr *mr;

	/*
	 * If the global cache has overflowed because it failed to expand the
	 * B-tree table, it can't have all the existing MRs. The address then
	 * has to be found by traversing the original MR list instead, which
	 * is a very slow path. Otherwise, the global cache is all-inclusive.
	 */
	if (!unlikely(priv->mr.cache.overflow)) {
		lkey = mr_btree_lookup(&priv->mr.cache, &idx, addr);
		if (lkey != UINT32_MAX)
			*entry = (*priv->mr.cache.table)[idx];
	} else {
		/* Falling back to the slowest path. */
		mr = mr_lookup_dev_list(dev, entry, addr);
		if (mr != NULL)
			lkey = entry->lkey;
	}
	MLX4_ASSERT(lkey == UINT32_MAX || (addr >= entry->start &&
					   addr < entry->end));
	return lkey;
}

/**
 * Free MR resources. The MR lock must not be held to avoid a deadlock:
 * rte_free() can raise a memory free event and the callback function will
 * spin on the lock.
 *
 * @param mr
 *   Pointer to MR to free.
 */
static void
mr_free(struct mlx4_mr *mr)
{
	if (mr == NULL)
		return;
	DEBUG("freeing MR(%p):", (void *)mr);
	if (mr->ibv_mr != NULL)
		claim_zero(mlx4_glue->dereg_mr(mr->ibv_mr));
	rte_bitmap_free(mr->ms_bmp);
	rte_free(mr);
}
/**
 * Release resources of detached MRs having no online entry.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
static void
mlx4_mr_garbage_collect(struct rte_eth_dev *dev)
{
	struct mlx4_priv *priv = dev->data->dev_private;
	struct mlx4_mr *mr_next;
	struct mlx4_mr_list free_list = LIST_HEAD_INITIALIZER(free_list);

	/* Must be called from the primary process. */
	MLX4_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
	/*
	 * MRs can't be freed while holding the lock, because rte_free() could
	 * call the memory free callback function, which would be a deadlock.
	 */
	rte_rwlock_write_lock(&priv->mr.rwlock);
	/* Detach the whole free list and release it after unlocking. */
	free_list = priv->mr.mr_free_list;
	LIST_INIT(&priv->mr.mr_free_list);
	rte_rwlock_write_unlock(&priv->mr.rwlock);
	/* Release resources. */
	mr_next = LIST_FIRST(&free_list);
	while (mr_next != NULL) {
		struct mlx4_mr *mr = mr_next;

		mr_next = LIST_NEXT(mr, mr);
		mr_free(mr);
	}
}

/* Called during rte_memseg_contig_walk() by mlx4_mr_create(). */
static int
mr_find_contig_memsegs_cb(const struct rte_memseg_list *msl,
			  const struct rte_memseg *ms, size_t len, void *arg)
{
	struct mr_find_contig_memsegs_data *data = arg;

	if (data->addr < ms->addr_64 || data->addr >= ms->addr_64 + len)
		return 0;
	/* Found, save it and stop walking. */
	data->start = ms->addr_64;
	data->end = ms->addr_64 + len;
	data->msl = msl;
	return 1;
}
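/*
 * Usage sketch for the walk callback above (this mirrors what
 * mlx4_mr_create_primary() does below; illustrative only):
 *
 *	struct mr_find_contig_memsegs_data data = { .addr = addr, };
 *
 *	if (!rte_memseg_contig_walk(mr_find_contig_memsegs_cb, &data))
 *		goto fail;	(addr is not in any contiguous EAL chunk)
 *	(otherwise data.start/data.end bound the chunk, data.msl is its list)
 *
 * The walk returns nonzero when the callback found the chunk and stopped
 * it; a zero return means the address was not found.
 */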
/**
 * Create a new global Memory Region (MR) for a missing virtual address.
 * This API should be called on a secondary process; a request is then sent
 * to the primary process in order to create an MR for the address. As the
 * global MR list is on the shared memory, the following LKey lookup should
 * succeed unless the request fails.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[out] entry
 *   Pointer to returning MR cache entry, found in the global cache or newly
 *   created. If failed to create one, this will not be updated.
 * @param addr
 *   Target virtual address to register.
 *
 * @return
 *   Searched LKey on success, UINT32_MAX on failure and rte_errno is set.
 */
static uint32_t
mlx4_mr_create_secondary(struct rte_eth_dev *dev, struct mlx4_mr_cache *entry,
			 uintptr_t addr)
{
	struct mlx4_priv *priv = dev->data->dev_private;
	int ret;

	DEBUG("port %u requesting MR creation for address (%p)",
	      dev->data->port_id, (void *)addr);
	ret = mlx4_mp_req_mr_create(dev, addr);
	if (ret) {
		DEBUG("port %u fail to request MR creation for address (%p)",
		      dev->data->port_id, (void *)addr);
		return UINT32_MAX;
	}
	rte_rwlock_read_lock(&priv->mr.rwlock);
	/* Fill in output data. */
	mr_lookup_dev(dev, entry, addr);
	/* Lookup can't fail. */
	MLX4_ASSERT(entry->lkey != UINT32_MAX);
	rte_rwlock_read_unlock(&priv->mr.rwlock);
	DEBUG("port %u MR CREATED by primary process for %p:",
	      dev->data->port_id, (void *)addr);
	DEBUG("  [0x%" PRIxPTR ", 0x%" PRIxPTR "), lkey=0x%x",
	      entry->start, entry->end, entry->lkey);
	return entry->lkey;
}
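/*
 * A rough picture of the secondary path above (mlx4_mp_req_mr_create() is
 * the driver's multi-process IPC helper; the diagram is a sketch only):
 *
 *   secondary                            primary
 *   ---------                            -------
 *   miss on addr
 *   mlx4_mp_req_mr_create(dev, addr) --> mlx4_mr_create_primary()
 *                                        registers MR, updates shared list
 *   mr_lookup_dev(dev, entry, addr) <--  reply
 *
 * The lookup after a successful reply can't miss because the global MR
 * data resides in shared memory.
 */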
/**
 * Create a new global Memory Region (MR) for a missing virtual address.
 * Register the entire virtually contiguous memory chunk around the address.
 * This must be called from the primary process.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[out] entry
 *   Pointer to returning MR cache entry, found in the global cache or newly
 *   created. If failed to create one, this will not be updated.
 * @param addr
 *   Target virtual address to register.
 *
 * @return
 *   Searched LKey on success, UINT32_MAX on failure and rte_errno is set.
 */
uint32_t
mlx4_mr_create_primary(struct rte_eth_dev *dev, struct mlx4_mr_cache *entry,
		       uintptr_t addr)
{
	struct mlx4_priv *priv = dev->data->dev_private;
	const struct rte_memseg_list *msl;
	const struct rte_memseg *ms;
	struct mlx4_mr *mr = NULL;
	size_t len;
	uint32_t ms_n;
	uint32_t bmp_size;
	void *bmp_mem;
	int ms_idx_shift = -1;
	unsigned int n;
	struct mr_find_contig_memsegs_data data = {
		.addr = addr,
	};
	struct mr_find_contig_memsegs_data data_re;

	DEBUG("port %u creating a MR using address (%p)",
	      dev->data->port_id, (void *)addr);
	/*
	 * Release detached MRs if any. This can't be called while holding
	 * either memory_hotplug_lock or priv->mr.rwlock. MRs on the free list
	 * have been detached by the memory free event but couldn't be
	 * released inside the callback due to deadlock. As a result, releasing
	 * resources is quite opportunistic.
	 */
	mlx4_mr_garbage_collect(dev);
	/*
	 * If enabled, find out a contiguous virtual address chunk in use, to
	 * which the given address belongs, in order to register the maximum
	 * range. In the best case where mempools are not dynamically recreated
	 * and '--socket-mem' is specified as an EAL option, it is very likely
	 * to have only one MR (LKey) per socket and per hugepage size even
	 * though the system memory is highly fragmented. As the whole memory
	 * chunk will be pinned by the kernel, it can't be reused unless the
	 * entire chunk is freed from EAL.
	 *
	 * If disabled, just register one memseg (page). Then, memory
	 * consumption will be minimized but it may drop performance if there
	 * are many MRs to look up on the datapath.
	 */
	if (!priv->mr_ext_memseg_en) {
		data.msl = rte_mem_virt2memseg_list((void *)addr);
		data.start = RTE_ALIGN_FLOOR(addr, data.msl->page_sz);
		data.end = data.start + data.msl->page_sz;
	} else if (!rte_memseg_contig_walk(mr_find_contig_memsegs_cb, &data)) {
		WARN("port %u unable to find virtually contiguous"
		     " chunk for address (%p)."
		     " rte_memseg_contig_walk() failed.",
		     dev->data->port_id, (void *)addr);
		rte_errno = ENXIO;
		goto err_nolock;
	}
alloc_resources:
	/* Addresses must be page-aligned. */
	MLX4_ASSERT(rte_is_aligned((void *)data.start, data.msl->page_sz));
	MLX4_ASSERT(rte_is_aligned((void *)data.end, data.msl->page_sz));
	msl = data.msl;
	ms = rte_mem_virt2memseg((void *)data.start, msl);
	len = data.end - data.start;
	MLX4_ASSERT(msl->page_sz == ms->hugepage_sz);
	/* Number of memsegs in the range. */
	ms_n = len / msl->page_sz;
	DEBUG("port %u extending %p to [0x%" PRIxPTR ", 0x%" PRIxPTR "),"
	      " page_sz=0x%" PRIx64 ", ms_n=%u",
	      dev->data->port_id, (void *)addr,
	      data.start, data.end, msl->page_sz, ms_n);
	/* Size of memory for bitmap. */
	bmp_size = rte_bitmap_get_memory_footprint(ms_n);
	mr = rte_zmalloc_socket(NULL,
				RTE_ALIGN_CEIL(sizeof(*mr),
					       RTE_CACHE_LINE_SIZE) +
				bmp_size,
				RTE_CACHE_LINE_SIZE, msl->socket_id);
	if (mr == NULL) {
		WARN("port %u unable to allocate memory for a new MR of"
		     " address (%p).",
		     dev->data->port_id, (void *)addr);
		rte_errno = ENOMEM;
		goto err_nolock;
	}
	mr->msl = msl;
	/*
	 * Save the index of the first memseg and initialize the memseg bitmap.
	 * To see if a memseg of ms_idx in the memseg-list is still valid,
	 * check: rte_bitmap_get(mr->bmp, ms_idx - mr->ms_base_idx)
	 */
	mr->ms_base_idx = rte_fbarray_find_idx(&msl->memseg_arr, ms);
	bmp_mem = RTE_PTR_ALIGN_CEIL(mr + 1, RTE_CACHE_LINE_SIZE);
	mr->ms_bmp = rte_bitmap_init(ms_n, bmp_mem, bmp_size);
	if (mr->ms_bmp == NULL) {
		WARN("port %u unable to initialize bitmap for a new MR of"
		     " address (%p).",
		     dev->data->port_id, (void *)addr);
		rte_errno = EINVAL;
		goto err_nolock;
	}
	/*
	 * Should recheck whether the extended contiguous chunk is still valid.
	 * Because memory_hotplug_lock can't be held if there are any memory
	 * related calls in a critical path, resource allocation above can't be
	 * locked. If the memory has been changed at this point, try again with
	 * just a single page. If not, go on with the big chunk atomically from
	 * here.
	 */
	rte_mcfg_mem_read_lock();
	data_re = data;
	if (len > msl->page_sz &&
	    !rte_memseg_contig_walk(mr_find_contig_memsegs_cb, &data_re)) {
		WARN("port %u unable to find virtually contiguous"
		     " chunk for address (%p)."
		     " rte_memseg_contig_walk() failed.",
		     dev->data->port_id, (void *)addr);
		rte_errno = ENXIO;
		goto err_memlock;
	}
	if (data.start != data_re.start || data.end != data_re.end) {
		/*
		 * The extended contiguous chunk has been changed. Try again
		 * with a single memseg instead.
		 */
		data.start = RTE_ALIGN_FLOOR(addr, msl->page_sz);
		data.end = data.start + msl->page_sz;
		rte_mcfg_mem_read_unlock();
		mr_free(mr);
		goto alloc_resources;
	}
	MLX4_ASSERT(data.msl == data_re.msl);
	rte_rwlock_write_lock(&priv->mr.rwlock);
	/*
	 * Check that the address is really missing. If another thread already
	 * created one or it is not found due to overflow, abort and return.
	 */
	if (mr_lookup_dev(dev, entry, addr) != UINT32_MAX) {
		/*
		 * Insert to the global cache table. It may fail due to
		 * low-on-memory. Then, this entry will have to be searched
		 * here again.
		 */
		mr_btree_insert(&priv->mr.cache, entry);
		DEBUG("port %u found MR for %p on final lookup, abort",
		      dev->data->port_id, (void *)addr);
		rte_rwlock_write_unlock(&priv->mr.rwlock);
		rte_mcfg_mem_read_unlock();
		/*
		 * Must be unlocked before calling rte_free() because
		 * mlx4_mr_mem_event_free_cb() can be called inside.
		 */
		mr_free(mr);
		return entry->lkey;
	}
	/*
	 * Trim start and end addresses for verbs MR. Set bits for registering
	 * memsegs but exclude already registered ones. Bitmap can be
	 * fragmented.
	 */
	for (n = 0; n < ms_n; ++n) {
		uintptr_t start;
		struct mlx4_mr_cache ret;

		memset(&ret, 0, sizeof(ret));
		start = data_re.start + n * msl->page_sz;
		/* Exclude memsegs already registered by other MRs. */
		if (mr_lookup_dev(dev, &ret, start) == UINT32_MAX) {
			/*
			 * Start from the first unregistered memseg in the
			 * extended range.
			 */
			if (ms_idx_shift == -1) {
				mr->ms_base_idx += n;
				data.start = start;
				ms_idx_shift = n;
			}
			data.end = start + msl->page_sz;
			rte_bitmap_set(mr->ms_bmp, n - ms_idx_shift);
			++mr->ms_n;
		}
	}
	len = data.end - data.start;
	mr->ms_bmp_n = len / msl->page_sz;
	MLX4_ASSERT(ms_idx_shift + mr->ms_bmp_n <= ms_n);
	/*
	 * Finally create a verbs MR for the memory chunk. ibv_reg_mr() can be
	 * called while holding the memory lock because it doesn't use
	 * mlx4_alloc_buf_extern() which eventually calls rte_malloc_socket()
	 * through mlx4_alloc_verbs_buf().
	 */
	mr->ibv_mr = mlx4_glue->reg_mr(priv->pd, (void *)data.start, len,
				       IBV_ACCESS_LOCAL_WRITE);
	if (mr->ibv_mr == NULL) {
		WARN("port %u fail to create a verbs MR for address (%p)",
		     dev->data->port_id, (void *)addr);
		rte_errno = EINVAL;
		goto err_mrlock;
	}
	MLX4_ASSERT((uintptr_t)mr->ibv_mr->addr == data.start);
	MLX4_ASSERT(mr->ibv_mr->length == len);
	LIST_INSERT_HEAD(&priv->mr.mr_list, mr, mr);
	DEBUG("port %u MR CREATED (%p) for %p:",
	      dev->data->port_id, (void *)mr, (void *)addr);
	DEBUG("  [0x%" PRIxPTR ", 0x%" PRIxPTR "),"
	      " lkey=0x%x base_idx=%u ms_n=%u, ms_bmp_n=%u",
	      data.start, data.end, rte_cpu_to_be_32(mr->ibv_mr->lkey),
	      mr->ms_base_idx, mr->ms_n, mr->ms_bmp_n);
	/* Insert to the global cache table. */
	mr_insert_dev_cache(dev, mr);
	/* Fill in output data. */
	mr_lookup_dev(dev, entry, addr);
	/* Lookup can't fail. */
	MLX4_ASSERT(entry->lkey != UINT32_MAX);
	rte_rwlock_write_unlock(&priv->mr.rwlock);
	rte_mcfg_mem_read_unlock();
	return entry->lkey;
err_mrlock:
	rte_rwlock_write_unlock(&priv->mr.rwlock);
err_memlock:
	rte_mcfg_mem_read_unlock();
err_nolock:
	/*
	 * In case of error, as this can be called in a datapath, a warning
	 * message per error is preferable instead. Must be unlocked before
	 * calling rte_free() because mlx4_mr_mem_event_free_cb() can be called
	 * inside.
	 */
	mr_free(mr);
	return UINT32_MAX;
}
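/*
 * Lock ordering used by mlx4_mr_create_primary() above, summarized (the
 * code enforces it only by convention):
 *   1. rte_mcfg_mem_read_lock()	keeps memory hotplug away.
 *   2. priv->mr.rwlock (write)		protects the MR list and global cache.
 * Both are dropped in reverse order before mr_free(), since rte_free()
 * can re-enter mlx4_mr_mem_event_free_cb() and take the rwlock again.
 */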
/**
 * Create a new global Memory Region (MR) for a missing virtual address.
 * This can be called from primary and secondary processes.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[out] entry
 *   Pointer to returning MR cache entry, found in the global cache or newly
 *   created. If failed to create one, this will not be updated.
 * @param addr
 *   Target virtual address to register.
 *
 * @return
 *   Searched LKey on success, UINT32_MAX on failure and rte_errno is set.
 */
static uint32_t
mlx4_mr_create(struct rte_eth_dev *dev, struct mlx4_mr_cache *entry,
	       uintptr_t addr)
{
	uint32_t ret = 0;

	switch (rte_eal_process_type()) {
	case RTE_PROC_PRIMARY:
		ret = mlx4_mr_create_primary(dev, entry, addr);
		break;
	case RTE_PROC_SECONDARY:
		ret = mlx4_mr_create_secondary(dev, entry, addr);
		break;
	default:
		break;
	}
	return ret;
}

/**
 * Rebuild the global B-tree cache of the device from the original MR list.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
static void
mr_rebuild_dev_cache(struct rte_eth_dev *dev)
{
	struct mlx4_priv *priv = dev->data->dev_private;
	struct mlx4_mr *mr;

	DEBUG("port %u rebuild dev cache[]", dev->data->port_id);
	/* Flush cache to rebuild. */
	priv->mr.cache.len = 1;
	priv->mr.cache.overflow = 0;
	/* Iterate all the existing MRs. */
	LIST_FOREACH(mr, &priv->mr.mr_list, mr)
		if (mr_insert_dev_cache(dev, mr) < 0)
			return;
}
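/*
 * Note on the rebuild above: resetting len to 1 keeps the NULL sentinel
 * entry at index 0 while discarding everything else; clearing overflow
 * re-enables the fast binary-search path until the table overflows again.
 */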
/**
 * Callback for memory free event. Iterate freed memsegs and check whether each
 * belongs to an existing MR. If found, clear the bit from the bitmap of the
 * MR. As a result, the MR would be fragmented. If it becomes empty, the MR
 * will be freed later by mlx4_mr_garbage_collect().
 *
 * The global cache must be rebuilt if there's any change and this event has to
 * be propagated to dataplane threads to flush the local caches.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param addr
 *   Address of freed memory.
 * @param len
 *   Size of freed memory.
 */
static void
mlx4_mr_mem_event_free_cb(struct rte_eth_dev *dev, const void *addr, size_t len)
{
	struct mlx4_priv *priv = dev->data->dev_private;
	const struct rte_memseg_list *msl;
	struct mlx4_mr *mr;
	int ms_n;
	int i;
	int rebuild = 0;

	DEBUG("port %u free callback: addr=%p, len=%zu",
	      dev->data->port_id, addr, len);
	msl = rte_mem_virt2memseg_list(addr);
	/* addr and len must be page-aligned. */
	MLX4_ASSERT((uintptr_t)addr ==
		    RTE_ALIGN((uintptr_t)addr, msl->page_sz));
	MLX4_ASSERT(len == RTE_ALIGN(len, msl->page_sz));
	ms_n = len / msl->page_sz;
	rte_rwlock_write_lock(&priv->mr.rwlock);
	/* Clear bits of freed memsegs from MR. */
	for (i = 0; i < ms_n; ++i) {
		const struct rte_memseg *ms;
		struct mlx4_mr_cache entry;
		uintptr_t start;
		int ms_idx;
		uint32_t pos;

		/* Find MR having this memseg. */
		start = (uintptr_t)addr + i * msl->page_sz;
		mr = mr_lookup_dev_list(dev, &entry, start);
		if (mr == NULL)
			continue;
		MLX4_ASSERT(mr->msl); /* Can't be external memory. */
		ms = rte_mem_virt2memseg((void *)start, msl);
		MLX4_ASSERT(ms != NULL);
		MLX4_ASSERT(msl->page_sz == ms->hugepage_sz);
		ms_idx = rte_fbarray_find_idx(&msl->memseg_arr, ms);
		pos = ms_idx - mr->ms_base_idx;
		MLX4_ASSERT(rte_bitmap_get(mr->ms_bmp, pos));
		MLX4_ASSERT(pos < mr->ms_bmp_n);
		DEBUG("port %u MR(%p): clear bitmap[%u] for addr %p",
		      dev->data->port_id, (void *)mr, pos, (void *)start);
		rte_bitmap_clear(mr->ms_bmp, pos);
		if (--mr->ms_n == 0) {
			LIST_REMOVE(mr, mr);
			LIST_INSERT_HEAD(&priv->mr.mr_free_list, mr, mr);
			DEBUG("port %u remove MR(%p) from list",
			      dev->data->port_id, (void *)mr);
		}
		/*
		 * MR is fragmented or will be freed. The global cache must be
		 * rebuilt.
		 */
		rebuild = 1;
	}
	if (rebuild) {
		mr_rebuild_dev_cache(dev);
		/*
		 * No explicit wmb is needed after updating dev_gen due to
		 * the store-release ordering in unlock that provides the
		 * implicit barrier at the software visible level.
		 */
		++priv->mr.dev_gen;
		DEBUG("broadcasting local cache flush, gen=%d",
		      priv->mr.dev_gen);
	}
	rte_rwlock_write_unlock(&priv->mr.rwlock);
#ifdef RTE_LIBRTE_MLX4_DEBUG
	if (rebuild)
		mlx4_mr_dump_dev(dev);
#endif
}

/**
 * Callback for memory event.
 *
 * @param event_type
 *   Memory event type.
 * @param addr
 *   Address of memory.
 * @param len
 *   Size of memory.
 */
void
mlx4_mr_mem_event_cb(enum rte_mem_event event_type, const void *addr,
		     size_t len, void *arg __rte_unused)
{
	struct mlx4_priv *priv;
	struct mlx4_dev_list *dev_list = &mlx4_shared_data->mem_event_cb_list;

	/* Must be called from the primary process. */
	MLX4_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
	switch (event_type) {
	case RTE_MEM_EVENT_FREE:
		rte_rwlock_read_lock(&mlx4_shared_data->mem_event_rwlock);
		/* Iterate all the existing mlx4 devices. */
		LIST_FOREACH(priv, dev_list, mem_event_cb)
			mlx4_mr_mem_event_free_cb(ETH_DEV(priv), addr, len);
		rte_rwlock_read_unlock(&mlx4_shared_data->mem_event_rwlock);
		break;
	case RTE_MEM_EVENT_ALLOC:
	default:
		break;
	}
}
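/*
 * Note: this callback is expected to be registered once by the primary
 * process during device probing (e.g. via rte_mem_event_callback_register())
 * so that RTE_MEM_EVENT_FREE notifications reach every mlx4 port on the
 * shared mem_event_cb_list.
 */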
/**
 * Look up address in the global MR cache table. If not found, create a new MR.
 * Insert the found/created entry to the local bottom-half cache table.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param mr_ctrl
 *   Pointer to per-queue MR control structure.
 * @param[out] entry
 *   Pointer to returning MR cache entry, found in the global cache or newly
 *   created. If failed to create one, this is not written.
 * @param addr
 *   Search key.
 *
 * @return
 *   Searched LKey on success, UINT32_MAX on no match.
 */
static uint32_t
mlx4_mr_lookup_dev(struct rte_eth_dev *dev, struct mlx4_mr_ctrl *mr_ctrl,
		   struct mlx4_mr_cache *entry, uintptr_t addr)
{
	struct mlx4_priv *priv = dev->data->dev_private;
	struct mlx4_mr_btree *bt = &mr_ctrl->cache_bh;
	uint16_t idx;
	uint32_t lkey;

	/* If local cache table is full, try to double it. */
	if (unlikely(bt->len == bt->size))
		mr_btree_expand(bt, bt->size << 1);
	/* Look up in the global cache. */
	rte_rwlock_read_lock(&priv->mr.rwlock);
	lkey = mr_btree_lookup(&priv->mr.cache, &idx, addr);
	if (lkey != UINT32_MAX) {
		/* Found. */
		*entry = (*priv->mr.cache.table)[idx];
		rte_rwlock_read_unlock(&priv->mr.rwlock);
		/*
		 * Update local cache. Even if it fails, return the found entry
		 * to update the top-half cache. Next time, this entry will be
		 * found in the global cache.
		 */
		mr_btree_insert(bt, entry);
		return lkey;
	}
	rte_rwlock_read_unlock(&priv->mr.rwlock);
	/* First time to see the address? Create a new MR. */
	lkey = mlx4_mr_create(dev, entry, addr);
	/*
	 * Update the local cache if a new global MR was successfully created.
	 * Even if creation failed, there's no action to take in this datapath
	 * code: the returned LKey is invalid and will eventually make the HW
	 * fail.
	 */
	if (lkey != UINT32_MAX)
		mr_btree_insert(bt, entry);
	return lkey;
}

/**
 * Bottom-half of LKey search on datapath. First search in cache_bh[] and if
 * it misses, search in the global MR cache table and update the new entry to
 * per-queue local caches.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param mr_ctrl
 *   Pointer to per-queue MR control structure.
 * @param addr
 *   Search key.
 *
 * @return
 *   Searched LKey on success, UINT32_MAX on no match.
 */
static uint32_t
mlx4_mr_addr2mr_bh(struct rte_eth_dev *dev, struct mlx4_mr_ctrl *mr_ctrl,
		   uintptr_t addr)
{
	uint32_t lkey;
	uint16_t bh_idx = 0;
	/* Victim in top-half cache to replace with new entry. */
	struct mlx4_mr_cache *repl = &mr_ctrl->cache[mr_ctrl->head];

	/* Binary-search MR translation table. */
	lkey = mr_btree_lookup(&mr_ctrl->cache_bh, &bh_idx, addr);
	/* Update top-half cache. */
	if (likely(lkey != UINT32_MAX)) {
		*repl = (*mr_ctrl->cache_bh.table)[bh_idx];
	} else {
		/*
		 * If missed in the local lookup table, search in the global
		 * cache; the local cache_bh[] will be updated inside if
		 * possible. The top-half cache entry will also be updated.
		 */
		lkey = mlx4_mr_lookup_dev(dev, mr_ctrl, repl, addr);
		if (unlikely(lkey == UINT32_MAX))
			return UINT32_MAX;
	}
	/* Update the most recently used entry. */
	mr_ctrl->mru = mr_ctrl->head;
	/* Point to the next victim, the oldest. */
	mr_ctrl->head = (mr_ctrl->head + 1) % MLX4_MR_CACHE_N;
	return lkey;
}
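/*
 * Top-half replacement policy, restated (behavior of the function above):
 * the linear cache[] has MLX4_MR_CACHE_N slots and head always points at
 * the oldest one. Whenever the bottom half resolves an address, the result
 * is written into cache[head] (the victim), mru is set to that slot, and
 * head advances to (head + 1) % MLX4_MR_CACHE_N, so the next miss evicts
 * the oldest entry first.
 */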
/**
 * Bottom-half of LKey search on Rx.
 *
 * @param rxq
 *   Pointer to Rx queue structure.
 * @param addr
 *   Search key.
 *
 * @return
 *   Searched LKey on success, UINT32_MAX on no match.
 */
uint32_t
mlx4_rx_addr2mr_bh(struct rxq *rxq, uintptr_t addr)
{
	struct mlx4_mr_ctrl *mr_ctrl = &rxq->mr_ctrl;
	struct mlx4_priv *priv = rxq->priv;

	return mlx4_mr_addr2mr_bh(ETH_DEV(priv), mr_ctrl, addr);
}

/**
 * Bottom-half of LKey search on Tx.
 *
 * @param txq
 *   Pointer to Tx queue structure.
 * @param addr
 *   Search key.
 *
 * @return
 *   Searched LKey on success, UINT32_MAX on no match.
 */
static uint32_t
mlx4_tx_addr2mr_bh(struct txq *txq, uintptr_t addr)
{
	struct mlx4_mr_ctrl *mr_ctrl = &txq->mr_ctrl;
	struct mlx4_priv *priv = txq->priv;

	return mlx4_mr_addr2mr_bh(ETH_DEV(priv), mr_ctrl, addr);
}

/**
 * Bottom-half of LKey search on Tx. If the address cannot be found in the
 * memseg list, register the mempool of the mbuf as externally allocated
 * memory.
 *
 * @param txq
 *   Pointer to Tx queue structure.
 * @param mb
 *   Pointer to mbuf.
 *
 * @return
 *   Searched LKey on success, UINT32_MAX on no match.
 */
uint32_t
mlx4_tx_mb2mr_bh(struct txq *txq, struct rte_mbuf *mb)
{
	uintptr_t addr = (uintptr_t)mb->buf_addr;
	uint32_t lkey;

	lkey = mlx4_tx_addr2mr_bh(txq, addr);
	if (lkey == UINT32_MAX && rte_errno == ENXIO) {
		/* Mempool may have externally allocated memory. */
		return mlx4_tx_update_ext_mp(txq, addr, mlx4_mb2mp(mb));
	}
	return lkey;
}

/**
 * Flush all of the local cache entries.
 *
 * @param mr_ctrl
 *   Pointer to per-queue MR control structure.
 */
void
mlx4_mr_flush_local_cache(struct mlx4_mr_ctrl *mr_ctrl)
{
	/* Reset the most-recently-used index. */
	mr_ctrl->mru = 0;
	/* Reset the linear search array. */
	mr_ctrl->head = 0;
	memset(mr_ctrl->cache, 0, sizeof(mr_ctrl->cache));
	/* Reset the B-tree table. */
	mr_ctrl->cache_bh.len = 1;
	mr_ctrl->cache_bh.overflow = 0;
	/* Update the generation number. */
	mr_ctrl->cur_gen = *mr_ctrl->dev_gen_ptr;
	DEBUG("mr_ctrl(%p): flushed, cur_gen=%d",
	      (void *)mr_ctrl, mr_ctrl->cur_gen);
}
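
/*
 * A minimal sketch (assumed, mirroring the design above) of how the
 * generation number is consumed. The control path bumps the device-level
 * generation whenever an MR is created or freed; a queue that notices a
 * mismatch flushes its local caches before searching again:
 *
 *	if (unlikely(*mr_ctrl->dev_gen_ptr != mr_ctrl->cur_gen))
 *		mlx4_mr_flush_local_cache(mr_ctrl);
 */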

/**
 * Called during rte_mempool_mem_iter() by mlx4_mr_update_ext_mp().
 *
 * An externally allocated chunk is registered and an MR is created for
 * the chunk. The MR object is added to the global list. If the memseg
 * list of an MR object (mr->msl) is NULL, the MR object can be regarded
 * as externally allocated memory.
 *
 * Once external memory is registered, it should be static. If the memory
 * is freed and the virtual address range gets mapped to different
 * physical memory later, it may crash the device due to a stale
 * translation entry. The PMD can't track the free event of the external
 * memory for now.
 */
static void
mlx4_mr_update_ext_mp_cb(struct rte_mempool *mp, void *opaque,
			 struct rte_mempool_memhdr *memhdr,
			 unsigned mem_idx __rte_unused)
{
	struct mr_update_mp_data *data = opaque;
	struct rte_eth_dev *dev = data->dev;
	struct mlx4_priv *priv = dev->data->dev_private;
	struct mlx4_mr_ctrl *mr_ctrl = data->mr_ctrl;
	struct mlx4_mr *mr = NULL;
	uintptr_t addr = (uintptr_t)memhdr->addr;
	size_t len = memhdr->len;
	struct mlx4_mr_cache entry;
	uint32_t lkey;

	MLX4_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
	/* If already registered, just return. */
	rte_rwlock_read_lock(&priv->mr.rwlock);
	lkey = mr_lookup_dev(dev, &entry, addr);
	rte_rwlock_read_unlock(&priv->mr.rwlock);
	if (lkey != UINT32_MAX)
		return;
	mr = rte_zmalloc_socket(NULL,
				RTE_ALIGN_CEIL(sizeof(*mr),
					       RTE_CACHE_LINE_SIZE),
				RTE_CACHE_LINE_SIZE, mp->socket_id);
	if (mr == NULL) {
		WARN("port %u unable to allocate memory for a new MR of"
		     " mempool (%s).",
		     dev->data->port_id, mp->name);
		data->ret = -1;
		return;
	}
	DEBUG("port %u register MR for chunk #%d of mempool (%s)",
	      dev->data->port_id, mem_idx, mp->name);
	mr->ibv_mr = mlx4_glue->reg_mr(priv->pd, (void *)addr, len,
				       IBV_ACCESS_LOCAL_WRITE);
	if (mr->ibv_mr == NULL) {
		WARN("port %u failed to create a verbs MR for address (%p)",
		     dev->data->port_id, (void *)addr);
		rte_free(mr);
		data->ret = -1;
		return;
	}
	mr->msl = NULL; /* Mark it as external memory. */
	mr->ms_bmp = NULL;
	mr->ms_n = 1;
	mr->ms_bmp_n = 1;
	rte_rwlock_write_lock(&priv->mr.rwlock);
	LIST_INSERT_HEAD(&priv->mr.mr_list, mr, mr);
	DEBUG("port %u MR CREATED (%p) for external memory %p:",
	      dev->data->port_id, (void *)mr, (void *)addr);
	DEBUG(" [0x%" PRIxPTR ", 0x%" PRIxPTR "),"
	      " lkey=0x%x base_idx=%u ms_n=%u, ms_bmp_n=%u",
	      addr, addr + len, rte_cpu_to_be_32(mr->ibv_mr->lkey),
	      mr->ms_base_idx, mr->ms_n, mr->ms_bmp_n);
	/* Insert to the global cache table. */
	mr_insert_dev_cache(dev, mr);
	rte_rwlock_write_unlock(&priv->mr.rwlock);
	/* Insert to the local cache table. */
	mlx4_mr_addr2mr_bh(dev, mr_ctrl, addr);
}
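
/*
 * The callback above is driven by rte_mempool_mem_iter(), which invokes
 * it once per memory chunk of the mempool. A self-contained sketch of the
 * same iteration pattern (chunk_cb and the counter are hypothetical):
 *
 * static void
 * chunk_cb(struct rte_mempool *mp __rte_unused, void *opaque,
 *	    struct rte_mempool_memhdr *memhdr, unsigned mem_idx)
 * {
 *	unsigned int *count = opaque;
 *
 *	printf("chunk #%u: addr=%p len=%zu\n",
 *	       mem_idx, memhdr->addr, memhdr->len);
 *	++*count;
 * }
 *
 * ...
 *	unsigned int n = 0;
 *
 *	rte_mempool_mem_iter(mp, chunk_cb, &n);
 */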

/**
 * Register MRs for all memory chunks of a mempool that has externally
 * allocated memory and fill the local cache.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param mr_ctrl
 *   Pointer to per-queue MR control structure.
 * @param mp
 *   Pointer to the mempool to be registered.
 *
 * @return
 *   0 on success, -1 on failure.
 */
static int
mlx4_mr_update_ext_mp(struct rte_eth_dev *dev, struct mlx4_mr_ctrl *mr_ctrl,
		      struct rte_mempool *mp)
{
	struct mr_update_mp_data data = {
		.dev = dev,
		.mr_ctrl = mr_ctrl,
		.ret = 0,
	};

	rte_mempool_mem_iter(mp, mlx4_mr_update_ext_mp_cb, &data);
	return data.ret;
}

/**
 * Register MRs for all memory chunks of a mempool that has externally
 * allocated memory and look up the LKey of the given address.
 *
 * @param txq
 *   Pointer to Tx queue structure.
 * @param addr
 *   Search key.
 * @param mp
 *   Pointer to the mempool, to be registered, that addr belongs to.
 *
 * @return
 *   LKey for address on success, UINT32_MAX on failure.
 */
uint32_t
mlx4_tx_update_ext_mp(struct txq *txq, uintptr_t addr, struct rte_mempool *mp)
{
	struct mlx4_mr_ctrl *mr_ctrl = &txq->mr_ctrl;
	struct mlx4_priv *priv = txq->priv;

	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
		WARN("port %u using address (%p) from unregistered mempool"
		     " having externally allocated memory"
		     " in secondary process, please create mempool"
		     " prior to rte_eth_dev_start()",
		     PORT_ID(priv), (void *)addr);
		return UINT32_MAX;
	}
	mlx4_mr_update_ext_mp(ETH_DEV(priv), mr_ctrl, mp);
	return mlx4_tx_addr2mr_bh(txq, addr);
}
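
/*
 * Summary of the Tx slow path for externally allocated memory, as
 * implemented by the functions above:
 *
 *   mlx4_tx_mb2mr_bh()
 *     -> mlx4_tx_addr2mr_bh()          // misses, rte_errno == ENXIO
 *     -> mlx4_tx_update_ext_mp()
 *          -> mlx4_mr_update_ext_mp()  // registers every chunk of mp
 *          -> mlx4_tx_addr2mr_bh()     // retried lookup now hits
 */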

/* Called during rte_mempool_mem_iter() by mlx4_mr_update_mp(). */
static void
mlx4_mr_update_mp_cb(struct rte_mempool *mp __rte_unused, void *opaque,
		     struct rte_mempool_memhdr *memhdr,
		     unsigned mem_idx __rte_unused)
{
	struct mr_update_mp_data *data = opaque;
	uint32_t lkey;

	/* Stop iteration if failed in the previous walk. */
	if (data->ret < 0)
		return;
	/* Register address of the chunk and update local caches. */
	lkey = mlx4_mr_addr2mr_bh(data->dev, data->mr_ctrl,
				  (uintptr_t)memhdr->addr);
	if (lkey == UINT32_MAX)
		data->ret = -1;
}

/**
 * Register all memory chunks of a mempool.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param mr_ctrl
 *   Pointer to per-queue MR control structure.
 * @param mp
 *   Pointer to the mempool to be registered.
 *
 * @return
 *   0 on success, -1 on failure.
 */
int
mlx4_mr_update_mp(struct rte_eth_dev *dev, struct mlx4_mr_ctrl *mr_ctrl,
		  struct rte_mempool *mp)
{
	struct mr_update_mp_data data = {
		.dev = dev,
		.mr_ctrl = mr_ctrl,
		.ret = 0,
	};

	rte_mempool_mem_iter(mp, mlx4_mr_update_mp_cb, &data);
	if (data.ret < 0 && rte_errno == ENXIO) {
		/* Mempool may have externally allocated memory. */
		return mlx4_mr_update_ext_mp(dev, mr_ctrl, mp);
	}
	return data.ret;
}
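
/*
 * Illustrative sketch of a typical caller (hypothetical error handling;
 * the real call sites are in the Rx/Tx queue setup code):
 *
 *	if (mlx4_mr_update_mp(dev, &rxq->mr_ctrl, mp) < 0) {
 *		// Chunk registration failed, queue cannot safely be used.
 *		rte_errno = EINVAL;
 *		return -rte_errno;
 *	}
 */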

#ifdef RTE_LIBRTE_MLX4_DEBUG
/**
 * Dump all the created MRs and the global cache entries.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
void
mlx4_mr_dump_dev(struct rte_eth_dev *dev)
{
	struct mlx4_priv *priv = dev->data->dev_private;
	struct mlx4_mr *mr;
	int mr_n = 0;
	int chunk_n = 0;

	rte_rwlock_read_lock(&priv->mr.rwlock);
	/* Iterate over all existing MRs. */
	LIST_FOREACH(mr, &priv->mr.mr_list, mr) {
		unsigned int n;

		DEBUG("port %u MR[%u], LKey = 0x%x, ms_n = %u, ms_bmp_n = %u",
		      dev->data->port_id, mr_n++,
		      rte_cpu_to_be_32(mr->ibv_mr->lkey),
		      mr->ms_n, mr->ms_bmp_n);
		if (mr->ms_n == 0)
			continue;
		for (n = 0; n < mr->ms_bmp_n; ) {
			struct mlx4_mr_cache ret;

			memset(&ret, 0, sizeof(ret));
			n = mr_find_next_chunk(mr, &ret, n);
			if (!ret.end)
				break;
			DEBUG(" chunk[%u], [0x%" PRIxPTR ", 0x%" PRIxPTR ")",
			      chunk_n++, ret.start, ret.end);
		}
	}
	DEBUG("port %u dumping global cache", dev->data->port_id);
	mlx4_mr_btree_dump(&priv->mr.cache);
	rte_rwlock_read_unlock(&priv->mr.rwlock);
}
#endif

/**
 * Release all the created MRs and resources, and remove the device from
 * the memory callback list.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
void
mlx4_mr_release(struct rte_eth_dev *dev)
{
	struct mlx4_priv *priv = dev->data->dev_private;
	struct mlx4_mr *mr_next;

	/* Remove from memory callback device list. */
	rte_rwlock_write_lock(&mlx4_shared_data->mem_event_rwlock);
	LIST_REMOVE(priv, mem_event_cb);
	rte_rwlock_write_unlock(&mlx4_shared_data->mem_event_rwlock);
#ifdef RTE_LIBRTE_MLX4_DEBUG
	mlx4_mr_dump_dev(dev);
#endif
	rte_rwlock_write_lock(&priv->mr.rwlock);
	/* Detach from the MR list and move to the free list. */
	mr_next = LIST_FIRST(&priv->mr.mr_list);
	while (mr_next != NULL) {
		struct mlx4_mr *mr = mr_next;

		mr_next = LIST_NEXT(mr, mr);
		LIST_REMOVE(mr, mr);
		LIST_INSERT_HEAD(&priv->mr.mr_free_list, mr, mr);
	}
	LIST_INIT(&priv->mr.mr_list);
	/* Free the global cache. */
	mlx4_mr_btree_free(&priv->mr.cache);
	rte_rwlock_write_unlock(&priv->mr.rwlock);
	/* Free all remaining MRs. */
	mlx4_mr_garbage_collect(dev);
}
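
/*
 * A hedged sketch of the expected teardown ordering (the actual close
 * path is defined elsewhere in the driver):
 *
 *	rte_eth_dev_stop(port_id);	// quiesce the datapath first
 *	mlx4_mr_release(dev);		// detach MRs and free the caches;
 *					// the garbage collector then frees
 *					// what remains on the free list
 */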