xref: /dpdk/drivers/net/mlx4/mlx4_mr.c (revision 1af8b0b2747fe6c6267fa7bedb602e569742362e)
182092c87SOlivier Matz /* SPDX-License-Identifier: BSD-3-Clause
2655588afSAdrien Mazarguil  * Copyright 2017 6WIND S.A.
35feecc57SShahaf Shuler  * Copyright 2017 Mellanox Technologies, Ltd
4655588afSAdrien Mazarguil  */
5655588afSAdrien Mazarguil 
6655588afSAdrien Mazarguil /**
7655588afSAdrien Mazarguil  * @file
8655588afSAdrien Mazarguil  * Memory management functions for mlx4 driver.
9655588afSAdrien Mazarguil  */
10655588afSAdrien Mazarguil 
11655588afSAdrien Mazarguil #include <errno.h>
12326d2cdfSOphir Munk #include <inttypes.h>
13655588afSAdrien Mazarguil #include <stddef.h>
14655588afSAdrien Mazarguil #include <stdint.h>
15655588afSAdrien Mazarguil #include <string.h>
16655588afSAdrien Mazarguil 
17655588afSAdrien Mazarguil /* Verbs headers do not support -pedantic. */
18655588afSAdrien Mazarguil #ifdef PEDANTIC
19655588afSAdrien Mazarguil #pragma GCC diagnostic ignored "-Wpedantic"
20655588afSAdrien Mazarguil #endif
21655588afSAdrien Mazarguil #include <infiniband/verbs.h>
22655588afSAdrien Mazarguil #ifdef PEDANTIC
23655588afSAdrien Mazarguil #pragma GCC diagnostic error "-Wpedantic"
24655588afSAdrien Mazarguil #endif
25655588afSAdrien Mazarguil 
26326d2cdfSOphir Munk #include <rte_branch_prediction.h>
27655588afSAdrien Mazarguil #include <rte_common.h>
288ac35916SDavid Marchand #include <rte_eal_memconfig.h>
29655588afSAdrien Mazarguil #include <rte_errno.h>
300d033530SAdrien Mazarguil #include <rte_malloc.h>
31655588afSAdrien Mazarguil #include <rte_memory.h>
32655588afSAdrien Mazarguil #include <rte_mempool.h>
339797bfccSYongseok Koh #include <rte_rwlock.h>
34655588afSAdrien Mazarguil 
354eba244bSAdrien Mazarguil #include "mlx4_glue.h"
369797bfccSYongseok Koh #include "mlx4_mr.h"
37326d2cdfSOphir Munk #include "mlx4_rxtx.h"
38655588afSAdrien Mazarguil #include "mlx4_utils.h"
39655588afSAdrien Mazarguil 
/*
 * Argument/result bundle for mr_find_contig_memsegs_cb(), used with
 * rte_memseg_contig_walk() to locate the virtually contiguous memseg
 * chunk containing a given address.
 */
struct mr_find_contig_memsegs_data {
	uintptr_t addr; /* Address to search for (input). */
	uintptr_t start; /* Start of the chunk found (output). */
	uintptr_t end; /* End of the chunk found (output). */
	const struct rte_memseg_list *msl; /* Memseg list of the chunk (output). */
};
469797bfccSYongseok Koh 
/*
 * Context for walking a mempool to register its chunks as MRs.
 * NOTE(review): not referenced within this chunk of the file; presumably
 * consumed by an mlx4_mr_update_mp() callback further down -- confirm.
 */
struct mr_update_mp_data {
	struct rte_eth_dev *dev; /* Ethernet device owning the MR cache. */
	struct mlx4_mr_ctrl *mr_ctrl; /* Per-queue MR control to update. */
	int ret; /* Walk status: 0 on success, negative on failure. */
};
529797bfccSYongseok Koh 
539797bfccSYongseok Koh /**
549797bfccSYongseok Koh  * Expand B-tree table to a given size. Can't be called with holding
559797bfccSYongseok Koh  * memory_hotplug_lock or priv->mr.rwlock due to rte_realloc().
569797bfccSYongseok Koh  *
579797bfccSYongseok Koh  * @param bt
589797bfccSYongseok Koh  *   Pointer to B-tree structure.
599797bfccSYongseok Koh  * @param n
609797bfccSYongseok Koh  *   Number of entries for expansion.
619797bfccSYongseok Koh  *
629797bfccSYongseok Koh  * @return
639797bfccSYongseok Koh  *   0 on success, -1 on failure.
649797bfccSYongseok Koh  */
659797bfccSYongseok Koh static int
669797bfccSYongseok Koh mr_btree_expand(struct mlx4_mr_btree *bt, int n)
679797bfccSYongseok Koh {
689797bfccSYongseok Koh 	void *mem;
699797bfccSYongseok Koh 	int ret = 0;
709797bfccSYongseok Koh 
719797bfccSYongseok Koh 	if (n <= bt->size)
729797bfccSYongseok Koh 		return ret;
739797bfccSYongseok Koh 	/*
749797bfccSYongseok Koh 	 * Downside of directly using rte_realloc() is that SOCKET_ID_ANY is
759797bfccSYongseok Koh 	 * used inside if there's no room to expand. Because this is a quite
769797bfccSYongseok Koh 	 * rare case and a part of very slow path, it is very acceptable.
779797bfccSYongseok Koh 	 * Initially cache_bh[] will be given practically enough space and once
789797bfccSYongseok Koh 	 * it is expanded, expansion wouldn't be needed again ever.
799797bfccSYongseok Koh 	 */
809797bfccSYongseok Koh 	mem = rte_realloc(bt->table, n * sizeof(struct mlx4_mr_cache), 0);
819797bfccSYongseok Koh 	if (mem == NULL) {
829797bfccSYongseok Koh 		/* Not an error, B-tree search will be skipped. */
839797bfccSYongseok Koh 		WARN("failed to expand MR B-tree (%p) table", (void *)bt);
849797bfccSYongseok Koh 		ret = -1;
859797bfccSYongseok Koh 	} else {
869797bfccSYongseok Koh 		DEBUG("expanded MR B-tree table (size=%u)", n);
879797bfccSYongseok Koh 		bt->table = mem;
889797bfccSYongseok Koh 		bt->size = n;
899797bfccSYongseok Koh 	}
909797bfccSYongseok Koh 	return ret;
919797bfccSYongseok Koh }
929797bfccSYongseok Koh 
/**
 * Look up LKey from given B-tree lookup table, store the last index and return
 * searched LKey.
 *
 * @param bt
 *   Pointer to B-tree structure.
 * @param[out] idx
 *   Pointer to index. Even on search failure, returns index where it stops
 *   searching so that index can be used when inserting a new entry.
 * @param addr
 *   Search key.
 *
 * @return
 *   Searched LKey on success, UINT32_MAX on no match.
 */
static uint32_t
mr_btree_lookup(struct mlx4_mr_btree *bt, uint16_t *idx, uintptr_t addr)
{
	struct mlx4_mr_cache *lkp_tbl;
	uint16_t n;
	uint16_t base = 0;

	MLX4_ASSERT(bt != NULL);
	lkp_tbl = *bt->table;
	n = bt->len;
	/*
	 * First entry must be a NULL sentinel (start == 0, lkey == UINT32_MAX)
	 * so that the binary search below never underflows past index 0.
	 * NOTE(review): the assertion condition looks inverted -- it is
	 * trivially true whenever the table is populated; `bt->len == 0 || ...`
	 * was presumably intended. Confirm against upstream before changing.
	 */
	MLX4_ASSERT(bt->len > 0 || (lkp_tbl[0].start == 0 &&
				    lkp_tbl[0].lkey == UINT32_MAX));
	/*
	 * Binary search over [base, base + n). The do-while always executes
	 * at least once and converges with n == 1, leaving `base` at the
	 * greatest entry whose start <= addr.
	 */
	do {
		register uint16_t delta = n >> 1;

		if (addr < lkp_tbl[base + delta].start) {
			n = delta;
		} else {
			base += delta;
			n -= delta;
		}
	} while (n > 1);
	MLX4_ASSERT(addr >= lkp_tbl[base].start);
	/* Report the stop position even on miss, for use by insertion. */
	*idx = base;
	if (addr < lkp_tbl[base].end)
		return lkp_tbl[base].lkey;
	/* Not found. */
	return UINT32_MAX;
}
1399797bfccSYongseok Koh 
/**
 * Insert an entry to B-tree lookup table, keeping the table sorted by
 * start address so that mr_btree_lookup() can binary-search it.
 *
 * @param bt
 *   Pointer to B-tree structure.
 * @param entry
 *   Pointer to new entry to insert.
 *
 * @return
 *   0 on success, -1 on failure (table full; bt->overflow is set).
 */
static int
mr_btree_insert(struct mlx4_mr_btree *bt, struct mlx4_mr_cache *entry)
{
	struct mlx4_mr_cache *lkp_tbl;
	uint16_t idx = 0;
	size_t shift;

	MLX4_ASSERT(bt != NULL);
	MLX4_ASSERT(bt->len <= bt->size);
	MLX4_ASSERT(bt->len > 0);
	lkp_tbl = *bt->table;
	/*
	 * Find out the slot for insertion. On miss, lookup leaves idx at the
	 * greatest entry whose start <= entry->start.
	 */
	if (mr_btree_lookup(bt, &idx, entry->start) != UINT32_MAX) {
		DEBUG("abort insertion to B-tree(%p): already exist at"
		      " idx=%u [0x%" PRIxPTR ", 0x%" PRIxPTR ") lkey=0x%x",
		      (void *)bt, idx, entry->start, entry->end, entry->lkey);
		/* Already exist, return. */
		return 0;
	}
	/* If table is full, return error. */
	if (unlikely(bt->len == bt->size)) {
		bt->overflow = 1;
		return -1;
	}
	/* Insert right after the stop position reported by the lookup. */
	++idx;
	/* Shift the tail of the table up by one slot to make room. */
	shift = (bt->len - idx) * sizeof(struct mlx4_mr_cache);
	if (shift)
		memmove(&lkp_tbl[idx + 1], &lkp_tbl[idx], shift);
	lkp_tbl[idx] = *entry;
	bt->len++;
	DEBUG("inserted B-tree(%p)[%u],"
	      " [0x%" PRIxPTR ", 0x%" PRIxPTR ") lkey=0x%x",
	      (void *)bt, idx, entry->start, entry->end, entry->lkey);
	return 0;
}
1879797bfccSYongseok Koh 
1889797bfccSYongseok Koh /**
1899797bfccSYongseok Koh  * Initialize B-tree and allocate memory for lookup table.
1909797bfccSYongseok Koh  *
1919797bfccSYongseok Koh  * @param bt
1929797bfccSYongseok Koh  *   Pointer to B-tree structure.
1939797bfccSYongseok Koh  * @param n
1949797bfccSYongseok Koh  *   Number of entries to allocate.
1959797bfccSYongseok Koh  * @param socket
1969797bfccSYongseok Koh  *   NUMA socket on which memory must be allocated.
1979797bfccSYongseok Koh  *
1989797bfccSYongseok Koh  * @return
1999797bfccSYongseok Koh  *   0 on success, a negative errno value otherwise and rte_errno is set.
2009797bfccSYongseok Koh  */
2019797bfccSYongseok Koh int
2029797bfccSYongseok Koh mlx4_mr_btree_init(struct mlx4_mr_btree *bt, int n, int socket)
2039797bfccSYongseok Koh {
2049797bfccSYongseok Koh 	if (bt == NULL) {
2059797bfccSYongseok Koh 		rte_errno = EINVAL;
2069797bfccSYongseok Koh 		return -rte_errno;
2079797bfccSYongseok Koh 	}
2089797bfccSYongseok Koh 	memset(bt, 0, sizeof(*bt));
2099797bfccSYongseok Koh 	bt->table = rte_calloc_socket("B-tree table",
2109797bfccSYongseok Koh 				      n, sizeof(struct mlx4_mr_cache),
2119797bfccSYongseok Koh 				      0, socket);
2129797bfccSYongseok Koh 	if (bt->table == NULL) {
2139797bfccSYongseok Koh 		rte_errno = ENOMEM;
2149797bfccSYongseok Koh 		ERROR("failed to allocate memory for btree cache on socket %d",
2159797bfccSYongseok Koh 		      socket);
2169797bfccSYongseok Koh 		return -rte_errno;
2179797bfccSYongseok Koh 	}
2189797bfccSYongseok Koh 	bt->size = n;
2199797bfccSYongseok Koh 	/* First entry must be NULL for binary search. */
2209797bfccSYongseok Koh 	(*bt->table)[bt->len++] = (struct mlx4_mr_cache) {
2219797bfccSYongseok Koh 		.lkey = UINT32_MAX,
2229797bfccSYongseok Koh 	};
2239797bfccSYongseok Koh 	DEBUG("initialized B-tree %p with table %p",
2249797bfccSYongseok Koh 	      (void *)bt, (void *)bt->table);
2259797bfccSYongseok Koh 	return 0;
2269797bfccSYongseok Koh }
2279797bfccSYongseok Koh 
2289797bfccSYongseok Koh /**
2299797bfccSYongseok Koh  * Free B-tree resources.
2309797bfccSYongseok Koh  *
2319797bfccSYongseok Koh  * @param bt
2329797bfccSYongseok Koh  *   Pointer to B-tree structure.
2339797bfccSYongseok Koh  */
2349797bfccSYongseok Koh void
2359797bfccSYongseok Koh mlx4_mr_btree_free(struct mlx4_mr_btree *bt)
2369797bfccSYongseok Koh {
2379797bfccSYongseok Koh 	if (bt == NULL)
2389797bfccSYongseok Koh 		return;
2399797bfccSYongseok Koh 	DEBUG("freeing B-tree %p with table %p", (void *)bt, (void *)bt->table);
2409797bfccSYongseok Koh 	rte_free(bt->table);
2419797bfccSYongseok Koh 	memset(bt, 0, sizeof(*bt));
2429797bfccSYongseok Koh }
2439797bfccSYongseok Koh 
#ifdef RTE_LIBRTE_MLX4_DEBUG
/**
 * Dump all the entries in a B-tree
 *
 * @param bt
 *   Pointer to B-tree structure, may be NULL.
 */
void
mlx4_mr_btree_dump(struct mlx4_mr_btree *bt)
{
	struct mlx4_mr_cache *tbl;
	int i;

	if (bt == NULL)
		return;
	tbl = *bt->table;
	for (i = 0; i < bt->len; ++i) {
		DEBUG("B-tree(%p)[%u],"
		      " [0x%" PRIxPTR ", 0x%" PRIxPTR ") lkey=0x%x",
		      (void *)bt, i, tbl[i].start, tbl[i].end, tbl[i].lkey);
	}
}
#endif
2699797bfccSYongseok Koh 
/**
 * Find virtually contiguous memory chunk in a given MR.
 *
 * @param mr
 *   Pointer to MR structure.
 * @param[out] entry
 *   Pointer to returning MR cache entry. If not found, this will not be
 *   updated.
 * @param base_idx
 *   Start index of the memseg bitmap.
 *
 * @return
 *   Next index to go on lookup.
 */
static int
mr_find_next_chunk(struct mlx4_mr *mr, struct mlx4_mr_cache *entry,
		   int base_idx)
{
	uintptr_t start = 0;
	uintptr_t end = 0;
	uint32_t idx = 0;

	/* MR for external memory doesn't have memseg list. */
	if (mr->msl == NULL) {
		struct ibv_mr *ibv_mr = mr->ibv_mr;

		MLX4_ASSERT(mr->ms_bmp_n == 1);
		MLX4_ASSERT(mr->ms_n == 1);
		MLX4_ASSERT(base_idx == 0);
		/*
		 * Can't search it from memseg list but get it directly from
		 * verbs MR as there's only one chunk.
		 */
		entry->start = (uintptr_t)ibv_mr->addr;
		entry->end = (uintptr_t)ibv_mr->addr + mr->ibv_mr->length;
		entry->lkey = rte_cpu_to_be_32(mr->ibv_mr->lkey);
		/* Returning 1 ends iteration. */
		return 1;
	}
	/* Scan the bitmap for a run of consecutive set bits (one chunk). */
	for (idx = base_idx; idx < mr->ms_bmp_n; ++idx) {
		if (rte_bitmap_get(mr->ms_bmp, idx)) {
			const struct rte_memseg_list *msl;
			const struct rte_memseg *ms;

			msl = mr->msl;
			ms = rte_fbarray_get(&msl->memseg_arr,
					     mr->ms_base_idx + idx);
			MLX4_ASSERT(msl->page_sz == ms->hugepage_sz);
			/* Remember where the run starts, extend its end. */
			if (!start)
				start = ms->addr_64;
			end = ms->addr_64 + ms->hugepage_sz;
		} else if (start) {
			/* Passed the end of a fragment. */
			break;
		}
	}
	if (start) {
		/* Found one chunk. */
		entry->start = start;
		entry->end = end;
		entry->lkey = rte_cpu_to_be_32(mr->ibv_mr->lkey);
	}
	return idx;
}
3349797bfccSYongseok Koh 
3359797bfccSYongseok Koh /**
3369797bfccSYongseok Koh  * Insert a MR to the global B-tree cache. It may fail due to low-on-memory.
3379797bfccSYongseok Koh  * Then, this entry will have to be searched by mr_lookup_dev_list() in
3389797bfccSYongseok Koh  * mlx4_mr_create() on miss.
3399797bfccSYongseok Koh  *
3409797bfccSYongseok Koh  * @param dev
3419797bfccSYongseok Koh  *   Pointer to Ethernet device.
3429797bfccSYongseok Koh  * @param mr
3439797bfccSYongseok Koh  *   Pointer to MR to insert.
3449797bfccSYongseok Koh  *
3459797bfccSYongseok Koh  * @return
3469797bfccSYongseok Koh  *   0 on success, -1 on failure.
3479797bfccSYongseok Koh  */
3489797bfccSYongseok Koh static int
3499797bfccSYongseok Koh mr_insert_dev_cache(struct rte_eth_dev *dev, struct mlx4_mr *mr)
3509797bfccSYongseok Koh {
351dbeba4cfSThomas Monjalon 	struct mlx4_priv *priv = dev->data->dev_private;
3529797bfccSYongseok Koh 	unsigned int n;
3539797bfccSYongseok Koh 
3549797bfccSYongseok Koh 	DEBUG("port %u inserting MR(%p) to global cache",
3559797bfccSYongseok Koh 	      dev->data->port_id, (void *)mr);
3569797bfccSYongseok Koh 	for (n = 0; n < mr->ms_bmp_n; ) {
357d924d6b9SAli Alnubani 		struct mlx4_mr_cache entry;
3589797bfccSYongseok Koh 
359d924d6b9SAli Alnubani 		memset(&entry, 0, sizeof(entry));
3609797bfccSYongseok Koh 		/* Find a contiguous chunk and advance the index. */
3619797bfccSYongseok Koh 		n = mr_find_next_chunk(mr, &entry, n);
3629797bfccSYongseok Koh 		if (!entry.end)
3639797bfccSYongseok Koh 			break;
3649797bfccSYongseok Koh 		if (mr_btree_insert(&priv->mr.cache, &entry) < 0) {
3659797bfccSYongseok Koh 			/*
3669797bfccSYongseok Koh 			 * Overflowed, but the global table cannot be expanded
3679797bfccSYongseok Koh 			 * because of deadlock.
3689797bfccSYongseok Koh 			 */
3699797bfccSYongseok Koh 			return -1;
3709797bfccSYongseok Koh 		}
3719797bfccSYongseok Koh 	}
3729797bfccSYongseok Koh 	return 0;
3739797bfccSYongseok Koh }
3749797bfccSYongseok Koh 
3759797bfccSYongseok Koh /**
3769797bfccSYongseok Koh  * Look up address in the original global MR list.
3779797bfccSYongseok Koh  *
3789797bfccSYongseok Koh  * @param dev
3799797bfccSYongseok Koh  *   Pointer to Ethernet device.
3809797bfccSYongseok Koh  * @param[out] entry
3819797bfccSYongseok Koh  *   Pointer to returning MR cache entry. If no match, this will not be updated.
3829797bfccSYongseok Koh  * @param addr
3839797bfccSYongseok Koh  *   Search key.
3849797bfccSYongseok Koh  *
3859797bfccSYongseok Koh  * @return
3869797bfccSYongseok Koh  *   Found MR on match, NULL otherwise.
3879797bfccSYongseok Koh  */
3889797bfccSYongseok Koh static struct mlx4_mr *
3899797bfccSYongseok Koh mr_lookup_dev_list(struct rte_eth_dev *dev, struct mlx4_mr_cache *entry,
3909797bfccSYongseok Koh 		   uintptr_t addr)
3919797bfccSYongseok Koh {
392dbeba4cfSThomas Monjalon 	struct mlx4_priv *priv = dev->data->dev_private;
3939797bfccSYongseok Koh 	struct mlx4_mr *mr;
3949797bfccSYongseok Koh 
3959797bfccSYongseok Koh 	/* Iterate all the existing MRs. */
3969797bfccSYongseok Koh 	LIST_FOREACH(mr, &priv->mr.mr_list, mr) {
3979797bfccSYongseok Koh 		unsigned int n;
3989797bfccSYongseok Koh 
3999797bfccSYongseok Koh 		if (mr->ms_n == 0)
4009797bfccSYongseok Koh 			continue;
4019797bfccSYongseok Koh 		for (n = 0; n < mr->ms_bmp_n; ) {
402d924d6b9SAli Alnubani 			struct mlx4_mr_cache ret;
4039797bfccSYongseok Koh 
404d924d6b9SAli Alnubani 			memset(&ret, 0, sizeof(ret));
4059797bfccSYongseok Koh 			n = mr_find_next_chunk(mr, &ret, n);
4069797bfccSYongseok Koh 			if (addr >= ret.start && addr < ret.end) {
4079797bfccSYongseok Koh 				/* Found. */
4089797bfccSYongseok Koh 				*entry = ret;
4099797bfccSYongseok Koh 				return mr;
4109797bfccSYongseok Koh 			}
4119797bfccSYongseok Koh 		}
4129797bfccSYongseok Koh 	}
4139797bfccSYongseok Koh 	return NULL;
4149797bfccSYongseok Koh }
4159797bfccSYongseok Koh 
4169797bfccSYongseok Koh /**
4179797bfccSYongseok Koh  * Look up address on device.
4189797bfccSYongseok Koh  *
4199797bfccSYongseok Koh  * @param dev
4209797bfccSYongseok Koh  *   Pointer to Ethernet device.
4219797bfccSYongseok Koh  * @param[out] entry
4229797bfccSYongseok Koh  *   Pointer to returning MR cache entry. If no match, this will not be updated.
4239797bfccSYongseok Koh  * @param addr
4249797bfccSYongseok Koh  *   Search key.
4259797bfccSYongseok Koh  *
4269797bfccSYongseok Koh  * @return
4279797bfccSYongseok Koh  *   Searched LKey on success, UINT32_MAX on failure and rte_errno is set.
4289797bfccSYongseok Koh  */
4299797bfccSYongseok Koh static uint32_t
4309797bfccSYongseok Koh mr_lookup_dev(struct rte_eth_dev *dev, struct mlx4_mr_cache *entry,
4319797bfccSYongseok Koh 	      uintptr_t addr)
4329797bfccSYongseok Koh {
433dbeba4cfSThomas Monjalon 	struct mlx4_priv *priv = dev->data->dev_private;
4349797bfccSYongseok Koh 	uint16_t idx;
4359797bfccSYongseok Koh 	uint32_t lkey = UINT32_MAX;
4369797bfccSYongseok Koh 	struct mlx4_mr *mr;
4379797bfccSYongseok Koh 
4389797bfccSYongseok Koh 	/*
4399797bfccSYongseok Koh 	 * If the global cache has overflowed since it failed to expand the
4409797bfccSYongseok Koh 	 * B-tree table, it can't have all the existing MRs. Then, the address
4419797bfccSYongseok Koh 	 * has to be searched by traversing the original MR list instead, which
4429797bfccSYongseok Koh 	 * is very slow path. Otherwise, the global cache is all inclusive.
4439797bfccSYongseok Koh 	 */
4449797bfccSYongseok Koh 	if (!unlikely(priv->mr.cache.overflow)) {
4459797bfccSYongseok Koh 		lkey = mr_btree_lookup(&priv->mr.cache, &idx, addr);
4469797bfccSYongseok Koh 		if (lkey != UINT32_MAX)
4479797bfccSYongseok Koh 			*entry = (*priv->mr.cache.table)[idx];
4489797bfccSYongseok Koh 	} else {
4499797bfccSYongseok Koh 		/* Falling back to the slowest path. */
4509797bfccSYongseok Koh 		mr = mr_lookup_dev_list(dev, entry, addr);
4519797bfccSYongseok Koh 		if (mr != NULL)
4529797bfccSYongseok Koh 			lkey = entry->lkey;
4539797bfccSYongseok Koh 	}
4548e08df22SAlexander Kozyrev 	MLX4_ASSERT(lkey == UINT32_MAX || (addr >= entry->start &&
4559797bfccSYongseok Koh 					   addr < entry->end));
4569797bfccSYongseok Koh 	return lkey;
4579797bfccSYongseok Koh }
4589797bfccSYongseok Koh 
4599797bfccSYongseok Koh /**
4609797bfccSYongseok Koh  * Free MR resources. MR lock must not be held to avoid a deadlock. rte_free()
4619797bfccSYongseok Koh  * can raise memory free event and the callback function will spin on the lock.
4629797bfccSYongseok Koh  *
4639797bfccSYongseok Koh  * @param mr
4649797bfccSYongseok Koh  *   Pointer to MR to free.
4659797bfccSYongseok Koh  */
4669797bfccSYongseok Koh static void
4679797bfccSYongseok Koh mr_free(struct mlx4_mr *mr)
4689797bfccSYongseok Koh {
4699797bfccSYongseok Koh 	if (mr == NULL)
4709797bfccSYongseok Koh 		return;
4719797bfccSYongseok Koh 	DEBUG("freeing MR(%p):", (void *)mr);
4729797bfccSYongseok Koh 	if (mr->ibv_mr != NULL)
4739797bfccSYongseok Koh 		claim_zero(mlx4_glue->dereg_mr(mr->ibv_mr));
4749797bfccSYongseok Koh 	rte_bitmap_free(mr->ms_bmp);
4759797bfccSYongseok Koh 	rte_free(mr);
4769797bfccSYongseok Koh }
4779797bfccSYongseok Koh 
4789797bfccSYongseok Koh /**
479897dbd3cSViacheslav Ovsiienko  * Release resources of detached MR having no online entry.
4809797bfccSYongseok Koh  *
4819797bfccSYongseok Koh  * @param dev
4829797bfccSYongseok Koh  *   Pointer to Ethernet device.
4839797bfccSYongseok Koh  */
4849797bfccSYongseok Koh static void
4859797bfccSYongseok Koh mlx4_mr_garbage_collect(struct rte_eth_dev *dev)
4869797bfccSYongseok Koh {
487dbeba4cfSThomas Monjalon 	struct mlx4_priv *priv = dev->data->dev_private;
4889797bfccSYongseok Koh 	struct mlx4_mr *mr_next;
4899797bfccSYongseok Koh 	struct mlx4_mr_list free_list = LIST_HEAD_INITIALIZER(free_list);
4909797bfccSYongseok Koh 
4910203d33aSYongseok Koh 	/* Must be called from the primary process. */
4928e08df22SAlexander Kozyrev 	MLX4_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
4939797bfccSYongseok Koh 	/*
4949797bfccSYongseok Koh 	 * MR can't be freed with holding the lock because rte_free() could call
4959797bfccSYongseok Koh 	 * memory free callback function. This will be a deadlock situation.
4969797bfccSYongseok Koh 	 */
4979797bfccSYongseok Koh 	rte_rwlock_write_lock(&priv->mr.rwlock);
4989797bfccSYongseok Koh 	/* Detach the whole free list and release it after unlocking. */
4999797bfccSYongseok Koh 	free_list = priv->mr.mr_free_list;
5009797bfccSYongseok Koh 	LIST_INIT(&priv->mr.mr_free_list);
5019797bfccSYongseok Koh 	rte_rwlock_write_unlock(&priv->mr.rwlock);
5029797bfccSYongseok Koh 	/* Release resources. */
5039797bfccSYongseok Koh 	mr_next = LIST_FIRST(&free_list);
5049797bfccSYongseok Koh 	while (mr_next != NULL) {
5059797bfccSYongseok Koh 		struct mlx4_mr *mr = mr_next;
5069797bfccSYongseok Koh 
5079797bfccSYongseok Koh 		mr_next = LIST_NEXT(mr, mr);
5089797bfccSYongseok Koh 		mr_free(mr);
5099797bfccSYongseok Koh 	}
5109797bfccSYongseok Koh }
5119797bfccSYongseok Koh 
5129797bfccSYongseok Koh /* Called during rte_memseg_contig_walk() by mlx4_mr_create(). */
5139797bfccSYongseok Koh static int
5149797bfccSYongseok Koh mr_find_contig_memsegs_cb(const struct rte_memseg_list *msl,
5159797bfccSYongseok Koh 			  const struct rte_memseg *ms, size_t len, void *arg)
5169797bfccSYongseok Koh {
5179797bfccSYongseok Koh 	struct mr_find_contig_memsegs_data *data = arg;
5189797bfccSYongseok Koh 
5199797bfccSYongseok Koh 	if (data->addr < ms->addr_64 || data->addr >= ms->addr_64 + len)
5209797bfccSYongseok Koh 		return 0;
5219797bfccSYongseok Koh 	/* Found, save it and stop walking. */
5229797bfccSYongseok Koh 	data->start = ms->addr_64;
5239797bfccSYongseok Koh 	data->end = ms->addr_64 + len;
5249797bfccSYongseok Koh 	data->msl = msl;
5259797bfccSYongseok Koh 	return 1;
5269797bfccSYongseok Koh }
5279797bfccSYongseok Koh 
5289797bfccSYongseok Koh /**
529897dbd3cSViacheslav Ovsiienko  * Create a new global Memory Region (MR) for a missing virtual address.
5300b259b8eSYongseok Koh  * This API should be called on a secondary process, then a request is sent to
5310b259b8eSYongseok Koh  * the primary process in order to create a MR for the address. As the global MR
5320b259b8eSYongseok Koh  * list is on the shared memory, following LKey lookup should succeed unless the
5330b259b8eSYongseok Koh  * request fails.
5349797bfccSYongseok Koh  *
5359797bfccSYongseok Koh  * @param dev
5369797bfccSYongseok Koh  *   Pointer to Ethernet device.
5379797bfccSYongseok Koh  * @param[out] entry
5389797bfccSYongseok Koh  *   Pointer to returning MR cache entry, found in the global cache or newly
5399797bfccSYongseok Koh  *   created. If failed to create one, this will not be updated.
5409797bfccSYongseok Koh  * @param addr
5419797bfccSYongseok Koh  *   Target virtual address to register.
5429797bfccSYongseok Koh  *
5439797bfccSYongseok Koh  * @return
5449797bfccSYongseok Koh  *   Searched LKey on success, UINT32_MAX on failure and rte_errno is set.
5459797bfccSYongseok Koh  */
5469797bfccSYongseok Koh static uint32_t
5470b259b8eSYongseok Koh mlx4_mr_create_secondary(struct rte_eth_dev *dev, struct mlx4_mr_cache *entry,
5480b259b8eSYongseok Koh 			 uintptr_t addr)
5490b259b8eSYongseok Koh {
5500b259b8eSYongseok Koh 	struct mlx4_priv *priv = dev->data->dev_private;
5510b259b8eSYongseok Koh 	int ret;
5520b259b8eSYongseok Koh 
5530b259b8eSYongseok Koh 	DEBUG("port %u requesting MR creation for address (%p)",
5540b259b8eSYongseok Koh 	      dev->data->port_id, (void *)addr);
5550b259b8eSYongseok Koh 	ret = mlx4_mp_req_mr_create(dev, addr);
5560b259b8eSYongseok Koh 	if (ret) {
5570b259b8eSYongseok Koh 		DEBUG("port %u fail to request MR creation for address (%p)",
5580b259b8eSYongseok Koh 		      dev->data->port_id, (void *)addr);
5590b259b8eSYongseok Koh 		return UINT32_MAX;
5600b259b8eSYongseok Koh 	}
5610b259b8eSYongseok Koh 	rte_rwlock_read_lock(&priv->mr.rwlock);
5620b259b8eSYongseok Koh 	/* Fill in output data. */
5630b259b8eSYongseok Koh 	mr_lookup_dev(dev, entry, addr);
5640b259b8eSYongseok Koh 	/* Lookup can't fail. */
5658e08df22SAlexander Kozyrev 	MLX4_ASSERT(entry->lkey != UINT32_MAX);
5660b259b8eSYongseok Koh 	rte_rwlock_read_unlock(&priv->mr.rwlock);
567*1af8b0b2SDavid Marchand 	DEBUG("port %u MR CREATED by primary process for %p:",
568*1af8b0b2SDavid Marchand 	      dev->data->port_id, (void *)addr);
569*1af8b0b2SDavid Marchand 	DEBUG("  [0x%" PRIxPTR ", 0x%" PRIxPTR "), lkey=0x%x",
5700b259b8eSYongseok Koh 	      entry->start, entry->end, entry->lkey);
5710b259b8eSYongseok Koh 	return entry->lkey;
5720b259b8eSYongseok Koh }
5730b259b8eSYongseok Koh 
5740b259b8eSYongseok Koh /**
575897dbd3cSViacheslav Ovsiienko  * Create a new global Memory Region (MR) for a missing virtual address.
5760b259b8eSYongseok Koh  * Register entire virtually contiguous memory chunk around the address.
5770b259b8eSYongseok Koh  * This must be called from the primary process.
5780b259b8eSYongseok Koh  *
5790b259b8eSYongseok Koh  * @param dev
5800b259b8eSYongseok Koh  *   Pointer to Ethernet device.
5810b259b8eSYongseok Koh  * @param[out] entry
5820b259b8eSYongseok Koh  *   Pointer to returning MR cache entry, found in the global cache or newly
5830b259b8eSYongseok Koh  *   created. If failed to create one, this will not be updated.
5840b259b8eSYongseok Koh  * @param addr
5850b259b8eSYongseok Koh  *   Target virtual address to register.
5860b259b8eSYongseok Koh  *
5870b259b8eSYongseok Koh  * @return
5880b259b8eSYongseok Koh  *   Searched LKey on success, UINT32_MAX on failure and rte_errno is set.
5890b259b8eSYongseok Koh  */
uint32_t
mlx4_mr_create_primary(struct rte_eth_dev *dev, struct mlx4_mr_cache *entry,
		       uintptr_t addr)
{
	struct mlx4_priv *priv = dev->data->dev_private;
	const struct rte_memseg_list *msl;
	const struct rte_memseg *ms;
	struct mlx4_mr *mr = NULL;
	size_t len;
	uint32_t ms_n;
	uint32_t bmp_size;
	void *bmp_mem;
	/* Index of the first memseg (within the walked range) that this MR
	 * actually registers; -1 until the first unregistered memseg is found.
	 */
	int ms_idx_shift = -1;
	unsigned int n;
	struct mr_find_contig_memsegs_data data = {
		.addr = addr,
	};
	struct mr_find_contig_memsegs_data data_re;

	DEBUG("port %u creating a MR using address (%p)",
	      dev->data->port_id, (void *)addr);
	/*
	 * Release detached MRs if any. This can't be called with holding either
	 * memory_hotplug_lock or priv->mr.rwlock. MRs on the free list have
	 * been detached by the memory free event but it couldn't be released
	 * inside the callback due to deadlock. As a result, releasing resources
	 * is quite opportunistic.
	 */
	mlx4_mr_garbage_collect(dev);
	/*
	 * If enabled, find out a contiguous virtual address chunk in use, to
	 * which the given address belongs, in order to register maximum range.
	 * In the best case where mempools are not dynamically recreated and
	 * '--socket-mem' is specified as an EAL option, it is very likely to
	 * have only one MR(LKey) per a socket and per a hugepage-size even
	 * though the system memory is highly fragmented. As the whole memory
	 * chunk will be pinned by kernel, it can't be reused unless entire
	 * chunk is freed from EAL.
	 *
	 * If disabled, just register one memseg (page). Then, memory
	 * consumption will be minimized but it may drop performance if there
	 * are many MRs to lookup on the datapath.
	 */
	if (!priv->mr_ext_memseg_en) {
		/* Extension disabled: register only the page holding addr. */
		data.msl = rte_mem_virt2memseg_list((void *)addr);
		data.start = RTE_ALIGN_FLOOR(addr, data.msl->page_sz);
		data.end = data.start + data.msl->page_sz;
	} else if (!rte_memseg_contig_walk(mr_find_contig_memsegs_cb, &data)) {
		WARN("port %u unable to find virtually contiguous"
		     " chunk for address (%p)."
		     " rte_memseg_contig_walk() failed.",
		     dev->data->port_id, (void *)addr);
		rte_errno = ENXIO;
		goto err_nolock;
	}
	/* Re-entered from below with a single-page range if the extended
	 * chunk changed while resources were allocated without the lock.
	 */
alloc_resources:
	/* Addresses must be page-aligned. */
	MLX4_ASSERT(rte_is_aligned((void *)data.start, data.msl->page_sz));
	MLX4_ASSERT(rte_is_aligned((void *)data.end, data.msl->page_sz));
	msl = data.msl;
	ms = rte_mem_virt2memseg((void *)data.start, msl);
	len = data.end - data.start;
	MLX4_ASSERT(msl->page_sz == ms->hugepage_sz);
	/* Number of memsegs in the range. */
	ms_n = len / msl->page_sz;
	DEBUG("port %u extending %p to [0x%" PRIxPTR ", 0x%" PRIxPTR "),"
	      " page_sz=0x%" PRIx64 ", ms_n=%u",
	      dev->data->port_id, (void *)addr,
	      data.start, data.end, msl->page_sz, ms_n);
	/* Size of memory for bitmap. */
	bmp_size = rte_bitmap_get_memory_footprint(ms_n);
	/* MR descriptor and its memseg bitmap share one cache-aligned
	 * allocation on the memseg's NUMA socket.
	 */
	mr = rte_zmalloc_socket(NULL,
				RTE_ALIGN_CEIL(sizeof(*mr),
					       RTE_CACHE_LINE_SIZE) +
				bmp_size,
				RTE_CACHE_LINE_SIZE, msl->socket_id);
	if (mr == NULL) {
		WARN("port %u unable to allocate memory for a new MR of"
		     " address (%p).",
		     dev->data->port_id, (void *)addr);
		rte_errno = ENOMEM;
		goto err_nolock;
	}
	mr->msl = msl;
	/*
	 * Save the index of the first memseg and initialize memseg bitmap. To
	 * see if a memseg of ms_idx in the memseg-list is still valid, check:
	 *	rte_bitmap_get(mr->bmp, ms_idx - mr->ms_base_idx)
	 */
	mr->ms_base_idx = rte_fbarray_find_idx(&msl->memseg_arr, ms);
	bmp_mem = RTE_PTR_ALIGN_CEIL(mr + 1, RTE_CACHE_LINE_SIZE);
	mr->ms_bmp = rte_bitmap_init(ms_n, bmp_mem, bmp_size);
	if (mr->ms_bmp == NULL) {
		WARN("port %u unable to initialize bitmap for a new MR of"
		     " address (%p).",
		     dev->data->port_id, (void *)addr);
		rte_errno = EINVAL;
		goto err_nolock;
	}
	/*
	 * Should recheck whether the extended contiguous chunk is still valid.
	 * Because memory_hotplug_lock can't be held if there's any memory
	 * related calls in a critical path, resource allocation above can't be
	 * locked. If the memory has been changed at this point, try again with
	 * just single page. If not, go on with the big chunk atomically from
	 * here.
	 */
	rte_mcfg_mem_read_lock();
	data_re = data;
	if (len > msl->page_sz &&
	    !rte_memseg_contig_walk(mr_find_contig_memsegs_cb, &data_re)) {
		WARN("port %u unable to find virtually contiguous"
		     " chunk for address (%p)."
		     " rte_memseg_contig_walk() failed.",
		     dev->data->port_id, (void *)addr);
		rte_errno = ENXIO;
		goto err_memlock;
	}
	if (data.start != data_re.start || data.end != data_re.end) {
		/*
		 * The extended contiguous chunk has been changed. Try again
		 * with single memseg instead.
		 */
		data.start = RTE_ALIGN_FLOOR(addr, msl->page_sz);
		data.end = data.start + msl->page_sz;
		rte_mcfg_mem_read_unlock();
		mr_free(mr);
		goto alloc_resources;
	}
	MLX4_ASSERT(data.msl == data_re.msl);
	/* Lock order: mem read lock first, then the MR rwlock. */
	rte_rwlock_write_lock(&priv->mr.rwlock);
	/*
	 * Check the address is really missing. If other thread already created
	 * one or it is not found due to overflow, abort and return.
	 */
	if (mr_lookup_dev(dev, entry, addr) != UINT32_MAX) {
		/*
		 * Insert to the global cache table. It may fail due to
		 * low-on-memory. Then, this entry will have to be searched
		 * here again.
		 */
		mr_btree_insert(&priv->mr.cache, entry);
		DEBUG("port %u found MR for %p on final lookup, abort",
		      dev->data->port_id, (void *)addr);
		rte_rwlock_write_unlock(&priv->mr.rwlock);
		rte_mcfg_mem_read_unlock();
		/*
		 * Must be unlocked before calling rte_free() because
		 * mlx4_mr_mem_event_free_cb() can be called inside.
		 */
		mr_free(mr);
		return entry->lkey;
	}
	/*
	 * Trim start and end addresses for verbs MR. Set bits for registering
	 * memsegs but exclude already registered ones. Bitmap can be
	 * fragmented.
	 */
	for (n = 0; n < ms_n; ++n) {
		uintptr_t start;
		struct mlx4_mr_cache ret;

		memset(&ret, 0, sizeof(ret));
		start = data_re.start + n * msl->page_sz;
		/* Exclude memsegs already registered by other MRs. */
		if (mr_lookup_dev(dev, &ret, start) == UINT32_MAX) {
			/*
			 * Start from the first unregistered memseg in the
			 * extended range.
			 */
			if (ms_idx_shift == -1) {
				mr->ms_base_idx += n;
				data.start = start;
				ms_idx_shift = n;
			}
			data.end = start + msl->page_sz;
			rte_bitmap_set(mr->ms_bmp, n - ms_idx_shift);
			++mr->ms_n;
		}
	}
	/* [data.start, data.end) now spans only the memsegs this MR owns. */
	len = data.end - data.start;
	mr->ms_bmp_n = len / msl->page_sz;
	MLX4_ASSERT(ms_idx_shift + mr->ms_bmp_n <= ms_n);
	/*
	 * Finally create a verbs MR for the memory chunk. ibv_reg_mr() can be
	 * called with holding the memory lock because it doesn't use
	 * mlx4_alloc_buf_extern() which eventually calls rte_malloc_socket()
	 * through mlx4_alloc_verbs_buf().
	 */
	mr->ibv_mr = mlx4_glue->reg_mr(priv->pd, (void *)data.start, len,
				       IBV_ACCESS_LOCAL_WRITE);
	if (mr->ibv_mr == NULL) {
		WARN("port %u fail to create a verbs MR for address (%p)",
		     dev->data->port_id, (void *)addr);
		rte_errno = EINVAL;
		goto err_mrlock;
	}
	MLX4_ASSERT((uintptr_t)mr->ibv_mr->addr == data.start);
	MLX4_ASSERT(mr->ibv_mr->length == len);
	LIST_INSERT_HEAD(&priv->mr.mr_list, mr, mr);
	DEBUG("port %u MR CREATED (%p) for %p:",
	      dev->data->port_id, (void *)mr, (void *)addr);
	DEBUG("  [0x%" PRIxPTR ", 0x%" PRIxPTR "),"
	      " lkey=0x%x base_idx=%u ms_n=%u, ms_bmp_n=%u",
	      data.start, data.end, rte_cpu_to_be_32(mr->ibv_mr->lkey),
	      mr->ms_base_idx, mr->ms_n, mr->ms_bmp_n);
	/* Insert to the global cache table. */
	mr_insert_dev_cache(dev, mr);
	/* Fill in output data. */
	mr_lookup_dev(dev, entry, addr);
	/* Lookup can't fail. */
	MLX4_ASSERT(entry->lkey != UINT32_MAX);
	rte_rwlock_write_unlock(&priv->mr.rwlock);
	rte_mcfg_mem_read_unlock();
	return entry->lkey;
err_mrlock:
	rte_rwlock_write_unlock(&priv->mr.rwlock);
err_memlock:
	rte_mcfg_mem_read_unlock();
err_nolock:
	/*
	 * In case of error, as this can be called in a datapath, a warning
	 * message per an error is preferable instead. Must be unlocked before
	 * calling rte_free() because mlx4_mr_mem_event_free_cb() can be called
	 * inside.
	 */
	mr_free(mr);
	return UINT32_MAX;
}
8199797bfccSYongseok Koh 
8209797bfccSYongseok Koh /**
821897dbd3cSViacheslav Ovsiienko  * Create a new global Memory Region (MR) for a missing virtual address.
8220b259b8eSYongseok Koh  * This can be called from primary and secondary process.
8230b259b8eSYongseok Koh  *
8240b259b8eSYongseok Koh  * @param dev
8250b259b8eSYongseok Koh  *   Pointer to Ethernet device.
8260b259b8eSYongseok Koh  * @param[out] entry
8270b259b8eSYongseok Koh  *   Pointer to returning MR cache entry, found in the global cache or newly
8280b259b8eSYongseok Koh  *   created. If failed to create one, this will not be updated.
8290b259b8eSYongseok Koh  * @param addr
8300b259b8eSYongseok Koh  *   Target virtual address to register.
8310b259b8eSYongseok Koh  *
8320b259b8eSYongseok Koh  * @return
8330b259b8eSYongseok Koh  *   Searched LKey on success, UINT32_MAX on failure and rte_errno is set.
8340b259b8eSYongseok Koh  */
8350b259b8eSYongseok Koh static uint32_t
8360b259b8eSYongseok Koh mlx4_mr_create(struct rte_eth_dev *dev, struct mlx4_mr_cache *entry,
8370b259b8eSYongseok Koh 	       uintptr_t addr)
8380b259b8eSYongseok Koh {
8390b259b8eSYongseok Koh 	uint32_t ret = 0;
8400b259b8eSYongseok Koh 
8410b259b8eSYongseok Koh 	switch (rte_eal_process_type()) {
8420b259b8eSYongseok Koh 	case RTE_PROC_PRIMARY:
8430b259b8eSYongseok Koh 		ret = mlx4_mr_create_primary(dev, entry, addr);
8440b259b8eSYongseok Koh 		break;
8450b259b8eSYongseok Koh 	case RTE_PROC_SECONDARY:
8460b259b8eSYongseok Koh 		ret = mlx4_mr_create_secondary(dev, entry, addr);
8470b259b8eSYongseok Koh 		break;
8480b259b8eSYongseok Koh 	default:
8490b259b8eSYongseok Koh 		break;
8500b259b8eSYongseok Koh 	}
8510b259b8eSYongseok Koh 	return ret;
8520b259b8eSYongseok Koh }
8530b259b8eSYongseok Koh 
8540b259b8eSYongseok Koh /**
8559797bfccSYongseok Koh  * Rebuild the global B-tree cache of device from the original MR list.
8569797bfccSYongseok Koh  *
8579797bfccSYongseok Koh  * @param dev
8589797bfccSYongseok Koh  *   Pointer to Ethernet device.
8599797bfccSYongseok Koh  */
8609797bfccSYongseok Koh static void
8619797bfccSYongseok Koh mr_rebuild_dev_cache(struct rte_eth_dev *dev)
8629797bfccSYongseok Koh {
863dbeba4cfSThomas Monjalon 	struct mlx4_priv *priv = dev->data->dev_private;
8649797bfccSYongseok Koh 	struct mlx4_mr *mr;
8659797bfccSYongseok Koh 
8669797bfccSYongseok Koh 	DEBUG("port %u rebuild dev cache[]", dev->data->port_id);
8679797bfccSYongseok Koh 	/* Flush cache to rebuild. */
8689797bfccSYongseok Koh 	priv->mr.cache.len = 1;
8699797bfccSYongseok Koh 	priv->mr.cache.overflow = 0;
8709797bfccSYongseok Koh 	/* Iterate all the existing MRs. */
8719797bfccSYongseok Koh 	LIST_FOREACH(mr, &priv->mr.mr_list, mr)
8729797bfccSYongseok Koh 		if (mr_insert_dev_cache(dev, mr) < 0)
8739797bfccSYongseok Koh 			return;
8749797bfccSYongseok Koh }
8759797bfccSYongseok Koh 
8769797bfccSYongseok Koh /**
8779797bfccSYongseok Koh  * Callback for memory free event. Iterate freed memsegs and check whether it
8789797bfccSYongseok Koh  * belongs to an existing MR. If found, clear the bit from bitmap of MR. As a
8799797bfccSYongseok Koh  * result, the MR would be fragmented. If it becomes empty, the MR will be freed
8809797bfccSYongseok Koh  * later by mlx4_mr_garbage_collect().
8819797bfccSYongseok Koh  *
8829797bfccSYongseok Koh  * The global cache must be rebuilt if there's any change and this event has to
8839797bfccSYongseok Koh  * be propagated to dataplane threads to flush the local caches.
8849797bfccSYongseok Koh  *
8859797bfccSYongseok Koh  * @param dev
8869797bfccSYongseok Koh  *   Pointer to Ethernet device.
8879797bfccSYongseok Koh  * @param addr
8889797bfccSYongseok Koh  *   Address of freed memory.
8899797bfccSYongseok Koh  * @param len
8909797bfccSYongseok Koh  *   Size of freed memory.
8919797bfccSYongseok Koh  */
static void
mlx4_mr_mem_event_free_cb(struct rte_eth_dev *dev, const void *addr, size_t len)
{
	struct mlx4_priv *priv = dev->data->dev_private;
	const struct rte_memseg_list *msl;
	struct mlx4_mr *mr;
	int ms_n;
	int i;
	/* Set when any MR lost a memseg; the global cache must be rebuilt. */
	int rebuild = 0;

	DEBUG("port %u free callback: addr=%p, len=%zu",
	      dev->data->port_id, addr, len);
	msl = rte_mem_virt2memseg_list(addr);
	/* addr and len must be page-aligned. */
	MLX4_ASSERT((uintptr_t)addr ==
		    RTE_ALIGN((uintptr_t)addr, msl->page_sz));
	MLX4_ASSERT(len == RTE_ALIGN(len, msl->page_sz));
	ms_n = len / msl->page_sz;
	rte_rwlock_write_lock(&priv->mr.rwlock);
	/* Clear bits of freed memsegs from MR. */
	for (i = 0; i < ms_n; ++i) {
		const struct rte_memseg *ms;
		struct mlx4_mr_cache entry;
		uintptr_t start;
		int ms_idx;
		uint32_t pos;

		/* Find MR having this memseg. */
		start = (uintptr_t)addr + i * msl->page_sz;
		mr = mr_lookup_dev_list(dev, &entry, start);
		if (mr == NULL)
			continue;
		MLX4_ASSERT(mr->msl); /* Can't be external memory. */
		ms = rte_mem_virt2memseg((void *)start, msl);
		MLX4_ASSERT(ms != NULL);
		MLX4_ASSERT(msl->page_sz == ms->hugepage_sz);
		ms_idx = rte_fbarray_find_idx(&msl->memseg_arr, ms);
		/* Translate global memseg index to the MR's bitmap position. */
		pos = ms_idx - mr->ms_base_idx;
		MLX4_ASSERT(rte_bitmap_get(mr->ms_bmp, pos));
		MLX4_ASSERT(pos < mr->ms_bmp_n);
		DEBUG("port %u MR(%p): clear bitmap[%u] for addr %p",
		      dev->data->port_id, (void *)mr, pos, (void *)start);
		rte_bitmap_clear(mr->ms_bmp, pos);
		if (--mr->ms_n == 0) {
			/* MR is now empty: detach it for later garbage
			 * collection by mlx4_mr_garbage_collect().
			 */
			LIST_REMOVE(mr, mr);
			LIST_INSERT_HEAD(&priv->mr.mr_free_list, mr, mr);
			DEBUG("port %u remove MR(%p) from list",
			      dev->data->port_id, (void *)mr);
		}
		/*
		 * MR is fragmented or will be freed. the global cache must be
		 * rebuilt.
		 */
		rebuild = 1;
	}
	if (rebuild) {
		mr_rebuild_dev_cache(dev);
		/*
		 * No explicit wmb is needed after updating dev_gen due to
		 * store-release ordering in unlock that provides the
		 * implicit barrier at the software visible level.
		 */
		++priv->mr.dev_gen;
		DEBUG("broadcasting local cache flush, gen=%d",
		      priv->mr.dev_gen);
	}
	rte_rwlock_write_unlock(&priv->mr.rwlock);
#ifdef RTE_LIBRTE_MLX4_DEBUG
	/* Dump the resulting MR layout for debugging builds only. */
	if (rebuild)
		mlx4_mr_dump_dev(dev);
#endif
}
9649797bfccSYongseok Koh 
9659797bfccSYongseok Koh /**
9669797bfccSYongseok Koh  * Callback for memory event.
9679797bfccSYongseok Koh  *
9689797bfccSYongseok Koh  * @param event_type
9699797bfccSYongseok Koh  *   Memory event type.
9709797bfccSYongseok Koh  * @param addr
9719797bfccSYongseok Koh  *   Address of memory.
9729797bfccSYongseok Koh  * @param len
9739797bfccSYongseok Koh  *   Size of memory.
9749797bfccSYongseok Koh  */
9759797bfccSYongseok Koh void
9769797bfccSYongseok Koh mlx4_mr_mem_event_cb(enum rte_mem_event event_type, const void *addr,
9779797bfccSYongseok Koh 		     size_t len, void *arg __rte_unused)
9789797bfccSYongseok Koh {
979dbeba4cfSThomas Monjalon 	struct mlx4_priv *priv;
9800203d33aSYongseok Koh 	struct mlx4_dev_list *dev_list = &mlx4_shared_data->mem_event_cb_list;
9819797bfccSYongseok Koh 
9820203d33aSYongseok Koh 	/* Must be called from the primary process. */
9838e08df22SAlexander Kozyrev 	MLX4_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
9849797bfccSYongseok Koh 	switch (event_type) {
9859797bfccSYongseok Koh 	case RTE_MEM_EVENT_FREE:
9860203d33aSYongseok Koh 		rte_rwlock_read_lock(&mlx4_shared_data->mem_event_rwlock);
9879797bfccSYongseok Koh 		/* Iterate all the existing mlx4 devices. */
9880203d33aSYongseok Koh 		LIST_FOREACH(priv, dev_list, mem_event_cb)
989099c2c53SYongseok Koh 			mlx4_mr_mem_event_free_cb(ETH_DEV(priv), addr, len);
9900203d33aSYongseok Koh 		rte_rwlock_read_unlock(&mlx4_shared_data->mem_event_rwlock);
9919797bfccSYongseok Koh 		break;
9929797bfccSYongseok Koh 	case RTE_MEM_EVENT_ALLOC:
9939797bfccSYongseok Koh 	default:
9949797bfccSYongseok Koh 		break;
9959797bfccSYongseok Koh 	}
9969797bfccSYongseok Koh }
9979797bfccSYongseok Koh 
9989797bfccSYongseok Koh /**
9999797bfccSYongseok Koh  * Look up address in the global MR cache table. If not found, create a new MR.
10009797bfccSYongseok Koh  * Insert the found/created entry to local bottom-half cache table.
10019797bfccSYongseok Koh  *
10029797bfccSYongseok Koh  * @param dev
10039797bfccSYongseok Koh  *   Pointer to Ethernet device.
10049797bfccSYongseok Koh  * @param mr_ctrl
10059797bfccSYongseok Koh  *   Pointer to per-queue MR control structure.
10069797bfccSYongseok Koh  * @param[out] entry
10079797bfccSYongseok Koh  *   Pointer to returning MR cache entry, found in the global cache or newly
10089797bfccSYongseok Koh  *   created. If failed to create one, this is not written.
10099797bfccSYongseok Koh  * @param addr
10109797bfccSYongseok Koh  *   Search key.
10119797bfccSYongseok Koh  *
10129797bfccSYongseok Koh  * @return
10139797bfccSYongseok Koh  *   Searched LKey on success, UINT32_MAX on no match.
10149797bfccSYongseok Koh  */
10159797bfccSYongseok Koh static uint32_t
10169797bfccSYongseok Koh mlx4_mr_lookup_dev(struct rte_eth_dev *dev, struct mlx4_mr_ctrl *mr_ctrl,
10179797bfccSYongseok Koh 		   struct mlx4_mr_cache *entry, uintptr_t addr)
10189797bfccSYongseok Koh {
1019dbeba4cfSThomas Monjalon 	struct mlx4_priv *priv = dev->data->dev_private;
10209797bfccSYongseok Koh 	struct mlx4_mr_btree *bt = &mr_ctrl->cache_bh;
10219797bfccSYongseok Koh 	uint16_t idx;
10229797bfccSYongseok Koh 	uint32_t lkey;
10239797bfccSYongseok Koh 
10249797bfccSYongseok Koh 	/* If local cache table is full, try to double it. */
10259797bfccSYongseok Koh 	if (unlikely(bt->len == bt->size))
10269797bfccSYongseok Koh 		mr_btree_expand(bt, bt->size << 1);
10279797bfccSYongseok Koh 	/* Look up in the global cache. */
10289797bfccSYongseok Koh 	rte_rwlock_read_lock(&priv->mr.rwlock);
10299797bfccSYongseok Koh 	lkey = mr_btree_lookup(&priv->mr.cache, &idx, addr);
10309797bfccSYongseok Koh 	if (lkey != UINT32_MAX) {
10319797bfccSYongseok Koh 		/* Found. */
10329797bfccSYongseok Koh 		*entry = (*priv->mr.cache.table)[idx];
10339797bfccSYongseok Koh 		rte_rwlock_read_unlock(&priv->mr.rwlock);
10349797bfccSYongseok Koh 		/*
10359797bfccSYongseok Koh 		 * Update local cache. Even if it fails, return the found entry
10369797bfccSYongseok Koh 		 * to update top-half cache. Next time, this entry will be found
10379797bfccSYongseok Koh 		 * in the global cache.
10389797bfccSYongseok Koh 		 */
10399797bfccSYongseok Koh 		mr_btree_insert(bt, entry);
10409797bfccSYongseok Koh 		return lkey;
10419797bfccSYongseok Koh 	}
10429797bfccSYongseok Koh 	rte_rwlock_read_unlock(&priv->mr.rwlock);
10439797bfccSYongseok Koh 	/* First time to see the address? Create a new MR. */
10449797bfccSYongseok Koh 	lkey = mlx4_mr_create(dev, entry, addr);
10459797bfccSYongseok Koh 	/*
10469797bfccSYongseok Koh 	 * Update the local cache if successfully created a new global MR. Even
10479797bfccSYongseok Koh 	 * if failed to create one, there's no action to take in this datapath
10489797bfccSYongseok Koh 	 * code. As returning LKey is invalid, this will eventually make HW
10499797bfccSYongseok Koh 	 * fail.
10509797bfccSYongseok Koh 	 */
10519797bfccSYongseok Koh 	if (lkey != UINT32_MAX)
10529797bfccSYongseok Koh 		mr_btree_insert(bt, entry);
10539797bfccSYongseok Koh 	return lkey;
10549797bfccSYongseok Koh }
10559797bfccSYongseok Koh 
10569797bfccSYongseok Koh /**
10579797bfccSYongseok Koh  * Bottom-half of LKey search on datapath. Firstly search in cache_bh[] and if
10589797bfccSYongseok Koh  * misses, search in the global MR cache table and update the new entry to
10599797bfccSYongseok Koh  * per-queue local caches.
10609797bfccSYongseok Koh  *
10619797bfccSYongseok Koh  * @param dev
10629797bfccSYongseok Koh  *   Pointer to Ethernet device.
10639797bfccSYongseok Koh  * @param mr_ctrl
10649797bfccSYongseok Koh  *   Pointer to per-queue MR control structure.
10659797bfccSYongseok Koh  * @param addr
10669797bfccSYongseok Koh  *   Search key.
10679797bfccSYongseok Koh  *
10689797bfccSYongseok Koh  * @return
10699797bfccSYongseok Koh  *   Searched LKey on success, UINT32_MAX on no match.
10709797bfccSYongseok Koh  */
10719797bfccSYongseok Koh static uint32_t
10729797bfccSYongseok Koh mlx4_mr_addr2mr_bh(struct rte_eth_dev *dev, struct mlx4_mr_ctrl *mr_ctrl,
10739797bfccSYongseok Koh 		   uintptr_t addr)
10749797bfccSYongseok Koh {
10759797bfccSYongseok Koh 	uint32_t lkey;
10769797bfccSYongseok Koh 	uint16_t bh_idx = 0;
10779797bfccSYongseok Koh 	/* Victim in top-half cache to replace with new entry. */
10789797bfccSYongseok Koh 	struct mlx4_mr_cache *repl = &mr_ctrl->cache[mr_ctrl->head];
10799797bfccSYongseok Koh 
10809797bfccSYongseok Koh 	/* Binary-search MR translation table. */
10819797bfccSYongseok Koh 	lkey = mr_btree_lookup(&mr_ctrl->cache_bh, &bh_idx, addr);
10829797bfccSYongseok Koh 	/* Update top-half cache. */
10839797bfccSYongseok Koh 	if (likely(lkey != UINT32_MAX)) {
10849797bfccSYongseok Koh 		*repl = (*mr_ctrl->cache_bh.table)[bh_idx];
10859797bfccSYongseok Koh 	} else {
10869797bfccSYongseok Koh 		/*
10879797bfccSYongseok Koh 		 * If missed in local lookup table, search in the global cache
10889797bfccSYongseok Koh 		 * and local cache_bh[] will be updated inside if possible.
10899797bfccSYongseok Koh 		 * Top-half cache entry will also be updated.
10909797bfccSYongseok Koh 		 */
10919797bfccSYongseok Koh 		lkey = mlx4_mr_lookup_dev(dev, mr_ctrl, repl, addr);
10929797bfccSYongseok Koh 		if (unlikely(lkey == UINT32_MAX))
10939797bfccSYongseok Koh 			return UINT32_MAX;
10949797bfccSYongseok Koh 	}
10959797bfccSYongseok Koh 	/* Update the most recently used entry. */
10969797bfccSYongseok Koh 	mr_ctrl->mru = mr_ctrl->head;
10979797bfccSYongseok Koh 	/* Point to the next victim, the oldest. */
10989797bfccSYongseok Koh 	mr_ctrl->head = (mr_ctrl->head + 1) % MLX4_MR_CACHE_N;
10999797bfccSYongseok Koh 	return lkey;
11009797bfccSYongseok Koh }
11019797bfccSYongseok Koh 
11029797bfccSYongseok Koh /**
11039797bfccSYongseok Koh  * Bottom-half of LKey search on Rx.
11049797bfccSYongseok Koh  *
11059797bfccSYongseok Koh  * @param rxq
11069797bfccSYongseok Koh  *   Pointer to Rx queue structure.
11079797bfccSYongseok Koh  * @param addr
11089797bfccSYongseok Koh  *   Search key.
11099797bfccSYongseok Koh  *
11109797bfccSYongseok Koh  * @return
11119797bfccSYongseok Koh  *   Searched LKey on success, UINT32_MAX on no match.
11129797bfccSYongseok Koh  */
11139797bfccSYongseok Koh uint32_t
11149797bfccSYongseok Koh mlx4_rx_addr2mr_bh(struct rxq *rxq, uintptr_t addr)
11159797bfccSYongseok Koh {
11169797bfccSYongseok Koh 	struct mlx4_mr_ctrl *mr_ctrl = &rxq->mr_ctrl;
1117dbeba4cfSThomas Monjalon 	struct mlx4_priv *priv = rxq->priv;
11189797bfccSYongseok Koh 
1119099c2c53SYongseok Koh 	return mlx4_mr_addr2mr_bh(ETH_DEV(priv), mr_ctrl, addr);
11209797bfccSYongseok Koh }
11219797bfccSYongseok Koh 
11229797bfccSYongseok Koh /**
11239797bfccSYongseok Koh  * Bottom-half of LKey search on Tx.
11249797bfccSYongseok Koh  *
11259797bfccSYongseok Koh  * @param txq
11269797bfccSYongseok Koh  *   Pointer to Tx queue structure.
11279797bfccSYongseok Koh  * @param addr
11289797bfccSYongseok Koh  *   Search key.
11299797bfccSYongseok Koh  *
11309797bfccSYongseok Koh  * @return
11319797bfccSYongseok Koh  *   Searched LKey on success, UINT32_MAX on no match.
11329797bfccSYongseok Koh  */
113319487763SYongseok Koh static uint32_t
11349797bfccSYongseok Koh mlx4_tx_addr2mr_bh(struct txq *txq, uintptr_t addr)
11359797bfccSYongseok Koh {
11369797bfccSYongseok Koh 	struct mlx4_mr_ctrl *mr_ctrl = &txq->mr_ctrl;
1137dbeba4cfSThomas Monjalon 	struct mlx4_priv *priv = txq->priv;
11389797bfccSYongseok Koh 
1139099c2c53SYongseok Koh 	return mlx4_mr_addr2mr_bh(ETH_DEV(priv), mr_ctrl, addr);
11409797bfccSYongseok Koh }
11419797bfccSYongseok Koh 
11429797bfccSYongseok Koh /**
114319487763SYongseok Koh  * Bottom-half of LKey search on Tx. If it can't be searched in the memseg
114419487763SYongseok Koh  * list, register the mempool of the mbuf as externally allocated memory.
114519487763SYongseok Koh  *
114619487763SYongseok Koh  * @param txq
114719487763SYongseok Koh  *   Pointer to Tx queue structure.
114819487763SYongseok Koh  * @param mb
114919487763SYongseok Koh  *   Pointer to mbuf.
115019487763SYongseok Koh  *
115119487763SYongseok Koh  * @return
115219487763SYongseok Koh  *   Searched LKey on success, UINT32_MAX on no match.
115319487763SYongseok Koh  */
115419487763SYongseok Koh uint32_t
115519487763SYongseok Koh mlx4_tx_mb2mr_bh(struct txq *txq, struct rte_mbuf *mb)
115619487763SYongseok Koh {
115719487763SYongseok Koh 	uintptr_t addr = (uintptr_t)mb->buf_addr;
115819487763SYongseok Koh 	uint32_t lkey;
115919487763SYongseok Koh 
116019487763SYongseok Koh 	lkey = mlx4_tx_addr2mr_bh(txq, addr);
116119487763SYongseok Koh 	if (lkey == UINT32_MAX && rte_errno == ENXIO) {
116219487763SYongseok Koh 		/* Mempool may have externally allocated memory. */
116319487763SYongseok Koh 		return mlx4_tx_update_ext_mp(txq, addr, mlx4_mb2mp(mb));
116419487763SYongseok Koh 	}
116519487763SYongseok Koh 	return lkey;
116619487763SYongseok Koh }
116719487763SYongseok Koh 
116819487763SYongseok Koh /**
11699797bfccSYongseok Koh  * Flush all of the local cache entries.
11709797bfccSYongseok Koh  *
11719797bfccSYongseok Koh  * @param mr_ctrl
11729797bfccSYongseok Koh  *   Pointer to per-queue MR control structure.
11739797bfccSYongseok Koh  */
void
mlx4_mr_flush_local_cache(struct mlx4_mr_ctrl *mr_ctrl)
{
	/* Reset the most-recently-used index. */
	mr_ctrl->mru = 0;
	/* Reset the linear search array. */
	mr_ctrl->head = 0;
	memset(mr_ctrl->cache, 0, sizeof(mr_ctrl->cache));
	/* Reset the B-tree table. */
	/* NOTE(review): len is reset to 1, not 0, matching
	 * mr_rebuild_dev_cache() — presumably index 0 holds a reserved
	 * sentinel entry; confirm against the B-tree implementation.
	 */
	mr_ctrl->cache_bh.len = 1;
	mr_ctrl->cache_bh.overflow = 0;
	/* Update the generation number. */
	/* Snapshot the device generation after flushing so a concurrent
	 * bump is not masked by stale cached entries — TODO confirm the
	 * intended ordering requirement.
	 */
	mr_ctrl->cur_gen = *mr_ctrl->dev_gen_ptr;
	DEBUG("mr_ctrl(%p): flushed, cur_gen=%d",
	      (void *)mr_ctrl, mr_ctrl->cur_gen);
}
11909797bfccSYongseok Koh 
/**
 * Called during rte_mempool_mem_iter() by mlx4_mr_update_ext_mp().
 *
 * Externally allocated chunk is registered and a MR is created for the chunk.
 * The MR object is added to the global list. If memseg list of a MR object
 * (mr->msl) is null, the MR object can be regarded as externally allocated
 * memory.
 *
 * Once external memory is registered, it should be static. If the memory is
 * freed and the virtual address range has different physical memory mapped
 * again, it may cause crash on device due to the wrong translation entry. PMD
 * can't track the free event of the external memory for now.
 *
 * On any failure (allocation or verbs registration), data->ret is set to -1
 * so that the mempool iteration can report the error to the caller.
 */
static void
mlx4_mr_update_ext_mp_cb(struct rte_mempool *mp, void *opaque,
			 struct rte_mempool_memhdr *memhdr,
			 unsigned mem_idx __rte_unused)
{
	struct mr_update_mp_data *data = opaque;
	struct rte_eth_dev *dev = data->dev;
	struct mlx4_priv *priv = dev->data->dev_private;
	struct mlx4_mr_ctrl *mr_ctrl = data->mr_ctrl;
	struct mlx4_mr *mr = NULL;
	uintptr_t addr = (uintptr_t)memhdr->addr;
	size_t len = memhdr->len;
	struct mlx4_mr_cache entry;
	uint32_t lkey;

	/* Registration must happen in the primary process only. */
	MLX4_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
	/* If already registered, it should return. */
	rte_rwlock_read_lock(&priv->mr.rwlock);
	lkey = mr_lookup_dev(dev, &entry, addr);
	rte_rwlock_read_unlock(&priv->mr.rwlock);
	if (lkey != UINT32_MAX)
		return;
	/* Allocate the MR object on the mempool's socket for locality. */
	mr = rte_zmalloc_socket(NULL,
				RTE_ALIGN_CEIL(sizeof(*mr),
					       RTE_CACHE_LINE_SIZE),
				RTE_CACHE_LINE_SIZE, mp->socket_id);
	if (mr == NULL) {
		WARN("port %u unable to allocate memory for a new MR of"
		     " mempool (%s).",
		     dev->data->port_id, mp->name);
		data->ret = -1;
		return;
	}
	DEBUG("port %u register MR for chunk #%d of mempool (%s)",
	      dev->data->port_id, mem_idx, mp->name);
	mr->ibv_mr = mlx4_glue->reg_mr(priv->pd, (void *)addr, len,
				       IBV_ACCESS_LOCAL_WRITE);
	if (mr->ibv_mr == NULL) {
		WARN("port %u fail to create a verbs MR for address (%p)",
		     dev->data->port_id, (void *)addr);
		rte_free(mr);
		data->ret = -1;
		return;
	}
	mr->msl = NULL; /* Mark it is external memory. */
	mr->ms_bmp = NULL;
	/* External memory is tracked as a single pseudo-memseg. */
	mr->ms_n = 1;
	mr->ms_bmp_n = 1;
	/* Publish the MR under the write lock. */
	rte_rwlock_write_lock(&priv->mr.rwlock);
	LIST_INSERT_HEAD(&priv->mr.mr_list, mr, mr);
	DEBUG("port %u MR CREATED (%p) for external memory %p:",
	      dev->data->port_id, (void *)mr, (void *)addr);
	DEBUG("  [0x%" PRIxPTR ", 0x%" PRIxPTR "),"
	      " lkey=0x%x base_idx=%u ms_n=%u, ms_bmp_n=%u",
	      addr, addr + len, rte_cpu_to_be_32(mr->ibv_mr->lkey),
	      mr->ms_base_idx, mr->ms_n, mr->ms_bmp_n);
	/* Insert to the global cache table. */
	mr_insert_dev_cache(dev, mr);
	rte_rwlock_write_unlock(&priv->mr.rwlock);
	/* Insert to the local cache table */
	mlx4_mr_addr2mr_bh(dev, mr_ctrl, addr);
}
126631912d99SYongseok Koh 
126731912d99SYongseok Koh /**
126831912d99SYongseok Koh  * Register MR for entire memory chunks in a Mempool having externally allocated
126931912d99SYongseok Koh  * memory and fill in local cache.
127031912d99SYongseok Koh  *
127131912d99SYongseok Koh  * @param dev
127231912d99SYongseok Koh  *   Pointer to Ethernet device.
127331912d99SYongseok Koh  * @param mr_ctrl
127431912d99SYongseok Koh  *   Pointer to per-queue MR control structure.
127531912d99SYongseok Koh  * @param mp
127631912d99SYongseok Koh  *   Pointer to registering Mempool.
127731912d99SYongseok Koh  *
127831912d99SYongseok Koh  * @return
127931912d99SYongseok Koh  *   0 on success, -1 on failure.
128031912d99SYongseok Koh  */
128131912d99SYongseok Koh static uint32_t
128231912d99SYongseok Koh mlx4_mr_update_ext_mp(struct rte_eth_dev *dev, struct mlx4_mr_ctrl *mr_ctrl,
128331912d99SYongseok Koh 		      struct rte_mempool *mp)
128431912d99SYongseok Koh {
128531912d99SYongseok Koh 	struct mr_update_mp_data data = {
128631912d99SYongseok Koh 		.dev = dev,
128731912d99SYongseok Koh 		.mr_ctrl = mr_ctrl,
128831912d99SYongseok Koh 		.ret = 0,
128931912d99SYongseok Koh 	};
129031912d99SYongseok Koh 
129131912d99SYongseok Koh 	rte_mempool_mem_iter(mp, mlx4_mr_update_ext_mp_cb, &data);
129231912d99SYongseok Koh 	return data.ret;
129331912d99SYongseok Koh }
129431912d99SYongseok Koh 
/**
 * Register MR entire memory chunks in a Mempool having externally allocated
 * memory and search LKey of the address to return.
 *
 * @param txq
 *   Pointer to Tx queue structure.
 * @param addr
 *   Search key.
 * @param mp
 *   Pointer to registering Mempool where addr belongs.
 *
 * @return
 *   LKey for address on success, UINT32_MAX on failure.
 */
uint32_t
mlx4_tx_update_ext_mp(struct txq *txq, uintptr_t addr, struct rte_mempool *mp)
{
	struct mlx4_mr_ctrl *mr_ctrl = &txq->mr_ctrl;
	struct mlx4_priv *priv = txq->priv;

	/* Registration is only allowed in the primary process. */
	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
		WARN("port %u using address (%p) from unregistered mempool"
		     " having externally allocated memory"
		     " in secondary process, please create mempool"
		     " prior to rte_eth_dev_start()",
		     PORT_ID(priv), (void *)addr);
		return UINT32_MAX;
	}
	/* Register the mempool chunks, then retry the lookup. */
	mlx4_mr_update_ext_mp(ETH_DEV(priv), mr_ctrl, mp);
	return mlx4_tx_addr2mr_bh(txq, addr);
}
132631912d99SYongseok Koh 
13279797bfccSYongseok Koh /* Called during rte_mempool_mem_iter() by mlx4_mr_update_mp(). */
13289797bfccSYongseok Koh static void
13299797bfccSYongseok Koh mlx4_mr_update_mp_cb(struct rte_mempool *mp __rte_unused, void *opaque,
13309797bfccSYongseok Koh 		     struct rte_mempool_memhdr *memhdr,
13319797bfccSYongseok Koh 		     unsigned mem_idx __rte_unused)
13329797bfccSYongseok Koh {
13339797bfccSYongseok Koh 	struct mr_update_mp_data *data = opaque;
13349797bfccSYongseok Koh 	uint32_t lkey;
13359797bfccSYongseok Koh 
13369797bfccSYongseok Koh 	/* Stop iteration if failed in the previous walk. */
13379797bfccSYongseok Koh 	if (data->ret < 0)
13389797bfccSYongseok Koh 		return;
13399797bfccSYongseok Koh 	/* Register address of the chunk and update local caches. */
13409797bfccSYongseok Koh 	lkey = mlx4_mr_addr2mr_bh(data->dev, data->mr_ctrl,
13419797bfccSYongseok Koh 				  (uintptr_t)memhdr->addr);
13429797bfccSYongseok Koh 	if (lkey == UINT32_MAX)
13439797bfccSYongseok Koh 		data->ret = -1;
13449797bfccSYongseok Koh }
13459797bfccSYongseok Koh 
13469797bfccSYongseok Koh /**
13479797bfccSYongseok Koh  * Register entire memory chunks in a Mempool.
13489797bfccSYongseok Koh  *
13499797bfccSYongseok Koh  * @param dev
13509797bfccSYongseok Koh  *   Pointer to Ethernet device.
13519797bfccSYongseok Koh  * @param mr_ctrl
13529797bfccSYongseok Koh  *   Pointer to per-queue MR control structure.
13539797bfccSYongseok Koh  * @param mp
13549797bfccSYongseok Koh  *   Pointer to registering Mempool.
13559797bfccSYongseok Koh  *
13569797bfccSYongseok Koh  * @return
13579797bfccSYongseok Koh  *   0 on success, -1 on failure.
13589797bfccSYongseok Koh  */
13599797bfccSYongseok Koh int
13609797bfccSYongseok Koh mlx4_mr_update_mp(struct rte_eth_dev *dev, struct mlx4_mr_ctrl *mr_ctrl,
13619797bfccSYongseok Koh 		  struct rte_mempool *mp)
13629797bfccSYongseok Koh {
13639797bfccSYongseok Koh 	struct mr_update_mp_data data = {
13649797bfccSYongseok Koh 		.dev = dev,
13659797bfccSYongseok Koh 		.mr_ctrl = mr_ctrl,
13669797bfccSYongseok Koh 		.ret = 0,
13679797bfccSYongseok Koh 	};
13689797bfccSYongseok Koh 
13699797bfccSYongseok Koh 	rte_mempool_mem_iter(mp, mlx4_mr_update_mp_cb, &data);
137031912d99SYongseok Koh 	if (data.ret < 0 && rte_errno == ENXIO) {
137131912d99SYongseok Koh 		/* Mempool may have externally allocated memory. */
137231912d99SYongseok Koh 		return mlx4_mr_update_ext_mp(dev, mr_ctrl, mp);
137331912d99SYongseok Koh 	}
13749797bfccSYongseok Koh 	return data.ret;
13759797bfccSYongseok Koh }
13769797bfccSYongseok Koh 
1377e99fdaa7SAlexander Kozyrev #ifdef RTE_LIBRTE_MLX4_DEBUG
13789797bfccSYongseok Koh /**
13799797bfccSYongseok Koh  * Dump all the created MRs and the global cache entries.
13809797bfccSYongseok Koh  *
13819797bfccSYongseok Koh  * @param dev
13829797bfccSYongseok Koh  *   Pointer to Ethernet device.
13839797bfccSYongseok Koh  */
13849797bfccSYongseok Koh void
13859797bfccSYongseok Koh mlx4_mr_dump_dev(struct rte_eth_dev *dev)
13869797bfccSYongseok Koh {
1387dbeba4cfSThomas Monjalon 	struct mlx4_priv *priv = dev->data->dev_private;
13889797bfccSYongseok Koh 	struct mlx4_mr *mr;
13899797bfccSYongseok Koh 	int mr_n = 0;
13909797bfccSYongseok Koh 	int chunk_n = 0;
13919797bfccSYongseok Koh 
13929797bfccSYongseok Koh 	rte_rwlock_read_lock(&priv->mr.rwlock);
13939797bfccSYongseok Koh 	/* Iterate all the existing MRs. */
13949797bfccSYongseok Koh 	LIST_FOREACH(mr, &priv->mr.mr_list, mr) {
13959797bfccSYongseok Koh 		unsigned int n;
13969797bfccSYongseok Koh 
13979797bfccSYongseok Koh 		DEBUG("port %u MR[%u], LKey = 0x%x, ms_n = %u, ms_bmp_n = %u",
13989797bfccSYongseok Koh 		      dev->data->port_id, mr_n++,
13999797bfccSYongseok Koh 		      rte_cpu_to_be_32(mr->ibv_mr->lkey),
14009797bfccSYongseok Koh 		      mr->ms_n, mr->ms_bmp_n);
14019797bfccSYongseok Koh 		if (mr->ms_n == 0)
14029797bfccSYongseok Koh 			continue;
14039797bfccSYongseok Koh 		for (n = 0; n < mr->ms_bmp_n; ) {
1404d924d6b9SAli Alnubani 			struct mlx4_mr_cache ret;
14059797bfccSYongseok Koh 
1406d924d6b9SAli Alnubani 			memset(&ret, 0, sizeof(ret));
14079797bfccSYongseok Koh 			n = mr_find_next_chunk(mr, &ret, n);
14089797bfccSYongseok Koh 			if (!ret.end)
14099797bfccSYongseok Koh 				break;
14109797bfccSYongseok Koh 			DEBUG("  chunk[%u], [0x%" PRIxPTR ", 0x%" PRIxPTR ")",
14119797bfccSYongseok Koh 			      chunk_n++, ret.start, ret.end);
14129797bfccSYongseok Koh 		}
14139797bfccSYongseok Koh 	}
14149797bfccSYongseok Koh 	DEBUG("port %u dumping global cache", dev->data->port_id);
14159797bfccSYongseok Koh 	mlx4_mr_btree_dump(&priv->mr.cache);
14169797bfccSYongseok Koh 	rte_rwlock_read_unlock(&priv->mr.rwlock);
14179797bfccSYongseok Koh }
14189797bfccSYongseok Koh #endif
14199797bfccSYongseok Koh 
/**
 * Release all the created MRs and resources. Remove device from memory callback
 * list.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
void
mlx4_mr_release(struct rte_eth_dev *dev)
{
	struct mlx4_priv *priv = dev->data->dev_private;
	struct mlx4_mr *mr_next;

	/* Remove from memory callback device list. */
	rte_rwlock_write_lock(&mlx4_shared_data->mem_event_rwlock);
	LIST_REMOVE(priv, mem_event_cb);
	rte_rwlock_write_unlock(&mlx4_shared_data->mem_event_rwlock);
#ifdef RTE_LIBRTE_MLX4_DEBUG
	mlx4_mr_dump_dev(dev);
#endif
	rte_rwlock_write_lock(&priv->mr.rwlock);
	/* Detach from MR list and move to free list. */
	mr_next = LIST_FIRST(&priv->mr.mr_list);
	while (mr_next != NULL) {
		struct mlx4_mr *mr = mr_next;

		/* Save the successor before LIST_REMOVE() unlinks this node. */
		mr_next = LIST_NEXT(mr, mr);
		LIST_REMOVE(mr, mr);
		LIST_INSERT_HEAD(&priv->mr.mr_free_list, mr, mr);
	}
	LIST_INIT(&priv->mr.mr_list);
	/* Free global cache. */
	mlx4_mr_btree_free(&priv->mr.cache);
	rte_rwlock_write_unlock(&priv->mr.rwlock);
	/* Free all remaining MRs. */
	mlx4_mr_garbage_collect(dev);
}
1457