xref: /spdk/lib/rdma_utils/rdma_utils.c (revision 141dc9434b9db9f5f824ba54aaba59d4811bb583)
18a01b4d6SAlexey Marchuk /*   SPDX-License-Identifier: BSD-3-Clause
28a01b4d6SAlexey Marchuk  *   Copyright (c) Intel Corporation. All rights reserved.
38a01b4d6SAlexey Marchuk  *   Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
48a01b4d6SAlexey Marchuk  */
58a01b4d6SAlexey Marchuk 
68a01b4d6SAlexey Marchuk #include "spdk_internal/rdma_utils.h"
78a01b4d6SAlexey Marchuk 
88a01b4d6SAlexey Marchuk #include "spdk/log.h"
98a01b4d6SAlexey Marchuk #include "spdk/string.h"
108a01b4d6SAlexey Marchuk #include "spdk/likely.h"
1120b14cdcSJim Harris #include "spdk/net.h"
1220b14cdcSJim Harris #include "spdk/file.h"
138a01b4d6SAlexey Marchuk 
148a01b4d6SAlexey Marchuk #include "spdk_internal/assert.h"
158a01b4d6SAlexey Marchuk 
168a01b4d6SAlexey Marchuk #include <rdma/rdma_cma.h>
178a01b4d6SAlexey Marchuk #include <rdma/rdma_verbs.h>
188a01b4d6SAlexey Marchuk 
/* One opened RDMA device and the protection domain allocated on it. */
struct rdma_utils_device {
	struct ibv_pd			*pd;		/* PD allocated on this device via ibv_alloc_pd() */
	struct ibv_context		*context;	/* verbs context identifying the device */
	int				ref;		/* outstanding spdk_rdma_utils_get_pd() references */
	bool				removed;	/* device no longer reported by rdma_get_devices() */
	TAILQ_ENTRY(rdma_utils_device)	tailq;		/* link in g_dev_list */
};
268a01b4d6SAlexey Marchuk 
/* A reference-counted memory map keyed by (pd, access_flags); shared by all
 * callers that request the same combination.
 */
struct spdk_rdma_utils_mem_map {
	struct spdk_mem_map			*map;		/* underlying vaddr -> MR/key translation map */
	struct ibv_pd				*pd;		/* protection domain MRs are registered on */
	struct spdk_nvme_rdma_hooks		*hooks;		/* optional hooks; get_rkey bypasses ibv_reg_mr() */
	uint32_t				ref_count;	/* users sharing this map */
	uint32_t				access_flags;	/* ibv access flags used for MR registration */
	LIST_ENTRY(spdk_rdma_utils_mem_map)	link;		/* link in g_rdma_utils_mr_maps */
};
358a01b4d6SAlexey Marchuk 
/* A reference-counted SPDK memory domain created per protection domain. */
struct rdma_utils_memory_domain {
	TAILQ_ENTRY(rdma_utils_memory_domain) link;	/* link in g_memory_domains */
	uint32_t ref;					/* users sharing this domain */
	enum spdk_dma_device_type type;
	struct ibv_pd *pd;				/* PD this domain was created for */
	struct spdk_memory_domain *domain;		/* the SPDK memory domain handle returned to callers */
	struct spdk_memory_domain_rdma_ctx rdma_ctx;	/* RDMA-specific context embedded in the domain */
};
440a9c0239SAlexey Marchuk 
/* Protects g_ctx_list and g_dev_list. */
static pthread_mutex_t g_dev_mutex = PTHREAD_MUTEX_INITIALIZER;
/* Most recent NULL-terminated array returned by rdma_get_devices(); kept so
 * device contexts (and the PDs allocated on them) stay open between syncs. */
static struct ibv_context **g_ctx_list = NULL;
static TAILQ_HEAD(, rdma_utils_device) g_dev_list = TAILQ_HEAD_INITIALIZER(g_dev_list);

/* Shared memory maps, one per (pd, access_flags) combination. */
static LIST_HEAD(, spdk_rdma_utils_mem_map) g_rdma_utils_mr_maps = LIST_HEAD_INITIALIZER(
			&g_rdma_utils_mr_maps);
static pthread_mutex_t g_rdma_mr_maps_mutex = PTHREAD_MUTEX_INITIALIZER;

/* Shared memory domains, one per protection domain. */
static TAILQ_HEAD(, rdma_utils_memory_domain) g_memory_domains = TAILQ_HEAD_INITIALIZER(
			g_memory_domains);
static pthread_mutex_t g_memory_domains_lock = PTHREAD_MUTEX_INITIALIZER;
560a9c0239SAlexey Marchuk 
578a01b4d6SAlexey Marchuk static int
588a01b4d6SAlexey Marchuk rdma_utils_mem_notify(void *cb_ctx, struct spdk_mem_map *map,
598a01b4d6SAlexey Marchuk 		      enum spdk_mem_map_notify_action action,
608a01b4d6SAlexey Marchuk 		      void *vaddr, size_t size)
618a01b4d6SAlexey Marchuk {
628a01b4d6SAlexey Marchuk 	struct spdk_rdma_utils_mem_map *rmap = cb_ctx;
638a01b4d6SAlexey Marchuk 	struct ibv_pd *pd = rmap->pd;
648a01b4d6SAlexey Marchuk 	struct ibv_mr *mr;
658ffb2c09SAlexey Marchuk 	uint32_t access_flags;
668a01b4d6SAlexey Marchuk 	int rc;
678a01b4d6SAlexey Marchuk 
688a01b4d6SAlexey Marchuk 	switch (action) {
698a01b4d6SAlexey Marchuk 	case SPDK_MEM_MAP_NOTIFY_REGISTER:
708a01b4d6SAlexey Marchuk 		if (rmap->hooks && rmap->hooks->get_rkey) {
718a01b4d6SAlexey Marchuk 			rc = spdk_mem_map_set_translation(map, (uint64_t)vaddr, size,
728a01b4d6SAlexey Marchuk 							  rmap->hooks->get_rkey(pd, vaddr, size));
738a01b4d6SAlexey Marchuk 		} else {
748ffb2c09SAlexey Marchuk 			access_flags = rmap->access_flags;
758a01b4d6SAlexey Marchuk #ifdef IBV_ACCESS_OPTIONAL_FIRST
768a01b4d6SAlexey Marchuk 			access_flags |= IBV_ACCESS_RELAXED_ORDERING;
778a01b4d6SAlexey Marchuk #endif
788a01b4d6SAlexey Marchuk 			mr = ibv_reg_mr(pd, vaddr, size, access_flags);
798a01b4d6SAlexey Marchuk 			if (mr == NULL) {
808a01b4d6SAlexey Marchuk 				SPDK_ERRLOG("ibv_reg_mr() failed\n");
818a01b4d6SAlexey Marchuk 				return -1;
828a01b4d6SAlexey Marchuk 			} else {
838a01b4d6SAlexey Marchuk 				rc = spdk_mem_map_set_translation(map, (uint64_t)vaddr, size, (uint64_t)mr);
848a01b4d6SAlexey Marchuk 			}
858a01b4d6SAlexey Marchuk 		}
868a01b4d6SAlexey Marchuk 		break;
878a01b4d6SAlexey Marchuk 	case SPDK_MEM_MAP_NOTIFY_UNREGISTER:
888a01b4d6SAlexey Marchuk 		if (rmap->hooks == NULL || rmap->hooks->get_rkey == NULL) {
898a01b4d6SAlexey Marchuk 			mr = (struct ibv_mr *)spdk_mem_map_translate(map, (uint64_t)vaddr, NULL);
908a01b4d6SAlexey Marchuk 			if (mr) {
918a01b4d6SAlexey Marchuk 				ibv_dereg_mr(mr);
928a01b4d6SAlexey Marchuk 			}
938a01b4d6SAlexey Marchuk 		}
948a01b4d6SAlexey Marchuk 		rc = spdk_mem_map_clear_translation(map, (uint64_t)vaddr, size);
958a01b4d6SAlexey Marchuk 		break;
968a01b4d6SAlexey Marchuk 	default:
978a01b4d6SAlexey Marchuk 		SPDK_UNREACHABLE();
988a01b4d6SAlexey Marchuk 	}
998a01b4d6SAlexey Marchuk 
1008a01b4d6SAlexey Marchuk 	return rc;
1018a01b4d6SAlexey Marchuk }
1028a01b4d6SAlexey Marchuk 
/*
 * are_contiguous callback: two adjacent 2 MiB map entries belong to the same
 * registration when both translations point at the same MR base address.
 */
static int
rdma_check_contiguous_entries(uint64_t addr_1, uint64_t addr_2)
{
	return (addr_1 == addr_2) ? 1 : 0;
}
1098a01b4d6SAlexey Marchuk 
/* Callback table used for every memory map created by this module. */
const struct spdk_mem_map_ops g_rdma_map_ops = {
	.notify_cb = rdma_utils_mem_notify,
	.are_contiguous = rdma_check_contiguous_entries
};
1148a01b4d6SAlexey Marchuk 
1158a01b4d6SAlexey Marchuk static void
1168a01b4d6SAlexey Marchuk _rdma_free_mem_map(struct spdk_rdma_utils_mem_map *map)
1178a01b4d6SAlexey Marchuk {
1188a01b4d6SAlexey Marchuk 	assert(map);
1198a01b4d6SAlexey Marchuk 
1208a01b4d6SAlexey Marchuk 	if (map->hooks) {
1218a01b4d6SAlexey Marchuk 		spdk_free(map);
1228a01b4d6SAlexey Marchuk 	} else {
1238a01b4d6SAlexey Marchuk 		free(map);
1248a01b4d6SAlexey Marchuk 	}
1258a01b4d6SAlexey Marchuk }
1268a01b4d6SAlexey Marchuk 
1278a01b4d6SAlexey Marchuk struct spdk_rdma_utils_mem_map *
1288a01b4d6SAlexey Marchuk spdk_rdma_utils_create_mem_map(struct ibv_pd *pd, struct spdk_nvme_rdma_hooks *hooks,
1298ffb2c09SAlexey Marchuk 			       uint32_t access_flags)
1308a01b4d6SAlexey Marchuk {
1318a01b4d6SAlexey Marchuk 	struct spdk_rdma_utils_mem_map *map;
1328a01b4d6SAlexey Marchuk 
1338ffb2c09SAlexey Marchuk 	if (pd->context->device->transport_type == IBV_TRANSPORT_IWARP) {
1348ffb2c09SAlexey Marchuk 		/* IWARP requires REMOTE_WRITE permission for RDMA_READ operation */
1358ffb2c09SAlexey Marchuk 		access_flags |= IBV_ACCESS_REMOTE_WRITE;
1368ffb2c09SAlexey Marchuk 	}
1378ffb2c09SAlexey Marchuk 
1388a01b4d6SAlexey Marchuk 	pthread_mutex_lock(&g_rdma_mr_maps_mutex);
1398a01b4d6SAlexey Marchuk 	/* Look up existing mem map registration for this pd */
1408a01b4d6SAlexey Marchuk 	LIST_FOREACH(map, &g_rdma_utils_mr_maps, link) {
1418ffb2c09SAlexey Marchuk 		if (map->pd == pd && map->access_flags == access_flags) {
1428a01b4d6SAlexey Marchuk 			map->ref_count++;
1438a01b4d6SAlexey Marchuk 			pthread_mutex_unlock(&g_rdma_mr_maps_mutex);
1448a01b4d6SAlexey Marchuk 			return map;
1458a01b4d6SAlexey Marchuk 		}
1468a01b4d6SAlexey Marchuk 	}
1478a01b4d6SAlexey Marchuk 
1488a01b4d6SAlexey Marchuk 	if (hooks) {
149186b109dSJim Harris 		map = spdk_zmalloc(sizeof(*map), 0, NULL, SPDK_ENV_NUMA_ID_ANY, SPDK_MALLOC_DMA);
1508a01b4d6SAlexey Marchuk 	} else {
1518a01b4d6SAlexey Marchuk 		map = calloc(1, sizeof(*map));
1528a01b4d6SAlexey Marchuk 	}
1538a01b4d6SAlexey Marchuk 	if (!map) {
1548a01b4d6SAlexey Marchuk 		pthread_mutex_unlock(&g_rdma_mr_maps_mutex);
1558a01b4d6SAlexey Marchuk 		SPDK_ERRLOG("Memory allocation failed\n");
1568a01b4d6SAlexey Marchuk 		return NULL;
1578a01b4d6SAlexey Marchuk 	}
1588a01b4d6SAlexey Marchuk 	map->pd = pd;
1598a01b4d6SAlexey Marchuk 	map->ref_count = 1;
1608a01b4d6SAlexey Marchuk 	map->hooks = hooks;
1618ffb2c09SAlexey Marchuk 	map->access_flags = access_flags;
1628a01b4d6SAlexey Marchuk 	map->map = spdk_mem_map_alloc(0, &g_rdma_map_ops, map);
1638a01b4d6SAlexey Marchuk 	if (!map->map) {
1648a01b4d6SAlexey Marchuk 		SPDK_ERRLOG("Unable to create memory map\n");
1658a01b4d6SAlexey Marchuk 		_rdma_free_mem_map(map);
1668a01b4d6SAlexey Marchuk 		pthread_mutex_unlock(&g_rdma_mr_maps_mutex);
1678a01b4d6SAlexey Marchuk 		return NULL;
1688a01b4d6SAlexey Marchuk 	}
1698a01b4d6SAlexey Marchuk 	LIST_INSERT_HEAD(&g_rdma_utils_mr_maps, map, link);
1708a01b4d6SAlexey Marchuk 
1718a01b4d6SAlexey Marchuk 	pthread_mutex_unlock(&g_rdma_mr_maps_mutex);
1728a01b4d6SAlexey Marchuk 
1738a01b4d6SAlexey Marchuk 	return map;
1748a01b4d6SAlexey Marchuk }
1758a01b4d6SAlexey Marchuk 
1768a01b4d6SAlexey Marchuk void
1778a01b4d6SAlexey Marchuk spdk_rdma_utils_free_mem_map(struct spdk_rdma_utils_mem_map **_map)
1788a01b4d6SAlexey Marchuk {
1798a01b4d6SAlexey Marchuk 	struct spdk_rdma_utils_mem_map *map;
1808a01b4d6SAlexey Marchuk 
1818a01b4d6SAlexey Marchuk 	if (!_map) {
1828a01b4d6SAlexey Marchuk 		return;
1838a01b4d6SAlexey Marchuk 	}
1848a01b4d6SAlexey Marchuk 
1858a01b4d6SAlexey Marchuk 	map = *_map;
1868a01b4d6SAlexey Marchuk 	if (!map) {
1878a01b4d6SAlexey Marchuk 		return;
1888a01b4d6SAlexey Marchuk 	}
1898a01b4d6SAlexey Marchuk 	*_map = NULL;
1908a01b4d6SAlexey Marchuk 
1918a01b4d6SAlexey Marchuk 	pthread_mutex_lock(&g_rdma_mr_maps_mutex);
1928a01b4d6SAlexey Marchuk 	assert(map->ref_count > 0);
1938a01b4d6SAlexey Marchuk 	map->ref_count--;
1948a01b4d6SAlexey Marchuk 	if (map->ref_count != 0) {
1958a01b4d6SAlexey Marchuk 		pthread_mutex_unlock(&g_rdma_mr_maps_mutex);
1968a01b4d6SAlexey Marchuk 		return;
1978a01b4d6SAlexey Marchuk 	}
1988a01b4d6SAlexey Marchuk 
1998a01b4d6SAlexey Marchuk 	LIST_REMOVE(map, link);
2008a01b4d6SAlexey Marchuk 	pthread_mutex_unlock(&g_rdma_mr_maps_mutex);
2018a01b4d6SAlexey Marchuk 	if (map->map) {
2028a01b4d6SAlexey Marchuk 		spdk_mem_map_free(&map->map);
2038a01b4d6SAlexey Marchuk 	}
2048a01b4d6SAlexey Marchuk 	_rdma_free_mem_map(map);
2058a01b4d6SAlexey Marchuk }
2068a01b4d6SAlexey Marchuk 
2078a01b4d6SAlexey Marchuk int
2088a01b4d6SAlexey Marchuk spdk_rdma_utils_get_translation(struct spdk_rdma_utils_mem_map *map, void *address,
2098a01b4d6SAlexey Marchuk 				size_t length, struct spdk_rdma_utils_memory_translation *translation)
2108a01b4d6SAlexey Marchuk {
2118a01b4d6SAlexey Marchuk 	uint64_t real_length = length;
2128a01b4d6SAlexey Marchuk 
2138a01b4d6SAlexey Marchuk 	assert(map);
2148a01b4d6SAlexey Marchuk 	assert(address);
2158a01b4d6SAlexey Marchuk 	assert(translation);
2168a01b4d6SAlexey Marchuk 
2178a01b4d6SAlexey Marchuk 	if (map->hooks && map->hooks->get_rkey) {
2188a01b4d6SAlexey Marchuk 		translation->translation_type = SPDK_RDMA_UTILS_TRANSLATION_KEY;
2198a01b4d6SAlexey Marchuk 		translation->mr_or_key.key = spdk_mem_map_translate(map->map, (uint64_t)address, &real_length);
2208a01b4d6SAlexey Marchuk 	} else {
2218a01b4d6SAlexey Marchuk 		translation->translation_type = SPDK_RDMA_UTILS_TRANSLATION_MR;
2228a01b4d6SAlexey Marchuk 		translation->mr_or_key.mr = (struct ibv_mr *)spdk_mem_map_translate(map->map, (uint64_t)address,
2238a01b4d6SAlexey Marchuk 					    &real_length);
2248a01b4d6SAlexey Marchuk 		if (spdk_unlikely(!translation->mr_or_key.mr)) {
2258a01b4d6SAlexey Marchuk 			SPDK_ERRLOG("No translation for ptr %p, size %zu\n", address, length);
2268a01b4d6SAlexey Marchuk 			return -EINVAL;
2278a01b4d6SAlexey Marchuk 		}
2288a01b4d6SAlexey Marchuk 	}
2298a01b4d6SAlexey Marchuk 
2308a01b4d6SAlexey Marchuk 	assert(real_length >= length);
2318a01b4d6SAlexey Marchuk 
2328a01b4d6SAlexey Marchuk 	return 0;
2338a01b4d6SAlexey Marchuk }
2348a01b4d6SAlexey Marchuk 
2358a01b4d6SAlexey Marchuk 
2368a01b4d6SAlexey Marchuk static struct rdma_utils_device *
2378a01b4d6SAlexey Marchuk rdma_add_dev(struct ibv_context *context)
2388a01b4d6SAlexey Marchuk {
2398a01b4d6SAlexey Marchuk 	struct rdma_utils_device *dev;
2408a01b4d6SAlexey Marchuk 
2418a01b4d6SAlexey Marchuk 	dev = calloc(1, sizeof(*dev));
2428a01b4d6SAlexey Marchuk 	if (dev == NULL) {
2438a01b4d6SAlexey Marchuk 		SPDK_ERRLOG("Failed to allocate RDMA device object.\n");
2448a01b4d6SAlexey Marchuk 		return NULL;
2458a01b4d6SAlexey Marchuk 	}
2468a01b4d6SAlexey Marchuk 
2478a01b4d6SAlexey Marchuk 	dev->pd = ibv_alloc_pd(context);
2488a01b4d6SAlexey Marchuk 	if (dev->pd == NULL) {
2498a01b4d6SAlexey Marchuk 		SPDK_ERRLOG("ibv_alloc_pd() failed: %s (%d)\n", spdk_strerror(errno), errno);
2508a01b4d6SAlexey Marchuk 		free(dev);
2518a01b4d6SAlexey Marchuk 		return NULL;
2528a01b4d6SAlexey Marchuk 	}
2538a01b4d6SAlexey Marchuk 
2548a01b4d6SAlexey Marchuk 	dev->context = context;
2558a01b4d6SAlexey Marchuk 	TAILQ_INSERT_TAIL(&g_dev_list, dev, tailq);
2568a01b4d6SAlexey Marchuk 
2578a01b4d6SAlexey Marchuk 	return dev;
2588a01b4d6SAlexey Marchuk }
2598a01b4d6SAlexey Marchuk 
2608a01b4d6SAlexey Marchuk static void
2618a01b4d6SAlexey Marchuk rdma_remove_dev(struct rdma_utils_device *dev)
2628a01b4d6SAlexey Marchuk {
2638a01b4d6SAlexey Marchuk 	if (!dev->removed || dev->ref > 0) {
2648a01b4d6SAlexey Marchuk 		return;
2658a01b4d6SAlexey Marchuk 	}
2668a01b4d6SAlexey Marchuk 
2678a01b4d6SAlexey Marchuk 	/* Deallocate protection domain only if the device is already removed and
2688a01b4d6SAlexey Marchuk 	 * there is no reference.
2698a01b4d6SAlexey Marchuk 	 */
2708a01b4d6SAlexey Marchuk 	TAILQ_REMOVE(&g_dev_list, dev, tailq);
2718a01b4d6SAlexey Marchuk 	ibv_dealloc_pd(dev->pd);
2728a01b4d6SAlexey Marchuk 	free(dev);
2738a01b4d6SAlexey Marchuk }
2748a01b4d6SAlexey Marchuk 
/* qsort() comparator ordering ibv_context pointers by address. */
static int
ctx_cmp(const void *_c1, const void *_c2)
{
	struct ibv_context *c1 = *(struct ibv_context **)_c1;
	struct ibv_context *c2 = *(struct ibv_context **)_c2;

	/* Classic branch-free three-way compare: -1, 0 or 1. */
	return (c1 > c2) - (c1 < c2);
}
2838a01b4d6SAlexey Marchuk 
/*
 * Reconcile g_dev_list with the devices currently reported by
 * rdma_get_devices(): create device objects (and PDs) for newly appeared
 * contexts, mark vanished ones as removed (freeing them once unreferenced),
 * and retain the returned context array so the providers stay open.
 * Caller holds g_dev_mutex. Returns 0 on success, -ENODEV on failure.
 */
static int
rdma_sync_dev_list(void)
{
	struct ibv_context **new_ctx_list;
	int i, j;
	int num_devs = 0;

	/*
	 * rdma_get_devices() returns a NULL terminated array of opened RDMA devices,
	 * and sets num_devs to the number of the returned devices.
	 */
	new_ctx_list = rdma_get_devices(&num_devs);
	if (new_ctx_list == NULL) {
		SPDK_ERRLOG("rdma_get_devices() failed: %s (%d)\n", spdk_strerror(errno), errno);
		return -ENODEV;
	}

	if (num_devs == 0) {
		rdma_free_devices(new_ctx_list);
		SPDK_ERRLOG("Returned RDMA device array was empty\n");
		return -ENODEV;
	}

	/*
	 * Sort new_ctx_list by addresses to update devices easily.
	 */
	qsort(new_ctx_list, num_devs, sizeof(struct ibv_context *), ctx_cmp);

	if (g_ctx_list == NULL) {
		/* If no old array, this is the first call. Add all devices. */
		for (i = 0; new_ctx_list[i] != NULL; i++) {
			rdma_add_dev(new_ctx_list[i]);
		}

		goto exit;
	}

	/* Merge the two sorted, NULL-terminated arrays in lockstep; i walks the
	 * new array, j walks the old one. */
	for (i = j = 0; new_ctx_list[i] != NULL || g_ctx_list[j] != NULL;) {
		struct ibv_context *new_ctx = new_ctx_list[i];
		struct ibv_context *old_ctx = g_ctx_list[j];
		bool add = false, remove = false;

		/*
		 * If a context exists only in the new array, create a device for it,
		 * or if a context exists only in the old array, try removing the
		 * corresponding device.
		 */

		if (old_ctx == NULL) {
			add = true;
		} else if (new_ctx == NULL) {
			remove = true;
		} else if (new_ctx < old_ctx) {
			add = true;
		} else if (old_ctx < new_ctx) {
			remove = true;
		}

		if (add) {
			rdma_add_dev(new_ctx_list[i]);
			i++;
		} else if (remove) {
			struct rdma_utils_device *dev, *tmp;

			/* Mark the device removed; it is freed immediately only
			 * when no PD references remain. */
			TAILQ_FOREACH_SAFE(dev, &g_dev_list, tailq, tmp) {
				if (dev->context == g_ctx_list[j]) {
					dev->removed = true;
					rdma_remove_dev(dev);
				}
			}
			j++;
		} else {
			/* Present in both arrays: device unchanged. */
			i++;
			j++;
		}
	}

	/* Free the old array. */
	rdma_free_devices(g_ctx_list);

exit:
	/*
	 * Keep the newly returned array so that allocated protection domains
	 * are not freed unexpectedly.
	 */
	g_ctx_list = new_ctx_list;
	return 0;
}
3728a01b4d6SAlexey Marchuk 
3738a01b4d6SAlexey Marchuk struct ibv_pd *
3748a01b4d6SAlexey Marchuk spdk_rdma_utils_get_pd(struct ibv_context *context)
3758a01b4d6SAlexey Marchuk {
3768a01b4d6SAlexey Marchuk 	struct rdma_utils_device *dev;
3778a01b4d6SAlexey Marchuk 	int rc;
3788a01b4d6SAlexey Marchuk 
3798a01b4d6SAlexey Marchuk 	pthread_mutex_lock(&g_dev_mutex);
3808a01b4d6SAlexey Marchuk 
3818a01b4d6SAlexey Marchuk 	rc = rdma_sync_dev_list();
3828a01b4d6SAlexey Marchuk 	if (rc != 0) {
3838a01b4d6SAlexey Marchuk 		pthread_mutex_unlock(&g_dev_mutex);
3848a01b4d6SAlexey Marchuk 
3858a01b4d6SAlexey Marchuk 		SPDK_ERRLOG("Failed to sync RDMA device list\n");
3868a01b4d6SAlexey Marchuk 		return NULL;
3878a01b4d6SAlexey Marchuk 	}
3888a01b4d6SAlexey Marchuk 
3898a01b4d6SAlexey Marchuk 	TAILQ_FOREACH(dev, &g_dev_list, tailq) {
3908a01b4d6SAlexey Marchuk 		if (dev->context == context && !dev->removed) {
3918a01b4d6SAlexey Marchuk 			dev->ref++;
3928a01b4d6SAlexey Marchuk 			pthread_mutex_unlock(&g_dev_mutex);
3938a01b4d6SAlexey Marchuk 
3948a01b4d6SAlexey Marchuk 			return dev->pd;
3958a01b4d6SAlexey Marchuk 		}
3968a01b4d6SAlexey Marchuk 	}
3978a01b4d6SAlexey Marchuk 
3988a01b4d6SAlexey Marchuk 	pthread_mutex_unlock(&g_dev_mutex);
3998a01b4d6SAlexey Marchuk 
4008a01b4d6SAlexey Marchuk 	SPDK_ERRLOG("Failed to get PD\n");
4018a01b4d6SAlexey Marchuk 	return NULL;
4028a01b4d6SAlexey Marchuk }
4038a01b4d6SAlexey Marchuk 
4048a01b4d6SAlexey Marchuk void
4058a01b4d6SAlexey Marchuk spdk_rdma_utils_put_pd(struct ibv_pd *pd)
4068a01b4d6SAlexey Marchuk {
4078a01b4d6SAlexey Marchuk 	struct rdma_utils_device *dev, *tmp;
4088a01b4d6SAlexey Marchuk 
4098a01b4d6SAlexey Marchuk 	pthread_mutex_lock(&g_dev_mutex);
4108a01b4d6SAlexey Marchuk 
4118a01b4d6SAlexey Marchuk 	TAILQ_FOREACH_SAFE(dev, &g_dev_list, tailq, tmp) {
4128a01b4d6SAlexey Marchuk 		if (dev->pd == pd) {
4138a01b4d6SAlexey Marchuk 			assert(dev->ref > 0);
4148a01b4d6SAlexey Marchuk 			dev->ref--;
4158a01b4d6SAlexey Marchuk 
4168a01b4d6SAlexey Marchuk 			rdma_remove_dev(dev);
4178a01b4d6SAlexey Marchuk 		}
4188a01b4d6SAlexey Marchuk 	}
4198a01b4d6SAlexey Marchuk 
4208a01b4d6SAlexey Marchuk 	rdma_sync_dev_list();
4218a01b4d6SAlexey Marchuk 
4228a01b4d6SAlexey Marchuk 	pthread_mutex_unlock(&g_dev_mutex);
4238a01b4d6SAlexey Marchuk }
4248a01b4d6SAlexey Marchuk 
4258a01b4d6SAlexey Marchuk __attribute__((destructor)) static void
4268a01b4d6SAlexey Marchuk _rdma_utils_fini(void)
4278a01b4d6SAlexey Marchuk {
4288a01b4d6SAlexey Marchuk 	struct rdma_utils_device *dev, *tmp;
4298a01b4d6SAlexey Marchuk 
4308a01b4d6SAlexey Marchuk 	TAILQ_FOREACH_SAFE(dev, &g_dev_list, tailq, tmp) {
4318a01b4d6SAlexey Marchuk 		dev->removed = true;
4328a01b4d6SAlexey Marchuk 		dev->ref = 0;
4338a01b4d6SAlexey Marchuk 		rdma_remove_dev(dev);
4348a01b4d6SAlexey Marchuk 	}
4358a01b4d6SAlexey Marchuk 
4368a01b4d6SAlexey Marchuk 	if (g_ctx_list != NULL) {
4378a01b4d6SAlexey Marchuk 		rdma_free_devices(g_ctx_list);
4388a01b4d6SAlexey Marchuk 		g_ctx_list = NULL;
4398a01b4d6SAlexey Marchuk 	}
4408a01b4d6SAlexey Marchuk }
4410a9c0239SAlexey Marchuk 
4420a9c0239SAlexey Marchuk struct spdk_memory_domain *
4430a9c0239SAlexey Marchuk spdk_rdma_utils_get_memory_domain(struct ibv_pd *pd)
4440a9c0239SAlexey Marchuk {
4450a9c0239SAlexey Marchuk 	struct rdma_utils_memory_domain *domain = NULL;
446*141dc943SAlexey Marchuk 	struct spdk_memory_domain_ctx ctx = {};
4470a9c0239SAlexey Marchuk 	int rc;
4480a9c0239SAlexey Marchuk 
4490a9c0239SAlexey Marchuk 	pthread_mutex_lock(&g_memory_domains_lock);
4500a9c0239SAlexey Marchuk 
4510a9c0239SAlexey Marchuk 	TAILQ_FOREACH(domain, &g_memory_domains, link) {
4520a9c0239SAlexey Marchuk 		if (domain->pd == pd) {
4530a9c0239SAlexey Marchuk 			domain->ref++;
4540a9c0239SAlexey Marchuk 			pthread_mutex_unlock(&g_memory_domains_lock);
4550a9c0239SAlexey Marchuk 			return domain->domain;
4560a9c0239SAlexey Marchuk 		}
4570a9c0239SAlexey Marchuk 	}
4580a9c0239SAlexey Marchuk 
4590a9c0239SAlexey Marchuk 	domain = calloc(1, sizeof(*domain));
4600a9c0239SAlexey Marchuk 	if (!domain) {
4610a9c0239SAlexey Marchuk 		SPDK_ERRLOG("Memory allocation failed\n");
4620a9c0239SAlexey Marchuk 		pthread_mutex_unlock(&g_memory_domains_lock);
4630a9c0239SAlexey Marchuk 		return NULL;
4640a9c0239SAlexey Marchuk 	}
4650a9c0239SAlexey Marchuk 
4660a9c0239SAlexey Marchuk 	domain->rdma_ctx.size = sizeof(domain->rdma_ctx);
4670a9c0239SAlexey Marchuk 	domain->rdma_ctx.ibv_pd = pd;
4680a9c0239SAlexey Marchuk 	ctx.size = sizeof(ctx);
4690a9c0239SAlexey Marchuk 	ctx.user_ctx = &domain->rdma_ctx;
470*141dc943SAlexey Marchuk 	ctx.user_ctx_size = domain->rdma_ctx.size;
4710a9c0239SAlexey Marchuk 
4720a9c0239SAlexey Marchuk 	rc = spdk_memory_domain_create(&domain->domain, SPDK_DMA_DEVICE_TYPE_RDMA, &ctx,
4730a9c0239SAlexey Marchuk 				       SPDK_RDMA_DMA_DEVICE);
4740a9c0239SAlexey Marchuk 	if (rc) {
4750a9c0239SAlexey Marchuk 		SPDK_ERRLOG("Failed to create memory domain\n");
4760a9c0239SAlexey Marchuk 		free(domain);
4770a9c0239SAlexey Marchuk 		pthread_mutex_unlock(&g_memory_domains_lock);
4780a9c0239SAlexey Marchuk 		return NULL;
4790a9c0239SAlexey Marchuk 	}
4800a9c0239SAlexey Marchuk 
4810a9c0239SAlexey Marchuk 	domain->pd = pd;
4820a9c0239SAlexey Marchuk 	domain->ref = 1;
4830a9c0239SAlexey Marchuk 	TAILQ_INSERT_TAIL(&g_memory_domains, domain, link);
4840a9c0239SAlexey Marchuk 
4850a9c0239SAlexey Marchuk 	pthread_mutex_unlock(&g_memory_domains_lock);
4860a9c0239SAlexey Marchuk 
4870a9c0239SAlexey Marchuk 	return domain->domain;
4880a9c0239SAlexey Marchuk }
4890a9c0239SAlexey Marchuk 
4900a9c0239SAlexey Marchuk int
4910a9c0239SAlexey Marchuk spdk_rdma_utils_put_memory_domain(struct spdk_memory_domain *_domain)
4920a9c0239SAlexey Marchuk {
4930a9c0239SAlexey Marchuk 	struct rdma_utils_memory_domain *domain = NULL;
4940a9c0239SAlexey Marchuk 
4950a9c0239SAlexey Marchuk 	if (!_domain) {
4960a9c0239SAlexey Marchuk 		return 0;
4970a9c0239SAlexey Marchuk 	}
4980a9c0239SAlexey Marchuk 
4990a9c0239SAlexey Marchuk 	pthread_mutex_lock(&g_memory_domains_lock);
5000a9c0239SAlexey Marchuk 
5010a9c0239SAlexey Marchuk 	TAILQ_FOREACH(domain, &g_memory_domains, link) {
5020a9c0239SAlexey Marchuk 		if (domain->domain == _domain) {
5030a9c0239SAlexey Marchuk 			break;
5040a9c0239SAlexey Marchuk 		}
5050a9c0239SAlexey Marchuk 	}
5060a9c0239SAlexey Marchuk 
5070a9c0239SAlexey Marchuk 	if (!domain) {
5080a9c0239SAlexey Marchuk 		pthread_mutex_unlock(&g_memory_domains_lock);
5090a9c0239SAlexey Marchuk 		return -ENODEV;
5100a9c0239SAlexey Marchuk 	}
5110a9c0239SAlexey Marchuk 	assert(domain->ref > 0);
5120a9c0239SAlexey Marchuk 
5130a9c0239SAlexey Marchuk 	domain->ref--;
5140a9c0239SAlexey Marchuk 
5150a9c0239SAlexey Marchuk 	if (domain->ref == 0) {
5160a9c0239SAlexey Marchuk 		spdk_memory_domain_destroy(domain->domain);
5170a9c0239SAlexey Marchuk 		TAILQ_REMOVE(&g_memory_domains, domain, link);
5180a9c0239SAlexey Marchuk 		free(domain);
5190a9c0239SAlexey Marchuk 	}
5200a9c0239SAlexey Marchuk 
5210a9c0239SAlexey Marchuk 	pthread_mutex_unlock(&g_memory_domains_lock);
5220a9c0239SAlexey Marchuk 
5230a9c0239SAlexey Marchuk 	return 0;
5240a9c0239SAlexey Marchuk }
52520b14cdcSJim Harris 
52620b14cdcSJim Harris int32_t
52720b14cdcSJim Harris spdk_rdma_cm_id_get_numa_id(struct rdma_cm_id *cm_id)
52820b14cdcSJim Harris {
52920b14cdcSJim Harris 	struct sockaddr	*sa;
53020b14cdcSJim Harris 	char		addr[64];
53120b14cdcSJim Harris 	char		ifc[64];
53220b14cdcSJim Harris 	uint32_t	numa_id;
53320b14cdcSJim Harris 	int		rc;
53420b14cdcSJim Harris 
53520b14cdcSJim Harris 	sa = rdma_get_local_addr(cm_id);
53620b14cdcSJim Harris 	if (sa == NULL) {
53720b14cdcSJim Harris 		return SPDK_ENV_NUMA_ID_ANY;
53820b14cdcSJim Harris 	}
53920b14cdcSJim Harris 	rc = spdk_net_get_address_string(sa, addr, sizeof(addr));
54020b14cdcSJim Harris 	if (rc) {
54120b14cdcSJim Harris 		return SPDK_ENV_NUMA_ID_ANY;
54220b14cdcSJim Harris 	}
54320b14cdcSJim Harris 	rc = spdk_net_get_interface_name(addr, ifc, sizeof(ifc));
54420b14cdcSJim Harris 	if (rc) {
54520b14cdcSJim Harris 		return SPDK_ENV_NUMA_ID_ANY;
54620b14cdcSJim Harris 	}
54720b14cdcSJim Harris 	rc = spdk_read_sysfs_attribute_uint32(&numa_id,
54820b14cdcSJim Harris 					      "/sys/class/net/%s/device/numa_node", ifc);
54920b14cdcSJim Harris 	if (rc || numa_id > INT32_MAX) {
55020b14cdcSJim Harris 		return SPDK_ENV_NUMA_ID_ANY;
55120b14cdcSJim Harris 	}
55220b14cdcSJim Harris 	return (int32_t)numa_id;
55320b14cdcSJim Harris }
554