18a01b4d6SAlexey Marchuk /* SPDX-License-Identifier: BSD-3-Clause 28a01b4d6SAlexey Marchuk * Copyright (c) Intel Corporation. All rights reserved. 38a01b4d6SAlexey Marchuk * Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 48a01b4d6SAlexey Marchuk */ 58a01b4d6SAlexey Marchuk 68a01b4d6SAlexey Marchuk #include "spdk_internal/rdma_utils.h" 78a01b4d6SAlexey Marchuk 88a01b4d6SAlexey Marchuk #include "spdk/log.h" 98a01b4d6SAlexey Marchuk #include "spdk/string.h" 108a01b4d6SAlexey Marchuk #include "spdk/likely.h" 1120b14cdcSJim Harris #include "spdk/net.h" 1220b14cdcSJim Harris #include "spdk/file.h" 138a01b4d6SAlexey Marchuk 148a01b4d6SAlexey Marchuk #include "spdk_internal/assert.h" 158a01b4d6SAlexey Marchuk 168a01b4d6SAlexey Marchuk #include <rdma/rdma_cma.h> 178a01b4d6SAlexey Marchuk #include <rdma/rdma_verbs.h> 188a01b4d6SAlexey Marchuk 198a01b4d6SAlexey Marchuk struct rdma_utils_device { 208a01b4d6SAlexey Marchuk struct ibv_pd *pd; 218a01b4d6SAlexey Marchuk struct ibv_context *context; 228a01b4d6SAlexey Marchuk int ref; 238a01b4d6SAlexey Marchuk bool removed; 248a01b4d6SAlexey Marchuk TAILQ_ENTRY(rdma_utils_device) tailq; 258a01b4d6SAlexey Marchuk }; 268a01b4d6SAlexey Marchuk 278a01b4d6SAlexey Marchuk struct spdk_rdma_utils_mem_map { 288a01b4d6SAlexey Marchuk struct spdk_mem_map *map; 298a01b4d6SAlexey Marchuk struct ibv_pd *pd; 308a01b4d6SAlexey Marchuk struct spdk_nvme_rdma_hooks *hooks; 318a01b4d6SAlexey Marchuk uint32_t ref_count; 328ffb2c09SAlexey Marchuk uint32_t access_flags; 338a01b4d6SAlexey Marchuk LIST_ENTRY(spdk_rdma_utils_mem_map) link; 348a01b4d6SAlexey Marchuk }; 358a01b4d6SAlexey Marchuk 360a9c0239SAlexey Marchuk struct rdma_utils_memory_domain { 370a9c0239SAlexey Marchuk TAILQ_ENTRY(rdma_utils_memory_domain) link; 380a9c0239SAlexey Marchuk uint32_t ref; 390a9c0239SAlexey Marchuk enum spdk_dma_device_type type; 400a9c0239SAlexey Marchuk struct ibv_pd *pd; 410a9c0239SAlexey Marchuk struct spdk_memory_domain *domain; 420a9c0239SAlexey Marchuk struct spdk_memory_domain_rdma_ctx rdma_ctx; 430a9c0239SAlexey Marchuk }; 440a9c0239SAlexey Marchuk 458a01b4d6SAlexey Marchuk static pthread_mutex_t g_dev_mutex = PTHREAD_MUTEX_INITIALIZER; 468a01b4d6SAlexey Marchuk static struct ibv_context **g_ctx_list = NULL; 478a01b4d6SAlexey Marchuk static TAILQ_HEAD(, rdma_utils_device) g_dev_list = TAILQ_HEAD_INITIALIZER(g_dev_list); 488a01b4d6SAlexey Marchuk 498a01b4d6SAlexey Marchuk static LIST_HEAD(, spdk_rdma_utils_mem_map) g_rdma_utils_mr_maps = LIST_HEAD_INITIALIZER( 508a01b4d6SAlexey Marchuk &g_rdma_utils_mr_maps); 518a01b4d6SAlexey Marchuk static pthread_mutex_t g_rdma_mr_maps_mutex = PTHREAD_MUTEX_INITIALIZER; 528a01b4d6SAlexey Marchuk 530a9c0239SAlexey Marchuk static TAILQ_HEAD(, rdma_utils_memory_domain) g_memory_domains = TAILQ_HEAD_INITIALIZER( 540a9c0239SAlexey Marchuk g_memory_domains); 550a9c0239SAlexey Marchuk static pthread_mutex_t g_memory_domains_lock = PTHREAD_MUTEX_INITIALIZER; 560a9c0239SAlexey Marchuk 578a01b4d6SAlexey Marchuk static int 588a01b4d6SAlexey Marchuk rdma_utils_mem_notify(void *cb_ctx, struct spdk_mem_map *map, 598a01b4d6SAlexey Marchuk enum spdk_mem_map_notify_action action, 608a01b4d6SAlexey Marchuk void *vaddr, size_t size) 618a01b4d6SAlexey Marchuk { 628a01b4d6SAlexey Marchuk struct spdk_rdma_utils_mem_map *rmap = cb_ctx; 638a01b4d6SAlexey Marchuk struct ibv_pd *pd = rmap->pd; 648a01b4d6SAlexey Marchuk struct ibv_mr *mr; 658ffb2c09SAlexey Marchuk uint32_t access_flags; 668a01b4d6SAlexey Marchuk int rc; 678a01b4d6SAlexey Marchuk 688a01b4d6SAlexey Marchuk switch (action) { 698a01b4d6SAlexey Marchuk case SPDK_MEM_MAP_NOTIFY_REGISTER: 708a01b4d6SAlexey Marchuk if (rmap->hooks && rmap->hooks->get_rkey) { 718a01b4d6SAlexey Marchuk rc = spdk_mem_map_set_translation(map, (uint64_t)vaddr, size, 728a01b4d6SAlexey Marchuk rmap->hooks->get_rkey(pd, vaddr, size)); 738a01b4d6SAlexey Marchuk } else { 748ffb2c09SAlexey Marchuk access_flags = rmap->access_flags; 758a01b4d6SAlexey Marchuk #ifdef IBV_ACCESS_OPTIONAL_FIRST 768a01b4d6SAlexey Marchuk access_flags |= IBV_ACCESS_RELAXED_ORDERING; 778a01b4d6SAlexey Marchuk #endif 788a01b4d6SAlexey Marchuk mr = ibv_reg_mr(pd, vaddr, size, access_flags); 798a01b4d6SAlexey Marchuk if (mr == NULL) { 808a01b4d6SAlexey Marchuk SPDK_ERRLOG("ibv_reg_mr() failed\n"); 818a01b4d6SAlexey Marchuk return -1; 828a01b4d6SAlexey Marchuk } else { 838a01b4d6SAlexey Marchuk rc = spdk_mem_map_set_translation(map, (uint64_t)vaddr, size, (uint64_t)mr); 848a01b4d6SAlexey Marchuk } 858a01b4d6SAlexey Marchuk } 868a01b4d6SAlexey Marchuk break; 878a01b4d6SAlexey Marchuk case SPDK_MEM_MAP_NOTIFY_UNREGISTER: 888a01b4d6SAlexey Marchuk if (rmap->hooks == NULL || rmap->hooks->get_rkey == NULL) { 898a01b4d6SAlexey Marchuk mr = (struct ibv_mr *)spdk_mem_map_translate(map, (uint64_t)vaddr, NULL); 908a01b4d6SAlexey Marchuk if (mr) { 918a01b4d6SAlexey Marchuk ibv_dereg_mr(mr); 928a01b4d6SAlexey Marchuk } 938a01b4d6SAlexey Marchuk } 948a01b4d6SAlexey Marchuk rc = spdk_mem_map_clear_translation(map, (uint64_t)vaddr, size); 958a01b4d6SAlexey Marchuk break; 968a01b4d6SAlexey Marchuk default: 978a01b4d6SAlexey Marchuk SPDK_UNREACHABLE(); 988a01b4d6SAlexey Marchuk } 998a01b4d6SAlexey Marchuk 1008a01b4d6SAlexey Marchuk return rc; 1018a01b4d6SAlexey Marchuk } 1028a01b4d6SAlexey Marchuk 1038a01b4d6SAlexey Marchuk static int 1048a01b4d6SAlexey Marchuk rdma_check_contiguous_entries(uint64_t addr_1, uint64_t addr_2) 1058a01b4d6SAlexey Marchuk { 1068a01b4d6SAlexey Marchuk /* Two contiguous mappings will point to the same address which is the start of the RDMA MR. */ 1078a01b4d6SAlexey Marchuk return addr_1 == addr_2; 1088a01b4d6SAlexey Marchuk } 1098a01b4d6SAlexey Marchuk 1108a01b4d6SAlexey Marchuk const struct spdk_mem_map_ops g_rdma_map_ops = { 1118a01b4d6SAlexey Marchuk .notify_cb = rdma_utils_mem_notify, 1128a01b4d6SAlexey Marchuk .are_contiguous = rdma_check_contiguous_entries 1138a01b4d6SAlexey Marchuk }; 1148a01b4d6SAlexey Marchuk 1158a01b4d6SAlexey Marchuk static void 1168a01b4d6SAlexey Marchuk _rdma_free_mem_map(struct spdk_rdma_utils_mem_map *map) 1178a01b4d6SAlexey Marchuk { 1188a01b4d6SAlexey Marchuk assert(map); 1198a01b4d6SAlexey Marchuk 1208a01b4d6SAlexey Marchuk if (map->hooks) { 1218a01b4d6SAlexey Marchuk spdk_free(map); 1228a01b4d6SAlexey Marchuk } else { 1238a01b4d6SAlexey Marchuk free(map); 1248a01b4d6SAlexey Marchuk } 1258a01b4d6SAlexey Marchuk } 1268a01b4d6SAlexey Marchuk 1278a01b4d6SAlexey Marchuk struct spdk_rdma_utils_mem_map * 1288a01b4d6SAlexey Marchuk spdk_rdma_utils_create_mem_map(struct ibv_pd *pd, struct spdk_nvme_rdma_hooks *hooks, 1298ffb2c09SAlexey Marchuk uint32_t access_flags) 1308a01b4d6SAlexey Marchuk { 1318a01b4d6SAlexey Marchuk struct spdk_rdma_utils_mem_map *map; 1328a01b4d6SAlexey Marchuk 1338ffb2c09SAlexey Marchuk if (pd->context->device->transport_type == IBV_TRANSPORT_IWARP) { 1348ffb2c09SAlexey Marchuk /* IWARP requires REMOTE_WRITE permission for RDMA_READ operation */ 1358ffb2c09SAlexey Marchuk access_flags |= IBV_ACCESS_REMOTE_WRITE; 1368ffb2c09SAlexey Marchuk } 1378ffb2c09SAlexey Marchuk 1388a01b4d6SAlexey Marchuk pthread_mutex_lock(&g_rdma_mr_maps_mutex); 1398a01b4d6SAlexey Marchuk /* Look up existing mem map registration for this pd */ 1408a01b4d6SAlexey Marchuk LIST_FOREACH(map, &g_rdma_utils_mr_maps, link) { 1418ffb2c09SAlexey Marchuk if (map->pd == pd && map->access_flags == access_flags) { 1428a01b4d6SAlexey Marchuk map->ref_count++; 1438a01b4d6SAlexey Marchuk pthread_mutex_unlock(&g_rdma_mr_maps_mutex); 1448a01b4d6SAlexey Marchuk return map; 1458a01b4d6SAlexey Marchuk } 1468a01b4d6SAlexey Marchuk } 1478a01b4d6SAlexey Marchuk 1488a01b4d6SAlexey Marchuk if (hooks) { 149186b109dSJim Harris map = spdk_zmalloc(sizeof(*map), 0, NULL, SPDK_ENV_NUMA_ID_ANY, SPDK_MALLOC_DMA); 1508a01b4d6SAlexey Marchuk } else { 1518a01b4d6SAlexey Marchuk map = calloc(1, sizeof(*map)); 1528a01b4d6SAlexey Marchuk } 1538a01b4d6SAlexey Marchuk if (!map) { 1548a01b4d6SAlexey Marchuk pthread_mutex_unlock(&g_rdma_mr_maps_mutex); 1558a01b4d6SAlexey Marchuk SPDK_ERRLOG("Memory allocation failed\n"); 1568a01b4d6SAlexey Marchuk return NULL; 1578a01b4d6SAlexey Marchuk } 1588a01b4d6SAlexey Marchuk map->pd = pd; 1598a01b4d6SAlexey Marchuk map->ref_count = 1; 1608a01b4d6SAlexey Marchuk map->hooks = hooks; 1618ffb2c09SAlexey Marchuk map->access_flags = access_flags; 1628a01b4d6SAlexey Marchuk map->map = spdk_mem_map_alloc(0, &g_rdma_map_ops, map); 1638a01b4d6SAlexey Marchuk if (!map->map) { 1648a01b4d6SAlexey Marchuk SPDK_ERRLOG("Unable to create memory map\n"); 1658a01b4d6SAlexey Marchuk _rdma_free_mem_map(map); 1668a01b4d6SAlexey Marchuk pthread_mutex_unlock(&g_rdma_mr_maps_mutex); 1678a01b4d6SAlexey Marchuk return NULL; 1688a01b4d6SAlexey Marchuk } 1698a01b4d6SAlexey Marchuk LIST_INSERT_HEAD(&g_rdma_utils_mr_maps, map, link); 1708a01b4d6SAlexey Marchuk 1718a01b4d6SAlexey Marchuk pthread_mutex_unlock(&g_rdma_mr_maps_mutex); 1728a01b4d6SAlexey Marchuk 1738a01b4d6SAlexey Marchuk return map; 1748a01b4d6SAlexey Marchuk } 1758a01b4d6SAlexey Marchuk 1768a01b4d6SAlexey Marchuk void 1778a01b4d6SAlexey Marchuk spdk_rdma_utils_free_mem_map(struct spdk_rdma_utils_mem_map **_map) 1788a01b4d6SAlexey Marchuk { 1798a01b4d6SAlexey Marchuk struct spdk_rdma_utils_mem_map *map; 1808a01b4d6SAlexey Marchuk 1818a01b4d6SAlexey Marchuk if (!_map) { 1828a01b4d6SAlexey Marchuk return; 1838a01b4d6SAlexey Marchuk } 1848a01b4d6SAlexey Marchuk 1858a01b4d6SAlexey Marchuk map = *_map; 1868a01b4d6SAlexey Marchuk if (!map) { 1878a01b4d6SAlexey Marchuk return; 1888a01b4d6SAlexey Marchuk } 1898a01b4d6SAlexey Marchuk *_map = NULL; 1908a01b4d6SAlexey Marchuk 1918a01b4d6SAlexey Marchuk pthread_mutex_lock(&g_rdma_mr_maps_mutex); 1928a01b4d6SAlexey Marchuk assert(map->ref_count > 0); 1938a01b4d6SAlexey Marchuk map->ref_count--; 1948a01b4d6SAlexey Marchuk if (map->ref_count != 0) { 1958a01b4d6SAlexey Marchuk pthread_mutex_unlock(&g_rdma_mr_maps_mutex); 1968a01b4d6SAlexey Marchuk return; 1978a01b4d6SAlexey Marchuk } 1988a01b4d6SAlexey Marchuk 1998a01b4d6SAlexey Marchuk LIST_REMOVE(map, link); 2008a01b4d6SAlexey Marchuk pthread_mutex_unlock(&g_rdma_mr_maps_mutex); 2018a01b4d6SAlexey Marchuk if (map->map) { 2028a01b4d6SAlexey Marchuk spdk_mem_map_free(&map->map); 2038a01b4d6SAlexey Marchuk } 2048a01b4d6SAlexey Marchuk _rdma_free_mem_map(map); 2058a01b4d6SAlexey Marchuk } 2068a01b4d6SAlexey Marchuk 2078a01b4d6SAlexey Marchuk int 2088a01b4d6SAlexey Marchuk spdk_rdma_utils_get_translation(struct spdk_rdma_utils_mem_map *map, void *address, 2098a01b4d6SAlexey Marchuk size_t length, struct spdk_rdma_utils_memory_translation *translation) 2108a01b4d6SAlexey Marchuk { 2118a01b4d6SAlexey Marchuk uint64_t real_length = length; 2128a01b4d6SAlexey Marchuk 2138a01b4d6SAlexey Marchuk assert(map); 2148a01b4d6SAlexey Marchuk assert(address); 2158a01b4d6SAlexey Marchuk assert(translation); 2168a01b4d6SAlexey Marchuk 2178a01b4d6SAlexey Marchuk if (map->hooks && map->hooks->get_rkey) { 2188a01b4d6SAlexey Marchuk translation->translation_type = SPDK_RDMA_UTILS_TRANSLATION_KEY; 2198a01b4d6SAlexey Marchuk translation->mr_or_key.key = spdk_mem_map_translate(map->map, (uint64_t)address, &real_length); 2208a01b4d6SAlexey Marchuk } else { 2218a01b4d6SAlexey Marchuk translation->translation_type = SPDK_RDMA_UTILS_TRANSLATION_MR; 2228a01b4d6SAlexey Marchuk translation->mr_or_key.mr = (struct ibv_mr *)spdk_mem_map_translate(map->map, (uint64_t)address, 2238a01b4d6SAlexey Marchuk &real_length); 2248a01b4d6SAlexey Marchuk if (spdk_unlikely(!translation->mr_or_key.mr)) { 2258a01b4d6SAlexey Marchuk SPDK_ERRLOG("No translation for ptr %p, size %zu\n", address, length); 2268a01b4d6SAlexey Marchuk return -EINVAL; 2278a01b4d6SAlexey Marchuk } 2288a01b4d6SAlexey Marchuk } 2298a01b4d6SAlexey Marchuk 2308a01b4d6SAlexey Marchuk assert(real_length >= length); 2318a01b4d6SAlexey Marchuk 2328a01b4d6SAlexey Marchuk return 0; 2338a01b4d6SAlexey Marchuk } 2348a01b4d6SAlexey Marchuk 2358a01b4d6SAlexey Marchuk 2368a01b4d6SAlexey Marchuk static struct rdma_utils_device * 2378a01b4d6SAlexey Marchuk rdma_add_dev(struct ibv_context *context) 2388a01b4d6SAlexey Marchuk { 2398a01b4d6SAlexey Marchuk struct rdma_utils_device *dev; 2408a01b4d6SAlexey Marchuk 2418a01b4d6SAlexey Marchuk dev = calloc(1, sizeof(*dev)); 2428a01b4d6SAlexey Marchuk if (dev == NULL) { 2438a01b4d6SAlexey Marchuk SPDK_ERRLOG("Failed to allocate RDMA device object.\n"); 2448a01b4d6SAlexey Marchuk return NULL; 2458a01b4d6SAlexey Marchuk } 2468a01b4d6SAlexey Marchuk 2478a01b4d6SAlexey Marchuk dev->pd = ibv_alloc_pd(context); 2488a01b4d6SAlexey Marchuk if (dev->pd == NULL) { 2498a01b4d6SAlexey Marchuk SPDK_ERRLOG("ibv_alloc_pd() failed: %s (%d)\n", spdk_strerror(errno), errno); 2508a01b4d6SAlexey Marchuk free(dev); 2518a01b4d6SAlexey Marchuk return NULL; 2528a01b4d6SAlexey Marchuk } 2538a01b4d6SAlexey Marchuk 2548a01b4d6SAlexey Marchuk dev->context = context; 2558a01b4d6SAlexey Marchuk TAILQ_INSERT_TAIL(&g_dev_list, dev, tailq); 2568a01b4d6SAlexey Marchuk 2578a01b4d6SAlexey Marchuk return dev; 2588a01b4d6SAlexey Marchuk } 2598a01b4d6SAlexey Marchuk 2608a01b4d6SAlexey Marchuk static void 2618a01b4d6SAlexey Marchuk rdma_remove_dev(struct rdma_utils_device *dev) 2628a01b4d6SAlexey Marchuk { 2638a01b4d6SAlexey Marchuk if (!dev->removed || dev->ref > 0) { 2648a01b4d6SAlexey Marchuk return; 2658a01b4d6SAlexey Marchuk } 2668a01b4d6SAlexey Marchuk 2678a01b4d6SAlexey Marchuk /* Deallocate protection domain only if the device is already removed and 2688a01b4d6SAlexey Marchuk * there is no reference. 2698a01b4d6SAlexey Marchuk */ 2708a01b4d6SAlexey Marchuk TAILQ_REMOVE(&g_dev_list, dev, tailq); 2718a01b4d6SAlexey Marchuk ibv_dealloc_pd(dev->pd); 2728a01b4d6SAlexey Marchuk free(dev); 2738a01b4d6SAlexey Marchuk } 2748a01b4d6SAlexey Marchuk 2758a01b4d6SAlexey Marchuk static int 2768a01b4d6SAlexey Marchuk ctx_cmp(const void *_c1, const void *_c2) 2778a01b4d6SAlexey Marchuk { 2788a01b4d6SAlexey Marchuk struct ibv_context *c1 = *(struct ibv_context **)_c1; 2798a01b4d6SAlexey Marchuk struct ibv_context *c2 = *(struct ibv_context **)_c2; 2808a01b4d6SAlexey Marchuk 2818a01b4d6SAlexey Marchuk return c1 < c2 ? -1 : c1 > c2; 2828a01b4d6SAlexey Marchuk } 2838a01b4d6SAlexey Marchuk 2848a01b4d6SAlexey Marchuk static int 2858a01b4d6SAlexey Marchuk rdma_sync_dev_list(void) 2868a01b4d6SAlexey Marchuk { 2878a01b4d6SAlexey Marchuk struct ibv_context **new_ctx_list; 2888a01b4d6SAlexey Marchuk int i, j; 2898a01b4d6SAlexey Marchuk int num_devs = 0; 2908a01b4d6SAlexey Marchuk 2918a01b4d6SAlexey Marchuk /* 2928a01b4d6SAlexey Marchuk * rdma_get_devices() returns a NULL terminated array of opened RDMA devices, 2938a01b4d6SAlexey Marchuk * and sets num_devs to the number of the returned devices. 2948a01b4d6SAlexey Marchuk */ 2958a01b4d6SAlexey Marchuk new_ctx_list = rdma_get_devices(&num_devs); 2968a01b4d6SAlexey Marchuk if (new_ctx_list == NULL) { 2978a01b4d6SAlexey Marchuk SPDK_ERRLOG("rdma_get_devices() failed: %s (%d)\n", spdk_strerror(errno), errno); 2988a01b4d6SAlexey Marchuk return -ENODEV; 2998a01b4d6SAlexey Marchuk } 3008a01b4d6SAlexey Marchuk 3018a01b4d6SAlexey Marchuk if (num_devs == 0) { 3028a01b4d6SAlexey Marchuk rdma_free_devices(new_ctx_list); 3038a01b4d6SAlexey Marchuk SPDK_ERRLOG("Returned RDMA device array was empty\n"); 3048a01b4d6SAlexey Marchuk return -ENODEV; 3058a01b4d6SAlexey Marchuk } 3068a01b4d6SAlexey Marchuk 3078a01b4d6SAlexey Marchuk /* 3088a01b4d6SAlexey Marchuk * Sort new_ctx_list by addresses to update devices easily. 3098a01b4d6SAlexey Marchuk */ 3108a01b4d6SAlexey Marchuk qsort(new_ctx_list, num_devs, sizeof(struct ibv_context *), ctx_cmp); 3118a01b4d6SAlexey Marchuk 3128a01b4d6SAlexey Marchuk if (g_ctx_list == NULL) { 3138a01b4d6SAlexey Marchuk /* If no old array, this is the first call. Add all devices. */ 3148a01b4d6SAlexey Marchuk for (i = 0; new_ctx_list[i] != NULL; i++) { 3158a01b4d6SAlexey Marchuk rdma_add_dev(new_ctx_list[i]); 3168a01b4d6SAlexey Marchuk } 3178a01b4d6SAlexey Marchuk 3188a01b4d6SAlexey Marchuk goto exit; 3198a01b4d6SAlexey Marchuk } 3208a01b4d6SAlexey Marchuk 3218a01b4d6SAlexey Marchuk for (i = j = 0; new_ctx_list[i] != NULL || g_ctx_list[j] != NULL;) { 3228a01b4d6SAlexey Marchuk struct ibv_context *new_ctx = new_ctx_list[i]; 3238a01b4d6SAlexey Marchuk struct ibv_context *old_ctx = g_ctx_list[j]; 3248a01b4d6SAlexey Marchuk bool add = false, remove = false; 3258a01b4d6SAlexey Marchuk 3268a01b4d6SAlexey Marchuk /* 3278a01b4d6SAlexey Marchuk * If a context exists only in the new array, create a device for it, 3288a01b4d6SAlexey Marchuk * or if a context exists only in the old array, try removing the 3298a01b4d6SAlexey Marchuk * corresponding device. 3308a01b4d6SAlexey Marchuk */ 3318a01b4d6SAlexey Marchuk 3328a01b4d6SAlexey Marchuk if (old_ctx == NULL) { 3338a01b4d6SAlexey Marchuk add = true; 3348a01b4d6SAlexey Marchuk } else if (new_ctx == NULL) { 3358a01b4d6SAlexey Marchuk remove = true; 3368a01b4d6SAlexey Marchuk } else if (new_ctx < old_ctx) { 3378a01b4d6SAlexey Marchuk add = true; 3388a01b4d6SAlexey Marchuk } else if (old_ctx < new_ctx) { 3398a01b4d6SAlexey Marchuk remove = true; 3408a01b4d6SAlexey Marchuk } 3418a01b4d6SAlexey Marchuk 3428a01b4d6SAlexey Marchuk if (add) { 3438a01b4d6SAlexey Marchuk rdma_add_dev(new_ctx_list[i]); 3448a01b4d6SAlexey Marchuk i++; 3458a01b4d6SAlexey Marchuk } else if (remove) { 3468a01b4d6SAlexey Marchuk struct rdma_utils_device *dev, *tmp; 3478a01b4d6SAlexey Marchuk 3488a01b4d6SAlexey Marchuk TAILQ_FOREACH_SAFE(dev, &g_dev_list, tailq, tmp) { 3498a01b4d6SAlexey Marchuk if (dev->context == g_ctx_list[j]) { 3508a01b4d6SAlexey Marchuk dev->removed = true; 3518a01b4d6SAlexey Marchuk rdma_remove_dev(dev); 3528a01b4d6SAlexey Marchuk } 3538a01b4d6SAlexey Marchuk } 3548a01b4d6SAlexey Marchuk j++; 3558a01b4d6SAlexey Marchuk } else { 3568a01b4d6SAlexey Marchuk i++; 3578a01b4d6SAlexey Marchuk j++; 3588a01b4d6SAlexey Marchuk } 3598a01b4d6SAlexey Marchuk } 3608a01b4d6SAlexey Marchuk 3618a01b4d6SAlexey Marchuk /* Free the old array. */ 3628a01b4d6SAlexey Marchuk rdma_free_devices(g_ctx_list); 3638a01b4d6SAlexey Marchuk 3648a01b4d6SAlexey Marchuk exit: 3658a01b4d6SAlexey Marchuk /* 3668a01b4d6SAlexey Marchuk * Keep the newly returned array so that allocated protection domains 3678a01b4d6SAlexey Marchuk * are not freed unexpectedly. 3688a01b4d6SAlexey Marchuk */ 3698a01b4d6SAlexey Marchuk g_ctx_list = new_ctx_list; 3708a01b4d6SAlexey Marchuk return 0; 3718a01b4d6SAlexey Marchuk } 3728a01b4d6SAlexey Marchuk 3738a01b4d6SAlexey Marchuk struct ibv_pd * 3748a01b4d6SAlexey Marchuk spdk_rdma_utils_get_pd(struct ibv_context *context) 3758a01b4d6SAlexey Marchuk { 3768a01b4d6SAlexey Marchuk struct rdma_utils_device *dev; 3778a01b4d6SAlexey Marchuk int rc; 3788a01b4d6SAlexey Marchuk 3798a01b4d6SAlexey Marchuk pthread_mutex_lock(&g_dev_mutex); 3808a01b4d6SAlexey Marchuk 3818a01b4d6SAlexey Marchuk rc = rdma_sync_dev_list(); 3828a01b4d6SAlexey Marchuk if (rc != 0) { 3838a01b4d6SAlexey Marchuk pthread_mutex_unlock(&g_dev_mutex); 3848a01b4d6SAlexey Marchuk 3858a01b4d6SAlexey Marchuk SPDK_ERRLOG("Failed to sync RDMA device list\n"); 3868a01b4d6SAlexey Marchuk return NULL; 3878a01b4d6SAlexey Marchuk } 3888a01b4d6SAlexey Marchuk 3898a01b4d6SAlexey Marchuk TAILQ_FOREACH(dev, &g_dev_list, tailq) { 3908a01b4d6SAlexey Marchuk if (dev->context == context && !dev->removed) { 3918a01b4d6SAlexey Marchuk dev->ref++; 3928a01b4d6SAlexey Marchuk pthread_mutex_unlock(&g_dev_mutex); 3938a01b4d6SAlexey Marchuk 3948a01b4d6SAlexey Marchuk return dev->pd; 3958a01b4d6SAlexey Marchuk } 3968a01b4d6SAlexey Marchuk } 3978a01b4d6SAlexey Marchuk 3988a01b4d6SAlexey Marchuk pthread_mutex_unlock(&g_dev_mutex); 3998a01b4d6SAlexey Marchuk 4008a01b4d6SAlexey Marchuk SPDK_ERRLOG("Failed to get PD\n"); 4018a01b4d6SAlexey Marchuk return NULL; 4028a01b4d6SAlexey Marchuk } 4038a01b4d6SAlexey Marchuk 4048a01b4d6SAlexey Marchuk void 4058a01b4d6SAlexey Marchuk spdk_rdma_utils_put_pd(struct ibv_pd *pd) 4068a01b4d6SAlexey Marchuk { 4078a01b4d6SAlexey Marchuk struct rdma_utils_device *dev, *tmp; 4088a01b4d6SAlexey Marchuk 4098a01b4d6SAlexey Marchuk pthread_mutex_lock(&g_dev_mutex); 4108a01b4d6SAlexey Marchuk 4118a01b4d6SAlexey Marchuk TAILQ_FOREACH_SAFE(dev, &g_dev_list, tailq, tmp) { 4128a01b4d6SAlexey Marchuk if (dev->pd == pd) { 4138a01b4d6SAlexey Marchuk assert(dev->ref > 0); 4148a01b4d6SAlexey Marchuk dev->ref--; 4158a01b4d6SAlexey Marchuk 4168a01b4d6SAlexey Marchuk rdma_remove_dev(dev); 4178a01b4d6SAlexey Marchuk } 4188a01b4d6SAlexey Marchuk } 4198a01b4d6SAlexey Marchuk 4208a01b4d6SAlexey Marchuk rdma_sync_dev_list(); 4218a01b4d6SAlexey Marchuk 4228a01b4d6SAlexey Marchuk pthread_mutex_unlock(&g_dev_mutex); 4238a01b4d6SAlexey Marchuk } 4248a01b4d6SAlexey Marchuk 4258a01b4d6SAlexey Marchuk __attribute__((destructor)) static void 4268a01b4d6SAlexey Marchuk _rdma_utils_fini(void) 4278a01b4d6SAlexey Marchuk { 4288a01b4d6SAlexey Marchuk struct rdma_utils_device *dev, *tmp; 4298a01b4d6SAlexey Marchuk 4308a01b4d6SAlexey Marchuk TAILQ_FOREACH_SAFE(dev, &g_dev_list, tailq, tmp) { 4318a01b4d6SAlexey Marchuk dev->removed = true; 4328a01b4d6SAlexey Marchuk dev->ref = 0; 4338a01b4d6SAlexey Marchuk rdma_remove_dev(dev); 4348a01b4d6SAlexey Marchuk } 4358a01b4d6SAlexey Marchuk 4368a01b4d6SAlexey Marchuk if (g_ctx_list != NULL) { 4378a01b4d6SAlexey Marchuk rdma_free_devices(g_ctx_list); 4388a01b4d6SAlexey Marchuk g_ctx_list = NULL; 4398a01b4d6SAlexey Marchuk } 4408a01b4d6SAlexey Marchuk } 4410a9c0239SAlexey Marchuk 4420a9c0239SAlexey Marchuk struct spdk_memory_domain * 4430a9c0239SAlexey Marchuk spdk_rdma_utils_get_memory_domain(struct ibv_pd *pd) 4440a9c0239SAlexey Marchuk { 4450a9c0239SAlexey Marchuk struct rdma_utils_memory_domain *domain = NULL; 446*141dc943SAlexey Marchuk struct spdk_memory_domain_ctx ctx = {}; 4470a9c0239SAlexey Marchuk int rc; 4480a9c0239SAlexey Marchuk 4490a9c0239SAlexey Marchuk pthread_mutex_lock(&g_memory_domains_lock); 4500a9c0239SAlexey Marchuk 4510a9c0239SAlexey Marchuk TAILQ_FOREACH(domain, &g_memory_domains, link) { 4520a9c0239SAlexey Marchuk if (domain->pd == pd) { 4530a9c0239SAlexey Marchuk domain->ref++; 4540a9c0239SAlexey Marchuk pthread_mutex_unlock(&g_memory_domains_lock); 4550a9c0239SAlexey Marchuk return domain->domain; 4560a9c0239SAlexey Marchuk } 4570a9c0239SAlexey Marchuk } 4580a9c0239SAlexey Marchuk 4590a9c0239SAlexey Marchuk domain = calloc(1, sizeof(*domain)); 4600a9c0239SAlexey Marchuk if (!domain) { 4610a9c0239SAlexey Marchuk SPDK_ERRLOG("Memory allocation failed\n"); 4620a9c0239SAlexey Marchuk pthread_mutex_unlock(&g_memory_domains_lock); 4630a9c0239SAlexey Marchuk return NULL; 4640a9c0239SAlexey Marchuk } 4650a9c0239SAlexey Marchuk 4660a9c0239SAlexey Marchuk domain->rdma_ctx.size = sizeof(domain->rdma_ctx); 4670a9c0239SAlexey Marchuk domain->rdma_ctx.ibv_pd = pd; 4680a9c0239SAlexey Marchuk ctx.size = sizeof(ctx); 4690a9c0239SAlexey Marchuk ctx.user_ctx = &domain->rdma_ctx; 470*141dc943SAlexey Marchuk ctx.user_ctx_size = domain->rdma_ctx.size; 4710a9c0239SAlexey Marchuk 4720a9c0239SAlexey Marchuk rc = spdk_memory_domain_create(&domain->domain, SPDK_DMA_DEVICE_TYPE_RDMA, &ctx, 4730a9c0239SAlexey Marchuk SPDK_RDMA_DMA_DEVICE); 4740a9c0239SAlexey Marchuk if (rc) { 4750a9c0239SAlexey Marchuk SPDK_ERRLOG("Failed to create memory domain\n"); 4760a9c0239SAlexey Marchuk free(domain); 4770a9c0239SAlexey Marchuk pthread_mutex_unlock(&g_memory_domains_lock); 4780a9c0239SAlexey Marchuk return NULL; 4790a9c0239SAlexey Marchuk } 4800a9c0239SAlexey Marchuk 4810a9c0239SAlexey Marchuk domain->pd = pd; 4820a9c0239SAlexey Marchuk domain->ref = 1; 4830a9c0239SAlexey Marchuk TAILQ_INSERT_TAIL(&g_memory_domains, domain, link); 4840a9c0239SAlexey Marchuk 4850a9c0239SAlexey Marchuk pthread_mutex_unlock(&g_memory_domains_lock); 4860a9c0239SAlexey Marchuk 4870a9c0239SAlexey Marchuk return domain->domain; 4880a9c0239SAlexey Marchuk } 4890a9c0239SAlexey Marchuk 4900a9c0239SAlexey Marchuk int 4910a9c0239SAlexey Marchuk spdk_rdma_utils_put_memory_domain(struct spdk_memory_domain *_domain) 4920a9c0239SAlexey Marchuk { 4930a9c0239SAlexey Marchuk struct rdma_utils_memory_domain *domain = NULL; 4940a9c0239SAlexey Marchuk 4950a9c0239SAlexey Marchuk if (!_domain) { 4960a9c0239SAlexey Marchuk return 0; 4970a9c0239SAlexey Marchuk } 4980a9c0239SAlexey Marchuk 4990a9c0239SAlexey Marchuk pthread_mutex_lock(&g_memory_domains_lock); 5000a9c0239SAlexey Marchuk 5010a9c0239SAlexey Marchuk TAILQ_FOREACH(domain, &g_memory_domains, link) { 5020a9c0239SAlexey Marchuk if (domain->domain == _domain) { 5030a9c0239SAlexey Marchuk break; 5040a9c0239SAlexey Marchuk } 5050a9c0239SAlexey Marchuk } 5060a9c0239SAlexey Marchuk 5070a9c0239SAlexey Marchuk if (!domain) { 5080a9c0239SAlexey Marchuk pthread_mutex_unlock(&g_memory_domains_lock); 5090a9c0239SAlexey Marchuk return -ENODEV; 5100a9c0239SAlexey Marchuk } 5110a9c0239SAlexey Marchuk assert(domain->ref > 0); 5120a9c0239SAlexey Marchuk 5130a9c0239SAlexey Marchuk domain->ref--; 5140a9c0239SAlexey Marchuk 5150a9c0239SAlexey Marchuk if (domain->ref == 0) { 5160a9c0239SAlexey Marchuk spdk_memory_domain_destroy(domain->domain); 5170a9c0239SAlexey Marchuk TAILQ_REMOVE(&g_memory_domains, domain, link); 5180a9c0239SAlexey Marchuk free(domain); 5190a9c0239SAlexey Marchuk } 5200a9c0239SAlexey Marchuk 5210a9c0239SAlexey Marchuk pthread_mutex_unlock(&g_memory_domains_lock); 5220a9c0239SAlexey Marchuk 5230a9c0239SAlexey Marchuk return 0; 5240a9c0239SAlexey Marchuk } 52520b14cdcSJim Harris 52620b14cdcSJim Harris int32_t 52720b14cdcSJim Harris spdk_rdma_cm_id_get_numa_id(struct rdma_cm_id *cm_id) 52820b14cdcSJim Harris { 52920b14cdcSJim Harris struct sockaddr *sa; 53020b14cdcSJim Harris char addr[64]; 53120b14cdcSJim Harris char ifc[64]; 53220b14cdcSJim Harris uint32_t numa_id; 53320b14cdcSJim Harris int rc; 53420b14cdcSJim Harris 53520b14cdcSJim Harris sa = rdma_get_local_addr(cm_id); 53620b14cdcSJim Harris if (sa == NULL) { 53720b14cdcSJim Harris return SPDK_ENV_NUMA_ID_ANY; 53820b14cdcSJim Harris } 53920b14cdcSJim Harris rc = spdk_net_get_address_string(sa, addr, sizeof(addr)); 54020b14cdcSJim Harris if (rc) { 54120b14cdcSJim Harris return SPDK_ENV_NUMA_ID_ANY; 54220b14cdcSJim Harris } 54320b14cdcSJim Harris rc = spdk_net_get_interface_name(addr, ifc, sizeof(ifc)); 54420b14cdcSJim Harris if (rc) { 54520b14cdcSJim Harris return SPDK_ENV_NUMA_ID_ANY; 54620b14cdcSJim Harris } 54720b14cdcSJim Harris rc = spdk_read_sysfs_attribute_uint32(&numa_id, 54820b14cdcSJim Harris "/sys/class/net/%s/device/numa_node", ifc); 54920b14cdcSJim Harris if (rc || numa_id > INT32_MAX) { 55020b14cdcSJim Harris return SPDK_ENV_NUMA_ID_ANY; 55120b14cdcSJim Harris } 55220b14cdcSJim Harris return (int32_t)numa_id; 55320b14cdcSJim Harris } 554