/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) Intel Corporation. All rights reserved.
 * Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 */

#include "spdk_internal/rdma_utils.h"

#include "spdk/log.h"
#include "spdk/string.h"
#include "spdk/likely.h"

#include "spdk_internal/assert.h"

#include <rdma/rdma_cma.h>
#include <rdma/rdma_verbs.h>

struct rdma_utils_device {
	struct ibv_pd			*pd;
	struct ibv_context		*context;
	int				ref;
	bool				removed;
	TAILQ_ENTRY(rdma_utils_device)	tailq;
};

struct spdk_rdma_utils_mem_map {
	struct spdk_mem_map			*map;
	struct ibv_pd				*pd;
	struct spdk_nvme_rdma_hooks		*hooks;
	uint32_t				ref_count;
	enum spdk_rdma_utils_memory_map_role	role;
	LIST_ENTRY(spdk_rdma_utils_mem_map)	link;
};

static pthread_mutex_t g_dev_mutex = PTHREAD_MUTEX_INITIALIZER;
static struct ibv_context **g_ctx_list = NULL;
static TAILQ_HEAD(, rdma_utils_device) g_dev_list = TAILQ_HEAD_INITIALIZER(g_dev_list);

static LIST_HEAD(, spdk_rdma_utils_mem_map) g_rdma_utils_mr_maps = LIST_HEAD_INITIALIZER(
		&g_rdma_utils_mr_maps);
static pthread_mutex_t g_rdma_mr_maps_mutex = PTHREAD_MUTEX_INITIALIZER;
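
/*
 * Notification callback for the SPDK memory map. On register, either store the
 * rkey returned by the user-provided hooks, or register an ibv_mr with access
 * flags derived from the map's role. On unregister, deregister the MR (when we
 * own it) and clear the translation.
 */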
static int
rdma_utils_mem_notify(void *cb_ctx, struct spdk_mem_map *map,
		      enum spdk_mem_map_notify_action action,
		      void *vaddr, size_t size)
{
	struct spdk_rdma_utils_mem_map *rmap = cb_ctx;
	struct ibv_pd *pd = rmap->pd;
	struct ibv_mr *mr;
	uint32_t access_flags = 0;
	int rc;

	switch (action) {
	case SPDK_MEM_MAP_NOTIFY_REGISTER:
		if (rmap->hooks && rmap->hooks->get_rkey) {
			rc = spdk_mem_map_set_translation(map, (uint64_t)vaddr, size,
							  rmap->hooks->get_rkey(pd, vaddr, size));
		} else {
			switch (rmap->role) {
			case SPDK_RDMA_UTILS_MEMORY_MAP_ROLE_TARGET:
				access_flags = IBV_ACCESS_LOCAL_WRITE;
				if (pd->context->device->transport_type == IBV_TRANSPORT_IWARP) {
					/* IWARP requires REMOTE_WRITE permission for RDMA_READ operation */
					access_flags |= IBV_ACCESS_REMOTE_WRITE;
				}
				break;
			case SPDK_RDMA_UTILS_MEMORY_MAP_ROLE_INITIATOR:
				access_flags = IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_WRITE;
				break;
			default:
				SPDK_UNREACHABLE();
			}
#ifdef IBV_ACCESS_OPTIONAL_FIRST
			access_flags |= IBV_ACCESS_RELAXED_ORDERING;
#endif
			mr = ibv_reg_mr(pd, vaddr, size, access_flags);
			if (mr == NULL) {
				SPDK_ERRLOG("ibv_reg_mr() failed\n");
				return -1;
			} else {
				rc = spdk_mem_map_set_translation(map, (uint64_t)vaddr, size, (uint64_t)mr);
			}
		}
		break;
	case SPDK_MEM_MAP_NOTIFY_UNREGISTER:
		if (rmap->hooks == NULL || rmap->hooks->get_rkey == NULL) {
			mr = (struct ibv_mr *)spdk_mem_map_translate(map, (uint64_t)vaddr, NULL);
			if (mr) {
				ibv_dereg_mr(mr);
			}
		}
		rc = spdk_mem_map_clear_translation(map, (uint64_t)vaddr, size);
		break;
	default:
		SPDK_UNREACHABLE();
	}

	return rc;
}

static int
rdma_check_contiguous_entries(uint64_t addr_1, uint64_t addr_2)
{
	/* Two contiguous mappings will point to the same address which is the start of the RDMA MR. */
	return addr_1 == addr_2;
}

const struct spdk_mem_map_ops g_rdma_map_ops = {
	.notify_cb = rdma_utils_mem_notify,
	.are_contiguous = rdma_check_contiguous_entries
};

static void
_rdma_free_mem_map(struct spdk_rdma_utils_mem_map *map)
{
	assert(map);

	if (map->hooks) {
		spdk_free(map);
	} else {
		free(map);
	}
}
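
/*
 * Return a reference-counted memory map bound to the given protection domain
 * and role. An existing map for the same pd/role pair is reused; otherwise a
 * new spdk_mem_map is allocated whose notify callback registers memory regions
 * on pd (or stores rkeys from the hooks) as memory is added to the map.
 */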
struct spdk_rdma_utils_mem_map *
spdk_rdma_utils_create_mem_map(struct ibv_pd *pd, struct spdk_nvme_rdma_hooks *hooks,
			       enum spdk_rdma_utils_memory_map_role role)
{
	struct spdk_rdma_utils_mem_map *map;

	pthread_mutex_lock(&g_rdma_mr_maps_mutex);
	/* Look up existing mem map registration for this pd */
	LIST_FOREACH(map, &g_rdma_utils_mr_maps, link) {
		if (map->pd == pd && map->role == role) {
			map->ref_count++;
			pthread_mutex_unlock(&g_rdma_mr_maps_mutex);
			return map;
		}
	}

	if (hooks) {
		map = spdk_zmalloc(sizeof(*map), 0, NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
	} else {
		map = calloc(1, sizeof(*map));
	}
	if (!map) {
		pthread_mutex_unlock(&g_rdma_mr_maps_mutex);
		SPDK_ERRLOG("Memory allocation failed\n");
		return NULL;
	}
	map->pd = pd;
	map->ref_count = 1;
	map->hooks = hooks;
	map->role = role;
	map->map = spdk_mem_map_alloc(0, &g_rdma_map_ops, map);
	if (!map->map) {
		SPDK_ERRLOG("Unable to create memory map\n");
		_rdma_free_mem_map(map);
		pthread_mutex_unlock(&g_rdma_mr_maps_mutex);
		return NULL;
	}
	LIST_INSERT_HEAD(&g_rdma_utils_mr_maps, map, link);

	pthread_mutex_unlock(&g_rdma_mr_maps_mutex);

	return map;
}

void
spdk_rdma_utils_free_mem_map(struct spdk_rdma_utils_mem_map **_map)
{
	struct spdk_rdma_utils_mem_map *map;

	if (!_map) {
		return;
	}

	map = *_map;
	if (!map) {
		return;
	}
	*_map = NULL;

	pthread_mutex_lock(&g_rdma_mr_maps_mutex);
	assert(map->ref_count > 0);
	map->ref_count--;
	if (map->ref_count != 0) {
		pthread_mutex_unlock(&g_rdma_mr_maps_mutex);
		return;
	}

	LIST_REMOVE(map, link);
	pthread_mutex_unlock(&g_rdma_mr_maps_mutex);
	if (map->map) {
		spdk_mem_map_free(&map->map);
	}
	_rdma_free_mem_map(map);
}

int
spdk_rdma_utils_get_translation(struct spdk_rdma_utils_mem_map *map, void *address,
				size_t length, struct spdk_rdma_utils_memory_translation *translation)
{
	uint64_t real_length = length;

	assert(map);
	assert(address);
	assert(translation);

	if (map->hooks && map->hooks->get_rkey) {
		translation->translation_type = SPDK_RDMA_UTILS_TRANSLATION_KEY;
		translation->mr_or_key.key = spdk_mem_map_translate(map->map, (uint64_t)address, &real_length);
	} else {
		translation->translation_type = SPDK_RDMA_UTILS_TRANSLATION_MR;
		translation->mr_or_key.mr = (struct ibv_mr *)spdk_mem_map_translate(map->map, (uint64_t)address,
					    &real_length);
		if (spdk_unlikely(!translation->mr_or_key.mr)) {
			SPDK_ERRLOG("No translation for ptr %p, size %zu\n", address, length);
			return -EINVAL;
		}
	}

	assert(real_length >= length);

	return 0;
}

static struct rdma_utils_device *
rdma_add_dev(struct ibv_context *context)
{
	struct rdma_utils_device *dev;

	dev = calloc(1, sizeof(*dev));
	if (dev == NULL) {
		SPDK_ERRLOG("Failed to allocate RDMA device object.\n");
		return NULL;
	}

	dev->pd = ibv_alloc_pd(context);
	if (dev->pd == NULL) {
		SPDK_ERRLOG("ibv_alloc_pd() failed: %s (%d)\n", spdk_strerror(errno), errno);
		free(dev);
		return NULL;
	}

	dev->context = context;
	TAILQ_INSERT_TAIL(&g_dev_list, dev, tailq);

	return dev;
}

static void
rdma_remove_dev(struct rdma_utils_device *dev)
{
	if (!dev->removed || dev->ref > 0) {
		return;
	}

	/* Deallocate protection domain only if the device is already removed and
	 * there is no reference.
	 */
	TAILQ_REMOVE(&g_dev_list, dev, tailq);
	ibv_dealloc_pd(dev->pd);
	free(dev);
}

static int
ctx_cmp(const void *_c1, const void *_c2)
{
	struct ibv_context *c1 = *(struct ibv_context **)_c1;
	struct ibv_context *c2 = *(struct ibv_context **)_c2;

	return c1 < c2 ? -1 : c1 > c2;
}

static int
rdma_sync_dev_list(void)
{
	struct ibv_context **new_ctx_list;
	int i, j;
	int num_devs = 0;

	/*
	 * rdma_get_devices() returns a NULL terminated array of opened RDMA devices,
	 * and sets num_devs to the number of the returned devices.
	 */
	new_ctx_list = rdma_get_devices(&num_devs);
	if (new_ctx_list == NULL) {
		SPDK_ERRLOG("rdma_get_devices() failed: %s (%d)\n", spdk_strerror(errno), errno);
		return -ENODEV;
	}

	if (num_devs == 0) {
		rdma_free_devices(new_ctx_list);
		SPDK_ERRLOG("Returned RDMA device array was empty\n");
		return -ENODEV;
	}

	/*
	 * Sort new_ctx_list by addresses to update devices easily.
	 */
	qsort(new_ctx_list, num_devs, sizeof(struct ibv_context *), ctx_cmp);

	if (g_ctx_list == NULL) {
		/* If no old array, this is the first call. Add all devices. */
		for (i = 0; new_ctx_list[i] != NULL; i++) {
			rdma_add_dev(new_ctx_list[i]);
		}

		goto exit;
	}

	for (i = j = 0; new_ctx_list[i] != NULL || g_ctx_list[j] != NULL;) {
		struct ibv_context *new_ctx = new_ctx_list[i];
		struct ibv_context *old_ctx = g_ctx_list[j];
		bool add = false, remove = false;

		/*
		 * If a context exists only in the new array, create a device for it,
		 * or if a context exists only in the old array, try removing the
		 * corresponding device.
		 */

		if (old_ctx == NULL) {
			add = true;
		} else if (new_ctx == NULL) {
			remove = true;
		} else if (new_ctx < old_ctx) {
			add = true;
		} else if (old_ctx < new_ctx) {
			remove = true;
		}

		if (add) {
			rdma_add_dev(new_ctx_list[i]);
			i++;
		} else if (remove) {
			struct rdma_utils_device *dev, *tmp;

			TAILQ_FOREACH_SAFE(dev, &g_dev_list, tailq, tmp) {
				if (dev->context == g_ctx_list[j]) {
					dev->removed = true;
					rdma_remove_dev(dev);
				}
			}
			j++;
		} else {
			i++;
			j++;
		}
	}

	/* Free the old array. */
	rdma_free_devices(g_ctx_list);

exit:
	/*
	 * Keep the newly returned array so that allocated protection domains
	 * are not freed unexpectedly.
	 */
	g_ctx_list = new_ctx_list;
	return 0;
}
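
/*
 * Look up (or create, via rdma_sync_dev_list()) the shared protection domain
 * for the given verbs context and take a reference on it. The reference is
 * released with spdk_rdma_utils_put_pd().
 */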
struct ibv_pd *
spdk_rdma_utils_get_pd(struct ibv_context *context)
{
	struct rdma_utils_device *dev;
	int rc;

	pthread_mutex_lock(&g_dev_mutex);

	rc = rdma_sync_dev_list();
	if (rc != 0) {
		pthread_mutex_unlock(&g_dev_mutex);

		SPDK_ERRLOG("Failed to sync RDMA device list\n");
		return NULL;
	}

	TAILQ_FOREACH(dev, &g_dev_list, tailq) {
		if (dev->context == context && !dev->removed) {
			dev->ref++;
			pthread_mutex_unlock(&g_dev_mutex);

			return dev->pd;
		}
	}

	pthread_mutex_unlock(&g_dev_mutex);

	SPDK_ERRLOG("Failed to get PD\n");
	return NULL;
}

void
spdk_rdma_utils_put_pd(struct ibv_pd *pd)
{
	struct rdma_utils_device *dev, *tmp;

	pthread_mutex_lock(&g_dev_mutex);

	TAILQ_FOREACH_SAFE(dev, &g_dev_list, tailq, tmp) {
		if (dev->pd == pd) {
			assert(dev->ref > 0);
			dev->ref--;

			rdma_remove_dev(dev);
		}
	}

	rdma_sync_dev_list();

	pthread_mutex_unlock(&g_dev_mutex);
}

__attribute__((destructor)) static void
_rdma_utils_fini(void)
{
	struct rdma_utils_device *dev, *tmp;

	TAILQ_FOREACH_SAFE(dev, &g_dev_list, tailq, tmp) {
		dev->removed = true;
		dev->ref = 0;
		rdma_remove_dev(dev);
	}

	if (g_ctx_list != NULL) {
		rdma_free_devices(g_ctx_list);
		g_ctx_list = NULL;
	}
}
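
/*
 * Illustrative usage sketch, not part of this library. It shows how a consumer
 * might combine the PD and memory-map helpers above; "cm_id" (a connected
 * struct rdma_cm_id *), "buf" and "buf_len" are hypothetical caller-owned
 * names, and error handling is abbreviated.
 *
 *	struct ibv_pd *pd;
 *	struct spdk_rdma_utils_mem_map *map;
 *	struct spdk_rdma_utils_memory_translation translation;
 *
 *	pd = spdk_rdma_utils_get_pd(cm_id->verbs);
 *	map = spdk_rdma_utils_create_mem_map(pd, NULL,
 *					     SPDK_RDMA_UTILS_MEMORY_MAP_ROLE_INITIATOR);
 *	if (spdk_rdma_utils_get_translation(map, buf, buf_len, &translation) == 0) {
 *		// translation.mr_or_key.mr->lkey can be used to fill an ibv_sge
 *	}
 *	spdk_rdma_utils_free_mem_map(&map);
 *	spdk_rdma_utils_put_pd(pd);
 */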