/*-
 *   BSD LICENSE
 *
 *   Copyright (c) Intel Corporation.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "spdk/stdinc.h"

#include "env_internal.h"

#include <rte_config.h>
#include <rte_eal_memconfig.h>

#include "spdk_internal/assert.h"

#include "spdk/assert.h"
#include "spdk/likely.h"
#include "spdk/queue.h"
#include "spdk/util.h"

#if DEBUG
#define DEBUG_PRINT(...) fprintf(stderr, __VA_ARGS__)
#else
#define DEBUG_PRINT(...)
#endif

#define FN_2MB_TO_4KB(fn)	(fn << (SHIFT_2MB - SHIFT_4KB))
#define FN_4KB_TO_2MB(fn)	(fn >> (SHIFT_2MB - SHIFT_4KB))

#define MAP_256TB_IDX(vfn_2mb)	((vfn_2mb) >> (SHIFT_1GB - SHIFT_2MB))
#define MAP_1GB_IDX(vfn_2mb)	((vfn_2mb) & ((1ULL << (SHIFT_1GB - SHIFT_2MB)) - 1))

#define _2MB_OFFSET(ptr)	(((uintptr_t)(ptr)) & (VALUE_2MB - 1))

/* Page is registered */
#define REG_MAP_REGISTERED	(1ULL << 62)

/* A notification region barrier. The 2MB translation entry that's marked
 * with this flag must be unregistered separately. This allows contiguous
 * regions to be unregistered in the same chunks they were registered.
 */
#define REG_MAP_NOTIFY_START	(1ULL << 63)

/* Translation of a single 2MB page. */
struct map_2mb {
	uint64_t translation_2mb;
};

/* Second-level map table indexed by bits [21..29] of the virtual address.
 * Each entry contains the address translation or error for entries that haven't
 * been retrieved yet.
 */
struct map_1gb {
	struct map_2mb map[1ULL << (SHIFT_1GB - SHIFT_2MB)];
};

/* Top-level map table indexed by bits [30..47] of the virtual address.
 * Each entry points to a second-level map table or NULL.
 */
struct map_256tb {
	struct map_1gb *map[1ULL << (SHIFT_256TB - SHIFT_1GB)];
};
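/* Illustrative sketch (not part of this file's logic): how a virtual address is
 * split into the indexes used by the three-level map above. The helper name and
 * the SPDK_MEM_EXAMPLES guard are hypothetical and exist only for documentation;
 * the guard is never defined, so this code is not compiled into the library.
 */
#ifdef SPDK_MEM_EXAMPLES
static void
example_decompose_vaddr(uint64_t vaddr)
{
	uint64_t vfn_2mb = vaddr >> SHIFT_2MB;		/* 2MB virtual frame number */
	uint64_t idx_256tb = MAP_256TB_IDX(vfn_2mb);	/* bits [30..47]: index into map_256tb */
	uint64_t idx_1gb = MAP_1GB_IDX(vfn_2mb);	/* bits [21..29]: index into map_1gb */
	uint64_t offset = _2MB_OFFSET(vaddr);		/* bits [0..20]: offset within the 2MB page */

	printf("vaddr=0x%jx -> idx_256tb=%ju idx_1gb=%ju offset=0x%jx\n",
	       (uintmax_t)vaddr, (uintmax_t)idx_256tb, (uintmax_t)idx_1gb, (uintmax_t)offset);
}
#endif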
/* Page-granularity memory address translation */
struct spdk_mem_map {
	struct map_256tb map_256tb;
	pthread_mutex_t mutex;
	uint64_t default_translation;
	struct spdk_mem_map_ops ops;
	void *cb_ctx;
	TAILQ_ENTRY(spdk_mem_map) tailq;
};

/* Registrations map. The 64 bit translations are bit fields with the
 * following layout (starting with the low bits):
 *    0 - 61 : reserved
 *   62 - 63 : flags
 */
static struct spdk_mem_map *g_mem_reg_map;
static TAILQ_HEAD(, spdk_mem_map) g_spdk_mem_maps = TAILQ_HEAD_INITIALIZER(g_spdk_mem_maps);
static pthread_mutex_t g_spdk_mem_map_mutex = PTHREAD_MUTEX_INITIALIZER;

/*
 * Walk the currently registered memory via the main memory registration map
 * and call the new map's notify callback for each virtually contiguous region.
 */
static int
spdk_mem_map_notify_walk(struct spdk_mem_map *map, enum spdk_mem_map_notify_action action)
{
	size_t idx_256tb;
	uint64_t idx_1gb;
	uint64_t contig_start = UINT64_MAX;
	uint64_t contig_end = UINT64_MAX;
	struct map_1gb *map_1gb;
	int rc;

	if (!g_mem_reg_map) {
		return -EINVAL;
	}

	/* Hold the memory registration map mutex so no new registrations can be added while we are looping. */
	pthread_mutex_lock(&g_mem_reg_map->mutex);

	for (idx_256tb = 0;
	     idx_256tb < sizeof(g_mem_reg_map->map_256tb.map) / sizeof(g_mem_reg_map->map_256tb.map[0]);
	     idx_256tb++) {
		map_1gb = g_mem_reg_map->map_256tb.map[idx_256tb];

		if (!map_1gb) {
			if (contig_start != UINT64_MAX) {
				/* End of a virtually contiguous range */
				rc = map->ops.notify_cb(map->cb_ctx, map, action,
							(void *)contig_start,
							contig_end - contig_start + VALUE_2MB);
				/* Don't bother handling unregister failures. It can't be any worse */
				if (rc != 0 && action == SPDK_MEM_MAP_NOTIFY_REGISTER) {
					goto err_unregister;
				}
			}
			contig_start = UINT64_MAX;
			continue;
		}

		for (idx_1gb = 0; idx_1gb < sizeof(map_1gb->map) / sizeof(map_1gb->map[0]); idx_1gb++) {
			if ((map_1gb->map[idx_1gb].translation_2mb & REG_MAP_REGISTERED) &&
			    (contig_start == UINT64_MAX ||
			     (map_1gb->map[idx_1gb].translation_2mb & REG_MAP_NOTIFY_START) == 0)) {
				/* Rebuild the virtual address from the indexes */
				uint64_t vaddr = (idx_256tb << SHIFT_1GB) | (idx_1gb << SHIFT_2MB);

				if (contig_start == UINT64_MAX) {
					contig_start = vaddr;
				}

				contig_end = vaddr;
			} else {
				if (contig_start != UINT64_MAX) {
					/* End of a virtually contiguous range */
					rc = map->ops.notify_cb(map->cb_ctx, map, action,
								(void *)contig_start,
								contig_end - contig_start + VALUE_2MB);
					/* Don't bother handling unregister failures. It can't be any worse */
					if (rc != 0 && action == SPDK_MEM_MAP_NOTIFY_REGISTER) {
						goto err_unregister;
					}

					/* This page might be a part of a neighbour region, so process
					 * it again. The idx_1gb will be incremented immediately.
					 */
					idx_1gb--;
				}
				contig_start = UINT64_MAX;
			}
		}
	}

	pthread_mutex_unlock(&g_mem_reg_map->mutex);
	return 0;

err_unregister:
	/* Unwind to the first empty translation so we don't unregister
	 * a region that just failed to register.
	 */
	idx_256tb = MAP_256TB_IDX((contig_start >> SHIFT_2MB) - 1);
	idx_1gb = MAP_1GB_IDX((contig_start >> SHIFT_2MB) - 1);
	contig_start = UINT64_MAX;
	contig_end = UINT64_MAX;

	/* Unregister any memory we managed to register before the failure */
	for (; idx_256tb < SIZE_MAX; idx_256tb--) {
		map_1gb = g_mem_reg_map->map_256tb.map[idx_256tb];

		if (!map_1gb) {
			if (contig_end != UINT64_MAX) {
				/* End of a virtually contiguous range */
				map->ops.notify_cb(map->cb_ctx, map,
						   SPDK_MEM_MAP_NOTIFY_UNREGISTER,
						   (void *)contig_start,
						   contig_end - contig_start + VALUE_2MB);
			}
			contig_end = UINT64_MAX;
			continue;
		}

		for (; idx_1gb < UINT64_MAX; idx_1gb--) {
			if ((map_1gb->map[idx_1gb].translation_2mb & REG_MAP_REGISTERED) &&
			    (contig_end == UINT64_MAX ||
			     (map_1gb->map[idx_1gb].translation_2mb & REG_MAP_NOTIFY_START) == 0)) {
				/* Rebuild the virtual address from the indexes */
				uint64_t vaddr = (idx_256tb << SHIFT_1GB) | (idx_1gb << SHIFT_2MB);

				if (contig_end == UINT64_MAX) {
					contig_end = vaddr;
				}
				contig_start = vaddr;
			} else {
				if (contig_end != UINT64_MAX) {
					/* End of a virtually contiguous range */
					map->ops.notify_cb(map->cb_ctx, map,
							   SPDK_MEM_MAP_NOTIFY_UNREGISTER,
							   (void *)contig_start,
							   contig_end - contig_start + VALUE_2MB);
					idx_1gb++;
				}
				contig_end = UINT64_MAX;
			}
		}
		idx_1gb = sizeof(map_1gb->map) / sizeof(map_1gb->map[0]) - 1;
	}

	pthread_mutex_unlock(&g_mem_reg_map->mutex);
	return rc;
}
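/* The walk above invokes a map's notify_cb once per virtually contiguous,
 * 2MB-granular region. A minimal sketch of such a callback follows; the
 * function name and its body are hypothetical (a real callback would pin or
 * map the region, e.g. register it with an IOMMU or an RDMA device), and the
 * SPDK_MEM_EXAMPLES guard is never defined, so it is not compiled.
 */
#ifdef SPDK_MEM_EXAMPLES
static int
example_notify_cb(void *cb_ctx, struct spdk_mem_map *map,
		  enum spdk_mem_map_notify_action action,
		  void *vaddr, size_t size)
{
	switch (action) {
	case SPDK_MEM_MAP_NOTIFY_REGISTER:
		/* [vaddr, vaddr + size) is newly registered; store a translation
		 * (here simply the identity mapping) so later
		 * spdk_mem_map_translate() calls can resolve it.
		 */
		return spdk_mem_map_set_translation(map, (uint64_t)vaddr, size, (uint64_t)vaddr);
	case SPDK_MEM_MAP_NOTIFY_UNREGISTER:
		/* The region is going away; drop whatever was stored for it. */
		return spdk_mem_map_clear_translation(map, (uint64_t)vaddr, size);
	default:
		return -EINVAL;
	}
}
#endif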
struct spdk_mem_map *
spdk_mem_map_alloc(uint64_t default_translation, const struct spdk_mem_map_ops *ops, void *cb_ctx)
{
	struct spdk_mem_map *map;
	int rc;

	map = calloc(1, sizeof(*map));
	if (map == NULL) {
		return NULL;
	}

	if (pthread_mutex_init(&map->mutex, NULL)) {
		free(map);
		return NULL;
	}

	map->default_translation = default_translation;
	map->cb_ctx = cb_ctx;
	if (ops) {
		map->ops = *ops;
	}

	if (ops && ops->notify_cb) {
		pthread_mutex_lock(&g_spdk_mem_map_mutex);
		rc = spdk_mem_map_notify_walk(map, SPDK_MEM_MAP_NOTIFY_REGISTER);
		if (rc != 0) {
			pthread_mutex_unlock(&g_spdk_mem_map_mutex);
			DEBUG_PRINT("Initial mem_map notify failed\n");
			pthread_mutex_destroy(&map->mutex);
			free(map);
			return NULL;
		}
		TAILQ_INSERT_TAIL(&g_spdk_mem_maps, map, tailq);
		pthread_mutex_unlock(&g_spdk_mem_map_mutex);
	}

	return map;
}

void
spdk_mem_map_free(struct spdk_mem_map **pmap)
{
	struct spdk_mem_map *map;
	size_t i;

	if (!pmap) {
		return;
	}

	map = *pmap;

	if (!map) {
		return;
	}

	if (map->ops.notify_cb) {
		pthread_mutex_lock(&g_spdk_mem_map_mutex);
		spdk_mem_map_notify_walk(map, SPDK_MEM_MAP_NOTIFY_UNREGISTER);
		TAILQ_REMOVE(&g_spdk_mem_maps, map, tailq);
		pthread_mutex_unlock(&g_spdk_mem_map_mutex);
	}

	for (i = 0; i < sizeof(map->map_256tb.map) / sizeof(map->map_256tb.map[0]); i++) {
		free(map->map_256tb.map[i]);
	}

	pthread_mutex_destroy(&map->mutex);

	free(map);
	*pmap = NULL;
}
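/* Usage sketch, assuming the hypothetical example_notify_cb above: a consumer
 * (e.g. a driver that needs per-2MB translations) allocates a map with its own
 * ops and a default translation for unmapped addresses, then tears it down with
 * spdk_mem_map_free(). Names prefixed with example_ are hypothetical; the
 * SPDK_MEM_EXAMPLES guard is never defined.
 */
#ifdef SPDK_MEM_EXAMPLES
static void
example_map_lifecycle(void)
{
	const struct spdk_mem_map_ops example_ops = {
		.notify_cb = example_notify_cb,	/* called for every registered region */
		.are_contiguous = NULL,		/* no coalescing in spdk_mem_map_translate() */
	};
	struct spdk_mem_map *map;

	/* 0 is returned for addresses that have no translation yet. */
	map = spdk_mem_map_alloc(0, &example_ops, NULL);
	if (map == NULL) {
		return;
	}

	/* ... use spdk_mem_map_translate()/set_translation() on the map ... */

	spdk_mem_map_free(&map);	/* also sets map back to NULL */
}
#endif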
int
spdk_mem_register(void *vaddr, size_t len)
{
	struct spdk_mem_map *map;
	int rc;
	void *seg_vaddr;
	size_t seg_len;
	uint64_t reg;

	if ((uintptr_t)vaddr & ~MASK_256TB) {
		DEBUG_PRINT("invalid usermode virtual address %p\n", vaddr);
		return -EINVAL;
	}

	if (((uintptr_t)vaddr & MASK_2MB) || (len & MASK_2MB)) {
		DEBUG_PRINT("invalid %s parameters, vaddr=%p len=%ju\n",
			    __func__, vaddr, len);
		return -EINVAL;
	}

	if (len == 0) {
		return 0;
	}

	pthread_mutex_lock(&g_spdk_mem_map_mutex);

	seg_vaddr = vaddr;
	seg_len = len;
	while (seg_len > 0) {
		reg = spdk_mem_map_translate(g_mem_reg_map, (uint64_t)seg_vaddr, NULL);
		if (reg & REG_MAP_REGISTERED) {
			pthread_mutex_unlock(&g_spdk_mem_map_mutex);
			return -EBUSY;
		}
		seg_vaddr += VALUE_2MB;
		seg_len -= VALUE_2MB;
	}

	seg_vaddr = vaddr;
	seg_len = 0;
	while (len > 0) {
		spdk_mem_map_set_translation(g_mem_reg_map, (uint64_t)vaddr, VALUE_2MB,
					     seg_len == 0 ? REG_MAP_REGISTERED | REG_MAP_NOTIFY_START : REG_MAP_REGISTERED);
		seg_len += VALUE_2MB;
		vaddr += VALUE_2MB;
		len -= VALUE_2MB;
	}

	TAILQ_FOREACH(map, &g_spdk_mem_maps, tailq) {
		rc = map->ops.notify_cb(map->cb_ctx, map, SPDK_MEM_MAP_NOTIFY_REGISTER, seg_vaddr, seg_len);
		if (rc != 0) {
			pthread_mutex_unlock(&g_spdk_mem_map_mutex);
			return rc;
		}
	}

	pthread_mutex_unlock(&g_spdk_mem_map_mutex);
	return 0;
}
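/* Usage sketch for the registration API: both the address and the length must
 * be 2MB aligned, and a region must be unregistered in the same chunk it was
 * registered in (REG_MAP_NOTIFY_START marks the chunk boundaries). The buffer
 * allocation below via posix_memalign() is purely illustrative; the
 * SPDK_MEM_EXAMPLES guard is never defined.
 */
#ifdef SPDK_MEM_EXAMPLES
static void
example_register_buffer(void)
{
	void *buf = NULL;
	size_t len = 2 * VALUE_2MB;	/* must be a multiple of 2MB */

	if (posix_memalign(&buf, VALUE_2MB, len) != 0) {
		return;
	}

	if (spdk_mem_register(buf, len) == 0) {
		/* ... the buffer is now visible to every registered mem map ... */
		spdk_mem_unregister(buf, len);	/* same vaddr/len as the register call */
	}

	free(buf);
}
#endif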
int
spdk_mem_unregister(void *vaddr, size_t len)
{
	struct spdk_mem_map *map;
	int rc;
	void *seg_vaddr;
	size_t seg_len;
	uint64_t reg, newreg;

	if ((uintptr_t)vaddr & ~MASK_256TB) {
		DEBUG_PRINT("invalid usermode virtual address %p\n", vaddr);
		return -EINVAL;
	}

	if (((uintptr_t)vaddr & MASK_2MB) || (len & MASK_2MB)) {
		DEBUG_PRINT("invalid %s parameters, vaddr=%p len=%ju\n",
			    __func__, vaddr, len);
		return -EINVAL;
	}

	pthread_mutex_lock(&g_spdk_mem_map_mutex);

	/* The first page must be a start of a region. Also check if it's
	 * registered to make sure we don't return -ERANGE for non-registered
	 * regions.
	 */
	reg = spdk_mem_map_translate(g_mem_reg_map, (uint64_t)vaddr, NULL);
	if ((reg & REG_MAP_REGISTERED) && (reg & REG_MAP_NOTIFY_START) == 0) {
		pthread_mutex_unlock(&g_spdk_mem_map_mutex);
		return -ERANGE;
	}

	seg_vaddr = vaddr;
	seg_len = len;
	while (seg_len > 0) {
		reg = spdk_mem_map_translate(g_mem_reg_map, (uint64_t)seg_vaddr, NULL);
		if ((reg & REG_MAP_REGISTERED) == 0) {
			pthread_mutex_unlock(&g_spdk_mem_map_mutex);
			return -EINVAL;
		}
		seg_vaddr += VALUE_2MB;
		seg_len -= VALUE_2MB;
	}

	newreg = spdk_mem_map_translate(g_mem_reg_map, (uint64_t)seg_vaddr, NULL);
	/* If the next page is registered, it must be a start of a region as well,
	 * otherwise we'd be unregistering only a part of a region.
	 */
	if ((newreg & REG_MAP_NOTIFY_START) == 0 && (newreg & REG_MAP_REGISTERED)) {
		pthread_mutex_unlock(&g_spdk_mem_map_mutex);
		return -ERANGE;
	}
	seg_vaddr = vaddr;
	seg_len = 0;

	while (len > 0) {
		reg = spdk_mem_map_translate(g_mem_reg_map, (uint64_t)vaddr, NULL);
		spdk_mem_map_set_translation(g_mem_reg_map, (uint64_t)vaddr, VALUE_2MB, 0);

		if (seg_len > 0 && (reg & REG_MAP_NOTIFY_START)) {
			TAILQ_FOREACH(map, &g_spdk_mem_maps, tailq) {
				rc = map->ops.notify_cb(map->cb_ctx, map, SPDK_MEM_MAP_NOTIFY_UNREGISTER, seg_vaddr, seg_len);
				if (rc != 0) {
					pthread_mutex_unlock(&g_spdk_mem_map_mutex);
					return rc;
				}
			}

			seg_vaddr = vaddr;
			seg_len = VALUE_2MB;
		} else {
			seg_len += VALUE_2MB;
		}

		vaddr += VALUE_2MB;
		len -= VALUE_2MB;
	}

	if (seg_len > 0) {
		TAILQ_FOREACH(map, &g_spdk_mem_maps, tailq) {
			rc = map->ops.notify_cb(map->cb_ctx, map, SPDK_MEM_MAP_NOTIFY_UNREGISTER, seg_vaddr, seg_len);
			if (rc != 0) {
				pthread_mutex_unlock(&g_spdk_mem_map_mutex);
				return rc;
			}
		}
	}

	pthread_mutex_unlock(&g_spdk_mem_map_mutex);
	return 0;
}

static struct map_1gb *
spdk_mem_map_get_map_1gb(struct spdk_mem_map *map, uint64_t vfn_2mb)
{
	struct map_1gb *map_1gb;
	uint64_t idx_256tb = MAP_256TB_IDX(vfn_2mb);
	size_t i;

	if (spdk_unlikely(idx_256tb >= SPDK_COUNTOF(map->map_256tb.map))) {
		return NULL;
	}

	map_1gb = map->map_256tb.map[idx_256tb];

	if (!map_1gb) {
		pthread_mutex_lock(&map->mutex);

		/* Recheck to make sure nobody else got the mutex first. */
		map_1gb = map->map_256tb.map[idx_256tb];
		if (!map_1gb) {
			map_1gb = malloc(sizeof(struct map_1gb));
			if (map_1gb) {
				/* initialize all entries to default translation */
				for (i = 0; i < SPDK_COUNTOF(map_1gb->map); i++) {
					map_1gb->map[i].translation_2mb = map->default_translation;
				}
				map->map_256tb.map[idx_256tb] = map_1gb;
			}
		}

		pthread_mutex_unlock(&map->mutex);

		if (!map_1gb) {
			DEBUG_PRINT("allocation failed\n");
			return NULL;
		}
	}

	return map_1gb;
}

int
spdk_mem_map_set_translation(struct spdk_mem_map *map, uint64_t vaddr, uint64_t size,
			     uint64_t translation)
{
	uint64_t vfn_2mb;
	struct map_1gb *map_1gb;
	uint64_t idx_1gb;
	struct map_2mb *map_2mb;

	if ((uintptr_t)vaddr & ~MASK_256TB) {
		DEBUG_PRINT("invalid usermode virtual address %lu\n", vaddr);
		return -EINVAL;
	}

	/* For now, only 2 MB-aligned registrations are supported */
	if (((uintptr_t)vaddr & MASK_2MB) || (size & MASK_2MB)) {
		DEBUG_PRINT("invalid %s parameters, vaddr=%lu len=%ju\n",
			    __func__, vaddr, size);
		return -EINVAL;
	}

	vfn_2mb = vaddr >> SHIFT_2MB;

	while (size) {
		map_1gb = spdk_mem_map_get_map_1gb(map, vfn_2mb);
		if (!map_1gb) {
			DEBUG_PRINT("could not get %p map\n", (void *)vaddr);
			return -ENOMEM;
		}

		idx_1gb = MAP_1GB_IDX(vfn_2mb);
		map_2mb = &map_1gb->map[idx_1gb];
		map_2mb->translation_2mb = translation;

		size -= VALUE_2MB;
		vfn_2mb++;
	}

	return 0;
}
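/* Usage sketch for the translation setters: a consumer typically stores a
 * device-specific value (an IOVA, a registration handle cast to uint64_t,
 * etc.) for a 2MB-aligned range and clears it again when the range goes away,
 * restoring the map's default translation. The helper below is hypothetical
 * and the SPDK_MEM_EXAMPLES guard is never defined.
 */
#ifdef SPDK_MEM_EXAMPLES
static void
example_set_and_clear(struct spdk_mem_map *map, void *vaddr, uint64_t iova)
{
	/* vaddr must be 2MB aligned; cover a single 2MB page here. */
	if (spdk_mem_map_set_translation(map, (uint64_t)vaddr, VALUE_2MB, iova) == 0) {
		/* ... spdk_mem_map_translate() now returns iova for this page ... */
		spdk_mem_map_clear_translation(map, (uint64_t)vaddr, VALUE_2MB);
	}
}
#endif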
int
spdk_mem_map_clear_translation(struct spdk_mem_map *map, uint64_t vaddr, uint64_t size)
{
	uint64_t vfn_2mb;
	struct map_1gb *map_1gb;
	uint64_t idx_1gb;
	struct map_2mb *map_2mb;

	if ((uintptr_t)vaddr & ~MASK_256TB) {
		DEBUG_PRINT("invalid usermode virtual address %lu\n", vaddr);
		return -EINVAL;
	}

	/* For now, only 2 MB-aligned registrations are supported */
	if (((uintptr_t)vaddr & MASK_2MB) || (size & MASK_2MB)) {
		DEBUG_PRINT("invalid %s parameters, vaddr=%lu len=%ju\n",
			    __func__, vaddr, size);
		return -EINVAL;
	}

	vfn_2mb = vaddr >> SHIFT_2MB;

	while (size) {
		map_1gb = spdk_mem_map_get_map_1gb(map, vfn_2mb);
		if (!map_1gb) {
			DEBUG_PRINT("could not get %p map\n", (void *)vaddr);
			return -ENOMEM;
		}

		idx_1gb = MAP_1GB_IDX(vfn_2mb);
		map_2mb = &map_1gb->map[idx_1gb];
		map_2mb->translation_2mb = map->default_translation;

		size -= VALUE_2MB;
		vfn_2mb++;
	}

	return 0;
}

uint64_t
spdk_mem_map_translate(const struct spdk_mem_map *map, uint64_t vaddr, uint64_t *size)
{
	const struct map_1gb *map_1gb;
	const struct map_2mb *map_2mb;
	uint64_t idx_256tb;
	uint64_t idx_1gb;
	uint64_t vfn_2mb;
	uint64_t total_size = 0;
	uint64_t cur_size;
	uint64_t prev_translation;

	if (size != NULL) {
		total_size = *size;
		*size = 0;
	}

	if (spdk_unlikely(vaddr & ~MASK_256TB)) {
		DEBUG_PRINT("invalid usermode virtual address %p\n", (void *)vaddr);
		return map->default_translation;
	}

	vfn_2mb = vaddr >> SHIFT_2MB;
	idx_256tb = MAP_256TB_IDX(vfn_2mb);
	idx_1gb = MAP_1GB_IDX(vfn_2mb);

	map_1gb = map->map_256tb.map[idx_256tb];
	if (spdk_unlikely(!map_1gb)) {
		return map->default_translation;
	}

	cur_size = VALUE_2MB - _2MB_OFFSET(vaddr);
	if (size != NULL) {
		*size = cur_size;
	}

	map_2mb = &map_1gb->map[idx_1gb];
	if (size == NULL || map->ops.are_contiguous == NULL ||
	    map_2mb->translation_2mb == map->default_translation) {
		return map_2mb->translation_2mb;
	}

	prev_translation = map_2mb->translation_2mb;
	while (cur_size < total_size) {
		vfn_2mb++;
		idx_256tb = MAP_256TB_IDX(vfn_2mb);
		idx_1gb = MAP_1GB_IDX(vfn_2mb);

		map_1gb = map->map_256tb.map[idx_256tb];
		if (spdk_unlikely(!map_1gb)) {
			break;
		}

		map_2mb = &map_1gb->map[idx_1gb];
		if (!map->ops.are_contiguous(prev_translation, map_2mb->translation_2mb)) {
			break;
		}

		cur_size += VALUE_2MB;
		prev_translation = map_2mb->translation_2mb;
	}

	*size = cur_size;
	return prev_translation;
}
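/* Sketch of the coalescing path in spdk_mem_map_translate(): when a map
 * provides are_contiguous(), a caller can pass the length it wants to cover in
 * *size and gets back how many bytes starting at vaddr share translations that
 * the callback accepts. Everything below is hypothetical and guarded by
 * SPDK_MEM_EXAMPLES, which is never defined.
 */
#ifdef SPDK_MEM_EXAMPLES
static int
example_are_contiguous(uint64_t addr1, uint64_t addr2)
{
	/* Two neighbouring 2MB pages belong to the same region if they were
	 * given the same translation (e.g. the same registration handle).
	 */
	return addr1 == addr2;
}

static void
example_translate_range(struct spdk_mem_map *map, void *vaddr, size_t want)
{
	uint64_t size = want;
	uint64_t translation;

	translation = spdk_mem_map_translate(map, (uint64_t)vaddr, &size);
	/* With the equality-based callback above, translation is the value shared
	 * by the covered pages (or the map's default translation), and size holds
	 * how many bytes starting at vaddr that translation covers; it is counted
	 * in whole 2MB steps, so it can slightly exceed the requested length.
	 */
	(void)translation;
}
#endif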
#if RTE_VERSION >= RTE_VERSION_NUM(18, 05, 0, 0)
static void
memory_hotplug_cb(enum rte_mem_event event_type,
		  const void *addr, size_t len, void *arg)
{
	if (event_type == RTE_MEM_EVENT_ALLOC) {
		spdk_mem_register((void *)addr, len);

		/* Now mark each segment so that DPDK won't later free it.
		 * This ensures we don't have to deal with the memory
		 * getting freed in different units than it was allocated.
		 */
		while (len > 0) {
			struct rte_memseg *seg;

			seg = rte_mem_virt2memseg(addr, NULL);
			assert(seg != NULL);
			seg->flags |= RTE_MEMSEG_FLAG_DO_NOT_FREE;
			addr = (void *)((uintptr_t)addr + seg->hugepage_sz);
			len -= seg->hugepage_sz;
		}
	} else if (event_type == RTE_MEM_EVENT_FREE) {
		spdk_mem_unregister((void *)addr, len);
	}
}

static int
memory_iter_cb(const struct rte_memseg_list *msl,
	       const struct rte_memseg *ms, size_t len, void *arg)
{
	return spdk_mem_register(ms->addr, len);
}
#endif

int
spdk_mem_map_init(void)
{
	g_mem_reg_map = spdk_mem_map_alloc(0, NULL, NULL);
	if (g_mem_reg_map == NULL) {
		DEBUG_PRINT("memory registration map allocation failed\n");
		return -1;
	}

	/*
	 * Walk all DPDK memory segments and register them
	 * with the master memory map
	 */
#if RTE_VERSION >= RTE_VERSION_NUM(18, 05, 0, 0)
	rte_mem_event_callback_register("spdk", memory_hotplug_cb, NULL);
	rte_memseg_contig_walk(memory_iter_cb, NULL);
#else
	struct rte_mem_config *mcfg;
	size_t seg_idx;

	mcfg = rte_eal_get_configuration()->mem_config;
	for (seg_idx = 0; seg_idx < RTE_MAX_MEMSEG; seg_idx++) {
		struct rte_memseg *seg = &mcfg->memseg[seg_idx];

		if (seg->addr == NULL) {
			break;
		}

		spdk_mem_register(seg->addr, seg->len);
	}
#endif
	return 0;
}