/*-
 *   BSD LICENSE
 *
 *   Copyright (c) Intel Corporation.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "spdk/stdinc.h"

#include "env_internal.h"

#include <rte_config.h>
#include <rte_eal_memconfig.h>

#include "spdk_internal/assert.h"

#include "spdk/assert.h"
#include "spdk/likely.h"
#include "spdk/queue.h"
#include "spdk/util.h"

#if DEBUG
#define DEBUG_PRINT(...) fprintf(stderr, __VA_ARGS__)
#else
#define DEBUG_PRINT(...)
#endif

#define FN_2MB_TO_4KB(fn)	((fn) << (SHIFT_2MB - SHIFT_4KB))
#define FN_4KB_TO_2MB(fn)	((fn) >> (SHIFT_2MB - SHIFT_4KB))

#define MAP_256TB_IDX(vfn_2mb)	((vfn_2mb) >> (SHIFT_1GB - SHIFT_2MB))
#define MAP_1GB_IDX(vfn_2mb)	((vfn_2mb) & ((1ULL << (SHIFT_1GB - SHIFT_2MB)) - 1))

/* Page is registered */
#define REG_MAP_REGISTERED	(1ULL << 62)

/* A notification region barrier. The 2MB translation entry that's marked
 * with this flag must be unregistered separately. This allows contiguous
 * regions to be unregistered in the same chunks they were registered.
 */
#define REG_MAP_NOTIFY_START	(1ULL << 63)

/* Translation of a single 2MB page. */
struct map_2mb {
	uint64_t translation_2mb;
};

/* Second-level map table indexed by bits [21..29] of the virtual address.
 * Each entry contains the address translation, or the map's default
 * translation for entries that have not been set yet.
 */
struct map_1gb {
	struct map_2mb map[1ULL << (SHIFT_1GB - SHIFT_2MB)];
};

/* Top-level map table indexed by bits [30..47] of the virtual address.
 * Each entry points to a second-level map table or NULL.
 */
struct map_256tb {
	struct map_1gb *map[1ULL << (SHIFT_256TB - SHIFT_1GB)];
};
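
/* Worked example of the two-level lookup (illustrative, assuming 2 MB pages,
 * i.e. SHIFT_2MB == 21): for a 2 MB-aligned address such as
 * vaddr = 0x40400000 (1 GB + 4 MB), the 2 MB virtual frame number is
 * vfn_2mb = vaddr >> SHIFT_2MB = 0x202, so MAP_256TB_IDX(0x202) == 1 and
 * MAP_1GB_IDX(0x202) == 2; the translation for that page therefore lives in
 * map_256tb.map[1]->map[2].translation_2mb.
 */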

/* Page-granularity memory address translation */
struct spdk_mem_map {
	struct map_256tb map_256tb;
	pthread_mutex_t mutex;
	uint64_t default_translation;
	struct spdk_mem_map_ops ops;
	void *cb_ctx;
	TAILQ_ENTRY(spdk_mem_map) tailq;
};

/* Registrations map. The 64 bit translations are bit fields with the
 * following layout (starting with the low bits):
 *    0 - 61 : reserved
 *   62 - 63 : flags
 */
static struct spdk_mem_map *g_mem_reg_map;
static TAILQ_HEAD(, spdk_mem_map) g_spdk_mem_maps = TAILQ_HEAD_INITIALIZER(g_spdk_mem_maps);
static pthread_mutex_t g_spdk_mem_map_mutex = PTHREAD_MUTEX_INITIALIZER;

/*
 * Walk the currently registered memory via the main memory registration map
 * and call the new map's notify callback for each virtually contiguous region.
 */
static int
spdk_mem_map_notify_walk(struct spdk_mem_map *map, enum spdk_mem_map_notify_action action)
{
	size_t idx_256tb;
	uint64_t idx_1gb;
	uint64_t contig_start = UINT64_MAX;
	uint64_t contig_end = UINT64_MAX;
	struct map_1gb *map_1gb;
	int rc;

	if (!g_mem_reg_map) {
		return -EINVAL;
	}

	/* Hold the memory registration map mutex so no new registrations can be added while we are looping. */
	pthread_mutex_lock(&g_mem_reg_map->mutex);

	for (idx_256tb = 0;
	     idx_256tb < sizeof(g_mem_reg_map->map_256tb.map) / sizeof(g_mem_reg_map->map_256tb.map[0]);
	     idx_256tb++) {
		map_1gb = g_mem_reg_map->map_256tb.map[idx_256tb];

		if (!map_1gb) {
			if (contig_start != UINT64_MAX) {
				/* End of a virtually contiguous range */
				rc = map->ops.notify_cb(map->cb_ctx, map, action,
							(void *)contig_start,
							contig_end - contig_start + VALUE_2MB);
				/* Don't bother handling unregister failures. It can't be any worse */
				if (rc != 0 && action == SPDK_MEM_MAP_NOTIFY_REGISTER) {
					goto err_unregister;
				}
			}
			contig_start = UINT64_MAX;
			continue;
		}

		for (idx_1gb = 0; idx_1gb < sizeof(map_1gb->map) / sizeof(map_1gb->map[0]); idx_1gb++) {
			if ((map_1gb->map[idx_1gb].translation_2mb & REG_MAP_REGISTERED) &&
			    (contig_start == UINT64_MAX ||
			     (map_1gb->map[idx_1gb].translation_2mb & REG_MAP_NOTIFY_START) == 0)) {
				/* Rebuild the virtual address from the indexes */
				uint64_t vaddr = (idx_256tb << SHIFT_1GB) | (idx_1gb << SHIFT_2MB);

				if (contig_start == UINT64_MAX) {
					contig_start = vaddr;
				}

				contig_end = vaddr;
			} else {
				if (contig_start != UINT64_MAX) {
					/* End of a virtually contiguous range */
					rc = map->ops.notify_cb(map->cb_ctx, map, action,
								(void *)contig_start,
								contig_end - contig_start + VALUE_2MB);
					/* Don't bother handling unregister failures. It can't be any worse */
					if (rc != 0 && action == SPDK_MEM_MAP_NOTIFY_REGISTER) {
						goto err_unregister;
					}

					/* This page might be a part of a neighbour region, so process
					 * it again. The idx_1gb will be incremented immediately.
					 */
					idx_1gb--;
				}
				contig_start = UINT64_MAX;
			}
		}
	}

	pthread_mutex_unlock(&g_mem_reg_map->mutex);
	return 0;

err_unregister:
	/* Unwind to the first empty translation so we don't unregister
	 * a region that just failed to register.
	 */
	idx_256tb = MAP_256TB_IDX((contig_start >> SHIFT_2MB) - 1);
	idx_1gb = MAP_1GB_IDX((contig_start >> SHIFT_2MB) - 1);
	contig_start = UINT64_MAX;
	contig_end = UINT64_MAX;

	/* Unregister any memory we managed to register before the failure */
	for (; idx_256tb < SIZE_MAX; idx_256tb--) {
		map_1gb = g_mem_reg_map->map_256tb.map[idx_256tb];

		if (!map_1gb) {
			if (contig_end != UINT64_MAX) {
				/* End of a virtually contiguous range */
				map->ops.notify_cb(map->cb_ctx, map,
						   SPDK_MEM_MAP_NOTIFY_UNREGISTER,
						   (void *)contig_start,
						   contig_end - contig_start + VALUE_2MB);
			}
			contig_end = UINT64_MAX;
			continue;
		}

		for (; idx_1gb < UINT64_MAX; idx_1gb--) {
			if ((map_1gb->map[idx_1gb].translation_2mb & REG_MAP_REGISTERED) &&
			    (contig_end == UINT64_MAX ||
			     (map_1gb->map[idx_1gb].translation_2mb & REG_MAP_NOTIFY_START) == 0)) {
				/* Rebuild the virtual address from the indexes */
				uint64_t vaddr = (idx_256tb << SHIFT_1GB) | (idx_1gb << SHIFT_2MB);

				if (contig_end == UINT64_MAX) {
					contig_end = vaddr;
				}
				contig_start = vaddr;
			} else {
				if (contig_end != UINT64_MAX) {
					/* End of a virtually contiguous range */
					map->ops.notify_cb(map->cb_ctx, map,
							   SPDK_MEM_MAP_NOTIFY_UNREGISTER,
							   (void *)contig_start,
							   contig_end - contig_start + VALUE_2MB);
					idx_1gb++;
				}
				contig_end = UINT64_MAX;
			}
		}
		idx_1gb = sizeof(map_1gb->map) / sizeof(map_1gb->map[0]) - 1;
	}

	pthread_mutex_unlock(&g_mem_reg_map->mutex);
	return rc;
}
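
/* Usage sketch (illustrative, not code from this file): a consumer supplies a
 * notify callback and, optionally, an are_contiguous callback, picks a default
 * translation of its choosing, and frees the map when done. my_notify and
 * my_ctx are hypothetical; the field names match struct spdk_mem_map_ops as
 * used throughout this file.
 *
 *	static const struct spdk_mem_map_ops my_ops = {
 *		.notify_cb = my_notify,	// see the notify pattern sketched
 *					// before spdk_mem_map_set_translation()
 *		.are_contiguous = NULL,
 *	};
 *
 *	struct spdk_mem_map *map = spdk_mem_map_alloc(UINT64_MAX, &my_ops, my_ctx);
 *	if (map == NULL) {
 *		// allocation failed, or the initial notify walk returned an error
 *	}
 *	...
 *	spdk_mem_map_free(&map);
 */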

struct spdk_mem_map *
spdk_mem_map_alloc(uint64_t default_translation, const struct spdk_mem_map_ops *ops, void *cb_ctx)
{
	struct spdk_mem_map *map;
	int rc;

	map = calloc(1, sizeof(*map));
	if (map == NULL) {
		return NULL;
	}

	if (pthread_mutex_init(&map->mutex, NULL)) {
		free(map);
		return NULL;
	}

	map->default_translation = default_translation;
	map->cb_ctx = cb_ctx;
	if (ops) {
		map->ops = *ops;
	}

	if (ops && ops->notify_cb) {
		pthread_mutex_lock(&g_spdk_mem_map_mutex);
		rc = spdk_mem_map_notify_walk(map, SPDK_MEM_MAP_NOTIFY_REGISTER);
		if (rc != 0) {
			pthread_mutex_unlock(&g_spdk_mem_map_mutex);
			DEBUG_PRINT("Initial mem_map notify failed\n");
			pthread_mutex_destroy(&map->mutex);
			free(map);
			return NULL;
		}
		TAILQ_INSERT_TAIL(&g_spdk_mem_maps, map, tailq);
		pthread_mutex_unlock(&g_spdk_mem_map_mutex);
	}

	return map;
}

void
spdk_mem_map_free(struct spdk_mem_map **pmap)
{
	struct spdk_mem_map *map;
	size_t i;

	if (!pmap) {
		return;
	}

	map = *pmap;

	if (!map) {
		return;
	}

	if (map->ops.notify_cb) {
		pthread_mutex_lock(&g_spdk_mem_map_mutex);
		spdk_mem_map_notify_walk(map, SPDK_MEM_MAP_NOTIFY_UNREGISTER);
		TAILQ_REMOVE(&g_spdk_mem_maps, map, tailq);
		pthread_mutex_unlock(&g_spdk_mem_map_mutex);
	}

	for (i = 0; i < sizeof(map->map_256tb.map) / sizeof(map->map_256tb.map[0]); i++) {
		free(map->map_256tb.map[i]);
	}

	pthread_mutex_destroy(&map->mutex);

	free(map);
	*pmap = NULL;
}
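
/* Illustrative example (hypothetical buffer): both the address and the length
 * passed to spdk_mem_register()/spdk_mem_unregister() must be 2 MB aligned,
 * and memory that is already tracked (for example the DPDK hugepages
 * registered by spdk_mem_map_init()) is rejected with -EBUSY.
 *
 *	// buf: a 2 MB-aligned region obtained outside the DPDK hugepage pool,
 *	// for example a separately mmap()ed hugetlbfs mapping.
 *	rc = spdk_mem_register(buf, 2 * VALUE_2MB);
 *	...
 *	rc = spdk_mem_unregister(buf, 2 * VALUE_2MB);
 */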

int
spdk_mem_register(void *vaddr, size_t len)
{
	struct spdk_mem_map *map;
	int rc;
	void *seg_vaddr;
	size_t seg_len;
	uint64_t reg;

	if ((uintptr_t)vaddr & ~MASK_256TB) {
		DEBUG_PRINT("invalid usermode virtual address %p\n", vaddr);
		return -EINVAL;
	}

	if (((uintptr_t)vaddr & MASK_2MB) || (len & MASK_2MB)) {
		DEBUG_PRINT("invalid %s parameters, vaddr=%p len=%ju\n",
			    __func__, vaddr, len);
		return -EINVAL;
	}

	if (len == 0) {
		return 0;
	}

	pthread_mutex_lock(&g_spdk_mem_map_mutex);

	seg_vaddr = vaddr;
	seg_len = len;
	while (seg_len > 0) {
		reg = spdk_mem_map_translate(g_mem_reg_map, (uint64_t)seg_vaddr, NULL);
		if (reg & REG_MAP_REGISTERED) {
			pthread_mutex_unlock(&g_spdk_mem_map_mutex);
			return -EBUSY;
		}
		seg_vaddr += VALUE_2MB;
		seg_len -= VALUE_2MB;
	}

	seg_vaddr = vaddr;
	seg_len = 0;
	while (len > 0) {
		spdk_mem_map_set_translation(g_mem_reg_map, (uint64_t)vaddr, VALUE_2MB,
					     seg_len == 0 ? REG_MAP_REGISTERED | REG_MAP_NOTIFY_START : REG_MAP_REGISTERED);
		seg_len += VALUE_2MB;
		vaddr += VALUE_2MB;
		len -= VALUE_2MB;
	}

	TAILQ_FOREACH(map, &g_spdk_mem_maps, tailq) {
		rc = map->ops.notify_cb(map->cb_ctx, map, SPDK_MEM_MAP_NOTIFY_REGISTER, seg_vaddr, seg_len);
		if (rc != 0) {
			pthread_mutex_unlock(&g_spdk_mem_map_mutex);
			return rc;
		}
	}

	pthread_mutex_unlock(&g_spdk_mem_map_mutex);
	return 0;
}
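
/* Note on granularity (derived from REG_MAP_NOTIFY_START above):
 * spdk_mem_unregister() notifies the registered maps in the same chunks the
 * memory was originally registered in. For example, if [A, A + 2 MB) and
 * [A + 2 MB, A + 4 MB) were registered by two separate spdk_mem_register()
 * calls, spdk_mem_unregister(A, 4 MB) issues two 2 MB UNREGISTER
 * notifications rather than a single 4 MB one, while unregistering only part
 * of a single registered region fails with -ERANGE.
 */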

int
spdk_mem_unregister(void *vaddr, size_t len)
{
	struct spdk_mem_map *map;
	int rc;
	void *seg_vaddr;
	size_t seg_len;
	uint64_t reg, newreg;

	if ((uintptr_t)vaddr & ~MASK_256TB) {
		DEBUG_PRINT("invalid usermode virtual address %p\n", vaddr);
		return -EINVAL;
	}

	if (((uintptr_t)vaddr & MASK_2MB) || (len & MASK_2MB)) {
		DEBUG_PRINT("invalid %s parameters, vaddr=%p len=%ju\n",
			    __func__, vaddr, len);
		return -EINVAL;
	}

	pthread_mutex_lock(&g_spdk_mem_map_mutex);

	/* The first page must be a start of a region. Also check if it's
	 * registered to make sure we don't return -ERANGE for non-registered
	 * regions.
	 */
	reg = spdk_mem_map_translate(g_mem_reg_map, (uint64_t)vaddr, NULL);
	if ((reg & REG_MAP_REGISTERED) && (reg & REG_MAP_NOTIFY_START) == 0) {
		pthread_mutex_unlock(&g_spdk_mem_map_mutex);
		return -ERANGE;
	}

	seg_vaddr = vaddr;
	seg_len = len;
	while (seg_len > 0) {
		reg = spdk_mem_map_translate(g_mem_reg_map, (uint64_t)seg_vaddr, NULL);
		if ((reg & REG_MAP_REGISTERED) == 0) {
			pthread_mutex_unlock(&g_spdk_mem_map_mutex);
			return -EINVAL;
		}
		seg_vaddr += VALUE_2MB;
		seg_len -= VALUE_2MB;
	}

	newreg = spdk_mem_map_translate(g_mem_reg_map, (uint64_t)seg_vaddr, NULL);
	/* If the next page is registered, it must be a start of a region as well,
	 * otherwise we'd be unregistering only a part of a region.
	 */
	if ((newreg & REG_MAP_NOTIFY_START) == 0 && (newreg & REG_MAP_REGISTERED)) {
		pthread_mutex_unlock(&g_spdk_mem_map_mutex);
		return -ERANGE;
	}
	seg_vaddr = vaddr;
	seg_len = 0;

	while (len > 0) {
		reg = spdk_mem_map_translate(g_mem_reg_map, (uint64_t)vaddr, NULL);
		spdk_mem_map_set_translation(g_mem_reg_map, (uint64_t)vaddr, VALUE_2MB, 0);

		if (seg_len > 0 && (reg & REG_MAP_NOTIFY_START)) {
			TAILQ_FOREACH(map, &g_spdk_mem_maps, tailq) {
				rc = map->ops.notify_cb(map->cb_ctx, map, SPDK_MEM_MAP_NOTIFY_UNREGISTER, seg_vaddr, seg_len);
				if (rc != 0) {
					pthread_mutex_unlock(&g_spdk_mem_map_mutex);
					return rc;
				}
			}

			seg_vaddr = vaddr;
			seg_len = VALUE_2MB;
		} else {
			seg_len += VALUE_2MB;
		}

		vaddr += VALUE_2MB;
		len -= VALUE_2MB;
	}

	if (seg_len > 0) {
		TAILQ_FOREACH(map, &g_spdk_mem_maps, tailq) {
			rc = map->ops.notify_cb(map->cb_ctx, map, SPDK_MEM_MAP_NOTIFY_UNREGISTER, seg_vaddr, seg_len);
			if (rc != 0) {
				pthread_mutex_unlock(&g_spdk_mem_map_mutex);
				return rc;
			}
		}
	}

	pthread_mutex_unlock(&g_spdk_mem_map_mutex);
	return 0;
}

static struct map_1gb *
spdk_mem_map_get_map_1gb(struct spdk_mem_map *map, uint64_t vfn_2mb)
{
	struct map_1gb *map_1gb;
	uint64_t idx_256tb = MAP_256TB_IDX(vfn_2mb);
	size_t i;

	if (spdk_unlikely(idx_256tb >= SPDK_COUNTOF(map->map_256tb.map))) {
		return NULL;
	}

	map_1gb = map->map_256tb.map[idx_256tb];

	if (!map_1gb) {
		pthread_mutex_lock(&map->mutex);

		/* Recheck to make sure nobody else got the mutex first. */
		map_1gb = map->map_256tb.map[idx_256tb];
		if (!map_1gb) {
			map_1gb = malloc(sizeof(struct map_1gb));
			if (map_1gb) {
				/* initialize all entries to default translation */
				for (i = 0; i < SPDK_COUNTOF(map_1gb->map); i++) {
					map_1gb->map[i].translation_2mb = map->default_translation;
				}
				map->map_256tb.map[idx_256tb] = map_1gb;
			}
		}

		pthread_mutex_unlock(&map->mutex);

		if (!map_1gb) {
			DEBUG_PRINT("allocation failed\n");
			return NULL;
		}
	}

	return map_1gb;
}
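
/* Typical pattern (a sketch; my_notify and my_lookup_translation are
 * hypothetical, and the callback signature assumed here is the one declared
 * for struct spdk_mem_map_ops in spdk/env.h): a map's notify callback
 * forwards each registered region into the map with
 * spdk_mem_map_set_translation() and removes it again with
 * spdk_mem_map_clear_translation().
 *
 *	static int
 *	my_notify(void *cb_ctx, struct spdk_mem_map *map,
 *		  enum spdk_mem_map_notify_action action, void *vaddr, size_t size)
 *	{
 *		switch (action) {
 *		case SPDK_MEM_MAP_NOTIFY_REGISTER:
 *			return spdk_mem_map_set_translation(map, (uint64_t)vaddr, size,
 *							    my_lookup_translation(vaddr));
 *		case SPDK_MEM_MAP_NOTIFY_UNREGISTER:
 *			return spdk_mem_map_clear_translation(map, (uint64_t)vaddr, size);
 *		default:
 *			return -EINVAL;
 *		}
 *	}
 */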

int
spdk_mem_map_set_translation(struct spdk_mem_map *map, uint64_t vaddr, uint64_t size,
			     uint64_t translation)
{
	uint64_t vfn_2mb;
	struct map_1gb *map_1gb;
	uint64_t idx_1gb;
	struct map_2mb *map_2mb;

	if ((uintptr_t)vaddr & ~MASK_256TB) {
		DEBUG_PRINT("invalid usermode virtual address %lu\n", vaddr);
		return -EINVAL;
	}

	/* For now, only 2 MB-aligned registrations are supported */
	if (((uintptr_t)vaddr & MASK_2MB) || (size & MASK_2MB)) {
		DEBUG_PRINT("invalid %s parameters, vaddr=%lu len=%ju\n",
			    __func__, vaddr, size);
		return -EINVAL;
	}

	vfn_2mb = vaddr >> SHIFT_2MB;

	while (size) {
		map_1gb = spdk_mem_map_get_map_1gb(map, vfn_2mb);
		if (!map_1gb) {
			DEBUG_PRINT("could not get %p map\n", (void *)vaddr);
			return -ENOMEM;
		}

		idx_1gb = MAP_1GB_IDX(vfn_2mb);
		map_2mb = &map_1gb->map[idx_1gb];
		map_2mb->translation_2mb = translation;

		size -= VALUE_2MB;
		vfn_2mb++;
	}

	return 0;
}

int
spdk_mem_map_clear_translation(struct spdk_mem_map *map, uint64_t vaddr, uint64_t size)
{
	uint64_t vfn_2mb;
	struct map_1gb *map_1gb;
	uint64_t idx_1gb;
	struct map_2mb *map_2mb;

	if ((uintptr_t)vaddr & ~MASK_256TB) {
		DEBUG_PRINT("invalid usermode virtual address %lu\n", vaddr);
		return -EINVAL;
	}

	/* For now, only 2 MB-aligned registrations are supported */
	if (((uintptr_t)vaddr & MASK_2MB) || (size & MASK_2MB)) {
		DEBUG_PRINT("invalid %s parameters, vaddr=%lu len=%ju\n",
			    __func__, vaddr, size);
		return -EINVAL;
	}

	vfn_2mb = vaddr >> SHIFT_2MB;

	while (size) {
		map_1gb = spdk_mem_map_get_map_1gb(map, vfn_2mb);
		if (!map_1gb) {
			DEBUG_PRINT("could not get %p map\n", (void *)vaddr);
			return -ENOMEM;
		}

		idx_1gb = MAP_1GB_IDX(vfn_2mb);
		map_2mb = &map_1gb->map[idx_1gb];
		map_2mb->translation_2mb = map->default_translation;

		size -= VALUE_2MB;
		vfn_2mb++;
	}

	return 0;
}

uint64_t
spdk_mem_map_translate(const struct spdk_mem_map *map, uint64_t vaddr, uint64_t *size)
{
	const struct map_1gb *map_1gb;
	const struct map_2mb *map_2mb;
	uint64_t idx_256tb;
	uint64_t idx_1gb;
	uint64_t vfn_2mb;
	uint64_t total_size = 0;
	uint64_t cur_size;
	uint64_t prev_translation;

	if (size != NULL) {
		total_size = *size;
		*size = 0;
	}

	if (spdk_unlikely(vaddr & ~MASK_256TB)) {
		DEBUG_PRINT("invalid usermode virtual address %p\n", (void *)vaddr);
		return map->default_translation;
	}

	vfn_2mb = vaddr >> SHIFT_2MB;
	idx_256tb = MAP_256TB_IDX(vfn_2mb);
	idx_1gb = MAP_1GB_IDX(vfn_2mb);

	map_1gb = map->map_256tb.map[idx_256tb];
	if (spdk_unlikely(!map_1gb)) {
		return map->default_translation;
	}

	cur_size = VALUE_2MB;
	if (size != NULL) {
		*size = VALUE_2MB;
	}

	map_2mb = &map_1gb->map[idx_1gb];
	if (size == NULL || map->ops.are_contiguous == NULL ||
	    map_2mb->translation_2mb == map->default_translation) {
		return map_2mb->translation_2mb;
	}

	prev_translation = map_2mb->translation_2mb;
	while (cur_size < total_size) {
		vfn_2mb++;
		idx_256tb = MAP_256TB_IDX(vfn_2mb);
		idx_1gb = MAP_1GB_IDX(vfn_2mb);

		map_1gb = map->map_256tb.map[idx_256tb];
		if (spdk_unlikely(!map_1gb)) {
			break;
		}

		map_2mb = &map_1gb->map[idx_1gb];
		if (!map->ops.are_contiguous(prev_translation, map_2mb->translation_2mb)) {
			break;
		}

		cur_size += VALUE_2MB;
		prev_translation = map_2mb->translation_2mb;
	}

	*size = cur_size;
	return prev_translation;
}
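
/* Note on the size parameter of spdk_mem_map_translate() (illustrative): the
 * caller passes in the number of bytes it wants covered and reads back, in
 * 2 MB units, how far the translation stays contiguous according to the map's
 * are_contiguous callback. Without that callback (or with size == NULL), only
 * the single 2 MB page containing vaddr is considered.
 *
 *	uint64_t len = remaining;	// hypothetical: bytes left to translate
 *	uint64_t translation = spdk_mem_map_translate(map, (uint64_t)vaddr, &len);
 *	// len now holds the contiguous span starting at vaddr; translate
 *	// again at vaddr + len for the remainder
 */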

#if RTE_VERSION >= RTE_VERSION_NUM(18, 05, 0, 0)
static void
memory_hotplug_cb(enum rte_mem_event event_type,
		  const void *addr, size_t len, void *arg)
{
	if (event_type == RTE_MEM_EVENT_ALLOC) {
		while (len > 0) {
			struct rte_memseg *seg;

			seg = rte_mem_virt2memseg(addr, NULL);
			assert(seg != NULL);
			assert(len >= seg->hugepage_sz);

			spdk_mem_register((void *)seg->addr, seg->hugepage_sz);
			addr = (void *)((uintptr_t)addr + seg->hugepage_sz);
			len -= seg->hugepage_sz;
		}
	} else if (event_type == RTE_MEM_EVENT_FREE) {
		spdk_mem_unregister((void *)addr, len);
	}
}

static int
memory_iter_cb(const struct rte_memseg_list *msl,
	       const struct rte_memseg *ms, size_t len, void *arg)
{
	return spdk_mem_register(ms->addr, len);
}
#endif

int
spdk_mem_map_init(void)
{
	g_mem_reg_map = spdk_mem_map_alloc(0, NULL, NULL);
	if (g_mem_reg_map == NULL) {
		DEBUG_PRINT("memory registration map allocation failed\n");
		return -1;
	}

	/*
	 * Walk all DPDK memory segments and register them
	 * with the master memory map
	 */
#if RTE_VERSION >= RTE_VERSION_NUM(18, 05, 0, 0)
	rte_mem_event_callback_register("spdk", memory_hotplug_cb, NULL);
	rte_memseg_contig_walk(memory_iter_cb, NULL);
#else
	struct rte_mem_config *mcfg;
	size_t seg_idx;

	mcfg = rte_eal_get_configuration()->mem_config;
	for (seg_idx = 0; seg_idx < RTE_MAX_MEMSEG; seg_idx++) {
		struct rte_memseg *seg = &mcfg->memseg[seg_idx];

		if (seg->addr == NULL) {
			break;
		}

		spdk_mem_register(seg->addr, seg->len);
	}
#endif
	return 0;
}