/*-
 *   BSD LICENSE
 *
 *   Copyright (c) Intel Corporation.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "spdk/stdinc.h"

#include "env_internal.h"

#include <rte_config.h>
#include <rte_eal_memconfig.h>

#include "spdk_internal/assert.h"
#include "spdk_internal/memory.h"

#include "spdk/assert.h"
#include "spdk/likely.h"
#include "spdk/queue.h"
#include "spdk/util.h"

#if DEBUG
#define DEBUG_PRINT(...) fprintf(stderr, __VA_ARGS__)
#else
#define DEBUG_PRINT(...)
#endif

#define FN_2MB_TO_4KB(fn)	(fn << (SHIFT_2MB - SHIFT_4KB))
#define FN_4KB_TO_2MB(fn)	(fn >> (SHIFT_2MB - SHIFT_4KB))

#define MAP_256TB_IDX(vfn_2mb)	((vfn_2mb) >> (SHIFT_1GB - SHIFT_2MB))
#define MAP_1GB_IDX(vfn_2mb)	((vfn_2mb) & ((1ULL << (SHIFT_1GB - SHIFT_2MB)) - 1))

/* Page is registered */
#define REG_MAP_REGISTERED	(1ULL << 62)

/* A notification region barrier. The 2MB translation entry that's marked
 * with this flag must be unregistered separately. This allows contiguous
 * regions to be unregistered in the same chunks they were registered.
 */
#define REG_MAP_NOTIFY_START	(1ULL << 63)

/* Translation of a single 2MB page. */
struct map_2mb {
	uint64_t translation_2mb;
};

/* Second-level map table indexed by bits [21..29] of the virtual address.
 * Each entry contains the address translation, or the map's default
 * translation for entries that haven't been set yet.
 */
struct map_1gb {
	struct map_2mb map[1ULL << (SHIFT_1GB - SHIFT_2MB)];
};

/* Top-level map table indexed by bits [30..47] of the virtual address.
 * Each entry points to a second-level map table or NULL.
 */
struct map_256tb {
	struct map_1gb *map[1ULL << (SHIFT_256TB - SHIFT_1GB)];
};
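
/* Illustrative example (assuming SHIFT_2MB = 21, SHIFT_1GB = 30 and
 * SHIFT_256TB = 48 from spdk_internal/memory.h): for vaddr = 0x40200000
 * (1 GB + 2 MB), vfn_2mb = vaddr >> SHIFT_2MB = 0x201, so
 * MAP_256TB_IDX(vfn_2mb) = 1 picks the second map_1gb table and
 * MAP_1GB_IDX(vfn_2mb) = 1 picks the second map_2mb entry inside it.
 * Together the two levels cover the 48-bit (256 TB) user-mode address
 * space at 2 MB granularity.
 */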

/* Page-granularity memory address translation */
struct spdk_mem_map {
	struct map_256tb map_256tb;
	pthread_mutex_t mutex;
	uint64_t default_translation;
	struct spdk_mem_map_ops ops;
	void *cb_ctx;
	TAILQ_ENTRY(spdk_mem_map) tailq;
};

/* Registrations map. The 64 bit translations are bit fields with the
 * following layout (starting with the low bits):
 *    0 - 61 : reserved
 *   62 - 63 : flags
 */
static struct spdk_mem_map *g_mem_reg_map;
static TAILQ_HEAD(, spdk_mem_map) g_spdk_mem_maps = TAILQ_HEAD_INITIALIZER(g_spdk_mem_maps);
static pthread_mutex_t g_spdk_mem_map_mutex = PTHREAD_MUTEX_INITIALIZER;

/*
 * Walk the currently registered memory via the main memory registration map
 * and call the new map's notify callback for each virtually contiguous region.
 */
static int
spdk_mem_map_notify_walk(struct spdk_mem_map *map, enum spdk_mem_map_notify_action action)
{
	size_t idx_256tb;
	uint64_t idx_1gb;
	uint64_t contig_start = UINT64_MAX;
	uint64_t contig_end = UINT64_MAX;
	struct map_1gb *map_1gb;
	int rc;

	if (!g_mem_reg_map) {
		return -EINVAL;
	}

	/* Hold the memory registration map mutex so no new registrations can be added while we are looping. */
	pthread_mutex_lock(&g_mem_reg_map->mutex);

	for (idx_256tb = 0;
	     idx_256tb < sizeof(g_mem_reg_map->map_256tb.map) / sizeof(g_mem_reg_map->map_256tb.map[0]);
	     idx_256tb++) {
		map_1gb = g_mem_reg_map->map_256tb.map[idx_256tb];

		if (!map_1gb) {
			if (contig_start != UINT64_MAX) {
				/* End of a virtually contiguous range */
				rc = map->ops.notify_cb(map->cb_ctx, map, action,
							(void *)contig_start,
							contig_end - contig_start + VALUE_2MB);
				/* Don't bother handling unregister failures. It can't be any worse */
				if (rc != 0 && action == SPDK_MEM_MAP_NOTIFY_REGISTER) {
					goto err_unregister;
				}
			}
			contig_start = UINT64_MAX;
			continue;
		}

		for (idx_1gb = 0; idx_1gb < sizeof(map_1gb->map) / sizeof(map_1gb->map[0]); idx_1gb++) {
			if ((map_1gb->map[idx_1gb].translation_2mb & REG_MAP_REGISTERED) &&
			    (contig_start == UINT64_MAX ||
			     (map_1gb->map[idx_1gb].translation_2mb & REG_MAP_NOTIFY_START) == 0)) {
				/* Rebuild the virtual address from the indexes */
				uint64_t vaddr = (idx_256tb << SHIFT_1GB) | (idx_1gb << SHIFT_2MB);

				if (contig_start == UINT64_MAX) {
					contig_start = vaddr;
				}

				contig_end = vaddr;
			} else {
				if (contig_start != UINT64_MAX) {
					/* End of a virtually contiguous range */
					rc = map->ops.notify_cb(map->cb_ctx, map, action,
								(void *)contig_start,
								contig_end - contig_start + VALUE_2MB);
					/* Don't bother handling unregister failures. It can't be any worse */
					if (rc != 0 && action == SPDK_MEM_MAP_NOTIFY_REGISTER) {
						goto err_unregister;
					}

					/* This page might be a part of a neighbour region, so process
					 * it again. The idx_1gb will be incremented immediately.
					 */
					idx_1gb--;
				}
				contig_start = UINT64_MAX;
			}
		}
	}

	pthread_mutex_unlock(&g_mem_reg_map->mutex);
	return 0;

err_unregister:
	/* Unwind to the first empty translation so we don't unregister
	 * a region that just failed to register.
	 */
	idx_256tb = MAP_256TB_IDX((contig_start >> SHIFT_2MB) - 1);
	idx_1gb = MAP_1GB_IDX((contig_start >> SHIFT_2MB) - 1);
	contig_start = UINT64_MAX;
	contig_end = UINT64_MAX;

	/* Unregister any memory we managed to register before the failure */
	for (; idx_256tb < SIZE_MAX; idx_256tb--) {
		map_1gb = g_mem_reg_map->map_256tb.map[idx_256tb];

		if (!map_1gb) {
			if (contig_end != UINT64_MAX) {
				/* End of a virtually contiguous range */
				map->ops.notify_cb(map->cb_ctx, map,
						   SPDK_MEM_MAP_NOTIFY_UNREGISTER,
						   (void *)contig_start,
						   contig_end - contig_start + VALUE_2MB);
			}
			contig_end = UINT64_MAX;
			continue;
		}

		for (; idx_1gb < UINT64_MAX; idx_1gb--) {
			if ((map_1gb->map[idx_1gb].translation_2mb & REG_MAP_REGISTERED) &&
			    (contig_end == UINT64_MAX || (map_1gb->map[idx_1gb].translation_2mb & REG_MAP_NOTIFY_START) == 0)) {
				/* Rebuild the virtual address from the indexes */
				uint64_t vaddr = (idx_256tb << SHIFT_1GB) | (idx_1gb << SHIFT_2MB);

				if (contig_end == UINT64_MAX) {
					contig_end = vaddr;
				}
				contig_start = vaddr;
			} else {
				if (contig_end != UINT64_MAX) {
					/* End of a virtually contiguous range */
					map->ops.notify_cb(map->cb_ctx, map,
							   SPDK_MEM_MAP_NOTIFY_UNREGISTER,
							   (void *)contig_start,
							   contig_end - contig_start + VALUE_2MB);
					idx_1gb++;
				}
				contig_end = UINT64_MAX;
			}
		}
		idx_1gb = sizeof(map_1gb->map) / sizeof(map_1gb->map[0]) - 1;
	}

	pthread_mutex_unlock(&g_mem_reg_map->mutex);
	return rc;
}
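
/* Allocate a new translation map. If the ops include a notify callback, the
 * new map is immediately notified about every region already present in the
 * global registration map and then added to the list of maps that receive
 * future register/unregister notifications.
 */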
struct spdk_mem_map *
spdk_mem_map_alloc(uint64_t default_translation, const struct spdk_mem_map_ops *ops, void *cb_ctx)
{
	struct spdk_mem_map *map;
	int rc;

	map = calloc(1, sizeof(*map));
	if (map == NULL) {
		return NULL;
	}

	if (pthread_mutex_init(&map->mutex, NULL)) {
		free(map);
		return NULL;
	}

	map->default_translation = default_translation;
	map->cb_ctx = cb_ctx;
	if (ops) {
		map->ops = *ops;
	}

	if (ops && ops->notify_cb) {
		pthread_mutex_lock(&g_spdk_mem_map_mutex);
		rc = spdk_mem_map_notify_walk(map, SPDK_MEM_MAP_NOTIFY_REGISTER);
		if (rc != 0) {
			pthread_mutex_unlock(&g_spdk_mem_map_mutex);
			DEBUG_PRINT("Initial mem_map notify failed\n");
			pthread_mutex_destroy(&map->mutex);
			free(map);
			return NULL;
		}
		TAILQ_INSERT_TAIL(&g_spdk_mem_maps, map, tailq);
		pthread_mutex_unlock(&g_spdk_mem_map_mutex);
	}

	return map;
}

void
spdk_mem_map_free(struct spdk_mem_map **pmap)
{
	struct spdk_mem_map *map;
	size_t i;

	if (!pmap) {
		return;
	}

	map = *pmap;

	if (!map) {
		return;
	}

	if (map->ops.notify_cb) {
		pthread_mutex_lock(&g_spdk_mem_map_mutex);
		spdk_mem_map_notify_walk(map, SPDK_MEM_MAP_NOTIFY_UNREGISTER);
		TAILQ_REMOVE(&g_spdk_mem_maps, map, tailq);
		pthread_mutex_unlock(&g_spdk_mem_map_mutex);
	}

	for (i = 0; i < sizeof(map->map_256tb.map) / sizeof(map->map_256tb.map[0]); i++) {
		free(map->map_256tb.map[i]);
	}

	pthread_mutex_destroy(&map->mutex);

	free(map);
	*pmap = NULL;
}
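
/* Register a 2 MB-aligned region with the global registration map and notify
 * every existing translation map about it. Fails with -EBUSY if any 2 MB page
 * in the range is already registered. The first page of the range is tagged
 * with REG_MAP_NOTIFY_START so the region can later only be unregistered in
 * the same chunks it was registered in.
 */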
int
spdk_mem_register(void *vaddr, size_t len)
{
	struct spdk_mem_map *map;
	int rc;
	void *seg_vaddr;
	size_t seg_len;
	uint64_t reg;

	if ((uintptr_t)vaddr & ~MASK_256TB) {
		DEBUG_PRINT("invalid usermode virtual address %p\n", vaddr);
		return -EINVAL;
	}

	if (((uintptr_t)vaddr & MASK_2MB) || (len & MASK_2MB)) {
		DEBUG_PRINT("invalid %s parameters, vaddr=%p len=%ju\n",
			    __func__, vaddr, len);
		return -EINVAL;
	}

	if (len == 0) {
		return 0;
	}

	pthread_mutex_lock(&g_spdk_mem_map_mutex);

	seg_vaddr = vaddr;
	seg_len = len;
	while (seg_len > 0) {
		reg = spdk_mem_map_translate(g_mem_reg_map, (uint64_t)seg_vaddr, NULL);
		if (reg & REG_MAP_REGISTERED) {
			pthread_mutex_unlock(&g_spdk_mem_map_mutex);
			return -EBUSY;
		}
		seg_vaddr += VALUE_2MB;
		seg_len -= VALUE_2MB;
	}

	seg_vaddr = vaddr;
	seg_len = 0;
	while (len > 0) {
		spdk_mem_map_set_translation(g_mem_reg_map, (uint64_t)vaddr, VALUE_2MB,
					     seg_len == 0 ? REG_MAP_REGISTERED | REG_MAP_NOTIFY_START : REG_MAP_REGISTERED);
		seg_len += VALUE_2MB;
		vaddr += VALUE_2MB;
		len -= VALUE_2MB;
	}

	TAILQ_FOREACH(map, &g_spdk_mem_maps, tailq) {
		rc = map->ops.notify_cb(map->cb_ctx, map, SPDK_MEM_MAP_NOTIFY_REGISTER, seg_vaddr, seg_len);
		if (rc != 0) {
			pthread_mutex_unlock(&g_spdk_mem_map_mutex);
			return rc;
		}
	}

	pthread_mutex_unlock(&g_spdk_mem_map_mutex);
	return 0;
}
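
/* Unregister a previously registered, 2 MB-aligned region. The range must
 * start and end on the same region boundaries it was registered with
 * (-ERANGE otherwise), and every page in it must currently be registered
 * (-EINVAL otherwise). Each original registration chunk triggers a separate
 * unregister notification to all translation maps.
 */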
int
spdk_mem_unregister(void *vaddr, size_t len)
{
	struct spdk_mem_map *map;
	int rc;
	void *seg_vaddr;
	size_t seg_len;
	uint64_t reg, newreg;

	if ((uintptr_t)vaddr & ~MASK_256TB) {
		DEBUG_PRINT("invalid usermode virtual address %p\n", vaddr);
		return -EINVAL;
	}

	if (((uintptr_t)vaddr & MASK_2MB) || (len & MASK_2MB)) {
		DEBUG_PRINT("invalid %s parameters, vaddr=%p len=%ju\n",
			    __func__, vaddr, len);
		return -EINVAL;
	}

	pthread_mutex_lock(&g_spdk_mem_map_mutex);

	/* The first page must be a start of a region. Also check if it's
	 * registered to make sure we don't return -ERANGE for non-registered
	 * regions.
	 */
	reg = spdk_mem_map_translate(g_mem_reg_map, (uint64_t)vaddr, NULL);
	if ((reg & REG_MAP_REGISTERED) && (reg & REG_MAP_NOTIFY_START) == 0) {
		pthread_mutex_unlock(&g_spdk_mem_map_mutex);
		return -ERANGE;
	}

	seg_vaddr = vaddr;
	seg_len = len;
	while (seg_len > 0) {
		reg = spdk_mem_map_translate(g_mem_reg_map, (uint64_t)seg_vaddr, NULL);
		if ((reg & REG_MAP_REGISTERED) == 0) {
			pthread_mutex_unlock(&g_spdk_mem_map_mutex);
			return -EINVAL;
		}
		seg_vaddr += VALUE_2MB;
		seg_len -= VALUE_2MB;
	}

	newreg = spdk_mem_map_translate(g_mem_reg_map, (uint64_t)seg_vaddr, NULL);
	/* If the next page is registered, it must be a start of a region as well,
	 * otherwise we'd be unregistering only a part of a region.
	 */
	if ((newreg & REG_MAP_NOTIFY_START) == 0 && (newreg & REG_MAP_REGISTERED)) {
		pthread_mutex_unlock(&g_spdk_mem_map_mutex);
		return -ERANGE;
	}
	seg_vaddr = vaddr;
	seg_len = 0;

	while (len > 0) {
		reg = spdk_mem_map_translate(g_mem_reg_map, (uint64_t)vaddr, NULL);
		spdk_mem_map_set_translation(g_mem_reg_map, (uint64_t)vaddr, VALUE_2MB, 0);

		if (seg_len > 0 && (reg & REG_MAP_NOTIFY_START)) {
			TAILQ_FOREACH(map, &g_spdk_mem_maps, tailq) {
				rc = map->ops.notify_cb(map->cb_ctx, map, SPDK_MEM_MAP_NOTIFY_UNREGISTER, seg_vaddr, seg_len);
				if (rc != 0) {
					pthread_mutex_unlock(&g_spdk_mem_map_mutex);
					return rc;
				}
			}

			seg_vaddr = vaddr;
			seg_len = VALUE_2MB;
		} else {
			seg_len += VALUE_2MB;
		}

		vaddr += VALUE_2MB;
		len -= VALUE_2MB;
	}

	if (seg_len > 0) {
		TAILQ_FOREACH(map, &g_spdk_mem_maps, tailq) {
			rc = map->ops.notify_cb(map->cb_ctx, map, SPDK_MEM_MAP_NOTIFY_UNREGISTER, seg_vaddr, seg_len);
			if (rc != 0) {
				pthread_mutex_unlock(&g_spdk_mem_map_mutex);
				return rc;
			}
		}
	}

	pthread_mutex_unlock(&g_spdk_mem_map_mutex);
	return 0;
}
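
/* Look up the second-level table covering vfn_2mb, allocating it on first
 * use. Allocation is double-checked under the map's mutex so concurrent
 * callers don't race; lookups of already-allocated tables stay lock-free.
 */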
static struct map_1gb *
spdk_mem_map_get_map_1gb(struct spdk_mem_map *map, uint64_t vfn_2mb)
{
	struct map_1gb *map_1gb;
	uint64_t idx_256tb = MAP_256TB_IDX(vfn_2mb);
	size_t i;

	if (spdk_unlikely(idx_256tb >= SPDK_COUNTOF(map->map_256tb.map))) {
		return NULL;
	}

	map_1gb = map->map_256tb.map[idx_256tb];

	if (!map_1gb) {
		pthread_mutex_lock(&map->mutex);

		/* Recheck to make sure nobody else got the mutex first. */
		map_1gb = map->map_256tb.map[idx_256tb];
		if (!map_1gb) {
			map_1gb = malloc(sizeof(struct map_1gb));
			if (map_1gb) {
				/* initialize all entries to default translation */
				for (i = 0; i < SPDK_COUNTOF(map_1gb->map); i++) {
					map_1gb->map[i].translation_2mb = map->default_translation;
				}
				map->map_256tb.map[idx_256tb] = map_1gb;
			}
		}

		pthread_mutex_unlock(&map->mutex);

		if (!map_1gb) {
			DEBUG_PRINT("allocation failed\n");
			return NULL;
		}
	}

	return map_1gb;
}

int
spdk_mem_map_set_translation(struct spdk_mem_map *map, uint64_t vaddr, uint64_t size,
			     uint64_t translation)
{
	uint64_t vfn_2mb;
	struct map_1gb *map_1gb;
	uint64_t idx_1gb;
	struct map_2mb *map_2mb;

	if ((uintptr_t)vaddr & ~MASK_256TB) {
		DEBUG_PRINT("invalid usermode virtual address %lu\n", vaddr);
		return -EINVAL;
	}

	/* For now, only 2 MB-aligned registrations are supported */
	if (((uintptr_t)vaddr & MASK_2MB) || (size & MASK_2MB)) {
		DEBUG_PRINT("invalid %s parameters, vaddr=%lu len=%ju\n",
			    __func__, vaddr, size);
		return -EINVAL;
	}

	vfn_2mb = vaddr >> SHIFT_2MB;

	while (size) {
		map_1gb = spdk_mem_map_get_map_1gb(map, vfn_2mb);
		if (!map_1gb) {
			DEBUG_PRINT("could not get %p map\n", (void *)vaddr);
			return -ENOMEM;
		}

		idx_1gb = MAP_1GB_IDX(vfn_2mb);
		map_2mb = &map_1gb->map[idx_1gb];
		map_2mb->translation_2mb = translation;

		size -= VALUE_2MB;
		vfn_2mb++;
	}

	return 0;
}

int
spdk_mem_map_clear_translation(struct spdk_mem_map *map, uint64_t vaddr, uint64_t size)
{
	uint64_t vfn_2mb;
	struct map_1gb *map_1gb;
	uint64_t idx_1gb;
	struct map_2mb *map_2mb;

	if ((uintptr_t)vaddr & ~MASK_256TB) {
		DEBUG_PRINT("invalid usermode virtual address %lu\n", vaddr);
		return -EINVAL;
	}

	/* For now, only 2 MB-aligned registrations are supported */
	if (((uintptr_t)vaddr & MASK_2MB) || (size & MASK_2MB)) {
		DEBUG_PRINT("invalid %s parameters, vaddr=%lu len=%ju\n",
			    __func__, vaddr, size);
		return -EINVAL;
	}

	vfn_2mb = vaddr >> SHIFT_2MB;

	while (size) {
		map_1gb = spdk_mem_map_get_map_1gb(map, vfn_2mb);
		if (!map_1gb) {
			DEBUG_PRINT("could not get %p map\n", (void *)vaddr);
			return -ENOMEM;
		}

		idx_1gb = MAP_1GB_IDX(vfn_2mb);
		map_2mb = &map_1gb->map[idx_1gb];
		map_2mb->translation_2mb = map->default_translation;

		size -= VALUE_2MB;
		vfn_2mb++;
	}

	return 0;
}
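
/* Return the translation stored for vaddr (or the map's default translation
 * if none is set). If size is non-NULL and the map provides an are_contiguous
 * callback, *size is clamped to the length of the contiguous run of
 * translations starting at vaddr, never growing beyond its input value.
 */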
uint64_t
spdk_mem_map_translate(const struct spdk_mem_map *map, uint64_t vaddr, uint64_t *size)
{
	const struct map_1gb *map_1gb;
	const struct map_2mb *map_2mb;
	uint64_t idx_256tb;
	uint64_t idx_1gb;
	uint64_t vfn_2mb;
	uint64_t cur_size;
	uint64_t prev_translation;
	uint64_t orig_translation;

	if (spdk_unlikely(vaddr & ~MASK_256TB)) {
		DEBUG_PRINT("invalid usermode virtual address %p\n", (void *)vaddr);
		return map->default_translation;
	}

	vfn_2mb = vaddr >> SHIFT_2MB;
	idx_256tb = MAP_256TB_IDX(vfn_2mb);
	idx_1gb = MAP_1GB_IDX(vfn_2mb);

	map_1gb = map->map_256tb.map[idx_256tb];
	if (spdk_unlikely(!map_1gb)) {
		return map->default_translation;
	}

	cur_size = VALUE_2MB - _2MB_OFFSET(vaddr);
	map_2mb = &map_1gb->map[idx_1gb];
	if (size == NULL || map->ops.are_contiguous == NULL ||
	    map_2mb->translation_2mb == map->default_translation) {
		if (size != NULL) {
			*size = spdk_min(*size, cur_size);
		}
		return map_2mb->translation_2mb;
	}

	orig_translation = map_2mb->translation_2mb;
	prev_translation = orig_translation;
	while (cur_size < *size) {
		vfn_2mb++;
		idx_256tb = MAP_256TB_IDX(vfn_2mb);
		idx_1gb = MAP_1GB_IDX(vfn_2mb);

		map_1gb = map->map_256tb.map[idx_256tb];
		if (spdk_unlikely(!map_1gb)) {
			break;
		}

		map_2mb = &map_1gb->map[idx_1gb];
		if (!map->ops.are_contiguous(prev_translation, map_2mb->translation_2mb)) {
			break;
		}

		cur_size += VALUE_2MB;
		prev_translation = map_2mb->translation_2mb;
	}

	*size = spdk_min(*size, cur_size);
	return orig_translation;
}

#if RTE_VERSION >= RTE_VERSION_NUM(18, 05, 0, 0)
static void
memory_hotplug_cb(enum rte_mem_event event_type,
		  const void *addr, size_t len, void *arg)
{
	if (event_type == RTE_MEM_EVENT_ALLOC) {
		spdk_mem_register((void *)addr, len);

		/* Now mark each segment so that DPDK won't later free it.
		 * This ensures we don't have to deal with the memory
		 * getting freed in different units than it was allocated.
		 */
		while (len > 0) {
			struct rte_memseg *seg;

			seg = rte_mem_virt2memseg(addr, NULL);
			assert(seg != NULL);
			seg->flags |= RTE_MEMSEG_FLAG_DO_NOT_FREE;
			addr = (void *)((uintptr_t)addr + seg->hugepage_sz);
			len -= seg->hugepage_sz;
		}
	} else if (event_type == RTE_MEM_EVENT_FREE) {
		spdk_mem_unregister((void *)addr, len);
	}
}

static int
memory_iter_cb(const struct rte_memseg_list *msl,
	       const struct rte_memseg *ms, size_t len, void *arg)
{
	return spdk_mem_register(ms->addr, len);
}
#endif

int
spdk_mem_map_init(void)
{
	g_mem_reg_map = spdk_mem_map_alloc(0, NULL, NULL);
	if (g_mem_reg_map == NULL) {
		DEBUG_PRINT("memory registration map allocation failed\n");
		return -1;
	}

	/*
	 * Walk all DPDK memory segments and register them
	 * with the master memory map
	 */
#if RTE_VERSION >= RTE_VERSION_NUM(18, 05, 0, 0)
	rte_mem_event_callback_register("spdk", memory_hotplug_cb, NULL);
	rte_memseg_contig_walk(memory_iter_cb, NULL);
#else
	struct rte_mem_config *mcfg;
	size_t seg_idx;

	mcfg = rte_eal_get_configuration()->mem_config;
	for (seg_idx = 0; seg_idx < RTE_MAX_MEMSEG; seg_idx++) {
		struct rte_memseg *seg = &mcfg->memseg[seg_idx];

		if (seg->addr == NULL) {
			break;
		}

		spdk_mem_register(seg->addr, seg->len);
	}
#endif
	return 0;
}