/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */

#ifndef _RTE_MEMORY_H_
#define _RTE_MEMORY_H_

/**
 * @file
 *
 * Memory-related RTE API.
 */

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

#ifdef __cplusplus
extern "C" {
#endif

#include <rte_bitops.h>
#include <rte_common.h>
#include <rte_compat.h>
#include <rte_config.h>
#include <rte_fbarray.h>

#define RTE_PGSIZE_4K   (1ULL << 12)
#define RTE_PGSIZE_64K  (1ULL << 16)
#define RTE_PGSIZE_256K (1ULL << 18)
#define RTE_PGSIZE_2M   (1ULL << 21)
#define RTE_PGSIZE_16M  (1ULL << 24)
#define RTE_PGSIZE_256M (1ULL << 28)
#define RTE_PGSIZE_512M (1ULL << 29)
#define RTE_PGSIZE_1G   (1ULL << 30)
#define RTE_PGSIZE_4G   (1ULL << 32)
#define RTE_PGSIZE_16G  (1ULL << 34)

#define SOCKET_ID_ANY -1 /**< Any NUMA socket. */

/** Prevent this segment from being freed back to the OS. */
#define RTE_MEMSEG_FLAG_DO_NOT_FREE RTE_BIT32(0)
/** This segment is not filled with zeros. */
#define RTE_MEMSEG_FLAG_DIRTY RTE_BIT32(1)

/**
 * Physical memory segment descriptor.
 */
struct rte_memseg {
	rte_iova_t iova;      /**< Start IO address. */
	RTE_STD_C11
	union {
		void *addr;       /**< Start virtual address. */
		uint64_t addr_64; /**< Makes sure addr is always 64 bits. */
	};
	size_t len;           /**< Length of the segment. */
	uint64_t hugepage_sz; /**< Page size of the underlying memory. */
	int32_t socket_id;    /**< NUMA socket ID. */
	uint32_t nchannel;    /**< Number of channels. */
	uint32_t nrank;       /**< Number of ranks. */
	uint32_t flags;       /**< Memseg-specific flags. */
} __rte_packed;

/**
 * A memseg list is a special case as we need to store a bunch of other data
 * together with the array itself.
 */
struct rte_memseg_list {
	RTE_STD_C11
	union {
		void *base_va;
		/**< Base virtual address for this memseg list. */
		uint64_t addr_64;
		/**< Makes sure addr is always 64 bits. */
	};
	uint64_t page_sz;          /**< Page size for all memsegs in this list. */
	int socket_id;             /**< Socket ID for all memsegs in this list. */
	volatile uint32_t version; /**< Version number for multiprocess sync. */
	size_t len;                /**< Length of memory area covered by this memseg list. */
	unsigned int external;     /**< 1 if this list points to external memory. */
	unsigned int heap;         /**< 1 if this list points to a heap. */
	struct rte_fbarray memseg_arr;
};

/**
 * Lock a page in physical memory and prevent it from being swapped.
 *
 * @param virt
 *   The virtual address.
 * @return
 *   0 on success, negative on error.
 */
int rte_mem_lock_page(const void *virt);

/**
 * Get the physical address of any mapped virtual address in the current
 * process. It is found by browsing the /proc/self/pagemap special file.
 * The page must be locked.
 *
 * @param virt
 *   The virtual address.
 * @return
 *   The physical address or RTE_BAD_IOVA on error.
 */
phys_addr_t rte_mem_virt2phy(const void *virt);

/**
 * Get the IO virtual address of any mapped virtual address in the current
 * process.
 *
 * @note This function will not check the internal page table. Instead, in
 *       IOVA as PA mode, it will fall back to getting the real physical
 *       address (which may not match the expected IOVA, such as what was
 *       specified for external memory).
 *
 * @param virt
 *   The virtual address.
 * @return
 *   The IO address or RTE_BAD_IOVA on error.
 */
rte_iova_t rte_mem_virt2iova(const void *virt);
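/*
 * Usage sketch (illustrative only, not part of this header): pin a buffer's
 * page and resolve its IO address. The buffer name is hypothetical; assumes
 * EAL has been initialized and the process has the privileges required for
 * address translation.
 *
 *	static char buf[RTE_PGSIZE_4K] __rte_aligned(RTE_PGSIZE_4K);
 *
 *	if (rte_mem_lock_page(buf) < 0)
 *		return -1; // page could not be locked
 *	rte_iova_t iova = rte_mem_virt2iova(buf);
 *	if (iova == RTE_BAD_IOVA)
 *		return -1; // translation unavailable
 */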
/**
 * Get the virtual memory address corresponding to an IOVA address.
 *
 * @note This function read-locks the memory hotplug subsystem, and thus cannot
 *       be used within memory-related callback functions.
 *
 * @param iova
 *   The IOVA address.
 * @return
 *   Virtual address corresponding to the IOVA address (or NULL if the address
 *   does not exist within the DPDK memory map).
 */
void *
rte_mem_iova2virt(rte_iova_t iova);

/**
 * Get the memseg to which a particular virtual address belongs.
 *
 * @param virt
 *   The virtual address.
 * @param msl
 *   The memseg list in which to look up based on ``virt`` address
 *   (can be NULL).
 * @return
 *   Memseg pointer on success, or NULL on error.
 */
struct rte_memseg *
rte_mem_virt2memseg(const void *virt, const struct rte_memseg_list *msl);

/**
 * Get the memseg list corresponding to a virtual memory address.
 *
 * @param virt
 *   The virtual address.
 * @return
 *   Memseg list to which this virtual address belongs.
 */
struct rte_memseg_list *
rte_mem_virt2memseg_list(const void *virt);

/**
 * Memseg walk function prototype.
 *
 * Returning 0 will continue the walk.
 * Returning 1 will stop the walk.
 * Returning -1 will stop the walk and report an error.
 */
typedef int (*rte_memseg_walk_t)(const struct rte_memseg_list *msl,
		const struct rte_memseg *ms, void *arg);

/**
 * Memseg contig walk function prototype. This will trigger a callback on every
 * VA-contiguous area starting at memseg ``ms``, so total valid VA space at each
 * callback call will be [``ms->addr``, ``ms->addr + len``).
 *
 * Returning 0 will continue the walk.
 * Returning 1 will stop the walk.
 * Returning -1 will stop the walk and report an error.
 */
typedef int (*rte_memseg_contig_walk_t)(const struct rte_memseg_list *msl,
		const struct rte_memseg *ms, size_t len, void *arg);

/**
 * Memseg list walk function prototype. This will trigger a callback on every
 * allocated memseg list.
 *
 * Returning 0 will continue the walk.
 * Returning 1 will stop the walk.
 * Returning -1 will stop the walk and report an error.
 */
typedef int (*rte_memseg_list_walk_t)(const struct rte_memseg_list *msl,
		void *arg);

/**
 * Walk the list of all memsegs.
 *
 * @note This function read-locks the memory hotplug subsystem, and thus cannot
 *       be used within memory-related callback functions.
 *
 * @note This function will also walk through externally allocated segments. It
 *       is up to the user to decide whether to skip these segments.
 *
 * @param func
 *   Iterator function.
 * @param arg
 *   Argument passed to the iterator.
 * @return
 *   0 if walked over the entire list
 *   1 if stopped by the user
 *   -1 if user function reported error
 */
int
rte_memseg_walk(rte_memseg_walk_t func, void *arg);

/**
 * Walk each VA-contiguous area.
 *
 * @note This function read-locks the memory hotplug subsystem, and thus cannot
 *       be used within memory-related callback functions.
 *
 * @note This function will also walk through externally allocated segments. It
 *       is up to the user to decide whether to skip these segments.
 *
 * @param func
 *   Iterator function.
 * @param arg
 *   Argument passed to the iterator.
 * @return
 *   0 if walked over the entire list
 *   1 if stopped by the user
 *   -1 if user function reported error
 */
int
rte_memseg_contig_walk(rte_memseg_contig_walk_t func, void *arg);
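/*
 * Usage sketch (illustrative only, not part of this header): sum the length
 * of all internal memsegs with rte_memseg_walk(). The callback and variable
 * names are hypothetical; assumes EAL has been initialized.
 *
 *	static int
 *	sum_seg_len(const struct rte_memseg_list *msl,
 *			const struct rte_memseg *ms, void *arg)
 *	{
 *		size_t *total = arg;
 *
 *		if (msl->external)
 *			return 0; // skip externally allocated segments
 *		*total += ms->len;
 *		return 0; // continue the walk
 *	}
 *
 *	size_t total = 0;
 *	if (rte_memseg_walk(sum_seg_len, &total) < 0)
 *		return -1; // a callback reported an error
 */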
/**
 * Walk each allocated memseg list.
 *
 * @note This function read-locks the memory hotplug subsystem, and thus cannot
 *       be used within memory-related callback functions.
 *
 * @note This function will also walk through externally allocated segments. It
 *       is up to the user to decide whether to skip these segments.
 *
 * @param func
 *   Iterator function.
 * @param arg
 *   Argument passed to the iterator.
 * @return
 *   0 if walked over the entire list
 *   1 if stopped by the user
 *   -1 if user function reported error
 */
int
rte_memseg_list_walk(rte_memseg_list_walk_t func, void *arg);

/**
 * Walk the list of all memsegs without performing any locking.
 *
 * @note This function does not perform any locking, and is only safe to call
 *       from within memory-related callback functions.
 *
 * @param func
 *   Iterator function.
 * @param arg
 *   Argument passed to the iterator.
 * @return
 *   0 if walked over the entire list
 *   1 if stopped by the user
 *   -1 if user function reported error
 */
int
rte_memseg_walk_thread_unsafe(rte_memseg_walk_t func, void *arg);

/**
 * Walk each VA-contiguous area without performing any locking.
 *
 * @note This function does not perform any locking, and is only safe to call
 *       from within memory-related callback functions.
 *
 * @param func
 *   Iterator function.
 * @param arg
 *   Argument passed to the iterator.
 * @return
 *   0 if walked over the entire list
 *   1 if stopped by the user
 *   -1 if user function reported error
 */
int
rte_memseg_contig_walk_thread_unsafe(rte_memseg_contig_walk_t func, void *arg);

/**
 * Walk each allocated memseg list without performing any locking.
 *
 * @note This function does not perform any locking, and is only safe to call
 *       from within memory-related callback functions.
 *
 * @param func
 *   Iterator function.
 * @param arg
 *   Argument passed to the iterator.
 * @return
 *   0 if walked over the entire list
 *   1 if stopped by the user
 *   -1 if user function reported error
 */
int
rte_memseg_list_walk_thread_unsafe(rte_memseg_list_walk_t func, void *arg);

/**
 * Return the file descriptor associated with a particular memseg (if
 * available).
 *
 * @note This function read-locks the memory hotplug subsystem, and thus cannot
 *       be used within memory-related callback functions.
 *
 * @note This returns an internal file descriptor. Performing any operations on
 *       this file descriptor is inherently dangerous, so it should be treated
 *       as read-only for all intents and purposes.
 *
 * @param ms
 *   A pointer to the memseg for which to get the file descriptor.
 *
 * @return
 *   Valid file descriptor in case of success.
 *   -1 in case of error, with ``rte_errno`` set to the following values:
 *     - EINVAL  - ``ms`` pointer was NULL or did not point to a valid memseg
 *     - ENODEV  - ``ms`` fd is not available
 *     - ENOENT  - ``ms`` is an unused segment
 *     - ENOTSUP - segment fds are not supported
 */
int
rte_memseg_get_fd(const struct rte_memseg *ms);
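/*
 * Usage sketch (illustrative only, not part of this header): look up the
 * backing file descriptor of the segment that holds a given address. The
 * variable names are hypothetical; assumes EAL has been initialized and
 * rte_errno.h is included.
 *
 *	const struct rte_memseg *ms = rte_mem_virt2memseg(addr, NULL);
 *	if (ms == NULL)
 *		return -1; // address is not within DPDK memory
 *	int fd = rte_memseg_get_fd(ms);
 *	if (fd < 0) {
 *		if (rte_errno == ENOTSUP)
 *			return 0; // segment fds not supported here
 *		return -1;
 *	}
 *	// treat fd as read-only; do not close it or write through it
 */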
/**
 * Return the file descriptor associated with a particular memseg (if
 * available).
 *
 * @note This function does not perform any locking, and is only safe to call
 *       from within memory-related callback functions.
 *
 * @note This returns an internal file descriptor. Performing any operations on
 *       this file descriptor is inherently dangerous, so it should be treated
 *       as read-only for all intents and purposes.
 *
 * @param ms
 *   A pointer to the memseg for which to get the file descriptor.
 *
 * @return
 *   Valid file descriptor in case of success.
 *   -1 in case of error, with ``rte_errno`` set to the following values:
 *     - EINVAL  - ``ms`` pointer was NULL or did not point to a valid memseg
 *     - ENODEV  - ``ms`` fd is not available
 *     - ENOENT  - ``ms`` is an unused segment
 *     - ENOTSUP - segment fds are not supported
 */
int
rte_memseg_get_fd_thread_unsafe(const struct rte_memseg *ms);

/**
 * Get the offset into the segment file descriptor associated with a particular
 * memseg (if available).
 *
 * @note This function read-locks the memory hotplug subsystem, and thus cannot
 *       be used within memory-related callback functions.
 *
 * @param ms
 *   A pointer to the memseg for which to get the offset.
 * @param offset
 *   A pointer to the offset value where the result will be stored.
 *
 * @return
 *   0 in case of success, with the offset stored in ``*offset``.
 *   -1 in case of error, with ``rte_errno`` set to the following values:
 *     - EINVAL  - ``ms`` pointer was NULL or did not point to a valid memseg
 *     - EINVAL  - ``offset`` pointer was NULL
 *     - ENODEV  - ``ms`` fd is not available
 *     - ENOENT  - ``ms`` is an unused segment
 *     - ENOTSUP - segment fds are not supported
 */
int
rte_memseg_get_fd_offset(const struct rte_memseg *ms, size_t *offset);

/**
 * Get the offset into the segment file descriptor associated with a particular
 * memseg (if available).
 *
 * @note This function does not perform any locking, and is only safe to call
 *       from within memory-related callback functions.
 *
 * @param ms
 *   A pointer to the memseg for which to get the offset.
 * @param offset
 *   A pointer to the offset value where the result will be stored.
 *
 * @return
 *   0 in case of success, with the offset stored in ``*offset``.
 *   -1 in case of error, with ``rte_errno`` set to the following values:
 *     - EINVAL  - ``ms`` pointer was NULL or did not point to a valid memseg
 *     - EINVAL  - ``offset`` pointer was NULL
 *     - ENODEV  - ``ms`` fd is not available
 *     - ENOENT  - ``ms`` is an unused segment
 *     - ENOTSUP - segment fds are not supported
 */
int
rte_memseg_get_fd_offset_thread_unsafe(const struct rte_memseg *ms,
		size_t *offset);
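/*
 * Usage sketch (illustrative only, not part of this header): combine the
 * segment fd with its offset, e.g. to hand the backing memory to another
 * API that expects an (fd, offset) pair. Names are hypothetical.
 *
 *	size_t offset;
 *	int fd = rte_memseg_get_fd(ms);
 *	if (fd < 0 || rte_memseg_get_fd_offset(ms, &offset) < 0)
 *		return -1; // fall back to a path that does not need an fd
 *	// ms->addr corresponds to [offset, offset + ms->len) within the
 *	// file backing fd
 */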
/**
 * Register an external memory chunk with DPDK.
 *
 * @note Using this API is mutually exclusive with the ``rte_malloc`` family
 *       of API's.
 *
 * @note This API will not perform any DMA mapping. It is expected that the
 *       user will do that themselves.
 *
 * @note Before accessing this memory in other processes, it needs to be
 *       attached in each of those processes by calling ``rte_extmem_attach``
 *       in each other process.
 *
 * @param va_addr
 *   Start of virtual area to register. Must be aligned by ``page_sz``.
 * @param len
 *   Length of virtual area to register. Must be aligned by ``page_sz``.
 * @param iova_addrs
 *   Array of page IOVA addresses corresponding to each page in this memory
 *   area. Can be NULL, in which case page IOVA addresses will be set to
 *   RTE_BAD_IOVA.
 * @param n_pages
 *   Number of elements in the ``iova_addrs`` array. Ignored if ``iova_addrs``
 *   is NULL.
 * @param page_sz
 *   Page size of the underlying memory.
 *
 * @return
 *   - 0 on success
 *   - -1 in case of error, with rte_errno set to one of the following:
 *     EINVAL - one of the parameters was invalid
 *     EEXIST - memory chunk is already registered
 *     ENOSPC - no more space in internal config to store a new memory chunk
 */
int
rte_extmem_register(void *va_addr, size_t len, rte_iova_t iova_addrs[],
		unsigned int n_pages, size_t page_sz);

/**
 * Unregister an external memory chunk with DPDK.
 *
 * @note Using this API is mutually exclusive with the ``rte_malloc`` family
 *       of API's.
 *
 * @note This API will not perform any DMA unmapping. It is expected that the
 *       user will do that themselves.
 *
 * @note Before calling this function, all other processes must call
 *       ``rte_extmem_detach`` to detach from the memory area.
 *
 * @param va_addr
 *   Start of virtual area to unregister.
 * @param len
 *   Length of virtual area to unregister.
 *
 * @return
 *   - 0 on success
 *   - -1 in case of error, with rte_errno set to one of the following:
 *     EINVAL - one of the parameters was invalid
 *     ENOENT - memory chunk was not found
 */
int
rte_extmem_unregister(void *va_addr, size_t len);

/**
 * Attach to an external memory chunk registered in another process.
 *
 * @note Using this API is mutually exclusive with the ``rte_malloc`` family
 *       of API's.
 *
 * @note This API will not perform any DMA mapping. It is expected that the
 *       user will do that themselves.
 *
 * @param va_addr
 *   Start of virtual area to attach to.
 * @param len
 *   Length of virtual area to attach to.
 *
 * @return
 *   - 0 on success
 *   - -1 in case of error, with rte_errno set to one of the following:
 *     EINVAL - one of the parameters was invalid
 *     ENOENT - memory chunk was not found
 */
int
rte_extmem_attach(void *va_addr, size_t len);

/**
 * Detach from an external memory chunk registered in another process.
 *
 * @note Using this API is mutually exclusive with the ``rte_malloc`` family
 *       of API's.
 *
 * @note This API will not perform any DMA unmapping. It is expected that the
 *       user will do that themselves.
 *
 * @param va_addr
 *   Start of virtual area to detach from.
 * @param len
 *   Length of virtual area to detach from.
 *
 * @return
 *   - 0 on success
 *   - -1 in case of error, with rte_errno set to one of the following:
 *     EINVAL - one of the parameters was invalid
 *     ENOENT - memory chunk was not found
 */
int
rte_extmem_detach(void *va_addr, size_t len);
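/*
 * Usage sketch (illustrative only, not part of this header): register an
 * anonymous hugepage mapping as external memory on Linux. The mmap flags
 * are an assumption of this sketch; DMA mapping, if needed, must be done
 * separately.
 *
 *	size_t len = 16 * RTE_PGSIZE_2M;
 *	void *va = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *			MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
 *	if (va == MAP_FAILED)
 *		return -1;
 *	// iova_addrs is NULL, so pages get RTE_BAD_IOVA
 *	if (rte_extmem_register(va, len, NULL, 0, RTE_PGSIZE_2M) < 0)
 *		return -1;
 *	// ... use the memory ...
 *	rte_extmem_unregister(va, len);
 *	munmap(va, len);
 */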
/**
 * Dump the physical memory layout to a file.
 *
 * @note This function read-locks the memory hotplug subsystem, and thus cannot
 *       be used within memory-related callback functions.
 *
 * @param f
 *   A pointer to a file for output.
 */
void rte_dump_physmem_layout(FILE *f);

/**
 * Get the total amount of available physical memory.
 *
 * @note This function read-locks the memory hotplug subsystem, and thus cannot
 *       be used within memory-related callback functions.
 *
 * @return
 *   The total amount of available physical memory in bytes.
 */
uint64_t rte_eal_get_physmem_size(void);

/**
 * Get the number of memory channels.
 *
 * @return
 *   The number of memory channels on the system. The value is 0 if unknown
 *   or not the same on all devices.
 */
unsigned rte_memory_get_nchannel(void);

/**
 * Get the number of memory ranks.
 *
 * @return
 *   The number of memory ranks on the system. The value is 0 if unknown or
 *   not the same on all devices.
 */
unsigned rte_memory_get_nrank(void);

/**
 * Check if all currently allocated memory segments are compliant with the
 * supplied DMA address width.
 *
 * @param maskbits
 *   Address width to check against.
 * @return
 *   0 if all currently allocated memory segments fit within the mask,
 *   -1 otherwise.
 */
int rte_mem_check_dma_mask(uint8_t maskbits);

/**
 * Check if all currently allocated memory segments are compliant with the
 * supplied DMA address width. This function uses
 * rte_memseg_walk_thread_unsafe instead of rte_memseg_walk, implying that
 * memory_hotplug_lock will not be acquired, avoiding deadlock during
 * memory initialization.
 *
 * This function is just for EAL core memory internal use. Drivers should
 * use the previous rte_mem_check_dma_mask.
 *
 * @param maskbits
 *   Address width to check against.
 * @return
 *   0 if all currently allocated memory segments fit within the mask,
 *   -1 otherwise.
 */
int rte_mem_check_dma_mask_thread_unsafe(uint8_t maskbits);

/**
 * Set the DMA mask to use once memory initialization is done. The previous
 * functions, rte_mem_check_dma_mask and rte_mem_check_dma_mask_thread_unsafe,
 * cannot be used safely until memory has been initialized.
 */
void rte_mem_set_dma_mask(uint8_t maskbits);

/**
 * Drivers based on uio will not load unless physical
 * addresses are obtainable. It is only possible to get
 * physical addresses when running as a privileged user.
 *
 * @return
 *   1 if the system is able to obtain physical addresses.
 *   0 if using DMA addresses through an IOMMU.
 */
int rte_eal_using_phys_addrs(void);

/**
 * Enum indicating which kind of memory event has happened. Used by callbacks
 * to distinguish between memory allocations and deallocations.
 */
enum rte_mem_event {
	RTE_MEM_EVENT_ALLOC = 0, /**< Allocation event. */
	RTE_MEM_EVENT_FREE,      /**< Deallocation event. */
};
#define RTE_MEM_EVENT_CALLBACK_NAME_LEN 64
/**< Maximum length of callback name. */

/**
 * Function typedef used to register callbacks for memory events.
 */
typedef void (*rte_mem_event_callback_t)(enum rte_mem_event event_type,
		const void *addr, size_t len, void *arg);
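/*
 * Usage sketch (illustrative only, not part of this header): a minimal memory
 * event callback matching rte_mem_event_callback_t. The callback runs while
 * the memory hotplug subsystem is write-locked, so only the *_thread_unsafe
 * walks may be used inside it. The callback and helper names are
 * hypothetical.
 *
 *	static void
 *	mem_event_cb(enum rte_mem_event event_type, const void *addr,
 *			size_t len, void *arg)
 *	{
 *		RTE_SET_USED(arg);
 *		if (event_type == RTE_MEM_EVENT_ALLOC)
 *			my_dma_map(addr, len);   // hypothetical helper
 *		else
 *			my_dma_unmap(addr, len); // hypothetical helper
 *	}
 *
 *	if (rte_mem_event_callback_register("my-cb", mem_event_cb, NULL) < 0 &&
 *			rte_errno != ENOTSUP)
 *		return -1; // ENOTSUP is a documented, expected condition
 */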
/**
 * Function used to register callbacks for memory events.
 *
 * @note Callbacks will happen while the memory hotplug subsystem is
 *       write-locked, therefore some functions (e.g. `rte_memseg_walk()`)
 *       will cause a deadlock when called from within such callbacks.
 *
 * @note Mem event callbacks not being supported is an expected error
 *       condition, so user code needs to handle this situation. In these
 *       cases, the return value will be -1, and rte_errno will be set to
 *       ENOTSUP.
 *
 * @param name
 *   Name associated with specified callback to be added to the list.
 *
 * @param clb
 *   Callback function pointer.
 *
 * @param arg
 *   Argument to pass to the callback.
 *
 * @return
 *   0 on successful callback registration
 *   -1 on unsuccessful callback registration, with rte_errno value indicating
 *   reason for failure.
 */
int
rte_mem_event_callback_register(const char *name, rte_mem_event_callback_t clb,
		void *arg);

/**
 * Function used to unregister callbacks for memory events.
 *
 * @param name
 *   Name associated with specified callback to be removed from the list.
 *
 * @param arg
 *   Argument to look for among callbacks with specified callback name.
 *
 * @return
 *   0 on successful callback unregistration
 *   -1 on unsuccessful callback unregistration, with rte_errno value
 *   indicating reason for failure.
 */
int
rte_mem_event_callback_unregister(const char *name, void *arg);

#define RTE_MEM_ALLOC_VALIDATOR_NAME_LEN 64
/**< Maximum length of alloc validator name. */

/**
 * Function typedef used to register memory allocation validation callbacks.
 *
 * Returning 0 will allow the allocation attempt to continue. Returning -1
 * will prevent the allocation from succeeding.
 */
typedef int (*rte_mem_alloc_validator_t)(int socket_id,
		size_t cur_limit, size_t new_len);

/**
 * @brief Register a validator callback for memory allocations.
 *
 * Callbacks registered by this function will be called right before the
 * memory allocator is about to trigger allocation of more pages from the
 * system, if said allocation would bring total memory usage above the
 * specified limit on the specified socket. The user can cancel the pending
 * allocation by returning -1 from the callback.
 *
 * @note Callbacks will happen while the memory hotplug subsystem is
 *       write-locked, therefore some functions (e.g. `rte_memseg_walk()`)
 *       will cause a deadlock when called from within such callbacks.
 *
 * @note Validator callbacks not being supported is an expected error
 *       condition, so user code needs to handle this situation. In these
 *       cases, the return value will be -1, and rte_errno will be set to
 *       ENOTSUP.
 *
 * @param name
 *   Name associated with specified callback to be added to the list.
 *
 * @param clb
 *   Callback function pointer.
 *
 * @param socket_id
 *   Socket ID on which to watch for allocations.
 *
 * @param limit
 *   Limit above which to trigger callbacks.
 *
 * @return
 *   0 on successful callback registration
 *   -1 on unsuccessful callback registration, with rte_errno value indicating
 *   reason for failure.
 */
int
rte_mem_alloc_validator_register(const char *name,
		rte_mem_alloc_validator_t clb, int socket_id, size_t limit);
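/*
 * Usage sketch (illustrative only, not part of this header): cap DPDK memory
 * usage on socket 0 at a 1 GiB limit. The callback name and registration
 * name are hypothetical; ENOTSUP is handled as the documented expected
 * condition.
 *
 *	static int
 *	limit_validator(int socket_id, size_t cur_limit, size_t new_len)
 *	{
 *		RTE_SET_USED(socket_id);
 *		if (new_len > cur_limit)
 *			return -1; // reject the allocation
 *		return 0;
 *	}
 *
 *	if (rte_mem_alloc_validator_register("mem-cap", limit_validator,
 *			0, RTE_PGSIZE_1G) < 0 && rte_errno != ENOTSUP)
 *		return -1;
 */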
/**
 * @brief Unregister a validator callback for memory allocations.
 *
 * @param name
 *   Name associated with specified callback to be removed from the list.
 *
 * @param socket_id
 *   Socket ID on which the callback was registered.
 *
 * @return
 *   0 on successful callback unregistration
 *   -1 on unsuccessful callback unregistration, with rte_errno value
 *   indicating reason for failure.
 */
int
rte_mem_alloc_validator_unregister(const char *name, int socket_id);

#ifdef __cplusplus
}
#endif

#endif /* _RTE_MEMORY_H_ */