/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */

#ifndef _RTE_MEMORY_H_
#define _RTE_MEMORY_H_

/**
 * @file
 *
 * Memory-related RTE API.
 */

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

#ifdef __cplusplus
extern "C" {
#endif

#include <rte_common.h>
#include <rte_compat.h>
#include <rte_config.h>
#include <rte_fbarray.h>

#define RTE_PGSIZE_4K   (1ULL << 12)
#define RTE_PGSIZE_64K  (1ULL << 16)
#define RTE_PGSIZE_256K (1ULL << 18)
#define RTE_PGSIZE_2M   (1ULL << 21)
#define RTE_PGSIZE_16M  (1ULL << 24)
#define RTE_PGSIZE_256M (1ULL << 28)
#define RTE_PGSIZE_512M (1ULL << 29)
#define RTE_PGSIZE_1G   (1ULL << 30)
#define RTE_PGSIZE_4G   (1ULL << 32)
#define RTE_PGSIZE_16G  (1ULL << 34)

#define SOCKET_ID_ANY -1 /**< Any NUMA socket. */

/**
 * Physical memory segment descriptor.
 */
#define RTE_MEMSEG_FLAG_DO_NOT_FREE (1 << 0)
/**< Prevent this segment from being freed back to the OS. */
struct rte_memseg {
	rte_iova_t iova;      /**< Start IO address. */
	RTE_STD_C11
	union {
		void *addr;         /**< Start virtual address. */
		uint64_t addr_64;   /**< Makes sure addr is always 64 bits. */
	};
	size_t len;           /**< Length of the segment. */
	uint64_t hugepage_sz; /**< The page size of the underlying memory. */
	int32_t socket_id;    /**< NUMA socket ID. */
	uint32_t nchannel;    /**< Number of channels. */
	uint32_t nrank;       /**< Number of ranks. */
	uint32_t flags;       /**< Memseg-specific flags. */
} __rte_packed;

/**
 * The memseg list is a special case, as we need to store a bunch of other
 * data together with the array itself.
 */
struct rte_memseg_list {
	RTE_STD_C11
	union {
		void *base_va;
		/**< Base virtual address for this memseg list. */
		uint64_t addr_64;
		/**< Makes sure addr is always 64 bits. */
	};
	uint64_t page_sz; /**< Page size for all memsegs in this list. */
	int socket_id; /**< Socket ID for all memsegs in this list. */
	volatile uint32_t version; /**< Version number for multiprocess sync. */
	size_t len; /**< Length of memory area covered by this memseg list. */
	unsigned int external; /**< 1 if this list points to external memory. */
	unsigned int heap; /**< 1 if this list points to a heap. */
	struct rte_fbarray memseg_arr;
};

/**
 * Lock a page in physical memory and prevent it from being swapped.
 *
 * @param virt
 *   The virtual address.
 * @return
 *   0 on success, negative on error.
 */
int rte_mem_lock_page(const void *virt);

/**
 * Get the physical address of any mapped virtual address in the current
 * process. It is found by browsing the /proc/self/pagemap special file.
 * The page must be locked.
 *
 * @param virt
 *   The virtual address.
 * @return
 *   The physical address or RTE_BAD_IOVA on error.
 */
phys_addr_t rte_mem_virt2phy(const void *virt);

/**
 * Get the IO virtual address of any mapped virtual address in the current
 * process.
 *
 * @note This function will not check the internal page table. Instead, in
 *       IOVA-as-PA mode, it will fall back to getting the real physical
 *       address (which may not match the expected IOVA, such as what was
 *       specified for external memory).
 *
 * @param virt
 *   The virtual address.
 * @return
 *   The IO address or RTE_BAD_IOVA on error.
 */
rte_iova_t rte_mem_virt2iova(const void *virt);
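/*
 * As an illustration, a minimal sketch of translating a buffer address to an
 * IO address (the buffer name is hypothetical and error handling is
 * abbreviated):
 *
 *	char buf[4096];
 *	rte_iova_t iova;
 *
 *	// Lock the page first so that the translation stays valid.
 *	if (rte_mem_lock_page(buf) < 0)
 *		return;
 *	iova = rte_mem_virt2iova(buf);
 *	if (iova == RTE_BAD_IOVA)
 *		return; // address could not be translated
 */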
/**
 * Get the virtual memory address corresponding to an IOVA address.
 *
 * @note This function read-locks the memory hotplug subsystem, and thus cannot
 *       be used within memory-related callback functions.
 *
 * @param iova
 *   The IOVA address.
 * @return
 *   Virtual address corresponding to the IOVA address (or NULL if the address
 *   does not exist within the DPDK memory map).
 */
void *
rte_mem_iova2virt(rte_iova_t iova);

/**
 * Get the memseg to which a particular virtual address belongs.
 *
 * @param virt
 *   The virtual address.
 * @param msl
 *   The memseg list in which to look up based on ``virt`` address
 *   (can be NULL).
 * @return
 *   Memseg pointer on success, or NULL on error.
 */
struct rte_memseg *
rte_mem_virt2memseg(const void *virt, const struct rte_memseg_list *msl);

/**
 * Get the memseg list corresponding to a virtual memory address.
 *
 * @param virt
 *   The virtual address.
 * @return
 *   Memseg list to which this virtual address belongs.
 */
struct rte_memseg_list *
rte_mem_virt2memseg_list(const void *virt);

/**
 * Memseg walk function prototype.
 *
 * Returning 0 will continue the walk.
 * Returning 1 will stop the walk.
 * Returning -1 will stop the walk and report an error.
 */
typedef int (*rte_memseg_walk_t)(const struct rte_memseg_list *msl,
		const struct rte_memseg *ms, void *arg);

/**
 * Memseg contig walk function prototype. This will trigger a callback on every
 * VA-contiguous area starting at memseg ``ms``, so the total valid VA space at
 * each callback call will be [``ms->addr``, ``ms->addr + len``).
 *
 * Returning 0 will continue the walk.
 * Returning 1 will stop the walk.
 * Returning -1 will stop the walk and report an error.
 */
typedef int (*rte_memseg_contig_walk_t)(const struct rte_memseg_list *msl,
		const struct rte_memseg *ms, size_t len, void *arg);

/**
 * Memseg list walk function prototype. This will trigger a callback on every
 * allocated memseg list.
 *
 * Returning 0 will continue the walk.
 * Returning 1 will stop the walk.
 * Returning -1 will stop the walk and report an error.
 */
typedef int (*rte_memseg_list_walk_t)(const struct rte_memseg_list *msl,
		void *arg);

/**
 * Walk the list of all memsegs.
 *
 * @note This function read-locks the memory hotplug subsystem, and thus cannot
 *       be used within memory-related callback functions.
 *
 * @note This function will also walk through externally allocated segments. It
 *       is up to the user to decide whether to skip these segments.
 *
 * @param func
 *   Iterator function
 * @param arg
 *   Argument passed to iterator
 * @return
 *   0 if walked over the entire list
 *   1 if stopped by the user
 *   -1 if user function reported error
 */
int
rte_memseg_walk(rte_memseg_walk_t func, void *arg);
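/*
 * As an illustration, a minimal sketch of summing up all memseg lengths with
 * a walk (the callback and counter names are hypothetical):
 *
 *	static int
 *	sum_cb(const struct rte_memseg_list *msl __rte_unused,
 *			const struct rte_memseg *ms, void *arg)
 *	{
 *		size_t *total = arg;
 *
 *		*total += ms->len;
 *		return 0; // 0 continues the walk
 *	}
 *
 *	size_t total = 0;
 *	rte_memseg_walk(sum_cb, &total);
 */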
/**
 * Walk each VA-contiguous area.
 *
 * @note This function read-locks the memory hotplug subsystem, and thus cannot
 *       be used within memory-related callback functions.
 *
 * @note This function will also walk through externally allocated segments. It
 *       is up to the user to decide whether to skip these segments.
 *
 * @param func
 *   Iterator function
 * @param arg
 *   Argument passed to iterator
 * @return
 *   0 if walked over the entire list
 *   1 if stopped by the user
 *   -1 if user function reported error
 */
int
rte_memseg_contig_walk(rte_memseg_contig_walk_t func, void *arg);

/**
 * Walk each allocated memseg list.
 *
 * @note This function read-locks the memory hotplug subsystem, and thus cannot
 *       be used within memory-related callback functions.
 *
 * @note This function will also walk through externally allocated segments. It
 *       is up to the user to decide whether to skip these segments.
 *
 * @param func
 *   Iterator function
 * @param arg
 *   Argument passed to iterator
 * @return
 *   0 if walked over the entire list
 *   1 if stopped by the user
 *   -1 if user function reported error
 */
int
rte_memseg_list_walk(rte_memseg_list_walk_t func, void *arg);

/**
 * Walk the list of all memsegs without performing any locking.
 *
 * @note This function does not perform any locking, and is only safe to call
 *       from within memory-related callback functions.
 *
 * @param func
 *   Iterator function
 * @param arg
 *   Argument passed to iterator
 * @return
 *   0 if walked over the entire list
 *   1 if stopped by the user
 *   -1 if user function reported error
 */
int
rte_memseg_walk_thread_unsafe(rte_memseg_walk_t func, void *arg);

/**
 * Walk each VA-contiguous area without performing any locking.
 *
 * @note This function does not perform any locking, and is only safe to call
 *       from within memory-related callback functions.
 *
 * @param func
 *   Iterator function
 * @param arg
 *   Argument passed to iterator
 * @return
 *   0 if walked over the entire list
 *   1 if stopped by the user
 *   -1 if user function reported error
 */
int
rte_memseg_contig_walk_thread_unsafe(rte_memseg_contig_walk_t func, void *arg);

/**
 * Walk each allocated memseg list without performing any locking.
 *
 * @note This function does not perform any locking, and is only safe to call
 *       from within memory-related callback functions.
 *
 * @param func
 *   Iterator function
 * @param arg
 *   Argument passed to iterator
 * @return
 *   0 if walked over the entire list
 *   1 if stopped by the user
 *   -1 if user function reported error
 */
int
rte_memseg_list_walk_thread_unsafe(rte_memseg_list_walk_t func, void *arg);
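/*
 * As an illustration, a minimal sketch of finding the largest VA-contiguous
 * area using the contig walk (the callback name is hypothetical):
 *
 *	static int
 *	largest_cb(const struct rte_memseg_list *msl __rte_unused,
 *			const struct rte_memseg *ms __rte_unused,
 *			size_t len, void *arg)
 *	{
 *		size_t *largest = arg;
 *
 *		if (len > *largest)
 *			*largest = len;
 *		return 0;
 *	}
 *
 *	size_t largest = 0;
 *	rte_memseg_contig_walk(largest_cb, &largest);
 */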
/**
 * Return the file descriptor associated with a particular memseg
 * (if available).
 *
 * @note This function read-locks the memory hotplug subsystem, and thus cannot
 *       be used within memory-related callback functions.
 *
 * @note This returns an internal file descriptor. Performing any operations on
 *       this file descriptor is inherently dangerous, so it should be treated
 *       as read-only for all intents and purposes.
 *
 * @param ms
 *   A pointer to the memseg for which to get the file descriptor.
 *
 * @return
 *   Valid file descriptor in case of success.
 *   -1 in case of error, with ``rte_errno`` set to the following values:
 *   - EINVAL  - ``ms`` pointer was NULL or did not point to a valid memseg
 *   - ENODEV  - ``ms`` fd is not available
 *   - ENOENT  - ``ms`` is an unused segment
 *   - ENOTSUP - segment fds are not supported
 */
int
rte_memseg_get_fd(const struct rte_memseg *ms);

/**
 * Return the file descriptor associated with a particular memseg
 * (if available).
 *
 * @note This function does not perform any locking, and is only safe to call
 *       from within memory-related callback functions.
 *
 * @note This returns an internal file descriptor. Performing any operations on
 *       this file descriptor is inherently dangerous, so it should be treated
 *       as read-only for all intents and purposes.
 *
 * @param ms
 *   A pointer to the memseg for which to get the file descriptor.
 *
 * @return
 *   Valid file descriptor in case of success.
 *   -1 in case of error, with ``rte_errno`` set to the following values:
 *   - EINVAL  - ``ms`` pointer was NULL or did not point to a valid memseg
 *   - ENODEV  - ``ms`` fd is not available
 *   - ENOENT  - ``ms`` is an unused segment
 *   - ENOTSUP - segment fds are not supported
 */
int
rte_memseg_get_fd_thread_unsafe(const struct rte_memseg *ms);

/**
 * Get the offset into the segment file descriptor associated with a particular
 * memseg (if available).
 *
 * @note This function read-locks the memory hotplug subsystem, and thus cannot
 *       be used within memory-related callback functions.
 *
 * @param ms
 *   A pointer to the memseg for which to get the offset.
 * @param offset
 *   A pointer to the offset value where the result will be stored.
 *
 * @return
 *   0 in case of success, with the offset stored in ``offset``.
 *   -1 in case of error, with ``rte_errno`` set to the following values:
 *   - EINVAL  - ``ms`` pointer was NULL or did not point to a valid memseg
 *   - EINVAL  - ``offset`` pointer was NULL
 *   - ENODEV  - ``ms`` fd is not available
 *   - ENOENT  - ``ms`` is an unused segment
 *   - ENOTSUP - segment fds are not supported
 */
int
rte_memseg_get_fd_offset(const struct rte_memseg *ms, size_t *offset);

/**
 * Get the offset into the segment file descriptor associated with a particular
 * memseg (if available).
 *
 * @note This function does not perform any locking, and is only safe to call
 *       from within memory-related callback functions.
 *
 * @param ms
 *   A pointer to the memseg for which to get the offset.
 * @param offset
 *   A pointer to the offset value where the result will be stored.
 *
 * @return
 *   0 in case of success, with the offset stored in ``offset``.
 *   -1 in case of error, with ``rte_errno`` set to the following values:
 *   - EINVAL  - ``ms`` pointer was NULL or did not point to a valid memseg
 *   - EINVAL  - ``offset`` pointer was NULL
 *   - ENODEV  - ``ms`` fd is not available
 *   - ENOENT  - ``ms`` is an unused segment
 *   - ENOTSUP - segment fds are not supported
 */
int
rte_memseg_get_fd_offset_thread_unsafe(const struct rte_memseg *ms,
		size_t *offset);
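/*
 * As an illustration, a minimal sketch of looking up the backing file
 * descriptor and offset for the memseg that contains a given address
 * (variable names are hypothetical and error handling is abbreviated):
 *
 *	struct rte_memseg *ms = rte_mem_virt2memseg(addr, NULL);
 *	size_t offset;
 *	int fd;
 *
 *	if (ms != NULL) {
 *		fd = rte_memseg_get_fd(ms);
 *		if (fd >= 0 && rte_memseg_get_fd_offset(ms, &offset) == 0) {
 *			// fd and offset identify the page within its backing
 *			// file; treat the descriptor as read-only.
 *		}
 *	}
 */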
/**
 * Register an external memory chunk with DPDK.
 *
 * @note Using this API is mutually exclusive with the ``rte_malloc`` family
 *       of APIs.
 *
 * @note This API will not perform any DMA mapping. It is expected that the
 *       user will do that themselves.
 *
 * @note Before accessing this memory in other processes, it needs to be
 *       attached in each of those processes by calling ``rte_extmem_attach``.
 *
 * @param va_addr
 *   Start of virtual area to register. Must be aligned to ``page_sz``.
 * @param len
 *   Length of virtual area to register. Must be aligned to ``page_sz``.
 * @param iova_addrs
 *   Array of page IOVA addresses corresponding to each page in this memory
 *   area. Can be NULL, in which case page IOVA addresses will be set to
 *   RTE_BAD_IOVA.
 * @param n_pages
 *   Number of elements in the iova_addrs array. Ignored if ``iova_addrs``
 *   is NULL.
 * @param page_sz
 *   Page size of the underlying memory.
 *
 * @return
 *   - 0 on success
 *   - -1 in case of error, with rte_errno set to one of the following:
 *     EINVAL - one of the parameters was invalid
 *     EEXIST - memory chunk is already registered
 *     ENOSPC - no more space in internal config to store a new memory chunk
 */
int
rte_extmem_register(void *va_addr, size_t len, rte_iova_t iova_addrs[],
		unsigned int n_pages, size_t page_sz);

/**
 * Unregister an external memory chunk with DPDK.
 *
 * @note Using this API is mutually exclusive with the ``rte_malloc`` family
 *       of APIs.
 *
 * @note This API will not perform any DMA unmapping. It is expected that the
 *       user will do that themselves.
 *
 * @note Before calling this function, all other processes must call
 *       ``rte_extmem_detach`` to detach from the memory area.
 *
 * @param va_addr
 *   Start of virtual area to unregister.
 * @param len
 *   Length of virtual area to unregister.
 *
 * @return
 *   - 0 on success
 *   - -1 in case of error, with rte_errno set to one of the following:
 *     EINVAL - one of the parameters was invalid
 *     ENOENT - memory chunk was not found
 */
int
rte_extmem_unregister(void *va_addr, size_t len);

/**
 * Attach to an external memory chunk registered in another process.
 *
 * @note Using this API is mutually exclusive with the ``rte_malloc`` family
 *       of APIs.
 *
 * @note This API will not perform any DMA mapping. It is expected that the
 *       user will do that themselves.
 *
 * @param va_addr
 *   Start of virtual area to register.
 * @param len
 *   Length of virtual area to register.
 *
 * @return
 *   - 0 on success
 *   - -1 in case of error, with rte_errno set to one of the following:
 *     EINVAL - one of the parameters was invalid
 *     ENOENT - memory chunk was not found
 */
int
rte_extmem_attach(void *va_addr, size_t len);

/**
 * Detach from an external memory chunk registered in another process.
 *
 * @note Using this API is mutually exclusive with the ``rte_malloc`` family
 *       of APIs.
 *
 * @note This API will not perform any DMA unmapping. It is expected that the
 *       user will do that themselves.
 *
 * @param va_addr
 *   Start of virtual area to unregister.
 * @param len
 *   Length of virtual area to unregister.
 *
 * @return
 *   - 0 on success
 *   - -1 in case of error, with rte_errno set to one of the following:
 *     EINVAL - one of the parameters was invalid
 *     ENOENT - memory chunk was not found
 */
int
rte_extmem_detach(void *va_addr, size_t len);
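/*
 * As an illustration, a minimal sketch of registering an anonymous mapping
 * with DPDK (the mmap() parameters are hypothetical for the platform; no
 * IOVA table is supplied, so page IOVAs default to RTE_BAD_IOVA):
 *
 *	size_t page_sz = RTE_PGSIZE_4K;
 *	size_t len = 64 * page_sz;
 *	void *va = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *			MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 *
 *	if (va != MAP_FAILED &&
 *			rte_extmem_register(va, len, NULL, 0, page_sz) == 0) {
 *		// ... add the area to a malloc heap and/or DMA-map it ...
 *		rte_extmem_unregister(va, len);
 *	}
 */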
/**
 * Dump the physical memory layout to a file.
 *
 * @note This function read-locks the memory hotplug subsystem, and thus cannot
 *       be used within memory-related callback functions.
 *
 * @param f
 *   A pointer to a file for output.
 */
void rte_dump_physmem_layout(FILE *f);

/**
 * Get the total amount of available physical memory.
 *
 * @note This function read-locks the memory hotplug subsystem, and thus cannot
 *       be used within memory-related callback functions.
 *
 * @return
 *   The total amount of available physical memory in bytes.
 */
uint64_t rte_eal_get_physmem_size(void);

/**
 * Get the number of memory channels.
 *
 * @return
 *   The number of memory channels on the system. The value is 0 if unknown
 *   or not the same on all devices.
 */
unsigned rte_memory_get_nchannel(void);

/**
 * Get the number of memory ranks.
 *
 * @return
 *   The number of memory ranks on the system. The value is 0 if unknown or
 *   not the same on all devices.
 */
unsigned rte_memory_get_nrank(void);

/**
 * Check if all currently allocated memory segments are compliant with
 * supplied DMA address width.
 *
 * @param maskbits
 *   Address width to check against.
 * @return
 *   0 if all currently allocated memory addresses fit within the mask,
 *   -1 otherwise.
 */
int rte_mem_check_dma_mask(uint8_t maskbits);

/**
 * Check if all currently allocated memory segments are compliant with
 * supplied DMA address width. This function uses
 * rte_memseg_walk_thread_unsafe instead of rte_memseg_walk, which means
 * memory_hotplug_lock will not be acquired, avoiding deadlock during
 * memory initialization.
 *
 * This function is just for EAL core memory internal use. Drivers should
 * use rte_mem_check_dma_mask instead.
 *
 * @param maskbits
 *   Address width to check against.
 * @return
 *   0 if all currently allocated memory addresses fit within the mask,
 *   -1 otherwise.
 */
int rte_mem_check_dma_mask_thread_unsafe(uint8_t maskbits);

/**
 * Set the DMA mask to use once memory initialization is done. The functions
 * rte_mem_check_dma_mask and rte_mem_check_dma_mask_thread_unsafe cannot be
 * used safely until memory has been initialized.
 *
 * @param maskbits
 *   Address width to use for the DMA mask.
 */
void rte_mem_set_dma_mask(uint8_t maskbits);

/**
 * Drivers based on uio will not load unless physical
 * addresses are obtainable. It is only possible to get
 * physical addresses when running as a privileged user.
 *
 * @return
 *   1 if the system is able to obtain physical addresses.
 *   0 if using DMA addresses through an IOMMU.
 */
int rte_eal_using_phys_addrs(void);


/**
 * Enum indicating which kind of memory event has happened. Used by callbacks
 * to distinguish between memory allocations and deallocations.
 */
enum rte_mem_event {
	RTE_MEM_EVENT_ALLOC = 0, /**< Allocation event. */
	RTE_MEM_EVENT_FREE,      /**< Deallocation event. */
};
#define RTE_MEM_EVENT_CALLBACK_NAME_LEN 64
/**< Maximum length of callback name. */

/**
 * Function typedef used to register callbacks for memory events.
 */
typedef void (*rte_mem_event_callback_t)(enum rte_mem_event event_type,
		const void *addr, size_t len, void *arg);
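/*
 * As an illustration, a minimal sketch of a memory event callback matching
 * rte_mem_event_callback_t (the callback name is hypothetical). Only the
 * _thread_unsafe walk variants may be called from inside such a callback:
 *
 *	static void
 *	mem_event_cb(enum rte_mem_event event_type, const void *addr,
 *			size_t len, void *arg __rte_unused)
 *	{
 *		if (event_type == RTE_MEM_EVENT_ALLOC)
 *			printf("allocated %zu bytes at %p\n", len, addr);
 *		else
 *			printf("freed %zu bytes at %p\n", len, addr);
 *	}
 *
 * Register it with rte_mem_event_callback_register(), declared below:
 *
 *	rte_mem_event_callback_register("example-cb", mem_event_cb, NULL);
 */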
/**
 * Function used to register callbacks for memory events.
 *
 * @note Callbacks will happen while the memory hotplug subsystem is
 *       write-locked, therefore some functions (e.g. `rte_memseg_walk()`)
 *       will cause a deadlock when called from within such callbacks.
 *
 * @note Mem event callbacks not being supported is an expected error
 *       condition, so user code needs to handle this situation. In these
 *       cases, the return value will be -1, and rte_errno will be set to
 *       ENOTSUP.
 *
 * @param name
 *   Name associated with specified callback to be added to the list.
 *
 * @param clb
 *   Callback function pointer.
 *
 * @param arg
 *   Argument to pass to the callback.
 *
 * @return
 *   0 on successful callback register
 *   -1 on unsuccessful callback register, with rte_errno value indicating
 *   reason for failure.
 */
int
rte_mem_event_callback_register(const char *name, rte_mem_event_callback_t clb,
		void *arg);

/**
 * Function used to unregister callbacks for memory events.
 *
 * @param name
 *   Name associated with specified callback to be removed from the list.
 *
 * @param arg
 *   Argument to look for among callbacks with specified callback name.
 *
 * @return
 *   0 on successful callback unregister
 *   -1 on unsuccessful callback unregister, with rte_errno value indicating
 *   reason for failure.
 */
int
rte_mem_event_callback_unregister(const char *name, void *arg);


#define RTE_MEM_ALLOC_VALIDATOR_NAME_LEN 64
/**< Maximum length of alloc validator name. */
/**
 * Function typedef used to register memory allocation validation callbacks.
 *
 * Returning 0 will allow the allocation attempt to continue. Returning -1
 * will prevent the allocation from succeeding.
 */
typedef int (*rte_mem_alloc_validator_t)(int socket_id,
		size_t cur_limit, size_t new_len);

/**
 * @brief Register validator callback for memory allocations.
 *
 * Callbacks registered by this function will be called right before the
 * memory allocator is about to trigger allocation of more pages from the
 * system, if said allocation will bring total memory usage above the
 * specified limit on the specified socket. The user will be able to cancel
 * the pending allocation if the callback returns -1.
 *
 * @note Callbacks will happen while the memory hotplug subsystem is
 *       write-locked, therefore some functions (e.g. `rte_memseg_walk()`)
 *       will cause a deadlock when called from within such callbacks.
 *
 * @note Validator callbacks not being supported is an expected error
 *       condition, so user code needs to handle this situation. In these
 *       cases, the return value will be -1, and rte_errno will be set to
 *       ENOTSUP.
 *
 * @param name
 *   Name associated with specified callback to be added to the list.
 *
 * @param clb
 *   Callback function pointer.
 *
 * @param socket_id
 *   Socket ID on which to watch for allocations.
 *
 * @param limit
 *   Limit above which to trigger callbacks.
 *
 * @return
 *   0 on successful callback register
 *   -1 on unsuccessful callback register, with rte_errno value indicating
 *   reason for failure.
 */
int
rte_mem_alloc_validator_register(const char *name,
		rte_mem_alloc_validator_t clb, int socket_id, size_t limit);
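/*
 * As an illustration, a minimal sketch of a validator that rejects any
 * allocation pushing socket 0 above the registered limit (the callback name
 * and the 1 GiB limit are hypothetical):
 *
 *	static int
 *	limit_cb(int socket_id __rte_unused, size_t cur_limit, size_t new_len)
 *	{
 *		// Only called when new_len would exceed cur_limit.
 *		return new_len > cur_limit ? -1 : 0;
 *	}
 *
 *	rte_mem_alloc_validator_register("example-limit", limit_cb, 0,
 *			RTE_PGSIZE_1G);
 */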
/**
 * @brief Unregister validator callback for memory allocations.
 *
 * @param name
 *   Name associated with specified callback to be removed from the list.
 *
 * @param socket_id
 *   Socket ID on which to watch for allocations.
 *
 * @return
 *   0 on successful callback unregister
 *   -1 on unsuccessful callback unregister, with rte_errno value indicating
 *   reason for failure.
 */
int
rte_mem_alloc_validator_unregister(const char *name, int socket_id);

#ifdef __cplusplus
}
#endif

#endif /* _RTE_MEMORY_H_ */