/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation.
 * Copyright 2014 6WIND S.A.
 */

#ifndef _RTE_MBUF_H_
#define _RTE_MBUF_H_

/**
 * @file
 * RTE Mbuf
 *
 * The mbuf library provides the ability to create and destroy buffers
 * that may be used by the RTE application to store message
 * buffers. The message buffers are stored in a mempool, using the
 * RTE mempool library.
 *
 * The preferred way to create an mbuf pool is to use
 * rte_pktmbuf_pool_create(). However, in some situations, an
 * application may want to have more control (ex: populate the pool with
 * specific memory); in this case it is possible to use functions from
 * rte_mempool. See how rte_pktmbuf_pool_create() is implemented for
 * details.
 *
 * This library provides an API to allocate/free packet mbufs, which are
 * used to carry network packets.
 *
 * To understand the concepts of packet buffers or mbufs, you
 * should read "TCP/IP Illustrated, Volume 2: The Implementation,
 * Addison-Wesley, 1995, ISBN 0-201-63354-X from Richard Stevens"
 * http://www.kohala.com/start/tcpipiv2.html
 */

#include <stdint.h>

#include <rte_common.h>
#include <rte_config.h>
#include <rte_mempool.h>
#include <rte_prefetch.h>
#include <rte_branch_prediction.h>
#include <rte_mbuf_ptype.h>
#include <rte_mbuf_core.h>

#ifdef __cplusplus
extern "C" {
#endif

/**
 * Get the name of an RX offload flag
 *
 * @param mask
 *   The mask describing the flag.
 * @return
 *   The name of this flag, or NULL if it's not a valid RX flag.
 */
const char *rte_get_rx_ol_flag_name(uint64_t mask);

/**
 * Dump the list of RX offload flags in a buffer
 *
 * @param mask
 *   The mask describing the RX flags.
 * @param buf
 *   The output buffer.
 * @param buflen
 *   The length of the buffer.
 * @return
 *   0 on success, (-1) on error.
 */
int rte_get_rx_ol_flag_list(uint64_t mask, char *buf, size_t buflen);

/**
 * Get the name of a TX offload flag
 *
 * @param mask
 *   The mask describing the flag. Usually only one bit should be set.
 *   Several bits can be given if they belong to the same mask.
 *   Ex: RTE_MBUF_F_TX_L4_MASK.
 * @return
 *   The name of this flag, or NULL if it's not a valid TX flag.
 */
const char *rte_get_tx_ol_flag_name(uint64_t mask);

/**
 * Dump the list of TX offload flags in a buffer
 *
 * @param mask
 *   The mask describing the TX flags.
 * @param buf
 *   The output buffer.
 * @param buflen
 *   The length of the buffer.
 * @return
 *   0 on success, (-1) on error.
 */
int rte_get_tx_ol_flag_list(uint64_t mask, char *buf, size_t buflen);
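/*
 * Example (illustrative sketch, not part of the API): decode the RX
 * offload flags of a received mbuf into a human-readable string. The
 * function name and buffer size are arbitrary illustration choices;
 * printf() is assumed available via <stdio.h>, which is pulled in
 * through the mempool header.
 */
static inline void
example_log_rx_flags(const struct rte_mbuf *m)
{
	char flags[256];

	/* Render all RX offload flag names into the local buffer. */
	if (rte_get_rx_ol_flag_list(m->ol_flags, flags, sizeof(flags)) == 0)
		printf("rx ol_flags: %s\n", flags);
}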
/**
 * Prefetch the first part of the mbuf
 *
 * The first 64 bytes of the mbuf correspond to fields that are used early
 * in the receive path. If the cache line of the architecture is larger
 * than 64 bytes, the second part will also be prefetched.
 *
 * @param m
 *   The pointer to the mbuf.
 */
static inline void
rte_mbuf_prefetch_part1(struct rte_mbuf *m)
{
	rte_prefetch0(m);
}

/**
 * Prefetch the second part of the mbuf
 *
 * The next 64 bytes of the mbuf correspond to fields that are used in the
 * transmit path. If the cache line of the architecture is larger than
 * 64 bytes, this function does nothing as it is expected that the full
 * mbuf is already in cache.
 *
 * @param m
 *   The pointer to the mbuf.
 */
static inline void
rte_mbuf_prefetch_part2(struct rte_mbuf *m)
{
#if RTE_CACHE_LINE_SIZE == 64
	rte_prefetch0(RTE_PTR_ADD(m, RTE_CACHE_LINE_MIN_SIZE));
#else
	RTE_SET_USED(m);
#endif
}

static inline uint16_t rte_pktmbuf_priv_size(struct rte_mempool *mp);

/**
 * Get the IOVA address of the mbuf data buffer.
 *
 * @param m
 *   The pointer to the mbuf.
 * @return
 *   The IOVA address of the mbuf.
 */
static inline rte_iova_t
rte_mbuf_iova_get(const struct rte_mbuf *m)
{
#if RTE_IOVA_IN_MBUF
	return m->buf_iova;
#else
	return (rte_iova_t)m->buf_addr;
#endif
}

/**
 * Set the IOVA address of the mbuf data buffer.
 *
 * @param m
 *   The pointer to the mbuf.
 * @param iova
 *   Value to set as the IOVA address of the mbuf.
 */
static inline void
rte_mbuf_iova_set(struct rte_mbuf *m, rte_iova_t iova)
{
#if RTE_IOVA_IN_MBUF
	m->buf_iova = iova;
#else
	RTE_SET_USED(m);
	RTE_SET_USED(iova);
#endif
}

/**
 * Return the IO address of the beginning of the mbuf data
 *
 * @param mb
 *   The pointer to the mbuf.
 * @return
 *   The IO address of the beginning of the mbuf data
 */
static inline rte_iova_t
rte_mbuf_data_iova(const struct rte_mbuf *mb)
{
	return rte_mbuf_iova_get(mb) + mb->data_off;
}

/**
 * Return the default IO address of the beginning of the mbuf data
 *
 * This function is used by drivers in their receive function, as it
 * returns the location where data should be written by the NIC, taking
 * the default headroom into account.
 *
 * @param mb
 *   The pointer to the mbuf.
 * @return
 *   The IO address of the beginning of the mbuf data
 */
static inline rte_iova_t
rte_mbuf_data_iova_default(const struct rte_mbuf *mb)
{
	return rte_mbuf_iova_get(mb) + RTE_PKTMBUF_HEADROOM;
}

/**
 * Return the mbuf owning the data buffer address of an indirect mbuf.
 *
 * @param mi
 *   The pointer to the indirect mbuf.
 * @return
 *   The address of the direct mbuf corresponding to buf_addr.
 */
static inline struct rte_mbuf *
rte_mbuf_from_indirect(struct rte_mbuf *mi)
{
	return (struct rte_mbuf *)RTE_PTR_SUB(mi->buf_addr, sizeof(*mi) + mi->priv_size);
}

/**
 * Return the address of the buffer embedded in the given mbuf.
 *
 * The return value shall be the same as mb->buf_addr if the mbuf is
 * already initialized and direct. However, this API is useful if the
 * mempool of the mbuf is already known, because it doesn't need to access
 * the mbuf contents in order to get the mempool pointer.
 *
 * @param mb
 *   The pointer to the mbuf.
 * @param mp
 *   The pointer to the mempool of the mbuf.
 * @return
 *   The pointer to the mbuf buffer.
 */
static inline char *
rte_mbuf_buf_addr(struct rte_mbuf *mb, struct rte_mempool *mp)
{
	return (char *)mb + sizeof(*mb) + rte_pktmbuf_priv_size(mp);
}
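/*
 * Example (illustrative sketch, not part of the API): software-pipelined
 * prefetching over a burst of received mbufs before touching their
 * headers. The function and the process_one() callback are hypothetical;
 * the prefetch distance of 4 is an arbitrary illustration choice.
 */
static inline void
example_prefetch_burst(struct rte_mbuf **pkts, uint16_t nb_pkts,
		void (*process_one)(struct rte_mbuf *))
{
	uint16_t i;

	/* Warm up the first cache line of the leading mbufs. */
	for (i = 0; i < nb_pkts && i < 4; i++)
		rte_mbuf_prefetch_part1(pkts[i]);

	for (i = 0; i < nb_pkts; i++) {
		if (i + 4 < nb_pkts)
			rte_mbuf_prefetch_part1(pkts[i + 4]);
		/* The second cache line is needed if pool/next are touched. */
		rte_mbuf_prefetch_part2(pkts[i]);
		process_one(pkts[i]);
	}
}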
/**
 * Return the default address of the beginning of the mbuf data.
 *
 * @param mb
 *   The pointer to the mbuf.
 * @return
 *   The pointer to the beginning of the mbuf data.
 */
static inline char *
rte_mbuf_data_addr_default(struct rte_mbuf *mb)
{
	return rte_mbuf_buf_addr(mb, mb->pool) + RTE_PKTMBUF_HEADROOM;
}

/**
 * Return the address of the buffer embedded in the given mbuf.
 *
 * @note Accessing the mempool pointer of an mbuf is expensive because the
 * pointer is stored in the second cache line of the mbuf. If the mempool
 * is known, it is better not to reference the mempool pointer in the
 * mbuf; calling rte_mbuf_buf_addr() instead is more efficient.
 *
 * @param md
 *   The pointer to the mbuf.
 * @return
 *   The address of the data buffer owned by the mbuf.
 */
static inline char *
rte_mbuf_to_baddr(struct rte_mbuf *md)
{
	return rte_mbuf_buf_addr(md, md->pool);
}

/**
 * Return the starting address of the private data area embedded in
 * the given mbuf.
 *
 * Note that no check is made to ensure that a private data area
 * actually exists in the supplied mbuf.
 *
 * @param m
 *   The pointer to the mbuf.
 * @return
 *   The starting address of the private data area of the given mbuf.
 */
static inline void *
rte_mbuf_to_priv(struct rte_mbuf *m)
{
	return RTE_PTR_ADD(m, sizeof(struct rte_mbuf));
}

/**
 * Private data in case of pktmbuf pool.
 *
 * A structure that contains some pktmbuf_pool-specific data that are
 * appended after the mempool structure (in private data).
 */
struct rte_pktmbuf_pool_private {
	uint16_t mbuf_data_room_size; /**< Size of data space in each mbuf. */
	uint16_t mbuf_priv_size;      /**< Size of private area in each mbuf. */
	uint32_t flags;               /**< Reserved for future use. */
};

/**
 * Return the flags from the private data of a mempool structure.
 *
 * @param mp
 *   A pointer to the mempool structure.
 * @return
 *   The flags from the private data structure.
 */
static inline uint32_t
rte_pktmbuf_priv_flags(struct rte_mempool *mp)
{
	struct rte_pktmbuf_pool_private *mbp_priv;

	mbp_priv = (struct rte_pktmbuf_pool_private *)rte_mempool_get_priv(mp);
	return mbp_priv->flags;
}

/**
 * When set, the pktmbuf mempool will hold only mbufs with a pinned external
 * buffer. The external buffer will be attached to the mbuf at
 * memory pool creation and will never be detached by the mbuf free calls.
 * The mbuf should not contain any room for data after the mbuf structure.
 */
#define RTE_PKTMBUF_POOL_F_PINNED_EXT_BUF (1 << 0)

/**
 * Returns non-zero if the given mbuf has a pinned external buffer, or zero
 * otherwise. The pinned external buffer is allocated at pool creation
 * time and should not be freed on mbuf freeing.
 *
 * An external buffer is a user-provided anonymous buffer.
 */
#define RTE_MBUF_HAS_PINNED_EXTBUF(mb) \
	(rte_pktmbuf_priv_flags(mb->pool) & RTE_PKTMBUF_POOL_F_PINNED_EXT_BUF)

#ifdef RTE_LIBRTE_MBUF_DEBUG

/** check mbuf type in debug mode */
#define __rte_mbuf_sanity_check(m, is_h) rte_mbuf_sanity_check(m, is_h)

#else /* RTE_LIBRTE_MBUF_DEBUG */

/** check mbuf type in debug mode */
#define __rte_mbuf_sanity_check(m, is_h) do { } while (0)

#endif /* RTE_LIBRTE_MBUF_DEBUG */
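/*
 * Example (illustrative sketch, not part of the API): store per-packet
 * application metadata in the private area. struct example_md is a
 * hypothetical application type; the pool must have been created with
 * priv_size >= sizeof(struct example_md), aligned to RTE_MBUF_PRIV_ALIGN.
 */
struct example_md {
	uint32_t flow_id;
	uint8_t needs_reassembly;
};

static inline void
example_tag_flow(struct rte_mbuf *m, uint32_t flow_id)
{
	struct example_md *md = (struct example_md *)rte_mbuf_to_priv(m);

	md->flow_id = flow_id;
	md->needs_reassembly = 0;
}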
#ifdef RTE_MBUF_REFCNT_ATOMIC

/**
 * Reads the value of an mbuf's refcnt.
 * @param m
 *   Mbuf to read
 * @return
 *   Reference count number.
 */
static inline uint16_t
rte_mbuf_refcnt_read(const struct rte_mbuf *m)
{
	return rte_atomic_load_explicit(&m->refcnt, rte_memory_order_relaxed);
}

/**
 * Sets an mbuf's refcnt to a defined value.
 * @param m
 *   Mbuf to update
 * @param new_value
 *   Value to set
 */
static inline void
rte_mbuf_refcnt_set(struct rte_mbuf *m, uint16_t new_value)
{
	rte_atomic_store_explicit(&m->refcnt, new_value, rte_memory_order_relaxed);
}

/* internal */
static inline uint16_t
__rte_mbuf_refcnt_update(struct rte_mbuf *m, int16_t value)
{
	return rte_atomic_fetch_add_explicit(&m->refcnt, value,
		rte_memory_order_acq_rel) + value;
}

/**
 * Adds the given value to an mbuf's refcnt and returns its new value.
 * @param m
 *   Mbuf to update
 * @param value
 *   Value to add/subtract
 * @return
 *   Updated value
 */
static inline uint16_t
rte_mbuf_refcnt_update(struct rte_mbuf *m, int16_t value)
{
	/*
	 * The atomic add is an expensive operation, so we don't want to
	 * call it in the case where we know we are the unique holder of
	 * this mbuf (i.e. refcnt == 1). Otherwise, an atomic
	 * operation has to be used because concurrent accesses on the
	 * reference counter can occur.
	 */
	if (likely(rte_mbuf_refcnt_read(m) == 1)) {
		++value;
		rte_mbuf_refcnt_set(m, (uint16_t)value);
		return (uint16_t)value;
	}

	return __rte_mbuf_refcnt_update(m, value);
}

#else /* ! RTE_MBUF_REFCNT_ATOMIC */

/* internal */
static inline uint16_t
__rte_mbuf_refcnt_update(struct rte_mbuf *m, int16_t value)
{
	m->refcnt = (uint16_t)(m->refcnt + value);
	return m->refcnt;
}

/**
 * Adds the given value to an mbuf's refcnt and returns its new value.
 */
static inline uint16_t
rte_mbuf_refcnt_update(struct rte_mbuf *m, int16_t value)
{
	return __rte_mbuf_refcnt_update(m, value);
}

/**
 * Reads the value of an mbuf's refcnt.
 */
static inline uint16_t
rte_mbuf_refcnt_read(const struct rte_mbuf *m)
{
	return m->refcnt;
}

/**
 * Sets an mbuf's refcnt to the defined value.
 */
static inline void
rte_mbuf_refcnt_set(struct rte_mbuf *m, uint16_t new_value)
{
	m->refcnt = new_value;
}

#endif /* RTE_MBUF_REFCNT_ATOMIC */

/**
 * Reads the refcnt of an external buffer.
 *
 * @param shinfo
 *   Shared data of the external buffer.
 * @return
 *   Reference count number.
 */
static inline uint16_t
rte_mbuf_ext_refcnt_read(const struct rte_mbuf_ext_shared_info *shinfo)
{
	return rte_atomic_load_explicit(&shinfo->refcnt, rte_memory_order_relaxed);
}

/**
 * Set the refcnt of an external buffer.
 *
 * @param shinfo
 *   Shared data of the external buffer.
 * @param new_value
 *   Value to set
 */
static inline void
rte_mbuf_ext_refcnt_set(struct rte_mbuf_ext_shared_info *shinfo,
	uint16_t new_value)
{
	rte_atomic_store_explicit(&shinfo->refcnt, new_value, rte_memory_order_relaxed);
}
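/*
 * Example (illustrative sketch, not part of the API): keep an mbuf alive
 * while it is handed to another execution context. Taking an extra
 * reference means the packet survives one rte_pktmbuf_free() call;
 * enqueue_to_worker() is a hypothetical application function.
 */
static inline int
example_share_mbuf(struct rte_mbuf *m,
		int (*enqueue_to_worker)(struct rte_mbuf *))
{
	/* One reference for us, one for the worker. */
	rte_mbuf_refcnt_update(m, 1);

	if (enqueue_to_worker(m) < 0) {
		/* Hand-off failed: drop the reference we just took. */
		rte_mbuf_refcnt_update(m, -1);
		return -1;
	}
	return 0;
}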
/**
 * Add the given value to the refcnt of an external buffer and return its
 * new value.
 *
 * @param shinfo
 *   Shared data of the external buffer.
 * @param value
 *   Value to add/subtract
 * @return
 *   Updated value
 */
static inline uint16_t
rte_mbuf_ext_refcnt_update(struct rte_mbuf_ext_shared_info *shinfo,
	int16_t value)
{
	if (likely(rte_mbuf_ext_refcnt_read(shinfo) == 1)) {
		++value;
		rte_mbuf_ext_refcnt_set(shinfo, (uint16_t)value);
		return (uint16_t)value;
	}

	return rte_atomic_fetch_add_explicit(&shinfo->refcnt, value,
		rte_memory_order_acq_rel) + value;
}

/** Mbuf prefetch */
#define RTE_MBUF_PREFETCH_TO_FREE(m) do {	\
	if ((m) != NULL)			\
		rte_prefetch0(m);		\
} while (0)

/**
 * Sanity checks on an mbuf.
 *
 * Check the consistency of the given mbuf. The function will cause a
 * panic if corruption is detected.
 *
 * @param m
 *   The mbuf to be checked.
 * @param is_header
 *   True if the mbuf is a packet header, false if it is a sub-segment
 *   of a packet (in this case, some fields like nb_segs are not checked)
 */
void
rte_mbuf_sanity_check(const struct rte_mbuf *m, int is_header);

/**
 * Sanity checks on an mbuf.
 *
 * Almost like rte_mbuf_sanity_check(), but this function gives the reason
 * if corruption is detected rather than panicking.
 *
 * @param m
 *   The mbuf to be checked.
 * @param is_header
 *   True if the mbuf is a packet header, false if it is a sub-segment
 *   of a packet (in this case, some fields like nb_segs are not checked)
 * @param reason
 *   A reference to a string pointer where to store the reason why the mbuf
 *   is considered invalid.
 * @return
 *   - 0 if no issue has been found, reason is left untouched.
 *   - -1 if a problem is detected, reason then points to a string describing
 *     the reason why the mbuf is deemed invalid.
 */
int rte_mbuf_check(const struct rte_mbuf *m, int is_header,
	const char **reason);

/**
 * Sanity checks on a reinitialized mbuf in debug mode.
 *
 * Check the consistency of the given reinitialized mbuf.
 * The function will cause a panic if corruption is detected.
 *
 * Check that the mbuf is properly reinitialized (refcnt=1, next=NULL,
 * nb_segs=1), as done by rte_pktmbuf_prefree_seg().
 *
 * @param m
 *   The mbuf to be checked.
 */
static __rte_always_inline void
__rte_mbuf_raw_sanity_check(__rte_unused const struct rte_mbuf *m)
{
	RTE_ASSERT(rte_mbuf_refcnt_read(m) == 1);
	RTE_ASSERT(m->next == NULL);
	RTE_ASSERT(m->nb_segs == 1);
	__rte_mbuf_sanity_check(m, 0);
}

/** For backwards compatibility. */
#define MBUF_RAW_ALLOC_CHECK(m) __rte_mbuf_raw_sanity_check(m)
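/*
 * Example (illustrative sketch, not part of the API): validate an mbuf
 * without panicking and report why it is rejected. Assumes <stdio.h> is
 * available (pulled in through the mempool header).
 */
static inline int
example_validate_mbuf(const struct rte_mbuf *m)
{
	const char *reason = NULL;

	if (rte_mbuf_check(m, 1, &reason) != 0) {
		printf("bad mbuf: %s\n", reason);
		return -1;
	}
	return 0;
}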
/**
 * Allocate an uninitialized mbuf from mempool *mp*.
 *
 * This function can be used by PMDs (especially in RX functions) to
 * allocate an uninitialized mbuf. The driver is responsible for
 * initializing all the required fields. See rte_pktmbuf_reset().
 * For standard needs, prefer rte_pktmbuf_alloc().
 *
 * The caller can expect that the following fields of the mbuf structure
 * are initialized: buf_addr, buf_iova, buf_len, refcnt=1, nb_segs=1,
 * next=NULL, pool, priv_size. The other fields must be initialized
 * by the caller.
 *
 * @param mp
 *   The mempool from which the mbuf is allocated.
 * @return
 *   - The pointer to the new mbuf on success.
 *   - NULL if allocation failed.
 */
static inline struct rte_mbuf *rte_mbuf_raw_alloc(struct rte_mempool *mp)
{
	union {
		void *ptr;
		struct rte_mbuf *m;
	} ret;

	if (rte_mempool_get(mp, &ret.ptr) < 0)
		return NULL;
	__rte_mbuf_raw_sanity_check(ret.m);
	return ret.m;
}

/**
 * Put an mbuf back into its original mempool.
 *
 * The caller must ensure that the mbuf is direct and properly
 * reinitialized (refcnt=1, next=NULL, nb_segs=1), as done by
 * rte_pktmbuf_prefree_seg().
 *
 * This function should be used with care, when optimization is
 * required. For standard needs, prefer rte_pktmbuf_free() or
 * rte_pktmbuf_free_seg().
 *
 * @param m
 *   The mbuf to be freed.
 */
static __rte_always_inline void
rte_mbuf_raw_free(struct rte_mbuf *m)
{
	RTE_ASSERT(!RTE_MBUF_CLONED(m) &&
		(!RTE_MBUF_HAS_EXTBUF(m) || RTE_MBUF_HAS_PINNED_EXTBUF(m)));
	__rte_mbuf_raw_sanity_check(m);
	rte_mempool_put(m->pool, m);
}

/**
 * The packet mbuf constructor.
 *
 * This function initializes some fields in the mbuf structure that are
 * not modified by the user once created (origin pool, buffer start
 * address, and so on). This function is given as a callback function to
 * rte_mempool_obj_iter() or rte_mempool_create() at pool creation time.
 *
 * This function expects that the mempool private area was previously
 * initialized with rte_pktmbuf_pool_init().
 *
 * @param mp
 *   The mempool from which mbufs originate.
 * @param opaque_arg
 *   A pointer that can be used by the user to retrieve useful information
 *   for mbuf initialization. This pointer is the opaque argument passed to
 *   rte_mempool_obj_iter() or rte_mempool_create().
 * @param m
 *   The mbuf to initialize.
 * @param i
 *   The index of the mbuf in the pool table.
 */
void rte_pktmbuf_init(struct rte_mempool *mp, void *opaque_arg,
	void *m, unsigned i);

/**
 * A packet mbuf pool constructor.
 *
 * This function initializes the mempool private data in the case of a
 * pktmbuf pool. This private data is needed by the driver. The
 * function must be called on the mempool before it is used, or it
 * can be given as a callback function to rte_mempool_create() at
 * pool creation. It can be extended by the user, for example, to
 * provide another packet size.
 *
 * The mempool private area size must be at least equal to
 * sizeof(struct rte_pktmbuf_pool_private).
 *
 * @param mp
 *   The mempool from which mbufs originate.
 * @param opaque_arg
 *   A pointer that can be used by the user to retrieve useful information
 *   for mbuf initialization. This pointer is the opaque argument passed to
 *   rte_mempool_create().
 */
void rte_pktmbuf_pool_init(struct rte_mempool *mp, void *opaque_arg);
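/*
 * Example (illustrative sketch, not part of the API): PMD-style receive
 * path using the raw allocator. Unlike rte_pktmbuf_alloc(), the driver
 * must fill the data-plane fields itself; rx_bytes stands in for a
 * length taken from a hypothetical hardware descriptor.
 */
static inline struct rte_mbuf *
example_rx_fill(struct rte_mempool *mp, uint16_t rx_bytes, uint16_t port_id)
{
	struct rte_mbuf *m = rte_mbuf_raw_alloc(mp);

	if (m == NULL)
		return NULL;

	/* Fields that rte_mbuf_raw_alloc() leaves to the caller. */
	m->data_off = RTE_PKTMBUF_HEADROOM;
	m->ol_flags = 0;
	m->packet_type = 0;
	m->port = port_id;
	m->data_len = rx_bytes;
	m->pkt_len = rx_bytes;
	return m;
}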
/**
 * Create an mbuf pool.
 *
 * This function creates and initializes a packet mbuf pool. It is
 * a wrapper to rte_mempool functions.
 *
 * @param name
 *   The name of the mbuf pool.
 * @param n
 *   The number of elements in the mbuf pool. The optimum size (in terms
 *   of memory usage) for a mempool is when n is a power of two minus one:
 *   n = (2^q - 1).
 * @param cache_size
 *   Size of the per-core object cache. See rte_mempool_create() for
 *   details.
 * @param priv_size
 *   Size of the application private area between the rte_mbuf structure
 *   and the data buffer. This value must be aligned to RTE_MBUF_PRIV_ALIGN.
 * @param data_room_size
 *   Size of data buffer in each mbuf, including RTE_PKTMBUF_HEADROOM.
 * @param socket_id
 *   The socket identifier where the memory should be allocated. The
 *   value can be *SOCKET_ID_ANY* if there is no NUMA constraint for the
 *   reserved zone.
 * @return
 *   The pointer to the new allocated mempool, on success. NULL on error
 *   with rte_errno set appropriately. Possible rte_errno values include:
 *    - E_RTE_NO_CONFIG - function could not get pointer to rte_config structure
 *    - EINVAL - cache size provided is too large, or priv_size is not aligned.
 *    - ENOSPC - the maximum number of memzones has already been allocated
 *    - EEXIST - a memzone with the same name already exists
 *    - ENOMEM - no appropriate memory area found in which to create memzone
 */
struct rte_mempool *
rte_pktmbuf_pool_create(const char *name, unsigned n,
	unsigned cache_size, uint16_t priv_size, uint16_t data_room_size,
	int socket_id);

/**
 * Create an mbuf pool with a given mempool ops name.
 *
 * This function creates and initializes a packet mbuf pool. It is
 * a wrapper to rte_mempool functions.
 *
 * @param name
 *   The name of the mbuf pool.
 * @param n
 *   The number of elements in the mbuf pool. The optimum size (in terms
 *   of memory usage) for a mempool is when n is a power of two minus one:
 *   n = (2^q - 1).
 * @param cache_size
 *   Size of the per-core object cache. See rte_mempool_create() for
 *   details.
 * @param priv_size
 *   Size of the application private area between the rte_mbuf structure
 *   and the data buffer. This value must be aligned to RTE_MBUF_PRIV_ALIGN.
 * @param data_room_size
 *   Size of data buffer in each mbuf, including RTE_PKTMBUF_HEADROOM.
 * @param socket_id
 *   The socket identifier where the memory should be allocated. The
 *   value can be *SOCKET_ID_ANY* if there is no NUMA constraint for the
 *   reserved zone.
 * @param ops_name
 *   The mempool ops name to be used for this mempool instead of the
 *   default mempool ops. The value can be *NULL* to use the default.
 * @return
 *   The pointer to the new allocated mempool, on success. NULL on error
 *   with rte_errno set appropriately. Possible rte_errno values include:
 *    - E_RTE_NO_CONFIG - function could not get pointer to rte_config structure
 *    - EINVAL - cache size provided is too large, or priv_size is not aligned.
 *    - ENOSPC - the maximum number of memzones has already been allocated
 *    - EEXIST - a memzone with the same name already exists
 *    - ENOMEM - no appropriate memory area found in which to create memzone
 */
struct rte_mempool *
rte_pktmbuf_pool_create_by_ops(const char *name, unsigned int n,
	unsigned int cache_size, uint16_t priv_size, uint16_t data_room_size,
	int socket_id, const char *ops_name);

/** A structure that describes the pinned external buffer segment. */
struct rte_pktmbuf_extmem {
	void *buf_ptr;       /**< The virtual address of data buffer. */
	rte_iova_t buf_iova; /**< The IO address of the data buffer. */
	size_t buf_len;      /**< External buffer length in bytes. */
	uint16_t elt_size;   /**< mbuf element size in bytes. */
};
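/*
 * Example (illustrative sketch, not part of the API): create a pool of
 * 8191 mbufs with a 256-mbuf per-core cache and the default buffer size.
 * All sizing parameters are arbitrary illustration choices.
 */
static inline struct rte_mempool *
example_create_pool(void)
{
	return rte_pktmbuf_pool_create("example_pool",
		8191,			/* n: 2^13 - 1, for mempool efficiency */
		256,			/* per-core cache size */
		0,			/* no application private area */
		RTE_MBUF_DEFAULT_BUF_SIZE, /* data room incl. headroom */
		SOCKET_ID_ANY);
}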
/**
 * Create an mbuf pool with external pinned data buffers.
 *
 * This function creates and initializes a packet mbuf pool that contains
 * only mbufs with an external buffer. It is a wrapper to rte_mempool
 * functions.
 *
 * @param name
 *   The name of the mbuf pool.
 * @param n
 *   The number of elements in the mbuf pool. The optimum size (in terms
 *   of memory usage) for a mempool is when n is a power of two minus one:
 *   n = (2^q - 1).
 * @param cache_size
 *   Size of the per-core object cache. See rte_mempool_create() for
 *   details.
 * @param priv_size
 *   Size of the application private area between the rte_mbuf structure
 *   and the data buffer. This value must be aligned to RTE_MBUF_PRIV_ALIGN.
 * @param data_room_size
 *   Size of data buffer in each mbuf, including RTE_PKTMBUF_HEADROOM.
 * @param socket_id
 *   The socket identifier where the memory should be allocated. The
 *   value can be *SOCKET_ID_ANY* if there is no NUMA constraint for the
 *   reserved zone.
 * @param ext_mem
 *   Pointer to the array of structures describing the external memory
 *   for data buffers. It is the caller's responsibility to register this
 *   memory with rte_extmem_register() (if needed), map this memory to the
 *   appropriate physical device, etc.
 * @param ext_num
 *   Number of elements in the ext_mem array.
 * @return
 *   The pointer to the new allocated mempool, on success. NULL on error
 *   with rte_errno set appropriately. Possible rte_errno values include:
 *    - E_RTE_NO_CONFIG - function could not get pointer to rte_config structure
 *    - EINVAL - cache size provided is too large, or priv_size is not aligned.
 *    - ENOSPC - the maximum number of memzones has already been allocated
 *    - EEXIST - a memzone with the same name already exists
 *    - ENOMEM - no appropriate memory area found in which to create memzone
 */
struct rte_mempool *
rte_pktmbuf_pool_create_extbuf(const char *name, unsigned int n,
	unsigned int cache_size, uint16_t priv_size,
	uint16_t data_room_size, int socket_id,
	const struct rte_pktmbuf_extmem *ext_mem,
	unsigned int ext_num);

/**
 * Get the data room size of mbufs stored in a pktmbuf_pool
 *
 * The data room size is the amount of data that can be stored in an
 * mbuf, including the headroom (RTE_PKTMBUF_HEADROOM).
 *
 * @param mp
 *   The packet mbuf pool.
 * @return
 *   The data room size of mbufs stored in this mempool.
 */
static inline uint16_t
rte_pktmbuf_data_room_size(struct rte_mempool *mp)
{
	struct rte_pktmbuf_pool_private *mbp_priv;

	mbp_priv = (struct rte_pktmbuf_pool_private *)rte_mempool_get_priv(mp);
	return mbp_priv->mbuf_data_room_size;
}

/**
 * Get the application private size of mbufs stored in a pktmbuf_pool
 *
 * The private size of an mbuf is a zone located between the rte_mbuf
 * structure and the data buffer where an application can store data
 * associated with a packet.
 *
 * @param mp
 *   The packet mbuf pool.
 * @return
 *   The private size of mbufs stored in this mempool.
 */
static inline uint16_t
rte_pktmbuf_priv_size(struct rte_mempool *mp)
{
	struct rte_pktmbuf_pool_private *mbp_priv;

	mbp_priv = (struct rte_pktmbuf_pool_private *)rte_mempool_get_priv(mp);
	return mbp_priv->mbuf_priv_size;
}
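/*
 * Example (illustrative sketch, not part of the API): compute how many
 * payload bytes fit in one mbuf of a pool once the default headroom is
 * reserved.
 */
static inline uint16_t
example_usable_room(struct rte_mempool *mp)
{
	uint16_t room = rte_pktmbuf_data_room_size(mp);

	return (uint16_t)(room - RTE_PKTMBUF_HEADROOM);
}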
/**
 * Reset the data_off field of a packet mbuf to its default value.
 *
 * The given mbuf must have only one segment, which should be empty.
 *
 * @param m
 *   The packet mbuf whose data_off field is to be reset.
 */
static inline void rte_pktmbuf_reset_headroom(struct rte_mbuf *m)
{
	m->data_off = (uint16_t)RTE_MIN((uint16_t)RTE_PKTMBUF_HEADROOM,
		(uint16_t)m->buf_len);
}

/**
 * Reset the fields of a packet mbuf to their default values.
 *
 * The given mbuf must have only one segment.
 *
 * @param m
 *   The packet mbuf to be reset.
 */
static inline void rte_pktmbuf_reset(struct rte_mbuf *m)
{
	m->next = NULL;
	m->pkt_len = 0;
	m->tx_offload = 0;
	m->vlan_tci = 0;
	m->vlan_tci_outer = 0;
	m->nb_segs = 1;
	m->port = RTE_MBUF_PORT_INVALID;

	m->ol_flags &= RTE_MBUF_F_EXTERNAL;
	m->packet_type = 0;
	rte_pktmbuf_reset_headroom(m);

	m->data_len = 0;
	__rte_mbuf_sanity_check(m, 1);
}

/**
 * Allocate a new mbuf from a mempool.
 *
 * This new mbuf contains one segment, which has a length of 0. The pointer
 * to data is initialized to have some bytes of headroom in the buffer
 * (if buffer size allows).
 *
 * @param mp
 *   The mempool from which the mbuf is allocated.
 * @return
 *   - The pointer to the new mbuf on success.
 *   - NULL if allocation failed.
 */
static inline struct rte_mbuf *rte_pktmbuf_alloc(struct rte_mempool *mp)
{
	struct rte_mbuf *m;
	if ((m = rte_mbuf_raw_alloc(mp)) != NULL)
		rte_pktmbuf_reset(m);
	return m;
}

/**
 * Allocate a bulk of mbufs, initialize refcnt and reset the fields to
 * default values.
 *
 * @param pool
 *   The mempool from which mbufs are allocated.
 * @param mbufs
 *   Array of pointers to mbufs
 * @param count
 *   Array size
 * @return
 *   - 0: Success
 *   - -ENOENT: Not enough entries in the mempool; no mbufs are retrieved.
 */
static inline int rte_pktmbuf_alloc_bulk(struct rte_mempool *pool,
	struct rte_mbuf **mbufs, unsigned count)
{
	unsigned idx = 0;
	int rc;

	rc = rte_mempool_get_bulk(pool, (void **)mbufs, count);
	if (unlikely(rc))
		return rc;

	/* To understand Duff's device on loop unwinding optimization, see
	 * https://en.wikipedia.org/wiki/Duff's_device.
	 * Here a while() loop is used rather than do {} while() to avoid an
	 * extra check if count is zero.
	 */
	switch (count % 4) {
	case 0:
		while (idx != count) {
			__rte_mbuf_raw_sanity_check(mbufs[idx]);
			rte_pktmbuf_reset(mbufs[idx]);
			idx++;
			/* fall-through */
	case 3:
			__rte_mbuf_raw_sanity_check(mbufs[idx]);
			rte_pktmbuf_reset(mbufs[idx]);
			idx++;
			/* fall-through */
	case 2:
			__rte_mbuf_raw_sanity_check(mbufs[idx]);
			rte_pktmbuf_reset(mbufs[idx]);
			idx++;
			/* fall-through */
	case 1:
			__rte_mbuf_raw_sanity_check(mbufs[idx]);
			rte_pktmbuf_reset(mbufs[idx]);
			idx++;
			/* fall-through */
		}
	}
	return 0;
}
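/*
 * Example (illustrative sketch, not part of the API): allocate an mbuf
 * and copy a frame into it. The frame pointer and length are hypothetical
 * inputs; NULL is returned if the frame does not fit into one segment.
 */
static inline struct rte_mbuf *
example_build_packet(struct rte_mempool *mp, const void *frame, uint16_t len)
{
	struct rte_mbuf *m = rte_pktmbuf_alloc(mp);

	if (m == NULL)
		return NULL;
	if (len > (uint16_t)(m->buf_len - m->data_off)) {
		/* Freshly reset mbuf: safe to return it via the raw path. */
		rte_mbuf_raw_free(m);
		return NULL;
	}
	memcpy(rte_pktmbuf_mtod(m, void *), frame, len);
	m->data_len = len;
	m->pkt_len = len;
	return m;
}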
/**
 * Initialize the shared data at the end of an external buffer before
 * attaching it to an mbuf with ``rte_pktmbuf_attach_extbuf()``. This is
 * not a mandatory initialization but a helper function to simply spare a
 * few bytes at the end of the buffer for shared data. If shared data is
 * allocated separately, this function should not be called; instead, the
 * application has to properly initialize the shared data according to its
 * needs.
 *
 * The free callback and its argument are saved, and the refcnt is set to 1.
 *
 * @warning
 * The value of buf_len will be reduced to RTE_PTR_DIFF(shinfo, buf_addr)
 * after this initialization. This adjusted value shall be used for
 * ``rte_pktmbuf_attach_extbuf()``.
 *
 * @param buf_addr
 *   The pointer to the external buffer.
 * @param [in,out] buf_len
 *   The pointer to the length of the external buffer. The input value must
 *   be larger than the size of ``struct rte_mbuf_ext_shared_info`` plus
 *   padding for alignment. If not large enough, this function will return
 *   NULL. The adjusted buffer length will be returned through this pointer.
 * @param free_cb
 *   Free callback function to call when the external buffer needs to be
 *   freed.
 * @param fcb_opaque
 *   Argument for the free callback function.
 *
 * @return
 *   A pointer to the initialized shared data on success, NULL otherwise.
 */
static inline struct rte_mbuf_ext_shared_info *
rte_pktmbuf_ext_shinfo_init_helper(void *buf_addr, uint16_t *buf_len,
	rte_mbuf_extbuf_free_callback_t free_cb, void *fcb_opaque)
{
	struct rte_mbuf_ext_shared_info *shinfo;
	void *buf_end = RTE_PTR_ADD(buf_addr, *buf_len);
	void *addr;

	addr = RTE_PTR_ALIGN_FLOOR(RTE_PTR_SUB(buf_end, sizeof(*shinfo)),
		sizeof(uintptr_t));
	if (addr <= buf_addr)
		return NULL;

	shinfo = (struct rte_mbuf_ext_shared_info *)addr;
	shinfo->free_cb = free_cb;
	shinfo->fcb_opaque = fcb_opaque;
	rte_mbuf_ext_refcnt_set(shinfo, 1);

	*buf_len = (uint16_t)RTE_PTR_DIFF(shinfo, buf_addr);
	return shinfo;
}
/**
 * Attach an external buffer to an mbuf.
 *
 * A user-managed anonymous buffer can be attached to an mbuf. When
 * attaching it, the corresponding free callback function and its argument
 * should be provided via shinfo. This callback function will be called
 * once all the mbufs are detached from the buffer (refcnt becomes zero).
 *
 * The headroom length of the attaching mbuf will be set to zero and this
 * can be properly adjusted after attachment. For example,
 * ``rte_pktmbuf_adj()`` or ``rte_pktmbuf_reset_headroom()`` might be used.
 *
 * Similarly, the packet length is initialized to 0. If the buffer contains
 * data, the user has to adjust the ``data_len`` and the ``pkt_len`` fields
 * of the mbuf accordingly.
 *
 * More mbufs can be attached to the same external buffer by
 * ``rte_pktmbuf_attach()`` once the external buffer has been attached by
 * this API.
 *
 * Detachment can be done by either ``rte_pktmbuf_detach_extbuf()`` or
 * ``rte_pktmbuf_detach()``.
 *
 * Memory for shared data must be provided and the user must initialize
 * all of its content properly, especially the free callback and refcnt.
 * The pointer to the shared data will be stored in m->shinfo.
 * ``rte_pktmbuf_ext_shinfo_init_helper`` can help to simply spare a few
 * bytes at the end of the buffer for the shared data, store the free
 * callback and its argument, and set the refcnt to 1.
 * The following is an example:
 *
 *   struct rte_mbuf_ext_shared_info *shinfo =
 *          rte_pktmbuf_ext_shinfo_init_helper(buf_addr, &buf_len,
 *                                             free_cb, fcb_arg);
 *   rte_pktmbuf_attach_extbuf(m, buf_addr, buf_iova, buf_len, shinfo);
 *   rte_pktmbuf_reset_headroom(m);
 *   rte_pktmbuf_adj(m, data_len);
 *
 * Attaching an external buffer is quite similar to mbuf indirection in
 * that it replaces the buffer address and length of an mbuf, but with a
 * few differences:
 * - When an indirect mbuf is attached, the refcnt of the direct mbuf
 *   would be 2 as long as the direct mbuf itself isn't freed after the
 *   attachment. In such cases, the buffer area of a direct mbuf must be
 *   read-only. An external buffer, however, has its own refcnt, which
 *   starts at 1. Unless multiple mbufs are attached to an mbuf having an
 *   external buffer, the external buffer is writable.
 * - There's no need to allocate a buffer from a mempool. Any buffer can
 *   be attached with an appropriate free callback and its IO address.
 * - Smaller metadata is required to maintain shared data such as refcnt.
 *
 * @param m
 *   The pointer to the mbuf.
 * @param buf_addr
 *   The pointer to the external buffer.
 * @param buf_iova
 *   IO address of the external buffer.
 * @param buf_len
 *   The size of the external buffer.
 * @param shinfo
 *   User-provided memory for shared data of the external buffer.
 */
static inline void
rte_pktmbuf_attach_extbuf(struct rte_mbuf *m, void *buf_addr,
	rte_iova_t buf_iova, uint16_t buf_len,
	struct rte_mbuf_ext_shared_info *shinfo)
{
	/* mbuf should not be read-only */
	RTE_ASSERT(RTE_MBUF_DIRECT(m) && rte_mbuf_refcnt_read(m) == 1);
	RTE_ASSERT(shinfo->free_cb != NULL);

	m->buf_addr = buf_addr;
	rte_mbuf_iova_set(m, buf_iova);
	m->buf_len = buf_len;

	m->data_len = 0;
	m->data_off = 0;

	m->ol_flags |= RTE_MBUF_F_EXTERNAL;
	m->shinfo = shinfo;
}

/**
 * Detach the external buffer attached to an mbuf, same as
 * ``rte_pktmbuf_detach()``.
 *
 * @param m
 *   The mbuf having an external buffer.
 */
#define rte_pktmbuf_detach_extbuf(m) rte_pktmbuf_detach(m)

/**
 * Copy dynamic fields from msrc to mdst.
 *
 * @param mdst
 *   The destination mbuf.
 * @param msrc
 *   The source mbuf.
 */
static inline void
rte_mbuf_dynfield_copy(struct rte_mbuf *mdst, const struct rte_mbuf *msrc)
{
#if !RTE_IOVA_IN_MBUF
	mdst->dynfield2 = msrc->dynfield2;
#endif
	memcpy(&mdst->dynfield1, msrc->dynfield1, sizeof(mdst->dynfield1));
}

/* internal */
static inline void
__rte_pktmbuf_copy_hdr(struct rte_mbuf *mdst, const struct rte_mbuf *msrc)
{
	mdst->port = msrc->port;
	mdst->vlan_tci = msrc->vlan_tci;
	mdst->vlan_tci_outer = msrc->vlan_tci_outer;
	mdst->tx_offload = msrc->tx_offload;
	mdst->hash = msrc->hash;
	mdst->packet_type = msrc->packet_type;
	rte_mbuf_dynfield_copy(mdst, msrc);
}
/**
 * Attach a packet mbuf to another packet mbuf.
 *
 * If the mbuf we are attaching to isn't a direct buffer and is itself
 * attached to an external buffer, the mbuf being attached will be
 * attached to the external buffer instead of using mbuf indirection.
 *
 * Otherwise, the mbuf will be indirectly attached. After attachment, we
 * refer to the mbuf we attached as 'indirect', and the mbuf we attached
 * to as 'direct'. The direct mbuf's reference counter is incremented.
 *
 * Currently not supported:
 * - attachment for an already indirect mbuf (i.e. mi has to be direct).
 * - an mbuf we are trying to attach (mi) that is used by someone else,
 *   i.e. its reference counter is greater than 1.
 *
 * @param mi
 *   The indirect packet mbuf.
 * @param m
 *   The packet mbuf we're attaching to.
 */
static inline void rte_pktmbuf_attach(struct rte_mbuf *mi, struct rte_mbuf *m)
{
	RTE_ASSERT(RTE_MBUF_DIRECT(mi) &&
		rte_mbuf_refcnt_read(mi) == 1);

	if (RTE_MBUF_HAS_EXTBUF(m)) {
		rte_mbuf_ext_refcnt_update(m->shinfo, 1);
		mi->ol_flags = m->ol_flags;
		mi->shinfo = m->shinfo;
	} else {
		/* if m is not direct, get the mbuf that embeds the data */
		rte_mbuf_refcnt_update(rte_mbuf_from_indirect(m), 1);
		mi->priv_size = m->priv_size;
		mi->ol_flags = m->ol_flags | RTE_MBUF_F_INDIRECT;
	}

	__rte_pktmbuf_copy_hdr(mi, m);

	mi->data_off = m->data_off;
	mi->data_len = m->data_len;
	rte_mbuf_iova_set(mi, rte_mbuf_iova_get(m));
	mi->buf_addr = m->buf_addr;
	mi->buf_len = m->buf_len;

	mi->next = NULL;
	mi->pkt_len = mi->data_len;
	mi->nb_segs = 1;

	__rte_mbuf_sanity_check(mi, 1);
	__rte_mbuf_sanity_check(m, 0);
}

/**
 * @internal used by rte_pktmbuf_detach().
 *
 * Decrement the reference counter of the external buffer. When the
 * reference counter becomes 0, the buffer is freed by the pre-registered
 * callback.
 */
static inline void
__rte_pktmbuf_free_extbuf(struct rte_mbuf *m)
{
	RTE_ASSERT(RTE_MBUF_HAS_EXTBUF(m));
	RTE_ASSERT(m->shinfo != NULL);

	if (rte_mbuf_ext_refcnt_update(m->shinfo, -1) == 0)
		m->shinfo->free_cb(m->buf_addr, m->shinfo->fcb_opaque);
}

/**
 * @internal used by rte_pktmbuf_detach().
 *
 * Decrement the direct mbuf's reference counter. When the reference
 * counter becomes 0, the direct mbuf is freed.
 */
static inline void
__rte_pktmbuf_free_direct(struct rte_mbuf *m)
{
	struct rte_mbuf *md;

	RTE_ASSERT(RTE_MBUF_CLONED(m));

	md = rte_mbuf_from_indirect(m);

	if (rte_mbuf_refcnt_update(md, -1) == 0) {
		md->next = NULL;
		md->nb_segs = 1;
		rte_mbuf_refcnt_set(md, 1);
		rte_mbuf_raw_free(md);
	}
}
/**
 * Detach a packet mbuf from an external buffer or a direct buffer.
 *
 *  - decrement refcnt and free the external/direct buffer if refcnt
 *    becomes zero.
 *  - restore the original mbuf address and length values.
 *  - reset the pktmbuf data and data_len to their default values.
 *
 * All other fields of the given packet mbuf will be left intact.
 *
 * If the packet mbuf was allocated from a pool with pinned
 * external buffers, rte_pktmbuf_detach() does nothing for mbufs of
 * this kind, because the pinned buffers are not supposed to be
 * detached.
 *
 * @param m
 *   The indirect attached packet mbuf.
 */
static inline void rte_pktmbuf_detach(struct rte_mbuf *m)
{
	struct rte_mempool *mp = m->pool;
	uint32_t mbuf_size, buf_len;
	uint16_t priv_size;

	if (RTE_MBUF_HAS_EXTBUF(m)) {
		/*
		 * The mbuf has the external attached buffer,
		 * we should check the type of the memory pool where
		 * the mbuf was allocated from to detect the pinned
		 * external buffer.
		 */
		uint32_t flags = rte_pktmbuf_priv_flags(mp);

		if (flags & RTE_PKTMBUF_POOL_F_PINNED_EXT_BUF) {
			/*
			 * The pinned external buffer should not be
			 * detached from its backing mbuf, just exit.
			 */
			return;
		}
		__rte_pktmbuf_free_extbuf(m);
	} else {
		__rte_pktmbuf_free_direct(m);
	}
	priv_size = rte_pktmbuf_priv_size(mp);
	mbuf_size = (uint32_t)(sizeof(struct rte_mbuf) + priv_size);
	buf_len = rte_pktmbuf_data_room_size(mp);

	m->priv_size = priv_size;
	m->buf_addr = (char *)m + mbuf_size;
	rte_mbuf_iova_set(m, rte_mempool_virt2iova(m) + mbuf_size);
	m->buf_len = (uint16_t)buf_len;
	rte_pktmbuf_reset_headroom(m);
	m->data_len = 0;
	m->ol_flags = 0;
}

/**
 * @internal Handle the packet mbufs with attached pinned external buffer
 * on mbuf freeing:
 *
 *  - return zero if the reference counter in shinfo is one. It means there
 *    is no more reference to this pinned buffer and the mbuf can be
 *    returned to the pool.
 *
 *  - otherwise (if the reference counter is not one), decrement the
 *    reference counter and return a non-zero value to prevent freeing the
 *    backing mbuf.
 *
 * Returns non-zero if the mbuf should not be freed.
 */
static inline int __rte_pktmbuf_pinned_extbuf_decref(struct rte_mbuf *m)
{
	struct rte_mbuf_ext_shared_info *shinfo;

	/* Clear flags, mbuf is being freed. */
	m->ol_flags = RTE_MBUF_F_EXTERNAL;
	shinfo = m->shinfo;

	/* Optimize for performance - do not dec/reinit */
	if (likely(rte_mbuf_ext_refcnt_read(shinfo) == 1))
		return 0;

	/*
	 * Direct usage of the add primitive to avoid
	 * duplication of the comparison with one.
	 */
	if (likely(rte_atomic_fetch_add_explicit(&shinfo->refcnt, -1,
			rte_memory_order_acq_rel) - 1))
		return 1;

	/* Reinitialize counter before mbuf freeing. */
	rte_mbuf_ext_refcnt_set(shinfo, 1);
	return 0;
}
/**
 * Decrease the reference counter and unlink an mbuf segment.
 *
 * This function does the same as a free, except that it does not
 * return the segment to its pool.
 * It decreases the reference counter, and if it reaches 0, an
 * indirect mbuf is detached from its parent.
 *
 * @param m
 *   The mbuf to be unlinked
 * @return
 *   - (m) if it is the last reference. It can be recycled or freed.
 *   - (NULL) if the mbuf still has remaining references on it.
 */
static __rte_always_inline struct rte_mbuf *
rte_pktmbuf_prefree_seg(struct rte_mbuf *m)
{
	__rte_mbuf_sanity_check(m, 0);

	if (likely(rte_mbuf_refcnt_read(m) == 1)) {

		if (!RTE_MBUF_DIRECT(m)) {
			rte_pktmbuf_detach(m);
			if (RTE_MBUF_HAS_EXTBUF(m) &&
			    RTE_MBUF_HAS_PINNED_EXTBUF(m) &&
			    __rte_pktmbuf_pinned_extbuf_decref(m))
				return NULL;
		}

		if (m->next != NULL)
			m->next = NULL;
		if (m->nb_segs != 1)
			m->nb_segs = 1;

		return m;

	} else if (__rte_mbuf_refcnt_update(m, -1) == 0) {

		if (!RTE_MBUF_DIRECT(m)) {
			rte_pktmbuf_detach(m);
			if (RTE_MBUF_HAS_EXTBUF(m) &&
			    RTE_MBUF_HAS_PINNED_EXTBUF(m) &&
			    __rte_pktmbuf_pinned_extbuf_decref(m))
				return NULL;
		}

		if (m->next != NULL)
			m->next = NULL;
		if (m->nb_segs != 1)
			m->nb_segs = 1;
		rte_mbuf_refcnt_set(m, 1);

		return m;
	}
	return NULL;
}

/**
 * Free a segment of a packet mbuf into its original mempool.
 *
 * Free an mbuf, without parsing other segments in case of chained
 * buffers.
 *
 * @param m
 *   The packet mbuf segment to be freed.
 */
static __rte_always_inline void
rte_pktmbuf_free_seg(struct rte_mbuf *m)
{
	m = rte_pktmbuf_prefree_seg(m);
	if (likely(m != NULL))
		rte_mbuf_raw_free(m);
}

/**
 * Free a packet mbuf back into its original mempool.
 *
 * Free an mbuf, and all its segments in case of chained buffers. Each
 * segment is added back into its original mempool.
 *
 * @param m
 *   The packet mbuf to be freed. If NULL, the function does nothing.
 */
static inline void rte_pktmbuf_free(struct rte_mbuf *m)
{
	struct rte_mbuf *m_next;

	if (m != NULL)
		__rte_mbuf_sanity_check(m, 1);

	while (m != NULL) {
		m_next = m->next;
		rte_pktmbuf_free_seg(m);
		m = m_next;
	}
}

/**
 * Free a bulk of packet mbufs back into their original mempools.
 *
 * Free a bulk of mbufs, and all their segments in case of chained buffers.
 * Each segment is added back into its original mempool.
 *
 * @param mbufs
 *   Array of pointers to packet mbufs.
 *   The array may contain NULL pointers.
 * @param count
 *   Array size.
 */
void rte_pktmbuf_free_bulk(struct rte_mbuf **mbufs, unsigned int count);

/**
 * Create a "clone" of the given packet mbuf.
 *
 * Walks through all segments of the given packet mbuf, and for each of them:
 *  - Creates a new packet mbuf from the given pool.
 *  - Attaches the newly created mbuf to the segment.
 * Then updates pkt_len and nb_segs of the "clone" packet mbuf to match
 * the values of the original packet mbuf.
 *
 * @param md
 *   The packet mbuf to be cloned.
 * @param mp
 *   The mempool from which the "clone" mbufs are allocated.
 * @return
 *   - The pointer to the new "clone" mbuf on success.
 *   - NULL if allocation fails.
 */
struct rte_mbuf *
rte_pktmbuf_clone(struct rte_mbuf *md, struct rte_mempool *mp);
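/*
 * Example (illustrative sketch, not part of the API): send a zero-copy
 * duplicate of a packet to a second consumer. The clone shares the data
 * buffers of the original; each copy must still be freed independently.
 * Dropping the original on clone failure is one possible policy, chosen
 * here only for illustration.
 */
static inline struct rte_mbuf *
example_duplicate(struct rte_mbuf *pkt, struct rte_mempool *clone_pool)
{
	struct rte_mbuf *dup = rte_pktmbuf_clone(pkt, clone_pool);

	if (dup == NULL) {
		/* On failure the original is untouched; drop it here. */
		rte_pktmbuf_free(pkt);
		return NULL;
	}
	return dup;
}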
/**
 * Create a full copy of a given packet mbuf.
 *
 * Copies all the data from a given packet mbuf to a newly allocated
 * set of mbufs. The private data is not copied.
 *
 * @param m
 *   The packet mbuf to be copied.
 * @param mp
 *   The mempool from which the "clone" mbufs are allocated.
 * @param offset
 *   The number of bytes to skip before copying.
 *   If the mbuf does not have that many bytes, it is an error
 *   and NULL is returned.
 * @param length
 *   The upper limit on bytes to copy. Passing UINT32_MAX
 *   means all data (after the offset).
 * @return
 *   - The pointer to the new "clone" mbuf on success.
 *   - NULL if allocation fails.
 */
struct rte_mbuf *
rte_pktmbuf_copy(const struct rte_mbuf *m, struct rte_mempool *mp,
	uint32_t offset, uint32_t length);

/**
 * Adds the given value to the refcnt of all packet mbuf segments.
 *
 * Walks through all segments of the given packet mbuf and for each of them
 * invokes rte_mbuf_refcnt_update().
 *
 * @param m
 *   The packet mbuf whose refcnt is to be updated.
 * @param v
 *   The value to add to the mbuf's segments refcnt.
 */
static inline void rte_pktmbuf_refcnt_update(struct rte_mbuf *m, int16_t v)
{
	__rte_mbuf_sanity_check(m, 1);

	do {
		rte_mbuf_refcnt_update(m, v);
	} while ((m = m->next) != NULL);
}

/**
 * Get the headroom in a packet mbuf.
 *
 * @param m
 *   The packet mbuf.
 * @return
 *   The length of the headroom.
 */
static inline uint16_t rte_pktmbuf_headroom(const struct rte_mbuf *m)
{
	__rte_mbuf_sanity_check(m, 0);
	return m->data_off;
}

/**
 * Get the tailroom of a packet mbuf.
 *
 * @param m
 *   The packet mbuf.
 * @return
 *   The length of the tailroom.
 */
static inline uint16_t rte_pktmbuf_tailroom(const struct rte_mbuf *m)
{
	__rte_mbuf_sanity_check(m, 0);
	return (uint16_t)(m->buf_len - rte_pktmbuf_headroom(m) -
		m->data_len);
}

/**
 * Get the last segment of the packet.
 *
 * @param m
 *   The packet mbuf.
 * @return
 *   The last segment of the given mbuf.
 */
static inline struct rte_mbuf *rte_pktmbuf_lastseg(struct rte_mbuf *m)
{
	__rte_mbuf_sanity_check(m, 1);
	while (m->next != NULL)
		m = m->next;
	return m;
}

/**
 * A macro that returns the length of the packet.
 *
 * The value can be read or assigned.
 *
 * @param m
 *   The packet mbuf.
 */
#define rte_pktmbuf_pkt_len(m) ((m)->pkt_len)

/**
 * A macro that returns the length of the segment.
 *
 * The value can be read or assigned.
 *
 * @param m
 *   The packet mbuf.
 */
#define rte_pktmbuf_data_len(m) ((m)->data_len)
/**
 * Prepend len bytes to an mbuf data area.
 *
 * Returns a pointer to the new
 * data start address. If there is not enough headroom in the first
 * segment, the function will return NULL, without modifying the mbuf.
 *
 * @param m
 *   The pkt mbuf.
 * @param len
 *   The amount of data to prepend (in bytes).
 * @return
 *   A pointer to the start of the newly prepended data, or
 *   NULL if there is not enough headroom space in the first segment
 */
static inline char *rte_pktmbuf_prepend(struct rte_mbuf *m,
	uint16_t len)
{
	__rte_mbuf_sanity_check(m, 1);

	if (unlikely(len > rte_pktmbuf_headroom(m)))
		return NULL;

	/* NB: elaborating the subtraction like this instead of using
	 * -= allows us to ensure the result type is uint16_t,
	 * avoiding compiler warnings on gcc 8.1 at least */
	m->data_off = (uint16_t)(m->data_off - len);
	m->data_len = (uint16_t)(m->data_len + len);
	m->pkt_len = (m->pkt_len + len);

	return (char *)m->buf_addr + m->data_off;
}

/**
 * Append len bytes to an mbuf.
 *
 * Append len bytes to an mbuf and return a pointer to the start address
 * of the added data. If there is not enough tailroom in the last
 * segment, the function will return NULL, without modifying the mbuf.
 *
 * @param m
 *   The packet mbuf.
 * @param len
 *   The amount of data to append (in bytes).
 * @return
 *   A pointer to the start of the newly appended data, or
 *   NULL if there is not enough tailroom space in the last segment
 */
static inline char *rte_pktmbuf_append(struct rte_mbuf *m, uint16_t len)
{
	void *tail;
	struct rte_mbuf *m_last;

	__rte_mbuf_sanity_check(m, 1);

	m_last = rte_pktmbuf_lastseg(m);
	if (unlikely(len > rte_pktmbuf_tailroom(m_last)))
		return NULL;

	tail = (char *)m_last->buf_addr + m_last->data_off + m_last->data_len;
	m_last->data_len = (uint16_t)(m_last->data_len + len);
	m->pkt_len = (m->pkt_len + len);
	return (char *)tail;
}

/**
 * Remove len bytes at the beginning of an mbuf.
 *
 * Returns a pointer to the start address of the new data area. If the
 * length is greater than the length of the first segment, then the
 * function will fail and return NULL, without modifying the mbuf.
 *
 * @param m
 *   The packet mbuf.
 * @param len
 *   The amount of data to remove (in bytes).
 * @return
 *   A pointer to the new start of the data.
 */
static inline char *rte_pktmbuf_adj(struct rte_mbuf *m, uint16_t len)
{
	__rte_mbuf_sanity_check(m, 1);

	if (unlikely(len > m->data_len))
		return NULL;

	/* NB: elaborating the addition like this instead of using
	 * += allows us to ensure the result type is uint16_t,
	 * avoiding compiler warnings on gcc 8.1 at least */
	m->data_len = (uint16_t)(m->data_len - len);
	m->data_off = (uint16_t)(m->data_off + len);
	m->pkt_len = (m->pkt_len - len);
	return (char *)m->buf_addr + m->data_off;
}
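/*
 * Example (illustrative sketch, not part of the API): reserve payload
 * space with rte_pktmbuf_append(), then push a header in front of it
 * with rte_pktmbuf_prepend(). The 14-byte size stands in for a
 * hypothetical Ethernet header; both calls fail without side effects.
 */
static inline int
example_add_header_and_payload(struct rte_mbuf *m,
		const void *payload, uint16_t payload_len)
{
	char *data = rte_pktmbuf_append(m, payload_len);

	if (data == NULL)
		return -1; /* not enough tailroom in the last segment */
	memcpy(data, payload, payload_len);

	data = rte_pktmbuf_prepend(m, 14);
	if (data == NULL)
		return -1; /* not enough headroom; payload stays appended */
	memset(data, 0, 14); /* fill in the real header here */
	return 0;
}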
/**
 * Remove len bytes of data at the end of the mbuf.
 *
 * If the length is greater than the length of the last segment, the
 * function will fail and return -1 without modifying the mbuf.
 *
 * @param m
 *   The packet mbuf.
 * @param len
 *   The amount of data to remove (in bytes).
 * @return
 *   - 0: On success.
 *   - -1: On error.
 */
static inline int rte_pktmbuf_trim(struct rte_mbuf *m, uint16_t len)
{
	struct rte_mbuf *m_last;

	__rte_mbuf_sanity_check(m, 1);

	m_last = rte_pktmbuf_lastseg(m);
	if (unlikely(len > m_last->data_len))
		return -1;

	m_last->data_len = (uint16_t)(m_last->data_len - len);
	m->pkt_len = (m->pkt_len - len);
	return 0;
}

/**
 * Test if mbuf data is contiguous.
 *
 * @param m
 *   The packet mbuf.
 * @return
 *   - 1, if all data is contiguous (one segment).
 *   - 0, if there are several segments.
 */
static inline int rte_pktmbuf_is_contiguous(const struct rte_mbuf *m)
{
	__rte_mbuf_sanity_check(m, 1);
	return m->nb_segs == 1;
}

/**
 * @internal used by rte_pktmbuf_read().
 */
const void *__rte_pktmbuf_read(const struct rte_mbuf *m, uint32_t off,
	uint32_t len, void *buf);

/**
 * Read len data bytes in an mbuf at the specified offset.
 *
 * If the data is contiguous, return the pointer in the mbuf data, else
 * copy the data in the buffer provided by the user and return its
 * pointer.
 *
 * @param m
 *   The pointer to the mbuf.
 * @param off
 *   The offset of the data in the mbuf.
 * @param len
 *   The number of bytes to read.
 * @param buf
 *   The buffer where data is copied if it is not contiguous in mbuf
 *   data. Its length should be at least equal to the len parameter.
 * @return
 *   The pointer to the data, either in the mbuf if it is contiguous,
 *   or in the user buffer. If the mbuf is too small, NULL is returned.
 */
static inline const void *rte_pktmbuf_read(const struct rte_mbuf *m,
	uint32_t off, uint32_t len, void *buf)
{
	if (likely(off + len <= rte_pktmbuf_data_len(m)))
		return rte_pktmbuf_mtod_offset(m, char *, off);
	else
		return __rte_pktmbuf_read(m, off, len, buf);
}

/**
 * Chain an mbuf to another, thereby creating a segmented packet.
 *
 * Note: The implementation will do a linear walk over the segments to find
 * the tail entry. For cases when there are many segments, it's better to
 * chain the entries manually.
 *
 * @param head
 *   The head of the mbuf chain (the first packet)
 * @param tail
 *   The mbuf to put last in the chain
 *
 * @return
 *   - 0, on success.
 *   - -EOVERFLOW, if the chain segment limit is exceeded
 */
static inline int rte_pktmbuf_chain(struct rte_mbuf *head, struct rte_mbuf *tail)
{
	struct rte_mbuf *cur_tail;

	/* Check for number-of-segments-overflow */
	if (head->nb_segs + tail->nb_segs > RTE_MBUF_MAX_NB_SEGS)
		return -EOVERFLOW;

	/* Chain 'tail' onto the old tail */
	cur_tail = rte_pktmbuf_lastseg(head);
	cur_tail->next = tail;

	/* accumulate number of segments and total length.
	 * NB: elaborating the addition like this instead of using
	 * += allows us to ensure the result type is uint16_t,
	 * avoiding compiler warnings on gcc 8.1 at least */
	head->nb_segs = (uint16_t)(head->nb_segs + tail->nb_segs);
	head->pkt_len += tail->pkt_len;

	/* pkt_len is only set in the head */
	tail->pkt_len = tail->data_len;

	return 0;
}
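/*
 * Example (illustrative sketch, not part of the API): safely inspect the
 * first bytes of a packet even when they span segments. The 20-byte
 * length stands in for a hypothetical fixed-size header; the caller must
 * provide an out buffer of at least that size.
 */
static inline int
example_peek_header(const struct rte_mbuf *m, uint8_t *out)
{
	const void *hdr = rte_pktmbuf_read(m, 0, 20, out);

	if (hdr == NULL)
		return -1; /* packet shorter than 20 bytes */
	if (hdr != out)
		memcpy(out, hdr, 20); /* data was contiguous in the mbuf */
	return 0;
}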
/**
 * For the given input values, generate a raw tx_offload value.
 * Note that it is the caller's responsibility to make sure that input
 * parameters don't exceed the maximum bit-field values.
 * @param il2
 *   l2_len value.
 * @param il3
 *   l3_len value.
 * @param il4
 *   l4_len value.
 * @param tso
 *   tso_segsz value.
 * @param ol3
 *   outer_l3_len value.
 * @param ol2
 *   outer_l2_len value.
 * @param unused
 *   unused value.
 * @return
 *   raw tx_offload value.
 */
static __rte_always_inline uint64_t
rte_mbuf_tx_offload(uint64_t il2, uint64_t il3, uint64_t il4, uint64_t tso,
	uint64_t ol3, uint64_t ol2, uint64_t unused)
{
	return il2 << RTE_MBUF_L2_LEN_OFS |
		il3 << RTE_MBUF_L3_LEN_OFS |
		il4 << RTE_MBUF_L4_LEN_OFS |
		tso << RTE_MBUF_TSO_SEGSZ_OFS |
		ol3 << RTE_MBUF_OUTL3_LEN_OFS |
		ol2 << RTE_MBUF_OUTL2_LEN_OFS |
		unused << RTE_MBUF_TXOFLD_UNUSED_OFS;
}

/**
 * Validate general requirements for Tx offload in mbuf.
 *
 * This function checks correctness and completeness of Tx offload settings.
 *
 * @param m
 *   The packet mbuf to be validated.
 * @return
 *   0 if the packet is valid, -EINVAL otherwise.
 */
static inline int
rte_validate_tx_offload(const struct rte_mbuf *m)
{
	uint64_t ol_flags = m->ol_flags;

	/* Does packet set any of available offloads? */
	if (!(ol_flags & RTE_MBUF_F_TX_OFFLOAD_MASK))
		return 0;

	/* IP checksum can be computed only for an IPv4 packet */
	if ((ol_flags & RTE_MBUF_F_TX_IP_CKSUM) && (ol_flags & RTE_MBUF_F_TX_IPV6))
		return -EINVAL;

	/* IP type not set when required */
	if (ol_flags & (RTE_MBUF_F_TX_L4_MASK | RTE_MBUF_F_TX_TCP_SEG))
		if (!(ol_flags & (RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IPV6)))
			return -EINVAL;

	/* Check requirements for TSO packet */
	if (ol_flags & RTE_MBUF_F_TX_TCP_SEG)
		if ((m->tso_segsz == 0) ||
			((ol_flags & RTE_MBUF_F_TX_IPV4) &&
			 !(ol_flags & RTE_MBUF_F_TX_IP_CKSUM)))
			return -EINVAL;

	/* RTE_MBUF_F_TX_OUTER_IP_CKSUM set for non outer IPv4 packet. */
	if ((ol_flags & RTE_MBUF_F_TX_OUTER_IP_CKSUM) &&
		!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV4))
		return -EINVAL;

	return 0;
}

/**
 * @internal used by rte_pktmbuf_linearize().
 */
int __rte_pktmbuf_linearize(struct rte_mbuf *mbuf);

/**
 * Linearize data in mbuf.
 *
 * This function moves the mbuf data into the first segment if there is
 * enough tailroom. The subsequent segments are unchained and freed.
 *
 * @param mbuf
 *   mbuf to linearize
 * @return
 *   - 0, on success
 *   - -1, on error
 */
static inline int
rte_pktmbuf_linearize(struct rte_mbuf *mbuf)
{
	if (rte_pktmbuf_is_contiguous(mbuf))
		return 0;
	return __rte_pktmbuf_linearize(mbuf);
}

/**
 * Dump an mbuf structure to a file.
 *
 * Dump all fields for the given packet mbuf and all its associated
 * segments (in the case of a chained buffer).
 *
 * @param f
 *   A pointer to a file for output
 * @param m
 *   The packet mbuf.
 * @param dump_len
 *   If dump_len != 0, also dump the first dump_len data bytes of
 *   the packet.
 */
void rte_pktmbuf_dump(FILE *f, const struct rte_mbuf *m, unsigned dump_len);

/**
 * Get the value of the mbuf sched queue_id field.
 */
static inline uint32_t
rte_mbuf_sched_queue_get(const struct rte_mbuf *m)
{
	return m->hash.sched.queue_id;
}
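/*
 * Example (illustrative sketch, not part of the API): request TSO for a
 * TCP-over-IPv4 packet and check the result. The header lengths and MSS
 * are arbitrary illustration values.
 */
static inline int
example_request_tso(struct rte_mbuf *m)
{
	m->ol_flags |= RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
		RTE_MBUF_F_TX_TCP_CKSUM | RTE_MBUF_F_TX_TCP_SEG;
	/* l2=14, l3=20, l4=20 bytes, 1400-byte TSO segments, no tunnel. */
	m->tx_offload = rte_mbuf_tx_offload(14, 20, 20, 1400, 0, 0, 0);

	return rte_validate_tx_offload(m);
}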
/**
 * Get the value of the mbuf sched traffic_class field.
 */
static inline uint8_t
rte_mbuf_sched_traffic_class_get(const struct rte_mbuf *m)
{
	return m->hash.sched.traffic_class;
}

/**
 * Get the value of the mbuf sched color field.
 */
static inline uint8_t
rte_mbuf_sched_color_get(const struct rte_mbuf *m)
{
	return m->hash.sched.color;
}

/**
 * Get the values of the mbuf sched queue_id, traffic_class and color.
 *
 * @param m
 *   Mbuf to read
 * @param queue_id
 *   Returns the queue id
 * @param traffic_class
 *   Returns the traffic class id
 * @param color
 *   Returns the color id
 */
static inline void
rte_mbuf_sched_get(const struct rte_mbuf *m, uint32_t *queue_id,
	uint8_t *traffic_class,
	uint8_t *color)
{
	struct rte_mbuf_sched sched = m->hash.sched;

	*queue_id = sched.queue_id;
	*traffic_class = sched.traffic_class;
	*color = sched.color;
}

/**
 * Set the mbuf sched queue_id to the defined value.
 */
static inline void
rte_mbuf_sched_queue_set(struct rte_mbuf *m, uint32_t queue_id)
{
	m->hash.sched.queue_id = queue_id;
}

/**
 * Set the mbuf sched traffic_class id to the defined value.
 */
static inline void
rte_mbuf_sched_traffic_class_set(struct rte_mbuf *m, uint8_t traffic_class)
{
	m->hash.sched.traffic_class = traffic_class;
}

/**
 * Set the mbuf sched color id to the defined value.
 */
static inline void
rte_mbuf_sched_color_set(struct rte_mbuf *m, uint8_t color)
{
	m->hash.sched.color = color;
}

/**
 * Set the mbuf sched queue_id, traffic_class and color.
 *
 * @param m
 *   Mbuf to set
 * @param queue_id
 *   Queue id value to be set
 * @param traffic_class
 *   Traffic class id value to be set
 * @param color
 *   Color id to be set
 */
static inline void
rte_mbuf_sched_set(struct rte_mbuf *m, uint32_t queue_id,
	uint8_t traffic_class,
	uint8_t color)
{
	m->hash.sched = (struct rte_mbuf_sched){
		.queue_id = queue_id,
		.traffic_class = traffic_class,
		.color = color,
		.reserved = 0,
	};
}

#ifdef __cplusplus
}
#endif

#endif /* _RTE_MBUF_H_ */