199a2dd95SBruce Richardson /* SPDX-License-Identifier: BSD-3-Clause 299a2dd95SBruce Richardson * Copyright(c) 2010-2014 Intel Corporation 399a2dd95SBruce Richardson */ 499a2dd95SBruce Richardson 599a2dd95SBruce Richardson #ifndef _RTE_IP_FRAG_H_ 699a2dd95SBruce Richardson #define _RTE_IP_FRAG_H_ 799a2dd95SBruce Richardson 899a2dd95SBruce Richardson /** 999a2dd95SBruce Richardson * @file 1099a2dd95SBruce Richardson * RTE IP Fragmentation and Reassembly 1199a2dd95SBruce Richardson * 1299a2dd95SBruce Richardson * Implementation of IP packet fragmentation and reassembly. 1399a2dd95SBruce Richardson */ 1499a2dd95SBruce Richardson 1599a2dd95SBruce Richardson #include <stdint.h> 1699a2dd95SBruce Richardson #include <stdio.h> 1799a2dd95SBruce Richardson 1899a2dd95SBruce Richardson #include <rte_config.h> 1999a2dd95SBruce Richardson #include <rte_malloc.h> 2099a2dd95SBruce Richardson #include <rte_memory.h> 2199a2dd95SBruce Richardson #include <rte_ip.h> 2299a2dd95SBruce Richardson #include <rte_byteorder.h> 2399a2dd95SBruce Richardson 24*719834a6SMattias Rönnblom #ifdef __cplusplus 25*719834a6SMattias Rönnblom extern "C" { 26*719834a6SMattias Rönnblom #endif 27*719834a6SMattias Rönnblom 2899a2dd95SBruce Richardson struct rte_mbuf; 2999a2dd95SBruce Richardson 30b7fc82ecSKonstantin Ananyev /** death row size (in packets) */ 31b7fc82ecSKonstantin Ananyev #define RTE_IP_FRAG_DEATH_ROW_LEN 32 3299a2dd95SBruce Richardson 33b7fc82ecSKonstantin Ananyev /** death row size in mbufs */ 34b7fc82ecSKonstantin Ananyev #define RTE_IP_FRAG_DEATH_ROW_MBUF_LEN \ 35b7fc82ecSKonstantin Ananyev (RTE_IP_FRAG_DEATH_ROW_LEN * (RTE_LIBRTE_IP_FRAG_MAX_FRAG + 1)) 3699a2dd95SBruce Richardson 3799a2dd95SBruce Richardson /** mbuf death row (packets to be freed) */ 3899a2dd95SBruce Richardson struct rte_ip_frag_death_row { 3999a2dd95SBruce Richardson uint32_t cnt; /**< number of mbufs currently on death row */ 40b7fc82ecSKonstantin Ananyev struct rte_mbuf *row[RTE_IP_FRAG_DEATH_ROW_MBUF_LEN]; 4199a2dd95SBruce Richardson /**< mbufs to be freed */ 4299a2dd95SBruce Richardson }; 4399a2dd95SBruce Richardson 4499a2dd95SBruce Richardson /** 4599a2dd95SBruce Richardson * Create a new IP fragmentation table. 4699a2dd95SBruce Richardson * 4799a2dd95SBruce Richardson * @param bucket_num 4899a2dd95SBruce Richardson * Number of buckets in the hash table. 4999a2dd95SBruce Richardson * @param bucket_entries 5099a2dd95SBruce Richardson * Number of entries per bucket (e.g. hash associativity). 5199a2dd95SBruce Richardson * Should be power of two. 5299a2dd95SBruce Richardson * @param max_entries 5399a2dd95SBruce Richardson * Maximum number of entries that could be stored in the table. 5499a2dd95SBruce Richardson * The value should be less or equal then bucket_num * bucket_entries. 5599a2dd95SBruce Richardson * @param max_cycles 5699a2dd95SBruce Richardson * Maximum TTL in cycles for each fragmented packet. 5799a2dd95SBruce Richardson * @param socket_id 5899a2dd95SBruce Richardson * The *socket_id* argument is the socket identifier in the case of 5999a2dd95SBruce Richardson * NUMA. The value can be *SOCKET_ID_ANY* if there is no NUMA constraints. 6099a2dd95SBruce Richardson * @return 6199a2dd95SBruce Richardson * The pointer to the new allocated fragmentation table, on success. NULL on error. 6299a2dd95SBruce Richardson */ 6399a2dd95SBruce Richardson struct rte_ip_frag_tbl * rte_ip_frag_table_create(uint32_t bucket_num, 6499a2dd95SBruce Richardson uint32_t bucket_entries, uint32_t max_entries, 6599a2dd95SBruce Richardson uint64_t max_cycles, int socket_id); 6699a2dd95SBruce Richardson 6799a2dd95SBruce Richardson /** 6899a2dd95SBruce Richardson * Free allocated IP fragmentation table. 6999a2dd95SBruce Richardson * 7099a2dd95SBruce Richardson * @param tbl 7199a2dd95SBruce Richardson * Fragmentation table to free. 7299a2dd95SBruce Richardson */ 7399a2dd95SBruce Richardson void 7499a2dd95SBruce Richardson rte_ip_frag_table_destroy(struct rte_ip_frag_tbl *tbl); 7599a2dd95SBruce Richardson 7699a2dd95SBruce Richardson /** 7799a2dd95SBruce Richardson * This function implements the fragmentation of IPv6 packets. 7899a2dd95SBruce Richardson * 7999a2dd95SBruce Richardson * @param pkt_in 8099a2dd95SBruce Richardson * The input packet. 8199a2dd95SBruce Richardson * @param pkts_out 8299a2dd95SBruce Richardson * Array storing the output fragments. 8399a2dd95SBruce Richardson * @param nb_pkts_out 8499a2dd95SBruce Richardson * Number of fragments. 8599a2dd95SBruce Richardson * @param mtu_size 8699a2dd95SBruce Richardson * Size in bytes of the Maximum Transfer Unit (MTU) for the outgoing IPv6 8799a2dd95SBruce Richardson * datagrams. This value includes the size of the IPv6 header. 8899a2dd95SBruce Richardson * @param pool_direct 8999a2dd95SBruce Richardson * MBUF pool used for allocating direct buffers for the output fragments. 9099a2dd95SBruce Richardson * @param pool_indirect 9199a2dd95SBruce Richardson * MBUF pool used for allocating indirect buffers for the output fragments. 9299a2dd95SBruce Richardson * @return 9399a2dd95SBruce Richardson * Upon successful completion - number of output fragments placed 9499a2dd95SBruce Richardson * in the pkts_out array. 9599a2dd95SBruce Richardson * Otherwise - (-1) * errno. 9699a2dd95SBruce Richardson */ 9799a2dd95SBruce Richardson int32_t 9899a2dd95SBruce Richardson rte_ipv6_fragment_packet(struct rte_mbuf *pkt_in, 9999a2dd95SBruce Richardson struct rte_mbuf **pkts_out, 10099a2dd95SBruce Richardson uint16_t nb_pkts_out, 10199a2dd95SBruce Richardson uint16_t mtu_size, 10299a2dd95SBruce Richardson struct rte_mempool *pool_direct, 10399a2dd95SBruce Richardson struct rte_mempool *pool_indirect); 10499a2dd95SBruce Richardson 10599a2dd95SBruce Richardson /** 10699a2dd95SBruce Richardson * This function implements reassembly of fragmented IPv6 packets. 10799a2dd95SBruce Richardson * Incoming mbuf should have its l2_len/l3_len fields setup correctly. 10899a2dd95SBruce Richardson * 10999a2dd95SBruce Richardson * @param tbl 11099a2dd95SBruce Richardson * Table where to lookup/add the fragmented packet. 11199a2dd95SBruce Richardson * @param dr 11299a2dd95SBruce Richardson * Death row to free buffers to 11399a2dd95SBruce Richardson * @param mb 11499a2dd95SBruce Richardson * Incoming mbuf with IPv6 fragment. 11599a2dd95SBruce Richardson * @param tms 11699a2dd95SBruce Richardson * Fragment arrival timestamp. 11799a2dd95SBruce Richardson * @param ip_hdr 11899a2dd95SBruce Richardson * Pointer to the IPv6 header. 11999a2dd95SBruce Richardson * @param frag_hdr 12099a2dd95SBruce Richardson * Pointer to the IPv6 fragment extension header. 12199a2dd95SBruce Richardson * @return 12299a2dd95SBruce Richardson * Pointer to mbuf for reassembled packet, or NULL if: 12399a2dd95SBruce Richardson * - an error occurred. 12499a2dd95SBruce Richardson * - not all fragments of the packet are collected yet. 12599a2dd95SBruce Richardson */ 12699a2dd95SBruce Richardson struct rte_mbuf *rte_ipv6_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl, 12799a2dd95SBruce Richardson struct rte_ip_frag_death_row *dr, 12899a2dd95SBruce Richardson struct rte_mbuf *mb, uint64_t tms, struct rte_ipv6_hdr *ip_hdr, 129b7fc82ecSKonstantin Ananyev struct rte_ipv6_fragment_ext *frag_hdr); 13099a2dd95SBruce Richardson 13199a2dd95SBruce Richardson /** 13299a2dd95SBruce Richardson * Return a pointer to the packet's fragment header, if found. 13399a2dd95SBruce Richardson * It only looks at the extension header that's right after the fixed IPv6 13499a2dd95SBruce Richardson * header, and doesn't follow the whole chain of extension headers. 13599a2dd95SBruce Richardson * 13699a2dd95SBruce Richardson * @param hdr 13799a2dd95SBruce Richardson * Pointer to the IPv6 header. 13899a2dd95SBruce Richardson * @return 13999a2dd95SBruce Richardson * Pointer to the IPv6 fragment extension header, or NULL if it's not 14099a2dd95SBruce Richardson * present. 14199a2dd95SBruce Richardson */ 142b7fc82ecSKonstantin Ananyev static inline struct rte_ipv6_fragment_ext * 14399a2dd95SBruce Richardson rte_ipv6_frag_get_ipv6_fragment_header(struct rte_ipv6_hdr *hdr) 14499a2dd95SBruce Richardson { 14599a2dd95SBruce Richardson if (hdr->proto == IPPROTO_FRAGMENT) { 146b7fc82ecSKonstantin Ananyev return (struct rte_ipv6_fragment_ext *) ++hdr; 14799a2dd95SBruce Richardson } 14899a2dd95SBruce Richardson else 14999a2dd95SBruce Richardson return NULL; 15099a2dd95SBruce Richardson } 15199a2dd95SBruce Richardson 15299a2dd95SBruce Richardson /** 15399a2dd95SBruce Richardson * IPv4 fragmentation. 15499a2dd95SBruce Richardson * 15599a2dd95SBruce Richardson * This function implements the fragmentation of IPv4 packets. 15699a2dd95SBruce Richardson * 15799a2dd95SBruce Richardson * @param pkt_in 15899a2dd95SBruce Richardson * The input packet. 15999a2dd95SBruce Richardson * @param pkts_out 16099a2dd95SBruce Richardson * Array storing the output fragments. 16199a2dd95SBruce Richardson * @param nb_pkts_out 16299a2dd95SBruce Richardson * Number of fragments. 16399a2dd95SBruce Richardson * @param mtu_size 16499a2dd95SBruce Richardson * Size in bytes of the Maximum Transfer Unit (MTU) for the outgoing IPv4 16599a2dd95SBruce Richardson * datagrams. This value includes the size of the IPv4 header. 16699a2dd95SBruce Richardson * @param pool_direct 16799a2dd95SBruce Richardson * MBUF pool used for allocating direct buffers for the output fragments. 16899a2dd95SBruce Richardson * @param pool_indirect 16999a2dd95SBruce Richardson * MBUF pool used for allocating indirect buffers for the output fragments. 17099a2dd95SBruce Richardson * @return 17199a2dd95SBruce Richardson * Upon successful completion - number of output fragments placed 17299a2dd95SBruce Richardson * in the pkts_out array. 17399a2dd95SBruce Richardson * Otherwise - (-1) * errno. 17499a2dd95SBruce Richardson */ 17599a2dd95SBruce Richardson int32_t rte_ipv4_fragment_packet(struct rte_mbuf *pkt_in, 17699a2dd95SBruce Richardson struct rte_mbuf **pkts_out, 17799a2dd95SBruce Richardson uint16_t nb_pkts_out, uint16_t mtu_size, 17899a2dd95SBruce Richardson struct rte_mempool *pool_direct, 17999a2dd95SBruce Richardson struct rte_mempool *pool_indirect); 18099a2dd95SBruce Richardson 18199a2dd95SBruce Richardson /** 1824aee6110SHuichao Cai * IPv4 fragmentation by copy. 1834aee6110SHuichao Cai * 1844aee6110SHuichao Cai * This function implements the fragmentation of IPv4 packets by copy 1854aee6110SHuichao Cai * non-segmented mbuf. 1864aee6110SHuichao Cai * This function is mainly used to adapt Tx MBUF_FAST_FREE offload. 1874aee6110SHuichao Cai * MBUF_FAST_FREE: Device supports optimization for fast release of mbufs. 1884aee6110SHuichao Cai * When set, application must guarantee that per-queue all mbufs comes from 1894aee6110SHuichao Cai * the same mempool, has refcnt = 1, direct and non-segmented. 1904aee6110SHuichao Cai * 1914aee6110SHuichao Cai * @param pkt_in 1924aee6110SHuichao Cai * The input packet. 1934aee6110SHuichao Cai * @param pkts_out 1944aee6110SHuichao Cai * Array storing the output fragments. 1954aee6110SHuichao Cai * @param nb_pkts_out 1964aee6110SHuichao Cai * Number of fragments. 1974aee6110SHuichao Cai * @param mtu_size 1984aee6110SHuichao Cai * Size in bytes of the Maximum Transfer Unit (MTU) for the outgoing IPv4 1994aee6110SHuichao Cai * datagrams. This value includes the size of the IPv4 header. 2004aee6110SHuichao Cai * @param pool_direct 2014aee6110SHuichao Cai * MBUF pool used for allocating direct buffers for the output fragments. 2024aee6110SHuichao Cai * @return 2034aee6110SHuichao Cai * Upon successful completion - number of output fragments placed 2044aee6110SHuichao Cai * in the pkts_out array. 2054aee6110SHuichao Cai * Otherwise - (-1) * errno. 2064aee6110SHuichao Cai */ 2074aee6110SHuichao Cai int32_t 2084aee6110SHuichao Cai rte_ipv4_fragment_copy_nonseg_packet(struct rte_mbuf *pkt_in, 2094aee6110SHuichao Cai struct rte_mbuf **pkts_out, 2104aee6110SHuichao Cai uint16_t nb_pkts_out, 2114aee6110SHuichao Cai uint16_t mtu_size, 2124aee6110SHuichao Cai struct rte_mempool *pool_direct); 2134aee6110SHuichao Cai 2144aee6110SHuichao Cai /** 21599a2dd95SBruce Richardson * This function implements reassembly of fragmented IPv4 packets. 21699a2dd95SBruce Richardson * Incoming mbufs should have its l2_len/l3_len fields setup correctly. 21799a2dd95SBruce Richardson * 21899a2dd95SBruce Richardson * @param tbl 21999a2dd95SBruce Richardson * Table where to lookup/add the fragmented packet. 22099a2dd95SBruce Richardson * @param dr 22199a2dd95SBruce Richardson * Death row to free buffers to 22299a2dd95SBruce Richardson * @param mb 22399a2dd95SBruce Richardson * Incoming mbuf with IPv4 fragment. 22499a2dd95SBruce Richardson * @param tms 22599a2dd95SBruce Richardson * Fragment arrival timestamp. 22699a2dd95SBruce Richardson * @param ip_hdr 22799a2dd95SBruce Richardson * Pointer to the IPV4 header inside the fragment. 22899a2dd95SBruce Richardson * @return 22999a2dd95SBruce Richardson * Pointer to mbuf for reassembled packet, or NULL if: 23099a2dd95SBruce Richardson * - an error occurred. 23199a2dd95SBruce Richardson * - not all fragments of the packet are collected yet. 23299a2dd95SBruce Richardson */ 23399a2dd95SBruce Richardson struct rte_mbuf * rte_ipv4_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl, 23499a2dd95SBruce Richardson struct rte_ip_frag_death_row *dr, 23599a2dd95SBruce Richardson struct rte_mbuf *mb, uint64_t tms, struct rte_ipv4_hdr *ip_hdr); 23699a2dd95SBruce Richardson 23799a2dd95SBruce Richardson /** 23899a2dd95SBruce Richardson * Check if the IPv4 packet is fragmented 23999a2dd95SBruce Richardson * 24099a2dd95SBruce Richardson * @param hdr 24199a2dd95SBruce Richardson * IPv4 header of the packet 24299a2dd95SBruce Richardson * @return 24399a2dd95SBruce Richardson * 1 if fragmented, 0 if not fragmented 24499a2dd95SBruce Richardson */ 24599a2dd95SBruce Richardson static inline int 24699a2dd95SBruce Richardson rte_ipv4_frag_pkt_is_fragmented(const struct rte_ipv4_hdr *hdr) 24799a2dd95SBruce Richardson { 24899a2dd95SBruce Richardson uint16_t flag_offset, ip_flag, ip_ofs; 24999a2dd95SBruce Richardson 25099a2dd95SBruce Richardson flag_offset = rte_be_to_cpu_16(hdr->fragment_offset); 25199a2dd95SBruce Richardson ip_ofs = (uint16_t)(flag_offset & RTE_IPV4_HDR_OFFSET_MASK); 25299a2dd95SBruce Richardson ip_flag = (uint16_t)(flag_offset & RTE_IPV4_HDR_MF_FLAG); 25399a2dd95SBruce Richardson 25499a2dd95SBruce Richardson return ip_flag != 0 || ip_ofs != 0; 25599a2dd95SBruce Richardson } 25699a2dd95SBruce Richardson 25799a2dd95SBruce Richardson /** 25899a2dd95SBruce Richardson * Free mbufs on a given death row. 25999a2dd95SBruce Richardson * 26099a2dd95SBruce Richardson * @param dr 26199a2dd95SBruce Richardson * Death row to free mbufs in. 26299a2dd95SBruce Richardson * @param prefetch 26399a2dd95SBruce Richardson * How many buffers to prefetch before freeing. 26499a2dd95SBruce Richardson */ 26599a2dd95SBruce Richardson void rte_ip_frag_free_death_row(struct rte_ip_frag_death_row *dr, 26699a2dd95SBruce Richardson uint32_t prefetch); 26799a2dd95SBruce Richardson 26899a2dd95SBruce Richardson 26999a2dd95SBruce Richardson /** 27099a2dd95SBruce Richardson * Dump fragmentation table statistics to file. 27199a2dd95SBruce Richardson * 27299a2dd95SBruce Richardson * @param f 27399a2dd95SBruce Richardson * File to dump statistics to 27499a2dd95SBruce Richardson * @param tbl 27599a2dd95SBruce Richardson * Fragmentation table to dump statistics from 27699a2dd95SBruce Richardson */ 27799a2dd95SBruce Richardson void 27899a2dd95SBruce Richardson rte_ip_frag_table_statistics_dump(FILE * f, const struct rte_ip_frag_tbl *tbl); 27999a2dd95SBruce Richardson 28099a2dd95SBruce Richardson /** 28199a2dd95SBruce Richardson * Delete expired fragments 28299a2dd95SBruce Richardson * 28399a2dd95SBruce Richardson * @param tbl 28499a2dd95SBruce Richardson * Table to delete expired fragments from 28599a2dd95SBruce Richardson * @param dr 28699a2dd95SBruce Richardson * Death row to free buffers to 28799a2dd95SBruce Richardson * @param tms 28899a2dd95SBruce Richardson * Current timestamp 28999a2dd95SBruce Richardson */ 29099a2dd95SBruce Richardson void 291b7fc82ecSKonstantin Ananyev rte_ip_frag_table_del_expired_entries(struct rte_ip_frag_tbl *tbl, 29299a2dd95SBruce Richardson struct rte_ip_frag_death_row *dr, uint64_t tms); 29399a2dd95SBruce Richardson 294b7fc82ecSKonstantin Ananyev /**@{@name Obsolete macros, kept here for compatibility reasons. 295b7fc82ecSKonstantin Ananyev * Will be deprecated/removed in future DPDK releases. 296b7fc82ecSKonstantin Ananyev */ 297b7fc82ecSKonstantin Ananyev /** Obsolete */ 298b7fc82ecSKonstantin Ananyev #define IP_FRAG_DEATH_ROW_LEN RTE_IP_FRAG_DEATH_ROW_LEN 299b7fc82ecSKonstantin Ananyev /** Obsolete */ 300b7fc82ecSKonstantin Ananyev #define IP_FRAG_DEATH_ROW_MBUF_LEN RTE_IP_FRAG_DEATH_ROW_MBUF_LEN 301b7fc82ecSKonstantin Ananyev /** Obsolete */ 302b7fc82ecSKonstantin Ananyev #define ipv6_extension_fragment rte_ipv6_fragment_ext 303b7fc82ecSKonstantin Ananyev /**@}*/ 304b7fc82ecSKonstantin Ananyev 30599a2dd95SBruce Richardson #ifdef __cplusplus 30699a2dd95SBruce Richardson } 30799a2dd95SBruce Richardson #endif 30899a2dd95SBruce Richardson 30999a2dd95SBruce Richardson #endif /* _RTE_IP_FRAG_H_ */ 310