1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2010-2014 Intel Corporation 3 */ 4 5 #ifndef _RTE_IP_FRAG_H_ 6 #define _RTE_IP_FRAG_H_ 7 8 /** 9 * @file 10 * RTE IP Fragmentation and Reassembly 11 * 12 * Implementation of IP packet fragmentation and reassembly. 13 */ 14 15 #ifdef __cplusplus 16 extern "C" { 17 #endif 18 19 #include <stdint.h> 20 #include <stdio.h> 21 22 #include <rte_config.h> 23 #include <rte_malloc.h> 24 #include <rte_memory.h> 25 #include <rte_ip.h> 26 #include <rte_byteorder.h> 27 28 struct rte_mbuf; 29 30 enum { 31 IP_LAST_FRAG_IDX, /**< index of last fragment */ 32 IP_FIRST_FRAG_IDX, /**< index of first fragment */ 33 IP_MIN_FRAG_NUM, /**< minimum number of fragments */ 34 IP_MAX_FRAG_NUM = RTE_LIBRTE_IP_FRAG_MAX_FRAG, 35 /**< maximum number of fragments per packet */ 36 }; 37 38 /** @internal fragmented mbuf */ 39 struct ip_frag { 40 uint16_t ofs; /**< offset into the packet */ 41 uint16_t len; /**< length of fragment */ 42 struct rte_mbuf *mb; /**< fragment mbuf */ 43 }; 44 45 /** @internal <src addr, dst_addr, id> to uniquely identify fragmented datagram. */ 46 struct ip_frag_key { 47 uint64_t src_dst[4]; 48 /**< src and dst address, only first 8 bytes used for IPv4 */ 49 RTE_STD_C11 50 union { 51 uint64_t id_key_len; /**< combined for easy fetch */ 52 __extension__ 53 struct { 54 uint32_t id; /**< packet id */ 55 uint32_t key_len; /**< src/dst key length */ 56 }; 57 }; 58 }; 59 60 /** 61 * @internal Fragmented packet to reassemble. 62 * First two entries in the frags[] array are for the last and first fragments. 63 */ 64 struct ip_frag_pkt { 65 RTE_TAILQ_ENTRY(ip_frag_pkt) lru; /**< LRU list */ 66 struct ip_frag_key key; /**< fragmentation key */ 67 uint64_t start; /**< creation timestamp */ 68 uint32_t total_size; /**< expected reassembled size */ 69 uint32_t frag_size; /**< size of fragments received */ 70 uint32_t last_idx; /**< index of next entry to fill */ 71 struct ip_frag frags[IP_MAX_FRAG_NUM]; /**< fragments */ 72 } __rte_cache_aligned; 73 74 #define IP_FRAG_DEATH_ROW_LEN 32 /**< death row size (in packets) */ 75 76 /* death row size in mbufs */ 77 #define IP_FRAG_DEATH_ROW_MBUF_LEN (IP_FRAG_DEATH_ROW_LEN * (IP_MAX_FRAG_NUM + 1)) 78 79 /** mbuf death row (packets to be freed) */ 80 struct rte_ip_frag_death_row { 81 uint32_t cnt; /**< number of mbufs currently on death row */ 82 struct rte_mbuf *row[IP_FRAG_DEATH_ROW_MBUF_LEN]; 83 /**< mbufs to be freed */ 84 }; 85 86 RTE_TAILQ_HEAD(ip_pkt_list, ip_frag_pkt); /**< @internal fragments tailq */ 87 88 /** fragmentation table statistics */ 89 struct ip_frag_tbl_stat { 90 uint64_t find_num; /**< total # of find/insert attempts. */ 91 uint64_t add_num; /**< # of add ops. */ 92 uint64_t del_num; /**< # of del ops. */ 93 uint64_t reuse_num; /**< # of reuse (del/add) ops. */ 94 uint64_t fail_total; /**< total # of add failures. */ 95 uint64_t fail_nospace; /**< # of 'no space' add failures. */ 96 } __rte_cache_aligned; 97 98 /** fragmentation table */ 99 struct rte_ip_frag_tbl { 100 uint64_t max_cycles; /**< ttl for table entries. */ 101 uint32_t entry_mask; /**< hash value mask. */ 102 uint32_t max_entries; /**< max entries allowed. */ 103 uint32_t use_entries; /**< entries in use. */ 104 uint32_t bucket_entries; /**< hash associativity. */ 105 uint32_t nb_entries; /**< total size of the table. */ 106 uint32_t nb_buckets; /**< num of associativity lines. */ 107 struct ip_frag_pkt *last; /**< last used entry. */ 108 struct ip_pkt_list lru; /**< LRU list for table entries. */ 109 struct ip_frag_tbl_stat stat; /**< statistics counters. */ 110 __extension__ struct ip_frag_pkt pkt[0]; /**< hash table. */ 111 }; 112 113 /* struct ipv6_extension_fragment moved to librte_net/rte_ip.h and renamed. */ 114 #define ipv6_extension_fragment rte_ipv6_fragment_ext 115 116 /** 117 * Create a new IP fragmentation table. 118 * 119 * @param bucket_num 120 * Number of buckets in the hash table. 121 * @param bucket_entries 122 * Number of entries per bucket (e.g. hash associativity). 123 * Should be power of two. 124 * @param max_entries 125 * Maximum number of entries that could be stored in the table. 126 * The value should be less or equal then bucket_num * bucket_entries. 127 * @param max_cycles 128 * Maximum TTL in cycles for each fragmented packet. 129 * @param socket_id 130 * The *socket_id* argument is the socket identifier in the case of 131 * NUMA. The value can be *SOCKET_ID_ANY* if there is no NUMA constraints. 132 * @return 133 * The pointer to the new allocated fragmentation table, on success. NULL on error. 134 */ 135 struct rte_ip_frag_tbl * rte_ip_frag_table_create(uint32_t bucket_num, 136 uint32_t bucket_entries, uint32_t max_entries, 137 uint64_t max_cycles, int socket_id); 138 139 /** 140 * Free allocated IP fragmentation table. 141 * 142 * @param tbl 143 * Fragmentation table to free. 144 */ 145 void 146 rte_ip_frag_table_destroy(struct rte_ip_frag_tbl *tbl); 147 148 /** 149 * This function implements the fragmentation of IPv6 packets. 150 * 151 * @param pkt_in 152 * The input packet. 153 * @param pkts_out 154 * Array storing the output fragments. 155 * @param nb_pkts_out 156 * Number of fragments. 157 * @param mtu_size 158 * Size in bytes of the Maximum Transfer Unit (MTU) for the outgoing IPv6 159 * datagrams. This value includes the size of the IPv6 header. 160 * @param pool_direct 161 * MBUF pool used for allocating direct buffers for the output fragments. 162 * @param pool_indirect 163 * MBUF pool used for allocating indirect buffers for the output fragments. 164 * @return 165 * Upon successful completion - number of output fragments placed 166 * in the pkts_out array. 167 * Otherwise - (-1) * errno. 168 */ 169 int32_t 170 rte_ipv6_fragment_packet(struct rte_mbuf *pkt_in, 171 struct rte_mbuf **pkts_out, 172 uint16_t nb_pkts_out, 173 uint16_t mtu_size, 174 struct rte_mempool *pool_direct, 175 struct rte_mempool *pool_indirect); 176 177 /** 178 * This function implements reassembly of fragmented IPv6 packets. 179 * Incoming mbuf should have its l2_len/l3_len fields setup correctly. 180 * 181 * @param tbl 182 * Table where to lookup/add the fragmented packet. 183 * @param dr 184 * Death row to free buffers to 185 * @param mb 186 * Incoming mbuf with IPv6 fragment. 187 * @param tms 188 * Fragment arrival timestamp. 189 * @param ip_hdr 190 * Pointer to the IPv6 header. 191 * @param frag_hdr 192 * Pointer to the IPv6 fragment extension header. 193 * @return 194 * Pointer to mbuf for reassembled packet, or NULL if: 195 * - an error occurred. 196 * - not all fragments of the packet are collected yet. 197 */ 198 struct rte_mbuf *rte_ipv6_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl, 199 struct rte_ip_frag_death_row *dr, 200 struct rte_mbuf *mb, uint64_t tms, struct rte_ipv6_hdr *ip_hdr, 201 struct ipv6_extension_fragment *frag_hdr); 202 203 /** 204 * Return a pointer to the packet's fragment header, if found. 205 * It only looks at the extension header that's right after the fixed IPv6 206 * header, and doesn't follow the whole chain of extension headers. 207 * 208 * @param hdr 209 * Pointer to the IPv6 header. 210 * @return 211 * Pointer to the IPv6 fragment extension header, or NULL if it's not 212 * present. 213 */ 214 static inline struct ipv6_extension_fragment * 215 rte_ipv6_frag_get_ipv6_fragment_header(struct rte_ipv6_hdr *hdr) 216 { 217 if (hdr->proto == IPPROTO_FRAGMENT) { 218 return (struct ipv6_extension_fragment *) ++hdr; 219 } 220 else 221 return NULL; 222 } 223 224 /** 225 * IPv4 fragmentation. 226 * 227 * This function implements the fragmentation of IPv4 packets. 228 * 229 * @param pkt_in 230 * The input packet. 231 * @param pkts_out 232 * Array storing the output fragments. 233 * @param nb_pkts_out 234 * Number of fragments. 235 * @param mtu_size 236 * Size in bytes of the Maximum Transfer Unit (MTU) for the outgoing IPv4 237 * datagrams. This value includes the size of the IPv4 header. 238 * @param pool_direct 239 * MBUF pool used for allocating direct buffers for the output fragments. 240 * @param pool_indirect 241 * MBUF pool used for allocating indirect buffers for the output fragments. 242 * @return 243 * Upon successful completion - number of output fragments placed 244 * in the pkts_out array. 245 * Otherwise - (-1) * errno. 246 */ 247 int32_t rte_ipv4_fragment_packet(struct rte_mbuf *pkt_in, 248 struct rte_mbuf **pkts_out, 249 uint16_t nb_pkts_out, uint16_t mtu_size, 250 struct rte_mempool *pool_direct, 251 struct rte_mempool *pool_indirect); 252 253 /** 254 * This function implements reassembly of fragmented IPv4 packets. 255 * Incoming mbufs should have its l2_len/l3_len fields setup correctly. 256 * 257 * @param tbl 258 * Table where to lookup/add the fragmented packet. 259 * @param dr 260 * Death row to free buffers to 261 * @param mb 262 * Incoming mbuf with IPv4 fragment. 263 * @param tms 264 * Fragment arrival timestamp. 265 * @param ip_hdr 266 * Pointer to the IPV4 header inside the fragment. 267 * @return 268 * Pointer to mbuf for reassembled packet, or NULL if: 269 * - an error occurred. 270 * - not all fragments of the packet are collected yet. 271 */ 272 struct rte_mbuf * rte_ipv4_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl, 273 struct rte_ip_frag_death_row *dr, 274 struct rte_mbuf *mb, uint64_t tms, struct rte_ipv4_hdr *ip_hdr); 275 276 /** 277 * Check if the IPv4 packet is fragmented 278 * 279 * @param hdr 280 * IPv4 header of the packet 281 * @return 282 * 1 if fragmented, 0 if not fragmented 283 */ 284 static inline int 285 rte_ipv4_frag_pkt_is_fragmented(const struct rte_ipv4_hdr *hdr) 286 { 287 uint16_t flag_offset, ip_flag, ip_ofs; 288 289 flag_offset = rte_be_to_cpu_16(hdr->fragment_offset); 290 ip_ofs = (uint16_t)(flag_offset & RTE_IPV4_HDR_OFFSET_MASK); 291 ip_flag = (uint16_t)(flag_offset & RTE_IPV4_HDR_MF_FLAG); 292 293 return ip_flag != 0 || ip_ofs != 0; 294 } 295 296 /** 297 * Free mbufs on a given death row. 298 * 299 * @param dr 300 * Death row to free mbufs in. 301 * @param prefetch 302 * How many buffers to prefetch before freeing. 303 */ 304 void rte_ip_frag_free_death_row(struct rte_ip_frag_death_row *dr, 305 uint32_t prefetch); 306 307 308 /** 309 * Dump fragmentation table statistics to file. 310 * 311 * @param f 312 * File to dump statistics to 313 * @param tbl 314 * Fragmentation table to dump statistics from 315 */ 316 void 317 rte_ip_frag_table_statistics_dump(FILE * f, const struct rte_ip_frag_tbl *tbl); 318 319 /** 320 * Delete expired fragments 321 * 322 * @param tbl 323 * Table to delete expired fragments from 324 * @param dr 325 * Death row to free buffers to 326 * @param tms 327 * Current timestamp 328 */ 329 __rte_experimental 330 void 331 rte_frag_table_del_expired_entries(struct rte_ip_frag_tbl *tbl, 332 struct rte_ip_frag_death_row *dr, uint64_t tms); 333 334 #ifdef __cplusplus 335 } 336 #endif 337 338 #endif /* _RTE_IP_FRAG_H_ */ 339