1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2017 Intel Corporation 3 */ 4 5 #ifndef _GRO_TCP4_H_ 6 #define _GRO_TCP4_H_ 7 8 #include <rte_ip.h> 9 #include <rte_tcp.h> 10 #include <rte_vxlan.h> 11 12 #define INVALID_ARRAY_INDEX 0xffffffffUL 13 #define GRO_TCP4_TBL_MAX_ITEM_NUM (1024UL * 1024UL) 14 15 /* 16 * The max length of a IPv4 packet, which includes the length of the L3 17 * header, the L4 header and the data payload. 18 */ 19 #define MAX_IPV4_PKT_LENGTH UINT16_MAX 20 21 /* The maximum TCP header length */ 22 #define MAX_TCP_HLEN 60 23 #define INVALID_TCP_HDRLEN(len) \ 24 (((len) < sizeof(struct rte_tcp_hdr)) || ((len) > MAX_TCP_HLEN)) 25 26 /* Header fields representing a TCP/IPv4 flow */ 27 struct tcp4_flow_key { 28 struct rte_ether_addr eth_saddr; 29 struct rte_ether_addr eth_daddr; 30 uint32_t ip_src_addr; 31 uint32_t ip_dst_addr; 32 33 uint32_t recv_ack; 34 uint16_t src_port; 35 uint16_t dst_port; 36 }; 37 38 struct gro_tcp4_flow { 39 struct tcp4_flow_key key; 40 /* 41 * The index of the first packet in the flow. 42 * INVALID_ARRAY_INDEX indicates an empty flow. 43 */ 44 uint32_t start_index; 45 }; 46 47 struct gro_tcp4_item { 48 /* 49 * The first MBUF segment of the packet. If the value 50 * is NULL, it means the item is empty. 51 */ 52 struct rte_mbuf *firstseg; 53 /* The last MBUF segment of the packet */ 54 struct rte_mbuf *lastseg; 55 /* 56 * The time when the first packet is inserted into the table. 57 * This value won't be updated, even if the packet is merged 58 * with other packets. 59 */ 60 uint64_t start_time; 61 /* 62 * next_pkt_idx is used to chain the packets that 63 * are in the same flow but can't be merged together 64 * (e.g. caused by packet reordering). 65 */ 66 uint32_t next_pkt_idx; 67 /* TCP sequence number of the packet */ 68 uint32_t sent_seq; 69 /* IPv4 ID of the packet */ 70 uint16_t ip_id; 71 /* the number of merged packets */ 72 uint16_t nb_merged; 73 /* Indicate if IPv4 ID can be ignored */ 74 uint8_t is_atomic; 75 }; 76 77 /* 78 * TCP/IPv4 reassembly table structure. 79 */ 80 struct gro_tcp4_tbl { 81 /* item array */ 82 struct gro_tcp4_item *items; 83 /* flow array */ 84 struct gro_tcp4_flow *flows; 85 /* current item number */ 86 uint32_t item_num; 87 /* current flow num */ 88 uint32_t flow_num; 89 /* item array size */ 90 uint32_t max_item_num; 91 /* flow array size */ 92 uint32_t max_flow_num; 93 }; 94 95 /** 96 * This function creates a TCP/IPv4 reassembly table. 97 * 98 * @param socket_id 99 * Socket index for allocating the TCP/IPv4 reassemble table 100 * @param max_flow_num 101 * The maximum number of flows in the TCP/IPv4 GRO table 102 * @param max_item_per_flow 103 * The maximum number of packets per flow 104 * 105 * @return 106 * - Return the table pointer on success. 107 * - Return NULL on failure. 108 */ 109 void *gro_tcp4_tbl_create(uint16_t socket_id, 110 uint16_t max_flow_num, 111 uint16_t max_item_per_flow); 112 113 /** 114 * This function destroys a TCP/IPv4 reassembly table. 115 * 116 * @param tbl 117 * Pointer pointing to the TCP/IPv4 reassembly table. 118 */ 119 void gro_tcp4_tbl_destroy(void *tbl); 120 121 /** 122 * This function merges a TCP/IPv4 packet. It doesn't process the packet, 123 * which has SYN, FIN, RST, PSH, CWR, ECE or URG set, or doesn't have 124 * payload. 125 * 126 * This function doesn't check if the packet has correct checksums and 127 * doesn't re-calculate checksums for the merged packet. Additionally, 128 * it assumes the packets are complete (i.e., MF==0 && frag_off==0), 129 * when IP fragmentation is possible (i.e., DF==0). It returns the 130 * packet, if the packet has invalid parameters (e.g. SYN bit is set) 131 * or there is no available space in the table. 132 * 133 * @param pkt 134 * Packet to reassemble 135 * @param tbl 136 * Pointer pointing to the TCP/IPv4 reassembly table 137 * @start_time 138 * The time when the packet is inserted into the table 139 * 140 * @return 141 * - Return a positive value if the packet is merged. 142 * - Return zero if the packet isn't merged but stored in the table. 143 * - Return a negative value for invalid parameters or no available 144 * space in the table. 145 */ 146 int32_t gro_tcp4_reassemble(struct rte_mbuf *pkt, 147 struct gro_tcp4_tbl *tbl, 148 uint64_t start_time); 149 150 /** 151 * This function flushes timeout packets in a TCP/IPv4 reassembly table, 152 * and without updating checksums. 153 * 154 * @param tbl 155 * TCP/IPv4 reassembly table pointer 156 * @param flush_timestamp 157 * Flush packets which are inserted into the table before or at the 158 * flush_timestamp. 159 * @param out 160 * Pointer array used to keep flushed packets 161 * @param nb_out 162 * The element number in 'out'. It also determines the maximum number of 163 * packets that can be flushed finally. 164 * 165 * @return 166 * The number of flushed packets 167 */ 168 uint16_t gro_tcp4_tbl_timeout_flush(struct gro_tcp4_tbl *tbl, 169 uint64_t flush_timestamp, 170 struct rte_mbuf **out, 171 uint16_t nb_out); 172 173 /** 174 * This function returns the number of the packets in a TCP/IPv4 175 * reassembly table. 176 * 177 * @param tbl 178 * TCP/IPv4 reassembly table pointer 179 * 180 * @return 181 * The number of packets in the table 182 */ 183 uint32_t gro_tcp4_tbl_pkt_count(void *tbl); 184 185 /* 186 * Check if two TCP/IPv4 packets belong to the same flow. 187 */ 188 static inline int 189 is_same_tcp4_flow(struct tcp4_flow_key k1, struct tcp4_flow_key k2) 190 { 191 return (rte_is_same_ether_addr(&k1.eth_saddr, &k2.eth_saddr) && 192 rte_is_same_ether_addr(&k1.eth_daddr, &k2.eth_daddr) && 193 (k1.ip_src_addr == k2.ip_src_addr) && 194 (k1.ip_dst_addr == k2.ip_dst_addr) && 195 (k1.recv_ack == k2.recv_ack) && 196 (k1.src_port == k2.src_port) && 197 (k1.dst_port == k2.dst_port)); 198 } 199 200 /* 201 * Merge two TCP/IPv4 packets without updating checksums. 202 * If cmp is larger than 0, append the new packet to the 203 * original packet. Otherwise, pre-pend the new packet to 204 * the original packet. 205 */ 206 static inline int 207 merge_two_tcp4_packets(struct gro_tcp4_item *item, 208 struct rte_mbuf *pkt, 209 int cmp, 210 uint32_t sent_seq, 211 uint16_t ip_id, 212 uint16_t l2_offset) 213 { 214 struct rte_mbuf *pkt_head, *pkt_tail, *lastseg; 215 uint16_t hdr_len, l2_len; 216 217 if (cmp > 0) { 218 pkt_head = item->firstseg; 219 pkt_tail = pkt; 220 } else { 221 pkt_head = pkt; 222 pkt_tail = item->firstseg; 223 } 224 225 /* check if the IPv4 packet length is greater than the max value */ 226 hdr_len = l2_offset + pkt_head->l2_len + pkt_head->l3_len + 227 pkt_head->l4_len; 228 l2_len = l2_offset > 0 ? pkt_head->outer_l2_len : pkt_head->l2_len; 229 if (unlikely(pkt_head->pkt_len - l2_len + pkt_tail->pkt_len - 230 hdr_len > MAX_IPV4_PKT_LENGTH)) 231 return 0; 232 233 /* remove the packet header for the tail packet */ 234 rte_pktmbuf_adj(pkt_tail, hdr_len); 235 236 /* chain two packets together */ 237 if (cmp > 0) { 238 item->lastseg->next = pkt; 239 item->lastseg = rte_pktmbuf_lastseg(pkt); 240 /* update IP ID to the larger value */ 241 item->ip_id = ip_id; 242 } else { 243 lastseg = rte_pktmbuf_lastseg(pkt); 244 lastseg->next = item->firstseg; 245 item->firstseg = pkt; 246 /* update sent_seq to the smaller value */ 247 item->sent_seq = sent_seq; 248 item->ip_id = ip_id; 249 } 250 item->nb_merged++; 251 252 /* update MBUF metadata for the merged packet */ 253 pkt_head->nb_segs += pkt_tail->nb_segs; 254 pkt_head->pkt_len += pkt_tail->pkt_len; 255 256 return 1; 257 } 258 259 /* 260 * Check if two TCP/IPv4 packets are neighbors. 261 */ 262 static inline int 263 check_seq_option(struct gro_tcp4_item *item, 264 struct rte_tcp_hdr *tcph, 265 uint32_t sent_seq, 266 uint16_t ip_id, 267 uint16_t tcp_hl, 268 uint16_t tcp_dl, 269 uint16_t l2_offset, 270 uint8_t is_atomic) 271 { 272 struct rte_mbuf *pkt_orig = item->firstseg; 273 struct rte_ipv4_hdr *iph_orig; 274 struct rte_tcp_hdr *tcph_orig; 275 uint16_t len, tcp_hl_orig; 276 277 iph_orig = (struct rte_ipv4_hdr *)(rte_pktmbuf_mtod(pkt_orig, char *) + 278 l2_offset + pkt_orig->l2_len); 279 tcph_orig = (struct rte_tcp_hdr *)((char *)iph_orig + pkt_orig->l3_len); 280 tcp_hl_orig = pkt_orig->l4_len; 281 282 /* Check if TCP option fields equal */ 283 len = RTE_MAX(tcp_hl, tcp_hl_orig) - sizeof(struct rte_tcp_hdr); 284 if ((tcp_hl != tcp_hl_orig) || ((len > 0) && 285 (memcmp(tcph + 1, tcph_orig + 1, 286 len) != 0))) 287 return 0; 288 289 /* Don't merge packets whose DF bits are different */ 290 if (unlikely(item->is_atomic ^ is_atomic)) 291 return 0; 292 293 /* check if the two packets are neighbors */ 294 len = pkt_orig->pkt_len - l2_offset - pkt_orig->l2_len - 295 pkt_orig->l3_len - tcp_hl_orig; 296 if ((sent_seq == item->sent_seq + len) && (is_atomic || 297 (ip_id == item->ip_id + 1))) 298 /* append the new packet */ 299 return 1; 300 else if ((sent_seq + tcp_dl == item->sent_seq) && (is_atomic || 301 (ip_id + item->nb_merged == item->ip_id))) 302 /* pre-pend the new packet */ 303 return -1; 304 305 return 0; 306 } 307 #endif 308