xref: /dpdk/lib/gro/gro_tcp4.h (revision daa02b5cddbb8e11b31d41e2bf7bb1ae64dcae2f)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2017 Intel Corporation
3  */
4 
5 #ifndef _GRO_TCP4_H_
6 #define _GRO_TCP4_H_
7 
8 #include <rte_ip.h>
9 #include <rte_tcp.h>
10 #include <rte_vxlan.h>
11 
/*
 * Sentinel array index. A flow whose start_index equals this value is
 * empty.
 */
#define INVALID_ARRAY_INDEX 0xffffffffUL
/*
 * 1024 * 1024 items.
 * NOTE(review): presumably the upper bound on the item array size; the
 * code enforcing it is not visible in this header - confirm.
 */
#define GRO_TCP4_TBL_MAX_ITEM_NUM (1024UL * 1024UL)

/*
 * The max length of an IPv4 packet, which includes the length of the L3
 * header, the L4 header and the data payload.
 */
#define MAX_IPV4_PKT_LENGTH UINT16_MAX

/* The maximum TCP header length, in bytes */
#define MAX_TCP_HLEN 60
/*
 * Evaluates to non-zero when len is smaller than struct rte_tcp_hdr or
 * larger than MAX_TCP_HLEN. The argument is evaluated twice, so it must
 * not have side effects.
 */
#define INVALID_TCP_HDRLEN(len) \
	(((len) < sizeof(struct rte_tcp_hdr)) || ((len) > MAX_TCP_HLEN))
25 
26 /* Header fields representing a TCP/IPv4 flow */
27 struct tcp4_flow_key {
28 	struct rte_ether_addr eth_saddr;
29 	struct rte_ether_addr eth_daddr;
30 	uint32_t ip_src_addr;
31 	uint32_t ip_dst_addr;
32 
33 	uint32_t recv_ack;
34 	uint16_t src_port;
35 	uint16_t dst_port;
36 };
37 
38 struct gro_tcp4_flow {
39 	struct tcp4_flow_key key;
40 	/*
41 	 * The index of the first packet in the flow.
42 	 * INVALID_ARRAY_INDEX indicates an empty flow.
43 	 */
44 	uint32_t start_index;
45 };
46 
/* One packet (possibly the result of several merges) kept in the table. */
struct gro_tcp4_item {
	/*
	 * First and last MBUF segments of the packet.
	 * A NULL firstseg marks the item as empty.
	 */
	struct rte_mbuf *firstseg, *lastseg;
	/*
	 * Time at which the first packet was inserted into the table.
	 * Merging further packets into this item never changes it.
	 */
	uint64_t start_time;
	/*
	 * Link to the next item of the same flow that could not be
	 * merged with this one (e.g. because of packet reordering).
	 */
	uint32_t next_pkt_idx;
	/* TCP sequence number of the packet */
	uint32_t sent_seq;
	/* IPv4 ID of the packet */
	uint16_t ip_id;
	/* Number of packets merged into this item */
	uint16_t nb_merged;
	/* Non-zero when the IPv4 ID can be ignored */
	uint8_t is_atomic;
};
76 
/*
 * TCP/IPv4 reassembly table: an item array and a flow array together
 * with their current and maximum element counts.
 */
struct gro_tcp4_tbl {
	/* item array */
	struct gro_tcp4_item *items;
	/* flow array */
	struct gro_tcp4_flow *flows;
	/* current number of items and of flows */
	uint32_t item_num, flow_num;
	/* sizes of the item array and of the flow array */
	uint32_t max_item_num, max_flow_num;
};
94 
/**
 * This function creates a TCP/IPv4 reassembly table.
 *
 * @param socket_id
 *  Socket index for allocating the TCP/IPv4 reassembly table
 * @param max_flow_num
 *  The maximum number of flows in the TCP/IPv4 GRO table
 * @param max_item_per_flow
 *  The maximum number of packets per flow
 *
 * @return
 *  - Return the table pointer on success.
 *  - Return NULL on failure.
 */
void *gro_tcp4_tbl_create(uint16_t socket_id,
		uint16_t max_flow_num,
		uint16_t max_item_per_flow);
112 
/**
 * This function destroys a TCP/IPv4 reassembly table.
 *
 * @param tbl
 *  Pointer to the TCP/IPv4 reassembly table.
 */
void gro_tcp4_tbl_destroy(void *tbl);
120 
/**
 * This function merges a TCP/IPv4 packet. It doesn't process a packet
 * that has SYN, FIN, RST, PSH, CWR, ECE or URG set, or that doesn't
 * have payload.
 *
 * This function doesn't check if the packet has correct checksums and
 * doesn't re-calculate checksums for the merged packet. Additionally,
 * it assumes the packets are complete (i.e., MF==0 && frag_off==0),
 * when IP fragmentation is possible (i.e., DF==0). The packet is left
 * unprocessed, and a negative value is returned, if it has invalid
 * parameters (e.g. SYN bit is set) or there is no available space in
 * the table.
 *
 * @param pkt
 *  Packet to reassemble
 * @param tbl
 *  Pointer to the TCP/IPv4 reassembly table
 * @param start_time
 *  The time when the packet is inserted into the table
 *
 * @return
 *  - Return a positive value if the packet is merged.
 *  - Return zero if the packet isn't merged but stored in the table.
 *  - Return a negative value for invalid parameters or no available
 *    space in the table.
 */
int32_t gro_tcp4_reassemble(struct rte_mbuf *pkt,
		struct gro_tcp4_tbl *tbl,
		uint64_t start_time);
149 
/**
 * This function flushes timed-out packets from a TCP/IPv4 reassembly
 * table, without updating checksums.
 *
 * @param tbl
 *  TCP/IPv4 reassembly table pointer
 * @param flush_timestamp
 *  Flush packets which were inserted into the table before or at the
 *  flush_timestamp.
 * @param out
 *  Pointer array used to keep flushed packets
 * @param nb_out
 *  The number of elements in 'out'. It also determines the maximum
 *  number of packets that can be flushed.
 *
 * @return
 *  The number of flushed packets
 */
uint16_t gro_tcp4_tbl_timeout_flush(struct gro_tcp4_tbl *tbl,
		uint64_t flush_timestamp,
		struct rte_mbuf **out,
		uint16_t nb_out);
172 
/**
 * This function returns the number of packets in a TCP/IPv4
 * reassembly table.
 *
 * @param tbl
 *  TCP/IPv4 reassembly table pointer
 *
 * @return
 *  The number of packets in the table
 */
uint32_t gro_tcp4_tbl_pkt_count(void *tbl);
184 
185 /*
186  * Check if two TCP/IPv4 packets belong to the same flow.
187  */
188 static inline int
189 is_same_tcp4_flow(struct tcp4_flow_key k1, struct tcp4_flow_key k2)
190 {
191 	return (rte_is_same_ether_addr(&k1.eth_saddr, &k2.eth_saddr) &&
192 			rte_is_same_ether_addr(&k1.eth_daddr, &k2.eth_daddr) &&
193 			(k1.ip_src_addr == k2.ip_src_addr) &&
194 			(k1.ip_dst_addr == k2.ip_dst_addr) &&
195 			(k1.recv_ack == k2.recv_ack) &&
196 			(k1.src_port == k2.src_port) &&
197 			(k1.dst_port == k2.dst_port));
198 }
199 
200 /*
201  * Merge two TCP/IPv4 packets without updating checksums.
202  * If cmp is larger than 0, append the new packet to the
203  * original packet. Otherwise, pre-pend the new packet to
204  * the original packet.
205  */
206 static inline int
207 merge_two_tcp4_packets(struct gro_tcp4_item *item,
208 		struct rte_mbuf *pkt,
209 		int cmp,
210 		uint32_t sent_seq,
211 		uint16_t ip_id,
212 		uint16_t l2_offset)
213 {
214 	struct rte_mbuf *pkt_head, *pkt_tail, *lastseg;
215 	uint16_t hdr_len, l2_len;
216 
217 	if (cmp > 0) {
218 		pkt_head = item->firstseg;
219 		pkt_tail = pkt;
220 	} else {
221 		pkt_head = pkt;
222 		pkt_tail = item->firstseg;
223 	}
224 
225 	/* check if the IPv4 packet length is greater than the max value */
226 	hdr_len = l2_offset + pkt_head->l2_len + pkt_head->l3_len +
227 		pkt_head->l4_len;
228 	l2_len = l2_offset > 0 ? pkt_head->outer_l2_len : pkt_head->l2_len;
229 	if (unlikely(pkt_head->pkt_len - l2_len + pkt_tail->pkt_len -
230 				hdr_len > MAX_IPV4_PKT_LENGTH))
231 		return 0;
232 
233 	/* remove the packet header for the tail packet */
234 	rte_pktmbuf_adj(pkt_tail, hdr_len);
235 
236 	/* chain two packets together */
237 	if (cmp > 0) {
238 		item->lastseg->next = pkt;
239 		item->lastseg = rte_pktmbuf_lastseg(pkt);
240 		/* update IP ID to the larger value */
241 		item->ip_id = ip_id;
242 	} else {
243 		lastseg = rte_pktmbuf_lastseg(pkt);
244 		lastseg->next = item->firstseg;
245 		item->firstseg = pkt;
246 		/* update sent_seq to the smaller value */
247 		item->sent_seq = sent_seq;
248 		item->ip_id = ip_id;
249 	}
250 	item->nb_merged++;
251 
252 	/* update MBUF metadata for the merged packet */
253 	pkt_head->nb_segs += pkt_tail->nb_segs;
254 	pkt_head->pkt_len += pkt_tail->pkt_len;
255 
256 	return 1;
257 }
258 
259 /*
260  * Check if two TCP/IPv4 packets are neighbors.
261  */
262 static inline int
263 check_seq_option(struct gro_tcp4_item *item,
264 		struct rte_tcp_hdr *tcph,
265 		uint32_t sent_seq,
266 		uint16_t ip_id,
267 		uint16_t tcp_hl,
268 		uint16_t tcp_dl,
269 		uint16_t l2_offset,
270 		uint8_t is_atomic)
271 {
272 	struct rte_mbuf *pkt_orig = item->firstseg;
273 	struct rte_ipv4_hdr *iph_orig;
274 	struct rte_tcp_hdr *tcph_orig;
275 	uint16_t len, tcp_hl_orig;
276 
277 	iph_orig = (struct rte_ipv4_hdr *)(rte_pktmbuf_mtod(pkt_orig, char *) +
278 			l2_offset + pkt_orig->l2_len);
279 	tcph_orig = (struct rte_tcp_hdr *)((char *)iph_orig + pkt_orig->l3_len);
280 	tcp_hl_orig = pkt_orig->l4_len;
281 
282 	/* Check if TCP option fields equal */
283 	len = RTE_MAX(tcp_hl, tcp_hl_orig) - sizeof(struct rte_tcp_hdr);
284 	if ((tcp_hl != tcp_hl_orig) || ((len > 0) &&
285 				(memcmp(tcph + 1, tcph_orig + 1,
286 					len) != 0)))
287 		return 0;
288 
289 	/* Don't merge packets whose DF bits are different */
290 	if (unlikely(item->is_atomic ^ is_atomic))
291 		return 0;
292 
293 	/* check if the two packets are neighbors */
294 	len = pkt_orig->pkt_len - l2_offset - pkt_orig->l2_len -
295 		pkt_orig->l3_len - tcp_hl_orig;
296 	if ((sent_seq == item->sent_seq + len) && (is_atomic ||
297 				(ip_id == item->ip_id + 1)))
298 		/* append the new packet */
299 		return 1;
300 	else if ((sent_seq + tcp_dl == item->sent_seq) && (is_atomic ||
301 				(ip_id + item->nb_merged == item->ip_id)))
302 		/* pre-pend the new packet */
303 		return -1;
304 
305 	return 0;
306 }
307 #endif
308