xref: /dpdk/lib/gro/gro_vxlan_udp4.c (revision 7917b0d38e92e8b9ec5a870415b791420e10f11a)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2020 Inspur Corporation
3  */
4 
5 #include <rte_malloc.h>
6 #include <rte_mbuf.h>
7 #include <rte_ethdev.h>
8 #include <rte_udp.h>
9 
10 #include "gro_vxlan_udp4.h"
11 
12 void *
13 gro_vxlan_udp4_tbl_create(uint16_t socket_id,
14 		uint16_t max_flow_num,
15 		uint16_t max_item_per_flow)
16 {
17 	struct gro_vxlan_udp4_tbl *tbl;
18 	size_t size;
19 	uint32_t entries_num, i;
20 
21 	entries_num = max_flow_num * max_item_per_flow;
22 	entries_num = RTE_MIN(entries_num, GRO_VXLAN_UDP4_TBL_MAX_ITEM_NUM);
23 
24 	if (entries_num == 0)
25 		return NULL;
26 
27 	tbl = rte_zmalloc_socket(__func__,
28 			sizeof(struct gro_vxlan_udp4_tbl),
29 			RTE_CACHE_LINE_SIZE,
30 			socket_id);
31 	if (tbl == NULL)
32 		return NULL;
33 
34 	size = sizeof(struct gro_vxlan_udp4_item) * entries_num;
35 	tbl->items = rte_zmalloc_socket(__func__,
36 			size,
37 			RTE_CACHE_LINE_SIZE,
38 			socket_id);
39 	if (tbl->items == NULL) {
40 		rte_free(tbl);
41 		return NULL;
42 	}
43 	tbl->max_item_num = entries_num;
44 
45 	size = sizeof(struct gro_vxlan_udp4_flow) * entries_num;
46 	tbl->flows = rte_zmalloc_socket(__func__,
47 			size,
48 			RTE_CACHE_LINE_SIZE,
49 			socket_id);
50 	if (tbl->flows == NULL) {
51 		rte_free(tbl->items);
52 		rte_free(tbl);
53 		return NULL;
54 	}
55 
56 	for (i = 0; i < entries_num; i++)
57 		tbl->flows[i].start_index = INVALID_ARRAY_INDEX;
58 	tbl->max_flow_num = entries_num;
59 
60 	return tbl;
61 }
62 
63 void
64 gro_vxlan_udp4_tbl_destroy(void *tbl)
65 {
66 	struct gro_vxlan_udp4_tbl *vxlan_tbl = tbl;
67 
68 	if (vxlan_tbl) {
69 		rte_free(vxlan_tbl->items);
70 		rte_free(vxlan_tbl->flows);
71 	}
72 	rte_free(vxlan_tbl);
73 }
74 
75 static inline uint32_t
76 find_an_empty_item(struct gro_vxlan_udp4_tbl *tbl)
77 {
78 	uint32_t max_item_num = tbl->max_item_num, i;
79 
80 	for (i = 0; i < max_item_num; i++)
81 		if (tbl->items[i].inner_item.firstseg == NULL)
82 			return i;
83 	return INVALID_ARRAY_INDEX;
84 }
85 
86 static inline uint32_t
87 find_an_empty_flow(struct gro_vxlan_udp4_tbl *tbl)
88 {
89 	uint32_t max_flow_num = tbl->max_flow_num, i;
90 
91 	for (i = 0; i < max_flow_num; i++)
92 		if (tbl->flows[i].start_index == INVALID_ARRAY_INDEX)
93 			return i;
94 	return INVALID_ARRAY_INDEX;
95 }
96 
97 static inline uint32_t
98 insert_new_item(struct gro_vxlan_udp4_tbl *tbl,
99 		struct rte_mbuf *pkt,
100 		uint64_t start_time,
101 		uint32_t prev_idx,
102 		uint16_t frag_offset,
103 		uint8_t is_last_frag)
104 {
105 	uint32_t item_idx;
106 
107 	item_idx = find_an_empty_item(tbl);
108 	if (unlikely(item_idx == INVALID_ARRAY_INDEX))
109 		return INVALID_ARRAY_INDEX;
110 
111 	tbl->items[item_idx].inner_item.firstseg = pkt;
112 	tbl->items[item_idx].inner_item.lastseg = rte_pktmbuf_lastseg(pkt);
113 	tbl->items[item_idx].inner_item.start_time = start_time;
114 	tbl->items[item_idx].inner_item.next_pkt_idx = INVALID_ARRAY_INDEX;
115 	tbl->items[item_idx].inner_item.frag_offset = frag_offset;
116 	tbl->items[item_idx].inner_item.is_last_frag = is_last_frag;
117 	tbl->items[item_idx].inner_item.nb_merged = 1;
118 	tbl->item_num++;
119 
120 	/* If the previous packet exists, chain the new one with it. */
121 	if (prev_idx != INVALID_ARRAY_INDEX) {
122 		tbl->items[item_idx].inner_item.next_pkt_idx =
123 			tbl->items[prev_idx].inner_item.next_pkt_idx;
124 		tbl->items[prev_idx].inner_item.next_pkt_idx = item_idx;
125 	}
126 
127 	return item_idx;
128 }
129 
130 static inline uint32_t
131 delete_item(struct gro_vxlan_udp4_tbl *tbl,
132 		uint32_t item_idx,
133 		uint32_t prev_item_idx)
134 {
135 	uint32_t next_idx = tbl->items[item_idx].inner_item.next_pkt_idx;
136 
137 	/* NULL indicates an empty item. */
138 	tbl->items[item_idx].inner_item.firstseg = NULL;
139 	tbl->item_num--;
140 	if (prev_item_idx != INVALID_ARRAY_INDEX)
141 		tbl->items[prev_item_idx].inner_item.next_pkt_idx = next_idx;
142 
143 	return next_idx;
144 }
145 
146 static inline uint32_t
147 insert_new_flow(struct gro_vxlan_udp4_tbl *tbl,
148 		struct vxlan_udp4_flow_key *src,
149 		uint32_t item_idx)
150 {
151 	struct vxlan_udp4_flow_key *dst;
152 	uint32_t flow_idx;
153 
154 	flow_idx = find_an_empty_flow(tbl);
155 	if (unlikely(flow_idx == INVALID_ARRAY_INDEX))
156 		return INVALID_ARRAY_INDEX;
157 
158 	dst = &(tbl->flows[flow_idx].key);
159 
160 	rte_ether_addr_copy(&(src->inner_key.eth_saddr),
161 			&(dst->inner_key.eth_saddr));
162 	rte_ether_addr_copy(&(src->inner_key.eth_daddr),
163 			&(dst->inner_key.eth_daddr));
164 	dst->inner_key.ip_src_addr = src->inner_key.ip_src_addr;
165 	dst->inner_key.ip_dst_addr = src->inner_key.ip_dst_addr;
166 	dst->inner_key.ip_id = src->inner_key.ip_id;
167 
168 	dst->vxlan_hdr.vx_flags = src->vxlan_hdr.vx_flags;
169 	dst->vxlan_hdr.vx_vni = src->vxlan_hdr.vx_vni;
170 	rte_ether_addr_copy(&(src->outer_eth_saddr), &(dst->outer_eth_saddr));
171 	rte_ether_addr_copy(&(src->outer_eth_daddr), &(dst->outer_eth_daddr));
172 	dst->outer_ip_src_addr = src->outer_ip_src_addr;
173 	dst->outer_ip_dst_addr = src->outer_ip_dst_addr;
174 	dst->outer_dst_port = src->outer_dst_port;
175 
176 	tbl->flows[flow_idx].start_index = item_idx;
177 	tbl->flow_num++;
178 
179 	return flow_idx;
180 }
181 
182 static inline int
183 is_same_vxlan_udp4_flow(struct vxlan_udp4_flow_key k1,
184 		struct vxlan_udp4_flow_key k2)
185 {
186 	/* For VxLAN packet, outer udp src port is calculated from
187 	 * inner packet RSS hash, udp src port of the first UDP
188 	 * fragment is different from one of other UDP fragments
189 	 * even if they are same flow, so we have to skip outer udp
190 	 * src port comparison here.
191 	 */
192 	return (rte_is_same_ether_addr(&k1.outer_eth_saddr,
193 					&k2.outer_eth_saddr) &&
194 			rte_is_same_ether_addr(&k1.outer_eth_daddr,
195 				&k2.outer_eth_daddr) &&
196 			(k1.outer_ip_src_addr == k2.outer_ip_src_addr) &&
197 			(k1.outer_ip_dst_addr == k2.outer_ip_dst_addr) &&
198 			(k1.outer_dst_port == k2.outer_dst_port) &&
199 			(k1.vxlan_hdr.vx_flags == k2.vxlan_hdr.vx_flags) &&
200 			(k1.vxlan_hdr.vx_vni == k2.vxlan_hdr.vx_vni) &&
201 			is_same_udp4_flow(k1.inner_key, k2.inner_key));
202 }
203 
204 static inline int
205 udp4_check_vxlan_neighbor(struct gro_vxlan_udp4_item *item,
206 		uint16_t frag_offset,
207 		uint16_t ip_dl)
208 {
209 	struct rte_mbuf *pkt = item->inner_item.firstseg;
210 	int cmp;
211 	uint16_t l2_offset;
212 	int ret = 0;
213 
214 	/* Note: if outer DF bit is set, i.e outer_is_atomic is 0,
215 	 * we needn't compare outer_ip_id because they are same,
216 	 * for the case outer_is_atomic is 1, we also have no way
217 	 * to compare outer_ip_id because the difference between
218 	 * outer_ip_ids of two received packets isn't always +/-1.
219 	 * So skip outer_ip_id comparison here.
220 	 */
221 
222 	l2_offset = pkt->outer_l2_len + pkt->outer_l3_len;
223 	cmp = udp4_check_neighbor(&item->inner_item, frag_offset, ip_dl,
224 					l2_offset);
225 	if (cmp > 0)
226 		/* Append the new packet. */
227 		ret = 1;
228 	else if (cmp < 0)
229 		/* Prepend the new packet. */
230 		ret = -1;
231 
232 	return ret;
233 }
234 
235 static inline int
236 merge_two_vxlan_udp4_packets(struct gro_vxlan_udp4_item *item,
237 		struct rte_mbuf *pkt,
238 		int cmp,
239 		uint16_t frag_offset,
240 		uint8_t is_last_frag)
241 {
242 	if (merge_two_udp4_packets(&item->inner_item, pkt, cmp, frag_offset,
243 				is_last_frag,
244 				pkt->outer_l2_len + pkt->outer_l3_len)) {
245 		return 1;
246 	}
247 
248 	return 0;
249 }
250 
251 static inline void
252 update_vxlan_header(struct gro_vxlan_udp4_item *item)
253 {
254 	struct rte_ipv4_hdr *ipv4_hdr;
255 	struct rte_udp_hdr *udp_hdr;
256 	struct rte_mbuf *pkt = item->inner_item.firstseg;
257 	uint16_t len;
258 	uint16_t frag_offset;
259 
260 	/* Update the outer IPv4 header. */
261 	len = pkt->pkt_len - pkt->outer_l2_len;
262 	ipv4_hdr = rte_pktmbuf_mtod_offset(pkt, struct rte_ipv4_hdr *,
263 					   pkt->outer_l2_len);
264 	ipv4_hdr->total_length = rte_cpu_to_be_16(len);
265 
266 	/* Update the outer UDP header. */
267 	len -= pkt->outer_l3_len;
268 	udp_hdr = (struct rte_udp_hdr *)((char *)ipv4_hdr + pkt->outer_l3_len);
269 	udp_hdr->dgram_len = rte_cpu_to_be_16(len);
270 
271 	/* Update the inner IPv4 header. */
272 	len -= pkt->l2_len;
273 	ipv4_hdr = (struct rte_ipv4_hdr *)((char *)udp_hdr + pkt->l2_len);
274 	ipv4_hdr->total_length = rte_cpu_to_be_16(len);
275 
276 	/* Clear MF bit if it is last fragment */
277 	if (item->inner_item.is_last_frag) {
278 		frag_offset = rte_be_to_cpu_16(ipv4_hdr->fragment_offset);
279 		ipv4_hdr->fragment_offset =
280 			rte_cpu_to_be_16(frag_offset & ~RTE_IPV4_HDR_MF_FLAG);
281 	}
282 }
283 
284 int32_t
285 gro_vxlan_udp4_reassemble(struct rte_mbuf *pkt,
286 		struct gro_vxlan_udp4_tbl *tbl,
287 		uint64_t start_time)
288 {
289 	struct rte_ether_hdr *outer_eth_hdr, *eth_hdr;
290 	struct rte_ipv4_hdr *outer_ipv4_hdr, *ipv4_hdr;
291 	struct rte_udp_hdr *udp_hdr;
292 	struct rte_vxlan_hdr *vxlan_hdr;
293 	uint16_t frag_offset;
294 	uint8_t is_last_frag;
295 	int16_t ip_dl;
296 	uint16_t ip_id;
297 
298 	struct vxlan_udp4_flow_key key;
299 	uint32_t cur_idx, prev_idx, item_idx;
300 	uint32_t i, max_flow_num, remaining_flow_num;
301 	int cmp;
302 	uint16_t hdr_len;
303 	uint8_t find;
304 
305 	outer_eth_hdr = rte_pktmbuf_mtod(pkt, struct rte_ether_hdr *);
306 	outer_ipv4_hdr = (struct rte_ipv4_hdr *)((char *)outer_eth_hdr +
307 			pkt->outer_l2_len);
308 
309 	udp_hdr = (struct rte_udp_hdr *)((char *)outer_ipv4_hdr +
310 			pkt->outer_l3_len);
311 	vxlan_hdr = (struct rte_vxlan_hdr *)((char *)udp_hdr +
312 			sizeof(struct rte_udp_hdr));
313 	eth_hdr = (struct rte_ether_hdr *)((char *)vxlan_hdr +
314 			sizeof(struct rte_vxlan_hdr));
315 	/* l2_len = outer udp hdr len + vxlan hdr len + inner l2 len */
316 	ipv4_hdr = (struct rte_ipv4_hdr *)((char *)udp_hdr + pkt->l2_len);
317 
318 	/*
319 	 * Don't process the packet which has non-fragment inner IP.
320 	 */
321 	if (!is_ipv4_fragment(ipv4_hdr))
322 		return -1;
323 
324 	hdr_len = pkt->outer_l2_len + pkt->outer_l3_len + pkt->l2_len +
325 		pkt->l3_len;
326 	/*
327 	 * Don't process the packet whose payload length is less than or
328 	 * equal to 0.
329 	 */
330 	if (pkt->pkt_len <= hdr_len)
331 		return -1;
332 
333 	ip_dl = pkt->pkt_len - hdr_len;
334 
335 	ip_id = rte_be_to_cpu_16(ipv4_hdr->packet_id);
336 	frag_offset = rte_be_to_cpu_16(ipv4_hdr->fragment_offset);
337 	is_last_frag = ((frag_offset & RTE_IPV4_HDR_MF_FLAG) == 0) ? 1 : 0;
338 	frag_offset = (uint16_t)(frag_offset & RTE_IPV4_HDR_OFFSET_MASK) << 3;
339 
340 	rte_ether_addr_copy(&(eth_hdr->src_addr), &(key.inner_key.eth_saddr));
341 	rte_ether_addr_copy(&(eth_hdr->dst_addr), &(key.inner_key.eth_daddr));
342 	key.inner_key.ip_src_addr = ipv4_hdr->src_addr;
343 	key.inner_key.ip_dst_addr = ipv4_hdr->dst_addr;
344 	key.inner_key.ip_id = ip_id;
345 
346 	key.vxlan_hdr.vx_flags = vxlan_hdr->vx_flags;
347 	key.vxlan_hdr.vx_vni = vxlan_hdr->vx_vni;
348 	rte_ether_addr_copy(&(outer_eth_hdr->src_addr), &(key.outer_eth_saddr));
349 	rte_ether_addr_copy(&(outer_eth_hdr->dst_addr), &(key.outer_eth_daddr));
350 	key.outer_ip_src_addr = outer_ipv4_hdr->src_addr;
351 	key.outer_ip_dst_addr = outer_ipv4_hdr->dst_addr;
352 	/* Note: It is unnecessary to save outer_src_port here because it can
353 	 * be different for VxLAN UDP fragments from the same flow.
354 	 */
355 	key.outer_dst_port = udp_hdr->dst_port;
356 
357 	/* Search for a matched flow. */
358 	max_flow_num = tbl->max_flow_num;
359 	remaining_flow_num = tbl->flow_num;
360 	find = 0;
361 	for (i = 0; i < max_flow_num && remaining_flow_num; i++) {
362 		if (tbl->flows[i].start_index != INVALID_ARRAY_INDEX) {
363 			if (is_same_vxlan_udp4_flow(tbl->flows[i].key, key)) {
364 				find = 1;
365 				break;
366 			}
367 			remaining_flow_num--;
368 		}
369 	}
370 
371 	/*
372 	 * Can't find a matched flow. Insert a new flow and store the
373 	 * packet into the flow.
374 	 */
375 	if (find == 0) {
376 		item_idx = insert_new_item(tbl, pkt, start_time,
377 				INVALID_ARRAY_INDEX, frag_offset,
378 				is_last_frag);
379 		if (unlikely(item_idx == INVALID_ARRAY_INDEX))
380 			return -1;
381 		if (insert_new_flow(tbl, &key, item_idx) ==
382 				INVALID_ARRAY_INDEX) {
383 			/*
384 			 * Fail to insert a new flow, so
385 			 * delete the inserted packet.
386 			 */
387 			delete_item(tbl, item_idx, INVALID_ARRAY_INDEX);
388 			return -1;
389 		}
390 		return 0;
391 	}
392 
393 	/* Check all packets in the flow and try to find a neighbor. */
394 	cur_idx = tbl->flows[i].start_index;
395 	prev_idx = cur_idx;
396 	do {
397 		cmp = udp4_check_vxlan_neighbor(&(tbl->items[cur_idx]),
398 				frag_offset, ip_dl);
399 		if (cmp) {
400 			if (merge_two_vxlan_udp4_packets(
401 						&(tbl->items[cur_idx]),
402 						pkt, cmp, frag_offset,
403 						is_last_frag)) {
404 				return 1;
405 			}
406 			/*
407 			 * Can't merge two packets, as the packet
408 			 * length will be greater than the max value.
409 			 * Insert the packet into the flow.
410 			 */
411 			if (insert_new_item(tbl, pkt, start_time, prev_idx,
412 						frag_offset, is_last_frag) ==
413 					INVALID_ARRAY_INDEX)
414 				return -1;
415 			return 0;
416 		}
417 
418 		/* Ensure inserted items are ordered by frag_offset */
419 		if (frag_offset
420 			< tbl->items[cur_idx].inner_item.frag_offset) {
421 			break;
422 		}
423 
424 		prev_idx = cur_idx;
425 		cur_idx = tbl->items[cur_idx].inner_item.next_pkt_idx;
426 	} while (cur_idx != INVALID_ARRAY_INDEX);
427 
428 	/* Can't find neighbor. Insert the packet into the flow. */
429 	if (cur_idx == tbl->flows[i].start_index) {
430 		/* Insert it before the first packet of the flow */
431 		item_idx = insert_new_item(tbl, pkt, start_time,
432 				INVALID_ARRAY_INDEX, frag_offset,
433 				is_last_frag);
434 		if (unlikely(item_idx == INVALID_ARRAY_INDEX))
435 			return -1;
436 		tbl->items[item_idx].inner_item.next_pkt_idx = cur_idx;
437 		tbl->flows[i].start_index = item_idx;
438 	} else {
439 		if (insert_new_item(tbl, pkt, start_time, prev_idx,
440 					frag_offset, is_last_frag
441 					) == INVALID_ARRAY_INDEX)
442 			return -1;
443 	}
444 
445 	return 0;
446 }
447 
448 static int
449 gro_vxlan_udp4_merge_items(struct gro_vxlan_udp4_tbl *tbl,
450 			   uint32_t start_idx)
451 {
452 	uint16_t frag_offset;
453 	uint8_t is_last_frag;
454 	int16_t ip_dl;
455 	struct rte_mbuf *pkt;
456 	int cmp;
457 	uint32_t item_idx;
458 	uint16_t hdr_len;
459 
460 	item_idx = tbl->items[start_idx].inner_item.next_pkt_idx;
461 	while (item_idx != INVALID_ARRAY_INDEX) {
462 		pkt = tbl->items[item_idx].inner_item.firstseg;
463 		hdr_len = pkt->outer_l2_len + pkt->outer_l3_len + pkt->l2_len +
464 			pkt->l3_len;
465 		ip_dl = pkt->pkt_len - hdr_len;
466 		frag_offset = tbl->items[item_idx].inner_item.frag_offset;
467 		is_last_frag = tbl->items[item_idx].inner_item.is_last_frag;
468 		cmp = udp4_check_vxlan_neighbor(&(tbl->items[start_idx]),
469 					frag_offset, ip_dl);
470 		if (cmp) {
471 			if (merge_two_vxlan_udp4_packets(
472 					&(tbl->items[start_idx]),
473 					pkt, cmp, frag_offset,
474 					is_last_frag)) {
475 				item_idx = delete_item(tbl, item_idx,
476 							INVALID_ARRAY_INDEX);
477 				tbl->items[start_idx].inner_item.next_pkt_idx
478 					= item_idx;
479 			} else
480 				return 0;
481 		} else
482 			return 0;
483 	}
484 
485 	return 0;
486 }
487 
488 uint16_t
489 gro_vxlan_udp4_tbl_timeout_flush(struct gro_vxlan_udp4_tbl *tbl,
490 		uint64_t flush_timestamp,
491 		struct rte_mbuf **out,
492 		uint16_t nb_out)
493 {
494 	uint16_t k = 0;
495 	uint32_t i, j;
496 	uint32_t max_flow_num = tbl->max_flow_num;
497 
498 	for (i = 0; i < max_flow_num; i++) {
499 		if (unlikely(tbl->flow_num == 0))
500 			return k;
501 
502 		j = tbl->flows[i].start_index;
503 		while (j != INVALID_ARRAY_INDEX) {
504 			if (tbl->items[j].inner_item.start_time <=
505 					flush_timestamp) {
506 				gro_vxlan_udp4_merge_items(tbl, j);
507 				out[k++] = tbl->items[j].inner_item.firstseg;
508 				if (tbl->items[j].inner_item.nb_merged > 1)
509 					update_vxlan_header(&(tbl->items[j]));
510 				/*
511 				 * Delete the item and get the next packet
512 				 * index.
513 				 */
514 				j = delete_item(tbl, j, INVALID_ARRAY_INDEX);
515 				tbl->flows[i].start_index = j;
516 				if (j == INVALID_ARRAY_INDEX)
517 					tbl->flow_num--;
518 
519 				if (unlikely(k == nb_out))
520 					return k;
521 			} else
522 				/*
523 				 * Flushing packets does not strictly follow
524 				 * timestamp. It does not flush left packets of
525 				 * the flow this time once it finds one item
526 				 * whose start_time is greater than
527 				 * flush_timestamp. So go to check other flows.
528 				 */
529 				break;
530 		}
531 	}
532 	return k;
533 }
534 
535 uint32_t
536 gro_vxlan_udp4_tbl_pkt_count(void *tbl)
537 {
538 	struct gro_vxlan_udp4_tbl *gro_tbl = tbl;
539 
540 	if (gro_tbl)
541 		return gro_tbl->item_num;
542 
543 	return 0;
544 }
545