1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2016 Intel Corporation
3  */
4 
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <stdint.h>
8 #include <inttypes.h>
9 #include <sys/types.h>
10 #include <string.h>
11 #include <sys/queue.h>
12 #include <stdarg.h>
13 #include <errno.h>
14 #include <getopt.h>
15 #include <stdbool.h>
16 #include <netinet/in.h>
17 
18 #include <rte_debug.h>
19 #include <rte_ether.h>
20 #include <rte_ethdev.h>
21 #include <rte_cycles.h>
22 #include <rte_mbuf.h>
23 #include <rte_ip.h>
24 #include <rte_tcp.h>
25 #include <rte_udp.h>
26 #include <rte_hash.h>
27 
28 #include "l3fwd.h"
29 #include "l3fwd_event.h"
30 #include "em_route_parse.c"
31 
32 #if defined(RTE_ARCH_X86) || defined(__ARM_FEATURE_CRC32)
33 #define EM_HASH_CRC 1
34 #endif
35 
36 #ifdef EM_HASH_CRC
37 #include <rte_hash_crc.h>
38 #define DEFAULT_HASH_FUNC       rte_hash_crc
39 #else
40 #include <rte_jhash.h>
41 #define DEFAULT_HASH_FUNC       rte_jhash
42 #endif
43 
44 #define IPV6_ADDR_LEN 16
45 
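/* Padded 16-byte IPv4 flow key.  The struct layout deliberately mirrors an
 * IPv4 header read from time_to_live onwards, followed by the L4 ports, so
 * em_get_ipv4_dst_port() can build the key with a single masked 16-byte
 * vector load; pad0/pad1 overlay the TTL and header checksum and are zeroed
 * by mask0.
 */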
46 union ipv4_5tuple_host {
47 	struct {
48 		uint8_t  pad0;
49 		uint8_t  proto;
50 		uint16_t pad1;
51 		uint32_t ip_src;
52 		uint32_t ip_dst;
53 		uint16_t port_src;
54 		uint16_t port_dst;
55 	};
56 	xmm_t xmm;
57 };
58 
59 #define XMM_NUM_IN_IPV6_5TUPLE 3
60 
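/* Padded 48-byte (3 x 16-byte) IPv6 flow key.  The layout mirrors an IPv6
 * header read from payload_len onwards, followed by the L4 ports:
 * pad0/pad1 overlay payload_len and hop_limits, and the trailing reserve
 * field pads the key to a multiple of 16 bytes so em_get_ipv6_dst_port()
 * can fill it with three vector loads.
 */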
61 union ipv6_5tuple_host {
62 	struct {
63 		uint16_t pad0;
64 		uint8_t  proto;
65 		uint8_t  pad1;
66 		uint8_t  ip_src[IPV6_ADDR_LEN];
67 		uint8_t  ip_dst[IPV6_ADDR_LEN];
68 		uint16_t port_src;
69 		uint16_t port_dst;
70 		uint64_t reserve;
71 	};
72 	xmm_t xmm[XMM_NUM_IN_IPV6_5TUPLE];
73 };
74 
75 /* Addresses in 198.18.0.0/15 are set aside for RFC2544 benchmarking (RFC5735).
76  * These routes use the RFC863 Discard Protocol port (9).
77  */
78 const struct ipv4_l3fwd_em_route ipv4_l3fwd_em_route_array[] = {
79 	{{RTE_IPV4(198, 18, 0, 0), RTE_IPV4(198, 18, 0, 1),  9, 9, IPPROTO_UDP}, 0},
80 	{{RTE_IPV4(198, 18, 1, 0), RTE_IPV4(198, 18, 1, 1),  9, 9, IPPROTO_UDP}, 1},
81 	{{RTE_IPV4(198, 18, 2, 0), RTE_IPV4(198, 18, 2, 1),  9, 9, IPPROTO_UDP}, 2},
82 	{{RTE_IPV4(198, 18, 3, 0), RTE_IPV4(198, 18, 3, 1),  9, 9, IPPROTO_UDP}, 3},
83 	{{RTE_IPV4(198, 18, 4, 0), RTE_IPV4(198, 18, 4, 1),  9, 9, IPPROTO_UDP}, 4},
84 	{{RTE_IPV4(198, 18, 5, 0), RTE_IPV4(198, 18, 5, 1),  9, 9, IPPROTO_UDP}, 5},
85 	{{RTE_IPV4(198, 18, 6, 0), RTE_IPV4(198, 18, 6, 1),  9, 9, IPPROTO_UDP}, 6},
86 	{{RTE_IPV4(198, 18, 7, 0), RTE_IPV4(198, 18, 7, 1),  9, 9, IPPROTO_UDP}, 7},
87 	{{RTE_IPV4(198, 18, 8, 0), RTE_IPV4(198, 18, 8, 1),  9, 9, IPPROTO_UDP}, 8},
88 	{{RTE_IPV4(198, 18, 9, 0), RTE_IPV4(198, 18, 9, 1),  9, 9, IPPROTO_UDP}, 9},
89 	{{RTE_IPV4(198, 18, 10, 0), RTE_IPV4(198, 18, 10, 1),  9, 9, IPPROTO_UDP}, 10},
90 	{{RTE_IPV4(198, 18, 11, 0), RTE_IPV4(198, 18, 11, 1),  9, 9, IPPROTO_UDP}, 11},
91 	{{RTE_IPV4(198, 18, 12, 0), RTE_IPV4(198, 18, 12, 1),  9, 9, IPPROTO_UDP}, 12},
92 	{{RTE_IPV4(198, 18, 13, 0), RTE_IPV4(198, 18, 13, 1),  9, 9, IPPROTO_UDP}, 13},
93 	{{RTE_IPV4(198, 18, 14, 0), RTE_IPV4(198, 18, 14, 1),  9, 9, IPPROTO_UDP}, 14},
94 	{{RTE_IPV4(198, 18, 15, 0), RTE_IPV4(198, 18, 15, 1),  9, 9, IPPROTO_UDP}, 15},
95 };
96 
97 /* 2001:0200::/48 is the IANA-reserved range for IPv6 benchmarking (RFC5180).
98  * These routes use the RFC863 Discard Protocol port (9).
99  */
100 const struct ipv6_l3fwd_em_route ipv6_l3fwd_em_route_array[] = {
101 	{{{32, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
102 	  {32, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}, 9, 9, IPPROTO_UDP}, 0},
103 	{{{32, 1, 2, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0},
104 	  {32, 1, 2, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1}, 9, 9, IPPROTO_UDP}, 1},
105 	{{{32, 1, 2, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0},
106 	  {32, 1, 2, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 1}, 9, 9, IPPROTO_UDP}, 2},
107 	{{{32, 1, 2, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0},
108 	  {32, 1, 2, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 1}, 9, 9, IPPROTO_UDP}, 3},
109 	{{{32, 1, 2, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0},
110 	  {32, 1, 2, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 1}, 9, 9, IPPROTO_UDP}, 4},
111 	{{{32, 1, 2, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0},
112 	  {32, 1, 2, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 1}, 9, 9, IPPROTO_UDP}, 5},
113 	{{{32, 1, 2, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0},
114 	  {32, 1, 2, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 1}, 9, 9, IPPROTO_UDP}, 6},
115 	{{{32, 1, 2, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0},
116 	  {32, 1, 2, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 1}, 9, 9, IPPROTO_UDP}, 7},
117 	{{{32, 1, 2, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0},
118 	  {32, 1, 2, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 1}, 9, 9, IPPROTO_UDP}, 8},
119 	{{{32, 1, 2, 0, 0, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0},
120 	  {32, 1, 2, 0, 0, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 1}, 9, 9, IPPROTO_UDP}, 9},
121 	{{{32, 1, 2, 0, 0, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0},
122 	  {32, 1, 2, 0, 0, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 1}, 9, 9, IPPROTO_UDP}, 10},
123 	{{{32, 1, 2, 0, 0, 0, 0, 11, 0, 0, 0, 0, 0, 0, 0, 0},
124 	  {32, 1, 2, 0, 0, 0, 0, 11, 0, 0, 0, 0, 0, 0, 0, 1}, 9, 9, IPPROTO_UDP}, 11},
125 	{{{32, 1, 2, 0, 0, 0, 0, 12, 0, 0, 0, 0, 0, 0, 0, 0},
126 	  {32, 1, 2, 0, 0, 0, 0, 12, 0, 0, 0, 0, 0, 0, 0, 1}, 9, 9, IPPROTO_UDP}, 12},
127 	{{{32, 1, 2, 0, 0, 0, 0, 13, 0, 0, 0, 0, 0, 0, 0, 0},
128 	  {32, 1, 2, 0, 0, 0, 0, 13, 0, 0, 0, 0, 0, 0, 0, 1}, 9, 9, IPPROTO_UDP}, 13},
129 	{{{32, 1, 2, 0, 0, 0, 0, 14, 0, 0, 0, 0, 0, 0, 0, 0},
130 	  {32, 1, 2, 0, 0, 0, 0, 14, 0, 0, 0, 0, 0, 0, 0, 1}, 9, 9, IPPROTO_UDP}, 14},
131 	{{{32, 1, 2, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 0},
132 	  {32, 1, 2, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 1}, 9, 9, IPPROTO_UDP}, 15},
133 };
134 
135 struct rte_hash *ipv4_l3fwd_em_lookup_struct[NB_SOCKETS];
136 struct rte_hash *ipv6_l3fwd_em_lookup_struct[NB_SOCKETS];
137 
138 static inline uint32_t
139 ipv4_hash_crc(const void *data, __rte_unused uint32_t data_len,
140 		uint32_t init_val)
141 {
142 	const union ipv4_5tuple_host *k;
143 	uint32_t t;
144 	const uint32_t *p;
145 
146 	k = data;
147 	t = k->proto;
148 	p = (const uint32_t *)&k->port_src;
149 
150 #ifdef EM_HASH_CRC
151 	init_val = rte_hash_crc_4byte(t, init_val);
152 	init_val = rte_hash_crc_4byte(k->ip_src, init_val);
153 	init_val = rte_hash_crc_4byte(k->ip_dst, init_val);
154 	init_val = rte_hash_crc_4byte(*p, init_val);
155 #else
156 	init_val = rte_jhash_1word(t, init_val);
157 	init_val = rte_jhash_1word(k->ip_src, init_val);
158 	init_val = rte_jhash_1word(k->ip_dst, init_val);
159 	init_val = rte_jhash_1word(*p, init_val);
160 #endif
161 
162 	return init_val;
163 }
164 
165 static inline uint32_t
166 ipv6_hash_crc(const void *data, __rte_unused uint32_t data_len,
167 		uint32_t init_val)
168 {
169 	const union ipv6_5tuple_host *k;
170 	uint32_t t;
171 	const uint32_t *p;
172 #ifdef EM_HASH_CRC
173 	const uint32_t  *ip_src0, *ip_src1, *ip_src2, *ip_src3;
174 	const uint32_t  *ip_dst0, *ip_dst1, *ip_dst2, *ip_dst3;
175 #endif
176 
177 	k = data;
178 	t = k->proto;
179 	p = (const uint32_t *)&k->port_src;
180 
181 #ifdef EM_HASH_CRC
182 	ip_src0 = (const uint32_t *) k->ip_src;
183 	ip_src1 = (const uint32_t *)(k->ip_src+4);
184 	ip_src2 = (const uint32_t *)(k->ip_src+8);
185 	ip_src3 = (const uint32_t *)(k->ip_src+12);
186 	ip_dst0 = (const uint32_t *) k->ip_dst;
187 	ip_dst1 = (const uint32_t *)(k->ip_dst+4);
188 	ip_dst2 = (const uint32_t *)(k->ip_dst+8);
189 	ip_dst3 = (const uint32_t *)(k->ip_dst+12);
190 	init_val = rte_hash_crc_4byte(t, init_val);
191 	init_val = rte_hash_crc_4byte(*ip_src0, init_val);
192 	init_val = rte_hash_crc_4byte(*ip_src1, init_val);
193 	init_val = rte_hash_crc_4byte(*ip_src2, init_val);
194 	init_val = rte_hash_crc_4byte(*ip_src3, init_val);
195 	init_val = rte_hash_crc_4byte(*ip_dst0, init_val);
196 	init_val = rte_hash_crc_4byte(*ip_dst1, init_val);
197 	init_val = rte_hash_crc_4byte(*ip_dst2, init_val);
198 	init_val = rte_hash_crc_4byte(*ip_dst3, init_val);
199 	init_val = rte_hash_crc_4byte(*p, init_val);
200 #else
201 	init_val = rte_jhash_1word(t, init_val);
202 	init_val = rte_jhash(k->ip_src,
203 			sizeof(uint8_t) * IPV6_ADDR_LEN, init_val);
204 	init_val = rte_jhash(k->ip_dst,
205 			sizeof(uint8_t) * IPV6_ADDR_LEN, init_val);
206 	init_val = rte_jhash_1word(*p, init_val);
207 #endif
208 	return init_val;
209 }
210 
211 static alignas(RTE_CACHE_LINE_SIZE) uint8_t ipv4_l3fwd_out_if[L3FWD_HASH_ENTRIES];
212 static alignas(RTE_CACHE_LINE_SIZE) uint8_t ipv6_l3fwd_out_if[L3FWD_HASH_ENTRIES];
213 
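/* Key masks used by em_mask_key() to clear the non-key (pad) bytes of the
 * vector-loaded flow keys.  They are initialised in
 * populate_ipv4_flow_into_table() and populate_ipv6_flow_into_table().
 */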
214 static rte_xmm_t mask0;
215 static rte_xmm_t mask1;
216 static rte_xmm_t mask2;
217 
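/* em_mask_key(): load 16 bytes of the packet header and AND them with the
 * given mask so that only the 5-tuple fields remain; one implementation per
 * supported vector ISA.
 */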
218 #if defined(__SSE2__)
219 static inline xmm_t
220 em_mask_key(void *key, xmm_t mask)
221 {
222 	__m128i data = _mm_loadu_si128((__m128i *)(key));
223 
224 	return _mm_and_si128(data, mask);
225 }
226 #elif defined(__ARM_NEON)
227 static inline xmm_t
228 em_mask_key(void *key, xmm_t mask)
229 {
230 	int32x4_t data = vld1q_s32((int32_t *)key);
231 
232 	return vandq_s32(data, mask);
233 }
234 #elif defined(__ALTIVEC__)
235 static inline xmm_t
236 em_mask_key(void *key, xmm_t mask)
237 {
238 	xmm_t data = vec_ld(0, (xmm_t *)(key));
239 
240 	return vec_and(data, mask);
241 }
242 #elif defined(RTE_ARCH_RISCV)
243 static inline xmm_t
244 em_mask_key(void *key, xmm_t mask)
245 {
246 	xmm_t data = vect_load_128(key);
247 
248 	return vect_and(data, mask);
249 }
250 #elif defined(RTE_ARCH_LOONGARCH)
251 static inline xmm_t
252 em_mask_key(void *key, xmm_t mask)
253 {
254 	xmm_t data = vect_load_128(key);
255 
256 	return vect_and(data, mask);
257 }
258 #else
259 #error No vector engine (SSE, NEON, ALTIVEC) available, check your toolchain
260 #endif
261 
262 /* Performing hash-based lookups. 8< */
263 static inline uint16_t
264 em_get_ipv4_dst_port(void *ipv4_hdr, uint16_t portid, void *lookup_struct)
265 {
266 	int ret = 0;
267 	union ipv4_5tuple_host key;
268 	struct rte_hash *ipv4_l3fwd_lookup_struct =
269 		(struct rte_hash *)lookup_struct;
270 
271 	ipv4_hdr = (uint8_t *)ipv4_hdr +
272 		offsetof(struct rte_ipv4_hdr, time_to_live);
273 
274 	/*
275 	 * Get 5 tuple: dst port, src port, dst IP address,
276 	 * src IP address and protocol.
277 	 */
278 	key.xmm = em_mask_key(ipv4_hdr, mask0.x);
279 
280 	/* Find destination port */
281 	ret = rte_hash_lookup(ipv4_l3fwd_lookup_struct, (const void *)&key);
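	/* rte_hash_lookup() returns the key index on a hit (used to index
	 * ipv4_l3fwd_out_if[]) or a negative value on a miss, in which case
	 * the packet is sent back out of the port it arrived on.
	 */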
282 	return (ret < 0) ? portid : ipv4_l3fwd_out_if[ret];
283 }
284 /* >8 End of performing hash-based lookups. */
285 
286 static inline uint16_t
287 em_get_ipv6_dst_port(void *ipv6_hdr, uint16_t portid, void *lookup_struct)
288 {
289 	int ret = 0;
290 	union ipv6_5tuple_host key;
291 	struct rte_hash *ipv6_l3fwd_lookup_struct =
292 		(struct rte_hash *)lookup_struct;
293 
294 	ipv6_hdr = (uint8_t *)ipv6_hdr +
295 		offsetof(struct rte_ipv6_hdr, payload_len);
296 	void *data0 = ipv6_hdr;
297 	void *data1 = ((uint8_t *)ipv6_hdr) + sizeof(xmm_t);
298 	void *data2 = ((uint8_t *)ipv6_hdr) + sizeof(xmm_t) + sizeof(xmm_t);
299 
300 	/* Get part of 5 tuple: src IP address lower 96 bits and protocol */
301 	key.xmm[0] = em_mask_key(data0, mask1.x);
302 
303 	/*
304 	 * Get part of 5 tuple: dst IP address lower 96 bits
305 	 * and src IP address higher 32 bits.
306 	 */
307 #if defined RTE_ARCH_X86
308 	key.xmm[1] = _mm_loadu_si128(data1);
309 #else
310 	key.xmm[1] = *(xmm_t *)data1;
311 #endif
312 
313 	/*
314 	 * Get part of 5 tuple: dst port and src port
315 	 * and dst IP address higher 32 bits.
316 	 */
317 	key.xmm[2] = em_mask_key(data2, mask2.x);
318 
319 	/* Find destination port */
320 	ret = rte_hash_lookup(ipv6_l3fwd_lookup_struct, (const void *)&key);
321 	return (ret < 0) ? portid : ipv6_l3fwd_out_if[ret];
322 }
323 
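/* Pick the send-path implementation: on x86/NEON use either the sequential
 * (one lookup per packet) or the hash-multi-lookup variant; otherwise fall
 * back to the generic, non-vectorised path in l3fwd_em.h.
 */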
324 #if defined RTE_ARCH_X86 || defined __ARM_NEON
325 #if defined(NO_HASH_MULTI_LOOKUP)
326 #include "l3fwd_em_sequential.h"
327 #else
328 #include "l3fwd_em_hlm.h"
329 #endif
330 #else
331 #include "l3fwd_em.h"
332 #endif
333 
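/* Routes are stored in host byte order; convert them to the big-endian
 * layout produced by em_mask_key() so that rte_hash_add_key() and
 * rte_hash_lookup() hash identical byte patterns.
 */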
334 static void
335 convert_ipv4_5tuple(struct ipv4_5tuple *key1,
336 		union ipv4_5tuple_host *key2)
337 {
338 	key2->ip_dst = rte_cpu_to_be_32(key1->ip_dst);
339 	key2->ip_src = rte_cpu_to_be_32(key1->ip_src);
340 	key2->port_dst = rte_cpu_to_be_16(key1->port_dst);
341 	key2->port_src = rte_cpu_to_be_16(key1->port_src);
342 	key2->proto = key1->proto;
343 	key2->pad0 = 0;
344 	key2->pad1 = 0;
345 }
346 
347 static void
348 convert_ipv6_5tuple(struct ipv6_5tuple *key1,
349 		union ipv6_5tuple_host *key2)
350 {
351 	uint32_t i;
352 
353 	for (i = 0; i < 16; i++) {
354 		key2->ip_dst[i] = key1->ip_dst[i];
355 		key2->ip_src[i] = key1->ip_src[i];
356 	}
357 	key2->port_dst = rte_cpu_to_be_16(key1->port_dst);
358 	key2->port_src = rte_cpu_to_be_16(key1->port_src);
359 	key2->proto = key1->proto;
360 	key2->pad0 = 0;
361 	key2->pad1 = 0;
362 	key2->reserve = 0;
363 }
364 
365 #define BYTE_VALUE_MAX 256
366 #define ALL_32_BITS 0xffffffff
367 #define BIT_8_TO_15 0x0000ff00
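/* As the first 32-bit word of mask0, BIT_8_TO_15 keeps only the protocol
 * byte of the key and zeroes the overlaid TTL and header checksum
 * (byte selection shown for little-endian hosts).
 */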
368 
369 static inline void
370 populate_ipv4_flow_into_table(const struct rte_hash *h)
371 {
372 	int i;
373 	int32_t ret;
374 	struct rte_eth_dev_info dev_info;
375 	char srcbuf[INET6_ADDRSTRLEN];
376 	char dstbuf[INET6_ADDRSTRLEN];
377 
378 	mask0 = (rte_xmm_t){.u32 = {BIT_8_TO_15, ALL_32_BITS,
379 				ALL_32_BITS, ALL_32_BITS} };
380 
381 	for (i = 0; i < route_num_v4; i++) {
382 		struct em_rule *entry;
383 		union ipv4_5tuple_host newkey;
384 		struct in_addr src;
385 		struct in_addr dst;
386 
387 		if (((1 << em_route_base_v4[i].if_out) &
388 				enabled_port_mask) == 0)
389 			continue;
390 
391 		entry = &em_route_base_v4[i];
392 		convert_ipv4_5tuple(&(entry->v4_key), &newkey);
393 		ret = rte_hash_add_key(h, (void *) &newkey);
394 		if (ret < 0) {
395 			rte_exit(EXIT_FAILURE, "Unable to add entry %" PRIu32
396 				" to the l3fwd hash.\n", i);
397 		}
398 		ipv4_l3fwd_out_if[ret] = entry->if_out;
399 		ret = rte_eth_dev_info_get(em_route_base_v4[i].if_out,
400 				     &dev_info);
401 		if (ret != 0)
402 			rte_exit(EXIT_FAILURE,
403 				"Error during getting device (port %u) info: %s\n",
404 				em_route_base_v4[i].if_out, strerror(-ret));
405 
406 		src.s_addr = htonl(em_route_base_v4[i].v4_key.ip_src);
407 		dst.s_addr = htonl(em_route_base_v4[i].v4_key.ip_dst);
408 		printf("EM: Adding route %s, %s, %d, %d, %d (%d) [%s]\n",
409 			   inet_ntop(AF_INET, &dst, dstbuf, sizeof(dstbuf)),
410 		       inet_ntop(AF_INET, &src, srcbuf, sizeof(srcbuf)),
411 			   em_route_base_v4[i].v4_key.port_dst,
412 			   em_route_base_v4[i].v4_key.port_src,
413 			   em_route_base_v4[i].v4_key.proto,
414 		       em_route_base_v4[i].if_out, rte_dev_name(dev_info.device));
415 	}
416 	printf("Hash: Adding 0x%" PRIx64 " keys\n",
417 		(uint64_t)route_num_v4);
418 }
419 
420 #define BIT_16_TO_23 0x00ff0000
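/* mask1 keeps the IPv6 protocol byte plus the first 12 bytes of the source
 * address (clearing payload_len and hop_limits); mask2 keeps the last 4
 * destination-address bytes and both ports while zeroing the reserve field
 * (again, byte selection shown for little-endian hosts).
 */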
421 static inline void
422 populate_ipv6_flow_into_table(const struct rte_hash *h)
423 {
424 	int i;
425 	int32_t ret;
426 	struct rte_eth_dev_info dev_info;
427 	char srcbuf[INET6_ADDRSTRLEN];
428 	char dstbuf[INET6_ADDRSTRLEN];
429 
430 	mask1 = (rte_xmm_t){.u32 = {BIT_16_TO_23, ALL_32_BITS,
431 				ALL_32_BITS, ALL_32_BITS} };
432 
433 	mask2 = (rte_xmm_t){.u32 = {ALL_32_BITS, ALL_32_BITS, 0, 0} };
434 
435 	for (i = 0; i < route_num_v6; i++) {
436 		struct em_rule *entry;
437 		union ipv6_5tuple_host newkey;
438 
439 		if (((1 << em_route_base_v6[i].if_out) &
440 				enabled_port_mask) == 0)
441 			continue;
442 
443 		entry = &em_route_base_v6[i];
444 		convert_ipv6_5tuple(&(entry->v6_key), &newkey);
445 		ret = rte_hash_add_key(h, (void *) &newkey);
446 		if (ret < 0) {
447 			rte_exit(EXIT_FAILURE, "Unable to add entry %" PRIu32
448 				" to the l3fwd hash.\n", i);
449 		}
450 		ipv6_l3fwd_out_if[ret] = entry->if_out;
451 		ret = rte_eth_dev_info_get(em_route_base_v6[i].if_out,
452 				     &dev_info);
453 		if (ret != 0)
454 			rte_exit(EXIT_FAILURE,
455 				"Error during getting device (port %u) info: %s\n",
456 				em_route_base_v6[i].if_out, strerror(-ret));
457 
458 		printf("EM: Adding route %s, %s, %d, %d, %d (%d) [%s]\n",
459 			   inet_ntop(AF_INET6, em_route_base_v6[i].v6_key.ip_dst,
460 			   dstbuf, sizeof(dstbuf)),
461 		       inet_ntop(AF_INET6, em_route_base_v6[i].v6_key.ip_src,
462 			   srcbuf, sizeof(srcbuf)),
463 			   em_route_base_v6[i].v6_key.port_dst,
464 			   em_route_base_v6[i].v6_key.port_src,
465 			   em_route_base_v6[i].v6_key.proto,
466 		       em_route_base_v6[i].if_out, rte_dev_name(dev_info.device));
467 	}
468 	printf("Hash: Adding 0x%" PRIx64 " keys\n",
469 		(uint64_t)route_num_v6);
470 }
471 
472 /* Requirements:
473  * 1. IP packets without extension headers;
474  * 2. the L4 protocol must be either TCP or UDP.
475  */
476 int
477 em_check_ptype(int portid)
478 {
479 	int i, ret;
480 	int ptype_l3_ipv4_ext = 0;
481 	int ptype_l3_ipv6_ext = 0;
482 	int ptype_l4_tcp = 0;
483 	int ptype_l4_udp = 0;
484 	uint32_t ptype_mask = RTE_PTYPE_L3_MASK | RTE_PTYPE_L4_MASK;
485 
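	/* Query the port twice: first with a NULL buffer to learn how many
	 * packet types it can report, then again to fill the array.
	 */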
486 	ret = rte_eth_dev_get_supported_ptypes(portid, ptype_mask, NULL, 0);
487 	if (ret <= 0)
488 		return 0;
489 
490 	uint32_t ptypes[ret];
491 
492 	ret = rte_eth_dev_get_supported_ptypes(portid, ptype_mask, ptypes, ret);
493 	for (i = 0; i < ret; ++i) {
494 		switch (ptypes[i]) {
495 		case RTE_PTYPE_L3_IPV4_EXT:
496 			ptype_l3_ipv4_ext = 1;
497 			break;
498 		case RTE_PTYPE_L3_IPV6_EXT:
499 			ptype_l3_ipv6_ext = 1;
500 			break;
501 		case RTE_PTYPE_L4_TCP:
502 			ptype_l4_tcp = 1;
503 			break;
504 		case RTE_PTYPE_L4_UDP:
505 			ptype_l4_udp = 1;
506 			break;
507 		}
508 	}
509 
510 	if (!ipv6 && !ptype_l3_ipv4_ext) {
511 		printf("port %d cannot parse RTE_PTYPE_L3_IPV4_EXT\n", portid);
512 		return 0;
513 	}
514 	if (ipv6 && !ptype_l3_ipv6_ext) {
515 		printf("port %d cannot parse RTE_PTYPE_L3_IPV6_EXT\n", portid);
516 		return 0;
517 	}
518 
519 	if (ptype_l4_tcp == 0)
520 		printf("port %d cannot parse RTE_PTYPE_L4_TCP\n", portid);
521 	if (ptype_l4_udp == 0)
522 		printf("port %d cannot parse RTE_PTYPE_L4_UDP\n", portid);
523 	if (ptype_l4_tcp && ptype_l4_udp)
524 		return 1;
525 
526 	return 0;
527 }
528 
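/* Software packet-type classification used by em_cb_parse_ptype(); the
 * application can install that callback on an RX queue when the PMD does
 * not report the L3/L4 packet types itself.
 */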
529 static inline void
530 em_parse_ptype(struct rte_mbuf *m)
531 {
532 	struct rte_ether_hdr *eth_hdr;
533 	uint32_t packet_type = RTE_PTYPE_UNKNOWN;
534 	uint16_t ether_type;
535 	void *l3;
536 	int hdr_len;
537 	struct rte_ipv4_hdr *ipv4_hdr;
538 	struct rte_ipv6_hdr *ipv6_hdr;
539 
540 	eth_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
541 	ether_type = eth_hdr->ether_type;
542 	l3 = (uint8_t *)eth_hdr + sizeof(struct rte_ether_hdr);
543 	if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4)) {
544 		ipv4_hdr = (struct rte_ipv4_hdr *)l3;
545 		hdr_len = rte_ipv4_hdr_len(ipv4_hdr);
546 		if (hdr_len == sizeof(struct rte_ipv4_hdr)) {
547 			packet_type |= RTE_PTYPE_L3_IPV4;
548 			if (ipv4_hdr->next_proto_id == IPPROTO_TCP)
549 				packet_type |= RTE_PTYPE_L4_TCP;
550 			else if (ipv4_hdr->next_proto_id == IPPROTO_UDP)
551 				packet_type |= RTE_PTYPE_L4_UDP;
552 		} else
553 			packet_type |= RTE_PTYPE_L3_IPV4_EXT;
554 	} else if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6)) {
555 		ipv6_hdr = (struct rte_ipv6_hdr *)l3;
556 		if (ipv6_hdr->proto == IPPROTO_TCP)
557 			packet_type |= RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP;
558 		else if (ipv6_hdr->proto == IPPROTO_UDP)
559 			packet_type |= RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP;
560 		else
561 			packet_type |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
562 	}
563 
564 	m->packet_type = packet_type;
565 }
566 
567 uint16_t
568 em_cb_parse_ptype(uint16_t port __rte_unused, uint16_t queue __rte_unused,
569 		  struct rte_mbuf *pkts[], uint16_t nb_pkts,
570 		  uint16_t max_pkts __rte_unused,
571 		  void *user_param __rte_unused)
572 {
573 	unsigned i;
574 
575 	for (i = 0; i < nb_pkts; ++i)
576 		em_parse_ptype(pkts[i]);
577 
578 	return nb_pkts;
579 }
580 
581 /* main processing loop */
582 int
583 em_main_loop(__rte_unused void *dummy)
584 {
585 	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
586 	unsigned lcore_id;
587 	uint64_t prev_tsc, diff_tsc, cur_tsc;
588 	int i, nb_rx;
589 	uint16_t queueid;
590 	uint16_t portid;
591 	struct lcore_conf *qconf;
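	/* TSC cycles in BURST_TX_DRAIN_US microseconds; the per-microsecond
	 * cycle count is rounded up so the drain interval is never shorter
	 * than requested.
	 */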
592 	const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) /
593 		US_PER_S * BURST_TX_DRAIN_US;
594 
595 	lcore_id = rte_lcore_id();
596 	qconf = &lcore_conf[lcore_id];
597 
598 	const uint16_t n_rx_q = qconf->n_rx_queue;
599 	const uint16_t n_tx_p = qconf->n_tx_port;
600 	if (n_rx_q == 0) {
601 		RTE_LOG(INFO, L3FWD, "lcore %u has nothing to do\n", lcore_id);
602 		return 0;
603 	}
604 
605 	RTE_LOG(INFO, L3FWD, "entering main loop on lcore %u\n", lcore_id);
606 
607 	for (i = 0; i < n_rx_q; i++) {
608 
609 		portid = qconf->rx_queue_list[i].port_id;
610 		queueid = qconf->rx_queue_list[i].queue_id;
611 		RTE_LOG(INFO, L3FWD,
612 			" -- lcoreid=%u portid=%u rxqueueid=%" PRIu16 "\n",
613 			lcore_id, portid, queueid);
614 	}
615 
616 	cur_tsc = rte_rdtsc();
617 	prev_tsc = cur_tsc;
618 
619 	while (!force_quit) {
620 
621 		/*
622 		 * TX burst queue drain
623 		 */
624 		diff_tsc = cur_tsc - prev_tsc;
625 		if (unlikely(diff_tsc > drain_tsc)) {
626 
627 			for (i = 0; i < n_tx_p; ++i) {
628 				portid = qconf->tx_port_id[i];
629 				if (qconf->tx_mbufs[portid].len == 0)
630 					continue;
631 				send_burst(qconf,
632 					qconf->tx_mbufs[portid].len,
633 					portid);
634 				qconf->tx_mbufs[portid].len = 0;
635 			}
636 
637 			prev_tsc = cur_tsc;
638 		}
639 
640 		/*
641 		 * Read packets from the RX queues
642 		 */
643 		for (i = 0; i < n_rx_q; ++i) {
644 			portid = qconf->rx_queue_list[i].port_id;
645 			queueid = qconf->rx_queue_list[i].queue_id;
646 			nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst,
647 				nb_pkt_per_burst);
648 			if (nb_rx == 0)
649 				continue;
650 
651 #if defined RTE_ARCH_X86 || defined __ARM_NEON
652 			l3fwd_em_send_packets(nb_rx, pkts_burst,
653 							portid, qconf);
654 #else
655 			l3fwd_em_no_opt_send_packets(nb_rx, pkts_burst,
656 							portid, qconf);
657 #endif
658 		}
659 
660 		cur_tsc = rte_rdtsc();
661 	}
662 
663 	return 0;
664 }
665 
666 #ifdef RTE_LIB_EVENTDEV
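/* Event-mode workers: with L3FWD_EVENT_TX_ENQ the forwarded event is sent
 * back to the last event queue (consumed by the Tx adapter in this sample's
 * configuration), while L3FWD_EVENT_TX_DIRECT enqueues it straight to the
 * ethernet Tx adapter.
 */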
667 static __rte_always_inline void
668 em_event_loop_single(struct l3fwd_event_resources *evt_rsrc,
669 		const uint8_t flags)
670 {
671 	const int event_p_id = l3fwd_get_free_event_port(evt_rsrc);
672 	const uint8_t tx_q_id = evt_rsrc->evq.event_q_id[
673 		evt_rsrc->evq.nb_queues - 1];
674 	const uint8_t event_d_id = evt_rsrc->event_d_id;
675 	uint8_t deq = 0, enq = 0;
676 	struct lcore_conf *lconf;
677 	unsigned int lcore_id;
678 	struct rte_event ev;
679 
680 	if (event_p_id < 0)
681 		return;
682 
683 	lcore_id = rte_lcore_id();
684 	lconf = &lcore_conf[lcore_id];
685 
686 	RTE_LOG(INFO, L3FWD, "entering %s on lcore %u\n", __func__, lcore_id);
687 	while (!force_quit) {
688 		deq = rte_event_dequeue_burst(event_d_id, event_p_id, &ev, 1,
689 					      0);
690 		if (!deq)
691 			continue;
692 
693 		struct rte_mbuf *mbuf = ev.mbuf;
694 
695 #if defined RTE_ARCH_X86 || defined __ARM_NEON
696 		mbuf->port = em_get_dst_port(lconf, mbuf, mbuf->port);
697 		process_packet(mbuf, &mbuf->port);
698 #else
699 		l3fwd_em_simple_process(mbuf, lconf);
700 #endif
701 		if (mbuf->port == BAD_PORT) {
702 			rte_pktmbuf_free(mbuf);
703 			continue;
704 		}
705 
706 		if (flags & L3FWD_EVENT_TX_ENQ) {
707 			ev.queue_id = tx_q_id;
708 			ev.op = RTE_EVENT_OP_FORWARD;
709 			do {
710 				enq = rte_event_enqueue_burst(
711 					event_d_id, event_p_id, &ev, 1);
712 			} while (!enq && !force_quit);
713 		}
714 
715 		if (flags & L3FWD_EVENT_TX_DIRECT) {
716 			rte_event_eth_tx_adapter_txq_set(mbuf, 0);
717 			do {
718 				enq = rte_event_eth_tx_adapter_enqueue(
719 					event_d_id, event_p_id, &ev, 1, 0);
720 			} while (!enq && !force_quit);
721 		}
722 	}
723 
724 	l3fwd_event_worker_cleanup(event_d_id, event_p_id, &ev, enq, deq, 0);
725 }
726 
727 static __rte_always_inline void
728 em_event_loop_burst(struct l3fwd_event_resources *evt_rsrc,
729 		const uint8_t flags)
730 {
731 	const int event_p_id = l3fwd_get_free_event_port(evt_rsrc);
732 	const uint8_t tx_q_id = evt_rsrc->evq.event_q_id[
733 		evt_rsrc->evq.nb_queues - 1];
734 	const uint8_t event_d_id = evt_rsrc->event_d_id;
735 	const uint16_t deq_len = evt_rsrc->deq_depth;
736 	struct rte_event events[MAX_PKT_BURST];
737 	int i, nb_enq = 0, nb_deq = 0;
738 	struct lcore_conf *lconf;
739 	unsigned int lcore_id;
740 
741 	if (event_p_id < 0)
742 		return;
743 
744 	lcore_id = rte_lcore_id();
745 
746 	lconf = &lcore_conf[lcore_id];
747 
748 	RTE_LOG(INFO, L3FWD, "entering %s on lcore %u\n", __func__, lcore_id);
749 
750 	while (!force_quit) {
751 		/* Read events from RX queues */
752 		nb_deq = rte_event_dequeue_burst(event_d_id, event_p_id,
753 				events, deq_len, 0);
754 		if (nb_deq == 0) {
755 			rte_pause();
756 			continue;
757 		}
758 
759 #if defined RTE_ARCH_X86 || defined __ARM_NEON
760 		l3fwd_em_process_events(nb_deq, (struct rte_event **)&events,
761 					lconf);
762 #else
763 		l3fwd_em_no_opt_process_events(nb_deq,
764 					       (struct rte_event **)&events,
765 					       lconf);
766 #endif
767 		for (i = 0; i < nb_deq; i++) {
768 			if (flags & L3FWD_EVENT_TX_ENQ) {
769 				events[i].queue_id = tx_q_id;
770 				events[i].op = RTE_EVENT_OP_FORWARD;
771 			}
772 
773 			if (flags & L3FWD_EVENT_TX_DIRECT)
774 				rte_event_eth_tx_adapter_txq_set(events[i].mbuf,
775 								 0);
776 		}
777 
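		/* Enqueue may accept fewer events than requested; retry with
		 * the remainder until everything dequeued has been submitted
		 * or shutdown is requested.
		 */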
778 		if (flags & L3FWD_EVENT_TX_ENQ) {
779 			nb_enq = rte_event_enqueue_burst(event_d_id, event_p_id,
780 					events, nb_deq);
781 			while (nb_enq < nb_deq && !force_quit)
782 				nb_enq += rte_event_enqueue_burst(event_d_id,
783 						event_p_id, events + nb_enq,
784 						nb_deq - nb_enq);
785 		}
786 
787 		if (flags & L3FWD_EVENT_TX_DIRECT) {
788 			nb_enq = rte_event_eth_tx_adapter_enqueue(event_d_id,
789 					event_p_id, events, nb_deq, 0);
790 			while (nb_enq < nb_deq && !force_quit)
791 				nb_enq += rte_event_eth_tx_adapter_enqueue(
792 						event_d_id, event_p_id,
793 						events + nb_enq,
794 						nb_deq - nb_enq, 0);
795 		}
796 	}
797 
798 	l3fwd_event_worker_cleanup(event_d_id, event_p_id, events, nb_enq,
799 				   nb_deq, 0);
800 }
801 
802 static __rte_always_inline void
803 em_event_loop(struct l3fwd_event_resources *evt_rsrc,
804 		 const uint8_t flags)
805 {
806 	if (flags & L3FWD_EVENT_SINGLE)
807 		em_event_loop_single(evt_rsrc, flags);
808 	if (flags & L3FWD_EVENT_BURST)
809 		em_event_loop_burst(evt_rsrc, flags);
810 }
811 
812 int __rte_noinline
813 em_event_main_loop_tx_d(__rte_unused void *dummy)
814 {
815 	struct l3fwd_event_resources *evt_rsrc =
816 					l3fwd_get_eventdev_rsrc();
817 
818 	em_event_loop(evt_rsrc, L3FWD_EVENT_TX_DIRECT | L3FWD_EVENT_SINGLE);
819 	return 0;
820 }
821 
822 int __rte_noinline
823 em_event_main_loop_tx_d_burst(__rte_unused void *dummy)
824 {
825 	struct l3fwd_event_resources *evt_rsrc =
826 					l3fwd_get_eventdev_rsrc();
827 
828 	em_event_loop(evt_rsrc, L3FWD_EVENT_TX_DIRECT | L3FWD_EVENT_BURST);
829 	return 0;
830 }
831 
832 int __rte_noinline
833 em_event_main_loop_tx_q(__rte_unused void *dummy)
834 {
835 	struct l3fwd_event_resources *evt_rsrc =
836 					l3fwd_get_eventdev_rsrc();
837 
838 	em_event_loop(evt_rsrc, L3FWD_EVENT_TX_ENQ | L3FWD_EVENT_SINGLE);
839 	return 0;
840 }
841 
842 int __rte_noinline
843 em_event_main_loop_tx_q_burst(__rte_unused void *dummy)
844 {
845 	struct l3fwd_event_resources *evt_rsrc =
846 					l3fwd_get_eventdev_rsrc();
847 
848 	em_event_loop(evt_rsrc, L3FWD_EVENT_TX_ENQ | L3FWD_EVENT_BURST);
849 	return 0;
850 }
851 
852 /* Same eventdev loop for both single and burst event vector modes */
853 static __rte_always_inline void
854 em_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
855 		     const uint8_t flags)
856 {
857 	const int event_p_id = l3fwd_get_free_event_port(evt_rsrc);
858 	const uint8_t tx_q_id =
859 		evt_rsrc->evq.event_q_id[evt_rsrc->evq.nb_queues - 1];
860 	const uint8_t event_d_id = evt_rsrc->event_d_id;
861 	const uint16_t deq_len = evt_rsrc->deq_depth;
862 	struct rte_event events[MAX_PKT_BURST];
863 	int i, nb_enq = 0, nb_deq = 0;
864 	struct lcore_conf *lconf;
865 	unsigned int lcore_id;
866 	uint16_t *dst_ports;
867 
868 	if (event_p_id < 0)
869 		return;
870 
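	/* Scratch array sized to the event vector: the processing helpers
	 * record the resolved destination port of each packet here.
	 */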
871 	dst_ports = rte_zmalloc("", sizeof(uint16_t) * evt_rsrc->vector_size,
872 				RTE_CACHE_LINE_SIZE);
873 	if (dst_ports == NULL)
874 		return;
875 	lcore_id = rte_lcore_id();
876 	lconf = &lcore_conf[lcore_id];
877 
878 	RTE_LOG(INFO, L3FWD, "entering %s on lcore %u\n", __func__, lcore_id);
879 
880 	while (!force_quit) {
881 		/* Read events from RX queues */
882 		nb_deq = rte_event_dequeue_burst(event_d_id, event_p_id, events,
883 						 deq_len, 0);
884 		if (nb_deq == 0) {
885 			rte_pause();
886 			continue;
887 		}
888 
889 		for (i = 0; i < nb_deq; i++) {
890 			if (flags & L3FWD_EVENT_TX_ENQ) {
891 				events[i].queue_id = tx_q_id;
892 				events[i].op = RTE_EVENT_OP_FORWARD;
893 			}
894 
895 #if defined RTE_ARCH_X86 || defined __ARM_NEON
896 			l3fwd_em_process_event_vector(events[i].vec, lconf,
897 						      dst_ports);
898 #else
899 			l3fwd_em_no_opt_process_event_vector(events[i].vec,
900 							     lconf, dst_ports);
901 #endif
902 		}
903 
904 		if (flags & L3FWD_EVENT_TX_ENQ) {
905 			nb_enq = rte_event_enqueue_burst(event_d_id, event_p_id,
906 							 events, nb_deq);
907 			while (nb_enq < nb_deq && !force_quit)
908 				nb_enq += rte_event_enqueue_burst(
909 					event_d_id, event_p_id, events + nb_enq,
910 					nb_deq - nb_enq);
911 		}
912 
913 		if (flags & L3FWD_EVENT_TX_DIRECT) {
914 			nb_enq = rte_event_eth_tx_adapter_enqueue(
915 				event_d_id, event_p_id, events, nb_deq, 0);
916 			while (nb_enq < nb_deq && !force_quit)
917 				nb_enq += rte_event_eth_tx_adapter_enqueue(
918 					event_d_id, event_p_id, events + nb_enq,
919 					nb_deq - nb_enq, 0);
920 		}
921 	}
922 
923 	l3fwd_event_worker_cleanup(event_d_id, event_p_id, events, nb_enq,
924 				   nb_deq, 1);
925 	rte_free(dst_ports);
926 }
927 
928 int __rte_noinline
929 em_event_main_loop_tx_d_vector(__rte_unused void *dummy)
930 {
931 	struct l3fwd_event_resources *evt_rsrc = l3fwd_get_eventdev_rsrc();
932 
933 	em_event_loop_vector(evt_rsrc, L3FWD_EVENT_TX_DIRECT);
934 	return 0;
935 }
936 
937 int __rte_noinline
938 em_event_main_loop_tx_d_burst_vector(__rte_unused void *dummy)
939 {
940 	struct l3fwd_event_resources *evt_rsrc = l3fwd_get_eventdev_rsrc();
941 
942 	em_event_loop_vector(evt_rsrc, L3FWD_EVENT_TX_DIRECT);
943 	return 0;
944 }
945 
946 int __rte_noinline
947 em_event_main_loop_tx_q_vector(__rte_unused void *dummy)
948 {
949 	struct l3fwd_event_resources *evt_rsrc = l3fwd_get_eventdev_rsrc();
950 
951 	em_event_loop_vector(evt_rsrc, L3FWD_EVENT_TX_ENQ);
952 	return 0;
953 }
954 
955 int __rte_noinline
956 em_event_main_loop_tx_q_burst_vector(__rte_unused void *dummy)
957 {
958 	struct l3fwd_event_resources *evt_rsrc = l3fwd_get_eventdev_rsrc();
959 
960 	em_event_loop_vector(evt_rsrc, L3FWD_EVENT_TX_ENQ);
961 	return 0;
962 }
963 #endif
964 
965 /* Initialize exact match (hash) parameters. 8< */
966 void
967 setup_hash(const int socketid)
968 {
969 	struct rte_hash_parameters ipv4_l3fwd_hash_params = {
970 		.name = NULL,
971 		.entries = L3FWD_HASH_ENTRIES,
972 		.key_len = sizeof(union ipv4_5tuple_host),
973 		.hash_func = ipv4_hash_crc,
974 		.hash_func_init_val = 0,
975 	};
976 
977 	struct rte_hash_parameters ipv6_l3fwd_hash_params = {
978 		.name = NULL,
979 		.entries = L3FWD_HASH_ENTRIES,
980 		.key_len = sizeof(union ipv6_5tuple_host),
981 		.hash_func = ipv6_hash_crc,
982 		.hash_func_init_val = 0,
983 	};
984 
985 	char s[64];
986 
987 	/* create ipv4 hash */
988 	snprintf(s, sizeof(s), "ipv4_l3fwd_hash_%d", socketid);
989 	ipv4_l3fwd_hash_params.name = s;
990 	ipv4_l3fwd_hash_params.socket_id = socketid;
991 	ipv4_l3fwd_em_lookup_struct[socketid] =
992 		rte_hash_create(&ipv4_l3fwd_hash_params);
993 	if (ipv4_l3fwd_em_lookup_struct[socketid] == NULL)
994 		rte_exit(EXIT_FAILURE,
995 			"Unable to create the l3fwd hash on socket %d\n",
996 			socketid);
997 
998 	/* create ipv6 hash */
999 	snprintf(s, sizeof(s), "ipv6_l3fwd_hash_%d", socketid);
1000 	ipv6_l3fwd_hash_params.name = s;
1001 	ipv6_l3fwd_hash_params.socket_id = socketid;
1002 	ipv6_l3fwd_em_lookup_struct[socketid] =
1003 		rte_hash_create(&ipv6_l3fwd_hash_params);
1004 	if (ipv6_l3fwd_em_lookup_struct[socketid] == NULL)
1005 		rte_exit(EXIT_FAILURE,
1006 			"Unable to create the l3fwd hash on socket %d\n",
1007 			socketid);
1008 
1009 	/*
1010 	 * Use data from ipv4/ipv6 l3fwd config file
1011 	 * directly to initialize the hash table.
1012 	 */
1013 	if (ipv6 == 0) {
1014 		/* populate the ipv4 hash */
1015 		populate_ipv4_flow_into_table(
1016 			ipv4_l3fwd_em_lookup_struct[socketid]);
1017 	} else {
1018 		/* populate the ipv6 hash */
1019 		populate_ipv6_flow_into_table(
1020 			ipv6_l3fwd_em_lookup_struct[socketid]);
1021 	}
1022 }
1023 /* >8 End of initialization of hash parameters. */
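/*
 * Rough usage sketch of this file's API (illustrative only, not code added
 * to the sample): setup_hash() is called once per NUMA socket at init time,
 * and the fast path then resolves the egress port per packet, e.g.:
 *
 *	void *h = em_get_ipv4_l3fwd_lookup_struct(socketid);
 *	uint16_t dst = em_get_ipv4_dst_port(ipv4_hdr, rx_portid, h);
 *
 * where ipv4_hdr points at the packet's IPv4 header and rx_portid is
 * returned unchanged on a lookup miss.
 */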
1024 
1025 /* Return ipv4/ipv6 em fwd lookup struct. */
1026 void *
1027 em_get_ipv4_l3fwd_lookup_struct(const int socketid)
1028 {
1029 	return ipv4_l3fwd_em_lookup_struct[socketid];
1030 }
1031 
1032 void *
1033 em_get_ipv6_l3fwd_lookup_struct(const int socketid)
1034 {
1035 	return ipv6_l3fwd_em_lookup_struct[socketid];
1036 }
1037