xref: /dpdk/examples/ip_fragmentation/main.c (revision 25d11a86c56d50947af33d0b79ede622809bd8b9)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2014 Intel Corporation
3  */
4 
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <stdint.h>
8 #include <inttypes.h>
9 #include <sys/types.h>
10 #include <sys/param.h>
11 #include <string.h>
12 #include <sys/queue.h>
13 #include <stdarg.h>
14 #include <errno.h>
15 #include <getopt.h>
16 
17 #include <rte_common.h>
18 #include <rte_byteorder.h>
19 #include <rte_log.h>
20 #include <rte_memory.h>
21 #include <rte_memcpy.h>
22 #include <rte_eal.h>
23 #include <rte_launch.h>
24 #include <rte_atomic.h>
25 #include <rte_cycles.h>
26 #include <rte_prefetch.h>
27 #include <rte_lcore.h>
28 #include <rte_per_lcore.h>
29 #include <rte_branch_prediction.h>
30 #include <rte_interrupts.h>
31 #include <rte_random.h>
32 #include <rte_debug.h>
33 #include <rte_ether.h>
34 #include <rte_ethdev.h>
35 #include <rte_mempool.h>
36 #include <rte_mbuf.h>
37 #include <rte_lpm.h>
38 #include <rte_lpm6.h>
39 #include <rte_ip.h>
40 #include <rte_string_fns.h>
41 
42 #include <rte_ip_frag.h>
43 
44 #define RTE_LOGTYPE_IP_FRAG RTE_LOGTYPE_USER1
45 
46 /* allow max jumbo frame of 9.5 KB (0x2600 bytes) */
47 #define JUMBO_FRAME_MAX_SIZE	0x2600
48 
49 #define	ROUNDUP_DIV(a, b)	(((a) + (b) - 1) / (b))
50 
51 /*
52  * Default byte size for the IPv4/IPv6 Maximum Transmission Unit (MTU).
53  * This value includes the size of the IP header.
54  */
55 #define	IPV4_MTU_DEFAULT	ETHER_MTU
56 #define	IPV6_MTU_DEFAULT	ETHER_MTU
57 
58 /*
59  * The overhead from max frame size to MTU.
60  * We have to consider the max possible overhead.
61  */
62 #define MTU_OVERHEAD	\
63 	(ETHER_HDR_LEN + ETHER_CRC_LEN + 2 * sizeof(struct vlan_hdr))
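
/*
 * A worked example of the arithmetic (illustrative; the constants are the
 * standard values from rte_ether.h):
 *
 *	MTU_OVERHEAD = 14 (ETHER_HDR_LEN) + 4 (ETHER_CRC_LEN) + 2 * 4 (VLAN)
 *	             = 26 bytes
 *
 * so a port accepting JUMBO_FRAME_MAX_SIZE (0x2600 = 9728 byte) frames is
 * assigned an MTU of 9728 - 26 = 9702 bytes in main() below.
 */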
64 
65 /*
66  * Default payload in bytes for the IPv4/IPv6 packet.
67  */
68 #define	IPV4_DEFAULT_PAYLOAD	(IPV4_MTU_DEFAULT - sizeof(struct ipv4_hdr))
69 #define	IPV6_DEFAULT_PAYLOAD	(IPV6_MTU_DEFAULT - sizeof(struct ipv6_hdr))
70 
71 /*
72  * Max number of fragments per packet expected - defined by config file.
73  */
74 #define	MAX_PACKET_FRAG RTE_LIBRTE_IP_FRAG_MAX_FRAG
75 
76 #define NB_MBUF   8192
77 
78 #define MAX_PKT_BURST	32
79 #define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */
80 
81 /* Configure how many packets ahead to prefetch when reading packets */
82 #define PREFETCH_OFFSET	3
83 
84 /*
85  * Configurable number of RX/TX ring descriptors
86  */
87 #define RTE_TEST_RX_DESC_DEFAULT 1024
88 #define RTE_TEST_TX_DESC_DEFAULT 1024
89 static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT;
90 static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;
91 
92 /* ethernet addresses of ports */
93 static struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS];
94 
95 #ifndef IPv4_BYTES
96 #define IPv4_BYTES_FMT "%" PRIu8 ".%" PRIu8 ".%" PRIu8 ".%" PRIu8
97 #define IPv4_BYTES(addr) \
98 		(uint8_t) (((addr) >> 24) & 0xFF),\
99 		(uint8_t) (((addr) >> 16) & 0xFF),\
100 		(uint8_t) (((addr) >> 8) & 0xFF),\
101 		(uint8_t) ((addr) & 0xFF)
102 #endif
103 
104 #ifndef IPv6_BYTES
105 #define IPv6_BYTES_FMT "%02x%02x:%02x%02x:%02x%02x:%02x%02x:"\
106                        "%02x%02x:%02x%02x:%02x%02x:%02x%02x"
107 #define IPv6_BYTES(addr) \
108 	addr[0],  addr[1], addr[2],  addr[3], \
109 	addr[4],  addr[5], addr[6],  addr[7], \
110 	addr[8],  addr[9], addr[10], addr[11],\
111 	addr[12], addr[13], addr[14], addr[15]
112 #endif
113 
114 #define IPV6_ADDR_LEN 16
115 
116 /* mask of enabled ports */
117 static int enabled_port_mask = 0;
118 
119 static int rx_queue_per_lcore = 1;
120 
121 #define MBUF_TABLE_SIZE  (2 * MAX(MAX_PKT_BURST, MAX_PACKET_FRAG))
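
/*
 * Sizing note: the TX table is drained once it holds MAX_PKT_BURST entries,
 * so just before a drain it may hold MAX_PKT_BURST - 1 mbufs, and the next
 * packet can expand into up to MAX_PACKET_FRAG fragments. Doubling the
 * larger of the two covers that worst case.
 */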
122 
123 struct mbuf_table {
124 	uint16_t len;
125 	struct rte_mbuf *m_table[MBUF_TABLE_SIZE];
126 };
127 
128 struct rx_queue {
129 	struct rte_mempool *direct_pool;
130 	struct rte_mempool *indirect_pool;
131 	struct rte_lpm *lpm;
132 	struct rte_lpm6 *lpm6;
133 	uint16_t portid;
134 };
135 
136 #define MAX_RX_QUEUE_PER_LCORE 16
137 #define MAX_TX_QUEUE_PER_PORT 16
138 struct lcore_queue_conf {
139 	uint16_t n_rx_queue;
140 	uint16_t tx_queue_id[RTE_MAX_ETHPORTS];
141 	struct rx_queue rx_queue_list[MAX_RX_QUEUE_PER_LCORE];
142 	struct mbuf_table tx_mbufs[RTE_MAX_ETHPORTS];
143 } __rte_cache_aligned;
144 struct lcore_queue_conf lcore_queue_conf[RTE_MAX_LCORE];
145 
146 static struct rte_eth_conf port_conf = {
147 	.rxmode = {
148 		.max_rx_pkt_len = JUMBO_FRAME_MAX_SIZE,
149 		.split_hdr_size = 0,
150 		.offloads = (DEV_RX_OFFLOAD_CHECKSUM |
151 			     DEV_RX_OFFLOAD_SCATTER |
152 			     DEV_RX_OFFLOAD_JUMBO_FRAME),
153 	},
154 	.txmode = {
155 		.mq_mode = ETH_MQ_TX_NONE,
156 		.offloads = (DEV_TX_OFFLOAD_IPV4_CKSUM |
157 			     DEV_TX_OFFLOAD_MULTI_SEGS),
158 	},
159 };
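
/*
 * Recent ethdev releases reject offloads the device cannot provide at
 * rte_eth_dev_configure() time. A minimal sketch of probing a capability up
 * front (a hypothetical check, not performed by this example) could look
 * like:
 *
 *	struct rte_eth_dev_info dev_info;
 *
 *	rte_eth_dev_info_get(portid, &dev_info);
 *	if ((dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MULTI_SEGS) == 0)
 *		rte_exit(EXIT_FAILURE,
 *			"port %u: multi-segment TX not supported\n", portid);
 */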
160 
161 /*
162  * IPv4 forwarding table
163  */
164 struct l3fwd_ipv4_route {
165 	uint32_t ip;
166 	uint8_t  depth;
167 	uint8_t  if_out;
168 };
169 
170 struct l3fwd_ipv4_route l3fwd_ipv4_route_array[] = {
171 		{IPv4(100,10,0,0), 16, 0},
172 		{IPv4(100,20,0,0), 16, 1},
173 		{IPv4(100,30,0,0), 16, 2},
174 		{IPv4(100,40,0,0), 16, 3},
175 		{IPv4(100,50,0,0), 16, 4},
176 		{IPv4(100,60,0,0), 16, 5},
177 		{IPv4(100,70,0,0), 16, 6},
178 		{IPv4(100,80,0,0), 16, 7},
179 };
180 
181 /*
182  * IPv6 forwarding table
183  */
184 
185 struct l3fwd_ipv6_route {
186 	uint8_t ip[IPV6_ADDR_LEN];
187 	uint8_t depth;
188 	uint8_t if_out;
189 };
190 
191 static struct l3fwd_ipv6_route l3fwd_ipv6_route_array[] = {
192 	{{1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 0},
193 	{{2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 1},
194 	{{3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 2},
195 	{{4,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 3},
196 	{{5,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 4},
197 	{{6,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 5},
198 	{{7,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 6},
199 	{{8,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 7},
200 };
201 
202 #define LPM_MAX_RULES         1024
203 #define LPM6_MAX_RULES         1024
204 #define LPM6_NUMBER_TBL8S (1 << 16)
205 
206 struct rte_lpm6_config lpm6_config = {
207 		.max_rules = LPM6_MAX_RULES,
208 		.number_tbl8s = LPM6_NUMBER_TBL8S,
209 		.flags = 0
210 };
211 
212 static struct rte_mempool *socket_direct_pool[RTE_MAX_NUMA_NODES];
213 static struct rte_mempool *socket_indirect_pool[RTE_MAX_NUMA_NODES];
214 static struct rte_lpm *socket_lpm[RTE_MAX_NUMA_NODES];
215 static struct rte_lpm6 *socket_lpm6[RTE_MAX_NUMA_NODES];
216 
217 /* Send burst of packets on an output interface */
218 static inline int
219 send_burst(struct lcore_queue_conf *qconf, uint16_t n, uint16_t port)
220 {
221 	struct rte_mbuf **m_table;
222 	int ret;
223 	uint16_t queueid;
224 
225 	queueid = qconf->tx_queue_id[port];
226 	m_table = (struct rte_mbuf **)qconf->tx_mbufs[port].m_table;
227 
228 	ret = rte_eth_tx_burst(port, queueid, m_table, n);
229 	if (unlikely(ret < n)) {
230 		do {
231 			rte_pktmbuf_free(m_table[ret]);
232 		} while (++ret < n);
233 	}
234 
235 	return 0;
236 }
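
/*
 * send_burst() expects qconf->tx_mbufs[port] to already hold n mbufs; it is
 * the caller's job to reset the buffered length afterwards, as main_loop()
 * and l3fwd_simple_forward() below do:
 *
 *	if (qconf->tx_mbufs[portid].len > 0) {
 *		send_burst(qconf, qconf->tx_mbufs[portid].len, portid);
 *		qconf->tx_mbufs[portid].len = 0;
 *	}
 */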
237 
238 static inline void
239 l3fwd_simple_forward(struct rte_mbuf *m, struct lcore_queue_conf *qconf,
240 		uint8_t queueid, uint16_t port_in)
241 {
242 	struct rx_queue *rxq;
243 	uint32_t i, len, next_hop;
244 	uint8_t ipv6;
245 	uint16_t port_out;
246 	int32_t len2;
247 
248 	ipv6 = 0;
249 	rxq = &qconf->rx_queue_list[queueid];
250 
251 	/* by default, send everything back to the source port */
252 	port_out = port_in;
253 
254 	/* Remove the Ethernet header from the input packet */
255 	rte_pktmbuf_adj(m, (uint16_t)sizeof(struct ether_hdr));
256 
257 	/* Build transmission burst */
258 	len = qconf->tx_mbufs[port_out].len;
259 
260 	/* if this is an IPv4 packet */
261 	if (RTE_ETH_IS_IPV4_HDR(m->packet_type)) {
262 		struct ipv4_hdr *ip_hdr;
263 		uint32_t ip_dst;
264 		/* Read the lookup key (i.e. ip_dst) from the input packet */
265 		ip_hdr = rte_pktmbuf_mtod(m, struct ipv4_hdr *);
266 		ip_dst = rte_be_to_cpu_32(ip_hdr->dst_addr);
267 
268 		/* Find destination port */
269 		if (rte_lpm_lookup(rxq->lpm, ip_dst, &next_hop) == 0 &&
270 				(enabled_port_mask & 1 << next_hop) != 0) {
271 			port_out = next_hop;
272 
273 			/* Build transmission burst for new port */
274 			len = qconf->tx_mbufs[port_out].len;
275 		}
276 
277 		/* if we don't need to do any fragmentation */
278 		if (likely(IPV4_MTU_DEFAULT >= m->pkt_len)) {
279 			qconf->tx_mbufs[port_out].m_table[len] = m;
280 			len2 = 1;
281 		} else {
282 			len2 = rte_ipv4_fragment_packet(m,
283 				&qconf->tx_mbufs[port_out].m_table[len],
284 				(uint16_t)(MBUF_TABLE_SIZE - len),
285 				IPV4_MTU_DEFAULT,
286 				rxq->direct_pool, rxq->indirect_pool);
287 
288 			/* Free input packet */
289 			rte_pktmbuf_free(m);
290 
291 			/* If we fail to fragment the packet */
292 			if (unlikely(len2 < 0))
293 				return;
294 		}
295 	} else if (RTE_ETH_IS_IPV6_HDR(m->packet_type)) {
296 		/* if this is an IPv6 packet */
297 		struct ipv6_hdr *ip_hdr;
298 
299 		ipv6 = 1;
300 
301 		/* Read the lookup key (i.e. ip_dst) from the input packet */
302 		ip_hdr = rte_pktmbuf_mtod(m, struct ipv6_hdr *);
303 
304 		/* Find destination port */
305 		if (rte_lpm6_lookup(rxq->lpm6, ip_hdr->dst_addr,
306 						&next_hop) == 0 &&
307 				(enabled_port_mask & 1 << next_hop) != 0) {
308 			port_out = next_hop;
309 
310 			/* Build transmission burst for new port */
311 			len = qconf->tx_mbufs[port_out].len;
312 		}
313 
314 		/* if we don't need to do any fragmentation */
315 		if (likely(IPV6_MTU_DEFAULT >= m->pkt_len)) {
316 			qconf->tx_mbufs[port_out].m_table[len] = m;
317 			len2 = 1;
318 		} else {
319 			len2 = rte_ipv6_fragment_packet(m,
320 				&qconf->tx_mbufs[port_out].m_table[len],
321 				(uint16_t)(MBUF_TABLE_SIZE - len),
322 				IPV6_MTU_DEFAULT,
323 				rxq->direct_pool, rxq->indirect_pool);
324 
325 			/* Free input packet */
326 			rte_pktmbuf_free(m);
327 
328 			/* If we fail to fragment the packet */
329 			if (unlikely(len2 < 0))
330 				return;
331 		}
332 	}
333 	/* else, just forward the packet */
334 	else {
335 		qconf->tx_mbufs[port_out].m_table[len] = m;
336 		len2 = 1;
337 	}
338 
339 	for (i = len; i < len + len2; i++) {
340 		void *d_addr_bytes;
341 
342 		m = qconf->tx_mbufs[port_out].m_table[i];
343 		struct ether_hdr *eth_hdr = (struct ether_hdr *)
344 			rte_pktmbuf_prepend(m, (uint16_t)sizeof(struct ether_hdr));
345 		if (eth_hdr == NULL) {
346 			rte_panic("No headroom in mbuf.\n");
347 		}
348 
349 		m->l2_len = sizeof(struct ether_hdr);
350 
351 		/* dst addr: 02:00:00:00:00:xx, where xx = output port id */
352 		d_addr_bytes = &eth_hdr->d_addr.addr_bytes[0];
353 		*((uint64_t *)d_addr_bytes) = 0x000000000002 + ((uint64_t)port_out << 40);
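		/*
		 * The 64-bit store above writes 8 bytes into the 6-byte
		 * d_addr field; the two bytes that spill into s_addr are
		 * overwritten by the ether_addr_copy() just below.
		 */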
354 
355 		/* src addr */
356 		ether_addr_copy(&ports_eth_addr[port_out], &eth_hdr->s_addr);
357 		if (ipv6)
358 			eth_hdr->ether_type = rte_cpu_to_be_16(ETHER_TYPE_IPv6);
359 		else
360 			eth_hdr->ether_type = rte_cpu_to_be_16(ETHER_TYPE_IPv4);
361 	}
362 
363 	len += len2;
364 
365 	if (likely(len < MAX_PKT_BURST)) {
366 		qconf->tx_mbufs[port_out].len = (uint16_t)len;
367 		return;
368 	}
369 
370 	/* Transmit packets */
371 	send_burst(qconf, (uint16_t)len, port_out);
372 	qconf->tx_mbufs[port_out].len = 0;
373 }
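
/*
 * The actual fragmentation above is done by rte_ipv4_fragment_packet() (and
 * its IPv6 counterpart); a minimal standalone sketch of the call, with
 * direct_pool/indirect_pool assumed to come from init_mem() below:
 *
 *	struct rte_mbuf *frags[MAX_PACKET_FRAG];
 *	int32_t nb;
 *
 *	nb = rte_ipv4_fragment_packet(m, frags, MAX_PACKET_FRAG,
 *		IPV4_MTU_DEFAULT, direct_pool, indirect_pool);
 *
 * On success, nb is the number of fragments written into frags[]; on failure
 * it is a negative errno value. In both cases the input mbuf m is not
 * consumed, which is why the code above frees it itself once fragmentation
 * has been attempted.
 */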
374 
375 /* main processing loop */
376 static int
377 main_loop(__attribute__((unused)) void *dummy)
378 {
379 	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
380 	unsigned lcore_id;
381 	uint64_t prev_tsc, diff_tsc, cur_tsc;
382 	int i, j, nb_rx;
383 	uint16_t portid;
384 	struct lcore_queue_conf *qconf;
385 	const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * BURST_TX_DRAIN_US;
386 
387 	prev_tsc = 0;
388 
389 	lcore_id = rte_lcore_id();
390 	qconf = &lcore_queue_conf[lcore_id];
391 
392 	if (qconf->n_rx_queue == 0) {
393 		RTE_LOG(INFO, IP_FRAG, "lcore %u has nothing to do\n", lcore_id);
394 		return 0;
395 	}
396 
397 	RTE_LOG(INFO, IP_FRAG, "entering main loop on lcore %u\n", lcore_id);
398 
399 	for (i = 0; i < qconf->n_rx_queue; i++) {
400 
401 		portid = qconf->rx_queue_list[i].portid;
402 		RTE_LOG(INFO, IP_FRAG, " -- lcoreid=%u portid=%d\n", lcore_id,
403 				portid);
404 	}
405 
406 	while (1) {
407 
408 		cur_tsc = rte_rdtsc();
409 
410 		/*
411 		 * TX burst queue drain
412 		 */
413 		diff_tsc = cur_tsc - prev_tsc;
414 		if (unlikely(diff_tsc > drain_tsc)) {
415 
416 			/*
417 			 * This could be optimized (use queueid instead of
418 			 * portid), but it is not called so often
419 			 */
420 			for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) {
421 				if (qconf->tx_mbufs[portid].len == 0)
422 					continue;
423 				send_burst(&lcore_queue_conf[lcore_id],
424 					   qconf->tx_mbufs[portid].len,
425 					   portid);
426 				qconf->tx_mbufs[portid].len = 0;
427 			}
428 
429 			prev_tsc = cur_tsc;
430 		}
431 
432 		/*
433 		 * Read packet from RX queues
434 		 */
435 		for (i = 0; i < qconf->n_rx_queue; i++) {
436 
437 			portid = qconf->rx_queue_list[i].portid;
438 			nb_rx = rte_eth_rx_burst(portid, 0, pkts_burst,
439 						 MAX_PKT_BURST);
440 
441 			/* Prefetch first packets */
442 			for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) {
443 				rte_prefetch0(rte_pktmbuf_mtod(
444 						pkts_burst[j], void *));
445 			}
446 
447 			/* Prefetch and forward already prefetched packets */
448 			for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) {
449 				rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[
450 						j + PREFETCH_OFFSET], void *));
451 				l3fwd_simple_forward(pkts_burst[j], qconf, i, portid);
452 			}
453 
454 			/* Forward remaining prefetched packets */
455 			for (; j < nb_rx; j++) {
456 				l3fwd_simple_forward(pkts_burst[j], qconf, i, portid);
457 			}
458 		}
459 	}
460 }
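
/*
 * A worked example of the drain interval computed in main_loop(): with an
 * (assumed) 2 GHz TSC, drain_tsc = ceil(2e9 / US_PER_S) * BURST_TX_DRAIN_US
 * = 2000 * 100 = 200000 cycles, so partially filled TX buffers are flushed
 * roughly every 100 us even when the RX queues go idle.
 */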
461 
462 /* display usage */
463 static void
464 print_usage(const char *prgname)
465 {
466 	printf("%s [EAL options] -- -p PORTMASK [-q NQ]\n"
467 	       "  -p PORTMASK: hexadecimal bitmask of ports to configure\n"
468 	       "  -q NQ: number of queue (=ports) per lcore (default is 1)\n",
469 	       prgname);
470 }
471 
472 static int
473 parse_portmask(const char *portmask)
474 {
475 	char *end = NULL;
476 	unsigned long pm;
477 
478 	/* parse hexadecimal string */
479 	pm = strtoul(portmask, &end, 16);
480 	if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
481 		return -1;
482 
483 	if (pm == 0)
484 		return -1;
485 
486 	return pm;
487 }
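
/*
 * Example: "-p 0x3" yields parse_portmask("0x3") == 3, enabling ports 0 and
 * 1; an empty, non-hexadecimal or all-zero mask returns -1.
 */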
488 
489 static int
490 parse_nqueue(const char *q_arg)
491 {
492 	char *end = NULL;
493 	unsigned long n;
494 
495 	/* parse decimal string */
496 	n = strtoul(q_arg, &end, 10);
497 	if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0'))
498 		return -1;
499 	if (n == 0)
500 		return -1;
501 	if (n >= MAX_RX_QUEUE_PER_LCORE)
502 		return -1;
503 
504 	return n;
505 }
506 
507 /* Parse the argument given in the command line of the application */
508 static int
509 parse_args(int argc, char **argv)
510 {
511 	int opt, ret;
512 	char **argvopt;
513 	int option_index;
514 	char *prgname = argv[0];
515 	static struct option lgopts[] = {
516 		{NULL, 0, 0, 0}
517 	};
518 
519 	argvopt = argv;
520 
521 	while ((opt = getopt_long(argc, argvopt, "p:q:",
522 				  lgopts, &option_index)) != EOF) {
523 
524 		switch (opt) {
525 		/* portmask */
526 		case 'p':
527 			enabled_port_mask = parse_portmask(optarg);
528 			if (enabled_port_mask < 0) {
529 				printf("invalid portmask\n");
530 				print_usage(prgname);
531 				return -1;
532 			}
533 			break;
534 
535 		/* nqueue */
536 		case 'q':
537 			rx_queue_per_lcore = parse_nqueue(optarg);
538 			if (rx_queue_per_lcore < 0) {
539 				printf("invalid queue number\n");
540 				print_usage(prgname);
541 				return -1;
542 			}
543 			break;
544 
545 		/* long options */
546 		case 0:
547 			print_usage(prgname);
548 			return -1;
549 
550 		default:
551 			print_usage(prgname);
552 			return -1;
553 		}
554 	}
555 
556 	if (enabled_port_mask == 0) {
557 		printf("portmask not specified\n");
558 		print_usage(prgname);
559 		return -1;
560 	}
561 
562 	if (optind >= 0)
563 		argv[optind-1] = prgname;
564 
565 	ret = optind-1;
566 	optind = 1; /* reset getopt lib */
567 	return ret;
568 }
569 
570 static void
571 print_ethaddr(const char *name, struct ether_addr *eth_addr)
572 {
573 	char buf[ETHER_ADDR_FMT_SIZE];
574 	ether_format_addr(buf, ETHER_ADDR_FMT_SIZE, eth_addr);
575 	printf("%s%s", name, buf);
576 }
577 
578 /* Check link status of all enabled ports; wait up to 9 s, then print the result */
579 static void
580 check_all_ports_link_status(uint32_t port_mask)
581 {
582 #define CHECK_INTERVAL 100 /* 100ms */
583 #define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
584 	uint16_t portid;
585 	uint8_t count, all_ports_up, print_flag = 0;
586 	struct rte_eth_link link;
587 
588 	printf("\nChecking link status");
589 	fflush(stdout);
590 	for (count = 0; count <= MAX_CHECK_TIME; count++) {
591 		all_ports_up = 1;
592 		RTE_ETH_FOREACH_DEV(portid) {
593 			if ((port_mask & (1 << portid)) == 0)
594 				continue;
595 			memset(&link, 0, sizeof(link));
596 			rte_eth_link_get_nowait(portid, &link);
597 			/* print link status if flag set */
598 			if (print_flag == 1) {
599 				if (link.link_status)
600 					printf(
601 					"Port%d Link Up .Speed %u Mbps - %s\n",
602 						portid, link.link_speed,
603 				(link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
604 					("full-duplex") : ("half-duplex\n"));
605 				else
606 					printf("Port %d Link Down\n", portid);
607 				continue;
608 			}
609 			/* clear all_ports_up flag if any link down */
610 			if (link.link_status == ETH_LINK_DOWN) {
611 				all_ports_up = 0;
612 				break;
613 			}
614 		}
615 		/* after finally printing all link status, get out */
616 		if (print_flag == 1)
617 			break;
618 
619 		if (all_ports_up == 0) {
620 			printf(".");
621 			fflush(stdout);
622 			rte_delay_ms(CHECK_INTERVAL);
623 		}
624 
625 		/* set the print_flag if all ports up or timeout */
626 		if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
627 			print_flag = 1;
628 			printf("\ndone\n");
629 		}
630 	}
631 }
632 
633 /* Check L3 packet type detection capability of the NIC port */
634 static int
635 check_ptype(int portid)
636 {
637 	int i, ret;
638 	int ptype_l3_ipv4 = 0, ptype_l3_ipv6 = 0;
639 	uint32_t ptype_mask = RTE_PTYPE_L3_MASK;
640 
641 	ret = rte_eth_dev_get_supported_ptypes(portid, ptype_mask, NULL, 0);
642 	if (ret <= 0)
643 		return 0;
644 
645 	uint32_t ptypes[ret];
646 
647 	ret = rte_eth_dev_get_supported_ptypes(portid, ptype_mask, ptypes, ret);
648 	for (i = 0; i < ret; ++i) {
649 		if (ptypes[i] & RTE_PTYPE_L3_IPV4)
650 			ptype_l3_ipv4 = 1;
651 		if (ptypes[i] & RTE_PTYPE_L3_IPV6)
652 			ptype_l3_ipv6 = 1;
653 	}
654 
655 	if (ptype_l3_ipv4 == 0)
656 		printf("port %d cannot parse RTE_PTYPE_L3_IPV4\n", portid);
657 
658 	if (ptype_l3_ipv6 == 0)
659 		printf("port %d cannot parse RTE_PTYPE_L3_IPV6\n", portid);
660 
661 	if (ptype_l3_ipv4 && ptype_l3_ipv6)
662 		return 1;
663 
664 	return 0;
665 
666 }
667 
668 /* Determine the L3 packet type of a packet in software */
669 static inline void
670 parse_ptype(struct rte_mbuf *m)
671 {
672 	struct ether_hdr *eth_hdr;
673 	uint32_t packet_type = RTE_PTYPE_UNKNOWN;
674 	uint16_t ether_type;
675 
676 	eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
677 	ether_type = eth_hdr->ether_type;
678 	if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
679 		packet_type |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
680 	else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv6))
681 		packet_type |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
682 
683 	m->packet_type = packet_type;
684 }
685 
686 /* callback function to detect packet type for a queue of a port */
687 static uint16_t
688 cb_parse_ptype(uint16_t port __rte_unused, uint16_t queue __rte_unused,
689 		   struct rte_mbuf *pkts[], uint16_t nb_pkts,
690 		   uint16_t max_pkts __rte_unused,
691 		   void *user_param __rte_unused)
692 {
693 	uint16_t i;
694 
695 	for (i = 0; i < nb_pkts; ++i)
696 		parse_ptype(pkts[i]);
697 
698 	return nb_pkts;
699 }
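
/*
 * main() installs this callback only on ports whose PMD cannot report L3
 * packet types in hardware (see check_ptype() above):
 *
 *	rte_eth_add_rx_callback(portid, 0, cb_parse_ptype, NULL);
 *
 * rte_eth_add_rx_callback() returns an opaque handle usable with
 * rte_eth_remove_rx_callback(); since the callback stays installed for the
 * life of the port, this example simply discards it.
 */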
700 
701 static int
702 init_routing_table(void)
703 {
704 	struct rte_lpm *lpm;
705 	struct rte_lpm6 *lpm6;
706 	int socket, ret;
707 	unsigned i;
708 
709 	for (socket = 0; socket < RTE_MAX_NUMA_NODES; socket++) {
710 		if (socket_lpm[socket]) {
711 			lpm = socket_lpm[socket];
712 			/* populate the LPM table */
713 			for (i = 0; i < RTE_DIM(l3fwd_ipv4_route_array); i++) {
714 				ret = rte_lpm_add(lpm,
715 					l3fwd_ipv4_route_array[i].ip,
716 					l3fwd_ipv4_route_array[i].depth,
717 					l3fwd_ipv4_route_array[i].if_out);
718 
719 				if (ret < 0) {
720 					RTE_LOG(ERR, IP_FRAG, "Unable to add entry %i to the l3fwd "
721 						"LPM table\n", i);
722 					return -1;
723 				}
724 
725 				RTE_LOG(INFO, IP_FRAG, "Socket %i: adding route " IPv4_BYTES_FMT
726 						"/%d (port %d)\n",
727 					socket,
728 					IPv4_BYTES(l3fwd_ipv4_route_array[i].ip),
729 					l3fwd_ipv4_route_array[i].depth,
730 					l3fwd_ipv4_route_array[i].if_out);
731 			}
732 		}
733 
734 		if (socket_lpm6[socket]) {
735 			lpm6 = socket_lpm6[socket];
736 			/* populate the LPM6 table */
737 			for (i = 0; i < RTE_DIM(l3fwd_ipv6_route_array); i++) {
738 				ret = rte_lpm6_add(lpm6,
739 					l3fwd_ipv6_route_array[i].ip,
740 					l3fwd_ipv6_route_array[i].depth,
741 					l3fwd_ipv6_route_array[i].if_out);
742 
743 				if (ret < 0) {
744 					RTE_LOG(ERR, IP_FRAG, "Unable to add entry %i to the l3fwd "
745 						"LPM6 table\n", i);
746 					return -1;
747 				}
748 
749 				RTE_LOG(INFO, IP_FRAG, "Socket %i: adding route " IPv6_BYTES_FMT
750 						"/%d (port %d)\n",
751 					socket,
752 					IPv6_BYTES(l3fwd_ipv6_route_array[i].ip),
753 					l3fwd_ipv6_route_array[i].depth,
754 					l3fwd_ipv6_route_array[i].if_out);
755 			}
756 		}
757 	}
758 	return 0;
759 }
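
/*
 * Once populated, a lookup costs at most two table reads; the pattern used
 * by l3fwd_simple_forward() above is simply:
 *
 *	uint32_t next_hop;
 *
 *	if (rte_lpm_lookup(lpm, rte_be_to_cpu_32(ip_hdr->dst_addr),
 *			&next_hop) == 0)
 *		port_out = next_hop;
 *
 * rte_lpm_lookup() returns 0 on a hit and a negative value on a miss, in
 * which case the packet is sent back out the port it arrived on.
 */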
760 
761 static int
762 init_mem(void)
763 {
764 	char buf[PATH_MAX];
765 	struct rte_mempool *mp;
766 	struct rte_lpm *lpm;
767 	struct rte_lpm6 *lpm6;
768 	struct rte_lpm_config lpm_config;
769 	int socket;
770 	unsigned lcore_id;
771 
772 	/* traverse through lcores and initialize structures on each socket */
773 
774 	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
775 
776 		if (rte_lcore_is_enabled(lcore_id) == 0)
777 			continue;
778 
779 		socket = rte_lcore_to_socket_id(lcore_id);
780 
781 		if (socket == SOCKET_ID_ANY)
782 			socket = 0;
783 
784 		if (socket_direct_pool[socket] == NULL) {
785 			RTE_LOG(INFO, IP_FRAG, "Creating direct mempool on socket %i\n",
786 					socket);
787 			snprintf(buf, sizeof(buf), "pool_direct_%i", socket);
788 
789 			mp = rte_pktmbuf_pool_create(buf, NB_MBUF, 32,
790 				0, RTE_MBUF_DEFAULT_BUF_SIZE, socket);
791 			if (mp == NULL) {
792 				RTE_LOG(ERR, IP_FRAG, "Cannot create direct mempool\n");
793 				return -1;
794 			}
795 			socket_direct_pool[socket] = mp;
796 		}
797 
798 		if (socket_indirect_pool[socket] == NULL) {
799 			RTE_LOG(INFO, IP_FRAG, "Creating indirect mempool on socket %i\n",
800 					socket);
801 			snprintf(buf, sizeof(buf), "pool_indirect_%i", socket);
802 
803 			mp = rte_pktmbuf_pool_create(buf, NB_MBUF, 32, 0, 0,
804 				socket);
805 			if (mp == NULL) {
806 				RTE_LOG(ERR, IP_FRAG, "Cannot create indirect mempool\n");
807 				return -1;
808 			}
809 			socket_indirect_pool[socket] = mp;
810 		}
811 
812 		if (socket_lpm[socket] == NULL) {
813 			RTE_LOG(INFO, IP_FRAG, "Creating LPM table on socket %i\n", socket);
814 			snprintf(buf, sizeof(buf), "IP_FRAG_LPM_%i", socket);
815 
816 			lpm_config.max_rules = LPM_MAX_RULES;
817 			lpm_config.number_tbl8s = 256;
818 			lpm_config.flags = 0;
819 
820 			lpm = rte_lpm_create(buf, socket, &lpm_config);
821 			if (lpm == NULL) {
822 				RTE_LOG(ERR, IP_FRAG, "Cannot create LPM table\n");
823 				return -1;
824 			}
825 			socket_lpm[socket] = lpm;
826 		}
827 
828 		if (socket_lpm6[socket] == NULL) {
829 			RTE_LOG(INFO, IP_FRAG, "Creating LPM6 table on socket %i\n", socket);
830 			snprintf(buf, sizeof(buf), "IP_FRAG_LPM6_%i", socket);
831 
832 			lpm6 = rte_lpm6_create(buf, socket, &lpm6_config);
833 			if (lpm6 == NULL) {
834 				RTE_LOG(ERR, IP_FRAG, "Cannot create LPM6 table\n");
835 				return -1;
836 			}
837 			socket_lpm6[socket] = lpm6;
838 		}
839 	}
840 
841 	return 0;
842 }
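
/*
 * Why two pools: each fragment emitted by rte_ipv4_fragment_packet() is a
 * small direct mbuf carrying the new IP header, chained to an indirect mbuf
 * that only references payload in the original packet. Indirect mbufs never
 * hold data of their own, which is why the indirect pool above is created
 * with a data room size of 0.
 */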
843 
844 int
845 main(int argc, char **argv)
846 {
847 	struct lcore_queue_conf *qconf;
848 	struct rte_eth_dev_info dev_info;
849 	struct rte_eth_txconf *txconf;
850 	struct rx_queue *rxq;
851 	int socket, ret;
852 	uint16_t nb_ports;
853 	uint16_t queueid = 0;
854 	unsigned lcore_id = 0, rx_lcore_id = 0;
855 	uint32_t n_tx_queue, nb_lcores;
856 	uint16_t portid;
857 
858 	/* init EAL */
859 	ret = rte_eal_init(argc, argv);
860 	if (ret < 0)
861 		rte_exit(EXIT_FAILURE, "rte_eal_init failed\n");
862 	argc -= ret;
863 	argv += ret;
864 
865 	/* parse application arguments (after the EAL ones) */
866 	ret = parse_args(argc, argv);
867 	if (ret < 0)
868 		rte_exit(EXIT_FAILURE, "Invalid arguments\n");
869 
870 	nb_ports = rte_eth_dev_count_avail();
871 	if (nb_ports == 0)
872 		rte_exit(EXIT_FAILURE, "No ports found!\n");
873 
874 	nb_lcores = rte_lcore_count();
875 
876 	/* initialize structures (mempools, lpm etc.) */
877 	if (init_mem() < 0)
878 		rte_panic("Cannot initialize memory structures!\n");
879 
880 	/* check if portmask has non-existent ports */
881 	if (enabled_port_mask & ~(RTE_LEN2MASK(nb_ports, unsigned)))
882 		rte_exit(EXIT_FAILURE, "Non-existent ports in portmask!\n");
883 
884 	/* initialize all ports */
885 	RTE_ETH_FOREACH_DEV(portid) {
886 		struct rte_eth_conf local_port_conf = port_conf;
887 		struct rte_eth_rxconf rxq_conf;
888 
889 		/* skip ports that are not enabled */
890 		if ((enabled_port_mask & (1 << portid)) == 0) {
891 			printf("Skipping disabled port %d\n", portid);
892 			continue;
893 		}
894 
895 		qconf = &lcore_queue_conf[rx_lcore_id];
896 
897 		/* limit the frame size to the maximum supported by NIC */
898 		rte_eth_dev_info_get(portid, &dev_info);
899 		local_port_conf.rxmode.max_rx_pkt_len = RTE_MIN(
900 		    dev_info.max_rx_pktlen,
901 		    local_port_conf.rxmode.max_rx_pkt_len);
902 
903 		/* get the lcore_id for this port */
904 		while (rte_lcore_is_enabled(rx_lcore_id) == 0 ||
905 		       qconf->n_rx_queue == (unsigned)rx_queue_per_lcore) {
906 
907 			rx_lcore_id++;
908 			if (rx_lcore_id >= RTE_MAX_LCORE)
909 				rte_exit(EXIT_FAILURE, "Not enough cores\n");
910 
911 			qconf = &lcore_queue_conf[rx_lcore_id];
912 		}
913 
914 		socket = (int) rte_lcore_to_socket_id(rx_lcore_id);
915 		if (socket == SOCKET_ID_ANY)
916 			socket = 0;
917 
918 		rxq = &qconf->rx_queue_list[qconf->n_rx_queue];
919 		rxq->portid = portid;
920 		rxq->direct_pool = socket_direct_pool[socket];
921 		rxq->indirect_pool = socket_indirect_pool[socket];
922 		rxq->lpm = socket_lpm[socket];
923 		rxq->lpm6 = socket_lpm6[socket];
924 		qconf->n_rx_queue++;
925 
926 		/* init port */
927 		printf("Initializing port %d on lcore %u...", portid,
928 		       rx_lcore_id);
929 		fflush(stdout);
930 
931 		n_tx_queue = nb_lcores;
932 		if (n_tx_queue > MAX_TX_QUEUE_PER_PORT)
933 			n_tx_queue = MAX_TX_QUEUE_PER_PORT;
934 		if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE)
935 			local_port_conf.txmode.offloads |=
936 				DEV_TX_OFFLOAD_MBUF_FAST_FREE;
937 		ret = rte_eth_dev_configure(portid, 1, (uint16_t)n_tx_queue,
938 					    &local_port_conf);
939 		if (ret < 0) {
940 			printf("\n");
941 			rte_exit(EXIT_FAILURE, "Cannot configure device: "
942 				"err=%d, port=%d\n",
943 				ret, portid);
944 		}
945 
946 		/* set the MTU to the max frame size minus L2/VLAN/CRC overhead */
947 		ret = rte_eth_dev_set_mtu(portid,
948 			local_port_conf.rxmode.max_rx_pkt_len - MTU_OVERHEAD);
949 		if (ret < 0) {
950 			printf("\n");
951 			rte_exit(EXIT_FAILURE, "Set MTU failed: "
952 				"err=%d, port=%d\n",
953 			ret, portid);
954 		}
955 
956 		ret = rte_eth_dev_adjust_nb_rx_tx_desc(portid, &nb_rxd,
957 					    &nb_txd);
958 		if (ret < 0) {
959 			printf("\n");
960 			rte_exit(EXIT_FAILURE, "Cannot adjust number of "
961 				"descriptors: err=%d, port=%d\n", ret, portid);
962 		}
963 
964 		/* init one RX queue */
965 		rxq_conf = dev_info.default_rxconf;
966 		rxq_conf.offloads = local_port_conf.rxmode.offloads;
967 		ret = rte_eth_rx_queue_setup(portid, 0, nb_rxd,
968 					     socket, &rxq_conf,
969 					     socket_direct_pool[socket]);
970 		if (ret < 0) {
971 			printf("\n");
972 			rte_exit(EXIT_FAILURE, "rte_eth_rx_queue_setup: "
973 				"err=%d, port=%d\n",
974 				ret, portid);
975 		}
976 
977 		rte_eth_macaddr_get(portid, &ports_eth_addr[portid]);
978 		print_ethaddr(" Address:", &ports_eth_addr[portid]);
979 		printf("\n");
980 
981 		/* init one TX queue per (lcore, port) pair */
982 		queueid = 0;
983 		for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
984 			if (rte_lcore_is_enabled(lcore_id) == 0)
985 				continue;
986 
987 			socket = (int) rte_lcore_to_socket_id(lcore_id);
988 			printf("txq=%u,%d ", lcore_id, queueid);
989 			fflush(stdout);
990 
991 			txconf = &dev_info.default_txconf;
992 			txconf->offloads = local_port_conf.txmode.offloads;
993 			ret = rte_eth_tx_queue_setup(portid, queueid, nb_txd,
994 						     socket, txconf);
995 			if (ret < 0) {
996 				printf("\n");
997 				rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup: "
998 					"err=%d, port=%d\n", ret, portid);
999 			}
1000 
1001 			qconf = &lcore_queue_conf[lcore_id];
1002 			qconf->tx_queue_id[portid] = queueid;
1003 			queueid++;
1004 		}
1005 
1006 		printf("\n");
1007 	}
1008 
1009 	printf("\n");
1010 
1011 	/* start ports */
1012 	RTE_ETH_FOREACH_DEV(portid) {
1013 		if ((enabled_port_mask & (1 << portid)) == 0) {
1014 			continue;
1015 		}
1016 		/* Start device */
1017 		ret = rte_eth_dev_start(portid);
1018 		if (ret < 0)
1019 			rte_exit(EXIT_FAILURE, "rte_eth_dev_start: err=%d, port=%d\n",
1020 				ret, portid);
1021 
1022 		rte_eth_promiscuous_enable(portid);
1023 
1024 		if (check_ptype(portid) == 0) {
1025 			rte_eth_add_rx_callback(portid, 0, cb_parse_ptype, NULL);
1026 			printf("Add Rx callback function to detect L3 packet type by SW :"
1027 				" port = %d\n", portid);
1028 		}
1029 	}
1030 
1031 	if (init_routing_table() < 0)
1032 		rte_exit(EXIT_FAILURE, "Cannot init routing table\n");
1033 
1034 	check_all_ports_link_status(enabled_port_mask);
1035 
1036 	/* launch per-lcore init on every lcore */
1037 	rte_eal_mp_remote_launch(main_loop, NULL, CALL_MASTER);
1038 	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
1039 		if (rte_eal_wait_lcore(lcore_id) < 0)
1040 			return -1;
1041 	}
1042 
1043 	return 0;
1044 }
1045