xref: /dpdk/examples/vmdq/main.c (revision 089e5ed727a15da2729cfee9b63533dd120bd04c)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2014 Intel Corporation
3  */
4 
5 #include <stdint.h>
6 #include <sys/queue.h>
7 #include <stdlib.h>
8 #include <string.h>
9 #include <stdio.h>
10 #include <assert.h>
11 #include <errno.h>
12 #include <signal.h>
13 #include <stdarg.h>
14 #include <inttypes.h>
15 #include <getopt.h>
16 
17 #include <rte_common.h>
18 #include <rte_log.h>
19 #include <rte_memory.h>
20 #include <rte_memcpy.h>
21 #include <rte_eal.h>
22 #include <rte_launch.h>
23 #include <rte_atomic.h>
24 #include <rte_cycles.h>
25 #include <rte_prefetch.h>
26 #include <rte_lcore.h>
27 #include <rte_per_lcore.h>
28 #include <rte_branch_prediction.h>
29 #include <rte_interrupts.h>
30 #include <rte_random.h>
31 #include <rte_debug.h>
32 #include <rte_ether.h>
33 #include <rte_ethdev.h>
34 #include <rte_mempool.h>
35 #include <rte_mbuf.h>
36 
37 #define MAX_QUEUES 1024
38 /*
39  * 1024 queues are required to meet the needs of a large number of vmdq_pools.
40  * (RX/TX_queue_nb * RX/TX_ring_descriptors_nb) per port.
41  */
42 #define NUM_MBUFS_PER_PORT (MAX_QUEUES * RTE_MAX(RTE_TEST_RX_DESC_DEFAULT, \
43 						RTE_TEST_TX_DESC_DEFAULT))
44 #define MBUF_CACHE_SIZE 64
45 
46 #define MAX_PKT_BURST 32
47 
48 /*
49  * Configurable number of RX/TX ring descriptors
50  */
51 #define RTE_TEST_RX_DESC_DEFAULT 1024
52 #define RTE_TEST_TX_DESC_DEFAULT 1024
53 
54 #define INVALID_PORT_ID 0xFF
55 
56 /* mask of enabled ports */
57 static uint32_t enabled_port_mask;
58 
59 /* number of pools (if user does not specify any, 8 by default) */
60 static uint32_t num_queues = 8;
61 static uint32_t num_pools = 8;
62 
63 /* empty vmdq configuration structure. Filled in programmatically */
64 static const struct rte_eth_conf vmdq_conf_default = {
65 	.rxmode = {
66 		.mq_mode        = ETH_MQ_RX_VMDQ_ONLY,
67 		.split_hdr_size = 0,
68 	},
69 
70 	.txmode = {
71 		.mq_mode = ETH_MQ_TX_NONE,
72 	},
73 	.rx_adv_conf = {
74 		/*
75 		 * should be overridden separately in code with
76 		 * appropriate values
77 		 */
78 		.vmdq_rx_conf = {
79 			.nb_queue_pools = ETH_8_POOLS,
80 			.enable_default_pool = 0,
81 			.default_pool = 0,
82 			.nb_pool_maps = 0,
83 			.pool_map = {{0, 0},},
84 		},
85 	},
86 };
87 
88 static unsigned lcore_ids[RTE_MAX_LCORE];
89 static uint16_t ports[RTE_MAX_ETHPORTS];
90 static unsigned num_ports; /**< The number of ports specified in command line */
91 
92 /* array used for printing out statistics */
93 volatile unsigned long rxPackets[MAX_QUEUES] = {0};
94 
95 const uint16_t vlan_tags[] = {
96 	0,  1,  2,  3,  4,  5,  6,  7,
97 	8,  9, 10, 11,	12, 13, 14, 15,
98 	16, 17, 18, 19, 20, 21, 22, 23,
99 	24, 25, 26, 27, 28, 29, 30, 31,
100 	32, 33, 34, 35, 36, 37, 38, 39,
101 	40, 41, 42, 43, 44, 45, 46, 47,
102 	48, 49, 50, 51, 52, 53, 54, 55,
103 	56, 57, 58, 59, 60, 61, 62, 63,
104 };
105 const uint16_t num_vlans = RTE_DIM(vlan_tags);
106 static uint16_t num_pf_queues,  num_vmdq_queues;
107 static uint16_t vmdq_pool_base, vmdq_queue_base;
108 /* pool mac addr template, pool mac addr is like: 52 54 00 12 port# pool# */
109 static struct rte_ether_addr pool_addr_template = {
110 	.addr_bytes = {0x52, 0x54, 0x00, 0x12, 0x00, 0x00}
111 };
112 
113 /* ethernet addresses of ports */
114 static struct rte_ether_addr vmdq_ports_eth_addr[RTE_MAX_ETHPORTS];
115 
116 #define MAX_QUEUE_NUM_10G 128
117 #define MAX_QUEUE_NUM_1G 8
118 #define MAX_POOL_MAP_NUM_10G 64
119 #define MAX_POOL_MAP_NUM_1G 32
120 #define MAX_POOL_NUM_10G 64
121 #define MAX_POOL_NUM_1G 8
122 /*
123  * Builds up the correct configuration for vmdq based on the vlan tags array
124  * given above, and determine the queue number and pool map number according to
125  * valid pool number
126  */
127 static inline int
128 get_eth_conf(struct rte_eth_conf *eth_conf, uint32_t num_pools)
129 {
130 	struct rte_eth_vmdq_rx_conf conf;
131 	unsigned i;
132 
133 	conf.nb_queue_pools = (enum rte_eth_nb_pools)num_pools;
134 	conf.nb_pool_maps = num_pools;
135 	conf.enable_default_pool = 0;
136 	conf.default_pool = 0; /* set explicit value, even if not used */
137 
138 	for (i = 0; i < conf.nb_pool_maps; i++) {
139 		conf.pool_map[i].vlan_id = vlan_tags[i];
140 		conf.pool_map[i].pools = (1UL << (i % num_pools));
141 	}
142 
143 	(void)(rte_memcpy(eth_conf, &vmdq_conf_default, sizeof(*eth_conf)));
144 	(void)(rte_memcpy(&eth_conf->rx_adv_conf.vmdq_rx_conf, &conf,
145 		   sizeof(eth_conf->rx_adv_conf.vmdq_rx_conf)));
146 	return 0;
147 }
148 
149 /*
150  * Initialises a given port using global settings and with the rx buffers
151  * coming from the mbuf_pool passed as parameter
152  */
153 static inline int
154 port_init(uint16_t port, struct rte_mempool *mbuf_pool)
155 {
156 	struct rte_eth_dev_info dev_info;
157 	struct rte_eth_rxconf *rxconf;
158 	struct rte_eth_txconf *txconf;
159 	struct rte_eth_conf port_conf;
160 	uint16_t rxRings, txRings;
161 	uint16_t rxRingSize = RTE_TEST_RX_DESC_DEFAULT;
162 	uint16_t txRingSize = RTE_TEST_TX_DESC_DEFAULT;
163 	int retval;
164 	uint16_t q;
165 	uint16_t queues_per_pool;
166 	uint32_t max_nb_pools;
167 
168 	/*
169 	 * The max pool number from dev_info will be used to validate the pool
170 	 * number specified in cmd line
171 	 */
172 	retval = rte_eth_dev_info_get(port, &dev_info);
173 	if (retval != 0) {
174 		printf("Error during getting device (port %u) info: %s\n",
175 				port, strerror(-retval));
176 		return retval;
177 	}
178 
179 	max_nb_pools = (uint32_t)dev_info.max_vmdq_pools;
180 	/*
181 	 * We allow to process part of VMDQ pools specified by num_pools in
182 	 * command line.
183 	 */
184 	if (num_pools > max_nb_pools) {
185 		printf("num_pools %d >max_nb_pools %d\n",
186 			num_pools, max_nb_pools);
187 		return -1;
188 	}
189 	retval = get_eth_conf(&port_conf, max_nb_pools);
190 	if (retval < 0)
191 		return retval;
192 
193 	/*
194 	 * NIC queues are divided into pf queues and vmdq queues.
195 	 */
196 	/* There is assumption here all ports have the same configuration! */
197 	num_pf_queues = dev_info.max_rx_queues - dev_info.vmdq_queue_num;
198 	queues_per_pool = dev_info.vmdq_queue_num / dev_info.max_vmdq_pools;
199 	num_vmdq_queues = num_pools * queues_per_pool;
200 	num_queues = num_pf_queues + num_vmdq_queues;
201 	vmdq_queue_base = dev_info.vmdq_queue_base;
202 	vmdq_pool_base  = dev_info.vmdq_pool_base;
203 
204 	printf("pf queue num: %u, configured vmdq pool num: %u,"
205 		" each vmdq pool has %u queues\n",
206 		num_pf_queues, num_pools, queues_per_pool);
207 	printf("vmdq queue base: %d pool base %d\n",
208 		vmdq_queue_base, vmdq_pool_base);
209 	if (!rte_eth_dev_is_valid_port(port))
210 		return -1;
211 
212 	/*
213 	 * Though in this example, we only receive packets from the first queue
214 	 * of each pool and send packets through first rte_lcore_count() tx
215 	 * queues of vmdq queues, all queues including pf queues are setup.
216 	 * This is because VMDQ queues doesn't always start from zero, and the
217 	 * PMD layer doesn't support selectively initialising part of rx/tx
218 	 * queues.
219 	 */
220 	rxRings = (uint16_t)dev_info.max_rx_queues;
221 	txRings = (uint16_t)dev_info.max_tx_queues;
222 
223 	retval = rte_eth_dev_info_get(port, &dev_info);
224 	if (retval != 0) {
225 		printf("Error during getting device (port %u) info: %s\n",
226 				port, strerror(-retval));
227 		return retval;
228 	}
229 
230 	if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE)
231 		port_conf.txmode.offloads |=
232 			DEV_TX_OFFLOAD_MBUF_FAST_FREE;
233 	retval = rte_eth_dev_configure(port, rxRings, txRings, &port_conf);
234 	if (retval != 0)
235 		return retval;
236 
237 	retval = rte_eth_dev_adjust_nb_rx_tx_desc(port, &rxRingSize,
238 				&txRingSize);
239 	if (retval != 0)
240 		return retval;
241 	if (RTE_MAX(rxRingSize, txRingSize) > RTE_MAX(RTE_TEST_RX_DESC_DEFAULT,
242 			RTE_TEST_TX_DESC_DEFAULT)) {
243 		printf("Mbuf pool has an insufficient size for port %u.\n",
244 			port);
245 		return -1;
246 	}
247 
248 	rxconf = &dev_info.default_rxconf;
249 	rxconf->rx_drop_en = 1;
250 	txconf = &dev_info.default_txconf;
251 	txconf->offloads = port_conf.txmode.offloads;
252 	for (q = 0; q < rxRings; q++) {
253 		retval = rte_eth_rx_queue_setup(port, q, rxRingSize,
254 					rte_eth_dev_socket_id(port),
255 					rxconf,
256 					mbuf_pool);
257 		if (retval < 0) {
258 			printf("initialise rx queue %d failed\n", q);
259 			return retval;
260 		}
261 	}
262 
263 	for (q = 0; q < txRings; q++) {
264 		retval = rte_eth_tx_queue_setup(port, q, txRingSize,
265 					rte_eth_dev_socket_id(port),
266 					txconf);
267 		if (retval < 0) {
268 			printf("initialise tx queue %d failed\n", q);
269 			return retval;
270 		}
271 	}
272 
273 	retval  = rte_eth_dev_start(port);
274 	if (retval < 0) {
275 		printf("port %d start failed\n", port);
276 		return retval;
277 	}
278 
279 	rte_eth_macaddr_get(port, &vmdq_ports_eth_addr[port]);
280 	printf("Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8
281 			" %02"PRIx8" %02"PRIx8" %02"PRIx8"\n",
282 			(unsigned)port,
283 			vmdq_ports_eth_addr[port].addr_bytes[0],
284 			vmdq_ports_eth_addr[port].addr_bytes[1],
285 			vmdq_ports_eth_addr[port].addr_bytes[2],
286 			vmdq_ports_eth_addr[port].addr_bytes[3],
287 			vmdq_ports_eth_addr[port].addr_bytes[4],
288 			vmdq_ports_eth_addr[port].addr_bytes[5]);
289 
290 	/*
291 	 * Set mac for each pool.
292 	 * There is no default mac for the pools in i40.
293 	 * Removes this after i40e fixes this issue.
294 	 */
295 	for (q = 0; q < num_pools; q++) {
296 		struct rte_ether_addr mac;
297 		mac = pool_addr_template;
298 		mac.addr_bytes[4] = port;
299 		mac.addr_bytes[5] = q;
300 		printf("Port %u vmdq pool %u set mac %02x:%02x:%02x:%02x:%02x:%02x\n",
301 			port, q,
302 			mac.addr_bytes[0], mac.addr_bytes[1],
303 			mac.addr_bytes[2], mac.addr_bytes[3],
304 			mac.addr_bytes[4], mac.addr_bytes[5]);
305 		retval = rte_eth_dev_mac_addr_add(port, &mac,
306 				q + vmdq_pool_base);
307 		if (retval) {
308 			printf("mac addr add failed at pool %d\n", q);
309 			return retval;
310 		}
311 	}
312 
313 	return 0;
314 }
315 
316 /* Check num_pools parameter and set it if OK*/
317 static int
318 vmdq_parse_num_pools(const char *q_arg)
319 {
320 	char *end = NULL;
321 	int n;
322 
323 	/* parse number string */
324 	n = strtol(q_arg, &end, 10);
325 	if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0'))
326 		return -1;
327 
328 	if (num_pools > num_vlans) {
329 		printf("num_pools %d > num_vlans %d\n", num_pools, num_vlans);
330 		return -1;
331 	}
332 
333 	num_pools = n;
334 
335 	return 0;
336 }
337 
338 
/*
 * Parse a hexadecimal port mask.
 *
 * portmask: string holding the mask in hexadecimal.
 *
 * Returns the mask as a non-zero value, or 0 when the string is empty, has
 * trailing characters, is out of range, does not fit in 32 bits or selects
 * no port. The caller treats 0 as "invalid", so every failure maps to 0.
 */
static int
parse_portmask(const char *portmask)
{
	char *end = NULL;
	unsigned long pm;

	/* parse hexadecimal string */
	errno = 0;
	pm = strtoul(portmask, &end, 16);
	if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0') ||
	    (errno != 0))
		return 0;

	/* the port mask is stored in 32 bits; do not truncate silently */
	if (pm > UINT32_MAX)
		return 0;

	/* a mask with bit 31 set comes back as a negative int */
	return (int)pm;
}
355 
/*
 * Display usage.
 *
 * prgname: program name printed at the start of the synopsis line.
 */
static void
vmdq_usage(const char *prgname)
{
	printf("%s [EAL options] -- -p PORTMASK [--nb-pools NP]\n"
	"  -p PORTMASK: hexadecimal bitmask of ports to use\n"
	"  --nb-pools NP: number of pools\n",
	       prgname);
}
364 
365 /*  Parse the argument (num_pools) given in the command line of the application */
366 static int
367 vmdq_parse_args(int argc, char **argv)
368 {
369 	int opt;
370 	int option_index;
371 	unsigned i;
372 	const char *prgname = argv[0];
373 	static struct option long_option[] = {
374 		{"nb-pools", required_argument, NULL, 0},
375 		{NULL, 0, 0, 0}
376 	};
377 
378 	/* Parse command line */
379 	while ((opt = getopt_long(argc, argv, "p:", long_option,
380 		&option_index)) != EOF) {
381 		switch (opt) {
382 		/* portmask */
383 		case 'p':
384 			enabled_port_mask = parse_portmask(optarg);
385 			if (enabled_port_mask == 0) {
386 				printf("invalid portmask\n");
387 				vmdq_usage(prgname);
388 				return -1;
389 			}
390 			break;
391 		case 0:
392 			if (vmdq_parse_num_pools(optarg) == -1) {
393 				printf("invalid number of pools\n");
394 				vmdq_usage(prgname);
395 				return -1;
396 			}
397 			break;
398 
399 		default:
400 			vmdq_usage(prgname);
401 			return -1;
402 		}
403 	}
404 
405 	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
406 		if (enabled_port_mask & (1 << i))
407 			ports[num_ports++] = (uint8_t)i;
408 	}
409 
410 	if (num_ports < 2 || num_ports % 2) {
411 		printf("Current enabled port number is %u,"
412 			"but it should be even and at least 2\n", num_ports);
413 		return -1;
414 	}
415 
416 	return 0;
417 }
418 
419 static void
420 update_mac_address(struct rte_mbuf *m, unsigned dst_port)
421 {
422 	struct rte_ether_hdr *eth;
423 	void *tmp;
424 
425 	eth = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
426 
427 	/* 02:00:00:00:00:xx */
428 	tmp = &eth->d_addr.addr_bytes[0];
429 	*((uint64_t *)tmp) = 0x000000000002 + ((uint64_t)dst_port << 40);
430 
431 	/* src addr */
432 	rte_ether_addr_copy(&vmdq_ports_eth_addr[dst_port], &eth->s_addr);
433 }
434 
435 /* When we receive a HUP signal, print out our stats */
436 static void
437 sighup_handler(int signum)
438 {
439 	unsigned q;
440 	for (q = 0; q < num_queues; q++) {
441 		if (q % (num_queues/num_pools) == 0)
442 			printf("\nPool %u: ", q/(num_queues/num_pools));
443 		printf("%lu ", rxPackets[q]);
444 	}
445 	printf("\nFinished handling signal %d\n", signum);
446 }
447 
448 /*
449  * Main thread that does the work, reading from INPUT_PORT
450  * and writing to OUTPUT_PORT
451  */
452 static int
453 lcore_main(__attribute__((__unused__)) void *dummy)
454 {
455 	const uint16_t lcore_id = (uint16_t)rte_lcore_id();
456 	const uint16_t num_cores = (uint16_t)rte_lcore_count();
457 	uint16_t core_id = 0;
458 	uint16_t startQueue, endQueue;
459 	uint16_t q, i, p;
460 	const uint16_t remainder = (uint16_t)(num_vmdq_queues % num_cores);
461 
462 	for (i = 0; i < num_cores; i++)
463 		if (lcore_ids[i] == lcore_id) {
464 			core_id = i;
465 			break;
466 		}
467 
468 	if (remainder != 0) {
469 		if (core_id < remainder) {
470 			startQueue = (uint16_t)(core_id *
471 					(num_vmdq_queues / num_cores + 1));
472 			endQueue = (uint16_t)(startQueue +
473 					(num_vmdq_queues / num_cores) + 1);
474 		} else {
475 			startQueue = (uint16_t)(core_id *
476 					(num_vmdq_queues / num_cores) +
477 					remainder);
478 			endQueue = (uint16_t)(startQueue +
479 					(num_vmdq_queues / num_cores));
480 		}
481 	} else {
482 		startQueue = (uint16_t)(core_id *
483 				(num_vmdq_queues / num_cores));
484 		endQueue = (uint16_t)(startQueue +
485 				(num_vmdq_queues / num_cores));
486 	}
487 
488 	/* vmdq queue idx doesn't always start from zero.*/
489 	startQueue += vmdq_queue_base;
490 	endQueue   += vmdq_queue_base;
491 	printf("core %u(lcore %u) reading queues %i-%i\n", (unsigned)core_id,
492 		(unsigned)lcore_id, startQueue, endQueue - 1);
493 
494 	if (startQueue == endQueue) {
495 		printf("lcore %u has nothing to do\n", lcore_id);
496 		return 0;
497 	}
498 
499 	for (;;) {
500 		struct rte_mbuf *buf[MAX_PKT_BURST];
501 		const uint16_t buf_size = sizeof(buf) / sizeof(buf[0]);
502 
503 		for (p = 0; p < num_ports; p++) {
504 			const uint8_t sport = ports[p];
505 			/* 0 <-> 1, 2 <-> 3 etc */
506 			const uint8_t dport = ports[p ^ 1];
507 			if ((sport == INVALID_PORT_ID) || (dport == INVALID_PORT_ID))
508 				continue;
509 
510 			for (q = startQueue; q < endQueue; q++) {
511 				const uint16_t rxCount = rte_eth_rx_burst(sport,
512 					q, buf, buf_size);
513 
514 				if (unlikely(rxCount == 0))
515 					continue;
516 
517 				rxPackets[q] += rxCount;
518 
519 				for (i = 0; i < rxCount; i++)
520 					update_mac_address(buf[i], dport);
521 
522 				const uint16_t txCount = rte_eth_tx_burst(dport,
523 					vmdq_queue_base + core_id,
524 					buf,
525 					rxCount);
526 
527 				if (txCount != rxCount) {
528 					for (i = txCount; i < rxCount; i++)
529 						rte_pktmbuf_free(buf[i]);
530 				}
531 			}
532 		}
533 	}
534 }
535 
536 /*
537  * Update the global var NUM_PORTS and array PORTS according to system ports number
538  * and return valid ports number
539  */
540 static unsigned check_ports_num(unsigned nb_ports)
541 {
542 	unsigned valid_num_ports = num_ports;
543 	unsigned portid;
544 
545 	if (num_ports > nb_ports) {
546 		printf("\nSpecified port number(%u) exceeds total system port number(%u)\n",
547 			num_ports, nb_ports);
548 		num_ports = nb_ports;
549 	}
550 
551 	for (portid = 0; portid < num_ports; portid++) {
552 		if (!rte_eth_dev_is_valid_port(ports[portid])) {
553 			printf("\nSpecified port ID(%u) is not valid\n",
554 				ports[portid]);
555 			ports[portid] = INVALID_PORT_ID;
556 			valid_num_ports--;
557 		}
558 	}
559 	return valid_num_ports;
560 }
561 
562 /* Main function, does initialisation and calls the per-lcore functions */
563 int
564 main(int argc, char *argv[])
565 {
566 	struct rte_mempool *mbuf_pool;
567 	unsigned lcore_id, core_id = 0;
568 	int ret;
569 	unsigned nb_ports, valid_num_ports;
570 	uint16_t portid;
571 
572 	signal(SIGHUP, sighup_handler);
573 
574 	/* init EAL */
575 	ret = rte_eal_init(argc, argv);
576 	if (ret < 0)
577 		rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
578 	argc -= ret;
579 	argv += ret;
580 
581 	/* parse app arguments */
582 	ret = vmdq_parse_args(argc, argv);
583 	if (ret < 0)
584 		rte_exit(EXIT_FAILURE, "Invalid VMDQ argument\n");
585 
586 	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
587 		if (rte_lcore_is_enabled(lcore_id))
588 			lcore_ids[core_id++] = lcore_id;
589 
590 	if (rte_lcore_count() > RTE_MAX_LCORE)
591 		rte_exit(EXIT_FAILURE, "Not enough cores\n");
592 
593 	nb_ports = rte_eth_dev_count_avail();
594 
595 	/*
596 	 * Update the global var NUM_PORTS and global array PORTS
597 	 * and get value of var VALID_NUM_PORTS according to system ports number
598 	 */
599 	valid_num_ports = check_ports_num(nb_ports);
600 
601 	if (valid_num_ports < 2 || valid_num_ports % 2) {
602 		printf("Current valid ports number is %u\n", valid_num_ports);
603 		rte_exit(EXIT_FAILURE, "Error with valid ports number is not even or less than 2\n");
604 	}
605 
606 	mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL",
607 		NUM_MBUFS_PER_PORT * nb_ports, MBUF_CACHE_SIZE,
608 		0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
609 	if (mbuf_pool == NULL)
610 		rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n");
611 
612 	/* initialize all ports */
613 	RTE_ETH_FOREACH_DEV(portid) {
614 		/* skip ports that are not enabled */
615 		if ((enabled_port_mask & (1 << portid)) == 0) {
616 			printf("\nSkipping disabled port %d\n", portid);
617 			continue;
618 		}
619 		if (port_init(portid, mbuf_pool) != 0)
620 			rte_exit(EXIT_FAILURE, "Cannot initialize network ports\n");
621 	}
622 
623 	/* call lcore_main() on every lcore */
624 	rte_eal_mp_remote_launch(lcore_main, NULL, CALL_MASTER);
625 	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
626 		if (rte_eal_wait_lcore(lcore_id) < 0)
627 			return -1;
628 	}
629 
630 	return 0;
631 }
632