xref: /dpdk/examples/vmdq/main.c (revision 117eaa70584b73eebf6f648cf3ee6f2ab03264a0)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2014 Intel Corporation
3  */
4 
5 #include <stdint.h>
6 #include <sys/queue.h>
7 #include <stdlib.h>
8 #include <string.h>
9 #include <stdio.h>
10 #include <assert.h>
11 #include <errno.h>
12 #include <signal.h>
13 #include <stdarg.h>
14 #include <inttypes.h>
15 #include <getopt.h>
16 
17 #include <rte_common.h>
18 #include <rte_log.h>
19 #include <rte_memory.h>
20 #include <rte_memcpy.h>
21 #include <rte_eal.h>
22 #include <rte_launch.h>
23 #include <rte_atomic.h>
24 #include <rte_cycles.h>
25 #include <rte_prefetch.h>
26 #include <rte_lcore.h>
27 #include <rte_per_lcore.h>
28 #include <rte_branch_prediction.h>
29 #include <rte_interrupts.h>
30 #include <rte_random.h>
31 #include <rte_debug.h>
32 #include <rte_ether.h>
33 #include <rte_ethdev.h>
34 #include <rte_mempool.h>
35 #include <rte_mbuf.h>
36 
37 #define MAX_QUEUES 1024
38 /*
39  * 1024 queues require to meet the needs of a large number of vmdq_pools.
40  * (RX/TX_queue_nb * RX/TX_ring_descriptors_nb) per port.
41  */
42 #define NUM_MBUFS_PER_PORT (MAX_QUEUES * RTE_MAX(RTE_TEST_RX_DESC_DEFAULT, \
43 						RTE_TEST_TX_DESC_DEFAULT))
44 #define MBUF_CACHE_SIZE 64
45 
46 #define MAX_PKT_BURST 32
47 
48 /*
49  * Configurable number of RX/TX ring descriptors
50  */
51 #define RTE_TEST_RX_DESC_DEFAULT 1024
52 #define RTE_TEST_TX_DESC_DEFAULT 1024
53 
54 #define INVALID_PORT_ID 0xFF
55 
56 /* mask of enabled ports */
57 static uint32_t enabled_port_mask;
58 
/* number of pools (if user does not specify any, 8 by default) */
60 static uint32_t num_queues = 8;
61 static uint32_t num_pools = 8;
62 
/*
 * Empty VMDq configuration structure, filled in programmatically.
 * port_init() copies this as the base port config and then overwrites
 * rx_adv_conf.vmdq_rx_conf with values computed in get_eth_conf().
 */
static const struct rte_eth_conf vmdq_conf_default = {
	.rxmode = {
		.mq_mode        = ETH_MQ_RX_VMDQ_ONLY, /* RX queues split across VMDq pools */
		.split_hdr_size = 0,
		.ignore_offload_bitfield = 1,
	},

	.txmode = {
		.mq_mode = ETH_MQ_TX_NONE, /* no TX multi-queue scheme */
	},
	.rx_adv_conf = {
		/*
		 * should be overridden separately in code with
		 * appropriate values
		 */
		.vmdq_rx_conf = {
			.nb_queue_pools = ETH_8_POOLS,
			.enable_default_pool = 0,
			.default_pool = 0,
			.nb_pool_maps = 0,
			.pool_map = {{0, 0},},
		},
	},
};
88 
89 static unsigned lcore_ids[RTE_MAX_LCORE];
90 static uint16_t ports[RTE_MAX_ETHPORTS];
91 static unsigned num_ports; /**< The number of ports specified in command line */
92 
93 /* array used for printing out statistics */
94 volatile unsigned long rxPackets[MAX_QUEUES] = {0};
95 
96 const uint16_t vlan_tags[] = {
97 	0,  1,  2,  3,  4,  5,  6,  7,
98 	8,  9, 10, 11,	12, 13, 14, 15,
99 	16, 17, 18, 19, 20, 21, 22, 23,
100 	24, 25, 26, 27, 28, 29, 30, 31,
101 	32, 33, 34, 35, 36, 37, 38, 39,
102 	40, 41, 42, 43, 44, 45, 46, 47,
103 	48, 49, 50, 51, 52, 53, 54, 55,
104 	56, 57, 58, 59, 60, 61, 62, 63,
105 };
106 const uint16_t num_vlans = RTE_DIM(vlan_tags);
107 static uint16_t num_pf_queues,  num_vmdq_queues;
108 static uint16_t vmdq_pool_base, vmdq_queue_base;
109 /* pool mac addr template, pool mac addr is like: 52 54 00 12 port# pool# */
110 static struct ether_addr pool_addr_template = {
111 	.addr_bytes = {0x52, 0x54, 0x00, 0x12, 0x00, 0x00}
112 };
113 
114 /* ethernet addresses of ports */
115 static struct ether_addr vmdq_ports_eth_addr[RTE_MAX_ETHPORTS];
116 
117 #define MAX_QUEUE_NUM_10G 128
118 #define MAX_QUEUE_NUM_1G 8
119 #define MAX_POOL_MAP_NUM_10G 64
120 #define MAX_POOL_MAP_NUM_1G 32
121 #define MAX_POOL_NUM_10G 64
122 #define MAX_POOL_NUM_1G 8
123 /*
124  * Builds up the correct configuration for vmdq based on the vlan tags array
125  * given above, and determine the queue number and pool map number according to
126  * valid pool number
127  */
128 static inline int
129 get_eth_conf(struct rte_eth_conf *eth_conf, uint32_t num_pools)
130 {
131 	struct rte_eth_vmdq_rx_conf conf;
132 	unsigned i;
133 
134 	conf.nb_queue_pools = (enum rte_eth_nb_pools)num_pools;
135 	conf.nb_pool_maps = num_pools;
136 	conf.enable_default_pool = 0;
137 	conf.default_pool = 0; /* set explicit value, even if not used */
138 
139 	for (i = 0; i < conf.nb_pool_maps; i++) {
140 		conf.pool_map[i].vlan_id = vlan_tags[i];
141 		conf.pool_map[i].pools = (1UL << (i % num_pools));
142 	}
143 
144 	(void)(rte_memcpy(eth_conf, &vmdq_conf_default, sizeof(*eth_conf)));
145 	(void)(rte_memcpy(&eth_conf->rx_adv_conf.vmdq_rx_conf, &conf,
146 		   sizeof(eth_conf->rx_adv_conf.vmdq_rx_conf)));
147 	return 0;
148 }
149 
/*
 * Initialises a given port using global settings and with the rx buffers
 * coming from the mbuf_pool passed as parameter.
 * Configures ALL rx/tx queues the device reports (not just the VMDq ones),
 * starts the port, and programs one MAC address per VMDq pool.
 * Returns 0 on success, negative on any failure.
 */
static inline int
port_init(uint16_t port, struct rte_mempool *mbuf_pool)
{
	struct rte_eth_dev_info dev_info;
	struct rte_eth_rxconf *rxconf;
	struct rte_eth_txconf *txconf;
	struct rte_eth_conf port_conf;
	uint16_t rxRings, txRings;
	uint16_t rxRingSize = RTE_TEST_RX_DESC_DEFAULT;
	uint16_t txRingSize = RTE_TEST_TX_DESC_DEFAULT;
	int retval;
	uint16_t q;
	uint16_t queues_per_pool;
	uint32_t max_nb_pools;

	/*
	 * The max pool number from dev_info will be used to validate the pool
	 * number specified in cmd line
	 */
	rte_eth_dev_info_get(port, &dev_info);
	max_nb_pools = (uint32_t)dev_info.max_vmdq_pools;
	/*
	 * We allow to process part of VMDQ pools specified by num_pools in
	 * command line.
	 */
	if (num_pools > max_nb_pools) {
		printf("num_pools %d >max_nb_pools %d\n",
			num_pools, max_nb_pools);
		return -1;
	}
	/*
	 * NOTE(review): the device is configured with ALL max_nb_pools pools
	 * even though only num_pools are used by the app (the printf below
	 * reports num_pools) — confirm this asymmetry is intentional.
	 */
	retval = get_eth_conf(&port_conf, max_nb_pools);
	if (retval < 0)
		return retval;

	/*
	 * NIC queues are divided into pf queues and vmdq queues.
	 */
	/* There is assumption here all ports have the same configuration! */
	num_pf_queues = dev_info.max_rx_queues - dev_info.vmdq_queue_num;
	queues_per_pool = dev_info.vmdq_queue_num / dev_info.max_vmdq_pools;
	num_vmdq_queues = num_pools * queues_per_pool;
	num_queues = num_pf_queues + num_vmdq_queues;
	vmdq_queue_base = dev_info.vmdq_queue_base;
	vmdq_pool_base  = dev_info.vmdq_pool_base;

	printf("pf queue num: %u, configured vmdq pool num: %u,"
		" each vmdq pool has %u queues\n",
		num_pf_queues, num_pools, queues_per_pool);
	printf("vmdq queue base: %d pool base %d\n",
		vmdq_queue_base, vmdq_pool_base);
	if (port >= rte_eth_dev_count())
		return -1;

	/*
	 * Though in this example, we only receive packets from the first queue
	 * of each pool and send packets through first rte_lcore_count() tx
	 * queues of vmdq queues, all queues including pf queues are setup.
	 * This is because VMDQ queues doesn't always start from zero, and the
	 * PMD layer doesn't support selectively initialising part of rx/tx
	 * queues.
	 */
	rxRings = (uint16_t)dev_info.max_rx_queues;
	txRings = (uint16_t)dev_info.max_tx_queues;

	/* NOTE(review): dev_info was already fetched above; this second call
	 * looks redundant — confirm it is not needed for refreshed offloads. */
	rte_eth_dev_info_get(port, &dev_info);
	if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE)
		port_conf.txmode.offloads |=
			DEV_TX_OFFLOAD_MBUF_FAST_FREE;
	retval = rte_eth_dev_configure(port, rxRings, txRings, &port_conf);
	if (retval != 0)
		return retval;

	/* let the driver clamp descriptor counts to its supported range */
	retval = rte_eth_dev_adjust_nb_rx_tx_desc(port, &rxRingSize,
				&txRingSize);
	if (retval != 0)
		return retval;
	/* mbuf pool was sized from the DEFAULT descriptor counts; refuse to
	 * run if the driver demands more descriptors than that */
	if (RTE_MAX(rxRingSize, txRingSize) > RTE_MAX(RTE_TEST_RX_DESC_DEFAULT,
			RTE_TEST_TX_DESC_DEFAULT)) {
		printf("Mbuf pool has an insufficient size for port %u.\n",
			port);
		return -1;
	}

	rxconf = &dev_info.default_rxconf;
	rxconf->rx_drop_en = 1; /* drop on rx-descriptor exhaustion instead of stalling the port */
	txconf = &dev_info.default_txconf;
	txconf->txq_flags = ETH_TXQ_FLAGS_IGNORE; /* use the offloads field, not legacy txq_flags */
	txconf->offloads = port_conf.txmode.offloads;
	for (q = 0; q < rxRings; q++) {
		retval = rte_eth_rx_queue_setup(port, q, rxRingSize,
					rte_eth_dev_socket_id(port),
					rxconf,
					mbuf_pool);
		if (retval < 0) {
			printf("initialise rx queue %d failed\n", q);
			return retval;
		}
	}

	for (q = 0; q < txRings; q++) {
		retval = rte_eth_tx_queue_setup(port, q, txRingSize,
					rte_eth_dev_socket_id(port),
					txconf);
		if (retval < 0) {
			printf("initialise tx queue %d failed\n", q);
			return retval;
		}
	}

	retval  = rte_eth_dev_start(port);
	if (retval < 0) {
		printf("port %d start failed\n", port);
		return retval;
	}

	rte_eth_macaddr_get(port, &vmdq_ports_eth_addr[port]);
	printf("Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8
			" %02"PRIx8" %02"PRIx8" %02"PRIx8"\n",
			(unsigned)port,
			vmdq_ports_eth_addr[port].addr_bytes[0],
			vmdq_ports_eth_addr[port].addr_bytes[1],
			vmdq_ports_eth_addr[port].addr_bytes[2],
			vmdq_ports_eth_addr[port].addr_bytes[3],
			vmdq_ports_eth_addr[port].addr_bytes[4],
			vmdq_ports_eth_addr[port].addr_bytes[5]);

	/*
	 * Set mac for each pool.
	 * There is no default mac for the pools in i40.
	 * Removes this after i40e fixes this issue.
	 */
	for (q = 0; q < num_pools; q++) {
		struct ether_addr mac;
		/* per-pool MAC: template 52:54:00:12:<port>:<pool> */
		mac = pool_addr_template;
		mac.addr_bytes[4] = port;
		mac.addr_bytes[5] = q;
		printf("Port %u vmdq pool %u set mac %02x:%02x:%02x:%02x:%02x:%02x\n",
			port, q,
			mac.addr_bytes[0], mac.addr_bytes[1],
			mac.addr_bytes[2], mac.addr_bytes[3],
			mac.addr_bytes[4], mac.addr_bytes[5]);
		retval = rte_eth_dev_mac_addr_add(port, &mac,
				q + vmdq_pool_base);
		if (retval) {
			printf("mac addr add failed at pool %d\n", q);
			return retval;
		}
	}

	return 0;
}
305 
306 /* Check num_pools parameter and set it if OK*/
307 static int
308 vmdq_parse_num_pools(const char *q_arg)
309 {
310 	char *end = NULL;
311 	int n;
312 
313 	/* parse number string */
314 	n = strtol(q_arg, &end, 10);
315 	if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0'))
316 		return -1;
317 
318 	if (num_pools > num_vlans) {
319 		printf("num_pools %d > num_vlans %d\n", num_pools, num_vlans);
320 		return -1;
321 	}
322 
323 	num_pools = n;
324 
325 	return 0;
326 }
327 
/*
 * Parse the hexadecimal port mask given on the command line.
 * Returns the mask, or 0 on bad/empty input.
 * The only caller stores the result in a uint32_t and tests "== 0", so
 * the previous "-1" error return wrapped to 0xffffffff and let parse
 * errors slip through undetected; 0 is the correct error sentinel here
 * (an all-zero mask is itself invalid).
 */
static int
parse_portmask(const char *portmask)
{
	char *end = NULL;
	unsigned long pm;

	/* parse hexadecimal string */
	pm = strtoul(portmask, &end, 16);
	if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
		return 0;

	return pm;
}
/*
 * Display usage.
 * (The old string had a stray ']' after PORTMASK with no opening bracket.)
 */
static void
vmdq_usage(const char *prgname)
{
	printf("%s [EAL options] -- -p PORTMASK\n"
	"  --nb-pools NP: number of pools\n",
	       prgname);
}
354 
355 /*  Parse the argument (num_pools) given in the command line of the application */
356 static int
357 vmdq_parse_args(int argc, char **argv)
358 {
359 	int opt;
360 	int option_index;
361 	unsigned i;
362 	const char *prgname = argv[0];
363 	static struct option long_option[] = {
364 		{"nb-pools", required_argument, NULL, 0},
365 		{NULL, 0, 0, 0}
366 	};
367 
368 	/* Parse command line */
369 	while ((opt = getopt_long(argc, argv, "p:", long_option,
370 		&option_index)) != EOF) {
371 		switch (opt) {
372 		/* portmask */
373 		case 'p':
374 			enabled_port_mask = parse_portmask(optarg);
375 			if (enabled_port_mask == 0) {
376 				printf("invalid portmask\n");
377 				vmdq_usage(prgname);
378 				return -1;
379 			}
380 			break;
381 		case 0:
382 			if (vmdq_parse_num_pools(optarg) == -1) {
383 				printf("invalid number of pools\n");
384 				vmdq_usage(prgname);
385 				return -1;
386 			}
387 			break;
388 
389 		default:
390 			vmdq_usage(prgname);
391 			return -1;
392 		}
393 	}
394 
395 	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
396 		if (enabled_port_mask & (1 << i))
397 			ports[num_ports++] = (uint8_t)i;
398 	}
399 
400 	if (num_ports < 2 || num_ports % 2) {
401 		printf("Current enabled port number is %u,"
402 			"but it should be even and at least 2\n", num_ports);
403 		return -1;
404 	}
405 
406 	return 0;
407 }
/*
 * Rewrite a packet's Ethernet header before forwarding: destination MAC
 * becomes 02:00:00:00:00:<dst_port>, source MAC becomes the egress port's
 * own address.
 */
static void
update_mac_address(struct rte_mbuf *m, unsigned dst_port)
{
	struct ether_hdr *eth;
	void *tmp;

	eth = rte_pktmbuf_mtod(m, struct ether_hdr *);

	/* 02:00:00:00:00:xx */
	tmp = &eth->d_addr.addr_bytes[0];
	/*
	 * Single 8-byte store covering the 6-byte d_addr; the 2 bytes that
	 * spill into s_addr are overwritten by the copy just below.
	 * NOTE(review): the byte placement (0x02 in byte 0, port in byte 5)
	 * assumes a little-endian CPU — confirm for the target platforms.
	 */
	*((uint64_t *)tmp) = 0x000000000002 + ((uint64_t)dst_port << 40);

	/* src addr */
	ether_addr_copy(&vmdq_ports_eth_addr[dst_port], &eth->s_addr);
}
424 
425 /* When we receive a HUP signal, print out our stats */
426 static void
427 sighup_handler(int signum)
428 {
429 	unsigned q;
430 	for (q = 0; q < num_queues; q++) {
431 		if (q % (num_queues/num_pools) == 0)
432 			printf("\nPool %u: ", q/(num_queues/num_pools));
433 		printf("%lu ", rxPackets[q]);
434 	}
435 	printf("\nFinished handling signal %d\n", signum);
436 }
437 
/*
 * Main thread that does the work, reading from INPUT_PORT
 * and writing to OUTPUT_PORT.
 * Each lcore claims a contiguous slice of the VMDq rx queues, then loops
 * forever: rx burst from its queues on each even/odd port pair, rewrite
 * MACs, tx burst out of the paired port. Never returns.
 */
static int
lcore_main(__attribute__((__unused__)) void *dummy)
{
	const uint16_t lcore_id = (uint16_t)rte_lcore_id();
	const uint16_t num_cores = (uint16_t)rte_lcore_count();
	uint16_t core_id = 0;
	uint16_t startQueue, endQueue;
	uint16_t q, i, p;
	/* queues left over after an even split across cores */
	const uint16_t remainder = (uint16_t)(num_vmdq_queues % num_cores);

	/* map this lcore id to its 0-based index in lcore_ids[] */
	for (i = 0; i < num_cores; i++)
		if (lcore_ids[i] == lcore_id) {
			core_id = i;
			break;
		}

	/*
	 * Split num_vmdq_queues across cores: the first `remainder` cores
	 * take one extra queue each so all queues are covered.
	 */
	if (remainder != 0) {
		if (core_id < remainder) {
			startQueue = (uint16_t)(core_id *
					(num_vmdq_queues / num_cores + 1));
			endQueue = (uint16_t)(startQueue +
					(num_vmdq_queues / num_cores) + 1);
		} else {
			startQueue = (uint16_t)(core_id *
					(num_vmdq_queues / num_cores) +
					remainder);
			endQueue = (uint16_t)(startQueue +
					(num_vmdq_queues / num_cores));
		}
	} else {
		startQueue = (uint16_t)(core_id *
				(num_vmdq_queues / num_cores));
		endQueue = (uint16_t)(startQueue +
				(num_vmdq_queues / num_cores));
	}

	/* vmdq queue idx doesn't always start from zero.*/
	startQueue += vmdq_queue_base;
	endQueue   += vmdq_queue_base;
	printf("core %u(lcore %u) reading queues %i-%i\n", (unsigned)core_id,
		(unsigned)lcore_id, startQueue, endQueue - 1);

	/* more cores than queues: this core got an empty slice */
	if (startQueue == endQueue) {
		printf("lcore %u has nothing to do\n", lcore_id);
		return 0;
	}

	for (;;) {
		struct rte_mbuf *buf[MAX_PKT_BURST];
		const uint16_t buf_size = sizeof(buf) / sizeof(buf[0]);

		for (p = 0; p < num_ports; p++) {
			/* NOTE(review): ports[] is uint16_t but truncated to
			 * uint8_t here; breaks for port ids > 255 — confirm. */
			const uint8_t sport = ports[p];
			/* 0 <-> 1, 2 <-> 3 etc */
			const uint8_t dport = ports[p ^ 1];
			if ((sport == INVALID_PORT_ID) || (dport == INVALID_PORT_ID))
				continue;

			for (q = startQueue; q < endQueue; q++) {
				const uint16_t rxCount = rte_eth_rx_burst(sport,
					q, buf, buf_size);

				if (unlikely(rxCount == 0))
					continue;

				/* stats array read by the SIGHUP handler */
				rxPackets[q] += rxCount;

				for (i = 0; i < rxCount; i++)
					update_mac_address(buf[i], dport);

				/* each core owns tx queue (vmdq base + its index) */
				const uint16_t txCount = rte_eth_tx_burst(dport,
					vmdq_queue_base + core_id,
					buf,
					rxCount);

				/* free whatever the tx burst could not enqueue */
				if (txCount != rxCount) {
					for (i = txCount; i < rxCount; i++)
						rte_pktmbuf_free(buf[i]);
				}
			}
		}
	}
}
525 
526 /*
527  * Update the global var NUM_PORTS and array PORTS according to system ports number
528  * and return valid ports number
529  */
530 static unsigned check_ports_num(unsigned nb_ports)
531 {
532 	unsigned valid_num_ports = num_ports;
533 	unsigned portid;
534 
535 	if (num_ports > nb_ports) {
536 		printf("\nSpecified port number(%u) exceeds total system port number(%u)\n",
537 			num_ports, nb_ports);
538 		num_ports = nb_ports;
539 	}
540 
541 	for (portid = 0; portid < num_ports; portid++) {
542 		if (ports[portid] >= nb_ports) {
543 			printf("\nSpecified port ID(%u) exceeds max system port ID(%u)\n",
544 				ports[portid], (nb_ports - 1));
545 			ports[portid] = INVALID_PORT_ID;
546 			valid_num_ports--;
547 		}
548 	}
549 	return valid_num_ports;
550 }
551 
/*
 * Main function, does initialisation and calls the per-lcore functions.
 * Order matters: EAL init first (it consumes its own argv), then app
 * arguments, then mempool creation, then per-port setup, then launch.
 */
int
main(int argc, char *argv[])
{
	struct rte_mempool *mbuf_pool;
	unsigned lcore_id, core_id = 0;
	int ret;
	unsigned nb_ports, valid_num_ports;
	uint16_t portid;

	/* SIGHUP dumps per-queue rx statistics (see sighup_handler) */
	signal(SIGHUP, sighup_handler);

	/* init EAL */
	ret = rte_eal_init(argc, argv);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
	/* skip past the EAL arguments that were consumed */
	argc -= ret;
	argv += ret;

	/* parse app arguments */
	ret = vmdq_parse_args(argc, argv);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "Invalid VMDQ argument\n");

	/* record the enabled lcore ids; lcore_main maps itself via this array */
	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
		if (rte_lcore_is_enabled(lcore_id))
			lcore_ids[core_id++] = lcore_id;

	if (rte_lcore_count() > RTE_MAX_LCORE)
		rte_exit(EXIT_FAILURE, "Not enough cores\n");

	nb_ports = rte_eth_dev_count();

	/*
	 * Update the global var NUM_PORTS and global array PORTS
	 * and get value of var VALID_NUM_PORTS according to system ports number
	 */
	valid_num_ports = check_ports_num(nb_ports);

	/* forwarding works on port pairs, so an even count >= 2 is required */
	if (valid_num_ports < 2 || valid_num_ports % 2) {
		printf("Current valid ports number is %u\n", valid_num_ports);
		rte_exit(EXIT_FAILURE, "Error with valid ports number is not even or less than 2\n");
	}

	/* one shared pool sized for every queue/descriptor on every port */
	mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL",
		NUM_MBUFS_PER_PORT * nb_ports, MBUF_CACHE_SIZE,
		0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
	if (mbuf_pool == NULL)
		rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n");

	/* initialize all ports */
	for (portid = 0; portid < nb_ports; portid++) {
		/* skip ports that are not enabled */
		if ((enabled_port_mask & (1 << portid)) == 0) {
			printf("\nSkipping disabled port %d\n", portid);
			continue;
		}
		if (port_init(portid, mbuf_pool) != 0)
			rte_exit(EXIT_FAILURE, "Cannot initialize network ports\n");
	}

	/* call lcore_main() on every lcore */
	rte_eal_mp_remote_launch(lcore_main, NULL, CALL_MASTER);
	/* lcore_main never returns normally; wait (forever) on the slaves */
	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
		if (rte_eal_wait_lcore(lcore_id) < 0)
			return -1;
	}

	return 0;
}
622