xref: /dpdk/examples/vmdq/main.c (revision 3998e2a07220844d3f3c17f76a781ced3efe0de0)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2014 Intel Corporation
3  */
4 
5 #include <stdint.h>
6 #include <sys/queue.h>
7 #include <stdlib.h>
8 #include <string.h>
9 #include <stdio.h>
10 #include <assert.h>
11 #include <errno.h>
12 #include <signal.h>
13 #include <stdarg.h>
14 #include <inttypes.h>
15 #include <getopt.h>
16 
17 #include <rte_common.h>
18 #include <rte_log.h>
19 #include <rte_memory.h>
20 #include <rte_memcpy.h>
21 #include <rte_eal.h>
22 #include <rte_launch.h>
23 #include <rte_atomic.h>
24 #include <rte_cycles.h>
25 #include <rte_prefetch.h>
26 #include <rte_lcore.h>
27 #include <rte_per_lcore.h>
28 #include <rte_branch_prediction.h>
29 #include <rte_interrupts.h>
30 #include <rte_random.h>
31 #include <rte_debug.h>
32 #include <rte_ether.h>
33 #include <rte_ethdev.h>
34 #include <rte_mempool.h>
35 #include <rte_mbuf.h>
36 
37 #define MAX_QUEUES 1024
38 /*
 * 1024 queues are required to meet the needs of a large number of vmdq_pools:
40  * (RX/TX_queue_nb * RX/TX_ring_descriptors_nb) per port.
41  */
42 #define NUM_MBUFS_PER_PORT (MAX_QUEUES * RTE_MAX(RTE_TEST_RX_DESC_DEFAULT, \
43 						RTE_TEST_TX_DESC_DEFAULT))
44 #define MBUF_CACHE_SIZE 64
45 
46 #define MAX_PKT_BURST 32
47 
48 /*
49  * Configurable number of RX/TX ring descriptors
50  */
51 #define RTE_TEST_RX_DESC_DEFAULT 128
52 #define RTE_TEST_TX_DESC_DEFAULT 512
53 
54 #define INVALID_PORT_ID 0xFF
55 
56 /* mask of enabled ports */
57 static uint32_t enabled_port_mask;
58 
/* number of pools (if user does not specify any, 8 by default) */
60 static uint32_t num_queues = 8;
61 static uint32_t num_pools = 8;
62 
/* empty vmdq configuration structure. Filled in programmatically */
64 static const struct rte_eth_conf vmdq_conf_default = {
65 	.rxmode = {
66 		.mq_mode        = ETH_MQ_RX_VMDQ_ONLY,
67 		.split_hdr_size = 0,
68 		.header_split   = 0, /**< Header Split disabled */
69 		.hw_ip_checksum = 0, /**< IP checksum offload disabled */
70 		.hw_vlan_filter = 0, /**< VLAN filtering disabled */
71 		.jumbo_frame    = 0, /**< Jumbo Frame Support disabled */
72 	},
73 
74 	.txmode = {
75 		.mq_mode = ETH_MQ_TX_NONE,
76 	},
77 	.rx_adv_conf = {
78 		/*
79 		 * should be overridden separately in code with
80 		 * appropriate values
81 		 */
82 		.vmdq_rx_conf = {
83 			.nb_queue_pools = ETH_8_POOLS,
84 			.enable_default_pool = 0,
85 			.default_pool = 0,
86 			.nb_pool_maps = 0,
87 			.pool_map = {{0, 0},},
88 		},
89 	},
90 };
91 
92 static unsigned lcore_ids[RTE_MAX_LCORE];
93 static uint16_t ports[RTE_MAX_ETHPORTS];
94 static unsigned num_ports; /**< The number of ports specified in command line */
95 
96 /* array used for printing out statistics */
97 volatile unsigned long rxPackets[MAX_QUEUES] = {0};
98 
99 const uint16_t vlan_tags[] = {
100 	0,  1,  2,  3,  4,  5,  6,  7,
101 	8,  9, 10, 11,	12, 13, 14, 15,
102 	16, 17, 18, 19, 20, 21, 22, 23,
103 	24, 25, 26, 27, 28, 29, 30, 31,
104 	32, 33, 34, 35, 36, 37, 38, 39,
105 	40, 41, 42, 43, 44, 45, 46, 47,
106 	48, 49, 50, 51, 52, 53, 54, 55,
107 	56, 57, 58, 59, 60, 61, 62, 63,
108 };
109 const uint16_t num_vlans = RTE_DIM(vlan_tags);
110 static uint16_t num_pf_queues,  num_vmdq_queues;
111 static uint16_t vmdq_pool_base, vmdq_queue_base;
112 /* pool mac addr template, pool mac addr is like: 52 54 00 12 port# pool# */
113 static struct ether_addr pool_addr_template = {
114 	.addr_bytes = {0x52, 0x54, 0x00, 0x12, 0x00, 0x00}
115 };
116 
117 /* ethernet addresses of ports */
118 static struct ether_addr vmdq_ports_eth_addr[RTE_MAX_ETHPORTS];
119 
120 #define MAX_QUEUE_NUM_10G 128
121 #define MAX_QUEUE_NUM_1G 8
122 #define MAX_POOL_MAP_NUM_10G 64
123 #define MAX_POOL_MAP_NUM_1G 32
124 #define MAX_POOL_NUM_10G 64
125 #define MAX_POOL_NUM_1G 8
126 /*
127  * Builds up the correct configuration for vmdq based on the vlan tags array
128  * given above, and determine the queue number and pool map number according to
129  * valid pool number
130  */
131 static inline int
132 get_eth_conf(struct rte_eth_conf *eth_conf, uint32_t num_pools)
133 {
134 	struct rte_eth_vmdq_rx_conf conf;
135 	unsigned i;
136 
137 	conf.nb_queue_pools = (enum rte_eth_nb_pools)num_pools;
138 	conf.nb_pool_maps = num_pools;
139 	conf.enable_default_pool = 0;
140 	conf.default_pool = 0; /* set explicit value, even if not used */
141 
142 	for (i = 0; i < conf.nb_pool_maps; i++) {
143 		conf.pool_map[i].vlan_id = vlan_tags[i];
144 		conf.pool_map[i].pools = (1UL << (i % num_pools));
145 	}
146 
147 	(void)(rte_memcpy(eth_conf, &vmdq_conf_default, sizeof(*eth_conf)));
148 	(void)(rte_memcpy(&eth_conf->rx_adv_conf.vmdq_rx_conf, &conf,
149 		   sizeof(eth_conf->rx_adv_conf.vmdq_rx_conf)));
150 	return 0;
151 }
152 
/*
 * Initialises a given port using global settings and with the rx buffers
 * coming from the mbuf_pool passed as parameter.
 *
 * Validates the requested pool count against the device capabilities,
 * configures and starts all rx/tx queues, and programs one MAC address
 * per vmdq pool. Returns 0 on success, negative on any failure.
 */
static inline int
port_init(uint16_t port, struct rte_mempool *mbuf_pool)
{
	struct rte_eth_dev_info dev_info;
	struct rte_eth_rxconf *rxconf;
	struct rte_eth_conf port_conf;
	uint16_t rxRings, txRings;
	uint16_t rxRingSize = RTE_TEST_RX_DESC_DEFAULT;
	uint16_t txRingSize = RTE_TEST_TX_DESC_DEFAULT;
	int retval;
	uint16_t q;
	uint16_t queues_per_pool;
	uint32_t max_nb_pools;

	/*
	 * The max pool number from dev_info will be used to validate the pool
	 * number specified in cmd line
	 */
	rte_eth_dev_info_get(port, &dev_info);
	max_nb_pools = (uint32_t)dev_info.max_vmdq_pools;
	/*
	 * We allow to process part of VMDQ pools specified by num_pools in
	 * command line.
	 */
	if (num_pools > max_nb_pools) {
		printf("num_pools %d >max_nb_pools %d\n",
			num_pools, max_nb_pools);
		return -1;
	}
	/*
	 * The port is configured for the device's maximum pool count
	 * (max_nb_pools, not num_pools); only the first num_pools pools
	 * are actually used by the forwarding loop.
	 */
	retval = get_eth_conf(&port_conf, max_nb_pools);
	if (retval < 0)
		return retval;

	/*
	 * NIC queues are divided into pf queues and vmdq queues.
	 */
	/* There is assumption here all ports have the same configuration! */
	num_pf_queues = dev_info.max_rx_queues - dev_info.vmdq_queue_num;
	queues_per_pool = dev_info.vmdq_queue_num / dev_info.max_vmdq_pools;
	num_vmdq_queues = num_pools * queues_per_pool;
	num_queues = num_pf_queues + num_vmdq_queues;
	vmdq_queue_base = dev_info.vmdq_queue_base;
	vmdq_pool_base  = dev_info.vmdq_pool_base;

	printf("pf queue num: %u, configured vmdq pool num: %u,"
		" each vmdq pool has %u queues\n",
		num_pf_queues, num_pools, queues_per_pool);
	printf("vmdq queue base: %d pool base %d\n",
		vmdq_queue_base, vmdq_pool_base);
	if (port >= rte_eth_dev_count())
		return -1;

	/*
	 * Though in this example, we only receive packets from the first queue
	 * of each pool and send packets through first rte_lcore_count() tx
	 * queues of vmdq queues, all queues including pf queues are setup.
	 * This is because VMDQ queues doesn't always start from zero, and the
	 * PMD layer doesn't support selectively initialising part of rx/tx
	 * queues.
	 */
	rxRings = (uint16_t)dev_info.max_rx_queues;
	txRings = (uint16_t)dev_info.max_tx_queues;
	retval = rte_eth_dev_configure(port, rxRings, txRings, &port_conf);
	if (retval != 0)
		return retval;

	/* the PMD may round the descriptor counts up to its own limits */
	retval = rte_eth_dev_adjust_nb_rx_tx_desc(port, &rxRingSize,
				&txRingSize);
	if (retval != 0)
		return retval;
	/*
	 * The mempool was sized from the default descriptor counts
	 * (NUM_MBUFS_PER_PORT); bail out if the adjusted rings need more.
	 */
	if (RTE_MAX(rxRingSize, txRingSize) > RTE_MAX(RTE_TEST_RX_DESC_DEFAULT,
			RTE_TEST_TX_DESC_DEFAULT)) {
		printf("Mbuf pool has an insufficient size for port %u.\n",
			port);
		return -1;
	}

	/* re-read dev_info: use the PMD's default rxconf with drop enabled */
	rte_eth_dev_info_get(port, &dev_info);
	rxconf = &dev_info.default_rxconf;
	rxconf->rx_drop_en = 1;
	for (q = 0; q < rxRings; q++) {
		retval = rte_eth_rx_queue_setup(port, q, rxRingSize,
					rte_eth_dev_socket_id(port),
					rxconf,
					mbuf_pool);
		if (retval < 0) {
			printf("initialise rx queue %d failed\n", q);
			return retval;
		}
	}

	for (q = 0; q < txRings; q++) {
		retval = rte_eth_tx_queue_setup(port, q, txRingSize,
					rte_eth_dev_socket_id(port),
					NULL);
		if (retval < 0) {
			printf("initialise tx queue %d failed\n", q);
			return retval;
		}
	}

	retval  = rte_eth_dev_start(port);
	if (retval < 0) {
		printf("port %d start failed\n", port);
		return retval;
	}

	/* record the port MAC; used as the source address when forwarding */
	rte_eth_macaddr_get(port, &vmdq_ports_eth_addr[port]);
	printf("Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8
			" %02"PRIx8" %02"PRIx8" %02"PRIx8"\n",
			(unsigned)port,
			vmdq_ports_eth_addr[port].addr_bytes[0],
			vmdq_ports_eth_addr[port].addr_bytes[1],
			vmdq_ports_eth_addr[port].addr_bytes[2],
			vmdq_ports_eth_addr[port].addr_bytes[3],
			vmdq_ports_eth_addr[port].addr_bytes[4],
			vmdq_ports_eth_addr[port].addr_bytes[5]);

	/*
	 * Set mac for each pool: 52:54:00:12:<port>:<pool>.
	 * There is no default mac for the pools in i40e.
	 * Remove this after i40e fixes this issue.
	 */
	for (q = 0; q < num_pools; q++) {
		struct ether_addr mac;
		mac = pool_addr_template;
		mac.addr_bytes[4] = port;
		mac.addr_bytes[5] = q;
		printf("Port %u vmdq pool %u set mac %02x:%02x:%02x:%02x:%02x:%02x\n",
			port, q,
			mac.addr_bytes[0], mac.addr_bytes[1],
			mac.addr_bytes[2], mac.addr_bytes[3],
			mac.addr_bytes[4], mac.addr_bytes[5]);
		retval = rte_eth_dev_mac_addr_add(port, &mac,
				q + vmdq_pool_base);
		if (retval) {
			printf("mac addr add failed at pool %d\n", q);
			return retval;
		}
	}

	return 0;
}
300 
301 /* Check num_pools parameter and set it if OK*/
302 static int
303 vmdq_parse_num_pools(const char *q_arg)
304 {
305 	char *end = NULL;
306 	int n;
307 
308 	/* parse number string */
309 	n = strtol(q_arg, &end, 10);
310 	if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0'))
311 		return -1;
312 
313 	if (num_pools > num_vlans) {
314 		printf("num_pools %d > num_vlans %d\n", num_pools, num_vlans);
315 		return -1;
316 	}
317 
318 	num_pools = n;
319 
320 	return 0;
321 }
322 
323 
/*
 * Parse a hexadecimal port-mask string.
 * Returns the mask on success, or -1 for empty input, trailing
 * garbage, or an all-zero mask.
 */
static int
parse_portmask(const char *portmask)
{
	char *end;
	unsigned long mask;

	/* interpret the string as a hexadecimal number */
	mask = strtoul(portmask, &end, 16);

	/* reject empty input and anything not fully consumed */
	if (portmask[0] == '\0' || end == NULL || *end != '\0')
		return -1;

	/* a mask with no ports enabled is useless */
	if (mask == 0)
		return -1;

	return mask;
}
340 
/*
 * Display usage.
 * (The stray ']' that previously followed PORTMASK is removed.)
 */
static void
vmdq_usage(const char *prgname)
{
	printf("%s [EAL options] -- -p PORTMASK\n"
	"  --nb-pools NP: number of pools\n",
	       prgname);
}
349 
350 /*  Parse the argument (num_pools) given in the command line of the application */
351 static int
352 vmdq_parse_args(int argc, char **argv)
353 {
354 	int opt;
355 	int option_index;
356 	unsigned i;
357 	const char *prgname = argv[0];
358 	static struct option long_option[] = {
359 		{"nb-pools", required_argument, NULL, 0},
360 		{NULL, 0, 0, 0}
361 	};
362 
363 	/* Parse command line */
364 	while ((opt = getopt_long(argc, argv, "p:", long_option,
365 		&option_index)) != EOF) {
366 		switch (opt) {
367 		/* portmask */
368 		case 'p':
369 			enabled_port_mask = parse_portmask(optarg);
370 			if (enabled_port_mask == 0) {
371 				printf("invalid portmask\n");
372 				vmdq_usage(prgname);
373 				return -1;
374 			}
375 			break;
376 		case 0:
377 			if (vmdq_parse_num_pools(optarg) == -1) {
378 				printf("invalid number of pools\n");
379 				vmdq_usage(prgname);
380 				return -1;
381 			}
382 			break;
383 
384 		default:
385 			vmdq_usage(prgname);
386 			return -1;
387 		}
388 	}
389 
390 	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
391 		if (enabled_port_mask & (1 << i))
392 			ports[num_ports++] = (uint8_t)i;
393 	}
394 
395 	if (num_ports < 2 || num_ports % 2) {
396 		printf("Current enabled port number is %u,"
397 			"but it should be even and at least 2\n", num_ports);
398 		return -1;
399 	}
400 
401 	return 0;
402 }
403 
/*
 * Rewrite the Ethernet header of a packet about to be forwarded:
 * destination MAC becomes the synthetic 02:00:00:00:00:<dst_port>,
 * source MAC becomes the address of the egress port.
 */
static void
update_mac_address(struct rte_mbuf *m, unsigned dst_port)
{
	struct ether_hdr *eth;
	void *tmp;

	eth = rte_pktmbuf_mtod(m, struct ether_hdr *);

	/* 02:00:00:00:00:xx */
	tmp = &eth->d_addr.addr_bytes[0];
	/*
	 * NOTE(review): this is an 8-byte store into the 6-byte d_addr;
	 * the two spill bytes land in s_addr but are overwritten by the
	 * copy below. Assumes a little-endian CPU that tolerates
	 * unaligned 64-bit stores — TODO confirm for supported targets.
	 */
	*((uint64_t *)tmp) = 0x000000000002 + ((uint64_t)dst_port << 40);

	/* src addr */
	ether_addr_copy(&vmdq_ports_eth_addr[dst_port], &eth->s_addr);
}
419 
420 /* When we receive a HUP signal, print out our stats */
421 static void
422 sighup_handler(int signum)
423 {
424 	unsigned q;
425 	for (q = 0; q < num_queues; q++) {
426 		if (q % (num_queues/num_pools) == 0)
427 			printf("\nPool %u: ", q/(num_queues/num_pools));
428 		printf("%lu ", rxPackets[q]);
429 	}
430 	printf("\nFinished handling signal %d\n", signum);
431 }
432 
/*
 * Per-lcore worker: polls this core's share of the vmdq rx queues on
 * every enabled port and forwards each burst to the paired port
 * (0 <-> 1, 2 <-> 3, ...), rewriting the MAC addresses on the way.
 * Loops forever once started; returns 0 only if this core was
 * assigned no queues.
 */
static int
lcore_main(__attribute__((__unused__)) void *dummy)
{
	const uint16_t lcore_id = (uint16_t)rte_lcore_id();
	const uint16_t num_cores = (uint16_t)rte_lcore_count();
	uint16_t core_id = 0;
	uint16_t startQueue, endQueue;
	uint16_t q, i, p;
	const uint16_t remainder = (uint16_t)(num_vmdq_queues % num_cores);

	/* map this lcore id to its 0-based index in lcore_ids[] */
	for (i = 0; i < num_cores; i++)
		if (lcore_ids[i] == lcore_id) {
			core_id = i;
			break;
		}

	/*
	 * Split num_vmdq_queues as evenly as possible across the cores:
	 * the first `remainder` cores each take one extra queue.
	 */
	if (remainder != 0) {
		if (core_id < remainder) {
			startQueue = (uint16_t)(core_id *
					(num_vmdq_queues / num_cores + 1));
			endQueue = (uint16_t)(startQueue +
					(num_vmdq_queues / num_cores) + 1);
		} else {
			startQueue = (uint16_t)(core_id *
					(num_vmdq_queues / num_cores) +
					remainder);
			endQueue = (uint16_t)(startQueue +
					(num_vmdq_queues / num_cores));
		}
	} else {
		startQueue = (uint16_t)(core_id *
				(num_vmdq_queues / num_cores));
		endQueue = (uint16_t)(startQueue +
				(num_vmdq_queues / num_cores));
	}

	/* vmdq queue idx doesn't always start from zero.*/
	startQueue += vmdq_queue_base;
	endQueue   += vmdq_queue_base;
	printf("core %u(lcore %u) reading queues %i-%i\n", (unsigned)core_id,
		(unsigned)lcore_id, startQueue, endQueue - 1);

	/* a core with an empty range has no work to do */
	if (startQueue == endQueue) {
		printf("lcore %u has nothing to do\n", lcore_id);
		return 0;
	}

	for (;;) {
		struct rte_mbuf *buf[MAX_PKT_BURST];
		const uint16_t buf_size = sizeof(buf) / sizeof(buf[0]);

		for (p = 0; p < num_ports; p++) {
			/*
			 * NOTE(review): sport/dport narrow ports[]
			 * (uint16_t) to uint8_t — OK only while port ids
			 * stay below INVALID_PORT_ID (0xFF); confirm.
			 */
			const uint8_t sport = ports[p];
			/* 0 <-> 1, 2 <-> 3 etc */
			const uint8_t dport = ports[p ^ 1];
			if ((sport == INVALID_PORT_ID) || (dport == INVALID_PORT_ID))
				continue;

			for (q = startQueue; q < endQueue; q++) {
				const uint16_t rxCount = rte_eth_rx_burst(sport,
					q, buf, buf_size);

				if (unlikely(rxCount == 0))
					continue;

				/* per-queue counter, read by sighup_handler */
				rxPackets[q] += rxCount;

				for (i = 0; i < rxCount; i++)
					update_mac_address(buf[i], dport);

				/* each core transmits on its own tx queue */
				const uint16_t txCount = rte_eth_tx_burst(dport,
					vmdq_queue_base + core_id,
					buf,
					rxCount);

				/* free any mbufs the NIC did not accept */
				if (txCount != rxCount) {
					for (i = txCount; i < rxCount; i++)
						rte_pktmbuf_free(buf[i]);
				}
			}
		}
	}
}
520 
521 /*
522  * Update the global var NUM_PORTS and array PORTS according to system ports number
523  * and return valid ports number
524  */
525 static unsigned check_ports_num(unsigned nb_ports)
526 {
527 	unsigned valid_num_ports = num_ports;
528 	unsigned portid;
529 
530 	if (num_ports > nb_ports) {
531 		printf("\nSpecified port number(%u) exceeds total system port number(%u)\n",
532 			num_ports, nb_ports);
533 		num_ports = nb_ports;
534 	}
535 
536 	for (portid = 0; portid < num_ports; portid++) {
537 		if (ports[portid] >= nb_ports) {
538 			printf("\nSpecified port ID(%u) exceeds max system port ID(%u)\n",
539 				ports[portid], (nb_ports - 1));
540 			ports[portid] = INVALID_PORT_ID;
541 			valid_num_ports--;
542 		}
543 	}
544 	return valid_num_ports;
545 }
546 
/*
 * Main function, does initialisation and calls the per-lcore functions.
 * Order matters: EAL init, app-arg parsing, port validation, mempool
 * creation, per-port init, then launch lcore_main() on every lcore.
 */
int
main(int argc, char *argv[])
{
	struct rte_mempool *mbuf_pool;
	unsigned lcore_id, core_id = 0;
	int ret;
	unsigned nb_ports, valid_num_ports;
	uint16_t portid;

	/* dump per-queue statistics on SIGHUP (see sighup_handler) */
	signal(SIGHUP, sighup_handler);

	/* init EAL */
	ret = rte_eal_init(argc, argv);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
	argc -= ret;
	argv += ret;

	/* parse app arguments */
	ret = vmdq_parse_args(argc, argv);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "Invalid VMDQ argument\n");

	/* record the enabled lcore ids, in order, for lcore_main() */
	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
		if (rte_lcore_is_enabled(lcore_id))
			lcore_ids[core_id++] = lcore_id;

	if (rte_lcore_count() > RTE_MAX_LCORE)
		rte_exit(EXIT_FAILURE, "Not enough cores\n");

	nb_ports = rte_eth_dev_count();

	/*
	 * Update the global var NUM_PORTS and global array PORTS
	 * and get value of var VALID_NUM_PORTS according to system ports number
	 */
	valid_num_ports = check_ports_num(nb_ports);

	/* ports are forwarded in pairs, so an even count >= 2 is required */
	if (valid_num_ports < 2 || valid_num_ports % 2) {
		printf("Current valid ports number is %u\n", valid_num_ports);
		rte_exit(EXIT_FAILURE, "Error with valid ports number is not even or less than 2\n");
	}

	/* one shared mempool supplies rx buffers for every port */
	mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL",
		NUM_MBUFS_PER_PORT * nb_ports, MBUF_CACHE_SIZE,
		0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
	if (mbuf_pool == NULL)
		rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n");

	/* initialize all ports */
	for (portid = 0; portid < nb_ports; portid++) {
		/* skip ports that are not enabled */
		if ((enabled_port_mask & (1 << portid)) == 0) {
			printf("\nSkipping disabled port %d\n", portid);
			continue;
		}
		if (port_init(portid, mbuf_pool) != 0)
			rte_exit(EXIT_FAILURE, "Cannot initialize network ports\n");
	}

	/* call lcore_main() on every lcore */
	rte_eal_mp_remote_launch(lcore_main, NULL, CALL_MASTER);
	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
		if (rte_eal_wait_lcore(lcore_id) < 0)
			return -1;
	}

	return 0;
}
617