xref: /dpdk/examples/vmdq/main.c (revision 25d11a86c56d50947af33d0b79ede622809bd8b9)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2014 Intel Corporation
3  */
4 
5 #include <stdint.h>
6 #include <sys/queue.h>
7 #include <stdlib.h>
8 #include <string.h>
9 #include <stdio.h>
10 #include <assert.h>
11 #include <errno.h>
12 #include <signal.h>
13 #include <stdarg.h>
14 #include <inttypes.h>
15 #include <getopt.h>
16 
17 #include <rte_common.h>
18 #include <rte_log.h>
19 #include <rte_memory.h>
20 #include <rte_memcpy.h>
21 #include <rte_eal.h>
22 #include <rte_launch.h>
23 #include <rte_atomic.h>
24 #include <rte_cycles.h>
25 #include <rte_prefetch.h>
26 #include <rte_lcore.h>
27 #include <rte_per_lcore.h>
28 #include <rte_branch_prediction.h>
29 #include <rte_interrupts.h>
30 #include <rte_random.h>
31 #include <rte_debug.h>
32 #include <rte_ether.h>
33 #include <rte_ethdev.h>
34 #include <rte_mempool.h>
35 #include <rte_mbuf.h>
36 
/*
 * Default number of RX/TX ring descriptors per queue.
 */
#define RTE_TEST_RX_DESC_DEFAULT 1024
#define RTE_TEST_TX_DESC_DEFAULT 1024

/*
 * Queue budget per port: 1024 queues are provisioned so that a large number
 * of vmdq pools can be served.
 */
#define MAX_QUEUES 1024

/*
 * Mbufs budgeted per port:
 * (RX/TX queue count * RX/TX ring descriptor count).
 */
#define NUM_MBUFS_PER_PORT (MAX_QUEUES * RTE_MAX(RTE_TEST_RX_DESC_DEFAULT, \
						RTE_TEST_TX_DESC_DEFAULT))
#define MBUF_CACHE_SIZE 64

/* Maximum number of packets handled per rx/tx burst */
#define MAX_PKT_BURST 32

/* Marker stored in ports[] for a port id that turned out to be unusable */
#define INVALID_PORT_ID 0xFF
55 
/* mask of enabled ports (bit N set means port N was requested with -p) */
static uint32_t enabled_port_mask;

/*
 * Number of queues and number of pools. If the user does not specify any
 * pool count, 8 pools are used by default. num_queues is recomputed by
 * port_init() from the device information.
 */
static uint32_t num_queues = 8;
static uint32_t num_pools = 8;
62 
/*
 * Baseline vmdq configuration structure. get_eth_conf() copies it and then
 * fills in rx_adv_conf.vmdq_rx_conf programmatically.
 */
static const struct rte_eth_conf vmdq_conf_default = {
	.rxmode = {
		.mq_mode        = ETH_MQ_RX_VMDQ_ONLY,
		.split_hdr_size = 0,
	},

	.txmode = {
		.mq_mode = ETH_MQ_TX_NONE,
	},
	.rx_adv_conf = {
		/*
		 * should be overridden separately in code with
		 * appropriate values
		 */
		.vmdq_rx_conf = {
			.nb_queue_pools = ETH_8_POOLS,
			.enable_default_pool = 0,
			.default_pool = 0,
			.nb_pool_maps = 0,
			.pool_map = {{0, 0},},
		},
	},
};
87 
/* ids of the enabled lcores, filled in by main() in ascending lcore order */
static unsigned lcore_ids[RTE_MAX_LCORE];
/*
 * Port ids selected by the port mask; check_ports_num() replaces entries
 * that are not valid ports with INVALID_PORT_ID.
 */
static uint16_t ports[RTE_MAX_ETHPORTS];
static unsigned num_ports; /**< The number of ports specified in command line */

/*
 * Array used for printing out statistics: lcore_main() adds the number of
 * received packets at the index of the rx queue they arrived on, and
 * sighup_handler() prints the counters.
 */
volatile unsigned long rxPackets[MAX_QUEUES] = {0};
94 
/*
 * VLAN ids used to build the pool map: get_eth_conf() assigns vlan_tags[i]
 * to pool map entry i.
 */
const uint16_t vlan_tags[] = {
	0,  1,  2,  3,  4,  5,  6,  7,
	8,  9, 10, 11,	12, 13, 14, 15,
	16, 17, 18, 19, 20, 21, 22, 23,
	24, 25, 26, 27, 28, 29, 30, 31,
	32, 33, 34, 35, 36, 37, 38, 39,
	40, 41, 42, 43, 44, 45, 46, 47,
	48, 49, 50, 51, 52, 53, 54, 55,
	56, 57, 58, 59, 60, 61, 62, 63,
};
/* number of entries in vlan_tags[] */
const uint16_t num_vlans = RTE_DIM(vlan_tags);
/* queue counts and vmdq base indexes, all set by port_init() from dev_info */
static uint16_t num_pf_queues,  num_vmdq_queues;
static uint16_t vmdq_pool_base, vmdq_queue_base;
/* pool mac addr template, pool mac addr is like: 52 54 00 12 port# pool# */
static struct ether_addr pool_addr_template = {
	.addr_bytes = {0x52, 0x54, 0x00, 0x12, 0x00, 0x00}
};
112 
/* ethernet addresses of ports, indexed by port id and filled by port_init() */
static struct ether_addr vmdq_ports_eth_addr[RTE_MAX_ETHPORTS];

/*
 * Queue, pool-map and pool limits; the names suggest 10G and 1G devices.
 * NOTE(review): none of these macros is referenced in the visible part of
 * this file (port_init() takes its limits from the device info instead) -
 * confirm whether they are still needed.
 */
#define MAX_QUEUE_NUM_10G 128
#define MAX_QUEUE_NUM_1G 8
#define MAX_POOL_MAP_NUM_10G 64
#define MAX_POOL_MAP_NUM_1G 32
#define MAX_POOL_NUM_10G 64
#define MAX_POOL_NUM_1G 8
122 /*
123  * Builds up the correct configuration for vmdq based on the vlan tags array
124  * given above, and determine the queue number and pool map number according to
125  * valid pool number
126  */
127 static inline int
128 get_eth_conf(struct rte_eth_conf *eth_conf, uint32_t num_pools)
129 {
130 	struct rte_eth_vmdq_rx_conf conf;
131 	unsigned i;
132 
133 	conf.nb_queue_pools = (enum rte_eth_nb_pools)num_pools;
134 	conf.nb_pool_maps = num_pools;
135 	conf.enable_default_pool = 0;
136 	conf.default_pool = 0; /* set explicit value, even if not used */
137 
138 	for (i = 0; i < conf.nb_pool_maps; i++) {
139 		conf.pool_map[i].vlan_id = vlan_tags[i];
140 		conf.pool_map[i].pools = (1UL << (i % num_pools));
141 	}
142 
143 	(void)(rte_memcpy(eth_conf, &vmdq_conf_default, sizeof(*eth_conf)));
144 	(void)(rte_memcpy(&eth_conf->rx_adv_conf.vmdq_rx_conf, &conf,
145 		   sizeof(eth_conf->rx_adv_conf.vmdq_rx_conf)));
146 	return 0;
147 }
148 
149 /*
150  * Initialises a given port using global settings and with the rx buffers
151  * coming from the mbuf_pool passed as parameter
152  */
153 static inline int
154 port_init(uint16_t port, struct rte_mempool *mbuf_pool)
155 {
156 	struct rte_eth_dev_info dev_info;
157 	struct rte_eth_rxconf *rxconf;
158 	struct rte_eth_txconf *txconf;
159 	struct rte_eth_conf port_conf;
160 	uint16_t rxRings, txRings;
161 	uint16_t rxRingSize = RTE_TEST_RX_DESC_DEFAULT;
162 	uint16_t txRingSize = RTE_TEST_TX_DESC_DEFAULT;
163 	int retval;
164 	uint16_t q;
165 	uint16_t queues_per_pool;
166 	uint32_t max_nb_pools;
167 
168 	/*
169 	 * The max pool number from dev_info will be used to validate the pool
170 	 * number specified in cmd line
171 	 */
172 	rte_eth_dev_info_get(port, &dev_info);
173 	max_nb_pools = (uint32_t)dev_info.max_vmdq_pools;
174 	/*
175 	 * We allow to process part of VMDQ pools specified by num_pools in
176 	 * command line.
177 	 */
178 	if (num_pools > max_nb_pools) {
179 		printf("num_pools %d >max_nb_pools %d\n",
180 			num_pools, max_nb_pools);
181 		return -1;
182 	}
183 	retval = get_eth_conf(&port_conf, max_nb_pools);
184 	if (retval < 0)
185 		return retval;
186 
187 	/*
188 	 * NIC queues are divided into pf queues and vmdq queues.
189 	 */
190 	/* There is assumption here all ports have the same configuration! */
191 	num_pf_queues = dev_info.max_rx_queues - dev_info.vmdq_queue_num;
192 	queues_per_pool = dev_info.vmdq_queue_num / dev_info.max_vmdq_pools;
193 	num_vmdq_queues = num_pools * queues_per_pool;
194 	num_queues = num_pf_queues + num_vmdq_queues;
195 	vmdq_queue_base = dev_info.vmdq_queue_base;
196 	vmdq_pool_base  = dev_info.vmdq_pool_base;
197 
198 	printf("pf queue num: %u, configured vmdq pool num: %u,"
199 		" each vmdq pool has %u queues\n",
200 		num_pf_queues, num_pools, queues_per_pool);
201 	printf("vmdq queue base: %d pool base %d\n",
202 		vmdq_queue_base, vmdq_pool_base);
203 	if (!rte_eth_dev_is_valid_port(port))
204 		return -1;
205 
206 	/*
207 	 * Though in this example, we only receive packets from the first queue
208 	 * of each pool and send packets through first rte_lcore_count() tx
209 	 * queues of vmdq queues, all queues including pf queues are setup.
210 	 * This is because VMDQ queues doesn't always start from zero, and the
211 	 * PMD layer doesn't support selectively initialising part of rx/tx
212 	 * queues.
213 	 */
214 	rxRings = (uint16_t)dev_info.max_rx_queues;
215 	txRings = (uint16_t)dev_info.max_tx_queues;
216 
217 	rte_eth_dev_info_get(port, &dev_info);
218 	if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE)
219 		port_conf.txmode.offloads |=
220 			DEV_TX_OFFLOAD_MBUF_FAST_FREE;
221 	retval = rte_eth_dev_configure(port, rxRings, txRings, &port_conf);
222 	if (retval != 0)
223 		return retval;
224 
225 	retval = rte_eth_dev_adjust_nb_rx_tx_desc(port, &rxRingSize,
226 				&txRingSize);
227 	if (retval != 0)
228 		return retval;
229 	if (RTE_MAX(rxRingSize, txRingSize) > RTE_MAX(RTE_TEST_RX_DESC_DEFAULT,
230 			RTE_TEST_TX_DESC_DEFAULT)) {
231 		printf("Mbuf pool has an insufficient size for port %u.\n",
232 			port);
233 		return -1;
234 	}
235 
236 	rxconf = &dev_info.default_rxconf;
237 	rxconf->rx_drop_en = 1;
238 	txconf = &dev_info.default_txconf;
239 	txconf->offloads = port_conf.txmode.offloads;
240 	for (q = 0; q < rxRings; q++) {
241 		retval = rte_eth_rx_queue_setup(port, q, rxRingSize,
242 					rte_eth_dev_socket_id(port),
243 					rxconf,
244 					mbuf_pool);
245 		if (retval < 0) {
246 			printf("initialise rx queue %d failed\n", q);
247 			return retval;
248 		}
249 	}
250 
251 	for (q = 0; q < txRings; q++) {
252 		retval = rte_eth_tx_queue_setup(port, q, txRingSize,
253 					rte_eth_dev_socket_id(port),
254 					txconf);
255 		if (retval < 0) {
256 			printf("initialise tx queue %d failed\n", q);
257 			return retval;
258 		}
259 	}
260 
261 	retval  = rte_eth_dev_start(port);
262 	if (retval < 0) {
263 		printf("port %d start failed\n", port);
264 		return retval;
265 	}
266 
267 	rte_eth_macaddr_get(port, &vmdq_ports_eth_addr[port]);
268 	printf("Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8
269 			" %02"PRIx8" %02"PRIx8" %02"PRIx8"\n",
270 			(unsigned)port,
271 			vmdq_ports_eth_addr[port].addr_bytes[0],
272 			vmdq_ports_eth_addr[port].addr_bytes[1],
273 			vmdq_ports_eth_addr[port].addr_bytes[2],
274 			vmdq_ports_eth_addr[port].addr_bytes[3],
275 			vmdq_ports_eth_addr[port].addr_bytes[4],
276 			vmdq_ports_eth_addr[port].addr_bytes[5]);
277 
278 	/*
279 	 * Set mac for each pool.
280 	 * There is no default mac for the pools in i40.
281 	 * Removes this after i40e fixes this issue.
282 	 */
283 	for (q = 0; q < num_pools; q++) {
284 		struct ether_addr mac;
285 		mac = pool_addr_template;
286 		mac.addr_bytes[4] = port;
287 		mac.addr_bytes[5] = q;
288 		printf("Port %u vmdq pool %u set mac %02x:%02x:%02x:%02x:%02x:%02x\n",
289 			port, q,
290 			mac.addr_bytes[0], mac.addr_bytes[1],
291 			mac.addr_bytes[2], mac.addr_bytes[3],
292 			mac.addr_bytes[4], mac.addr_bytes[5]);
293 		retval = rte_eth_dev_mac_addr_add(port, &mac,
294 				q + vmdq_pool_base);
295 		if (retval) {
296 			printf("mac addr add failed at pool %d\n", q);
297 			return retval;
298 		}
299 	}
300 
301 	return 0;
302 }
303 
304 /* Check num_pools parameter and set it if OK*/
305 static int
306 vmdq_parse_num_pools(const char *q_arg)
307 {
308 	char *end = NULL;
309 	int n;
310 
311 	/* parse number string */
312 	n = strtol(q_arg, &end, 10);
313 	if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0'))
314 		return -1;
315 
316 	if (num_pools > num_vlans) {
317 		printf("num_pools %d > num_vlans %d\n", num_pools, num_vlans);
318 		return -1;
319 	}
320 
321 	num_pools = n;
322 
323 	return 0;
324 }
325 
326 
/*
 * Parse a hexadecimal port mask.
 *
 * portmask: string holding the mask in hexadecimal (as accepted by
 *           strtoul() with base 16).
 *
 * Returns the mask, or 0 when the string is empty, contains trailing
 * characters, does not fit in 32 bits, or encodes an empty mask. Zero is
 * used for every failure because the result is stored in an unsigned mask
 * and compared against 0 by the caller; a negative error code would be
 * indistinguishable from a mask with all bits set.
 */
static int
parse_portmask(const char *portmask)
{
	char *end = NULL;
	unsigned long pm;

	/* parse hexadecimal string */
	pm = strtoul(portmask, &end, 16);
	if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
		return 0;

	/* the port mask is 32 bits wide; do not silently drop higher bits */
	if (pm > UINT32_MAX)
		return 0;

	/* bit 31 survives the round trip through int into the uint32_t mask */
	return (int)pm;
}
343 
/*
 * Display usage.
 *
 * prgname: program name printed at the start of the synopsis line.
 */
static void
vmdq_usage(const char *prgname)
{
	/* -p is required; --nb-pools is optional (num_pools defaults to 8) */
	printf("%s [EAL options] -- -p PORTMASK [--nb-pools NP]\n"
	"  --nb-pools NP: number of pools\n",
	       prgname);
}
352 
353 /*  Parse the argument (num_pools) given in the command line of the application */
354 static int
355 vmdq_parse_args(int argc, char **argv)
356 {
357 	int opt;
358 	int option_index;
359 	unsigned i;
360 	const char *prgname = argv[0];
361 	static struct option long_option[] = {
362 		{"nb-pools", required_argument, NULL, 0},
363 		{NULL, 0, 0, 0}
364 	};
365 
366 	/* Parse command line */
367 	while ((opt = getopt_long(argc, argv, "p:", long_option,
368 		&option_index)) != EOF) {
369 		switch (opt) {
370 		/* portmask */
371 		case 'p':
372 			enabled_port_mask = parse_portmask(optarg);
373 			if (enabled_port_mask == 0) {
374 				printf("invalid portmask\n");
375 				vmdq_usage(prgname);
376 				return -1;
377 			}
378 			break;
379 		case 0:
380 			if (vmdq_parse_num_pools(optarg) == -1) {
381 				printf("invalid number of pools\n");
382 				vmdq_usage(prgname);
383 				return -1;
384 			}
385 			break;
386 
387 		default:
388 			vmdq_usage(prgname);
389 			return -1;
390 		}
391 	}
392 
393 	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
394 		if (enabled_port_mask & (1 << i))
395 			ports[num_ports++] = (uint8_t)i;
396 	}
397 
398 	if (num_ports < 2 || num_ports % 2) {
399 		printf("Current enabled port number is %u,"
400 			"but it should be even and at least 2\n", num_ports);
401 		return -1;
402 	}
403 
404 	return 0;
405 }
406 
407 static void
408 update_mac_address(struct rte_mbuf *m, unsigned dst_port)
409 {
410 	struct ether_hdr *eth;
411 	void *tmp;
412 
413 	eth = rte_pktmbuf_mtod(m, struct ether_hdr *);
414 
415 	/* 02:00:00:00:00:xx */
416 	tmp = &eth->d_addr.addr_bytes[0];
417 	*((uint64_t *)tmp) = 0x000000000002 + ((uint64_t)dst_port << 40);
418 
419 	/* src addr */
420 	ether_addr_copy(&vmdq_ports_eth_addr[dst_port], &eth->s_addr);
421 }
422 
423 /* When we receive a HUP signal, print out our stats */
424 static void
425 sighup_handler(int signum)
426 {
427 	unsigned q;
428 	for (q = 0; q < num_queues; q++) {
429 		if (q % (num_queues/num_pools) == 0)
430 			printf("\nPool %u: ", q/(num_queues/num_pools));
431 		printf("%lu ", rxPackets[q]);
432 	}
433 	printf("\nFinished handling signal %d\n", signum);
434 }
435 
436 /*
437  * Main thread that does the work, reading from INPUT_PORT
438  * and writing to OUTPUT_PORT
439  */
440 static int
441 lcore_main(__attribute__((__unused__)) void *dummy)
442 {
443 	const uint16_t lcore_id = (uint16_t)rte_lcore_id();
444 	const uint16_t num_cores = (uint16_t)rte_lcore_count();
445 	uint16_t core_id = 0;
446 	uint16_t startQueue, endQueue;
447 	uint16_t q, i, p;
448 	const uint16_t remainder = (uint16_t)(num_vmdq_queues % num_cores);
449 
450 	for (i = 0; i < num_cores; i++)
451 		if (lcore_ids[i] == lcore_id) {
452 			core_id = i;
453 			break;
454 		}
455 
456 	if (remainder != 0) {
457 		if (core_id < remainder) {
458 			startQueue = (uint16_t)(core_id *
459 					(num_vmdq_queues / num_cores + 1));
460 			endQueue = (uint16_t)(startQueue +
461 					(num_vmdq_queues / num_cores) + 1);
462 		} else {
463 			startQueue = (uint16_t)(core_id *
464 					(num_vmdq_queues / num_cores) +
465 					remainder);
466 			endQueue = (uint16_t)(startQueue +
467 					(num_vmdq_queues / num_cores));
468 		}
469 	} else {
470 		startQueue = (uint16_t)(core_id *
471 				(num_vmdq_queues / num_cores));
472 		endQueue = (uint16_t)(startQueue +
473 				(num_vmdq_queues / num_cores));
474 	}
475 
476 	/* vmdq queue idx doesn't always start from zero.*/
477 	startQueue += vmdq_queue_base;
478 	endQueue   += vmdq_queue_base;
479 	printf("core %u(lcore %u) reading queues %i-%i\n", (unsigned)core_id,
480 		(unsigned)lcore_id, startQueue, endQueue - 1);
481 
482 	if (startQueue == endQueue) {
483 		printf("lcore %u has nothing to do\n", lcore_id);
484 		return 0;
485 	}
486 
487 	for (;;) {
488 		struct rte_mbuf *buf[MAX_PKT_BURST];
489 		const uint16_t buf_size = sizeof(buf) / sizeof(buf[0]);
490 
491 		for (p = 0; p < num_ports; p++) {
492 			const uint8_t sport = ports[p];
493 			/* 0 <-> 1, 2 <-> 3 etc */
494 			const uint8_t dport = ports[p ^ 1];
495 			if ((sport == INVALID_PORT_ID) || (dport == INVALID_PORT_ID))
496 				continue;
497 
498 			for (q = startQueue; q < endQueue; q++) {
499 				const uint16_t rxCount = rte_eth_rx_burst(sport,
500 					q, buf, buf_size);
501 
502 				if (unlikely(rxCount == 0))
503 					continue;
504 
505 				rxPackets[q] += rxCount;
506 
507 				for (i = 0; i < rxCount; i++)
508 					update_mac_address(buf[i], dport);
509 
510 				const uint16_t txCount = rte_eth_tx_burst(dport,
511 					vmdq_queue_base + core_id,
512 					buf,
513 					rxCount);
514 
515 				if (txCount != rxCount) {
516 					for (i = txCount; i < rxCount; i++)
517 						rte_pktmbuf_free(buf[i]);
518 				}
519 			}
520 		}
521 	}
522 }
523 
524 /*
525  * Update the global var NUM_PORTS and array PORTS according to system ports number
526  * and return valid ports number
527  */
528 static unsigned check_ports_num(unsigned nb_ports)
529 {
530 	unsigned valid_num_ports = num_ports;
531 	unsigned portid;
532 
533 	if (num_ports > nb_ports) {
534 		printf("\nSpecified port number(%u) exceeds total system port number(%u)\n",
535 			num_ports, nb_ports);
536 		num_ports = nb_ports;
537 	}
538 
539 	for (portid = 0; portid < num_ports; portid++) {
540 		if (!rte_eth_dev_is_valid_port(ports[portid])) {
541 			printf("\nSpecified port ID(%u) is not valid\n",
542 				ports[portid]);
543 			ports[portid] = INVALID_PORT_ID;
544 			valid_num_ports--;
545 		}
546 	}
547 	return valid_num_ports;
548 }
549 
550 /* Main function, does initialisation and calls the per-lcore functions */
551 int
552 main(int argc, char *argv[])
553 {
554 	struct rte_mempool *mbuf_pool;
555 	unsigned lcore_id, core_id = 0;
556 	int ret;
557 	unsigned nb_ports, valid_num_ports;
558 	uint16_t portid;
559 
560 	signal(SIGHUP, sighup_handler);
561 
562 	/* init EAL */
563 	ret = rte_eal_init(argc, argv);
564 	if (ret < 0)
565 		rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
566 	argc -= ret;
567 	argv += ret;
568 
569 	/* parse app arguments */
570 	ret = vmdq_parse_args(argc, argv);
571 	if (ret < 0)
572 		rte_exit(EXIT_FAILURE, "Invalid VMDQ argument\n");
573 
574 	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
575 		if (rte_lcore_is_enabled(lcore_id))
576 			lcore_ids[core_id++] = lcore_id;
577 
578 	if (rte_lcore_count() > RTE_MAX_LCORE)
579 		rte_exit(EXIT_FAILURE, "Not enough cores\n");
580 
581 	nb_ports = rte_eth_dev_count_avail();
582 
583 	/*
584 	 * Update the global var NUM_PORTS and global array PORTS
585 	 * and get value of var VALID_NUM_PORTS according to system ports number
586 	 */
587 	valid_num_ports = check_ports_num(nb_ports);
588 
589 	if (valid_num_ports < 2 || valid_num_ports % 2) {
590 		printf("Current valid ports number is %u\n", valid_num_ports);
591 		rte_exit(EXIT_FAILURE, "Error with valid ports number is not even or less than 2\n");
592 	}
593 
594 	mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL",
595 		NUM_MBUFS_PER_PORT * nb_ports, MBUF_CACHE_SIZE,
596 		0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
597 	if (mbuf_pool == NULL)
598 		rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n");
599 
600 	/* initialize all ports */
601 	RTE_ETH_FOREACH_DEV(portid) {
602 		/* skip ports that are not enabled */
603 		if ((enabled_port_mask & (1 << portid)) == 0) {
604 			printf("\nSkipping disabled port %d\n", portid);
605 			continue;
606 		}
607 		if (port_init(portid, mbuf_pool) != 0)
608 			rte_exit(EXIT_FAILURE, "Cannot initialize network ports\n");
609 	}
610 
611 	/* call lcore_main() on every lcore */
612 	rte_eal_mp_remote_launch(lcore_main, NULL, CALL_MASTER);
613 	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
614 		if (rte_eal_wait_lcore(lcore_id) < 0)
615 			return -1;
616 	}
617 
618 	return 0;
619 }
620