xref: /dpdk/examples/vmdq/main.c (revision 0964a95120fa024888fbc0ea5e34d1abef1b93dc)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2014 Intel Corporation
3  */
4 
5 #include <stdint.h>
6 #include <sys/queue.h>
7 #include <stdlib.h>
8 #include <string.h>
9 #include <stdio.h>
10 #include <assert.h>
11 #include <errno.h>
12 #include <signal.h>
13 #include <stdarg.h>
14 #include <inttypes.h>
15 #include <getopt.h>
16 
17 #include <rte_common.h>
18 #include <rte_log.h>
19 #include <rte_memory.h>
20 #include <rte_memcpy.h>
21 #include <rte_eal.h>
22 #include <rte_launch.h>
23 #include <rte_atomic.h>
24 #include <rte_cycles.h>
25 #include <rte_prefetch.h>
26 #include <rte_lcore.h>
27 #include <rte_per_lcore.h>
28 #include <rte_branch_prediction.h>
29 #include <rte_interrupts.h>
30 #include <rte_random.h>
31 #include <rte_debug.h>
32 #include <rte_ether.h>
33 #include <rte_ethdev.h>
34 #include <rte_mempool.h>
35 #include <rte_mbuf.h>
36 
/*
 * Default number of RX/TX ring descriptors requested for every queue.
 */
#define RTE_TEST_RX_DESC_DEFAULT 1024
#define RTE_TEST_TX_DESC_DEFAULT 1024

/* Upper bound on the number of queues tracked per port. */
#define MAX_QUEUES 1024

/*
 * The mbuf pool is sized per port as
 * (queue count) * (larger of the RX and TX descriptor ring sizes),
 * so that a large number of vmdq pools can be served.
 */
#define NUM_MBUFS_PER_PORT (MAX_QUEUES * RTE_MAX(RTE_TEST_RX_DESC_DEFAULT, \
						RTE_TEST_TX_DESC_DEFAULT))
#define MBUF_CACHE_SIZE 64

/* Maximum number of packets handled by one rx/tx burst call. */
#define MAX_PKT_BURST 32

/* Marker stored in ports[] for a port id that turned out to be invalid. */
#define INVALID_PORT_ID 0xFF
55 
/* bitmask of the ports selected with -p (bit N selects port N) */
static uint32_t enabled_port_mask;

/* number of vmdq pools; 8 unless overridden with --nb-pools */
static uint32_t num_pools = 8;
/* total queue count (pf + vmdq); recomputed by port_init() */
static uint32_t num_queues = 8;
62 
63 /* empty vmdq configuration structure. Filled in programatically */
64 static const struct rte_eth_conf vmdq_conf_default = {
65 	.rxmode = {
66 		.mq_mode        = ETH_MQ_RX_VMDQ_ONLY,
67 		.split_hdr_size = 0,
68 	},
69 
70 	.txmode = {
71 		.mq_mode = ETH_MQ_TX_NONE,
72 	},
73 	.rx_adv_conf = {
74 		/*
75 		 * should be overridden separately in code with
76 		 * appropriate values
77 		 */
78 		.vmdq_rx_conf = {
79 			.nb_queue_pools = ETH_8_POOLS,
80 			.enable_default_pool = 0,
81 			.default_pool = 0,
82 			.nb_pool_maps = 0,
83 			.pool_map = {{0, 0},},
84 		},
85 	},
86 };
87 
88 static unsigned lcore_ids[RTE_MAX_LCORE];
89 static uint16_t ports[RTE_MAX_ETHPORTS];
90 static unsigned num_ports; /**< The number of ports specified in command line */
91 
92 /* array used for printing out statistics */
93 volatile unsigned long rxPackets[MAX_QUEUES] = {0};
94 
95 const uint16_t vlan_tags[] = {
96 	0,  1,  2,  3,  4,  5,  6,  7,
97 	8,  9, 10, 11,	12, 13, 14, 15,
98 	16, 17, 18, 19, 20, 21, 22, 23,
99 	24, 25, 26, 27, 28, 29, 30, 31,
100 	32, 33, 34, 35, 36, 37, 38, 39,
101 	40, 41, 42, 43, 44, 45, 46, 47,
102 	48, 49, 50, 51, 52, 53, 54, 55,
103 	56, 57, 58, 59, 60, 61, 62, 63,
104 };
105 const uint16_t num_vlans = RTE_DIM(vlan_tags);
106 static uint16_t num_pf_queues,  num_vmdq_queues;
107 static uint16_t vmdq_pool_base, vmdq_queue_base;
108 /* pool mac addr template, pool mac addr is like: 52 54 00 12 port# pool# */
109 static struct rte_ether_addr pool_addr_template = {
110 	.addr_bytes = {0x52, 0x54, 0x00, 0x12, 0x00, 0x00}
111 };
112 
113 /* ethernet addresses of ports */
114 static struct rte_ether_addr vmdq_ports_eth_addr[RTE_MAX_ETHPORTS];
115 
/* queue / pool-map / pool limits, 10G variant */
#define MAX_QUEUE_NUM_10G 128
#define MAX_POOL_MAP_NUM_10G 64
#define MAX_POOL_NUM_10G 64

/* queue / pool-map / pool limits, 1G variant */
#define MAX_QUEUE_NUM_1G 8
#define MAX_POOL_MAP_NUM_1G 32
#define MAX_POOL_NUM_1G 8
122 /*
123  * Builds up the correct configuration for vmdq based on the vlan tags array
124  * given above, and determine the queue number and pool map number according to
125  * valid pool number
126  */
127 static inline int
128 get_eth_conf(struct rte_eth_conf *eth_conf, uint32_t num_pools)
129 {
130 	struct rte_eth_vmdq_rx_conf conf;
131 	unsigned i;
132 
133 	conf.nb_queue_pools = (enum rte_eth_nb_pools)num_pools;
134 	conf.nb_pool_maps = num_pools;
135 	conf.enable_default_pool = 0;
136 	conf.default_pool = 0; /* set explicit value, even if not used */
137 
138 	for (i = 0; i < conf.nb_pool_maps; i++) {
139 		conf.pool_map[i].vlan_id = vlan_tags[i];
140 		conf.pool_map[i].pools = (1UL << (i % num_pools));
141 	}
142 
143 	(void)(rte_memcpy(eth_conf, &vmdq_conf_default, sizeof(*eth_conf)));
144 	(void)(rte_memcpy(&eth_conf->rx_adv_conf.vmdq_rx_conf, &conf,
145 		   sizeof(eth_conf->rx_adv_conf.vmdq_rx_conf)));
146 	return 0;
147 }
148 
149 /*
150  * Initialises a given port using global settings and with the rx buffers
151  * coming from the mbuf_pool passed as parameter
152  */
153 static inline int
154 port_init(uint16_t port, struct rte_mempool *mbuf_pool)
155 {
156 	struct rte_eth_dev_info dev_info;
157 	struct rte_eth_rxconf *rxconf;
158 	struct rte_eth_txconf *txconf;
159 	struct rte_eth_conf port_conf;
160 	uint16_t rxRings, txRings;
161 	uint16_t rxRingSize = RTE_TEST_RX_DESC_DEFAULT;
162 	uint16_t txRingSize = RTE_TEST_TX_DESC_DEFAULT;
163 	int retval;
164 	uint16_t q;
165 	uint16_t queues_per_pool;
166 	uint32_t max_nb_pools;
167 
168 	/*
169 	 * The max pool number from dev_info will be used to validate the pool
170 	 * number specified in cmd line
171 	 */
172 	retval = rte_eth_dev_info_get(port, &dev_info);
173 	if (retval != 0) {
174 		printf("Error during getting device (port %u) info: %s\n",
175 				port, strerror(-retval));
176 		return retval;
177 	}
178 
179 	max_nb_pools = (uint32_t)dev_info.max_vmdq_pools;
180 	/*
181 	 * We allow to process part of VMDQ pools specified by num_pools in
182 	 * command line.
183 	 */
184 	if (num_pools > max_nb_pools) {
185 		printf("num_pools %d >max_nb_pools %d\n",
186 			num_pools, max_nb_pools);
187 		return -1;
188 	}
189 	retval = get_eth_conf(&port_conf, max_nb_pools);
190 	if (retval < 0)
191 		return retval;
192 
193 	/*
194 	 * NIC queues are divided into pf queues and vmdq queues.
195 	 */
196 	/* There is assumption here all ports have the same configuration! */
197 	num_pf_queues = dev_info.max_rx_queues - dev_info.vmdq_queue_num;
198 	queues_per_pool = dev_info.vmdq_queue_num / dev_info.max_vmdq_pools;
199 	num_vmdq_queues = num_pools * queues_per_pool;
200 	num_queues = num_pf_queues + num_vmdq_queues;
201 	vmdq_queue_base = dev_info.vmdq_queue_base;
202 	vmdq_pool_base  = dev_info.vmdq_pool_base;
203 
204 	printf("pf queue num: %u, configured vmdq pool num: %u,"
205 		" each vmdq pool has %u queues\n",
206 		num_pf_queues, num_pools, queues_per_pool);
207 	printf("vmdq queue base: %d pool base %d\n",
208 		vmdq_queue_base, vmdq_pool_base);
209 	if (!rte_eth_dev_is_valid_port(port))
210 		return -1;
211 
212 	/*
213 	 * Though in this example, we only receive packets from the first queue
214 	 * of each pool and send packets through first rte_lcore_count() tx
215 	 * queues of vmdq queues, all queues including pf queues are setup.
216 	 * This is because VMDQ queues doesn't always start from zero, and the
217 	 * PMD layer doesn't support selectively initialising part of rx/tx
218 	 * queues.
219 	 */
220 	rxRings = (uint16_t)dev_info.max_rx_queues;
221 	txRings = (uint16_t)dev_info.max_tx_queues;
222 
223 	retval = rte_eth_dev_info_get(port, &dev_info);
224 	if (retval != 0) {
225 		printf("Error during getting device (port %u) info: %s\n",
226 				port, strerror(-retval));
227 		return retval;
228 	}
229 
230 	if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE)
231 		port_conf.txmode.offloads |=
232 			DEV_TX_OFFLOAD_MBUF_FAST_FREE;
233 	retval = rte_eth_dev_configure(port, rxRings, txRings, &port_conf);
234 	if (retval != 0)
235 		return retval;
236 
237 	retval = rte_eth_dev_adjust_nb_rx_tx_desc(port, &rxRingSize,
238 				&txRingSize);
239 	if (retval != 0)
240 		return retval;
241 	if (RTE_MAX(rxRingSize, txRingSize) > RTE_MAX(RTE_TEST_RX_DESC_DEFAULT,
242 			RTE_TEST_TX_DESC_DEFAULT)) {
243 		printf("Mbuf pool has an insufficient size for port %u.\n",
244 			port);
245 		return -1;
246 	}
247 
248 	rxconf = &dev_info.default_rxconf;
249 	rxconf->rx_drop_en = 1;
250 	txconf = &dev_info.default_txconf;
251 	txconf->offloads = port_conf.txmode.offloads;
252 	for (q = 0; q < rxRings; q++) {
253 		retval = rte_eth_rx_queue_setup(port, q, rxRingSize,
254 					rte_eth_dev_socket_id(port),
255 					rxconf,
256 					mbuf_pool);
257 		if (retval < 0) {
258 			printf("initialise rx queue %d failed\n", q);
259 			return retval;
260 		}
261 	}
262 
263 	for (q = 0; q < txRings; q++) {
264 		retval = rte_eth_tx_queue_setup(port, q, txRingSize,
265 					rte_eth_dev_socket_id(port),
266 					txconf);
267 		if (retval < 0) {
268 			printf("initialise tx queue %d failed\n", q);
269 			return retval;
270 		}
271 	}
272 
273 	retval  = rte_eth_dev_start(port);
274 	if (retval < 0) {
275 		printf("port %d start failed\n", port);
276 		return retval;
277 	}
278 
279 	retval = rte_eth_macaddr_get(port, &vmdq_ports_eth_addr[port]);
280 	if (retval < 0) {
281 		printf("port %d MAC address get failed: %s\n", port,
282 		       rte_strerror(-retval));
283 		return retval;
284 	}
285 	printf("Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8
286 			" %02"PRIx8" %02"PRIx8" %02"PRIx8"\n",
287 			(unsigned)port,
288 			vmdq_ports_eth_addr[port].addr_bytes[0],
289 			vmdq_ports_eth_addr[port].addr_bytes[1],
290 			vmdq_ports_eth_addr[port].addr_bytes[2],
291 			vmdq_ports_eth_addr[port].addr_bytes[3],
292 			vmdq_ports_eth_addr[port].addr_bytes[4],
293 			vmdq_ports_eth_addr[port].addr_bytes[5]);
294 
295 	/*
296 	 * Set mac for each pool.
297 	 * There is no default mac for the pools in i40.
298 	 * Removes this after i40e fixes this issue.
299 	 */
300 	for (q = 0; q < num_pools; q++) {
301 		struct rte_ether_addr mac;
302 		mac = pool_addr_template;
303 		mac.addr_bytes[4] = port;
304 		mac.addr_bytes[5] = q;
305 		printf("Port %u vmdq pool %u set mac %02x:%02x:%02x:%02x:%02x:%02x\n",
306 			port, q,
307 			mac.addr_bytes[0], mac.addr_bytes[1],
308 			mac.addr_bytes[2], mac.addr_bytes[3],
309 			mac.addr_bytes[4], mac.addr_bytes[5]);
310 		retval = rte_eth_dev_mac_addr_add(port, &mac,
311 				q + vmdq_pool_base);
312 		if (retval) {
313 			printf("mac addr add failed at pool %d\n", q);
314 			return retval;
315 		}
316 	}
317 
318 	return 0;
319 }
320 
321 /* Check num_pools parameter and set it if OK*/
322 static int
323 vmdq_parse_num_pools(const char *q_arg)
324 {
325 	char *end = NULL;
326 	int n;
327 
328 	/* parse number string */
329 	n = strtol(q_arg, &end, 10);
330 	if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0'))
331 		return -1;
332 
333 	if (num_pools > num_vlans) {
334 		printf("num_pools %d > num_vlans %d\n", num_pools, num_vlans);
335 		return -1;
336 	}
337 
338 	num_pools = n;
339 
340 	return 0;
341 }
342 
343 
/*
 * Parse a hexadecimal port mask.
 *
 * portmask: string holding the mask in base 16.
 *
 * Returns the mask, or 0 when the string is empty, is not a complete
 * hexadecimal number, is out of range, does not fit in 32 bits, or is
 * zero. 0 is the only error value: the result is stored in an unsigned
 * 32-bit mask by the caller, where a negative error code would read as
 * "all ports enabled".
 */
static int
parse_portmask(const char *portmask)
{
	char *end = NULL;
	unsigned long pm;

	/* parse hexadecimal string */
	errno = 0;
	pm = strtoul(portmask, &end, 16);
	if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0') ||
			(errno == ERANGE))
		return 0;

	/* the mask is kept in a uint32_t, wider values cannot be honoured */
	if (pm > UINT32_MAX)
		return 0;

	return (int)pm;
}
360 
/*
 * Display usage.
 *
 * prgname: program name printed at the start of the synopsis line.
 */
static void
vmdq_usage(const char *prgname)
{
	printf("%s [EAL options] -- -p PORTMASK [--nb-pools NP]\n"
	"  -p PORTMASK: hexadecimal bitmask of ports to use\n"
	"  --nb-pools NP: number of pools\n",
	       prgname);
}
369 
370 /*  Parse the argument (num_pools) given in the command line of the application */
371 static int
372 vmdq_parse_args(int argc, char **argv)
373 {
374 	int opt;
375 	int option_index;
376 	unsigned i;
377 	const char *prgname = argv[0];
378 	static struct option long_option[] = {
379 		{"nb-pools", required_argument, NULL, 0},
380 		{NULL, 0, 0, 0}
381 	};
382 
383 	/* Parse command line */
384 	while ((opt = getopt_long(argc, argv, "p:", long_option,
385 		&option_index)) != EOF) {
386 		switch (opt) {
387 		/* portmask */
388 		case 'p':
389 			enabled_port_mask = parse_portmask(optarg);
390 			if (enabled_port_mask == 0) {
391 				printf("invalid portmask\n");
392 				vmdq_usage(prgname);
393 				return -1;
394 			}
395 			break;
396 		case 0:
397 			if (vmdq_parse_num_pools(optarg) == -1) {
398 				printf("invalid number of pools\n");
399 				vmdq_usage(prgname);
400 				return -1;
401 			}
402 			break;
403 
404 		default:
405 			vmdq_usage(prgname);
406 			return -1;
407 		}
408 	}
409 
410 	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
411 		if (enabled_port_mask & (1 << i))
412 			ports[num_ports++] = (uint8_t)i;
413 	}
414 
415 	if (num_ports < 2 || num_ports % 2) {
416 		printf("Current enabled port number is %u,"
417 			"but it should be even and at least 2\n", num_ports);
418 		return -1;
419 	}
420 
421 	return 0;
422 }
423 
424 static void
425 update_mac_address(struct rte_mbuf *m, unsigned dst_port)
426 {
427 	struct rte_ether_hdr *eth;
428 	void *tmp;
429 
430 	eth = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
431 
432 	/* 02:00:00:00:00:xx */
433 	tmp = &eth->d_addr.addr_bytes[0];
434 	*((uint64_t *)tmp) = 0x000000000002 + ((uint64_t)dst_port << 40);
435 
436 	/* src addr */
437 	rte_ether_addr_copy(&vmdq_ports_eth_addr[dst_port], &eth->s_addr);
438 }
439 
440 /* When we receive a HUP signal, print out our stats */
441 static void
442 sighup_handler(int signum)
443 {
444 	unsigned q;
445 	for (q = 0; q < num_queues; q++) {
446 		if (q % (num_queues/num_pools) == 0)
447 			printf("\nPool %u: ", q/(num_queues/num_pools));
448 		printf("%lu ", rxPackets[q]);
449 	}
450 	printf("\nFinished handling signal %d\n", signum);
451 }
452 
453 /*
454  * Main thread that does the work, reading from INPUT_PORT
455  * and writing to OUTPUT_PORT
456  */
457 static int
458 lcore_main(__attribute__((__unused__)) void *dummy)
459 {
460 	const uint16_t lcore_id = (uint16_t)rte_lcore_id();
461 	const uint16_t num_cores = (uint16_t)rte_lcore_count();
462 	uint16_t core_id = 0;
463 	uint16_t startQueue, endQueue;
464 	uint16_t q, i, p;
465 	const uint16_t remainder = (uint16_t)(num_vmdq_queues % num_cores);
466 
467 	for (i = 0; i < num_cores; i++)
468 		if (lcore_ids[i] == lcore_id) {
469 			core_id = i;
470 			break;
471 		}
472 
473 	if (remainder != 0) {
474 		if (core_id < remainder) {
475 			startQueue = (uint16_t)(core_id *
476 					(num_vmdq_queues / num_cores + 1));
477 			endQueue = (uint16_t)(startQueue +
478 					(num_vmdq_queues / num_cores) + 1);
479 		} else {
480 			startQueue = (uint16_t)(core_id *
481 					(num_vmdq_queues / num_cores) +
482 					remainder);
483 			endQueue = (uint16_t)(startQueue +
484 					(num_vmdq_queues / num_cores));
485 		}
486 	} else {
487 		startQueue = (uint16_t)(core_id *
488 				(num_vmdq_queues / num_cores));
489 		endQueue = (uint16_t)(startQueue +
490 				(num_vmdq_queues / num_cores));
491 	}
492 
493 	/* vmdq queue idx doesn't always start from zero.*/
494 	startQueue += vmdq_queue_base;
495 	endQueue   += vmdq_queue_base;
496 	printf("core %u(lcore %u) reading queues %i-%i\n", (unsigned)core_id,
497 		(unsigned)lcore_id, startQueue, endQueue - 1);
498 
499 	if (startQueue == endQueue) {
500 		printf("lcore %u has nothing to do\n", lcore_id);
501 		return 0;
502 	}
503 
504 	for (;;) {
505 		struct rte_mbuf *buf[MAX_PKT_BURST];
506 		const uint16_t buf_size = sizeof(buf) / sizeof(buf[0]);
507 
508 		for (p = 0; p < num_ports; p++) {
509 			const uint8_t sport = ports[p];
510 			/* 0 <-> 1, 2 <-> 3 etc */
511 			const uint8_t dport = ports[p ^ 1];
512 			if ((sport == INVALID_PORT_ID) || (dport == INVALID_PORT_ID))
513 				continue;
514 
515 			for (q = startQueue; q < endQueue; q++) {
516 				const uint16_t rxCount = rte_eth_rx_burst(sport,
517 					q, buf, buf_size);
518 
519 				if (unlikely(rxCount == 0))
520 					continue;
521 
522 				rxPackets[q] += rxCount;
523 
524 				for (i = 0; i < rxCount; i++)
525 					update_mac_address(buf[i], dport);
526 
527 				const uint16_t txCount = rte_eth_tx_burst(dport,
528 					vmdq_queue_base + core_id,
529 					buf,
530 					rxCount);
531 
532 				if (txCount != rxCount) {
533 					for (i = txCount; i < rxCount; i++)
534 						rte_pktmbuf_free(buf[i]);
535 				}
536 			}
537 		}
538 	}
539 }
540 
541 /*
542  * Update the global var NUM_PORTS and array PORTS according to system ports number
543  * and return valid ports number
544  */
545 static unsigned check_ports_num(unsigned nb_ports)
546 {
547 	unsigned valid_num_ports = num_ports;
548 	unsigned portid;
549 
550 	if (num_ports > nb_ports) {
551 		printf("\nSpecified port number(%u) exceeds total system port number(%u)\n",
552 			num_ports, nb_ports);
553 		num_ports = nb_ports;
554 	}
555 
556 	for (portid = 0; portid < num_ports; portid++) {
557 		if (!rte_eth_dev_is_valid_port(ports[portid])) {
558 			printf("\nSpecified port ID(%u) is not valid\n",
559 				ports[portid]);
560 			ports[portid] = INVALID_PORT_ID;
561 			valid_num_ports--;
562 		}
563 	}
564 	return valid_num_ports;
565 }
566 
567 /* Main function, does initialisation and calls the per-lcore functions */
568 int
569 main(int argc, char *argv[])
570 {
571 	struct rte_mempool *mbuf_pool;
572 	unsigned lcore_id, core_id = 0;
573 	int ret;
574 	unsigned nb_ports, valid_num_ports;
575 	uint16_t portid;
576 
577 	signal(SIGHUP, sighup_handler);
578 
579 	/* init EAL */
580 	ret = rte_eal_init(argc, argv);
581 	if (ret < 0)
582 		rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
583 	argc -= ret;
584 	argv += ret;
585 
586 	/* parse app arguments */
587 	ret = vmdq_parse_args(argc, argv);
588 	if (ret < 0)
589 		rte_exit(EXIT_FAILURE, "Invalid VMDQ argument\n");
590 
591 	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
592 		if (rte_lcore_is_enabled(lcore_id))
593 			lcore_ids[core_id++] = lcore_id;
594 
595 	if (rte_lcore_count() > RTE_MAX_LCORE)
596 		rte_exit(EXIT_FAILURE, "Not enough cores\n");
597 
598 	nb_ports = rte_eth_dev_count_avail();
599 
600 	/*
601 	 * Update the global var NUM_PORTS and global array PORTS
602 	 * and get value of var VALID_NUM_PORTS according to system ports number
603 	 */
604 	valid_num_ports = check_ports_num(nb_ports);
605 
606 	if (valid_num_ports < 2 || valid_num_ports % 2) {
607 		printf("Current valid ports number is %u\n", valid_num_ports);
608 		rte_exit(EXIT_FAILURE, "Error with valid ports number is not even or less than 2\n");
609 	}
610 
611 	mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL",
612 		NUM_MBUFS_PER_PORT * nb_ports, MBUF_CACHE_SIZE,
613 		0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
614 	if (mbuf_pool == NULL)
615 		rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n");
616 
617 	/* initialize all ports */
618 	RTE_ETH_FOREACH_DEV(portid) {
619 		/* skip ports that are not enabled */
620 		if ((enabled_port_mask & (1 << portid)) == 0) {
621 			printf("\nSkipping disabled port %d\n", portid);
622 			continue;
623 		}
624 		if (port_init(portid, mbuf_pool) != 0)
625 			rte_exit(EXIT_FAILURE, "Cannot initialize network ports\n");
626 	}
627 
628 	/* call lcore_main() on every lcore */
629 	rte_eal_mp_remote_launch(lcore_main, NULL, CALL_MASTER);
630 	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
631 		if (rte_eal_wait_lcore(lcore_id) < 0)
632 			return -1;
633 	}
634 
635 	return 0;
636 }
637