xref: /dpdk/examples/distributor/main.c (revision edab33b1c01d508fdd934c06ee27f84250d2749a)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
#include <errno.h>
#include <getopt.h>
#include <inttypes.h>
#include <signal.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#include <rte_eal.h>
#include <rte_ethdev.h>
#include <rte_cycles.h>
#include <rte_malloc.h>
#include <rte_debug.h>
#include <rte_distributor.h>
46 
/* Descriptor counts passed to rte_eth_rx_queue_setup()/rte_eth_tx_queue_setup() */
#define RX_RING_SIZE 256
#define TX_RING_SIZE 512

/* Maximum number of packets handled per RX burst / ring dequeue / TX buffer */
#define BURST_SIZE 32

/* Element count requested for the RX -> TX software ring */
#define RTE_RING_SZ 1024

/* mbuf pool sizing: mbufs per port, per-element size and per-lcore cache */
#define NUM_MBUFS ((64 * 1024) - 1)
#define MBUF_SIZE (2048 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM)
#define MBUF_CACHE_SIZE 250
54 
/* uncomment the line below to enable debug logs */
56 /* #define DEBUG */
57 
/*
 * LOG_DEBUG(log_type, fmt, ...) - debug-only logging helper.
 *
 * When DEBUG is defined the call is forwarded to RTE_LOG() at DEBUG level;
 * otherwise it expands to an empty statement and none of its arguments are
 * evaluated. Both variants are wrapped in do { } while (0) so the macro can
 * be used as a single statement (e.g. in an unbraced if/else).
 *
 * LOG_LEVEL follows the same switch: RTE_LOG_DEBUG with DEBUG defined,
 * RTE_LOG_INFO otherwise.
 */
#ifdef DEBUG
#define LOG_LEVEL RTE_LOG_DEBUG
/* the ';' after RTE_LOG() is required for the do-block to be a valid statement */
#define LOG_DEBUG(log_type, fmt, args...) do {	\
	RTE_LOG(DEBUG, log_type, fmt, ##args);	\
} while (0)
#else
#define LOG_LEVEL RTE_LOG_INFO
#define LOG_DEBUG(log_type, fmt, args...) do {} while (0)
#endif

/* log type name used by this application (LOG_DEBUG(DISTRAPP, ...)) */
#define RTE_LOGTYPE_DISTRAPP RTE_LOGTYPE_USER1
69 
/* bitmask of enabled ports: bit N set means port N is used (set from -p) */
static uint32_t enabled_port_mask;

/* set to 1 by lcore_rx() when it leaves its loop; polled by worker and TX loops */
volatile uint8_t quit_signal;

/* set to 1 by the SIGINT handler; polled by the RX loop */
volatile uint8_t quit_signal_rx;
74 
75 static volatile struct app_stats {
76 	struct {
77 		uint64_t rx_pkts;
78 		uint64_t returned_pkts;
79 		uint64_t enqueued_pkts;
80 	} rx __rte_cache_aligned;
81 
82 	struct {
83 		uint64_t dequeue_pkts;
84 		uint64_t tx_pkts;
85 	} tx __rte_cache_aligned;
86 } app_stats;
87 
88 static const struct rte_eth_conf port_conf_default = {
89 	.rxmode = {
90 		.mq_mode = ETH_MQ_RX_RSS,
91 		.max_rx_pkt_len = ETHER_MAX_LEN,
92 	},
93 	.txmode = {
94 		.mq_mode = ETH_MQ_TX_NONE,
95 	},
96 	.rx_adv_conf = {
97 		.rss_conf = {
98 			.rss_hf = ETH_RSS_IP | ETH_RSS_UDP |
99 				ETH_RSS_TCP | ETH_RSS_SCTP,
100 		}
101 	},
102 };
103 
104 struct output_buffer {
105 	unsigned count;
106 	struct rte_mbuf *mbufs[BURST_SIZE];
107 };
108 
109 /*
110  * Initialises a given port using global settings and with the rx buffers
111  * coming from the mbuf_pool passed as parameter
112  */
113 static inline int
114 port_init(uint8_t port, struct rte_mempool *mbuf_pool)
115 {
116 	struct rte_eth_conf port_conf = port_conf_default;
117 	const uint16_t rxRings = 1, txRings = rte_lcore_count() - 1;
118 	int retval;
119 	uint16_t q;
120 
121 	if (port >= rte_eth_dev_count())
122 		return -1;
123 
124 	retval = rte_eth_dev_configure(port, rxRings, txRings, &port_conf);
125 	if (retval != 0)
126 		return retval;
127 
128 	for (q = 0; q < rxRings; q++) {
129 		retval = rte_eth_rx_queue_setup(port, q, RX_RING_SIZE,
130 						rte_eth_dev_socket_id(port),
131 						NULL, mbuf_pool);
132 		if (retval < 0)
133 			return retval;
134 	}
135 
136 	for (q = 0; q < txRings; q++) {
137 		retval = rte_eth_tx_queue_setup(port, q, TX_RING_SIZE,
138 						rte_eth_dev_socket_id(port),
139 						NULL);
140 		if (retval < 0)
141 			return retval;
142 	}
143 
144 	retval = rte_eth_dev_start(port);
145 	if (retval < 0)
146 		return retval;
147 
148 	struct rte_eth_link link;
149 	rte_eth_link_get_nowait(port, &link);
150 	if (!link.link_status) {
151 		sleep(1);
152 		rte_eth_link_get_nowait(port, &link);
153 	}
154 
155 	if (!link.link_status) {
156 		printf("Link down on port %"PRIu8"\n", port);
157 		return 0;
158 	}
159 
160 	struct ether_addr addr;
161 	rte_eth_macaddr_get(port, &addr);
162 	printf("Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8
163 			" %02"PRIx8" %02"PRIx8" %02"PRIx8"\n",
164 			(unsigned)port,
165 			addr.addr_bytes[0], addr.addr_bytes[1],
166 			addr.addr_bytes[2], addr.addr_bytes[3],
167 			addr.addr_bytes[4], addr.addr_bytes[5]);
168 
169 	rte_eth_promiscuous_enable(port);
170 
171 	return 0;
172 }
173 
/* Arguments handed to the per-lcore entry points (lcore_rx, lcore_worker) */
struct lcore_params {
	unsigned int worker_id;     /* id passed to rte_distributor_get_pkt() */
	struct rte_distributor *d;  /* distributor shared by RX core and workers */
	struct rte_ring *r;         /* ring the RX core enqueues processed packets to */
	struct rte_mempool *mem_pool; /* mbuf pool, used by the RX core at shutdown */
};
180 
181 static void
182 quit_workers(struct rte_distributor *d, struct rte_mempool *p)
183 {
184 	const unsigned num_workers = rte_lcore_count() - 2;
185 	unsigned i;
186 	struct rte_mbuf *bufs[num_workers];
187 	rte_mempool_get_bulk(p, (void *)bufs, num_workers);
188 
189 	for (i = 0; i < num_workers; i++)
190 		bufs[i]->hash.rss = i << 1;
191 
192 	rte_distributor_process(d, bufs, num_workers);
193 	rte_mempool_put_bulk(p, (void *)bufs, num_workers);
194 }
195 
196 static int
197 lcore_rx(struct lcore_params *p)
198 {
199 	struct rte_distributor *d = p->d;
200 	struct rte_mempool *mem_pool = p->mem_pool;
201 	struct rte_ring *r = p->r;
202 	const uint8_t nb_ports = rte_eth_dev_count();
203 	const int socket_id = rte_socket_id();
204 	uint8_t port;
205 
206 	for (port = 0; port < nb_ports; port++) {
207 		/* skip ports that are not enabled */
208 		if ((enabled_port_mask & (1 << port)) == 0)
209 			continue;
210 
211 		if (rte_eth_dev_socket_id(port) > 0 &&
212 				rte_eth_dev_socket_id(port) != socket_id)
213 			printf("WARNING, port %u is on remote NUMA node to "
214 					"RX thread.\n\tPerformance will not "
215 					"be optimal.\n", port);
216 	}
217 
218 	printf("\nCore %u doing packet RX.\n", rte_lcore_id());
219 	port = 0;
220 	while (!quit_signal_rx) {
221 
222 		/* skip ports that are not enabled */
223 		if ((enabled_port_mask & (1 << port)) == 0) {
224 			if (++port == nb_ports)
225 				port = 0;
226 			continue;
227 		}
228 		struct rte_mbuf *bufs[BURST_SIZE*2];
229 		const uint16_t nb_rx = rte_eth_rx_burst(port, 0, bufs,
230 				BURST_SIZE);
231 		app_stats.rx.rx_pkts += nb_rx;
232 
233 		rte_distributor_process(d, bufs, nb_rx);
234 		const uint16_t nb_ret = rte_distributor_returned_pkts(d,
235 				bufs, BURST_SIZE*2);
236 		app_stats.rx.returned_pkts += nb_ret;
237 		if (unlikely(nb_ret == 0))
238 			continue;
239 
240 		uint16_t sent = rte_ring_enqueue_burst(r, (void *)bufs, nb_ret);
241 		app_stats.rx.enqueued_pkts += sent;
242 		if (unlikely(sent < nb_ret)) {
243 			LOG_DEBUG(DISTRAPP, "%s:Packet loss due to full ring\n", __func__);
244 			while (sent < nb_ret)
245 				rte_pktmbuf_free(bufs[sent++]);
246 		}
247 		if (++port == nb_ports)
248 			port = 0;
249 	}
250 	rte_distributor_process(d, NULL, 0);
251 	/* flush distributor to bring to known state */
252 	rte_distributor_flush(d);
253 	/* set worker & tx threads quit flag */
254 	quit_signal = 1;
255 	/*
256 	 * worker threads may hang in get packet as
257 	 * distributor process is not running, just make sure workers
258 	 * get packets till quit_signal is actually been
259 	 * received and they gracefully shutdown
260 	 */
261 	quit_workers(d, mem_pool);
262 	/* rx thread should quit at last */
263 	return 0;
264 }
265 
266 static inline void
267 flush_one_port(struct output_buffer *outbuf, uint8_t outp)
268 {
269 	unsigned nb_tx = rte_eth_tx_burst(outp, 0, outbuf->mbufs,
270 			outbuf->count);
271 	app_stats.tx.tx_pkts += nb_tx;
272 
273 	if (unlikely(nb_tx < outbuf->count)) {
274 		LOG_DEBUG(DISTRAPP, "%s:Packet loss with tx_burst\n", __func__);
275 		do {
276 			rte_pktmbuf_free(outbuf->mbufs[nb_tx]);
277 		} while (++nb_tx < outbuf->count);
278 	}
279 	outbuf->count = 0;
280 }
281 
282 static inline void
283 flush_all_ports(struct output_buffer *tx_buffers, uint8_t nb_ports)
284 {
285 	uint8_t outp;
286 	for (outp = 0; outp < nb_ports; outp++) {
287 		/* skip ports that are not enabled */
288 		if ((enabled_port_mask & (1 << outp)) == 0)
289 			continue;
290 
291 		if (tx_buffers[outp].count == 0)
292 			continue;
293 
294 		flush_one_port(&tx_buffers[outp], outp);
295 	}
296 }
297 
298 static int
299 lcore_tx(struct rte_ring *in_r)
300 {
301 	static struct output_buffer tx_buffers[RTE_MAX_ETHPORTS];
302 	const uint8_t nb_ports = rte_eth_dev_count();
303 	const int socket_id = rte_socket_id();
304 	uint8_t port;
305 
306 	for (port = 0; port < nb_ports; port++) {
307 		/* skip ports that are not enabled */
308 		if ((enabled_port_mask & (1 << port)) == 0)
309 			continue;
310 
311 		if (rte_eth_dev_socket_id(port) > 0 &&
312 				rte_eth_dev_socket_id(port) != socket_id)
313 			printf("WARNING, port %u is on remote NUMA node to "
314 					"TX thread.\n\tPerformance will not "
315 					"be optimal.\n", port);
316 	}
317 
318 	printf("\nCore %u doing packet TX.\n", rte_lcore_id());
319 	while (!quit_signal) {
320 
321 		for (port = 0; port < nb_ports; port++) {
322 			/* skip ports that are not enabled */
323 			if ((enabled_port_mask & (1 << port)) == 0)
324 				continue;
325 
326 			struct rte_mbuf *bufs[BURST_SIZE];
327 			const uint16_t nb_rx = rte_ring_dequeue_burst(in_r,
328 					(void *)bufs, BURST_SIZE);
329 			app_stats.tx.dequeue_pkts += nb_rx;
330 
331 			/* if we get no traffic, flush anything we have */
332 			if (unlikely(nb_rx == 0)) {
333 				flush_all_ports(tx_buffers, nb_ports);
334 				continue;
335 			}
336 
337 			/* for traffic we receive, queue it up for transmit */
338 			uint16_t i;
339 			_mm_prefetch(bufs[0], 0);
340 			_mm_prefetch(bufs[1], 0);
341 			_mm_prefetch(bufs[2], 0);
342 			for (i = 0; i < nb_rx; i++) {
343 				struct output_buffer *outbuf;
344 				uint8_t outp;
345 				_mm_prefetch(bufs[i + 3], 0);
346 				/*
347 				 * workers should update in_port to hold the
348 				 * output port value
349 				 */
350 				outp = bufs[i]->port;
351 				/* skip ports that are not enabled */
352 				if ((enabled_port_mask & (1 << outp)) == 0)
353 					continue;
354 
355 				outbuf = &tx_buffers[outp];
356 				outbuf->mbufs[outbuf->count++] = bufs[i];
357 				if (outbuf->count == BURST_SIZE)
358 					flush_one_port(outbuf, outp);
359 			}
360 		}
361 	}
362 	return 0;
363 }
364 
365 static void
366 int_handler(int sig_num)
367 {
368 	printf("Exiting on signal %d\n", sig_num);
369 	/* set quit flag for rx thread to exit */
370 	quit_signal_rx = 1;
371 }
372 
373 static void
374 print_stats(void)
375 {
376 	struct rte_eth_stats eth_stats;
377 	unsigned i;
378 
379 	printf("\nRX thread stats:\n");
380 	printf(" - Received:    %"PRIu64"\n", app_stats.rx.rx_pkts);
381 	printf(" - Processed:   %"PRIu64"\n", app_stats.rx.returned_pkts);
382 	printf(" - Enqueued:    %"PRIu64"\n", app_stats.rx.enqueued_pkts);
383 
384 	printf("\nTX thread stats:\n");
385 	printf(" - Dequeued:    %"PRIu64"\n", app_stats.tx.dequeue_pkts);
386 	printf(" - Transmitted: %"PRIu64"\n", app_stats.tx.tx_pkts);
387 
388 	for (i = 0; i < rte_eth_dev_count(); i++) {
389 		rte_eth_stats_get(i, &eth_stats);
390 		printf("\nPort %u stats:\n", i);
391 		printf(" - Pkts in:   %"PRIu64"\n", eth_stats.ipackets);
392 		printf(" - Pkts out:  %"PRIu64"\n", eth_stats.opackets);
393 		printf(" - In Errs:   %"PRIu64"\n", eth_stats.ierrors);
394 		printf(" - Out Errs:  %"PRIu64"\n", eth_stats.oerrors);
395 		printf(" - Mbuf Errs: %"PRIu64"\n", eth_stats.rx_nombuf);
396 	}
397 }
398 
399 static int
400 lcore_worker(struct lcore_params *p)
401 {
402 	struct rte_distributor *d = p->d;
403 	const unsigned id = p->worker_id;
404 	/*
405 	 * for single port, xor_val will be zero so we won't modify the output
406 	 * port, otherwise we send traffic from 0 to 1, 2 to 3, and vice versa
407 	 */
408 	const unsigned xor_val = (rte_eth_dev_count() > 1);
409 	struct rte_mbuf *buf = NULL;
410 
411 	printf("\nCore %u acting as worker core.\n", rte_lcore_id());
412 	while (!quit_signal) {
413 		buf = rte_distributor_get_pkt(d, id, buf);
414 		buf->port ^= xor_val;
415 	}
416 	return 0;
417 }
418 
/* Print the command-line synopsis; prgname is the program name to show. */
static void
print_usage(const char *prgname)
{
	printf(
		"%s [EAL options] -- -p PORTMASK\n"
		"  -p PORTMASK: hexadecimal bitmask of ports to configure\n",
		prgname
	);
}
427 
/*
 * Parse a hexadecimal port bitmask.
 *
 * portmask: NUL-terminated string holding the mask in base 16.
 *
 * Returns the mask, or 0 when the string is NULL, empty, contains anything
 * that is not part of a hexadecimal number, overflows, does not fit in
 * 32 bits, or is zero.
 *
 * Failure is reported as 0 (not a negative value) because the caller stores
 * the result in the unsigned enabled_port_mask and tests it against 0: a
 * negative return would turn into a non-zero "all ports" mask there.
 */
static int
parse_portmask(const char *portmask)
{
	char *end = NULL;
	unsigned long pm;

	if (portmask == NULL || portmask[0] == '\0')
		return 0;

	/* parse hexadecimal string */
	errno = 0;
	pm = strtoul(portmask, &end, 16);
	if (errno != 0 || end == portmask || *end != '\0')
		return 0;

	/* the port mask is a 32-bit quantity; do not silently truncate */
	if (pm > UINT32_MAX)
		return 0;

	return (int)pm;
}
444 
445 /* Parse the argument given in the command line of the application */
446 static int
447 parse_args(int argc, char **argv)
448 {
449 	int opt;
450 	char **argvopt;
451 	int option_index;
452 	char *prgname = argv[0];
453 	static struct option lgopts[] = {
454 		{NULL, 0, 0, 0}
455 	};
456 
457 	argvopt = argv;
458 
459 	while ((opt = getopt_long(argc, argvopt, "p:",
460 			lgopts, &option_index)) != EOF) {
461 
462 		switch (opt) {
463 		/* portmask */
464 		case 'p':
465 			enabled_port_mask = parse_portmask(optarg);
466 			if (enabled_port_mask == 0) {
467 				printf("invalid portmask\n");
468 				print_usage(prgname);
469 				return -1;
470 			}
471 			break;
472 
473 		default:
474 			print_usage(prgname);
475 			return -1;
476 		}
477 	}
478 
479 	if (optind <= 1) {
480 		print_usage(prgname);
481 		return -1;
482 	}
483 
484 	argv[optind-1] = prgname;
485 
486 	optind = 0; /* reset getopt lib */
487 	return 0;
488 }
489 
490 /* Main function, does initialization and calls the per-lcore functions */
491 int
492 main(int argc, char *argv[])
493 {
494 	struct rte_mempool *mbuf_pool;
495 	struct rte_distributor *d;
496 	struct rte_ring *output_ring;
497 	unsigned lcore_id, worker_id = 0;
498 	unsigned nb_ports;
499 	uint8_t portid;
500 	uint8_t nb_ports_available;
501 
502 	/* catch ctrl-c so we can print on exit */
503 	signal(SIGINT, int_handler);
504 
505 	/* init EAL */
506 	int ret = rte_eal_init(argc, argv);
507 	if (ret < 0)
508 		rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
509 	argc -= ret;
510 	argv += ret;
511 
512 	/* parse application arguments (after the EAL ones) */
513 	ret = parse_args(argc, argv);
514 	if (ret < 0)
515 		rte_exit(EXIT_FAILURE, "Invalid distributor parameters\n");
516 
517 	if (rte_lcore_count() < 3)
518 		rte_exit(EXIT_FAILURE, "Error, This application needs at "
519 				"least 3 logical cores to run:\n"
520 				"1 lcore for packet RX and distribution\n"
521 				"1 lcore for packet TX\n"
522 				"and at least 1 lcore for worker threads\n");
523 
524 	nb_ports = rte_eth_dev_count();
525 	if (nb_ports == 0)
526 		rte_exit(EXIT_FAILURE, "Error: no ethernet ports detected\n");
527 	if (nb_ports != 1 && (nb_ports & 1))
528 		rte_exit(EXIT_FAILURE, "Error: number of ports must be even, except "
529 				"when using a single port\n");
530 
531 	mbuf_pool = rte_mempool_create("MBUF_POOL", NUM_MBUFS * nb_ports,
532 			MBUF_SIZE, MBUF_CACHE_SIZE,
533 			sizeof(struct rte_pktmbuf_pool_private),
534 			rte_pktmbuf_pool_init, NULL,
535 			rte_pktmbuf_init, NULL,
536 			rte_socket_id(), 0);
537 	if (mbuf_pool == NULL)
538 		rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n");
539 	nb_ports_available = nb_ports;
540 
541 	/* initialize all ports */
542 	for (portid = 0; portid < nb_ports; portid++) {
543 		/* skip ports that are not enabled */
544 		if ((enabled_port_mask & (1 << portid)) == 0) {
545 			printf("\nSkipping disabled port %d\n", portid);
546 			nb_ports_available--;
547 			continue;
548 		}
549 		/* init port */
550 		printf("Initializing port %u... done\n", (unsigned) portid);
551 
552 		if (port_init(portid, mbuf_pool) != 0)
553 			rte_exit(EXIT_FAILURE, "Cannot initialize port %"PRIu8"\n",
554 					portid);
555 	}
556 
557 	if (!nb_ports_available) {
558 		rte_exit(EXIT_FAILURE,
559 				"All available ports are disabled. Please set portmask.\n");
560 	}
561 
562 	d = rte_distributor_create("PKT_DIST", rte_socket_id(),
563 			rte_lcore_count() - 2);
564 	if (d == NULL)
565 		rte_exit(EXIT_FAILURE, "Cannot create distributor\n");
566 
567 	/*
568 	 * scheduler ring is read only by the transmitter core, but written to
569 	 * by multiple threads
570 	 */
571 	output_ring = rte_ring_create("Output_ring", RTE_RING_SZ,
572 			rte_socket_id(), RING_F_SC_DEQ);
573 	if (output_ring == NULL)
574 		rte_exit(EXIT_FAILURE, "Cannot create output ring\n");
575 
576 	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
577 		if (worker_id == rte_lcore_count() - 2)
578 			rte_eal_remote_launch((lcore_function_t *)lcore_tx,
579 					output_ring, lcore_id);
580 		else {
581 			struct lcore_params *p =
582 					rte_malloc(NULL, sizeof(*p), 0);
583 			if (!p)
584 				rte_panic("malloc failure\n");
585 			*p = (struct lcore_params){worker_id, d, output_ring, mbuf_pool};
586 
587 			rte_eal_remote_launch((lcore_function_t *)lcore_worker,
588 					p, lcore_id);
589 		}
590 		worker_id++;
591 	}
592 	/* call lcore_main on master core only */
593 	struct lcore_params p = { 0, d, output_ring, mbuf_pool};
594 	lcore_rx(&p);
595 
596 	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
597 		if (rte_eal_wait_lcore(lcore_id) < 0)
598 			return -1;
599 	}
600 
601 	print_stats();
602 	return 0;
603 }
604