xref: /dpdk/examples/distributor/main.c (revision 1cde1b9a9b4dbf31cb5e5ccdfc5da3cb079f43a2)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2017 Intel Corporation
3  */
4 
5 #include <stdint.h>
6 #include <inttypes.h>
7 #include <unistd.h>
8 #include <signal.h>
9 #include <getopt.h>
10 
11 #include <rte_eal.h>
12 #include <rte_ethdev.h>
13 #include <rte_cycles.h>
14 #include <rte_malloc.h>
15 #include <rte_debug.h>
16 #include <rte_prefetch.h>
17 #include <rte_distributor.h>
18 #include <rte_pause.h>
19 #include <rte_power.h>
20 
21 #define RX_RING_SIZE 1024
22 #define TX_RING_SIZE 1024
23 #define NUM_MBUFS ((64*1024)-1)
24 #define MBUF_CACHE_SIZE 128
25 #define BURST_SIZE 64
26 #define SCHED_RX_RING_SZ 8192
27 #define SCHED_TX_RING_SZ 65536
28 #define BURST_SIZE_TX 32
29 
30 #define RTE_LOGTYPE_DISTRAPP RTE_LOGTYPE_USER1
31 
32 #define ANSI_COLOR_RED     "\x1b[31m"
33 #define ANSI_COLOR_RESET   "\x1b[0m"
34 
/*
 * mask of enabled ports: bit N is tested as (1 << N) for ethdev port N.
 * Filled in by parse_args() from the -p option.
 */
static uint32_t enabled_port_mask;
/*
 * Shutdown flags polled by the per-lcore loops. As written in this file the
 * chain is: int_handler() sets quit_signal_dist -> lcore_distributor() exits
 * and sets quit_signal_work and quit_signal_rx -> lcore_rx() exits and sets
 * quit_signal -> lcore_tx() exits.
 */
/* polled by lcore_tx(); set by lcore_rx() on exit */
volatile uint8_t quit_signal;
/* polled by lcore_rx(); set by lcore_distributor() on exit */
volatile uint8_t quit_signal_rx;
/* polled by lcore_distributor() and main()'s stats loop; set by int_handler() */
volatile uint8_t quit_signal_dist;
/* polled by lcore_worker(); set by lcore_distributor() on exit */
volatile uint8_t quit_signal_work;
/* set to 1 by main() when init_power_library() returns 0 */
unsigned int power_lib_initialised;
42 
/*
 * Application-wide packet counters, grouped by the lcore role that updates
 * them. Every group and every pad member carries __rte_cache_aligned;
 * presumably this keeps the groups written by different lcores on separate
 * cache lines -- NOTE(review): confirm intent of the padN members.
 */
static volatile struct app_stats {
	/* updated by lcore_rx() */
	struct {
		uint64_t rx_pkts;       /* packets returned by rte_eth_rx_burst() */
		uint64_t returned_pkts; /* only touched in the "#if 0" path of lcore_rx() */
		uint64_t enqueued_pkts; /* packets accepted by the output ring */
		uint64_t enqdrop_pkts;  /* packets freed because the ring was full */
	} rx __rte_cache_aligned;
	int pad1 __rte_cache_aligned;

	/* updated by lcore_distributor() */
	struct {
		uint64_t in_pkts;       /* packets dequeued from the rx->dist ring */
		uint64_t ret_pkts;      /* packets handed back by the distributor */
		uint64_t sent_pkts;     /* packets accepted by the dist->tx ring */
		uint64_t enqdrop_pkts;  /* packets freed because the ring was full */
	} dist __rte_cache_aligned;
	int pad2 __rte_cache_aligned;

	/* updated by lcore_tx() and flush_one_port() */
	struct {
		uint64_t dequeue_pkts;  /* packets dequeued from the dist->tx ring */
		uint64_t tx_pkts;       /* updated by flush_one_port() */
		uint64_t enqdrop_pkts;  /* packets freed after a short tx burst */
	} tx __rte_cache_aligned;
	int pad3 __rte_cache_aligned;

	/* per-worker packet totals, indexed by lcore_params.worker_id */
	uint64_t worker_pkts[64] __rte_cache_aligned;

	int pad4 __rte_cache_aligned;

	/*
	 * per-worker histogram of burst sizes: [worker][n-1] counts bursts of
	 * n packets; reset by print_stats() after each report
	 */
	uint64_t worker_bursts[64][8] __rte_cache_aligned;

	int pad5 __rte_cache_aligned;

	/* snapshots of rte_eth_stats ipackets/opackets taken by print_stats() */
	uint64_t port_rx_pkts[64] __rte_cache_aligned;
	uint64_t port_tx_pkts[64] __rte_cache_aligned;
} app_stats;

/* values of app_stats at the previous print_stats() call, used for deltas */
struct app_stats prev_app_stats;
80 
81 static const struct rte_eth_conf port_conf_default = {
82 	.rxmode = {
83 		.mq_mode = ETH_MQ_RX_RSS,
84 		.max_rx_pkt_len = RTE_ETHER_MAX_LEN,
85 	},
86 	.txmode = {
87 		.mq_mode = ETH_MQ_TX_NONE,
88 	},
89 	.rx_adv_conf = {
90 		.rss_conf = {
91 			.rss_hf = ETH_RSS_IP | ETH_RSS_UDP |
92 				ETH_RSS_TCP | ETH_RSS_SCTP,
93 		}
94 	},
95 };
96 
/*
 * Per-port staging buffer used by the TX lcore: mbufs are appended to
 * mbufs[] and count tracks how many are pending until the buffer is flushed.
 */
struct output_buffer {
	unsigned count;                     /* number of valid entries in mbufs[] */
	struct rte_mbuf *mbufs[BURST_SIZE]; /* packets waiting to be transmitted */
};

/* forward declaration; the definition appears later in this file */
static void print_stats(void);
103 
104 /*
105  * Initialises a given port using global settings and with the rx buffers
106  * coming from the mbuf_pool passed as parameter
107  */
108 static inline int
109 port_init(uint16_t port, struct rte_mempool *mbuf_pool)
110 {
111 	struct rte_eth_conf port_conf = port_conf_default;
112 	const uint16_t rxRings = 1, txRings = rte_lcore_count() - 1;
113 	int retval;
114 	uint16_t q;
115 	uint16_t nb_rxd = RX_RING_SIZE;
116 	uint16_t nb_txd = TX_RING_SIZE;
117 	struct rte_eth_dev_info dev_info;
118 	struct rte_eth_txconf txconf;
119 
120 	if (!rte_eth_dev_is_valid_port(port))
121 		return -1;
122 
123 	retval = rte_eth_dev_info_get(port, &dev_info);
124 	if (retval != 0) {
125 		printf("Error during getting device (port %u) info: %s\n",
126 				port, strerror(-retval));
127 		return retval;
128 	}
129 
130 	if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE)
131 		port_conf.txmode.offloads |=
132 			DEV_TX_OFFLOAD_MBUF_FAST_FREE;
133 
134 	port_conf.rx_adv_conf.rss_conf.rss_hf &=
135 		dev_info.flow_type_rss_offloads;
136 	if (port_conf.rx_adv_conf.rss_conf.rss_hf !=
137 			port_conf_default.rx_adv_conf.rss_conf.rss_hf) {
138 		printf("Port %u modified RSS hash function based on hardware support,"
139 			"requested:%#"PRIx64" configured:%#"PRIx64"\n",
140 			port,
141 			port_conf_default.rx_adv_conf.rss_conf.rss_hf,
142 			port_conf.rx_adv_conf.rss_conf.rss_hf);
143 	}
144 
145 	retval = rte_eth_dev_configure(port, rxRings, txRings, &port_conf);
146 	if (retval != 0)
147 		return retval;
148 
149 	retval = rte_eth_dev_adjust_nb_rx_tx_desc(port, &nb_rxd, &nb_txd);
150 	if (retval != 0)
151 		return retval;
152 
153 	for (q = 0; q < rxRings; q++) {
154 		retval = rte_eth_rx_queue_setup(port, q, nb_rxd,
155 						rte_eth_dev_socket_id(port),
156 						NULL, mbuf_pool);
157 		if (retval < 0)
158 			return retval;
159 	}
160 
161 	txconf = dev_info.default_txconf;
162 	txconf.offloads = port_conf.txmode.offloads;
163 	for (q = 0; q < txRings; q++) {
164 		retval = rte_eth_tx_queue_setup(port, q, nb_txd,
165 						rte_eth_dev_socket_id(port),
166 						&txconf);
167 		if (retval < 0)
168 			return retval;
169 	}
170 
171 	retval = rte_eth_dev_start(port);
172 	if (retval < 0)
173 		return retval;
174 
175 	struct rte_eth_link link;
176 	do {
177 		retval = rte_eth_link_get_nowait(port, &link);
178 		if (retval < 0) {
179 			printf("Failed link get (port %u): %s\n",
180 				port, rte_strerror(-retval));
181 			return retval;
182 		} else if (link.link_status)
183 			break;
184 
185 		printf("Waiting for Link up on port %"PRIu16"\n", port);
186 		sleep(1);
187 	} while (!link.link_status);
188 
189 	if (!link.link_status) {
190 		printf("Link down on port %"PRIu16"\n", port);
191 		return 0;
192 	}
193 
194 	struct rte_ether_addr addr;
195 	retval = rte_eth_macaddr_get(port, &addr);
196 	if (retval < 0) {
197 		printf("Failed to get MAC address (port %u): %s\n",
198 				port, rte_strerror(-retval));
199 		return retval;
200 	}
201 
202 	printf("Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8
203 			" %02"PRIx8" %02"PRIx8" %02"PRIx8"\n",
204 			port,
205 			addr.addr_bytes[0], addr.addr_bytes[1],
206 			addr.addr_bytes[2], addr.addr_bytes[3],
207 			addr.addr_bytes[4], addr.addr_bytes[5]);
208 
209 	retval = rte_eth_promiscuous_enable(port);
210 	if (retval != 0)
211 		return retval;
212 
213 	return 0;
214 }
215 
/*
 * Argument block handed to the per-lcore entry points (lcore_worker,
 * lcore_distributor, lcore_rx); allocated and filled in by main().
 */
struct lcore_params {
	unsigned worker_id;            /* index used by lcore_worker() for the distributor and stats */
	struct rte_distributor *d;     /* distributor instance */
	struct rte_ring *rx_dist_ring; /* ring written by lcore_rx(), read by lcore_distributor() */
	struct rte_ring *dist_tx_ring; /* ring written by lcore_distributor(), read by lcore_tx() */
	struct rte_mempool *mem_pool;  /* mbuf pool; not read by the lcore functions in this file */
};
223 
224 static int
225 lcore_rx(struct lcore_params *p)
226 {
227 	const uint16_t nb_ports = rte_eth_dev_count_avail();
228 	const int socket_id = rte_socket_id();
229 	uint16_t port;
230 	struct rte_mbuf *bufs[BURST_SIZE*2];
231 
232 	RTE_ETH_FOREACH_DEV(port) {
233 		/* skip ports that are not enabled */
234 		if ((enabled_port_mask & (1 << port)) == 0)
235 			continue;
236 
237 		if (rte_eth_dev_socket_id(port) > 0 &&
238 				rte_eth_dev_socket_id(port) != socket_id)
239 			printf("WARNING, port %u is on remote NUMA node to "
240 					"RX thread.\n\tPerformance will not "
241 					"be optimal.\n", port);
242 	}
243 
244 	printf("\nCore %u doing packet RX.\n", rte_lcore_id());
245 	port = 0;
246 	while (!quit_signal_rx) {
247 
248 		/* skip ports that are not enabled */
249 		if ((enabled_port_mask & (1 << port)) == 0) {
250 			if (++port == nb_ports)
251 				port = 0;
252 			continue;
253 		}
254 		const uint16_t nb_rx = rte_eth_rx_burst(port, 0, bufs,
255 				BURST_SIZE);
256 		if (unlikely(nb_rx == 0)) {
257 			if (++port == nb_ports)
258 				port = 0;
259 			continue;
260 		}
261 		app_stats.rx.rx_pkts += nb_rx;
262 
263 /*
264  * You can run the distributor on the rx core with this code. Returned
265  * packets are then send straight to the tx core.
266  */
267 #if 0
268 	rte_distributor_process(d, bufs, nb_rx);
269 	const uint16_t nb_ret = rte_distributor_returned_pktsd,
270 			bufs, BURST_SIZE*2);
271 
272 		app_stats.rx.returned_pkts += nb_ret;
273 		if (unlikely(nb_ret == 0)) {
274 			if (++port == nb_ports)
275 				port = 0;
276 			continue;
277 		}
278 
279 		struct rte_ring *tx_ring = p->dist_tx_ring;
280 		uint16_t sent = rte_ring_enqueue_burst(tx_ring,
281 				(void *)bufs, nb_ret, NULL);
282 #else
283 		uint16_t nb_ret = nb_rx;
284 		/*
285 		 * Swap the following two lines if you want the rx traffic
286 		 * to go directly to tx, no distribution.
287 		 */
288 		struct rte_ring *out_ring = p->rx_dist_ring;
289 		/* struct rte_ring *out_ring = p->dist_tx_ring; */
290 
291 		uint16_t sent = rte_ring_enqueue_burst(out_ring,
292 				(void *)bufs, nb_ret, NULL);
293 #endif
294 
295 		app_stats.rx.enqueued_pkts += sent;
296 		if (unlikely(sent < nb_ret)) {
297 			app_stats.rx.enqdrop_pkts +=  nb_ret - sent;
298 			RTE_LOG_DP(DEBUG, DISTRAPP,
299 				"%s:Packet loss due to full ring\n", __func__);
300 			while (sent < nb_ret)
301 				rte_pktmbuf_free(bufs[sent++]);
302 		}
303 		if (++port == nb_ports)
304 			port = 0;
305 	}
306 	if (power_lib_initialised)
307 		rte_power_exit(rte_lcore_id());
308 	/* set worker & tx threads quit flag */
309 	printf("\nCore %u exiting rx task.\n", rte_lcore_id());
310 	quit_signal = 1;
311 	return 0;
312 }
313 
314 static inline void
315 flush_one_port(struct output_buffer *outbuf, uint8_t outp)
316 {
317 	unsigned int nb_tx = rte_eth_tx_burst(outp, 0,
318 			outbuf->mbufs, outbuf->count);
319 	app_stats.tx.tx_pkts += outbuf->count;
320 
321 	if (unlikely(nb_tx < outbuf->count)) {
322 		app_stats.tx.enqdrop_pkts +=  outbuf->count - nb_tx;
323 		do {
324 			rte_pktmbuf_free(outbuf->mbufs[nb_tx]);
325 		} while (++nb_tx < outbuf->count);
326 	}
327 	outbuf->count = 0;
328 }
329 
330 static inline void
331 flush_all_ports(struct output_buffer *tx_buffers)
332 {
333 	uint16_t outp;
334 
335 	RTE_ETH_FOREACH_DEV(outp) {
336 		/* skip ports that are not enabled */
337 		if ((enabled_port_mask & (1 << outp)) == 0)
338 			continue;
339 
340 		if (tx_buffers[outp].count == 0)
341 			continue;
342 
343 		flush_one_port(&tx_buffers[outp], outp);
344 	}
345 }
346 
347 
348 
349 static int
350 lcore_distributor(struct lcore_params *p)
351 {
352 	struct rte_ring *in_r = p->rx_dist_ring;
353 	struct rte_ring *out_r = p->dist_tx_ring;
354 	struct rte_mbuf *bufs[BURST_SIZE * 4];
355 	struct rte_distributor *d = p->d;
356 
357 	printf("\nCore %u acting as distributor core.\n", rte_lcore_id());
358 	while (!quit_signal_dist) {
359 		const uint16_t nb_rx = rte_ring_dequeue_burst(in_r,
360 				(void *)bufs, BURST_SIZE*1, NULL);
361 		if (nb_rx) {
362 			app_stats.dist.in_pkts += nb_rx;
363 
364 			/* Distribute the packets */
365 			rte_distributor_process(d, bufs, nb_rx);
366 			/* Handle Returns */
367 			const uint16_t nb_ret =
368 				rte_distributor_returned_pkts(d,
369 					bufs, BURST_SIZE*2);
370 
371 			if (unlikely(nb_ret == 0))
372 				continue;
373 			app_stats.dist.ret_pkts += nb_ret;
374 
375 			uint16_t sent = rte_ring_enqueue_burst(out_r,
376 					(void *)bufs, nb_ret, NULL);
377 			app_stats.dist.sent_pkts += sent;
378 			if (unlikely(sent < nb_ret)) {
379 				app_stats.dist.enqdrop_pkts += nb_ret - sent;
380 				RTE_LOG(DEBUG, DISTRAPP,
381 					"%s:Packet loss due to full out ring\n",
382 					__func__);
383 				while (sent < nb_ret)
384 					rte_pktmbuf_free(bufs[sent++]);
385 			}
386 		}
387 	}
388 	printf("\nCore %u exiting distributor task.\n", rte_lcore_id());
389 	quit_signal_work = 1;
390 	if (power_lib_initialised)
391 		rte_power_exit(rte_lcore_id());
392 	rte_distributor_flush(d);
393 	/* Unblock any returns so workers can exit */
394 	rte_distributor_clear_returns(d);
395 	quit_signal_rx = 1;
396 	return 0;
397 }
398 
399 
400 static int
401 lcore_tx(struct rte_ring *in_r)
402 {
403 	static struct output_buffer tx_buffers[RTE_MAX_ETHPORTS];
404 	const int socket_id = rte_socket_id();
405 	uint16_t port;
406 
407 	RTE_ETH_FOREACH_DEV(port) {
408 		/* skip ports that are not enabled */
409 		if ((enabled_port_mask & (1 << port)) == 0)
410 			continue;
411 
412 		if (rte_eth_dev_socket_id(port) > 0 &&
413 				rte_eth_dev_socket_id(port) != socket_id)
414 			printf("WARNING, port %u is on remote NUMA node to "
415 					"TX thread.\n\tPerformance will not "
416 					"be optimal.\n", port);
417 	}
418 
419 	printf("\nCore %u doing packet TX.\n", rte_lcore_id());
420 	while (!quit_signal) {
421 
422 		RTE_ETH_FOREACH_DEV(port) {
423 			/* skip ports that are not enabled */
424 			if ((enabled_port_mask & (1 << port)) == 0)
425 				continue;
426 
427 			struct rte_mbuf *bufs[BURST_SIZE_TX];
428 			const uint16_t nb_rx = rte_ring_dequeue_burst(in_r,
429 					(void *)bufs, BURST_SIZE_TX, NULL);
430 			app_stats.tx.dequeue_pkts += nb_rx;
431 
432 			/* if we get no traffic, flush anything we have */
433 			if (unlikely(nb_rx == 0)) {
434 				flush_all_ports(tx_buffers);
435 				continue;
436 			}
437 
438 			/* for traffic we receive, queue it up for transmit */
439 			uint16_t i;
440 			rte_prefetch_non_temporal((void *)bufs[0]);
441 			rte_prefetch_non_temporal((void *)bufs[1]);
442 			rte_prefetch_non_temporal((void *)bufs[2]);
443 			for (i = 0; i < nb_rx; i++) {
444 				struct output_buffer *outbuf;
445 				uint8_t outp;
446 				rte_prefetch_non_temporal((void *)bufs[i + 3]);
447 				/*
448 				 * workers should update in_port to hold the
449 				 * output port value
450 				 */
451 				outp = bufs[i]->port;
452 				/* skip ports that are not enabled */
453 				if ((enabled_port_mask & (1 << outp)) == 0)
454 					continue;
455 
456 				outbuf = &tx_buffers[outp];
457 				outbuf->mbufs[outbuf->count++] = bufs[i];
458 				if (outbuf->count == BURST_SIZE_TX)
459 					flush_one_port(outbuf, outp);
460 			}
461 		}
462 	}
463 	if (power_lib_initialised)
464 		rte_power_exit(rte_lcore_id());
465 	printf("\nCore %u exiting tx task.\n", rte_lcore_id());
466 	return 0;
467 }
468 
/*
 * SIGINT handler (installed by main()): reports the signal number and
 * requests shutdown by raising quit_signal_dist, which the distributor lcore
 * and main()'s stats loop poll.
 *
 * NOTE(review): printf() is not on the POSIX list of async-signal-safe
 * functions; consider moving the message out of the handler.
 */
static void
int_handler(int sig_num)
{
	printf("Exiting on signal %d\n", sig_num);
	/* set quit flag for the distributor thread to exit */
	quit_signal_dist = 1;
}
476 
477 static void
478 print_stats(void)
479 {
480 	struct rte_eth_stats eth_stats;
481 	unsigned int i, j;
482 	const unsigned int num_workers = rte_lcore_count() - 4;
483 
484 	RTE_ETH_FOREACH_DEV(i) {
485 		rte_eth_stats_get(i, &eth_stats);
486 		app_stats.port_rx_pkts[i] = eth_stats.ipackets;
487 		app_stats.port_tx_pkts[i] = eth_stats.opackets;
488 	}
489 
490 	printf("\n\nRX Thread:\n");
491 	RTE_ETH_FOREACH_DEV(i) {
492 		printf("Port %u Pktsin : %5.2f\n", i,
493 				(app_stats.port_rx_pkts[i] -
494 				prev_app_stats.port_rx_pkts[i])/1000000.0);
495 		prev_app_stats.port_rx_pkts[i] = app_stats.port_rx_pkts[i];
496 	}
497 	printf(" - Received:    %5.2f\n",
498 			(app_stats.rx.rx_pkts -
499 			prev_app_stats.rx.rx_pkts)/1000000.0);
500 	printf(" - Returned:    %5.2f\n",
501 			(app_stats.rx.returned_pkts -
502 			prev_app_stats.rx.returned_pkts)/1000000.0);
503 	printf(" - Enqueued:    %5.2f\n",
504 			(app_stats.rx.enqueued_pkts -
505 			prev_app_stats.rx.enqueued_pkts)/1000000.0);
506 	printf(" - Dropped:     %s%5.2f%s\n", ANSI_COLOR_RED,
507 			(app_stats.rx.enqdrop_pkts -
508 			prev_app_stats.rx.enqdrop_pkts)/1000000.0,
509 			ANSI_COLOR_RESET);
510 
511 	printf("Distributor thread:\n");
512 	printf(" - In:          %5.2f\n",
513 			(app_stats.dist.in_pkts -
514 			prev_app_stats.dist.in_pkts)/1000000.0);
515 	printf(" - Returned:    %5.2f\n",
516 			(app_stats.dist.ret_pkts -
517 			prev_app_stats.dist.ret_pkts)/1000000.0);
518 	printf(" - Sent:        %5.2f\n",
519 			(app_stats.dist.sent_pkts -
520 			prev_app_stats.dist.sent_pkts)/1000000.0);
521 	printf(" - Dropped      %s%5.2f%s\n", ANSI_COLOR_RED,
522 			(app_stats.dist.enqdrop_pkts -
523 			prev_app_stats.dist.enqdrop_pkts)/1000000.0,
524 			ANSI_COLOR_RESET);
525 
526 	printf("TX thread:\n");
527 	printf(" - Dequeued:    %5.2f\n",
528 			(app_stats.tx.dequeue_pkts -
529 			prev_app_stats.tx.dequeue_pkts)/1000000.0);
530 	RTE_ETH_FOREACH_DEV(i) {
531 		printf("Port %u Pktsout: %5.2f\n",
532 				i, (app_stats.port_tx_pkts[i] -
533 				prev_app_stats.port_tx_pkts[i])/1000000.0);
534 		prev_app_stats.port_tx_pkts[i] = app_stats.port_tx_pkts[i];
535 	}
536 	printf(" - Transmitted: %5.2f\n",
537 			(app_stats.tx.tx_pkts -
538 			prev_app_stats.tx.tx_pkts)/1000000.0);
539 	printf(" - Dropped:     %s%5.2f%s\n", ANSI_COLOR_RED,
540 			(app_stats.tx.enqdrop_pkts -
541 			prev_app_stats.tx.enqdrop_pkts)/1000000.0,
542 			ANSI_COLOR_RESET);
543 
544 	prev_app_stats.rx.rx_pkts = app_stats.rx.rx_pkts;
545 	prev_app_stats.rx.returned_pkts = app_stats.rx.returned_pkts;
546 	prev_app_stats.rx.enqueued_pkts = app_stats.rx.enqueued_pkts;
547 	prev_app_stats.rx.enqdrop_pkts = app_stats.rx.enqdrop_pkts;
548 	prev_app_stats.dist.in_pkts = app_stats.dist.in_pkts;
549 	prev_app_stats.dist.ret_pkts = app_stats.dist.ret_pkts;
550 	prev_app_stats.dist.sent_pkts = app_stats.dist.sent_pkts;
551 	prev_app_stats.dist.enqdrop_pkts = app_stats.dist.enqdrop_pkts;
552 	prev_app_stats.tx.dequeue_pkts = app_stats.tx.dequeue_pkts;
553 	prev_app_stats.tx.tx_pkts = app_stats.tx.tx_pkts;
554 	prev_app_stats.tx.enqdrop_pkts = app_stats.tx.enqdrop_pkts;
555 
556 	for (i = 0; i < num_workers; i++) {
557 		printf("Worker %02u Pkts: %5.2f. Bursts(1-8): ", i,
558 				(app_stats.worker_pkts[i] -
559 				prev_app_stats.worker_pkts[i])/1000000.0);
560 		for (j = 0; j < 8; j++) {
561 			printf("%"PRIu64" ", app_stats.worker_bursts[i][j]);
562 			app_stats.worker_bursts[i][j] = 0;
563 		}
564 		printf("\n");
565 		prev_app_stats.worker_pkts[i] = app_stats.worker_pkts[i];
566 	}
567 }
568 
569 static int
570 lcore_worker(struct lcore_params *p)
571 {
572 	struct rte_distributor *d = p->d;
573 	const unsigned id = p->worker_id;
574 	unsigned int num = 0;
575 	unsigned int i;
576 
577 	/*
578 	 * for single port, xor_val will be zero so we won't modify the output
579 	 * port, otherwise we send traffic from 0 to 1, 2 to 3, and vice versa
580 	 */
581 	const unsigned xor_val = (rte_eth_dev_count_avail() > 1);
582 	struct rte_mbuf *buf[8] __rte_cache_aligned;
583 
584 	for (i = 0; i < 8; i++)
585 		buf[i] = NULL;
586 
587 	app_stats.worker_pkts[p->worker_id] = 1;
588 
589 	printf("\nCore %u acting as worker core.\n", rte_lcore_id());
590 	while (!quit_signal_work) {
591 		num = rte_distributor_get_pkt(d, id, buf, buf, num);
592 		/* Do a little bit of work for each packet */
593 		for (i = 0; i < num; i++) {
594 			uint64_t t = rte_rdtsc()+100;
595 
596 			while (rte_rdtsc() < t)
597 				rte_pause();
598 			buf[i]->port ^= xor_val;
599 		}
600 
601 		app_stats.worker_pkts[p->worker_id] += num;
602 		if (num > 0)
603 			app_stats.worker_bursts[p->worker_id][num-1]++;
604 	}
605 	if (power_lib_initialised)
606 		rte_power_exit(rte_lcore_id());
607 	rte_free(p);
608 	return 0;
609 }
610 
611 static int
612 init_power_library(void)
613 {
614 	int ret = 0, lcore_id;
615 	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
616 		/* init power management library */
617 		ret = rte_power_init(lcore_id);
618 		if (ret) {
619 			RTE_LOG(ERR, POWER,
620 				"Library initialization failed on core %u\n",
621 				lcore_id);
622 			/*
623 			 * Return on first failure, we'll fall back
624 			 * to non-power operation
625 			 */
626 			return ret;
627 		}
628 	}
629 	return ret;
630 }
631 
/* Print the command-line synopsis of the application to stdout. */
static void
print_usage(const char *prgname)
{
	printf("%s [EAL options] -- -p PORTMASK\n", prgname);
	printf("  -p PORTMASK: hexadecimal bitmask of ports to configure\n");
}
640 
/*
 * Parse a hexadecimal port mask string.
 *
 * Returns the mask value, or 0 when the string is empty, contains anything
 * other than a hexadecimal number, does not fit in 32 bits, or evaluates to
 * zero. Zero is used as the error value because the caller stores the
 * result in the unsigned enabled_port_mask and treats 0 as "invalid"; a
 * negative error code would be stored as an all-ones mask instead.
 */
static int
parse_portmask(const char *portmask)
{
	char *end = NULL;
	unsigned long pm;

	/* parse hexadecimal string */
	pm = strtoul(portmask, &end, 16);
	if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
		return 0;

	/* the port mask is 32 bits wide; reject anything larger */
	if (pm > UINT32_MAX)
		return 0;

	return (int)pm;
}
657 
658 /* Parse the argument given in the command line of the application */
659 static int
660 parse_args(int argc, char **argv)
661 {
662 	int opt;
663 	char **argvopt;
664 	int option_index;
665 	char *prgname = argv[0];
666 	static struct option lgopts[] = {
667 		{NULL, 0, 0, 0}
668 	};
669 
670 	argvopt = argv;
671 
672 	while ((opt = getopt_long(argc, argvopt, "p:",
673 			lgopts, &option_index)) != EOF) {
674 
675 		switch (opt) {
676 		/* portmask */
677 		case 'p':
678 			enabled_port_mask = parse_portmask(optarg);
679 			if (enabled_port_mask == 0) {
680 				printf("invalid portmask\n");
681 				print_usage(prgname);
682 				return -1;
683 			}
684 			break;
685 
686 		default:
687 			print_usage(prgname);
688 			return -1;
689 		}
690 	}
691 
692 	if (optind <= 1) {
693 		print_usage(prgname);
694 		return -1;
695 	}
696 
697 	argv[optind-1] = prgname;
698 
699 	optind = 1; /* reset getopt lib */
700 	return 0;
701 }
702 
703 /* Main function, does initialization and calls the per-lcore functions */
704 int
705 main(int argc, char *argv[])
706 {
707 	struct rte_mempool *mbuf_pool;
708 	struct rte_distributor *d;
709 	struct rte_ring *dist_tx_ring;
710 	struct rte_ring *rx_dist_ring;
711 	struct rte_power_core_capabilities lcore_cap;
712 	unsigned int lcore_id, worker_id = 0;
713 	int distr_core_id = -1, rx_core_id = -1, tx_core_id = -1;
714 	unsigned nb_ports;
715 	uint16_t portid;
716 	uint16_t nb_ports_available;
717 	uint64_t t, freq;
718 
719 	/* catch ctrl-c so we can print on exit */
720 	signal(SIGINT, int_handler);
721 
722 	/* init EAL */
723 	int ret = rte_eal_init(argc, argv);
724 	if (ret < 0)
725 		rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
726 	argc -= ret;
727 	argv += ret;
728 
729 	/* parse application arguments (after the EAL ones) */
730 	ret = parse_args(argc, argv);
731 	if (ret < 0)
732 		rte_exit(EXIT_FAILURE, "Invalid distributor parameters\n");
733 
734 	if (rte_lcore_count() < 5)
735 		rte_exit(EXIT_FAILURE, "Error, This application needs at "
736 				"least 5 logical cores to run:\n"
737 				"1 lcore for stats (can be core 0)\n"
738 				"1 lcore for packet RX\n"
739 				"1 lcore for distribution\n"
740 				"1 lcore for packet TX\n"
741 				"and at least 1 lcore for worker threads\n");
742 
743 	if (init_power_library() == 0)
744 		power_lib_initialised = 1;
745 
746 	nb_ports = rte_eth_dev_count_avail();
747 	if (nb_ports == 0)
748 		rte_exit(EXIT_FAILURE, "Error: no ethernet ports detected\n");
749 	if (nb_ports != 1 && (nb_ports & 1))
750 		rte_exit(EXIT_FAILURE, "Error: number of ports must be even, except "
751 				"when using a single port\n");
752 
753 	mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL",
754 		NUM_MBUFS * nb_ports, MBUF_CACHE_SIZE, 0,
755 		RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
756 	if (mbuf_pool == NULL)
757 		rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n");
758 	nb_ports_available = nb_ports;
759 
760 	/* initialize all ports */
761 	RTE_ETH_FOREACH_DEV(portid) {
762 		/* skip ports that are not enabled */
763 		if ((enabled_port_mask & (1 << portid)) == 0) {
764 			printf("\nSkipping disabled port %d\n", portid);
765 			nb_ports_available--;
766 			continue;
767 		}
768 		/* init port */
769 		printf("Initializing port %u... done\n", portid);
770 
771 		if (port_init(portid, mbuf_pool) != 0)
772 			rte_exit(EXIT_FAILURE, "Cannot initialize port %u\n",
773 					portid);
774 	}
775 
776 	if (!nb_ports_available) {
777 		rte_exit(EXIT_FAILURE,
778 				"All available ports are disabled. Please set portmask.\n");
779 	}
780 
781 	d = rte_distributor_create("PKT_DIST", rte_socket_id(),
782 			rte_lcore_count() - 4,
783 			RTE_DIST_ALG_BURST);
784 	if (d == NULL)
785 		rte_exit(EXIT_FAILURE, "Cannot create distributor\n");
786 
787 	/*
788 	 * scheduler ring is read by the transmitter core, and written to
789 	 * by scheduler core
790 	 */
791 	dist_tx_ring = rte_ring_create("Output_ring", SCHED_TX_RING_SZ,
792 			rte_socket_id(), RING_F_SC_DEQ | RING_F_SP_ENQ);
793 	if (dist_tx_ring == NULL)
794 		rte_exit(EXIT_FAILURE, "Cannot create output ring\n");
795 
796 	rx_dist_ring = rte_ring_create("Input_ring", SCHED_RX_RING_SZ,
797 			rte_socket_id(), RING_F_SC_DEQ | RING_F_SP_ENQ);
798 	if (rx_dist_ring == NULL)
799 		rte_exit(EXIT_FAILURE, "Cannot create output ring\n");
800 
801 	if (power_lib_initialised) {
802 		/*
803 		 * Here we'll pre-assign lcore ids to the rx, tx and
804 		 * distributor workloads if there's higher frequency
805 		 * on those cores e.g. if Turbo Boost is enabled.
806 		 * It's also worth mentioning that it will assign cores in a
807 		 * specific order, so that if there's less than three
808 		 * available, the higher frequency cores will go to the
809 		 * distributor first, then rx, then tx.
810 		 */
811 		RTE_LCORE_FOREACH_SLAVE(lcore_id) {
812 
813 			rte_power_get_capabilities(lcore_id, &lcore_cap);
814 
815 			if (lcore_cap.priority != 1)
816 				continue;
817 
818 			if (distr_core_id < 0) {
819 				distr_core_id = lcore_id;
820 				printf("Distributor on priority core %d\n",
821 					lcore_id);
822 				continue;
823 			}
824 			if (rx_core_id < 0) {
825 				rx_core_id = lcore_id;
826 				printf("Rx on priority core %d\n",
827 					lcore_id);
828 				continue;
829 			}
830 			if (tx_core_id < 0) {
831 				tx_core_id = lcore_id;
832 				printf("Tx on priority core %d\n",
833 					lcore_id);
834 				continue;
835 			}
836 		}
837 	}
838 
839 	/*
840 	 * If there's any of the key workloads left without an lcore_id
841 	 * after the high performing core assignment above, pre-assign
842 	 * them here.
843 	 */
844 	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
845 		if (lcore_id == (unsigned int)distr_core_id ||
846 				lcore_id == (unsigned int)rx_core_id ||
847 				lcore_id == (unsigned int)tx_core_id)
848 			continue;
849 		if (distr_core_id < 0) {
850 			distr_core_id = lcore_id;
851 			printf("Distributor on core %d\n", lcore_id);
852 			continue;
853 		}
854 		if (rx_core_id < 0) {
855 			rx_core_id = lcore_id;
856 			printf("Rx on core %d\n", lcore_id);
857 			continue;
858 		}
859 		if (tx_core_id < 0) {
860 			tx_core_id = lcore_id;
861 			printf("Tx on core %d\n", lcore_id);
862 			continue;
863 		}
864 	}
865 
866 	printf(" tx id %d, dist id %d, rx id %d\n",
867 			tx_core_id,
868 			distr_core_id,
869 			rx_core_id);
870 
871 	/*
872 	 * Kick off all the worker threads first, avoiding the pre-assigned
873 	 * lcore_ids for tx, rx and distributor workloads.
874 	 */
875 	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
876 		if (lcore_id == (unsigned int)distr_core_id ||
877 				lcore_id == (unsigned int)rx_core_id ||
878 				lcore_id == (unsigned int)tx_core_id)
879 			continue;
880 		printf("Starting thread %d as worker, lcore_id %d\n",
881 				worker_id, lcore_id);
882 		struct lcore_params *p =
883 			rte_malloc(NULL, sizeof(*p), 0);
884 		if (!p)
885 			rte_panic("malloc failure\n");
886 		*p = (struct lcore_params){worker_id++, d, rx_dist_ring,
887 			dist_tx_ring, mbuf_pool};
888 
889 		rte_eal_remote_launch((lcore_function_t *)lcore_worker,
890 				p, lcore_id);
891 	}
892 
893 	/* Start tx core */
894 	rte_eal_remote_launch((lcore_function_t *)lcore_tx,
895 			dist_tx_ring, tx_core_id);
896 
897 	/* Start distributor core */
898 	struct lcore_params *pd =
899 		rte_malloc(NULL, sizeof(*pd), 0);
900 	if (!pd)
901 		rte_panic("malloc failure\n");
902 	*pd = (struct lcore_params){worker_id++, d,
903 		rx_dist_ring, dist_tx_ring, mbuf_pool};
904 	rte_eal_remote_launch(
905 			(lcore_function_t *)lcore_distributor,
906 			pd, distr_core_id);
907 
908 	/* Start rx core */
909 	struct lcore_params *pr =
910 		rte_malloc(NULL, sizeof(*pr), 0);
911 	if (!pr)
912 		rte_panic("malloc failure\n");
913 	*pr = (struct lcore_params){worker_id++, d, rx_dist_ring,
914 		dist_tx_ring, mbuf_pool};
915 	rte_eal_remote_launch((lcore_function_t *)lcore_rx,
916 			pr, rx_core_id);
917 
918 	freq = rte_get_timer_hz();
919 	t = rte_rdtsc() + freq;
920 	while (!quit_signal_dist) {
921 		if (t < rte_rdtsc()) {
922 			print_stats();
923 			t = rte_rdtsc() + freq;
924 		}
925 		usleep(1000);
926 	}
927 
928 	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
929 		if (rte_eal_wait_lcore(lcore_id) < 0)
930 			return -1;
931 	}
932 
933 	print_stats();
934 
935 	rte_free(pd);
936 	rte_free(pr);
937 
938 	return 0;
939 }
940