xref: /dpdk/app/test-pmd/testpmd.c (revision bb9be9a45e01e09caaf3e57b0c2c68c87a925b01)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2017 Intel Corporation
3  */
4 
5 #include <stdarg.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <signal.h>
9 #include <string.h>
10 #include <time.h>
11 #include <fcntl.h>
12 #ifndef RTE_EXEC_ENV_WINDOWS
13 #include <sys/mman.h>
14 #endif
15 #include <sys/types.h>
16 #include <errno.h>
17 #include <stdbool.h>
18 
19 #include <sys/queue.h>
20 #include <sys/stat.h>
21 
22 #include <stdint.h>
23 #include <unistd.h>
24 #include <inttypes.h>
25 
26 #include <rte_common.h>
27 #include <rte_errno.h>
28 #include <rte_byteorder.h>
29 #include <rte_log.h>
30 #include <rte_debug.h>
31 #include <rte_cycles.h>
32 #include <rte_memory.h>
33 #include <rte_memcpy.h>
34 #include <rte_launch.h>
35 #include <rte_eal.h>
36 #include <rte_alarm.h>
37 #include <rte_per_lcore.h>
38 #include <rte_lcore.h>
39 #include <rte_branch_prediction.h>
40 #include <rte_mempool.h>
41 #include <rte_malloc.h>
42 #include <rte_mbuf.h>
43 #include <rte_mbuf_pool_ops.h>
44 #include <rte_interrupts.h>
45 #include <rte_pci.h>
46 #include <rte_ether.h>
47 #include <rte_ethdev.h>
48 #include <rte_dev.h>
49 #include <rte_string_fns.h>
50 #ifdef RTE_NET_IXGBE
51 #include <rte_pmd_ixgbe.h>
52 #endif
53 #ifdef RTE_LIB_PDUMP
54 #include <rte_pdump.h>
55 #endif
56 #include <rte_flow.h>
57 #ifdef RTE_LIB_METRICS
58 #include <rte_metrics.h>
59 #endif
60 #ifdef RTE_LIB_BITRATESTATS
61 #include <rte_bitrate.h>
62 #endif
63 #ifdef RTE_LIB_LATENCYSTATS
64 #include <rte_latencystats.h>
65 #endif
66 #ifdef RTE_EXEC_ENV_WINDOWS
67 #include <process.h>
68 #endif
69 
70 #include "testpmd.h"
71 
72 #ifndef MAP_HUGETLB
73 /* FreeBSD may not have MAP_HUGETLB (in fact, it probably doesn't) */
74 #define HUGE_FLAG (0x40000)
75 #else
76 #define HUGE_FLAG MAP_HUGETLB
77 #endif
78 
79 #ifndef MAP_HUGE_SHIFT
80 /* older kernels (or FreeBSD) will not have this define */
81 #define HUGE_SHIFT (26)
82 #else
83 #define HUGE_SHIFT MAP_HUGE_SHIFT
84 #endif
85 
86 #define EXTMEM_HEAP_NAME "extmem"
87 #define EXTBUF_ZONE_SIZE RTE_PGSIZE_2M
88 
89 uint16_t verbose_level = 0; /**< Silent by default. */
90 int testpmd_logtype; /**< Log type for testpmd logs */
91 
92 /* Use the main core for the command line? */
93 uint8_t interactive = 0;
94 uint8_t auto_start = 0;
95 uint8_t tx_first;
96 char cmdline_filename[PATH_MAX] = {0};
97 
98 /*
99  * NUMA support configuration.
100  * When set, the NUMA support attempts to dispatch the allocation of the
101  * RX and TX memory rings, and of the DMA memory buffers (mbufs) for the
102  * probed ports among the CPU sockets 0 and 1.
103  * Otherwise, all memory is allocated from CPU socket 0.
104  */
105 uint8_t numa_support = 1; /**< numa enabled by default */
106 
107 /*
108  * In UMA mode, all memory is allocated from socket 0 if --socket-num is
109  * not configured.
110  */
111 uint8_t socket_num = UMA_NO_CONFIG;
112 
113 /*
114  * Select mempool allocation type:
115  * - native: use regular DPDK memory
116  * - anon: use regular DPDK memory to create mempool, but populate using
117  *         anonymous memory (may not be IOVA-contiguous)
118  * - xmem: use externally allocated hugepage memory
119  */
120 uint8_t mp_alloc_type = MP_ALLOC_NATIVE;
121 
122 /*
123  * Store the specified sockets on which the memory pool used by each port
124  * is allocated.
125  */
126 uint8_t port_numa[RTE_MAX_ETHPORTS];
127 
128 /*
129  * Store the specified sockets on which the RX ring used by each port
130  * is allocated.
131  */
132 uint8_t rxring_numa[RTE_MAX_ETHPORTS];
133 
134 /*
135  * Store the specified sockets on which the TX ring used by each port
136  * is allocated.
137  */
138 uint8_t txring_numa[RTE_MAX_ETHPORTS];
139 
140 /*
141  * Record the Ethernet address of peer target ports to which packets are
142  * forwarded.
143  * Must be instantiated with the ethernet addresses of peer traffic generator
144  * ports.
145  */
146 struct rte_ether_addr peer_eth_addrs[RTE_MAX_ETHPORTS];
147 portid_t nb_peer_eth_addrs = 0;
148 
149 /*
150  * Probed Target Environment.
151  */
152 struct rte_port *ports;	       /**< For all probed ethernet ports. */
153 portid_t nb_ports;             /**< Number of probed ethernet ports. */
154 struct fwd_lcore **fwd_lcores; /**< For all probed logical cores. */
155 lcoreid_t nb_lcores;           /**< Number of probed logical cores. */
156 
157 portid_t ports_ids[RTE_MAX_ETHPORTS]; /**< Store all port ids. */
158 
159 /*
160  * Test Forwarding Configuration.
161  *    nb_fwd_lcores <= nb_cfg_lcores <= nb_lcores
162  *    nb_fwd_ports  <= nb_cfg_ports  <= nb_ports
163  */
164 lcoreid_t nb_cfg_lcores; /**< Number of configured logical cores. */
165 lcoreid_t nb_fwd_lcores; /**< Number of forwarding logical cores. */
166 portid_t  nb_cfg_ports;  /**< Number of configured ports. */
167 portid_t  nb_fwd_ports;  /**< Number of forwarding ports. */
168 
169 unsigned int fwd_lcores_cpuids[RTE_MAX_LCORE]; /**< CPU ids configuration. */
170 portid_t fwd_ports_ids[RTE_MAX_ETHPORTS];      /**< Port ids configuration. */
171 
172 struct fwd_stream **fwd_streams; /**< For each RX queue of each port. */
173 streamid_t nb_fwd_streams;       /**< Is equal to (nb_ports * nb_rxq). */
174 
175 /*
176  * Forwarding engines.
177  */
178 struct fwd_engine * fwd_engines[] = {
179 	&io_fwd_engine,
180 	&mac_fwd_engine,
181 	&mac_swap_engine,
182 	&flow_gen_engine,
183 	&rx_only_engine,
184 	&tx_only_engine,
185 	&csum_fwd_engine,
186 	&icmp_echo_engine,
187 	&noisy_vnf_engine,
188 	&five_tuple_swap_fwd_engine,
189 #ifdef RTE_LIBRTE_IEEE1588
190 	&ieee1588_fwd_engine,
191 #endif
192 	&shared_rxq_engine,
193 	NULL,
194 };
195 
196 struct rte_mempool *mempools[RTE_MAX_NUMA_NODES * MAX_SEGS_BUFFER_SPLIT];
197 uint16_t mempool_flags;
198 
199 struct fwd_config cur_fwd_config;
200 struct fwd_engine *cur_fwd_eng = &io_fwd_engine; /**< IO mode by default. */
201 uint32_t retry_enabled;
202 uint32_t burst_tx_delay_time = BURST_TX_WAIT_US;
203 uint32_t burst_tx_retry_num = BURST_TX_RETRIES;
204 
205 uint32_t mbuf_data_size_n = 1; /* Number of specified mbuf sizes. */
206 uint16_t mbuf_data_size[MAX_SEGS_BUFFER_SPLIT] = {
207 	DEFAULT_MBUF_DATA_SIZE
208 }; /**< Mbuf data space size. */
209 uint32_t param_total_num_mbufs = 0;  /**< number of mbufs in all pools - if
210                                       * specified on command-line. */
211 uint16_t stats_period; /**< Period to show statistics (disabled by default) */
212 
213 /** Extended statistics to show. */
214 struct rte_eth_xstat_name *xstats_display;
215 
216 unsigned int xstats_display_num; /**< Size of extended statistics to show */
217 
218 /*
219  * In a container, the process running with the 'stats-period' option cannot
220  * be terminated; set this flag to exit the stats-period loop on SIGINT/SIGTERM.
221  */
222 uint8_t f_quit;
223 
224 /*
225  * Max Rx frame size, set by '--max-pkt-len' parameter.
226  */
227 uint32_t max_rx_pkt_len;
228 
229 /*
230  * Configuration of packet segments used to scatter received packets
231  * if any of the split features is configured.
232  */
233 uint16_t rx_pkt_seg_lengths[MAX_SEGS_BUFFER_SPLIT];
234 uint8_t  rx_pkt_nb_segs; /**< Number of segments to split */
235 uint16_t rx_pkt_seg_offsets[MAX_SEGS_BUFFER_SPLIT];
236 uint8_t  rx_pkt_nb_offs; /**< Number of specified offsets */
237 
238 /*
239  * Configuration of packet segments used by the "txonly" processing engine.
240  */
241 uint16_t tx_pkt_length = TXONLY_DEF_PACKET_LEN; /**< TXONLY packet length. */
242 uint16_t tx_pkt_seg_lengths[RTE_MAX_SEGS_PER_PKT] = {
243 	TXONLY_DEF_PACKET_LEN,
244 };
245 uint8_t  tx_pkt_nb_segs = 1; /**< Number of segments in TXONLY packets */
246 
247 enum tx_pkt_split tx_pkt_split = TX_PKT_SPLIT_OFF;
248 /**< Split policy for packets to TX. */
249 
250 uint8_t txonly_multi_flow;
251 /**< Whether multiple flows are generated in TXONLY mode. */
252 
253 uint32_t tx_pkt_times_inter;
254 /**< Timings for send scheduling in TXONLY mode, time between bursts. */
255 
256 uint32_t tx_pkt_times_intra;
257 /**< Timings for send scheduling in TXONLY mode, time between packets. */
258 
259 uint16_t nb_pkt_per_burst = DEF_PKT_BURST; /**< Number of packets per burst. */
260 uint16_t nb_pkt_flowgen_clones; /**< Number of Tx packet clones to send in flowgen mode. */
261 int nb_flows_flowgen = 1024; /**< Number of flows in flowgen mode. */
262 uint16_t mb_mempool_cache = DEF_MBUF_CACHE; /**< Size of mbuf mempool cache. */
263 
264 /* Whether the current configuration is in DCB mode; 0 means it is not. */
265 uint8_t dcb_config = 0;
266 
267 /*
268  * Configurable number of RX/TX queues.
269  */
270 queueid_t nb_hairpinq; /**< Number of hairpin queues per port. */
271 queueid_t nb_rxq = 1; /**< Number of RX queues per port. */
272 queueid_t nb_txq = 1; /**< Number of TX queues per port. */
273 
274 /*
275  * Configurable number of RX/TX ring descriptors.
276  * Defaults are supplied by drivers via ethdev.
277  */
278 #define RTE_TEST_RX_DESC_DEFAULT 0
279 #define RTE_TEST_TX_DESC_DEFAULT 0
280 uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT; /**< Number of RX descriptors. */
281 uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT; /**< Number of TX descriptors. */
282 
283 #define RTE_PMD_PARAM_UNSET -1
284 /*
285  * Configurable values of RX and TX ring threshold registers.
286  */
287 
288 int8_t rx_pthresh = RTE_PMD_PARAM_UNSET;
289 int8_t rx_hthresh = RTE_PMD_PARAM_UNSET;
290 int8_t rx_wthresh = RTE_PMD_PARAM_UNSET;
291 
292 int8_t tx_pthresh = RTE_PMD_PARAM_UNSET;
293 int8_t tx_hthresh = RTE_PMD_PARAM_UNSET;
294 int8_t tx_wthresh = RTE_PMD_PARAM_UNSET;
295 
296 /*
297  * Configurable value of RX free threshold.
298  */
299 int16_t rx_free_thresh = RTE_PMD_PARAM_UNSET;
300 
301 /*
302  * Configurable value of RX drop enable.
303  */
304 int8_t rx_drop_en = RTE_PMD_PARAM_UNSET;
305 
306 /*
307  * Configurable value of TX free threshold.
308  */
309 int16_t tx_free_thresh = RTE_PMD_PARAM_UNSET;
310 
311 /*
312  * Configurable value of TX RS bit threshold.
313  */
314 int16_t tx_rs_thresh = RTE_PMD_PARAM_UNSET;
315 
316 /*
317  * Configurable value of buffered packets before sending.
318  */
319 uint16_t noisy_tx_sw_bufsz;
320 
321 /*
322  * Configurable value of packet buffer timeout.
323  */
324 uint16_t noisy_tx_sw_buf_flush_time;
325 
326 /*
327  * Configurable value for size of VNF internal memory area
328  * used for simulating noisy neighbour behaviour
329  */
330 uint64_t noisy_lkup_mem_sz;
331 
332 /*
333  * Configurable value of number of random writes done in
334  * VNF simulation memory area.
335  */
336 uint64_t noisy_lkup_num_writes;
337 
338 /*
339  * Configurable value of number of random reads done in
340  * VNF simulation memory area.
341  */
342 uint64_t noisy_lkup_num_reads;
343 
344 /*
345  * Configurable value of number of random reads/writes done in
346  * VNF simulation memory area.
347  */
348 uint64_t noisy_lkup_num_reads_writes;
349 
350 /*
351  * Receive Side Scaling (RSS) configuration.
352  */
353 uint64_t rss_hf = RTE_ETH_RSS_IP; /* RSS IP by default. */
354 
355 /*
356  * Port topology configuration
357  */
358 uint16_t port_topology = PORT_TOPOLOGY_PAIRED; /* Ports are paired by default */
359 
360 /*
361  * Avoid flushing all the RX streams before starting forwarding.
362  */
363 uint8_t no_flush_rx = 0; /* flush by default */
364 
365 /*
366  * Flow API isolated mode.
367  */
368 uint8_t flow_isolate_all;
369 
370 /*
371  * Avoid checking the link status when starting/stopping a port.
372  */
373 uint8_t no_link_check = 0; /* check by default */
374 
375 /*
376  * Don't automatically start all ports in interactive mode.
377  */
378 uint8_t no_device_start = 0;
379 
380 /*
381  * Enable link status change notification
382  */
383 uint8_t lsc_interrupt = 1; /* enabled by default */
384 
385 /*
386  * Enable device removal notification.
387  */
388 uint8_t rmv_interrupt = 1; /* enabled by default */
389 
390 uint8_t hot_plug = 0; /**< hotplug disabled by default. */
391 
392 /* After attach, port setup is called on event or by iterator */
393 bool setup_on_probe_event = true;
394 
395 /* Clear ptypes on port initialization. */
396 uint8_t clear_ptypes = true;
397 
398 /* Hairpin ports configuration mode. */
399 uint16_t hairpin_mode;
400 
401 /* Pretty printing of ethdev events */
402 static const char * const eth_event_desc[] = {
403 	[RTE_ETH_EVENT_UNKNOWN] = "unknown",
404 	[RTE_ETH_EVENT_INTR_LSC] = "link state change",
405 	[RTE_ETH_EVENT_QUEUE_STATE] = "queue state",
406 	[RTE_ETH_EVENT_INTR_RESET] = "reset",
407 	[RTE_ETH_EVENT_VF_MBOX] = "VF mbox",
408 	[RTE_ETH_EVENT_IPSEC] = "IPsec",
409 	[RTE_ETH_EVENT_MACSEC] = "MACsec",
410 	[RTE_ETH_EVENT_INTR_RMV] = "device removal",
411 	[RTE_ETH_EVENT_NEW] = "device probed",
412 	[RTE_ETH_EVENT_DESTROY] = "device released",
413 	[RTE_ETH_EVENT_FLOW_AGED] = "flow aged",
414 	[RTE_ETH_EVENT_MAX] = NULL,
415 };
416 
417 /*
418  * Display or mask ether events.
419  * Default to all events except VF_MBOX, NEW and DESTROY.
420  */
421 uint32_t event_print_mask = (UINT32_C(1) << RTE_ETH_EVENT_UNKNOWN) |
422 			    (UINT32_C(1) << RTE_ETH_EVENT_INTR_LSC) |
423 			    (UINT32_C(1) << RTE_ETH_EVENT_QUEUE_STATE) |
424 			    (UINT32_C(1) << RTE_ETH_EVENT_INTR_RESET) |
425 			    (UINT32_C(1) << RTE_ETH_EVENT_IPSEC) |
426 			    (UINT32_C(1) << RTE_ETH_EVENT_MACSEC) |
427 			    (UINT32_C(1) << RTE_ETH_EVENT_INTR_RMV) |
428 			    (UINT32_C(1) << RTE_ETH_EVENT_FLOW_AGED);
429 /*
430  * Decide whether all memory is locked for performance.
431  */
432 int do_mlockall = 0;
433 
434 /*
435  * NIC bypass mode configuration options.
436  */
437 
438 #if defined RTE_NET_IXGBE && defined RTE_LIBRTE_IXGBE_BYPASS
439 /* The NIC bypass watchdog timeout. */
440 uint32_t bypass_timeout = RTE_PMD_IXGBE_BYPASS_TMT_OFF;
441 #endif
442 
443 
444 #ifdef RTE_LIB_LATENCYSTATS
445 
446 /*
447  * Set when latency stats are enabled on the command line.
448  */
449 uint8_t latencystats_enabled;
450 
451 /*
452  * Lcore ID to service latency statistics.
453  */
454 lcoreid_t latencystats_lcore_id = -1;
455 
456 #endif
457 
458 /*
459  * Ethernet device configuration.
460  */
461 struct rte_eth_rxmode rx_mode;
462 
463 struct rte_eth_txmode tx_mode = {
464 	.offloads = RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE,
465 };
466 
467 struct rte_eth_fdir_conf fdir_conf = {
468 	.mode = RTE_FDIR_MODE_NONE,
469 	.pballoc = RTE_ETH_FDIR_PBALLOC_64K,
470 	.status = RTE_FDIR_REPORT_STATUS,
471 	.mask = {
472 		.vlan_tci_mask = 0xFFEF,
473 		.ipv4_mask     = {
474 			.src_ip = 0xFFFFFFFF,
475 			.dst_ip = 0xFFFFFFFF,
476 		},
477 		.ipv6_mask     = {
478 			.src_ip = {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
479 			.dst_ip = {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
480 		},
481 		.src_port_mask = 0xFFFF,
482 		.dst_port_mask = 0xFFFF,
483 		.mac_addr_byte_mask = 0xFF,
484 		.tunnel_type_mask = 1,
485 		.tunnel_id_mask = 0xFFFFFFFF,
486 	},
487 	.drop_queue = 127,
488 };
489 
490 volatile int test_done = 1; /* stop packet forwarding when set to 1. */
491 
492 /*
493  * Zero values are displayed by default for xstats.
494  */
495 uint8_t xstats_hide_zero;
496 
497 /*
498  * Measurement of CPU cycles disabled by default
499  */
500 uint8_t record_core_cycles;
501 
502 /*
503  * Display of RX and TX bursts disabled by default
504  */
505 uint8_t record_burst_stats;
506 
507 /*
508  * Number of ports per shared Rx queue group; 0 disables sharing.
509  */
510 uint32_t rxq_share;
511 
512 unsigned int num_sockets = 0;
513 unsigned int socket_ids[RTE_MAX_NUMA_NODES];
514 
515 #ifdef RTE_LIB_BITRATESTATS
516 /* Bitrate statistics */
517 struct rte_stats_bitrates *bitrate_data;
518 lcoreid_t bitrate_lcore_id;
519 uint8_t bitrate_enabled;
520 #endif
521 
522 #ifdef RTE_LIB_GRO
523 struct gro_status gro_ports[RTE_MAX_ETHPORTS];
524 uint8_t gro_flush_cycles = GRO_DEFAULT_FLUSH_CYCLES;
525 #endif
526 
527 /*
528  * Hexadecimal bitmask of the RX multi-queue modes that can be enabled.
529  */
530 enum rte_eth_rx_mq_mode rx_mq_mode = RTE_ETH_MQ_RX_VMDQ_DCB_RSS;
531 
532 /*
533  * Used to set forced link speed
534  */
535 uint32_t eth_link_speed;
536 
537 /*
538  * ID of the current process in multi-process, used to
539  * configure the queues to be polled.
540  */
541 int proc_id;
542 
543 /*
544  * Number of processes in multi-process, used to
545  * configure the queues to be polled.
546  */
547 unsigned int num_procs = 1;
548 
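/*
 * Negotiate delivery of Rx metadata (user flag, user mark, tunnel ID) with
 * the PMD for the given port. Only the primary process negotiates.
 */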
549 static void
550 eth_rx_metadata_negotiate_mp(uint16_t port_id)
551 {
552 	uint64_t rx_meta_features = 0;
553 	int ret;
554 
555 	if (!is_proc_primary())
556 		return;
557 
558 	rx_meta_features |= RTE_ETH_RX_METADATA_USER_FLAG;
559 	rx_meta_features |= RTE_ETH_RX_METADATA_USER_MARK;
560 	rx_meta_features |= RTE_ETH_RX_METADATA_TUNNEL_ID;
561 
562 	ret = rte_eth_rx_metadata_negotiate(port_id, &rx_meta_features);
563 	if (ret == 0) {
564 		if (!(rx_meta_features & RTE_ETH_RX_METADATA_USER_FLAG)) {
565 			TESTPMD_LOG(DEBUG, "Flow action FLAG will not affect Rx mbufs on port %u\n",
566 				    port_id);
567 		}
568 
569 		if (!(rx_meta_features & RTE_ETH_RX_METADATA_USER_MARK)) {
570 			TESTPMD_LOG(DEBUG, "Flow action MARK will not affect Rx mbufs on port %u\n",
571 				    port_id);
572 		}
573 
574 		if (!(rx_meta_features & RTE_ETH_RX_METADATA_TUNNEL_ID)) {
575 			TESTPMD_LOG(DEBUG, "Flow tunnel offload support might be limited or unavailable on port %u\n",
576 				    port_id);
577 		}
578 	} else if (ret != -ENOTSUP) {
579 		rte_exit(EXIT_FAILURE, "Error when negotiating Rx meta features on port %u: %s\n",
580 			 port_id, rte_strerror(-ret));
581 	}
582 }
583 
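/*
 * Pick the port acting as flow transfer proxy for the given port; defaults
 * to the port itself, and only the primary process queries the PMD.
 */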
584 static void
585 flow_pick_transfer_proxy_mp(uint16_t port_id)
586 {
587 	struct rte_port *port = &ports[port_id];
588 	int ret;
589 
590 	port->flow_transfer_proxy = port_id;
591 
592 	if (!is_proc_primary())
593 		return;
594 
595 	ret = rte_flow_pick_transfer_proxy(port_id, &port->flow_transfer_proxy,
596 					   NULL);
597 	if (ret != 0) {
598 		fprintf(stderr, "Error picking flow transfer proxy for port %u: %s - ignore\n",
599 			port_id, rte_strerror(-ret));
600 	}
601 }
602 
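/*
 * The *_mp() helpers below perform the ethdev/mempool operation only in the
 * primary process; secondary processes skip the call and report success.
 */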
603 static int
604 eth_dev_configure_mp(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
605 		      const struct rte_eth_conf *dev_conf)
606 {
607 	if (is_proc_primary())
608 		return rte_eth_dev_configure(port_id, nb_rx_q, nb_tx_q,
609 					dev_conf);
610 	return 0;
611 }
612 
613 static int
614 eth_dev_start_mp(uint16_t port_id)
615 {
616 	if (is_proc_primary())
617 		return rte_eth_dev_start(port_id);
618 
619 	return 0;
620 }
621 
622 static int
623 eth_dev_stop_mp(uint16_t port_id)
624 {
625 	if (is_proc_primary())
626 		return rte_eth_dev_stop(port_id);
627 
628 	return 0;
629 }
630 
631 static void
632 mempool_free_mp(struct rte_mempool *mp)
633 {
634 	if (is_proc_primary())
635 		rte_mempool_free(mp);
636 }
637 
638 static int
639 eth_dev_set_mtu_mp(uint16_t port_id, uint16_t mtu)
640 {
641 	if (is_proc_primary())
642 		return rte_eth_dev_set_mtu(port_id, mtu);
643 
644 	return 0;
645 }
646 
647 /* Forward function declarations */
648 static void setup_attached_port(portid_t pi);
649 static void check_all_ports_link_status(uint32_t port_mask);
650 static int eth_event_callback(portid_t port_id,
651 			      enum rte_eth_event_type type,
652 			      void *param, void *ret_param);
653 static void dev_event_callback(const char *device_name,
654 				enum rte_dev_event_type type,
655 				void *param);
656 static void fill_xstats_display_info(void);
657 
658 /*
659  * Check if all the ports are started.
660  * If yes, return positive value. If not, return zero.
661  */
662 static int all_ports_started(void);
663 
664 #ifdef RTE_LIB_GSO
665 struct gso_status gso_ports[RTE_MAX_ETHPORTS];
666 uint16_t gso_max_segment_size = RTE_ETHER_MAX_LEN - RTE_ETHER_CRC_LEN;
667 #endif
668 
669 /* Holds the registered mbuf dynamic flags names. */
670 char dynf_names[64][RTE_MBUF_DYN_NAMESIZE];
671 
672 
673 /*
674  * Helper function to check whether a socket id has already been discovered.
675  * Return a positive value if it is new, zero if it is already known.
676  */
677 int
678 new_socket_id(unsigned int socket_id)
679 {
680 	unsigned int i;
681 
682 	for (i = 0; i < num_sockets; i++) {
683 		if (socket_ids[i] == socket_id)
684 			return 0;
685 	}
686 	return 1;
687 }
688 
689 /*
690  * Setup default configuration.
691  */
692 static void
693 set_default_fwd_lcores_config(void)
694 {
695 	unsigned int i;
696 	unsigned int nb_lc;
697 	unsigned int sock_num;
698 
699 	nb_lc = 0;
700 	for (i = 0; i < RTE_MAX_LCORE; i++) {
701 		if (!rte_lcore_is_enabled(i))
702 			continue;
703 		sock_num = rte_lcore_to_socket_id(i);
704 		if (new_socket_id(sock_num)) {
705 			if (num_sockets >= RTE_MAX_NUMA_NODES) {
706 				rte_exit(EXIT_FAILURE,
707 					 "Total sockets greater than %u\n",
708 					 RTE_MAX_NUMA_NODES);
709 			}
710 			socket_ids[num_sockets++] = sock_num;
711 		}
712 		if (i == rte_get_main_lcore())
713 			continue;
714 		fwd_lcores_cpuids[nb_lc++] = i;
715 	}
716 	nb_lcores = (lcoreid_t) nb_lc;
717 	nb_cfg_lcores = nb_lcores;
718 	nb_fwd_lcores = 1;
719 }
720 
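/* Set default peer addresses: locally administered MACs ending in the port id. */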
721 static void
722 set_def_peer_eth_addrs(void)
723 {
724 	portid_t i;
725 
726 	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
727 		peer_eth_addrs[i].addr_bytes[0] = RTE_ETHER_LOCAL_ADMIN_ADDR;
728 		peer_eth_addrs[i].addr_bytes[5] = i;
729 	}
730 }
731 
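/* Use every probed port for forwarding and record its NUMA socket. */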
732 static void
733 set_default_fwd_ports_config(void)
734 {
735 	portid_t pt_id;
736 	int i = 0;
737 
738 	RTE_ETH_FOREACH_DEV(pt_id) {
739 		fwd_ports_ids[i++] = pt_id;
740 
741 		/* Update sockets info according to the attached device */
742 		int socket_id = rte_eth_dev_socket_id(pt_id);
743 		if (socket_id >= 0 && new_socket_id(socket_id)) {
744 			if (num_sockets >= RTE_MAX_NUMA_NODES) {
745 				rte_exit(EXIT_FAILURE,
746 					 "Total sockets greater than %u\n",
747 					 RTE_MAX_NUMA_NODES);
748 			}
749 			socket_ids[num_sockets++] = socket_id;
750 		}
751 	}
752 
753 	nb_cfg_ports = nb_ports;
754 	nb_fwd_ports = nb_ports;
755 }
756 
757 void
758 set_def_fwd_config(void)
759 {
760 	set_default_fwd_lcores_config();
761 	set_def_peer_eth_addrs();
762 	set_default_fwd_ports_config();
763 }
764 
765 #ifndef RTE_EXEC_ENV_WINDOWS
766 /* extremely pessimistic estimation of memory required to create a mempool */
767 static int
768 calc_mem_size(uint32_t nb_mbufs, uint32_t mbuf_sz, size_t pgsz, size_t *out)
769 {
770 	unsigned int n_pages, mbuf_per_pg, leftover;
771 	uint64_t total_mem, mbuf_mem, obj_sz;
772 
773 	/* there is no good way to predict how much space the mempool will
774 	 * occupy because it will allocate chunks on the fly, and some of those
775 	 * will come from default DPDK memory while some will come from our
776 	 * external memory, so just assume 128MB will be enough for everyone.
777 	 */
778 	uint64_t hdr_mem = 128 << 20;
779 
780 	/* account for possible non-contiguousness */
781 	obj_sz = rte_mempool_calc_obj_size(mbuf_sz, 0, NULL);
782 	if (obj_sz > pgsz) {
783 		TESTPMD_LOG(ERR, "Object size is bigger than page size\n");
784 		return -1;
785 	}
786 
787 	mbuf_per_pg = pgsz / obj_sz;
788 	leftover = (nb_mbufs % mbuf_per_pg) > 0;
789 	n_pages = (nb_mbufs / mbuf_per_pg) + leftover;
790 
791 	mbuf_mem = n_pages * pgsz;
792 
793 	total_mem = RTE_ALIGN(hdr_mem + mbuf_mem, pgsz);
794 
795 	if (total_mem > SIZE_MAX) {
796 		TESTPMD_LOG(ERR, "Memory size too big\n");
797 		return -1;
798 	}
799 	*out = (size_t)total_mem;
800 
801 	return 0;
802 }
803 
804 static int
805 pagesz_flags(uint64_t page_sz)
806 {
807 	/* as per the mmap() manpage, the page size is encoded as its log2
808 	 * shifted left by MAP_HUGE_SHIFT
809 	 */
810 	int log2 = rte_log2_u64(page_sz);
811 
812 	return (log2 << HUGE_SHIFT);
813 }
814 
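/* mmap() an anonymous area, optionally requesting hugepages of the given size. */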
815 static void *
816 alloc_mem(size_t memsz, size_t pgsz, bool huge)
817 {
818 	void *addr;
819 	int flags;
820 
821 	/* allocate anonymous hugepages */
822 	flags = MAP_ANONYMOUS | MAP_PRIVATE;
823 	if (huge)
824 		flags |= HUGE_FLAG | pagesz_flags(pgsz);
825 
826 	addr = mmap(NULL, memsz, PROT_READ | PROT_WRITE, flags, -1, 0);
827 	if (addr == MAP_FAILED)
828 		return NULL;
829 
830 	return addr;
831 }
832 
833 struct extmem_param {
834 	void *addr;
835 	size_t len;
836 	size_t pgsz;
837 	rte_iova_t *iova_table;
838 	unsigned int iova_table_len;
839 };
840 
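/*
 * Allocate an external memory area, trying the supported hugepage sizes in
 * turn (or the system page size when huge == false), and build its IOVA table.
 */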
841 static int
842 create_extmem(uint32_t nb_mbufs, uint32_t mbuf_sz, struct extmem_param *param,
843 		bool huge)
844 {
845 	uint64_t pgsizes[] = {RTE_PGSIZE_2M, RTE_PGSIZE_1G, /* x86_64, ARM */
846 			RTE_PGSIZE_16M, RTE_PGSIZE_16G};    /* POWER */
847 	unsigned int cur_page, n_pages, pgsz_idx;
848 	size_t mem_sz, cur_pgsz;
849 	rte_iova_t *iovas = NULL;
850 	void *addr;
851 	int ret;
852 
853 	for (pgsz_idx = 0; pgsz_idx < RTE_DIM(pgsizes); pgsz_idx++) {
854 		/* skip anything that is too big */
855 		if (pgsizes[pgsz_idx] > SIZE_MAX)
856 			continue;
857 
858 		cur_pgsz = pgsizes[pgsz_idx];
859 
860 		/* if we were told not to allocate hugepages, override */
861 		if (!huge)
862 			cur_pgsz = sysconf(_SC_PAGESIZE);
863 
864 		ret = calc_mem_size(nb_mbufs, mbuf_sz, cur_pgsz, &mem_sz);
865 		if (ret < 0) {
866 			TESTPMD_LOG(ERR, "Cannot calculate memory size\n");
867 			return -1;
868 		}
869 
870 		/* allocate our memory */
871 		addr = alloc_mem(mem_sz, cur_pgsz, huge);
872 
873 		/* if we couldn't allocate memory with a specified page size,
874 		 * that doesn't mean we can't do it with other page sizes, so
875 		 * try another one.
876 		 */
877 		if (addr == NULL)
878 			continue;
879 
880 		/* store IOVA addresses for every page in this memory area */
881 		n_pages = mem_sz / cur_pgsz;
882 
883 		iovas = malloc(sizeof(*iovas) * n_pages);
884 
885 		if (iovas == NULL) {
886 			TESTPMD_LOG(ERR, "Cannot allocate memory for iova addresses\n");
887 			goto fail;
888 		}
889 		/* lock memory if it's not huge pages */
890 		if (!huge)
891 			mlock(addr, mem_sz);
892 
893 		/* populate IOVA addresses */
894 		for (cur_page = 0; cur_page < n_pages; cur_page++) {
895 			rte_iova_t iova;
896 			size_t offset;
897 			void *cur;
898 
899 			offset = cur_pgsz * cur_page;
900 			cur = RTE_PTR_ADD(addr, offset);
901 
902 			/* touch the page before getting its IOVA */
903 			*(volatile char *)cur = 0;
904 
905 			iova = rte_mem_virt2iova(cur);
906 
907 			iovas[cur_page] = iova;
908 		}
909 
910 		break;
911 	}
912 	/* if we couldn't allocate anything */
913 	if (iovas == NULL)
914 		return -1;
915 
916 	param->addr = addr;
917 	param->len = mem_sz;
918 	param->pgsz = cur_pgsz;
919 	param->iova_table = iovas;
920 	param->iova_table_len = n_pages;
921 
922 	return 0;
923 fail:
924 	if (iovas)
925 		free(iovas);
926 	if (addr)
927 		munmap(addr, mem_sz);
928 
929 	return -1;
930 }
931 
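/*
 * Create the external heap if it does not exist yet and add the newly
 * allocated memory area to it.
 */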
932 static int
933 setup_extmem(uint32_t nb_mbufs, uint32_t mbuf_sz, bool huge)
934 {
935 	struct extmem_param param;
936 	int socket_id, ret;
937 
938 	memset(&param, 0, sizeof(param));
939 
940 	/* check if our heap exists */
941 	socket_id = rte_malloc_heap_get_socket(EXTMEM_HEAP_NAME);
942 	if (socket_id < 0) {
943 		/* create our heap */
944 		ret = rte_malloc_heap_create(EXTMEM_HEAP_NAME);
945 		if (ret < 0) {
946 			TESTPMD_LOG(ERR, "Cannot create heap\n");
947 			return -1;
948 		}
949 	}
950 
951 	ret = create_extmem(nb_mbufs, mbuf_sz, &param, huge);
952 	if (ret < 0) {
953 		TESTPMD_LOG(ERR, "Cannot create memory area\n");
954 		return -1;
955 	}
956 
957 	/* we now have a valid memory area, so add it to heap */
958 	ret = rte_malloc_heap_memory_add(EXTMEM_HEAP_NAME,
959 			param.addr, param.len, param.iova_table,
960 			param.iova_table_len, param.pgsz);
961 
962 	/* when using VFIO, memory is automatically mapped for DMA by EAL */
963 
964 	/* not needed any more */
965 	free(param.iova_table);
966 
967 	if (ret < 0) {
968 		TESTPMD_LOG(ERR, "Cannot add memory to heap\n");
969 		munmap(param.addr, param.len);
970 		return -1;
971 	}
972 
973 	/* success */
974 
975 	TESTPMD_LOG(DEBUG, "Allocated %zuMB of external memory\n",
976 			param.len >> 20);
977 
978 	return 0;
979 }
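
/* Mempool mem_iter callback: DMA-unmap the chunk from all ports and unregister it. */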
980 static void
981 dma_unmap_cb(struct rte_mempool *mp __rte_unused, void *opaque __rte_unused,
982 	     struct rte_mempool_memhdr *memhdr, unsigned mem_idx __rte_unused)
983 {
984 	uint16_t pid = 0;
985 	int ret;
986 
987 	RTE_ETH_FOREACH_DEV(pid) {
988 		struct rte_eth_dev_info dev_info;
989 
990 		ret = eth_dev_info_get_print_err(pid, &dev_info);
991 		if (ret != 0) {
992 			TESTPMD_LOG(DEBUG,
993 				    "unable to get device info for port %d on addr 0x%p,"
994 				    "mempool unmapping will not be performed\n",
995 				    pid, memhdr->addr);
996 			continue;
997 		}
998 
999 		ret = rte_dev_dma_unmap(dev_info.device, memhdr->addr, 0, memhdr->len);
1000 		if (ret) {
1001 			TESTPMD_LOG(DEBUG,
1002 				    "unable to DMA unmap addr 0x%p "
1003 				    "for device %s\n",
1004 				    memhdr->addr, dev_info.device->name);
1005 		}
1006 	}
1007 	ret = rte_extmem_unregister(memhdr->addr, memhdr->len);
1008 	if (ret) {
1009 		TESTPMD_LOG(DEBUG,
1010 			    "unable to un-register addr 0x%p\n", memhdr->addr);
1011 	}
1012 }
1013 
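/* Mempool mem_iter callback: register the chunk with EAL and DMA-map it to all ports. */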
1014 static void
1015 dma_map_cb(struct rte_mempool *mp __rte_unused, void *opaque __rte_unused,
1016 	   struct rte_mempool_memhdr *memhdr, unsigned mem_idx __rte_unused)
1017 {
1018 	uint16_t pid = 0;
1019 	size_t page_size = sysconf(_SC_PAGESIZE);
1020 	int ret;
1021 
1022 	ret = rte_extmem_register(memhdr->addr, memhdr->len, NULL, 0,
1023 				  page_size);
1024 	if (ret) {
1025 		TESTPMD_LOG(DEBUG,
1026 			    "unable to register addr 0x%p\n", memhdr->addr);
1027 		return;
1028 	}
1029 	RTE_ETH_FOREACH_DEV(pid) {
1030 		struct rte_eth_dev_info dev_info;
1031 
1032 		ret = eth_dev_info_get_print_err(pid, &dev_info);
1033 		if (ret != 0) {
1034 			TESTPMD_LOG(DEBUG,
1035 				    "unable to get device info for port %d on addr 0x%p,"
1036 				    "mempool mapping will not be performed\n",
1037 				    pid, memhdr->addr);
1038 			continue;
1039 		}
1040 		ret = rte_dev_dma_map(dev_info.device, memhdr->addr, 0, memhdr->len);
1041 		if (ret) {
1042 			TESTPMD_LOG(DEBUG,
1043 				    "unable to DMA map addr 0x%p "
1044 				    "for device %s\n",
1045 				    memhdr->addr, dev_info.device->name);
1046 		}
1047 	}
1048 }
1049 #endif
1050 
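/*
 * Reserve IOVA-contiguous memzones for pinned external buffers and fill the
 * rte_pktmbuf_extmem descriptors; return the number of descriptors (0 on error).
 */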
1051 static unsigned int
1052 setup_extbuf(uint32_t nb_mbufs, uint16_t mbuf_sz, unsigned int socket_id,
1053 	    char *pool_name, struct rte_pktmbuf_extmem **ext_mem)
1054 {
1055 	struct rte_pktmbuf_extmem *xmem;
1056 	unsigned int ext_num, zone_num, elt_num;
1057 	uint16_t elt_size;
1058 
1059 	elt_size = RTE_ALIGN_CEIL(mbuf_sz, RTE_CACHE_LINE_SIZE);
1060 	elt_num = EXTBUF_ZONE_SIZE / elt_size;
1061 	zone_num = (nb_mbufs + elt_num - 1) / elt_num;
1062 
1063 	xmem = malloc(sizeof(struct rte_pktmbuf_extmem) * zone_num);
1064 	if (xmem == NULL) {
1065 		TESTPMD_LOG(ERR, "Cannot allocate memory for "
1066 				 "external buffer descriptors\n");
1067 		*ext_mem = NULL;
1068 		return 0;
1069 	}
1070 	for (ext_num = 0; ext_num < zone_num; ext_num++) {
1071 		struct rte_pktmbuf_extmem *xseg = xmem + ext_num;
1072 		const struct rte_memzone *mz;
1073 		char mz_name[RTE_MEMZONE_NAMESIZE];
1074 		int ret;
1075 
1076 		ret = snprintf(mz_name, sizeof(mz_name),
1077 			RTE_MEMPOOL_MZ_FORMAT "_xb_%u", pool_name, ext_num);
1078 		if (ret < 0 || ret >= (int)sizeof(mz_name)) {
1079 			errno = ENAMETOOLONG;
1080 			ext_num = 0;
1081 			break;
1082 		}
1083 		mz = rte_memzone_reserve_aligned(mz_name, EXTBUF_ZONE_SIZE,
1084 						 socket_id,
1085 						 RTE_MEMZONE_IOVA_CONTIG |
1086 						 RTE_MEMZONE_1GB |
1087 						 RTE_MEMZONE_SIZE_HINT_ONLY,
1088 						 EXTBUF_ZONE_SIZE);
1089 		if (mz == NULL) {
1090 			/*
1091 			 * The caller exits on external buffer creation
1092 			 * error, so there is no need to free memzones.
1093 			 */
1094 			errno = ENOMEM;
1095 			ext_num = 0;
1096 			break;
1097 		}
1098 		xseg->buf_ptr = mz->addr;
1099 		xseg->buf_iova = mz->iova;
1100 		xseg->buf_len = EXTBUF_ZONE_SIZE;
1101 		xseg->elt_size = elt_size;
1102 	}
1103 	if (ext_num == 0 && xmem != NULL) {
1104 		free(xmem);
1105 		xmem = NULL;
1106 	}
1107 	*ext_mem = xmem;
1108 	return ext_num;
1109 }
1110 
1111 /*
1112  * Mbuf pool creation; configuration initialisation done once at init time.
1113  */
1114 static struct rte_mempool *
1115 mbuf_pool_create(uint16_t mbuf_seg_size, unsigned nb_mbuf,
1116 		 unsigned int socket_id, uint16_t size_idx)
1117 {
1118 	char pool_name[RTE_MEMPOOL_NAMESIZE];
1119 	struct rte_mempool *rte_mp = NULL;
1120 #ifndef RTE_EXEC_ENV_WINDOWS
1121 	uint32_t mb_size;
1122 
1123 	mb_size = sizeof(struct rte_mbuf) + mbuf_seg_size;
1124 #endif
1125 	mbuf_poolname_build(socket_id, pool_name, sizeof(pool_name), size_idx);
1126 	if (!is_proc_primary()) {
1127 		rte_mp = rte_mempool_lookup(pool_name);
1128 		if (rte_mp == NULL)
1129 			rte_exit(EXIT_FAILURE,
1130 				"Get mbuf pool for socket %u failed: %s\n",
1131 				socket_id, rte_strerror(rte_errno));
1132 		return rte_mp;
1133 	}
1134 
1135 	TESTPMD_LOG(INFO,
1136 		"create a new mbuf pool <%s>: n=%u, size=%u, socket=%u\n",
1137 		pool_name, nb_mbuf, mbuf_seg_size, socket_id);
1138 
1139 	switch (mp_alloc_type) {
1140 	case MP_ALLOC_NATIVE:
1141 		{
1142 			/* wrapper to rte_mempool_create() */
1143 			TESTPMD_LOG(INFO, "preferred mempool ops selected: %s\n",
1144 					rte_mbuf_best_mempool_ops());
1145 			rte_mp = rte_pktmbuf_pool_create(pool_name, nb_mbuf,
1146 				mb_mempool_cache, 0, mbuf_seg_size, socket_id);
1147 			break;
1148 		}
1149 #ifndef RTE_EXEC_ENV_WINDOWS
1150 	case MP_ALLOC_ANON:
1151 		{
1152 			rte_mp = rte_mempool_create_empty(pool_name, nb_mbuf,
1153 				mb_size, (unsigned int) mb_mempool_cache,
1154 				sizeof(struct rte_pktmbuf_pool_private),
1155 				socket_id, mempool_flags);
1156 			if (rte_mp == NULL)
1157 				goto err;
1158 
1159 			if (rte_mempool_populate_anon(rte_mp) == 0) {
1160 				rte_mempool_free(rte_mp);
1161 				rte_mp = NULL;
1162 				goto err;
1163 			}
1164 			rte_pktmbuf_pool_init(rte_mp, NULL);
1165 			rte_mempool_obj_iter(rte_mp, rte_pktmbuf_init, NULL);
1166 			rte_mempool_mem_iter(rte_mp, dma_map_cb, NULL);
1167 			break;
1168 		}
1169 	case MP_ALLOC_XMEM:
1170 	case MP_ALLOC_XMEM_HUGE:
1171 		{
1172 			int heap_socket;
1173 			bool huge = mp_alloc_type == MP_ALLOC_XMEM_HUGE;
1174 
1175 			if (setup_extmem(nb_mbuf, mbuf_seg_size, huge) < 0)
1176 				rte_exit(EXIT_FAILURE, "Could not create external memory\n");
1177 
1178 			heap_socket =
1179 				rte_malloc_heap_get_socket(EXTMEM_HEAP_NAME);
1180 			if (heap_socket < 0)
1181 				rte_exit(EXIT_FAILURE, "Could not get external memory socket ID\n");
1182 
1183 			TESTPMD_LOG(INFO, "preferred mempool ops selected: %s\n",
1184 					rte_mbuf_best_mempool_ops());
1185 			rte_mp = rte_pktmbuf_pool_create(pool_name, nb_mbuf,
1186 					mb_mempool_cache, 0, mbuf_seg_size,
1187 					heap_socket);
1188 			break;
1189 		}
1190 #endif
1191 	case MP_ALLOC_XBUF:
1192 		{
1193 			struct rte_pktmbuf_extmem *ext_mem;
1194 			unsigned int ext_num;
1195 
1196 			ext_num = setup_extbuf(nb_mbuf,	mbuf_seg_size,
1197 					       socket_id, pool_name, &ext_mem);
1198 			if (ext_num == 0)
1199 				rte_exit(EXIT_FAILURE,
1200 					 "Can't create pinned data buffers\n");
1201 
1202 			TESTPMD_LOG(INFO, "preferred mempool ops selected: %s\n",
1203 					rte_mbuf_best_mempool_ops());
1204 			rte_mp = rte_pktmbuf_pool_create_extbuf
1205 					(pool_name, nb_mbuf, mb_mempool_cache,
1206 					 0, mbuf_seg_size, socket_id,
1207 					 ext_mem, ext_num);
1208 			free(ext_mem);
1209 			break;
1210 		}
1211 	default:
1212 		{
1213 			rte_exit(EXIT_FAILURE, "Invalid mempool creation mode\n");
1214 		}
1215 	}
1216 
1217 #ifndef RTE_EXEC_ENV_WINDOWS
1218 err:
1219 #endif
1220 	if (rte_mp == NULL) {
1221 		rte_exit(EXIT_FAILURE,
1222 			"Creation of mbuf pool for socket %u failed: %s\n",
1223 			socket_id, rte_strerror(rte_errno));
1224 	} else if (verbose_level > 0) {
1225 		rte_mempool_dump(stdout, rte_mp);
1226 	}
1227 	return rte_mp;
1228 }
1229 
1230 /*
1231  * Check whether the given socket id is valid in NUMA mode;
1232  * if valid, return 0, else return -1.
1233  */
1234 static int
1235 check_socket_id(const unsigned int socket_id)
1236 {
1237 	static int warning_once = 0;
1238 
1239 	if (new_socket_id(socket_id)) {
1240 		if (!warning_once && numa_support)
1241 			fprintf(stderr,
1242 				"Warning: NUMA should be configured manually by using --port-numa-config and --ring-numa-config parameters along with --numa.\n");
1243 		warning_once = 1;
1244 		return -1;
1245 	}
1246 	return 0;
1247 }
1248 
1249 /*
1250  * Get the allowed maximum number of RX queues.
1251  * *pid returns the port id which has the minimal value of
1252  * max_rx_queues among all ports.
1253  */
1254 queueid_t
1255 get_allowed_max_nb_rxq(portid_t *pid)
1256 {
1257 	queueid_t allowed_max_rxq = RTE_MAX_QUEUES_PER_PORT;
1258 	bool max_rxq_valid = false;
1259 	portid_t pi;
1260 	struct rte_eth_dev_info dev_info;
1261 
1262 	RTE_ETH_FOREACH_DEV(pi) {
1263 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1264 			continue;
1265 
1266 		max_rxq_valid = true;
1267 		if (dev_info.max_rx_queues < allowed_max_rxq) {
1268 			allowed_max_rxq = dev_info.max_rx_queues;
1269 			*pid = pi;
1270 		}
1271 	}
1272 	return max_rxq_valid ? allowed_max_rxq : 0;
1273 }
1274 
1275 /*
1276  * Check whether the input rxq is valid.
1277  * It is valid if it is not greater than the maximum number
1278  * of RX queues of any port.
1279  * If valid, return 0, else return -1.
1280  */
1281 int
1282 check_nb_rxq(queueid_t rxq)
1283 {
1284 	queueid_t allowed_max_rxq;
1285 	portid_t pid = 0;
1286 
1287 	allowed_max_rxq = get_allowed_max_nb_rxq(&pid);
1288 	if (rxq > allowed_max_rxq) {
1289 		fprintf(stderr,
1290 			"Fail: input rxq (%u) can't be greater than max_rx_queues (%u) of port %u\n",
1291 			rxq, allowed_max_rxq, pid);
1292 		return -1;
1293 	}
1294 	return 0;
1295 }
1296 
1297 /*
1298  * Get the allowed maximum number of TX queues.
1299  * *pid returns the port id which has the minimal value of
1300  * max_tx_queues among all ports.
1301  */
1302 queueid_t
1303 get_allowed_max_nb_txq(portid_t *pid)
1304 {
1305 	queueid_t allowed_max_txq = RTE_MAX_QUEUES_PER_PORT;
1306 	bool max_txq_valid = false;
1307 	portid_t pi;
1308 	struct rte_eth_dev_info dev_info;
1309 
1310 	RTE_ETH_FOREACH_DEV(pi) {
1311 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1312 			continue;
1313 
1314 		max_txq_valid = true;
1315 		if (dev_info.max_tx_queues < allowed_max_txq) {
1316 			allowed_max_txq = dev_info.max_tx_queues;
1317 			*pid = pi;
1318 		}
1319 	}
1320 	return max_txq_valid ? allowed_max_txq : 0;
1321 }
1322 
1323 /*
1324  * Check whether the input txq is valid.
1325  * It is valid if it is not greater than the maximum number
1326  * of TX queues of any port.
1327  * If valid, return 0, else return -1.
1328  */
1329 int
1330 check_nb_txq(queueid_t txq)
1331 {
1332 	queueid_t allowed_max_txq;
1333 	portid_t pid = 0;
1334 
1335 	allowed_max_txq = get_allowed_max_nb_txq(&pid);
1336 	if (txq > allowed_max_txq) {
1337 		fprintf(stderr,
1338 			"Fail: input txq (%u) can't be greater than max_tx_queues (%u) of port %u\n",
1339 			txq, allowed_max_txq, pid);
1340 		return -1;
1341 	}
1342 	return 0;
1343 }
1344 
1345 /*
1346  * Get the allowed maximum number of RXDs of every rx queue.
1347  * *pid returns the port id which has the minimal value of
1348  * max_rxd among all queues of all ports.
1349  */
1350 static uint16_t
1351 get_allowed_max_nb_rxd(portid_t *pid)
1352 {
1353 	uint16_t allowed_max_rxd = UINT16_MAX;
1354 	portid_t pi;
1355 	struct rte_eth_dev_info dev_info;
1356 
1357 	RTE_ETH_FOREACH_DEV(pi) {
1358 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1359 			continue;
1360 
1361 		if (dev_info.rx_desc_lim.nb_max < allowed_max_rxd) {
1362 			allowed_max_rxd = dev_info.rx_desc_lim.nb_max;
1363 			*pid = pi;
1364 		}
1365 	}
1366 	return allowed_max_rxd;
1367 }
1368 
1369 /*
1370  * Get the allowed minimal number of RXDs of every rx queue.
1371  * *pid returns the port id which has the largest value of
1372  * min_rxd among all queues of all ports.
1373  */
1374 static uint16_t
1375 get_allowed_min_nb_rxd(portid_t *pid)
1376 {
1377 	uint16_t allowed_min_rxd = 0;
1378 	portid_t pi;
1379 	struct rte_eth_dev_info dev_info;
1380 
1381 	RTE_ETH_FOREACH_DEV(pi) {
1382 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1383 			continue;
1384 
1385 		if (dev_info.rx_desc_lim.nb_min > allowed_min_rxd) {
1386 			allowed_min_rxd = dev_info.rx_desc_lim.nb_min;
1387 			*pid = pi;
1388 		}
1389 	}
1390 
1391 	return allowed_min_rxd;
1392 }
1393 
1394 /*
1395  * Check whether the input rxd is valid.
1396  * It is valid if it is not greater than the maximum number
1397  * of RXDs of every Rx queue and not less than the minimal
1398  * number of RXDs of every Rx queue.
1399  * If valid, return 0, else return -1.
1400  */
1401 int
1402 check_nb_rxd(queueid_t rxd)
1403 {
1404 	uint16_t allowed_max_rxd;
1405 	uint16_t allowed_min_rxd;
1406 	portid_t pid = 0;
1407 
1408 	allowed_max_rxd = get_allowed_max_nb_rxd(&pid);
1409 	if (rxd > allowed_max_rxd) {
1410 		fprintf(stderr,
1411 			"Fail: input rxd (%u) can't be greater than max_rxds (%u) of port %u\n",
1412 			rxd, allowed_max_rxd, pid);
1413 		return -1;
1414 	}
1415 
1416 	allowed_min_rxd = get_allowed_min_nb_rxd(&pid);
1417 	if (rxd < allowed_min_rxd) {
1418 		fprintf(stderr,
1419 			"Fail: input rxd (%u) can't be less than min_rxds (%u) of port %u\n",
1420 			rxd, allowed_min_rxd, pid);
1421 		return -1;
1422 	}
1423 
1424 	return 0;
1425 }
1426 
1427 /*
1428  * Get the allowed maximum number of TXDs of every tx queue.
1429  * *pid returns the port id which has the minimal value of
1430  * max_txd among all tx queues.
1431  */
1432 static uint16_t
1433 get_allowed_max_nb_txd(portid_t *pid)
1434 {
1435 	uint16_t allowed_max_txd = UINT16_MAX;
1436 	portid_t pi;
1437 	struct rte_eth_dev_info dev_info;
1438 
1439 	RTE_ETH_FOREACH_DEV(pi) {
1440 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1441 			continue;
1442 
1443 		if (dev_info.tx_desc_lim.nb_max < allowed_max_txd) {
1444 			allowed_max_txd = dev_info.tx_desc_lim.nb_max;
1445 			*pid = pi;
1446 		}
1447 	}
1448 	return allowed_max_txd;
1449 }
1450 
1451 /*
1452  * Get the allowed minimal number of TXDs of every tx queue.
1453  * *pid returns the port id which has the largest value of
1454  * min_txd among all tx queues.
1455  */
1456 static uint16_t
1457 get_allowed_min_nb_txd(portid_t *pid)
1458 {
1459 	uint16_t allowed_min_txd = 0;
1460 	portid_t pi;
1461 	struct rte_eth_dev_info dev_info;
1462 
1463 	RTE_ETH_FOREACH_DEV(pi) {
1464 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1465 			continue;
1466 
1467 		if (dev_info.tx_desc_lim.nb_min > allowed_min_txd) {
1468 			allowed_min_txd = dev_info.tx_desc_lim.nb_min;
1469 			*pid = pi;
1470 		}
1471 	}
1472 
1473 	return allowed_min_txd;
1474 }
1475 
1476 /*
1477  * Check whether the input txd is valid.
1478  * It is valid if it is not greater than the maximum number
1479  * of TXDs of every Tx queue and not less than the minimal
1480  * number of TXDs of every Tx queue. If valid, return 0, else return -1.
1481  */
1482 int
1483 check_nb_txd(queueid_t txd)
1484 {
1485 	uint16_t allowed_max_txd;
1486 	uint16_t allowed_min_txd;
1487 	portid_t pid = 0;
1488 
1489 	allowed_max_txd = get_allowed_max_nb_txd(&pid);
1490 	if (txd > allowed_max_txd) {
1491 		fprintf(stderr,
1492 			"Fail: input txd (%u) can't be greater than max_txds (%u) of port %u\n",
1493 			txd, allowed_max_txd, pid);
1494 		return -1;
1495 	}
1496 
1497 	allowed_min_txd = get_allowed_min_nb_txd(&pid);
1498 	if (txd < allowed_min_txd) {
1499 		fprintf(stderr,
1500 			"Fail: input txd (%u) can't be less than min_txds (%u) of port %u\n",
1501 			txd, allowed_min_txd, pid);
1502 		return -1;
1503 	}
1504 	return 0;
1505 }
1506 
1507 
1508 /*
1509  * Get the allowed maximum number of hairpin queues.
1510  * *pid returns the port id which has the minimal value of
1511  * max_hairpin_queues among all ports.
1512  */
1513 queueid_t
1514 get_allowed_max_nb_hairpinq(portid_t *pid)
1515 {
1516 	queueid_t allowed_max_hairpinq = RTE_MAX_QUEUES_PER_PORT;
1517 	portid_t pi;
1518 	struct rte_eth_hairpin_cap cap;
1519 
1520 	RTE_ETH_FOREACH_DEV(pi) {
1521 		if (rte_eth_dev_hairpin_capability_get(pi, &cap) != 0) {
1522 			*pid = pi;
1523 			return 0;
1524 		}
1525 		if (cap.max_nb_queues < allowed_max_hairpinq) {
1526 			allowed_max_hairpinq = cap.max_nb_queues;
1527 			*pid = pi;
1528 		}
1529 	}
1530 	return allowed_max_hairpinq;
1531 }
1532 
1533 /*
1534  * Check whether the input number of hairpin queues is valid.
1535  * It is valid if it is not greater than the maximum number
1536  * of hairpin queues of any port.
1537  * If valid, return 0, else return -1.
1538  */
1539 int
1540 check_nb_hairpinq(queueid_t hairpinq)
1541 {
1542 	queueid_t allowed_max_hairpinq;
1543 	portid_t pid = 0;
1544 
1545 	allowed_max_hairpinq = get_allowed_max_nb_hairpinq(&pid);
1546 	if (hairpinq > allowed_max_hairpinq) {
1547 		fprintf(stderr,
1548 			"Fail: input hairpin (%u) can't be greater than max_hairpin_queues (%u) of port %u\n",
1549 			hairpinq, allowed_max_hairpinq, pid);
1550 		return -1;
1551 	}
1552 	return 0;
1553 }
1554 
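/* Ethernet overhead: max Rx packet length minus max MTU, or header plus CRC. */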
1555 static int
1556 get_eth_overhead(struct rte_eth_dev_info *dev_info)
1557 {
1558 	uint32_t eth_overhead;
1559 
1560 	if (dev_info->max_mtu != UINT16_MAX &&
1561 	    dev_info->max_rx_pktlen > dev_info->max_mtu)
1562 		eth_overhead = dev_info->max_rx_pktlen - dev_info->max_mtu;
1563 	else
1564 		eth_overhead = RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN;
1565 
1566 	return eth_overhead;
1567 }
1568 
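/* Apply the default Tx/Rx configuration and offloads to a single port. */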
1569 static void
1570 init_config_port_offloads(portid_t pid, uint32_t socket_id)
1571 {
1572 	struct rte_port *port = &ports[pid];
1573 	int ret;
1574 	int i;
1575 
1576 	eth_rx_metadata_negotiate_mp(pid);
1577 	flow_pick_transfer_proxy_mp(pid);
1578 
1579 	port->dev_conf.txmode = tx_mode;
1580 	port->dev_conf.rxmode = rx_mode;
1581 
1582 	ret = eth_dev_info_get_print_err(pid, &port->dev_info);
1583 	if (ret != 0)
1584 		rte_exit(EXIT_FAILURE, "rte_eth_dev_info_get() failed\n");
1585 
1586 	if (!(port->dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE))
1587 		port->dev_conf.txmode.offloads &=
1588 			~RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE;
1589 
1590 	/* Apply Rx offloads configuration */
1591 	for (i = 0; i < port->dev_info.max_rx_queues; i++)
1592 		port->rx_conf[i].offloads = port->dev_conf.rxmode.offloads;
1593 	/* Apply Tx offloads configuration */
1594 	for (i = 0; i < port->dev_info.max_tx_queues; i++)
1595 		port->tx_conf[i].offloads = port->dev_conf.txmode.offloads;
1596 
1597 	if (eth_link_speed)
1598 		port->dev_conf.link_speeds = eth_link_speed;
1599 
1600 	if (max_rx_pkt_len)
1601 		port->dev_conf.rxmode.mtu = max_rx_pkt_len -
1602 			get_eth_overhead(&port->dev_info);
1603 
1604 	/* set flag to initialize port/queue */
1605 	port->need_reconfig = 1;
1606 	port->need_reconfig_queues = 1;
1607 	port->socket_id = socket_id;
1608 	port->tx_metadata = 0;
1609 
1610 	/*
1611 	 * Check for maximum number of segments per MTU.
1612 	 * Accordingly update the mbuf data size.
1613 	 */
1614 	if (port->dev_info.rx_desc_lim.nb_mtu_seg_max != UINT16_MAX &&
1615 	    port->dev_info.rx_desc_lim.nb_mtu_seg_max != 0) {
1616 		uint32_t eth_overhead = get_eth_overhead(&port->dev_info);
1617 		uint16_t mtu;
1618 
1619 		if (rte_eth_dev_get_mtu(pid, &mtu) == 0) {
1620 			uint16_t data_size = (mtu + eth_overhead) /
1621 				port->dev_info.rx_desc_lim.nb_mtu_seg_max;
1622 			uint16_t buffer_size = data_size + RTE_PKTMBUF_HEADROOM;
1623 
1624 			if (buffer_size > mbuf_data_size[0]) {
1625 				mbuf_data_size[0] = buffer_size;
1626 				TESTPMD_LOG(WARNING,
1627 					"Configured mbuf size of the first segment %hu\n",
1628 					mbuf_data_size[0]);
1629 			}
1630 		}
1631 	}
1632 }
1633 
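/*
 * One-time initialisation: forwarding lcores, per-port default configuration,
 * mbuf pools and the per-lcore GSO/GRO contexts.
 */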
1634 static void
1635 init_config(void)
1636 {
1637 	portid_t pid;
1638 	struct rte_mempool *mbp;
1639 	unsigned int nb_mbuf_per_pool;
1640 	lcoreid_t  lc_id;
1641 #ifdef RTE_LIB_GRO
1642 	struct rte_gro_param gro_param;
1643 #endif
1644 #ifdef RTE_LIB_GSO
1645 	uint32_t gso_types;
1646 #endif
1647 
1648 	/* Configuration of logical cores. */
1649 	fwd_lcores = rte_zmalloc("testpmd: fwd_lcores",
1650 				sizeof(struct fwd_lcore *) * nb_lcores,
1651 				RTE_CACHE_LINE_SIZE);
1652 	if (fwd_lcores == NULL) {
1653 		rte_exit(EXIT_FAILURE, "rte_zmalloc(%d (struct fwd_lcore *)) "
1654 							"failed\n", nb_lcores);
1655 	}
1656 	for (lc_id = 0; lc_id < nb_lcores; lc_id++) {
1657 		fwd_lcores[lc_id] = rte_zmalloc("testpmd: struct fwd_lcore",
1658 					       sizeof(struct fwd_lcore),
1659 					       RTE_CACHE_LINE_SIZE);
1660 		if (fwd_lcores[lc_id] == NULL) {
1661 			rte_exit(EXIT_FAILURE, "rte_zmalloc(struct fwd_lcore) "
1662 								"failed\n");
1663 		}
1664 		fwd_lcores[lc_id]->cpuid_idx = lc_id;
1665 	}
1666 
1667 	RTE_ETH_FOREACH_DEV(pid) {
1668 		uint32_t socket_id;
1669 
1670 		if (numa_support) {
1671 			socket_id = port_numa[pid];
1672 			if (port_numa[pid] == NUMA_NO_CONFIG) {
1673 				socket_id = rte_eth_dev_socket_id(pid);
1674 
1675 				/*
1676 				 * if socket_id is invalid,
1677 				 * set to the first available socket.
1678 				 */
1679 				if (check_socket_id(socket_id) < 0)
1680 					socket_id = socket_ids[0];
1681 			}
1682 		} else {
1683 			socket_id = (socket_num == UMA_NO_CONFIG) ?
1684 				    0 : socket_num;
1685 		}
1686 		/* Apply default TxRx configuration for all ports */
1687 		init_config_port_offloads(pid, socket_id);
1688 	}
1689 	/*
1690 	 * Create pools of mbuf.
1691 	 * If NUMA support is disabled, create a single pool of mbuf in
1692 	 * socket 0 memory by default.
1693 	 * Otherwise, create a pool of mbuf in the memory of sockets 0 and 1.
1694 	 *
1695 	 * Use the maximum value of nb_rxd and nb_txd here, then nb_rxd and
1696 	 * nb_txd can be configured at run time.
1697 	 */
1698 	if (param_total_num_mbufs)
1699 		nb_mbuf_per_pool = param_total_num_mbufs;
1700 	else {
1701 		nb_mbuf_per_pool = RTE_TEST_RX_DESC_MAX +
1702 			(nb_lcores * mb_mempool_cache) +
1703 			RTE_TEST_TX_DESC_MAX + MAX_PKT_BURST;
1704 		nb_mbuf_per_pool *= RTE_MAX_ETHPORTS;
1705 	}
1706 
1707 	if (numa_support) {
1708 		uint8_t i, j;
1709 
1710 		for (i = 0; i < num_sockets; i++)
1711 			for (j = 0; j < mbuf_data_size_n; j++)
1712 				mempools[i * MAX_SEGS_BUFFER_SPLIT + j] =
1713 					mbuf_pool_create(mbuf_data_size[j],
1714 							  nb_mbuf_per_pool,
1715 							  socket_ids[i], j);
1716 	} else {
1717 		uint8_t i;
1718 
1719 		for (i = 0; i < mbuf_data_size_n; i++)
1720 			mempools[i] = mbuf_pool_create
1721 					(mbuf_data_size[i],
1722 					 nb_mbuf_per_pool,
1723 					 socket_num == UMA_NO_CONFIG ?
1724 					 0 : socket_num, i);
1725 	}
1726 
1727 	init_port_config();
1728 
1729 #ifdef RTE_LIB_GSO
1730 	gso_types = RTE_ETH_TX_OFFLOAD_TCP_TSO | RTE_ETH_TX_OFFLOAD_VXLAN_TNL_TSO |
1731 		RTE_ETH_TX_OFFLOAD_GRE_TNL_TSO | RTE_ETH_TX_OFFLOAD_UDP_TSO;
1732 #endif
1733 	/*
1734 	 * Record which mbuf pool each logical core should use, if needed.
1735 	 */
1736 	for (lc_id = 0; lc_id < nb_lcores; lc_id++) {
1737 		mbp = mbuf_pool_find(
1738 			rte_lcore_to_socket_id(fwd_lcores_cpuids[lc_id]), 0);
1739 
1740 		if (mbp == NULL)
1741 			mbp = mbuf_pool_find(0, 0);
1742 		fwd_lcores[lc_id]->mbp = mbp;
1743 #ifdef RTE_LIB_GSO
1744 		/* initialize GSO context */
1745 		fwd_lcores[lc_id]->gso_ctx.direct_pool = mbp;
1746 		fwd_lcores[lc_id]->gso_ctx.indirect_pool = mbp;
1747 		fwd_lcores[lc_id]->gso_ctx.gso_types = gso_types;
1748 		fwd_lcores[lc_id]->gso_ctx.gso_size = RTE_ETHER_MAX_LEN -
1749 			RTE_ETHER_CRC_LEN;
1750 		fwd_lcores[lc_id]->gso_ctx.flag = 0;
1751 #endif
1752 	}
1753 
1754 	fwd_config_setup();
1755 
1756 #ifdef RTE_LIB_GRO
1757 	/* create a gro context for each lcore */
1758 	gro_param.gro_types = RTE_GRO_TCP_IPV4;
1759 	gro_param.max_flow_num = GRO_MAX_FLUSH_CYCLES;
1760 	gro_param.max_item_per_flow = MAX_PKT_BURST;
1761 	for (lc_id = 0; lc_id < nb_lcores; lc_id++) {
1762 		gro_param.socket_id = rte_lcore_to_socket_id(
1763 				fwd_lcores_cpuids[lc_id]);
1764 		fwd_lcores[lc_id]->gro_ctx = rte_gro_ctx_create(&gro_param);
1765 		if (fwd_lcores[lc_id]->gro_ctx == NULL) {
1766 			rte_exit(EXIT_FAILURE,
1767 					"rte_gro_ctx_create() failed\n");
1768 		}
1769 	}
1770 #endif
1771 }
1772 
1773 
1774 void
1775 reconfig(portid_t new_port_id, unsigned socket_id)
1776 {
1777 	/* Reconfiguration of Ethernet ports. */
1778 	init_config_port_offloads(new_port_id, socket_id);
1779 	init_port_config();
1780 }
1781 
1782 
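/* (Re)allocate one forwarding stream per queue of every port; return 0 on success. */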
1783 int
1784 init_fwd_streams(void)
1785 {
1786 	portid_t pid;
1787 	struct rte_port *port;
1788 	streamid_t sm_id, nb_fwd_streams_new;
1789 	queueid_t q;
1790 
1791 	/* Set the socket id according to whether NUMA is enabled or not. */
1792 	RTE_ETH_FOREACH_DEV(pid) {
1793 		port = &ports[pid];
1794 		if (nb_rxq > port->dev_info.max_rx_queues) {
1795 			fprintf(stderr,
1796 				"Fail: nb_rxq(%d) is greater than max_rx_queues(%d)\n",
1797 				nb_rxq, port->dev_info.max_rx_queues);
1798 			return -1;
1799 		}
1800 		if (nb_txq > port->dev_info.max_tx_queues) {
1801 			fprintf(stderr,
1802 				"Fail: nb_txq(%d) is greater than max_tx_queues(%d)\n",
1803 				nb_txq, port->dev_info.max_tx_queues);
1804 			return -1;
1805 		}
1806 		if (numa_support) {
1807 			if (port_numa[pid] != NUMA_NO_CONFIG)
1808 				port->socket_id = port_numa[pid];
1809 			else {
1810 				port->socket_id = rte_eth_dev_socket_id(pid);
1811 
1812 				/*
1813 				 * if socket_id is invalid,
1814 				 * set to the first available socket.
1815 				 */
1816 				if (check_socket_id(port->socket_id) < 0)
1817 					port->socket_id = socket_ids[0];
1818 			}
1819 		}
1820 		else {
1821 			if (socket_num == UMA_NO_CONFIG)
1822 				port->socket_id = 0;
1823 			else
1824 				port->socket_id = socket_num;
1825 		}
1826 	}
1827 
1828 	q = RTE_MAX(nb_rxq, nb_txq);
1829 	if (q == 0) {
1830 		fprintf(stderr,
1831 			"Fail: Cannot allocate fwd streams as number of queues is 0\n");
1832 		return -1;
1833 	}
1834 	nb_fwd_streams_new = (streamid_t)(nb_ports * q);
1835 	if (nb_fwd_streams_new == nb_fwd_streams)
1836 		return 0;
1837 	/* clear the old */
1838 	if (fwd_streams != NULL) {
1839 		for (sm_id = 0; sm_id < nb_fwd_streams; sm_id++) {
1840 			if (fwd_streams[sm_id] == NULL)
1841 				continue;
1842 			rte_free(fwd_streams[sm_id]);
1843 			fwd_streams[sm_id] = NULL;
1844 		}
1845 		rte_free(fwd_streams);
1846 		fwd_streams = NULL;
1847 	}
1848 
1849 	/* init new */
1850 	nb_fwd_streams = nb_fwd_streams_new;
1851 	if (nb_fwd_streams) {
1852 		fwd_streams = rte_zmalloc("testpmd: fwd_streams",
1853 			sizeof(struct fwd_stream *) * nb_fwd_streams,
1854 			RTE_CACHE_LINE_SIZE);
1855 		if (fwd_streams == NULL)
1856 			rte_exit(EXIT_FAILURE, "rte_zmalloc(%d"
1857 				 " (struct fwd_stream *)) failed\n",
1858 				 nb_fwd_streams);
1859 
1860 		for (sm_id = 0; sm_id < nb_fwd_streams; sm_id++) {
1861 			fwd_streams[sm_id] = rte_zmalloc("testpmd:"
1862 				" struct fwd_stream", sizeof(struct fwd_stream),
1863 				RTE_CACHE_LINE_SIZE);
1864 			if (fwd_streams[sm_id] == NULL)
1865 				rte_exit(EXIT_FAILURE, "rte_zmalloc"
1866 					 "(struct fwd_stream) failed\n");
1867 		}
1868 	}
1869 
1870 	return 0;
1871 }
1872 
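/* Display the distribution of recorded Rx or Tx burst sizes. */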
1873 static void
1874 pkt_burst_stats_display(const char *rx_tx, struct pkt_burst_stats *pbs)
1875 {
1876 	uint64_t total_burst, sburst;
1877 	uint64_t nb_burst;
1878 	uint64_t burst_stats[4];
1879 	uint16_t pktnb_stats[4];
1880 	uint16_t nb_pkt;
1881 	int burst_percent[4], sburstp;
1882 	int i;
1883 
1884 	/*
1885 	 * First compute the total number of packet bursts and the
1886 	 * two highest numbers of bursts of the same number of packets.
1887 	 */
1888 	memset(&burst_stats, 0x0, sizeof(burst_stats));
1889 	memset(&pktnb_stats, 0x0, sizeof(pktnb_stats));
1890 
1891 	/* Show stats for 0 burst size always */
1892 	total_burst = pbs->pkt_burst_spread[0];
1893 	burst_stats[0] = pbs->pkt_burst_spread[0];
1894 	pktnb_stats[0] = 0;
1895 
1896 	/* Find the next 2 burst sizes with highest occurrences. */
1897 	for (nb_pkt = 1; nb_pkt < MAX_PKT_BURST + 1; nb_pkt++) {
1898 		nb_burst = pbs->pkt_burst_spread[nb_pkt];
1899 
1900 		if (nb_burst == 0)
1901 			continue;
1902 
1903 		total_burst += nb_burst;
1904 
1905 		if (nb_burst > burst_stats[1]) {
1906 			burst_stats[2] = burst_stats[1];
1907 			pktnb_stats[2] = pktnb_stats[1];
1908 			burst_stats[1] = nb_burst;
1909 			pktnb_stats[1] = nb_pkt;
1910 		} else if (nb_burst > burst_stats[2]) {
1911 			burst_stats[2] = nb_burst;
1912 			pktnb_stats[2] = nb_pkt;
1913 		}
1914 	}
1915 	if (total_burst == 0)
1916 		return;
1917 
1918 	printf("  %s-bursts : %"PRIu64" [", rx_tx, total_burst);
1919 	for (i = 0, sburst = 0, sburstp = 0; i < 4; i++) {
1920 		if (i == 3) {
1921 			printf("%d%% of other]\n", 100 - sburstp);
1922 			return;
1923 		}
1924 
1925 		sburst += burst_stats[i];
1926 		if (sburst == total_burst) {
1927 			printf("%d%% of %d pkts]\n",
1928 				100 - sburstp, (int) pktnb_stats[i]);
1929 			return;
1930 		}
1931 
1932 		burst_percent[i] =
1933 			(double)burst_stats[i] / total_burst * 100;
1934 		printf("%d%% of %d pkts + ",
1935 			burst_percent[i], (int) pktnb_stats[i]);
1936 		sburstp += burst_percent[i];
1937 	}
1938 }
1939 
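/*
 * Display per-stream forwarding statistics, including the bad checksum
 * counters when the csum forwarding engine is in use.
 */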
1940 static void
1941 fwd_stream_stats_display(streamid_t stream_id)
1942 {
1943 	struct fwd_stream *fs;
1944 	static const char *fwd_top_stats_border = "-------";
1945 
1946 	fs = fwd_streams[stream_id];
1947 	if ((fs->rx_packets == 0) && (fs->tx_packets == 0) &&
1948 	    (fs->fwd_dropped == 0))
1949 		return;
1950 	printf("\n  %s Forward Stats for RX Port=%2d/Queue=%2d -> "
1951 	       "TX Port=%2d/Queue=%2d %s\n",
1952 	       fwd_top_stats_border, fs->rx_port, fs->rx_queue,
1953 	       fs->tx_port, fs->tx_queue, fwd_top_stats_border);
1954 	printf("  RX-packets: %-14"PRIu64" TX-packets: %-14"PRIu64
1955 	       " TX-dropped: %-14"PRIu64,
1956 	       fs->rx_packets, fs->tx_packets, fs->fwd_dropped);
1957 
1958 	/* if checksum mode */
1959 	if (cur_fwd_eng == &csum_fwd_engine) {
1960 		printf("  RX- bad IP checksum: %-14"PRIu64
1961 		       "  RX- bad L4 checksum: %-14"PRIu64
1962 		       " RX- bad outer L4 checksum: %-14"PRIu64"\n",
1963 			fs->rx_bad_ip_csum, fs->rx_bad_l4_csum,
1964 			fs->rx_bad_outer_l4_csum);
1965 		printf("  RX- bad outer IP checksum: %-14"PRIu64"\n",
1966 			fs->rx_bad_outer_ip_csum);
1967 	} else {
1968 		printf("\n");
1969 	}
1970 
1971 	if (record_burst_stats) {
1972 		pkt_burst_stats_display("RX", &fs->rx_burst_stats);
1973 		pkt_burst_stats_display("TX", &fs->tx_burst_stats);
1974 	}
1975 }
1976 
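/*
 * Display the forwarding statistics accumulated since the last reset:
 * per-port counters (current PMD stats minus the snapshot taken at start)
 * and totals over all forwarding ports.
 */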
1977 void
1978 fwd_stats_display(void)
1979 {
1980 	static const char *fwd_stats_border = "----------------------";
1981 	static const char *acc_stats_border = "+++++++++++++++";
1982 	struct {
1983 		struct fwd_stream *rx_stream;
1984 		struct fwd_stream *tx_stream;
1985 		uint64_t tx_dropped;
1986 		uint64_t rx_bad_ip_csum;
1987 		uint64_t rx_bad_l4_csum;
1988 		uint64_t rx_bad_outer_l4_csum;
1989 		uint64_t rx_bad_outer_ip_csum;
1990 	} ports_stats[RTE_MAX_ETHPORTS];
1991 	uint64_t total_rx_dropped = 0;
1992 	uint64_t total_tx_dropped = 0;
1993 	uint64_t total_rx_nombuf = 0;
1994 	struct rte_eth_stats stats;
1995 	uint64_t fwd_cycles = 0;
1996 	uint64_t total_recv = 0;
1997 	uint64_t total_xmit = 0;
1998 	struct rte_port *port;
1999 	streamid_t sm_id;
2000 	portid_t pt_id;
2001 	int i;
2002 
2003 	memset(ports_stats, 0, sizeof(ports_stats));
2004 
2005 	for (sm_id = 0; sm_id < cur_fwd_config.nb_fwd_streams; sm_id++) {
2006 		struct fwd_stream *fs = fwd_streams[sm_id];
2007 
2008 		if (cur_fwd_config.nb_fwd_streams >
2009 		    cur_fwd_config.nb_fwd_ports) {
2010 			fwd_stream_stats_display(sm_id);
2011 		} else {
2012 			ports_stats[fs->tx_port].tx_stream = fs;
2013 			ports_stats[fs->rx_port].rx_stream = fs;
2014 		}
2015 
2016 		ports_stats[fs->tx_port].tx_dropped += fs->fwd_dropped;
2017 
2018 		ports_stats[fs->rx_port].rx_bad_ip_csum += fs->rx_bad_ip_csum;
2019 		ports_stats[fs->rx_port].rx_bad_l4_csum += fs->rx_bad_l4_csum;
2020 		ports_stats[fs->rx_port].rx_bad_outer_l4_csum +=
2021 				fs->rx_bad_outer_l4_csum;
2022 		ports_stats[fs->rx_port].rx_bad_outer_ip_csum +=
2023 				fs->rx_bad_outer_ip_csum;
2024 
2025 		if (record_core_cycles)
2026 			fwd_cycles += fs->core_cycles;
2027 	}
2028 	for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
2029 		pt_id = fwd_ports_ids[i];
2030 		port = &ports[pt_id];
2031 
2032 		rte_eth_stats_get(pt_id, &stats);
2033 		stats.ipackets -= port->stats.ipackets;
2034 		stats.opackets -= port->stats.opackets;
2035 		stats.ibytes -= port->stats.ibytes;
2036 		stats.obytes -= port->stats.obytes;
2037 		stats.imissed -= port->stats.imissed;
2038 		stats.oerrors -= port->stats.oerrors;
2039 		stats.rx_nombuf -= port->stats.rx_nombuf;
2040 
2041 		total_recv += stats.ipackets;
2042 		total_xmit += stats.opackets;
2043 		total_rx_dropped += stats.imissed;
2044 		total_tx_dropped += ports_stats[pt_id].tx_dropped;
2045 		total_tx_dropped += stats.oerrors;
2046 		total_rx_nombuf  += stats.rx_nombuf;
2047 
2048 		printf("\n  %s Forward statistics for port %-2d %s\n",
2049 		       fwd_stats_border, pt_id, fwd_stats_border);
2050 
2051 		printf("  RX-packets: %-14"PRIu64" RX-dropped: %-14"PRIu64
2052 		       "RX-total: %-"PRIu64"\n", stats.ipackets, stats.imissed,
2053 		       stats.ipackets + stats.imissed);
2054 
2055 		if (cur_fwd_eng == &csum_fwd_engine) {
2056 			printf("  Bad-ipcsum: %-14"PRIu64
2057 			       " Bad-l4csum: %-14"PRIu64
2058 			       "Bad-outer-l4csum: %-14"PRIu64"\n",
2059 			       ports_stats[pt_id].rx_bad_ip_csum,
2060 			       ports_stats[pt_id].rx_bad_l4_csum,
2061 			       ports_stats[pt_id].rx_bad_outer_l4_csum);
2062 			printf("  Bad-outer-ipcsum: %-14"PRIu64"\n",
2063 			       ports_stats[pt_id].rx_bad_outer_ip_csum);
2064 		}
2065 		if (stats.ierrors + stats.rx_nombuf > 0) {
2066 			printf("  RX-error: %-"PRIu64"\n", stats.ierrors);
2067 			printf("  RX-nombufs: %-14"PRIu64"\n", stats.rx_nombuf);
2068 		}
2069 
2070 		printf("  TX-packets: %-14"PRIu64" TX-dropped: %-14"PRIu64
2071 		       "TX-total: %-"PRIu64"\n",
2072 		       stats.opackets, ports_stats[pt_id].tx_dropped,
2073 		       stats.opackets + ports_stats[pt_id].tx_dropped);
2074 
2075 		if (record_burst_stats) {
2076 			if (ports_stats[pt_id].rx_stream)
2077 				pkt_burst_stats_display("RX",
2078 					&ports_stats[pt_id].rx_stream->rx_burst_stats);
2079 			if (ports_stats[pt_id].tx_stream)
2080 				pkt_burst_stats_display("TX",
2081 				&ports_stats[pt_id].tx_stream->tx_burst_stats);
2082 		}
2083 
2084 		printf("  %s--------------------------------%s\n",
2085 		       fwd_stats_border, fwd_stats_border);
2086 	}
2087 
2088 	printf("\n  %s Accumulated forward statistics for all ports"
2089 	       "%s\n",
2090 	       acc_stats_border, acc_stats_border);
2091 	printf("  RX-packets: %-14"PRIu64" RX-dropped: %-14"PRIu64"RX-total: "
2092 	       "%-"PRIu64"\n"
2093 	       "  TX-packets: %-14"PRIu64" TX-dropped: %-14"PRIu64"TX-total: "
2094 	       "%-"PRIu64"\n",
2095 	       total_recv, total_rx_dropped, total_recv + total_rx_dropped,
2096 	       total_xmit, total_tx_dropped, total_xmit + total_tx_dropped);
2097 	if (total_rx_nombuf > 0)
2098 		printf("  RX-nombufs: %-14"PRIu64"\n", total_rx_nombuf);
2099 	printf("  %s++++++++++++++++++++++++++++++++++++++++++++++"
2100 	       "%s\n",
2101 	       acc_stats_border, acc_stats_border);
2102 	if (record_core_cycles) {
2103 #define CYC_PER_MHZ 1E6
2104 		if (total_recv > 0 || total_xmit > 0) {
2105 			uint64_t total_pkts = 0;
2106 			if (strcmp(cur_fwd_eng->fwd_mode_name, "txonly") == 0 ||
2107 			    strcmp(cur_fwd_eng->fwd_mode_name, "flowgen") == 0)
2108 				total_pkts = total_xmit;
2109 			else
2110 				total_pkts = total_recv;
2111 
2112 			printf("\n  CPU cycles/packet=%.2F (total cycles="
2113 			       "%"PRIu64" / total %s packets=%"PRIu64") at %"PRIu64
2114 			       " MHz Clock\n",
2115 			       (double) fwd_cycles / total_pkts,
2116 			       fwd_cycles, cur_fwd_eng->fwd_mode_name, total_pkts,
2117 			       (uint64_t)(rte_get_tsc_hz() / CYC_PER_MHZ));
2118 		}
2119 	}
2120 }
2121 
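/*
 * Reset forwarding statistics: snapshot the current PMD counters of each
 * forwarding port and clear all per-stream software counters.
 */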
2122 void
2123 fwd_stats_reset(void)
2124 {
2125 	streamid_t sm_id;
2126 	portid_t pt_id;
2127 	int i;
2128 
2129 	for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
2130 		pt_id = fwd_ports_ids[i];
2131 		rte_eth_stats_get(pt_id, &ports[pt_id].stats);
2132 	}
2133 	for (sm_id = 0; sm_id < cur_fwd_config.nb_fwd_streams; sm_id++) {
2134 		struct fwd_stream *fs = fwd_streams[sm_id];
2135 
2136 		fs->rx_packets = 0;
2137 		fs->tx_packets = 0;
2138 		fs->fwd_dropped = 0;
2139 		fs->rx_bad_ip_csum = 0;
2140 		fs->rx_bad_l4_csum = 0;
2141 		fs->rx_bad_outer_l4_csum = 0;
2142 		fs->rx_bad_outer_ip_csum = 0;
2143 
2144 		memset(&fs->rx_burst_stats, 0, sizeof(fs->rx_burst_stats));
2145 		memset(&fs->tx_burst_stats, 0, sizeof(fs->tx_burst_stats));
2146 		fs->core_cycles = 0;
2147 	}
2148 }
2149 
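/*
 * Drain any packets still sitting in the Rx queues of the forwarding ports
 * so that a new run starts with clean statistics. Not supported in
 * multi-process mode.
 */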
2150 static void
2151 flush_fwd_rx_queues(void)
2152 {
2153 	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
2154 	portid_t  rxp;
2155 	portid_t port_id;
2156 	queueid_t rxq;
2157 	uint16_t  nb_rx;
2158 	uint16_t  i;
2159 	uint8_t   j;
2160 	uint64_t prev_tsc = 0, diff_tsc, cur_tsc, timer_tsc = 0;
2161 	uint64_t timer_period;
2162 
2163 	if (num_procs > 1) {
2164 		printf("Flushing fwd Rx queues is not supported in multi-process mode, skipping.\n");
2165 		return;
2166 	}
2167 
2168 	/* convert to number of cycles */
2169 	timer_period = rte_get_timer_hz(); /* 1 second timeout */
2170 
2171 	for (j = 0; j < 2; j++) {
2172 		for (rxp = 0; rxp < cur_fwd_config.nb_fwd_ports; rxp++) {
2173 			for (rxq = 0; rxq < nb_rxq; rxq++) {
2174 				port_id = fwd_ports_ids[rxp];
2175 				/*
2176 				 * testpmd can get stuck in the do-while loop below
2177 				 * if rte_eth_rx_burst() keeps returning packets, so
2178 				 * a timer is used to break out of the loop after a
2179 				 * 1 second timeout.
2180 				 */
2181 				prev_tsc = rte_rdtsc();
2182 				do {
2183 					nb_rx = rte_eth_rx_burst(port_id, rxq,
2184 						pkts_burst, MAX_PKT_BURST);
2185 					for (i = 0; i < nb_rx; i++)
2186 						rte_pktmbuf_free(pkts_burst[i]);
2187 
2188 					cur_tsc = rte_rdtsc();
2189 					diff_tsc = cur_tsc - prev_tsc;
2190 					timer_tsc += diff_tsc;
2191 				} while ((nb_rx > 0) &&
2192 					(timer_tsc < timer_period));
2193 				timer_tsc = 0;
2194 			}
2195 		}
2196 		rte_delay_ms(10); /* wait 10 milliseconds before retrying */
2197 	}
2198 }
2199 
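/*
 * Forwarding loop of one lcore: run the packet forwarding callback on every
 * stream assigned to this lcore until it is told to stop, optionally
 * updating bitrate and latency statistics along the way.
 */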
2200 static void
2201 run_pkt_fwd_on_lcore(struct fwd_lcore *fc, packet_fwd_t pkt_fwd)
2202 {
2203 	struct fwd_stream **fsm;
2204 	streamid_t nb_fs;
2205 	streamid_t sm_id;
2206 #ifdef RTE_LIB_BITRATESTATS
2207 	uint64_t tics_per_1sec;
2208 	uint64_t tics_datum;
2209 	uint64_t tics_current;
2210 	uint16_t i, cnt_ports;
2211 
2212 	cnt_ports = nb_ports;
2213 	tics_datum = rte_rdtsc();
2214 	tics_per_1sec = rte_get_timer_hz();
2215 #endif
2216 	fsm = &fwd_streams[fc->stream_idx];
2217 	nb_fs = fc->stream_nb;
2218 	do {
2219 		for (sm_id = 0; sm_id < nb_fs; sm_id++)
2220 			(*pkt_fwd)(fsm[sm_id]);
2221 #ifdef RTE_LIB_BITRATESTATS
2222 		if (bitrate_enabled != 0 &&
2223 				bitrate_lcore_id == rte_lcore_id()) {
2224 			tics_current = rte_rdtsc();
2225 			if (tics_current - tics_datum >= tics_per_1sec) {
2226 				/* Periodic bitrate calculation */
2227 				for (i = 0; i < cnt_ports; i++)
2228 					rte_stats_bitrate_calc(bitrate_data,
2229 						ports_ids[i]);
2230 				tics_datum = tics_current;
2231 			}
2232 		}
2233 #endif
2234 #ifdef RTE_LIB_LATENCYSTATS
2235 		if (latencystats_enabled != 0 &&
2236 				latencystats_lcore_id == rte_lcore_id())
2237 			rte_latencystats_update();
2238 #endif
2239 
2240 	} while (! fc->stopped);
2241 }
2242 
2243 static int
2244 start_pkt_forward_on_core(void *fwd_arg)
2245 {
2246 	run_pkt_fwd_on_lcore((struct fwd_lcore *) fwd_arg,
2247 			     cur_fwd_config.fwd_eng->packet_fwd);
2248 	return 0;
2249 }
2250 
2251 /*
2252  * Run the TXONLY packet forwarding engine to send a single burst of packets.
2253  * Used to start communication flows in network loopback test configurations.
2254  */
2255 static int
2256 run_one_txonly_burst_on_core(void *fwd_arg)
2257 {
2258 	struct fwd_lcore *fwd_lc;
2259 	struct fwd_lcore tmp_lcore;
2260 
2261 	fwd_lc = (struct fwd_lcore *) fwd_arg;
2262 	tmp_lcore = *fwd_lc;
2263 	tmp_lcore.stopped = 1;
2264 	run_pkt_fwd_on_lcore(&tmp_lcore, tx_only_engine.packet_fwd);
2265 	return 0;
2266 }
2267 
2268 /*
2269  * Launch packet forwarding:
2270  *     - Setup per-port forwarding context.
2271  *     - launch logical cores with their forwarding configuration.
2272  */
2273 static void
2274 launch_packet_forwarding(lcore_function_t *pkt_fwd_on_lcore)
2275 {
2276 	unsigned int i;
2277 	unsigned int lc_id;
2278 	int diag;
2279 
2280 	for (i = 0; i < cur_fwd_config.nb_fwd_lcores; i++) {
2281 		lc_id = fwd_lcores_cpuids[i];
2282 		if ((interactive == 0) || (lc_id != rte_lcore_id())) {
2283 			fwd_lcores[i]->stopped = 0;
2284 			diag = rte_eal_remote_launch(pkt_fwd_on_lcore,
2285 						     fwd_lcores[i], lc_id);
2286 			if (diag != 0)
2287 				fprintf(stderr,
2288 					"launch lcore %u failed - diag=%d\n",
2289 					lc_id, diag);
2290 		}
2291 	}
2292 }
2293 
2294 /*
2295  * Launch packet forwarding configuration.
2296  */
2297 void
2298 start_packet_forwarding(int with_tx_first)
2299 {
2300 	port_fwd_begin_t port_fwd_begin;
2301 	port_fwd_end_t  port_fwd_end;
2302 	unsigned int i;
2303 
2304 	if (strcmp(cur_fwd_eng->fwd_mode_name, "rxonly") == 0 && !nb_rxq)
2305 		rte_exit(EXIT_FAILURE, "rxq is 0, cannot use rxonly fwd mode\n");
2306 
2307 	if (strcmp(cur_fwd_eng->fwd_mode_name, "txonly") == 0 && !nb_txq)
2308 		rte_exit(EXIT_FAILURE, "txq is 0, cannot use txonly fwd mode\n");
2309 
2310 	if ((strcmp(cur_fwd_eng->fwd_mode_name, "rxonly") != 0 &&
2311 		strcmp(cur_fwd_eng->fwd_mode_name, "txonly") != 0) &&
2312 		(!nb_rxq || !nb_txq))
2313 		rte_exit(EXIT_FAILURE,
2314 			"Either rxq or txq is 0, cannot use %s fwd mode\n",
2315 			cur_fwd_eng->fwd_mode_name);
2316 
2317 	if (all_ports_started() == 0) {
2318 		fprintf(stderr, "Not all ports were started\n");
2319 		return;
2320 	}
2321 	if (test_done == 0) {
2322 		fprintf(stderr, "Packet forwarding already started\n");
2323 		return;
2324 	}
2325 
2326 	fwd_config_setup();
2327 
2328 	pkt_fwd_config_display(&cur_fwd_config);
2329 	if (!pkt_fwd_shared_rxq_check())
2330 		return;
2331 
2332 	port_fwd_begin = cur_fwd_config.fwd_eng->port_fwd_begin;
2333 	if (port_fwd_begin != NULL) {
2334 		for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
2335 			if (port_fwd_begin(fwd_ports_ids[i])) {
2336 				fprintf(stderr,
2337 					"Packet forwarding is not ready\n");
2338 				return;
2339 			}
2340 		}
2341 	}
2342 
2343 	if (with_tx_first) {
2344 		port_fwd_begin = tx_only_engine.port_fwd_begin;
2345 		if (port_fwd_begin != NULL) {
2346 			for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
2347 				if (port_fwd_begin(fwd_ports_ids[i])) {
2348 					fprintf(stderr,
2349 						"Packet forwarding is not ready\n");
2350 					return;
2351 				}
2352 			}
2353 		}
2354 	}
2355 
2356 	test_done = 0;
2357 
2358 	if (!no_flush_rx)
2359 		flush_fwd_rx_queues();
2360 
2361 	rxtx_config_display();
2362 
2363 	fwd_stats_reset();
2364 	if (with_tx_first) {
2365 		while (with_tx_first--) {
2366 			launch_packet_forwarding(
2367 					run_one_txonly_burst_on_core);
2368 			rte_eal_mp_wait_lcore();
2369 		}
2370 		port_fwd_end = tx_only_engine.port_fwd_end;
2371 		if (port_fwd_end != NULL) {
2372 			for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++)
2373 				(*port_fwd_end)(fwd_ports_ids[i]);
2374 		}
2375 	}
2376 	launch_packet_forwarding(start_pkt_forward_on_core);
2377 }
2378 
2379 void
2380 stop_packet_forwarding(void)
2381 {
2382 	port_fwd_end_t port_fwd_end;
2383 	lcoreid_t lc_id;
2384 	portid_t pt_id;
2385 	int i;
2386 
2387 	if (test_done) {
2388 		fprintf(stderr, "Packet forwarding not started\n");
2389 		return;
2390 	}
2391 	printf("Telling cores to stop...");
2392 	for (lc_id = 0; lc_id < cur_fwd_config.nb_fwd_lcores; lc_id++)
2393 		fwd_lcores[lc_id]->stopped = 1;
2394 	printf("\nWaiting for lcores to finish...\n");
2395 	rte_eal_mp_wait_lcore();
2396 	port_fwd_end = cur_fwd_config.fwd_eng->port_fwd_end;
2397 	if (port_fwd_end != NULL) {
2398 		for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
2399 			pt_id = fwd_ports_ids[i];
2400 			(*port_fwd_end)(pt_id);
2401 		}
2402 	}
2403 
2404 	fwd_stats_display();
2405 
2406 	printf("\nDone.\n");
2407 	test_done = 1;
2408 }
2409 
2410 void
2411 dev_set_link_up(portid_t pid)
2412 {
2413 	if (rte_eth_dev_set_link_up(pid) < 0)
2414 		fprintf(stderr, "\nSet link up fail.\n");
2415 }
2416 
2417 void
2418 dev_set_link_down(portid_t pid)
2419 {
2420 	if (rte_eth_dev_set_link_down(pid) < 0)
2421 		fprintf(stderr, "\nSet link down fail.\n");
2422 }
2423 
2424 static int
2425 all_ports_started(void)
2426 {
2427 	portid_t pi;
2428 	struct rte_port *port;
2429 
2430 	RTE_ETH_FOREACH_DEV(pi) {
2431 		port = &ports[pi];
2432 		/* Check if there is a port which is not started */
2433 		if ((port->port_status != RTE_PORT_STARTED) &&
2434 			(port->slave_flag == 0))
2435 			return 0;
2436 	}
2437 
2438 	/* All ports are started */
2439 	return 1;
2440 }
2441 
2442 int
2443 port_is_stopped(portid_t port_id)
2444 {
2445 	struct rte_port *port = &ports[port_id];
2446 
2447 	if ((port->port_status != RTE_PORT_STOPPED) &&
2448 	    (port->slave_flag == 0))
2449 		return 0;
2450 	return 1;
2451 }
2452 
2453 int
2454 all_ports_stopped(void)
2455 {
2456 	portid_t pi;
2457 
2458 	RTE_ETH_FOREACH_DEV(pi) {
2459 		if (!port_is_stopped(pi))
2460 			return 0;
2461 	}
2462 
2463 	return 1;
2464 }
2465 
2466 int
2467 port_is_started(portid_t port_id)
2468 {
2469 	if (port_id_is_invalid(port_id, ENABLED_WARN))
2470 		return 0;
2471 
2472 	if (ports[port_id].port_status != RTE_PORT_STARTED)
2473 		return 0;
2474 
2475 	return 1;
2476 }
2477 
2478 /* Configure the Rx and Tx hairpin queues for the selected port. */
2479 static int
2480 setup_hairpin_queues(portid_t pi, portid_t p_pi, uint16_t cnt_pi)
2481 {
2482 	queueid_t qi;
2483 	struct rte_eth_hairpin_conf hairpin_conf = {
2484 		.peer_count = 1,
2485 	};
2486 	int i;
2487 	int diag;
2488 	struct rte_port *port = &ports[pi];
2489 	uint16_t peer_rx_port = pi;
2490 	uint16_t peer_tx_port = pi;
2491 	uint32_t manual = 1;
2492 	uint32_t tx_exp = hairpin_mode & 0x10;
2493 
2494 	if (!(hairpin_mode & 0xf)) {
2495 		peer_rx_port = pi;
2496 		peer_tx_port = pi;
2497 		manual = 0;
2498 	} else if (hairpin_mode & 0x1) {
2499 		peer_tx_port = rte_eth_find_next_owned_by(pi + 1,
2500 						       RTE_ETH_DEV_NO_OWNER);
2501 		if (peer_tx_port >= RTE_MAX_ETHPORTS)
2502 			peer_tx_port = rte_eth_find_next_owned_by(0,
2503 						RTE_ETH_DEV_NO_OWNER);
2504 		if (p_pi != RTE_MAX_ETHPORTS) {
2505 			peer_rx_port = p_pi;
2506 		} else {
2507 			uint16_t next_pi;
2508 
2509 			/* Last port will be the peer RX port of the first. */
2510 			RTE_ETH_FOREACH_DEV(next_pi)
2511 				peer_rx_port = next_pi;
2512 		}
2513 		manual = 1;
2514 	} else if (hairpin_mode & 0x2) {
2515 		if (cnt_pi & 0x1) {
2516 			peer_rx_port = p_pi;
2517 		} else {
2518 			peer_rx_port = rte_eth_find_next_owned_by(pi + 1,
2519 						RTE_ETH_DEV_NO_OWNER);
2520 			if (peer_rx_port >= RTE_MAX_ETHPORTS)
2521 				peer_rx_port = pi;
2522 		}
2523 		peer_tx_port = peer_rx_port;
2524 		manual = 1;
2525 	}
2526 
2527 	for (qi = nb_txq, i = 0; qi < nb_hairpinq + nb_txq; qi++) {
2528 		hairpin_conf.peers[0].port = peer_rx_port;
2529 		hairpin_conf.peers[0].queue = i + nb_rxq;
2530 		hairpin_conf.manual_bind = !!manual;
2531 		hairpin_conf.tx_explicit = !!tx_exp;
2532 		diag = rte_eth_tx_hairpin_queue_setup
2533 			(pi, qi, nb_txd, &hairpin_conf);
2534 		i++;
2535 		if (diag == 0)
2536 			continue;
2537 
2538 		/* Failed to set up Tx hairpin queue, return */
2539 		if (port->port_status == RTE_PORT_HANDLING)
2540 			port->port_status = RTE_PORT_STOPPED;
2541 		else
2542 			fprintf(stderr,
2543 				"Port %d can not be set back to stopped\n", pi);
2544 		fprintf(stderr, "Fail to configure port %d hairpin queues\n",
2545 			pi);
2546 		/* try to reconfigure queues next time */
2547 		port->need_reconfig_queues = 1;
2548 		return -1;
2549 	}
2550 	for (qi = nb_rxq, i = 0; qi < nb_hairpinq + nb_rxq; qi++) {
2551 		hairpin_conf.peers[0].port = peer_tx_port;
2552 		hairpin_conf.peers[0].queue = i + nb_txq;
2553 		hairpin_conf.manual_bind = !!manual;
2554 		hairpin_conf.tx_explicit = !!tx_exp;
2555 		diag = rte_eth_rx_hairpin_queue_setup
2556 			(pi, qi, nb_rxd, &hairpin_conf);
2557 		i++;
2558 		if (diag == 0)
2559 			continue;
2560 
2561 		/* Failed to set up Rx hairpin queue, return */
2562 		if (port->port_status == RTE_PORT_HANDLING)
2563 			port->port_status = RTE_PORT_STOPPED;
2564 		else
2565 			fprintf(stderr,
2566 				"Port %d can not be set back to stopped\n", pi);
2567 		fprintf(stderr, "Fail to configure port %d hairpin queues\n",
2568 			pi);
2569 		/* try to reconfigure queues next time */
2570 		port->need_reconfig_queues = 1;
2571 		return -1;
2572 	}
2573 	return 0;
2574 }
2575 
2576 /* Configure the Rx with optional split. */
2577 int
2578 rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
2579 	       uint16_t nb_rx_desc, unsigned int socket_id,
2580 	       struct rte_eth_rxconf *rx_conf, struct rte_mempool *mp)
2581 {
2582 	union rte_eth_rxseg rx_useg[MAX_SEGS_BUFFER_SPLIT] = {};
2583 	unsigned int i, mp_n;
2584 	int ret;
2585 
2586 	if (rx_pkt_nb_segs <= 1 ||
2587 	    (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) == 0) {
2588 		rx_conf->rx_seg = NULL;
2589 		rx_conf->rx_nseg = 0;
2590 		ret = rte_eth_rx_queue_setup(port_id, rx_queue_id,
2591 					     nb_rx_desc, socket_id,
2592 					     rx_conf, mp);
2593 		return ret;
2594 	}
2595 	for (i = 0; i < rx_pkt_nb_segs; i++) {
2596 		struct rte_eth_rxseg_split *rx_seg = &rx_useg[i].split;
2597 		struct rte_mempool *mpx;
2598 		/*
2599 		 * Use the last valid pool for segments whose index
2600 		 * exceeds the number of configured mempools.
2601 		 */
2602 		mp_n = (i >= mbuf_data_size_n) ? mbuf_data_size_n - 1 : i;
2603 		mpx = mbuf_pool_find(socket_id, mp_n);
2604 		/* A zero segment length means use the mbuf data buffer size. */
2605 		rx_seg->length = rx_pkt_seg_lengths[i] ?
2606 				   rx_pkt_seg_lengths[i] :
2607 				   mbuf_data_size[mp_n];
2608 		rx_seg->offset = i < rx_pkt_nb_offs ?
2609 				   rx_pkt_seg_offsets[i] : 0;
2610 		rx_seg->mp = mpx ? mpx : mp;
2611 	}
2612 	rx_conf->rx_nseg = rx_pkt_nb_segs;
2613 	rx_conf->rx_seg = rx_useg;
2614 	ret = rte_eth_rx_queue_setup(port_id, rx_queue_id, nb_rx_desc,
2615 				    socket_id, rx_conf, NULL);
2616 	rx_conf->rx_seg = NULL;
2617 	rx_conf->rx_nseg = 0;
2618 	return ret;
2619 }
2620 
2621 static int
2622 alloc_xstats_display_info(portid_t pi)
2623 {
2624 	uint64_t **ids_supp = &ports[pi].xstats_info.ids_supp;
2625 	uint64_t **prev_values = &ports[pi].xstats_info.prev_values;
2626 	uint64_t **curr_values = &ports[pi].xstats_info.curr_values;
2627 
2628 	if (xstats_display_num == 0)
2629 		return 0;
2630 
2631 	*ids_supp = calloc(xstats_display_num, sizeof(**ids_supp));
2632 	if (*ids_supp == NULL)
2633 		goto fail_ids_supp;
2634 
2635 	*prev_values = calloc(xstats_display_num,
2636 			      sizeof(**prev_values));
2637 	if (*prev_values == NULL)
2638 		goto fail_prev_values;
2639 
2640 	*curr_values = calloc(xstats_display_num,
2641 			      sizeof(**curr_values));
2642 	if (*curr_values == NULL)
2643 		goto fail_curr_values;
2644 
2645 	ports[pi].xstats_info.allocated = true;
2646 
2647 	return 0;
2648 
2649 fail_curr_values:
2650 	free(*prev_values);
2651 fail_prev_values:
2652 	free(*ids_supp);
2653 fail_ids_supp:
2654 	return -ENOMEM;
2655 }
2656 
2657 static void
2658 free_xstats_display_info(portid_t pi)
2659 {
2660 	if (!ports[pi].xstats_info.allocated)
2661 		return;
2662 	free(ports[pi].xstats_info.ids_supp);
2663 	free(ports[pi].xstats_info.prev_values);
2664 	free(ports[pi].xstats_info.curr_values);
2665 	ports[pi].xstats_info.allocated = false;
2666 }
2667 
2668 /** Fill helper structures for specified port to show extended statistics. */
2669 static void
2670 fill_xstats_display_info_for_port(portid_t pi)
2671 {
2672 	unsigned int stat, stat_supp;
2673 	const char *xstat_name;
2674 	struct rte_port *port;
2675 	uint64_t *ids_supp;
2676 	int rc;
2677 
2678 	if (xstats_display_num == 0)
2679 		return;
2680 
2681 	if (pi == (portid_t)RTE_PORT_ALL) {
2682 		fill_xstats_display_info();
2683 		return;
2684 	}
2685 
2686 	port = &ports[pi];
2687 	if (port->port_status != RTE_PORT_STARTED)
2688 		return;
2689 
2690 	if (!port->xstats_info.allocated && alloc_xstats_display_info(pi) != 0)
2691 		rte_exit(EXIT_FAILURE,
2692 			 "Failed to allocate xstats display memory\n");
2693 
2694 	ids_supp = port->xstats_info.ids_supp;
2695 	for (stat = stat_supp = 0; stat < xstats_display_num; stat++) {
2696 		xstat_name = xstats_display[stat].name;
2697 		rc = rte_eth_xstats_get_id_by_name(pi, xstat_name,
2698 						   ids_supp + stat_supp);
2699 		if (rc != 0) {
2700 			fprintf(stderr, "No xstat '%s' on port %u - skipping (display index %u)\n",
2701 				xstat_name, pi, stat);
2702 			continue;
2703 		}
2704 		stat_supp++;
2705 	}
2706 
2707 	port->xstats_info.ids_supp_sz = stat_supp;
2708 }
2709 
2710 /** Fill helper structures for all ports to show extended statistics. */
2711 static void
2712 fill_xstats_display_info(void)
2713 {
2714 	portid_t pi;
2715 
2716 	if (xstats_display_num == 0)
2717 		return;
2718 
2719 	RTE_ETH_FOREACH_DEV(pi)
2720 		fill_xstats_display_info_for_port(pi);
2721 }
2722 
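/*
 * Configure (if needed) and start the given port, or all ports when
 * RTE_PORT_ALL is passed. Returns 0 on success, a negative value on
 * configuration or queue setup failure.
 */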
2723 int
2724 start_port(portid_t pid)
2725 {
2726 	int diag, need_check_link_status = -1;
2727 	portid_t pi;
2728 	portid_t p_pi = RTE_MAX_ETHPORTS;
2729 	portid_t pl[RTE_MAX_ETHPORTS];
2730 	portid_t peer_pl[RTE_MAX_ETHPORTS];
2731 	uint16_t cnt_pi = 0;
2732 	uint16_t cfg_pi = 0;
2733 	int peer_pi;
2734 	queueid_t qi;
2735 	struct rte_port *port;
2736 	struct rte_eth_hairpin_cap cap;
2737 
2738 	if (port_id_is_invalid(pid, ENABLED_WARN))
2739 		return 0;
2740 
2741 	RTE_ETH_FOREACH_DEV(pi) {
2742 		if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
2743 			continue;
2744 
2745 		need_check_link_status = 0;
2746 		port = &ports[pi];
2747 		if (port->port_status == RTE_PORT_STOPPED)
2748 			port->port_status = RTE_PORT_HANDLING;
2749 		else {
2750 			fprintf(stderr, "Port %d is not stopped, skipping it\n", pi);
2751 			continue;
2752 		}
2753 
2754 		if (port->need_reconfig > 0) {
2755 			struct rte_eth_conf dev_conf;
2756 			int k;
2757 
2758 			port->need_reconfig = 0;
2759 
2760 			if (flow_isolate_all) {
2761 				int ret = port_flow_isolate(pi, 1);
2762 				if (ret) {
2763 					fprintf(stderr,
2764 						"Failed to apply isolated mode on port %d\n",
2765 						pi);
2766 					return -1;
2767 				}
2768 			}
2769 			configure_rxtx_dump_callbacks(0);
2770 			printf("Configuring Port %d (socket %u)\n", pi,
2771 					port->socket_id);
2772 			if (nb_hairpinq > 0 &&
2773 			    rte_eth_dev_hairpin_capability_get(pi, &cap)) {
2774 				fprintf(stderr,
2775 					"Port %d doesn't support hairpin queues\n",
2776 					pi);
2777 				return -1;
2778 			}
2779 
2780 			/* configure port */
2781 			diag = eth_dev_configure_mp(pi, nb_rxq + nb_hairpinq,
2782 						     nb_txq + nb_hairpinq,
2783 						     &(port->dev_conf));
2784 			if (diag != 0) {
2785 				if (port->port_status == RTE_PORT_HANDLING)
2786 					port->port_status = RTE_PORT_STOPPED;
2787 				else
2788 					fprintf(stderr,
2789 						"Port %d can not be set back to stopped\n",
2790 						pi);
2791 				fprintf(stderr, "Fail to configure port %d\n",
2792 					pi);
2793 				/* try to reconfigure port next time */
2794 				port->need_reconfig = 1;
2795 				return -1;
2796 			}
2797 			/* get device configuration */
2798 			if (0 !=
2799 				eth_dev_conf_get_print_err(pi, &dev_conf)) {
2800 				fprintf(stderr,
2801 					"port %d can not get device configuration\n",
2802 					pi);
2803 				return -1;
2804 			}
2805 			/* Apply Rx offloads configuration */
2806 			if (dev_conf.rxmode.offloads !=
2807 			    port->dev_conf.rxmode.offloads) {
2808 				port->dev_conf.rxmode.offloads |=
2809 					dev_conf.rxmode.offloads;
2810 				for (k = 0;
2811 				     k < port->dev_info.max_rx_queues;
2812 				     k++)
2813 					port->rx_conf[k].offloads |=
2814 						dev_conf.rxmode.offloads;
2815 			}
2816 			/* Apply Tx offloads configuration */
2817 			if (dev_conf.txmode.offloads !=
2818 			    port->dev_conf.txmode.offloads) {
2819 				port->dev_conf.txmode.offloads |=
2820 					dev_conf.txmode.offloads;
2821 				for (k = 0;
2822 				     k < port->dev_info.max_tx_queues;
2823 				     k++)
2824 					port->tx_conf[k].offloads |=
2825 						dev_conf.txmode.offloads;
2826 			}
2827 		}
2828 		if (port->need_reconfig_queues > 0 && is_proc_primary()) {
2829 			port->need_reconfig_queues = 0;
2830 			/* setup tx queues */
2831 			for (qi = 0; qi < nb_txq; qi++) {
2832 				if ((numa_support) &&
2833 					(txring_numa[pi] != NUMA_NO_CONFIG))
2834 					diag = rte_eth_tx_queue_setup(pi, qi,
2835 						port->nb_tx_desc[qi],
2836 						txring_numa[pi],
2837 						&(port->tx_conf[qi]));
2838 				else
2839 					diag = rte_eth_tx_queue_setup(pi, qi,
2840 						port->nb_tx_desc[qi],
2841 						port->socket_id,
2842 						&(port->tx_conf[qi]));
2843 
2844 				if (diag == 0)
2845 					continue;
2846 
2847 				/* Fail to setup tx queue, return */
2848 				if (port->port_status == RTE_PORT_HANDLING)
2849 					port->port_status = RTE_PORT_STOPPED;
2850 				else
2851 					fprintf(stderr,
2852 						"Port %d can not be set back to stopped\n",
2853 						pi);
2854 				fprintf(stderr,
2855 					"Fail to configure port %d tx queues\n",
2856 					pi);
2857 				/* try to reconfigure queues next time */
2858 				port->need_reconfig_queues = 1;
2859 				return -1;
2860 			}
2861 			for (qi = 0; qi < nb_rxq; qi++) {
2862 				/* setup rx queues */
2863 				if ((numa_support) &&
2864 					(rxring_numa[pi] != NUMA_NO_CONFIG)) {
2865 					struct rte_mempool * mp =
2866 						mbuf_pool_find
2867 							(rxring_numa[pi], 0);
2868 					if (mp == NULL) {
2869 						fprintf(stderr,
2870 							"Failed to setup RX queue: No mempool allocation on the socket %d\n",
2871 							rxring_numa[pi]);
2872 						return -1;
2873 					}
2874 
2875 					diag = rx_queue_setup(pi, qi,
2876 					     port->nb_rx_desc[qi],
2877 					     rxring_numa[pi],
2878 					     &(port->rx_conf[qi]),
2879 					     mp);
2880 				} else {
2881 					struct rte_mempool *mp =
2882 						mbuf_pool_find
2883 							(port->socket_id, 0);
2884 					if (mp == NULL) {
2885 						fprintf(stderr,
2886 							"Failed to setup RX queue: No mempool allocation on the socket %d\n",
2887 							port->socket_id);
2888 						return -1;
2889 					}
2890 					diag = rx_queue_setup(pi, qi,
2891 					     port->nb_rx_desc[qi],
2892 					     port->socket_id,
2893 					     &(port->rx_conf[qi]),
2894 					     mp);
2895 				}
2896 				if (diag == 0)
2897 					continue;
2898 
2899 				/* Fail to setup rx queue, return */
2900 				if (port->port_status == RTE_PORT_HANDLING)
2901 					port->port_status = RTE_PORT_STOPPED;
2902 				else
2903 					fprintf(stderr,
2904 						"Port %d can not be set back to stopped\n",
2905 						pi);
2906 				fprintf(stderr,
2907 					"Fail to configure port %d rx queues\n",
2908 					pi);
2909 				/* try to reconfigure queues next time */
2910 				port->need_reconfig_queues = 1;
2911 				return -1;
2912 			}
2913 			/* setup hairpin queues */
2914 			if (setup_hairpin_queues(pi, p_pi, cnt_pi) != 0)
2915 				return -1;
2916 		}
2917 		configure_rxtx_dump_callbacks(verbose_level);
2918 		if (clear_ptypes) {
2919 			diag = rte_eth_dev_set_ptypes(pi, RTE_PTYPE_UNKNOWN,
2920 					NULL, 0);
2921 			if (diag < 0)
2922 				fprintf(stderr,
2923 					"Port %d: Failed to disable Ptype parsing\n",
2924 					pi);
2925 		}
2926 
2927 		p_pi = pi;
2928 		cnt_pi++;
2929 
2930 		/* start port */
2931 		diag = eth_dev_start_mp(pi);
2932 		if (diag < 0) {
2933 			fprintf(stderr, "Fail to start port %d: %s\n",
2934 				pi, rte_strerror(-diag));
2935 
2936 			/* Failed to start port, set its status back to stopped */
2937 			if (port->port_status == RTE_PORT_HANDLING)
2938 				port->port_status = RTE_PORT_STOPPED;
2939 			else
2940 				fprintf(stderr,
2941 					"Port %d can not be set back to stopped\n",
2942 					pi);
2943 			continue;
2944 		}
2945 
2946 		if (port->port_status == RTE_PORT_HANDLING)
2947 			port->port_status = RTE_PORT_STARTED;
2948 		else
2949 			fprintf(stderr, "Port %d cannot be set to started state\n",
2950 				pi);
2951 
2952 		if (eth_macaddr_get_print_err(pi, &port->eth_addr) == 0)
2953 			printf("Port %d: " RTE_ETHER_ADDR_PRT_FMT "\n", pi,
2954 					RTE_ETHER_ADDR_BYTES(&port->eth_addr));
2955 
2956 		/* at least one port started, need to check link status */
2957 		need_check_link_status = 1;
2958 
2959 		pl[cfg_pi++] = pi;
2960 	}
2961 
2962 	if (need_check_link_status == 1 && !no_link_check)
2963 		check_all_ports_link_status(RTE_PORT_ALL);
2964 	else if (need_check_link_status == 0)
2965 		fprintf(stderr, "Please stop the ports first\n");
2966 
2967 	if (hairpin_mode & 0xf) {
2968 		uint16_t i;
2969 		int j;
2970 
2971 		/* bind all started hairpin ports */
2972 		for (i = 0; i < cfg_pi; i++) {
2973 			pi = pl[i];
2974 			/* bind current Tx to all peer Rx */
2975 			peer_pi = rte_eth_hairpin_get_peer_ports(pi, peer_pl,
2976 							RTE_MAX_ETHPORTS, 1);
2977 			if (peer_pi < 0)
2978 				return peer_pi;
2979 			for (j = 0; j < peer_pi; j++) {
2980 				if (!port_is_started(peer_pl[j]))
2981 					continue;
2982 				diag = rte_eth_hairpin_bind(pi, peer_pl[j]);
2983 				if (diag < 0) {
2984 					fprintf(stderr,
2985 						"Error during binding hairpin Tx port %u to %u: %s\n",
2986 						pi, peer_pl[j],
2987 						rte_strerror(-diag));
2988 					return -1;
2989 				}
2990 			}
2991 			/* bind all peer Tx to current Rx */
2992 			peer_pi = rte_eth_hairpin_get_peer_ports(pi, peer_pl,
2993 							RTE_MAX_ETHPORTS, 0);
2994 			if (peer_pi < 0)
2995 				return peer_pi;
2996 			for (j = 0; j < peer_pi; j++) {
2997 				if (!port_is_started(peer_pl[j]))
2998 					continue;
2999 				diag = rte_eth_hairpin_bind(peer_pl[j], pi);
3000 				if (diag < 0) {
3001 					fprintf(stderr,
3002 						"Error during binding hairpin Tx port %u to %u: %s\n",
3003 						peer_pl[j], pi,
3004 						rte_strerror(-diag));
3005 					return -1;
3006 				}
3007 			}
3008 		}
3009 	}
3010 
3011 	fill_xstats_display_info_for_port(pid);
3012 
3013 	printf("Done\n");
3014 	return 0;
3015 }
3016 
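/*
 * Stop the given port, or all ports when RTE_PORT_ALL is passed. Ports that
 * are still forwarding or act as bonding slaves are skipped.
 */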
3017 void
3018 stop_port(portid_t pid)
3019 {
3020 	portid_t pi;
3021 	struct rte_port *port;
3022 	int need_check_link_status = 0;
3023 	portid_t peer_pl[RTE_MAX_ETHPORTS];
3024 	int peer_pi;
3025 
3026 	if (port_id_is_invalid(pid, ENABLED_WARN))
3027 		return;
3028 
3029 	printf("Stopping ports...\n");
3030 
3031 	RTE_ETH_FOREACH_DEV(pi) {
3032 		if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
3033 			continue;
3034 
3035 		if (port_is_forwarding(pi) != 0 && test_done == 0) {
3036 			fprintf(stderr,
3037 				"Please remove port %d from forwarding configuration.\n",
3038 				pi);
3039 			continue;
3040 		}
3041 
3042 		if (port_is_bonding_slave(pi)) {
3043 			fprintf(stderr,
3044 				"Please remove port %d from bonded device.\n",
3045 				pi);
3046 			continue;
3047 		}
3048 
3049 		port = &ports[pi];
3050 		if (port->port_status == RTE_PORT_STARTED)
3051 			port->port_status = RTE_PORT_HANDLING;
3052 		else
3053 			continue;
3054 
3055 		if (hairpin_mode & 0xf) {
3056 			int j;
3057 
3058 			rte_eth_hairpin_unbind(pi, RTE_MAX_ETHPORTS);
3059 			/* unbind all peer Tx from current Rx */
3060 			peer_pi = rte_eth_hairpin_get_peer_ports(pi, peer_pl,
3061 							RTE_MAX_ETHPORTS, 0);
3062 			if (peer_pi < 0)
3063 				continue;
3064 			for (j = 0; j < peer_pi; j++) {
3065 				if (!port_is_started(peer_pl[j]))
3066 					continue;
3067 				rte_eth_hairpin_unbind(peer_pl[j], pi);
3068 			}
3069 		}
3070 
3071 		if (port->flow_list)
3072 			port_flow_flush(pi);
3073 
3074 		if (eth_dev_stop_mp(pi) != 0)
3075 			RTE_LOG(ERR, EAL, "rte_eth_dev_stop failed for port %u\n",
3076 				pi);
3077 
3078 		if (port->port_status == RTE_PORT_HANDLING)
3079 			port->port_status = RTE_PORT_STOPPED;
3080 		else
3081 			fprintf(stderr, "Port %d cannot be set to stopped state\n",
3082 				pi);
3083 		need_check_link_status = 1;
3084 	}
3085 	if (need_check_link_status && !no_link_check)
3086 		check_all_ports_link_status(RTE_PORT_ALL);
3087 
3088 	printf("Done\n");
3089 }
3090 
3091 static void
3092 remove_invalid_ports_in(portid_t *array, portid_t *total)
3093 {
3094 	portid_t i;
3095 	portid_t new_total = 0;
3096 
3097 	for (i = 0; i < *total; i++)
3098 		if (!port_id_is_invalid(array[i], DISABLED_WARN)) {
3099 			array[new_total] = array[i];
3100 			new_total++;
3101 		}
3102 	*total = new_total;
3103 }
3104 
3105 static void
3106 remove_invalid_ports(void)
3107 {
3108 	remove_invalid_ports_in(ports_ids, &nb_ports);
3109 	remove_invalid_ports_in(fwd_ports_ids, &nb_fwd_ports);
3110 	nb_cfg_ports = nb_fwd_ports;
3111 }
3112 
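/*
 * Close the given port, or all ports when RTE_PORT_ALL is passed, flushing
 * flow rules and flex items first (primary process only).
 */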
3113 void
3114 close_port(portid_t pid)
3115 {
3116 	portid_t pi;
3117 	struct rte_port *port;
3118 
3119 	if (port_id_is_invalid(pid, ENABLED_WARN))
3120 		return;
3121 
3122 	printf("Closing ports...\n");
3123 
3124 	RTE_ETH_FOREACH_DEV(pi) {
3125 		if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
3126 			continue;
3127 
3128 		if (port_is_forwarding(pi) != 0 && test_done == 0) {
3129 			fprintf(stderr,
3130 				"Please remove port %d from forwarding configuration.\n",
3131 				pi);
3132 			continue;
3133 		}
3134 
3135 		if (port_is_bonding_slave(pi)) {
3136 			fprintf(stderr,
3137 				"Please remove port %d from bonded device.\n",
3138 				pi);
3139 			continue;
3140 		}
3141 
3142 		port = &ports[pi];
3143 		if (port->port_status == RTE_PORT_CLOSED) {
3144 			fprintf(stderr, "Port %d is already closed\n", pi);
3145 			continue;
3146 		}
3147 
3148 		if (is_proc_primary()) {
3149 			port_flow_flush(pi);
3150 			port_flex_item_flush(pi);
3151 			rte_eth_dev_close(pi);
3152 		}
3153 
3154 		free_xstats_display_info(pi);
3155 	}
3156 
3157 	remove_invalid_ports();
3158 	printf("Done\n");
3159 }
3160 
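/*
 * Reset the given port, or all ports when RTE_PORT_ALL is passed. The
 * selected ports must be stopped first; a successful reset marks the port
 * for reconfiguration at the next start.
 */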
3161 void
3162 reset_port(portid_t pid)
3163 {
3164 	int diag;
3165 	portid_t pi;
3166 	struct rte_port *port;
3167 
3168 	if (port_id_is_invalid(pid, ENABLED_WARN))
3169 		return;
3170 
3171 	if ((pid == (portid_t)RTE_PORT_ALL && !all_ports_stopped()) ||
3172 		(pid != (portid_t)RTE_PORT_ALL && !port_is_stopped(pid))) {
3173 		fprintf(stderr,
3174 			"Can not reset port(s), please stop port(s) first.\n");
3175 		return;
3176 	}
3177 
3178 	printf("Resetting ports...\n");
3179 
3180 	RTE_ETH_FOREACH_DEV(pi) {
3181 		if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
3182 			continue;
3183 
3184 		if (port_is_forwarding(pi) != 0 && test_done == 0) {
3185 			fprintf(stderr,
3186 				"Please remove port %d from forwarding configuration.\n",
3187 				pi);
3188 			continue;
3189 		}
3190 
3191 		if (port_is_bonding_slave(pi)) {
3192 			fprintf(stderr,
3193 				"Please remove port %d from bonded device.\n",
3194 				pi);
3195 			continue;
3196 		}
3197 
3198 		diag = rte_eth_dev_reset(pi);
3199 		if (diag == 0) {
3200 			port = &ports[pi];
3201 			port->need_reconfig = 1;
3202 			port->need_reconfig_queues = 1;
3203 		} else {
3204 			fprintf(stderr, "Failed to reset port %d. diag=%d\n",
3205 				pi, diag);
3206 		}
3207 	}
3208 
3209 	printf("Done\n");
3210 }
3211 
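/*
 * Probe the device described by the identifier (devargs string) and set up
 * the ports it exposes, either from the RTE_ETH_EVENT_NEW event or by
 * iterating over the matching devices.
 */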
3212 void
3213 attach_port(char *identifier)
3214 {
3215 	portid_t pi;
3216 	struct rte_dev_iterator iterator;
3217 
3218 	printf("Attaching a new port...\n");
3219 
3220 	if (identifier == NULL) {
3221 		fprintf(stderr, "Invalid parameters are specified\n");
3222 		return;
3223 	}
3224 
3225 	if (rte_dev_probe(identifier) < 0) {
3226 		TESTPMD_LOG(ERR, "Failed to attach port %s\n", identifier);
3227 		return;
3228 	}
3229 
3230 	/* first attach mode: event */
3231 	if (setup_on_probe_event) {
3232 		/* new ports are detected on RTE_ETH_EVENT_NEW event */
3233 		for (pi = 0; pi < RTE_MAX_ETHPORTS; pi++)
3234 			if (ports[pi].port_status == RTE_PORT_HANDLING &&
3235 					ports[pi].need_setup != 0)
3236 				setup_attached_port(pi);
3237 		return;
3238 	}
3239 
3240 	/* second attach mode: iterator */
3241 	RTE_ETH_FOREACH_MATCHING_DEV(pi, identifier, &iterator) {
3242 		/* setup ports matching the devargs used for probing */
3243 		if (port_is_forwarding(pi))
3244 			continue; /* port was already attached before */
3245 		setup_attached_port(pi);
3246 	}
3247 }
3248 
3249 static void
3250 setup_attached_port(portid_t pi)
3251 {
3252 	unsigned int socket_id;
3253 	int ret;
3254 
3255 	socket_id = (unsigned)rte_eth_dev_socket_id(pi);
3256 	/* if socket_id is invalid, set to the first available socket. */
3257 	if (check_socket_id(socket_id) < 0)
3258 		socket_id = socket_ids[0];
3259 	reconfig(pi, socket_id);
3260 	ret = rte_eth_promiscuous_enable(pi);
3261 	if (ret != 0)
3262 		fprintf(stderr,
3263 			"Error during enabling promiscuous mode for port %u: %s - ignore\n",
3264 			pi, rte_strerror(-ret));
3265 
3266 	ports_ids[nb_ports++] = pi;
3267 	fwd_ports_ids[nb_fwd_ports++] = pi;
3268 	nb_cfg_ports = nb_fwd_ports;
3269 	ports[pi].need_setup = 0;
3270 	ports[pi].port_status = RTE_PORT_STOPPED;
3271 
3272 	printf("Port %d is attached. Total number of ports is now %d\n", pi, nb_ports);
3273 	printf("Done\n");
3274 }
3275 
3276 static void
3277 detach_device(struct rte_device *dev)
3278 {
3279 	portid_t sibling;
3280 
3281 	if (dev == NULL) {
3282 		fprintf(stderr, "Device already removed\n");
3283 		return;
3284 	}
3285 
3286 	printf("Removing a device...\n");
3287 
3288 	RTE_ETH_FOREACH_DEV_OF(sibling, dev) {
3289 		if (ports[sibling].port_status != RTE_PORT_CLOSED) {
3290 			if (ports[sibling].port_status != RTE_PORT_STOPPED) {
3291 				fprintf(stderr, "Port %u not stopped\n",
3292 					sibling);
3293 				return;
3294 			}
3295 			port_flow_flush(sibling);
3296 		}
3297 	}
3298 
3299 	if (rte_dev_remove(dev) < 0) {
3300 		TESTPMD_LOG(ERR, "Failed to detach device %s\n", dev->name);
3301 		return;
3302 	}
3303 	remove_invalid_ports();
3304 
3305 	printf("Device is detached\n");
3306 	printf("Total number of ports is now %d\n", nb_ports);
3307 	printf("Done\n");
3308 	return;
3309 }
3310 
3311 void
3312 detach_port_device(portid_t port_id)
3313 {
3314 	int ret;
3315 	struct rte_eth_dev_info dev_info;
3316 
3317 	if (port_id_is_invalid(port_id, ENABLED_WARN))
3318 		return;
3319 
3320 	if (ports[port_id].port_status != RTE_PORT_CLOSED) {
3321 		if (ports[port_id].port_status != RTE_PORT_STOPPED) {
3322 			fprintf(stderr, "Port not stopped\n");
3323 			return;
3324 		}
3325 		fprintf(stderr, "Port was not closed\n");
3326 	}
3327 
3328 	ret = eth_dev_info_get_print_err(port_id, &dev_info);
3329 	if (ret != 0) {
3330 		TESTPMD_LOG(ERR,
3331 			"Failed to get device info for port %d, not detaching\n",
3332 			port_id);
3333 		return;
3334 	}
3335 	detach_device(dev_info.device);
3336 }
3337 
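/*
 * Detach the device matching the devargs identifier after checking that all
 * of its ports are stopped and flushing their flow rules.
 */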
3338 void
3339 detach_devargs(char *identifier)
3340 {
3341 	struct rte_dev_iterator iterator;
3342 	struct rte_devargs da;
3343 	portid_t port_id;
3344 
3345 	printf("Removing a device...\n");
3346 
3347 	memset(&da, 0, sizeof(da));
3348 	if (rte_devargs_parsef(&da, "%s", identifier)) {
3349 		fprintf(stderr, "cannot parse identifier\n");
3350 		return;
3351 	}
3352 
3353 	RTE_ETH_FOREACH_MATCHING_DEV(port_id, identifier, &iterator) {
3354 		if (ports[port_id].port_status != RTE_PORT_CLOSED) {
3355 			if (ports[port_id].port_status != RTE_PORT_STOPPED) {
3356 				fprintf(stderr, "Port %u not stopped\n",
3357 					port_id);
3358 				rte_eth_iterator_cleanup(&iterator);
3359 				rte_devargs_reset(&da);
3360 				return;
3361 			}
3362 			port_flow_flush(port_id);
3363 		}
3364 	}
3365 
3366 	if (rte_eal_hotplug_remove(da.bus->name, da.name) != 0) {
3367 		TESTPMD_LOG(ERR, "Failed to detach device %s(%s)\n",
3368 			    da.name, da.bus->name);
3369 		rte_devargs_reset(&da);
3370 		return;
3371 	}
3372 
3373 	remove_invalid_ports();
3374 
3375 	printf("Device %s is detached\n", identifier);
3376 	printf("Total number of ports is now %d\n", nb_ports);
3377 	printf("Done\n");
3378 	rte_devargs_reset(&da);
3379 }
3380 
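/*
 * Cleanup handler invoked when testpmd exits: stop forwarding, stop and
 * close all ports, stop hotplug monitoring and release the mbuf pools.
 */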
3381 void
3382 pmd_test_exit(void)
3383 {
3384 	portid_t pt_id;
3385 	unsigned int i;
3386 	int ret;
3387 
3388 	if (test_done == 0)
3389 		stop_packet_forwarding();
3390 
3391 #ifndef RTE_EXEC_ENV_WINDOWS
3392 	for (i = 0 ; i < RTE_DIM(mempools) ; i++) {
3393 		if (mempools[i]) {
3394 			if (mp_alloc_type == MP_ALLOC_ANON)
3395 				rte_mempool_mem_iter(mempools[i], dma_unmap_cb,
3396 						     NULL);
3397 		}
3398 	}
3399 #endif
3400 	if (ports != NULL) {
3401 		no_link_check = 1;
3402 		RTE_ETH_FOREACH_DEV(pt_id) {
3403 			printf("\nStopping port %d...\n", pt_id);
3404 			fflush(stdout);
3405 			stop_port(pt_id);
3406 		}
3407 		RTE_ETH_FOREACH_DEV(pt_id) {
3408 			printf("\nShutting down port %d...\n", pt_id);
3409 			fflush(stdout);
3410 			close_port(pt_id);
3411 		}
3412 	}
3413 
3414 	if (hot_plug) {
3415 		ret = rte_dev_event_monitor_stop();
3416 		if (ret) {
3417 			RTE_LOG(ERR, EAL,
3418 				"fail to stop device event monitor.\n");
3419 			return;
3420 		}
3421 
3422 		ret = rte_dev_event_callback_unregister(NULL,
3423 			dev_event_callback, NULL);
3424 		if (ret < 0) {
3425 			RTE_LOG(ERR, EAL,
3426 				"fail to unregister device event callback.\n");
3427 			return;
3428 		}
3429 
3430 		ret = rte_dev_hotplug_handle_disable();
3431 		if (ret) {
3432 			RTE_LOG(ERR, EAL,
3433 				"fail to disable hotplug handling.\n");
3434 			return;
3435 		}
3436 	}
3437 	for (i = 0 ; i < RTE_DIM(mempools) ; i++) {
3438 		if (mempools[i])
3439 			mempool_free_mp(mempools[i]);
3440 	}
3441 	free(xstats_display);
3442 
3443 	printf("\nBye...\n");
3444 }
3445 
3446 typedef void (*cmd_func_t)(void);
3447 struct pmd_test_command {
3448 	const char *cmd_name;
3449 	cmd_func_t cmd_func;
3450 };
3451 
3452 /* Check the link status of all ports for up to 9 s, then print the final status */
3453 static void
3454 check_all_ports_link_status(uint32_t port_mask)
3455 {
3456 #define CHECK_INTERVAL 100 /* 100ms */
3457 #define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
3458 	portid_t portid;
3459 	uint8_t count, all_ports_up, print_flag = 0;
3460 	struct rte_eth_link link;
3461 	int ret;
3462 	char link_status[RTE_ETH_LINK_MAX_STR_LEN];
3463 
3464 	printf("Checking link statuses...\n");
3465 	fflush(stdout);
3466 	for (count = 0; count <= MAX_CHECK_TIME; count++) {
3467 		all_ports_up = 1;
3468 		RTE_ETH_FOREACH_DEV(portid) {
3469 			if ((port_mask & (1 << portid)) == 0)
3470 				continue;
3471 			memset(&link, 0, sizeof(link));
3472 			ret = rte_eth_link_get_nowait(portid, &link);
3473 			if (ret < 0) {
3474 				all_ports_up = 0;
3475 				if (print_flag == 1)
3476 					fprintf(stderr,
3477 						"Port %u link get failed: %s\n",
3478 						portid, rte_strerror(-ret));
3479 				continue;
3480 			}
3481 			/* print link status if flag set */
3482 			if (print_flag == 1) {
3483 				rte_eth_link_to_str(link_status,
3484 					sizeof(link_status), &link);
3485 				printf("Port %d %s\n", portid, link_status);
3486 				continue;
3487 			}
3488 			/* clear all_ports_up flag if any link down */
3489 			if (link.link_status == RTE_ETH_LINK_DOWN) {
3490 				all_ports_up = 0;
3491 				break;
3492 			}
3493 		}
3494 		/* after finally printing all link status, get out */
3495 		if (print_flag == 1)
3496 			break;
3497 
3498 		if (all_ports_up == 0) {
3499 			fflush(stdout);
3500 			rte_delay_ms(CHECK_INTERVAL);
3501 		}
3502 
3503 		/* set the print_flag if all ports up or timeout */
3504 		if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
3505 			print_flag = 1;
3506 		}
3507 
3508 		if (lsc_interrupt)
3509 			break;
3510 	}
3511 }
3512 
3513 static void
3514 rmv_port_callback(void *arg)
3515 {
3516 	int need_to_start = 0;
3517 	int org_no_link_check = no_link_check;
3518 	portid_t port_id = (intptr_t)arg;
3519 	struct rte_eth_dev_info dev_info;
3520 	int ret;
3521 
3522 	RTE_ETH_VALID_PORTID_OR_RET(port_id);
3523 
3524 	if (!test_done && port_is_forwarding(port_id)) {
3525 		need_to_start = 1;
3526 		stop_packet_forwarding();
3527 	}
3528 	no_link_check = 1;
3529 	stop_port(port_id);
3530 	no_link_check = org_no_link_check;
3531 
3532 	ret = eth_dev_info_get_print_err(port_id, &dev_info);
3533 	if (ret != 0)
3534 		TESTPMD_LOG(ERR,
3535 			"Failed to get device info for port %d, not detaching\n",
3536 			port_id);
3537 	else {
3538 		struct rte_device *device = dev_info.device;
3539 		close_port(port_id);
3540 		detach_device(device); /* might be already removed or have more ports */
3541 	}
3542 	if (need_to_start)
3543 		start_packet_forwarding(0);
3544 }
3545 
3546 /* This function is used by the interrupt thread */
3547 static int
3548 eth_event_callback(portid_t port_id, enum rte_eth_event_type type, void *param,
3549 		  void *ret_param)
3550 {
3551 	RTE_SET_USED(param);
3552 	RTE_SET_USED(ret_param);
3553 
3554 	if (type >= RTE_ETH_EVENT_MAX) {
3555 		fprintf(stderr,
3556 			"\nPort %" PRIu16 ": %s called upon invalid event %d\n",
3557 			port_id, __func__, type);
3558 		fflush(stderr);
3559 	} else if (event_print_mask & (UINT32_C(1) << type)) {
3560 		printf("\nPort %" PRIu16 ": %s event\n", port_id,
3561 			eth_event_desc[type]);
3562 		fflush(stdout);
3563 	}
3564 
3565 	switch (type) {
3566 	case RTE_ETH_EVENT_NEW:
3567 		ports[port_id].need_setup = 1;
3568 		ports[port_id].port_status = RTE_PORT_HANDLING;
3569 		break;
3570 	case RTE_ETH_EVENT_INTR_RMV:
3571 		if (port_id_is_invalid(port_id, DISABLED_WARN))
3572 			break;
3573 		if (rte_eal_alarm_set(100000,
3574 				rmv_port_callback, (void *)(intptr_t)port_id))
3575 			fprintf(stderr,
3576 				"Could not set up deferred device removal\n");
3577 		break;
3578 	case RTE_ETH_EVENT_DESTROY:
3579 		ports[port_id].port_status = RTE_PORT_CLOSED;
3580 		printf("Port %u is closed\n", port_id);
3581 		break;
3582 	default:
3583 		break;
3584 	}
3585 	return 0;
3586 }
3587 
3588 static int
3589 register_eth_event_callback(void)
3590 {
3591 	int ret;
3592 	enum rte_eth_event_type event;
3593 
3594 	for (event = RTE_ETH_EVENT_UNKNOWN;
3595 			event < RTE_ETH_EVENT_MAX; event++) {
3596 		ret = rte_eth_dev_callback_register(RTE_ETH_ALL,
3597 				event,
3598 				eth_event_callback,
3599 				NULL);
3600 		if (ret != 0) {
3601 			TESTPMD_LOG(ERR, "Failed to register callback for "
3602 					"%s event\n", eth_event_desc[event]);
3603 			return -1;
3604 		}
3605 	}
3606 
3607 	return 0;
3608 }
3609 
3610 /* This function is used by the interrupt thread */
3611 static void
3612 dev_event_callback(const char *device_name, enum rte_dev_event_type type,
3613 			     __rte_unused void *arg)
3614 {
3615 	uint16_t port_id;
3616 	int ret;
3617 
3618 	if (type >= RTE_DEV_EVENT_MAX) {
3619 		fprintf(stderr, "%s called upon invalid event %d\n",
3620 			__func__, type);
3621 		fflush(stderr);
3622 	}
3623 
3624 	switch (type) {
3625 	case RTE_DEV_EVENT_REMOVE:
3626 		RTE_LOG(DEBUG, EAL, "The device: %s has been removed!\n",
3627 			device_name);
3628 		ret = rte_eth_dev_get_port_by_name(device_name, &port_id);
3629 		if (ret) {
3630 			RTE_LOG(ERR, EAL, "can not get port by device %s!\n",
3631 			RTE_LOG(ERR, EAL, "cannot get port for device %s!\n",
3632 			return;
3633 		}
3634 		/*
3635 		 * Because the user's callback is invoked from within the EAL
3636 		 * interrupt callback, the interrupt callback must return
3637 		 * before it can be unregistered when detaching the device.
3638 		 * So return from this callback quickly and detach the device
3639 		 * through a deferred removal instead. This is a workaround:
3640 		 * once device detaching is moved into the EAL, the deferred
3641 		 * removal can be dropped.
3642 		 */
3643 		if (rte_eal_alarm_set(100000,
3644 				rmv_port_callback, (void *)(intptr_t)port_id))
3645 			RTE_LOG(ERR, EAL,
3646 				"Could not set up deferred device removal\n");
3647 		break;
3648 	case RTE_DEV_EVENT_ADD:
3649 		RTE_LOG(ERR, EAL, "The device: %s has been added!\n",
3650 			device_name);
3651 		/* TODO: After finish kernel driver binding,
3652 		 * begin to attach port.
3653 		 */
3654 		break;
3655 	default:
3656 		break;
3657 	}
3658 }
3659 
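/*
 * Initialize the per-queue Rx/Tx configuration of a port from the device
 * defaults, then apply any thresholds passed on the command line.
 */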
3660 static void
3661 rxtx_port_config(portid_t pid)
3662 {
3663 	uint16_t qid;
3664 	uint64_t offloads;
3665 	struct rte_port *port = &ports[pid];
3666 
3667 	for (qid = 0; qid < nb_rxq; qid++) {
3668 		offloads = port->rx_conf[qid].offloads;
3669 		port->rx_conf[qid] = port->dev_info.default_rxconf;
3670 
3671 		if (rxq_share > 0 &&
3672 		    (port->dev_info.dev_capa & RTE_ETH_DEV_CAPA_RXQ_SHARE)) {
3673 			/* Non-zero share group to enable RxQ share. */
3674 			port->rx_conf[qid].share_group = pid / rxq_share + 1;
3675 			port->rx_conf[qid].share_qid = qid; /* Equal mapping. */
3676 		}
3677 
3678 		if (offloads != 0)
3679 			port->rx_conf[qid].offloads = offloads;
3680 
3681 		/* Check if any Rx parameters have been passed */
3682 		if (rx_pthresh != RTE_PMD_PARAM_UNSET)
3683 			port->rx_conf[qid].rx_thresh.pthresh = rx_pthresh;
3684 
3685 		if (rx_hthresh != RTE_PMD_PARAM_UNSET)
3686 			port->rx_conf[qid].rx_thresh.hthresh = rx_hthresh;
3687 
3688 		if (rx_wthresh != RTE_PMD_PARAM_UNSET)
3689 			port->rx_conf[qid].rx_thresh.wthresh = rx_wthresh;
3690 
3691 		if (rx_free_thresh != RTE_PMD_PARAM_UNSET)
3692 			port->rx_conf[qid].rx_free_thresh = rx_free_thresh;
3693 
3694 		if (rx_drop_en != RTE_PMD_PARAM_UNSET)
3695 			port->rx_conf[qid].rx_drop_en = rx_drop_en;
3696 
3697 		port->nb_rx_desc[qid] = nb_rxd;
3698 	}
3699 
3700 	for (qid = 0; qid < nb_txq; qid++) {
3701 		offloads = port->tx_conf[qid].offloads;
3702 		port->tx_conf[qid] = port->dev_info.default_txconf;
3703 		if (offloads != 0)
3704 			port->tx_conf[qid].offloads = offloads;
3705 
3706 		/* Check if any Tx parameters have been passed */
3707 		if (tx_pthresh != RTE_PMD_PARAM_UNSET)
3708 			port->tx_conf[qid].tx_thresh.pthresh = tx_pthresh;
3709 
3710 		if (tx_hthresh != RTE_PMD_PARAM_UNSET)
3711 			port->tx_conf[qid].tx_thresh.hthresh = tx_hthresh;
3712 
3713 		if (tx_wthresh != RTE_PMD_PARAM_UNSET)
3714 			port->tx_conf[qid].tx_thresh.wthresh = tx_wthresh;
3715 
3716 		if (tx_rs_thresh != RTE_PMD_PARAM_UNSET)
3717 			port->tx_conf[qid].tx_rs_thresh = tx_rs_thresh;
3718 
3719 		if (tx_free_thresh != RTE_PMD_PARAM_UNSET)
3720 			port->tx_conf[qid].tx_free_thresh = tx_free_thresh;
3721 
3722 		port->nb_tx_desc[qid] = nb_txd;
3723 	}
3724 }
3725 
3726 /*
3727  * Helper function to set MTU from frame size
3728  *
3729  * port->dev_info should be set before calling this function.
3730  *
3731  * return 0 on success, negative on error
3732  */
3733 int
3734 update_mtu_from_frame_size(portid_t portid, uint32_t max_rx_pktlen)
3735 {
3736 	struct rte_port *port = &ports[portid];
3737 	uint32_t eth_overhead;
3738 	uint16_t mtu, new_mtu;
3739 
3740 	eth_overhead = get_eth_overhead(&port->dev_info);
3741 
3742 	if (rte_eth_dev_get_mtu(portid, &mtu) != 0) {
3743 		printf("Failed to get MTU for port %u\n", portid);
3744 		return -1;
3745 	}
3746 
3747 	new_mtu = max_rx_pktlen - eth_overhead;
3748 
3749 	if (mtu == new_mtu)
3750 		return 0;
3751 
3752 	if (eth_dev_set_mtu_mp(portid, new_mtu) != 0) {
3753 		fprintf(stderr,
3754 			"Failed to set MTU to %u for port %u\n",
3755 			new_mtu, portid);
3756 		return -1;
3757 	}
3758 
3759 	port->dev_conf.rxmode.mtu = new_mtu;
3760 
3761 	return 0;
3762 }
3763 
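/*
 * Apply the default testpmd configuration (RSS, queue configuration,
 * link state and removal interrupts) to every probed port.
 */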
3764 void
3765 init_port_config(void)
3766 {
3767 	portid_t pid;
3768 	struct rte_port *port;
3769 	int ret, i;
3770 
3771 	RTE_ETH_FOREACH_DEV(pid) {
3772 		port = &ports[pid];
3773 		port->dev_conf.fdir_conf = fdir_conf;
3774 
3775 		ret = eth_dev_info_get_print_err(pid, &port->dev_info);
3776 		if (ret != 0)
3777 			return;
3778 
3779 		if (nb_rxq > 1) {
3780 			port->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
3781 			port->dev_conf.rx_adv_conf.rss_conf.rss_hf =
3782 				rss_hf & port->dev_info.flow_type_rss_offloads;
3783 		} else {
3784 			port->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
3785 			port->dev_conf.rx_adv_conf.rss_conf.rss_hf = 0;
3786 		}
3787 
3788 		if (port->dcb_flag == 0) {
3789 			if (port->dev_conf.rx_adv_conf.rss_conf.rss_hf != 0) {
3790 				port->dev_conf.rxmode.mq_mode =
3791 					(enum rte_eth_rx_mq_mode)
3792 						(rx_mq_mode & RTE_ETH_MQ_RX_RSS);
3793 			} else {
3794 				port->dev_conf.rxmode.mq_mode = RTE_ETH_MQ_RX_NONE;
3795 				port->dev_conf.rxmode.offloads &=
3796 						~RTE_ETH_RX_OFFLOAD_RSS_HASH;
3797 
3798 				for (i = 0;
3799 				     i < port->dev_info.nb_rx_queues;
3800 				     i++)
3801 					port->rx_conf[i].offloads &=
3802 						~RTE_ETH_RX_OFFLOAD_RSS_HASH;
3803 			}
3804 		}
3805 
3806 		rxtx_port_config(pid);
3807 
3808 		ret = eth_macaddr_get_print_err(pid, &port->eth_addr);
3809 		if (ret != 0)
3810 			return;
3811 
3812 #if defined RTE_NET_IXGBE && defined RTE_LIBRTE_IXGBE_BYPASS
3813 		rte_pmd_ixgbe_bypass_init(pid);
3814 #endif
3815 
3816 		if (lsc_interrupt && (*port->dev_info.dev_flags & RTE_ETH_DEV_INTR_LSC))
3817 			port->dev_conf.intr_conf.lsc = 1;
3818 		if (rmv_interrupt && (*port->dev_info.dev_flags & RTE_ETH_DEV_INTR_RMV))
3819 			port->dev_conf.intr_conf.rmv = 1;
3820 	}
3821 }
3822 
3823 void set_port_slave_flag(portid_t slave_pid)
3824 {
3825 	struct rte_port *port;
3826 
3827 	port = &ports[slave_pid];
3828 	port->slave_flag = 1;
3829 }
3830 
3831 void clear_port_slave_flag(portid_t slave_pid)
3832 {
3833 	struct rte_port *port;
3834 
3835 	port = &ports[slave_pid];
3836 	port->slave_flag = 0;
3837 }
3838 
3839 uint8_t port_is_bonding_slave(portid_t slave_pid)
3840 {
3841 	struct rte_port *port;
3842 	struct rte_eth_dev_info dev_info;
3843 	int ret;
3844 
3845 	port = &ports[slave_pid];
3846 	ret = eth_dev_info_get_print_err(slave_pid, &dev_info);
3847 	if (ret != 0) {
3848 		TESTPMD_LOG(ERR,
3849 			"Failed to get device info for port id %d, "
3850 			"cannot determine if the port is a bonded slave\n",
3851 			slave_pid);
3852 		return 0;
3853 	}
3854 	if ((*dev_info.dev_flags & RTE_ETH_DEV_BONDED_SLAVE) || (port->slave_flag == 1))
3855 		return 1;
3856 	return 0;
3857 }
3858 
3859 const uint16_t vlan_tags[] = {
3860 		0,  1,  2,  3,  4,  5,  6,  7,
3861 		8,  9, 10, 11,  12, 13, 14, 15,
3862 		16, 17, 18, 19, 20, 21, 22, 23,
3863 		24, 25, 26, 27, 28, 29, 30, 31
3864 };
3865 
3866 static int
3867 get_eth_dcb_conf(portid_t pid, struct rte_eth_conf *eth_conf,
3868 		 enum dcb_mode_enable dcb_mode,
3869 		 enum rte_eth_nb_tcs num_tcs,
3870 		 uint8_t pfc_en)
3871 {
3872 	uint8_t i;
3873 	int32_t rc;
3874 	struct rte_eth_rss_conf rss_conf;
3875 
3876 	/*
3877 	 * Builds up the correct configuration for dcb+vt based on the vlan tags array
3878 	 * given above, and the number of traffic classes available for use.
3879 	 */
3880 	if (dcb_mode == DCB_VT_ENABLED) {
3881 		struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
3882 				&eth_conf->rx_adv_conf.vmdq_dcb_conf;
3883 		struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3884 				&eth_conf->tx_adv_conf.vmdq_dcb_tx_conf;
3885 
3886 		/* VMDQ+DCB RX and TX configurations */
3887 		vmdq_rx_conf->enable_default_pool = 0;
3888 		vmdq_rx_conf->default_pool = 0;
3889 		vmdq_rx_conf->nb_queue_pools =
3890 			(num_tcs == RTE_ETH_4_TCS ? RTE_ETH_32_POOLS : RTE_ETH_16_POOLS);
3891 		vmdq_tx_conf->nb_queue_pools =
3892 			(num_tcs == RTE_ETH_4_TCS ? RTE_ETH_32_POOLS : RTE_ETH_16_POOLS);
3893 
3894 		vmdq_rx_conf->nb_pool_maps = vmdq_rx_conf->nb_queue_pools;
3895 		for (i = 0; i < vmdq_rx_conf->nb_pool_maps; i++) {
3896 			vmdq_rx_conf->pool_map[i].vlan_id = vlan_tags[i];
3897 			vmdq_rx_conf->pool_map[i].pools =
3898 				1 << (i % vmdq_rx_conf->nb_queue_pools);
3899 		}
3900 		for (i = 0; i < RTE_ETH_DCB_NUM_USER_PRIORITIES; i++) {
3901 			vmdq_rx_conf->dcb_tc[i] = i % num_tcs;
3902 			vmdq_tx_conf->dcb_tc[i] = i % num_tcs;
3903 		}
3904 
3905 		/* set the RX and TX multi-queue modes to DCB */
3906 		eth_conf->rxmode.mq_mode =
3907 				(enum rte_eth_rx_mq_mode)
3908 					(rx_mq_mode & RTE_ETH_MQ_RX_VMDQ_DCB);
3909 		eth_conf->txmode.mq_mode = RTE_ETH_MQ_TX_VMDQ_DCB;
3910 	} else {
3911 		struct rte_eth_dcb_rx_conf *rx_conf =
3912 				&eth_conf->rx_adv_conf.dcb_rx_conf;
3913 		struct rte_eth_dcb_tx_conf *tx_conf =
3914 				&eth_conf->tx_adv_conf.dcb_tx_conf;
3915 
3916 		memset(&rss_conf, 0, sizeof(struct rte_eth_rss_conf));
3917 
3918 		rc = rte_eth_dev_rss_hash_conf_get(pid, &rss_conf);
3919 		if (rc != 0)
3920 			return rc;
3921 
3922 		rx_conf->nb_tcs = num_tcs;
3923 		tx_conf->nb_tcs = num_tcs;
3924 
3925 		for (i = 0; i < RTE_ETH_DCB_NUM_USER_PRIORITIES; i++) {
3926 			rx_conf->dcb_tc[i] = i % num_tcs;
3927 			tx_conf->dcb_tc[i] = i % num_tcs;
3928 		}
3929 
3930 		eth_conf->rxmode.mq_mode =
3931 				(enum rte_eth_rx_mq_mode)
3932 					(rx_mq_mode & RTE_ETH_MQ_RX_DCB_RSS);
3933 		eth_conf->rx_adv_conf.rss_conf = rss_conf;
3934 		eth_conf->txmode.mq_mode = RTE_ETH_MQ_TX_DCB;
3935 	}
3936 
3937 	if (pfc_en)
3938 		eth_conf->dcb_capability_en =
3939 				RTE_ETH_DCB_PG_SUPPORT | RTE_ETH_DCB_PFC_SUPPORT;
3940 	else
3941 		eth_conf->dcb_capability_en = RTE_ETH_DCB_PG_SUPPORT;
3942 
3943 	return 0;
3944 }
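/*
 * Worked example (illustrative only): with dcb_mode == DCB_VT_ENABLED and
 * num_tcs == RTE_ETH_4_TCS the code above selects 32 VMDq pools, maps
 * vlan_tags[i] to pool bit (1 << (i % 32)) and maps user priority i to
 * traffic class (i % 4), i.e. priorities 0..7 -> TCs 0,1,2,3,0,1,2,3.
 * In the non-VT branch the current RSS hash configuration is preserved
 * and only the TC mapping and the RX/TX mq_mode are changed.
 */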
3945 
3946 int
3947 init_port_dcb_config(portid_t pid,
3948 		     enum dcb_mode_enable dcb_mode,
3949 		     enum rte_eth_nb_tcs num_tcs,
3950 		     uint8_t pfc_en)
3951 {
3952 	struct rte_eth_conf port_conf;
3953 	struct rte_port *rte_port;
3954 	int retval;
3955 	uint16_t i;
3956 
3957 	if (num_procs > 1) {
3958 		fprintf(stderr, "The multi-process feature doesn't support DCB.\n");
3959 		return -ENOTSUP;
3960 	}
3961 	rte_port = &ports[pid];
3962 
3963 	/* retain the original device configuration. */
3964 	memcpy(&port_conf, &rte_port->dev_conf, sizeof(struct rte_eth_conf));
3965 
3966 	/* set configuration of DCB in VT mode and DCB in non-VT mode */
3967 	retval = get_eth_dcb_conf(pid, &port_conf, dcb_mode, num_tcs, pfc_en);
3968 	if (retval < 0)
3969 		return retval;
3970 	port_conf.rxmode.offloads |= RTE_ETH_RX_OFFLOAD_VLAN_FILTER;
3971 	/* remove RSS HASH offload for DCB in vt mode */
3972 	if (port_conf.rxmode.mq_mode == RTE_ETH_MQ_RX_VMDQ_DCB) {
3973 		port_conf.rxmode.offloads &= ~RTE_ETH_RX_OFFLOAD_RSS_HASH;
3974 		for (i = 0; i < nb_rxq; i++)
3975 			rte_port->rx_conf[i].offloads &=
3976 				~RTE_ETH_RX_OFFLOAD_RSS_HASH;
3977 	}
3978 
3979 	/* re-configure the device. */
3980 	retval = rte_eth_dev_configure(pid, nb_rxq, nb_rxq, &port_conf);
3981 	if (retval < 0)
3982 		return retval;
3983 
3984 	retval = eth_dev_info_get_print_err(pid, &rte_port->dev_info);
3985 	if (retval != 0)
3986 		return retval;
3987 
3988 	/* If dev_info.vmdq_pool_base is greater than 0,
3989 	 * the queue IDs of the VMDq pools start after the PF queues.
3990 	 */
3991 	if (dcb_mode == DCB_VT_ENABLED &&
3992 	    rte_port->dev_info.vmdq_pool_base > 0) {
3993 		fprintf(stderr,
3994 			"VMDQ_DCB multi-queue mode is nonsensical for port %d.\n",
3995 			pid);
3996 		return -1;
3997 	}
3998 
3999 	/* Assume all ports in testpmd have the same DCB capability
4000 	 * and the same number of rxq and txq in DCB mode.
4001 	 */
4002 	if (dcb_mode == DCB_VT_ENABLED) {
4003 		if (rte_port->dev_info.max_vfs > 0) {
4004 			nb_rxq = rte_port->dev_info.nb_rx_queues;
4005 			nb_txq = rte_port->dev_info.nb_tx_queues;
4006 		} else {
4007 			nb_rxq = rte_port->dev_info.max_rx_queues;
4008 			nb_txq = rte_port->dev_info.max_tx_queues;
4009 		}
4010 	} else {
4011 		/* if VT is disabled, use all PF queues */
4012 		if (rte_port->dev_info.vmdq_pool_base == 0) {
4013 			nb_rxq = rte_port->dev_info.max_rx_queues;
4014 			nb_txq = rte_port->dev_info.max_tx_queues;
4015 		} else {
4016 			nb_rxq = (queueid_t)num_tcs;
4017 			nb_txq = (queueid_t)num_tcs;
4018 
4020 	}
4021 	rx_free_thresh = 64;
4022 
4023 	memcpy(&rte_port->dev_conf, &port_conf, sizeof(struct rte_eth_conf));
4024 
4025 	rxtx_port_config(pid);
4026 	/* VLAN filter */
4027 	rte_port->dev_conf.rxmode.offloads |= RTE_ETH_RX_OFFLOAD_VLAN_FILTER;
4028 	for (i = 0; i < RTE_DIM(vlan_tags); i++)
4029 		rx_vft_set(pid, vlan_tags[i], 1);
4030 
4031 	retval = eth_macaddr_get_print_err(pid, &rte_port->eth_addr);
4032 	if (retval != 0)
4033 		return retval;
4034 
4035 	rte_port->dcb_flag = 1;
4036 
4037 	/* Enter DCB configuration status */
4038 	dcb_config = 1;
4039 
4040 	return 0;
4041 }
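/*
 * Usage sketch (assumption, not taken from this file): in testpmd this
 * function is normally reached from the interactive "port config ... dcb"
 * command while the port is stopped, roughly:
 *
 *     stop_port(pid);
 *     if (init_port_dcb_config(pid, DCB_ENABLED, RTE_ETH_4_TCS, 1) == 0)
 *         start_port(pid);
 *
 * where DCB_ENABLED/DCB_VT_ENABLED come from testpmd.h and the
 * RTE_ETH_*_TCS values come from rte_ethdev.h.
 */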
4042 
4043 static void
4044 init_port(void)
4045 {
4046 	int i;
4047 
4048 	/* Configuration of Ethernet ports. */
4049 	ports = rte_zmalloc("testpmd: ports",
4050 			    sizeof(struct rte_port) * RTE_MAX_ETHPORTS,
4051 			    RTE_CACHE_LINE_SIZE);
4052 	if (ports == NULL) {
4053 		rte_exit(EXIT_FAILURE,
4054 				"rte_zmalloc(%d struct rte_port) failed\n",
4055 				RTE_MAX_ETHPORTS);
4056 	}
4057 	for (i = 0; i < RTE_MAX_ETHPORTS; i++)
4058 		ports[i].xstats_info.allocated = false;
4059 	for (i = 0; i < RTE_MAX_ETHPORTS; i++)
4060 		LIST_INIT(&ports[i].flow_tunnel_list);
4061 	/* Initialize the ports' NUMA structures */
4062 	memset(port_numa, NUMA_NO_CONFIG, RTE_MAX_ETHPORTS);
4063 	memset(rxring_numa, NUMA_NO_CONFIG, RTE_MAX_ETHPORTS);
4064 	memset(txring_numa, NUMA_NO_CONFIG, RTE_MAX_ETHPORTS);
4065 }
4066 
4067 static void
4068 force_quit(void)
4069 {
4070 	pmd_test_exit();
4071 	prompt_exit();
4072 }
4073 
4074 static void
4075 print_stats(void)
4076 {
4077 	uint8_t i;
4078 	const char clr[] = { 27, '[', '2', 'J', '\0' };
4079 	const char top_left[] = { 27, '[', '1', ';', '1', 'H', '\0' };
4080 
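	/*
	 * 27 is the ASCII ESC character: "clr" is the ANSI erase-display
	 * sequence ESC [2J and "top_left" is the ANSI cursor-home sequence
	 * ESC [1;1H.
	 */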
4081 	/* Clear screen and move to top left */
4082 	printf("%s%s", clr, top_left);
4083 
4084 	printf("\nPort statistics ====================================");
4085 	for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++)
4086 		nic_stats_display(fwd_ports_ids[i]);
4087 
4088 	fflush(stdout);
4089 }
4090 
4091 static void
4092 signal_handler(int signum)
4093 {
4094 	if (signum == SIGINT || signum == SIGTERM) {
4095 		fprintf(stderr, "\nSignal %d received, preparing to exit...\n",
4096 			signum);
4097 #ifdef RTE_LIB_PDUMP
4098 		/* uninitialize packet capture framework */
4099 		rte_pdump_uninit();
4100 #endif
4101 #ifdef RTE_LIB_LATENCYSTATS
4102 		if (latencystats_enabled != 0)
4103 			rte_latencystats_uninit();
4104 #endif
4105 		force_quit();
4106 		/* Set flag to indicate the force termination. */
4107 		f_quit = 1;
4108 		/* exit with the expected status */
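		/*
		 * Restoring SIG_DFL and re-raising the signal makes the
		 * process terminate with the conventional "killed by signal"
		 * status; kill()/getpid() are not used on Windows, hence the
		 * guard below.
		 */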
4109 #ifndef RTE_EXEC_ENV_WINDOWS
4110 		signal(signum, SIG_DFL);
4111 		kill(getpid(), signum);
4112 #endif
4113 	}
4114 }
4115 
4116 int
4117 main(int argc, char** argv)
4118 {
4119 	int diag;
4120 	portid_t port_id;
4121 	uint16_t count;
4122 	int ret;
4123 
4124 	signal(SIGINT, signal_handler);
4125 	signal(SIGTERM, signal_handler);
4126 
4127 	testpmd_logtype = rte_log_register("testpmd");
4128 	if (testpmd_logtype < 0)
4129 		rte_exit(EXIT_FAILURE, "Cannot register log type\n");
4130 	rte_log_set_level(testpmd_logtype, RTE_LOG_DEBUG);
4131 
4132 	diag = rte_eal_init(argc, argv);
4133 	if (diag < 0)
4134 		rte_exit(EXIT_FAILURE, "Cannot init EAL: %s\n",
4135 			 rte_strerror(rte_errno));
4136 
4137 	ret = register_eth_event_callback();
4138 	if (ret != 0)
4139 		rte_exit(EXIT_FAILURE, "Cannot register for ethdev events\n");
4140 
4141 #ifdef RTE_LIB_PDUMP
4142 	/* initialize packet capture framework */
4143 	rte_pdump_init();
4144 #endif
4145 
4146 	count = 0;
4147 	RTE_ETH_FOREACH_DEV(port_id) {
4148 		ports_ids[count] = port_id;
4149 		count++;
4150 	}
4151 	nb_ports = (portid_t) count;
4152 	if (nb_ports == 0)
4153 		TESTPMD_LOG(WARNING, "No probed ethernet devices\n");
4154 
4155 	/* allocate port structures, and init them */
4156 	init_port();
4157 
4158 	set_def_fwd_config();
4159 	if (nb_lcores == 0)
4160 		rte_exit(EXIT_FAILURE, "No cores defined for forwarding\n"
4161 			 "Check the core mask argument\n");
4162 
4163 	/* Bitrate/latency stats disabled by default */
4164 #ifdef RTE_LIB_BITRATESTATS
4165 	bitrate_enabled = 0;
4166 #endif
4167 #ifdef RTE_LIB_LATENCYSTATS
4168 	latencystats_enabled = 0;
4169 #endif
4170 
4171 	/* on FreeBSD, mlockall() is disabled by default */
4172 #ifdef RTE_EXEC_ENV_FREEBSD
4173 	do_mlockall = 0;
4174 #else
4175 	do_mlockall = 1;
4176 #endif
4177 
4178 	argc -= diag;
4179 	argv += diag;
4180 	if (argc > 1)
4181 		launch_args_parse(argc, argv);
4182 
4183 #ifndef RTE_EXEC_ENV_WINDOWS
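	/*
	 * Lock current and future pages in memory so the forwarding path
	 * does not take page faults; a failure is only logged (best effort).
	 */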
4184 	if (do_mlockall && mlockall(MCL_CURRENT | MCL_FUTURE)) {
4185 		TESTPMD_LOG(NOTICE, "mlockall() failed with error \"%s\"\n",
4186 			strerror(errno));
4187 	}
4188 #endif
4189 
4190 	if (tx_first && interactive)
4191 		rte_exit(EXIT_FAILURE, "--tx-first cannot be used in "
4192 				"interactive mode.\n");
4193 
4194 	if (tx_first && lsc_interrupt) {
4195 		fprintf(stderr,
4196 			"Warning: lsc_interrupt needs to be off when using tx_first. Disabling.\n");
4197 		lsc_interrupt = 0;
4198 	}
4199 
4200 	if (!nb_rxq && !nb_txq)
4201 		fprintf(stderr,
4202 			"Warning: Either rx or tx queues should be non-zero\n");
4203 
4204 	if (nb_rxq > 1 && nb_rxq > nb_txq)
4205 		fprintf(stderr,
4206 			"Warning: nb_rxq=%d enables RSS configuration, but nb_txq=%d will prevent it from being fully tested.\n",
4207 			nb_rxq, nb_txq);
4208 
4209 	init_config();
4210 
4211 	if (hot_plug) {
4212 		ret = rte_dev_hotplug_handle_enable();
4213 		if (ret) {
4214 			RTE_LOG(ERR, EAL,
4215 				"Failed to enable hotplug handling.\n");
4216 			return -1;
4217 		}
4218 
4219 		ret = rte_dev_event_monitor_start();
4220 		if (ret) {
4221 			RTE_LOG(ERR, EAL,
4222 				"Failed to start device event monitoring.\n");
4223 			return -1;
4224 		}
4225 
4226 		ret = rte_dev_event_callback_register(NULL,
4227 			dev_event_callback, NULL);
4228 		if (ret) {
4229 			RTE_LOG(ERR, EAL,
4230 				"Failed to register device event callback.\n");
4231 			return -1;
4232 		}
4233 	}
4234 
4235 	if (!no_device_start && start_port(RTE_PORT_ALL) != 0)
4236 		rte_exit(EXIT_FAILURE, "Failed to start ports\n");
4237 
4238 	/* set all ports to promiscuous mode by default */
4239 	RTE_ETH_FOREACH_DEV(port_id) {
4240 		ret = rte_eth_promiscuous_enable(port_id);
4241 		if (ret != 0)
4242 			fprintf(stderr,
4243 				"Error enabling promiscuous mode for port %u: %s - ignored\n",
4244 				port_id, rte_strerror(-ret));
4245 	}
4246 
4247 #ifdef RTE_LIB_METRICS
4248 	/* Init metrics library */
4249 	rte_metrics_init(rte_socket_id());
4250 #endif
4251 
4252 #ifdef RTE_LIB_LATENCYSTATS
4253 	if (latencystats_enabled != 0) {
4254 		int ret = rte_latencystats_init(1, NULL);
4255 		if (ret)
4256 			fprintf(stderr,
4257 				"Warning: latencystats init() returned error %d\n",
4258 				ret);
4259 		fprintf(stderr, "Latencystats running on lcore %d\n",
4260 			latencystats_lcore_id);
4261 	}
4262 #endif
4263 
4264 	/* Setup bitrate stats */
4265 #ifdef RTE_LIB_BITRATESTATS
4266 	if (bitrate_enabled != 0) {
4267 		bitrate_data = rte_stats_bitrate_create();
4268 		if (bitrate_data == NULL)
4269 			rte_exit(EXIT_FAILURE,
4270 				"Could not allocate bitrate data.\n");
4271 		rte_stats_bitrate_reg(bitrate_data);
4272 	}
4273 #endif
4274 #ifdef RTE_LIB_CMDLINE
4275 	if (strlen(cmdline_filename) != 0)
4276 		cmdline_read_from_file(cmdline_filename);
4277 
4278 	if (interactive == 1) {
4279 		if (auto_start) {
4280 			printf("Start automatic packet forwarding\n");
4281 			start_packet_forwarding(0);
4282 		}
4283 		prompt();
4284 		pmd_test_exit();
4285 	} else
4286 #endif
4287 	{
4288 		char c;
4289 		int rc;
4290 
4291 		f_quit = 0;
4292 
4293 		printf("No commandline core given, start packet forwarding\n");
4294 		start_packet_forwarding(tx_first);
4295 		if (stats_period != 0) {
4296 			uint64_t prev_time = 0, cur_time, diff_time = 0;
4297 			uint64_t timer_period;
4298 
4299 			/* Convert to number of cycles */
4300 			timer_period = stats_period * rte_get_timer_hz();
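			/*
			 * For example (illustrative), "--stats-period 5" on a
			 * 2.5 GHz timer gives 5 * 2500000000 = 12.5e9 cycles
			 * between statistics refreshes.
			 */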
4301 
4302 			while (f_quit == 0) {
4303 				cur_time = rte_get_timer_cycles();
4304 				diff_time += cur_time - prev_time;
4305 
4306 				if (diff_time >= timer_period) {
4307 					print_stats();
4308 					/* Reset the timer */
4309 					diff_time = 0;
4310 				}
4311 				/* Sleep to avoid unnecessary checks */
4312 				prev_time = cur_time;
4313 				rte_delay_us_sleep(US_PER_S);
4314 			}
4315 		}
4316 
4317 		printf("Press enter to exit\n");
4318 		rc = read(0, &c, 1);
4319 		pmd_test_exit();
4320 		if (rc < 0)
4321 			return 1;
4322 	}
4323 
4324 	ret = rte_eal_cleanup();
4325 	if (ret != 0)
4326 		rte_exit(EXIT_FAILURE,
4327 			 "EAL cleanup failed: %s\n", strerror(-ret));
4328 
4329 	return EXIT_SUCCESS;
4330 }
4331