xref: /dpdk/app/test-pmd/testpmd.c (revision 96db98db69f759cdb54c02ef72c4cae760b01ab3)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2017 Intel Corporation
3  */
4 
5 #include <stdarg.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <signal.h>
9 #include <string.h>
10 #include <time.h>
11 #include <fcntl.h>
12 #ifndef RTE_EXEC_ENV_WINDOWS
13 #include <sys/mman.h>
14 #endif
15 #include <sys/types.h>
16 #include <errno.h>
17 #include <stdbool.h>
18 
19 #include <sys/queue.h>
20 #include <sys/stat.h>
21 
22 #include <stdint.h>
23 #include <unistd.h>
24 #include <inttypes.h>
25 
26 #include <rte_common.h>
27 #include <rte_errno.h>
28 #include <rte_byteorder.h>
29 #include <rte_log.h>
30 #include <rte_debug.h>
31 #include <rte_cycles.h>
32 #include <rte_memory.h>
33 #include <rte_memcpy.h>
34 #include <rte_launch.h>
35 #include <rte_eal.h>
36 #include <rte_alarm.h>
37 #include <rte_per_lcore.h>
38 #include <rte_lcore.h>
39 #include <rte_branch_prediction.h>
40 #include <rte_mempool.h>
41 #include <rte_malloc.h>
42 #include <rte_mbuf.h>
43 #include <rte_mbuf_pool_ops.h>
44 #include <rte_interrupts.h>
45 #include <rte_pci.h>
46 #include <rte_ether.h>
47 #include <rte_ethdev.h>
48 #include <rte_dev.h>
49 #include <rte_string_fns.h>
50 #ifdef RTE_NET_IXGBE
51 #include <rte_pmd_ixgbe.h>
52 #endif
53 #ifdef RTE_LIB_PDUMP
54 #include <rte_pdump.h>
55 #endif
56 #include <rte_flow.h>
57 #ifdef RTE_LIB_METRICS
58 #include <rte_metrics.h>
59 #endif
60 #ifdef RTE_LIB_BITRATESTATS
61 #include <rte_bitrate.h>
62 #endif
63 #ifdef RTE_LIB_LATENCYSTATS
64 #include <rte_latencystats.h>
65 #endif
66 #ifdef RTE_EXEC_ENV_WINDOWS
67 #include <process.h>
68 #endif
69 
70 #include "testpmd.h"
71 
72 #ifndef MAP_HUGETLB
73 /* FreeBSD may not have MAP_HUGETLB (in fact, it probably doesn't) */
74 #define HUGE_FLAG (0x40000)
75 #else
76 #define HUGE_FLAG MAP_HUGETLB
77 #endif
78 
79 #ifndef MAP_HUGE_SHIFT
80 /* older kernels (or FreeBSD) will not have this define */
81 #define HUGE_SHIFT (26)
82 #else
83 #define HUGE_SHIFT MAP_HUGE_SHIFT
84 #endif
85 
86 #define EXTMEM_HEAP_NAME "extmem"
87 /*
88  * Zone size with the malloc overhead (max of debug and release variants)
89  * must fit into the smallest supported hugepage size (2M),
90  * so that an IOVA-contiguous zone of this size can always be allocated
91  * if there are free 2M hugepages.
92  */
93 #define EXTBUF_ZONE_SIZE (RTE_PGSIZE_2M - 4 * RTE_CACHE_LINE_SIZE)
94 
95 uint16_t verbose_level = 0; /**< Silent by default. */
96 int testpmd_logtype; /**< Log type for testpmd logs */
97 
98 /* Use the main core for the command line? */
99 uint8_t interactive = 0;
100 uint8_t auto_start = 0;
101 uint8_t tx_first;
102 char cmdline_filename[PATH_MAX] = {0};
103 
104 /*
105  * NUMA support configuration.
106  * When set, the NUMA support attempts to dispatch the allocation of the
107  * RX and TX memory rings, and of the DMA memory buffers (mbufs) for the
108  * probed ports among the CPU sockets 0 and 1.
109  * Otherwise, all memory is allocated from CPU socket 0.
110  */
111 uint8_t numa_support = 1; /**< numa enabled by default */
112 
113 /*
114  * In UMA mode, all memory is allocated from socket 0 if --socket-num is
115  * not configured.
116  */
117 uint8_t socket_num = UMA_NO_CONFIG;
118 
119 /*
120  * Select mempool allocation type:
121  * - native: use regular DPDK memory
122  * - anon: use regular DPDK memory to create mempool, but populate using
123  *         anonymous memory (may not be IOVA-contiguous)
124  * - xmem: use externally allocated hugepage memory
125  */
126 uint8_t mp_alloc_type = MP_ALLOC_NATIVE;
127 
128 /*
129  * Store the specified sockets on which the memory pools used by the ports
130  * are allocated.
131  */
132 uint8_t port_numa[RTE_MAX_ETHPORTS];
133 
134 /*
135  * Store the specified sockets on which the RX rings used by the ports
136  * are allocated.
137  */
138 uint8_t rxring_numa[RTE_MAX_ETHPORTS];
139 
140 /*
141  * Store the specified sockets on which the TX rings used by the ports
142  * are allocated.
143  */
144 uint8_t txring_numa[RTE_MAX_ETHPORTS];
145 
146 /*
147  * Record the Ethernet address of peer target ports to which packets are
148  * forwarded.
149  * Must be instantiated with the Ethernet addresses of the peer traffic generator
150  * ports.
151  */
152 struct rte_ether_addr peer_eth_addrs[RTE_MAX_ETHPORTS];
153 portid_t nb_peer_eth_addrs = 0;
154 
155 /*
156  * Probed Target Environment.
157  */
158 struct rte_port *ports;	       /**< For all probed ethernet ports. */
159 portid_t nb_ports;             /**< Number of probed ethernet ports. */
160 struct fwd_lcore **fwd_lcores; /**< For all probed logical cores. */
161 lcoreid_t nb_lcores;           /**< Number of probed logical cores. */
162 
163 portid_t ports_ids[RTE_MAX_ETHPORTS]; /**< Store all port ids. */
164 
165 /*
166  * Test Forwarding Configuration.
167  *    nb_fwd_lcores <= nb_cfg_lcores <= nb_lcores
168  *    nb_fwd_ports  <= nb_cfg_ports  <= nb_ports
169  */
170 lcoreid_t nb_cfg_lcores; /**< Number of configured logical cores. */
171 lcoreid_t nb_fwd_lcores; /**< Number of forwarding logical cores. */
172 portid_t  nb_cfg_ports;  /**< Number of configured ports. */
173 portid_t  nb_fwd_ports;  /**< Number of forwarding ports. */
174 
175 unsigned int fwd_lcores_cpuids[RTE_MAX_LCORE]; /**< CPU ids configuration. */
176 portid_t fwd_ports_ids[RTE_MAX_ETHPORTS];      /**< Port ids configuration. */
177 
178 struct fwd_stream **fwd_streams; /**< For each RX queue of each port. */
179 streamid_t nb_fwd_streams;       /**< Is equal to (nb_ports * nb_rxq). */
180 
181 /*
182  * Forwarding engines.
183  */
184 struct fwd_engine * fwd_engines[] = {
185 	&io_fwd_engine,
186 	&mac_fwd_engine,
187 	&mac_swap_engine,
188 	&flow_gen_engine,
189 	&rx_only_engine,
190 	&tx_only_engine,
191 	&csum_fwd_engine,
192 	&icmp_echo_engine,
193 	&noisy_vnf_engine,
194 	&five_tuple_swap_fwd_engine,
195 #ifdef RTE_LIBRTE_IEEE1588
196 	&ieee1588_fwd_engine,
197 #endif
198 	&shared_rxq_engine,
199 	NULL,
200 };
201 
202 struct rte_mempool *mempools[RTE_MAX_NUMA_NODES * MAX_SEGS_BUFFER_SPLIT];
203 uint16_t mempool_flags;
204 
205 struct fwd_config cur_fwd_config;
206 struct fwd_engine *cur_fwd_eng = &io_fwd_engine; /**< IO mode by default. */
207 uint32_t retry_enabled;
208 uint32_t burst_tx_delay_time = BURST_TX_WAIT_US;
209 uint32_t burst_tx_retry_num = BURST_TX_RETRIES;
210 
211 uint32_t mbuf_data_size_n = 1; /* Number of specified mbuf sizes. */
212 uint16_t mbuf_data_size[MAX_SEGS_BUFFER_SPLIT] = {
213 	DEFAULT_MBUF_DATA_SIZE
214 }; /**< Mbuf data space size. */
215 uint32_t param_total_num_mbufs = 0;  /**< number of mbufs in all pools - if
216                                       * specified on command-line. */
217 uint16_t stats_period; /**< Period to show statistics (disabled by default) */
218 
219 /** Extended statistics to show. */
220 struct rte_eth_xstat_name *xstats_display;
221 
222 unsigned int xstats_display_num; /**< Size of extended statistics to show */
223 
224 /*
225  * In a container, the process running with the 'stats-period' option cannot be
226  * terminated. Set this flag to exit the stats period loop on SIGINT/SIGTERM.
227  */
228 uint8_t f_quit;
229 
230 /*
231  * Max Rx frame size, set by '--max-pkt-len' parameter.
232  */
233 uint32_t max_rx_pkt_len;
234 
235 /*
236  * Configuration of packet segments used to scatter received packets
237  * if any of the split features is configured.
238  */
239 uint16_t rx_pkt_seg_lengths[MAX_SEGS_BUFFER_SPLIT];
240 uint8_t  rx_pkt_nb_segs; /**< Number of segments to split */
241 uint16_t rx_pkt_seg_offsets[MAX_SEGS_BUFFER_SPLIT];
242 uint8_t  rx_pkt_nb_offs; /**< Number of specified offsets */
243 
244 /*
245  * Configuration of packet segments used by the "txonly" processing engine.
246  */
247 uint16_t tx_pkt_length = TXONLY_DEF_PACKET_LEN; /**< TXONLY packet length. */
248 uint16_t tx_pkt_seg_lengths[RTE_MAX_SEGS_PER_PKT] = {
249 	TXONLY_DEF_PACKET_LEN,
250 };
251 uint8_t  tx_pkt_nb_segs = 1; /**< Number of segments in TXONLY packets */
252 
253 enum tx_pkt_split tx_pkt_split = TX_PKT_SPLIT_OFF;
254 /**< Split policy for packets to TX. */
255 
256 uint8_t txonly_multi_flow;
257 /**< Whether multiple flows are generated in TXONLY mode. */
258 
259 uint32_t tx_pkt_times_inter;
260 /**< Timings for send scheduling in TXONLY mode, time between bursts. */
261 
262 uint32_t tx_pkt_times_intra;
263 /**< Timings for send scheduling in TXONLY mode, time between packets. */
264 
265 uint16_t nb_pkt_per_burst = DEF_PKT_BURST; /**< Number of packets per burst. */
266 uint16_t nb_pkt_flowgen_clones; /**< Number of Tx packet clones to send in flowgen mode. */
267 int nb_flows_flowgen = 1024; /**< Number of flows in flowgen mode. */
268 uint16_t mb_mempool_cache = DEF_MBUF_CACHE; /**< Size of mbuf mempool cache. */
269 
270 /* Whether the current configuration is in DCB mode; 0 means it is not. */
271 uint8_t dcb_config = 0;
272 
273 /*
274  * Configurable number of RX/TX queues.
275  */
276 queueid_t nb_hairpinq; /**< Number of hairpin queues per port. */
277 queueid_t nb_rxq = 1; /**< Number of RX queues per port. */
278 queueid_t nb_txq = 1; /**< Number of TX queues per port. */
279 
280 /*
281  * Configurable number of RX/TX ring descriptors.
282  * Defaults are supplied by drivers via ethdev.
283  */
284 #define RTE_TEST_RX_DESC_DEFAULT 0
285 #define RTE_TEST_TX_DESC_DEFAULT 0
286 uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT; /**< Number of RX descriptors. */
287 uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT; /**< Number of TX descriptors. */
288 
289 #define RTE_PMD_PARAM_UNSET -1
290 /*
291  * Configurable values of RX and TX ring threshold registers.
292  */
293 
294 int8_t rx_pthresh = RTE_PMD_PARAM_UNSET;
295 int8_t rx_hthresh = RTE_PMD_PARAM_UNSET;
296 int8_t rx_wthresh = RTE_PMD_PARAM_UNSET;
297 
298 int8_t tx_pthresh = RTE_PMD_PARAM_UNSET;
299 int8_t tx_hthresh = RTE_PMD_PARAM_UNSET;
300 int8_t tx_wthresh = RTE_PMD_PARAM_UNSET;
301 
302 /*
303  * Configurable value of RX free threshold.
304  */
305 int16_t rx_free_thresh = RTE_PMD_PARAM_UNSET;
306 
307 /*
308  * Configurable value of RX drop enable.
309  */
310 int8_t rx_drop_en = RTE_PMD_PARAM_UNSET;
311 
312 /*
313  * Configurable value of TX free threshold.
314  */
315 int16_t tx_free_thresh = RTE_PMD_PARAM_UNSET;
316 
317 /*
318  * Configurable value of TX RS bit threshold.
319  */
320 int16_t tx_rs_thresh = RTE_PMD_PARAM_UNSET;
321 
322 /*
323  * Configurable value of buffered packets before sending.
324  */
325 uint16_t noisy_tx_sw_bufsz;
326 
327 /*
328  * Configurable value of packet buffer timeout.
329  */
330 uint16_t noisy_tx_sw_buf_flush_time;
331 
332 /*
333  * Configurable value for size of VNF internal memory area
334  * used for simulating noisy neighbour behaviour
335  */
336 uint64_t noisy_lkup_mem_sz;
337 
338 /*
339  * Configurable value of number of random writes done in
340  * VNF simulation memory area.
341  */
342 uint64_t noisy_lkup_num_writes;
343 
344 /*
345  * Configurable value of number of random reads done in
346  * VNF simulation memory area.
347  */
348 uint64_t noisy_lkup_num_reads;
349 
350 /*
351  * Configurable value of number of random reads/writes done in
352  * VNF simulation memory area.
353  */
354 uint64_t noisy_lkup_num_reads_writes;
355 
356 /*
357  * Receive Side Scaling (RSS) configuration.
358  */
359 uint64_t rss_hf = RTE_ETH_RSS_IP; /* RSS IP by default. */
360 
361 /*
362  * Port topology configuration
363  */
364 uint16_t port_topology = PORT_TOPOLOGY_PAIRED; /* Ports are paired by default */
365 
366 /*
367  * Avoid flushing all the RX streams before starting forwarding.
368  */
369 uint8_t no_flush_rx = 0; /* flush by default */
370 
371 /*
372  * Flow API isolated mode.
373  */
374 uint8_t flow_isolate_all;
375 
376 /*
377  * Avoid checking the link status when starting/stopping a port.
378  */
379 uint8_t no_link_check = 0; /* check by default */
380 
381 /*
382  * Don't automatically start all ports in interactive mode.
383  */
384 uint8_t no_device_start = 0;
385 
386 /*
387  * Enable link status change notification
388  */
389 uint8_t lsc_interrupt = 1; /* enabled by default */
390 
391 /*
392  * Enable device removal notification.
393  */
394 uint8_t rmv_interrupt = 1; /* enabled by default */
395 
396 uint8_t hot_plug = 0; /**< hotplug disabled by default. */
397 
398 /* After attach, port setup is called on event or by iterator */
399 bool setup_on_probe_event = true;
400 
401 /* Clear ptypes on port initialization. */
402 uint8_t clear_ptypes = true;
403 
404 /* Hairpin ports configuration mode. */
405 uint16_t hairpin_mode;
406 
407 /* Pretty printing of ethdev events */
408 static const char * const eth_event_desc[] = {
409 	[RTE_ETH_EVENT_UNKNOWN] = "unknown",
410 	[RTE_ETH_EVENT_INTR_LSC] = "link state change",
411 	[RTE_ETH_EVENT_QUEUE_STATE] = "queue state",
412 	[RTE_ETH_EVENT_INTR_RESET] = "reset",
413 	[RTE_ETH_EVENT_VF_MBOX] = "VF mbox",
414 	[RTE_ETH_EVENT_IPSEC] = "IPsec",
415 	[RTE_ETH_EVENT_MACSEC] = "MACsec",
416 	[RTE_ETH_EVENT_INTR_RMV] = "device removal",
417 	[RTE_ETH_EVENT_NEW] = "device probed",
418 	[RTE_ETH_EVENT_DESTROY] = "device released",
419 	[RTE_ETH_EVENT_FLOW_AGED] = "flow aged",
420 	[RTE_ETH_EVENT_MAX] = NULL,
421 };
422 
423 /*
424  * Display or mask ether events
425  * Default to all events except VF_MBOX
426  */
427 uint32_t event_print_mask = (UINT32_C(1) << RTE_ETH_EVENT_UNKNOWN) |
428 			    (UINT32_C(1) << RTE_ETH_EVENT_INTR_LSC) |
429 			    (UINT32_C(1) << RTE_ETH_EVENT_QUEUE_STATE) |
430 			    (UINT32_C(1) << RTE_ETH_EVENT_INTR_RESET) |
431 			    (UINT32_C(1) << RTE_ETH_EVENT_IPSEC) |
432 			    (UINT32_C(1) << RTE_ETH_EVENT_MACSEC) |
433 			    (UINT32_C(1) << RTE_ETH_EVENT_INTR_RMV) |
434 			    (UINT32_C(1) << RTE_ETH_EVENT_FLOW_AGED);
435 /*
436  * Decide whether all memory is locked for performance.
437  */
438 int do_mlockall = 0;
439 
440 /*
441  * NIC bypass mode configuration options.
442  */
443 
444 #if defined RTE_NET_IXGBE && defined RTE_LIBRTE_IXGBE_BYPASS
445 /* The NIC bypass watchdog timeout. */
446 uint32_t bypass_timeout = RTE_PMD_IXGBE_BYPASS_TMT_OFF;
447 #endif
448 
449 
450 #ifdef RTE_LIB_LATENCYSTATS
451 
452 /*
453  * Set when latency stats are enabled on the command line.
454  */
455 uint8_t latencystats_enabled;
456 
457 /*
458  * Lcore ID to service latency statistics.
459  */
460 lcoreid_t latencystats_lcore_id = -1;
461 
462 #endif
463 
464 /*
465  * Ethernet device configuration.
466  */
467 struct rte_eth_rxmode rx_mode;
468 
469 struct rte_eth_txmode tx_mode = {
470 	.offloads = RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE,
471 };
472 
473 struct rte_eth_fdir_conf fdir_conf = {
474 	.mode = RTE_FDIR_MODE_NONE,
475 	.pballoc = RTE_ETH_FDIR_PBALLOC_64K,
476 	.status = RTE_FDIR_REPORT_STATUS,
477 	.mask = {
478 		.vlan_tci_mask = 0xFFEF,
479 		.ipv4_mask     = {
480 			.src_ip = 0xFFFFFFFF,
481 			.dst_ip = 0xFFFFFFFF,
482 		},
483 		.ipv6_mask     = {
484 			.src_ip = {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
485 			.dst_ip = {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
486 		},
487 		.src_port_mask = 0xFFFF,
488 		.dst_port_mask = 0xFFFF,
489 		.mac_addr_byte_mask = 0xFF,
490 		.tunnel_type_mask = 1,
491 		.tunnel_id_mask = 0xFFFFFFFF,
492 	},
493 	.drop_queue = 127,
494 };
495 
496 volatile int test_done = 1; /* stop packet forwarding when set to 1. */
497 
498 /*
499  * Display zero values by default for xstats
500  */
501 uint8_t xstats_hide_zero;
502 
503 /*
504  * Measurement of CPU cycles is disabled by default.
505  */
506 uint8_t record_core_cycles;
507 
508 /*
509  * Display of RX and TX bursts disabled by default
510  */
511 uint8_t record_burst_stats;
512 
513 /*
514  * Number of ports per shared Rx queue group, 0 to disable.
515  */
516 uint32_t rxq_share;
517 
518 unsigned int num_sockets = 0;
519 unsigned int socket_ids[RTE_MAX_NUMA_NODES];
520 
521 #ifdef RTE_LIB_BITRATESTATS
522 /* Bitrate statistics */
523 struct rte_stats_bitrates *bitrate_data;
524 lcoreid_t bitrate_lcore_id;
525 uint8_t bitrate_enabled;
526 #endif
527 
528 #ifdef RTE_LIB_GRO
529 struct gro_status gro_ports[RTE_MAX_ETHPORTS];
530 uint8_t gro_flush_cycles = GRO_DEFAULT_FLUSH_CYCLES;
531 #endif
532 
533 /*
534  * Hexadecimal bitmask of the RX multi-queue modes that can be enabled.
535  */
536 enum rte_eth_rx_mq_mode rx_mq_mode = RTE_ETH_MQ_RX_VMDQ_DCB_RSS;
537 
538 /*
539  * Used to set forced link speed
540  */
541 uint32_t eth_link_speed;
542 
543 /*
544  * ID of the current process in multi-process, used to
545  * configure the queues to be polled.
546  */
547 int proc_id;
548 
549 /*
550  * Number of processes in multi-process, used to
551  * configure the queues to be polled.
552  */
553 unsigned int num_procs = 1;
554 
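/*
 * Negotiate delivery of Rx metadata (flow FLAG, flow MARK and tunnel ID)
 * with the PMD. Only the primary process negotiates; unsupported features
 * are merely logged.
 */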
555 static void
556 eth_rx_metadata_negotiate_mp(uint16_t port_id)
557 {
558 	uint64_t rx_meta_features = 0;
559 	int ret;
560 
561 	if (!is_proc_primary())
562 		return;
563 
564 	rx_meta_features |= RTE_ETH_RX_METADATA_USER_FLAG;
565 	rx_meta_features |= RTE_ETH_RX_METADATA_USER_MARK;
566 	rx_meta_features |= RTE_ETH_RX_METADATA_TUNNEL_ID;
567 
568 	ret = rte_eth_rx_metadata_negotiate(port_id, &rx_meta_features);
569 	if (ret == 0) {
570 		if (!(rx_meta_features & RTE_ETH_RX_METADATA_USER_FLAG)) {
571 			TESTPMD_LOG(DEBUG, "Flow action FLAG will not affect Rx mbufs on port %u\n",
572 				    port_id);
573 		}
574 
575 		if (!(rx_meta_features & RTE_ETH_RX_METADATA_USER_MARK)) {
576 			TESTPMD_LOG(DEBUG, "Flow action MARK will not affect Rx mbufs on port %u\n",
577 				    port_id);
578 		}
579 
580 		if (!(rx_meta_features & RTE_ETH_RX_METADATA_TUNNEL_ID)) {
581 			TESTPMD_LOG(DEBUG, "Flow tunnel offload support might be limited or unavailable on port %u\n",
582 				    port_id);
583 		}
584 	} else if (ret != -ENOTSUP) {
585 		rte_exit(EXIT_FAILURE, "Error when negotiating Rx meta features on port %u: %s\n",
586 			 port_id, rte_strerror(-ret));
587 	}
588 }
589 
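/*
 * Thin multi-process wrappers: the following calls only take effect in the
 * primary process; in secondary processes they are no-ops returning success.
 */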
590 static int
591 eth_dev_configure_mp(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
592 		      const struct rte_eth_conf *dev_conf)
593 {
594 	if (is_proc_primary())
595 		return rte_eth_dev_configure(port_id, nb_rx_q, nb_tx_q,
596 					dev_conf);
597 	return 0;
598 }
599 
600 static int
601 eth_dev_start_mp(uint16_t port_id)
602 {
603 	if (is_proc_primary())
604 		return rte_eth_dev_start(port_id);
605 
606 	return 0;
607 }
608 
609 static int
610 eth_dev_stop_mp(uint16_t port_id)
611 {
612 	if (is_proc_primary())
613 		return rte_eth_dev_stop(port_id);
614 
615 	return 0;
616 }
617 
618 static void
619 mempool_free_mp(struct rte_mempool *mp)
620 {
621 	if (is_proc_primary())
622 		rte_mempool_free(mp);
623 }
624 
625 static int
626 eth_dev_set_mtu_mp(uint16_t port_id, uint16_t mtu)
627 {
628 	if (is_proc_primary())
629 		return rte_eth_dev_set_mtu(port_id, mtu);
630 
631 	return 0;
632 }
633 
634 /* Forward function declarations */
635 static void setup_attached_port(portid_t pi);
636 static void check_all_ports_link_status(uint32_t port_mask);
637 static int eth_event_callback(portid_t port_id,
638 			      enum rte_eth_event_type type,
639 			      void *param, void *ret_param);
640 static void dev_event_callback(const char *device_name,
641 				enum rte_dev_event_type type,
642 				void *param);
643 static void fill_xstats_display_info(void);
644 
645 /*
646  * Check if all the ports are started.
647  * If yes, return positive value. If not, return zero.
648  */
649 static int all_ports_started(void);
650 
651 #ifdef RTE_LIB_GSO
652 struct gso_status gso_ports[RTE_MAX_ETHPORTS];
653 uint16_t gso_max_segment_size = RTE_ETHER_MAX_LEN - RTE_ETHER_CRC_LEN;
654 #endif
655 
656 /* Holds the registered mbuf dynamic flags names. */
657 char dynf_names[64][RTE_MBUF_DYN_NAMESIZE];
658 
659 
660 /*
661  * Helper function to check whether a socket id has not been discovered yet.
662  * If it is new, return a positive value; otherwise return zero.
663  */
664 int
665 new_socket_id(unsigned int socket_id)
666 {
667 	unsigned int i;
668 
669 	for (i = 0; i < num_sockets; i++) {
670 		if (socket_ids[i] == socket_id)
671 			return 0;
672 	}
673 	return 1;
674 }
675 
676 /*
677  * Setup default configuration.
678  */
679 static void
680 set_default_fwd_lcores_config(void)
681 {
682 	unsigned int i;
683 	unsigned int nb_lc;
684 	unsigned int sock_num;
685 
686 	nb_lc = 0;
687 	for (i = 0; i < RTE_MAX_LCORE; i++) {
688 		if (!rte_lcore_is_enabled(i))
689 			continue;
690 		sock_num = rte_lcore_to_socket_id(i);
691 		if (new_socket_id(sock_num)) {
692 			if (num_sockets >= RTE_MAX_NUMA_NODES) {
693 				rte_exit(EXIT_FAILURE,
694 					 "Total sockets greater than %u\n",
695 					 RTE_MAX_NUMA_NODES);
696 			}
697 			socket_ids[num_sockets++] = sock_num;
698 		}
699 		if (i == rte_get_main_lcore())
700 			continue;
701 		fwd_lcores_cpuids[nb_lc++] = i;
702 	}
703 	nb_lcores = (lcoreid_t) nb_lc;
704 	nb_cfg_lcores = nb_lcores;
705 	nb_fwd_lcores = 1;
706 }
707 
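/*
 * Initialise the default peer Ethernet addresses: locally administered
 * addresses carrying the port id in the last byte.
 */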
708 static void
709 set_def_peer_eth_addrs(void)
710 {
711 	portid_t i;
712 
713 	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
714 		peer_eth_addrs[i].addr_bytes[0] = RTE_ETHER_LOCAL_ADMIN_ADDR;
715 		peer_eth_addrs[i].addr_bytes[5] = i;
716 	}
717 }
718 
719 static void
720 set_default_fwd_ports_config(void)
721 {
722 	portid_t pt_id;
723 	int i = 0;
724 
725 	RTE_ETH_FOREACH_DEV(pt_id) {
726 		fwd_ports_ids[i++] = pt_id;
727 
728 		/* Update sockets info according to the attached device */
729 		int socket_id = rte_eth_dev_socket_id(pt_id);
730 		if (socket_id >= 0 && new_socket_id(socket_id)) {
731 			if (num_sockets >= RTE_MAX_NUMA_NODES) {
732 				rte_exit(EXIT_FAILURE,
733 					 "Total sockets greater than %u\n",
734 					 RTE_MAX_NUMA_NODES);
735 			}
736 			socket_ids[num_sockets++] = socket_id;
737 		}
738 	}
739 
740 	nb_cfg_ports = nb_ports;
741 	nb_fwd_ports = nb_ports;
742 }
743 
744 void
745 set_def_fwd_config(void)
746 {
747 	set_default_fwd_lcores_config();
748 	set_def_peer_eth_addrs();
749 	set_default_fwd_ports_config();
750 }
751 
752 #ifndef RTE_EXEC_ENV_WINDOWS
753 /* extremely pessimistic estimation of memory required to create a mempool */
754 static int
755 calc_mem_size(uint32_t nb_mbufs, uint32_t mbuf_sz, size_t pgsz, size_t *out)
756 {
757 	unsigned int n_pages, mbuf_per_pg, leftover;
758 	uint64_t total_mem, mbuf_mem, obj_sz;
759 
760 	/* there is no good way to predict how much space the mempool will
761 	 * occupy because it will allocate chunks on the fly, and some of those
762 	 * will come from default DPDK memory while some will come from our
763 	 * external memory, so just assume 128MB will be enough for everyone.
764 	 */
765 	uint64_t hdr_mem = 128 << 20;
766 
767 	/* account for possible non-contiguousness */
768 	obj_sz = rte_mempool_calc_obj_size(mbuf_sz, 0, NULL);
769 	if (obj_sz > pgsz) {
770 		TESTPMD_LOG(ERR, "Object size is bigger than page size\n");
771 		return -1;
772 	}
773 
774 	mbuf_per_pg = pgsz / obj_sz;
775 	leftover = (nb_mbufs % mbuf_per_pg) > 0;
776 	n_pages = (nb_mbufs / mbuf_per_pg) + leftover;
777 
778 	mbuf_mem = n_pages * pgsz;
779 
780 	total_mem = RTE_ALIGN(hdr_mem + mbuf_mem, pgsz);
781 
782 	if (total_mem > SIZE_MAX) {
783 		TESTPMD_LOG(ERR, "Memory size too big\n");
784 		return -1;
785 	}
786 	*out = (size_t)total_mem;
787 
788 	return 0;
789 }
790 
791 static int
792 pagesz_flags(uint64_t page_sz)
793 {
794 	/* as per mmap() manpage, all page sizes are log2 of page size
795 	 * shifted by MAP_HUGE_SHIFT
796 	 */
797 	int log2 = rte_log2_u64(page_sz);
798 
799 	return (log2 << HUGE_SHIFT);
800 }
801 
802 static void *
803 alloc_mem(size_t memsz, size_t pgsz, bool huge)
804 {
805 	void *addr;
806 	int flags;
807 
808 	/* allocate anonymous hugepages */
809 	flags = MAP_ANONYMOUS | MAP_PRIVATE;
810 	if (huge)
811 		flags |= HUGE_FLAG | pagesz_flags(pgsz);
812 
813 	addr = mmap(NULL, memsz, PROT_READ | PROT_WRITE, flags, -1, 0);
814 	if (addr == MAP_FAILED)
815 		return NULL;
816 
817 	return addr;
818 }
819 
820 struct extmem_param {
821 	void *addr;
822 	size_t len;
823 	size_t pgsz;
824 	rte_iova_t *iova_table;
825 	unsigned int iova_table_len;
826 };
827 
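/*
 * Reserve an anonymous memory area for external mempool memory, trying the
 * supported page sizes in turn, and record the IOVA address of every page.
 */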
828 static int
829 create_extmem(uint32_t nb_mbufs, uint32_t mbuf_sz, struct extmem_param *param,
830 		bool huge)
831 {
832 	uint64_t pgsizes[] = {RTE_PGSIZE_2M, RTE_PGSIZE_1G, /* x86_64, ARM */
833 			RTE_PGSIZE_16M, RTE_PGSIZE_16G};    /* POWER */
834 	unsigned int cur_page, n_pages, pgsz_idx;
835 	size_t mem_sz, cur_pgsz;
836 	rte_iova_t *iovas = NULL;
837 	void *addr;
838 	int ret;
839 
840 	for (pgsz_idx = 0; pgsz_idx < RTE_DIM(pgsizes); pgsz_idx++) {
841 		/* skip anything that is too big */
842 		if (pgsizes[pgsz_idx] > SIZE_MAX)
843 			continue;
844 
845 		cur_pgsz = pgsizes[pgsz_idx];
846 
847 		/* if we were told not to allocate hugepages, override */
848 		if (!huge)
849 			cur_pgsz = sysconf(_SC_PAGESIZE);
850 
851 		ret = calc_mem_size(nb_mbufs, mbuf_sz, cur_pgsz, &mem_sz);
852 		if (ret < 0) {
853 			TESTPMD_LOG(ERR, "Cannot calculate memory size\n");
854 			return -1;
855 		}
856 
857 		/* allocate our memory */
858 		addr = alloc_mem(mem_sz, cur_pgsz, huge);
859 
860 		/* if we couldn't allocate memory with a specified page size,
861 		 * that doesn't mean we can't do it with other page sizes, so
862 		 * try another one.
863 		 */
864 		if (addr == NULL)
865 			continue;
866 
867 		/* store IOVA addresses for every page in this memory area */
868 		n_pages = mem_sz / cur_pgsz;
869 
870 		iovas = malloc(sizeof(*iovas) * n_pages);
871 
872 		if (iovas == NULL) {
873 			TESTPMD_LOG(ERR, "Cannot allocate memory for iova addresses\n");
874 			goto fail;
875 		}
876 		/* lock memory if it's not huge pages */
877 		if (!huge)
878 			mlock(addr, mem_sz);
879 
880 		/* populate IOVA addresses */
881 		for (cur_page = 0; cur_page < n_pages; cur_page++) {
882 			rte_iova_t iova;
883 			size_t offset;
884 			void *cur;
885 
886 			offset = cur_pgsz * cur_page;
887 			cur = RTE_PTR_ADD(addr, offset);
888 
889 			/* touch the page before getting its IOVA */
890 			*(volatile char *)cur = 0;
891 
892 			iova = rte_mem_virt2iova(cur);
893 
894 			iovas[cur_page] = iova;
895 		}
896 
897 		break;
898 	}
899 	/* if we couldn't allocate anything */
900 	if (iovas == NULL)
901 		return -1;
902 
903 	param->addr = addr;
904 	param->len = mem_sz;
905 	param->pgsz = cur_pgsz;
906 	param->iova_table = iovas;
907 	param->iova_table_len = n_pages;
908 
909 	return 0;
910 fail:
911 	free(iovas);
912 	if (addr)
913 		munmap(addr, mem_sz);
914 
915 	return -1;
916 }
917 
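/*
 * Create the external memory heap if it does not exist yet, allocate an
 * external memory area and add it to that heap.
 */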
918 static int
919 setup_extmem(uint32_t nb_mbufs, uint32_t mbuf_sz, bool huge)
920 {
921 	struct extmem_param param;
922 	int socket_id, ret;
923 
924 	memset(&param, 0, sizeof(param));
925 
926 	/* check if our heap exists */
927 	socket_id = rte_malloc_heap_get_socket(EXTMEM_HEAP_NAME);
928 	if (socket_id < 0) {
929 		/* create our heap */
930 		ret = rte_malloc_heap_create(EXTMEM_HEAP_NAME);
931 		if (ret < 0) {
932 			TESTPMD_LOG(ERR, "Cannot create heap\n");
933 			return -1;
934 		}
935 	}
936 
937 	ret = create_extmem(nb_mbufs, mbuf_sz, &param, huge);
938 	if (ret < 0) {
939 		TESTPMD_LOG(ERR, "Cannot create memory area\n");
940 		return -1;
941 	}
942 
943 	/* we now have a valid memory area, so add it to heap */
944 	ret = rte_malloc_heap_memory_add(EXTMEM_HEAP_NAME,
945 			param.addr, param.len, param.iova_table,
946 			param.iova_table_len, param.pgsz);
947 
948 	/* when using VFIO, memory is automatically mapped for DMA by EAL */
949 
950 	/* not needed any more */
951 	free(param.iova_table);
952 
953 	if (ret < 0) {
954 		TESTPMD_LOG(ERR, "Cannot add memory to heap\n");
955 		munmap(param.addr, param.len);
956 		return -1;
957 	}
958 
959 	/* success */
960 
961 	TESTPMD_LOG(DEBUG, "Allocated %zuMB of external memory\n",
962 			param.len >> 20);
963 
964 	return 0;
965 }
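/*
 * Mempool memory-chunk callback: DMA-unmap the chunk from every port and
 * un-register it from EAL.
 */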
966 static void
967 dma_unmap_cb(struct rte_mempool *mp __rte_unused, void *opaque __rte_unused,
968 	     struct rte_mempool_memhdr *memhdr, unsigned mem_idx __rte_unused)
969 {
970 	uint16_t pid = 0;
971 	int ret;
972 
973 	RTE_ETH_FOREACH_DEV(pid) {
974 		struct rte_eth_dev_info dev_info;
975 
976 		ret = eth_dev_info_get_print_err(pid, &dev_info);
977 		if (ret != 0) {
978 			TESTPMD_LOG(DEBUG,
979 				    "unable to get device info for port %d on addr 0x%p, "
980 				    "mempool unmapping will not be performed\n",
981 				    pid, memhdr->addr);
982 			continue;
983 		}
984 
985 		ret = rte_dev_dma_unmap(dev_info.device, memhdr->addr, 0, memhdr->len);
986 		if (ret) {
987 			TESTPMD_LOG(DEBUG,
988 				    "unable to DMA unmap addr 0x%p "
989 				    "for device %s\n",
990 				    memhdr->addr, dev_info.device->name);
991 		}
992 	}
993 	ret = rte_extmem_unregister(memhdr->addr, memhdr->len);
994 	if (ret) {
995 		TESTPMD_LOG(DEBUG,
996 			    "unable to un-register addr 0x%p\n", memhdr->addr);
997 	}
998 }
999 
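/*
 * Mempool memory-chunk callback: register the chunk with EAL and DMA-map it
 * for every port.
 */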
1000 static void
1001 dma_map_cb(struct rte_mempool *mp __rte_unused, void *opaque __rte_unused,
1002 	   struct rte_mempool_memhdr *memhdr, unsigned mem_idx __rte_unused)
1003 {
1004 	uint16_t pid = 0;
1005 	size_t page_size = sysconf(_SC_PAGESIZE);
1006 	int ret;
1007 
1008 	ret = rte_extmem_register(memhdr->addr, memhdr->len, NULL, 0,
1009 				  page_size);
1010 	if (ret) {
1011 		TESTPMD_LOG(DEBUG,
1012 			    "unable to register addr 0x%p\n", memhdr->addr);
1013 		return;
1014 	}
1015 	RTE_ETH_FOREACH_DEV(pid) {
1016 		struct rte_eth_dev_info dev_info;
1017 
1018 		ret = eth_dev_info_get_print_err(pid, &dev_info);
1019 		if (ret != 0) {
1020 			TESTPMD_LOG(DEBUG,
1021 				    "unable to get device info for port %d on addr 0x%p, "
1022 				    "mempool mapping will not be performed\n",
1023 				    pid, memhdr->addr);
1024 			continue;
1025 		}
1026 		ret = rte_dev_dma_map(dev_info.device, memhdr->addr, 0, memhdr->len);
1027 		if (ret) {
1028 			TESTPMD_LOG(DEBUG,
1029 				    "unable to DMA map addr 0x%p "
1030 				    "for device %s\n",
1031 				    memhdr->addr, dev_info.device->name);
1032 		}
1033 	}
1034 }
1035 #endif
1036 
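/*
 * Reserve IOVA-contiguous memzones for pinned external buffers and fill in
 * the rte_pktmbuf_extmem descriptors. Return the number of descriptors, or
 * 0 on failure (with errno set).
 */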
1037 static unsigned int
1038 setup_extbuf(uint32_t nb_mbufs, uint16_t mbuf_sz, unsigned int socket_id,
1039 	    char *pool_name, struct rte_pktmbuf_extmem **ext_mem)
1040 {
1041 	struct rte_pktmbuf_extmem *xmem;
1042 	unsigned int ext_num, zone_num, elt_num;
1043 	uint16_t elt_size;
1044 
1045 	elt_size = RTE_ALIGN_CEIL(mbuf_sz, RTE_CACHE_LINE_SIZE);
1046 	elt_num = EXTBUF_ZONE_SIZE / elt_size;
1047 	zone_num = (nb_mbufs + elt_num - 1) / elt_num;
1048 
1049 	xmem = malloc(sizeof(struct rte_pktmbuf_extmem) * zone_num);
1050 	if (xmem == NULL) {
1051 		TESTPMD_LOG(ERR, "Cannot allocate memory for "
1052 				 "external buffer descriptors\n");
1053 		*ext_mem = NULL;
1054 		return 0;
1055 	}
1056 	for (ext_num = 0; ext_num < zone_num; ext_num++) {
1057 		struct rte_pktmbuf_extmem *xseg = xmem + ext_num;
1058 		const struct rte_memzone *mz;
1059 		char mz_name[RTE_MEMZONE_NAMESIZE];
1060 		int ret;
1061 
1062 		ret = snprintf(mz_name, sizeof(mz_name),
1063 			RTE_MEMPOOL_MZ_FORMAT "_xb_%u", pool_name, ext_num);
1064 		if (ret < 0 || ret >= (int)sizeof(mz_name)) {
1065 			errno = ENAMETOOLONG;
1066 			ext_num = 0;
1067 			break;
1068 		}
1069 		mz = rte_memzone_reserve(mz_name, EXTBUF_ZONE_SIZE,
1070 					 socket_id,
1071 					 RTE_MEMZONE_IOVA_CONTIG |
1072 					 RTE_MEMZONE_1GB |
1073 					 RTE_MEMZONE_SIZE_HINT_ONLY);
1074 		if (mz == NULL) {
1075 			/*
1076 			 * The caller exits on external buffer creation
1077 			 * error, so there is no need to free memzones.
1078 			 */
1079 			errno = ENOMEM;
1080 			ext_num = 0;
1081 			break;
1082 		}
1083 		xseg->buf_ptr = mz->addr;
1084 		xseg->buf_iova = mz->iova;
1085 		xseg->buf_len = EXTBUF_ZONE_SIZE;
1086 		xseg->elt_size = elt_size;
1087 	}
1088 	if (ext_num == 0 && xmem != NULL) {
1089 		free(xmem);
1090 		xmem = NULL;
1091 	}
1092 	*ext_mem = xmem;
1093 	return ext_num;
1094 }
1095 
1096 /*
1097  * Create the mbuf pool for a given socket and segment size; done once at init time.
1098  */
1099 static struct rte_mempool *
1100 mbuf_pool_create(uint16_t mbuf_seg_size, unsigned nb_mbuf,
1101 		 unsigned int socket_id, uint16_t size_idx)
1102 {
1103 	char pool_name[RTE_MEMPOOL_NAMESIZE];
1104 	struct rte_mempool *rte_mp = NULL;
1105 #ifndef RTE_EXEC_ENV_WINDOWS
1106 	uint32_t mb_size;
1107 
1108 	mb_size = sizeof(struct rte_mbuf) + mbuf_seg_size;
1109 #endif
1110 	mbuf_poolname_build(socket_id, pool_name, sizeof(pool_name), size_idx);
1111 	if (!is_proc_primary()) {
1112 		rte_mp = rte_mempool_lookup(pool_name);
1113 		if (rte_mp == NULL)
1114 			rte_exit(EXIT_FAILURE,
1115 				"Get mbuf pool for socket %u failed: %s\n",
1116 				socket_id, rte_strerror(rte_errno));
1117 		return rte_mp;
1118 	}
1119 
1120 	TESTPMD_LOG(INFO,
1121 		"create a new mbuf pool <%s>: n=%u, size=%u, socket=%u\n",
1122 		pool_name, nb_mbuf, mbuf_seg_size, socket_id);
1123 
1124 	switch (mp_alloc_type) {
1125 	case MP_ALLOC_NATIVE:
1126 		{
1127 			/* wrapper to rte_mempool_create() */
1128 			TESTPMD_LOG(INFO, "preferred mempool ops selected: %s\n",
1129 					rte_mbuf_best_mempool_ops());
1130 			rte_mp = rte_pktmbuf_pool_create(pool_name, nb_mbuf,
1131 				mb_mempool_cache, 0, mbuf_seg_size, socket_id);
1132 			break;
1133 		}
1134 #ifndef RTE_EXEC_ENV_WINDOWS
1135 	case MP_ALLOC_ANON:
1136 		{
1137 			rte_mp = rte_mempool_create_empty(pool_name, nb_mbuf,
1138 				mb_size, (unsigned int) mb_mempool_cache,
1139 				sizeof(struct rte_pktmbuf_pool_private),
1140 				socket_id, mempool_flags);
1141 			if (rte_mp == NULL)
1142 				goto err;
1143 
1144 			if (rte_mempool_populate_anon(rte_mp) == 0) {
1145 				rte_mempool_free(rte_mp);
1146 				rte_mp = NULL;
1147 				goto err;
1148 			}
1149 			rte_pktmbuf_pool_init(rte_mp, NULL);
1150 			rte_mempool_obj_iter(rte_mp, rte_pktmbuf_init, NULL);
1151 			rte_mempool_mem_iter(rte_mp, dma_map_cb, NULL);
1152 			break;
1153 		}
1154 	case MP_ALLOC_XMEM:
1155 	case MP_ALLOC_XMEM_HUGE:
1156 		{
1157 			int heap_socket;
1158 			bool huge = mp_alloc_type == MP_ALLOC_XMEM_HUGE;
1159 
1160 			if (setup_extmem(nb_mbuf, mbuf_seg_size, huge) < 0)
1161 				rte_exit(EXIT_FAILURE, "Could not create external memory\n");
1162 
1163 			heap_socket =
1164 				rte_malloc_heap_get_socket(EXTMEM_HEAP_NAME);
1165 			if (heap_socket < 0)
1166 				rte_exit(EXIT_FAILURE, "Could not get external memory socket ID\n");
1167 
1168 			TESTPMD_LOG(INFO, "preferred mempool ops selected: %s\n",
1169 					rte_mbuf_best_mempool_ops());
1170 			rte_mp = rte_pktmbuf_pool_create(pool_name, nb_mbuf,
1171 					mb_mempool_cache, 0, mbuf_seg_size,
1172 					heap_socket);
1173 			break;
1174 		}
1175 #endif
1176 	case MP_ALLOC_XBUF:
1177 		{
1178 			struct rte_pktmbuf_extmem *ext_mem;
1179 			unsigned int ext_num;
1180 
1181 			ext_num = setup_extbuf(nb_mbuf,	mbuf_seg_size,
1182 					       socket_id, pool_name, &ext_mem);
1183 			if (ext_num == 0)
1184 				rte_exit(EXIT_FAILURE,
1185 					 "Can't create pinned data buffers\n");
1186 
1187 			TESTPMD_LOG(INFO, "preferred mempool ops selected: %s\n",
1188 					rte_mbuf_best_mempool_ops());
1189 			rte_mp = rte_pktmbuf_pool_create_extbuf
1190 					(pool_name, nb_mbuf, mb_mempool_cache,
1191 					 0, mbuf_seg_size, socket_id,
1192 					 ext_mem, ext_num);
1193 			free(ext_mem);
1194 			break;
1195 		}
1196 	default:
1197 		{
1198 			rte_exit(EXIT_FAILURE, "Invalid mempool creation mode\n");
1199 		}
1200 	}
1201 
1202 #ifndef RTE_EXEC_ENV_WINDOWS
1203 err:
1204 #endif
1205 	if (rte_mp == NULL) {
1206 		rte_exit(EXIT_FAILURE,
1207 			"Creation of mbuf pool for socket %u failed: %s\n",
1208 			socket_id, rte_strerror(rte_errno));
1209 	} else if (verbose_level > 0) {
1210 		rte_mempool_dump(stdout, rte_mp);
1211 	}
1212 	return rte_mp;
1213 }
1214 
1215 /*
1216  * Check whether the given socket id is valid in NUMA mode;
1217  * return 0 if valid, -1 otherwise.
1218  */
1219 static int
1220 check_socket_id(const unsigned int socket_id)
1221 {
1222 	static int warning_once = 0;
1223 
1224 	if (new_socket_id(socket_id)) {
1225 		if (!warning_once && numa_support)
1226 			fprintf(stderr,
1227 				"Warning: NUMA should be configured manually by using --port-numa-config and --ring-numa-config parameters along with --numa.\n");
1228 		warning_once = 1;
1229 		return -1;
1230 	}
1231 	return 0;
1232 }
1233 
1234 /*
1235  * Get the allowed maximum number of RX queues.
1236  * *pid returns the port id which has the minimal value of
1237  * max_rx_queues among all ports.
1238  */
1239 queueid_t
1240 get_allowed_max_nb_rxq(portid_t *pid)
1241 {
1242 	queueid_t allowed_max_rxq = RTE_MAX_QUEUES_PER_PORT;
1243 	bool max_rxq_valid = false;
1244 	portid_t pi;
1245 	struct rte_eth_dev_info dev_info;
1246 
1247 	RTE_ETH_FOREACH_DEV(pi) {
1248 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1249 			continue;
1250 
1251 		max_rxq_valid = true;
1252 		if (dev_info.max_rx_queues < allowed_max_rxq) {
1253 			allowed_max_rxq = dev_info.max_rx_queues;
1254 			*pid = pi;
1255 		}
1256 	}
1257 	return max_rxq_valid ? allowed_max_rxq : 0;
1258 }
1259 
1260 /*
1261  * Check whether the input rxq is valid.
1262  * It is valid if it does not exceed the maximum number of RX queues
1263  * of any port.
1264  * Return 0 if valid, -1 otherwise.
1265  */
1266 int
1267 check_nb_rxq(queueid_t rxq)
1268 {
1269 	queueid_t allowed_max_rxq;
1270 	portid_t pid = 0;
1271 
1272 	allowed_max_rxq = get_allowed_max_nb_rxq(&pid);
1273 	if (rxq > allowed_max_rxq) {
1274 		fprintf(stderr,
1275 			"Fail: input rxq (%u) can't be greater than max_rx_queues (%u) of port %u\n",
1276 			rxq, allowed_max_rxq, pid);
1277 		return -1;
1278 	}
1279 	return 0;
1280 }
1281 
1282 /*
1283  * Get the allowed maximum number of TX queues.
1284  * *pid returns the port id which has the minimal value of
1285  * max_tx_queues among all ports.
1286  */
1287 queueid_t
1288 get_allowed_max_nb_txq(portid_t *pid)
1289 {
1290 	queueid_t allowed_max_txq = RTE_MAX_QUEUES_PER_PORT;
1291 	bool max_txq_valid = false;
1292 	portid_t pi;
1293 	struct rte_eth_dev_info dev_info;
1294 
1295 	RTE_ETH_FOREACH_DEV(pi) {
1296 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1297 			continue;
1298 
1299 		max_txq_valid = true;
1300 		if (dev_info.max_tx_queues < allowed_max_txq) {
1301 			allowed_max_txq = dev_info.max_tx_queues;
1302 			*pid = pi;
1303 		}
1304 	}
1305 	return max_txq_valid ? allowed_max_txq : 0;
1306 }
1307 
1308 /*
1309  * Check whether the input txq is valid.
1310  * It is valid if it does not exceed the maximum number of TX queues
1311  * of any port.
1312  * Return 0 if valid, -1 otherwise.
1313  */
1314 int
1315 check_nb_txq(queueid_t txq)
1316 {
1317 	queueid_t allowed_max_txq;
1318 	portid_t pid = 0;
1319 
1320 	allowed_max_txq = get_allowed_max_nb_txq(&pid);
1321 	if (txq > allowed_max_txq) {
1322 		fprintf(stderr,
1323 			"Fail: input txq (%u) can't be greater than max_tx_queues (%u) of port %u\n",
1324 			txq, allowed_max_txq, pid);
1325 		return -1;
1326 	}
1327 	return 0;
1328 }
1329 
1330 /*
1331  * Get the allowed maximum number of RXDs of every rx queue.
1332  * *pid returns the port id which has the minimal value of
1333  * max_rxd among all queues of all ports.
1334  */
1335 static uint16_t
1336 get_allowed_max_nb_rxd(portid_t *pid)
1337 {
1338 	uint16_t allowed_max_rxd = UINT16_MAX;
1339 	portid_t pi;
1340 	struct rte_eth_dev_info dev_info;
1341 
1342 	RTE_ETH_FOREACH_DEV(pi) {
1343 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1344 			continue;
1345 
1346 		if (dev_info.rx_desc_lim.nb_max < allowed_max_rxd) {
1347 			allowed_max_rxd = dev_info.rx_desc_lim.nb_max;
1348 			*pid = pi;
1349 		}
1350 	}
1351 	return allowed_max_rxd;
1352 }
1353 
1354 /*
1355  * Get the allowed minimal number of RXDs of every rx queue.
1356  * *pid returns the port id which has the highest value of
1357  * min_rxd among all queues of all ports.
1358  */
1359 static uint16_t
1360 get_allowed_min_nb_rxd(portid_t *pid)
1361 {
1362 	uint16_t allowed_min_rxd = 0;
1363 	portid_t pi;
1364 	struct rte_eth_dev_info dev_info;
1365 
1366 	RTE_ETH_FOREACH_DEV(pi) {
1367 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1368 			continue;
1369 
1370 		if (dev_info.rx_desc_lim.nb_min > allowed_min_rxd) {
1371 			allowed_min_rxd = dev_info.rx_desc_lim.nb_min;
1372 			*pid = pi;
1373 		}
1374 	}
1375 
1376 	return allowed_min_rxd;
1377 }
1378 
1379 /*
1380  * Check whether the input rxd is valid.
1381  * It is valid if it does not exceed the maximum number of RXDs of any
1382  * Rx queue and is not less than the minimal number of RXDs of any
1383  * Rx queue.
1384  * Return 0 if valid, -1 otherwise.
1385  */
1386 int
1387 check_nb_rxd(queueid_t rxd)
1388 {
1389 	uint16_t allowed_max_rxd;
1390 	uint16_t allowed_min_rxd;
1391 	portid_t pid = 0;
1392 
1393 	allowed_max_rxd = get_allowed_max_nb_rxd(&pid);
1394 	if (rxd > allowed_max_rxd) {
1395 		fprintf(stderr,
1396 			"Fail: input rxd (%u) can't be greater than max_rxds (%u) of port %u\n",
1397 			rxd, allowed_max_rxd, pid);
1398 		return -1;
1399 	}
1400 
1401 	allowed_min_rxd = get_allowed_min_nb_rxd(&pid);
1402 	if (rxd < allowed_min_rxd) {
1403 		fprintf(stderr,
1404 			"Fail: input rxd (%u) can't be less than min_rxds (%u) of port %u\n",
1405 			rxd, allowed_min_rxd, pid);
1406 		return -1;
1407 	}
1408 
1409 	return 0;
1410 }
1411 
1412 /*
1413  * Get the allowed maximum number of TXDs of every tx queue.
1414  * *pid returns the port id which has the minimal value of
1415  * max_txd among all queues of all ports.
1416  */
1417 static uint16_t
1418 get_allowed_max_nb_txd(portid_t *pid)
1419 {
1420 	uint16_t allowed_max_txd = UINT16_MAX;
1421 	portid_t pi;
1422 	struct rte_eth_dev_info dev_info;
1423 
1424 	RTE_ETH_FOREACH_DEV(pi) {
1425 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1426 			continue;
1427 
1428 		if (dev_info.tx_desc_lim.nb_max < allowed_max_txd) {
1429 			allowed_max_txd = dev_info.tx_desc_lim.nb_max;
1430 			*pid = pi;
1431 		}
1432 	}
1433 	return allowed_max_txd;
1434 }
1435 
1436 /*
1437  * Get the allowed minimal number of TXDs of every tx queue.
1438  * *pid returns the port id which has the highest value of
1439  * min_txd among all queues of all ports.
1440  */
1441 static uint16_t
1442 get_allowed_min_nb_txd(portid_t *pid)
1443 {
1444 	uint16_t allowed_min_txd = 0;
1445 	portid_t pi;
1446 	struct rte_eth_dev_info dev_info;
1447 
1448 	RTE_ETH_FOREACH_DEV(pi) {
1449 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1450 			continue;
1451 
1452 		if (dev_info.tx_desc_lim.nb_min > allowed_min_txd) {
1453 			allowed_min_txd = dev_info.tx_desc_lim.nb_min;
1454 			*pid = pi;
1455 		}
1456 	}
1457 
1458 	return allowed_min_txd;
1459 }
1460 
1461 /*
1462  * Check whether the input txd is valid.
1463  * It is valid if it does not exceed the maximum number of TXDs of any
1464  * Tx queue and is not less than the minimal number of TXDs of any Tx queue.
1465  * Return 0 if valid, -1 otherwise.
1466  */
1467 int
1468 check_nb_txd(queueid_t txd)
1469 {
1470 	uint16_t allowed_max_txd;
1471 	uint16_t allowed_min_txd;
1472 	portid_t pid = 0;
1473 
1474 	allowed_max_txd = get_allowed_max_nb_txd(&pid);
1475 	if (txd > allowed_max_txd) {
1476 		fprintf(stderr,
1477 			"Fail: input txd (%u) can't be greater than max_txds (%u) of port %u\n",
1478 			txd, allowed_max_txd, pid);
1479 		return -1;
1480 	}
1481 
1482 	allowed_min_txd = get_allowed_min_nb_txd(&pid);
1483 	if (txd < allowed_min_txd) {
1484 		fprintf(stderr,
1485 			"Fail: input txd (%u) can't be less than min_txds (%u) of port %u\n",
1486 			txd, allowed_min_txd, pid);
1487 		return -1;
1488 	}
1489 	return 0;
1490 }
1491 
1492 
1493 /*
1494  * Get the allowed maximum number of hairpin queues.
1495  * *pid returns the port id which has the minimal value of
1496  * max_hairpin_queues among all ports.
1497  */
1498 queueid_t
1499 get_allowed_max_nb_hairpinq(portid_t *pid)
1500 {
1501 	queueid_t allowed_max_hairpinq = RTE_MAX_QUEUES_PER_PORT;
1502 	portid_t pi;
1503 	struct rte_eth_hairpin_cap cap;
1504 
1505 	RTE_ETH_FOREACH_DEV(pi) {
1506 		if (rte_eth_dev_hairpin_capability_get(pi, &cap) != 0) {
1507 			*pid = pi;
1508 			return 0;
1509 		}
1510 		if (cap.max_nb_queues < allowed_max_hairpinq) {
1511 			allowed_max_hairpinq = cap.max_nb_queues;
1512 			*pid = pi;
1513 		}
1514 	}
1515 	return allowed_max_hairpinq;
1516 }
1517 
1518 /*
1519  * Check whether the input number of hairpin queues is valid.
1520  * It is valid if it does not exceed the maximum number of hairpin
1521  * queues of any port.
1522  * Return 0 if valid, -1 otherwise.
1523  */
1524 int
1525 check_nb_hairpinq(queueid_t hairpinq)
1526 {
1527 	queueid_t allowed_max_hairpinq;
1528 	portid_t pid = 0;
1529 
1530 	allowed_max_hairpinq = get_allowed_max_nb_hairpinq(&pid);
1531 	if (hairpinq > allowed_max_hairpinq) {
1532 		fprintf(stderr,
1533 			"Fail: input hairpin (%u) can't be greater than max_hairpin_queues (%u) of port %u\n",
1534 			hairpinq, allowed_max_hairpinq, pid);
1535 		return -1;
1536 	}
1537 	return 0;
1538 }
1539 
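/*
 * Return the Ethernet overhead (difference between the maximum frame length
 * and the maximum MTU reported by the PMD, or header plus CRC as a fallback)
 * used to convert between MTU and frame length.
 */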
1540 static int
1541 get_eth_overhead(struct rte_eth_dev_info *dev_info)
1542 {
1543 	uint32_t eth_overhead;
1544 
1545 	if (dev_info->max_mtu != UINT16_MAX &&
1546 	    dev_info->max_rx_pktlen > dev_info->max_mtu)
1547 		eth_overhead = dev_info->max_rx_pktlen - dev_info->max_mtu;
1548 	else
1549 		eth_overhead = RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN;
1550 
1551 	return eth_overhead;
1552 }
1553 
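/*
 * Apply the default Rx/Tx configuration and offloads to one port, record its
 * NUMA socket, and enlarge the first mbuf segment size if the PMD limits the
 * number of segments per MTU.
 */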
1554 static void
1555 init_config_port_offloads(portid_t pid, uint32_t socket_id)
1556 {
1557 	struct rte_port *port = &ports[pid];
1558 	int ret;
1559 	int i;
1560 
1561 	eth_rx_metadata_negotiate_mp(pid);
1562 
1563 	port->dev_conf.txmode = tx_mode;
1564 	port->dev_conf.rxmode = rx_mode;
1565 
1566 	ret = eth_dev_info_get_print_err(pid, &port->dev_info);
1567 	if (ret != 0)
1568 		rte_exit(EXIT_FAILURE, "rte_eth_dev_info_get() failed\n");
1569 
1570 	if (!(port->dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE))
1571 		port->dev_conf.txmode.offloads &=
1572 			~RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE;
1573 
1574 	/* Apply Rx offloads configuration */
1575 	for (i = 0; i < port->dev_info.max_rx_queues; i++)
1576 		port->rx_conf[i].offloads = port->dev_conf.rxmode.offloads;
1577 	/* Apply Tx offloads configuration */
1578 	for (i = 0; i < port->dev_info.max_tx_queues; i++)
1579 		port->tx_conf[i].offloads = port->dev_conf.txmode.offloads;
1580 
1581 	if (eth_link_speed)
1582 		port->dev_conf.link_speeds = eth_link_speed;
1583 
1584 	if (max_rx_pkt_len)
1585 		port->dev_conf.rxmode.mtu = max_rx_pkt_len -
1586 			get_eth_overhead(&port->dev_info);
1587 
1588 	/* set flag to initialize port/queue */
1589 	port->need_reconfig = 1;
1590 	port->need_reconfig_queues = 1;
1591 	port->socket_id = socket_id;
1592 	port->tx_metadata = 0;
1593 
1594 	/*
1595 	 * Check for maximum number of segments per MTU.
1596 	 * Accordingly update the mbuf data size.
1597 	 */
1598 	if (port->dev_info.rx_desc_lim.nb_mtu_seg_max != UINT16_MAX &&
1599 	    port->dev_info.rx_desc_lim.nb_mtu_seg_max != 0) {
1600 		uint32_t eth_overhead = get_eth_overhead(&port->dev_info);
1601 		uint16_t mtu;
1602 
1603 		if (rte_eth_dev_get_mtu(pid, &mtu) == 0) {
1604 			uint16_t data_size = (mtu + eth_overhead) /
1605 				port->dev_info.rx_desc_lim.nb_mtu_seg_max;
1606 			uint16_t buffer_size = data_size + RTE_PKTMBUF_HEADROOM;
1607 
1608 			if (buffer_size > mbuf_data_size[0]) {
1609 				mbuf_data_size[0] = buffer_size;
1610 				TESTPMD_LOG(WARNING,
1611 					"Configured mbuf size of the first segment %hu\n",
1612 					mbuf_data_size[0]);
1613 			}
1614 		}
1615 	}
1616 }
1617 
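/*
 * Global initialisation: allocate the forwarding lcore contexts, configure
 * every port, create the mbuf pools and, when enabled, set up the GSO and
 * GRO contexts.
 */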
1618 static void
1619 init_config(void)
1620 {
1621 	portid_t pid;
1622 	struct rte_mempool *mbp;
1623 	unsigned int nb_mbuf_per_pool;
1624 	lcoreid_t  lc_id;
1625 #ifdef RTE_LIB_GRO
1626 	struct rte_gro_param gro_param;
1627 #endif
1628 #ifdef RTE_LIB_GSO
1629 	uint32_t gso_types;
1630 #endif
1631 
1632 	/* Configuration of logical cores. */
1633 	fwd_lcores = rte_zmalloc("testpmd: fwd_lcores",
1634 				sizeof(struct fwd_lcore *) * nb_lcores,
1635 				RTE_CACHE_LINE_SIZE);
1636 	if (fwd_lcores == NULL) {
1637 		rte_exit(EXIT_FAILURE, "rte_zmalloc(%d (struct fwd_lcore *)) "
1638 							"failed\n", nb_lcores);
1639 	}
1640 	for (lc_id = 0; lc_id < nb_lcores; lc_id++) {
1641 		fwd_lcores[lc_id] = rte_zmalloc("testpmd: struct fwd_lcore",
1642 					       sizeof(struct fwd_lcore),
1643 					       RTE_CACHE_LINE_SIZE);
1644 		if (fwd_lcores[lc_id] == NULL) {
1645 			rte_exit(EXIT_FAILURE, "rte_zmalloc(struct fwd_lcore) "
1646 								"failed\n");
1647 		}
1648 		fwd_lcores[lc_id]->cpuid_idx = lc_id;
1649 	}
1650 
1651 	RTE_ETH_FOREACH_DEV(pid) {
1652 		uint32_t socket_id;
1653 
1654 		if (numa_support) {
1655 			socket_id = port_numa[pid];
1656 			if (port_numa[pid] == NUMA_NO_CONFIG) {
1657 				socket_id = rte_eth_dev_socket_id(pid);
1658 
1659 				/*
1660 				 * if socket_id is invalid,
1661 				 * set to the first available socket.
1662 				 */
1663 				if (check_socket_id(socket_id) < 0)
1664 					socket_id = socket_ids[0];
1665 			}
1666 		} else {
1667 			socket_id = (socket_num == UMA_NO_CONFIG) ?
1668 				    0 : socket_num;
1669 		}
1670 		/* Apply default TxRx configuration for all ports */
1671 		init_config_port_offloads(pid, socket_id);
1672 	}
1673 	/*
1674 	 * Create pools of mbuf.
1675 	 * If NUMA support is disabled, create a single pool of mbuf in
1676 	 * socket 0 memory by default.
1677 	 * Otherwise, create a pool of mbuf in the memory of sockets 0 and 1.
1678 	 *
1679 	 * Use the maximum value of nb_rxd and nb_txd here, then nb_rxd and
1680 	 * nb_txd can be configured at run time.
1681 	 */
1682 	if (param_total_num_mbufs)
1683 		nb_mbuf_per_pool = param_total_num_mbufs;
1684 	else {
1685 		nb_mbuf_per_pool = RTE_TEST_RX_DESC_MAX +
1686 			(nb_lcores * mb_mempool_cache) +
1687 			RTE_TEST_TX_DESC_MAX + MAX_PKT_BURST;
1688 		nb_mbuf_per_pool *= RTE_MAX_ETHPORTS;
1689 	}
1690 
1691 	if (numa_support) {
1692 		uint8_t i, j;
1693 
1694 		for (i = 0; i < num_sockets; i++)
1695 			for (j = 0; j < mbuf_data_size_n; j++)
1696 				mempools[i * MAX_SEGS_BUFFER_SPLIT + j] =
1697 					mbuf_pool_create(mbuf_data_size[j],
1698 							  nb_mbuf_per_pool,
1699 							  socket_ids[i], j);
1700 	} else {
1701 		uint8_t i;
1702 
1703 		for (i = 0; i < mbuf_data_size_n; i++)
1704 			mempools[i] = mbuf_pool_create
1705 					(mbuf_data_size[i],
1706 					 nb_mbuf_per_pool,
1707 					 socket_num == UMA_NO_CONFIG ?
1708 					 0 : socket_num, i);
1709 	}
1710 
1711 	init_port_config();
1712 
1713 #ifdef RTE_LIB_GSO
1714 	gso_types = RTE_ETH_TX_OFFLOAD_TCP_TSO | RTE_ETH_TX_OFFLOAD_VXLAN_TNL_TSO |
1715 		RTE_ETH_TX_OFFLOAD_GRE_TNL_TSO | RTE_ETH_TX_OFFLOAD_UDP_TSO;
1716 #endif
1717 	/*
1718 	 * Record which mbuf pool each logical core should use, if needed.
1719 	 */
1720 	for (lc_id = 0; lc_id < nb_lcores; lc_id++) {
1721 		mbp = mbuf_pool_find(
1722 			rte_lcore_to_socket_id(fwd_lcores_cpuids[lc_id]), 0);
1723 
1724 		if (mbp == NULL)
1725 			mbp = mbuf_pool_find(0, 0);
1726 		fwd_lcores[lc_id]->mbp = mbp;
1727 #ifdef RTE_LIB_GSO
1728 		/* initialize GSO context */
1729 		fwd_lcores[lc_id]->gso_ctx.direct_pool = mbp;
1730 		fwd_lcores[lc_id]->gso_ctx.indirect_pool = mbp;
1731 		fwd_lcores[lc_id]->gso_ctx.gso_types = gso_types;
1732 		fwd_lcores[lc_id]->gso_ctx.gso_size = RTE_ETHER_MAX_LEN -
1733 			RTE_ETHER_CRC_LEN;
1734 		fwd_lcores[lc_id]->gso_ctx.flag = 0;
1735 #endif
1736 	}
1737 
1738 	fwd_config_setup();
1739 
1740 #ifdef RTE_LIB_GRO
1741 	/* create a gro context for each lcore */
1742 	gro_param.gro_types = RTE_GRO_TCP_IPV4;
1743 	gro_param.max_flow_num = GRO_MAX_FLUSH_CYCLES;
1744 	gro_param.max_item_per_flow = MAX_PKT_BURST;
1745 	for (lc_id = 0; lc_id < nb_lcores; lc_id++) {
1746 		gro_param.socket_id = rte_lcore_to_socket_id(
1747 				fwd_lcores_cpuids[lc_id]);
1748 		fwd_lcores[lc_id]->gro_ctx = rte_gro_ctx_create(&gro_param);
1749 		if (fwd_lcores[lc_id]->gro_ctx == NULL) {
1750 			rte_exit(EXIT_FAILURE,
1751 					"rte_gro_ctx_create() failed\n");
1752 		}
1753 	}
1754 #endif
1755 }
1756 
1757 
1758 void
1759 reconfig(portid_t new_port_id, unsigned socket_id)
1760 {
1761 	/* Reconfiguration of Ethernet ports. */
1762 	init_config_port_offloads(new_port_id, socket_id);
1763 	init_port_config();
1764 }
1765 
1766 
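/*
 * (Re)allocate the forwarding streams, one per queue per port, based on the
 * current numbers of ports and Rx/Tx queues, and resolve the NUMA socket of
 * each port.
 */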
1767 int
1768 init_fwd_streams(void)
1769 {
1770 	portid_t pid;
1771 	struct rte_port *port;
1772 	streamid_t sm_id, nb_fwd_streams_new;
1773 	queueid_t q;
1774 
1775 	/* set socket id according to numa or not */
1776 	RTE_ETH_FOREACH_DEV(pid) {
1777 		port = &ports[pid];
1778 		if (nb_rxq > port->dev_info.max_rx_queues) {
1779 			fprintf(stderr,
1780 				"Fail: nb_rxq(%d) is greater than max_rx_queues(%d)\n",
1781 				nb_rxq, port->dev_info.max_rx_queues);
1782 			return -1;
1783 		}
1784 		if (nb_txq > port->dev_info.max_tx_queues) {
1785 			fprintf(stderr,
1786 				"Fail: nb_txq(%d) is greater than max_tx_queues(%d)\n",
1787 				nb_txq, port->dev_info.max_tx_queues);
1788 			return -1;
1789 		}
1790 		if (numa_support) {
1791 			if (port_numa[pid] != NUMA_NO_CONFIG)
1792 				port->socket_id = port_numa[pid];
1793 			else {
1794 				port->socket_id = rte_eth_dev_socket_id(pid);
1795 
1796 				/*
1797 				 * if socket_id is invalid,
1798 				 * set to the first available socket.
1799 				 */
1800 				if (check_socket_id(port->socket_id) < 0)
1801 					port->socket_id = socket_ids[0];
1802 			}
1803 		}
1804 		else {
1805 			if (socket_num == UMA_NO_CONFIG)
1806 				port->socket_id = 0;
1807 			else
1808 				port->socket_id = socket_num;
1809 		}
1810 	}
1811 
1812 	q = RTE_MAX(nb_rxq, nb_txq);
1813 	if (q == 0) {
1814 		fprintf(stderr,
1815 			"Fail: Cannot allocate fwd streams as number of queues is 0\n");
1816 		return -1;
1817 	}
1818 	nb_fwd_streams_new = (streamid_t)(nb_ports * q);
1819 	if (nb_fwd_streams_new == nb_fwd_streams)
1820 		return 0;
1821 	/* clear the old */
1822 	if (fwd_streams != NULL) {
1823 		for (sm_id = 0; sm_id < nb_fwd_streams; sm_id++) {
1824 			if (fwd_streams[sm_id] == NULL)
1825 				continue;
1826 			rte_free(fwd_streams[sm_id]);
1827 			fwd_streams[sm_id] = NULL;
1828 		}
1829 		rte_free(fwd_streams);
1830 		fwd_streams = NULL;
1831 	}
1832 
1833 	/* init new */
1834 	nb_fwd_streams = nb_fwd_streams_new;
1835 	if (nb_fwd_streams) {
1836 		fwd_streams = rte_zmalloc("testpmd: fwd_streams",
1837 			sizeof(struct fwd_stream *) * nb_fwd_streams,
1838 			RTE_CACHE_LINE_SIZE);
1839 		if (fwd_streams == NULL)
1840 			rte_exit(EXIT_FAILURE, "rte_zmalloc(%d"
1841 				 " (struct fwd_stream *)) failed\n",
1842 				 nb_fwd_streams);
1843 
1844 		for (sm_id = 0; sm_id < nb_fwd_streams; sm_id++) {
1845 			fwd_streams[sm_id] = rte_zmalloc("testpmd:"
1846 				" struct fwd_stream", sizeof(struct fwd_stream),
1847 				RTE_CACHE_LINE_SIZE);
1848 			if (fwd_streams[sm_id] == NULL)
1849 				rte_exit(EXIT_FAILURE, "rte_zmalloc"
1850 					 "(struct fwd_stream) failed\n");
1851 		}
1852 	}
1853 
1854 	return 0;
1855 }
1856 
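/*
 * Display the distribution of burst sizes: always the 0-packet bursts, plus
 * the two most frequent non-zero burst sizes, as percentages of all bursts.
 */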
1857 static void
1858 pkt_burst_stats_display(const char *rx_tx, struct pkt_burst_stats *pbs)
1859 {
1860 	uint64_t total_burst, sburst;
1861 	uint64_t nb_burst;
1862 	uint64_t burst_stats[4];
1863 	uint16_t pktnb_stats[4];
1864 	uint16_t nb_pkt;
1865 	int burst_percent[4], sburstp;
1866 	int i;
1867 
1868 	/*
1869 	 * First compute the total number of packet bursts and the
1870 	 * two highest numbers of bursts of the same number of packets.
1871 	 */
1872 	memset(&burst_stats, 0x0, sizeof(burst_stats));
1873 	memset(&pktnb_stats, 0x0, sizeof(pktnb_stats));
1874 
1875 	/* Show stats for 0 burst size always */
1876 	total_burst = pbs->pkt_burst_spread[0];
1877 	burst_stats[0] = pbs->pkt_burst_spread[0];
1878 	pktnb_stats[0] = 0;
1879 
1880 	/* Find the next 2 burst sizes with highest occurrences. */
1881 	for (nb_pkt = 1; nb_pkt < MAX_PKT_BURST + 1; nb_pkt++) {
1882 		nb_burst = pbs->pkt_burst_spread[nb_pkt];
1883 
1884 		if (nb_burst == 0)
1885 			continue;
1886 
1887 		total_burst += nb_burst;
1888 
1889 		if (nb_burst > burst_stats[1]) {
1890 			burst_stats[2] = burst_stats[1];
1891 			pktnb_stats[2] = pktnb_stats[1];
1892 			burst_stats[1] = nb_burst;
1893 			pktnb_stats[1] = nb_pkt;
1894 		} else if (nb_burst > burst_stats[2]) {
1895 			burst_stats[2] = nb_burst;
1896 			pktnb_stats[2] = nb_pkt;
1897 		}
1898 	}
1899 	if (total_burst == 0)
1900 		return;
1901 
1902 	printf("  %s-bursts : %"PRIu64" [", rx_tx, total_burst);
1903 	for (i = 0, sburst = 0, sburstp = 0; i < 4; i++) {
1904 		if (i == 3) {
1905 			printf("%d%% of other]\n", 100 - sburstp);
1906 			return;
1907 		}
1908 
1909 		sburst += burst_stats[i];
1910 		if (sburst == total_burst) {
1911 			printf("%d%% of %d pkts]\n",
1912 				100 - sburstp, (int) pktnb_stats[i]);
1913 			return;
1914 		}
1915 
1916 		burst_percent[i] =
1917 			(double)burst_stats[i] / total_burst * 100;
1918 		printf("%d%% of %d pkts + ",
1919 			burst_percent[i], (int) pktnb_stats[i]);
1920 		sburstp += burst_percent[i];
1921 	}
1922 }
1923 
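/*
 * Display the statistics of one forwarding stream: packets, drops, checksum
 * errors and, when enabled, burst statistics.
 */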
1924 static void
1925 fwd_stream_stats_display(streamid_t stream_id)
1926 {
1927 	struct fwd_stream *fs;
1928 	static const char *fwd_top_stats_border = "-------";
1929 
1930 	fs = fwd_streams[stream_id];
1931 	if ((fs->rx_packets == 0) && (fs->tx_packets == 0) &&
1932 	    (fs->fwd_dropped == 0))
1933 		return;
1934 	printf("\n  %s Forward Stats for RX Port=%2d/Queue=%2d -> "
1935 	       "TX Port=%2d/Queue=%2d %s\n",
1936 	       fwd_top_stats_border, fs->rx_port, fs->rx_queue,
1937 	       fs->tx_port, fs->tx_queue, fwd_top_stats_border);
1938 	printf("  RX-packets: %-14"PRIu64" TX-packets: %-14"PRIu64
1939 	       " TX-dropped: %-14"PRIu64,
1940 	       fs->rx_packets, fs->tx_packets, fs->fwd_dropped);
1941 
1942 	/* if checksum mode */
1943 	if (cur_fwd_eng == &csum_fwd_engine) {
1944 		printf("  RX- bad IP checksum: %-14"PRIu64
1945 		       "  Rx- bad L4 checksum: %-14"PRIu64
1946 		       " Rx- bad outer L4 checksum: %-14"PRIu64"\n",
1947 			fs->rx_bad_ip_csum, fs->rx_bad_l4_csum,
1948 			fs->rx_bad_outer_l4_csum);
1949 		printf(" RX- bad outer IP checksum: %-14"PRIu64"\n",
1950 			fs->rx_bad_outer_ip_csum);
1951 	} else {
1952 		printf("\n");
1953 	}
1954 
1955 	if (record_burst_stats) {
1956 		pkt_burst_stats_display("RX", &fs->rx_burst_stats);
1957 		pkt_burst_stats_display("TX", &fs->tx_burst_stats);
1958 	}
1959 }
1960 
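/*
 * Display the forwarding statistics accumulated since the last reset:
 * per-stream counters (when there are more streams than forwarding ports),
 * per-port counters derived from rte_eth_stats_get() and the accumulated
 * totals for all ports.
 */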
1961 void
1962 fwd_stats_display(void)
1963 {
1964 	static const char *fwd_stats_border = "----------------------";
1965 	static const char *acc_stats_border = "+++++++++++++++";
1966 	struct {
1967 		struct fwd_stream *rx_stream;
1968 		struct fwd_stream *tx_stream;
1969 		uint64_t tx_dropped;
1970 		uint64_t rx_bad_ip_csum;
1971 		uint64_t rx_bad_l4_csum;
1972 		uint64_t rx_bad_outer_l4_csum;
1973 		uint64_t rx_bad_outer_ip_csum;
1974 	} ports_stats[RTE_MAX_ETHPORTS];
1975 	uint64_t total_rx_dropped = 0;
1976 	uint64_t total_tx_dropped = 0;
1977 	uint64_t total_rx_nombuf = 0;
1978 	struct rte_eth_stats stats;
1979 	uint64_t fwd_cycles = 0;
1980 	uint64_t total_recv = 0;
1981 	uint64_t total_xmit = 0;
1982 	struct rte_port *port;
1983 	streamid_t sm_id;
1984 	portid_t pt_id;
1985 	int ret;
1986 	int i;
1987 
1988 	memset(ports_stats, 0, sizeof(ports_stats));
1989 
1990 	for (sm_id = 0; sm_id < cur_fwd_config.nb_fwd_streams; sm_id++) {
1991 		struct fwd_stream *fs = fwd_streams[sm_id];
1992 
1993 		if (cur_fwd_config.nb_fwd_streams >
1994 		    cur_fwd_config.nb_fwd_ports) {
1995 			fwd_stream_stats_display(sm_id);
1996 		} else {
1997 			ports_stats[fs->tx_port].tx_stream = fs;
1998 			ports_stats[fs->rx_port].rx_stream = fs;
1999 		}
2000 
2001 		ports_stats[fs->tx_port].tx_dropped += fs->fwd_dropped;
2002 
2003 		ports_stats[fs->rx_port].rx_bad_ip_csum += fs->rx_bad_ip_csum;
2004 		ports_stats[fs->rx_port].rx_bad_l4_csum += fs->rx_bad_l4_csum;
2005 		ports_stats[fs->rx_port].rx_bad_outer_l4_csum +=
2006 				fs->rx_bad_outer_l4_csum;
2007 		ports_stats[fs->rx_port].rx_bad_outer_ip_csum +=
2008 				fs->rx_bad_outer_ip_csum;
2009 
2010 		if (record_core_cycles)
2011 			fwd_cycles += fs->core_cycles;
2012 	}
2013 	for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
2014 		pt_id = fwd_ports_ids[i];
2015 		port = &ports[pt_id];
2016 
2017 		ret = rte_eth_stats_get(pt_id, &stats);
2018 		if (ret != 0) {
2019 			fprintf(stderr,
2020 				"%s: Error: failed to get stats (port %u): %d",
2021 				__func__, pt_id, ret);
2022 			continue;
2023 		}
2024 		stats.ipackets -= port->stats.ipackets;
2025 		stats.opackets -= port->stats.opackets;
2026 		stats.ibytes -= port->stats.ibytes;
2027 		stats.obytes -= port->stats.obytes;
2028 		stats.imissed -= port->stats.imissed;
2029 		stats.oerrors -= port->stats.oerrors;
2030 		stats.rx_nombuf -= port->stats.rx_nombuf;
2031 
2032 		total_recv += stats.ipackets;
2033 		total_xmit += stats.opackets;
2034 		total_rx_dropped += stats.imissed;
2035 		total_tx_dropped += ports_stats[pt_id].tx_dropped;
2036 		total_tx_dropped += stats.oerrors;
2037 		total_rx_nombuf  += stats.rx_nombuf;
2038 
2039 		printf("\n  %s Forward statistics for port %-2d %s\n",
2040 		       fwd_stats_border, pt_id, fwd_stats_border);
2041 
2042 		printf("  RX-packets: %-14"PRIu64" RX-dropped: %-14"PRIu64
2043 		       "RX-total: %-"PRIu64"\n", stats.ipackets, stats.imissed,
2044 		       stats.ipackets + stats.imissed);
2045 
2046 		if (cur_fwd_eng == &csum_fwd_engine) {
2047 			printf("  Bad-ipcsum: %-14"PRIu64
2048 			       " Bad-l4csum: %-14"PRIu64
2049 			       "Bad-outer-l4csum: %-14"PRIu64"\n",
2050 			       ports_stats[pt_id].rx_bad_ip_csum,
2051 			       ports_stats[pt_id].rx_bad_l4_csum,
2052 			       ports_stats[pt_id].rx_bad_outer_l4_csum);
2053 			printf("  Bad-outer-ipcsum: %-14"PRIu64"\n",
2054 			       ports_stats[pt_id].rx_bad_outer_ip_csum);
2055 		}
2056 		if (stats.ierrors + stats.rx_nombuf > 0) {
2057 			printf("  RX-error: %-"PRIu64"\n", stats.ierrors);
2058 			printf("  RX-nombufs: %-14"PRIu64"\n", stats.rx_nombuf);
2059 		}
2060 
2061 		printf("  TX-packets: %-14"PRIu64" TX-dropped: %-14"PRIu64
2062 		       "TX-total: %-"PRIu64"\n",
2063 		       stats.opackets, ports_stats[pt_id].tx_dropped,
2064 		       stats.opackets + ports_stats[pt_id].tx_dropped);
2065 
2066 		if (record_burst_stats) {
2067 			if (ports_stats[pt_id].rx_stream)
2068 				pkt_burst_stats_display("RX",
2069 					&ports_stats[pt_id].rx_stream->rx_burst_stats);
2070 			if (ports_stats[pt_id].tx_stream)
2071 				pkt_burst_stats_display("TX",
2072 				&ports_stats[pt_id].tx_stream->tx_burst_stats);
2073 		}
2074 
2075 		printf("  %s--------------------------------%s\n",
2076 		       fwd_stats_border, fwd_stats_border);
2077 	}
2078 
2079 	printf("\n  %s Accumulated forward statistics for all ports"
2080 	       "%s\n",
2081 	       acc_stats_border, acc_stats_border);
2082 	printf("  RX-packets: %-14"PRIu64" RX-dropped: %-14"PRIu64"RX-total: "
2083 	       "%-"PRIu64"\n"
2084 	       "  TX-packets: %-14"PRIu64" TX-dropped: %-14"PRIu64"TX-total: "
2085 	       "%-"PRIu64"\n",
2086 	       total_recv, total_rx_dropped, total_recv + total_rx_dropped,
2087 	       total_xmit, total_tx_dropped, total_xmit + total_tx_dropped);
2088 	if (total_rx_nombuf > 0)
2089 		printf("  RX-nombufs: %-14"PRIu64"\n", total_rx_nombuf);
2090 	printf("  %s++++++++++++++++++++++++++++++++++++++++++++++"
2091 	       "%s\n",
2092 	       acc_stats_border, acc_stats_border);
2093 	if (record_core_cycles) {
2094 #define CYC_PER_MHZ 1E6
2095 		if (total_recv > 0 || total_xmit > 0) {
2096 			uint64_t total_pkts = 0;
2097 			if (strcmp(cur_fwd_eng->fwd_mode_name, "txonly") == 0 ||
2098 			    strcmp(cur_fwd_eng->fwd_mode_name, "flowgen") == 0)
2099 				total_pkts = total_xmit;
2100 			else
2101 				total_pkts = total_recv;
2102 
2103 			printf("\n  CPU cycles/packet=%.2F (total cycles="
2104 			       "%"PRIu64" / total %s packets=%"PRIu64") at %"PRIu64
2105 			       " MHz Clock\n",
2106 			       (double) fwd_cycles / total_pkts,
2107 			       fwd_cycles, cur_fwd_eng->fwd_mode_name, total_pkts,
2108 			       (uint64_t)(rte_get_tsc_hz() / CYC_PER_MHZ));
2109 		}
2110 	}
2111 }
2112 
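/*
 * Reset the forwarding statistics: record the current ethdev counters as
 * the new baseline and clear all per-stream software counters.
 */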
2113 void
2114 fwd_stats_reset(void)
2115 {
2116 	streamid_t sm_id;
2117 	portid_t pt_id;
2118 	int ret;
2119 	int i;
2120 
2121 	for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
2122 		pt_id = fwd_ports_ids[i];
2123 		ret = rte_eth_stats_get(pt_id, &ports[pt_id].stats);
2124 		if (ret != 0)
2125 			fprintf(stderr,
2126 				"%s: Error: failed to clear stats (port %u):%d",
2127 				__func__, pt_id, ret);
2128 	}
2129 	for (sm_id = 0; sm_id < cur_fwd_config.nb_fwd_streams; sm_id++) {
2130 		struct fwd_stream *fs = fwd_streams[sm_id];
2131 
2132 		fs->rx_packets = 0;
2133 		fs->tx_packets = 0;
2134 		fs->fwd_dropped = 0;
2135 		fs->rx_bad_ip_csum = 0;
2136 		fs->rx_bad_l4_csum = 0;
2137 		fs->rx_bad_outer_l4_csum = 0;
2138 		fs->rx_bad_outer_ip_csum = 0;
2139 
2140 		memset(&fs->rx_burst_stats, 0, sizeof(fs->rx_burst_stats));
2141 		memset(&fs->tx_burst_stats, 0, sizeof(fs->tx_burst_stats));
2142 		fs->core_cycles = 0;
2143 	}
2144 }
2145 
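/*
 * Drain and free any packets pending in the Rx queues of the forwarding
 * ports so that a run starts from empty queues. Skipped when running
 * with multiple processes.
 */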
2146 static void
2147 flush_fwd_rx_queues(void)
2148 {
2149 	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
2150 	portid_t  rxp;
2151 	portid_t port_id;
2152 	queueid_t rxq;
2153 	uint16_t  nb_rx;
2154 	uint16_t  i;
2155 	uint8_t   j;
2156 	uint64_t prev_tsc = 0, diff_tsc, cur_tsc, timer_tsc = 0;
2157 	uint64_t timer_period;
2158 
2159 	if (num_procs > 1) {
2160 		printf("multi-process does not support flushing fwd Rx queues; skipping\n");
2161 		return;
2162 	}
2163 
2164 	/* convert to number of cycles */
2165 	timer_period = rte_get_timer_hz(); /* 1 second timeout */
2166 
2167 	for (j = 0; j < 2; j++) {
2168 		for (rxp = 0; rxp < cur_fwd_config.nb_fwd_ports; rxp++) {
2169 			for (rxq = 0; rxq < nb_rxq; rxq++) {
2170 				port_id = fwd_ports_ids[rxp];
2171 				/*
2172 				 * testpmd can get stuck in the do-while loop
2173 				 * below if rte_eth_rx_burst() always returns
2174 				 * nonzero packets, so a timer is used to exit
2175 				 * the loop after a 1 second timeout.
2176 				 */
2177 				prev_tsc = rte_rdtsc();
2178 				do {
2179 					nb_rx = rte_eth_rx_burst(port_id, rxq,
2180 						pkts_burst, MAX_PKT_BURST);
2181 					for (i = 0; i < nb_rx; i++)
2182 						rte_pktmbuf_free(pkts_burst[i]);
2183 
2184 					cur_tsc = rte_rdtsc();
2185 					diff_tsc = cur_tsc - prev_tsc;
2186 					timer_tsc += diff_tsc;
2187 				} while ((nb_rx > 0) &&
2188 					(timer_tsc < timer_period));
2189 				timer_tsc = 0;
2190 			}
2191 		}
2192 		rte_delay_ms(10); /* wait 10 milliseconds before retrying */
2193 	}
2194 }
2195 
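/*
 * Forwarding loop of an lcore: repeatedly invoke the packet forwarding
 * callback on every stream assigned to this lcore until it is told to
 * stop, updating bitrate and latency statistics when they are enabled.
 */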
2196 static void
2197 run_pkt_fwd_on_lcore(struct fwd_lcore *fc, packet_fwd_t pkt_fwd)
2198 {
2199 	struct fwd_stream **fsm;
2200 	streamid_t nb_fs;
2201 	streamid_t sm_id;
2202 #ifdef RTE_LIB_BITRATESTATS
2203 	uint64_t tics_per_1sec;
2204 	uint64_t tics_datum;
2205 	uint64_t tics_current;
2206 	uint16_t i, cnt_ports;
2207 
2208 	cnt_ports = nb_ports;
2209 	tics_datum = rte_rdtsc();
2210 	tics_per_1sec = rte_get_timer_hz();
2211 #endif
2212 	fsm = &fwd_streams[fc->stream_idx];
2213 	nb_fs = fc->stream_nb;
2214 	do {
2215 		for (sm_id = 0; sm_id < nb_fs; sm_id++)
2216 			(*pkt_fwd)(fsm[sm_id]);
2217 #ifdef RTE_LIB_BITRATESTATS
2218 		if (bitrate_enabled != 0 &&
2219 				bitrate_lcore_id == rte_lcore_id()) {
2220 			tics_current = rte_rdtsc();
2221 			if (tics_current - tics_datum >= tics_per_1sec) {
2222 				/* Periodic bitrate calculation */
2223 				for (i = 0; i < cnt_ports; i++)
2224 					rte_stats_bitrate_calc(bitrate_data,
2225 						ports_ids[i]);
2226 				tics_datum = tics_current;
2227 			}
2228 		}
2229 #endif
2230 #ifdef RTE_LIB_LATENCYSTATS
2231 		if (latencystats_enabled != 0 &&
2232 				latencystats_lcore_id == rte_lcore_id())
2233 			rte_latencystats_update();
2234 #endif
2235 
2236 	} while (! fc->stopped);
2237 }
2238 
2239 static int
2240 start_pkt_forward_on_core(void *fwd_arg)
2241 {
2242 	run_pkt_fwd_on_lcore((struct fwd_lcore *) fwd_arg,
2243 			     cur_fwd_config.fwd_eng->packet_fwd);
2244 	return 0;
2245 }
2246 
2247 /*
2248  * Run the TXONLY packet forwarding engine to send a single burst of packets.
2249  * Used to start communication flows in network loopback test configurations.
2250  */
2251 static int
2252 run_one_txonly_burst_on_core(void *fwd_arg)
2253 {
2254 	struct fwd_lcore *fwd_lc;
2255 	struct fwd_lcore tmp_lcore;
2256 
2257 	fwd_lc = (struct fwd_lcore *) fwd_arg;
2258 	tmp_lcore = *fwd_lc;
2259 	tmp_lcore.stopped = 1;
2260 	run_pkt_fwd_on_lcore(&tmp_lcore, tx_only_engine.packet_fwd);
2261 	return 0;
2262 }
2263 
2264 /*
2265  * Launch packet forwarding:
2266  *     - Set up the per-port forwarding context.
2267  *     - Launch logical cores with their forwarding configuration.
2268  */
2269 static void
2270 launch_packet_forwarding(lcore_function_t *pkt_fwd_on_lcore)
2271 {
2272 	unsigned int i;
2273 	unsigned int lc_id;
2274 	int diag;
2275 
2276 	for (i = 0; i < cur_fwd_config.nb_fwd_lcores; i++) {
2277 		lc_id = fwd_lcores_cpuids[i];
2278 		if ((interactive == 0) || (lc_id != rte_lcore_id())) {
2279 			fwd_lcores[i]->stopped = 0;
2280 			diag = rte_eal_remote_launch(pkt_fwd_on_lcore,
2281 						     fwd_lcores[i], lc_id);
2282 			if (diag != 0)
2283 				fprintf(stderr,
2284 					"launch lcore %u failed - diag=%d\n",
2285 					lc_id, diag);
2286 		}
2287 	}
2288 }
2289 
2290 /*
2291  * Launch packet forwarding configuration.
2292  */
2293 void
2294 start_packet_forwarding(int with_tx_first)
2295 {
2296 	port_fwd_begin_t port_fwd_begin;
2297 	port_fwd_end_t  port_fwd_end;
2298 	unsigned int i;
2299 
2300 	if (strcmp(cur_fwd_eng->fwd_mode_name, "rxonly") == 0 && !nb_rxq)
2301 		rte_exit(EXIT_FAILURE, "rxq are 0, cannot use rxonly fwd mode\n");
2302 
2303 	if (strcmp(cur_fwd_eng->fwd_mode_name, "txonly") == 0 && !nb_txq)
2304 		rte_exit(EXIT_FAILURE, "txq are 0, cannot use txonly fwd mode\n");
2305 
2306 	if ((strcmp(cur_fwd_eng->fwd_mode_name, "rxonly") != 0 &&
2307 		strcmp(cur_fwd_eng->fwd_mode_name, "txonly") != 0) &&
2308 		(!nb_rxq || !nb_txq))
2309 		rte_exit(EXIT_FAILURE,
2310 			"Either rxq or txq is 0, cannot use %s fwd mode\n",
2311 			cur_fwd_eng->fwd_mode_name);
2312 
2313 	if (all_ports_started() == 0) {
2314 		fprintf(stderr, "Not all ports were started\n");
2315 		return;
2316 	}
2317 	if (test_done == 0) {
2318 		fprintf(stderr, "Packet forwarding already started\n");
2319 		return;
2320 	}
2321 
2322 	fwd_config_setup();
2323 
2324 	pkt_fwd_config_display(&cur_fwd_config);
2325 	if (!pkt_fwd_shared_rxq_check())
2326 		return;
2327 
2328 	port_fwd_begin = cur_fwd_config.fwd_eng->port_fwd_begin;
2329 	if (port_fwd_begin != NULL) {
2330 		for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
2331 			if (port_fwd_begin(fwd_ports_ids[i])) {
2332 				fprintf(stderr,
2333 					"Packet forwarding is not ready\n");
2334 				return;
2335 			}
2336 		}
2337 	}
2338 
2339 	if (with_tx_first) {
2340 		port_fwd_begin = tx_only_engine.port_fwd_begin;
2341 		if (port_fwd_begin != NULL) {
2342 			for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
2343 				if (port_fwd_begin(fwd_ports_ids[i])) {
2344 					fprintf(stderr,
2345 						"Packet forwarding is not ready\n");
2346 					return;
2347 				}
2348 			}
2349 		}
2350 	}
2351 
2352 	test_done = 0;
2353 
2354 	if (!no_flush_rx)
2355 		flush_fwd_rx_queues();
2356 
2357 	rxtx_config_display();
2358 
2359 	fwd_stats_reset();
2360 	if (with_tx_first) {
2361 		while (with_tx_first--) {
2362 			launch_packet_forwarding(
2363 					run_one_txonly_burst_on_core);
2364 			rte_eal_mp_wait_lcore();
2365 		}
2366 		port_fwd_end = tx_only_engine.port_fwd_end;
2367 		if (port_fwd_end != NULL) {
2368 			for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++)
2369 				(*port_fwd_end)(fwd_ports_ids[i]);
2370 		}
2371 	}
2372 	launch_packet_forwarding(start_pkt_forward_on_core);
2373 }
2374 
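/*
 * Stop packet forwarding: signal all forwarding lcores to stop, wait for
 * them to finish, run the engine end callbacks and display the statistics.
 */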
2375 void
2376 stop_packet_forwarding(void)
2377 {
2378 	port_fwd_end_t port_fwd_end;
2379 	lcoreid_t lc_id;
2380 	portid_t pt_id;
2381 	int i;
2382 
2383 	if (test_done) {
2384 		fprintf(stderr, "Packet forwarding not started\n");
2385 		return;
2386 	}
2387 	printf("Telling cores to stop...");
2388 	for (lc_id = 0; lc_id < cur_fwd_config.nb_fwd_lcores; lc_id++)
2389 		fwd_lcores[lc_id]->stopped = 1;
2390 	printf("\nWaiting for lcores to finish...\n");
2391 	rte_eal_mp_wait_lcore();
2392 	port_fwd_end = cur_fwd_config.fwd_eng->port_fwd_end;
2393 	if (port_fwd_end != NULL) {
2394 		for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
2395 			pt_id = fwd_ports_ids[i];
2396 			(*port_fwd_end)(pt_id);
2397 		}
2398 	}
2399 
2400 	fwd_stats_display();
2401 
2402 	printf("\nDone.\n");
2403 	test_done = 1;
2404 }
2405 
2406 void
2407 dev_set_link_up(portid_t pid)
2408 {
2409 	if (rte_eth_dev_set_link_up(pid) < 0)
2410 		fprintf(stderr, "\nSet link up fail.\n");
2411 }
2412 
2413 void
2414 dev_set_link_down(portid_t pid)
2415 {
2416 	if (rte_eth_dev_set_link_down(pid) < 0)
2417 		fprintf(stderr, "\nSet link down fail.\n");
2418 }
2419 
2420 static int
2421 all_ports_started(void)
2422 {
2423 	portid_t pi;
2424 	struct rte_port *port;
2425 
2426 	RTE_ETH_FOREACH_DEV(pi) {
2427 		port = &ports[pi];
2428 		/* Check if there is a port which is not started */
2429 		if ((port->port_status != RTE_PORT_STARTED) &&
2430 			(port->slave_flag == 0))
2431 			return 0;
2432 	}
2433 
2434 	/* All ports (excluding bonding slaves) are started */
2435 	return 1;
2436 }
2437 
2438 int
2439 port_is_stopped(portid_t port_id)
2440 {
2441 	struct rte_port *port = &ports[port_id];
2442 
2443 	if ((port->port_status != RTE_PORT_STOPPED) &&
2444 	    (port->slave_flag == 0))
2445 		return 0;
2446 	return 1;
2447 }
2448 
2449 int
2450 all_ports_stopped(void)
2451 {
2452 	portid_t pi;
2453 
2454 	RTE_ETH_FOREACH_DEV(pi) {
2455 		if (!port_is_stopped(pi))
2456 			return 0;
2457 	}
2458 
2459 	return 1;
2460 }
2461 
2462 int
2463 port_is_started(portid_t port_id)
2464 {
2465 	if (port_id_is_invalid(port_id, ENABLED_WARN))
2466 		return 0;
2467 
2468 	if (ports[port_id].port_status != RTE_PORT_STARTED)
2469 		return 0;
2470 
2471 	return 1;
2472 }
2473 
2474 /* Configure the Rx and Tx hairpin queues for the selected port. */
2475 static int
2476 setup_hairpin_queues(portid_t pi, portid_t p_pi, uint16_t cnt_pi)
2477 {
2478 	queueid_t qi;
2479 	struct rte_eth_hairpin_conf hairpin_conf = {
2480 		.peer_count = 1,
2481 	};
2482 	int i;
2483 	int diag;
2484 	struct rte_port *port = &ports[pi];
2485 	uint16_t peer_rx_port = pi;
2486 	uint16_t peer_tx_port = pi;
2487 	uint32_t manual = 1;
2488 	uint32_t tx_exp = hairpin_mode & 0x10;
2489 
2490 	if (!(hairpin_mode & 0xf)) {
2491 		peer_rx_port = pi;
2492 		peer_tx_port = pi;
2493 		manual = 0;
2494 	} else if (hairpin_mode & 0x1) {
2495 		peer_tx_port = rte_eth_find_next_owned_by(pi + 1,
2496 						       RTE_ETH_DEV_NO_OWNER);
2497 		if (peer_tx_port >= RTE_MAX_ETHPORTS)
2498 			peer_tx_port = rte_eth_find_next_owned_by(0,
2499 						RTE_ETH_DEV_NO_OWNER);
2500 		if (p_pi != RTE_MAX_ETHPORTS) {
2501 			peer_rx_port = p_pi;
2502 		} else {
2503 			uint16_t next_pi;
2504 
2505 			/* Last port will be the peer RX port of the first. */
2506 			RTE_ETH_FOREACH_DEV(next_pi)
2507 				peer_rx_port = next_pi;
2508 		}
2509 		manual = 1;
2510 	} else if (hairpin_mode & 0x2) {
2511 		if (cnt_pi & 0x1) {
2512 			peer_rx_port = p_pi;
2513 		} else {
2514 			peer_rx_port = rte_eth_find_next_owned_by(pi + 1,
2515 						RTE_ETH_DEV_NO_OWNER);
2516 			if (peer_rx_port >= RTE_MAX_ETHPORTS)
2517 				peer_rx_port = pi;
2518 		}
2519 		peer_tx_port = peer_rx_port;
2520 		manual = 1;
2521 	}
2522 
2523 	for (qi = nb_txq, i = 0; qi < nb_hairpinq + nb_txq; qi++) {
2524 		hairpin_conf.peers[0].port = peer_rx_port;
2525 		hairpin_conf.peers[0].queue = i + nb_rxq;
2526 		hairpin_conf.manual_bind = !!manual;
2527 		hairpin_conf.tx_explicit = !!tx_exp;
2528 		diag = rte_eth_tx_hairpin_queue_setup
2529 			(pi, qi, nb_txd, &hairpin_conf);
2530 		i++;
2531 		if (diag == 0)
2532 			continue;
2533 
2534 		/* Failed to set up the Tx hairpin queue, return */
2535 		if (port->port_status == RTE_PORT_HANDLING)
2536 			port->port_status = RTE_PORT_STOPPED;
2537 		else
2538 			fprintf(stderr,
2539 				"Port %d can not be set back to stopped\n", pi);
2540 		fprintf(stderr, "Fail to configure port %d hairpin queues\n",
2541 			pi);
2542 		/* try to reconfigure queues next time */
2543 		port->need_reconfig_queues = 1;
2544 		return -1;
2545 	}
2546 	for (qi = nb_rxq, i = 0; qi < nb_hairpinq + nb_rxq; qi++) {
2547 		hairpin_conf.peers[0].port = peer_tx_port;
2548 		hairpin_conf.peers[0].queue = i + nb_txq;
2549 		hairpin_conf.manual_bind = !!manual;
2550 		hairpin_conf.tx_explicit = !!tx_exp;
2551 		diag = rte_eth_rx_hairpin_queue_setup
2552 			(pi, qi, nb_rxd, &hairpin_conf);
2553 		i++;
2554 		if (diag == 0)
2555 			continue;
2556 
2557 		/* Failed to set up the Rx hairpin queue, return */
2558 		if (port->port_status == RTE_PORT_HANDLING)
2559 			port->port_status = RTE_PORT_STOPPED;
2560 		else
2561 			fprintf(stderr,
2562 				"Port %d can not be set back to stopped\n", pi);
2563 		fprintf(stderr, "Fail to configure port %d hairpin queues\n",
2564 			pi);
2565 		/* try to reconfigure queues next time */
2566 		port->need_reconfig_queues = 1;
2567 		return -1;
2568 	}
2569 	return 0;
2570 }
2571 
2572 /* Configure an Rx queue, with optional buffer split. */
2573 int
2574 rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
2575 	       uint16_t nb_rx_desc, unsigned int socket_id,
2576 	       struct rte_eth_rxconf *rx_conf, struct rte_mempool *mp)
2577 {
2578 	union rte_eth_rxseg rx_useg[MAX_SEGS_BUFFER_SPLIT] = {};
2579 	unsigned int i, mp_n;
2580 	int ret;
2581 
2582 	if (rx_pkt_nb_segs <= 1 ||
2583 	    (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) == 0) {
2584 		rx_conf->rx_seg = NULL;
2585 		rx_conf->rx_nseg = 0;
2586 		ret = rte_eth_rx_queue_setup(port_id, rx_queue_id,
2587 					     nb_rx_desc, socket_id,
2588 					     rx_conf, mp);
2589 		return ret;
2590 	}
2591 	for (i = 0; i < rx_pkt_nb_segs; i++) {
2592 		struct rte_eth_rxseg_split *rx_seg = &rx_useg[i].split;
2593 		struct rte_mempool *mpx;
2594 		/*
2595 		 * Use the last valid pool for the segments whose index
2596 		 * exceeds the number of configured mbuf pools.
2597 		 */
2598 		mp_n = (i >= mbuf_data_size_n) ? mbuf_data_size_n - 1 : i;
2599 		mpx = mbuf_pool_find(socket_id, mp_n);
2600 		/* Handle zero as mbuf data buffer size. */
2601 		rx_seg->length = rx_pkt_seg_lengths[i] ?
2602 				   rx_pkt_seg_lengths[i] :
2603 				   mbuf_data_size[mp_n];
2604 		rx_seg->offset = i < rx_pkt_nb_offs ?
2605 				   rx_pkt_seg_offsets[i] : 0;
2606 		rx_seg->mp = mpx ? mpx : mp;
2607 	}
2608 	rx_conf->rx_nseg = rx_pkt_nb_segs;
2609 	rx_conf->rx_seg = rx_useg;
2610 	ret = rte_eth_rx_queue_setup(port_id, rx_queue_id, nb_rx_desc,
2611 				    socket_id, rx_conf, NULL);
2612 	rx_conf->rx_seg = NULL;
2613 	rx_conf->rx_nseg = 0;
2614 	return ret;
2615 }
2616 
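/* Allocate the per-port arrays used to display the selected extended stats. */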
2617 static int
2618 alloc_xstats_display_info(portid_t pi)
2619 {
2620 	uint64_t **ids_supp = &ports[pi].xstats_info.ids_supp;
2621 	uint64_t **prev_values = &ports[pi].xstats_info.prev_values;
2622 	uint64_t **curr_values = &ports[pi].xstats_info.curr_values;
2623 
2624 	if (xstats_display_num == 0)
2625 		return 0;
2626 
2627 	*ids_supp = calloc(xstats_display_num, sizeof(**ids_supp));
2628 	if (*ids_supp == NULL)
2629 		goto fail_ids_supp;
2630 
2631 	*prev_values = calloc(xstats_display_num,
2632 			      sizeof(**prev_values));
2633 	if (*prev_values == NULL)
2634 		goto fail_prev_values;
2635 
2636 	*curr_values = calloc(xstats_display_num,
2637 			      sizeof(**curr_values));
2638 	if (*curr_values == NULL)
2639 		goto fail_curr_values;
2640 
2641 	ports[pi].xstats_info.allocated = true;
2642 
2643 	return 0;
2644 
2645 fail_curr_values:
2646 	free(*prev_values);
2647 fail_prev_values:
2648 	free(*ids_supp);
2649 fail_ids_supp:
2650 	return -ENOMEM;
2651 }
2652 
2653 static void
2654 free_xstats_display_info(portid_t pi)
2655 {
2656 	if (!ports[pi].xstats_info.allocated)
2657 		return;
2658 	free(ports[pi].xstats_info.ids_supp);
2659 	free(ports[pi].xstats_info.prev_values);
2660 	free(ports[pi].xstats_info.curr_values);
2661 	ports[pi].xstats_info.allocated = false;
2662 }
2663 
2664 /** Fill helper structures for specified port to show extended statistics. */
2665 static void
2666 fill_xstats_display_info_for_port(portid_t pi)
2667 {
2668 	unsigned int stat, stat_supp;
2669 	const char *xstat_name;
2670 	struct rte_port *port;
2671 	uint64_t *ids_supp;
2672 	int rc;
2673 
2674 	if (xstats_display_num == 0)
2675 		return;
2676 
2677 	if (pi == (portid_t)RTE_PORT_ALL) {
2678 		fill_xstats_display_info();
2679 		return;
2680 	}
2681 
2682 	port = &ports[pi];
2683 	if (port->port_status != RTE_PORT_STARTED)
2684 		return;
2685 
2686 	if (!port->xstats_info.allocated && alloc_xstats_display_info(pi) != 0)
2687 		rte_exit(EXIT_FAILURE,
2688 			 "Failed to allocate xstats display memory\n");
2689 
2690 	ids_supp = port->xstats_info.ids_supp;
2691 	for (stat = stat_supp = 0; stat < xstats_display_num; stat++) {
2692 		xstat_name = xstats_display[stat].name;
2693 		rc = rte_eth_xstats_get_id_by_name(pi, xstat_name,
2694 						   ids_supp + stat_supp);
2695 		if (rc != 0) {
2696 			fprintf(stderr, "No xstat '%s' on port %u - skipping (index %u)\n",
2697 				xstat_name, pi, stat);
2698 			continue;
2699 		}
2700 		stat_supp++;
2701 	}
2702 
2703 	port->xstats_info.ids_supp_sz = stat_supp;
2704 }
2705 
2706 /** Fill helper structures for all ports to show extended statistics. */
2707 static void
2708 fill_xstats_display_info(void)
2709 {
2710 	portid_t pi;
2711 
2712 	if (xstats_display_num == 0)
2713 		return;
2714 
2715 	RTE_ETH_FOREACH_DEV(pi)
2716 		fill_xstats_display_info_for_port(pi);
2717 }
2718 
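/*
 * Start the given port, or all ports when pid is RTE_PORT_ALL:
 * (re)configure the device and its queues if needed, start the device and,
 * when hairpin mode is enabled, bind the hairpin peer ports.
 */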
2719 int
2720 start_port(portid_t pid)
2721 {
2722 	int diag, need_check_link_status = -1;
2723 	portid_t pi;
2724 	portid_t p_pi = RTE_MAX_ETHPORTS;
2725 	portid_t pl[RTE_MAX_ETHPORTS];
2726 	portid_t peer_pl[RTE_MAX_ETHPORTS];
2727 	uint16_t cnt_pi = 0;
2728 	uint16_t cfg_pi = 0;
2729 	int peer_pi;
2730 	queueid_t qi;
2731 	struct rte_port *port;
2732 	struct rte_eth_hairpin_cap cap;
2733 
2734 	if (port_id_is_invalid(pid, ENABLED_WARN))
2735 		return 0;
2736 
2737 	RTE_ETH_FOREACH_DEV(pi) {
2738 		if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
2739 			continue;
2740 
2741 		if (port_is_bonding_slave(pi)) {
2742 			fprintf(stderr,
2743 				"Please remove port %d from bonded device.\n",
2744 				pi);
2745 			continue;
2746 		}
2747 
2748 		need_check_link_status = 0;
2749 		port = &ports[pi];
2750 		if (port->port_status == RTE_PORT_STOPPED)
2751 			port->port_status = RTE_PORT_HANDLING;
2752 		else {
2753 			fprintf(stderr, "Port %d is not stopped\n", pi);
2754 			continue;
2755 		}
2756 
2757 		if (port->need_reconfig > 0) {
2758 			struct rte_eth_conf dev_conf;
2759 			int k;
2760 
2761 			port->need_reconfig = 0;
2762 
2763 			if (flow_isolate_all) {
2764 				int ret = port_flow_isolate(pi, 1);
2765 				if (ret) {
2766 					fprintf(stderr,
2767 						"Failed to apply isolated mode on port %d\n",
2768 						pi);
2769 					return -1;
2770 				}
2771 			}
2772 			configure_rxtx_dump_callbacks(0);
2773 			printf("Configuring Port %d (socket %u)\n", pi,
2774 					port->socket_id);
2775 			if (nb_hairpinq > 0 &&
2776 			    rte_eth_dev_hairpin_capability_get(pi, &cap)) {
2777 				fprintf(stderr,
2778 					"Port %d doesn't support hairpin queues\n",
2779 					pi);
2780 				return -1;
2781 			}
2782 
2783 			/* configure port */
2784 			diag = eth_dev_configure_mp(pi, nb_rxq + nb_hairpinq,
2785 						     nb_txq + nb_hairpinq,
2786 						     &(port->dev_conf));
2787 			if (diag != 0) {
2788 				if (port->port_status == RTE_PORT_HANDLING)
2789 					port->port_status = RTE_PORT_STOPPED;
2790 				else
2791 					fprintf(stderr,
2792 						"Port %d can not be set back to stopped\n",
2793 						pi);
2794 				fprintf(stderr, "Fail to configure port %d\n",
2795 					pi);
2796 				/* try to reconfigure port next time */
2797 				port->need_reconfig = 1;
2798 				return -1;
2799 			}
2800 			/* get device configuration */
2801 			if (0 !=
2802 				eth_dev_conf_get_print_err(pi, &dev_conf)) {
2803 				fprintf(stderr,
2804 					"Failed to get device configuration for port %d\n",
2805 					pi);
2806 				return -1;
2807 			}
2808 			/* Apply Rx offloads configuration */
2809 			if (dev_conf.rxmode.offloads !=
2810 			    port->dev_conf.rxmode.offloads) {
2811 				port->dev_conf.rxmode.offloads |=
2812 					dev_conf.rxmode.offloads;
2813 				for (k = 0;
2814 				     k < port->dev_info.max_rx_queues;
2815 				     k++)
2816 					port->rx_conf[k].offloads |=
2817 						dev_conf.rxmode.offloads;
2818 			}
2819 			/* Apply Tx offloads configuration */
2820 			if (dev_conf.txmode.offloads !=
2821 			    port->dev_conf.txmode.offloads) {
2822 				port->dev_conf.txmode.offloads |=
2823 					dev_conf.txmode.offloads;
2824 				for (k = 0;
2825 				     k < port->dev_info.max_tx_queues;
2826 				     k++)
2827 					port->tx_conf[k].offloads |=
2828 						dev_conf.txmode.offloads;
2829 			}
2830 		}
2831 		if (port->need_reconfig_queues > 0 && is_proc_primary()) {
2832 			port->need_reconfig_queues = 0;
2833 			/* setup tx queues */
2834 			for (qi = 0; qi < nb_txq; qi++) {
2835 				if ((numa_support) &&
2836 					(txring_numa[pi] != NUMA_NO_CONFIG))
2837 					diag = rte_eth_tx_queue_setup(pi, qi,
2838 						port->nb_tx_desc[qi],
2839 						txring_numa[pi],
2840 						&(port->tx_conf[qi]));
2841 				else
2842 					diag = rte_eth_tx_queue_setup(pi, qi,
2843 						port->nb_tx_desc[qi],
2844 						port->socket_id,
2845 						&(port->tx_conf[qi]));
2846 
2847 				if (diag == 0)
2848 					continue;
2849 
2850 				/* Failed to set up the Tx queue, return */
2851 				if (port->port_status == RTE_PORT_HANDLING)
2852 					port->port_status = RTE_PORT_STOPPED;
2853 				else
2854 					fprintf(stderr,
2855 						"Port %d can not be set back to stopped\n",
2856 						pi);
2857 				fprintf(stderr,
2858 					"Fail to configure port %d tx queues\n",
2859 					pi);
2860 				/* try to reconfigure queues next time */
2861 				port->need_reconfig_queues = 1;
2862 				return -1;
2863 			}
2864 			for (qi = 0; qi < nb_rxq; qi++) {
2865 				/* setup rx queues */
2866 				if ((numa_support) &&
2867 					(rxring_numa[pi] != NUMA_NO_CONFIG)) {
2868 					struct rte_mempool *mp =
2869 						mbuf_pool_find
2870 							(rxring_numa[pi], 0);
2871 					if (mp == NULL) {
2872 						fprintf(stderr,
2873 							"Failed to setup RX queue: No mempool allocation on the socket %d\n",
2874 							rxring_numa[pi]);
2875 						return -1;
2876 					}
2877 
2878 					diag = rx_queue_setup(pi, qi,
2879 					     port->nb_rx_desc[qi],
2880 					     rxring_numa[pi],
2881 					     &(port->rx_conf[qi]),
2882 					     mp);
2883 				} else {
2884 					struct rte_mempool *mp =
2885 						mbuf_pool_find
2886 							(port->socket_id, 0);
2887 					if (mp == NULL) {
2888 						fprintf(stderr,
2889 							"Failed to setup RX queue: No mempool allocation on the socket %d\n",
2890 							port->socket_id);
2891 						return -1;
2892 					}
2893 					diag = rx_queue_setup(pi, qi,
2894 					     port->nb_rx_desc[qi],
2895 					     port->socket_id,
2896 					     &(port->rx_conf[qi]),
2897 					     mp);
2898 				}
2899 				if (diag == 0)
2900 					continue;
2901 
2902 				/* Failed to set up the Rx queue, return */
2903 				if (port->port_status == RTE_PORT_HANDLING)
2904 					port->port_status = RTE_PORT_STOPPED;
2905 				else
2906 					fprintf(stderr,
2907 						"Port %d can not be set back to stopped\n",
2908 						pi);
2909 				fprintf(stderr,
2910 					"Fail to configure port %d rx queues\n",
2911 					pi);
2912 				/* try to reconfigure queues next time */
2913 				port->need_reconfig_queues = 1;
2914 				return -1;
2915 			}
2916 			/* setup hairpin queues */
2917 			if (setup_hairpin_queues(pi, p_pi, cnt_pi) != 0)
2918 				return -1;
2919 		}
2920 		configure_rxtx_dump_callbacks(verbose_level);
2921 		if (clear_ptypes) {
2922 			diag = rte_eth_dev_set_ptypes(pi, RTE_PTYPE_UNKNOWN,
2923 					NULL, 0);
2924 			if (diag < 0)
2925 				fprintf(stderr,
2926 					"Port %d: Failed to disable Ptype parsing\n",
2927 					pi);
2928 		}
2929 
2930 		p_pi = pi;
2931 		cnt_pi++;
2932 
2933 		/* start port */
2934 		diag = eth_dev_start_mp(pi);
2935 		if (diag < 0) {
2936 			fprintf(stderr, "Fail to start port %d: %s\n",
2937 				pi, rte_strerror(-diag));
2938 
2939 			/* Failed to start the port, set its status back to stopped */
2940 			if (port->port_status == RTE_PORT_HANDLING)
2941 				port->port_status = RTE_PORT_STOPPED;
2942 			else
2943 				fprintf(stderr,
2944 					"Port %d can not be set back to stopped\n",
2945 					pi);
2946 			continue;
2947 		}
2948 
2949 		if (port->port_status == RTE_PORT_HANDLING)
2950 			port->port_status = RTE_PORT_STARTED;
2951 		else
2952 			fprintf(stderr, "Port %d can not be set into started\n",
2953 				pi);
2954 
2955 		if (eth_macaddr_get_print_err(pi, &port->eth_addr) == 0)
2956 			printf("Port %d: " RTE_ETHER_ADDR_PRT_FMT "\n", pi,
2957 					RTE_ETHER_ADDR_BYTES(&port->eth_addr));
2958 
2959 		/* at least one port started, need to check link status */
2960 		need_check_link_status = 1;
2961 
2962 		pl[cfg_pi++] = pi;
2963 	}
2964 
2965 	if (need_check_link_status == 1 && !no_link_check)
2966 		check_all_ports_link_status(RTE_PORT_ALL);
2967 	else if (need_check_link_status == 0)
2968 		fprintf(stderr, "Please stop the ports first\n");
2969 
2970 	if (hairpin_mode & 0xf) {
2971 		uint16_t i;
2972 		int j;
2973 
2974 		/* bind all started hairpin ports */
2975 		for (i = 0; i < cfg_pi; i++) {
2976 			pi = pl[i];
2977 			/* bind current Tx to all peer Rx */
2978 			peer_pi = rte_eth_hairpin_get_peer_ports(pi, peer_pl,
2979 							RTE_MAX_ETHPORTS, 1);
2980 			if (peer_pi < 0)
2981 				return peer_pi;
2982 			for (j = 0; j < peer_pi; j++) {
2983 				if (!port_is_started(peer_pl[j]))
2984 					continue;
2985 				diag = rte_eth_hairpin_bind(pi, peer_pl[j]);
2986 				if (diag < 0) {
2987 					fprintf(stderr,
2988 						"Error during binding hairpin Tx port %u to %u: %s\n",
2989 						pi, peer_pl[j],
2990 						rte_strerror(-diag));
2991 					return -1;
2992 				}
2993 			}
2994 			/* bind all peer Tx to current Rx */
2995 			peer_pi = rte_eth_hairpin_get_peer_ports(pi, peer_pl,
2996 							RTE_MAX_ETHPORTS, 0);
2997 			if (peer_pi < 0)
2998 				return peer_pi;
2999 			for (j = 0; j < peer_pi; j++) {
3000 				if (!port_is_started(peer_pl[j]))
3001 					continue;
3002 				diag = rte_eth_hairpin_bind(peer_pl[j], pi);
3003 				if (diag < 0) {
3004 					fprintf(stderr,
3005 						"Error during binding hairpin Tx port %u to %u: %s\n",
3006 						peer_pl[j], pi,
3007 						rte_strerror(-diag));
3008 					return -1;
3009 				}
3010 			}
3011 		}
3012 	}
3013 
3014 	fill_xstats_display_info_for_port(pid);
3015 
3016 	printf("Done\n");
3017 	return 0;
3018 }
3019 
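/*
 * Stop the given port, or all ports when pid is RTE_PORT_ALL, unbinding
 * hairpin peers and flushing flow rules beforehand.
 */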
3020 void
3021 stop_port(portid_t pid)
3022 {
3023 	portid_t pi;
3024 	struct rte_port *port;
3025 	int need_check_link_status = 0;
3026 	portid_t peer_pl[RTE_MAX_ETHPORTS];
3027 	int peer_pi;
3028 
3029 	if (port_id_is_invalid(pid, ENABLED_WARN))
3030 		return;
3031 
3032 	printf("Stopping ports...\n");
3033 
3034 	RTE_ETH_FOREACH_DEV(pi) {
3035 		if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
3036 			continue;
3037 
3038 		if (port_is_forwarding(pi) != 0 && test_done == 0) {
3039 			fprintf(stderr,
3040 				"Please remove port %d from forwarding configuration.\n",
3041 				pi);
3042 			continue;
3043 		}
3044 
3045 		if (port_is_bonding_slave(pi)) {
3046 			fprintf(stderr,
3047 				"Please remove port %d from bonded device.\n",
3048 				pi);
3049 			continue;
3050 		}
3051 
3052 		port = &ports[pi];
3053 		if (port->port_status == RTE_PORT_STARTED)
3054 			port->port_status = RTE_PORT_HANDLING;
3055 		else
3056 			continue;
3057 
3058 		if (hairpin_mode & 0xf) {
3059 			int j;
3060 
3061 			rte_eth_hairpin_unbind(pi, RTE_MAX_ETHPORTS);
3062 			/* unbind all peer Tx from current Rx */
3063 			peer_pi = rte_eth_hairpin_get_peer_ports(pi, peer_pl,
3064 							RTE_MAX_ETHPORTS, 0);
3065 			if (peer_pi < 0)
3066 				continue;
3067 			for (j = 0; j < peer_pi; j++) {
3068 				if (!port_is_started(peer_pl[j]))
3069 					continue;
3070 				rte_eth_hairpin_unbind(peer_pl[j], pi);
3071 			}
3072 		}
3073 
3074 		if (port->flow_list)
3075 			port_flow_flush(pi);
3076 
3077 		if (eth_dev_stop_mp(pi) != 0)
3078 			RTE_LOG(ERR, EAL, "rte_eth_dev_stop failed for port %u\n",
3079 				pi);
3080 
3081 		if (port->port_status == RTE_PORT_HANDLING)
3082 			port->port_status = RTE_PORT_STOPPED;
3083 		else
3084 			fprintf(stderr, "Port %d can not be set into stopped\n",
3085 				pi);
3086 		need_check_link_status = 1;
3087 	}
3088 	if (need_check_link_status && !no_link_check)
3089 		check_all_ports_link_status(RTE_PORT_ALL);
3090 
3091 	printf("Done\n");
3092 }
3093 
3094 static void
3095 remove_invalid_ports_in(portid_t *array, portid_t *total)
3096 {
3097 	portid_t i;
3098 	portid_t new_total = 0;
3099 
3100 	for (i = 0; i < *total; i++)
3101 		if (!port_id_is_invalid(array[i], DISABLED_WARN)) {
3102 			array[new_total] = array[i];
3103 			new_total++;
3104 		}
3105 	*total = new_total;
3106 }
3107 
3108 static void
3109 remove_invalid_ports(void)
3110 {
3111 	remove_invalid_ports_in(ports_ids, &nb_ports);
3112 	remove_invalid_ports_in(fwd_ports_ids, &nb_fwd_ports);
3113 	nb_cfg_ports = nb_fwd_ports;
3114 }
3115 
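/*
 * Close the given port, or all ports when pid is RTE_PORT_ALL: flush flow
 * rules and flex items, close the device and drop invalid port ids from
 * the port lists.
 */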
3116 void
3117 close_port(portid_t pid)
3118 {
3119 	portid_t pi;
3120 	struct rte_port *port;
3121 
3122 	if (port_id_is_invalid(pid, ENABLED_WARN))
3123 		return;
3124 
3125 	printf("Closing ports...\n");
3126 
3127 	RTE_ETH_FOREACH_DEV(pi) {
3128 		if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
3129 			continue;
3130 
3131 		if (port_is_forwarding(pi) != 0 && test_done == 0) {
3132 			fprintf(stderr,
3133 				"Please remove port %d from forwarding configuration.\n",
3134 				pi);
3135 			continue;
3136 		}
3137 
3138 		if (port_is_bonding_slave(pi)) {
3139 			fprintf(stderr,
3140 				"Please remove port %d from bonded device.\n",
3141 				pi);
3142 			continue;
3143 		}
3144 
3145 		port = &ports[pi];
3146 		if (port->port_status == RTE_PORT_CLOSED) {
3147 			fprintf(stderr, "Port %d is already closed\n", pi);
3148 			continue;
3149 		}
3150 
3151 		if (is_proc_primary()) {
3152 			port_flow_flush(pi);
3153 			port_flex_item_flush(pi);
3154 			rte_eth_dev_close(pi);
3155 		}
3156 
3157 		free_xstats_display_info(pi);
3158 	}
3159 
3160 	remove_invalid_ports();
3161 	printf("Done\n");
3162 }
3163 
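/*
 * Reset the given stopped port, or all ports when pid is RTE_PORT_ALL,
 * and mark it for reconfiguration on the next start.
 */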
3164 void
3165 reset_port(portid_t pid)
3166 {
3167 	int diag;
3168 	portid_t pi;
3169 	struct rte_port *port;
3170 
3171 	if (port_id_is_invalid(pid, ENABLED_WARN))
3172 		return;
3173 
3174 	if ((pid == (portid_t)RTE_PORT_ALL && !all_ports_stopped()) ||
3175 		(pid != (portid_t)RTE_PORT_ALL && !port_is_stopped(pid))) {
3176 		fprintf(stderr,
3177 			"Can not reset port(s), please stop port(s) first.\n");
3178 		return;
3179 	}
3180 
3181 	printf("Resetting ports...\n");
3182 
3183 	RTE_ETH_FOREACH_DEV(pi) {
3184 		if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
3185 			continue;
3186 
3187 		if (port_is_forwarding(pi) != 0 && test_done == 0) {
3188 			fprintf(stderr,
3189 				"Please remove port %d from forwarding configuration.\n",
3190 				pi);
3191 			continue;
3192 		}
3193 
3194 		if (port_is_bonding_slave(pi)) {
3195 			fprintf(stderr,
3196 				"Please remove port %d from bonded device.\n",
3197 				pi);
3198 			continue;
3199 		}
3200 
3201 		diag = rte_eth_dev_reset(pi);
3202 		if (diag == 0) {
3203 			port = &ports[pi];
3204 			port->need_reconfig = 1;
3205 			port->need_reconfig_queues = 1;
3206 		} else {
3207 			fprintf(stderr, "Failed to reset port %d. diag=%d\n",
3208 				pi, diag);
3209 		}
3210 	}
3211 
3212 	printf("Done\n");
3213 }
3214 
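/*
 * Probe and attach a new port described by a devargs identifier, then set
 * up the resulting port(s) either from the RTE_ETH_EVENT_NEW event or by
 * iterating over the devices matching the identifier.
 */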
3215 void
3216 attach_port(char *identifier)
3217 {
3218 	portid_t pi;
3219 	struct rte_dev_iterator iterator;
3220 
3221 	printf("Attaching a new port...\n");
3222 
3223 	if (identifier == NULL) {
3224 		fprintf(stderr, "Invalid parameters are specified\n");
3225 		return;
3226 	}
3227 
3228 	if (rte_dev_probe(identifier) < 0) {
3229 		TESTPMD_LOG(ERR, "Failed to attach port %s\n", identifier);
3230 		return;
3231 	}
3232 
3233 	/* first attach mode: event */
3234 	if (setup_on_probe_event) {
3235 		/* new ports are detected on RTE_ETH_EVENT_NEW event */
3236 		for (pi = 0; pi < RTE_MAX_ETHPORTS; pi++)
3237 			if (ports[pi].port_status == RTE_PORT_HANDLING &&
3238 					ports[pi].need_setup != 0)
3239 				setup_attached_port(pi);
3240 		return;
3241 	}
3242 
3243 	/* second attach mode: iterator */
3244 	RTE_ETH_FOREACH_MATCHING_DEV(pi, identifier, &iterator) {
3245 		/* setup ports matching the devargs used for probing */
3246 		if (port_is_forwarding(pi))
3247 			continue; /* port was already attached before */
3248 		setup_attached_port(pi);
3249 	}
3250 }
3251 
3252 static void
3253 setup_attached_port(portid_t pi)
3254 {
3255 	unsigned int socket_id;
3256 	int ret;
3257 
3258 	socket_id = (unsigned)rte_eth_dev_socket_id(pi);
3259 	/* if socket_id is invalid, set to the first available socket. */
3260 	if (check_socket_id(socket_id) < 0)
3261 		socket_id = socket_ids[0];
3262 	reconfig(pi, socket_id);
3263 	ret = rte_eth_promiscuous_enable(pi);
3264 	if (ret != 0)
3265 		fprintf(stderr,
3266 			"Error during enabling promiscuous mode for port %u: %s - ignore\n",
3267 			pi, rte_strerror(-ret));
3268 
3269 	ports_ids[nb_ports++] = pi;
3270 	fwd_ports_ids[nb_fwd_ports++] = pi;
3271 	nb_cfg_ports = nb_fwd_ports;
3272 	ports[pi].need_setup = 0;
3273 	ports[pi].port_status = RTE_PORT_STOPPED;
3274 
3275 	printf("Port %d is attached. Now total ports is %d\n", pi, nb_ports);
3276 	printf("Done\n");
3277 }
3278 
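/*
 * Detach a device after checking that none of its ports is still running,
 * flushing their flow rules first.
 */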
3279 static void
3280 detach_device(struct rte_device *dev)
3281 {
3282 	portid_t sibling;
3283 
3284 	if (dev == NULL) {
3285 		fprintf(stderr, "Device already removed\n");
3286 		return;
3287 	}
3288 
3289 	printf("Removing a device...\n");
3290 
3291 	RTE_ETH_FOREACH_DEV_OF(sibling, dev) {
3292 		if (ports[sibling].port_status != RTE_PORT_CLOSED) {
3293 			if (ports[sibling].port_status != RTE_PORT_STOPPED) {
3294 				fprintf(stderr, "Port %u not stopped\n",
3295 					sibling);
3296 				return;
3297 			}
3298 			port_flow_flush(sibling);
3299 		}
3300 	}
3301 
3302 	if (rte_dev_remove(dev) < 0) {
3303 		TESTPMD_LOG(ERR, "Failed to detach device %s\n", dev->name);
3304 		return;
3305 	}
3306 	remove_invalid_ports();
3307 
3308 	printf("Device is detached\n");
3309 	printf("Now total ports is %d\n", nb_ports);
3310 	printf("Done\n");
3311 	return;
3312 }
3313 
3314 void
3315 detach_port_device(portid_t port_id)
3316 {
3317 	int ret;
3318 	struct rte_eth_dev_info dev_info;
3319 
3320 	if (port_id_is_invalid(port_id, ENABLED_WARN))
3321 		return;
3322 
3323 	if (ports[port_id].port_status != RTE_PORT_CLOSED) {
3324 		if (ports[port_id].port_status != RTE_PORT_STOPPED) {
3325 			fprintf(stderr, "Port not stopped\n");
3326 			return;
3327 		}
3328 		fprintf(stderr, "Port was not closed\n");
3329 	}
3330 
3331 	ret = eth_dev_info_get_print_err(port_id, &dev_info);
3332 	if (ret != 0) {
3333 		TESTPMD_LOG(ERR,
3334 			"Failed to get device info for port %d, not detaching\n",
3335 			port_id);
3336 		return;
3337 	}
3338 	detach_device(dev_info.device);
3339 }
3340 
3341 void
3342 detach_devargs(char *identifier)
3343 {
3344 	struct rte_dev_iterator iterator;
3345 	struct rte_devargs da;
3346 	portid_t port_id;
3347 
3348 	printf("Removing a device...\n");
3349 
3350 	memset(&da, 0, sizeof(da));
3351 	if (rte_devargs_parsef(&da, "%s", identifier)) {
3352 		fprintf(stderr, "cannot parse identifier\n");
3353 		return;
3354 	}
3355 
3356 	RTE_ETH_FOREACH_MATCHING_DEV(port_id, identifier, &iterator) {
3357 		if (ports[port_id].port_status != RTE_PORT_CLOSED) {
3358 			if (ports[port_id].port_status != RTE_PORT_STOPPED) {
3359 				fprintf(stderr, "Port %u not stopped\n",
3360 					port_id);
3361 				rte_eth_iterator_cleanup(&iterator);
3362 				rte_devargs_reset(&da);
3363 				return;
3364 			}
3365 			port_flow_flush(port_id);
3366 		}
3367 	}
3368 
3369 	if (rte_eal_hotplug_remove(da.bus->name, da.name) != 0) {
3370 		TESTPMD_LOG(ERR, "Failed to detach device %s(%s)\n",
3371 			    da.name, da.bus->name);
3372 		rte_devargs_reset(&da);
3373 		return;
3374 	}
3375 
3376 	remove_invalid_ports();
3377 
3378 	printf("Device %s is detached\n", identifier);
3379 	printf("Now total ports is %d\n", nb_ports);
3380 	printf("Done\n");
3381 	rte_devargs_reset(&da);
3382 }
3383 
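/*
 * Clean up on exit: stop forwarding, stop and close all ports, disable
 * hotplug handling and free the mbuf mempools.
 */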
3384 void
3385 pmd_test_exit(void)
3386 {
3387 	portid_t pt_id;
3388 	unsigned int i;
3389 	int ret;
3390 
3391 	if (test_done == 0)
3392 		stop_packet_forwarding();
3393 
3394 #ifndef RTE_EXEC_ENV_WINDOWS
3395 	for (i = 0 ; i < RTE_DIM(mempools) ; i++) {
3396 		if (mempools[i]) {
3397 			if (mp_alloc_type == MP_ALLOC_ANON)
3398 				rte_mempool_mem_iter(mempools[i], dma_unmap_cb,
3399 						     NULL);
3400 		}
3401 	}
3402 #endif
3403 	if (ports != NULL) {
3404 		no_link_check = 1;
3405 		RTE_ETH_FOREACH_DEV(pt_id) {
3406 			printf("\nStopping port %d...\n", pt_id);
3407 			fflush(stdout);
3408 			stop_port(pt_id);
3409 		}
3410 		RTE_ETH_FOREACH_DEV(pt_id) {
3411 			printf("\nShutting down port %d...\n", pt_id);
3412 			fflush(stdout);
3413 			close_port(pt_id);
3414 		}
3415 	}
3416 
3417 	if (hot_plug) {
3418 		ret = rte_dev_event_monitor_stop();
3419 		if (ret) {
3420 			RTE_LOG(ERR, EAL,
3421 				"fail to stop device event monitor.\n");
3422 			return;
3423 		}
3424 
3425 		ret = rte_dev_event_callback_unregister(NULL,
3426 			dev_event_callback, NULL);
3427 		if (ret < 0) {
3428 			RTE_LOG(ERR, EAL,
3429 				"fail to unregister device event callback.\n");
3430 			return;
3431 		}
3432 
3433 		ret = rte_dev_hotplug_handle_disable();
3434 		if (ret) {
3435 			RTE_LOG(ERR, EAL,
3436 				"fail to disable hotplug handling.\n");
3437 			return;
3438 		}
3439 	}
3440 	for (i = 0 ; i < RTE_DIM(mempools) ; i++) {
3441 		if (mempools[i])
3442 			mempool_free_mp(mempools[i]);
3443 	}
3444 	free(xstats_display);
3445 
3446 	printf("\nBye...\n");
3447 }
3448 
3449 typedef void (*cmd_func_t)(void);
3450 struct pmd_test_command {
3451 	const char *cmd_name;
3452 	cmd_func_t cmd_func;
3453 };
3454 
3455 /* Check the link status of all ports in up to 9 s, then print the final status */
3456 static void
3457 check_all_ports_link_status(uint32_t port_mask)
3458 {
3459 #define CHECK_INTERVAL 100 /* 100ms */
3460 #define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
3461 	portid_t portid;
3462 	uint8_t count, all_ports_up, print_flag = 0;
3463 	struct rte_eth_link link;
3464 	int ret;
3465 	char link_status[RTE_ETH_LINK_MAX_STR_LEN];
3466 
3467 	printf("Checking link statuses...\n");
3468 	fflush(stdout);
3469 	for (count = 0; count <= MAX_CHECK_TIME; count++) {
3470 		all_ports_up = 1;
3471 		RTE_ETH_FOREACH_DEV(portid) {
3472 			if ((port_mask & (1 << portid)) == 0)
3473 				continue;
3474 			memset(&link, 0, sizeof(link));
3475 			ret = rte_eth_link_get_nowait(portid, &link);
3476 			if (ret < 0) {
3477 				all_ports_up = 0;
3478 				if (print_flag == 1)
3479 					fprintf(stderr,
3480 						"Port %u link get failed: %s\n",
3481 						portid, rte_strerror(-ret));
3482 				continue;
3483 			}
3484 			/* print link status if flag set */
3485 			if (print_flag == 1) {
3486 				rte_eth_link_to_str(link_status,
3487 					sizeof(link_status), &link);
3488 				printf("Port %d %s\n", portid, link_status);
3489 				continue;
3490 			}
3491 			/* clear all_ports_up flag if any link down */
3492 			if (link.link_status == RTE_ETH_LINK_DOWN) {
3493 				all_ports_up = 0;
3494 				break;
3495 			}
3496 		}
3497 		/* after finally printing all link status, get out */
3498 		if (print_flag == 1)
3499 			break;
3500 
3501 		if (all_ports_up == 0) {
3502 			fflush(stdout);
3503 			rte_delay_ms(CHECK_INTERVAL);
3504 		}
3505 
3506 		/* set the print_flag if all ports up or timeout */
3507 		if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
3508 			print_flag = 1;
3509 		}
3510 
3511 		if (lsc_interrupt)
3512 			break;
3513 	}
3514 }
3515 
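/*
 * Deferred handler for a device removal event: stop forwarding if needed,
 * stop and close the port, detach its device, and restart forwarding if it
 * was stopped here.
 */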
3516 static void
3517 rmv_port_callback(void *arg)
3518 {
3519 	int need_to_start = 0;
3520 	int org_no_link_check = no_link_check;
3521 	portid_t port_id = (intptr_t)arg;
3522 	struct rte_eth_dev_info dev_info;
3523 	int ret;
3524 
3525 	RTE_ETH_VALID_PORTID_OR_RET(port_id);
3526 
3527 	if (!test_done && port_is_forwarding(port_id)) {
3528 		need_to_start = 1;
3529 		stop_packet_forwarding();
3530 	}
3531 	no_link_check = 1;
3532 	stop_port(port_id);
3533 	no_link_check = org_no_link_check;
3534 
3535 	ret = eth_dev_info_get_print_err(port_id, &dev_info);
3536 	if (ret != 0)
3537 		TESTPMD_LOG(ERR,
3538 			"Failed to get device info for port %d, not detaching\n",
3539 			port_id);
3540 	else {
3541 		struct rte_device *device = dev_info.device;
3542 		close_port(port_id);
3543 		detach_device(device); /* might be already removed or have more ports */
3544 	}
3545 	if (need_to_start)
3546 		start_packet_forwarding(0);
3547 }
3548 
3549 /* This function is used by the interrupt thread */
3550 static int
3551 eth_event_callback(portid_t port_id, enum rte_eth_event_type type, void *param,
3552 		  void *ret_param)
3553 {
3554 	RTE_SET_USED(param);
3555 	RTE_SET_USED(ret_param);
3556 
3557 	if (type >= RTE_ETH_EVENT_MAX) {
3558 		fprintf(stderr,
3559 			"\nPort %" PRIu16 ": %s called upon invalid event %d\n",
3560 			port_id, __func__, type);
3561 		fflush(stderr);
3562 	} else if (event_print_mask & (UINT32_C(1) << type)) {
3563 		printf("\nPort %" PRIu16 ": %s event\n", port_id,
3564 			eth_event_desc[type]);
3565 		fflush(stdout);
3566 	}
3567 
3568 	switch (type) {
3569 	case RTE_ETH_EVENT_NEW:
3570 		ports[port_id].need_setup = 1;
3571 		ports[port_id].port_status = RTE_PORT_HANDLING;
3572 		break;
3573 	case RTE_ETH_EVENT_INTR_RMV:
3574 		if (port_id_is_invalid(port_id, DISABLED_WARN))
3575 			break;
3576 		if (rte_eal_alarm_set(100000,
3577 				rmv_port_callback, (void *)(intptr_t)port_id))
3578 			fprintf(stderr,
3579 				"Could not set up deferred device removal\n");
3580 		break;
3581 	case RTE_ETH_EVENT_DESTROY:
3582 		ports[port_id].port_status = RTE_PORT_CLOSED;
3583 		printf("Port %u is closed\n", port_id);
3584 		break;
3585 	default:
3586 		break;
3587 	}
3588 	return 0;
3589 }
3590 
3591 static int
3592 register_eth_event_callback(void)
3593 {
3594 	int ret;
3595 	enum rte_eth_event_type event;
3596 
3597 	for (event = RTE_ETH_EVENT_UNKNOWN;
3598 			event < RTE_ETH_EVENT_MAX; event++) {
3599 		ret = rte_eth_dev_callback_register(RTE_ETH_ALL,
3600 				event,
3601 				eth_event_callback,
3602 				NULL);
3603 		if (ret != 0) {
3604 			TESTPMD_LOG(ERR, "Failed to register callback for "
3605 					"%s event\n", eth_event_desc[event]);
3606 			return -1;
3607 		}
3608 	}
3609 
3610 	return 0;
3611 }
3612 
3613 /* This function is used by the interrupt thread */
3614 static void
3615 dev_event_callback(const char *device_name, enum rte_dev_event_type type,
3616 			     __rte_unused void *arg)
3617 {
3618 	uint16_t port_id;
3619 	int ret;
3620 
3621 	if (type >= RTE_DEV_EVENT_MAX) {
3622 		fprintf(stderr, "%s called upon invalid event %d\n",
3623 			__func__, type);
3624 		fflush(stderr);
3625 	}
3626 
3627 	switch (type) {
3628 	case RTE_DEV_EVENT_REMOVE:
3629 		RTE_LOG(DEBUG, EAL, "The device: %s has been removed!\n",
3630 			device_name);
3631 		ret = rte_eth_dev_get_port_by_name(device_name, &port_id);
3632 		if (ret) {
3633 			RTE_LOG(ERR, EAL, "cannot get port for device %s!\n",
3634 				device_name);
3635 			return;
3636 		}
3637 		/*
3638 		 * Because the user's callback is invoked from the EAL
3639 		 * interrupt callback, the interrupt callback must finish
3640 		 * before it can be unregistered when detaching the device.
3641 		 * So the callback returns quickly and the actual detach is
3642 		 * deferred via an alarm. This is a workaround; once device
3643 		 * detaching is moved into the EAL, the deferred removal
3644 		 * can be removed.
3645 		 */
3646 		if (rte_eal_alarm_set(100000,
3647 				rmv_port_callback, (void *)(intptr_t)port_id))
3648 			RTE_LOG(ERR, EAL,
3649 				"Could not set up deferred device removal\n");
3650 		break;
3651 	case RTE_DEV_EVENT_ADD:
3652 		RTE_LOG(ERR, EAL, "The device: %s has been added!\n",
3653 			device_name);
3654 		/* TODO: After the kernel driver binding finishes,
3655 		 * begin to attach the port.
3656 		 */
3657 		break;
3658 	default:
3659 		break;
3660 	}
3661 }
3662 
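/*
 * Initialize the per-queue Rx/Tx configuration of a port from the driver
 * defaults and the threshold parameters given on the command line.
 */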
3663 static void
3664 rxtx_port_config(portid_t pid)
3665 {
3666 	uint16_t qid;
3667 	uint64_t offloads;
3668 	struct rte_port *port = &ports[pid];
3669 
3670 	for (qid = 0; qid < nb_rxq; qid++) {
3671 		offloads = port->rx_conf[qid].offloads;
3672 		port->rx_conf[qid] = port->dev_info.default_rxconf;
3673 
3674 		if (rxq_share > 0 &&
3675 		    (port->dev_info.dev_capa & RTE_ETH_DEV_CAPA_RXQ_SHARE)) {
3676 			/* Non-zero share group to enable RxQ share. */
3677 			port->rx_conf[qid].share_group = pid / rxq_share + 1;
3678 			port->rx_conf[qid].share_qid = qid; /* Equal mapping. */
3679 		}
3680 
3681 		if (offloads != 0)
3682 			port->rx_conf[qid].offloads = offloads;
3683 
3684 		/* Check if any Rx parameters have been passed */
3685 		if (rx_pthresh != RTE_PMD_PARAM_UNSET)
3686 			port->rx_conf[qid].rx_thresh.pthresh = rx_pthresh;
3687 
3688 		if (rx_hthresh != RTE_PMD_PARAM_UNSET)
3689 			port->rx_conf[qid].rx_thresh.hthresh = rx_hthresh;
3690 
3691 		if (rx_wthresh != RTE_PMD_PARAM_UNSET)
3692 			port->rx_conf[qid].rx_thresh.wthresh = rx_wthresh;
3693 
3694 		if (rx_free_thresh != RTE_PMD_PARAM_UNSET)
3695 			port->rx_conf[qid].rx_free_thresh = rx_free_thresh;
3696 
3697 		if (rx_drop_en != RTE_PMD_PARAM_UNSET)
3698 			port->rx_conf[qid].rx_drop_en = rx_drop_en;
3699 
3700 		port->nb_rx_desc[qid] = nb_rxd;
3701 	}
3702 
3703 	for (qid = 0; qid < nb_txq; qid++) {
3704 		offloads = port->tx_conf[qid].offloads;
3705 		port->tx_conf[qid] = port->dev_info.default_txconf;
3706 		if (offloads != 0)
3707 			port->tx_conf[qid].offloads = offloads;
3708 
3709 		/* Check if any Tx parameters have been passed */
3710 		if (tx_pthresh != RTE_PMD_PARAM_UNSET)
3711 			port->tx_conf[qid].tx_thresh.pthresh = tx_pthresh;
3712 
3713 		if (tx_hthresh != RTE_PMD_PARAM_UNSET)
3714 			port->tx_conf[qid].tx_thresh.hthresh = tx_hthresh;
3715 
3716 		if (tx_wthresh != RTE_PMD_PARAM_UNSET)
3717 			port->tx_conf[qid].tx_thresh.wthresh = tx_wthresh;
3718 
3719 		if (tx_rs_thresh != RTE_PMD_PARAM_UNSET)
3720 			port->tx_conf[qid].tx_rs_thresh = tx_rs_thresh;
3721 
3722 		if (tx_free_thresh != RTE_PMD_PARAM_UNSET)
3723 			port->tx_conf[qid].tx_free_thresh = tx_free_thresh;
3724 
3725 		port->nb_tx_desc[qid] = nb_txd;
3726 	}
3727 }
3728 
3729 /*
3730  * Helper function to set MTU from frame size
3731  *
3732  * port->dev_info should be set before calling this function.
3733  *
3734  * return 0 on success, negative on error
3735  */
3736 int
3737 update_mtu_from_frame_size(portid_t portid, uint32_t max_rx_pktlen)
3738 {
3739 	struct rte_port *port = &ports[portid];
3740 	uint32_t eth_overhead;
3741 	uint16_t mtu, new_mtu;
3742 
3743 	eth_overhead = get_eth_overhead(&port->dev_info);
3744 
3745 	if (rte_eth_dev_get_mtu(portid, &mtu) != 0) {
3746 		printf("Failed to get MTU for port %u\n", portid);
3747 		return -1;
3748 	}
3749 
3750 	new_mtu = max_rx_pktlen - eth_overhead;
3751 
3752 	if (mtu == new_mtu)
3753 		return 0;
3754 
3755 	if (eth_dev_set_mtu_mp(portid, new_mtu) != 0) {
3756 		fprintf(stderr,
3757 			"Failed to set MTU to %u for port %u\n",
3758 			new_mtu, portid);
3759 		return -1;
3760 	}
3761 
3762 	port->dev_conf.rxmode.mtu = new_mtu;
3763 
3764 	return 0;
3765 }
3766 
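/*
 * Set the default configuration of every port: RSS and multi-queue mode,
 * per-queue Rx/Tx settings, and link-state / removal interrupts.
 */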
3767 void
3768 init_port_config(void)
3769 {
3770 	portid_t pid;
3771 	struct rte_port *port;
3772 	int ret, i;
3773 
3774 	RTE_ETH_FOREACH_DEV(pid) {
3775 		port = &ports[pid];
3776 		port->dev_conf.fdir_conf = fdir_conf;
3777 
3778 		ret = eth_dev_info_get_print_err(pid, &port->dev_info);
3779 		if (ret != 0)
3780 			return;
3781 
3782 		if (nb_rxq > 1) {
3783 			port->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
3784 			port->dev_conf.rx_adv_conf.rss_conf.rss_hf =
3785 				rss_hf & port->dev_info.flow_type_rss_offloads;
3786 		} else {
3787 			port->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
3788 			port->dev_conf.rx_adv_conf.rss_conf.rss_hf = 0;
3789 		}
3790 
3791 		if (port->dcb_flag == 0) {
3792 			if (port->dev_conf.rx_adv_conf.rss_conf.rss_hf != 0) {
3793 				port->dev_conf.rxmode.mq_mode =
3794 					(enum rte_eth_rx_mq_mode)
3795 						(rx_mq_mode & RTE_ETH_MQ_RX_RSS);
3796 			} else {
3797 				port->dev_conf.rxmode.mq_mode = RTE_ETH_MQ_RX_NONE;
3798 				port->dev_conf.rxmode.offloads &=
3799 						~RTE_ETH_RX_OFFLOAD_RSS_HASH;
3800 
3801 				for (i = 0;
3802 				     i < port->dev_info.nb_rx_queues;
3803 				     i++)
3804 					port->rx_conf[i].offloads &=
3805 						~RTE_ETH_RX_OFFLOAD_RSS_HASH;
3806 			}
3807 		}
3808 
3809 		rxtx_port_config(pid);
3810 
3811 		ret = eth_macaddr_get_print_err(pid, &port->eth_addr);
3812 		if (ret != 0)
3813 			return;
3814 
3815 #if defined RTE_NET_IXGBE && defined RTE_LIBRTE_IXGBE_BYPASS
3816 		rte_pmd_ixgbe_bypass_init(pid);
3817 #endif
3818 
3819 		if (lsc_interrupt && (*port->dev_info.dev_flags & RTE_ETH_DEV_INTR_LSC))
3820 			port->dev_conf.intr_conf.lsc = 1;
3821 		if (rmv_interrupt && (*port->dev_info.dev_flags & RTE_ETH_DEV_INTR_RMV))
3822 			port->dev_conf.intr_conf.rmv = 1;
3823 	}
3824 }
3825 
3826 void set_port_slave_flag(portid_t slave_pid)
3827 {
3828 	struct rte_port *port;
3829 
3830 	port = &ports[slave_pid];
3831 	port->slave_flag = 1;
3832 }
3833 
3834 void clear_port_slave_flag(portid_t slave_pid)
3835 {
3836 	struct rte_port *port;
3837 
3838 	port = &ports[slave_pid];
3839 	port->slave_flag = 0;
3840 }
3841 
3842 uint8_t port_is_bonding_slave(portid_t slave_pid)
3843 {
3844 	struct rte_port *port;
3845 	struct rte_eth_dev_info dev_info;
3846 	int ret;
3847 
3848 	port = &ports[slave_pid];
3849 	ret = eth_dev_info_get_print_err(slave_pid, &dev_info);
3850 	if (ret != 0) {
3851 		TESTPMD_LOG(ERR,
3852 			"Failed to get device info for port id %d, "
3853 			"cannot determine if the port is a bonded slave\n",
3854 			slave_pid);
3855 		return 0;
3856 	}
3857 	if ((*dev_info.dev_flags & RTE_ETH_DEV_BONDED_SLAVE) || (port->slave_flag == 1))
3858 		return 1;
3859 	return 0;
3860 }
3861 
3862 const uint16_t vlan_tags[] = {
3863 		0,  1,  2,  3,  4,  5,  6,  7,
3864 		8,  9, 10, 11,  12, 13, 14, 15,
3865 		16, 17, 18, 19, 20, 21, 22, 23,
3866 		24, 25, 26, 27, 28, 29, 30, 31
3867 };
3868 
3869 static int
3870 get_eth_dcb_conf(portid_t pid, struct rte_eth_conf *eth_conf,
3871 		 enum dcb_mode_enable dcb_mode,
3872 		 enum rte_eth_nb_tcs num_tcs,
3873 		 uint8_t pfc_en)
3874 {
3875 	uint8_t i;
3876 	int32_t rc;
3877 	struct rte_eth_rss_conf rss_conf;
3878 
3879 	/*
3880 	 * Builds up the correct configuration for DCB+VT based on the VLAN tags
3881 	 * array given above, and the number of traffic classes available for use.
3882 	 */
3883 	if (dcb_mode == DCB_VT_ENABLED) {
3884 		struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
3885 				&eth_conf->rx_adv_conf.vmdq_dcb_conf;
3886 		struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3887 				&eth_conf->tx_adv_conf.vmdq_dcb_tx_conf;
3888 
3889 		/* VMDQ+DCB RX and TX configurations */
3890 		vmdq_rx_conf->enable_default_pool = 0;
3891 		vmdq_rx_conf->default_pool = 0;
3892 		vmdq_rx_conf->nb_queue_pools =
3893 			(num_tcs ==  RTE_ETH_4_TCS ? RTE_ETH_32_POOLS : RTE_ETH_16_POOLS);
3894 		vmdq_tx_conf->nb_queue_pools =
3895 			(num_tcs ==  RTE_ETH_4_TCS ? RTE_ETH_32_POOLS : RTE_ETH_16_POOLS);
3896 
3897 		vmdq_rx_conf->nb_pool_maps = vmdq_rx_conf->nb_queue_pools;
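		/* Steer VLAN tag i to pool (i % nb_queue_pools), one map entry per tag. */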
3898 		for (i = 0; i < vmdq_rx_conf->nb_pool_maps; i++) {
3899 			vmdq_rx_conf->pool_map[i].vlan_id = vlan_tags[i];
3900 			vmdq_rx_conf->pool_map[i].pools =
3901 				1 << (i % vmdq_rx_conf->nb_queue_pools);
3902 		}
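		/* Map the user priorities round-robin onto the configured TCs. */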
3903 		for (i = 0; i < RTE_ETH_DCB_NUM_USER_PRIORITIES; i++) {
3904 			vmdq_rx_conf->dcb_tc[i] = i % num_tcs;
3905 			vmdq_tx_conf->dcb_tc[i] = i % num_tcs;
3906 		}
3907 
3908 		/* Set the multi-queue DCB mode for both RX and TX */
3909 		eth_conf->rxmode.mq_mode =
3910 				(enum rte_eth_rx_mq_mode)
3911 					(rx_mq_mode & RTE_ETH_MQ_RX_VMDQ_DCB);
3912 		eth_conf->txmode.mq_mode = RTE_ETH_MQ_TX_VMDQ_DCB;
3913 	} else {
3914 		struct rte_eth_dcb_rx_conf *rx_conf =
3915 				&eth_conf->rx_adv_conf.dcb_rx_conf;
3916 		struct rte_eth_dcb_tx_conf *tx_conf =
3917 				&eth_conf->tx_adv_conf.dcb_tx_conf;
3918 
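		/*
		 * Fetch the current RSS configuration so DCB+RSS keeps the
		 * hash types and key already programmed on the port.
		 */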
3919 		memset(&rss_conf, 0, sizeof(struct rte_eth_rss_conf));
3920 
3921 		rc = rte_eth_dev_rss_hash_conf_get(pid, &rss_conf);
3922 		if (rc != 0)
3923 			return rc;
3924 
3925 		rx_conf->nb_tcs = num_tcs;
3926 		tx_conf->nb_tcs = num_tcs;
3927 
3928 		for (i = 0; i < RTE_ETH_DCB_NUM_USER_PRIORITIES; i++) {
3929 			rx_conf->dcb_tc[i] = i % num_tcs;
3930 			tx_conf->dcb_tc[i] = i % num_tcs;
3931 		}
3932 
3933 		eth_conf->rxmode.mq_mode =
3934 				(enum rte_eth_rx_mq_mode)
3935 					(rx_mq_mode & RTE_ETH_MQ_RX_DCB_RSS);
3936 		eth_conf->rx_adv_conf.rss_conf = rss_conf;
3937 		eth_conf->txmode.mq_mode = RTE_ETH_MQ_TX_DCB;
3938 	}
3939 
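	/* Always advertise priority-group support; add PFC only when requested. */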
3940 	if (pfc_en)
3941 		eth_conf->dcb_capability_en =
3942 				RTE_ETH_DCB_PG_SUPPORT | RTE_ETH_DCB_PFC_SUPPORT;
3943 	else
3944 		eth_conf->dcb_capability_en = RTE_ETH_DCB_PG_SUPPORT;
3945 
3946 	return 0;
3947 }
3948 
3949 int
3950 init_port_dcb_config(portid_t pid,
3951 		     enum dcb_mode_enable dcb_mode,
3952 		     enum rte_eth_nb_tcs num_tcs,
3953 		     uint8_t pfc_en)
3954 {
3955 	struct rte_eth_conf port_conf;
3956 	struct rte_port *rte_port;
3957 	int retval;
3958 	uint16_t i;
3959 
3960 	if (num_procs > 1) {
3961 		fprintf(stderr, "The multi-process feature doesn't support DCB.\n");
3962 		return -ENOTSUP;
3963 	}
3964 	rte_port = &ports[pid];
3965 
3966 	/* retain the original device configuration. */
3967 	memcpy(&port_conf, &rte_port->dev_conf, sizeof(struct rte_eth_conf));
3968 
3969 	/* Set configuration of DCB in VT mode and DCB in non-VT mode */
3970 	retval = get_eth_dcb_conf(pid, &port_conf, dcb_mode, num_tcs, pfc_en);
3971 	if (retval < 0)
3972 		return retval;
3973 	port_conf.rxmode.offloads |= RTE_ETH_RX_OFFLOAD_VLAN_FILTER;
3974 	/* remove RSS HASH offload for DCB in vt mode */
3975 	if (port_conf.rxmode.mq_mode == RTE_ETH_MQ_RX_VMDQ_DCB) {
3976 		port_conf.rxmode.offloads &= ~RTE_ETH_RX_OFFLOAD_RSS_HASH;
3977 		for (i = 0; i < nb_rxq; i++)
3978 			rte_port->rx_conf[i].offloads &=
3979 				~RTE_ETH_RX_OFFLOAD_RSS_HASH;
3980 	}
3981 
3982 	/* Re-configure the device. */
3983 	retval = rte_eth_dev_configure(pid, nb_rxq, nb_rxq, &port_conf);
3984 	if (retval < 0)
3985 		return retval;
3986 
3987 	retval = eth_dev_info_get_print_err(pid, &rte_port->dev_info);
3988 	if (retval != 0)
3989 		return retval;
3990 
3991 	/* If dev_info.vmdq_pool_base is greater than 0,
3992 	 * the queue IDs of the VMDq pools start after the PF queues.
3993 	 */
3994 	if (dcb_mode == DCB_VT_ENABLED &&
3995 	    rte_port->dev_info.vmdq_pool_base > 0) {
3996 		fprintf(stderr,
3997 			"VMDQ_DCB multi-queue mode is nonsensical for port %d.\n",
3998 			pid);
3999 		return -1;
4000 	}
4001 
4002 	/* Assume the ports in testpmd have the same DCB capability
4003 	 * and the same number of rxq and txq in DCB mode.
4004 	 */
4005 	if (dcb_mode == DCB_VT_ENABLED) {
4006 		if (rte_port->dev_info.max_vfs > 0) {
4007 			nb_rxq = rte_port->dev_info.nb_rx_queues;
4008 			nb_txq = rte_port->dev_info.nb_tx_queues;
4009 		} else {
4010 			nb_rxq = rte_port->dev_info.max_rx_queues;
4011 			nb_txq = rte_port->dev_info.max_tx_queues;
4012 		}
4013 	} else {
4014 		/* If VT is disabled, use all PF queues */
4015 		if (rte_port->dev_info.vmdq_pool_base == 0) {
4016 			nb_rxq = rte_port->dev_info.max_rx_queues;
4017 			nb_txq = rte_port->dev_info.max_tx_queues;
4018 		} else {
4019 			nb_rxq = (queueid_t)num_tcs;
4020 			nb_txq = (queueid_t)num_tcs;
4021 
4022 		}
4023 	}
4024 	rx_free_thresh = 64;
4025 
4026 	memcpy(&rte_port->dev_conf, &port_conf, sizeof(struct rte_eth_conf));
4027 
4028 	rxtx_port_config(pid);
4029 	/* VLAN filter */
4030 	rte_port->dev_conf.rxmode.offloads |= RTE_ETH_RX_OFFLOAD_VLAN_FILTER;
4031 	for (i = 0; i < RTE_DIM(vlan_tags); i++)
4032 		rx_vft_set(pid, vlan_tags[i], 1);
4033 
4034 	retval = eth_macaddr_get_print_err(pid, &rte_port->eth_addr);
4035 	if (retval != 0)
4036 		return retval;
4037 
4038 	rte_port->dcb_flag = 1;
4039 
4040 	/* Enter DCB configuration status */
4041 	dcb_config = 1;
4042 
4043 	return 0;
4044 }
4045 
4046 static void
4047 init_port(void)
4048 {
4049 	int i;
4050 
4051 	/* Configuration of Ethernet ports. */
4052 	ports = rte_zmalloc("testpmd: ports",
4053 			    sizeof(struct rte_port) * RTE_MAX_ETHPORTS,
4054 			    RTE_CACHE_LINE_SIZE);
4055 	if (ports == NULL) {
4056 		rte_exit(EXIT_FAILURE,
4057 				"rte_zmalloc(%d struct rte_port) failed\n",
4058 				RTE_MAX_ETHPORTS);
4059 	}
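	/* Mark per-port xstats as unallocated and init each port's tunnel flow list. */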
4060 	for (i = 0; i < RTE_MAX_ETHPORTS; i++)
4061 		ports[i].xstats_info.allocated = false;
4062 	for (i = 0; i < RTE_MAX_ETHPORTS; i++)
4063 		LIST_INIT(&ports[i].flow_tunnel_list);
4064 	/* Initialize ports NUMA structures */
4065 	memset(port_numa, NUMA_NO_CONFIG, RTE_MAX_ETHPORTS);
4066 	memset(rxring_numa, NUMA_NO_CONFIG, RTE_MAX_ETHPORTS);
4067 	memset(txring_numa, NUMA_NO_CONFIG, RTE_MAX_ETHPORTS);
4068 }
4069 
4070 static void
4071 force_quit(void)
4072 {
4073 	pmd_test_exit();
4074 	prompt_exit();
4075 }
4076 
4077 static void
4078 print_stats(void)
4079 {
4080 	uint8_t i;
4081 	const char clr[] = { 27, '[', '2', 'J', '\0' };
4082 	const char top_left[] = { 27, '[', '1', ';', '1', 'H', '\0' };
4083 
4084 	/* Clear screen and move to top left */
4085 	printf("%s%s", clr, top_left);
4086 
4087 	printf("\nPort statistics ====================================");
4088 	for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++)
4089 		nic_stats_display(fwd_ports_ids[i]);
4090 
4091 	fflush(stdout);
4092 }
4093 
4094 static void
4095 signal_handler(int signum)
4096 {
4097 	if (signum == SIGINT || signum == SIGTERM) {
4098 		fprintf(stderr, "\nSignal %d received, preparing to exit...\n",
4099 			signum);
4100 #ifdef RTE_LIB_PDUMP
4101 		/* uninitialize packet capture framework */
4102 		rte_pdump_uninit();
4103 #endif
4104 #ifdef RTE_LIB_LATENCYSTATS
4105 		if (latencystats_enabled != 0)
4106 			rte_latencystats_uninit();
4107 #endif
4108 		force_quit();
4109 		/* Set flag to indicate forced termination. */
4110 		f_quit = 1;
4111 		/* exit with the expected status */
4112 #ifndef RTE_EXEC_ENV_WINDOWS
4113 		signal(signum, SIG_DFL);
4114 		kill(getpid(), signum);
4115 #endif
4116 	}
4117 }
4118 
4119 int
4120 main(int argc, char** argv)
4121 {
4122 	int diag;
4123 	portid_t port_id;
4124 	uint16_t count;
4125 	int ret;
4126 
4127 	signal(SIGINT, signal_handler);
4128 	signal(SIGTERM, signal_handler);
4129 
4130 	testpmd_logtype = rte_log_register("testpmd");
4131 	if (testpmd_logtype < 0)
4132 		rte_exit(EXIT_FAILURE, "Cannot register log type");
4133 	rte_log_set_level(testpmd_logtype, RTE_LOG_DEBUG);
4134 
4135 	diag = rte_eal_init(argc, argv);
4136 	if (diag < 0)
4137 		rte_exit(EXIT_FAILURE, "Cannot init EAL: %s\n",
4138 			 rte_strerror(rte_errno));
4139 
4140 	ret = register_eth_event_callback();
4141 	if (ret != 0)
4142 		rte_exit(EXIT_FAILURE, "Cannot register for ethdev events");
4143 
4144 #ifdef RTE_LIB_PDUMP
4145 	/* initialize packet capture framework */
4146 	rte_pdump_init();
4147 #endif
4148 
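	/* Record the IDs of all ethdev ports probed during EAL initialization. */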
4149 	count = 0;
4150 	RTE_ETH_FOREACH_DEV(port_id) {
4151 		ports_ids[count] = port_id;
4152 		count++;
4153 	}
4154 	nb_ports = (portid_t) count;
4155 	if (nb_ports == 0)
4156 		TESTPMD_LOG(WARNING, "No probed ethernet devices\n");
4157 
4158 	/* allocate port structures, and init them */
4159 	init_port();
4160 
4161 	set_def_fwd_config();
4162 	if (nb_lcores == 0)
4163 		rte_exit(EXIT_FAILURE, "No cores defined for forwarding\n"
4164 			 "Check the core mask argument\n");
4165 
4166 	/* Bitrate/latency stats disabled by default */
4167 #ifdef RTE_LIB_BITRATESTATS
4168 	bitrate_enabled = 0;
4169 #endif
4170 #ifdef RTE_LIB_LATENCYSTATS
4171 	latencystats_enabled = 0;
4172 #endif
4173 
4174 	/* on FreeBSD, mlockall() is disabled by default */
4175 #ifdef RTE_EXEC_ENV_FREEBSD
4176 	do_mlockall = 0;
4177 #else
4178 	do_mlockall = 1;
4179 #endif
4180 
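	/*
	 * rte_eal_init() returned the number of arguments it consumed;
	 * skip them so only testpmd's own options are parsed below.
	 */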
4181 	argc -= diag;
4182 	argv += diag;
4183 	if (argc > 1)
4184 		launch_args_parse(argc, argv);
4185 
4186 #ifndef RTE_EXEC_ENV_WINDOWS
4187 	if (do_mlockall && mlockall(MCL_CURRENT | MCL_FUTURE)) {
4188 		TESTPMD_LOG(NOTICE, "mlockall() failed with error \"%s\"\n",
4189 			strerror(errno));
4190 	}
4191 #endif
4192 
4193 	if (tx_first && interactive)
4194 		rte_exit(EXIT_FAILURE, "--tx-first cannot be used in "
4195 				"interactive mode.\n");
4196 
4197 	if (tx_first && lsc_interrupt) {
4198 		fprintf(stderr,
4199 			"Warning: lsc_interrupt needs to be off when using tx_first. Disabling.\n");
4200 		lsc_interrupt = 0;
4201 	}
4202 
4203 	if (!nb_rxq && !nb_txq)
4204 		fprintf(stderr,
4205 			"Warning: Either rx or tx queues should be non-zero\n");
4206 
4207 	if (nb_rxq > 1 && nb_rxq > nb_txq)
4208 		fprintf(stderr,
4209 			"Warning: nb_rxq=%d enables RSS configuration, but nb_txq=%d will prevent it from being fully tested.\n",
4210 			nb_rxq, nb_txq);
4211 
4212 	init_config();
4213 
4214 	if (hot_plug) {
4215 		ret = rte_dev_hotplug_handle_enable();
4216 		if (ret) {
4217 			RTE_LOG(ERR, EAL,
4218 				"failed to enable hotplug handling\n");
4219 			return -1;
4220 		}
4221 
4222 		ret = rte_dev_event_monitor_start();
4223 		if (ret) {
4224 			RTE_LOG(ERR, EAL,
4225 				"failed to start device event monitoring\n");
4226 			return -1;
4227 		}
4228 
4229 		ret = rte_dev_event_callback_register(NULL,
4230 			dev_event_callback, NULL);
4231 		if (ret) {
4232 			RTE_LOG(ERR, EAL,
4233 				"failed to register device event callback\n");
4234 			return -1;
4235 		}
4236 	}
4237 
4238 	if (!no_device_start && start_port(RTE_PORT_ALL) != 0)
4239 		rte_exit(EXIT_FAILURE, "Start ports failed\n");
4240 
4241 	/* set all ports to promiscuous mode by default */
4242 	RTE_ETH_FOREACH_DEV(port_id) {
4243 		ret = rte_eth_promiscuous_enable(port_id);
4244 		if (ret != 0)
4245 			fprintf(stderr,
4246 				"Error during enabling promiscuous mode for port %u: %s - ignore\n",
4247 				port_id, rte_strerror(-ret));
4248 	}
4249 
4250 #ifdef RTE_LIB_METRICS
4251 	/* Init metrics library */
4252 	rte_metrics_init(rte_socket_id());
4253 #endif
4254 
4255 #ifdef RTE_LIB_LATENCYSTATS
4256 	if (latencystats_enabled != 0) {
4257 		int ret = rte_latencystats_init(1, NULL);
4258 		if (ret)
4259 			fprintf(stderr,
4260 				"Warning: latencystats init() returned error %d\n",
4261 				ret);
4262 		fprintf(stderr, "Latencystats running on lcore %d\n",
4263 			latencystats_lcore_id);
4264 	}
4265 #endif
4266 
4267 	/* Setup bitrate stats */
4268 #ifdef RTE_LIB_BITRATESTATS
4269 	if (bitrate_enabled != 0) {
4270 		bitrate_data = rte_stats_bitrate_create();
4271 		if (bitrate_data == NULL)
4272 			rte_exit(EXIT_FAILURE,
4273 				"Could not allocate bitrate data.\n");
4274 		rte_stats_bitrate_reg(bitrate_data);
4275 	}
4276 #endif
4277 #ifdef RTE_LIB_CMDLINE
4278 	if (strlen(cmdline_filename) != 0)
4279 		cmdline_read_from_file(cmdline_filename);
4280 
4281 	if (interactive == 1) {
4282 		if (auto_start) {
4283 			printf("Start automatic packet forwarding\n");
4284 			start_packet_forwarding(0);
4285 		}
4286 		prompt();
4287 		pmd_test_exit();
4288 	} else
4289 #endif
4290 	{
4291 		char c;
4292 		int rc;
4293 
4294 		f_quit = 0;
4295 
4296 		printf("No commandline core given, start packet forwarding\n");
4297 		start_packet_forwarding(tx_first);
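		/*
		 * With --stats-period set, print per-port statistics at the
		 * requested interval until a termination signal sets f_quit.
		 */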
4298 		if (stats_period != 0) {
4299 			uint64_t prev_time = 0, cur_time, diff_time = 0;
4300 			uint64_t timer_period;
4301 
4302 			/* Convert to number of cycles */
4303 			timer_period = stats_period * rte_get_timer_hz();
4304 
4305 			while (f_quit == 0) {
4306 				cur_time = rte_get_timer_cycles();
4307 				diff_time += cur_time - prev_time;
4308 
4309 				if (diff_time >= timer_period) {
4310 					print_stats();
4311 					/* Reset the timer */
4312 					diff_time = 0;
4313 				}
4314 				/* Sleep to avoid unnecessary checks */
4315 				prev_time = cur_time;
4316 				rte_delay_us_sleep(US_PER_S);
4317 			}
4318 		}
4319 
4320 		printf("Press enter to exit\n");
4321 		rc = read(0, &c, 1);
4322 		pmd_test_exit();
4323 		if (rc < 0)
4324 			return 1;
4325 	}
4326 
4327 	ret = rte_eal_cleanup();
4328 	if (ret != 0)
4329 		rte_exit(EXIT_FAILURE,
4330 			 "EAL cleanup failed: %s\n", strerror(-ret));
4331 
4332 	return EXIT_SUCCESS;
4333 }
4334