xref: /dpdk/app/test-pmd/testpmd.c (revision 65744833651199c3bb3b996c6f0f9fe87b42c770)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2017 Intel Corporation
3  */
4 
5 #include <stdarg.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <signal.h>
9 #include <string.h>
10 #include <time.h>
11 #include <fcntl.h>
12 #ifndef RTE_EXEC_ENV_WINDOWS
13 #include <sys/mman.h>
14 #endif
15 #include <sys/types.h>
16 #include <errno.h>
17 #include <stdbool.h>
18 
19 #include <sys/queue.h>
20 #include <sys/stat.h>
21 
22 #include <stdint.h>
23 #include <unistd.h>
24 #include <inttypes.h>
25 
26 #include <rte_common.h>
27 #include <rte_errno.h>
28 #include <rte_byteorder.h>
29 #include <rte_log.h>
30 #include <rte_debug.h>
31 #include <rte_cycles.h>
32 #include <rte_memory.h>
33 #include <rte_memcpy.h>
34 #include <rte_launch.h>
35 #include <rte_eal.h>
36 #include <rte_alarm.h>
37 #include <rte_per_lcore.h>
38 #include <rte_lcore.h>
39 #include <rte_atomic.h>
40 #include <rte_branch_prediction.h>
41 #include <rte_mempool.h>
42 #include <rte_malloc.h>
43 #include <rte_mbuf.h>
44 #include <rte_mbuf_pool_ops.h>
45 #include <rte_interrupts.h>
46 #include <rte_pci.h>
47 #include <rte_ether.h>
48 #include <rte_ethdev.h>
49 #include <rte_dev.h>
50 #include <rte_string_fns.h>
51 #ifdef RTE_NET_IXGBE
52 #include <rte_pmd_ixgbe.h>
53 #endif
54 #ifdef RTE_LIB_PDUMP
55 #include <rte_pdump.h>
56 #endif
57 #include <rte_flow.h>
58 #include <rte_metrics.h>
59 #ifdef RTE_LIB_BITRATESTATS
60 #include <rte_bitrate.h>
61 #endif
62 #ifdef RTE_LIB_LATENCYSTATS
63 #include <rte_latencystats.h>
64 #endif
65 #ifdef RTE_EXEC_ENV_WINDOWS
66 #include <process.h>
67 #endif
68 
69 #include "testpmd.h"
70 
71 #ifndef MAP_HUGETLB
72 /* FreeBSD may not have MAP_HUGETLB (in fact, it probably doesn't) */
73 #define HUGE_FLAG (0x40000)
74 #else
75 #define HUGE_FLAG MAP_HUGETLB
76 #endif
77 
78 #ifndef MAP_HUGE_SHIFT
79 /* older kernels (or FreeBSD) will not have this define */
80 #define HUGE_SHIFT (26)
81 #else
82 #define HUGE_SHIFT MAP_HUGE_SHIFT
83 #endif
84 
85 #define EXTMEM_HEAP_NAME "extmem"
86 #define EXTBUF_ZONE_SIZE RTE_PGSIZE_2M
87 
88 uint16_t verbose_level = 0; /**< Silent by default. */
89 int testpmd_logtype; /**< Log type for testpmd logs */
90 
91 /* use main core for command line ? */
92 uint8_t interactive = 0;
93 uint8_t auto_start = 0;
94 uint8_t tx_first;
95 char cmdline_filename[PATH_MAX] = {0};
96 
97 /*
98  * NUMA support configuration.
99  * When set, the NUMA support attempts to dispatch the allocation of the
100  * RX and TX memory rings, and of the DMA memory buffers (mbufs) for the
101  * probed ports among the CPU sockets 0 and 1.
102  * Otherwise, all memory is allocated from CPU socket 0.
103  */
104 uint8_t numa_support = 1; /**< numa enabled by default */
105 
106 /*
107  * In UMA mode, all memory is allocated from socket 0 if --socket-num is
108  * not configured.
109  */
110 uint8_t socket_num = UMA_NO_CONFIG;
111 
112 /*
113  * Select mempool allocation type:
114  * - native: use regular DPDK memory
115  * - anon: use regular DPDK memory to create mempool, but populate using
116  *         anonymous memory (may not be IOVA-contiguous)
117  * - xmem/xmemhuge: use externally allocated memory (regular or huge pages)
118  */
119 uint8_t mp_alloc_type = MP_ALLOC_NATIVE;
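
/*
 * Illustrative note (an assumption, not taken from this file): the
 * allocation type is normally selected on the testpmd command line
 * (e.g. --mp-alloc=anon or --mp-alloc=xbuf), which maps onto the
 * MP_ALLOC_* values handled in mbuf_pool_create() below.
 */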
120 
121 /*
122  * Store the specified sockets on which the memory pools used by the
123  * ports are allocated.
124  */
125 uint8_t port_numa[RTE_MAX_ETHPORTS];
126 
127 /*
128  * Store the specified sockets on which the RX rings used by the
129  * ports are allocated.
130  */
131 uint8_t rxring_numa[RTE_MAX_ETHPORTS];
132 
133 /*
134  * Store the specified sockets on which the TX rings used by the
135  * ports are allocated.
136  */
137 uint8_t txring_numa[RTE_MAX_ETHPORTS];
138 
139 /*
140  * Record the Ethernet address of peer target ports to which packets are
141  * forwarded.
142  * Must be instantiated with the ethernet addresses of peer traffic generator
143  * ports.
144  */
145 struct rte_ether_addr peer_eth_addrs[RTE_MAX_ETHPORTS];
146 portid_t nb_peer_eth_addrs = 0;
147 
148 /*
149  * Probed Target Environment.
150  */
151 struct rte_port *ports;	       /**< For all probed ethernet ports. */
152 portid_t nb_ports;             /**< Number of probed ethernet ports. */
153 struct fwd_lcore **fwd_lcores; /**< For all probed logical cores. */
154 lcoreid_t nb_lcores;           /**< Number of probed logical cores. */
155 
156 portid_t ports_ids[RTE_MAX_ETHPORTS]; /**< Store all port ids. */
157 
158 /*
159  * Test Forwarding Configuration.
160  *    nb_fwd_lcores <= nb_cfg_lcores <= nb_lcores
161  *    nb_fwd_ports  <= nb_cfg_ports  <= nb_ports
162  */
163 lcoreid_t nb_cfg_lcores; /**< Number of configured logical cores. */
164 lcoreid_t nb_fwd_lcores; /**< Number of forwarding logical cores. */
165 portid_t  nb_cfg_ports;  /**< Number of configured ports. */
166 portid_t  nb_fwd_ports;  /**< Number of forwarding ports. */
167 
168 unsigned int fwd_lcores_cpuids[RTE_MAX_LCORE]; /**< CPU ids configuration. */
169 portid_t fwd_ports_ids[RTE_MAX_ETHPORTS];      /**< Port ids configuration. */
170 
171 struct fwd_stream **fwd_streams; /**< For each RX queue of each port. */
172 streamid_t nb_fwd_streams;       /**< Is equal to nb_ports * max(nb_rxq, nb_txq). */
173 
174 /*
175  * Forwarding engines.
176  */
177 struct fwd_engine * fwd_engines[] = {
178 	&io_fwd_engine,
179 	&mac_fwd_engine,
180 	&mac_swap_engine,
181 	&flow_gen_engine,
182 	&rx_only_engine,
183 	&tx_only_engine,
184 	&csum_fwd_engine,
185 	&icmp_echo_engine,
186 	&noisy_vnf_engine,
187 	&five_tuple_swap_fwd_engine,
188 #ifdef RTE_LIBRTE_IEEE1588
189 	&ieee1588_fwd_engine,
190 #endif
191 	NULL,
192 };
193 
194 struct rte_mempool *mempools[RTE_MAX_NUMA_NODES * MAX_SEGS_BUFFER_SPLIT];
195 uint16_t mempool_flags;
196 
197 struct fwd_config cur_fwd_config;
198 struct fwd_engine *cur_fwd_eng = &io_fwd_engine; /**< IO mode by default. */
199 uint32_t retry_enabled;
200 uint32_t burst_tx_delay_time = BURST_TX_WAIT_US;
201 uint32_t burst_tx_retry_num = BURST_TX_RETRIES;
202 
203 uint32_t mbuf_data_size_n = 1; /* Number of specified mbuf sizes. */
204 uint16_t mbuf_data_size[MAX_SEGS_BUFFER_SPLIT] = {
205 	DEFAULT_MBUF_DATA_SIZE
206 }; /**< Mbuf data space size. */
207 uint32_t param_total_num_mbufs = 0;  /**< number of mbufs in all pools - if
208                                       * specified on command-line. */
209 uint16_t stats_period; /**< Period to show statistics (disabled by default) */
210 
211 /** Extended statistics to show. */
212 struct rte_eth_xstat_name *xstats_display;
213 
214 unsigned int xstats_display_num; /**< Size of extended statistics to show */
215 
216 /*
217  * In a container, the process running with the 'stats-period' option cannot be
218  * terminated as usual; set this flag to exit the stats period loop on SIGINT/SIGTERM.
219  */
220 uint8_t f_quit;
221 
222 /*
223  * Max Rx frame size, set by '--max-pkt-len' parameter.
224  */
225 uint32_t max_rx_pkt_len;
226 
227 /*
228  * Configuration of packet segments used to scatter received packets
229  * if any of the split features is configured.
230  */
231 uint16_t rx_pkt_seg_lengths[MAX_SEGS_BUFFER_SPLIT];
232 uint8_t  rx_pkt_nb_segs; /**< Number of segments to split */
233 uint16_t rx_pkt_seg_offsets[MAX_SEGS_BUFFER_SPLIT];
234 uint8_t  rx_pkt_nb_offs; /**< Number of specified offsets */
235 
236 /*
237  * Configuration of packet segments used by the "txonly" processing engine.
238  */
239 uint16_t tx_pkt_length = TXONLY_DEF_PACKET_LEN; /**< TXONLY packet length. */
240 uint16_t tx_pkt_seg_lengths[RTE_MAX_SEGS_PER_PKT] = {
241 	TXONLY_DEF_PACKET_LEN,
242 };
243 uint8_t  tx_pkt_nb_segs = 1; /**< Number of segments in TXONLY packets */
244 
245 enum tx_pkt_split tx_pkt_split = TX_PKT_SPLIT_OFF;
246 /**< Split policy for packets to TX. */
247 
248 uint8_t txonly_multi_flow;
249 /**< Whether multiple flows are generated in TXONLY mode. */
250 
251 uint32_t tx_pkt_times_inter;
252 /**< Timings for send scheduling in TXONLY mode, time between bursts. */
253 
254 uint32_t tx_pkt_times_intra;
255 /**< Timings for send scheduling in TXONLY mode, time between packets. */
256 
257 uint16_t nb_pkt_per_burst = DEF_PKT_BURST; /**< Number of packets per burst. */
258 uint16_t nb_pkt_flowgen_clones; /**< Number of Tx packet clones to send in flowgen mode. */
259 int nb_flows_flowgen = 1024; /**< Number of flows in flowgen mode. */
260 uint16_t mb_mempool_cache = DEF_MBUF_CACHE; /**< Size of mbuf mempool cache. */
261 
262 /* Whether the current configuration is in DCB mode; 0 means it is not. */
263 uint8_t dcb_config = 0;
264 
265 /*
266  * Configurable number of RX/TX queues.
267  */
268 queueid_t nb_hairpinq; /**< Number of hairpin queues per port. */
269 queueid_t nb_rxq = 1; /**< Number of RX queues per port. */
270 queueid_t nb_txq = 1; /**< Number of TX queues per port. */
271 
272 /*
273  * Configurable number of RX/TX ring descriptors.
274  * Defaults are supplied by drivers via ethdev.
275  */
276 #define RTE_TEST_RX_DESC_DEFAULT 0
277 #define RTE_TEST_TX_DESC_DEFAULT 0
278 uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT; /**< Number of RX descriptors. */
279 uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT; /**< Number of TX descriptors. */
280 
281 #define RTE_PMD_PARAM_UNSET -1
282 /*
283  * Configurable values of RX and TX ring threshold registers.
284  */
285 
286 int8_t rx_pthresh = RTE_PMD_PARAM_UNSET;
287 int8_t rx_hthresh = RTE_PMD_PARAM_UNSET;
288 int8_t rx_wthresh = RTE_PMD_PARAM_UNSET;
289 
290 int8_t tx_pthresh = RTE_PMD_PARAM_UNSET;
291 int8_t tx_hthresh = RTE_PMD_PARAM_UNSET;
292 int8_t tx_wthresh = RTE_PMD_PARAM_UNSET;
293 
294 /*
295  * Configurable value of RX free threshold.
296  */
297 int16_t rx_free_thresh = RTE_PMD_PARAM_UNSET;
298 
299 /*
300  * Configurable value of RX drop enable.
301  */
302 int8_t rx_drop_en = RTE_PMD_PARAM_UNSET;
303 
304 /*
305  * Configurable value of TX free threshold.
306  */
307 int16_t tx_free_thresh = RTE_PMD_PARAM_UNSET;
308 
309 /*
310  * Configurable value of TX RS bit threshold.
311  */
312 int16_t tx_rs_thresh = RTE_PMD_PARAM_UNSET;
313 
314 /*
315  * Configurable value of buffered packets before sending.
316  */
317 uint16_t noisy_tx_sw_bufsz;
318 
319 /*
320  * Configurable value of packet buffer timeout.
321  */
322 uint16_t noisy_tx_sw_buf_flush_time;
323 
324 /*
325  * Configurable value for size of VNF internal memory area
326  * used for simulating noisy neighbour behaviour
327  */
328 uint64_t noisy_lkup_mem_sz;
329 
330 /*
331  * Configurable value of number of random writes done in
332  * VNF simulation memory area.
333  */
334 uint64_t noisy_lkup_num_writes;
335 
336 /*
337  * Configurable value of number of random reads done in
338  * VNF simulation memory area.
339  */
340 uint64_t noisy_lkup_num_reads;
341 
342 /*
343  * Configurable value of number of random reads/writes done in
344  * VNF simulation memory area.
345  */
346 uint64_t noisy_lkup_num_reads_writes;
347 
348 /*
349  * Receive Side Scaling (RSS) configuration.
350  */
351 uint64_t rss_hf = ETH_RSS_IP; /* RSS IP by default. */
352 
353 /*
354  * Port topology configuration
355  */
356 uint16_t port_topology = PORT_TOPOLOGY_PAIRED; /* Ports are paired by default */
357 
358 /*
359  * Avoid flushing all the RX streams before forwarding starts.
360  */
361 uint8_t no_flush_rx = 0; /* flush by default */
362 
363 /*
364  * Flow API isolated mode.
365  */
366 uint8_t flow_isolate_all;
367 
368 /*
369  * Avoid checking the link status when starting/stopping a port.
370  */
371 uint8_t no_link_check = 0; /* check by default */
372 
373 /*
374  * Don't automatically start all ports in interactive mode.
375  */
376 uint8_t no_device_start = 0;
377 
378 /*
379  * Enable link status change notification
380  */
381 uint8_t lsc_interrupt = 1; /* enabled by default */
382 
383 /*
384  * Enable device removal notification.
385  */
386 uint8_t rmv_interrupt = 1; /* enabled by default */
387 
388 uint8_t hot_plug = 0; /**< hotplug disabled by default. */
389 
390 /* After attach, port setup is called on event or by iterator */
391 bool setup_on_probe_event = true;
392 
393 /* Clear ptypes on port initialization. */
394 uint8_t clear_ptypes = true;
395 
396 /* Hairpin ports configuration mode. */
397 uint16_t hairpin_mode;
398 
399 /* Pretty printing of ethdev events */
400 static const char * const eth_event_desc[] = {
401 	[RTE_ETH_EVENT_UNKNOWN] = "unknown",
402 	[RTE_ETH_EVENT_INTR_LSC] = "link state change",
403 	[RTE_ETH_EVENT_QUEUE_STATE] = "queue state",
404 	[RTE_ETH_EVENT_INTR_RESET] = "reset",
405 	[RTE_ETH_EVENT_VF_MBOX] = "VF mbox",
406 	[RTE_ETH_EVENT_IPSEC] = "IPsec",
407 	[RTE_ETH_EVENT_MACSEC] = "MACsec",
408 	[RTE_ETH_EVENT_INTR_RMV] = "device removal",
409 	[RTE_ETH_EVENT_NEW] = "device probed",
410 	[RTE_ETH_EVENT_DESTROY] = "device released",
411 	[RTE_ETH_EVENT_FLOW_AGED] = "flow aged",
412 	[RTE_ETH_EVENT_MAX] = NULL,
413 };
414 
415 /*
416  * Display or mask ether events
417  * Default to all events except VF_MBOX
418  */
419 uint32_t event_print_mask = (UINT32_C(1) << RTE_ETH_EVENT_UNKNOWN) |
420 			    (UINT32_C(1) << RTE_ETH_EVENT_INTR_LSC) |
421 			    (UINT32_C(1) << RTE_ETH_EVENT_QUEUE_STATE) |
422 			    (UINT32_C(1) << RTE_ETH_EVENT_INTR_RESET) |
423 			    (UINT32_C(1) << RTE_ETH_EVENT_IPSEC) |
424 			    (UINT32_C(1) << RTE_ETH_EVENT_MACSEC) |
425 			    (UINT32_C(1) << RTE_ETH_EVENT_INTR_RMV) |
426 			    (UINT32_C(1) << RTE_ETH_EVENT_FLOW_AGED);
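
/*
 * Illustrative note (not part of the original source): individual event
 * kinds can be added to or removed from this mask with the same bit
 * arithmetic; for example, a build that also wants VF mailbox events
 * printed could do:
 *
 *   event_print_mask |= UINT32_C(1) << RTE_ETH_EVENT_VF_MBOX;
 */
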
427 /*
428  * Decide whether all memory is locked for performance.
429  */
430 int do_mlockall = 0;
431 
432 /*
433  * NIC bypass mode configuration options.
434  */
435 
436 #if defined RTE_NET_IXGBE && defined RTE_LIBRTE_IXGBE_BYPASS
437 /* The NIC bypass watchdog timeout. */
438 uint32_t bypass_timeout = RTE_PMD_IXGBE_BYPASS_TMT_OFF;
439 #endif
440 
441 
442 #ifdef RTE_LIB_LATENCYSTATS
443 
444 /*
445  * Set when latency stats are enabled on the command line.
446  */
447 uint8_t latencystats_enabled;
448 
449 /*
450  * Lcore ID used to service latency statistics.
451  */
452 lcoreid_t latencystats_lcore_id = -1;
453 
454 #endif
455 
456 /*
457  * Ethernet device configuration.
458  */
459 struct rte_eth_rxmode rx_mode;
460 
461 struct rte_eth_txmode tx_mode = {
462 	.offloads = DEV_TX_OFFLOAD_MBUF_FAST_FREE,
463 };
464 
465 struct rte_fdir_conf fdir_conf = {
466 	.mode = RTE_FDIR_MODE_NONE,
467 	.pballoc = RTE_FDIR_PBALLOC_64K,
468 	.status = RTE_FDIR_REPORT_STATUS,
469 	.mask = {
470 		.vlan_tci_mask = 0xFFEF,
471 		.ipv4_mask     = {
472 			.src_ip = 0xFFFFFFFF,
473 			.dst_ip = 0xFFFFFFFF,
474 		},
475 		.ipv6_mask     = {
476 			.src_ip = {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
477 			.dst_ip = {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
478 		},
479 		.src_port_mask = 0xFFFF,
480 		.dst_port_mask = 0xFFFF,
481 		.mac_addr_byte_mask = 0xFF,
482 		.tunnel_type_mask = 1,
483 		.tunnel_id_mask = 0xFFFFFFFF,
484 	},
485 	.drop_queue = 127,
486 };
487 
488 volatile int test_done = 1; /* stop packet forwarding when set to 1. */
489 
490 /*
491  * Display zero values by default for xstats
492  */
493 uint8_t xstats_hide_zero;
494 
495 /*
496  * Measurement of CPU cycles disabled by default
497  */
498 uint8_t record_core_cycles;
499 
500 /*
501  * Display of RX and TX bursts disabled by default
502  */
503 uint8_t record_burst_stats;
504 
505 /*
506  * Number of ports per shared Rx queue group, 0 to disable.
507  */
508 uint32_t rxq_share;
509 
510 unsigned int num_sockets = 0;
511 unsigned int socket_ids[RTE_MAX_NUMA_NODES];
512 
513 #ifdef RTE_LIB_BITRATESTATS
514 /* Bitrate statistics */
515 struct rte_stats_bitrates *bitrate_data;
516 lcoreid_t bitrate_lcore_id;
517 uint8_t bitrate_enabled;
518 #endif
519 
520 struct gro_status gro_ports[RTE_MAX_ETHPORTS];
521 uint8_t gro_flush_cycles = GRO_DEFAULT_FLUSH_CYCLES;
522 
523 /*
524  * Hexadecimal bitmask of the RX multi-queue modes that can be enabled.
525  */
526 enum rte_eth_rx_mq_mode rx_mq_mode = ETH_MQ_RX_VMDQ_DCB_RSS;
527 
528 /*
529  * Used to set forced link speed
530  */
531 uint32_t eth_link_speed;
532 
533 /*
534  * ID of the current process in multi-process, used to
535  * configure the queues to be polled.
536  */
537 int proc_id;
538 
539 /*
540  * Number of processes in multi-process, used to
541  * configure the queues to be polled.
542  */
543 unsigned int num_procs = 1;
544 
545 static void
546 eth_rx_metadata_negotiate_mp(uint16_t port_id)
547 {
548 	uint64_t rx_meta_features = 0;
549 	int ret;
550 
551 	if (!is_proc_primary())
552 		return;
553 
554 	rx_meta_features |= RTE_ETH_RX_METADATA_USER_FLAG;
555 	rx_meta_features |= RTE_ETH_RX_METADATA_USER_MARK;
556 	rx_meta_features |= RTE_ETH_RX_METADATA_TUNNEL_ID;
557 
558 	ret = rte_eth_rx_metadata_negotiate(port_id, &rx_meta_features);
559 	if (ret == 0) {
560 		if (!(rx_meta_features & RTE_ETH_RX_METADATA_USER_FLAG)) {
561 			TESTPMD_LOG(DEBUG, "Flow action FLAG will not affect Rx mbufs on port %u\n",
562 				    port_id);
563 		}
564 
565 		if (!(rx_meta_features & RTE_ETH_RX_METADATA_USER_MARK)) {
566 			TESTPMD_LOG(DEBUG, "Flow action MARK will not affect Rx mbufs on port %u\n",
567 				    port_id);
568 		}
569 
570 		if (!(rx_meta_features & RTE_ETH_RX_METADATA_TUNNEL_ID)) {
571 			TESTPMD_LOG(DEBUG, "Flow tunnel offload support might be limited or unavailable on port %u\n",
572 				    port_id);
573 		}
574 	} else if (ret != -ENOTSUP) {
575 		rte_exit(EXIT_FAILURE, "Error when negotiating Rx meta features on port %u: %s\n",
576 			 port_id, rte_strerror(-ret));
577 	}
578 }
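
/*
 * Illustrative sketch (not part of testpmd): once USER_MARK delivery has
 * been negotiated above, a flow rule carrying a MARK action, e.g.
 *
 *   struct rte_flow_action_mark mark = { .id = 0x1234 };
 *   struct rte_flow_action actions[] = {
 *       { .type = RTE_FLOW_ACTION_TYPE_MARK, .conf = &mark },
 *       { .type = RTE_FLOW_ACTION_TYPE_END },
 *   };
 *
 * lets the PMD deliver the mark value with matching Rx mbufs (in
 * mbuf->hash.fdir.hi, with the corresponding ol_flags bit set).
 */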
579 
580 static void
581 flow_pick_transfer_proxy_mp(uint16_t port_id)
582 {
583 	struct rte_port *port = &ports[port_id];
584 	int ret;
585 
586 	port->flow_transfer_proxy = port_id;
587 
588 	if (!is_proc_primary())
589 		return;
590 
591 	ret = rte_flow_pick_transfer_proxy(port_id, &port->flow_transfer_proxy,
592 					   NULL);
593 	if (ret != 0) {
594 		fprintf(stderr, "Error picking flow transfer proxy for port %u: %s - ignore\n",
595 			port_id, rte_strerror(-ret));
596 	}
597 }
598 
599 static int
600 eth_dev_configure_mp(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
601 		      const struct rte_eth_conf *dev_conf)
602 {
603 	if (is_proc_primary())
604 		return rte_eth_dev_configure(port_id, nb_rx_q, nb_tx_q,
605 					dev_conf);
606 	return 0;
607 }
608 
609 static int
610 eth_dev_start_mp(uint16_t port_id)
611 {
612 	if (is_proc_primary())
613 		return rte_eth_dev_start(port_id);
614 
615 	return 0;
616 }
617 
618 static int
619 eth_dev_stop_mp(uint16_t port_id)
620 {
621 	if (is_proc_primary())
622 		return rte_eth_dev_stop(port_id);
623 
624 	return 0;
625 }
626 
627 static void
628 mempool_free_mp(struct rte_mempool *mp)
629 {
630 	if (is_proc_primary())
631 		rte_mempool_free(mp);
632 }
633 
634 static int
635 eth_dev_set_mtu_mp(uint16_t port_id, uint16_t mtu)
636 {
637 	if (is_proc_primary())
638 		return rte_eth_dev_set_mtu(port_id, mtu);
639 
640 	return 0;
641 }
642 
643 /* Forward function declarations */
644 static void setup_attached_port(portid_t pi);
645 static void check_all_ports_link_status(uint32_t port_mask);
646 static int eth_event_callback(portid_t port_id,
647 			      enum rte_eth_event_type type,
648 			      void *param, void *ret_param);
649 static void dev_event_callback(const char *device_name,
650 				enum rte_dev_event_type type,
651 				void *param);
652 static void fill_xstats_display_info(void);
653 
654 /*
655  * Check if all the ports are started.
656  * If yes, return positive value. If not, return zero.
657  */
658 static int all_ports_started(void);
659 
660 struct gso_status gso_ports[RTE_MAX_ETHPORTS];
661 uint16_t gso_max_segment_size = RTE_ETHER_MAX_LEN - RTE_ETHER_CRC_LEN;
662 
663 /* Holds the registered mbuf dynamic flags names. */
664 char dynf_names[64][RTE_MBUF_DYN_NAMESIZE];
665 
666 
667 /*
668  * Helper function to check whether a socket id has been discovered yet.
669  * Return a positive value if the socket id is new, zero if it is already known.
670  */
671 int
672 new_socket_id(unsigned int socket_id)
673 {
674 	unsigned int i;
675 
676 	for (i = 0; i < num_sockets; i++) {
677 		if (socket_ids[i] == socket_id)
678 			return 0;
679 	}
680 	return 1;
681 }
682 
683 /*
684  * Setup default configuration.
685  */
686 static void
687 set_default_fwd_lcores_config(void)
688 {
689 	unsigned int i;
690 	unsigned int nb_lc;
691 	unsigned int sock_num;
692 
693 	nb_lc = 0;
694 	for (i = 0; i < RTE_MAX_LCORE; i++) {
695 		if (!rte_lcore_is_enabled(i))
696 			continue;
697 		sock_num = rte_lcore_to_socket_id(i);
698 		if (new_socket_id(sock_num)) {
699 			if (num_sockets >= RTE_MAX_NUMA_NODES) {
700 				rte_exit(EXIT_FAILURE,
701 					 "Total sockets greater than %u\n",
702 					 RTE_MAX_NUMA_NODES);
703 			}
704 			socket_ids[num_sockets++] = sock_num;
705 		}
706 		if (i == rte_get_main_lcore())
707 			continue;
708 		fwd_lcores_cpuids[nb_lc++] = i;
709 	}
710 	nb_lcores = (lcoreid_t) nb_lc;
711 	nb_cfg_lcores = nb_lcores;
712 	nb_fwd_lcores = 1;
713 }
714 
715 static void
716 set_def_peer_eth_addrs(void)
717 {
718 	portid_t i;
719 
720 	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
721 		peer_eth_addrs[i].addr_bytes[0] = RTE_ETHER_LOCAL_ADMIN_ADDR;
722 		peer_eth_addrs[i].addr_bytes[5] = i;
723 	}
724 }
725 
726 static void
727 set_default_fwd_ports_config(void)
728 {
729 	portid_t pt_id;
730 	int i = 0;
731 
732 	RTE_ETH_FOREACH_DEV(pt_id) {
733 		fwd_ports_ids[i++] = pt_id;
734 
735 		/* Update sockets info according to the attached device */
736 		int socket_id = rte_eth_dev_socket_id(pt_id);
737 		if (socket_id >= 0 && new_socket_id(socket_id)) {
738 			if (num_sockets >= RTE_MAX_NUMA_NODES) {
739 				rte_exit(EXIT_FAILURE,
740 					 "Total sockets greater than %u\n",
741 					 RTE_MAX_NUMA_NODES);
742 			}
743 			socket_ids[num_sockets++] = socket_id;
744 		}
745 	}
746 
747 	nb_cfg_ports = nb_ports;
748 	nb_fwd_ports = nb_ports;
749 }
750 
751 void
752 set_def_fwd_config(void)
753 {
754 	set_default_fwd_lcores_config();
755 	set_def_peer_eth_addrs();
756 	set_default_fwd_ports_config();
757 }
758 
759 #ifndef RTE_EXEC_ENV_WINDOWS
760 /* extremely pessimistic estimation of memory required to create a mempool */
761 static int
762 calc_mem_size(uint32_t nb_mbufs, uint32_t mbuf_sz, size_t pgsz, size_t *out)
763 {
764 	unsigned int n_pages, mbuf_per_pg, leftover;
765 	uint64_t total_mem, mbuf_mem, obj_sz;
766 
767 	/* there is no good way to predict how much space the mempool will
768 	 * occupy because it will allocate chunks on the fly, and some of those
769 	 * will come from default DPDK memory while some will come from our
770 	 * external memory, so just assume 128MB will be enough for everyone.
771 	 */
772 	uint64_t hdr_mem = 128 << 20;
773 
774 	/* account for possible non-contiguousness */
775 	obj_sz = rte_mempool_calc_obj_size(mbuf_sz, 0, NULL);
776 	if (obj_sz > pgsz) {
777 		TESTPMD_LOG(ERR, "Object size is bigger than page size\n");
778 		return -1;
779 	}
780 
781 	mbuf_per_pg = pgsz / obj_sz;
782 	leftover = (nb_mbufs % mbuf_per_pg) > 0;
783 	n_pages = (nb_mbufs / mbuf_per_pg) + leftover;
784 
785 	mbuf_mem = n_pages * pgsz;
786 
787 	total_mem = RTE_ALIGN(hdr_mem + mbuf_mem, pgsz);
788 
789 	if (total_mem > SIZE_MAX) {
790 		TESTPMD_LOG(ERR, "Memory size too big\n");
791 		return -1;
792 	}
793 	*out = (size_t)total_mem;
794 
795 	return 0;
796 }
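
/*
 * Worked example with assumed numbers (not from this file): if the
 * per-object size computed by rte_mempool_calc_obj_size() comes out to
 * roughly 2.3 KB, about 900 objects fit in each 2 MB page, so 131072
 * mbufs need around 146 pages (~292 MB); adding the fixed 128 MB header
 * allowance gives an estimate of roughly 420 MB.
 */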
797 
798 static int
799 pagesz_flags(uint64_t page_sz)
800 {
801 	/* as per mmap() manpage, all page sizes are log2 of page size
802 	 * shifted by MAP_HUGE_SHIFT
803 	 */
804 	int log2 = rte_log2_u64(page_sz);
805 
806 	return (log2 << HUGE_SHIFT);
807 }
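
/*
 * Worked example: for RTE_PGSIZE_2M, rte_log2_u64() returns 21, so
 * pagesz_flags() yields 21 << HUGE_SHIFT, i.e. the MAP_HUGE_2MB value
 * that mmap() expects; RTE_PGSIZE_1G likewise gives 30 << HUGE_SHIFT
 * (MAP_HUGE_1GB).
 */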
808 
809 static void *
810 alloc_mem(size_t memsz, size_t pgsz, bool huge)
811 {
812 	void *addr;
813 	int flags;
814 
815 	/* allocate anonymous hugepages */
816 	flags = MAP_ANONYMOUS | MAP_PRIVATE;
817 	if (huge)
818 		flags |= HUGE_FLAG | pagesz_flags(pgsz);
819 
820 	addr = mmap(NULL, memsz, PROT_READ | PROT_WRITE, flags, -1, 0);
821 	if (addr == MAP_FAILED)
822 		return NULL;
823 
824 	return addr;
825 }
826 
827 struct extmem_param {
828 	void *addr;
829 	size_t len;
830 	size_t pgsz;
831 	rte_iova_t *iova_table;
832 	unsigned int iova_table_len;
833 };
834 
835 static int
836 create_extmem(uint32_t nb_mbufs, uint32_t mbuf_sz, struct extmem_param *param,
837 		bool huge)
838 {
839 	uint64_t pgsizes[] = {RTE_PGSIZE_2M, RTE_PGSIZE_1G, /* x86_64, ARM */
840 			RTE_PGSIZE_16M, RTE_PGSIZE_16G};    /* POWER */
841 	unsigned int cur_page, n_pages, pgsz_idx;
842 	size_t mem_sz, cur_pgsz;
843 	rte_iova_t *iovas = NULL;
844 	void *addr;
845 	int ret;
846 
847 	for (pgsz_idx = 0; pgsz_idx < RTE_DIM(pgsizes); pgsz_idx++) {
848 		/* skip anything that is too big */
849 		if (pgsizes[pgsz_idx] > SIZE_MAX)
850 			continue;
851 
852 		cur_pgsz = pgsizes[pgsz_idx];
853 
854 		/* if we were told not to allocate hugepages, override */
855 		if (!huge)
856 			cur_pgsz = sysconf(_SC_PAGESIZE);
857 
858 		ret = calc_mem_size(nb_mbufs, mbuf_sz, cur_pgsz, &mem_sz);
859 		if (ret < 0) {
860 			TESTPMD_LOG(ERR, "Cannot calculate memory size\n");
861 			return -1;
862 		}
863 
864 		/* allocate our memory */
865 		addr = alloc_mem(mem_sz, cur_pgsz, huge);
866 
867 		/* if we couldn't allocate memory with a specified page size,
868 		 * that doesn't mean we can't do it with other page sizes, so
869 		 * try another one.
870 		 */
871 		if (addr == NULL)
872 			continue;
873 
874 		/* store IOVA addresses for every page in this memory area */
875 		n_pages = mem_sz / cur_pgsz;
876 
877 		iovas = malloc(sizeof(*iovas) * n_pages);
878 
879 		if (iovas == NULL) {
880 			TESTPMD_LOG(ERR, "Cannot allocate memory for iova addresses\n");
881 			goto fail;
882 		}
883 		/* lock memory if it's not huge pages */
884 		if (!huge)
885 			mlock(addr, mem_sz);
886 
887 		/* populate IOVA addresses */
888 		for (cur_page = 0; cur_page < n_pages; cur_page++) {
889 			rte_iova_t iova;
890 			size_t offset;
891 			void *cur;
892 
893 			offset = cur_pgsz * cur_page;
894 			cur = RTE_PTR_ADD(addr, offset);
895 
896 			/* touch the page before getting its IOVA */
897 			*(volatile char *)cur = 0;
898 
899 			iova = rte_mem_virt2iova(cur);
900 
901 			iovas[cur_page] = iova;
902 		}
903 
904 		break;
905 	}
906 	/* if we couldn't allocate anything */
907 	if (iovas == NULL)
908 		return -1;
909 
910 	param->addr = addr;
911 	param->len = mem_sz;
912 	param->pgsz = cur_pgsz;
913 	param->iova_table = iovas;
914 	param->iova_table_len = n_pages;
915 
916 	return 0;
917 fail:
918 	if (iovas)
919 		free(iovas);
920 	if (addr)
921 		munmap(addr, mem_sz);
922 
923 	return -1;
924 }
925 
926 static int
927 setup_extmem(uint32_t nb_mbufs, uint32_t mbuf_sz, bool huge)
928 {
929 	struct extmem_param param;
930 	int socket_id, ret;
931 
932 	memset(&param, 0, sizeof(param));
933 
934 	/* check if our heap exists */
935 	socket_id = rte_malloc_heap_get_socket(EXTMEM_HEAP_NAME);
936 	if (socket_id < 0) {
937 		/* create our heap */
938 		ret = rte_malloc_heap_create(EXTMEM_HEAP_NAME);
939 		if (ret < 0) {
940 			TESTPMD_LOG(ERR, "Cannot create heap\n");
941 			return -1;
942 		}
943 	}
944 
945 	ret = create_extmem(nb_mbufs, mbuf_sz, &param, huge);
946 	if (ret < 0) {
947 		TESTPMD_LOG(ERR, "Cannot create memory area\n");
948 		return -1;
949 	}
950 
951 	/* we now have a valid memory area, so add it to heap */
952 	ret = rte_malloc_heap_memory_add(EXTMEM_HEAP_NAME,
953 			param.addr, param.len, param.iova_table,
954 			param.iova_table_len, param.pgsz);
955 
956 	/* when using VFIO, memory is automatically mapped for DMA by EAL */
957 
958 	/* not needed any more */
959 	free(param.iova_table);
960 
961 	if (ret < 0) {
962 		TESTPMD_LOG(ERR, "Cannot add memory to heap\n");
963 		munmap(param.addr, param.len);
964 		return -1;
965 	}
966 
967 	/* success */
968 
969 	TESTPMD_LOG(DEBUG, "Allocated %zuMB of external memory\n",
970 			param.len >> 20);
971 
972 	return 0;
973 }
974 static void
975 dma_unmap_cb(struct rte_mempool *mp __rte_unused, void *opaque __rte_unused,
976 	     struct rte_mempool_memhdr *memhdr, unsigned mem_idx __rte_unused)
977 {
978 	uint16_t pid = 0;
979 	int ret;
980 
981 	RTE_ETH_FOREACH_DEV(pid) {
982 		struct rte_eth_dev_info dev_info;
983 
984 		ret = eth_dev_info_get_print_err(pid, &dev_info);
985 		if (ret != 0) {
986 			TESTPMD_LOG(DEBUG,
987 				    "unable to get device info for port %d on addr 0x%p, "
988 				    "mempool unmapping will not be performed\n",
989 				    pid, memhdr->addr);
990 			continue;
991 		}
992 
993 		ret = rte_dev_dma_unmap(dev_info.device, memhdr->addr, 0, memhdr->len);
994 		if (ret) {
995 			TESTPMD_LOG(DEBUG,
996 				    "unable to DMA unmap addr 0x%p "
997 				    "for device %s\n",
998 				    memhdr->addr, dev_info.device->name);
999 		}
1000 	}
1001 	ret = rte_extmem_unregister(memhdr->addr, memhdr->len);
1002 	if (ret) {
1003 		TESTPMD_LOG(DEBUG,
1004 			    "unable to un-register addr 0x%p\n", memhdr->addr);
1005 	}
1006 }
1007 
1008 static void
1009 dma_map_cb(struct rte_mempool *mp __rte_unused, void *opaque __rte_unused,
1010 	   struct rte_mempool_memhdr *memhdr, unsigned mem_idx __rte_unused)
1011 {
1012 	uint16_t pid = 0;
1013 	size_t page_size = sysconf(_SC_PAGESIZE);
1014 	int ret;
1015 
1016 	ret = rte_extmem_register(memhdr->addr, memhdr->len, NULL, 0,
1017 				  page_size);
1018 	if (ret) {
1019 		TESTPMD_LOG(DEBUG,
1020 			    "unable to register addr 0x%p\n", memhdr->addr);
1021 		return;
1022 	}
1023 	RTE_ETH_FOREACH_DEV(pid) {
1024 		struct rte_eth_dev_info dev_info;
1025 
1026 		ret = eth_dev_info_get_print_err(pid, &dev_info);
1027 		if (ret != 0) {
1028 			TESTPMD_LOG(DEBUG,
1029 				    "unable to get device info for port %d on addr 0x%p, "
1030 				    "mempool mapping will not be performed\n",
1031 				    pid, memhdr->addr);
1032 			continue;
1033 		}
1034 		ret = rte_dev_dma_map(dev_info.device, memhdr->addr, 0, memhdr->len);
1035 		if (ret) {
1036 			TESTPMD_LOG(DEBUG,
1037 				    "unable to DMA map addr 0x%p "
1038 				    "for device %s\n",
1039 				    memhdr->addr, dev_info.device->name);
1040 		}
1041 	}
1042 }
1043 #endif
1044 
1045 static unsigned int
1046 setup_extbuf(uint32_t nb_mbufs, uint16_t mbuf_sz, unsigned int socket_id,
1047 	    char *pool_name, struct rte_pktmbuf_extmem **ext_mem)
1048 {
1049 	struct rte_pktmbuf_extmem *xmem;
1050 	unsigned int ext_num, zone_num, elt_num;
1051 	uint16_t elt_size;
1052 
1053 	elt_size = RTE_ALIGN_CEIL(mbuf_sz, RTE_CACHE_LINE_SIZE);
1054 	elt_num = EXTBUF_ZONE_SIZE / elt_size;
1055 	zone_num = (nb_mbufs + elt_num - 1) / elt_num;
1056 
1057 	xmem = malloc(sizeof(struct rte_pktmbuf_extmem) * zone_num);
1058 	if (xmem == NULL) {
1059 		TESTPMD_LOG(ERR, "Cannot allocate memory for "
1060 				 "external buffer descriptors\n");
1061 		*ext_mem = NULL;
1062 		return 0;
1063 	}
1064 	for (ext_num = 0; ext_num < zone_num; ext_num++) {
1065 		struct rte_pktmbuf_extmem *xseg = xmem + ext_num;
1066 		const struct rte_memzone *mz;
1067 		char mz_name[RTE_MEMZONE_NAMESIZE];
1068 		int ret;
1069 
1070 		ret = snprintf(mz_name, sizeof(mz_name),
1071 			RTE_MEMPOOL_MZ_FORMAT "_xb_%u", pool_name, ext_num);
1072 		if (ret < 0 || ret >= (int)sizeof(mz_name)) {
1073 			errno = ENAMETOOLONG;
1074 			ext_num = 0;
1075 			break;
1076 		}
1077 		mz = rte_memzone_reserve_aligned(mz_name, EXTBUF_ZONE_SIZE,
1078 						 socket_id,
1079 						 RTE_MEMZONE_IOVA_CONTIG |
1080 						 RTE_MEMZONE_1GB |
1081 						 RTE_MEMZONE_SIZE_HINT_ONLY,
1082 						 EXTBUF_ZONE_SIZE);
1083 		if (mz == NULL) {
1084 			/*
1085 			 * The caller exits on external buffer creation
1086 			 * error, so there is no need to free memzones.
1087 			 */
1088 			errno = ENOMEM;
1089 			ext_num = 0;
1090 			break;
1091 		}
1092 		xseg->buf_ptr = mz->addr;
1093 		xseg->buf_iova = mz->iova;
1094 		xseg->buf_len = EXTBUF_ZONE_SIZE;
1095 		xseg->elt_size = elt_size;
1096 	}
1097 	if (ext_num == 0 && xmem != NULL) {
1098 		free(xmem);
1099 		xmem = NULL;
1100 	}
1101 	*ext_mem = xmem;
1102 	return ext_num;
1103 }
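
/*
 * Worked example (assuming the usual 2176-byte mbuf segment size, which
 * is already a multiple of RTE_CACHE_LINE_SIZE): elt_size stays 2176,
 * elt_num = 2 MB / 2176 = 963 buffers per memzone, so 100000 mbufs need
 * zone_num = ceil(100000 / 963) = 104 pinned 2 MB memzones.
 */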
1104 
1105 /*
1106  * Configuration initialisation done once at init time.
1107  */
1108 static struct rte_mempool *
1109 mbuf_pool_create(uint16_t mbuf_seg_size, unsigned nb_mbuf,
1110 		 unsigned int socket_id, uint16_t size_idx)
1111 {
1112 	char pool_name[RTE_MEMPOOL_NAMESIZE];
1113 	struct rte_mempool *rte_mp = NULL;
1114 #ifndef RTE_EXEC_ENV_WINDOWS
1115 	uint32_t mb_size;
1116 
1117 	mb_size = sizeof(struct rte_mbuf) + mbuf_seg_size;
1118 #endif
1119 	mbuf_poolname_build(socket_id, pool_name, sizeof(pool_name), size_idx);
1120 	if (!is_proc_primary()) {
1121 		rte_mp = rte_mempool_lookup(pool_name);
1122 		if (rte_mp == NULL)
1123 			rte_exit(EXIT_FAILURE,
1124 				"Get mbuf pool for socket %u failed: %s\n",
1125 				socket_id, rte_strerror(rte_errno));
1126 		return rte_mp;
1127 	}
1128 
1129 	TESTPMD_LOG(INFO,
1130 		"create a new mbuf pool <%s>: n=%u, size=%u, socket=%u\n",
1131 		pool_name, nb_mbuf, mbuf_seg_size, socket_id);
1132 
1133 	switch (mp_alloc_type) {
1134 	case MP_ALLOC_NATIVE:
1135 		{
1136 			/* wrapper to rte_mempool_create() */
1137 			TESTPMD_LOG(INFO, "preferred mempool ops selected: %s\n",
1138 					rte_mbuf_best_mempool_ops());
1139 			rte_mp = rte_pktmbuf_pool_create(pool_name, nb_mbuf,
1140 				mb_mempool_cache, 0, mbuf_seg_size, socket_id);
1141 			break;
1142 		}
1143 #ifndef RTE_EXEC_ENV_WINDOWS
1144 	case MP_ALLOC_ANON:
1145 		{
1146 			rte_mp = rte_mempool_create_empty(pool_name, nb_mbuf,
1147 				mb_size, (unsigned int) mb_mempool_cache,
1148 				sizeof(struct rte_pktmbuf_pool_private),
1149 				socket_id, mempool_flags);
1150 			if (rte_mp == NULL)
1151 				goto err;
1152 
1153 			if (rte_mempool_populate_anon(rte_mp) == 0) {
1154 				rte_mempool_free(rte_mp);
1155 				rte_mp = NULL;
1156 				goto err;
1157 			}
1158 			rte_pktmbuf_pool_init(rte_mp, NULL);
1159 			rte_mempool_obj_iter(rte_mp, rte_pktmbuf_init, NULL);
1160 			rte_mempool_mem_iter(rte_mp, dma_map_cb, NULL);
1161 			break;
1162 		}
1163 	case MP_ALLOC_XMEM:
1164 	case MP_ALLOC_XMEM_HUGE:
1165 		{
1166 			int heap_socket;
1167 			bool huge = mp_alloc_type == MP_ALLOC_XMEM_HUGE;
1168 
1169 			if (setup_extmem(nb_mbuf, mbuf_seg_size, huge) < 0)
1170 				rte_exit(EXIT_FAILURE, "Could not create external memory\n");
1171 
1172 			heap_socket =
1173 				rte_malloc_heap_get_socket(EXTMEM_HEAP_NAME);
1174 			if (heap_socket < 0)
1175 				rte_exit(EXIT_FAILURE, "Could not get external memory socket ID\n");
1176 
1177 			TESTPMD_LOG(INFO, "preferred mempool ops selected: %s\n",
1178 					rte_mbuf_best_mempool_ops());
1179 			rte_mp = rte_pktmbuf_pool_create(pool_name, nb_mbuf,
1180 					mb_mempool_cache, 0, mbuf_seg_size,
1181 					heap_socket);
1182 			break;
1183 		}
1184 #endif
1185 	case MP_ALLOC_XBUF:
1186 		{
1187 			struct rte_pktmbuf_extmem *ext_mem;
1188 			unsigned int ext_num;
1189 
1190 			ext_num = setup_extbuf(nb_mbuf,	mbuf_seg_size,
1191 					       socket_id, pool_name, &ext_mem);
1192 			if (ext_num == 0)
1193 				rte_exit(EXIT_FAILURE,
1194 					 "Can't create pinned data buffers\n");
1195 
1196 			TESTPMD_LOG(INFO, "preferred mempool ops selected: %s\n",
1197 					rte_mbuf_best_mempool_ops());
1198 			rte_mp = rte_pktmbuf_pool_create_extbuf
1199 					(pool_name, nb_mbuf, mb_mempool_cache,
1200 					 0, mbuf_seg_size, socket_id,
1201 					 ext_mem, ext_num);
1202 			free(ext_mem);
1203 			break;
1204 		}
1205 	default:
1206 		{
1207 			rte_exit(EXIT_FAILURE, "Invalid mempool creation mode\n");
1208 		}
1209 	}
1210 
1211 #ifndef RTE_EXEC_ENV_WINDOWS
1212 err:
1213 #endif
1214 	if (rte_mp == NULL) {
1215 		rte_exit(EXIT_FAILURE,
1216 			"Creation of mbuf pool for socket %u failed: %s\n",
1217 			socket_id, rte_strerror(rte_errno));
1218 	} else if (verbose_level > 0) {
1219 		rte_mempool_dump(stdout, rte_mp);
1220 	}
1221 	return rte_mp;
1222 }
1223 
1224 /*
1225  * Check whether the given socket id is valid in NUMA mode.
1226  * Return 0 if valid, -1 otherwise.
1227  */
1228 static int
1229 check_socket_id(const unsigned int socket_id)
1230 {
1231 	static int warning_once = 0;
1232 
1233 	if (new_socket_id(socket_id)) {
1234 		if (!warning_once && numa_support)
1235 			fprintf(stderr,
1236 				"Warning: NUMA should be configured manually by using --port-numa-config and --ring-numa-config parameters along with --numa.\n");
1237 		warning_once = 1;
1238 		return -1;
1239 	}
1240 	return 0;
1241 }
1242 
1243 /*
1244  * Get the allowed maximum number of RX queues.
1245  * *pid returns the port id which has the minimal value of
1246  * max_rx_queues among all ports.
1247  */
1248 queueid_t
1249 get_allowed_max_nb_rxq(portid_t *pid)
1250 {
1251 	queueid_t allowed_max_rxq = RTE_MAX_QUEUES_PER_PORT;
1252 	bool max_rxq_valid = false;
1253 	portid_t pi;
1254 	struct rte_eth_dev_info dev_info;
1255 
1256 	RTE_ETH_FOREACH_DEV(pi) {
1257 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1258 			continue;
1259 
1260 		max_rxq_valid = true;
1261 		if (dev_info.max_rx_queues < allowed_max_rxq) {
1262 			allowed_max_rxq = dev_info.max_rx_queues;
1263 			*pid = pi;
1264 		}
1265 	}
1266 	return max_rxq_valid ? allowed_max_rxq : 0;
1267 }
1268 
1269 /*
1270  * Check whether the input rxq is valid.
1271  * It is valid if it does not exceed the maximum number of
1272  * RX queues supported by every port.
1273  * Return 0 if valid, -1 otherwise.
1274  */
1275 int
1276 check_nb_rxq(queueid_t rxq)
1277 {
1278 	queueid_t allowed_max_rxq;
1279 	portid_t pid = 0;
1280 
1281 	allowed_max_rxq = get_allowed_max_nb_rxq(&pid);
1282 	if (rxq > allowed_max_rxq) {
1283 		fprintf(stderr,
1284 			"Fail: input rxq (%u) can't be greater than max_rx_queues (%u) of port %u\n",
1285 			rxq, allowed_max_rxq, pid);
1286 		return -1;
1287 	}
1288 	return 0;
1289 }
1290 
1291 /*
1292  * Get the allowed maximum number of TX queues.
1293  * *pid returns the port id which has the minimal value of
1294  * max_tx_queues among all ports.
1295  */
1296 queueid_t
1297 get_allowed_max_nb_txq(portid_t *pid)
1298 {
1299 	queueid_t allowed_max_txq = RTE_MAX_QUEUES_PER_PORT;
1300 	bool max_txq_valid = false;
1301 	portid_t pi;
1302 	struct rte_eth_dev_info dev_info;
1303 
1304 	RTE_ETH_FOREACH_DEV(pi) {
1305 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1306 			continue;
1307 
1308 		max_txq_valid = true;
1309 		if (dev_info.max_tx_queues < allowed_max_txq) {
1310 			allowed_max_txq = dev_info.max_tx_queues;
1311 			*pid = pi;
1312 		}
1313 	}
1314 	return max_txq_valid ? allowed_max_txq : 0;
1315 }
1316 
1317 /*
1318  * Check whether the input txq is valid.
1319  * It is valid if it does not exceed the maximum number of
1320  * TX queues supported by every port.
1321  * Return 0 if valid, -1 otherwise.
1322  */
1323 int
1324 check_nb_txq(queueid_t txq)
1325 {
1326 	queueid_t allowed_max_txq;
1327 	portid_t pid = 0;
1328 
1329 	allowed_max_txq = get_allowed_max_nb_txq(&pid);
1330 	if (txq > allowed_max_txq) {
1331 		fprintf(stderr,
1332 			"Fail: input txq (%u) can't be greater than max_tx_queues (%u) of port %u\n",
1333 			txq, allowed_max_txq, pid);
1334 		return -1;
1335 	}
1336 	return 0;
1337 }
1338 
1339 /*
1340  * Get the allowed maximum number of RXDs of every rx queue.
1341  * *pid returns the port id which has the minimal value of
1342  * max_rxd among all queues of all ports.
1343  */
1344 static uint16_t
1345 get_allowed_max_nb_rxd(portid_t *pid)
1346 {
1347 	uint16_t allowed_max_rxd = UINT16_MAX;
1348 	portid_t pi;
1349 	struct rte_eth_dev_info dev_info;
1350 
1351 	RTE_ETH_FOREACH_DEV(pi) {
1352 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1353 			continue;
1354 
1355 		if (dev_info.rx_desc_lim.nb_max < allowed_max_rxd) {
1356 			allowed_max_rxd = dev_info.rx_desc_lim.nb_max;
1357 			*pid = pi;
1358 		}
1359 	}
1360 	return allowed_max_rxd;
1361 }
1362 
1363 /*
1364  * Get the allowed minimal number of RXDs of every rx queue.
1365  * *pid returns the port id which has the largest value of
1366  * min_rxd among all queues of all ports.
1367  */
1368 static uint16_t
1369 get_allowed_min_nb_rxd(portid_t *pid)
1370 {
1371 	uint16_t allowed_min_rxd = 0;
1372 	portid_t pi;
1373 	struct rte_eth_dev_info dev_info;
1374 
1375 	RTE_ETH_FOREACH_DEV(pi) {
1376 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1377 			continue;
1378 
1379 		if (dev_info.rx_desc_lim.nb_min > allowed_min_rxd) {
1380 			allowed_min_rxd = dev_info.rx_desc_lim.nb_min;
1381 			*pid = pi;
1382 		}
1383 	}
1384 
1385 	return allowed_min_rxd;
1386 }
1387 
1388 /*
1389  * Check whether the input rxd is valid.
1390  * It must not exceed the maximum number of RXDs supported by
1391  * every Rx queue and must not be less than the minimal number
1392  * of RXDs required by every Rx queue.
1393  * Return 0 if valid, -1 otherwise.
1394  */
1395 int
1396 check_nb_rxd(queueid_t rxd)
1397 {
1398 	uint16_t allowed_max_rxd;
1399 	uint16_t allowed_min_rxd;
1400 	portid_t pid = 0;
1401 
1402 	allowed_max_rxd = get_allowed_max_nb_rxd(&pid);
1403 	if (rxd > allowed_max_rxd) {
1404 		fprintf(stderr,
1405 			"Fail: input rxd (%u) can't be greater than max_rxds (%u) of port %u\n",
1406 			rxd, allowed_max_rxd, pid);
1407 		return -1;
1408 	}
1409 
1410 	allowed_min_rxd = get_allowed_min_nb_rxd(&pid);
1411 	if (rxd < allowed_min_rxd) {
1412 		fprintf(stderr,
1413 			"Fail: input rxd (%u) can't be less than min_rxds (%u) of port %u\n",
1414 			rxd, allowed_min_rxd, pid);
1415 		return -1;
1416 	}
1417 
1418 	return 0;
1419 }
1420 
1421 /*
1422  * Get the allowed maximum number of TXDs of every tx queue.
1423  * *pid returns the port id which has the minimal value of
1424  * max_txd among all queues of all ports.
1425  */
1426 static uint16_t
1427 get_allowed_max_nb_txd(portid_t *pid)
1428 {
1429 	uint16_t allowed_max_txd = UINT16_MAX;
1430 	portid_t pi;
1431 	struct rte_eth_dev_info dev_info;
1432 
1433 	RTE_ETH_FOREACH_DEV(pi) {
1434 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1435 			continue;
1436 
1437 		if (dev_info.tx_desc_lim.nb_max < allowed_max_txd) {
1438 			allowed_max_txd = dev_info.tx_desc_lim.nb_max;
1439 			*pid = pi;
1440 		}
1441 	}
1442 	return allowed_max_txd;
1443 }
1444 
1445 /*
1446  * Get the allowed minimal number of TXDs of every tx queue.
1447  * *pid returns the port id which has the largest value of
1448  * min_txd among all queues of all ports.
1449  */
1450 static uint16_t
1451 get_allowed_min_nb_txd(portid_t *pid)
1452 {
1453 	uint16_t allowed_min_txd = 0;
1454 	portid_t pi;
1455 	struct rte_eth_dev_info dev_info;
1456 
1457 	RTE_ETH_FOREACH_DEV(pi) {
1458 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1459 			continue;
1460 
1461 		if (dev_info.tx_desc_lim.nb_min > allowed_min_txd) {
1462 			allowed_min_txd = dev_info.tx_desc_lim.nb_min;
1463 			*pid = pi;
1464 		}
1465 	}
1466 
1467 	return allowed_min_txd;
1468 }
1469 
1470 /*
1471  * Check whether the input txd is valid: it must not exceed the maximum
1472  * number of TXDs supported by every Tx queue and must not be less than
1473  * the minimal number of TXDs required by every Tx queue.
1474  * Return 0 if valid, -1 otherwise.
1475  */
1476 int
1477 check_nb_txd(queueid_t txd)
1478 {
1479 	uint16_t allowed_max_txd;
1480 	uint16_t allowed_min_txd;
1481 	portid_t pid = 0;
1482 
1483 	allowed_max_txd = get_allowed_max_nb_txd(&pid);
1484 	if (txd > allowed_max_txd) {
1485 		fprintf(stderr,
1486 			"Fail: input txd (%u) can't be greater than max_txds (%u) of port %u\n",
1487 			txd, allowed_max_txd, pid);
1488 		return -1;
1489 	}
1490 
1491 	allowed_min_txd = get_allowed_min_nb_txd(&pid);
1492 	if (txd < allowed_min_txd) {
1493 		fprintf(stderr,
1494 			"Fail: input txd (%u) can't be less than min_txds (%u) of port %u\n",
1495 			txd, allowed_min_txd, pid);
1496 		return -1;
1497 	}
1498 	return 0;
1499 }
1500 
1501 
1502 /*
1503  * Get the allowed maximum number of hairpin queues.
1504  * *pid returns the port id which has the minimal value of
1505  * max_hairpin_queues among all ports.
1506  */
1507 queueid_t
1508 get_allowed_max_nb_hairpinq(portid_t *pid)
1509 {
1510 	queueid_t allowed_max_hairpinq = RTE_MAX_QUEUES_PER_PORT;
1511 	portid_t pi;
1512 	struct rte_eth_hairpin_cap cap;
1513 
1514 	RTE_ETH_FOREACH_DEV(pi) {
1515 		if (rte_eth_dev_hairpin_capability_get(pi, &cap) != 0) {
1516 			*pid = pi;
1517 			return 0;
1518 		}
1519 		if (cap.max_nb_queues < allowed_max_hairpinq) {
1520 			allowed_max_hairpinq = cap.max_nb_queues;
1521 			*pid = pi;
1522 		}
1523 	}
1524 	return allowed_max_hairpinq;
1525 }
1526 
1527 /*
1528  * Check whether the input number of hairpin queues is valid.
1529  * It is valid if it does not exceed the maximum number of
1530  * hairpin queues supported by every port.
1531  * Return 0 if valid, -1 otherwise.
1532  */
1533 int
1534 check_nb_hairpinq(queueid_t hairpinq)
1535 {
1536 	queueid_t allowed_max_hairpinq;
1537 	portid_t pid = 0;
1538 
1539 	allowed_max_hairpinq = get_allowed_max_nb_hairpinq(&pid);
1540 	if (hairpinq > allowed_max_hairpinq) {
1541 		fprintf(stderr,
1542 			"Fail: input hairpin (%u) can't be greater than max_hairpin_queues (%u) of port %u\n",
1543 			hairpinq, allowed_max_hairpinq, pid);
1544 		return -1;
1545 	}
1546 	return 0;
1547 }
1548 
1549 static int
1550 get_eth_overhead(struct rte_eth_dev_info *dev_info)
1551 {
1552 	uint32_t eth_overhead;
1553 
1554 	if (dev_info->max_mtu != UINT16_MAX &&
1555 	    dev_info->max_rx_pktlen > dev_info->max_mtu)
1556 		eth_overhead = dev_info->max_rx_pktlen - dev_info->max_mtu;
1557 	else
1558 		eth_overhead = RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN;
1559 
1560 	return eth_overhead;
1561 }
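
/*
 * Worked example: a port reporting max_rx_pktlen = 1518 and max_mtu = 1500
 * yields an overhead of 18 bytes; the fallback RTE_ETHER_HDR_LEN +
 * RTE_ETHER_CRC_LEN is likewise 14 + 4 = 18 bytes.
 */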
1562 
1563 static void
1564 init_config_port_offloads(portid_t pid, uint32_t socket_id)
1565 {
1566 	struct rte_port *port = &ports[pid];
1567 	int ret;
1568 	int i;
1569 
1570 	eth_rx_metadata_negotiate_mp(pid);
1571 	flow_pick_transfer_proxy_mp(pid);
1572 
1573 	port->dev_conf.txmode = tx_mode;
1574 	port->dev_conf.rxmode = rx_mode;
1575 
1576 	ret = eth_dev_info_get_print_err(pid, &port->dev_info);
1577 	if (ret != 0)
1578 		rte_exit(EXIT_FAILURE, "rte_eth_dev_info_get() failed\n");
1579 
1580 	if (!(port->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE))
1581 		port->dev_conf.txmode.offloads &=
1582 			~DEV_TX_OFFLOAD_MBUF_FAST_FREE;
1583 
1584 	/* Apply Rx offloads configuration */
1585 	for (i = 0; i < port->dev_info.max_rx_queues; i++)
1586 		port->rx_conf[i].offloads = port->dev_conf.rxmode.offloads;
1587 	/* Apply Tx offloads configuration */
1588 	for (i = 0; i < port->dev_info.max_tx_queues; i++)
1589 		port->tx_conf[i].offloads = port->dev_conf.txmode.offloads;
1590 
1591 	if (eth_link_speed)
1592 		port->dev_conf.link_speeds = eth_link_speed;
1593 
1594 	if (max_rx_pkt_len)
1595 		port->dev_conf.rxmode.mtu = max_rx_pkt_len -
1596 			get_eth_overhead(&port->dev_info);
1597 
1598 	/* set flag to initialize port/queue */
1599 	port->need_reconfig = 1;
1600 	port->need_reconfig_queues = 1;
1601 	port->socket_id = socket_id;
1602 	port->tx_metadata = 0;
1603 
1604 	/*
1605 	 * Check for maximum number of segments per MTU.
1606 	 * Accordingly update the mbuf data size.
1607 	 */
1608 	if (port->dev_info.rx_desc_lim.nb_mtu_seg_max != UINT16_MAX &&
1609 	    port->dev_info.rx_desc_lim.nb_mtu_seg_max != 0) {
1610 		uint32_t eth_overhead = get_eth_overhead(&port->dev_info);
1611 		uint16_t mtu;
1612 
1613 		if (rte_eth_dev_get_mtu(pid, &mtu) == 0) {
1614 			uint16_t data_size = (mtu + eth_overhead) /
1615 				port->dev_info.rx_desc_lim.nb_mtu_seg_max;
1616 			uint16_t buffer_size = data_size + RTE_PKTMBUF_HEADROOM;
1617 
1618 			if (buffer_size > mbuf_data_size[0]) {
1619 				mbuf_data_size[0] = buffer_size;
1620 				TESTPMD_LOG(WARNING,
1621 					"Configured mbuf size of the first segment %hu\n",
1622 					mbuf_data_size[0]);
1623 			}
1624 		}
1625 	}
1626 }
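
/*
 * Worked example with assumed values (not from this file): for an MTU of
 * 9000, an 18-byte overhead and nb_mtu_seg_max = 3, data_size is
 * 9018 / 3 = 3006 bytes; adding the default 128-byte RTE_PKTMBUF_HEADROOM
 * gives a 3134-byte buffer, which exceeds the 2176-byte default and would
 * therefore become the new mbuf_data_size[0].
 */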
1627 
1628 static void
1629 init_config(void)
1630 {
1631 	portid_t pid;
1632 	struct rte_mempool *mbp;
1633 	unsigned int nb_mbuf_per_pool;
1634 	lcoreid_t  lc_id;
1635 	struct rte_gro_param gro_param;
1636 	uint32_t gso_types;
1637 
1638 	/* Configuration of logical cores. */
1639 	fwd_lcores = rte_zmalloc("testpmd: fwd_lcores",
1640 				sizeof(struct fwd_lcore *) * nb_lcores,
1641 				RTE_CACHE_LINE_SIZE);
1642 	if (fwd_lcores == NULL) {
1643 		rte_exit(EXIT_FAILURE, "rte_zmalloc(%d (struct fwd_lcore *)) "
1644 							"failed\n", nb_lcores);
1645 	}
1646 	for (lc_id = 0; lc_id < nb_lcores; lc_id++) {
1647 		fwd_lcores[lc_id] = rte_zmalloc("testpmd: struct fwd_lcore",
1648 					       sizeof(struct fwd_lcore),
1649 					       RTE_CACHE_LINE_SIZE);
1650 		if (fwd_lcores[lc_id] == NULL) {
1651 			rte_exit(EXIT_FAILURE, "rte_zmalloc(struct fwd_lcore) "
1652 								"failed\n");
1653 		}
1654 		fwd_lcores[lc_id]->cpuid_idx = lc_id;
1655 	}
1656 
1657 	RTE_ETH_FOREACH_DEV(pid) {
1658 		uint32_t socket_id;
1659 
1660 		if (numa_support) {
1661 			socket_id = port_numa[pid];
1662 			if (port_numa[pid] == NUMA_NO_CONFIG) {
1663 				socket_id = rte_eth_dev_socket_id(pid);
1664 
1665 				/*
1666 				 * if socket_id is invalid,
1667 				 * set to the first available socket.
1668 				 */
1669 				if (check_socket_id(socket_id) < 0)
1670 					socket_id = socket_ids[0];
1671 			}
1672 		} else {
1673 			socket_id = (socket_num == UMA_NO_CONFIG) ?
1674 				    0 : socket_num;
1675 		}
1676 		/* Apply default TxRx configuration for all ports */
1677 		init_config_port_offloads(pid, socket_id);
1678 	}
1679 	/*
1680 	 * Create pools of mbuf.
1681 	 * If NUMA support is disabled, create a single pool of mbuf in
1682 	 * If NUMA support is disabled, create a single mbuf pool in
1683 	 * socket 0 memory by default.
1684 	 * Otherwise, create an mbuf pool in the memory of each detected socket.
1685 	 * Use the maximum value of nb_rxd and nb_txd here, then nb_rxd and
1686 	 * nb_txd can be configured at run time.
1687 	 */
1688 	if (param_total_num_mbufs)
1689 		nb_mbuf_per_pool = param_total_num_mbufs;
1690 	else {
1691 		nb_mbuf_per_pool = RTE_TEST_RX_DESC_MAX +
1692 			(nb_lcores * mb_mempool_cache) +
1693 			RTE_TEST_TX_DESC_MAX + MAX_PKT_BURST;
1694 		nb_mbuf_per_pool *= RTE_MAX_ETHPORTS;
1695 	}
1696 
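
	/*
	 * Illustrative sizing (assuming the testpmd.h defaults of
	 * RTE_TEST_RX_DESC_MAX = RTE_TEST_TX_DESC_MAX = 2048,
	 * MAX_PKT_BURST = 512 and a 250-entry mbuf cache): with 4 lcores the
	 * default branch gives 2048 + 4 * 250 + 2048 + 512 = 5608 mbufs,
	 * which is then scaled by RTE_MAX_ETHPORTS.
	 */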
1697 	if (numa_support) {
1698 		uint8_t i, j;
1699 
1700 		for (i = 0; i < num_sockets; i++)
1701 			for (j = 0; j < mbuf_data_size_n; j++)
1702 				mempools[i * MAX_SEGS_BUFFER_SPLIT + j] =
1703 					mbuf_pool_create(mbuf_data_size[j],
1704 							  nb_mbuf_per_pool,
1705 							  socket_ids[i], j);
1706 	} else {
1707 		uint8_t i;
1708 
1709 		for (i = 0; i < mbuf_data_size_n; i++)
1710 			mempools[i] = mbuf_pool_create
1711 					(mbuf_data_size[i],
1712 					 nb_mbuf_per_pool,
1713 					 socket_num == UMA_NO_CONFIG ?
1714 					 0 : socket_num, i);
1715 	}
1716 
1717 	init_port_config();
1718 
1719 	gso_types = DEV_TX_OFFLOAD_TCP_TSO | DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
1720 		DEV_TX_OFFLOAD_GRE_TNL_TSO | DEV_TX_OFFLOAD_UDP_TSO;
1721 	/*
1722 	 * Record which mbuf pool is used by each logical core, if needed.
1723 	 */
1724 	for (lc_id = 0; lc_id < nb_lcores; lc_id++) {
1725 		mbp = mbuf_pool_find(
1726 			rte_lcore_to_socket_id(fwd_lcores_cpuids[lc_id]), 0);
1727 
1728 		if (mbp == NULL)
1729 			mbp = mbuf_pool_find(0, 0);
1730 		fwd_lcores[lc_id]->mbp = mbp;
1731 		/* initialize GSO context */
1732 		fwd_lcores[lc_id]->gso_ctx.direct_pool = mbp;
1733 		fwd_lcores[lc_id]->gso_ctx.indirect_pool = mbp;
1734 		fwd_lcores[lc_id]->gso_ctx.gso_types = gso_types;
1735 		fwd_lcores[lc_id]->gso_ctx.gso_size = RTE_ETHER_MAX_LEN -
1736 			RTE_ETHER_CRC_LEN;
1737 		fwd_lcores[lc_id]->gso_ctx.flag = 0;
1738 	}
1739 
1740 	fwd_config_setup();
1741 
1742 	/* create a gro context for each lcore */
1743 	gro_param.gro_types = RTE_GRO_TCP_IPV4;
1744 	gro_param.max_flow_num = GRO_MAX_FLUSH_CYCLES;
1745 	gro_param.max_item_per_flow = MAX_PKT_BURST;
1746 	for (lc_id = 0; lc_id < nb_lcores; lc_id++) {
1747 		gro_param.socket_id = rte_lcore_to_socket_id(
1748 				fwd_lcores_cpuids[lc_id]);
1749 		fwd_lcores[lc_id]->gro_ctx = rte_gro_ctx_create(&gro_param);
1750 		if (fwd_lcores[lc_id]->gro_ctx == NULL) {
1751 			rte_exit(EXIT_FAILURE,
1752 					"rte_gro_ctx_create() failed\n");
1753 		}
1754 	}
1755 }
1756 
1757 
1758 void
1759 reconfig(portid_t new_port_id, unsigned socket_id)
1760 {
1761 	/* Reconfiguration of Ethernet ports. */
1762 	init_config_port_offloads(new_port_id, socket_id);
1763 	init_port_config();
1764 }
1765 
1766 
1767 int
1768 init_fwd_streams(void)
1769 {
1770 	portid_t pid;
1771 	struct rte_port *port;
1772 	streamid_t sm_id, nb_fwd_streams_new;
1773 	queueid_t q;
1774 
1775 	/* Set the socket id according to the NUMA configuration. */
1776 	RTE_ETH_FOREACH_DEV(pid) {
1777 		port = &ports[pid];
1778 		if (nb_rxq > port->dev_info.max_rx_queues) {
1779 			fprintf(stderr,
1780 				"Fail: nb_rxq(%d) is greater than max_rx_queues(%d)\n",
1781 				nb_rxq, port->dev_info.max_rx_queues);
1782 			return -1;
1783 		}
1784 		if (nb_txq > port->dev_info.max_tx_queues) {
1785 			fprintf(stderr,
1786 				"Fail: nb_txq(%d) is greater than max_tx_queues(%d)\n",
1787 				nb_txq, port->dev_info.max_tx_queues);
1788 			return -1;
1789 		}
1790 		if (numa_support) {
1791 			if (port_numa[pid] != NUMA_NO_CONFIG)
1792 				port->socket_id = port_numa[pid];
1793 			else {
1794 				port->socket_id = rte_eth_dev_socket_id(pid);
1795 
1796 				/*
1797 				 * if socket_id is invalid,
1798 				 * set to the first available socket.
1799 				 */
1800 				if (check_socket_id(port->socket_id) < 0)
1801 					port->socket_id = socket_ids[0];
1802 			}
1803 		}
1804 		else {
1805 			if (socket_num == UMA_NO_CONFIG)
1806 				port->socket_id = 0;
1807 			else
1808 				port->socket_id = socket_num;
1809 		}
1810 	}
1811 
1812 	q = RTE_MAX(nb_rxq, nb_txq);
1813 	if (q == 0) {
1814 		fprintf(stderr,
1815 			"Fail: Cannot allocate fwd streams as number of queues is 0\n");
1816 		return -1;
1817 	}
1818 	nb_fwd_streams_new = (streamid_t)(nb_ports * q);
1819 	if (nb_fwd_streams_new == nb_fwd_streams)
1820 		return 0;
1821 	/* clear the old */
1822 	if (fwd_streams != NULL) {
1823 		for (sm_id = 0; sm_id < nb_fwd_streams; sm_id++) {
1824 			if (fwd_streams[sm_id] == NULL)
1825 				continue;
1826 			rte_free(fwd_streams[sm_id]);
1827 			fwd_streams[sm_id] = NULL;
1828 		}
1829 		rte_free(fwd_streams);
1830 		fwd_streams = NULL;
1831 	}
1832 
1833 	/* init new */
1834 	nb_fwd_streams = nb_fwd_streams_new;
1835 	if (nb_fwd_streams) {
1836 		fwd_streams = rte_zmalloc("testpmd: fwd_streams",
1837 			sizeof(struct fwd_stream *) * nb_fwd_streams,
1838 			RTE_CACHE_LINE_SIZE);
1839 		if (fwd_streams == NULL)
1840 			rte_exit(EXIT_FAILURE, "rte_zmalloc(%d"
1841 				 " (struct fwd_stream *)) failed\n",
1842 				 nb_fwd_streams);
1843 
1844 		for (sm_id = 0; sm_id < nb_fwd_streams; sm_id++) {
1845 			fwd_streams[sm_id] = rte_zmalloc("testpmd:"
1846 				" struct fwd_stream", sizeof(struct fwd_stream),
1847 				RTE_CACHE_LINE_SIZE);
1848 			if (fwd_streams[sm_id] == NULL)
1849 				rte_exit(EXIT_FAILURE, "rte_zmalloc"
1850 					 "(struct fwd_stream) failed\n");
1851 		}
1852 	}
1853 
1854 	return 0;
1855 }
1856 
1857 static void
1858 pkt_burst_stats_display(const char *rx_tx, struct pkt_burst_stats *pbs)
1859 {
1860 	uint64_t total_burst, sburst;
1861 	uint64_t nb_burst;
1862 	uint64_t burst_stats[4];
1863 	uint16_t pktnb_stats[4];
1864 	uint16_t nb_pkt;
1865 	int burst_percent[4], sburstp;
1866 	int i;
1867 
1868 	/*
1869 	 * First compute the total number of packet bursts and the
1870 	 * two highest numbers of bursts of the same number of packets.
1871 	 */
1872 	memset(&burst_stats, 0x0, sizeof(burst_stats));
1873 	memset(&pktnb_stats, 0x0, sizeof(pktnb_stats));
1874 
1875 	/* Show stats for 0 burst size always */
1876 	total_burst = pbs->pkt_burst_spread[0];
1877 	burst_stats[0] = pbs->pkt_burst_spread[0];
1878 	pktnb_stats[0] = 0;
1879 
1880 	/* Find the next 2 burst sizes with highest occurrences. */
1881 	for (nb_pkt = 1; nb_pkt < MAX_PKT_BURST; nb_pkt++) {
1882 		nb_burst = pbs->pkt_burst_spread[nb_pkt];
1883 
1884 		if (nb_burst == 0)
1885 			continue;
1886 
1887 		total_burst += nb_burst;
1888 
1889 		if (nb_burst > burst_stats[1]) {
1890 			burst_stats[2] = burst_stats[1];
1891 			pktnb_stats[2] = pktnb_stats[1];
1892 			burst_stats[1] = nb_burst;
1893 			pktnb_stats[1] = nb_pkt;
1894 		} else if (nb_burst > burst_stats[2]) {
1895 			burst_stats[2] = nb_burst;
1896 			pktnb_stats[2] = nb_pkt;
1897 		}
1898 	}
1899 	if (total_burst == 0)
1900 		return;
1901 
1902 	printf("  %s-bursts : %"PRIu64" [", rx_tx, total_burst);
1903 	for (i = 0, sburst = 0, sburstp = 0; i < 4; i++) {
1904 		if (i == 3) {
1905 			printf("%d%% of other]\n", 100 - sburstp);
1906 			return;
1907 		}
1908 
1909 		sburst += burst_stats[i];
1910 		if (sburst == total_burst) {
1911 			printf("%d%% of %d pkts]\n",
1912 				100 - sburstp, (int) pktnb_stats[i]);
1913 			return;
1914 		}
1915 
1916 		burst_percent[i] =
1917 			(double)burst_stats[i] / total_burst * 100;
1918 		printf("%d%% of %d pkts + ",
1919 			burst_percent[i], (int) pktnb_stats[i]);
1920 		sburstp += burst_percent[i];
1921 	}
1922 }
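
/*
 * Example of the resulting line (illustrative numbers only):
 *
 *   RX-bursts : 10000 [80% of 0 pkts + 15% of 32 pkts + 4% of 8 pkts + 1% of other]
 */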
1923 
1924 static void
1925 fwd_stream_stats_display(streamid_t stream_id)
1926 {
1927 	struct fwd_stream *fs;
1928 	static const char *fwd_top_stats_border = "-------";
1929 
1930 	fs = fwd_streams[stream_id];
1931 	if ((fs->rx_packets == 0) && (fs->tx_packets == 0) &&
1932 	    (fs->fwd_dropped == 0))
1933 		return;
1934 	printf("\n  %s Forward Stats for RX Port=%2d/Queue=%2d -> "
1935 	       "TX Port=%2d/Queue=%2d %s\n",
1936 	       fwd_top_stats_border, fs->rx_port, fs->rx_queue,
1937 	       fs->tx_port, fs->tx_queue, fwd_top_stats_border);
1938 	printf("  RX-packets: %-14"PRIu64" TX-packets: %-14"PRIu64
1939 	       " TX-dropped: %-14"PRIu64,
1940 	       fs->rx_packets, fs->tx_packets, fs->fwd_dropped);
1941 
1942 	/* if checksum mode */
1943 	if (cur_fwd_eng == &csum_fwd_engine) {
1944 		printf("  RX- bad IP checksum: %-14"PRIu64
1945 		       "  Rx- bad L4 checksum: %-14"PRIu64
1946 		       " Rx- bad outer L4 checksum: %-14"PRIu64"\n",
1947 			fs->rx_bad_ip_csum, fs->rx_bad_l4_csum,
1948 			fs->rx_bad_outer_l4_csum);
1949 		printf(" RX- bad outer IP checksum: %-14"PRIu64"\n",
1950 			fs->rx_bad_outer_ip_csum);
1951 	} else {
1952 		printf("\n");
1953 	}
1954 
1955 	if (record_burst_stats) {
1956 		pkt_burst_stats_display("RX", &fs->rx_burst_stats);
1957 		pkt_burst_stats_display("TX", &fs->tx_burst_stats);
1958 	}
1959 }
1960 
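/*
 * Display the forwarding statistics: per-stream stats when there are more
 * streams than ports, per-port deltas since the last reset, the accumulated
 * totals for all ports and, when core cycle recording is enabled, the
 * average number of CPU cycles per packet.
 */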
1961 void
1962 fwd_stats_display(void)
1963 {
1964 	static const char *fwd_stats_border = "----------------------";
1965 	static const char *acc_stats_border = "+++++++++++++++";
1966 	struct {
1967 		struct fwd_stream *rx_stream;
1968 		struct fwd_stream *tx_stream;
1969 		uint64_t tx_dropped;
1970 		uint64_t rx_bad_ip_csum;
1971 		uint64_t rx_bad_l4_csum;
1972 		uint64_t rx_bad_outer_l4_csum;
1973 		uint64_t rx_bad_outer_ip_csum;
1974 	} ports_stats[RTE_MAX_ETHPORTS];
1975 	uint64_t total_rx_dropped = 0;
1976 	uint64_t total_tx_dropped = 0;
1977 	uint64_t total_rx_nombuf = 0;
1978 	struct rte_eth_stats stats;
1979 	uint64_t fwd_cycles = 0;
1980 	uint64_t total_recv = 0;
1981 	uint64_t total_xmit = 0;
1982 	struct rte_port *port;
1983 	streamid_t sm_id;
1984 	portid_t pt_id;
1985 	int i;
1986 
1987 	memset(ports_stats, 0, sizeof(ports_stats));
1988 
1989 	for (sm_id = 0; sm_id < cur_fwd_config.nb_fwd_streams; sm_id++) {
1990 		struct fwd_stream *fs = fwd_streams[sm_id];
1991 
1992 		if (cur_fwd_config.nb_fwd_streams >
1993 		    cur_fwd_config.nb_fwd_ports) {
1994 			fwd_stream_stats_display(sm_id);
1995 		} else {
1996 			ports_stats[fs->tx_port].tx_stream = fs;
1997 			ports_stats[fs->rx_port].rx_stream = fs;
1998 		}
1999 
2000 		ports_stats[fs->tx_port].tx_dropped += fs->fwd_dropped;
2001 
2002 		ports_stats[fs->rx_port].rx_bad_ip_csum += fs->rx_bad_ip_csum;
2003 		ports_stats[fs->rx_port].rx_bad_l4_csum += fs->rx_bad_l4_csum;
2004 		ports_stats[fs->rx_port].rx_bad_outer_l4_csum +=
2005 				fs->rx_bad_outer_l4_csum;
2006 		ports_stats[fs->rx_port].rx_bad_outer_ip_csum +=
2007 				fs->rx_bad_outer_ip_csum;
2008 
2009 		if (record_core_cycles)
2010 			fwd_cycles += fs->core_cycles;
2011 	}
2012 	for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
2013 		pt_id = fwd_ports_ids[i];
2014 		port = &ports[pt_id];
2015 
2016 		rte_eth_stats_get(pt_id, &stats);
2017 		stats.ipackets -= port->stats.ipackets;
2018 		stats.opackets -= port->stats.opackets;
2019 		stats.ibytes -= port->stats.ibytes;
2020 		stats.obytes -= port->stats.obytes;
2021 		stats.imissed -= port->stats.imissed;
2022 		stats.oerrors -= port->stats.oerrors;
2023 		stats.rx_nombuf -= port->stats.rx_nombuf;
2024 
2025 		total_recv += stats.ipackets;
2026 		total_xmit += stats.opackets;
2027 		total_rx_dropped += stats.imissed;
2028 		total_tx_dropped += ports_stats[pt_id].tx_dropped;
2029 		total_tx_dropped += stats.oerrors;
2030 		total_rx_nombuf  += stats.rx_nombuf;
2031 
2032 		printf("\n  %s Forward statistics for port %-2d %s\n",
2033 		       fwd_stats_border, pt_id, fwd_stats_border);
2034 
2035 		printf("  RX-packets: %-14"PRIu64" RX-dropped: %-14"PRIu64
2036 		       "RX-total: %-"PRIu64"\n", stats.ipackets, stats.imissed,
2037 		       stats.ipackets + stats.imissed);
2038 
2039 		if (cur_fwd_eng == &csum_fwd_engine) {
2040 			printf("  Bad-ipcsum: %-14"PRIu64
2041 			       " Bad-l4csum: %-14"PRIu64
2042 			       "Bad-outer-l4csum: %-14"PRIu64"\n",
2043 			       ports_stats[pt_id].rx_bad_ip_csum,
2044 			       ports_stats[pt_id].rx_bad_l4_csum,
2045 			       ports_stats[pt_id].rx_bad_outer_l4_csum);
2046 			printf("  Bad-outer-ipcsum: %-14"PRIu64"\n",
2047 			       ports_stats[pt_id].rx_bad_outer_ip_csum);
2048 		}
2049 		if (stats.ierrors + stats.rx_nombuf > 0) {
2050 			printf("  RX-error: %-"PRIu64"\n", stats.ierrors);
2051 			printf("  RX-nombufs: %-14"PRIu64"\n", stats.rx_nombuf);
2052 		}
2053 
2054 		printf("  TX-packets: %-14"PRIu64" TX-dropped: %-14"PRIu64
2055 		       "TX-total: %-"PRIu64"\n",
2056 		       stats.opackets, ports_stats[pt_id].tx_dropped,
2057 		       stats.opackets + ports_stats[pt_id].tx_dropped);
2058 
2059 		if (record_burst_stats) {
2060 			if (ports_stats[pt_id].rx_stream)
2061 				pkt_burst_stats_display("RX",
2062 					&ports_stats[pt_id].rx_stream->rx_burst_stats);
2063 			if (ports_stats[pt_id].tx_stream)
2064 				pkt_burst_stats_display("TX",
2065 				&ports_stats[pt_id].tx_stream->tx_burst_stats);
2066 		}
2067 
2068 		printf("  %s--------------------------------%s\n",
2069 		       fwd_stats_border, fwd_stats_border);
2070 	}
2071 
2072 	printf("\n  %s Accumulated forward statistics for all ports"
2073 	       "%s\n",
2074 	       acc_stats_border, acc_stats_border);
2075 	printf("  RX-packets: %-14"PRIu64" RX-dropped: %-14"PRIu64"RX-total: "
2076 	       "%-"PRIu64"\n"
2077 	       "  TX-packets: %-14"PRIu64" TX-dropped: %-14"PRIu64"TX-total: "
2078 	       "%-"PRIu64"\n",
2079 	       total_recv, total_rx_dropped, total_recv + total_rx_dropped,
2080 	       total_xmit, total_tx_dropped, total_xmit + total_tx_dropped);
2081 	if (total_rx_nombuf > 0)
2082 		printf("  RX-nombufs: %-14"PRIu64"\n", total_rx_nombuf);
2083 	printf("  %s++++++++++++++++++++++++++++++++++++++++++++++"
2084 	       "%s\n",
2085 	       acc_stats_border, acc_stats_border);
2086 	if (record_core_cycles) {
2087 #define CYC_PER_MHZ 1E6
2088 		if (total_recv > 0 || total_xmit > 0) {
2089 			uint64_t total_pkts = 0;
2090 			if (strcmp(cur_fwd_eng->fwd_mode_name, "txonly") == 0 ||
2091 			    strcmp(cur_fwd_eng->fwd_mode_name, "flowgen") == 0)
2092 				total_pkts = total_xmit;
2093 			else
2094 				total_pkts = total_recv;
2095 
2096 			printf("\n  CPU cycles/packet=%.2F (total cycles="
2097 			       "%"PRIu64" / total %s packets=%"PRIu64") at %"PRIu64
2098 			       " MHz Clock\n",
2099 			       (double) fwd_cycles / total_pkts,
2100 			       fwd_cycles, cur_fwd_eng->fwd_mode_name, total_pkts,
2101 			       (uint64_t)(rte_get_tsc_hz() / CYC_PER_MHZ));
2102 		}
2103 	}
2104 }
2105 
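/*
 * Reset the forwarding statistics: record the current ethdev stats as the
 * new baseline and clear every per-stream counter and burst spread.
 */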
2106 void
2107 fwd_stats_reset(void)
2108 {
2109 	streamid_t sm_id;
2110 	portid_t pt_id;
2111 	int i;
2112 
2113 	for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
2114 		pt_id = fwd_ports_ids[i];
2115 		rte_eth_stats_get(pt_id, &ports[pt_id].stats);
2116 	}
2117 	for (sm_id = 0; sm_id < cur_fwd_config.nb_fwd_streams; sm_id++) {
2118 		struct fwd_stream *fs = fwd_streams[sm_id];
2119 
2120 		fs->rx_packets = 0;
2121 		fs->tx_packets = 0;
2122 		fs->fwd_dropped = 0;
2123 		fs->rx_bad_ip_csum = 0;
2124 		fs->rx_bad_l4_csum = 0;
2125 		fs->rx_bad_outer_l4_csum = 0;
2126 		fs->rx_bad_outer_ip_csum = 0;
2127 
2128 		memset(&fs->rx_burst_stats, 0, sizeof(fs->rx_burst_stats));
2129 		memset(&fs->tx_burst_stats, 0, sizeof(fs->tx_burst_stats));
2130 		fs->core_cycles = 0;
2131 	}
2132 }
2133 
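/*
 * Drain and free any packets pending in the Rx queues of the forwarding
 * ports so that stale packets do not pollute the next run. Each queue is
 * polled for at most one second; two passes are done with a 10 ms pause
 * in between. Skipped in multi-process mode.
 */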
2134 static void
2135 flush_fwd_rx_queues(void)
2136 {
2137 	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
2138 	portid_t  rxp;
2139 	portid_t port_id;
2140 	queueid_t rxq;
2141 	uint16_t  nb_rx;
2142 	uint16_t  i;
2143 	uint8_t   j;
2144 	uint64_t prev_tsc = 0, diff_tsc, cur_tsc, timer_tsc = 0;
2145 	uint64_t timer_period;
2146 
2147 	if (num_procs > 1) {
2148 		printf("multi-process not support for flushing fwd Rx queues, skip the below lines and return.\n");
2149 		return;
2150 	}
2151 
2152 	/* convert to number of cycles */
2153 	timer_period = rte_get_timer_hz(); /* 1 second timeout */
2154 
2155 	for (j = 0; j < 2; j++) {
2156 		for (rxp = 0; rxp < cur_fwd_config.nb_fwd_ports; rxp++) {
2157 			for (rxq = 0; rxq < nb_rxq; rxq++) {
2158 				port_id = fwd_ports_ids[rxp];
2159 				/*
2160 				 * testpmd can get stuck in the do-while loop below
2161 				 * if rte_eth_rx_burst() always returns a nonzero
2162 				 * number of packets. A timer is added to exit this
2163 				 * loop after the 1-second timer expires.
2164 				 */
2165 				prev_tsc = rte_rdtsc();
2166 				do {
2167 					nb_rx = rte_eth_rx_burst(port_id, rxq,
2168 						pkts_burst, MAX_PKT_BURST);
2169 					for (i = 0; i < nb_rx; i++)
2170 						rte_pktmbuf_free(pkts_burst[i]);
2171 
2172 					cur_tsc = rte_rdtsc();
2173 					diff_tsc = cur_tsc - prev_tsc;
2174 					timer_tsc += diff_tsc;
2175 				} while ((nb_rx > 0) &&
2176 					(timer_tsc < timer_period));
2177 				timer_tsc = 0;
2178 			}
2179 		}
2180 		rte_delay_ms(10); /* wait 10 milli-seconds before retrying */
2181 	}
2182 }
2183 
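/*
 * Main loop of a forwarding lcore: keep invoking the packet forwarding
 * callback on every stream assigned to this lcore until it is told to
 * stop, updating the bitrate and latency statistics on the lcores that
 * were designated for them.
 */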
2184 static void
2185 run_pkt_fwd_on_lcore(struct fwd_lcore *fc, packet_fwd_t pkt_fwd)
2186 {
2187 	struct fwd_stream **fsm;
2188 	streamid_t nb_fs;
2189 	streamid_t sm_id;
2190 #ifdef RTE_LIB_BITRATESTATS
2191 	uint64_t tics_per_1sec;
2192 	uint64_t tics_datum;
2193 	uint64_t tics_current;
2194 	uint16_t i, cnt_ports;
2195 
2196 	cnt_ports = nb_ports;
2197 	tics_datum = rte_rdtsc();
2198 	tics_per_1sec = rte_get_timer_hz();
2199 #endif
2200 	fsm = &fwd_streams[fc->stream_idx];
2201 	nb_fs = fc->stream_nb;
2202 	do {
2203 		for (sm_id = 0; sm_id < nb_fs; sm_id++)
2204 			(*pkt_fwd)(fsm[sm_id]);
2205 #ifdef RTE_LIB_BITRATESTATS
2206 		if (bitrate_enabled != 0 &&
2207 				bitrate_lcore_id == rte_lcore_id()) {
2208 			tics_current = rte_rdtsc();
2209 			if (tics_current - tics_datum >= tics_per_1sec) {
2210 				/* Periodic bitrate calculation */
2211 				for (i = 0; i < cnt_ports; i++)
2212 					rte_stats_bitrate_calc(bitrate_data,
2213 						ports_ids[i]);
2214 				tics_datum = tics_current;
2215 			}
2216 		}
2217 #endif
2218 #ifdef RTE_LIB_LATENCYSTATS
2219 		if (latencystats_enabled != 0 &&
2220 				latencystats_lcore_id == rte_lcore_id())
2221 			rte_latencystats_update();
2222 #endif
2223 
2224 	} while (! fc->stopped);
2225 }
2226 
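/*
 * Entry point launched on each forwarding lcore: run the packet_fwd
 * callback of the currently selected forwarding engine.
 */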
2227 static int
2228 start_pkt_forward_on_core(void *fwd_arg)
2229 {
2230 	run_pkt_fwd_on_lcore((struct fwd_lcore *) fwd_arg,
2231 			     cur_fwd_config.fwd_eng->packet_fwd);
2232 	return 0;
2233 }
2234 
2235 /*
2236  * Run the TXONLY packet forwarding engine to send a single burst of packets.
2237  * Used to start communication flows in network loopback test configurations.
2238  */
2239 static int
2240 run_one_txonly_burst_on_core(void *fwd_arg)
2241 {
2242 	struct fwd_lcore *fwd_lc;
2243 	struct fwd_lcore tmp_lcore;
2244 
2245 	fwd_lc = (struct fwd_lcore *) fwd_arg;
2246 	tmp_lcore = *fwd_lc;
2247 	tmp_lcore.stopped = 1;
2248 	run_pkt_fwd_on_lcore(&tmp_lcore, tx_only_engine.packet_fwd);
2249 	return 0;
2250 }
2251 
2252 /*
2253  * Launch packet forwarding:
2254  *     - Setup per-port forwarding context.
2255  *     - launch logical cores with their forwarding configuration.
2256  */
2257 static void
2258 launch_packet_forwarding(lcore_function_t *pkt_fwd_on_lcore)
2259 {
2260 	unsigned int i;
2261 	unsigned int lc_id;
2262 	int diag;
2263 
2264 	for (i = 0; i < cur_fwd_config.nb_fwd_lcores; i++) {
2265 		lc_id = fwd_lcores_cpuids[i];
2266 		if ((interactive == 0) || (lc_id != rte_lcore_id())) {
2267 			fwd_lcores[i]->stopped = 0;
2268 			diag = rte_eal_remote_launch(pkt_fwd_on_lcore,
2269 						     fwd_lcores[i], lc_id);
2270 			if (diag != 0)
2271 				fprintf(stderr,
2272 					"launch lcore %u failed - diag=%d\n",
2273 					lc_id, diag);
2274 		}
2275 	}
2276 }
2277 
2278 /*
2279  * Launch packet forwarding configuration.
2280  */
2281 void
2282 start_packet_forwarding(int with_tx_first)
2283 {
2284 	port_fwd_begin_t port_fwd_begin;
2285 	port_fwd_end_t  port_fwd_end;
2286 	unsigned int i;
2287 
2288 	if (strcmp(cur_fwd_eng->fwd_mode_name, "rxonly") == 0 && !nb_rxq)
2289 		rte_exit(EXIT_FAILURE, "rxq are 0, cannot use rxonly fwd mode\n");
2290 
2291 	if (strcmp(cur_fwd_eng->fwd_mode_name, "txonly") == 0 && !nb_txq)
2292 		rte_exit(EXIT_FAILURE, "txq are 0, cannot use txonly fwd mode\n");
2293 
2294 	if ((strcmp(cur_fwd_eng->fwd_mode_name, "rxonly") != 0 &&
2295 		strcmp(cur_fwd_eng->fwd_mode_name, "txonly") != 0) &&
2296 		(!nb_rxq || !nb_txq))
2297 		rte_exit(EXIT_FAILURE,
2298 			"Either rxq or txq are 0, cannot use %s fwd mode\n",
2299 			cur_fwd_eng->fwd_mode_name);
2300 
2301 	if (all_ports_started() == 0) {
2302 		fprintf(stderr, "Not all ports were started\n");
2303 		return;
2304 	}
2305 	if (test_done == 0) {
2306 		fprintf(stderr, "Packet forwarding already started\n");
2307 		return;
2308 	}
2309 
2310 	fwd_config_setup();
2311 
2312 	pkt_fwd_config_display(&cur_fwd_config);
2313 	if (!pkt_fwd_shared_rxq_check())
2314 		return;
2315 
2316 	port_fwd_begin = cur_fwd_config.fwd_eng->port_fwd_begin;
2317 	if (port_fwd_begin != NULL) {
2318 		for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
2319 			if (port_fwd_begin(fwd_ports_ids[i])) {
2320 				fprintf(stderr,
2321 					"Packet forwarding is not ready\n");
2322 				return;
2323 			}
2324 		}
2325 	}
2326 
2327 	if (with_tx_first) {
2328 		port_fwd_begin = tx_only_engine.port_fwd_begin;
2329 		if (port_fwd_begin != NULL) {
2330 			for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
2331 				if (port_fwd_begin(fwd_ports_ids[i])) {
2332 					fprintf(stderr,
2333 						"Packet forwarding is not ready\n");
2334 					return;
2335 				}
2336 			}
2337 		}
2338 	}
2339 
2340 	test_done = 0;
2341 
2342 	if(!no_flush_rx)
2343 		flush_fwd_rx_queues();
2344 
2345 	rxtx_config_display();
2346 
2347 	fwd_stats_reset();
2348 	if (with_tx_first) {
2349 		while (with_tx_first--) {
2350 			launch_packet_forwarding(
2351 					run_one_txonly_burst_on_core);
2352 			rte_eal_mp_wait_lcore();
2353 		}
2354 		port_fwd_end = tx_only_engine.port_fwd_end;
2355 		if (port_fwd_end != NULL) {
2356 			for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++)
2357 				(*port_fwd_end)(fwd_ports_ids[i]);
2358 		}
2359 	}
2360 	launch_packet_forwarding(start_pkt_forward_on_core);
2361 }
2362 
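/*
 * Stop packet forwarding: ask every forwarding lcore to stop, wait for
 * them to finish, run the engine's per-port end callback if any and
 * display the final forwarding statistics.
 */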
2363 void
2364 stop_packet_forwarding(void)
2365 {
2366 	port_fwd_end_t port_fwd_end;
2367 	lcoreid_t lc_id;
2368 	portid_t pt_id;
2369 	int i;
2370 
2371 	if (test_done) {
2372 		fprintf(stderr, "Packet forwarding not started\n");
2373 		return;
2374 	}
2375 	printf("Telling cores to stop...");
2376 	for (lc_id = 0; lc_id < cur_fwd_config.nb_fwd_lcores; lc_id++)
2377 		fwd_lcores[lc_id]->stopped = 1;
2378 	printf("\nWaiting for lcores to finish...\n");
2379 	rte_eal_mp_wait_lcore();
2380 	port_fwd_end = cur_fwd_config.fwd_eng->port_fwd_end;
2381 	if (port_fwd_end != NULL) {
2382 		for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
2383 			pt_id = fwd_ports_ids[i];
2384 			(*port_fwd_end)(pt_id);
2385 		}
2386 	}
2387 
2388 	fwd_stats_display();
2389 
2390 	printf("\nDone.\n");
2391 	test_done = 1;
2392 }
2393 
2394 void
2395 dev_set_link_up(portid_t pid)
2396 {
2397 	if (rte_eth_dev_set_link_up(pid) < 0)
2398 		fprintf(stderr, "\nSet link up fail.\n");
2399 }
2400 
2401 void
2402 dev_set_link_down(portid_t pid)
2403 {
2404 	if (rte_eth_dev_set_link_down(pid) < 0)
2405 		fprintf(stderr, "\nSet link down fail.\n");
2406 }
2407 
2408 static int
2409 all_ports_started(void)
2410 {
2411 	portid_t pi;
2412 	struct rte_port *port;
2413 
2414 	RTE_ETH_FOREACH_DEV(pi) {
2415 		port = &ports[pi];
2416 		/* Check if there is a port which is not started */
2417 		if ((port->port_status != RTE_PORT_STARTED) &&
2418 			(port->slave_flag == 0))
2419 			return 0;
2420 	}
2421 
2422 	/* All ports, except bonding slaves, are started */
2423 	return 1;
2424 }
2425 
2426 int
2427 port_is_stopped(portid_t port_id)
2428 {
2429 	struct rte_port *port = &ports[port_id];
2430 
2431 	if ((port->port_status != RTE_PORT_STOPPED) &&
2432 	    (port->slave_flag == 0))
2433 		return 0;
2434 	return 1;
2435 }
2436 
2437 int
2438 all_ports_stopped(void)
2439 {
2440 	portid_t pi;
2441 
2442 	RTE_ETH_FOREACH_DEV(pi) {
2443 		if (!port_is_stopped(pi))
2444 			return 0;
2445 	}
2446 
2447 	return 1;
2448 }
2449 
2450 int
2451 port_is_started(portid_t port_id)
2452 {
2453 	if (port_id_is_invalid(port_id, ENABLED_WARN))
2454 		return 0;
2455 
2456 	if (ports[port_id].port_status != RTE_PORT_STARTED)
2457 		return 0;
2458 
2459 	return 1;
2460 }
2461 
2462 /* Configure the Rx and Tx hairpin queues for the selected port. */
2463 static int
2464 setup_hairpin_queues(portid_t pi, portid_t p_pi, uint16_t cnt_pi)
2465 {
2466 	queueid_t qi;
2467 	struct rte_eth_hairpin_conf hairpin_conf = {
2468 		.peer_count = 1,
2469 	};
2470 	int i;
2471 	int diag;
2472 	struct rte_port *port = &ports[pi];
2473 	uint16_t peer_rx_port = pi;
2474 	uint16_t peer_tx_port = pi;
2475 	uint32_t manual = 1;
2476 	uint32_t tx_exp = hairpin_mode & 0x10;
2477 
2478 	if (!(hairpin_mode & 0xf)) {
2479 		peer_rx_port = pi;
2480 		peer_tx_port = pi;
2481 		manual = 0;
2482 	} else if (hairpin_mode & 0x1) {
2483 		peer_tx_port = rte_eth_find_next_owned_by(pi + 1,
2484 						       RTE_ETH_DEV_NO_OWNER);
2485 		if (peer_tx_port >= RTE_MAX_ETHPORTS)
2486 			peer_tx_port = rte_eth_find_next_owned_by(0,
2487 						RTE_ETH_DEV_NO_OWNER);
2488 		if (p_pi != RTE_MAX_ETHPORTS) {
2489 			peer_rx_port = p_pi;
2490 		} else {
2491 			uint16_t next_pi;
2492 
2493 			/* Last port will be the peer RX port of the first. */
2494 			RTE_ETH_FOREACH_DEV(next_pi)
2495 				peer_rx_port = next_pi;
2496 		}
2497 		manual = 1;
2498 	} else if (hairpin_mode & 0x2) {
2499 		if (cnt_pi & 0x1) {
2500 			peer_rx_port = p_pi;
2501 		} else {
2502 			peer_rx_port = rte_eth_find_next_owned_by(pi + 1,
2503 						RTE_ETH_DEV_NO_OWNER);
2504 			if (peer_rx_port >= RTE_MAX_ETHPORTS)
2505 				peer_rx_port = pi;
2506 		}
2507 		peer_tx_port = peer_rx_port;
2508 		manual = 1;
2509 	}
2510 
2511 	for (qi = nb_txq, i = 0; qi < nb_hairpinq + nb_txq; qi++) {
2512 		hairpin_conf.peers[0].port = peer_rx_port;
2513 		hairpin_conf.peers[0].queue = i + nb_rxq;
2514 		hairpin_conf.manual_bind = !!manual;
2515 		hairpin_conf.tx_explicit = !!tx_exp;
2516 		diag = rte_eth_tx_hairpin_queue_setup
2517 			(pi, qi, nb_txd, &hairpin_conf);
2518 		i++;
2519 		if (diag == 0)
2520 			continue;
2521 
2522 		/* Failed to set up Tx hairpin queue, return */
2523 		if (rte_atomic16_cmpset(&(port->port_status),
2524 					RTE_PORT_HANDLING,
2525 					RTE_PORT_STOPPED) == 0)
2526 			fprintf(stderr,
2527 				"Port %d can not be set back to stopped\n", pi);
2528 		fprintf(stderr, "Fail to configure port %d hairpin queues\n",
2529 			pi);
2530 		/* try to reconfigure queues next time */
2531 		port->need_reconfig_queues = 1;
2532 		return -1;
2533 	}
2534 	for (qi = nb_rxq, i = 0; qi < nb_hairpinq + nb_rxq; qi++) {
2535 		hairpin_conf.peers[0].port = peer_tx_port;
2536 		hairpin_conf.peers[0].queue = i + nb_txq;
2537 		hairpin_conf.manual_bind = !!manual;
2538 		hairpin_conf.tx_explicit = !!tx_exp;
2539 		diag = rte_eth_rx_hairpin_queue_setup
2540 			(pi, qi, nb_rxd, &hairpin_conf);
2541 		i++;
2542 		if (diag == 0)
2543 			continue;
2544 
2545 		/* Failed to set up Rx hairpin queue, return */
2546 		if (rte_atomic16_cmpset(&(port->port_status),
2547 					RTE_PORT_HANDLING,
2548 					RTE_PORT_STOPPED) == 0)
2549 			fprintf(stderr,
2550 				"Port %d can not be set back to stopped\n", pi);
2551 		fprintf(stderr, "Fail to configure port %d hairpin queues\n",
2552 			pi);
2553 		/* try to reconfigure queues next time */
2554 		port->need_reconfig_queues = 1;
2555 		return -1;
2556 	}
2557 	return 0;
2558 }
2559 
2560 /* Configure the Rx with optional split. */
2561 int
2562 rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
2563 	       uint16_t nb_rx_desc, unsigned int socket_id,
2564 	       struct rte_eth_rxconf *rx_conf, struct rte_mempool *mp)
2565 {
2566 	union rte_eth_rxseg rx_useg[MAX_SEGS_BUFFER_SPLIT] = {};
2567 	unsigned int i, mp_n;
2568 	int ret;
2569 
2570 	if (rx_pkt_nb_segs <= 1 ||
2571 	    (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) == 0) {
2572 		rx_conf->rx_seg = NULL;
2573 		rx_conf->rx_nseg = 0;
2574 		ret = rte_eth_rx_queue_setup(port_id, rx_queue_id,
2575 					     nb_rx_desc, socket_id,
2576 					     rx_conf, mp);
2577 		return ret;
2578 	}
2579 	for (i = 0; i < rx_pkt_nb_segs; i++) {
2580 		struct rte_eth_rxseg_split *rx_seg = &rx_useg[i].split;
2581 		struct rte_mempool *mpx;
2582 		/*
2583 		 * Use the last valid pool for any segment whose index
2584 		 * exceeds the number of configured mempools.
2585 		 */
2586 		mp_n = (i >= mbuf_data_size_n) ? mbuf_data_size_n - 1 : i;
2587 		mpx = mbuf_pool_find(socket_id, mp_n);
2588 		/* Handle zero as mbuf data buffer size. */
2589 		rx_seg->length = rx_pkt_seg_lengths[i] ?
2590 				   rx_pkt_seg_lengths[i] :
2591 				   mbuf_data_size[mp_n];
2592 		rx_seg->offset = i < rx_pkt_nb_offs ?
2593 				   rx_pkt_seg_offsets[i] : 0;
2594 		rx_seg->mp = mpx ? mpx : mp;
2595 	}
2596 	rx_conf->rx_nseg = rx_pkt_nb_segs;
2597 	rx_conf->rx_seg = rx_useg;
2598 	ret = rte_eth_rx_queue_setup(port_id, rx_queue_id, nb_rx_desc,
2599 				    socket_id, rx_conf, NULL);
2600 	rx_conf->rx_seg = NULL;
2601 	rx_conf->rx_nseg = 0;
2602 	return ret;
2603 }
2604 
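/*
 * Allocate the per-port arrays used to display the selected extended
 * statistics (supported ids, previous and current values). Does nothing
 * when no xstats were requested on the command line.
 */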
2605 static int
2606 alloc_xstats_display_info(portid_t pi)
2607 {
2608 	uint64_t **ids_supp = &ports[pi].xstats_info.ids_supp;
2609 	uint64_t **prev_values = &ports[pi].xstats_info.prev_values;
2610 	uint64_t **curr_values = &ports[pi].xstats_info.curr_values;
2611 
2612 	if (xstats_display_num == 0)
2613 		return 0;
2614 
2615 	*ids_supp = calloc(xstats_display_num, sizeof(**ids_supp));
2616 	if (*ids_supp == NULL)
2617 		goto fail_ids_supp;
2618 
2619 	*prev_values = calloc(xstats_display_num,
2620 			      sizeof(**prev_values));
2621 	if (*prev_values == NULL)
2622 		goto fail_prev_values;
2623 
2624 	*curr_values = calloc(xstats_display_num,
2625 			      sizeof(**curr_values));
2626 	if (*curr_values == NULL)
2627 		goto fail_curr_values;
2628 
2629 	ports[pi].xstats_info.allocated = true;
2630 
2631 	return 0;
2632 
2633 fail_curr_values:
2634 	free(*prev_values);
2635 fail_prev_values:
2636 	free(*ids_supp);
2637 fail_ids_supp:
2638 	return -ENOMEM;
2639 }
2640 
2641 static void
2642 free_xstats_display_info(portid_t pi)
2643 {
2644 	if (!ports[pi].xstats_info.allocated)
2645 		return;
2646 	free(ports[pi].xstats_info.ids_supp);
2647 	free(ports[pi].xstats_info.prev_values);
2648 	free(ports[pi].xstats_info.curr_values);
2649 	ports[pi].xstats_info.allocated = false;
2650 }
2651 
2652 /** Fill helper structures for specified port to show extended statistics. */
2653 static void
2654 fill_xstats_display_info_for_port(portid_t pi)
2655 {
2656 	unsigned int stat, stat_supp;
2657 	const char *xstat_name;
2658 	struct rte_port *port;
2659 	uint64_t *ids_supp;
2660 	int rc;
2661 
2662 	if (xstats_display_num == 0)
2663 		return;
2664 
2665 	if (pi == (portid_t)RTE_PORT_ALL) {
2666 		fill_xstats_display_info();
2667 		return;
2668 	}
2669 
2670 	port = &ports[pi];
2671 	if (port->port_status != RTE_PORT_STARTED)
2672 		return;
2673 
2674 	if (!port->xstats_info.allocated && alloc_xstats_display_info(pi) != 0)
2675 		rte_exit(EXIT_FAILURE,
2676 			 "Failed to allocate xstats display memory\n");
2677 
2678 	ids_supp = port->xstats_info.ids_supp;
2679 	for (stat = stat_supp = 0; stat < xstats_display_num; stat++) {
2680 		xstat_name = xstats_display[stat].name;
2681 		rc = rte_eth_xstats_get_id_by_name(pi, xstat_name,
2682 						   ids_supp + stat_supp);
2683 		if (rc != 0) {
2684 			fprintf(stderr, "No xstat '%s' on port %u - skip display slot %u\n",
2685 				xstat_name, pi, stat);
2686 			continue;
2687 		}
2688 		stat_supp++;
2689 	}
2690 
2691 	port->xstats_info.ids_supp_sz = stat_supp;
2692 }
2693 
2694 /** Fill helper structures for all ports to show extended statistics. */
2695 static void
2696 fill_xstats_display_info(void)
2697 {
2698 	portid_t pi;
2699 
2700 	if (xstats_display_num == 0)
2701 		return;
2702 
2703 	RTE_ETH_FOREACH_DEV(pi)
2704 		fill_xstats_display_info_for_port(pi);
2705 }
2706 
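/*
 * Start the given port, or all ports when RTE_PORT_ALL: reconfigure the
 * device and its Rx/Tx/hairpin queues if needed, start it, check the link
 * status unless disabled and bind hairpin peers when hairpin mode requires
 * it. Returns 0 on success, a negative value on failure.
 */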
2707 int
2708 start_port(portid_t pid)
2709 {
2710 	int diag, need_check_link_status = -1;
2711 	portid_t pi;
2712 	portid_t p_pi = RTE_MAX_ETHPORTS;
2713 	portid_t pl[RTE_MAX_ETHPORTS];
2714 	portid_t peer_pl[RTE_MAX_ETHPORTS];
2715 	uint16_t cnt_pi = 0;
2716 	uint16_t cfg_pi = 0;
2717 	int peer_pi;
2718 	queueid_t qi;
2719 	struct rte_port *port;
2720 	struct rte_eth_hairpin_cap cap;
2721 
2722 	if (port_id_is_invalid(pid, ENABLED_WARN))
2723 		return 0;
2724 
2725 	RTE_ETH_FOREACH_DEV(pi) {
2726 		if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
2727 			continue;
2728 
2729 		need_check_link_status = 0;
2730 		port = &ports[pi];
2731 		if (rte_atomic16_cmpset(&(port->port_status), RTE_PORT_STOPPED,
2732 						 RTE_PORT_HANDLING) == 0) {
2733 			fprintf(stderr, "Port %d is now not stopped\n", pi);
2734 			continue;
2735 		}
2736 
2737 		if (port->need_reconfig > 0) {
2738 			struct rte_eth_conf dev_conf;
2739 			int k;
2740 
2741 			port->need_reconfig = 0;
2742 
2743 			if (flow_isolate_all) {
2744 				int ret = port_flow_isolate(pi, 1);
2745 				if (ret) {
2746 					fprintf(stderr,
2747 						"Failed to apply isolated mode on port %d\n",
2748 						pi);
2749 					return -1;
2750 				}
2751 			}
2752 			configure_rxtx_dump_callbacks(0);
2753 			printf("Configuring Port %d (socket %u)\n", pi,
2754 					port->socket_id);
2755 			if (nb_hairpinq > 0 &&
2756 			    rte_eth_dev_hairpin_capability_get(pi, &cap)) {
2757 				fprintf(stderr,
2758 					"Port %d doesn't support hairpin queues\n",
2759 					pi);
2760 				return -1;
2761 			}
2762 
2763 			/* configure port */
2764 			diag = eth_dev_configure_mp(pi, nb_rxq + nb_hairpinq,
2765 						     nb_txq + nb_hairpinq,
2766 						     &(port->dev_conf));
2767 			if (diag != 0) {
2768 				if (rte_atomic16_cmpset(&(port->port_status),
2769 				RTE_PORT_HANDLING, RTE_PORT_STOPPED) == 0)
2770 					fprintf(stderr,
2771 						"Port %d can not be set back to stopped\n",
2772 						pi);
2773 				fprintf(stderr, "Fail to configure port %d\n",
2774 					pi);
2775 				/* try to reconfigure port next time */
2776 				port->need_reconfig = 1;
2777 				return -1;
2778 			}
2779 			/* get device configuration */
2780 			if (eth_dev_conf_get_print_err(pi, &dev_conf) != 0) {
2782 				fprintf(stderr,
2783 					"port %d can not get device configuration\n",
2784 					pi);
2785 				return -1;
2786 			}
2787 			/* Apply Rx offloads configuration */
2788 			if (dev_conf.rxmode.offloads !=
2789 			    port->dev_conf.rxmode.offloads) {
2790 				port->dev_conf.rxmode.offloads |=
2791 					dev_conf.rxmode.offloads;
2792 				for (k = 0;
2793 				     k < port->dev_info.max_rx_queues;
2794 				     k++)
2795 					port->rx_conf[k].offloads |=
2796 						dev_conf.rxmode.offloads;
2797 			}
2798 			/* Apply Tx offloads configuration */
2799 			if (dev_conf.txmode.offloads !=
2800 			    port->dev_conf.txmode.offloads) {
2801 				port->dev_conf.txmode.offloads |=
2802 					dev_conf.txmode.offloads;
2803 				for (k = 0;
2804 				     k < port->dev_info.max_tx_queues;
2805 				     k++)
2806 					port->tx_conf[k].offloads |=
2807 						dev_conf.txmode.offloads;
2808 			}
2809 		}
2810 		if (port->need_reconfig_queues > 0 && is_proc_primary()) {
2811 			port->need_reconfig_queues = 0;
2812 			/* setup tx queues */
2813 			for (qi = 0; qi < nb_txq; qi++) {
2814 				if ((numa_support) &&
2815 					(txring_numa[pi] != NUMA_NO_CONFIG))
2816 					diag = rte_eth_tx_queue_setup(pi, qi,
2817 						port->nb_tx_desc[qi],
2818 						txring_numa[pi],
2819 						&(port->tx_conf[qi]));
2820 				else
2821 					diag = rte_eth_tx_queue_setup(pi, qi,
2822 						port->nb_tx_desc[qi],
2823 						port->socket_id,
2824 						&(port->tx_conf[qi]));
2825 
2826 				if (diag == 0)
2827 					continue;
2828 
2829 				/* Failed to set up Tx queue, return */
2830 				if (rte_atomic16_cmpset(&(port->port_status),
2831 							RTE_PORT_HANDLING,
2832 							RTE_PORT_STOPPED) == 0)
2833 					fprintf(stderr,
2834 						"Port %d can not be set back to stopped\n",
2835 						pi);
2836 				fprintf(stderr,
2837 					"Fail to configure port %d tx queues\n",
2838 					pi);
2839 				/* try to reconfigure queues next time */
2840 				port->need_reconfig_queues = 1;
2841 				return -1;
2842 			}
2843 			for (qi = 0; qi < nb_rxq; qi++) {
2844 				/* setup rx queues */
2845 				if ((numa_support) &&
2846 					(rxring_numa[pi] != NUMA_NO_CONFIG)) {
2847 					struct rte_mempool * mp =
2848 						mbuf_pool_find
2849 							(rxring_numa[pi], 0);
2850 					if (mp == NULL) {
2851 						fprintf(stderr,
2852 							"Failed to setup RX queue: No mempool allocation on the socket %d\n",
2853 							rxring_numa[pi]);
2854 						return -1;
2855 					}
2856 
2857 					diag = rx_queue_setup(pi, qi,
2858 					     port->nb_rx_desc[qi],
2859 					     rxring_numa[pi],
2860 					     &(port->rx_conf[qi]),
2861 					     mp);
2862 				} else {
2863 					struct rte_mempool *mp =
2864 						mbuf_pool_find
2865 							(port->socket_id, 0);
2866 					if (mp == NULL) {
2867 						fprintf(stderr,
2868 							"Failed to setup RX queue: No mempool allocation on the socket %d\n",
2869 							port->socket_id);
2870 						return -1;
2871 					}
2872 					diag = rx_queue_setup(pi, qi,
2873 					     port->nb_rx_desc[qi],
2874 					     port->socket_id,
2875 					     &(port->rx_conf[qi]),
2876 					     mp);
2877 				}
2878 				if (diag == 0)
2879 					continue;
2880 
2881 				/* Failed to set up Rx queue, return */
2882 				if (rte_atomic16_cmpset(&(port->port_status),
2883 							RTE_PORT_HANDLING,
2884 							RTE_PORT_STOPPED) == 0)
2885 					fprintf(stderr,
2886 						"Port %d can not be set back to stopped\n",
2887 						pi);
2888 				fprintf(stderr,
2889 					"Fail to configure port %d rx queues\n",
2890 					pi);
2891 				/* try to reconfigure queues next time */
2892 				port->need_reconfig_queues = 1;
2893 				return -1;
2894 			}
2895 			/* setup hairpin queues */
2896 			if (setup_hairpin_queues(pi, p_pi, cnt_pi) != 0)
2897 				return -1;
2898 		}
2899 		configure_rxtx_dump_callbacks(verbose_level);
2900 		if (clear_ptypes) {
2901 			diag = rte_eth_dev_set_ptypes(pi, RTE_PTYPE_UNKNOWN,
2902 					NULL, 0);
2903 			if (diag < 0)
2904 				fprintf(stderr,
2905 					"Port %d: Failed to disable Ptype parsing\n",
2906 					pi);
2907 		}
2908 
2909 		p_pi = pi;
2910 		cnt_pi++;
2911 
2912 		/* start port */
2913 		diag = eth_dev_start_mp(pi);
2914 		if (diag < 0) {
2915 			fprintf(stderr, "Fail to start port %d: %s\n",
2916 				pi, rte_strerror(-diag));
2917 
2918 			/* Failed to start the port, set it back to stopped */
2919 			if (rte_atomic16_cmpset(&(port->port_status),
2920 				RTE_PORT_HANDLING, RTE_PORT_STOPPED) == 0)
2921 				fprintf(stderr,
2922 					"Port %d can not be set back to stopped\n",
2923 					pi);
2924 			continue;
2925 		}
2926 
2927 		if (rte_atomic16_cmpset(&(port->port_status),
2928 			RTE_PORT_HANDLING, RTE_PORT_STARTED) == 0)
2929 			fprintf(stderr, "Port %d can not be set into started\n",
2930 				pi);
2931 
2932 		if (eth_macaddr_get_print_err(pi, &port->eth_addr) == 0)
2933 			printf("Port %d: " RTE_ETHER_ADDR_PRT_FMT "\n", pi,
2934 					RTE_ETHER_ADDR_BYTES(&port->eth_addr));
2935 
2936 		/* at least one port started, need checking link status */
2937 		need_check_link_status = 1;
2938 
2939 		pl[cfg_pi++] = pi;
2940 	}
2941 
2942 	if (need_check_link_status == 1 && !no_link_check)
2943 		check_all_ports_link_status(RTE_PORT_ALL);
2944 	else if (need_check_link_status == 0)
2945 		fprintf(stderr, "Please stop the ports first\n");
2946 
2947 	if (hairpin_mode & 0xf) {
2948 		uint16_t i;
2949 		int j;
2950 
2951 		/* bind all started hairpin ports */
2952 		for (i = 0; i < cfg_pi; i++) {
2953 			pi = pl[i];
2954 			/* bind current Tx to all peer Rx */
2955 			peer_pi = rte_eth_hairpin_get_peer_ports(pi, peer_pl,
2956 							RTE_MAX_ETHPORTS, 1);
2957 			if (peer_pi < 0)
2958 				return peer_pi;
2959 			for (j = 0; j < peer_pi; j++) {
2960 				if (!port_is_started(peer_pl[j]))
2961 					continue;
2962 				diag = rte_eth_hairpin_bind(pi, peer_pl[j]);
2963 				if (diag < 0) {
2964 					fprintf(stderr,
2965 						"Error during binding hairpin Tx port %u to %u: %s\n",
2966 						pi, peer_pl[j],
2967 						rte_strerror(-diag));
2968 					return -1;
2969 				}
2970 			}
2971 			/* bind all peer Tx to current Rx */
2972 			peer_pi = rte_eth_hairpin_get_peer_ports(pi, peer_pl,
2973 							RTE_MAX_ETHPORTS, 0);
2974 			if (peer_pi < 0)
2975 				return peer_pi;
2976 			for (j = 0; j < peer_pi; j++) {
2977 				if (!port_is_started(peer_pl[j]))
2978 					continue;
2979 				diag = rte_eth_hairpin_bind(peer_pl[j], pi);
2980 				if (diag < 0) {
2981 					fprintf(stderr,
2982 						"Error during binding hairpin Tx port %u to %u: %s\n",
2983 						peer_pl[j], pi,
2984 						rte_strerror(-diag));
2985 					return -1;
2986 				}
2987 			}
2988 		}
2989 	}
2990 
2991 	fill_xstats_display_info_for_port(pid);
2992 
2993 	printf("Done\n");
2994 	return 0;
2995 }
2996 
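/*
 * Stop the given port, or all ports when RTE_PORT_ALL, after unbinding any
 * hairpin peers and flushing its flow rules. Ports still used for
 * forwarding or acting as bonding slaves are skipped.
 */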
2997 void
2998 stop_port(portid_t pid)
2999 {
3000 	portid_t pi;
3001 	struct rte_port *port;
3002 	int need_check_link_status = 0;
3003 	portid_t peer_pl[RTE_MAX_ETHPORTS];
3004 	int peer_pi;
3005 
3006 	if (port_id_is_invalid(pid, ENABLED_WARN))
3007 		return;
3008 
3009 	printf("Stopping ports...\n");
3010 
3011 	RTE_ETH_FOREACH_DEV(pi) {
3012 		if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
3013 			continue;
3014 
3015 		if (port_is_forwarding(pi) != 0 && test_done == 0) {
3016 			fprintf(stderr,
3017 				"Please remove port %d from forwarding configuration.\n",
3018 				pi);
3019 			continue;
3020 		}
3021 
3022 		if (port_is_bonding_slave(pi)) {
3023 			fprintf(stderr,
3024 				"Please remove port %d from bonded device.\n",
3025 				pi);
3026 			continue;
3027 		}
3028 
3029 		port = &ports[pi];
3030 		if (rte_atomic16_cmpset(&(port->port_status), RTE_PORT_STARTED,
3031 						RTE_PORT_HANDLING) == 0)
3032 			continue;
3033 
3034 		if (hairpin_mode & 0xf) {
3035 			int j;
3036 
3037 			rte_eth_hairpin_unbind(pi, RTE_MAX_ETHPORTS);
3038 			/* unbind all peer Tx from current Rx */
3039 			peer_pi = rte_eth_hairpin_get_peer_ports(pi, peer_pl,
3040 							RTE_MAX_ETHPORTS, 0);
3041 			if (peer_pi < 0)
3042 				continue;
3043 			for (j = 0; j < peer_pi; j++) {
3044 				if (!port_is_started(peer_pl[j]))
3045 					continue;
3046 				rte_eth_hairpin_unbind(peer_pl[j], pi);
3047 			}
3048 		}
3049 
3050 		if (port->flow_list)
3051 			port_flow_flush(pi);
3052 
3053 		if (eth_dev_stop_mp(pi) != 0)
3054 			RTE_LOG(ERR, EAL, "rte_eth_dev_stop failed for port %u\n",
3055 				pi);
3056 
3057 		if (rte_atomic16_cmpset(&(port->port_status),
3058 			RTE_PORT_HANDLING, RTE_PORT_STOPPED) == 0)
3059 			fprintf(stderr, "Port %d can not be set into stopped\n",
3060 				pi);
3061 		need_check_link_status = 1;
3062 	}
3063 	if (need_check_link_status && !no_link_check)
3064 		check_all_ports_link_status(RTE_PORT_ALL);
3065 
3066 	printf("Done\n");
3067 }
3068 
3069 static void
3070 remove_invalid_ports_in(portid_t *array, portid_t *total)
3071 {
3072 	portid_t i;
3073 	portid_t new_total = 0;
3074 
3075 	for (i = 0; i < *total; i++)
3076 		if (!port_id_is_invalid(array[i], DISABLED_WARN)) {
3077 			array[new_total] = array[i];
3078 			new_total++;
3079 		}
3080 	*total = new_total;
3081 }
3082 
3083 static void
3084 remove_invalid_ports(void)
3085 {
3086 	remove_invalid_ports_in(ports_ids, &nb_ports);
3087 	remove_invalid_ports_in(fwd_ports_ids, &nb_fwd_ports);
3088 	nb_cfg_ports = nb_fwd_ports;
3089 }
3090 
3091 void
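/*
 * Close the given port, or all ports when RTE_PORT_ALL: flush flow rules
 * and flex items, close the ethdev (primary process only) and release the
 * memory used for the xstats display.
 */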
3092 close_port(portid_t pid)
3093 {
3094 	portid_t pi;
3095 	struct rte_port *port;
3096 
3097 	if (port_id_is_invalid(pid, ENABLED_WARN))
3098 		return;
3099 
3100 	printf("Closing ports...\n");
3101 
3102 	RTE_ETH_FOREACH_DEV(pi) {
3103 		if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
3104 			continue;
3105 
3106 		if (port_is_forwarding(pi) != 0 && test_done == 0) {
3107 			fprintf(stderr,
3108 				"Please remove port %d from forwarding configuration.\n",
3109 				pi);
3110 			continue;
3111 		}
3112 
3113 		if (port_is_bonding_slave(pi)) {
3114 			fprintf(stderr,
3115 				"Please remove port %d from bonded device.\n",
3116 				pi);
3117 			continue;
3118 		}
3119 
3120 		port = &ports[pi];
3121 		if (rte_atomic16_cmpset(&(port->port_status),
3122 			RTE_PORT_CLOSED, RTE_PORT_CLOSED) == 1) {
3123 			fprintf(stderr, "Port %d is already closed\n", pi);
3124 			continue;
3125 		}
3126 
3127 		if (is_proc_primary()) {
3128 			port_flow_flush(pi);
3129 			port_flex_item_flush(pi);
3130 			rte_eth_dev_close(pi);
3131 		}
3132 
3133 		free_xstats_display_info(pi);
3134 	}
3135 
3136 	remove_invalid_ports();
3137 	printf("Done\n");
3138 }
3139 
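/*
 * Reset the given port, or all ports when RTE_PORT_ALL. The port(s) must
 * be stopped first; a successful reset forces a full reconfiguration of
 * the port and its queues on the next start.
 */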
3140 void
3141 reset_port(portid_t pid)
3142 {
3143 	int diag;
3144 	portid_t pi;
3145 	struct rte_port *port;
3146 
3147 	if (port_id_is_invalid(pid, ENABLED_WARN))
3148 		return;
3149 
3150 	if ((pid == (portid_t)RTE_PORT_ALL && !all_ports_stopped()) ||
3151 		(pid != (portid_t)RTE_PORT_ALL && !port_is_stopped(pid))) {
3152 		fprintf(stderr,
3153 			"Can not reset port(s), please stop port(s) first.\n");
3154 		return;
3155 	}
3156 
3157 	printf("Resetting ports...\n");
3158 
3159 	RTE_ETH_FOREACH_DEV(pi) {
3160 		if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
3161 			continue;
3162 
3163 		if (port_is_forwarding(pi) != 0 && test_done == 0) {
3164 			fprintf(stderr,
3165 				"Please remove port %d from forwarding configuration.\n",
3166 				pi);
3167 			continue;
3168 		}
3169 
3170 		if (port_is_bonding_slave(pi)) {
3171 			fprintf(stderr,
3172 				"Please remove port %d from bonded device.\n",
3173 				pi);
3174 			continue;
3175 		}
3176 
3177 		diag = rte_eth_dev_reset(pi);
3178 		if (diag == 0) {
3179 			port = &ports[pi];
3180 			port->need_reconfig = 1;
3181 			port->need_reconfig_queues = 1;
3182 		} else {
3183 			fprintf(stderr, "Failed to reset port %d. diag=%d\n",
3184 				pi, diag);
3185 		}
3186 	}
3187 
3188 	printf("Done\n");
3189 }
3190 
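/*
 * Attach (hotplug) a new port described by a devargs identifier. The new
 * port(s) are set up either from the RTE_ETH_EVENT_NEW event or, when
 * event setup is disabled, by iterating over the devices matching the
 * identifier.
 */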
3191 void
3192 attach_port(char *identifier)
3193 {
3194 	portid_t pi;
3195 	struct rte_dev_iterator iterator;
3196 
3197 	printf("Attaching a new port...\n");
3198 
3199 	if (identifier == NULL) {
3200 		fprintf(stderr, "Invalid parameters are specified\n");
3201 		return;
3202 	}
3203 
3204 	if (rte_dev_probe(identifier) < 0) {
3205 		TESTPMD_LOG(ERR, "Failed to attach port %s\n", identifier);
3206 		return;
3207 	}
3208 
3209 	/* first attach mode: event */
3210 	if (setup_on_probe_event) {
3211 		/* new ports are detected on RTE_ETH_EVENT_NEW event */
3212 		for (pi = 0; pi < RTE_MAX_ETHPORTS; pi++)
3213 			if (ports[pi].port_status == RTE_PORT_HANDLING &&
3214 					ports[pi].need_setup != 0)
3215 				setup_attached_port(pi);
3216 		return;
3217 	}
3218 
3219 	/* second attach mode: iterator */
3220 	RTE_ETH_FOREACH_MATCHING_DEV(pi, identifier, &iterator) {
3221 		/* setup ports matching the devargs used for probing */
3222 		if (port_is_forwarding(pi))
3223 			continue; /* port was already attached before */
3224 		setup_attached_port(pi);
3225 	}
3226 }
3227 
3228 static void
3229 setup_attached_port(portid_t pi)
3230 {
3231 	unsigned int socket_id;
3232 	int ret;
3233 
3234 	socket_id = (unsigned)rte_eth_dev_socket_id(pi);
3235 	/* if socket_id is invalid, set to the first available socket. */
3236 	if (check_socket_id(socket_id) < 0)
3237 		socket_id = socket_ids[0];
3238 	reconfig(pi, socket_id);
3239 	ret = rte_eth_promiscuous_enable(pi);
3240 	if (ret != 0)
3241 		fprintf(stderr,
3242 			"Error during enabling promiscuous mode for port %u: %s - ignore\n",
3243 			pi, rte_strerror(-ret));
3244 
3245 	ports_ids[nb_ports++] = pi;
3246 	fwd_ports_ids[nb_fwd_ports++] = pi;
3247 	nb_cfg_ports = nb_fwd_ports;
3248 	ports[pi].need_setup = 0;
3249 	ports[pi].port_status = RTE_PORT_STOPPED;
3250 
3251 	printf("Port %d is attached. Now total ports is %d\n", pi, nb_ports);
3252 	printf("Done\n");
3253 }
3254 
3255 static void
3256 detach_device(struct rte_device *dev)
3257 {
3258 	portid_t sibling;
3259 
3260 	if (dev == NULL) {
3261 		fprintf(stderr, "Device already removed\n");
3262 		return;
3263 	}
3264 
3265 	printf("Removing a device...\n");
3266 
3267 	RTE_ETH_FOREACH_DEV_OF(sibling, dev) {
3268 		if (ports[sibling].port_status != RTE_PORT_CLOSED) {
3269 			if (ports[sibling].port_status != RTE_PORT_STOPPED) {
3270 				fprintf(stderr, "Port %u not stopped\n",
3271 					sibling);
3272 				return;
3273 			}
3274 			port_flow_flush(sibling);
3275 		}
3276 	}
3277 
3278 	if (rte_dev_remove(dev) < 0) {
3279 		TESTPMD_LOG(ERR, "Failed to detach device %s\n", dev->name);
3280 		return;
3281 	}
3282 	remove_invalid_ports();
3283 
3284 	printf("Device is detached\n");
3285 	printf("Now total ports is %d\n", nb_ports);
3286 	printf("Done\n");
3287 	return;
3288 }
3289 
3290 void
3291 detach_port_device(portid_t port_id)
3292 {
3293 	int ret;
3294 	struct rte_eth_dev_info dev_info;
3295 
3296 	if (port_id_is_invalid(port_id, ENABLED_WARN))
3297 		return;
3298 
3299 	if (ports[port_id].port_status != RTE_PORT_CLOSED) {
3300 		if (ports[port_id].port_status != RTE_PORT_STOPPED) {
3301 			fprintf(stderr, "Port not stopped\n");
3302 			return;
3303 		}
3304 		fprintf(stderr, "Port was not closed\n");
3305 	}
3306 
3307 	ret = eth_dev_info_get_print_err(port_id, &dev_info);
3308 	if (ret != 0) {
3309 		TESTPMD_LOG(ERR,
3310 			"Failed to get device info for port %d, not detaching\n",
3311 			port_id);
3312 		return;
3313 	}
3314 	detach_device(dev_info.device);
3315 }
3316 
3317 void
3318 detach_devargs(char *identifier)
3319 {
3320 	struct rte_dev_iterator iterator;
3321 	struct rte_devargs da;
3322 	portid_t port_id;
3323 
3324 	printf("Removing a device...\n");
3325 
3326 	memset(&da, 0, sizeof(da));
3327 	if (rte_devargs_parsef(&da, "%s", identifier)) {
3328 		fprintf(stderr, "cannot parse identifier\n");
3329 		return;
3330 	}
3331 
3332 	RTE_ETH_FOREACH_MATCHING_DEV(port_id, identifier, &iterator) {
3333 		if (ports[port_id].port_status != RTE_PORT_CLOSED) {
3334 			if (ports[port_id].port_status != RTE_PORT_STOPPED) {
3335 				fprintf(stderr, "Port %u not stopped\n",
3336 					port_id);
3337 				rte_eth_iterator_cleanup(&iterator);
3338 				rte_devargs_reset(&da);
3339 				return;
3340 			}
3341 			port_flow_flush(port_id);
3342 		}
3343 	}
3344 
3345 	if (rte_eal_hotplug_remove(da.bus->name, da.name) != 0) {
3346 		TESTPMD_LOG(ERR, "Failed to detach device %s(%s)\n",
3347 			    da.name, da.bus->name);
3348 		rte_devargs_reset(&da);
3349 		return;
3350 	}
3351 
3352 	remove_invalid_ports();
3353 
3354 	printf("Device %s is detached\n", identifier);
3355 	printf("Now total ports is %d\n", nb_ports);
3356 	printf("Done\n");
3357 	rte_devargs_reset(&da);
3358 }
3359 
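/*
 * Clean up on exit: stop forwarding if it is still running, stop and close
 * every port, stop the hotplug monitoring when it was enabled and free the
 * mbuf mempools.
 */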
3360 void
3361 pmd_test_exit(void)
3362 {
3363 	portid_t pt_id;
3364 	unsigned int i;
3365 	int ret;
3366 
3367 	if (test_done == 0)
3368 		stop_packet_forwarding();
3369 
3370 #ifndef RTE_EXEC_ENV_WINDOWS
3371 	for (i = 0 ; i < RTE_DIM(mempools) ; i++) {
3372 		if (mempools[i]) {
3373 			if (mp_alloc_type == MP_ALLOC_ANON)
3374 				rte_mempool_mem_iter(mempools[i], dma_unmap_cb,
3375 						     NULL);
3376 		}
3377 	}
3378 #endif
3379 	if (ports != NULL) {
3380 		no_link_check = 1;
3381 		RTE_ETH_FOREACH_DEV(pt_id) {
3382 			printf("\nStopping port %d...\n", pt_id);
3383 			fflush(stdout);
3384 			stop_port(pt_id);
3385 		}
3386 		RTE_ETH_FOREACH_DEV(pt_id) {
3387 			printf("\nShutting down port %d...\n", pt_id);
3388 			fflush(stdout);
3389 			close_port(pt_id);
3390 		}
3391 	}
3392 
3393 	if (hot_plug) {
3394 		ret = rte_dev_event_monitor_stop();
3395 		if (ret) {
3396 			RTE_LOG(ERR, EAL,
3397 				"fail to stop device event monitor.");
3398 			return;
3399 		}
3400 
3401 		ret = rte_dev_event_callback_unregister(NULL,
3402 			dev_event_callback, NULL);
3403 		if (ret < 0) {
3404 			RTE_LOG(ERR, EAL,
3405 				"fail to unregister device event callback.\n");
3406 			return;
3407 		}
3408 
3409 		ret = rte_dev_hotplug_handle_disable();
3410 		if (ret) {
3411 			RTE_LOG(ERR, EAL,
3412 				"fail to disable hotplug handling.\n");
3413 			return;
3414 		}
3415 	}
3416 	for (i = 0 ; i < RTE_DIM(mempools) ; i++) {
3417 		if (mempools[i])
3418 			mempool_free_mp(mempools[i]);
3419 	}
3420 	free(xstats_display);
3421 
3422 	printf("\nBye...\n");
3423 }
3424 
3425 typedef void (*cmd_func_t)(void);
3426 struct pmd_test_command {
3427 	const char *cmd_name;
3428 	cmd_func_t cmd_func;
3429 };
3430 
3431 /* Check the link status of all ports for up to 9 seconds, then print it */
3432 static void
3433 check_all_ports_link_status(uint32_t port_mask)
3434 {
3435 #define CHECK_INTERVAL 100 /* 100ms */
3436 #define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
3437 	portid_t portid;
3438 	uint8_t count, all_ports_up, print_flag = 0;
3439 	struct rte_eth_link link;
3440 	int ret;
3441 	char link_status[RTE_ETH_LINK_MAX_STR_LEN];
3442 
3443 	printf("Checking link statuses...\n");
3444 	fflush(stdout);
3445 	for (count = 0; count <= MAX_CHECK_TIME; count++) {
3446 		all_ports_up = 1;
3447 		RTE_ETH_FOREACH_DEV(portid) {
3448 			if ((port_mask & (1 << portid)) == 0)
3449 				continue;
3450 			memset(&link, 0, sizeof(link));
3451 			ret = rte_eth_link_get_nowait(portid, &link);
3452 			if (ret < 0) {
3453 				all_ports_up = 0;
3454 				if (print_flag == 1)
3455 					fprintf(stderr,
3456 						"Port %u link get failed: %s\n",
3457 						portid, rte_strerror(-ret));
3458 				continue;
3459 			}
3460 			/* print link status if flag set */
3461 			if (print_flag == 1) {
3462 				rte_eth_link_to_str(link_status,
3463 					sizeof(link_status), &link);
3464 				printf("Port %d %s\n", portid, link_status);
3465 				continue;
3466 			}
3467 			/* clear all_ports_up flag if any link down */
3468 			if (link.link_status == ETH_LINK_DOWN) {
3469 				all_ports_up = 0;
3470 				break;
3471 			}
3472 		}
3473 		/* after finally printing all link status, get out */
3474 		if (print_flag == 1)
3475 			break;
3476 
3477 		if (all_ports_up == 0) {
3478 			fflush(stdout);
3479 			rte_delay_ms(CHECK_INTERVAL);
3480 		}
3481 
3482 		/* set the print_flag if all ports up or timeout */
3483 		if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
3484 			print_flag = 1;
3485 		}
3486 
3487 		if (lsc_interrupt)
3488 			break;
3489 	}
3490 }
3491 
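/*
 * Deferred handler for a device removal event: stop forwarding if the port
 * was in use, stop and close the port, detach its device and restart
 * forwarding if it had to be stopped.
 */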
3492 static void
3493 rmv_port_callback(void *arg)
3494 {
3495 	int need_to_start = 0;
3496 	int org_no_link_check = no_link_check;
3497 	portid_t port_id = (intptr_t)arg;
3498 	struct rte_eth_dev_info dev_info;
3499 	int ret;
3500 
3501 	RTE_ETH_VALID_PORTID_OR_RET(port_id);
3502 
3503 	if (!test_done && port_is_forwarding(port_id)) {
3504 		need_to_start = 1;
3505 		stop_packet_forwarding();
3506 	}
3507 	no_link_check = 1;
3508 	stop_port(port_id);
3509 	no_link_check = org_no_link_check;
3510 
3511 	ret = eth_dev_info_get_print_err(port_id, &dev_info);
3512 	if (ret != 0)
3513 		TESTPMD_LOG(ERR,
3514 			"Failed to get device info for port %d, not detaching\n",
3515 			port_id);
3516 	else {
3517 		struct rte_device *device = dev_info.device;
3518 		close_port(port_id);
3519 		detach_device(device); /* might be already removed or have more ports */
3520 	}
3521 	if (need_to_start)
3522 		start_packet_forwarding(0);
3523 }
3524 
3525 /* This function is used by the interrupt thread */
3526 static int
3527 eth_event_callback(portid_t port_id, enum rte_eth_event_type type, void *param,
3528 		  void *ret_param)
3529 {
3530 	RTE_SET_USED(param);
3531 	RTE_SET_USED(ret_param);
3532 
3533 	if (type >= RTE_ETH_EVENT_MAX) {
3534 		fprintf(stderr,
3535 			"\nPort %" PRIu16 ": %s called upon invalid event %d\n",
3536 			port_id, __func__, type);
3537 		fflush(stderr);
3538 	} else if (event_print_mask & (UINT32_C(1) << type)) {
3539 		printf("\nPort %" PRIu16 ": %s event\n", port_id,
3540 			eth_event_desc[type]);
3541 		fflush(stdout);
3542 	}
3543 
3544 	switch (type) {
3545 	case RTE_ETH_EVENT_NEW:
3546 		ports[port_id].need_setup = 1;
3547 		ports[port_id].port_status = RTE_PORT_HANDLING;
3548 		break;
3549 	case RTE_ETH_EVENT_INTR_RMV:
3550 		if (port_id_is_invalid(port_id, DISABLED_WARN))
3551 			break;
3552 		if (rte_eal_alarm_set(100000,
3553 				rmv_port_callback, (void *)(intptr_t)port_id))
3554 			fprintf(stderr,
3555 				"Could not set up deferred device removal\n");
3556 		break;
3557 	case RTE_ETH_EVENT_DESTROY:
3558 		ports[port_id].port_status = RTE_PORT_CLOSED;
3559 		printf("Port %u is closed\n", port_id);
3560 		break;
3561 	default:
3562 		break;
3563 	}
3564 	return 0;
3565 }
3566 
3567 static int
3568 register_eth_event_callback(void)
3569 {
3570 	int ret;
3571 	enum rte_eth_event_type event;
3572 
3573 	for (event = RTE_ETH_EVENT_UNKNOWN;
3574 			event < RTE_ETH_EVENT_MAX; event++) {
3575 		ret = rte_eth_dev_callback_register(RTE_ETH_ALL,
3576 				event,
3577 				eth_event_callback,
3578 				NULL);
3579 		if (ret != 0) {
3580 			TESTPMD_LOG(ERR, "Failed to register callback for "
3581 					"%s event\n", eth_event_desc[event]);
3582 			return -1;
3583 		}
3584 	}
3585 
3586 	return 0;
3587 }
3588 
3589 /* This function is used by the interrupt thread */
3590 static void
3591 dev_event_callback(const char *device_name, enum rte_dev_event_type type,
3592 			     __rte_unused void *arg)
3593 {
3594 	uint16_t port_id;
3595 	int ret;
3596 
3597 	if (type >= RTE_DEV_EVENT_MAX) {
3598 		fprintf(stderr, "%s called upon invalid event %d\n",
3599 			__func__, type);
3600 		fflush(stderr);
3601 	}
3602 
3603 	switch (type) {
3604 	case RTE_DEV_EVENT_REMOVE:
3605 		RTE_LOG(DEBUG, EAL, "The device: %s has been removed!\n",
3606 			device_name);
3607 		ret = rte_eth_dev_get_port_by_name(device_name, &port_id);
3608 		if (ret) {
3609 			RTE_LOG(ERR, EAL, "can not get port by device %s!\n",
3610 				device_name);
3611 			return;
3612 		}
3613 		/*
3614 		 * Because the user's callback is invoked from within the EAL
3615 		 * interrupt callback, that interrupt callback must return
3616 		 * before it can be unregistered when detaching the device.
3617 		 * So finish the callback quickly and use a deferred alarm to
3618 		 * detach the device instead. This is a workaround; once device
3619 		 * detaching is moved into the EAL, the deferred removal can
3620 		 * be dropped.
3621 		 */
3622 		if (rte_eal_alarm_set(100000,
3623 				rmv_port_callback, (void *)(intptr_t)port_id))
3624 			RTE_LOG(ERR, EAL,
3625 				"Could not set up deferred device removal\n");
3626 		break;
3627 	case RTE_DEV_EVENT_ADD:
3628 		RTE_LOG(ERR, EAL, "The device: %s has been added!\n",
3629 			device_name);
3630 		/* TODO: After finish kernel driver binding,
3631 		 * begin to attach port.
3632 		 */
3633 		break;
3634 	default:
3635 		break;
3636 	}
3637 }
3638 
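/*
 * Initialize the per-queue Rx/Tx configuration of a port from the driver
 * defaults, keeping any offloads already selected and applying the
 * descriptor counts and thresholds given on the command line.
 */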
3639 static void
3640 rxtx_port_config(portid_t pid)
3641 {
3642 	uint16_t qid;
3643 	uint64_t offloads;
3644 	struct rte_port *port = &ports[pid];
3645 
3646 	for (qid = 0; qid < nb_rxq; qid++) {
3647 		offloads = port->rx_conf[qid].offloads;
3648 		port->rx_conf[qid] = port->dev_info.default_rxconf;
3649 
3650 		if (rxq_share > 0 &&
3651 		    (port->dev_info.dev_capa & RTE_ETH_DEV_CAPA_RXQ_SHARE)) {
3652 			/* Non-zero share group to enable RxQ share. */
3653 			port->rx_conf[qid].share_group = pid / rxq_share + 1;
3654 			port->rx_conf[qid].share_qid = qid; /* Equal mapping. */
3655 		}
3656 
3657 		if (offloads != 0)
3658 			port->rx_conf[qid].offloads = offloads;
3659 
3660 		/* Check if any Rx parameters have been passed */
3661 		if (rx_pthresh != RTE_PMD_PARAM_UNSET)
3662 			port->rx_conf[qid].rx_thresh.pthresh = rx_pthresh;
3663 
3664 		if (rx_hthresh != RTE_PMD_PARAM_UNSET)
3665 			port->rx_conf[qid].rx_thresh.hthresh = rx_hthresh;
3666 
3667 		if (rx_wthresh != RTE_PMD_PARAM_UNSET)
3668 			port->rx_conf[qid].rx_thresh.wthresh = rx_wthresh;
3669 
3670 		if (rx_free_thresh != RTE_PMD_PARAM_UNSET)
3671 			port->rx_conf[qid].rx_free_thresh = rx_free_thresh;
3672 
3673 		if (rx_drop_en != RTE_PMD_PARAM_UNSET)
3674 			port->rx_conf[qid].rx_drop_en = rx_drop_en;
3675 
3676 		port->nb_rx_desc[qid] = nb_rxd;
3677 	}
3678 
3679 	for (qid = 0; qid < nb_txq; qid++) {
3680 		offloads = port->tx_conf[qid].offloads;
3681 		port->tx_conf[qid] = port->dev_info.default_txconf;
3682 		if (offloads != 0)
3683 			port->tx_conf[qid].offloads = offloads;
3684 
3685 		/* Check if any Tx parameters have been passed */
3686 		if (tx_pthresh != RTE_PMD_PARAM_UNSET)
3687 			port->tx_conf[qid].tx_thresh.pthresh = tx_pthresh;
3688 
3689 		if (tx_hthresh != RTE_PMD_PARAM_UNSET)
3690 			port->tx_conf[qid].tx_thresh.hthresh = tx_hthresh;
3691 
3692 		if (tx_wthresh != RTE_PMD_PARAM_UNSET)
3693 			port->tx_conf[qid].tx_thresh.wthresh = tx_wthresh;
3694 
3695 		if (tx_rs_thresh != RTE_PMD_PARAM_UNSET)
3696 			port->tx_conf[qid].tx_rs_thresh = tx_rs_thresh;
3697 
3698 		if (tx_free_thresh != RTE_PMD_PARAM_UNSET)
3699 			port->tx_conf[qid].tx_free_thresh = tx_free_thresh;
3700 
3701 		port->nb_tx_desc[qid] = nb_txd;
3702 	}
3703 }
3704 
3705 /*
3706  * Helper function to set MTU from frame size
3707  *
3708  * port->dev_info should be set before calling this function.
3709  *
3710  * return 0 on success, negative on error
3711  */
3712 int
3713 update_mtu_from_frame_size(portid_t portid, uint32_t max_rx_pktlen)
3714 {
3715 	struct rte_port *port = &ports[portid];
3716 	uint32_t eth_overhead;
3717 	uint16_t mtu, new_mtu;
3718 
3719 	eth_overhead = get_eth_overhead(&port->dev_info);
3720 
3721 	if (rte_eth_dev_get_mtu(portid, &mtu) != 0) {
3722 		printf("Failed to get MTU for port %u\n", portid);
3723 		return -1;
3724 	}
3725 
3726 	new_mtu = max_rx_pktlen - eth_overhead;
3727 
3728 	if (mtu == new_mtu)
3729 		return 0;
3730 
3731 	if (eth_dev_set_mtu_mp(portid, new_mtu) != 0) {
3732 		fprintf(stderr,
3733 			"Failed to set MTU to %u for port %u\n",
3734 			new_mtu, portid);
3735 		return -1;
3736 	}
3737 
3738 	port->dev_conf.rxmode.mtu = new_mtu;
3739 
3740 	return 0;
3741 }
3742 
3743 void
3744 init_port_config(void)
3745 {
3746 	portid_t pid;
3747 	struct rte_port *port;
3748 	int ret, i;
3749 
3750 	RTE_ETH_FOREACH_DEV(pid) {
3751 		port = &ports[pid];
3752 		port->dev_conf.fdir_conf = fdir_conf;
3753 
3754 		ret = eth_dev_info_get_print_err(pid, &port->dev_info);
3755 		if (ret != 0)
3756 			return;
3757 
3758 		if (nb_rxq > 1) {
3759 			port->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
3760 			port->dev_conf.rx_adv_conf.rss_conf.rss_hf =
3761 				rss_hf & port->dev_info.flow_type_rss_offloads;
3762 		} else {
3763 			port->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
3764 			port->dev_conf.rx_adv_conf.rss_conf.rss_hf = 0;
3765 		}
3766 
3767 		if (port->dcb_flag == 0) {
3768 			if (port->dev_conf.rx_adv_conf.rss_conf.rss_hf != 0) {
3769 				port->dev_conf.rxmode.mq_mode =
3770 					(enum rte_eth_rx_mq_mode)
3771 						(rx_mq_mode & ETH_MQ_RX_RSS);
3772 			} else {
3773 				port->dev_conf.rxmode.mq_mode = ETH_MQ_RX_NONE;
3774 				port->dev_conf.rxmode.offloads &=
3775 						~DEV_RX_OFFLOAD_RSS_HASH;
3776 
3777 				for (i = 0;
3778 				     i < port->dev_info.nb_rx_queues;
3779 				     i++)
3780 					port->rx_conf[i].offloads &=
3781 						~DEV_RX_OFFLOAD_RSS_HASH;
3782 			}
3783 		}
3784 
3785 		rxtx_port_config(pid);
3786 
3787 		ret = eth_macaddr_get_print_err(pid, &port->eth_addr);
3788 		if (ret != 0)
3789 			return;
3790 
3791 #if defined RTE_NET_IXGBE && defined RTE_LIBRTE_IXGBE_BYPASS
3792 		rte_pmd_ixgbe_bypass_init(pid);
3793 #endif
3794 
3795 		if (lsc_interrupt && (*port->dev_info.dev_flags & RTE_ETH_DEV_INTR_LSC))
3796 			port->dev_conf.intr_conf.lsc = 1;
3797 		if (rmv_interrupt && (*port->dev_info.dev_flags & RTE_ETH_DEV_INTR_RMV))
3798 			port->dev_conf.intr_conf.rmv = 1;
3799 	}
3800 }
3801 
3802 void set_port_slave_flag(portid_t slave_pid)
3803 {
3804 	struct rte_port *port;
3805 
3806 	port = &ports[slave_pid];
3807 	port->slave_flag = 1;
3808 }
3809 
3810 void clear_port_slave_flag(portid_t slave_pid)
3811 {
3812 	struct rte_port *port;
3813 
3814 	port = &ports[slave_pid];
3815 	port->slave_flag = 0;
3816 }
3817 
3818 uint8_t port_is_bonding_slave(portid_t slave_pid)
3819 {
3820 	struct rte_port *port;
3821 	struct rte_eth_dev_info dev_info;
3822 	int ret;
3823 
3824 	port = &ports[slave_pid];
3825 	ret = eth_dev_info_get_print_err(slave_pid, &dev_info);
3826 	if (ret != 0) {
3827 		TESTPMD_LOG(ERR,
3828 			"Failed to get device info for port id %d,"
3829 			"cannot determine if the port is a bonded slave",
3830 			slave_pid);
3831 		return 0;
3832 	}
3833 	if ((*dev_info.dev_flags & RTE_ETH_DEV_BONDED_SLAVE) || (port->slave_flag == 1))
3834 		return 1;
3835 	return 0;
3836 }
3837 
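/*
 * VLAN IDs used to build the VMDQ+DCB pool mapping and installed in the
 * VLAN filter of a port when DCB is configured.
 */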
3838 const uint16_t vlan_tags[] = {
3839 		0,  1,  2,  3,  4,  5,  6,  7,
3840 		8,  9, 10, 11,  12, 13, 14, 15,
3841 		16, 17, 18, 19, 20, 21, 22, 23,
3842 		24, 25, 26, 27, 28, 29, 30, 31
3843 };
3844 
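/*
 * Fill in the DCB part of *eth_conf: when dcb_mode is DCB_VT_ENABLED a
 * VMDQ+DCB configuration is built from the vlan_tags[] table above,
 * otherwise a plain DCB (optionally combined with RSS) configuration is
 * used; pfc_en additionally requests priority flow control support.
 */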
3845 static int
3846 get_eth_dcb_conf(portid_t pid, struct rte_eth_conf *eth_conf,
3847 		 enum dcb_mode_enable dcb_mode,
3848 		 enum rte_eth_nb_tcs num_tcs,
3849 		 uint8_t pfc_en)
3850 {
3851 	uint8_t i;
3852 	int32_t rc;
3853 	struct rte_eth_rss_conf rss_conf;
3854 
3855 	/*
3856 	 * Builds up the correct configuration for dcb+vt based on the vlan tags array
3857 	 * given above, and the number of traffic classes available for use.
3858 	 */
3859 	if (dcb_mode == DCB_VT_ENABLED) {
3860 		struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
3861 				&eth_conf->rx_adv_conf.vmdq_dcb_conf;
3862 		struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3863 				&eth_conf->tx_adv_conf.vmdq_dcb_tx_conf;
3864 
3865 		/* VMDQ+DCB RX and TX configurations */
3866 		vmdq_rx_conf->enable_default_pool = 0;
3867 		vmdq_rx_conf->default_pool = 0;
3868 		vmdq_rx_conf->nb_queue_pools =
3869 			(num_tcs ==  ETH_4_TCS ? ETH_32_POOLS : ETH_16_POOLS);
3870 		vmdq_tx_conf->nb_queue_pools =
3871 			(num_tcs ==  ETH_4_TCS ? ETH_32_POOLS : ETH_16_POOLS);
3872 
3873 		vmdq_rx_conf->nb_pool_maps = vmdq_rx_conf->nb_queue_pools;
3874 		for (i = 0; i < vmdq_rx_conf->nb_pool_maps; i++) {
3875 			vmdq_rx_conf->pool_map[i].vlan_id = vlan_tags[i];
3876 			vmdq_rx_conf->pool_map[i].pools =
3877 				1 << (i % vmdq_rx_conf->nb_queue_pools);
3878 		}
3879 		for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3880 			vmdq_rx_conf->dcb_tc[i] = i % num_tcs;
3881 			vmdq_tx_conf->dcb_tc[i] = i % num_tcs;
3882 		}
3883 
3884 		/* set the DCB multi-queue mode for RX and TX */
3885 		eth_conf->rxmode.mq_mode =
3886 				(enum rte_eth_rx_mq_mode)
3887 					(rx_mq_mode & ETH_MQ_RX_VMDQ_DCB);
3888 		eth_conf->txmode.mq_mode = ETH_MQ_TX_VMDQ_DCB;
3889 	} else {
3890 		struct rte_eth_dcb_rx_conf *rx_conf =
3891 				&eth_conf->rx_adv_conf.dcb_rx_conf;
3892 		struct rte_eth_dcb_tx_conf *tx_conf =
3893 				&eth_conf->tx_adv_conf.dcb_tx_conf;
3894 
3895 		memset(&rss_conf, 0, sizeof(struct rte_eth_rss_conf));
3896 
3897 		rc = rte_eth_dev_rss_hash_conf_get(pid, &rss_conf);
3898 		if (rc != 0)
3899 			return rc;
3900 
3901 		rx_conf->nb_tcs = num_tcs;
3902 		tx_conf->nb_tcs = num_tcs;
3903 
3904 		for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3905 			rx_conf->dcb_tc[i] = i % num_tcs;
3906 			tx_conf->dcb_tc[i] = i % num_tcs;
3907 		}
3908 
3909 		eth_conf->rxmode.mq_mode =
3910 				(enum rte_eth_rx_mq_mode)
3911 					(rx_mq_mode & ETH_MQ_RX_DCB_RSS);
3912 		eth_conf->rx_adv_conf.rss_conf = rss_conf;
3913 		eth_conf->txmode.mq_mode = ETH_MQ_TX_DCB;
3914 	}
3915 
3916 	if (pfc_en)
3917 		eth_conf->dcb_capability_en =
3918 				ETH_DCB_PG_SUPPORT | ETH_DCB_PFC_SUPPORT;
3919 	else
3920 		eth_conf->dcb_capability_en = ETH_DCB_PG_SUPPORT;
3921 
3922 	return 0;
3923 }
3924 
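/*
 * Switch a port to DCB mode: build the DCB configuration, re-configure
 * the device with it, adjust nb_rxq/nb_txq for the selected mode, enable
 * VLAN filtering for the vlan_tags[] entries and mark the port (and
 * testpmd globally) as running in DCB mode. Not supported when testpmd
 * runs as multiple processes.
 */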
3925 int
3926 init_port_dcb_config(portid_t pid,
3927 		     enum dcb_mode_enable dcb_mode,
3928 		     enum rte_eth_nb_tcs num_tcs,
3929 		     uint8_t pfc_en)
3930 {
3931 	struct rte_eth_conf port_conf;
3932 	struct rte_port *rte_port;
3933 	int retval;
3934 	uint16_t i;
3935 
3936 	if (num_procs > 1) {
3937 		fprintf(stderr, "The multi-process feature doesn't support DCB.\n");
3938 		return -ENOTSUP;
3939 	}
3940 	rte_port = &ports[pid];
3941 
3942 	/* retain the original device configuration. */
3943 	memcpy(&port_conf, &rte_port->dev_conf, sizeof(struct rte_eth_conf));
3944 
3945 	/* set configuration of DCB in VT mode and DCB in non-VT mode */
3946 	retval = get_eth_dcb_conf(pid, &port_conf, dcb_mode, num_tcs, pfc_en);
3947 	if (retval < 0)
3948 		return retval;
3949 	port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_VLAN_FILTER;
3950 
3951 	/* re-configure the device. */
3952 	retval = rte_eth_dev_configure(pid, nb_rxq, nb_txq, &port_conf);
3953 	if (retval < 0)
3954 		return retval;
3955 
3956 	retval = eth_dev_info_get_print_err(pid, &rte_port->dev_info);
3957 	if (retval != 0)
3958 		return retval;
3959 
3960 	/* If dev_info.vmdq_pool_base is greater than 0,
3961 	 * the queue ids of the VMDQ pools start after the PF queues.
3962 	 */
3963 	if (dcb_mode == DCB_VT_ENABLED &&
3964 	    rte_port->dev_info.vmdq_pool_base > 0) {
3965 		fprintf(stderr,
3966 			"VMDQ_DCB multi-queue mode is nonsensical for port %d.\n",
3967 			pid);
3968 		return -1;
3969 	}
3970 
3971 	/* Assume the ports in testpmd have the same DCB capability
3972 	 * and have the same number of rxq and txq in DCB mode.
3973 	 */
3974 	if (dcb_mode == DCB_VT_ENABLED) {
3975 		if (rte_port->dev_info.max_vfs > 0) {
3976 			nb_rxq = rte_port->dev_info.nb_rx_queues;
3977 			nb_txq = rte_port->dev_info.nb_tx_queues;
3978 		} else {
3979 			nb_rxq = rte_port->dev_info.max_rx_queues;
3980 			nb_txq = rte_port->dev_info.max_tx_queues;
3981 		}
3982 	} else {
3983 		/* if VT is disabled, use all PF queues */
3984 		if (rte_port->dev_info.vmdq_pool_base == 0) {
3985 			nb_rxq = rte_port->dev_info.max_rx_queues;
3986 			nb_txq = rte_port->dev_info.max_tx_queues;
3987 		} else {
3988 			nb_rxq = (queueid_t)num_tcs;
3989 			nb_txq = (queueid_t)num_tcs;
3991 		}
3992 	}
3993 	rx_free_thresh = 64;
3994 
3995 	memcpy(&rte_port->dev_conf, &port_conf, sizeof(struct rte_eth_conf));
3996 
3997 	rxtx_port_config(pid);
3998 	/* VLAN filter */
3999 	rte_port->dev_conf.rxmode.offloads |= DEV_RX_OFFLOAD_VLAN_FILTER;
4000 	for (i = 0; i < RTE_DIM(vlan_tags); i++)
4001 		rx_vft_set(pid, vlan_tags[i], 1);
4002 
4003 	retval = eth_macaddr_get_print_err(pid, &rte_port->eth_addr);
4004 	if (retval != 0)
4005 		return retval;
4006 
4007 	rte_port->dcb_flag = 1;
4008 
4009 	/* Enter DCB configuration status */
4010 	dcb_config = 1;
4011 
4012 	return 0;
4013 }
4014 
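/*
 * Allocate and zero the global ports[] array, initialise its per-port
 * lists, and reset the port/ring NUMA mapping tables to NUMA_NO_CONFIG.
 */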
4015 static void
4016 init_port(void)
4017 {
4018 	int i;
4019 
4020 	/* Configuration of Ethernet ports. */
4021 	ports = rte_zmalloc("testpmd: ports",
4022 			    sizeof(struct rte_port) * RTE_MAX_ETHPORTS,
4023 			    RTE_CACHE_LINE_SIZE);
4024 	if (ports == NULL) {
4025 		rte_exit(EXIT_FAILURE,
4026 				"rte_zmalloc(%d struct rte_port) failed\n",
4027 				RTE_MAX_ETHPORTS);
4028 	}
4029 	for (i = 0; i < RTE_MAX_ETHPORTS; i++)
4030 		ports[i].xstats_info.allocated = false;
4031 	for (i = 0; i < RTE_MAX_ETHPORTS; i++)
4032 		LIST_INIT(&ports[i].flow_tunnel_list);
4033 	/* Initialize ports NUMA structures */
4034 	memset(port_numa, NUMA_NO_CONFIG, RTE_MAX_ETHPORTS);
4035 	memset(rxring_numa, NUMA_NO_CONFIG, RTE_MAX_ETHPORTS);
4036 	memset(txring_numa, NUMA_NO_CONFIG, RTE_MAX_ETHPORTS);
4037 }
4038 
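/* Tear down all ports and terminate the interactive prompt. */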
4039 static void
4040 force_quit(void)
4041 {
4042 	pmd_test_exit();
4043 	prompt_exit();
4044 }
4045 
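/* Clear the terminal and display the basic statistics of every forwarding port. */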
4046 static void
4047 print_stats(void)
4048 {
4049 	uint8_t i;
4050 	const char clr[] = { 27, '[', '2', 'J', '\0' };
4051 	const char top_left[] = { 27, '[', '1', ';', '1', 'H', '\0' };
4052 
4053 	/* Clear screen and move to top left */
4054 	printf("%s%s", clr, top_left);
4055 
4056 	printf("\nPort statistics ====================================");
4057 	for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++)
4058 		nic_stats_display(fwd_ports_ids[i]);
4059 
4060 	fflush(stdout);
4061 }
4062 
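/*
 * SIGINT/SIGTERM handler: release the optional packet capture and latency
 * statistics state, tear down ports and the prompt, set f_quit so the
 * statistics loop stops, and (except on Windows) re-raise the signal with
 * the default handler so the process exits with the conventional status.
 */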
4063 static void
4064 signal_handler(int signum)
4065 {
4066 	if (signum == SIGINT || signum == SIGTERM) {
4067 		fprintf(stderr, "\nSignal %d received, preparing to exit...\n",
4068 			signum);
4069 #ifdef RTE_LIB_PDUMP
4070 		/* uninitialize packet capture framework */
4071 		rte_pdump_uninit();
4072 #endif
4073 #ifdef RTE_LIB_LATENCYSTATS
4074 		if (latencystats_enabled != 0)
4075 			rte_latencystats_uninit();
4076 #endif
4077 		force_quit();
4078 		/* Set flag to indicate the force termination. */
4079 		/* Set flag to indicate forced termination. */
4080 		/* exit with the expected status */
4081 #ifndef RTE_EXEC_ENV_WINDOWS
4082 		signal(signum, SIG_DFL);
4083 		kill(getpid(), signum);
4084 #endif
4085 	}
4086 }
4087 
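/*
 * testpmd entry point: initialise EAL, register for ethdev events, probe
 * the available ports, parse the application arguments, configure and
 * start the ports, then either hand control to the interactive prompt or
 * start packet forwarding until the user quits.
 */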
4088 int
4089 main(int argc, char** argv)
4090 {
4091 	int diag;
4092 	portid_t port_id;
4093 	uint16_t count;
4094 	int ret;
4095 
4096 	signal(SIGINT, signal_handler);
4097 	signal(SIGTERM, signal_handler);
4098 
4099 	testpmd_logtype = rte_log_register("testpmd");
4100 	if (testpmd_logtype < 0)
4101 		rte_exit(EXIT_FAILURE, "Cannot register log type\n");
4102 	rte_log_set_level(testpmd_logtype, RTE_LOG_DEBUG);
4103 
4104 	diag = rte_eal_init(argc, argv);
4105 	if (diag < 0)
4106 		rte_exit(EXIT_FAILURE, "Cannot init EAL: %s\n",
4107 			 rte_strerror(rte_errno));
4108 
4109 	ret = register_eth_event_callback();
4110 	if (ret != 0)
4111 		rte_exit(EXIT_FAILURE, "Cannot register for ethdev events\n");
4112 
4113 #ifdef RTE_LIB_PDUMP
4114 	/* initialize packet capture framework */
4115 	rte_pdump_init();
4116 #endif
4117 
4118 	count = 0;
4119 	RTE_ETH_FOREACH_DEV(port_id) {
4120 		ports_ids[count] = port_id;
4121 		count++;
4122 	}
4123 	nb_ports = (portid_t) count;
4124 	if (nb_ports == 0)
4125 		TESTPMD_LOG(WARNING, "No probed ethernet devices\n");
4126 
4127 	/* allocate port structures, and init them */
4128 	init_port();
4129 
4130 	set_def_fwd_config();
4131 	if (nb_lcores == 0)
4132 		rte_exit(EXIT_FAILURE, "No cores defined for forwarding\n"
4133 			 "Check the core mask argument\n");
4134 
4135 	/* Bitrate/latency stats disabled by default */
4136 #ifdef RTE_LIB_BITRATESTATS
4137 	bitrate_enabled = 0;
4138 #endif
4139 #ifdef RTE_LIB_LATENCYSTATS
4140 	latencystats_enabled = 0;
4141 #endif
4142 
4143 	/* on FreeBSD, mlockall() is disabled by default */
4144 #ifdef RTE_EXEC_ENV_FREEBSD
4145 	do_mlockall = 0;
4146 #else
4147 	do_mlockall = 1;
4148 #endif
4149 
4150 	argc -= diag;
4151 	argv += diag;
4152 	if (argc > 1)
4153 		launch_args_parse(argc, argv);
4154 
4155 #ifndef RTE_EXEC_ENV_WINDOWS
4156 	if (do_mlockall && mlockall(MCL_CURRENT | MCL_FUTURE)) {
4157 		TESTPMD_LOG(NOTICE, "mlockall() failed with error \"%s\"\n",
4158 			strerror(errno));
4159 	}
4160 #endif
4161 
4162 	if (tx_first && interactive)
4163 		rte_exit(EXIT_FAILURE, "--tx-first cannot be used in "
4164 				"interactive mode.\n");
4165 
4166 	if (tx_first && lsc_interrupt) {
4167 		fprintf(stderr,
4168 			"Warning: lsc_interrupt needs to be off when using tx_first. Disabling.\n");
4169 		lsc_interrupt = 0;
4170 	}
4171 
4172 	if (!nb_rxq && !nb_txq)
4173 		fprintf(stderr,
4174 			"Warning: Either rx or tx queues should be non-zero\n");
4175 
4176 	if (nb_rxq > 1 && nb_rxq > nb_txq)
4177 		fprintf(stderr,
4178 			"Warning: nb_rxq=%d enables RSS configuration, but nb_txq=%d will prevent it from being fully tested.\n",
4179 			nb_rxq, nb_txq);
4180 
4181 	init_config();
4182 
4183 	if (hot_plug) {
4184 		ret = rte_dev_hotplug_handle_enable();
4185 		if (ret) {
4186 			RTE_LOG(ERR, EAL,
4187 				"fail to enable hotplug handling.\n");
4188 			return -1;
4189 		}
4190 
4191 		ret = rte_dev_event_monitor_start();
4192 		if (ret) {
4193 			RTE_LOG(ERR, EAL,
4194 				"fail to start device event monitoring.\n");
4195 			return -1;
4196 		}
4197 
4198 		ret = rte_dev_event_callback_register(NULL,
4199 			dev_event_callback, NULL);
4200 		if (ret) {
4201 			RTE_LOG(ERR, EAL,
4202 				"fail to register device event callback\n");
4203 			return -1;
4204 		}
4205 	}
4206 
4207 	if (!no_device_start && start_port(RTE_PORT_ALL) != 0)
4208 		rte_exit(EXIT_FAILURE, "Failed to start ports\n");
4209 
4210 	/* set all ports to promiscuous mode by default */
4211 	RTE_ETH_FOREACH_DEV(port_id) {
4212 		ret = rte_eth_promiscuous_enable(port_id);
4213 		if (ret != 0)
4214 			fprintf(stderr,
4215 				"Error enabling promiscuous mode for port %u: %s - ignored\n",
4216 				port_id, rte_strerror(-ret));
4217 	}
4218 
4219 	/* Init metrics library */
4220 	rte_metrics_init(rte_socket_id());
4221 
4222 #ifdef RTE_LIB_LATENCYSTATS
4223 	if (latencystats_enabled != 0) {
4224 		int ret = rte_latencystats_init(1, NULL);
4225 		if (ret)
4226 			fprintf(stderr,
4227 				"Warning: latencystats init() returned error %d\n",
4228 				ret);
4229 		fprintf(stderr, "Latencystats running on lcore %d\n",
4230 			latencystats_lcore_id);
4231 	}
4232 #endif
4233 
4234 	/* Setup bitrate stats */
4235 #ifdef RTE_LIB_BITRATESTATS
4236 	if (bitrate_enabled != 0) {
4237 		bitrate_data = rte_stats_bitrate_create();
4238 		if (bitrate_data == NULL)
4239 			rte_exit(EXIT_FAILURE,
4240 				"Could not allocate bitrate data.\n");
4241 		rte_stats_bitrate_reg(bitrate_data);
4242 	}
4243 #endif
4244 #ifdef RTE_LIB_CMDLINE
4245 	if (strlen(cmdline_filename) != 0)
4246 		cmdline_read_from_file(cmdline_filename);
4247 
4248 	if (interactive == 1) {
4249 		if (auto_start) {
4250 			printf("Start automatic packet forwarding\n");
4251 			start_packet_forwarding(0);
4252 		}
4253 		prompt();
4254 		pmd_test_exit();
4255 	} else
4256 #endif
4257 	{
4258 		char c;
4259 		int rc;
4260 
4261 		f_quit = 0;
4262 
4263 		printf("No commandline core given, starting packet forwarding\n");
4264 		start_packet_forwarding(tx_first);
4265 		if (stats_period != 0) {
4266 			uint64_t prev_time = 0, cur_time, diff_time = 0;
4267 			uint64_t timer_period;
4268 
4269 			/* Convert to number of cycles */
4270 			timer_period = stats_period * rte_get_timer_hz();
4271 
4272 			while (f_quit == 0) {
4273 				cur_time = rte_get_timer_cycles();
4274 				diff_time += cur_time - prev_time;
4275 
4276 				if (diff_time >= timer_period) {
4277 					print_stats();
4278 					/* Reset the timer */
4279 					diff_time = 0;
4280 				}
4281 				prev_time = cur_time;
4282 				/* Sleep to avoid unnecessary checks */
4283 				rte_delay_us_sleep(US_PER_S);
4284 			}
4285 		}
4286 
4287 		printf("Press enter to exit\n");
4288 		rc = read(0, &c, 1);
4289 		pmd_test_exit();
4290 		if (rc < 0)
4291 			return 1;
4292 	}
4293 
4294 	ret = rte_eal_cleanup();
4295 	if (ret != 0)
4296 		rte_exit(EXIT_FAILURE,
4297 			 "EAL cleanup failed: %s\n", strerror(-ret));
4298 
4299 	return EXIT_SUCCESS;
4300 }
4301