xref: /dpdk/app/test-pmd/testpmd.c (revision 0dff3f26d6faad4e51f75e5245f0387ee9bb0c6d)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2017 Intel Corporation
3  */
4 
5 #include <stdarg.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <signal.h>
9 #include <string.h>
10 #include <time.h>
11 #include <fcntl.h>
12 #ifndef RTE_EXEC_ENV_WINDOWS
13 #include <sys/mman.h>
14 #endif
15 #include <sys/types.h>
16 #include <errno.h>
17 #include <stdbool.h>
18 
19 #include <sys/queue.h>
20 #include <sys/stat.h>
21 
22 #include <stdint.h>
23 #include <unistd.h>
24 #include <inttypes.h>
25 
26 #include <rte_common.h>
27 #include <rte_errno.h>
28 #include <rte_byteorder.h>
29 #include <rte_log.h>
30 #include <rte_debug.h>
31 #include <rte_cycles.h>
32 #include <rte_memory.h>
33 #include <rte_memcpy.h>
34 #include <rte_launch.h>
35 #include <rte_eal.h>
36 #include <rte_alarm.h>
37 #include <rte_per_lcore.h>
38 #include <rte_lcore.h>
39 #include <rte_branch_prediction.h>
40 #include <rte_mempool.h>
41 #include <rte_malloc.h>
42 #include <rte_mbuf.h>
43 #include <rte_mbuf_pool_ops.h>
44 #include <rte_interrupts.h>
45 #include <rte_pci.h>
46 #include <rte_ether.h>
47 #include <rte_ethdev.h>
48 #include <rte_dev.h>
49 #include <rte_string_fns.h>
50 #ifdef RTE_NET_IXGBE
51 #include <rte_pmd_ixgbe.h>
52 #endif
53 #ifdef RTE_LIB_PDUMP
54 #include <rte_pdump.h>
55 #endif
56 #include <rte_flow.h>
57 #ifdef RTE_LIB_METRICS
58 #include <rte_metrics.h>
59 #endif
60 #ifdef RTE_LIB_BITRATESTATS
61 #include <rte_bitrate.h>
62 #endif
63 #ifdef RTE_LIB_LATENCYSTATS
64 #include <rte_latencystats.h>
65 #endif
66 #ifdef RTE_EXEC_ENV_WINDOWS
67 #include <process.h>
68 #endif
69 
70 #include "testpmd.h"
71 
72 #ifndef MAP_HUGETLB
73 /* FreeBSD may not have MAP_HUGETLB (in fact, it probably doesn't) */
74 #define HUGE_FLAG (0x40000)
75 #else
76 #define HUGE_FLAG MAP_HUGETLB
77 #endif
78 
79 #ifndef MAP_HUGE_SHIFT
80 /* older kernels (or FreeBSD) will not have this define */
81 #define HUGE_SHIFT (26)
82 #else
83 #define HUGE_SHIFT MAP_HUGE_SHIFT
84 #endif
85 
86 #define EXTMEM_HEAP_NAME "extmem"
87 /*
88  * Zone size with the malloc overhead (max of debug and release variants)
89  * must fit into the smallest supported hugepage size (2M),
90  * so that an IOVA-contiguous zone of this size can always be allocated
91  * if there are free 2M hugepages.
92  */
93 #define EXTBUF_ZONE_SIZE (RTE_PGSIZE_2M - 4 * RTE_CACHE_LINE_SIZE)
94 
95 uint16_t verbose_level = 0; /**< Silent by default. */
96 int testpmd_logtype; /**< Log type for testpmd logs */
97 
98 /* Use the main core for the command line? */
99 uint8_t interactive = 0;
100 uint8_t auto_start = 0;
101 uint8_t tx_first;
102 char cmdline_filename[PATH_MAX] = {0};
103 
104 /*
105  * NUMA support configuration.
106  * When set, the NUMA support attempts to dispatch the allocation of the
107  * RX and TX memory rings, and of the DMA memory buffers (mbufs) for the
108  * probed ports among the CPU sockets 0 and 1.
109  * Otherwise, all memory is allocated from CPU socket 0.
110  */
111 uint8_t numa_support = 1; /**< numa enabled by default */
112 
113 /*
114  * In UMA mode, all memory is allocated from socket 0 if --socket-num is
115  * not configured.
116  */
117 uint8_t socket_num = UMA_NO_CONFIG;
118 
119 /*
120  * Select mempool allocation type:
121  * - native: use regular DPDK memory
122  * - anon: use regular DPDK memory to create mempool, but populate using
123  *         anonymous memory (may not be IOVA-contiguous)
124  * - xmem: use externally allocated hugepage memory
125  */
126 uint8_t mp_alloc_type = MP_ALLOC_NATIVE;
127 
128 /*
129  * Store the sockets on which the memory pools used by the ports
130  * are allocated.
131  */
132 uint8_t port_numa[RTE_MAX_ETHPORTS];
133 
134 /*
135  * Store the sockets on which the RX rings used by the ports
136  * are allocated.
137  */
138 uint8_t rxring_numa[RTE_MAX_ETHPORTS];
139 
140 /*
141  * Store the sockets on which the TX rings used by the ports
142  * are allocated.
143  */
144 uint8_t txring_numa[RTE_MAX_ETHPORTS];
145 
146 /*
147  * Record the Ethernet address of peer target ports to which packets are
148  * forwarded.
149  * Must be instantiated with the ethernet addresses of peer traffic generator
150  * ports.
151  */
152 struct rte_ether_addr peer_eth_addrs[RTE_MAX_ETHPORTS];
153 portid_t nb_peer_eth_addrs = 0;
154 
155 /*
156  * Probed Target Environment.
157  */
158 struct rte_port *ports;	       /**< For all probed ethernet ports. */
159 portid_t nb_ports;             /**< Number of probed ethernet ports. */
160 struct fwd_lcore **fwd_lcores; /**< For all probed logical cores. */
161 lcoreid_t nb_lcores;           /**< Number of probed logical cores. */
162 
163 portid_t ports_ids[RTE_MAX_ETHPORTS]; /**< Store all port ids. */
164 
165 /*
166  * Test Forwarding Configuration.
167  *    nb_fwd_lcores <= nb_cfg_lcores <= nb_lcores
168  *    nb_fwd_ports  <= nb_cfg_ports  <= nb_ports
169  */
170 lcoreid_t nb_cfg_lcores; /**< Number of configured logical cores. */
171 lcoreid_t nb_fwd_lcores; /**< Number of forwarding logical cores. */
172 portid_t  nb_cfg_ports;  /**< Number of configured ports. */
173 portid_t  nb_fwd_ports;  /**< Number of forwarding ports. */
174 
175 unsigned int fwd_lcores_cpuids[RTE_MAX_LCORE]; /**< CPU ids configuration. */
176 portid_t fwd_ports_ids[RTE_MAX_ETHPORTS];      /**< Port ids configuration. */
177 
178 struct fwd_stream **fwd_streams; /**< For each RX queue of each port. */
179 streamid_t nb_fwd_streams;       /**< Is equal to (nb_ports * nb_rxq). */
180 
181 /*
182  * Forwarding engines.
183  */
184 struct fwd_engine *fwd_engines[] = {
185 	&io_fwd_engine,
186 	&mac_fwd_engine,
187 	&mac_swap_engine,
188 	&flow_gen_engine,
189 	&rx_only_engine,
190 	&tx_only_engine,
191 	&csum_fwd_engine,
192 	&icmp_echo_engine,
193 	&noisy_vnf_engine,
194 	&five_tuple_swap_fwd_engine,
195 #ifdef RTE_LIBRTE_IEEE1588
196 	&ieee1588_fwd_engine,
197 #endif
198 	&shared_rxq_engine,
199 	NULL,
200 };
201 
202 struct rte_mempool *mempools[RTE_MAX_NUMA_NODES * MAX_SEGS_BUFFER_SPLIT];
203 uint16_t mempool_flags;
204 
205 struct fwd_config cur_fwd_config;
206 struct fwd_engine *cur_fwd_eng = &io_fwd_engine; /**< IO mode by default. */
207 uint32_t retry_enabled;
208 uint32_t burst_tx_delay_time = BURST_TX_WAIT_US;
209 uint32_t burst_tx_retry_num = BURST_TX_RETRIES;
210 
211 uint32_t mbuf_data_size_n = 1; /* Number of specified mbuf sizes. */
212 uint16_t mbuf_data_size[MAX_SEGS_BUFFER_SPLIT] = {
213 	DEFAULT_MBUF_DATA_SIZE
214 }; /**< Mbuf data space size. */
215 uint32_t param_total_num_mbufs = 0;  /**< number of mbufs in all pools - if
216                                       * specified on command-line. */
217 uint16_t stats_period; /**< Period to show statistics (disabled by default) */
218 
219 /** Extended statistics to show. */
220 struct rte_eth_xstat_name *xstats_display;
221 
222 unsigned int xstats_display_num; /**< Number of extended statistics to show */
223 
224 /*
225  * In a container, the process running with the 'stats-period' option cannot be
226  * terminated. Set a flag to exit the stats period loop on SIGINT/SIGTERM.
227  */
228 uint8_t f_quit;
229 
230 /*
231  * Max Rx frame size, set by '--max-pkt-len' parameter.
232  */
233 uint32_t max_rx_pkt_len;
234 
235 /*
236  * Configuration of packet segments used to scatter received packets
237  * if any of the split features are configured.
238  */
239 uint16_t rx_pkt_seg_lengths[MAX_SEGS_BUFFER_SPLIT];
240 uint8_t  rx_pkt_nb_segs; /**< Number of segments to split */
241 uint16_t rx_pkt_seg_offsets[MAX_SEGS_BUFFER_SPLIT];
242 uint8_t  rx_pkt_nb_offs; /**< Number of specified offsets */
243 
244 /*
245  * Configuration of packet segments used by the "txonly" processing engine.
246  */
247 uint16_t tx_pkt_length = TXONLY_DEF_PACKET_LEN; /**< TXONLY packet length. */
248 uint16_t tx_pkt_seg_lengths[RTE_MAX_SEGS_PER_PKT] = {
249 	TXONLY_DEF_PACKET_LEN,
250 };
251 uint8_t  tx_pkt_nb_segs = 1; /**< Number of segments in TXONLY packets */
252 
253 enum tx_pkt_split tx_pkt_split = TX_PKT_SPLIT_OFF;
254 /**< Split policy for packets to TX. */
255 
256 uint8_t txonly_multi_flow;
257 /**< Whether multiple flows are generated in TXONLY mode. */
258 
259 uint32_t tx_pkt_times_inter;
260 /**< Timings for send scheduling in TXONLY mode, time between bursts. */
261 
262 uint32_t tx_pkt_times_intra;
263 /**< Timings for send scheduling in TXONLY mode, time between packets. */
264 
265 uint16_t nb_pkt_per_burst = DEF_PKT_BURST; /**< Number of packets per burst. */
266 uint16_t nb_pkt_flowgen_clones; /**< Number of Tx packet clones to send in flowgen mode. */
267 int nb_flows_flowgen = 1024; /**< Number of flows in flowgen mode. */
268 uint16_t mb_mempool_cache = DEF_MBUF_CACHE; /**< Size of mbuf mempool cache. */
269 
270 /* Whether the current configuration is in DCB mode; 0 means it is not. */
271 uint8_t dcb_config = 0;
272 
273 /*
274  * Configurable number of RX/TX queues.
275  */
276 queueid_t nb_hairpinq; /**< Number of hairpin queues per port. */
277 queueid_t nb_rxq = 1; /**< Number of RX queues per port. */
278 queueid_t nb_txq = 1; /**< Number of TX queues per port. */
279 
280 /*
281  * Configurable number of RX/TX ring descriptors.
282  * Defaults are supplied by drivers via ethdev.
283  */
284 #define RTE_TEST_RX_DESC_DEFAULT 0
285 #define RTE_TEST_TX_DESC_DEFAULT 0
286 uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT; /**< Number of RX descriptors. */
287 uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT; /**< Number of TX descriptors. */
288 
289 #define RTE_PMD_PARAM_UNSET -1
290 /*
291  * Configurable values of RX and TX ring threshold registers.
292  */
293 
294 int8_t rx_pthresh = RTE_PMD_PARAM_UNSET;
295 int8_t rx_hthresh = RTE_PMD_PARAM_UNSET;
296 int8_t rx_wthresh = RTE_PMD_PARAM_UNSET;
297 
298 int8_t tx_pthresh = RTE_PMD_PARAM_UNSET;
299 int8_t tx_hthresh = RTE_PMD_PARAM_UNSET;
300 int8_t tx_wthresh = RTE_PMD_PARAM_UNSET;
301 
302 /*
303  * Configurable value of RX free threshold.
304  */
305 int16_t rx_free_thresh = RTE_PMD_PARAM_UNSET;
306 
307 /*
308  * Configurable value of RX drop enable.
309  */
310 int8_t rx_drop_en = RTE_PMD_PARAM_UNSET;
311 
312 /*
313  * Configurable value of TX free threshold.
314  */
315 int16_t tx_free_thresh = RTE_PMD_PARAM_UNSET;
316 
317 /*
318  * Configurable value of TX RS bit threshold.
319  */
320 int16_t tx_rs_thresh = RTE_PMD_PARAM_UNSET;
321 
322 /*
323  * Configurable value of buffered packets before sending.
324  */
325 uint16_t noisy_tx_sw_bufsz;
326 
327 /*
328  * Configurable value of packet buffer timeout.
329  */
330 uint16_t noisy_tx_sw_buf_flush_time;
331 
332 /*
333  * Configurable value for size of VNF internal memory area
334  * used for simulating noisy neighbour behaviour
335  */
336 uint64_t noisy_lkup_mem_sz;
337 
338 /*
339  * Configurable value of number of random writes done in
340  * VNF simulation memory area.
341  */
342 uint64_t noisy_lkup_num_writes;
343 
344 /*
345  * Configurable value of number of random reads done in
346  * VNF simulation memory area.
347  */
348 uint64_t noisy_lkup_num_reads;
349 
350 /*
351  * Configurable value of number of random reads/writes done in
352  * VNF simulation memory area.
353  */
354 uint64_t noisy_lkup_num_reads_writes;
355 
356 /*
357  * Receive Side Scaling (RSS) configuration.
358  */
359 uint64_t rss_hf = RTE_ETH_RSS_IP; /* RSS IP by default. */
360 
361 /*
362  * Port topology configuration
363  */
364 uint16_t port_topology = PORT_TOPOLOGY_PAIRED; /* Ports are paired by default */
365 
366 /*
367  * Avoid flushing all the RX streams before forwarding starts.
368  */
369 uint8_t no_flush_rx = 0; /* flush by default */
370 
371 /*
372  * Flow API isolated mode.
373  */
374 uint8_t flow_isolate_all;
375 
376 /*
377  * Avoid checking the link status when starting/stopping a port.
378  */
379 uint8_t no_link_check = 0; /* check by default */
380 
381 /*
382  * Don't automatically start all ports in interactive mode.
383  */
384 uint8_t no_device_start = 0;
385 
386 /*
387  * Enable link status change notification
388  */
389 uint8_t lsc_interrupt = 1; /* enabled by default */
390 
391 /*
392  * Enable device removal notification.
393  */
394 uint8_t rmv_interrupt = 1; /* enabled by default */
395 
396 uint8_t hot_plug = 0; /**< hotplug disabled by default. */
397 
398 /* After attach, port setup is called on event or by iterator */
399 bool setup_on_probe_event = true;
400 
401 /* Clear ptypes on port initialization. */
402 uint8_t clear_ptypes = true;
403 
404 /* Hairpin ports configuration mode. */
405 uint16_t hairpin_mode;
406 
407 /* Pretty printing of ethdev events */
408 static const char * const eth_event_desc[] = {
409 	[RTE_ETH_EVENT_UNKNOWN] = "unknown",
410 	[RTE_ETH_EVENT_INTR_LSC] = "link state change",
411 	[RTE_ETH_EVENT_QUEUE_STATE] = "queue state",
412 	[RTE_ETH_EVENT_INTR_RESET] = "reset",
413 	[RTE_ETH_EVENT_VF_MBOX] = "VF mbox",
414 	[RTE_ETH_EVENT_IPSEC] = "IPsec",
415 	[RTE_ETH_EVENT_MACSEC] = "MACsec",
416 	[RTE_ETH_EVENT_INTR_RMV] = "device removal",
417 	[RTE_ETH_EVENT_NEW] = "device probed",
418 	[RTE_ETH_EVENT_DESTROY] = "device released",
419 	[RTE_ETH_EVENT_FLOW_AGED] = "flow aged",
420 	[RTE_ETH_EVENT_MAX] = NULL,
421 };
422 
423 /*
424  * Display or mask Ethernet events.
425  * Default to all events except VF_MBOX, NEW and DESTROY.
426  */
427 uint32_t event_print_mask = (UINT32_C(1) << RTE_ETH_EVENT_UNKNOWN) |
428 			    (UINT32_C(1) << RTE_ETH_EVENT_INTR_LSC) |
429 			    (UINT32_C(1) << RTE_ETH_EVENT_QUEUE_STATE) |
430 			    (UINT32_C(1) << RTE_ETH_EVENT_INTR_RESET) |
431 			    (UINT32_C(1) << RTE_ETH_EVENT_IPSEC) |
432 			    (UINT32_C(1) << RTE_ETH_EVENT_MACSEC) |
433 			    (UINT32_C(1) << RTE_ETH_EVENT_INTR_RMV) |
434 			    (UINT32_C(1) << RTE_ETH_EVENT_FLOW_AGED);
435 /*
436  * Decide if all memory is locked for performance.
437  */
438 int do_mlockall = 0;
439 
440 /*
441  * NIC bypass mode configuration options.
442  */
443 
444 #if defined RTE_NET_IXGBE && defined RTE_LIBRTE_IXGBE_BYPASS
445 /* The NIC bypass watchdog timeout. */
446 uint32_t bypass_timeout = RTE_PMD_IXGBE_BYPASS_TMT_OFF;
447 #endif
448 
449 
450 #ifdef RTE_LIB_LATENCYSTATS
451 
452 /*
453  * Set when latency stats are enabled on the command line.
454  */
455 uint8_t latencystats_enabled;
456 
457 /*
458  * Lcore ID to service latency statistics.
459  */
460 lcoreid_t latencystats_lcore_id = -1;
461 
462 #endif
463 
464 /*
465  * Ethernet device configuration.
466  */
467 struct rte_eth_rxmode rx_mode;
468 
469 struct rte_eth_txmode tx_mode = {
470 	.offloads = RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE,
471 };
472 
473 struct rte_eth_fdir_conf fdir_conf = {
474 	.mode = RTE_FDIR_MODE_NONE,
475 	.pballoc = RTE_ETH_FDIR_PBALLOC_64K,
476 	.status = RTE_FDIR_REPORT_STATUS,
477 	.mask = {
478 		.vlan_tci_mask = 0xFFEF,
479 		.ipv4_mask     = {
480 			.src_ip = 0xFFFFFFFF,
481 			.dst_ip = 0xFFFFFFFF,
482 		},
483 		.ipv6_mask     = {
484 			.src_ip = {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
485 			.dst_ip = {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
486 		},
487 		.src_port_mask = 0xFFFF,
488 		.dst_port_mask = 0xFFFF,
489 		.mac_addr_byte_mask = 0xFF,
490 		.tunnel_type_mask = 1,
491 		.tunnel_id_mask = 0xFFFFFFFF,
492 	},
493 	.drop_queue = 127,
494 };
495 
496 volatile int test_done = 1; /* stop packet forwarding when set to 1. */
497 
498 /*
499  * Display zero values by default for xstats
500  */
501 uint8_t xstats_hide_zero;
502 
503 /*
504  * Measure of CPU cycles disabled by default
505  */
506 uint8_t record_core_cycles;
507 
508 /*
509  * Display of RX and TX bursts disabled by default
510  */
511 uint8_t record_burst_stats;
512 
513 /*
514  * Number of ports per shared Rx queue group; 0 disables it.
515  */
516 uint32_t rxq_share;
517 
518 unsigned int num_sockets = 0;
519 unsigned int socket_ids[RTE_MAX_NUMA_NODES];
520 
521 #ifdef RTE_LIB_BITRATESTATS
522 /* Bitrate statistics */
523 struct rte_stats_bitrates *bitrate_data;
524 lcoreid_t bitrate_lcore_id;
525 uint8_t bitrate_enabled;
526 #endif
527 
528 #ifdef RTE_LIB_GRO
529 struct gro_status gro_ports[RTE_MAX_ETHPORTS];
530 uint8_t gro_flush_cycles = GRO_DEFAULT_FLUSH_CYCLES;
531 #endif
532 
533 /*
534  * Hexadecimal bitmask of RX multi-queue modes that can be enabled.
535  */
536 enum rte_eth_rx_mq_mode rx_mq_mode = RTE_ETH_MQ_RX_VMDQ_DCB_RSS;
537 
538 /*
539  * Used to set forced link speed
540  */
541 uint32_t eth_link_speed;
542 
543 /*
544  * ID of the current process in multi-process, used to
545  * configure the queues to be polled.
546  */
547 int proc_id;
548 
549 /*
550  * Number of processes in multi-process, used to
551  * configure the queues to be polled.
552  */
553 unsigned int num_procs = 1;
554 
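/*
 * Negotiate delivery of Rx metadata (flow FLAG, MARK and tunnel ID) with the
 * PMD. Only the primary process negotiates; a PMD returning -ENOTSUP is
 * tolerated, any other error is fatal.
 */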
555 static void
556 eth_rx_metadata_negotiate_mp(uint16_t port_id)
557 {
558 	uint64_t rx_meta_features = 0;
559 	int ret;
560 
561 	if (!is_proc_primary())
562 		return;
563 
564 	rx_meta_features |= RTE_ETH_RX_METADATA_USER_FLAG;
565 	rx_meta_features |= RTE_ETH_RX_METADATA_USER_MARK;
566 	rx_meta_features |= RTE_ETH_RX_METADATA_TUNNEL_ID;
567 
568 	ret = rte_eth_rx_metadata_negotiate(port_id, &rx_meta_features);
569 	if (ret == 0) {
570 		if (!(rx_meta_features & RTE_ETH_RX_METADATA_USER_FLAG)) {
571 			TESTPMD_LOG(DEBUG, "Flow action FLAG will not affect Rx mbufs on port %u\n",
572 				    port_id);
573 		}
574 
575 		if (!(rx_meta_features & RTE_ETH_RX_METADATA_USER_MARK)) {
576 			TESTPMD_LOG(DEBUG, "Flow action MARK will not affect Rx mbufs on port %u\n",
577 				    port_id);
578 		}
579 
580 		if (!(rx_meta_features & RTE_ETH_RX_METADATA_TUNNEL_ID)) {
581 			TESTPMD_LOG(DEBUG, "Flow tunnel offload support might be limited or unavailable on port %u\n",
582 				    port_id);
583 		}
584 	} else if (ret != -ENOTSUP) {
585 		rte_exit(EXIT_FAILURE, "Error when negotiating Rx meta features on port %u: %s\n",
586 			 port_id, rte_strerror(-ret));
587 	}
588 }
589 
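/*
 * Multi-process wrappers: the *_mp() helpers below perform the ethdev or
 * mempool operation only in the primary process and are no-ops (returning 0)
 * in secondary processes.
 */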
590 static int
591 eth_dev_configure_mp(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
592 		      const struct rte_eth_conf *dev_conf)
593 {
594 	if (is_proc_primary())
595 		return rte_eth_dev_configure(port_id, nb_rx_q, nb_tx_q,
596 					dev_conf);
597 	return 0;
598 }
599 
600 static int
601 eth_dev_start_mp(uint16_t port_id)
602 {
603 	if (is_proc_primary())
604 		return rte_eth_dev_start(port_id);
605 
606 	return 0;
607 }
608 
609 static int
610 eth_dev_stop_mp(uint16_t port_id)
611 {
612 	if (is_proc_primary())
613 		return rte_eth_dev_stop(port_id);
614 
615 	return 0;
616 }
617 
618 static void
619 mempool_free_mp(struct rte_mempool *mp)
620 {
621 	if (is_proc_primary())
622 		rte_mempool_free(mp);
623 }
624 
625 static int
626 eth_dev_set_mtu_mp(uint16_t port_id, uint16_t mtu)
627 {
628 	if (is_proc_primary())
629 		return rte_eth_dev_set_mtu(port_id, mtu);
630 
631 	return 0;
632 }
633 
634 /* Forward function declarations */
635 static void setup_attached_port(portid_t pi);
636 static void check_all_ports_link_status(uint32_t port_mask);
637 static int eth_event_callback(portid_t port_id,
638 			      enum rte_eth_event_type type,
639 			      void *param, void *ret_param);
640 static void dev_event_callback(const char *device_name,
641 				enum rte_dev_event_type type,
642 				void *param);
643 static void fill_xstats_display_info(void);
644 
645 /*
646  * Check if all the ports are started.
647  * If yes, return positive value. If not, return zero.
648  */
649 static int all_ports_started(void);
650 
651 #ifdef RTE_LIB_GSO
652 struct gso_status gso_ports[RTE_MAX_ETHPORTS];
653 uint16_t gso_max_segment_size = RTE_ETHER_MAX_LEN - RTE_ETHER_CRC_LEN;
654 #endif
655 
656 /* Holds the registered mbuf dynamic flags names. */
657 char dynf_names[64][RTE_MBUF_DYN_NAMESIZE];
658 
659 
660 /*
661  * Helper function to check whether a socket has already been discovered.
662  * If it is new, return a positive value; if already known, return zero.
663  */
664 int
665 new_socket_id(unsigned int socket_id)
666 {
667 	unsigned int i;
668 
669 	for (i = 0; i < num_sockets; i++) {
670 		if (socket_ids[i] == socket_id)
671 			return 0;
672 	}
673 	return 1;
674 }
675 
676 /*
677  * Setup default configuration.
678  */
679 static void
680 set_default_fwd_lcores_config(void)
681 {
682 	unsigned int i;
683 	unsigned int nb_lc;
684 	unsigned int sock_num;
685 
686 	nb_lc = 0;
687 	for (i = 0; i < RTE_MAX_LCORE; i++) {
688 		if (!rte_lcore_is_enabled(i))
689 			continue;
690 		sock_num = rte_lcore_to_socket_id(i);
691 		if (new_socket_id(sock_num)) {
692 			if (num_sockets >= RTE_MAX_NUMA_NODES) {
693 				rte_exit(EXIT_FAILURE,
694 					 "Total sockets greater than %u\n",
695 					 RTE_MAX_NUMA_NODES);
696 			}
697 			socket_ids[num_sockets++] = sock_num;
698 		}
699 		if (i == rte_get_main_lcore())
700 			continue;
701 		fwd_lcores_cpuids[nb_lc++] = i;
702 	}
703 	nb_lcores = (lcoreid_t) nb_lc;
704 	nb_cfg_lcores = nb_lcores;
705 	nb_fwd_lcores = 1;
706 }
707 
708 static void
709 set_def_peer_eth_addrs(void)
710 {
711 	portid_t i;
712 
713 	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
714 		peer_eth_addrs[i].addr_bytes[0] = RTE_ETHER_LOCAL_ADMIN_ADDR;
715 		peer_eth_addrs[i].addr_bytes[5] = i;
716 	}
717 }
718 
719 static void
720 set_default_fwd_ports_config(void)
721 {
722 	portid_t pt_id;
723 	int i = 0;
724 
725 	RTE_ETH_FOREACH_DEV(pt_id) {
726 		fwd_ports_ids[i++] = pt_id;
727 
728 		/* Update sockets info according to the attached device */
729 		int socket_id = rte_eth_dev_socket_id(pt_id);
730 		if (socket_id >= 0 && new_socket_id(socket_id)) {
731 			if (num_sockets >= RTE_MAX_NUMA_NODES) {
732 				rte_exit(EXIT_FAILURE,
733 					 "Total sockets greater than %u\n",
734 					 RTE_MAX_NUMA_NODES);
735 			}
736 			socket_ids[num_sockets++] = socket_id;
737 		}
738 	}
739 
740 	nb_cfg_ports = nb_ports;
741 	nb_fwd_ports = nb_ports;
742 }
743 
744 void
745 set_def_fwd_config(void)
746 {
747 	set_default_fwd_lcores_config();
748 	set_def_peer_eth_addrs();
749 	set_default_fwd_ports_config();
750 }
751 
752 #ifndef RTE_EXEC_ENV_WINDOWS
753 /* extremely pessimistic estimation of memory required to create a mempool */
754 static int
755 calc_mem_size(uint32_t nb_mbufs, uint32_t mbuf_sz, size_t pgsz, size_t *out)
756 {
757 	unsigned int n_pages, mbuf_per_pg, leftover;
758 	uint64_t total_mem, mbuf_mem, obj_sz;
759 
760 	/* there is no good way to predict how much space the mempool will
761 	 * occupy because it will allocate chunks on the fly, and some of those
762 	 * will come from default DPDK memory while some will come from our
763 	 * external memory, so just assume 128MB will be enough for everyone.
764 	 */
765 	uint64_t hdr_mem = 128 << 20;
766 
767 	/* account for possible non-contiguousness */
768 	obj_sz = rte_mempool_calc_obj_size(mbuf_sz, 0, NULL);
769 	if (obj_sz > pgsz) {
770 		TESTPMD_LOG(ERR, "Object size is bigger than page size\n");
771 		return -1;
772 	}
773 
774 	mbuf_per_pg = pgsz / obj_sz;
775 	leftover = (nb_mbufs % mbuf_per_pg) > 0;
776 	n_pages = (nb_mbufs / mbuf_per_pg) + leftover;
777 
778 	mbuf_mem = n_pages * pgsz;
779 
780 	total_mem = RTE_ALIGN(hdr_mem + mbuf_mem, pgsz);
781 
782 	if (total_mem > SIZE_MAX) {
783 		TESTPMD_LOG(ERR, "Memory size too big\n");
784 		return -1;
785 	}
786 	*out = (size_t)total_mem;
787 
788 	return 0;
789 }
790 
791 static int
792 pagesz_flags(uint64_t page_sz)
793 {
794 	/* as per mmap() manpage, all page sizes are log2 of page size
795 	 * shifted by MAP_HUGE_SHIFT
796 	 */
797 	int log2 = rte_log2_u64(page_sz);
798 
799 	return (log2 << HUGE_SHIFT);
800 }
801 
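/*
 * Reserve an anonymous private mapping of 'memsz' bytes, optionally backed by
 * hugepages of size 'pgsz'. Returns NULL on failure.
 */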
802 static void *
803 alloc_mem(size_t memsz, size_t pgsz, bool huge)
804 {
805 	void *addr;
806 	int flags;
807 
808 	/* allocate anonymous hugepages */
809 	flags = MAP_ANONYMOUS | MAP_PRIVATE;
810 	if (huge)
811 		flags |= HUGE_FLAG | pagesz_flags(pgsz);
812 
813 	addr = mmap(NULL, memsz, PROT_READ | PROT_WRITE, flags, -1, 0);
814 	if (addr == MAP_FAILED)
815 		return NULL;
816 
817 	return addr;
818 }
819 
820 struct extmem_param {
821 	void *addr;
822 	size_t len;
823 	size_t pgsz;
824 	rte_iova_t *iova_table;
825 	unsigned int iova_table_len;
826 };
827 
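/*
 * Allocate the external memory area, trying the known hugepage sizes in turn
 * (or the system page size when 'huge' is false), and record the IOVA address
 * of every page in 'param'.
 */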
828 static int
829 create_extmem(uint32_t nb_mbufs, uint32_t mbuf_sz, struct extmem_param *param,
830 		bool huge)
831 {
832 	uint64_t pgsizes[] = {RTE_PGSIZE_2M, RTE_PGSIZE_1G, /* x86_64, ARM */
833 			RTE_PGSIZE_16M, RTE_PGSIZE_16G};    /* POWER */
834 	unsigned int cur_page, n_pages, pgsz_idx;
835 	size_t mem_sz, cur_pgsz;
836 	rte_iova_t *iovas = NULL;
837 	void *addr;
838 	int ret;
839 
840 	for (pgsz_idx = 0; pgsz_idx < RTE_DIM(pgsizes); pgsz_idx++) {
841 		/* skip anything that is too big */
842 		if (pgsizes[pgsz_idx] > SIZE_MAX)
843 			continue;
844 
845 		cur_pgsz = pgsizes[pgsz_idx];
846 
847 		/* if we were told not to allocate hugepages, override */
848 		if (!huge)
849 			cur_pgsz = sysconf(_SC_PAGESIZE);
850 
851 		ret = calc_mem_size(nb_mbufs, mbuf_sz, cur_pgsz, &mem_sz);
852 		if (ret < 0) {
853 			TESTPMD_LOG(ERR, "Cannot calculate memory size\n");
854 			return -1;
855 		}
856 
857 		/* allocate our memory */
858 		addr = alloc_mem(mem_sz, cur_pgsz, huge);
859 
860 		/* if we couldn't allocate memory with a specified page size,
861 		 * that doesn't mean we can't do it with other page sizes, so
862 		 * try another one.
863 		 */
864 		if (addr == NULL)
865 			continue;
866 
867 		/* store IOVA addresses for every page in this memory area */
868 		n_pages = mem_sz / cur_pgsz;
869 
870 		iovas = malloc(sizeof(*iovas) * n_pages);
871 
872 		if (iovas == NULL) {
873 			TESTPMD_LOG(ERR, "Cannot allocate memory for iova addresses\n");
874 			goto fail;
875 		}
876 		/* lock memory if it's not huge pages */
877 		if (!huge)
878 			mlock(addr, mem_sz);
879 
880 		/* populate IOVA addresses */
881 		for (cur_page = 0; cur_page < n_pages; cur_page++) {
882 			rte_iova_t iova;
883 			size_t offset;
884 			void *cur;
885 
886 			offset = cur_pgsz * cur_page;
887 			cur = RTE_PTR_ADD(addr, offset);
888 
889 			/* touch the page before getting its IOVA */
890 			*(volatile char *)cur = 0;
891 
892 			iova = rte_mem_virt2iova(cur);
893 
894 			iovas[cur_page] = iova;
895 		}
896 
897 		break;
898 	}
899 	/* if we couldn't allocate anything */
900 	if (iovas == NULL)
901 		return -1;
902 
903 	param->addr = addr;
904 	param->len = mem_sz;
905 	param->pgsz = cur_pgsz;
906 	param->iova_table = iovas;
907 	param->iova_table_len = n_pages;
908 
909 	return 0;
910 fail:
911 	if (iovas)
912 		free(iovas);
913 	if (addr)
914 		munmap(addr, mem_sz);
915 
916 	return -1;
917 }
918 
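/*
 * Create the external memory area and add it to the dedicated malloc heap
 * (EXTMEM_HEAP_NAME) so that mempools can later be allocated from it.
 */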
919 static int
920 setup_extmem(uint32_t nb_mbufs, uint32_t mbuf_sz, bool huge)
921 {
922 	struct extmem_param param;
923 	int socket_id, ret;
924 
925 	memset(&param, 0, sizeof(param));
926 
927 	/* check if our heap exists */
928 	socket_id = rte_malloc_heap_get_socket(EXTMEM_HEAP_NAME);
929 	if (socket_id < 0) {
930 		/* create our heap */
931 		ret = rte_malloc_heap_create(EXTMEM_HEAP_NAME);
932 		if (ret < 0) {
933 			TESTPMD_LOG(ERR, "Cannot create heap\n");
934 			return -1;
935 		}
936 	}
937 
938 	ret = create_extmem(nb_mbufs, mbuf_sz, &param, huge);
939 	if (ret < 0) {
940 		TESTPMD_LOG(ERR, "Cannot create memory area\n");
941 		return -1;
942 	}
943 
944 	/* we now have a valid memory area, so add it to heap */
945 	ret = rte_malloc_heap_memory_add(EXTMEM_HEAP_NAME,
946 			param.addr, param.len, param.iova_table,
947 			param.iova_table_len, param.pgsz);
948 
949 	/* when using VFIO, memory is automatically mapped for DMA by EAL */
950 
951 	/* not needed any more */
952 	free(param.iova_table);
953 
954 	if (ret < 0) {
955 		TESTPMD_LOG(ERR, "Cannot add memory to heap\n");
956 		munmap(param.addr, param.len);
957 		return -1;
958 	}
959 
960 	/* success */
961 
962 	TESTPMD_LOG(DEBUG, "Allocated %zuMB of external memory\n",
963 			param.len >> 20);
964 
965 	return 0;
966 }
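
/*
 * Mempool memory chunk callback: DMA-unmap the chunk from every ethdev and
 * un-register it as external memory. Errors are only logged.
 */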
967 static void
968 dma_unmap_cb(struct rte_mempool *mp __rte_unused, void *opaque __rte_unused,
969 	     struct rte_mempool_memhdr *memhdr, unsigned mem_idx __rte_unused)
970 {
971 	uint16_t pid = 0;
972 	int ret;
973 
974 	RTE_ETH_FOREACH_DEV(pid) {
975 		struct rte_eth_dev_info dev_info;
976 
977 		ret = eth_dev_info_get_print_err(pid, &dev_info);
978 		if (ret != 0) {
979 			TESTPMD_LOG(DEBUG,
980 				    "unable to get device info for port %d on addr 0x%p, "
981 				    "mempool unmapping will not be performed\n",
982 				    pid, memhdr->addr);
983 			continue;
984 		}
985 
986 		ret = rte_dev_dma_unmap(dev_info.device, memhdr->addr, 0, memhdr->len);
987 		if (ret) {
988 			TESTPMD_LOG(DEBUG,
989 				    "unable to DMA unmap addr 0x%p "
990 				    "for device %s\n",
991 				    memhdr->addr, dev_info.device->name);
992 		}
993 	}
994 	ret = rte_extmem_unregister(memhdr->addr, memhdr->len);
995 	if (ret) {
996 		TESTPMD_LOG(DEBUG,
997 			    "unable to un-register addr 0x%p\n", memhdr->addr);
998 	}
999 }
1000 
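/*
 * Mempool memory chunk callback: register the chunk as external memory and
 * DMA-map it for every ethdev. Errors are only logged.
 */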
1001 static void
1002 dma_map_cb(struct rte_mempool *mp __rte_unused, void *opaque __rte_unused,
1003 	   struct rte_mempool_memhdr *memhdr, unsigned mem_idx __rte_unused)
1004 {
1005 	uint16_t pid = 0;
1006 	size_t page_size = sysconf(_SC_PAGESIZE);
1007 	int ret;
1008 
1009 	ret = rte_extmem_register(memhdr->addr, memhdr->len, NULL, 0,
1010 				  page_size);
1011 	if (ret) {
1012 		TESTPMD_LOG(DEBUG,
1013 			    "unable to register addr 0x%p\n", memhdr->addr);
1014 		return;
1015 	}
1016 	RTE_ETH_FOREACH_DEV(pid) {
1017 		struct rte_eth_dev_info dev_info;
1018 
1019 		ret = eth_dev_info_get_print_err(pid, &dev_info);
1020 		if (ret != 0) {
1021 			TESTPMD_LOG(DEBUG,
1022 				    "unable to get device info for port %d on addr 0x%p, "
1023 				    "mempool mapping will not be performed\n",
1024 				    pid, memhdr->addr);
1025 			continue;
1026 		}
1027 		ret = rte_dev_dma_map(dev_info.device, memhdr->addr, 0, memhdr->len);
1028 		if (ret) {
1029 			TESTPMD_LOG(DEBUG,
1030 				    "unable to DMA map addr 0x%p "
1031 				    "for device %s\n",
1032 				    memhdr->addr, dev_info.device->name);
1033 		}
1034 	}
1035 }
1036 #endif
1037 
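/*
 * Reserve IOVA-contiguous memzones used as pinned external buffers and fill
 * the extmem descriptor array. Returns the number of descriptors, or 0 on
 * failure with errno set.
 */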
1038 static unsigned int
1039 setup_extbuf(uint32_t nb_mbufs, uint16_t mbuf_sz, unsigned int socket_id,
1040 	    char *pool_name, struct rte_pktmbuf_extmem **ext_mem)
1041 {
1042 	struct rte_pktmbuf_extmem *xmem;
1043 	unsigned int ext_num, zone_num, elt_num;
1044 	uint16_t elt_size;
1045 
1046 	elt_size = RTE_ALIGN_CEIL(mbuf_sz, RTE_CACHE_LINE_SIZE);
1047 	elt_num = EXTBUF_ZONE_SIZE / elt_size;
1048 	zone_num = (nb_mbufs + elt_num - 1) / elt_num;
1049 
1050 	xmem = malloc(sizeof(struct rte_pktmbuf_extmem) * zone_num);
1051 	if (xmem == NULL) {
1052 		TESTPMD_LOG(ERR, "Cannot allocate memory for "
1053 				 "external buffer descriptors\n");
1054 		*ext_mem = NULL;
1055 		return 0;
1056 	}
1057 	for (ext_num = 0; ext_num < zone_num; ext_num++) {
1058 		struct rte_pktmbuf_extmem *xseg = xmem + ext_num;
1059 		const struct rte_memzone *mz;
1060 		char mz_name[RTE_MEMZONE_NAMESIZE];
1061 		int ret;
1062 
1063 		ret = snprintf(mz_name, sizeof(mz_name),
1064 			RTE_MEMPOOL_MZ_FORMAT "_xb_%u", pool_name, ext_num);
1065 		if (ret < 0 || ret >= (int)sizeof(mz_name)) {
1066 			errno = ENAMETOOLONG;
1067 			ext_num = 0;
1068 			break;
1069 		}
1070 		mz = rte_memzone_reserve(mz_name, EXTBUF_ZONE_SIZE,
1071 					 socket_id,
1072 					 RTE_MEMZONE_IOVA_CONTIG |
1073 					 RTE_MEMZONE_1GB |
1074 					 RTE_MEMZONE_SIZE_HINT_ONLY);
1075 		if (mz == NULL) {
1076 			/*
1077 			 * The caller exits on external buffer creation
1078 			 * error, so there is no need to free memzones.
1079 			 */
1080 			errno = ENOMEM;
1081 			ext_num = 0;
1082 			break;
1083 		}
1084 		xseg->buf_ptr = mz->addr;
1085 		xseg->buf_iova = mz->iova;
1086 		xseg->buf_len = EXTBUF_ZONE_SIZE;
1087 		xseg->elt_size = elt_size;
1088 	}
1089 	if (ext_num == 0 && xmem != NULL) {
1090 		free(xmem);
1091 		xmem = NULL;
1092 	}
1093 	*ext_mem = xmem;
1094 	return ext_num;
1095 }
1096 
1097 /*
1098  * Create an mbuf pool for the given socket and mbuf size (secondary processes look up the existing pool). Done once at init time.
1099  */
1100 static struct rte_mempool *
1101 mbuf_pool_create(uint16_t mbuf_seg_size, unsigned nb_mbuf,
1102 		 unsigned int socket_id, uint16_t size_idx)
1103 {
1104 	char pool_name[RTE_MEMPOOL_NAMESIZE];
1105 	struct rte_mempool *rte_mp = NULL;
1106 #ifndef RTE_EXEC_ENV_WINDOWS
1107 	uint32_t mb_size;
1108 
1109 	mb_size = sizeof(struct rte_mbuf) + mbuf_seg_size;
1110 #endif
1111 	mbuf_poolname_build(socket_id, pool_name, sizeof(pool_name), size_idx);
1112 	if (!is_proc_primary()) {
1113 		rte_mp = rte_mempool_lookup(pool_name);
1114 		if (rte_mp == NULL)
1115 			rte_exit(EXIT_FAILURE,
1116 				"Get mbuf pool for socket %u failed: %s\n",
1117 				socket_id, rte_strerror(rte_errno));
1118 		return rte_mp;
1119 	}
1120 
1121 	TESTPMD_LOG(INFO,
1122 		"create a new mbuf pool <%s>: n=%u, size=%u, socket=%u\n",
1123 		pool_name, nb_mbuf, mbuf_seg_size, socket_id);
1124 
1125 	switch (mp_alloc_type) {
1126 	case MP_ALLOC_NATIVE:
1127 		{
1128 			/* wrapper to rte_mempool_create() */
1129 			TESTPMD_LOG(INFO, "preferred mempool ops selected: %s\n",
1130 					rte_mbuf_best_mempool_ops());
1131 			rte_mp = rte_pktmbuf_pool_create(pool_name, nb_mbuf,
1132 				mb_mempool_cache, 0, mbuf_seg_size, socket_id);
1133 			break;
1134 		}
1135 #ifndef RTE_EXEC_ENV_WINDOWS
1136 	case MP_ALLOC_ANON:
1137 		{
1138 			rte_mp = rte_mempool_create_empty(pool_name, nb_mbuf,
1139 				mb_size, (unsigned int) mb_mempool_cache,
1140 				sizeof(struct rte_pktmbuf_pool_private),
1141 				socket_id, mempool_flags);
1142 			if (rte_mp == NULL)
1143 				goto err;
1144 
1145 			if (rte_mempool_populate_anon(rte_mp) == 0) {
1146 				rte_mempool_free(rte_mp);
1147 				rte_mp = NULL;
1148 				goto err;
1149 			}
1150 			rte_pktmbuf_pool_init(rte_mp, NULL);
1151 			rte_mempool_obj_iter(rte_mp, rte_pktmbuf_init, NULL);
1152 			rte_mempool_mem_iter(rte_mp, dma_map_cb, NULL);
1153 			break;
1154 		}
1155 	case MP_ALLOC_XMEM:
1156 	case MP_ALLOC_XMEM_HUGE:
1157 		{
1158 			int heap_socket;
1159 			bool huge = mp_alloc_type == MP_ALLOC_XMEM_HUGE;
1160 
1161 			if (setup_extmem(nb_mbuf, mbuf_seg_size, huge) < 0)
1162 				rte_exit(EXIT_FAILURE, "Could not create external memory\n");
1163 
1164 			heap_socket =
1165 				rte_malloc_heap_get_socket(EXTMEM_HEAP_NAME);
1166 			if (heap_socket < 0)
1167 				rte_exit(EXIT_FAILURE, "Could not get external memory socket ID\n");
1168 
1169 			TESTPMD_LOG(INFO, "preferred mempool ops selected: %s\n",
1170 					rte_mbuf_best_mempool_ops());
1171 			rte_mp = rte_pktmbuf_pool_create(pool_name, nb_mbuf,
1172 					mb_mempool_cache, 0, mbuf_seg_size,
1173 					heap_socket);
1174 			break;
1175 		}
1176 #endif
1177 	case MP_ALLOC_XBUF:
1178 		{
1179 			struct rte_pktmbuf_extmem *ext_mem;
1180 			unsigned int ext_num;
1181 
1182 			ext_num = setup_extbuf(nb_mbuf,	mbuf_seg_size,
1183 					       socket_id, pool_name, &ext_mem);
1184 			if (ext_num == 0)
1185 				rte_exit(EXIT_FAILURE,
1186 					 "Can't create pinned data buffers\n");
1187 
1188 			TESTPMD_LOG(INFO, "preferred mempool ops selected: %s\n",
1189 					rte_mbuf_best_mempool_ops());
1190 			rte_mp = rte_pktmbuf_pool_create_extbuf
1191 					(pool_name, nb_mbuf, mb_mempool_cache,
1192 					 0, mbuf_seg_size, socket_id,
1193 					 ext_mem, ext_num);
1194 			free(ext_mem);
1195 			break;
1196 		}
1197 	default:
1198 		{
1199 			rte_exit(EXIT_FAILURE, "Invalid mempool creation mode\n");
1200 		}
1201 	}
1202 
1203 #ifndef RTE_EXEC_ENV_WINDOWS
1204 err:
1205 #endif
1206 	if (rte_mp == NULL) {
1207 		rte_exit(EXIT_FAILURE,
1208 			"Creation of mbuf pool for socket %u failed: %s\n",
1209 			socket_id, rte_strerror(rte_errno));
1210 	} else if (verbose_level > 0) {
1211 		rte_mempool_dump(stdout, rte_mp);
1212 	}
1213 	return rte_mp;
1214 }
1215 
1216 /*
1217  * Check whether the given socket ID is valid in NUMA mode.
1218  * Return 0 if valid, -1 otherwise.
1219  */
1220 static int
1221 check_socket_id(const unsigned int socket_id)
1222 {
1223 	static int warning_once = 0;
1224 
1225 	if (new_socket_id(socket_id)) {
1226 		if (!warning_once && numa_support)
1227 			fprintf(stderr,
1228 				"Warning: NUMA should be configured manually by using --port-numa-config and --ring-numa-config parameters along with --numa.\n");
1229 		warning_once = 1;
1230 		return -1;
1231 	}
1232 	return 0;
1233 }
1234 
1235 /*
1236  * Get the allowed maximum number of RX queues.
1237  * *pid returns the port ID that has the minimal value of
1238  * max_rx_queues among all ports.
1239  */
1240 queueid_t
1241 get_allowed_max_nb_rxq(portid_t *pid)
1242 {
1243 	queueid_t allowed_max_rxq = RTE_MAX_QUEUES_PER_PORT;
1244 	bool max_rxq_valid = false;
1245 	portid_t pi;
1246 	struct rte_eth_dev_info dev_info;
1247 
1248 	RTE_ETH_FOREACH_DEV(pi) {
1249 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1250 			continue;
1251 
1252 		max_rxq_valid = true;
1253 		if (dev_info.max_rx_queues < allowed_max_rxq) {
1254 			allowed_max_rxq = dev_info.max_rx_queues;
1255 			*pid = pi;
1256 		}
1257 	}
1258 	return max_rxq_valid ? allowed_max_rxq : 0;
1259 }
1260 
1261 /*
1262  * Check whether the requested number of RX queues is valid.
1263  * It is valid if it does not exceed the maximum number of
1264  * RX queues of any port.
1265  * Return 0 if valid, -1 otherwise.
1266  */
1267 int
1268 check_nb_rxq(queueid_t rxq)
1269 {
1270 	queueid_t allowed_max_rxq;
1271 	portid_t pid = 0;
1272 
1273 	allowed_max_rxq = get_allowed_max_nb_rxq(&pid);
1274 	if (rxq > allowed_max_rxq) {
1275 		fprintf(stderr,
1276 			"Fail: input rxq (%u) can't be greater than max_rx_queues (%u) of port %u\n",
1277 			rxq, allowed_max_rxq, pid);
1278 		return -1;
1279 	}
1280 	return 0;
1281 }
1282 
1283 /*
1284  * Get the allowed maximum number of TX queues.
1285  * *pid returns the port ID that has the minimal value of
1286  * max_tx_queues among all ports.
1287  */
1288 queueid_t
1289 get_allowed_max_nb_txq(portid_t *pid)
1290 {
1291 	queueid_t allowed_max_txq = RTE_MAX_QUEUES_PER_PORT;
1292 	bool max_txq_valid = false;
1293 	portid_t pi;
1294 	struct rte_eth_dev_info dev_info;
1295 
1296 	RTE_ETH_FOREACH_DEV(pi) {
1297 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1298 			continue;
1299 
1300 		max_txq_valid = true;
1301 		if (dev_info.max_tx_queues < allowed_max_txq) {
1302 			allowed_max_txq = dev_info.max_tx_queues;
1303 			*pid = pi;
1304 		}
1305 	}
1306 	return max_txq_valid ? allowed_max_txq : 0;
1307 }
1308 
1309 /*
1310  * Check whether the requested number of TX queues is valid.
1311  * It is valid if it does not exceed the maximum number of
1312  * TX queues of any port.
1313  * Return 0 if valid, -1 otherwise.
1314  */
1315 int
1316 check_nb_txq(queueid_t txq)
1317 {
1318 	queueid_t allowed_max_txq;
1319 	portid_t pid = 0;
1320 
1321 	allowed_max_txq = get_allowed_max_nb_txq(&pid);
1322 	if (txq > allowed_max_txq) {
1323 		fprintf(stderr,
1324 			"Fail: input txq (%u) can't be greater than max_tx_queues (%u) of port %u\n",
1325 			txq, allowed_max_txq, pid);
1326 		return -1;
1327 	}
1328 	return 0;
1329 }
1330 
1331 /*
1332  * Get the allowed maximum number of RXDs of every rx queue.
1333  * *pid returns the port ID that has the minimal value of
1334  * max_rxd among all queues of all ports.
1335  */
1336 static uint16_t
1337 get_allowed_max_nb_rxd(portid_t *pid)
1338 {
1339 	uint16_t allowed_max_rxd = UINT16_MAX;
1340 	portid_t pi;
1341 	struct rte_eth_dev_info dev_info;
1342 
1343 	RTE_ETH_FOREACH_DEV(pi) {
1344 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1345 			continue;
1346 
1347 		if (dev_info.rx_desc_lim.nb_max < allowed_max_rxd) {
1348 			allowed_max_rxd = dev_info.rx_desc_lim.nb_max;
1349 			*pid = pi;
1350 		}
1351 	}
1352 	return allowed_max_rxd;
1353 }
1354 
1355 /*
1356  * Get the allowed minimal number of RXDs of every rx queue.
1357  * *pid returns the port ID that has the largest value of
1358  * min_rxd among all queues of all ports.
1359  */
1360 static uint16_t
1361 get_allowed_min_nb_rxd(portid_t *pid)
1362 {
1363 	uint16_t allowed_min_rxd = 0;
1364 	portid_t pi;
1365 	struct rte_eth_dev_info dev_info;
1366 
1367 	RTE_ETH_FOREACH_DEV(pi) {
1368 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1369 			continue;
1370 
1371 		if (dev_info.rx_desc_lim.nb_min > allowed_min_rxd) {
1372 			allowed_min_rxd = dev_info.rx_desc_lim.nb_min;
1373 			*pid = pi;
1374 		}
1375 	}
1376 
1377 	return allowed_min_rxd;
1378 }
1379 
1380 /*
1381  * Check whether the requested number of RXDs is valid.
1382  * It is valid if it does not exceed the maximum number of RXDs
1383  * of any Rx queue and is not less than the minimal number of
1384  * RXDs of any Rx queue.
1385  * Return 0 if valid, -1 otherwise.
1386  */
1387 int
1388 check_nb_rxd(queueid_t rxd)
1389 {
1390 	uint16_t allowed_max_rxd;
1391 	uint16_t allowed_min_rxd;
1392 	portid_t pid = 0;
1393 
1394 	allowed_max_rxd = get_allowed_max_nb_rxd(&pid);
1395 	if (rxd > allowed_max_rxd) {
1396 		fprintf(stderr,
1397 			"Fail: input rxd (%u) can't be greater than max_rxds (%u) of port %u\n",
1398 			rxd, allowed_max_rxd, pid);
1399 		return -1;
1400 	}
1401 
1402 	allowed_min_rxd = get_allowed_min_nb_rxd(&pid);
1403 	if (rxd < allowed_min_rxd) {
1404 		fprintf(stderr,
1405 			"Fail: input rxd (%u) can't be less than min_rxds (%u) of port %u\n",
1406 			rxd, allowed_min_rxd, pid);
1407 		return -1;
1408 	}
1409 
1410 	return 0;
1411 }
1412 
1413 /*
1414  * Get the allowed maximum number of TXDs of every tx queue.
1415  * *pid returns the port ID that has the minimal value of
1416  * max_txd among all tx queues.
1417  */
1418 static uint16_t
1419 get_allowed_max_nb_txd(portid_t *pid)
1420 {
1421 	uint16_t allowed_max_txd = UINT16_MAX;
1422 	portid_t pi;
1423 	struct rte_eth_dev_info dev_info;
1424 
1425 	RTE_ETH_FOREACH_DEV(pi) {
1426 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1427 			continue;
1428 
1429 		if (dev_info.tx_desc_lim.nb_max < allowed_max_txd) {
1430 			allowed_max_txd = dev_info.tx_desc_lim.nb_max;
1431 			*pid = pi;
1432 		}
1433 	}
1434 	return allowed_max_txd;
1435 }
1436 
1437 /*
1438  * Get the allowed minimal number of TXDs of every tx queue.
1439  * *pid returns the port ID that has the largest value of
1440  * min_txd among all tx queues.
1441  */
1442 static uint16_t
1443 get_allowed_min_nb_txd(portid_t *pid)
1444 {
1445 	uint16_t allowed_min_txd = 0;
1446 	portid_t pi;
1447 	struct rte_eth_dev_info dev_info;
1448 
1449 	RTE_ETH_FOREACH_DEV(pi) {
1450 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1451 			continue;
1452 
1453 		if (dev_info.tx_desc_lim.nb_min > allowed_min_txd) {
1454 			allowed_min_txd = dev_info.tx_desc_lim.nb_min;
1455 			*pid = pi;
1456 		}
1457 	}
1458 
1459 	return allowed_min_txd;
1460 }
1461 
1462 /*
1463  * Check whether the requested number of TXDs is valid.
1464  * It is valid if it does not exceed the maximum number of TXDs of any
1465  * Tx queue and is not less than the minimal number of TXDs of any Tx queue.
1466  * Return 0 if valid, -1 otherwise.
1467  */
1468 int
1469 check_nb_txd(queueid_t txd)
1470 {
1471 	uint16_t allowed_max_txd;
1472 	uint16_t allowed_min_txd;
1473 	portid_t pid = 0;
1474 
1475 	allowed_max_txd = get_allowed_max_nb_txd(&pid);
1476 	if (txd > allowed_max_txd) {
1477 		fprintf(stderr,
1478 			"Fail: input txd (%u) can't be greater than max_txds (%u) of port %u\n",
1479 			txd, allowed_max_txd, pid);
1480 		return -1;
1481 	}
1482 
1483 	allowed_min_txd = get_allowed_min_nb_txd(&pid);
1484 	if (txd < allowed_min_txd) {
1485 		fprintf(stderr,
1486 			"Fail: input txd (%u) can't be less than min_txds (%u) of port %u\n",
1487 			txd, allowed_min_txd, pid);
1488 		return -1;
1489 	}
1490 	return 0;
1491 }
1492 
1493 
1494 /*
1495  * Get the allowed maximum number of hairpin queues.
1496  * *pid returns the port ID that has the minimal value of
1497  * max_hairpin_queues among all ports.
1498  */
1499 queueid_t
1500 get_allowed_max_nb_hairpinq(portid_t *pid)
1501 {
1502 	queueid_t allowed_max_hairpinq = RTE_MAX_QUEUES_PER_PORT;
1503 	portid_t pi;
1504 	struct rte_eth_hairpin_cap cap;
1505 
1506 	RTE_ETH_FOREACH_DEV(pi) {
1507 		if (rte_eth_dev_hairpin_capability_get(pi, &cap) != 0) {
1508 			*pid = pi;
1509 			return 0;
1510 		}
1511 		if (cap.max_nb_queues < allowed_max_hairpinq) {
1512 			allowed_max_hairpinq = cap.max_nb_queues;
1513 			*pid = pi;
1514 		}
1515 	}
1516 	return allowed_max_hairpinq;
1517 }
1518 
1519 /*
1520  * Check whether the requested number of hairpin queues is valid.
1521  * It is valid if it does not exceed the maximum number of hairpin
1522  * queues of any port.
1523  * Return 0 if valid, -1 otherwise.
1524  */
1525 int
1526 check_nb_hairpinq(queueid_t hairpinq)
1527 {
1528 	queueid_t allowed_max_hairpinq;
1529 	portid_t pid = 0;
1530 
1531 	allowed_max_hairpinq = get_allowed_max_nb_hairpinq(&pid);
1532 	if (hairpinq > allowed_max_hairpinq) {
1533 		fprintf(stderr,
1534 			"Fail: input hairpin (%u) can't be greater than max_hairpin_queues (%u) of port %u\n",
1535 			hairpinq, allowed_max_hairpinq, pid);
1536 		return -1;
1537 	}
1538 	return 0;
1539 }
1540 
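/*
 * Return the L2 overhead (in bytes) added to the MTU to form a frame:
 * derived from the max_rx_pktlen/max_mtu gap reported by the driver,
 * or Ethernet header plus CRC as a fallback.
 */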
1541 static int
1542 get_eth_overhead(struct rte_eth_dev_info *dev_info)
1543 {
1544 	uint32_t eth_overhead;
1545 
1546 	if (dev_info->max_mtu != UINT16_MAX &&
1547 	    dev_info->max_rx_pktlen > dev_info->max_mtu)
1548 		eth_overhead = dev_info->max_rx_pktlen - dev_info->max_mtu;
1549 	else
1550 		eth_overhead = RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN;
1551 
1552 	return eth_overhead;
1553 }
1554 
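/*
 * Apply the default Tx/Rx modes and offloads to one port, mark it for
 * (re)configuration and, if required by the driver's per-MTU segment limit,
 * grow the configured size of the first mbuf segment.
 */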
1555 static void
1556 init_config_port_offloads(portid_t pid, uint32_t socket_id)
1557 {
1558 	struct rte_port *port = &ports[pid];
1559 	int ret;
1560 	int i;
1561 
1562 	eth_rx_metadata_negotiate_mp(pid);
1563 
1564 	port->dev_conf.txmode = tx_mode;
1565 	port->dev_conf.rxmode = rx_mode;
1566 
1567 	ret = eth_dev_info_get_print_err(pid, &port->dev_info);
1568 	if (ret != 0)
1569 		rte_exit(EXIT_FAILURE, "rte_eth_dev_info_get() failed\n");
1570 
1571 	if (!(port->dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE))
1572 		port->dev_conf.txmode.offloads &=
1573 			~RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE;
1574 
1575 	/* Apply Rx offloads configuration */
1576 	for (i = 0; i < port->dev_info.max_rx_queues; i++)
1577 		port->rx_conf[i].offloads = port->dev_conf.rxmode.offloads;
1578 	/* Apply Tx offloads configuration */
1579 	for (i = 0; i < port->dev_info.max_tx_queues; i++)
1580 		port->tx_conf[i].offloads = port->dev_conf.txmode.offloads;
1581 
1582 	if (eth_link_speed)
1583 		port->dev_conf.link_speeds = eth_link_speed;
1584 
1585 	if (max_rx_pkt_len)
1586 		port->dev_conf.rxmode.mtu = max_rx_pkt_len -
1587 			get_eth_overhead(&port->dev_info);
1588 
1589 	/* set flag to initialize port/queue */
1590 	port->need_reconfig = 1;
1591 	port->need_reconfig_queues = 1;
1592 	port->socket_id = socket_id;
1593 	port->tx_metadata = 0;
1594 
1595 	/*
1596 	 * Check for maximum number of segments per MTU.
1597 	 * Accordingly update the mbuf data size.
1598 	 */
1599 	if (port->dev_info.rx_desc_lim.nb_mtu_seg_max != UINT16_MAX &&
1600 	    port->dev_info.rx_desc_lim.nb_mtu_seg_max != 0) {
1601 		uint32_t eth_overhead = get_eth_overhead(&port->dev_info);
1602 		uint16_t mtu;
1603 
1604 		if (rte_eth_dev_get_mtu(pid, &mtu) == 0) {
1605 			uint16_t data_size = (mtu + eth_overhead) /
1606 				port->dev_info.rx_desc_lim.nb_mtu_seg_max;
1607 			uint16_t buffer_size = data_size + RTE_PKTMBUF_HEADROOM;
1608 
1609 			if (buffer_size > mbuf_data_size[0]) {
1610 				mbuf_data_size[0] = buffer_size;
1611 				TESTPMD_LOG(WARNING,
1612 					"Configured mbuf size of the first segment %hu\n",
1613 					mbuf_data_size[0]);
1614 			}
1615 		}
1616 	}
1617 }
1618 
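/*
 * One-time initialisation: allocate the forwarding lcore structures, apply
 * per-port defaults, create the mbuf pools (per socket and segment size) and
 * set up the GSO/GRO contexts and the forwarding configuration.
 */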
1619 static void
1620 init_config(void)
1621 {
1622 	portid_t pid;
1623 	struct rte_mempool *mbp;
1624 	unsigned int nb_mbuf_per_pool;
1625 	lcoreid_t  lc_id;
1626 #ifdef RTE_LIB_GRO
1627 	struct rte_gro_param gro_param;
1628 #endif
1629 #ifdef RTE_LIB_GSO
1630 	uint32_t gso_types;
1631 #endif
1632 
1633 	/* Configuration of logical cores. */
1634 	fwd_lcores = rte_zmalloc("testpmd: fwd_lcores",
1635 				sizeof(struct fwd_lcore *) * nb_lcores,
1636 				RTE_CACHE_LINE_SIZE);
1637 	if (fwd_lcores == NULL) {
1638 		rte_exit(EXIT_FAILURE, "rte_zmalloc(%d (struct fwd_lcore *)) "
1639 							"failed\n", nb_lcores);
1640 	}
1641 	for (lc_id = 0; lc_id < nb_lcores; lc_id++) {
1642 		fwd_lcores[lc_id] = rte_zmalloc("testpmd: struct fwd_lcore",
1643 					       sizeof(struct fwd_lcore),
1644 					       RTE_CACHE_LINE_SIZE);
1645 		if (fwd_lcores[lc_id] == NULL) {
1646 			rte_exit(EXIT_FAILURE, "rte_zmalloc(struct fwd_lcore) "
1647 								"failed\n");
1648 		}
1649 		fwd_lcores[lc_id]->cpuid_idx = lc_id;
1650 	}
1651 
1652 	RTE_ETH_FOREACH_DEV(pid) {
1653 		uint32_t socket_id;
1654 
1655 		if (numa_support) {
1656 			socket_id = port_numa[pid];
1657 			if (port_numa[pid] == NUMA_NO_CONFIG) {
1658 				socket_id = rte_eth_dev_socket_id(pid);
1659 
1660 				/*
1661 				 * if socket_id is invalid,
1662 				 * set to the first available socket.
1663 				 */
1664 				if (check_socket_id(socket_id) < 0)
1665 					socket_id = socket_ids[0];
1666 			}
1667 		} else {
1668 			socket_id = (socket_num == UMA_NO_CONFIG) ?
1669 				    0 : socket_num;
1670 		}
1671 		/* Apply default TxRx configuration for all ports */
1672 		init_config_port_offloads(pid, socket_id);
1673 	}
1674 	/*
1675 	 * Create pools of mbuf.
1676 	 * If NUMA support is disabled, create a single pool of mbuf in
1677 	 * socket 0 memory by default.
1678 	 * Otherwise, create a pool of mbuf in the memory of sockets 0 and 1.
1679 	 *
1680 	 * Use the maximum value of nb_rxd and nb_txd here, then nb_rxd and
1681 	 * nb_txd can be configured at run time.
1682 	 */
1683 	if (param_total_num_mbufs)
1684 		nb_mbuf_per_pool = param_total_num_mbufs;
1685 	else {
1686 		nb_mbuf_per_pool = RTE_TEST_RX_DESC_MAX +
1687 			(nb_lcores * mb_mempool_cache) +
1688 			RTE_TEST_TX_DESC_MAX + MAX_PKT_BURST;
1689 		nb_mbuf_per_pool *= RTE_MAX_ETHPORTS;
1690 	}
1691 
1692 	if (numa_support) {
1693 		uint8_t i, j;
1694 
1695 		for (i = 0; i < num_sockets; i++)
1696 			for (j = 0; j < mbuf_data_size_n; j++)
1697 				mempools[i * MAX_SEGS_BUFFER_SPLIT + j] =
1698 					mbuf_pool_create(mbuf_data_size[j],
1699 							  nb_mbuf_per_pool,
1700 							  socket_ids[i], j);
1701 	} else {
1702 		uint8_t i;
1703 
1704 		for (i = 0; i < mbuf_data_size_n; i++)
1705 			mempools[i] = mbuf_pool_create
1706 					(mbuf_data_size[i],
1707 					 nb_mbuf_per_pool,
1708 					 socket_num == UMA_NO_CONFIG ?
1709 					 0 : socket_num, i);
1710 	}
1711 
1712 	init_port_config();
1713 
1714 #ifdef RTE_LIB_GSO
1715 	gso_types = RTE_ETH_TX_OFFLOAD_TCP_TSO | RTE_ETH_TX_OFFLOAD_VXLAN_TNL_TSO |
1716 		RTE_ETH_TX_OFFLOAD_GRE_TNL_TSO | RTE_ETH_TX_OFFLOAD_UDP_TSO;
1717 #endif
1718 	/*
1719 	 * Record which mbuf pool each logical core should use, if needed.
1720 	 */
1721 	for (lc_id = 0; lc_id < nb_lcores; lc_id++) {
1722 		mbp = mbuf_pool_find(
1723 			rte_lcore_to_socket_id(fwd_lcores_cpuids[lc_id]), 0);
1724 
1725 		if (mbp == NULL)
1726 			mbp = mbuf_pool_find(0, 0);
1727 		fwd_lcores[lc_id]->mbp = mbp;
1728 #ifdef RTE_LIB_GSO
1729 		/* initialize GSO context */
1730 		fwd_lcores[lc_id]->gso_ctx.direct_pool = mbp;
1731 		fwd_lcores[lc_id]->gso_ctx.indirect_pool = mbp;
1732 		fwd_lcores[lc_id]->gso_ctx.gso_types = gso_types;
1733 		fwd_lcores[lc_id]->gso_ctx.gso_size = RTE_ETHER_MAX_LEN -
1734 			RTE_ETHER_CRC_LEN;
1735 		fwd_lcores[lc_id]->gso_ctx.flag = 0;
1736 #endif
1737 	}
1738 
1739 	fwd_config_setup();
1740 
1741 #ifdef RTE_LIB_GRO
1742 	/* create a gro context for each lcore */
1743 	gro_param.gro_types = RTE_GRO_TCP_IPV4;
1744 	gro_param.max_flow_num = GRO_MAX_FLUSH_CYCLES;
1745 	gro_param.max_item_per_flow = MAX_PKT_BURST;
1746 	for (lc_id = 0; lc_id < nb_lcores; lc_id++) {
1747 		gro_param.socket_id = rte_lcore_to_socket_id(
1748 				fwd_lcores_cpuids[lc_id]);
1749 		fwd_lcores[lc_id]->gro_ctx = rte_gro_ctx_create(&gro_param);
1750 		if (fwd_lcores[lc_id]->gro_ctx == NULL) {
1751 			rte_exit(EXIT_FAILURE,
1752 					"rte_gro_ctx_create() failed\n");
1753 		}
1754 	}
1755 #endif
1756 }
1757 
1758 
1759 void
1760 reconfig(portid_t new_port_id, unsigned socket_id)
1761 {
1762 	/* Reconfiguration of Ethernet ports. */
1763 	init_config_port_offloads(new_port_id, socket_id);
1764 	init_port_config();
1765 }
1766 
1767 
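/*
 * (Re)allocate the forwarding streams: one per queue per port, based on the
 * maximum of nb_rxq and nb_txq. Each port's socket ID is also resolved here
 * according to the NUMA configuration.
 */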
1768 int
1769 init_fwd_streams(void)
1770 {
1771 	portid_t pid;
1772 	struct rte_port *port;
1773 	streamid_t sm_id, nb_fwd_streams_new;
1774 	queueid_t q;
1775 
1776 	/* set socket id according to numa or not */
1777 	RTE_ETH_FOREACH_DEV(pid) {
1778 		port = &ports[pid];
1779 		if (nb_rxq > port->dev_info.max_rx_queues) {
1780 			fprintf(stderr,
1781 				"Fail: nb_rxq(%d) is greater than max_rx_queues(%d)\n",
1782 				nb_rxq, port->dev_info.max_rx_queues);
1783 			return -1;
1784 		}
1785 		if (nb_txq > port->dev_info.max_tx_queues) {
1786 			fprintf(stderr,
1787 				"Fail: nb_txq(%d) is greater than max_tx_queues(%d)\n",
1788 				nb_txq, port->dev_info.max_tx_queues);
1789 			return -1;
1790 		}
1791 		if (numa_support) {
1792 			if (port_numa[pid] != NUMA_NO_CONFIG)
1793 				port->socket_id = port_numa[pid];
1794 			else {
1795 				port->socket_id = rte_eth_dev_socket_id(pid);
1796 
1797 				/*
1798 				 * if socket_id is invalid,
1799 				 * set to the first available socket.
1800 				 */
1801 				if (check_socket_id(port->socket_id) < 0)
1802 					port->socket_id = socket_ids[0];
1803 			}
1804 		}
1805 		else {
1806 			if (socket_num == UMA_NO_CONFIG)
1807 				port->socket_id = 0;
1808 			else
1809 				port->socket_id = socket_num;
1810 		}
1811 	}
1812 
1813 	q = RTE_MAX(nb_rxq, nb_txq);
1814 	if (q == 0) {
1815 		fprintf(stderr,
1816 			"Fail: Cannot allocate fwd streams as number of queues is 0\n");
1817 		return -1;
1818 	}
1819 	nb_fwd_streams_new = (streamid_t)(nb_ports * q);
1820 	if (nb_fwd_streams_new == nb_fwd_streams)
1821 		return 0;
1822 	/* clear the old */
1823 	if (fwd_streams != NULL) {
1824 		for (sm_id = 0; sm_id < nb_fwd_streams; sm_id++) {
1825 			if (fwd_streams[sm_id] == NULL)
1826 				continue;
1827 			rte_free(fwd_streams[sm_id]);
1828 			fwd_streams[sm_id] = NULL;
1829 		}
1830 		rte_free(fwd_streams);
1831 		fwd_streams = NULL;
1832 	}
1833 
1834 	/* init new */
1835 	nb_fwd_streams = nb_fwd_streams_new;
1836 	if (nb_fwd_streams) {
1837 		fwd_streams = rte_zmalloc("testpmd: fwd_streams",
1838 			sizeof(struct fwd_stream *) * nb_fwd_streams,
1839 			RTE_CACHE_LINE_SIZE);
1840 		if (fwd_streams == NULL)
1841 			rte_exit(EXIT_FAILURE, "rte_zmalloc(%d"
1842 				 " (struct fwd_stream *)) failed\n",
1843 				 nb_fwd_streams);
1844 
1845 		for (sm_id = 0; sm_id < nb_fwd_streams; sm_id++) {
1846 			fwd_streams[sm_id] = rte_zmalloc("testpmd:"
1847 				" struct fwd_stream", sizeof(struct fwd_stream),
1848 				RTE_CACHE_LINE_SIZE);
1849 			if (fwd_streams[sm_id] == NULL)
1850 				rte_exit(EXIT_FAILURE, "rte_zmalloc"
1851 					 "(struct fwd_stream) failed\n");
1852 		}
1853 	}
1854 
1855 	return 0;
1856 }
1857 
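/*
 * Display the burst size distribution: the zero-size burst count is always
 * shown, followed by the two most frequent burst sizes as percentages of the
 * total number of bursts.
 */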
1858 static void
1859 pkt_burst_stats_display(const char *rx_tx, struct pkt_burst_stats *pbs)
1860 {
1861 	uint64_t total_burst, sburst;
1862 	uint64_t nb_burst;
1863 	uint64_t burst_stats[4];
1864 	uint16_t pktnb_stats[4];
1865 	uint16_t nb_pkt;
1866 	int burst_percent[4], sburstp;
1867 	int i;
1868 
1869 	/*
1870 	 * First compute the total number of packet bursts and the
1871 	 * two highest numbers of bursts of the same number of packets.
1872 	 */
1873 	memset(&burst_stats, 0x0, sizeof(burst_stats));
1874 	memset(&pktnb_stats, 0x0, sizeof(pktnb_stats));
1875 
1876 	/* Show stats for 0 burst size always */
1877 	total_burst = pbs->pkt_burst_spread[0];
1878 	burst_stats[0] = pbs->pkt_burst_spread[0];
1879 	pktnb_stats[0] = 0;
1880 
1881 	/* Find the next 2 burst sizes with highest occurrences. */
1882 	for (nb_pkt = 1; nb_pkt < MAX_PKT_BURST + 1; nb_pkt++) {
1883 		nb_burst = pbs->pkt_burst_spread[nb_pkt];
1884 
1885 		if (nb_burst == 0)
1886 			continue;
1887 
1888 		total_burst += nb_burst;
1889 
1890 		if (nb_burst > burst_stats[1]) {
1891 			burst_stats[2] = burst_stats[1];
1892 			pktnb_stats[2] = pktnb_stats[1];
1893 			burst_stats[1] = nb_burst;
1894 			pktnb_stats[1] = nb_pkt;
1895 		} else if (nb_burst > burst_stats[2]) {
1896 			burst_stats[2] = nb_burst;
1897 			pktnb_stats[2] = nb_pkt;
1898 		}
1899 	}
1900 	if (total_burst == 0)
1901 		return;
1902 
1903 	printf("  %s-bursts : %"PRIu64" [", rx_tx, total_burst);
1904 	for (i = 0, sburst = 0, sburstp = 0; i < 4; i++) {
1905 		if (i == 3) {
1906 			printf("%d%% of other]\n", 100 - sburstp);
1907 			return;
1908 		}
1909 
1910 		sburst += burst_stats[i];
1911 		if (sburst == total_burst) {
1912 			printf("%d%% of %d pkts]\n",
1913 				100 - sburstp, (int) pktnb_stats[i]);
1914 			return;
1915 		}
1916 
1917 		burst_percent[i] =
1918 			(double)burst_stats[i] / total_burst * 100;
1919 		printf("%d%% of %d pkts + ",
1920 			burst_percent[i], (int) pktnb_stats[i]);
1921 		sburstp += burst_percent[i];
1922 	}
1923 }
1924 
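/*
 * Display the statistics of one forwarding stream, including checksum error
 * counters in csum mode and, optionally, the burst size distribution.
 */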
1925 static void
1926 fwd_stream_stats_display(streamid_t stream_id)
1927 {
1928 	struct fwd_stream *fs;
1929 	static const char *fwd_top_stats_border = "-------";
1930 
1931 	fs = fwd_streams[stream_id];
1932 	if ((fs->rx_packets == 0) && (fs->tx_packets == 0) &&
1933 	    (fs->fwd_dropped == 0))
1934 		return;
1935 	printf("\n  %s Forward Stats for RX Port=%2d/Queue=%2d -> "
1936 	       "TX Port=%2d/Queue=%2d %s\n",
1937 	       fwd_top_stats_border, fs->rx_port, fs->rx_queue,
1938 	       fs->tx_port, fs->tx_queue, fwd_top_stats_border);
1939 	printf("  RX-packets: %-14"PRIu64" TX-packets: %-14"PRIu64
1940 	       " TX-dropped: %-14"PRIu64,
1941 	       fs->rx_packets, fs->tx_packets, fs->fwd_dropped);
1942 
1943 	/* if checksum mode */
1944 	if (cur_fwd_eng == &csum_fwd_engine) {
1945 		printf("  RX- bad IP checksum: %-14"PRIu64
1946 		       "  Rx- bad L4 checksum: %-14"PRIu64
1947 		       " Rx- bad outer L4 checksum: %-14"PRIu64"\n",
1948 			fs->rx_bad_ip_csum, fs->rx_bad_l4_csum,
1949 			fs->rx_bad_outer_l4_csum);
1950 		printf(" RX- bad outer IP checksum: %-14"PRIu64"\n",
1951 			fs->rx_bad_outer_ip_csum);
1952 	} else {
1953 		printf("\n");
1954 	}
1955 
1956 	if (record_burst_stats) {
1957 		pkt_burst_stats_display("RX", &fs->rx_burst_stats);
1958 		pkt_burst_stats_display("TX", &fs->tx_burst_stats);
1959 	}
1960 }
1961 
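/*
 * Display the forwarding statistics accumulated since the last reset:
 * per-stream stats when there are more streams than ports, per-port stats
 * relative to the baseline taken by fwd_stats_reset(), the totals for all
 * ports and, when cycle recording is on, the CPU cycles spent per packet.
 */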
1962 void
1963 fwd_stats_display(void)
1964 {
1965 	static const char *fwd_stats_border = "----------------------";
1966 	static const char *acc_stats_border = "+++++++++++++++";
1967 	struct {
1968 		struct fwd_stream *rx_stream;
1969 		struct fwd_stream *tx_stream;
1970 		uint64_t tx_dropped;
1971 		uint64_t rx_bad_ip_csum;
1972 		uint64_t rx_bad_l4_csum;
1973 		uint64_t rx_bad_outer_l4_csum;
1974 		uint64_t rx_bad_outer_ip_csum;
1975 	} ports_stats[RTE_MAX_ETHPORTS];
1976 	uint64_t total_rx_dropped = 0;
1977 	uint64_t total_tx_dropped = 0;
1978 	uint64_t total_rx_nombuf = 0;
1979 	struct rte_eth_stats stats;
1980 	uint64_t fwd_cycles = 0;
1981 	uint64_t total_recv = 0;
1982 	uint64_t total_xmit = 0;
1983 	struct rte_port *port;
1984 	streamid_t sm_id;
1985 	portid_t pt_id;
1986 	int i;
1987 
1988 	memset(ports_stats, 0, sizeof(ports_stats));
1989 
1990 	for (sm_id = 0; sm_id < cur_fwd_config.nb_fwd_streams; sm_id++) {
1991 		struct fwd_stream *fs = fwd_streams[sm_id];
1992 
1993 		if (cur_fwd_config.nb_fwd_streams >
1994 		    cur_fwd_config.nb_fwd_ports) {
1995 			fwd_stream_stats_display(sm_id);
1996 		} else {
1997 			ports_stats[fs->tx_port].tx_stream = fs;
1998 			ports_stats[fs->rx_port].rx_stream = fs;
1999 		}
2000 
2001 		ports_stats[fs->tx_port].tx_dropped += fs->fwd_dropped;
2002 
2003 		ports_stats[fs->rx_port].rx_bad_ip_csum += fs->rx_bad_ip_csum;
2004 		ports_stats[fs->rx_port].rx_bad_l4_csum += fs->rx_bad_l4_csum;
2005 		ports_stats[fs->rx_port].rx_bad_outer_l4_csum +=
2006 				fs->rx_bad_outer_l4_csum;
2007 		ports_stats[fs->rx_port].rx_bad_outer_ip_csum +=
2008 				fs->rx_bad_outer_ip_csum;
2009 
2010 		if (record_core_cycles)
2011 			fwd_cycles += fs->core_cycles;
2012 	}
2013 	for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
2014 		pt_id = fwd_ports_ids[i];
2015 		port = &ports[pt_id];
2016 
2017 		rte_eth_stats_get(pt_id, &stats);
2018 		stats.ipackets -= port->stats.ipackets;
2019 		stats.opackets -= port->stats.opackets;
2020 		stats.ibytes -= port->stats.ibytes;
2021 		stats.obytes -= port->stats.obytes;
2022 		stats.imissed -= port->stats.imissed;
2023 		stats.oerrors -= port->stats.oerrors;
2024 		stats.rx_nombuf -= port->stats.rx_nombuf;
2025 
2026 		total_recv += stats.ipackets;
2027 		total_xmit += stats.opackets;
2028 		total_rx_dropped += stats.imissed;
2029 		total_tx_dropped += ports_stats[pt_id].tx_dropped;
2030 		total_tx_dropped += stats.oerrors;
2031 		total_rx_nombuf  += stats.rx_nombuf;
2032 
2033 		printf("\n  %s Forward statistics for port %-2d %s\n",
2034 		       fwd_stats_border, pt_id, fwd_stats_border);
2035 
2036 		printf("  RX-packets: %-14"PRIu64" RX-dropped: %-14"PRIu64
2037 		       "RX-total: %-"PRIu64"\n", stats.ipackets, stats.imissed,
2038 		       stats.ipackets + stats.imissed);
2039 
2040 		if (cur_fwd_eng == &csum_fwd_engine) {
2041 			printf("  Bad-ipcsum: %-14"PRIu64
2042 			       " Bad-l4csum: %-14"PRIu64
2043 			       "Bad-outer-l4csum: %-14"PRIu64"\n",
2044 			       ports_stats[pt_id].rx_bad_ip_csum,
2045 			       ports_stats[pt_id].rx_bad_l4_csum,
2046 			       ports_stats[pt_id].rx_bad_outer_l4_csum);
2047 			printf("  Bad-outer-ipcsum: %-14"PRIu64"\n",
2048 			       ports_stats[pt_id].rx_bad_outer_ip_csum);
2049 		}
2050 		if (stats.ierrors + stats.rx_nombuf > 0) {
2051 			printf("  RX-error: %-"PRIu64"\n", stats.ierrors);
2052 			printf("  RX-nombufs: %-14"PRIu64"\n", stats.rx_nombuf);
2053 		}
2054 
2055 		printf("  TX-packets: %-14"PRIu64" TX-dropped: %-14"PRIu64
2056 		       "TX-total: %-"PRIu64"\n",
2057 		       stats.opackets, ports_stats[pt_id].tx_dropped,
2058 		       stats.opackets + ports_stats[pt_id].tx_dropped);
2059 
2060 		if (record_burst_stats) {
2061 			if (ports_stats[pt_id].rx_stream)
2062 				pkt_burst_stats_display("RX",
2063 					&ports_stats[pt_id].rx_stream->rx_burst_stats);
2064 			if (ports_stats[pt_id].tx_stream)
2065 				pkt_burst_stats_display("TX",
2066 				&ports_stats[pt_id].tx_stream->tx_burst_stats);
2067 		}
2068 
2069 		printf("  %s--------------------------------%s\n",
2070 		       fwd_stats_border, fwd_stats_border);
2071 	}
2072 
2073 	printf("\n  %s Accumulated forward statistics for all ports"
2074 	       "%s\n",
2075 	       acc_stats_border, acc_stats_border);
2076 	printf("  RX-packets: %-14"PRIu64" RX-dropped: %-14"PRIu64"RX-total: "
2077 	       "%-"PRIu64"\n"
2078 	       "  TX-packets: %-14"PRIu64" TX-dropped: %-14"PRIu64"TX-total: "
2079 	       "%-"PRIu64"\n",
2080 	       total_recv, total_rx_dropped, total_recv + total_rx_dropped,
2081 	       total_xmit, total_tx_dropped, total_xmit + total_tx_dropped);
2082 	if (total_rx_nombuf > 0)
2083 		printf("  RX-nombufs: %-14"PRIu64"\n", total_rx_nombuf);
2084 	printf("  %s++++++++++++++++++++++++++++++++++++++++++++++"
2085 	       "%s\n",
2086 	       acc_stats_border, acc_stats_border);
2087 	if (record_core_cycles) {
2088 #define CYC_PER_MHZ 1E6
2089 		if (total_recv > 0 || total_xmit > 0) {
2090 			uint64_t total_pkts = 0;
2091 			if (strcmp(cur_fwd_eng->fwd_mode_name, "txonly") == 0 ||
2092 			    strcmp(cur_fwd_eng->fwd_mode_name, "flowgen") == 0)
2093 				total_pkts = total_xmit;
2094 			else
2095 				total_pkts = total_recv;
2096 
2097 			printf("\n  CPU cycles/packet=%.2F (total cycles="
2098 			       "%"PRIu64" / total %s packets=%"PRIu64") at %"PRIu64
2099 			       " MHz Clock\n",
2100 			       (double) fwd_cycles / total_pkts,
2101 			       fwd_cycles, cur_fwd_eng->fwd_mode_name, total_pkts,
2102 			       (uint64_t)(rte_get_tsc_hz() / CYC_PER_MHZ));
2103 		}
2104 	}
2105 }
2106 
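/*
 * Snapshot the current ethdev statistics of every forwarding port as the
 * new baseline and clear all per-stream counters and burst statistics.
 */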
2107 void
2108 fwd_stats_reset(void)
2109 {
2110 	streamid_t sm_id;
2111 	portid_t pt_id;
2112 	int i;
2113 
2114 	for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
2115 		pt_id = fwd_ports_ids[i];
2116 		rte_eth_stats_get(pt_id, &ports[pt_id].stats);
2117 	}
2118 	for (sm_id = 0; sm_id < cur_fwd_config.nb_fwd_streams; sm_id++) {
2119 		struct fwd_stream *fs = fwd_streams[sm_id];
2120 
2121 		fs->rx_packets = 0;
2122 		fs->tx_packets = 0;
2123 		fs->fwd_dropped = 0;
2124 		fs->rx_bad_ip_csum = 0;
2125 		fs->rx_bad_l4_csum = 0;
2126 		fs->rx_bad_outer_l4_csum = 0;
2127 		fs->rx_bad_outer_ip_csum = 0;
2128 
2129 		memset(&fs->rx_burst_stats, 0, sizeof(fs->rx_burst_stats));
2130 		memset(&fs->tx_burst_stats, 0, sizeof(fs->tx_burst_stats));
2131 		fs->core_cycles = 0;
2132 	}
2133 }
2134 
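/*
 * Drain packets possibly left in the Rx queues of the forwarding ports
 * before a new run; each queue is polled for at most one second and the
 * whole flush is skipped in multi-process mode.
 */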
2135 static void
2136 flush_fwd_rx_queues(void)
2137 {
2138 	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
2139 	portid_t  rxp;
2140 	portid_t port_id;
2141 	queueid_t rxq;
2142 	uint16_t  nb_rx;
2143 	uint16_t  i;
2144 	uint8_t   j;
2145 	uint64_t prev_tsc = 0, diff_tsc, cur_tsc, timer_tsc = 0;
2146 	uint64_t timer_period;
2147 
2148 	if (num_procs > 1) {
2149 		printf("multi-process does not support flushing fwd Rx queues, skipping\n");
2150 		return;
2151 	}
2152 
2153 	/* convert to number of cycles */
2154 	timer_period = rte_get_timer_hz(); /* 1 second timeout */
2155 
2156 	for (j = 0; j < 2; j++) {
2157 		for (rxp = 0; rxp < cur_fwd_config.nb_fwd_ports; rxp++) {
2158 			for (rxq = 0; rxq < nb_rxq; rxq++) {
2159 				port_id = fwd_ports_ids[rxp];
2160 				/*
2161 				 * testpmd can get stuck in the do/while loop
2162 				 * below if rte_eth_rx_burst() keeps returning
2163 				 * packets, so a timer is used to break out of
2164 				 * the loop once the 1 second period expires.
2165 				 */
2166 				prev_tsc = rte_rdtsc();
2167 				do {
2168 					nb_rx = rte_eth_rx_burst(port_id, rxq,
2169 						pkts_burst, MAX_PKT_BURST);
2170 					for (i = 0; i < nb_rx; i++)
2171 						rte_pktmbuf_free(pkts_burst[i]);
2172 
2173 					cur_tsc = rte_rdtsc();
2174 					diff_tsc = cur_tsc - prev_tsc;
2175 					timer_tsc += diff_tsc;
2176 				} while ((nb_rx > 0) &&
2177 					(timer_tsc < timer_period));
2178 				timer_tsc = 0;
2179 			}
2180 		}
2181 		rte_delay_ms(10); /* wait 10 milliseconds before retrying */
2182 	}
2183 }
2184 
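/*
 * Forwarding loop of one logical core: run the packet forwarding callback
 * on every stream assigned to this lcore until asked to stop, updating the
 * bitrate and latency statistics on the designated lcores when enabled.
 */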
2185 static void
2186 run_pkt_fwd_on_lcore(struct fwd_lcore *fc, packet_fwd_t pkt_fwd)
2187 {
2188 	struct fwd_stream **fsm;
2189 	streamid_t nb_fs;
2190 	streamid_t sm_id;
2191 #ifdef RTE_LIB_BITRATESTATS
2192 	uint64_t tics_per_1sec;
2193 	uint64_t tics_datum;
2194 	uint64_t tics_current;
2195 	uint16_t i, cnt_ports;
2196 
2197 	cnt_ports = nb_ports;
2198 	tics_datum = rte_rdtsc();
2199 	tics_per_1sec = rte_get_timer_hz();
2200 #endif
2201 	fsm = &fwd_streams[fc->stream_idx];
2202 	nb_fs = fc->stream_nb;
2203 	do {
2204 		for (sm_id = 0; sm_id < nb_fs; sm_id++)
2205 			(*pkt_fwd)(fsm[sm_id]);
2206 #ifdef RTE_LIB_BITRATESTATS
2207 		if (bitrate_enabled != 0 &&
2208 				bitrate_lcore_id == rte_lcore_id()) {
2209 			tics_current = rte_rdtsc();
2210 			if (tics_current - tics_datum >= tics_per_1sec) {
2211 				/* Periodic bitrate calculation */
2212 				for (i = 0; i < cnt_ports; i++)
2213 					rte_stats_bitrate_calc(bitrate_data,
2214 						ports_ids[i]);
2215 				tics_datum = tics_current;
2216 			}
2217 		}
2218 #endif
2219 #ifdef RTE_LIB_LATENCYSTATS
2220 		if (latencystats_enabled != 0 &&
2221 				latencystats_lcore_id == rte_lcore_id())
2222 			rte_latencystats_update();
2223 #endif
2224 
2225 	} while (! fc->stopped);
2226 }
2227 
2228 static int
2229 start_pkt_forward_on_core(void *fwd_arg)
2230 {
2231 	run_pkt_fwd_on_lcore((struct fwd_lcore *) fwd_arg,
2232 			     cur_fwd_config.fwd_eng->packet_fwd);
2233 	return 0;
2234 }
2235 
2236 /*
2237  * Run the TXONLY packet forwarding engine to send a single burst of packets.
2238  * Used to start communication flows in network loopback test configurations.
2239  */
2240 static int
2241 run_one_txonly_burst_on_core(void *fwd_arg)
2242 {
2243 	struct fwd_lcore *fwd_lc;
2244 	struct fwd_lcore tmp_lcore;
2245 
2246 	fwd_lc = (struct fwd_lcore *) fwd_arg;
2247 	tmp_lcore = *fwd_lc;
2248 	tmp_lcore.stopped = 1;
2249 	run_pkt_fwd_on_lcore(&tmp_lcore, tx_only_engine.packet_fwd);
2250 	return 0;
2251 }
2252 
2253 /*
2254  * Launch packet forwarding:
2255  *     - Setup per-port forwarding context.
2256  *     - launch logical cores with their forwarding configuration.
2257  */
2258 static void
2259 launch_packet_forwarding(lcore_function_t *pkt_fwd_on_lcore)
2260 {
2261 	unsigned int i;
2262 	unsigned int lc_id;
2263 	int diag;
2264 
2265 	for (i = 0; i < cur_fwd_config.nb_fwd_lcores; i++) {
2266 		lc_id = fwd_lcores_cpuids[i];
2267 		if ((interactive == 0) || (lc_id != rte_lcore_id())) {
2268 			fwd_lcores[i]->stopped = 0;
2269 			diag = rte_eal_remote_launch(pkt_fwd_on_lcore,
2270 						     fwd_lcores[i], lc_id);
2271 			if (diag != 0)
2272 				fprintf(stderr,
2273 					"launch lcore %u failed - diag=%d\n",
2274 					lc_id, diag);
2275 		}
2276 	}
2277 }
2278 
2279 /*
2280  * Launch packet forwarding configuration.
2281  */
2282 void
2283 start_packet_forwarding(int with_tx_first)
2284 {
2285 	port_fwd_begin_t port_fwd_begin;
2286 	port_fwd_end_t  port_fwd_end;
2287 	unsigned int i;
2288 
2289 	if (strcmp(cur_fwd_eng->fwd_mode_name, "rxonly") == 0 && !nb_rxq)
2290 		rte_exit(EXIT_FAILURE, "rxq are 0, cannot use rxonly fwd mode\n");
2291 
2292 	if (strcmp(cur_fwd_eng->fwd_mode_name, "txonly") == 0 && !nb_txq)
2293 		rte_exit(EXIT_FAILURE, "txq are 0, cannot use txonly fwd mode\n");
2294 
2295 	if ((strcmp(cur_fwd_eng->fwd_mode_name, "rxonly") != 0 &&
2296 		strcmp(cur_fwd_eng->fwd_mode_name, "txonly") != 0) &&
2297 		(!nb_rxq || !nb_txq))
2298 		rte_exit(EXIT_FAILURE,
2299 			"Either rxq or txq are 0, cannot use %s fwd mode\n",
2300 			cur_fwd_eng->fwd_mode_name);
2301 
2302 	if (all_ports_started() == 0) {
2303 		fprintf(stderr, "Not all ports were started\n");
2304 		return;
2305 	}
2306 	if (test_done == 0) {
2307 		fprintf(stderr, "Packet forwarding already started\n");
2308 		return;
2309 	}
2310 
2311 	fwd_config_setup();
2312 
2313 	pkt_fwd_config_display(&cur_fwd_config);
2314 	if (!pkt_fwd_shared_rxq_check())
2315 		return;
2316 
2317 	port_fwd_begin = cur_fwd_config.fwd_eng->port_fwd_begin;
2318 	if (port_fwd_begin != NULL) {
2319 		for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
2320 			if (port_fwd_begin(fwd_ports_ids[i])) {
2321 				fprintf(stderr,
2322 					"Packet forwarding is not ready\n");
2323 				return;
2324 			}
2325 		}
2326 	}
2327 
2328 	if (with_tx_first) {
2329 		port_fwd_begin = tx_only_engine.port_fwd_begin;
2330 		if (port_fwd_begin != NULL) {
2331 			for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
2332 				if (port_fwd_begin(fwd_ports_ids[i])) {
2333 					fprintf(stderr,
2334 						"Packet forwarding is not ready\n");
2335 					return;
2336 				}
2337 			}
2338 		}
2339 	}
2340 
2341 	test_done = 0;
2342 
2343 	if(!no_flush_rx)
2344 		flush_fwd_rx_queues();
2345 
2346 	rxtx_config_display();
2347 
2348 	fwd_stats_reset();
2349 	if (with_tx_first) {
2350 		while (with_tx_first--) {
2351 			launch_packet_forwarding(
2352 					run_one_txonly_burst_on_core);
2353 			rte_eal_mp_wait_lcore();
2354 		}
2355 		port_fwd_end = tx_only_engine.port_fwd_end;
2356 		if (port_fwd_end != NULL) {
2357 			for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++)
2358 				(*port_fwd_end)(fwd_ports_ids[i]);
2359 		}
2360 	}
2361 	launch_packet_forwarding(start_pkt_forward_on_core);
2362 }
2363 
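/*
 * Tell all forwarding lcores to stop, wait for them to finish, run the
 * forwarding engine's per-port end callback and display the accumulated
 * statistics.
 */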
2364 void
2365 stop_packet_forwarding(void)
2366 {
2367 	port_fwd_end_t port_fwd_end;
2368 	lcoreid_t lc_id;
2369 	portid_t pt_id;
2370 	int i;
2371 
2372 	if (test_done) {
2373 		fprintf(stderr, "Packet forwarding not started\n");
2374 		return;
2375 	}
2376 	printf("Telling cores to stop...");
2377 	for (lc_id = 0; lc_id < cur_fwd_config.nb_fwd_lcores; lc_id++)
2378 		fwd_lcores[lc_id]->stopped = 1;
2379 	printf("\nWaiting for lcores to finish...\n");
2380 	rte_eal_mp_wait_lcore();
2381 	port_fwd_end = cur_fwd_config.fwd_eng->port_fwd_end;
2382 	if (port_fwd_end != NULL) {
2383 		for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
2384 			pt_id = fwd_ports_ids[i];
2385 			(*port_fwd_end)(pt_id);
2386 		}
2387 	}
2388 
2389 	fwd_stats_display();
2390 
2391 	printf("\nDone.\n");
2392 	test_done = 1;
2393 }
2394 
2395 void
2396 dev_set_link_up(portid_t pid)
2397 {
2398 	if (rte_eth_dev_set_link_up(pid) < 0)
2399 		fprintf(stderr, "\nSet link up fail.\n");
2400 }
2401 
2402 void
2403 dev_set_link_down(portid_t pid)
2404 {
2405 	if (rte_eth_dev_set_link_down(pid) < 0)
2406 		fprintf(stderr, "\nSet link down fail.\n");
2407 }
2408 
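/* Return 1 when every port that is not a bonding slave is started. */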
2409 static int
2410 all_ports_started(void)
2411 {
2412 	portid_t pi;
2413 	struct rte_port *port;
2414 
2415 	RTE_ETH_FOREACH_DEV(pi) {
2416 		port = &ports[pi];
2417 		/* Check if there is a port which is not started */
2418 		if ((port->port_status != RTE_PORT_STARTED) &&
2419 			(port->slave_flag == 0))
2420 			return 0;
2421 	}
2422 
2423 	/* All ports have been started */
2424 	return 1;
2425 }
2426 
2427 int
2428 port_is_stopped(portid_t port_id)
2429 {
2430 	struct rte_port *port = &ports[port_id];
2431 
2432 	if ((port->port_status != RTE_PORT_STOPPED) &&
2433 	    (port->slave_flag == 0))
2434 		return 0;
2435 	return 1;
2436 }
2437 
2438 int
2439 all_ports_stopped(void)
2440 {
2441 	portid_t pi;
2442 
2443 	RTE_ETH_FOREACH_DEV(pi) {
2444 		if (!port_is_stopped(pi))
2445 			return 0;
2446 	}
2447 
2448 	return 1;
2449 }
2450 
2451 int
2452 port_is_started(portid_t port_id)
2453 {
2454 	if (port_id_is_invalid(port_id, ENABLED_WARN))
2455 		return 0;
2456 
2457 	if (ports[port_id].port_status != RTE_PORT_STARTED)
2458 		return 0;
2459 
2460 	return 1;
2461 }
2462 
2463 /* Configure the Rx and Tx hairpin queues for the selected port. */
2464 static int
2465 setup_hairpin_queues(portid_t pi, portid_t p_pi, uint16_t cnt_pi)
2466 {
2467 	queueid_t qi;
2468 	struct rte_eth_hairpin_conf hairpin_conf = {
2469 		.peer_count = 1,
2470 	};
2471 	int i;
2472 	int diag;
2473 	struct rte_port *port = &ports[pi];
2474 	uint16_t peer_rx_port = pi;
2475 	uint16_t peer_tx_port = pi;
2476 	uint32_t manual = 1;
2477 	uint32_t tx_exp = hairpin_mode & 0x10;
2478 
2479 	if (!(hairpin_mode & 0xf)) {
2480 		peer_rx_port = pi;
2481 		peer_tx_port = pi;
2482 		manual = 0;
2483 	} else if (hairpin_mode & 0x1) {
2484 		peer_tx_port = rte_eth_find_next_owned_by(pi + 1,
2485 						       RTE_ETH_DEV_NO_OWNER);
2486 		if (peer_tx_port >= RTE_MAX_ETHPORTS)
2487 			peer_tx_port = rte_eth_find_next_owned_by(0,
2488 						RTE_ETH_DEV_NO_OWNER);
2489 		if (p_pi != RTE_MAX_ETHPORTS) {
2490 			peer_rx_port = p_pi;
2491 		} else {
2492 			uint16_t next_pi;
2493 
2494 			/* Last port will be the peer RX port of the first. */
2495 			RTE_ETH_FOREACH_DEV(next_pi)
2496 				peer_rx_port = next_pi;
2497 		}
2498 		manual = 1;
2499 	} else if (hairpin_mode & 0x2) {
2500 		if (cnt_pi & 0x1) {
2501 			peer_rx_port = p_pi;
2502 		} else {
2503 			peer_rx_port = rte_eth_find_next_owned_by(pi + 1,
2504 						RTE_ETH_DEV_NO_OWNER);
2505 			if (peer_rx_port >= RTE_MAX_ETHPORTS)
2506 				peer_rx_port = pi;
2507 		}
2508 		peer_tx_port = peer_rx_port;
2509 		manual = 1;
2510 	}
2511 
2512 	for (qi = nb_txq, i = 0; qi < nb_hairpinq + nb_txq; qi++) {
2513 		hairpin_conf.peers[0].port = peer_rx_port;
2514 		hairpin_conf.peers[0].queue = i + nb_rxq;
2515 		hairpin_conf.manual_bind = !!manual;
2516 		hairpin_conf.tx_explicit = !!tx_exp;
2517 		diag = rte_eth_tx_hairpin_queue_setup
2518 			(pi, qi, nb_txd, &hairpin_conf);
2519 		i++;
2520 		if (diag == 0)
2521 			continue;
2522 
2523 		/* Failed to set up a Tx hairpin queue, return */
2524 		if (port->port_status == RTE_PORT_HANDLING)
2525 			port->port_status = RTE_PORT_STOPPED;
2526 		else
2527 			fprintf(stderr,
2528 				"Port %d can not be set back to stopped\n", pi);
2529 		fprintf(stderr, "Fail to configure port %d hairpin queues\n",
2530 			pi);
2531 		/* try to reconfigure queues next time */
2532 		port->need_reconfig_queues = 1;
2533 		return -1;
2534 	}
2535 	for (qi = nb_rxq, i = 0; qi < nb_hairpinq + nb_rxq; qi++) {
2536 		hairpin_conf.peers[0].port = peer_tx_port;
2537 		hairpin_conf.peers[0].queue = i + nb_txq;
2538 		hairpin_conf.manual_bind = !!manual;
2539 		hairpin_conf.tx_explicit = !!tx_exp;
2540 		diag = rte_eth_rx_hairpin_queue_setup
2541 			(pi, qi, nb_rxd, &hairpin_conf);
2542 		i++;
2543 		if (diag == 0)
2544 			continue;
2545 
2546 		/* Failed to set up an Rx hairpin queue, return */
2547 		if (port->port_status == RTE_PORT_HANDLING)
2548 			port->port_status = RTE_PORT_STOPPED;
2549 		else
2550 			fprintf(stderr,
2551 				"Port %d can not be set back to stopped\n", pi);
2552 		fprintf(stderr, "Fail to configure port %d hairpin queues\n",
2553 			pi);
2554 		/* try to reconfigure queues next time */
2555 		port->need_reconfig_queues = 1;
2556 		return -1;
2557 	}
2558 	return 0;
2559 }
2560 
2561 /* Configure the Rx with optional split. */
2562 int
2563 rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
2564 	       uint16_t nb_rx_desc, unsigned int socket_id,
2565 	       struct rte_eth_rxconf *rx_conf, struct rte_mempool *mp)
2566 {
2567 	union rte_eth_rxseg rx_useg[MAX_SEGS_BUFFER_SPLIT] = {};
2568 	unsigned int i, mp_n;
2569 	int ret;
2570 
2571 	if (rx_pkt_nb_segs <= 1 ||
2572 	    (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) == 0) {
2573 		rx_conf->rx_seg = NULL;
2574 		rx_conf->rx_nseg = 0;
2575 		ret = rte_eth_rx_queue_setup(port_id, rx_queue_id,
2576 					     nb_rx_desc, socket_id,
2577 					     rx_conf, mp);
2578 		return ret;
2579 	}
2580 	for (i = 0; i < rx_pkt_nb_segs; i++) {
2581 		struct rte_eth_rxseg_split *rx_seg = &rx_useg[i].split;
2582 		struct rte_mempool *mpx;
2583 		/*
2584 		 * Use the last valid pool for segments whose index
2585 		 * exceeds the number of configured mbuf pools.
2586 		 */
2587 		mp_n = (i >= mbuf_data_size_n) ? mbuf_data_size_n - 1 : i;
2588 		mpx = mbuf_pool_find(socket_id, mp_n);
2589 		/* Handle zero as mbuf data buffer size. */
2590 		rx_seg->length = rx_pkt_seg_lengths[i] ?
2591 				   rx_pkt_seg_lengths[i] :
2592 				   mbuf_data_size[mp_n];
2593 		rx_seg->offset = i < rx_pkt_nb_offs ?
2594 				   rx_pkt_seg_offsets[i] : 0;
2595 		rx_seg->mp = mpx ? mpx : mp;
2596 	}
2597 	rx_conf->rx_nseg = rx_pkt_nb_segs;
2598 	rx_conf->rx_seg = rx_useg;
2599 	ret = rte_eth_rx_queue_setup(port_id, rx_queue_id, nb_rx_desc,
2600 				    socket_id, rx_conf, NULL);
2601 	rx_conf->rx_seg = NULL;
2602 	rx_conf->rx_nseg = 0;
2603 	return ret;
2604 }
2605 
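/*
 * Allocate the per-port arrays (supported ids, previous and current values)
 * used to track the extended statistics selected for display.
 */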
2606 static int
2607 alloc_xstats_display_info(portid_t pi)
2608 {
2609 	uint64_t **ids_supp = &ports[pi].xstats_info.ids_supp;
2610 	uint64_t **prev_values = &ports[pi].xstats_info.prev_values;
2611 	uint64_t **curr_values = &ports[pi].xstats_info.curr_values;
2612 
2613 	if (xstats_display_num == 0)
2614 		return 0;
2615 
2616 	*ids_supp = calloc(xstats_display_num, sizeof(**ids_supp));
2617 	if (*ids_supp == NULL)
2618 		goto fail_ids_supp;
2619 
2620 	*prev_values = calloc(xstats_display_num,
2621 			      sizeof(**prev_values));
2622 	if (*prev_values == NULL)
2623 		goto fail_prev_values;
2624 
2625 	*curr_values = calloc(xstats_display_num,
2626 			      sizeof(**curr_values));
2627 	if (*curr_values == NULL)
2628 		goto fail_curr_values;
2629 
2630 	ports[pi].xstats_info.allocated = true;
2631 
2632 	return 0;
2633 
2634 fail_curr_values:
2635 	free(*prev_values);
2636 fail_prev_values:
2637 	free(*ids_supp);
2638 fail_ids_supp:
2639 	return -ENOMEM;
2640 }
2641 
2642 static void
2643 free_xstats_display_info(portid_t pi)
2644 {
2645 	if (!ports[pi].xstats_info.allocated)
2646 		return;
2647 	free(ports[pi].xstats_info.ids_supp);
2648 	free(ports[pi].xstats_info.prev_values);
2649 	free(ports[pi].xstats_info.curr_values);
2650 	ports[pi].xstats_info.allocated = false;
2651 }
2652 
2653 /** Fill helper structures for specified port to show extended statistics. */
2654 static void
2655 fill_xstats_display_info_for_port(portid_t pi)
2656 {
2657 	unsigned int stat, stat_supp;
2658 	const char *xstat_name;
2659 	struct rte_port *port;
2660 	uint64_t *ids_supp;
2661 	int rc;
2662 
2663 	if (xstats_display_num == 0)
2664 		return;
2665 
2666 	if (pi == (portid_t)RTE_PORT_ALL) {
2667 		fill_xstats_display_info();
2668 		return;
2669 	}
2670 
2671 	port = &ports[pi];
2672 	if (port->port_status != RTE_PORT_STARTED)
2673 		return;
2674 
2675 	if (!port->xstats_info.allocated && alloc_xstats_display_info(pi) != 0)
2676 		rte_exit(EXIT_FAILURE,
2677 			 "Failed to allocate xstats display memory\n");
2678 
2679 	ids_supp = port->xstats_info.ids_supp;
2680 	for (stat = stat_supp = 0; stat < xstats_display_num; stat++) {
2681 		xstat_name = xstats_display[stat].name;
2682 		rc = rte_eth_xstats_get_id_by_name(pi, xstat_name,
2683 						   ids_supp + stat_supp);
2684 		if (rc != 0) {
2685 			fprintf(stderr, "No xstat '%s' on port %u - skip it %u\n",
2686 				xstat_name, pi, stat);
2687 			continue;
2688 		}
2689 		stat_supp++;
2690 	}
2691 
2692 	port->xstats_info.ids_supp_sz = stat_supp;
2693 }
2694 
2695 /** Fill helper structures for all ports to show extended statistics. */
2696 static void
2697 fill_xstats_display_info(void)
2698 {
2699 	portid_t pi;
2700 
2701 	if (xstats_display_num == 0)
2702 		return;
2703 
2704 	RTE_ETH_FOREACH_DEV(pi)
2705 		fill_xstats_display_info_for_port(pi);
2706 }
2707 
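/*
 * Configure and start the given port, or all ports when pid is RTE_PORT_ALL:
 * reconfigure the device and its Rx/Tx and hairpin queues when needed, start
 * it, bind hairpin peers and optionally check the link status.
 */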
2708 int
2709 start_port(portid_t pid)
2710 {
2711 	int diag, need_check_link_status = -1;
2712 	portid_t pi;
2713 	portid_t p_pi = RTE_MAX_ETHPORTS;
2714 	portid_t pl[RTE_MAX_ETHPORTS];
2715 	portid_t peer_pl[RTE_MAX_ETHPORTS];
2716 	uint16_t cnt_pi = 0;
2717 	uint16_t cfg_pi = 0;
2718 	int peer_pi;
2719 	queueid_t qi;
2720 	struct rte_port *port;
2721 	struct rte_eth_hairpin_cap cap;
2722 
2723 	if (port_id_is_invalid(pid, ENABLED_WARN))
2724 		return 0;
2725 
2726 	RTE_ETH_FOREACH_DEV(pi) {
2727 		if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
2728 			continue;
2729 
2730 		need_check_link_status = 0;
2731 		port = &ports[pi];
2732 		if (port->port_status == RTE_PORT_STOPPED)
2733 			port->port_status = RTE_PORT_HANDLING;
2734 		else {
2735 			fprintf(stderr, "Port %d is not stopped\n", pi);
2736 			continue;
2737 		}
2738 
2739 		if (port->need_reconfig > 0) {
2740 			struct rte_eth_conf dev_conf;
2741 			int k;
2742 
2743 			port->need_reconfig = 0;
2744 
2745 			if (flow_isolate_all) {
2746 				int ret = port_flow_isolate(pi, 1);
2747 				if (ret) {
2748 					fprintf(stderr,
2749 						"Failed to apply isolated mode on port %d\n",
2750 						pi);
2751 					return -1;
2752 				}
2753 			}
2754 			configure_rxtx_dump_callbacks(0);
2755 			printf("Configuring Port %d (socket %u)\n", pi,
2756 					port->socket_id);
2757 			if (nb_hairpinq > 0 &&
2758 			    rte_eth_dev_hairpin_capability_get(pi, &cap)) {
2759 				fprintf(stderr,
2760 					"Port %d doesn't support hairpin queues\n",
2761 					pi);
2762 				return -1;
2763 			}
2764 
2765 			/* configure port */
2766 			diag = eth_dev_configure_mp(pi, nb_rxq + nb_hairpinq,
2767 						     nb_txq + nb_hairpinq,
2768 						     &(port->dev_conf));
2769 			if (diag != 0) {
2770 				if (port->port_status == RTE_PORT_HANDLING)
2771 					port->port_status = RTE_PORT_STOPPED;
2772 				else
2773 					fprintf(stderr,
2774 						"Port %d can not be set back to stopped\n",
2775 						pi);
2776 				fprintf(stderr, "Fail to configure port %d\n",
2777 					pi);
2778 				/* try to reconfigure port next time */
2779 				port->need_reconfig = 1;
2780 				return -1;
2781 			}
2782 			/* get device configuration */
2783 			if (0 !=
2784 				eth_dev_conf_get_print_err(pi, &dev_conf)) {
2785 				fprintf(stderr,
2786 					"port %d can not get device configuration\n",
2787 					pi);
2788 				return -1;
2789 			}
2790 			/* Apply Rx offloads configuration */
2791 			if (dev_conf.rxmode.offloads !=
2792 			    port->dev_conf.rxmode.offloads) {
2793 				port->dev_conf.rxmode.offloads |=
2794 					dev_conf.rxmode.offloads;
2795 				for (k = 0;
2796 				     k < port->dev_info.max_rx_queues;
2797 				     k++)
2798 					port->rx_conf[k].offloads |=
2799 						dev_conf.rxmode.offloads;
2800 			}
2801 			/* Apply Tx offloads configuration */
2802 			if (dev_conf.txmode.offloads !=
2803 			    port->dev_conf.txmode.offloads) {
2804 				port->dev_conf.txmode.offloads |=
2805 					dev_conf.txmode.offloads;
2806 				for (k = 0;
2807 				     k < port->dev_info.max_tx_queues;
2808 				     k++)
2809 					port->tx_conf[k].offloads |=
2810 						dev_conf.txmode.offloads;
2811 			}
2812 		}
2813 		if (port->need_reconfig_queues > 0 && is_proc_primary()) {
2814 			port->need_reconfig_queues = 0;
2815 			/* setup tx queues */
2816 			for (qi = 0; qi < nb_txq; qi++) {
2817 				if ((numa_support) &&
2818 					(txring_numa[pi] != NUMA_NO_CONFIG))
2819 					diag = rte_eth_tx_queue_setup(pi, qi,
2820 						port->nb_tx_desc[qi],
2821 						txring_numa[pi],
2822 						&(port->tx_conf[qi]));
2823 				else
2824 					diag = rte_eth_tx_queue_setup(pi, qi,
2825 						port->nb_tx_desc[qi],
2826 						port->socket_id,
2827 						&(port->tx_conf[qi]));
2828 
2829 				if (diag == 0)
2830 					continue;
2831 
2832 				/* Failed to set up a Tx queue, return */
2833 				if (port->port_status == RTE_PORT_HANDLING)
2834 					port->port_status = RTE_PORT_STOPPED;
2835 				else
2836 					fprintf(stderr,
2837 						"Port %d can not be set back to stopped\n",
2838 						pi);
2839 				fprintf(stderr,
2840 					"Fail to configure port %d tx queues\n",
2841 					pi);
2842 				/* try to reconfigure queues next time */
2843 				port->need_reconfig_queues = 1;
2844 				return -1;
2845 			}
2846 			for (qi = 0; qi < nb_rxq; qi++) {
2847 				/* setup rx queues */
2848 				if ((numa_support) &&
2849 					(rxring_numa[pi] != NUMA_NO_CONFIG)) {
2850 					struct rte_mempool * mp =
2851 						mbuf_pool_find
2852 							(rxring_numa[pi], 0);
2853 					if (mp == NULL) {
2854 						fprintf(stderr,
2855 							"Failed to setup RX queue: No mempool allocation on the socket %d\n",
2856 							rxring_numa[pi]);
2857 						return -1;
2858 					}
2859 
2860 					diag = rx_queue_setup(pi, qi,
2861 					     port->nb_rx_desc[qi],
2862 					     rxring_numa[pi],
2863 					     &(port->rx_conf[qi]),
2864 					     mp);
2865 				} else {
2866 					struct rte_mempool *mp =
2867 						mbuf_pool_find
2868 							(port->socket_id, 0);
2869 					if (mp == NULL) {
2870 						fprintf(stderr,
2871 							"Failed to setup RX queue: No mempool allocation on the socket %d\n",
2872 							port->socket_id);
2873 						return -1;
2874 					}
2875 					diag = rx_queue_setup(pi, qi,
2876 					     port->nb_rx_desc[qi],
2877 					     port->socket_id,
2878 					     &(port->rx_conf[qi]),
2879 					     mp);
2880 				}
2881 				if (diag == 0)
2882 					continue;
2883 
2884 				/* Failed to set up an Rx queue, return */
2885 				if (port->port_status == RTE_PORT_HANDLING)
2886 					port->port_status = RTE_PORT_STOPPED;
2887 				else
2888 					fprintf(stderr,
2889 						"Port %d can not be set back to stopped\n",
2890 						pi);
2891 				fprintf(stderr,
2892 					"Fail to configure port %d rx queues\n",
2893 					pi);
2894 				/* try to reconfigure queues next time */
2895 				port->need_reconfig_queues = 1;
2896 				return -1;
2897 			}
2898 			/* setup hairpin queues */
2899 			if (setup_hairpin_queues(pi, p_pi, cnt_pi) != 0)
2900 				return -1;
2901 		}
2902 		configure_rxtx_dump_callbacks(verbose_level);
2903 		if (clear_ptypes) {
2904 			diag = rte_eth_dev_set_ptypes(pi, RTE_PTYPE_UNKNOWN,
2905 					NULL, 0);
2906 			if (diag < 0)
2907 				fprintf(stderr,
2908 					"Port %d: Failed to disable Ptype parsing\n",
2909 					pi);
2910 		}
2911 
2912 		p_pi = pi;
2913 		cnt_pi++;
2914 
2915 		/* start port */
2916 		diag = eth_dev_start_mp(pi);
2917 		if (diag < 0) {
2918 			fprintf(stderr, "Fail to start port %d: %s\n",
2919 				pi, rte_strerror(-diag));
2920 
2921 			/* Failed to start the port, set it back to stopped */
2922 			if (port->port_status == RTE_PORT_HANDLING)
2923 				port->port_status = RTE_PORT_STOPPED;
2924 			else
2925 				fprintf(stderr,
2926 					"Port %d can not be set back to stopped\n",
2927 					pi);
2928 			continue;
2929 		}
2930 
2931 		if (port->port_status == RTE_PORT_HANDLING)
2932 			port->port_status = RTE_PORT_STARTED;
2933 		else
2934 			fprintf(stderr, "Port %d cannot be set to started\n",
2935 				pi);
2936 
2937 		if (eth_macaddr_get_print_err(pi, &port->eth_addr) == 0)
2938 			printf("Port %d: " RTE_ETHER_ADDR_PRT_FMT "\n", pi,
2939 					RTE_ETHER_ADDR_BYTES(&port->eth_addr));
2940 
2941 		/* at least one port started, need to check link status */
2942 		need_check_link_status = 1;
2943 
2944 		pl[cfg_pi++] = pi;
2945 	}
2946 
2947 	if (need_check_link_status == 1 && !no_link_check)
2948 		check_all_ports_link_status(RTE_PORT_ALL);
2949 	else if (need_check_link_status == 0)
2950 		fprintf(stderr, "Please stop the ports first\n");
2951 
2952 	if (hairpin_mode & 0xf) {
2953 		uint16_t i;
2954 		int j;
2955 
2956 		/* bind all started hairpin ports */
2957 		for (i = 0; i < cfg_pi; i++) {
2958 			pi = pl[i];
2959 			/* bind current Tx to all peer Rx */
2960 			peer_pi = rte_eth_hairpin_get_peer_ports(pi, peer_pl,
2961 							RTE_MAX_ETHPORTS, 1);
2962 			if (peer_pi < 0)
2963 				return peer_pi;
2964 			for (j = 0; j < peer_pi; j++) {
2965 				if (!port_is_started(peer_pl[j]))
2966 					continue;
2967 				diag = rte_eth_hairpin_bind(pi, peer_pl[j]);
2968 				if (diag < 0) {
2969 					fprintf(stderr,
2970 						"Error during binding hairpin Tx port %u to %u: %s\n",
2971 						pi, peer_pl[j],
2972 						rte_strerror(-diag));
2973 					return -1;
2974 				}
2975 			}
2976 			/* bind all peer Tx to current Rx */
2977 			peer_pi = rte_eth_hairpin_get_peer_ports(pi, peer_pl,
2978 							RTE_MAX_ETHPORTS, 0);
2979 			if (peer_pi < 0)
2980 				return peer_pi;
2981 			for (j = 0; j < peer_pi; j++) {
2982 				if (!port_is_started(peer_pl[j]))
2983 					continue;
2984 				diag = rte_eth_hairpin_bind(peer_pl[j], pi);
2985 				if (diag < 0) {
2986 					fprintf(stderr,
2987 						"Error during binding hairpin Tx port %u to %u: %s\n",
2988 						peer_pl[j], pi,
2989 						rte_strerror(-diag));
2990 					return -1;
2991 				}
2992 			}
2993 		}
2994 	}
2995 
2996 	fill_xstats_display_info_for_port(pid);
2997 
2998 	printf("Done\n");
2999 	return 0;
3000 }
3001 
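/*
 * Stop the given port or all ports, unbinding hairpin peers and flushing
 * flow rules first; ports still forwarding or acting as bonding slaves are
 * skipped.
 */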
3002 void
3003 stop_port(portid_t pid)
3004 {
3005 	portid_t pi;
3006 	struct rte_port *port;
3007 	int need_check_link_status = 0;
3008 	portid_t peer_pl[RTE_MAX_ETHPORTS];
3009 	int peer_pi;
3010 
3011 	if (port_id_is_invalid(pid, ENABLED_WARN))
3012 		return;
3013 
3014 	printf("Stopping ports...\n");
3015 
3016 	RTE_ETH_FOREACH_DEV(pi) {
3017 		if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
3018 			continue;
3019 
3020 		if (port_is_forwarding(pi) != 0 && test_done == 0) {
3021 			fprintf(stderr,
3022 				"Please remove port %d from forwarding configuration.\n",
3023 				pi);
3024 			continue;
3025 		}
3026 
3027 		if (port_is_bonding_slave(pi)) {
3028 			fprintf(stderr,
3029 				"Please remove port %d from bonded device.\n",
3030 				pi);
3031 			continue;
3032 		}
3033 
3034 		port = &ports[pi];
3035 		if (port->port_status == RTE_PORT_STARTED)
3036 			port->port_status = RTE_PORT_HANDLING;
3037 		else
3038 			continue;
3039 
3040 		if (hairpin_mode & 0xf) {
3041 			int j;
3042 
3043 			rte_eth_hairpin_unbind(pi, RTE_MAX_ETHPORTS);
3044 			/* unbind all peer Tx from current Rx */
3045 			peer_pi = rte_eth_hairpin_get_peer_ports(pi, peer_pl,
3046 							RTE_MAX_ETHPORTS, 0);
3047 			if (peer_pi < 0)
3048 				continue;
3049 			for (j = 0; j < peer_pi; j++) {
3050 				if (!port_is_started(peer_pl[j]))
3051 					continue;
3052 				rte_eth_hairpin_unbind(peer_pl[j], pi);
3053 			}
3054 		}
3055 
3056 		if (port->flow_list)
3057 			port_flow_flush(pi);
3058 
3059 		if (eth_dev_stop_mp(pi) != 0)
3060 			RTE_LOG(ERR, EAL, "rte_eth_dev_stop failed for port %u\n",
3061 				pi);
3062 
3063 		if (port->port_status == RTE_PORT_HANDLING)
3064 			port->port_status = RTE_PORT_STOPPED;
3065 		else
3066 			fprintf(stderr, "Port %d cannot be set to stopped\n",
3067 				pi);
3068 		need_check_link_status = 1;
3069 	}
3070 	if (need_check_link_status && !no_link_check)
3071 		check_all_ports_link_status(RTE_PORT_ALL);
3072 
3073 	printf("Done\n");
3074 }
3075 
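/*
 * Compact a port id array in place, dropping ids that are no longer valid,
 * and update the element count accordingly.
 */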
3076 static void
3077 remove_invalid_ports_in(portid_t *array, portid_t *total)
3078 {
3079 	portid_t i;
3080 	portid_t new_total = 0;
3081 
3082 	for (i = 0; i < *total; i++)
3083 		if (!port_id_is_invalid(array[i], DISABLED_WARN)) {
3084 			array[new_total] = array[i];
3085 			new_total++;
3086 		}
3087 	*total = new_total;
3088 }
3089 
3090 static void
3091 remove_invalid_ports(void)
3092 {
3093 	remove_invalid_ports_in(ports_ids, &nb_ports);
3094 	remove_invalid_ports_in(fwd_ports_ids, &nb_fwd_ports);
3095 	nb_cfg_ports = nb_fwd_ports;
3096 }
3097 
3098 void
3099 close_port(portid_t pid)
3100 {
3101 	portid_t pi;
3102 	struct rte_port *port;
3103 
3104 	if (port_id_is_invalid(pid, ENABLED_WARN))
3105 		return;
3106 
3107 	printf("Closing ports...\n");
3108 
3109 	RTE_ETH_FOREACH_DEV(pi) {
3110 		if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
3111 			continue;
3112 
3113 		if (port_is_forwarding(pi) != 0 && test_done == 0) {
3114 			fprintf(stderr,
3115 				"Please remove port %d from forwarding configuration.\n",
3116 				pi);
3117 			continue;
3118 		}
3119 
3120 		if (port_is_bonding_slave(pi)) {
3121 			fprintf(stderr,
3122 				"Please remove port %d from bonded device.\n",
3123 				pi);
3124 			continue;
3125 		}
3126 
3127 		port = &ports[pi];
3128 		if (port->port_status == RTE_PORT_CLOSED) {
3129 			fprintf(stderr, "Port %d is already closed\n", pi);
3130 			continue;
3131 		}
3132 
3133 		if (is_proc_primary()) {
3134 			port_flow_flush(pi);
3135 			port_flex_item_flush(pi);
3136 			rte_eth_dev_close(pi);
3137 		}
3138 
3139 		free_xstats_display_info(pi);
3140 	}
3141 
3142 	remove_invalid_ports();
3143 	printf("Done\n");
3144 }
3145 
3146 void
3147 reset_port(portid_t pid)
3148 {
3149 	int diag;
3150 	portid_t pi;
3151 	struct rte_port *port;
3152 
3153 	if (port_id_is_invalid(pid, ENABLED_WARN))
3154 		return;
3155 
3156 	if ((pid == (portid_t)RTE_PORT_ALL && !all_ports_stopped()) ||
3157 		(pid != (portid_t)RTE_PORT_ALL && !port_is_stopped(pid))) {
3158 		fprintf(stderr,
3159 			"Can not reset port(s), please stop port(s) first.\n");
3160 		return;
3161 	}
3162 
3163 	printf("Resetting ports...\n");
3164 
3165 	RTE_ETH_FOREACH_DEV(pi) {
3166 		if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
3167 			continue;
3168 
3169 		if (port_is_forwarding(pi) != 0 && test_done == 0) {
3170 			fprintf(stderr,
3171 				"Please remove port %d from forwarding configuration.\n",
3172 				pi);
3173 			continue;
3174 		}
3175 
3176 		if (port_is_bonding_slave(pi)) {
3177 			fprintf(stderr,
3178 				"Please remove port %d from bonded device.\n",
3179 				pi);
3180 			continue;
3181 		}
3182 
3183 		diag = rte_eth_dev_reset(pi);
3184 		if (diag == 0) {
3185 			port = &ports[pi];
3186 			port->need_reconfig = 1;
3187 			port->need_reconfig_queues = 1;
3188 		} else {
3189 			fprintf(stderr, "Failed to reset port %d. diag=%d\n",
3190 				pi, diag);
3191 		}
3192 	}
3193 
3194 	printf("Done\n");
3195 }
3196 
3197 void
3198 attach_port(char *identifier)
3199 {
3200 	portid_t pi;
3201 	struct rte_dev_iterator iterator;
3202 
3203 	printf("Attaching a new port...\n");
3204 
3205 	if (identifier == NULL) {
3206 		fprintf(stderr, "Invalid parameters are specified\n");
3207 		return;
3208 	}
3209 
3210 	if (rte_dev_probe(identifier) < 0) {
3211 		TESTPMD_LOG(ERR, "Failed to attach port %s\n", identifier);
3212 		return;
3213 	}
3214 
3215 	/* first attach mode: event */
3216 	if (setup_on_probe_event) {
3217 		/* new ports are detected on RTE_ETH_EVENT_NEW event */
3218 		for (pi = 0; pi < RTE_MAX_ETHPORTS; pi++)
3219 			if (ports[pi].port_status == RTE_PORT_HANDLING &&
3220 					ports[pi].need_setup != 0)
3221 				setup_attached_port(pi);
3222 		return;
3223 	}
3224 
3225 	/* second attach mode: iterator */
3226 	RTE_ETH_FOREACH_MATCHING_DEV(pi, identifier, &iterator) {
3227 		/* setup ports matching the devargs used for probing */
3228 		if (port_is_forwarding(pi))
3229 			continue; /* port was already attached before */
3230 		setup_attached_port(pi);
3231 	}
3232 }
3233 
3234 static void
3235 setup_attached_port(portid_t pi)
3236 {
3237 	unsigned int socket_id;
3238 	int ret;
3239 
3240 	socket_id = (unsigned)rte_eth_dev_socket_id(pi);
3241 	/* if socket_id is invalid, set to the first available socket. */
3242 	if (check_socket_id(socket_id) < 0)
3243 		socket_id = socket_ids[0];
3244 	reconfig(pi, socket_id);
3245 	ret = rte_eth_promiscuous_enable(pi);
3246 	if (ret != 0)
3247 		fprintf(stderr,
3248 			"Error during enabling promiscuous mode for port %u: %s - ignore\n",
3249 			pi, rte_strerror(-ret));
3250 
3251 	ports_ids[nb_ports++] = pi;
3252 	fwd_ports_ids[nb_fwd_ports++] = pi;
3253 	nb_cfg_ports = nb_fwd_ports;
3254 	ports[pi].need_setup = 0;
3255 	ports[pi].port_status = RTE_PORT_STOPPED;
3256 
3257 	printf("Port %d is attached. Now total ports is %d\n", pi, nb_ports);
3258 	printf("Done\n");
3259 }
3260 
3261 static void
3262 detach_device(struct rte_device *dev)
3263 {
3264 	portid_t sibling;
3265 
3266 	if (dev == NULL) {
3267 		fprintf(stderr, "Device already removed\n");
3268 		return;
3269 	}
3270 
3271 	printf("Removing a device...\n");
3272 
3273 	RTE_ETH_FOREACH_DEV_OF(sibling, dev) {
3274 		if (ports[sibling].port_status != RTE_PORT_CLOSED) {
3275 			if (ports[sibling].port_status != RTE_PORT_STOPPED) {
3276 				fprintf(stderr, "Port %u not stopped\n",
3277 					sibling);
3278 				return;
3279 			}
3280 			port_flow_flush(sibling);
3281 		}
3282 	}
3283 
3284 	if (rte_dev_remove(dev) < 0) {
3285 		TESTPMD_LOG(ERR, "Failed to detach device %s\n", dev->name);
3286 		return;
3287 	}
3288 	remove_invalid_ports();
3289 
3290 	printf("Device is detached\n");
3291 	printf("Now total ports is %d\n", nb_ports);
3292 	printf("Done\n");
3293 	return;
3294 }
3295 
3296 void
3297 detach_port_device(portid_t port_id)
3298 {
3299 	int ret;
3300 	struct rte_eth_dev_info dev_info;
3301 
3302 	if (port_id_is_invalid(port_id, ENABLED_WARN))
3303 		return;
3304 
3305 	if (ports[port_id].port_status != RTE_PORT_CLOSED) {
3306 		if (ports[port_id].port_status != RTE_PORT_STOPPED) {
3307 			fprintf(stderr, "Port not stopped\n");
3308 			return;
3309 		}
3310 		fprintf(stderr, "Port was not closed\n");
3311 	}
3312 
3313 	ret = eth_dev_info_get_print_err(port_id, &dev_info);
3314 	if (ret != 0) {
3315 		TESTPMD_LOG(ERR,
3316 			"Failed to get device info for port %d, not detaching\n",
3317 			port_id);
3318 		return;
3319 	}
3320 	detach_device(dev_info.device);
3321 }
3322 
3323 void
3324 detach_devargs(char *identifier)
3325 {
3326 	struct rte_dev_iterator iterator;
3327 	struct rte_devargs da;
3328 	portid_t port_id;
3329 
3330 	printf("Removing a device...\n");
3331 
3332 	memset(&da, 0, sizeof(da));
3333 	if (rte_devargs_parsef(&da, "%s", identifier)) {
3334 		fprintf(stderr, "cannot parse identifier\n");
3335 		return;
3336 	}
3337 
3338 	RTE_ETH_FOREACH_MATCHING_DEV(port_id, identifier, &iterator) {
3339 		if (ports[port_id].port_status != RTE_PORT_CLOSED) {
3340 			if (ports[port_id].port_status != RTE_PORT_STOPPED) {
3341 				fprintf(stderr, "Port %u not stopped\n",
3342 					port_id);
3343 				rte_eth_iterator_cleanup(&iterator);
3344 				rte_devargs_reset(&da);
3345 				return;
3346 			}
3347 			port_flow_flush(port_id);
3348 		}
3349 	}
3350 
3351 	if (rte_eal_hotplug_remove(da.bus->name, da.name) != 0) {
3352 		TESTPMD_LOG(ERR, "Failed to detach device %s(%s)\n",
3353 			    da.name, da.bus->name);
3354 		rte_devargs_reset(&da);
3355 		return;
3356 	}
3357 
3358 	remove_invalid_ports();
3359 
3360 	printf("Device %s is detached\n", identifier);
3361 	printf("Now total ports is %d\n", nb_ports);
3362 	printf("Done\n");
3363 	rte_devargs_reset(&da);
3364 }
3365 
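/*
 * Cleanup performed when testpmd exits: stop forwarding if it is running,
 * stop and close all ports, stop device event monitoring when hot-plug is
 * enabled and free the mbuf pools.
 */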
3366 void
3367 pmd_test_exit(void)
3368 {
3369 	portid_t pt_id;
3370 	unsigned int i;
3371 	int ret;
3372 
3373 	if (test_done == 0)
3374 		stop_packet_forwarding();
3375 
3376 #ifndef RTE_EXEC_ENV_WINDOWS
3377 	for (i = 0 ; i < RTE_DIM(mempools) ; i++) {
3378 		if (mempools[i]) {
3379 			if (mp_alloc_type == MP_ALLOC_ANON)
3380 				rte_mempool_mem_iter(mempools[i], dma_unmap_cb,
3381 						     NULL);
3382 		}
3383 	}
3384 #endif
3385 	if (ports != NULL) {
3386 		no_link_check = 1;
3387 		RTE_ETH_FOREACH_DEV(pt_id) {
3388 			printf("\nStopping port %d...\n", pt_id);
3389 			fflush(stdout);
3390 			stop_port(pt_id);
3391 		}
3392 		RTE_ETH_FOREACH_DEV(pt_id) {
3393 			printf("\nShutting down port %d...\n", pt_id);
3394 			fflush(stdout);
3395 			close_port(pt_id);
3396 		}
3397 	}
3398 
3399 	if (hot_plug) {
3400 		ret = rte_dev_event_monitor_stop();
3401 		if (ret) {
3402 			RTE_LOG(ERR, EAL,
3403 				"fail to stop device event monitor.\n");
3404 			return;
3405 		}
3406 
3407 		ret = rte_dev_event_callback_unregister(NULL,
3408 			dev_event_callback, NULL);
3409 		if (ret < 0) {
3410 			RTE_LOG(ERR, EAL,
3411 				"fail to unregister device event callback.\n");
3412 			return;
3413 		}
3414 
3415 		ret = rte_dev_hotplug_handle_disable();
3416 		if (ret) {
3417 			RTE_LOG(ERR, EAL,
3418 				"fail to disable hotplug handling.\n");
3419 			return;
3420 		}
3421 	}
3422 	for (i = 0 ; i < RTE_DIM(mempools) ; i++) {
3423 		if (mempools[i])
3424 			mempool_free_mp(mempools[i]);
3425 	}
3426 	free(xstats_display);
3427 
3428 	printf("\nBye...\n");
3429 }
3430 
3431 typedef void (*cmd_func_t)(void);
3432 struct pmd_test_command {
3433 	const char *cmd_name;
3434 	cmd_func_t cmd_func;
3435 };
3436 
3437 /* Check the link status of all ports for up to 9 s, then print the final status */
3438 static void
3439 check_all_ports_link_status(uint32_t port_mask)
3440 {
3441 #define CHECK_INTERVAL 100 /* 100ms */
3442 #define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
3443 	portid_t portid;
3444 	uint8_t count, all_ports_up, print_flag = 0;
3445 	struct rte_eth_link link;
3446 	int ret;
3447 	char link_status[RTE_ETH_LINK_MAX_STR_LEN];
3448 
3449 	printf("Checking link statuses...\n");
3450 	fflush(stdout);
3451 	for (count = 0; count <= MAX_CHECK_TIME; count++) {
3452 		all_ports_up = 1;
3453 		RTE_ETH_FOREACH_DEV(portid) {
3454 			if ((port_mask & (1 << portid)) == 0)
3455 				continue;
3456 			memset(&link, 0, sizeof(link));
3457 			ret = rte_eth_link_get_nowait(portid, &link);
3458 			if (ret < 0) {
3459 				all_ports_up = 0;
3460 				if (print_flag == 1)
3461 					fprintf(stderr,
3462 						"Port %u link get failed: %s\n",
3463 						portid, rte_strerror(-ret));
3464 				continue;
3465 			}
3466 			/* print link status if flag set */
3467 			if (print_flag == 1) {
3468 				rte_eth_link_to_str(link_status,
3469 					sizeof(link_status), &link);
3470 				printf("Port %d %s\n", portid, link_status);
3471 				continue;
3472 			}
3473 			/* clear all_ports_up flag if any link down */
3474 			if (link.link_status == RTE_ETH_LINK_DOWN) {
3475 				all_ports_up = 0;
3476 				break;
3477 			}
3478 		}
3479 		/* after finally printing all link status, get out */
3480 		if (print_flag == 1)
3481 			break;
3482 
3483 		if (all_ports_up == 0) {
3484 			fflush(stdout);
3485 			rte_delay_ms(CHECK_INTERVAL);
3486 		}
3487 
3488 		/* set the print_flag if all ports up or timeout */
3489 		if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
3490 			print_flag = 1;
3491 		}
3492 
3493 		if (lsc_interrupt)
3494 			break;
3495 	}
3496 }
3497 
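/*
 * Deferred handler for a device removal event: pause forwarding if needed,
 * then stop, close and detach the port that triggered the event, and resume
 * forwarding afterwards.
 */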
3498 static void
3499 rmv_port_callback(void *arg)
3500 {
3501 	int need_to_start = 0;
3502 	int org_no_link_check = no_link_check;
3503 	portid_t port_id = (intptr_t)arg;
3504 	struct rte_eth_dev_info dev_info;
3505 	int ret;
3506 
3507 	RTE_ETH_VALID_PORTID_OR_RET(port_id);
3508 
3509 	if (!test_done && port_is_forwarding(port_id)) {
3510 		need_to_start = 1;
3511 		stop_packet_forwarding();
3512 	}
3513 	no_link_check = 1;
3514 	stop_port(port_id);
3515 	no_link_check = org_no_link_check;
3516 
3517 	ret = eth_dev_info_get_print_err(port_id, &dev_info);
3518 	if (ret != 0)
3519 		TESTPMD_LOG(ERR,
3520 			"Failed to get device info for port %d, not detaching\n",
3521 			port_id);
3522 	else {
3523 		struct rte_device *device = dev_info.device;
3524 		close_port(port_id);
3525 		detach_device(device); /* might be already removed or have more ports */
3526 	}
3527 	if (need_to_start)
3528 		start_packet_forwarding(0);
3529 }
3530 
3531 /* This function is used by the interrupt thread */
3532 static int
3533 eth_event_callback(portid_t port_id, enum rte_eth_event_type type, void *param,
3534 		  void *ret_param)
3535 {
3536 	RTE_SET_USED(param);
3537 	RTE_SET_USED(ret_param);
3538 
3539 	if (type >= RTE_ETH_EVENT_MAX) {
3540 		fprintf(stderr,
3541 			"\nPort %" PRIu16 ": %s called upon invalid event %d\n",
3542 			port_id, __func__, type);
3543 		fflush(stderr);
3544 	} else if (event_print_mask & (UINT32_C(1) << type)) {
3545 		printf("\nPort %" PRIu16 ": %s event\n", port_id,
3546 			eth_event_desc[type]);
3547 		fflush(stdout);
3548 	}
3549 
3550 	switch (type) {
3551 	case RTE_ETH_EVENT_NEW:
3552 		ports[port_id].need_setup = 1;
3553 		ports[port_id].port_status = RTE_PORT_HANDLING;
3554 		break;
3555 	case RTE_ETH_EVENT_INTR_RMV:
3556 		if (port_id_is_invalid(port_id, DISABLED_WARN))
3557 			break;
3558 		if (rte_eal_alarm_set(100000,
3559 				rmv_port_callback, (void *)(intptr_t)port_id))
3560 			fprintf(stderr,
3561 				"Could not set up deferred device removal\n");
3562 		break;
3563 	case RTE_ETH_EVENT_DESTROY:
3564 		ports[port_id].port_status = RTE_PORT_CLOSED;
3565 		printf("Port %u is closed\n", port_id);
3566 		break;
3567 	default:
3568 		break;
3569 	}
3570 	return 0;
3571 }
3572 
3573 static int
3574 register_eth_event_callback(void)
3575 {
3576 	int ret;
3577 	enum rte_eth_event_type event;
3578 
3579 	for (event = RTE_ETH_EVENT_UNKNOWN;
3580 			event < RTE_ETH_EVENT_MAX; event++) {
3581 		ret = rte_eth_dev_callback_register(RTE_ETH_ALL,
3582 				event,
3583 				eth_event_callback,
3584 				NULL);
3585 		if (ret != 0) {
3586 			TESTPMD_LOG(ERR, "Failed to register callback for "
3587 					"%s event\n", eth_event_desc[event]);
3588 			return -1;
3589 		}
3590 	}
3591 
3592 	return 0;
3593 }
3594 
3595 /* This function is used by the interrupt thread */
3596 static void
3597 dev_event_callback(const char *device_name, enum rte_dev_event_type type,
3598 			     __rte_unused void *arg)
3599 {
3600 	uint16_t port_id;
3601 	int ret;
3602 
3603 	if (type >= RTE_DEV_EVENT_MAX) {
3604 		fprintf(stderr, "%s called upon invalid event %d\n",
3605 			__func__, type);
3606 		fflush(stderr);
3607 	}
3608 
3609 	switch (type) {
3610 	case RTE_DEV_EVENT_REMOVE:
3611 		RTE_LOG(DEBUG, EAL, "The device: %s has been removed!\n",
3612 			device_name);
3613 		ret = rte_eth_dev_get_port_by_name(device_name, &port_id);
3614 		if (ret) {
3615 			RTE_LOG(ERR, EAL, "can not get port by device %s!\n",
3616 				device_name);
3617 			return;
3618 		}
3619 		/*
3620 		 * Because the user's callback is invoked from the EAL
3621 		 * interrupt callback, the interrupt callback must finish
3622 		 * before it can be unregistered while detaching the device.
3623 		 * Therefore finish this callback quickly and detach the
3624 		 * device through a deferred removal. This is a workaround:
3625 		 * once device detaching moves into the EAL, the deferred
3626 		 * removal can be dropped.
3627 		 */
3628 		if (rte_eal_alarm_set(100000,
3629 				rmv_port_callback, (void *)(intptr_t)port_id))
3630 			RTE_LOG(ERR, EAL,
3631 				"Could not set up deferred device removal\n");
3632 		break;
3633 	case RTE_DEV_EVENT_ADD:
3634 		RTE_LOG(ERR, EAL, "The device: %s has been added!\n",
3635 			device_name);
3636 		/* TODO: After finish kernel driver binding,
3637 		 * begin to attach port.
3638 		 */
3639 		break;
3640 	default:
3641 		break;
3642 	}
3643 }
3644 
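/*
 * Initialize the per-queue Rx/Tx configuration of a port from the device
 * defaults, then apply the parameters given on the command line (thresholds,
 * drop enable, descriptor counts and shared Rx queue group).
 */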
3645 static void
3646 rxtx_port_config(portid_t pid)
3647 {
3648 	uint16_t qid;
3649 	uint64_t offloads;
3650 	struct rte_port *port = &ports[pid];
3651 
3652 	for (qid = 0; qid < nb_rxq; qid++) {
3653 		offloads = port->rx_conf[qid].offloads;
3654 		port->rx_conf[qid] = port->dev_info.default_rxconf;
3655 
3656 		if (rxq_share > 0 &&
3657 		    (port->dev_info.dev_capa & RTE_ETH_DEV_CAPA_RXQ_SHARE)) {
3658 			/* Non-zero share group to enable RxQ share. */
3659 			port->rx_conf[qid].share_group = pid / rxq_share + 1;
3660 			port->rx_conf[qid].share_qid = qid; /* Equal mapping. */
3661 		}
3662 
3663 		if (offloads != 0)
3664 			port->rx_conf[qid].offloads = offloads;
3665 
3666 		/* Check if any Rx parameters have been passed */
3667 		if (rx_pthresh != RTE_PMD_PARAM_UNSET)
3668 			port->rx_conf[qid].rx_thresh.pthresh = rx_pthresh;
3669 
3670 		if (rx_hthresh != RTE_PMD_PARAM_UNSET)
3671 			port->rx_conf[qid].rx_thresh.hthresh = rx_hthresh;
3672 
3673 		if (rx_wthresh != RTE_PMD_PARAM_UNSET)
3674 			port->rx_conf[qid].rx_thresh.wthresh = rx_wthresh;
3675 
3676 		if (rx_free_thresh != RTE_PMD_PARAM_UNSET)
3677 			port->rx_conf[qid].rx_free_thresh = rx_free_thresh;
3678 
3679 		if (rx_drop_en != RTE_PMD_PARAM_UNSET)
3680 			port->rx_conf[qid].rx_drop_en = rx_drop_en;
3681 
3682 		port->nb_rx_desc[qid] = nb_rxd;
3683 	}
3684 
3685 	for (qid = 0; qid < nb_txq; qid++) {
3686 		offloads = port->tx_conf[qid].offloads;
3687 		port->tx_conf[qid] = port->dev_info.default_txconf;
3688 		if (offloads != 0)
3689 			port->tx_conf[qid].offloads = offloads;
3690 
3691 		/* Check if any Tx parameters have been passed */
3692 		if (tx_pthresh != RTE_PMD_PARAM_UNSET)
3693 			port->tx_conf[qid].tx_thresh.pthresh = tx_pthresh;
3694 
3695 		if (tx_hthresh != RTE_PMD_PARAM_UNSET)
3696 			port->tx_conf[qid].tx_thresh.hthresh = tx_hthresh;
3697 
3698 		if (tx_wthresh != RTE_PMD_PARAM_UNSET)
3699 			port->tx_conf[qid].tx_thresh.wthresh = tx_wthresh;
3700 
3701 		if (tx_rs_thresh != RTE_PMD_PARAM_UNSET)
3702 			port->tx_conf[qid].tx_rs_thresh = tx_rs_thresh;
3703 
3704 		if (tx_free_thresh != RTE_PMD_PARAM_UNSET)
3705 			port->tx_conf[qid].tx_free_thresh = tx_free_thresh;
3706 
3707 		port->nb_tx_desc[qid] = nb_txd;
3708 	}
3709 }
3710 
3711 /*
3712  * Helper function to set MTU from frame size
3713  *
3714  * port->dev_info should be set before calling this function.
3715  *
3716  * return 0 on success, negative on error
3717  */
3718 int
3719 update_mtu_from_frame_size(portid_t portid, uint32_t max_rx_pktlen)
3720 {
3721 	struct rte_port *port = &ports[portid];
3722 	uint32_t eth_overhead;
3723 	uint16_t mtu, new_mtu;
3724 
3725 	eth_overhead = get_eth_overhead(&port->dev_info);
3726 
3727 	if (rte_eth_dev_get_mtu(portid, &mtu) != 0) {
3728 		printf("Failed to get MTU for port %u\n", portid);
3729 		return -1;
3730 	}
3731 
3732 	new_mtu = max_rx_pktlen - eth_overhead;
3733 
3734 	if (mtu == new_mtu)
3735 		return 0;
3736 
3737 	if (eth_dev_set_mtu_mp(portid, new_mtu) != 0) {
3738 		fprintf(stderr,
3739 			"Failed to set MTU to %u for port %u\n",
3740 			new_mtu, portid);
3741 		return -1;
3742 	}
3743 
3744 	port->dev_conf.rxmode.mtu = new_mtu;
3745 
3746 	return 0;
3747 }
3748 
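/*
 * Set the initial configuration of every port: RSS according to the number
 * of Rx queues, per-queue Rx/Tx parameters via rxtx_port_config(), and LSC
 * and RMV interrupts when requested and supported by the device.
 */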
3749 void
3750 init_port_config(void)
3751 {
3752 	portid_t pid;
3753 	struct rte_port *port;
3754 	int ret, i;
3755 
3756 	RTE_ETH_FOREACH_DEV(pid) {
3757 		port = &ports[pid];
3758 		port->dev_conf.fdir_conf = fdir_conf;
3759 
3760 		ret = eth_dev_info_get_print_err(pid, &port->dev_info);
3761 		if (ret != 0)
3762 			return;
3763 
3764 		if (nb_rxq > 1) {
3765 			port->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
3766 			port->dev_conf.rx_adv_conf.rss_conf.rss_hf =
3767 				rss_hf & port->dev_info.flow_type_rss_offloads;
3768 		} else {
3769 			port->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
3770 			port->dev_conf.rx_adv_conf.rss_conf.rss_hf = 0;
3771 		}
3772 
3773 		if (port->dcb_flag == 0) {
3774 			if (port->dev_conf.rx_adv_conf.rss_conf.rss_hf != 0) {
3775 				port->dev_conf.rxmode.mq_mode =
3776 					(enum rte_eth_rx_mq_mode)
3777 						(rx_mq_mode & RTE_ETH_MQ_RX_RSS);
3778 			} else {
3779 				port->dev_conf.rxmode.mq_mode = RTE_ETH_MQ_RX_NONE;
3780 				port->dev_conf.rxmode.offloads &=
3781 						~RTE_ETH_RX_OFFLOAD_RSS_HASH;
3782 
3783 				for (i = 0;
3784 				     i < port->dev_info.nb_rx_queues;
3785 				     i++)
3786 					port->rx_conf[i].offloads &=
3787 						~RTE_ETH_RX_OFFLOAD_RSS_HASH;
3788 			}
3789 		}
3790 
3791 		rxtx_port_config(pid);
3792 
3793 		ret = eth_macaddr_get_print_err(pid, &port->eth_addr);
3794 		if (ret != 0)
3795 			return;
3796 
3797 #if defined RTE_NET_IXGBE && defined RTE_LIBRTE_IXGBE_BYPASS
3798 		rte_pmd_ixgbe_bypass_init(pid);
3799 #endif
3800 
3801 		if (lsc_interrupt && (*port->dev_info.dev_flags & RTE_ETH_DEV_INTR_LSC))
3802 			port->dev_conf.intr_conf.lsc = 1;
3803 		if (rmv_interrupt && (*port->dev_info.dev_flags & RTE_ETH_DEV_INTR_RMV))
3804 			port->dev_conf.intr_conf.rmv = 1;
3805 	}
3806 }
3807 
3808 void set_port_slave_flag(portid_t slave_pid)
3809 {
3810 	struct rte_port *port;
3811 
3812 	port = &ports[slave_pid];
3813 	port->slave_flag = 1;
3814 }
3815 
3816 void clear_port_slave_flag(portid_t slave_pid)
3817 {
3818 	struct rte_port *port;
3819 
3820 	port = &ports[slave_pid];
3821 	port->slave_flag = 0;
3822 }
3823 
3824 uint8_t port_is_bonding_slave(portid_t slave_pid)
3825 {
3826 	struct rte_port *port;
3827 	struct rte_eth_dev_info dev_info;
3828 	int ret;
3829 
3830 	port = &ports[slave_pid];
3831 	ret = eth_dev_info_get_print_err(slave_pid, &dev_info);
3832 	if (ret != 0) {
3833 		TESTPMD_LOG(ERR,
3834 			"Failed to get device info for port id %d, "
3835 			"cannot determine if the port is a bonded slave\n",
3836 			slave_pid);
3837 		return 0;
3838 	}
3839 	if ((*dev_info.dev_flags & RTE_ETH_DEV_BONDED_SLAVE) || (port->slave_flag == 1))
3840 		return 1;
3841 	return 0;
3842 }
3843 
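/*
 * VLAN IDs used to build the VMDq+DCB pool mapping in get_eth_dcb_conf()
 * and to program the VLAN filter in init_port_dcb_config().
 */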
3844 const uint16_t vlan_tags[] = {
3845 		0,  1,  2,  3,  4,  5,  6,  7,
3846 		8,  9, 10, 11,  12, 13, 14, 15,
3847 		16, 17, 18, 19, 20, 21, 22, 23,
3848 		24, 25, 26, 27, 28, 29, 30, 31
3849 };
3850 
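/*
 * Fill *eth_conf with a DCB configuration for port 'pid'.
 * With dcb_mode == DCB_VT_ENABLED a VMDq+DCB setup is built from the
 * vlan_tags[] table; otherwise plain DCB is used together with the RSS
 * hash configuration currently programmed on the port. When pfc_en is
 * non-zero, priority flow control support is advertised as well.
 *
 * Returns 0 on success, negative on error.
 */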
3851 static int
3852 get_eth_dcb_conf(portid_t pid, struct rte_eth_conf *eth_conf,
3853 		 enum dcb_mode_enable dcb_mode,
3854 		 enum rte_eth_nb_tcs num_tcs,
3855 		 uint8_t pfc_en)
3856 {
3857 	uint8_t i;
3858 	int32_t rc;
3859 	struct rte_eth_rss_conf rss_conf;
3860 
3861 	/*
3862 	 * Builds up the correct configuration for DCB+VT based on the vlan_tags array
3863 	 * given above and the number of traffic classes available for use.
3864 	 */
3865 	if (dcb_mode == DCB_VT_ENABLED) {
3866 		struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
3867 				&eth_conf->rx_adv_conf.vmdq_dcb_conf;
3868 		struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3869 				&eth_conf->tx_adv_conf.vmdq_dcb_tx_conf;
3870 
3871 		/* VMDQ+DCB RX and TX configurations */
3872 		vmdq_rx_conf->enable_default_pool = 0;
3873 		vmdq_rx_conf->default_pool = 0;
3874 		vmdq_rx_conf->nb_queue_pools =
3875 			(num_tcs ==  RTE_ETH_4_TCS ? RTE_ETH_32_POOLS : RTE_ETH_16_POOLS);
3876 		vmdq_tx_conf->nb_queue_pools =
3877 			(num_tcs ==  RTE_ETH_4_TCS ? RTE_ETH_32_POOLS : RTE_ETH_16_POOLS);
3878 
3879 		vmdq_rx_conf->nb_pool_maps = vmdq_rx_conf->nb_queue_pools;
3880 		for (i = 0; i < vmdq_rx_conf->nb_pool_maps; i++) {
3881 			vmdq_rx_conf->pool_map[i].vlan_id = vlan_tags[i];
3882 			vmdq_rx_conf->pool_map[i].pools =
3883 				1 << (i % vmdq_rx_conf->nb_queue_pools);
3884 		}
3885 		for (i = 0; i < RTE_ETH_DCB_NUM_USER_PRIORITIES; i++) {
3886 			vmdq_rx_conf->dcb_tc[i] = i % num_tcs;
3887 			vmdq_tx_conf->dcb_tc[i] = i % num_tcs;
3888 		}
3889 
3890 		/* set DCB mode of RX and TX of multiple queues */
3891 		eth_conf->rxmode.mq_mode =
3892 				(enum rte_eth_rx_mq_mode)
3893 					(rx_mq_mode & RTE_ETH_MQ_RX_VMDQ_DCB);
3894 		eth_conf->txmode.mq_mode = RTE_ETH_MQ_TX_VMDQ_DCB;
3895 	} else {
3896 		struct rte_eth_dcb_rx_conf *rx_conf =
3897 				&eth_conf->rx_adv_conf.dcb_rx_conf;
3898 		struct rte_eth_dcb_tx_conf *tx_conf =
3899 				&eth_conf->tx_adv_conf.dcb_tx_conf;
3900 
3901 		memset(&rss_conf, 0, sizeof(struct rte_eth_rss_conf));
3902 
3903 		rc = rte_eth_dev_rss_hash_conf_get(pid, &rss_conf);
3904 		if (rc != 0)
3905 			return rc;
3906 
3907 		rx_conf->nb_tcs = num_tcs;
3908 		tx_conf->nb_tcs = num_tcs;
3909 
3910 		for (i = 0; i < RTE_ETH_DCB_NUM_USER_PRIORITIES; i++) {
3911 			rx_conf->dcb_tc[i] = i % num_tcs;
3912 			tx_conf->dcb_tc[i] = i % num_tcs;
3913 		}
3914 
3915 		eth_conf->rxmode.mq_mode =
3916 				(enum rte_eth_rx_mq_mode)
3917 					(rx_mq_mode & RTE_ETH_MQ_RX_DCB_RSS);
3918 		eth_conf->rx_adv_conf.rss_conf = rss_conf;
3919 		eth_conf->txmode.mq_mode = RTE_ETH_MQ_TX_DCB;
3920 	}
3921 
3922 	if (pfc_en)
3923 		eth_conf->dcb_capability_en =
3924 				RTE_ETH_DCB_PG_SUPPORT | RTE_ETH_DCB_PFC_SUPPORT;
3925 	else
3926 		eth_conf->dcb_capability_en = RTE_ETH_DCB_PG_SUPPORT;
3927 
3928 	return 0;
3929 }
3930 
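/*
 * Reconfigure port 'pid' for DCB operation: build the DCB (or VMDq+DCB)
 * configuration with get_eth_dcb_conf(), re-apply it with
 * rte_eth_dev_configure(), adjust the global Rx/Tx queue counts, enable
 * VLAN filtering for the test VLAN IDs and mark the port as DCB-enabled.
 * Not supported when testpmd runs with multiple processes.
 *
 * A minimal usage sketch (hypothetical values; the port must be stopped
 * before it is reconfigured):
 *
 *	if (init_port_dcb_config(pid, DCB_ENABLED, RTE_ETH_4_TCS, 1) == 0)
 *		start_port(pid);
 *
 * Returns 0 on success, negative on error.
 */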
3931 int
3932 init_port_dcb_config(portid_t pid,
3933 		     enum dcb_mode_enable dcb_mode,
3934 		     enum rte_eth_nb_tcs num_tcs,
3935 		     uint8_t pfc_en)
3936 {
3937 	struct rte_eth_conf port_conf;
3938 	struct rte_port *rte_port;
3939 	int retval;
3940 	uint16_t i;
3941 
3942 	if (num_procs > 1) {
3943 		fprintf(stderr, "The multi-process feature doesn't support DCB.\n");
3944 		return -ENOTSUP;
3945 	}
3946 	rte_port = &ports[pid];
3947 
3948 	/* retain the original device configuration. */
3949 	memcpy(&port_conf, &rte_port->dev_conf, sizeof(struct rte_eth_conf));
3950 
3951 	/* Set configuration of DCB in VT mode and DCB in non-VT mode. */
3952 	retval = get_eth_dcb_conf(pid, &port_conf, dcb_mode, num_tcs, pfc_en);
3953 	if (retval < 0)
3954 		return retval;
3955 	port_conf.rxmode.offloads |= RTE_ETH_RX_OFFLOAD_VLAN_FILTER;
3956 	/* remove RSS HASH offload for DCB in vt mode */
3957 	if (port_conf.rxmode.mq_mode == RTE_ETH_MQ_RX_VMDQ_DCB) {
3958 		port_conf.rxmode.offloads &= ~RTE_ETH_RX_OFFLOAD_RSS_HASH;
3959 		for (i = 0; i < nb_rxq; i++)
3960 			rte_port->rx_conf[i].offloads &=
3961 				~RTE_ETH_RX_OFFLOAD_RSS_HASH;
3962 	}
3963 
3964 	/* Re-configure the device. */
3965 	retval = rte_eth_dev_configure(pid, nb_rxq, nb_txq, &port_conf);
3966 	if (retval < 0)
3967 		return retval;
3968 
3969 	retval = eth_dev_info_get_print_err(pid, &rte_port->dev_info);
3970 	if (retval != 0)
3971 		return retval;
3972 
3973 	/* If dev_info.vmdq_pool_base is greater than 0,
3974 	 * the queue ids of the VMDq pools start after the PF queues.
3975 	 */
3976 	if (dcb_mode == DCB_VT_ENABLED &&
3977 	    rte_port->dev_info.vmdq_pool_base > 0) {
3978 		fprintf(stderr,
3979 			"VMDQ_DCB multi-queue mode is nonsensical for port %d.\n",
3980 			pid);
3981 		return -1;
3982 	}
3983 
3984 	/* Assume the ports in testpmd have the same DCB capability
3985 	 * and the same number of rxq and txq in DCB mode.
3986 	 */
3987 	if (dcb_mode == DCB_VT_ENABLED) {
3988 		if (rte_port->dev_info.max_vfs > 0) {
3989 			nb_rxq = rte_port->dev_info.nb_rx_queues;
3990 			nb_txq = rte_port->dev_info.nb_tx_queues;
3991 		} else {
3992 			nb_rxq = rte_port->dev_info.max_rx_queues;
3993 			nb_txq = rte_port->dev_info.max_tx_queues;
3994 		}
3995 	} else {
3996 		/* If VT is disabled, use all PF queues. */
3997 		if (rte_port->dev_info.vmdq_pool_base == 0) {
3998 			nb_rxq = rte_port->dev_info.max_rx_queues;
3999 			nb_txq = rte_port->dev_info.max_tx_queues;
4000 		} else {
4001 			nb_rxq = (queueid_t)num_tcs;
4002 			nb_txq = (queueid_t)num_tcs;
4003 
4004 		}
4005 	}
4006 	rx_free_thresh = 64;
4007 
4008 	memcpy(&rte_port->dev_conf, &port_conf, sizeof(struct rte_eth_conf));
4009 
4010 	rxtx_port_config(pid);
4011 	/* VLAN filter */
4012 	rte_port->dev_conf.rxmode.offloads |= RTE_ETH_RX_OFFLOAD_VLAN_FILTER;
4013 	for (i = 0; i < RTE_DIM(vlan_tags); i++)
4014 		rx_vft_set(pid, vlan_tags[i], 1);
4015 
4016 	retval = eth_macaddr_get_print_err(pid, &rte_port->eth_addr);
4017 	if (retval != 0)
4018 		return retval;
4019 
4020 	rte_port->dcb_flag = 1;
4021 
4022 	/* Enter DCB configuration status */
4023 	dcb_config = 1;
4024 
4025 	return 0;
4026 }
4027 
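/*
 * Allocate and zero the global ports[] array and reset the per-port and
 * per-ring NUMA bookkeeping. Called once from main() before the ports are
 * configured.
 */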
4028 static void
4029 init_port(void)
4030 {
4031 	int i;
4032 
4033 	/* Configuration of Ethernet ports. */
4034 	ports = rte_zmalloc("testpmd: ports",
4035 			    sizeof(struct rte_port) * RTE_MAX_ETHPORTS,
4036 			    RTE_CACHE_LINE_SIZE);
4037 	if (ports == NULL) {
4038 		rte_exit(EXIT_FAILURE,
4039 				"rte_zmalloc(%d struct rte_port) failed\n",
4040 				RTE_MAX_ETHPORTS);
4041 	}
4042 	for (i = 0; i < RTE_MAX_ETHPORTS; i++)
4043 		ports[i].xstats_info.allocated = false;
4044 	for (i = 0; i < RTE_MAX_ETHPORTS; i++)
4045 		LIST_INIT(&ports[i].flow_tunnel_list);
4046 	/* Initialize ports NUMA structures */
4047 	memset(port_numa, NUMA_NO_CONFIG, RTE_MAX_ETHPORTS);
4048 	memset(rxring_numa, NUMA_NO_CONFIG, RTE_MAX_ETHPORTS);
4049 	memset(txring_numa, NUMA_NO_CONFIG, RTE_MAX_ETHPORTS);
4050 }
4051 
4052 static void
4053 force_quit(void)
4054 {
4055 	pmd_test_exit();
4056 	prompt_exit();
4057 }
4058 
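/*
 * Clear the terminal and display the basic statistics of every forwarding
 * port. Used by the non-interactive mode when a statistics period is
 * configured.
 */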
4059 static void
4060 print_stats(void)
4061 {
4062 	uint8_t i;
4063 	const char clr[] = { 27, '[', '2', 'J', '\0' };
4064 	const char top_left[] = { 27, '[', '1', ';', '1', 'H', '\0' };
4065 
4066 	/* Clear screen and move to top left */
4067 	printf("%s%s", clr, top_left);
4068 
4069 	printf("\nPort statistics ====================================");
4070 	for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++)
4071 		nic_stats_display(fwd_ports_ids[i]);
4072 
4073 	fflush(stdout);
4074 }
4075 
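/*
 * SIGINT/SIGTERM handler: tear down the packet capture and latency stats
 * frameworks when they are enabled, stop forwarding and close the ports,
 * and set f_quit so the statistics loop in main() terminates. Except on
 * Windows, the signal is then re-raised with the default handler so the
 * process exits with the conventional termination status.
 */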
4076 static void
4077 signal_handler(int signum)
4078 {
4079 	if (signum == SIGINT || signum == SIGTERM) {
4080 		fprintf(stderr, "\nSignal %d received, preparing to exit...\n",
4081 			signum);
4082 #ifdef RTE_LIB_PDUMP
4083 		/* uninitialize packet capture framework */
4084 		rte_pdump_uninit();
4085 #endif
4086 #ifdef RTE_LIB_LATENCYSTATS
4087 		if (latencystats_enabled != 0)
4088 			rte_latencystats_uninit();
4089 #endif
4090 		force_quit();
4091 		/* Set flag to indicate forced termination. */
4092 		f_quit = 1;
4093 		/* exit with the expected status */
4094 #ifndef RTE_EXEC_ENV_WINDOWS
4095 		signal(signum, SIG_DFL);
4096 		kill(getpid(), signum);
4097 #endif
4098 	}
4099 }
4100 
4101 int
4102 main(int argc, char** argv)
4103 {
4104 	int diag;
4105 	portid_t port_id;
4106 	uint16_t count;
4107 	int ret;
4108 
4109 	signal(SIGINT, signal_handler);
4110 	signal(SIGTERM, signal_handler);
4111 
4112 	testpmd_logtype = rte_log_register("testpmd");
4113 	if (testpmd_logtype < 0)
4114 		rte_exit(EXIT_FAILURE, "Cannot register log type\n");
4115 	rte_log_set_level(testpmd_logtype, RTE_LOG_DEBUG);
4116 
4117 	diag = rte_eal_init(argc, argv);
4118 	if (diag < 0)
4119 		rte_exit(EXIT_FAILURE, "Cannot init EAL: %s\n",
4120 			 rte_strerror(rte_errno));
4121 
4122 	ret = register_eth_event_callback();
4123 	if (ret != 0)
4124 		rte_exit(EXIT_FAILURE, "Cannot register for ethdev events\n");
4125 
4126 #ifdef RTE_LIB_PDUMP
4127 	/* initialize packet capture framework */
4128 	rte_pdump_init();
4129 #endif
4130 
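	/* Record the ids of the Ethernet ports probed by EAL. */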
4131 	count = 0;
4132 	RTE_ETH_FOREACH_DEV(port_id) {
4133 		ports_ids[count] = port_id;
4134 		count++;
4135 	}
4136 	nb_ports = (portid_t) count;
4137 	if (nb_ports == 0)
4138 		TESTPMD_LOG(WARNING, "No probed ethernet devices\n");
4139 
4140 	/* allocate port structures, and init them */
4141 	init_port();
4142 
4143 	set_def_fwd_config();
4144 	if (nb_lcores == 0)
4145 		rte_exit(EXIT_FAILURE, "No cores defined for forwarding\n"
4146 			 "Check the core mask argument\n");
4147 
4148 	/* Bitrate/latency stats disabled by default */
4149 #ifdef RTE_LIB_BITRATESTATS
4150 	bitrate_enabled = 0;
4151 #endif
4152 #ifdef RTE_LIB_LATENCYSTATS
4153 	latencystats_enabled = 0;
4154 #endif
4155 
4156 	/* on FreeBSD, mlockall() is disabled by default */
4157 #ifdef RTE_EXEC_ENV_FREEBSD
4158 	do_mlockall = 0;
4159 #else
4160 	do_mlockall = 1;
4161 #endif
4162 
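	/*
	 * rte_eal_init() returned the number of arguments it consumed;
	 * skip them and parse the remaining testpmd-specific options.
	 */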
4163 	argc -= diag;
4164 	argv += diag;
4165 	if (argc > 1)
4166 		launch_args_parse(argc, argv);
4167 
4168 #ifndef RTE_EXEC_ENV_WINDOWS
4169 	if (do_mlockall && mlockall(MCL_CURRENT | MCL_FUTURE)) {
4170 		TESTPMD_LOG(NOTICE, "mlockall() failed with error \"%s\"\n",
4171 			strerror(errno));
4172 	}
4173 #endif
4174 
4175 	if (tx_first && interactive)
4176 		rte_exit(EXIT_FAILURE, "--tx-first cannot be used in "
4177 				"interactive mode.\n");
4178 
4179 	if (tx_first && lsc_interrupt) {
4180 		fprintf(stderr,
4181 			"Warning: lsc_interrupt needs to be off when using tx_first. Disabling.\n");
4182 		lsc_interrupt = 0;
4183 	}
4184 
4185 	if (!nb_rxq && !nb_txq)
4186 		fprintf(stderr,
4187 			"Warning: Either rx or tx queues should be non-zero\n");
4188 
4189 	if (nb_rxq > 1 && nb_rxq > nb_txq)
4190 		fprintf(stderr,
4191 			"Warning: nb_rxq=%d enables RSS configuration, but nb_txq=%d will prevent it from being fully tested.\n",
4192 			nb_rxq, nb_txq);
4193 
4194 	init_config();
4195 
4196 	if (hot_plug) {
4197 		ret = rte_dev_hotplug_handle_enable();
4198 		if (ret) {
4199 			RTE_LOG(ERR, EAL,
4200 				"Failed to enable hotplug handling.\n");
4201 			return -1;
4202 		}
4203 
4204 		ret = rte_dev_event_monitor_start();
4205 		if (ret) {
4206 			RTE_LOG(ERR, EAL,
4207 				"Failed to start device event monitoring.\n");
4208 			return -1;
4209 		}
4210 
4211 		ret = rte_dev_event_callback_register(NULL,
4212 			dev_event_callback, NULL);
4213 		if (ret) {
4214 			RTE_LOG(ERR, EAL,
4215 				"Failed to register device event callback.\n");
4216 			return -1;
4217 		}
4218 	}
4219 
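	/* Unless device start was disabled on the command line, start all ports now. */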
4220 	if (!no_device_start && start_port(RTE_PORT_ALL) != 0)
4221 		rte_exit(EXIT_FAILURE, "Start ports failed\n");
4222 
4223 	/* set all ports to promiscuous mode by default */
4224 	RTE_ETH_FOREACH_DEV(port_id) {
4225 		ret = rte_eth_promiscuous_enable(port_id);
4226 		if (ret != 0)
4227 			fprintf(stderr,
4228 				"Error enabling promiscuous mode for port %u: %s - ignored\n",
4229 				port_id, rte_strerror(-ret));
4230 	}
4231 
4232 #ifdef RTE_LIB_METRICS
4233 	/* Init metrics library */
4234 	rte_metrics_init(rte_socket_id());
4235 #endif
4236 
4237 #ifdef RTE_LIB_LATENCYSTATS
4238 	if (latencystats_enabled != 0) {
4239 		int ret = rte_latencystats_init(1, NULL);
4240 		if (ret)
4241 			fprintf(stderr,
4242 				"Warning: latencystats init() returned error %d\n",
4243 				ret);
4244 		fprintf(stderr, "Latencystats running on lcore %d\n",
4245 			latencystats_lcore_id);
4246 	}
4247 #endif
4248 
4249 	/* Setup bitrate stats */
4250 #ifdef RTE_LIB_BITRATESTATS
4251 	if (bitrate_enabled != 0) {
4252 		bitrate_data = rte_stats_bitrate_create();
4253 		if (bitrate_data == NULL)
4254 			rte_exit(EXIT_FAILURE,
4255 				"Could not allocate bitrate data.\n");
4256 		rte_stats_bitrate_reg(bitrate_data);
4257 	}
4258 #endif
4259 #ifdef RTE_LIB_CMDLINE
4260 	if (strlen(cmdline_filename) != 0)
4261 		cmdline_read_from_file(cmdline_filename);
4262 
4263 	if (interactive == 1) {
4264 		if (auto_start) {
4265 			printf("Start automatic packet forwarding\n");
4266 			start_packet_forwarding(0);
4267 		}
4268 		prompt();
4269 		pmd_test_exit();
4270 	} else
4271 #endif
4272 	{
4273 		char c;
4274 		int rc;
4275 
4276 		f_quit = 0;
4277 
4278 		printf("No commandline core given, start packet forwarding\n");
4279 		start_packet_forwarding(tx_first);
4280 		if (stats_period != 0) {
4281 			uint64_t prev_time = 0, cur_time, diff_time = 0;
4282 			uint64_t timer_period;
4283 
4284 			/* Convert to number of cycles */
4285 			timer_period = stats_period * rte_get_timer_hz();
4286 
4287 			while (f_quit == 0) {
4288 				cur_time = rte_get_timer_cycles();
4289 				diff_time += cur_time - prev_time;
4290 
4291 				if (diff_time >= timer_period) {
4292 					print_stats();
4293 					/* Reset the timer */
4294 					diff_time = 0;
4295 				}
4296 				/* Sleep to avoid unnecessary checks */
4297 				prev_time = cur_time;
4298 				rte_delay_us_sleep(US_PER_S);
4299 			}
4300 		}
4301 
4302 		printf("Press enter to exit\n");
4303 		rc = read(0, &c, 1);
4304 		pmd_test_exit();
4305 		if (rc < 0)
4306 			return 1;
4307 	}
4308 
4309 	ret = rte_eal_cleanup();
4310 	if (ret != 0)
4311 		rte_exit(EXIT_FAILURE,
4312 			 "EAL cleanup failed: %s\n", strerror(-ret));
4313 
4314 	return EXIT_SUCCESS;
4315 }
4316