xref: /dpdk/app/test-pmd/testpmd.c (revision 59f3a8acbcdbafeebe816a26d76dfb06e6450f31)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2017 Intel Corporation
3  */
4 
5 #include <stdarg.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <signal.h>
9 #include <string.h>
10 #include <time.h>
11 #include <fcntl.h>
12 #ifndef RTE_EXEC_ENV_WINDOWS
13 #include <sys/mman.h>
14 #endif
15 #include <sys/types.h>
16 #include <errno.h>
17 #include <stdbool.h>
18 
19 #include <sys/queue.h>
20 #include <sys/stat.h>
21 
22 #include <stdint.h>
23 #include <unistd.h>
24 #include <inttypes.h>
25 
26 #include <rte_common.h>
27 #include <rte_errno.h>
28 #include <rte_byteorder.h>
29 #include <rte_log.h>
30 #include <rte_debug.h>
31 #include <rte_cycles.h>
32 #include <rte_memory.h>
33 #include <rte_memcpy.h>
34 #include <rte_launch.h>
35 #include <rte_eal.h>
36 #include <rte_alarm.h>
37 #include <rte_per_lcore.h>
38 #include <rte_lcore.h>
39 #include <rte_atomic.h>
40 #include <rte_branch_prediction.h>
41 #include <rte_mempool.h>
42 #include <rte_malloc.h>
43 #include <rte_mbuf.h>
44 #include <rte_mbuf_pool_ops.h>
45 #include <rte_interrupts.h>
46 #include <rte_pci.h>
47 #include <rte_ether.h>
48 #include <rte_ethdev.h>
49 #include <rte_dev.h>
50 #include <rte_string_fns.h>
51 #ifdef RTE_NET_IXGBE
52 #include <rte_pmd_ixgbe.h>
53 #endif
54 #ifdef RTE_LIB_PDUMP
55 #include <rte_pdump.h>
56 #endif
57 #include <rte_flow.h>
58 #include <rte_metrics.h>
59 #ifdef RTE_LIB_BITRATESTATS
60 #include <rte_bitrate.h>
61 #endif
62 #ifdef RTE_LIB_LATENCYSTATS
63 #include <rte_latencystats.h>
64 #endif
65 #ifdef RTE_EXEC_ENV_WINDOWS
66 #include <process.h>
67 #endif
68 
69 #include "testpmd.h"
70 
71 #ifndef MAP_HUGETLB
72 /* FreeBSD may not have MAP_HUGETLB (in fact, it probably doesn't) */
73 #define HUGE_FLAG (0x40000)
74 #else
75 #define HUGE_FLAG MAP_HUGETLB
76 #endif
77 
78 #ifndef MAP_HUGE_SHIFT
79 /* older kernels (or FreeBSD) will not have this define */
80 #define HUGE_SHIFT (26)
81 #else
82 #define HUGE_SHIFT MAP_HUGE_SHIFT
83 #endif
84 
85 #define EXTMEM_HEAP_NAME "extmem"
86 #define EXTBUF_ZONE_SIZE RTE_PGSIZE_2M
87 
88 uint16_t verbose_level = 0; /**< Silent by default. */
89 int testpmd_logtype; /**< Log type for testpmd logs */
90 
91 /* use main core for command line ? */
92 uint8_t interactive = 0;
93 uint8_t auto_start = 0;
94 uint8_t tx_first;
95 char cmdline_filename[PATH_MAX] = {0};
96 
97 /*
98  * NUMA support configuration.
99  * When set, the NUMA support attempts to dispatch the allocation of the
100  * RX and TX memory rings, and of the DMA memory buffers (mbufs) for the
101  * probed ports among the CPU sockets 0 and 1.
102  * Otherwise, all memory is allocated from CPU socket 0.
103  */
104 uint8_t numa_support = 1; /**< numa enabled by default */
105 
106 /*
107  * In UMA mode,all memory is allocated from socket 0 if --socket-num is
108  * not configured.
109  */
110 uint8_t socket_num = UMA_NO_CONFIG;
111 
112 /*
113  * Select mempool allocation type:
114  * - native: use regular DPDK memory
115  * - anon: use regular DPDK memory to create mempool, but populate using
116  *         anonymous memory (may not be IOVA-contiguous)
117  * - xmem: use externally allocated hugepage memory
118  */
119 uint8_t mp_alloc_type = MP_ALLOC_NATIVE;
120 
121 /*
122  * Store specified sockets on which memory pool to be used by ports
123  * is allocated.
124  */
125 uint8_t port_numa[RTE_MAX_ETHPORTS];
126 
127 /*
128  * Store specified sockets on which RX ring to be used by ports
129  * is allocated.
130  */
131 uint8_t rxring_numa[RTE_MAX_ETHPORTS];
132 
133 /*
134  * Store specified sockets on which TX ring to be used by ports
135  * is allocated.
136  */
137 uint8_t txring_numa[RTE_MAX_ETHPORTS];
138 
139 /*
140  * Record the Ethernet address of peer target ports to which packets are
141  * forwarded.
142  * Must be instantiated with the ethernet addresses of peer traffic generator
143  * ports.
144  */
145 struct rte_ether_addr peer_eth_addrs[RTE_MAX_ETHPORTS];
146 portid_t nb_peer_eth_addrs = 0;
147 
148 /*
149  * Probed Target Environment.
150  */
151 struct rte_port *ports;	       /**< For all probed ethernet ports. */
152 portid_t nb_ports;             /**< Number of probed ethernet ports. */
153 struct fwd_lcore **fwd_lcores; /**< For all probed logical cores. */
154 lcoreid_t nb_lcores;           /**< Number of probed logical cores. */
155 
156 portid_t ports_ids[RTE_MAX_ETHPORTS]; /**< Store all port ids. */
157 
158 /*
159  * Test Forwarding Configuration.
160  *    nb_fwd_lcores <= nb_cfg_lcores <= nb_lcores
161  *    nb_fwd_ports  <= nb_cfg_ports  <= nb_ports
162  */
163 lcoreid_t nb_cfg_lcores; /**< Number of configured logical cores. */
164 lcoreid_t nb_fwd_lcores; /**< Number of forwarding logical cores. */
165 portid_t  nb_cfg_ports;  /**< Number of configured ports. */
166 portid_t  nb_fwd_ports;  /**< Number of forwarding ports. */
167 
168 unsigned int fwd_lcores_cpuids[RTE_MAX_LCORE]; /**< CPU ids configuration. */
169 portid_t fwd_ports_ids[RTE_MAX_ETHPORTS];      /**< Port ids configuration. */
170 
171 struct fwd_stream **fwd_streams; /**< For each RX queue of each port. */
172 streamid_t nb_fwd_streams;       /**< Is equal to (nb_ports * nb_rxq). */
173 
174 /*
175  * Forwarding engines.
176  */
177 struct fwd_engine * fwd_engines[] = {
178 	&io_fwd_engine,
179 	&mac_fwd_engine,
180 	&mac_swap_engine,
181 	&flow_gen_engine,
182 	&rx_only_engine,
183 	&tx_only_engine,
184 	&csum_fwd_engine,
185 	&icmp_echo_engine,
186 	&noisy_vnf_engine,
187 	&five_tuple_swap_fwd_engine,
188 #ifdef RTE_LIBRTE_IEEE1588
189 	&ieee1588_fwd_engine,
190 #endif
191 	NULL,
192 };
193 
194 struct rte_mempool *mempools[RTE_MAX_NUMA_NODES * MAX_SEGS_BUFFER_SPLIT];
195 uint16_t mempool_flags;
196 
197 struct fwd_config cur_fwd_config;
198 struct fwd_engine *cur_fwd_eng = &io_fwd_engine; /**< IO mode by default. */
199 uint32_t retry_enabled;
200 uint32_t burst_tx_delay_time = BURST_TX_WAIT_US;
201 uint32_t burst_tx_retry_num = BURST_TX_RETRIES;
202 
203 uint32_t mbuf_data_size_n = 1; /* Number of specified mbuf sizes. */
204 uint16_t mbuf_data_size[MAX_SEGS_BUFFER_SPLIT] = {
205 	DEFAULT_MBUF_DATA_SIZE
206 }; /**< Mbuf data space size. */
207 uint32_t param_total_num_mbufs = 0;  /**< number of mbufs in all pools - if
208                                       * specified on command-line. */
209 uint16_t stats_period; /**< Period to show statistics (disabled by default) */
210 
211 /** Extended statistics to show. */
212 struct rte_eth_xstat_name *xstats_display;
213 
214 unsigned int xstats_display_num; /**< Size of extended statistics to show */
215 
216 /*
217  * In container, it cannot terminate the process which running with 'stats-period'
218  * option. Set flag to exit stats period loop after received SIGINT/SIGTERM.
219  */
220 uint8_t f_quit;
221 
222 /*
223  * Max Rx frame size, set by '--max-pkt-len' parameter.
224  */
225 uint32_t max_rx_pkt_len;
226 
227 /*
228  * Configuration of packet segments used to scatter received packets
229  * if some of split features is configured.
230  */
231 uint16_t rx_pkt_seg_lengths[MAX_SEGS_BUFFER_SPLIT];
232 uint8_t  rx_pkt_nb_segs; /**< Number of segments to split */
233 uint16_t rx_pkt_seg_offsets[MAX_SEGS_BUFFER_SPLIT];
234 uint8_t  rx_pkt_nb_offs; /**< Number of specified offsets */
235 
236 /*
237  * Configuration of packet segments used by the "txonly" processing engine.
238  */
239 uint16_t tx_pkt_length = TXONLY_DEF_PACKET_LEN; /**< TXONLY packet length. */
240 uint16_t tx_pkt_seg_lengths[RTE_MAX_SEGS_PER_PKT] = {
241 	TXONLY_DEF_PACKET_LEN,
242 };
243 uint8_t  tx_pkt_nb_segs = 1; /**< Number of segments in TXONLY packets */
244 
245 enum tx_pkt_split tx_pkt_split = TX_PKT_SPLIT_OFF;
246 /**< Split policy for packets to TX. */
247 
248 uint8_t txonly_multi_flow;
249 /**< Whether multiple flows are generated in TXONLY mode. */
250 
251 uint32_t tx_pkt_times_inter;
252 /**< Timings for send scheduling in TXONLY mode, time between bursts. */
253 
254 uint32_t tx_pkt_times_intra;
255 /**< Timings for send scheduling in TXONLY mode, time between packets. */
256 
257 uint16_t nb_pkt_per_burst = DEF_PKT_BURST; /**< Number of packets per burst. */
258 uint16_t nb_pkt_flowgen_clones; /**< Number of Tx packet clones to send in flowgen mode. */
259 int nb_flows_flowgen = 1024; /**< Number of flows in flowgen mode. */
260 uint16_t mb_mempool_cache = DEF_MBUF_CACHE; /**< Size of mbuf mempool cache. */
261 
262 /* current configuration is in DCB or not,0 means it is not in DCB mode */
263 uint8_t dcb_config = 0;
264 
265 /*
266  * Configurable number of RX/TX queues.
267  */
268 queueid_t nb_hairpinq; /**< Number of hairpin queues per port. */
269 queueid_t nb_rxq = 1; /**< Number of RX queues per port. */
270 queueid_t nb_txq = 1; /**< Number of TX queues per port. */
271 
272 /*
273  * Configurable number of RX/TX ring descriptors.
274  * Defaults are supplied by drivers via ethdev.
275  */
276 #define RTE_TEST_RX_DESC_DEFAULT 0
277 #define RTE_TEST_TX_DESC_DEFAULT 0
278 uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT; /**< Number of RX descriptors. */
279 uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT; /**< Number of TX descriptors. */
280 
281 #define RTE_PMD_PARAM_UNSET -1
282 /*
283  * Configurable values of RX and TX ring threshold registers.
284  */
285 
286 int8_t rx_pthresh = RTE_PMD_PARAM_UNSET;
287 int8_t rx_hthresh = RTE_PMD_PARAM_UNSET;
288 int8_t rx_wthresh = RTE_PMD_PARAM_UNSET;
289 
290 int8_t tx_pthresh = RTE_PMD_PARAM_UNSET;
291 int8_t tx_hthresh = RTE_PMD_PARAM_UNSET;
292 int8_t tx_wthresh = RTE_PMD_PARAM_UNSET;
293 
294 /*
295  * Configurable value of RX free threshold.
296  */
297 int16_t rx_free_thresh = RTE_PMD_PARAM_UNSET;
298 
299 /*
300  * Configurable value of RX drop enable.
301  */
302 int8_t rx_drop_en = RTE_PMD_PARAM_UNSET;
303 
304 /*
305  * Configurable value of TX free threshold.
306  */
307 int16_t tx_free_thresh = RTE_PMD_PARAM_UNSET;
308 
309 /*
310  * Configurable value of TX RS bit threshold.
311  */
312 int16_t tx_rs_thresh = RTE_PMD_PARAM_UNSET;
313 
314 /*
315  * Configurable value of buffered packets before sending.
316  */
317 uint16_t noisy_tx_sw_bufsz;
318 
319 /*
320  * Configurable value of packet buffer timeout.
321  */
322 uint16_t noisy_tx_sw_buf_flush_time;
323 
324 /*
325  * Configurable value for size of VNF internal memory area
326  * used for simulating noisy neighbour behaviour
327  */
328 uint64_t noisy_lkup_mem_sz;
329 
330 /*
331  * Configurable value of number of random writes done in
332  * VNF simulation memory area.
333  */
334 uint64_t noisy_lkup_num_writes;
335 
336 /*
337  * Configurable value of number of random reads done in
338  * VNF simulation memory area.
339  */
340 uint64_t noisy_lkup_num_reads;
341 
342 /*
343  * Configurable value of number of random reads/writes done in
344  * VNF simulation memory area.
345  */
346 uint64_t noisy_lkup_num_reads_writes;
347 
348 /*
349  * Receive Side Scaling (RSS) configuration.
350  */
351 uint64_t rss_hf = ETH_RSS_IP; /* RSS IP by default. */
352 
353 /*
354  * Port topology configuration
355  */
356 uint16_t port_topology = PORT_TOPOLOGY_PAIRED; /* Ports are paired by default */
357 
358 /*
359  * Avoids to flush all the RX streams before starts forwarding.
360  */
361 uint8_t no_flush_rx = 0; /* flush by default */
362 
363 /*
364  * Flow API isolated mode.
365  */
366 uint8_t flow_isolate_all;
367 
368 /*
369  * Avoids to check link status when starting/stopping a port.
370  */
371 uint8_t no_link_check = 0; /* check by default */
372 
373 /*
374  * Don't automatically start all ports in interactive mode.
375  */
376 uint8_t no_device_start = 0;
377 
378 /*
379  * Enable link status change notification
380  */
381 uint8_t lsc_interrupt = 1; /* enabled by default */
382 
383 /*
384  * Enable device removal notification.
385  */
386 uint8_t rmv_interrupt = 1; /* enabled by default */
387 
388 uint8_t hot_plug = 0; /**< hotplug disabled by default. */
389 
390 /* After attach, port setup is called on event or by iterator */
391 bool setup_on_probe_event = true;
392 
393 /* Clear ptypes on port initialization. */
394 uint8_t clear_ptypes = true;
395 
396 /* Hairpin ports configuration mode. */
397 uint16_t hairpin_mode;
398 
399 /* Pretty printing of ethdev events */
400 static const char * const eth_event_desc[] = {
401 	[RTE_ETH_EVENT_UNKNOWN] = "unknown",
402 	[RTE_ETH_EVENT_INTR_LSC] = "link state change",
403 	[RTE_ETH_EVENT_QUEUE_STATE] = "queue state",
404 	[RTE_ETH_EVENT_INTR_RESET] = "reset",
405 	[RTE_ETH_EVENT_VF_MBOX] = "VF mbox",
406 	[RTE_ETH_EVENT_IPSEC] = "IPsec",
407 	[RTE_ETH_EVENT_MACSEC] = "MACsec",
408 	[RTE_ETH_EVENT_INTR_RMV] = "device removal",
409 	[RTE_ETH_EVENT_NEW] = "device probed",
410 	[RTE_ETH_EVENT_DESTROY] = "device released",
411 	[RTE_ETH_EVENT_FLOW_AGED] = "flow aged",
412 	[RTE_ETH_EVENT_MAX] = NULL,
413 };
414 
415 /*
416  * Display or mask ether events
417  * Default to all events except VF_MBOX
418  */
419 uint32_t event_print_mask = (UINT32_C(1) << RTE_ETH_EVENT_UNKNOWN) |
420 			    (UINT32_C(1) << RTE_ETH_EVENT_INTR_LSC) |
421 			    (UINT32_C(1) << RTE_ETH_EVENT_QUEUE_STATE) |
422 			    (UINT32_C(1) << RTE_ETH_EVENT_INTR_RESET) |
423 			    (UINT32_C(1) << RTE_ETH_EVENT_IPSEC) |
424 			    (UINT32_C(1) << RTE_ETH_EVENT_MACSEC) |
425 			    (UINT32_C(1) << RTE_ETH_EVENT_INTR_RMV) |
426 			    (UINT32_C(1) << RTE_ETH_EVENT_FLOW_AGED);
427 /*
428  * Decide if all memory are locked for performance.
429  */
430 int do_mlockall = 0;
431 
432 /*
433  * NIC bypass mode configuration options.
434  */
435 
436 #if defined RTE_NET_IXGBE && defined RTE_LIBRTE_IXGBE_BYPASS
437 /* The NIC bypass watchdog timeout. */
438 uint32_t bypass_timeout = RTE_PMD_IXGBE_BYPASS_TMT_OFF;
439 #endif
440 
441 
442 #ifdef RTE_LIB_LATENCYSTATS
443 
444 /*
445  * Set when latency stats is enabled in the commandline
446  */
447 uint8_t latencystats_enabled;
448 
449 /*
450  * Lcore ID to serive latency statistics.
451  */
452 lcoreid_t latencystats_lcore_id = -1;
453 
454 #endif
455 
456 /*
457  * Ethernet device configuration.
458  */
459 struct rte_eth_rxmode rx_mode;
460 
461 struct rte_eth_txmode tx_mode = {
462 	.offloads = DEV_TX_OFFLOAD_MBUF_FAST_FREE,
463 };
464 
465 struct rte_fdir_conf fdir_conf = {
466 	.mode = RTE_FDIR_MODE_NONE,
467 	.pballoc = RTE_FDIR_PBALLOC_64K,
468 	.status = RTE_FDIR_REPORT_STATUS,
469 	.mask = {
470 		.vlan_tci_mask = 0xFFEF,
471 		.ipv4_mask     = {
472 			.src_ip = 0xFFFFFFFF,
473 			.dst_ip = 0xFFFFFFFF,
474 		},
475 		.ipv6_mask     = {
476 			.src_ip = {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
477 			.dst_ip = {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
478 		},
479 		.src_port_mask = 0xFFFF,
480 		.dst_port_mask = 0xFFFF,
481 		.mac_addr_byte_mask = 0xFF,
482 		.tunnel_type_mask = 1,
483 		.tunnel_id_mask = 0xFFFFFFFF,
484 	},
485 	.drop_queue = 127,
486 };
487 
488 volatile int test_done = 1; /* stop packet forwarding when set to 1. */
489 
490 /*
491  * Display zero values by default for xstats
492  */
493 uint8_t xstats_hide_zero;
494 
495 /*
496  * Measure of CPU cycles disabled by default
497  */
498 uint8_t record_core_cycles;
499 
500 /*
501  * Display of RX and TX bursts disabled by default
502  */
503 uint8_t record_burst_stats;
504 
505 unsigned int num_sockets = 0;
506 unsigned int socket_ids[RTE_MAX_NUMA_NODES];
507 
508 #ifdef RTE_LIB_BITRATESTATS
509 /* Bitrate statistics */
510 struct rte_stats_bitrates *bitrate_data;
511 lcoreid_t bitrate_lcore_id;
512 uint8_t bitrate_enabled;
513 #endif
514 
515 struct gro_status gro_ports[RTE_MAX_ETHPORTS];
516 uint8_t gro_flush_cycles = GRO_DEFAULT_FLUSH_CYCLES;
517 
518 /*
519  * hexadecimal bitmask of RX mq mode can be enabled.
520  */
521 enum rte_eth_rx_mq_mode rx_mq_mode = ETH_MQ_RX_VMDQ_DCB_RSS;
522 
523 /*
524  * Used to set forced link speed
525  */
526 uint32_t eth_link_speed;
527 
528 /*
529  * ID of the current process in multi-process, used to
530  * configure the queues to be polled.
531  */
532 int proc_id;
533 
534 /*
535  * Number of processes in multi-process, used to
536  * configure the queues to be polled.
537  */
538 unsigned int num_procs = 1;
539 
540 static void
541 eth_rx_metadata_negotiate_mp(uint16_t port_id)
542 {
543 	uint64_t rx_meta_features = 0;
544 	int ret;
545 
546 	if (!is_proc_primary())
547 		return;
548 
549 	rx_meta_features |= RTE_ETH_RX_METADATA_USER_FLAG;
550 	rx_meta_features |= RTE_ETH_RX_METADATA_USER_MARK;
551 	rx_meta_features |= RTE_ETH_RX_METADATA_TUNNEL_ID;
552 
553 	ret = rte_eth_rx_metadata_negotiate(port_id, &rx_meta_features);
554 	if (ret == 0) {
555 		if (!(rx_meta_features & RTE_ETH_RX_METADATA_USER_FLAG)) {
556 			TESTPMD_LOG(DEBUG, "Flow action FLAG will not affect Rx mbufs on port %u\n",
557 				    port_id);
558 		}
559 
560 		if (!(rx_meta_features & RTE_ETH_RX_METADATA_USER_MARK)) {
561 			TESTPMD_LOG(DEBUG, "Flow action MARK will not affect Rx mbufs on port %u\n",
562 				    port_id);
563 		}
564 
565 		if (!(rx_meta_features & RTE_ETH_RX_METADATA_TUNNEL_ID)) {
566 			TESTPMD_LOG(DEBUG, "Flow tunnel offload support might be limited or unavailable on port %u\n",
567 				    port_id);
568 		}
569 	} else if (ret != -ENOTSUP) {
570 		rte_exit(EXIT_FAILURE, "Error when negotiating Rx meta features on port %u: %s\n",
571 			 port_id, rte_strerror(-ret));
572 	}
573 }
574 
575 static void
576 flow_pick_transfer_proxy_mp(uint16_t port_id)
577 {
578 	struct rte_port *port = &ports[port_id];
579 	int ret;
580 
581 	port->flow_transfer_proxy = port_id;
582 
583 	if (!is_proc_primary())
584 		return;
585 
586 	ret = rte_flow_pick_transfer_proxy(port_id, &port->flow_transfer_proxy,
587 					   NULL);
588 	if (ret != 0) {
589 		fprintf(stderr, "Error picking flow transfer proxy for port %u: %s - ignore\n",
590 			port_id, rte_strerror(-ret));
591 	}
592 }
593 
594 static int
595 eth_dev_configure_mp(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
596 		      const struct rte_eth_conf *dev_conf)
597 {
598 	if (is_proc_primary())
599 		return rte_eth_dev_configure(port_id, nb_rx_q, nb_tx_q,
600 					dev_conf);
601 	return 0;
602 }
603 
604 static int
605 eth_dev_start_mp(uint16_t port_id)
606 {
607 	if (is_proc_primary())
608 		return rte_eth_dev_start(port_id);
609 
610 	return 0;
611 }
612 
613 static int
614 eth_dev_stop_mp(uint16_t port_id)
615 {
616 	if (is_proc_primary())
617 		return rte_eth_dev_stop(port_id);
618 
619 	return 0;
620 }
621 
622 static void
623 mempool_free_mp(struct rte_mempool *mp)
624 {
625 	if (is_proc_primary())
626 		rte_mempool_free(mp);
627 }
628 
629 static int
630 eth_dev_set_mtu_mp(uint16_t port_id, uint16_t mtu)
631 {
632 	if (is_proc_primary())
633 		return rte_eth_dev_set_mtu(port_id, mtu);
634 
635 	return 0;
636 }
637 
638 /* Forward function declarations */
639 static void setup_attached_port(portid_t pi);
640 static void check_all_ports_link_status(uint32_t port_mask);
641 static int eth_event_callback(portid_t port_id,
642 			      enum rte_eth_event_type type,
643 			      void *param, void *ret_param);
644 static void dev_event_callback(const char *device_name,
645 				enum rte_dev_event_type type,
646 				void *param);
647 static void fill_xstats_display_info(void);
648 
649 /*
650  * Check if all the ports are started.
651  * If yes, return positive value. If not, return zero.
652  */
653 static int all_ports_started(void);
654 
655 struct gso_status gso_ports[RTE_MAX_ETHPORTS];
656 uint16_t gso_max_segment_size = RTE_ETHER_MAX_LEN - RTE_ETHER_CRC_LEN;
657 
658 /* Holds the registered mbuf dynamic flags names. */
659 char dynf_names[64][RTE_MBUF_DYN_NAMESIZE];
660 
661 
662 /*
663  * Helper function to check if socket is already discovered.
664  * If yes, return positive value. If not, return zero.
665  */
666 int
667 new_socket_id(unsigned int socket_id)
668 {
669 	unsigned int i;
670 
671 	for (i = 0; i < num_sockets; i++) {
672 		if (socket_ids[i] == socket_id)
673 			return 0;
674 	}
675 	return 1;
676 }
677 
678 /*
679  * Setup default configuration.
680  */
681 static void
682 set_default_fwd_lcores_config(void)
683 {
684 	unsigned int i;
685 	unsigned int nb_lc;
686 	unsigned int sock_num;
687 
688 	nb_lc = 0;
689 	for (i = 0; i < RTE_MAX_LCORE; i++) {
690 		if (!rte_lcore_is_enabled(i))
691 			continue;
692 		sock_num = rte_lcore_to_socket_id(i);
693 		if (new_socket_id(sock_num)) {
694 			if (num_sockets >= RTE_MAX_NUMA_NODES) {
695 				rte_exit(EXIT_FAILURE,
696 					 "Total sockets greater than %u\n",
697 					 RTE_MAX_NUMA_NODES);
698 			}
699 			socket_ids[num_sockets++] = sock_num;
700 		}
701 		if (i == rte_get_main_lcore())
702 			continue;
703 		fwd_lcores_cpuids[nb_lc++] = i;
704 	}
705 	nb_lcores = (lcoreid_t) nb_lc;
706 	nb_cfg_lcores = nb_lcores;
707 	nb_fwd_lcores = 1;
708 }
709 
710 static void
711 set_def_peer_eth_addrs(void)
712 {
713 	portid_t i;
714 
715 	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
716 		peer_eth_addrs[i].addr_bytes[0] = RTE_ETHER_LOCAL_ADMIN_ADDR;
717 		peer_eth_addrs[i].addr_bytes[5] = i;
718 	}
719 }
720 
721 static void
722 set_default_fwd_ports_config(void)
723 {
724 	portid_t pt_id;
725 	int i = 0;
726 
727 	RTE_ETH_FOREACH_DEV(pt_id) {
728 		fwd_ports_ids[i++] = pt_id;
729 
730 		/* Update sockets info according to the attached device */
731 		int socket_id = rte_eth_dev_socket_id(pt_id);
732 		if (socket_id >= 0 && new_socket_id(socket_id)) {
733 			if (num_sockets >= RTE_MAX_NUMA_NODES) {
734 				rte_exit(EXIT_FAILURE,
735 					 "Total sockets greater than %u\n",
736 					 RTE_MAX_NUMA_NODES);
737 			}
738 			socket_ids[num_sockets++] = socket_id;
739 		}
740 	}
741 
742 	nb_cfg_ports = nb_ports;
743 	nb_fwd_ports = nb_ports;
744 }
745 
746 void
747 set_def_fwd_config(void)
748 {
749 	set_default_fwd_lcores_config();
750 	set_def_peer_eth_addrs();
751 	set_default_fwd_ports_config();
752 }
753 
754 #ifndef RTE_EXEC_ENV_WINDOWS
755 /* extremely pessimistic estimation of memory required to create a mempool */
756 static int
757 calc_mem_size(uint32_t nb_mbufs, uint32_t mbuf_sz, size_t pgsz, size_t *out)
758 {
759 	unsigned int n_pages, mbuf_per_pg, leftover;
760 	uint64_t total_mem, mbuf_mem, obj_sz;
761 
762 	/* there is no good way to predict how much space the mempool will
763 	 * occupy because it will allocate chunks on the fly, and some of those
764 	 * will come from default DPDK memory while some will come from our
765 	 * external memory, so just assume 128MB will be enough for everyone.
766 	 */
767 	uint64_t hdr_mem = 128 << 20;
768 
769 	/* account for possible non-contiguousness */
770 	obj_sz = rte_mempool_calc_obj_size(mbuf_sz, 0, NULL);
771 	if (obj_sz > pgsz) {
772 		TESTPMD_LOG(ERR, "Object size is bigger than page size\n");
773 		return -1;
774 	}
775 
776 	mbuf_per_pg = pgsz / obj_sz;
777 	leftover = (nb_mbufs % mbuf_per_pg) > 0;
778 	n_pages = (nb_mbufs / mbuf_per_pg) + leftover;
779 
780 	mbuf_mem = n_pages * pgsz;
781 
782 	total_mem = RTE_ALIGN(hdr_mem + mbuf_mem, pgsz);
783 
784 	if (total_mem > SIZE_MAX) {
785 		TESTPMD_LOG(ERR, "Memory size too big\n");
786 		return -1;
787 	}
788 	*out = (size_t)total_mem;
789 
790 	return 0;
791 }
792 
793 static int
794 pagesz_flags(uint64_t page_sz)
795 {
796 	/* as per mmap() manpage, all page sizes are log2 of page size
797 	 * shifted by MAP_HUGE_SHIFT
798 	 */
799 	int log2 = rte_log2_u64(page_sz);
800 
801 	return (log2 << HUGE_SHIFT);
802 }
803 
804 static void *
805 alloc_mem(size_t memsz, size_t pgsz, bool huge)
806 {
807 	void *addr;
808 	int flags;
809 
810 	/* allocate anonymous hugepages */
811 	flags = MAP_ANONYMOUS | MAP_PRIVATE;
812 	if (huge)
813 		flags |= HUGE_FLAG | pagesz_flags(pgsz);
814 
815 	addr = mmap(NULL, memsz, PROT_READ | PROT_WRITE, flags, -1, 0);
816 	if (addr == MAP_FAILED)
817 		return NULL;
818 
819 	return addr;
820 }
821 
822 struct extmem_param {
823 	void *addr;
824 	size_t len;
825 	size_t pgsz;
826 	rte_iova_t *iova_table;
827 	unsigned int iova_table_len;
828 };
829 
830 static int
831 create_extmem(uint32_t nb_mbufs, uint32_t mbuf_sz, struct extmem_param *param,
832 		bool huge)
833 {
834 	uint64_t pgsizes[] = {RTE_PGSIZE_2M, RTE_PGSIZE_1G, /* x86_64, ARM */
835 			RTE_PGSIZE_16M, RTE_PGSIZE_16G};    /* POWER */
836 	unsigned int cur_page, n_pages, pgsz_idx;
837 	size_t mem_sz, cur_pgsz;
838 	rte_iova_t *iovas = NULL;
839 	void *addr;
840 	int ret;
841 
842 	for (pgsz_idx = 0; pgsz_idx < RTE_DIM(pgsizes); pgsz_idx++) {
843 		/* skip anything that is too big */
844 		if (pgsizes[pgsz_idx] > SIZE_MAX)
845 			continue;
846 
847 		cur_pgsz = pgsizes[pgsz_idx];
848 
849 		/* if we were told not to allocate hugepages, override */
850 		if (!huge)
851 			cur_pgsz = sysconf(_SC_PAGESIZE);
852 
853 		ret = calc_mem_size(nb_mbufs, mbuf_sz, cur_pgsz, &mem_sz);
854 		if (ret < 0) {
855 			TESTPMD_LOG(ERR, "Cannot calculate memory size\n");
856 			return -1;
857 		}
858 
859 		/* allocate our memory */
860 		addr = alloc_mem(mem_sz, cur_pgsz, huge);
861 
862 		/* if we couldn't allocate memory with a specified page size,
863 		 * that doesn't mean we can't do it with other page sizes, so
864 		 * try another one.
865 		 */
866 		if (addr == NULL)
867 			continue;
868 
869 		/* store IOVA addresses for every page in this memory area */
870 		n_pages = mem_sz / cur_pgsz;
871 
872 		iovas = malloc(sizeof(*iovas) * n_pages);
873 
874 		if (iovas == NULL) {
875 			TESTPMD_LOG(ERR, "Cannot allocate memory for iova addresses\n");
876 			goto fail;
877 		}
878 		/* lock memory if it's not huge pages */
879 		if (!huge)
880 			mlock(addr, mem_sz);
881 
882 		/* populate IOVA addresses */
883 		for (cur_page = 0; cur_page < n_pages; cur_page++) {
884 			rte_iova_t iova;
885 			size_t offset;
886 			void *cur;
887 
888 			offset = cur_pgsz * cur_page;
889 			cur = RTE_PTR_ADD(addr, offset);
890 
891 			/* touch the page before getting its IOVA */
892 			*(volatile char *)cur = 0;
893 
894 			iova = rte_mem_virt2iova(cur);
895 
896 			iovas[cur_page] = iova;
897 		}
898 
899 		break;
900 	}
901 	/* if we couldn't allocate anything */
902 	if (iovas == NULL)
903 		return -1;
904 
905 	param->addr = addr;
906 	param->len = mem_sz;
907 	param->pgsz = cur_pgsz;
908 	param->iova_table = iovas;
909 	param->iova_table_len = n_pages;
910 
911 	return 0;
912 fail:
913 	if (iovas)
914 		free(iovas);
915 	if (addr)
916 		munmap(addr, mem_sz);
917 
918 	return -1;
919 }
920 
921 static int
922 setup_extmem(uint32_t nb_mbufs, uint32_t mbuf_sz, bool huge)
923 {
924 	struct extmem_param param;
925 	int socket_id, ret;
926 
927 	memset(&param, 0, sizeof(param));
928 
929 	/* check if our heap exists */
930 	socket_id = rte_malloc_heap_get_socket(EXTMEM_HEAP_NAME);
931 	if (socket_id < 0) {
932 		/* create our heap */
933 		ret = rte_malloc_heap_create(EXTMEM_HEAP_NAME);
934 		if (ret < 0) {
935 			TESTPMD_LOG(ERR, "Cannot create heap\n");
936 			return -1;
937 		}
938 	}
939 
940 	ret = create_extmem(nb_mbufs, mbuf_sz, &param, huge);
941 	if (ret < 0) {
942 		TESTPMD_LOG(ERR, "Cannot create memory area\n");
943 		return -1;
944 	}
945 
946 	/* we now have a valid memory area, so add it to heap */
947 	ret = rte_malloc_heap_memory_add(EXTMEM_HEAP_NAME,
948 			param.addr, param.len, param.iova_table,
949 			param.iova_table_len, param.pgsz);
950 
951 	/* when using VFIO, memory is automatically mapped for DMA by EAL */
952 
953 	/* not needed any more */
954 	free(param.iova_table);
955 
956 	if (ret < 0) {
957 		TESTPMD_LOG(ERR, "Cannot add memory to heap\n");
958 		munmap(param.addr, param.len);
959 		return -1;
960 	}
961 
962 	/* success */
963 
964 	TESTPMD_LOG(DEBUG, "Allocated %zuMB of external memory\n",
965 			param.len >> 20);
966 
967 	return 0;
968 }
969 static void
970 dma_unmap_cb(struct rte_mempool *mp __rte_unused, void *opaque __rte_unused,
971 	     struct rte_mempool_memhdr *memhdr, unsigned mem_idx __rte_unused)
972 {
973 	uint16_t pid = 0;
974 	int ret;
975 
976 	RTE_ETH_FOREACH_DEV(pid) {
977 		struct rte_eth_dev_info dev_info;
978 
979 		ret = eth_dev_info_get_print_err(pid, &dev_info);
980 		if (ret != 0) {
981 			TESTPMD_LOG(DEBUG,
982 				    "unable to get device info for port %d on addr 0x%p,"
983 				    "mempool unmapping will not be performed\n",
984 				    pid, memhdr->addr);
985 			continue;
986 		}
987 
988 		ret = rte_dev_dma_unmap(dev_info.device, memhdr->addr, 0, memhdr->len);
989 		if (ret) {
990 			TESTPMD_LOG(DEBUG,
991 				    "unable to DMA unmap addr 0x%p "
992 				    "for device %s\n",
993 				    memhdr->addr, dev_info.device->name);
994 		}
995 	}
996 	ret = rte_extmem_unregister(memhdr->addr, memhdr->len);
997 	if (ret) {
998 		TESTPMD_LOG(DEBUG,
999 			    "unable to un-register addr 0x%p\n", memhdr->addr);
1000 	}
1001 }
1002 
1003 static void
1004 dma_map_cb(struct rte_mempool *mp __rte_unused, void *opaque __rte_unused,
1005 	   struct rte_mempool_memhdr *memhdr, unsigned mem_idx __rte_unused)
1006 {
1007 	uint16_t pid = 0;
1008 	size_t page_size = sysconf(_SC_PAGESIZE);
1009 	int ret;
1010 
1011 	ret = rte_extmem_register(memhdr->addr, memhdr->len, NULL, 0,
1012 				  page_size);
1013 	if (ret) {
1014 		TESTPMD_LOG(DEBUG,
1015 			    "unable to register addr 0x%p\n", memhdr->addr);
1016 		return;
1017 	}
1018 	RTE_ETH_FOREACH_DEV(pid) {
1019 		struct rte_eth_dev_info dev_info;
1020 
1021 		ret = eth_dev_info_get_print_err(pid, &dev_info);
1022 		if (ret != 0) {
1023 			TESTPMD_LOG(DEBUG,
1024 				    "unable to get device info for port %d on addr 0x%p,"
1025 				    "mempool mapping will not be performed\n",
1026 				    pid, memhdr->addr);
1027 			continue;
1028 		}
1029 		ret = rte_dev_dma_map(dev_info.device, memhdr->addr, 0, memhdr->len);
1030 		if (ret) {
1031 			TESTPMD_LOG(DEBUG,
1032 				    "unable to DMA map addr 0x%p "
1033 				    "for device %s\n",
1034 				    memhdr->addr, dev_info.device->name);
1035 		}
1036 	}
1037 }
1038 #endif
1039 
1040 static unsigned int
1041 setup_extbuf(uint32_t nb_mbufs, uint16_t mbuf_sz, unsigned int socket_id,
1042 	    char *pool_name, struct rte_pktmbuf_extmem **ext_mem)
1043 {
1044 	struct rte_pktmbuf_extmem *xmem;
1045 	unsigned int ext_num, zone_num, elt_num;
1046 	uint16_t elt_size;
1047 
1048 	elt_size = RTE_ALIGN_CEIL(mbuf_sz, RTE_CACHE_LINE_SIZE);
1049 	elt_num = EXTBUF_ZONE_SIZE / elt_size;
1050 	zone_num = (nb_mbufs + elt_num - 1) / elt_num;
1051 
1052 	xmem = malloc(sizeof(struct rte_pktmbuf_extmem) * zone_num);
1053 	if (xmem == NULL) {
1054 		TESTPMD_LOG(ERR, "Cannot allocate memory for "
1055 				 "external buffer descriptors\n");
1056 		*ext_mem = NULL;
1057 		return 0;
1058 	}
1059 	for (ext_num = 0; ext_num < zone_num; ext_num++) {
1060 		struct rte_pktmbuf_extmem *xseg = xmem + ext_num;
1061 		const struct rte_memzone *mz;
1062 		char mz_name[RTE_MEMZONE_NAMESIZE];
1063 		int ret;
1064 
1065 		ret = snprintf(mz_name, sizeof(mz_name),
1066 			RTE_MEMPOOL_MZ_FORMAT "_xb_%u", pool_name, ext_num);
1067 		if (ret < 0 || ret >= (int)sizeof(mz_name)) {
1068 			errno = ENAMETOOLONG;
1069 			ext_num = 0;
1070 			break;
1071 		}
1072 		mz = rte_memzone_reserve_aligned(mz_name, EXTBUF_ZONE_SIZE,
1073 						 socket_id,
1074 						 RTE_MEMZONE_IOVA_CONTIG |
1075 						 RTE_MEMZONE_1GB |
1076 						 RTE_MEMZONE_SIZE_HINT_ONLY,
1077 						 EXTBUF_ZONE_SIZE);
1078 		if (mz == NULL) {
1079 			/*
1080 			 * The caller exits on external buffer creation
1081 			 * error, so there is no need to free memzones.
1082 			 */
1083 			errno = ENOMEM;
1084 			ext_num = 0;
1085 			break;
1086 		}
1087 		xseg->buf_ptr = mz->addr;
1088 		xseg->buf_iova = mz->iova;
1089 		xseg->buf_len = EXTBUF_ZONE_SIZE;
1090 		xseg->elt_size = elt_size;
1091 	}
1092 	if (ext_num == 0 && xmem != NULL) {
1093 		free(xmem);
1094 		xmem = NULL;
1095 	}
1096 	*ext_mem = xmem;
1097 	return ext_num;
1098 }
1099 
1100 /*
1101  * Configuration initialisation done once at init time.
1102  */
1103 static struct rte_mempool *
1104 mbuf_pool_create(uint16_t mbuf_seg_size, unsigned nb_mbuf,
1105 		 unsigned int socket_id, uint16_t size_idx)
1106 {
1107 	char pool_name[RTE_MEMPOOL_NAMESIZE];
1108 	struct rte_mempool *rte_mp = NULL;
1109 #ifndef RTE_EXEC_ENV_WINDOWS
1110 	uint32_t mb_size;
1111 
1112 	mb_size = sizeof(struct rte_mbuf) + mbuf_seg_size;
1113 #endif
1114 	mbuf_poolname_build(socket_id, pool_name, sizeof(pool_name), size_idx);
1115 	if (!is_proc_primary()) {
1116 		rte_mp = rte_mempool_lookup(pool_name);
1117 		if (rte_mp == NULL)
1118 			rte_exit(EXIT_FAILURE,
1119 				"Get mbuf pool for socket %u failed: %s\n",
1120 				socket_id, rte_strerror(rte_errno));
1121 		return rte_mp;
1122 	}
1123 
1124 	TESTPMD_LOG(INFO,
1125 		"create a new mbuf pool <%s>: n=%u, size=%u, socket=%u\n",
1126 		pool_name, nb_mbuf, mbuf_seg_size, socket_id);
1127 
1128 	switch (mp_alloc_type) {
1129 	case MP_ALLOC_NATIVE:
1130 		{
1131 			/* wrapper to rte_mempool_create() */
1132 			TESTPMD_LOG(INFO, "preferred mempool ops selected: %s\n",
1133 					rte_mbuf_best_mempool_ops());
1134 			rte_mp = rte_pktmbuf_pool_create(pool_name, nb_mbuf,
1135 				mb_mempool_cache, 0, mbuf_seg_size, socket_id);
1136 			break;
1137 		}
1138 #ifndef RTE_EXEC_ENV_WINDOWS
1139 	case MP_ALLOC_ANON:
1140 		{
1141 			rte_mp = rte_mempool_create_empty(pool_name, nb_mbuf,
1142 				mb_size, (unsigned int) mb_mempool_cache,
1143 				sizeof(struct rte_pktmbuf_pool_private),
1144 				socket_id, mempool_flags);
1145 			if (rte_mp == NULL)
1146 				goto err;
1147 
1148 			if (rte_mempool_populate_anon(rte_mp) == 0) {
1149 				rte_mempool_free(rte_mp);
1150 				rte_mp = NULL;
1151 				goto err;
1152 			}
1153 			rte_pktmbuf_pool_init(rte_mp, NULL);
1154 			rte_mempool_obj_iter(rte_mp, rte_pktmbuf_init, NULL);
1155 			rte_mempool_mem_iter(rte_mp, dma_map_cb, NULL);
1156 			break;
1157 		}
1158 	case MP_ALLOC_XMEM:
1159 	case MP_ALLOC_XMEM_HUGE:
1160 		{
1161 			int heap_socket;
1162 			bool huge = mp_alloc_type == MP_ALLOC_XMEM_HUGE;
1163 
1164 			if (setup_extmem(nb_mbuf, mbuf_seg_size, huge) < 0)
1165 				rte_exit(EXIT_FAILURE, "Could not create external memory\n");
1166 
1167 			heap_socket =
1168 				rte_malloc_heap_get_socket(EXTMEM_HEAP_NAME);
1169 			if (heap_socket < 0)
1170 				rte_exit(EXIT_FAILURE, "Could not get external memory socket ID\n");
1171 
1172 			TESTPMD_LOG(INFO, "preferred mempool ops selected: %s\n",
1173 					rte_mbuf_best_mempool_ops());
1174 			rte_mp = rte_pktmbuf_pool_create(pool_name, nb_mbuf,
1175 					mb_mempool_cache, 0, mbuf_seg_size,
1176 					heap_socket);
1177 			break;
1178 		}
1179 #endif
1180 	case MP_ALLOC_XBUF:
1181 		{
1182 			struct rte_pktmbuf_extmem *ext_mem;
1183 			unsigned int ext_num;
1184 
1185 			ext_num = setup_extbuf(nb_mbuf,	mbuf_seg_size,
1186 					       socket_id, pool_name, &ext_mem);
1187 			if (ext_num == 0)
1188 				rte_exit(EXIT_FAILURE,
1189 					 "Can't create pinned data buffers\n");
1190 
1191 			TESTPMD_LOG(INFO, "preferred mempool ops selected: %s\n",
1192 					rte_mbuf_best_mempool_ops());
1193 			rte_mp = rte_pktmbuf_pool_create_extbuf
1194 					(pool_name, nb_mbuf, mb_mempool_cache,
1195 					 0, mbuf_seg_size, socket_id,
1196 					 ext_mem, ext_num);
1197 			free(ext_mem);
1198 			break;
1199 		}
1200 	default:
1201 		{
1202 			rte_exit(EXIT_FAILURE, "Invalid mempool creation mode\n");
1203 		}
1204 	}
1205 
1206 #ifndef RTE_EXEC_ENV_WINDOWS
1207 err:
1208 #endif
1209 	if (rte_mp == NULL) {
1210 		rte_exit(EXIT_FAILURE,
1211 			"Creation of mbuf pool for socket %u failed: %s\n",
1212 			socket_id, rte_strerror(rte_errno));
1213 	} else if (verbose_level > 0) {
1214 		rte_mempool_dump(stdout, rte_mp);
1215 	}
1216 	return rte_mp;
1217 }
1218 
1219 /*
1220  * Check given socket id is valid or not with NUMA mode,
1221  * if valid, return 0, else return -1
1222  */
1223 static int
1224 check_socket_id(const unsigned int socket_id)
1225 {
1226 	static int warning_once = 0;
1227 
1228 	if (new_socket_id(socket_id)) {
1229 		if (!warning_once && numa_support)
1230 			fprintf(stderr,
1231 				"Warning: NUMA should be configured manually by using --port-numa-config and --ring-numa-config parameters along with --numa.\n");
1232 		warning_once = 1;
1233 		return -1;
1234 	}
1235 	return 0;
1236 }
1237 
1238 /*
1239  * Get the allowed maximum number of RX queues.
1240  * *pid return the port id which has minimal value of
1241  * max_rx_queues in all ports.
1242  */
1243 queueid_t
1244 get_allowed_max_nb_rxq(portid_t *pid)
1245 {
1246 	queueid_t allowed_max_rxq = RTE_MAX_QUEUES_PER_PORT;
1247 	bool max_rxq_valid = false;
1248 	portid_t pi;
1249 	struct rte_eth_dev_info dev_info;
1250 
1251 	RTE_ETH_FOREACH_DEV(pi) {
1252 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1253 			continue;
1254 
1255 		max_rxq_valid = true;
1256 		if (dev_info.max_rx_queues < allowed_max_rxq) {
1257 			allowed_max_rxq = dev_info.max_rx_queues;
1258 			*pid = pi;
1259 		}
1260 	}
1261 	return max_rxq_valid ? allowed_max_rxq : 0;
1262 }
1263 
1264 /*
1265  * Check input rxq is valid or not.
1266  * If input rxq is not greater than any of maximum number
1267  * of RX queues of all ports, it is valid.
1268  * if valid, return 0, else return -1
1269  */
1270 int
1271 check_nb_rxq(queueid_t rxq)
1272 {
1273 	queueid_t allowed_max_rxq;
1274 	portid_t pid = 0;
1275 
1276 	allowed_max_rxq = get_allowed_max_nb_rxq(&pid);
1277 	if (rxq > allowed_max_rxq) {
1278 		fprintf(stderr,
1279 			"Fail: input rxq (%u) can't be greater than max_rx_queues (%u) of port %u\n",
1280 			rxq, allowed_max_rxq, pid);
1281 		return -1;
1282 	}
1283 	return 0;
1284 }
1285 
1286 /*
1287  * Get the allowed maximum number of TX queues.
1288  * *pid return the port id which has minimal value of
1289  * max_tx_queues in all ports.
1290  */
1291 queueid_t
1292 get_allowed_max_nb_txq(portid_t *pid)
1293 {
1294 	queueid_t allowed_max_txq = RTE_MAX_QUEUES_PER_PORT;
1295 	bool max_txq_valid = false;
1296 	portid_t pi;
1297 	struct rte_eth_dev_info dev_info;
1298 
1299 	RTE_ETH_FOREACH_DEV(pi) {
1300 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1301 			continue;
1302 
1303 		max_txq_valid = true;
1304 		if (dev_info.max_tx_queues < allowed_max_txq) {
1305 			allowed_max_txq = dev_info.max_tx_queues;
1306 			*pid = pi;
1307 		}
1308 	}
1309 	return max_txq_valid ? allowed_max_txq : 0;
1310 }
1311 
1312 /*
1313  * Check input txq is valid or not.
1314  * If input txq is not greater than any of maximum number
1315  * of TX queues of all ports, it is valid.
1316  * if valid, return 0, else return -1
1317  */
1318 int
1319 check_nb_txq(queueid_t txq)
1320 {
1321 	queueid_t allowed_max_txq;
1322 	portid_t pid = 0;
1323 
1324 	allowed_max_txq = get_allowed_max_nb_txq(&pid);
1325 	if (txq > allowed_max_txq) {
1326 		fprintf(stderr,
1327 			"Fail: input txq (%u) can't be greater than max_tx_queues (%u) of port %u\n",
1328 			txq, allowed_max_txq, pid);
1329 		return -1;
1330 	}
1331 	return 0;
1332 }
1333 
1334 /*
1335  * Get the allowed maximum number of RXDs of every rx queue.
1336  * *pid return the port id which has minimal value of
1337  * max_rxd in all queues of all ports.
1338  */
1339 static uint16_t
1340 get_allowed_max_nb_rxd(portid_t *pid)
1341 {
1342 	uint16_t allowed_max_rxd = UINT16_MAX;
1343 	portid_t pi;
1344 	struct rte_eth_dev_info dev_info;
1345 
1346 	RTE_ETH_FOREACH_DEV(pi) {
1347 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1348 			continue;
1349 
1350 		if (dev_info.rx_desc_lim.nb_max < allowed_max_rxd) {
1351 			allowed_max_rxd = dev_info.rx_desc_lim.nb_max;
1352 			*pid = pi;
1353 		}
1354 	}
1355 	return allowed_max_rxd;
1356 }
1357 
1358 /*
1359  * Get the allowed minimal number of RXDs of every rx queue.
1360  * *pid return the port id which has minimal value of
1361  * min_rxd in all queues of all ports.
1362  */
1363 static uint16_t
1364 get_allowed_min_nb_rxd(portid_t *pid)
1365 {
1366 	uint16_t allowed_min_rxd = 0;
1367 	portid_t pi;
1368 	struct rte_eth_dev_info dev_info;
1369 
1370 	RTE_ETH_FOREACH_DEV(pi) {
1371 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1372 			continue;
1373 
1374 		if (dev_info.rx_desc_lim.nb_min > allowed_min_rxd) {
1375 			allowed_min_rxd = dev_info.rx_desc_lim.nb_min;
1376 			*pid = pi;
1377 		}
1378 	}
1379 
1380 	return allowed_min_rxd;
1381 }
1382 
1383 /*
1384  * Check input rxd is valid or not.
1385  * If input rxd is not greater than any of maximum number
1386  * of RXDs of every Rx queues and is not less than any of
1387  * minimal number of RXDs of every Rx queues, it is valid.
1388  * if valid, return 0, else return -1
1389  */
1390 int
1391 check_nb_rxd(queueid_t rxd)
1392 {
1393 	uint16_t allowed_max_rxd;
1394 	uint16_t allowed_min_rxd;
1395 	portid_t pid = 0;
1396 
1397 	allowed_max_rxd = get_allowed_max_nb_rxd(&pid);
1398 	if (rxd > allowed_max_rxd) {
1399 		fprintf(stderr,
1400 			"Fail: input rxd (%u) can't be greater than max_rxds (%u) of port %u\n",
1401 			rxd, allowed_max_rxd, pid);
1402 		return -1;
1403 	}
1404 
1405 	allowed_min_rxd = get_allowed_min_nb_rxd(&pid);
1406 	if (rxd < allowed_min_rxd) {
1407 		fprintf(stderr,
1408 			"Fail: input rxd (%u) can't be less than min_rxds (%u) of port %u\n",
1409 			rxd, allowed_min_rxd, pid);
1410 		return -1;
1411 	}
1412 
1413 	return 0;
1414 }
1415 
1416 /*
1417  * Get the allowed maximum number of TXDs of every rx queues.
1418  * *pid return the port id which has minimal value of
1419  * max_txd in every tx queue.
1420  */
1421 static uint16_t
1422 get_allowed_max_nb_txd(portid_t *pid)
1423 {
1424 	uint16_t allowed_max_txd = UINT16_MAX;
1425 	portid_t pi;
1426 	struct rte_eth_dev_info dev_info;
1427 
1428 	RTE_ETH_FOREACH_DEV(pi) {
1429 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1430 			continue;
1431 
1432 		if (dev_info.tx_desc_lim.nb_max < allowed_max_txd) {
1433 			allowed_max_txd = dev_info.tx_desc_lim.nb_max;
1434 			*pid = pi;
1435 		}
1436 	}
1437 	return allowed_max_txd;
1438 }
1439 
1440 /*
1441  * Get the allowed maximum number of TXDs of every tx queues.
1442  * *pid return the port id which has minimal value of
1443  * min_txd in every tx queue.
1444  */
1445 static uint16_t
1446 get_allowed_min_nb_txd(portid_t *pid)
1447 {
1448 	uint16_t allowed_min_txd = 0;
1449 	portid_t pi;
1450 	struct rte_eth_dev_info dev_info;
1451 
1452 	RTE_ETH_FOREACH_DEV(pi) {
1453 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1454 			continue;
1455 
1456 		if (dev_info.tx_desc_lim.nb_min > allowed_min_txd) {
1457 			allowed_min_txd = dev_info.tx_desc_lim.nb_min;
1458 			*pid = pi;
1459 		}
1460 	}
1461 
1462 	return allowed_min_txd;
1463 }
1464 
1465 /*
1466  * Check input txd is valid or not.
1467  * If input txd is not greater than any of maximum number
1468  * of TXDs of every Rx queues, it is valid.
1469  * if valid, return 0, else return -1
1470  */
1471 int
1472 check_nb_txd(queueid_t txd)
1473 {
1474 	uint16_t allowed_max_txd;
1475 	uint16_t allowed_min_txd;
1476 	portid_t pid = 0;
1477 
1478 	allowed_max_txd = get_allowed_max_nb_txd(&pid);
1479 	if (txd > allowed_max_txd) {
1480 		fprintf(stderr,
1481 			"Fail: input txd (%u) can't be greater than max_txds (%u) of port %u\n",
1482 			txd, allowed_max_txd, pid);
1483 		return -1;
1484 	}
1485 
1486 	allowed_min_txd = get_allowed_min_nb_txd(&pid);
1487 	if (txd < allowed_min_txd) {
1488 		fprintf(stderr,
1489 			"Fail: input txd (%u) can't be less than min_txds (%u) of port %u\n",
1490 			txd, allowed_min_txd, pid);
1491 		return -1;
1492 	}
1493 	return 0;
1494 }
1495 
1496 
1497 /*
1498  * Get the allowed maximum number of hairpin queues.
1499  * *pid return the port id which has minimal value of
1500  * max_hairpin_queues in all ports.
1501  */
1502 queueid_t
1503 get_allowed_max_nb_hairpinq(portid_t *pid)
1504 {
1505 	queueid_t allowed_max_hairpinq = RTE_MAX_QUEUES_PER_PORT;
1506 	portid_t pi;
1507 	struct rte_eth_hairpin_cap cap;
1508 
1509 	RTE_ETH_FOREACH_DEV(pi) {
1510 		if (rte_eth_dev_hairpin_capability_get(pi, &cap) != 0) {
1511 			*pid = pi;
1512 			return 0;
1513 		}
1514 		if (cap.max_nb_queues < allowed_max_hairpinq) {
1515 			allowed_max_hairpinq = cap.max_nb_queues;
1516 			*pid = pi;
1517 		}
1518 	}
1519 	return allowed_max_hairpinq;
1520 }
1521 
1522 /*
1523  * Check input hairpin is valid or not.
1524  * If input hairpin is not greater than any of maximum number
1525  * of hairpin queues of all ports, it is valid.
1526  * if valid, return 0, else return -1
1527  */
1528 int
1529 check_nb_hairpinq(queueid_t hairpinq)
1530 {
1531 	queueid_t allowed_max_hairpinq;
1532 	portid_t pid = 0;
1533 
1534 	allowed_max_hairpinq = get_allowed_max_nb_hairpinq(&pid);
1535 	if (hairpinq > allowed_max_hairpinq) {
1536 		fprintf(stderr,
1537 			"Fail: input hairpin (%u) can't be greater than max_hairpin_queues (%u) of port %u\n",
1538 			hairpinq, allowed_max_hairpinq, pid);
1539 		return -1;
1540 	}
1541 	return 0;
1542 }
1543 
1544 static int
1545 get_eth_overhead(struct rte_eth_dev_info *dev_info)
1546 {
1547 	uint32_t eth_overhead;
1548 
1549 	if (dev_info->max_mtu != UINT16_MAX &&
1550 	    dev_info->max_rx_pktlen > dev_info->max_mtu)
1551 		eth_overhead = dev_info->max_rx_pktlen - dev_info->max_mtu;
1552 	else
1553 		eth_overhead = RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN;
1554 
1555 	return eth_overhead;
1556 }
1557 
1558 static void
1559 init_config_port_offloads(portid_t pid, uint32_t socket_id)
1560 {
1561 	struct rte_port *port = &ports[pid];
1562 	int ret;
1563 	int i;
1564 
1565 	eth_rx_metadata_negotiate_mp(pid);
1566 	flow_pick_transfer_proxy_mp(pid);
1567 
1568 	port->dev_conf.txmode = tx_mode;
1569 	port->dev_conf.rxmode = rx_mode;
1570 
1571 	ret = eth_dev_info_get_print_err(pid, &port->dev_info);
1572 	if (ret != 0)
1573 		rte_exit(EXIT_FAILURE, "rte_eth_dev_info_get() failed\n");
1574 
1575 	if (!(port->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE))
1576 		port->dev_conf.txmode.offloads &=
1577 			~DEV_TX_OFFLOAD_MBUF_FAST_FREE;
1578 
1579 	/* Apply Rx offloads configuration */
1580 	for (i = 0; i < port->dev_info.max_rx_queues; i++)
1581 		port->rx_conf[i].offloads = port->dev_conf.rxmode.offloads;
1582 	/* Apply Tx offloads configuration */
1583 	for (i = 0; i < port->dev_info.max_tx_queues; i++)
1584 		port->tx_conf[i].offloads = port->dev_conf.txmode.offloads;
1585 
1586 	if (eth_link_speed)
1587 		port->dev_conf.link_speeds = eth_link_speed;
1588 
1589 	if (max_rx_pkt_len)
1590 		port->dev_conf.rxmode.mtu = max_rx_pkt_len -
1591 			get_eth_overhead(&port->dev_info);
1592 
1593 	/* set flag to initialize port/queue */
1594 	port->need_reconfig = 1;
1595 	port->need_reconfig_queues = 1;
1596 	port->socket_id = socket_id;
1597 	port->tx_metadata = 0;
1598 
1599 	/*
1600 	 * Check for maximum number of segments per MTU.
1601 	 * Accordingly update the mbuf data size.
1602 	 */
1603 	if (port->dev_info.rx_desc_lim.nb_mtu_seg_max != UINT16_MAX &&
1604 	    port->dev_info.rx_desc_lim.nb_mtu_seg_max != 0) {
1605 		uint32_t eth_overhead = get_eth_overhead(&port->dev_info);
1606 		uint16_t mtu;
1607 
1608 		if (rte_eth_dev_get_mtu(pid, &mtu) == 0) {
1609 			uint16_t data_size = (mtu + eth_overhead) /
1610 				port->dev_info.rx_desc_lim.nb_mtu_seg_max;
1611 			uint16_t buffer_size = data_size + RTE_PKTMBUF_HEADROOM;
1612 
1613 			if (buffer_size > mbuf_data_size[0]) {
1614 				mbuf_data_size[0] = buffer_size;
1615 				TESTPMD_LOG(WARNING,
1616 					"Configured mbuf size of the first segment %hu\n",
1617 					mbuf_data_size[0]);
1618 			}
1619 		}
1620 	}
1621 }
1622 
1623 static void
1624 init_config(void)
1625 {
1626 	portid_t pid;
1627 	struct rte_mempool *mbp;
1628 	unsigned int nb_mbuf_per_pool;
1629 	lcoreid_t  lc_id;
1630 	struct rte_gro_param gro_param;
1631 	uint32_t gso_types;
1632 
1633 	/* Configuration of logical cores. */
1634 	fwd_lcores = rte_zmalloc("testpmd: fwd_lcores",
1635 				sizeof(struct fwd_lcore *) * nb_lcores,
1636 				RTE_CACHE_LINE_SIZE);
1637 	if (fwd_lcores == NULL) {
1638 		rte_exit(EXIT_FAILURE, "rte_zmalloc(%d (struct fwd_lcore *)) "
1639 							"failed\n", nb_lcores);
1640 	}
1641 	for (lc_id = 0; lc_id < nb_lcores; lc_id++) {
1642 		fwd_lcores[lc_id] = rte_zmalloc("testpmd: struct fwd_lcore",
1643 					       sizeof(struct fwd_lcore),
1644 					       RTE_CACHE_LINE_SIZE);
1645 		if (fwd_lcores[lc_id] == NULL) {
1646 			rte_exit(EXIT_FAILURE, "rte_zmalloc(struct fwd_lcore) "
1647 								"failed\n");
1648 		}
1649 		fwd_lcores[lc_id]->cpuid_idx = lc_id;
1650 	}
1651 
1652 	RTE_ETH_FOREACH_DEV(pid) {
1653 		uint32_t socket_id;
1654 
1655 		if (numa_support) {
1656 			socket_id = port_numa[pid];
1657 			if (port_numa[pid] == NUMA_NO_CONFIG) {
1658 				socket_id = rte_eth_dev_socket_id(pid);
1659 
1660 				/*
1661 				 * if socket_id is invalid,
1662 				 * set to the first available socket.
1663 				 */
1664 				if (check_socket_id(socket_id) < 0)
1665 					socket_id = socket_ids[0];
1666 			}
1667 		} else {
1668 			socket_id = (socket_num == UMA_NO_CONFIG) ?
1669 				    0 : socket_num;
1670 		}
1671 		/* Apply default TxRx configuration for all ports */
1672 		init_config_port_offloads(pid, socket_id);
1673 	}
1674 	/*
1675 	 * Create pools of mbuf.
1676 	 * If NUMA support is disabled, create a single pool of mbuf in
1677 	 * socket 0 memory by default.
1678 	 * Otherwise, create a pool of mbuf in the memory of sockets 0 and 1.
1679 	 *
1680 	 * Use the maximum value of nb_rxd and nb_txd here, then nb_rxd and
1681 	 * nb_txd can be configured at run time.
1682 	 */
1683 	if (param_total_num_mbufs)
1684 		nb_mbuf_per_pool = param_total_num_mbufs;
1685 	else {
1686 		nb_mbuf_per_pool = RTE_TEST_RX_DESC_MAX +
1687 			(nb_lcores * mb_mempool_cache) +
1688 			RTE_TEST_TX_DESC_MAX + MAX_PKT_BURST;
1689 		nb_mbuf_per_pool *= RTE_MAX_ETHPORTS;
1690 	}
1691 
1692 	if (numa_support) {
1693 		uint8_t i, j;
1694 
1695 		for (i = 0; i < num_sockets; i++)
1696 			for (j = 0; j < mbuf_data_size_n; j++)
1697 				mempools[i * MAX_SEGS_BUFFER_SPLIT + j] =
1698 					mbuf_pool_create(mbuf_data_size[j],
1699 							  nb_mbuf_per_pool,
1700 							  socket_ids[i], j);
1701 	} else {
1702 		uint8_t i;
1703 
1704 		for (i = 0; i < mbuf_data_size_n; i++)
1705 			mempools[i] = mbuf_pool_create
1706 					(mbuf_data_size[i],
1707 					 nb_mbuf_per_pool,
1708 					 socket_num == UMA_NO_CONFIG ?
1709 					 0 : socket_num, i);
1710 	}
1711 
1712 	init_port_config();
1713 
1714 	gso_types = DEV_TX_OFFLOAD_TCP_TSO | DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
1715 		DEV_TX_OFFLOAD_GRE_TNL_TSO | DEV_TX_OFFLOAD_UDP_TSO;
1716 	/*
1717 	 * Records which Mbuf pool to use by each logical core, if needed.
1718 	 */
1719 	for (lc_id = 0; lc_id < nb_lcores; lc_id++) {
1720 		mbp = mbuf_pool_find(
1721 			rte_lcore_to_socket_id(fwd_lcores_cpuids[lc_id]), 0);
1722 
1723 		if (mbp == NULL)
1724 			mbp = mbuf_pool_find(0, 0);
1725 		fwd_lcores[lc_id]->mbp = mbp;
1726 		/* initialize GSO context */
1727 		fwd_lcores[lc_id]->gso_ctx.direct_pool = mbp;
1728 		fwd_lcores[lc_id]->gso_ctx.indirect_pool = mbp;
1729 		fwd_lcores[lc_id]->gso_ctx.gso_types = gso_types;
1730 		fwd_lcores[lc_id]->gso_ctx.gso_size = RTE_ETHER_MAX_LEN -
1731 			RTE_ETHER_CRC_LEN;
1732 		fwd_lcores[lc_id]->gso_ctx.flag = 0;
1733 	}
1734 
1735 	fwd_config_setup();
1736 
1737 	/* create a gro context for each lcore */
1738 	gro_param.gro_types = RTE_GRO_TCP_IPV4;
1739 	gro_param.max_flow_num = GRO_MAX_FLUSH_CYCLES;
1740 	gro_param.max_item_per_flow = MAX_PKT_BURST;
1741 	for (lc_id = 0; lc_id < nb_lcores; lc_id++) {
1742 		gro_param.socket_id = rte_lcore_to_socket_id(
1743 				fwd_lcores_cpuids[lc_id]);
1744 		fwd_lcores[lc_id]->gro_ctx = rte_gro_ctx_create(&gro_param);
1745 		if (fwd_lcores[lc_id]->gro_ctx == NULL) {
1746 			rte_exit(EXIT_FAILURE,
1747 					"rte_gro_ctx_create() failed\n");
1748 		}
1749 	}
1750 }
1751 
1752 
1753 void
1754 reconfig(portid_t new_port_id, unsigned socket_id)
1755 {
1756 	/* Reconfiguration of Ethernet ports. */
1757 	init_config_port_offloads(new_port_id, socket_id);
1758 	init_port_config();
1759 }
1760 
1761 
1762 int
1763 init_fwd_streams(void)
1764 {
1765 	portid_t pid;
1766 	struct rte_port *port;
1767 	streamid_t sm_id, nb_fwd_streams_new;
1768 	queueid_t q;
1769 
1770 	/* set socket id according to numa or not */
1771 	RTE_ETH_FOREACH_DEV(pid) {
1772 		port = &ports[pid];
1773 		if (nb_rxq > port->dev_info.max_rx_queues) {
1774 			fprintf(stderr,
1775 				"Fail: nb_rxq(%d) is greater than max_rx_queues(%d)\n",
1776 				nb_rxq, port->dev_info.max_rx_queues);
1777 			return -1;
1778 		}
1779 		if (nb_txq > port->dev_info.max_tx_queues) {
1780 			fprintf(stderr,
1781 				"Fail: nb_txq(%d) is greater than max_tx_queues(%d)\n",
1782 				nb_txq, port->dev_info.max_tx_queues);
1783 			return -1;
1784 		}
1785 		if (numa_support) {
1786 			if (port_numa[pid] != NUMA_NO_CONFIG)
1787 				port->socket_id = port_numa[pid];
1788 			else {
1789 				port->socket_id = rte_eth_dev_socket_id(pid);
1790 
1791 				/*
1792 				 * if socket_id is invalid,
1793 				 * set to the first available socket.
1794 				 */
1795 				if (check_socket_id(port->socket_id) < 0)
1796 					port->socket_id = socket_ids[0];
1797 			}
1798 		}
1799 		else {
1800 			if (socket_num == UMA_NO_CONFIG)
1801 				port->socket_id = 0;
1802 			else
1803 				port->socket_id = socket_num;
1804 		}
1805 	}
1806 
1807 	q = RTE_MAX(nb_rxq, nb_txq);
1808 	if (q == 0) {
1809 		fprintf(stderr,
1810 			"Fail: Cannot allocate fwd streams as number of queues is 0\n");
1811 		return -1;
1812 	}
1813 	nb_fwd_streams_new = (streamid_t)(nb_ports * q);
1814 	if (nb_fwd_streams_new == nb_fwd_streams)
1815 		return 0;
1816 	/* clear the old */
1817 	if (fwd_streams != NULL) {
1818 		for (sm_id = 0; sm_id < nb_fwd_streams; sm_id++) {
1819 			if (fwd_streams[sm_id] == NULL)
1820 				continue;
1821 			rte_free(fwd_streams[sm_id]);
1822 			fwd_streams[sm_id] = NULL;
1823 		}
1824 		rte_free(fwd_streams);
1825 		fwd_streams = NULL;
1826 	}
1827 
1828 	/* init new */
1829 	nb_fwd_streams = nb_fwd_streams_new;
1830 	if (nb_fwd_streams) {
1831 		fwd_streams = rte_zmalloc("testpmd: fwd_streams",
1832 			sizeof(struct fwd_stream *) * nb_fwd_streams,
1833 			RTE_CACHE_LINE_SIZE);
1834 		if (fwd_streams == NULL)
1835 			rte_exit(EXIT_FAILURE, "rte_zmalloc(%d"
1836 				 " (struct fwd_stream *)) failed\n",
1837 				 nb_fwd_streams);
1838 
1839 		for (sm_id = 0; sm_id < nb_fwd_streams; sm_id++) {
1840 			fwd_streams[sm_id] = rte_zmalloc("testpmd:"
1841 				" struct fwd_stream", sizeof(struct fwd_stream),
1842 				RTE_CACHE_LINE_SIZE);
1843 			if (fwd_streams[sm_id] == NULL)
1844 				rte_exit(EXIT_FAILURE, "rte_zmalloc"
1845 					 "(struct fwd_stream) failed\n");
1846 		}
1847 	}
1848 
1849 	return 0;
1850 }
1851 
1852 static void
1853 pkt_burst_stats_display(const char *rx_tx, struct pkt_burst_stats *pbs)
1854 {
1855 	uint64_t total_burst, sburst;
1856 	uint64_t nb_burst;
1857 	uint64_t burst_stats[4];
1858 	uint16_t pktnb_stats[4];
1859 	uint16_t nb_pkt;
1860 	int burst_percent[4], sburstp;
1861 	int i;
1862 
1863 	/*
1864 	 * First compute the total number of packet bursts and the
1865 	 * two highest numbers of bursts of the same number of packets.
1866 	 */
1867 	memset(&burst_stats, 0x0, sizeof(burst_stats));
1868 	memset(&pktnb_stats, 0x0, sizeof(pktnb_stats));
1869 
1870 	/* Show stats for 0 burst size always */
1871 	total_burst = pbs->pkt_burst_spread[0];
1872 	burst_stats[0] = pbs->pkt_burst_spread[0];
1873 	pktnb_stats[0] = 0;
1874 
1875 	/* Find the next 2 burst sizes with highest occurrences. */
1876 	for (nb_pkt = 1; nb_pkt < MAX_PKT_BURST; nb_pkt++) {
1877 		nb_burst = pbs->pkt_burst_spread[nb_pkt];
1878 
1879 		if (nb_burst == 0)
1880 			continue;
1881 
1882 		total_burst += nb_burst;
1883 
1884 		if (nb_burst > burst_stats[1]) {
1885 			burst_stats[2] = burst_stats[1];
1886 			pktnb_stats[2] = pktnb_stats[1];
1887 			burst_stats[1] = nb_burst;
1888 			pktnb_stats[1] = nb_pkt;
1889 		} else if (nb_burst > burst_stats[2]) {
1890 			burst_stats[2] = nb_burst;
1891 			pktnb_stats[2] = nb_pkt;
1892 		}
1893 	}
1894 	if (total_burst == 0)
1895 		return;
1896 
1897 	printf("  %s-bursts : %"PRIu64" [", rx_tx, total_burst);
1898 	for (i = 0, sburst = 0, sburstp = 0; i < 4; i++) {
1899 		if (i == 3) {
1900 			printf("%d%% of other]\n", 100 - sburstp);
1901 			return;
1902 		}
1903 
1904 		sburst += burst_stats[i];
1905 		if (sburst == total_burst) {
1906 			printf("%d%% of %d pkts]\n",
1907 				100 - sburstp, (int) pktnb_stats[i]);
1908 			return;
1909 		}
1910 
1911 		burst_percent[i] =
1912 			(double)burst_stats[i] / total_burst * 100;
1913 		printf("%d%% of %d pkts + ",
1914 			burst_percent[i], (int) pktnb_stats[i]);
1915 		sburstp += burst_percent[i];
1916 	}
1917 }
1918 
1919 static void
1920 fwd_stream_stats_display(streamid_t stream_id)
1921 {
1922 	struct fwd_stream *fs;
1923 	static const char *fwd_top_stats_border = "-------";
1924 
1925 	fs = fwd_streams[stream_id];
1926 	if ((fs->rx_packets == 0) && (fs->tx_packets == 0) &&
1927 	    (fs->fwd_dropped == 0))
1928 		return;
1929 	printf("\n  %s Forward Stats for RX Port=%2d/Queue=%2d -> "
1930 	       "TX Port=%2d/Queue=%2d %s\n",
1931 	       fwd_top_stats_border, fs->rx_port, fs->rx_queue,
1932 	       fs->tx_port, fs->tx_queue, fwd_top_stats_border);
1933 	printf("  RX-packets: %-14"PRIu64" TX-packets: %-14"PRIu64
1934 	       " TX-dropped: %-14"PRIu64,
1935 	       fs->rx_packets, fs->tx_packets, fs->fwd_dropped);
1936 
1937 	/* if checksum mode */
1938 	if (cur_fwd_eng == &csum_fwd_engine) {
1939 		printf("  RX- bad IP checksum: %-14"PRIu64
1940 		       "  Rx- bad L4 checksum: %-14"PRIu64
1941 		       " Rx- bad outer L4 checksum: %-14"PRIu64"\n",
1942 			fs->rx_bad_ip_csum, fs->rx_bad_l4_csum,
1943 			fs->rx_bad_outer_l4_csum);
1944 		printf(" RX- bad outer IP checksum: %-14"PRIu64"\n",
1945 			fs->rx_bad_outer_ip_csum);
1946 	} else {
1947 		printf("\n");
1948 	}
1949 
1950 	if (record_burst_stats) {
1951 		pkt_burst_stats_display("RX", &fs->rx_burst_stats);
1952 		pkt_burst_stats_display("TX", &fs->tx_burst_stats);
1953 	}
1954 }
1955 
1956 void
1957 fwd_stats_display(void)
1958 {
1959 	static const char *fwd_stats_border = "----------------------";
1960 	static const char *acc_stats_border = "+++++++++++++++";
1961 	struct {
1962 		struct fwd_stream *rx_stream;
1963 		struct fwd_stream *tx_stream;
1964 		uint64_t tx_dropped;
1965 		uint64_t rx_bad_ip_csum;
1966 		uint64_t rx_bad_l4_csum;
1967 		uint64_t rx_bad_outer_l4_csum;
1968 		uint64_t rx_bad_outer_ip_csum;
1969 	} ports_stats[RTE_MAX_ETHPORTS];
1970 	uint64_t total_rx_dropped = 0;
1971 	uint64_t total_tx_dropped = 0;
1972 	uint64_t total_rx_nombuf = 0;
1973 	struct rte_eth_stats stats;
1974 	uint64_t fwd_cycles = 0;
1975 	uint64_t total_recv = 0;
1976 	uint64_t total_xmit = 0;
1977 	struct rte_port *port;
1978 	streamid_t sm_id;
1979 	portid_t pt_id;
1980 	int i;
1981 
1982 	memset(ports_stats, 0, sizeof(ports_stats));
1983 
1984 	for (sm_id = 0; sm_id < cur_fwd_config.nb_fwd_streams; sm_id++) {
1985 		struct fwd_stream *fs = fwd_streams[sm_id];
1986 
1987 		if (cur_fwd_config.nb_fwd_streams >
1988 		    cur_fwd_config.nb_fwd_ports) {
1989 			fwd_stream_stats_display(sm_id);
1990 		} else {
1991 			ports_stats[fs->tx_port].tx_stream = fs;
1992 			ports_stats[fs->rx_port].rx_stream = fs;
1993 		}
1994 
1995 		ports_stats[fs->tx_port].tx_dropped += fs->fwd_dropped;
1996 
1997 		ports_stats[fs->rx_port].rx_bad_ip_csum += fs->rx_bad_ip_csum;
1998 		ports_stats[fs->rx_port].rx_bad_l4_csum += fs->rx_bad_l4_csum;
1999 		ports_stats[fs->rx_port].rx_bad_outer_l4_csum +=
2000 				fs->rx_bad_outer_l4_csum;
2001 		ports_stats[fs->rx_port].rx_bad_outer_ip_csum +=
2002 				fs->rx_bad_outer_ip_csum;
2003 
2004 		if (record_core_cycles)
2005 			fwd_cycles += fs->core_cycles;
2006 	}
2007 	for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
2008 		pt_id = fwd_ports_ids[i];
2009 		port = &ports[pt_id];
2010 
2011 		rte_eth_stats_get(pt_id, &stats);
2012 		stats.ipackets -= port->stats.ipackets;
2013 		stats.opackets -= port->stats.opackets;
2014 		stats.ibytes -= port->stats.ibytes;
2015 		stats.obytes -= port->stats.obytes;
2016 		stats.imissed -= port->stats.imissed;
2017 		stats.oerrors -= port->stats.oerrors;
2018 		stats.rx_nombuf -= port->stats.rx_nombuf;
2019 
2020 		total_recv += stats.ipackets;
2021 		total_xmit += stats.opackets;
2022 		total_rx_dropped += stats.imissed;
2023 		total_tx_dropped += ports_stats[pt_id].tx_dropped;
2024 		total_tx_dropped += stats.oerrors;
2025 		total_rx_nombuf  += stats.rx_nombuf;
2026 
2027 		printf("\n  %s Forward statistics for port %-2d %s\n",
2028 		       fwd_stats_border, pt_id, fwd_stats_border);
2029 
2030 		printf("  RX-packets: %-14"PRIu64" RX-dropped: %-14"PRIu64
2031 		       "RX-total: %-"PRIu64"\n", stats.ipackets, stats.imissed,
2032 		       stats.ipackets + stats.imissed);
2033 
2034 		if (cur_fwd_eng == &csum_fwd_engine) {
2035 			printf("  Bad-ipcsum: %-14"PRIu64
2036 			       " Bad-l4csum: %-14"PRIu64
2037 			       "Bad-outer-l4csum: %-14"PRIu64"\n",
2038 			       ports_stats[pt_id].rx_bad_ip_csum,
2039 			       ports_stats[pt_id].rx_bad_l4_csum,
2040 			       ports_stats[pt_id].rx_bad_outer_l4_csum);
2041 			printf("  Bad-outer-ipcsum: %-14"PRIu64"\n",
2042 			       ports_stats[pt_id].rx_bad_outer_ip_csum);
2043 		}
2044 		if (stats.ierrors + stats.rx_nombuf > 0) {
2045 			printf("  RX-error: %-"PRIu64"\n", stats.ierrors);
2046 			printf("  RX-nombufs: %-14"PRIu64"\n", stats.rx_nombuf);
2047 		}
2048 
2049 		printf("  TX-packets: %-14"PRIu64" TX-dropped: %-14"PRIu64
2050 		       "TX-total: %-"PRIu64"\n",
2051 		       stats.opackets, ports_stats[pt_id].tx_dropped,
2052 		       stats.opackets + ports_stats[pt_id].tx_dropped);
2053 
2054 		if (record_burst_stats) {
2055 			if (ports_stats[pt_id].rx_stream)
2056 				pkt_burst_stats_display("RX",
2057 					&ports_stats[pt_id].rx_stream->rx_burst_stats);
2058 			if (ports_stats[pt_id].tx_stream)
2059 				pkt_burst_stats_display("TX",
2060 				&ports_stats[pt_id].tx_stream->tx_burst_stats);
2061 		}
2062 
2063 		printf("  %s--------------------------------%s\n",
2064 		       fwd_stats_border, fwd_stats_border);
2065 	}
2066 
2067 	printf("\n  %s Accumulated forward statistics for all ports"
2068 	       "%s\n",
2069 	       acc_stats_border, acc_stats_border);
2070 	printf("  RX-packets: %-14"PRIu64" RX-dropped: %-14"PRIu64"RX-total: "
2071 	       "%-"PRIu64"\n"
2072 	       "  TX-packets: %-14"PRIu64" TX-dropped: %-14"PRIu64"TX-total: "
2073 	       "%-"PRIu64"\n",
2074 	       total_recv, total_rx_dropped, total_recv + total_rx_dropped,
2075 	       total_xmit, total_tx_dropped, total_xmit + total_tx_dropped);
2076 	if (total_rx_nombuf > 0)
2077 		printf("  RX-nombufs: %-14"PRIu64"\n", total_rx_nombuf);
2078 	printf("  %s++++++++++++++++++++++++++++++++++++++++++++++"
2079 	       "%s\n",
2080 	       acc_stats_border, acc_stats_border);
2081 	if (record_core_cycles) {
2082 #define CYC_PER_MHZ 1E6
2083 		if (total_recv > 0 || total_xmit > 0) {
2084 			uint64_t total_pkts = 0;
2085 			if (strcmp(cur_fwd_eng->fwd_mode_name, "txonly") == 0 ||
2086 			    strcmp(cur_fwd_eng->fwd_mode_name, "flowgen") == 0)
2087 				total_pkts = total_xmit;
2088 			else
2089 				total_pkts = total_recv;
2090 
2091 			printf("\n  CPU cycles/packet=%.2F (total cycles="
2092 			       "%"PRIu64" / total %s packets=%"PRIu64") at %"PRIu64
2093 			       " MHz Clock\n",
2094 			       (double) fwd_cycles / total_pkts,
2095 			       fwd_cycles, cur_fwd_eng->fwd_mode_name, total_pkts,
2096 			       (uint64_t)(rte_get_tsc_hz() / CYC_PER_MHZ));
2097 		}
2098 	}
2099 }
2100 
2101 void
2102 fwd_stats_reset(void)
2103 {
2104 	streamid_t sm_id;
2105 	portid_t pt_id;
2106 	int i;
2107 
2108 	for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
2109 		pt_id = fwd_ports_ids[i];
2110 		rte_eth_stats_get(pt_id, &ports[pt_id].stats);
2111 	}
2112 	for (sm_id = 0; sm_id < cur_fwd_config.nb_fwd_streams; sm_id++) {
2113 		struct fwd_stream *fs = fwd_streams[sm_id];
2114 
2115 		fs->rx_packets = 0;
2116 		fs->tx_packets = 0;
2117 		fs->fwd_dropped = 0;
2118 		fs->rx_bad_ip_csum = 0;
2119 		fs->rx_bad_l4_csum = 0;
2120 		fs->rx_bad_outer_l4_csum = 0;
2121 		fs->rx_bad_outer_ip_csum = 0;
2122 
2123 		memset(&fs->rx_burst_stats, 0, sizeof(fs->rx_burst_stats));
2124 		memset(&fs->tx_burst_stats, 0, sizeof(fs->tx_burst_stats));
2125 		fs->core_cycles = 0;
2126 	}
2127 }
2128 
2129 static void
2130 flush_fwd_rx_queues(void)
2131 {
2132 	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
2133 	portid_t  rxp;
2134 	portid_t port_id;
2135 	queueid_t rxq;
2136 	uint16_t  nb_rx;
2137 	uint16_t  i;
2138 	uint8_t   j;
2139 	uint64_t prev_tsc = 0, diff_tsc, cur_tsc, timer_tsc = 0;
2140 	uint64_t timer_period;
2141 
2142 	if (num_procs > 1) {
2143 		printf("multi-process not support for flushing fwd Rx queues, skip the below lines and return.\n");
2144 		return;
2145 	}
2146 
2147 	/* convert to number of cycles */
2148 	timer_period = rte_get_timer_hz(); /* 1 second timeout */
2149 
2150 	for (j = 0; j < 2; j++) {
2151 		for (rxp = 0; rxp < cur_fwd_config.nb_fwd_ports; rxp++) {
2152 			for (rxq = 0; rxq < nb_rxq; rxq++) {
2153 				port_id = fwd_ports_ids[rxp];
2154 				/**
2155 				* testpmd can stuck in the below do while loop
2156 				* if rte_eth_rx_burst() always returns nonzero
2157 				* packets. So timer is added to exit this loop
2158 				* after 1sec timer expiry.
2159 				*/
2160 				prev_tsc = rte_rdtsc();
2161 				do {
2162 					nb_rx = rte_eth_rx_burst(port_id, rxq,
2163 						pkts_burst, MAX_PKT_BURST);
2164 					for (i = 0; i < nb_rx; i++)
2165 						rte_pktmbuf_free(pkts_burst[i]);
2166 
2167 					cur_tsc = rte_rdtsc();
2168 					diff_tsc = cur_tsc - prev_tsc;
2169 					timer_tsc += diff_tsc;
2170 				} while ((nb_rx > 0) &&
2171 					(timer_tsc < timer_period));
2172 				timer_tsc = 0;
2173 			}
2174 		}
2175 		rte_delay_ms(10); /* wait 10 milli-seconds before retrying */
2176 	}
2177 }
2178 
2179 static void
2180 run_pkt_fwd_on_lcore(struct fwd_lcore *fc, packet_fwd_t pkt_fwd)
2181 {
2182 	struct fwd_stream **fsm;
2183 	streamid_t nb_fs;
2184 	streamid_t sm_id;
2185 #ifdef RTE_LIB_BITRATESTATS
2186 	uint64_t tics_per_1sec;
2187 	uint64_t tics_datum;
2188 	uint64_t tics_current;
2189 	uint16_t i, cnt_ports;
2190 
2191 	cnt_ports = nb_ports;
2192 	tics_datum = rte_rdtsc();
2193 	tics_per_1sec = rte_get_timer_hz();
2194 #endif
2195 	fsm = &fwd_streams[fc->stream_idx];
2196 	nb_fs = fc->stream_nb;
2197 	do {
2198 		for (sm_id = 0; sm_id < nb_fs; sm_id++)
2199 			(*pkt_fwd)(fsm[sm_id]);
2200 #ifdef RTE_LIB_BITRATESTATS
2201 		if (bitrate_enabled != 0 &&
2202 				bitrate_lcore_id == rte_lcore_id()) {
2203 			tics_current = rte_rdtsc();
2204 			if (tics_current - tics_datum >= tics_per_1sec) {
2205 				/* Periodic bitrate calculation */
2206 				for (i = 0; i < cnt_ports; i++)
2207 					rte_stats_bitrate_calc(bitrate_data,
2208 						ports_ids[i]);
2209 				tics_datum = tics_current;
2210 			}
2211 		}
2212 #endif
2213 #ifdef RTE_LIB_LATENCYSTATS
2214 		if (latencystats_enabled != 0 &&
2215 				latencystats_lcore_id == rte_lcore_id())
2216 			rte_latencystats_update();
2217 #endif
2218 
2219 	} while (! fc->stopped);
2220 }
2221 
2222 static int
2223 start_pkt_forward_on_core(void *fwd_arg)
2224 {
2225 	run_pkt_fwd_on_lcore((struct fwd_lcore *) fwd_arg,
2226 			     cur_fwd_config.fwd_eng->packet_fwd);
2227 	return 0;
2228 }
2229 
2230 /*
2231  * Run the TXONLY packet forwarding engine to send a single burst of packets.
2232  * Used to start communication flows in network loopback test configurations.
2233  */
2234 static int
2235 run_one_txonly_burst_on_core(void *fwd_arg)
2236 {
2237 	struct fwd_lcore *fwd_lc;
2238 	struct fwd_lcore tmp_lcore;
2239 
2240 	fwd_lc = (struct fwd_lcore *) fwd_arg;
2241 	tmp_lcore = *fwd_lc;
2242 	tmp_lcore.stopped = 1;
2243 	run_pkt_fwd_on_lcore(&tmp_lcore, tx_only_engine.packet_fwd);
2244 	return 0;
2245 }
2246 
2247 /*
2248  * Launch packet forwarding:
2249  *     - Setup per-port forwarding context.
2250  *     - launch logical cores with their forwarding configuration.
2251  */
2252 static void
2253 launch_packet_forwarding(lcore_function_t *pkt_fwd_on_lcore)
2254 {
2255 	unsigned int i;
2256 	unsigned int lc_id;
2257 	int diag;
2258 
2259 	for (i = 0; i < cur_fwd_config.nb_fwd_lcores; i++) {
2260 		lc_id = fwd_lcores_cpuids[i];
2261 		if ((interactive == 0) || (lc_id != rte_lcore_id())) {
2262 			fwd_lcores[i]->stopped = 0;
2263 			diag = rte_eal_remote_launch(pkt_fwd_on_lcore,
2264 						     fwd_lcores[i], lc_id);
2265 			if (diag != 0)
2266 				fprintf(stderr,
2267 					"launch lcore %u failed - diag=%d\n",
2268 					lc_id, diag);
2269 		}
2270 	}
2271 }
2272 
2273 /*
2274  * Launch packet forwarding configuration.
2275  */
2276 void
2277 start_packet_forwarding(int with_tx_first)
2278 {
2279 	port_fwd_begin_t port_fwd_begin;
2280 	port_fwd_end_t  port_fwd_end;
2281 	unsigned int i;
2282 
2283 	if (strcmp(cur_fwd_eng->fwd_mode_name, "rxonly") == 0 && !nb_rxq)
2284 		rte_exit(EXIT_FAILURE, "rxq are 0, cannot use rxonly fwd mode\n");
2285 
2286 	if (strcmp(cur_fwd_eng->fwd_mode_name, "txonly") == 0 && !nb_txq)
2287 		rte_exit(EXIT_FAILURE, "txq are 0, cannot use txonly fwd mode\n");
2288 
2289 	if ((strcmp(cur_fwd_eng->fwd_mode_name, "rxonly") != 0 &&
2290 		strcmp(cur_fwd_eng->fwd_mode_name, "txonly") != 0) &&
2291 		(!nb_rxq || !nb_txq))
2292 		rte_exit(EXIT_FAILURE,
2293 			"Either rxq or txq are 0, cannot use %s fwd mode\n",
2294 			cur_fwd_eng->fwd_mode_name);
2295 
2296 	if (all_ports_started() == 0) {
2297 		fprintf(stderr, "Not all ports were started\n");
2298 		return;
2299 	}
2300 	if (test_done == 0) {
2301 		fprintf(stderr, "Packet forwarding already started\n");
2302 		return;
2303 	}
2304 
2305 	fwd_config_setup();
2306 
2307 	port_fwd_begin = cur_fwd_config.fwd_eng->port_fwd_begin;
2308 	if (port_fwd_begin != NULL) {
2309 		for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
2310 			if (port_fwd_begin(fwd_ports_ids[i])) {
2311 				fprintf(stderr,
2312 					"Packet forwarding is not ready\n");
2313 				return;
2314 			}
2315 		}
2316 	}
2317 
2318 	if (with_tx_first) {
2319 		port_fwd_begin = tx_only_engine.port_fwd_begin;
2320 		if (port_fwd_begin != NULL) {
2321 			for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
2322 				if (port_fwd_begin(fwd_ports_ids[i])) {
2323 					fprintf(stderr,
2324 						"Packet forwarding is not ready\n");
2325 					return;
2326 				}
2327 			}
2328 		}
2329 	}
2330 
2331 	test_done = 0;
2332 
2333 	if(!no_flush_rx)
2334 		flush_fwd_rx_queues();
2335 
2336 	pkt_fwd_config_display(&cur_fwd_config);
2337 	rxtx_config_display();
2338 
2339 	fwd_stats_reset();
2340 	if (with_tx_first) {
2341 		while (with_tx_first--) {
2342 			launch_packet_forwarding(
2343 					run_one_txonly_burst_on_core);
2344 			rte_eal_mp_wait_lcore();
2345 		}
2346 		port_fwd_end = tx_only_engine.port_fwd_end;
2347 		if (port_fwd_end != NULL) {
2348 			for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++)
2349 				(*port_fwd_end)(fwd_ports_ids[i]);
2350 		}
2351 	}
2352 	launch_packet_forwarding(start_pkt_forward_on_core);
2353 }
2354 
2355 void
2356 stop_packet_forwarding(void)
2357 {
2358 	port_fwd_end_t port_fwd_end;
2359 	lcoreid_t lc_id;
2360 	portid_t pt_id;
2361 	int i;
2362 
2363 	if (test_done) {
2364 		fprintf(stderr, "Packet forwarding not started\n");
2365 		return;
2366 	}
2367 	printf("Telling cores to stop...");
2368 	for (lc_id = 0; lc_id < cur_fwd_config.nb_fwd_lcores; lc_id++)
2369 		fwd_lcores[lc_id]->stopped = 1;
2370 	printf("\nWaiting for lcores to finish...\n");
2371 	rte_eal_mp_wait_lcore();
2372 	port_fwd_end = cur_fwd_config.fwd_eng->port_fwd_end;
2373 	if (port_fwd_end != NULL) {
2374 		for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
2375 			pt_id = fwd_ports_ids[i];
2376 			(*port_fwd_end)(pt_id);
2377 		}
2378 	}
2379 
2380 	fwd_stats_display();
2381 
2382 	printf("\nDone.\n");
2383 	test_done = 1;
2384 }
2385 
2386 void
2387 dev_set_link_up(portid_t pid)
2388 {
2389 	if (rte_eth_dev_set_link_up(pid) < 0)
2390 		fprintf(stderr, "\nSet link up fail.\n");
2391 }
2392 
2393 void
2394 dev_set_link_down(portid_t pid)
2395 {
2396 	if (rte_eth_dev_set_link_down(pid) < 0)
2397 		fprintf(stderr, "\nSet link down fail.\n");
2398 }
2399 
2400 static int
2401 all_ports_started(void)
2402 {
2403 	portid_t pi;
2404 	struct rte_port *port;
2405 
2406 	RTE_ETH_FOREACH_DEV(pi) {
2407 		port = &ports[pi];
2408 		/* Check if there is a port which is not started */
2409 		if ((port->port_status != RTE_PORT_STARTED) &&
2410 			(port->slave_flag == 0))
2411 			return 0;
2412 	}
2413 
2414 	/* No port is not started */
2415 	return 1;
2416 }
2417 
2418 int
2419 port_is_stopped(portid_t port_id)
2420 {
2421 	struct rte_port *port = &ports[port_id];
2422 
2423 	if ((port->port_status != RTE_PORT_STOPPED) &&
2424 	    (port->slave_flag == 0))
2425 		return 0;
2426 	return 1;
2427 }
2428 
2429 int
2430 all_ports_stopped(void)
2431 {
2432 	portid_t pi;
2433 
2434 	RTE_ETH_FOREACH_DEV(pi) {
2435 		if (!port_is_stopped(pi))
2436 			return 0;
2437 	}
2438 
2439 	return 1;
2440 }
2441 
2442 int
2443 port_is_started(portid_t port_id)
2444 {
2445 	if (port_id_is_invalid(port_id, ENABLED_WARN))
2446 		return 0;
2447 
2448 	if (ports[port_id].port_status != RTE_PORT_STARTED)
2449 		return 0;
2450 
2451 	return 1;
2452 }
2453 
2454 /* Configure the Rx and Tx hairpin queues for the selected port. */
2455 static int
2456 setup_hairpin_queues(portid_t pi, portid_t p_pi, uint16_t cnt_pi)
2457 {
2458 	queueid_t qi;
2459 	struct rte_eth_hairpin_conf hairpin_conf = {
2460 		.peer_count = 1,
2461 	};
2462 	int i;
2463 	int diag;
2464 	struct rte_port *port = &ports[pi];
2465 	uint16_t peer_rx_port = pi;
2466 	uint16_t peer_tx_port = pi;
2467 	uint32_t manual = 1;
2468 	uint32_t tx_exp = hairpin_mode & 0x10;
2469 
2470 	if (!(hairpin_mode & 0xf)) {
2471 		peer_rx_port = pi;
2472 		peer_tx_port = pi;
2473 		manual = 0;
2474 	} else if (hairpin_mode & 0x1) {
2475 		peer_tx_port = rte_eth_find_next_owned_by(pi + 1,
2476 						       RTE_ETH_DEV_NO_OWNER);
2477 		if (peer_tx_port >= RTE_MAX_ETHPORTS)
2478 			peer_tx_port = rte_eth_find_next_owned_by(0,
2479 						RTE_ETH_DEV_NO_OWNER);
2480 		if (p_pi != RTE_MAX_ETHPORTS) {
2481 			peer_rx_port = p_pi;
2482 		} else {
2483 			uint16_t next_pi;
2484 
2485 			/* Last port will be the peer RX port of the first. */
2486 			RTE_ETH_FOREACH_DEV(next_pi)
2487 				peer_rx_port = next_pi;
2488 		}
2489 		manual = 1;
2490 	} else if (hairpin_mode & 0x2) {
2491 		if (cnt_pi & 0x1) {
2492 			peer_rx_port = p_pi;
2493 		} else {
2494 			peer_rx_port = rte_eth_find_next_owned_by(pi + 1,
2495 						RTE_ETH_DEV_NO_OWNER);
2496 			if (peer_rx_port >= RTE_MAX_ETHPORTS)
2497 				peer_rx_port = pi;
2498 		}
2499 		peer_tx_port = peer_rx_port;
2500 		manual = 1;
2501 	}
2502 
2503 	for (qi = nb_txq, i = 0; qi < nb_hairpinq + nb_txq; qi++) {
2504 		hairpin_conf.peers[0].port = peer_rx_port;
2505 		hairpin_conf.peers[0].queue = i + nb_rxq;
2506 		hairpin_conf.manual_bind = !!manual;
2507 		hairpin_conf.tx_explicit = !!tx_exp;
2508 		diag = rte_eth_tx_hairpin_queue_setup
2509 			(pi, qi, nb_txd, &hairpin_conf);
2510 		i++;
2511 		if (diag == 0)
2512 			continue;
2513 
2514 		/* Fail to setup rx queue, return */
2515 		if (rte_atomic16_cmpset(&(port->port_status),
2516 					RTE_PORT_HANDLING,
2517 					RTE_PORT_STOPPED) == 0)
2518 			fprintf(stderr,
2519 				"Port %d can not be set back to stopped\n", pi);
2520 		fprintf(stderr, "Fail to configure port %d hairpin queues\n",
2521 			pi);
2522 		/* try to reconfigure queues next time */
2523 		port->need_reconfig_queues = 1;
2524 		return -1;
2525 	}
2526 	for (qi = nb_rxq, i = 0; qi < nb_hairpinq + nb_rxq; qi++) {
2527 		hairpin_conf.peers[0].port = peer_tx_port;
2528 		hairpin_conf.peers[0].queue = i + nb_txq;
2529 		hairpin_conf.manual_bind = !!manual;
2530 		hairpin_conf.tx_explicit = !!tx_exp;
2531 		diag = rte_eth_rx_hairpin_queue_setup
2532 			(pi, qi, nb_rxd, &hairpin_conf);
2533 		i++;
2534 		if (diag == 0)
2535 			continue;
2536 
2537 		/* Fail to setup rx queue, return */
2538 		if (rte_atomic16_cmpset(&(port->port_status),
2539 					RTE_PORT_HANDLING,
2540 					RTE_PORT_STOPPED) == 0)
2541 			fprintf(stderr,
2542 				"Port %d can not be set back to stopped\n", pi);
2543 		fprintf(stderr, "Fail to configure port %d hairpin queues\n",
2544 			pi);
2545 		/* try to reconfigure queues next time */
2546 		port->need_reconfig_queues = 1;
2547 		return -1;
2548 	}
2549 	return 0;
2550 }
2551 
2552 /* Configure the Rx with optional split. */
2553 int
2554 rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
2555 	       uint16_t nb_rx_desc, unsigned int socket_id,
2556 	       struct rte_eth_rxconf *rx_conf, struct rte_mempool *mp)
2557 {
2558 	union rte_eth_rxseg rx_useg[MAX_SEGS_BUFFER_SPLIT] = {};
2559 	unsigned int i, mp_n;
2560 	int ret;
2561 
2562 	if (rx_pkt_nb_segs <= 1 ||
2563 	    (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) == 0) {
2564 		rx_conf->rx_seg = NULL;
2565 		rx_conf->rx_nseg = 0;
2566 		ret = rte_eth_rx_queue_setup(port_id, rx_queue_id,
2567 					     nb_rx_desc, socket_id,
2568 					     rx_conf, mp);
2569 		return ret;
2570 	}
2571 	for (i = 0; i < rx_pkt_nb_segs; i++) {
2572 		struct rte_eth_rxseg_split *rx_seg = &rx_useg[i].split;
2573 		struct rte_mempool *mpx;
2574 		/*
2575 		 * Use last valid pool for the segments with number
2576 		 * exceeding the pool index.
2577 		 */
2578 		mp_n = (i > mbuf_data_size_n) ? mbuf_data_size_n - 1 : i;
2579 		mpx = mbuf_pool_find(socket_id, mp_n);
2580 		/* Handle zero as mbuf data buffer size. */
2581 		rx_seg->length = rx_pkt_seg_lengths[i] ?
2582 				   rx_pkt_seg_lengths[i] :
2583 				   mbuf_data_size[mp_n];
2584 		rx_seg->offset = i < rx_pkt_nb_offs ?
2585 				   rx_pkt_seg_offsets[i] : 0;
2586 		rx_seg->mp = mpx ? mpx : mp;
2587 	}
2588 	rx_conf->rx_nseg = rx_pkt_nb_segs;
2589 	rx_conf->rx_seg = rx_useg;
2590 	ret = rte_eth_rx_queue_setup(port_id, rx_queue_id, nb_rx_desc,
2591 				    socket_id, rx_conf, NULL);
2592 	rx_conf->rx_seg = NULL;
2593 	rx_conf->rx_nseg = 0;
2594 	return ret;
2595 }
2596 
2597 static int
2598 alloc_xstats_display_info(portid_t pi)
2599 {
2600 	uint64_t **ids_supp = &ports[pi].xstats_info.ids_supp;
2601 	uint64_t **prev_values = &ports[pi].xstats_info.prev_values;
2602 	uint64_t **curr_values = &ports[pi].xstats_info.curr_values;
2603 
2604 	if (xstats_display_num == 0)
2605 		return 0;
2606 
2607 	*ids_supp = calloc(xstats_display_num, sizeof(**ids_supp));
2608 	if (*ids_supp == NULL)
2609 		goto fail_ids_supp;
2610 
2611 	*prev_values = calloc(xstats_display_num,
2612 			      sizeof(**prev_values));
2613 	if (*prev_values == NULL)
2614 		goto fail_prev_values;
2615 
2616 	*curr_values = calloc(xstats_display_num,
2617 			      sizeof(**curr_values));
2618 	if (*curr_values == NULL)
2619 		goto fail_curr_values;
2620 
2621 	ports[pi].xstats_info.allocated = true;
2622 
2623 	return 0;
2624 
2625 fail_curr_values:
2626 	free(*prev_values);
2627 fail_prev_values:
2628 	free(*ids_supp);
2629 fail_ids_supp:
2630 	return -ENOMEM;
2631 }
2632 
2633 static void
2634 free_xstats_display_info(portid_t pi)
2635 {
2636 	if (!ports[pi].xstats_info.allocated)
2637 		return;
2638 	free(ports[pi].xstats_info.ids_supp);
2639 	free(ports[pi].xstats_info.prev_values);
2640 	free(ports[pi].xstats_info.curr_values);
2641 	ports[pi].xstats_info.allocated = false;
2642 }
2643 
2644 /** Fill helper structures for specified port to show extended statistics. */
2645 static void
2646 fill_xstats_display_info_for_port(portid_t pi)
2647 {
2648 	unsigned int stat, stat_supp;
2649 	const char *xstat_name;
2650 	struct rte_port *port;
2651 	uint64_t *ids_supp;
2652 	int rc;
2653 
2654 	if (xstats_display_num == 0)
2655 		return;
2656 
2657 	if (pi == (portid_t)RTE_PORT_ALL) {
2658 		fill_xstats_display_info();
2659 		return;
2660 	}
2661 
2662 	port = &ports[pi];
2663 	if (port->port_status != RTE_PORT_STARTED)
2664 		return;
2665 
2666 	if (!port->xstats_info.allocated && alloc_xstats_display_info(pi) != 0)
2667 		rte_exit(EXIT_FAILURE,
2668 			 "Failed to allocate xstats display memory\n");
2669 
2670 	ids_supp = port->xstats_info.ids_supp;
2671 	for (stat = stat_supp = 0; stat < xstats_display_num; stat++) {
2672 		xstat_name = xstats_display[stat].name;
2673 		rc = rte_eth_xstats_get_id_by_name(pi, xstat_name,
2674 						   ids_supp + stat_supp);
2675 		if (rc != 0) {
2676 			fprintf(stderr, "No xstat '%s' on port %u - skip it %u\n",
2677 				xstat_name, pi, stat);
2678 			continue;
2679 		}
2680 		stat_supp++;
2681 	}
2682 
2683 	port->xstats_info.ids_supp_sz = stat_supp;
2684 }
2685 
2686 /** Fill helper structures for all ports to show extended statistics. */
2687 static void
2688 fill_xstats_display_info(void)
2689 {
2690 	portid_t pi;
2691 
2692 	if (xstats_display_num == 0)
2693 		return;
2694 
2695 	RTE_ETH_FOREACH_DEV(pi)
2696 		fill_xstats_display_info_for_port(pi);
2697 }
2698 
2699 int
2700 start_port(portid_t pid)
2701 {
2702 	int diag, need_check_link_status = -1;
2703 	portid_t pi;
2704 	portid_t p_pi = RTE_MAX_ETHPORTS;
2705 	portid_t pl[RTE_MAX_ETHPORTS];
2706 	portid_t peer_pl[RTE_MAX_ETHPORTS];
2707 	uint16_t cnt_pi = 0;
2708 	uint16_t cfg_pi = 0;
2709 	int peer_pi;
2710 	queueid_t qi;
2711 	struct rte_port *port;
2712 	struct rte_eth_hairpin_cap cap;
2713 
2714 	if (port_id_is_invalid(pid, ENABLED_WARN))
2715 		return 0;
2716 
2717 	RTE_ETH_FOREACH_DEV(pi) {
2718 		if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
2719 			continue;
2720 
2721 		need_check_link_status = 0;
2722 		port = &ports[pi];
2723 		if (rte_atomic16_cmpset(&(port->port_status), RTE_PORT_STOPPED,
2724 						 RTE_PORT_HANDLING) == 0) {
2725 			fprintf(stderr, "Port %d is now not stopped\n", pi);
2726 			continue;
2727 		}
2728 
2729 		if (port->need_reconfig > 0) {
2730 			struct rte_eth_conf dev_conf;
2731 			int k;
2732 
2733 			port->need_reconfig = 0;
2734 
2735 			if (flow_isolate_all) {
2736 				int ret = port_flow_isolate(pi, 1);
2737 				if (ret) {
2738 					fprintf(stderr,
2739 						"Failed to apply isolated mode on port %d\n",
2740 						pi);
2741 					return -1;
2742 				}
2743 			}
2744 			configure_rxtx_dump_callbacks(0);
2745 			printf("Configuring Port %d (socket %u)\n", pi,
2746 					port->socket_id);
2747 			if (nb_hairpinq > 0 &&
2748 			    rte_eth_dev_hairpin_capability_get(pi, &cap)) {
2749 				fprintf(stderr,
2750 					"Port %d doesn't support hairpin queues\n",
2751 					pi);
2752 				return -1;
2753 			}
2754 
2755 			/* configure port */
2756 			diag = eth_dev_configure_mp(pi, nb_rxq + nb_hairpinq,
2757 						     nb_txq + nb_hairpinq,
2758 						     &(port->dev_conf));
2759 			if (diag != 0) {
2760 				if (rte_atomic16_cmpset(&(port->port_status),
2761 				RTE_PORT_HANDLING, RTE_PORT_STOPPED) == 0)
2762 					fprintf(stderr,
2763 						"Port %d can not be set back to stopped\n",
2764 						pi);
2765 				fprintf(stderr, "Fail to configure port %d\n",
2766 					pi);
2767 				/* try to reconfigure port next time */
2768 				port->need_reconfig = 1;
2769 				return -1;
2770 			}
2771 			/* get device configuration*/
2772 			if (0 !=
2773 				eth_dev_conf_get_print_err(pi, &dev_conf)) {
2774 				fprintf(stderr,
2775 					"port %d can not get device configuration\n",
2776 					pi);
2777 				return -1;
2778 			}
2779 			/* Apply Rx offloads configuration */
2780 			if (dev_conf.rxmode.offloads !=
2781 			    port->dev_conf.rxmode.offloads) {
2782 				port->dev_conf.rxmode.offloads |=
2783 					dev_conf.rxmode.offloads;
2784 				for (k = 0;
2785 				     k < port->dev_info.max_rx_queues;
2786 				     k++)
2787 					port->rx_conf[k].offloads |=
2788 						dev_conf.rxmode.offloads;
2789 			}
2790 			/* Apply Tx offloads configuration */
2791 			if (dev_conf.txmode.offloads !=
2792 			    port->dev_conf.txmode.offloads) {
2793 				port->dev_conf.txmode.offloads |=
2794 					dev_conf.txmode.offloads;
2795 				for (k = 0;
2796 				     k < port->dev_info.max_tx_queues;
2797 				     k++)
2798 					port->tx_conf[k].offloads |=
2799 						dev_conf.txmode.offloads;
2800 			}
2801 		}
2802 		if (port->need_reconfig_queues > 0 && is_proc_primary()) {
2803 			port->need_reconfig_queues = 0;
2804 			/* setup tx queues */
2805 			for (qi = 0; qi < nb_txq; qi++) {
2806 				if ((numa_support) &&
2807 					(txring_numa[pi] != NUMA_NO_CONFIG))
2808 					diag = rte_eth_tx_queue_setup(pi, qi,
2809 						port->nb_tx_desc[qi],
2810 						txring_numa[pi],
2811 						&(port->tx_conf[qi]));
2812 				else
2813 					diag = rte_eth_tx_queue_setup(pi, qi,
2814 						port->nb_tx_desc[qi],
2815 						port->socket_id,
2816 						&(port->tx_conf[qi]));
2817 
2818 				if (diag == 0)
2819 					continue;
2820 
2821 				/* Fail to setup tx queue, return */
2822 				if (rte_atomic16_cmpset(&(port->port_status),
2823 							RTE_PORT_HANDLING,
2824 							RTE_PORT_STOPPED) == 0)
2825 					fprintf(stderr,
2826 						"Port %d can not be set back to stopped\n",
2827 						pi);
2828 				fprintf(stderr,
2829 					"Fail to configure port %d tx queues\n",
2830 					pi);
2831 				/* try to reconfigure queues next time */
2832 				port->need_reconfig_queues = 1;
2833 				return -1;
2834 			}
2835 			for (qi = 0; qi < nb_rxq; qi++) {
2836 				/* setup rx queues */
2837 				if ((numa_support) &&
2838 					(rxring_numa[pi] != NUMA_NO_CONFIG)) {
2839 					struct rte_mempool * mp =
2840 						mbuf_pool_find
2841 							(rxring_numa[pi], 0);
2842 					if (mp == NULL) {
2843 						fprintf(stderr,
2844 							"Failed to setup RX queue: No mempool allocation on the socket %d\n",
2845 							rxring_numa[pi]);
2846 						return -1;
2847 					}
2848 
2849 					diag = rx_queue_setup(pi, qi,
2850 					     port->nb_rx_desc[qi],
2851 					     rxring_numa[pi],
2852 					     &(port->rx_conf[qi]),
2853 					     mp);
2854 				} else {
2855 					struct rte_mempool *mp =
2856 						mbuf_pool_find
2857 							(port->socket_id, 0);
2858 					if (mp == NULL) {
2859 						fprintf(stderr,
2860 							"Failed to setup RX queue: No mempool allocation on the socket %d\n",
2861 							port->socket_id);
2862 						return -1;
2863 					}
2864 					diag = rx_queue_setup(pi, qi,
2865 					     port->nb_rx_desc[qi],
2866 					     port->socket_id,
2867 					     &(port->rx_conf[qi]),
2868 					     mp);
2869 				}
2870 				if (diag == 0)
2871 					continue;
2872 
2873 				/* Fail to setup rx queue, return */
2874 				if (rte_atomic16_cmpset(&(port->port_status),
2875 							RTE_PORT_HANDLING,
2876 							RTE_PORT_STOPPED) == 0)
2877 					fprintf(stderr,
2878 						"Port %d can not be set back to stopped\n",
2879 						pi);
2880 				fprintf(stderr,
2881 					"Fail to configure port %d rx queues\n",
2882 					pi);
2883 				/* try to reconfigure queues next time */
2884 				port->need_reconfig_queues = 1;
2885 				return -1;
2886 			}
2887 			/* setup hairpin queues */
2888 			if (setup_hairpin_queues(pi, p_pi, cnt_pi) != 0)
2889 				return -1;
2890 		}
2891 		configure_rxtx_dump_callbacks(verbose_level);
2892 		if (clear_ptypes) {
2893 			diag = rte_eth_dev_set_ptypes(pi, RTE_PTYPE_UNKNOWN,
2894 					NULL, 0);
2895 			if (diag < 0)
2896 				fprintf(stderr,
2897 					"Port %d: Failed to disable Ptype parsing\n",
2898 					pi);
2899 		}
2900 
2901 		p_pi = pi;
2902 		cnt_pi++;
2903 
2904 		/* start port */
2905 		diag = eth_dev_start_mp(pi);
2906 		if (diag < 0) {
2907 			fprintf(stderr, "Fail to start port %d: %s\n",
2908 				pi, rte_strerror(-diag));
2909 
2910 			/* Fail to setup rx queue, return */
2911 			if (rte_atomic16_cmpset(&(port->port_status),
2912 				RTE_PORT_HANDLING, RTE_PORT_STOPPED) == 0)
2913 				fprintf(stderr,
2914 					"Port %d can not be set back to stopped\n",
2915 					pi);
2916 			continue;
2917 		}
2918 
2919 		if (rte_atomic16_cmpset(&(port->port_status),
2920 			RTE_PORT_HANDLING, RTE_PORT_STARTED) == 0)
2921 			fprintf(stderr, "Port %d can not be set into started\n",
2922 				pi);
2923 
2924 		if (eth_macaddr_get_print_err(pi, &port->eth_addr) == 0)
2925 			printf("Port %d: " RTE_ETHER_ADDR_PRT_FMT "\n", pi,
2926 					RTE_ETHER_ADDR_BYTES(&port->eth_addr));
2927 
2928 		/* at least one port started, need checking link status */
2929 		need_check_link_status = 1;
2930 
2931 		pl[cfg_pi++] = pi;
2932 	}
2933 
2934 	if (need_check_link_status == 1 && !no_link_check)
2935 		check_all_ports_link_status(RTE_PORT_ALL);
2936 	else if (need_check_link_status == 0)
2937 		fprintf(stderr, "Please stop the ports first\n");
2938 
2939 	if (hairpin_mode & 0xf) {
2940 		uint16_t i;
2941 		int j;
2942 
2943 		/* bind all started hairpin ports */
2944 		for (i = 0; i < cfg_pi; i++) {
2945 			pi = pl[i];
2946 			/* bind current Tx to all peer Rx */
2947 			peer_pi = rte_eth_hairpin_get_peer_ports(pi, peer_pl,
2948 							RTE_MAX_ETHPORTS, 1);
2949 			if (peer_pi < 0)
2950 				return peer_pi;
2951 			for (j = 0; j < peer_pi; j++) {
2952 				if (!port_is_started(peer_pl[j]))
2953 					continue;
2954 				diag = rte_eth_hairpin_bind(pi, peer_pl[j]);
2955 				if (diag < 0) {
2956 					fprintf(stderr,
2957 						"Error during binding hairpin Tx port %u to %u: %s\n",
2958 						pi, peer_pl[j],
2959 						rte_strerror(-diag));
2960 					return -1;
2961 				}
2962 			}
2963 			/* bind all peer Tx to current Rx */
2964 			peer_pi = rte_eth_hairpin_get_peer_ports(pi, peer_pl,
2965 							RTE_MAX_ETHPORTS, 0);
2966 			if (peer_pi < 0)
2967 				return peer_pi;
2968 			for (j = 0; j < peer_pi; j++) {
2969 				if (!port_is_started(peer_pl[j]))
2970 					continue;
2971 				diag = rte_eth_hairpin_bind(peer_pl[j], pi);
2972 				if (diag < 0) {
2973 					fprintf(stderr,
2974 						"Error during binding hairpin Tx port %u to %u: %s\n",
2975 						peer_pl[j], pi,
2976 						rte_strerror(-diag));
2977 					return -1;
2978 				}
2979 			}
2980 		}
2981 	}
2982 
2983 	fill_xstats_display_info_for_port(pid);
2984 
2985 	printf("Done\n");
2986 	return 0;
2987 }
2988 
2989 void
2990 stop_port(portid_t pid)
2991 {
2992 	portid_t pi;
2993 	struct rte_port *port;
2994 	int need_check_link_status = 0;
2995 	portid_t peer_pl[RTE_MAX_ETHPORTS];
2996 	int peer_pi;
2997 
2998 	if (port_id_is_invalid(pid, ENABLED_WARN))
2999 		return;
3000 
3001 	printf("Stopping ports...\n");
3002 
3003 	RTE_ETH_FOREACH_DEV(pi) {
3004 		if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
3005 			continue;
3006 
3007 		if (port_is_forwarding(pi) != 0 && test_done == 0) {
3008 			fprintf(stderr,
3009 				"Please remove port %d from forwarding configuration.\n",
3010 				pi);
3011 			continue;
3012 		}
3013 
3014 		if (port_is_bonding_slave(pi)) {
3015 			fprintf(stderr,
3016 				"Please remove port %d from bonded device.\n",
3017 				pi);
3018 			continue;
3019 		}
3020 
3021 		port = &ports[pi];
3022 		if (rte_atomic16_cmpset(&(port->port_status), RTE_PORT_STARTED,
3023 						RTE_PORT_HANDLING) == 0)
3024 			continue;
3025 
3026 		if (hairpin_mode & 0xf) {
3027 			int j;
3028 
3029 			rte_eth_hairpin_unbind(pi, RTE_MAX_ETHPORTS);
3030 			/* unbind all peer Tx from current Rx */
3031 			peer_pi = rte_eth_hairpin_get_peer_ports(pi, peer_pl,
3032 							RTE_MAX_ETHPORTS, 0);
3033 			if (peer_pi < 0)
3034 				continue;
3035 			for (j = 0; j < peer_pi; j++) {
3036 				if (!port_is_started(peer_pl[j]))
3037 					continue;
3038 				rte_eth_hairpin_unbind(peer_pl[j], pi);
3039 			}
3040 		}
3041 
3042 		if (port->flow_list)
3043 			port_flow_flush(pi);
3044 
3045 		if (eth_dev_stop_mp(pi) != 0)
3046 			RTE_LOG(ERR, EAL, "rte_eth_dev_stop failed for port %u\n",
3047 				pi);
3048 
3049 		if (rte_atomic16_cmpset(&(port->port_status),
3050 			RTE_PORT_HANDLING, RTE_PORT_STOPPED) == 0)
3051 			fprintf(stderr, "Port %d can not be set into stopped\n",
3052 				pi);
3053 		need_check_link_status = 1;
3054 	}
3055 	if (need_check_link_status && !no_link_check)
3056 		check_all_ports_link_status(RTE_PORT_ALL);
3057 
3058 	printf("Done\n");
3059 }
3060 
3061 static void
3062 remove_invalid_ports_in(portid_t *array, portid_t *total)
3063 {
3064 	portid_t i;
3065 	portid_t new_total = 0;
3066 
3067 	for (i = 0; i < *total; i++)
3068 		if (!port_id_is_invalid(array[i], DISABLED_WARN)) {
3069 			array[new_total] = array[i];
3070 			new_total++;
3071 		}
3072 	*total = new_total;
3073 }
3074 
3075 static void
3076 remove_invalid_ports(void)
3077 {
3078 	remove_invalid_ports_in(ports_ids, &nb_ports);
3079 	remove_invalid_ports_in(fwd_ports_ids, &nb_fwd_ports);
3080 	nb_cfg_ports = nb_fwd_ports;
3081 }
3082 
3083 void
3084 close_port(portid_t pid)
3085 {
3086 	portid_t pi;
3087 	struct rte_port *port;
3088 
3089 	if (port_id_is_invalid(pid, ENABLED_WARN))
3090 		return;
3091 
3092 	printf("Closing ports...\n");
3093 
3094 	RTE_ETH_FOREACH_DEV(pi) {
3095 		if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
3096 			continue;
3097 
3098 		if (port_is_forwarding(pi) != 0 && test_done == 0) {
3099 			fprintf(stderr,
3100 				"Please remove port %d from forwarding configuration.\n",
3101 				pi);
3102 			continue;
3103 		}
3104 
3105 		if (port_is_bonding_slave(pi)) {
3106 			fprintf(stderr,
3107 				"Please remove port %d from bonded device.\n",
3108 				pi);
3109 			continue;
3110 		}
3111 
3112 		port = &ports[pi];
3113 		if (rte_atomic16_cmpset(&(port->port_status),
3114 			RTE_PORT_CLOSED, RTE_PORT_CLOSED) == 1) {
3115 			fprintf(stderr, "Port %d is already closed\n", pi);
3116 			continue;
3117 		}
3118 
3119 		if (is_proc_primary()) {
3120 			port_flow_flush(pi);
3121 			port_flex_item_flush(pi);
3122 			rte_eth_dev_close(pi);
3123 		}
3124 
3125 		free_xstats_display_info(pi);
3126 	}
3127 
3128 	remove_invalid_ports();
3129 	printf("Done\n");
3130 }
3131 
3132 void
3133 reset_port(portid_t pid)
3134 {
3135 	int diag;
3136 	portid_t pi;
3137 	struct rte_port *port;
3138 
3139 	if (port_id_is_invalid(pid, ENABLED_WARN))
3140 		return;
3141 
3142 	if ((pid == (portid_t)RTE_PORT_ALL && !all_ports_stopped()) ||
3143 		(pid != (portid_t)RTE_PORT_ALL && !port_is_stopped(pid))) {
3144 		fprintf(stderr,
3145 			"Can not reset port(s), please stop port(s) first.\n");
3146 		return;
3147 	}
3148 
3149 	printf("Resetting ports...\n");
3150 
3151 	RTE_ETH_FOREACH_DEV(pi) {
3152 		if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
3153 			continue;
3154 
3155 		if (port_is_forwarding(pi) != 0 && test_done == 0) {
3156 			fprintf(stderr,
3157 				"Please remove port %d from forwarding configuration.\n",
3158 				pi);
3159 			continue;
3160 		}
3161 
3162 		if (port_is_bonding_slave(pi)) {
3163 			fprintf(stderr,
3164 				"Please remove port %d from bonded device.\n",
3165 				pi);
3166 			continue;
3167 		}
3168 
3169 		diag = rte_eth_dev_reset(pi);
3170 		if (diag == 0) {
3171 			port = &ports[pi];
3172 			port->need_reconfig = 1;
3173 			port->need_reconfig_queues = 1;
3174 		} else {
3175 			fprintf(stderr, "Failed to reset port %d. diag=%d\n",
3176 				pi, diag);
3177 		}
3178 	}
3179 
3180 	printf("Done\n");
3181 }
3182 
3183 void
3184 attach_port(char *identifier)
3185 {
3186 	portid_t pi;
3187 	struct rte_dev_iterator iterator;
3188 
3189 	printf("Attaching a new port...\n");
3190 
3191 	if (identifier == NULL) {
3192 		fprintf(stderr, "Invalid parameters are specified\n");
3193 		return;
3194 	}
3195 
3196 	if (rte_dev_probe(identifier) < 0) {
3197 		TESTPMD_LOG(ERR, "Failed to attach port %s\n", identifier);
3198 		return;
3199 	}
3200 
3201 	/* first attach mode: event */
3202 	if (setup_on_probe_event) {
3203 		/* new ports are detected on RTE_ETH_EVENT_NEW event */
3204 		for (pi = 0; pi < RTE_MAX_ETHPORTS; pi++)
3205 			if (ports[pi].port_status == RTE_PORT_HANDLING &&
3206 					ports[pi].need_setup != 0)
3207 				setup_attached_port(pi);
3208 		return;
3209 	}
3210 
3211 	/* second attach mode: iterator */
3212 	RTE_ETH_FOREACH_MATCHING_DEV(pi, identifier, &iterator) {
3213 		/* setup ports matching the devargs used for probing */
3214 		if (port_is_forwarding(pi))
3215 			continue; /* port was already attached before */
3216 		setup_attached_port(pi);
3217 	}
3218 }
3219 
3220 static void
3221 setup_attached_port(portid_t pi)
3222 {
3223 	unsigned int socket_id;
3224 	int ret;
3225 
3226 	socket_id = (unsigned)rte_eth_dev_socket_id(pi);
3227 	/* if socket_id is invalid, set to the first available socket. */
3228 	if (check_socket_id(socket_id) < 0)
3229 		socket_id = socket_ids[0];
3230 	reconfig(pi, socket_id);
3231 	ret = rte_eth_promiscuous_enable(pi);
3232 	if (ret != 0)
3233 		fprintf(stderr,
3234 			"Error during enabling promiscuous mode for port %u: %s - ignore\n",
3235 			pi, rte_strerror(-ret));
3236 
3237 	ports_ids[nb_ports++] = pi;
3238 	fwd_ports_ids[nb_fwd_ports++] = pi;
3239 	nb_cfg_ports = nb_fwd_ports;
3240 	ports[pi].need_setup = 0;
3241 	ports[pi].port_status = RTE_PORT_STOPPED;
3242 
3243 	printf("Port %d is attached. Now total ports is %d\n", pi, nb_ports);
3244 	printf("Done\n");
3245 }
3246 
3247 static void
3248 detach_device(struct rte_device *dev)
3249 {
3250 	portid_t sibling;
3251 
3252 	if (dev == NULL) {
3253 		fprintf(stderr, "Device already removed\n");
3254 		return;
3255 	}
3256 
3257 	printf("Removing a device...\n");
3258 
3259 	RTE_ETH_FOREACH_DEV_OF(sibling, dev) {
3260 		if (ports[sibling].port_status != RTE_PORT_CLOSED) {
3261 			if (ports[sibling].port_status != RTE_PORT_STOPPED) {
3262 				fprintf(stderr, "Port %u not stopped\n",
3263 					sibling);
3264 				return;
3265 			}
3266 			port_flow_flush(sibling);
3267 		}
3268 	}
3269 
3270 	if (rte_dev_remove(dev) < 0) {
3271 		TESTPMD_LOG(ERR, "Failed to detach device %s\n", dev->name);
3272 		return;
3273 	}
3274 	remove_invalid_ports();
3275 
3276 	printf("Device is detached\n");
3277 	printf("Now total ports is %d\n", nb_ports);
3278 	printf("Done\n");
3279 	return;
3280 }
3281 
3282 void
3283 detach_port_device(portid_t port_id)
3284 {
3285 	int ret;
3286 	struct rte_eth_dev_info dev_info;
3287 
3288 	if (port_id_is_invalid(port_id, ENABLED_WARN))
3289 		return;
3290 
3291 	if (ports[port_id].port_status != RTE_PORT_CLOSED) {
3292 		if (ports[port_id].port_status != RTE_PORT_STOPPED) {
3293 			fprintf(stderr, "Port not stopped\n");
3294 			return;
3295 		}
3296 		fprintf(stderr, "Port was not closed\n");
3297 	}
3298 
3299 	ret = eth_dev_info_get_print_err(port_id, &dev_info);
3300 	if (ret != 0) {
3301 		TESTPMD_LOG(ERR,
3302 			"Failed to get device info for port %d, not detaching\n",
3303 			port_id);
3304 		return;
3305 	}
3306 	detach_device(dev_info.device);
3307 }
3308 
3309 void
3310 detach_devargs(char *identifier)
3311 {
3312 	struct rte_dev_iterator iterator;
3313 	struct rte_devargs da;
3314 	portid_t port_id;
3315 
3316 	printf("Removing a device...\n");
3317 
3318 	memset(&da, 0, sizeof(da));
3319 	if (rte_devargs_parsef(&da, "%s", identifier)) {
3320 		fprintf(stderr, "cannot parse identifier\n");
3321 		return;
3322 	}
3323 
3324 	RTE_ETH_FOREACH_MATCHING_DEV(port_id, identifier, &iterator) {
3325 		if (ports[port_id].port_status != RTE_PORT_CLOSED) {
3326 			if (ports[port_id].port_status != RTE_PORT_STOPPED) {
3327 				fprintf(stderr, "Port %u not stopped\n",
3328 					port_id);
3329 				rte_eth_iterator_cleanup(&iterator);
3330 				rte_devargs_reset(&da);
3331 				return;
3332 			}
3333 			port_flow_flush(port_id);
3334 		}
3335 	}
3336 
3337 	if (rte_eal_hotplug_remove(da.bus->name, da.name) != 0) {
3338 		TESTPMD_LOG(ERR, "Failed to detach device %s(%s)\n",
3339 			    da.name, da.bus->name);
3340 		rte_devargs_reset(&da);
3341 		return;
3342 	}
3343 
3344 	remove_invalid_ports();
3345 
3346 	printf("Device %s is detached\n", identifier);
3347 	printf("Now total ports is %d\n", nb_ports);
3348 	printf("Done\n");
3349 	rte_devargs_reset(&da);
3350 }
3351 
3352 void
3353 pmd_test_exit(void)
3354 {
3355 	portid_t pt_id;
3356 	unsigned int i;
3357 	int ret;
3358 
3359 	if (test_done == 0)
3360 		stop_packet_forwarding();
3361 
3362 #ifndef RTE_EXEC_ENV_WINDOWS
3363 	for (i = 0 ; i < RTE_DIM(mempools) ; i++) {
3364 		if (mempools[i]) {
3365 			if (mp_alloc_type == MP_ALLOC_ANON)
3366 				rte_mempool_mem_iter(mempools[i], dma_unmap_cb,
3367 						     NULL);
3368 		}
3369 	}
3370 #endif
3371 	if (ports != NULL) {
3372 		no_link_check = 1;
3373 		RTE_ETH_FOREACH_DEV(pt_id) {
3374 			printf("\nStopping port %d...\n", pt_id);
3375 			fflush(stdout);
3376 			stop_port(pt_id);
3377 		}
3378 		RTE_ETH_FOREACH_DEV(pt_id) {
3379 			printf("\nShutting down port %d...\n", pt_id);
3380 			fflush(stdout);
3381 			close_port(pt_id);
3382 		}
3383 	}
3384 
3385 	if (hot_plug) {
3386 		ret = rte_dev_event_monitor_stop();
3387 		if (ret) {
3388 			RTE_LOG(ERR, EAL,
3389 				"fail to stop device event monitor.");
3390 			return;
3391 		}
3392 
3393 		ret = rte_dev_event_callback_unregister(NULL,
3394 			dev_event_callback, NULL);
3395 		if (ret < 0) {
3396 			RTE_LOG(ERR, EAL,
3397 				"fail to unregister device event callback.\n");
3398 			return;
3399 		}
3400 
3401 		ret = rte_dev_hotplug_handle_disable();
3402 		if (ret) {
3403 			RTE_LOG(ERR, EAL,
3404 				"fail to disable hotplug handling.\n");
3405 			return;
3406 		}
3407 	}
3408 	for (i = 0 ; i < RTE_DIM(mempools) ; i++) {
3409 		if (mempools[i])
3410 			mempool_free_mp(mempools[i]);
3411 	}
3412 	free(xstats_display);
3413 
3414 	printf("\nBye...\n");
3415 }
3416 
3417 typedef void (*cmd_func_t)(void);
3418 struct pmd_test_command {
3419 	const char *cmd_name;
3420 	cmd_func_t cmd_func;
3421 };
3422 
3423 /* Check the link status of all ports in up to 9s, and print them finally */
3424 static void
3425 check_all_ports_link_status(uint32_t port_mask)
3426 {
3427 #define CHECK_INTERVAL 100 /* 100ms */
3428 #define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
3429 	portid_t portid;
3430 	uint8_t count, all_ports_up, print_flag = 0;
3431 	struct rte_eth_link link;
3432 	int ret;
3433 	char link_status[RTE_ETH_LINK_MAX_STR_LEN];
3434 
3435 	printf("Checking link statuses...\n");
3436 	fflush(stdout);
3437 	for (count = 0; count <= MAX_CHECK_TIME; count++) {
3438 		all_ports_up = 1;
3439 		RTE_ETH_FOREACH_DEV(portid) {
3440 			if ((port_mask & (1 << portid)) == 0)
3441 				continue;
3442 			memset(&link, 0, sizeof(link));
3443 			ret = rte_eth_link_get_nowait(portid, &link);
3444 			if (ret < 0) {
3445 				all_ports_up = 0;
3446 				if (print_flag == 1)
3447 					fprintf(stderr,
3448 						"Port %u link get failed: %s\n",
3449 						portid, rte_strerror(-ret));
3450 				continue;
3451 			}
3452 			/* print link status if flag set */
3453 			if (print_flag == 1) {
3454 				rte_eth_link_to_str(link_status,
3455 					sizeof(link_status), &link);
3456 				printf("Port %d %s\n", portid, link_status);
3457 				continue;
3458 			}
3459 			/* clear all_ports_up flag if any link down */
3460 			if (link.link_status == ETH_LINK_DOWN) {
3461 				all_ports_up = 0;
3462 				break;
3463 			}
3464 		}
3465 		/* after finally printing all link status, get out */
3466 		if (print_flag == 1)
3467 			break;
3468 
3469 		if (all_ports_up == 0) {
3470 			fflush(stdout);
3471 			rte_delay_ms(CHECK_INTERVAL);
3472 		}
3473 
3474 		/* set the print_flag if all ports up or timeout */
3475 		if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
3476 			print_flag = 1;
3477 		}
3478 
3479 		if (lsc_interrupt)
3480 			break;
3481 	}
3482 }
3483 
3484 static void
3485 rmv_port_callback(void *arg)
3486 {
3487 	int need_to_start = 0;
3488 	int org_no_link_check = no_link_check;
3489 	portid_t port_id = (intptr_t)arg;
3490 	struct rte_eth_dev_info dev_info;
3491 	int ret;
3492 
3493 	RTE_ETH_VALID_PORTID_OR_RET(port_id);
3494 
3495 	if (!test_done && port_is_forwarding(port_id)) {
3496 		need_to_start = 1;
3497 		stop_packet_forwarding();
3498 	}
3499 	no_link_check = 1;
3500 	stop_port(port_id);
3501 	no_link_check = org_no_link_check;
3502 
3503 	ret = eth_dev_info_get_print_err(port_id, &dev_info);
3504 	if (ret != 0)
3505 		TESTPMD_LOG(ERR,
3506 			"Failed to get device info for port %d, not detaching\n",
3507 			port_id);
3508 	else {
3509 		struct rte_device *device = dev_info.device;
3510 		close_port(port_id);
3511 		detach_device(device); /* might be already removed or have more ports */
3512 	}
3513 	if (need_to_start)
3514 		start_packet_forwarding(0);
3515 }
3516 
3517 /* This function is used by the interrupt thread */
3518 static int
3519 eth_event_callback(portid_t port_id, enum rte_eth_event_type type, void *param,
3520 		  void *ret_param)
3521 {
3522 	RTE_SET_USED(param);
3523 	RTE_SET_USED(ret_param);
3524 
3525 	if (type >= RTE_ETH_EVENT_MAX) {
3526 		fprintf(stderr,
3527 			"\nPort %" PRIu16 ": %s called upon invalid event %d\n",
3528 			port_id, __func__, type);
3529 		fflush(stderr);
3530 	} else if (event_print_mask & (UINT32_C(1) << type)) {
3531 		printf("\nPort %" PRIu16 ": %s event\n", port_id,
3532 			eth_event_desc[type]);
3533 		fflush(stdout);
3534 	}
3535 
3536 	switch (type) {
3537 	case RTE_ETH_EVENT_NEW:
3538 		ports[port_id].need_setup = 1;
3539 		ports[port_id].port_status = RTE_PORT_HANDLING;
3540 		break;
3541 	case RTE_ETH_EVENT_INTR_RMV:
3542 		if (port_id_is_invalid(port_id, DISABLED_WARN))
3543 			break;
3544 		if (rte_eal_alarm_set(100000,
3545 				rmv_port_callback, (void *)(intptr_t)port_id))
3546 			fprintf(stderr,
3547 				"Could not set up deferred device removal\n");
3548 		break;
3549 	case RTE_ETH_EVENT_DESTROY:
3550 		ports[port_id].port_status = RTE_PORT_CLOSED;
3551 		printf("Port %u is closed\n", port_id);
3552 		break;
3553 	default:
3554 		break;
3555 	}
3556 	return 0;
3557 }
3558 
3559 static int
3560 register_eth_event_callback(void)
3561 {
3562 	int ret;
3563 	enum rte_eth_event_type event;
3564 
3565 	for (event = RTE_ETH_EVENT_UNKNOWN;
3566 			event < RTE_ETH_EVENT_MAX; event++) {
3567 		ret = rte_eth_dev_callback_register(RTE_ETH_ALL,
3568 				event,
3569 				eth_event_callback,
3570 				NULL);
3571 		if (ret != 0) {
3572 			TESTPMD_LOG(ERR, "Failed to register callback for "
3573 					"%s event\n", eth_event_desc[event]);
3574 			return -1;
3575 		}
3576 	}
3577 
3578 	return 0;
3579 }
3580 
3581 /* This function is used by the interrupt thread */
3582 static void
3583 dev_event_callback(const char *device_name, enum rte_dev_event_type type,
3584 			     __rte_unused void *arg)
3585 {
3586 	uint16_t port_id;
3587 	int ret;
3588 
3589 	if (type >= RTE_DEV_EVENT_MAX) {
3590 		fprintf(stderr, "%s called upon invalid event %d\n",
3591 			__func__, type);
3592 		fflush(stderr);
3593 	}
3594 
3595 	switch (type) {
3596 	case RTE_DEV_EVENT_REMOVE:
3597 		RTE_LOG(DEBUG, EAL, "The device: %s has been removed!\n",
3598 			device_name);
3599 		ret = rte_eth_dev_get_port_by_name(device_name, &port_id);
3600 		if (ret) {
3601 			RTE_LOG(ERR, EAL, "can not get port by device %s!\n",
3602 				device_name);
3603 			return;
3604 		}
3605 		/*
3606 		 * Because the user's callback is invoked in eal interrupt
3607 		 * callback, the interrupt callback need to be finished before
3608 		 * it can be unregistered when detaching device. So finish
3609 		 * callback soon and use a deferred removal to detach device
3610 		 * is need. It is a workaround, once the device detaching be
3611 		 * moved into the eal in the future, the deferred removal could
3612 		 * be deleted.
3613 		 */
3614 		if (rte_eal_alarm_set(100000,
3615 				rmv_port_callback, (void *)(intptr_t)port_id))
3616 			RTE_LOG(ERR, EAL,
3617 				"Could not set up deferred device removal\n");
3618 		break;
3619 	case RTE_DEV_EVENT_ADD:
3620 		RTE_LOG(ERR, EAL, "The device: %s has been added!\n",
3621 			device_name);
3622 		/* TODO: After finish kernel driver binding,
3623 		 * begin to attach port.
3624 		 */
3625 		break;
3626 	default:
3627 		break;
3628 	}
3629 }
3630 
3631 static void
3632 rxtx_port_config(struct rte_port *port)
3633 {
3634 	uint16_t qid;
3635 	uint64_t offloads;
3636 
3637 	for (qid = 0; qid < nb_rxq; qid++) {
3638 		offloads = port->rx_conf[qid].offloads;
3639 		port->rx_conf[qid] = port->dev_info.default_rxconf;
3640 		if (offloads != 0)
3641 			port->rx_conf[qid].offloads = offloads;
3642 
3643 		/* Check if any Rx parameters have been passed */
3644 		if (rx_pthresh != RTE_PMD_PARAM_UNSET)
3645 			port->rx_conf[qid].rx_thresh.pthresh = rx_pthresh;
3646 
3647 		if (rx_hthresh != RTE_PMD_PARAM_UNSET)
3648 			port->rx_conf[qid].rx_thresh.hthresh = rx_hthresh;
3649 
3650 		if (rx_wthresh != RTE_PMD_PARAM_UNSET)
3651 			port->rx_conf[qid].rx_thresh.wthresh = rx_wthresh;
3652 
3653 		if (rx_free_thresh != RTE_PMD_PARAM_UNSET)
3654 			port->rx_conf[qid].rx_free_thresh = rx_free_thresh;
3655 
3656 		if (rx_drop_en != RTE_PMD_PARAM_UNSET)
3657 			port->rx_conf[qid].rx_drop_en = rx_drop_en;
3658 
3659 		port->nb_rx_desc[qid] = nb_rxd;
3660 	}
3661 
3662 	for (qid = 0; qid < nb_txq; qid++) {
3663 		offloads = port->tx_conf[qid].offloads;
3664 		port->tx_conf[qid] = port->dev_info.default_txconf;
3665 		if (offloads != 0)
3666 			port->tx_conf[qid].offloads = offloads;
3667 
3668 		/* Check if any Tx parameters have been passed */
3669 		if (tx_pthresh != RTE_PMD_PARAM_UNSET)
3670 			port->tx_conf[qid].tx_thresh.pthresh = tx_pthresh;
3671 
3672 		if (tx_hthresh != RTE_PMD_PARAM_UNSET)
3673 			port->tx_conf[qid].tx_thresh.hthresh = tx_hthresh;
3674 
3675 		if (tx_wthresh != RTE_PMD_PARAM_UNSET)
3676 			port->tx_conf[qid].tx_thresh.wthresh = tx_wthresh;
3677 
3678 		if (tx_rs_thresh != RTE_PMD_PARAM_UNSET)
3679 			port->tx_conf[qid].tx_rs_thresh = tx_rs_thresh;
3680 
3681 		if (tx_free_thresh != RTE_PMD_PARAM_UNSET)
3682 			port->tx_conf[qid].tx_free_thresh = tx_free_thresh;
3683 
3684 		port->nb_tx_desc[qid] = nb_txd;
3685 	}
3686 }
3687 
3688 /*
3689  * Helper function to set MTU from frame size
3690  *
3691  * port->dev_info should be set before calling this function.
3692  *
3693  * return 0 on success, negative on error
3694  */
3695 int
3696 update_mtu_from_frame_size(portid_t portid, uint32_t max_rx_pktlen)
3697 {
3698 	struct rte_port *port = &ports[portid];
3699 	uint32_t eth_overhead;
3700 	uint16_t mtu, new_mtu;
3701 
3702 	eth_overhead = get_eth_overhead(&port->dev_info);
3703 
3704 	if (rte_eth_dev_get_mtu(portid, &mtu) != 0) {
3705 		printf("Failed to get MTU for port %u\n", portid);
3706 		return -1;
3707 	}
3708 
3709 	new_mtu = max_rx_pktlen - eth_overhead;
3710 
3711 	if (mtu == new_mtu)
3712 		return 0;
3713 
3714 	if (eth_dev_set_mtu_mp(portid, new_mtu) != 0) {
3715 		fprintf(stderr,
3716 			"Failed to set MTU to %u for port %u\n",
3717 			new_mtu, portid);
3718 		return -1;
3719 	}
3720 
3721 	port->dev_conf.rxmode.mtu = new_mtu;
3722 
3723 	return 0;
3724 }
3725 
3726 void
3727 init_port_config(void)
3728 {
3729 	portid_t pid;
3730 	struct rte_port *port;
3731 	int ret, i;
3732 
3733 	RTE_ETH_FOREACH_DEV(pid) {
3734 		port = &ports[pid];
3735 		port->dev_conf.fdir_conf = fdir_conf;
3736 
3737 		ret = eth_dev_info_get_print_err(pid, &port->dev_info);
3738 		if (ret != 0)
3739 			return;
3740 
3741 		if (nb_rxq > 1) {
3742 			port->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
3743 			port->dev_conf.rx_adv_conf.rss_conf.rss_hf =
3744 				rss_hf & port->dev_info.flow_type_rss_offloads;
3745 		} else {
3746 			port->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
3747 			port->dev_conf.rx_adv_conf.rss_conf.rss_hf = 0;
3748 		}
3749 
3750 		if (port->dcb_flag == 0) {
3751 			if (port->dev_conf.rx_adv_conf.rss_conf.rss_hf != 0) {
3752 				port->dev_conf.rxmode.mq_mode =
3753 					(enum rte_eth_rx_mq_mode)
3754 						(rx_mq_mode & ETH_MQ_RX_RSS);
3755 			} else {
3756 				port->dev_conf.rxmode.mq_mode = ETH_MQ_RX_NONE;
3757 				port->dev_conf.rxmode.offloads &=
3758 						~DEV_RX_OFFLOAD_RSS_HASH;
3759 
3760 				for (i = 0;
3761 				     i < port->dev_info.nb_rx_queues;
3762 				     i++)
3763 					port->rx_conf[i].offloads &=
3764 						~DEV_RX_OFFLOAD_RSS_HASH;
3765 			}
3766 		}
3767 
3768 		rxtx_port_config(port);
3769 
3770 		ret = eth_macaddr_get_print_err(pid, &port->eth_addr);
3771 		if (ret != 0)
3772 			return;
3773 
3774 #if defined RTE_NET_IXGBE && defined RTE_LIBRTE_IXGBE_BYPASS
3775 		rte_pmd_ixgbe_bypass_init(pid);
3776 #endif
3777 
3778 		if (lsc_interrupt && (*port->dev_info.dev_flags & RTE_ETH_DEV_INTR_LSC))
3779 			port->dev_conf.intr_conf.lsc = 1;
3780 		if (rmv_interrupt && (*port->dev_info.dev_flags & RTE_ETH_DEV_INTR_RMV))
3781 			port->dev_conf.intr_conf.rmv = 1;
3782 	}
3783 }
3784 
3785 void set_port_slave_flag(portid_t slave_pid)
3786 {
3787 	struct rte_port *port;
3788 
3789 	port = &ports[slave_pid];
3790 	port->slave_flag = 1;
3791 }
3792 
3793 void clear_port_slave_flag(portid_t slave_pid)
3794 {
3795 	struct rte_port *port;
3796 
3797 	port = &ports[slave_pid];
3798 	port->slave_flag = 0;
3799 }
3800 
3801 uint8_t port_is_bonding_slave(portid_t slave_pid)
3802 {
3803 	struct rte_port *port;
3804 	struct rte_eth_dev_info dev_info;
3805 	int ret;
3806 
3807 	port = &ports[slave_pid];
3808 	ret = eth_dev_info_get_print_err(slave_pid, &dev_info);
3809 	if (ret != 0) {
3810 		TESTPMD_LOG(ERR,
3811 			"Failed to get device info for port id %d,"
3812 			"cannot determine if the port is a bonded slave",
3813 			slave_pid);
3814 		return 0;
3815 	}
3816 	if ((*dev_info.dev_flags & RTE_ETH_DEV_BONDED_SLAVE) || (port->slave_flag == 1))
3817 		return 1;
3818 	return 0;
3819 }
3820 
3821 const uint16_t vlan_tags[] = {
3822 		0,  1,  2,  3,  4,  5,  6,  7,
3823 		8,  9, 10, 11,  12, 13, 14, 15,
3824 		16, 17, 18, 19, 20, 21, 22, 23,
3825 		24, 25, 26, 27, 28, 29, 30, 31
3826 };
3827 
3828 static  int
3829 get_eth_dcb_conf(portid_t pid, struct rte_eth_conf *eth_conf,
3830 		 enum dcb_mode_enable dcb_mode,
3831 		 enum rte_eth_nb_tcs num_tcs,
3832 		 uint8_t pfc_en)
3833 {
3834 	uint8_t i;
3835 	int32_t rc;
3836 	struct rte_eth_rss_conf rss_conf;
3837 
3838 	/*
3839 	 * Builds up the correct configuration for dcb+vt based on the vlan tags array
3840 	 * given above, and the number of traffic classes available for use.
3841 	 */
3842 	if (dcb_mode == DCB_VT_ENABLED) {
3843 		struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
3844 				&eth_conf->rx_adv_conf.vmdq_dcb_conf;
3845 		struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3846 				&eth_conf->tx_adv_conf.vmdq_dcb_tx_conf;
3847 
3848 		/* VMDQ+DCB RX and TX configurations */
3849 		vmdq_rx_conf->enable_default_pool = 0;
3850 		vmdq_rx_conf->default_pool = 0;
3851 		vmdq_rx_conf->nb_queue_pools =
3852 			(num_tcs ==  ETH_4_TCS ? ETH_32_POOLS : ETH_16_POOLS);
3853 		vmdq_tx_conf->nb_queue_pools =
3854 			(num_tcs ==  ETH_4_TCS ? ETH_32_POOLS : ETH_16_POOLS);
3855 
3856 		vmdq_rx_conf->nb_pool_maps = vmdq_rx_conf->nb_queue_pools;
3857 		for (i = 0; i < vmdq_rx_conf->nb_pool_maps; i++) {
3858 			vmdq_rx_conf->pool_map[i].vlan_id = vlan_tags[i];
3859 			vmdq_rx_conf->pool_map[i].pools =
3860 				1 << (i % vmdq_rx_conf->nb_queue_pools);
3861 		}
3862 		for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3863 			vmdq_rx_conf->dcb_tc[i] = i % num_tcs;
3864 			vmdq_tx_conf->dcb_tc[i] = i % num_tcs;
3865 		}
3866 
3867 		/* set DCB mode of RX and TX of multiple queues */
3868 		eth_conf->rxmode.mq_mode =
3869 				(enum rte_eth_rx_mq_mode)
3870 					(rx_mq_mode & ETH_MQ_RX_VMDQ_DCB);
3871 		eth_conf->txmode.mq_mode = ETH_MQ_TX_VMDQ_DCB;
3872 	} else {
3873 		struct rte_eth_dcb_rx_conf *rx_conf =
3874 				&eth_conf->rx_adv_conf.dcb_rx_conf;
3875 		struct rte_eth_dcb_tx_conf *tx_conf =
3876 				&eth_conf->tx_adv_conf.dcb_tx_conf;
3877 
3878 		memset(&rss_conf, 0, sizeof(struct rte_eth_rss_conf));
3879 
3880 		rc = rte_eth_dev_rss_hash_conf_get(pid, &rss_conf);
3881 		if (rc != 0)
3882 			return rc;
3883 
3884 		rx_conf->nb_tcs = num_tcs;
3885 		tx_conf->nb_tcs = num_tcs;
3886 
3887 		for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3888 			rx_conf->dcb_tc[i] = i % num_tcs;
3889 			tx_conf->dcb_tc[i] = i % num_tcs;
3890 		}
3891 
3892 		eth_conf->rxmode.mq_mode =
3893 				(enum rte_eth_rx_mq_mode)
3894 					(rx_mq_mode & ETH_MQ_RX_DCB_RSS);
3895 		eth_conf->rx_adv_conf.rss_conf = rss_conf;
3896 		eth_conf->txmode.mq_mode = ETH_MQ_TX_DCB;
3897 	}
3898 
3899 	if (pfc_en)
3900 		eth_conf->dcb_capability_en =
3901 				ETH_DCB_PG_SUPPORT | ETH_DCB_PFC_SUPPORT;
3902 	else
3903 		eth_conf->dcb_capability_en = ETH_DCB_PG_SUPPORT;
3904 
3905 	return 0;
3906 }
3907 
3908 int
3909 init_port_dcb_config(portid_t pid,
3910 		     enum dcb_mode_enable dcb_mode,
3911 		     enum rte_eth_nb_tcs num_tcs,
3912 		     uint8_t pfc_en)
3913 {
3914 	struct rte_eth_conf port_conf;
3915 	struct rte_port *rte_port;
3916 	int retval;
3917 	uint16_t i;
3918 
3919 	if (num_procs > 1) {
3920 		printf("The multi-process feature doesn't support dcb.\n");
3921 		return -ENOTSUP;
3922 	}
3923 	rte_port = &ports[pid];
3924 
3925 	/* retain the original device configuration. */
3926 	memcpy(&port_conf, &rte_port->dev_conf, sizeof(struct rte_eth_conf));
3927 
3928 	/*set configuration of DCB in vt mode and DCB in non-vt mode*/
3929 	retval = get_eth_dcb_conf(pid, &port_conf, dcb_mode, num_tcs, pfc_en);
3930 	if (retval < 0)
3931 		return retval;
3932 	port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_VLAN_FILTER;
3933 
3934 	/* re-configure the device . */
3935 	retval = rte_eth_dev_configure(pid, nb_rxq, nb_rxq, &port_conf);
3936 	if (retval < 0)
3937 		return retval;
3938 
3939 	retval = eth_dev_info_get_print_err(pid, &rte_port->dev_info);
3940 	if (retval != 0)
3941 		return retval;
3942 
3943 	/* If dev_info.vmdq_pool_base is greater than 0,
3944 	 * the queue id of vmdq pools is started after pf queues.
3945 	 */
3946 	if (dcb_mode == DCB_VT_ENABLED &&
3947 	    rte_port->dev_info.vmdq_pool_base > 0) {
3948 		fprintf(stderr,
3949 			"VMDQ_DCB multi-queue mode is nonsensical for port %d.\n",
3950 			pid);
3951 		return -1;
3952 	}
3953 
3954 	/* Assume the ports in testpmd have the same dcb capability
3955 	 * and has the same number of rxq and txq in dcb mode
3956 	 */
3957 	if (dcb_mode == DCB_VT_ENABLED) {
3958 		if (rte_port->dev_info.max_vfs > 0) {
3959 			nb_rxq = rte_port->dev_info.nb_rx_queues;
3960 			nb_txq = rte_port->dev_info.nb_tx_queues;
3961 		} else {
3962 			nb_rxq = rte_port->dev_info.max_rx_queues;
3963 			nb_txq = rte_port->dev_info.max_tx_queues;
3964 		}
3965 	} else {
3966 		/*if vt is disabled, use all pf queues */
3967 		if (rte_port->dev_info.vmdq_pool_base == 0) {
3968 			nb_rxq = rte_port->dev_info.max_rx_queues;
3969 			nb_txq = rte_port->dev_info.max_tx_queues;
3970 		} else {
3971 			nb_rxq = (queueid_t)num_tcs;
3972 			nb_txq = (queueid_t)num_tcs;
3973 
3974 		}
3975 	}
3976 	rx_free_thresh = 64;
3977 
3978 	memcpy(&rte_port->dev_conf, &port_conf, sizeof(struct rte_eth_conf));
3979 
3980 	rxtx_port_config(rte_port);
3981 	/* VLAN filter */
3982 	rte_port->dev_conf.rxmode.offloads |= DEV_RX_OFFLOAD_VLAN_FILTER;
3983 	for (i = 0; i < RTE_DIM(vlan_tags); i++)
3984 		rx_vft_set(pid, vlan_tags[i], 1);
3985 
3986 	retval = eth_macaddr_get_print_err(pid, &rte_port->eth_addr);
3987 	if (retval != 0)
3988 		return retval;
3989 
3990 	rte_port->dcb_flag = 1;
3991 
3992 	/* Enter DCB configuration status */
3993 	dcb_config = 1;
3994 
3995 	return 0;
3996 }
3997 
3998 static void
3999 init_port(void)
4000 {
4001 	int i;
4002 
4003 	/* Configuration of Ethernet ports. */
4004 	ports = rte_zmalloc("testpmd: ports",
4005 			    sizeof(struct rte_port) * RTE_MAX_ETHPORTS,
4006 			    RTE_CACHE_LINE_SIZE);
4007 	if (ports == NULL) {
4008 		rte_exit(EXIT_FAILURE,
4009 				"rte_zmalloc(%d struct rte_port) failed\n",
4010 				RTE_MAX_ETHPORTS);
4011 	}
4012 	for (i = 0; i < RTE_MAX_ETHPORTS; i++)
4013 		ports[i].xstats_info.allocated = false;
4014 	for (i = 0; i < RTE_MAX_ETHPORTS; i++)
4015 		LIST_INIT(&ports[i].flow_tunnel_list);
4016 	/* Initialize ports NUMA structures */
4017 	memset(port_numa, NUMA_NO_CONFIG, RTE_MAX_ETHPORTS);
4018 	memset(rxring_numa, NUMA_NO_CONFIG, RTE_MAX_ETHPORTS);
4019 	memset(txring_numa, NUMA_NO_CONFIG, RTE_MAX_ETHPORTS);
4020 }
4021 
4022 static void
4023 force_quit(void)
4024 {
4025 	pmd_test_exit();
4026 	prompt_exit();
4027 }
4028 
4029 static void
4030 print_stats(void)
4031 {
4032 	uint8_t i;
4033 	const char clr[] = { 27, '[', '2', 'J', '\0' };
4034 	const char top_left[] = { 27, '[', '1', ';', '1', 'H', '\0' };
4035 
4036 	/* Clear screen and move to top left */
4037 	printf("%s%s", clr, top_left);
4038 
4039 	printf("\nPort statistics ====================================");
4040 	for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++)
4041 		nic_stats_display(fwd_ports_ids[i]);
4042 
4043 	fflush(stdout);
4044 }
4045 
4046 static void
4047 signal_handler(int signum)
4048 {
4049 	if (signum == SIGINT || signum == SIGTERM) {
4050 		fprintf(stderr, "\nSignal %d received, preparing to exit...\n",
4051 			signum);
4052 #ifdef RTE_LIB_PDUMP
4053 		/* uninitialize packet capture framework */
4054 		rte_pdump_uninit();
4055 #endif
4056 #ifdef RTE_LIB_LATENCYSTATS
4057 		if (latencystats_enabled != 0)
4058 			rte_latencystats_uninit();
4059 #endif
4060 		force_quit();
4061 		/* Set flag to indicate the force termination. */
4062 		f_quit = 1;
4063 		/* exit with the expected status */
4064 #ifndef RTE_EXEC_ENV_WINDOWS
4065 		signal(signum, SIG_DFL);
4066 		kill(getpid(), signum);
4067 #endif
4068 	}
4069 }
4070 
4071 int
4072 main(int argc, char** argv)
4073 {
4074 	int diag;
4075 	portid_t port_id;
4076 	uint16_t count;
4077 	int ret;
4078 
4079 	signal(SIGINT, signal_handler);
4080 	signal(SIGTERM, signal_handler);
4081 
4082 	testpmd_logtype = rte_log_register("testpmd");
4083 	if (testpmd_logtype < 0)
4084 		rte_exit(EXIT_FAILURE, "Cannot register log type");
4085 	rte_log_set_level(testpmd_logtype, RTE_LOG_DEBUG);
4086 
4087 	diag = rte_eal_init(argc, argv);
4088 	if (diag < 0)
4089 		rte_exit(EXIT_FAILURE, "Cannot init EAL: %s\n",
4090 			 rte_strerror(rte_errno));
4091 
4092 	ret = register_eth_event_callback();
4093 	if (ret != 0)
4094 		rte_exit(EXIT_FAILURE, "Cannot register for ethdev events");
4095 
4096 #ifdef RTE_LIB_PDUMP
4097 	/* initialize packet capture framework */
4098 	rte_pdump_init();
4099 #endif
4100 
4101 	count = 0;
4102 	RTE_ETH_FOREACH_DEV(port_id) {
4103 		ports_ids[count] = port_id;
4104 		count++;
4105 	}
4106 	nb_ports = (portid_t) count;
4107 	if (nb_ports == 0)
4108 		TESTPMD_LOG(WARNING, "No probed ethernet devices\n");
4109 
4110 	/* allocate port structures, and init them */
4111 	init_port();
4112 
4113 	set_def_fwd_config();
4114 	if (nb_lcores == 0)
4115 		rte_exit(EXIT_FAILURE, "No cores defined for forwarding\n"
4116 			 "Check the core mask argument\n");
4117 
4118 	/* Bitrate/latency stats disabled by default */
4119 #ifdef RTE_LIB_BITRATESTATS
4120 	bitrate_enabled = 0;
4121 #endif
4122 #ifdef RTE_LIB_LATENCYSTATS
4123 	latencystats_enabled = 0;
4124 #endif
4125 
4126 	/* on FreeBSD, mlockall() is disabled by default */
4127 #ifdef RTE_EXEC_ENV_FREEBSD
4128 	do_mlockall = 0;
4129 #else
4130 	do_mlockall = 1;
4131 #endif
4132 
4133 	argc -= diag;
4134 	argv += diag;
4135 	if (argc > 1)
4136 		launch_args_parse(argc, argv);
4137 
4138 #ifndef RTE_EXEC_ENV_WINDOWS
4139 	if (do_mlockall && mlockall(MCL_CURRENT | MCL_FUTURE)) {
4140 		TESTPMD_LOG(NOTICE, "mlockall() failed with error \"%s\"\n",
4141 			strerror(errno));
4142 	}
4143 #endif
4144 
4145 	if (tx_first && interactive)
4146 		rte_exit(EXIT_FAILURE, "--tx-first cannot be used on "
4147 				"interactive mode.\n");
4148 
4149 	if (tx_first && lsc_interrupt) {
4150 		fprintf(stderr,
4151 			"Warning: lsc_interrupt needs to be off when using tx_first. Disabling.\n");
4152 		lsc_interrupt = 0;
4153 	}
4154 
4155 	if (!nb_rxq && !nb_txq)
4156 		fprintf(stderr,
4157 			"Warning: Either rx or tx queues should be non-zero\n");
4158 
4159 	if (nb_rxq > 1 && nb_rxq > nb_txq)
4160 		fprintf(stderr,
4161 			"Warning: nb_rxq=%d enables RSS configuration, but nb_txq=%d will prevent to fully test it.\n",
4162 			nb_rxq, nb_txq);
4163 
4164 	init_config();
4165 
4166 	if (hot_plug) {
4167 		ret = rte_dev_hotplug_handle_enable();
4168 		if (ret) {
4169 			RTE_LOG(ERR, EAL,
4170 				"fail to enable hotplug handling.");
4171 			return -1;
4172 		}
4173 
4174 		ret = rte_dev_event_monitor_start();
4175 		if (ret) {
4176 			RTE_LOG(ERR, EAL,
4177 				"fail to start device event monitoring.");
4178 			return -1;
4179 		}
4180 
4181 		ret = rte_dev_event_callback_register(NULL,
4182 			dev_event_callback, NULL);
4183 		if (ret) {
4184 			RTE_LOG(ERR, EAL,
4185 				"fail  to register device event callback\n");
4186 			return -1;
4187 		}
4188 	}
4189 
4190 	if (!no_device_start && start_port(RTE_PORT_ALL) != 0)
4191 		rte_exit(EXIT_FAILURE, "Start ports failed\n");
4192 
4193 	/* set all ports to promiscuous mode by default */
4194 	RTE_ETH_FOREACH_DEV(port_id) {
4195 		ret = rte_eth_promiscuous_enable(port_id);
4196 		if (ret != 0)
4197 			fprintf(stderr,
4198 				"Error during enabling promiscuous mode for port %u: %s - ignore\n",
4199 				port_id, rte_strerror(-ret));
4200 	}
4201 
4202 	/* Init metrics library */
4203 	rte_metrics_init(rte_socket_id());
4204 
4205 #ifdef RTE_LIB_LATENCYSTATS
4206 	if (latencystats_enabled != 0) {
4207 		int ret = rte_latencystats_init(1, NULL);
4208 		if (ret)
4209 			fprintf(stderr,
4210 				"Warning: latencystats init() returned error %d\n",
4211 				ret);
4212 		fprintf(stderr, "Latencystats running on lcore %d\n",
4213 			latencystats_lcore_id);
4214 	}
4215 #endif
4216 
4217 	/* Setup bitrate stats */
4218 #ifdef RTE_LIB_BITRATESTATS
4219 	if (bitrate_enabled != 0) {
4220 		bitrate_data = rte_stats_bitrate_create();
4221 		if (bitrate_data == NULL)
4222 			rte_exit(EXIT_FAILURE,
4223 				"Could not allocate bitrate data.\n");
4224 		rte_stats_bitrate_reg(bitrate_data);
4225 	}
4226 #endif
4227 #ifdef RTE_LIB_CMDLINE
4228 	if (strlen(cmdline_filename) != 0)
4229 		cmdline_read_from_file(cmdline_filename);
4230 
4231 	if (interactive == 1) {
4232 		if (auto_start) {
4233 			printf("Start automatic packet forwarding\n");
4234 			start_packet_forwarding(0);
4235 		}
4236 		prompt();
4237 		pmd_test_exit();
4238 	} else
4239 #endif
4240 	{
4241 		char c;
4242 		int rc;
4243 
4244 		f_quit = 0;
4245 
4246 		printf("No commandline core given, start packet forwarding\n");
4247 		start_packet_forwarding(tx_first);
4248 		if (stats_period != 0) {
4249 			uint64_t prev_time = 0, cur_time, diff_time = 0;
4250 			uint64_t timer_period;
4251 
4252 			/* Convert to number of cycles */
4253 			timer_period = stats_period * rte_get_timer_hz();
4254 
4255 			while (f_quit == 0) {
4256 				cur_time = rte_get_timer_cycles();
4257 				diff_time += cur_time - prev_time;
4258 
4259 				if (diff_time >= timer_period) {
4260 					print_stats();
4261 					/* Reset the timer */
4262 					diff_time = 0;
4263 				}
4264 				/* Sleep to avoid unnecessary checks */
4265 				prev_time = cur_time;
4266 				rte_delay_us_sleep(US_PER_S);
4267 			}
4268 		}
4269 
4270 		printf("Press enter to exit\n");
4271 		rc = read(0, &c, 1);
4272 		pmd_test_exit();
4273 		if (rc < 0)
4274 			return 1;
4275 	}
4276 
4277 	ret = rte_eal_cleanup();
4278 	if (ret != 0)
4279 		rte_exit(EXIT_FAILURE,
4280 			 "EAL cleanup failed: %s\n", strerror(-ret));
4281 
4282 	return EXIT_SUCCESS;
4283 }
4284