xref: /dpdk/app/test-pmd/testpmd.c (revision fb360c75062d71014c1bba90db64f493fb0ae9e2)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2017 Intel Corporation
3  */
4 
5 #include <stdarg.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <signal.h>
9 #include <string.h>
10 #include <time.h>
11 #include <fcntl.h>
12 #ifndef RTE_EXEC_ENV_WINDOWS
13 #include <sys/mman.h>
14 #endif
15 #include <sys/types.h>
16 #include <errno.h>
17 #include <stdbool.h>
18 
19 #include <sys/queue.h>
20 #include <sys/stat.h>
21 
22 #include <stdint.h>
23 #include <unistd.h>
24 #include <inttypes.h>
25 
26 #include <rte_common.h>
27 #include <rte_errno.h>
28 #include <rte_byteorder.h>
29 #include <rte_log.h>
30 #include <rte_debug.h>
31 #include <rte_cycles.h>
32 #include <rte_memory.h>
33 #include <rte_memcpy.h>
34 #include <rte_launch.h>
35 #include <rte_bus.h>
36 #include <rte_eal.h>
37 #include <rte_alarm.h>
38 #include <rte_per_lcore.h>
39 #include <rte_lcore.h>
40 #include <rte_branch_prediction.h>
41 #include <rte_mempool.h>
42 #include <rte_malloc.h>
43 #include <rte_mbuf.h>
44 #include <rte_mbuf_pool_ops.h>
45 #include <rte_interrupts.h>
46 #include <rte_ether.h>
47 #include <rte_ethdev.h>
48 #include <rte_dev.h>
49 #include <rte_string_fns.h>
50 #ifdef RTE_NET_IXGBE
51 #include <rte_pmd_ixgbe.h>
52 #endif
53 #ifdef RTE_LIB_PDUMP
54 #include <rte_pdump.h>
55 #endif
56 #include <rte_flow.h>
57 #ifdef RTE_LIB_METRICS
58 #include <rte_metrics.h>
59 #endif
60 #ifdef RTE_LIB_BITRATESTATS
61 #include <rte_bitrate.h>
62 #endif
63 #ifdef RTE_LIB_LATENCYSTATS
64 #include <rte_latencystats.h>
65 #endif
66 #ifdef RTE_EXEC_ENV_WINDOWS
67 #include <process.h>
68 #endif
69 #ifdef RTE_NET_BOND
70 #include <rte_eth_bond.h>
71 #endif
72 #ifdef RTE_NET_MLX5
73 #include "mlx5_testpmd.h"
74 #endif
75 
76 #include "testpmd.h"
77 
78 #ifndef MAP_HUGETLB
79 /* FreeBSD may not have MAP_HUGETLB (in fact, it probably doesn't) */
80 #define HUGE_FLAG (0x40000)
81 #else
82 #define HUGE_FLAG MAP_HUGETLB
83 #endif
84 
85 #ifndef MAP_HUGE_SHIFT
86 /* older kernels (or FreeBSD) will not have this define */
87 #define HUGE_SHIFT (26)
88 #else
89 #define HUGE_SHIFT MAP_HUGE_SHIFT
90 #endif
91 
92 #define EXTMEM_HEAP_NAME "extmem"
93 /*
94  * Zone size with the malloc overhead (max of debug and release variants)
95  * must fit into the smallest supported hugepage size (2M),
96  * so that an IOVA-contiguous zone of this size can always be allocated
97  * if there are free 2M hugepages.
98  */
99 #define EXTBUF_ZONE_SIZE (RTE_PGSIZE_2M - 4 * RTE_CACHE_LINE_SIZE)
100 
101 uint16_t verbose_level = 0; /**< Silent by default. */
102 int testpmd_logtype; /**< Log type for testpmd logs */
103 
104 /* use main core for command line ? */
105 uint8_t interactive = 0;
106 uint8_t auto_start = 0;
107 uint8_t tx_first;
108 char cmdline_filename[PATH_MAX] = {0};
109 
110 /*
111  * NUMA support configuration.
112  * When set, the NUMA support attempts to dispatch the allocation of the
113  * RX and TX memory rings, and of the DMA memory buffers (mbufs) for the
114  * probed ports among the CPU sockets 0 and 1.
115  * Otherwise, all memory is allocated from CPU socket 0.
116  */
117 uint8_t numa_support = 1; /**< numa enabled by default */
118 
119 /*
 * In UMA mode, all memory is allocated from socket 0 if --socket-num is
121  * not configured.
122  */
123 uint8_t socket_num = UMA_NO_CONFIG;
124 
125 /*
126  * Select mempool allocation type:
127  * - native: use regular DPDK memory
128  * - anon: use regular DPDK memory to create mempool, but populate using
129  *         anonymous memory (may not be IOVA-contiguous)
130  * - xmem: use externally allocated hugepage memory
131  */
132 uint8_t mp_alloc_type = MP_ALLOC_NATIVE;
133 
134 /*
135  * Store specified sockets on which memory pool to be used by ports
136  * is allocated.
137  */
138 uint8_t port_numa[RTE_MAX_ETHPORTS];
139 
140 /*
141  * Store specified sockets on which RX ring to be used by ports
142  * is allocated.
143  */
144 uint8_t rxring_numa[RTE_MAX_ETHPORTS];
145 
146 /*
147  * Store specified sockets on which TX ring to be used by ports
148  * is allocated.
149  */
150 uint8_t txring_numa[RTE_MAX_ETHPORTS];
151 
152 /*
153  * Record the Ethernet address of peer target ports to which packets are
154  * forwarded.
155  * Must be instantiated with the ethernet addresses of peer traffic generator
156  * ports.
157  */
158 struct rte_ether_addr peer_eth_addrs[RTE_MAX_ETHPORTS];
159 portid_t nb_peer_eth_addrs = 0;
160 
161 /*
162  * Probed Target Environment.
163  */
164 struct rte_port *ports;	       /**< For all probed ethernet ports. */
165 portid_t nb_ports;             /**< Number of probed ethernet ports. */
166 struct fwd_lcore **fwd_lcores; /**< For all probed logical cores. */
167 lcoreid_t nb_lcores;           /**< Number of probed logical cores. */
168 
169 portid_t ports_ids[RTE_MAX_ETHPORTS]; /**< Store all port ids. */
170 
171 /*
172  * Test Forwarding Configuration.
173  *    nb_fwd_lcores <= nb_cfg_lcores <= nb_lcores
174  *    nb_fwd_ports  <= nb_cfg_ports  <= nb_ports
175  */
176 lcoreid_t nb_cfg_lcores; /**< Number of configured logical cores. */
177 lcoreid_t nb_fwd_lcores; /**< Number of forwarding logical cores. */
178 portid_t  nb_cfg_ports;  /**< Number of configured ports. */
179 portid_t  nb_fwd_ports;  /**< Number of forwarding ports. */
180 
181 unsigned int fwd_lcores_cpuids[RTE_MAX_LCORE]; /**< CPU ids configuration. */
182 portid_t fwd_ports_ids[RTE_MAX_ETHPORTS];      /**< Port ids configuration. */
183 
184 struct fwd_stream **fwd_streams; /**< For each RX queue of each port. */
185 streamid_t nb_fwd_streams;       /**< Is equal to (nb_ports * nb_rxq). */
186 
187 /*
188  * Forwarding engines.
189  */
190 struct fwd_engine * fwd_engines[] = {
191 	&io_fwd_engine,
192 	&mac_fwd_engine,
193 	&mac_swap_engine,
194 	&flow_gen_engine,
195 	&rx_only_engine,
196 	&tx_only_engine,
197 	&csum_fwd_engine,
198 	&icmp_echo_engine,
199 	&noisy_vnf_engine,
200 	&five_tuple_swap_fwd_engine,
201 #ifdef RTE_LIBRTE_IEEE1588
202 	&ieee1588_fwd_engine,
203 #endif
204 	&shared_rxq_engine,
205 	NULL,
206 };
207 
208 struct rte_mempool *mempools[RTE_MAX_NUMA_NODES * MAX_SEGS_BUFFER_SPLIT];
209 uint16_t mempool_flags;
210 
211 struct fwd_config cur_fwd_config;
212 struct fwd_engine *cur_fwd_eng = &io_fwd_engine; /**< IO mode by default. */
213 uint32_t retry_enabled;
214 uint32_t burst_tx_delay_time = BURST_TX_WAIT_US;
215 uint32_t burst_tx_retry_num = BURST_TX_RETRIES;
216 
217 uint32_t mbuf_data_size_n = 1; /* Number of specified mbuf sizes. */
218 uint16_t mbuf_data_size[MAX_SEGS_BUFFER_SPLIT] = {
219 	DEFAULT_MBUF_DATA_SIZE
220 }; /**< Mbuf data space size. */
221 uint32_t param_total_num_mbufs = 0;  /**< number of mbufs in all pools - if
222                                       * specified on command-line. */
223 uint16_t stats_period; /**< Period to show statistics (disabled by default) */
224 
225 /** Extended statistics to show. */
226 struct rte_eth_xstat_name *xstats_display;
227 
228 unsigned int xstats_display_num; /**< Size of extended statistics to show */
229 
230 /*
 * In a container, the process running with the 'stats-period' option cannot
 * be terminated. Set this flag to exit the stats period loop once
 * SIGINT/SIGTERM is received.
233  */
234 static volatile uint8_t f_quit;
235 uint8_t cl_quit; /* Quit testpmd from cmdline. */
236 
237 /*
238  * Max Rx frame size, set by '--max-pkt-len' parameter.
239  */
240 uint32_t max_rx_pkt_len;
241 
242 /*
243  * Configuration of packet segments used to scatter received packets
244  * if some of split features is configured.
245  */
246 uint16_t rx_pkt_seg_lengths[MAX_SEGS_BUFFER_SPLIT];
247 uint8_t  rx_pkt_nb_segs; /**< Number of segments to split */
248 uint16_t rx_pkt_seg_offsets[MAX_SEGS_BUFFER_SPLIT];
249 uint8_t  rx_pkt_nb_offs; /**< Number of specified offsets */
250 uint32_t rx_pkt_hdr_protos[MAX_SEGS_BUFFER_SPLIT];
251 
252 /*
253  * Configuration of packet segments used by the "txonly" processing engine.
254  */
255 uint16_t tx_pkt_length = TXONLY_DEF_PACKET_LEN; /**< TXONLY packet length. */
256 uint16_t tx_pkt_seg_lengths[RTE_MAX_SEGS_PER_PKT] = {
257 	TXONLY_DEF_PACKET_LEN,
258 };
259 uint8_t  tx_pkt_nb_segs = 1; /**< Number of segments in TXONLY packets */
260 
261 enum tx_pkt_split tx_pkt_split = TX_PKT_SPLIT_OFF;
262 /**< Split policy for packets to TX. */
263 
264 uint8_t txonly_multi_flow;
265 /**< Whether multiple flows are generated in TXONLY mode. */
266 
267 uint32_t tx_pkt_times_inter;
268 /**< Timings for send scheduling in TXONLY mode, time between bursts. */
269 
270 uint32_t tx_pkt_times_intra;
271 /**< Timings for send scheduling in TXONLY mode, time between packets. */
272 
273 uint16_t nb_pkt_per_burst = DEF_PKT_BURST; /**< Number of packets per burst. */
274 uint16_t nb_pkt_flowgen_clones; /**< Number of Tx packet clones to send in flowgen mode. */
275 int nb_flows_flowgen = 1024; /**< Number of flows in flowgen mode. */
276 uint16_t mb_mempool_cache = DEF_MBUF_CACHE; /**< Size of mbuf mempool cache. */
277 
278 /* current configuration is in DCB or not,0 means it is not in DCB mode */
279 uint8_t dcb_config = 0;
280 
281 /*
282  * Configurable number of RX/TX queues.
283  */
284 queueid_t nb_hairpinq; /**< Number of hairpin queues per port. */
285 queueid_t nb_rxq = 1; /**< Number of RX queues per port. */
286 queueid_t nb_txq = 1; /**< Number of TX queues per port. */
287 
288 /*
289  * Configurable number of RX/TX ring descriptors.
290  * Defaults are supplied by drivers via ethdev.
291  */
292 #define RX_DESC_DEFAULT 0
293 #define TX_DESC_DEFAULT 0
294 uint16_t nb_rxd = RX_DESC_DEFAULT; /**< Number of RX descriptors. */
295 uint16_t nb_txd = TX_DESC_DEFAULT; /**< Number of TX descriptors. */
296 
297 #define RTE_PMD_PARAM_UNSET -1
298 /*
299  * Configurable values of RX and TX ring threshold registers.
300  */
301 
302 int8_t rx_pthresh = RTE_PMD_PARAM_UNSET;
303 int8_t rx_hthresh = RTE_PMD_PARAM_UNSET;
304 int8_t rx_wthresh = RTE_PMD_PARAM_UNSET;
305 
306 int8_t tx_pthresh = RTE_PMD_PARAM_UNSET;
307 int8_t tx_hthresh = RTE_PMD_PARAM_UNSET;
308 int8_t tx_wthresh = RTE_PMD_PARAM_UNSET;
309 
310 /*
311  * Configurable value of RX free threshold.
312  */
313 int16_t rx_free_thresh = RTE_PMD_PARAM_UNSET;
314 
315 /*
316  * Configurable value of RX drop enable.
317  */
318 int8_t rx_drop_en = RTE_PMD_PARAM_UNSET;
319 
320 /*
321  * Configurable value of TX free threshold.
322  */
323 int16_t tx_free_thresh = RTE_PMD_PARAM_UNSET;
324 
325 /*
326  * Configurable value of TX RS bit threshold.
327  */
328 int16_t tx_rs_thresh = RTE_PMD_PARAM_UNSET;
329 
330 /*
331  * Configurable value of buffered packets before sending.
332  */
333 uint16_t noisy_tx_sw_bufsz;
334 
335 /*
336  * Configurable value of packet buffer timeout.
337  */
338 uint16_t noisy_tx_sw_buf_flush_time;
339 
340 /*
341  * Configurable value for size of VNF internal memory area
342  * used for simulating noisy neighbour behaviour
343  */
344 uint64_t noisy_lkup_mem_sz;
345 
346 /*
347  * Configurable value of number of random writes done in
348  * VNF simulation memory area.
349  */
350 uint64_t noisy_lkup_num_writes;
351 
352 /*
353  * Configurable value of number of random reads done in
354  * VNF simulation memory area.
355  */
356 uint64_t noisy_lkup_num_reads;
357 
358 /*
359  * Configurable value of number of random reads/writes done in
360  * VNF simulation memory area.
361  */
362 uint64_t noisy_lkup_num_reads_writes;
363 
364 /*
365  * Receive Side Scaling (RSS) configuration.
366  */
367 uint64_t rss_hf = RTE_ETH_RSS_IP; /* RSS IP by default. */
368 
369 /*
370  * Port topology configuration
371  */
372 uint16_t port_topology = PORT_TOPOLOGY_PAIRED; /* Ports are paired by default */
373 
374 /*
 * Avoid flushing all the RX streams before forwarding starts.
376  */
377 uint8_t no_flush_rx = 0; /* flush by default */
378 
379 /*
380  * Flow API isolated mode.
381  */
382 uint8_t flow_isolate_all;
383 
384 /*
 * Avoid checking link status when starting/stopping a port.
386  */
387 uint8_t no_link_check = 0; /* check by default */
388 
389 /*
390  * Don't automatically start all ports in interactive mode.
391  */
392 uint8_t no_device_start = 0;
393 
394 /*
395  * Enable link status change notification
396  */
397 uint8_t lsc_interrupt = 1; /* enabled by default */
398 
399 /*
400  * Enable device removal notification.
401  */
402 uint8_t rmv_interrupt = 1; /* enabled by default */
403 
404 uint8_t hot_plug = 0; /**< hotplug disabled by default. */
405 
406 /* After attach, port setup is called on event or by iterator */
407 bool setup_on_probe_event = true;
408 
409 /* Clear ptypes on port initialization. */
410 uint8_t clear_ptypes = true;
411 
412 /* Hairpin ports configuration mode. */
413 uint32_t hairpin_mode;
414 
415 /* Pretty printing of ethdev events */
416 static const char * const eth_event_desc[] = {
417 	[RTE_ETH_EVENT_UNKNOWN] = "unknown",
418 	[RTE_ETH_EVENT_INTR_LSC] = "link state change",
419 	[RTE_ETH_EVENT_QUEUE_STATE] = "queue state",
420 	[RTE_ETH_EVENT_INTR_RESET] = "reset",
421 	[RTE_ETH_EVENT_VF_MBOX] = "VF mbox",
422 	[RTE_ETH_EVENT_IPSEC] = "IPsec",
423 	[RTE_ETH_EVENT_MACSEC] = "MACsec",
424 	[RTE_ETH_EVENT_INTR_RMV] = "device removal",
425 	[RTE_ETH_EVENT_NEW] = "device probed",
426 	[RTE_ETH_EVENT_DESTROY] = "device released",
427 	[RTE_ETH_EVENT_FLOW_AGED] = "flow aged",
428 	[RTE_ETH_EVENT_RX_AVAIL_THRESH] = "RxQ available descriptors threshold reached",
429 	[RTE_ETH_EVENT_ERR_RECOVERING] = "error recovering",
430 	[RTE_ETH_EVENT_RECOVERY_SUCCESS] = "error recovery successful",
431 	[RTE_ETH_EVENT_RECOVERY_FAILED] = "error recovery failed",
432 	[RTE_ETH_EVENT_MAX] = NULL,
433 };
434 
435 /*
436  * Display or mask ether events
437  * Default to all events except VF_MBOX
438  */
439 uint32_t event_print_mask = (UINT32_C(1) << RTE_ETH_EVENT_UNKNOWN) |
440 			    (UINT32_C(1) << RTE_ETH_EVENT_INTR_LSC) |
441 			    (UINT32_C(1) << RTE_ETH_EVENT_QUEUE_STATE) |
442 			    (UINT32_C(1) << RTE_ETH_EVENT_INTR_RESET) |
443 			    (UINT32_C(1) << RTE_ETH_EVENT_IPSEC) |
444 			    (UINT32_C(1) << RTE_ETH_EVENT_MACSEC) |
445 			    (UINT32_C(1) << RTE_ETH_EVENT_INTR_RMV) |
446 			    (UINT32_C(1) << RTE_ETH_EVENT_FLOW_AGED) |
447 			    (UINT32_C(1) << RTE_ETH_EVENT_ERR_RECOVERING) |
448 			    (UINT32_C(1) << RTE_ETH_EVENT_RECOVERY_SUCCESS) |
449 			    (UINT32_C(1) << RTE_ETH_EVENT_RECOVERY_FAILED);
450 /*
 * Decide whether all memory is locked for performance.
452  */
453 int do_mlockall = 0;
454 
455 #ifdef RTE_LIB_LATENCYSTATS
456 
457 /*
458  * Set when latency stats is enabled in the commandline
459  */
460 uint8_t latencystats_enabled;
461 
462 /*
463  * Lcore ID to service latency statistics.
464  */
465 lcoreid_t latencystats_lcore_id = -1;
466 
467 #endif
468 
469 /*
470  * Ethernet device configuration.
471  */
472 struct rte_eth_rxmode rx_mode;
473 
474 struct rte_eth_txmode tx_mode = {
475 	.offloads = RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE,
476 };
477 
478 volatile int test_done = 1; /* stop packet forwarding when set to 1. */
479 
480 /*
481  * Display zero values by default for xstats
482  */
483 uint8_t xstats_hide_zero;
484 
485 /*
486  * Measure of CPU cycles disabled by default
487  */
488 uint8_t record_core_cycles;
489 
490 /*
491  * Display of RX and TX bursts disabled by default
492  */
493 uint8_t record_burst_stats;
494 
495 /*
496  * Number of ports per shared Rx queue group, 0 disable.
497  */
498 uint32_t rxq_share;
499 
500 unsigned int num_sockets = 0;
501 unsigned int socket_ids[RTE_MAX_NUMA_NODES];
502 
503 #ifdef RTE_LIB_BITRATESTATS
504 /* Bitrate statistics */
505 struct rte_stats_bitrates *bitrate_data;
506 lcoreid_t bitrate_lcore_id;
507 uint8_t bitrate_enabled;
508 #endif
509 
510 #ifdef RTE_LIB_GRO
511 struct gro_status gro_ports[RTE_MAX_ETHPORTS];
512 uint8_t gro_flush_cycles = GRO_DEFAULT_FLUSH_CYCLES;
513 #endif
514 
515 /*
 * Hexadecimal bitmask of the RX mq modes that can be enabled.
517  */
518 enum rte_eth_rx_mq_mode rx_mq_mode = RTE_ETH_MQ_RX_VMDQ_DCB_RSS;
519 
520 /*
521  * Used to set forced link speed
522  */
523 uint32_t eth_link_speed;
524 
525 /*
526  * ID of the current process in multi-process, used to
527  * configure the queues to be polled.
528  */
529 int proc_id;
530 
531 /*
532  * Number of processes in multi-process, used to
533  * configure the queues to be polled.
534  */
535 unsigned int num_procs = 1;
536 
537 static void
538 eth_rx_metadata_negotiate_mp(uint16_t port_id)
539 {
540 	uint64_t rx_meta_features = 0;
541 	int ret;
542 
543 	if (!is_proc_primary())
544 		return;
545 
546 	rx_meta_features |= RTE_ETH_RX_METADATA_USER_FLAG;
547 	rx_meta_features |= RTE_ETH_RX_METADATA_USER_MARK;
548 	rx_meta_features |= RTE_ETH_RX_METADATA_TUNNEL_ID;
549 
550 	ret = rte_eth_rx_metadata_negotiate(port_id, &rx_meta_features);
551 	if (ret == 0) {
552 		if (!(rx_meta_features & RTE_ETH_RX_METADATA_USER_FLAG)) {
553 			TESTPMD_LOG(DEBUG, "Flow action FLAG will not affect Rx mbufs on port %u\n",
554 				    port_id);
555 		}
556 
557 		if (!(rx_meta_features & RTE_ETH_RX_METADATA_USER_MARK)) {
558 			TESTPMD_LOG(DEBUG, "Flow action MARK will not affect Rx mbufs on port %u\n",
559 				    port_id);
560 		}
561 
562 		if (!(rx_meta_features & RTE_ETH_RX_METADATA_TUNNEL_ID)) {
563 			TESTPMD_LOG(DEBUG, "Flow tunnel offload support might be limited or unavailable on port %u\n",
564 				    port_id);
565 		}
566 	} else if (ret != -ENOTSUP) {
567 		rte_exit(EXIT_FAILURE, "Error when negotiating Rx meta features on port %u: %s\n",
568 			 port_id, rte_strerror(-ret));
569 	}
570 }
571 
/*
 * Multi-process aware wrapper around rte_eth_dev_configure().
 * The device is configured by the primary process only; any other
 * process skips the call and reports success (0).
 */
static int
eth_dev_configure_mp(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
		      const struct rte_eth_conf *dev_conf)
{
	if (!is_proc_primary())
		return 0;

	return rte_eth_dev_configure(port_id, nb_rx_q, nb_tx_q, dev_conf);
}
581 
582 static int
583 change_bonding_slave_port_status(portid_t bond_pid, bool is_stop)
584 {
585 #ifdef RTE_NET_BOND
586 
587 	portid_t slave_pids[RTE_MAX_ETHPORTS];
588 	struct rte_port *port;
589 	int num_slaves;
590 	portid_t slave_pid;
591 	int i;
592 
593 	num_slaves = rte_eth_bond_slaves_get(bond_pid, slave_pids,
594 						RTE_MAX_ETHPORTS);
595 	if (num_slaves < 0) {
596 		fprintf(stderr, "Failed to get slave list for port = %u\n",
597 			bond_pid);
598 		return num_slaves;
599 	}
600 
601 	for (i = 0; i < num_slaves; i++) {
602 		slave_pid = slave_pids[i];
603 		port = &ports[slave_pid];
604 		port->port_status =
605 			is_stop ? RTE_PORT_STOPPED : RTE_PORT_STARTED;
606 	}
607 #else
608 	RTE_SET_USED(bond_pid);
609 	RTE_SET_USED(is_stop);
610 #endif
611 	return 0;
612 }
613 
614 static int
615 eth_dev_start_mp(uint16_t port_id)
616 {
617 	int ret;
618 
619 	if (is_proc_primary()) {
620 		ret = rte_eth_dev_start(port_id);
621 		if (ret != 0)
622 			return ret;
623 
624 		struct rte_port *port = &ports[port_id];
625 
626 		/*
627 		 * Starting a bonded port also starts all slaves under the bonded
628 		 * device. So if this port is bond device, we need to modify the
629 		 * port status of these slaves.
630 		 */
631 		if (port->bond_flag == 1)
632 			return change_bonding_slave_port_status(port_id, false);
633 	}
634 
635 	return 0;
636 }
637 
638 static int
639 eth_dev_stop_mp(uint16_t port_id)
640 {
641 	int ret;
642 
643 	if (is_proc_primary()) {
644 		ret = rte_eth_dev_stop(port_id);
645 		if (ret != 0)
646 			return ret;
647 
648 		struct rte_port *port = &ports[port_id];
649 
650 		/*
651 		 * Stopping a bonded port also stops all slaves under the bonded
652 		 * device. So if this port is bond device, we need to modify the
653 		 * port status of these slaves.
654 		 */
655 		if (port->bond_flag == 1)
656 			return change_bonding_slave_port_status(port_id, true);
657 	}
658 
659 	return 0;
660 }
661 
/*
 * Free a mempool from the primary process only; in any other process
 * the call does nothing.
 */
static void
mempool_free_mp(struct rte_mempool *mp)
{
	if (!is_proc_primary())
		return;

	rte_mempool_free(mp);
}
668 
/*
 * Multi-process aware wrapper around rte_eth_dev_set_mtu().
 * The MTU is set by the primary process only; other processes return 0.
 */
static int
eth_dev_set_mtu_mp(uint16_t port_id, uint16_t mtu)
{
	return is_proc_primary() ? rte_eth_dev_set_mtu(port_id, mtu) : 0;
}
677 
678 /* Forward function declarations */
679 static void setup_attached_port(portid_t pi);
680 static void check_all_ports_link_status(uint32_t port_mask);
681 static int eth_event_callback(portid_t port_id,
682 			      enum rte_eth_event_type type,
683 			      void *param, void *ret_param);
684 static void dev_event_callback(const char *device_name,
685 				enum rte_dev_event_type type,
686 				void *param);
687 static void fill_xstats_display_info(void);
688 
689 /*
690  * Check if all the ports are started.
691  * If yes, return positive value. If not, return zero.
692  */
693 static int all_ports_started(void);
694 
695 #ifdef RTE_LIB_GSO
696 struct gso_status gso_ports[RTE_MAX_ETHPORTS];
697 uint16_t gso_max_segment_size = RTE_ETHER_MAX_LEN - RTE_ETHER_CRC_LEN;
698 #endif
699 
700 /* Holds the registered mbuf dynamic flags names. */
701 char dynf_names[64][RTE_MBUF_DYN_NAMESIZE];
702 
703 
704 /*
705  * Helper function to check if socket is already discovered.
706  * If yes, return positive value. If not, return zero.
707  */
708 int
709 new_socket_id(unsigned int socket_id)
710 {
711 	unsigned int i;
712 
713 	for (i = 0; i < num_sockets; i++) {
714 		if (socket_ids[i] == socket_id)
715 			return 0;
716 	}
717 	return 1;
718 }
719 
720 /*
721  * Setup default configuration.
722  */
723 static void
724 set_default_fwd_lcores_config(void)
725 {
726 	unsigned int i;
727 	unsigned int nb_lc;
728 	unsigned int sock_num;
729 
730 	nb_lc = 0;
731 	for (i = 0; i < RTE_MAX_LCORE; i++) {
732 		if (!rte_lcore_is_enabled(i))
733 			continue;
734 		sock_num = rte_lcore_to_socket_id(i);
735 		if (new_socket_id(sock_num)) {
736 			if (num_sockets >= RTE_MAX_NUMA_NODES) {
737 				rte_exit(EXIT_FAILURE,
738 					 "Total sockets greater than %u\n",
739 					 RTE_MAX_NUMA_NODES);
740 			}
741 			socket_ids[num_sockets++] = sock_num;
742 		}
743 		if (i == rte_get_main_lcore())
744 			continue;
745 		fwd_lcores_cpuids[nb_lc++] = i;
746 	}
747 	nb_lcores = (lcoreid_t) nb_lc;
748 	nb_cfg_lcores = nb_lcores;
749 	nb_fwd_lcores = 1;
750 }
751 
752 static void
753 set_def_peer_eth_addrs(void)
754 {
755 	portid_t i;
756 
757 	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
758 		peer_eth_addrs[i].addr_bytes[0] = RTE_ETHER_LOCAL_ADMIN_ADDR;
759 		peer_eth_addrs[i].addr_bytes[5] = i;
760 	}
761 }
762 
763 static void
764 set_default_fwd_ports_config(void)
765 {
766 	portid_t pt_id;
767 	int i = 0;
768 
769 	RTE_ETH_FOREACH_DEV(pt_id) {
770 		fwd_ports_ids[i++] = pt_id;
771 
772 		/* Update sockets info according to the attached device */
773 		int socket_id = rte_eth_dev_socket_id(pt_id);
774 		if (socket_id >= 0 && new_socket_id(socket_id)) {
775 			if (num_sockets >= RTE_MAX_NUMA_NODES) {
776 				rte_exit(EXIT_FAILURE,
777 					 "Total sockets greater than %u\n",
778 					 RTE_MAX_NUMA_NODES);
779 			}
780 			socket_ids[num_sockets++] = socket_id;
781 		}
782 	}
783 
784 	nb_cfg_ports = nb_ports;
785 	nb_fwd_ports = nb_ports;
786 }
787 
/*
 * Setup the default forwarding configuration: forwarding lcores,
 * default peer Ethernet addresses, then forwarding ports.
 * Both the lcore and the port steps record newly seen sockets in
 * socket_ids[].
 */
void
set_def_fwd_config(void)
{
	set_default_fwd_lcores_config();
	set_def_peer_eth_addrs();
	set_default_fwd_ports_config();
}
795 
796 #ifndef RTE_EXEC_ENV_WINDOWS
797 /* extremely pessimistic estimation of memory required to create a mempool */
798 static int
799 calc_mem_size(uint32_t nb_mbufs, uint32_t mbuf_sz, size_t pgsz, size_t *out)
800 {
801 	unsigned int n_pages, mbuf_per_pg, leftover;
802 	uint64_t total_mem, mbuf_mem, obj_sz;
803 
804 	/* there is no good way to predict how much space the mempool will
805 	 * occupy because it will allocate chunks on the fly, and some of those
806 	 * will come from default DPDK memory while some will come from our
807 	 * external memory, so just assume 128MB will be enough for everyone.
808 	 */
809 	uint64_t hdr_mem = 128 << 20;
810 
811 	/* account for possible non-contiguousness */
812 	obj_sz = rte_mempool_calc_obj_size(mbuf_sz, 0, NULL);
813 	if (obj_sz > pgsz) {
814 		TESTPMD_LOG(ERR, "Object size is bigger than page size\n");
815 		return -1;
816 	}
817 
818 	mbuf_per_pg = pgsz / obj_sz;
819 	leftover = (nb_mbufs % mbuf_per_pg) > 0;
820 	n_pages = (nb_mbufs / mbuf_per_pg) + leftover;
821 
822 	mbuf_mem = n_pages * pgsz;
823 
824 	total_mem = RTE_ALIGN(hdr_mem + mbuf_mem, pgsz);
825 
826 	if (total_mem > SIZE_MAX) {
827 		TESTPMD_LOG(ERR, "Memory size too big\n");
828 		return -1;
829 	}
830 	*out = (size_t)total_mem;
831 
832 	return 0;
833 }
834 
835 static int
836 pagesz_flags(uint64_t page_sz)
837 {
838 	/* as per mmap() manpage, all page sizes are log2 of page size
839 	 * shifted by MAP_HUGE_SHIFT
840 	 */
841 	int log2 = rte_log2_u64(page_sz);
842 
843 	return (log2 << HUGE_SHIFT);
844 }
845 
846 static void *
847 alloc_mem(size_t memsz, size_t pgsz, bool huge)
848 {
849 	void *addr;
850 	int flags;
851 
852 	/* allocate anonymous hugepages */
853 	flags = MAP_ANONYMOUS | MAP_PRIVATE;
854 	if (huge)
855 		flags |= HUGE_FLAG | pagesz_flags(pgsz);
856 
857 	addr = mmap(NULL, memsz, PROT_READ | PROT_WRITE, flags, -1, 0);
858 	if (addr == MAP_FAILED)
859 		return NULL;
860 
861 	return addr;
862 }
863 
/* Description of an external memory area, filled in by create_extmem(). */
struct extmem_param {
	void *addr;                  /* start address of the mapped area */
	size_t len;                  /* length of the mapped area in bytes */
	size_t pgsz;                 /* size of the pages backing the area */
	rte_iova_t *iova_table;      /* malloc()ed table with one IOVA per page */
	unsigned int iova_table_len; /* number of entries in iova_table */
};
871 
872 static int
873 create_extmem(uint32_t nb_mbufs, uint32_t mbuf_sz, struct extmem_param *param,
874 		bool huge)
875 {
876 	uint64_t pgsizes[] = {RTE_PGSIZE_2M, RTE_PGSIZE_1G, /* x86_64, ARM */
877 			RTE_PGSIZE_16M, RTE_PGSIZE_16G};    /* POWER */
878 	unsigned int cur_page, n_pages, pgsz_idx;
879 	size_t mem_sz, cur_pgsz;
880 	rte_iova_t *iovas = NULL;
881 	void *addr;
882 	int ret;
883 
884 	for (pgsz_idx = 0; pgsz_idx < RTE_DIM(pgsizes); pgsz_idx++) {
885 		/* skip anything that is too big */
886 		if (pgsizes[pgsz_idx] > SIZE_MAX)
887 			continue;
888 
889 		cur_pgsz = pgsizes[pgsz_idx];
890 
891 		/* if we were told not to allocate hugepages, override */
892 		if (!huge)
893 			cur_pgsz = sysconf(_SC_PAGESIZE);
894 
895 		ret = calc_mem_size(nb_mbufs, mbuf_sz, cur_pgsz, &mem_sz);
896 		if (ret < 0) {
897 			TESTPMD_LOG(ERR, "Cannot calculate memory size\n");
898 			return -1;
899 		}
900 
901 		/* allocate our memory */
902 		addr = alloc_mem(mem_sz, cur_pgsz, huge);
903 
904 		/* if we couldn't allocate memory with a specified page size,
905 		 * that doesn't mean we can't do it with other page sizes, so
906 		 * try another one.
907 		 */
908 		if (addr == NULL)
909 			continue;
910 
911 		/* store IOVA addresses for every page in this memory area */
912 		n_pages = mem_sz / cur_pgsz;
913 
914 		iovas = malloc(sizeof(*iovas) * n_pages);
915 
916 		if (iovas == NULL) {
917 			TESTPMD_LOG(ERR, "Cannot allocate memory for iova addresses\n");
918 			goto fail;
919 		}
920 		/* lock memory if it's not huge pages */
921 		if (!huge)
922 			mlock(addr, mem_sz);
923 
924 		/* populate IOVA addresses */
925 		for (cur_page = 0; cur_page < n_pages; cur_page++) {
926 			rte_iova_t iova;
927 			size_t offset;
928 			void *cur;
929 
930 			offset = cur_pgsz * cur_page;
931 			cur = RTE_PTR_ADD(addr, offset);
932 
933 			/* touch the page before getting its IOVA */
934 			*(volatile char *)cur = 0;
935 
936 			iova = rte_mem_virt2iova(cur);
937 
938 			iovas[cur_page] = iova;
939 		}
940 
941 		break;
942 	}
943 	/* if we couldn't allocate anything */
944 	if (iovas == NULL)
945 		return -1;
946 
947 	param->addr = addr;
948 	param->len = mem_sz;
949 	param->pgsz = cur_pgsz;
950 	param->iova_table = iovas;
951 	param->iova_table_len = n_pages;
952 
953 	return 0;
954 fail:
955 	free(iovas);
956 	if (addr)
957 		munmap(addr, mem_sz);
958 
959 	return -1;
960 }
961 
962 static int
963 setup_extmem(uint32_t nb_mbufs, uint32_t mbuf_sz, bool huge)
964 {
965 	struct extmem_param param;
966 	int socket_id, ret;
967 
968 	memset(&param, 0, sizeof(param));
969 
970 	/* check if our heap exists */
971 	socket_id = rte_malloc_heap_get_socket(EXTMEM_HEAP_NAME);
972 	if (socket_id < 0) {
973 		/* create our heap */
974 		ret = rte_malloc_heap_create(EXTMEM_HEAP_NAME);
975 		if (ret < 0) {
976 			TESTPMD_LOG(ERR, "Cannot create heap\n");
977 			return -1;
978 		}
979 	}
980 
981 	ret = create_extmem(nb_mbufs, mbuf_sz, &param, huge);
982 	if (ret < 0) {
983 		TESTPMD_LOG(ERR, "Cannot create memory area\n");
984 		return -1;
985 	}
986 
987 	/* we now have a valid memory area, so add it to heap */
988 	ret = rte_malloc_heap_memory_add(EXTMEM_HEAP_NAME,
989 			param.addr, param.len, param.iova_table,
990 			param.iova_table_len, param.pgsz);
991 
992 	/* when using VFIO, memory is automatically mapped for DMA by EAL */
993 
994 	/* not needed any more */
995 	free(param.iova_table);
996 
997 	if (ret < 0) {
998 		TESTPMD_LOG(ERR, "Cannot add memory to heap\n");
999 		munmap(param.addr, param.len);
1000 		return -1;
1001 	}
1002 
1003 	/* success */
1004 
1005 	TESTPMD_LOG(DEBUG, "Allocated %zuMB of external memory\n",
1006 			param.len >> 20);
1007 
1008 	return 0;
1009 }
1010 static void
1011 dma_unmap_cb(struct rte_mempool *mp __rte_unused, void *opaque __rte_unused,
1012 	     struct rte_mempool_memhdr *memhdr, unsigned mem_idx __rte_unused)
1013 {
1014 	uint16_t pid = 0;
1015 	int ret;
1016 
1017 	RTE_ETH_FOREACH_DEV(pid) {
1018 		struct rte_eth_dev_info dev_info;
1019 
1020 		ret = eth_dev_info_get_print_err(pid, &dev_info);
1021 		if (ret != 0) {
1022 			TESTPMD_LOG(DEBUG,
1023 				    "unable to get device info for port %d on addr 0x%p,"
1024 				    "mempool unmapping will not be performed\n",
1025 				    pid, memhdr->addr);
1026 			continue;
1027 		}
1028 
1029 		ret = rte_dev_dma_unmap(dev_info.device, memhdr->addr, 0, memhdr->len);
1030 		if (ret) {
1031 			TESTPMD_LOG(DEBUG,
1032 				    "unable to DMA unmap addr 0x%p "
1033 				    "for device %s\n",
1034 				    memhdr->addr, rte_dev_name(dev_info.device));
1035 		}
1036 	}
1037 	ret = rte_extmem_unregister(memhdr->addr, memhdr->len);
1038 	if (ret) {
1039 		TESTPMD_LOG(DEBUG,
1040 			    "unable to un-register addr 0x%p\n", memhdr->addr);
1041 	}
1042 }
1043 
1044 static void
1045 dma_map_cb(struct rte_mempool *mp __rte_unused, void *opaque __rte_unused,
1046 	   struct rte_mempool_memhdr *memhdr, unsigned mem_idx __rte_unused)
1047 {
1048 	uint16_t pid = 0;
1049 	size_t page_size = sysconf(_SC_PAGESIZE);
1050 	int ret;
1051 
1052 	ret = rte_extmem_register(memhdr->addr, memhdr->len, NULL, 0,
1053 				  page_size);
1054 	if (ret) {
1055 		TESTPMD_LOG(DEBUG,
1056 			    "unable to register addr 0x%p\n", memhdr->addr);
1057 		return;
1058 	}
1059 	RTE_ETH_FOREACH_DEV(pid) {
1060 		struct rte_eth_dev_info dev_info;
1061 
1062 		ret = eth_dev_info_get_print_err(pid, &dev_info);
1063 		if (ret != 0) {
1064 			TESTPMD_LOG(DEBUG,
1065 				    "unable to get device info for port %d on addr 0x%p,"
1066 				    "mempool mapping will not be performed\n",
1067 				    pid, memhdr->addr);
1068 			continue;
1069 		}
1070 		ret = rte_dev_dma_map(dev_info.device, memhdr->addr, 0, memhdr->len);
1071 		if (ret) {
1072 			TESTPMD_LOG(DEBUG,
1073 				    "unable to DMA map addr 0x%p "
1074 				    "for device %s\n",
1075 				    memhdr->addr, rte_dev_name(dev_info.device));
1076 		}
1077 	}
1078 }
1079 #endif
1080 
1081 static unsigned int
1082 setup_extbuf(uint32_t nb_mbufs, uint16_t mbuf_sz, unsigned int socket_id,
1083 	    char *pool_name, struct rte_pktmbuf_extmem **ext_mem)
1084 {
1085 	struct rte_pktmbuf_extmem *xmem;
1086 	unsigned int ext_num, zone_num, elt_num;
1087 	uint16_t elt_size;
1088 
1089 	elt_size = RTE_ALIGN_CEIL(mbuf_sz, RTE_CACHE_LINE_SIZE);
1090 	elt_num = EXTBUF_ZONE_SIZE / elt_size;
1091 	zone_num = (nb_mbufs + elt_num - 1) / elt_num;
1092 
1093 	xmem = malloc(sizeof(struct rte_pktmbuf_extmem) * zone_num);
1094 	if (xmem == NULL) {
1095 		TESTPMD_LOG(ERR, "Cannot allocate memory for "
1096 				 "external buffer descriptors\n");
1097 		*ext_mem = NULL;
1098 		return 0;
1099 	}
1100 	for (ext_num = 0; ext_num < zone_num; ext_num++) {
1101 		struct rte_pktmbuf_extmem *xseg = xmem + ext_num;
1102 		const struct rte_memzone *mz;
1103 		char mz_name[RTE_MEMZONE_NAMESIZE];
1104 		int ret;
1105 
1106 		ret = snprintf(mz_name, sizeof(mz_name),
1107 			RTE_MEMPOOL_MZ_FORMAT "_xb_%u", pool_name, ext_num);
1108 		if (ret < 0 || ret >= (int)sizeof(mz_name)) {
1109 			errno = ENAMETOOLONG;
1110 			ext_num = 0;
1111 			break;
1112 		}
1113 		mz = rte_memzone_reserve(mz_name, EXTBUF_ZONE_SIZE,
1114 					 socket_id,
1115 					 RTE_MEMZONE_IOVA_CONTIG |
1116 					 RTE_MEMZONE_1GB |
1117 					 RTE_MEMZONE_SIZE_HINT_ONLY);
1118 		if (mz == NULL) {
1119 			/*
1120 			 * The caller exits on external buffer creation
1121 			 * error, so there is no need to free memzones.
1122 			 */
1123 			errno = ENOMEM;
1124 			ext_num = 0;
1125 			break;
1126 		}
1127 		xseg->buf_ptr = mz->addr;
1128 		xseg->buf_iova = mz->iova;
1129 		xseg->buf_len = EXTBUF_ZONE_SIZE;
1130 		xseg->elt_size = elt_size;
1131 	}
1132 	if (ext_num == 0 && xmem != NULL) {
1133 		free(xmem);
1134 		xmem = NULL;
1135 	}
1136 	*ext_mem = xmem;
1137 	return ext_num;
1138 }
1139 
1140 /*
1141  * Configuration initialisation done once at init time.
1142  */
1143 static struct rte_mempool *
1144 mbuf_pool_create(uint16_t mbuf_seg_size, unsigned nb_mbuf,
1145 		 unsigned int socket_id, uint16_t size_idx)
1146 {
1147 	char pool_name[RTE_MEMPOOL_NAMESIZE];
1148 	struct rte_mempool *rte_mp = NULL;
1149 #ifndef RTE_EXEC_ENV_WINDOWS
1150 	uint32_t mb_size;
1151 
1152 	mb_size = sizeof(struct rte_mbuf) + mbuf_seg_size;
1153 #endif
1154 	mbuf_poolname_build(socket_id, pool_name, sizeof(pool_name), size_idx);
1155 	if (!is_proc_primary()) {
1156 		rte_mp = rte_mempool_lookup(pool_name);
1157 		if (rte_mp == NULL)
1158 			rte_exit(EXIT_FAILURE,
1159 				"Get mbuf pool for socket %u failed: %s\n",
1160 				socket_id, rte_strerror(rte_errno));
1161 		return rte_mp;
1162 	}
1163 
1164 	TESTPMD_LOG(INFO,
1165 		"create a new mbuf pool <%s>: n=%u, size=%u, socket=%u\n",
1166 		pool_name, nb_mbuf, mbuf_seg_size, socket_id);
1167 
1168 	switch (mp_alloc_type) {
1169 	case MP_ALLOC_NATIVE:
1170 		{
1171 			/* wrapper to rte_mempool_create() */
1172 			TESTPMD_LOG(INFO, "preferred mempool ops selected: %s\n",
1173 					rte_mbuf_best_mempool_ops());
1174 			rte_mp = rte_pktmbuf_pool_create(pool_name, nb_mbuf,
1175 				mb_mempool_cache, 0, mbuf_seg_size, socket_id);
1176 			break;
1177 		}
1178 #ifndef RTE_EXEC_ENV_WINDOWS
1179 	case MP_ALLOC_ANON:
1180 		{
1181 			rte_mp = rte_mempool_create_empty(pool_name, nb_mbuf,
1182 				mb_size, (unsigned int) mb_mempool_cache,
1183 				sizeof(struct rte_pktmbuf_pool_private),
1184 				socket_id, mempool_flags);
1185 			if (rte_mp == NULL)
1186 				goto err;
1187 
1188 			if (rte_mempool_populate_anon(rte_mp) == 0) {
1189 				rte_mempool_free(rte_mp);
1190 				rte_mp = NULL;
1191 				goto err;
1192 			}
1193 			rte_pktmbuf_pool_init(rte_mp, NULL);
1194 			rte_mempool_obj_iter(rte_mp, rte_pktmbuf_init, NULL);
1195 			rte_mempool_mem_iter(rte_mp, dma_map_cb, NULL);
1196 			break;
1197 		}
1198 	case MP_ALLOC_XMEM:
1199 	case MP_ALLOC_XMEM_HUGE:
1200 		{
1201 			int heap_socket;
1202 			bool huge = mp_alloc_type == MP_ALLOC_XMEM_HUGE;
1203 
1204 			if (setup_extmem(nb_mbuf, mbuf_seg_size, huge) < 0)
1205 				rte_exit(EXIT_FAILURE, "Could not create external memory\n");
1206 
1207 			heap_socket =
1208 				rte_malloc_heap_get_socket(EXTMEM_HEAP_NAME);
1209 			if (heap_socket < 0)
1210 				rte_exit(EXIT_FAILURE, "Could not get external memory socket ID\n");
1211 
1212 			TESTPMD_LOG(INFO, "preferred mempool ops selected: %s\n",
1213 					rte_mbuf_best_mempool_ops());
1214 			rte_mp = rte_pktmbuf_pool_create(pool_name, nb_mbuf,
1215 					mb_mempool_cache, 0, mbuf_seg_size,
1216 					heap_socket);
1217 			break;
1218 		}
1219 #endif
1220 	case MP_ALLOC_XBUF:
1221 		{
1222 			struct rte_pktmbuf_extmem *ext_mem;
1223 			unsigned int ext_num;
1224 
1225 			ext_num = setup_extbuf(nb_mbuf,	mbuf_seg_size,
1226 					       socket_id, pool_name, &ext_mem);
1227 			if (ext_num == 0)
1228 				rte_exit(EXIT_FAILURE,
1229 					 "Can't create pinned data buffers\n");
1230 
1231 			TESTPMD_LOG(INFO, "preferred mempool ops selected: %s\n",
1232 					rte_mbuf_best_mempool_ops());
1233 			rte_mp = rte_pktmbuf_pool_create_extbuf
1234 					(pool_name, nb_mbuf, mb_mempool_cache,
1235 					 0, mbuf_seg_size, socket_id,
1236 					 ext_mem, ext_num);
1237 			free(ext_mem);
1238 			break;
1239 		}
1240 	default:
1241 		{
1242 			rte_exit(EXIT_FAILURE, "Invalid mempool creation mode\n");
1243 		}
1244 	}
1245 
1246 #ifndef RTE_EXEC_ENV_WINDOWS
1247 err:
1248 #endif
1249 	if (rte_mp == NULL) {
1250 		rte_exit(EXIT_FAILURE,
1251 			"Creation of mbuf pool for socket %u failed: %s\n",
1252 			socket_id, rte_strerror(rte_errno));
1253 	} else if (verbose_level > 0) {
1254 		rte_mempool_dump(stdout, rte_mp);
1255 	}
1256 	return rte_mp;
1257 }
1258 
1259 /*
1260  * Check given socket id is valid or not with NUMA mode,
1261  * if valid, return 0, else return -1
1262  */
1263 static int
1264 check_socket_id(const unsigned int socket_id)
1265 {
1266 	static int warning_once = 0;
1267 
1268 	if (new_socket_id(socket_id)) {
1269 		if (!warning_once && numa_support)
1270 			fprintf(stderr,
1271 				"Warning: NUMA should be configured manually by using --port-numa-config and --ring-numa-config parameters along with --numa.\n");
1272 		warning_once = 1;
1273 		return -1;
1274 	}
1275 	return 0;
1276 }
1277 
1278 /*
1279  * Get the allowed maximum number of RX queues.
1280  * *pid return the port id which has minimal value of
1281  * max_rx_queues in all ports.
1282  */
1283 queueid_t
1284 get_allowed_max_nb_rxq(portid_t *pid)
1285 {
1286 	queueid_t allowed_max_rxq = RTE_MAX_QUEUES_PER_PORT;
1287 	bool max_rxq_valid = false;
1288 	portid_t pi;
1289 	struct rte_eth_dev_info dev_info;
1290 
1291 	RTE_ETH_FOREACH_DEV(pi) {
1292 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1293 			continue;
1294 
1295 		max_rxq_valid = true;
1296 		if (dev_info.max_rx_queues < allowed_max_rxq) {
1297 			allowed_max_rxq = dev_info.max_rx_queues;
1298 			*pid = pi;
1299 		}
1300 	}
1301 	return max_rxq_valid ? allowed_max_rxq : 0;
1302 }
1303 
1304 /*
1305  * Check input rxq is valid or not.
1306  * If input rxq is not greater than any of maximum number
1307  * of RX queues of all ports, it is valid.
1308  * if valid, return 0, else return -1
1309  */
1310 int
1311 check_nb_rxq(queueid_t rxq)
1312 {
1313 	queueid_t allowed_max_rxq;
1314 	portid_t pid = 0;
1315 
1316 	allowed_max_rxq = get_allowed_max_nb_rxq(&pid);
1317 	if (rxq > allowed_max_rxq) {
1318 		fprintf(stderr,
1319 			"Fail: input rxq (%u) can't be greater than max_rx_queues (%u) of port %u\n",
1320 			rxq, allowed_max_rxq, pid);
1321 		return -1;
1322 	}
1323 	return 0;
1324 }
1325 
1326 /*
1327  * Get the allowed maximum number of TX queues.
1328  * *pid return the port id which has minimal value of
1329  * max_tx_queues in all ports.
1330  */
1331 queueid_t
1332 get_allowed_max_nb_txq(portid_t *pid)
1333 {
1334 	queueid_t allowed_max_txq = RTE_MAX_QUEUES_PER_PORT;
1335 	bool max_txq_valid = false;
1336 	portid_t pi;
1337 	struct rte_eth_dev_info dev_info;
1338 
1339 	RTE_ETH_FOREACH_DEV(pi) {
1340 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1341 			continue;
1342 
1343 		max_txq_valid = true;
1344 		if (dev_info.max_tx_queues < allowed_max_txq) {
1345 			allowed_max_txq = dev_info.max_tx_queues;
1346 			*pid = pi;
1347 		}
1348 	}
1349 	return max_txq_valid ? allowed_max_txq : 0;
1350 }
1351 
1352 /*
1353  * Check input txq is valid or not.
1354  * If input txq is not greater than any of maximum number
1355  * of TX queues of all ports, it is valid.
1356  * if valid, return 0, else return -1
1357  */
1358 int
1359 check_nb_txq(queueid_t txq)
1360 {
1361 	queueid_t allowed_max_txq;
1362 	portid_t pid = 0;
1363 
1364 	allowed_max_txq = get_allowed_max_nb_txq(&pid);
1365 	if (txq > allowed_max_txq) {
1366 		fprintf(stderr,
1367 			"Fail: input txq (%u) can't be greater than max_tx_queues (%u) of port %u\n",
1368 			txq, allowed_max_txq, pid);
1369 		return -1;
1370 	}
1371 	return 0;
1372 }
1373 
1374 /*
1375  * Get the allowed maximum number of RXDs of every rx queue.
1376  * *pid return the port id which has minimal value of
1377  * max_rxd in all queues of all ports.
1378  */
1379 static uint16_t
1380 get_allowed_max_nb_rxd(portid_t *pid)
1381 {
1382 	uint16_t allowed_max_rxd = UINT16_MAX;
1383 	portid_t pi;
1384 	struct rte_eth_dev_info dev_info;
1385 
1386 	RTE_ETH_FOREACH_DEV(pi) {
1387 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1388 			continue;
1389 
1390 		if (dev_info.rx_desc_lim.nb_max < allowed_max_rxd) {
1391 			allowed_max_rxd = dev_info.rx_desc_lim.nb_max;
1392 			*pid = pi;
1393 		}
1394 	}
1395 	return allowed_max_rxd;
1396 }
1397 
1398 /*
1399  * Get the allowed minimal number of RXDs of every rx queue.
1400  * *pid return the port id which has minimal value of
1401  * min_rxd in all queues of all ports.
1402  */
1403 static uint16_t
1404 get_allowed_min_nb_rxd(portid_t *pid)
1405 {
1406 	uint16_t allowed_min_rxd = 0;
1407 	portid_t pi;
1408 	struct rte_eth_dev_info dev_info;
1409 
1410 	RTE_ETH_FOREACH_DEV(pi) {
1411 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1412 			continue;
1413 
1414 		if (dev_info.rx_desc_lim.nb_min > allowed_min_rxd) {
1415 			allowed_min_rxd = dev_info.rx_desc_lim.nb_min;
1416 			*pid = pi;
1417 		}
1418 	}
1419 
1420 	return allowed_min_rxd;
1421 }
1422 
1423 /*
1424  * Check input rxd is valid or not.
1425  * If input rxd is not greater than any of maximum number
1426  * of RXDs of every Rx queues and is not less than any of
1427  * minimal number of RXDs of every Rx queues, it is valid.
1428  * if valid, return 0, else return -1
1429  */
1430 int
1431 check_nb_rxd(queueid_t rxd)
1432 {
1433 	uint16_t allowed_max_rxd;
1434 	uint16_t allowed_min_rxd;
1435 	portid_t pid = 0;
1436 
1437 	allowed_max_rxd = get_allowed_max_nb_rxd(&pid);
1438 	if (rxd > allowed_max_rxd) {
1439 		fprintf(stderr,
1440 			"Fail: input rxd (%u) can't be greater than max_rxds (%u) of port %u\n",
1441 			rxd, allowed_max_rxd, pid);
1442 		return -1;
1443 	}
1444 
1445 	allowed_min_rxd = get_allowed_min_nb_rxd(&pid);
1446 	if (rxd < allowed_min_rxd) {
1447 		fprintf(stderr,
1448 			"Fail: input rxd (%u) can't be less than min_rxds (%u) of port %u\n",
1449 			rxd, allowed_min_rxd, pid);
1450 		return -1;
1451 	}
1452 
1453 	return 0;
1454 }
1455 
1456 /*
1457  * Get the allowed maximum number of TXDs of every rx queues.
1458  * *pid return the port id which has minimal value of
1459  * max_txd in every tx queue.
1460  */
1461 static uint16_t
1462 get_allowed_max_nb_txd(portid_t *pid)
1463 {
1464 	uint16_t allowed_max_txd = UINT16_MAX;
1465 	portid_t pi;
1466 	struct rte_eth_dev_info dev_info;
1467 
1468 	RTE_ETH_FOREACH_DEV(pi) {
1469 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1470 			continue;
1471 
1472 		if (dev_info.tx_desc_lim.nb_max < allowed_max_txd) {
1473 			allowed_max_txd = dev_info.tx_desc_lim.nb_max;
1474 			*pid = pi;
1475 		}
1476 	}
1477 	return allowed_max_txd;
1478 }
1479 
1480 /*
1481  * Get the allowed maximum number of TXDs of every tx queues.
1482  * *pid return the port id which has minimal value of
1483  * min_txd in every tx queue.
1484  */
1485 static uint16_t
1486 get_allowed_min_nb_txd(portid_t *pid)
1487 {
1488 	uint16_t allowed_min_txd = 0;
1489 	portid_t pi;
1490 	struct rte_eth_dev_info dev_info;
1491 
1492 	RTE_ETH_FOREACH_DEV(pi) {
1493 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1494 			continue;
1495 
1496 		if (dev_info.tx_desc_lim.nb_min > allowed_min_txd) {
1497 			allowed_min_txd = dev_info.tx_desc_lim.nb_min;
1498 			*pid = pi;
1499 		}
1500 	}
1501 
1502 	return allowed_min_txd;
1503 }
1504 
1505 /*
1506  * Check input txd is valid or not.
1507  * If input txd is not greater than any of maximum number
1508  * of TXDs of every Rx queues, it is valid.
1509  * if valid, return 0, else return -1
1510  */
1511 int
1512 check_nb_txd(queueid_t txd)
1513 {
1514 	uint16_t allowed_max_txd;
1515 	uint16_t allowed_min_txd;
1516 	portid_t pid = 0;
1517 
1518 	allowed_max_txd = get_allowed_max_nb_txd(&pid);
1519 	if (txd > allowed_max_txd) {
1520 		fprintf(stderr,
1521 			"Fail: input txd (%u) can't be greater than max_txds (%u) of port %u\n",
1522 			txd, allowed_max_txd, pid);
1523 		return -1;
1524 	}
1525 
1526 	allowed_min_txd = get_allowed_min_nb_txd(&pid);
1527 	if (txd < allowed_min_txd) {
1528 		fprintf(stderr,
1529 			"Fail: input txd (%u) can't be less than min_txds (%u) of port %u\n",
1530 			txd, allowed_min_txd, pid);
1531 		return -1;
1532 	}
1533 	return 0;
1534 }
1535 
1536 
1537 /*
1538  * Get the allowed maximum number of hairpin queues.
1539  * *pid return the port id which has minimal value of
1540  * max_hairpin_queues in all ports.
1541  */
1542 queueid_t
1543 get_allowed_max_nb_hairpinq(portid_t *pid)
1544 {
1545 	queueid_t allowed_max_hairpinq = RTE_MAX_QUEUES_PER_PORT;
1546 	portid_t pi;
1547 	struct rte_eth_hairpin_cap cap;
1548 
1549 	RTE_ETH_FOREACH_DEV(pi) {
1550 		if (rte_eth_dev_hairpin_capability_get(pi, &cap) != 0) {
1551 			*pid = pi;
1552 			return 0;
1553 		}
1554 		if (cap.max_nb_queues < allowed_max_hairpinq) {
1555 			allowed_max_hairpinq = cap.max_nb_queues;
1556 			*pid = pi;
1557 		}
1558 	}
1559 	return allowed_max_hairpinq;
1560 }
1561 
1562 /*
1563  * Check input hairpin is valid or not.
1564  * If input hairpin is not greater than any of maximum number
1565  * of hairpin queues of all ports, it is valid.
1566  * if valid, return 0, else return -1
1567  */
1568 int
1569 check_nb_hairpinq(queueid_t hairpinq)
1570 {
1571 	queueid_t allowed_max_hairpinq;
1572 	portid_t pid = 0;
1573 
1574 	allowed_max_hairpinq = get_allowed_max_nb_hairpinq(&pid);
1575 	if (hairpinq > allowed_max_hairpinq) {
1576 		fprintf(stderr,
1577 			"Fail: input hairpin (%u) can't be greater than max_hairpin_queues (%u) of port %u\n",
1578 			hairpinq, allowed_max_hairpinq, pid);
1579 		return -1;
1580 	}
1581 	return 0;
1582 }
1583 
1584 static int
1585 get_eth_overhead(struct rte_eth_dev_info *dev_info)
1586 {
1587 	uint32_t eth_overhead;
1588 
1589 	if (dev_info->max_mtu != UINT16_MAX &&
1590 	    dev_info->max_rx_pktlen > dev_info->max_mtu)
1591 		eth_overhead = dev_info->max_rx_pktlen - dev_info->max_mtu;
1592 	else
1593 		eth_overhead = RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN;
1594 
1595 	return eth_overhead;
1596 }
1597 
1598 static void
1599 init_config_port_offloads(portid_t pid, uint32_t socket_id)
1600 {
1601 	struct rte_port *port = &ports[pid];
1602 	int ret;
1603 	int i;
1604 
1605 	eth_rx_metadata_negotiate_mp(pid);
1606 
1607 	port->dev_conf.txmode = tx_mode;
1608 	port->dev_conf.rxmode = rx_mode;
1609 
1610 	ret = eth_dev_info_get_print_err(pid, &port->dev_info);
1611 	if (ret != 0)
1612 		rte_exit(EXIT_FAILURE, "rte_eth_dev_info_get() failed\n");
1613 
1614 	if (!(port->dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE))
1615 		port->dev_conf.txmode.offloads &=
1616 			~RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE;
1617 
1618 	/* Apply Rx offloads configuration */
1619 	for (i = 0; i < port->dev_info.max_rx_queues; i++)
1620 		port->rxq[i].conf.offloads = port->dev_conf.rxmode.offloads;
1621 	/* Apply Tx offloads configuration */
1622 	for (i = 0; i < port->dev_info.max_tx_queues; i++)
1623 		port->txq[i].conf.offloads = port->dev_conf.txmode.offloads;
1624 
1625 	if (eth_link_speed)
1626 		port->dev_conf.link_speeds = eth_link_speed;
1627 
1628 	if (max_rx_pkt_len)
1629 		port->dev_conf.rxmode.mtu = max_rx_pkt_len -
1630 			get_eth_overhead(&port->dev_info);
1631 
1632 	/* set flag to initialize port/queue */
1633 	port->need_reconfig = 1;
1634 	port->need_reconfig_queues = 1;
1635 	port->socket_id = socket_id;
1636 	port->tx_metadata = 0;
1637 
1638 	/*
1639 	 * Check for maximum number of segments per MTU.
1640 	 * Accordingly update the mbuf data size.
1641 	 */
1642 	if (port->dev_info.rx_desc_lim.nb_mtu_seg_max != UINT16_MAX &&
1643 	    port->dev_info.rx_desc_lim.nb_mtu_seg_max != 0) {
1644 		uint32_t eth_overhead = get_eth_overhead(&port->dev_info);
1645 		uint16_t mtu;
1646 
1647 		if (rte_eth_dev_get_mtu(pid, &mtu) == 0) {
1648 			uint16_t data_size = (mtu + eth_overhead) /
1649 				port->dev_info.rx_desc_lim.nb_mtu_seg_max;
1650 			uint16_t buffer_size = data_size + RTE_PKTMBUF_HEADROOM;
1651 
1652 			if (buffer_size > mbuf_data_size[0]) {
1653 				mbuf_data_size[0] = buffer_size;
1654 				TESTPMD_LOG(WARNING,
1655 					"Configured mbuf size of the first segment %hu\n",
1656 					mbuf_data_size[0]);
1657 			}
1658 		}
1659 	}
1660 }
1661 
1662 static void
1663 init_config(void)
1664 {
1665 	portid_t pid;
1666 	struct rte_mempool *mbp;
1667 	unsigned int nb_mbuf_per_pool;
1668 	lcoreid_t  lc_id;
1669 #ifdef RTE_LIB_GRO
1670 	struct rte_gro_param gro_param;
1671 #endif
1672 #ifdef RTE_LIB_GSO
1673 	uint32_t gso_types;
1674 #endif
1675 
1676 	/* Configuration of logical cores. */
1677 	fwd_lcores = rte_zmalloc("testpmd: fwd_lcores",
1678 				sizeof(struct fwd_lcore *) * nb_lcores,
1679 				RTE_CACHE_LINE_SIZE);
1680 	if (fwd_lcores == NULL) {
1681 		rte_exit(EXIT_FAILURE, "rte_zmalloc(%d (struct fwd_lcore *)) "
1682 							"failed\n", nb_lcores);
1683 	}
1684 	for (lc_id = 0; lc_id < nb_lcores; lc_id++) {
1685 		fwd_lcores[lc_id] = rte_zmalloc("testpmd: struct fwd_lcore",
1686 					       sizeof(struct fwd_lcore),
1687 					       RTE_CACHE_LINE_SIZE);
1688 		if (fwd_lcores[lc_id] == NULL) {
1689 			rte_exit(EXIT_FAILURE, "rte_zmalloc(struct fwd_lcore) "
1690 								"failed\n");
1691 		}
1692 		fwd_lcores[lc_id]->cpuid_idx = lc_id;
1693 	}
1694 
1695 	RTE_ETH_FOREACH_DEV(pid) {
1696 		uint32_t socket_id;
1697 
1698 		if (numa_support) {
1699 			socket_id = port_numa[pid];
1700 			if (port_numa[pid] == NUMA_NO_CONFIG) {
1701 				socket_id = rte_eth_dev_socket_id(pid);
1702 
1703 				/*
1704 				 * if socket_id is invalid,
1705 				 * set to the first available socket.
1706 				 */
1707 				if (check_socket_id(socket_id) < 0)
1708 					socket_id = socket_ids[0];
1709 			}
1710 		} else {
1711 			socket_id = (socket_num == UMA_NO_CONFIG) ?
1712 				    0 : socket_num;
1713 		}
1714 		/* Apply default TxRx configuration for all ports */
1715 		init_config_port_offloads(pid, socket_id);
1716 	}
1717 	/*
1718 	 * Create pools of mbuf.
1719 	 * If NUMA support is disabled, create a single pool of mbuf in
1720 	 * socket 0 memory by default.
1721 	 * Otherwise, create a pool of mbuf in the memory of sockets 0 and 1.
1722 	 *
1723 	 * Use the maximum value of nb_rxd and nb_txd here, then nb_rxd and
1724 	 * nb_txd can be configured at run time.
1725 	 */
1726 	if (param_total_num_mbufs)
1727 		nb_mbuf_per_pool = param_total_num_mbufs;
1728 	else {
1729 		nb_mbuf_per_pool = RX_DESC_MAX +
1730 			(nb_lcores * mb_mempool_cache) +
1731 			TX_DESC_MAX + MAX_PKT_BURST;
1732 		nb_mbuf_per_pool *= RTE_MAX_ETHPORTS;
1733 	}
1734 
1735 	if (numa_support) {
1736 		uint8_t i, j;
1737 
1738 		for (i = 0; i < num_sockets; i++)
1739 			for (j = 0; j < mbuf_data_size_n; j++)
1740 				mempools[i * MAX_SEGS_BUFFER_SPLIT + j] =
1741 					mbuf_pool_create(mbuf_data_size[j],
1742 							  nb_mbuf_per_pool,
1743 							  socket_ids[i], j);
1744 	} else {
1745 		uint8_t i;
1746 
1747 		for (i = 0; i < mbuf_data_size_n; i++)
1748 			mempools[i] = mbuf_pool_create
1749 					(mbuf_data_size[i],
1750 					 nb_mbuf_per_pool,
1751 					 socket_num == UMA_NO_CONFIG ?
1752 					 0 : socket_num, i);
1753 	}
1754 
1755 	init_port_config();
1756 
1757 #ifdef RTE_LIB_GSO
1758 	gso_types = RTE_ETH_TX_OFFLOAD_TCP_TSO | RTE_ETH_TX_OFFLOAD_VXLAN_TNL_TSO |
1759 		RTE_ETH_TX_OFFLOAD_GRE_TNL_TSO | RTE_ETH_TX_OFFLOAD_UDP_TSO;
1760 #endif
1761 	/*
1762 	 * Records which Mbuf pool to use by each logical core, if needed.
1763 	 */
1764 	for (lc_id = 0; lc_id < nb_lcores; lc_id++) {
1765 		mbp = mbuf_pool_find(
1766 			rte_lcore_to_socket_id(fwd_lcores_cpuids[lc_id]), 0);
1767 
1768 		if (mbp == NULL)
1769 			mbp = mbuf_pool_find(0, 0);
1770 		fwd_lcores[lc_id]->mbp = mbp;
1771 #ifdef RTE_LIB_GSO
1772 		/* initialize GSO context */
1773 		fwd_lcores[lc_id]->gso_ctx.direct_pool = mbp;
1774 		fwd_lcores[lc_id]->gso_ctx.indirect_pool = mbp;
1775 		fwd_lcores[lc_id]->gso_ctx.gso_types = gso_types;
1776 		fwd_lcores[lc_id]->gso_ctx.gso_size = RTE_ETHER_MAX_LEN -
1777 			RTE_ETHER_CRC_LEN;
1778 		fwd_lcores[lc_id]->gso_ctx.flag = 0;
1779 #endif
1780 	}
1781 
1782 	fwd_config_setup();
1783 
1784 #ifdef RTE_LIB_GRO
1785 	/* create a gro context for each lcore */
1786 	gro_param.gro_types = RTE_GRO_TCP_IPV4;
1787 	gro_param.max_flow_num = GRO_MAX_FLUSH_CYCLES;
1788 	gro_param.max_item_per_flow = MAX_PKT_BURST;
1789 	for (lc_id = 0; lc_id < nb_lcores; lc_id++) {
1790 		gro_param.socket_id = rte_lcore_to_socket_id(
1791 				fwd_lcores_cpuids[lc_id]);
1792 		fwd_lcores[lc_id]->gro_ctx = rte_gro_ctx_create(&gro_param);
1793 		if (fwd_lcores[lc_id]->gro_ctx == NULL) {
1794 			rte_exit(EXIT_FAILURE,
1795 					"rte_gro_ctx_create() failed\n");
1796 		}
1797 	}
1798 #endif
1799 }
1800 
1801 
1802 void
1803 reconfig(portid_t new_port_id, unsigned socket_id)
1804 {
1805 	/* Reconfiguration of Ethernet ports. */
1806 	init_config_port_offloads(new_port_id, socket_id);
1807 	init_port_config();
1808 }
1809 
1810 int
1811 init_fwd_streams(void)
1812 {
1813 	portid_t pid;
1814 	struct rte_port *port;
1815 	streamid_t sm_id, nb_fwd_streams_new;
1816 	queueid_t q;
1817 
1818 	/* set socket id according to numa or not */
1819 	RTE_ETH_FOREACH_DEV(pid) {
1820 		port = &ports[pid];
1821 		if (nb_rxq > port->dev_info.max_rx_queues) {
1822 			fprintf(stderr,
1823 				"Fail: nb_rxq(%d) is greater than max_rx_queues(%d)\n",
1824 				nb_rxq, port->dev_info.max_rx_queues);
1825 			return -1;
1826 		}
1827 		if (nb_txq > port->dev_info.max_tx_queues) {
1828 			fprintf(stderr,
1829 				"Fail: nb_txq(%d) is greater than max_tx_queues(%d)\n",
1830 				nb_txq, port->dev_info.max_tx_queues);
1831 			return -1;
1832 		}
1833 		if (numa_support) {
1834 			if (port_numa[pid] != NUMA_NO_CONFIG)
1835 				port->socket_id = port_numa[pid];
1836 			else {
1837 				port->socket_id = rte_eth_dev_socket_id(pid);
1838 
1839 				/*
1840 				 * if socket_id is invalid,
1841 				 * set to the first available socket.
1842 				 */
1843 				if (check_socket_id(port->socket_id) < 0)
1844 					port->socket_id = socket_ids[0];
1845 			}
1846 		}
1847 		else {
1848 			if (socket_num == UMA_NO_CONFIG)
1849 				port->socket_id = 0;
1850 			else
1851 				port->socket_id = socket_num;
1852 		}
1853 	}
1854 
1855 	q = RTE_MAX(nb_rxq, nb_txq);
1856 	if (q == 0) {
1857 		fprintf(stderr,
1858 			"Fail: Cannot allocate fwd streams as number of queues is 0\n");
1859 		return -1;
1860 	}
1861 	nb_fwd_streams_new = (streamid_t)(nb_ports * q);
1862 	if (nb_fwd_streams_new == nb_fwd_streams)
1863 		return 0;
1864 	/* clear the old */
1865 	if (fwd_streams != NULL) {
1866 		for (sm_id = 0; sm_id < nb_fwd_streams; sm_id++) {
1867 			if (fwd_streams[sm_id] == NULL)
1868 				continue;
1869 			rte_free(fwd_streams[sm_id]);
1870 			fwd_streams[sm_id] = NULL;
1871 		}
1872 		rte_free(fwd_streams);
1873 		fwd_streams = NULL;
1874 	}
1875 
1876 	/* init new */
1877 	nb_fwd_streams = nb_fwd_streams_new;
1878 	if (nb_fwd_streams) {
1879 		fwd_streams = rte_zmalloc("testpmd: fwd_streams",
1880 			sizeof(struct fwd_stream *) * nb_fwd_streams,
1881 			RTE_CACHE_LINE_SIZE);
1882 		if (fwd_streams == NULL)
1883 			rte_exit(EXIT_FAILURE, "rte_zmalloc(%d"
1884 				 " (struct fwd_stream *)) failed\n",
1885 				 nb_fwd_streams);
1886 
1887 		for (sm_id = 0; sm_id < nb_fwd_streams; sm_id++) {
1888 			fwd_streams[sm_id] = rte_zmalloc("testpmd:"
1889 				" struct fwd_stream", sizeof(struct fwd_stream),
1890 				RTE_CACHE_LINE_SIZE);
1891 			if (fwd_streams[sm_id] == NULL)
1892 				rte_exit(EXIT_FAILURE, "rte_zmalloc"
1893 					 "(struct fwd_stream) failed\n");
1894 		}
1895 	}
1896 
1897 	return 0;
1898 }
1899 
1900 static void
1901 pkt_burst_stats_display(const char *rx_tx, struct pkt_burst_stats *pbs)
1902 {
1903 	uint64_t total_burst, sburst;
1904 	uint64_t nb_burst;
1905 	uint64_t burst_stats[4];
1906 	uint16_t pktnb_stats[4];
1907 	uint16_t nb_pkt;
1908 	int burst_percent[4], sburstp;
1909 	int i;
1910 
1911 	/*
1912 	 * First compute the total number of packet bursts and the
1913 	 * two highest numbers of bursts of the same number of packets.
1914 	 */
1915 	memset(&burst_stats, 0x0, sizeof(burst_stats));
1916 	memset(&pktnb_stats, 0x0, sizeof(pktnb_stats));
1917 
1918 	/* Show stats for 0 burst size always */
1919 	total_burst = pbs->pkt_burst_spread[0];
1920 	burst_stats[0] = pbs->pkt_burst_spread[0];
1921 	pktnb_stats[0] = 0;
1922 
1923 	/* Find the next 2 burst sizes with highest occurrences. */
1924 	for (nb_pkt = 1; nb_pkt < MAX_PKT_BURST + 1; nb_pkt++) {
1925 		nb_burst = pbs->pkt_burst_spread[nb_pkt];
1926 
1927 		if (nb_burst == 0)
1928 			continue;
1929 
1930 		total_burst += nb_burst;
1931 
1932 		if (nb_burst > burst_stats[1]) {
1933 			burst_stats[2] = burst_stats[1];
1934 			pktnb_stats[2] = pktnb_stats[1];
1935 			burst_stats[1] = nb_burst;
1936 			pktnb_stats[1] = nb_pkt;
1937 		} else if (nb_burst > burst_stats[2]) {
1938 			burst_stats[2] = nb_burst;
1939 			pktnb_stats[2] = nb_pkt;
1940 		}
1941 	}
1942 	if (total_burst == 0)
1943 		return;
1944 
1945 	printf("  %s-bursts : %"PRIu64" [", rx_tx, total_burst);
1946 	for (i = 0, sburst = 0, sburstp = 0; i < 4; i++) {
1947 		if (i == 3) {
1948 			printf("%d%% of other]\n", 100 - sburstp);
1949 			return;
1950 		}
1951 
1952 		sburst += burst_stats[i];
1953 		if (sburst == total_burst) {
1954 			printf("%d%% of %d pkts]\n",
1955 				100 - sburstp, (int) pktnb_stats[i]);
1956 			return;
1957 		}
1958 
1959 		burst_percent[i] =
1960 			(double)burst_stats[i] / total_burst * 100;
1961 		printf("%d%% of %d pkts + ",
1962 			burst_percent[i], (int) pktnb_stats[i]);
1963 		sburstp += burst_percent[i];
1964 	}
1965 }
1966 
1967 static void
1968 fwd_stream_stats_display(streamid_t stream_id)
1969 {
1970 	struct fwd_stream *fs;
1971 	static const char *fwd_top_stats_border = "-------";
1972 
1973 	fs = fwd_streams[stream_id];
1974 	if ((fs->rx_packets == 0) && (fs->tx_packets == 0) &&
1975 	    (fs->fwd_dropped == 0))
1976 		return;
1977 	printf("\n  %s Forward Stats for RX Port=%2d/Queue=%2d -> "
1978 	       "TX Port=%2d/Queue=%2d %s\n",
1979 	       fwd_top_stats_border, fs->rx_port, fs->rx_queue,
1980 	       fs->tx_port, fs->tx_queue, fwd_top_stats_border);
1981 	printf("  RX-packets: %-14"PRIu64" TX-packets: %-14"PRIu64
1982 	       " TX-dropped: %-14"PRIu64,
1983 	       fs->rx_packets, fs->tx_packets, fs->fwd_dropped);
1984 
1985 	/* if checksum mode */
1986 	if (cur_fwd_eng == &csum_fwd_engine) {
1987 		printf("  RX- bad IP checksum: %-14"PRIu64
1988 		       "  Rx- bad L4 checksum: %-14"PRIu64
1989 		       " Rx- bad outer L4 checksum: %-14"PRIu64"\n",
1990 			fs->rx_bad_ip_csum, fs->rx_bad_l4_csum,
1991 			fs->rx_bad_outer_l4_csum);
1992 		printf(" RX- bad outer IP checksum: %-14"PRIu64"\n",
1993 			fs->rx_bad_outer_ip_csum);
1994 	} else {
1995 		printf("\n");
1996 	}
1997 
1998 	if (record_burst_stats) {
1999 		pkt_burst_stats_display("RX", &fs->rx_burst_stats);
2000 		pkt_burst_stats_display("TX", &fs->tx_burst_stats);
2001 	}
2002 }
2003 
2004 void
2005 fwd_stats_display(void)
2006 {
2007 	static const char *fwd_stats_border = "----------------------";
2008 	static const char *acc_stats_border = "+++++++++++++++";
2009 	struct {
2010 		struct fwd_stream *rx_stream;
2011 		struct fwd_stream *tx_stream;
2012 		uint64_t tx_dropped;
2013 		uint64_t rx_bad_ip_csum;
2014 		uint64_t rx_bad_l4_csum;
2015 		uint64_t rx_bad_outer_l4_csum;
2016 		uint64_t rx_bad_outer_ip_csum;
2017 	} ports_stats[RTE_MAX_ETHPORTS];
2018 	uint64_t total_rx_dropped = 0;
2019 	uint64_t total_tx_dropped = 0;
2020 	uint64_t total_rx_nombuf = 0;
2021 	struct rte_eth_stats stats;
2022 	uint64_t fwd_cycles = 0;
2023 	uint64_t total_recv = 0;
2024 	uint64_t total_xmit = 0;
2025 	struct rte_port *port;
2026 	streamid_t sm_id;
2027 	portid_t pt_id;
2028 	int ret;
2029 	int i;
2030 
2031 	memset(ports_stats, 0, sizeof(ports_stats));
2032 
2033 	for (sm_id = 0; sm_id < cur_fwd_config.nb_fwd_streams; sm_id++) {
2034 		struct fwd_stream *fs = fwd_streams[sm_id];
2035 
2036 		if (cur_fwd_config.nb_fwd_streams >
2037 		    cur_fwd_config.nb_fwd_ports) {
2038 			fwd_stream_stats_display(sm_id);
2039 		} else {
2040 			ports_stats[fs->tx_port].tx_stream = fs;
2041 			ports_stats[fs->rx_port].rx_stream = fs;
2042 		}
2043 
2044 		ports_stats[fs->tx_port].tx_dropped += fs->fwd_dropped;
2045 
2046 		ports_stats[fs->rx_port].rx_bad_ip_csum += fs->rx_bad_ip_csum;
2047 		ports_stats[fs->rx_port].rx_bad_l4_csum += fs->rx_bad_l4_csum;
2048 		ports_stats[fs->rx_port].rx_bad_outer_l4_csum +=
2049 				fs->rx_bad_outer_l4_csum;
2050 		ports_stats[fs->rx_port].rx_bad_outer_ip_csum +=
2051 				fs->rx_bad_outer_ip_csum;
2052 
2053 		if (record_core_cycles)
2054 			fwd_cycles += fs->core_cycles;
2055 	}
2056 	for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
2057 		pt_id = fwd_ports_ids[i];
2058 		port = &ports[pt_id];
2059 
2060 		ret = rte_eth_stats_get(pt_id, &stats);
2061 		if (ret != 0) {
2062 			fprintf(stderr,
2063 				"%s: Error: failed to get stats (port %u): %d",
2064 				__func__, pt_id, ret);
2065 			continue;
2066 		}
2067 		stats.ipackets -= port->stats.ipackets;
2068 		stats.opackets -= port->stats.opackets;
2069 		stats.ibytes -= port->stats.ibytes;
2070 		stats.obytes -= port->stats.obytes;
2071 		stats.imissed -= port->stats.imissed;
2072 		stats.oerrors -= port->stats.oerrors;
2073 		stats.rx_nombuf -= port->stats.rx_nombuf;
2074 
2075 		total_recv += stats.ipackets;
2076 		total_xmit += stats.opackets;
2077 		total_rx_dropped += stats.imissed;
2078 		total_tx_dropped += ports_stats[pt_id].tx_dropped;
2079 		total_tx_dropped += stats.oerrors;
2080 		total_rx_nombuf  += stats.rx_nombuf;
2081 
2082 		printf("\n  %s Forward statistics for port %-2d %s\n",
2083 		       fwd_stats_border, pt_id, fwd_stats_border);
2084 
2085 		printf("  RX-packets: %-14"PRIu64" RX-dropped: %-14"PRIu64
2086 		       "RX-total: %-"PRIu64"\n", stats.ipackets, stats.imissed,
2087 		       stats.ipackets + stats.imissed);
2088 
2089 		if (cur_fwd_eng == &csum_fwd_engine) {
2090 			printf("  Bad-ipcsum: %-14"PRIu64
2091 			       " Bad-l4csum: %-14"PRIu64
2092 			       "Bad-outer-l4csum: %-14"PRIu64"\n",
2093 			       ports_stats[pt_id].rx_bad_ip_csum,
2094 			       ports_stats[pt_id].rx_bad_l4_csum,
2095 			       ports_stats[pt_id].rx_bad_outer_l4_csum);
2096 			printf("  Bad-outer-ipcsum: %-14"PRIu64"\n",
2097 			       ports_stats[pt_id].rx_bad_outer_ip_csum);
2098 		}
2099 		if (stats.ierrors + stats.rx_nombuf > 0) {
2100 			printf("  RX-error: %-"PRIu64"\n", stats.ierrors);
2101 			printf("  RX-nombufs: %-14"PRIu64"\n", stats.rx_nombuf);
2102 		}
2103 
2104 		printf("  TX-packets: %-14"PRIu64" TX-dropped: %-14"PRIu64
2105 		       "TX-total: %-"PRIu64"\n",
2106 		       stats.opackets, ports_stats[pt_id].tx_dropped,
2107 		       stats.opackets + ports_stats[pt_id].tx_dropped);
2108 
2109 		if (record_burst_stats) {
2110 			if (ports_stats[pt_id].rx_stream)
2111 				pkt_burst_stats_display("RX",
2112 					&ports_stats[pt_id].rx_stream->rx_burst_stats);
2113 			if (ports_stats[pt_id].tx_stream)
2114 				pkt_burst_stats_display("TX",
2115 				&ports_stats[pt_id].tx_stream->tx_burst_stats);
2116 		}
2117 
2118 		printf("  %s--------------------------------%s\n",
2119 		       fwd_stats_border, fwd_stats_border);
2120 	}
2121 
2122 	printf("\n  %s Accumulated forward statistics for all ports"
2123 	       "%s\n",
2124 	       acc_stats_border, acc_stats_border);
2125 	printf("  RX-packets: %-14"PRIu64" RX-dropped: %-14"PRIu64"RX-total: "
2126 	       "%-"PRIu64"\n"
2127 	       "  TX-packets: %-14"PRIu64" TX-dropped: %-14"PRIu64"TX-total: "
2128 	       "%-"PRIu64"\n",
2129 	       total_recv, total_rx_dropped, total_recv + total_rx_dropped,
2130 	       total_xmit, total_tx_dropped, total_xmit + total_tx_dropped);
2131 	if (total_rx_nombuf > 0)
2132 		printf("  RX-nombufs: %-14"PRIu64"\n", total_rx_nombuf);
2133 	printf("  %s++++++++++++++++++++++++++++++++++++++++++++++"
2134 	       "%s\n",
2135 	       acc_stats_border, acc_stats_border);
2136 	if (record_core_cycles) {
2137 #define CYC_PER_MHZ 1E6
2138 		if (total_recv > 0 || total_xmit > 0) {
2139 			uint64_t total_pkts = 0;
2140 			if (strcmp(cur_fwd_eng->fwd_mode_name, "txonly") == 0 ||
2141 			    strcmp(cur_fwd_eng->fwd_mode_name, "flowgen") == 0)
2142 				total_pkts = total_xmit;
2143 			else
2144 				total_pkts = total_recv;
2145 
2146 			printf("\n  CPU cycles/packet=%.2F (total cycles="
2147 			       "%"PRIu64" / total %s packets=%"PRIu64") at %"PRIu64
2148 			       " MHz Clock\n",
2149 			       (double) fwd_cycles / total_pkts,
2150 			       fwd_cycles, cur_fwd_eng->fwd_mode_name, total_pkts,
2151 			       (uint64_t)(rte_get_tsc_hz() / CYC_PER_MHZ));
2152 		}
2153 	}
2154 }
2155 
2156 void
2157 fwd_stats_reset(void)
2158 {
2159 	streamid_t sm_id;
2160 	portid_t pt_id;
2161 	int ret;
2162 	int i;
2163 
2164 	for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
2165 		pt_id = fwd_ports_ids[i];
2166 		ret = rte_eth_stats_get(pt_id, &ports[pt_id].stats);
2167 		if (ret != 0)
2168 			fprintf(stderr,
2169 				"%s: Error: failed to clear stats (port %u):%d",
2170 				__func__, pt_id, ret);
2171 	}
2172 	for (sm_id = 0; sm_id < cur_fwd_config.nb_fwd_streams; sm_id++) {
2173 		struct fwd_stream *fs = fwd_streams[sm_id];
2174 
2175 		fs->rx_packets = 0;
2176 		fs->tx_packets = 0;
2177 		fs->fwd_dropped = 0;
2178 		fs->rx_bad_ip_csum = 0;
2179 		fs->rx_bad_l4_csum = 0;
2180 		fs->rx_bad_outer_l4_csum = 0;
2181 		fs->rx_bad_outer_ip_csum = 0;
2182 
2183 		memset(&fs->rx_burst_stats, 0, sizeof(fs->rx_burst_stats));
2184 		memset(&fs->tx_burst_stats, 0, sizeof(fs->tx_burst_stats));
2185 		fs->core_cycles = 0;
2186 	}
2187 }
2188 
2189 static void
2190 flush_fwd_rx_queues(void)
2191 {
2192 	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
2193 	portid_t  rxp;
2194 	portid_t port_id;
2195 	queueid_t rxq;
2196 	uint16_t  nb_rx;
2197 	uint16_t  i;
2198 	uint8_t   j;
2199 	uint64_t prev_tsc = 0, diff_tsc, cur_tsc, timer_tsc = 0;
2200 	uint64_t timer_period;
2201 
2202 	if (num_procs > 1) {
2203 		printf("multi-process not support for flushing fwd Rx queues, skip the below lines and return.\n");
2204 		return;
2205 	}
2206 
2207 	/* convert to number of cycles */
2208 	timer_period = rte_get_timer_hz(); /* 1 second timeout */
2209 
2210 	for (j = 0; j < 2; j++) {
2211 		for (rxp = 0; rxp < cur_fwd_config.nb_fwd_ports; rxp++) {
2212 			for (rxq = 0; rxq < nb_rxq; rxq++) {
2213 				port_id = fwd_ports_ids[rxp];
2214 
2215 				/* Polling stopped queues is prohibited. */
2216 				if (ports[port_id].rxq[rxq].state ==
2217 				    RTE_ETH_QUEUE_STATE_STOPPED)
2218 					continue;
2219 
2220 				/**
2221 				* testpmd can stuck in the below do while loop
2222 				* if rte_eth_rx_burst() always returns nonzero
2223 				* packets. So timer is added to exit this loop
2224 				* after 1sec timer expiry.
2225 				*/
2226 				prev_tsc = rte_rdtsc();
2227 				do {
2228 					nb_rx = rte_eth_rx_burst(port_id, rxq,
2229 						pkts_burst, MAX_PKT_BURST);
2230 					for (i = 0; i < nb_rx; i++)
2231 						rte_pktmbuf_free(pkts_burst[i]);
2232 
2233 					cur_tsc = rte_rdtsc();
2234 					diff_tsc = cur_tsc - prev_tsc;
2235 					timer_tsc += diff_tsc;
2236 				} while ((nb_rx > 0) &&
2237 					(timer_tsc < timer_period));
2238 				timer_tsc = 0;
2239 			}
2240 		}
2241 		rte_delay_ms(10); /* wait 10 milli-seconds before retrying */
2242 	}
2243 }
2244 
2245 static void
2246 run_pkt_fwd_on_lcore(struct fwd_lcore *fc, packet_fwd_t pkt_fwd)
2247 {
2248 	struct fwd_stream **fsm;
2249 	streamid_t nb_fs;
2250 	streamid_t sm_id;
2251 #ifdef RTE_LIB_BITRATESTATS
2252 	uint64_t tics_per_1sec;
2253 	uint64_t tics_datum;
2254 	uint64_t tics_current;
2255 	uint16_t i, cnt_ports;
2256 
2257 	cnt_ports = nb_ports;
2258 	tics_datum = rte_rdtsc();
2259 	tics_per_1sec = rte_get_timer_hz();
2260 #endif
2261 	fsm = &fwd_streams[fc->stream_idx];
2262 	nb_fs = fc->stream_nb;
2263 	do {
2264 		for (sm_id = 0; sm_id < nb_fs; sm_id++)
2265 			if (!fsm[sm_id]->disabled)
2266 				(*pkt_fwd)(fsm[sm_id]);
2267 #ifdef RTE_LIB_BITRATESTATS
2268 		if (bitrate_enabled != 0 &&
2269 				bitrate_lcore_id == rte_lcore_id()) {
2270 			tics_current = rte_rdtsc();
2271 			if (tics_current - tics_datum >= tics_per_1sec) {
2272 				/* Periodic bitrate calculation */
2273 				for (i = 0; i < cnt_ports; i++)
2274 					rte_stats_bitrate_calc(bitrate_data,
2275 						ports_ids[i]);
2276 				tics_datum = tics_current;
2277 			}
2278 		}
2279 #endif
2280 #ifdef RTE_LIB_LATENCYSTATS
2281 		if (latencystats_enabled != 0 &&
2282 				latencystats_lcore_id == rte_lcore_id())
2283 			rte_latencystats_update();
2284 #endif
2285 
2286 	} while (! fc->stopped);
2287 }
2288 
2289 static int
2290 start_pkt_forward_on_core(void *fwd_arg)
2291 {
2292 	run_pkt_fwd_on_lcore((struct fwd_lcore *) fwd_arg,
2293 			     cur_fwd_config.fwd_eng->packet_fwd);
2294 	return 0;
2295 }
2296 
2297 /*
2298  * Run the TXONLY packet forwarding engine to send a single burst of packets.
2299  * Used to start communication flows in network loopback test configurations.
2300  */
2301 static int
2302 run_one_txonly_burst_on_core(void *fwd_arg)
2303 {
2304 	struct fwd_lcore *fwd_lc;
2305 	struct fwd_lcore tmp_lcore;
2306 
2307 	fwd_lc = (struct fwd_lcore *) fwd_arg;
2308 	tmp_lcore = *fwd_lc;
2309 	tmp_lcore.stopped = 1;
2310 	run_pkt_fwd_on_lcore(&tmp_lcore, tx_only_engine.packet_fwd);
2311 	return 0;
2312 }
2313 
2314 /*
2315  * Launch packet forwarding:
2316  *     - Setup per-port forwarding context.
2317  *     - launch logical cores with their forwarding configuration.
2318  */
2319 static void
2320 launch_packet_forwarding(lcore_function_t *pkt_fwd_on_lcore)
2321 {
2322 	unsigned int i;
2323 	unsigned int lc_id;
2324 	int diag;
2325 
2326 	for (i = 0; i < cur_fwd_config.nb_fwd_lcores; i++) {
2327 		lc_id = fwd_lcores_cpuids[i];
2328 		if ((interactive == 0) || (lc_id != rte_lcore_id())) {
2329 			fwd_lcores[i]->stopped = 0;
2330 			diag = rte_eal_remote_launch(pkt_fwd_on_lcore,
2331 						     fwd_lcores[i], lc_id);
2332 			if (diag != 0)
2333 				fprintf(stderr,
2334 					"launch lcore %u failed - diag=%d\n",
2335 					lc_id, diag);
2336 		}
2337 	}
2338 }
2339 
2340 /*
2341  * Launch packet forwarding configuration.
2342  */
2343 void
2344 start_packet_forwarding(int with_tx_first)
2345 {
2346 	port_fwd_begin_t port_fwd_begin;
2347 	port_fwd_end_t  port_fwd_end;
2348 	stream_init_t stream_init = cur_fwd_eng->stream_init;
2349 	unsigned int i;
2350 
2351 	if (strcmp(cur_fwd_eng->fwd_mode_name, "rxonly") == 0 && !nb_rxq)
2352 		rte_exit(EXIT_FAILURE, "rxq are 0, cannot use rxonly fwd mode\n");
2353 
2354 	if (strcmp(cur_fwd_eng->fwd_mode_name, "txonly") == 0 && !nb_txq)
2355 		rte_exit(EXIT_FAILURE, "txq are 0, cannot use txonly fwd mode\n");
2356 
2357 	if ((strcmp(cur_fwd_eng->fwd_mode_name, "rxonly") != 0 &&
2358 		strcmp(cur_fwd_eng->fwd_mode_name, "txonly") != 0) &&
2359 		(!nb_rxq || !nb_txq))
2360 		rte_exit(EXIT_FAILURE,
2361 			"Either rxq or txq are 0, cannot use %s fwd mode\n",
2362 			cur_fwd_eng->fwd_mode_name);
2363 
2364 	if (all_ports_started() == 0) {
2365 		fprintf(stderr, "Not all ports were started\n");
2366 		return;
2367 	}
2368 	if (test_done == 0) {
2369 		fprintf(stderr, "Packet forwarding already started\n");
2370 		return;
2371 	}
2372 
2373 	fwd_config_setup();
2374 
2375 	pkt_fwd_config_display(&cur_fwd_config);
2376 	if (!pkt_fwd_shared_rxq_check())
2377 		return;
2378 
2379 	if (stream_init != NULL)
2380 		for (i = 0; i < cur_fwd_config.nb_fwd_streams; i++)
2381 			stream_init(fwd_streams[i]);
2382 
2383 	port_fwd_begin = cur_fwd_config.fwd_eng->port_fwd_begin;
2384 	if (port_fwd_begin != NULL) {
2385 		for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
2386 			if (port_fwd_begin(fwd_ports_ids[i])) {
2387 				fprintf(stderr,
2388 					"Packet forwarding is not ready\n");
2389 				return;
2390 			}
2391 		}
2392 	}
2393 
2394 	if (with_tx_first) {
2395 		port_fwd_begin = tx_only_engine.port_fwd_begin;
2396 		if (port_fwd_begin != NULL) {
2397 			for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
2398 				if (port_fwd_begin(fwd_ports_ids[i])) {
2399 					fprintf(stderr,
2400 						"Packet forwarding is not ready\n");
2401 					return;
2402 				}
2403 			}
2404 		}
2405 	}
2406 
2407 	test_done = 0;
2408 
2409 	if(!no_flush_rx)
2410 		flush_fwd_rx_queues();
2411 
2412 	rxtx_config_display();
2413 
2414 	fwd_stats_reset();
2415 	if (with_tx_first) {
2416 		while (with_tx_first--) {
2417 			launch_packet_forwarding(
2418 					run_one_txonly_burst_on_core);
2419 			rte_eal_mp_wait_lcore();
2420 		}
2421 		port_fwd_end = tx_only_engine.port_fwd_end;
2422 		if (port_fwd_end != NULL) {
2423 			for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++)
2424 				(*port_fwd_end)(fwd_ports_ids[i]);
2425 		}
2426 	}
2427 	launch_packet_forwarding(start_pkt_forward_on_core);
2428 }
2429 
2430 void
2431 stop_packet_forwarding(void)
2432 {
2433 	port_fwd_end_t port_fwd_end;
2434 	lcoreid_t lc_id;
2435 	portid_t pt_id;
2436 	int i;
2437 
2438 	if (test_done) {
2439 		fprintf(stderr, "Packet forwarding not started\n");
2440 		return;
2441 	}
2442 	printf("Telling cores to stop...");
2443 	for (lc_id = 0; lc_id < cur_fwd_config.nb_fwd_lcores; lc_id++)
2444 		fwd_lcores[lc_id]->stopped = 1;
2445 	printf("\nWaiting for lcores to finish...\n");
2446 	rte_eal_mp_wait_lcore();
2447 	port_fwd_end = cur_fwd_config.fwd_eng->port_fwd_end;
2448 	if (port_fwd_end != NULL) {
2449 		for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
2450 			pt_id = fwd_ports_ids[i];
2451 			(*port_fwd_end)(pt_id);
2452 		}
2453 	}
2454 
2455 	fwd_stats_display();
2456 
2457 	printf("\nDone.\n");
2458 	test_done = 1;
2459 }
2460 
2461 void
2462 dev_set_link_up(portid_t pid)
2463 {
2464 	if (rte_eth_dev_set_link_up(pid) < 0)
2465 		fprintf(stderr, "\nSet link up fail.\n");
2466 }
2467 
2468 void
2469 dev_set_link_down(portid_t pid)
2470 {
2471 	if (rte_eth_dev_set_link_down(pid) < 0)
2472 		fprintf(stderr, "\nSet link down fail.\n");
2473 }
2474 
2475 static int
2476 all_ports_started(void)
2477 {
2478 	portid_t pi;
2479 	struct rte_port *port;
2480 
2481 	RTE_ETH_FOREACH_DEV(pi) {
2482 		port = &ports[pi];
2483 		/* Check if there is a port which is not started */
2484 		if ((port->port_status != RTE_PORT_STARTED) &&
2485 			(port->slave_flag == 0))
2486 			return 0;
2487 	}
2488 
2489 	/* No port is not started */
2490 	return 1;
2491 }
2492 
2493 int
2494 port_is_stopped(portid_t port_id)
2495 {
2496 	struct rte_port *port = &ports[port_id];
2497 
2498 	if ((port->port_status != RTE_PORT_STOPPED) &&
2499 	    (port->slave_flag == 0))
2500 		return 0;
2501 	return 1;
2502 }
2503 
2504 int
2505 all_ports_stopped(void)
2506 {
2507 	portid_t pi;
2508 
2509 	RTE_ETH_FOREACH_DEV(pi) {
2510 		if (!port_is_stopped(pi))
2511 			return 0;
2512 	}
2513 
2514 	return 1;
2515 }
2516 
2517 int
2518 port_is_started(portid_t port_id)
2519 {
2520 	if (port_id_is_invalid(port_id, ENABLED_WARN))
2521 		return 0;
2522 
2523 	if (ports[port_id].port_status != RTE_PORT_STARTED)
2524 		return 0;
2525 
2526 	return 1;
2527 }
2528 
2529 #define HAIRPIN_MODE_RX_FORCE_MEMORY RTE_BIT32(8)
2530 #define HAIRPIN_MODE_TX_FORCE_MEMORY RTE_BIT32(9)
2531 
2532 #define HAIRPIN_MODE_RX_LOCKED_MEMORY RTE_BIT32(12)
2533 #define HAIRPIN_MODE_RX_RTE_MEMORY RTE_BIT32(13)
2534 
2535 #define HAIRPIN_MODE_TX_LOCKED_MEMORY RTE_BIT32(16)
2536 #define HAIRPIN_MODE_TX_RTE_MEMORY RTE_BIT32(17)
2537 
2538 
2539 /* Configure the Rx and Tx hairpin queues for the selected port. */
2540 static int
2541 setup_hairpin_queues(portid_t pi, portid_t p_pi, uint16_t cnt_pi)
2542 {
2543 	queueid_t qi;
2544 	struct rte_eth_hairpin_conf hairpin_conf = {
2545 		.peer_count = 1,
2546 	};
2547 	int i;
2548 	int diag;
2549 	struct rte_port *port = &ports[pi];
2550 	uint16_t peer_rx_port = pi;
2551 	uint16_t peer_tx_port = pi;
2552 	uint32_t manual = 1;
2553 	uint32_t tx_exp = hairpin_mode & 0x10;
2554 	uint32_t rx_force_memory = hairpin_mode & HAIRPIN_MODE_RX_FORCE_MEMORY;
2555 	uint32_t rx_locked_memory = hairpin_mode & HAIRPIN_MODE_RX_LOCKED_MEMORY;
2556 	uint32_t rx_rte_memory = hairpin_mode & HAIRPIN_MODE_RX_RTE_MEMORY;
2557 	uint32_t tx_force_memory = hairpin_mode & HAIRPIN_MODE_TX_FORCE_MEMORY;
2558 	uint32_t tx_locked_memory = hairpin_mode & HAIRPIN_MODE_TX_LOCKED_MEMORY;
2559 	uint32_t tx_rte_memory = hairpin_mode & HAIRPIN_MODE_TX_RTE_MEMORY;
2560 
2561 	if (!(hairpin_mode & 0xf)) {
2562 		peer_rx_port = pi;
2563 		peer_tx_port = pi;
2564 		manual = 0;
2565 	} else if (hairpin_mode & 0x1) {
2566 		peer_tx_port = rte_eth_find_next_owned_by(pi + 1,
2567 						       RTE_ETH_DEV_NO_OWNER);
2568 		if (peer_tx_port >= RTE_MAX_ETHPORTS)
2569 			peer_tx_port = rte_eth_find_next_owned_by(0,
2570 						RTE_ETH_DEV_NO_OWNER);
2571 		if (p_pi != RTE_MAX_ETHPORTS) {
2572 			peer_rx_port = p_pi;
2573 		} else {
2574 			uint16_t next_pi;
2575 
2576 			/* Last port will be the peer RX port of the first. */
2577 			RTE_ETH_FOREACH_DEV(next_pi)
2578 				peer_rx_port = next_pi;
2579 		}
2580 		manual = 1;
2581 	} else if (hairpin_mode & 0x2) {
2582 		if (cnt_pi & 0x1) {
2583 			peer_rx_port = p_pi;
2584 		} else {
2585 			peer_rx_port = rte_eth_find_next_owned_by(pi + 1,
2586 						RTE_ETH_DEV_NO_OWNER);
2587 			if (peer_rx_port >= RTE_MAX_ETHPORTS)
2588 				peer_rx_port = pi;
2589 		}
2590 		peer_tx_port = peer_rx_port;
2591 		manual = 1;
2592 	}
2593 
2594 	for (qi = nb_txq, i = 0; qi < nb_hairpinq + nb_txq; qi++) {
2595 		hairpin_conf.peers[0].port = peer_rx_port;
2596 		hairpin_conf.peers[0].queue = i + nb_rxq;
2597 		hairpin_conf.manual_bind = !!manual;
2598 		hairpin_conf.tx_explicit = !!tx_exp;
2599 		hairpin_conf.force_memory = !!tx_force_memory;
2600 		hairpin_conf.use_locked_device_memory = !!tx_locked_memory;
2601 		hairpin_conf.use_rte_memory = !!tx_rte_memory;
2602 		diag = rte_eth_tx_hairpin_queue_setup
2603 			(pi, qi, nb_txd, &hairpin_conf);
2604 		i++;
2605 		if (diag == 0)
2606 			continue;
2607 
2608 		/* Fail to setup rx queue, return */
2609 		if (port->port_status == RTE_PORT_HANDLING)
2610 			port->port_status = RTE_PORT_STOPPED;
2611 		else
2612 			fprintf(stderr,
2613 				"Port %d can not be set back to stopped\n", pi);
2614 		fprintf(stderr, "Fail to configure port %d hairpin queues\n",
2615 			pi);
2616 		/* try to reconfigure queues next time */
2617 		port->need_reconfig_queues = 1;
2618 		return -1;
2619 	}
2620 	for (qi = nb_rxq, i = 0; qi < nb_hairpinq + nb_rxq; qi++) {
2621 		hairpin_conf.peers[0].port = peer_tx_port;
2622 		hairpin_conf.peers[0].queue = i + nb_txq;
2623 		hairpin_conf.manual_bind = !!manual;
2624 		hairpin_conf.tx_explicit = !!tx_exp;
2625 		hairpin_conf.force_memory = !!rx_force_memory;
2626 		hairpin_conf.use_locked_device_memory = !!rx_locked_memory;
2627 		hairpin_conf.use_rte_memory = !!rx_rte_memory;
2628 		diag = rte_eth_rx_hairpin_queue_setup
2629 			(pi, qi, nb_rxd, &hairpin_conf);
2630 		i++;
2631 		if (diag == 0)
2632 			continue;
2633 
2634 		/* Fail to setup rx queue, return */
2635 		if (port->port_status == RTE_PORT_HANDLING)
2636 			port->port_status = RTE_PORT_STOPPED;
2637 		else
2638 			fprintf(stderr,
2639 				"Port %d can not be set back to stopped\n", pi);
2640 		fprintf(stderr, "Fail to configure port %d hairpin queues\n",
2641 			pi);
2642 		/* try to reconfigure queues next time */
2643 		port->need_reconfig_queues = 1;
2644 		return -1;
2645 	}
2646 	return 0;
2647 }
2648 
2649 /* Configure the Rx with optional split. */
2650 int
2651 rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
2652 	       uint16_t nb_rx_desc, unsigned int socket_id,
2653 	       struct rte_eth_rxconf *rx_conf, struct rte_mempool *mp)
2654 {
2655 	union rte_eth_rxseg rx_useg[MAX_SEGS_BUFFER_SPLIT] = {};
2656 	struct rte_mempool *rx_mempool[MAX_MEMPOOL] = {};
2657 	struct rte_mempool *mpx;
2658 	unsigned int i, mp_n;
2659 	uint32_t prev_hdrs = 0;
2660 	int ret;
2661 
2662 	/* Verify Rx queue configuration is single pool and segment or
2663 	 * multiple pool/segment.
2664 	 * @see rte_eth_rxconf::rx_mempools
2665 	 * @see rte_eth_rxconf::rx_seg
2666 	 */
2667 	if (!(mbuf_data_size_n > 1) && !(rx_pkt_nb_segs > 1 ||
2668 	    ((rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) != 0))) {
2669 		/* Single pool/segment configuration */
2670 		rx_conf->rx_seg = NULL;
2671 		rx_conf->rx_nseg = 0;
2672 		ret = rte_eth_rx_queue_setup(port_id, rx_queue_id,
2673 					     nb_rx_desc, socket_id,
2674 					     rx_conf, mp);
2675 		goto exit;
2676 	}
2677 
2678 	if (rx_pkt_nb_segs > 1 ||
2679 	    rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) {
2680 		/* multi-segment configuration */
2681 		for (i = 0; i < rx_pkt_nb_segs; i++) {
2682 			struct rte_eth_rxseg_split *rx_seg = &rx_useg[i].split;
2683 			/*
2684 			 * Use last valid pool for the segments with number
2685 			 * exceeding the pool index.
2686 			 */
2687 			mp_n = (i >= mbuf_data_size_n) ? mbuf_data_size_n - 1 : i;
2688 			mpx = mbuf_pool_find(socket_id, mp_n);
2689 			/* Handle zero as mbuf data buffer size. */
2690 			rx_seg->offset = i < rx_pkt_nb_offs ?
2691 					   rx_pkt_seg_offsets[i] : 0;
2692 			rx_seg->mp = mpx ? mpx : mp;
2693 			if (rx_pkt_hdr_protos[i] != 0 && rx_pkt_seg_lengths[i] == 0) {
2694 				rx_seg->proto_hdr = rx_pkt_hdr_protos[i] & ~prev_hdrs;
2695 				prev_hdrs |= rx_seg->proto_hdr;
2696 			} else {
2697 				rx_seg->length = rx_pkt_seg_lengths[i] ?
2698 						rx_pkt_seg_lengths[i] :
2699 						mbuf_data_size[mp_n];
2700 			}
2701 		}
2702 		rx_conf->rx_nseg = rx_pkt_nb_segs;
2703 		rx_conf->rx_seg = rx_useg;
2704 	} else {
2705 		/* multi-pool configuration */
2706 		for (i = 0; i < mbuf_data_size_n; i++) {
2707 			mpx = mbuf_pool_find(socket_id, i);
2708 			rx_mempool[i] = mpx ? mpx : mp;
2709 		}
2710 		rx_conf->rx_mempools = rx_mempool;
2711 		rx_conf->rx_nmempool = mbuf_data_size_n;
2712 	}
2713 	ret = rte_eth_rx_queue_setup(port_id, rx_queue_id, nb_rx_desc,
2714 				    socket_id, rx_conf, NULL);
2715 	rx_conf->rx_seg = NULL;
2716 	rx_conf->rx_nseg = 0;
2717 	rx_conf->rx_mempools = NULL;
2718 	rx_conf->rx_nmempool = 0;
2719 exit:
2720 	ports[port_id].rxq[rx_queue_id].state = rx_conf->rx_deferred_start ?
2721 						RTE_ETH_QUEUE_STATE_STOPPED :
2722 						RTE_ETH_QUEUE_STATE_STARTED;
2723 	return ret;
2724 }
2725 
2726 static int
2727 alloc_xstats_display_info(portid_t pi)
2728 {
2729 	uint64_t **ids_supp = &ports[pi].xstats_info.ids_supp;
2730 	uint64_t **prev_values = &ports[pi].xstats_info.prev_values;
2731 	uint64_t **curr_values = &ports[pi].xstats_info.curr_values;
2732 
2733 	if (xstats_display_num == 0)
2734 		return 0;
2735 
2736 	*ids_supp = calloc(xstats_display_num, sizeof(**ids_supp));
2737 	if (*ids_supp == NULL)
2738 		goto fail_ids_supp;
2739 
2740 	*prev_values = calloc(xstats_display_num,
2741 			      sizeof(**prev_values));
2742 	if (*prev_values == NULL)
2743 		goto fail_prev_values;
2744 
2745 	*curr_values = calloc(xstats_display_num,
2746 			      sizeof(**curr_values));
2747 	if (*curr_values == NULL)
2748 		goto fail_curr_values;
2749 
2750 	ports[pi].xstats_info.allocated = true;
2751 
2752 	return 0;
2753 
2754 fail_curr_values:
2755 	free(*prev_values);
2756 fail_prev_values:
2757 	free(*ids_supp);
2758 fail_ids_supp:
2759 	return -ENOMEM;
2760 }
2761 
2762 static void
2763 free_xstats_display_info(portid_t pi)
2764 {
2765 	if (!ports[pi].xstats_info.allocated)
2766 		return;
2767 	free(ports[pi].xstats_info.ids_supp);
2768 	free(ports[pi].xstats_info.prev_values);
2769 	free(ports[pi].xstats_info.curr_values);
2770 	ports[pi].xstats_info.allocated = false;
2771 }
2772 
2773 /** Fill helper structures for specified port to show extended statistics. */
2774 static void
2775 fill_xstats_display_info_for_port(portid_t pi)
2776 {
2777 	unsigned int stat, stat_supp;
2778 	const char *xstat_name;
2779 	struct rte_port *port;
2780 	uint64_t *ids_supp;
2781 	int rc;
2782 
2783 	if (xstats_display_num == 0)
2784 		return;
2785 
2786 	if (pi == (portid_t)RTE_PORT_ALL) {
2787 		fill_xstats_display_info();
2788 		return;
2789 	}
2790 
2791 	port = &ports[pi];
2792 	if (port->port_status != RTE_PORT_STARTED)
2793 		return;
2794 
2795 	if (!port->xstats_info.allocated && alloc_xstats_display_info(pi) != 0)
2796 		rte_exit(EXIT_FAILURE,
2797 			 "Failed to allocate xstats display memory\n");
2798 
2799 	ids_supp = port->xstats_info.ids_supp;
2800 	for (stat = stat_supp = 0; stat < xstats_display_num; stat++) {
2801 		xstat_name = xstats_display[stat].name;
2802 		rc = rte_eth_xstats_get_id_by_name(pi, xstat_name,
2803 						   ids_supp + stat_supp);
2804 		if (rc != 0) {
2805 			fprintf(stderr, "No xstat '%s' on port %u - skip it %u\n",
2806 				xstat_name, pi, stat);
2807 			continue;
2808 		}
2809 		stat_supp++;
2810 	}
2811 
2812 	port->xstats_info.ids_supp_sz = stat_supp;
2813 }
2814 
2815 /** Fill helper structures for all ports to show extended statistics. */
2816 static void
2817 fill_xstats_display_info(void)
2818 {
2819 	portid_t pi;
2820 
2821 	if (xstats_display_num == 0)
2822 		return;
2823 
2824 	RTE_ETH_FOREACH_DEV(pi)
2825 		fill_xstats_display_info_for_port(pi);
2826 }
2827 
2828 /*
2829  * Some capabilities (like, rx_offload_capa and tx_offload_capa) of bonding
2830  * device in dev_info is zero when no slave is added. And its capability
2831  * will be updated when add a new slave device. So adding a slave device need
2832  * to update the port configurations of bonding device.
2833  */
2834 static void
2835 update_bonding_port_dev_conf(portid_t bond_pid)
2836 {
2837 #ifdef RTE_NET_BOND
2838 	struct rte_port *port = &ports[bond_pid];
2839 	uint16_t i;
2840 	int ret;
2841 
2842 	ret = eth_dev_info_get_print_err(bond_pid, &port->dev_info);
2843 	if (ret != 0) {
2844 		fprintf(stderr, "Failed to get dev info for port = %u\n",
2845 			bond_pid);
2846 		return;
2847 	}
2848 
2849 	if (port->dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE)
2850 		port->dev_conf.txmode.offloads |=
2851 				RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE;
2852 	/* Apply Tx offloads configuration */
2853 	for (i = 0; i < port->dev_info.max_tx_queues; i++)
2854 		port->txq[i].conf.offloads = port->dev_conf.txmode.offloads;
2855 
2856 	port->dev_conf.rx_adv_conf.rss_conf.rss_hf &=
2857 				port->dev_info.flow_type_rss_offloads;
2858 #else
2859 	RTE_SET_USED(bond_pid);
2860 #endif
2861 }
2862 
2863 int
2864 start_port(portid_t pid)
2865 {
2866 	int diag, need_check_link_status = -1;
2867 	portid_t pi;
2868 	portid_t p_pi = RTE_MAX_ETHPORTS;
2869 	portid_t pl[RTE_MAX_ETHPORTS];
2870 	portid_t peer_pl[RTE_MAX_ETHPORTS];
2871 	uint16_t cnt_pi = 0;
2872 	uint16_t cfg_pi = 0;
2873 	int peer_pi;
2874 	queueid_t qi;
2875 	struct rte_port *port;
2876 	struct rte_eth_hairpin_cap cap;
2877 
2878 	if (port_id_is_invalid(pid, ENABLED_WARN))
2879 		return 0;
2880 
2881 	RTE_ETH_FOREACH_DEV(pi) {
2882 		if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
2883 			continue;
2884 
2885 		if (port_is_bonding_slave(pi)) {
2886 			fprintf(stderr,
2887 				"Please remove port %d from bonded device.\n",
2888 				pi);
2889 			continue;
2890 		}
2891 
2892 		need_check_link_status = 0;
2893 		port = &ports[pi];
2894 		if (port->port_status == RTE_PORT_STOPPED)
2895 			port->port_status = RTE_PORT_HANDLING;
2896 		else {
2897 			fprintf(stderr, "Port %d is now not stopped\n", pi);
2898 			continue;
2899 		}
2900 
2901 		if (port->need_reconfig > 0) {
2902 			struct rte_eth_conf dev_conf;
2903 			int k;
2904 
2905 			port->need_reconfig = 0;
2906 
2907 			if (flow_isolate_all) {
2908 				int ret = port_flow_isolate(pi, 1);
2909 				if (ret) {
2910 					fprintf(stderr,
2911 						"Failed to apply isolated mode on port %d\n",
2912 						pi);
2913 					return -1;
2914 				}
2915 			}
2916 			configure_rxtx_dump_callbacks(0);
2917 			printf("Configuring Port %d (socket %u)\n", pi,
2918 					port->socket_id);
2919 			if (nb_hairpinq > 0 &&
2920 			    rte_eth_dev_hairpin_capability_get(pi, &cap)) {
2921 				fprintf(stderr,
2922 					"Port %d doesn't support hairpin queues\n",
2923 					pi);
2924 				return -1;
2925 			}
2926 
2927 			if (port->bond_flag == 1 && port->update_conf == 1) {
2928 				update_bonding_port_dev_conf(pi);
2929 				port->update_conf = 0;
2930 			}
2931 
2932 			/* configure port */
2933 			diag = eth_dev_configure_mp(pi, nb_rxq + nb_hairpinq,
2934 						     nb_txq + nb_hairpinq,
2935 						     &(port->dev_conf));
2936 			if (diag != 0) {
2937 				if (port->port_status == RTE_PORT_HANDLING)
2938 					port->port_status = RTE_PORT_STOPPED;
2939 				else
2940 					fprintf(stderr,
2941 						"Port %d can not be set back to stopped\n",
2942 						pi);
2943 				fprintf(stderr, "Fail to configure port %d\n",
2944 					pi);
2945 				/* try to reconfigure port next time */
2946 				port->need_reconfig = 1;
2947 				return -1;
2948 			}
2949 			/* get device configuration*/
2950 			if (0 !=
2951 				eth_dev_conf_get_print_err(pi, &dev_conf)) {
2952 				fprintf(stderr,
2953 					"port %d can not get device configuration\n",
2954 					pi);
2955 				return -1;
2956 			}
2957 			/* Apply Rx offloads configuration */
2958 			if (dev_conf.rxmode.offloads !=
2959 			    port->dev_conf.rxmode.offloads) {
2960 				port->dev_conf.rxmode.offloads |=
2961 					dev_conf.rxmode.offloads;
2962 				for (k = 0;
2963 				     k < port->dev_info.max_rx_queues;
2964 				     k++)
2965 					port->rxq[k].conf.offloads |=
2966 						dev_conf.rxmode.offloads;
2967 			}
2968 			/* Apply Tx offloads configuration */
2969 			if (dev_conf.txmode.offloads !=
2970 			    port->dev_conf.txmode.offloads) {
2971 				port->dev_conf.txmode.offloads |=
2972 					dev_conf.txmode.offloads;
2973 				for (k = 0;
2974 				     k < port->dev_info.max_tx_queues;
2975 				     k++)
2976 					port->txq[k].conf.offloads |=
2977 						dev_conf.txmode.offloads;
2978 			}
2979 		}
2980 		if (port->need_reconfig_queues > 0 && is_proc_primary()) {
2981 			port->need_reconfig_queues = 0;
2982 			/* setup tx queues */
2983 			for (qi = 0; qi < nb_txq; qi++) {
2984 				struct rte_eth_txconf *conf =
2985 							&port->txq[qi].conf;
2986 
2987 				if ((numa_support) &&
2988 					(txring_numa[pi] != NUMA_NO_CONFIG))
2989 					diag = rte_eth_tx_queue_setup(pi, qi,
2990 						port->nb_tx_desc[qi],
2991 						txring_numa[pi],
2992 						&(port->txq[qi].conf));
2993 				else
2994 					diag = rte_eth_tx_queue_setup(pi, qi,
2995 						port->nb_tx_desc[qi],
2996 						port->socket_id,
2997 						&(port->txq[qi].conf));
2998 
2999 				if (diag == 0) {
3000 					port->txq[qi].state =
3001 						conf->tx_deferred_start ?
3002 						RTE_ETH_QUEUE_STATE_STOPPED :
3003 						RTE_ETH_QUEUE_STATE_STARTED;
3004 					continue;
3005 				}
3006 
3007 				/* Fail to setup tx queue, return */
3008 				if (port->port_status == RTE_PORT_HANDLING)
3009 					port->port_status = RTE_PORT_STOPPED;
3010 				else
3011 					fprintf(stderr,
3012 						"Port %d can not be set back to stopped\n",
3013 						pi);
3014 				fprintf(stderr,
3015 					"Fail to configure port %d tx queues\n",
3016 					pi);
3017 				/* try to reconfigure queues next time */
3018 				port->need_reconfig_queues = 1;
3019 				return -1;
3020 			}
3021 			for (qi = 0; qi < nb_rxq; qi++) {
3022 				/* setup rx queues */
3023 				if ((numa_support) &&
3024 					(rxring_numa[pi] != NUMA_NO_CONFIG)) {
3025 					struct rte_mempool * mp =
3026 						mbuf_pool_find
3027 							(rxring_numa[pi], 0);
3028 					if (mp == NULL) {
3029 						fprintf(stderr,
3030 							"Failed to setup RX queue: No mempool allocation on the socket %d\n",
3031 							rxring_numa[pi]);
3032 						return -1;
3033 					}
3034 
3035 					diag = rx_queue_setup(pi, qi,
3036 					     port->nb_rx_desc[qi],
3037 					     rxring_numa[pi],
3038 					     &(port->rxq[qi].conf),
3039 					     mp);
3040 				} else {
3041 					struct rte_mempool *mp =
3042 						mbuf_pool_find
3043 							(port->socket_id, 0);
3044 					if (mp == NULL) {
3045 						fprintf(stderr,
3046 							"Failed to setup RX queue: No mempool allocation on the socket %d\n",
3047 							port->socket_id);
3048 						return -1;
3049 					}
3050 					diag = rx_queue_setup(pi, qi,
3051 					     port->nb_rx_desc[qi],
3052 					     port->socket_id,
3053 					     &(port->rxq[qi].conf),
3054 					     mp);
3055 				}
3056 				if (diag == 0)
3057 					continue;
3058 
3059 				/* Fail to setup rx queue, return */
3060 				if (port->port_status == RTE_PORT_HANDLING)
3061 					port->port_status = RTE_PORT_STOPPED;
3062 				else
3063 					fprintf(stderr,
3064 						"Port %d can not be set back to stopped\n",
3065 						pi);
3066 				fprintf(stderr,
3067 					"Fail to configure port %d rx queues\n",
3068 					pi);
3069 				/* try to reconfigure queues next time */
3070 				port->need_reconfig_queues = 1;
3071 				return -1;
3072 			}
3073 			/* setup hairpin queues */
3074 			if (setup_hairpin_queues(pi, p_pi, cnt_pi) != 0)
3075 				return -1;
3076 		}
3077 		configure_rxtx_dump_callbacks(verbose_level);
3078 		if (clear_ptypes) {
3079 			diag = rte_eth_dev_set_ptypes(pi, RTE_PTYPE_UNKNOWN,
3080 					NULL, 0);
3081 			if (diag < 0)
3082 				fprintf(stderr,
3083 					"Port %d: Failed to disable Ptype parsing\n",
3084 					pi);
3085 		}
3086 
3087 		p_pi = pi;
3088 		cnt_pi++;
3089 
3090 		/* start port */
3091 		diag = eth_dev_start_mp(pi);
3092 		if (diag < 0) {
3093 			fprintf(stderr, "Fail to start port %d: %s\n",
3094 				pi, rte_strerror(-diag));
3095 
3096 			/* Fail to setup rx queue, return */
3097 			if (port->port_status == RTE_PORT_HANDLING)
3098 				port->port_status = RTE_PORT_STOPPED;
3099 			else
3100 				fprintf(stderr,
3101 					"Port %d can not be set back to stopped\n",
3102 					pi);
3103 			continue;
3104 		}
3105 
3106 		if (port->port_status == RTE_PORT_HANDLING)
3107 			port->port_status = RTE_PORT_STARTED;
3108 		else
3109 			fprintf(stderr, "Port %d can not be set into started\n",
3110 				pi);
3111 
3112 		if (eth_macaddr_get_print_err(pi, &port->eth_addr) == 0)
3113 			printf("Port %d: " RTE_ETHER_ADDR_PRT_FMT "\n", pi,
3114 					RTE_ETHER_ADDR_BYTES(&port->eth_addr));
3115 
3116 		/* at least one port started, need checking link status */
3117 		need_check_link_status = 1;
3118 
3119 		pl[cfg_pi++] = pi;
3120 	}
3121 
3122 	if (need_check_link_status == 1 && !no_link_check)
3123 		check_all_ports_link_status(RTE_PORT_ALL);
3124 	else if (need_check_link_status == 0)
3125 		fprintf(stderr, "Please stop the ports first\n");
3126 
3127 	if (hairpin_mode & 0xf) {
3128 		uint16_t i;
3129 		int j;
3130 
3131 		/* bind all started hairpin ports */
3132 		for (i = 0; i < cfg_pi; i++) {
3133 			pi = pl[i];
3134 			/* bind current Tx to all peer Rx */
3135 			peer_pi = rte_eth_hairpin_get_peer_ports(pi, peer_pl,
3136 							RTE_MAX_ETHPORTS, 1);
3137 			if (peer_pi < 0)
3138 				return peer_pi;
3139 			for (j = 0; j < peer_pi; j++) {
3140 				if (!port_is_started(peer_pl[j]))
3141 					continue;
3142 				diag = rte_eth_hairpin_bind(pi, peer_pl[j]);
3143 				if (diag < 0) {
3144 					fprintf(stderr,
3145 						"Error during binding hairpin Tx port %u to %u: %s\n",
3146 						pi, peer_pl[j],
3147 						rte_strerror(-diag));
3148 					return -1;
3149 				}
3150 			}
3151 			/* bind all peer Tx to current Rx */
3152 			peer_pi = rte_eth_hairpin_get_peer_ports(pi, peer_pl,
3153 							RTE_MAX_ETHPORTS, 0);
3154 			if (peer_pi < 0)
3155 				return peer_pi;
3156 			for (j = 0; j < peer_pi; j++) {
3157 				if (!port_is_started(peer_pl[j]))
3158 					continue;
3159 				diag = rte_eth_hairpin_bind(peer_pl[j], pi);
3160 				if (diag < 0) {
3161 					fprintf(stderr,
3162 						"Error during binding hairpin Tx port %u to %u: %s\n",
3163 						peer_pl[j], pi,
3164 						rte_strerror(-diag));
3165 					return -1;
3166 				}
3167 			}
3168 		}
3169 	}
3170 
3171 	fill_xstats_display_info_for_port(pid);
3172 
3173 	printf("Done\n");
3174 	return 0;
3175 }
3176 
3177 void
3178 stop_port(portid_t pid)
3179 {
3180 	portid_t pi;
3181 	struct rte_port *port;
3182 	int need_check_link_status = 0;
3183 	portid_t peer_pl[RTE_MAX_ETHPORTS];
3184 	int peer_pi;
3185 
3186 	if (port_id_is_invalid(pid, ENABLED_WARN))
3187 		return;
3188 
3189 	printf("Stopping ports...\n");
3190 
3191 	RTE_ETH_FOREACH_DEV(pi) {
3192 		if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
3193 			continue;
3194 
3195 		if (port_is_forwarding(pi) != 0 && test_done == 0) {
3196 			fprintf(stderr,
3197 				"Please remove port %d from forwarding configuration.\n",
3198 				pi);
3199 			continue;
3200 		}
3201 
3202 		if (port_is_bonding_slave(pi)) {
3203 			fprintf(stderr,
3204 				"Please remove port %d from bonded device.\n",
3205 				pi);
3206 			continue;
3207 		}
3208 
3209 		port = &ports[pi];
3210 		if (port->port_status == RTE_PORT_STARTED)
3211 			port->port_status = RTE_PORT_HANDLING;
3212 		else
3213 			continue;
3214 
3215 		if (hairpin_mode & 0xf) {
3216 			int j;
3217 
3218 			rte_eth_hairpin_unbind(pi, RTE_MAX_ETHPORTS);
3219 			/* unbind all peer Tx from current Rx */
3220 			peer_pi = rte_eth_hairpin_get_peer_ports(pi, peer_pl,
3221 							RTE_MAX_ETHPORTS, 0);
3222 			if (peer_pi < 0)
3223 				continue;
3224 			for (j = 0; j < peer_pi; j++) {
3225 				if (!port_is_started(peer_pl[j]))
3226 					continue;
3227 				rte_eth_hairpin_unbind(peer_pl[j], pi);
3228 			}
3229 		}
3230 
3231 		if (port->flow_list)
3232 			port_flow_flush(pi);
3233 
3234 		if (eth_dev_stop_mp(pi) != 0)
3235 			RTE_LOG(ERR, EAL, "rte_eth_dev_stop failed for port %u\n",
3236 				pi);
3237 
3238 		if (port->port_status == RTE_PORT_HANDLING)
3239 			port->port_status = RTE_PORT_STOPPED;
3240 		else
3241 			fprintf(stderr, "Port %d can not be set into stopped\n",
3242 				pi);
3243 		need_check_link_status = 1;
3244 	}
3245 	if (need_check_link_status && !no_link_check)
3246 		check_all_ports_link_status(RTE_PORT_ALL);
3247 
3248 	printf("Done\n");
3249 }
3250 
3251 static void
3252 remove_invalid_ports_in(portid_t *array, portid_t *total)
3253 {
3254 	portid_t i;
3255 	portid_t new_total = 0;
3256 
3257 	for (i = 0; i < *total; i++)
3258 		if (!port_id_is_invalid(array[i], DISABLED_WARN)) {
3259 			array[new_total] = array[i];
3260 			new_total++;
3261 		}
3262 	*total = new_total;
3263 }
3264 
3265 static void
3266 remove_invalid_ports(void)
3267 {
3268 	remove_invalid_ports_in(ports_ids, &nb_ports);
3269 	remove_invalid_ports_in(fwd_ports_ids, &nb_fwd_ports);
3270 	nb_cfg_ports = nb_fwd_ports;
3271 }
3272 
3273 static void
3274 flush_port_owned_resources(portid_t pi)
3275 {
3276 	mcast_addr_pool_destroy(pi);
3277 	port_flow_flush(pi);
3278 	port_flex_item_flush(pi);
3279 	port_flow_template_table_flush(pi);
3280 	port_flow_pattern_template_flush(pi);
3281 	port_flow_actions_template_flush(pi);
3282 	port_action_handle_flush(pi);
3283 }
3284 
3285 static void
3286 clear_bonding_slave_device(portid_t *slave_pids, uint16_t num_slaves)
3287 {
3288 	struct rte_port *port;
3289 	portid_t slave_pid;
3290 	uint16_t i;
3291 
3292 	for (i = 0; i < num_slaves; i++) {
3293 		slave_pid = slave_pids[i];
3294 		if (port_is_started(slave_pid) == 1) {
3295 			if (rte_eth_dev_stop(slave_pid) != 0)
3296 				fprintf(stderr, "rte_eth_dev_stop failed for port %u\n",
3297 					slave_pid);
3298 
3299 			port = &ports[slave_pid];
3300 			port->port_status = RTE_PORT_STOPPED;
3301 		}
3302 
3303 		clear_port_slave_flag(slave_pid);
3304 
3305 		/* Close slave device when testpmd quit or is killed. */
3306 		if (cl_quit == 1 || f_quit == 1)
3307 			rte_eth_dev_close(slave_pid);
3308 	}
3309 }
3310 
3311 void
3312 close_port(portid_t pid)
3313 {
3314 	portid_t pi;
3315 	struct rte_port *port;
3316 	portid_t slave_pids[RTE_MAX_ETHPORTS];
3317 	int num_slaves = 0;
3318 
3319 	if (port_id_is_invalid(pid, ENABLED_WARN))
3320 		return;
3321 
3322 	printf("Closing ports...\n");
3323 
3324 	RTE_ETH_FOREACH_DEV(pi) {
3325 		if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
3326 			continue;
3327 
3328 		if (port_is_forwarding(pi) != 0 && test_done == 0) {
3329 			fprintf(stderr,
3330 				"Please remove port %d from forwarding configuration.\n",
3331 				pi);
3332 			continue;
3333 		}
3334 
3335 		if (port_is_bonding_slave(pi)) {
3336 			fprintf(stderr,
3337 				"Please remove port %d from bonded device.\n",
3338 				pi);
3339 			continue;
3340 		}
3341 
3342 		port = &ports[pi];
3343 		if (port->port_status == RTE_PORT_CLOSED) {
3344 			fprintf(stderr, "Port %d is already closed\n", pi);
3345 			continue;
3346 		}
3347 
3348 		if (is_proc_primary()) {
3349 			flush_port_owned_resources(pi);
3350 #ifdef RTE_NET_BOND
3351 			if (port->bond_flag == 1)
3352 				num_slaves = rte_eth_bond_slaves_get(pi,
3353 						slave_pids, RTE_MAX_ETHPORTS);
3354 #endif
3355 			rte_eth_dev_close(pi);
3356 			/*
3357 			 * If this port is bonded device, all slaves under the
3358 			 * device need to be removed or closed.
3359 			 */
3360 			if (port->bond_flag == 1 && num_slaves > 0)
3361 				clear_bonding_slave_device(slave_pids,
3362 							num_slaves);
3363 		}
3364 
3365 		free_xstats_display_info(pi);
3366 	}
3367 
3368 	remove_invalid_ports();
3369 	printf("Done\n");
3370 }
3371 
3372 void
3373 reset_port(portid_t pid)
3374 {
3375 	int diag;
3376 	portid_t pi;
3377 	struct rte_port *port;
3378 
3379 	if (port_id_is_invalid(pid, ENABLED_WARN))
3380 		return;
3381 
3382 	if ((pid == (portid_t)RTE_PORT_ALL && !all_ports_stopped()) ||
3383 		(pid != (portid_t)RTE_PORT_ALL && !port_is_stopped(pid))) {
3384 		fprintf(stderr,
3385 			"Can not reset port(s), please stop port(s) first.\n");
3386 		return;
3387 	}
3388 
3389 	printf("Resetting ports...\n");
3390 
3391 	RTE_ETH_FOREACH_DEV(pi) {
3392 		if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
3393 			continue;
3394 
3395 		if (port_is_forwarding(pi) != 0 && test_done == 0) {
3396 			fprintf(stderr,
3397 				"Please remove port %d from forwarding configuration.\n",
3398 				pi);
3399 			continue;
3400 		}
3401 
3402 		if (port_is_bonding_slave(pi)) {
3403 			fprintf(stderr,
3404 				"Please remove port %d from bonded device.\n",
3405 				pi);
3406 			continue;
3407 		}
3408 
3409 		if (is_proc_primary()) {
3410 			diag = rte_eth_dev_reset(pi);
3411 			if (diag == 0) {
3412 				port = &ports[pi];
3413 				port->need_reconfig = 1;
3414 				port->need_reconfig_queues = 1;
3415 			} else {
3416 				fprintf(stderr, "Failed to reset port %d. diag=%d\n",
3417 					pi, diag);
3418 			}
3419 		}
3420 	}
3421 
3422 	printf("Done\n");
3423 }
3424 
3425 void
3426 attach_port(char *identifier)
3427 {
3428 	portid_t pi;
3429 	struct rte_dev_iterator iterator;
3430 
3431 	printf("Attaching a new port...\n");
3432 
3433 	if (identifier == NULL) {
3434 		fprintf(stderr, "Invalid parameters are specified\n");
3435 		return;
3436 	}
3437 
3438 	if (rte_dev_probe(identifier) < 0) {
3439 		TESTPMD_LOG(ERR, "Failed to attach port %s\n", identifier);
3440 		return;
3441 	}
3442 
3443 	/* first attach mode: event */
3444 	if (setup_on_probe_event) {
3445 		/* new ports are detected on RTE_ETH_EVENT_NEW event */
3446 		for (pi = 0; pi < RTE_MAX_ETHPORTS; pi++)
3447 			if (ports[pi].port_status == RTE_PORT_HANDLING &&
3448 					ports[pi].need_setup != 0)
3449 				setup_attached_port(pi);
3450 		return;
3451 	}
3452 
3453 	/* second attach mode: iterator */
3454 	RTE_ETH_FOREACH_MATCHING_DEV(pi, identifier, &iterator) {
3455 		/* setup ports matching the devargs used for probing */
3456 		if (port_is_forwarding(pi))
3457 			continue; /* port was already attached before */
3458 		setup_attached_port(pi);
3459 	}
3460 }
3461 
3462 static void
3463 setup_attached_port(portid_t pi)
3464 {
3465 	unsigned int socket_id;
3466 	int ret;
3467 
3468 	socket_id = (unsigned)rte_eth_dev_socket_id(pi);
3469 	/* if socket_id is invalid, set to the first available socket. */
3470 	if (check_socket_id(socket_id) < 0)
3471 		socket_id = socket_ids[0];
3472 	reconfig(pi, socket_id);
3473 	ret = rte_eth_promiscuous_enable(pi);
3474 	if (ret != 0)
3475 		fprintf(stderr,
3476 			"Error during enabling promiscuous mode for port %u: %s - ignore\n",
3477 			pi, rte_strerror(-ret));
3478 
3479 	ports_ids[nb_ports++] = pi;
3480 	fwd_ports_ids[nb_fwd_ports++] = pi;
3481 	nb_cfg_ports = nb_fwd_ports;
3482 	ports[pi].need_setup = 0;
3483 	ports[pi].port_status = RTE_PORT_STOPPED;
3484 
3485 	printf("Port %d is attached. Now total ports is %d\n", pi, nb_ports);
3486 	printf("Done\n");
3487 }
3488 
3489 static void
3490 detach_device(struct rte_device *dev)
3491 {
3492 	portid_t sibling;
3493 
3494 	if (dev == NULL) {
3495 		fprintf(stderr, "Device already removed\n");
3496 		return;
3497 	}
3498 
3499 	printf("Removing a device...\n");
3500 
3501 	RTE_ETH_FOREACH_DEV_OF(sibling, dev) {
3502 		if (ports[sibling].port_status != RTE_PORT_CLOSED) {
3503 			if (ports[sibling].port_status != RTE_PORT_STOPPED) {
3504 				fprintf(stderr, "Port %u not stopped\n",
3505 					sibling);
3506 				return;
3507 			}
3508 			flush_port_owned_resources(sibling);
3509 		}
3510 	}
3511 
3512 	if (rte_dev_remove(dev) < 0) {
3513 		TESTPMD_LOG(ERR, "Failed to detach device %s\n", rte_dev_name(dev));
3514 		return;
3515 	}
3516 	remove_invalid_ports();
3517 
3518 	printf("Device is detached\n");
3519 	printf("Now total ports is %d\n", nb_ports);
3520 	printf("Done\n");
3521 	return;
3522 }
3523 
3524 void
3525 detach_port_device(portid_t port_id)
3526 {
3527 	int ret;
3528 	struct rte_eth_dev_info dev_info;
3529 
3530 	if (port_id_is_invalid(port_id, ENABLED_WARN))
3531 		return;
3532 
3533 	if (ports[port_id].port_status != RTE_PORT_CLOSED) {
3534 		if (ports[port_id].port_status != RTE_PORT_STOPPED) {
3535 			fprintf(stderr, "Port not stopped\n");
3536 			return;
3537 		}
3538 		fprintf(stderr, "Port was not closed\n");
3539 	}
3540 
3541 	ret = eth_dev_info_get_print_err(port_id, &dev_info);
3542 	if (ret != 0) {
3543 		TESTPMD_LOG(ERR,
3544 			"Failed to get device info for port %d, not detaching\n",
3545 			port_id);
3546 		return;
3547 	}
3548 	detach_device(dev_info.device);
3549 }
3550 
3551 void
3552 detach_devargs(char *identifier)
3553 {
3554 	struct rte_dev_iterator iterator;
3555 	struct rte_devargs da;
3556 	portid_t port_id;
3557 
3558 	printf("Removing a device...\n");
3559 
3560 	memset(&da, 0, sizeof(da));
3561 	if (rte_devargs_parsef(&da, "%s", identifier)) {
3562 		fprintf(stderr, "cannot parse identifier\n");
3563 		return;
3564 	}
3565 
3566 	RTE_ETH_FOREACH_MATCHING_DEV(port_id, identifier, &iterator) {
3567 		if (ports[port_id].port_status != RTE_PORT_CLOSED) {
3568 			if (ports[port_id].port_status != RTE_PORT_STOPPED) {
3569 				fprintf(stderr, "Port %u not stopped\n",
3570 					port_id);
3571 				rte_eth_iterator_cleanup(&iterator);
3572 				rte_devargs_reset(&da);
3573 				return;
3574 			}
3575 			flush_port_owned_resources(port_id);
3576 		}
3577 	}
3578 
3579 	if (rte_eal_hotplug_remove(rte_bus_name(da.bus), da.name) != 0) {
3580 		TESTPMD_LOG(ERR, "Failed to detach device %s(%s)\n",
3581 			    da.name, rte_bus_name(da.bus));
3582 		rte_devargs_reset(&da);
3583 		return;
3584 	}
3585 
3586 	remove_invalid_ports();
3587 
3588 	printf("Device %s is detached\n", identifier);
3589 	printf("Now total ports is %d\n", nb_ports);
3590 	printf("Done\n");
3591 	rte_devargs_reset(&da);
3592 }
3593 
3594 void
3595 pmd_test_exit(void)
3596 {
3597 	portid_t pt_id;
3598 	unsigned int i;
3599 	int ret;
3600 
3601 	if (test_done == 0)
3602 		stop_packet_forwarding();
3603 
3604 #ifndef RTE_EXEC_ENV_WINDOWS
3605 	for (i = 0 ; i < RTE_DIM(mempools) ; i++) {
3606 		if (mempools[i]) {
3607 			if (mp_alloc_type == MP_ALLOC_ANON)
3608 				rte_mempool_mem_iter(mempools[i], dma_unmap_cb,
3609 						     NULL);
3610 		}
3611 	}
3612 #endif
3613 	if (ports != NULL) {
3614 		no_link_check = 1;
3615 		RTE_ETH_FOREACH_DEV(pt_id) {
3616 			printf("\nStopping port %d...\n", pt_id);
3617 			fflush(stdout);
3618 			stop_port(pt_id);
3619 		}
3620 		RTE_ETH_FOREACH_DEV(pt_id) {
3621 			printf("\nShutting down port %d...\n", pt_id);
3622 			fflush(stdout);
3623 			close_port(pt_id);
3624 		}
3625 	}
3626 
3627 	if (hot_plug) {
3628 		ret = rte_dev_event_monitor_stop();
3629 		if (ret) {
3630 			RTE_LOG(ERR, EAL,
3631 				"fail to stop device event monitor.");
3632 			return;
3633 		}
3634 
3635 		ret = rte_dev_event_callback_unregister(NULL,
3636 			dev_event_callback, NULL);
3637 		if (ret < 0) {
3638 			RTE_LOG(ERR, EAL,
3639 				"fail to unregister device event callback.\n");
3640 			return;
3641 		}
3642 
3643 		ret = rte_dev_hotplug_handle_disable();
3644 		if (ret) {
3645 			RTE_LOG(ERR, EAL,
3646 				"fail to disable hotplug handling.\n");
3647 			return;
3648 		}
3649 	}
3650 	for (i = 0 ; i < RTE_DIM(mempools) ; i++) {
3651 		if (mempools[i])
3652 			mempool_free_mp(mempools[i]);
3653 	}
3654 	free(xstats_display);
3655 
3656 	printf("\nBye...\n");
3657 }
3658 
/* Handler type for a command: takes no argument and returns nothing. */
typedef void (*cmd_func_t)(void);

/* Associates a command name with the function implementing it. */
struct pmd_test_command {
	const char *cmd_name; /* name of the command */
	cmd_func_t cmd_func;  /* function bound to that name */
};
3664 
3665 /* Check the link status of all ports in up to 9s, and print them finally */
3666 static void
3667 check_all_ports_link_status(uint32_t port_mask)
3668 {
3669 #define CHECK_INTERVAL 100 /* 100ms */
3670 #define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
3671 	portid_t portid;
3672 	uint8_t count, all_ports_up, print_flag = 0;
3673 	struct rte_eth_link link;
3674 	int ret;
3675 	char link_status[RTE_ETH_LINK_MAX_STR_LEN];
3676 
3677 	printf("Checking link statuses...\n");
3678 	fflush(stdout);
3679 	for (count = 0; count <= MAX_CHECK_TIME; count++) {
3680 		all_ports_up = 1;
3681 		RTE_ETH_FOREACH_DEV(portid) {
3682 			if ((port_mask & (1 << portid)) == 0)
3683 				continue;
3684 			memset(&link, 0, sizeof(link));
3685 			ret = rte_eth_link_get_nowait(portid, &link);
3686 			if (ret < 0) {
3687 				all_ports_up = 0;
3688 				if (print_flag == 1)
3689 					fprintf(stderr,
3690 						"Port %u link get failed: %s\n",
3691 						portid, rte_strerror(-ret));
3692 				continue;
3693 			}
3694 			/* print link status if flag set */
3695 			if (print_flag == 1) {
3696 				rte_eth_link_to_str(link_status,
3697 					sizeof(link_status), &link);
3698 				printf("Port %d %s\n", portid, link_status);
3699 				continue;
3700 			}
3701 			/* clear all_ports_up flag if any link down */
3702 			if (link.link_status == RTE_ETH_LINK_DOWN) {
3703 				all_ports_up = 0;
3704 				break;
3705 			}
3706 		}
3707 		/* after finally printing all link status, get out */
3708 		if (print_flag == 1)
3709 			break;
3710 
3711 		if (all_ports_up == 0) {
3712 			fflush(stdout);
3713 			rte_delay_ms(CHECK_INTERVAL);
3714 		}
3715 
3716 		/* set the print_flag if all ports up or timeout */
3717 		if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
3718 			print_flag = 1;
3719 		}
3720 
3721 		if (lsc_interrupt)
3722 			break;
3723 	}
3724 }
3725 
3726 static void
3727 rmv_port_callback(void *arg)
3728 {
3729 	int need_to_start = 0;
3730 	int org_no_link_check = no_link_check;
3731 	portid_t port_id = (intptr_t)arg;
3732 	struct rte_eth_dev_info dev_info;
3733 	int ret;
3734 
3735 	RTE_ETH_VALID_PORTID_OR_RET(port_id);
3736 
3737 	if (!test_done && port_is_forwarding(port_id)) {
3738 		need_to_start = 1;
3739 		stop_packet_forwarding();
3740 	}
3741 	no_link_check = 1;
3742 	stop_port(port_id);
3743 	no_link_check = org_no_link_check;
3744 
3745 	ret = eth_dev_info_get_print_err(port_id, &dev_info);
3746 	if (ret != 0)
3747 		TESTPMD_LOG(ERR,
3748 			"Failed to get device info for port %d, not detaching\n",
3749 			port_id);
3750 	else {
3751 		struct rte_device *device = dev_info.device;
3752 		close_port(port_id);
3753 		detach_device(device); /* might be already removed or have more ports */
3754 	}
3755 	if (need_to_start)
3756 		start_packet_forwarding(0);
3757 }
3758 
3759 /* This function is used by the interrupt thread */
3760 static int
3761 eth_event_callback(portid_t port_id, enum rte_eth_event_type type, void *param,
3762 		  void *ret_param)
3763 {
3764 	RTE_SET_USED(param);
3765 	RTE_SET_USED(ret_param);
3766 
3767 	if (type >= RTE_ETH_EVENT_MAX) {
3768 		fprintf(stderr,
3769 			"\nPort %" PRIu16 ": %s called upon invalid event %d\n",
3770 			port_id, __func__, type);
3771 		fflush(stderr);
3772 	} else if (event_print_mask & (UINT32_C(1) << type)) {
3773 		printf("\nPort %" PRIu16 ": %s event\n", port_id,
3774 			eth_event_desc[type]);
3775 		fflush(stdout);
3776 	}
3777 
3778 	switch (type) {
3779 	case RTE_ETH_EVENT_NEW:
3780 		ports[port_id].need_setup = 1;
3781 		ports[port_id].port_status = RTE_PORT_HANDLING;
3782 		break;
3783 	case RTE_ETH_EVENT_INTR_RMV:
3784 		if (port_id_is_invalid(port_id, DISABLED_WARN))
3785 			break;
3786 		if (rte_eal_alarm_set(100000,
3787 				rmv_port_callback, (void *)(intptr_t)port_id))
3788 			fprintf(stderr,
3789 				"Could not set up deferred device removal\n");
3790 		break;
3791 	case RTE_ETH_EVENT_DESTROY:
3792 		ports[port_id].port_status = RTE_PORT_CLOSED;
3793 		printf("Port %u is closed\n", port_id);
3794 		break;
3795 	case RTE_ETH_EVENT_RX_AVAIL_THRESH: {
3796 		uint16_t rxq_id;
3797 		int ret;
3798 
3799 		/* avail_thresh query API rewinds rxq_id, no need to check max RxQ num */
3800 		for (rxq_id = 0; ; rxq_id++) {
3801 			ret = rte_eth_rx_avail_thresh_query(port_id, &rxq_id,
3802 							    NULL);
3803 			if (ret <= 0)
3804 				break;
3805 			printf("Received avail_thresh event, port: %u, rxq_id: %u\n",
3806 			       port_id, rxq_id);
3807 
3808 #ifdef RTE_NET_MLX5
3809 			mlx5_test_avail_thresh_event_handler(port_id, rxq_id);
3810 #endif
3811 		}
3812 		break;
3813 	}
3814 	default:
3815 		break;
3816 	}
3817 	return 0;
3818 }
3819 
3820 static int
3821 register_eth_event_callback(void)
3822 {
3823 	int ret;
3824 	enum rte_eth_event_type event;
3825 
3826 	for (event = RTE_ETH_EVENT_UNKNOWN;
3827 			event < RTE_ETH_EVENT_MAX; event++) {
3828 		ret = rte_eth_dev_callback_register(RTE_ETH_ALL,
3829 				event,
3830 				eth_event_callback,
3831 				NULL);
3832 		if (ret != 0) {
3833 			TESTPMD_LOG(ERR, "Failed to register callback for "
3834 					"%s event\n", eth_event_desc[event]);
3835 			return -1;
3836 		}
3837 	}
3838 
3839 	return 0;
3840 }
3841 
3842 /* This function is used by the interrupt thread */
3843 static void
3844 dev_event_callback(const char *device_name, enum rte_dev_event_type type,
3845 			     __rte_unused void *arg)
3846 {
3847 	uint16_t port_id;
3848 	int ret;
3849 
3850 	if (type >= RTE_DEV_EVENT_MAX) {
3851 		fprintf(stderr, "%s called upon invalid event %d\n",
3852 			__func__, type);
3853 		fflush(stderr);
3854 	}
3855 
3856 	switch (type) {
3857 	case RTE_DEV_EVENT_REMOVE:
3858 		RTE_LOG(DEBUG, EAL, "The device: %s has been removed!\n",
3859 			device_name);
3860 		ret = rte_eth_dev_get_port_by_name(device_name, &port_id);
3861 		if (ret) {
3862 			RTE_LOG(ERR, EAL, "can not get port by device %s!\n",
3863 				device_name);
3864 			return;
3865 		}
3866 		/*
3867 		 * Because the user's callback is invoked in eal interrupt
3868 		 * callback, the interrupt callback need to be finished before
3869 		 * it can be unregistered when detaching device. So finish
3870 		 * callback soon and use a deferred removal to detach device
3871 		 * is need. It is a workaround, once the device detaching be
3872 		 * moved into the eal in the future, the deferred removal could
3873 		 * be deleted.
3874 		 */
3875 		if (rte_eal_alarm_set(100000,
3876 				rmv_port_callback, (void *)(intptr_t)port_id))
3877 			RTE_LOG(ERR, EAL,
3878 				"Could not set up deferred device removal\n");
3879 		break;
3880 	case RTE_DEV_EVENT_ADD:
3881 		RTE_LOG(ERR, EAL, "The device: %s has been added!\n",
3882 			device_name);
3883 		/* TODO: After finish kernel driver binding,
3884 		 * begin to attach port.
3885 		 */
3886 		break;
3887 	default:
3888 		break;
3889 	}
3890 }
3891 
3892 static void
3893 rxtx_port_config(portid_t pid)
3894 {
3895 	uint16_t qid;
3896 	uint64_t offloads;
3897 	struct rte_port *port = &ports[pid];
3898 
3899 	for (qid = 0; qid < nb_rxq; qid++) {
3900 		offloads = port->rxq[qid].conf.offloads;
3901 		port->rxq[qid].conf = port->dev_info.default_rxconf;
3902 
3903 		if (rxq_share > 0 &&
3904 		    (port->dev_info.dev_capa & RTE_ETH_DEV_CAPA_RXQ_SHARE)) {
3905 			/* Non-zero share group to enable RxQ share. */
3906 			port->rxq[qid].conf.share_group = pid / rxq_share + 1;
3907 			port->rxq[qid].conf.share_qid = qid; /* Equal mapping. */
3908 		}
3909 
3910 		if (offloads != 0)
3911 			port->rxq[qid].conf.offloads = offloads;
3912 
3913 		/* Check if any Rx parameters have been passed */
3914 		if (rx_pthresh != RTE_PMD_PARAM_UNSET)
3915 			port->rxq[qid].conf.rx_thresh.pthresh = rx_pthresh;
3916 
3917 		if (rx_hthresh != RTE_PMD_PARAM_UNSET)
3918 			port->rxq[qid].conf.rx_thresh.hthresh = rx_hthresh;
3919 
3920 		if (rx_wthresh != RTE_PMD_PARAM_UNSET)
3921 			port->rxq[qid].conf.rx_thresh.wthresh = rx_wthresh;
3922 
3923 		if (rx_free_thresh != RTE_PMD_PARAM_UNSET)
3924 			port->rxq[qid].conf.rx_free_thresh = rx_free_thresh;
3925 
3926 		if (rx_drop_en != RTE_PMD_PARAM_UNSET)
3927 			port->rxq[qid].conf.rx_drop_en = rx_drop_en;
3928 
3929 		port->nb_rx_desc[qid] = nb_rxd;
3930 	}
3931 
3932 	for (qid = 0; qid < nb_txq; qid++) {
3933 		offloads = port->txq[qid].conf.offloads;
3934 		port->txq[qid].conf = port->dev_info.default_txconf;
3935 		if (offloads != 0)
3936 			port->txq[qid].conf.offloads = offloads;
3937 
3938 		/* Check if any Tx parameters have been passed */
3939 		if (tx_pthresh != RTE_PMD_PARAM_UNSET)
3940 			port->txq[qid].conf.tx_thresh.pthresh = tx_pthresh;
3941 
3942 		if (tx_hthresh != RTE_PMD_PARAM_UNSET)
3943 			port->txq[qid].conf.tx_thresh.hthresh = tx_hthresh;
3944 
3945 		if (tx_wthresh != RTE_PMD_PARAM_UNSET)
3946 			port->txq[qid].conf.tx_thresh.wthresh = tx_wthresh;
3947 
3948 		if (tx_rs_thresh != RTE_PMD_PARAM_UNSET)
3949 			port->txq[qid].conf.tx_rs_thresh = tx_rs_thresh;
3950 
3951 		if (tx_free_thresh != RTE_PMD_PARAM_UNSET)
3952 			port->txq[qid].conf.tx_free_thresh = tx_free_thresh;
3953 
3954 		port->nb_tx_desc[qid] = nb_txd;
3955 	}
3956 }
3957 
3958 /*
3959  * Helper function to set MTU from frame size
3960  *
3961  * port->dev_info should be set before calling this function.
3962  *
3963  * return 0 on success, negative on error
3964  */
3965 int
3966 update_mtu_from_frame_size(portid_t portid, uint32_t max_rx_pktlen)
3967 {
3968 	struct rte_port *port = &ports[portid];
3969 	uint32_t eth_overhead;
3970 	uint16_t mtu, new_mtu;
3971 
3972 	eth_overhead = get_eth_overhead(&port->dev_info);
3973 
3974 	if (rte_eth_dev_get_mtu(portid, &mtu) != 0) {
3975 		printf("Failed to get MTU for port %u\n", portid);
3976 		return -1;
3977 	}
3978 
3979 	new_mtu = max_rx_pktlen - eth_overhead;
3980 
3981 	if (mtu == new_mtu)
3982 		return 0;
3983 
3984 	if (eth_dev_set_mtu_mp(portid, new_mtu) != 0) {
3985 		fprintf(stderr,
3986 			"Failed to set MTU to %u for port %u\n",
3987 			new_mtu, portid);
3988 		return -1;
3989 	}
3990 
3991 	port->dev_conf.rxmode.mtu = new_mtu;
3992 
3993 	return 0;
3994 }
3995 
3996 void
3997 init_port_config(void)
3998 {
3999 	portid_t pid;
4000 	struct rte_port *port;
4001 	int ret, i;
4002 
4003 	RTE_ETH_FOREACH_DEV(pid) {
4004 		port = &ports[pid];
4005 
4006 		ret = eth_dev_info_get_print_err(pid, &port->dev_info);
4007 		if (ret != 0)
4008 			return;
4009 
4010 		if (nb_rxq > 1) {
4011 			port->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
4012 			port->dev_conf.rx_adv_conf.rss_conf.rss_hf =
4013 				rss_hf & port->dev_info.flow_type_rss_offloads;
4014 		} else {
4015 			port->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
4016 			port->dev_conf.rx_adv_conf.rss_conf.rss_hf = 0;
4017 		}
4018 
4019 		if (port->dcb_flag == 0) {
4020 			if (port->dev_conf.rx_adv_conf.rss_conf.rss_hf != 0) {
4021 				port->dev_conf.rxmode.mq_mode =
4022 					(enum rte_eth_rx_mq_mode)
4023 						(rx_mq_mode & RTE_ETH_MQ_RX_RSS);
4024 			} else {
4025 				port->dev_conf.rxmode.mq_mode = RTE_ETH_MQ_RX_NONE;
4026 				port->dev_conf.rxmode.offloads &=
4027 						~RTE_ETH_RX_OFFLOAD_RSS_HASH;
4028 
4029 				for (i = 0;
4030 				     i < port->dev_info.nb_rx_queues;
4031 				     i++)
4032 					port->rxq[i].conf.offloads &=
4033 						~RTE_ETH_RX_OFFLOAD_RSS_HASH;
4034 			}
4035 		}
4036 
4037 		rxtx_port_config(pid);
4038 
4039 		ret = eth_macaddr_get_print_err(pid, &port->eth_addr);
4040 		if (ret != 0)
4041 			return;
4042 
4043 		if (lsc_interrupt && (*port->dev_info.dev_flags & RTE_ETH_DEV_INTR_LSC))
4044 			port->dev_conf.intr_conf.lsc = 1;
4045 		if (rmv_interrupt && (*port->dev_info.dev_flags & RTE_ETH_DEV_INTR_RMV))
4046 			port->dev_conf.intr_conf.rmv = 1;
4047 	}
4048 }
4049 
4050 void set_port_slave_flag(portid_t slave_pid)
4051 {
4052 	struct rte_port *port;
4053 
4054 	port = &ports[slave_pid];
4055 	port->slave_flag = 1;
4056 }
4057 
4058 void clear_port_slave_flag(portid_t slave_pid)
4059 {
4060 	struct rte_port *port;
4061 
4062 	port = &ports[slave_pid];
4063 	port->slave_flag = 0;
4064 }
4065 
4066 uint8_t port_is_bonding_slave(portid_t slave_pid)
4067 {
4068 	struct rte_port *port;
4069 	struct rte_eth_dev_info dev_info;
4070 	int ret;
4071 
4072 	port = &ports[slave_pid];
4073 	ret = eth_dev_info_get_print_err(slave_pid, &dev_info);
4074 	if (ret != 0) {
4075 		TESTPMD_LOG(ERR,
4076 			"Failed to get device info for port id %d,"
4077 			"cannot determine if the port is a bonded slave",
4078 			slave_pid);
4079 		return 0;
4080 	}
4081 	if ((*dev_info.dev_flags & RTE_ETH_DEV_BONDED_SLAVE) || (port->slave_flag == 1))
4082 		return 1;
4083 	return 0;
4084 }
4085 
/* VLAN ids 0..31, in ascending order, used by the DCB configuration. */
const uint16_t vlan_tags[] = {
	 0,  1,  2,  3,
	 4,  5,  6,  7,
	 8,  9, 10, 11,
	12, 13, 14, 15,
	16, 17, 18, 19,
	20, 21, 22, 23,
	24, 25, 26, 27,
	28, 29, 30, 31
};
4092 
4093 static  int
4094 get_eth_dcb_conf(portid_t pid, struct rte_eth_conf *eth_conf,
4095 		 enum dcb_mode_enable dcb_mode,
4096 		 enum rte_eth_nb_tcs num_tcs,
4097 		 uint8_t pfc_en)
4098 {
4099 	uint8_t i;
4100 	int32_t rc;
4101 	struct rte_eth_rss_conf rss_conf;
4102 
4103 	/*
4104 	 * Builds up the correct configuration for dcb+vt based on the vlan tags array
4105 	 * given above, and the number of traffic classes available for use.
4106 	 */
4107 	if (dcb_mode == DCB_VT_ENABLED) {
4108 		struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
4109 				&eth_conf->rx_adv_conf.vmdq_dcb_conf;
4110 		struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
4111 				&eth_conf->tx_adv_conf.vmdq_dcb_tx_conf;
4112 
4113 		/* VMDQ+DCB RX and TX configurations */
4114 		vmdq_rx_conf->enable_default_pool = 0;
4115 		vmdq_rx_conf->default_pool = 0;
4116 		vmdq_rx_conf->nb_queue_pools =
4117 			(num_tcs ==  RTE_ETH_4_TCS ? RTE_ETH_32_POOLS : RTE_ETH_16_POOLS);
4118 		vmdq_tx_conf->nb_queue_pools =
4119 			(num_tcs ==  RTE_ETH_4_TCS ? RTE_ETH_32_POOLS : RTE_ETH_16_POOLS);
4120 
4121 		vmdq_rx_conf->nb_pool_maps = vmdq_rx_conf->nb_queue_pools;
4122 		for (i = 0; i < vmdq_rx_conf->nb_pool_maps; i++) {
4123 			vmdq_rx_conf->pool_map[i].vlan_id = vlan_tags[i];
4124 			vmdq_rx_conf->pool_map[i].pools =
4125 				1 << (i % vmdq_rx_conf->nb_queue_pools);
4126 		}
4127 		for (i = 0; i < RTE_ETH_DCB_NUM_USER_PRIORITIES; i++) {
4128 			vmdq_rx_conf->dcb_tc[i] = i % num_tcs;
4129 			vmdq_tx_conf->dcb_tc[i] = i % num_tcs;
4130 		}
4131 
4132 		/* set DCB mode of RX and TX of multiple queues */
4133 		eth_conf->rxmode.mq_mode =
4134 				(enum rte_eth_rx_mq_mode)
4135 					(rx_mq_mode & RTE_ETH_MQ_RX_VMDQ_DCB);
4136 		eth_conf->txmode.mq_mode = RTE_ETH_MQ_TX_VMDQ_DCB;
4137 	} else {
4138 		struct rte_eth_dcb_rx_conf *rx_conf =
4139 				&eth_conf->rx_adv_conf.dcb_rx_conf;
4140 		struct rte_eth_dcb_tx_conf *tx_conf =
4141 				&eth_conf->tx_adv_conf.dcb_tx_conf;
4142 
4143 		memset(&rss_conf, 0, sizeof(struct rte_eth_rss_conf));
4144 
4145 		rc = rte_eth_dev_rss_hash_conf_get(pid, &rss_conf);
4146 		if (rc != 0)
4147 			return rc;
4148 
4149 		rx_conf->nb_tcs = num_tcs;
4150 		tx_conf->nb_tcs = num_tcs;
4151 
4152 		for (i = 0; i < RTE_ETH_DCB_NUM_USER_PRIORITIES; i++) {
4153 			rx_conf->dcb_tc[i] = i % num_tcs;
4154 			tx_conf->dcb_tc[i] = i % num_tcs;
4155 		}
4156 
4157 		eth_conf->rxmode.mq_mode =
4158 				(enum rte_eth_rx_mq_mode)
4159 					(rx_mq_mode & RTE_ETH_MQ_RX_DCB_RSS);
4160 		eth_conf->rx_adv_conf.rss_conf = rss_conf;
4161 		eth_conf->txmode.mq_mode = RTE_ETH_MQ_TX_DCB;
4162 	}
4163 
4164 	if (pfc_en)
4165 		eth_conf->dcb_capability_en =
4166 				RTE_ETH_DCB_PG_SUPPORT | RTE_ETH_DCB_PFC_SUPPORT;
4167 	else
4168 		eth_conf->dcb_capability_en = RTE_ETH_DCB_PG_SUPPORT;
4169 
4170 	return 0;
4171 }
4172 
4173 int
4174 init_port_dcb_config(portid_t pid,
4175 		     enum dcb_mode_enable dcb_mode,
4176 		     enum rte_eth_nb_tcs num_tcs,
4177 		     uint8_t pfc_en)
4178 {
4179 	struct rte_eth_conf port_conf;
4180 	struct rte_port *rte_port;
4181 	int retval;
4182 	uint16_t i;
4183 
4184 	if (num_procs > 1) {
4185 		printf("The multi-process feature doesn't support dcb.\n");
4186 		return -ENOTSUP;
4187 	}
4188 	rte_port = &ports[pid];
4189 
4190 	/* retain the original device configuration. */
4191 	memcpy(&port_conf, &rte_port->dev_conf, sizeof(struct rte_eth_conf));
4192 
4193 	/*set configuration of DCB in vt mode and DCB in non-vt mode*/
4194 	retval = get_eth_dcb_conf(pid, &port_conf, dcb_mode, num_tcs, pfc_en);
4195 	if (retval < 0)
4196 		return retval;
4197 	port_conf.rxmode.offloads |= RTE_ETH_RX_OFFLOAD_VLAN_FILTER;
4198 	/* remove RSS HASH offload for DCB in vt mode */
4199 	if (port_conf.rxmode.mq_mode == RTE_ETH_MQ_RX_VMDQ_DCB) {
4200 		port_conf.rxmode.offloads &= ~RTE_ETH_RX_OFFLOAD_RSS_HASH;
4201 		for (i = 0; i < nb_rxq; i++)
4202 			rte_port->rxq[i].conf.offloads &=
4203 				~RTE_ETH_RX_OFFLOAD_RSS_HASH;
4204 	}
4205 
4206 	/* re-configure the device . */
4207 	retval = rte_eth_dev_configure(pid, nb_rxq, nb_rxq, &port_conf);
4208 	if (retval < 0)
4209 		return retval;
4210 
4211 	retval = eth_dev_info_get_print_err(pid, &rte_port->dev_info);
4212 	if (retval != 0)
4213 		return retval;
4214 
4215 	/* If dev_info.vmdq_pool_base is greater than 0,
4216 	 * the queue id of vmdq pools is started after pf queues.
4217 	 */
4218 	if (dcb_mode == DCB_VT_ENABLED &&
4219 	    rte_port->dev_info.vmdq_pool_base > 0) {
4220 		fprintf(stderr,
4221 			"VMDQ_DCB multi-queue mode is nonsensical for port %d.\n",
4222 			pid);
4223 		return -1;
4224 	}
4225 
4226 	/* Assume the ports in testpmd have the same dcb capability
4227 	 * and has the same number of rxq and txq in dcb mode
4228 	 */
4229 	if (dcb_mode == DCB_VT_ENABLED) {
4230 		if (rte_port->dev_info.max_vfs > 0) {
4231 			nb_rxq = rte_port->dev_info.nb_rx_queues;
4232 			nb_txq = rte_port->dev_info.nb_tx_queues;
4233 		} else {
4234 			nb_rxq = rte_port->dev_info.max_rx_queues;
4235 			nb_txq = rte_port->dev_info.max_tx_queues;
4236 		}
4237 	} else {
4238 		/*if vt is disabled, use all pf queues */
4239 		if (rte_port->dev_info.vmdq_pool_base == 0) {
4240 			nb_rxq = rte_port->dev_info.max_rx_queues;
4241 			nb_txq = rte_port->dev_info.max_tx_queues;
4242 		} else {
4243 			nb_rxq = (queueid_t)num_tcs;
4244 			nb_txq = (queueid_t)num_tcs;
4245 
4246 		}
4247 	}
4248 	rx_free_thresh = 64;
4249 
4250 	memcpy(&rte_port->dev_conf, &port_conf, sizeof(struct rte_eth_conf));
4251 
4252 	rxtx_port_config(pid);
4253 	/* VLAN filter */
4254 	rte_port->dev_conf.rxmode.offloads |= RTE_ETH_RX_OFFLOAD_VLAN_FILTER;
4255 	for (i = 0; i < RTE_DIM(vlan_tags); i++)
4256 		rx_vft_set(pid, vlan_tags[i], 1);
4257 
4258 	retval = eth_macaddr_get_print_err(pid, &rte_port->eth_addr);
4259 	if (retval != 0)
4260 		return retval;
4261 
4262 	rte_port->dcb_flag = 1;
4263 
4264 	/* Enter DCB configuration status */
4265 	dcb_config = 1;
4266 
4267 	return 0;
4268 }
4269 
4270 static void
4271 init_port(void)
4272 {
4273 	int i;
4274 
4275 	/* Configuration of Ethernet ports. */
4276 	ports = rte_zmalloc("testpmd: ports",
4277 			    sizeof(struct rte_port) * RTE_MAX_ETHPORTS,
4278 			    RTE_CACHE_LINE_SIZE);
4279 	if (ports == NULL) {
4280 		rte_exit(EXIT_FAILURE,
4281 				"rte_zmalloc(%d struct rte_port) failed\n",
4282 				RTE_MAX_ETHPORTS);
4283 	}
4284 	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
4285 		ports[i].fwd_mac_swap = 1;
4286 		ports[i].xstats_info.allocated = false;
4287 		LIST_INIT(&ports[i].flow_tunnel_list);
4288 	}
4289 	/* Initialize ports NUMA structures */
4290 	memset(port_numa, NUMA_NO_CONFIG, RTE_MAX_ETHPORTS);
4291 	memset(rxring_numa, NUMA_NO_CONFIG, RTE_MAX_ETHPORTS);
4292 	memset(txring_numa, NUMA_NO_CONFIG, RTE_MAX_ETHPORTS);
4293 }
4294 
/*
 * Forced-termination helper used by the signal handler: calls
 * pmd_test_exit() and then prompt_exit(), in that order.
 */
static void
force_quit(void)
{
	pmd_test_exit();
	prompt_exit();
}
4301 
4302 static void
4303 print_stats(void)
4304 {
4305 	uint8_t i;
4306 	const char clr[] = { 27, '[', '2', 'J', '\0' };
4307 	const char top_left[] = { 27, '[', '1', ';', '1', 'H', '\0' };
4308 
4309 	/* Clear screen and move to top left */
4310 	printf("%s%s", clr, top_left);
4311 
4312 	printf("\nPort statistics ====================================");
4313 	for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++)
4314 		nic_stats_display(fwd_ports_ids[i]);
4315 
4316 	fflush(stdout);
4317 }
4318 
4319 static void
4320 signal_handler(int signum)
4321 {
4322 	if (signum == SIGINT || signum == SIGTERM) {
4323 		fprintf(stderr, "\nSignal %d received, preparing to exit...\n",
4324 			signum);
4325 #ifdef RTE_LIB_PDUMP
4326 		/* uninitialize packet capture framework */
4327 		rte_pdump_uninit();
4328 #endif
4329 #ifdef RTE_LIB_LATENCYSTATS
4330 		if (latencystats_enabled != 0)
4331 			rte_latencystats_uninit();
4332 #endif
4333 		force_quit();
4334 		/* Set flag to indicate the force termination. */
4335 		f_quit = 1;
4336 		/* exit with the expected status */
4337 #ifndef RTE_EXEC_ENV_WINDOWS
4338 		signal(signum, SIG_DFL);
4339 		kill(getpid(), signum);
4340 #endif
4341 	}
4342 }
4343 
4344 int
4345 main(int argc, char** argv)
4346 {
4347 	int diag;
4348 	portid_t port_id;
4349 	uint16_t count;
4350 	int ret;
4351 
4352 	signal(SIGINT, signal_handler);
4353 	signal(SIGTERM, signal_handler);
4354 
4355 	testpmd_logtype = rte_log_register("testpmd");
4356 	if (testpmd_logtype < 0)
4357 		rte_exit(EXIT_FAILURE, "Cannot register log type");
4358 	rte_log_set_level(testpmd_logtype, RTE_LOG_DEBUG);
4359 
4360 	diag = rte_eal_init(argc, argv);
4361 	if (diag < 0)
4362 		rte_exit(EXIT_FAILURE, "Cannot init EAL: %s\n",
4363 			 rte_strerror(rte_errno));
4364 
4365 	ret = register_eth_event_callback();
4366 	if (ret != 0)
4367 		rte_exit(EXIT_FAILURE, "Cannot register for ethdev events");
4368 
4369 #ifdef RTE_LIB_PDUMP
4370 	/* initialize packet capture framework */
4371 	rte_pdump_init();
4372 #endif
4373 
4374 	count = 0;
4375 	RTE_ETH_FOREACH_DEV(port_id) {
4376 		ports_ids[count] = port_id;
4377 		count++;
4378 	}
4379 	nb_ports = (portid_t) count;
4380 	if (nb_ports == 0)
4381 		TESTPMD_LOG(WARNING, "No probed ethernet devices\n");
4382 
4383 	/* allocate port structures, and init them */
4384 	init_port();
4385 
4386 	set_def_fwd_config();
4387 	if (nb_lcores == 0)
4388 		rte_exit(EXIT_FAILURE, "No cores defined for forwarding\n"
4389 			 "Check the core mask argument\n");
4390 
4391 	/* Bitrate/latency stats disabled by default */
4392 #ifdef RTE_LIB_BITRATESTATS
4393 	bitrate_enabled = 0;
4394 #endif
4395 #ifdef RTE_LIB_LATENCYSTATS
4396 	latencystats_enabled = 0;
4397 #endif
4398 
4399 	/* on FreeBSD, mlockall() is disabled by default */
4400 #ifdef RTE_EXEC_ENV_FREEBSD
4401 	do_mlockall = 0;
4402 #else
4403 	do_mlockall = 1;
4404 #endif
4405 
4406 	argc -= diag;
4407 	argv += diag;
4408 	if (argc > 1)
4409 		launch_args_parse(argc, argv);
4410 
4411 #ifndef RTE_EXEC_ENV_WINDOWS
4412 	if (do_mlockall && mlockall(MCL_CURRENT | MCL_FUTURE)) {
4413 		TESTPMD_LOG(NOTICE, "mlockall() failed with error \"%s\"\n",
4414 			strerror(errno));
4415 	}
4416 #endif
4417 
4418 	if (tx_first && interactive)
4419 		rte_exit(EXIT_FAILURE, "--tx-first cannot be used on "
4420 				"interactive mode.\n");
4421 
4422 	if (tx_first && lsc_interrupt) {
4423 		fprintf(stderr,
4424 			"Warning: lsc_interrupt needs to be off when using tx_first. Disabling.\n");
4425 		lsc_interrupt = 0;
4426 	}
4427 
4428 	if (!nb_rxq && !nb_txq)
4429 		fprintf(stderr,
4430 			"Warning: Either rx or tx queues should be non-zero\n");
4431 
4432 	if (nb_rxq > 1 && nb_rxq > nb_txq)
4433 		fprintf(stderr,
4434 			"Warning: nb_rxq=%d enables RSS configuration, but nb_txq=%d will prevent to fully test it.\n",
4435 			nb_rxq, nb_txq);
4436 
4437 	init_config();
4438 
4439 	if (hot_plug) {
4440 		ret = rte_dev_hotplug_handle_enable();
4441 		if (ret) {
4442 			RTE_LOG(ERR, EAL,
4443 				"fail to enable hotplug handling.");
4444 			return -1;
4445 		}
4446 
4447 		ret = rte_dev_event_monitor_start();
4448 		if (ret) {
4449 			RTE_LOG(ERR, EAL,
4450 				"fail to start device event monitoring.");
4451 			return -1;
4452 		}
4453 
4454 		ret = rte_dev_event_callback_register(NULL,
4455 			dev_event_callback, NULL);
4456 		if (ret) {
4457 			RTE_LOG(ERR, EAL,
4458 				"fail  to register device event callback\n");
4459 			return -1;
4460 		}
4461 	}
4462 
4463 	if (!no_device_start && start_port(RTE_PORT_ALL) != 0)
4464 		rte_exit(EXIT_FAILURE, "Start ports failed\n");
4465 
4466 	/* set all ports to promiscuous mode by default */
4467 	RTE_ETH_FOREACH_DEV(port_id) {
4468 		ret = rte_eth_promiscuous_enable(port_id);
4469 		if (ret != 0)
4470 			fprintf(stderr,
4471 				"Error during enabling promiscuous mode for port %u: %s - ignore\n",
4472 				port_id, rte_strerror(-ret));
4473 	}
4474 
4475 #ifdef RTE_LIB_METRICS
4476 	/* Init metrics library */
4477 	rte_metrics_init(rte_socket_id());
4478 #endif
4479 
4480 #ifdef RTE_LIB_LATENCYSTATS
4481 	if (latencystats_enabled != 0) {
4482 		int ret = rte_latencystats_init(1, NULL);
4483 		if (ret)
4484 			fprintf(stderr,
4485 				"Warning: latencystats init() returned error %d\n",
4486 				ret);
4487 		fprintf(stderr, "Latencystats running on lcore %d\n",
4488 			latencystats_lcore_id);
4489 	}
4490 #endif
4491 
4492 	/* Setup bitrate stats */
4493 #ifdef RTE_LIB_BITRATESTATS
4494 	if (bitrate_enabled != 0) {
4495 		bitrate_data = rte_stats_bitrate_create();
4496 		if (bitrate_data == NULL)
4497 			rte_exit(EXIT_FAILURE,
4498 				"Could not allocate bitrate data.\n");
4499 		rte_stats_bitrate_reg(bitrate_data);
4500 	}
4501 #endif
4502 #ifdef RTE_LIB_CMDLINE
4503 	if (init_cmdline() != 0)
4504 		rte_exit(EXIT_FAILURE,
4505 			"Could not initialise cmdline context.\n");
4506 
4507 	if (strlen(cmdline_filename) != 0)
4508 		cmdline_read_from_file(cmdline_filename);
4509 
4510 	if (interactive == 1) {
4511 		if (auto_start) {
4512 			printf("Start automatic packet forwarding\n");
4513 			start_packet_forwarding(0);
4514 		}
4515 		prompt();
4516 		pmd_test_exit();
4517 	} else
4518 #endif
4519 	{
4520 		char c;
4521 		int rc;
4522 
4523 		f_quit = 0;
4524 
4525 		printf("No commandline core given, start packet forwarding\n");
4526 		start_packet_forwarding(tx_first);
4527 		if (stats_period != 0) {
4528 			uint64_t prev_time = 0, cur_time, diff_time = 0;
4529 			uint64_t timer_period;
4530 
4531 			/* Convert to number of cycles */
4532 			timer_period = stats_period * rte_get_timer_hz();
4533 
4534 			while (f_quit == 0) {
4535 				cur_time = rte_get_timer_cycles();
4536 				diff_time += cur_time - prev_time;
4537 
4538 				if (diff_time >= timer_period) {
4539 					print_stats();
4540 					/* Reset the timer */
4541 					diff_time = 0;
4542 				}
4543 				/* Sleep to avoid unnecessary checks */
4544 				prev_time = cur_time;
4545 				rte_delay_us_sleep(US_PER_S);
4546 			}
4547 		}
4548 
4549 		printf("Press enter to exit\n");
4550 		rc = read(0, &c, 1);
4551 		pmd_test_exit();
4552 		if (rc < 0)
4553 			return 1;
4554 	}
4555 
4556 	ret = rte_eal_cleanup();
4557 	if (ret != 0)
4558 		rte_exit(EXIT_FAILURE,
4559 			 "EAL cleanup failed: %s\n", strerror(-ret));
4560 
4561 	return EXIT_SUCCESS;
4562 }
4563