xref: /dpdk/app/test-pmd/testpmd.c (revision 5028f207a4fa6d5cdd86019e43d2e2d80fa21ced)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2017 Intel Corporation
3  */
4 
5 #include <stdarg.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <signal.h>
9 #include <string.h>
10 #include <time.h>
11 #include <fcntl.h>
12 #ifndef RTE_EXEC_ENV_WINDOWS
13 #include <sys/mman.h>
14 #endif
15 #include <sys/types.h>
16 #include <errno.h>
17 #include <stdbool.h>
18 
19 #include <sys/queue.h>
20 #include <sys/stat.h>
21 
22 #include <stdint.h>
23 #include <unistd.h>
24 #include <inttypes.h>
25 
26 #include <rte_common.h>
27 #include <rte_errno.h>
28 #include <rte_byteorder.h>
29 #include <rte_log.h>
30 #include <rte_debug.h>
31 #include <rte_cycles.h>
32 #include <rte_memory.h>
33 #include <rte_memcpy.h>
34 #include <rte_launch.h>
35 #include <rte_bus.h>
36 #include <rte_eal.h>
37 #include <rte_alarm.h>
38 #include <rte_per_lcore.h>
39 #include <rte_lcore.h>
40 #include <rte_branch_prediction.h>
41 #include <rte_mempool.h>
42 #include <rte_malloc.h>
43 #include <rte_mbuf.h>
44 #include <rte_mbuf_pool_ops.h>
45 #include <rte_interrupts.h>
46 #include <rte_ether.h>
47 #include <rte_ethdev.h>
48 #include <rte_dev.h>
49 #include <rte_string_fns.h>
50 #ifdef RTE_NET_IXGBE
51 #include <rte_pmd_ixgbe.h>
52 #endif
53 #ifdef RTE_LIB_PDUMP
54 #include <rte_pdump.h>
55 #endif
56 #include <rte_flow.h>
57 #ifdef RTE_LIB_METRICS
58 #include <rte_metrics.h>
59 #endif
60 #ifdef RTE_LIB_BITRATESTATS
61 #include <rte_bitrate.h>
62 #endif
63 #ifdef RTE_LIB_LATENCYSTATS
64 #include <rte_latencystats.h>
65 #endif
66 #ifdef RTE_EXEC_ENV_WINDOWS
67 #include <process.h>
68 #endif
69 #ifdef RTE_NET_BOND
70 #include <rte_eth_bond.h>
71 #endif
72 #ifdef RTE_NET_MLX5
73 #include "mlx5_testpmd.h"
74 #endif
75 
76 #include "testpmd.h"
77 
78 #ifndef MAP_HUGETLB
79 /* FreeBSD may not have MAP_HUGETLB (in fact, it probably doesn't) */
80 #define HUGE_FLAG (0x40000)
81 #else
82 #define HUGE_FLAG MAP_HUGETLB
83 #endif
84 
85 #ifndef MAP_HUGE_SHIFT
86 /* older kernels (or FreeBSD) will not have this define */
87 #define HUGE_SHIFT (26)
88 #else
89 #define HUGE_SHIFT MAP_HUGE_SHIFT
90 #endif
91 
92 #define EXTMEM_HEAP_NAME "extmem"
93 /*
94  * Zone size with the malloc overhead (max of debug and release variants)
95  * must fit into the smallest supported hugepage size (2M),
96  * so that an IOVA-contiguous zone of this size can always be allocated
97  * if there are free 2M hugepages.
98  */
99 #define EXTBUF_ZONE_SIZE (RTE_PGSIZE_2M - 4 * RTE_CACHE_LINE_SIZE)
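/*
 * For illustration: with a 64-byte cache line (typical on x86),
 * EXTBUF_ZONE_SIZE = 2097152 - 4 * 64 = 2096896 bytes.
 */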
100 
101 uint16_t verbose_level = 0; /**< Silent by default. */
102 int testpmd_logtype; /**< Log type for testpmd logs */
103 
104 /* use main core for command line ? */
105 uint8_t interactive = 0;
106 uint8_t auto_start = 0;
107 uint8_t tx_first;
108 char cmdline_filename[PATH_MAX] = {0};
109 
110 /*
111  * NUMA support configuration.
112  * When set, the NUMA support attempts to dispatch the allocation of the
113  * RX and TX memory rings, and of the DMA memory buffers (mbufs) for the
114  * probed ports among the CPU sockets 0 and 1.
115  * Otherwise, all memory is allocated from CPU socket 0.
116  */
117 uint8_t numa_support = 1; /**< numa enabled by default */
118 
119 /*
120  * In UMA mode, all memory is allocated from socket 0 if --socket-num is
121  * not configured.
122  */
123 uint8_t socket_num = UMA_NO_CONFIG;
124 
125 /*
126  * Select mempool allocation type:
127  * - native: use regular DPDK memory
128  * - anon: use regular DPDK memory to create mempool, but populate using
129  *         anonymous memory (may not be IOVA-contiguous)
130  * - xmem: use externally allocated hugepage memory
131  */
132 uint8_t mp_alloc_type = MP_ALLOC_NATIVE;
133 
134 /*
135  * Store the sockets on which the memory pools used by the ports
136  * are allocated.
137  */
138 uint8_t port_numa[RTE_MAX_ETHPORTS];
139 
140 /*
141  * Store the sockets on which the Rx rings used by the ports
142  * are allocated.
143  */
144 uint8_t rxring_numa[RTE_MAX_ETHPORTS];
145 
146 /*
147  * Store the sockets on which the Tx rings used by the ports
148  * are allocated.
149  */
150 uint8_t txring_numa[RTE_MAX_ETHPORTS];
151 
152 /*
153  * Record the Ethernet address of peer target ports to which packets are
154  * forwarded.
155  * Must be instantiated with the ethernet addresses of peer traffic generator
156  * ports.
157  */
158 struct rte_ether_addr peer_eth_addrs[RTE_MAX_ETHPORTS];
159 portid_t nb_peer_eth_addrs = 0;
160 
161 /*
162  * Probed Target Environment.
163  */
164 struct rte_port *ports;	       /**< For all probed ethernet ports. */
165 portid_t nb_ports;             /**< Number of probed ethernet ports. */
166 struct fwd_lcore **fwd_lcores; /**< For all probed logical cores. */
167 lcoreid_t nb_lcores;           /**< Number of probed logical cores. */
168 
169 portid_t ports_ids[RTE_MAX_ETHPORTS]; /**< Store all port ids. */
170 
171 /*
172  * Test Forwarding Configuration.
173  *    nb_fwd_lcores <= nb_cfg_lcores <= nb_lcores
174  *    nb_fwd_ports  <= nb_cfg_ports  <= nb_ports
175  */
176 lcoreid_t nb_cfg_lcores; /**< Number of configured logical cores. */
177 lcoreid_t nb_fwd_lcores; /**< Number of forwarding logical cores. */
178 portid_t  nb_cfg_ports;  /**< Number of configured ports. */
179 portid_t  nb_fwd_ports;  /**< Number of forwarding ports. */
180 
181 unsigned int fwd_lcores_cpuids[RTE_MAX_LCORE]; /**< CPU ids configuration. */
182 portid_t fwd_ports_ids[RTE_MAX_ETHPORTS];      /**< Port ids configuration. */
183 
184 struct fwd_stream **fwd_streams; /**< For each RX queue of each port. */
185 streamid_t nb_fwd_streams;       /**< Is equal to (nb_ports * nb_rxq). */
186 
187 /*
188  * Forwarding engines.
189  */
190 struct fwd_engine * fwd_engines[] = {
191 	&io_fwd_engine,
192 	&mac_fwd_engine,
193 	&mac_swap_engine,
194 	&flow_gen_engine,
195 	&rx_only_engine,
196 	&tx_only_engine,
197 	&csum_fwd_engine,
198 	&icmp_echo_engine,
199 	&noisy_vnf_engine,
200 	&five_tuple_swap_fwd_engine,
201 #ifdef RTE_LIBRTE_IEEE1588
202 	&ieee1588_fwd_engine,
203 #endif
204 	&shared_rxq_engine,
205 	NULL,
206 };
207 
208 struct rte_mempool *mempools[RTE_MAX_NUMA_NODES * MAX_SEGS_BUFFER_SPLIT];
209 uint16_t mempool_flags;
210 
211 struct fwd_config cur_fwd_config;
212 struct fwd_engine *cur_fwd_eng = &io_fwd_engine; /**< IO mode by default. */
213 uint32_t retry_enabled;
214 uint32_t burst_tx_delay_time = BURST_TX_WAIT_US;
215 uint32_t burst_tx_retry_num = BURST_TX_RETRIES;
216 
217 uint32_t mbuf_data_size_n = 1; /* Number of specified mbuf sizes. */
218 uint16_t mbuf_data_size[MAX_SEGS_BUFFER_SPLIT] = {
219 	DEFAULT_MBUF_DATA_SIZE
220 }; /**< Mbuf data space size. */
221 uint32_t param_total_num_mbufs = 0;  /**< number of mbufs in all pools - if
222                                       * specified on command-line. */
223 uint16_t stats_period; /**< Period to show statistics (disabled by default) */
224 
225 /** Extended statistics to show. */
226 struct rte_eth_xstat_name *xstats_display;
227 
228 unsigned int xstats_display_num; /**< Size of extended statistics to show */
229 
230 /*
231  * In a container, the process running with the 'stats-period' option cannot be
232  * terminated directly. Set a flag to exit the stats period loop after SIGINT/SIGTERM is received.
233  */
234 static volatile uint8_t f_quit;
235 uint8_t cl_quit; /* Quit testpmd from cmdline. */
236 
237 /*
238  * Max Rx frame size, set by '--max-pkt-len' parameter.
239  */
240 uint32_t max_rx_pkt_len;
241 
242 /*
243  * Configuration of packet segments used to scatter received packets
244  * if one of the split features is configured.
245  */
246 uint16_t rx_pkt_seg_lengths[MAX_SEGS_BUFFER_SPLIT];
247 uint8_t  rx_pkt_nb_segs; /**< Number of segments to split */
248 uint16_t rx_pkt_seg_offsets[MAX_SEGS_BUFFER_SPLIT];
249 uint8_t  rx_pkt_nb_offs; /**< Number of specified offsets */
250 uint32_t rx_pkt_hdr_protos[MAX_SEGS_BUFFER_SPLIT];
251 
252 uint8_t multi_rx_mempool; /**< Enables multi-rx-mempool feature */
253 
254 /*
255  * Configuration of packet segments used by the "txonly" processing engine.
256  */
257 uint16_t tx_pkt_length = TXONLY_DEF_PACKET_LEN; /**< TXONLY packet length. */
258 uint16_t tx_pkt_seg_lengths[RTE_MAX_SEGS_PER_PKT] = {
259 	TXONLY_DEF_PACKET_LEN,
260 };
261 uint8_t  tx_pkt_nb_segs = 1; /**< Number of segments in TXONLY packets */
262 
263 enum tx_pkt_split tx_pkt_split = TX_PKT_SPLIT_OFF;
264 /**< Split policy for packets to TX. */
265 
266 uint8_t txonly_multi_flow;
267 /**< Whether multiple flows are generated in TXONLY mode. */
268 
269 uint32_t tx_pkt_times_inter;
270 /**< Timings for send scheduling in TXONLY mode, time between bursts. */
271 
272 uint32_t tx_pkt_times_intra;
273 /**< Timings for send scheduling in TXONLY mode, time between packets. */
274 
275 uint16_t nb_pkt_per_burst = DEF_PKT_BURST; /**< Number of packets per burst. */
276 uint16_t nb_pkt_flowgen_clones; /**< Number of Tx packet clones to send in flowgen mode. */
277 int nb_flows_flowgen = 1024; /**< Number of flows in flowgen mode. */
278 uint16_t mb_mempool_cache = DEF_MBUF_CACHE; /**< Size of mbuf mempool cache. */
279 
280 /* Whether the current configuration is in DCB mode; 0 means it is not */
281 uint8_t dcb_config = 0;
282 
283 /*
284  * Configurable number of RX/TX queues.
285  */
286 queueid_t nb_hairpinq; /**< Number of hairpin queues per port. */
287 queueid_t nb_rxq = 1; /**< Number of RX queues per port. */
288 queueid_t nb_txq = 1; /**< Number of TX queues per port. */
289 
290 /*
291  * Configurable number of RX/TX ring descriptors.
292  * Defaults are supplied by drivers via ethdev.
293  */
294 #define RX_DESC_DEFAULT 0
295 #define TX_DESC_DEFAULT 0
296 uint16_t nb_rxd = RX_DESC_DEFAULT; /**< Number of RX descriptors. */
297 uint16_t nb_txd = TX_DESC_DEFAULT; /**< Number of TX descriptors. */
298 
299 #define RTE_PMD_PARAM_UNSET -1
300 /*
301  * Configurable values of RX and TX ring threshold registers.
302  */
303 
304 int8_t rx_pthresh = RTE_PMD_PARAM_UNSET;
305 int8_t rx_hthresh = RTE_PMD_PARAM_UNSET;
306 int8_t rx_wthresh = RTE_PMD_PARAM_UNSET;
307 
308 int8_t tx_pthresh = RTE_PMD_PARAM_UNSET;
309 int8_t tx_hthresh = RTE_PMD_PARAM_UNSET;
310 int8_t tx_wthresh = RTE_PMD_PARAM_UNSET;
311 
312 /*
313  * Configurable value of RX free threshold.
314  */
315 int16_t rx_free_thresh = RTE_PMD_PARAM_UNSET;
316 
317 /*
318  * Configurable value of RX drop enable.
319  */
320 int8_t rx_drop_en = RTE_PMD_PARAM_UNSET;
321 
322 /*
323  * Configurable value of TX free threshold.
324  */
325 int16_t tx_free_thresh = RTE_PMD_PARAM_UNSET;
326 
327 /*
328  * Configurable value of TX RS bit threshold.
329  */
330 int16_t tx_rs_thresh = RTE_PMD_PARAM_UNSET;
331 
332 /*
333  * Configurable value of buffered packets before sending.
334  */
335 uint16_t noisy_tx_sw_bufsz;
336 
337 /*
338  * Configurable value of packet buffer timeout.
339  */
340 uint16_t noisy_tx_sw_buf_flush_time;
341 
342 /*
343  * Configurable value for size of VNF internal memory area
344  * used for simulating noisy neighbour behaviour
345  */
346 uint64_t noisy_lkup_mem_sz;
347 
348 /*
349  * Configurable value of number of random writes done in
350  * VNF simulation memory area.
351  */
352 uint64_t noisy_lkup_num_writes;
353 
354 /*
355  * Configurable value of number of random reads done in
356  * VNF simulation memory area.
357  */
358 uint64_t noisy_lkup_num_reads;
359 
360 /*
361  * Configurable value of number of random reads/writes done in
362  * VNF simulation memory area.
363  */
364 uint64_t noisy_lkup_num_reads_writes;
365 
366 /*
367  * Receive Side Scaling (RSS) configuration.
368  */
369 uint64_t rss_hf = RTE_ETH_RSS_IP; /* RSS IP by default. */
370 
371 /*
372  * Port topology configuration
373  */
374 uint16_t port_topology = PORT_TOPOLOGY_PAIRED; /* Ports are paired by default */
375 
376 /*
377  * Avoid flushing all the Rx streams before starting forwarding.
378  */
379 uint8_t no_flush_rx = 0; /* flush by default */
380 
381 /*
382  * Flow API isolated mode.
383  */
384 uint8_t flow_isolate_all;
385 
386 /*
387  * Disable flow flush when stopping a port.
388  */
389 uint8_t no_flow_flush = 0; /* do flow flush by default */
390 
391 /*
392  * Avoid checking link status when starting/stopping a port.
393  */
394 uint8_t no_link_check = 0; /* check by default */
395 
396 /*
397  * Don't automatically start all ports in interactive mode.
398  */
399 uint8_t no_device_start = 0;
400 
401 /*
402  * Enable link status change notification
403  */
404 uint8_t lsc_interrupt = 1; /* enabled by default */
405 
406 /*
407  * Enable device removal notification.
408  */
409 uint8_t rmv_interrupt = 1; /* enabled by default */
410 
411 uint8_t hot_plug = 0; /**< hotplug disabled by default. */
412 
413 /* After attach, port setup is called on event or by iterator */
414 bool setup_on_probe_event = true;
415 
416 /* Clear ptypes on port initialization. */
417 uint8_t clear_ptypes = true;
418 
419 /* Hairpin ports configuration mode. */
420 uint32_t hairpin_mode;
421 
422 /* Pretty printing of ethdev events */
423 static const char * const eth_event_desc[] = {
424 	[RTE_ETH_EVENT_UNKNOWN] = "unknown",
425 	[RTE_ETH_EVENT_INTR_LSC] = "link state change",
426 	[RTE_ETH_EVENT_QUEUE_STATE] = "queue state",
427 	[RTE_ETH_EVENT_INTR_RESET] = "reset",
428 	[RTE_ETH_EVENT_VF_MBOX] = "VF mbox",
429 	[RTE_ETH_EVENT_IPSEC] = "IPsec",
430 	[RTE_ETH_EVENT_MACSEC] = "MACsec",
431 	[RTE_ETH_EVENT_INTR_RMV] = "device removal",
432 	[RTE_ETH_EVENT_NEW] = "device probed",
433 	[RTE_ETH_EVENT_DESTROY] = "device released",
434 	[RTE_ETH_EVENT_FLOW_AGED] = "flow aged",
435 	[RTE_ETH_EVENT_RX_AVAIL_THRESH] = "RxQ available descriptors threshold reached",
436 	[RTE_ETH_EVENT_ERR_RECOVERING] = "error recovering",
437 	[RTE_ETH_EVENT_RECOVERY_SUCCESS] = "error recovery successful",
438 	[RTE_ETH_EVENT_RECOVERY_FAILED] = "error recovery failed",
439 	[RTE_ETH_EVENT_MAX] = NULL,
440 };
441 
442 /*
443  * Display or mask Ethernet device events.
444  * Defaults to all events except VF_MBOX, NEW, DESTROY and RX_AVAIL_THRESH.
445  */
446 uint32_t event_print_mask = (UINT32_C(1) << RTE_ETH_EVENT_UNKNOWN) |
447 			    (UINT32_C(1) << RTE_ETH_EVENT_INTR_LSC) |
448 			    (UINT32_C(1) << RTE_ETH_EVENT_QUEUE_STATE) |
449 			    (UINT32_C(1) << RTE_ETH_EVENT_INTR_RESET) |
450 			    (UINT32_C(1) << RTE_ETH_EVENT_IPSEC) |
451 			    (UINT32_C(1) << RTE_ETH_EVENT_MACSEC) |
452 			    (UINT32_C(1) << RTE_ETH_EVENT_INTR_RMV) |
453 			    (UINT32_C(1) << RTE_ETH_EVENT_FLOW_AGED) |
454 			    (UINT32_C(1) << RTE_ETH_EVENT_ERR_RECOVERING) |
455 			    (UINT32_C(1) << RTE_ETH_EVENT_RECOVERY_SUCCESS) |
456 			    (UINT32_C(1) << RTE_ETH_EVENT_RECOVERY_FAILED);
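
/*
 * Bit N of event_print_mask corresponds to event type N, so for example
 * (event_print_mask & (UINT32_C(1) << RTE_ETH_EVENT_INTR_LSC)) != 0
 * means link state change events are printed.
 */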
457 /*
458  * Decide whether all memory is locked for performance.
459  */
460 int do_mlockall = 0;
461 
462 #ifdef RTE_LIB_LATENCYSTATS
463 
464 /*
465  * Set when latency stats are enabled on the command line
466  */
467 uint8_t latencystats_enabled;
468 
469 /*
470  * Lcore ID to service latency statistics.
471  */
472 lcoreid_t latencystats_lcore_id = -1;
473 
474 #endif
475 
476 /*
477  * Ethernet device configuration.
478  */
479 struct rte_eth_rxmode rx_mode;
480 
481 struct rte_eth_txmode tx_mode = {
482 	.offloads = RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE,
483 };
484 
485 volatile int test_done = 1; /* stop packet forwarding when set to 1. */
486 
487 /*
488  * Display zero values by default for xstats
489  */
490 uint8_t xstats_hide_zero;
491 
492 /*
493  * Measure of CPU cycles disabled by default
494  */
495 uint8_t record_core_cycles;
496 
497 /*
498  * Display of RX and TX bursts disabled by default
499  */
500 uint8_t record_burst_stats;
501 
502 /*
503  * Number of ports per shared Rx queue group, 0 to disable.
504  */
505 uint32_t rxq_share;
506 
507 unsigned int num_sockets = 0;
508 unsigned int socket_ids[RTE_MAX_NUMA_NODES];
509 
510 #ifdef RTE_LIB_BITRATESTATS
511 /* Bitrate statistics */
512 struct rte_stats_bitrates *bitrate_data;
513 lcoreid_t bitrate_lcore_id;
514 uint8_t bitrate_enabled;
515 #endif
516 
517 #ifdef RTE_LIB_GRO
518 struct gro_status gro_ports[RTE_MAX_ETHPORTS];
519 uint8_t gro_flush_cycles = GRO_DEFAULT_FLUSH_CYCLES;
520 #endif
521 
522 /*
523  * Hexadecimal bitmask of Rx multi-queue modes that can be enabled.
524  */
525 enum rte_eth_rx_mq_mode rx_mq_mode = RTE_ETH_MQ_RX_VMDQ_DCB_RSS;
526 
527 /*
528  * Used to set forced link speed
529  */
530 uint32_t eth_link_speed;
531 
532 /*
533  * ID of the current process in multi-process, used to
534  * configure the queues to be polled.
535  */
536 int proc_id;
537 
538 /*
539  * Number of processes in multi-process, used to
540  * configure the queues to be polled.
541  */
542 unsigned int num_procs = 1;
543 
544 static void
545 eth_rx_metadata_negotiate_mp(uint16_t port_id)
546 {
547 	uint64_t rx_meta_features = 0;
548 	int ret;
549 
550 	if (!is_proc_primary())
551 		return;
552 
553 	rx_meta_features |= RTE_ETH_RX_METADATA_USER_FLAG;
554 	rx_meta_features |= RTE_ETH_RX_METADATA_USER_MARK;
555 	rx_meta_features |= RTE_ETH_RX_METADATA_TUNNEL_ID;
556 
557 	ret = rte_eth_rx_metadata_negotiate(port_id, &rx_meta_features);
558 	if (ret == 0) {
559 		if (!(rx_meta_features & RTE_ETH_RX_METADATA_USER_FLAG)) {
560 			TESTPMD_LOG(DEBUG, "Flow action FLAG will not affect Rx mbufs on port %u\n",
561 				    port_id);
562 		}
563 
564 		if (!(rx_meta_features & RTE_ETH_RX_METADATA_USER_MARK)) {
565 			TESTPMD_LOG(DEBUG, "Flow action MARK will not affect Rx mbufs on port %u\n",
566 				    port_id);
567 		}
568 
569 		if (!(rx_meta_features & RTE_ETH_RX_METADATA_TUNNEL_ID)) {
570 			TESTPMD_LOG(DEBUG, "Flow tunnel offload support might be limited or unavailable on port %u\n",
571 				    port_id);
572 		}
573 	} else if (ret != -ENOTSUP) {
574 		rte_exit(EXIT_FAILURE, "Error when negotiating Rx meta features on port %u: %s\n",
575 			 port_id, rte_strerror(-ret));
576 	}
577 }
578 
579 static int
580 eth_dev_configure_mp(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
581 		      const struct rte_eth_conf *dev_conf)
582 {
583 	if (is_proc_primary())
584 		return rte_eth_dev_configure(port_id, nb_rx_q, nb_tx_q,
585 					dev_conf);
586 	return 0;
587 }
588 
589 static int
590 change_bonding_slave_port_status(portid_t bond_pid, bool is_stop)
591 {
592 #ifdef RTE_NET_BOND
593 
594 	portid_t slave_pids[RTE_MAX_ETHPORTS];
595 	struct rte_port *port;
596 	int num_slaves;
597 	portid_t slave_pid;
598 	int i;
599 
600 	num_slaves = rte_eth_bond_slaves_get(bond_pid, slave_pids,
601 						RTE_MAX_ETHPORTS);
602 	if (num_slaves < 0) {
603 		fprintf(stderr, "Failed to get slave list for port = %u\n",
604 			bond_pid);
605 		return num_slaves;
606 	}
607 
608 	for (i = 0; i < num_slaves; i++) {
609 		slave_pid = slave_pids[i];
610 		port = &ports[slave_pid];
611 		port->port_status =
612 			is_stop ? RTE_PORT_STOPPED : RTE_PORT_STARTED;
613 	}
614 #else
615 	RTE_SET_USED(bond_pid);
616 	RTE_SET_USED(is_stop);
617 #endif
618 	return 0;
619 }
620 
621 static int
622 eth_dev_start_mp(uint16_t port_id)
623 {
624 	int ret;
625 
626 	if (is_proc_primary()) {
627 		ret = rte_eth_dev_start(port_id);
628 		if (ret != 0)
629 			return ret;
630 
631 		struct rte_port *port = &ports[port_id];
632 
633 		/*
634 		 * Starting a bonded port also starts all slaves under the bonded
635 		 * device. So if this port is a bonded device, we need to modify the
636 		 * port status of these slaves.
637 		 */
638 		if (port->bond_flag == 1)
639 			return change_bonding_slave_port_status(port_id, false);
640 	}
641 
642 	return 0;
643 }
644 
645 static int
646 eth_dev_stop_mp(uint16_t port_id)
647 {
648 	int ret;
649 
650 	if (is_proc_primary()) {
651 		ret = rte_eth_dev_stop(port_id);
652 		if (ret != 0)
653 			return ret;
654 
655 		struct rte_port *port = &ports[port_id];
656 
657 		/*
658 		 * Stopping a bonded port also stops all slaves under the bonded
659 		 * device. So if this port is a bonded device, we need to modify the
660 		 * port status of these slaves.
661 		 */
662 		if (port->bond_flag == 1)
663 			return change_bonding_slave_port_status(port_id, true);
664 	}
665 
666 	return 0;
667 }
668 
669 static void
670 mempool_free_mp(struct rte_mempool *mp)
671 {
672 	if (is_proc_primary())
673 		rte_mempool_free(mp);
674 }
675 
676 static int
677 eth_dev_set_mtu_mp(uint16_t port_id, uint16_t mtu)
678 {
679 	if (is_proc_primary())
680 		return rte_eth_dev_set_mtu(port_id, mtu);
681 
682 	return 0;
683 }
684 
685 /* Forward function declarations */
686 static void setup_attached_port(portid_t pi);
687 static void check_all_ports_link_status(uint32_t port_mask);
688 static int eth_event_callback(portid_t port_id,
689 			      enum rte_eth_event_type type,
690 			      void *param, void *ret_param);
691 static void dev_event_callback(const char *device_name,
692 				enum rte_dev_event_type type,
693 				void *param);
694 static void fill_xstats_display_info(void);
695 
696 /*
697  * Check if all the ports are started.
698  * If yes, return positive value. If not, return zero.
699  */
700 static int all_ports_started(void);
701 
702 #ifdef RTE_LIB_GSO
703 struct gso_status gso_ports[RTE_MAX_ETHPORTS];
704 uint16_t gso_max_segment_size = RTE_ETHER_MAX_LEN - RTE_ETHER_CRC_LEN;
705 #endif
706 
707 /* Holds the registered mbuf dynamic flags names. */
708 char dynf_names[64][RTE_MBUF_DYN_NAMESIZE];
709 
710 
711 /*
712  * Helper function to check whether a socket id has not been discovered yet.
713  * If it is new, return a positive value. Otherwise, return zero.
714  */
715 int
716 new_socket_id(unsigned int socket_id)
717 {
718 	unsigned int i;
719 
720 	for (i = 0; i < num_sockets; i++) {
721 		if (socket_ids[i] == socket_id)
722 			return 0;
723 	}
724 	return 1;
725 }
726 
727 /*
728  * Setup default configuration.
729  */
730 static void
731 set_default_fwd_lcores_config(void)
732 {
733 	unsigned int i;
734 	unsigned int nb_lc;
735 	unsigned int sock_num;
736 
737 	nb_lc = 0;
738 	for (i = 0; i < RTE_MAX_LCORE; i++) {
739 		if (!rte_lcore_is_enabled(i))
740 			continue;
741 		sock_num = rte_lcore_to_socket_id(i);
742 		if (new_socket_id(sock_num)) {
743 			if (num_sockets >= RTE_MAX_NUMA_NODES) {
744 				rte_exit(EXIT_FAILURE,
745 					 "Total sockets greater than %u\n",
746 					 RTE_MAX_NUMA_NODES);
747 			}
748 			socket_ids[num_sockets++] = sock_num;
749 		}
750 		if (i == rte_get_main_lcore())
751 			continue;
752 		fwd_lcores_cpuids[nb_lc++] = i;
753 	}
754 	nb_lcores = (lcoreid_t) nb_lc;
755 	nb_cfg_lcores = nb_lcores;
756 	nb_fwd_lcores = 1;
757 }
758 
759 static void
760 set_def_peer_eth_addrs(void)
761 {
762 	portid_t i;
763 
764 	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
765 		peer_eth_addrs[i].addr_bytes[0] = RTE_ETHER_LOCAL_ADMIN_ADDR;
766 		peer_eth_addrs[i].addr_bytes[5] = i;
767 	}
768 }
769 
770 static void
771 set_default_fwd_ports_config(void)
772 {
773 	portid_t pt_id;
774 	int i = 0;
775 
776 	RTE_ETH_FOREACH_DEV(pt_id) {
777 		fwd_ports_ids[i++] = pt_id;
778 
779 		/* Update sockets info according to the attached device */
780 		int socket_id = rte_eth_dev_socket_id(pt_id);
781 		if (socket_id >= 0 && new_socket_id(socket_id)) {
782 			if (num_sockets >= RTE_MAX_NUMA_NODES) {
783 				rte_exit(EXIT_FAILURE,
784 					 "Total sockets greater than %u\n",
785 					 RTE_MAX_NUMA_NODES);
786 			}
787 			socket_ids[num_sockets++] = socket_id;
788 		}
789 	}
790 
791 	nb_cfg_ports = nb_ports;
792 	nb_fwd_ports = nb_ports;
793 }
794 
795 void
796 set_def_fwd_config(void)
797 {
798 	set_default_fwd_lcores_config();
799 	set_def_peer_eth_addrs();
800 	set_default_fwd_ports_config();
801 }
802 
803 #ifndef RTE_EXEC_ENV_WINDOWS
804 /* extremely pessimistic estimation of memory required to create a mempool */
805 static int
806 calc_mem_size(uint32_t nb_mbufs, uint32_t mbuf_sz, size_t pgsz, size_t *out)
807 {
808 	unsigned int n_pages, mbuf_per_pg, leftover;
809 	uint64_t total_mem, mbuf_mem, obj_sz;
810 
811 	/* there is no good way to predict how much space the mempool will
812 	 * occupy because it will allocate chunks on the fly, and some of those
813 	 * will come from default DPDK memory while some will come from our
814 	 * external memory, so just assume 128MB will be enough for everyone.
815 	 */
816 	uint64_t hdr_mem = 128 << 20;
817 
818 	/* account for possible non-contiguousness */
819 	obj_sz = rte_mempool_calc_obj_size(mbuf_sz, 0, NULL);
820 	if (obj_sz > pgsz) {
821 		TESTPMD_LOG(ERR, "Object size is bigger than page size\n");
822 		return -1;
823 	}
824 
825 	mbuf_per_pg = pgsz / obj_sz;
826 	leftover = (nb_mbufs % mbuf_per_pg) > 0;
827 	n_pages = (nb_mbufs / mbuf_per_pg) + leftover;
828 
829 	mbuf_mem = n_pages * pgsz;
830 
831 	total_mem = RTE_ALIGN(hdr_mem + mbuf_mem, pgsz);
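
	/*
	 * Worked example (illustrative numbers only): with 2 MiB pages and
	 * obj_sz = 2560 bytes, mbuf_per_pg = 819; for nb_mbufs = 16384 this
	 * gives n_pages = 21, mbuf_mem = 42 MiB and total_mem = 170 MiB
	 * (128 MiB header estimate + 42 MiB, already 2 MiB aligned).
	 */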
832 
833 	if (total_mem > SIZE_MAX) {
834 		TESTPMD_LOG(ERR, "Memory size too big\n");
835 		return -1;
836 	}
837 	*out = (size_t)total_mem;
838 
839 	return 0;
840 }
841 
842 static int
843 pagesz_flags(uint64_t page_sz)
844 {
845 	/* as per the mmap() manpage, the huge page size is passed as its log2
846 	 * shifted left by MAP_HUGE_SHIFT
847 	 */
848 	int log2 = rte_log2_u64(page_sz);
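	/* e.g. for 2 MiB hugepages, log2 == 21, so the result is 21 << 26,
	 * matching the kernel's MAP_HUGE_2MB value.
	 */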
849 
850 	return (log2 << HUGE_SHIFT);
851 }
852 
853 static void *
854 alloc_mem(size_t memsz, size_t pgsz, bool huge)
855 {
856 	void *addr;
857 	int flags;
858 
859 	/* allocate anonymous hugepages */
860 	flags = MAP_ANONYMOUS | MAP_PRIVATE;
861 	if (huge)
862 		flags |= HUGE_FLAG | pagesz_flags(pgsz);
863 
864 	addr = mmap(NULL, memsz, PROT_READ | PROT_WRITE, flags, -1, 0);
865 	if (addr == MAP_FAILED)
866 		return NULL;
867 
868 	return addr;
869 }
870 
871 struct extmem_param {
872 	void *addr;
873 	size_t len;
874 	size_t pgsz;
875 	rte_iova_t *iova_table;
876 	unsigned int iova_table_len;
877 };
878 
879 static int
880 create_extmem(uint32_t nb_mbufs, uint32_t mbuf_sz, struct extmem_param *param,
881 		bool huge)
882 {
883 	uint64_t pgsizes[] = {RTE_PGSIZE_2M, RTE_PGSIZE_1G, /* x86_64, ARM */
884 			RTE_PGSIZE_16M, RTE_PGSIZE_16G};    /* POWER */
885 	unsigned int cur_page, n_pages, pgsz_idx;
886 	size_t mem_sz, cur_pgsz;
887 	rte_iova_t *iovas = NULL;
888 	void *addr;
889 	int ret;
890 
891 	for (pgsz_idx = 0; pgsz_idx < RTE_DIM(pgsizes); pgsz_idx++) {
892 		/* skip anything that is too big */
893 		if (pgsizes[pgsz_idx] > SIZE_MAX)
894 			continue;
895 
896 		cur_pgsz = pgsizes[pgsz_idx];
897 
898 		/* if we were told not to allocate hugepages, override */
899 		if (!huge)
900 			cur_pgsz = sysconf(_SC_PAGESIZE);
901 
902 		ret = calc_mem_size(nb_mbufs, mbuf_sz, cur_pgsz, &mem_sz);
903 		if (ret < 0) {
904 			TESTPMD_LOG(ERR, "Cannot calculate memory size\n");
905 			return -1;
906 		}
907 
908 		/* allocate our memory */
909 		addr = alloc_mem(mem_sz, cur_pgsz, huge);
910 
911 		/* if we couldn't allocate memory with a specified page size,
912 		 * that doesn't mean we can't do it with other page sizes, so
913 		 * try another one.
914 		 */
915 		if (addr == NULL)
916 			continue;
917 
918 		/* store IOVA addresses for every page in this memory area */
919 		n_pages = mem_sz / cur_pgsz;
920 
921 		iovas = malloc(sizeof(*iovas) * n_pages);
922 
923 		if (iovas == NULL) {
924 			TESTPMD_LOG(ERR, "Cannot allocate memory for iova addresses\n");
925 			goto fail;
926 		}
927 		/* lock memory if it's not huge pages */
928 		if (!huge)
929 			mlock(addr, mem_sz);
930 
931 		/* populate IOVA addresses */
932 		for (cur_page = 0; cur_page < n_pages; cur_page++) {
933 			rte_iova_t iova;
934 			size_t offset;
935 			void *cur;
936 
937 			offset = cur_pgsz * cur_page;
938 			cur = RTE_PTR_ADD(addr, offset);
939 
940 			/* touch the page before getting its IOVA */
941 			*(volatile char *)cur = 0;
942 
943 			iova = rte_mem_virt2iova(cur);
944 
945 			iovas[cur_page] = iova;
946 		}
947 
948 		break;
949 	}
950 	/* if we couldn't allocate anything */
951 	if (iovas == NULL)
952 		return -1;
953 
954 	param->addr = addr;
955 	param->len = mem_sz;
956 	param->pgsz = cur_pgsz;
957 	param->iova_table = iovas;
958 	param->iova_table_len = n_pages;
959 
960 	return 0;
961 fail:
962 	free(iovas);
963 	if (addr)
964 		munmap(addr, mem_sz);
965 
966 	return -1;
967 }
968 
969 static int
970 setup_extmem(uint32_t nb_mbufs, uint32_t mbuf_sz, bool huge)
971 {
972 	struct extmem_param param;
973 	int socket_id, ret;
974 
975 	memset(&param, 0, sizeof(param));
976 
977 	/* check if our heap exists */
978 	socket_id = rte_malloc_heap_get_socket(EXTMEM_HEAP_NAME);
979 	if (socket_id < 0) {
980 		/* create our heap */
981 		ret = rte_malloc_heap_create(EXTMEM_HEAP_NAME);
982 		if (ret < 0) {
983 			TESTPMD_LOG(ERR, "Cannot create heap\n");
984 			return -1;
985 		}
986 	}
987 
988 	ret = create_extmem(nb_mbufs, mbuf_sz, &param, huge);
989 	if (ret < 0) {
990 		TESTPMD_LOG(ERR, "Cannot create memory area\n");
991 		return -1;
992 	}
993 
994 	/* we now have a valid memory area, so add it to heap */
995 	ret = rte_malloc_heap_memory_add(EXTMEM_HEAP_NAME,
996 			param.addr, param.len, param.iova_table,
997 			param.iova_table_len, param.pgsz);
998 
999 	/* when using VFIO, memory is automatically mapped for DMA by EAL */
1000 
1001 	/* not needed any more */
1002 	free(param.iova_table);
1003 
1004 	if (ret < 0) {
1005 		TESTPMD_LOG(ERR, "Cannot add memory to heap\n");
1006 		munmap(param.addr, param.len);
1007 		return -1;
1008 	}
1009 
1010 	/* success */
1011 
1012 	TESTPMD_LOG(DEBUG, "Allocated %zuMB of external memory\n",
1013 			param.len >> 20);
1014 
1015 	return 0;
1016 }
1017 static void
1018 dma_unmap_cb(struct rte_mempool *mp __rte_unused, void *opaque __rte_unused,
1019 	     struct rte_mempool_memhdr *memhdr, unsigned mem_idx __rte_unused)
1020 {
1021 	uint16_t pid = 0;
1022 	int ret;
1023 
1024 	RTE_ETH_FOREACH_DEV(pid) {
1025 		struct rte_eth_dev_info dev_info;
1026 
1027 		ret = eth_dev_info_get_print_err(pid, &dev_info);
1028 		if (ret != 0) {
1029 			TESTPMD_LOG(DEBUG,
1030 				    "unable to get device info for port %d on addr 0x%p,"
1031 				    " mempool unmapping will not be performed\n",
1032 				    pid, memhdr->addr);
1033 			continue;
1034 		}
1035 
1036 		ret = rte_dev_dma_unmap(dev_info.device, memhdr->addr, 0, memhdr->len);
1037 		if (ret) {
1038 			TESTPMD_LOG(DEBUG,
1039 				    "unable to DMA unmap addr 0x%p "
1040 				    "for device %s\n",
1041 				    memhdr->addr, rte_dev_name(dev_info.device));
1042 		}
1043 	}
1044 	ret = rte_extmem_unregister(memhdr->addr, memhdr->len);
1045 	if (ret) {
1046 		TESTPMD_LOG(DEBUG,
1047 			    "unable to un-register addr 0x%p\n", memhdr->addr);
1048 	}
1049 }
1050 
1051 static void
1052 dma_map_cb(struct rte_mempool *mp __rte_unused, void *opaque __rte_unused,
1053 	   struct rte_mempool_memhdr *memhdr, unsigned mem_idx __rte_unused)
1054 {
1055 	uint16_t pid = 0;
1056 	size_t page_size = sysconf(_SC_PAGESIZE);
1057 	int ret;
1058 
1059 	ret = rte_extmem_register(memhdr->addr, memhdr->len, NULL, 0,
1060 				  page_size);
1061 	if (ret) {
1062 		TESTPMD_LOG(DEBUG,
1063 			    "unable to register addr 0x%p\n", memhdr->addr);
1064 		return;
1065 	}
1066 	RTE_ETH_FOREACH_DEV(pid) {
1067 		struct rte_eth_dev_info dev_info;
1068 
1069 		ret = eth_dev_info_get_print_err(pid, &dev_info);
1070 		if (ret != 0) {
1071 			TESTPMD_LOG(DEBUG,
1072 				    "unable to get device info for port %d on addr 0x%p,"
1073 				    " mempool mapping will not be performed\n",
1074 				    pid, memhdr->addr);
1075 			continue;
1076 		}
1077 		ret = rte_dev_dma_map(dev_info.device, memhdr->addr, 0, memhdr->len);
1078 		if (ret) {
1079 			TESTPMD_LOG(DEBUG,
1080 				    "unable to DMA map addr 0x%p "
1081 				    "for device %s\n",
1082 				    memhdr->addr, rte_dev_name(dev_info.device));
1083 		}
1084 	}
1085 }
1086 #endif
1087 
1088 static unsigned int
1089 setup_extbuf(uint32_t nb_mbufs, uint16_t mbuf_sz, unsigned int socket_id,
1090 	    char *pool_name, struct rte_pktmbuf_extmem **ext_mem)
1091 {
1092 	struct rte_pktmbuf_extmem *xmem;
1093 	unsigned int ext_num, zone_num, elt_num;
1094 	uint16_t elt_size;
1095 
1096 	elt_size = RTE_ALIGN_CEIL(mbuf_sz, RTE_CACHE_LINE_SIZE);
1097 	elt_num = EXTBUF_ZONE_SIZE / elt_size;
1098 	zone_num = (nb_mbufs + elt_num - 1) / elt_num;
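	/*
	 * Illustrative sizing: with mbuf_sz = 2176 (already a multiple of a
	 * 64-byte cache line), elt_num = 2096896 / 2176 = 963 buffers per
	 * zone, so nb_mbufs = 8192 needs zone_num = 9 memzones.
	 */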
1099 
1100 	xmem = malloc(sizeof(struct rte_pktmbuf_extmem) * zone_num);
1101 	if (xmem == NULL) {
1102 		TESTPMD_LOG(ERR, "Cannot allocate memory for "
1103 				 "external buffer descriptors\n");
1104 		*ext_mem = NULL;
1105 		return 0;
1106 	}
1107 	for (ext_num = 0; ext_num < zone_num; ext_num++) {
1108 		struct rte_pktmbuf_extmem *xseg = xmem + ext_num;
1109 		const struct rte_memzone *mz;
1110 		char mz_name[RTE_MEMZONE_NAMESIZE];
1111 		int ret;
1112 
1113 		ret = snprintf(mz_name, sizeof(mz_name),
1114 			RTE_MEMPOOL_MZ_FORMAT "_xb_%u", pool_name, ext_num);
1115 		if (ret < 0 || ret >= (int)sizeof(mz_name)) {
1116 			errno = ENAMETOOLONG;
1117 			ext_num = 0;
1118 			break;
1119 		}
1120 		mz = rte_memzone_reserve(mz_name, EXTBUF_ZONE_SIZE,
1121 					 socket_id,
1122 					 RTE_MEMZONE_IOVA_CONTIG |
1123 					 RTE_MEMZONE_1GB |
1124 					 RTE_MEMZONE_SIZE_HINT_ONLY);
1125 		if (mz == NULL) {
1126 			/*
1127 			 * The caller exits on external buffer creation
1128 			 * error, so there is no need to free memzones.
1129 			 */
1130 			errno = ENOMEM;
1131 			ext_num = 0;
1132 			break;
1133 		}
1134 		xseg->buf_ptr = mz->addr;
1135 		xseg->buf_iova = mz->iova;
1136 		xseg->buf_len = EXTBUF_ZONE_SIZE;
1137 		xseg->elt_size = elt_size;
1138 	}
1139 	if (ext_num == 0 && xmem != NULL) {
1140 		free(xmem);
1141 		xmem = NULL;
1142 	}
1143 	*ext_mem = xmem;
1144 	return ext_num;
1145 }
1146 
1147 /*
1148  * Configuration initialisation done once at init time.
1149  */
1150 static struct rte_mempool *
1151 mbuf_pool_create(uint16_t mbuf_seg_size, unsigned nb_mbuf,
1152 		 unsigned int socket_id, uint16_t size_idx)
1153 {
1154 	char pool_name[RTE_MEMPOOL_NAMESIZE];
1155 	struct rte_mempool *rte_mp = NULL;
1156 #ifndef RTE_EXEC_ENV_WINDOWS
1157 	uint32_t mb_size;
1158 
1159 	mb_size = sizeof(struct rte_mbuf) + mbuf_seg_size;
1160 #endif
1161 	mbuf_poolname_build(socket_id, pool_name, sizeof(pool_name), size_idx);
1162 	if (!is_proc_primary()) {
1163 		rte_mp = rte_mempool_lookup(pool_name);
1164 		if (rte_mp == NULL)
1165 			rte_exit(EXIT_FAILURE,
1166 				"Get mbuf pool for socket %u failed: %s\n",
1167 				socket_id, rte_strerror(rte_errno));
1168 		return rte_mp;
1169 	}
1170 
1171 	TESTPMD_LOG(INFO,
1172 		"create a new mbuf pool <%s>: n=%u, size=%u, socket=%u\n",
1173 		pool_name, nb_mbuf, mbuf_seg_size, socket_id);
1174 
1175 	switch (mp_alloc_type) {
1176 	case MP_ALLOC_NATIVE:
1177 		{
1178 			/* wrapper to rte_mempool_create() */
1179 			TESTPMD_LOG(INFO, "preferred mempool ops selected: %s\n",
1180 					rte_mbuf_best_mempool_ops());
1181 			rte_mp = rte_pktmbuf_pool_create(pool_name, nb_mbuf,
1182 				mb_mempool_cache, 0, mbuf_seg_size, socket_id);
1183 			break;
1184 		}
1185 #ifndef RTE_EXEC_ENV_WINDOWS
1186 	case MP_ALLOC_ANON:
1187 		{
1188 			rte_mp = rte_mempool_create_empty(pool_name, nb_mbuf,
1189 				mb_size, (unsigned int) mb_mempool_cache,
1190 				sizeof(struct rte_pktmbuf_pool_private),
1191 				socket_id, mempool_flags);
1192 			if (rte_mp == NULL)
1193 				goto err;
1194 
1195 			if (rte_mempool_populate_anon(rte_mp) == 0) {
1196 				rte_mempool_free(rte_mp);
1197 				rte_mp = NULL;
1198 				goto err;
1199 			}
1200 			rte_pktmbuf_pool_init(rte_mp, NULL);
1201 			rte_mempool_obj_iter(rte_mp, rte_pktmbuf_init, NULL);
1202 			rte_mempool_mem_iter(rte_mp, dma_map_cb, NULL);
1203 			break;
1204 		}
1205 	case MP_ALLOC_XMEM:
1206 	case MP_ALLOC_XMEM_HUGE:
1207 		{
1208 			int heap_socket;
1209 			bool huge = mp_alloc_type == MP_ALLOC_XMEM_HUGE;
1210 
1211 			if (setup_extmem(nb_mbuf, mbuf_seg_size, huge) < 0)
1212 				rte_exit(EXIT_FAILURE, "Could not create external memory\n");
1213 
1214 			heap_socket =
1215 				rte_malloc_heap_get_socket(EXTMEM_HEAP_NAME);
1216 			if (heap_socket < 0)
1217 				rte_exit(EXIT_FAILURE, "Could not get external memory socket ID\n");
1218 
1219 			TESTPMD_LOG(INFO, "preferred mempool ops selected: %s\n",
1220 					rte_mbuf_best_mempool_ops());
1221 			rte_mp = rte_pktmbuf_pool_create(pool_name, nb_mbuf,
1222 					mb_mempool_cache, 0, mbuf_seg_size,
1223 					heap_socket);
1224 			break;
1225 		}
1226 #endif
1227 	case MP_ALLOC_XBUF:
1228 		{
1229 			struct rte_pktmbuf_extmem *ext_mem;
1230 			unsigned int ext_num;
1231 
1232 			ext_num = setup_extbuf(nb_mbuf,	mbuf_seg_size,
1233 					       socket_id, pool_name, &ext_mem);
1234 			if (ext_num == 0)
1235 				rte_exit(EXIT_FAILURE,
1236 					 "Can't create pinned data buffers\n");
1237 
1238 			TESTPMD_LOG(INFO, "preferred mempool ops selected: %s\n",
1239 					rte_mbuf_best_mempool_ops());
1240 			rte_mp = rte_pktmbuf_pool_create_extbuf
1241 					(pool_name, nb_mbuf, mb_mempool_cache,
1242 					 0, mbuf_seg_size, socket_id,
1243 					 ext_mem, ext_num);
1244 			free(ext_mem);
1245 			break;
1246 		}
1247 	default:
1248 		{
1249 			rte_exit(EXIT_FAILURE, "Invalid mempool creation mode\n");
1250 		}
1251 	}
1252 
1253 #ifndef RTE_EXEC_ENV_WINDOWS
1254 err:
1255 #endif
1256 	if (rte_mp == NULL) {
1257 		rte_exit(EXIT_FAILURE,
1258 			"Creation of mbuf pool for socket %u failed: %s\n",
1259 			socket_id, rte_strerror(rte_errno));
1260 	} else if (verbose_level > 0) {
1261 		rte_mempool_dump(stdout, rte_mp);
1262 	}
1263 	return rte_mp;
1264 }
1265 
1266 /*
1267  * Check whether the given socket id is valid in NUMA mode.
1268  * Return 0 if valid, -1 otherwise.
1269  */
1270 static int
1271 check_socket_id(const unsigned int socket_id)
1272 {
1273 	static int warning_once = 0;
1274 
1275 	if (new_socket_id(socket_id)) {
1276 		if (!warning_once && numa_support)
1277 			fprintf(stderr,
1278 				"Warning: NUMA should be configured manually by using --port-numa-config and --ring-numa-config parameters along with --numa.\n");
1279 		warning_once = 1;
1280 		return -1;
1281 	}
1282 	return 0;
1283 }
1284 
1285 /*
1286  * Get the allowed maximum number of RX queues.
1287  * *pid returns the port id which has the minimal value of
1288  * max_rx_queues in all ports.
1289  */
1290 queueid_t
1291 get_allowed_max_nb_rxq(portid_t *pid)
1292 {
1293 	queueid_t allowed_max_rxq = RTE_MAX_QUEUES_PER_PORT;
1294 	bool max_rxq_valid = false;
1295 	portid_t pi;
1296 	struct rte_eth_dev_info dev_info;
1297 
1298 	RTE_ETH_FOREACH_DEV(pi) {
1299 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1300 			continue;
1301 
1302 		max_rxq_valid = true;
1303 		if (dev_info.max_rx_queues < allowed_max_rxq) {
1304 			allowed_max_rxq = dev_info.max_rx_queues;
1305 			*pid = pi;
1306 		}
1307 	}
1308 	return max_rxq_valid ? allowed_max_rxq : 0;
1309 }
1310 
1311 /*
1312  * Check whether the input rxq is valid or not.
1313  * If the input rxq is not greater than the maximum number
1314  * of Rx queues supported by every port, it is valid.
1315  * Return 0 if valid, -1 otherwise.
1316  */
1317 int
1318 check_nb_rxq(queueid_t rxq)
1319 {
1320 	queueid_t allowed_max_rxq;
1321 	portid_t pid = 0;
1322 
1323 	allowed_max_rxq = get_allowed_max_nb_rxq(&pid);
1324 	if (rxq > allowed_max_rxq) {
1325 		fprintf(stderr,
1326 			"Fail: input rxq (%u) can't be greater than max_rx_queues (%u) of port %u\n",
1327 			rxq, allowed_max_rxq, pid);
1328 		return -1;
1329 	}
1330 	return 0;
1331 }
1332 
1333 /*
1334  * Get the allowed maximum number of TX queues.
1335  * *pid returns the port id which has the minimal value of
1336  * max_tx_queues in all ports.
1337  */
1338 queueid_t
1339 get_allowed_max_nb_txq(portid_t *pid)
1340 {
1341 	queueid_t allowed_max_txq = RTE_MAX_QUEUES_PER_PORT;
1342 	bool max_txq_valid = false;
1343 	portid_t pi;
1344 	struct rte_eth_dev_info dev_info;
1345 
1346 	RTE_ETH_FOREACH_DEV(pi) {
1347 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1348 			continue;
1349 
1350 		max_txq_valid = true;
1351 		if (dev_info.max_tx_queues < allowed_max_txq) {
1352 			allowed_max_txq = dev_info.max_tx_queues;
1353 			*pid = pi;
1354 		}
1355 	}
1356 	return max_txq_valid ? allowed_max_txq : 0;
1357 }
1358 
1359 /*
1360  * Check whether the input txq is valid or not.
1361  * If the input txq is not greater than the maximum number
1362  * of Tx queues supported by every port, it is valid.
1363  * Return 0 if valid, -1 otherwise.
1364  */
1365 int
1366 check_nb_txq(queueid_t txq)
1367 {
1368 	queueid_t allowed_max_txq;
1369 	portid_t pid = 0;
1370 
1371 	allowed_max_txq = get_allowed_max_nb_txq(&pid);
1372 	if (txq > allowed_max_txq) {
1373 		fprintf(stderr,
1374 			"Fail: input txq (%u) can't be greater than max_tx_queues (%u) of port %u\n",
1375 			txq, allowed_max_txq, pid);
1376 		return -1;
1377 	}
1378 	return 0;
1379 }
1380 
1381 /*
1382  * Get the allowed maximum number of RXDs of every rx queue.
1383  * *pid returns the port id which has the minimal value of
1384  * max_rxd in all queues of all ports.
1385  */
1386 static uint16_t
1387 get_allowed_max_nb_rxd(portid_t *pid)
1388 {
1389 	uint16_t allowed_max_rxd = UINT16_MAX;
1390 	portid_t pi;
1391 	struct rte_eth_dev_info dev_info;
1392 
1393 	RTE_ETH_FOREACH_DEV(pi) {
1394 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1395 			continue;
1396 
1397 		if (dev_info.rx_desc_lim.nb_max < allowed_max_rxd) {
1398 			allowed_max_rxd = dev_info.rx_desc_lim.nb_max;
1399 			*pid = pi;
1400 		}
1401 	}
1402 	return allowed_max_rxd;
1403 }
1404 
1405 /*
1406  * Get the allowed minimal number of RXDs of every rx queue.
1407  * *pid returns the port id which has the maximal value of
1408  * min_rxd in all queues of all ports.
1409  */
1410 static uint16_t
1411 get_allowed_min_nb_rxd(portid_t *pid)
1412 {
1413 	uint16_t allowed_min_rxd = 0;
1414 	portid_t pi;
1415 	struct rte_eth_dev_info dev_info;
1416 
1417 	RTE_ETH_FOREACH_DEV(pi) {
1418 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1419 			continue;
1420 
1421 		if (dev_info.rx_desc_lim.nb_min > allowed_min_rxd) {
1422 			allowed_min_rxd = dev_info.rx_desc_lim.nb_min;
1423 			*pid = pi;
1424 		}
1425 	}
1426 
1427 	return allowed_min_rxd;
1428 }
1429 
1430 /*
1431  * Check whether the input rxd is valid or not.
1432  * If the input rxd is not greater than the maximum number of RXDs
1433  * and not less than the minimal number of RXDs supported by every
1434  * Rx queue of every port, it is valid.
1435  * Return 0 if valid, -1 otherwise.
1436  */
1437 int
1438 check_nb_rxd(queueid_t rxd)
1439 {
1440 	uint16_t allowed_max_rxd;
1441 	uint16_t allowed_min_rxd;
1442 	portid_t pid = 0;
1443 
1444 	allowed_max_rxd = get_allowed_max_nb_rxd(&pid);
1445 	if (rxd > allowed_max_rxd) {
1446 		fprintf(stderr,
1447 			"Fail: input rxd (%u) can't be greater than max_rxds (%u) of port %u\n",
1448 			rxd, allowed_max_rxd, pid);
1449 		return -1;
1450 	}
1451 
1452 	allowed_min_rxd = get_allowed_min_nb_rxd(&pid);
1453 	if (rxd < allowed_min_rxd) {
1454 		fprintf(stderr,
1455 			"Fail: input rxd (%u) can't be less than min_rxds (%u) of port %u\n",
1456 			rxd, allowed_min_rxd, pid);
1457 		return -1;
1458 	}
1459 
1460 	return 0;
1461 }
1462 
1463 /*
1464  * Get the allowed maximum number of TXDs of every tx queue.
1465  * *pid returns the port id which has the minimal value of
1466  * max_txd in every tx queue.
1467  */
1468 static uint16_t
1469 get_allowed_max_nb_txd(portid_t *pid)
1470 {
1471 	uint16_t allowed_max_txd = UINT16_MAX;
1472 	portid_t pi;
1473 	struct rte_eth_dev_info dev_info;
1474 
1475 	RTE_ETH_FOREACH_DEV(pi) {
1476 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1477 			continue;
1478 
1479 		if (dev_info.tx_desc_lim.nb_max < allowed_max_txd) {
1480 			allowed_max_txd = dev_info.tx_desc_lim.nb_max;
1481 			*pid = pi;
1482 		}
1483 	}
1484 	return allowed_max_txd;
1485 }
1486 
1487 /*
1488  * Get the allowed minimal number of TXDs of every tx queue.
1489  * *pid returns the port id which has the maximal value of
1490  * min_txd in every tx queue.
1491  */
1492 static uint16_t
1493 get_allowed_min_nb_txd(portid_t *pid)
1494 {
1495 	uint16_t allowed_min_txd = 0;
1496 	portid_t pi;
1497 	struct rte_eth_dev_info dev_info;
1498 
1499 	RTE_ETH_FOREACH_DEV(pi) {
1500 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1501 			continue;
1502 
1503 		if (dev_info.tx_desc_lim.nb_min > allowed_min_txd) {
1504 			allowed_min_txd = dev_info.tx_desc_lim.nb_min;
1505 			*pid = pi;
1506 		}
1507 	}
1508 
1509 	return allowed_min_txd;
1510 }
1511 
1512 /*
1513  * Check whether the input txd is valid or not.
1514  * If the input txd is not greater than the maximum number of TXDs and not
1515  * less than the minimal number of TXDs supported by every Tx queue, it is valid.
1516  * Return 0 if valid, -1 otherwise.
1517  */
1518 int
1519 check_nb_txd(queueid_t txd)
1520 {
1521 	uint16_t allowed_max_txd;
1522 	uint16_t allowed_min_txd;
1523 	portid_t pid = 0;
1524 
1525 	allowed_max_txd = get_allowed_max_nb_txd(&pid);
1526 	if (txd > allowed_max_txd) {
1527 		fprintf(stderr,
1528 			"Fail: input txd (%u) can't be greater than max_txds (%u) of port %u\n",
1529 			txd, allowed_max_txd, pid);
1530 		return -1;
1531 	}
1532 
1533 	allowed_min_txd = get_allowed_min_nb_txd(&pid);
1534 	if (txd < allowed_min_txd) {
1535 		fprintf(stderr,
1536 			"Fail: input txd (%u) can't be less than min_txds (%u) of port %u\n",
1537 			txd, allowed_min_txd, pid);
1538 		return -1;
1539 	}
1540 	return 0;
1541 }
1542 
1543 
1544 /*
1545  * Get the allowed maximum number of hairpin queues.
1546  * *pid returns the port id which has the minimal value of
1547  * max_hairpin_queues in all ports.
1548  */
1549 queueid_t
1550 get_allowed_max_nb_hairpinq(portid_t *pid)
1551 {
1552 	queueid_t allowed_max_hairpinq = RTE_MAX_QUEUES_PER_PORT;
1553 	portid_t pi;
1554 	struct rte_eth_hairpin_cap cap;
1555 
1556 	RTE_ETH_FOREACH_DEV(pi) {
1557 		if (rte_eth_dev_hairpin_capability_get(pi, &cap) != 0) {
1558 			*pid = pi;
1559 			return 0;
1560 		}
1561 		if (cap.max_nb_queues < allowed_max_hairpinq) {
1562 			allowed_max_hairpinq = cap.max_nb_queues;
1563 			*pid = pi;
1564 		}
1565 	}
1566 	return allowed_max_hairpinq;
1567 }
1568 
1569 /*
1570  * Check whether the input hairpinq is valid or not.
1571  * If the input hairpinq is not greater than the maximum number
1572  * of hairpin queues supported by every port, it is valid.
1573  * Return 0 if valid, -1 otherwise.
1574  */
1575 int
1576 check_nb_hairpinq(queueid_t hairpinq)
1577 {
1578 	queueid_t allowed_max_hairpinq;
1579 	portid_t pid = 0;
1580 
1581 	allowed_max_hairpinq = get_allowed_max_nb_hairpinq(&pid);
1582 	if (hairpinq > allowed_max_hairpinq) {
1583 		fprintf(stderr,
1584 			"Fail: input hairpin (%u) can't be greater than max_hairpin_queues (%u) of port %u\n",
1585 			hairpinq, allowed_max_hairpinq, pid);
1586 		return -1;
1587 	}
1588 	return 0;
1589 }
1590 
1591 static int
1592 get_eth_overhead(struct rte_eth_dev_info *dev_info)
1593 {
1594 	uint32_t eth_overhead;
1595 
1596 	if (dev_info->max_mtu != UINT16_MAX &&
1597 	    dev_info->max_rx_pktlen > dev_info->max_mtu)
1598 		eth_overhead = dev_info->max_rx_pktlen - dev_info->max_mtu;
1599 	else
1600 		eth_overhead = RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN;
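	/* i.e. 14-byte Ethernet header + 4-byte CRC = 18 bytes of overhead
	 * when the driver does not report a usable max_mtu/max_rx_pktlen pair.
	 */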
1601 
1602 	return eth_overhead;
1603 }
1604 
1605 static void
1606 init_config_port_offloads(portid_t pid, uint32_t socket_id)
1607 {
1608 	struct rte_port *port = &ports[pid];
1609 	int ret;
1610 	int i;
1611 
1612 	eth_rx_metadata_negotiate_mp(pid);
1613 
1614 	port->dev_conf.txmode = tx_mode;
1615 	port->dev_conf.rxmode = rx_mode;
1616 
1617 	ret = eth_dev_info_get_print_err(pid, &port->dev_info);
1618 	if (ret != 0)
1619 		rte_exit(EXIT_FAILURE, "rte_eth_dev_info_get() failed\n");
1620 
1621 	if (!(port->dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE))
1622 		port->dev_conf.txmode.offloads &=
1623 			~RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE;
1624 
1625 	/* Apply Rx offloads configuration */
1626 	for (i = 0; i < port->dev_info.max_rx_queues; i++)
1627 		port->rxq[i].conf.offloads = port->dev_conf.rxmode.offloads;
1628 	/* Apply Tx offloads configuration */
1629 	for (i = 0; i < port->dev_info.max_tx_queues; i++)
1630 		port->txq[i].conf.offloads = port->dev_conf.txmode.offloads;
1631 
1632 	if (eth_link_speed)
1633 		port->dev_conf.link_speeds = eth_link_speed;
1634 
1635 	if (max_rx_pkt_len)
1636 		port->dev_conf.rxmode.mtu = max_rx_pkt_len -
1637 			get_eth_overhead(&port->dev_info);
1638 
1639 	/* set flag to initialize port/queue */
1640 	port->need_reconfig = 1;
1641 	port->need_reconfig_queues = 1;
1642 	port->socket_id = socket_id;
1643 	port->tx_metadata = 0;
1644 
1645 	/*
1646 	 * Check for maximum number of segments per MTU.
1647 	 * Accordingly update the mbuf data size.
1648 	 */
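	/*
	 * For illustration (hypothetical values): with an MTU of 1500,
	 * 18 bytes of Ethernet overhead and nb_mtu_seg_max = 2, data_size
	 * is (1500 + 18) / 2 = 759 and buffer_size is 759 + RTE_PKTMBUF_HEADROOM.
	 */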
1649 	if (port->dev_info.rx_desc_lim.nb_mtu_seg_max != UINT16_MAX &&
1650 	    port->dev_info.rx_desc_lim.nb_mtu_seg_max != 0) {
1651 		uint32_t eth_overhead = get_eth_overhead(&port->dev_info);
1652 		uint16_t mtu;
1653 
1654 		if (rte_eth_dev_get_mtu(pid, &mtu) == 0) {
1655 			uint16_t data_size = (mtu + eth_overhead) /
1656 				port->dev_info.rx_desc_lim.nb_mtu_seg_max;
1657 			uint16_t buffer_size = data_size + RTE_PKTMBUF_HEADROOM;
1658 
1659 			if (buffer_size > mbuf_data_size[0]) {
1660 				mbuf_data_size[0] = buffer_size;
1661 				TESTPMD_LOG(WARNING,
1662 					"Configured mbuf size of the first segment %hu\n",
1663 					mbuf_data_size[0]);
1664 			}
1665 		}
1666 	}
1667 }
1668 
1669 static void
1670 init_config(void)
1671 {
1672 	portid_t pid;
1673 	struct rte_mempool *mbp;
1674 	unsigned int nb_mbuf_per_pool;
1675 	lcoreid_t  lc_id;
1676 #ifdef RTE_LIB_GRO
1677 	struct rte_gro_param gro_param;
1678 #endif
1679 #ifdef RTE_LIB_GSO
1680 	uint32_t gso_types;
1681 #endif
1682 
1683 	/* Configuration of logical cores. */
1684 	fwd_lcores = rte_zmalloc("testpmd: fwd_lcores",
1685 				sizeof(struct fwd_lcore *) * nb_lcores,
1686 				RTE_CACHE_LINE_SIZE);
1687 	if (fwd_lcores == NULL) {
1688 		rte_exit(EXIT_FAILURE, "rte_zmalloc(%d (struct fwd_lcore *)) "
1689 							"failed\n", nb_lcores);
1690 	}
1691 	for (lc_id = 0; lc_id < nb_lcores; lc_id++) {
1692 		fwd_lcores[lc_id] = rte_zmalloc("testpmd: struct fwd_lcore",
1693 					       sizeof(struct fwd_lcore),
1694 					       RTE_CACHE_LINE_SIZE);
1695 		if (fwd_lcores[lc_id] == NULL) {
1696 			rte_exit(EXIT_FAILURE, "rte_zmalloc(struct fwd_lcore) "
1697 								"failed\n");
1698 		}
1699 		fwd_lcores[lc_id]->cpuid_idx = lc_id;
1700 	}
1701 
1702 	RTE_ETH_FOREACH_DEV(pid) {
1703 		uint32_t socket_id;
1704 
1705 		if (numa_support) {
1706 			socket_id = port_numa[pid];
1707 			if (port_numa[pid] == NUMA_NO_CONFIG) {
1708 				socket_id = rte_eth_dev_socket_id(pid);
1709 
1710 				/*
1711 				 * if socket_id is invalid,
1712 				 * set to the first available socket.
1713 				 */
1714 				if (check_socket_id(socket_id) < 0)
1715 					socket_id = socket_ids[0];
1716 			}
1717 		} else {
1718 			socket_id = (socket_num == UMA_NO_CONFIG) ?
1719 				    0 : socket_num;
1720 		}
1721 		/* Apply default TxRx configuration for all ports */
1722 		init_config_port_offloads(pid, socket_id);
1723 	}
1724 	/*
1725 	 * Create pools of mbuf.
1726 	 * If NUMA support is disabled, create a single pool of mbuf in
1727 	 * socket 0 memory by default.
1728 	 * Otherwise, create a pool of mbuf in the memory of sockets 0 and 1.
1729 	 *
1730 	 * Use the maximum value of nb_rxd and nb_txd here, then nb_rxd and
1731 	 * nb_txd can be configured at run time.
1732 	 */
1733 	if (param_total_num_mbufs)
1734 		nb_mbuf_per_pool = param_total_num_mbufs;
1735 	else {
1736 		nb_mbuf_per_pool = RX_DESC_MAX +
1737 			(nb_lcores * mb_mempool_cache) +
1738 			TX_DESC_MAX + MAX_PKT_BURST;
1739 		nb_mbuf_per_pool *= RTE_MAX_ETHPORTS;
1740 	}
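	/*
	 * The default above is a worst-case estimate: full Rx and Tx rings,
	 * one packet burst in flight and one mempool cache per lcore, all
	 * scaled by RTE_MAX_ETHPORTS.
	 */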
1741 
1742 	if (numa_support) {
1743 		uint8_t i, j;
1744 
1745 		for (i = 0; i < num_sockets; i++)
1746 			for (j = 0; j < mbuf_data_size_n; j++)
1747 				mempools[i * MAX_SEGS_BUFFER_SPLIT + j] =
1748 					mbuf_pool_create(mbuf_data_size[j],
1749 							  nb_mbuf_per_pool,
1750 							  socket_ids[i], j);
1751 	} else {
1752 		uint8_t i;
1753 
1754 		for (i = 0; i < mbuf_data_size_n; i++)
1755 			mempools[i] = mbuf_pool_create
1756 					(mbuf_data_size[i],
1757 					 nb_mbuf_per_pool,
1758 					 socket_num == UMA_NO_CONFIG ?
1759 					 0 : socket_num, i);
1760 	}
1761 
1762 	init_port_config();
1763 
1764 #ifdef RTE_LIB_GSO
1765 	gso_types = RTE_ETH_TX_OFFLOAD_TCP_TSO | RTE_ETH_TX_OFFLOAD_VXLAN_TNL_TSO |
1766 		RTE_ETH_TX_OFFLOAD_GRE_TNL_TSO | RTE_ETH_TX_OFFLOAD_UDP_TSO;
1767 #endif
1768 	/*
1769 	 * Record which mbuf pool each logical core should use, if needed.
1770 	 */
1771 	for (lc_id = 0; lc_id < nb_lcores; lc_id++) {
1772 		mbp = mbuf_pool_find(
1773 			rte_lcore_to_socket_id(fwd_lcores_cpuids[lc_id]), 0);
1774 
1775 		if (mbp == NULL)
1776 			mbp = mbuf_pool_find(0, 0);
1777 		fwd_lcores[lc_id]->mbp = mbp;
1778 #ifdef RTE_LIB_GSO
1779 		/* initialize GSO context */
1780 		fwd_lcores[lc_id]->gso_ctx.direct_pool = mbp;
1781 		fwd_lcores[lc_id]->gso_ctx.indirect_pool = mbp;
1782 		fwd_lcores[lc_id]->gso_ctx.gso_types = gso_types;
1783 		fwd_lcores[lc_id]->gso_ctx.gso_size = RTE_ETHER_MAX_LEN -
1784 			RTE_ETHER_CRC_LEN;
1785 		fwd_lcores[lc_id]->gso_ctx.flag = 0;
1786 #endif
1787 	}
1788 
1789 	fwd_config_setup();
1790 
1791 #ifdef RTE_LIB_GRO
1792 	/* create a gro context for each lcore */
1793 	gro_param.gro_types = RTE_GRO_TCP_IPV4;
1794 	gro_param.max_flow_num = GRO_MAX_FLUSH_CYCLES;
1795 	gro_param.max_item_per_flow = MAX_PKT_BURST;
1796 	for (lc_id = 0; lc_id < nb_lcores; lc_id++) {
1797 		gro_param.socket_id = rte_lcore_to_socket_id(
1798 				fwd_lcores_cpuids[lc_id]);
1799 		fwd_lcores[lc_id]->gro_ctx = rte_gro_ctx_create(&gro_param);
1800 		if (fwd_lcores[lc_id]->gro_ctx == NULL) {
1801 			rte_exit(EXIT_FAILURE,
1802 					"rte_gro_ctx_create() failed\n");
1803 		}
1804 	}
1805 #endif
1806 }
1807 
1808 
1809 void
1810 reconfig(portid_t new_port_id, unsigned socket_id)
1811 {
1812 	/* Reconfiguration of Ethernet ports. */
1813 	init_config_port_offloads(new_port_id, socket_id);
1814 	init_port_config();
1815 }
1816 
1817 int
1818 init_fwd_streams(void)
1819 {
1820 	portid_t pid;
1821 	struct rte_port *port;
1822 	streamid_t sm_id, nb_fwd_streams_new;
1823 	queueid_t q;
1824 
1825 	/* set socket id according to numa or not */
1826 	RTE_ETH_FOREACH_DEV(pid) {
1827 		port = &ports[pid];
1828 		if (nb_rxq > port->dev_info.max_rx_queues) {
1829 			fprintf(stderr,
1830 				"Fail: nb_rxq(%d) is greater than max_rx_queues(%d)\n",
1831 				nb_rxq, port->dev_info.max_rx_queues);
1832 			return -1;
1833 		}
1834 		if (nb_txq > port->dev_info.max_tx_queues) {
1835 			fprintf(stderr,
1836 				"Fail: nb_txq(%d) is greater than max_tx_queues(%d)\n",
1837 				nb_txq, port->dev_info.max_tx_queues);
1838 			return -1;
1839 		}
1840 		if (numa_support) {
1841 			if (port_numa[pid] != NUMA_NO_CONFIG)
1842 				port->socket_id = port_numa[pid];
1843 			else {
1844 				port->socket_id = rte_eth_dev_socket_id(pid);
1845 
1846 				/*
1847 				 * if socket_id is invalid,
1848 				 * set to the first available socket.
1849 				 */
1850 				if (check_socket_id(port->socket_id) < 0)
1851 					port->socket_id = socket_ids[0];
1852 			}
1853 		}
1854 		else {
1855 			if (socket_num == UMA_NO_CONFIG)
1856 				port->socket_id = 0;
1857 			else
1858 				port->socket_id = socket_num;
1859 		}
1860 	}
1861 
1862 	q = RTE_MAX(nb_rxq, nb_txq);
1863 	if (q == 0) {
1864 		fprintf(stderr,
1865 			"Fail: Cannot allocate fwd streams as number of queues is 0\n");
1866 		return -1;
1867 	}
1868 	nb_fwd_streams_new = (streamid_t)(nb_ports * q);
1869 	if (nb_fwd_streams_new == nb_fwd_streams)
1870 		return 0;
1871 	/* clear the old */
1872 	if (fwd_streams != NULL) {
1873 		for (sm_id = 0; sm_id < nb_fwd_streams; sm_id++) {
1874 			if (fwd_streams[sm_id] == NULL)
1875 				continue;
1876 			rte_free(fwd_streams[sm_id]);
1877 			fwd_streams[sm_id] = NULL;
1878 		}
1879 		rte_free(fwd_streams);
1880 		fwd_streams = NULL;
1881 	}
1882 
1883 	/* init new */
1884 	nb_fwd_streams = nb_fwd_streams_new;
1885 	if (nb_fwd_streams) {
1886 		fwd_streams = rte_zmalloc("testpmd: fwd_streams",
1887 			sizeof(struct fwd_stream *) * nb_fwd_streams,
1888 			RTE_CACHE_LINE_SIZE);
1889 		if (fwd_streams == NULL)
1890 			rte_exit(EXIT_FAILURE, "rte_zmalloc(%d"
1891 				 " (struct fwd_stream *)) failed\n",
1892 				 nb_fwd_streams);
1893 
1894 		for (sm_id = 0; sm_id < nb_fwd_streams; sm_id++) {
1895 			fwd_streams[sm_id] = rte_zmalloc("testpmd:"
1896 				" struct fwd_stream", sizeof(struct fwd_stream),
1897 				RTE_CACHE_LINE_SIZE);
1898 			if (fwd_streams[sm_id] == NULL)
1899 				rte_exit(EXIT_FAILURE, "rte_zmalloc"
1900 					 "(struct fwd_stream) failed\n");
1901 		}
1902 	}
1903 
1904 	return 0;
1905 }
1906 
1907 static void
1908 pkt_burst_stats_display(const char *rx_tx, struct pkt_burst_stats *pbs)
1909 {
1910 	uint64_t total_burst, sburst;
1911 	uint64_t nb_burst;
1912 	uint64_t burst_stats[4];
1913 	uint16_t pktnb_stats[4];
1914 	uint16_t nb_pkt;
1915 	int burst_percent[4], sburstp;
1916 	int i;
1917 
1918 	/*
1919 	 * First compute the total number of packet bursts and the
1920 	 * two highest numbers of bursts of the same number of packets.
1921 	 */
1922 	memset(&burst_stats, 0x0, sizeof(burst_stats));
1923 	memset(&pktnb_stats, 0x0, sizeof(pktnb_stats));
1924 
1925 	/* Always show stats for bursts of size 0 */
1926 	total_burst = pbs->pkt_burst_spread[0];
1927 	burst_stats[0] = pbs->pkt_burst_spread[0];
1928 	pktnb_stats[0] = 0;
1929 
1930 	/* Find the next 2 burst sizes with highest occurrences. */
1931 	for (nb_pkt = 1; nb_pkt < MAX_PKT_BURST + 1; nb_pkt++) {
1932 		nb_burst = pbs->pkt_burst_spread[nb_pkt];
1933 
1934 		if (nb_burst == 0)
1935 			continue;
1936 
1937 		total_burst += nb_burst;
1938 
1939 		if (nb_burst > burst_stats[1]) {
1940 			burst_stats[2] = burst_stats[1];
1941 			pktnb_stats[2] = pktnb_stats[1];
1942 			burst_stats[1] = nb_burst;
1943 			pktnb_stats[1] = nb_pkt;
1944 		} else if (nb_burst > burst_stats[2]) {
1945 			burst_stats[2] = nb_burst;
1946 			pktnb_stats[2] = nb_pkt;
1947 		}
1948 	}
1949 	if (total_burst == 0)
1950 		return;
1951 
1952 	printf("  %s-bursts : %"PRIu64" [", rx_tx, total_burst);
1953 	for (i = 0, sburst = 0, sburstp = 0; i < 4; i++) {
1954 		if (i == 3) {
1955 			printf("%d%% of other]\n", 100 - sburstp);
1956 			return;
1957 		}
1958 
1959 		sburst += burst_stats[i];
1960 		if (sburst == total_burst) {
1961 			printf("%d%% of %d pkts]\n",
1962 				100 - sburstp, (int) pktnb_stats[i]);
1963 			return;
1964 		}
1965 
1966 		burst_percent[i] =
1967 			(double)burst_stats[i] / total_burst * 100;
1968 		printf("%d%% of %d pkts + ",
1969 			burst_percent[i], (int) pktnb_stats[i]);
1970 		sburstp += burst_percent[i];
1971 	}
1972 }
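
/*
 * Example of the output produced by the function above (hypothetical
 * numbers, for illustration only): with 10000 recorded Rx bursts where
 * 9000 carried 32 packets, 900 carried 16 packets, none were empty and
 * the rest were spread over other sizes, it would print roughly:
 *
 *   RX-bursts : 10000 [0% of 0 pkts + 90% of 32 pkts + 9% of 16 pkts + 1% of other]
 */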
1973 
1974 static void
1975 fwd_stream_stats_display(streamid_t stream_id)
1976 {
1977 	struct fwd_stream *fs;
1978 	static const char *fwd_top_stats_border = "-------";
1979 
1980 	fs = fwd_streams[stream_id];
1981 	if ((fs->rx_packets == 0) && (fs->tx_packets == 0) &&
1982 	    (fs->fwd_dropped == 0))
1983 		return;
1984 	printf("\n  %s Forward Stats for RX Port=%2d/Queue=%2d -> "
1985 	       "TX Port=%2d/Queue=%2d %s\n",
1986 	       fwd_top_stats_border, fs->rx_port, fs->rx_queue,
1987 	       fs->tx_port, fs->tx_queue, fwd_top_stats_border);
1988 	printf("  RX-packets: %-14"PRIu64" TX-packets: %-14"PRIu64
1989 	       " TX-dropped: %-14"PRIu64,
1990 	       fs->rx_packets, fs->tx_packets, fs->fwd_dropped);
1991 
1992 	/* if checksum mode */
1993 	if (cur_fwd_eng == &csum_fwd_engine) {
1994 		printf("  RX- bad IP checksum: %-14"PRIu64
1995 		       "  RX- bad L4 checksum: %-14"PRIu64
1996 		       " RX- bad outer L4 checksum: %-14"PRIu64"\n",
1997 			fs->rx_bad_ip_csum, fs->rx_bad_l4_csum,
1998 			fs->rx_bad_outer_l4_csum);
1999 		printf(" RX- bad outer IP checksum: %-14"PRIu64"\n",
2000 			fs->rx_bad_outer_ip_csum);
2001 	} else {
2002 		printf("\n");
2003 	}
2004 
2005 	if (record_burst_stats) {
2006 		pkt_burst_stats_display("RX", &fs->rx_burst_stats);
2007 		pkt_burst_stats_display("TX", &fs->tx_burst_stats);
2008 	}
2009 }
2010 
2011 void
2012 fwd_stats_display(void)
2013 {
2014 	static const char *fwd_stats_border = "----------------------";
2015 	static const char *acc_stats_border = "+++++++++++++++";
2016 	struct {
2017 		struct fwd_stream *rx_stream;
2018 		struct fwd_stream *tx_stream;
2019 		uint64_t tx_dropped;
2020 		uint64_t rx_bad_ip_csum;
2021 		uint64_t rx_bad_l4_csum;
2022 		uint64_t rx_bad_outer_l4_csum;
2023 		uint64_t rx_bad_outer_ip_csum;
2024 	} ports_stats[RTE_MAX_ETHPORTS];
2025 	uint64_t total_rx_dropped = 0;
2026 	uint64_t total_tx_dropped = 0;
2027 	uint64_t total_rx_nombuf = 0;
2028 	struct rte_eth_stats stats;
2029 	uint64_t fwd_cycles = 0;
2030 	uint64_t total_recv = 0;
2031 	uint64_t total_xmit = 0;
2032 	struct rte_port *port;
2033 	streamid_t sm_id;
2034 	portid_t pt_id;
2035 	int ret;
2036 	int i;
2037 
2038 	memset(ports_stats, 0, sizeof(ports_stats));
2039 
2040 	for (sm_id = 0; sm_id < cur_fwd_config.nb_fwd_streams; sm_id++) {
2041 		struct fwd_stream *fs = fwd_streams[sm_id];
2042 
2043 		if (cur_fwd_config.nb_fwd_streams >
2044 		    cur_fwd_config.nb_fwd_ports) {
2045 			fwd_stream_stats_display(sm_id);
2046 		} else {
2047 			ports_stats[fs->tx_port].tx_stream = fs;
2048 			ports_stats[fs->rx_port].rx_stream = fs;
2049 		}
2050 
2051 		ports_stats[fs->tx_port].tx_dropped += fs->fwd_dropped;
2052 
2053 		ports_stats[fs->rx_port].rx_bad_ip_csum += fs->rx_bad_ip_csum;
2054 		ports_stats[fs->rx_port].rx_bad_l4_csum += fs->rx_bad_l4_csum;
2055 		ports_stats[fs->rx_port].rx_bad_outer_l4_csum +=
2056 				fs->rx_bad_outer_l4_csum;
2057 		ports_stats[fs->rx_port].rx_bad_outer_ip_csum +=
2058 				fs->rx_bad_outer_ip_csum;
2059 
2060 		if (record_core_cycles)
2061 			fwd_cycles += fs->busy_cycles;
2062 	}
2063 	for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
2064 		uint64_t tx_dropped = 0;
2065 
2066 		pt_id = fwd_ports_ids[i];
2067 		port = &ports[pt_id];
2068 
2069 		ret = rte_eth_stats_get(pt_id, &stats);
2070 		if (ret != 0) {
2071 			fprintf(stderr,
2072 				"%s: Error: failed to get stats (port %u): %d\n",
2073 				__func__, pt_id, ret);
2074 			continue;
2075 		}
2076 		stats.ipackets -= port->stats.ipackets;
2077 		stats.opackets -= port->stats.opackets;
2078 		stats.ibytes -= port->stats.ibytes;
2079 		stats.obytes -= port->stats.obytes;
2080 		stats.imissed -= port->stats.imissed;
2081 		stats.oerrors -= port->stats.oerrors;
2082 		stats.rx_nombuf -= port->stats.rx_nombuf;
2083 
2084 		total_recv += stats.ipackets;
2085 		total_xmit += stats.opackets;
2086 		total_rx_dropped += stats.imissed;
2087 		tx_dropped += ports_stats[pt_id].tx_dropped;
2088 		tx_dropped += stats.oerrors;
2089 		total_tx_dropped += tx_dropped;
2090 		total_rx_nombuf  += stats.rx_nombuf;
2091 
2092 		printf("\n  %s Forward statistics for port %-2d %s\n",
2093 		       fwd_stats_border, pt_id, fwd_stats_border);
2094 
2095 		printf("  RX-packets: %-14"PRIu64" RX-dropped: %-14"PRIu64
2096 		       "RX-total: %-"PRIu64"\n", stats.ipackets, stats.imissed,
2097 		       stats.ipackets + stats.imissed);
2098 
2099 		if (cur_fwd_eng == &csum_fwd_engine) {
2100 			printf("  Bad-ipcsum: %-14"PRIu64
2101 			       " Bad-l4csum: %-14"PRIu64
2102 			       "Bad-outer-l4csum: %-14"PRIu64"\n",
2103 			       ports_stats[pt_id].rx_bad_ip_csum,
2104 			       ports_stats[pt_id].rx_bad_l4_csum,
2105 			       ports_stats[pt_id].rx_bad_outer_l4_csum);
2106 			printf("  Bad-outer-ipcsum: %-14"PRIu64"\n",
2107 			       ports_stats[pt_id].rx_bad_outer_ip_csum);
2108 		}
2109 		if (stats.ierrors + stats.rx_nombuf > 0) {
2110 			printf("  RX-error: %-"PRIu64"\n", stats.ierrors);
2111 			printf("  RX-nombufs: %-14"PRIu64"\n", stats.rx_nombuf);
2112 		}
2113 
2114 		printf("  TX-packets: %-14"PRIu64" TX-dropped: %-14"PRIu64
2115 		       "TX-total: %-"PRIu64"\n",
2116 		       stats.opackets, tx_dropped,
2117 		       stats.opackets + tx_dropped);
2118 
2119 		if (record_burst_stats) {
2120 			if (ports_stats[pt_id].rx_stream)
2121 				pkt_burst_stats_display("RX",
2122 					&ports_stats[pt_id].rx_stream->rx_burst_stats);
2123 			if (ports_stats[pt_id].tx_stream)
2124 				pkt_burst_stats_display("TX",
2125 				&ports_stats[pt_id].tx_stream->tx_burst_stats);
2126 		}
2127 
2128 		printf("  %s--------------------------------%s\n",
2129 		       fwd_stats_border, fwd_stats_border);
2130 	}
2131 
2132 	printf("\n  %s Accumulated forward statistics for all ports"
2133 	       "%s\n",
2134 	       acc_stats_border, acc_stats_border);
2135 	printf("  RX-packets: %-14"PRIu64" RX-dropped: %-14"PRIu64"RX-total: "
2136 	       "%-"PRIu64"\n"
2137 	       "  TX-packets: %-14"PRIu64" TX-dropped: %-14"PRIu64"TX-total: "
2138 	       "%-"PRIu64"\n",
2139 	       total_recv, total_rx_dropped, total_recv + total_rx_dropped,
2140 	       total_xmit, total_tx_dropped, total_xmit + total_tx_dropped);
2141 	if (total_rx_nombuf > 0)
2142 		printf("  RX-nombufs: %-14"PRIu64"\n", total_rx_nombuf);
2143 	printf("  %s++++++++++++++++++++++++++++++++++++++++++++++"
2144 	       "%s\n",
2145 	       acc_stats_border, acc_stats_border);
2146 	if (record_core_cycles) {
2147 #define CYC_PER_MHZ 1E6
2148 		if (total_recv > 0 || total_xmit > 0) {
2149 			uint64_t total_pkts = 0;
2150 			if (strcmp(cur_fwd_eng->fwd_mode_name, "txonly") == 0 ||
2151 			    strcmp(cur_fwd_eng->fwd_mode_name, "flowgen") == 0)
2152 				total_pkts = total_xmit;
2153 			else
2154 				total_pkts = total_recv;
2155 
2156 			printf("\n  CPU cycles/packet=%.2F (busy cycles="
2157 			       "%"PRIu64" / total %s packets=%"PRIu64") at %"PRIu64
2158 			       " MHz Clock\n",
2159 			       (double) fwd_cycles / total_pkts,
2160 			       fwd_cycles, cur_fwd_eng->fwd_mode_name, total_pkts,
2161 			       (uint64_t)(rte_get_tsc_hz() / CYC_PER_MHZ));
2162 		}
2163 	}
2164 }
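
/*
 * Worked example for the cycles/packet summary above (hypothetical
 * numbers): in "io" mode with fwd_cycles = 2000000000 busy TSC cycles,
 * total_recv = 100000000 packets and a 2 GHz TSC, it would report:
 *
 *   CPU cycles/packet=20.00 (busy cycles=2000000000 / total io packets=100000000) at 2000 MHz Clock
 */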
2165 
2166 void
2167 fwd_stats_reset(void)
2168 {
2169 	streamid_t sm_id;
2170 	portid_t pt_id;
2171 	int ret;
2172 	int i;
2173 
2174 	for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
2175 		pt_id = fwd_ports_ids[i];
2176 		ret = rte_eth_stats_get(pt_id, &ports[pt_id].stats);
2177 		if (ret != 0)
2178 			fprintf(stderr,
2179 				"%s: Error: failed to clear stats (port %u): %d\n",
2180 				__func__, pt_id, ret);
2181 	}
2182 	for (sm_id = 0; sm_id < cur_fwd_config.nb_fwd_streams; sm_id++) {
2183 		struct fwd_stream *fs = fwd_streams[sm_id];
2184 
2185 		fs->rx_packets = 0;
2186 		fs->tx_packets = 0;
2187 		fs->fwd_dropped = 0;
2188 		fs->rx_bad_ip_csum = 0;
2189 		fs->rx_bad_l4_csum = 0;
2190 		fs->rx_bad_outer_l4_csum = 0;
2191 		fs->rx_bad_outer_ip_csum = 0;
2192 
2193 		memset(&fs->rx_burst_stats, 0, sizeof(fs->rx_burst_stats));
2194 		memset(&fs->tx_burst_stats, 0, sizeof(fs->tx_burst_stats));
2195 		fs->busy_cycles = 0;
2196 	}
2197 }
2198 
2199 static void
2200 flush_fwd_rx_queues(void)
2201 {
2202 	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
2203 	portid_t  rxp;
2204 	portid_t port_id;
2205 	queueid_t rxq;
2206 	uint16_t  nb_rx;
2207 	uint8_t   j;
2208 	uint64_t prev_tsc = 0, diff_tsc, cur_tsc, timer_tsc = 0;
2209 	uint64_t timer_period;
2210 
2211 	if (num_procs > 1) {
2212 		printf("multi-process is not supported for flushing fwd Rx queues, skipping\n");
2213 		return;
2214 	}
2215 
2216 	/* convert to number of cycles */
2217 	timer_period = rte_get_timer_hz(); /* 1 second timeout */
2218 
2219 	for (j = 0; j < 2; j++) {
2220 		for (rxp = 0; rxp < cur_fwd_config.nb_fwd_ports; rxp++) {
2221 			for (rxq = 0; rxq < nb_rxq; rxq++) {
2222 				port_id = fwd_ports_ids[rxp];
2223 
2224 				/* Polling stopped queues is prohibited. */
2225 				if (ports[port_id].rxq[rxq].state ==
2226 				    RTE_ETH_QUEUE_STATE_STOPPED)
2227 					continue;
2228 
2229 				/*
2230 				 * testpmd can get stuck in the do-while loop below
2231 				 * if rte_eth_rx_burst() keeps returning a nonzero
2232 				 * number of packets, so a timer is used to exit the
2233 				 * loop once the 1-second period expires.
2234 				 */
2235 				prev_tsc = rte_rdtsc();
2236 				do {
2237 					nb_rx = rte_eth_rx_burst(port_id, rxq,
2238 						pkts_burst, MAX_PKT_BURST);
2239 					rte_pktmbuf_free_bulk(pkts_burst, nb_rx);
2240 
2241 					cur_tsc = rte_rdtsc();
2242 					diff_tsc = cur_tsc - prev_tsc;
2243 					timer_tsc += diff_tsc;
2244 				} while ((nb_rx > 0) &&
2245 					(timer_tsc < timer_period));
2246 				timer_tsc = 0;
2247 			}
2248 		}
2249 		rte_delay_ms(10); /* wait 10 milliseconds before retrying */
2250 	}
2251 }
2252 
2253 static void
2254 run_pkt_fwd_on_lcore(struct fwd_lcore *fc, packet_fwd_t pkt_fwd)
2255 {
2256 	struct fwd_stream **fsm;
2257 	uint64_t prev_tsc;
2258 	streamid_t nb_fs;
2259 	streamid_t sm_id;
2260 #ifdef RTE_LIB_BITRATESTATS
2261 	uint64_t tics_per_1sec;
2262 	uint64_t tics_datum;
2263 	uint64_t tics_current;
2264 	uint16_t i, cnt_ports;
2265 
2266 	cnt_ports = nb_ports;
2267 	tics_datum = rte_rdtsc();
2268 	tics_per_1sec = rte_get_timer_hz();
2269 #endif
2270 	fsm = &fwd_streams[fc->stream_idx];
2271 	nb_fs = fc->stream_nb;
2272 	prev_tsc = rte_rdtsc();
2273 	do {
2274 		for (sm_id = 0; sm_id < nb_fs; sm_id++) {
2275 			struct fwd_stream *fs = fsm[sm_id];
2276 			uint64_t start_fs_tsc = 0;
2277 			bool busy;
2278 
2279 			if (fs->disabled)
2280 				continue;
2281 			if (record_core_cycles)
2282 				start_fs_tsc = rte_rdtsc();
2283 			busy = (*pkt_fwd)(fs);
2284 			if (record_core_cycles && busy)
2285 				fs->busy_cycles += rte_rdtsc() - start_fs_tsc;
2286 		}
2287 #ifdef RTE_LIB_BITRATESTATS
2288 		if (bitrate_enabled != 0 &&
2289 				bitrate_lcore_id == rte_lcore_id()) {
2290 			tics_current = rte_rdtsc();
2291 			if (tics_current - tics_datum >= tics_per_1sec) {
2292 				/* Periodic bitrate calculation */
2293 				for (i = 0; i < cnt_ports; i++)
2294 					rte_stats_bitrate_calc(bitrate_data,
2295 						ports_ids[i]);
2296 				tics_datum = tics_current;
2297 			}
2298 		}
2299 #endif
2300 #ifdef RTE_LIB_LATENCYSTATS
2301 		if (latencystats_enabled != 0 &&
2302 				latencystats_lcore_id == rte_lcore_id())
2303 			rte_latencystats_update();
2304 #endif
2305 		if (record_core_cycles) {
2306 			uint64_t tsc = rte_rdtsc();
2307 
2308 			fc->total_cycles += tsc - prev_tsc;
2309 			prev_tsc = tsc;
2310 		}
2311 	} while (! fc->stopped);
2312 }
2313 
2314 static int
2315 lcore_usage_callback(unsigned int lcore_id, struct rte_lcore_usage *usage)
2316 {
2317 	struct fwd_stream **fsm;
2318 	struct fwd_lcore *fc;
2319 	streamid_t nb_fs;
2320 	streamid_t sm_id;
2321 
2322 	fc = lcore_to_fwd_lcore(lcore_id);
2323 	if (fc == NULL)
2324 		return -1;
2325 
2326 	fsm = &fwd_streams[fc->stream_idx];
2327 	nb_fs = fc->stream_nb;
2328 	usage->busy_cycles = 0;
2329 	usage->total_cycles = fc->total_cycles;
2330 
2331 	for (sm_id = 0; sm_id < nb_fs; sm_id++) {
2332 		if (!fsm[sm_id]->disabled)
2333 			usage->busy_cycles += fsm[sm_id]->busy_cycles;
2334 	}
2335 
2336 	return 0;
2337 }
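
/*
 * Sketch of how the callback above is meant to be used (assuming the
 * rte_lcore_register_usage_cb() API available since DPDK 23.03):
 *
 *   rte_lcore_register_usage_cb(lcore_usage_callback);
 *
 * Once registered, the EAL lcore dump/telemetry output can include the
 * busy and total cycle counters gathered by the forwarding loops.
 */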
2338 
2339 static int
2340 start_pkt_forward_on_core(void *fwd_arg)
2341 {
2342 	run_pkt_fwd_on_lcore((struct fwd_lcore *) fwd_arg,
2343 			     cur_fwd_config.fwd_eng->packet_fwd);
2344 	return 0;
2345 }
2346 
2347 /*
2348  * Run the TXONLY packet forwarding engine to send a single burst of packets.
2349  * Used to start communication flows in network loopback test configurations.
2350  */
2351 static int
2352 run_one_txonly_burst_on_core(void *fwd_arg)
2353 {
2354 	struct fwd_lcore *fwd_lc;
2355 	struct fwd_lcore tmp_lcore;
2356 
2357 	fwd_lc = (struct fwd_lcore *) fwd_arg;
2358 	tmp_lcore = *fwd_lc;
2359 	tmp_lcore.stopped = 1;
2360 	run_pkt_fwd_on_lcore(&tmp_lcore, tx_only_engine.packet_fwd);
2361 	return 0;
2362 }
2363 
2364 /*
2365  * Launch packet forwarding:
2366  *     - Setup per-port forwarding context.
2367  *     - launch logical cores with their forwarding configuration.
2368  */
2369 static void
2370 launch_packet_forwarding(lcore_function_t *pkt_fwd_on_lcore)
2371 {
2372 	unsigned int i;
2373 	unsigned int lc_id;
2374 	int diag;
2375 
2376 	for (i = 0; i < cur_fwd_config.nb_fwd_lcores; i++) {
2377 		lc_id = fwd_lcores_cpuids[i];
2378 		if ((interactive == 0) || (lc_id != rte_lcore_id())) {
2379 			fwd_lcores[i]->stopped = 0;
2380 			diag = rte_eal_remote_launch(pkt_fwd_on_lcore,
2381 						     fwd_lcores[i], lc_id);
2382 			if (diag != 0)
2383 				fprintf(stderr,
2384 					"launch lcore %u failed - diag=%d\n",
2385 					lc_id, diag);
2386 		}
2387 	}
2388 }
2389 
2390 void
2391 common_fwd_stream_init(struct fwd_stream *fs)
2392 {
2393 	bool rx_stopped, tx_stopped;
2394 
2395 	rx_stopped = (ports[fs->rx_port].rxq[fs->rx_queue].state == RTE_ETH_QUEUE_STATE_STOPPED);
2396 	tx_stopped = (ports[fs->tx_port].txq[fs->tx_queue].state == RTE_ETH_QUEUE_STATE_STOPPED);
2397 	fs->disabled = rx_stopped || tx_stopped;
2398 }
2399 
2400 static void
2401 update_rx_queue_state(uint16_t port_id, uint16_t queue_id)
2402 {
2403 	struct rte_eth_rxq_info rx_qinfo;
2404 	int32_t rc;
2405 
2406 	rc = rte_eth_rx_queue_info_get(port_id,
2407 			queue_id, &rx_qinfo);
2408 	if (rc == 0) {
2409 		ports[port_id].rxq[queue_id].state =
2410 			rx_qinfo.queue_state;
2411 	} else if (rc == -ENOTSUP) {
2412 		/*
2413 		 * Set the rxq state to RTE_ETH_QUEUE_STATE_STARTED
2414 		 * so that PMDs which do not implement
2415 		 * rte_eth_rx_queue_info_get() can still forward.
2416 		 */
2417 		ports[port_id].rxq[queue_id].state =
2418 			RTE_ETH_QUEUE_STATE_STARTED;
2419 	} else {
2420 		TESTPMD_LOG(WARNING,
2421 			"Failed to get rx queue info\n");
2422 	}
2423 }
2424 
2425 static void
2426 update_tx_queue_state(uint16_t port_id, uint16_t queue_id)
2427 {
2428 	struct rte_eth_txq_info tx_qinfo;
2429 	int32_t rc;
2430 
2431 	rc = rte_eth_tx_queue_info_get(port_id,
2432 			queue_id, &tx_qinfo);
2433 	if (rc == 0) {
2434 		ports[port_id].txq[queue_id].state =
2435 			tx_qinfo.queue_state;
2436 	} else if (rc == -ENOTSUP) {
2437 		/*
2438 		 * Set the txq state to RTE_ETH_QUEUE_STATE_STARTED
2439 		 * so that PMDs which do not implement
2440 		 * rte_eth_tx_queue_info_get() can still forward.
2441 		 */
2442 		ports[port_id].txq[queue_id].state =
2443 			RTE_ETH_QUEUE_STATE_STARTED;
2444 	} else {
2445 		TESTPMD_LOG(WARNING,
2446 			"Failed to get tx queue info\n");
2447 	}
2448 }
2449 
2450 static void
2451 update_queue_state(void)
2452 {
2453 	portid_t pi;
2454 	queueid_t qi;
2455 
2456 	RTE_ETH_FOREACH_DEV(pi) {
2457 		for (qi = 0; qi < nb_rxq; qi++)
2458 			update_rx_queue_state(pi, qi);
2459 		for (qi = 0; qi < nb_txq; qi++)
2460 			update_tx_queue_state(pi, qi);
2461 	}
2462 }
2463 
2464 /*
2465  * Launch packet forwarding configuration.
2466  */
2467 void
2468 start_packet_forwarding(int with_tx_first)
2469 {
2470 	port_fwd_begin_t port_fwd_begin;
2471 	port_fwd_end_t  port_fwd_end;
2472 	stream_init_t stream_init = cur_fwd_eng->stream_init;
2473 	unsigned int i;
2474 
2475 	if (strcmp(cur_fwd_eng->fwd_mode_name, "rxonly") == 0 && !nb_rxq)
2476 		rte_exit(EXIT_FAILURE, "rxq are 0, cannot use rxonly fwd mode\n");
2477 
2478 	if (strcmp(cur_fwd_eng->fwd_mode_name, "txonly") == 0 && !nb_txq)
2479 		rte_exit(EXIT_FAILURE, "txq are 0, cannot use txonly fwd mode\n");
2480 
2481 	if ((strcmp(cur_fwd_eng->fwd_mode_name, "rxonly") != 0 &&
2482 		strcmp(cur_fwd_eng->fwd_mode_name, "txonly") != 0) &&
2483 		(!nb_rxq || !nb_txq))
2484 		rte_exit(EXIT_FAILURE,
2485 			"Either rxq or txq is 0, cannot use %s fwd mode\n",
2486 			cur_fwd_eng->fwd_mode_name);
2487 
2488 	if (all_ports_started() == 0) {
2489 		fprintf(stderr, "Not all ports were started\n");
2490 		return;
2491 	}
2492 	if (test_done == 0) {
2493 		fprintf(stderr, "Packet forwarding already started\n");
2494 		return;
2495 	}
2496 
2497 	fwd_config_setup();
2498 
2499 	pkt_fwd_config_display(&cur_fwd_config);
2500 	if (!pkt_fwd_shared_rxq_check())
2501 		return;
2502 
2503 	if (stream_init != NULL) {
2504 		if (rte_eal_process_type() == RTE_PROC_SECONDARY)
2505 			update_queue_state();
2506 		for (i = 0; i < cur_fwd_config.nb_fwd_streams; i++)
2507 			stream_init(fwd_streams[i]);
2508 	}
2509 
2510 	port_fwd_begin = cur_fwd_config.fwd_eng->port_fwd_begin;
2511 	if (port_fwd_begin != NULL) {
2512 		for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
2513 			if (port_fwd_begin(fwd_ports_ids[i])) {
2514 				fprintf(stderr,
2515 					"Packet forwarding is not ready\n");
2516 				return;
2517 			}
2518 		}
2519 	}
2520 
2521 	if (with_tx_first) {
2522 		port_fwd_begin = tx_only_engine.port_fwd_begin;
2523 		if (port_fwd_begin != NULL) {
2524 			for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
2525 				if (port_fwd_begin(fwd_ports_ids[i])) {
2526 					fprintf(stderr,
2527 						"Packet forwarding is not ready\n");
2528 					return;
2529 				}
2530 			}
2531 		}
2532 	}
2533 
2534 	test_done = 0;
2535 
2536 	if (!no_flush_rx)
2537 		flush_fwd_rx_queues();
2538 
2539 	rxtx_config_display();
2540 
2541 	fwd_stats_reset();
2542 	if (with_tx_first) {
2543 		while (with_tx_first--) {
2544 			launch_packet_forwarding(
2545 					run_one_txonly_burst_on_core);
2546 			rte_eal_mp_wait_lcore();
2547 		}
2548 		port_fwd_end = tx_only_engine.port_fwd_end;
2549 		if (port_fwd_end != NULL) {
2550 			for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++)
2551 				(*port_fwd_end)(fwd_ports_ids[i]);
2552 		}
2553 	}
2554 	launch_packet_forwarding(start_pkt_forward_on_core);
2555 }
2556 
2557 void
2558 stop_packet_forwarding(void)
2559 {
2560 	port_fwd_end_t port_fwd_end;
2561 	lcoreid_t lc_id;
2562 	portid_t pt_id;
2563 	int i;
2564 
2565 	if (test_done) {
2566 		fprintf(stderr, "Packet forwarding not started\n");
2567 		return;
2568 	}
2569 	printf("Telling cores to stop...");
2570 	for (lc_id = 0; lc_id < cur_fwd_config.nb_fwd_lcores; lc_id++)
2571 		fwd_lcores[lc_id]->stopped = 1;
2572 	printf("\nWaiting for lcores to finish...\n");
2573 	rte_eal_mp_wait_lcore();
2574 	port_fwd_end = cur_fwd_config.fwd_eng->port_fwd_end;
2575 	if (port_fwd_end != NULL) {
2576 		for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
2577 			pt_id = fwd_ports_ids[i];
2578 			(*port_fwd_end)(pt_id);
2579 		}
2580 	}
2581 
2582 	fwd_stats_display();
2583 
2584 	printf("\nDone.\n");
2585 	test_done = 1;
2586 }
2587 
2588 void
2589 dev_set_link_up(portid_t pid)
2590 {
2591 	if (rte_eth_dev_set_link_up(pid) < 0)
2592 		fprintf(stderr, "\nSet link up failed.\n");
2593 }
2594 
2595 void
2596 dev_set_link_down(portid_t pid)
2597 {
2598 	if (rte_eth_dev_set_link_down(pid) < 0)
2599 		fprintf(stderr, "\nSet link down failed.\n");
2600 }
2601 
2602 static int
2603 all_ports_started(void)
2604 {
2605 	portid_t pi;
2606 	struct rte_port *port;
2607 
2608 	RTE_ETH_FOREACH_DEV(pi) {
2609 		port = &ports[pi];
2610 		/* Check if there is a port which is not started */
2611 		if ((port->port_status != RTE_PORT_STARTED) &&
2612 			(port->slave_flag == 0))
2613 			return 0;
2614 	}
2615 
2616 	/* All ports are started */
2617 	return 1;
2618 }
2619 
2620 int
2621 port_is_stopped(portid_t port_id)
2622 {
2623 	struct rte_port *port = &ports[port_id];
2624 
2625 	if ((port->port_status != RTE_PORT_STOPPED) &&
2626 	    (port->slave_flag == 0))
2627 		return 0;
2628 	return 1;
2629 }
2630 
2631 int
2632 all_ports_stopped(void)
2633 {
2634 	portid_t pi;
2635 
2636 	RTE_ETH_FOREACH_DEV(pi) {
2637 		if (!port_is_stopped(pi))
2638 			return 0;
2639 	}
2640 
2641 	return 1;
2642 }
2643 
2644 int
2645 port_is_started(portid_t port_id)
2646 {
2647 	if (port_id_is_invalid(port_id, ENABLED_WARN))
2648 		return 0;
2649 
2650 	if (ports[port_id].port_status != RTE_PORT_STARTED)
2651 		return 0;
2652 
2653 	return 1;
2654 }
2655 
2656 #define HAIRPIN_MODE_RX_FORCE_MEMORY RTE_BIT32(8)
2657 #define HAIRPIN_MODE_TX_FORCE_MEMORY RTE_BIT32(9)
2658 
2659 #define HAIRPIN_MODE_RX_LOCKED_MEMORY RTE_BIT32(12)
2660 #define HAIRPIN_MODE_RX_RTE_MEMORY RTE_BIT32(13)
2661 
2662 #define HAIRPIN_MODE_TX_LOCKED_MEMORY RTE_BIT32(16)
2663 #define HAIRPIN_MODE_TX_RTE_MEMORY RTE_BIT32(17)
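
/*
 * Example decoding of a hairpin_mode value (hypothetical value, based on
 * the masks above and on setup_hairpin_queues() below):
 *   hairpin_mode = 0x10012
 *     bit 1  (0x2)     - ports are bound in pairs
 *     bit 4  (0x10)    - Tx flow rules are inserted explicitly by the app
 *     bit 16 (0x10000) - Tx hairpin queues use locked device memory
 */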
2664 
2665 
2666 /* Configure the Rx and Tx hairpin queues for the selected port. */
2667 static int
2668 setup_hairpin_queues(portid_t pi, portid_t p_pi, uint16_t cnt_pi)
2669 {
2670 	queueid_t qi;
2671 	struct rte_eth_hairpin_conf hairpin_conf = {
2672 		.peer_count = 1,
2673 	};
2674 	int i;
2675 	int diag;
2676 	struct rte_port *port = &ports[pi];
2677 	uint16_t peer_rx_port = pi;
2678 	uint16_t peer_tx_port = pi;
2679 	uint32_t manual = 1;
2680 	uint32_t tx_exp = hairpin_mode & 0x10;
2681 	uint32_t rx_force_memory = hairpin_mode & HAIRPIN_MODE_RX_FORCE_MEMORY;
2682 	uint32_t rx_locked_memory = hairpin_mode & HAIRPIN_MODE_RX_LOCKED_MEMORY;
2683 	uint32_t rx_rte_memory = hairpin_mode & HAIRPIN_MODE_RX_RTE_MEMORY;
2684 	uint32_t tx_force_memory = hairpin_mode & HAIRPIN_MODE_TX_FORCE_MEMORY;
2685 	uint32_t tx_locked_memory = hairpin_mode & HAIRPIN_MODE_TX_LOCKED_MEMORY;
2686 	uint32_t tx_rte_memory = hairpin_mode & HAIRPIN_MODE_TX_RTE_MEMORY;
2687 
2688 	if (!(hairpin_mode & 0xf)) {
2689 		peer_rx_port = pi;
2690 		peer_tx_port = pi;
2691 		manual = 0;
2692 	} else if (hairpin_mode & 0x1) {
2693 		peer_tx_port = rte_eth_find_next_owned_by(pi + 1,
2694 						       RTE_ETH_DEV_NO_OWNER);
2695 		if (peer_tx_port >= RTE_MAX_ETHPORTS)
2696 			peer_tx_port = rte_eth_find_next_owned_by(0,
2697 						RTE_ETH_DEV_NO_OWNER);
2698 		if (p_pi != RTE_MAX_ETHPORTS) {
2699 			peer_rx_port = p_pi;
2700 		} else {
2701 			uint16_t next_pi;
2702 
2703 			/* Last port will be the peer RX port of the first. */
2704 			RTE_ETH_FOREACH_DEV(next_pi)
2705 				peer_rx_port = next_pi;
2706 		}
2707 		manual = 1;
2708 	} else if (hairpin_mode & 0x2) {
2709 		if (cnt_pi & 0x1) {
2710 			peer_rx_port = p_pi;
2711 		} else {
2712 			peer_rx_port = rte_eth_find_next_owned_by(pi + 1,
2713 						RTE_ETH_DEV_NO_OWNER);
2714 			if (peer_rx_port >= RTE_MAX_ETHPORTS)
2715 				peer_rx_port = pi;
2716 		}
2717 		peer_tx_port = peer_rx_port;
2718 		manual = 1;
2719 	}
2720 
2721 	for (qi = nb_txq, i = 0; qi < nb_hairpinq + nb_txq; qi++) {
2722 		hairpin_conf.peers[0].port = peer_rx_port;
2723 		hairpin_conf.peers[0].queue = i + nb_rxq;
2724 		hairpin_conf.manual_bind = !!manual;
2725 		hairpin_conf.tx_explicit = !!tx_exp;
2726 		hairpin_conf.force_memory = !!tx_force_memory;
2727 		hairpin_conf.use_locked_device_memory = !!tx_locked_memory;
2728 		hairpin_conf.use_rte_memory = !!tx_rte_memory;
2729 		diag = rte_eth_tx_hairpin_queue_setup
2730 			(pi, qi, nb_txd, &hairpin_conf);
2731 		i++;
2732 		if (diag == 0)
2733 			continue;
2734 
2735 		/* Failed to set up a Tx hairpin queue, return */
2736 		if (port->port_status == RTE_PORT_HANDLING)
2737 			port->port_status = RTE_PORT_STOPPED;
2738 		else
2739 			fprintf(stderr,
2740 				"Port %d can not be set back to stopped\n", pi);
2741 		fprintf(stderr, "Fail to configure port %d hairpin queues\n",
2742 			pi);
2743 		/* try to reconfigure queues next time */
2744 		port->need_reconfig_queues = 1;
2745 		return -1;
2746 	}
2747 	for (qi = nb_rxq, i = 0; qi < nb_hairpinq + nb_rxq; qi++) {
2748 		hairpin_conf.peers[0].port = peer_tx_port;
2749 		hairpin_conf.peers[0].queue = i + nb_txq;
2750 		hairpin_conf.manual_bind = !!manual;
2751 		hairpin_conf.tx_explicit = !!tx_exp;
2752 		hairpin_conf.force_memory = !!rx_force_memory;
2753 		hairpin_conf.use_locked_device_memory = !!rx_locked_memory;
2754 		hairpin_conf.use_rte_memory = !!rx_rte_memory;
2755 		diag = rte_eth_rx_hairpin_queue_setup
2756 			(pi, qi, nb_rxd, &hairpin_conf);
2757 		i++;
2758 		if (diag == 0)
2759 			continue;
2760 
2761 		/* Failed to set up an Rx hairpin queue, return */
2762 		if (port->port_status == RTE_PORT_HANDLING)
2763 			port->port_status = RTE_PORT_STOPPED;
2764 		else
2765 			fprintf(stderr,
2766 				"Port %d can not be set back to stopped\n", pi);
2767 		fprintf(stderr, "Fail to configure port %d hairpin queues\n",
2768 			pi);
2769 		/* try to reconfigure queues next time */
2770 		port->need_reconfig_queues = 1;
2771 		return -1;
2772 	}
2773 	return 0;
2774 }
2775 
2776 /* Configure the Rx with optional split. */
2777 int
2778 rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
2779 	       uint16_t nb_rx_desc, unsigned int socket_id,
2780 	       struct rte_eth_rxconf *rx_conf, struct rte_mempool *mp)
2781 {
2782 	union rte_eth_rxseg rx_useg[MAX_SEGS_BUFFER_SPLIT] = {};
2783 	struct rte_mempool *rx_mempool[MAX_MEMPOOL] = {};
2784 	struct rte_mempool *mpx;
2785 	unsigned int i, mp_n;
2786 	uint32_t prev_hdrs = 0;
2787 	int ret;
2788 
2789 
2790 	if ((rx_pkt_nb_segs > 1) &&
2791 	    (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT)) {
2792 		/* multi-segment configuration */
2793 		for (i = 0; i < rx_pkt_nb_segs; i++) {
2794 			struct rte_eth_rxseg_split *rx_seg = &rx_useg[i].split;
2795 			/*
2796 			 * Use the last valid pool for segments whose index
2797 			 * exceeds the number of configured pools.
2798 			 */
2799 			mp_n = (i >= mbuf_data_size_n) ? mbuf_data_size_n - 1 : i;
2800 			mpx = mbuf_pool_find(socket_id, mp_n);
2801 			/* Handle zero as mbuf data buffer size. */
2802 			rx_seg->offset = i < rx_pkt_nb_offs ?
2803 					   rx_pkt_seg_offsets[i] : 0;
2804 			rx_seg->mp = mpx ? mpx : mp;
2805 			if (rx_pkt_hdr_protos[i] != 0 && rx_pkt_seg_lengths[i] == 0) {
2806 				rx_seg->proto_hdr = rx_pkt_hdr_protos[i] & ~prev_hdrs;
2807 				prev_hdrs |= rx_seg->proto_hdr;
2808 			} else {
2809 				rx_seg->length = rx_pkt_seg_lengths[i] ?
2810 						rx_pkt_seg_lengths[i] :
2811 						mbuf_data_size[mp_n];
2812 			}
2813 		}
2814 		rx_conf->rx_nseg = rx_pkt_nb_segs;
2815 		rx_conf->rx_seg = rx_useg;
2816 		rx_conf->rx_mempools = NULL;
2817 		rx_conf->rx_nmempool = 0;
2818 		ret = rte_eth_rx_queue_setup(port_id, rx_queue_id, nb_rx_desc,
2819 				    socket_id, rx_conf, NULL);
2820 		rx_conf->rx_seg = NULL;
2821 		rx_conf->rx_nseg = 0;
2822 	} else if (multi_rx_mempool == 1) {
2823 		/* multi-pool configuration */
2824 		struct rte_eth_dev_info dev_info;
2825 
2826 		if (mbuf_data_size_n <= 1) {
2827 			fprintf(stderr, "Invalid number of mempools %u\n",
2828 				mbuf_data_size_n);
2829 			return -EINVAL;
2830 		}
2831 		ret = rte_eth_dev_info_get(port_id, &dev_info);
2832 		if (ret != 0)
2833 			return ret;
2834 		if (dev_info.max_rx_mempools == 0) {
2835 			fprintf(stderr,
2836 				"Port %u doesn't support requested multi-rx-mempool configuration.\n",
2837 				port_id);
2838 			return -ENOTSUP;
2839 		}
2840 		for (i = 0; i < mbuf_data_size_n; i++) {
2841 			mpx = mbuf_pool_find(socket_id, i);
2842 			rx_mempool[i] = mpx ? mpx : mp;
2843 		}
2844 		rx_conf->rx_mempools = rx_mempool;
2845 		rx_conf->rx_nmempool = mbuf_data_size_n;
2846 		rx_conf->rx_seg = NULL;
2847 		rx_conf->rx_nseg = 0;
2848 		ret = rte_eth_rx_queue_setup(port_id, rx_queue_id, nb_rx_desc,
2849 				    socket_id, rx_conf, NULL);
2850 		rx_conf->rx_mempools = NULL;
2851 		rx_conf->rx_nmempool = 0;
2852 	} else {
2853 		/* Single pool/segment configuration */
2854 		rx_conf->rx_seg = NULL;
2855 		rx_conf->rx_nseg = 0;
2856 		rx_conf->rx_mempools = NULL;
2857 		rx_conf->rx_nmempool = 0;
2858 		ret = rte_eth_rx_queue_setup(port_id, rx_queue_id, nb_rx_desc,
2859 				    socket_id, rx_conf, mp);
2860 	}
2861 
2862 	ports[port_id].rxq[rx_queue_id].state = rx_conf->rx_deferred_start ?
2863 						RTE_ETH_QUEUE_STATE_STOPPED :
2864 						RTE_ETH_QUEUE_STATE_STARTED;
2865 	return ret;
2866 }
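
/*
 * Minimal sketch of the buffer-split path above (hypothetical sizes and
 * pool names, for illustration only): splitting each packet into a
 * 128-byte first segment from pool mp0 and the remainder from pool mp1
 * amounts to
 *
 *   rx_useg[0].split = (struct rte_eth_rxseg_split){ .mp = mp0, .length = 128 };
 *   rx_useg[1].split = (struct rte_eth_rxseg_split){ .mp = mp1, .length = 0 };
 *   rx_conf->rx_seg = rx_useg;
 *   rx_conf->rx_nseg = 2;
 *   rte_eth_rx_queue_setup(port_id, queue_id, nb_rx_desc, socket_id, rx_conf, NULL);
 *
 * i.e. the per-queue mempool argument is NULL and the segment descriptions
 * carry the pools instead (requires RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT).
 */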
2867 
2868 static int
2869 alloc_xstats_display_info(portid_t pi)
2870 {
2871 	uint64_t **ids_supp = &ports[pi].xstats_info.ids_supp;
2872 	uint64_t **prev_values = &ports[pi].xstats_info.prev_values;
2873 	uint64_t **curr_values = &ports[pi].xstats_info.curr_values;
2874 
2875 	if (xstats_display_num == 0)
2876 		return 0;
2877 
2878 	*ids_supp = calloc(xstats_display_num, sizeof(**ids_supp));
2879 	if (*ids_supp == NULL)
2880 		goto fail_ids_supp;
2881 
2882 	*prev_values = calloc(xstats_display_num,
2883 			      sizeof(**prev_values));
2884 	if (*prev_values == NULL)
2885 		goto fail_prev_values;
2886 
2887 	*curr_values = calloc(xstats_display_num,
2888 			      sizeof(**curr_values));
2889 	if (*curr_values == NULL)
2890 		goto fail_curr_values;
2891 
2892 	ports[pi].xstats_info.allocated = true;
2893 
2894 	return 0;
2895 
2896 fail_curr_values:
2897 	free(*prev_values);
2898 fail_prev_values:
2899 	free(*ids_supp);
2900 fail_ids_supp:
2901 	return -ENOMEM;
2902 }
2903 
2904 static void
2905 free_xstats_display_info(portid_t pi)
2906 {
2907 	if (!ports[pi].xstats_info.allocated)
2908 		return;
2909 	free(ports[pi].xstats_info.ids_supp);
2910 	free(ports[pi].xstats_info.prev_values);
2911 	free(ports[pi].xstats_info.curr_values);
2912 	ports[pi].xstats_info.allocated = false;
2913 }
2914 
2915 /** Fill helper structures for specified port to show extended statistics. */
2916 static void
2917 fill_xstats_display_info_for_port(portid_t pi)
2918 {
2919 	unsigned int stat, stat_supp;
2920 	const char *xstat_name;
2921 	struct rte_port *port;
2922 	uint64_t *ids_supp;
2923 	int rc;
2924 
2925 	if (xstats_display_num == 0)
2926 		return;
2927 
2928 	if (pi == (portid_t)RTE_PORT_ALL) {
2929 		fill_xstats_display_info();
2930 		return;
2931 	}
2932 
2933 	port = &ports[pi];
2934 	if (port->port_status != RTE_PORT_STARTED)
2935 		return;
2936 
2937 	if (!port->xstats_info.allocated && alloc_xstats_display_info(pi) != 0)
2938 		rte_exit(EXIT_FAILURE,
2939 			 "Failed to allocate xstats display memory\n");
2940 
2941 	ids_supp = port->xstats_info.ids_supp;
2942 	for (stat = stat_supp = 0; stat < xstats_display_num; stat++) {
2943 		xstat_name = xstats_display[stat].name;
2944 		rc = rte_eth_xstats_get_id_by_name(pi, xstat_name,
2945 						   ids_supp + stat_supp);
2946 		if (rc != 0) {
2947 			fprintf(stderr, "No xstat '%s' on port %u - skipping (index %u)\n",
2948 				xstat_name, pi, stat);
2949 			continue;
2950 		}
2951 		stat_supp++;
2952 	}
2953 
2954 	port->xstats_info.ids_supp_sz = stat_supp;
2955 }
2956 
2957 /** Fill helper structures for all ports to show extended statistics. */
2958 static void
2959 fill_xstats_display_info(void)
2960 {
2961 	portid_t pi;
2962 
2963 	if (xstats_display_num == 0)
2964 		return;
2965 
2966 	RTE_ETH_FOREACH_DEV(pi)
2967 		fill_xstats_display_info_for_port(pi);
2968 }
2969 
2970 /*
2971  * Some capabilities (such as rx_offload_capa and tx_offload_capa) of a
2972  * bonding device are zero in dev_info while no slave is added, and they
2973  * are updated whenever a new slave device is added. Therefore, adding a
2974  * slave requires updating the port configuration of the bonding device.
2975  */
2976 static void
2977 update_bonding_port_dev_conf(portid_t bond_pid)
2978 {
2979 #ifdef RTE_NET_BOND
2980 	struct rte_port *port = &ports[bond_pid];
2981 	uint16_t i;
2982 	int ret;
2983 
2984 	ret = eth_dev_info_get_print_err(bond_pid, &port->dev_info);
2985 	if (ret != 0) {
2986 		fprintf(stderr, "Failed to get dev info for port = %u\n",
2987 			bond_pid);
2988 		return;
2989 	}
2990 
2991 	if (port->dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE)
2992 		port->dev_conf.txmode.offloads |=
2993 				RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE;
2994 	/* Apply Tx offloads configuration */
2995 	for (i = 0; i < port->dev_info.max_tx_queues; i++)
2996 		port->txq[i].conf.offloads = port->dev_conf.txmode.offloads;
2997 
2998 	port->dev_conf.rx_adv_conf.rss_conf.rss_hf &=
2999 				port->dev_info.flow_type_rss_offloads;
3000 #else
3001 	RTE_SET_USED(bond_pid);
3002 #endif
3003 }
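
/*
 * Sketch of when the refresh above is needed (assuming the slave-based
 * bonding API already used in this file, e.g. rte_eth_bond_slaves_get()):
 * after a slave is added with
 *
 *   rte_eth_bond_slave_add(bond_pid, slave_pid);
 *
 * the bonding PMD recomputes rx/tx_offload_capa, the port is expected to be
 * marked with update_conf, and start_port() below then calls
 * update_bonding_port_dev_conf() to pick up the new capabilities before
 * configuring the device.
 */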
3004 
3005 int
3006 start_port(portid_t pid)
3007 {
3008 	int diag;
3009 	portid_t pi;
3010 	portid_t p_pi = RTE_MAX_ETHPORTS;
3011 	portid_t pl[RTE_MAX_ETHPORTS];
3012 	portid_t peer_pl[RTE_MAX_ETHPORTS];
3013 	uint16_t cnt_pi = 0;
3014 	uint16_t cfg_pi = 0;
3015 	int peer_pi;
3016 	queueid_t qi;
3017 	struct rte_port *port;
3018 	struct rte_eth_hairpin_cap cap;
3019 	bool at_least_one_port_exist = false;
3020 	bool all_ports_already_started = true;
3021 	bool at_least_one_port_successfully_started = false;
3022 
3023 	if (port_id_is_invalid(pid, ENABLED_WARN))
3024 		return 0;
3025 
3026 	RTE_ETH_FOREACH_DEV(pi) {
3027 		if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
3028 			continue;
3029 
3030 		if (port_is_bonding_slave(pi)) {
3031 			fprintf(stderr,
3032 				"Please remove port %d from bonded device.\n",
3033 				pi);
3034 			continue;
3035 		}
3036 
3037 		at_least_one_port_exist = true;
3038 
3039 		port = &ports[pi];
3040 		if (port->port_status == RTE_PORT_STOPPED) {
3041 			port->port_status = RTE_PORT_HANDLING;
3042 			all_ports_already_started = false;
3043 		} else {
3044 			fprintf(stderr, "Port %d is not stopped, skipping\n", pi);
3045 			continue;
3046 		}
3047 
3048 		if (port->need_reconfig > 0) {
3049 			struct rte_eth_conf dev_conf;
3050 			int k;
3051 
3052 			port->need_reconfig = 0;
3053 
3054 			if (flow_isolate_all) {
3055 				int ret = port_flow_isolate(pi, 1);
3056 				if (ret) {
3057 					fprintf(stderr,
3058 						"Failed to apply isolated mode on port %d\n",
3059 						pi);
3060 					return -1;
3061 				}
3062 			}
3063 			configure_rxtx_dump_callbacks(0);
3064 			printf("Configuring Port %d (socket %u)\n", pi,
3065 					port->socket_id);
3066 			if (nb_hairpinq > 0 &&
3067 			    rte_eth_dev_hairpin_capability_get(pi, &cap)) {
3068 				fprintf(stderr,
3069 					"Port %d doesn't support hairpin queues\n",
3070 					pi);
3071 				return -1;
3072 			}
3073 
3074 			if (port->bond_flag == 1 && port->update_conf == 1) {
3075 				update_bonding_port_dev_conf(pi);
3076 				port->update_conf = 0;
3077 			}
3078 
3079 			/* configure port */
3080 			diag = eth_dev_configure_mp(pi, nb_rxq + nb_hairpinq,
3081 						     nb_txq + nb_hairpinq,
3082 						     &(port->dev_conf));
3083 			if (diag != 0) {
3084 				if (port->port_status == RTE_PORT_HANDLING)
3085 					port->port_status = RTE_PORT_STOPPED;
3086 				else
3087 					fprintf(stderr,
3088 						"Port %d can not be set back to stopped\n",
3089 						pi);
3090 				fprintf(stderr, "Fail to configure port %d\n",
3091 					pi);
3092 				/* try to reconfigure port next time */
3093 				port->need_reconfig = 1;
3094 				return -1;
3095 			}
3096 			/* get device configuration */
3097 			if (0 !=
3098 				eth_dev_conf_get_print_err(pi, &dev_conf)) {
3099 				fprintf(stderr,
3100 					"port %d can not get device configuration\n",
3101 					pi);
3102 				return -1;
3103 			}
3104 			/* Apply Rx offloads configuration */
3105 			if (dev_conf.rxmode.offloads !=
3106 			    port->dev_conf.rxmode.offloads) {
3107 				port->dev_conf.rxmode.offloads |=
3108 					dev_conf.rxmode.offloads;
3109 				for (k = 0;
3110 				     k < port->dev_info.max_rx_queues;
3111 				     k++)
3112 					port->rxq[k].conf.offloads |=
3113 						dev_conf.rxmode.offloads;
3114 			}
3115 			/* Apply Tx offloads configuration */
3116 			if (dev_conf.txmode.offloads !=
3117 			    port->dev_conf.txmode.offloads) {
3118 				port->dev_conf.txmode.offloads |=
3119 					dev_conf.txmode.offloads;
3120 				for (k = 0;
3121 				     k < port->dev_info.max_tx_queues;
3122 				     k++)
3123 					port->txq[k].conf.offloads |=
3124 						dev_conf.txmode.offloads;
3125 			}
3126 		}
3127 		if (port->need_reconfig_queues > 0 && is_proc_primary()) {
3128 			port->need_reconfig_queues = 0;
3129 			/* setup tx queues */
3130 			for (qi = 0; qi < nb_txq; qi++) {
3131 				struct rte_eth_txconf *conf =
3132 							&port->txq[qi].conf;
3133 
3134 				if ((numa_support) &&
3135 					(txring_numa[pi] != NUMA_NO_CONFIG))
3136 					diag = rte_eth_tx_queue_setup(pi, qi,
3137 						port->nb_tx_desc[qi],
3138 						txring_numa[pi],
3139 						&(port->txq[qi].conf));
3140 				else
3141 					diag = rte_eth_tx_queue_setup(pi, qi,
3142 						port->nb_tx_desc[qi],
3143 						port->socket_id,
3144 						&(port->txq[qi].conf));
3145 
3146 				if (diag == 0) {
3147 					port->txq[qi].state =
3148 						conf->tx_deferred_start ?
3149 						RTE_ETH_QUEUE_STATE_STOPPED :
3150 						RTE_ETH_QUEUE_STATE_STARTED;
3151 					continue;
3152 				}
3153 
3154 				/* Fail to setup tx queue, return */
3155 				if (port->port_status == RTE_PORT_HANDLING)
3156 					port->port_status = RTE_PORT_STOPPED;
3157 				else
3158 					fprintf(stderr,
3159 						"Port %d can not be set back to stopped\n",
3160 						pi);
3161 				fprintf(stderr,
3162 					"Fail to configure port %d tx queues\n",
3163 					pi);
3164 				/* try to reconfigure queues next time */
3165 				port->need_reconfig_queues = 1;
3166 				return -1;
3167 			}
3168 			for (qi = 0; qi < nb_rxq; qi++) {
3169 				/* setup rx queues */
3170 				if ((numa_support) &&
3171 					(rxring_numa[pi] != NUMA_NO_CONFIG)) {
3172 					struct rte_mempool *mp =
3173 						mbuf_pool_find
3174 							(rxring_numa[pi], 0);
3175 					if (mp == NULL) {
3176 						fprintf(stderr,
3177 							"Failed to setup RX queue: No mempool allocation on the socket %d\n",
3178 							rxring_numa[pi]);
3179 						return -1;
3180 					}
3181 
3182 					diag = rx_queue_setup(pi, qi,
3183 					     port->nb_rx_desc[qi],
3184 					     rxring_numa[pi],
3185 					     &(port->rxq[qi].conf),
3186 					     mp);
3187 				} else {
3188 					struct rte_mempool *mp =
3189 						mbuf_pool_find
3190 							(port->socket_id, 0);
3191 					if (mp == NULL) {
3192 						fprintf(stderr,
3193 							"Failed to setup RX queue: No mempool allocation on the socket %d\n",
3194 							port->socket_id);
3195 						return -1;
3196 					}
3197 					diag = rx_queue_setup(pi, qi,
3198 					     port->nb_rx_desc[qi],
3199 					     port->socket_id,
3200 					     &(port->rxq[qi].conf),
3201 					     mp);
3202 				}
3203 				if (diag == 0)
3204 					continue;
3205 
3206 				/* Fail to setup rx queue, return */
3207 				if (port->port_status == RTE_PORT_HANDLING)
3208 					port->port_status = RTE_PORT_STOPPED;
3209 				else
3210 					fprintf(stderr,
3211 						"Port %d can not be set back to stopped\n",
3212 						pi);
3213 				fprintf(stderr,
3214 					"Fail to configure port %d rx queues\n",
3215 					pi);
3216 				/* try to reconfigure queues next time */
3217 				port->need_reconfig_queues = 1;
3218 				return -1;
3219 			}
3220 			/* setup hairpin queues */
3221 			if (setup_hairpin_queues(pi, p_pi, cnt_pi) != 0)
3222 				return -1;
3223 		}
3224 		configure_rxtx_dump_callbacks(verbose_level);
3225 		if (clear_ptypes) {
3226 			diag = rte_eth_dev_set_ptypes(pi, RTE_PTYPE_UNKNOWN,
3227 					NULL, 0);
3228 			if (diag < 0)
3229 				fprintf(stderr,
3230 					"Port %d: Failed to disable Ptype parsing\n",
3231 					pi);
3232 		}
3233 
3234 		p_pi = pi;
3235 		cnt_pi++;
3236 
3237 		/* start port */
3238 		diag = eth_dev_start_mp(pi);
3239 		if (diag < 0) {
3240 			fprintf(stderr, "Fail to start port %d: %s\n",
3241 				pi, rte_strerror(-diag));
3242 
3243 			/* Failed to start the port, set it back to stopped */
3244 			if (port->port_status == RTE_PORT_HANDLING)
3245 				port->port_status = RTE_PORT_STOPPED;
3246 			else
3247 				fprintf(stderr,
3248 					"Port %d can not be set back to stopped\n",
3249 					pi);
3250 			continue;
3251 		}
3252 
3253 		if (port->port_status == RTE_PORT_HANDLING)
3254 			port->port_status = RTE_PORT_STARTED;
3255 		else
3256 			fprintf(stderr, "Port %d can not be set into started\n",
3257 				pi);
3258 
3259 		if (eth_macaddr_get_print_err(pi, &port->eth_addr) == 0)
3260 			printf("Port %d: " RTE_ETHER_ADDR_PRT_FMT "\n", pi,
3261 					RTE_ETHER_ADDR_BYTES(&port->eth_addr));
3262 
3263 		at_least_one_port_successfully_started = true;
3264 
3265 		pl[cfg_pi++] = pi;
3266 	}
3267 
3268 	if (rte_eal_process_type() == RTE_PROC_SECONDARY)
3269 		update_queue_state();
3270 
3271 	if (at_least_one_port_successfully_started && !no_link_check)
3272 		check_all_ports_link_status(RTE_PORT_ALL);
3273 	else if (at_least_one_port_exist && all_ports_already_started)
3274 		fprintf(stderr, "Please stop the ports first\n");
3275 
3276 	if (hairpin_mode & 0xf) {
3277 		uint16_t i;
3278 		int j;
3279 
3280 		/* bind all started hairpin ports */
3281 		for (i = 0; i < cfg_pi; i++) {
3282 			pi = pl[i];
3283 			/* bind current Tx to all peer Rx */
3284 			peer_pi = rte_eth_hairpin_get_peer_ports(pi, peer_pl,
3285 							RTE_MAX_ETHPORTS, 1);
3286 			if (peer_pi < 0)
3287 				return peer_pi;
3288 			for (j = 0; j < peer_pi; j++) {
3289 				if (!port_is_started(peer_pl[j]))
3290 					continue;
3291 				diag = rte_eth_hairpin_bind(pi, peer_pl[j]);
3292 				if (diag < 0) {
3293 					fprintf(stderr,
3294 						"Error during binding hairpin Tx port %u to %u: %s\n",
3295 						pi, peer_pl[j],
3296 						rte_strerror(-diag));
3297 					return -1;
3298 				}
3299 			}
3300 			/* bind all peer Tx to current Rx */
3301 			peer_pi = rte_eth_hairpin_get_peer_ports(pi, peer_pl,
3302 							RTE_MAX_ETHPORTS, 0);
3303 			if (peer_pi < 0)
3304 				return peer_pi;
3305 			for (j = 0; j < peer_pi; j++) {
3306 				if (!port_is_started(peer_pl[j]))
3307 					continue;
3308 				diag = rte_eth_hairpin_bind(peer_pl[j], pi);
3309 				if (diag < 0) {
3310 					fprintf(stderr,
3311 						"Error during binding hairpin Tx port %u to %u: %s\n",
3312 						peer_pl[j], pi,
3313 						rte_strerror(-diag));
3314 					return -1;
3315 				}
3316 			}
3317 		}
3318 	}
3319 
3320 	fill_xstats_display_info_for_port(pid);
3321 
3322 	printf("Done\n");
3323 	return 0;
3324 }
3325 
3326 void
3327 stop_port(portid_t pid)
3328 {
3329 	portid_t pi;
3330 	struct rte_port *port;
3331 	int need_check_link_status = 0;
3332 	portid_t peer_pl[RTE_MAX_ETHPORTS];
3333 	int peer_pi;
3334 	int ret;
3335 
3336 	if (port_id_is_invalid(pid, ENABLED_WARN))
3337 		return;
3338 
3339 	printf("Stopping ports...\n");
3340 
3341 	RTE_ETH_FOREACH_DEV(pi) {
3342 		if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
3343 			continue;
3344 
3345 		if (port_is_forwarding(pi) != 0 && test_done == 0) {
3346 			fprintf(stderr,
3347 				"Please remove port %d from forwarding configuration.\n",
3348 				pi);
3349 			continue;
3350 		}
3351 
3352 		if (port_is_bonding_slave(pi)) {
3353 			fprintf(stderr,
3354 				"Please remove port %d from bonded device.\n",
3355 				pi);
3356 			continue;
3357 		}
3358 
3359 		port = &ports[pi];
3360 		if (port->port_status == RTE_PORT_STARTED)
3361 			port->port_status = RTE_PORT_HANDLING;
3362 		else
3363 			continue;
3364 
3365 		if (hairpin_mode & 0xf) {
3366 			int j;
3367 
3368 			rte_eth_hairpin_unbind(pi, RTE_MAX_ETHPORTS);
3369 			/* unbind all peer Tx from current Rx */
3370 			peer_pi = rte_eth_hairpin_get_peer_ports(pi, peer_pl,
3371 							RTE_MAX_ETHPORTS, 0);
3372 			if (peer_pi < 0)
3373 				continue;
3374 			for (j = 0; j < peer_pi; j++) {
3375 				if (!port_is_started(peer_pl[j]))
3376 					continue;
3377 				rte_eth_hairpin_unbind(peer_pl[j], pi);
3378 			}
3379 		}
3380 
3381 		if (port->flow_list && !no_flow_flush)
3382 			port_flow_flush(pi);
3383 
3384 		ret = eth_dev_stop_mp(pi);
3385 		if (ret != 0) {
3386 			RTE_LOG(ERR, EAL, "rte_eth_dev_stop failed for port %u\n",
3387 				pi);
3388 			/* Allow to retry stopping the port. */
3389 			port->port_status = RTE_PORT_STARTED;
3390 			continue;
3391 		}
3392 
3393 		if (port->port_status == RTE_PORT_HANDLING)
3394 			port->port_status = RTE_PORT_STOPPED;
3395 		else
3396 			fprintf(stderr, "Port %d can not be set into stopped\n",
3397 				pi);
3398 		need_check_link_status = 1;
3399 	}
3400 	if (need_check_link_status && !no_link_check)
3401 		check_all_ports_link_status(RTE_PORT_ALL);
3402 
3403 	printf("Done\n");
3404 }
3405 
3406 static void
3407 remove_invalid_ports_in(portid_t *array, portid_t *total)
3408 {
3409 	portid_t i;
3410 	portid_t new_total = 0;
3411 
3412 	for (i = 0; i < *total; i++)
3413 		if (!port_id_is_invalid(array[i], DISABLED_WARN)) {
3414 			array[new_total] = array[i];
3415 			new_total++;
3416 		}
3417 	*total = new_total;
3418 }
3419 
3420 static void
3421 remove_invalid_ports(void)
3422 {
3423 	remove_invalid_ports_in(ports_ids, &nb_ports);
3424 	remove_invalid_ports_in(fwd_ports_ids, &nb_fwd_ports);
3425 	nb_cfg_ports = nb_fwd_ports;
3426 }
3427 
3428 static void
3429 flush_port_owned_resources(portid_t pi)
3430 {
3431 	mcast_addr_pool_destroy(pi);
3432 	port_flow_flush(pi);
3433 	port_flow_template_table_flush(pi);
3434 	port_flow_pattern_template_flush(pi);
3435 	port_flow_actions_template_flush(pi);
3436 	port_flex_item_flush(pi);
3437 	port_action_handle_flush(pi);
3438 }
3439 
3440 static void
3441 clear_bonding_slave_device(portid_t *slave_pids, uint16_t num_slaves)
3442 {
3443 	struct rte_port *port;
3444 	portid_t slave_pid;
3445 	uint16_t i;
3446 
3447 	for (i = 0; i < num_slaves; i++) {
3448 		slave_pid = slave_pids[i];
3449 		if (port_is_started(slave_pid) == 1) {
3450 			if (rte_eth_dev_stop(slave_pid) != 0)
3451 				fprintf(stderr, "rte_eth_dev_stop failed for port %u\n",
3452 					slave_pid);
3453 
3454 			port = &ports[slave_pid];
3455 			port->port_status = RTE_PORT_STOPPED;
3456 		}
3457 
3458 		clear_port_slave_flag(slave_pid);
3459 
3460 		/* Close slave device when testpmd quit or is killed. */
3461 		/* Close slave device when testpmd quits or is killed. */
3462 			rte_eth_dev_close(slave_pid);
3463 	}
3464 }
3465 
3466 void
3467 close_port(portid_t pid)
3468 {
3469 	portid_t pi;
3470 	struct rte_port *port;
3471 	portid_t slave_pids[RTE_MAX_ETHPORTS];
3472 	int num_slaves = 0;
3473 
3474 	if (port_id_is_invalid(pid, ENABLED_WARN))
3475 		return;
3476 
3477 	printf("Closing ports...\n");
3478 
3479 	RTE_ETH_FOREACH_DEV(pi) {
3480 		if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
3481 			continue;
3482 
3483 		if (port_is_forwarding(pi) != 0 && test_done == 0) {
3484 			fprintf(stderr,
3485 				"Please remove port %d from forwarding configuration.\n",
3486 				pi);
3487 			continue;
3488 		}
3489 
3490 		if (port_is_bonding_slave(pi)) {
3491 			fprintf(stderr,
3492 				"Please remove port %d from bonded device.\n",
3493 				pi);
3494 			continue;
3495 		}
3496 
3497 		port = &ports[pi];
3498 		if (port->port_status == RTE_PORT_CLOSED) {
3499 			fprintf(stderr, "Port %d is already closed\n", pi);
3500 			continue;
3501 		}
3502 
3503 		if (is_proc_primary()) {
3504 			flush_port_owned_resources(pi);
3505 #ifdef RTE_NET_BOND
3506 			if (port->bond_flag == 1)
3507 				num_slaves = rte_eth_bond_slaves_get(pi,
3508 						slave_pids, RTE_MAX_ETHPORTS);
3509 #endif
3510 			rte_eth_dev_close(pi);
3511 			/*
3512 			 * If this port is a bonding device, all slaves under
3513 			 * the device need to be removed or closed.
3514 			 */
3515 			if (port->bond_flag == 1 && num_slaves > 0)
3516 				clear_bonding_slave_device(slave_pids,
3517 							num_slaves);
3518 		}
3519 
3520 		free_xstats_display_info(pi);
3521 	}
3522 
3523 	remove_invalid_ports();
3524 	printf("Done\n");
3525 }
3526 
3527 void
3528 reset_port(portid_t pid)
3529 {
3530 	int diag;
3531 	portid_t pi;
3532 	struct rte_port *port;
3533 
3534 	if (port_id_is_invalid(pid, ENABLED_WARN))
3535 		return;
3536 
3537 	if ((pid == (portid_t)RTE_PORT_ALL && !all_ports_stopped()) ||
3538 		(pid != (portid_t)RTE_PORT_ALL && !port_is_stopped(pid))) {
3539 		fprintf(stderr,
3540 			"Can not reset port(s), please stop port(s) first.\n");
3541 		return;
3542 	}
3543 
3544 	printf("Resetting ports...\n");
3545 
3546 	RTE_ETH_FOREACH_DEV(pi) {
3547 		if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
3548 			continue;
3549 
3550 		if (port_is_forwarding(pi) != 0 && test_done == 0) {
3551 			fprintf(stderr,
3552 				"Please remove port %d from forwarding configuration.\n",
3553 				pi);
3554 			continue;
3555 		}
3556 
3557 		if (port_is_bonding_slave(pi)) {
3558 			fprintf(stderr,
3559 				"Please remove port %d from bonded device.\n",
3560 				pi);
3561 			continue;
3562 		}
3563 
3564 		if (is_proc_primary()) {
3565 			diag = rte_eth_dev_reset(pi);
3566 			if (diag == 0) {
3567 				port = &ports[pi];
3568 				port->need_reconfig = 1;
3569 				port->need_reconfig_queues = 1;
3570 			} else {
3571 				fprintf(stderr, "Failed to reset port %d. diag=%d\n",
3572 					pi, diag);
3573 			}
3574 		}
3575 	}
3576 
3577 	printf("Done\n");
3578 }
3579 
3580 void
3581 attach_port(char *identifier)
3582 {
3583 	portid_t pi;
3584 	struct rte_dev_iterator iterator;
3585 
3586 	printf("Attaching a new port...\n");
3587 
3588 	if (identifier == NULL) {
3589 		fprintf(stderr, "Invalid parameters are specified\n");
3590 		return;
3591 	}
3592 
3593 	if (rte_dev_probe(identifier) < 0) {
3594 		TESTPMD_LOG(ERR, "Failed to attach port %s\n", identifier);
3595 		return;
3596 	}
3597 
3598 	/* first attach mode: event */
3599 	if (setup_on_probe_event) {
3600 		/* new ports are detected on RTE_ETH_EVENT_NEW event */
3601 		for (pi = 0; pi < RTE_MAX_ETHPORTS; pi++)
3602 			if (ports[pi].port_status == RTE_PORT_HANDLING &&
3603 					ports[pi].need_setup != 0)
3604 				setup_attached_port(pi);
3605 		return;
3606 	}
3607 
3608 	/* second attach mode: iterator */
3609 	RTE_ETH_FOREACH_MATCHING_DEV(pi, identifier, &iterator) {
3610 		/* setup ports matching the devargs used for probing */
3611 		if (port_is_forwarding(pi))
3612 			continue; /* port was already attached before */
3613 		setup_attached_port(pi);
3614 	}
3615 }
3616 
3617 static void
3618 setup_attached_port(portid_t pi)
3619 {
3620 	unsigned int socket_id;
3621 	int ret;
3622 
3623 	socket_id = (unsigned)rte_eth_dev_socket_id(pi);
3624 	/* if socket_id is invalid, set to the first available socket. */
3625 	if (check_socket_id(socket_id) < 0)
3626 		socket_id = socket_ids[0];
3627 	reconfig(pi, socket_id);
3628 	ret = rte_eth_promiscuous_enable(pi);
3629 	if (ret != 0)
3630 		fprintf(stderr,
3631 			"Error during enabling promiscuous mode for port %u: %s - ignore\n",
3632 			pi, rte_strerror(-ret));
3633 
3634 	ports_ids[nb_ports++] = pi;
3635 	fwd_ports_ids[nb_fwd_ports++] = pi;
3636 	nb_cfg_ports = nb_fwd_ports;
3637 	ports[pi].need_setup = 0;
3638 	ports[pi].port_status = RTE_PORT_STOPPED;
3639 
3640 	printf("Port %d is attached. Now total ports is %d\n", pi, nb_ports);
3641 	printf("Done\n");
3642 }
3643 
3644 static void
3645 detach_device(struct rte_device *dev)
3646 {
3647 	portid_t sibling;
3648 
3649 	if (dev == NULL) {
3650 		fprintf(stderr, "Device already removed\n");
3651 		return;
3652 	}
3653 
3654 	printf("Removing a device...\n");
3655 
3656 	RTE_ETH_FOREACH_DEV_OF(sibling, dev) {
3657 		if (ports[sibling].port_status != RTE_PORT_CLOSED) {
3658 			if (ports[sibling].port_status != RTE_PORT_STOPPED) {
3659 				fprintf(stderr, "Port %u not stopped\n",
3660 					sibling);
3661 				return;
3662 			}
3663 			flush_port_owned_resources(sibling);
3664 		}
3665 	}
3666 
3667 	if (rte_dev_remove(dev) < 0) {
3668 		TESTPMD_LOG(ERR, "Failed to detach device %s\n", rte_dev_name(dev));
3669 		return;
3670 	}
3671 	remove_invalid_ports();
3672 
3673 	printf("Device is detached\n");
3674 	printf("Now total ports is %d\n", nb_ports);
3675 	printf("Done\n");
3676 	return;
3677 }
3678 
3679 void
3680 detach_port_device(portid_t port_id)
3681 {
3682 	int ret;
3683 	struct rte_eth_dev_info dev_info;
3684 
3685 	if (port_id_is_invalid(port_id, ENABLED_WARN))
3686 		return;
3687 
3688 	if (ports[port_id].port_status != RTE_PORT_CLOSED) {
3689 		if (ports[port_id].port_status != RTE_PORT_STOPPED) {
3690 			fprintf(stderr, "Port not stopped\n");
3691 			return;
3692 		}
3693 		fprintf(stderr, "Port was not closed\n");
3694 	}
3695 
3696 	ret = eth_dev_info_get_print_err(port_id, &dev_info);
3697 	if (ret != 0) {
3698 		TESTPMD_LOG(ERR,
3699 			"Failed to get device info for port %d, not detaching\n",
3700 			port_id);
3701 		return;
3702 	}
3703 	detach_device(dev_info.device);
3704 }
3705 
3706 void
3707 detach_devargs(char *identifier)
3708 {
3709 	struct rte_dev_iterator iterator;
3710 	struct rte_devargs da;
3711 	portid_t port_id;
3712 
3713 	printf("Removing a device...\n");
3714 
3715 	memset(&da, 0, sizeof(da));
3716 	if (rte_devargs_parsef(&da, "%s", identifier)) {
3717 		fprintf(stderr, "cannot parse identifier\n");
3718 		return;
3719 	}
3720 
3721 	RTE_ETH_FOREACH_MATCHING_DEV(port_id, identifier, &iterator) {
3722 		if (ports[port_id].port_status != RTE_PORT_CLOSED) {
3723 			if (ports[port_id].port_status != RTE_PORT_STOPPED) {
3724 				fprintf(stderr, "Port %u not stopped\n",
3725 					port_id);
3726 				rte_eth_iterator_cleanup(&iterator);
3727 				rte_devargs_reset(&da);
3728 				return;
3729 			}
3730 			flush_port_owned_resources(port_id);
3731 		}
3732 	}
3733 
3734 	if (rte_eal_hotplug_remove(rte_bus_name(da.bus), da.name) != 0) {
3735 		TESTPMD_LOG(ERR, "Failed to detach device %s(%s)\n",
3736 			    da.name, rte_bus_name(da.bus));
3737 		rte_devargs_reset(&da);
3738 		return;
3739 	}
3740 
3741 	remove_invalid_ports();
3742 
3743 	printf("Device %s is detached\n", identifier);
3744 	printf("Now total ports is %d\n", nb_ports);
3745 	printf("Done\n");
3746 	rte_devargs_reset(&da);
3747 }
3748 
3749 void
3750 pmd_test_exit(void)
3751 {
3752 	portid_t pt_id;
3753 	unsigned int i;
3754 	int ret;
3755 
3756 	if (test_done == 0)
3757 		stop_packet_forwarding();
3758 
3759 #ifndef RTE_EXEC_ENV_WINDOWS
3760 	for (i = 0 ; i < RTE_DIM(mempools) ; i++) {
3761 		if (mempools[i]) {
3762 			if (mp_alloc_type == MP_ALLOC_ANON)
3763 				rte_mempool_mem_iter(mempools[i], dma_unmap_cb,
3764 						     NULL);
3765 		}
3766 	}
3767 #endif
3768 	if (ports != NULL) {
3769 		no_link_check = 1;
3770 		RTE_ETH_FOREACH_DEV(pt_id) {
3771 			printf("\nStopping port %d...\n", pt_id);
3772 			fflush(stdout);
3773 			stop_port(pt_id);
3774 		}
3775 		RTE_ETH_FOREACH_DEV(pt_id) {
3776 			printf("\nShutting down port %d...\n", pt_id);
3777 			fflush(stdout);
3778 			close_port(pt_id);
3779 		}
3780 	}
3781 
3782 	if (hot_plug) {
3783 		ret = rte_dev_event_monitor_stop();
3784 		if (ret) {
3785 			RTE_LOG(ERR, EAL,
3786 				"fail to stop device event monitor.\n");
3787 			return;
3788 		}
3789 
3790 		ret = rte_dev_event_callback_unregister(NULL,
3791 			dev_event_callback, NULL);
3792 		if (ret < 0) {
3793 			RTE_LOG(ERR, EAL,
3794 				"fail to unregister device event callback.\n");
3795 			return;
3796 		}
3797 
3798 		ret = rte_dev_hotplug_handle_disable();
3799 		if (ret) {
3800 			RTE_LOG(ERR, EAL,
3801 				"fail to disable hotplug handling.\n");
3802 			return;
3803 		}
3804 	}
3805 	for (i = 0 ; i < RTE_DIM(mempools) ; i++) {
3806 		if (mempools[i])
3807 			mempool_free_mp(mempools[i]);
3808 	}
3809 	free(xstats_display);
3810 
3811 	printf("\nBye...\n");
3812 }
3813 
3814 typedef void (*cmd_func_t)(void);
3815 struct pmd_test_command {
3816 	const char *cmd_name;
3817 	cmd_func_t cmd_func;
3818 };
3819 
3820 /* Check the link status of all ports within up to 9 seconds, then print the final status of each */
3821 static void
3822 check_all_ports_link_status(uint32_t port_mask)
3823 {
3824 #define CHECK_INTERVAL 100 /* 100ms */
3825 #define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
3826 	portid_t portid;
3827 	uint8_t count, all_ports_up, print_flag = 0;
3828 	struct rte_eth_link link;
3829 	int ret;
3830 	char link_status[RTE_ETH_LINK_MAX_STR_LEN];
3831 
3832 	printf("Checking link statuses...\n");
3833 	fflush(stdout);
3834 	for (count = 0; count <= MAX_CHECK_TIME; count++) {
3835 		all_ports_up = 1;
3836 		RTE_ETH_FOREACH_DEV(portid) {
3837 			if ((port_mask & (1 << portid)) == 0)
3838 				continue;
3839 			memset(&link, 0, sizeof(link));
3840 			ret = rte_eth_link_get_nowait(portid, &link);
3841 			if (ret < 0) {
3842 				all_ports_up = 0;
3843 				if (print_flag == 1)
3844 					fprintf(stderr,
3845 						"Port %u link get failed: %s\n",
3846 						portid, rte_strerror(-ret));
3847 				continue;
3848 			}
3849 			/* print link status if flag set */
3850 			if (print_flag == 1) {
3851 				rte_eth_link_to_str(link_status,
3852 					sizeof(link_status), &link);
3853 				printf("Port %d %s\n", portid, link_status);
3854 				continue;
3855 			}
3856 			/* clear all_ports_up flag if any link down */
3857 			if (link.link_status == RTE_ETH_LINK_DOWN) {
3858 				all_ports_up = 0;
3859 				break;
3860 			}
3861 		}
3862 		/* after printing the final link status of all ports, get out */
3863 		if (print_flag == 1)
3864 			break;
3865 
3866 		if (all_ports_up == 0) {
3867 			fflush(stdout);
3868 			rte_delay_ms(CHECK_INTERVAL);
3869 		}
3870 
3871 		/* set the print_flag if all ports up or timeout */
3872 		if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
3873 			print_flag = 1;
3874 		}
3875 
3876 		if (lsc_interrupt)
3877 			break;
3878 	}
3879 }
3880 
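/*
 * Deferred handler for a device removal (RMV) event: pause packet
 * forwarding if the port was forwarding, stop and close the port,
 * detach its device, then resume forwarding if it had been paused.
 */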
3881 static void
3882 rmv_port_callback(void *arg)
3883 {
3884 	int need_to_start = 0;
3885 	int org_no_link_check = no_link_check;
3886 	portid_t port_id = (intptr_t)arg;
3887 	struct rte_eth_dev_info dev_info;
3888 	int ret;
3889 
3890 	RTE_ETH_VALID_PORTID_OR_RET(port_id);
3891 
3892 	if (!test_done && port_is_forwarding(port_id)) {
3893 		need_to_start = 1;
3894 		stop_packet_forwarding();
3895 	}
3896 	no_link_check = 1;
3897 	stop_port(port_id);
3898 	no_link_check = org_no_link_check;
3899 
3900 	ret = eth_dev_info_get_print_err(port_id, &dev_info);
3901 	if (ret != 0)
3902 		TESTPMD_LOG(ERR,
3903 			"Failed to get device info for port %d, not detaching\n",
3904 			port_id);
3905 	else {
3906 		struct rte_device *device = dev_info.device;
3907 		close_port(port_id);
3908 		detach_device(device); /* might be already removed or have more ports */
3909 	}
3910 	if (need_to_start)
3911 		start_packet_forwarding(0);
3912 }
3913 
3914 /* This function is used by the interrupt thread */
3915 static int
3916 eth_event_callback(portid_t port_id, enum rte_eth_event_type type, void *param,
3917 		  void *ret_param)
3918 {
3919 	RTE_SET_USED(param);
3920 	RTE_SET_USED(ret_param);
3921 
3922 	if (type >= RTE_ETH_EVENT_MAX) {
3923 		fprintf(stderr,
3924 			"\nPort %" PRIu16 ": %s called upon invalid event %d\n",
3925 			port_id, __func__, type);
3926 		fflush(stderr);
3927 	} else if (event_print_mask & (UINT32_C(1) << type)) {
3928 		printf("\nPort %" PRIu16 ": %s event\n", port_id,
3929 			eth_event_desc[type]);
3930 		fflush(stdout);
3931 	}
3932 
3933 	switch (type) {
3934 	case RTE_ETH_EVENT_NEW:
3935 		ports[port_id].need_setup = 1;
3936 		ports[port_id].port_status = RTE_PORT_HANDLING;
3937 		break;
3938 	case RTE_ETH_EVENT_INTR_RMV:
3939 		if (port_id_is_invalid(port_id, DISABLED_WARN))
3940 			break;
3941 		if (rte_eal_alarm_set(100000,
3942 				rmv_port_callback, (void *)(intptr_t)port_id))
3943 			fprintf(stderr,
3944 				"Could not set up deferred device removal\n");
3945 		break;
3946 	case RTE_ETH_EVENT_DESTROY:
3947 		ports[port_id].port_status = RTE_PORT_CLOSED;
3948 		printf("Port %u is closed\n", port_id);
3949 		break;
3950 	case RTE_ETH_EVENT_RX_AVAIL_THRESH: {
3951 		uint16_t rxq_id;
3952 		int ret;
3953 
3954 		/* avail_thresh query API rewinds rxq_id, no need to check max RxQ num */
3955 		for (rxq_id = 0; ; rxq_id++) {
3956 			ret = rte_eth_rx_avail_thresh_query(port_id, &rxq_id,
3957 							    NULL);
3958 			if (ret <= 0)
3959 				break;
3960 			printf("Received avail_thresh event, port: %u, rxq_id: %u\n",
3961 			       port_id, rxq_id);
3962 
3963 #ifdef RTE_NET_MLX5
3964 			mlx5_test_avail_thresh_event_handler(port_id, rxq_id);
3965 #endif
3966 		}
3967 		break;
3968 	}
3969 	default:
3970 		break;
3971 	}
3972 	return 0;
3973 }
3974 
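/*
 * Register eth_event_callback() for every ethdev event type on all
 * ports (RTE_ETH_ALL), so the events above are reported and handled.
 */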
3975 static int
3976 register_eth_event_callback(void)
3977 {
3978 	int ret;
3979 	enum rte_eth_event_type event;
3980 
3981 	for (event = RTE_ETH_EVENT_UNKNOWN;
3982 			event < RTE_ETH_EVENT_MAX; event++) {
3983 		ret = rte_eth_dev_callback_register(RTE_ETH_ALL,
3984 				event,
3985 				eth_event_callback,
3986 				NULL);
3987 		if (ret != 0) {
3988 			TESTPMD_LOG(ERR, "Failed to register callback for "
3989 					"%s event\n", eth_event_desc[event]);
3990 			return -1;
3991 		}
3992 	}
3993 
3994 	return 0;
3995 }
3996 
3997 /* This function is used by the interrupt thread */
3998 static void
3999 dev_event_callback(const char *device_name, enum rte_dev_event_type type,
4000 			     __rte_unused void *arg)
4001 {
4002 	uint16_t port_id;
4003 	int ret;
4004 
4005 	if (type >= RTE_DEV_EVENT_MAX) {
4006 		fprintf(stderr, "%s called upon invalid event %d\n",
4007 			__func__, type);
4008 		fflush(stderr);
4009 	}
4010 
4011 	switch (type) {
4012 	case RTE_DEV_EVENT_REMOVE:
4013 		RTE_LOG(DEBUG, EAL, "The device %s has been removed!\n",
4014 			device_name);
4015 		ret = rte_eth_dev_get_port_by_name(device_name, &port_id);
4016 		if (ret) {
4017 			RTE_LOG(ERR, EAL, "cannot get port for device %s!\n",
4018 				device_name);
4019 			return;
4020 		}
4021 		/*
4022 		 * Because the user's callback is invoked from within the EAL
4023 		 * interrupt callback, that interrupt callback must return
4024 		 * before it can be unregistered when detaching the device.
4025 		 * So finish this callback quickly and detach the device via a
4026 		 * deferred removal instead. This is a workaround; once device
4027 		 * detaching is moved into the EAL, the deferred removal can
4028 		 * be dropped.
4029 		 */
4030 		if (rte_eal_alarm_set(100000,
4031 				rmv_port_callback, (void *)(intptr_t)port_id))
4032 			RTE_LOG(ERR, EAL,
4033 				"Could not set up deferred device removal\n");
4034 		break;
4035 	case RTE_DEV_EVENT_ADD:
4036 		RTE_LOG(ERR, EAL, "The device %s has been added!\n",
4037 			device_name);
4038 		/* TODO: once kernel driver binding is finished,
4039 		 * attach the new port here.
4040 		 */
4041 		break;
4042 	default:
4043 		break;
4044 	}
4045 }
4046 
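/*
 * Fill the per-queue Rx/Tx configuration of a port: start from the
 * driver defaults reported in dev_info, re-apply any offloads already
 * set on the queues, and override the threshold parameters with the
 * command-line values when they were given.
 */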
4047 static void
4048 rxtx_port_config(portid_t pid)
4049 {
4050 	uint16_t qid;
4051 	uint64_t offloads;
4052 	struct rte_port *port = &ports[pid];
4053 
4054 	for (qid = 0; qid < nb_rxq; qid++) {
4055 		offloads = port->rxq[qid].conf.offloads;
4056 		port->rxq[qid].conf = port->dev_info.default_rxconf;
4057 
4058 		if (rxq_share > 0 &&
4059 		    (port->dev_info.dev_capa & RTE_ETH_DEV_CAPA_RXQ_SHARE)) {
4060 			/* Non-zero share group to enable RxQ share. */
4061 			port->rxq[qid].conf.share_group = pid / rxq_share + 1;
4062 			port->rxq[qid].conf.share_qid = qid; /* Equal mapping. */
4063 		}
4064 
4065 		if (offloads != 0)
4066 			port->rxq[qid].conf.offloads = offloads;
4067 
4068 		/* Check if any Rx parameters have been passed */
4069 		if (rx_pthresh != RTE_PMD_PARAM_UNSET)
4070 			port->rxq[qid].conf.rx_thresh.pthresh = rx_pthresh;
4071 
4072 		if (rx_hthresh != RTE_PMD_PARAM_UNSET)
4073 			port->rxq[qid].conf.rx_thresh.hthresh = rx_hthresh;
4074 
4075 		if (rx_wthresh != RTE_PMD_PARAM_UNSET)
4076 			port->rxq[qid].conf.rx_thresh.wthresh = rx_wthresh;
4077 
4078 		if (rx_free_thresh != RTE_PMD_PARAM_UNSET)
4079 			port->rxq[qid].conf.rx_free_thresh = rx_free_thresh;
4080 
4081 		if (rx_drop_en != RTE_PMD_PARAM_UNSET)
4082 			port->rxq[qid].conf.rx_drop_en = rx_drop_en;
4083 
4084 		port->nb_rx_desc[qid] = nb_rxd;
4085 	}
4086 
4087 	for (qid = 0; qid < nb_txq; qid++) {
4088 		offloads = port->txq[qid].conf.offloads;
4089 		port->txq[qid].conf = port->dev_info.default_txconf;
4090 		if (offloads != 0)
4091 			port->txq[qid].conf.offloads = offloads;
4092 
4093 		/* Check if any Tx parameters have been passed */
4094 		if (tx_pthresh != RTE_PMD_PARAM_UNSET)
4095 			port->txq[qid].conf.tx_thresh.pthresh = tx_pthresh;
4096 
4097 		if (tx_hthresh != RTE_PMD_PARAM_UNSET)
4098 			port->txq[qid].conf.tx_thresh.hthresh = tx_hthresh;
4099 
4100 		if (tx_wthresh != RTE_PMD_PARAM_UNSET)
4101 			port->txq[qid].conf.tx_thresh.wthresh = tx_wthresh;
4102 
4103 		if (tx_rs_thresh != RTE_PMD_PARAM_UNSET)
4104 			port->txq[qid].conf.tx_rs_thresh = tx_rs_thresh;
4105 
4106 		if (tx_free_thresh != RTE_PMD_PARAM_UNSET)
4107 			port->txq[qid].conf.tx_free_thresh = tx_free_thresh;
4108 
4109 		port->nb_tx_desc[qid] = nb_txd;
4110 	}
4111 }
4112 
4113 /*
4114  * Helper function to set MTU from frame size
4115  *
4116  * port->dev_info should be set before calling this function.
4117  *
4118  * return 0 on success, negative on error
4119  */
4120 int
4121 update_mtu_from_frame_size(portid_t portid, uint32_t max_rx_pktlen)
4122 {
4123 	struct rte_port *port = &ports[portid];
4124 	uint32_t eth_overhead;
4125 	uint16_t mtu, new_mtu;
4126 
4127 	eth_overhead = get_eth_overhead(&port->dev_info);
4128 
4129 	if (rte_eth_dev_get_mtu(portid, &mtu) != 0) {
4130 		printf("Failed to get MTU for port %u\n", portid);
4131 		return -1;
4132 	}
4133 
4134 	new_mtu = max_rx_pktlen - eth_overhead;
4135 
4136 	if (mtu == new_mtu)
4137 		return 0;
4138 
4139 	if (eth_dev_set_mtu_mp(portid, new_mtu) != 0) {
4140 		fprintf(stderr,
4141 			"Failed to set MTU to %u for port %u\n",
4142 			new_mtu, portid);
4143 		return -1;
4144 	}
4145 
4146 	port->dev_conf.rxmode.mtu = new_mtu;
4147 
4148 	return 0;
4149 }
4150 
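/*
 * Apply the default configuration to every probed port: enable RSS
 * when more than one Rx queue is used, set up the Rx/Tx queues, and
 * enable LSC/RMV interrupts when requested and supported.
 */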
4151 void
4152 init_port_config(void)
4153 {
4154 	portid_t pid;
4155 	struct rte_port *port;
4156 	int ret, i;
4157 
4158 	RTE_ETH_FOREACH_DEV(pid) {
4159 		port = &ports[pid];
4160 
4161 		ret = eth_dev_info_get_print_err(pid, &port->dev_info);
4162 		if (ret != 0)
4163 			return;
4164 
4165 		if (nb_rxq > 1) {
4166 			port->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
4167 			port->dev_conf.rx_adv_conf.rss_conf.rss_hf =
4168 				rss_hf & port->dev_info.flow_type_rss_offloads;
4169 		} else {
4170 			port->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
4171 			port->dev_conf.rx_adv_conf.rss_conf.rss_hf = 0;
4172 		}
4173 
4174 		if (port->dcb_flag == 0) {
4175 			if (port->dev_conf.rx_adv_conf.rss_conf.rss_hf != 0) {
4176 				port->dev_conf.rxmode.mq_mode =
4177 					(enum rte_eth_rx_mq_mode)
4178 						(rx_mq_mode & RTE_ETH_MQ_RX_RSS);
4179 			} else {
4180 				port->dev_conf.rxmode.mq_mode = RTE_ETH_MQ_RX_NONE;
4181 				port->dev_conf.rxmode.offloads &=
4182 						~RTE_ETH_RX_OFFLOAD_RSS_HASH;
4183 
4184 				for (i = 0;
4185 				     i < port->dev_info.nb_rx_queues;
4186 				     i++)
4187 					port->rxq[i].conf.offloads &=
4188 						~RTE_ETH_RX_OFFLOAD_RSS_HASH;
4189 			}
4190 		}
4191 
4192 		rxtx_port_config(pid);
4193 
4194 		ret = eth_macaddr_get_print_err(pid, &port->eth_addr);
4195 		if (ret != 0)
4196 			return;
4197 
4198 		if (lsc_interrupt && (*port->dev_info.dev_flags & RTE_ETH_DEV_INTR_LSC))
4199 			port->dev_conf.intr_conf.lsc = 1;
4200 		if (rmv_interrupt && (*port->dev_info.dev_flags & RTE_ETH_DEV_INTR_RMV))
4201 			port->dev_conf.intr_conf.rmv = 1;
4202 	}
4203 }
4204 
4205 void set_port_slave_flag(portid_t slave_pid)
4206 {
4207 	struct rte_port *port;
4208 
4209 	port = &ports[slave_pid];
4210 	port->slave_flag = 1;
4211 }
4212 
4213 void clear_port_slave_flag(portid_t slave_pid)
4214 {
4215 	struct rte_port *port;
4216 
4217 	port = &ports[slave_pid];
4218 	port->slave_flag = 0;
4219 }
4220 
4221 uint8_t port_is_bonding_slave(portid_t slave_pid)
4222 {
4223 	struct rte_port *port;
4224 	struct rte_eth_dev_info dev_info;
4225 	int ret;
4226 
4227 	port = &ports[slave_pid];
4228 	ret = eth_dev_info_get_print_err(slave_pid, &dev_info);
4229 	if (ret != 0) {
4230 		TESTPMD_LOG(ERR,
4231 			"Failed to get device info for port id %d, "
4232 			"cannot determine if the port is a bonded slave\n",
4233 			slave_pid);
4234 		return 0;
4235 	}
4236 	if ((*dev_info.dev_flags & RTE_ETH_DEV_BONDED_SLAVE) || (port->slave_flag == 1))
4237 		return 1;
4238 	return 0;
4239 }
4240 
4241 const uint16_t vlan_tags[] = {
4242 		0,  1,  2,  3,  4,  5,  6,  7,
4243 		8,  9, 10, 11,  12, 13, 14, 15,
4244 		16, 17, 18, 19, 20, 21, 22, 23,
4245 		24, 25, 26, 27, 28, 29, 30, 31
4246 };
4247 
4248 static  int
4249 get_eth_dcb_conf(portid_t pid, struct rte_eth_conf *eth_conf,
4250 		 enum dcb_mode_enable dcb_mode,
4251 		 enum rte_eth_nb_tcs num_tcs,
4252 		 uint8_t pfc_en)
4253 {
4254 	uint8_t i;
4255 	int32_t rc;
4256 	struct rte_eth_rss_conf rss_conf;
4257 
4258 	/*
4259 	 * Builds up the correct configuration for dcb+vt based on the vlan tags array
4260 	 * given above, and the number of traffic classes available for use.
4261 	 */
4262 	if (dcb_mode == DCB_VT_ENABLED) {
4263 		struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
4264 				&eth_conf->rx_adv_conf.vmdq_dcb_conf;
4265 		struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
4266 				&eth_conf->tx_adv_conf.vmdq_dcb_tx_conf;
4267 
4268 		/* VMDQ+DCB RX and TX configurations */
4269 		vmdq_rx_conf->enable_default_pool = 0;
4270 		vmdq_rx_conf->default_pool = 0;
4271 		vmdq_rx_conf->nb_queue_pools =
4272 			(num_tcs ==  RTE_ETH_4_TCS ? RTE_ETH_32_POOLS : RTE_ETH_16_POOLS);
4273 		vmdq_tx_conf->nb_queue_pools =
4274 			(num_tcs ==  RTE_ETH_4_TCS ? RTE_ETH_32_POOLS : RTE_ETH_16_POOLS);
4275 
4276 		vmdq_rx_conf->nb_pool_maps = vmdq_rx_conf->nb_queue_pools;
4277 		for (i = 0; i < vmdq_rx_conf->nb_pool_maps; i++) {
4278 			vmdq_rx_conf->pool_map[i].vlan_id = vlan_tags[i];
4279 			vmdq_rx_conf->pool_map[i].pools =
4280 				1 << (i % vmdq_rx_conf->nb_queue_pools);
4281 		}
4282 		for (i = 0; i < RTE_ETH_DCB_NUM_USER_PRIORITIES; i++) {
4283 			vmdq_rx_conf->dcb_tc[i] = i % num_tcs;
4284 			vmdq_tx_conf->dcb_tc[i] = i % num_tcs;
4285 		}
4286 
4287 		/* set DCB mode of RX and TX of multiple queues */
4288 		eth_conf->rxmode.mq_mode =
4289 				(enum rte_eth_rx_mq_mode)
4290 					(rx_mq_mode & RTE_ETH_MQ_RX_VMDQ_DCB);
4291 		eth_conf->txmode.mq_mode = RTE_ETH_MQ_TX_VMDQ_DCB;
4292 	} else {
4293 		struct rte_eth_dcb_rx_conf *rx_conf =
4294 				&eth_conf->rx_adv_conf.dcb_rx_conf;
4295 		struct rte_eth_dcb_tx_conf *tx_conf =
4296 				&eth_conf->tx_adv_conf.dcb_tx_conf;
4297 
4298 		memset(&rss_conf, 0, sizeof(struct rte_eth_rss_conf));
4299 
4300 		rc = rte_eth_dev_rss_hash_conf_get(pid, &rss_conf);
4301 		if (rc != 0)
4302 			return rc;
4303 
4304 		rx_conf->nb_tcs = num_tcs;
4305 		tx_conf->nb_tcs = num_tcs;
4306 
4307 		for (i = 0; i < RTE_ETH_DCB_NUM_USER_PRIORITIES; i++) {
4308 			rx_conf->dcb_tc[i] = i % num_tcs;
4309 			tx_conf->dcb_tc[i] = i % num_tcs;
4310 		}
4311 
4312 		eth_conf->rxmode.mq_mode =
4313 				(enum rte_eth_rx_mq_mode)
4314 					(rx_mq_mode & RTE_ETH_MQ_RX_DCB_RSS);
4315 		eth_conf->rx_adv_conf.rss_conf = rss_conf;
4316 		eth_conf->txmode.mq_mode = RTE_ETH_MQ_TX_DCB;
4317 	}
4318 
4319 	if (pfc_en)
4320 		eth_conf->dcb_capability_en =
4321 				RTE_ETH_DCB_PG_SUPPORT | RTE_ETH_DCB_PFC_SUPPORT;
4322 	else
4323 		eth_conf->dcb_capability_en = RTE_ETH_DCB_PG_SUPPORT;
4324 
4325 	return 0;
4326 }
4327 
4328 int
4329 init_port_dcb_config(portid_t pid,
4330 		     enum dcb_mode_enable dcb_mode,
4331 		     enum rte_eth_nb_tcs num_tcs,
4332 		     uint8_t pfc_en)
4333 {
4334 	struct rte_eth_conf port_conf;
4335 	struct rte_port *rte_port;
4336 	int retval;
4337 	uint16_t i;
4338 
4339 	if (num_procs > 1) {
4340 		printf("The multi-process feature doesn't support DCB.\n");
4341 		return -ENOTSUP;
4342 	}
4343 	rte_port = &ports[pid];
4344 
4345 	/* retain the original device configuration. */
4346 	memcpy(&port_conf, &rte_port->dev_conf, sizeof(struct rte_eth_conf));
4347 
4348 	/* set configuration of DCB in VT mode and DCB in non-VT mode */
4349 	retval = get_eth_dcb_conf(pid, &port_conf, dcb_mode, num_tcs, pfc_en);
4350 	if (retval < 0)
4351 		return retval;
4352 	port_conf.rxmode.offloads |= RTE_ETH_RX_OFFLOAD_VLAN_FILTER;
4353 	/* remove RSS HASH offload for DCB in vt mode */
4354 	if (port_conf.rxmode.mq_mode == RTE_ETH_MQ_RX_VMDQ_DCB) {
4355 		port_conf.rxmode.offloads &= ~RTE_ETH_RX_OFFLOAD_RSS_HASH;
4356 		for (i = 0; i < nb_rxq; i++)
4357 			rte_port->rxq[i].conf.offloads &=
4358 				~RTE_ETH_RX_OFFLOAD_RSS_HASH;
4359 	}
4360 
4361 	/* re-configure the device. */
4362 	retval = rte_eth_dev_configure(pid, nb_rxq, nb_txq, &port_conf);
4363 	if (retval < 0)
4364 		return retval;
4365 
4366 	retval = eth_dev_info_get_print_err(pid, &rte_port->dev_info);
4367 	if (retval != 0)
4368 		return retval;
4369 
4370 	/* If dev_info.vmdq_pool_base is greater than 0,
4371 	 * the queue IDs of the VMDq pools start after the PF queues.
4372 	 */
4373 	if (dcb_mode == DCB_VT_ENABLED &&
4374 	    rte_port->dev_info.vmdq_pool_base > 0) {
4375 		fprintf(stderr,
4376 			"VMDQ_DCB multi-queue mode is nonsensical for port %d.\n",
4377 			pid);
4378 		return -1;
4379 	}
4380 
4381 	/* Assume the ports in testpmd have the same DCB capability
4382 	 * and the same number of Rx and Tx queues in DCB mode.
4383 	 */
4384 	if (dcb_mode == DCB_VT_ENABLED) {
4385 		if (rte_port->dev_info.max_vfs > 0) {
4386 			nb_rxq = rte_port->dev_info.nb_rx_queues;
4387 			nb_txq = rte_port->dev_info.nb_tx_queues;
4388 		} else {
4389 			nb_rxq = rte_port->dev_info.max_rx_queues;
4390 			nb_txq = rte_port->dev_info.max_tx_queues;
4391 		}
4392 	} else {
4393 		/* if VT is disabled, use all PF queues */
4394 		if (rte_port->dev_info.vmdq_pool_base == 0) {
4395 			nb_rxq = rte_port->dev_info.max_rx_queues;
4396 			nb_txq = rte_port->dev_info.max_tx_queues;
4397 		} else {
4398 			nb_rxq = (queueid_t)num_tcs;
4399 			nb_txq = (queueid_t)num_tcs;
4400 
4401 		}
4402 	}
4403 	rx_free_thresh = 64;
4404 
4405 	memcpy(&rte_port->dev_conf, &port_conf, sizeof(struct rte_eth_conf));
4406 
4407 	rxtx_port_config(pid);
4408 	/* VLAN filter */
4409 	rte_port->dev_conf.rxmode.offloads |= RTE_ETH_RX_OFFLOAD_VLAN_FILTER;
4410 	for (i = 0; i < RTE_DIM(vlan_tags); i++)
4411 		rx_vft_set(pid, vlan_tags[i], 1);
4412 
4413 	retval = eth_macaddr_get_print_err(pid, &rte_port->eth_addr);
4414 	if (retval != 0)
4415 		return retval;
4416 
4417 	rte_port->dcb_flag = 1;
4418 
4419 	/* Enter DCB configuration status */
4420 	dcb_config = 1;
4421 
4422 	return 0;
4423 }
4424 
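/*
 * Allocate the array of port structures and initialize the per-port
 * defaults and the NUMA configuration tables.
 */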
4425 static void
4426 init_port(void)
4427 {
4428 	int i;
4429 
4430 	/* Configuration of Ethernet ports. */
4431 	ports = rte_zmalloc("testpmd: ports",
4432 			    sizeof(struct rte_port) * RTE_MAX_ETHPORTS,
4433 			    RTE_CACHE_LINE_SIZE);
4434 	if (ports == NULL) {
4435 		rte_exit(EXIT_FAILURE,
4436 				"rte_zmalloc(%d struct rte_port) failed\n",
4437 				RTE_MAX_ETHPORTS);
4438 	}
4439 	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
4440 		ports[i].fwd_mac_swap = 1;
4441 		ports[i].xstats_info.allocated = false;
4442 		LIST_INIT(&ports[i].flow_tunnel_list);
4443 	}
4444 	/* Initialize ports NUMA structures */
4445 	memset(port_numa, NUMA_NO_CONFIG, RTE_MAX_ETHPORTS);
4446 	memset(rxring_numa, NUMA_NO_CONFIG, RTE_MAX_ETHPORTS);
4447 	memset(txring_numa, NUMA_NO_CONFIG, RTE_MAX_ETHPORTS);
4448 }
4449 
4450 static void
4451 force_quit(void)
4452 {
4453 	pmd_test_exit();
4454 	prompt_exit();
4455 }
4456 
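/*
 * Clear the terminal with ANSI escape sequences and display the
 * statistics of every forwarding port.
 */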
4457 static void
4458 print_stats(void)
4459 {
4460 	uint8_t i;
4461 	const char clr[] = { 27, '[', '2', 'J', '\0' };
4462 	const char top_left[] = { 27, '[', '1', ';', '1', 'H', '\0' };
4463 
4464 	/* Clear screen and move to top left */
4465 	printf("%s%s", clr, top_left);
4466 
4467 	printf("\nPort statistics ====================================");
4468 	for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++)
4469 		nic_stats_display(fwd_ports_ids[i]);
4470 
4471 	fflush(stdout);
4472 }
4473 
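/*
 * SIGINT/SIGTERM handler: uninitialize the capture and latency-stats
 * frameworks, run the normal exit path, set f_quit for the forwarding
 * loop, then re-raise the signal so the process exits with the
 * expected status.
 */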
4474 static void
4475 signal_handler(int signum)
4476 {
4477 	if (signum == SIGINT || signum == SIGTERM) {
4478 		fprintf(stderr, "\nSignal %d received, preparing to exit...\n",
4479 			signum);
4480 #ifdef RTE_LIB_PDUMP
4481 		/* uninitialize packet capture framework */
4482 		rte_pdump_uninit();
4483 #endif
4484 #ifdef RTE_LIB_LATENCYSTATS
4485 		if (latencystats_enabled != 0)
4486 			rte_latencystats_uninit();
4487 #endif
4488 		force_quit();
4489 		/* Set flag to indicate forced termination. */
4490 		f_quit = 1;
4491 		/* exit with the expected status */
4492 #ifndef RTE_EXEC_ENV_WINDOWS
4493 		signal(signum, SIG_DFL);
4494 		kill(getpid(), signum);
4495 #endif
4496 	}
4497 }
4498 
4499 int
4500 main(int argc, char** argv)
4501 {
4502 	int diag;
4503 	portid_t port_id;
4504 	uint16_t count;
4505 	int ret;
4506 
4507 	signal(SIGINT, signal_handler);
4508 	signal(SIGTERM, signal_handler);
4509 
4510 	testpmd_logtype = rte_log_register("testpmd");
4511 	if (testpmd_logtype < 0)
4512 		rte_exit(EXIT_FAILURE, "Cannot register log type");
4513 	rte_log_set_level(testpmd_logtype, RTE_LOG_DEBUG);
4514 
4515 	diag = rte_eal_init(argc, argv);
4516 	if (diag < 0)
4517 		rte_exit(EXIT_FAILURE, "Cannot init EAL: %s\n",
4518 			 rte_strerror(rte_errno));
4519 
4520 	/* allocate port structures, and init them */
4521 	init_port();
4522 
4523 	ret = register_eth_event_callback();
4524 	if (ret != 0)
4525 		rte_exit(EXIT_FAILURE, "Cannot register for ethdev events");
4526 
4527 #ifdef RTE_LIB_PDUMP
4528 	/* initialize packet capture framework */
4529 	rte_pdump_init();
4530 #endif
4531 
4532 	count = 0;
4533 	RTE_ETH_FOREACH_DEV(port_id) {
4534 		ports_ids[count] = port_id;
4535 		count++;
4536 	}
4537 	nb_ports = (portid_t) count;
4538 	if (nb_ports == 0)
4539 		TESTPMD_LOG(WARNING, "No probed ethernet devices\n");
4540 
4541 	set_def_fwd_config();
4542 	if (nb_lcores == 0)
4543 		rte_exit(EXIT_FAILURE, "No cores defined for forwarding\n"
4544 			 "Check the core mask argument\n");
4545 
4546 	/* Bitrate/latency stats disabled by default */
4547 #ifdef RTE_LIB_BITRATESTATS
4548 	bitrate_enabled = 0;
4549 #endif
4550 #ifdef RTE_LIB_LATENCYSTATS
4551 	latencystats_enabled = 0;
4552 #endif
4553 
4554 	/* on FreeBSD, mlockall() is disabled by default */
4555 #ifdef RTE_EXEC_ENV_FREEBSD
4556 	do_mlockall = 0;
4557 #else
4558 	do_mlockall = 1;
4559 #endif
4560 
4561 	argc -= diag;
4562 	argv += diag;
4563 	if (argc > 1)
4564 		launch_args_parse(argc, argv);
4565 
4566 #ifndef RTE_EXEC_ENV_WINDOWS
4567 	if (do_mlockall && mlockall(MCL_CURRENT | MCL_FUTURE)) {
4568 		TESTPMD_LOG(NOTICE, "mlockall() failed with error \"%s\"\n",
4569 			strerror(errno));
4570 	}
4571 #endif
4572 
4573 	if (tx_first && interactive)
4574 		rte_exit(EXIT_FAILURE, "--tx-first cannot be used in "
4575 				"interactive mode.\n");
4576 
4577 	if (tx_first && lsc_interrupt) {
4578 		fprintf(stderr,
4579 			"Warning: lsc_interrupt needs to be off when using tx_first. Disabling.\n");
4580 		lsc_interrupt = 0;
4581 	}
4582 
4583 	if (!nb_rxq && !nb_txq)
4584 		fprintf(stderr,
4585 			"Warning: Either rx or tx queues should be non-zero\n");
4586 
4587 	if (nb_rxq > 1 && nb_rxq > nb_txq)
4588 		fprintf(stderr,
4589 			"Warning: nb_rxq=%d enables RSS configuration, but nb_txq=%d will prevent it from being fully tested.\n",
4590 			nb_rxq, nb_txq);
4591 
4592 	init_config();
4593 
4594 	if (hot_plug) {
4595 		ret = rte_dev_hotplug_handle_enable();
4596 		if (ret) {
4597 			RTE_LOG(ERR, EAL,
4598 				"Failed to enable hotplug handling.\n");
4599 			return -1;
4600 		}
4601 
4602 		ret = rte_dev_event_monitor_start();
4603 		if (ret) {
4604 			RTE_LOG(ERR, EAL,
4605 				"Failed to start device event monitoring.\n");
4606 			return -1;
4607 		}
4608 
4609 		ret = rte_dev_event_callback_register(NULL,
4610 			dev_event_callback, NULL);
4611 		if (ret) {
4612 			RTE_LOG(ERR, EAL,
4613 				"Failed to register device event callback.\n");
4614 			return -1;
4615 		}
4616 	}
4617 
4618 	if (!no_device_start && start_port(RTE_PORT_ALL) != 0) {
4619 		if (!interactive) {
4620 			rte_eal_cleanup();
4621 			rte_exit(EXIT_FAILURE, "Start ports failed\n");
4622 		}
4623 		fprintf(stderr, "Start ports failed\n");
4624 	}
4625 
4626 	/* set all ports to promiscuous mode by default */
4627 	RTE_ETH_FOREACH_DEV(port_id) {
4628 		ret = rte_eth_promiscuous_enable(port_id);
4629 		if (ret != 0)
4630 			fprintf(stderr,
4631 				"Error during enabling promiscuous mode for port %u: %s - ignore\n",
4632 				port_id, rte_strerror(-ret));
4633 	}
4634 
4635 #ifdef RTE_LIB_METRICS
4636 	/* Init metrics library */
4637 	rte_metrics_init(rte_socket_id());
4638 #endif
4639 
4640 #ifdef RTE_LIB_LATENCYSTATS
4641 	if (latencystats_enabled != 0) {
4642 		int ret = rte_latencystats_init(1, NULL);
4643 		if (ret)
4644 			fprintf(stderr,
4645 				"Warning: latencystats init() returned error %d\n",
4646 				ret);
4647 		fprintf(stderr, "Latencystats running on lcore %d\n",
4648 			latencystats_lcore_id);
4649 	}
4650 #endif
4651 
4652 	/* Setup bitrate stats */
4653 #ifdef RTE_LIB_BITRATESTATS
4654 	if (bitrate_enabled != 0) {
4655 		bitrate_data = rte_stats_bitrate_create();
4656 		if (bitrate_data == NULL)
4657 			rte_exit(EXIT_FAILURE,
4658 				"Could not allocate bitrate data.\n");
4659 		rte_stats_bitrate_reg(bitrate_data);
4660 	}
4661 #endif
4662 
4663 	if (record_core_cycles)
4664 		rte_lcore_register_usage_cb(lcore_usage_callback);
4665 
4666 #ifdef RTE_LIB_CMDLINE
4667 	if (init_cmdline() != 0)
4668 		rte_exit(EXIT_FAILURE,
4669 			"Could not initialise cmdline context.\n");
4670 
4671 	if (strlen(cmdline_filename) != 0)
4672 		cmdline_read_from_file(cmdline_filename);
4673 
4674 	if (interactive == 1) {
4675 		if (auto_start) {
4676 			printf("Start automatic packet forwarding\n");
4677 			start_packet_forwarding(0);
4678 		}
4679 		prompt();
4680 		pmd_test_exit();
4681 	} else
4682 #endif
4683 	{
4684 		char c;
4685 		int rc;
4686 
4687 		f_quit = 0;
4688 
4689 		printf("No interactive command line, start packet forwarding\n");
4690 		start_packet_forwarding(tx_first);
4691 		if (stats_period != 0) {
4692 			uint64_t prev_time = 0, cur_time, diff_time = 0;
4693 			uint64_t timer_period;
4694 
4695 			/* Convert to number of cycles */
4696 			timer_period = stats_period * rte_get_timer_hz();
4697 
4698 			while (f_quit == 0) {
4699 				cur_time = rte_get_timer_cycles();
4700 				diff_time += cur_time - prev_time;
4701 
4702 				if (diff_time >= timer_period) {
4703 					print_stats();
4704 					/* Reset the timer */
4705 					diff_time = 0;
4706 				}
4707 				/* Sleep to avoid unnecessary checks */
4708 				prev_time = cur_time;
4709 				rte_delay_us_sleep(US_PER_S);
4710 			}
4711 		}
4712 
4713 		printf("Press enter to exit\n");
4714 		rc = read(0, &c, 1);
4715 		pmd_test_exit();
4716 		if (rc < 0)
4717 			return 1;
4718 	}
4719 
4720 	ret = rte_eal_cleanup();
4721 	if (ret != 0)
4722 		rte_exit(EXIT_FAILURE,
4723 			 "EAL cleanup failed: %s\n", strerror(-ret));
4724 
4725 	return EXIT_SUCCESS;
4726 }
4727