xref: /dpdk/app/test-pmd/testpmd.c (revision 081e42dab11d1add2d038fdf2bd4c86b20043d08)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2017 Intel Corporation
3  */
4 
5 #include <stdarg.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <signal.h>
9 #include <string.h>
10 #include <time.h>
11 #include <fcntl.h>
12 #ifndef RTE_EXEC_ENV_WINDOWS
13 #include <sys/mman.h>
14 #endif
15 #include <sys/types.h>
16 #include <errno.h>
17 #include <stdbool.h>
18 
19 #include <sys/queue.h>
20 #include <sys/stat.h>
21 
22 #include <stdint.h>
23 #include <unistd.h>
24 #include <inttypes.h>
25 
26 #include <rte_common.h>
27 #include <rte_errno.h>
28 #include <rte_byteorder.h>
29 #include <rte_log.h>
30 #include <rte_debug.h>
31 #include <rte_cycles.h>
32 #include <rte_memory.h>
33 #include <rte_memcpy.h>
34 #include <rte_launch.h>
35 #include <rte_eal.h>
36 #include <rte_alarm.h>
37 #include <rte_per_lcore.h>
38 #include <rte_lcore.h>
39 #include <rte_atomic.h>
40 #include <rte_branch_prediction.h>
41 #include <rte_mempool.h>
42 #include <rte_malloc.h>
43 #include <rte_mbuf.h>
44 #include <rte_mbuf_pool_ops.h>
45 #include <rte_interrupts.h>
46 #include <rte_pci.h>
47 #include <rte_ether.h>
48 #include <rte_ethdev.h>
49 #include <rte_dev.h>
50 #include <rte_string_fns.h>
51 #ifdef RTE_NET_IXGBE
52 #include <rte_pmd_ixgbe.h>
53 #endif
54 #ifdef RTE_LIB_PDUMP
55 #include <rte_pdump.h>
56 #endif
57 #include <rte_flow.h>
58 #include <rte_metrics.h>
59 #ifdef RTE_LIB_BITRATESTATS
60 #include <rte_bitrate.h>
61 #endif
62 #ifdef RTE_LIB_LATENCYSTATS
63 #include <rte_latencystats.h>
64 #endif
65 #ifdef RTE_EXEC_ENV_WINDOWS
66 #include <process.h>
67 #endif
68 
69 #include "testpmd.h"
70 
71 #ifndef MAP_HUGETLB
72 /* FreeBSD may not have MAP_HUGETLB (in fact, it probably doesn't) */
73 #define HUGE_FLAG (0x40000)
74 #else
75 #define HUGE_FLAG MAP_HUGETLB
76 #endif
77 
78 #ifndef MAP_HUGE_SHIFT
79 /* older kernels (or FreeBSD) will not have this define */
80 #define HUGE_SHIFT (26)
81 #else
82 #define HUGE_SHIFT MAP_HUGE_SHIFT
83 #endif
84 
85 #define EXTMEM_HEAP_NAME "extmem"
86 #define EXTBUF_ZONE_SIZE RTE_PGSIZE_2M
87 
88 uint16_t verbose_level = 0; /**< Silent by default. */
89 int testpmd_logtype; /**< Log type for testpmd logs */
90 
91 /* Use the main core for the command line? */
92 uint8_t interactive = 0;
93 uint8_t auto_start = 0;
94 uint8_t tx_first;
95 char cmdline_filename[PATH_MAX] = {0};
96 
97 /*
98  * NUMA support configuration.
99  * When set, the NUMA support attempts to dispatch the allocation of the
100  * RX and TX memory rings, and of the DMA memory buffers (mbufs) for the
101  * probed ports among the CPU sockets 0 and 1.
102  * Otherwise, all memory is allocated from CPU socket 0.
103  */
104 uint8_t numa_support = 1; /**< numa enabled by default */
105 
106 /*
107  * In UMA mode, all memory is allocated from socket 0 if --socket-num is
108  * not configured.
109  */
110 uint8_t socket_num = UMA_NO_CONFIG;
111 
112 /*
113  * Select mempool allocation type:
114  * - native: use regular DPDK memory
115  * - anon: use regular DPDK memory to create mempool, but populate using
116  *         anonymous memory (may not be IOVA-contiguous)
117  * - xmem: use externally allocated hugepage memory
118  */
119 uint8_t mp_alloc_type = MP_ALLOC_NATIVE;
120 
121 /*
122  * Store the sockets on which the memory pools used by the ports
123  * are allocated.
124  */
125 uint8_t port_numa[RTE_MAX_ETHPORTS];
126 
127 /*
128  * Store the sockets on which the RX rings used by the ports
129  * are allocated.
130  */
131 uint8_t rxring_numa[RTE_MAX_ETHPORTS];
132 
133 /*
134  * Store the sockets on which the TX rings used by the ports
135  * are allocated.
136  */
137 uint8_t txring_numa[RTE_MAX_ETHPORTS];
138 
139 /*
140  * Record the Ethernet address of peer target ports to which packets are
141  * forwarded.
142  * Must be instantiated with the ethernet addresses of peer traffic generator
143  * ports.
144  */
145 struct rte_ether_addr peer_eth_addrs[RTE_MAX_ETHPORTS];
146 portid_t nb_peer_eth_addrs = 0;
147 
148 /*
149  * Probed Target Environment.
150  */
151 struct rte_port *ports;	       /**< For all probed ethernet ports. */
152 portid_t nb_ports;             /**< Number of probed ethernet ports. */
153 struct fwd_lcore **fwd_lcores; /**< For all probed logical cores. */
154 lcoreid_t nb_lcores;           /**< Number of probed logical cores. */
155 
156 portid_t ports_ids[RTE_MAX_ETHPORTS]; /**< Store all port ids. */
157 
158 /*
159  * Test Forwarding Configuration.
160  *    nb_fwd_lcores <= nb_cfg_lcores <= nb_lcores
161  *    nb_fwd_ports  <= nb_cfg_ports  <= nb_ports
162  */
163 lcoreid_t nb_cfg_lcores; /**< Number of configured logical cores. */
164 lcoreid_t nb_fwd_lcores; /**< Number of forwarding logical cores. */
165 portid_t  nb_cfg_ports;  /**< Number of configured ports. */
166 portid_t  nb_fwd_ports;  /**< Number of forwarding ports. */
167 
168 unsigned int fwd_lcores_cpuids[RTE_MAX_LCORE]; /**< CPU ids configuration. */
169 portid_t fwd_ports_ids[RTE_MAX_ETHPORTS];      /**< Port ids configuration. */
170 
171 struct fwd_stream **fwd_streams; /**< For each RX queue of each port. */
172 streamid_t nb_fwd_streams;       /**< Is equal to (nb_ports * nb_rxq). */
173 
174 /*
175  * Forwarding engines.
176  */
177 struct fwd_engine * fwd_engines[] = {
178 	&io_fwd_engine,
179 	&mac_fwd_engine,
180 	&mac_swap_engine,
181 	&flow_gen_engine,
182 	&rx_only_engine,
183 	&tx_only_engine,
184 	&csum_fwd_engine,
185 	&icmp_echo_engine,
186 	&noisy_vnf_engine,
187 	&five_tuple_swap_fwd_engine,
188 #ifdef RTE_LIBRTE_IEEE1588
189 	&ieee1588_fwd_engine,
190 #endif
191 	NULL,
192 };
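
/*
 * Illustrative sketch (not part of the upstream file): the array above is
 * NULL-terminated so it can be walked without a separate count. Assuming the
 * fwd_mode_name field declared in testpmd.h, listing the available engines
 * could look like:
 *
 *	for (i = 0; fwd_engines[i] != NULL; i++)
 *		printf("%s\n", fwd_engines[i]->fwd_mode_name);
 */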
193 
194 struct rte_mempool *mempools[RTE_MAX_NUMA_NODES * MAX_SEGS_BUFFER_SPLIT];
195 uint16_t mempool_flags;
196 
197 struct fwd_config cur_fwd_config;
198 struct fwd_engine *cur_fwd_eng = &io_fwd_engine; /**< IO mode by default. */
199 uint32_t retry_enabled;
200 uint32_t burst_tx_delay_time = BURST_TX_WAIT_US;
201 uint32_t burst_tx_retry_num = BURST_TX_RETRIES;
202 
203 uint32_t mbuf_data_size_n = 1; /* Number of specified mbuf sizes. */
204 uint16_t mbuf_data_size[MAX_SEGS_BUFFER_SPLIT] = {
205 	DEFAULT_MBUF_DATA_SIZE
206 }; /**< Mbuf data space size. */
207 uint32_t param_total_num_mbufs = 0;  /**< number of mbufs in all pools - if
208                                       * specified on command-line. */
209 uint16_t stats_period; /**< Period to show statistics (disabled by default) */
210 
211 /*
212  * In a container, the process running with the 'stats-period' option cannot be
213  * terminated. Set this flag to exit the stats-period loop after SIGINT/SIGTERM is received.
214  */
215 uint8_t f_quit;
216 
217 /*
218  * Configuration of packet segments used to scatter received packets
219  * if one of the split features is configured.
220  */
221 uint16_t rx_pkt_seg_lengths[MAX_SEGS_BUFFER_SPLIT];
222 uint8_t  rx_pkt_nb_segs; /**< Number of segments to split */
223 uint16_t rx_pkt_seg_offsets[MAX_SEGS_BUFFER_SPLIT];
224 uint8_t  rx_pkt_nb_offs; /**< Number of specified offsets */
225 
226 /*
227  * Configuration of packet segments used by the "txonly" processing engine.
228  */
229 uint16_t tx_pkt_length = TXONLY_DEF_PACKET_LEN; /**< TXONLY packet length. */
230 uint16_t tx_pkt_seg_lengths[RTE_MAX_SEGS_PER_PKT] = {
231 	TXONLY_DEF_PACKET_LEN,
232 };
233 uint8_t  tx_pkt_nb_segs = 1; /**< Number of segments in TXONLY packets */
234 
235 enum tx_pkt_split tx_pkt_split = TX_PKT_SPLIT_OFF;
236 /**< Split policy for packets to TX. */
237 
238 uint8_t txonly_multi_flow;
239 /**< Whether multiple flows are generated in TXONLY mode. */
240 
241 uint32_t tx_pkt_times_inter;
242 /**< Timings for send scheduling in TXONLY mode, time between bursts. */
243 
244 uint32_t tx_pkt_times_intra;
245 /**< Timings for send scheduling in TXONLY mode, time between packets. */
246 
247 uint16_t nb_pkt_per_burst = DEF_PKT_BURST; /**< Number of packets per burst. */
248 uint16_t nb_pkt_flowgen_clones; /**< Number of Tx packet clones to send in flowgen mode. */
249 int nb_flows_flowgen = 1024; /**< Number of flows in flowgen mode. */
250 uint16_t mb_mempool_cache = DEF_MBUF_CACHE; /**< Size of mbuf mempool cache. */
251 
252 /* Whether the current configuration is in DCB mode; 0 means it is not. */
253 uint8_t dcb_config = 0;
254 
255 /*
256  * Configurable number of RX/TX queues.
257  */
258 queueid_t nb_hairpinq; /**< Number of hairpin queues per port. */
259 queueid_t nb_rxq = 1; /**< Number of RX queues per port. */
260 queueid_t nb_txq = 1; /**< Number of TX queues per port. */
261 
262 /*
263  * Configurable number of RX/TX ring descriptors.
264  * Defaults are supplied by drivers via ethdev.
265  */
266 #define RTE_TEST_RX_DESC_DEFAULT 0
267 #define RTE_TEST_TX_DESC_DEFAULT 0
268 uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT; /**< Number of RX descriptors. */
269 uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT; /**< Number of TX descriptors. */
270 
271 #define RTE_PMD_PARAM_UNSET -1
272 /*
273  * Configurable values of RX and TX ring threshold registers.
274  */
275 
276 int8_t rx_pthresh = RTE_PMD_PARAM_UNSET;
277 int8_t rx_hthresh = RTE_PMD_PARAM_UNSET;
278 int8_t rx_wthresh = RTE_PMD_PARAM_UNSET;
279 
280 int8_t tx_pthresh = RTE_PMD_PARAM_UNSET;
281 int8_t tx_hthresh = RTE_PMD_PARAM_UNSET;
282 int8_t tx_wthresh = RTE_PMD_PARAM_UNSET;
283 
284 /*
285  * Configurable value of RX free threshold.
286  */
287 int16_t rx_free_thresh = RTE_PMD_PARAM_UNSET;
288 
289 /*
290  * Configurable value of RX drop enable.
291  */
292 int8_t rx_drop_en = RTE_PMD_PARAM_UNSET;
293 
294 /*
295  * Configurable value of TX free threshold.
296  */
297 int16_t tx_free_thresh = RTE_PMD_PARAM_UNSET;
298 
299 /*
300  * Configurable value of TX RS bit threshold.
301  */
302 int16_t tx_rs_thresh = RTE_PMD_PARAM_UNSET;
303 
304 /*
305  * Configurable number of packets buffered before sending.
306  */
307 uint16_t noisy_tx_sw_bufsz;
308 
309 /*
310  * Configurable value of packet buffer timeout.
311  */
312 uint16_t noisy_tx_sw_buf_flush_time;
313 
314 /*
315  * Configurable value for size of VNF internal memory area
316  * used for simulating noisy neighbour behaviour
317  */
318 uint64_t noisy_lkup_mem_sz;
319 
320 /*
321  * Configurable value of number of random writes done in
322  * VNF simulation memory area.
323  */
324 uint64_t noisy_lkup_num_writes;
325 
326 /*
327  * Configurable value of number of random reads done in
328  * VNF simulation memory area.
329  */
330 uint64_t noisy_lkup_num_reads;
331 
332 /*
333  * Configurable value of number of random reads/writes done in
334  * VNF simulation memory area.
335  */
336 uint64_t noisy_lkup_num_reads_writes;
337 
338 /*
339  * Receive Side Scaling (RSS) configuration.
340  */
341 uint64_t rss_hf = ETH_RSS_IP; /* RSS IP by default. */
342 
343 /*
344  * Port topology configuration
345  */
346 uint16_t port_topology = PORT_TOPOLOGY_PAIRED; /* Ports are paired by default */
347 
348 /*
349  * Avoid flushing all the RX streams before forwarding starts.
350  */
351 uint8_t no_flush_rx = 0; /* flush by default */
352 
353 /*
354  * Flow API isolated mode.
355  */
356 uint8_t flow_isolate_all;
357 
358 /*
359  * Avoid checking the link status when starting/stopping a port.
360  */
361 uint8_t no_link_check = 0; /* check by default */
362 
363 /*
364  * Don't automatically start all ports in interactive mode.
365  */
366 uint8_t no_device_start = 0;
367 
368 /*
369  * Enable link status change notification
370  */
371 uint8_t lsc_interrupt = 1; /* enabled by default */
372 
373 /*
374  * Enable device removal notification.
375  */
376 uint8_t rmv_interrupt = 1; /* enabled by default */
377 
378 uint8_t hot_plug = 0; /**< hotplug disabled by default. */
379 
380 /* After attach, port setup is called on event or by iterator */
381 bool setup_on_probe_event = true;
382 
383 /* Clear ptypes on port initialization. */
384 uint8_t clear_ptypes = true;
385 
386 /* Hairpin ports configuration mode. */
387 uint16_t hairpin_mode;
388 
389 /* Pretty printing of ethdev events */
390 static const char * const eth_event_desc[] = {
391 	[RTE_ETH_EVENT_UNKNOWN] = "unknown",
392 	[RTE_ETH_EVENT_INTR_LSC] = "link state change",
393 	[RTE_ETH_EVENT_QUEUE_STATE] = "queue state",
394 	[RTE_ETH_EVENT_INTR_RESET] = "reset",
395 	[RTE_ETH_EVENT_VF_MBOX] = "VF mbox",
396 	[RTE_ETH_EVENT_IPSEC] = "IPsec",
397 	[RTE_ETH_EVENT_MACSEC] = "MACsec",
398 	[RTE_ETH_EVENT_INTR_RMV] = "device removal",
399 	[RTE_ETH_EVENT_NEW] = "device probed",
400 	[RTE_ETH_EVENT_DESTROY] = "device released",
401 	[RTE_ETH_EVENT_FLOW_AGED] = "flow aged",
402 	[RTE_ETH_EVENT_MAX] = NULL,
403 };
404 
405 /*
406  * Display or mask ether events
407  * Default to all events except VF_MBOX
408  */
409 uint32_t event_print_mask = (UINT32_C(1) << RTE_ETH_EVENT_UNKNOWN) |
410 			    (UINT32_C(1) << RTE_ETH_EVENT_INTR_LSC) |
411 			    (UINT32_C(1) << RTE_ETH_EVENT_QUEUE_STATE) |
412 			    (UINT32_C(1) << RTE_ETH_EVENT_INTR_RESET) |
413 			    (UINT32_C(1) << RTE_ETH_EVENT_IPSEC) |
414 			    (UINT32_C(1) << RTE_ETH_EVENT_MACSEC) |
415 			    (UINT32_C(1) << RTE_ETH_EVENT_INTR_RMV) |
416 			    (UINT32_C(1) << RTE_ETH_EVENT_FLOW_AGED);
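
/*
 * A minimal sketch of how this mask is consulted (assuming the bit layout
 * above): an event type is printed only when its bit is set, e.g.
 *
 *	if (event_print_mask & (UINT32_C(1) << RTE_ETH_EVENT_INTR_LSC))
 *		printf("link state change events are displayed\n");
 */
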
417 /*
418  * Decide whether all memory is locked for performance.
419  */
420 int do_mlockall = 0;
421 
422 /*
423  * NIC bypass mode configuration options.
424  */
425 
426 #if defined RTE_NET_IXGBE && defined RTE_LIBRTE_IXGBE_BYPASS
427 /* The NIC bypass watchdog timeout. */
428 uint32_t bypass_timeout = RTE_PMD_IXGBE_BYPASS_TMT_OFF;
429 #endif
430 
431 
432 #ifdef RTE_LIB_LATENCYSTATS
433 
434 /*
435  * Set when latency stats are enabled on the command line.
436  */
437 uint8_t latencystats_enabled;
438 
439 /*
440  * Lcore ID to serve latency statistics.
441  */
442 lcoreid_t latencystats_lcore_id = -1;
443 
444 #endif
445 
446 /*
447  * Ethernet device configuration.
448  */
449 struct rte_eth_rxmode rx_mode = {
450 	/* Default maximum frame length.
451 	 * Zero is converted to "RTE_ETHER_MTU + PMD Ethernet overhead"
452 	 * in init_config().
453 	 */
454 	.max_rx_pkt_len = 0,
455 };
456 
457 struct rte_eth_txmode tx_mode = {
458 	.offloads = DEV_TX_OFFLOAD_MBUF_FAST_FREE,
459 };
460 
461 struct rte_fdir_conf fdir_conf = {
462 	.mode = RTE_FDIR_MODE_NONE,
463 	.pballoc = RTE_FDIR_PBALLOC_64K,
464 	.status = RTE_FDIR_REPORT_STATUS,
465 	.mask = {
466 		.vlan_tci_mask = 0xFFEF,
467 		.ipv4_mask     = {
468 			.src_ip = 0xFFFFFFFF,
469 			.dst_ip = 0xFFFFFFFF,
470 		},
471 		.ipv6_mask     = {
472 			.src_ip = {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
473 			.dst_ip = {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
474 		},
475 		.src_port_mask = 0xFFFF,
476 		.dst_port_mask = 0xFFFF,
477 		.mac_addr_byte_mask = 0xFF,
478 		.tunnel_type_mask = 1,
479 		.tunnel_id_mask = 0xFFFFFFFF,
480 	},
481 	.drop_queue = 127,
482 };
483 
484 volatile int test_done = 1; /* stop packet forwarding when set to 1. */
485 
486 /*
487  * Display zero values by default for xstats
488  */
489 uint8_t xstats_hide_zero;
490 
491 /*
492  * Measure of CPU cycles disabled by default
493  */
494 uint8_t record_core_cycles;
495 
496 /*
497  * Display of RX and TX bursts disabled by default
498  */
499 uint8_t record_burst_stats;
500 
501 unsigned int num_sockets = 0;
502 unsigned int socket_ids[RTE_MAX_NUMA_NODES];
503 
504 #ifdef RTE_LIB_BITRATESTATS
505 /* Bitrate statistics */
506 struct rte_stats_bitrates *bitrate_data;
507 lcoreid_t bitrate_lcore_id;
508 uint8_t bitrate_enabled;
509 #endif
510 
511 struct gro_status gro_ports[RTE_MAX_ETHPORTS];
512 uint8_t gro_flush_cycles = GRO_DEFAULT_FLUSH_CYCLES;
513 
514 /*
515  * Hexadecimal bitmask of the RX mq modes that can be enabled.
516  */
517 enum rte_eth_rx_mq_mode rx_mq_mode = ETH_MQ_RX_VMDQ_DCB_RSS;
518 
519 /*
520  * Used to set forced link speed
521  */
522 uint32_t eth_link_speed;
523 
524 /*
525  * ID of the current process in multi-process, used to
526  * configure the queues to be polled.
527  */
528 int proc_id;
529 
530 /*
531  * Number of processes in multi-process, used to
532  * configure the queues to be polled.
533  */
534 unsigned int num_procs = 1;
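
/*
 * A sketch of how proc_id/num_procs are typically used to split queues
 * between processes (an assumption for illustration, not a quote of the
 * forwarding code):
 *
 *	uint16_t q_per_proc = nb_rxq / num_procs;
 *	uint16_t first_q = proc_id * q_per_proc;
 *	... poll queues [first_q, first_q + q_per_proc) only ...
 */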
535 
536 static void
537 eth_rx_metadata_negotiate_mp(uint16_t port_id)
538 {
539 	uint64_t rx_meta_features = 0;
540 	int ret;
541 
542 	if (!is_proc_primary())
543 		return;
544 
545 	rx_meta_features |= RTE_ETH_RX_METADATA_USER_FLAG;
546 	rx_meta_features |= RTE_ETH_RX_METADATA_USER_MARK;
547 	rx_meta_features |= RTE_ETH_RX_METADATA_TUNNEL_ID;
548 
549 	ret = rte_eth_rx_metadata_negotiate(port_id, &rx_meta_features);
550 	if (ret == 0) {
551 		if (!(rx_meta_features & RTE_ETH_RX_METADATA_USER_FLAG)) {
552 			TESTPMD_LOG(DEBUG, "Flow action FLAG will not affect Rx mbufs on port %u\n",
553 				    port_id);
554 		}
555 
556 		if (!(rx_meta_features & RTE_ETH_RX_METADATA_USER_MARK)) {
557 			TESTPMD_LOG(DEBUG, "Flow action MARK will not affect Rx mbufs on port %u\n",
558 				    port_id);
559 		}
560 
561 		if (!(rx_meta_features & RTE_ETH_RX_METADATA_TUNNEL_ID)) {
562 			TESTPMD_LOG(DEBUG, "Flow tunnel offload support might be limited or unavailable on port %u\n",
563 				    port_id);
564 		}
565 	} else if (ret != -ENOTSUP) {
566 		rte_exit(EXIT_FAILURE, "Error when negotiating Rx meta features on port %u: %s\n",
567 			 port_id, rte_strerror(-ret));
568 	}
569 }
570 
571 static int
572 eth_dev_configure_mp(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
573 		      const struct rte_eth_conf *dev_conf)
574 {
575 	if (is_proc_primary())
576 		return rte_eth_dev_configure(port_id, nb_rx_q, nb_tx_q,
577 					dev_conf);
578 	return 0;
579 }
580 
581 static int
582 eth_dev_start_mp(uint16_t port_id)
583 {
584 	if (is_proc_primary())
585 		return rte_eth_dev_start(port_id);
586 
587 	return 0;
588 }
589 
590 static int
591 eth_dev_stop_mp(uint16_t port_id)
592 {
593 	if (is_proc_primary())
594 		return rte_eth_dev_stop(port_id);
595 
596 	return 0;
597 }
598 
599 static void
600 mempool_free_mp(struct rte_mempool *mp)
601 {
602 	if (is_proc_primary())
603 		rte_mempool_free(mp);
604 }
605 
606 static int
607 eth_dev_set_mtu_mp(uint16_t port_id, uint16_t mtu)
608 {
609 	if (is_proc_primary())
610 		return rte_eth_dev_set_mtu(port_id, mtu);
611 
612 	return 0;
613 }
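
/*
 * The *_mp() helpers above share one pattern: in a multi-process setup only
 * the primary process touches the device, while secondary processes return
 * success without doing anything. A hypothetical wrapper for another ethdev
 * call (not present in this file) would follow the same shape:
 *
 *	static int
 *	eth_promiscuous_enable_mp(uint16_t port_id)
 *	{
 *		if (is_proc_primary())
 *			return rte_eth_promiscuous_enable(port_id);
 *		return 0;
 *	}
 */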
614 
615 /* Forward function declarations */
616 static void setup_attached_port(portid_t pi);
617 static void check_all_ports_link_status(uint32_t port_mask);
618 static int eth_event_callback(portid_t port_id,
619 			      enum rte_eth_event_type type,
620 			      void *param, void *ret_param);
621 static void dev_event_callback(const char *device_name,
622 				enum rte_dev_event_type type,
623 				void *param);
624 
625 /*
626  * Check if all the ports are started.
627  * If yes, return positive value. If not, return zero.
628  */
629 static int all_ports_started(void);
630 
631 struct gso_status gso_ports[RTE_MAX_ETHPORTS];
632 uint16_t gso_max_segment_size = RTE_ETHER_MAX_LEN - RTE_ETHER_CRC_LEN;
633 
634 /* Holds the registered mbuf dynamic flags names. */
635 char dynf_names[64][RTE_MBUF_DYN_NAMESIZE];
636 
637 /*
638  * Helper function to check whether a socket ID has already been discovered.
639  * Return a positive value if the socket is new, zero if it was already seen.
640  */
641 int
642 new_socket_id(unsigned int socket_id)
643 {
644 	unsigned int i;
645 
646 	for (i = 0; i < num_sockets; i++) {
647 		if (socket_ids[i] == socket_id)
648 			return 0;
649 	}
650 	return 1;
651 }
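
/*
 * Usage sketch (mirroring the callers below): register a socket the first
 * time it is seen, guarding against RTE_MAX_NUMA_NODES:
 *
 *	if (new_socket_id(sock_num)) {
 *		if (num_sockets >= RTE_MAX_NUMA_NODES)
 *			rte_exit(EXIT_FAILURE, "too many sockets\n");
 *		socket_ids[num_sockets++] = sock_num;
 *	}
 */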
652 
653 /*
654  * Setup default configuration.
655  */
656 static void
657 set_default_fwd_lcores_config(void)
658 {
659 	unsigned int i;
660 	unsigned int nb_lc;
661 	unsigned int sock_num;
662 
663 	nb_lc = 0;
664 	for (i = 0; i < RTE_MAX_LCORE; i++) {
665 		if (!rte_lcore_is_enabled(i))
666 			continue;
667 		sock_num = rte_lcore_to_socket_id(i);
668 		if (new_socket_id(sock_num)) {
669 			if (num_sockets >= RTE_MAX_NUMA_NODES) {
670 				rte_exit(EXIT_FAILURE,
671 					 "Total sockets greater than %u\n",
672 					 RTE_MAX_NUMA_NODES);
673 			}
674 			socket_ids[num_sockets++] = sock_num;
675 		}
676 		if (i == rte_get_main_lcore())
677 			continue;
678 		fwd_lcores_cpuids[nb_lc++] = i;
679 	}
680 	nb_lcores = (lcoreid_t) nb_lc;
681 	nb_cfg_lcores = nb_lcores;
682 	nb_fwd_lcores = 1;
683 }
684 
685 static void
686 set_def_peer_eth_addrs(void)
687 {
688 	portid_t i;
689 
690 	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
691 		peer_eth_addrs[i].addr_bytes[0] = RTE_ETHER_LOCAL_ADMIN_ADDR;
692 		peer_eth_addrs[i].addr_bytes[5] = i;
693 	}
694 }
695 
696 static void
697 set_default_fwd_ports_config(void)
698 {
699 	portid_t pt_id;
700 	int i = 0;
701 
702 	RTE_ETH_FOREACH_DEV(pt_id) {
703 		fwd_ports_ids[i++] = pt_id;
704 
705 		/* Update sockets info according to the attached device */
706 		int socket_id = rte_eth_dev_socket_id(pt_id);
707 		if (socket_id >= 0 && new_socket_id(socket_id)) {
708 			if (num_sockets >= RTE_MAX_NUMA_NODES) {
709 				rte_exit(EXIT_FAILURE,
710 					 "Total sockets greater than %u\n",
711 					 RTE_MAX_NUMA_NODES);
712 			}
713 			socket_ids[num_sockets++] = socket_id;
714 		}
715 	}
716 
717 	nb_cfg_ports = nb_ports;
718 	nb_fwd_ports = nb_ports;
719 }
720 
721 void
722 set_def_fwd_config(void)
723 {
724 	set_default_fwd_lcores_config();
725 	set_def_peer_eth_addrs();
726 	set_default_fwd_ports_config();
727 }
728 
729 #ifndef RTE_EXEC_ENV_WINDOWS
730 /* extremely pessimistic estimation of memory required to create a mempool */
731 static int
732 calc_mem_size(uint32_t nb_mbufs, uint32_t mbuf_sz, size_t pgsz, size_t *out)
733 {
734 	unsigned int n_pages, mbuf_per_pg, leftover;
735 	uint64_t total_mem, mbuf_mem, obj_sz;
736 
737 	/* there is no good way to predict how much space the mempool will
738 	 * occupy because it will allocate chunks on the fly, and some of those
739 	 * will come from default DPDK memory while some will come from our
740 	 * external memory, so just assume 128MB will be enough for everyone.
741 	 */
742 	uint64_t hdr_mem = 128 << 20;
743 
744 	/* account for possible non-contiguousness */
745 	obj_sz = rte_mempool_calc_obj_size(mbuf_sz, 0, NULL);
746 	if (obj_sz > pgsz) {
747 		TESTPMD_LOG(ERR, "Object size is bigger than page size\n");
748 		return -1;
749 	}
750 
751 	mbuf_per_pg = pgsz / obj_sz;
752 	leftover = (nb_mbufs % mbuf_per_pg) > 0;
753 	n_pages = (nb_mbufs / mbuf_per_pg) + leftover;
754 
755 	mbuf_mem = n_pages * pgsz;
756 
757 	total_mem = RTE_ALIGN(hdr_mem + mbuf_mem, pgsz);
758 
759 	if (total_mem > SIZE_MAX) {
760 		TESTPMD_LOG(ERR, "Memory size too big\n");
761 		return -1;
762 	}
763 	*out = (size_t)total_mem;
764 
765 	return 0;
766 }
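
/*
 * Worked example with assumed figures: if rte_mempool_calc_obj_size()
 * reports obj_sz = 2560 bytes and the page size is 2 MB, one page holds
 * 2097152 / 2560 = 819 mbufs; 81900 mbufs then need exactly 100 pages
 * (200 MB), plus the flat 128 MB header estimate, for a total of 328 MB
 * (already a multiple of the page size).
 */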
767 
768 static int
769 pagesz_flags(uint64_t page_sz)
770 {
771 	/* as per mmap() manpage, all page sizes are log2 of page size
772 	 * shifted by MAP_HUGE_SHIFT
773 	 */
774 	int log2 = rte_log2_u64(page_sz);
775 
776 	return (log2 << HUGE_SHIFT);
777 }
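
/*
 * Example of the arithmetic above: for a 2 MB page, rte_log2_u64() returns
 * 21, so pagesz_flags() yields 21 << HUGE_SHIFT, which on Linux equals the
 * kernel's MAP_HUGE_2MB value (21 << MAP_HUGE_SHIFT).
 */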
778 
779 static void *
780 alloc_mem(size_t memsz, size_t pgsz, bool huge)
781 {
782 	void *addr;
783 	int flags;
784 
785 	/* allocate anonymous hugepages */
786 	flags = MAP_ANONYMOUS | MAP_PRIVATE;
787 	if (huge)
788 		flags |= HUGE_FLAG | pagesz_flags(pgsz);
789 
790 	addr = mmap(NULL, memsz, PROT_READ | PROT_WRITE, flags, -1, 0);
791 	if (addr == MAP_FAILED)
792 		return NULL;
793 
794 	return addr;
795 }
796 
797 struct extmem_param {
798 	void *addr;
799 	size_t len;
800 	size_t pgsz;
801 	rte_iova_t *iova_table;
802 	unsigned int iova_table_len;
803 };
804 
805 static int
806 create_extmem(uint32_t nb_mbufs, uint32_t mbuf_sz, struct extmem_param *param,
807 		bool huge)
808 {
809 	uint64_t pgsizes[] = {RTE_PGSIZE_2M, RTE_PGSIZE_1G, /* x86_64, ARM */
810 			RTE_PGSIZE_16M, RTE_PGSIZE_16G};    /* POWER */
811 	unsigned int cur_page, n_pages, pgsz_idx;
812 	size_t mem_sz, cur_pgsz;
813 	rte_iova_t *iovas = NULL;
814 	void *addr;
815 	int ret;
816 
817 	for (pgsz_idx = 0; pgsz_idx < RTE_DIM(pgsizes); pgsz_idx++) {
818 		/* skip anything that is too big */
819 		if (pgsizes[pgsz_idx] > SIZE_MAX)
820 			continue;
821 
822 		cur_pgsz = pgsizes[pgsz_idx];
823 
824 		/* if we were told not to allocate hugepages, override */
825 		if (!huge)
826 			cur_pgsz = sysconf(_SC_PAGESIZE);
827 
828 		ret = calc_mem_size(nb_mbufs, mbuf_sz, cur_pgsz, &mem_sz);
829 		if (ret < 0) {
830 			TESTPMD_LOG(ERR, "Cannot calculate memory size\n");
831 			return -1;
832 		}
833 
834 		/* allocate our memory */
835 		addr = alloc_mem(mem_sz, cur_pgsz, huge);
836 
837 		/* if we couldn't allocate memory with a specified page size,
838 		 * that doesn't mean we can't do it with other page sizes, so
839 		 * try another one.
840 		 */
841 		if (addr == NULL)
842 			continue;
843 
844 		/* store IOVA addresses for every page in this memory area */
845 		n_pages = mem_sz / cur_pgsz;
846 
847 		iovas = malloc(sizeof(*iovas) * n_pages);
848 
849 		if (iovas == NULL) {
850 			TESTPMD_LOG(ERR, "Cannot allocate memory for iova addresses\n");
851 			goto fail;
852 		}
853 		/* lock memory if it's not huge pages */
854 		if (!huge)
855 			mlock(addr, mem_sz);
856 
857 		/* populate IOVA addresses */
858 		for (cur_page = 0; cur_page < n_pages; cur_page++) {
859 			rte_iova_t iova;
860 			size_t offset;
861 			void *cur;
862 
863 			offset = cur_pgsz * cur_page;
864 			cur = RTE_PTR_ADD(addr, offset);
865 
866 			/* touch the page before getting its IOVA */
867 			*(volatile char *)cur = 0;
868 
869 			iova = rte_mem_virt2iova(cur);
870 
871 			iovas[cur_page] = iova;
872 		}
873 
874 		break;
875 	}
876 	/* if we couldn't allocate anything */
877 	if (iovas == NULL)
878 		return -1;
879 
880 	param->addr = addr;
881 	param->len = mem_sz;
882 	param->pgsz = cur_pgsz;
883 	param->iova_table = iovas;
884 	param->iova_table_len = n_pages;
885 
886 	return 0;
887 fail:
888 	if (iovas)
889 		free(iovas);
890 	if (addr)
891 		munmap(addr, mem_sz);
892 
893 	return -1;
894 }
895 
896 static int
897 setup_extmem(uint32_t nb_mbufs, uint32_t mbuf_sz, bool huge)
898 {
899 	struct extmem_param param;
900 	int socket_id, ret;
901 
902 	memset(&param, 0, sizeof(param));
903 
904 	/* check if our heap exists */
905 	socket_id = rte_malloc_heap_get_socket(EXTMEM_HEAP_NAME);
906 	if (socket_id < 0) {
907 		/* create our heap */
908 		ret = rte_malloc_heap_create(EXTMEM_HEAP_NAME);
909 		if (ret < 0) {
910 			TESTPMD_LOG(ERR, "Cannot create heap\n");
911 			return -1;
912 		}
913 	}
914 
915 	ret = create_extmem(nb_mbufs, mbuf_sz, &param, huge);
916 	if (ret < 0) {
917 		TESTPMD_LOG(ERR, "Cannot create memory area\n");
918 		return -1;
919 	}
920 
921 	/* we now have a valid memory area, so add it to heap */
922 	ret = rte_malloc_heap_memory_add(EXTMEM_HEAP_NAME,
923 			param.addr, param.len, param.iova_table,
924 			param.iova_table_len, param.pgsz);
925 
926 	/* when using VFIO, memory is automatically mapped for DMA by EAL */
927 
928 	/* not needed any more */
929 	free(param.iova_table);
930 
931 	if (ret < 0) {
932 		TESTPMD_LOG(ERR, "Cannot add memory to heap\n");
933 		munmap(param.addr, param.len);
934 		return -1;
935 	}
936 
937 	/* success */
938 
939 	TESTPMD_LOG(DEBUG, "Allocated %zuMB of external memory\n",
940 			param.len >> 20);
941 
942 	return 0;
943 }
944 static void
945 dma_unmap_cb(struct rte_mempool *mp __rte_unused, void *opaque __rte_unused,
946 	     struct rte_mempool_memhdr *memhdr, unsigned mem_idx __rte_unused)
947 {
948 	uint16_t pid = 0;
949 	int ret;
950 
951 	RTE_ETH_FOREACH_DEV(pid) {
952 		struct rte_eth_dev_info dev_info;
953 
954 		ret = eth_dev_info_get_print_err(pid, &dev_info);
955 		if (ret != 0) {
956 			TESTPMD_LOG(DEBUG,
957 				    "unable to get device info for port %d on addr 0x%p, "
958 				    "mempool unmapping will not be performed\n",
959 				    pid, memhdr->addr);
960 			continue;
961 		}
962 
963 		ret = rte_dev_dma_unmap(dev_info.device, memhdr->addr, 0, memhdr->len);
964 		if (ret) {
965 			TESTPMD_LOG(DEBUG,
966 				    "unable to DMA unmap addr 0x%p "
967 				    "for device %s\n",
968 				    memhdr->addr, dev_info.device->name);
969 		}
970 	}
971 	ret = rte_extmem_unregister(memhdr->addr, memhdr->len);
972 	if (ret) {
973 		TESTPMD_LOG(DEBUG,
974 			    "unable to un-register addr 0x%p\n", memhdr->addr);
975 	}
976 }
977 
978 static void
979 dma_map_cb(struct rte_mempool *mp __rte_unused, void *opaque __rte_unused,
980 	   struct rte_mempool_memhdr *memhdr, unsigned mem_idx __rte_unused)
981 {
982 	uint16_t pid = 0;
983 	size_t page_size = sysconf(_SC_PAGESIZE);
984 	int ret;
985 
986 	ret = rte_extmem_register(memhdr->addr, memhdr->len, NULL, 0,
987 				  page_size);
988 	if (ret) {
989 		TESTPMD_LOG(DEBUG,
990 			    "unable to register addr 0x%p\n", memhdr->addr);
991 		return;
992 	}
993 	RTE_ETH_FOREACH_DEV(pid) {
994 		struct rte_eth_dev_info dev_info;
995 
996 		ret = eth_dev_info_get_print_err(pid, &dev_info);
997 		if (ret != 0) {
998 			TESTPMD_LOG(DEBUG,
999 				    "unable to get device info for port %d on addr 0x%p, "
1000 				    "mempool mapping will not be performed\n",
1001 				    pid, memhdr->addr);
1002 			continue;
1003 		}
1004 		ret = rte_dev_dma_map(dev_info.device, memhdr->addr, 0, memhdr->len);
1005 		if (ret) {
1006 			TESTPMD_LOG(DEBUG,
1007 				    "unable to DMA map addr 0x%p "
1008 				    "for device %s\n",
1009 				    memhdr->addr, dev_info.device->name);
1010 		}
1011 	}
1012 }
1013 #endif
1014 
1015 static unsigned int
1016 setup_extbuf(uint32_t nb_mbufs, uint16_t mbuf_sz, unsigned int socket_id,
1017 	    char *pool_name, struct rte_pktmbuf_extmem **ext_mem)
1018 {
1019 	struct rte_pktmbuf_extmem *xmem;
1020 	unsigned int ext_num, zone_num, elt_num;
1021 	uint16_t elt_size;
1022 
1023 	elt_size = RTE_ALIGN_CEIL(mbuf_sz, RTE_CACHE_LINE_SIZE);
1024 	elt_num = EXTBUF_ZONE_SIZE / elt_size;
1025 	zone_num = (nb_mbufs + elt_num - 1) / elt_num;
1026 
1027 	xmem = malloc(sizeof(struct rte_pktmbuf_extmem) * zone_num);
1028 	if (xmem == NULL) {
1029 		TESTPMD_LOG(ERR, "Cannot allocate memory for "
1030 				 "external buffer descriptors\n");
1031 		*ext_mem = NULL;
1032 		return 0;
1033 	}
1034 	for (ext_num = 0; ext_num < zone_num; ext_num++) {
1035 		struct rte_pktmbuf_extmem *xseg = xmem + ext_num;
1036 		const struct rte_memzone *mz;
1037 		char mz_name[RTE_MEMZONE_NAMESIZE];
1038 		int ret;
1039 
1040 		ret = snprintf(mz_name, sizeof(mz_name),
1041 			RTE_MEMPOOL_MZ_FORMAT "_xb_%u", pool_name, ext_num);
1042 		if (ret < 0 || ret >= (int)sizeof(mz_name)) {
1043 			errno = ENAMETOOLONG;
1044 			ext_num = 0;
1045 			break;
1046 		}
1047 		mz = rte_memzone_reserve_aligned(mz_name, EXTBUF_ZONE_SIZE,
1048 						 socket_id,
1049 						 RTE_MEMZONE_IOVA_CONTIG |
1050 						 RTE_MEMZONE_1GB |
1051 						 RTE_MEMZONE_SIZE_HINT_ONLY,
1052 						 EXTBUF_ZONE_SIZE);
1053 		if (mz == NULL) {
1054 			/*
1055 			 * The caller exits on external buffer creation
1056 			 * error, so there is no need to free memzones.
1057 			 */
1058 			errno = ENOMEM;
1059 			ext_num = 0;
1060 			break;
1061 		}
1062 		xseg->buf_ptr = mz->addr;
1063 		xseg->buf_iova = mz->iova;
1064 		xseg->buf_len = EXTBUF_ZONE_SIZE;
1065 		xseg->elt_size = elt_size;
1066 	}
1067 	if (ext_num == 0 && xmem != NULL) {
1068 		free(xmem);
1069 		xmem = NULL;
1070 	}
1071 	*ext_mem = xmem;
1072 	return ext_num;
1073 }
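
/*
 * Sizing sketch with assumed numbers: for mbuf_sz = 2176 (already a
 * multiple of a 64-byte cache line) each 2 MB zone holds
 * 2097152 / 2176 = 963 elements, so 100000 mbufs require
 * (100000 + 962) / 963 = 104 memzones.
 */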
1074 
1075 /*
1076  * Configuration initialisation done once at init time.
1077  */
1078 static struct rte_mempool *
1079 mbuf_pool_create(uint16_t mbuf_seg_size, unsigned nb_mbuf,
1080 		 unsigned int socket_id, uint16_t size_idx)
1081 {
1082 	char pool_name[RTE_MEMPOOL_NAMESIZE];
1083 	struct rte_mempool *rte_mp = NULL;
1084 #ifndef RTE_EXEC_ENV_WINDOWS
1085 	uint32_t mb_size;
1086 
1087 	mb_size = sizeof(struct rte_mbuf) + mbuf_seg_size;
1088 #endif
1089 	mbuf_poolname_build(socket_id, pool_name, sizeof(pool_name), size_idx);
1090 	if (!is_proc_primary()) {
1091 		rte_mp = rte_mempool_lookup(pool_name);
1092 		if (rte_mp == NULL)
1093 			rte_exit(EXIT_FAILURE,
1094 				"Get mbuf pool for socket %u failed: %s\n",
1095 				socket_id, rte_strerror(rte_errno));
1096 		return rte_mp;
1097 	}
1098 
1099 	TESTPMD_LOG(INFO,
1100 		"create a new mbuf pool <%s>: n=%u, size=%u, socket=%u\n",
1101 		pool_name, nb_mbuf, mbuf_seg_size, socket_id);
1102 
1103 	switch (mp_alloc_type) {
1104 	case MP_ALLOC_NATIVE:
1105 		{
1106 			/* wrapper to rte_mempool_create() */
1107 			TESTPMD_LOG(INFO, "preferred mempool ops selected: %s\n",
1108 					rte_mbuf_best_mempool_ops());
1109 			rte_mp = rte_pktmbuf_pool_create(pool_name, nb_mbuf,
1110 				mb_mempool_cache, 0, mbuf_seg_size, socket_id);
1111 			break;
1112 		}
1113 #ifndef RTE_EXEC_ENV_WINDOWS
1114 	case MP_ALLOC_ANON:
1115 		{
1116 			rte_mp = rte_mempool_create_empty(pool_name, nb_mbuf,
1117 				mb_size, (unsigned int) mb_mempool_cache,
1118 				sizeof(struct rte_pktmbuf_pool_private),
1119 				socket_id, mempool_flags);
1120 			if (rte_mp == NULL)
1121 				goto err;
1122 
1123 			if (rte_mempool_populate_anon(rte_mp) == 0) {
1124 				rte_mempool_free(rte_mp);
1125 				rte_mp = NULL;
1126 				goto err;
1127 			}
1128 			rte_pktmbuf_pool_init(rte_mp, NULL);
1129 			rte_mempool_obj_iter(rte_mp, rte_pktmbuf_init, NULL);
1130 			rte_mempool_mem_iter(rte_mp, dma_map_cb, NULL);
1131 			break;
1132 		}
1133 	case MP_ALLOC_XMEM:
1134 	case MP_ALLOC_XMEM_HUGE:
1135 		{
1136 			int heap_socket;
1137 			bool huge = mp_alloc_type == MP_ALLOC_XMEM_HUGE;
1138 
1139 			if (setup_extmem(nb_mbuf, mbuf_seg_size, huge) < 0)
1140 				rte_exit(EXIT_FAILURE, "Could not create external memory\n");
1141 
1142 			heap_socket =
1143 				rte_malloc_heap_get_socket(EXTMEM_HEAP_NAME);
1144 			if (heap_socket < 0)
1145 				rte_exit(EXIT_FAILURE, "Could not get external memory socket ID\n");
1146 
1147 			TESTPMD_LOG(INFO, "preferred mempool ops selected: %s\n",
1148 					rte_mbuf_best_mempool_ops());
1149 			rte_mp = rte_pktmbuf_pool_create(pool_name, nb_mbuf,
1150 					mb_mempool_cache, 0, mbuf_seg_size,
1151 					heap_socket);
1152 			break;
1153 		}
1154 #endif
1155 	case MP_ALLOC_XBUF:
1156 		{
1157 			struct rte_pktmbuf_extmem *ext_mem;
1158 			unsigned int ext_num;
1159 
1160 			ext_num = setup_extbuf(nb_mbuf,	mbuf_seg_size,
1161 					       socket_id, pool_name, &ext_mem);
1162 			if (ext_num == 0)
1163 				rte_exit(EXIT_FAILURE,
1164 					 "Can't create pinned data buffers\n");
1165 
1166 			TESTPMD_LOG(INFO, "preferred mempool ops selected: %s\n",
1167 					rte_mbuf_best_mempool_ops());
1168 			rte_mp = rte_pktmbuf_pool_create_extbuf
1169 					(pool_name, nb_mbuf, mb_mempool_cache,
1170 					 0, mbuf_seg_size, socket_id,
1171 					 ext_mem, ext_num);
1172 			free(ext_mem);
1173 			break;
1174 		}
1175 	default:
1176 		{
1177 			rte_exit(EXIT_FAILURE, "Invalid mempool creation mode\n");
1178 		}
1179 	}
1180 
1181 #ifndef RTE_EXEC_ENV_WINDOWS
1182 err:
1183 #endif
1184 	if (rte_mp == NULL) {
1185 		rte_exit(EXIT_FAILURE,
1186 			"Creation of mbuf pool for socket %u failed: %s\n",
1187 			socket_id, rte_strerror(rte_errno));
1188 	} else if (verbose_level > 0) {
1189 		rte_mempool_dump(stdout, rte_mp);
1190 	}
1191 	return rte_mp;
1192 }
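
/*
 * Call sketch (values are assumptions mirroring init_config() below):
 * create the pool for the first configured mbuf data size on socket 0:
 *
 *	struct rte_mempool *mp =
 *		mbuf_pool_create(mbuf_data_size[0], nb_mbuf_per_pool, 0, 0);
 *
 * In a secondary process the same call only looks the pool up by name
 * instead of creating it.
 */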
1193 
1194 /*
1195  * Check whether the given socket ID is valid in NUMA mode.
1196  * Return 0 if valid, -1 otherwise.
1197  */
1198 static int
1199 check_socket_id(const unsigned int socket_id)
1200 {
1201 	static int warning_once = 0;
1202 
1203 	if (new_socket_id(socket_id)) {
1204 		if (!warning_once && numa_support)
1205 			fprintf(stderr,
1206 				"Warning: NUMA should be configured manually by using --port-numa-config and --ring-numa-config parameters along with --numa.\n");
1207 		warning_once = 1;
1208 		return -1;
1209 	}
1210 	return 0;
1211 }
1212 
1213 /*
1214  * Get the allowed maximum number of RX queues.
1215  * *pid returns the port ID which has the minimal value of
1216  * max_rx_queues in all ports.
1217  */
1218 queueid_t
1219 get_allowed_max_nb_rxq(portid_t *pid)
1220 {
1221 	queueid_t allowed_max_rxq = RTE_MAX_QUEUES_PER_PORT;
1222 	bool max_rxq_valid = false;
1223 	portid_t pi;
1224 	struct rte_eth_dev_info dev_info;
1225 
1226 	RTE_ETH_FOREACH_DEV(pi) {
1227 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1228 			continue;
1229 
1230 		max_rxq_valid = true;
1231 		if (dev_info.max_rx_queues < allowed_max_rxq) {
1232 			allowed_max_rxq = dev_info.max_rx_queues;
1233 			*pid = pi;
1234 		}
1235 	}
1236 	return max_rxq_valid ? allowed_max_rxq : 0;
1237 }
1238 
1239 /*
1240  * Check whether the input rxq is valid.
1241  * It is valid if it is not greater than the maximum number
1242  * of RX queues of any port.
1243  * Return 0 if valid, -1 otherwise.
1244  */
1245 int
1246 check_nb_rxq(queueid_t rxq)
1247 {
1248 	queueid_t allowed_max_rxq;
1249 	portid_t pid = 0;
1250 
1251 	allowed_max_rxq = get_allowed_max_nb_rxq(&pid);
1252 	if (rxq > allowed_max_rxq) {
1253 		fprintf(stderr,
1254 			"Fail: input rxq (%u) can't be greater than max_rx_queues (%u) of port %u\n",
1255 			rxq, allowed_max_rxq, pid);
1256 		return -1;
1257 	}
1258 	return 0;
1259 }
1260 
1261 /*
1262  * Get the allowed maximum number of TX queues.
1263  * *pid returns the port ID which has the minimal value of
1264  * max_tx_queues in all ports.
1265  */
1266 queueid_t
1267 get_allowed_max_nb_txq(portid_t *pid)
1268 {
1269 	queueid_t allowed_max_txq = RTE_MAX_QUEUES_PER_PORT;
1270 	bool max_txq_valid = false;
1271 	portid_t pi;
1272 	struct rte_eth_dev_info dev_info;
1273 
1274 	RTE_ETH_FOREACH_DEV(pi) {
1275 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1276 			continue;
1277 
1278 		max_txq_valid = true;
1279 		if (dev_info.max_tx_queues < allowed_max_txq) {
1280 			allowed_max_txq = dev_info.max_tx_queues;
1281 			*pid = pi;
1282 		}
1283 	}
1284 	return max_txq_valid ? allowed_max_txq : 0;
1285 }
1286 
1287 /*
1288  * Check whether the input txq is valid.
1289  * It is valid if it is not greater than the maximum number
1290  * of TX queues of any port.
1291  * Return 0 if valid, -1 otherwise.
1292  */
1293 int
1294 check_nb_txq(queueid_t txq)
1295 {
1296 	queueid_t allowed_max_txq;
1297 	portid_t pid = 0;
1298 
1299 	allowed_max_txq = get_allowed_max_nb_txq(&pid);
1300 	if (txq > allowed_max_txq) {
1301 		fprintf(stderr,
1302 			"Fail: input txq (%u) can't be greater than max_tx_queues (%u) of port %u\n",
1303 			txq, allowed_max_txq, pid);
1304 		return -1;
1305 	}
1306 	return 0;
1307 }
1308 
1309 /*
1310  * Get the allowed maximum number of RXDs of every rx queue.
1311  * *pid returns the port ID which has the minimal value of
1312  * max_rxd in all queues of all ports.
1313  */
1314 static uint16_t
1315 get_allowed_max_nb_rxd(portid_t *pid)
1316 {
1317 	uint16_t allowed_max_rxd = UINT16_MAX;
1318 	portid_t pi;
1319 	struct rte_eth_dev_info dev_info;
1320 
1321 	RTE_ETH_FOREACH_DEV(pi) {
1322 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1323 			continue;
1324 
1325 		if (dev_info.rx_desc_lim.nb_max < allowed_max_rxd) {
1326 			allowed_max_rxd = dev_info.rx_desc_lim.nb_max;
1327 			*pid = pi;
1328 		}
1329 	}
1330 	return allowed_max_rxd;
1331 }
1332 
1333 /*
1334  * Get the allowed minimal number of RXDs of every rx queue.
1335  * *pid returns the port ID which has the highest value of
1336  * min_rxd in all queues of all ports.
1337  */
1338 static uint16_t
1339 get_allowed_min_nb_rxd(portid_t *pid)
1340 {
1341 	uint16_t allowed_min_rxd = 0;
1342 	portid_t pi;
1343 	struct rte_eth_dev_info dev_info;
1344 
1345 	RTE_ETH_FOREACH_DEV(pi) {
1346 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1347 			continue;
1348 
1349 		if (dev_info.rx_desc_lim.nb_min > allowed_min_rxd) {
1350 			allowed_min_rxd = dev_info.rx_desc_lim.nb_min;
1351 			*pid = pi;
1352 		}
1353 	}
1354 
1355 	return allowed_min_rxd;
1356 }
1357 
1358 /*
1359  * Check whether the input rxd is valid.
1360  * It is valid if it is not greater than the maximum number of RXDs
1361  * supported by any Rx queue and not less than the minimal number of
1362  * RXDs required by any Rx queue.
1363  * Return 0 if valid, -1 otherwise.
1364  */
1365 int
1366 check_nb_rxd(queueid_t rxd)
1367 {
1368 	uint16_t allowed_max_rxd;
1369 	uint16_t allowed_min_rxd;
1370 	portid_t pid = 0;
1371 
1372 	allowed_max_rxd = get_allowed_max_nb_rxd(&pid);
1373 	if (rxd > allowed_max_rxd) {
1374 		fprintf(stderr,
1375 			"Fail: input rxd (%u) can't be greater than max_rxds (%u) of port %u\n",
1376 			rxd, allowed_max_rxd, pid);
1377 		return -1;
1378 	}
1379 
1380 	allowed_min_rxd = get_allowed_min_nb_rxd(&pid);
1381 	if (rxd < allowed_min_rxd) {
1382 		fprintf(stderr,
1383 			"Fail: input rxd (%u) can't be less than min_rxds (%u) of port %u\n",
1384 			rxd, allowed_min_rxd, pid);
1385 		return -1;
1386 	}
1387 
1388 	return 0;
1389 }
1390 
1391 /*
1392  * Get the allowed maximum number of TXDs of every tx queue.
1393  * *pid returns the port ID which has the minimal value of
1394  * max_txd in all queues of all ports.
1395  */
1396 static uint16_t
1397 get_allowed_max_nb_txd(portid_t *pid)
1398 {
1399 	uint16_t allowed_max_txd = UINT16_MAX;
1400 	portid_t pi;
1401 	struct rte_eth_dev_info dev_info;
1402 
1403 	RTE_ETH_FOREACH_DEV(pi) {
1404 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1405 			continue;
1406 
1407 		if (dev_info.tx_desc_lim.nb_max < allowed_max_txd) {
1408 			allowed_max_txd = dev_info.tx_desc_lim.nb_max;
1409 			*pid = pi;
1410 		}
1411 	}
1412 	return allowed_max_txd;
1413 }
1414 
1415 /*
1416  * Get the allowed minimal number of TXDs of every tx queue.
1417  * *pid returns the port ID which has the highest value of
1418  * min_txd in all queues of all ports.
1419  */
1420 static uint16_t
1421 get_allowed_min_nb_txd(portid_t *pid)
1422 {
1423 	uint16_t allowed_min_txd = 0;
1424 	portid_t pi;
1425 	struct rte_eth_dev_info dev_info;
1426 
1427 	RTE_ETH_FOREACH_DEV(pi) {
1428 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1429 			continue;
1430 
1431 		if (dev_info.tx_desc_lim.nb_min > allowed_min_txd) {
1432 			allowed_min_txd = dev_info.tx_desc_lim.nb_min;
1433 			*pid = pi;
1434 		}
1435 	}
1436 
1437 	return allowed_min_txd;
1438 }
1439 
1440 /*
1441  * Check whether the input txd is valid.
1442  * It is valid if it is not greater than the maximum number of TXDs of
1443  * any Tx queue and not less than the minimal number of TXDs of any Tx queue.
1444  * Return 0 if valid, -1 otherwise.
1445  */
1446 int
1447 check_nb_txd(queueid_t txd)
1448 {
1449 	uint16_t allowed_max_txd;
1450 	uint16_t allowed_min_txd;
1451 	portid_t pid = 0;
1452 
1453 	allowed_max_txd = get_allowed_max_nb_txd(&pid);
1454 	if (txd > allowed_max_txd) {
1455 		fprintf(stderr,
1456 			"Fail: input txd (%u) can't be greater than max_txds (%u) of port %u\n",
1457 			txd, allowed_max_txd, pid);
1458 		return -1;
1459 	}
1460 
1461 	allowed_min_txd = get_allowed_min_nb_txd(&pid);
1462 	if (txd < allowed_min_txd) {
1463 		fprintf(stderr,
1464 			"Fail: input txd (%u) can't be less than min_txds (%u) of port %u\n",
1465 			txd, allowed_min_txd, pid);
1466 		return -1;
1467 	}
1468 	return 0;
1469 }
1470 
1471 
1472 /*
1473  * Get the allowed maximum number of hairpin queues.
1474  * *pid returns the port ID which has the minimal value of
1475  * max_hairpin_queues in all ports.
1476  */
1477 queueid_t
1478 get_allowed_max_nb_hairpinq(portid_t *pid)
1479 {
1480 	queueid_t allowed_max_hairpinq = RTE_MAX_QUEUES_PER_PORT;
1481 	portid_t pi;
1482 	struct rte_eth_hairpin_cap cap;
1483 
1484 	RTE_ETH_FOREACH_DEV(pi) {
1485 		if (rte_eth_dev_hairpin_capability_get(pi, &cap) != 0) {
1486 			*pid = pi;
1487 			return 0;
1488 		}
1489 		if (cap.max_nb_queues < allowed_max_hairpinq) {
1490 			allowed_max_hairpinq = cap.max_nb_queues;
1491 			*pid = pi;
1492 		}
1493 	}
1494 	return allowed_max_hairpinq;
1495 }
1496 
1497 /*
1498  * Check whether the input hairpin queue number is valid.
1499  * It is valid if it is not greater than the maximum number
1500  * of hairpin queues of any port.
1501  * Return 0 if valid, -1 otherwise.
1502  */
1503 int
1504 check_nb_hairpinq(queueid_t hairpinq)
1505 {
1506 	queueid_t allowed_max_hairpinq;
1507 	portid_t pid = 0;
1508 
1509 	allowed_max_hairpinq = get_allowed_max_nb_hairpinq(&pid);
1510 	if (hairpinq > allowed_max_hairpinq) {
1511 		fprintf(stderr,
1512 			"Fail: input hairpin (%u) can't be greater than max_hairpin_queues (%u) of port %u\n",
1513 			hairpinq, allowed_max_hairpinq, pid);
1514 		return -1;
1515 	}
1516 	return 0;
1517 }
1518 
1519 static void
1520 init_config_port_offloads(portid_t pid, uint32_t socket_id)
1521 {
1522 	struct rte_port *port = &ports[pid];
1523 	uint16_t data_size;
1524 	int ret;
1525 	int i;
1526 
1527 	eth_rx_metadata_negotiate_mp(pid);
1528 
1529 	port->dev_conf.txmode = tx_mode;
1530 	port->dev_conf.rxmode = rx_mode;
1531 
1532 	ret = eth_dev_info_get_print_err(pid, &port->dev_info);
1533 	if (ret != 0)
1534 		rte_exit(EXIT_FAILURE, "rte_eth_dev_info_get() failed\n");
1535 
1536 	ret = update_jumbo_frame_offload(pid);
1537 	if (ret != 0)
1538 		fprintf(stderr,
1539 			"Updating jumbo frame offload failed for port %u\n",
1540 			pid);
1541 
1542 	if (!(port->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE))
1543 		port->dev_conf.txmode.offloads &=
1544 			~DEV_TX_OFFLOAD_MBUF_FAST_FREE;
1545 
1546 	/* Apply Rx offloads configuration */
1547 	for (i = 0; i < port->dev_info.max_rx_queues; i++)
1548 		port->rx_conf[i].offloads = port->dev_conf.rxmode.offloads;
1549 	/* Apply Tx offloads configuration */
1550 	for (i = 0; i < port->dev_info.max_tx_queues; i++)
1551 		port->tx_conf[i].offloads = port->dev_conf.txmode.offloads;
1552 
1553 	if (eth_link_speed)
1554 		port->dev_conf.link_speeds = eth_link_speed;
1555 
1556 	/* set flag to initialize port/queue */
1557 	port->need_reconfig = 1;
1558 	port->need_reconfig_queues = 1;
1559 	port->socket_id = socket_id;
1560 	port->tx_metadata = 0;
1561 
1562 	/*
1563 	 * Check for maximum number of segments per MTU.
1564 	 * Accordingly update the mbuf data size.
1565 	 */
1566 	if (port->dev_info.rx_desc_lim.nb_mtu_seg_max != UINT16_MAX &&
1567 	    port->dev_info.rx_desc_lim.nb_mtu_seg_max != 0) {
1568 		data_size = rx_mode.max_rx_pkt_len /
1569 			port->dev_info.rx_desc_lim.nb_mtu_seg_max;
1570 
1571 		if ((data_size + RTE_PKTMBUF_HEADROOM) > mbuf_data_size[0]) {
1572 			mbuf_data_size[0] = data_size + RTE_PKTMBUF_HEADROOM;
1573 			TESTPMD_LOG(WARNING,
1574 				    "Configured mbuf size of the first segment %hu\n",
1575 				    mbuf_data_size[0]);
1576 		}
1577 	}
1578 }
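
/*
 * Example of the segment-size adjustment above (assumed figures): with
 * max_rx_pkt_len = 9600 and rx_desc_lim.nb_mtu_seg_max = 5, data_size is
 * 1920; adding RTE_PKTMBUF_HEADROOM (typically 128) gives 2048, so
 * mbuf_data_size[0] is raised only if it was smaller than 2048.
 */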
1579 
1580 static void
1581 init_config(void)
1582 {
1583 	portid_t pid;
1584 	struct rte_mempool *mbp;
1585 	unsigned int nb_mbuf_per_pool;
1586 	lcoreid_t  lc_id;
1587 	struct rte_gro_param gro_param;
1588 	uint32_t gso_types;
1589 
1590 	/* Configuration of logical cores. */
1591 	fwd_lcores = rte_zmalloc("testpmd: fwd_lcores",
1592 				sizeof(struct fwd_lcore *) * nb_lcores,
1593 				RTE_CACHE_LINE_SIZE);
1594 	if (fwd_lcores == NULL) {
1595 		rte_exit(EXIT_FAILURE, "rte_zmalloc(%d (struct fwd_lcore *)) "
1596 							"failed\n", nb_lcores);
1597 	}
1598 	for (lc_id = 0; lc_id < nb_lcores; lc_id++) {
1599 		fwd_lcores[lc_id] = rte_zmalloc("testpmd: struct fwd_lcore",
1600 					       sizeof(struct fwd_lcore),
1601 					       RTE_CACHE_LINE_SIZE);
1602 		if (fwd_lcores[lc_id] == NULL) {
1603 			rte_exit(EXIT_FAILURE, "rte_zmalloc(struct fwd_lcore) "
1604 								"failed\n");
1605 		}
1606 		fwd_lcores[lc_id]->cpuid_idx = lc_id;
1607 	}
1608 
1609 	RTE_ETH_FOREACH_DEV(pid) {
1610 		uint32_t socket_id;
1611 
1612 		if (numa_support) {
1613 			socket_id = port_numa[pid];
1614 			if (port_numa[pid] == NUMA_NO_CONFIG) {
1615 				socket_id = rte_eth_dev_socket_id(pid);
1616 
1617 				/*
1618 				 * if socket_id is invalid,
1619 				 * set to the first available socket.
1620 				 */
1621 				if (check_socket_id(socket_id) < 0)
1622 					socket_id = socket_ids[0];
1623 			}
1624 		} else {
1625 			socket_id = (socket_num == UMA_NO_CONFIG) ?
1626 				    0 : socket_num;
1627 		}
1628 		/* Apply default TxRx configuration for all ports */
1629 		init_config_port_offloads(pid, socket_id);
1630 	}
1631 	/*
1632 	 * Create mbuf pools.
1633 	 * If NUMA support is disabled, create a single mbuf pool in
1634 	 * socket 0 memory by default.
1635 	 * Otherwise, create an mbuf pool in the memory of sockets 0 and 1.
1636 	 *
1637 	 * Use the maximum value of nb_rxd and nb_txd here, then nb_rxd and
1638 	 * nb_txd can be configured at run time.
1639 	 */
1640 	if (param_total_num_mbufs)
1641 		nb_mbuf_per_pool = param_total_num_mbufs;
1642 	else {
1643 		nb_mbuf_per_pool = RTE_TEST_RX_DESC_MAX +
1644 			(nb_lcores * mb_mempool_cache) +
1645 			RTE_TEST_TX_DESC_MAX + MAX_PKT_BURST;
1646 		nb_mbuf_per_pool *= RTE_MAX_ETHPORTS;
1647 	}
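
	/*
	 * Rough default sizing, assuming the usual testpmd.h values
	 * (RTE_TEST_RX_DESC_MAX = RTE_TEST_TX_DESC_MAX = 2048,
	 * MAX_PKT_BURST = 512) with 4 lcores and a 250-entry cache:
	 * 2048 + 4 * 250 + 2048 + 512 = 5608 mbufs per pool, then
	 * multiplied by RTE_MAX_ETHPORTS.
	 */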
1648 
1649 	if (numa_support) {
1650 		uint8_t i, j;
1651 
1652 		for (i = 0; i < num_sockets; i++)
1653 			for (j = 0; j < mbuf_data_size_n; j++)
1654 				mempools[i * MAX_SEGS_BUFFER_SPLIT + j] =
1655 					mbuf_pool_create(mbuf_data_size[j],
1656 							  nb_mbuf_per_pool,
1657 							  socket_ids[i], j);
1658 	} else {
1659 		uint8_t i;
1660 
1661 		for (i = 0; i < mbuf_data_size_n; i++)
1662 			mempools[i] = mbuf_pool_create
1663 					(mbuf_data_size[i],
1664 					 nb_mbuf_per_pool,
1665 					 socket_num == UMA_NO_CONFIG ?
1666 					 0 : socket_num, i);
1667 	}
1668 
1669 	init_port_config();
1670 
1671 	gso_types = DEV_TX_OFFLOAD_TCP_TSO | DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
1672 		DEV_TX_OFFLOAD_GRE_TNL_TSO | DEV_TX_OFFLOAD_UDP_TSO;
1673 	/*
1674 	 * Record which mbuf pool each logical core should use, if needed.
1675 	 */
1676 	for (lc_id = 0; lc_id < nb_lcores; lc_id++) {
1677 		mbp = mbuf_pool_find(
1678 			rte_lcore_to_socket_id(fwd_lcores_cpuids[lc_id]), 0);
1679 
1680 		if (mbp == NULL)
1681 			mbp = mbuf_pool_find(0, 0);
1682 		fwd_lcores[lc_id]->mbp = mbp;
1683 		/* initialize GSO context */
1684 		fwd_lcores[lc_id]->gso_ctx.direct_pool = mbp;
1685 		fwd_lcores[lc_id]->gso_ctx.indirect_pool = mbp;
1686 		fwd_lcores[lc_id]->gso_ctx.gso_types = gso_types;
1687 		fwd_lcores[lc_id]->gso_ctx.gso_size = RTE_ETHER_MAX_LEN -
1688 			RTE_ETHER_CRC_LEN;
1689 		fwd_lcores[lc_id]->gso_ctx.flag = 0;
1690 	}
1691 
1692 	fwd_config_setup();
1693 
1694 	/* create a gro context for each lcore */
1695 	gro_param.gro_types = RTE_GRO_TCP_IPV4;
1696 	gro_param.max_flow_num = GRO_MAX_FLUSH_CYCLES;
1697 	gro_param.max_item_per_flow = MAX_PKT_BURST;
1698 	for (lc_id = 0; lc_id < nb_lcores; lc_id++) {
1699 		gro_param.socket_id = rte_lcore_to_socket_id(
1700 				fwd_lcores_cpuids[lc_id]);
1701 		fwd_lcores[lc_id]->gro_ctx = rte_gro_ctx_create(&gro_param);
1702 		if (fwd_lcores[lc_id]->gro_ctx == NULL) {
1703 			rte_exit(EXIT_FAILURE,
1704 					"rte_gro_ctx_create() failed\n");
1705 		}
1706 	}
1707 }
1708 
1709 
1710 void
1711 reconfig(portid_t new_port_id, unsigned socket_id)
1712 {
1713 	/* Reconfiguration of Ethernet ports. */
1714 	init_config_port_offloads(new_port_id, socket_id);
1715 	init_port_config();
1716 }
1717 
1718 
1719 int
1720 init_fwd_streams(void)
1721 {
1722 	portid_t pid;
1723 	struct rte_port *port;
1724 	streamid_t sm_id, nb_fwd_streams_new;
1725 	queueid_t q;
1726 
1727 	/* Set the socket ID according to whether NUMA is enabled or not. */
1728 	RTE_ETH_FOREACH_DEV(pid) {
1729 		port = &ports[pid];
1730 		if (nb_rxq > port->dev_info.max_rx_queues) {
1731 			fprintf(stderr,
1732 				"Fail: nb_rxq(%d) is greater than max_rx_queues(%d)\n",
1733 				nb_rxq, port->dev_info.max_rx_queues);
1734 			return -1;
1735 		}
1736 		if (nb_txq > port->dev_info.max_tx_queues) {
1737 			fprintf(stderr,
1738 				"Fail: nb_txq(%d) is greater than max_tx_queues(%d)\n",
1739 				nb_txq, port->dev_info.max_tx_queues);
1740 			return -1;
1741 		}
1742 		if (numa_support) {
1743 			if (port_numa[pid] != NUMA_NO_CONFIG)
1744 				port->socket_id = port_numa[pid];
1745 			else {
1746 				port->socket_id = rte_eth_dev_socket_id(pid);
1747 
1748 				/*
1749 				 * if socket_id is invalid,
1750 				 * set to the first available socket.
1751 				 */
1752 				if (check_socket_id(port->socket_id) < 0)
1753 					port->socket_id = socket_ids[0];
1754 			}
1755 		}
1756 		else {
1757 			if (socket_num == UMA_NO_CONFIG)
1758 				port->socket_id = 0;
1759 			else
1760 				port->socket_id = socket_num;
1761 		}
1762 	}
1763 
1764 	q = RTE_MAX(nb_rxq, nb_txq);
1765 	if (q == 0) {
1766 		fprintf(stderr,
1767 			"Fail: Cannot allocate fwd streams as number of queues is 0\n");
1768 		return -1;
1769 	}
1770 	nb_fwd_streams_new = (streamid_t)(nb_ports * q);
1771 	if (nb_fwd_streams_new == nb_fwd_streams)
1772 		return 0;
1773 	/* clear the old */
1774 	if (fwd_streams != NULL) {
1775 		for (sm_id = 0; sm_id < nb_fwd_streams; sm_id++) {
1776 			if (fwd_streams[sm_id] == NULL)
1777 				continue;
1778 			rte_free(fwd_streams[sm_id]);
1779 			fwd_streams[sm_id] = NULL;
1780 		}
1781 		rte_free(fwd_streams);
1782 		fwd_streams = NULL;
1783 	}
1784 
1785 	/* init new */
1786 	nb_fwd_streams = nb_fwd_streams_new;
1787 	if (nb_fwd_streams) {
1788 		fwd_streams = rte_zmalloc("testpmd: fwd_streams",
1789 			sizeof(struct fwd_stream *) * nb_fwd_streams,
1790 			RTE_CACHE_LINE_SIZE);
1791 		if (fwd_streams == NULL)
1792 			rte_exit(EXIT_FAILURE, "rte_zmalloc(%d"
1793 				 " (struct fwd_stream *)) failed\n",
1794 				 nb_fwd_streams);
1795 
1796 		for (sm_id = 0; sm_id < nb_fwd_streams; sm_id++) {
1797 			fwd_streams[sm_id] = rte_zmalloc("testpmd:"
1798 				" struct fwd_stream", sizeof(struct fwd_stream),
1799 				RTE_CACHE_LINE_SIZE);
1800 			if (fwd_streams[sm_id] == NULL)
1801 				rte_exit(EXIT_FAILURE, "rte_zmalloc"
1802 					 "(struct fwd_stream) failed\n");
1803 		}
1804 	}
1805 
1806 	return 0;
1807 }
1808 
1809 static void
1810 pkt_burst_stats_display(const char *rx_tx, struct pkt_burst_stats *pbs)
1811 {
1812 	uint64_t total_burst, sburst;
1813 	uint64_t nb_burst;
1814 	uint64_t burst_stats[4];
1815 	uint16_t pktnb_stats[4];
1816 	uint16_t nb_pkt;
1817 	int burst_percent[4], sburstp;
1818 	int i;
1819 
1820 	/*
1821 	 * First compute the total number of packet bursts and the
1822 	 * two highest numbers of bursts of the same number of packets.
1823 	 */
1824 	memset(&burst_stats, 0x0, sizeof(burst_stats));
1825 	memset(&pktnb_stats, 0x0, sizeof(pktnb_stats));
1826 
1827 	/* Show stats for 0 burst size always */
1828 	total_burst = pbs->pkt_burst_spread[0];
1829 	burst_stats[0] = pbs->pkt_burst_spread[0];
1830 	pktnb_stats[0] = 0;
1831 
1832 	/* Find the next 2 burst sizes with highest occurrences. */
1833 	for (nb_pkt = 1; nb_pkt < MAX_PKT_BURST; nb_pkt++) {
1834 		nb_burst = pbs->pkt_burst_spread[nb_pkt];
1835 
1836 		if (nb_burst == 0)
1837 			continue;
1838 
1839 		total_burst += nb_burst;
1840 
1841 		if (nb_burst > burst_stats[1]) {
1842 			burst_stats[2] = burst_stats[1];
1843 			pktnb_stats[2] = pktnb_stats[1];
1844 			burst_stats[1] = nb_burst;
1845 			pktnb_stats[1] = nb_pkt;
1846 		} else if (nb_burst > burst_stats[2]) {
1847 			burst_stats[2] = nb_burst;
1848 			pktnb_stats[2] = nb_pkt;
1849 		}
1850 	}
1851 	if (total_burst == 0)
1852 		return;
1853 
1854 	printf("  %s-bursts : %"PRIu64" [", rx_tx, total_burst);
1855 	for (i = 0, sburst = 0, sburstp = 0; i < 4; i++) {
1856 		if (i == 3) {
1857 			printf("%d%% of other]\n", 100 - sburstp);
1858 			return;
1859 		}
1860 
1861 		sburst += burst_stats[i];
1862 		if (sburst == total_burst) {
1863 			printf("%d%% of %d pkts]\n",
1864 				100 - sburstp, (int) pktnb_stats[i]);
1865 			return;
1866 		}
1867 
1868 		burst_percent[i] =
1869 			(double)burst_stats[i] / total_burst * 100;
1870 		printf("%d%% of %d pkts + ",
1871 			burst_percent[i], (int) pktnb_stats[i]);
1872 		sburstp += burst_percent[i];
1873 	}
1874 }
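
/*
 * Example of the line produced above (made-up numbers):
 *
 *   RX-bursts : 1500000 [5% of 0 pkts + 80% of 32 pkts + 15% of other]
 */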
1875 
1876 static void
1877 fwd_stream_stats_display(streamid_t stream_id)
1878 {
1879 	struct fwd_stream *fs;
1880 	static const char *fwd_top_stats_border = "-------";
1881 
1882 	fs = fwd_streams[stream_id];
1883 	if ((fs->rx_packets == 0) && (fs->tx_packets == 0) &&
1884 	    (fs->fwd_dropped == 0))
1885 		return;
1886 	printf("\n  %s Forward Stats for RX Port=%2d/Queue=%2d -> "
1887 	       "TX Port=%2d/Queue=%2d %s\n",
1888 	       fwd_top_stats_border, fs->rx_port, fs->rx_queue,
1889 	       fs->tx_port, fs->tx_queue, fwd_top_stats_border);
1890 	printf("  RX-packets: %-14"PRIu64" TX-packets: %-14"PRIu64
1891 	       " TX-dropped: %-14"PRIu64,
1892 	       fs->rx_packets, fs->tx_packets, fs->fwd_dropped);
1893 
1894 	/* if checksum mode */
1895 	if (cur_fwd_eng == &csum_fwd_engine) {
1896 		printf("  RX- bad IP checksum: %-14"PRIu64
1897 		       "  Rx- bad L4 checksum: %-14"PRIu64
1898 		       " Rx- bad outer L4 checksum: %-14"PRIu64"\n",
1899 			fs->rx_bad_ip_csum, fs->rx_bad_l4_csum,
1900 			fs->rx_bad_outer_l4_csum);
1901 		printf(" RX- bad outer IP checksum: %-14"PRIu64"\n",
1902 			fs->rx_bad_outer_ip_csum);
1903 	} else {
1904 		printf("\n");
1905 	}
1906 
1907 	if (record_burst_stats) {
1908 		pkt_burst_stats_display("RX", &fs->rx_burst_stats);
1909 		pkt_burst_stats_display("TX", &fs->tx_burst_stats);
1910 	}
1911 }
1912 
1913 void
1914 fwd_stats_display(void)
1915 {
1916 	static const char *fwd_stats_border = "----------------------";
1917 	static const char *acc_stats_border = "+++++++++++++++";
1918 	struct {
1919 		struct fwd_stream *rx_stream;
1920 		struct fwd_stream *tx_stream;
1921 		uint64_t tx_dropped;
1922 		uint64_t rx_bad_ip_csum;
1923 		uint64_t rx_bad_l4_csum;
1924 		uint64_t rx_bad_outer_l4_csum;
1925 		uint64_t rx_bad_outer_ip_csum;
1926 	} ports_stats[RTE_MAX_ETHPORTS];
1927 	uint64_t total_rx_dropped = 0;
1928 	uint64_t total_tx_dropped = 0;
1929 	uint64_t total_rx_nombuf = 0;
1930 	struct rte_eth_stats stats;
1931 	uint64_t fwd_cycles = 0;
1932 	uint64_t total_recv = 0;
1933 	uint64_t total_xmit = 0;
1934 	struct rte_port *port;
1935 	streamid_t sm_id;
1936 	portid_t pt_id;
1937 	int i;
1938 
1939 	memset(ports_stats, 0, sizeof(ports_stats));
1940 
1941 	for (sm_id = 0; sm_id < cur_fwd_config.nb_fwd_streams; sm_id++) {
1942 		struct fwd_stream *fs = fwd_streams[sm_id];
1943 
1944 		if (cur_fwd_config.nb_fwd_streams >
1945 		    cur_fwd_config.nb_fwd_ports) {
1946 			fwd_stream_stats_display(sm_id);
1947 		} else {
1948 			ports_stats[fs->tx_port].tx_stream = fs;
1949 			ports_stats[fs->rx_port].rx_stream = fs;
1950 		}
1951 
1952 		ports_stats[fs->tx_port].tx_dropped += fs->fwd_dropped;
1953 
1954 		ports_stats[fs->rx_port].rx_bad_ip_csum += fs->rx_bad_ip_csum;
1955 		ports_stats[fs->rx_port].rx_bad_l4_csum += fs->rx_bad_l4_csum;
1956 		ports_stats[fs->rx_port].rx_bad_outer_l4_csum +=
1957 				fs->rx_bad_outer_l4_csum;
1958 		ports_stats[fs->rx_port].rx_bad_outer_ip_csum +=
1959 				fs->rx_bad_outer_ip_csum;
1960 
1961 		if (record_core_cycles)
1962 			fwd_cycles += fs->core_cycles;
1963 	}
1964 	for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
1965 		pt_id = fwd_ports_ids[i];
1966 		port = &ports[pt_id];
1967 
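		/*
		 * Report only the traffic seen since the last fwd_stats_reset():
		 * port->stats holds the counter snapshot taken at that time.
		 */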
1968 		rte_eth_stats_get(pt_id, &stats);
1969 		stats.ipackets -= port->stats.ipackets;
1970 		stats.opackets -= port->stats.opackets;
1971 		stats.ibytes -= port->stats.ibytes;
1972 		stats.obytes -= port->stats.obytes;
1973 		stats.imissed -= port->stats.imissed;
1974 		stats.oerrors -= port->stats.oerrors;
1975 		stats.rx_nombuf -= port->stats.rx_nombuf;
1976 
1977 		total_recv += stats.ipackets;
1978 		total_xmit += stats.opackets;
1979 		total_rx_dropped += stats.imissed;
1980 		total_tx_dropped += ports_stats[pt_id].tx_dropped;
1981 		total_tx_dropped += stats.oerrors;
1982 		total_rx_nombuf  += stats.rx_nombuf;
1983 
1984 		printf("\n  %s Forward statistics for port %-2d %s\n",
1985 		       fwd_stats_border, pt_id, fwd_stats_border);
1986 
1987 		printf("  RX-packets: %-14"PRIu64" RX-dropped: %-14"PRIu64
1988 		       "RX-total: %-"PRIu64"\n", stats.ipackets, stats.imissed,
1989 		       stats.ipackets + stats.imissed);
1990 
1991 		if (cur_fwd_eng == &csum_fwd_engine) {
1992 			printf("  Bad-ipcsum: %-14"PRIu64
1993 			       " Bad-l4csum: %-14"PRIu64
1994 			       "Bad-outer-l4csum: %-14"PRIu64"\n",
1995 			       ports_stats[pt_id].rx_bad_ip_csum,
1996 			       ports_stats[pt_id].rx_bad_l4_csum,
1997 			       ports_stats[pt_id].rx_bad_outer_l4_csum);
1998 			printf("  Bad-outer-ipcsum: %-14"PRIu64"\n",
1999 			       ports_stats[pt_id].rx_bad_outer_ip_csum);
2000 		}
2001 		if (stats.ierrors + stats.rx_nombuf > 0) {
2002 			printf("  RX-error: %-"PRIu64"\n", stats.ierrors);
2003 			printf("  RX-nombufs: %-14"PRIu64"\n", stats.rx_nombuf);
2004 		}
2005 
2006 		printf("  TX-packets: %-14"PRIu64" TX-dropped: %-14"PRIu64
2007 		       "TX-total: %-"PRIu64"\n",
2008 		       stats.opackets, ports_stats[pt_id].tx_dropped,
2009 		       stats.opackets + ports_stats[pt_id].tx_dropped);
2010 
2011 		if (record_burst_stats) {
2012 			if (ports_stats[pt_id].rx_stream)
2013 				pkt_burst_stats_display("RX",
2014 					&ports_stats[pt_id].rx_stream->rx_burst_stats);
2015 			if (ports_stats[pt_id].tx_stream)
2016 				pkt_burst_stats_display("TX",
2017 				&ports_stats[pt_id].tx_stream->tx_burst_stats);
2018 		}
2019 
2020 		printf("  %s--------------------------------%s\n",
2021 		       fwd_stats_border, fwd_stats_border);
2022 	}
2023 
2024 	printf("\n  %s Accumulated forward statistics for all ports"
2025 	       "%s\n",
2026 	       acc_stats_border, acc_stats_border);
2027 	printf("  RX-packets: %-14"PRIu64" RX-dropped: %-14"PRIu64"RX-total: "
2028 	       "%-"PRIu64"\n"
2029 	       "  TX-packets: %-14"PRIu64" TX-dropped: %-14"PRIu64"TX-total: "
2030 	       "%-"PRIu64"\n",
2031 	       total_recv, total_rx_dropped, total_recv + total_rx_dropped,
2032 	       total_xmit, total_tx_dropped, total_xmit + total_tx_dropped);
2033 	if (total_rx_nombuf > 0)
2034 		printf("  RX-nombufs: %-14"PRIu64"\n", total_rx_nombuf);
2035 	printf("  %s++++++++++++++++++++++++++++++++++++++++++++++"
2036 	       "%s\n",
2037 	       acc_stats_border, acc_stats_border);
2038 	if (record_core_cycles) {
2039 #define CYC_PER_MHZ 1E6
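		/*
		 * CPU cycles/packet below is the sum of the per-stream
		 * core_cycles divided by the packets handled; for pure
		 * generator modes (txonly, flowgen) the Tx count is used
		 * since nothing is expected on the Rx side.
		 */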
2040 		if (total_recv > 0 || total_xmit > 0) {
2041 			uint64_t total_pkts = 0;
2042 			if (strcmp(cur_fwd_eng->fwd_mode_name, "txonly") == 0 ||
2043 			    strcmp(cur_fwd_eng->fwd_mode_name, "flowgen") == 0)
2044 				total_pkts = total_xmit;
2045 			else
2046 				total_pkts = total_recv;
2047 
2048 			printf("\n  CPU cycles/packet=%.2F (total cycles="
2049 			       "%"PRIu64" / total %s packets=%"PRIu64") at %"PRIu64
2050 			       " MHz Clock\n",
2051 			       (double) fwd_cycles / total_pkts,
2052 			       fwd_cycles, cur_fwd_eng->fwd_mode_name, total_pkts,
2053 			       (uint64_t)(rte_get_tsc_hz() / CYC_PER_MHZ));
2054 		}
2055 	}
2056 }
2057 
2058 void
2059 fwd_stats_reset(void)
2060 {
2061 	streamid_t sm_id;
2062 	portid_t pt_id;
2063 	int i;
2064 
2065 	for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
2066 		pt_id = fwd_ports_ids[i];
2067 		rte_eth_stats_get(pt_id, &ports[pt_id].stats);
2068 	}
2069 	for (sm_id = 0; sm_id < cur_fwd_config.nb_fwd_streams; sm_id++) {
2070 		struct fwd_stream *fs = fwd_streams[sm_id];
2071 
2072 		fs->rx_packets = 0;
2073 		fs->tx_packets = 0;
2074 		fs->fwd_dropped = 0;
2075 		fs->rx_bad_ip_csum = 0;
2076 		fs->rx_bad_l4_csum = 0;
2077 		fs->rx_bad_outer_l4_csum = 0;
2078 		fs->rx_bad_outer_ip_csum = 0;
2079 
2080 		memset(&fs->rx_burst_stats, 0, sizeof(fs->rx_burst_stats));
2081 		memset(&fs->tx_burst_stats, 0, sizeof(fs->tx_burst_stats));
2082 		fs->core_cycles = 0;
2083 	}
2084 }
2085 
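/*
 * Drain the Rx queues of all forwarding ports, freeing any pending mbufs,
 * so that a new forwarding run starts from empty queues.
 */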
2086 static void
2087 flush_fwd_rx_queues(void)
2088 {
2089 	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
2090 	portid_t  rxp;
2091 	portid_t port_id;
2092 	queueid_t rxq;
2093 	uint16_t  nb_rx;
2094 	uint16_t  i;
2095 	uint8_t   j;
2096 	uint64_t prev_tsc = 0, diff_tsc, cur_tsc, timer_tsc = 0;
2097 	uint64_t timer_period;
2098 
2099 	if (num_procs > 1) {
2100 		printf("multi-process does not support flushing fwd Rx queues, skipping\n");
2101 		return;
2102 	}
2103 
2104 	/* convert to number of cycles */
2105 	timer_period = rte_get_timer_hz(); /* 1 second timeout */
2106 
2107 	for (j = 0; j < 2; j++) {
2108 		for (rxp = 0; rxp < cur_fwd_config.nb_fwd_ports; rxp++) {
2109 			for (rxq = 0; rxq < nb_rxq; rxq++) {
2110 				port_id = fwd_ports_ids[rxp];
2111 				/*
2112 				 * testpmd can get stuck in the do/while loop
2113 				 * below if rte_eth_rx_burst() keeps returning
2114 				 * packets, so a timer is used to exit the loop
2115 				 * once the 1 second timeout expires.
2116 				 */
2117 				prev_tsc = rte_rdtsc();
2118 				do {
2119 					nb_rx = rte_eth_rx_burst(port_id, rxq,
2120 						pkts_burst, MAX_PKT_BURST);
2121 					for (i = 0; i < nb_rx; i++)
2122 						rte_pktmbuf_free(pkts_burst[i]);
2123 
2124 					cur_tsc = rte_rdtsc();
2125 					diff_tsc = cur_tsc - prev_tsc;
2126 					timer_tsc += diff_tsc;
2127 				} while ((nb_rx > 0) &&
2128 					(timer_tsc < timer_period));
2129 				timer_tsc = 0;
2130 			}
2131 		}
2132 		rte_delay_ms(10); /* wait 10 milli-seconds before retrying */
2133 	}
2134 }
2135 
2136 static void
2137 run_pkt_fwd_on_lcore(struct fwd_lcore *fc, packet_fwd_t pkt_fwd)
2138 {
2139 	struct fwd_stream **fsm;
2140 	streamid_t nb_fs;
2141 	streamid_t sm_id;
2142 #ifdef RTE_LIB_BITRATESTATS
2143 	uint64_t tics_per_1sec;
2144 	uint64_t tics_datum;
2145 	uint64_t tics_current;
2146 	uint16_t i, cnt_ports;
2147 
2148 	cnt_ports = nb_ports;
2149 	tics_datum = rte_rdtsc();
2150 	tics_per_1sec = rte_get_timer_hz();
2151 #endif
2152 	fsm = &fwd_streams[fc->stream_idx];
2153 	nb_fs = fc->stream_nb;
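	/*
	 * Run the forwarding engine on every stream assigned to this lcore
	 * until the lcore is told to stop (fc->stopped is set when packet
	 * forwarding is stopped).
	 */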
2154 	do {
2155 		for (sm_id = 0; sm_id < nb_fs; sm_id++)
2156 			(*pkt_fwd)(fsm[sm_id]);
2157 #ifdef RTE_LIB_BITRATESTATS
2158 		if (bitrate_enabled != 0 &&
2159 				bitrate_lcore_id == rte_lcore_id()) {
2160 			tics_current = rte_rdtsc();
2161 			if (tics_current - tics_datum >= tics_per_1sec) {
2162 				/* Periodic bitrate calculation */
2163 				for (i = 0; i < cnt_ports; i++)
2164 					rte_stats_bitrate_calc(bitrate_data,
2165 						ports_ids[i]);
2166 				tics_datum = tics_current;
2167 			}
2168 		}
2169 #endif
2170 #ifdef RTE_LIB_LATENCYSTATS
2171 		if (latencystats_enabled != 0 &&
2172 				latencystats_lcore_id == rte_lcore_id())
2173 			rte_latencystats_update();
2174 #endif
2175 
2176 	} while (! fc->stopped);
2177 }
2178 
2179 static int
2180 start_pkt_forward_on_core(void *fwd_arg)
2181 {
2182 	run_pkt_fwd_on_lcore((struct fwd_lcore *) fwd_arg,
2183 			     cur_fwd_config.fwd_eng->packet_fwd);
2184 	return 0;
2185 }
2186 
2187 /*
2188  * Run the TXONLY packet forwarding engine to send a single burst of packets.
2189  * Used to start communication flows in network loopback test configurations.
2190  */
2191 static int
2192 run_one_txonly_burst_on_core(void *fwd_arg)
2193 {
2194 	struct fwd_lcore *fwd_lc;
2195 	struct fwd_lcore tmp_lcore;
2196 
2197 	fwd_lc = (struct fwd_lcore *) fwd_arg;
2198 	tmp_lcore = *fwd_lc;
2199 	tmp_lcore.stopped = 1;
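	/*
	 * With 'stopped' already set, run_pkt_fwd_on_lcore() executes its
	 * do/while body exactly once, i.e. a single burst per stream.
	 */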
2200 	run_pkt_fwd_on_lcore(&tmp_lcore, tx_only_engine.packet_fwd);
2201 	return 0;
2202 }
2203 
2204 /*
2205  * Launch packet forwarding:
2206  *     - Setup per-port forwarding context.
2207  *     - Launch logical cores with their forwarding configuration.
2208  */
2209 static void
2210 launch_packet_forwarding(lcore_function_t *pkt_fwd_on_lcore)
2211 {
2212 	unsigned int i;
2213 	unsigned int lc_id;
2214 	int diag;
2215 
2216 	for (i = 0; i < cur_fwd_config.nb_fwd_lcores; i++) {
2217 		lc_id = fwd_lcores_cpuids[i];
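		/*
		 * In interactive mode the main lcore runs the command line,
		 * so forwarding is only launched on the other lcores.
		 */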
2218 		if ((interactive == 0) || (lc_id != rte_lcore_id())) {
2219 			fwd_lcores[i]->stopped = 0;
2220 			diag = rte_eal_remote_launch(pkt_fwd_on_lcore,
2221 						     fwd_lcores[i], lc_id);
2222 			if (diag != 0)
2223 				fprintf(stderr,
2224 					"launch lcore %u failed - diag=%d\n",
2225 					lc_id, diag);
2226 		}
2227 	}
2228 }
2229 
2230 /*
2231  * Launch packet forwarding configuration.
2232  */
2233 void
2234 start_packet_forwarding(int with_tx_first)
2235 {
2236 	port_fwd_begin_t port_fwd_begin;
2237 	port_fwd_end_t  port_fwd_end;
2238 	unsigned int i;
2239 
2240 	if (strcmp(cur_fwd_eng->fwd_mode_name, "rxonly") == 0 && !nb_rxq)
2241 		rte_exit(EXIT_FAILURE, "Rx queue count is 0, cannot use rxonly fwd mode\n");
2242 
2243 	if (strcmp(cur_fwd_eng->fwd_mode_name, "txonly") == 0 && !nb_txq)
2244 		rte_exit(EXIT_FAILURE, "Tx queue count is 0, cannot use txonly fwd mode\n");
2245 
2246 	if ((strcmp(cur_fwd_eng->fwd_mode_name, "rxonly") != 0 &&
2247 		strcmp(cur_fwd_eng->fwd_mode_name, "txonly") != 0) &&
2248 		(!nb_rxq || !nb_txq))
2249 		rte_exit(EXIT_FAILURE,
2250 			"Either the Rx or Tx queue count is 0, cannot use %s fwd mode\n",
2251 			cur_fwd_eng->fwd_mode_name);
2252 
2253 	if (all_ports_started() == 0) {
2254 		fprintf(stderr, "Not all ports were started\n");
2255 		return;
2256 	}
2257 	if (test_done == 0) {
2258 		fprintf(stderr, "Packet forwarding already started\n");
2259 		return;
2260 	}
2261 
2262 	fwd_config_setup();
2263 
2264 	port_fwd_begin = cur_fwd_config.fwd_eng->port_fwd_begin;
2265 	if (port_fwd_begin != NULL) {
2266 		for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
2267 			if (port_fwd_begin(fwd_ports_ids[i])) {
2268 				fprintf(stderr,
2269 					"Packet forwarding is not ready\n");
2270 				return;
2271 			}
2272 		}
2273 	}
2274 
2275 	if (with_tx_first) {
2276 		port_fwd_begin = tx_only_engine.port_fwd_begin;
2277 		if (port_fwd_begin != NULL) {
2278 			for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
2279 				if (port_fwd_begin(fwd_ports_ids[i])) {
2280 					fprintf(stderr,
2281 						"Packet forwarding is not ready\n");
2282 					return;
2283 				}
2284 			}
2285 		}
2286 	}
2287 
2288 	test_done = 0;
2289 
2290 	if(!no_flush_rx)
2291 		flush_fwd_rx_queues();
2292 
2293 	pkt_fwd_config_display(&cur_fwd_config);
2294 	rxtx_config_display();
2295 
2296 	fwd_stats_reset();
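	/*
	 * When --tx-first was given, send the requested number of warm-up
	 * bursts with the txonly engine before launching the configured
	 * forwarding engine below.
	 */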
2297 	if (with_tx_first) {
2298 		while (with_tx_first--) {
2299 			launch_packet_forwarding(
2300 					run_one_txonly_burst_on_core);
2301 			rte_eal_mp_wait_lcore();
2302 		}
2303 		port_fwd_end = tx_only_engine.port_fwd_end;
2304 		if (port_fwd_end != NULL) {
2305 			for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++)
2306 				(*port_fwd_end)(fwd_ports_ids[i]);
2307 		}
2308 	}
2309 	launch_packet_forwarding(start_pkt_forward_on_core);
2310 }
2311 
2312 void
2313 stop_packet_forwarding(void)
2314 {
2315 	port_fwd_end_t port_fwd_end;
2316 	lcoreid_t lc_id;
2317 	portid_t pt_id;
2318 	int i;
2319 
2320 	if (test_done) {
2321 		fprintf(stderr, "Packet forwarding not started\n");
2322 		return;
2323 	}
2324 	printf("Telling cores to stop...");
2325 	for (lc_id = 0; lc_id < cur_fwd_config.nb_fwd_lcores; lc_id++)
2326 		fwd_lcores[lc_id]->stopped = 1;
2327 	printf("\nWaiting for lcores to finish...\n");
2328 	rte_eal_mp_wait_lcore();
2329 	port_fwd_end = cur_fwd_config.fwd_eng->port_fwd_end;
2330 	if (port_fwd_end != NULL) {
2331 		for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
2332 			pt_id = fwd_ports_ids[i];
2333 			(*port_fwd_end)(pt_id);
2334 		}
2335 	}
2336 
2337 	fwd_stats_display();
2338 
2339 	printf("\nDone.\n");
2340 	test_done = 1;
2341 }
2342 
2343 void
2344 dev_set_link_up(portid_t pid)
2345 {
2346 	if (rte_eth_dev_set_link_up(pid) < 0)
2347 		fprintf(stderr, "\nSet link up failed.\n");
2348 }
2349 
2350 void
2351 dev_set_link_down(portid_t pid)
2352 {
2353 	if (rte_eth_dev_set_link_down(pid) < 0)
2354 		fprintf(stderr, "\nSet link down failed.\n");
2355 }
2356 
2357 static int
2358 all_ports_started(void)
2359 {
2360 	portid_t pi;
2361 	struct rte_port *port;
2362 
2363 	RTE_ETH_FOREACH_DEV(pi) {
2364 		port = &ports[pi];
2365 		/* Check if there is a port which is not started */
2366 		if ((port->port_status != RTE_PORT_STARTED) &&
2367 			(port->slave_flag == 0))
2368 			return 0;
2369 	}
2370 
2371 	/* All ports are started */
2372 	return 1;
2373 }
2374 
2375 int
2376 port_is_stopped(portid_t port_id)
2377 {
2378 	struct rte_port *port = &ports[port_id];
2379 
2380 	if ((port->port_status != RTE_PORT_STOPPED) &&
2381 	    (port->slave_flag == 0))
2382 		return 0;
2383 	return 1;
2384 }
2385 
2386 int
2387 all_ports_stopped(void)
2388 {
2389 	portid_t pi;
2390 
2391 	RTE_ETH_FOREACH_DEV(pi) {
2392 		if (!port_is_stopped(pi))
2393 			return 0;
2394 	}
2395 
2396 	return 1;
2397 }
2398 
2399 int
2400 port_is_started(portid_t port_id)
2401 {
2402 	if (port_id_is_invalid(port_id, ENABLED_WARN))
2403 		return 0;
2404 
2405 	if (ports[port_id].port_status != RTE_PORT_STARTED)
2406 		return 0;
2407 
2408 	return 1;
2409 }
2410 
2411 /* Configure the Rx and Tx hairpin queues for the selected port. */
2412 static int
2413 setup_hairpin_queues(portid_t pi, portid_t p_pi, uint16_t cnt_pi)
2414 {
2415 	queueid_t qi;
2416 	struct rte_eth_hairpin_conf hairpin_conf = {
2417 		.peer_count = 1,
2418 	};
2419 	int i;
2420 	int diag;
2421 	struct rte_port *port = &ports[pi];
2422 	uint16_t peer_rx_port = pi;
2423 	uint16_t peer_tx_port = pi;
2424 	uint32_t manual = 1;
2425 	uint32_t tx_exp = hairpin_mode & 0x10;
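	/*
	 * hairpin_mode bit meanings, as handled below: with none of the low
	 * bits set each port hairpins to itself and no manual binding is
	 * used; 0x1 chains each port to the next started port (the last one
	 * wraps to the first); 0x2 pairs the ports two by two; 0x10 sets the
	 * tx_explicit flag of the hairpin configuration.
	 */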
2426 
2427 	if (!(hairpin_mode & 0xf)) {
2428 		peer_rx_port = pi;
2429 		peer_tx_port = pi;
2430 		manual = 0;
2431 	} else if (hairpin_mode & 0x1) {
2432 		peer_tx_port = rte_eth_find_next_owned_by(pi + 1,
2433 						       RTE_ETH_DEV_NO_OWNER);
2434 		if (peer_tx_port >= RTE_MAX_ETHPORTS)
2435 			peer_tx_port = rte_eth_find_next_owned_by(0,
2436 						RTE_ETH_DEV_NO_OWNER);
2437 		if (p_pi != RTE_MAX_ETHPORTS) {
2438 			peer_rx_port = p_pi;
2439 		} else {
2440 			uint16_t next_pi;
2441 
2442 			/* Last port will be the peer RX port of the first. */
2443 			RTE_ETH_FOREACH_DEV(next_pi)
2444 				peer_rx_port = next_pi;
2445 		}
2446 		manual = 1;
2447 	} else if (hairpin_mode & 0x2) {
2448 		if (cnt_pi & 0x1) {
2449 			peer_rx_port = p_pi;
2450 		} else {
2451 			peer_rx_port = rte_eth_find_next_owned_by(pi + 1,
2452 						RTE_ETH_DEV_NO_OWNER);
2453 			if (peer_rx_port >= RTE_MAX_ETHPORTS)
2454 				peer_rx_port = pi;
2455 		}
2456 		peer_tx_port = peer_rx_port;
2457 		manual = 1;
2458 	}
2459 
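	/*
	 * Hairpin queues are appended after the regular ones: Tx hairpin
	 * queues use indices [nb_txq, nb_txq + nb_hairpinq) and peer with
	 * the Rx hairpin queues of peer_rx_port; the Rx side below is set
	 * up symmetrically.
	 */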
2460 	for (qi = nb_txq, i = 0; qi < nb_hairpinq + nb_txq; qi++) {
2461 		hairpin_conf.peers[0].port = peer_rx_port;
2462 		hairpin_conf.peers[0].queue = i + nb_rxq;
2463 		hairpin_conf.manual_bind = !!manual;
2464 		hairpin_conf.tx_explicit = !!tx_exp;
2465 		diag = rte_eth_tx_hairpin_queue_setup
2466 			(pi, qi, nb_txd, &hairpin_conf);
2467 		i++;
2468 		if (diag == 0)
2469 			continue;
2470 
2471 		/* Failed to set up a Tx hairpin queue, return */
2472 		if (rte_atomic16_cmpset(&(port->port_status),
2473 					RTE_PORT_HANDLING,
2474 					RTE_PORT_STOPPED) == 0)
2475 			fprintf(stderr,
2476 				"Port %d can not be set back to stopped\n", pi);
2477 		fprintf(stderr, "Failed to configure port %d hairpin queues\n",
2478 			pi);
2479 		/* try to reconfigure queues next time */
2480 		port->need_reconfig_queues = 1;
2481 		return -1;
2482 	}
2483 	for (qi = nb_rxq, i = 0; qi < nb_hairpinq + nb_rxq; qi++) {
2484 		hairpin_conf.peers[0].port = peer_tx_port;
2485 		hairpin_conf.peers[0].queue = i + nb_txq;
2486 		hairpin_conf.manual_bind = !!manual;
2487 		hairpin_conf.tx_explicit = !!tx_exp;
2488 		diag = rte_eth_rx_hairpin_queue_setup
2489 			(pi, qi, nb_rxd, &hairpin_conf);
2490 		i++;
2491 		if (diag == 0)
2492 			continue;
2493 
2494 		/* Failed to set up an Rx hairpin queue, return */
2495 		if (rte_atomic16_cmpset(&(port->port_status),
2496 					RTE_PORT_HANDLING,
2497 					RTE_PORT_STOPPED) == 0)
2498 			fprintf(stderr,
2499 				"Port %d can not be set back to stopped\n", pi);
2500 		fprintf(stderr, "Failed to configure port %d hairpin queues\n",
2501 			pi);
2502 		/* try to reconfigure queues next time */
2503 		port->need_reconfig_queues = 1;
2504 		return -1;
2505 	}
2506 	return 0;
2507 }
2508 
2509 /* Configure the Rx queue with an optional buffer split. */
2510 int
2511 rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
2512 	       uint16_t nb_rx_desc, unsigned int socket_id,
2513 	       struct rte_eth_rxconf *rx_conf, struct rte_mempool *mp)
2514 {
2515 	union rte_eth_rxseg rx_useg[MAX_SEGS_BUFFER_SPLIT] = {};
2516 	unsigned int i, mp_n;
2517 	int ret;
2518 
2519 	if (rx_pkt_nb_segs <= 1 ||
2520 	    (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) == 0) {
2521 		rx_conf->rx_seg = NULL;
2522 		rx_conf->rx_nseg = 0;
2523 		ret = rte_eth_rx_queue_setup(port_id, rx_queue_id,
2524 					     nb_rx_desc, socket_id,
2525 					     rx_conf, mp);
2526 		return ret;
2527 	}
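	/*
	 * Buffer split path: build one rte_eth_rxseg_split descriptor per
	 * configured segment (length, offset and mempool) and point rx_conf
	 * at the local array only for the duration of the setup call below.
	 */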
2528 	for (i = 0; i < rx_pkt_nb_segs; i++) {
2529 		struct rte_eth_rxseg_split *rx_seg = &rx_useg[i].split;
2530 		struct rte_mempool *mpx;
2531 		/*
2532 		 * Use the last valid pool for any segment whose index
2533 		 * exceeds the number of configured mempools.
2534 		 */
2535 		mp_n = (i >= mbuf_data_size_n) ? mbuf_data_size_n - 1 : i;
2536 		mpx = mbuf_pool_find(socket_id, mp_n);
2537 		/* Handle zero as mbuf data buffer size. */
2538 		rx_seg->length = rx_pkt_seg_lengths[i] ?
2539 				   rx_pkt_seg_lengths[i] :
2540 				   mbuf_data_size[mp_n];
2541 		rx_seg->offset = i < rx_pkt_nb_offs ?
2542 				   rx_pkt_seg_offsets[i] : 0;
2543 		rx_seg->mp = mpx ? mpx : mp;
2544 	}
2545 	rx_conf->rx_nseg = rx_pkt_nb_segs;
2546 	rx_conf->rx_seg = rx_useg;
2547 	ret = rte_eth_rx_queue_setup(port_id, rx_queue_id, nb_rx_desc,
2548 				    socket_id, rx_conf, NULL);
2549 	rx_conf->rx_seg = NULL;
2550 	rx_conf->rx_nseg = 0;
2551 	return ret;
2552 }
2553 
2554 int
2555 start_port(portid_t pid)
2556 {
2557 	int diag, need_check_link_status = -1;
2558 	portid_t pi;
2559 	portid_t p_pi = RTE_MAX_ETHPORTS;
2560 	portid_t pl[RTE_MAX_ETHPORTS];
2561 	portid_t peer_pl[RTE_MAX_ETHPORTS];
2562 	uint16_t cnt_pi = 0;
2563 	uint16_t cfg_pi = 0;
2564 	int peer_pi;
2565 	queueid_t qi;
2566 	struct rte_port *port;
2567 	struct rte_eth_hairpin_cap cap;
2568 
2569 	if (port_id_is_invalid(pid, ENABLED_WARN))
2570 		return 0;
2571 
2572 	RTE_ETH_FOREACH_DEV(pi) {
2573 		if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
2574 			continue;
2575 
2576 		need_check_link_status = 0;
2577 		port = &ports[pi];
2578 		if (rte_atomic16_cmpset(&(port->port_status), RTE_PORT_STOPPED,
2579 						 RTE_PORT_HANDLING) == 0) {
2580 			fprintf(stderr, "Port %d is not stopped, skipping it\n", pi);
2581 			continue;
2582 		}
2583 
2584 		if (port->need_reconfig > 0) {
2585 			port->need_reconfig = 0;
2586 
2587 			if (flow_isolate_all) {
2588 				int ret = port_flow_isolate(pi, 1);
2589 				if (ret) {
2590 					fprintf(stderr,
2591 						"Failed to apply isolated mode on port %d\n",
2592 						pi);
2593 					return -1;
2594 				}
2595 			}
2596 			configure_rxtx_dump_callbacks(0);
2597 			printf("Configuring Port %d (socket %u)\n", pi,
2598 					port->socket_id);
2599 			if (nb_hairpinq > 0 &&
2600 			    rte_eth_dev_hairpin_capability_get(pi, &cap)) {
2601 				fprintf(stderr,
2602 					"Port %d doesn't support hairpin queues\n",
2603 					pi);
2604 				return -1;
2605 			}
2606 			/* configure port */
2607 			diag = eth_dev_configure_mp(pi, nb_rxq + nb_hairpinq,
2608 						     nb_txq + nb_hairpinq,
2609 						     &(port->dev_conf));
2610 			if (diag != 0) {
2611 				if (rte_atomic16_cmpset(&(port->port_status),
2612 				RTE_PORT_HANDLING, RTE_PORT_STOPPED) == 0)
2613 					fprintf(stderr,
2614 						"Port %d can not be set back to stopped\n",
2615 						pi);
2616 				fprintf(stderr, "Failed to configure port %d\n",
2617 					pi);
2618 				/* try to reconfigure port next time */
2619 				port->need_reconfig = 1;
2620 				return -1;
2621 			}
2622 		}
2623 		if (port->need_reconfig_queues > 0 && is_proc_primary()) {
2624 			port->need_reconfig_queues = 0;
2625 			/* setup tx queues */
2626 			for (qi = 0; qi < nb_txq; qi++) {
2627 				if ((numa_support) &&
2628 					(txring_numa[pi] != NUMA_NO_CONFIG))
2629 					diag = rte_eth_tx_queue_setup(pi, qi,
2630 						port->nb_tx_desc[qi],
2631 						txring_numa[pi],
2632 						&(port->tx_conf[qi]));
2633 				else
2634 					diag = rte_eth_tx_queue_setup(pi, qi,
2635 						port->nb_tx_desc[qi],
2636 						port->socket_id,
2637 						&(port->tx_conf[qi]));
2638 
2639 				if (diag == 0)
2640 					continue;
2641 
2642 				/* Failed to set up a Tx queue, return */
2643 				if (rte_atomic16_cmpset(&(port->port_status),
2644 							RTE_PORT_HANDLING,
2645 							RTE_PORT_STOPPED) == 0)
2646 					fprintf(stderr,
2647 						"Port %d can not be set back to stopped\n",
2648 						pi);
2649 				fprintf(stderr,
2650 					"Failed to configure port %d Tx queues\n",
2651 					pi);
2652 				/* try to reconfigure queues next time */
2653 				port->need_reconfig_queues = 1;
2654 				return -1;
2655 			}
2656 			for (qi = 0; qi < nb_rxq; qi++) {
2657 				/* setup rx queues */
2658 				if ((numa_support) &&
2659 					(rxring_numa[pi] != NUMA_NO_CONFIG)) {
2660 					struct rte_mempool * mp =
2661 						mbuf_pool_find
2662 							(rxring_numa[pi], 0);
2663 					if (mp == NULL) {
2664 						fprintf(stderr,
2665 							"Failed to set up Rx queue: no mempool allocated on socket %d\n",
2666 							rxring_numa[pi]);
2667 						return -1;
2668 					}
2669 
2670 					diag = rx_queue_setup(pi, qi,
2671 					     port->nb_rx_desc[qi],
2672 					     rxring_numa[pi],
2673 					     &(port->rx_conf[qi]),
2674 					     mp);
2675 				} else {
2676 					struct rte_mempool *mp =
2677 						mbuf_pool_find
2678 							(port->socket_id, 0);
2679 					if (mp == NULL) {
2680 						fprintf(stderr,
2681 							"Failed to set up Rx queue: no mempool allocated on socket %d\n",
2682 							port->socket_id);
2683 						return -1;
2684 					}
2685 					diag = rx_queue_setup(pi, qi,
2686 					     port->nb_rx_desc[qi],
2687 					     port->socket_id,
2688 					     &(port->rx_conf[qi]),
2689 					     mp);
2690 				}
2691 				if (diag == 0)
2692 					continue;
2693 
2694 				/* Failed to set up an Rx queue, return */
2695 				if (rte_atomic16_cmpset(&(port->port_status),
2696 							RTE_PORT_HANDLING,
2697 							RTE_PORT_STOPPED) == 0)
2698 					fprintf(stderr,
2699 						"Port %d can not be set back to stopped\n",
2700 						pi);
2701 				fprintf(stderr,
2702 					"Failed to configure port %d Rx queues\n",
2703 					pi);
2704 				/* try to reconfigure queues next time */
2705 				port->need_reconfig_queues = 1;
2706 				return -1;
2707 			}
2708 			/* setup hairpin queues */
2709 			if (setup_hairpin_queues(pi, p_pi, cnt_pi) != 0)
2710 				return -1;
2711 		}
2712 		configure_rxtx_dump_callbacks(verbose_level);
2713 		if (clear_ptypes) {
2714 			diag = rte_eth_dev_set_ptypes(pi, RTE_PTYPE_UNKNOWN,
2715 					NULL, 0);
2716 			if (diag < 0)
2717 				fprintf(stderr,
2718 					"Port %d: Failed to disable Ptype parsing\n",
2719 					pi);
2720 		}
2721 
2722 		p_pi = pi;
2723 		cnt_pi++;
2724 
2725 		/* start port */
2726 		diag = eth_dev_start_mp(pi);
2727 		if (diag < 0) {
2728 			fprintf(stderr, "Failed to start port %d: %s\n",
2729 				pi, rte_strerror(-diag));
2730 
2731 			/* Failed to start the port, set it back to stopped */
2732 			if (rte_atomic16_cmpset(&(port->port_status),
2733 				RTE_PORT_HANDLING, RTE_PORT_STOPPED) == 0)
2734 				fprintf(stderr,
2735 					"Port %d can not be set back to stopped\n",
2736 					pi);
2737 			continue;
2738 		}
2739 
2740 		if (rte_atomic16_cmpset(&(port->port_status),
2741 			RTE_PORT_HANDLING, RTE_PORT_STARTED) == 0)
2742 			fprintf(stderr, "Port %d can not be set into started\n",
2743 				pi);
2744 
2745 		if (eth_macaddr_get_print_err(pi, &port->eth_addr) == 0)
2746 			printf("Port %d: " RTE_ETHER_ADDR_PRT_FMT "\n", pi,
2747 					RTE_ETHER_ADDR_BYTES(&port->eth_addr));
2748 
2749 		/* at least one port started, need to check link status */
2750 		need_check_link_status = 1;
2751 
2752 		pl[cfg_pi++] = pi;
2753 	}
2754 
2755 	if (need_check_link_status == 1 && !no_link_check)
2756 		check_all_ports_link_status(RTE_PORT_ALL);
2757 	else if (need_check_link_status == 0)
2758 		fprintf(stderr, "Please stop the ports first\n");
2759 
2760 	if (hairpin_mode & 0xf) {
2761 		uint16_t i;
2762 		int j;
2763 
2764 		/* bind all started hairpin ports */
2765 		for (i = 0; i < cfg_pi; i++) {
2766 			pi = pl[i];
2767 			/* bind current Tx to all peer Rx */
2768 			peer_pi = rte_eth_hairpin_get_peer_ports(pi, peer_pl,
2769 							RTE_MAX_ETHPORTS, 1);
2770 			if (peer_pi < 0)
2771 				return peer_pi;
2772 			for (j = 0; j < peer_pi; j++) {
2773 				if (!port_is_started(peer_pl[j]))
2774 					continue;
2775 				diag = rte_eth_hairpin_bind(pi, peer_pl[j]);
2776 				if (diag < 0) {
2777 					fprintf(stderr,
2778 						"Error during binding hairpin Tx port %u to %u: %s\n",
2779 						pi, peer_pl[j],
2780 						rte_strerror(-diag));
2781 					return -1;
2782 				}
2783 			}
2784 			/* bind all peer Tx to current Rx */
2785 			peer_pi = rte_eth_hairpin_get_peer_ports(pi, peer_pl,
2786 							RTE_MAX_ETHPORTS, 0);
2787 			if (peer_pi < 0)
2788 				return peer_pi;
2789 			for (j = 0; j < peer_pi; j++) {
2790 				if (!port_is_started(peer_pl[j]))
2791 					continue;
2792 				diag = rte_eth_hairpin_bind(peer_pl[j], pi);
2793 				if (diag < 0) {
2794 					fprintf(stderr,
2795 						"Error during binding hairpin Tx port %u to %u: %s\n",
2796 						peer_pl[j], pi,
2797 						rte_strerror(-diag));
2798 					return -1;
2799 				}
2800 			}
2801 		}
2802 	}
2803 
2804 	printf("Done\n");
2805 	return 0;
2806 }
2807 
2808 void
2809 stop_port(portid_t pid)
2810 {
2811 	portid_t pi;
2812 	struct rte_port *port;
2813 	int need_check_link_status = 0;
2814 	portid_t peer_pl[RTE_MAX_ETHPORTS];
2815 	int peer_pi;
2816 
2817 	if (port_id_is_invalid(pid, ENABLED_WARN))
2818 		return;
2819 
2820 	printf("Stopping ports...\n");
2821 
2822 	RTE_ETH_FOREACH_DEV(pi) {
2823 		if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
2824 			continue;
2825 
2826 		if (port_is_forwarding(pi) != 0 && test_done == 0) {
2827 			fprintf(stderr,
2828 				"Please remove port %d from forwarding configuration.\n",
2829 				pi);
2830 			continue;
2831 		}
2832 
2833 		if (port_is_bonding_slave(pi)) {
2834 			fprintf(stderr,
2835 				"Please remove port %d from bonded device.\n",
2836 				pi);
2837 			continue;
2838 		}
2839 
2840 		port = &ports[pi];
2841 		if (rte_atomic16_cmpset(&(port->port_status), RTE_PORT_STARTED,
2842 						RTE_PORT_HANDLING) == 0)
2843 			continue;
2844 
2845 		if (hairpin_mode & 0xf) {
2846 			int j;
2847 
2848 			rte_eth_hairpin_unbind(pi, RTE_MAX_ETHPORTS);
2849 			/* unbind all peer Tx from current Rx */
2850 			peer_pi = rte_eth_hairpin_get_peer_ports(pi, peer_pl,
2851 							RTE_MAX_ETHPORTS, 0);
2852 			if (peer_pi < 0)
2853 				continue;
2854 			for (j = 0; j < peer_pi; j++) {
2855 				if (!port_is_started(peer_pl[j]))
2856 					continue;
2857 				rte_eth_hairpin_unbind(peer_pl[j], pi);
2858 			}
2859 		}
2860 
2861 		if (port->flow_list)
2862 			port_flow_flush(pi);
2863 
2864 		if (eth_dev_stop_mp(pi) != 0)
2865 			RTE_LOG(ERR, EAL, "rte_eth_dev_stop failed for port %u\n",
2866 				pi);
2867 
2868 		if (rte_atomic16_cmpset(&(port->port_status),
2869 			RTE_PORT_HANDLING, RTE_PORT_STOPPED) == 0)
2870 			fprintf(stderr, "Port %d can not be set into stopped\n",
2871 				pi);
2872 		need_check_link_status = 1;
2873 	}
2874 	if (need_check_link_status && !no_link_check)
2875 		check_all_ports_link_status(RTE_PORT_ALL);
2876 
2877 	printf("Done\n");
2878 }
2879 
2880 static void
2881 remove_invalid_ports_in(portid_t *array, portid_t *total)
2882 {
2883 	portid_t i;
2884 	portid_t new_total = 0;
2885 
2886 	for (i = 0; i < *total; i++)
2887 		if (!port_id_is_invalid(array[i], DISABLED_WARN)) {
2888 			array[new_total] = array[i];
2889 			new_total++;
2890 		}
2891 	*total = new_total;
2892 }
2893 
2894 static void
2895 remove_invalid_ports(void)
2896 {
2897 	remove_invalid_ports_in(ports_ids, &nb_ports);
2898 	remove_invalid_ports_in(fwd_ports_ids, &nb_fwd_ports);
2899 	nb_cfg_ports = nb_fwd_ports;
2900 }
2901 
2902 void
2903 close_port(portid_t pid)
2904 {
2905 	portid_t pi;
2906 	struct rte_port *port;
2907 
2908 	if (port_id_is_invalid(pid, ENABLED_WARN))
2909 		return;
2910 
2911 	printf("Closing ports...\n");
2912 
2913 	RTE_ETH_FOREACH_DEV(pi) {
2914 		if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
2915 			continue;
2916 
2917 		if (port_is_forwarding(pi) != 0 && test_done == 0) {
2918 			fprintf(stderr,
2919 				"Please remove port %d from forwarding configuration.\n",
2920 				pi);
2921 			continue;
2922 		}
2923 
2924 		if (port_is_bonding_slave(pi)) {
2925 			fprintf(stderr,
2926 				"Please remove port %d from bonded device.\n",
2927 				pi);
2928 			continue;
2929 		}
2930 
2931 		port = &ports[pi];
2932 		if (rte_atomic16_cmpset(&(port->port_status),
2933 			RTE_PORT_CLOSED, RTE_PORT_CLOSED) == 1) {
2934 			fprintf(stderr, "Port %d is already closed\n", pi);
2935 			continue;
2936 		}
2937 
2938 		if (is_proc_primary()) {
2939 			port_flow_flush(pi);
2940 			rte_eth_dev_close(pi);
2941 		}
2942 	}
2943 
2944 	remove_invalid_ports();
2945 	printf("Done\n");
2946 }
2947 
2948 void
2949 reset_port(portid_t pid)
2950 {
2951 	int diag;
2952 	portid_t pi;
2953 	struct rte_port *port;
2954 
2955 	if (port_id_is_invalid(pid, ENABLED_WARN))
2956 		return;
2957 
2958 	if ((pid == (portid_t)RTE_PORT_ALL && !all_ports_stopped()) ||
2959 		(pid != (portid_t)RTE_PORT_ALL && !port_is_stopped(pid))) {
2960 		fprintf(stderr,
2961 			"Can not reset port(s), please stop port(s) first.\n");
2962 		return;
2963 	}
2964 
2965 	printf("Resetting ports...\n");
2966 
2967 	RTE_ETH_FOREACH_DEV(pi) {
2968 		if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
2969 			continue;
2970 
2971 		if (port_is_forwarding(pi) != 0 && test_done == 0) {
2972 			fprintf(stderr,
2973 				"Please remove port %d from forwarding configuration.\n",
2974 				pi);
2975 			continue;
2976 		}
2977 
2978 		if (port_is_bonding_slave(pi)) {
2979 			fprintf(stderr,
2980 				"Please remove port %d from bonded device.\n",
2981 				pi);
2982 			continue;
2983 		}
2984 
2985 		diag = rte_eth_dev_reset(pi);
2986 		if (diag == 0) {
2987 			port = &ports[pi];
2988 			port->need_reconfig = 1;
2989 			port->need_reconfig_queues = 1;
2990 		} else {
2991 			fprintf(stderr, "Failed to reset port %d. diag=%d\n",
2992 				pi, diag);
2993 		}
2994 	}
2995 
2996 	printf("Done\n");
2997 }
2998 
2999 void
3000 attach_port(char *identifier)
3001 {
3002 	portid_t pi;
3003 	struct rte_dev_iterator iterator;
3004 
3005 	printf("Attaching a new port...\n");
3006 
3007 	if (identifier == NULL) {
3008 		fprintf(stderr, "Invalid parameters are specified\n");
3009 		return;
3010 	}
3011 
3012 	if (rte_dev_probe(identifier) < 0) {
3013 		TESTPMD_LOG(ERR, "Failed to attach port %s\n", identifier);
3014 		return;
3015 	}
3016 
3017 	/* first attach mode: event */
3018 	if (setup_on_probe_event) {
3019 		/* new ports are detected on RTE_ETH_EVENT_NEW event */
3020 		for (pi = 0; pi < RTE_MAX_ETHPORTS; pi++)
3021 			if (ports[pi].port_status == RTE_PORT_HANDLING &&
3022 					ports[pi].need_setup != 0)
3023 				setup_attached_port(pi);
3024 		return;
3025 	}
3026 
3027 	/* second attach mode: iterator */
3028 	RTE_ETH_FOREACH_MATCHING_DEV(pi, identifier, &iterator) {
3029 		/* setup ports matching the devargs used for probing */
3030 		if (port_is_forwarding(pi))
3031 			continue; /* port was already attached before */
3032 		setup_attached_port(pi);
3033 	}
3034 }
3035 
3036 static void
3037 setup_attached_port(portid_t pi)
3038 {
3039 	unsigned int socket_id;
3040 	int ret;
3041 
3042 	socket_id = (unsigned)rte_eth_dev_socket_id(pi);
3043 	/* if socket_id is invalid, set to the first available socket. */
3044 	if (check_socket_id(socket_id) < 0)
3045 		socket_id = socket_ids[0];
3046 	reconfig(pi, socket_id);
3047 	ret = rte_eth_promiscuous_enable(pi);
3048 	if (ret != 0)
3049 		fprintf(stderr,
3050 			"Error during enabling promiscuous mode for port %u: %s - ignore\n",
3051 			pi, rte_strerror(-ret));
3052 
3053 	ports_ids[nb_ports++] = pi;
3054 	fwd_ports_ids[nb_fwd_ports++] = pi;
3055 	nb_cfg_ports = nb_fwd_ports;
3056 	ports[pi].need_setup = 0;
3057 	ports[pi].port_status = RTE_PORT_STOPPED;
3058 	printf("Port %d is attached. Total number of ports is now %d\n", pi, nb_ports);
3059 	printf("Port %d is attached. Now total ports is %d\n", pi, nb_ports);
3060 	printf("Done\n");
3061 }
3062 
3063 static void
3064 detach_device(struct rte_device *dev)
3065 {
3066 	portid_t sibling;
3067 
3068 	if (dev == NULL) {
3069 		fprintf(stderr, "Device already removed\n");
3070 		return;
3071 	}
3072 
3073 	printf("Removing a device...\n");
3074 
3075 	RTE_ETH_FOREACH_DEV_OF(sibling, dev) {
3076 		if (ports[sibling].port_status != RTE_PORT_CLOSED) {
3077 			if (ports[sibling].port_status != RTE_PORT_STOPPED) {
3078 				fprintf(stderr, "Port %u not stopped\n",
3079 					sibling);
3080 				return;
3081 			}
3082 			port_flow_flush(sibling);
3083 		}
3084 	}
3085 
3086 	if (rte_dev_remove(dev) < 0) {
3087 		TESTPMD_LOG(ERR, "Failed to detach device %s\n", dev->name);
3088 		return;
3089 	}
3090 	remove_invalid_ports();
3091 
3092 	printf("Device is detached\n");
3093 	printf("Total number of ports is now %d\n", nb_ports);
3094 	printf("Done\n");
3095 	return;
3096 }
3097 
3098 void
3099 detach_port_device(portid_t port_id)
3100 {
3101 	int ret;
3102 	struct rte_eth_dev_info dev_info;
3103 
3104 	if (port_id_is_invalid(port_id, ENABLED_WARN))
3105 		return;
3106 
3107 	if (ports[port_id].port_status != RTE_PORT_CLOSED) {
3108 		if (ports[port_id].port_status != RTE_PORT_STOPPED) {
3109 			fprintf(stderr, "Port not stopped\n");
3110 			return;
3111 		}
3112 		fprintf(stderr, "Port was not closed\n");
3113 	}
3114 
3115 	ret = eth_dev_info_get_print_err(port_id, &dev_info);
3116 	if (ret != 0) {
3117 		TESTPMD_LOG(ERR,
3118 			"Failed to get device info for port %d, not detaching\n",
3119 			port_id);
3120 		return;
3121 	}
3122 	detach_device(dev_info.device);
3123 }
3124 
3125 void
3126 detach_devargs(char *identifier)
3127 {
3128 	struct rte_dev_iterator iterator;
3129 	struct rte_devargs da;
3130 	portid_t port_id;
3131 
3132 	printf("Removing a device...\n");
3133 
3134 	memset(&da, 0, sizeof(da));
3135 	if (rte_devargs_parsef(&da, "%s", identifier)) {
3136 		fprintf(stderr, "cannot parse identifier\n");
3137 		return;
3138 	}
3139 
3140 	RTE_ETH_FOREACH_MATCHING_DEV(port_id, identifier, &iterator) {
3141 		if (ports[port_id].port_status != RTE_PORT_CLOSED) {
3142 			if (ports[port_id].port_status != RTE_PORT_STOPPED) {
3143 				fprintf(stderr, "Port %u not stopped\n",
3144 					port_id);
3145 				rte_eth_iterator_cleanup(&iterator);
3146 				rte_devargs_reset(&da);
3147 				return;
3148 			}
3149 			port_flow_flush(port_id);
3150 		}
3151 	}
3152 
3153 	if (rte_eal_hotplug_remove(da.bus->name, da.name) != 0) {
3154 		TESTPMD_LOG(ERR, "Failed to detach device %s(%s)\n",
3155 			    da.name, da.bus->name);
3156 		rte_devargs_reset(&da);
3157 		return;
3158 	}
3159 
3160 	remove_invalid_ports();
3161 
3162 	printf("Device %s is detached\n", identifier);
3163 	printf("Total number of ports is now %d\n", nb_ports);
3164 	printf("Done\n");
3165 	rte_devargs_reset(&da);
3166 }
3167 
3168 void
3169 pmd_test_exit(void)
3170 {
3171 	portid_t pt_id;
3172 	unsigned int i;
3173 	int ret;
3174 
3175 	if (test_done == 0)
3176 		stop_packet_forwarding();
3177 
3178 #ifndef RTE_EXEC_ENV_WINDOWS
3179 	for (i = 0 ; i < RTE_DIM(mempools) ; i++) {
3180 		if (mempools[i]) {
3181 			if (mp_alloc_type == MP_ALLOC_ANON)
3182 				rte_mempool_mem_iter(mempools[i], dma_unmap_cb,
3183 						     NULL);
3184 		}
3185 	}
3186 #endif
3187 	if (ports != NULL) {
3188 		no_link_check = 1;
3189 		RTE_ETH_FOREACH_DEV(pt_id) {
3190 			printf("\nStopping port %d...\n", pt_id);
3191 			fflush(stdout);
3192 			stop_port(pt_id);
3193 		}
3194 		RTE_ETH_FOREACH_DEV(pt_id) {
3195 			printf("\nShutting down port %d...\n", pt_id);
3196 			fflush(stdout);
3197 			close_port(pt_id);
3198 		}
3199 	}
3200 
3201 	if (hot_plug) {
3202 		ret = rte_dev_event_monitor_stop();
3203 		if (ret) {
3204 			RTE_LOG(ERR, EAL,
3205 				"fail to stop device event monitor.\n");
3206 			return;
3207 		}
3208 
3209 		ret = rte_dev_event_callback_unregister(NULL,
3210 			dev_event_callback, NULL);
3211 		if (ret < 0) {
3212 			RTE_LOG(ERR, EAL,
3213 				"fail to unregister device event callback.\n");
3214 			return;
3215 		}
3216 
3217 		ret = rte_dev_hotplug_handle_disable();
3218 		if (ret) {
3219 			RTE_LOG(ERR, EAL,
3220 				"fail to disable hotplug handling.\n");
3221 			return;
3222 		}
3223 	}
3224 	for (i = 0 ; i < RTE_DIM(mempools) ; i++) {
3225 		if (mempools[i])
3226 			mempool_free_mp(mempools[i]);
3227 	}
3228 
3229 	printf("\nBye...\n");
3230 }
3231 
3232 typedef void (*cmd_func_t)(void);
3233 struct pmd_test_command {
3234 	const char *cmd_name;
3235 	cmd_func_t cmd_func;
3236 };
3237 
3238 /* Check the link status of all ports for up to 9s and print the final status */
3239 static void
3240 check_all_ports_link_status(uint32_t port_mask)
3241 {
3242 #define CHECK_INTERVAL 100 /* 100ms */
3243 #define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
3244 	portid_t portid;
3245 	uint8_t count, all_ports_up, print_flag = 0;
3246 	struct rte_eth_link link;
3247 	int ret;
3248 	char link_status[RTE_ETH_LINK_MAX_STR_LEN];
3249 
3250 	printf("Checking link statuses...\n");
3251 	fflush(stdout);
3252 	for (count = 0; count <= MAX_CHECK_TIME; count++) {
3253 		all_ports_up = 1;
3254 		RTE_ETH_FOREACH_DEV(portid) {
3255 			if ((port_mask & (1 << portid)) == 0)
3256 				continue;
3257 			memset(&link, 0, sizeof(link));
3258 			ret = rte_eth_link_get_nowait(portid, &link);
3259 			if (ret < 0) {
3260 				all_ports_up = 0;
3261 				if (print_flag == 1)
3262 					fprintf(stderr,
3263 						"Port %u link get failed: %s\n",
3264 						portid, rte_strerror(-ret));
3265 				continue;
3266 			}
3267 			/* print link status if flag set */
3268 			if (print_flag == 1) {
3269 				rte_eth_link_to_str(link_status,
3270 					sizeof(link_status), &link);
3271 				printf("Port %d %s\n", portid, link_status);
3272 				continue;
3273 			}
3274 			/* clear all_ports_up flag if any link down */
3275 			if (link.link_status == ETH_LINK_DOWN) {
3276 				all_ports_up = 0;
3277 				break;
3278 			}
3279 		}
3280 		/* after finally printing all link status, get out */
3281 		if (print_flag == 1)
3282 			break;
3283 
3284 		if (all_ports_up == 0) {
3285 			fflush(stdout);
3286 			rte_delay_ms(CHECK_INTERVAL);
3287 		}
3288 
3289 		/* set the print_flag if all ports up or timeout */
3290 		if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
3291 			print_flag = 1;
3292 		}
3293 
3294 		if (lsc_interrupt)
3295 			break;
3296 	}
3297 }
3298 
3299 static void
3300 rmv_port_callback(void *arg)
3301 {
3302 	int need_to_start = 0;
3303 	int org_no_link_check = no_link_check;
3304 	portid_t port_id = (intptr_t)arg;
3305 	struct rte_eth_dev_info dev_info;
3306 	int ret;
3307 
3308 	RTE_ETH_VALID_PORTID_OR_RET(port_id);
3309 
3310 	if (!test_done && port_is_forwarding(port_id)) {
3311 		need_to_start = 1;
3312 		stop_packet_forwarding();
3313 	}
3314 	no_link_check = 1;
3315 	stop_port(port_id);
3316 	no_link_check = org_no_link_check;
3317 
3318 	ret = eth_dev_info_get_print_err(port_id, &dev_info);
3319 	if (ret != 0)
3320 		TESTPMD_LOG(ERR,
3321 			"Failed to get device info for port %d, not detaching\n",
3322 			port_id);
3323 	else {
3324 		struct rte_device *device = dev_info.device;
3325 		close_port(port_id);
3326 		detach_device(device); /* might be already removed or have more ports */
3327 	}
3328 	if (need_to_start)
3329 		start_packet_forwarding(0);
3330 }
3331 
3332 /* This function is used by the interrupt thread */
3333 static int
3334 eth_event_callback(portid_t port_id, enum rte_eth_event_type type, void *param,
3335 		  void *ret_param)
3336 {
3337 	RTE_SET_USED(param);
3338 	RTE_SET_USED(ret_param);
3339 
3340 	if (type >= RTE_ETH_EVENT_MAX) {
3341 		fprintf(stderr,
3342 			"\nPort %" PRIu16 ": %s called upon invalid event %d\n",
3343 			port_id, __func__, type);
3344 		fflush(stderr);
3345 	} else if (event_print_mask & (UINT32_C(1) << type)) {
3346 		printf("\nPort %" PRIu16 ": %s event\n", port_id,
3347 			eth_event_desc[type]);
3348 		fflush(stdout);
3349 	}
3350 
3351 	switch (type) {
3352 	case RTE_ETH_EVENT_NEW:
3353 		ports[port_id].need_setup = 1;
3354 		ports[port_id].port_status = RTE_PORT_HANDLING;
3355 		break;
3356 	case RTE_ETH_EVENT_INTR_RMV:
3357 		if (port_id_is_invalid(port_id, DISABLED_WARN))
3358 			break;
3359 		if (rte_eal_alarm_set(100000,
3360 				rmv_port_callback, (void *)(intptr_t)port_id))
3361 			fprintf(stderr,
3362 				"Could not set up deferred device removal\n");
3363 		break;
3364 	case RTE_ETH_EVENT_DESTROY:
3365 		ports[port_id].port_status = RTE_PORT_CLOSED;
3366 		printf("Port %u is closed\n", port_id);
3367 		break;
3368 	default:
3369 		break;
3370 	}
3371 	return 0;
3372 }
3373 
3374 static int
3375 register_eth_event_callback(void)
3376 {
3377 	int ret;
3378 	enum rte_eth_event_type event;
3379 
3380 	for (event = RTE_ETH_EVENT_UNKNOWN;
3381 			event < RTE_ETH_EVENT_MAX; event++) {
3382 		ret = rte_eth_dev_callback_register(RTE_ETH_ALL,
3383 				event,
3384 				eth_event_callback,
3385 				NULL);
3386 		if (ret != 0) {
3387 			TESTPMD_LOG(ERR, "Failed to register callback for "
3388 					"%s event\n", eth_event_desc[event]);
3389 			return -1;
3390 		}
3391 	}
3392 
3393 	return 0;
3394 }
3395 
3396 /* This function is used by the interrupt thread */
3397 static void
3398 dev_event_callback(const char *device_name, enum rte_dev_event_type type,
3399 			     __rte_unused void *arg)
3400 {
3401 	uint16_t port_id;
3402 	int ret;
3403 
3404 	if (type >= RTE_DEV_EVENT_MAX) {
3405 		fprintf(stderr, "%s called upon invalid event %d\n",
3406 			__func__, type);
3407 		fflush(stderr);
3408 	}
3409 
3410 	switch (type) {
3411 	case RTE_DEV_EVENT_REMOVE:
3412 		RTE_LOG(DEBUG, EAL, "The device: %s has been removed!\n",
3413 			device_name);
3414 		ret = rte_eth_dev_get_port_by_name(device_name, &port_id);
3415 		if (ret) {
3416 			RTE_LOG(ERR, EAL, "can not get port by device %s!\n",
3417 				device_name);
3418 			return;
3419 		}
3420 		/*
3421 		 * Because the user's callback is invoked from the EAL
3422 		 * interrupt callback, the interrupt callback must finish
3423 		 * before it can be unregistered while detaching the device.
3424 		 * So return quickly here and detach the device through a
3425 		 * deferred removal instead. This is a workaround; once
3426 		 * device detaching is moved into the EAL, the deferred
3427 		 * removal can be deleted.
3428 		 */
3429 		if (rte_eal_alarm_set(100000,
3430 				rmv_port_callback, (void *)(intptr_t)port_id))
3431 			RTE_LOG(ERR, EAL,
3432 				"Could not set up deferred device removal\n");
3433 		break;
3434 	case RTE_DEV_EVENT_ADD:
3435 		RTE_LOG(ERR, EAL, "The device: %s has been added!\n",
3436 			device_name);
3437 		/* TODO: After the kernel driver binding finishes,
3438 		 * begin to attach the port.
3439 		 */
3440 		break;
3441 	default:
3442 		break;
3443 	}
3444 }
3445 
3446 static void
3447 rxtx_port_config(struct rte_port *port)
3448 {
3449 	uint16_t qid;
3450 	uint64_t offloads;
3451 
3452 	for (qid = 0; qid < nb_rxq; qid++) {
3453 		offloads = port->rx_conf[qid].offloads;
3454 		port->rx_conf[qid] = port->dev_info.default_rxconf;
3455 		if (offloads != 0)
3456 			port->rx_conf[qid].offloads = offloads;
3457 
3458 		/* Check if any Rx parameters have been passed */
3459 		if (rx_pthresh != RTE_PMD_PARAM_UNSET)
3460 			port->rx_conf[qid].rx_thresh.pthresh = rx_pthresh;
3461 
3462 		if (rx_hthresh != RTE_PMD_PARAM_UNSET)
3463 			port->rx_conf[qid].rx_thresh.hthresh = rx_hthresh;
3464 
3465 		if (rx_wthresh != RTE_PMD_PARAM_UNSET)
3466 			port->rx_conf[qid].rx_thresh.wthresh = rx_wthresh;
3467 
3468 		if (rx_free_thresh != RTE_PMD_PARAM_UNSET)
3469 			port->rx_conf[qid].rx_free_thresh = rx_free_thresh;
3470 
3471 		if (rx_drop_en != RTE_PMD_PARAM_UNSET)
3472 			port->rx_conf[qid].rx_drop_en = rx_drop_en;
3473 
3474 		port->nb_rx_desc[qid] = nb_rxd;
3475 	}
3476 
3477 	for (qid = 0; qid < nb_txq; qid++) {
3478 		offloads = port->tx_conf[qid].offloads;
3479 		port->tx_conf[qid] = port->dev_info.default_txconf;
3480 		if (offloads != 0)
3481 			port->tx_conf[qid].offloads = offloads;
3482 
3483 		/* Check if any Tx parameters have been passed */
3484 		if (tx_pthresh != RTE_PMD_PARAM_UNSET)
3485 			port->tx_conf[qid].tx_thresh.pthresh = tx_pthresh;
3486 
3487 		if (tx_hthresh != RTE_PMD_PARAM_UNSET)
3488 			port->tx_conf[qid].tx_thresh.hthresh = tx_hthresh;
3489 
3490 		if (tx_wthresh != RTE_PMD_PARAM_UNSET)
3491 			port->tx_conf[qid].tx_thresh.wthresh = tx_wthresh;
3492 
3493 		if (tx_rs_thresh != RTE_PMD_PARAM_UNSET)
3494 			port->tx_conf[qid].tx_rs_thresh = tx_rs_thresh;
3495 
3496 		if (tx_free_thresh != RTE_PMD_PARAM_UNSET)
3497 			port->tx_conf[qid].tx_free_thresh = tx_free_thresh;
3498 
3499 		port->nb_tx_desc[qid] = nb_txd;
3500 	}
3501 }
3502 
3503 /*
3504  * Helper function to reconcile the max_rx_pkt_len value with the JUMBO_FRAME
3505  * offload; the MTU is also aligned when the JUMBO_FRAME offload is not set.
3506  *
3507  * port->dev_info should be set before calling this function.
3508  *
3509  * return 0 on success, negative on error
3510  */
3511 int
3512 update_jumbo_frame_offload(portid_t portid)
3513 {
3514 	struct rte_port *port = &ports[portid];
3515 	uint32_t eth_overhead;
3516 	uint64_t rx_offloads;
3517 	int ret;
3518 	bool on;
3519 
3520 	/* Derive the L2 overhead used to convert between max_rx_pkt_len and MTU */
3521 	if (port->dev_info.max_mtu != UINT16_MAX &&
3522 	    port->dev_info.max_rx_pktlen > port->dev_info.max_mtu)
3523 		eth_overhead = port->dev_info.max_rx_pktlen -
3524 				port->dev_info.max_mtu;
3525 	else
3526 		eth_overhead = RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN;
3527 
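	/*
	 * For example, with the default overhead of 18 bytes
	 * (RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN), max_rx_pkt_len = 1518
	 * resolves to an MTU of 1500 and JUMBO_FRAME stays cleared, while
	 * max_rx_pkt_len = 9018 enables JUMBO_FRAME if the port supports it.
	 */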
3528 	rx_offloads = port->dev_conf.rxmode.offloads;
3529 
3530 	/* Default config value is 0 to use PMD specific overhead */
3531 	if (port->dev_conf.rxmode.max_rx_pkt_len == 0)
3532 		port->dev_conf.rxmode.max_rx_pkt_len = RTE_ETHER_MTU + eth_overhead;
3533 
3534 	if (port->dev_conf.rxmode.max_rx_pkt_len <= RTE_ETHER_MTU + eth_overhead) {
3535 		rx_offloads &= ~DEV_RX_OFFLOAD_JUMBO_FRAME;
3536 		on = false;
3537 	} else {
3538 		if ((port->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_JUMBO_FRAME) == 0) {
3539 			fprintf(stderr,
3540 				"Frame size (%u) is not supported by port %u\n",
3541 				port->dev_conf.rxmode.max_rx_pkt_len,
3542 				portid);
3543 			return -1;
3544 		}
3545 		rx_offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME;
3546 		on = true;
3547 	}
3548 
3549 	if (rx_offloads != port->dev_conf.rxmode.offloads) {
3550 		uint16_t qid;
3551 
3552 		port->dev_conf.rxmode.offloads = rx_offloads;
3553 
3554 		/* Apply JUMBO_FRAME offload configuration to Rx queue(s) */
3555 		for (qid = 0; qid < port->dev_info.nb_rx_queues; qid++) {
3556 			if (on)
3557 				port->rx_conf[qid].offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME;
3558 			else
3559 				port->rx_conf[qid].offloads &= ~DEV_RX_OFFLOAD_JUMBO_FRAME;
3560 		}
3561 	}
3562 
3563 	/* If JUMBO_FRAME is set, the MTU conversion is done by the ethdev
3564 	 * layer; if it is unset, do it here.
3565 	 */
3566 	if ((rx_offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) == 0) {
3567 		ret = eth_dev_set_mtu_mp(portid,
3568 				port->dev_conf.rxmode.max_rx_pkt_len - eth_overhead);
3569 		if (ret)
3570 			fprintf(stderr,
3571 				"Failed to set MTU to %u for port %u\n",
3572 				port->dev_conf.rxmode.max_rx_pkt_len - eth_overhead,
3573 				portid);
3574 	}
3575 
3576 	return 0;
3577 }
3578 
3579 void
3580 init_port_config(void)
3581 {
3582 	portid_t pid;
3583 	struct rte_port *port;
3584 	int ret;
3585 
3586 	RTE_ETH_FOREACH_DEV(pid) {
3587 		port = &ports[pid];
3588 		port->dev_conf.fdir_conf = fdir_conf;
3589 
3590 		ret = eth_dev_info_get_print_err(pid, &port->dev_info);
3591 		if (ret != 0)
3592 			return;
3593 
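		/*
		 * RSS only makes sense with more than one Rx queue; when it
		 * is enabled, restrict the requested hash types to what the
		 * device reports as supported.
		 */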
3594 		if (nb_rxq > 1) {
3595 			port->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
3596 			port->dev_conf.rx_adv_conf.rss_conf.rss_hf =
3597 				rss_hf & port->dev_info.flow_type_rss_offloads;
3598 		} else {
3599 			port->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
3600 			port->dev_conf.rx_adv_conf.rss_conf.rss_hf = 0;
3601 		}
3602 
3603 		if (port->dcb_flag == 0) {
3604 			if (port->dev_conf.rx_adv_conf.rss_conf.rss_hf != 0)
3605 				port->dev_conf.rxmode.mq_mode =
3606 					(enum rte_eth_rx_mq_mode)
3607 						(rx_mq_mode & ETH_MQ_RX_RSS);
3608 			else
3609 				port->dev_conf.rxmode.mq_mode = ETH_MQ_RX_NONE;
3610 		}
3611 
3612 		rxtx_port_config(port);
3613 
3614 		ret = eth_macaddr_get_print_err(pid, &port->eth_addr);
3615 		if (ret != 0)
3616 			return;
3617 
3618 #if defined RTE_NET_IXGBE && defined RTE_LIBRTE_IXGBE_BYPASS
3619 		rte_pmd_ixgbe_bypass_init(pid);
3620 #endif
3621 
3622 		if (lsc_interrupt && (*port->dev_info.dev_flags & RTE_ETH_DEV_INTR_LSC))
3623 			port->dev_conf.intr_conf.lsc = 1;
3624 		if (rmv_interrupt && (*port->dev_info.dev_flags & RTE_ETH_DEV_INTR_RMV))
3625 			port->dev_conf.intr_conf.rmv = 1;
3626 	}
3627 }
3628 
3629 void set_port_slave_flag(portid_t slave_pid)
3630 {
3631 	struct rte_port *port;
3632 
3633 	port = &ports[slave_pid];
3634 	port->slave_flag = 1;
3635 }
3636 
3637 void clear_port_slave_flag(portid_t slave_pid)
3638 {
3639 	struct rte_port *port;
3640 
3641 	port = &ports[slave_pid];
3642 	port->slave_flag = 0;
3643 }
3644 
3645 uint8_t port_is_bonding_slave(portid_t slave_pid)
3646 {
3647 	struct rte_port *port;
3648 	struct rte_eth_dev_info dev_info;
3649 	int ret;
3650 
3651 	port = &ports[slave_pid];
3652 	ret = eth_dev_info_get_print_err(slave_pid, &dev_info);
3653 	if (ret != 0) {
3654 		TESTPMD_LOG(ERR,
3655 			"Failed to get device info for port id %d, "
3656 			"cannot determine if the port is a bonded slave\n",
3657 			slave_pid);
3658 		return 0;
3659 	}
3660 	if ((*dev_info.dev_flags & RTE_ETH_DEV_BONDED_SLAVE) || (port->slave_flag == 1))
3661 		return 1;
3662 	return 0;
3663 }
3664 
3665 const uint16_t vlan_tags[] = {
3666 		0,  1,  2,  3,  4,  5,  6,  7,
3667 		8,  9, 10, 11,  12, 13, 14, 15,
3668 		16, 17, 18, 19, 20, 21, 22, 23,
3669 		24, 25, 26, 27, 28, 29, 30, 31
3670 };
3671 
3672 static  int
3673 get_eth_dcb_conf(portid_t pid, struct rte_eth_conf *eth_conf,
3674 		 enum dcb_mode_enable dcb_mode,
3675 		 enum rte_eth_nb_tcs num_tcs,
3676 		 uint8_t pfc_en)
3677 {
3678 	uint8_t i;
3679 	int32_t rc;
3680 	struct rte_eth_rss_conf rss_conf;
3681 
3682 	/*
3683 	 * Builds up the correct configuration for dcb+vt based on the vlan tags array
3684 	 * given above, and the number of traffic classes available for use.
3685 	 */
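	/*
	 * For example, with num_tcs == ETH_4_TCS the VMDQ+DCB branch uses 32
	 * pools, VLAN tag vlan_tags[i] is mapped to pool (i % 32) and user
	 * priority p is mapped to traffic class (p % 4).
	 */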
3686 	if (dcb_mode == DCB_VT_ENABLED) {
3687 		struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
3688 				&eth_conf->rx_adv_conf.vmdq_dcb_conf;
3689 		struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3690 				&eth_conf->tx_adv_conf.vmdq_dcb_tx_conf;
3691 
3692 		/* VMDQ+DCB RX and TX configurations */
3693 		vmdq_rx_conf->enable_default_pool = 0;
3694 		vmdq_rx_conf->default_pool = 0;
3695 		vmdq_rx_conf->nb_queue_pools =
3696 			(num_tcs ==  ETH_4_TCS ? ETH_32_POOLS : ETH_16_POOLS);
3697 		vmdq_tx_conf->nb_queue_pools =
3698 			(num_tcs ==  ETH_4_TCS ? ETH_32_POOLS : ETH_16_POOLS);
3699 
3700 		vmdq_rx_conf->nb_pool_maps = vmdq_rx_conf->nb_queue_pools;
3701 		for (i = 0; i < vmdq_rx_conf->nb_pool_maps; i++) {
3702 			vmdq_rx_conf->pool_map[i].vlan_id = vlan_tags[i];
3703 			vmdq_rx_conf->pool_map[i].pools =
3704 				1 << (i % vmdq_rx_conf->nb_queue_pools);
3705 		}
3706 		for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3707 			vmdq_rx_conf->dcb_tc[i] = i % num_tcs;
3708 			vmdq_tx_conf->dcb_tc[i] = i % num_tcs;
3709 		}
3710 
3711 		/* set DCB mode of RX and TX of multiple queues */
3712 		eth_conf->rxmode.mq_mode =
3713 				(enum rte_eth_rx_mq_mode)
3714 					(rx_mq_mode & ETH_MQ_RX_VMDQ_DCB);
3715 		eth_conf->txmode.mq_mode = ETH_MQ_TX_VMDQ_DCB;
3716 	} else {
3717 		struct rte_eth_dcb_rx_conf *rx_conf =
3718 				&eth_conf->rx_adv_conf.dcb_rx_conf;
3719 		struct rte_eth_dcb_tx_conf *tx_conf =
3720 				&eth_conf->tx_adv_conf.dcb_tx_conf;
3721 
3722 		memset(&rss_conf, 0, sizeof(struct rte_eth_rss_conf));
3723 
3724 		rc = rte_eth_dev_rss_hash_conf_get(pid, &rss_conf);
3725 		if (rc != 0)
3726 			return rc;
3727 
3728 		rx_conf->nb_tcs = num_tcs;
3729 		tx_conf->nb_tcs = num_tcs;
3730 
3731 		for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3732 			rx_conf->dcb_tc[i] = i % num_tcs;
3733 			tx_conf->dcb_tc[i] = i % num_tcs;
3734 		}
3735 
3736 		eth_conf->rxmode.mq_mode =
3737 				(enum rte_eth_rx_mq_mode)
3738 					(rx_mq_mode & ETH_MQ_RX_DCB_RSS);
3739 		eth_conf->rx_adv_conf.rss_conf = rss_conf;
3740 		eth_conf->txmode.mq_mode = ETH_MQ_TX_DCB;
3741 	}
3742 
3743 	if (pfc_en)
3744 		eth_conf->dcb_capability_en =
3745 				ETH_DCB_PG_SUPPORT | ETH_DCB_PFC_SUPPORT;
3746 	else
3747 		eth_conf->dcb_capability_en = ETH_DCB_PG_SUPPORT;
3748 
3749 	return 0;
3750 }
3751 
3752 int
3753 init_port_dcb_config(portid_t pid,
3754 		     enum dcb_mode_enable dcb_mode,
3755 		     enum rte_eth_nb_tcs num_tcs,
3756 		     uint8_t pfc_en)
3757 {
3758 	struct rte_eth_conf port_conf;
3759 	struct rte_port *rte_port;
3760 	int retval;
3761 	uint16_t i;
3762 
3763 	if (num_procs > 1) {
3764 		printf("The multi-process feature doesn't support DCB.\n");
3765 		return -ENOTSUP;
3766 	}
3767 	rte_port = &ports[pid];
3768 
3769 	/* retain the original device configuration. */
3770 	memcpy(&port_conf, &rte_port->dev_conf, sizeof(struct rte_eth_conf));
3771 
3772 	/* Set the configuration of DCB in VT mode and DCB in non-VT mode */
3773 	retval = get_eth_dcb_conf(pid, &port_conf, dcb_mode, num_tcs, pfc_en);
3774 	if (retval < 0)
3775 		return retval;
3776 	port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_VLAN_FILTER;
3777 
3778 	/* re-configure the device */
3779 	retval = rte_eth_dev_configure(pid, nb_rxq, nb_rxq, &port_conf);
3780 	if (retval < 0)
3781 		return retval;
3782 
3783 	retval = eth_dev_info_get_print_err(pid, &rte_port->dev_info);
3784 	if (retval != 0)
3785 		return retval;
3786 
3787 	/* If dev_info.vmdq_pool_base is greater than 0,
3788 	 * the queue IDs of the VMDq pools start after the PF queues.
3789 	 */
3790 	if (dcb_mode == DCB_VT_ENABLED &&
3791 	    rte_port->dev_info.vmdq_pool_base > 0) {
3792 		fprintf(stderr,
3793 			"VMDQ_DCB multi-queue mode is nonsensical for port %d.\n",
3794 			pid);
3795 		return -1;
3796 	}
3797 
3798 	/* Assume the ports in testpmd have the same DCB capability
3799 	 * and the same number of rxq and txq in DCB mode.
3800 	 */
3801 	if (dcb_mode == DCB_VT_ENABLED) {
3802 		if (rte_port->dev_info.max_vfs > 0) {
3803 			nb_rxq = rte_port->dev_info.nb_rx_queues;
3804 			nb_txq = rte_port->dev_info.nb_tx_queues;
3805 		} else {
3806 			nb_rxq = rte_port->dev_info.max_rx_queues;
3807 			nb_txq = rte_port->dev_info.max_tx_queues;
3808 		}
3809 	} else {
3810 		/* If VT is disabled, use all PF queues. */
3811 		if (rte_port->dev_info.vmdq_pool_base == 0) {
3812 			nb_rxq = rte_port->dev_info.max_rx_queues;
3813 			nb_txq = rte_port->dev_info.max_tx_queues;
3814 		} else {
3815 			nb_rxq = (queueid_t)num_tcs;
3816 			nb_txq = (queueid_t)num_tcs;
3817 
3819 	}
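	/* Use a fixed RX free threshold of 64 descriptors for the DCB setup. */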
3820 	rx_free_thresh = 64;
3821 
3822 	memcpy(&rte_port->dev_conf, &port_conf, sizeof(struct rte_eth_conf));
3823 
3824 	rxtx_port_config(rte_port);
3825 	/* VLAN filter */
3826 	rte_port->dev_conf.rxmode.offloads |= DEV_RX_OFFLOAD_VLAN_FILTER;
3827 	for (i = 0; i < RTE_DIM(vlan_tags); i++)
3828 		rx_vft_set(pid, vlan_tags[i], 1);
3829 
3830 	retval = eth_macaddr_get_print_err(pid, &rte_port->eth_addr);
3831 	if (retval != 0)
3832 		return retval;
3833 
3834 	rte_port->dcb_flag = 1;
3835 
3836 	/* Enter DCB configuration status */
3837 	dcb_config = 1;
3838 
3839 	return 0;
3840 }
3841 
3842 static void
3843 init_port(void)
3844 {
3845 	int i;
3846 
3847 	/* Configuration of Ethernet ports. */
3848 	ports = rte_zmalloc("testpmd: ports",
3849 			    sizeof(struct rte_port) * RTE_MAX_ETHPORTS,
3850 			    RTE_CACHE_LINE_SIZE);
3851 	if (ports == NULL) {
3852 		rte_exit(EXIT_FAILURE,
3853 				"rte_zmalloc(%d struct rte_port) failed\n",
3854 				RTE_MAX_ETHPORTS);
3855 	}
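	/* Initialize the per-port lists of flow tunnels. */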
3856 	for (i = 0; i < RTE_MAX_ETHPORTS; i++)
3857 		LIST_INIT(&ports[i].flow_tunnel_list);
3858 	/* Initialize the ports' NUMA structures */
3859 	memset(port_numa, NUMA_NO_CONFIG, RTE_MAX_ETHPORTS);
3860 	memset(rxring_numa, NUMA_NO_CONFIG, RTE_MAX_ETHPORTS);
3861 	memset(txring_numa, NUMA_NO_CONFIG, RTE_MAX_ETHPORTS);
3862 }
3863 
3864 static void
3865 force_quit(void)
3866 {
3867 	pmd_test_exit();
3868 	prompt_exit();
3869 }
3870 
3871 static void
3872 print_stats(void)
3873 {
3874 	uint8_t i;
3875 	const char clr[] = { 27, '[', '2', 'J', '\0' };
3876 	const char top_left[] = { 27, '[', '1', ';', '1', 'H', '\0' };
3877 
3878 	/* Clear screen and move to top left */
3879 	printf("%s%s", clr, top_left);
3880 
3881 	printf("\nPort statistics ====================================");
3882 	for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++)
3883 		nic_stats_display(fwd_ports_ids[i]);
3884 
3885 	fflush(stdout);
3886 }
3887 
3888 static void
3889 signal_handler(int signum)
3890 {
3891 	if (signum == SIGINT || signum == SIGTERM) {
3892 		fprintf(stderr, "\nSignal %d received, preparing to exit...\n",
3893 			signum);
3894 #ifdef RTE_LIB_PDUMP
3895 		/* uninitialize packet capture framework */
3896 		rte_pdump_uninit();
3897 #endif
3898 #ifdef RTE_LIB_LATENCYSTATS
3899 		if (latencystats_enabled != 0)
3900 			rte_latencystats_uninit();
3901 #endif
3902 		force_quit();
3903 		/* Set flag to indicate that termination was forced. */
3904 		f_quit = 1;
3905 		/* exit with the expected status */
3906 #ifndef RTE_EXEC_ENV_WINDOWS
3907 		signal(signum, SIG_DFL);
3908 		kill(getpid(), signum);
3909 #endif
3910 	}
3911 }
3912 
3913 int
3914 main(int argc, char** argv)
3915 {
3916 	int diag;
3917 	portid_t port_id;
3918 	uint16_t count;
3919 	int ret;
3920 
3921 	signal(SIGINT, signal_handler);
3922 	signal(SIGTERM, signal_handler);
3923 
3924 	testpmd_logtype = rte_log_register("testpmd");
3925 	if (testpmd_logtype < 0)
3926 		rte_exit(EXIT_FAILURE, "Cannot register log type");
3927 	rte_log_set_level(testpmd_logtype, RTE_LOG_DEBUG);
3928 
3929 	diag = rte_eal_init(argc, argv);
3930 	if (diag < 0)
3931 		rte_exit(EXIT_FAILURE, "Cannot init EAL: %s\n",
3932 			 rte_strerror(rte_errno));
3933 
3934 	ret = register_eth_event_callback();
3935 	if (ret != 0)
3936 		rte_exit(EXIT_FAILURE, "Cannot register for ethdev events");
3937 
3938 #ifdef RTE_LIB_PDUMP
3939 	/* initialize packet capture framework */
3940 	rte_pdump_init();
3941 #endif
3942 
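	/* Record the ids of all probed Ethernet devices. */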
3943 	count = 0;
3944 	RTE_ETH_FOREACH_DEV(port_id) {
3945 		ports_ids[count] = port_id;
3946 		count++;
3947 	}
3948 	nb_ports = (portid_t) count;
3949 	if (nb_ports == 0)
3950 		TESTPMD_LOG(WARNING, "No probed ethernet devices\n");
3951 
3952 	/* allocate port structures, and init them */
3953 	init_port();
3954 
3955 	set_def_fwd_config();
3956 	if (nb_lcores == 0)
3957 		rte_exit(EXIT_FAILURE, "No cores defined for forwarding\n"
3958 			 "Check the core mask argument\n");
3959 
3960 	/* Bitrate/latency stats disabled by default */
3961 #ifdef RTE_LIB_BITRATESTATS
3962 	bitrate_enabled = 0;
3963 #endif
3964 #ifdef RTE_LIB_LATENCYSTATS
3965 	latencystats_enabled = 0;
3966 #endif
3967 
3968 	/* on FreeBSD, mlockall() is disabled by default */
3969 #ifdef RTE_EXEC_ENV_FREEBSD
3970 	do_mlockall = 0;
3971 #else
3972 	do_mlockall = 1;
3973 #endif
3974 
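	/*
	 * rte_eal_init() returned the number of arguments it consumed;
	 * skip them and parse the remaining testpmd options.
	 */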
3975 	argc -= diag;
3976 	argv += diag;
3977 	if (argc > 1)
3978 		launch_args_parse(argc, argv);
3979 
3980 #ifndef RTE_EXEC_ENV_WINDOWS
3981 	if (do_mlockall && mlockall(MCL_CURRENT | MCL_FUTURE)) {
3982 		TESTPMD_LOG(NOTICE, "mlockall() failed with error \"%s\"\n",
3983 			strerror(errno));
3984 	}
3985 #endif
3986 
3987 	if (tx_first && interactive)
3988 		rte_exit(EXIT_FAILURE, "--tx-first cannot be used in "
3989 				"interactive mode.\n");
3990 
3991 	if (tx_first && lsc_interrupt) {
3992 		fprintf(stderr,
3993 			"Warning: lsc_interrupt needs to be off when using tx_first. Disabling.\n");
3994 		lsc_interrupt = 0;
3995 	}
3996 
3997 	if (!nb_rxq && !nb_txq)
3998 		fprintf(stderr,
3999 			"Warning: Either rx or tx queues should be non-zero\n");
4000 
4001 	if (nb_rxq > 1 && nb_rxq > nb_txq)
4002 		fprintf(stderr,
4003 			"Warning: nb_rxq=%d enables RSS configuration, but nb_txq=%d will prevent it from being fully tested.\n",
4004 			nb_rxq, nb_txq);
4005 
4006 	init_config();
4007 
4008 	if (hot_plug) {
4009 		ret = rte_dev_hotplug_handle_enable();
4010 		if (ret) {
4011 			RTE_LOG(ERR, EAL,
4012 				"Failed to enable hotplug handling.\n");
4013 			return -1;
4014 		}
4015 
4016 		ret = rte_dev_event_monitor_start();
4017 		if (ret) {
4018 			RTE_LOG(ERR, EAL,
4019 				"Failed to start device event monitoring.\n");
4020 			return -1;
4021 		}
4022 
4023 		ret = rte_dev_event_callback_register(NULL,
4024 			dev_event_callback, NULL);
4025 		if (ret) {
4026 			RTE_LOG(ERR, EAL,
4027 				"Failed to register device event callback.\n");
4028 			return -1;
4029 		}
4030 	}
4031 
4032 	if (!no_device_start && start_port(RTE_PORT_ALL) != 0)
4033 		rte_exit(EXIT_FAILURE, "Start ports failed\n");
4034 
4035 	/* set all ports to promiscuous mode by default */
4036 	RTE_ETH_FOREACH_DEV(port_id) {
4037 		ret = rte_eth_promiscuous_enable(port_id);
4038 		if (ret != 0)
4039 			fprintf(stderr,
4040 				"Error during enabling promiscuous mode for port %u: %s - ignore\n",
4041 				port_id, rte_strerror(-ret));
4042 	}
4043 
4044 	/* Init metrics library */
4045 	rte_metrics_init(rte_socket_id());
4046 
4047 #ifdef RTE_LIB_LATENCYSTATS
4048 	if (latencystats_enabled != 0) {
4049 		int ret = rte_latencystats_init(1, NULL);
4050 		if (ret)
4051 			fprintf(stderr,
4052 				"Warning: latencystats init() returned error %d\n",
4053 				ret);
4054 		fprintf(stderr, "Latencystats running on lcore %d\n",
4055 			latencystats_lcore_id);
4056 	}
4057 #endif
4058 
4059 	/* Setup bitrate stats */
4060 #ifdef RTE_LIB_BITRATESTATS
4061 	if (bitrate_enabled != 0) {
4062 		bitrate_data = rte_stats_bitrate_create();
4063 		if (bitrate_data == NULL)
4064 			rte_exit(EXIT_FAILURE,
4065 				"Could not allocate bitrate data.\n");
4066 		rte_stats_bitrate_reg(bitrate_data);
4067 	}
4068 #endif
4069 
4070 #ifdef RTE_LIB_CMDLINE
4071 	if (strlen(cmdline_filename) != 0)
4072 		cmdline_read_from_file(cmdline_filename);
4073 
4074 	if (interactive == 1) {
4075 		if (auto_start) {
4076 			printf("Start automatic packet forwarding\n");
4077 			start_packet_forwarding(0);
4078 		}
4079 		prompt();
4080 		pmd_test_exit();
4081 	} else
4082 #endif
4083 	{
4084 		char c;
4085 		int rc;
4086 
4087 		f_quit = 0;
4088 
4089 		printf("No commandline core given, start packet forwarding\n");
4090 		start_packet_forwarding(tx_first);
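		/*
		 * When a stats period is configured, print port statistics
		 * at that interval until the signal handler sets f_quit.
		 */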
4091 		if (stats_period != 0) {
4092 			uint64_t prev_time = 0, cur_time, diff_time = 0;
4093 			uint64_t timer_period;
4094 
4095 			/* Convert to number of cycles */
4096 			timer_period = stats_period * rte_get_timer_hz();
4097 
4098 			while (f_quit == 0) {
4099 				cur_time = rte_get_timer_cycles();
4100 				diff_time += cur_time - prev_time;
4101 
4102 				if (diff_time >= timer_period) {
4103 					print_stats();
4104 					/* Reset the timer */
4105 					diff_time = 0;
4106 				}
4107 				/* Sleep to avoid unnecessary checks */
4108 				prev_time = cur_time;
4109 				rte_delay_us_sleep(US_PER_S);
4110 			}
4111 		}
4112 
4113 		printf("Press enter to exit\n");
4114 		rc = read(0, &c, 1);
4115 		pmd_test_exit();
4116 		if (rc < 0)
4117 			return 1;
4118 	}
4119 
4120 	ret = rte_eal_cleanup();
4121 	if (ret != 0)
4122 		rte_exit(EXIT_FAILURE,
4123 			 "EAL cleanup failed: %s\n", strerror(-ret));
4124 
4125 	return EXIT_SUCCESS;
4126 }
4127