xref: /dpdk/app/test-pmd/testpmd.c (revision 6970401e97c3e012c2fd646a12cd7e716d523d23)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2017 Intel Corporation
3  */
4 
5 #include <stdarg.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <signal.h>
9 #include <string.h>
10 #include <time.h>
11 #include <fcntl.h>
12 #ifndef RTE_EXEC_ENV_WINDOWS
13 #include <sys/mman.h>
14 #endif
15 #include <sys/types.h>
16 #include <errno.h>
17 #include <stdbool.h>
18 
19 #include <sys/queue.h>
20 #include <sys/stat.h>
21 
22 #include <stdint.h>
23 #include <unistd.h>
24 #include <inttypes.h>
25 
26 #include <rte_common.h>
27 #include <rte_errno.h>
28 #include <rte_byteorder.h>
29 #include <rte_log.h>
30 #include <rte_debug.h>
31 #include <rte_cycles.h>
32 #include <rte_memory.h>
33 #include <rte_memcpy.h>
34 #include <rte_launch.h>
35 #include <rte_eal.h>
36 #include <rte_alarm.h>
37 #include <rte_per_lcore.h>
38 #include <rte_lcore.h>
39 #include <rte_branch_prediction.h>
40 #include <rte_mempool.h>
41 #include <rte_malloc.h>
42 #include <rte_mbuf.h>
43 #include <rte_mbuf_pool_ops.h>
44 #include <rte_interrupts.h>
45 #include <rte_pci.h>
46 #include <rte_ether.h>
47 #include <rte_ethdev.h>
48 #include <rte_dev.h>
49 #include <rte_string_fns.h>
50 #ifdef RTE_NET_IXGBE
51 #include <rte_pmd_ixgbe.h>
52 #endif
53 #ifdef RTE_LIB_PDUMP
54 #include <rte_pdump.h>
55 #endif
56 #include <rte_flow.h>
57 #include <rte_metrics.h>
58 #ifdef RTE_LIB_BITRATESTATS
59 #include <rte_bitrate.h>
60 #endif
61 #ifdef RTE_LIB_LATENCYSTATS
62 #include <rte_latencystats.h>
63 #endif
64 #ifdef RTE_EXEC_ENV_WINDOWS
65 #include <process.h>
66 #endif
67 
68 #include "testpmd.h"
69 
70 #ifndef MAP_HUGETLB
71 /* FreeBSD may not have MAP_HUGETLB (in fact, it probably doesn't) */
72 #define HUGE_FLAG (0x40000)
73 #else
74 #define HUGE_FLAG MAP_HUGETLB
75 #endif
76 
77 #ifndef MAP_HUGE_SHIFT
78 /* older kernels (or FreeBSD) will not have this define */
79 #define HUGE_SHIFT (26)
80 #else
81 #define HUGE_SHIFT MAP_HUGE_SHIFT
82 #endif
83 
84 #define EXTMEM_HEAP_NAME "extmem"
85 #define EXTBUF_ZONE_SIZE RTE_PGSIZE_2M
86 
87 uint16_t verbose_level = 0; /**< Silent by default. */
88 int testpmd_logtype; /**< Log type for testpmd logs */
89 
90 /* Use the main core for the command line? */
91 uint8_t interactive = 0;
92 uint8_t auto_start = 0;
93 uint8_t tx_first;
94 char cmdline_filename[PATH_MAX] = {0};
95 
96 /*
97  * NUMA support configuration.
98  * When set, the NUMA support attempts to dispatch the allocation of the
99  * RX and TX memory rings, and of the DMA memory buffers (mbufs) for the
100  * probed ports among the CPU sockets 0 and 1.
101  * Otherwise, all memory is allocated from CPU socket 0.
102  */
103 uint8_t numa_support = 1; /**< numa enabled by default */
104 
105 /*
106  * In UMA mode, all memory is allocated from socket 0 if --socket-num is
107  * not configured.
108  */
109 uint8_t socket_num = UMA_NO_CONFIG;
110 
111 /*
112  * Select mempool allocation type:
113  * - native: use regular DPDK memory
114  * - anon: use regular DPDK memory to create mempool, but populate using
115  *         anonymous memory (may not be IOVA-contiguous)
116  * - xmem: use externally allocated hugepage memory
117  */
118 uint8_t mp_alloc_type = MP_ALLOC_NATIVE;
119 
120 /*
121  * Store the sockets on which the memory pools used by the ports
122  * are allocated.
123  */
124 uint8_t port_numa[RTE_MAX_ETHPORTS];
125 
126 /*
127  * Store the sockets on which the RX rings used by the ports
128  * are allocated.
129  */
130 uint8_t rxring_numa[RTE_MAX_ETHPORTS];
131 
132 /*
133  * Store the sockets on which the TX rings used by the ports
134  * are allocated.
135  */
136 uint8_t txring_numa[RTE_MAX_ETHPORTS];
137 
138 /*
139  * Record the Ethernet address of peer target ports to which packets are
140  * forwarded.
141  * Must be instantiated with the Ethernet addresses of peer traffic generator
142  * ports.
143  */
144 struct rte_ether_addr peer_eth_addrs[RTE_MAX_ETHPORTS];
145 portid_t nb_peer_eth_addrs = 0;
146 
147 /*
148  * Probed Target Environment.
149  */
150 struct rte_port *ports;	       /**< For all probed ethernet ports. */
151 portid_t nb_ports;             /**< Number of probed ethernet ports. */
152 struct fwd_lcore **fwd_lcores; /**< For all probed logical cores. */
153 lcoreid_t nb_lcores;           /**< Number of probed logical cores. */
154 
155 portid_t ports_ids[RTE_MAX_ETHPORTS]; /**< Store all port ids. */
156 
157 /*
158  * Test Forwarding Configuration.
159  *    nb_fwd_lcores <= nb_cfg_lcores <= nb_lcores
160  *    nb_fwd_ports  <= nb_cfg_ports  <= nb_ports
161  */
162 lcoreid_t nb_cfg_lcores; /**< Number of configured logical cores. */
163 lcoreid_t nb_fwd_lcores; /**< Number of forwarding logical cores. */
164 portid_t  nb_cfg_ports;  /**< Number of configured ports. */
165 portid_t  nb_fwd_ports;  /**< Number of forwarding ports. */
166 
167 unsigned int fwd_lcores_cpuids[RTE_MAX_LCORE]; /**< CPU ids configuration. */
168 portid_t fwd_ports_ids[RTE_MAX_ETHPORTS];      /**< Port ids configuration. */
169 
170 struct fwd_stream **fwd_streams; /**< For each RX queue of each port. */
171 streamid_t nb_fwd_streams;       /**< Is equal to (nb_ports * nb_rxq). */
172 
173 /*
174  * Forwarding engines.
175  */
176 struct fwd_engine *fwd_engines[] = {
177 	&io_fwd_engine,
178 	&mac_fwd_engine,
179 	&mac_swap_engine,
180 	&flow_gen_engine,
181 	&rx_only_engine,
182 	&tx_only_engine,
183 	&csum_fwd_engine,
184 	&icmp_echo_engine,
185 	&noisy_vnf_engine,
186 	&five_tuple_swap_fwd_engine,
187 #ifdef RTE_LIBRTE_IEEE1588
188 	&ieee1588_fwd_engine,
189 #endif
190 	&shared_rxq_engine,
191 	NULL,
192 };
193 
194 struct rte_mempool *mempools[RTE_MAX_NUMA_NODES * MAX_SEGS_BUFFER_SPLIT];
195 uint16_t mempool_flags;
196 
197 struct fwd_config cur_fwd_config;
198 struct fwd_engine *cur_fwd_eng = &io_fwd_engine; /**< IO mode by default. */
199 uint32_t retry_enabled;
200 uint32_t burst_tx_delay_time = BURST_TX_WAIT_US;
201 uint32_t burst_tx_retry_num = BURST_TX_RETRIES;
202 
203 uint32_t mbuf_data_size_n = 1; /* Number of specified mbuf sizes. */
204 uint16_t mbuf_data_size[MAX_SEGS_BUFFER_SPLIT] = {
205 	DEFAULT_MBUF_DATA_SIZE
206 }; /**< Mbuf data space size. */
207 uint32_t param_total_num_mbufs = 0;  /**< number of mbufs in all pools - if
208                                       * specified on command-line. */
209 uint16_t stats_period; /**< Period to show statistics (disabled by default) */
210 
211 /** Extended statistics to show. */
212 struct rte_eth_xstat_name *xstats_display;
213 
214 unsigned int xstats_display_num; /**< Number of extended statistics to show */
215 
216 /*
217  * In a container, the process running with the 'stats-period' option cannot be
218  * terminated. Set this flag to exit the stats-period loop when SIGINT/SIGTERM is received.
219  */
220 uint8_t f_quit;
221 
222 /*
223  * Max Rx frame size, set by '--max-pkt-len' parameter.
224  */
225 uint32_t max_rx_pkt_len;
226 
227 /*
228  * Configuration of packet segments used to scatter received packets
229  * if any of the split features is configured.
230  */
231 uint16_t rx_pkt_seg_lengths[MAX_SEGS_BUFFER_SPLIT];
232 uint8_t  rx_pkt_nb_segs; /**< Number of segments to split */
233 uint16_t rx_pkt_seg_offsets[MAX_SEGS_BUFFER_SPLIT];
234 uint8_t  rx_pkt_nb_offs; /**< Number of specified offsets */
235 
236 /*
237  * Configuration of packet segments used by the "txonly" processing engine.
238  */
239 uint16_t tx_pkt_length = TXONLY_DEF_PACKET_LEN; /**< TXONLY packet length. */
240 uint16_t tx_pkt_seg_lengths[RTE_MAX_SEGS_PER_PKT] = {
241 	TXONLY_DEF_PACKET_LEN,
242 };
243 uint8_t  tx_pkt_nb_segs = 1; /**< Number of segments in TXONLY packets */
244 
245 enum tx_pkt_split tx_pkt_split = TX_PKT_SPLIT_OFF;
246 /**< Split policy for packets to TX. */
247 
248 uint8_t txonly_multi_flow;
249 /**< Whether multiple flows are generated in TXONLY mode. */
250 
251 uint32_t tx_pkt_times_inter;
252 /**< Timings for send scheduling in TXONLY mode, time between bursts. */
253 
254 uint32_t tx_pkt_times_intra;
255 /**< Timings for send scheduling in TXONLY mode, time between packets. */
256 
257 uint16_t nb_pkt_per_burst = DEF_PKT_BURST; /**< Number of packets per burst. */
258 uint16_t nb_pkt_flowgen_clones; /**< Number of Tx packet clones to send in flowgen mode. */
259 int nb_flows_flowgen = 1024; /**< Number of flows in flowgen mode. */
260 uint16_t mb_mempool_cache = DEF_MBUF_CACHE; /**< Size of mbuf mempool cache. */
261 
262 /* Whether the current configuration is in DCB mode; 0 means it is not. */
263 uint8_t dcb_config = 0;
264 
265 /*
266  * Configurable number of RX/TX queues.
267  */
268 queueid_t nb_hairpinq; /**< Number of hairpin queues per port. */
269 queueid_t nb_rxq = 1; /**< Number of RX queues per port. */
270 queueid_t nb_txq = 1; /**< Number of TX queues per port. */
271 
272 /*
273  * Configurable number of RX/TX ring descriptors.
274  * Defaults are supplied by drivers via ethdev.
275  */
276 #define RTE_TEST_RX_DESC_DEFAULT 0
277 #define RTE_TEST_TX_DESC_DEFAULT 0
278 uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT; /**< Number of RX descriptors. */
279 uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT; /**< Number of TX descriptors. */
280 
281 #define RTE_PMD_PARAM_UNSET -1
282 /*
283  * Configurable values of RX and TX ring threshold registers.
284  */
285 
286 int8_t rx_pthresh = RTE_PMD_PARAM_UNSET;
287 int8_t rx_hthresh = RTE_PMD_PARAM_UNSET;
288 int8_t rx_wthresh = RTE_PMD_PARAM_UNSET;
289 
290 int8_t tx_pthresh = RTE_PMD_PARAM_UNSET;
291 int8_t tx_hthresh = RTE_PMD_PARAM_UNSET;
292 int8_t tx_wthresh = RTE_PMD_PARAM_UNSET;
293 
294 /*
295  * Configurable value of RX free threshold.
296  */
297 int16_t rx_free_thresh = RTE_PMD_PARAM_UNSET;
298 
299 /*
300  * Configurable value of RX drop enable.
301  */
302 int8_t rx_drop_en = RTE_PMD_PARAM_UNSET;
303 
304 /*
305  * Configurable value of TX free threshold.
306  */
307 int16_t tx_free_thresh = RTE_PMD_PARAM_UNSET;
308 
309 /*
310  * Configurable value of TX RS bit threshold.
311  */
312 int16_t tx_rs_thresh = RTE_PMD_PARAM_UNSET;
313 
314 /*
315  * Configurable value of buffered packets before sending.
316  */
317 uint16_t noisy_tx_sw_bufsz;
318 
319 /*
320  * Configurable value of packet buffer timeout.
321  */
322 uint16_t noisy_tx_sw_buf_flush_time;
323 
324 /*
325  * Configurable value for size of VNF internal memory area
326  * used for simulating noisy neighbour behaviour
327  */
328 uint64_t noisy_lkup_mem_sz;
329 
330 /*
331  * Configurable value of number of random writes done in
332  * VNF simulation memory area.
333  */
334 uint64_t noisy_lkup_num_writes;
335 
336 /*
337  * Configurable value of number of random reads done in
338  * VNF simulation memory area.
339  */
340 uint64_t noisy_lkup_num_reads;
341 
342 /*
343  * Configurable value of number of random reads/writes done in
344  * VNF simulation memory area.
345  */
346 uint64_t noisy_lkup_num_reads_writes;
347 
348 /*
349  * Receive Side Scaling (RSS) configuration.
350  */
351 uint64_t rss_hf = RTE_ETH_RSS_IP; /* RSS IP by default. */
352 
353 /*
354  * Port topology configuration
355  */
356 uint16_t port_topology = PORT_TOPOLOGY_PAIRED; /* Ports are paired by default */
357 
358 /*
359  * Avoid flushing all the RX streams before starting forwarding.
360  */
361 uint8_t no_flush_rx = 0; /* flush by default */
362 
363 /*
364  * Flow API isolated mode.
365  */
366 uint8_t flow_isolate_all;
367 
368 /*
369  * Avoid checking the link status when starting/stopping a port.
370  */
371 uint8_t no_link_check = 0; /* check by default */
372 
373 /*
374  * Don't automatically start all ports in interactive mode.
375  */
376 uint8_t no_device_start = 0;
377 
378 /*
379  * Enable link status change notification
380  */
381 uint8_t lsc_interrupt = 1; /* enabled by default */
382 
383 /*
384  * Enable device removal notification.
385  */
386 uint8_t rmv_interrupt = 1; /* enabled by default */
387 
388 uint8_t hot_plug = 0; /**< hotplug disabled by default. */
389 
390 /* After attach, port setup is called on event or by iterator */
391 bool setup_on_probe_event = true;
392 
393 /* Clear ptypes on port initialization. */
394 uint8_t clear_ptypes = true;
395 
396 /* Hairpin ports configuration mode. */
397 uint16_t hairpin_mode;
398 
399 /* Pretty printing of ethdev events */
400 static const char * const eth_event_desc[] = {
401 	[RTE_ETH_EVENT_UNKNOWN] = "unknown",
402 	[RTE_ETH_EVENT_INTR_LSC] = "link state change",
403 	[RTE_ETH_EVENT_QUEUE_STATE] = "queue state",
404 	[RTE_ETH_EVENT_INTR_RESET] = "reset",
405 	[RTE_ETH_EVENT_VF_MBOX] = "VF mbox",
406 	[RTE_ETH_EVENT_IPSEC] = "IPsec",
407 	[RTE_ETH_EVENT_MACSEC] = "MACsec",
408 	[RTE_ETH_EVENT_INTR_RMV] = "device removal",
409 	[RTE_ETH_EVENT_NEW] = "device probed",
410 	[RTE_ETH_EVENT_DESTROY] = "device released",
411 	[RTE_ETH_EVENT_FLOW_AGED] = "flow aged",
412 	[RTE_ETH_EVENT_MAX] = NULL,
413 };
414 
415 /*
416  * Display or mask ether events
417  * Default to all events except VF_MBOX
418  */
419 uint32_t event_print_mask = (UINT32_C(1) << RTE_ETH_EVENT_UNKNOWN) |
420 			    (UINT32_C(1) << RTE_ETH_EVENT_INTR_LSC) |
421 			    (UINT32_C(1) << RTE_ETH_EVENT_QUEUE_STATE) |
422 			    (UINT32_C(1) << RTE_ETH_EVENT_INTR_RESET) |
423 			    (UINT32_C(1) << RTE_ETH_EVENT_IPSEC) |
424 			    (UINT32_C(1) << RTE_ETH_EVENT_MACSEC) |
425 			    (UINT32_C(1) << RTE_ETH_EVENT_INTR_RMV) |
426 			    (UINT32_C(1) << RTE_ETH_EVENT_FLOW_AGED);
427 /*
428  * Decide if all memory is locked for performance.
429  */
430 int do_mlockall = 0;
431 
432 /*
433  * NIC bypass mode configuration options.
434  */
435 
436 #if defined RTE_NET_IXGBE && defined RTE_LIBRTE_IXGBE_BYPASS
437 /* The NIC bypass watchdog timeout. */
438 uint32_t bypass_timeout = RTE_PMD_IXGBE_BYPASS_TMT_OFF;
439 #endif
440 
441 
442 #ifdef RTE_LIB_LATENCYSTATS
443 
444 /*
445  * Set when latency stats are enabled on the command line.
446  */
447 uint8_t latencystats_enabled;
448 
449 /*
450  * Lcore ID to serve latency statistics.
451  */
452 lcoreid_t latencystats_lcore_id = -1;
453 
454 #endif
455 
456 /*
457  * Ethernet device configuration.
458  */
459 struct rte_eth_rxmode rx_mode;
460 
461 struct rte_eth_txmode tx_mode = {
462 	.offloads = RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE,
463 };
464 
465 struct rte_eth_fdir_conf fdir_conf = {
466 	.mode = RTE_FDIR_MODE_NONE,
467 	.pballoc = RTE_ETH_FDIR_PBALLOC_64K,
468 	.status = RTE_FDIR_REPORT_STATUS,
469 	.mask = {
470 		.vlan_tci_mask = 0xFFEF,
471 		.ipv4_mask     = {
472 			.src_ip = 0xFFFFFFFF,
473 			.dst_ip = 0xFFFFFFFF,
474 		},
475 		.ipv6_mask     = {
476 			.src_ip = {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
477 			.dst_ip = {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
478 		},
479 		.src_port_mask = 0xFFFF,
480 		.dst_port_mask = 0xFFFF,
481 		.mac_addr_byte_mask = 0xFF,
482 		.tunnel_type_mask = 1,
483 		.tunnel_id_mask = 0xFFFFFFFF,
484 	},
485 	.drop_queue = 127,
486 };
487 
488 volatile int test_done = 1; /* stop packet forwarding when set to 1. */
489 
490 /*
491  * Display zero values by default for xstats
492  */
493 uint8_t xstats_hide_zero;
494 
495 /*
496  * Measure of CPU cycles disabled by default
497  */
498 uint8_t record_core_cycles;
499 
500 /*
501  * Display of RX and TX bursts disabled by default
502  */
503 uint8_t record_burst_stats;
504 
505 /*
506  * Number of ports per shared Rx queue group; 0 to disable.
507  */
508 uint32_t rxq_share;
509 
510 unsigned int num_sockets = 0;
511 unsigned int socket_ids[RTE_MAX_NUMA_NODES];
512 
513 #ifdef RTE_LIB_BITRATESTATS
514 /* Bitrate statistics */
515 struct rte_stats_bitrates *bitrate_data;
516 lcoreid_t bitrate_lcore_id;
517 uint8_t bitrate_enabled;
518 #endif
519 
520 #ifdef RTE_LIB_GRO
521 struct gro_status gro_ports[RTE_MAX_ETHPORTS];
522 uint8_t gro_flush_cycles = GRO_DEFAULT_FLUSH_CYCLES;
523 #endif
524 
525 /*
526  * Hexadecimal bitmask of RX multi-queue modes that can be enabled.
527  */
528 enum rte_eth_rx_mq_mode rx_mq_mode = RTE_ETH_MQ_RX_VMDQ_DCB_RSS;
529 
530 /*
531  * Used to set forced link speed
532  */
533 uint32_t eth_link_speed;
534 
535 /*
536  * ID of the current process in multi-process, used to
537  * configure the queues to be polled.
538  */
539 int proc_id;
540 
541 /*
542  * Number of processes in multi-process, used to
543  * configure the queues to be polled.
544  */
545 unsigned int num_procs = 1;
546 
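/*
 * Negotiate delivery of Rx metadata (flag, mark and tunnel ID) with the PMD.
 * Done in the primary process only; -ENOTSUP from the PMD is tolerated.
 */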
547 static void
548 eth_rx_metadata_negotiate_mp(uint16_t port_id)
549 {
550 	uint64_t rx_meta_features = 0;
551 	int ret;
552 
553 	if (!is_proc_primary())
554 		return;
555 
556 	rx_meta_features |= RTE_ETH_RX_METADATA_USER_FLAG;
557 	rx_meta_features |= RTE_ETH_RX_METADATA_USER_MARK;
558 	rx_meta_features |= RTE_ETH_RX_METADATA_TUNNEL_ID;
559 
560 	ret = rte_eth_rx_metadata_negotiate(port_id, &rx_meta_features);
561 	if (ret == 0) {
562 		if (!(rx_meta_features & RTE_ETH_RX_METADATA_USER_FLAG)) {
563 			TESTPMD_LOG(DEBUG, "Flow action FLAG will not affect Rx mbufs on port %u\n",
564 				    port_id);
565 		}
566 
567 		if (!(rx_meta_features & RTE_ETH_RX_METADATA_USER_MARK)) {
568 			TESTPMD_LOG(DEBUG, "Flow action MARK will not affect Rx mbufs on port %u\n",
569 				    port_id);
570 		}
571 
572 		if (!(rx_meta_features & RTE_ETH_RX_METADATA_TUNNEL_ID)) {
573 			TESTPMD_LOG(DEBUG, "Flow tunnel offload support might be limited or unavailable on port %u\n",
574 				    port_id);
575 		}
576 	} else if (ret != -ENOTSUP) {
577 		rte_exit(EXIT_FAILURE, "Error when negotiating Rx meta features on port %u: %s\n",
578 			 port_id, rte_strerror(-ret));
579 	}
580 }
581 
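/*
 * Pick the port acting as the flow transfer proxy for the given port.
 * Defaults to the port itself; the PMD is queried in the primary process only.
 */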
582 static void
583 flow_pick_transfer_proxy_mp(uint16_t port_id)
584 {
585 	struct rte_port *port = &ports[port_id];
586 	int ret;
587 
588 	port->flow_transfer_proxy = port_id;
589 
590 	if (!is_proc_primary())
591 		return;
592 
593 	ret = rte_flow_pick_transfer_proxy(port_id, &port->flow_transfer_proxy,
594 					   NULL);
595 	if (ret != 0) {
596 		fprintf(stderr, "Error picking flow transfer proxy for port %u: %s - ignore\n",
597 			port_id, rte_strerror(-ret));
598 	}
599 }
600 
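/*
 * Multi-process wrappers: the helpers below perform the ethdev or mempool
 * operation in the primary process only and are no-ops in secondary processes.
 */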
601 static int
602 eth_dev_configure_mp(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
603 		      const struct rte_eth_conf *dev_conf)
604 {
605 	if (is_proc_primary())
606 		return rte_eth_dev_configure(port_id, nb_rx_q, nb_tx_q,
607 					dev_conf);
608 	return 0;
609 }
610 
611 static int
612 eth_dev_start_mp(uint16_t port_id)
613 {
614 	if (is_proc_primary())
615 		return rte_eth_dev_start(port_id);
616 
617 	return 0;
618 }
619 
620 static int
621 eth_dev_stop_mp(uint16_t port_id)
622 {
623 	if (is_proc_primary())
624 		return rte_eth_dev_stop(port_id);
625 
626 	return 0;
627 }
628 
629 static void
630 mempool_free_mp(struct rte_mempool *mp)
631 {
632 	if (is_proc_primary())
633 		rte_mempool_free(mp);
634 }
635 
636 static int
637 eth_dev_set_mtu_mp(uint16_t port_id, uint16_t mtu)
638 {
639 	if (is_proc_primary())
640 		return rte_eth_dev_set_mtu(port_id, mtu);
641 
642 	return 0;
643 }
644 
645 /* Forward function declarations */
646 static void setup_attached_port(portid_t pi);
647 static void check_all_ports_link_status(uint32_t port_mask);
648 static int eth_event_callback(portid_t port_id,
649 			      enum rte_eth_event_type type,
650 			      void *param, void *ret_param);
651 static void dev_event_callback(const char *device_name,
652 				enum rte_dev_event_type type,
653 				void *param);
654 static void fill_xstats_display_info(void);
655 
656 /*
657  * Check if all the ports are started.
658  * If yes, return positive value. If not, return zero.
659  */
660 static int all_ports_started(void);
661 
662 #ifdef RTE_LIB_GSO
663 struct gso_status gso_ports[RTE_MAX_ETHPORTS];
664 uint16_t gso_max_segment_size = RTE_ETHER_MAX_LEN - RTE_ETHER_CRC_LEN;
665 #endif
666 
667 /* Holds the registered mbuf dynamic flags names. */
668 char dynf_names[64][RTE_MBUF_DYN_NAMESIZE];
669 
670 
671 /*
672  * Helper function to check whether a socket ID has not been discovered yet.
673  * If it is new, return a positive value. If already known, return zero.
674  */
675 int
676 new_socket_id(unsigned int socket_id)
677 {
678 	unsigned int i;
679 
680 	for (i = 0; i < num_sockets; i++) {
681 		if (socket_ids[i] == socket_id)
682 			return 0;
683 	}
684 	return 1;
685 }
686 
687 /*
688  * Setup default configuration.
689  */
690 static void
691 set_default_fwd_lcores_config(void)
692 {
693 	unsigned int i;
694 	unsigned int nb_lc;
695 	unsigned int sock_num;
696 
697 	nb_lc = 0;
698 	for (i = 0; i < RTE_MAX_LCORE; i++) {
699 		if (!rte_lcore_is_enabled(i))
700 			continue;
701 		sock_num = rte_lcore_to_socket_id(i);
702 		if (new_socket_id(sock_num)) {
703 			if (num_sockets >= RTE_MAX_NUMA_NODES) {
704 				rte_exit(EXIT_FAILURE,
705 					 "Total sockets greater than %u\n",
706 					 RTE_MAX_NUMA_NODES);
707 			}
708 			socket_ids[num_sockets++] = sock_num;
709 		}
710 		if (i == rte_get_main_lcore())
711 			continue;
712 		fwd_lcores_cpuids[nb_lc++] = i;
713 	}
714 	nb_lcores = (lcoreid_t) nb_lc;
715 	nb_cfg_lcores = nb_lcores;
716 	nb_fwd_lcores = 1;
717 }
718 
719 static void
720 set_def_peer_eth_addrs(void)
721 {
722 	portid_t i;
723 
724 	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
725 		peer_eth_addrs[i].addr_bytes[0] = RTE_ETHER_LOCAL_ADMIN_ADDR;
726 		peer_eth_addrs[i].addr_bytes[5] = i;
727 	}
728 }
729 
730 static void
731 set_default_fwd_ports_config(void)
732 {
733 	portid_t pt_id;
734 	int i = 0;
735 
736 	RTE_ETH_FOREACH_DEV(pt_id) {
737 		fwd_ports_ids[i++] = pt_id;
738 
739 		/* Update sockets info according to the attached device */
740 		int socket_id = rte_eth_dev_socket_id(pt_id);
741 		if (socket_id >= 0 && new_socket_id(socket_id)) {
742 			if (num_sockets >= RTE_MAX_NUMA_NODES) {
743 				rte_exit(EXIT_FAILURE,
744 					 "Total sockets greater than %u\n",
745 					 RTE_MAX_NUMA_NODES);
746 			}
747 			socket_ids[num_sockets++] = socket_id;
748 		}
749 	}
750 
751 	nb_cfg_ports = nb_ports;
752 	nb_fwd_ports = nb_ports;
753 }
754 
755 void
756 set_def_fwd_config(void)
757 {
758 	set_default_fwd_lcores_config();
759 	set_def_peer_eth_addrs();
760 	set_default_fwd_ports_config();
761 }
762 
763 #ifndef RTE_EXEC_ENV_WINDOWS
764 /* extremely pessimistic estimation of memory required to create a mempool */
765 static int
766 calc_mem_size(uint32_t nb_mbufs, uint32_t mbuf_sz, size_t pgsz, size_t *out)
767 {
768 	unsigned int n_pages, mbuf_per_pg, leftover;
769 	uint64_t total_mem, mbuf_mem, obj_sz;
770 
771 	/* there is no good way to predict how much space the mempool will
772 	 * occupy because it will allocate chunks on the fly, and some of those
773 	 * will come from default DPDK memory while some will come from our
774 	 * external memory, so just assume 128MB will be enough for everyone.
775 	 */
776 	uint64_t hdr_mem = 128 << 20;
777 
778 	/* account for possible non-contiguousness */
779 	obj_sz = rte_mempool_calc_obj_size(mbuf_sz, 0, NULL);
780 	if (obj_sz > pgsz) {
781 		TESTPMD_LOG(ERR, "Object size is bigger than page size\n");
782 		return -1;
783 	}
784 
785 	mbuf_per_pg = pgsz / obj_sz;
786 	leftover = (nb_mbufs % mbuf_per_pg) > 0;
787 	n_pages = (nb_mbufs / mbuf_per_pg) + leftover;
788 
789 	mbuf_mem = n_pages * pgsz;
790 
791 	total_mem = RTE_ALIGN(hdr_mem + mbuf_mem, pgsz);
792 
793 	if (total_mem > SIZE_MAX) {
794 		TESTPMD_LOG(ERR, "Memory size too big\n");
795 		return -1;
796 	}
797 	*out = (size_t)total_mem;
798 
799 	return 0;
800 }
801 
802 static int
803 pagesz_flags(uint64_t page_sz)
804 {
805 	/* as per mmap() manpage, all page sizes are log2 of page size
806 	 * shifted by MAP_HUGE_SHIFT
807 	 */
808 	int log2 = rte_log2_u64(page_sz);
809 
810 	return (log2 << HUGE_SHIFT);
811 }
812 
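/*
 * Reserve an anonymous memory area of memsz bytes with mmap(), optionally
 * backed by hugepages of the given page size. Returns NULL on failure.
 */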
813 static void *
814 alloc_mem(size_t memsz, size_t pgsz, bool huge)
815 {
816 	void *addr;
817 	int flags;
818 
819 	/* allocate anonymous hugepages */
820 	flags = MAP_ANONYMOUS | MAP_PRIVATE;
821 	if (huge)
822 		flags |= HUGE_FLAG | pagesz_flags(pgsz);
823 
824 	addr = mmap(NULL, memsz, PROT_READ | PROT_WRITE, flags, -1, 0);
825 	if (addr == MAP_FAILED)
826 		return NULL;
827 
828 	return addr;
829 }
830 
831 struct extmem_param {
832 	void *addr;
833 	size_t len;
834 	size_t pgsz;
835 	rte_iova_t *iova_table;
836 	unsigned int iova_table_len;
837 };
838 
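/*
 * Allocate an external memory area large enough for the requested mbufs,
 * trying the known page sizes in turn, and record the IOVA address of
 * every page of the area.
 */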
839 static int
840 create_extmem(uint32_t nb_mbufs, uint32_t mbuf_sz, struct extmem_param *param,
841 		bool huge)
842 {
843 	uint64_t pgsizes[] = {RTE_PGSIZE_2M, RTE_PGSIZE_1G, /* x86_64, ARM */
844 			RTE_PGSIZE_16M, RTE_PGSIZE_16G};    /* POWER */
845 	unsigned int cur_page, n_pages, pgsz_idx;
846 	size_t mem_sz, cur_pgsz;
847 	rte_iova_t *iovas = NULL;
848 	void *addr;
849 	int ret;
850 
851 	for (pgsz_idx = 0; pgsz_idx < RTE_DIM(pgsizes); pgsz_idx++) {
852 		/* skip anything that is too big */
853 		if (pgsizes[pgsz_idx] > SIZE_MAX)
854 			continue;
855 
856 		cur_pgsz = pgsizes[pgsz_idx];
857 
858 		/* if we were told not to allocate hugepages, override */
859 		if (!huge)
860 			cur_pgsz = sysconf(_SC_PAGESIZE);
861 
862 		ret = calc_mem_size(nb_mbufs, mbuf_sz, cur_pgsz, &mem_sz);
863 		if (ret < 0) {
864 			TESTPMD_LOG(ERR, "Cannot calculate memory size\n");
865 			return -1;
866 		}
867 
868 		/* allocate our memory */
869 		addr = alloc_mem(mem_sz, cur_pgsz, huge);
870 
871 		/* if we couldn't allocate memory with a specified page size,
872 		 * that doesn't mean we can't do it with other page sizes, so
873 		 * try another one.
874 		 */
875 		if (addr == NULL)
876 			continue;
877 
878 		/* store IOVA addresses for every page in this memory area */
879 		n_pages = mem_sz / cur_pgsz;
880 
881 		iovas = malloc(sizeof(*iovas) * n_pages);
882 
883 		if (iovas == NULL) {
884 			TESTPMD_LOG(ERR, "Cannot allocate memory for iova addresses\n");
885 			goto fail;
886 		}
887 		/* lock memory if it's not huge pages */
888 		if (!huge)
889 			mlock(addr, mem_sz);
890 
891 		/* populate IOVA addresses */
892 		for (cur_page = 0; cur_page < n_pages; cur_page++) {
893 			rte_iova_t iova;
894 			size_t offset;
895 			void *cur;
896 
897 			offset = cur_pgsz * cur_page;
898 			cur = RTE_PTR_ADD(addr, offset);
899 
900 			/* touch the page before getting its IOVA */
901 			*(volatile char *)cur = 0;
902 
903 			iova = rte_mem_virt2iova(cur);
904 
905 			iovas[cur_page] = iova;
906 		}
907 
908 		break;
909 	}
910 	/* if we couldn't allocate anything */
911 	if (iovas == NULL)
912 		return -1;
913 
914 	param->addr = addr;
915 	param->len = mem_sz;
916 	param->pgsz = cur_pgsz;
917 	param->iova_table = iovas;
918 	param->iova_table_len = n_pages;
919 
920 	return 0;
921 fail:
922 	if (iovas)
923 		free(iovas);
924 	if (addr)
925 		munmap(addr, mem_sz);
926 
927 	return -1;
928 }
929 
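/*
 * Create the external memory area and add it to the "extmem" malloc heap
 * so that mempools can later be allocated from it.
 */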
930 static int
931 setup_extmem(uint32_t nb_mbufs, uint32_t mbuf_sz, bool huge)
932 {
933 	struct extmem_param param;
934 	int socket_id, ret;
935 
936 	memset(&param, 0, sizeof(param));
937 
938 	/* check if our heap exists */
939 	socket_id = rte_malloc_heap_get_socket(EXTMEM_HEAP_NAME);
940 	if (socket_id < 0) {
941 		/* create our heap */
942 		ret = rte_malloc_heap_create(EXTMEM_HEAP_NAME);
943 		if (ret < 0) {
944 			TESTPMD_LOG(ERR, "Cannot create heap\n");
945 			return -1;
946 		}
947 	}
948 
949 	ret = create_extmem(nb_mbufs, mbuf_sz, &param, huge);
950 	if (ret < 0) {
951 		TESTPMD_LOG(ERR, "Cannot create memory area\n");
952 		return -1;
953 	}
954 
955 	/* we now have a valid memory area, so add it to heap */
956 	ret = rte_malloc_heap_memory_add(EXTMEM_HEAP_NAME,
957 			param.addr, param.len, param.iova_table,
958 			param.iova_table_len, param.pgsz);
959 
960 	/* when using VFIO, memory is automatically mapped for DMA by EAL */
961 
962 	/* not needed any more */
963 	free(param.iova_table);
964 
965 	if (ret < 0) {
966 		TESTPMD_LOG(ERR, "Cannot add memory to heap\n");
967 		munmap(param.addr, param.len);
968 		return -1;
969 	}
970 
971 	/* success */
972 
973 	TESTPMD_LOG(DEBUG, "Allocated %zuMB of external memory\n",
974 			param.len >> 20);
975 
976 	return 0;
977 }
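
/*
 * Mempool memory chunk callback: DMA-unmap the chunk from every probed
 * device and unregister it from DPDK.
 */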
978 static void
979 dma_unmap_cb(struct rte_mempool *mp __rte_unused, void *opaque __rte_unused,
980 	     struct rte_mempool_memhdr *memhdr, unsigned mem_idx __rte_unused)
981 {
982 	uint16_t pid = 0;
983 	int ret;
984 
985 	RTE_ETH_FOREACH_DEV(pid) {
986 		struct rte_eth_dev_info dev_info;
987 
988 		ret = eth_dev_info_get_print_err(pid, &dev_info);
989 		if (ret != 0) {
990 			TESTPMD_LOG(DEBUG,
991 				    "unable to get device info for port %d on addr 0x%p,"
992 				    "mempool unmapping will not be performed\n",
993 				    pid, memhdr->addr);
994 			continue;
995 		}
996 
997 		ret = rte_dev_dma_unmap(dev_info.device, memhdr->addr, 0, memhdr->len);
998 		if (ret) {
999 			TESTPMD_LOG(DEBUG,
1000 				    "unable to DMA unmap addr 0x%p "
1001 				    "for device %s\n",
1002 				    memhdr->addr, dev_info.device->name);
1003 		}
1004 	}
1005 	ret = rte_extmem_unregister(memhdr->addr, memhdr->len);
1006 	if (ret) {
1007 		TESTPMD_LOG(DEBUG,
1008 			    "unable to un-register addr 0x%p\n", memhdr->addr);
1009 	}
1010 }
1011 
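/*
 * Mempool memory chunk callback: register the chunk with DPDK and DMA-map
 * it for every probed device.
 */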
1012 static void
1013 dma_map_cb(struct rte_mempool *mp __rte_unused, void *opaque __rte_unused,
1014 	   struct rte_mempool_memhdr *memhdr, unsigned mem_idx __rte_unused)
1015 {
1016 	uint16_t pid = 0;
1017 	size_t page_size = sysconf(_SC_PAGESIZE);
1018 	int ret;
1019 
1020 	ret = rte_extmem_register(memhdr->addr, memhdr->len, NULL, 0,
1021 				  page_size);
1022 	if (ret) {
1023 		TESTPMD_LOG(DEBUG,
1024 			    "unable to register addr 0x%p\n", memhdr->addr);
1025 		return;
1026 	}
1027 	RTE_ETH_FOREACH_DEV(pid) {
1028 		struct rte_eth_dev_info dev_info;
1029 
1030 		ret = eth_dev_info_get_print_err(pid, &dev_info);
1031 		if (ret != 0) {
1032 			TESTPMD_LOG(DEBUG,
1033 				    "unable to get device info for port %d on addr 0x%p,"
1034 				    "mempool mapping will not be performed\n",
1035 				    pid, memhdr->addr);
1036 			continue;
1037 		}
1038 		ret = rte_dev_dma_map(dev_info.device, memhdr->addr, 0, memhdr->len);
1039 		if (ret) {
1040 			TESTPMD_LOG(DEBUG,
1041 				    "unable to DMA map addr 0x%p "
1042 				    "for device %s\n",
1043 				    memhdr->addr, dev_info.device->name);
1044 		}
1045 	}
1046 }
1047 #endif
1048 
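/*
 * Reserve IOVA-contiguous memzones to be used as external (pinned) data
 * buffers and fill the array of external memory descriptors.
 * Return the number of descriptors, or 0 on failure.
 */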
1049 static unsigned int
1050 setup_extbuf(uint32_t nb_mbufs, uint16_t mbuf_sz, unsigned int socket_id,
1051 	    char *pool_name, struct rte_pktmbuf_extmem **ext_mem)
1052 {
1053 	struct rte_pktmbuf_extmem *xmem;
1054 	unsigned int ext_num, zone_num, elt_num;
1055 	uint16_t elt_size;
1056 
1057 	elt_size = RTE_ALIGN_CEIL(mbuf_sz, RTE_CACHE_LINE_SIZE);
1058 	elt_num = EXTBUF_ZONE_SIZE / elt_size;
1059 	zone_num = (nb_mbufs + elt_num - 1) / elt_num;
1060 
1061 	xmem = malloc(sizeof(struct rte_pktmbuf_extmem) * zone_num);
1062 	if (xmem == NULL) {
1063 		TESTPMD_LOG(ERR, "Cannot allocate memory for "
1064 				 "external buffer descriptors\n");
1065 		*ext_mem = NULL;
1066 		return 0;
1067 	}
1068 	for (ext_num = 0; ext_num < zone_num; ext_num++) {
1069 		struct rte_pktmbuf_extmem *xseg = xmem + ext_num;
1070 		const struct rte_memzone *mz;
1071 		char mz_name[RTE_MEMZONE_NAMESIZE];
1072 		int ret;
1073 
1074 		ret = snprintf(mz_name, sizeof(mz_name),
1075 			RTE_MEMPOOL_MZ_FORMAT "_xb_%u", pool_name, ext_num);
1076 		if (ret < 0 || ret >= (int)sizeof(mz_name)) {
1077 			errno = ENAMETOOLONG;
1078 			ext_num = 0;
1079 			break;
1080 		}
1081 		mz = rte_memzone_reserve_aligned(mz_name, EXTBUF_ZONE_SIZE,
1082 						 socket_id,
1083 						 RTE_MEMZONE_IOVA_CONTIG |
1084 						 RTE_MEMZONE_1GB |
1085 						 RTE_MEMZONE_SIZE_HINT_ONLY,
1086 						 EXTBUF_ZONE_SIZE);
1087 		if (mz == NULL) {
1088 			/*
1089 			 * The caller exits on external buffer creation
1090 			 * error, so there is no need to free memzones.
1091 			 */
1092 			errno = ENOMEM;
1093 			ext_num = 0;
1094 			break;
1095 		}
1096 		xseg->buf_ptr = mz->addr;
1097 		xseg->buf_iova = mz->iova;
1098 		xseg->buf_len = EXTBUF_ZONE_SIZE;
1099 		xseg->elt_size = elt_size;
1100 	}
1101 	if (ext_num == 0 && xmem != NULL) {
1102 		free(xmem);
1103 		xmem = NULL;
1104 	}
1105 	*ext_mem = xmem;
1106 	return ext_num;
1107 }
1108 
1109 /*
1110  * Create an mbuf pool for a given socket; done once at initialisation time.
1111  */
1112 static struct rte_mempool *
1113 mbuf_pool_create(uint16_t mbuf_seg_size, unsigned nb_mbuf,
1114 		 unsigned int socket_id, uint16_t size_idx)
1115 {
1116 	char pool_name[RTE_MEMPOOL_NAMESIZE];
1117 	struct rte_mempool *rte_mp = NULL;
1118 #ifndef RTE_EXEC_ENV_WINDOWS
1119 	uint32_t mb_size;
1120 
1121 	mb_size = sizeof(struct rte_mbuf) + mbuf_seg_size;
1122 #endif
1123 	mbuf_poolname_build(socket_id, pool_name, sizeof(pool_name), size_idx);
1124 	if (!is_proc_primary()) {
1125 		rte_mp = rte_mempool_lookup(pool_name);
1126 		if (rte_mp == NULL)
1127 			rte_exit(EXIT_FAILURE,
1128 				"Get mbuf pool for socket %u failed: %s\n",
1129 				socket_id, rte_strerror(rte_errno));
1130 		return rte_mp;
1131 	}
1132 
1133 	TESTPMD_LOG(INFO,
1134 		"create a new mbuf pool <%s>: n=%u, size=%u, socket=%u\n",
1135 		pool_name, nb_mbuf, mbuf_seg_size, socket_id);
1136 
1137 	switch (mp_alloc_type) {
1138 	case MP_ALLOC_NATIVE:
1139 		{
1140 			/* wrapper to rte_mempool_create() */
1141 			TESTPMD_LOG(INFO, "preferred mempool ops selected: %s\n",
1142 					rte_mbuf_best_mempool_ops());
1143 			rte_mp = rte_pktmbuf_pool_create(pool_name, nb_mbuf,
1144 				mb_mempool_cache, 0, mbuf_seg_size, socket_id);
1145 			break;
1146 		}
1147 #ifndef RTE_EXEC_ENV_WINDOWS
1148 	case MP_ALLOC_ANON:
1149 		{
1150 			rte_mp = rte_mempool_create_empty(pool_name, nb_mbuf,
1151 				mb_size, (unsigned int) mb_mempool_cache,
1152 				sizeof(struct rte_pktmbuf_pool_private),
1153 				socket_id, mempool_flags);
1154 			if (rte_mp == NULL)
1155 				goto err;
1156 
1157 			if (rte_mempool_populate_anon(rte_mp) == 0) {
1158 				rte_mempool_free(rte_mp);
1159 				rte_mp = NULL;
1160 				goto err;
1161 			}
1162 			rte_pktmbuf_pool_init(rte_mp, NULL);
1163 			rte_mempool_obj_iter(rte_mp, rte_pktmbuf_init, NULL);
1164 			rte_mempool_mem_iter(rte_mp, dma_map_cb, NULL);
1165 			break;
1166 		}
1167 	case MP_ALLOC_XMEM:
1168 	case MP_ALLOC_XMEM_HUGE:
1169 		{
1170 			int heap_socket;
1171 			bool huge = mp_alloc_type == MP_ALLOC_XMEM_HUGE;
1172 
1173 			if (setup_extmem(nb_mbuf, mbuf_seg_size, huge) < 0)
1174 				rte_exit(EXIT_FAILURE, "Could not create external memory\n");
1175 
1176 			heap_socket =
1177 				rte_malloc_heap_get_socket(EXTMEM_HEAP_NAME);
1178 			if (heap_socket < 0)
1179 				rte_exit(EXIT_FAILURE, "Could not get external memory socket ID\n");
1180 
1181 			TESTPMD_LOG(INFO, "preferred mempool ops selected: %s\n",
1182 					rte_mbuf_best_mempool_ops());
1183 			rte_mp = rte_pktmbuf_pool_create(pool_name, nb_mbuf,
1184 					mb_mempool_cache, 0, mbuf_seg_size,
1185 					heap_socket);
1186 			break;
1187 		}
1188 #endif
1189 	case MP_ALLOC_XBUF:
1190 		{
1191 			struct rte_pktmbuf_extmem *ext_mem;
1192 			unsigned int ext_num;
1193 
1194 			ext_num = setup_extbuf(nb_mbuf,	mbuf_seg_size,
1195 					       socket_id, pool_name, &ext_mem);
1196 			if (ext_num == 0)
1197 				rte_exit(EXIT_FAILURE,
1198 					 "Can't create pinned data buffers\n");
1199 
1200 			TESTPMD_LOG(INFO, "preferred mempool ops selected: %s\n",
1201 					rte_mbuf_best_mempool_ops());
1202 			rte_mp = rte_pktmbuf_pool_create_extbuf
1203 					(pool_name, nb_mbuf, mb_mempool_cache,
1204 					 0, mbuf_seg_size, socket_id,
1205 					 ext_mem, ext_num);
1206 			free(ext_mem);
1207 			break;
1208 		}
1209 	default:
1210 		{
1211 			rte_exit(EXIT_FAILURE, "Invalid mempool creation mode\n");
1212 		}
1213 	}
1214 
1215 #ifndef RTE_EXEC_ENV_WINDOWS
1216 err:
1217 #endif
1218 	if (rte_mp == NULL) {
1219 		rte_exit(EXIT_FAILURE,
1220 			"Creation of mbuf pool for socket %u failed: %s\n",
1221 			socket_id, rte_strerror(rte_errno));
1222 	} else if (verbose_level > 0) {
1223 		rte_mempool_dump(stdout, rte_mp);
1224 	}
1225 	return rte_mp;
1226 }
1227 
1228 /*
1229  * Check whether the given socket ID is valid in NUMA mode.
1230  * Return 0 if valid, -1 otherwise.
1231  */
1232 static int
1233 check_socket_id(const unsigned int socket_id)
1234 {
1235 	static int warning_once = 0;
1236 
1237 	if (new_socket_id(socket_id)) {
1238 		if (!warning_once && numa_support)
1239 			fprintf(stderr,
1240 				"Warning: NUMA should be configured manually by using --port-numa-config and --ring-numa-config parameters along with --numa.\n");
1241 		warning_once = 1;
1242 		return -1;
1243 	}
1244 	return 0;
1245 }
1246 
1247 /*
1248  * Get the allowed maximum number of RX queues.
1249  * *pid returns the port ID that has the minimal value of
1250  * max_rx_queues among all ports.
1251  */
1252 queueid_t
1253 get_allowed_max_nb_rxq(portid_t *pid)
1254 {
1255 	queueid_t allowed_max_rxq = RTE_MAX_QUEUES_PER_PORT;
1256 	bool max_rxq_valid = false;
1257 	portid_t pi;
1258 	struct rte_eth_dev_info dev_info;
1259 
1260 	RTE_ETH_FOREACH_DEV(pi) {
1261 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1262 			continue;
1263 
1264 		max_rxq_valid = true;
1265 		if (dev_info.max_rx_queues < allowed_max_rxq) {
1266 			allowed_max_rxq = dev_info.max_rx_queues;
1267 			*pid = pi;
1268 		}
1269 	}
1270 	return max_rxq_valid ? allowed_max_rxq : 0;
1271 }
1272 
1273 /*
1274  * Check whether the input rxq is valid.
1275  * It is valid if it is not greater than the maximum number
1276  * of RX queues supported by any port.
1277  * Return 0 if valid, -1 otherwise.
1278  */
1279 int
1280 check_nb_rxq(queueid_t rxq)
1281 {
1282 	queueid_t allowed_max_rxq;
1283 	portid_t pid = 0;
1284 
1285 	allowed_max_rxq = get_allowed_max_nb_rxq(&pid);
1286 	if (rxq > allowed_max_rxq) {
1287 		fprintf(stderr,
1288 			"Fail: input rxq (%u) can't be greater than max_rx_queues (%u) of port %u\n",
1289 			rxq, allowed_max_rxq, pid);
1290 		return -1;
1291 	}
1292 	return 0;
1293 }
1294 
1295 /*
1296  * Get the allowed maximum number of TX queues.
1297  * *pid returns the port ID that has the minimal value of
1298  * max_tx_queues among all ports.
1299  */
1300 queueid_t
1301 get_allowed_max_nb_txq(portid_t *pid)
1302 {
1303 	queueid_t allowed_max_txq = RTE_MAX_QUEUES_PER_PORT;
1304 	bool max_txq_valid = false;
1305 	portid_t pi;
1306 	struct rte_eth_dev_info dev_info;
1307 
1308 	RTE_ETH_FOREACH_DEV(pi) {
1309 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1310 			continue;
1311 
1312 		max_txq_valid = true;
1313 		if (dev_info.max_tx_queues < allowed_max_txq) {
1314 			allowed_max_txq = dev_info.max_tx_queues;
1315 			*pid = pi;
1316 		}
1317 	}
1318 	return max_txq_valid ? allowed_max_txq : 0;
1319 }
1320 
1321 /*
1322  * Check whether the input txq is valid.
1323  * It is valid if it is not greater than the maximum number
1324  * of TX queues supported by any port.
1325  * Return 0 if valid, -1 otherwise.
1326  */
1327 int
1328 check_nb_txq(queueid_t txq)
1329 {
1330 	queueid_t allowed_max_txq;
1331 	portid_t pid = 0;
1332 
1333 	allowed_max_txq = get_allowed_max_nb_txq(&pid);
1334 	if (txq > allowed_max_txq) {
1335 		fprintf(stderr,
1336 			"Fail: input txq (%u) can't be greater than max_tx_queues (%u) of port %u\n",
1337 			txq, allowed_max_txq, pid);
1338 		return -1;
1339 	}
1340 	return 0;
1341 }
1342 
1343 /*
1344  * Get the allowed maximum number of RXDs of every rx queue.
1345  * *pid returns the port ID that has the minimal value of
1346  * max_rxd among all queues of all ports.
1347  */
1348 static uint16_t
1349 get_allowed_max_nb_rxd(portid_t *pid)
1350 {
1351 	uint16_t allowed_max_rxd = UINT16_MAX;
1352 	portid_t pi;
1353 	struct rte_eth_dev_info dev_info;
1354 
1355 	RTE_ETH_FOREACH_DEV(pi) {
1356 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1357 			continue;
1358 
1359 		if (dev_info.rx_desc_lim.nb_max < allowed_max_rxd) {
1360 			allowed_max_rxd = dev_info.rx_desc_lim.nb_max;
1361 			*pid = pi;
1362 		}
1363 	}
1364 	return allowed_max_rxd;
1365 }
1366 
1367 /*
1368  * Get the allowed minimal number of RXDs of every rx queue.
1369  * *pid returns the port ID that has the largest value of
1370  * min_rxd among all queues of all ports.
1371  */
1372 static uint16_t
1373 get_allowed_min_nb_rxd(portid_t *pid)
1374 {
1375 	uint16_t allowed_min_rxd = 0;
1376 	portid_t pi;
1377 	struct rte_eth_dev_info dev_info;
1378 
1379 	RTE_ETH_FOREACH_DEV(pi) {
1380 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1381 			continue;
1382 
1383 		if (dev_info.rx_desc_lim.nb_min > allowed_min_rxd) {
1384 			allowed_min_rxd = dev_info.rx_desc_lim.nb_min;
1385 			*pid = pi;
1386 		}
1387 	}
1388 
1389 	return allowed_min_rxd;
1390 }
1391 
1392 /*
1393  * Check whether the input rxd is valid.
1394  * It is valid if it is not greater than the maximum number of RXDs
1395  * supported by any Rx queue and not less than the minimal number of
1396  * RXDs required by any Rx queue.
1397  * Return 0 if valid, -1 otherwise.
1398  */
1399 int
1400 check_nb_rxd(queueid_t rxd)
1401 {
1402 	uint16_t allowed_max_rxd;
1403 	uint16_t allowed_min_rxd;
1404 	portid_t pid = 0;
1405 
1406 	allowed_max_rxd = get_allowed_max_nb_rxd(&pid);
1407 	if (rxd > allowed_max_rxd) {
1408 		fprintf(stderr,
1409 			"Fail: input rxd (%u) can't be greater than max_rxds (%u) of port %u\n",
1410 			rxd, allowed_max_rxd, pid);
1411 		return -1;
1412 	}
1413 
1414 	allowed_min_rxd = get_allowed_min_nb_rxd(&pid);
1415 	if (rxd < allowed_min_rxd) {
1416 		fprintf(stderr,
1417 			"Fail: input rxd (%u) can't be less than min_rxds (%u) of port %u\n",
1418 			rxd, allowed_min_rxd, pid);
1419 		return -1;
1420 	}
1421 
1422 	return 0;
1423 }
1424 
1425 /*
1426  * Get the allowed maximum number of TXDs of every tx queue.
1427  * *pid returns the port ID that has the minimal value of
1428  * max_txd among all tx queues.
1429  */
1430 static uint16_t
1431 get_allowed_max_nb_txd(portid_t *pid)
1432 {
1433 	uint16_t allowed_max_txd = UINT16_MAX;
1434 	portid_t pi;
1435 	struct rte_eth_dev_info dev_info;
1436 
1437 	RTE_ETH_FOREACH_DEV(pi) {
1438 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1439 			continue;
1440 
1441 		if (dev_info.tx_desc_lim.nb_max < allowed_max_txd) {
1442 			allowed_max_txd = dev_info.tx_desc_lim.nb_max;
1443 			*pid = pi;
1444 		}
1445 	}
1446 	return allowed_max_txd;
1447 }
1448 
1449 /*
1450  * Get the allowed minimal number of TXDs of every tx queue.
1451  * *pid returns the port ID that has the largest value of
1452  * min_txd among all tx queues.
1453  */
1454 static uint16_t
1455 get_allowed_min_nb_txd(portid_t *pid)
1456 {
1457 	uint16_t allowed_min_txd = 0;
1458 	portid_t pi;
1459 	struct rte_eth_dev_info dev_info;
1460 
1461 	RTE_ETH_FOREACH_DEV(pi) {
1462 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1463 			continue;
1464 
1465 		if (dev_info.tx_desc_lim.nb_min > allowed_min_txd) {
1466 			allowed_min_txd = dev_info.tx_desc_lim.nb_min;
1467 			*pid = pi;
1468 		}
1469 	}
1470 
1471 	return allowed_min_txd;
1472 }
1473 
1474 /*
1475  * Check whether the input txd is valid.
1476  * It is valid if it is not greater than the maximum number of TXDs
1477  * supported by any Tx queue and not less than the minimal number required.
1478  * Return 0 if valid, -1 otherwise.
1479  */
1480 int
1481 check_nb_txd(queueid_t txd)
1482 {
1483 	uint16_t allowed_max_txd;
1484 	uint16_t allowed_min_txd;
1485 	portid_t pid = 0;
1486 
1487 	allowed_max_txd = get_allowed_max_nb_txd(&pid);
1488 	if (txd > allowed_max_txd) {
1489 		fprintf(stderr,
1490 			"Fail: input txd (%u) can't be greater than max_txds (%u) of port %u\n",
1491 			txd, allowed_max_txd, pid);
1492 		return -1;
1493 	}
1494 
1495 	allowed_min_txd = get_allowed_min_nb_txd(&pid);
1496 	if (txd < allowed_min_txd) {
1497 		fprintf(stderr,
1498 			"Fail: input txd (%u) can't be less than min_txds (%u) of port %u\n",
1499 			txd, allowed_min_txd, pid);
1500 		return -1;
1501 	}
1502 	return 0;
1503 }
1504 
1505 
1506 /*
1507  * Get the allowed maximum number of hairpin queues.
1508  * *pid returns the port ID that has the minimal value of
1509  * max_hairpin_queues among all ports.
1510  */
1511 queueid_t
1512 get_allowed_max_nb_hairpinq(portid_t *pid)
1513 {
1514 	queueid_t allowed_max_hairpinq = RTE_MAX_QUEUES_PER_PORT;
1515 	portid_t pi;
1516 	struct rte_eth_hairpin_cap cap;
1517 
1518 	RTE_ETH_FOREACH_DEV(pi) {
1519 		if (rte_eth_dev_hairpin_capability_get(pi, &cap) != 0) {
1520 			*pid = pi;
1521 			return 0;
1522 		}
1523 		if (cap.max_nb_queues < allowed_max_hairpinq) {
1524 			allowed_max_hairpinq = cap.max_nb_queues;
1525 			*pid = pi;
1526 		}
1527 	}
1528 	return allowed_max_hairpinq;
1529 }
1530 
1531 /*
1532  * Check whether the input hairpinq is valid.
1533  * It is valid if it is not greater than the maximum number
1534  * of hairpin queues supported by any port.
1535  * Return 0 if valid, -1 otherwise.
1536  */
1537 int
1538 check_nb_hairpinq(queueid_t hairpinq)
1539 {
1540 	queueid_t allowed_max_hairpinq;
1541 	portid_t pid = 0;
1542 
1543 	allowed_max_hairpinq = get_allowed_max_nb_hairpinq(&pid);
1544 	if (hairpinq > allowed_max_hairpinq) {
1545 		fprintf(stderr,
1546 			"Fail: input hairpin (%u) can't be greater than max_hairpin_queues (%u) of port %u\n",
1547 			hairpinq, allowed_max_hairpinq, pid);
1548 		return -1;
1549 	}
1550 	return 0;
1551 }
1552 
1553 static int
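/*
 * Ethernet overhead: maximum frame length minus maximum MTU when the device
 * reports both, otherwise Ethernet header plus CRC length.
 */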
1554 get_eth_overhead(struct rte_eth_dev_info *dev_info)
1555 {
1556 	uint32_t eth_overhead;
1557 
1558 	if (dev_info->max_mtu != UINT16_MAX &&
1559 	    dev_info->max_rx_pktlen > dev_info->max_mtu)
1560 		eth_overhead = dev_info->max_rx_pktlen - dev_info->max_mtu;
1561 	else
1562 		eth_overhead = RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN;
1563 
1564 	return eth_overhead;
1565 }
1566 
1567 static void
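/* Apply the default Rx/Tx configuration and offload settings to one port. */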
1568 init_config_port_offloads(portid_t pid, uint32_t socket_id)
1569 {
1570 	struct rte_port *port = &ports[pid];
1571 	int ret;
1572 	int i;
1573 
1574 	eth_rx_metadata_negotiate_mp(pid);
1575 	flow_pick_transfer_proxy_mp(pid);
1576 
1577 	port->dev_conf.txmode = tx_mode;
1578 	port->dev_conf.rxmode = rx_mode;
1579 
1580 	ret = eth_dev_info_get_print_err(pid, &port->dev_info);
1581 	if (ret != 0)
1582 		rte_exit(EXIT_FAILURE, "rte_eth_dev_info_get() failed\n");
1583 
1584 	if (!(port->dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE))
1585 		port->dev_conf.txmode.offloads &=
1586 			~RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE;
1587 
1588 	/* Apply Rx offloads configuration */
1589 	for (i = 0; i < port->dev_info.max_rx_queues; i++)
1590 		port->rx_conf[i].offloads = port->dev_conf.rxmode.offloads;
1591 	/* Apply Tx offloads configuration */
1592 	for (i = 0; i < port->dev_info.max_tx_queues; i++)
1593 		port->tx_conf[i].offloads = port->dev_conf.txmode.offloads;
1594 
1595 	if (eth_link_speed)
1596 		port->dev_conf.link_speeds = eth_link_speed;
1597 
1598 	if (max_rx_pkt_len)
1599 		port->dev_conf.rxmode.mtu = max_rx_pkt_len -
1600 			get_eth_overhead(&port->dev_info);
1601 
1602 	/* set flag to initialize port/queue */
1603 	port->need_reconfig = 1;
1604 	port->need_reconfig_queues = 1;
1605 	port->socket_id = socket_id;
1606 	port->tx_metadata = 0;
1607 
1608 	/*
1609 	 * Check for maximum number of segments per MTU.
1610 	 * Accordingly update the mbuf data size.
1611 	 */
1612 	if (port->dev_info.rx_desc_lim.nb_mtu_seg_max != UINT16_MAX &&
1613 	    port->dev_info.rx_desc_lim.nb_mtu_seg_max != 0) {
1614 		uint32_t eth_overhead = get_eth_overhead(&port->dev_info);
1615 		uint16_t mtu;
1616 
1617 		if (rte_eth_dev_get_mtu(pid, &mtu) == 0) {
1618 			uint16_t data_size = (mtu + eth_overhead) /
1619 				port->dev_info.rx_desc_lim.nb_mtu_seg_max;
1620 			uint16_t buffer_size = data_size + RTE_PKTMBUF_HEADROOM;
1621 
1622 			if (buffer_size > mbuf_data_size[0]) {
1623 				mbuf_data_size[0] = buffer_size;
1624 				TESTPMD_LOG(WARNING,
1625 					"Configured mbuf size of the first segment %hu\n",
1626 					mbuf_data_size[0]);
1627 			}
1628 		}
1629 	}
1630 }
1631 
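/*
 * One-time initialisation of the forwarding configuration: forwarding
 * lcores, per-port default configuration, mbuf pools and GSO/GRO contexts.
 */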
1632 static void
1633 init_config(void)
1634 {
1635 	portid_t pid;
1636 	struct rte_mempool *mbp;
1637 	unsigned int nb_mbuf_per_pool;
1638 	lcoreid_t  lc_id;
1639 #ifdef RTE_LIB_GRO
1640 	struct rte_gro_param gro_param;
1641 #endif
1642 #ifdef RTE_LIB_GSO
1643 	uint32_t gso_types;
1644 #endif
1645 
1646 	/* Configuration of logical cores. */
1647 	fwd_lcores = rte_zmalloc("testpmd: fwd_lcores",
1648 				sizeof(struct fwd_lcore *) * nb_lcores,
1649 				RTE_CACHE_LINE_SIZE);
1650 	if (fwd_lcores == NULL) {
1651 		rte_exit(EXIT_FAILURE, "rte_zmalloc(%d (struct fwd_lcore *)) "
1652 							"failed\n", nb_lcores);
1653 	}
1654 	for (lc_id = 0; lc_id < nb_lcores; lc_id++) {
1655 		fwd_lcores[lc_id] = rte_zmalloc("testpmd: struct fwd_lcore",
1656 					       sizeof(struct fwd_lcore),
1657 					       RTE_CACHE_LINE_SIZE);
1658 		if (fwd_lcores[lc_id] == NULL) {
1659 			rte_exit(EXIT_FAILURE, "rte_zmalloc(struct fwd_lcore) "
1660 								"failed\n");
1661 		}
1662 		fwd_lcores[lc_id]->cpuid_idx = lc_id;
1663 	}
1664 
1665 	RTE_ETH_FOREACH_DEV(pid) {
1666 		uint32_t socket_id;
1667 
1668 		if (numa_support) {
1669 			socket_id = port_numa[pid];
1670 			if (port_numa[pid] == NUMA_NO_CONFIG) {
1671 				socket_id = rte_eth_dev_socket_id(pid);
1672 
1673 				/*
1674 				 * if socket_id is invalid,
1675 				 * set to the first available socket.
1676 				 */
1677 				if (check_socket_id(socket_id) < 0)
1678 					socket_id = socket_ids[0];
1679 			}
1680 		} else {
1681 			socket_id = (socket_num == UMA_NO_CONFIG) ?
1682 				    0 : socket_num;
1683 		}
1684 		/* Apply default TxRx configuration for all ports */
1685 		init_config_port_offloads(pid, socket_id);
1686 	}
1687 	/*
1688 	 * Create mbuf pools.
1689 	 * If NUMA support is disabled, create a single mbuf pool in
1690 	 * socket 0 memory by default.
1691 	 * Otherwise, create an mbuf pool in the memory of each discovered socket.
1692 	 *
1693 	 * Use the maximum value of nb_rxd and nb_txd here, then nb_rxd and
1694 	 * nb_txd can be configured at run time.
1695 	 */
1696 	if (param_total_num_mbufs)
1697 		nb_mbuf_per_pool = param_total_num_mbufs;
1698 	else {
1699 		nb_mbuf_per_pool = RTE_TEST_RX_DESC_MAX +
1700 			(nb_lcores * mb_mempool_cache) +
1701 			RTE_TEST_TX_DESC_MAX + MAX_PKT_BURST;
1702 		nb_mbuf_per_pool *= RTE_MAX_ETHPORTS;
1703 	}
1704 
1705 	if (numa_support) {
1706 		uint8_t i, j;
1707 
1708 		for (i = 0; i < num_sockets; i++)
1709 			for (j = 0; j < mbuf_data_size_n; j++)
1710 				mempools[i * MAX_SEGS_BUFFER_SPLIT + j] =
1711 					mbuf_pool_create(mbuf_data_size[j],
1712 							  nb_mbuf_per_pool,
1713 							  socket_ids[i], j);
1714 	} else {
1715 		uint8_t i;
1716 
1717 		for (i = 0; i < mbuf_data_size_n; i++)
1718 			mempools[i] = mbuf_pool_create
1719 					(mbuf_data_size[i],
1720 					 nb_mbuf_per_pool,
1721 					 socket_num == UMA_NO_CONFIG ?
1722 					 0 : socket_num, i);
1723 	}
1724 
1725 	init_port_config();
1726 
1727 #ifdef RTE_LIB_GSO
1728 	gso_types = RTE_ETH_TX_OFFLOAD_TCP_TSO | RTE_ETH_TX_OFFLOAD_VXLAN_TNL_TSO |
1729 		RTE_ETH_TX_OFFLOAD_GRE_TNL_TSO | RTE_ETH_TX_OFFLOAD_UDP_TSO;
1730 #endif
1731 	/*
1732 	 * Record which mbuf pool each logical core should use, if needed.
1733 	 */
1734 	for (lc_id = 0; lc_id < nb_lcores; lc_id++) {
1735 		mbp = mbuf_pool_find(
1736 			rte_lcore_to_socket_id(fwd_lcores_cpuids[lc_id]), 0);
1737 
1738 		if (mbp == NULL)
1739 			mbp = mbuf_pool_find(0, 0);
1740 		fwd_lcores[lc_id]->mbp = mbp;
1741 #ifdef RTE_LIB_GSO
1742 		/* initialize GSO context */
1743 		fwd_lcores[lc_id]->gso_ctx.direct_pool = mbp;
1744 		fwd_lcores[lc_id]->gso_ctx.indirect_pool = mbp;
1745 		fwd_lcores[lc_id]->gso_ctx.gso_types = gso_types;
1746 		fwd_lcores[lc_id]->gso_ctx.gso_size = RTE_ETHER_MAX_LEN -
1747 			RTE_ETHER_CRC_LEN;
1748 		fwd_lcores[lc_id]->gso_ctx.flag = 0;
1749 #endif
1750 	}
1751 
1752 	fwd_config_setup();
1753 
1754 #ifdef RTE_LIB_GRO
1755 	/* create a gro context for each lcore */
1756 	gro_param.gro_types = RTE_GRO_TCP_IPV4;
1757 	gro_param.max_flow_num = GRO_MAX_FLUSH_CYCLES;
1758 	gro_param.max_item_per_flow = MAX_PKT_BURST;
1759 	for (lc_id = 0; lc_id < nb_lcores; lc_id++) {
1760 		gro_param.socket_id = rte_lcore_to_socket_id(
1761 				fwd_lcores_cpuids[lc_id]);
1762 		fwd_lcores[lc_id]->gro_ctx = rte_gro_ctx_create(&gro_param);
1763 		if (fwd_lcores[lc_id]->gro_ctx == NULL) {
1764 			rte_exit(EXIT_FAILURE,
1765 					"rte_gro_ctx_create() failed\n");
1766 		}
1767 	}
1768 #endif
1769 }
1770 
1771 
1772 void
1773 reconfig(portid_t new_port_id, unsigned socket_id)
1774 {
1775 	/* Reconfiguration of Ethernet ports. */
1776 	init_config_port_offloads(new_port_id, socket_id);
1777 	init_port_config();
1778 }
1779 
1780 
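/*
 * (Re)allocate the forwarding streams: nb_ports * max(nb_rxq, nb_txq) entries.
 */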
1781 int
1782 init_fwd_streams(void)
1783 {
1784 	portid_t pid;
1785 	struct rte_port *port;
1786 	streamid_t sm_id, nb_fwd_streams_new;
1787 	queueid_t q;
1788 
1789 	/* set socket id according to numa or not */
1790 	RTE_ETH_FOREACH_DEV(pid) {
1791 		port = &ports[pid];
1792 		if (nb_rxq > port->dev_info.max_rx_queues) {
1793 			fprintf(stderr,
1794 				"Fail: nb_rxq(%d) is greater than max_rx_queues(%d)\n",
1795 				nb_rxq, port->dev_info.max_rx_queues);
1796 			return -1;
1797 		}
1798 		if (nb_txq > port->dev_info.max_tx_queues) {
1799 			fprintf(stderr,
1800 				"Fail: nb_txq(%d) is greater than max_tx_queues(%d)\n",
1801 				nb_txq, port->dev_info.max_tx_queues);
1802 			return -1;
1803 		}
1804 		if (numa_support) {
1805 			if (port_numa[pid] != NUMA_NO_CONFIG)
1806 				port->socket_id = port_numa[pid];
1807 			else {
1808 				port->socket_id = rte_eth_dev_socket_id(pid);
1809 
1810 				/*
1811 				 * if socket_id is invalid,
1812 				 * set to the first available socket.
1813 				 */
1814 				if (check_socket_id(port->socket_id) < 0)
1815 					port->socket_id = socket_ids[0];
1816 			}
1817 		}
1818 		else {
1819 			if (socket_num == UMA_NO_CONFIG)
1820 				port->socket_id = 0;
1821 			else
1822 				port->socket_id = socket_num;
1823 		}
1824 	}
1825 
1826 	q = RTE_MAX(nb_rxq, nb_txq);
1827 	if (q == 0) {
1828 		fprintf(stderr,
1829 			"Fail: Cannot allocate fwd streams as number of queues is 0\n");
1830 		return -1;
1831 	}
1832 	nb_fwd_streams_new = (streamid_t)(nb_ports * q);
1833 	if (nb_fwd_streams_new == nb_fwd_streams)
1834 		return 0;
1835 	/* clear the old */
1836 	if (fwd_streams != NULL) {
1837 		for (sm_id = 0; sm_id < nb_fwd_streams; sm_id++) {
1838 			if (fwd_streams[sm_id] == NULL)
1839 				continue;
1840 			rte_free(fwd_streams[sm_id]);
1841 			fwd_streams[sm_id] = NULL;
1842 		}
1843 		rte_free(fwd_streams);
1844 		fwd_streams = NULL;
1845 	}
1846 
1847 	/* init new */
1848 	nb_fwd_streams = nb_fwd_streams_new;
1849 	if (nb_fwd_streams) {
1850 		fwd_streams = rte_zmalloc("testpmd: fwd_streams",
1851 			sizeof(struct fwd_stream *) * nb_fwd_streams,
1852 			RTE_CACHE_LINE_SIZE);
1853 		if (fwd_streams == NULL)
1854 			rte_exit(EXIT_FAILURE, "rte_zmalloc(%d"
1855 				 " (struct fwd_stream *)) failed\n",
1856 				 nb_fwd_streams);
1857 
1858 		for (sm_id = 0; sm_id < nb_fwd_streams; sm_id++) {
1859 			fwd_streams[sm_id] = rte_zmalloc("testpmd:"
1860 				" struct fwd_stream", sizeof(struct fwd_stream),
1861 				RTE_CACHE_LINE_SIZE);
1862 			if (fwd_streams[sm_id] == NULL)
1863 				rte_exit(EXIT_FAILURE, "rte_zmalloc"
1864 					 "(struct fwd_stream) failed\n");
1865 		}
1866 	}
1867 
1868 	return 0;
1869 }
1870 
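/*
 * Display the burst size distribution of a stream: the share of empty bursts
 * plus the two most frequent non-empty burst sizes.
 */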
1871 static void
1872 pkt_burst_stats_display(const char *rx_tx, struct pkt_burst_stats *pbs)
1873 {
1874 	uint64_t total_burst, sburst;
1875 	uint64_t nb_burst;
1876 	uint64_t burst_stats[4];
1877 	uint16_t pktnb_stats[4];
1878 	uint16_t nb_pkt;
1879 	int burst_percent[4], sburstp;
1880 	int i;
1881 
1882 	/*
1883 	 * First compute the total number of packet bursts and the
1884 	 * two highest numbers of bursts of the same number of packets.
1885 	 */
1886 	memset(&burst_stats, 0x0, sizeof(burst_stats));
1887 	memset(&pktnb_stats, 0x0, sizeof(pktnb_stats));
1888 
1889 	/* Show stats for 0 burst size always */
1890 	total_burst = pbs->pkt_burst_spread[0];
1891 	burst_stats[0] = pbs->pkt_burst_spread[0];
1892 	pktnb_stats[0] = 0;
1893 
1894 	/* Find the next 2 burst sizes with highest occurrences. */
1895 	for (nb_pkt = 1; nb_pkt < MAX_PKT_BURST + 1; nb_pkt++) {
1896 		nb_burst = pbs->pkt_burst_spread[nb_pkt];
1897 
1898 		if (nb_burst == 0)
1899 			continue;
1900 
1901 		total_burst += nb_burst;
1902 
1903 		if (nb_burst > burst_stats[1]) {
1904 			burst_stats[2] = burst_stats[1];
1905 			pktnb_stats[2] = pktnb_stats[1];
1906 			burst_stats[1] = nb_burst;
1907 			pktnb_stats[1] = nb_pkt;
1908 		} else if (nb_burst > burst_stats[2]) {
1909 			burst_stats[2] = nb_burst;
1910 			pktnb_stats[2] = nb_pkt;
1911 		}
1912 	}
1913 	if (total_burst == 0)
1914 		return;
1915 
1916 	printf("  %s-bursts : %"PRIu64" [", rx_tx, total_burst);
1917 	for (i = 0, sburst = 0, sburstp = 0; i < 4; i++) {
1918 		if (i == 3) {
1919 			printf("%d%% of other]\n", 100 - sburstp);
1920 			return;
1921 		}
1922 
1923 		sburst += burst_stats[i];
1924 		if (sburst == total_burst) {
1925 			printf("%d%% of %d pkts]\n",
1926 				100 - sburstp, (int) pktnb_stats[i]);
1927 			return;
1928 		}
1929 
1930 		burst_percent[i] =
1931 			(double)burst_stats[i] / total_burst * 100;
1932 		printf("%d%% of %d pkts + ",
1933 			burst_percent[i], (int) pktnb_stats[i]);
1934 		sburstp += burst_percent[i];
1935 	}
1936 }
1937 
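/*
 * Display the statistics of one forwarding stream, skipping streams that
 * neither received, transmitted nor dropped any packet.
 */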
1938 static void
1939 fwd_stream_stats_display(streamid_t stream_id)
1940 {
1941 	struct fwd_stream *fs;
1942 	static const char *fwd_top_stats_border = "-------";
1943 
1944 	fs = fwd_streams[stream_id];
1945 	if ((fs->rx_packets == 0) && (fs->tx_packets == 0) &&
1946 	    (fs->fwd_dropped == 0))
1947 		return;
1948 	printf("\n  %s Forward Stats for RX Port=%2d/Queue=%2d -> "
1949 	       "TX Port=%2d/Queue=%2d %s\n",
1950 	       fwd_top_stats_border, fs->rx_port, fs->rx_queue,
1951 	       fs->tx_port, fs->tx_queue, fwd_top_stats_border);
1952 	printf("  RX-packets: %-14"PRIu64" TX-packets: %-14"PRIu64
1953 	       " TX-dropped: %-14"PRIu64,
1954 	       fs->rx_packets, fs->tx_packets, fs->fwd_dropped);
1955 
1956 	/* in checksum forwarding mode, also display checksum error counters */
1957 	if (cur_fwd_eng == &csum_fwd_engine) {
1958 		printf("  RX- bad IP checksum: %-14"PRIu64
1959 		       "  RX- bad L4 checksum: %-14"PRIu64
1960 		       " RX- bad outer L4 checksum: %-14"PRIu64"\n",
1961 			fs->rx_bad_ip_csum, fs->rx_bad_l4_csum,
1962 			fs->rx_bad_outer_l4_csum);
1963 		printf(" RX- bad outer IP checksum: %-14"PRIu64"\n",
1964 			fs->rx_bad_outer_ip_csum);
1965 	} else {
1966 		printf("\n");
1967 	}
1968 
1969 	if (record_burst_stats) {
1970 		pkt_burst_stats_display("RX", &fs->rx_burst_stats);
1971 		pkt_burst_stats_display("TX", &fs->tx_burst_stats);
1972 	}
1973 }
1974 
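/*
 * Display forwarding statistics: per-stream or per-port counters, then the
 * totals accumulated over all forwarding ports.
 */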
1975 void
1976 fwd_stats_display(void)
1977 {
1978 	static const char *fwd_stats_border = "----------------------";
1979 	static const char *acc_stats_border = "+++++++++++++++";
1980 	struct {
1981 		struct fwd_stream *rx_stream;
1982 		struct fwd_stream *tx_stream;
1983 		uint64_t tx_dropped;
1984 		uint64_t rx_bad_ip_csum;
1985 		uint64_t rx_bad_l4_csum;
1986 		uint64_t rx_bad_outer_l4_csum;
1987 		uint64_t rx_bad_outer_ip_csum;
1988 	} ports_stats[RTE_MAX_ETHPORTS];
1989 	uint64_t total_rx_dropped = 0;
1990 	uint64_t total_tx_dropped = 0;
1991 	uint64_t total_rx_nombuf = 0;
1992 	struct rte_eth_stats stats;
1993 	uint64_t fwd_cycles = 0;
1994 	uint64_t total_recv = 0;
1995 	uint64_t total_xmit = 0;
1996 	struct rte_port *port;
1997 	streamid_t sm_id;
1998 	portid_t pt_id;
1999 	int i;
2000 
2001 	memset(ports_stats, 0, sizeof(ports_stats));
2002 
2003 	for (sm_id = 0; sm_id < cur_fwd_config.nb_fwd_streams; sm_id++) {
2004 		struct fwd_stream *fs = fwd_streams[sm_id];
2005 
2006 		if (cur_fwd_config.nb_fwd_streams >
2007 		    cur_fwd_config.nb_fwd_ports) {
2008 			fwd_stream_stats_display(sm_id);
2009 		} else {
2010 			ports_stats[fs->tx_port].tx_stream = fs;
2011 			ports_stats[fs->rx_port].rx_stream = fs;
2012 		}
2013 
2014 		ports_stats[fs->tx_port].tx_dropped += fs->fwd_dropped;
2015 
2016 		ports_stats[fs->rx_port].rx_bad_ip_csum += fs->rx_bad_ip_csum;
2017 		ports_stats[fs->rx_port].rx_bad_l4_csum += fs->rx_bad_l4_csum;
2018 		ports_stats[fs->rx_port].rx_bad_outer_l4_csum +=
2019 				fs->rx_bad_outer_l4_csum;
2020 		ports_stats[fs->rx_port].rx_bad_outer_ip_csum +=
2021 				fs->rx_bad_outer_ip_csum;
2022 
2023 		if (record_core_cycles)
2024 			fwd_cycles += fs->core_cycles;
2025 	}
2026 	for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
2027 		pt_id = fwd_ports_ids[i];
2028 		port = &ports[pt_id];
2029 
2030 		rte_eth_stats_get(pt_id, &stats);
2031 		stats.ipackets -= port->stats.ipackets;
2032 		stats.opackets -= port->stats.opackets;
2033 		stats.ibytes -= port->stats.ibytes;
2034 		stats.obytes -= port->stats.obytes;
2035 		stats.imissed -= port->stats.imissed;
2036 		stats.oerrors -= port->stats.oerrors;
2037 		stats.rx_nombuf -= port->stats.rx_nombuf;
2038 
2039 		total_recv += stats.ipackets;
2040 		total_xmit += stats.opackets;
2041 		total_rx_dropped += stats.imissed;
2042 		total_tx_dropped += ports_stats[pt_id].tx_dropped;
2043 		total_tx_dropped += stats.oerrors;
2044 		total_rx_nombuf  += stats.rx_nombuf;
2045 
2046 		printf("\n  %s Forward statistics for port %-2d %s\n",
2047 		       fwd_stats_border, pt_id, fwd_stats_border);
2048 
2049 		printf("  RX-packets: %-14"PRIu64" RX-dropped: %-14"PRIu64
2050 		       "RX-total: %-"PRIu64"\n", stats.ipackets, stats.imissed,
2051 		       stats.ipackets + stats.imissed);
2052 
2053 		if (cur_fwd_eng == &csum_fwd_engine) {
2054 			printf("  Bad-ipcsum: %-14"PRIu64
2055 			       " Bad-l4csum: %-14"PRIu64
2056 			       "Bad-outer-l4csum: %-14"PRIu64"\n",
2057 			       ports_stats[pt_id].rx_bad_ip_csum,
2058 			       ports_stats[pt_id].rx_bad_l4_csum,
2059 			       ports_stats[pt_id].rx_bad_outer_l4_csum);
2060 			printf("  Bad-outer-ipcsum: %-14"PRIu64"\n",
2061 			       ports_stats[pt_id].rx_bad_outer_ip_csum);
2062 		}
2063 		if (stats.ierrors + stats.rx_nombuf > 0) {
2064 			printf("  RX-error: %-"PRIu64"\n", stats.ierrors);
2065 			printf("  RX-nombufs: %-14"PRIu64"\n", stats.rx_nombuf);
2066 		}
2067 
2068 		printf("  TX-packets: %-14"PRIu64" TX-dropped: %-14"PRIu64
2069 		       "TX-total: %-"PRIu64"\n",
2070 		       stats.opackets, ports_stats[pt_id].tx_dropped,
2071 		       stats.opackets + ports_stats[pt_id].tx_dropped);
2072 
2073 		if (record_burst_stats) {
2074 			if (ports_stats[pt_id].rx_stream)
2075 				pkt_burst_stats_display("RX",
2076 					&ports_stats[pt_id].rx_stream->rx_burst_stats);
2077 			if (ports_stats[pt_id].tx_stream)
2078 				pkt_burst_stats_display("TX",
2079 				&ports_stats[pt_id].tx_stream->tx_burst_stats);
2080 		}
2081 
2082 		printf("  %s--------------------------------%s\n",
2083 		       fwd_stats_border, fwd_stats_border);
2084 	}
2085 
2086 	printf("\n  %s Accumulated forward statistics for all ports"
2087 	       "%s\n",
2088 	       acc_stats_border, acc_stats_border);
2089 	printf("  RX-packets: %-14"PRIu64" RX-dropped: %-14"PRIu64"RX-total: "
2090 	       "%-"PRIu64"\n"
2091 	       "  TX-packets: %-14"PRIu64" TX-dropped: %-14"PRIu64"TX-total: "
2092 	       "%-"PRIu64"\n",
2093 	       total_recv, total_rx_dropped, total_recv + total_rx_dropped,
2094 	       total_xmit, total_tx_dropped, total_xmit + total_tx_dropped);
2095 	if (total_rx_nombuf > 0)
2096 		printf("  RX-nombufs: %-14"PRIu64"\n", total_rx_nombuf);
2097 	printf("  %s++++++++++++++++++++++++++++++++++++++++++++++"
2098 	       "%s\n",
2099 	       acc_stats_border, acc_stats_border);
2100 	if (record_core_cycles) {
2101 #define CYC_PER_MHZ 1E6
2102 		if (total_recv > 0 || total_xmit > 0) {
2103 			uint64_t total_pkts = 0;
2104 			if (strcmp(cur_fwd_eng->fwd_mode_name, "txonly") == 0 ||
2105 			    strcmp(cur_fwd_eng->fwd_mode_name, "flowgen") == 0)
2106 				total_pkts = total_xmit;
2107 			else
2108 				total_pkts = total_recv;
2109 
2110 			printf("\n  CPU cycles/packet=%.2F (total cycles="
2111 			       "%"PRIu64" / total %s packets=%"PRIu64") at %"PRIu64
2112 			       " MHz Clock\n",
2113 			       (double) fwd_cycles / total_pkts,
2114 			       fwd_cycles, cur_fwd_eng->fwd_mode_name, total_pkts,
2115 			       (uint64_t)(rte_get_tsc_hz() / CYC_PER_MHZ));
2116 		}
2117 	}
2118 }
2119 
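/*
 * Reset forwarding statistics: snapshot the current ethdev counters of each
 * forwarding port and clear all per-stream counters.
 */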
2120 void
2121 fwd_stats_reset(void)
2122 {
2123 	streamid_t sm_id;
2124 	portid_t pt_id;
2125 	int i;
2126 
2127 	for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
2128 		pt_id = fwd_ports_ids[i];
2129 		rte_eth_stats_get(pt_id, &ports[pt_id].stats);
2130 	}
2131 	for (sm_id = 0; sm_id < cur_fwd_config.nb_fwd_streams; sm_id++) {
2132 		struct fwd_stream *fs = fwd_streams[sm_id];
2133 
2134 		fs->rx_packets = 0;
2135 		fs->tx_packets = 0;
2136 		fs->fwd_dropped = 0;
2137 		fs->rx_bad_ip_csum = 0;
2138 		fs->rx_bad_l4_csum = 0;
2139 		fs->rx_bad_outer_l4_csum = 0;
2140 		fs->rx_bad_outer_ip_csum = 0;
2141 
2142 		memset(&fs->rx_burst_stats, 0, sizeof(fs->rx_burst_stats));
2143 		memset(&fs->tx_burst_stats, 0, sizeof(fs->tx_burst_stats));
2144 		fs->core_cycles = 0;
2145 	}
2146 }
2147 
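/*
 * Drain the Rx queues of all forwarding ports, freeing any packet left over
 * from a previous run before forwarding is (re)started.
 */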
2148 static void
2149 flush_fwd_rx_queues(void)
2150 {
2151 	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
2152 	portid_t  rxp;
2153 	portid_t port_id;
2154 	queueid_t rxq;
2155 	uint16_t  nb_rx;
2156 	uint16_t  i;
2157 	uint8_t   j;
2158 	uint64_t prev_tsc = 0, diff_tsc, cur_tsc, timer_tsc = 0;
2159 	uint64_t timer_period;
2160 
2161 	if (num_procs > 1) {
2162 		printf("multi-process is not supported for flushing fwd Rx queues, skipping\n");
2163 		return;
2164 	}
2165 
2166 	/* convert to number of cycles */
2167 	timer_period = rte_get_timer_hz(); /* 1 second timeout */
2168 
2169 	for (j = 0; j < 2; j++) {
2170 		for (rxp = 0; rxp < cur_fwd_config.nb_fwd_ports; rxp++) {
2171 			for (rxq = 0; rxq < nb_rxq; rxq++) {
2172 				port_id = fwd_ports_ids[rxp];
2173 				/*
2174 				 * testpmd can get stuck in the do-while loop
2175 				 * below if rte_eth_rx_burst() keeps returning
2176 				 * packets, so a timer is used to exit the
2177 				 * loop after a one second timeout.
2178 				 */
2179 				prev_tsc = rte_rdtsc();
2180 				do {
2181 					nb_rx = rte_eth_rx_burst(port_id, rxq,
2182 						pkts_burst, MAX_PKT_BURST);
2183 					for (i = 0; i < nb_rx; i++)
2184 						rte_pktmbuf_free(pkts_burst[i]);
2185 
2186 					cur_tsc = rte_rdtsc();
2187 					diff_tsc = cur_tsc - prev_tsc;
2188 					timer_tsc += diff_tsc;
2189 				} while ((nb_rx > 0) &&
2190 					(timer_tsc < timer_period));
2191 				timer_tsc = 0;
2192 			}
2193 		}
2194 		rte_delay_ms(10); /* wait 10 milliseconds before retrying */
2195 	}
2196 }
2197 
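/*
 * Forwarding loop of one lcore: invoke the packet forwarding callback on each
 * stream assigned to the lcore until it is asked to stop.
 */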
2198 static void
2199 run_pkt_fwd_on_lcore(struct fwd_lcore *fc, packet_fwd_t pkt_fwd)
2200 {
2201 	struct fwd_stream **fsm;
2202 	streamid_t nb_fs;
2203 	streamid_t sm_id;
2204 #ifdef RTE_LIB_BITRATESTATS
2205 	uint64_t tics_per_1sec;
2206 	uint64_t tics_datum;
2207 	uint64_t tics_current;
2208 	uint16_t i, cnt_ports;
2209 
2210 	cnt_ports = nb_ports;
2211 	tics_datum = rte_rdtsc();
2212 	tics_per_1sec = rte_get_timer_hz();
2213 #endif
2214 	fsm = &fwd_streams[fc->stream_idx];
2215 	nb_fs = fc->stream_nb;
2216 	do {
2217 		for (sm_id = 0; sm_id < nb_fs; sm_id++)
2218 			(*pkt_fwd)(fsm[sm_id]);
2219 #ifdef RTE_LIB_BITRATESTATS
2220 		if (bitrate_enabled != 0 &&
2221 				bitrate_lcore_id == rte_lcore_id()) {
2222 			tics_current = rte_rdtsc();
2223 			if (tics_current - tics_datum >= tics_per_1sec) {
2224 				/* Periodic bitrate calculation */
2225 				for (i = 0; i < cnt_ports; i++)
2226 					rte_stats_bitrate_calc(bitrate_data,
2227 						ports_ids[i]);
2228 				tics_datum = tics_current;
2229 			}
2230 		}
2231 #endif
2232 #ifdef RTE_LIB_LATENCYSTATS
2233 		if (latencystats_enabled != 0 &&
2234 				latencystats_lcore_id == rte_lcore_id())
2235 			rte_latencystats_update();
2236 #endif
2237 
2238 	} while (!fc->stopped);
2239 }
2240 
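/* lcore entry point launched by launch_packet_forwarding(). */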
2241 static int
2242 start_pkt_forward_on_core(void *fwd_arg)
2243 {
2244 	run_pkt_fwd_on_lcore((struct fwd_lcore *) fwd_arg,
2245 			     cur_fwd_config.fwd_eng->packet_fwd);
2246 	return 0;
2247 }
2248 
2249 /*
2250  * Run the TXONLY packet forwarding engine to send a single burst of packets.
2251  * Used to start communication flows in network loopback test configurations.
2252  */
2253 static int
2254 run_one_txonly_burst_on_core(void *fwd_arg)
2255 {
2256 	struct fwd_lcore *fwd_lc;
2257 	struct fwd_lcore tmp_lcore;
2258 
2259 	fwd_lc = (struct fwd_lcore *) fwd_arg;
2260 	tmp_lcore = *fwd_lc;
2261 	tmp_lcore.stopped = 1;
2262 	run_pkt_fwd_on_lcore(&tmp_lcore, tx_only_engine.packet_fwd);
2263 	return 0;
2264 }
2265 
2266 /*
2267  * Launch packet forwarding:
2268  *     - Set up the per-port forwarding context.
2269  *     - Launch logical cores with their forwarding configuration.
2270  */
2271 static void
2272 launch_packet_forwarding(lcore_function_t *pkt_fwd_on_lcore)
2273 {
2274 	unsigned int i;
2275 	unsigned int lc_id;
2276 	int diag;
2277 
2278 	for (i = 0; i < cur_fwd_config.nb_fwd_lcores; i++) {
2279 		lc_id = fwd_lcores_cpuids[i];
2280 		if ((interactive == 0) || (lc_id != rte_lcore_id())) {
2281 			fwd_lcores[i]->stopped = 0;
2282 			diag = rte_eal_remote_launch(pkt_fwd_on_lcore,
2283 						     fwd_lcores[i], lc_id);
2284 			if (diag != 0)
2285 				fprintf(stderr,
2286 					"launch lcore %u failed - diag=%d\n",
2287 					lc_id, diag);
2288 		}
2289 	}
2290 }
2291 
2292 /*
2293  * Launch packet forwarding configuration.
2294  */
2295 void
2296 start_packet_forwarding(int with_tx_first)
2297 {
2298 	port_fwd_begin_t port_fwd_begin;
2299 	port_fwd_end_t  port_fwd_end;
2300 	unsigned int i;
2301 
2302 	if (strcmp(cur_fwd_eng->fwd_mode_name, "rxonly") == 0 && !nb_rxq)
2303 		rte_exit(EXIT_FAILURE, "rxq is 0, cannot use rxonly fwd mode\n");
2304 
2305 	if (strcmp(cur_fwd_eng->fwd_mode_name, "txonly") == 0 && !nb_txq)
2306 		rte_exit(EXIT_FAILURE, "txq is 0, cannot use txonly fwd mode\n");
2307 
2308 	if ((strcmp(cur_fwd_eng->fwd_mode_name, "rxonly") != 0 &&
2309 		strcmp(cur_fwd_eng->fwd_mode_name, "txonly") != 0) &&
2310 		(!nb_rxq || !nb_txq))
2311 		rte_exit(EXIT_FAILURE,
2312 			"Either rxq or txq is 0, cannot use %s fwd mode\n",
2313 			cur_fwd_eng->fwd_mode_name);
2314 
2315 	if (all_ports_started() == 0) {
2316 		fprintf(stderr, "Not all ports were started\n");
2317 		return;
2318 	}
2319 	if (test_done == 0) {
2320 		fprintf(stderr, "Packet forwarding already started\n");
2321 		return;
2322 	}
2323 
2324 	fwd_config_setup();
2325 
2326 	pkt_fwd_config_display(&cur_fwd_config);
2327 	if (!pkt_fwd_shared_rxq_check())
2328 		return;
2329 
2330 	port_fwd_begin = cur_fwd_config.fwd_eng->port_fwd_begin;
2331 	if (port_fwd_begin != NULL) {
2332 		for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
2333 			if (port_fwd_begin(fwd_ports_ids[i])) {
2334 				fprintf(stderr,
2335 					"Packet forwarding is not ready\n");
2336 				return;
2337 			}
2338 		}
2339 	}
2340 
2341 	if (with_tx_first) {
2342 		port_fwd_begin = tx_only_engine.port_fwd_begin;
2343 		if (port_fwd_begin != NULL) {
2344 			for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
2345 				if (port_fwd_begin(fwd_ports_ids[i])) {
2346 					fprintf(stderr,
2347 						"Packet forwarding is not ready\n");
2348 					return;
2349 				}
2350 			}
2351 		}
2352 	}
2353 
2354 	test_done = 0;
2355 
2356 	if (!no_flush_rx)
2357 		flush_fwd_rx_queues();
2358 
2359 	rxtx_config_display();
2360 
2361 	fwd_stats_reset();
2362 	if (with_tx_first) {
2363 		while (with_tx_first--) {
2364 			launch_packet_forwarding(
2365 					run_one_txonly_burst_on_core);
2366 			rte_eal_mp_wait_lcore();
2367 		}
2368 		port_fwd_end = tx_only_engine.port_fwd_end;
2369 		if (port_fwd_end != NULL) {
2370 			for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++)
2371 				(*port_fwd_end)(fwd_ports_ids[i]);
2372 		}
2373 	}
2374 	launch_packet_forwarding(start_pkt_forward_on_core);
2375 }
2376 
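/*
 * Stop packet forwarding: ask the forwarding lcores to stop, wait for them,
 * run the engine end callbacks and display the final statistics.
 */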
2377 void
2378 stop_packet_forwarding(void)
2379 {
2380 	port_fwd_end_t port_fwd_end;
2381 	lcoreid_t lc_id;
2382 	portid_t pt_id;
2383 	int i;
2384 
2385 	if (test_done) {
2386 		fprintf(stderr, "Packet forwarding not started\n");
2387 		return;
2388 	}
2389 	printf("Telling cores to stop...");
2390 	for (lc_id = 0; lc_id < cur_fwd_config.nb_fwd_lcores; lc_id++)
2391 		fwd_lcores[lc_id]->stopped = 1;
2392 	printf("\nWaiting for lcores to finish...\n");
2393 	rte_eal_mp_wait_lcore();
2394 	port_fwd_end = cur_fwd_config.fwd_eng->port_fwd_end;
2395 	if (port_fwd_end != NULL) {
2396 		for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
2397 			pt_id = fwd_ports_ids[i];
2398 			(*port_fwd_end)(pt_id);
2399 		}
2400 	}
2401 
2402 	fwd_stats_display();
2403 
2404 	printf("\nDone.\n");
2405 	test_done = 1;
2406 }
2407 
2408 void
2409 dev_set_link_up(portid_t pid)
2410 {
2411 	if (rte_eth_dev_set_link_up(pid) < 0)
2412 		fprintf(stderr, "\nSet link up fail.\n");
2413 }
2414 
2415 void
2416 dev_set_link_down(portid_t pid)
2417 {
2418 	if (rte_eth_dev_set_link_down(pid) < 0)
2419 		fprintf(stderr, "\nSet link down fail.\n");
2420 }
2421 
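/* Return 1 if every port that is not a bonding slave is started, 0 otherwise. */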
2422 static int
2423 all_ports_started(void)
2424 {
2425 	portid_t pi;
2426 	struct rte_port *port;
2427 
2428 	RTE_ETH_FOREACH_DEV(pi) {
2429 		port = &ports[pi];
2430 		/* Check if there is a port which is not started */
2431 		if ((port->port_status != RTE_PORT_STARTED) &&
2432 			(port->slave_flag == 0))
2433 			return 0;
2434 	}
2435 
2436 	/* All ports are started */
2437 	return 1;
2438 }
2439 
2440 int
2441 port_is_stopped(portid_t port_id)
2442 {
2443 	struct rte_port *port = &ports[port_id];
2444 
2445 	if ((port->port_status != RTE_PORT_STOPPED) &&
2446 	    (port->slave_flag == 0))
2447 		return 0;
2448 	return 1;
2449 }
2450 
2451 int
2452 all_ports_stopped(void)
2453 {
2454 	portid_t pi;
2455 
2456 	RTE_ETH_FOREACH_DEV(pi) {
2457 		if (!port_is_stopped(pi))
2458 			return 0;
2459 	}
2460 
2461 	return 1;
2462 }
2463 
2464 int
2465 port_is_started(portid_t port_id)
2466 {
2467 	if (port_id_is_invalid(port_id, ENABLED_WARN))
2468 		return 0;
2469 
2470 	if (ports[port_id].port_status != RTE_PORT_STARTED)
2471 		return 0;
2472 
2473 	return 1;
2474 }
2475 
2476 /* Configure the Rx and Tx hairpin queues for the selected port. */
2477 static int
2478 setup_hairpin_queues(portid_t pi, portid_t p_pi, uint16_t cnt_pi)
2479 {
2480 	queueid_t qi;
2481 	struct rte_eth_hairpin_conf hairpin_conf = {
2482 		.peer_count = 1,
2483 	};
2484 	int i;
2485 	int diag;
2486 	struct rte_port *port = &ports[pi];
2487 	uint16_t peer_rx_port = pi;
2488 	uint16_t peer_tx_port = pi;
2489 	uint32_t manual = 1;
2490 	uint32_t tx_exp = hairpin_mode & 0x10;
2491 
2492 	if (!(hairpin_mode & 0xf)) {
2493 		peer_rx_port = pi;
2494 		peer_tx_port = pi;
2495 		manual = 0;
2496 	} else if (hairpin_mode & 0x1) {
2497 		peer_tx_port = rte_eth_find_next_owned_by(pi + 1,
2498 						       RTE_ETH_DEV_NO_OWNER);
2499 		if (peer_tx_port >= RTE_MAX_ETHPORTS)
2500 			peer_tx_port = rte_eth_find_next_owned_by(0,
2501 						RTE_ETH_DEV_NO_OWNER);
2502 		if (p_pi != RTE_MAX_ETHPORTS) {
2503 			peer_rx_port = p_pi;
2504 		} else {
2505 			uint16_t next_pi;
2506 
2507 			/* Last port will be the peer RX port of the first. */
2508 			RTE_ETH_FOREACH_DEV(next_pi)
2509 				peer_rx_port = next_pi;
2510 		}
2511 		manual = 1;
2512 	} else if (hairpin_mode & 0x2) {
2513 		if (cnt_pi & 0x1) {
2514 			peer_rx_port = p_pi;
2515 		} else {
2516 			peer_rx_port = rte_eth_find_next_owned_by(pi + 1,
2517 						RTE_ETH_DEV_NO_OWNER);
2518 			if (peer_rx_port >= RTE_MAX_ETHPORTS)
2519 				peer_rx_port = pi;
2520 		}
2521 		peer_tx_port = peer_rx_port;
2522 		manual = 1;
2523 	}
2524 
2525 	for (qi = nb_txq, i = 0; qi < nb_hairpinq + nb_txq; qi++) {
2526 		hairpin_conf.peers[0].port = peer_rx_port;
2527 		hairpin_conf.peers[0].queue = i + nb_rxq;
2528 		hairpin_conf.manual_bind = !!manual;
2529 		hairpin_conf.tx_explicit = !!tx_exp;
2530 		diag = rte_eth_tx_hairpin_queue_setup
2531 			(pi, qi, nb_txd, &hairpin_conf);
2532 		i++;
2533 		if (diag == 0)
2534 			continue;
2535 
2536 		/* Failed to set up Tx hairpin queue, return */
2537 		if (port->port_status == RTE_PORT_HANDLING)
2538 			port->port_status = RTE_PORT_STOPPED;
2539 		else
2540 			fprintf(stderr,
2541 				"Port %d can not be set back to stopped\n", pi);
2542 		fprintf(stderr, "Fail to configure port %d hairpin queues\n",
2543 			pi);
2544 		/* try to reconfigure queues next time */
2545 		port->need_reconfig_queues = 1;
2546 		return -1;
2547 	}
2548 	for (qi = nb_rxq, i = 0; qi < nb_hairpinq + nb_rxq; qi++) {
2549 		hairpin_conf.peers[0].port = peer_tx_port;
2550 		hairpin_conf.peers[0].queue = i + nb_txq;
2551 		hairpin_conf.manual_bind = !!manual;
2552 		hairpin_conf.tx_explicit = !!tx_exp;
2553 		diag = rte_eth_rx_hairpin_queue_setup
2554 			(pi, qi, nb_rxd, &hairpin_conf);
2555 		i++;
2556 		if (diag == 0)
2557 			continue;
2558 
2559 		/* Failed to set up Rx hairpin queue, return */
2560 		if (port->port_status == RTE_PORT_HANDLING)
2561 			port->port_status = RTE_PORT_STOPPED;
2562 		else
2563 			fprintf(stderr,
2564 				"Port %d can not be set back to stopped\n", pi);
2565 		fprintf(stderr, "Fail to configure port %d hairpin queues\n",
2566 			pi);
2567 		/* try to reconfigure queues next time */
2568 		port->need_reconfig_queues = 1;
2569 		return -1;
2570 	}
2571 	return 0;
2572 }
2573 
2574 /* Configure the Rx with optional split. */
2575 int
2576 rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
2577 	       uint16_t nb_rx_desc, unsigned int socket_id,
2578 	       struct rte_eth_rxconf *rx_conf, struct rte_mempool *mp)
2579 {
2580 	union rte_eth_rxseg rx_useg[MAX_SEGS_BUFFER_SPLIT] = {};
2581 	unsigned int i, mp_n;
2582 	int ret;
2583 
2584 	if (rx_pkt_nb_segs <= 1 ||
2585 	    (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) == 0) {
2586 		rx_conf->rx_seg = NULL;
2587 		rx_conf->rx_nseg = 0;
2588 		ret = rte_eth_rx_queue_setup(port_id, rx_queue_id,
2589 					     nb_rx_desc, socket_id,
2590 					     rx_conf, mp);
2591 		return ret;
2592 	}
2593 	for (i = 0; i < rx_pkt_nb_segs; i++) {
2594 		struct rte_eth_rxseg_split *rx_seg = &rx_useg[i].split;
2595 		struct rte_mempool *mpx;
2596 		/*
2597 		 * Use the last valid pool for segments whose index
2598 		 * exceeds the number of configured mempools.
2599 		 */
2600 		mp_n = (i >= mbuf_data_size_n) ? mbuf_data_size_n - 1 : i;
2601 		mpx = mbuf_pool_find(socket_id, mp_n);
2602 		/* Handle zero as mbuf data buffer size. */
2603 		rx_seg->length = rx_pkt_seg_lengths[i] ?
2604 				   rx_pkt_seg_lengths[i] :
2605 				   mbuf_data_size[mp_n];
2606 		rx_seg->offset = i < rx_pkt_nb_offs ?
2607 				   rx_pkt_seg_offsets[i] : 0;
2608 		rx_seg->mp = mpx ? mpx : mp;
2609 	}
2610 	rx_conf->rx_nseg = rx_pkt_nb_segs;
2611 	rx_conf->rx_seg = rx_useg;
2612 	ret = rte_eth_rx_queue_setup(port_id, rx_queue_id, nb_rx_desc,
2613 				    socket_id, rx_conf, NULL);
2614 	rx_conf->rx_seg = NULL;
2615 	rx_conf->rx_nseg = 0;
2616 	return ret;
2617 }
2618 
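/* Allocate the per-port arrays used to display the requested extended statistics. */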
2619 static int
2620 alloc_xstats_display_info(portid_t pi)
2621 {
2622 	uint64_t **ids_supp = &ports[pi].xstats_info.ids_supp;
2623 	uint64_t **prev_values = &ports[pi].xstats_info.prev_values;
2624 	uint64_t **curr_values = &ports[pi].xstats_info.curr_values;
2625 
2626 	if (xstats_display_num == 0)
2627 		return 0;
2628 
2629 	*ids_supp = calloc(xstats_display_num, sizeof(**ids_supp));
2630 	if (*ids_supp == NULL)
2631 		goto fail_ids_supp;
2632 
2633 	*prev_values = calloc(xstats_display_num,
2634 			      sizeof(**prev_values));
2635 	if (*prev_values == NULL)
2636 		goto fail_prev_values;
2637 
2638 	*curr_values = calloc(xstats_display_num,
2639 			      sizeof(**curr_values));
2640 	if (*curr_values == NULL)
2641 		goto fail_curr_values;
2642 
2643 	ports[pi].xstats_info.allocated = true;
2644 
2645 	return 0;
2646 
2647 fail_curr_values:
2648 	free(*prev_values);
2649 fail_prev_values:
2650 	free(*ids_supp);
2651 fail_ids_supp:
2652 	return -ENOMEM;
2653 }
2654 
2655 static void
2656 free_xstats_display_info(portid_t pi)
2657 {
2658 	if (!ports[pi].xstats_info.allocated)
2659 		return;
2660 	free(ports[pi].xstats_info.ids_supp);
2661 	free(ports[pi].xstats_info.prev_values);
2662 	free(ports[pi].xstats_info.curr_values);
2663 	ports[pi].xstats_info.allocated = false;
2664 }
2665 
2666 /** Fill helper structures for specified port to show extended statistics. */
2667 static void
2668 fill_xstats_display_info_for_port(portid_t pi)
2669 {
2670 	unsigned int stat, stat_supp;
2671 	const char *xstat_name;
2672 	struct rte_port *port;
2673 	uint64_t *ids_supp;
2674 	int rc;
2675 
2676 	if (xstats_display_num == 0)
2677 		return;
2678 
2679 	if (pi == (portid_t)RTE_PORT_ALL) {
2680 		fill_xstats_display_info();
2681 		return;
2682 	}
2683 
2684 	port = &ports[pi];
2685 	if (port->port_status != RTE_PORT_STARTED)
2686 		return;
2687 
2688 	if (!port->xstats_info.allocated && alloc_xstats_display_info(pi) != 0)
2689 		rte_exit(EXIT_FAILURE,
2690 			 "Failed to allocate xstats display memory\n");
2691 
2692 	ids_supp = port->xstats_info.ids_supp;
2693 	for (stat = stat_supp = 0; stat < xstats_display_num; stat++) {
2694 		xstat_name = xstats_display[stat].name;
2695 		rc = rte_eth_xstats_get_id_by_name(pi, xstat_name,
2696 						   ids_supp + stat_supp);
2697 		if (rc != 0) {
2698 			fprintf(stderr, "No xstat '%s' on port %u - skipping entry %u\n",
2699 				xstat_name, pi, stat);
2700 			continue;
2701 		}
2702 		stat_supp++;
2703 	}
2704 
2705 	port->xstats_info.ids_supp_sz = stat_supp;
2706 }
2707 
2708 /** Fill helper structures for all ports to show extended statistics. */
2709 static void
2710 fill_xstats_display_info(void)
2711 {
2712 	portid_t pi;
2713 
2714 	if (xstats_display_num == 0)
2715 		return;
2716 
2717 	RTE_ETH_FOREACH_DEV(pi)
2718 		fill_xstats_display_info_for_port(pi);
2719 }
2720 
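/*
 * Configure (if needed) and start the given port, or all ports when pid is
 * RTE_PORT_ALL, then optionally bind hairpin peers and check link status.
 */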
2721 int
2722 start_port(portid_t pid)
2723 {
2724 	int diag, need_check_link_status = -1;
2725 	portid_t pi;
2726 	portid_t p_pi = RTE_MAX_ETHPORTS;
2727 	portid_t pl[RTE_MAX_ETHPORTS];
2728 	portid_t peer_pl[RTE_MAX_ETHPORTS];
2729 	uint16_t cnt_pi = 0;
2730 	uint16_t cfg_pi = 0;
2731 	int peer_pi;
2732 	queueid_t qi;
2733 	struct rte_port *port;
2734 	struct rte_eth_hairpin_cap cap;
2735 
2736 	if (port_id_is_invalid(pid, ENABLED_WARN))
2737 		return 0;
2738 
2739 	RTE_ETH_FOREACH_DEV(pi) {
2740 		if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
2741 			continue;
2742 
2743 		need_check_link_status = 0;
2744 		port = &ports[pi];
2745 		if (port->port_status == RTE_PORT_STOPPED)
2746 			port->port_status = RTE_PORT_HANDLING;
2747 		else {
2748 			fprintf(stderr, "Port %d is not stopped, skipping\n", pi);
2749 			continue;
2750 		}
2751 
2752 		if (port->need_reconfig > 0) {
2753 			struct rte_eth_conf dev_conf;
2754 			int k;
2755 
2756 			port->need_reconfig = 0;
2757 
2758 			if (flow_isolate_all) {
2759 				int ret = port_flow_isolate(pi, 1);
2760 				if (ret) {
2761 					fprintf(stderr,
2762 						"Failed to apply isolated mode on port %d\n",
2763 						pi);
2764 					return -1;
2765 				}
2766 			}
2767 			configure_rxtx_dump_callbacks(0);
2768 			printf("Configuring Port %d (socket %u)\n", pi,
2769 					port->socket_id);
2770 			if (nb_hairpinq > 0 &&
2771 			    rte_eth_dev_hairpin_capability_get(pi, &cap)) {
2772 				fprintf(stderr,
2773 					"Port %d doesn't support hairpin queues\n",
2774 					pi);
2775 				return -1;
2776 			}
2777 
2778 			/* configure port */
2779 			diag = eth_dev_configure_mp(pi, nb_rxq + nb_hairpinq,
2780 						     nb_txq + nb_hairpinq,
2781 						     &(port->dev_conf));
2782 			if (diag != 0) {
2783 				if (port->port_status == RTE_PORT_HANDLING)
2784 					port->port_status = RTE_PORT_STOPPED;
2785 				else
2786 					fprintf(stderr,
2787 						"Port %d can not be set back to stopped\n",
2788 						pi);
2789 				fprintf(stderr, "Fail to configure port %d\n",
2790 					pi);
2791 				/* try to reconfigure port next time */
2792 				port->need_reconfig = 1;
2793 				return -1;
2794 			}
2795 			/* get device configuration */
2796 			if (0 !=
2797 				eth_dev_conf_get_print_err(pi, &dev_conf)) {
2798 				fprintf(stderr,
2799 					"port %d can not get device configuration\n",
2800 					pi);
2801 				return -1;
2802 			}
2803 			/* Apply Rx offloads configuration */
2804 			if (dev_conf.rxmode.offloads !=
2805 			    port->dev_conf.rxmode.offloads) {
2806 				port->dev_conf.rxmode.offloads |=
2807 					dev_conf.rxmode.offloads;
2808 				for (k = 0;
2809 				     k < port->dev_info.max_rx_queues;
2810 				     k++)
2811 					port->rx_conf[k].offloads |=
2812 						dev_conf.rxmode.offloads;
2813 			}
2814 			/* Apply Tx offloads configuration */
2815 			if (dev_conf.txmode.offloads !=
2816 			    port->dev_conf.txmode.offloads) {
2817 				port->dev_conf.txmode.offloads |=
2818 					dev_conf.txmode.offloads;
2819 				for (k = 0;
2820 				     k < port->dev_info.max_tx_queues;
2821 				     k++)
2822 					port->tx_conf[k].offloads |=
2823 						dev_conf.txmode.offloads;
2824 			}
2825 		}
2826 		if (port->need_reconfig_queues > 0 && is_proc_primary()) {
2827 			port->need_reconfig_queues = 0;
2828 			/* setup tx queues */
2829 			for (qi = 0; qi < nb_txq; qi++) {
2830 				if ((numa_support) &&
2831 					(txring_numa[pi] != NUMA_NO_CONFIG))
2832 					diag = rte_eth_tx_queue_setup(pi, qi,
2833 						port->nb_tx_desc[qi],
2834 						txring_numa[pi],
2835 						&(port->tx_conf[qi]));
2836 				else
2837 					diag = rte_eth_tx_queue_setup(pi, qi,
2838 						port->nb_tx_desc[qi],
2839 						port->socket_id,
2840 						&(port->tx_conf[qi]));
2841 
2842 				if (diag == 0)
2843 					continue;
2844 
2845 				/* Failed to set up Tx queue, return */
2846 				if (port->port_status == RTE_PORT_HANDLING)
2847 					port->port_status = RTE_PORT_STOPPED;
2848 				else
2849 					fprintf(stderr,
2850 						"Port %d can not be set back to stopped\n",
2851 						pi);
2852 				fprintf(stderr,
2853 					"Fail to configure port %d tx queues\n",
2854 					pi);
2855 				/* try to reconfigure queues next time */
2856 				port->need_reconfig_queues = 1;
2857 				return -1;
2858 			}
2859 			for (qi = 0; qi < nb_rxq; qi++) {
2860 				/* setup rx queues */
2861 				if ((numa_support) &&
2862 					(rxring_numa[pi] != NUMA_NO_CONFIG)) {
2863 					struct rte_mempool *mp =
2864 						mbuf_pool_find
2865 							(rxring_numa[pi], 0);
2866 					if (mp == NULL) {
2867 						fprintf(stderr,
2868 							"Failed to setup RX queue: No mempool allocation on the socket %d\n",
2869 							rxring_numa[pi]);
2870 						return -1;
2871 					}
2872 
2873 					diag = rx_queue_setup(pi, qi,
2874 					     port->nb_rx_desc[qi],
2875 					     rxring_numa[pi],
2876 					     &(port->rx_conf[qi]),
2877 					     mp);
2878 				} else {
2879 					struct rte_mempool *mp =
2880 						mbuf_pool_find
2881 							(port->socket_id, 0);
2882 					if (mp == NULL) {
2883 						fprintf(stderr,
2884 							"Failed to setup RX queue: No mempool allocation on the socket %d\n",
2885 							port->socket_id);
2886 						return -1;
2887 					}
2888 					diag = rx_queue_setup(pi, qi,
2889 					     port->nb_rx_desc[qi],
2890 					     port->socket_id,
2891 					     &(port->rx_conf[qi]),
2892 					     mp);
2893 				}
2894 				if (diag == 0)
2895 					continue;
2896 
2897 				/* Failed to set up Rx queue, return */
2898 				if (port->port_status == RTE_PORT_HANDLING)
2899 					port->port_status = RTE_PORT_STOPPED;
2900 				else
2901 					fprintf(stderr,
2902 						"Port %d can not be set back to stopped\n",
2903 						pi);
2904 				fprintf(stderr,
2905 					"Fail to configure port %d rx queues\n",
2906 					pi);
2907 				/* try to reconfigure queues next time */
2908 				port->need_reconfig_queues = 1;
2909 				return -1;
2910 			}
2911 			/* setup hairpin queues */
2912 			if (setup_hairpin_queues(pi, p_pi, cnt_pi) != 0)
2913 				return -1;
2914 		}
2915 		configure_rxtx_dump_callbacks(verbose_level);
2916 		if (clear_ptypes) {
2917 			diag = rte_eth_dev_set_ptypes(pi, RTE_PTYPE_UNKNOWN,
2918 					NULL, 0);
2919 			if (diag < 0)
2920 				fprintf(stderr,
2921 					"Port %d: Failed to disable Ptype parsing\n",
2922 					pi);
2923 		}
2924 
2925 		p_pi = pi;
2926 		cnt_pi++;
2927 
2928 		/* start port */
2929 		diag = eth_dev_start_mp(pi);
2930 		if (diag < 0) {
2931 			fprintf(stderr, "Fail to start port %d: %s\n",
2932 				pi, rte_strerror(-diag));
2933 
2934 			/* Failed to start the port, set it back to stopped */
2935 			if (port->port_status == RTE_PORT_HANDLING)
2936 				port->port_status = RTE_PORT_STOPPED;
2937 			else
2938 				fprintf(stderr,
2939 					"Port %d can not be set back to stopped\n",
2940 					pi);
2941 			continue;
2942 		}
2943 
2944 		if (port->port_status == RTE_PORT_HANDLING)
2945 			port->port_status = RTE_PORT_STARTED;
2946 		else
2947 			fprintf(stderr, "Port %d cannot be set to started\n",
2948 				pi);
2949 
2950 		if (eth_macaddr_get_print_err(pi, &port->eth_addr) == 0)
2951 			printf("Port %d: " RTE_ETHER_ADDR_PRT_FMT "\n", pi,
2952 					RTE_ETHER_ADDR_BYTES(&port->eth_addr));
2953 
2954 		/* at least one port started, need to check link status */
2955 		need_check_link_status = 1;
2956 
2957 		pl[cfg_pi++] = pi;
2958 	}
2959 
2960 	if (need_check_link_status == 1 && !no_link_check)
2961 		check_all_ports_link_status(RTE_PORT_ALL);
2962 	else if (need_check_link_status == 0)
2963 		fprintf(stderr, "Please stop the ports first\n");
2964 
2965 	if (hairpin_mode & 0xf) {
2966 		uint16_t i;
2967 		int j;
2968 
2969 		/* bind all started hairpin ports */
2970 		for (i = 0; i < cfg_pi; i++) {
2971 			pi = pl[i];
2972 			/* bind current Tx to all peer Rx */
2973 			peer_pi = rte_eth_hairpin_get_peer_ports(pi, peer_pl,
2974 							RTE_MAX_ETHPORTS, 1);
2975 			if (peer_pi < 0)
2976 				return peer_pi;
2977 			for (j = 0; j < peer_pi; j++) {
2978 				if (!port_is_started(peer_pl[j]))
2979 					continue;
2980 				diag = rte_eth_hairpin_bind(pi, peer_pl[j]);
2981 				if (diag < 0) {
2982 					fprintf(stderr,
2983 						"Error during binding hairpin Tx port %u to %u: %s\n",
2984 						pi, peer_pl[j],
2985 						rte_strerror(-diag));
2986 					return -1;
2987 				}
2988 			}
2989 			/* bind all peer Tx to current Rx */
2990 			peer_pi = rte_eth_hairpin_get_peer_ports(pi, peer_pl,
2991 							RTE_MAX_ETHPORTS, 0);
2992 			if (peer_pi < 0)
2993 				return peer_pi;
2994 			for (j = 0; j < peer_pi; j++) {
2995 				if (!port_is_started(peer_pl[j]))
2996 					continue;
2997 				diag = rte_eth_hairpin_bind(peer_pl[j], pi);
2998 				if (diag < 0) {
2999 					fprintf(stderr,
3000 						"Error during binding hairpin Tx port %u to %u: %s\n",
3001 						peer_pl[j], pi,
3002 						rte_strerror(-diag));
3003 					return -1;
3004 				}
3005 			}
3006 		}
3007 	}
3008 
3009 	fill_xstats_display_info_for_port(pid);
3010 
3011 	printf("Done\n");
3012 	return 0;
3013 }
3014 
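/*
 * Stop the given port, or all ports when pid is RTE_PORT_ALL, unbinding
 * hairpin peers and flushing flow rules first.
 */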
3015 void
3016 stop_port(portid_t pid)
3017 {
3018 	portid_t pi;
3019 	struct rte_port *port;
3020 	int need_check_link_status = 0;
3021 	portid_t peer_pl[RTE_MAX_ETHPORTS];
3022 	int peer_pi;
3023 
3024 	if (port_id_is_invalid(pid, ENABLED_WARN))
3025 		return;
3026 
3027 	printf("Stopping ports...\n");
3028 
3029 	RTE_ETH_FOREACH_DEV(pi) {
3030 		if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
3031 			continue;
3032 
3033 		if (port_is_forwarding(pi) != 0 && test_done == 0) {
3034 			fprintf(stderr,
3035 				"Please remove port %d from forwarding configuration.\n",
3036 				pi);
3037 			continue;
3038 		}
3039 
3040 		if (port_is_bonding_slave(pi)) {
3041 			fprintf(stderr,
3042 				"Please remove port %d from bonded device.\n",
3043 				pi);
3044 			continue;
3045 		}
3046 
3047 		port = &ports[pi];
3048 		if (port->port_status == RTE_PORT_STARTED)
3049 			port->port_status = RTE_PORT_HANDLING;
3050 		else
3051 			continue;
3052 
3053 		if (hairpin_mode & 0xf) {
3054 			int j;
3055 
3056 			rte_eth_hairpin_unbind(pi, RTE_MAX_ETHPORTS);
3057 			/* unbind all peer Tx from current Rx */
3058 			peer_pi = rte_eth_hairpin_get_peer_ports(pi, peer_pl,
3059 							RTE_MAX_ETHPORTS, 0);
3060 			if (peer_pi < 0)
3061 				continue;
3062 			for (j = 0; j < peer_pi; j++) {
3063 				if (!port_is_started(peer_pl[j]))
3064 					continue;
3065 				rte_eth_hairpin_unbind(peer_pl[j], pi);
3066 			}
3067 		}
3068 
3069 		if (port->flow_list)
3070 			port_flow_flush(pi);
3071 
3072 		if (eth_dev_stop_mp(pi) != 0)
3073 			RTE_LOG(ERR, EAL, "rte_eth_dev_stop failed for port %u\n",
3074 				pi);
3075 
3076 		if (port->port_status == RTE_PORT_HANDLING)
3077 			port->port_status = RTE_PORT_STOPPED;
3078 		else
3079 			fprintf(stderr, "Port %d cannot be set to stopped\n",
3080 				pi);
3081 		need_check_link_status = 1;
3082 	}
3083 	if (need_check_link_status && !no_link_check)
3084 		check_all_ports_link_status(RTE_PORT_ALL);
3085 
3086 	printf("Done\n");
3087 }
3088 
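/* Compact a port array in place, dropping entries that are no longer valid port ids. */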
3089 static void
3090 remove_invalid_ports_in(portid_t *array, portid_t *total)
3091 {
3092 	portid_t i;
3093 	portid_t new_total = 0;
3094 
3095 	for (i = 0; i < *total; i++)
3096 		if (!port_id_is_invalid(array[i], DISABLED_WARN)) {
3097 			array[new_total] = array[i];
3098 			new_total++;
3099 		}
3100 	*total = new_total;
3101 }
3102 
3103 static void
3104 remove_invalid_ports(void)
3105 {
3106 	remove_invalid_ports_in(ports_ids, &nb_ports);
3107 	remove_invalid_ports_in(fwd_ports_ids, &nb_fwd_ports);
3108 	nb_cfg_ports = nb_fwd_ports;
3109 }
3110 
3111 void
3112 close_port(portid_t pid)
3113 {
3114 	portid_t pi;
3115 	struct rte_port *port;
3116 
3117 	if (port_id_is_invalid(pid, ENABLED_WARN))
3118 		return;
3119 
3120 	printf("Closing ports...\n");
3121 
3122 	RTE_ETH_FOREACH_DEV(pi) {
3123 		if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
3124 			continue;
3125 
3126 		if (port_is_forwarding(pi) != 0 && test_done == 0) {
3127 			fprintf(stderr,
3128 				"Please remove port %d from forwarding configuration.\n",
3129 				pi);
3130 			continue;
3131 		}
3132 
3133 		if (port_is_bonding_slave(pi)) {
3134 			fprintf(stderr,
3135 				"Please remove port %d from bonded device.\n",
3136 				pi);
3137 			continue;
3138 		}
3139 
3140 		port = &ports[pi];
3141 		if (port->port_status == RTE_PORT_CLOSED) {
3142 			fprintf(stderr, "Port %d is already closed\n", pi);
3143 			continue;
3144 		}
3145 
3146 		if (is_proc_primary()) {
3147 			port_flow_flush(pi);
3148 			port_flex_item_flush(pi);
3149 			rte_eth_dev_close(pi);
3150 		}
3151 
3152 		free_xstats_display_info(pi);
3153 	}
3154 
3155 	remove_invalid_ports();
3156 	printf("Done\n");
3157 }
3158 
3159 void
3160 reset_port(portid_t pid)
3161 {
3162 	int diag;
3163 	portid_t pi;
3164 	struct rte_port *port;
3165 
3166 	if (port_id_is_invalid(pid, ENABLED_WARN))
3167 		return;
3168 
3169 	if ((pid == (portid_t)RTE_PORT_ALL && !all_ports_stopped()) ||
3170 		(pid != (portid_t)RTE_PORT_ALL && !port_is_stopped(pid))) {
3171 		fprintf(stderr,
3172 			"Can not reset port(s), please stop port(s) first.\n");
3173 		return;
3174 	}
3175 
3176 	printf("Resetting ports...\n");
3177 
3178 	RTE_ETH_FOREACH_DEV(pi) {
3179 		if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
3180 			continue;
3181 
3182 		if (port_is_forwarding(pi) != 0 && test_done == 0) {
3183 			fprintf(stderr,
3184 				"Please remove port %d from forwarding configuration.\n",
3185 				pi);
3186 			continue;
3187 		}
3188 
3189 		if (port_is_bonding_slave(pi)) {
3190 			fprintf(stderr,
3191 				"Please remove port %d from bonded device.\n",
3192 				pi);
3193 			continue;
3194 		}
3195 
3196 		diag = rte_eth_dev_reset(pi);
3197 		if (diag == 0) {
3198 			port = &ports[pi];
3199 			port->need_reconfig = 1;
3200 			port->need_reconfig_queues = 1;
3201 		} else {
3202 			fprintf(stderr, "Failed to reset port %d. diag=%d\n",
3203 				pi, diag);
3204 		}
3205 	}
3206 
3207 	printf("Done\n");
3208 }
3209 
3210 void
3211 attach_port(char *identifier)
3212 {
3213 	portid_t pi;
3214 	struct rte_dev_iterator iterator;
3215 
3216 	printf("Attaching a new port...\n");
3217 
3218 	if (identifier == NULL) {
3219 		fprintf(stderr, "Invalid parameters are specified\n");
3220 		return;
3221 	}
3222 
3223 	if (rte_dev_probe(identifier) < 0) {
3224 		TESTPMD_LOG(ERR, "Failed to attach port %s\n", identifier);
3225 		return;
3226 	}
3227 
3228 	/* first attach mode: event */
3229 	if (setup_on_probe_event) {
3230 		/* new ports are detected on RTE_ETH_EVENT_NEW event */
3231 		for (pi = 0; pi < RTE_MAX_ETHPORTS; pi++)
3232 			if (ports[pi].port_status == RTE_PORT_HANDLING &&
3233 					ports[pi].need_setup != 0)
3234 				setup_attached_port(pi);
3235 		return;
3236 	}
3237 
3238 	/* second attach mode: iterator */
3239 	RTE_ETH_FOREACH_MATCHING_DEV(pi, identifier, &iterator) {
3240 		/* setup ports matching the devargs used for probing */
3241 		if (port_is_forwarding(pi))
3242 			continue; /* port was already attached before */
3243 		setup_attached_port(pi);
3244 	}
3245 }
3246 
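/* Finish the configuration of a newly attached port and add it to the forwarding ports. */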
3247 static void
3248 setup_attached_port(portid_t pi)
3249 {
3250 	unsigned int socket_id;
3251 	int ret;
3252 
3253 	socket_id = (unsigned)rte_eth_dev_socket_id(pi);
3254 	/* if socket_id is invalid, set to the first available socket. */
3255 	if (check_socket_id(socket_id) < 0)
3256 		socket_id = socket_ids[0];
3257 	reconfig(pi, socket_id);
3258 	ret = rte_eth_promiscuous_enable(pi);
3259 	if (ret != 0)
3260 		fprintf(stderr,
3261 			"Error during enabling promiscuous mode for port %u: %s - ignore\n",
3262 			pi, rte_strerror(-ret));
3263 
3264 	ports_ids[nb_ports++] = pi;
3265 	fwd_ports_ids[nb_fwd_ports++] = pi;
3266 	nb_cfg_ports = nb_fwd_ports;
3267 	ports[pi].need_setup = 0;
3268 	ports[pi].port_status = RTE_PORT_STOPPED;
3269 
3270 	printf("Port %d is attached. Now total ports is %d\n", pi, nb_ports);
3271 	printf("Done\n");
3272 }
3273 
3274 static void
3275 detach_device(struct rte_device *dev)
3276 {
3277 	portid_t sibling;
3278 
3279 	if (dev == NULL) {
3280 		fprintf(stderr, "Device already removed\n");
3281 		return;
3282 	}
3283 
3284 	printf("Removing a device...\n");
3285 
3286 	RTE_ETH_FOREACH_DEV_OF(sibling, dev) {
3287 		if (ports[sibling].port_status != RTE_PORT_CLOSED) {
3288 			if (ports[sibling].port_status != RTE_PORT_STOPPED) {
3289 				fprintf(stderr, "Port %u not stopped\n",
3290 					sibling);
3291 				return;
3292 			}
3293 			port_flow_flush(sibling);
3294 		}
3295 	}
3296 
3297 	if (rte_dev_remove(dev) < 0) {
3298 		TESTPMD_LOG(ERR, "Failed to detach device %s\n", dev->name);
3299 		return;
3300 	}
3301 	remove_invalid_ports();
3302 
3303 	printf("Device is detached\n");
3304 	printf("Now total ports is %d\n", nb_ports);
3305 	printf("Done\n");
3306 	return;
3307 }
3308 
3309 void
3310 detach_port_device(portid_t port_id)
3311 {
3312 	int ret;
3313 	struct rte_eth_dev_info dev_info;
3314 
3315 	if (port_id_is_invalid(port_id, ENABLED_WARN))
3316 		return;
3317 
3318 	if (ports[port_id].port_status != RTE_PORT_CLOSED) {
3319 		if (ports[port_id].port_status != RTE_PORT_STOPPED) {
3320 			fprintf(stderr, "Port not stopped\n");
3321 			return;
3322 		}
3323 		fprintf(stderr, "Port was not closed\n");
3324 	}
3325 
3326 	ret = eth_dev_info_get_print_err(port_id, &dev_info);
3327 	if (ret != 0) {
3328 		TESTPMD_LOG(ERR,
3329 			"Failed to get device info for port %d, not detaching\n",
3330 			port_id);
3331 		return;
3332 	}
3333 	detach_device(dev_info.device);
3334 }
3335 
3336 void
3337 detach_devargs(char *identifier)
3338 {
3339 	struct rte_dev_iterator iterator;
3340 	struct rte_devargs da;
3341 	portid_t port_id;
3342 
3343 	printf("Removing a device...\n");
3344 
3345 	memset(&da, 0, sizeof(da));
3346 	if (rte_devargs_parsef(&da, "%s", identifier)) {
3347 		fprintf(stderr, "cannot parse identifier\n");
3348 		return;
3349 	}
3350 
3351 	RTE_ETH_FOREACH_MATCHING_DEV(port_id, identifier, &iterator) {
3352 		if (ports[port_id].port_status != RTE_PORT_CLOSED) {
3353 			if (ports[port_id].port_status != RTE_PORT_STOPPED) {
3354 				fprintf(stderr, "Port %u not stopped\n",
3355 					port_id);
3356 				rte_eth_iterator_cleanup(&iterator);
3357 				rte_devargs_reset(&da);
3358 				return;
3359 			}
3360 			port_flow_flush(port_id);
3361 		}
3362 	}
3363 
3364 	if (rte_eal_hotplug_remove(da.bus->name, da.name) != 0) {
3365 		TESTPMD_LOG(ERR, "Failed to detach device %s(%s)\n",
3366 			    da.name, da.bus->name);
3367 		rte_devargs_reset(&da);
3368 		return;
3369 	}
3370 
3371 	remove_invalid_ports();
3372 
3373 	printf("Device %s is detached\n", identifier);
3374 	printf("Now total ports is %d\n", nb_ports);
3375 	printf("Done\n");
3376 	rte_devargs_reset(&da);
3377 }
3378 
3379 void
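/*
 * Release testpmd resources on exit: stop forwarding, stop and close all
 * ports, disable hotplug handling and free the mbuf mempools.
 */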
3380 pmd_test_exit(void)
3381 {
3382 	portid_t pt_id;
3383 	unsigned int i;
3384 	int ret;
3385 
3386 	if (test_done == 0)
3387 		stop_packet_forwarding();
3388 
3389 #ifndef RTE_EXEC_ENV_WINDOWS
3390 	for (i = 0 ; i < RTE_DIM(mempools) ; i++) {
3391 		if (mempools[i]) {
3392 			if (mp_alloc_type == MP_ALLOC_ANON)
3393 				rte_mempool_mem_iter(mempools[i], dma_unmap_cb,
3394 						     NULL);
3395 		}
3396 	}
3397 #endif
3398 	if (ports != NULL) {
3399 		no_link_check = 1;
3400 		RTE_ETH_FOREACH_DEV(pt_id) {
3401 			printf("\nStopping port %d...\n", pt_id);
3402 			fflush(stdout);
3403 			stop_port(pt_id);
3404 		}
3405 		RTE_ETH_FOREACH_DEV(pt_id) {
3406 			printf("\nShutting down port %d...\n", pt_id);
3407 			fflush(stdout);
3408 			close_port(pt_id);
3409 		}
3410 	}
3411 
3412 	if (hot_plug) {
3413 		ret = rte_dev_event_monitor_stop();
3414 		if (ret) {
3415 			RTE_LOG(ERR, EAL,
3416 				"Failed to stop device event monitor.\n");
3417 			return;
3418 		}
3419 
3420 		ret = rte_dev_event_callback_unregister(NULL,
3421 			dev_event_callback, NULL);
3422 		if (ret < 0) {
3423 			RTE_LOG(ERR, EAL,
3424 				"Failed to unregister device event callback.\n");
3425 			return;
3426 		}
3427 
3428 		ret = rte_dev_hotplug_handle_disable();
3429 		if (ret) {
3430 			RTE_LOG(ERR, EAL,
3431 				"Failed to disable hotplug handling.\n");
3432 			return;
3433 		}
3434 	}
3435 	for (i = 0 ; i < RTE_DIM(mempools) ; i++) {
3436 		if (mempools[i])
3437 			mempool_free_mp(mempools[i]);
3438 	}
3439 	free(xstats_display);
3440 
3441 	printf("\nBye...\n");
3442 }
3443 
3444 typedef void (*cmd_func_t)(void);
3445 struct pmd_test_command {
3446 	const char *cmd_name;
3447 	cmd_func_t cmd_func;
3448 };
3449 
3450 /* Check the link status of all ports in up to 9 s and print the final status. */
3451 static void
3452 check_all_ports_link_status(uint32_t port_mask)
3453 {
3454 #define CHECK_INTERVAL 100 /* 100ms */
3455 #define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
3456 	portid_t portid;
3457 	uint8_t count, all_ports_up, print_flag = 0;
3458 	struct rte_eth_link link;
3459 	int ret;
3460 	char link_status[RTE_ETH_LINK_MAX_STR_LEN];
3461 
3462 	printf("Checking link statuses...\n");
3463 	fflush(stdout);
3464 	for (count = 0; count <= MAX_CHECK_TIME; count++) {
3465 		all_ports_up = 1;
3466 		RTE_ETH_FOREACH_DEV(portid) {
3467 			if ((port_mask & (1 << portid)) == 0)
3468 				continue;
3469 			memset(&link, 0, sizeof(link));
3470 			ret = rte_eth_link_get_nowait(portid, &link);
3471 			if (ret < 0) {
3472 				all_ports_up = 0;
3473 				if (print_flag == 1)
3474 					fprintf(stderr,
3475 						"Port %u link get failed: %s\n",
3476 						portid, rte_strerror(-ret));
3477 				continue;
3478 			}
3479 			/* print link status if flag set */
3480 			if (print_flag == 1) {
3481 				rte_eth_link_to_str(link_status,
3482 					sizeof(link_status), &link);
3483 				printf("Port %d %s\n", portid, link_status);
3484 				continue;
3485 			}
3486 			/* clear all_ports_up flag if any link down */
3487 			if (link.link_status == RTE_ETH_LINK_DOWN) {
3488 				all_ports_up = 0;
3489 				break;
3490 			}
3491 		}
3492 		/* after finally printing all link status, get out */
3493 		if (print_flag == 1)
3494 			break;
3495 
3496 		if (all_ports_up == 0) {
3497 			fflush(stdout);
3498 			rte_delay_ms(CHECK_INTERVAL);
3499 		}
3500 
3501 		/* set the print_flag if all ports up or timeout */
3502 		if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
3503 			print_flag = 1;
3504 		}
3505 
3506 		if (lsc_interrupt)
3507 			break;
3508 	}
3509 }
3510 
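/* Deferred handler for a device removal event: stop, close and detach the port. */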
3511 static void
3512 rmv_port_callback(void *arg)
3513 {
3514 	int need_to_start = 0;
3515 	int org_no_link_check = no_link_check;
3516 	portid_t port_id = (intptr_t)arg;
3517 	struct rte_eth_dev_info dev_info;
3518 	int ret;
3519 
3520 	RTE_ETH_VALID_PORTID_OR_RET(port_id);
3521 
3522 	if (!test_done && port_is_forwarding(port_id)) {
3523 		need_to_start = 1;
3524 		stop_packet_forwarding();
3525 	}
3526 	no_link_check = 1;
3527 	stop_port(port_id);
3528 	no_link_check = org_no_link_check;
3529 
3530 	ret = eth_dev_info_get_print_err(port_id, &dev_info);
3531 	if (ret != 0)
3532 		TESTPMD_LOG(ERR,
3533 			"Failed to get device info for port %d, not detaching\n",
3534 			port_id);
3535 	else {
3536 		struct rte_device *device = dev_info.device;
3537 		close_port(port_id);
3538 		detach_device(device); /* might be already removed or have more ports */
3539 	}
3540 	if (need_to_start)
3541 		start_packet_forwarding(0);
3542 }
3543 
3544 /* This function is used by the interrupt thread */
3545 static int
3546 eth_event_callback(portid_t port_id, enum rte_eth_event_type type, void *param,
3547 		  void *ret_param)
3548 {
3549 	RTE_SET_USED(param);
3550 	RTE_SET_USED(ret_param);
3551 
3552 	if (type >= RTE_ETH_EVENT_MAX) {
3553 		fprintf(stderr,
3554 			"\nPort %" PRIu16 ": %s called upon invalid event %d\n",
3555 			port_id, __func__, type);
3556 		fflush(stderr);
3557 	} else if (event_print_mask & (UINT32_C(1) << type)) {
3558 		printf("\nPort %" PRIu16 ": %s event\n", port_id,
3559 			eth_event_desc[type]);
3560 		fflush(stdout);
3561 	}
3562 
3563 	switch (type) {
3564 	case RTE_ETH_EVENT_NEW:
3565 		ports[port_id].need_setup = 1;
3566 		ports[port_id].port_status = RTE_PORT_HANDLING;
3567 		break;
3568 	case RTE_ETH_EVENT_INTR_RMV:
3569 		if (port_id_is_invalid(port_id, DISABLED_WARN))
3570 			break;
3571 		if (rte_eal_alarm_set(100000,
3572 				rmv_port_callback, (void *)(intptr_t)port_id))
3573 			fprintf(stderr,
3574 				"Could not set up deferred device removal\n");
3575 		break;
3576 	case RTE_ETH_EVENT_DESTROY:
3577 		ports[port_id].port_status = RTE_PORT_CLOSED;
3578 		printf("Port %u is closed\n", port_id);
3579 		break;
3580 	default:
3581 		break;
3582 	}
3583 	return 0;
3584 }
3585 
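/* Register eth_event_callback() for every Ethernet device event type on all ports. */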
3586 static int
3587 register_eth_event_callback(void)
3588 {
3589 	int ret;
3590 	enum rte_eth_event_type event;
3591 
3592 	for (event = RTE_ETH_EVENT_UNKNOWN;
3593 			event < RTE_ETH_EVENT_MAX; event++) {
3594 		ret = rte_eth_dev_callback_register(RTE_ETH_ALL,
3595 				event,
3596 				eth_event_callback,
3597 				NULL);
3598 		if (ret != 0) {
3599 			TESTPMD_LOG(ERR, "Failed to register callback for "
3600 					"%s event\n", eth_event_desc[event]);
3601 			return -1;
3602 		}
3603 	}
3604 
3605 	return 0;
3606 }
3607 
3608 /* This function is used by the interrupt thread */
3609 static void
3610 dev_event_callback(const char *device_name, enum rte_dev_event_type type,
3611 			     __rte_unused void *arg)
3612 {
3613 	uint16_t port_id;
3614 	int ret;
3615 
3616 	if (type >= RTE_DEV_EVENT_MAX) {
3617 		fprintf(stderr, "%s called upon invalid event %d\n",
3618 			__func__, type);
3619 		fflush(stderr);
3620 	}
3621 
3622 	switch (type) {
3623 	case RTE_DEV_EVENT_REMOVE:
3624 		RTE_LOG(DEBUG, EAL, "The device: %s has been removed!\n",
3625 			device_name);
3626 		ret = rte_eth_dev_get_port_by_name(device_name, &port_id);
3627 		if (ret) {
3628 			RTE_LOG(ERR, EAL, "can not get port by device %s!\n",
3629 				device_name);
3630 			return;
3631 		}
3632 		/*
3633 		 * Because the user's callback is invoked from the EAL
3634 		 * interrupt callback, the interrupt callback must finish
3635 		 * before it can be unregistered while detaching the device.
3636 		 * So finish this callback quickly and detach the device
3637 		 * through a deferred removal. This is a workaround: once
3638 		 * device detaching is moved into the EAL, the deferred
3639 		 * removal can be dropped.
3640 		 */
3641 		if (rte_eal_alarm_set(100000,
3642 				rmv_port_callback, (void *)(intptr_t)port_id))
3643 			RTE_LOG(ERR, EAL,
3644 				"Could not set up deferred device removal\n");
3645 		break;
3646 	case RTE_DEV_EVENT_ADD:
3647 		RTE_LOG(ERR, EAL, "The device: %s has been added!\n",
3648 			device_name);
3649 		/* TODO: after the kernel driver binding is finished,
3650 		 * begin to attach the port.
3651 		 */
3652 		break;
3653 	default:
3654 		break;
3655 	}
3656 }
3657 
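/*
 * Initialize the Rx/Tx queue configuration of a port from the device defaults
 * and the thresholds passed on the command line.
 */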
3658 static void
3659 rxtx_port_config(portid_t pid)
3660 {
3661 	uint16_t qid;
3662 	uint64_t offloads;
3663 	struct rte_port *port = &ports[pid];
3664 
3665 	for (qid = 0; qid < nb_rxq; qid++) {
3666 		offloads = port->rx_conf[qid].offloads;
3667 		port->rx_conf[qid] = port->dev_info.default_rxconf;
3668 
3669 		if (rxq_share > 0 &&
3670 		    (port->dev_info.dev_capa & RTE_ETH_DEV_CAPA_RXQ_SHARE)) {
3671 			/* Non-zero share group to enable RxQ share. */
3672 			port->rx_conf[qid].share_group = pid / rxq_share + 1;
3673 			port->rx_conf[qid].share_qid = qid; /* Equal mapping. */
3674 		}
3675 
3676 		if (offloads != 0)
3677 			port->rx_conf[qid].offloads = offloads;
3678 
3679 		/* Check if any Rx parameters have been passed */
3680 		if (rx_pthresh != RTE_PMD_PARAM_UNSET)
3681 			port->rx_conf[qid].rx_thresh.pthresh = rx_pthresh;
3682 
3683 		if (rx_hthresh != RTE_PMD_PARAM_UNSET)
3684 			port->rx_conf[qid].rx_thresh.hthresh = rx_hthresh;
3685 
3686 		if (rx_wthresh != RTE_PMD_PARAM_UNSET)
3687 			port->rx_conf[qid].rx_thresh.wthresh = rx_wthresh;
3688 
3689 		if (rx_free_thresh != RTE_PMD_PARAM_UNSET)
3690 			port->rx_conf[qid].rx_free_thresh = rx_free_thresh;
3691 
3692 		if (rx_drop_en != RTE_PMD_PARAM_UNSET)
3693 			port->rx_conf[qid].rx_drop_en = rx_drop_en;
3694 
3695 		port->nb_rx_desc[qid] = nb_rxd;
3696 	}
3697 
3698 	for (qid = 0; qid < nb_txq; qid++) {
3699 		offloads = port->tx_conf[qid].offloads;
3700 		port->tx_conf[qid] = port->dev_info.default_txconf;
3701 		if (offloads != 0)
3702 			port->tx_conf[qid].offloads = offloads;
3703 
3704 		/* Check if any Tx parameters have been passed */
3705 		if (tx_pthresh != RTE_PMD_PARAM_UNSET)
3706 			port->tx_conf[qid].tx_thresh.pthresh = tx_pthresh;
3707 
3708 		if (tx_hthresh != RTE_PMD_PARAM_UNSET)
3709 			port->tx_conf[qid].tx_thresh.hthresh = tx_hthresh;
3710 
3711 		if (tx_wthresh != RTE_PMD_PARAM_UNSET)
3712 			port->tx_conf[qid].tx_thresh.wthresh = tx_wthresh;
3713 
3714 		if (tx_rs_thresh != RTE_PMD_PARAM_UNSET)
3715 			port->tx_conf[qid].tx_rs_thresh = tx_rs_thresh;
3716 
3717 		if (tx_free_thresh != RTE_PMD_PARAM_UNSET)
3718 			port->tx_conf[qid].tx_free_thresh = tx_free_thresh;
3719 
3720 		port->nb_tx_desc[qid] = nb_txd;
3721 	}
3722 }
3723 
3724 /*
3725  * Helper function to set MTU from frame size
3726  *
3727  * port->dev_info should be set before calling this function.
3728  *
3729  * return 0 on success, negative on error
3730  */
3731 int
3732 update_mtu_from_frame_size(portid_t portid, uint32_t max_rx_pktlen)
3733 {
3734 	struct rte_port *port = &ports[portid];
3735 	uint32_t eth_overhead;
3736 	uint16_t mtu, new_mtu;
3737 
3738 	eth_overhead = get_eth_overhead(&port->dev_info);
3739 
3740 	if (rte_eth_dev_get_mtu(portid, &mtu) != 0) {
3741 		printf("Failed to get MTU for port %u\n", portid);
3742 		return -1;
3743 	}
3744 
3745 	new_mtu = max_rx_pktlen - eth_overhead;
3746 
3747 	if (mtu == new_mtu)
3748 		return 0;
3749 
3750 	if (eth_dev_set_mtu_mp(portid, new_mtu) != 0) {
3751 		fprintf(stderr,
3752 			"Failed to set MTU to %u for port %u\n",
3753 			new_mtu, portid);
3754 		return -1;
3755 	}
3756 
3757 	port->dev_conf.rxmode.mtu = new_mtu;
3758 
3759 	return 0;
3760 }
3761 
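/* Build the default ethdev configuration of every probed port. */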
3762 void
3763 init_port_config(void)
3764 {
3765 	portid_t pid;
3766 	struct rte_port *port;
3767 	int ret, i;
3768 
3769 	RTE_ETH_FOREACH_DEV(pid) {
3770 		port = &ports[pid];
3771 		port->dev_conf.fdir_conf = fdir_conf;
3772 
3773 		ret = eth_dev_info_get_print_err(pid, &port->dev_info);
3774 		if (ret != 0)
3775 			return;
3776 
3777 		if (nb_rxq > 1) {
3778 			port->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
3779 			port->dev_conf.rx_adv_conf.rss_conf.rss_hf =
3780 				rss_hf & port->dev_info.flow_type_rss_offloads;
3781 		} else {
3782 			port->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
3783 			port->dev_conf.rx_adv_conf.rss_conf.rss_hf = 0;
3784 		}
3785 
3786 		if (port->dcb_flag == 0) {
3787 			if (port->dev_conf.rx_adv_conf.rss_conf.rss_hf != 0) {
3788 				port->dev_conf.rxmode.mq_mode =
3789 					(enum rte_eth_rx_mq_mode)
3790 						(rx_mq_mode & RTE_ETH_MQ_RX_RSS);
3791 			} else {
3792 				port->dev_conf.rxmode.mq_mode = RTE_ETH_MQ_RX_NONE;
3793 				port->dev_conf.rxmode.offloads &=
3794 						~RTE_ETH_RX_OFFLOAD_RSS_HASH;
3795 
3796 				for (i = 0;
3797 				     i < port->dev_info.nb_rx_queues;
3798 				     i++)
3799 					port->rx_conf[i].offloads &=
3800 						~RTE_ETH_RX_OFFLOAD_RSS_HASH;
3801 			}
3802 		}
3803 
3804 		rxtx_port_config(pid);
3805 
3806 		ret = eth_macaddr_get_print_err(pid, &port->eth_addr);
3807 		if (ret != 0)
3808 			return;
3809 
3810 #if defined RTE_NET_IXGBE && defined RTE_LIBRTE_IXGBE_BYPASS
3811 		rte_pmd_ixgbe_bypass_init(pid);
3812 #endif
3813 
3814 		if (lsc_interrupt && (*port->dev_info.dev_flags & RTE_ETH_DEV_INTR_LSC))
3815 			port->dev_conf.intr_conf.lsc = 1;
3816 		if (rmv_interrupt && (*port->dev_info.dev_flags & RTE_ETH_DEV_INTR_RMV))
3817 			port->dev_conf.intr_conf.rmv = 1;
3818 	}
3819 }
3820 
3821 void set_port_slave_flag(portid_t slave_pid)
3822 {
3823 	struct rte_port *port;
3824 
3825 	port = &ports[slave_pid];
3826 	port->slave_flag = 1;
3827 }
3828 
3829 void clear_port_slave_flag(portid_t slave_pid)
3830 {
3831 	struct rte_port *port;
3832 
3833 	port = &ports[slave_pid];
3834 	port->slave_flag = 0;
3835 }
3836 
3837 uint8_t port_is_bonding_slave(portid_t slave_pid)
3838 {
3839 	struct rte_port *port;
3840 	struct rte_eth_dev_info dev_info;
3841 	int ret;
3842 
3843 	port = &ports[slave_pid];
3844 	ret = eth_dev_info_get_print_err(slave_pid, &dev_info);
3845 	if (ret != 0) {
3846 		TESTPMD_LOG(ERR,
3847 			"Failed to get device info for port id %d, "
3848 			"cannot determine if the port is a bonded slave\n",
3849 			slave_pid);
3850 		return 0;
3851 	}
3852 	if ((*dev_info.dev_flags & RTE_ETH_DEV_BONDED_SLAVE) || (port->slave_flag == 1))
3853 		return 1;
3854 	return 0;
3855 }
3856 
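/*
 * VLAN tags used to build the VMDQ+DCB pool mapping in get_eth_dcb_conf()
 * and to program the VLAN filter table in init_port_dcb_config().
 */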
3857 const uint16_t vlan_tags[] = {
3858 		0,  1,  2,  3,  4,  5,  6,  7,
3859 		8,  9, 10, 11,  12, 13, 14, 15,
3860 		16, 17, 18, 19, 20, 21, 22, 23,
3861 		24, 25, 26, 27, 28, 29, 30, 31
3862 };
3863 
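/*
 * Build the DCB part of *eth_conf. In DCB_VT_ENABLED mode a VMDQ+DCB
 * configuration is derived from the vlan_tags[] table above; otherwise a
 * plain DCB configuration is built, reusing the RSS hash configuration
 * currently programmed on the port.
 */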
3864 static int
3865 get_eth_dcb_conf(portid_t pid, struct rte_eth_conf *eth_conf,
3866 		 enum dcb_mode_enable dcb_mode,
3867 		 enum rte_eth_nb_tcs num_tcs,
3868 		 uint8_t pfc_en)
3869 {
3870 	uint8_t i;
3871 	int32_t rc;
3872 	struct rte_eth_rss_conf rss_conf;
3873 
3874 	/*
3875 	 * Builds up the correct configuration for dcb+vt based on the vlan tags array
3876 	 * given above, and the number of traffic classes available for use.
3877 	 */
3878 	if (dcb_mode == DCB_VT_ENABLED) {
3879 		struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
3880 				&eth_conf->rx_adv_conf.vmdq_dcb_conf;
3881 		struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3882 				&eth_conf->tx_adv_conf.vmdq_dcb_tx_conf;
3883 
3884 		/* VMDQ+DCB RX and TX configurations */
3885 		vmdq_rx_conf->enable_default_pool = 0;
3886 		vmdq_rx_conf->default_pool = 0;
3887 		vmdq_rx_conf->nb_queue_pools =
3888 			(num_tcs == RTE_ETH_4_TCS ? RTE_ETH_32_POOLS : RTE_ETH_16_POOLS);
3889 		vmdq_tx_conf->nb_queue_pools =
3890 			(num_tcs == RTE_ETH_4_TCS ? RTE_ETH_32_POOLS : RTE_ETH_16_POOLS);
3891 
3892 		vmdq_rx_conf->nb_pool_maps = vmdq_rx_conf->nb_queue_pools;
3893 		for (i = 0; i < vmdq_rx_conf->nb_pool_maps; i++) {
3894 			vmdq_rx_conf->pool_map[i].vlan_id = vlan_tags[i];
3895 			vmdq_rx_conf->pool_map[i].pools =
3896 				1 << (i % vmdq_rx_conf->nb_queue_pools);
3897 		}
3898 		for (i = 0; i < RTE_ETH_DCB_NUM_USER_PRIORITIES; i++) {
3899 			vmdq_rx_conf->dcb_tc[i] = i % num_tcs;
3900 			vmdq_tx_conf->dcb_tc[i] = i % num_tcs;
3901 		}
3902 
3903 		/* set DCB mode of RX and TX of multiple queues */
3904 		eth_conf->rxmode.mq_mode =
3905 				(enum rte_eth_rx_mq_mode)
3906 					(rx_mq_mode & RTE_ETH_MQ_RX_VMDQ_DCB);
3907 		eth_conf->txmode.mq_mode = RTE_ETH_MQ_TX_VMDQ_DCB;
3908 	} else {
3909 		struct rte_eth_dcb_rx_conf *rx_conf =
3910 				&eth_conf->rx_adv_conf.dcb_rx_conf;
3911 		struct rte_eth_dcb_tx_conf *tx_conf =
3912 				&eth_conf->tx_adv_conf.dcb_tx_conf;
3913 
3914 		memset(&rss_conf, 0, sizeof(struct rte_eth_rss_conf));
3915 
3916 		rc = rte_eth_dev_rss_hash_conf_get(pid, &rss_conf);
3917 		if (rc != 0)
3918 			return rc;
3919 
3920 		rx_conf->nb_tcs = num_tcs;
3921 		tx_conf->nb_tcs = num_tcs;
3922 
3923 		for (i = 0; i < RTE_ETH_DCB_NUM_USER_PRIORITIES; i++) {
3924 			rx_conf->dcb_tc[i] = i % num_tcs;
3925 			tx_conf->dcb_tc[i] = i % num_tcs;
3926 		}
3927 
3928 		eth_conf->rxmode.mq_mode =
3929 				(enum rte_eth_rx_mq_mode)
3930 					(rx_mq_mode & RTE_ETH_MQ_RX_DCB_RSS);
3931 		eth_conf->rx_adv_conf.rss_conf = rss_conf;
3932 		eth_conf->txmode.mq_mode = RTE_ETH_MQ_TX_DCB;
3933 	}
3934 
3935 	if (pfc_en)
3936 		eth_conf->dcb_capability_en =
3937 				RTE_ETH_DCB_PG_SUPPORT | RTE_ETH_DCB_PFC_SUPPORT;
3938 	else
3939 		eth_conf->dcb_capability_en = RTE_ETH_DCB_PG_SUPPORT;
3940 
3941 	return 0;
3942 }
3943 
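/*
 * Switch a port to DCB operation: build the DCB configuration, reconfigure
 * the device, adjust the global nb_rxq/nb_txq to the number of queues
 * usable in the selected mode and program the VLAN filter table.
 */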
3944 int
3945 init_port_dcb_config(portid_t pid,
3946 		     enum dcb_mode_enable dcb_mode,
3947 		     enum rte_eth_nb_tcs num_tcs,
3948 		     uint8_t pfc_en)
3949 {
3950 	struct rte_eth_conf port_conf;
3951 	struct rte_port *rte_port;
3952 	int retval;
3953 	uint16_t i;
3954 
3955 	if (num_procs > 1) {
3956 		fprintf(stderr, "The multi-process feature doesn't support DCB.\n");
3957 		return -ENOTSUP;
3958 	}
3959 	rte_port = &ports[pid];
3960 
3961 	/* retain the original device configuration. */
3962 	memcpy(&port_conf, &rte_port->dev_conf, sizeof(struct rte_eth_conf));
3963 
3964 	/* set configuration of DCB in VT mode and DCB in non-VT mode */
3965 	retval = get_eth_dcb_conf(pid, &port_conf, dcb_mode, num_tcs, pfc_en);
3966 	if (retval < 0)
3967 		return retval;
3968 	port_conf.rxmode.offloads |= RTE_ETH_RX_OFFLOAD_VLAN_FILTER;
3969 	/* remove RSS HASH offload for DCB in vt mode */
3970 	if (port_conf.rxmode.mq_mode == RTE_ETH_MQ_RX_VMDQ_DCB) {
3971 		port_conf.rxmode.offloads &= ~RTE_ETH_RX_OFFLOAD_RSS_HASH;
3972 		for (i = 0; i < nb_rxq; i++)
3973 			rte_port->rx_conf[i].offloads &=
3974 				~RTE_ETH_RX_OFFLOAD_RSS_HASH;
3975 	}
3976 
3977 	/* re-configure the device */
3978 	retval = rte_eth_dev_configure(pid, nb_rxq, nb_txq, &port_conf);
3979 	if (retval < 0)
3980 		return retval;
3981 
3982 	retval = eth_dev_info_get_print_err(pid, &rte_port->dev_info);
3983 	if (retval != 0)
3984 		return retval;
3985 
3986 	/* If dev_info.vmdq_pool_base is greater than 0,
3987 	 * the queue ids of the VMDQ pools start after the PF queues.
3988 	 */
3989 	if (dcb_mode == DCB_VT_ENABLED &&
3990 	    rte_port->dev_info.vmdq_pool_base > 0) {
3991 		fprintf(stderr,
3992 			"VMDQ_DCB multi-queue mode is nonsensical for port %d.\n",
3993 			pid);
3994 		return -1;
3995 	}
3996 
3997 	/* Assume the ports in testpmd have the same DCB capability
3998 	 * and have the same number of rxq and txq in DCB mode
3999 	 */
4000 	if (dcb_mode == DCB_VT_ENABLED) {
4001 		if (rte_port->dev_info.max_vfs > 0) {
4002 			nb_rxq = rte_port->dev_info.nb_rx_queues;
4003 			nb_txq = rte_port->dev_info.nb_tx_queues;
4004 		} else {
4005 			nb_rxq = rte_port->dev_info.max_rx_queues;
4006 			nb_txq = rte_port->dev_info.max_tx_queues;
4007 		}
4008 	} else {
4009 		/* if VT is disabled, use all PF queues */
4010 		if (rte_port->dev_info.vmdq_pool_base == 0) {
4011 			nb_rxq = rte_port->dev_info.max_rx_queues;
4012 			nb_txq = rte_port->dev_info.max_tx_queues;
4013 		} else {
4014 			nb_rxq = (queueid_t)num_tcs;
4015 			nb_txq = (queueid_t)num_tcs;
4016 
4017 		}
4018 	}
4019 	rx_free_thresh = 64;
4020 
4021 	memcpy(&rte_port->dev_conf, &port_conf, sizeof(struct rte_eth_conf));
4022 
4023 	rxtx_port_config(pid);
4024 	/* VLAN filter */
4025 	rte_port->dev_conf.rxmode.offloads |= RTE_ETH_RX_OFFLOAD_VLAN_FILTER;
4026 	for (i = 0; i < RTE_DIM(vlan_tags); i++)
4027 		rx_vft_set(pid, vlan_tags[i], 1);
4028 
4029 	retval = eth_macaddr_get_print_err(pid, &rte_port->eth_addr);
4030 	if (retval != 0)
4031 		return retval;
4032 
4033 	rte_port->dcb_flag = 1;
4034 
4035 	/* Enter DCB configuration status */
4036 	dcb_config = 1;
4037 
4038 	return 0;
4039 }
4040 
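/* Allocate and initialize the per-port data structures used by testpmd. */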
4041 static void
4042 init_port(void)
4043 {
4044 	int i;
4045 
4046 	/* Configuration of Ethernet ports. */
4047 	ports = rte_zmalloc("testpmd: ports",
4048 			    sizeof(struct rte_port) * RTE_MAX_ETHPORTS,
4049 			    RTE_CACHE_LINE_SIZE);
4050 	if (ports == NULL) {
4051 		rte_exit(EXIT_FAILURE,
4052 				"rte_zmalloc(%d struct rte_port) failed\n",
4053 				RTE_MAX_ETHPORTS);
4054 	}
4055 	for (i = 0; i < RTE_MAX_ETHPORTS; i++)
4056 		ports[i].xstats_info.allocated = false;
4057 	for (i = 0; i < RTE_MAX_ETHPORTS; i++)
4058 		LIST_INIT(&ports[i].flow_tunnel_list);
4059 	/* Initialize ports NUMA structures */
4060 	memset(port_numa, NUMA_NO_CONFIG, RTE_MAX_ETHPORTS);
4061 	memset(rxring_numa, NUMA_NO_CONFIG, RTE_MAX_ETHPORTS);
4062 	memset(txring_numa, NUMA_NO_CONFIG, RTE_MAX_ETHPORTS);
4063 }
4064 
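/* Release the ports (pmd_test_exit) and terminate the prompt. */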
4065 static void
4066 force_quit(void)
4067 {
4068 	pmd_test_exit();
4069 	prompt_exit();
4070 }
4071 
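/*
 * Periodic statistics display: clear the terminal with ANSI escape
 * sequences and print the NIC statistics of every forwarding port.
 */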
4072 static void
4073 print_stats(void)
4074 {
4075 	uint8_t i;
4076 	const char clr[] = { 27, '[', '2', 'J', '\0' };
4077 	const char top_left[] = { 27, '[', '1', ';', '1', 'H', '\0' };
4078 
4079 	/* Clear screen and move to top left */
4080 	printf("%s%s", clr, top_left);
4081 
4082 	printf("\nPort statistics ====================================");
4083 	for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++)
4084 		nic_stats_display(fwd_ports_ids[i]);
4085 
4086 	fflush(stdout);
4087 }
4088 
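/*
 * On SIGINT/SIGTERM: release the packet capture and latency statistics
 * resources, force testpmd to quit and, except on Windows, re-raise the
 * signal with the default handler so the expected exit status is reported.
 */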
4089 static void
4090 signal_handler(int signum)
4091 {
4092 	if (signum == SIGINT || signum == SIGTERM) {
4093 		fprintf(stderr, "\nSignal %d received, preparing to exit...\n",
4094 			signum);
4095 #ifdef RTE_LIB_PDUMP
4096 		/* uninitialize packet capture framework */
4097 		rte_pdump_uninit();
4098 #endif
4099 #ifdef RTE_LIB_LATENCYSTATS
4100 		if (latencystats_enabled != 0)
4101 			rte_latencystats_uninit();
4102 #endif
4103 		force_quit();
4104 		/* Set flag to indicate forced termination. */
4105 		f_quit = 1;
4106 		/* exit with the expected status */
4107 #ifndef RTE_EXEC_ENV_WINDOWS
4108 		signal(signum, SIG_DFL);
4109 		kill(getpid(), signum);
4110 #endif
4111 	}
4112 }
4113 
4114 int
4115 main(int argc, char** argv)
4116 {
4117 	int diag;
4118 	portid_t port_id;
4119 	uint16_t count;
4120 	int ret;
4121 
4122 	signal(SIGINT, signal_handler);
4123 	signal(SIGTERM, signal_handler);
4124 
4125 	testpmd_logtype = rte_log_register("testpmd");
4126 	if (testpmd_logtype < 0)
4127 		rte_exit(EXIT_FAILURE, "Cannot register log type");
4128 	rte_log_set_level(testpmd_logtype, RTE_LOG_DEBUG);
4129 
4130 	diag = rte_eal_init(argc, argv);
4131 	if (diag < 0)
4132 		rte_exit(EXIT_FAILURE, "Cannot init EAL: %s\n",
4133 			 rte_strerror(rte_errno));
4134 
4135 	ret = register_eth_event_callback();
4136 	if (ret != 0)
4137 		rte_exit(EXIT_FAILURE, "Cannot register for ethdev events");
4138 
4139 #ifdef RTE_LIB_PDUMP
4140 	/* initialize packet capture framework */
4141 	rte_pdump_init();
4142 #endif
4143 
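	/* Record the port ids of all probed Ethernet devices and count them. */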
4144 	count = 0;
4145 	RTE_ETH_FOREACH_DEV(port_id) {
4146 		ports_ids[count] = port_id;
4147 		count++;
4148 	}
4149 	nb_ports = (portid_t) count;
4150 	if (nb_ports == 0)
4151 		TESTPMD_LOG(WARNING, "No probed ethernet devices\n");
4152 
4153 	/* allocate port structures, and init them */
4154 	init_port();
4155 
4156 	set_def_fwd_config();
4157 	if (nb_lcores == 0)
4158 		rte_exit(EXIT_FAILURE, "No cores defined for forwarding\n"
4159 			 "Check the core mask argument\n");
4160 
4161 	/* Bitrate/latency stats disabled by default */
4162 #ifdef RTE_LIB_BITRATESTATS
4163 	bitrate_enabled = 0;
4164 #endif
4165 #ifdef RTE_LIB_LATENCYSTATS
4166 	latencystats_enabled = 0;
4167 #endif
4168 
4169 	/* on FreeBSD, mlockall() is disabled by default */
4170 #ifdef RTE_EXEC_ENV_FREEBSD
4171 	do_mlockall = 0;
4172 #else
4173 	do_mlockall = 1;
4174 #endif
4175 
4176 	argc -= diag;
4177 	argv += diag;
4178 	if (argc > 1)
4179 		launch_args_parse(argc, argv);
4180 
4181 #ifndef RTE_EXEC_ENV_WINDOWS
4182 	if (do_mlockall && mlockall(MCL_CURRENT | MCL_FUTURE)) {
4183 		TESTPMD_LOG(NOTICE, "mlockall() failed with error \"%s\"\n",
4184 			strerror(errno));
4185 	}
4186 #endif
4187 
4188 	if (tx_first && interactive)
4189 		rte_exit(EXIT_FAILURE, "--tx-first cannot be used in "
4190 				"interactive mode.\n");
4191 
4192 	if (tx_first && lsc_interrupt) {
4193 		fprintf(stderr,
4194 			"Warning: lsc_interrupt needs to be off when using tx_first. Disabling.\n");
4195 		lsc_interrupt = 0;
4196 	}
4197 
4198 	if (!nb_rxq && !nb_txq)
4199 		fprintf(stderr,
4200 			"Warning: Either rx or tx queues should be non-zero\n");
4201 
4202 	if (nb_rxq > 1 && nb_rxq > nb_txq)
4203 		fprintf(stderr,
4204 			"Warning: nb_rxq=%d enables RSS configuration, but nb_txq=%d will prevent it from being fully tested.\n",
4205 			nb_rxq, nb_txq);
4206 
4207 	init_config();
4208 
4209 	if (hot_plug) {
4210 		ret = rte_dev_hotplug_handle_enable();
4211 		if (ret) {
4212 			RTE_LOG(ERR, EAL,
4213 				"Failed to enable hotplug handling.\n");
4214 			return -1;
4215 		}
4216 
4217 		ret = rte_dev_event_monitor_start();
4218 		if (ret) {
4219 			RTE_LOG(ERR, EAL,
4220 				"Failed to start device event monitoring.\n");
4221 			return -1;
4222 		}
4223 
4224 		ret = rte_dev_event_callback_register(NULL,
4225 			dev_event_callback, NULL);
4226 		if (ret) {
4227 			RTE_LOG(ERR, EAL,
4228 				"Failed to register device event callback\n");
4229 			return -1;
4230 		}
4231 	}
4232 
4233 	if (!no_device_start && start_port(RTE_PORT_ALL) != 0)
4234 		rte_exit(EXIT_FAILURE, "Start ports failed\n");
4235 
4236 	/* set all ports to promiscuous mode by default */
4237 	RTE_ETH_FOREACH_DEV(port_id) {
4238 		ret = rte_eth_promiscuous_enable(port_id);
4239 		if (ret != 0)
4240 			fprintf(stderr,
4241 				"Error while enabling promiscuous mode for port %u: %s - ignored\n",
4242 				port_id, rte_strerror(-ret));
4243 	}
4244 
4245 	/* Init metrics library */
4246 	rte_metrics_init(rte_socket_id());
4247 
4248 #ifdef RTE_LIB_LATENCYSTATS
4249 	if (latencystats_enabled != 0) {
4250 		int ret = rte_latencystats_init(1, NULL);
4251 		if (ret)
4252 			fprintf(stderr,
4253 				"Warning: latencystats init() returned error %d\n",
4254 				ret);
4255 		fprintf(stderr, "Latencystats running on lcore %d\n",
4256 			latencystats_lcore_id);
4257 	}
4258 #endif
4259 
4260 	/* Setup bitrate stats */
4261 #ifdef RTE_LIB_BITRATESTATS
4262 	if (bitrate_enabled != 0) {
4263 		bitrate_data = rte_stats_bitrate_create();
4264 		if (bitrate_data == NULL)
4265 			rte_exit(EXIT_FAILURE,
4266 				"Could not allocate bitrate data.\n");
4267 		rte_stats_bitrate_reg(bitrate_data);
4268 	}
4269 #endif
4270 #ifdef RTE_LIB_CMDLINE
4271 	if (strlen(cmdline_filename) != 0)
4272 		cmdline_read_from_file(cmdline_filename);
4273 
4274 	if (interactive == 1) {
4275 		if (auto_start) {
4276 			printf("Start automatic packet forwarding\n");
4277 			start_packet_forwarding(0);
4278 		}
4279 		prompt();
4280 		pmd_test_exit();
4281 	} else
4282 #endif
4283 	{
4284 		char c;
4285 		int rc;
4286 
4287 		f_quit = 0;
4288 
4289 		printf("No commandline core given, starting packet forwarding\n");
4290 		start_packet_forwarding(tx_first);
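		/*
		 * With --stats-period, refresh the port statistics every
		 * stats_period seconds until a termination signal sets f_quit.
		 */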
4291 		if (stats_period != 0) {
4292 			uint64_t prev_time = 0, cur_time, diff_time = 0;
4293 			uint64_t timer_period;
4294 
4295 			/* Convert to number of cycles */
4296 			timer_period = stats_period * rte_get_timer_hz();
4297 
4298 			while (f_quit == 0) {
4299 				cur_time = rte_get_timer_cycles();
4300 				diff_time += cur_time - prev_time;
4301 
4302 				if (diff_time >= timer_period) {
4303 					print_stats();
4304 					/* Reset the timer */
4305 					diff_time = 0;
4306 				}
4307 				/* Sleep to avoid unnecessary checks */
4308 				prev_time = cur_time;
4309 				rte_delay_us_sleep(US_PER_S);
4310 			}
4311 		}
4312 
4313 		printf("Press enter to exit\n");
4314 		rc = read(0, &c, 1);
4315 		pmd_test_exit();
4316 		if (rc < 0)
4317 			return 1;
4318 	}
4319 
4320 	ret = rte_eal_cleanup();
4321 	if (ret != 0)
4322 		rte_exit(EXIT_FAILURE,
4323 			 "EAL cleanup failed: %s\n", strerror(-ret));
4324 
4325 	return EXIT_SUCCESS;
4326 }
4327