xref: /dpdk/app/test-pmd/testpmd.c (revision 3bb3ebb51b789d4ecb417cbdb1dce5c7211f6f18)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2017 Intel Corporation
3  */
4 
5 #include <stdarg.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <signal.h>
9 #include <string.h>
10 #include <time.h>
11 #include <fcntl.h>
12 #include <sys/mman.h>
13 #include <sys/types.h>
14 #include <errno.h>
15 #include <stdbool.h>
16 
17 #include <sys/queue.h>
18 #include <sys/stat.h>
19 
20 #include <stdint.h>
21 #include <unistd.h>
22 #include <inttypes.h>
23 
24 #include <rte_common.h>
25 #include <rte_errno.h>
26 #include <rte_byteorder.h>
27 #include <rte_log.h>
28 #include <rte_debug.h>
29 #include <rte_cycles.h>
30 #include <rte_memory.h>
31 #include <rte_memcpy.h>
32 #include <rte_launch.h>
33 #include <rte_eal.h>
34 #include <rte_alarm.h>
35 #include <rte_per_lcore.h>
36 #include <rte_lcore.h>
37 #include <rte_atomic.h>
38 #include <rte_branch_prediction.h>
39 #include <rte_mempool.h>
40 #include <rte_malloc.h>
41 #include <rte_mbuf.h>
42 #include <rte_mbuf_pool_ops.h>
43 #include <rte_interrupts.h>
44 #include <rte_pci.h>
45 #include <rte_ether.h>
46 #include <rte_ethdev.h>
47 #include <rte_dev.h>
48 #include <rte_string_fns.h>
49 #ifdef RTE_NET_IXGBE
50 #include <rte_pmd_ixgbe.h>
51 #endif
52 #ifdef RTE_LIB_PDUMP
53 #include <rte_pdump.h>
54 #endif
55 #include <rte_flow.h>
56 #include <rte_metrics.h>
57 #ifdef RTE_LIB_BITRATESTATS
58 #include <rte_bitrate.h>
59 #endif
60 #ifdef RTE_LIB_LATENCYSTATS
61 #include <rte_latencystats.h>
62 #endif
63 
64 #include "testpmd.h"
65 
66 #ifndef MAP_HUGETLB
67 /* FreeBSD may not have MAP_HUGETLB (in fact, it probably doesn't) */
68 #define HUGE_FLAG (0x40000)
69 #else
70 #define HUGE_FLAG MAP_HUGETLB
71 #endif
72 
73 #ifndef MAP_HUGE_SHIFT
74 /* older kernels (or FreeBSD) will not have this define */
75 #define HUGE_SHIFT (26)
76 #else
77 #define HUGE_SHIFT MAP_HUGE_SHIFT
78 #endif
79 
80 #define EXTMEM_HEAP_NAME "extmem"
81 #define EXTBUF_ZONE_SIZE RTE_PGSIZE_2M
82 
83 uint16_t verbose_level = 0; /**< Silent by default. */
84 int testpmd_logtype; /**< Log type for testpmd logs */
85 
86 /* Use the main core for the command line? */
87 uint8_t interactive = 0;
88 uint8_t auto_start = 0;
89 uint8_t tx_first;
90 char cmdline_filename[PATH_MAX] = {0};
91 
92 /*
93  * NUMA support configuration.
94  * When set, the NUMA support attempts to dispatch the allocation of the
95  * RX and TX memory rings, and of the DMA memory buffers (mbufs) for the
96  * probed ports among the CPU sockets 0 and 1.
97  * Otherwise, all memory is allocated from CPU socket 0.
98  */
99 uint8_t numa_support = 1; /**< numa enabled by default */
100 
101 /*
102  * In UMA mode, all memory is allocated from socket 0 if --socket-num is
103  * not configured.
104  */
105 uint8_t socket_num = UMA_NO_CONFIG;
106 
107 /*
108  * Select mempool allocation type:
109  * - native: use regular DPDK memory
110  * - anon: use regular DPDK memory to create mempool, but populate using
111  *         anonymous memory (may not be IOVA-contiguous)
112  * - xmem: use externally allocated hugepage memory
113  */
114 uint8_t mp_alloc_type = MP_ALLOC_NATIVE;
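/*
 * A minimal usage sketch, assuming the usual testpmd command-line option
 * name: the allocation type above is selected at start-up, e.g.
 *   dpdk-testpmd -l 0-3 -n 4 -- -i --mp-alloc=xmem
 * which sets mp_alloc_type to MP_ALLOC_XMEM so that the mbuf pools are
 * backed by externally allocated hugepage memory.
 */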
115 
116 /*
117  * Store the specified sockets on which the memory pools used by the ports
118  * are allocated.
119  */
120 uint8_t port_numa[RTE_MAX_ETHPORTS];
121 
122 /*
123  * Store the specified sockets on which the RX rings used by the ports
124  * are allocated.
125  */
126 uint8_t rxring_numa[RTE_MAX_ETHPORTS];
127 
128 /*
129  * Store the specified sockets on which the TX rings used by the ports
130  * are allocated.
131  */
132 uint8_t txring_numa[RTE_MAX_ETHPORTS];
133 
134 /*
135  * Record the Ethernet address of peer target ports to which packets are
136  * forwarded.
137  * Must be instantiated with the ethernet addresses of peer traffic generator
138  * ports.
139  */
140 struct rte_ether_addr peer_eth_addrs[RTE_MAX_ETHPORTS];
141 portid_t nb_peer_eth_addrs = 0;
142 
143 /*
144  * Probed Target Environment.
145  */
146 struct rte_port *ports;	       /**< For all probed ethernet ports. */
147 portid_t nb_ports;             /**< Number of probed ethernet ports. */
148 struct fwd_lcore **fwd_lcores; /**< For all probed logical cores. */
149 lcoreid_t nb_lcores;           /**< Number of probed logical cores. */
150 
151 portid_t ports_ids[RTE_MAX_ETHPORTS]; /**< Store all port ids. */
152 
153 /*
154  * Test Forwarding Configuration.
155  *    nb_fwd_lcores <= nb_cfg_lcores <= nb_lcores
156  *    nb_fwd_ports  <= nb_cfg_ports  <= nb_ports
157  */
158 lcoreid_t nb_cfg_lcores; /**< Number of configured logical cores. */
159 lcoreid_t nb_fwd_lcores; /**< Number of forwarding logical cores. */
160 portid_t  nb_cfg_ports;  /**< Number of configured ports. */
161 portid_t  nb_fwd_ports;  /**< Number of forwarding ports. */
162 
163 unsigned int fwd_lcores_cpuids[RTE_MAX_LCORE]; /**< CPU ids configuration. */
164 portid_t fwd_ports_ids[RTE_MAX_ETHPORTS];      /**< Port ids configuration. */
165 
166 struct fwd_stream **fwd_streams; /**< For each RX queue of each port. */
167 streamid_t nb_fwd_streams;       /**< Is equal to (nb_ports * nb_rxq). */
168 
169 /*
170  * Forwarding engines.
171  */
172 struct fwd_engine * fwd_engines[] = {
173 	&io_fwd_engine,
174 	&mac_fwd_engine,
175 	&mac_swap_engine,
176 	&flow_gen_engine,
177 	&rx_only_engine,
178 	&tx_only_engine,
179 	&csum_fwd_engine,
180 	&icmp_echo_engine,
181 	&noisy_vnf_engine,
182 	&five_tuple_swap_fwd_engine,
183 #ifdef RTE_LIBRTE_IEEE1588
184 	&ieee1588_fwd_engine,
185 #endif
186 	NULL,
187 };
188 
189 struct rte_mempool *mempools[RTE_MAX_NUMA_NODES * MAX_SEGS_BUFFER_SPLIT];
190 uint16_t mempool_flags;
191 
192 struct fwd_config cur_fwd_config;
193 struct fwd_engine *cur_fwd_eng = &io_fwd_engine; /**< IO mode by default. */
194 uint32_t retry_enabled;
195 uint32_t burst_tx_delay_time = BURST_TX_WAIT_US;
196 uint32_t burst_tx_retry_num = BURST_TX_RETRIES;
197 
198 uint32_t mbuf_data_size_n = 1; /* Number of specified mbuf sizes. */
199 uint16_t mbuf_data_size[MAX_SEGS_BUFFER_SPLIT] = {
200 	DEFAULT_MBUF_DATA_SIZE
201 }; /**< Mbuf data space size. */
202 uint32_t param_total_num_mbufs = 0;  /**< number of mbufs in all pools - if
203                                       * specified on command-line. */
204 uint16_t stats_period; /**< Period to show statistics (disabled by default) */
205 
206 /*
207  * In a container, the process running with the 'stats-period' option cannot be
208  * terminated interactively. Set the flag to exit the stats-period loop on SIGINT/SIGTERM.
209  */
210 uint8_t f_quit;
211 
212 /*
213  * Configuration of packet segments used to scatter received packets
214  * if any of the split features is configured.
215  */
216 uint16_t rx_pkt_seg_lengths[MAX_SEGS_BUFFER_SPLIT];
217 uint8_t  rx_pkt_nb_segs; /**< Number of segments to split */
218 uint16_t rx_pkt_seg_offsets[MAX_SEGS_BUFFER_SPLIT];
219 uint8_t  rx_pkt_nb_offs; /**< Number of specified offsets */
220 
221 /*
222  * Configuration of packet segments used by the "txonly" processing engine.
223  */
224 uint16_t tx_pkt_length = TXONLY_DEF_PACKET_LEN; /**< TXONLY packet length. */
225 uint16_t tx_pkt_seg_lengths[RTE_MAX_SEGS_PER_PKT] = {
226 	TXONLY_DEF_PACKET_LEN,
227 };
228 uint8_t  tx_pkt_nb_segs = 1; /**< Number of segments in TXONLY packets */
229 
230 enum tx_pkt_split tx_pkt_split = TX_PKT_SPLIT_OFF;
231 /**< Split policy for packets to TX. */
232 
233 uint8_t txonly_multi_flow;
234 /**< Whether multiple flows are generated in TXONLY mode. */
235 
236 uint32_t tx_pkt_times_inter;
237 /**< Timings for send scheduling in TXONLY mode, time between bursts. */
238 
239 uint32_t tx_pkt_times_intra;
240 /**< Timings for send scheduling in TXONLY mode, time between packets. */
241 
242 uint16_t nb_pkt_per_burst = DEF_PKT_BURST; /**< Number of packets per burst. */
243 uint16_t nb_pkt_flowgen_clones; /**< Number of Tx packet clones to send in flowgen mode. */
244 uint16_t mb_mempool_cache = DEF_MBUF_CACHE; /**< Size of mbuf mempool cache. */
245 
246 /* Whether the current configuration is in DCB mode; 0 means it is not. */
247 uint8_t dcb_config = 0;
248 
249 /* Whether DCB is in testing status */
250 uint8_t dcb_test = 0;
251 
252 /*
253  * Configurable number of RX/TX queues.
254  */
255 queueid_t nb_hairpinq; /**< Number of hairpin queues per port. */
256 queueid_t nb_rxq = 1; /**< Number of RX queues per port. */
257 queueid_t nb_txq = 1; /**< Number of TX queues per port. */
258 
259 /*
260  * Configurable number of RX/TX ring descriptors.
261  * Defaults are supplied by drivers via ethdev.
262  */
263 #define RTE_TEST_RX_DESC_DEFAULT 0
264 #define RTE_TEST_TX_DESC_DEFAULT 0
265 uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT; /**< Number of RX descriptors. */
266 uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT; /**< Number of TX descriptors. */
267 
268 #define RTE_PMD_PARAM_UNSET -1
269 /*
270  * Configurable values of RX and TX ring threshold registers.
271  */
272 
273 int8_t rx_pthresh = RTE_PMD_PARAM_UNSET;
274 int8_t rx_hthresh = RTE_PMD_PARAM_UNSET;
275 int8_t rx_wthresh = RTE_PMD_PARAM_UNSET;
276 
277 int8_t tx_pthresh = RTE_PMD_PARAM_UNSET;
278 int8_t tx_hthresh = RTE_PMD_PARAM_UNSET;
279 int8_t tx_wthresh = RTE_PMD_PARAM_UNSET;
280 
281 /*
282  * Configurable value of RX free threshold.
283  */
284 int16_t rx_free_thresh = RTE_PMD_PARAM_UNSET;
285 
286 /*
287  * Configurable value of RX drop enable.
288  */
289 int8_t rx_drop_en = RTE_PMD_PARAM_UNSET;
290 
291 /*
292  * Configurable value of TX free threshold.
293  */
294 int16_t tx_free_thresh = RTE_PMD_PARAM_UNSET;
295 
296 /*
297  * Configurable value of TX RS bit threshold.
298  */
299 int16_t tx_rs_thresh = RTE_PMD_PARAM_UNSET;
300 
301 /*
302  * Configurable value of buffered packets before sending.
303  */
304 uint16_t noisy_tx_sw_bufsz;
305 
306 /*
307  * Configurable value of packet buffer timeout.
308  */
309 uint16_t noisy_tx_sw_buf_flush_time;
310 
311 /*
312  * Configurable value for size of VNF internal memory area
313  * used for simulating noisy neighbour behaviour
314  */
315 uint64_t noisy_lkup_mem_sz;
316 
317 /*
318  * Configurable value of number of random writes done in
319  * VNF simulation memory area.
320  */
321 uint64_t noisy_lkup_num_writes;
322 
323 /*
324  * Configurable value of number of random reads done in
325  * VNF simulation memory area.
326  */
327 uint64_t noisy_lkup_num_reads;
328 
329 /*
330  * Configurable value of number of random reads/writes done in
331  * VNF simulation memory area.
332  */
333 uint64_t noisy_lkup_num_reads_writes;
334 
335 /*
336  * Receive Side Scaling (RSS) configuration.
337  */
338 uint64_t rss_hf = ETH_RSS_IP; /* RSS IP by default. */
339 
340 /*
341  * Port topology configuration
342  */
343 uint16_t port_topology = PORT_TOPOLOGY_PAIRED; /* Ports are paired by default */
344 
345 /*
346  * Avoid flushing all the RX streams before starting forwarding.
347  */
348 uint8_t no_flush_rx = 0; /* flush by default */
349 
350 /*
351  * Flow API isolated mode.
352  */
353 uint8_t flow_isolate_all;
354 
355 /*
356  * Avoid checking the link status when starting/stopping a port.
357  */
358 uint8_t no_link_check = 0; /* check by default */
359 
360 /*
361  * Don't automatically start all ports in interactive mode.
362  */
363 uint8_t no_device_start = 0;
364 
365 /*
366  * Enable link status change notification
367  */
368 uint8_t lsc_interrupt = 1; /* enabled by default */
369 
370 /*
371  * Enable device removal notification.
372  */
373 uint8_t rmv_interrupt = 1; /* enabled by default */
374 
375 uint8_t hot_plug = 0; /**< hotplug disabled by default. */
376 
377 /* After attach, port setup is called on event or by iterator */
378 bool setup_on_probe_event = true;
379 
380 /* Clear ptypes on port initialization. */
381 uint8_t clear_ptypes = true;
382 
383 /* Hairpin ports configuration mode. */
384 uint16_t hairpin_mode;
385 
386 /* Pretty printing of ethdev events */
387 static const char * const eth_event_desc[] = {
388 	[RTE_ETH_EVENT_UNKNOWN] = "unknown",
389 	[RTE_ETH_EVENT_INTR_LSC] = "link state change",
390 	[RTE_ETH_EVENT_QUEUE_STATE] = "queue state",
391 	[RTE_ETH_EVENT_INTR_RESET] = "reset",
392 	[RTE_ETH_EVENT_VF_MBOX] = "VF mbox",
393 	[RTE_ETH_EVENT_IPSEC] = "IPsec",
394 	[RTE_ETH_EVENT_MACSEC] = "MACsec",
395 	[RTE_ETH_EVENT_INTR_RMV] = "device removal",
396 	[RTE_ETH_EVENT_NEW] = "device probed",
397 	[RTE_ETH_EVENT_DESTROY] = "device released",
398 	[RTE_ETH_EVENT_FLOW_AGED] = "flow aged",
399 	[RTE_ETH_EVENT_MAX] = NULL,
400 };
401 
402 /*
403  * Display or mask ether events
404  * Default to all events except VF_MBOX
405  */
406 uint32_t event_print_mask = (UINT32_C(1) << RTE_ETH_EVENT_UNKNOWN) |
407 			    (UINT32_C(1) << RTE_ETH_EVENT_INTR_LSC) |
408 			    (UINT32_C(1) << RTE_ETH_EVENT_QUEUE_STATE) |
409 			    (UINT32_C(1) << RTE_ETH_EVENT_INTR_RESET) |
410 			    (UINT32_C(1) << RTE_ETH_EVENT_IPSEC) |
411 			    (UINT32_C(1) << RTE_ETH_EVENT_MACSEC) |
412 			    (UINT32_C(1) << RTE_ETH_EVENT_INTR_RMV) |
413 			    (UINT32_C(1) << RTE_ETH_EVENT_FLOW_AGED);
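/*
 * A minimal sketch of how the mask above can be consulted, assuming an
 * event 'type' and a 'port_id' are at hand (names are illustrative only):
 *   if ((event_print_mask & (UINT32_C(1) << type)) != 0)
 *       printf("Port %u: %s event\n", port_id, eth_event_desc[type]);
 */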
414 /*
415  * Decide if all memory is locked for performance.
416  */
417 int do_mlockall = 0;
418 
419 /*
420  * NIC bypass mode configuration options.
421  */
422 
423 #if defined RTE_NET_IXGBE && defined RTE_LIBRTE_IXGBE_BYPASS
424 /* The NIC bypass watchdog timeout. */
425 uint32_t bypass_timeout = RTE_PMD_IXGBE_BYPASS_TMT_OFF;
426 #endif
427 
428 
429 #ifdef RTE_LIB_LATENCYSTATS
430 
431 /*
432  * Set when latency stats are enabled on the command line.
433  */
434 uint8_t latencystats_enabled;
435 
436 /*
437  * Lcore ID to serve latency statistics.
438  */
439 lcoreid_t latencystats_lcore_id = -1;
440 
441 #endif
442 
443 /*
444  * Ethernet device configuration.
445  */
446 struct rte_eth_rxmode rx_mode = {
447 	/* Default maximum frame length.
448 	 * Zero is converted to "RTE_ETHER_MTU + PMD Ethernet overhead"
449 	 * in init_config().
450 	 */
451 	.max_rx_pkt_len = 0,
452 };
453 
454 struct rte_eth_txmode tx_mode = {
455 	.offloads = DEV_TX_OFFLOAD_MBUF_FAST_FREE,
456 };
457 
458 struct rte_fdir_conf fdir_conf = {
459 	.mode = RTE_FDIR_MODE_NONE,
460 	.pballoc = RTE_FDIR_PBALLOC_64K,
461 	.status = RTE_FDIR_REPORT_STATUS,
462 	.mask = {
463 		.vlan_tci_mask = 0xFFEF,
464 		.ipv4_mask     = {
465 			.src_ip = 0xFFFFFFFF,
466 			.dst_ip = 0xFFFFFFFF,
467 		},
468 		.ipv6_mask     = {
469 			.src_ip = {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
470 			.dst_ip = {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
471 		},
472 		.src_port_mask = 0xFFFF,
473 		.dst_port_mask = 0xFFFF,
474 		.mac_addr_byte_mask = 0xFF,
475 		.tunnel_type_mask = 1,
476 		.tunnel_id_mask = 0xFFFFFFFF,
477 	},
478 	.drop_queue = 127,
479 };
480 
481 volatile int test_done = 1; /* stop packet forwarding when set to 1. */
482 
483 /*
484  * Display zero values by default for xstats
485  */
486 uint8_t xstats_hide_zero;
487 
488 /*
489  * Measure of CPU cycles disabled by default
490  */
491 uint8_t record_core_cycles;
492 
493 /*
494  * Display of RX and TX bursts disabled by default
495  */
496 uint8_t record_burst_stats;
497 
498 unsigned int num_sockets = 0;
499 unsigned int socket_ids[RTE_MAX_NUMA_NODES];
500 
501 #ifdef RTE_LIB_BITRATESTATS
502 /* Bitrate statistics */
503 struct rte_stats_bitrates *bitrate_data;
504 lcoreid_t bitrate_lcore_id;
505 uint8_t bitrate_enabled;
506 #endif
507 
508 struct gro_status gro_ports[RTE_MAX_ETHPORTS];
509 uint8_t gro_flush_cycles = GRO_DEFAULT_FLUSH_CYCLES;
510 
511 /*
512  * Hexadecimal bitmask of the RX multi-queue modes that can be enabled.
513  */
514 enum rte_eth_rx_mq_mode rx_mq_mode = ETH_MQ_RX_VMDQ_DCB_RSS;
515 
516 /*
517  * Used to set forced link speed
518  */
519 uint32_t eth_link_speed;
520 
521 /* Forward function declarations */
522 static void setup_attached_port(portid_t pi);
523 static void check_all_ports_link_status(uint32_t port_mask);
524 static int eth_event_callback(portid_t port_id,
525 			      enum rte_eth_event_type type,
526 			      void *param, void *ret_param);
527 static void dev_event_callback(const char *device_name,
528 				enum rte_dev_event_type type,
529 				void *param);
530 
531 /*
532  * Check if all the ports are started.
533  * If yes, return positive value. If not, return zero.
534  */
535 static int all_ports_started(void);
536 
537 struct gso_status gso_ports[RTE_MAX_ETHPORTS];
538 uint16_t gso_max_segment_size = RTE_ETHER_MAX_LEN - RTE_ETHER_CRC_LEN;
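/* With the standard RTE_ETHER_MAX_LEN (1518) and RTE_ETHER_CRC_LEN (4),
 * the default above works out to 1514 bytes per GSO segment.
 */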
539 
540 /* Holds the registered mbuf dynamic flags names. */
541 char dynf_names[64][RTE_MBUF_DYN_NAMESIZE];
542 
543 /*
544  * Helper function to check whether a socket ID has not been discovered yet.
545  * Return a positive value if the socket ID is new, zero if it is already known.
546  */
547 int
548 new_socket_id(unsigned int socket_id)
549 {
550 	unsigned int i;
551 
552 	for (i = 0; i < num_sockets; i++) {
553 		if (socket_ids[i] == socket_id)
554 			return 0;
555 	}
556 	return 1;
557 }
558 
559 /*
560  * Setup default configuration.
561  */
562 static void
563 set_default_fwd_lcores_config(void)
564 {
565 	unsigned int i;
566 	unsigned int nb_lc;
567 	unsigned int sock_num;
568 
569 	nb_lc = 0;
570 	for (i = 0; i < RTE_MAX_LCORE; i++) {
571 		if (!rte_lcore_is_enabled(i))
572 			continue;
573 		sock_num = rte_lcore_to_socket_id(i);
574 		if (new_socket_id(sock_num)) {
575 			if (num_sockets >= RTE_MAX_NUMA_NODES) {
576 				rte_exit(EXIT_FAILURE,
577 					 "Total sockets greater than %u\n",
578 					 RTE_MAX_NUMA_NODES);
579 			}
580 			socket_ids[num_sockets++] = sock_num;
581 		}
582 		if (i == rte_get_main_lcore())
583 			continue;
584 		fwd_lcores_cpuids[nb_lc++] = i;
585 	}
586 	nb_lcores = (lcoreid_t) nb_lc;
587 	nb_cfg_lcores = nb_lcores;
588 	nb_fwd_lcores = 1;
589 }
590 
591 static void
592 set_def_peer_eth_addrs(void)
593 {
594 	portid_t i;
595 
596 	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
597 		peer_eth_addrs[i].addr_bytes[0] = RTE_ETHER_LOCAL_ADMIN_ADDR;
598 		peer_eth_addrs[i].addr_bytes[5] = i;
599 	}
600 }
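/*
 * For example, with RTE_ETHER_LOCAL_ADMIN_ADDR being 0x02, the default
 * peer address of port 3 becomes 02:00:00:00:00:03.
 */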
601 
602 static void
603 set_default_fwd_ports_config(void)
604 {
605 	portid_t pt_id;
606 	int i = 0;
607 
608 	RTE_ETH_FOREACH_DEV(pt_id) {
609 		fwd_ports_ids[i++] = pt_id;
610 
611 		/* Update sockets info according to the attached device */
612 		int socket_id = rte_eth_dev_socket_id(pt_id);
613 		if (socket_id >= 0 && new_socket_id(socket_id)) {
614 			if (num_sockets >= RTE_MAX_NUMA_NODES) {
615 				rte_exit(EXIT_FAILURE,
616 					 "Total sockets greater than %u\n",
617 					 RTE_MAX_NUMA_NODES);
618 			}
619 			socket_ids[num_sockets++] = socket_id;
620 		}
621 	}
622 
623 	nb_cfg_ports = nb_ports;
624 	nb_fwd_ports = nb_ports;
625 }
626 
627 void
628 set_def_fwd_config(void)
629 {
630 	set_default_fwd_lcores_config();
631 	set_def_peer_eth_addrs();
632 	set_default_fwd_ports_config();
633 }
634 
635 /* extremely pessimistic estimation of memory required to create a mempool */
636 static int
637 calc_mem_size(uint32_t nb_mbufs, uint32_t mbuf_sz, size_t pgsz, size_t *out)
638 {
639 	unsigned int n_pages, mbuf_per_pg, leftover;
640 	uint64_t total_mem, mbuf_mem, obj_sz;
641 
642 	/* there is no good way to predict how much space the mempool will
643 	 * occupy because it will allocate chunks on the fly, and some of those
644 	 * will come from default DPDK memory while some will come from our
645 	 * external memory, so just assume 128MB will be enough for everyone.
646 	 */
647 	uint64_t hdr_mem = 128 << 20;
648 
649 	/* account for possible non-contiguousness */
650 	obj_sz = rte_mempool_calc_obj_size(mbuf_sz, 0, NULL);
651 	if (obj_sz > pgsz) {
652 		TESTPMD_LOG(ERR, "Object size is bigger than page size\n");
653 		return -1;
654 	}
655 
656 	mbuf_per_pg = pgsz / obj_sz;
657 	leftover = (nb_mbufs % mbuf_per_pg) > 0;
658 	n_pages = (nb_mbufs / mbuf_per_pg) + leftover;
659 
660 	mbuf_mem = n_pages * pgsz;
661 
662 	total_mem = RTE_ALIGN(hdr_mem + mbuf_mem, pgsz);
663 
664 	if (total_mem > SIZE_MAX) {
665 		TESTPMD_LOG(ERR, "Memory size too big\n");
666 		return -1;
667 	}
668 	*out = (size_t)total_mem;
669 
670 	return 0;
671 }
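/*
 * Rough worked example of the estimate above: with 2 MiB pages and a
 * per-mbuf object size of about 2.3 KiB (data room plus mempool object
 * header), roughly 900 mbufs fit in a page, so 180,000 mbufs need about
 * 200 pages (~400 MiB) on top of the fixed 128 MiB header allowance.
 */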
672 
673 static int
674 pagesz_flags(uint64_t page_sz)
675 {
676 	/* as per the mmap() manpage, the huge page size is encoded as the
677 	 * log2 of the page size, shifted by MAP_HUGE_SHIFT
678 	 */
679 	int log2 = rte_log2_u64(page_sz);
680 
681 	return (log2 << HUGE_SHIFT);
682 }
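/*
 * For example, a 2 MiB page gives rte_log2_u64(2 * 1024 * 1024) == 21,
 * so the value returned above is 21 << 26, matching the kernel's
 * MAP_HUGE_2MB definition.
 */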
683 
684 static void *
685 alloc_mem(size_t memsz, size_t pgsz, bool huge)
686 {
687 	void *addr;
688 	int flags;
689 
690 	/* allocate anonymous hugepages */
691 	flags = MAP_ANONYMOUS | MAP_PRIVATE;
692 	if (huge)
693 		flags |= HUGE_FLAG | pagesz_flags(pgsz);
694 
695 	addr = mmap(NULL, memsz, PROT_READ | PROT_WRITE, flags, -1, 0);
696 	if (addr == MAP_FAILED)
697 		return NULL;
698 
699 	return addr;
700 }
701 
702 struct extmem_param {
703 	void *addr;
704 	size_t len;
705 	size_t pgsz;
706 	rte_iova_t *iova_table;
707 	unsigned int iova_table_len;
708 };
709 
710 static int
711 create_extmem(uint32_t nb_mbufs, uint32_t mbuf_sz, struct extmem_param *param,
712 		bool huge)
713 {
714 	uint64_t pgsizes[] = {RTE_PGSIZE_2M, RTE_PGSIZE_1G, /* x86_64, ARM */
715 			RTE_PGSIZE_16M, RTE_PGSIZE_16G};    /* POWER */
716 	unsigned int cur_page, n_pages, pgsz_idx;
717 	size_t mem_sz, cur_pgsz;
718 	rte_iova_t *iovas = NULL;
719 	void *addr;
720 	int ret;
721 
722 	for (pgsz_idx = 0; pgsz_idx < RTE_DIM(pgsizes); pgsz_idx++) {
723 		/* skip anything that is too big */
724 		if (pgsizes[pgsz_idx] > SIZE_MAX)
725 			continue;
726 
727 		cur_pgsz = pgsizes[pgsz_idx];
728 
729 		/* if we were told not to allocate hugepages, override */
730 		if (!huge)
731 			cur_pgsz = sysconf(_SC_PAGESIZE);
732 
733 		ret = calc_mem_size(nb_mbufs, mbuf_sz, cur_pgsz, &mem_sz);
734 		if (ret < 0) {
735 			TESTPMD_LOG(ERR, "Cannot calculate memory size\n");
736 			return -1;
737 		}
738 
739 		/* allocate our memory */
740 		addr = alloc_mem(mem_sz, cur_pgsz, huge);
741 
742 		/* if we couldn't allocate memory with a specified page size,
743 		 * that doesn't mean we can't do it with other page sizes, so
744 		 * try another one.
745 		 */
746 		if (addr == NULL)
747 			continue;
748 
749 		/* store IOVA addresses for every page in this memory area */
750 		n_pages = mem_sz / cur_pgsz;
751 
752 		iovas = malloc(sizeof(*iovas) * n_pages);
753 
754 		if (iovas == NULL) {
755 			TESTPMD_LOG(ERR, "Cannot allocate memory for iova addresses\n");
756 			goto fail;
757 		}
758 		/* lock memory if it's not huge pages */
759 		if (!huge)
760 			mlock(addr, mem_sz);
761 
762 		/* populate IOVA addresses */
763 		for (cur_page = 0; cur_page < n_pages; cur_page++) {
764 			rte_iova_t iova;
765 			size_t offset;
766 			void *cur;
767 
768 			offset = cur_pgsz * cur_page;
769 			cur = RTE_PTR_ADD(addr, offset);
770 
771 			/* touch the page before getting its IOVA */
772 			*(volatile char *)cur = 0;
773 
774 			iova = rte_mem_virt2iova(cur);
775 
776 			iovas[cur_page] = iova;
777 		}
778 
779 		break;
780 	}
781 	/* if we couldn't allocate anything */
782 	if (iovas == NULL)
783 		return -1;
784 
785 	param->addr = addr;
786 	param->len = mem_sz;
787 	param->pgsz = cur_pgsz;
788 	param->iova_table = iovas;
789 	param->iova_table_len = n_pages;
790 
791 	return 0;
792 fail:
793 	if (iovas)
794 		free(iovas);
795 	if (addr)
796 		munmap(addr, mem_sz);
797 
798 	return -1;
799 }
800 
801 static int
802 setup_extmem(uint32_t nb_mbufs, uint32_t mbuf_sz, bool huge)
803 {
804 	struct extmem_param param;
805 	int socket_id, ret;
806 
807 	memset(&param, 0, sizeof(param));
808 
809 	/* check if our heap exists */
810 	socket_id = rte_malloc_heap_get_socket(EXTMEM_HEAP_NAME);
811 	if (socket_id < 0) {
812 		/* create our heap */
813 		ret = rte_malloc_heap_create(EXTMEM_HEAP_NAME);
814 		if (ret < 0) {
815 			TESTPMD_LOG(ERR, "Cannot create heap\n");
816 			return -1;
817 		}
818 	}
819 
820 	ret = create_extmem(nb_mbufs, mbuf_sz, &param, huge);
821 	if (ret < 0) {
822 		TESTPMD_LOG(ERR, "Cannot create memory area\n");
823 		return -1;
824 	}
825 
826 	/* we now have a valid memory area, so add it to heap */
827 	ret = rte_malloc_heap_memory_add(EXTMEM_HEAP_NAME,
828 			param.addr, param.len, param.iova_table,
829 			param.iova_table_len, param.pgsz);
830 
831 	/* when using VFIO, memory is automatically mapped for DMA by EAL */
832 
833 	/* not needed any more */
834 	free(param.iova_table);
835 
836 	if (ret < 0) {
837 		TESTPMD_LOG(ERR, "Cannot add memory to heap\n");
838 		munmap(param.addr, param.len);
839 		return -1;
840 	}
841 
842 	/* success */
843 
844 	TESTPMD_LOG(DEBUG, "Allocated %zuMB of external memory\n",
845 			param.len >> 20);
846 
847 	return 0;
848 }
849 static void
850 dma_unmap_cb(struct rte_mempool *mp __rte_unused, void *opaque __rte_unused,
851 	     struct rte_mempool_memhdr *memhdr, unsigned mem_idx __rte_unused)
852 {
853 	uint16_t pid = 0;
854 	int ret;
855 
856 	RTE_ETH_FOREACH_DEV(pid) {
857 		struct rte_eth_dev *dev =
858 			&rte_eth_devices[pid];
859 
860 		ret = rte_dev_dma_unmap(dev->device, memhdr->addr, 0,
861 					memhdr->len);
862 		if (ret) {
863 			TESTPMD_LOG(DEBUG,
864 				    "unable to DMA unmap addr 0x%p "
865 				    "for device %s\n",
866 				    memhdr->addr, dev->data->name);
867 		}
868 	}
869 	ret = rte_extmem_unregister(memhdr->addr, memhdr->len);
870 	if (ret) {
871 		TESTPMD_LOG(DEBUG,
872 			    "unable to un-register addr 0x%p\n", memhdr->addr);
873 	}
874 }
875 
876 static void
877 dma_map_cb(struct rte_mempool *mp __rte_unused, void *opaque __rte_unused,
878 	   struct rte_mempool_memhdr *memhdr, unsigned mem_idx __rte_unused)
879 {
880 	uint16_t pid = 0;
881 	size_t page_size = sysconf(_SC_PAGESIZE);
882 	int ret;
883 
884 	ret = rte_extmem_register(memhdr->addr, memhdr->len, NULL, 0,
885 				  page_size);
886 	if (ret) {
887 		TESTPMD_LOG(DEBUG,
888 			    "unable to register addr 0x%p\n", memhdr->addr);
889 		return;
890 	}
891 	RTE_ETH_FOREACH_DEV(pid) {
892 		struct rte_eth_dev *dev =
893 			&rte_eth_devices[pid];
894 
895 		ret = rte_dev_dma_map(dev->device, memhdr->addr, 0,
896 				      memhdr->len);
897 		if (ret) {
898 			TESTPMD_LOG(DEBUG,
899 				    "unable to DMA map addr 0x%p "
900 				    "for device %s\n",
901 				    memhdr->addr, dev->data->name);
902 		}
903 	}
904 }
905 
906 static unsigned int
907 setup_extbuf(uint32_t nb_mbufs, uint16_t mbuf_sz, unsigned int socket_id,
908 	    char *pool_name, struct rte_pktmbuf_extmem **ext_mem)
909 {
910 	struct rte_pktmbuf_extmem *xmem;
911 	unsigned int ext_num, zone_num, elt_num;
912 	uint16_t elt_size;
913 
914 	elt_size = RTE_ALIGN_CEIL(mbuf_sz, RTE_CACHE_LINE_SIZE);
915 	elt_num = EXTBUF_ZONE_SIZE / elt_size;
916 	zone_num = (nb_mbufs + elt_num - 1) / elt_num;
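	/*
	 * E.g. with the default 2176-byte mbuf data size (already a multiple
	 * of the 64-byte cache line), elt_num = 2 MiB / 2176 = 963 buffers
	 * per zone, so 180,000 mbufs require 187 memzones.
	 */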
917 
918 	xmem = malloc(sizeof(struct rte_pktmbuf_extmem) * zone_num);
919 	if (xmem == NULL) {
920 		TESTPMD_LOG(ERR, "Cannot allocate memory for "
921 				 "external buffer descriptors\n");
922 		*ext_mem = NULL;
923 		return 0;
924 	}
925 	for (ext_num = 0; ext_num < zone_num; ext_num++) {
926 		struct rte_pktmbuf_extmem *xseg = xmem + ext_num;
927 		const struct rte_memzone *mz;
928 		char mz_name[RTE_MEMZONE_NAMESIZE];
929 		int ret;
930 
931 		ret = snprintf(mz_name, sizeof(mz_name),
932 			RTE_MEMPOOL_MZ_FORMAT "_xb_%u", pool_name, ext_num);
933 		if (ret < 0 || ret >= (int)sizeof(mz_name)) {
934 			errno = ENAMETOOLONG;
935 			ext_num = 0;
936 			break;
937 		}
938 		mz = rte_memzone_reserve_aligned(mz_name, EXTBUF_ZONE_SIZE,
939 						 socket_id,
940 						 RTE_MEMZONE_IOVA_CONTIG |
941 						 RTE_MEMZONE_1GB |
942 						 RTE_MEMZONE_SIZE_HINT_ONLY,
943 						 EXTBUF_ZONE_SIZE);
944 		if (mz == NULL) {
945 			/*
946 			 * The caller exits on external buffer creation
947 			 * error, so there is no need to free memzones.
948 			 */
949 			errno = ENOMEM;
950 			ext_num = 0;
951 			break;
952 		}
953 		xseg->buf_ptr = mz->addr;
954 		xseg->buf_iova = mz->iova;
955 		xseg->buf_len = EXTBUF_ZONE_SIZE;
956 		xseg->elt_size = elt_size;
957 	}
958 	if (ext_num == 0 && xmem != NULL) {
959 		free(xmem);
960 		xmem = NULL;
961 	}
962 	*ext_mem = xmem;
963 	return ext_num;
964 }
965 
966 /*
967  * Configuration initialisation done once at init time.
968  */
969 static struct rte_mempool *
970 mbuf_pool_create(uint16_t mbuf_seg_size, unsigned nb_mbuf,
971 		 unsigned int socket_id, uint16_t size_idx)
972 {
973 	char pool_name[RTE_MEMPOOL_NAMESIZE];
974 	struct rte_mempool *rte_mp = NULL;
975 	uint32_t mb_size;
976 
977 	mb_size = sizeof(struct rte_mbuf) + mbuf_seg_size;
978 	mbuf_poolname_build(socket_id, pool_name, sizeof(pool_name), size_idx);
979 
980 	TESTPMD_LOG(INFO,
981 		"create a new mbuf pool <%s>: n=%u, size=%u, socket=%u\n",
982 		pool_name, nb_mbuf, mbuf_seg_size, socket_id);
983 
984 	switch (mp_alloc_type) {
985 	case MP_ALLOC_NATIVE:
986 		{
987 			/* wrapper to rte_mempool_create() */
988 			TESTPMD_LOG(INFO, "preferred mempool ops selected: %s\n",
989 					rte_mbuf_best_mempool_ops());
990 			rte_mp = rte_pktmbuf_pool_create(pool_name, nb_mbuf,
991 				mb_mempool_cache, 0, mbuf_seg_size, socket_id);
992 			break;
993 		}
994 	case MP_ALLOC_ANON:
995 		{
996 			rte_mp = rte_mempool_create_empty(pool_name, nb_mbuf,
997 				mb_size, (unsigned int) mb_mempool_cache,
998 				sizeof(struct rte_pktmbuf_pool_private),
999 				socket_id, mempool_flags);
1000 			if (rte_mp == NULL)
1001 				goto err;
1002 
1003 			if (rte_mempool_populate_anon(rte_mp) == 0) {
1004 				rte_mempool_free(rte_mp);
1005 				rte_mp = NULL;
1006 				goto err;
1007 			}
1008 			rte_pktmbuf_pool_init(rte_mp, NULL);
1009 			rte_mempool_obj_iter(rte_mp, rte_pktmbuf_init, NULL);
1010 			rte_mempool_mem_iter(rte_mp, dma_map_cb, NULL);
1011 			break;
1012 		}
1013 	case MP_ALLOC_XMEM:
1014 	case MP_ALLOC_XMEM_HUGE:
1015 		{
1016 			int heap_socket;
1017 			bool huge = mp_alloc_type == MP_ALLOC_XMEM_HUGE;
1018 
1019 			if (setup_extmem(nb_mbuf, mbuf_seg_size, huge) < 0)
1020 				rte_exit(EXIT_FAILURE, "Could not create external memory\n");
1021 
1022 			heap_socket =
1023 				rte_malloc_heap_get_socket(EXTMEM_HEAP_NAME);
1024 			if (heap_socket < 0)
1025 				rte_exit(EXIT_FAILURE, "Could not get external memory socket ID\n");
1026 
1027 			TESTPMD_LOG(INFO, "preferred mempool ops selected: %s\n",
1028 					rte_mbuf_best_mempool_ops());
1029 			rte_mp = rte_pktmbuf_pool_create(pool_name, nb_mbuf,
1030 					mb_mempool_cache, 0, mbuf_seg_size,
1031 					heap_socket);
1032 			break;
1033 		}
1034 	case MP_ALLOC_XBUF:
1035 		{
1036 			struct rte_pktmbuf_extmem *ext_mem;
1037 			unsigned int ext_num;
1038 
1039 			ext_num = setup_extbuf(nb_mbuf,	mbuf_seg_size,
1040 					       socket_id, pool_name, &ext_mem);
1041 			if (ext_num == 0)
1042 				rte_exit(EXIT_FAILURE,
1043 					 "Can't create pinned data buffers\n");
1044 
1045 			TESTPMD_LOG(INFO, "preferred mempool ops selected: %s\n",
1046 					rte_mbuf_best_mempool_ops());
1047 			rte_mp = rte_pktmbuf_pool_create_extbuf
1048 					(pool_name, nb_mbuf, mb_mempool_cache,
1049 					 0, mbuf_seg_size, socket_id,
1050 					 ext_mem, ext_num);
1051 			free(ext_mem);
1052 			break;
1053 		}
1054 	default:
1055 		{
1056 			rte_exit(EXIT_FAILURE, "Invalid mempool creation mode\n");
1057 		}
1058 	}
1059 
1060 err:
1061 	if (rte_mp == NULL) {
1062 		rte_exit(EXIT_FAILURE,
1063 			"Creation of mbuf pool for socket %u failed: %s\n",
1064 			socket_id, rte_strerror(rte_errno));
1065 	} else if (verbose_level > 0) {
1066 		rte_mempool_dump(stdout, rte_mp);
1067 	}
1068 	return rte_mp;
1069 }
1070 
1071 /*
1072  * Check whether the given socket ID is valid in NUMA mode;
1073  * if valid, return 0, else return -1.
1074  */
1075 static int
1076 check_socket_id(const unsigned int socket_id)
1077 {
1078 	static int warning_once = 0;
1079 
1080 	if (new_socket_id(socket_id)) {
1081 		if (!warning_once && numa_support)
1082 			printf("Warning: NUMA should be configured manually by"
1083 			       " using --port-numa-config and"
1084 			       " --ring-numa-config parameters along with"
1085 			       " --numa.\n");
1086 		warning_once = 1;
1087 		return -1;
1088 	}
1089 	return 0;
1090 }
1091 
1092 /*
1093  * Get the allowed maximum number of RX queues.
1094  * *pid returns the port ID which has the minimal value of
1095  * max_rx_queues among all ports.
1096  */
1097 queueid_t
1098 get_allowed_max_nb_rxq(portid_t *pid)
1099 {
1100 	queueid_t allowed_max_rxq = RTE_MAX_QUEUES_PER_PORT;
1101 	bool max_rxq_valid = false;
1102 	portid_t pi;
1103 	struct rte_eth_dev_info dev_info;
1104 
1105 	RTE_ETH_FOREACH_DEV(pi) {
1106 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1107 			continue;
1108 
1109 		max_rxq_valid = true;
1110 		if (dev_info.max_rx_queues < allowed_max_rxq) {
1111 			allowed_max_rxq = dev_info.max_rx_queues;
1112 			*pid = pi;
1113 		}
1114 	}
1115 	return max_rxq_valid ? allowed_max_rxq : 0;
1116 }
1117 
1118 /*
1119  * Check input rxq is valid or not.
1120  * If input rxq is not greater than any of maximum number
1121  * of RX queues of all ports, it is valid.
1122  * if valid, return 0, else return -1
1123  */
1124 int
1125 check_nb_rxq(queueid_t rxq)
1126 {
1127 	queueid_t allowed_max_rxq;
1128 	portid_t pid = 0;
1129 
1130 	allowed_max_rxq = get_allowed_max_nb_rxq(&pid);
1131 	if (rxq > allowed_max_rxq) {
1132 		printf("Fail: input rxq (%u) can't be greater "
1133 		       "than max_rx_queues (%u) of port %u\n",
1134 		       rxq,
1135 		       allowed_max_rxq,
1136 		       pid);
1137 		return -1;
1138 	}
1139 	return 0;
1140 }
1141 
1142 /*
1143  * Get the allowed maximum number of TX queues.
1144  * *pid returns the port ID which has the minimal value of
1145  * max_tx_queues among all ports.
1146  */
1147 queueid_t
1148 get_allowed_max_nb_txq(portid_t *pid)
1149 {
1150 	queueid_t allowed_max_txq = RTE_MAX_QUEUES_PER_PORT;
1151 	bool max_txq_valid = false;
1152 	portid_t pi;
1153 	struct rte_eth_dev_info dev_info;
1154 
1155 	RTE_ETH_FOREACH_DEV(pi) {
1156 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1157 			continue;
1158 
1159 		max_txq_valid = true;
1160 		if (dev_info.max_tx_queues < allowed_max_txq) {
1161 			allowed_max_txq = dev_info.max_tx_queues;
1162 			*pid = pi;
1163 		}
1164 	}
1165 	return max_txq_valid ? allowed_max_txq : 0;
1166 }
1167 
1168 /*
1169  * Check input txq is valid or not.
1170  * If input txq is not greater than any of maximum number
1171  * of TX queues of all ports, it is valid.
1172  * if valid, return 0, else return -1
1173  */
1174 int
1175 check_nb_txq(queueid_t txq)
1176 {
1177 	queueid_t allowed_max_txq;
1178 	portid_t pid = 0;
1179 
1180 	allowed_max_txq = get_allowed_max_nb_txq(&pid);
1181 	if (txq > allowed_max_txq) {
1182 		printf("Fail: input txq (%u) can't be greater "
1183 		       "than max_tx_queues (%u) of port %u\n",
1184 		       txq,
1185 		       allowed_max_txq,
1186 		       pid);
1187 		return -1;
1188 	}
1189 	return 0;
1190 }
1191 
1192 /*
1193  * Get the allowed maximum number of RXDs of every rx queue.
1194  * *pid returns the port ID which has the minimal value of
1195  * max_rxd among all queues of all ports.
1196  */
1197 static uint16_t
1198 get_allowed_max_nb_rxd(portid_t *pid)
1199 {
1200 	uint16_t allowed_max_rxd = UINT16_MAX;
1201 	portid_t pi;
1202 	struct rte_eth_dev_info dev_info;
1203 
1204 	RTE_ETH_FOREACH_DEV(pi) {
1205 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1206 			continue;
1207 
1208 		if (dev_info.rx_desc_lim.nb_max < allowed_max_rxd) {
1209 			allowed_max_rxd = dev_info.rx_desc_lim.nb_max;
1210 			*pid = pi;
1211 		}
1212 	}
1213 	return allowed_max_rxd;
1214 }
1215 
1216 /*
1217  * Get the allowed minimal number of RXDs of every rx queue.
1218  * *pid returns the port ID which has the highest value of
1219  * min_rxd among all queues of all ports.
1220  */
1221 static uint16_t
1222 get_allowed_min_nb_rxd(portid_t *pid)
1223 {
1224 	uint16_t allowed_min_rxd = 0;
1225 	portid_t pi;
1226 	struct rte_eth_dev_info dev_info;
1227 
1228 	RTE_ETH_FOREACH_DEV(pi) {
1229 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1230 			continue;
1231 
1232 		if (dev_info.rx_desc_lim.nb_min > allowed_min_rxd) {
1233 			allowed_min_rxd = dev_info.rx_desc_lim.nb_min;
1234 			*pid = pi;
1235 		}
1236 	}
1237 
1238 	return allowed_min_rxd;
1239 }
1240 
1241 /*
1242  * Check input rxd is valid or not.
1243  * If input rxd is not greater than any of maximum number
1244  * of RXDs of every Rx queues and is not less than any of
1245  * minimal number of RXDs of every Rx queues, it is valid.
1246  * if valid, return 0, else return -1
1247  */
1248 int
1249 check_nb_rxd(queueid_t rxd)
1250 {
1251 	uint16_t allowed_max_rxd;
1252 	uint16_t allowed_min_rxd;
1253 	portid_t pid = 0;
1254 
1255 	allowed_max_rxd = get_allowed_max_nb_rxd(&pid);
1256 	if (rxd > allowed_max_rxd) {
1257 		printf("Fail: input rxd (%u) can't be greater "
1258 		       "than max_rxds (%u) of port %u\n",
1259 		       rxd,
1260 		       allowed_max_rxd,
1261 		       pid);
1262 		return -1;
1263 	}
1264 
1265 	allowed_min_rxd = get_allowed_min_nb_rxd(&pid);
1266 	if (rxd < allowed_min_rxd) {
1267 		printf("Fail: input rxd (%u) can't be less "
1268 		       "than min_rxds (%u) of port %u\n",
1269 		       rxd,
1270 		       allowed_min_rxd,
1271 		       pid);
1272 		return -1;
1273 	}
1274 
1275 	return 0;
1276 }
1277 
1278 /*
1279  * Get the allowed maximum number of TXDs of every tx queue.
1280  * *pid returns the port ID which has the minimal value of
1281  * max_txd among all ports.
1282  */
1283 static uint16_t
1284 get_allowed_max_nb_txd(portid_t *pid)
1285 {
1286 	uint16_t allowed_max_txd = UINT16_MAX;
1287 	portid_t pi;
1288 	struct rte_eth_dev_info dev_info;
1289 
1290 	RTE_ETH_FOREACH_DEV(pi) {
1291 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1292 			continue;
1293 
1294 		if (dev_info.tx_desc_lim.nb_max < allowed_max_txd) {
1295 			allowed_max_txd = dev_info.tx_desc_lim.nb_max;
1296 			*pid = pi;
1297 		}
1298 	}
1299 	return allowed_max_txd;
1300 }
1301 
1302 /*
1303  * Get the allowed minimal number of TXDs of every tx queue.
1304  * *pid returns the port ID which has the highest value of
1305  * min_txd among all ports.
1306  */
1307 static uint16_t
1308 get_allowed_min_nb_txd(portid_t *pid)
1309 {
1310 	uint16_t allowed_min_txd = 0;
1311 	portid_t pi;
1312 	struct rte_eth_dev_info dev_info;
1313 
1314 	RTE_ETH_FOREACH_DEV(pi) {
1315 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1316 			continue;
1317 
1318 		if (dev_info.tx_desc_lim.nb_min > allowed_min_txd) {
1319 			allowed_min_txd = dev_info.tx_desc_lim.nb_min;
1320 			*pid = pi;
1321 		}
1322 	}
1323 
1324 	return allowed_min_txd;
1325 }
1326 
1327 /*
1328  * Check input txd is valid or not.
1329  * If input txd is not greater than the maximum number of TXDs of any
1330  * Tx queue, and not less than the minimal number of TXDs, it is valid.
1331  * if valid, return 0, else return -1
1332  */
1333 int
1334 check_nb_txd(queueid_t txd)
1335 {
1336 	uint16_t allowed_max_txd;
1337 	uint16_t allowed_min_txd;
1338 	portid_t pid = 0;
1339 
1340 	allowed_max_txd = get_allowed_max_nb_txd(&pid);
1341 	if (txd > allowed_max_txd) {
1342 		printf("Fail: input txd (%u) can't be greater "
1343 		       "than max_txds (%u) of port %u\n",
1344 		       txd,
1345 		       allowed_max_txd,
1346 		       pid);
1347 		return -1;
1348 	}
1349 
1350 	allowed_min_txd = get_allowed_min_nb_txd(&pid);
1351 	if (txd < allowed_min_txd) {
1352 		printf("Fail: input txd (%u) can't be less "
1353 		       "than min_txds (%u) of port %u\n",
1354 		       txd,
1355 		       allowed_min_txd,
1356 		       pid);
1357 		return -1;
1358 	}
1359 	return 0;
1360 }
1361 
1362 
1363 /*
1364  * Get the allowed maximum number of hairpin queues.
1365  * *pid returns the port ID which has the minimal value of
1366  * max_hairpin_queues among all ports.
1367  */
1368 queueid_t
1369 get_allowed_max_nb_hairpinq(portid_t *pid)
1370 {
1371 	queueid_t allowed_max_hairpinq = RTE_MAX_QUEUES_PER_PORT;
1372 	portid_t pi;
1373 	struct rte_eth_hairpin_cap cap;
1374 
1375 	RTE_ETH_FOREACH_DEV(pi) {
1376 		if (rte_eth_dev_hairpin_capability_get(pi, &cap) != 0) {
1377 			*pid = pi;
1378 			return 0;
1379 		}
1380 		if (cap.max_nb_queues < allowed_max_hairpinq) {
1381 			allowed_max_hairpinq = cap.max_nb_queues;
1382 			*pid = pi;
1383 		}
1384 	}
1385 	return allowed_max_hairpinq;
1386 }
1387 
1388 /*
1389  * Check input hairpin is valid or not.
1390  * If input hairpin is not greater than any of maximum number
1391  * of hairpin queues of all ports, it is valid.
1392  * if valid, return 0, else return -1
1393  */
1394 int
1395 check_nb_hairpinq(queueid_t hairpinq)
1396 {
1397 	queueid_t allowed_max_hairpinq;
1398 	portid_t pid = 0;
1399 
1400 	allowed_max_hairpinq = get_allowed_max_nb_hairpinq(&pid);
1401 	if (hairpinq > allowed_max_hairpinq) {
1402 		printf("Fail: input hairpin (%u) can't be greater "
1403 		       "than max_hairpin_queues (%u) of port %u\n",
1404 		       hairpinq, allowed_max_hairpinq, pid);
1405 		return -1;
1406 	}
1407 	return 0;
1408 }
1409 
1410 static void
1411 init_config(void)
1412 {
1413 	portid_t pid;
1414 	struct rte_port *port;
1415 	struct rte_mempool *mbp;
1416 	unsigned int nb_mbuf_per_pool;
1417 	lcoreid_t  lc_id;
1418 	uint8_t port_per_socket[RTE_MAX_NUMA_NODES];
1419 	struct rte_gro_param gro_param;
1420 	uint32_t gso_types;
1421 	uint16_t data_size;
1422 	bool warning = false;
1423 	int k;
1424 	int ret;
1425 
1426 	memset(port_per_socket, 0, RTE_MAX_NUMA_NODES);
1427 
1428 	/* Configuration of logical cores. */
1429 	fwd_lcores = rte_zmalloc("testpmd: fwd_lcores",
1430 				sizeof(struct fwd_lcore *) * nb_lcores,
1431 				RTE_CACHE_LINE_SIZE);
1432 	if (fwd_lcores == NULL) {
1433 		rte_exit(EXIT_FAILURE, "rte_zmalloc(%d (struct fwd_lcore *)) "
1434 							"failed\n", nb_lcores);
1435 	}
1436 	for (lc_id = 0; lc_id < nb_lcores; lc_id++) {
1437 		fwd_lcores[lc_id] = rte_zmalloc("testpmd: struct fwd_lcore",
1438 					       sizeof(struct fwd_lcore),
1439 					       RTE_CACHE_LINE_SIZE);
1440 		if (fwd_lcores[lc_id] == NULL) {
1441 			rte_exit(EXIT_FAILURE, "rte_zmalloc(struct fwd_lcore) "
1442 								"failed\n");
1443 		}
1444 		fwd_lcores[lc_id]->cpuid_idx = lc_id;
1445 	}
1446 
1447 	RTE_ETH_FOREACH_DEV(pid) {
1448 		port = &ports[pid];
1449 		/* Apply default TxRx configuration for all ports */
1450 		port->dev_conf.txmode = tx_mode;
1451 		port->dev_conf.rxmode = rx_mode;
1452 
1453 		ret = eth_dev_info_get_print_err(pid, &port->dev_info);
1454 		if (ret != 0)
1455 			rte_exit(EXIT_FAILURE,
1456 				 "rte_eth_dev_info_get() failed\n");
1457 
1458 		ret = update_jumbo_frame_offload(pid);
1459 		if (ret != 0)
1460 			printf("Updating jumbo frame offload failed for port %u\n",
1461 				pid);
1462 
1463 		if (!(port->dev_info.tx_offload_capa &
1464 		      DEV_TX_OFFLOAD_MBUF_FAST_FREE))
1465 			port->dev_conf.txmode.offloads &=
1466 				~DEV_TX_OFFLOAD_MBUF_FAST_FREE;
1467 		if (numa_support) {
1468 			if (port_numa[pid] != NUMA_NO_CONFIG)
1469 				port_per_socket[port_numa[pid]]++;
1470 			else {
1471 				uint32_t socket_id = rte_eth_dev_socket_id(pid);
1472 
1473 				/*
1474 				 * if socket_id is invalid,
1475 				 * set to the first available socket.
1476 				 */
1477 				if (check_socket_id(socket_id) < 0)
1478 					socket_id = socket_ids[0];
1479 				port_per_socket[socket_id]++;
1480 			}
1481 		}
1482 
1483 		/* Apply Rx offloads configuration */
1484 		for (k = 0; k < port->dev_info.max_rx_queues; k++)
1485 			port->rx_conf[k].offloads =
1486 				port->dev_conf.rxmode.offloads;
1487 		/* Apply Tx offloads configuration */
1488 		for (k = 0; k < port->dev_info.max_tx_queues; k++)
1489 			port->tx_conf[k].offloads =
1490 				port->dev_conf.txmode.offloads;
1491 
1492 		if (eth_link_speed)
1493 			port->dev_conf.link_speeds = eth_link_speed;
1494 
1495 		/* set flag to initialize port/queue */
1496 		port->need_reconfig = 1;
1497 		port->need_reconfig_queues = 1;
1498 		port->tx_metadata = 0;
1499 
1500 		/* Check for maximum number of segments per MTU. Accordingly
1501 		 * update the mbuf data size.
1502 		 */
1503 		if (port->dev_info.rx_desc_lim.nb_mtu_seg_max != UINT16_MAX &&
1504 				port->dev_info.rx_desc_lim.nb_mtu_seg_max != 0) {
1505 			data_size = rx_mode.max_rx_pkt_len /
1506 				port->dev_info.rx_desc_lim.nb_mtu_seg_max;
1507 
1508 			if ((data_size + RTE_PKTMBUF_HEADROOM) >
1509 							mbuf_data_size[0]) {
1510 				mbuf_data_size[0] = data_size +
1511 						 RTE_PKTMBUF_HEADROOM;
1512 				warning = 1;
1513 			}
1514 		}
1515 	}
1516 
1517 	if (warning)
1518 		TESTPMD_LOG(WARNING,
1519 			    "Configured mbuf size of the first segment %hu\n",
1520 			    mbuf_data_size[0]);
1521 	/*
1522 	 * Create pools of mbuf.
1523 	 * If NUMA support is disabled, create a single pool of mbuf in
1524 	 * socket 0 memory by default.
1525 	 * Otherwise, create a pool of mbuf in the memory of sockets 0 and 1.
1526 	 *
1527 	 * Use the maximum value of nb_rxd and nb_txd here, then nb_rxd and
1528 	 * nb_txd can be configured at run time.
1529 	 */
1530 	if (param_total_num_mbufs)
1531 		nb_mbuf_per_pool = param_total_num_mbufs;
1532 	else {
1533 		nb_mbuf_per_pool = RTE_TEST_RX_DESC_MAX +
1534 			(nb_lcores * mb_mempool_cache) +
1535 			RTE_TEST_TX_DESC_MAX + MAX_PKT_BURST;
1536 		nb_mbuf_per_pool *= RTE_MAX_ETHPORTS;
1537 	}
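	/*
	 * E.g., assuming the usual testpmd defaults (RTE_TEST_RX_DESC_MAX and
	 * RTE_TEST_TX_DESC_MAX of 2048, MAX_PKT_BURST of 512, a 250-entry mbuf
	 * cache) and 4 forwarding lcores, this gives 2048 + 4 * 250 + 2048 +
	 * 512 = 5608 mbufs, scaled by RTE_MAX_ETHPORTS (32 by default) to
	 * 179,456 mbufs per pool.
	 */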
1538 
1539 	if (numa_support) {
1540 		uint8_t i, j;
1541 
1542 		for (i = 0; i < num_sockets; i++)
1543 			for (j = 0; j < mbuf_data_size_n; j++)
1544 				mempools[i * MAX_SEGS_BUFFER_SPLIT + j] =
1545 					mbuf_pool_create(mbuf_data_size[j],
1546 							  nb_mbuf_per_pool,
1547 							  socket_ids[i], j);
1548 	} else {
1549 		uint8_t i;
1550 
1551 		for (i = 0; i < mbuf_data_size_n; i++)
1552 			mempools[i] = mbuf_pool_create
1553 					(mbuf_data_size[i],
1554 					 nb_mbuf_per_pool,
1555 					 socket_num == UMA_NO_CONFIG ?
1556 					 0 : socket_num, i);
1557 	}
1558 
1559 	init_port_config();
1560 
1561 	gso_types = DEV_TX_OFFLOAD_TCP_TSO | DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
1562 		DEV_TX_OFFLOAD_GRE_TNL_TSO | DEV_TX_OFFLOAD_UDP_TSO;
1563 	/*
1564 	 * Records which Mbuf pool to use by each logical core, if needed.
1565 	 */
1566 	for (lc_id = 0; lc_id < nb_lcores; lc_id++) {
1567 		mbp = mbuf_pool_find(
1568 			rte_lcore_to_socket_id(fwd_lcores_cpuids[lc_id]), 0);
1569 
1570 		if (mbp == NULL)
1571 			mbp = mbuf_pool_find(0, 0);
1572 		fwd_lcores[lc_id]->mbp = mbp;
1573 		/* initialize GSO context */
1574 		fwd_lcores[lc_id]->gso_ctx.direct_pool = mbp;
1575 		fwd_lcores[lc_id]->gso_ctx.indirect_pool = mbp;
1576 		fwd_lcores[lc_id]->gso_ctx.gso_types = gso_types;
1577 		fwd_lcores[lc_id]->gso_ctx.gso_size = RTE_ETHER_MAX_LEN -
1578 			RTE_ETHER_CRC_LEN;
1579 		fwd_lcores[lc_id]->gso_ctx.flag = 0;
1580 	}
1581 
1582 	/* Configuration of packet forwarding streams. */
1583 	if (init_fwd_streams() < 0)
1584 		rte_exit(EXIT_FAILURE, "FAIL from init_fwd_streams()\n");
1585 
1586 	fwd_config_setup();
1587 
1588 	/* create a gro context for each lcore */
1589 	gro_param.gro_types = RTE_GRO_TCP_IPV4;
1590 	gro_param.max_flow_num = GRO_MAX_FLUSH_CYCLES;
1591 	gro_param.max_item_per_flow = MAX_PKT_BURST;
1592 	for (lc_id = 0; lc_id < nb_lcores; lc_id++) {
1593 		gro_param.socket_id = rte_lcore_to_socket_id(
1594 				fwd_lcores_cpuids[lc_id]);
1595 		fwd_lcores[lc_id]->gro_ctx = rte_gro_ctx_create(&gro_param);
1596 		if (fwd_lcores[lc_id]->gro_ctx == NULL) {
1597 			rte_exit(EXIT_FAILURE,
1598 					"rte_gro_ctx_create() failed\n");
1599 		}
1600 	}
1601 }
1602 
1603 
1604 void
1605 reconfig(portid_t new_port_id, unsigned socket_id)
1606 {
1607 	struct rte_port *port;
1608 	int ret;
1609 
1610 	/* Reconfiguration of Ethernet ports. */
1611 	port = &ports[new_port_id];
1612 
1613 	ret = eth_dev_info_get_print_err(new_port_id, &port->dev_info);
1614 	if (ret != 0)
1615 		return;
1616 
1617 	/* set flag to initialize port/queue */
1618 	port->need_reconfig = 1;
1619 	port->need_reconfig_queues = 1;
1620 	port->socket_id = socket_id;
1621 
1622 	init_port_config();
1623 }
1624 
1625 
1626 int
1627 init_fwd_streams(void)
1628 {
1629 	portid_t pid;
1630 	struct rte_port *port;
1631 	streamid_t sm_id, nb_fwd_streams_new;
1632 	queueid_t q;
1633 
1634 	/* set socket id according to numa or not */
1635 	RTE_ETH_FOREACH_DEV(pid) {
1636 		port = &ports[pid];
1637 		if (nb_rxq > port->dev_info.max_rx_queues) {
1638 			printf("Fail: nb_rxq(%d) is greater than "
1639 				"max_rx_queues(%d)\n", nb_rxq,
1640 				port->dev_info.max_rx_queues);
1641 			return -1;
1642 		}
1643 		if (nb_txq > port->dev_info.max_tx_queues) {
1644 			printf("Fail: nb_txq(%d) is greater than "
1645 				"max_tx_queues(%d)\n", nb_txq,
1646 				port->dev_info.max_tx_queues);
1647 			return -1;
1648 		}
1649 		if (numa_support) {
1650 			if (port_numa[pid] != NUMA_NO_CONFIG)
1651 				port->socket_id = port_numa[pid];
1652 			else {
1653 				port->socket_id = rte_eth_dev_socket_id(pid);
1654 
1655 				/*
1656 				 * if socket_id is invalid,
1657 				 * set to the first available socket.
1658 				 */
1659 				if (check_socket_id(port->socket_id) < 0)
1660 					port->socket_id = socket_ids[0];
1661 			}
1662 		}
1663 		else {
1664 			if (socket_num == UMA_NO_CONFIG)
1665 				port->socket_id = 0;
1666 			else
1667 				port->socket_id = socket_num;
1668 		}
1669 	}
1670 
1671 	q = RTE_MAX(nb_rxq, nb_txq);
1672 	if (q == 0) {
1673 		printf("Fail: Cannot allocate fwd streams as number of queues is 0\n");
1674 		return -1;
1675 	}
1676 	nb_fwd_streams_new = (streamid_t)(nb_ports * q);
1677 	if (nb_fwd_streams_new == nb_fwd_streams)
1678 		return 0;
1679 	/* clear the old */
1680 	if (fwd_streams != NULL) {
1681 		for (sm_id = 0; sm_id < nb_fwd_streams; sm_id++) {
1682 			if (fwd_streams[sm_id] == NULL)
1683 				continue;
1684 			rte_free(fwd_streams[sm_id]);
1685 			fwd_streams[sm_id] = NULL;
1686 		}
1687 		rte_free(fwd_streams);
1688 		fwd_streams = NULL;
1689 	}
1690 
1691 	/* init new */
1692 	nb_fwd_streams = nb_fwd_streams_new;
1693 	if (nb_fwd_streams) {
1694 		fwd_streams = rte_zmalloc("testpmd: fwd_streams",
1695 			sizeof(struct fwd_stream *) * nb_fwd_streams,
1696 			RTE_CACHE_LINE_SIZE);
1697 		if (fwd_streams == NULL)
1698 			rte_exit(EXIT_FAILURE, "rte_zmalloc(%d"
1699 				 " (struct fwd_stream *)) failed\n",
1700 				 nb_fwd_streams);
1701 
1702 		for (sm_id = 0; sm_id < nb_fwd_streams; sm_id++) {
1703 			fwd_streams[sm_id] = rte_zmalloc("testpmd:"
1704 				" struct fwd_stream", sizeof(struct fwd_stream),
1705 				RTE_CACHE_LINE_SIZE);
1706 			if (fwd_streams[sm_id] == NULL)
1707 				rte_exit(EXIT_FAILURE, "rte_zmalloc"
1708 					 "(struct fwd_stream) failed\n");
1709 		}
1710 	}
1711 
1712 	return 0;
1713 }
1714 
1715 static void
1716 pkt_burst_stats_display(const char *rx_tx, struct pkt_burst_stats *pbs)
1717 {
1718 	uint64_t total_burst, sburst;
1719 	uint64_t nb_burst;
1720 	uint64_t burst_stats[4];
1721 	uint16_t pktnb_stats[4];
1722 	uint16_t nb_pkt;
1723 	int burst_percent[4], sburstp;
1724 	int i;
1725 
1726 	/*
1727 	 * First compute the total number of packet bursts and the
1728 	 * two highest numbers of bursts of the same number of packets.
1729 	 */
1730 	memset(&burst_stats, 0x0, sizeof(burst_stats));
1731 	memset(&pktnb_stats, 0x0, sizeof(pktnb_stats));
1732 
1733 	/* Show stats for 0 burst size always */
1734 	total_burst = pbs->pkt_burst_spread[0];
1735 	burst_stats[0] = pbs->pkt_burst_spread[0];
1736 	pktnb_stats[0] = 0;
1737 
1738 	/* Find the next 2 burst sizes with highest occurrences. */
1739 	for (nb_pkt = 1; nb_pkt < MAX_PKT_BURST; nb_pkt++) {
1740 		nb_burst = pbs->pkt_burst_spread[nb_pkt];
1741 
1742 		if (nb_burst == 0)
1743 			continue;
1744 
1745 		total_burst += nb_burst;
1746 
1747 		if (nb_burst > burst_stats[1]) {
1748 			burst_stats[2] = burst_stats[1];
1749 			pktnb_stats[2] = pktnb_stats[1];
1750 			burst_stats[1] = nb_burst;
1751 			pktnb_stats[1] = nb_pkt;
1752 		} else if (nb_burst > burst_stats[2]) {
1753 			burst_stats[2] = nb_burst;
1754 			pktnb_stats[2] = nb_pkt;
1755 		}
1756 	}
1757 	if (total_burst == 0)
1758 		return;
1759 
1760 	printf("  %s-bursts : %"PRIu64" [", rx_tx, total_burst);
1761 	for (i = 0, sburst = 0, sburstp = 0; i < 4; i++) {
1762 		if (i == 3) {
1763 			printf("%d%% of other]\n", 100 - sburstp);
1764 			return;
1765 		}
1766 
1767 		sburst += burst_stats[i];
1768 		if (sburst == total_burst) {
1769 			printf("%d%% of %d pkts]\n",
1770 				100 - sburstp, (int) pktnb_stats[i]);
1771 			return;
1772 		}
1773 
1774 		burst_percent[i] =
1775 			(double)burst_stats[i] / total_burst * 100;
1776 		printf("%d%% of %d pkts + ",
1777 			burst_percent[i], (int) pktnb_stats[i]);
1778 		sburstp += burst_percent[i];
1779 	}
1780 }
1781 
1782 static void
1783 fwd_stream_stats_display(streamid_t stream_id)
1784 {
1785 	struct fwd_stream *fs;
1786 	static const char *fwd_top_stats_border = "-------";
1787 
1788 	fs = fwd_streams[stream_id];
1789 	if ((fs->rx_packets == 0) && (fs->tx_packets == 0) &&
1790 	    (fs->fwd_dropped == 0))
1791 		return;
1792 	printf("\n  %s Forward Stats for RX Port=%2d/Queue=%2d -> "
1793 	       "TX Port=%2d/Queue=%2d %s\n",
1794 	       fwd_top_stats_border, fs->rx_port, fs->rx_queue,
1795 	       fs->tx_port, fs->tx_queue, fwd_top_stats_border);
1796 	printf("  RX-packets: %-14"PRIu64" TX-packets: %-14"PRIu64
1797 	       " TX-dropped: %-14"PRIu64,
1798 	       fs->rx_packets, fs->tx_packets, fs->fwd_dropped);
1799 
1800 	/* if checksum mode */
1801 	if (cur_fwd_eng == &csum_fwd_engine) {
1802 		printf("  RX- bad IP checksum: %-14"PRIu64
1803 		       "  Rx- bad L4 checksum: %-14"PRIu64
1804 		       " Rx- bad outer L4 checksum: %-14"PRIu64"\n",
1805 			fs->rx_bad_ip_csum, fs->rx_bad_l4_csum,
1806 			fs->rx_bad_outer_l4_csum);
1807 		printf(" RX- bad outer IP checksum: %-14"PRIu64"\n",
1808 			fs->rx_bad_outer_ip_csum);
1809 	} else {
1810 		printf("\n");
1811 	}
1812 
1813 	if (record_burst_stats) {
1814 		pkt_burst_stats_display("RX", &fs->rx_burst_stats);
1815 		pkt_burst_stats_display("TX", &fs->tx_burst_stats);
1816 	}
1817 }
1818 
1819 void
1820 fwd_stats_display(void)
1821 {
1822 	static const char *fwd_stats_border = "----------------------";
1823 	static const char *acc_stats_border = "+++++++++++++++";
1824 	struct {
1825 		struct fwd_stream *rx_stream;
1826 		struct fwd_stream *tx_stream;
1827 		uint64_t tx_dropped;
1828 		uint64_t rx_bad_ip_csum;
1829 		uint64_t rx_bad_l4_csum;
1830 		uint64_t rx_bad_outer_l4_csum;
1831 		uint64_t rx_bad_outer_ip_csum;
1832 	} ports_stats[RTE_MAX_ETHPORTS];
1833 	uint64_t total_rx_dropped = 0;
1834 	uint64_t total_tx_dropped = 0;
1835 	uint64_t total_rx_nombuf = 0;
1836 	struct rte_eth_stats stats;
1837 	uint64_t fwd_cycles = 0;
1838 	uint64_t total_recv = 0;
1839 	uint64_t total_xmit = 0;
1840 	struct rte_port *port;
1841 	streamid_t sm_id;
1842 	portid_t pt_id;
1843 	int i;
1844 
1845 	memset(ports_stats, 0, sizeof(ports_stats));
1846 
1847 	for (sm_id = 0; sm_id < cur_fwd_config.nb_fwd_streams; sm_id++) {
1848 		struct fwd_stream *fs = fwd_streams[sm_id];
1849 
1850 		if (cur_fwd_config.nb_fwd_streams >
1851 		    cur_fwd_config.nb_fwd_ports) {
1852 			fwd_stream_stats_display(sm_id);
1853 		} else {
1854 			ports_stats[fs->tx_port].tx_stream = fs;
1855 			ports_stats[fs->rx_port].rx_stream = fs;
1856 		}
1857 
1858 		ports_stats[fs->tx_port].tx_dropped += fs->fwd_dropped;
1859 
1860 		ports_stats[fs->rx_port].rx_bad_ip_csum += fs->rx_bad_ip_csum;
1861 		ports_stats[fs->rx_port].rx_bad_l4_csum += fs->rx_bad_l4_csum;
1862 		ports_stats[fs->rx_port].rx_bad_outer_l4_csum +=
1863 				fs->rx_bad_outer_l4_csum;
1864 		ports_stats[fs->rx_port].rx_bad_outer_ip_csum +=
1865 				fs->rx_bad_outer_ip_csum;
1866 
1867 		if (record_core_cycles)
1868 			fwd_cycles += fs->core_cycles;
1869 	}
1870 	for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
1871 		pt_id = fwd_ports_ids[i];
1872 		port = &ports[pt_id];
1873 
1874 		rte_eth_stats_get(pt_id, &stats);
1875 		stats.ipackets -= port->stats.ipackets;
1876 		stats.opackets -= port->stats.opackets;
1877 		stats.ibytes -= port->stats.ibytes;
1878 		stats.obytes -= port->stats.obytes;
1879 		stats.imissed -= port->stats.imissed;
1880 		stats.oerrors -= port->stats.oerrors;
1881 		stats.rx_nombuf -= port->stats.rx_nombuf;
1882 
1883 		total_recv += stats.ipackets;
1884 		total_xmit += stats.opackets;
1885 		total_rx_dropped += stats.imissed;
1886 		total_tx_dropped += ports_stats[pt_id].tx_dropped;
1887 		total_tx_dropped += stats.oerrors;
1888 		total_rx_nombuf  += stats.rx_nombuf;
1889 
1890 		printf("\n  %s Forward statistics for port %-2d %s\n",
1891 		       fwd_stats_border, pt_id, fwd_stats_border);
1892 
1893 		printf("  RX-packets: %-14"PRIu64" RX-dropped: %-14"PRIu64
1894 		       "RX-total: %-"PRIu64"\n", stats.ipackets, stats.imissed,
1895 		       stats.ipackets + stats.imissed);
1896 
1897 		if (cur_fwd_eng == &csum_fwd_engine) {
1898 			printf("  Bad-ipcsum: %-14"PRIu64
1899 			       " Bad-l4csum: %-14"PRIu64
1900 			       "Bad-outer-l4csum: %-14"PRIu64"\n",
1901 			       ports_stats[pt_id].rx_bad_ip_csum,
1902 			       ports_stats[pt_id].rx_bad_l4_csum,
1903 			       ports_stats[pt_id].rx_bad_outer_l4_csum);
1904 			printf("  Bad-outer-ipcsum: %-14"PRIu64"\n",
1905 			       ports_stats[pt_id].rx_bad_outer_ip_csum);
1906 		}
1907 		if (stats.ierrors + stats.rx_nombuf > 0) {
1908 			printf("  RX-error: %-"PRIu64"\n", stats.ierrors);
1909 			printf("  RX-nombufs: %-14"PRIu64"\n", stats.rx_nombuf);
1910 		}
1911 
1912 		printf("  TX-packets: %-14"PRIu64" TX-dropped: %-14"PRIu64
1913 		       "TX-total: %-"PRIu64"\n",
1914 		       stats.opackets, ports_stats[pt_id].tx_dropped,
1915 		       stats.opackets + ports_stats[pt_id].tx_dropped);
1916 
1917 		if (record_burst_stats) {
1918 			if (ports_stats[pt_id].rx_stream)
1919 				pkt_burst_stats_display("RX",
1920 					&ports_stats[pt_id].rx_stream->rx_burst_stats);
1921 			if (ports_stats[pt_id].tx_stream)
1922 				pkt_burst_stats_display("TX",
1923 				&ports_stats[pt_id].tx_stream->tx_burst_stats);
1924 		}
1925 
1926 		printf("  %s--------------------------------%s\n",
1927 		       fwd_stats_border, fwd_stats_border);
1928 	}
1929 
1930 	printf("\n  %s Accumulated forward statistics for all ports"
1931 	       " %s\n",
1932 	       acc_stats_border, acc_stats_border);
1933 	printf("  RX-packets: %-14"PRIu64" RX-dropped: %-14"PRIu64"RX-total: "
1934 	       "%-"PRIu64"\n"
1935 	       "  TX-packets: %-14"PRIu64" TX-dropped: %-14"PRIu64"TX-total: "
1936 	       "%-"PRIu64"\n",
1937 	       total_recv, total_rx_dropped, total_recv + total_rx_dropped,
1938 	       total_xmit, total_tx_dropped, total_xmit + total_tx_dropped);
1939 	if (total_rx_nombuf > 0)
1940 		printf("  RX-nombufs: %-14"PRIu64"\n", total_rx_nombuf);
1941 	printf("  %s++++++++++++++++++++++++++++++++++++++++++++++"
1942 	       "%s\n",
1943 	       acc_stats_border, acc_stats_border);
1944 	if (record_core_cycles) {
1945 #define CYC_PER_MHZ 1E6
1946 		if (total_recv > 0 || total_xmit > 0) {
1947 			uint64_t total_pkts = 0;
1948 			if (strcmp(cur_fwd_eng->fwd_mode_name, "txonly") == 0 ||
1949 			    strcmp(cur_fwd_eng->fwd_mode_name, "flowgen") == 0)
1950 				total_pkts = total_xmit;
1951 			else
1952 				total_pkts = total_recv;
1953 
1954 			printf("\n  CPU cycles/packet=%.2F (total cycles="
1955 			       "%"PRIu64" / total %s packets=%"PRIu64") at %"PRIu64
1956 			       " MHz Clock\n",
1957 			       (double) fwd_cycles / total_pkts,
1958 			       fwd_cycles, cur_fwd_eng->fwd_mode_name, total_pkts,
1959 			       (uint64_t)(rte_get_tsc_hz() / CYC_PER_MHZ));
1960 		}
1961 	}
1962 }
1963 
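/*
 * Reset the forwarding statistics: snapshot the current ethdev stats as
 * the new baseline for each forwarding port and clear all per-stream
 * counters and burst statistics.
 */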
1964 void
1965 fwd_stats_reset(void)
1966 {
1967 	streamid_t sm_id;
1968 	portid_t pt_id;
1969 	int i;
1970 
1971 	for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
1972 		pt_id = fwd_ports_ids[i];
1973 		rte_eth_stats_get(pt_id, &ports[pt_id].stats);
1974 	}
1975 	for (sm_id = 0; sm_id < cur_fwd_config.nb_fwd_streams; sm_id++) {
1976 		struct fwd_stream *fs = fwd_streams[sm_id];
1977 
1978 		fs->rx_packets = 0;
1979 		fs->tx_packets = 0;
1980 		fs->fwd_dropped = 0;
1981 		fs->rx_bad_ip_csum = 0;
1982 		fs->rx_bad_l4_csum = 0;
1983 		fs->rx_bad_outer_l4_csum = 0;
1984 		fs->rx_bad_outer_ip_csum = 0;
1985 
1986 		memset(&fs->rx_burst_stats, 0, sizeof(fs->rx_burst_stats));
1987 		memset(&fs->tx_burst_stats, 0, sizeof(fs->tx_burst_stats));
1988 		fs->core_cycles = 0;
1989 	}
1990 }
1991 
1992 static void
1993 flush_fwd_rx_queues(void)
1994 {
1995 	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
1996 	portid_t  rxp;
1997 	portid_t port_id;
1998 	queueid_t rxq;
1999 	uint16_t  nb_rx;
2000 	uint16_t  i;
2001 	uint8_t   j;
2002 	uint64_t prev_tsc = 0, diff_tsc, cur_tsc, timer_tsc = 0;
2003 	uint64_t timer_period;
2004 
2005 	/* convert to number of cycles */
2006 	timer_period = rte_get_timer_hz(); /* 1 second timeout */
2007 
2008 	for (j = 0; j < 2; j++) {
2009 		for (rxp = 0; rxp < cur_fwd_config.nb_fwd_ports; rxp++) {
2010 			for (rxq = 0; rxq < nb_rxq; rxq++) {
2011 				port_id = fwd_ports_ids[rxp];
2012 				/**
2013 				* testpmd can get stuck in the do-while loop below
2014 				* if rte_eth_rx_burst() always returns a nonzero
2015 				* number of packets. A timer is added to exit this
2016 				* loop after the 1-second timeout expires.
2017 				*/
2018 				prev_tsc = rte_rdtsc();
2019 				do {
2020 					nb_rx = rte_eth_rx_burst(port_id, rxq,
2021 						pkts_burst, MAX_PKT_BURST);
2022 					for (i = 0; i < nb_rx; i++)
2023 						rte_pktmbuf_free(pkts_burst[i]);
2024 
2025 					cur_tsc = rte_rdtsc();
2026 					diff_tsc = cur_tsc - prev_tsc;
2027 					timer_tsc += diff_tsc;
2028 				} while ((nb_rx > 0) &&
2029 					(timer_tsc < timer_period));
2030 				timer_tsc = 0;
2031 			}
2032 		}
2033 		rte_delay_ms(10); /* wait 10 milliseconds before retrying */
2034 	}
2035 }
2036 
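/*
 * Main forwarding loop of an lcore: run the given packet forwarding
 * callback on every stream assigned to this lcore until the lcore is
 * told to stop; optionally update bitrate and latency statistics.
 */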
2037 static void
2038 run_pkt_fwd_on_lcore(struct fwd_lcore *fc, packet_fwd_t pkt_fwd)
2039 {
2040 	struct fwd_stream **fsm;
2041 	streamid_t nb_fs;
2042 	streamid_t sm_id;
2043 #ifdef RTE_LIB_BITRATESTATS
2044 	uint64_t tics_per_1sec;
2045 	uint64_t tics_datum;
2046 	uint64_t tics_current;
2047 	uint16_t i, cnt_ports;
2048 
2049 	cnt_ports = nb_ports;
2050 	tics_datum = rte_rdtsc();
2051 	tics_per_1sec = rte_get_timer_hz();
2052 #endif
2053 	fsm = &fwd_streams[fc->stream_idx];
2054 	nb_fs = fc->stream_nb;
2055 	do {
2056 		for (sm_id = 0; sm_id < nb_fs; sm_id++)
2057 			(*pkt_fwd)(fsm[sm_id]);
2058 #ifdef RTE_LIB_BITRATESTATS
2059 		if (bitrate_enabled != 0 &&
2060 				bitrate_lcore_id == rte_lcore_id()) {
2061 			tics_current = rte_rdtsc();
2062 			if (tics_current - tics_datum >= tics_per_1sec) {
2063 				/* Periodic bitrate calculation */
2064 				for (i = 0; i < cnt_ports; i++)
2065 					rte_stats_bitrate_calc(bitrate_data,
2066 						ports_ids[i]);
2067 				tics_datum = tics_current;
2068 			}
2069 		}
2070 #endif
2071 #ifdef RTE_LIB_LATENCYSTATS
2072 		if (latencystats_enabled != 0 &&
2073 				latencystats_lcore_id == rte_lcore_id())
2074 			rte_latencystats_update();
2075 #endif
2076 
2077 	} while (! fc->stopped);
2078 }
2079 
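/* lcore entry point running the current forwarding engine on its streams. */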
2080 static int
2081 start_pkt_forward_on_core(void *fwd_arg)
2082 {
2083 	run_pkt_fwd_on_lcore((struct fwd_lcore *) fwd_arg,
2084 			     cur_fwd_config.fwd_eng->packet_fwd);
2085 	return 0;
2086 }
2087 
2088 /*
2089  * Run the TXONLY packet forwarding engine to send a single burst of packets.
2090  * Used to start communication flows in network loopback test configurations.
2091  */
2092 static int
2093 run_one_txonly_burst_on_core(void *fwd_arg)
2094 {
2095 	struct fwd_lcore *fwd_lc;
2096 	struct fwd_lcore tmp_lcore;
2097 
2098 	fwd_lc = (struct fwd_lcore *) fwd_arg;
2099 	tmp_lcore = *fwd_lc;
2100 	tmp_lcore.stopped = 1;
2101 	run_pkt_fwd_on_lcore(&tmp_lcore, tx_only_engine.packet_fwd);
2102 	return 0;
2103 }
2104 
2105 /*
2106  * Launch packet forwarding:
2107  *     - Setup per-port forwarding context.
2108  *     - launch logical cores with their forwarding configuration.
2109  */
2110 static void
2111 launch_packet_forwarding(lcore_function_t *pkt_fwd_on_lcore)
2112 {
2113 	port_fwd_begin_t port_fwd_begin;
2114 	unsigned int i;
2115 	unsigned int lc_id;
2116 	int diag;
2117 
2118 	port_fwd_begin = cur_fwd_config.fwd_eng->port_fwd_begin;
2119 	if (port_fwd_begin != NULL) {
2120 		for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++)
2121 			(*port_fwd_begin)(fwd_ports_ids[i]);
2122 	}
2123 	for (i = 0; i < cur_fwd_config.nb_fwd_lcores; i++) {
2124 		lc_id = fwd_lcores_cpuids[i];
2125 		if ((interactive == 0) || (lc_id != rte_lcore_id())) {
2126 			fwd_lcores[i]->stopped = 0;
2127 			diag = rte_eal_remote_launch(pkt_fwd_on_lcore,
2128 						     fwd_lcores[i], lc_id);
2129 			if (diag != 0)
2130 				printf("launch lcore %u failed - diag=%d\n",
2131 				       lc_id, diag);
2132 		}
2133 	}
2134 }
2135 
2136 /*
2137  * Launch packet forwarding configuration.
2138  */
2139 void
2140 start_packet_forwarding(int with_tx_first)
2141 {
2142 	port_fwd_begin_t port_fwd_begin;
2143 	port_fwd_end_t  port_fwd_end;
2144 	struct rte_port *port;
2145 	unsigned int i;
2146 	portid_t   pt_id;
2147 
2148 	if (strcmp(cur_fwd_eng->fwd_mode_name, "rxonly") == 0 && !nb_rxq)
2149 		rte_exit(EXIT_FAILURE, "Rx queues are 0, cannot use rxonly fwd mode\n");
2150 
2151 	if (strcmp(cur_fwd_eng->fwd_mode_name, "txonly") == 0 && !nb_txq)
2152 		rte_exit(EXIT_FAILURE, "Tx queues are 0, cannot use txonly fwd mode\n");
2153 
2154 	if ((strcmp(cur_fwd_eng->fwd_mode_name, "rxonly") != 0 &&
2155 		strcmp(cur_fwd_eng->fwd_mode_name, "txonly") != 0) &&
2156 		(!nb_rxq || !nb_txq))
2157 		rte_exit(EXIT_FAILURE,
2158 			"Either Rx or Tx queues are 0, cannot use %s fwd mode\n",
2159 			cur_fwd_eng->fwd_mode_name);
2160 
2161 	if (all_ports_started() == 0) {
2162 		printf("Not all ports were started\n");
2163 		return;
2164 	}
2165 	if (test_done == 0) {
2166 		printf("Packet forwarding already started\n");
2167 		return;
2168 	}
2169 
2170 
2171 	if(dcb_test) {
2172 		for (i = 0; i < nb_fwd_ports; i++) {
2173 			pt_id = fwd_ports_ids[i];
2174 			port = &ports[pt_id];
2175 			if (!port->dcb_flag) {
2176 				printf("In DCB mode, all forwarding ports must "
2177                                        "be configured in this mode.\n");
2178 				return;
2179 			}
2180 		}
2181 		if (nb_fwd_lcores == 1) {
2182 			printf("In DCB mode, the number of forwarding cores "
2183                                "should be larger than 1.\n");
2184 			return;
2185 		}
2186 	}
2187 	test_done = 0;
2188 
2189 	fwd_config_setup();
2190 
2191 	if(!no_flush_rx)
2192 		flush_fwd_rx_queues();
2193 
2194 	pkt_fwd_config_display(&cur_fwd_config);
2195 	rxtx_config_display();
2196 
2197 	fwd_stats_reset();
2198 	if (with_tx_first) {
2199 		port_fwd_begin = tx_only_engine.port_fwd_begin;
2200 		if (port_fwd_begin != NULL) {
2201 			for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++)
2202 				(*port_fwd_begin)(fwd_ports_ids[i]);
2203 		}
2204 		while (with_tx_first--) {
2205 			launch_packet_forwarding(
2206 					run_one_txonly_burst_on_core);
2207 			rte_eal_mp_wait_lcore();
2208 		}
2209 		port_fwd_end = tx_only_engine.port_fwd_end;
2210 		if (port_fwd_end != NULL) {
2211 			for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++)
2212 				(*port_fwd_end)(fwd_ports_ids[i]);
2213 		}
2214 	}
2215 	launch_packet_forwarding(start_pkt_forward_on_core);
2216 }
2217 
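/*
 * Stop packet forwarding: signal all forwarding lcores to stop, wait for
 * them to finish, run the engine's per-port end callback and display the
 * final forwarding statistics.
 */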
2218 void
2219 stop_packet_forwarding(void)
2220 {
2221 	port_fwd_end_t port_fwd_end;
2222 	lcoreid_t lc_id;
2223 	portid_t pt_id;
2224 	int i;
2225 
2226 	if (test_done) {
2227 		printf("Packet forwarding not started\n");
2228 		return;
2229 	}
2230 	printf("Telling cores to stop...");
2231 	for (lc_id = 0; lc_id < cur_fwd_config.nb_fwd_lcores; lc_id++)
2232 		fwd_lcores[lc_id]->stopped = 1;
2233 	printf("\nWaiting for lcores to finish...\n");
2234 	rte_eal_mp_wait_lcore();
2235 	port_fwd_end = cur_fwd_config.fwd_eng->port_fwd_end;
2236 	if (port_fwd_end != NULL) {
2237 		for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
2238 			pt_id = fwd_ports_ids[i];
2239 			(*port_fwd_end)(pt_id);
2240 		}
2241 	}
2242 
2243 	fwd_stats_display();
2244 
2245 	printf("\nDone.\n");
2246 	test_done = 1;
2247 }
2248 
2249 void
2250 dev_set_link_up(portid_t pid)
2251 {
2252 	if (rte_eth_dev_set_link_up(pid) < 0)
2253 		printf("\nSet link up fail.\n");
2254 }
2255 
2256 void
2257 dev_set_link_down(portid_t pid)
2258 {
2259 	if (rte_eth_dev_set_link_down(pid) < 0)
2260 		printf("\nSet link down fail.\n");
2261 }
2262 
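/* Return 1 if every port (excluding bonding slaves) is started, 0 otherwise. */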
2263 static int
2264 all_ports_started(void)
2265 {
2266 	portid_t pi;
2267 	struct rte_port *port;
2268 
2269 	RTE_ETH_FOREACH_DEV(pi) {
2270 		port = &ports[pi];
2271 		/* Check if there is a port which is not started */
2272 		if ((port->port_status != RTE_PORT_STARTED) &&
2273 			(port->slave_flag == 0))
2274 			return 0;
2275 	}
2276 
2277 	/* All ports (except bonding slaves) are started */
2278 	return 1;
2279 }
2280 
2281 int
2282 port_is_stopped(portid_t port_id)
2283 {
2284 	struct rte_port *port = &ports[port_id];
2285 
2286 	if ((port->port_status != RTE_PORT_STOPPED) &&
2287 	    (port->slave_flag == 0))
2288 		return 0;
2289 	return 1;
2290 }
2291 
2292 int
2293 all_ports_stopped(void)
2294 {
2295 	portid_t pi;
2296 
2297 	RTE_ETH_FOREACH_DEV(pi) {
2298 		if (!port_is_stopped(pi))
2299 			return 0;
2300 	}
2301 
2302 	return 1;
2303 }
2304 
2305 int
2306 port_is_started(portid_t port_id)
2307 {
2308 	if (port_id_is_invalid(port_id, ENABLED_WARN))
2309 		return 0;
2310 
2311 	if (ports[port_id].port_status != RTE_PORT_STARTED)
2312 		return 0;
2313 
2314 	return 1;
2315 }
2316 
2317 /* Configure the Rx and Tx hairpin queues for the selected port. */
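/*
 * hairpin_mode bits as handled below:
 *   0x01 - ports form a chained loop (Tx peer is the next port),
 *   0x02 - ports are bound in pairs,
 *   0x10 - explicit Tx flow rules,
 *   none of 0x0f set - each port is hairpinned to itself (no manual bind).
 */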
2318 static int
2319 setup_hairpin_queues(portid_t pi, portid_t p_pi, uint16_t cnt_pi)
2320 {
2321 	queueid_t qi;
2322 	struct rte_eth_hairpin_conf hairpin_conf = {
2323 		.peer_count = 1,
2324 	};
2325 	int i;
2326 	int diag;
2327 	struct rte_port *port = &ports[pi];
2328 	uint16_t peer_rx_port = pi;
2329 	uint16_t peer_tx_port = pi;
2330 	uint32_t manual = 1;
2331 	uint32_t tx_exp = hairpin_mode & 0x10;
2332 
2333 	if (!(hairpin_mode & 0xf)) {
2334 		peer_rx_port = pi;
2335 		peer_tx_port = pi;
2336 		manual = 0;
2337 	} else if (hairpin_mode & 0x1) {
2338 		peer_tx_port = rte_eth_find_next_owned_by(pi + 1,
2339 						       RTE_ETH_DEV_NO_OWNER);
2340 		if (peer_tx_port >= RTE_MAX_ETHPORTS)
2341 			peer_tx_port = rte_eth_find_next_owned_by(0,
2342 						RTE_ETH_DEV_NO_OWNER);
2343 		if (p_pi != RTE_MAX_ETHPORTS) {
2344 			peer_rx_port = p_pi;
2345 		} else {
2346 			uint16_t next_pi;
2347 
2348 			/* Last port will be the peer RX port of the first. */
2349 			RTE_ETH_FOREACH_DEV(next_pi)
2350 				peer_rx_port = next_pi;
2351 		}
2352 		manual = 1;
2353 	} else if (hairpin_mode & 0x2) {
2354 		if (cnt_pi & 0x1) {
2355 			peer_rx_port = p_pi;
2356 		} else {
2357 			peer_rx_port = rte_eth_find_next_owned_by(pi + 1,
2358 						RTE_ETH_DEV_NO_OWNER);
2359 			if (peer_rx_port >= RTE_MAX_ETHPORTS)
2360 				peer_rx_port = pi;
2361 		}
2362 		peer_tx_port = peer_rx_port;
2363 		manual = 1;
2364 	}
2365 
2366 	for (qi = nb_txq, i = 0; qi < nb_hairpinq + nb_txq; qi++) {
2367 		hairpin_conf.peers[0].port = peer_rx_port;
2368 		hairpin_conf.peers[0].queue = i + nb_rxq;
2369 		hairpin_conf.manual_bind = !!manual;
2370 		hairpin_conf.tx_explicit = !!tx_exp;
2371 		diag = rte_eth_tx_hairpin_queue_setup
2372 			(pi, qi, nb_txd, &hairpin_conf);
2373 		i++;
2374 		if (diag == 0)
2375 			continue;
2376 
2377 		/* Failed to set up Tx hairpin queue, return */
2378 		if (rte_atomic16_cmpset(&(port->port_status),
2379 					RTE_PORT_HANDLING,
2380 					RTE_PORT_STOPPED) == 0)
2381 			printf("Port %d can not be set back "
2382 					"to stopped\n", pi);
2383 		printf("Fail to configure port %d hairpin "
2384 				"queues\n", pi);
2385 		/* try to reconfigure queues next time */
2386 		port->need_reconfig_queues = 1;
2387 		return -1;
2388 	}
2389 	for (qi = nb_rxq, i = 0; qi < nb_hairpinq + nb_rxq; qi++) {
2390 		hairpin_conf.peers[0].port = peer_tx_port;
2391 		hairpin_conf.peers[0].queue = i + nb_txq;
2392 		hairpin_conf.manual_bind = !!manual;
2393 		hairpin_conf.tx_explicit = !!tx_exp;
2394 		diag = rte_eth_rx_hairpin_queue_setup
2395 			(pi, qi, nb_rxd, &hairpin_conf);
2396 		i++;
2397 		if (diag == 0)
2398 			continue;
2399 
2400 		/* Failed to set up Rx hairpin queue, return */
2401 		if (rte_atomic16_cmpset(&(port->port_status),
2402 					RTE_PORT_HANDLING,
2403 					RTE_PORT_STOPPED) == 0)
2404 			printf("Port %d can not be set back "
2405 					"to stopped\n", pi);
2406 		printf("Fail to configure port %d hairpin "
2407 				"queues\n", pi);
2408 		/* try to reconfigure queues next time */
2409 		port->need_reconfig_queues = 1;
2410 		return -1;
2411 	}
2412 	return 0;
2413 }
2414 
2415 /* Configure the Rx with optional split. */
2416 int
2417 rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
2418 	       uint16_t nb_rx_desc, unsigned int socket_id,
2419 	       struct rte_eth_rxconf *rx_conf, struct rte_mempool *mp)
2420 {
2421 	union rte_eth_rxseg rx_useg[MAX_SEGS_BUFFER_SPLIT] = {};
2422 	unsigned int i, mp_n;
2423 	int ret;
2424 
2425 	if (rx_pkt_nb_segs <= 1 ||
2426 	    (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) == 0) {
2427 		rx_conf->rx_seg = NULL;
2428 		rx_conf->rx_nseg = 0;
2429 		ret = rte_eth_rx_queue_setup(port_id, rx_queue_id,
2430 					     nb_rx_desc, socket_id,
2431 					     rx_conf, mp);
2432 		return ret;
2433 	}
2434 	for (i = 0; i < rx_pkt_nb_segs; i++) {
2435 		struct rte_eth_rxseg_split *rx_seg = &rx_useg[i].split;
2436 		struct rte_mempool *mpx;
2437 		/*
2438 		 * Use the last valid pool for any segment whose index
2439 		 * exceeds the number of configured mempools.
2440 		 */
2441 		mp_n = (i >= mbuf_data_size_n) ? mbuf_data_size_n - 1 : i;
2442 		mpx = mbuf_pool_find(socket_id, mp_n);
2443 		/* Handle zero as mbuf data buffer size. */
2444 		rx_seg->length = rx_pkt_seg_lengths[i] ?
2445 				   rx_pkt_seg_lengths[i] :
2446 				   mbuf_data_size[mp_n];
2447 		rx_seg->offset = i < rx_pkt_nb_offs ?
2448 				   rx_pkt_seg_offsets[i] : 0;
2449 		rx_seg->mp = mpx ? mpx : mp;
2450 	}
2451 	rx_conf->rx_nseg = rx_pkt_nb_segs;
2452 	rx_conf->rx_seg = rx_useg;
2453 	ret = rte_eth_rx_queue_setup(port_id, rx_queue_id, nb_rx_desc,
2454 				    socket_id, rx_conf, NULL);
2455 	rx_conf->rx_seg = NULL;
2456 	rx_conf->rx_nseg = 0;
2457 	return ret;
2458 }
2459 
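/*
 * Configure (if needed) and start the given port, or all ports when pid is
 * RTE_PORT_ALL: set up regular and hairpin queues, start the device, check
 * link status and bind hairpin peer ports when a hairpin mode is selected.
 */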
2460 int
2461 start_port(portid_t pid)
2462 {
2463 	int diag, need_check_link_status = -1;
2464 	portid_t pi;
2465 	portid_t p_pi = RTE_MAX_ETHPORTS;
2466 	portid_t pl[RTE_MAX_ETHPORTS];
2467 	portid_t peer_pl[RTE_MAX_ETHPORTS];
2468 	uint16_t cnt_pi = 0;
2469 	uint16_t cfg_pi = 0;
2470 	int peer_pi;
2471 	queueid_t qi;
2472 	struct rte_port *port;
2473 	struct rte_ether_addr mac_addr;
2474 	struct rte_eth_hairpin_cap cap;
2475 
2476 	if (port_id_is_invalid(pid, ENABLED_WARN))
2477 		return 0;
2478 
2479 	if(dcb_config)
2480 		dcb_test = 1;
2481 	RTE_ETH_FOREACH_DEV(pi) {
2482 		if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
2483 			continue;
2484 
2485 		need_check_link_status = 0;
2486 		port = &ports[pi];
2487 		if (rte_atomic16_cmpset(&(port->port_status), RTE_PORT_STOPPED,
2488 						 RTE_PORT_HANDLING) == 0) {
2489 			printf("Port %d is not stopped, skipping\n", pi);
2490 			continue;
2491 		}
2492 
2493 		if (port->need_reconfig > 0) {
2494 			port->need_reconfig = 0;
2495 
2496 			if (flow_isolate_all) {
2497 				int ret = port_flow_isolate(pi, 1);
2498 				if (ret) {
2499 					printf("Failed to apply isolated"
2500 					       " mode on port %d\n", pi);
2501 					return -1;
2502 				}
2503 			}
2504 			configure_rxtx_dump_callbacks(0);
2505 			printf("Configuring Port %d (socket %u)\n", pi,
2506 					port->socket_id);
2507 			if (nb_hairpinq > 0 &&
2508 			    rte_eth_dev_hairpin_capability_get(pi, &cap)) {
2509 				printf("Port %d doesn't support hairpin "
2510 				       "queues\n", pi);
2511 				return -1;
2512 			}
2513 			/* configure port */
2514 			diag = rte_eth_dev_configure(pi, nb_rxq + nb_hairpinq,
2515 						     nb_txq + nb_hairpinq,
2516 						     &(port->dev_conf));
2517 			if (diag != 0) {
2518 				if (rte_atomic16_cmpset(&(port->port_status),
2519 				RTE_PORT_HANDLING, RTE_PORT_STOPPED) == 0)
2520 					printf("Port %d can not be set back "
2521 							"to stopped\n", pi);
2522 				printf("Fail to configure port %d\n", pi);
2523 				/* try to reconfigure port next time */
2524 				port->need_reconfig = 1;
2525 				return -1;
2526 			}
2527 		}
2528 		if (port->need_reconfig_queues > 0) {
2529 			port->need_reconfig_queues = 0;
2530 			/* setup tx queues */
2531 			for (qi = 0; qi < nb_txq; qi++) {
2532 				if ((numa_support) &&
2533 					(txring_numa[pi] != NUMA_NO_CONFIG))
2534 					diag = rte_eth_tx_queue_setup(pi, qi,
2535 						port->nb_tx_desc[qi],
2536 						txring_numa[pi],
2537 						&(port->tx_conf[qi]));
2538 				else
2539 					diag = rte_eth_tx_queue_setup(pi, qi,
2540 						port->nb_tx_desc[qi],
2541 						port->socket_id,
2542 						&(port->tx_conf[qi]));
2543 
2544 				if (diag == 0)
2545 					continue;
2546 
2547 				/* Failed to set up Tx queue, return */
2548 				if (rte_atomic16_cmpset(&(port->port_status),
2549 							RTE_PORT_HANDLING,
2550 							RTE_PORT_STOPPED) == 0)
2551 					printf("Port %d can not be set back "
2552 							"to stopped\n", pi);
2553 				printf("Fail to configure port %d tx queues\n",
2554 				       pi);
2555 				/* try to reconfigure queues next time */
2556 				port->need_reconfig_queues = 1;
2557 				return -1;
2558 			}
2559 			for (qi = 0; qi < nb_rxq; qi++) {
2560 				/* setup rx queues */
2561 				if ((numa_support) &&
2562 					(rxring_numa[pi] != NUMA_NO_CONFIG)) {
2563 					struct rte_mempool * mp =
2564 						mbuf_pool_find
2565 							(rxring_numa[pi], 0);
2566 					if (mp == NULL) {
2567 						printf("Failed to setup RX queue: "
2568 							"No mempool allocation"
2569 							" on the socket %d\n",
2570 							rxring_numa[pi]);
2571 						return -1;
2572 					}
2573 
2574 					diag = rx_queue_setup(pi, qi,
2575 					     port->nb_rx_desc[qi],
2576 					     rxring_numa[pi],
2577 					     &(port->rx_conf[qi]),
2578 					     mp);
2579 				} else {
2580 					struct rte_mempool *mp =
2581 						mbuf_pool_find
2582 							(port->socket_id, 0);
2583 					if (mp == NULL) {
2584 						printf("Failed to setup RX queue:"
2585 						printf("Failed to setup RX queue: "
2586 							" on the socket %d\n",
2587 							port->socket_id);
2588 						return -1;
2589 					}
2590 					diag = rx_queue_setup(pi, qi,
2591 					     port->nb_rx_desc[qi],
2592 					     port->socket_id,
2593 					     &(port->rx_conf[qi]),
2594 					     mp);
2595 				}
2596 				if (diag == 0)
2597 					continue;
2598 
2599 				/* Failed to set up Rx queue, return */
2600 				if (rte_atomic16_cmpset(&(port->port_status),
2601 							RTE_PORT_HANDLING,
2602 							RTE_PORT_STOPPED) == 0)
2603 					printf("Port %d can not be set back "
2604 							"to stopped\n", pi);
2605 				printf("Fail to configure port %d rx queues\n",
2606 				       pi);
2607 				/* try to reconfigure queues next time */
2608 				port->need_reconfig_queues = 1;
2609 				return -1;
2610 			}
2611 			/* setup hairpin queues */
2612 			if (setup_hairpin_queues(pi, p_pi, cnt_pi) != 0)
2613 				return -1;
2614 		}
2615 		configure_rxtx_dump_callbacks(verbose_level);
2616 		if (clear_ptypes) {
2617 			diag = rte_eth_dev_set_ptypes(pi, RTE_PTYPE_UNKNOWN,
2618 					NULL, 0);
2619 			if (diag < 0)
2620 				printf(
2621 				"Port %d: Failed to disable Ptype parsing\n",
2622 				pi);
2623 		}
2624 
2625 		p_pi = pi;
2626 		cnt_pi++;
2627 
2628 		/* start port */
2629 		diag = rte_eth_dev_start(pi);
2630 		if (diag < 0) {
2631 			printf("Fail to start port %d: %s\n", pi,
2632 			       rte_strerror(-diag));
2633 
2634 			/* Failed to start the port, set it back to stopped */
2635 			if (rte_atomic16_cmpset(&(port->port_status),
2636 				RTE_PORT_HANDLING, RTE_PORT_STOPPED) == 0)
2637 				printf("Port %d can not be set back to "
2638 							"stopped\n", pi);
2639 			continue;
2640 		}
2641 
2642 		if (rte_atomic16_cmpset(&(port->port_status),
2643 			RTE_PORT_HANDLING, RTE_PORT_STARTED) == 0)
2644 			printf("Port %d can not be set into started\n", pi);
2645 
2646 		if (eth_macaddr_get_print_err(pi, &mac_addr) == 0)
2647 			printf("Port %d: %02X:%02X:%02X:%02X:%02X:%02X\n", pi,
2648 				mac_addr.addr_bytes[0], mac_addr.addr_bytes[1],
2649 				mac_addr.addr_bytes[2], mac_addr.addr_bytes[3],
2650 				mac_addr.addr_bytes[4], mac_addr.addr_bytes[5]);
2651 
2652 		/* at least one port started, need to check link status */
2653 		need_check_link_status = 1;
2654 
2655 		pl[cfg_pi++] = pi;
2656 	}
2657 
2658 	if (need_check_link_status == 1 && !no_link_check)
2659 		check_all_ports_link_status(RTE_PORT_ALL);
2660 	else if (need_check_link_status == 0)
2661 		printf("Please stop the ports first\n");
2662 
2663 	if (hairpin_mode & 0xf) {
2664 		uint16_t i;
2665 		int j;
2666 
2667 		/* bind all started hairpin ports */
2668 		for (i = 0; i < cfg_pi; i++) {
2669 			pi = pl[i];
2670 			/* bind current Tx to all peer Rx */
2671 			peer_pi = rte_eth_hairpin_get_peer_ports(pi, peer_pl,
2672 							RTE_MAX_ETHPORTS, 1);
2673 			if (peer_pi < 0)
2674 				return peer_pi;
2675 			for (j = 0; j < peer_pi; j++) {
2676 				if (!port_is_started(peer_pl[j]))
2677 					continue;
2678 				diag = rte_eth_hairpin_bind(pi, peer_pl[j]);
2679 				if (diag < 0) {
2680 					printf("Error during binding hairpin"
2681 					       " Tx port %u to %u: %s\n",
2682 					       pi, peer_pl[j],
2683 					       rte_strerror(-diag));
2684 					return -1;
2685 				}
2686 			}
2687 			/* bind all peer Tx to current Rx */
2688 			peer_pi = rte_eth_hairpin_get_peer_ports(pi, peer_pl,
2689 							RTE_MAX_ETHPORTS, 0);
2690 			if (peer_pi < 0)
2691 				return peer_pi;
2692 			for (j = 0; j < peer_pi; j++) {
2693 				if (!port_is_started(peer_pl[j]))
2694 					continue;
2695 				diag = rte_eth_hairpin_bind(peer_pl[j], pi);
2696 				if (diag < 0) {
2697 					printf("Error during binding hairpin"
2698 					       " Tx port %u to %u: %s\n",
2699 					       peer_pl[j], pi,
2700 					       rte_strerror(-diag));
2701 					return -1;
2702 				}
2703 			}
2704 		}
2705 	}
2706 
2707 	printf("Done\n");
2708 	return 0;
2709 }
2710 
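/*
 * Stop the given port, or all ports when pid is RTE_PORT_ALL: unbind hairpin
 * peers, flush flow rules, stop the device and re-check link status.
 */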
2711 void
2712 stop_port(portid_t pid)
2713 {
2714 	portid_t pi;
2715 	struct rte_port *port;
2716 	int need_check_link_status = 0;
2717 	portid_t peer_pl[RTE_MAX_ETHPORTS];
2718 	int peer_pi;
2719 
2720 	if (dcb_test) {
2721 		dcb_test = 0;
2722 		dcb_config = 0;
2723 	}
2724 
2725 	if (port_id_is_invalid(pid, ENABLED_WARN))
2726 		return;
2727 
2728 	printf("Stopping ports...\n");
2729 
2730 	RTE_ETH_FOREACH_DEV(pi) {
2731 		if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
2732 			continue;
2733 
2734 		if (port_is_forwarding(pi) != 0 && test_done == 0) {
2735 			printf("Please remove port %d from forwarding configuration.\n", pi);
2736 			continue;
2737 		}
2738 
2739 		if (port_is_bonding_slave(pi)) {
2740 			printf("Please remove port %d from bonded device.\n", pi);
2741 			continue;
2742 		}
2743 
2744 		port = &ports[pi];
2745 		if (rte_atomic16_cmpset(&(port->port_status), RTE_PORT_STARTED,
2746 						RTE_PORT_HANDLING) == 0)
2747 			continue;
2748 
2749 		if (hairpin_mode & 0xf) {
2750 			int j;
2751 
2752 			rte_eth_hairpin_unbind(pi, RTE_MAX_ETHPORTS);
2753 			/* unbind all peer Tx from current Rx */
2754 			peer_pi = rte_eth_hairpin_get_peer_ports(pi, peer_pl,
2755 							RTE_MAX_ETHPORTS, 0);
2756 			if (peer_pi < 0)
2757 				continue;
2758 			for (j = 0; j < peer_pi; j++) {
2759 				if (!port_is_started(peer_pl[j]))
2760 					continue;
2761 				rte_eth_hairpin_unbind(peer_pl[j], pi);
2762 			}
2763 		}
2764 
2765 		if (port->flow_list)
2766 			port_flow_flush(pi);
2767 
2768 		if (rte_eth_dev_stop(pi) != 0)
2769 			RTE_LOG(ERR, EAL, "rte_eth_dev_stop failed for port %u\n",
2770 				pi);
2771 
2772 		if (rte_atomic16_cmpset(&(port->port_status),
2773 			RTE_PORT_HANDLING, RTE_PORT_STOPPED) == 0)
2774 			printf("Port %d can not be set into stopped\n", pi);
2775 		need_check_link_status = 1;
2776 	}
2777 	if (need_check_link_status && !no_link_check)
2778 		check_all_ports_link_status(RTE_PORT_ALL);
2779 
2780 	printf("Done\n");
2781 }
2782 
2783 static void
2784 remove_invalid_ports_in(portid_t *array, portid_t *total)
2785 {
2786 	portid_t i;
2787 	portid_t new_total = 0;
2788 
2789 	for (i = 0; i < *total; i++)
2790 		if (!port_id_is_invalid(array[i], DISABLED_WARN)) {
2791 			array[new_total] = array[i];
2792 			new_total++;
2793 		}
2794 	*total = new_total;
2795 }
2796 
2797 static void
2798 remove_invalid_ports(void)
2799 {
2800 	remove_invalid_ports_in(ports_ids, &nb_ports);
2801 	remove_invalid_ports_in(fwd_ports_ids, &nb_fwd_ports);
2802 	nb_cfg_ports = nb_fwd_ports;
2803 }
2804 
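/*
 * Close the given port, or all ports when pid is RTE_PORT_ALL: flush flow
 * rules, close the device and drop invalid port IDs from the internal lists.
 */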
2805 void
2806 close_port(portid_t pid)
2807 {
2808 	portid_t pi;
2809 	struct rte_port *port;
2810 
2811 	if (port_id_is_invalid(pid, ENABLED_WARN))
2812 		return;
2813 
2814 	printf("Closing ports...\n");
2815 
2816 	RTE_ETH_FOREACH_DEV(pi) {
2817 		if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
2818 			continue;
2819 
2820 		if (port_is_forwarding(pi) != 0 && test_done == 0) {
2821 			printf("Please remove port %d from forwarding configuration.\n", pi);
2822 			continue;
2823 		}
2824 
2825 		if (port_is_bonding_slave(pi)) {
2826 			printf("Please remove port %d from bonded device.\n", pi);
2827 			continue;
2828 		}
2829 
2830 		port = &ports[pi];
2831 		if (rte_atomic16_cmpset(&(port->port_status),
2832 			RTE_PORT_CLOSED, RTE_PORT_CLOSED) == 1) {
2833 			printf("Port %d is already closed\n", pi);
2834 			continue;
2835 		}
2836 
2837 		port_flow_flush(pi);
2838 		rte_eth_dev_close(pi);
2839 	}
2840 
2841 	remove_invalid_ports();
2842 	printf("Done\n");
2843 }
2844 
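/*
 * Reset the given port, or all ports when pid is RTE_PORT_ALL. Ports must be
 * stopped first; a successful reset forces reconfiguration on the next start.
 */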
2845 void
2846 reset_port(portid_t pid)
2847 {
2848 	int diag;
2849 	portid_t pi;
2850 	struct rte_port *port;
2851 
2852 	if (port_id_is_invalid(pid, ENABLED_WARN))
2853 		return;
2854 
2855 	if ((pid == (portid_t)RTE_PORT_ALL && !all_ports_stopped()) ||
2856 		(pid != (portid_t)RTE_PORT_ALL && !port_is_stopped(pid))) {
2857 		printf("Can not reset port(s), please stop port(s) first.\n");
2858 		return;
2859 	}
2860 
2861 	printf("Resetting ports...\n");
2862 
2863 	RTE_ETH_FOREACH_DEV(pi) {
2864 		if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
2865 			continue;
2866 
2867 		if (port_is_forwarding(pi) != 0 && test_done == 0) {
2868 			printf("Please remove port %d from forwarding "
2869 			       "configuration.\n", pi);
2870 			continue;
2871 		}
2872 
2873 		if (port_is_bonding_slave(pi)) {
2874 			printf("Please remove port %d from bonded device.\n",
2875 			       pi);
2876 			continue;
2877 		}
2878 
2879 		diag = rte_eth_dev_reset(pi);
2880 		if (diag == 0) {
2881 			port = &ports[pi];
2882 			port->need_reconfig = 1;
2883 			port->need_reconfig_queues = 1;
2884 		} else {
2885 			printf("Failed to reset port %d. diag=%d\n", pi, diag);
2886 		}
2887 	}
2888 
2889 	printf("Done\n");
2890 }
2891 
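/*
 * Probe a device from its devargs identifier and set up the newly created
 * port(s), either through the RTE_ETH_EVENT_NEW event or by iterating over
 * the ports matching the identifier.
 */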
2892 void
2893 attach_port(char *identifier)
2894 {
2895 	portid_t pi;
2896 	struct rte_dev_iterator iterator;
2897 
2898 	printf("Attaching a new port...\n");
2899 
2900 	if (identifier == NULL) {
2901 		printf("Invalid parameters are specified\n");
2902 		return;
2903 	}
2904 
2905 	if (rte_dev_probe(identifier) < 0) {
2906 		TESTPMD_LOG(ERR, "Failed to attach port %s\n", identifier);
2907 		return;
2908 	}
2909 
2910 	/* first attach mode: event */
2911 	if (setup_on_probe_event) {
2912 		/* new ports are detected on RTE_ETH_EVENT_NEW event */
2913 		for (pi = 0; pi < RTE_MAX_ETHPORTS; pi++)
2914 			if (ports[pi].port_status == RTE_PORT_HANDLING &&
2915 					ports[pi].need_setup != 0)
2916 				setup_attached_port(pi);
2917 		return;
2918 	}
2919 
2920 	/* second attach mode: iterator */
2921 	RTE_ETH_FOREACH_MATCHING_DEV(pi, identifier, &iterator) {
2922 		/* setup ports matching the devargs used for probing */
2923 		if (port_is_forwarding(pi))
2924 			continue; /* port was already attached before */
2925 		setup_attached_port(pi);
2926 	}
2927 }
2928 
2929 static void
2930 setup_attached_port(portid_t pi)
2931 {
2932 	unsigned int socket_id;
2933 	int ret;
2934 
2935 	socket_id = (unsigned)rte_eth_dev_socket_id(pi);
2936 	/* if socket_id is invalid, set to the first available socket. */
2937 	if (check_socket_id(socket_id) < 0)
2938 		socket_id = socket_ids[0];
2939 	reconfig(pi, socket_id);
2940 	ret = rte_eth_promiscuous_enable(pi);
2941 	if (ret != 0)
2942 		printf("Error during enabling promiscuous mode for port %u: %s - ignore\n",
2943 			pi, rte_strerror(-ret));
2944 
2945 	ports_ids[nb_ports++] = pi;
2946 	fwd_ports_ids[nb_fwd_ports++] = pi;
2947 	nb_cfg_ports = nb_fwd_ports;
2948 	ports[pi].need_setup = 0;
2949 	ports[pi].port_status = RTE_PORT_STOPPED;
2950 
2951 	printf("Port %d is attached. Now total ports is %d\n", pi, nb_ports);
2952 	printf("Done\n");
2953 }
2954 
2955 static void
2956 detach_device(struct rte_device *dev)
2957 {
2958 	portid_t sibling;
2959 
2960 	if (dev == NULL) {
2961 		printf("Device already removed\n");
2962 		return;
2963 	}
2964 
2965 	printf("Removing a device...\n");
2966 
2967 	RTE_ETH_FOREACH_DEV_OF(sibling, dev) {
2968 		if (ports[sibling].port_status != RTE_PORT_CLOSED) {
2969 			if (ports[sibling].port_status != RTE_PORT_STOPPED) {
2970 				printf("Port %u not stopped\n", sibling);
2971 				return;
2972 			}
2973 			port_flow_flush(sibling);
2974 		}
2975 	}
2976 
2977 	if (rte_dev_remove(dev) < 0) {
2978 		TESTPMD_LOG(ERR, "Failed to detach device %s\n", dev->name);
2979 		return;
2980 	}
2981 	remove_invalid_ports();
2982 
2983 	printf("Device is detached\n");
2984 	printf("Now total ports is %d\n", nb_ports);
2985 	printf("Done\n");
2986 	return;
2987 }
2988 
2989 void
2990 detach_port_device(portid_t port_id)
2991 {
2992 	if (port_id_is_invalid(port_id, ENABLED_WARN))
2993 		return;
2994 
2995 	if (ports[port_id].port_status != RTE_PORT_CLOSED) {
2996 		if (ports[port_id].port_status != RTE_PORT_STOPPED) {
2997 			printf("Port not stopped\n");
2998 			return;
2999 		}
3000 		printf("Port was not closed\n");
3001 	}
3002 
3003 	detach_device(rte_eth_devices[port_id].device);
3004 }
3005 
3006 void
3007 detach_devargs(char *identifier)
3008 {
3009 	struct rte_dev_iterator iterator;
3010 	struct rte_devargs da;
3011 	portid_t port_id;
3012 
3013 	printf("Removing a device...\n");
3014 
3015 	memset(&da, 0, sizeof(da));
3016 	if (rte_devargs_parsef(&da, "%s", identifier)) {
3017 		printf("cannot parse identifier\n");
3018 		if (da.args)
3019 			free(da.args);
3020 		return;
3021 	}
3022 
3023 	RTE_ETH_FOREACH_MATCHING_DEV(port_id, identifier, &iterator) {
3024 		if (ports[port_id].port_status != RTE_PORT_CLOSED) {
3025 			if (ports[port_id].port_status != RTE_PORT_STOPPED) {
3026 				printf("Port %u not stopped\n", port_id);
3027 				rte_eth_iterator_cleanup(&iterator);
3028 				return;
3029 			}
3030 			port_flow_flush(port_id);
3031 		}
3032 	}
3033 
3034 	if (rte_eal_hotplug_remove(da.bus->name, da.name) != 0) {
3035 		TESTPMD_LOG(ERR, "Failed to detach device %s(%s)\n",
3036 			    da.name, da.bus->name);
3037 		return;
3038 	}
3039 
3040 	remove_invalid_ports();
3041 
3042 	printf("Device %s is detached\n", identifier);
3043 	printf("Now total ports is %d\n", nb_ports);
3044 	printf("Done\n");
3045 }
3046 
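/*
 * Cleanup on exit: stop forwarding, stop and close all ports, stop hotplug
 * monitoring if it was enabled, and free the mbuf mempools.
 */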
3047 void
3048 pmd_test_exit(void)
3049 {
3050 	portid_t pt_id;
3051 	unsigned int i;
3052 	int ret;
3053 
3054 	if (test_done == 0)
3055 		stop_packet_forwarding();
3056 
3057 	for (i = 0 ; i < RTE_DIM(mempools) ; i++) {
3058 		if (mempools[i]) {
3059 			if (mp_alloc_type == MP_ALLOC_ANON)
3060 				rte_mempool_mem_iter(mempools[i], dma_unmap_cb,
3061 						     NULL);
3062 		}
3063 	}
3064 	if (ports != NULL) {
3065 		no_link_check = 1;
3066 		RTE_ETH_FOREACH_DEV(pt_id) {
3067 			printf("\nStopping port %d...\n", pt_id);
3068 			fflush(stdout);
3069 			stop_port(pt_id);
3070 		}
3071 		RTE_ETH_FOREACH_DEV(pt_id) {
3072 			printf("\nShutting down port %d...\n", pt_id);
3073 			fflush(stdout);
3074 			close_port(pt_id);
3075 		}
3076 	}
3077 
3078 	if (hot_plug) {
3079 		ret = rte_dev_event_monitor_stop();
3080 		if (ret) {
3081 			RTE_LOG(ERR, EAL,
3082 				"fail to stop device event monitor.\n");
3083 			return;
3084 		}
3085 
3086 		ret = rte_dev_event_callback_unregister(NULL,
3087 			dev_event_callback, NULL);
3088 		if (ret < 0) {
3089 			RTE_LOG(ERR, EAL,
3090 				"fail to unregister device event callback.\n");
3091 			return;
3092 		}
3093 
3094 		ret = rte_dev_hotplug_handle_disable();
3095 		if (ret) {
3096 			RTE_LOG(ERR, EAL,
3097 				"fail to disable hotplug handling.\n");
3098 			return;
3099 		}
3100 	}
3101 	for (i = 0 ; i < RTE_DIM(mempools) ; i++) {
3102 		if (mempools[i])
3103 			rte_mempool_free(mempools[i]);
3104 	}
3105 
3106 	printf("\nBye...\n");
3107 }
3108 
3109 typedef void (*cmd_func_t)(void);
3110 struct pmd_test_command {
3111 	const char *cmd_name;
3112 	cmd_func_t cmd_func;
3113 };
3114 
3115 /* Check the link status of all ports for up to 9s, and print the final status */
3116 static void
3117 check_all_ports_link_status(uint32_t port_mask)
3118 {
3119 #define CHECK_INTERVAL 100 /* 100ms */
3120 #define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
3121 	portid_t portid;
3122 	uint8_t count, all_ports_up, print_flag = 0;
3123 	struct rte_eth_link link;
3124 	int ret;
3125 	char link_status[RTE_ETH_LINK_MAX_STR_LEN];
3126 
3127 	printf("Checking link statuses...\n");
3128 	fflush(stdout);
3129 	for (count = 0; count <= MAX_CHECK_TIME; count++) {
3130 		all_ports_up = 1;
3131 		RTE_ETH_FOREACH_DEV(portid) {
3132 			if ((port_mask & (1 << portid)) == 0)
3133 				continue;
3134 			memset(&link, 0, sizeof(link));
3135 			ret = rte_eth_link_get_nowait(portid, &link);
3136 			if (ret < 0) {
3137 				all_ports_up = 0;
3138 				if (print_flag == 1)
3139 					printf("Port %u link get failed: %s\n",
3140 						portid, rte_strerror(-ret));
3141 				continue;
3142 			}
3143 			/* print link status if flag set */
3144 			if (print_flag == 1) {
3145 				rte_eth_link_to_str(link_status,
3146 					sizeof(link_status), &link);
3147 				printf("Port %d %s\n", portid, link_status);
3148 				continue;
3149 			}
3150 			/* clear all_ports_up flag if any link down */
3151 			if (link.link_status == ETH_LINK_DOWN) {
3152 				all_ports_up = 0;
3153 				break;
3154 			}
3155 		}
3156 		/* after finally printing all link status, get out */
3157 		if (print_flag == 1)
3158 			break;
3159 
3160 		if (all_ports_up == 0) {
3161 			fflush(stdout);
3162 			rte_delay_ms(CHECK_INTERVAL);
3163 		}
3164 
3165 		/* set the print_flag if all ports up or timeout */
3166 		if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
3167 			print_flag = 1;
3168 		}
3169 
3170 		if (lsc_interrupt)
3171 			break;
3172 	}
3173 }
3174 
3175 static void
3176 rmv_port_callback(void *arg)
3177 {
3178 	int need_to_start = 0;
3179 	int org_no_link_check = no_link_check;
3180 	portid_t port_id = (intptr_t)arg;
3181 	struct rte_device *dev;
3182 
3183 	RTE_ETH_VALID_PORTID_OR_RET(port_id);
3184 
3185 	if (!test_done && port_is_forwarding(port_id)) {
3186 		need_to_start = 1;
3187 		stop_packet_forwarding();
3188 	}
3189 	no_link_check = 1;
3190 	stop_port(port_id);
3191 	no_link_check = org_no_link_check;
3192 
3193 	/* Save rte_device pointer before closing ethdev port */
3194 	dev = rte_eth_devices[port_id].device;
3195 	close_port(port_id);
3196 	detach_device(dev); /* might be already removed or have more ports */
3197 
3198 	if (need_to_start)
3199 		start_packet_forwarding(0);
3200 }
3201 
3202 /* This function is used by the interrupt thread */
3203 static int
3204 eth_event_callback(portid_t port_id, enum rte_eth_event_type type, void *param,
3205 		  void *ret_param)
3206 {
3207 	RTE_SET_USED(param);
3208 	RTE_SET_USED(ret_param);
3209 
3210 	if (type >= RTE_ETH_EVENT_MAX) {
3211 		fprintf(stderr, "\nPort %" PRIu16 ": %s called upon invalid event %d\n",
3212 			port_id, __func__, type);
3213 		fflush(stderr);
3214 	} else if (event_print_mask & (UINT32_C(1) << type)) {
3215 		printf("\nPort %" PRIu16 ": %s event\n", port_id,
3216 			eth_event_desc[type]);
3217 		fflush(stdout);
3218 	}
3219 
3220 	switch (type) {
3221 	case RTE_ETH_EVENT_NEW:
3222 		ports[port_id].need_setup = 1;
3223 		ports[port_id].port_status = RTE_PORT_HANDLING;
3224 		break;
3225 	case RTE_ETH_EVENT_INTR_RMV:
3226 		if (port_id_is_invalid(port_id, DISABLED_WARN))
3227 			break;
3228 		if (rte_eal_alarm_set(100000,
3229 				rmv_port_callback, (void *)(intptr_t)port_id))
3230 			fprintf(stderr, "Could not set up deferred device removal\n");
3231 		break;
3232 	case RTE_ETH_EVENT_DESTROY:
3233 		ports[port_id].port_status = RTE_PORT_CLOSED;
3234 		printf("Port %u is closed\n", port_id);
3235 		break;
3236 	default:
3237 		break;
3238 	}
3239 	return 0;
3240 }
3241 
3242 static int
3243 register_eth_event_callback(void)
3244 {
3245 	int ret;
3246 	enum rte_eth_event_type event;
3247 
3248 	for (event = RTE_ETH_EVENT_UNKNOWN;
3249 			event < RTE_ETH_EVENT_MAX; event++) {
3250 		ret = rte_eth_dev_callback_register(RTE_ETH_ALL,
3251 				event,
3252 				eth_event_callback,
3253 				NULL);
3254 		if (ret != 0) {
3255 			TESTPMD_LOG(ERR, "Failed to register callback for "
3256 					"%s event\n", eth_event_desc[event]);
3257 			return -1;
3258 		}
3259 	}
3260 
3261 	return 0;
3262 }
3263 
3264 /* This function is used by the interrupt thread */
3265 static void
3266 dev_event_callback(const char *device_name, enum rte_dev_event_type type,
3267 			     __rte_unused void *arg)
3268 {
3269 	uint16_t port_id;
3270 	int ret;
3271 
3272 	if (type >= RTE_DEV_EVENT_MAX) {
3273 		fprintf(stderr, "%s called upon invalid event %d\n",
3274 			__func__, type);
3275 		fflush(stderr);
3276 	}
3277 
3278 	switch (type) {
3279 	case RTE_DEV_EVENT_REMOVE:
3280 		RTE_LOG(DEBUG, EAL, "The device: %s has been removed!\n",
3281 			device_name);
3282 		ret = rte_eth_dev_get_port_by_name(device_name, &port_id);
3283 		if (ret) {
3284 			RTE_LOG(ERR, EAL, "can not get port by device %s!\n",
3285 				device_name);
3286 			return;
3287 		}
3288 		/*
3289 		 * Because the user's callback is invoked from the EAL interrupt
3290 		 * callback, the interrupt callback must be finished before it
3291 		 * can be unregistered when detaching the device. So the
3292 		 * callback returns quickly and a deferred removal is used to
3293 		 * detach the device. This is a workaround; once device
3294 		 * detaching is moved into the EAL in the future, the deferred
3295 		 * removal can be deleted.
3296 		 */
3297 		if (rte_eal_alarm_set(100000,
3298 				rmv_port_callback, (void *)(intptr_t)port_id))
3299 			RTE_LOG(ERR, EAL,
3300 				"Could not set up deferred device removal\n");
3301 		break;
3302 	case RTE_DEV_EVENT_ADD:
3303 		RTE_LOG(ERR, EAL, "The device: %s has been added!\n",
3304 			device_name);
3305 		/* TODO: After the kernel driver binding is finished,
3306 		 * begin to attach the port.
3307 		 */
3308 		break;
3309 	default:
3310 		break;
3311 	}
3312 }
3313 
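/*
 * Initialize the per-queue Rx/Tx configuration of a port from the device
 * defaults and apply any thresholds passed on the command line.
 */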
3314 static void
3315 rxtx_port_config(struct rte_port *port)
3316 {
3317 	uint16_t qid;
3318 	uint64_t offloads;
3319 
3320 	for (qid = 0; qid < nb_rxq; qid++) {
3321 		offloads = port->rx_conf[qid].offloads;
3322 		port->rx_conf[qid] = port->dev_info.default_rxconf;
3323 		if (offloads != 0)
3324 			port->rx_conf[qid].offloads = offloads;
3325 
3326 		/* Check if any Rx parameters have been passed */
3327 		if (rx_pthresh != RTE_PMD_PARAM_UNSET)
3328 			port->rx_conf[qid].rx_thresh.pthresh = rx_pthresh;
3329 
3330 		if (rx_hthresh != RTE_PMD_PARAM_UNSET)
3331 			port->rx_conf[qid].rx_thresh.hthresh = rx_hthresh;
3332 
3333 		if (rx_wthresh != RTE_PMD_PARAM_UNSET)
3334 			port->rx_conf[qid].rx_thresh.wthresh = rx_wthresh;
3335 
3336 		if (rx_free_thresh != RTE_PMD_PARAM_UNSET)
3337 			port->rx_conf[qid].rx_free_thresh = rx_free_thresh;
3338 
3339 		if (rx_drop_en != RTE_PMD_PARAM_UNSET)
3340 			port->rx_conf[qid].rx_drop_en = rx_drop_en;
3341 
3342 		port->nb_rx_desc[qid] = nb_rxd;
3343 	}
3344 
3345 	for (qid = 0; qid < nb_txq; qid++) {
3346 		offloads = port->tx_conf[qid].offloads;
3347 		port->tx_conf[qid] = port->dev_info.default_txconf;
3348 		if (offloads != 0)
3349 			port->tx_conf[qid].offloads = offloads;
3350 
3351 		/* Check if any Tx parameters have been passed */
3352 		if (tx_pthresh != RTE_PMD_PARAM_UNSET)
3353 			port->tx_conf[qid].tx_thresh.pthresh = tx_pthresh;
3354 
3355 		if (tx_hthresh != RTE_PMD_PARAM_UNSET)
3356 			port->tx_conf[qid].tx_thresh.hthresh = tx_hthresh;
3357 
3358 		if (tx_wthresh != RTE_PMD_PARAM_UNSET)
3359 			port->tx_conf[qid].tx_thresh.wthresh = tx_wthresh;
3360 
3361 		if (tx_rs_thresh != RTE_PMD_PARAM_UNSET)
3362 			port->tx_conf[qid].tx_rs_thresh = tx_rs_thresh;
3363 
3364 		if (tx_free_thresh != RTE_PMD_PARAM_UNSET)
3365 			port->tx_conf[qid].tx_free_thresh = tx_free_thresh;
3366 
3367 		port->nb_tx_desc[qid] = nb_txd;
3368 	}
3369 }
3370 
3371 /*
3372  * Helper function to set the max_rx_pkt_len value and the JUMBO_FRAME offload
3373  * accordingly; the MTU is also aligned if the JUMBO_FRAME offload is not set.
3374  *
3375  * port->dev_info should be set before calling this function.
3376  *
3377  * return 0 on success, negative on error
3378  */
3379 int
3380 update_jumbo_frame_offload(portid_t portid)
3381 {
3382 	struct rte_port *port = &ports[portid];
3383 	uint32_t eth_overhead;
3384 	uint64_t rx_offloads;
3385 	int ret;
3386 	bool on;
3387 
3388 	/* Update the max_rx_pkt_len to have MTU as RTE_ETHER_MTU */
3389 	if (port->dev_info.max_mtu != UINT16_MAX &&
3390 	    port->dev_info.max_rx_pktlen > port->dev_info.max_mtu)
3391 		eth_overhead = port->dev_info.max_rx_pktlen -
3392 				port->dev_info.max_mtu;
3393 	else
3394 		eth_overhead = RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN;
3395 
3396 	rx_offloads = port->dev_conf.rxmode.offloads;
3397 
3398 	/* Default config value is 0 to use PMD specific overhead */
3399 	if (port->dev_conf.rxmode.max_rx_pkt_len == 0)
3400 		port->dev_conf.rxmode.max_rx_pkt_len = RTE_ETHER_MTU + eth_overhead;
3401 
3402 	if (port->dev_conf.rxmode.max_rx_pkt_len <= RTE_ETHER_MTU + eth_overhead) {
3403 		rx_offloads &= ~DEV_RX_OFFLOAD_JUMBO_FRAME;
3404 		on = false;
3405 	} else {
3406 		if ((port->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_JUMBO_FRAME) == 0) {
3407 			printf("Frame size (%u) is not supported by port %u\n",
3408 				port->dev_conf.rxmode.max_rx_pkt_len,
3409 				portid);
3410 			return -1;
3411 		}
3412 		rx_offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME;
3413 		on = true;
3414 	}
3415 
3416 	if (rx_offloads != port->dev_conf.rxmode.offloads) {
3417 		uint16_t qid;
3418 
3419 		port->dev_conf.rxmode.offloads = rx_offloads;
3420 
3421 		/* Apply JUMBO_FRAME offload configuration to Rx queue(s) */
3422 		for (qid = 0; qid < port->dev_info.nb_rx_queues; qid++) {
3423 			if (on)
3424 				port->rx_conf[qid].offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME;
3425 			else
3426 				port->rx_conf[qid].offloads &= ~DEV_RX_OFFLOAD_JUMBO_FRAME;
3427 		}
3428 	}
3429 
3430 	/* If JUMBO_FRAME is set, the MTU conversion is done by the ethdev layer;
3431 	 * if it is unset, do it here.
3432 	 */
3433 	if ((rx_offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) == 0) {
3434 		ret = rte_eth_dev_set_mtu(portid,
3435 				port->dev_conf.rxmode.max_rx_pkt_len - eth_overhead);
3436 		if (ret)
3437 			printf("Failed to set MTU to %u for port %u\n",
3438 				port->dev_conf.rxmode.max_rx_pkt_len - eth_overhead,
3439 				portid);
3440 	}
3441 
3442 	return 0;
3443 }
3444 
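/*
 * Apply the default configuration to every probed port: flow director, RSS
 * (when several Rx queues are used), Rx/Tx queue setup and LSC/RMV interrupts.
 */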
3445 void
3446 init_port_config(void)
3447 {
3448 	portid_t pid;
3449 	struct rte_port *port;
3450 	int ret;
3451 
3452 	RTE_ETH_FOREACH_DEV(pid) {
3453 		port = &ports[pid];
3454 		port->dev_conf.fdir_conf = fdir_conf;
3455 
3456 		ret = eth_dev_info_get_print_err(pid, &port->dev_info);
3457 		if (ret != 0)
3458 			return;
3459 
3460 		if (nb_rxq > 1) {
3461 			port->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
3462 			port->dev_conf.rx_adv_conf.rss_conf.rss_hf =
3463 				rss_hf & port->dev_info.flow_type_rss_offloads;
3464 		} else {
3465 			port->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
3466 			port->dev_conf.rx_adv_conf.rss_conf.rss_hf = 0;
3467 		}
3468 
3469 		if (port->dcb_flag == 0) {
3470 			if( port->dev_conf.rx_adv_conf.rss_conf.rss_hf != 0)
3471 				port->dev_conf.rxmode.mq_mode =
3472 					(enum rte_eth_rx_mq_mode)
3473 						(rx_mq_mode & ETH_MQ_RX_RSS);
3474 			else
3475 				port->dev_conf.rxmode.mq_mode = ETH_MQ_RX_NONE;
3476 		}
3477 
3478 		rxtx_port_config(port);
3479 
3480 		ret = eth_macaddr_get_print_err(pid, &port->eth_addr);
3481 		if (ret != 0)
3482 			return;
3483 
3484 #if defined RTE_NET_IXGBE && defined RTE_LIBRTE_IXGBE_BYPASS
3485 		rte_pmd_ixgbe_bypass_init(pid);
3486 #endif
3487 
3488 		if (lsc_interrupt &&
3489 		    (rte_eth_devices[pid].data->dev_flags &
3490 		     RTE_ETH_DEV_INTR_LSC))
3491 			port->dev_conf.intr_conf.lsc = 1;
3492 		if (rmv_interrupt &&
3493 		    (rte_eth_devices[pid].data->dev_flags &
3494 		     RTE_ETH_DEV_INTR_RMV))
3495 			port->dev_conf.intr_conf.rmv = 1;
3496 	}
3497 }
3498 
3499 void set_port_slave_flag(portid_t slave_pid)
3500 {
3501 	struct rte_port *port;
3502 
3503 	port = &ports[slave_pid];
3504 	port->slave_flag = 1;
3505 }
3506 
3507 void clear_port_slave_flag(portid_t slave_pid)
3508 {
3509 	struct rte_port *port;
3510 
3511 	port = &ports[slave_pid];
3512 	port->slave_flag = 0;
3513 }
3514 
3515 uint8_t port_is_bonding_slave(portid_t slave_pid)
3516 {
3517 	struct rte_port *port;
3518 
3519 	port = &ports[slave_pid];
3520 	if ((rte_eth_devices[slave_pid].data->dev_flags &
3521 	    RTE_ETH_DEV_BONDED_SLAVE) || (port->slave_flag == 1))
3522 		return 1;
3523 	return 0;
3524 }
3525 
3526 const uint16_t vlan_tags[] = {
3527 		0,  1,  2,  3,  4,  5,  6,  7,
3528 		8,  9, 10, 11,  12, 13, 14, 15,
3529 		16, 17, 18, 19, 20, 21, 22, 23,
3530 		24, 25, 26, 27, 28, 29, 30, 31
3531 };
3532 
3533 static  int
3534 get_eth_dcb_conf(portid_t pid, struct rte_eth_conf *eth_conf,
3535 		 enum dcb_mode_enable dcb_mode,
3536 		 enum rte_eth_nb_tcs num_tcs,
3537 		 uint8_t pfc_en)
3538 {
3539 	uint8_t i;
3540 	int32_t rc;
3541 	struct rte_eth_rss_conf rss_conf;
3542 
3543 	/*
3544 	 * Builds up the correct configuration for dcb+vt based on the vlan tags array
3545 	 * given above, and the number of traffic classes available for use.
3546 	 */
3547 	if (dcb_mode == DCB_VT_ENABLED) {
3548 		struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
3549 				&eth_conf->rx_adv_conf.vmdq_dcb_conf;
3550 		struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3551 				&eth_conf->tx_adv_conf.vmdq_dcb_tx_conf;
3552 
3553 		/* VMDQ+DCB RX and TX configurations */
3554 		vmdq_rx_conf->enable_default_pool = 0;
3555 		vmdq_rx_conf->default_pool = 0;
3556 		vmdq_rx_conf->nb_queue_pools =
3557 			(num_tcs ==  ETH_4_TCS ? ETH_32_POOLS : ETH_16_POOLS);
3558 		vmdq_tx_conf->nb_queue_pools =
3559 			(num_tcs ==  ETH_4_TCS ? ETH_32_POOLS : ETH_16_POOLS);
3560 
3561 		vmdq_rx_conf->nb_pool_maps = vmdq_rx_conf->nb_queue_pools;
3562 		for (i = 0; i < vmdq_rx_conf->nb_pool_maps; i++) {
3563 			vmdq_rx_conf->pool_map[i].vlan_id = vlan_tags[i];
3564 			vmdq_rx_conf->pool_map[i].pools =
3565 				1 << (i % vmdq_rx_conf->nb_queue_pools);
3566 		}
3567 		for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3568 			vmdq_rx_conf->dcb_tc[i] = i % num_tcs;
3569 			vmdq_tx_conf->dcb_tc[i] = i % num_tcs;
3570 		}
3571 
3572 		/* set DCB mode of RX and TX of multiple queues */
3573 		eth_conf->rxmode.mq_mode =
3574 				(enum rte_eth_rx_mq_mode)
3575 					(rx_mq_mode & ETH_MQ_RX_VMDQ_DCB);
3576 		eth_conf->txmode.mq_mode = ETH_MQ_TX_VMDQ_DCB;
3577 	} else {
3578 		struct rte_eth_dcb_rx_conf *rx_conf =
3579 				&eth_conf->rx_adv_conf.dcb_rx_conf;
3580 		struct rte_eth_dcb_tx_conf *tx_conf =
3581 				&eth_conf->tx_adv_conf.dcb_tx_conf;
3582 
3583 		memset(&rss_conf, 0, sizeof(struct rte_eth_rss_conf));
3584 
3585 		rc = rte_eth_dev_rss_hash_conf_get(pid, &rss_conf);
3586 		if (rc != 0)
3587 			return rc;
3588 
3589 		rx_conf->nb_tcs = num_tcs;
3590 		tx_conf->nb_tcs = num_tcs;
3591 
3592 		for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3593 			rx_conf->dcb_tc[i] = i % num_tcs;
3594 			tx_conf->dcb_tc[i] = i % num_tcs;
3595 		}
3596 
3597 		eth_conf->rxmode.mq_mode =
3598 				(enum rte_eth_rx_mq_mode)
3599 					(rx_mq_mode & ETH_MQ_RX_DCB_RSS);
3600 		eth_conf->rx_adv_conf.rss_conf = rss_conf;
3601 		eth_conf->txmode.mq_mode = ETH_MQ_TX_DCB;
3602 	}
3603 
3604 	if (pfc_en)
3605 		eth_conf->dcb_capability_en =
3606 				ETH_DCB_PG_SUPPORT | ETH_DCB_PFC_SUPPORT;
3607 	else
3608 		eth_conf->dcb_capability_en = ETH_DCB_PG_SUPPORT;
3609 
3610 	return 0;
3611 }
3612 
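/*
 * Configure a port for DCB, with or without virtualization (VT): build the
 * DCB configuration, reconfigure the device, adjust the number of Rx/Tx
 * queues and enable VLAN filtering on the test VLAN tags.
 */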
3613 int
3614 init_port_dcb_config(portid_t pid,
3615 		     enum dcb_mode_enable dcb_mode,
3616 		     enum rte_eth_nb_tcs num_tcs,
3617 		     uint8_t pfc_en)
3618 {
3619 	struct rte_eth_conf port_conf;
3620 	struct rte_port *rte_port;
3621 	int retval;
3622 	uint16_t i;
3623 
3624 	rte_port = &ports[pid];
3625 
3626 	memset(&port_conf, 0, sizeof(struct rte_eth_conf));
3627 	/* Enter DCB configuration status */
3628 	dcb_config = 1;
3629 
3630 	port_conf.rxmode = rte_port->dev_conf.rxmode;
3631 	port_conf.txmode = rte_port->dev_conf.txmode;
3632 
3633 	/* Set the configuration of DCB in VT mode and DCB in non-VT mode */
3634 	retval = get_eth_dcb_conf(pid, &port_conf, dcb_mode, num_tcs, pfc_en);
3635 	if (retval < 0)
3636 		return retval;
3637 	port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_VLAN_FILTER;
3638 
3639 	/* Re-configure the device. */
3640 	retval = rte_eth_dev_configure(pid, nb_rxq, nb_rxq, &port_conf);
3641 	if (retval < 0)
3642 		return retval;
3643 
3644 	retval = eth_dev_info_get_print_err(pid, &rte_port->dev_info);
3645 	if (retval != 0)
3646 		return retval;
3647 
3648 	/* If dev_info.vmdq_pool_base is greater than 0,
3649 	 * the queue IDs of the VMDq pools start after the PF queues.
3650 	 */
3651 	if (dcb_mode == DCB_VT_ENABLED &&
3652 	    rte_port->dev_info.vmdq_pool_base > 0) {
3653 		printf("VMDQ_DCB multi-queue mode is nonsensical"
3654 			" for port %d.\n", pid);
3655 		return -1;
3656 	}
3657 
3658 	/* Assume all ports in testpmd have the same DCB capability
3659 	 * and the same number of Rx and Tx queues in DCB mode.
3660 	 */
3661 	if (dcb_mode == DCB_VT_ENABLED) {
3662 		if (rte_port->dev_info.max_vfs > 0) {
3663 			nb_rxq = rte_port->dev_info.nb_rx_queues;
3664 			nb_txq = rte_port->dev_info.nb_tx_queues;
3665 		} else {
3666 			nb_rxq = rte_port->dev_info.max_rx_queues;
3667 			nb_txq = rte_port->dev_info.max_tx_queues;
3668 		}
3669 	} else {
3670 		/* If VT is disabled, use all PF queues */
3671 		if (rte_port->dev_info.vmdq_pool_base == 0) {
3672 			nb_rxq = rte_port->dev_info.max_rx_queues;
3673 			nb_txq = rte_port->dev_info.max_tx_queues;
3674 		} else {
3675 			nb_rxq = (queueid_t)num_tcs;
3676 			nb_txq = (queueid_t)num_tcs;
3677 
3678 		}
3679 	}
3680 	rx_free_thresh = 64;
3681 
3682 	memcpy(&rte_port->dev_conf, &port_conf, sizeof(struct rte_eth_conf));
3683 
3684 	rxtx_port_config(rte_port);
3685 	/* VLAN filter */
3686 	rte_port->dev_conf.rxmode.offloads |= DEV_RX_OFFLOAD_VLAN_FILTER;
3687 	for (i = 0; i < RTE_DIM(vlan_tags); i++)
3688 		rx_vft_set(pid, vlan_tags[i], 1);
3689 
3690 	retval = eth_macaddr_get_print_err(pid, &rte_port->eth_addr);
3691 	if (retval != 0)
3692 		return retval;
3693 
3694 	rte_port->dcb_flag = 1;
3695 
3696 	return 0;
3697 }
3698 
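/* Allocate and initialize the ports array and the per-port NUMA settings. */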
3699 static void
3700 init_port(void)
3701 {
3702 	int i;
3703 
3704 	/* Configuration of Ethernet ports. */
3705 	ports = rte_zmalloc("testpmd: ports",
3706 			    sizeof(struct rte_port) * RTE_MAX_ETHPORTS,
3707 			    RTE_CACHE_LINE_SIZE);
3708 	if (ports == NULL) {
3709 		rte_exit(EXIT_FAILURE,
3710 				"rte_zmalloc(%d struct rte_port) failed\n",
3711 				RTE_MAX_ETHPORTS);
3712 	}
3713 	for (i = 0; i < RTE_MAX_ETHPORTS; i++)
3714 		LIST_INIT(&ports[i].flow_tunnel_list);
3715 	/* Initialize ports NUMA structures */
3716 	memset(port_numa, NUMA_NO_CONFIG, RTE_MAX_ETHPORTS);
3717 	memset(rxring_numa, NUMA_NO_CONFIG, RTE_MAX_ETHPORTS);
3718 	memset(txring_numa, NUMA_NO_CONFIG, RTE_MAX_ETHPORTS);
3719 }
3720 
3721 static void
3722 force_quit(void)
3723 {
3724 	pmd_test_exit();
3725 	prompt_exit();
3726 }
3727 
3728 static void
3729 print_stats(void)
3730 {
3731 	uint8_t i;
3732 	const char clr[] = { 27, '[', '2', 'J', '\0' };
3733 	const char top_left[] = { 27, '[', '1', ';', '1', 'H', '\0' };
3734 
3735 	/* Clear screen and move to top left */
3736 	printf("%s%s", clr, top_left);
3737 
3738 	printf("\nPort statistics ====================================");
3739 	for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++)
3740 		nic_stats_display(fwd_ports_ids[i]);
3741 
3742 	fflush(stdout);
3743 }
3744 
3745 static void
3746 signal_handler(int signum)
3747 {
3748 	if (signum == SIGINT || signum == SIGTERM) {
3749 		printf("\nSignal %d received, preparing to exit...\n",
3750 				signum);
3751 #ifdef RTE_LIB_PDUMP
3752 		/* uninitialize packet capture framework */
3753 		rte_pdump_uninit();
3754 #endif
3755 #ifdef RTE_LIB_LATENCYSTATS
3756 		if (latencystats_enabled != 0)
3757 			rte_latencystats_uninit();
3758 #endif
3759 		force_quit();
3760 		/* Set flag to indicate the force termination. */
3761 		f_quit = 1;
3762 		/* exit with the expected status */
3763 		signal(signum, SIG_DFL);
3764 		kill(getpid(), signum);
3765 	}
3766 }
3767 
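/*
 * testpmd entry point: initialize EAL and the testpmd configuration, parse
 * the command-line arguments, then configure and start the probed ports.
 */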
3768 int
3769 main(int argc, char** argv)
3770 {
3771 	int diag;
3772 	portid_t port_id;
3773 	uint16_t count;
3774 	int ret;
3775 
3776 	signal(SIGINT, signal_handler);
3777 	signal(SIGTERM, signal_handler);
3778 
3779 	testpmd_logtype = rte_log_register("testpmd");
3780 	if (testpmd_logtype < 0)
3781 		rte_exit(EXIT_FAILURE, "Cannot register log type");
3782 	rte_log_set_level(testpmd_logtype, RTE_LOG_DEBUG);
3783 
3784 	diag = rte_eal_init(argc, argv);
3785 	if (diag < 0)
3786 		rte_exit(EXIT_FAILURE, "Cannot init EAL: %s\n",
3787 			 rte_strerror(rte_errno));
3788 
3789 	if (rte_eal_process_type() == RTE_PROC_SECONDARY)
3790 		rte_exit(EXIT_FAILURE,
3791 			 "Secondary process type not supported.\n");
3792 
3793 	ret = register_eth_event_callback();
3794 	if (ret != 0)
3795 		rte_exit(EXIT_FAILURE, "Cannot register for ethdev events\n");
3796 
3797 #ifdef RTE_LIB_PDUMP
3798 	/* initialize packet capture framework */
3799 	rte_pdump_init();
3800 #endif
3801 
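	/* Record the ids of all Ethernet ports probed by the EAL. */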
3802 	count = 0;
3803 	RTE_ETH_FOREACH_DEV(port_id) {
3804 		ports_ids[count] = port_id;
3805 		count++;
3806 	}
3807 	nb_ports = (portid_t) count;
3808 	if (nb_ports == 0)
3809 		TESTPMD_LOG(WARNING, "No probed ethernet devices\n");
3810 
3811 	/* allocate port structures, and init them */
3812 	init_port();
3813 
3814 	set_def_fwd_config();
3815 	if (nb_lcores == 0)
3816 		rte_exit(EXIT_FAILURE, "No cores defined for forwarding\n"
3817 			 "Check the core mask argument\n");
3818 
3819 	/* Bitrate/latency stats disabled by default */
3820 #ifdef RTE_LIB_BITRATESTATS
3821 	bitrate_enabled = 0;
3822 #endif
3823 #ifdef RTE_LIB_LATENCYSTATS
3824 	latencystats_enabled = 0;
3825 #endif
3826 
3827 	/* on FreeBSD, mlockall() is disabled by default */
3828 #ifdef RTE_EXEC_ENV_FREEBSD
3829 	do_mlockall = 0;
3830 #else
3831 	do_mlockall = 1;
3832 #endif
3833 
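	/*
	 * rte_eal_init() returned the number of arguments it consumed; skip
	 * past them so that only the testpmd-specific options are handed to
	 * launch_args_parse().
	 */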
3834 	argc -= diag;
3835 	argv += diag;
3836 	if (argc > 1)
3837 		launch_args_parse(argc, argv);
3838 
3839 	if (do_mlockall && mlockall(MCL_CURRENT | MCL_FUTURE)) {
3840 		TESTPMD_LOG(NOTICE, "mlockall() failed with error \"%s\"\n",
3841 			strerror(errno));
3842 	}
3843 
3844 	if (tx_first && interactive)
3845 		rte_exit(EXIT_FAILURE, "--tx-first cannot be used in "
3846 				"interactive mode.\n");
3847 
3848 	if (tx_first && lsc_interrupt) {
3849 		printf("Warning: lsc_interrupt needs to be off when "
3850 				"using tx_first. Disabling.\n");
3851 		lsc_interrupt = 0;
3852 	}
3853 
3854 	if (!nb_rxq && !nb_txq)
3855 		printf("Warning: At least one of the rx or tx queue counts should be non-zero\n");
3856 
3857 	if (nb_rxq > 1 && nb_rxq > nb_txq)
3858 		printf("Warning: nb_rxq=%d enables RSS configuration, "
3859 		       "but nb_txq=%d will prevent it from being fully tested.\n",
3860 		       nb_rxq, nb_txq);
3861 
3862 	init_config();
3863 
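	/*
	 * When hot-plug support is requested, enable hotplug handling and
	 * start the device event monitor so that attach/detach events reach
	 * dev_event_callback().
	 */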
3864 	if (hot_plug) {
3865 		ret = rte_dev_hotplug_handle_enable();
3866 		if (ret) {
3867 			RTE_LOG(ERR, EAL,
3868 				"Failed to enable hotplug handling.\n");
3869 			return -1;
3870 		}
3871 
3872 		ret = rte_dev_event_monitor_start();
3873 		if (ret) {
3874 			RTE_LOG(ERR, EAL,
3875 				"Failed to start device event monitoring.\n");
3876 			return -1;
3877 		}
3878 
3879 		ret = rte_dev_event_callback_register(NULL,
3880 			dev_event_callback, NULL);
3881 		if (ret) {
3882 			RTE_LOG(ERR, EAL,
3883 				"Failed to register device event callback.\n");
3884 			return -1;
3885 		}
3886 	}
3887 
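	/* Start all ports now unless device start was disabled on the command line. */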
3888 	if (!no_device_start && start_port(RTE_PORT_ALL) != 0)
3889 		rte_exit(EXIT_FAILURE, "Start ports failed\n");
3890 
3891 	/* set all ports to promiscuous mode by default */
3892 	RTE_ETH_FOREACH_DEV(port_id) {
3893 		ret = rte_eth_promiscuous_enable(port_id);
3894 		if (ret != 0)
3895 			printf("Error enabling promiscuous mode for port %u: %s - ignoring\n",
3896 				port_id, rte_strerror(-ret));
3897 	}
3898 
3899 	/* Init metrics library */
3900 	rte_metrics_init(rte_socket_id());
3901 
3902 #ifdef RTE_LIB_LATENCYSTATS
3903 	if (latencystats_enabled != 0) {
3904 		int ret = rte_latencystats_init(1, NULL);
3905 		if (ret)
3906 			printf("Warning: latencystats init()"
3907 				" returned error %d\n", ret);
3908 		printf("Latencystats running on lcore %d\n",
3909 			latencystats_lcore_id);
3910 	}
3911 #endif
3912 
3913 	/* Setup bitrate stats */
3914 #ifdef RTE_LIB_BITRATESTATS
3915 	if (bitrate_enabled != 0) {
3916 		bitrate_data = rte_stats_bitrate_create();
3917 		if (bitrate_data == NULL)
3918 			rte_exit(EXIT_FAILURE,
3919 				"Could not allocate bitrate data.\n");
3920 		rte_stats_bitrate_reg(bitrate_data);
3921 	}
3922 #endif
3923 
3924 #ifdef RTE_LIB_CMDLINE
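	/* Replay commands from a command file, if one was given on the command line. */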
3925 	if (strlen(cmdline_filename) != 0)
3926 		cmdline_read_from_file(cmdline_filename);
3927 
3928 	if (interactive == 1) {
3929 		if (auto_start) {
3930 			printf("Start automatic packet forwarding\n");
3931 			start_packet_forwarding(0);
3932 		}
3933 		prompt();
3934 		pmd_test_exit();
3935 	} else
3936 #endif
3937 	{
3938 		char c;
3939 		int rc;
3940 
3941 		f_quit = 0;
3942 
3943 		printf("No command-line prompt requested, starting packet forwarding\n");
3944 		start_packet_forwarding(tx_first);
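		/*
		 * With a statistics period configured, periodically clear the
		 * screen and print per-port statistics until a termination
		 * signal sets f_quit.
		 */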
3945 		if (stats_period != 0) {
3946 			uint64_t prev_time = 0, cur_time, diff_time = 0;
3947 			uint64_t timer_period;
3948 
3949 			/* Convert to number of cycles */
3950 			timer_period = stats_period * rte_get_timer_hz();
3951 
3952 			while (f_quit == 0) {
3953 				cur_time = rte_get_timer_cycles();
3954 				diff_time += cur_time - prev_time;
3955 
3956 				if (diff_time >= timer_period) {
3957 					print_stats();
3958 					/* Reset the timer */
3959 					diff_time = 0;
3960 				}
3961 				/* Sleep to avoid unnecessary checks */
3962 				prev_time = cur_time;
3963 				sleep(1);
3964 			}
3965 		}
3966 
3967 		printf("Press enter to exit\n");
3968 		rc = read(0, &c, 1);
3969 		pmd_test_exit();
3970 		if (rc < 0)
3971 			return 1;
3972 	}
3973 
3974 	ret = rte_eal_cleanup();
3975 	if (ret != 0)
3976 		rte_exit(EXIT_FAILURE,
3977 			 "EAL cleanup failed: %s\n", strerror(-ret));
3978 
3979 	return EXIT_SUCCESS;
3980 }
3981