xref: /dpdk/app/test-pmd/testpmd.c (revision 01817b10d27c8d1376210d4798bf504dffaa8ccd)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2017 Intel Corporation
3  */
4 
5 #include <stdarg.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <signal.h>
9 #include <string.h>
10 #include <time.h>
11 #include <fcntl.h>
12 #include <sys/mman.h>
13 #include <sys/types.h>
14 #include <errno.h>
15 #include <stdbool.h>
16 
17 #include <sys/queue.h>
18 #include <sys/stat.h>
19 
20 #include <stdint.h>
21 #include <unistd.h>
22 #include <inttypes.h>
23 
24 #include <rte_common.h>
25 #include <rte_errno.h>
26 #include <rte_byteorder.h>
27 #include <rte_log.h>
28 #include <rte_debug.h>
29 #include <rte_cycles.h>
30 #include <rte_memory.h>
31 #include <rte_memcpy.h>
32 #include <rte_launch.h>
33 #include <rte_eal.h>
34 #include <rte_alarm.h>
35 #include <rte_per_lcore.h>
36 #include <rte_lcore.h>
37 #include <rte_atomic.h>
38 #include <rte_branch_prediction.h>
39 #include <rte_mempool.h>
40 #include <rte_malloc.h>
41 #include <rte_mbuf.h>
42 #include <rte_mbuf_pool_ops.h>
43 #include <rte_interrupts.h>
44 #include <rte_pci.h>
45 #include <rte_ether.h>
46 #include <rte_ethdev.h>
47 #include <rte_dev.h>
48 #include <rte_string_fns.h>
49 #ifdef RTE_LIBRTE_IXGBE_PMD
50 #include <rte_pmd_ixgbe.h>
51 #endif
52 #ifdef RTE_LIBRTE_PDUMP
53 #include <rte_pdump.h>
54 #endif
55 #include <rte_flow.h>
56 #include <rte_metrics.h>
57 #ifdef RTE_LIBRTE_BITRATESTATS
58 #include <rte_bitrate.h>
59 #endif
60 #ifdef RTE_LIBRTE_LATENCY_STATS
61 #include <rte_latencystats.h>
62 #endif
63 
64 #include "testpmd.h"
65 
66 #ifndef MAP_HUGETLB
67 /* FreeBSD may not have MAP_HUGETLB (in fact, it probably doesn't) */
68 #define HUGE_FLAG (0x40000)
69 #else
70 #define HUGE_FLAG MAP_HUGETLB
71 #endif
72 
73 #ifndef MAP_HUGE_SHIFT
74 /* older kernels (or FreeBSD) will not have this define */
75 #define HUGE_SHIFT (26)
76 #else
77 #define HUGE_SHIFT MAP_HUGE_SHIFT
78 #endif
79 
80 #define EXTMEM_HEAP_NAME "extmem"
81 #define EXTBUF_ZONE_SIZE RTE_PGSIZE_2M
82 
83 uint16_t verbose_level = 0; /**< Silent by default. */
84 int testpmd_logtype; /**< Log type for testpmd logs */
85 
86 /* use master core for command line ? */
87 uint8_t interactive = 0;
88 uint8_t auto_start = 0;
89 uint8_t tx_first;
90 char cmdline_filename[PATH_MAX] = {0};
91 
92 /*
93  * NUMA support configuration.
94  * When set, the NUMA support attempts to dispatch the allocation of the
95  * RX and TX memory rings, and of the DMA memory buffers (mbufs) for the
96  * probed ports among the CPU sockets 0 and 1.
97  * Otherwise, all memory is allocated from CPU socket 0.
98  */
99 uint8_t numa_support = 1; /**< numa enabled by default */
100 
101 /*
102  * In UMA mode, all memory is allocated from socket 0 if --socket-num is
103  * not configured.
104  */
105 uint8_t socket_num = UMA_NO_CONFIG;
106 
107 /*
108  * Select mempool allocation type:
109  * - native: use regular DPDK memory
110  * - anon: use regular DPDK memory to create mempool, but populate using
111  *         anonymous memory (may not be IOVA-contiguous)
112  * - xmem: use externally allocated hugepage memory
113  */
114 uint8_t mp_alloc_type = MP_ALLOC_NATIVE;
115 
116 /*
117  * Store the sockets specified for allocating the memory pool used by
118  * each port.
119  */
120 uint8_t port_numa[RTE_MAX_ETHPORTS];
121 
122 /*
123  * Store the sockets specified for allocating the RX rings used by
124  * each port.
125  */
126 uint8_t rxring_numa[RTE_MAX_ETHPORTS];
127 
128 /*
129  * Store the sockets specified for allocating the TX rings used by
130  * each port.
131  */
132 uint8_t txring_numa[RTE_MAX_ETHPORTS];
133 
134 /*
135  * Record the Ethernet address of peer target ports to which packets are
136  * forwarded.
137  * Must be instantiated with the Ethernet addresses of peer traffic generator
138  * ports.
139  */
140 struct rte_ether_addr peer_eth_addrs[RTE_MAX_ETHPORTS];
141 portid_t nb_peer_eth_addrs = 0;
142 
143 /*
144  * Probed Target Environment.
145  */
146 struct rte_port *ports;	       /**< For all probed ethernet ports. */
147 portid_t nb_ports;             /**< Number of probed ethernet ports. */
148 struct fwd_lcore **fwd_lcores; /**< For all probed logical cores. */
149 lcoreid_t nb_lcores;           /**< Number of probed logical cores. */
150 
151 portid_t ports_ids[RTE_MAX_ETHPORTS]; /**< Store all port ids. */
152 
153 /*
154  * Test Forwarding Configuration.
155  *    nb_fwd_lcores <= nb_cfg_lcores <= nb_lcores
156  *    nb_fwd_ports  <= nb_cfg_ports  <= nb_ports
157  */
158 lcoreid_t nb_cfg_lcores; /**< Number of configured logical cores. */
159 lcoreid_t nb_fwd_lcores; /**< Number of forwarding logical cores. */
160 portid_t  nb_cfg_ports;  /**< Number of configured ports. */
161 portid_t  nb_fwd_ports;  /**< Number of forwarding ports. */
162 
163 unsigned int fwd_lcores_cpuids[RTE_MAX_LCORE]; /**< CPU ids configuration. */
164 portid_t fwd_ports_ids[RTE_MAX_ETHPORTS];      /**< Port ids configuration. */
165 
166 struct fwd_stream **fwd_streams; /**< For each RX queue of each port. */
167 streamid_t nb_fwd_streams;       /**< Is equal to (nb_ports * nb_rxq). */
168 
169 /*
170  * Forwarding engines.
171  */
172 struct fwd_engine * fwd_engines[] = {
173 	&io_fwd_engine,
174 	&mac_fwd_engine,
175 	&mac_swap_engine,
176 	&flow_gen_engine,
177 	&rx_only_engine,
178 	&tx_only_engine,
179 	&csum_fwd_engine,
180 	&icmp_echo_engine,
181 	&noisy_vnf_engine,
182 	&five_tuple_swap_fwd_engine,
183 #ifdef RTE_LIBRTE_IEEE1588
184 	&ieee1588_fwd_engine,
185 #endif
186 	NULL,
187 };
188 
189 struct rte_mempool *mempools[RTE_MAX_NUMA_NODES];
190 uint16_t mempool_flags;
191 
192 struct fwd_config cur_fwd_config;
193 struct fwd_engine *cur_fwd_eng = &io_fwd_engine; /**< IO mode by default. */
194 uint32_t retry_enabled;
195 uint32_t burst_tx_delay_time = BURST_TX_WAIT_US;
196 uint32_t burst_tx_retry_num = BURST_TX_RETRIES;
197 
198 uint16_t mbuf_data_size = DEFAULT_MBUF_DATA_SIZE; /**< Mbuf data space size. */
199 uint32_t param_total_num_mbufs = 0;  /**< number of mbufs in all pools - if
200                                       * specified on command-line. */
201 uint16_t stats_period; /**< Period to show statistics (disabled by default) */
202 
203 /*
204  * In a container, the process running with the 'stats-period' option cannot be
205  * terminated. Set a flag to exit the stats-period loop once SIGINT/SIGTERM is received.
206  */
207 uint8_t f_quit;
208 
209 /*
210  * Configuration of packet segments used by the "txonly" processing engine.
211  */
212 uint16_t tx_pkt_length = TXONLY_DEF_PACKET_LEN; /**< TXONLY packet length. */
213 uint16_t tx_pkt_seg_lengths[RTE_MAX_SEGS_PER_PKT] = {
214 	TXONLY_DEF_PACKET_LEN,
215 };
216 uint8_t  tx_pkt_nb_segs = 1; /**< Number of segments in TXONLY packets */
217 
218 enum tx_pkt_split tx_pkt_split = TX_PKT_SPLIT_OFF;
219 /**< Split policy for packets to TX. */
220 
221 uint8_t txonly_multi_flow;
222 /**< Whether multiple flows are generated in TXONLY mode. */
223 
224 uint32_t tx_pkt_times_inter;
225 /**< Timings for send scheduling in TXONLY mode, time between bursts. */
226 
227 uint32_t tx_pkt_times_intra;
228 /**< Timings for send scheduling in TXONLY mode, time between packets. */
229 
230 uint16_t nb_pkt_per_burst = DEF_PKT_BURST; /**< Number of packets per burst. */
231 uint16_t mb_mempool_cache = DEF_MBUF_CACHE; /**< Size of mbuf mempool cache. */
232 
233 /* Whether the current configuration is in DCB mode; 0 means it is not */
234 uint8_t dcb_config = 0;
235 
236 /* Whether DCB is in testing state */
237 uint8_t dcb_test = 0;
238 
239 /*
240  * Configurable number of RX/TX queues.
241  */
242 queueid_t nb_hairpinq; /**< Number of hairpin queues per port. */
243 queueid_t nb_rxq = 1; /**< Number of RX queues per port. */
244 queueid_t nb_txq = 1; /**< Number of TX queues per port. */
245 
246 /*
247  * Configurable number of RX/TX ring descriptors.
248  * Defaults are supplied by drivers via ethdev.
249  */
250 #define RTE_TEST_RX_DESC_DEFAULT 0
251 #define RTE_TEST_TX_DESC_DEFAULT 0
252 uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT; /**< Number of RX descriptors. */
253 uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT; /**< Number of TX descriptors. */
254 
255 #define RTE_PMD_PARAM_UNSET -1
256 /*
257  * Configurable values of RX and TX ring threshold registers.
258  */
259 
260 int8_t rx_pthresh = RTE_PMD_PARAM_UNSET;
261 int8_t rx_hthresh = RTE_PMD_PARAM_UNSET;
262 int8_t rx_wthresh = RTE_PMD_PARAM_UNSET;
263 
264 int8_t tx_pthresh = RTE_PMD_PARAM_UNSET;
265 int8_t tx_hthresh = RTE_PMD_PARAM_UNSET;
266 int8_t tx_wthresh = RTE_PMD_PARAM_UNSET;
267 
268 /*
269  * Configurable value of RX free threshold.
270  */
271 int16_t rx_free_thresh = RTE_PMD_PARAM_UNSET;
272 
273 /*
274  * Configurable value of RX drop enable.
275  */
276 int8_t rx_drop_en = RTE_PMD_PARAM_UNSET;
277 
278 /*
279  * Configurable value of TX free threshold.
280  */
281 int16_t tx_free_thresh = RTE_PMD_PARAM_UNSET;
282 
283 /*
284  * Configurable value of TX RS bit threshold.
285  */
286 int16_t tx_rs_thresh = RTE_PMD_PARAM_UNSET;
287 
288 /*
289  * Configurable value of buffered packets before sending.
290  */
291 uint16_t noisy_tx_sw_bufsz;
292 
293 /*
294  * Configurable value of packet buffer timeout.
295  */
296 uint16_t noisy_tx_sw_buf_flush_time;
297 
298 /*
299  * Configurable value for size of VNF internal memory area
300  * used for simulating noisy neighbour behaviour
301  */
302 uint64_t noisy_lkup_mem_sz;
303 
304 /*
305  * Configurable value of number of random writes done in
306  * VNF simulation memory area.
307  */
308 uint64_t noisy_lkup_num_writes;
309 
310 /*
311  * Configurable value of number of random reads done in
312  * VNF simulation memory area.
313  */
314 uint64_t noisy_lkup_num_reads;
315 
316 /*
317  * Configurable value of number of random reads/writes done in
318  * VNF simulation memory area.
319  */
320 uint64_t noisy_lkup_num_reads_writes;
321 
322 /*
323  * Receive Side Scaling (RSS) configuration.
324  */
325 uint64_t rss_hf = ETH_RSS_IP; /* RSS IP by default. */
326 
327 /*
328  * Port topology configuration
329  */
330 uint16_t port_topology = PORT_TOPOLOGY_PAIRED; /* Ports are paired by default */
331 
332 /*
333  * Avoid flushing all the RX streams before starting forwarding.
334  */
335 uint8_t no_flush_rx = 0; /* flush by default */
336 
337 /*
338  * Flow API isolated mode.
339  */
340 uint8_t flow_isolate_all;
341 
342 /*
343  * Avoid checking the link status when starting/stopping a port.
344  */
345 uint8_t no_link_check = 0; /* check by default */
346 
347 /*
348  * Don't automatically start all ports in interactive mode.
349  */
350 uint8_t no_device_start = 0;
351 
352 /*
353  * Enable link status change notification
354  */
355 uint8_t lsc_interrupt = 1; /* enabled by default */
356 
357 /*
358  * Enable device removal notification.
359  */
360 uint8_t rmv_interrupt = 1; /* enabled by default */
361 
362 uint8_t hot_plug = 0; /**< hotplug disabled by default. */
363 
364 /* After attach, port setup is called on event or by iterator */
365 bool setup_on_probe_event = true;
366 
367 /* Clear ptypes on port initialization. */
368 uint8_t clear_ptypes = true;
369 
370 /* Hairpin ports configuration mode. */
371 uint16_t hairpin_mode;
372 
373 /* Pretty printing of ethdev events */
374 static const char * const eth_event_desc[] = {
375 	[RTE_ETH_EVENT_UNKNOWN] = "unknown",
376 	[RTE_ETH_EVENT_INTR_LSC] = "link state change",
377 	[RTE_ETH_EVENT_QUEUE_STATE] = "queue state",
378 	[RTE_ETH_EVENT_INTR_RESET] = "reset",
379 	[RTE_ETH_EVENT_VF_MBOX] = "VF mbox",
380 	[RTE_ETH_EVENT_IPSEC] = "IPsec",
381 	[RTE_ETH_EVENT_MACSEC] = "MACsec",
382 	[RTE_ETH_EVENT_INTR_RMV] = "device removal",
383 	[RTE_ETH_EVENT_NEW] = "device probed",
384 	[RTE_ETH_EVENT_DESTROY] = "device released",
385 	[RTE_ETH_EVENT_FLOW_AGED] = "flow aged",
386 	[RTE_ETH_EVENT_MAX] = NULL,
387 };
388 
389 /*
390  * Display or mask ether events
391  * Default to all events except VF_MBOX
392  */
393 uint32_t event_print_mask = (UINT32_C(1) << RTE_ETH_EVENT_UNKNOWN) |
394 			    (UINT32_C(1) << RTE_ETH_EVENT_INTR_LSC) |
395 			    (UINT32_C(1) << RTE_ETH_EVENT_QUEUE_STATE) |
396 			    (UINT32_C(1) << RTE_ETH_EVENT_INTR_RESET) |
397 			    (UINT32_C(1) << RTE_ETH_EVENT_IPSEC) |
398 			    (UINT32_C(1) << RTE_ETH_EVENT_MACSEC) |
399 			    (UINT32_C(1) << RTE_ETH_EVENT_INTR_RMV) |
400 			    (UINT32_C(1) << RTE_ETH_EVENT_FLOW_AGED);
401 /*
402  * Decide whether all memory is locked for performance.
403  */
404 int do_mlockall = 0;
405 
406 /*
407  * NIC bypass mode configuration options.
408  */
409 
410 #if defined RTE_LIBRTE_IXGBE_PMD && defined RTE_LIBRTE_IXGBE_BYPASS
411 /* The NIC bypass watchdog timeout. */
412 uint32_t bypass_timeout = RTE_PMD_IXGBE_BYPASS_TMT_OFF;
413 #endif
414 
415 
416 #ifdef RTE_LIBRTE_LATENCY_STATS
417 
418 /*
419  * Set when latency stats are enabled on the command line
420  */
421 uint8_t latencystats_enabled;
422 
423 /*
424  * Lcore ID to serve latency statistics.
425  */
426 lcoreid_t latencystats_lcore_id = -1;
427 
428 #endif
429 
430 /*
431  * Ethernet device configuration.
432  */
433 struct rte_eth_rxmode rx_mode = {
434 	.max_rx_pkt_len = RTE_ETHER_MAX_LEN,
435 		/**< Default maximum frame length. */
436 };
437 
438 struct rte_eth_txmode tx_mode = {
439 	.offloads = DEV_TX_OFFLOAD_MBUF_FAST_FREE,
440 };
441 
442 struct rte_fdir_conf fdir_conf = {
443 	.mode = RTE_FDIR_MODE_NONE,
444 	.pballoc = RTE_FDIR_PBALLOC_64K,
445 	.status = RTE_FDIR_REPORT_STATUS,
446 	.mask = {
447 		.vlan_tci_mask = 0xFFEF,
448 		.ipv4_mask     = {
449 			.src_ip = 0xFFFFFFFF,
450 			.dst_ip = 0xFFFFFFFF,
451 		},
452 		.ipv6_mask     = {
453 			.src_ip = {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
454 			.dst_ip = {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
455 		},
456 		.src_port_mask = 0xFFFF,
457 		.dst_port_mask = 0xFFFF,
458 		.mac_addr_byte_mask = 0xFF,
459 		.tunnel_type_mask = 1,
460 		.tunnel_id_mask = 0xFFFFFFFF,
461 	},
462 	.drop_queue = 127,
463 };
464 
465 volatile int test_done = 1; /* stop packet forwarding when set to 1. */
466 
467 struct queue_stats_mappings tx_queue_stats_mappings_array[MAX_TX_QUEUE_STATS_MAPPINGS];
468 struct queue_stats_mappings rx_queue_stats_mappings_array[MAX_RX_QUEUE_STATS_MAPPINGS];
469 
470 struct queue_stats_mappings *tx_queue_stats_mappings = tx_queue_stats_mappings_array;
471 struct queue_stats_mappings *rx_queue_stats_mappings = rx_queue_stats_mappings_array;
472 
473 uint16_t nb_tx_queue_stats_mappings = 0;
474 uint16_t nb_rx_queue_stats_mappings = 0;
475 
476 /*
477  * Display zero values by default for xstats
478  */
479 uint8_t xstats_hide_zero;
480 
481 /*
482  * Measurement of CPU cycles disabled by default
483  */
484 uint8_t record_core_cycles;
485 
486 /*
487  * Display of RX and TX bursts disabled by default
488  */
489 uint8_t record_burst_stats;
490 
491 unsigned int num_sockets = 0;
492 unsigned int socket_ids[RTE_MAX_NUMA_NODES];
493 
494 #ifdef RTE_LIBRTE_BITRATESTATS
495 /* Bitrate statistics */
496 struct rte_stats_bitrates *bitrate_data;
497 lcoreid_t bitrate_lcore_id;
498 uint8_t bitrate_enabled;
499 #endif
500 
501 struct gro_status gro_ports[RTE_MAX_ETHPORTS];
502 uint8_t gro_flush_cycles = GRO_DEFAULT_FLUSH_CYCLES;
503 
504 /*
505  * Hexadecimal bitmask of RX multi-queue modes that can be enabled.
506  */
507 enum rte_eth_rx_mq_mode rx_mq_mode = ETH_MQ_RX_VMDQ_DCB_RSS;
508 
509 /* Forward function declarations */
510 static void setup_attached_port(portid_t pi);
511 static void map_port_queue_stats_mapping_registers(portid_t pi,
512 						   struct rte_port *port);
513 static void check_all_ports_link_status(uint32_t port_mask);
514 static int eth_event_callback(portid_t port_id,
515 			      enum rte_eth_event_type type,
516 			      void *param, void *ret_param);
517 static void dev_event_callback(const char *device_name,
518 				enum rte_dev_event_type type,
519 				void *param);
520 
521 /*
522  * Check if all the ports are started.
523  * If yes, return positive value. If not, return zero.
524  */
525 static int all_ports_started(void);
526 
527 struct gso_status gso_ports[RTE_MAX_ETHPORTS];
528 uint16_t gso_max_segment_size = RTE_ETHER_MAX_LEN - RTE_ETHER_CRC_LEN;
529 
530 /* Holds the registered mbuf dynamic flags names. */
531 char dynf_names[64][RTE_MBUF_DYN_NAMESIZE];
532 
533 /*
534  * Helper function to check whether a socket has not been discovered yet.
535  * If the socket is new, return a positive value; if it is already known, return zero.
536  */
537 int
538 new_socket_id(unsigned int socket_id)
539 {
540 	unsigned int i;
541 
542 	for (i = 0; i < num_sockets; i++) {
543 		if (socket_ids[i] == socket_id)
544 			return 0;
545 	}
546 	return 1;
547 }
548 
549 /*
550  * Setup default configuration.
551  */
552 static void
553 set_default_fwd_lcores_config(void)
554 {
555 	unsigned int i;
556 	unsigned int nb_lc;
557 	unsigned int sock_num;
558 
559 	nb_lc = 0;
560 	for (i = 0; i < RTE_MAX_LCORE; i++) {
561 		if (!rte_lcore_is_enabled(i))
562 			continue;
563 		sock_num = rte_lcore_to_socket_id(i);
564 		if (new_socket_id(sock_num)) {
565 			if (num_sockets >= RTE_MAX_NUMA_NODES) {
566 				rte_exit(EXIT_FAILURE,
567 					 "Total sockets greater than %u\n",
568 					 RTE_MAX_NUMA_NODES);
569 			}
570 			socket_ids[num_sockets++] = sock_num;
571 		}
572 		if (i == rte_get_master_lcore())
573 			continue;
574 		fwd_lcores_cpuids[nb_lc++] = i;
575 	}
576 	nb_lcores = (lcoreid_t) nb_lc;
577 	nb_cfg_lcores = nb_lcores;
578 	nb_fwd_lcores = 1;
579 }
580 
581 static void
582 set_def_peer_eth_addrs(void)
583 {
584 	portid_t i;
585 
586 	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
587 		peer_eth_addrs[i].addr_bytes[0] = RTE_ETHER_LOCAL_ADMIN_ADDR;
588 		peer_eth_addrs[i].addr_bytes[5] = i;
589 	}
590 }
591 
592 static void
593 set_default_fwd_ports_config(void)
594 {
595 	portid_t pt_id;
596 	int i = 0;
597 
598 	RTE_ETH_FOREACH_DEV(pt_id) {
599 		fwd_ports_ids[i++] = pt_id;
600 
601 		/* Update sockets info according to the attached device */
602 		int socket_id = rte_eth_dev_socket_id(pt_id);
603 		if (socket_id >= 0 && new_socket_id(socket_id)) {
604 			if (num_sockets >= RTE_MAX_NUMA_NODES) {
605 				rte_exit(EXIT_FAILURE,
606 					 "Total sockets greater than %u\n",
607 					 RTE_MAX_NUMA_NODES);
608 			}
609 			socket_ids[num_sockets++] = socket_id;
610 		}
611 	}
612 
613 	nb_cfg_ports = nb_ports;
614 	nb_fwd_ports = nb_ports;
615 }
616 
617 void
618 set_def_fwd_config(void)
619 {
620 	set_default_fwd_lcores_config();
621 	set_def_peer_eth_addrs();
622 	set_default_fwd_ports_config();
623 }
624 
625 /* extremely pessimistic estimation of memory required to create a mempool */
626 static int
627 calc_mem_size(uint32_t nb_mbufs, uint32_t mbuf_sz, size_t pgsz, size_t *out)
628 {
629 	unsigned int n_pages, mbuf_per_pg, leftover;
630 	uint64_t total_mem, mbuf_mem, obj_sz;
631 
632 	/* there is no good way to predict how much space the mempool will
633 	 * occupy because it will allocate chunks on the fly, and some of those
634 	 * will come from default DPDK memory while some will come from our
635 	 * external memory, so just assume 128MB will be enough for everyone.
636 	 */
637 	uint64_t hdr_mem = 128 << 20;
638 
639 	/* account for possible non-contiguousness */
640 	obj_sz = rte_mempool_calc_obj_size(mbuf_sz, 0, NULL);
641 	if (obj_sz > pgsz) {
642 		TESTPMD_LOG(ERR, "Object size is bigger than page size\n");
643 		return -1;
644 	}
645 
646 	mbuf_per_pg = pgsz / obj_sz;
647 	leftover = (nb_mbufs % mbuf_per_pg) > 0;
648 	n_pages = (nb_mbufs / mbuf_per_pg) + leftover;
649 
650 	mbuf_mem = n_pages * pgsz;
651 
652 	total_mem = RTE_ALIGN(hdr_mem + mbuf_mem, pgsz);
653 
654 	if (total_mem > SIZE_MAX) {
655 		TESTPMD_LOG(ERR, "Memory size too big\n");
656 		return -1;
657 	}
658 	*out = (size_t)total_mem;
659 
660 	return 0;
661 }
662 
663 static int
664 pagesz_flags(uint64_t page_sz)
665 {
666 	/* as per the mmap() manpage, hugepage sizes are encoded as the log2 of
667 	 * the page size shifted by MAP_HUGE_SHIFT
668 	 */
669 	int log2 = rte_log2_u64(page_sz);
670 
671 	return (log2 << HUGE_SHIFT);
672 }
673 
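/*
 * Reserve an anonymous private mapping of memsz bytes, optionally backed by
 * hugepages of the requested size. Returns NULL if the mapping fails.
 */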
674 static void *
675 alloc_mem(size_t memsz, size_t pgsz, bool huge)
676 {
677 	void *addr;
678 	int flags;
679 
680 	/* allocate anonymous hugepages */
681 	flags = MAP_ANONYMOUS | MAP_PRIVATE;
682 	if (huge)
683 		flags |= HUGE_FLAG | pagesz_flags(pgsz);
684 
685 	addr = mmap(NULL, memsz, PROT_READ | PROT_WRITE, flags, -1, 0);
686 	if (addr == MAP_FAILED)
687 		return NULL;
688 
689 	return addr;
690 }
691 
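/*
 * Description of an externally allocated memory area: base address, total
 * length, page size and the per-page IOVA table.
 */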
692 struct extmem_param {
693 	void *addr;
694 	size_t len;
695 	size_t pgsz;
696 	rte_iova_t *iova_table;
697 	unsigned int iova_table_len;
698 };
699 
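/*
 * Allocate an external memory area big enough to hold nb_mbufs objects of
 * mbuf_sz bytes, trying the known page sizes in turn, and fill in the
 * per-page IOVA table. Returns 0 on success, -1 on failure.
 */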
700 static int
701 create_extmem(uint32_t nb_mbufs, uint32_t mbuf_sz, struct extmem_param *param,
702 		bool huge)
703 {
704 	uint64_t pgsizes[] = {RTE_PGSIZE_2M, RTE_PGSIZE_1G, /* x86_64, ARM */
705 			RTE_PGSIZE_16M, RTE_PGSIZE_16G};    /* POWER */
706 	unsigned int cur_page, n_pages, pgsz_idx;
707 	size_t mem_sz, cur_pgsz;
708 	rte_iova_t *iovas = NULL;
709 	void *addr;
710 	int ret;
711 
712 	for (pgsz_idx = 0; pgsz_idx < RTE_DIM(pgsizes); pgsz_idx++) {
713 		/* skip anything that is too big */
714 		if (pgsizes[pgsz_idx] > SIZE_MAX)
715 			continue;
716 
717 		cur_pgsz = pgsizes[pgsz_idx];
718 
719 		/* if we were told not to allocate hugepages, override */
720 		if (!huge)
721 			cur_pgsz = sysconf(_SC_PAGESIZE);
722 
723 		ret = calc_mem_size(nb_mbufs, mbuf_sz, cur_pgsz, &mem_sz);
724 		if (ret < 0) {
725 			TESTPMD_LOG(ERR, "Cannot calculate memory size\n");
726 			return -1;
727 		}
728 
729 		/* allocate our memory */
730 		addr = alloc_mem(mem_sz, cur_pgsz, huge);
731 
732 		/* if we couldn't allocate memory with a specified page size,
733 		 * that doesn't mean we can't do it with other page sizes, so
734 		 * try another one.
735 		 */
736 		if (addr == NULL)
737 			continue;
738 
739 		/* store IOVA addresses for every page in this memory area */
740 		n_pages = mem_sz / cur_pgsz;
741 
742 		iovas = malloc(sizeof(*iovas) * n_pages);
743 
744 		if (iovas == NULL) {
745 			TESTPMD_LOG(ERR, "Cannot allocate memory for iova addresses\n");
746 			goto fail;
747 		}
748 		/* lock memory if it's not huge pages */
749 		if (!huge)
750 			mlock(addr, mem_sz);
751 
752 		/* populate IOVA addresses */
753 		for (cur_page = 0; cur_page < n_pages; cur_page++) {
754 			rte_iova_t iova;
755 			size_t offset;
756 			void *cur;
757 
758 			offset = cur_pgsz * cur_page;
759 			cur = RTE_PTR_ADD(addr, offset);
760 
761 			/* touch the page before getting its IOVA */
762 			*(volatile char *)cur = 0;
763 
764 			iova = rte_mem_virt2iova(cur);
765 
766 			iovas[cur_page] = iova;
767 		}
768 
769 		break;
770 	}
771 	/* if we couldn't allocate anything */
772 	if (iovas == NULL)
773 		return -1;
774 
775 	param->addr = addr;
776 	param->len = mem_sz;
777 	param->pgsz = cur_pgsz;
778 	param->iova_table = iovas;
779 	param->iova_table_len = n_pages;
780 
781 	return 0;
782 fail:
783 	if (iovas)
784 		free(iovas);
785 	if (addr)
786 		munmap(addr, mem_sz);
787 
788 	return -1;
789 }
790 
791 static int
792 setup_extmem(uint32_t nb_mbufs, uint32_t mbuf_sz, bool huge)
793 {
794 	struct extmem_param param;
795 	int socket_id, ret;
796 
797 	memset(&param, 0, sizeof(param));
798 
799 	/* check if our heap exists */
800 	socket_id = rte_malloc_heap_get_socket(EXTMEM_HEAP_NAME);
801 	if (socket_id < 0) {
802 		/* create our heap */
803 		ret = rte_malloc_heap_create(EXTMEM_HEAP_NAME);
804 		if (ret < 0) {
805 			TESTPMD_LOG(ERR, "Cannot create heap\n");
806 			return -1;
807 		}
808 	}
809 
810 	ret = create_extmem(nb_mbufs, mbuf_sz, &param, huge);
811 	if (ret < 0) {
812 		TESTPMD_LOG(ERR, "Cannot create memory area\n");
813 		return -1;
814 	}
815 
816 	/* we now have a valid memory area, so add it to heap */
817 	ret = rte_malloc_heap_memory_add(EXTMEM_HEAP_NAME,
818 			param.addr, param.len, param.iova_table,
819 			param.iova_table_len, param.pgsz);
820 
821 	/* when using VFIO, memory is automatically mapped for DMA by EAL */
822 
823 	/* not needed any more */
824 	free(param.iova_table);
825 
826 	if (ret < 0) {
827 		TESTPMD_LOG(ERR, "Cannot add memory to heap\n");
828 		munmap(param.addr, param.len);
829 		return -1;
830 	}
831 
832 	/* success */
833 
834 	TESTPMD_LOG(DEBUG, "Allocated %zuMB of external memory\n",
835 			param.len >> 20);
836 
837 	return 0;
838 }
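
/*
 * Mempool memory-chunk callback: DMA-unmap the chunk from every probed port's
 * device and un-register it from DPDK.
 */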
839 static void
840 dma_unmap_cb(struct rte_mempool *mp __rte_unused, void *opaque __rte_unused,
841 	     struct rte_mempool_memhdr *memhdr, unsigned mem_idx __rte_unused)
842 {
843 	uint16_t pid = 0;
844 	int ret;
845 
846 	RTE_ETH_FOREACH_DEV(pid) {
847 		struct rte_eth_dev *dev =
848 			&rte_eth_devices[pid];
849 
850 		ret = rte_dev_dma_unmap(dev->device, memhdr->addr, 0,
851 					memhdr->len);
852 		if (ret) {
853 			TESTPMD_LOG(DEBUG,
854 				    "unable to DMA unmap addr 0x%p "
855 				    "for device %s\n",
856 				    memhdr->addr, dev->data->name);
857 		}
858 	}
859 	ret = rte_extmem_unregister(memhdr->addr, memhdr->len);
860 	if (ret) {
861 		TESTPMD_LOG(DEBUG,
862 			    "unable to un-register addr 0x%p\n", memhdr->addr);
863 	}
864 }
865 
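/*
 * Mempool memory-chunk callback: register the chunk with DPDK and DMA-map it
 * for every probed port's device.
 */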
866 static void
867 dma_map_cb(struct rte_mempool *mp __rte_unused, void *opaque __rte_unused,
868 	   struct rte_mempool_memhdr *memhdr, unsigned mem_idx __rte_unused)
869 {
870 	uint16_t pid = 0;
871 	size_t page_size = sysconf(_SC_PAGESIZE);
872 	int ret;
873 
874 	ret = rte_extmem_register(memhdr->addr, memhdr->len, NULL, 0,
875 				  page_size);
876 	if (ret) {
877 		TESTPMD_LOG(DEBUG,
878 			    "unable to register addr 0x%p\n", memhdr->addr);
879 		return;
880 	}
881 	RTE_ETH_FOREACH_DEV(pid) {
882 		struct rte_eth_dev *dev =
883 			&rte_eth_devices[pid];
884 
885 		ret = rte_dev_dma_map(dev->device, memhdr->addr, 0,
886 				      memhdr->len);
887 		if (ret) {
888 			TESTPMD_LOG(DEBUG,
889 				    "unable to DMA map addr 0x%p "
890 				    "for device %s\n",
891 				    memhdr->addr, dev->data->name);
892 		}
893 	}
894 }
895 
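/*
 * Reserve IOVA-contiguous memzones to be used as pinned external buffers and
 * describe them in an rte_pktmbuf_extmem array allocated for the caller.
 * Returns the number of descriptors, or 0 on failure.
 */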
896 static unsigned int
897 setup_extbuf(uint32_t nb_mbufs, uint16_t mbuf_sz, unsigned int socket_id,
898 	    char *pool_name, struct rte_pktmbuf_extmem **ext_mem)
899 {
900 	struct rte_pktmbuf_extmem *xmem;
901 	unsigned int ext_num, zone_num, elt_num;
902 	uint16_t elt_size;
903 
904 	elt_size = RTE_ALIGN_CEIL(mbuf_sz, RTE_CACHE_LINE_SIZE);
905 	elt_num = EXTBUF_ZONE_SIZE / elt_size;
906 	zone_num = (nb_mbufs + elt_num - 1) / elt_num;
907 
908 	xmem = malloc(sizeof(struct rte_pktmbuf_extmem) * zone_num);
909 	if (xmem == NULL) {
910 		TESTPMD_LOG(ERR, "Cannot allocate memory for "
911 				 "external buffer descriptors\n");
912 		*ext_mem = NULL;
913 		return 0;
914 	}
915 	for (ext_num = 0; ext_num < zone_num; ext_num++) {
916 		struct rte_pktmbuf_extmem *xseg = xmem + ext_num;
917 		const struct rte_memzone *mz;
918 		char mz_name[RTE_MEMZONE_NAMESIZE];
919 		int ret;
920 
921 		ret = snprintf(mz_name, sizeof(mz_name),
922 			RTE_MEMPOOL_MZ_FORMAT "_xb_%u", pool_name, ext_num);
923 		if (ret < 0 || ret >= (int)sizeof(mz_name)) {
924 			errno = ENAMETOOLONG;
925 			ext_num = 0;
926 			break;
927 		}
928 		mz = rte_memzone_reserve_aligned(mz_name, EXTBUF_ZONE_SIZE,
929 						 socket_id,
930 						 RTE_MEMZONE_IOVA_CONTIG |
931 						 RTE_MEMZONE_1GB |
932 						 RTE_MEMZONE_SIZE_HINT_ONLY,
933 						 EXTBUF_ZONE_SIZE);
934 		if (mz == NULL) {
935 			/*
936 			 * The caller exits on external buffer creation
937 			 * error, so there is no need to free memzones.
938 			 */
939 			errno = ENOMEM;
940 			ext_num = 0;
941 			break;
942 		}
943 		xseg->buf_ptr = mz->addr;
944 		xseg->buf_iova = mz->iova;
945 		xseg->buf_len = EXTBUF_ZONE_SIZE;
946 		xseg->elt_size = elt_size;
947 	}
948 	if (ext_num == 0 && xmem != NULL) {
949 		free(xmem);
950 		xmem = NULL;
951 	}
952 	*ext_mem = xmem;
953 	return ext_num;
954 }
955 
956 /*
957  * Mbuf pool creation for a given socket, done once at init time.
958  */
959 static struct rte_mempool *
960 mbuf_pool_create(uint16_t mbuf_seg_size, unsigned nb_mbuf,
961 		 unsigned int socket_id)
962 {
963 	char pool_name[RTE_MEMPOOL_NAMESIZE];
964 	struct rte_mempool *rte_mp = NULL;
965 	uint32_t mb_size;
966 
967 	mb_size = sizeof(struct rte_mbuf) + mbuf_seg_size;
968 	mbuf_poolname_build(socket_id, pool_name, sizeof(pool_name));
969 
970 	TESTPMD_LOG(INFO,
971 		"create a new mbuf pool <%s>: n=%u, size=%u, socket=%u\n",
972 		pool_name, nb_mbuf, mbuf_seg_size, socket_id);
973 
974 	switch (mp_alloc_type) {
975 	case MP_ALLOC_NATIVE:
976 		{
977 			/* wrapper to rte_mempool_create() */
978 			TESTPMD_LOG(INFO, "preferred mempool ops selected: %s\n",
979 					rte_mbuf_best_mempool_ops());
980 			rte_mp = rte_pktmbuf_pool_create(pool_name, nb_mbuf,
981 				mb_mempool_cache, 0, mbuf_seg_size, socket_id);
982 			break;
983 		}
984 	case MP_ALLOC_ANON:
985 		{
986 			rte_mp = rte_mempool_create_empty(pool_name, nb_mbuf,
987 				mb_size, (unsigned int) mb_mempool_cache,
988 				sizeof(struct rte_pktmbuf_pool_private),
989 				socket_id, mempool_flags);
990 			if (rte_mp == NULL)
991 				goto err;
992 
993 			if (rte_mempool_populate_anon(rte_mp) == 0) {
994 				rte_mempool_free(rte_mp);
995 				rte_mp = NULL;
996 				goto err;
997 			}
998 			rte_pktmbuf_pool_init(rte_mp, NULL);
999 			rte_mempool_obj_iter(rte_mp, rte_pktmbuf_init, NULL);
1000 			rte_mempool_mem_iter(rte_mp, dma_map_cb, NULL);
1001 			break;
1002 		}
1003 	case MP_ALLOC_XMEM:
1004 	case MP_ALLOC_XMEM_HUGE:
1005 		{
1006 			int heap_socket;
1007 			bool huge = mp_alloc_type == MP_ALLOC_XMEM_HUGE;
1008 
1009 			if (setup_extmem(nb_mbuf, mbuf_seg_size, huge) < 0)
1010 				rte_exit(EXIT_FAILURE, "Could not create external memory\n");
1011 
1012 			heap_socket =
1013 				rte_malloc_heap_get_socket(EXTMEM_HEAP_NAME);
1014 			if (heap_socket < 0)
1015 				rte_exit(EXIT_FAILURE, "Could not get external memory socket ID\n");
1016 
1017 			TESTPMD_LOG(INFO, "preferred mempool ops selected: %s\n",
1018 					rte_mbuf_best_mempool_ops());
1019 			rte_mp = rte_pktmbuf_pool_create(pool_name, nb_mbuf,
1020 					mb_mempool_cache, 0, mbuf_seg_size,
1021 					heap_socket);
1022 			break;
1023 		}
1024 	case MP_ALLOC_XBUF:
1025 		{
1026 			struct rte_pktmbuf_extmem *ext_mem;
1027 			unsigned int ext_num;
1028 
1029 			ext_num = setup_extbuf(nb_mbuf,	mbuf_seg_size,
1030 					       socket_id, pool_name, &ext_mem);
1031 			if (ext_num == 0)
1032 				rte_exit(EXIT_FAILURE,
1033 					 "Can't create pinned data buffers\n");
1034 
1035 			TESTPMD_LOG(INFO, "preferred mempool ops selected: %s\n",
1036 					rte_mbuf_best_mempool_ops());
1037 			rte_mp = rte_pktmbuf_pool_create_extbuf
1038 					(pool_name, nb_mbuf, mb_mempool_cache,
1039 					 0, mbuf_seg_size, socket_id,
1040 					 ext_mem, ext_num);
1041 			free(ext_mem);
1042 			break;
1043 		}
1044 	default:
1045 		{
1046 			rte_exit(EXIT_FAILURE, "Invalid mempool creation mode\n");
1047 		}
1048 	}
1049 
1050 err:
1051 	if (rte_mp == NULL) {
1052 		rte_exit(EXIT_FAILURE,
1053 			"Creation of mbuf pool for socket %u failed: %s\n",
1054 			socket_id, rte_strerror(rte_errno));
1055 	} else if (verbose_level > 0) {
1056 		rte_mempool_dump(stdout, rte_mp);
1057 	}
1058 	return rte_mp;
1059 }
1060 
1061 /*
1062  * Check whether the given socket ID is valid in NUMA mode;
1063  * if valid, return 0, else return -1
1064  */
1065 static int
1066 check_socket_id(const unsigned int socket_id)
1067 {
1068 	static int warning_once = 0;
1069 
1070 	if (new_socket_id(socket_id)) {
1071 		if (!warning_once && numa_support)
1072 			printf("Warning: NUMA should be configured manually by"
1073 			       " using --port-numa-config and"
1074 			       " --ring-numa-config parameters along with"
1075 			       " --numa.\n");
1076 		warning_once = 1;
1077 		return -1;
1078 	}
1079 	return 0;
1080 }
1081 
1082 /*
1083  * Get the allowed maximum number of RX queues.
1084  * *pid returns the port ID that has the minimal value of
1085  * max_rx_queues among all ports.
1086  */
1087 queueid_t
1088 get_allowed_max_nb_rxq(portid_t *pid)
1089 {
1090 	queueid_t allowed_max_rxq = RTE_MAX_QUEUES_PER_PORT;
1091 	bool max_rxq_valid = false;
1092 	portid_t pi;
1093 	struct rte_eth_dev_info dev_info;
1094 
1095 	RTE_ETH_FOREACH_DEV(pi) {
1096 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1097 			continue;
1098 
1099 		max_rxq_valid = true;
1100 		if (dev_info.max_rx_queues < allowed_max_rxq) {
1101 			allowed_max_rxq = dev_info.max_rx_queues;
1102 			*pid = pi;
1103 		}
1104 	}
1105 	return max_rxq_valid ? allowed_max_rxq : 0;
1106 }
1107 
1108 /*
1109  * Check whether the requested number of RX queues is valid.
1110  * It is valid if it does not exceed the maximum number
1111  * of RX queues of any port.
1112  * If valid, return 0, else return -1
1113  */
1114 int
1115 check_nb_rxq(queueid_t rxq)
1116 {
1117 	queueid_t allowed_max_rxq;
1118 	portid_t pid = 0;
1119 
1120 	allowed_max_rxq = get_allowed_max_nb_rxq(&pid);
1121 	if (rxq > allowed_max_rxq) {
1122 		printf("Fail: input rxq (%u) can't be greater "
1123 		       "than max_rx_queues (%u) of port %u\n",
1124 		       rxq,
1125 		       allowed_max_rxq,
1126 		       pid);
1127 		return -1;
1128 	}
1129 	return 0;
1130 }
1131 
1132 /*
1133  * Get the allowed maximum number of TX queues.
1134  * *pid returns the port ID that has the minimal value of
1135  * max_tx_queues among all ports.
1136  */
1137 queueid_t
1138 get_allowed_max_nb_txq(portid_t *pid)
1139 {
1140 	queueid_t allowed_max_txq = RTE_MAX_QUEUES_PER_PORT;
1141 	bool max_txq_valid = false;
1142 	portid_t pi;
1143 	struct rte_eth_dev_info dev_info;
1144 
1145 	RTE_ETH_FOREACH_DEV(pi) {
1146 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1147 			continue;
1148 
1149 		max_txq_valid = true;
1150 		if (dev_info.max_tx_queues < allowed_max_txq) {
1151 			allowed_max_txq = dev_info.max_tx_queues;
1152 			*pid = pi;
1153 		}
1154 	}
1155 	return max_txq_valid ? allowed_max_txq : 0;
1156 }
1157 
1158 /*
1159  * Check whether the requested number of TX queues is valid.
1160  * It is valid if it does not exceed the maximum number
1161  * of TX queues of any port.
1162  * If valid, return 0, else return -1
1163  */
1164 int
1165 check_nb_txq(queueid_t txq)
1166 {
1167 	queueid_t allowed_max_txq;
1168 	portid_t pid = 0;
1169 
1170 	allowed_max_txq = get_allowed_max_nb_txq(&pid);
1171 	if (txq > allowed_max_txq) {
1172 		printf("Fail: input txq (%u) can't be greater "
1173 		       "than max_tx_queues (%u) of port %u\n",
1174 		       txq,
1175 		       allowed_max_txq,
1176 		       pid);
1177 		return -1;
1178 	}
1179 	return 0;
1180 }
1181 
1182 /*
1183  * Get the allowed maximum number of RXDs of every rx queue.
1184  * *pid returns the port ID that has the minimal value of
1185  * max_rxd over all queues of all ports.
1186  */
1187 static uint16_t
1188 get_allowed_max_nb_rxd(portid_t *pid)
1189 {
1190 	uint16_t allowed_max_rxd = UINT16_MAX;
1191 	portid_t pi;
1192 	struct rte_eth_dev_info dev_info;
1193 
1194 	RTE_ETH_FOREACH_DEV(pi) {
1195 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1196 			continue;
1197 
1198 		if (dev_info.rx_desc_lim.nb_max < allowed_max_rxd) {
1199 			allowed_max_rxd = dev_info.rx_desc_lim.nb_max;
1200 			*pid = pi;
1201 		}
1202 	}
1203 	return allowed_max_rxd;
1204 }
1205 
1206 /*
1207  * Get the allowed minimal number of RXDs of every rx queue.
1208  * *pid returns the port ID that has the highest value of
1209  * min_rxd over all queues of all ports.
1210  */
1211 static uint16_t
1212 get_allowed_min_nb_rxd(portid_t *pid)
1213 {
1214 	uint16_t allowed_min_rxd = 0;
1215 	portid_t pi;
1216 	struct rte_eth_dev_info dev_info;
1217 
1218 	RTE_ETH_FOREACH_DEV(pi) {
1219 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1220 			continue;
1221 
1222 		if (dev_info.rx_desc_lim.nb_min > allowed_min_rxd) {
1223 			allowed_min_rxd = dev_info.rx_desc_lim.nb_min;
1224 			*pid = pi;
1225 		}
1226 	}
1227 
1228 	return allowed_min_rxd;
1229 }
1230 
1231 /*
1232  * Check whether the requested number of RXDs is valid.
1233  * It is valid if it does not exceed the maximum number
1234  * of RXDs of any RX queue and is not less than the
1235  * minimal number of RXDs of any RX queue.
1236  * If valid, return 0, else return -1
1237  */
1238 int
1239 check_nb_rxd(queueid_t rxd)
1240 {
1241 	uint16_t allowed_max_rxd;
1242 	uint16_t allowed_min_rxd;
1243 	portid_t pid = 0;
1244 
1245 	allowed_max_rxd = get_allowed_max_nb_rxd(&pid);
1246 	if (rxd > allowed_max_rxd) {
1247 		printf("Fail: input rxd (%u) can't be greater "
1248 		       "than max_rxds (%u) of port %u\n",
1249 		       rxd,
1250 		       allowed_max_rxd,
1251 		       pid);
1252 		return -1;
1253 	}
1254 
1255 	allowed_min_rxd = get_allowed_min_nb_rxd(&pid);
1256 	if (rxd < allowed_min_rxd) {
1257 		printf("Fail: input rxd (%u) can't be less "
1258 		       "than min_rxds (%u) of port %u\n",
1259 		       rxd,
1260 		       allowed_min_rxd,
1261 		       pid);
1262 		return -1;
1263 	}
1264 
1265 	return 0;
1266 }
1267 
1268 /*
1269  * Get the allowed maximum number of TXDs of every TX queue.
1270  * *pid returns the port ID that has the minimal value of
1271  * max_txd over all TX queues.
1272  */
1273 static uint16_t
1274 get_allowed_max_nb_txd(portid_t *pid)
1275 {
1276 	uint16_t allowed_max_txd = UINT16_MAX;
1277 	portid_t pi;
1278 	struct rte_eth_dev_info dev_info;
1279 
1280 	RTE_ETH_FOREACH_DEV(pi) {
1281 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1282 			continue;
1283 
1284 		if (dev_info.tx_desc_lim.nb_max < allowed_max_txd) {
1285 			allowed_max_txd = dev_info.tx_desc_lim.nb_max;
1286 			*pid = pi;
1287 		}
1288 	}
1289 	return allowed_max_txd;
1290 }
1291 
1292 /*
1293  * Get the allowed minimal number of TXDs of every TX queue.
1294  * *pid returns the port ID that has the highest value of
1295  * min_txd over all TX queues.
1296  */
1297 static uint16_t
1298 get_allowed_min_nb_txd(portid_t *pid)
1299 {
1300 	uint16_t allowed_min_txd = 0;
1301 	portid_t pi;
1302 	struct rte_eth_dev_info dev_info;
1303 
1304 	RTE_ETH_FOREACH_DEV(pi) {
1305 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1306 			continue;
1307 
1308 		if (dev_info.tx_desc_lim.nb_min > allowed_min_txd) {
1309 			allowed_min_txd = dev_info.tx_desc_lim.nb_min;
1310 			*pid = pi;
1311 		}
1312 	}
1313 
1314 	return allowed_min_txd;
1315 }
1316 
1317 /*
1318  * Check whether the requested number of TXDs is valid.
1319  * It is valid if it does not exceed the maximum number of TXDs
1320  * of any TX queue and is not less than the minimal number of TXDs.
1321  * If valid, return 0, else return -1
1322  */
1323 int
1324 check_nb_txd(queueid_t txd)
1325 {
1326 	uint16_t allowed_max_txd;
1327 	uint16_t allowed_min_txd;
1328 	portid_t pid = 0;
1329 
1330 	allowed_max_txd = get_allowed_max_nb_txd(&pid);
1331 	if (txd > allowed_max_txd) {
1332 		printf("Fail: input txd (%u) can't be greater "
1333 		       "than max_txds (%u) of port %u\n",
1334 		       txd,
1335 		       allowed_max_txd,
1336 		       pid);
1337 		return -1;
1338 	}
1339 
1340 	allowed_min_txd = get_allowed_min_nb_txd(&pid);
1341 	if (txd < allowed_min_txd) {
1342 		printf("Fail: input txd (%u) can't be less "
1343 		       "than min_txds (%u) of port %u\n",
1344 		       txd,
1345 		       allowed_min_txd,
1346 		       pid);
1347 		return -1;
1348 	}
1349 	return 0;
1350 }
1351 
1352 
1353 /*
1354  * Get the allowed maximum number of hairpin queues.
1355  * *pid returns the port ID that has the minimal value of
1356  * max_hairpin_queues among all ports.
1357  */
1358 queueid_t
1359 get_allowed_max_nb_hairpinq(portid_t *pid)
1360 {
1361 	queueid_t allowed_max_hairpinq = RTE_MAX_QUEUES_PER_PORT;
1362 	portid_t pi;
1363 	struct rte_eth_hairpin_cap cap;
1364 
1365 	RTE_ETH_FOREACH_DEV(pi) {
1366 		if (rte_eth_dev_hairpin_capability_get(pi, &cap) != 0) {
1367 			*pid = pi;
1368 			return 0;
1369 		}
1370 		if (cap.max_nb_queues < allowed_max_hairpinq) {
1371 			allowed_max_hairpinq = cap.max_nb_queues;
1372 			*pid = pi;
1373 		}
1374 	}
1375 	return allowed_max_hairpinq;
1376 }
1377 
1378 /*
1379  * Check whether the requested number of hairpin queues is valid.
1380  * It is valid if it does not exceed the maximum number
1381  * of hairpin queues of any port.
1382  * If valid, return 0, else return -1
1383  */
1384 int
1385 check_nb_hairpinq(queueid_t hairpinq)
1386 {
1387 	queueid_t allowed_max_hairpinq;
1388 	portid_t pid = 0;
1389 
1390 	allowed_max_hairpinq = get_allowed_max_nb_hairpinq(&pid);
1391 	if (hairpinq > allowed_max_hairpinq) {
1392 		printf("Fail: input hairpin (%u) can't be greater "
1393 		       "than max_hairpin_queues (%u) of port %u\n",
1394 		       hairpinq, allowed_max_hairpinq, pid);
1395 		return -1;
1396 	}
1397 	return 0;
1398 }
1399 
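/*
 * Build the initial run-time configuration: forwarding lcores, per-port
 * default Rx/Tx configuration, mbuf pools, GSO/GRO contexts and forwarding
 * streams.
 */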
1400 static void
1401 init_config(void)
1402 {
1403 	portid_t pid;
1404 	struct rte_port *port;
1405 	struct rte_mempool *mbp;
1406 	unsigned int nb_mbuf_per_pool;
1407 	lcoreid_t  lc_id;
1408 	uint8_t port_per_socket[RTE_MAX_NUMA_NODES];
1409 	struct rte_gro_param gro_param;
1410 	uint32_t gso_types;
1411 	uint16_t data_size;
1412 	bool warning = 0;
1413 	int k;
1414 	int ret;
1415 
1416 	memset(port_per_socket, 0, RTE_MAX_NUMA_NODES);
1417 
1418 	/* Configuration of logical cores. */
1419 	fwd_lcores = rte_zmalloc("testpmd: fwd_lcores",
1420 				sizeof(struct fwd_lcore *) * nb_lcores,
1421 				RTE_CACHE_LINE_SIZE);
1422 	if (fwd_lcores == NULL) {
1423 		rte_exit(EXIT_FAILURE, "rte_zmalloc(%d (struct fwd_lcore *)) "
1424 							"failed\n", nb_lcores);
1425 	}
1426 	for (lc_id = 0; lc_id < nb_lcores; lc_id++) {
1427 		fwd_lcores[lc_id] = rte_zmalloc("testpmd: struct fwd_lcore",
1428 					       sizeof(struct fwd_lcore),
1429 					       RTE_CACHE_LINE_SIZE);
1430 		if (fwd_lcores[lc_id] == NULL) {
1431 			rte_exit(EXIT_FAILURE, "rte_zmalloc(struct fwd_lcore) "
1432 								"failed\n");
1433 		}
1434 		fwd_lcores[lc_id]->cpuid_idx = lc_id;
1435 	}
1436 
1437 	RTE_ETH_FOREACH_DEV(pid) {
1438 		port = &ports[pid];
1439 		/* Apply default TxRx configuration for all ports */
1440 		port->dev_conf.txmode = tx_mode;
1441 		port->dev_conf.rxmode = rx_mode;
1442 
1443 		ret = eth_dev_info_get_print_err(pid, &port->dev_info);
1444 		if (ret != 0)
1445 			rte_exit(EXIT_FAILURE,
1446 				 "rte_eth_dev_info_get() failed\n");
1447 
1448 		if (!(port->dev_info.tx_offload_capa &
1449 		      DEV_TX_OFFLOAD_MBUF_FAST_FREE))
1450 			port->dev_conf.txmode.offloads &=
1451 				~DEV_TX_OFFLOAD_MBUF_FAST_FREE;
1452 		if (numa_support) {
1453 			if (port_numa[pid] != NUMA_NO_CONFIG)
1454 				port_per_socket[port_numa[pid]]++;
1455 			else {
1456 				uint32_t socket_id = rte_eth_dev_socket_id(pid);
1457 
1458 				/*
1459 				 * if socket_id is invalid,
1460 				 * set to the first available socket.
1461 				 */
1462 				if (check_socket_id(socket_id) < 0)
1463 					socket_id = socket_ids[0];
1464 				port_per_socket[socket_id]++;
1465 			}
1466 		}
1467 
1468 		/* Apply Rx offloads configuration */
1469 		for (k = 0; k < port->dev_info.max_rx_queues; k++)
1470 			port->rx_conf[k].offloads =
1471 				port->dev_conf.rxmode.offloads;
1472 		/* Apply Tx offloads configuration */
1473 		for (k = 0; k < port->dev_info.max_tx_queues; k++)
1474 			port->tx_conf[k].offloads =
1475 				port->dev_conf.txmode.offloads;
1476 
1477 		/* set flag to initialize port/queue */
1478 		port->need_reconfig = 1;
1479 		port->need_reconfig_queues = 1;
1480 		port->tx_metadata = 0;
1481 
1482 		/* Check for maximum number of segments per MTU. Accordingly
1483 		 * update the mbuf data size.
1484 		 */
1485 		if (port->dev_info.rx_desc_lim.nb_mtu_seg_max != UINT16_MAX &&
1486 				port->dev_info.rx_desc_lim.nb_mtu_seg_max != 0) {
1487 			data_size = rx_mode.max_rx_pkt_len /
1488 				port->dev_info.rx_desc_lim.nb_mtu_seg_max;
1489 
1490 			if ((data_size + RTE_PKTMBUF_HEADROOM) >
1491 							mbuf_data_size) {
1492 				mbuf_data_size = data_size +
1493 						 RTE_PKTMBUF_HEADROOM;
1494 				warning = 1;
1495 			}
1496 		}
1497 	}
1498 
1499 	if (warning)
1500 		TESTPMD_LOG(WARNING, "Configured mbuf size %hu\n",
1501 			    mbuf_data_size);
1502 
1503 	/*
1504 	 * Create mbuf pools.
1505 	 * If NUMA support is disabled, create a single mbuf pool in
1506 	 * socket 0 memory by default.
1507 	 * Otherwise, create an mbuf pool in the memory of each detected socket.
1508 	 *
1509 	 * Size the pools for the maximum values of nb_rxd and nb_txd here, so that
1510 	 * nb_rxd and nb_txd can be reconfigured at run time.
1511 	 */
1512 	if (param_total_num_mbufs)
1513 		nb_mbuf_per_pool = param_total_num_mbufs;
1514 	else {
1515 		nb_mbuf_per_pool = RTE_TEST_RX_DESC_MAX +
1516 			(nb_lcores * mb_mempool_cache) +
1517 			RTE_TEST_TX_DESC_MAX + MAX_PKT_BURST;
1518 		nb_mbuf_per_pool *= RTE_MAX_ETHPORTS;
1519 	}
1520 
1521 	if (numa_support) {
1522 		uint8_t i;
1523 
1524 		for (i = 0; i < num_sockets; i++)
1525 			mempools[i] = mbuf_pool_create(mbuf_data_size,
1526 						       nb_mbuf_per_pool,
1527 						       socket_ids[i]);
1528 	} else {
1529 		if (socket_num == UMA_NO_CONFIG)
1530 			mempools[0] = mbuf_pool_create(mbuf_data_size,
1531 						       nb_mbuf_per_pool, 0);
1532 		else
1533 			mempools[socket_num] = mbuf_pool_create
1534 							(mbuf_data_size,
1535 							 nb_mbuf_per_pool,
1536 							 socket_num);
1537 	}
1538 
1539 	init_port_config();
1540 
1541 	gso_types = DEV_TX_OFFLOAD_TCP_TSO | DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
1542 		DEV_TX_OFFLOAD_GRE_TNL_TSO | DEV_TX_OFFLOAD_UDP_TSO;
1543 	/*
1544 	 * Record which mbuf pool each logical core should use, if needed.
1545 	 */
1546 	for (lc_id = 0; lc_id < nb_lcores; lc_id++) {
1547 		mbp = mbuf_pool_find(
1548 			rte_lcore_to_socket_id(fwd_lcores_cpuids[lc_id]));
1549 
1550 		if (mbp == NULL)
1551 			mbp = mbuf_pool_find(0);
1552 		fwd_lcores[lc_id]->mbp = mbp;
1553 		/* initialize GSO context */
1554 		fwd_lcores[lc_id]->gso_ctx.direct_pool = mbp;
1555 		fwd_lcores[lc_id]->gso_ctx.indirect_pool = mbp;
1556 		fwd_lcores[lc_id]->gso_ctx.gso_types = gso_types;
1557 		fwd_lcores[lc_id]->gso_ctx.gso_size = RTE_ETHER_MAX_LEN -
1558 			RTE_ETHER_CRC_LEN;
1559 		fwd_lcores[lc_id]->gso_ctx.flag = 0;
1560 	}
1561 
1562 	/* Configuration of packet forwarding streams. */
1563 	if (init_fwd_streams() < 0)
1564 		rte_exit(EXIT_FAILURE, "FAIL from init_fwd_streams()\n");
1565 
1566 	fwd_config_setup();
1567 
1568 	/* create a gro context for each lcore */
1569 	gro_param.gro_types = RTE_GRO_TCP_IPV4;
1570 	gro_param.max_flow_num = GRO_MAX_FLUSH_CYCLES;
1571 	gro_param.max_item_per_flow = MAX_PKT_BURST;
1572 	for (lc_id = 0; lc_id < nb_lcores; lc_id++) {
1573 		gro_param.socket_id = rte_lcore_to_socket_id(
1574 				fwd_lcores_cpuids[lc_id]);
1575 		fwd_lcores[lc_id]->gro_ctx = rte_gro_ctx_create(&gro_param);
1576 		if (fwd_lcores[lc_id]->gro_ctx == NULL) {
1577 			rte_exit(EXIT_FAILURE,
1578 					"rte_gro_ctx_create() failed\n");
1579 		}
1580 	}
1581 }
1582 
1583 
1584 void
1585 reconfig(portid_t new_port_id, unsigned socket_id)
1586 {
1587 	struct rte_port *port;
1588 	int ret;
1589 
1590 	/* Reconfiguration of Ethernet ports. */
1591 	port = &ports[new_port_id];
1592 
1593 	ret = eth_dev_info_get_print_err(new_port_id, &port->dev_info);
1594 	if (ret != 0)
1595 		return;
1596 
1597 	/* set flag to initialize port/queue */
1598 	port->need_reconfig = 1;
1599 	port->need_reconfig_queues = 1;
1600 	port->socket_id = socket_id;
1601 
1602 	init_port_config();
1603 }
1604 
1605 
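/*
 * Validate the requested queue counts, assign a NUMA socket to every port and
 * (re)allocate the array of forwarding streams
 * (nb_ports * max(nb_rxq, nb_txq) entries).
 */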
1606 int
1607 init_fwd_streams(void)
1608 {
1609 	portid_t pid;
1610 	struct rte_port *port;
1611 	streamid_t sm_id, nb_fwd_streams_new;
1612 	queueid_t q;
1613 
1614 	/* set the socket ID according to whether NUMA is enabled */
1615 	RTE_ETH_FOREACH_DEV(pid) {
1616 		port = &ports[pid];
1617 		if (nb_rxq > port->dev_info.max_rx_queues) {
1618 			printf("Fail: nb_rxq(%d) is greater than "
1619 				"max_rx_queues(%d)\n", nb_rxq,
1620 				port->dev_info.max_rx_queues);
1621 			return -1;
1622 		}
1623 		if (nb_txq > port->dev_info.max_tx_queues) {
1624 			printf("Fail: nb_txq(%d) is greater than "
1625 				"max_tx_queues(%d)\n", nb_txq,
1626 				port->dev_info.max_tx_queues);
1627 			return -1;
1628 		}
1629 		if (numa_support) {
1630 			if (port_numa[pid] != NUMA_NO_CONFIG)
1631 				port->socket_id = port_numa[pid];
1632 			else {
1633 				port->socket_id = rte_eth_dev_socket_id(pid);
1634 
1635 				/*
1636 				 * if socket_id is invalid,
1637 				 * set to the first available socket.
1638 				 */
1639 				if (check_socket_id(port->socket_id) < 0)
1640 					port->socket_id = socket_ids[0];
1641 			}
1642 		}
1643 		else {
1644 			if (socket_num == UMA_NO_CONFIG)
1645 				port->socket_id = 0;
1646 			else
1647 				port->socket_id = socket_num;
1648 		}
1649 	}
1650 
1651 	q = RTE_MAX(nb_rxq, nb_txq);
1652 	if (q == 0) {
1653 		printf("Fail: Cannot allocate fwd streams as number of queues is 0\n");
1654 		return -1;
1655 	}
1656 	nb_fwd_streams_new = (streamid_t)(nb_ports * q);
1657 	if (nb_fwd_streams_new == nb_fwd_streams)
1658 		return 0;
1659 	/* clear the old */
1660 	if (fwd_streams != NULL) {
1661 		for (sm_id = 0; sm_id < nb_fwd_streams; sm_id++) {
1662 			if (fwd_streams[sm_id] == NULL)
1663 				continue;
1664 			rte_free(fwd_streams[sm_id]);
1665 			fwd_streams[sm_id] = NULL;
1666 		}
1667 		rte_free(fwd_streams);
1668 		fwd_streams = NULL;
1669 	}
1670 
1671 	/* init new */
1672 	nb_fwd_streams = nb_fwd_streams_new;
1673 	if (nb_fwd_streams) {
1674 		fwd_streams = rte_zmalloc("testpmd: fwd_streams",
1675 			sizeof(struct fwd_stream *) * nb_fwd_streams,
1676 			RTE_CACHE_LINE_SIZE);
1677 		if (fwd_streams == NULL)
1678 			rte_exit(EXIT_FAILURE, "rte_zmalloc(%d"
1679 				 " (struct fwd_stream *)) failed\n",
1680 				 nb_fwd_streams);
1681 
1682 		for (sm_id = 0; sm_id < nb_fwd_streams; sm_id++) {
1683 			fwd_streams[sm_id] = rte_zmalloc("testpmd:"
1684 				" struct fwd_stream", sizeof(struct fwd_stream),
1685 				RTE_CACHE_LINE_SIZE);
1686 			if (fwd_streams[sm_id] == NULL)
1687 				rte_exit(EXIT_FAILURE, "rte_zmalloc"
1688 					 "(struct fwd_stream) failed\n");
1689 		}
1690 	}
1691 
1692 	return 0;
1693 }
1694 
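/*
 * Display the distribution of RX/TX burst sizes recorded for a stream:
 * the number of empty bursts plus the two most frequent burst sizes.
 */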
1695 static void
1696 pkt_burst_stats_display(const char *rx_tx, struct pkt_burst_stats *pbs)
1697 {
1698 	uint64_t total_burst, sburst;
1699 	uint64_t nb_burst;
1700 	uint64_t burst_stats[4];
1701 	uint16_t pktnb_stats[4];
1702 	uint16_t nb_pkt;
1703 	int burst_percent[4], sburstp;
1704 	int i;
1705 
1706 	/*
1707 	 * First compute the total number of packet bursts and the
1708 	 * two highest numbers of bursts of the same number of packets.
1709 	 */
1710 	memset(&burst_stats, 0x0, sizeof(burst_stats));
1711 	memset(&pktnb_stats, 0x0, sizeof(pktnb_stats));
1712 
1713 	/* Show stats for 0 burst size always */
1714 	total_burst = pbs->pkt_burst_spread[0];
1715 	burst_stats[0] = pbs->pkt_burst_spread[0];
1716 	pktnb_stats[0] = 0;
1717 
1718 	/* Find the next 2 burst sizes with highest occurrences. */
1719 	for (nb_pkt = 1; nb_pkt < MAX_PKT_BURST; nb_pkt++) {
1720 		nb_burst = pbs->pkt_burst_spread[nb_pkt];
1721 
1722 		if (nb_burst == 0)
1723 			continue;
1724 
1725 		total_burst += nb_burst;
1726 
1727 		if (nb_burst > burst_stats[1]) {
1728 			burst_stats[2] = burst_stats[1];
1729 			pktnb_stats[2] = pktnb_stats[1];
1730 			burst_stats[1] = nb_burst;
1731 			pktnb_stats[1] = nb_pkt;
1732 		} else if (nb_burst > burst_stats[2]) {
1733 			burst_stats[2] = nb_burst;
1734 			pktnb_stats[2] = nb_pkt;
1735 		}
1736 	}
1737 	if (total_burst == 0)
1738 		return;
1739 
1740 	printf("  %s-bursts : %"PRIu64" [", rx_tx, total_burst);
1741 	for (i = 0, sburst = 0, sburstp = 0; i < 4; i++) {
1742 		if (i == 3) {
1743 			printf("%d%% of other]\n", 100 - sburstp);
1744 			return;
1745 		}
1746 
1747 		sburst += burst_stats[i];
1748 		if (sburst == total_burst) {
1749 			printf("%d%% of %d pkts]\n",
1750 				100 - sburstp, (int) pktnb_stats[i]);
1751 			return;
1752 		}
1753 
1754 		burst_percent[i] =
1755 			(double)burst_stats[i] / total_burst * 100;
1756 		printf("%d%% of %d pkts + ",
1757 			burst_percent[i], (int) pktnb_stats[i]);
1758 		sburstp += burst_percent[i];
1759 	}
1760 }
1761 
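/*
 * Display the statistics of one forwarding stream: packets received,
 * transmitted and dropped, checksum errors in csum mode and, optionally,
 * the burst size spread.
 */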
1762 static void
1763 fwd_stream_stats_display(streamid_t stream_id)
1764 {
1765 	struct fwd_stream *fs;
1766 	static const char *fwd_top_stats_border = "-------";
1767 
1768 	fs = fwd_streams[stream_id];
1769 	if ((fs->rx_packets == 0) && (fs->tx_packets == 0) &&
1770 	    (fs->fwd_dropped == 0))
1771 		return;
1772 	printf("\n  %s Forward Stats for RX Port=%2d/Queue=%2d -> "
1773 	       "TX Port=%2d/Queue=%2d %s\n",
1774 	       fwd_top_stats_border, fs->rx_port, fs->rx_queue,
1775 	       fs->tx_port, fs->tx_queue, fwd_top_stats_border);
1776 	printf("  RX-packets: %-14"PRIu64" TX-packets: %-14"PRIu64
1777 	       " TX-dropped: %-14"PRIu64,
1778 	       fs->rx_packets, fs->tx_packets, fs->fwd_dropped);
1779 
1780 	/* if checksum mode */
1781 	if (cur_fwd_eng == &csum_fwd_engine) {
1782 		printf("  RX- bad IP checksum: %-14"PRIu64
1783 		       "  Rx- bad L4 checksum: %-14"PRIu64
1784 		       " Rx- bad outer L4 checksum: %-14"PRIu64"\n",
1785 			fs->rx_bad_ip_csum, fs->rx_bad_l4_csum,
1786 			fs->rx_bad_outer_l4_csum);
1787 	} else {
1788 		printf("\n");
1789 	}
1790 
1791 	if (record_burst_stats) {
1792 		pkt_burst_stats_display("RX", &fs->rx_burst_stats);
1793 		pkt_burst_stats_display("TX", &fs->tx_burst_stats);
1794 	}
1795 }
1796 
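/*
 * Display the forwarding statistics of every forwarding port (relative to the
 * snapshot stored in each port's stats), followed by accumulated totals for
 * all ports.
 */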
1797 void
1798 fwd_stats_display(void)
1799 {
1800 	static const char *fwd_stats_border = "----------------------";
1801 	static const char *acc_stats_border = "+++++++++++++++";
1802 	struct {
1803 		struct fwd_stream *rx_stream;
1804 		struct fwd_stream *tx_stream;
1805 		uint64_t tx_dropped;
1806 		uint64_t rx_bad_ip_csum;
1807 		uint64_t rx_bad_l4_csum;
1808 		uint64_t rx_bad_outer_l4_csum;
1809 	} ports_stats[RTE_MAX_ETHPORTS];
1810 	uint64_t total_rx_dropped = 0;
1811 	uint64_t total_tx_dropped = 0;
1812 	uint64_t total_rx_nombuf = 0;
1813 	struct rte_eth_stats stats;
1814 	uint64_t fwd_cycles = 0;
1815 	uint64_t total_recv = 0;
1816 	uint64_t total_xmit = 0;
1817 	struct rte_port *port;
1818 	streamid_t sm_id;
1819 	portid_t pt_id;
1820 	int i;
1821 
1822 	memset(ports_stats, 0, sizeof(ports_stats));
1823 
1824 	for (sm_id = 0; sm_id < cur_fwd_config.nb_fwd_streams; sm_id++) {
1825 		struct fwd_stream *fs = fwd_streams[sm_id];
1826 
1827 		if (cur_fwd_config.nb_fwd_streams >
1828 		    cur_fwd_config.nb_fwd_ports) {
1829 			fwd_stream_stats_display(sm_id);
1830 		} else {
1831 			ports_stats[fs->tx_port].tx_stream = fs;
1832 			ports_stats[fs->rx_port].rx_stream = fs;
1833 		}
1834 
1835 		ports_stats[fs->tx_port].tx_dropped += fs->fwd_dropped;
1836 
1837 		ports_stats[fs->rx_port].rx_bad_ip_csum += fs->rx_bad_ip_csum;
1838 		ports_stats[fs->rx_port].rx_bad_l4_csum += fs->rx_bad_l4_csum;
1839 		ports_stats[fs->rx_port].rx_bad_outer_l4_csum +=
1840 				fs->rx_bad_outer_l4_csum;
1841 
1842 		if (record_core_cycles)
1843 			fwd_cycles += fs->core_cycles;
1844 	}
1845 	for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
1846 		uint8_t j;
1847 
1848 		pt_id = fwd_ports_ids[i];
1849 		port = &ports[pt_id];
1850 
1851 		rte_eth_stats_get(pt_id, &stats);
1852 		stats.ipackets -= port->stats.ipackets;
1853 		stats.opackets -= port->stats.opackets;
1854 		stats.ibytes -= port->stats.ibytes;
1855 		stats.obytes -= port->stats.obytes;
1856 		stats.imissed -= port->stats.imissed;
1857 		stats.oerrors -= port->stats.oerrors;
1858 		stats.rx_nombuf -= port->stats.rx_nombuf;
1859 
1860 		total_recv += stats.ipackets;
1861 		total_xmit += stats.opackets;
1862 		total_rx_dropped += stats.imissed;
1863 		total_tx_dropped += ports_stats[pt_id].tx_dropped;
1864 		total_tx_dropped += stats.oerrors;
1865 		total_rx_nombuf  += stats.rx_nombuf;
1866 
1867 		printf("\n  %s Forward statistics for port %-2d %s\n",
1868 		       fwd_stats_border, pt_id, fwd_stats_border);
1869 
1870 		if (!port->rx_queue_stats_mapping_enabled &&
1871 		    !port->tx_queue_stats_mapping_enabled) {
1872 			printf("  RX-packets: %-14"PRIu64
1873 			       " RX-dropped: %-14"PRIu64
1874 			       "RX-total: %-"PRIu64"\n",
1875 			       stats.ipackets, stats.imissed,
1876 			       stats.ipackets + stats.imissed);
1877 
1878 			if (cur_fwd_eng == &csum_fwd_engine)
1879 				printf("  Bad-ipcsum: %-14"PRIu64
1880 				       " Bad-l4csum: %-14"PRIu64
1881 				       "Bad-outer-l4csum: %-14"PRIu64"\n",
1882 				       ports_stats[pt_id].rx_bad_ip_csum,
1883 				       ports_stats[pt_id].rx_bad_l4_csum,
1884 				       ports_stats[pt_id].rx_bad_outer_l4_csum);
1885 			if (stats.ierrors + stats.rx_nombuf > 0) {
1886 				printf("  RX-error: %-"PRIu64"\n",
1887 				       stats.ierrors);
1888 				printf("  RX-nombufs: %-14"PRIu64"\n",
1889 				       stats.rx_nombuf);
1890 			}
1891 
1892 			printf("  TX-packets: %-14"PRIu64
1893 			       " TX-dropped: %-14"PRIu64
1894 			       "TX-total: %-"PRIu64"\n",
1895 			       stats.opackets, ports_stats[pt_id].tx_dropped,
1896 			       stats.opackets + ports_stats[pt_id].tx_dropped);
1897 		} else {
1898 			printf("  RX-packets:             %14"PRIu64
1899 			       "    RX-dropped:%14"PRIu64
1900 			       "    RX-total:%14"PRIu64"\n",
1901 			       stats.ipackets, stats.imissed,
1902 			       stats.ipackets + stats.imissed);
1903 
1904 			if (cur_fwd_eng == &csum_fwd_engine)
1905 				printf("  Bad-ipcsum:%14"PRIu64
1906 				       "    Bad-l4csum:%14"PRIu64
1907 				       "    Bad-outer-l4csum: %-14"PRIu64"\n",
1908 				       ports_stats[pt_id].rx_bad_ip_csum,
1909 				       ports_stats[pt_id].rx_bad_l4_csum,
1910 				       ports_stats[pt_id].rx_bad_outer_l4_csum);
1911 			if ((stats.ierrors + stats.rx_nombuf) > 0) {
1912 				printf("  RX-error:%"PRIu64"\n", stats.ierrors);
1913 				printf("  RX-nombufs:             %14"PRIu64"\n",
1914 				       stats.rx_nombuf);
1915 			}
1916 
1917 			printf("  TX-packets:             %14"PRIu64
1918 			       "    TX-dropped:%14"PRIu64
1919 			       "    TX-total:%14"PRIu64"\n",
1920 			       stats.opackets, ports_stats[pt_id].tx_dropped,
1921 			       stats.opackets + ports_stats[pt_id].tx_dropped);
1922 		}
1923 
1924 		if (record_burst_stats) {
1925 			if (ports_stats[pt_id].rx_stream)
1926 				pkt_burst_stats_display("RX",
1927 					&ports_stats[pt_id].rx_stream->rx_burst_stats);
1928 			if (ports_stats[pt_id].tx_stream)
1929 				pkt_burst_stats_display("TX",
1930 					&ports_stats[pt_id].tx_stream->tx_burst_stats);
1931 		}
1932 
1933 		if (port->rx_queue_stats_mapping_enabled) {
1934 			printf("\n");
1935 			for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
1936 				printf("  Stats reg %2d RX-packets:%14"PRIu64
1937 				       "     RX-errors:%14"PRIu64
1938 				       "    RX-bytes:%14"PRIu64"\n",
1939 				       j, stats.q_ipackets[j],
1940 				       stats.q_errors[j], stats.q_ibytes[j]);
1941 			}
1942 			printf("\n");
1943 		}
1944 		if (port->tx_queue_stats_mapping_enabled) {
1945 			for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
1946 				printf("  Stats reg %2d TX-packets:%14"PRIu64
1947 				       "                                 TX-bytes:%14"
1948 				       PRIu64"\n",
1949 				       j, stats.q_opackets[j],
1950 				       stats.q_obytes[j]);
1951 			}
1952 		}
1953 
1954 		printf("  %s--------------------------------%s\n",
1955 		       fwd_stats_border, fwd_stats_border);
1956 	}
1957 
1958 	printf("\n  %s Accumulated forward statistics for all ports"
1959 	       "%s\n",
1960 	       acc_stats_border, acc_stats_border);
1961 	printf("  RX-packets: %-14"PRIu64" RX-dropped: %-14"PRIu64"RX-total: "
1962 	       "%-"PRIu64"\n"
1963 	       "  TX-packets: %-14"PRIu64" TX-dropped: %-14"PRIu64"TX-total: "
1964 	       "%-"PRIu64"\n",
1965 	       total_recv, total_rx_dropped, total_recv + total_rx_dropped,
1966 	       total_xmit, total_tx_dropped, total_xmit + total_tx_dropped);
1967 	if (total_rx_nombuf > 0)
1968 		printf("  RX-nombufs: %-14"PRIu64"\n", total_rx_nombuf);
1969 	printf("  %s++++++++++++++++++++++++++++++++++++++++++++++"
1970 	       "%s\n",
1971 	       acc_stats_border, acc_stats_border);
1972 	if (record_core_cycles) {
1973 #define CYC_PER_MHZ 1E6
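		/*
		 * Report the average CPU cost per packet: total core cycles
		 * spent in the forwarding loops divided by the number of
		 * packets handled. For txonly/flowgen nothing is received,
		 * so the transmitted packet count is used instead.
		 */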
1974 		if (total_recv > 0 || total_xmit > 0) {
1975 			uint64_t total_pkts = 0;
1976 			if (strcmp(cur_fwd_eng->fwd_mode_name, "txonly") == 0 ||
1977 			    strcmp(cur_fwd_eng->fwd_mode_name, "flowgen") == 0)
1978 				total_pkts = total_xmit;
1979 			else
1980 				total_pkts = total_recv;
1981 
1982 			printf("\n  CPU cycles/packet=%.2F (total cycles="
1983 			       "%"PRIu64" / total %s packets=%"PRIu64") at %"PRIu64
1984 			       " MHz Clock\n",
1985 			       (double) fwd_cycles / total_pkts,
1986 			       fwd_cycles, cur_fwd_eng->fwd_mode_name, total_pkts,
1987 			       (uint64_t)(rte_get_tsc_hz() / CYC_PER_MHZ));
1988 		}
1989 	}
1990 }
1991 
1992 void
1993 fwd_stats_reset(void)
1994 {
1995 	streamid_t sm_id;
1996 	portid_t pt_id;
1997 	int i;
1998 
1999 	for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
2000 		pt_id = fwd_ports_ids[i];
2001 		rte_eth_stats_get(pt_id, &ports[pt_id].stats);
2002 	}
2003 	for (sm_id = 0; sm_id < cur_fwd_config.nb_fwd_streams; sm_id++) {
2004 		struct fwd_stream *fs = fwd_streams[sm_id];
2005 
2006 		fs->rx_packets = 0;
2007 		fs->tx_packets = 0;
2008 		fs->fwd_dropped = 0;
2009 		fs->rx_bad_ip_csum = 0;
2010 		fs->rx_bad_l4_csum = 0;
2011 		fs->rx_bad_outer_l4_csum = 0;
2012 
2013 		memset(&fs->rx_burst_stats, 0, sizeof(fs->rx_burst_stats));
2014 		memset(&fs->tx_burst_stats, 0, sizeof(fs->tx_burst_stats));
2015 		fs->core_cycles = 0;
2016 	}
2017 }
2018 
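/*
 * Drain any packets left in the Rx queues of the forwarding ports so that a
 * new forwarding session starts from a clean state. Two passes are made,
 * separated by a short delay, to also catch packets still in flight.
 */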
2019 static void
2020 flush_fwd_rx_queues(void)
2021 {
2022 	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
2023 	portid_t  rxp;
2024 	portid_t port_id;
2025 	queueid_t rxq;
2026 	uint16_t  nb_rx;
2027 	uint16_t  i;
2028 	uint8_t   j;
2029 	uint64_t prev_tsc = 0, diff_tsc, cur_tsc, timer_tsc = 0;
2030 	uint64_t timer_period;
2031 
2032 	/* convert to number of cycles */
2033 	timer_period = rte_get_timer_hz(); /* 1 second timeout */
2034 
2035 	for (j = 0; j < 2; j++) {
2036 		for (rxp = 0; rxp < cur_fwd_config.nb_fwd_ports; rxp++) {
2037 			for (rxq = 0; rxq < nb_rxq; rxq++) {
2038 				port_id = fwd_ports_ids[rxp];
2039 				/*
2040 				 * testpmd can get stuck in the do-while loop
2041 				 * below if rte_eth_rx_burst() always returns
2042 				 * nonzero packets, so a timer is used to exit
2043 				 * the loop after a one second timeout.
2044 				 */
2045 				prev_tsc = rte_rdtsc();
2046 				do {
2047 					nb_rx = rte_eth_rx_burst(port_id, rxq,
2048 						pkts_burst, MAX_PKT_BURST);
2049 					for (i = 0; i < nb_rx; i++)
2050 						rte_pktmbuf_free(pkts_burst[i]);
2051 
2052 					cur_tsc = rte_rdtsc();
2053 					diff_tsc = cur_tsc - prev_tsc;
2054 					timer_tsc += diff_tsc;
2055 				} while ((nb_rx > 0) &&
2056 					(timer_tsc < timer_period));
2057 				timer_tsc = 0;
2058 			}
2059 		}
2060 		rte_delay_ms(10); /* wait 10 milli-seconds before retrying */
2061 	}
2062 }
2063 
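/*
 * Forwarding loop of one logical core: repeatedly invoke the packet
 * forwarding callback on every stream assigned to this lcore until the
 * lcore is told to stop. Bitrate and latency statistics are also updated
 * from here when enabled and this is the designated lcore.
 */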
2064 static void
2065 run_pkt_fwd_on_lcore(struct fwd_lcore *fc, packet_fwd_t pkt_fwd)
2066 {
2067 	struct fwd_stream **fsm;
2068 	streamid_t nb_fs;
2069 	streamid_t sm_id;
2070 #ifdef RTE_LIBRTE_BITRATESTATS
2071 	uint64_t tics_per_1sec;
2072 	uint64_t tics_datum;
2073 	uint64_t tics_current;
2074 	uint16_t i, cnt_ports;
2075 
2076 	cnt_ports = nb_ports;
2077 	tics_datum = rte_rdtsc();
2078 	tics_per_1sec = rte_get_timer_hz();
2079 #endif
2080 	fsm = &fwd_streams[fc->stream_idx];
2081 	nb_fs = fc->stream_nb;
2082 	do {
2083 		for (sm_id = 0; sm_id < nb_fs; sm_id++)
2084 			(*pkt_fwd)(fsm[sm_id]);
2085 #ifdef RTE_LIBRTE_BITRATESTATS
2086 		if (bitrate_enabled != 0 &&
2087 				bitrate_lcore_id == rte_lcore_id()) {
2088 			tics_current = rte_rdtsc();
2089 			if (tics_current - tics_datum >= tics_per_1sec) {
2090 				/* Periodic bitrate calculation */
2091 				for (i = 0; i < cnt_ports; i++)
2092 					rte_stats_bitrate_calc(bitrate_data,
2093 						ports_ids[i]);
2094 				tics_datum = tics_current;
2095 			}
2096 		}
2097 #endif
2098 #ifdef RTE_LIBRTE_LATENCY_STATS
2099 		if (latencystats_enabled != 0 &&
2100 				latencystats_lcore_id == rte_lcore_id())
2101 			rte_latencystats_update();
2102 #endif
2103 
2104 	} while (! fc->stopped);
2105 }
2106 
2107 static int
2108 start_pkt_forward_on_core(void *fwd_arg)
2109 {
2110 	run_pkt_fwd_on_lcore((struct fwd_lcore *) fwd_arg,
2111 			     cur_fwd_config.fwd_eng->packet_fwd);
2112 	return 0;
2113 }
2114 
2115 /*
2116  * Run the TXONLY packet forwarding engine to send a single burst of packets.
2117  * Used to start communication flows in network loopback test configurations.
2118  */
2119 static int
2120 run_one_txonly_burst_on_core(void *fwd_arg)
2121 {
2122 	struct fwd_lcore *fwd_lc;
2123 	struct fwd_lcore tmp_lcore;
2124 
2125 	fwd_lc = (struct fwd_lcore *) fwd_arg;
2126 	tmp_lcore = *fwd_lc;
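	/*
	 * Marking the local copy as stopped makes the do-while loop in
	 * run_pkt_fwd_on_lcore() exit after a single pass, i.e. exactly one
	 * burst is sent on each stream of this lcore.
	 */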
2127 	tmp_lcore.stopped = 1;
2128 	run_pkt_fwd_on_lcore(&tmp_lcore, tx_only_engine.packet_fwd);
2129 	return 0;
2130 }
2131 
2132 /*
2133  * Launch packet forwarding:
2134  *     - Set up the per-port forwarding context.
2135  *     - Launch the logical cores with their forwarding configuration.
2136  */
2137 static void
2138 launch_packet_forwarding(lcore_function_t *pkt_fwd_on_lcore)
2139 {
2140 	port_fwd_begin_t port_fwd_begin;
2141 	unsigned int i;
2142 	unsigned int lc_id;
2143 	int diag;
2144 
2145 	port_fwd_begin = cur_fwd_config.fwd_eng->port_fwd_begin;
2146 	if (port_fwd_begin != NULL) {
2147 		for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++)
2148 			(*port_fwd_begin)(fwd_ports_ids[i]);
2149 	}
2150 	for (i = 0; i < cur_fwd_config.nb_fwd_lcores; i++) {
2151 		lc_id = fwd_lcores_cpuids[i];
2152 		if ((interactive == 0) || (lc_id != rte_lcore_id())) {
2153 			fwd_lcores[i]->stopped = 0;
2154 			diag = rte_eal_remote_launch(pkt_fwd_on_lcore,
2155 						     fwd_lcores[i], lc_id);
2156 			if (diag != 0)
2157 				printf("launch lcore %u failed - diag=%d\n",
2158 				       lc_id, diag);
2159 		}
2160 	}
2161 }
2162 
2163 /*
2164  * Launch packet forwarding configuration.
2165  */
2166 void
2167 start_packet_forwarding(int with_tx_first)
2168 {
2169 	port_fwd_begin_t port_fwd_begin;
2170 	port_fwd_end_t  port_fwd_end;
2171 	struct rte_port *port;
2172 	unsigned int i;
2173 	portid_t   pt_id;
2174 
2175 	if (strcmp(cur_fwd_eng->fwd_mode_name, "rxonly") == 0 && !nb_rxq)
2176 		rte_exit(EXIT_FAILURE, "rxq is 0, cannot use rxonly fwd mode\n");
2177 
2178 	if (strcmp(cur_fwd_eng->fwd_mode_name, "txonly") == 0 && !nb_txq)
2179 		rte_exit(EXIT_FAILURE, "txq is 0, cannot use txonly fwd mode\n");
2180 
2181 	if ((strcmp(cur_fwd_eng->fwd_mode_name, "rxonly") != 0 &&
2182 		strcmp(cur_fwd_eng->fwd_mode_name, "txonly") != 0) &&
2183 		(!nb_rxq || !nb_txq))
2184 		rte_exit(EXIT_FAILURE,
2185 			"Either rxq or txq is 0, cannot use %s fwd mode\n",
2186 			cur_fwd_eng->fwd_mode_name);
2187 
2188 	if (all_ports_started() == 0) {
2189 		printf("Not all ports were started\n");
2190 		return;
2191 	}
2192 	if (test_done == 0) {
2193 		printf("Packet forwarding already started\n");
2194 		return;
2195 	}
2196 
2197 
2198 	if (dcb_test) {
2199 		for (i = 0; i < nb_fwd_ports; i++) {
2200 			pt_id = fwd_ports_ids[i];
2201 			port = &ports[pt_id];
2202 			if (!port->dcb_flag) {
2203 				printf("In DCB mode, all forwarding ports must "
2204                                        "be configured in this mode.\n");
2205 				return;
2206 			}
2207 		}
2208 		if (nb_fwd_lcores == 1) {
2209 			printf("In DCB mode, the number of forwarding cores "
2210                                "should be larger than 1.\n");
2211 			return;
2212 		}
2213 	}
2214 	test_done = 0;
2215 
2216 	fwd_config_setup();
2217 
2218 	if (!no_flush_rx)
2219 		flush_fwd_rx_queues();
2220 
2221 	pkt_fwd_config_display(&cur_fwd_config);
2222 	rxtx_config_display();
2223 
2224 	fwd_stats_reset();
2225 	for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
2226 		pt_id = fwd_ports_ids[i];
2227 		port = &ports[pt_id];
2228 		map_port_queue_stats_mapping_registers(pt_id, port);
2229 	}
2230 	if (with_tx_first) {
2231 		port_fwd_begin = tx_only_engine.port_fwd_begin;
2232 		if (port_fwd_begin != NULL) {
2233 			for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++)
2234 				(*port_fwd_begin)(fwd_ports_ids[i]);
2235 		}
2236 		while (with_tx_first--) {
2237 			launch_packet_forwarding(
2238 					run_one_txonly_burst_on_core);
2239 			rte_eal_mp_wait_lcore();
2240 		}
2241 		port_fwd_end = tx_only_engine.port_fwd_end;
2242 		if (port_fwd_end != NULL) {
2243 			for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++)
2244 				(*port_fwd_end)(fwd_ports_ids[i]);
2245 		}
2246 	}
2247 	launch_packet_forwarding(start_pkt_forward_on_core);
2248 }
2249 
2250 void
2251 stop_packet_forwarding(void)
2252 {
2253 	port_fwd_end_t port_fwd_end;
2254 	lcoreid_t lc_id;
2255 	portid_t pt_id;
2256 	int i;
2257 
2258 	if (test_done) {
2259 		printf("Packet forwarding not started\n");
2260 		return;
2261 	}
2262 	printf("Telling cores to stop...");
2263 	for (lc_id = 0; lc_id < cur_fwd_config.nb_fwd_lcores; lc_id++)
2264 		fwd_lcores[lc_id]->stopped = 1;
2265 	printf("\nWaiting for lcores to finish...\n");
2266 	rte_eal_mp_wait_lcore();
2267 	port_fwd_end = cur_fwd_config.fwd_eng->port_fwd_end;
2268 	if (port_fwd_end != NULL) {
2269 		for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
2270 			pt_id = fwd_ports_ids[i];
2271 			(*port_fwd_end)(pt_id);
2272 		}
2273 	}
2274 
2275 	fwd_stats_display();
2276 
2277 	printf("\nDone.\n");
2278 	test_done = 1;
2279 }
2280 
2281 void
2282 dev_set_link_up(portid_t pid)
2283 {
2284 	if (rte_eth_dev_set_link_up(pid) < 0)
2285 		printf("\nSet link up fail.\n");
2286 }
2287 
2288 void
2289 dev_set_link_down(portid_t pid)
2290 {
2291 	if (rte_eth_dev_set_link_down(pid) < 0)
2292 		printf("\nSet link down fail.\n");
2293 }
2294 
2295 static int
2296 all_ports_started(void)
2297 {
2298 	portid_t pi;
2299 	struct rte_port *port;
2300 
2301 	RTE_ETH_FOREACH_DEV(pi) {
2302 		port = &ports[pi];
2303 		/* Check if there is a port which is not started */
2304 		if ((port->port_status != RTE_PORT_STARTED) &&
2305 			(port->slave_flag == 0))
2306 			return 0;
2307 	}
2308 
2309 	/* All (non-slave) ports are started */
2310 	return 1;
2311 }
2312 
2313 int
2314 port_is_stopped(portid_t port_id)
2315 {
2316 	struct rte_port *port = &ports[port_id];
2317 
2318 	if ((port->port_status != RTE_PORT_STOPPED) &&
2319 	    (port->slave_flag == 0))
2320 		return 0;
2321 	return 1;
2322 }
2323 
2324 int
2325 all_ports_stopped(void)
2326 {
2327 	portid_t pi;
2328 
2329 	RTE_ETH_FOREACH_DEV(pi) {
2330 		if (!port_is_stopped(pi))
2331 			return 0;
2332 	}
2333 
2334 	return 1;
2335 }
2336 
2337 int
2338 port_is_started(portid_t port_id)
2339 {
2340 	if (port_id_is_invalid(port_id, ENABLED_WARN))
2341 		return 0;
2342 
2343 	if (ports[port_id].port_status != RTE_PORT_STARTED)
2344 		return 0;
2345 
2346 	return 1;
2347 }
2348 
2349 /* Configure the Rx and Tx hairpin queues for the selected port. */
2350 static int
2351 setup_hairpin_queues(portid_t pi, portid_t p_pi, uint16_t cnt_pi)
2352 {
2353 	queueid_t qi;
2354 	struct rte_eth_hairpin_conf hairpin_conf = {
2355 		.peer_count = 1,
2356 	};
2357 	int i;
2358 	int diag;
2359 	struct rte_port *port = &ports[pi];
2360 	uint16_t peer_rx_port = pi;
2361 	uint16_t peer_tx_port = pi;
2362 	uint32_t manual = 1;
2363 	uint32_t tx_exp = hairpin_mode & 0x10;
2364 
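	/*
	 * Decode hairpin_mode: with the low bits clear each port is bound to
	 * itself; bit 0 chains the ports into a loop, bit 1 pairs them two by
	 * two (both imply manual binding); bit 4 requests explicit Tx flow
	 * mode.
	 */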
2365 	if (!(hairpin_mode & 0xf)) {
2366 		peer_rx_port = pi;
2367 		peer_tx_port = pi;
2368 		manual = 0;
2369 	} else if (hairpin_mode & 0x1) {
2370 		peer_tx_port = rte_eth_find_next_owned_by(pi + 1,
2371 						       RTE_ETH_DEV_NO_OWNER);
2372 		if (peer_tx_port >= RTE_MAX_ETHPORTS)
2373 			peer_tx_port = rte_eth_find_next_owned_by(0,
2374 						RTE_ETH_DEV_NO_OWNER);
2375 		if (p_pi != RTE_MAX_ETHPORTS) {
2376 			peer_rx_port = p_pi;
2377 		} else {
2378 			uint16_t next_pi;
2379 
2380 			/* Last port will be the peer RX port of the first. */
2381 			RTE_ETH_FOREACH_DEV(next_pi)
2382 				peer_rx_port = next_pi;
2383 		}
2384 		manual = 1;
2385 	} else if (hairpin_mode & 0x2) {
2386 		if (cnt_pi & 0x1) {
2387 			peer_rx_port = p_pi;
2388 		} else {
2389 			peer_rx_port = rte_eth_find_next_owned_by(pi + 1,
2390 						RTE_ETH_DEV_NO_OWNER);
2391 			if (peer_rx_port >= RTE_MAX_ETHPORTS)
2392 				peer_rx_port = pi;
2393 		}
2394 		peer_tx_port = peer_rx_port;
2395 		manual = 1;
2396 	}
2397 
2398 	for (qi = nb_txq, i = 0; qi < nb_hairpinq + nb_txq; qi++) {
2399 		hairpin_conf.peers[0].port = peer_rx_port;
2400 		hairpin_conf.peers[0].queue = i + nb_rxq;
2401 		hairpin_conf.manual_bind = !!manual;
2402 		hairpin_conf.tx_explicit = !!tx_exp;
2403 		diag = rte_eth_tx_hairpin_queue_setup
2404 			(pi, qi, nb_txd, &hairpin_conf);
2405 		i++;
2406 		if (diag == 0)
2407 			continue;
2408 
2409 		/* Failed to set up Tx hairpin queue, return */
2410 		if (rte_atomic16_cmpset(&(port->port_status),
2411 					RTE_PORT_HANDLING,
2412 					RTE_PORT_STOPPED) == 0)
2413 			printf("Port %d can not be set back "
2414 					"to stopped\n", pi);
2415 		printf("Fail to configure port %d hairpin "
2416 				"queues\n", pi);
2417 		/* try to reconfigure queues next time */
2418 		port->need_reconfig_queues = 1;
2419 		return -1;
2420 	}
2421 	for (qi = nb_rxq, i = 0; qi < nb_hairpinq + nb_rxq; qi++) {
2422 		hairpin_conf.peers[0].port = peer_tx_port;
2423 		hairpin_conf.peers[0].queue = i + nb_txq;
2424 		hairpin_conf.manual_bind = !!manual;
2425 		hairpin_conf.tx_explicit = !!tx_exp;
2426 		diag = rte_eth_rx_hairpin_queue_setup
2427 			(pi, qi, nb_rxd, &hairpin_conf);
2428 		i++;
2429 		if (diag == 0)
2430 			continue;
2431 
2432 		/* Failed to set up Rx hairpin queue, return */
2433 		if (rte_atomic16_cmpset(&(port->port_status),
2434 					RTE_PORT_HANDLING,
2435 					RTE_PORT_STOPPED) == 0)
2436 			printf("Port %d can not be set back "
2437 					"to stopped\n", pi);
2438 		printf("Fail to configure port %d hairpin "
2439 				"queues\n", pi);
2440 		/* try to reconfigure queues next time */
2441 		port->need_reconfig_queues = 1;
2442 		return -1;
2443 	}
2444 	return 0;
2445 }
2446 
2447 int
2448 start_port(portid_t pid)
2449 {
2450 	int diag, need_check_link_status = -1;
2451 	portid_t pi;
2452 	portid_t p_pi = RTE_MAX_ETHPORTS;
2453 	portid_t pl[RTE_MAX_ETHPORTS];
2454 	portid_t peer_pl[RTE_MAX_ETHPORTS];
2455 	uint16_t cnt_pi = 0;
2456 	uint16_t cfg_pi = 0;
2457 	int peer_pi;
2458 	queueid_t qi;
2459 	struct rte_port *port;
2460 	struct rte_ether_addr mac_addr;
2461 	struct rte_eth_hairpin_cap cap;
2462 
2463 	if (port_id_is_invalid(pid, ENABLED_WARN))
2464 		return 0;
2465 
2466 	if (dcb_config)
2467 		dcb_test = 1;
2468 	RTE_ETH_FOREACH_DEV(pi) {
2469 		if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
2470 			continue;
2471 
2472 		need_check_link_status = 0;
2473 		port = &ports[pi];
2474 		if (rte_atomic16_cmpset(&(port->port_status), RTE_PORT_STOPPED,
2475 						 RTE_PORT_HANDLING) == 0) {
2476 			printf("Port %d is now not stopped\n", pi);
2477 			continue;
2478 		}
2479 
2480 		if (port->need_reconfig > 0) {
2481 			port->need_reconfig = 0;
2482 
2483 			if (flow_isolate_all) {
2484 				int ret = port_flow_isolate(pi, 1);
2485 				if (ret) {
2486 					printf("Failed to apply isolated"
2487 					       " mode on port %d\n", pi);
2488 					return -1;
2489 				}
2490 			}
2491 			configure_rxtx_dump_callbacks(0);
2492 			printf("Configuring Port %d (socket %u)\n", pi,
2493 					port->socket_id);
2494 			if (nb_hairpinq > 0 &&
2495 			    rte_eth_dev_hairpin_capability_get(pi, &cap)) {
2496 				printf("Port %d doesn't support hairpin "
2497 				       "queues\n", pi);
2498 				return -1;
2499 			}
2500 			/* configure port */
2501 			diag = rte_eth_dev_configure(pi, nb_rxq + nb_hairpinq,
2502 						     nb_txq + nb_hairpinq,
2503 						     &(port->dev_conf));
2504 			if (diag != 0) {
2505 				if (rte_atomic16_cmpset(&(port->port_status),
2506 				RTE_PORT_HANDLING, RTE_PORT_STOPPED) == 0)
2507 					printf("Port %d can not be set back "
2508 							"to stopped\n", pi);
2509 				printf("Fail to configure port %d\n", pi);
2510 				/* try to reconfigure port next time */
2511 				port->need_reconfig = 1;
2512 				return -1;
2513 			}
2514 		}
2515 		if (port->need_reconfig_queues > 0) {
2516 			port->need_reconfig_queues = 0;
2517 			/* setup tx queues */
2518 			for (qi = 0; qi < nb_txq; qi++) {
2519 				if ((numa_support) &&
2520 					(txring_numa[pi] != NUMA_NO_CONFIG))
2521 					diag = rte_eth_tx_queue_setup(pi, qi,
2522 						port->nb_tx_desc[qi],
2523 						txring_numa[pi],
2524 						&(port->tx_conf[qi]));
2525 				else
2526 					diag = rte_eth_tx_queue_setup(pi, qi,
2527 						port->nb_tx_desc[qi],
2528 						port->socket_id,
2529 						&(port->tx_conf[qi]));
2530 
2531 				if (diag == 0)
2532 					continue;
2533 
2534 				/* Fail to setup tx queue, return */
2535 				if (rte_atomic16_cmpset(&(port->port_status),
2536 							RTE_PORT_HANDLING,
2537 							RTE_PORT_STOPPED) == 0)
2538 					printf("Port %d can not be set back "
2539 							"to stopped\n", pi);
2540 				printf("Fail to configure port %d tx queues\n",
2541 				       pi);
2542 				/* try to reconfigure queues next time */
2543 				port->need_reconfig_queues = 1;
2544 				return -1;
2545 			}
2546 			for (qi = 0; qi < nb_rxq; qi++) {
2547 				/* setup rx queues */
2548 				if ((numa_support) &&
2549 					(rxring_numa[pi] != NUMA_NO_CONFIG)) {
2550 					struct rte_mempool * mp =
2551 						mbuf_pool_find(rxring_numa[pi]);
2552 					if (mp == NULL) {
2553 						printf("Failed to setup RX queue: "
2554 							"No mempool allocation"
2555 							" on socket %d\n",
2556 							rxring_numa[pi]);
2557 						return -1;
2558 					}
2559 
2560 					diag = rte_eth_rx_queue_setup(pi, qi,
2561 					     port->nb_rx_desc[qi],
2562 					     rxring_numa[pi],
2563 					     &(port->rx_conf[qi]),
2564 					     mp);
2565 				} else {
2566 					struct rte_mempool *mp =
2567 						mbuf_pool_find(port->socket_id);
2568 					if (mp == NULL) {
2569 						printf("Failed to setup RX queue: "
2570 							"No mempool allocation"
2571 							" on socket %d\n",
2572 							port->socket_id);
2573 						return -1;
2574 					}
2575 					diag = rte_eth_rx_queue_setup(pi, qi,
2576 					     port->nb_rx_desc[qi],
2577 					     port->socket_id,
2578 					     &(port->rx_conf[qi]),
2579 					     mp);
2580 				}
2581 				if (diag == 0)
2582 					continue;
2583 
2584 				/* Fail to setup rx queue, return */
2585 				if (rte_atomic16_cmpset(&(port->port_status),
2586 							RTE_PORT_HANDLING,
2587 							RTE_PORT_STOPPED) == 0)
2588 					printf("Port %d can not be set back "
2589 							"to stopped\n", pi);
2590 				printf("Fail to configure port %d rx queues\n",
2591 				       pi);
2592 				/* try to reconfigure queues next time */
2593 				port->need_reconfig_queues = 1;
2594 				return -1;
2595 			}
2596 			/* setup hairpin queues */
2597 			if (setup_hairpin_queues(pi, p_pi, cnt_pi) != 0)
2598 				return -1;
2599 		}
2600 		configure_rxtx_dump_callbacks(verbose_level);
2601 		if (clear_ptypes) {
2602 			diag = rte_eth_dev_set_ptypes(pi, RTE_PTYPE_UNKNOWN,
2603 					NULL, 0);
2604 			if (diag < 0)
2605 				printf(
2606 				"Port %d: Failed to disable Ptype parsing\n",
2607 				pi);
2608 		}
2609 
2610 		p_pi = pi;
2611 		cnt_pi++;
2612 
2613 		/* start port */
2614 		if (rte_eth_dev_start(pi) < 0) {
2615 			printf("Fail to start port %d\n", pi);
2616 
2617 			/* Failed to start the port, set it back to stopped */
2618 			if (rte_atomic16_cmpset(&(port->port_status),
2619 				RTE_PORT_HANDLING, RTE_PORT_STOPPED) == 0)
2620 				printf("Port %d can not be set back to "
2621 							"stopped\n", pi);
2622 			continue;
2623 		}
2624 
2625 		if (rte_atomic16_cmpset(&(port->port_status),
2626 			RTE_PORT_HANDLING, RTE_PORT_STARTED) == 0)
2627 			printf("Port %d can not be set into started\n", pi);
2628 
2629 		if (eth_macaddr_get_print_err(pi, &mac_addr) == 0)
2630 			printf("Port %d: %02X:%02X:%02X:%02X:%02X:%02X\n", pi,
2631 				mac_addr.addr_bytes[0], mac_addr.addr_bytes[1],
2632 				mac_addr.addr_bytes[2], mac_addr.addr_bytes[3],
2633 				mac_addr.addr_bytes[4], mac_addr.addr_bytes[5]);
2634 
2635 		/* at least one port started, need checking link status */
2636 		need_check_link_status = 1;
2637 
2638 		pl[cfg_pi++] = pi;
2639 	}
2640 
2641 	if (need_check_link_status == 1 && !no_link_check)
2642 		check_all_ports_link_status(RTE_PORT_ALL);
2643 	else if (need_check_link_status == 0)
2644 		printf("Please stop the ports first\n");
2645 
2646 	if (hairpin_mode & 0xf) {
2647 		uint16_t i;
2648 		int j;
2649 
2650 		/* bind all started hairpin ports */
2651 		for (i = 0; i < cfg_pi; i++) {
2652 			pi = pl[i];
2653 			/* bind current Tx to all peer Rx */
2654 			peer_pi = rte_eth_hairpin_get_peer_ports(pi, peer_pl,
2655 							RTE_MAX_ETHPORTS, 1);
2656 			if (peer_pi < 0)
2657 				return peer_pi;
2658 			for (j = 0; j < peer_pi; j++) {
2659 				if (!port_is_started(peer_pl[j]))
2660 					continue;
2661 				diag = rte_eth_hairpin_bind(pi, peer_pl[j]);
2662 				if (diag < 0) {
2663 					printf("Error during binding hairpin"
2664 					       " Tx port %u to %u: %s\n",
2665 					       pi, peer_pl[j],
2666 					       rte_strerror(-diag));
2667 					return -1;
2668 				}
2669 			}
2670 			/* bind all peer Tx to current Rx */
2671 			peer_pi = rte_eth_hairpin_get_peer_ports(pi, peer_pl,
2672 							RTE_MAX_ETHPORTS, 0);
2673 			if (peer_pi < 0)
2674 				return peer_pi;
2675 			for (j = 0; j < peer_pi; j++) {
2676 				if (!port_is_started(peer_pl[j]))
2677 					continue;
2678 				diag = rte_eth_hairpin_bind(peer_pl[j], pi);
2679 				if (diag < 0) {
2680 					printf("Error during binding hairpin"
2681 					       " Tx port %u to %u: %s\n",
2682 					       peer_pl[j], pi,
2683 					       rte_strerror(-diag));
2684 					return -1;
2685 				}
2686 			}
2687 		}
2688 	}
2689 
2690 	printf("Done\n");
2691 	return 0;
2692 }
2693 
2694 void
2695 stop_port(portid_t pid)
2696 {
2697 	portid_t pi;
2698 	struct rte_port *port;
2699 	int need_check_link_status = 0;
2700 	portid_t peer_pl[RTE_MAX_ETHPORTS];
2701 	int peer_pi;
2702 
2703 	if (dcb_test) {
2704 		dcb_test = 0;
2705 		dcb_config = 0;
2706 	}
2707 
2708 	if (port_id_is_invalid(pid, ENABLED_WARN))
2709 		return;
2710 
2711 	printf("Stopping ports...\n");
2712 
2713 	RTE_ETH_FOREACH_DEV(pi) {
2714 		if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
2715 			continue;
2716 
2717 		if (port_is_forwarding(pi) != 0 && test_done == 0) {
2718 			printf("Please remove port %d from forwarding configuration.\n", pi);
2719 			continue;
2720 		}
2721 
2722 		if (port_is_bonding_slave(pi)) {
2723 			printf("Please remove port %d from bonded device.\n", pi);
2724 			continue;
2725 		}
2726 
2727 		port = &ports[pi];
2728 		if (rte_atomic16_cmpset(&(port->port_status), RTE_PORT_STARTED,
2729 						RTE_PORT_HANDLING) == 0)
2730 			continue;
2731 
2732 		if (hairpin_mode & 0xf) {
2733 			int j;
2734 
2735 			rte_eth_hairpin_unbind(pi, RTE_MAX_ETHPORTS);
2736 			/* unbind all peer Tx from current Rx */
2737 			peer_pi = rte_eth_hairpin_get_peer_ports(pi, peer_pl,
2738 							RTE_MAX_ETHPORTS, 0);
2739 			if (peer_pi < 0)
2740 				continue;
2741 			for (j = 0; j < peer_pi; j++) {
2742 				if (!port_is_started(peer_pl[j]))
2743 					continue;
2744 				rte_eth_hairpin_unbind(peer_pl[j], pi);
2745 			}
2746 		}
2747 
2748 		rte_eth_dev_stop(pi);
2749 
2750 		if (rte_atomic16_cmpset(&(port->port_status),
2751 			RTE_PORT_HANDLING, RTE_PORT_STOPPED) == 0)
2752 			printf("Port %d can not be set into stopped\n", pi);
2753 		need_check_link_status = 1;
2754 	}
2755 	if (need_check_link_status && !no_link_check)
2756 		check_all_ports_link_status(RTE_PORT_ALL);
2757 
2758 	printf("Done\n");
2759 }
2760 
2761 static void
2762 remove_invalid_ports_in(portid_t *array, portid_t *total)
2763 {
2764 	portid_t i;
2765 	portid_t new_total = 0;
2766 
2767 	for (i = 0; i < *total; i++)
2768 		if (!port_id_is_invalid(array[i], DISABLED_WARN)) {
2769 			array[new_total] = array[i];
2770 			new_total++;
2771 		}
2772 	*total = new_total;
2773 }
2774 
2775 static void
2776 remove_invalid_ports(void)
2777 {
2778 	remove_invalid_ports_in(ports_ids, &nb_ports);
2779 	remove_invalid_ports_in(fwd_ports_ids, &nb_fwd_ports);
2780 	nb_cfg_ports = nb_fwd_ports;
2781 }
2782 
2783 void
2784 close_port(portid_t pid)
2785 {
2786 	portid_t pi;
2787 	struct rte_port *port;
2788 
2789 	if (port_id_is_invalid(pid, ENABLED_WARN))
2790 		return;
2791 
2792 	printf("Closing ports...\n");
2793 
2794 	RTE_ETH_FOREACH_DEV(pi) {
2795 		if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
2796 			continue;
2797 
2798 		if (port_is_forwarding(pi) != 0 && test_done == 0) {
2799 			printf("Please remove port %d from forwarding configuration.\n", pi);
2800 			continue;
2801 		}
2802 
2803 		if (port_is_bonding_slave(pi)) {
2804 			printf("Please remove port %d from bonded device.\n", pi);
2805 			continue;
2806 		}
2807 
2808 		port = &ports[pi];
2809 		if (rte_atomic16_cmpset(&(port->port_status),
2810 			RTE_PORT_CLOSED, RTE_PORT_CLOSED) == 1) {
2811 			printf("Port %d is already closed\n", pi);
2812 			continue;
2813 		}
2814 
2815 		port_flow_flush(pi);
2816 		rte_eth_dev_close(pi);
2817 	}
2818 
2819 	remove_invalid_ports();
2820 	printf("Done\n");
2821 }
2822 
2823 void
2824 reset_port(portid_t pid)
2825 {
2826 	int diag;
2827 	portid_t pi;
2828 	struct rte_port *port;
2829 
2830 	if (port_id_is_invalid(pid, ENABLED_WARN))
2831 		return;
2832 
2833 	if ((pid == (portid_t)RTE_PORT_ALL && !all_ports_stopped()) ||
2834 		(pid != (portid_t)RTE_PORT_ALL && !port_is_stopped(pid))) {
2835 		printf("Can not reset port(s), please stop port(s) first.\n");
2836 		return;
2837 	}
2838 
2839 	printf("Resetting ports...\n");
2840 
2841 	RTE_ETH_FOREACH_DEV(pi) {
2842 		if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
2843 			continue;
2844 
2845 		if (port_is_forwarding(pi) != 0 && test_done == 0) {
2846 			printf("Please remove port %d from forwarding "
2847 			       "configuration.\n", pi);
2848 			continue;
2849 		}
2850 
2851 		if (port_is_bonding_slave(pi)) {
2852 			printf("Please remove port %d from bonded device.\n",
2853 			       pi);
2854 			continue;
2855 		}
2856 
2857 		diag = rte_eth_dev_reset(pi);
2858 		if (diag == 0) {
2859 			port = &ports[pi];
2860 			port->need_reconfig = 1;
2861 			port->need_reconfig_queues = 1;
2862 		} else {
2863 			printf("Failed to reset port %d. diag=%d\n", pi, diag);
2864 		}
2865 	}
2866 
2867 	printf("Done\n");
2868 }
2869 
2870 void
2871 attach_port(char *identifier)
2872 {
2873 	portid_t pi;
2874 	struct rte_dev_iterator iterator;
2875 
2876 	printf("Attaching a new port...\n");
2877 
2878 	if (identifier == NULL) {
2879 		printf("Invalid parameter specified\n");
2880 		return;
2881 	}
2882 
2883 	if (rte_dev_probe(identifier) < 0) {
2884 		TESTPMD_LOG(ERR, "Failed to attach port %s\n", identifier);
2885 		return;
2886 	}
2887 
2888 	/* first attach mode: event */
2889 	if (setup_on_probe_event) {
2890 		/* new ports are detected on RTE_ETH_EVENT_NEW event */
2891 		for (pi = 0; pi < RTE_MAX_ETHPORTS; pi++)
2892 			if (ports[pi].port_status == RTE_PORT_HANDLING &&
2893 					ports[pi].need_setup != 0)
2894 				setup_attached_port(pi);
2895 		return;
2896 	}
2897 
2898 	/* second attach mode: iterator */
2899 	RTE_ETH_FOREACH_MATCHING_DEV(pi, identifier, &iterator) {
2900 		/* setup ports matching the devargs used for probing */
2901 		if (port_is_forwarding(pi))
2902 			continue; /* port was already attached before */
2903 		setup_attached_port(pi);
2904 	}
2905 }
2906 
2907 static void
2908 setup_attached_port(portid_t pi)
2909 {
2910 	unsigned int socket_id;
2911 	int ret;
2912 
2913 	socket_id = (unsigned)rte_eth_dev_socket_id(pi);
2914 	/* if socket_id is invalid, set to the first available socket. */
2915 	if (check_socket_id(socket_id) < 0)
2916 		socket_id = socket_ids[0];
2917 	reconfig(pi, socket_id);
2918 	ret = rte_eth_promiscuous_enable(pi);
2919 	if (ret != 0)
2920 		printf("Error during enabling promiscuous mode for port %u: %s - ignore\n",
2921 			pi, rte_strerror(-ret));
2922 
2923 	ports_ids[nb_ports++] = pi;
2924 	fwd_ports_ids[nb_fwd_ports++] = pi;
2925 	nb_cfg_ports = nb_fwd_ports;
2926 	ports[pi].need_setup = 0;
2927 	ports[pi].port_status = RTE_PORT_STOPPED;
2928 
2929 	printf("Port %d is attached. Now total ports is %d\n", pi, nb_ports);
2930 	printf("Done\n");
2931 }
2932 
2933 static void
2934 detach_device(struct rte_device *dev)
2935 {
2936 	portid_t sibling;
2937 
2938 	if (dev == NULL) {
2939 		printf("Device already removed\n");
2940 		return;
2941 	}
2942 
2943 	printf("Removing a device...\n");
2944 
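	/*
	 * Every port sharing this rte_device must be stopped (or already
	 * closed), and the flow rules of the remaining ports are flushed,
	 * before the device itself can be removed.
	 */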
2945 	RTE_ETH_FOREACH_DEV_OF(sibling, dev) {
2946 		if (ports[sibling].port_status != RTE_PORT_CLOSED) {
2947 			if (ports[sibling].port_status != RTE_PORT_STOPPED) {
2948 				printf("Port %u not stopped\n", sibling);
2949 				return;
2950 			}
2951 			port_flow_flush(sibling);
2952 		}
2953 	}
2954 
2955 	if (rte_dev_remove(dev) < 0) {
2956 		TESTPMD_LOG(ERR, "Failed to detach device %s\n", dev->name);
2957 		return;
2958 	}
2959 	remove_invalid_ports();
2960 
2961 	printf("Device is detached\n");
2962 	printf("Now total ports is %d\n", nb_ports);
2963 	printf("Done\n");
2964 	return;
2965 }
2966 
2967 void
2968 detach_port_device(portid_t port_id)
2969 {
2970 	if (port_id_is_invalid(port_id, ENABLED_WARN))
2971 		return;
2972 
2973 	if (ports[port_id].port_status != RTE_PORT_CLOSED) {
2974 		if (ports[port_id].port_status != RTE_PORT_STOPPED) {
2975 			printf("Port not stopped\n");
2976 			return;
2977 		}
2978 		printf("Port was not closed\n");
2979 	}
2980 
2981 	detach_device(rte_eth_devices[port_id].device);
2982 }
2983 
2984 void
2985 detach_devargs(char *identifier)
2986 {
2987 	struct rte_dev_iterator iterator;
2988 	struct rte_devargs da;
2989 	portid_t port_id;
2990 
2991 	printf("Removing a device...\n");
2992 
2993 	memset(&da, 0, sizeof(da));
2994 	if (rte_devargs_parsef(&da, "%s", identifier)) {
2995 		printf("cannot parse identifier\n");
2996 		if (da.args)
2997 			free(da.args);
2998 		return;
2999 	}
3000 
3001 	RTE_ETH_FOREACH_MATCHING_DEV(port_id, identifier, &iterator) {
3002 		if (ports[port_id].port_status != RTE_PORT_CLOSED) {
3003 			if (ports[port_id].port_status != RTE_PORT_STOPPED) {
3004 				printf("Port %u not stopped\n", port_id);
3005 				rte_eth_iterator_cleanup(&iterator);
3006 				return;
3007 			}
3008 			port_flow_flush(port_id);
3009 		}
3010 	}
3011 
3012 	if (rte_eal_hotplug_remove(da.bus->name, da.name) != 0) {
3013 		TESTPMD_LOG(ERR, "Failed to detach device %s(%s)\n",
3014 			    da.name, da.bus->name);
3015 		return;
3016 	}
3017 
3018 	remove_invalid_ports();
3019 
3020 	printf("Device %s is detached\n", identifier);
3021 	printf("Now total ports is %d\n", nb_ports);
3022 	printf("Done\n");
3023 }
3024 
3025 void
3026 pmd_test_exit(void)
3027 {
3028 	portid_t pt_id;
3029 	int ret;
3030 	int i;
3031 
3032 	if (test_done == 0)
3033 		stop_packet_forwarding();
3034 
3035 	for (i = 0 ; i < RTE_MAX_NUMA_NODES ; i++) {
3036 		if (mempools[i]) {
3037 			if (mp_alloc_type == MP_ALLOC_ANON)
3038 				rte_mempool_mem_iter(mempools[i], dma_unmap_cb,
3039 						     NULL);
3040 		}
3041 	}
3042 	if (ports != NULL) {
3043 		no_link_check = 1;
3044 		RTE_ETH_FOREACH_DEV(pt_id) {
3045 			printf("\nStopping port %d...\n", pt_id);
3046 			fflush(stdout);
3047 			stop_port(pt_id);
3048 		}
3049 		RTE_ETH_FOREACH_DEV(pt_id) {
3050 			printf("\nShutting down port %d...\n", pt_id);
3051 			fflush(stdout);
3052 			close_port(pt_id);
3053 		}
3054 	}
3055 
3056 	if (hot_plug) {
3057 		ret = rte_dev_event_monitor_stop();
3058 		if (ret) {
3059 			RTE_LOG(ERR, EAL,
3060 				"fail to stop device event monitor.\n");
3061 			return;
3062 		}
3063 
3064 		ret = rte_dev_event_callback_unregister(NULL,
3065 			dev_event_callback, NULL);
3066 		if (ret < 0) {
3067 			RTE_LOG(ERR, EAL,
3068 				"fail to unregister device event callback.\n");
3069 			return;
3070 		}
3071 
3072 		ret = rte_dev_hotplug_handle_disable();
3073 		if (ret) {
3074 			RTE_LOG(ERR, EAL,
3075 				"fail to disable hotplug handling.\n");
3076 			return;
3077 		}
3078 	}
3079 	for (i = 0 ; i < RTE_MAX_NUMA_NODES ; i++) {
3080 		if (mempools[i])
3081 			rte_mempool_free(mempools[i]);
3082 	}
3083 
3084 	printf("\nBye...\n");
3085 }
3086 
3087 typedef void (*cmd_func_t)(void);
3088 struct pmd_test_command {
3089 	const char *cmd_name;
3090 	cmd_func_t cmd_func;
3091 };
3092 
3093 /* Check the link status of all ports in up to 9s, and print it at the end */
3094 static void
3095 check_all_ports_link_status(uint32_t port_mask)
3096 {
3097 #define CHECK_INTERVAL 100 /* 100ms */
3098 #define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
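	/*
	 * Poll the link status every CHECK_INTERVAL ms. The status of each
	 * port is printed once, either when all ports report link up or when
	 * the timeout expires.
	 */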
3099 	portid_t portid;
3100 	uint8_t count, all_ports_up, print_flag = 0;
3101 	struct rte_eth_link link;
3102 	int ret;
3103 	char link_status[RTE_ETH_LINK_MAX_STR_LEN];
3104 
3105 	printf("Checking link statuses...\n");
3106 	fflush(stdout);
3107 	for (count = 0; count <= MAX_CHECK_TIME; count++) {
3108 		all_ports_up = 1;
3109 		RTE_ETH_FOREACH_DEV(portid) {
3110 			if ((port_mask & (1 << portid)) == 0)
3111 				continue;
3112 			memset(&link, 0, sizeof(link));
3113 			ret = rte_eth_link_get_nowait(portid, &link);
3114 			if (ret < 0) {
3115 				all_ports_up = 0;
3116 				if (print_flag == 1)
3117 					printf("Port %u link get failed: %s\n",
3118 						portid, rte_strerror(-ret));
3119 				continue;
3120 			}
3121 			/* print link status if flag set */
3122 			if (print_flag == 1) {
3123 				rte_eth_link_to_str(link_status,
3124 					sizeof(link_status), &link);
3125 				printf("Port %d %s\n", portid, link_status);
3126 				continue;
3127 			}
3128 			/* clear all_ports_up flag if any link down */
3129 			if (link.link_status == ETH_LINK_DOWN) {
3130 				all_ports_up = 0;
3131 				break;
3132 			}
3133 		}
3134 		/* after finally printing all link status, get out */
3135 		if (print_flag == 1)
3136 			break;
3137 
3138 		if (all_ports_up == 0) {
3139 			fflush(stdout);
3140 			rte_delay_ms(CHECK_INTERVAL);
3141 		}
3142 
3143 		/* set the print_flag if all ports up or timeout */
3144 		if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
3145 			print_flag = 1;
3146 		}
3147 
3148 		if (lsc_interrupt)
3149 			break;
3150 	}
3151 }
3152 
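/*
 * Deferred handling of a device removal: stop forwarding if the port is part
 * of the current configuration, stop and close the port, then detach the
 * underlying device. Forwarding is restarted afterwards if it was running.
 */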
3153 static void
3154 rmv_port_callback(void *arg)
3155 {
3156 	int need_to_start = 0;
3157 	int org_no_link_check = no_link_check;
3158 	portid_t port_id = (intptr_t)arg;
3159 	struct rte_device *dev;
3160 
3161 	RTE_ETH_VALID_PORTID_OR_RET(port_id);
3162 
3163 	if (!test_done && port_is_forwarding(port_id)) {
3164 		need_to_start = 1;
3165 		stop_packet_forwarding();
3166 	}
3167 	no_link_check = 1;
3168 	stop_port(port_id);
3169 	no_link_check = org_no_link_check;
3170 
3171 	/* Save rte_device pointer before closing ethdev port */
3172 	dev = rte_eth_devices[port_id].device;
3173 	close_port(port_id);
3174 	detach_device(dev); /* might be already removed or have more ports */
3175 
3176 	if (need_to_start)
3177 		start_packet_forwarding(0);
3178 }
3179 
3180 /* This function is used by the interrupt thread */
3181 static int
3182 eth_event_callback(portid_t port_id, enum rte_eth_event_type type, void *param,
3183 		  void *ret_param)
3184 {
3185 	RTE_SET_USED(param);
3186 	RTE_SET_USED(ret_param);
3187 
3188 	if (type >= RTE_ETH_EVENT_MAX) {
3189 		fprintf(stderr, "\nPort %" PRIu16 ": %s called upon invalid event %d\n",
3190 			port_id, __func__, type);
3191 		fflush(stderr);
3192 	} else if (event_print_mask & (UINT32_C(1) << type)) {
3193 		printf("\nPort %" PRIu16 ": %s event\n", port_id,
3194 			eth_event_desc[type]);
3195 		fflush(stdout);
3196 	}
3197 
3198 	switch (type) {
3199 	case RTE_ETH_EVENT_NEW:
3200 		ports[port_id].need_setup = 1;
3201 		ports[port_id].port_status = RTE_PORT_HANDLING;
3202 		break;
3203 	case RTE_ETH_EVENT_INTR_RMV:
3204 		if (port_id_is_invalid(port_id, DISABLED_WARN))
3205 			break;
3206 		if (rte_eal_alarm_set(100000,
3207 				rmv_port_callback, (void *)(intptr_t)port_id))
3208 			fprintf(stderr, "Could not set up deferred device removal\n");
3209 		break;
3210 	case RTE_ETH_EVENT_DESTROY:
3211 		ports[port_id].port_status = RTE_PORT_CLOSED;
3212 		printf("Port %u is closed\n", port_id);
3213 		break;
3214 	default:
3215 		break;
3216 	}
3217 	return 0;
3218 }
3219 
3220 static int
3221 register_eth_event_callback(void)
3222 {
3223 	int ret;
3224 	enum rte_eth_event_type event;
3225 
3226 	for (event = RTE_ETH_EVENT_UNKNOWN;
3227 			event < RTE_ETH_EVENT_MAX; event++) {
3228 		ret = rte_eth_dev_callback_register(RTE_ETH_ALL,
3229 				event,
3230 				eth_event_callback,
3231 				NULL);
3232 		if (ret != 0) {
3233 			TESTPMD_LOG(ERR, "Failed to register callback for "
3234 					"%s event\n", eth_event_desc[event]);
3235 			return -1;
3236 		}
3237 	}
3238 
3239 	return 0;
3240 }
3241 
3242 /* This function is used by the interrupt thread */
3243 static void
3244 dev_event_callback(const char *device_name, enum rte_dev_event_type type,
3245 			     __rte_unused void *arg)
3246 {
3247 	uint16_t port_id;
3248 	int ret;
3249 
3250 	if (type >= RTE_DEV_EVENT_MAX) {
3251 		fprintf(stderr, "%s called upon invalid event %d\n",
3252 			__func__, type);
3253 		fflush(stderr);
3254 	}
3255 
3256 	switch (type) {
3257 	case RTE_DEV_EVENT_REMOVE:
3258 		RTE_LOG(DEBUG, EAL, "The device: %s has been removed!\n",
3259 			device_name);
3260 		ret = rte_eth_dev_get_port_by_name(device_name, &port_id);
3261 		if (ret) {
3262 			RTE_LOG(ERR, EAL, "can not get port by device %s!\n",
3263 				device_name);
3264 			return;
3265 		}
3266 		/*
3267 		 * Because the user's callback is invoked from the EAL
3268 		 * interrupt callback, the interrupt callback must finish
3269 		 * before it can be unregistered when detaching the device.
3270 		 * So the callback returns quickly and a deferred removal is
3271 		 * used to detach the device. This is a workaround; once
3272 		 * device detaching is moved into the EAL, the deferred
3273 		 * removal can be deleted.
3274 		 */
3275 		if (rte_eal_alarm_set(100000,
3276 				rmv_port_callback, (void *)(intptr_t)port_id))
3277 			RTE_LOG(ERR, EAL,
3278 				"Could not set up deferred device removal\n");
3279 		break;
3280 	case RTE_DEV_EVENT_ADD:
3281 		RTE_LOG(ERR, EAL, "The device: %s has been added!\n",
3282 			device_name);
3283 		/* TODO: After finishing kernel driver binding,
3284 		 * begin to attach the port.
3285 		 */
3286 		break;
3287 	default:
3288 		break;
3289 	}
3290 }
3291 
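/*
 * Apply the user-supplied Tx queue to statistics-register mappings to the
 * given port. The mapping is reported as enabled only if at least one entry
 * matched this port.
 */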
3292 static int
3293 set_tx_queue_stats_mapping_registers(portid_t port_id, struct rte_port *port)
3294 {
3295 	uint16_t i;
3296 	int diag;
3297 	uint8_t mapping_found = 0;
3298 
3299 	for (i = 0; i < nb_tx_queue_stats_mappings; i++) {
3300 		if ((tx_queue_stats_mappings[i].port_id == port_id) &&
3301 				(tx_queue_stats_mappings[i].queue_id < nb_txq )) {
3302 			diag = rte_eth_dev_set_tx_queue_stats_mapping(port_id,
3303 					tx_queue_stats_mappings[i].queue_id,
3304 					tx_queue_stats_mappings[i].stats_counter_id);
3305 			if (diag != 0)
3306 				return diag;
3307 			mapping_found = 1;
3308 		}
3309 	}
3310 	if (mapping_found)
3311 		port->tx_queue_stats_mapping_enabled = 1;
3312 	return 0;
3313 }
3314 
3315 static int
3316 set_rx_queue_stats_mapping_registers(portid_t port_id, struct rte_port *port)
3317 {
3318 	uint16_t i;
3319 	int diag;
3320 	uint8_t mapping_found = 0;
3321 
3322 	for (i = 0; i < nb_rx_queue_stats_mappings; i++) {
3323 		if ((rx_queue_stats_mappings[i].port_id == port_id) &&
3324 				(rx_queue_stats_mappings[i].queue_id < nb_rxq )) {
3325 			diag = rte_eth_dev_set_rx_queue_stats_mapping(port_id,
3326 					rx_queue_stats_mappings[i].queue_id,
3327 					rx_queue_stats_mappings[i].stats_counter_id);
3328 			if (diag != 0)
3329 				return diag;
3330 			mapping_found = 1;
3331 		}
3332 	}
3333 	if (mapping_found)
3334 		port->rx_queue_stats_mapping_enabled = 1;
3335 	return 0;
3336 }
3337 
3338 static void
3339 map_port_queue_stats_mapping_registers(portid_t pi, struct rte_port *port)
3340 {
3341 	int diag = 0;
3342 
3343 	diag = set_tx_queue_stats_mapping_registers(pi, port);
3344 	if (diag != 0) {
3345 		if (diag == -ENOTSUP) {
3346 			port->tx_queue_stats_mapping_enabled = 0;
3347 			printf("TX queue stats mapping not supported port id=%d\n", pi);
3348 		}
3349 		else
3350 			rte_exit(EXIT_FAILURE,
3351 					"set_tx_queue_stats_mapping_registers "
3352 					"failed for port id=%d diag=%d\n",
3353 					pi, diag);
3354 	}
3355 
3356 	diag = set_rx_queue_stats_mapping_registers(pi, port);
3357 	if (diag != 0) {
3358 		if (diag == -ENOTSUP) {
3359 			port->rx_queue_stats_mapping_enabled = 0;
3360 			printf("RX queue stats mapping not supported port id=%d\n", pi);
3361 		}
3362 		else
3363 			rte_exit(EXIT_FAILURE,
3364 					"set_rx_queue_stats_mapping_registers "
3365 					"failed for port id=%d diag=%d\n",
3366 					pi, diag);
3367 	}
3368 }
3369 
3370 static void
3371 rxtx_port_config(struct rte_port *port)
3372 {
3373 	uint16_t qid;
3374 	uint64_t offloads;
3375 
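	/*
	 * Start from the PMD's default queue configuration and override
	 * individual thresholds only when they were set on the command line,
	 * preserving any offload flags already configured.
	 */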
3376 	for (qid = 0; qid < nb_rxq; qid++) {
3377 		offloads = port->rx_conf[qid].offloads;
3378 		port->rx_conf[qid] = port->dev_info.default_rxconf;
3379 		if (offloads != 0)
3380 			port->rx_conf[qid].offloads = offloads;
3381 
3382 		/* Check if any Rx parameters have been passed */
3383 		if (rx_pthresh != RTE_PMD_PARAM_UNSET)
3384 			port->rx_conf[qid].rx_thresh.pthresh = rx_pthresh;
3385 
3386 		if (rx_hthresh != RTE_PMD_PARAM_UNSET)
3387 			port->rx_conf[qid].rx_thresh.hthresh = rx_hthresh;
3388 
3389 		if (rx_wthresh != RTE_PMD_PARAM_UNSET)
3390 			port->rx_conf[qid].rx_thresh.wthresh = rx_wthresh;
3391 
3392 		if (rx_free_thresh != RTE_PMD_PARAM_UNSET)
3393 			port->rx_conf[qid].rx_free_thresh = rx_free_thresh;
3394 
3395 		if (rx_drop_en != RTE_PMD_PARAM_UNSET)
3396 			port->rx_conf[qid].rx_drop_en = rx_drop_en;
3397 
3398 		port->nb_rx_desc[qid] = nb_rxd;
3399 	}
3400 
3401 	for (qid = 0; qid < nb_txq; qid++) {
3402 		offloads = port->tx_conf[qid].offloads;
3403 		port->tx_conf[qid] = port->dev_info.default_txconf;
3404 		if (offloads != 0)
3405 			port->tx_conf[qid].offloads = offloads;
3406 
3407 		/* Check if any Tx parameters have been passed */
3408 		if (tx_pthresh != RTE_PMD_PARAM_UNSET)
3409 			port->tx_conf[qid].tx_thresh.pthresh = tx_pthresh;
3410 
3411 		if (tx_hthresh != RTE_PMD_PARAM_UNSET)
3412 			port->tx_conf[qid].tx_thresh.hthresh = tx_hthresh;
3413 
3414 		if (tx_wthresh != RTE_PMD_PARAM_UNSET)
3415 			port->tx_conf[qid].tx_thresh.wthresh = tx_wthresh;
3416 
3417 		if (tx_rs_thresh != RTE_PMD_PARAM_UNSET)
3418 			port->tx_conf[qid].tx_rs_thresh = tx_rs_thresh;
3419 
3420 		if (tx_free_thresh != RTE_PMD_PARAM_UNSET)
3421 			port->tx_conf[qid].tx_free_thresh = tx_free_thresh;
3422 
3423 		port->nb_tx_desc[qid] = nb_txd;
3424 	}
3425 }
3426 
3427 void
3428 init_port_config(void)
3429 {
3430 	portid_t pid;
3431 	struct rte_port *port;
3432 	int ret;
3433 
3434 	RTE_ETH_FOREACH_DEV(pid) {
3435 		port = &ports[pid];
3436 		port->dev_conf.fdir_conf = fdir_conf;
3437 
3438 		ret = eth_dev_info_get_print_err(pid, &port->dev_info);
3439 		if (ret != 0)
3440 			return;
3441 
3442 		if (nb_rxq > 1) {
3443 			port->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
3444 			port->dev_conf.rx_adv_conf.rss_conf.rss_hf =
3445 				rss_hf & port->dev_info.flow_type_rss_offloads;
3446 		} else {
3447 			port->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
3448 			port->dev_conf.rx_adv_conf.rss_conf.rss_hf = 0;
3449 		}
3450 
3451 		if (port->dcb_flag == 0) {
3452 			if( port->dev_conf.rx_adv_conf.rss_conf.rss_hf != 0)
3453 				port->dev_conf.rxmode.mq_mode =
3454 					(enum rte_eth_rx_mq_mode)
3455 						(rx_mq_mode & ETH_MQ_RX_RSS);
3456 			else
3457 				port->dev_conf.rxmode.mq_mode = ETH_MQ_RX_NONE;
3458 		}
3459 
3460 		rxtx_port_config(port);
3461 
3462 		ret = eth_macaddr_get_print_err(pid, &port->eth_addr);
3463 		if (ret != 0)
3464 			return;
3465 
3466 		map_port_queue_stats_mapping_registers(pid, port);
3467 #if defined RTE_LIBRTE_IXGBE_PMD && defined RTE_LIBRTE_IXGBE_BYPASS
3468 		rte_pmd_ixgbe_bypass_init(pid);
3469 #endif
3470 
3471 		if (lsc_interrupt &&
3472 		    (rte_eth_devices[pid].data->dev_flags &
3473 		     RTE_ETH_DEV_INTR_LSC))
3474 			port->dev_conf.intr_conf.lsc = 1;
3475 		if (rmv_interrupt &&
3476 		    (rte_eth_devices[pid].data->dev_flags &
3477 		     RTE_ETH_DEV_INTR_RMV))
3478 			port->dev_conf.intr_conf.rmv = 1;
3479 	}
3480 }
3481 
3482 void set_port_slave_flag(portid_t slave_pid)
3483 {
3484 	struct rte_port *port;
3485 
3486 	port = &ports[slave_pid];
3487 	port->slave_flag = 1;
3488 }
3489 
3490 void clear_port_slave_flag(portid_t slave_pid)
3491 {
3492 	struct rte_port *port;
3493 
3494 	port = &ports[slave_pid];
3495 	port->slave_flag = 0;
3496 }
3497 
3498 uint8_t port_is_bonding_slave(portid_t slave_pid)
3499 {
3500 	struct rte_port *port;
3501 
3502 	port = &ports[slave_pid];
3503 	if ((rte_eth_devices[slave_pid].data->dev_flags &
3504 	    RTE_ETH_DEV_BONDED_SLAVE) || (port->slave_flag == 1))
3505 		return 1;
3506 	return 0;
3507 }
3508 
3509 const uint16_t vlan_tags[] = {
3510 		0,  1,  2,  3,  4,  5,  6,  7,
3511 		8,  9, 10, 11,  12, 13, 14, 15,
3512 		16, 17, 18, 19, 20, 21, 22, 23,
3513 		24, 25, 26, 27, 28, 29, 30, 31
3514 };
3515 
3516 static  int
3517 get_eth_dcb_conf(portid_t pid, struct rte_eth_conf *eth_conf,
3518 		 enum dcb_mode_enable dcb_mode,
3519 		 enum rte_eth_nb_tcs num_tcs,
3520 		 uint8_t pfc_en)
3521 {
3522 	uint8_t i;
3523 	int32_t rc;
3524 	struct rte_eth_rss_conf rss_conf;
3525 
3526 	/*
3527 	 * Build up the correct configuration for DCB+VT based on the VLAN tags
3528 	 * array given above and the number of traffic classes available for use.
3529 	 */
3530 	if (dcb_mode == DCB_VT_ENABLED) {
3531 		struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
3532 				&eth_conf->rx_adv_conf.vmdq_dcb_conf;
3533 		struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3534 				&eth_conf->tx_adv_conf.vmdq_dcb_tx_conf;
3535 
3536 		/* VMDQ+DCB RX and TX configurations */
3537 		vmdq_rx_conf->enable_default_pool = 0;
3538 		vmdq_rx_conf->default_pool = 0;
3539 		vmdq_rx_conf->nb_queue_pools =
3540 			(num_tcs ==  ETH_4_TCS ? ETH_32_POOLS : ETH_16_POOLS);
3541 		vmdq_tx_conf->nb_queue_pools =
3542 			(num_tcs ==  ETH_4_TCS ? ETH_32_POOLS : ETH_16_POOLS);
3543 
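		/*
		 * Map each VLAN tag from the table above to one pool, cycling
		 * through the available pools, and spread the user priorities
		 * evenly over the traffic classes.
		 */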
3544 		vmdq_rx_conf->nb_pool_maps = vmdq_rx_conf->nb_queue_pools;
3545 		for (i = 0; i < vmdq_rx_conf->nb_pool_maps; i++) {
3546 			vmdq_rx_conf->pool_map[i].vlan_id = vlan_tags[i];
3547 			vmdq_rx_conf->pool_map[i].pools =
3548 				1 << (i % vmdq_rx_conf->nb_queue_pools);
3549 		}
3550 		for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3551 			vmdq_rx_conf->dcb_tc[i] = i % num_tcs;
3552 			vmdq_tx_conf->dcb_tc[i] = i % num_tcs;
3553 		}
3554 
3555 		/* set DCB mode of RX and TX of multiple queues */
3556 		eth_conf->rxmode.mq_mode =
3557 				(enum rte_eth_rx_mq_mode)
3558 					(rx_mq_mode & ETH_MQ_RX_VMDQ_DCB);
3559 		eth_conf->txmode.mq_mode = ETH_MQ_TX_VMDQ_DCB;
3560 	} else {
3561 		struct rte_eth_dcb_rx_conf *rx_conf =
3562 				&eth_conf->rx_adv_conf.dcb_rx_conf;
3563 		struct rte_eth_dcb_tx_conf *tx_conf =
3564 				&eth_conf->tx_adv_conf.dcb_tx_conf;
3565 
3566 		memset(&rss_conf, 0, sizeof(struct rte_eth_rss_conf));
3567 
3568 		rc = rte_eth_dev_rss_hash_conf_get(pid, &rss_conf);
3569 		if (rc != 0)
3570 			return rc;
3571 
3572 		rx_conf->nb_tcs = num_tcs;
3573 		tx_conf->nb_tcs = num_tcs;
3574 
3575 		for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3576 			rx_conf->dcb_tc[i] = i % num_tcs;
3577 			tx_conf->dcb_tc[i] = i % num_tcs;
3578 		}
3579 
3580 		eth_conf->rxmode.mq_mode =
3581 				(enum rte_eth_rx_mq_mode)
3582 					(rx_mq_mode & ETH_MQ_RX_DCB_RSS);
3583 		eth_conf->rx_adv_conf.rss_conf = rss_conf;
3584 		eth_conf->txmode.mq_mode = ETH_MQ_TX_DCB;
3585 	}
3586 
3587 	if (pfc_en)
3588 		eth_conf->dcb_capability_en =
3589 				ETH_DCB_PG_SUPPORT | ETH_DCB_PFC_SUPPORT;
3590 	else
3591 		eth_conf->dcb_capability_en = ETH_DCB_PG_SUPPORT;
3592 
3593 	return 0;
3594 }
3595 
3596 int
3597 init_port_dcb_config(portid_t pid,
3598 		     enum dcb_mode_enable dcb_mode,
3599 		     enum rte_eth_nb_tcs num_tcs,
3600 		     uint8_t pfc_en)
3601 {
3602 	struct rte_eth_conf port_conf;
3603 	struct rte_port *rte_port;
3604 	int retval;
3605 	uint16_t i;
3606 
3607 	rte_port = &ports[pid];
3608 
3609 	memset(&port_conf, 0, sizeof(struct rte_eth_conf));
3610 	/* Enter DCB configuration status */
3611 	dcb_config = 1;
3612 
3613 	port_conf.rxmode = rte_port->dev_conf.rxmode;
3614 	port_conf.txmode = rte_port->dev_conf.txmode;
3615 
3616 	/* set configuration of DCB in VT mode and DCB in non-VT mode */
3617 	retval = get_eth_dcb_conf(pid, &port_conf, dcb_mode, num_tcs, pfc_en);
3618 	if (retval < 0)
3619 		return retval;
3620 	port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_VLAN_FILTER;
3621 
3622 	/* re-configure the device */
3623 	retval = rte_eth_dev_configure(pid, nb_rxq, nb_rxq, &port_conf);
3624 	if (retval < 0)
3625 		return retval;
3626 
3627 	retval = eth_dev_info_get_print_err(pid, &rte_port->dev_info);
3628 	if (retval != 0)
3629 		return retval;
3630 
3631 	/* If dev_info.vmdq_pool_base is greater than 0,
3632 	 * the queue ids of the VMDq pools start after the PF queues.
3633 	 */
3634 	if (dcb_mode == DCB_VT_ENABLED &&
3635 	    rte_port->dev_info.vmdq_pool_base > 0) {
3636 		printf("VMDQ_DCB multi-queue mode is nonsensical"
3637 			" for port %d.\n", pid);
3638 		return -1;
3639 	}
3640 
3641 	/* Assume the ports in testpmd have the same DCB capability
3642 	 * and the same number of rxq and txq in DCB mode.
3643 	 */
3644 	if (dcb_mode == DCB_VT_ENABLED) {
3645 		if (rte_port->dev_info.max_vfs > 0) {
3646 			nb_rxq = rte_port->dev_info.nb_rx_queues;
3647 			nb_txq = rte_port->dev_info.nb_tx_queues;
3648 		} else {
3649 			nb_rxq = rte_port->dev_info.max_rx_queues;
3650 			nb_txq = rte_port->dev_info.max_tx_queues;
3651 		}
3652 	} else {
3653 		/* if VT is disabled, use all PF queues */
3654 		if (rte_port->dev_info.vmdq_pool_base == 0) {
3655 			nb_rxq = rte_port->dev_info.max_rx_queues;
3656 			nb_txq = rte_port->dev_info.max_tx_queues;
3657 		} else {
3658 			nb_rxq = (queueid_t)num_tcs;
3659 			nb_txq = (queueid_t)num_tcs;
3660 
3661 		}
3662 	}
3663 	rx_free_thresh = 64;
3664 
3665 	memcpy(&rte_port->dev_conf, &port_conf, sizeof(struct rte_eth_conf));
3666 
3667 	rxtx_port_config(rte_port);
3668 	/* VLAN filter */
3669 	rte_port->dev_conf.rxmode.offloads |= DEV_RX_OFFLOAD_VLAN_FILTER;
3670 	for (i = 0; i < RTE_DIM(vlan_tags); i++)
3671 		rx_vft_set(pid, vlan_tags[i], 1);
3672 
3673 	retval = eth_macaddr_get_print_err(pid, &rte_port->eth_addr);
3674 	if (retval != 0)
3675 		return retval;
3676 
3677 	map_port_queue_stats_mapping_registers(pid, rte_port);
3678 
3679 	rte_port->dcb_flag = 1;
3680 
3681 	return 0;
3682 }
3683 
3684 static void
3685 init_port(void)
3686 {
3687 	/* Configuration of Ethernet ports. */
3688 	ports = rte_zmalloc("testpmd: ports",
3689 			    sizeof(struct rte_port) * RTE_MAX_ETHPORTS,
3690 			    RTE_CACHE_LINE_SIZE);
3691 	if (ports == NULL) {
3692 		rte_exit(EXIT_FAILURE,
3693 				"rte_zmalloc(%d struct rte_port) failed\n",
3694 				RTE_MAX_ETHPORTS);
3695 	}
3696 
3697 	/* Initialize ports NUMA structures */
3698 	memset(port_numa, NUMA_NO_CONFIG, RTE_MAX_ETHPORTS);
3699 	memset(rxring_numa, NUMA_NO_CONFIG, RTE_MAX_ETHPORTS);
3700 	memset(txring_numa, NUMA_NO_CONFIG, RTE_MAX_ETHPORTS);
3701 }
3702 
3703 static void
3704 force_quit(void)
3705 {
3706 	pmd_test_exit();
3707 	prompt_exit();
3708 }
3709 
3710 static void
3711 print_stats(void)
3712 {
3713 	uint8_t i;
3714 	const char clr[] = { 27, '[', '2', 'J', '\0' };
3715 	const char top_left[] = { 27, '[', '1', ';', '1', 'H', '\0' };
3716 
3717 	/* Clear screen and move to top left */
3718 	printf("%s%s", clr, top_left);
3719 
3720 	printf("\nPort statistics ====================================");
3721 	for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++)
3722 		nic_stats_display(fwd_ports_ids[i]);
3723 
3724 	fflush(stdout);
3725 }
3726 
3727 static void
3728 signal_handler(int signum)
3729 {
3730 	if (signum == SIGINT || signum == SIGTERM) {
3731 		printf("\nSignal %d received, preparing to exit...\n",
3732 				signum);
3733 #ifdef RTE_LIBRTE_PDUMP
3734 		/* uninitialize packet capture framework */
3735 		rte_pdump_uninit();
3736 #endif
3737 #ifdef RTE_LIBRTE_LATENCY_STATS
3738 		if (latencystats_enabled != 0)
3739 			rte_latencystats_uninit();
3740 #endif
3741 		force_quit();
3742 		/* Set flag to indicate the force termination. */
3743 		f_quit = 1;
3744 		/* exit with the expected status */
3745 		signal(signum, SIG_DFL);
3746 		kill(getpid(), signum);
3747 	}
3748 }
3749 
3750 int
3751 main(int argc, char** argv)
3752 {
3753 	int diag;
3754 	portid_t port_id;
3755 	uint16_t count;
3756 	int ret;
3757 
3758 	signal(SIGINT, signal_handler);
3759 	signal(SIGTERM, signal_handler);
3760 
3761 	testpmd_logtype = rte_log_register("testpmd");
3762 	if (testpmd_logtype < 0)
3763 		rte_exit(EXIT_FAILURE, "Cannot register log type");
3764 	rte_log_set_level(testpmd_logtype, RTE_LOG_DEBUG);
3765 
3766 	diag = rte_eal_init(argc, argv);
3767 	if (diag < 0)
3768 		rte_exit(EXIT_FAILURE, "Cannot init EAL: %s\n",
3769 			 rte_strerror(rte_errno));
3770 
3771 	if (rte_eal_process_type() == RTE_PROC_SECONDARY)
3772 		rte_exit(EXIT_FAILURE,
3773 			 "Secondary process type not supported.\n");
3774 
3775 	ret = register_eth_event_callback();
3776 	if (ret != 0)
3777 		rte_exit(EXIT_FAILURE, "Cannot register for ethdev events");
3778 
3779 #ifdef RTE_LIBRTE_PDUMP
3780 	/* initialize packet capture framework */
3781 	rte_pdump_init();
3782 #endif
3783 
3784 	count = 0;
3785 	RTE_ETH_FOREACH_DEV(port_id) {
3786 		ports_ids[count] = port_id;
3787 		count++;
3788 	}
3789 	nb_ports = (portid_t) count;
3790 	if (nb_ports == 0)
3791 		TESTPMD_LOG(WARNING, "No probed ethernet devices\n");
3792 
3793 	/* allocate port structures, and init them */
3794 	init_port();
3795 
3796 	set_def_fwd_config();
3797 	if (nb_lcores == 0)
3798 		rte_exit(EXIT_FAILURE, "No cores defined for forwarding\n"
3799 			 "Check the core mask argument\n");
3800 
3801 	/* Bitrate/latency stats disabled by default */
3802 #ifdef RTE_LIBRTE_BITRATESTATS
3803 	bitrate_enabled = 0;
3804 #endif
3805 #ifdef RTE_LIBRTE_LATENCY_STATS
3806 	latencystats_enabled = 0;
3807 #endif
3808 
3809 	/* on FreeBSD, mlockall() is disabled by default */
3810 #ifdef RTE_EXEC_ENV_FREEBSD
3811 	do_mlockall = 0;
3812 #else
3813 	do_mlockall = 1;
3814 #endif
3815 
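	/*
	 * rte_eal_init() returned the number of arguments it consumed;
	 * skip past them so only testpmd's own options are parsed below.
	 */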
3816 	argc -= diag;
3817 	argv += diag;
3818 	if (argc > 1)
3819 		launch_args_parse(argc, argv);
3820 
3821 	if (do_mlockall && mlockall(MCL_CURRENT | MCL_FUTURE)) {
3822 		TESTPMD_LOG(NOTICE, "mlockall() failed with error \"%s\"\n",
3823 			strerror(errno));
3824 	}
3825 
3826 	if (tx_first && interactive)
3827 		rte_exit(EXIT_FAILURE, "--tx-first cannot be used on "
3828 				"interactive mode.\n");
3829 
3830 	if (tx_first && lsc_interrupt) {
3831 		printf("Warning: lsc_interrupt needs to be off when "
3832 				" using tx_first. Disabling.\n");
3833 		lsc_interrupt = 0;
3834 	}
3835 
3836 	if (!nb_rxq && !nb_txq)
3837 		printf("Warning: Either rx or tx queues should be non-zero\n");
3838 
3839 	if (nb_rxq > 1 && nb_rxq > nb_txq)
3840 		printf("Warning: nb_rxq=%d enables RSS configuration, "
3841 		       "but nb_txq=%d will prevent to fully test it.\n",
3842 		       nb_rxq, nb_txq);
3843 
3844 	init_config();
3845 
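	/*
	 * When hotplug monitoring is requested on the command line, enable
	 * hotplug handling and start the device event monitor so that
	 * attach/detach events are delivered to dev_event_callback().
	 */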
3846 	if (hot_plug) {
3847 		ret = rte_dev_hotplug_handle_enable();
3848 		if (ret) {
3849 			RTE_LOG(ERR, EAL,
3850 				"fail to enable hotplug handling.");
3851 			return -1;
3852 		}
3853 
3854 		ret = rte_dev_event_monitor_start();
3855 		if (ret) {
3856 			RTE_LOG(ERR, EAL,
3857 				"fail to start device event monitoring.");
3858 			return -1;
3859 		}
3860 
3861 		ret = rte_dev_event_callback_register(NULL,
3862 			dev_event_callback, NULL);
3863 		if (ret) {
3864 			RTE_LOG(ERR, EAL,
3865 				"fail  to register device event callback\n");
3866 			return -1;
3867 		}
3868 	}
3869 
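	/* Unless device start was disabled on the command line, start all ports now. */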
3870 	if (!no_device_start && start_port(RTE_PORT_ALL) != 0)
3871 		rte_exit(EXIT_FAILURE, "Start ports failed\n");
3872 
3873 	/* set all ports to promiscuous mode by default */
3874 	RTE_ETH_FOREACH_DEV(port_id) {
3875 		ret = rte_eth_promiscuous_enable(port_id);
3876 		if (ret != 0)
3877 			printf("Error during enabling promiscuous mode for port %u: %s - ignore\n",
3878 				port_id, rte_strerror(-ret));
3879 	}
3880 
3881 	/* Init metrics library */
3882 	rte_metrics_init(rte_socket_id());
3883 
3884 #ifdef RTE_LIBRTE_LATENCY_STATS
3885 	if (latencystats_enabled != 0) {
3886 		int ret = rte_latencystats_init(1, NULL);
3887 		if (ret)
3888 			printf("Warning: latencystats init()"
3889 				" returned error %d\n",	ret);
3890 		printf("Latencystats running on lcore %d\n",
3891 			latencystats_lcore_id);
3892 	}
3893 #endif
3894 
3895 	/* Setup bitrate stats */
3896 #ifdef RTE_LIBRTE_BITRATESTATS
3897 	if (bitrate_enabled != 0) {
3898 		bitrate_data = rte_stats_bitrate_create();
3899 		if (bitrate_data == NULL)
3900 			rte_exit(EXIT_FAILURE,
3901 				"Could not allocate bitrate data.\n");
3902 		rte_stats_bitrate_reg(bitrate_data);
3903 	}
3904 #endif
3905 
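	/*
	 * Interactive mode: optionally replay a command file, then hand
	 * control over to the testpmd prompt.  Otherwise start forwarding
	 * immediately and, if a statistics period was configured, refresh
	 * the port statistics display until termination is requested.
	 */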
3906 #ifdef RTE_LIBRTE_CMDLINE
3907 	if (strlen(cmdline_filename) != 0)
3908 		cmdline_read_from_file(cmdline_filename);
3909 
3910 	if (interactive == 1) {
3911 		if (auto_start) {
3912 			printf("Start automatic packet forwarding\n");
3913 			start_packet_forwarding(0);
3914 		}
3915 		prompt();
3916 		pmd_test_exit();
3917 	} else
3918 #endif
3919 	{
3920 		char c;
3921 		int rc;
3922 
3923 		f_quit = 0;
3924 
3925 		printf("No commandline core given, start packet forwarding\n");
3926 		start_packet_forwarding(tx_first);
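		/*
		 * With --stats-period <n>, the main lcore prints the port
		 * statistics every <n> seconds until SIGINT/SIGTERM sets
		 * f_quit.
		 */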
3927 		if (stats_period != 0) {
3928 			uint64_t prev_time = 0, cur_time, diff_time = 0;
3929 			uint64_t timer_period;
3930 
3931 			/* Convert to number of cycles */
3932 			timer_period = stats_period * rte_get_timer_hz();
3933 
3934 			while (f_quit == 0) {
3935 				cur_time = rte_get_timer_cycles();
3936 				diff_time += cur_time - prev_time;
3937 
3938 				if (diff_time >= timer_period) {
3939 					print_stats();
3940 					/* Reset the timer */
3941 					diff_time = 0;
3942 				}
3943 				/* Sleep to avoid unnecessary checks */
3944 				prev_time = cur_time;
3945 				sleep(1);
3946 			}
3947 		}
3948 
3949 		printf("Press enter to exit\n");
3950 		rc = read(0, &c, 1);
3951 		pmd_test_exit();
3952 		if (rc < 0)
3953 			return 1;
3954 	}
3955 
3956 	ret = rte_eal_cleanup();
3957 	if (ret != 0)
3958 		rte_exit(EXIT_FAILURE,
3959 			 "EAL cleanup failed: %s\n", strerror(-ret));
3960 
3961 	return EXIT_SUCCESS;
3962 }
3963