xref: /dpdk/app/test-pmd/testpmd.c (revision 21f46d5f194ee475de622caa1cad54a91d1effb1)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2017 Intel Corporation
3  */
4 
5 #include <stdarg.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <signal.h>
9 #include <string.h>
10 #include <time.h>
11 #include <fcntl.h>
12 #include <sys/mman.h>
13 #include <sys/types.h>
14 #include <errno.h>
15 #include <stdbool.h>
16 
17 #include <sys/queue.h>
18 #include <sys/stat.h>
19 
20 #include <stdint.h>
21 #include <unistd.h>
22 #include <inttypes.h>
23 
24 #include <rte_common.h>
25 #include <rte_errno.h>
26 #include <rte_byteorder.h>
27 #include <rte_log.h>
28 #include <rte_debug.h>
29 #include <rte_cycles.h>
30 #include <rte_memory.h>
31 #include <rte_memcpy.h>
32 #include <rte_launch.h>
33 #include <rte_eal.h>
34 #include <rte_alarm.h>
35 #include <rte_per_lcore.h>
36 #include <rte_lcore.h>
37 #include <rte_atomic.h>
38 #include <rte_branch_prediction.h>
39 #include <rte_mempool.h>
40 #include <rte_malloc.h>
41 #include <rte_mbuf.h>
42 #include <rte_mbuf_pool_ops.h>
43 #include <rte_interrupts.h>
44 #include <rte_pci.h>
45 #include <rte_ether.h>
46 #include <rte_ethdev.h>
47 #include <rte_dev.h>
48 #include <rte_string_fns.h>
49 #ifdef RTE_LIBRTE_IXGBE_PMD
50 #include <rte_pmd_ixgbe.h>
51 #endif
52 #ifdef RTE_LIBRTE_PDUMP
53 #include <rte_pdump.h>
54 #endif
55 #include <rte_flow.h>
56 #include <rte_metrics.h>
57 #ifdef RTE_LIBRTE_BITRATE
58 #include <rte_bitrate.h>
59 #endif
60 #ifdef RTE_LIBRTE_LATENCY_STATS
61 #include <rte_latencystats.h>
62 #endif
63 
64 #include "testpmd.h"
65 
66 #ifndef MAP_HUGETLB
67 /* FreeBSD may not have MAP_HUGETLB (in fact, it probably doesn't) */
68 #define HUGE_FLAG (0x40000)
69 #else
70 #define HUGE_FLAG MAP_HUGETLB
71 #endif
72 
73 #ifndef MAP_HUGE_SHIFT
74 /* older kernels (or FreeBSD) will not have this define */
75 #define HUGE_SHIFT (26)
76 #else
77 #define HUGE_SHIFT MAP_HUGE_SHIFT
78 #endif
79 
80 #define EXTMEM_HEAP_NAME "extmem"
81 
82 uint16_t verbose_level = 0; /**< Silent by default. */
83 int testpmd_logtype; /**< Log type for testpmd logs */
84 
85 /* use master core for command line ? */
86 uint8_t interactive = 0;
87 uint8_t auto_start = 0;
88 uint8_t tx_first;
89 char cmdline_filename[PATH_MAX] = {0};
90 
91 /*
92  * NUMA support configuration.
93  * When set, the NUMA support attempts to dispatch the allocation of the
94  * RX and TX memory rings, and of the DMA memory buffers (mbufs) for the
95  * probed ports among the CPU sockets 0 and 1.
96  * Otherwise, all memory is allocated from CPU socket 0.
97  */
98 uint8_t numa_support = 1; /**< numa enabled by default */
99 
100 /*
101  * In UMA mode, all memory is allocated from socket 0 if --socket-num is
102  * not configured.
103  */
104 uint8_t socket_num = UMA_NO_CONFIG;
105 
106 /*
107  * Select mempool allocation type:
108  * - native: use regular DPDK memory
109  * - anon: use regular DPDK memory to create mempool, but populate using
110  *         anonymous memory (may not be IOVA-contiguous)
111  * - xmem: use externally allocated hugepage memory
112  */
113 uint8_t mp_alloc_type = MP_ALLOC_NATIVE;
114 
115 /*
116  * Store specified sockets on which memory pool to be used by ports
117  * is allocated.
118  */
119 uint8_t port_numa[RTE_MAX_ETHPORTS];
120 
121 /*
122  * Store specified sockets on which RX ring to be used by ports
123  * is allocated.
124  */
125 uint8_t rxring_numa[RTE_MAX_ETHPORTS];
126 
127 /*
128  * Store specified sockets on which TX ring to be used by ports
129  * is allocated.
130  */
131 uint8_t txring_numa[RTE_MAX_ETHPORTS];
132 
133 /*
134  * Record the Ethernet address of peer target ports to which packets are
135  * forwarded.
136  * Must be instantiated with the ethernet addresses of peer traffic generator
137  * ports.
138  */
139 struct rte_ether_addr peer_eth_addrs[RTE_MAX_ETHPORTS];
140 portid_t nb_peer_eth_addrs = 0;
141 
142 /*
143  * Probed Target Environment.
144  */
145 struct rte_port *ports;	       /**< For all probed ethernet ports. */
146 portid_t nb_ports;             /**< Number of probed ethernet ports. */
147 struct fwd_lcore **fwd_lcores; /**< For all probed logical cores. */
148 lcoreid_t nb_lcores;           /**< Number of probed logical cores. */
149 
150 portid_t ports_ids[RTE_MAX_ETHPORTS]; /**< Store all port ids. */
151 
152 /*
153  * Test Forwarding Configuration.
154  *    nb_fwd_lcores <= nb_cfg_lcores <= nb_lcores
155  *    nb_fwd_ports  <= nb_cfg_ports  <= nb_ports
156  */
157 lcoreid_t nb_cfg_lcores; /**< Number of configured logical cores. */
158 lcoreid_t nb_fwd_lcores; /**< Number of forwarding logical cores. */
159 portid_t  nb_cfg_ports;  /**< Number of configured ports. */
160 portid_t  nb_fwd_ports;  /**< Number of forwarding ports. */
161 
162 unsigned int fwd_lcores_cpuids[RTE_MAX_LCORE]; /**< CPU ids configuration. */
163 portid_t fwd_ports_ids[RTE_MAX_ETHPORTS];      /**< Port ids configuration. */
164 
165 struct fwd_stream **fwd_streams; /**< For each RX queue of each port. */
166 streamid_t nb_fwd_streams;       /**< Is equal to (nb_ports * nb_rxq). */
167 
168 /*
169  * Forwarding engines.
170  */
171 struct fwd_engine * fwd_engines[] = {
172 	&io_fwd_engine,
173 	&mac_fwd_engine,
174 	&mac_swap_engine,
175 	&flow_gen_engine,
176 	&rx_only_engine,
177 	&tx_only_engine,
178 	&csum_fwd_engine,
179 	&icmp_echo_engine,
180 	&noisy_vnf_engine,
181 #if defined RTE_LIBRTE_PMD_SOFTNIC
182 	&softnic_fwd_engine,
183 #endif
184 #ifdef RTE_LIBRTE_IEEE1588
185 	&ieee1588_fwd_engine,
186 #endif
187 	NULL,
188 };
189 
190 struct rte_mempool *mempools[RTE_MAX_NUMA_NODES];
191 uint16_t mempool_flags;
192 
193 struct fwd_config cur_fwd_config;
194 struct fwd_engine *cur_fwd_eng = &io_fwd_engine; /**< IO mode by default. */
195 uint32_t retry_enabled;
196 uint32_t burst_tx_delay_time = BURST_TX_WAIT_US;
197 uint32_t burst_tx_retry_num = BURST_TX_RETRIES;
198 
199 uint16_t mbuf_data_size = DEFAULT_MBUF_DATA_SIZE; /**< Mbuf data space size. */
200 uint32_t param_total_num_mbufs = 0;  /**< number of mbufs in all pools - if
201                                       * specified on command-line. */
202 uint16_t stats_period; /**< Period to show statistics (disabled by default) */
203 
204 /*
205  * In a container, a process running with the 'stats-period' option cannot be
206  * terminated. Set this flag to exit the stats period loop after SIGINT/SIGTERM.
207  */
208 uint8_t f_quit;
209 
210 /*
211  * Configuration of packet segments used by the "txonly" processing engine.
212  */
213 uint16_t tx_pkt_length = TXONLY_DEF_PACKET_LEN; /**< TXONLY packet length. */
214 uint16_t tx_pkt_seg_lengths[RTE_MAX_SEGS_PER_PKT] = {
215 	TXONLY_DEF_PACKET_LEN,
216 };
217 uint8_t  tx_pkt_nb_segs = 1; /**< Number of segments in TXONLY packets */
218 
219 enum tx_pkt_split tx_pkt_split = TX_PKT_SPLIT_OFF;
220 /**< Split policy for packets to TX. */
221 
222 uint8_t txonly_multi_flow;
223 /**< Whether multiple flows are generated in TXONLY mode. */
224 
225 uint16_t nb_pkt_per_burst = DEF_PKT_BURST; /**< Number of packets per burst. */
226 uint16_t mb_mempool_cache = DEF_MBUF_CACHE; /**< Size of mbuf mempool cache. */
227 
228 /* Whether the current configuration is in DCB mode; 0 means it is not */
229 uint8_t dcb_config = 0;
230 
231 /* Whether the dcb is in testing status */
232 uint8_t dcb_test = 0;
233 
234 /*
235  * Configurable number of RX/TX queues.
236  */
237 queueid_t nb_hairpinq; /**< Number of hairpin queues per port. */
238 queueid_t nb_rxq = 1; /**< Number of RX queues per port. */
239 queueid_t nb_txq = 1; /**< Number of TX queues per port. */
240 
241 /*
242  * Configurable number of RX/TX ring descriptors.
243  * Defaults are supplied by drivers via ethdev.
244  */
245 #define RTE_TEST_RX_DESC_DEFAULT 0
246 #define RTE_TEST_TX_DESC_DEFAULT 0
247 uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT; /**< Number of RX descriptors. */
248 uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT; /**< Number of TX descriptors. */
249 
250 #define RTE_PMD_PARAM_UNSET -1
251 /*
252  * Configurable values of RX and TX ring threshold registers.
253  */
254 
255 int8_t rx_pthresh = RTE_PMD_PARAM_UNSET;
256 int8_t rx_hthresh = RTE_PMD_PARAM_UNSET;
257 int8_t rx_wthresh = RTE_PMD_PARAM_UNSET;
258 
259 int8_t tx_pthresh = RTE_PMD_PARAM_UNSET;
260 int8_t tx_hthresh = RTE_PMD_PARAM_UNSET;
261 int8_t tx_wthresh = RTE_PMD_PARAM_UNSET;
262 
263 /*
264  * Configurable value of RX free threshold.
265  */
266 int16_t rx_free_thresh = RTE_PMD_PARAM_UNSET;
267 
268 /*
269  * Configurable value of RX drop enable.
270  */
271 int8_t rx_drop_en = RTE_PMD_PARAM_UNSET;
272 
273 /*
274  * Configurable value of TX free threshold.
275  */
276 int16_t tx_free_thresh = RTE_PMD_PARAM_UNSET;
277 
278 /*
279  * Configurable value of TX RS bit threshold.
280  */
281 int16_t tx_rs_thresh = RTE_PMD_PARAM_UNSET;
282 
283 /*
284  * Configurable value of buffered packets before sending.
285  */
286 uint16_t noisy_tx_sw_bufsz;
287 
288 /*
289  * Configurable value of packet buffer timeout.
290  */
291 uint16_t noisy_tx_sw_buf_flush_time;
292 
293 /*
294  * Configurable value for size of VNF internal memory area
295  * used for simulating noisy neighbour behaviour
296  */
297 uint64_t noisy_lkup_mem_sz;
298 
299 /*
300  * Configurable value of number of random writes done in
301  * VNF simulation memory area.
302  */
303 uint64_t noisy_lkup_num_writes;
304 
305 /*
306  * Configurable value of number of random reads done in
307  * VNF simulation memory area.
308  */
309 uint64_t noisy_lkup_num_reads;
310 
311 /*
312  * Configurable value of number of random reads/writes done in
313  * VNF simulation memory area.
314  */
315 uint64_t noisy_lkup_num_reads_writes;
316 
317 /*
318  * Receive Side Scaling (RSS) configuration.
319  */
320 uint64_t rss_hf = ETH_RSS_IP; /* RSS IP by default. */
321 
322 /*
323  * Port topology configuration
324  */
325 uint16_t port_topology = PORT_TOPOLOGY_PAIRED; /* Ports are paired by default */
326 
327 /*
328  * Avoid flushing all the RX streams before forwarding starts.
329  */
330 uint8_t no_flush_rx = 0; /* flush by default */
331 
332 /*
333  * Flow API isolated mode.
334  */
335 uint8_t flow_isolate_all;
336 
337 /*
338  * Avoid checking link status when starting/stopping a port.
339  */
340 uint8_t no_link_check = 0; /* check by default */
341 
342 /*
343  * Don't automatically start all ports in interactive mode.
344  */
345 uint8_t no_device_start = 0;
346 
347 /*
348  * Enable link status change notification
349  */
350 uint8_t lsc_interrupt = 1; /* enabled by default */
351 
352 /*
353  * Enable device removal notification.
354  */
355 uint8_t rmv_interrupt = 1; /* enabled by default */
356 
357 uint8_t hot_plug = 0; /**< hotplug disabled by default. */
358 
359 /* After attach, port setup is called on event or by iterator */
360 bool setup_on_probe_event = true;
361 
362 /* Clear ptypes on port initialization. */
363 uint8_t clear_ptypes = true;
364 
365 /* Pretty printing of ethdev events */
366 static const char * const eth_event_desc[] = {
367 	[RTE_ETH_EVENT_UNKNOWN] = "unknown",
368 	[RTE_ETH_EVENT_INTR_LSC] = "link state change",
369 	[RTE_ETH_EVENT_QUEUE_STATE] = "queue state",
370 	[RTE_ETH_EVENT_INTR_RESET] = "reset",
371 	[RTE_ETH_EVENT_VF_MBOX] = "VF mbox",
372 	[RTE_ETH_EVENT_IPSEC] = "IPsec",
373 	[RTE_ETH_EVENT_MACSEC] = "MACsec",
374 	[RTE_ETH_EVENT_INTR_RMV] = "device removal",
375 	[RTE_ETH_EVENT_NEW] = "device probed",
376 	[RTE_ETH_EVENT_DESTROY] = "device released",
377 	[RTE_ETH_EVENT_MAX] = NULL,
378 };
379 
380 /*
381  * Display or mask ether events
382  * Default to all events except VF_MBOX
383  */
384 uint32_t event_print_mask = (UINT32_C(1) << RTE_ETH_EVENT_UNKNOWN) |
385 			    (UINT32_C(1) << RTE_ETH_EVENT_INTR_LSC) |
386 			    (UINT32_C(1) << RTE_ETH_EVENT_QUEUE_STATE) |
387 			    (UINT32_C(1) << RTE_ETH_EVENT_INTR_RESET) |
388 			    (UINT32_C(1) << RTE_ETH_EVENT_IPSEC) |
389 			    (UINT32_C(1) << RTE_ETH_EVENT_MACSEC) |
390 			    (UINT32_C(1) << RTE_ETH_EVENT_INTR_RMV);
391 /*
392  * Decide whether all memory is locked for performance.
393  */
394 int do_mlockall = 0;
395 
396 /*
397  * NIC bypass mode configuration options.
398  */
399 
400 #if defined RTE_LIBRTE_IXGBE_PMD && defined RTE_LIBRTE_IXGBE_BYPASS
401 /* The NIC bypass watchdog timeout. */
402 uint32_t bypass_timeout = RTE_PMD_IXGBE_BYPASS_TMT_OFF;
403 #endif
404 
405 
406 #ifdef RTE_LIBRTE_LATENCY_STATS
407 
408 /*
409  * Set when latency stats is enabled in the commandline
410  */
411 uint8_t latencystats_enabled;
412 
413 /*
414  * Lcore ID to serve latency statistics.
415  */
416 lcoreid_t latencystats_lcore_id = -1;
417 
418 #endif
419 
420 /*
421  * Ethernet device configuration.
422  */
423 struct rte_eth_rxmode rx_mode = {
424 	.max_rx_pkt_len = RTE_ETHER_MAX_LEN,
425 		/**< Default maximum frame length. */
426 };
427 
428 struct rte_eth_txmode tx_mode = {
429 	.offloads = DEV_TX_OFFLOAD_MBUF_FAST_FREE,
430 };
431 
432 struct rte_fdir_conf fdir_conf = {
433 	.mode = RTE_FDIR_MODE_NONE,
434 	.pballoc = RTE_FDIR_PBALLOC_64K,
435 	.status = RTE_FDIR_REPORT_STATUS,
436 	.mask = {
437 		.vlan_tci_mask = 0xFFEF,
438 		.ipv4_mask     = {
439 			.src_ip = 0xFFFFFFFF,
440 			.dst_ip = 0xFFFFFFFF,
441 		},
442 		.ipv6_mask     = {
443 			.src_ip = {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
444 			.dst_ip = {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
445 		},
446 		.src_port_mask = 0xFFFF,
447 		.dst_port_mask = 0xFFFF,
448 		.mac_addr_byte_mask = 0xFF,
449 		.tunnel_type_mask = 1,
450 		.tunnel_id_mask = 0xFFFFFFFF,
451 	},
452 	.drop_queue = 127,
453 };
454 
455 volatile int test_done = 1; /* stop packet forwarding when set to 1. */
456 
457 struct queue_stats_mappings tx_queue_stats_mappings_array[MAX_TX_QUEUE_STATS_MAPPINGS];
458 struct queue_stats_mappings rx_queue_stats_mappings_array[MAX_RX_QUEUE_STATS_MAPPINGS];
459 
460 struct queue_stats_mappings *tx_queue_stats_mappings = tx_queue_stats_mappings_array;
461 struct queue_stats_mappings *rx_queue_stats_mappings = rx_queue_stats_mappings_array;
462 
463 uint16_t nb_tx_queue_stats_mappings = 0;
464 uint16_t nb_rx_queue_stats_mappings = 0;
465 
466 /*
467  * Display zero values by default for xstats
468  */
469 uint8_t xstats_hide_zero;
470 
471 unsigned int num_sockets = 0;
472 unsigned int socket_ids[RTE_MAX_NUMA_NODES];
473 
474 #ifdef RTE_LIBRTE_BITRATE
475 /* Bitrate statistics */
476 struct rte_stats_bitrates *bitrate_data;
477 lcoreid_t bitrate_lcore_id;
478 uint8_t bitrate_enabled;
479 #endif
480 
481 struct gro_status gro_ports[RTE_MAX_ETHPORTS];
482 uint8_t gro_flush_cycles = GRO_DEFAULT_FLUSH_CYCLES;
483 
484 /* Forward function declarations */
485 static void setup_attached_port(portid_t pi);
486 static void map_port_queue_stats_mapping_registers(portid_t pi,
487 						   struct rte_port *port);
488 static void check_all_ports_link_status(uint32_t port_mask);
489 static int eth_event_callback(portid_t port_id,
490 			      enum rte_eth_event_type type,
491 			      void *param, void *ret_param);
492 static void dev_event_callback(const char *device_name,
493 				enum rte_dev_event_type type,
494 				void *param);
495 
496 /*
497  * Check if all the ports are started.
498  * If yes, return positive value. If not, return zero.
499  */
500 static int all_ports_started(void);
501 
502 struct gso_status gso_ports[RTE_MAX_ETHPORTS];
503 uint16_t gso_max_segment_size = RTE_ETHER_MAX_LEN - RTE_ETHER_CRC_LEN;
504 
505 /*
506  * Helper function to check if socket is already discovered.
507  * If yes, return positive value. If not, return zero.
508  */
509 int
510 new_socket_id(unsigned int socket_id)
511 {
512 	unsigned int i;
513 
514 	for (i = 0; i < num_sockets; i++) {
515 		if (socket_ids[i] == socket_id)
516 			return 0;
517 	}
518 	return 1;
519 }
520 
521 /*
522  * Setup default configuration.
523  */
524 static void
525 set_default_fwd_lcores_config(void)
526 {
527 	unsigned int i;
528 	unsigned int nb_lc;
529 	unsigned int sock_num;
530 
531 	nb_lc = 0;
532 	for (i = 0; i < RTE_MAX_LCORE; i++) {
533 		if (!rte_lcore_is_enabled(i))
534 			continue;
535 		sock_num = rte_lcore_to_socket_id(i);
536 		if (new_socket_id(sock_num)) {
537 			if (num_sockets >= RTE_MAX_NUMA_NODES) {
538 				rte_exit(EXIT_FAILURE,
539 					 "Total sockets greater than %u\n",
540 					 RTE_MAX_NUMA_NODES);
541 			}
542 			socket_ids[num_sockets++] = sock_num;
543 		}
544 		if (i == rte_get_master_lcore())
545 			continue;
546 		fwd_lcores_cpuids[nb_lc++] = i;
547 	}
548 	nb_lcores = (lcoreid_t) nb_lc;
549 	nb_cfg_lcores = nb_lcores;
550 	nb_fwd_lcores = 1;
551 }
552 
553 static void
554 set_def_peer_eth_addrs(void)
555 {
556 	portid_t i;
557 
558 	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
559 		peer_eth_addrs[i].addr_bytes[0] = RTE_ETHER_LOCAL_ADMIN_ADDR;
560 		peer_eth_addrs[i].addr_bytes[5] = i;
561 	}
562 }
563 
564 static void
565 set_default_fwd_ports_config(void)
566 {
567 	portid_t pt_id;
568 	int i = 0;
569 
570 	RTE_ETH_FOREACH_DEV(pt_id) {
571 		fwd_ports_ids[i++] = pt_id;
572 
573 		/* Update sockets info according to the attached device */
574 		int socket_id = rte_eth_dev_socket_id(pt_id);
575 		if (socket_id >= 0 && new_socket_id(socket_id)) {
576 			if (num_sockets >= RTE_MAX_NUMA_NODES) {
577 				rte_exit(EXIT_FAILURE,
578 					 "Total sockets greater than %u\n",
579 					 RTE_MAX_NUMA_NODES);
580 			}
581 			socket_ids[num_sockets++] = socket_id;
582 		}
583 	}
584 
585 	nb_cfg_ports = nb_ports;
586 	nb_fwd_ports = nb_ports;
587 }
588 
/*
 * Apply the default forwarding configuration: lcores first, then the peer
 * Ethernet addresses, then the ports. Both the lcore and the port step append
 * to socket_ids[], so the call order determines the order of that array.
 */
void
set_def_fwd_config(void)
{
	set_default_fwd_lcores_config();

	set_def_peer_eth_addrs();

	set_default_fwd_ports_config();
}
596 
597 /* extremely pessimistic estimation of memory required to create a mempool */
598 static int
599 calc_mem_size(uint32_t nb_mbufs, uint32_t mbuf_sz, size_t pgsz, size_t *out)
600 {
601 	unsigned int n_pages, mbuf_per_pg, leftover;
602 	uint64_t total_mem, mbuf_mem, obj_sz;
603 
604 	/* there is no good way to predict how much space the mempool will
605 	 * occupy because it will allocate chunks on the fly, and some of those
606 	 * will come from default DPDK memory while some will come from our
607 	 * external memory, so just assume 128MB will be enough for everyone.
608 	 */
609 	uint64_t hdr_mem = 128 << 20;
610 
611 	/* account for possible non-contiguousness */
612 	obj_sz = rte_mempool_calc_obj_size(mbuf_sz, 0, NULL);
613 	if (obj_sz > pgsz) {
614 		TESTPMD_LOG(ERR, "Object size is bigger than page size\n");
615 		return -1;
616 	}
617 
618 	mbuf_per_pg = pgsz / obj_sz;
619 	leftover = (nb_mbufs % mbuf_per_pg) > 0;
620 	n_pages = (nb_mbufs / mbuf_per_pg) + leftover;
621 
622 	mbuf_mem = n_pages * pgsz;
623 
624 	total_mem = RTE_ALIGN(hdr_mem + mbuf_mem, pgsz);
625 
626 	if (total_mem > SIZE_MAX) {
627 		TESTPMD_LOG(ERR, "Memory size too big\n");
628 		return -1;
629 	}
630 	*out = (size_t)total_mem;
631 
632 	return 0;
633 }
634 
635 static int
636 pagesz_flags(uint64_t page_sz)
637 {
638 	/* as per mmap() manpage, all page sizes are log2 of page size
639 	 * shifted by MAP_HUGE_SHIFT
640 	 */
641 	int log2 = rte_log2_u64(page_sz);
642 
643 	return (log2 << HUGE_SHIFT);
644 }
645 
646 static void *
647 alloc_mem(size_t memsz, size_t pgsz, bool huge)
648 {
649 	void *addr;
650 	int flags;
651 
652 	/* allocate anonymous hugepages */
653 	flags = MAP_ANONYMOUS | MAP_PRIVATE;
654 	if (huge)
655 		flags |= HUGE_FLAG | pagesz_flags(pgsz);
656 
657 	addr = mmap(NULL, memsz, PROT_READ | PROT_WRITE, flags, -1, 0);
658 	if (addr == MAP_FAILED)
659 		return NULL;
660 
661 	return addr;
662 }
663 
664 struct extmem_param {
665 	void *addr;
666 	size_t len;
667 	size_t pgsz;
668 	rte_iova_t *iova_table;
669 	unsigned int iova_table_len;
670 };
671 
672 static int
673 create_extmem(uint32_t nb_mbufs, uint32_t mbuf_sz, struct extmem_param *param,
674 		bool huge)
675 {
676 	uint64_t pgsizes[] = {RTE_PGSIZE_2M, RTE_PGSIZE_1G, /* x86_64, ARM */
677 			RTE_PGSIZE_16M, RTE_PGSIZE_16G};    /* POWER */
678 	unsigned int cur_page, n_pages, pgsz_idx;
679 	size_t mem_sz, cur_pgsz;
680 	rte_iova_t *iovas = NULL;
681 	void *addr;
682 	int ret;
683 
684 	for (pgsz_idx = 0; pgsz_idx < RTE_DIM(pgsizes); pgsz_idx++) {
685 		/* skip anything that is too big */
686 		if (pgsizes[pgsz_idx] > SIZE_MAX)
687 			continue;
688 
689 		cur_pgsz = pgsizes[pgsz_idx];
690 
691 		/* if we were told not to allocate hugepages, override */
692 		if (!huge)
693 			cur_pgsz = sysconf(_SC_PAGESIZE);
694 
695 		ret = calc_mem_size(nb_mbufs, mbuf_sz, cur_pgsz, &mem_sz);
696 		if (ret < 0) {
697 			TESTPMD_LOG(ERR, "Cannot calculate memory size\n");
698 			return -1;
699 		}
700 
701 		/* allocate our memory */
702 		addr = alloc_mem(mem_sz, cur_pgsz, huge);
703 
704 		/* if we couldn't allocate memory with a specified page size,
705 		 * that doesn't mean we can't do it with other page sizes, so
706 		 * try another one.
707 		 */
708 		if (addr == NULL)
709 			continue;
710 
711 		/* store IOVA addresses for every page in this memory area */
712 		n_pages = mem_sz / cur_pgsz;
713 
714 		iovas = malloc(sizeof(*iovas) * n_pages);
715 
716 		if (iovas == NULL) {
717 			TESTPMD_LOG(ERR, "Cannot allocate memory for iova addresses\n");
718 			goto fail;
719 		}
720 		/* lock memory if it's not huge pages */
721 		if (!huge)
722 			mlock(addr, mem_sz);
723 
724 		/* populate IOVA addresses */
725 		for (cur_page = 0; cur_page < n_pages; cur_page++) {
726 			rte_iova_t iova;
727 			size_t offset;
728 			void *cur;
729 
730 			offset = cur_pgsz * cur_page;
731 			cur = RTE_PTR_ADD(addr, offset);
732 
733 			/* touch the page before getting its IOVA */
734 			*(volatile char *)cur = 0;
735 
736 			iova = rte_mem_virt2iova(cur);
737 
738 			iovas[cur_page] = iova;
739 		}
740 
741 		break;
742 	}
743 	/* if we couldn't allocate anything */
744 	if (iovas == NULL)
745 		return -1;
746 
747 	param->addr = addr;
748 	param->len = mem_sz;
749 	param->pgsz = cur_pgsz;
750 	param->iova_table = iovas;
751 	param->iova_table_len = n_pages;
752 
753 	return 0;
754 fail:
755 	if (iovas)
756 		free(iovas);
757 	if (addr)
758 		munmap(addr, mem_sz);
759 
760 	return -1;
761 }
762 
763 static int
764 setup_extmem(uint32_t nb_mbufs, uint32_t mbuf_sz, bool huge)
765 {
766 	struct extmem_param param;
767 	int socket_id, ret;
768 
769 	memset(&param, 0, sizeof(param));
770 
771 	/* check if our heap exists */
772 	socket_id = rte_malloc_heap_get_socket(EXTMEM_HEAP_NAME);
773 	if (socket_id < 0) {
774 		/* create our heap */
775 		ret = rte_malloc_heap_create(EXTMEM_HEAP_NAME);
776 		if (ret < 0) {
777 			TESTPMD_LOG(ERR, "Cannot create heap\n");
778 			return -1;
779 		}
780 	}
781 
782 	ret = create_extmem(nb_mbufs, mbuf_sz, &param, huge);
783 	if (ret < 0) {
784 		TESTPMD_LOG(ERR, "Cannot create memory area\n");
785 		return -1;
786 	}
787 
788 	/* we now have a valid memory area, so add it to heap */
789 	ret = rte_malloc_heap_memory_add(EXTMEM_HEAP_NAME,
790 			param.addr, param.len, param.iova_table,
791 			param.iova_table_len, param.pgsz);
792 
793 	/* when using VFIO, memory is automatically mapped for DMA by EAL */
794 
795 	/* not needed any more */
796 	free(param.iova_table);
797 
798 	if (ret < 0) {
799 		TESTPMD_LOG(ERR, "Cannot add memory to heap\n");
800 		munmap(param.addr, param.len);
801 		return -1;
802 	}
803 
804 	/* success */
805 
806 	TESTPMD_LOG(DEBUG, "Allocated %zuMB of external memory\n",
807 			param.len >> 20);
808 
809 	return 0;
810 }
811 static void
812 dma_unmap_cb(struct rte_mempool *mp __rte_unused, void *opaque __rte_unused,
813 	     struct rte_mempool_memhdr *memhdr, unsigned mem_idx __rte_unused)
814 {
815 	uint16_t pid = 0;
816 	int ret;
817 
818 	RTE_ETH_FOREACH_DEV(pid) {
819 		struct rte_eth_dev *dev =
820 			&rte_eth_devices[pid];
821 
822 		ret = rte_dev_dma_unmap(dev->device, memhdr->addr, 0,
823 					memhdr->len);
824 		if (ret) {
825 			TESTPMD_LOG(DEBUG,
826 				    "unable to DMA unmap addr 0x%p "
827 				    "for device %s\n",
828 				    memhdr->addr, dev->data->name);
829 		}
830 	}
831 	ret = rte_extmem_unregister(memhdr->addr, memhdr->len);
832 	if (ret) {
833 		TESTPMD_LOG(DEBUG,
834 			    "unable to un-register addr 0x%p\n", memhdr->addr);
835 	}
836 }
837 
838 static void
839 dma_map_cb(struct rte_mempool *mp __rte_unused, void *opaque __rte_unused,
840 	   struct rte_mempool_memhdr *memhdr, unsigned mem_idx __rte_unused)
841 {
842 	uint16_t pid = 0;
843 	size_t page_size = sysconf(_SC_PAGESIZE);
844 	int ret;
845 
846 	ret = rte_extmem_register(memhdr->addr, memhdr->len, NULL, 0,
847 				  page_size);
848 	if (ret) {
849 		TESTPMD_LOG(DEBUG,
850 			    "unable to register addr 0x%p\n", memhdr->addr);
851 		return;
852 	}
853 	RTE_ETH_FOREACH_DEV(pid) {
854 		struct rte_eth_dev *dev =
855 			&rte_eth_devices[pid];
856 
857 		ret = rte_dev_dma_map(dev->device, memhdr->addr, 0,
858 				      memhdr->len);
859 		if (ret) {
860 			TESTPMD_LOG(DEBUG,
861 				    "unable to DMA map addr 0x%p "
862 				    "for device %s\n",
863 				    memhdr->addr, dev->data->name);
864 		}
865 	}
866 }
867 
868 /*
869  * Configuration initialisation done once at init time.
870  */
871 static struct rte_mempool *
872 mbuf_pool_create(uint16_t mbuf_seg_size, unsigned nb_mbuf,
873 		 unsigned int socket_id)
874 {
875 	char pool_name[RTE_MEMPOOL_NAMESIZE];
876 	struct rte_mempool *rte_mp = NULL;
877 	uint32_t mb_size;
878 
879 	mb_size = sizeof(struct rte_mbuf) + mbuf_seg_size;
880 	mbuf_poolname_build(socket_id, pool_name, sizeof(pool_name));
881 
882 	TESTPMD_LOG(INFO,
883 		"create a new mbuf pool <%s>: n=%u, size=%u, socket=%u\n",
884 		pool_name, nb_mbuf, mbuf_seg_size, socket_id);
885 
886 	switch (mp_alloc_type) {
887 	case MP_ALLOC_NATIVE:
888 		{
889 			/* wrapper to rte_mempool_create() */
890 			TESTPMD_LOG(INFO, "preferred mempool ops selected: %s\n",
891 					rte_mbuf_best_mempool_ops());
892 			rte_mp = rte_pktmbuf_pool_create(pool_name, nb_mbuf,
893 				mb_mempool_cache, 0, mbuf_seg_size, socket_id);
894 			break;
895 		}
896 	case MP_ALLOC_ANON:
897 		{
898 			rte_mp = rte_mempool_create_empty(pool_name, nb_mbuf,
899 				mb_size, (unsigned int) mb_mempool_cache,
900 				sizeof(struct rte_pktmbuf_pool_private),
901 				socket_id, mempool_flags);
902 			if (rte_mp == NULL)
903 				goto err;
904 
905 			if (rte_mempool_populate_anon(rte_mp) == 0) {
906 				rte_mempool_free(rte_mp);
907 				rte_mp = NULL;
908 				goto err;
909 			}
910 			rte_pktmbuf_pool_init(rte_mp, NULL);
911 			rte_mempool_obj_iter(rte_mp, rte_pktmbuf_init, NULL);
912 			rte_mempool_mem_iter(rte_mp, dma_map_cb, NULL);
913 			break;
914 		}
915 	case MP_ALLOC_XMEM:
916 	case MP_ALLOC_XMEM_HUGE:
917 		{
918 			int heap_socket;
919 			bool huge = mp_alloc_type == MP_ALLOC_XMEM_HUGE;
920 
921 			if (setup_extmem(nb_mbuf, mbuf_seg_size, huge) < 0)
922 				rte_exit(EXIT_FAILURE, "Could not create external memory\n");
923 
924 			heap_socket =
925 				rte_malloc_heap_get_socket(EXTMEM_HEAP_NAME);
926 			if (heap_socket < 0)
927 				rte_exit(EXIT_FAILURE, "Could not get external memory socket ID\n");
928 
929 			TESTPMD_LOG(INFO, "preferred mempool ops selected: %s\n",
930 					rte_mbuf_best_mempool_ops());
931 			rte_mp = rte_pktmbuf_pool_create(pool_name, nb_mbuf,
932 					mb_mempool_cache, 0, mbuf_seg_size,
933 					heap_socket);
934 			break;
935 		}
936 	default:
937 		{
938 			rte_exit(EXIT_FAILURE, "Invalid mempool creation mode\n");
939 		}
940 	}
941 
942 err:
943 	if (rte_mp == NULL) {
944 		rte_exit(EXIT_FAILURE,
945 			"Creation of mbuf pool for socket %u failed: %s\n",
946 			socket_id, rte_strerror(rte_errno));
947 	} else if (verbose_level > 0) {
948 		rte_mempool_dump(stdout, rte_mp);
949 	}
950 	return rte_mp;
951 }
952 
953 /*
954  * Check given socket id is valid or not with NUMA mode,
955  * if valid, return 0, else return -1
956  */
957 static int
958 check_socket_id(const unsigned int socket_id)
959 {
960 	static int warning_once = 0;
961 
962 	if (new_socket_id(socket_id)) {
963 		if (!warning_once && numa_support)
964 			printf("Warning: NUMA should be configured manually by"
965 			       " using --port-numa-config and"
966 			       " --ring-numa-config parameters along with"
967 			       " --numa.\n");
968 		warning_once = 1;
969 		return -1;
970 	}
971 	return 0;
972 }
973 
974 /*
975  * Get the allowed maximum number of RX queues.
976  * *pid return the port id which has minimal value of
977  * max_rx_queues in all ports.
978  */
979 queueid_t
980 get_allowed_max_nb_rxq(portid_t *pid)
981 {
982 	queueid_t allowed_max_rxq = MAX_QUEUE_ID;
983 	bool max_rxq_valid = false;
984 	portid_t pi;
985 	struct rte_eth_dev_info dev_info;
986 
987 	RTE_ETH_FOREACH_DEV(pi) {
988 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
989 			continue;
990 
991 		max_rxq_valid = true;
992 		if (dev_info.max_rx_queues < allowed_max_rxq) {
993 			allowed_max_rxq = dev_info.max_rx_queues;
994 			*pid = pi;
995 		}
996 	}
997 	return max_rxq_valid ? allowed_max_rxq : 0;
998 }
999 
1000 /*
1001  * Check input rxq is valid or not.
1002  * If input rxq is not greater than any of maximum number
1003  * of RX queues of all ports, it is valid.
1004  * if valid, return 0, else return -1
1005  */
1006 int
1007 check_nb_rxq(queueid_t rxq)
1008 {
1009 	queueid_t allowed_max_rxq;
1010 	portid_t pid = 0;
1011 
1012 	allowed_max_rxq = get_allowed_max_nb_rxq(&pid);
1013 	if (rxq > allowed_max_rxq) {
1014 		printf("Fail: input rxq (%u) can't be greater "
1015 		       "than max_rx_queues (%u) of port %u\n",
1016 		       rxq,
1017 		       allowed_max_rxq,
1018 		       pid);
1019 		return -1;
1020 	}
1021 	return 0;
1022 }
1023 
1024 /*
1025  * Get the allowed maximum number of TX queues.
1026  * *pid return the port id which has minimal value of
1027  * max_tx_queues in all ports.
1028  */
1029 queueid_t
1030 get_allowed_max_nb_txq(portid_t *pid)
1031 {
1032 	queueid_t allowed_max_txq = MAX_QUEUE_ID;
1033 	bool max_txq_valid = false;
1034 	portid_t pi;
1035 	struct rte_eth_dev_info dev_info;
1036 
1037 	RTE_ETH_FOREACH_DEV(pi) {
1038 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1039 			continue;
1040 
1041 		max_txq_valid = true;
1042 		if (dev_info.max_tx_queues < allowed_max_txq) {
1043 			allowed_max_txq = dev_info.max_tx_queues;
1044 			*pid = pi;
1045 		}
1046 	}
1047 	return max_txq_valid ? allowed_max_txq : 0;
1048 }
1049 
1050 /*
1051  * Check input txq is valid or not.
1052  * If input txq is not greater than any of maximum number
1053  * of TX queues of all ports, it is valid.
1054  * if valid, return 0, else return -1
1055  */
1056 int
1057 check_nb_txq(queueid_t txq)
1058 {
1059 	queueid_t allowed_max_txq;
1060 	portid_t pid = 0;
1061 
1062 	allowed_max_txq = get_allowed_max_nb_txq(&pid);
1063 	if (txq > allowed_max_txq) {
1064 		printf("Fail: input txq (%u) can't be greater "
1065 		       "than max_tx_queues (%u) of port %u\n",
1066 		       txq,
1067 		       allowed_max_txq,
1068 		       pid);
1069 		return -1;
1070 	}
1071 	return 0;
1072 }
1073 
1074 /*
1075  * Get the allowed maximum number of hairpin queues.
1076  * *pid return the port id which has minimal value of
1077  * max_hairpin_queues in all ports.
1078  */
1079 queueid_t
1080 get_allowed_max_nb_hairpinq(portid_t *pid)
1081 {
1082 	queueid_t allowed_max_hairpinq = MAX_QUEUE_ID;
1083 	portid_t pi;
1084 	struct rte_eth_hairpin_cap cap;
1085 
1086 	RTE_ETH_FOREACH_DEV(pi) {
1087 		if (rte_eth_dev_hairpin_capability_get(pi, &cap) != 0) {
1088 			*pid = pi;
1089 			return 0;
1090 		}
1091 		if (cap.max_nb_queues < allowed_max_hairpinq) {
1092 			allowed_max_hairpinq = cap.max_nb_queues;
1093 			*pid = pi;
1094 		}
1095 	}
1096 	return allowed_max_hairpinq;
1097 }
1098 
1099 /*
1100  * Check input hairpin is valid or not.
1101  * If input hairpin is not greater than any of maximum number
1102  * of hairpin queues of all ports, it is valid.
1103  * if valid, return 0, else return -1
1104  */
1105 int
1106 check_nb_hairpinq(queueid_t hairpinq)
1107 {
1108 	queueid_t allowed_max_hairpinq;
1109 	portid_t pid = 0;
1110 
1111 	allowed_max_hairpinq = get_allowed_max_nb_hairpinq(&pid);
1112 	if (hairpinq > allowed_max_hairpinq) {
1113 		printf("Fail: input hairpin (%u) can't be greater "
1114 		       "than max_hairpin_queues (%u) of port %u\n",
1115 		       hairpinq, allowed_max_hairpinq, pid);
1116 		return -1;
1117 	}
1118 	return 0;
1119 }
1120 
1121 static void
1122 init_config(void)
1123 {
1124 	portid_t pid;
1125 	struct rte_port *port;
1126 	struct rte_mempool *mbp;
1127 	unsigned int nb_mbuf_per_pool;
1128 	lcoreid_t  lc_id;
1129 	uint8_t port_per_socket[RTE_MAX_NUMA_NODES];
1130 	struct rte_gro_param gro_param;
1131 	uint32_t gso_types;
1132 	uint16_t data_size;
1133 	bool warning = 0;
1134 	int k;
1135 	int ret;
1136 
1137 	memset(port_per_socket,0,RTE_MAX_NUMA_NODES);
1138 
1139 	/* Configuration of logical cores. */
1140 	fwd_lcores = rte_zmalloc("testpmd: fwd_lcores",
1141 				sizeof(struct fwd_lcore *) * nb_lcores,
1142 				RTE_CACHE_LINE_SIZE);
1143 	if (fwd_lcores == NULL) {
1144 		rte_exit(EXIT_FAILURE, "rte_zmalloc(%d (struct fwd_lcore *)) "
1145 							"failed\n", nb_lcores);
1146 	}
1147 	for (lc_id = 0; lc_id < nb_lcores; lc_id++) {
1148 		fwd_lcores[lc_id] = rte_zmalloc("testpmd: struct fwd_lcore",
1149 					       sizeof(struct fwd_lcore),
1150 					       RTE_CACHE_LINE_SIZE);
1151 		if (fwd_lcores[lc_id] == NULL) {
1152 			rte_exit(EXIT_FAILURE, "rte_zmalloc(struct fwd_lcore) "
1153 								"failed\n");
1154 		}
1155 		fwd_lcores[lc_id]->cpuid_idx = lc_id;
1156 	}
1157 
1158 	RTE_ETH_FOREACH_DEV(pid) {
1159 		port = &ports[pid];
1160 		/* Apply default TxRx configuration for all ports */
1161 		port->dev_conf.txmode = tx_mode;
1162 		port->dev_conf.rxmode = rx_mode;
1163 
1164 		ret = eth_dev_info_get_print_err(pid, &port->dev_info);
1165 		if (ret != 0)
1166 			rte_exit(EXIT_FAILURE,
1167 				 "rte_eth_dev_info_get() failed\n");
1168 
1169 		if (!(port->dev_info.tx_offload_capa &
1170 		      DEV_TX_OFFLOAD_MBUF_FAST_FREE))
1171 			port->dev_conf.txmode.offloads &=
1172 				~DEV_TX_OFFLOAD_MBUF_FAST_FREE;
1173 		if (numa_support) {
1174 			if (port_numa[pid] != NUMA_NO_CONFIG)
1175 				port_per_socket[port_numa[pid]]++;
1176 			else {
1177 				uint32_t socket_id = rte_eth_dev_socket_id(pid);
1178 
1179 				/*
1180 				 * if socket_id is invalid,
1181 				 * set to the first available socket.
1182 				 */
1183 				if (check_socket_id(socket_id) < 0)
1184 					socket_id = socket_ids[0];
1185 				port_per_socket[socket_id]++;
1186 			}
1187 		}
1188 
1189 		/* Apply Rx offloads configuration */
1190 		for (k = 0; k < port->dev_info.max_rx_queues; k++)
1191 			port->rx_conf[k].offloads =
1192 				port->dev_conf.rxmode.offloads;
1193 		/* Apply Tx offloads configuration */
1194 		for (k = 0; k < port->dev_info.max_tx_queues; k++)
1195 			port->tx_conf[k].offloads =
1196 				port->dev_conf.txmode.offloads;
1197 
1198 		/* set flag to initialize port/queue */
1199 		port->need_reconfig = 1;
1200 		port->need_reconfig_queues = 1;
1201 		port->tx_metadata = 0;
1202 
1203 		/* Check for maximum number of segments per MTU. Accordingly
1204 		 * update the mbuf data size.
1205 		 */
1206 		if (port->dev_info.rx_desc_lim.nb_mtu_seg_max != UINT16_MAX &&
1207 				port->dev_info.rx_desc_lim.nb_mtu_seg_max != 0) {
1208 			data_size = rx_mode.max_rx_pkt_len /
1209 				port->dev_info.rx_desc_lim.nb_mtu_seg_max;
1210 
1211 			if ((data_size + RTE_PKTMBUF_HEADROOM) >
1212 							mbuf_data_size) {
1213 				mbuf_data_size = data_size +
1214 						 RTE_PKTMBUF_HEADROOM;
1215 				warning = 1;
1216 			}
1217 		}
1218 	}
1219 
1220 	if (warning)
1221 		TESTPMD_LOG(WARNING, "Configured mbuf size %hu\n",
1222 			    mbuf_data_size);
1223 
1224 	/*
1225 	 * Create pools of mbuf.
1226 	 * If NUMA support is disabled, create a single pool of mbuf in
1227 	 * socket 0 memory by default.
1228 	 * Otherwise, create a pool of mbuf in the memory of sockets 0 and 1.
1229 	 *
1230 	 * Use the maximum value of nb_rxd and nb_txd here, then nb_rxd and
1231 	 * nb_txd can be configured at run time.
1232 	 */
1233 	if (param_total_num_mbufs)
1234 		nb_mbuf_per_pool = param_total_num_mbufs;
1235 	else {
1236 		nb_mbuf_per_pool = RTE_TEST_RX_DESC_MAX +
1237 			(nb_lcores * mb_mempool_cache) +
1238 			RTE_TEST_TX_DESC_MAX + MAX_PKT_BURST;
1239 		nb_mbuf_per_pool *= RTE_MAX_ETHPORTS;
1240 	}
1241 
1242 	if (numa_support) {
1243 		uint8_t i;
1244 
1245 		for (i = 0; i < num_sockets; i++)
1246 			mempools[i] = mbuf_pool_create(mbuf_data_size,
1247 						       nb_mbuf_per_pool,
1248 						       socket_ids[i]);
1249 	} else {
1250 		if (socket_num == UMA_NO_CONFIG)
1251 			mempools[0] = mbuf_pool_create(mbuf_data_size,
1252 						       nb_mbuf_per_pool, 0);
1253 		else
1254 			mempools[socket_num] = mbuf_pool_create
1255 							(mbuf_data_size,
1256 							 nb_mbuf_per_pool,
1257 							 socket_num);
1258 	}
1259 
1260 	init_port_config();
1261 
1262 	gso_types = DEV_TX_OFFLOAD_TCP_TSO | DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
1263 		DEV_TX_OFFLOAD_GRE_TNL_TSO | DEV_TX_OFFLOAD_UDP_TSO;
1264 	/*
1265 	 * Records which Mbuf pool to use by each logical core, if needed.
1266 	 */
1267 	for (lc_id = 0; lc_id < nb_lcores; lc_id++) {
1268 		mbp = mbuf_pool_find(
1269 			rte_lcore_to_socket_id(fwd_lcores_cpuids[lc_id]));
1270 
1271 		if (mbp == NULL)
1272 			mbp = mbuf_pool_find(0);
1273 		fwd_lcores[lc_id]->mbp = mbp;
1274 		/* initialize GSO context */
1275 		fwd_lcores[lc_id]->gso_ctx.direct_pool = mbp;
1276 		fwd_lcores[lc_id]->gso_ctx.indirect_pool = mbp;
1277 		fwd_lcores[lc_id]->gso_ctx.gso_types = gso_types;
1278 		fwd_lcores[lc_id]->gso_ctx.gso_size = RTE_ETHER_MAX_LEN -
1279 			RTE_ETHER_CRC_LEN;
1280 		fwd_lcores[lc_id]->gso_ctx.flag = 0;
1281 	}
1282 
1283 	/* Configuration of packet forwarding streams. */
1284 	if (init_fwd_streams() < 0)
1285 		rte_exit(EXIT_FAILURE, "FAIL from init_fwd_streams()\n");
1286 
1287 	fwd_config_setup();
1288 
1289 	/* create a gro context for each lcore */
1290 	gro_param.gro_types = RTE_GRO_TCP_IPV4;
1291 	gro_param.max_flow_num = GRO_MAX_FLUSH_CYCLES;
1292 	gro_param.max_item_per_flow = MAX_PKT_BURST;
1293 	for (lc_id = 0; lc_id < nb_lcores; lc_id++) {
1294 		gro_param.socket_id = rte_lcore_to_socket_id(
1295 				fwd_lcores_cpuids[lc_id]);
1296 		fwd_lcores[lc_id]->gro_ctx = rte_gro_ctx_create(&gro_param);
1297 		if (fwd_lcores[lc_id]->gro_ctx == NULL) {
1298 			rte_exit(EXIT_FAILURE,
1299 					"rte_gro_ctx_create() failed\n");
1300 		}
1301 	}
1302 
1303 #if defined RTE_LIBRTE_PMD_SOFTNIC
1304 	if (strcmp(cur_fwd_eng->fwd_mode_name, "softnic") == 0) {
1305 		RTE_ETH_FOREACH_DEV(pid) {
1306 			port = &ports[pid];
1307 			const char *driver = port->dev_info.driver_name;
1308 
1309 			if (strcmp(driver, "net_softnic") == 0)
1310 				port->softport.fwd_lcore_arg = fwd_lcores;
1311 		}
1312 	}
1313 #endif
1314 
1315 }
1316 
1317 
1318 void
1319 reconfig(portid_t new_port_id, unsigned socket_id)
1320 {
1321 	struct rte_port *port;
1322 	int ret;
1323 
1324 	/* Reconfiguration of Ethernet ports. */
1325 	port = &ports[new_port_id];
1326 
1327 	ret = eth_dev_info_get_print_err(new_port_id, &port->dev_info);
1328 	if (ret != 0)
1329 		return;
1330 
1331 	/* set flag to initialize port/queue */
1332 	port->need_reconfig = 1;
1333 	port->need_reconfig_queues = 1;
1334 	port->socket_id = socket_id;
1335 
1336 	init_port_config();
1337 }
1338 
1339 
1340 int
1341 init_fwd_streams(void)
1342 {
1343 	portid_t pid;
1344 	struct rte_port *port;
1345 	streamid_t sm_id, nb_fwd_streams_new;
1346 	queueid_t q;
1347 
1348 	/* set socket id according to numa or not */
1349 	RTE_ETH_FOREACH_DEV(pid) {
1350 		port = &ports[pid];
1351 		if (nb_rxq > port->dev_info.max_rx_queues) {
1352 			printf("Fail: nb_rxq(%d) is greater than "
1353 				"max_rx_queues(%d)\n", nb_rxq,
1354 				port->dev_info.max_rx_queues);
1355 			return -1;
1356 		}
1357 		if (nb_txq > port->dev_info.max_tx_queues) {
1358 			printf("Fail: nb_txq(%d) is greater than "
1359 				"max_tx_queues(%d)\n", nb_txq,
1360 				port->dev_info.max_tx_queues);
1361 			return -1;
1362 		}
1363 		if (numa_support) {
1364 			if (port_numa[pid] != NUMA_NO_CONFIG)
1365 				port->socket_id = port_numa[pid];
1366 			else {
1367 				port->socket_id = rte_eth_dev_socket_id(pid);
1368 
1369 				/*
1370 				 * if socket_id is invalid,
1371 				 * set to the first available socket.
1372 				 */
1373 				if (check_socket_id(port->socket_id) < 0)
1374 					port->socket_id = socket_ids[0];
1375 			}
1376 		}
1377 		else {
1378 			if (socket_num == UMA_NO_CONFIG)
1379 				port->socket_id = 0;
1380 			else
1381 				port->socket_id = socket_num;
1382 		}
1383 	}
1384 
1385 	q = RTE_MAX(nb_rxq, nb_txq);
1386 	if (q == 0) {
1387 		printf("Fail: Cannot allocate fwd streams as number of queues is 0\n");
1388 		return -1;
1389 	}
1390 	nb_fwd_streams_new = (streamid_t)(nb_ports * q);
1391 	if (nb_fwd_streams_new == nb_fwd_streams)
1392 		return 0;
1393 	/* clear the old */
1394 	if (fwd_streams != NULL) {
1395 		for (sm_id = 0; sm_id < nb_fwd_streams; sm_id++) {
1396 			if (fwd_streams[sm_id] == NULL)
1397 				continue;
1398 			rte_free(fwd_streams[sm_id]);
1399 			fwd_streams[sm_id] = NULL;
1400 		}
1401 		rte_free(fwd_streams);
1402 		fwd_streams = NULL;
1403 	}
1404 
1405 	/* init new */
1406 	nb_fwd_streams = nb_fwd_streams_new;
1407 	if (nb_fwd_streams) {
1408 		fwd_streams = rte_zmalloc("testpmd: fwd_streams",
1409 			sizeof(struct fwd_stream *) * nb_fwd_streams,
1410 			RTE_CACHE_LINE_SIZE);
1411 		if (fwd_streams == NULL)
1412 			rte_exit(EXIT_FAILURE, "rte_zmalloc(%d"
1413 				 " (struct fwd_stream *)) failed\n",
1414 				 nb_fwd_streams);
1415 
1416 		for (sm_id = 0; sm_id < nb_fwd_streams; sm_id++) {
1417 			fwd_streams[sm_id] = rte_zmalloc("testpmd:"
1418 				" struct fwd_stream", sizeof(struct fwd_stream),
1419 				RTE_CACHE_LINE_SIZE);
1420 			if (fwd_streams[sm_id] == NULL)
1421 				rte_exit(EXIT_FAILURE, "rte_zmalloc"
1422 					 "(struct fwd_stream) failed\n");
1423 		}
1424 	}
1425 
1426 	return 0;
1427 }
1428 
#ifdef RTE_TEST_PMD_RECORD_BURST_STATS
/*
 * Print a one-line summary of a burst-size histogram.
 *
 * rx_tx is the label printed in front of "-bursts" ("RX" or "TX").
 * pbs->pkt_burst_spread[n] holds how many bursts carried n packets.
 *
 * The line shows the total number of bursts and the share of the two most
 * frequent burst sizes; anything else is reported as "others".
 * Nothing is printed when no burst was recorded.
 */
static void
pkt_burst_stats_display(const char *rx_tx, struct pkt_burst_stats *pbs)
{
	unsigned int total_burst = 0;
	unsigned int nb_burst;
	unsigned int burst_stats[2] = {0, 0};
	uint16_t pktnb_stats[2] = {0, 0};
	uint16_t nb_pkt;
	int burst_percent[3];

	/*
	 * Compute the total number of bursts and track the two burst sizes
	 * that occurred most often.
	 */
	for (nb_pkt = 0; nb_pkt < MAX_PKT_BURST; nb_pkt++) {
		nb_burst = pbs->pkt_burst_spread[nb_pkt];
		if (nb_burst == 0)
			continue;
		total_burst += nb_burst;
		if (nb_burst > burst_stats[0]) {
			burst_stats[1] = burst_stats[0];
			pktnb_stats[1] = pktnb_stats[0];
			burst_stats[0] = nb_burst;
			pktnb_stats[0] = nb_pkt;
		} else if (nb_burst > burst_stats[1]) {
			burst_stats[1] = nb_burst;
			pktnb_stats[1] = nb_pkt;
		}
	}
	if (total_burst == 0)
		return;

	/*
	 * The multiplication is done in 64 bits: in 32 bits it wraps as soon
	 * as a counter exceeds UINT_MAX / 100 and yields a bogus percentage.
	 */
	burst_percent[0] =
		(int)(((uint64_t)burst_stats[0] * 100) / total_burst);
	printf("  %s-bursts : %u [%d%% of %d pkts", rx_tx, total_burst,
	       burst_percent[0], (int) pktnb_stats[0]);
	if (burst_stats[0] == total_burst) {
		printf("]\n");
		return;
	}
	if (burst_stats[0] + burst_stats[1] == total_burst) {
		printf(" + %d%% of %d pkts]\n",
		       100 - burst_percent[0], pktnb_stats[1]);
		return;
	}
	burst_percent[1] =
		(int)(((uint64_t)burst_stats[1] * 100) / total_burst);
	burst_percent[2] = 100 - (burst_percent[0] + burst_percent[1]);
	if ((burst_percent[1] == 0) || (burst_percent[2] == 0)) {
		printf(" + %d%% of others]\n", 100 - burst_percent[0]);
		return;
	}
	printf(" + %d%% of %d pkts + %d%% of others]\n",
	       burst_percent[1], (int) pktnb_stats[1], burst_percent[2]);
}
#endif /* RTE_TEST_PMD_RECORD_BURST_STATS */
1486 
1487 static void
1488 fwd_stream_stats_display(streamid_t stream_id)
1489 {
1490 	struct fwd_stream *fs;
1491 	static const char *fwd_top_stats_border = "-------";
1492 
1493 	fs = fwd_streams[stream_id];
1494 	if ((fs->rx_packets == 0) && (fs->tx_packets == 0) &&
1495 	    (fs->fwd_dropped == 0))
1496 		return;
1497 	printf("\n  %s Forward Stats for RX Port=%2d/Queue=%2d -> "
1498 	       "TX Port=%2d/Queue=%2d %s\n",
1499 	       fwd_top_stats_border, fs->rx_port, fs->rx_queue,
1500 	       fs->tx_port, fs->tx_queue, fwd_top_stats_border);
1501 	printf("  RX-packets: %-14"PRIu64" TX-packets: %-14"PRIu64
1502 	       " TX-dropped: %-14"PRIu64,
1503 	       fs->rx_packets, fs->tx_packets, fs->fwd_dropped);
1504 
1505 	/* if checksum mode */
1506 	if (cur_fwd_eng == &csum_fwd_engine) {
1507 		printf("  RX- bad IP checksum: %-14"PRIu64
1508 		       "  Rx- bad L4 checksum: %-14"PRIu64
1509 		       " Rx- bad outer L4 checksum: %-14"PRIu64"\n",
1510 			fs->rx_bad_ip_csum, fs->rx_bad_l4_csum,
1511 			fs->rx_bad_outer_l4_csum);
1512 	} else {
1513 		printf("\n");
1514 	}
1515 
1516 #ifdef RTE_TEST_PMD_RECORD_BURST_STATS
1517 	pkt_burst_stats_display("RX", &fs->rx_burst_stats);
1518 	pkt_burst_stats_display("TX", &fs->tx_burst_stats);
1519 #endif
1520 }
1521 
1522 void
1523 fwd_stats_display(void)
1524 {
1525 	static const char *fwd_stats_border = "----------------------";
1526 	static const char *acc_stats_border = "+++++++++++++++";
1527 	struct {
1528 		struct fwd_stream *rx_stream;
1529 		struct fwd_stream *tx_stream;
1530 		uint64_t tx_dropped;
1531 		uint64_t rx_bad_ip_csum;
1532 		uint64_t rx_bad_l4_csum;
1533 		uint64_t rx_bad_outer_l4_csum;
1534 	} ports_stats[RTE_MAX_ETHPORTS];
1535 	uint64_t total_rx_dropped = 0;
1536 	uint64_t total_tx_dropped = 0;
1537 	uint64_t total_rx_nombuf = 0;
1538 	struct rte_eth_stats stats;
1539 #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
1540 	uint64_t fwd_cycles = 0;
1541 #endif
1542 	uint64_t total_recv = 0;
1543 	uint64_t total_xmit = 0;
1544 	struct rte_port *port;
1545 	streamid_t sm_id;
1546 	portid_t pt_id;
1547 	int i;
1548 
1549 	memset(ports_stats, 0, sizeof(ports_stats));
1550 
1551 	for (sm_id = 0; sm_id < cur_fwd_config.nb_fwd_streams; sm_id++) {
1552 		struct fwd_stream *fs = fwd_streams[sm_id];
1553 
1554 		if (cur_fwd_config.nb_fwd_streams >
1555 		    cur_fwd_config.nb_fwd_ports) {
1556 			fwd_stream_stats_display(sm_id);
1557 		} else {
1558 			ports_stats[fs->tx_port].tx_stream = fs;
1559 			ports_stats[fs->rx_port].rx_stream = fs;
1560 		}
1561 
1562 		ports_stats[fs->tx_port].tx_dropped += fs->fwd_dropped;
1563 
1564 		ports_stats[fs->rx_port].rx_bad_ip_csum += fs->rx_bad_ip_csum;
1565 		ports_stats[fs->rx_port].rx_bad_l4_csum += fs->rx_bad_l4_csum;
1566 		ports_stats[fs->rx_port].rx_bad_outer_l4_csum +=
1567 				fs->rx_bad_outer_l4_csum;
1568 
1569 #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
1570 		fwd_cycles += fs->core_cycles;
1571 #endif
1572 	}
1573 	for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
1574 		uint8_t j;
1575 
1576 		pt_id = fwd_ports_ids[i];
1577 		port = &ports[pt_id];
1578 
1579 		rte_eth_stats_get(pt_id, &stats);
1580 		stats.ipackets -= port->stats.ipackets;
1581 		stats.opackets -= port->stats.opackets;
1582 		stats.ibytes -= port->stats.ibytes;
1583 		stats.obytes -= port->stats.obytes;
1584 		stats.imissed -= port->stats.imissed;
1585 		stats.oerrors -= port->stats.oerrors;
1586 		stats.rx_nombuf -= port->stats.rx_nombuf;
1587 
1588 		total_recv += stats.ipackets;
1589 		total_xmit += stats.opackets;
1590 		total_rx_dropped += stats.imissed;
1591 		total_tx_dropped += ports_stats[pt_id].tx_dropped;
1592 		total_tx_dropped += stats.oerrors;
1593 		total_rx_nombuf  += stats.rx_nombuf;
1594 
1595 		printf("\n  %s Forward statistics for port %-2d %s\n",
1596 		       fwd_stats_border, pt_id, fwd_stats_border);
1597 
1598 		if (!port->rx_queue_stats_mapping_enabled &&
1599 		    !port->tx_queue_stats_mapping_enabled) {
1600 			printf("  RX-packets: %-14"PRIu64
1601 			       " RX-dropped: %-14"PRIu64
1602 			       "RX-total: %-"PRIu64"\n",
1603 			       stats.ipackets, stats.imissed,
1604 			       stats.ipackets + stats.imissed);
1605 
1606 			if (cur_fwd_eng == &csum_fwd_engine)
1607 				printf("  Bad-ipcsum: %-14"PRIu64
1608 				       " Bad-l4csum: %-14"PRIu64
1609 				       "Bad-outer-l4csum: %-14"PRIu64"\n",
1610 				       ports_stats[pt_id].rx_bad_ip_csum,
1611 				       ports_stats[pt_id].rx_bad_l4_csum,
1612 				       ports_stats[pt_id].rx_bad_outer_l4_csum);
1613 			if (stats.ierrors + stats.rx_nombuf > 0) {
1614 				printf("  RX-error: %-"PRIu64"\n",
1615 				       stats.ierrors);
1616 				printf("  RX-nombufs: %-14"PRIu64"\n",
1617 				       stats.rx_nombuf);
1618 			}
1619 
1620 			printf("  TX-packets: %-14"PRIu64
1621 			       " TX-dropped: %-14"PRIu64
1622 			       "TX-total: %-"PRIu64"\n",
1623 			       stats.opackets, ports_stats[pt_id].tx_dropped,
1624 			       stats.opackets + ports_stats[pt_id].tx_dropped);
1625 		} else {
1626 			printf("  RX-packets:             %14"PRIu64
1627 			       "    RX-dropped:%14"PRIu64
1628 			       "    RX-total:%14"PRIu64"\n",
1629 			       stats.ipackets, stats.imissed,
1630 			       stats.ipackets + stats.imissed);
1631 
1632 			if (cur_fwd_eng == &csum_fwd_engine)
1633 				printf("  Bad-ipcsum:%14"PRIu64
1634 				       "    Bad-l4csum:%14"PRIu64
1635 				       "    Bad-outer-l4csum: %-14"PRIu64"\n",
1636 				       ports_stats[pt_id].rx_bad_ip_csum,
1637 				       ports_stats[pt_id].rx_bad_l4_csum,
1638 				       ports_stats[pt_id].rx_bad_outer_l4_csum);
1639 			if ((stats.ierrors + stats.rx_nombuf) > 0) {
1640 				printf("  RX-error:%"PRIu64"\n", stats.ierrors);
1641 				printf("  RX-nombufs:             %14"PRIu64"\n",
1642 				       stats.rx_nombuf);
1643 			}
1644 
1645 			printf("  TX-packets:             %14"PRIu64
1646 			       "    TX-dropped:%14"PRIu64
1647 			       "    TX-total:%14"PRIu64"\n",
1648 			       stats.opackets, ports_stats[pt_id].tx_dropped,
1649 			       stats.opackets + ports_stats[pt_id].tx_dropped);
1650 		}
1651 
1652 #ifdef RTE_TEST_PMD_RECORD_BURST_STATS
1653 		if (ports_stats[pt_id].rx_stream)
1654 			pkt_burst_stats_display("RX",
1655 				&ports_stats[pt_id].rx_stream->rx_burst_stats);
1656 		if (ports_stats[pt_id].tx_stream)
1657 			pkt_burst_stats_display("TX",
1658 				&ports_stats[pt_id].tx_stream->tx_burst_stats);
1659 #endif
1660 
1661 		if (port->rx_queue_stats_mapping_enabled) {
1662 			printf("\n");
1663 			for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
1664 				printf("  Stats reg %2d RX-packets:%14"PRIu64
1665 				       "     RX-errors:%14"PRIu64
1666 				       "    RX-bytes:%14"PRIu64"\n",
1667 				       j, stats.q_ipackets[j],
1668 				       stats.q_errors[j], stats.q_ibytes[j]);
1669 			}
1670 			printf("\n");
1671 		}
1672 		if (port->tx_queue_stats_mapping_enabled) {
1673 			for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
1674 				printf("  Stats reg %2d TX-packets:%14"PRIu64
1675 				       "                                 TX-bytes:%14"
1676 				       PRIu64"\n",
1677 				       j, stats.q_opackets[j],
1678 				       stats.q_obytes[j]);
1679 			}
1680 		}
1681 
1682 		printf("  %s--------------------------------%s\n",
1683 		       fwd_stats_border, fwd_stats_border);
1684 	}
1685 
1686 	printf("\n  %s Accumulated forward statistics for all ports"
1687 	       "%s\n",
1688 	       acc_stats_border, acc_stats_border);
1689 	printf("  RX-packets: %-14"PRIu64" RX-dropped: %-14"PRIu64"RX-total: "
1690 	       "%-"PRIu64"\n"
1691 	       "  TX-packets: %-14"PRIu64" TX-dropped: %-14"PRIu64"TX-total: "
1692 	       "%-"PRIu64"\n",
1693 	       total_recv, total_rx_dropped, total_recv + total_rx_dropped,
1694 	       total_xmit, total_tx_dropped, total_xmit + total_tx_dropped);
1695 	if (total_rx_nombuf > 0)
1696 		printf("  RX-nombufs: %-14"PRIu64"\n", total_rx_nombuf);
1697 	printf("  %s++++++++++++++++++++++++++++++++++++++++++++++"
1698 	       "%s\n",
1699 	       acc_stats_border, acc_stats_border);
1700 #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
1701 	if (total_recv > 0)
1702 		printf("\n  CPU cycles/packet=%u (total cycles="
1703 		       "%"PRIu64" / total RX packets=%"PRIu64")\n",
1704 		       (unsigned int)(fwd_cycles / total_recv),
1705 		       fwd_cycles, total_recv);
1706 #endif
1707 }
1708 
1709 void
1710 fwd_stats_reset(void)
1711 {
1712 	streamid_t sm_id;
1713 	portid_t pt_id;
1714 	int i;
1715 
1716 	for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
1717 		pt_id = fwd_ports_ids[i];
1718 		rte_eth_stats_get(pt_id, &ports[pt_id].stats);
1719 	}
1720 	for (sm_id = 0; sm_id < cur_fwd_config.nb_fwd_streams; sm_id++) {
1721 		struct fwd_stream *fs = fwd_streams[sm_id];
1722 
1723 		fs->rx_packets = 0;
1724 		fs->tx_packets = 0;
1725 		fs->fwd_dropped = 0;
1726 		fs->rx_bad_ip_csum = 0;
1727 		fs->rx_bad_l4_csum = 0;
1728 		fs->rx_bad_outer_l4_csum = 0;
1729 
1730 #ifdef RTE_TEST_PMD_RECORD_BURST_STATS
1731 		memset(&fs->rx_burst_stats, 0, sizeof(fs->rx_burst_stats));
1732 		memset(&fs->tx_burst_stats, 0, sizeof(fs->tx_burst_stats));
1733 #endif
1734 #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
1735 		fs->core_cycles = 0;
1736 #endif
1737 	}
1738 }
1739 
1740 static void
1741 flush_fwd_rx_queues(void)
1742 {
1743 	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
1744 	portid_t  rxp;
1745 	portid_t port_id;
1746 	queueid_t rxq;
1747 	uint16_t  nb_rx;
1748 	uint16_t  i;
1749 	uint8_t   j;
1750 	uint64_t prev_tsc = 0, diff_tsc, cur_tsc, timer_tsc = 0;
1751 	uint64_t timer_period;
1752 
1753 	/* convert to number of cycles */
1754 	timer_period = rte_get_timer_hz(); /* 1 second timeout */
1755 
1756 	for (j = 0; j < 2; j++) {
1757 		for (rxp = 0; rxp < cur_fwd_config.nb_fwd_ports; rxp++) {
1758 			for (rxq = 0; rxq < nb_rxq; rxq++) {
1759 				port_id = fwd_ports_ids[rxp];
1760 				/**
1761 				* testpmd can stuck in the below do while loop
1762 				* if rte_eth_rx_burst() always returns nonzero
1763 				* packets. So timer is added to exit this loop
1764 				* after 1sec timer expiry.
1765 				*/
1766 				prev_tsc = rte_rdtsc();
1767 				do {
1768 					nb_rx = rte_eth_rx_burst(port_id, rxq,
1769 						pkts_burst, MAX_PKT_BURST);
1770 					for (i = 0; i < nb_rx; i++)
1771 						rte_pktmbuf_free(pkts_burst[i]);
1772 
1773 					cur_tsc = rte_rdtsc();
1774 					diff_tsc = cur_tsc - prev_tsc;
1775 					timer_tsc += diff_tsc;
1776 				} while ((nb_rx > 0) &&
1777 					(timer_tsc < timer_period));
1778 				timer_tsc = 0;
1779 			}
1780 		}
1781 		rte_delay_ms(10); /* wait 10 milli-seconds before retrying */
1782 	}
1783 }
1784 
1785 static void
1786 run_pkt_fwd_on_lcore(struct fwd_lcore *fc, packet_fwd_t pkt_fwd)
1787 {
1788 	struct fwd_stream **fsm;
1789 	streamid_t nb_fs;
1790 	streamid_t sm_id;
1791 #ifdef RTE_LIBRTE_BITRATE
1792 	uint64_t tics_per_1sec;
1793 	uint64_t tics_datum;
1794 	uint64_t tics_current;
1795 	uint16_t i, cnt_ports;
1796 
1797 	cnt_ports = nb_ports;
1798 	tics_datum = rte_rdtsc();
1799 	tics_per_1sec = rte_get_timer_hz();
1800 #endif
1801 	fsm = &fwd_streams[fc->stream_idx];
1802 	nb_fs = fc->stream_nb;
1803 	do {
1804 		for (sm_id = 0; sm_id < nb_fs; sm_id++)
1805 			(*pkt_fwd)(fsm[sm_id]);
1806 #ifdef RTE_LIBRTE_BITRATE
1807 		if (bitrate_enabled != 0 &&
1808 				bitrate_lcore_id == rte_lcore_id()) {
1809 			tics_current = rte_rdtsc();
1810 			if (tics_current - tics_datum >= tics_per_1sec) {
1811 				/* Periodic bitrate calculation */
1812 				for (i = 0; i < cnt_ports; i++)
1813 					rte_stats_bitrate_calc(bitrate_data,
1814 						ports_ids[i]);
1815 				tics_datum = tics_current;
1816 			}
1817 		}
1818 #endif
1819 #ifdef RTE_LIBRTE_LATENCY_STATS
1820 		if (latencystats_enabled != 0 &&
1821 				latencystats_lcore_id == rte_lcore_id())
1822 			rte_latencystats_update();
1823 #endif
1824 
1825 	} while (! fc->stopped);
1826 }
1827 
1828 static int
1829 start_pkt_forward_on_core(void *fwd_arg)
1830 {
1831 	run_pkt_fwd_on_lcore((struct fwd_lcore *) fwd_arg,
1832 			     cur_fwd_config.fwd_eng->packet_fwd);
1833 	return 0;
1834 }
1835 
1836 /*
1837  * Run the TXONLY packet forwarding engine to send a single burst of packets.
1838  * Used to start communication flows in network loopback test configurations.
1839  */
1840 static int
1841 run_one_txonly_burst_on_core(void *fwd_arg)
1842 {
1843 	struct fwd_lcore *fwd_lc;
1844 	struct fwd_lcore tmp_lcore;
1845 
1846 	fwd_lc = (struct fwd_lcore *) fwd_arg;
1847 	tmp_lcore = *fwd_lc;
1848 	tmp_lcore.stopped = 1;
1849 	run_pkt_fwd_on_lcore(&tmp_lcore, tx_only_engine.packet_fwd);
1850 	return 0;
1851 }
1852 
1853 /*
1854  * Launch packet forwarding:
1855  *     - Setup per-port forwarding context.
1856  *     - launch logical cores with their forwarding configuration.
1857  */
1858 static void
1859 launch_packet_forwarding(lcore_function_t *pkt_fwd_on_lcore)
1860 {
1861 	port_fwd_begin_t port_fwd_begin;
1862 	unsigned int i;
1863 	unsigned int lc_id;
1864 	int diag;
1865 
1866 	port_fwd_begin = cur_fwd_config.fwd_eng->port_fwd_begin;
1867 	if (port_fwd_begin != NULL) {
1868 		for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++)
1869 			(*port_fwd_begin)(fwd_ports_ids[i]);
1870 	}
1871 	for (i = 0; i < cur_fwd_config.nb_fwd_lcores; i++) {
1872 		lc_id = fwd_lcores_cpuids[i];
1873 		if ((interactive == 0) || (lc_id != rte_lcore_id())) {
1874 			fwd_lcores[i]->stopped = 0;
1875 			diag = rte_eal_remote_launch(pkt_fwd_on_lcore,
1876 						     fwd_lcores[i], lc_id);
1877 			if (diag != 0)
1878 				printf("launch lcore %u failed - diag=%d\n",
1879 				       lc_id, diag);
1880 		}
1881 	}
1882 }
1883 
1884 /*
1885  * Launch packet forwarding configuration.
1886  */
1887 void
1888 start_packet_forwarding(int with_tx_first)
1889 {
1890 	port_fwd_begin_t port_fwd_begin;
1891 	port_fwd_end_t  port_fwd_end;
1892 	struct rte_port *port;
1893 	unsigned int i;
1894 	portid_t   pt_id;
1895 
1896 	if (strcmp(cur_fwd_eng->fwd_mode_name, "rxonly") == 0 && !nb_rxq)
1897 		rte_exit(EXIT_FAILURE, "rxq are 0, cannot use rxonly fwd mode\n");
1898 
1899 	if (strcmp(cur_fwd_eng->fwd_mode_name, "txonly") == 0 && !nb_txq)
1900 		rte_exit(EXIT_FAILURE, "txq are 0, cannot use txonly fwd mode\n");
1901 
1902 	if ((strcmp(cur_fwd_eng->fwd_mode_name, "rxonly") != 0 &&
1903 		strcmp(cur_fwd_eng->fwd_mode_name, "txonly") != 0) &&
1904 		(!nb_rxq || !nb_txq))
1905 		rte_exit(EXIT_FAILURE,
1906 			"Either rxq or txq are 0, cannot use %s fwd mode\n",
1907 			cur_fwd_eng->fwd_mode_name);
1908 
1909 	if (all_ports_started() == 0) {
1910 		printf("Not all ports were started\n");
1911 		return;
1912 	}
1913 	if (test_done == 0) {
1914 		printf("Packet forwarding already started\n");
1915 		return;
1916 	}
1917 
1918 
1919 	if(dcb_test) {
1920 		for (i = 0; i < nb_fwd_ports; i++) {
1921 			pt_id = fwd_ports_ids[i];
1922 			port = &ports[pt_id];
1923 			if (!port->dcb_flag) {
1924 				printf("In DCB mode, all forwarding ports must "
1925                                        "be configured in this mode.\n");
1926 				return;
1927 			}
1928 		}
1929 		if (nb_fwd_lcores == 1) {
1930 			printf("In DCB mode,the nb forwarding cores "
1931                                "should be larger than 1.\n");
1932 			return;
1933 		}
1934 	}
1935 	test_done = 0;
1936 
1937 	fwd_config_setup();
1938 
1939 	if(!no_flush_rx)
1940 		flush_fwd_rx_queues();
1941 
1942 	pkt_fwd_config_display(&cur_fwd_config);
1943 	rxtx_config_display();
1944 
1945 	fwd_stats_reset();
1946 	for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
1947 		pt_id = fwd_ports_ids[i];
1948 		port = &ports[pt_id];
1949 		map_port_queue_stats_mapping_registers(pt_id, port);
1950 	}
1951 	if (with_tx_first) {
1952 		port_fwd_begin = tx_only_engine.port_fwd_begin;
1953 		if (port_fwd_begin != NULL) {
1954 			for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++)
1955 				(*port_fwd_begin)(fwd_ports_ids[i]);
1956 		}
1957 		while (with_tx_first--) {
1958 			launch_packet_forwarding(
1959 					run_one_txonly_burst_on_core);
1960 			rte_eal_mp_wait_lcore();
1961 		}
1962 		port_fwd_end = tx_only_engine.port_fwd_end;
1963 		if (port_fwd_end != NULL) {
1964 			for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++)
1965 				(*port_fwd_end)(fwd_ports_ids[i]);
1966 		}
1967 	}
1968 	launch_packet_forwarding(start_pkt_forward_on_core);
1969 }
1970 
1971 void
1972 stop_packet_forwarding(void)
1973 {
1974 	port_fwd_end_t port_fwd_end;
1975 	lcoreid_t lc_id;
1976 	portid_t pt_id;
1977 	int i;
1978 
1979 	if (test_done) {
1980 		printf("Packet forwarding not started\n");
1981 		return;
1982 	}
1983 	printf("Telling cores to stop...");
1984 	for (lc_id = 0; lc_id < cur_fwd_config.nb_fwd_lcores; lc_id++)
1985 		fwd_lcores[lc_id]->stopped = 1;
1986 	printf("\nWaiting for lcores to finish...\n");
1987 	rte_eal_mp_wait_lcore();
1988 	port_fwd_end = cur_fwd_config.fwd_eng->port_fwd_end;
1989 	if (port_fwd_end != NULL) {
1990 		for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
1991 			pt_id = fwd_ports_ids[i];
1992 			(*port_fwd_end)(pt_id);
1993 		}
1994 	}
1995 
1996 	fwd_stats_display();
1997 
1998 	printf("\nDone.\n");
1999 	test_done = 1;
2000 }
2001 
2002 void
2003 dev_set_link_up(portid_t pid)
2004 {
2005 	if (rte_eth_dev_set_link_up(pid) < 0)
2006 		printf("\nSet link up fail.\n");
2007 }
2008 
2009 void
2010 dev_set_link_down(portid_t pid)
2011 {
2012 	if (rte_eth_dev_set_link_down(pid) < 0)
2013 		printf("\nSet link down fail.\n");
2014 }
2015 
2016 static int
2017 all_ports_started(void)
2018 {
2019 	portid_t pi;
2020 	struct rte_port *port;
2021 
2022 	RTE_ETH_FOREACH_DEV(pi) {
2023 		port = &ports[pi];
2024 		/* Check if there is a port which is not started */
2025 		if ((port->port_status != RTE_PORT_STARTED) &&
2026 			(port->slave_flag == 0))
2027 			return 0;
2028 	}
2029 
2030 	/* No port is not started */
2031 	return 1;
2032 }
2033 
2034 int
2035 port_is_stopped(portid_t port_id)
2036 {
2037 	struct rte_port *port = &ports[port_id];
2038 
2039 	if ((port->port_status != RTE_PORT_STOPPED) &&
2040 	    (port->slave_flag == 0))
2041 		return 0;
2042 	return 1;
2043 }
2044 
2045 int
2046 all_ports_stopped(void)
2047 {
2048 	portid_t pi;
2049 
2050 	RTE_ETH_FOREACH_DEV(pi) {
2051 		if (!port_is_stopped(pi))
2052 			return 0;
2053 	}
2054 
2055 	return 1;
2056 }
2057 
2058 int
2059 port_is_started(portid_t port_id)
2060 {
2061 	if (port_id_is_invalid(port_id, ENABLED_WARN))
2062 		return 0;
2063 
2064 	if (ports[port_id].port_status != RTE_PORT_STARTED)
2065 		return 0;
2066 
2067 	return 1;
2068 }
2069 
2070 /* Configure the Rx and Tx hairpin queues for the selected port. */
2071 static int
2072 setup_hairpin_queues(portid_t pi)
2073 {
2074 	queueid_t qi;
2075 	struct rte_eth_hairpin_conf hairpin_conf = {
2076 		.peer_count = 1,
2077 	};
2078 	int i;
2079 	int diag;
2080 	struct rte_port *port = &ports[pi];
2081 
2082 	for (qi = nb_txq, i = 0; qi < nb_hairpinq + nb_txq; qi++) {
2083 		hairpin_conf.peers[0].port = pi;
2084 		hairpin_conf.peers[0].queue = i + nb_rxq;
2085 		diag = rte_eth_tx_hairpin_queue_setup
2086 			(pi, qi, nb_txd, &hairpin_conf);
2087 		i++;
2088 		if (diag == 0)
2089 			continue;
2090 
2091 		/* Fail to setup rx queue, return */
2092 		if (rte_atomic16_cmpset(&(port->port_status),
2093 					RTE_PORT_HANDLING,
2094 					RTE_PORT_STOPPED) == 0)
2095 			printf("Port %d can not be set back "
2096 					"to stopped\n", pi);
2097 		printf("Fail to configure port %d hairpin "
2098 				"queues\n", pi);
2099 		/* try to reconfigure queues next time */
2100 		port->need_reconfig_queues = 1;
2101 		return -1;
2102 	}
2103 	for (qi = nb_rxq, i = 0; qi < nb_hairpinq + nb_rxq; qi++) {
2104 		hairpin_conf.peers[0].port = pi;
2105 		hairpin_conf.peers[0].queue = i + nb_txq;
2106 		diag = rte_eth_rx_hairpin_queue_setup
2107 			(pi, qi, nb_rxd, &hairpin_conf);
2108 		i++;
2109 		if (diag == 0)
2110 			continue;
2111 
2112 		/* Fail to setup rx queue, return */
2113 		if (rte_atomic16_cmpset(&(port->port_status),
2114 					RTE_PORT_HANDLING,
2115 					RTE_PORT_STOPPED) == 0)
2116 			printf("Port %d can not be set back "
2117 					"to stopped\n", pi);
2118 		printf("Fail to configure port %d hairpin "
2119 				"queues\n", pi);
2120 		/* try to reconfigure queues next time */
2121 		port->need_reconfig_queues = 1;
2122 		return -1;
2123 	}
2124 	return 0;
2125 }
2126 
2127 int
2128 start_port(portid_t pid)
2129 {
2130 	int diag, need_check_link_status = -1;
2131 	portid_t pi;
2132 	queueid_t qi;
2133 	struct rte_port *port;
2134 	struct rte_ether_addr mac_addr;
2135 	struct rte_eth_hairpin_cap cap;
2136 
2137 	if (port_id_is_invalid(pid, ENABLED_WARN))
2138 		return 0;
2139 
2140 	if(dcb_config)
2141 		dcb_test = 1;
2142 	RTE_ETH_FOREACH_DEV(pi) {
2143 		if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
2144 			continue;
2145 
2146 		need_check_link_status = 0;
2147 		port = &ports[pi];
2148 		if (rte_atomic16_cmpset(&(port->port_status), RTE_PORT_STOPPED,
2149 						 RTE_PORT_HANDLING) == 0) {
2150 			printf("Port %d is now not stopped\n", pi);
2151 			continue;
2152 		}
2153 
2154 		if (port->need_reconfig > 0) {
2155 			port->need_reconfig = 0;
2156 
2157 			if (flow_isolate_all) {
2158 				int ret = port_flow_isolate(pi, 1);
2159 				if (ret) {
2160 					printf("Failed to apply isolated"
2161 					       " mode on port %d\n", pi);
2162 					return -1;
2163 				}
2164 			}
2165 			configure_rxtx_dump_callbacks(0);
2166 			printf("Configuring Port %d (socket %u)\n", pi,
2167 					port->socket_id);
2168 			if (nb_hairpinq > 0 &&
2169 			    rte_eth_dev_hairpin_capability_get(pi, &cap)) {
2170 				printf("Port %d doesn't support hairpin "
2171 				       "queues\n", pi);
2172 				return -1;
2173 			}
2174 			/* configure port */
2175 			diag = rte_eth_dev_configure(pi, nb_rxq + nb_hairpinq,
2176 						     nb_txq + nb_hairpinq,
2177 						     &(port->dev_conf));
2178 			if (diag != 0) {
2179 				if (rte_atomic16_cmpset(&(port->port_status),
2180 				RTE_PORT_HANDLING, RTE_PORT_STOPPED) == 0)
2181 					printf("Port %d can not be set back "
2182 							"to stopped\n", pi);
2183 				printf("Fail to configure port %d\n", pi);
2184 				/* try to reconfigure port next time */
2185 				port->need_reconfig = 1;
2186 				return -1;
2187 			}
2188 		}
2189 		if (port->need_reconfig_queues > 0) {
2190 			port->need_reconfig_queues = 0;
2191 			/* setup tx queues */
2192 			for (qi = 0; qi < nb_txq; qi++) {
2193 				if ((numa_support) &&
2194 					(txring_numa[pi] != NUMA_NO_CONFIG))
2195 					diag = rte_eth_tx_queue_setup(pi, qi,
2196 						port->nb_tx_desc[qi],
2197 						txring_numa[pi],
2198 						&(port->tx_conf[qi]));
2199 				else
2200 					diag = rte_eth_tx_queue_setup(pi, qi,
2201 						port->nb_tx_desc[qi],
2202 						port->socket_id,
2203 						&(port->tx_conf[qi]));
2204 
2205 				if (diag == 0)
2206 					continue;
2207 
2208 				/* Fail to setup tx queue, return */
2209 				if (rte_atomic16_cmpset(&(port->port_status),
2210 							RTE_PORT_HANDLING,
2211 							RTE_PORT_STOPPED) == 0)
2212 					printf("Port %d can not be set back "
2213 							"to stopped\n", pi);
2214 				printf("Fail to configure port %d tx queues\n",
2215 				       pi);
2216 				/* try to reconfigure queues next time */
2217 				port->need_reconfig_queues = 1;
2218 				return -1;
2219 			}
2220 			for (qi = 0; qi < nb_rxq; qi++) {
2221 				/* setup rx queues */
2222 				if ((numa_support) &&
2223 					(rxring_numa[pi] != NUMA_NO_CONFIG)) {
2224 					struct rte_mempool * mp =
2225 						mbuf_pool_find(rxring_numa[pi]);
2226 					if (mp == NULL) {
2227 						printf("Failed to setup RX queue:"
2228 							"No mempool allocation"
2229 							" on the socket %d\n",
2230 							rxring_numa[pi]);
2231 						return -1;
2232 					}
2233 
2234 					diag = rte_eth_rx_queue_setup(pi, qi,
2235 					     port->nb_rx_desc[qi],
2236 					     rxring_numa[pi],
2237 					     &(port->rx_conf[qi]),
2238 					     mp);
2239 				} else {
2240 					struct rte_mempool *mp =
2241 						mbuf_pool_find(port->socket_id);
2242 					if (mp == NULL) {
2243 						printf("Failed to setup RX queue:"
2244 							"No mempool allocation"
2245 							" on the socket %d\n",
2246 							port->socket_id);
2247 						return -1;
2248 					}
2249 					diag = rte_eth_rx_queue_setup(pi, qi,
2250 					     port->nb_rx_desc[qi],
2251 					     port->socket_id,
2252 					     &(port->rx_conf[qi]),
2253 					     mp);
2254 				}
2255 				if (diag == 0)
2256 					continue;
2257 
2258 				/* Fail to setup rx queue, return */
2259 				if (rte_atomic16_cmpset(&(port->port_status),
2260 							RTE_PORT_HANDLING,
2261 							RTE_PORT_STOPPED) == 0)
2262 					printf("Port %d can not be set back "
2263 							"to stopped\n", pi);
2264 				printf("Fail to configure port %d rx queues\n",
2265 				       pi);
2266 				/* try to reconfigure queues next time */
2267 				port->need_reconfig_queues = 1;
2268 				return -1;
2269 			}
2270 			/* setup hairpin queues */
2271 			if (setup_hairpin_queues(pi) != 0)
2272 				return -1;
2273 		}
2274 		configure_rxtx_dump_callbacks(verbose_level);
2275 		if (clear_ptypes) {
2276 			diag = rte_eth_dev_set_ptypes(pi, RTE_PTYPE_UNKNOWN,
2277 					NULL, 0);
2278 			if (diag < 0)
2279 				printf(
2280 				"Port %d: Failed to disable Ptype parsing\n",
2281 				pi);
2282 		}
2283 
2284 		/* start port */
2285 		if (rte_eth_dev_start(pi) < 0) {
2286 			printf("Fail to start port %d\n", pi);
2287 
2288 			/* Fail to setup rx queue, return */
2289 			if (rte_atomic16_cmpset(&(port->port_status),
2290 				RTE_PORT_HANDLING, RTE_PORT_STOPPED) == 0)
2291 				printf("Port %d can not be set back to "
2292 							"stopped\n", pi);
2293 			continue;
2294 		}
2295 
2296 		if (rte_atomic16_cmpset(&(port->port_status),
2297 			RTE_PORT_HANDLING, RTE_PORT_STARTED) == 0)
2298 			printf("Port %d can not be set into started\n", pi);
2299 
2300 		if (eth_macaddr_get_print_err(pi, &mac_addr) == 0)
2301 			printf("Port %d: %02X:%02X:%02X:%02X:%02X:%02X\n", pi,
2302 				mac_addr.addr_bytes[0], mac_addr.addr_bytes[1],
2303 				mac_addr.addr_bytes[2], mac_addr.addr_bytes[3],
2304 				mac_addr.addr_bytes[4], mac_addr.addr_bytes[5]);
2305 
2306 		/* at least one port started, need checking link status */
2307 		need_check_link_status = 1;
2308 	}
2309 
2310 	if (need_check_link_status == 1 && !no_link_check)
2311 		check_all_ports_link_status(RTE_PORT_ALL);
2312 	else if (need_check_link_status == 0)
2313 		printf("Please stop the ports first\n");
2314 
2315 	printf("Done\n");
2316 	return 0;
2317 }
2318 
2319 void
2320 stop_port(portid_t pid)
2321 {
2322 	portid_t pi;
2323 	struct rte_port *port;
2324 	int need_check_link_status = 0;
2325 
2326 	if (dcb_test) {
2327 		dcb_test = 0;
2328 		dcb_config = 0;
2329 	}
2330 
2331 	if (port_id_is_invalid(pid, ENABLED_WARN))
2332 		return;
2333 
2334 	printf("Stopping ports...\n");
2335 
2336 	RTE_ETH_FOREACH_DEV(pi) {
2337 		if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
2338 			continue;
2339 
2340 		if (port_is_forwarding(pi) != 0 && test_done == 0) {
2341 			printf("Please remove port %d from forwarding configuration.\n", pi);
2342 			continue;
2343 		}
2344 
2345 		if (port_is_bonding_slave(pi)) {
2346 			printf("Please remove port %d from bonded device.\n", pi);
2347 			continue;
2348 		}
2349 
2350 		port = &ports[pi];
2351 		if (rte_atomic16_cmpset(&(port->port_status), RTE_PORT_STARTED,
2352 						RTE_PORT_HANDLING) == 0)
2353 			continue;
2354 
2355 		rte_eth_dev_stop(pi);
2356 
2357 		if (rte_atomic16_cmpset(&(port->port_status),
2358 			RTE_PORT_HANDLING, RTE_PORT_STOPPED) == 0)
2359 			printf("Port %d can not be set into stopped\n", pi);
2360 		need_check_link_status = 1;
2361 	}
2362 	if (need_check_link_status && !no_link_check)
2363 		check_all_ports_link_status(RTE_PORT_ALL);
2364 
2365 	printf("Done\n");
2366 }
2367 
2368 static void
2369 remove_invalid_ports_in(portid_t *array, portid_t *total)
2370 {
2371 	portid_t i;
2372 	portid_t new_total = 0;
2373 
2374 	for (i = 0; i < *total; i++)
2375 		if (!port_id_is_invalid(array[i], DISABLED_WARN)) {
2376 			array[new_total] = array[i];
2377 			new_total++;
2378 		}
2379 	*total = new_total;
2380 }
2381 
2382 static void
2383 remove_invalid_ports(void)
2384 {
2385 	remove_invalid_ports_in(ports_ids, &nb_ports);
2386 	remove_invalid_ports_in(fwd_ports_ids, &nb_fwd_ports);
2387 	nb_cfg_ports = nb_fwd_ports;
2388 }
2389 
2390 void
2391 close_port(portid_t pid)
2392 {
2393 	portid_t pi;
2394 	struct rte_port *port;
2395 
2396 	if (port_id_is_invalid(pid, ENABLED_WARN))
2397 		return;
2398 
2399 	printf("Closing ports...\n");
2400 
2401 	RTE_ETH_FOREACH_DEV(pi) {
2402 		if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
2403 			continue;
2404 
2405 		if (port_is_forwarding(pi) != 0 && test_done == 0) {
2406 			printf("Please remove port %d from forwarding configuration.\n", pi);
2407 			continue;
2408 		}
2409 
2410 		if (port_is_bonding_slave(pi)) {
2411 			printf("Please remove port %d from bonded device.\n", pi);
2412 			continue;
2413 		}
2414 
2415 		port = &ports[pi];
2416 		if (rte_atomic16_cmpset(&(port->port_status),
2417 			RTE_PORT_CLOSED, RTE_PORT_CLOSED) == 1) {
2418 			printf("Port %d is already closed\n", pi);
2419 			continue;
2420 		}
2421 
2422 		if (rte_atomic16_cmpset(&(port->port_status),
2423 			RTE_PORT_STOPPED, RTE_PORT_HANDLING) == 0) {
2424 			printf("Port %d is now not stopped\n", pi);
2425 			continue;
2426 		}
2427 
2428 		if (port->flow_list)
2429 			port_flow_flush(pi);
2430 		rte_eth_dev_close(pi);
2431 
2432 		remove_invalid_ports();
2433 
2434 		if (rte_atomic16_cmpset(&(port->port_status),
2435 			RTE_PORT_HANDLING, RTE_PORT_CLOSED) == 0)
2436 			printf("Port %d cannot be set to closed\n", pi);
2437 	}
2438 
2439 	printf("Done\n");
2440 }
2441 
2442 void
2443 reset_port(portid_t pid)
2444 {
2445 	int diag;
2446 	portid_t pi;
2447 	struct rte_port *port;
2448 
2449 	if (port_id_is_invalid(pid, ENABLED_WARN))
2450 		return;
2451 
2452 	if ((pid == (portid_t)RTE_PORT_ALL && !all_ports_stopped()) ||
2453 		(pid != (portid_t)RTE_PORT_ALL && !port_is_stopped(pid))) {
2454 		printf("Can not reset port(s), please stop port(s) first.\n");
2455 		return;
2456 	}
2457 
2458 	printf("Resetting ports...\n");
2459 
2460 	RTE_ETH_FOREACH_DEV(pi) {
2461 		if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
2462 			continue;
2463 
2464 		if (port_is_forwarding(pi) != 0 && test_done == 0) {
2465 			printf("Please remove port %d from forwarding "
2466 			       "configuration.\n", pi);
2467 			continue;
2468 		}
2469 
2470 		if (port_is_bonding_slave(pi)) {
2471 			printf("Please remove port %d from bonded device.\n",
2472 			       pi);
2473 			continue;
2474 		}
2475 
2476 		diag = rte_eth_dev_reset(pi);
2477 		if (diag == 0) {
2478 			port = &ports[pi];
2479 			port->need_reconfig = 1;
2480 			port->need_reconfig_queues = 1;
2481 		} else {
2482 			printf("Failed to reset port %d. diag=%d\n", pi, diag);
2483 		}
2484 	}
2485 
2486 	printf("Done\n");
2487 }
2488 
2489 void
2490 attach_port(char *identifier)
2491 {
2492 	portid_t pi;
2493 	struct rte_dev_iterator iterator;
2494 
2495 	printf("Attaching a new port...\n");
2496 
2497 	if (identifier == NULL) {
2498 		printf("Invalid parameters are specified\n");
2499 		return;
2500 	}
2501 
2502 	if (rte_dev_probe(identifier) < 0) {
2503 		TESTPMD_LOG(ERR, "Failed to attach port %s\n", identifier);
2504 		return;
2505 	}
2506 
2507 	/* first attach mode: event */
2508 	if (setup_on_probe_event) {
2509 		/* new ports are detected on RTE_ETH_EVENT_NEW event */
2510 		for (pi = 0; pi < RTE_MAX_ETHPORTS; pi++)
2511 			if (ports[pi].port_status == RTE_PORT_HANDLING &&
2512 					ports[pi].need_setup != 0)
2513 				setup_attached_port(pi);
2514 		return;
2515 	}
2516 
2517 	/* second attach mode: iterator */
2518 	RTE_ETH_FOREACH_MATCHING_DEV(pi, identifier, &iterator) {
2519 		/* setup ports matching the devargs used for probing */
2520 		if (port_is_forwarding(pi))
2521 			continue; /* port was already attached before */
2522 		setup_attached_port(pi);
2523 	}
2524 }
2525 
2526 static void
2527 setup_attached_port(portid_t pi)
2528 {
2529 	unsigned int socket_id;
2530 	int ret;
2531 
2532 	socket_id = (unsigned)rte_eth_dev_socket_id(pi);
2533 	/* if socket_id is invalid, set to the first available socket. */
2534 	if (check_socket_id(socket_id) < 0)
2535 		socket_id = socket_ids[0];
2536 	reconfig(pi, socket_id);
2537 	ret = rte_eth_promiscuous_enable(pi);
2538 	if (ret != 0)
2539 		printf("Error during enabling promiscuous mode for port %u: %s - ignore\n",
2540 			pi, rte_strerror(-ret));
2541 
2542 	ports_ids[nb_ports++] = pi;
2543 	fwd_ports_ids[nb_fwd_ports++] = pi;
2544 	nb_cfg_ports = nb_fwd_ports;
2545 	ports[pi].need_setup = 0;
2546 	ports[pi].port_status = RTE_PORT_STOPPED;
2547 
2548 	printf("Port %d is attached. Now total ports is %d\n", pi, nb_ports);
2549 	printf("Done\n");
2550 }
2551 
2552 void
2553 detach_port_device(portid_t port_id)
2554 {
2555 	struct rte_device *dev;
2556 	portid_t sibling;
2557 
2558 	printf("Removing a device...\n");
2559 
2560 	dev = rte_eth_devices[port_id].device;
2561 	if (dev == NULL) {
2562 		printf("Device already removed\n");
2563 		return;
2564 	}
2565 
2566 	if (ports[port_id].port_status != RTE_PORT_CLOSED) {
2567 		if (ports[port_id].port_status != RTE_PORT_STOPPED) {
2568 			printf("Port not stopped\n");
2569 			return;
2570 		}
2571 		printf("Port was not closed\n");
2572 		if (ports[port_id].flow_list)
2573 			port_flow_flush(port_id);
2574 	}
2575 
2576 	if (rte_dev_remove(dev) < 0) {
2577 		TESTPMD_LOG(ERR, "Failed to detach device %s\n", dev->name);
2578 		return;
2579 	}
2580 	RTE_ETH_FOREACH_DEV_OF(sibling, dev) {
2581 		/* reset mapping between old ports and removed device */
2582 		rte_eth_devices[sibling].device = NULL;
2583 		if (ports[sibling].port_status != RTE_PORT_CLOSED) {
2584 			/* sibling ports are forced to be closed */
2585 			ports[sibling].port_status = RTE_PORT_CLOSED;
2586 			printf("Port %u is closed\n", sibling);
2587 		}
2588 	}
2589 
2590 	remove_invalid_ports();
2591 
2592 	printf("Device of port %u is detached\n", port_id);
2593 	printf("Now total ports is %d\n", nb_ports);
2594 	printf("Done\n");
2595 	return;
2596 }
2597 
2598 void
2599 detach_device(char *identifier)
2600 {
2601 	struct rte_dev_iterator iterator;
2602 	struct rte_devargs da;
2603 	portid_t port_id;
2604 
2605 	printf("Removing a device...\n");
2606 
2607 	memset(&da, 0, sizeof(da));
2608 	if (rte_devargs_parsef(&da, "%s", identifier)) {
2609 		printf("cannot parse identifier\n");
2610 		if (da.args)
2611 			free(da.args);
2612 		return;
2613 	}
2614 
2615 	RTE_ETH_FOREACH_MATCHING_DEV(port_id, identifier, &iterator) {
2616 		if (ports[port_id].port_status != RTE_PORT_CLOSED) {
2617 			if (ports[port_id].port_status != RTE_PORT_STOPPED) {
2618 				printf("Port %u not stopped\n", port_id);
2619 				return;
2620 			}
2621 
2622 			/* sibling ports are forced to be closed */
2623 			if (ports[port_id].flow_list)
2624 				port_flow_flush(port_id);
2625 			ports[port_id].port_status = RTE_PORT_CLOSED;
2626 			printf("Port %u is now closed\n", port_id);
2627 		}
2628 	}
2629 
2630 	if (rte_eal_hotplug_remove(da.bus->name, da.name) != 0) {
2631 		TESTPMD_LOG(ERR, "Failed to detach device %s(%s)\n",
2632 			    da.name, da.bus->name);
2633 		return;
2634 	}
2635 
2636 	remove_invalid_ports();
2637 
2638 	printf("Device %s is detached\n", identifier);
2639 	printf("Now total ports is %d\n", nb_ports);
2640 	printf("Done\n");
2641 }
2642 
2643 void
2644 pmd_test_exit(void)
2645 {
2646 	portid_t pt_id;
2647 	int ret;
2648 	int i;
2649 
2650 	if (test_done == 0)
2651 		stop_packet_forwarding();
2652 
2653 	for (i = 0 ; i < RTE_MAX_NUMA_NODES ; i++) {
2654 		if (mempools[i]) {
2655 			if (mp_alloc_type == MP_ALLOC_ANON)
2656 				rte_mempool_mem_iter(mempools[i], dma_unmap_cb,
2657 						     NULL);
2658 		}
2659 	}
2660 	if (ports != NULL) {
2661 		no_link_check = 1;
2662 		RTE_ETH_FOREACH_DEV(pt_id) {
2663 			printf("\nStopping port %d...\n", pt_id);
2664 			fflush(stdout);
2665 			stop_port(pt_id);
2666 		}
2667 		RTE_ETH_FOREACH_DEV(pt_id) {
2668 			printf("\nShutting down port %d...\n", pt_id);
2669 			fflush(stdout);
2670 			close_port(pt_id);
2671 		}
2672 	}
2673 
2674 	if (hot_plug) {
2675 		ret = rte_dev_event_monitor_stop();
2676 		if (ret) {
2677 			RTE_LOG(ERR, EAL,
2678 				"fail to stop device event monitor.");
2679 			return;
2680 		}
2681 
2682 		ret = rte_dev_event_callback_unregister(NULL,
2683 			dev_event_callback, NULL);
2684 		if (ret < 0) {
2685 			RTE_LOG(ERR, EAL,
2686 				"fail to unregister device event callback.\n");
2687 			return;
2688 		}
2689 
2690 		ret = rte_dev_hotplug_handle_disable();
2691 		if (ret) {
2692 			RTE_LOG(ERR, EAL,
2693 				"fail to disable hotplug handling.\n");
2694 			return;
2695 		}
2696 	}
2697 	for (i = 0 ; i < RTE_MAX_NUMA_NODES ; i++) {
2698 		if (mempools[i])
2699 			rte_mempool_free(mempools[i]);
2700 	}
2701 
2702 	printf("\nBye...\n");
2703 }
2704 
/* Handler of a test-menu command: takes no argument, returns nothing. */
typedef void (*cmd_func_t)(void);

/* One test-menu entry: the command name and the handler to run for it. */
struct pmd_test_command {
	const char *cmd_name;
	cmd_func_t cmd_func;
};

/* Number of entries in the pmd_test_menu array. */
#define PMD_TEST_CMD_NB (sizeof(pmd_test_menu) / sizeof(*pmd_test_menu))
2712 
2713 /* Check the link status of all ports in up to 9s, and print them finally */
2714 static void
2715 check_all_ports_link_status(uint32_t port_mask)
2716 {
2717 #define CHECK_INTERVAL 100 /* 100ms */
2718 #define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
2719 	portid_t portid;
2720 	uint8_t count, all_ports_up, print_flag = 0;
2721 	struct rte_eth_link link;
2722 	int ret;
2723 
2724 	printf("Checking link statuses...\n");
2725 	fflush(stdout);
2726 	for (count = 0; count <= MAX_CHECK_TIME; count++) {
2727 		all_ports_up = 1;
2728 		RTE_ETH_FOREACH_DEV(portid) {
2729 			if ((port_mask & (1 << portid)) == 0)
2730 				continue;
2731 			memset(&link, 0, sizeof(link));
2732 			ret = rte_eth_link_get_nowait(portid, &link);
2733 			if (ret < 0) {
2734 				all_ports_up = 0;
2735 				if (print_flag == 1)
2736 					printf("Port %u link get failed: %s\n",
2737 						portid, rte_strerror(-ret));
2738 				continue;
2739 			}
2740 			/* print link status if flag set */
2741 			if (print_flag == 1) {
2742 				if (link.link_status)
2743 					printf(
2744 					"Port%d Link Up. speed %u Mbps- %s\n",
2745 					portid, link.link_speed,
2746 				(link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
2747 					("full-duplex") : ("half-duplex\n"));
2748 				else
2749 					printf("Port %d Link Down\n", portid);
2750 				continue;
2751 			}
2752 			/* clear all_ports_up flag if any link down */
2753 			if (link.link_status == ETH_LINK_DOWN) {
2754 				all_ports_up = 0;
2755 				break;
2756 			}
2757 		}
2758 		/* after finally printing all link status, get out */
2759 		if (print_flag == 1)
2760 			break;
2761 
2762 		if (all_ports_up == 0) {
2763 			fflush(stdout);
2764 			rte_delay_ms(CHECK_INTERVAL);
2765 		}
2766 
2767 		/* set the print_flag if all ports up or timeout */
2768 		if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
2769 			print_flag = 1;
2770 		}
2771 
2772 		if (lsc_interrupt)
2773 			break;
2774 	}
2775 }
2776 
2777 /*
2778  * This callback is for remove a port for a device. It has limitation because
2779  * it is not for multiple port removal for a device.
2780  * TODO: the device detach invoke will plan to be removed from user side to
2781  * eal. And convert all PMDs to free port resources on ether device closing.
2782  */
2783 static void
2784 rmv_port_callback(void *arg)
2785 {
2786 	int need_to_start = 0;
2787 	int org_no_link_check = no_link_check;
2788 	portid_t port_id = (intptr_t)arg;
2789 
2790 	RTE_ETH_VALID_PORTID_OR_RET(port_id);
2791 
2792 	if (!test_done && port_is_forwarding(port_id)) {
2793 		need_to_start = 1;
2794 		stop_packet_forwarding();
2795 	}
2796 	no_link_check = 1;
2797 	stop_port(port_id);
2798 	no_link_check = org_no_link_check;
2799 	close_port(port_id);
2800 	detach_port_device(port_id);
2801 	if (need_to_start)
2802 		start_packet_forwarding(0);
2803 }
2804 
2805 /* This function is used by the interrupt thread */
2806 static int
2807 eth_event_callback(portid_t port_id, enum rte_eth_event_type type, void *param,
2808 		  void *ret_param)
2809 {
2810 	RTE_SET_USED(param);
2811 	RTE_SET_USED(ret_param);
2812 
2813 	if (type >= RTE_ETH_EVENT_MAX) {
2814 		fprintf(stderr, "\nPort %" PRIu16 ": %s called upon invalid event %d\n",
2815 			port_id, __func__, type);
2816 		fflush(stderr);
2817 	} else if (event_print_mask & (UINT32_C(1) << type)) {
2818 		printf("\nPort %" PRIu16 ": %s event\n", port_id,
2819 			eth_event_desc[type]);
2820 		fflush(stdout);
2821 	}
2822 
2823 	switch (type) {
2824 	case RTE_ETH_EVENT_NEW:
2825 		ports[port_id].need_setup = 1;
2826 		ports[port_id].port_status = RTE_PORT_HANDLING;
2827 		break;
2828 	case RTE_ETH_EVENT_INTR_RMV:
2829 		if (port_id_is_invalid(port_id, DISABLED_WARN))
2830 			break;
2831 		if (rte_eal_alarm_set(100000,
2832 				rmv_port_callback, (void *)(intptr_t)port_id))
2833 			fprintf(stderr, "Could not set up deferred device removal\n");
2834 		break;
2835 	default:
2836 		break;
2837 	}
2838 	return 0;
2839 }
2840 
2841 static int
2842 register_eth_event_callback(void)
2843 {
2844 	int ret;
2845 	enum rte_eth_event_type event;
2846 
2847 	for (event = RTE_ETH_EVENT_UNKNOWN;
2848 			event < RTE_ETH_EVENT_MAX; event++) {
2849 		ret = rte_eth_dev_callback_register(RTE_ETH_ALL,
2850 				event,
2851 				eth_event_callback,
2852 				NULL);
2853 		if (ret != 0) {
2854 			TESTPMD_LOG(ERR, "Failed to register callback for "
2855 					"%s event\n", eth_event_desc[event]);
2856 			return -1;
2857 		}
2858 	}
2859 
2860 	return 0;
2861 }
2862 
2863 /* This function is used by the interrupt thread */
2864 static void
2865 dev_event_callback(const char *device_name, enum rte_dev_event_type type,
2866 			     __rte_unused void *arg)
2867 {
2868 	uint16_t port_id;
2869 	int ret;
2870 
2871 	if (type >= RTE_DEV_EVENT_MAX) {
2872 		fprintf(stderr, "%s called upon invalid event %d\n",
2873 			__func__, type);
2874 		fflush(stderr);
2875 	}
2876 
2877 	switch (type) {
2878 	case RTE_DEV_EVENT_REMOVE:
2879 		RTE_LOG(DEBUG, EAL, "The device: %s has been removed!\n",
2880 			device_name);
2881 		ret = rte_eth_dev_get_port_by_name(device_name, &port_id);
2882 		if (ret) {
2883 			RTE_LOG(ERR, EAL, "can not get port by device %s!\n",
2884 				device_name);
2885 			return;
2886 		}
2887 		/*
2888 		 * Because the user's callback is invoked in eal interrupt
2889 		 * callback, the interrupt callback need to be finished before
2890 		 * it can be unregistered when detaching device. So finish
2891 		 * callback soon and use a deferred removal to detach device
2892 		 * is need. It is a workaround, once the device detaching be
2893 		 * moved into the eal in the future, the deferred removal could
2894 		 * be deleted.
2895 		 */
2896 		if (rte_eal_alarm_set(100000,
2897 				rmv_port_callback, (void *)(intptr_t)port_id))
2898 			RTE_LOG(ERR, EAL,
2899 				"Could not set up deferred device removal\n");
2900 		break;
2901 	case RTE_DEV_EVENT_ADD:
2902 		RTE_LOG(ERR, EAL, "The device: %s has been added!\n",
2903 			device_name);
2904 		/* TODO: After finish kernel driver binding,
2905 		 * begin to attach port.
2906 		 */
2907 		break;
2908 	default:
2909 		break;
2910 	}
2911 }
2912 
2913 static int
2914 set_tx_queue_stats_mapping_registers(portid_t port_id, struct rte_port *port)
2915 {
2916 	uint16_t i;
2917 	int diag;
2918 	uint8_t mapping_found = 0;
2919 
2920 	for (i = 0; i < nb_tx_queue_stats_mappings; i++) {
2921 		if ((tx_queue_stats_mappings[i].port_id == port_id) &&
2922 				(tx_queue_stats_mappings[i].queue_id < nb_txq )) {
2923 			diag = rte_eth_dev_set_tx_queue_stats_mapping(port_id,
2924 					tx_queue_stats_mappings[i].queue_id,
2925 					tx_queue_stats_mappings[i].stats_counter_id);
2926 			if (diag != 0)
2927 				return diag;
2928 			mapping_found = 1;
2929 		}
2930 	}
2931 	if (mapping_found)
2932 		port->tx_queue_stats_mapping_enabled = 1;
2933 	return 0;
2934 }
2935 
2936 static int
2937 set_rx_queue_stats_mapping_registers(portid_t port_id, struct rte_port *port)
2938 {
2939 	uint16_t i;
2940 	int diag;
2941 	uint8_t mapping_found = 0;
2942 
2943 	for (i = 0; i < nb_rx_queue_stats_mappings; i++) {
2944 		if ((rx_queue_stats_mappings[i].port_id == port_id) &&
2945 				(rx_queue_stats_mappings[i].queue_id < nb_rxq )) {
2946 			diag = rte_eth_dev_set_rx_queue_stats_mapping(port_id,
2947 					rx_queue_stats_mappings[i].queue_id,
2948 					rx_queue_stats_mappings[i].stats_counter_id);
2949 			if (diag != 0)
2950 				return diag;
2951 			mapping_found = 1;
2952 		}
2953 	}
2954 	if (mapping_found)
2955 		port->rx_queue_stats_mapping_enabled = 1;
2956 	return 0;
2957 }
2958 
2959 static void
2960 map_port_queue_stats_mapping_registers(portid_t pi, struct rte_port *port)
2961 {
2962 	int diag = 0;
2963 
2964 	diag = set_tx_queue_stats_mapping_registers(pi, port);
2965 	if (diag != 0) {
2966 		if (diag == -ENOTSUP) {
2967 			port->tx_queue_stats_mapping_enabled = 0;
2968 			printf("TX queue stats mapping not supported port id=%d\n", pi);
2969 		}
2970 		else
2971 			rte_exit(EXIT_FAILURE,
2972 					"set_tx_queue_stats_mapping_registers "
2973 					"failed for port id=%d diag=%d\n",
2974 					pi, diag);
2975 	}
2976 
2977 	diag = set_rx_queue_stats_mapping_registers(pi, port);
2978 	if (diag != 0) {
2979 		if (diag == -ENOTSUP) {
2980 			port->rx_queue_stats_mapping_enabled = 0;
2981 			printf("RX queue stats mapping not supported port id=%d\n", pi);
2982 		}
2983 		else
2984 			rte_exit(EXIT_FAILURE,
2985 					"set_rx_queue_stats_mapping_registers "
2986 					"failed for port id=%d diag=%d\n",
2987 					pi, diag);
2988 	}
2989 }
2990 
2991 static void
2992 rxtx_port_config(struct rte_port *port)
2993 {
2994 	uint16_t qid;
2995 	uint64_t offloads;
2996 
2997 	for (qid = 0; qid < nb_rxq; qid++) {
2998 		offloads = port->rx_conf[qid].offloads;
2999 		port->rx_conf[qid] = port->dev_info.default_rxconf;
3000 		if (offloads != 0)
3001 			port->rx_conf[qid].offloads = offloads;
3002 
3003 		/* Check if any Rx parameters have been passed */
3004 		if (rx_pthresh != RTE_PMD_PARAM_UNSET)
3005 			port->rx_conf[qid].rx_thresh.pthresh = rx_pthresh;
3006 
3007 		if (rx_hthresh != RTE_PMD_PARAM_UNSET)
3008 			port->rx_conf[qid].rx_thresh.hthresh = rx_hthresh;
3009 
3010 		if (rx_wthresh != RTE_PMD_PARAM_UNSET)
3011 			port->rx_conf[qid].rx_thresh.wthresh = rx_wthresh;
3012 
3013 		if (rx_free_thresh != RTE_PMD_PARAM_UNSET)
3014 			port->rx_conf[qid].rx_free_thresh = rx_free_thresh;
3015 
3016 		if (rx_drop_en != RTE_PMD_PARAM_UNSET)
3017 			port->rx_conf[qid].rx_drop_en = rx_drop_en;
3018 
3019 		port->nb_rx_desc[qid] = nb_rxd;
3020 	}
3021 
3022 	for (qid = 0; qid < nb_txq; qid++) {
3023 		offloads = port->tx_conf[qid].offloads;
3024 		port->tx_conf[qid] = port->dev_info.default_txconf;
3025 		if (offloads != 0)
3026 			port->tx_conf[qid].offloads = offloads;
3027 
3028 		/* Check if any Tx parameters have been passed */
3029 		if (tx_pthresh != RTE_PMD_PARAM_UNSET)
3030 			port->tx_conf[qid].tx_thresh.pthresh = tx_pthresh;
3031 
3032 		if (tx_hthresh != RTE_PMD_PARAM_UNSET)
3033 			port->tx_conf[qid].tx_thresh.hthresh = tx_hthresh;
3034 
3035 		if (tx_wthresh != RTE_PMD_PARAM_UNSET)
3036 			port->tx_conf[qid].tx_thresh.wthresh = tx_wthresh;
3037 
3038 		if (tx_rs_thresh != RTE_PMD_PARAM_UNSET)
3039 			port->tx_conf[qid].tx_rs_thresh = tx_rs_thresh;
3040 
3041 		if (tx_free_thresh != RTE_PMD_PARAM_UNSET)
3042 			port->tx_conf[qid].tx_free_thresh = tx_free_thresh;
3043 
3044 		port->nb_tx_desc[qid] = nb_txd;
3045 	}
3046 }
3047 
3048 void
3049 init_port_config(void)
3050 {
3051 	portid_t pid;
3052 	struct rte_port *port;
3053 	int ret;
3054 
3055 	RTE_ETH_FOREACH_DEV(pid) {
3056 		port = &ports[pid];
3057 		port->dev_conf.fdir_conf = fdir_conf;
3058 
3059 		ret = eth_dev_info_get_print_err(pid, &port->dev_info);
3060 		if (ret != 0)
3061 			return;
3062 
3063 		if (nb_rxq > 1) {
3064 			port->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
3065 			port->dev_conf.rx_adv_conf.rss_conf.rss_hf =
3066 				rss_hf & port->dev_info.flow_type_rss_offloads;
3067 		} else {
3068 			port->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
3069 			port->dev_conf.rx_adv_conf.rss_conf.rss_hf = 0;
3070 		}
3071 
3072 		if (port->dcb_flag == 0) {
3073 			if( port->dev_conf.rx_adv_conf.rss_conf.rss_hf != 0)
3074 				port->dev_conf.rxmode.mq_mode = ETH_MQ_RX_RSS;
3075 			else
3076 				port->dev_conf.rxmode.mq_mode = ETH_MQ_RX_NONE;
3077 		}
3078 
3079 		rxtx_port_config(port);
3080 
3081 		ret = eth_macaddr_get_print_err(pid, &port->eth_addr);
3082 		if (ret != 0)
3083 			return;
3084 
3085 		map_port_queue_stats_mapping_registers(pid, port);
3086 #if defined RTE_LIBRTE_IXGBE_PMD && defined RTE_LIBRTE_IXGBE_BYPASS
3087 		rte_pmd_ixgbe_bypass_init(pid);
3088 #endif
3089 
3090 		if (lsc_interrupt &&
3091 		    (rte_eth_devices[pid].data->dev_flags &
3092 		     RTE_ETH_DEV_INTR_LSC))
3093 			port->dev_conf.intr_conf.lsc = 1;
3094 		if (rmv_interrupt &&
3095 		    (rte_eth_devices[pid].data->dev_flags &
3096 		     RTE_ETH_DEV_INTR_RMV))
3097 			port->dev_conf.intr_conf.rmv = 1;
3098 	}
3099 }
3100 
3101 void set_port_slave_flag(portid_t slave_pid)
3102 {
3103 	struct rte_port *port;
3104 
3105 	port = &ports[slave_pid];
3106 	port->slave_flag = 1;
3107 }
3108 
3109 void clear_port_slave_flag(portid_t slave_pid)
3110 {
3111 	struct rte_port *port;
3112 
3113 	port = &ports[slave_pid];
3114 	port->slave_flag = 0;
3115 }
3116 
3117 uint8_t port_is_bonding_slave(portid_t slave_pid)
3118 {
3119 	struct rte_port *port;
3120 
3121 	port = &ports[slave_pid];
3122 	if ((rte_eth_devices[slave_pid].data->dev_flags &
3123 	    RTE_ETH_DEV_BONDED_SLAVE) || (port->slave_flag == 1))
3124 		return 1;
3125 	return 0;
3126 }
3127 
/* VLAN ids 0..31; entry N holds the value N. */
const uint16_t vlan_tags[] = {
	 0,  1,  2,  3,
	 4,  5,  6,  7,
	 8,  9, 10, 11,
	12, 13, 14, 15,
	16, 17, 18, 19,
	20, 21, 22, 23,
	24, 25, 26, 27,
	28, 29, 30, 31,
};
3134 
3135 static  int
3136 get_eth_dcb_conf(portid_t pid, struct rte_eth_conf *eth_conf,
3137 		 enum dcb_mode_enable dcb_mode,
3138 		 enum rte_eth_nb_tcs num_tcs,
3139 		 uint8_t pfc_en)
3140 {
3141 	uint8_t i;
3142 	int32_t rc;
3143 	struct rte_eth_rss_conf rss_conf;
3144 
3145 	/*
3146 	 * Builds up the correct configuration for dcb+vt based on the vlan tags array
3147 	 * given above, and the number of traffic classes available for use.
3148 	 */
3149 	if (dcb_mode == DCB_VT_ENABLED) {
3150 		struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
3151 				&eth_conf->rx_adv_conf.vmdq_dcb_conf;
3152 		struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3153 				&eth_conf->tx_adv_conf.vmdq_dcb_tx_conf;
3154 
3155 		/* VMDQ+DCB RX and TX configurations */
3156 		vmdq_rx_conf->enable_default_pool = 0;
3157 		vmdq_rx_conf->default_pool = 0;
3158 		vmdq_rx_conf->nb_queue_pools =
3159 			(num_tcs ==  ETH_4_TCS ? ETH_32_POOLS : ETH_16_POOLS);
3160 		vmdq_tx_conf->nb_queue_pools =
3161 			(num_tcs ==  ETH_4_TCS ? ETH_32_POOLS : ETH_16_POOLS);
3162 
3163 		vmdq_rx_conf->nb_pool_maps = vmdq_rx_conf->nb_queue_pools;
3164 		for (i = 0; i < vmdq_rx_conf->nb_pool_maps; i++) {
3165 			vmdq_rx_conf->pool_map[i].vlan_id = vlan_tags[i];
3166 			vmdq_rx_conf->pool_map[i].pools =
3167 				1 << (i % vmdq_rx_conf->nb_queue_pools);
3168 		}
3169 		for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3170 			vmdq_rx_conf->dcb_tc[i] = i % num_tcs;
3171 			vmdq_tx_conf->dcb_tc[i] = i % num_tcs;
3172 		}
3173 
3174 		/* set DCB mode of RX and TX of multiple queues */
3175 		eth_conf->rxmode.mq_mode = ETH_MQ_RX_VMDQ_DCB;
3176 		eth_conf->txmode.mq_mode = ETH_MQ_TX_VMDQ_DCB;
3177 	} else {
3178 		struct rte_eth_dcb_rx_conf *rx_conf =
3179 				&eth_conf->rx_adv_conf.dcb_rx_conf;
3180 		struct rte_eth_dcb_tx_conf *tx_conf =
3181 				&eth_conf->tx_adv_conf.dcb_tx_conf;
3182 
3183 		rc = rte_eth_dev_rss_hash_conf_get(pid, &rss_conf);
3184 		if (rc != 0)
3185 			return rc;
3186 
3187 		rx_conf->nb_tcs = num_tcs;
3188 		tx_conf->nb_tcs = num_tcs;
3189 
3190 		for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3191 			rx_conf->dcb_tc[i] = i % num_tcs;
3192 			tx_conf->dcb_tc[i] = i % num_tcs;
3193 		}
3194 
3195 		eth_conf->rxmode.mq_mode = ETH_MQ_RX_DCB_RSS;
3196 		eth_conf->rx_adv_conf.rss_conf = rss_conf;
3197 		eth_conf->txmode.mq_mode = ETH_MQ_TX_DCB;
3198 	}
3199 
3200 	if (pfc_en)
3201 		eth_conf->dcb_capability_en =
3202 				ETH_DCB_PG_SUPPORT | ETH_DCB_PFC_SUPPORT;
3203 	else
3204 		eth_conf->dcb_capability_en = ETH_DCB_PG_SUPPORT;
3205 
3206 	return 0;
3207 }
3208 
3209 int
3210 init_port_dcb_config(portid_t pid,
3211 		     enum dcb_mode_enable dcb_mode,
3212 		     enum rte_eth_nb_tcs num_tcs,
3213 		     uint8_t pfc_en)
3214 {
3215 	struct rte_eth_conf port_conf;
3216 	struct rte_port *rte_port;
3217 	int retval;
3218 	uint16_t i;
3219 
3220 	rte_port = &ports[pid];
3221 
3222 	memset(&port_conf, 0, sizeof(struct rte_eth_conf));
3223 	/* Enter DCB configuration status */
3224 	dcb_config = 1;
3225 
3226 	port_conf.rxmode = rte_port->dev_conf.rxmode;
3227 	port_conf.txmode = rte_port->dev_conf.txmode;
3228 
3229 	/*set configuration of DCB in vt mode and DCB in non-vt mode*/
3230 	retval = get_eth_dcb_conf(pid, &port_conf, dcb_mode, num_tcs, pfc_en);
3231 	if (retval < 0)
3232 		return retval;
3233 	port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_VLAN_FILTER;
3234 
3235 	/* re-configure the device . */
3236 	retval = rte_eth_dev_configure(pid, nb_rxq, nb_rxq, &port_conf);
3237 	if (retval < 0)
3238 		return retval;
3239 
3240 	retval = eth_dev_info_get_print_err(pid, &rte_port->dev_info);
3241 	if (retval != 0)
3242 		return retval;
3243 
3244 	/* If dev_info.vmdq_pool_base is greater than 0,
3245 	 * the queue id of vmdq pools is started after pf queues.
3246 	 */
3247 	if (dcb_mode == DCB_VT_ENABLED &&
3248 	    rte_port->dev_info.vmdq_pool_base > 0) {
3249 		printf("VMDQ_DCB multi-queue mode is nonsensical"
3250 			" for port %d.", pid);
3251 		return -1;
3252 	}
3253 
3254 	/* Assume the ports in testpmd have the same dcb capability
3255 	 * and has the same number of rxq and txq in dcb mode
3256 	 */
3257 	if (dcb_mode == DCB_VT_ENABLED) {
3258 		if (rte_port->dev_info.max_vfs > 0) {
3259 			nb_rxq = rte_port->dev_info.nb_rx_queues;
3260 			nb_txq = rte_port->dev_info.nb_tx_queues;
3261 		} else {
3262 			nb_rxq = rte_port->dev_info.max_rx_queues;
3263 			nb_txq = rte_port->dev_info.max_tx_queues;
3264 		}
3265 	} else {
3266 		/*if vt is disabled, use all pf queues */
3267 		if (rte_port->dev_info.vmdq_pool_base == 0) {
3268 			nb_rxq = rte_port->dev_info.max_rx_queues;
3269 			nb_txq = rte_port->dev_info.max_tx_queues;
3270 		} else {
3271 			nb_rxq = (queueid_t)num_tcs;
3272 			nb_txq = (queueid_t)num_tcs;
3273 
3274 		}
3275 	}
3276 	rx_free_thresh = 64;
3277 
3278 	memcpy(&rte_port->dev_conf, &port_conf, sizeof(struct rte_eth_conf));
3279 
3280 	rxtx_port_config(rte_port);
3281 	/* VLAN filter */
3282 	rte_port->dev_conf.rxmode.offloads |= DEV_RX_OFFLOAD_VLAN_FILTER;
3283 	for (i = 0; i < RTE_DIM(vlan_tags); i++)
3284 		rx_vft_set(pid, vlan_tags[i], 1);
3285 
3286 	retval = eth_macaddr_get_print_err(pid, &rte_port->eth_addr);
3287 	if (retval != 0)
3288 		return retval;
3289 
3290 	map_port_queue_stats_mapping_registers(pid, rte_port);
3291 
3292 	rte_port->dcb_flag = 1;
3293 
3294 	return 0;
3295 }
3296 
3297 static void
3298 init_port(void)
3299 {
3300 	/* Configuration of Ethernet ports. */
3301 	ports = rte_zmalloc("testpmd: ports",
3302 			    sizeof(struct rte_port) * RTE_MAX_ETHPORTS,
3303 			    RTE_CACHE_LINE_SIZE);
3304 	if (ports == NULL) {
3305 		rte_exit(EXIT_FAILURE,
3306 				"rte_zmalloc(%d struct rte_port) failed\n",
3307 				RTE_MAX_ETHPORTS);
3308 	}
3309 
3310 	/* Initialize ports NUMA structures */
3311 	memset(port_numa, NUMA_NO_CONFIG, RTE_MAX_ETHPORTS);
3312 	memset(rxring_numa, NUMA_NO_CONFIG, RTE_MAX_ETHPORTS);
3313 	memset(txring_numa, NUMA_NO_CONFIG, RTE_MAX_ETHPORTS);
3314 }
3315 
3316 static void
3317 force_quit(void)
3318 {
3319 	pmd_test_exit();
3320 	prompt_exit();
3321 }
3322 
3323 static void
3324 print_stats(void)
3325 {
3326 	uint8_t i;
3327 	const char clr[] = { 27, '[', '2', 'J', '\0' };
3328 	const char top_left[] = { 27, '[', '1', ';', '1', 'H', '\0' };
3329 
3330 	/* Clear screen and move to top left */
3331 	printf("%s%s", clr, top_left);
3332 
3333 	printf("\nPort statistics ====================================");
3334 	for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++)
3335 		nic_stats_display(fwd_ports_ids[i]);
3336 
3337 	fflush(stdout);
3338 }
3339 
3340 static void
3341 signal_handler(int signum)
3342 {
3343 	if (signum == SIGINT || signum == SIGTERM) {
3344 		printf("\nSignal %d received, preparing to exit...\n",
3345 				signum);
3346 #ifdef RTE_LIBRTE_PDUMP
3347 		/* uninitialize packet capture framework */
3348 		rte_pdump_uninit();
3349 #endif
3350 #ifdef RTE_LIBRTE_LATENCY_STATS
3351 		if (latencystats_enabled != 0)
3352 			rte_latencystats_uninit();
3353 #endif
3354 		force_quit();
3355 		/* Set flag to indicate the force termination. */
3356 		f_quit = 1;
3357 		/* exit with the expected status */
3358 		signal(signum, SIG_DFL);
3359 		kill(getpid(), signum);
3360 	}
3361 }
3362 
3363 int
3364 main(int argc, char** argv)
3365 {
3366 	int diag;
3367 	portid_t port_id;
3368 	uint16_t count;
3369 	int ret;
3370 
3371 	signal(SIGINT, signal_handler);
3372 	signal(SIGTERM, signal_handler);
3373 
3374 	testpmd_logtype = rte_log_register("testpmd");
3375 	if (testpmd_logtype < 0)
3376 		rte_exit(EXIT_FAILURE, "Cannot register log type");
3377 	rte_log_set_level(testpmd_logtype, RTE_LOG_DEBUG);
3378 
3379 	diag = rte_eal_init(argc, argv);
3380 	if (diag < 0)
3381 		rte_exit(EXIT_FAILURE, "Cannot init EAL: %s\n",
3382 			 rte_strerror(rte_errno));
3383 
3384 	if (rte_eal_process_type() == RTE_PROC_SECONDARY)
3385 		rte_exit(EXIT_FAILURE,
3386 			 "Secondary process type not supported.\n");
3387 
3388 	ret = register_eth_event_callback();
3389 	if (ret != 0)
3390 		rte_exit(EXIT_FAILURE, "Cannot register for ethdev events");
3391 
3392 #ifdef RTE_LIBRTE_PDUMP
3393 	/* initialize packet capture framework */
3394 	rte_pdump_init();
3395 #endif
3396 
3397 	count = 0;
3398 	RTE_ETH_FOREACH_DEV(port_id) {
3399 		ports_ids[count] = port_id;
3400 		count++;
3401 	}
3402 	nb_ports = (portid_t) count;
3403 	if (nb_ports == 0)
3404 		TESTPMD_LOG(WARNING, "No probed ethernet devices\n");
3405 
3406 	/* allocate port structures, and init them */
3407 	init_port();
3408 
3409 	set_def_fwd_config();
3410 	if (nb_lcores == 0)
3411 		rte_exit(EXIT_FAILURE, "No cores defined for forwarding\n"
3412 			 "Check the core mask argument\n");
3413 
3414 	/* Bitrate/latency stats disabled by default */
3415 #ifdef RTE_LIBRTE_BITRATE
3416 	bitrate_enabled = 0;
3417 #endif
3418 #ifdef RTE_LIBRTE_LATENCY_STATS
3419 	latencystats_enabled = 0;
3420 #endif
3421 
3422 	/* on FreeBSD, mlockall() is disabled by default */
3423 #ifdef RTE_EXEC_ENV_FREEBSD
3424 	do_mlockall = 0;
3425 #else
3426 	do_mlockall = 1;
3427 #endif
3428 
3429 	argc -= diag;
3430 	argv += diag;
3431 	if (argc > 1)
3432 		launch_args_parse(argc, argv);
3433 
3434 	if (do_mlockall && mlockall(MCL_CURRENT | MCL_FUTURE)) {
3435 		TESTPMD_LOG(NOTICE, "mlockall() failed with error \"%s\"\n",
3436 			strerror(errno));
3437 	}
3438 
3439 	if (tx_first && interactive)
3440 		rte_exit(EXIT_FAILURE, "--tx-first cannot be used on "
3441 				"interactive mode.\n");
3442 
3443 	if (tx_first && lsc_interrupt) {
3444 		printf("Warning: lsc_interrupt needs to be off when "
3445 				" using tx_first. Disabling.\n");
3446 		lsc_interrupt = 0;
3447 	}
3448 
3449 	if (!nb_rxq && !nb_txq)
3450 		printf("Warning: Either rx or tx queues should be non-zero\n");
3451 
3452 	if (nb_rxq > 1 && nb_rxq > nb_txq)
3453 		printf("Warning: nb_rxq=%d enables RSS configuration, "
3454 		       "but nb_txq=%d will prevent to fully test it.\n",
3455 		       nb_rxq, nb_txq);
3456 
3457 	init_config();
3458 
3459 	if (hot_plug) {
3460 		ret = rte_dev_hotplug_handle_enable();
3461 		if (ret) {
3462 			RTE_LOG(ERR, EAL,
3463 				"fail to enable hotplug handling.");
3464 			return -1;
3465 		}
3466 
3467 		ret = rte_dev_event_monitor_start();
3468 		if (ret) {
3469 			RTE_LOG(ERR, EAL,
3470 				"fail to start device event monitoring.");
3471 			return -1;
3472 		}
3473 
3474 		ret = rte_dev_event_callback_register(NULL,
3475 			dev_event_callback, NULL);
3476 		if (ret) {
3477 			RTE_LOG(ERR, EAL,
3478 				"fail  to register device event callback\n");
3479 			return -1;
3480 		}
3481 	}
3482 
3483 	if (!no_device_start && start_port(RTE_PORT_ALL) != 0)
3484 		rte_exit(EXIT_FAILURE, "Start ports failed\n");
3485 
3486 	/* set all ports to promiscuous mode by default */
3487 	RTE_ETH_FOREACH_DEV(port_id) {
3488 		ret = rte_eth_promiscuous_enable(port_id);
3489 		if (ret != 0)
3490 			printf("Error during enabling promiscuous mode for port %u: %s - ignore\n",
3491 				port_id, rte_strerror(-ret));
3492 	}
3493 
3494 	/* Init metrics library */
3495 	rte_metrics_init(rte_socket_id());
3496 
3497 #ifdef RTE_LIBRTE_LATENCY_STATS
3498 	if (latencystats_enabled != 0) {
3499 		int ret = rte_latencystats_init(1, NULL);
3500 		if (ret)
3501 			printf("Warning: latencystats init()"
3502 				" returned error %d\n",	ret);
3503 		printf("Latencystats running on lcore %d\n",
3504 			latencystats_lcore_id);
3505 	}
3506 #endif
3507 
3508 	/* Setup bitrate stats */
3509 #ifdef RTE_LIBRTE_BITRATE
3510 	if (bitrate_enabled != 0) {
3511 		bitrate_data = rte_stats_bitrate_create();
3512 		if (bitrate_data == NULL)
3513 			rte_exit(EXIT_FAILURE,
3514 				"Could not allocate bitrate data.\n");
3515 		rte_stats_bitrate_reg(bitrate_data);
3516 	}
3517 #endif
3518 
3519 #ifdef RTE_LIBRTE_CMDLINE
3520 	if (strlen(cmdline_filename) != 0)
3521 		cmdline_read_from_file(cmdline_filename);
3522 
3523 	if (interactive == 1) {
3524 		if (auto_start) {
3525 			printf("Start automatic packet forwarding\n");
3526 			start_packet_forwarding(0);
3527 		}
3528 		prompt();
3529 		pmd_test_exit();
3530 	} else
3531 #endif
3532 	{
3533 		char c;
3534 		int rc;
3535 
3536 		f_quit = 0;
3537 
3538 		printf("No commandline core given, start packet forwarding\n");
3539 		start_packet_forwarding(tx_first);
3540 		if (stats_period != 0) {
3541 			uint64_t prev_time = 0, cur_time, diff_time = 0;
3542 			uint64_t timer_period;
3543 
3544 			/* Convert to number of cycles */
3545 			timer_period = stats_period * rte_get_timer_hz();
3546 
3547 			while (f_quit == 0) {
3548 				cur_time = rte_get_timer_cycles();
3549 				diff_time += cur_time - prev_time;
3550 
3551 				if (diff_time >= timer_period) {
3552 					print_stats();
3553 					/* Reset the timer */
3554 					diff_time = 0;
3555 				}
3556 				/* Sleep to avoid unnecessary checks */
3557 				prev_time = cur_time;
3558 				sleep(1);
3559 			}
3560 		}
3561 
3562 		printf("Press enter to exit\n");
3563 		rc = read(0, &c, 1);
3564 		pmd_test_exit();
3565 		if (rc < 0)
3566 			return 1;
3567 	}
3568 
3569 	return 0;
3570 }
3571