xref: /dpdk/app/test-pmd/testpmd.c (revision e1d38504e16cdc7fbb977d9fc750a93cc784d37e)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2017 Intel Corporation
3  */
4 
5 #include <stdarg.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <signal.h>
9 #include <string.h>
10 #include <time.h>
11 #include <fcntl.h>
12 #ifndef RTE_EXEC_ENV_WINDOWS
13 #include <sys/mman.h>
14 #endif
15 #include <sys/types.h>
16 #include <errno.h>
17 #include <stdbool.h>
18 
19 #include <sys/queue.h>
20 #include <sys/stat.h>
21 
22 #include <stdint.h>
23 #include <unistd.h>
24 #include <inttypes.h>
25 
26 #include <rte_common.h>
27 #include <rte_errno.h>
28 #include <rte_byteorder.h>
29 #include <rte_log.h>
30 #include <rte_debug.h>
31 #include <rte_cycles.h>
32 #include <rte_memory.h>
33 #include <rte_memcpy.h>
34 #include <rte_launch.h>
35 #include <rte_eal.h>
36 #include <rte_alarm.h>
37 #include <rte_per_lcore.h>
38 #include <rte_lcore.h>
39 #include <rte_atomic.h>
40 #include <rte_branch_prediction.h>
41 #include <rte_mempool.h>
42 #include <rte_malloc.h>
43 #include <rte_mbuf.h>
44 #include <rte_mbuf_pool_ops.h>
45 #include <rte_interrupts.h>
46 #include <rte_pci.h>
47 #include <rte_ether.h>
48 #include <rte_ethdev.h>
49 #include <rte_dev.h>
50 #include <rte_string_fns.h>
51 #ifdef RTE_NET_IXGBE
52 #include <rte_pmd_ixgbe.h>
53 #endif
54 #ifdef RTE_LIB_PDUMP
55 #include <rte_pdump.h>
56 #endif
57 #include <rte_flow.h>
58 #include <rte_metrics.h>
59 #ifdef RTE_LIB_BITRATESTATS
60 #include <rte_bitrate.h>
61 #endif
62 #ifdef RTE_LIB_LATENCYSTATS
63 #include <rte_latencystats.h>
64 #endif
65 #ifdef RTE_EXEC_ENV_WINDOWS
66 #include <process.h>
67 #endif
68 
69 #include "testpmd.h"
70 
71 #ifndef MAP_HUGETLB
72 /* FreeBSD may not have MAP_HUGETLB (in fact, it probably doesn't) */
73 #define HUGE_FLAG (0x40000)
74 #else
75 #define HUGE_FLAG MAP_HUGETLB
76 #endif
77 
78 #ifndef MAP_HUGE_SHIFT
79 /* older kernels (or FreeBSD) will not have this define */
80 #define HUGE_SHIFT (26)
81 #else
82 #define HUGE_SHIFT MAP_HUGE_SHIFT
83 #endif
84 
85 #define EXTMEM_HEAP_NAME "extmem"
86 #define EXTBUF_ZONE_SIZE RTE_PGSIZE_2M
87 
88 uint16_t verbose_level = 0; /**< Silent by default. */
89 int testpmd_logtype; /**< Log type for testpmd logs */
90 
91 /* use main core for command line ? */
92 uint8_t interactive = 0;
93 uint8_t auto_start = 0;
94 uint8_t tx_first;
95 char cmdline_filename[PATH_MAX] = {0};
96 
97 /*
98  * NUMA support configuration.
99  * When set, the NUMA support attempts to dispatch the allocation of the
100  * RX and TX memory rings, and of the DMA memory buffers (mbufs) for the
101  * probed ports among the CPU sockets 0 and 1.
102  * Otherwise, all memory is allocated from CPU socket 0.
103  */
104 uint8_t numa_support = 1; /**< numa enabled by default */
105 
106 /*
107  * In UMA mode,all memory is allocated from socket 0 if --socket-num is
108  * not configured.
109  */
110 uint8_t socket_num = UMA_NO_CONFIG;
111 
112 /*
113  * Select mempool allocation type:
114  * - native: use regular DPDK memory
115  * - anon: use regular DPDK memory to create mempool, but populate using
116  *         anonymous memory (may not be IOVA-contiguous)
117  * - xmem: use externally allocated hugepage memory
118  */
119 uint8_t mp_alloc_type = MP_ALLOC_NATIVE;
120 
121 /*
122  * Store specified sockets on which memory pool to be used by ports
123  * is allocated.
124  */
125 uint8_t port_numa[RTE_MAX_ETHPORTS];
126 
127 /*
128  * Store specified sockets on which RX ring to be used by ports
129  * is allocated.
130  */
131 uint8_t rxring_numa[RTE_MAX_ETHPORTS];
132 
133 /*
134  * Store specified sockets on which TX ring to be used by ports
135  * is allocated.
136  */
137 uint8_t txring_numa[RTE_MAX_ETHPORTS];
138 
139 /*
140  * Record the Ethernet address of peer target ports to which packets are
141  * forwarded.
142  * Must be instantiated with the ethernet addresses of peer traffic generator
143  * ports.
144  */
145 struct rte_ether_addr peer_eth_addrs[RTE_MAX_ETHPORTS];
146 portid_t nb_peer_eth_addrs = 0;
147 
148 /*
149  * Probed Target Environment.
150  */
151 struct rte_port *ports;	       /**< For all probed ethernet ports. */
152 portid_t nb_ports;             /**< Number of probed ethernet ports. */
153 struct fwd_lcore **fwd_lcores; /**< For all probed logical cores. */
154 lcoreid_t nb_lcores;           /**< Number of probed logical cores. */
155 
156 portid_t ports_ids[RTE_MAX_ETHPORTS]; /**< Store all port ids. */
157 
158 /*
159  * Test Forwarding Configuration.
160  *    nb_fwd_lcores <= nb_cfg_lcores <= nb_lcores
161  *    nb_fwd_ports  <= nb_cfg_ports  <= nb_ports
162  */
163 lcoreid_t nb_cfg_lcores; /**< Number of configured logical cores. */
164 lcoreid_t nb_fwd_lcores; /**< Number of forwarding logical cores. */
165 portid_t  nb_cfg_ports;  /**< Number of configured ports. */
166 portid_t  nb_fwd_ports;  /**< Number of forwarding ports. */
167 
168 unsigned int fwd_lcores_cpuids[RTE_MAX_LCORE]; /**< CPU ids configuration. */
169 portid_t fwd_ports_ids[RTE_MAX_ETHPORTS];      /**< Port ids configuration. */
170 
171 struct fwd_stream **fwd_streams; /**< For each RX queue of each port. */
172 streamid_t nb_fwd_streams;       /**< Is equal to (nb_ports * nb_rxq). */
173 
174 /*
175  * Forwarding engines.
176  */
177 struct fwd_engine * fwd_engines[] = {
178 	&io_fwd_engine,
179 	&mac_fwd_engine,
180 	&mac_swap_engine,
181 	&flow_gen_engine,
182 	&rx_only_engine,
183 	&tx_only_engine,
184 	&csum_fwd_engine,
185 	&icmp_echo_engine,
186 	&noisy_vnf_engine,
187 	&five_tuple_swap_fwd_engine,
188 #ifdef RTE_LIBRTE_IEEE1588
189 	&ieee1588_fwd_engine,
190 #endif
191 	NULL,
192 };
193 
194 struct rte_mempool *mempools[RTE_MAX_NUMA_NODES * MAX_SEGS_BUFFER_SPLIT];
195 uint16_t mempool_flags;
196 
197 struct fwd_config cur_fwd_config;
198 struct fwd_engine *cur_fwd_eng = &io_fwd_engine; /**< IO mode by default. */
199 uint32_t retry_enabled;
200 uint32_t burst_tx_delay_time = BURST_TX_WAIT_US;
201 uint32_t burst_tx_retry_num = BURST_TX_RETRIES;
202 
203 uint32_t mbuf_data_size_n = 1; /* Number of specified mbuf sizes. */
204 uint16_t mbuf_data_size[MAX_SEGS_BUFFER_SPLIT] = {
205 	DEFAULT_MBUF_DATA_SIZE
206 }; /**< Mbuf data space size. */
207 uint32_t param_total_num_mbufs = 0;  /**< number of mbufs in all pools - if
208                                       * specified on command-line. */
209 uint16_t stats_period; /**< Period to show statistics (disabled by default) */
210 
211 /*
212  * In container, it cannot terminate the process which running with 'stats-period'
213  * option. Set flag to exit stats period loop after received SIGINT/SIGTERM.
214  */
215 uint8_t f_quit;
216 
217 /*
218  * Configuration of packet segments used to scatter received packets
219  * if some of split features is configured.
220  */
221 uint16_t rx_pkt_seg_lengths[MAX_SEGS_BUFFER_SPLIT];
222 uint8_t  rx_pkt_nb_segs; /**< Number of segments to split */
223 uint16_t rx_pkt_seg_offsets[MAX_SEGS_BUFFER_SPLIT];
224 uint8_t  rx_pkt_nb_offs; /**< Number of specified offsets */
225 
226 /*
227  * Configuration of packet segments used by the "txonly" processing engine.
228  */
229 uint16_t tx_pkt_length = TXONLY_DEF_PACKET_LEN; /**< TXONLY packet length. */
230 uint16_t tx_pkt_seg_lengths[RTE_MAX_SEGS_PER_PKT] = {
231 	TXONLY_DEF_PACKET_LEN,
232 };
233 uint8_t  tx_pkt_nb_segs = 1; /**< Number of segments in TXONLY packets */
234 
235 enum tx_pkt_split tx_pkt_split = TX_PKT_SPLIT_OFF;
236 /**< Split policy for packets to TX. */
237 
238 uint8_t txonly_multi_flow;
239 /**< Whether multiple flows are generated in TXONLY mode. */
240 
241 uint32_t tx_pkt_times_inter;
242 /**< Timings for send scheduling in TXONLY mode, time between bursts. */
243 
244 uint32_t tx_pkt_times_intra;
245 /**< Timings for send scheduling in TXONLY mode, time between packets. */
246 
247 uint16_t nb_pkt_per_burst = DEF_PKT_BURST; /**< Number of packets per burst. */
248 uint16_t nb_pkt_flowgen_clones; /**< Number of Tx packet clones to send in flowgen mode. */
249 uint16_t mb_mempool_cache = DEF_MBUF_CACHE; /**< Size of mbuf mempool cache. */
250 
251 /* current configuration is in DCB or not,0 means it is not in DCB mode */
252 uint8_t dcb_config = 0;
253 
254 /*
255  * Configurable number of RX/TX queues.
256  */
257 queueid_t nb_hairpinq; /**< Number of hairpin queues per port. */
258 queueid_t nb_rxq = 1; /**< Number of RX queues per port. */
259 queueid_t nb_txq = 1; /**< Number of TX queues per port. */
260 
261 /*
262  * Configurable number of RX/TX ring descriptors.
263  * Defaults are supplied by drivers via ethdev.
264  */
265 #define RTE_TEST_RX_DESC_DEFAULT 0
266 #define RTE_TEST_TX_DESC_DEFAULT 0
267 uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT; /**< Number of RX descriptors. */
268 uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT; /**< Number of TX descriptors. */
269 
270 #define RTE_PMD_PARAM_UNSET -1
271 /*
272  * Configurable values of RX and TX ring threshold registers.
273  */
274 
275 int8_t rx_pthresh = RTE_PMD_PARAM_UNSET;
276 int8_t rx_hthresh = RTE_PMD_PARAM_UNSET;
277 int8_t rx_wthresh = RTE_PMD_PARAM_UNSET;
278 
279 int8_t tx_pthresh = RTE_PMD_PARAM_UNSET;
280 int8_t tx_hthresh = RTE_PMD_PARAM_UNSET;
281 int8_t tx_wthresh = RTE_PMD_PARAM_UNSET;
282 
283 /*
284  * Configurable value of RX free threshold.
285  */
286 int16_t rx_free_thresh = RTE_PMD_PARAM_UNSET;
287 
288 /*
289  * Configurable value of RX drop enable.
290  */
291 int8_t rx_drop_en = RTE_PMD_PARAM_UNSET;
292 
293 /*
294  * Configurable value of TX free threshold.
295  */
296 int16_t tx_free_thresh = RTE_PMD_PARAM_UNSET;
297 
298 /*
299  * Configurable value of TX RS bit threshold.
300  */
301 int16_t tx_rs_thresh = RTE_PMD_PARAM_UNSET;
302 
303 /*
304  * Configurable value of buffered packets before sending.
305  */
306 uint16_t noisy_tx_sw_bufsz;
307 
308 /*
309  * Configurable value of packet buffer timeout.
310  */
311 uint16_t noisy_tx_sw_buf_flush_time;
312 
313 /*
314  * Configurable value for size of VNF internal memory area
315  * used for simulating noisy neighbour behaviour
316  */
317 uint64_t noisy_lkup_mem_sz;
318 
319 /*
320  * Configurable value of number of random writes done in
321  * VNF simulation memory area.
322  */
323 uint64_t noisy_lkup_num_writes;
324 
325 /*
326  * Configurable value of number of random reads done in
327  * VNF simulation memory area.
328  */
329 uint64_t noisy_lkup_num_reads;
330 
331 /*
332  * Configurable value of number of random reads/writes done in
333  * VNF simulation memory area.
334  */
335 uint64_t noisy_lkup_num_reads_writes;
336 
337 /*
338  * Receive Side Scaling (RSS) configuration.
339  */
340 uint64_t rss_hf = ETH_RSS_IP; /* RSS IP by default. */
341 
342 /*
343  * Port topology configuration
344  */
345 uint16_t port_topology = PORT_TOPOLOGY_PAIRED; /* Ports are paired by default */
346 
347 /*
348  * Avoids to flush all the RX streams before starts forwarding.
349  */
350 uint8_t no_flush_rx = 0; /* flush by default */
351 
352 /*
353  * Flow API isolated mode.
354  */
355 uint8_t flow_isolate_all;
356 
357 /*
358  * Avoids to check link status when starting/stopping a port.
359  */
360 uint8_t no_link_check = 0; /* check by default */
361 
362 /*
363  * Don't automatically start all ports in interactive mode.
364  */
365 uint8_t no_device_start = 0;
366 
367 /*
368  * Enable link status change notification
369  */
370 uint8_t lsc_interrupt = 1; /* enabled by default */
371 
372 /*
373  * Enable device removal notification.
374  */
375 uint8_t rmv_interrupt = 1; /* enabled by default */
376 
377 uint8_t hot_plug = 0; /**< hotplug disabled by default. */
378 
379 /* After attach, port setup is called on event or by iterator */
380 bool setup_on_probe_event = true;
381 
382 /* Clear ptypes on port initialization. */
383 uint8_t clear_ptypes = true;
384 
385 /* Hairpin ports configuration mode. */
386 uint16_t hairpin_mode;
387 
388 /* Pretty printing of ethdev events */
389 static const char * const eth_event_desc[] = {
390 	[RTE_ETH_EVENT_UNKNOWN] = "unknown",
391 	[RTE_ETH_EVENT_INTR_LSC] = "link state change",
392 	[RTE_ETH_EVENT_QUEUE_STATE] = "queue state",
393 	[RTE_ETH_EVENT_INTR_RESET] = "reset",
394 	[RTE_ETH_EVENT_VF_MBOX] = "VF mbox",
395 	[RTE_ETH_EVENT_IPSEC] = "IPsec",
396 	[RTE_ETH_EVENT_MACSEC] = "MACsec",
397 	[RTE_ETH_EVENT_INTR_RMV] = "device removal",
398 	[RTE_ETH_EVENT_NEW] = "device probed",
399 	[RTE_ETH_EVENT_DESTROY] = "device released",
400 	[RTE_ETH_EVENT_FLOW_AGED] = "flow aged",
401 	[RTE_ETH_EVENT_MAX] = NULL,
402 };
403 
404 /*
405  * Display or mask ether events
406  * Default to all events except VF_MBOX
407  */
408 uint32_t event_print_mask = (UINT32_C(1) << RTE_ETH_EVENT_UNKNOWN) |
409 			    (UINT32_C(1) << RTE_ETH_EVENT_INTR_LSC) |
410 			    (UINT32_C(1) << RTE_ETH_EVENT_QUEUE_STATE) |
411 			    (UINT32_C(1) << RTE_ETH_EVENT_INTR_RESET) |
412 			    (UINT32_C(1) << RTE_ETH_EVENT_IPSEC) |
413 			    (UINT32_C(1) << RTE_ETH_EVENT_MACSEC) |
414 			    (UINT32_C(1) << RTE_ETH_EVENT_INTR_RMV) |
415 			    (UINT32_C(1) << RTE_ETH_EVENT_FLOW_AGED);
416 /*
417  * Decide if all memory are locked for performance.
418  */
419 int do_mlockall = 0;
420 
421 /*
422  * NIC bypass mode configuration options.
423  */
424 
425 #if defined RTE_NET_IXGBE && defined RTE_LIBRTE_IXGBE_BYPASS
426 /* The NIC bypass watchdog timeout. */
427 uint32_t bypass_timeout = RTE_PMD_IXGBE_BYPASS_TMT_OFF;
428 #endif
429 
430 
431 #ifdef RTE_LIB_LATENCYSTATS
432 
433 /*
434  * Set when latency stats is enabled in the commandline
435  */
436 uint8_t latencystats_enabled;
437 
438 /*
439  * Lcore ID to serive latency statistics.
440  */
441 lcoreid_t latencystats_lcore_id = -1;
442 
443 #endif
444 
445 /*
446  * Ethernet device configuration.
447  */
448 struct rte_eth_rxmode rx_mode = {
449 	/* Default maximum frame length.
450 	 * Zero is converted to "RTE_ETHER_MTU + PMD Ethernet overhead"
451 	 * in init_config().
452 	 */
453 	.max_rx_pkt_len = 0,
454 };
455 
456 struct rte_eth_txmode tx_mode = {
457 	.offloads = DEV_TX_OFFLOAD_MBUF_FAST_FREE,
458 };
459 
460 struct rte_fdir_conf fdir_conf = {
461 	.mode = RTE_FDIR_MODE_NONE,
462 	.pballoc = RTE_FDIR_PBALLOC_64K,
463 	.status = RTE_FDIR_REPORT_STATUS,
464 	.mask = {
465 		.vlan_tci_mask = 0xFFEF,
466 		.ipv4_mask     = {
467 			.src_ip = 0xFFFFFFFF,
468 			.dst_ip = 0xFFFFFFFF,
469 		},
470 		.ipv6_mask     = {
471 			.src_ip = {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
472 			.dst_ip = {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
473 		},
474 		.src_port_mask = 0xFFFF,
475 		.dst_port_mask = 0xFFFF,
476 		.mac_addr_byte_mask = 0xFF,
477 		.tunnel_type_mask = 1,
478 		.tunnel_id_mask = 0xFFFFFFFF,
479 	},
480 	.drop_queue = 127,
481 };
482 
483 volatile int test_done = 1; /* stop packet forwarding when set to 1. */
484 
485 /*
486  * Display zero values by default for xstats
487  */
488 uint8_t xstats_hide_zero;
489 
490 /*
491  * Measure of CPU cycles disabled by default
492  */
493 uint8_t record_core_cycles;
494 
495 /*
496  * Display of RX and TX bursts disabled by default
497  */
498 uint8_t record_burst_stats;
499 
500 unsigned int num_sockets = 0;
501 unsigned int socket_ids[RTE_MAX_NUMA_NODES];
502 
503 #ifdef RTE_LIB_BITRATESTATS
504 /* Bitrate statistics */
505 struct rte_stats_bitrates *bitrate_data;
506 lcoreid_t bitrate_lcore_id;
507 uint8_t bitrate_enabled;
508 #endif
509 
510 struct gro_status gro_ports[RTE_MAX_ETHPORTS];
511 uint8_t gro_flush_cycles = GRO_DEFAULT_FLUSH_CYCLES;
512 
513 /*
514  * hexadecimal bitmask of RX mq mode can be enabled.
515  */
516 enum rte_eth_rx_mq_mode rx_mq_mode = ETH_MQ_RX_VMDQ_DCB_RSS;
517 
518 /*
519  * Used to set forced link speed
520  */
521 uint32_t eth_link_speed;
522 
523 /* Forward function declarations */
524 static void setup_attached_port(portid_t pi);
525 static void check_all_ports_link_status(uint32_t port_mask);
526 static int eth_event_callback(portid_t port_id,
527 			      enum rte_eth_event_type type,
528 			      void *param, void *ret_param);
529 static void dev_event_callback(const char *device_name,
530 				enum rte_dev_event_type type,
531 				void *param);
532 
533 /*
534  * Check if all the ports are started.
535  * If yes, return positive value. If not, return zero.
536  */
537 static int all_ports_started(void);
538 
539 struct gso_status gso_ports[RTE_MAX_ETHPORTS];
540 uint16_t gso_max_segment_size = RTE_ETHER_MAX_LEN - RTE_ETHER_CRC_LEN;
541 
542 /* Holds the registered mbuf dynamic flags names. */
543 char dynf_names[64][RTE_MBUF_DYN_NAMESIZE];
544 
545 /*
546  * Helper function to check if socket is already discovered.
547  * If yes, return positive value. If not, return zero.
548  */
549 int
550 new_socket_id(unsigned int socket_id)
551 {
552 	unsigned int i;
553 
554 	for (i = 0; i < num_sockets; i++) {
555 		if (socket_ids[i] == socket_id)
556 			return 0;
557 	}
558 	return 1;
559 }
560 
561 /*
562  * Setup default configuration.
563  */
564 static void
565 set_default_fwd_lcores_config(void)
566 {
567 	unsigned int i;
568 	unsigned int nb_lc;
569 	unsigned int sock_num;
570 
571 	nb_lc = 0;
572 	for (i = 0; i < RTE_MAX_LCORE; i++) {
573 		if (!rte_lcore_is_enabled(i))
574 			continue;
575 		sock_num = rte_lcore_to_socket_id(i);
576 		if (new_socket_id(sock_num)) {
577 			if (num_sockets >= RTE_MAX_NUMA_NODES) {
578 				rte_exit(EXIT_FAILURE,
579 					 "Total sockets greater than %u\n",
580 					 RTE_MAX_NUMA_NODES);
581 			}
582 			socket_ids[num_sockets++] = sock_num;
583 		}
584 		if (i == rte_get_main_lcore())
585 			continue;
586 		fwd_lcores_cpuids[nb_lc++] = i;
587 	}
588 	nb_lcores = (lcoreid_t) nb_lc;
589 	nb_cfg_lcores = nb_lcores;
590 	nb_fwd_lcores = 1;
591 }
592 
593 static void
594 set_def_peer_eth_addrs(void)
595 {
596 	portid_t i;
597 
598 	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
599 		peer_eth_addrs[i].addr_bytes[0] = RTE_ETHER_LOCAL_ADMIN_ADDR;
600 		peer_eth_addrs[i].addr_bytes[5] = i;
601 	}
602 }
603 
604 static void
605 set_default_fwd_ports_config(void)
606 {
607 	portid_t pt_id;
608 	int i = 0;
609 
610 	RTE_ETH_FOREACH_DEV(pt_id) {
611 		fwd_ports_ids[i++] = pt_id;
612 
613 		/* Update sockets info according to the attached device */
614 		int socket_id = rte_eth_dev_socket_id(pt_id);
615 		if (socket_id >= 0 && new_socket_id(socket_id)) {
616 			if (num_sockets >= RTE_MAX_NUMA_NODES) {
617 				rte_exit(EXIT_FAILURE,
618 					 "Total sockets greater than %u\n",
619 					 RTE_MAX_NUMA_NODES);
620 			}
621 			socket_ids[num_sockets++] = socket_id;
622 		}
623 	}
624 
625 	nb_cfg_ports = nb_ports;
626 	nb_fwd_ports = nb_ports;
627 }
628 
629 void
630 set_def_fwd_config(void)
631 {
632 	set_default_fwd_lcores_config();
633 	set_def_peer_eth_addrs();
634 	set_default_fwd_ports_config();
635 }
636 
637 #ifndef RTE_EXEC_ENV_WINDOWS
638 /* extremely pessimistic estimation of memory required to create a mempool */
639 static int
640 calc_mem_size(uint32_t nb_mbufs, uint32_t mbuf_sz, size_t pgsz, size_t *out)
641 {
642 	unsigned int n_pages, mbuf_per_pg, leftover;
643 	uint64_t total_mem, mbuf_mem, obj_sz;
644 
645 	/* there is no good way to predict how much space the mempool will
646 	 * occupy because it will allocate chunks on the fly, and some of those
647 	 * will come from default DPDK memory while some will come from our
648 	 * external memory, so just assume 128MB will be enough for everyone.
649 	 */
650 	uint64_t hdr_mem = 128 << 20;
651 
652 	/* account for possible non-contiguousness */
653 	obj_sz = rte_mempool_calc_obj_size(mbuf_sz, 0, NULL);
654 	if (obj_sz > pgsz) {
655 		TESTPMD_LOG(ERR, "Object size is bigger than page size\n");
656 		return -1;
657 	}
658 
659 	mbuf_per_pg = pgsz / obj_sz;
660 	leftover = (nb_mbufs % mbuf_per_pg) > 0;
661 	n_pages = (nb_mbufs / mbuf_per_pg) + leftover;
662 
663 	mbuf_mem = n_pages * pgsz;
664 
665 	total_mem = RTE_ALIGN(hdr_mem + mbuf_mem, pgsz);
666 
667 	if (total_mem > SIZE_MAX) {
668 		TESTPMD_LOG(ERR, "Memory size too big\n");
669 		return -1;
670 	}
671 	*out = (size_t)total_mem;
672 
673 	return 0;
674 }
675 
676 static int
677 pagesz_flags(uint64_t page_sz)
678 {
679 	/* as per mmap() manpage, all page sizes are log2 of page size
680 	 * shifted by MAP_HUGE_SHIFT
681 	 */
682 	int log2 = rte_log2_u64(page_sz);
683 
684 	return (log2 << HUGE_SHIFT);
685 }
686 
687 static void *
688 alloc_mem(size_t memsz, size_t pgsz, bool huge)
689 {
690 	void *addr;
691 	int flags;
692 
693 	/* allocate anonymous hugepages */
694 	flags = MAP_ANONYMOUS | MAP_PRIVATE;
695 	if (huge)
696 		flags |= HUGE_FLAG | pagesz_flags(pgsz);
697 
698 	addr = mmap(NULL, memsz, PROT_READ | PROT_WRITE, flags, -1, 0);
699 	if (addr == MAP_FAILED)
700 		return NULL;
701 
702 	return addr;
703 }
704 
705 struct extmem_param {
706 	void *addr;
707 	size_t len;
708 	size_t pgsz;
709 	rte_iova_t *iova_table;
710 	unsigned int iova_table_len;
711 };
712 
713 static int
714 create_extmem(uint32_t nb_mbufs, uint32_t mbuf_sz, struct extmem_param *param,
715 		bool huge)
716 {
717 	uint64_t pgsizes[] = {RTE_PGSIZE_2M, RTE_PGSIZE_1G, /* x86_64, ARM */
718 			RTE_PGSIZE_16M, RTE_PGSIZE_16G};    /* POWER */
719 	unsigned int cur_page, n_pages, pgsz_idx;
720 	size_t mem_sz, cur_pgsz;
721 	rte_iova_t *iovas = NULL;
722 	void *addr;
723 	int ret;
724 
725 	for (pgsz_idx = 0; pgsz_idx < RTE_DIM(pgsizes); pgsz_idx++) {
726 		/* skip anything that is too big */
727 		if (pgsizes[pgsz_idx] > SIZE_MAX)
728 			continue;
729 
730 		cur_pgsz = pgsizes[pgsz_idx];
731 
732 		/* if we were told not to allocate hugepages, override */
733 		if (!huge)
734 			cur_pgsz = sysconf(_SC_PAGESIZE);
735 
736 		ret = calc_mem_size(nb_mbufs, mbuf_sz, cur_pgsz, &mem_sz);
737 		if (ret < 0) {
738 			TESTPMD_LOG(ERR, "Cannot calculate memory size\n");
739 			return -1;
740 		}
741 
742 		/* allocate our memory */
743 		addr = alloc_mem(mem_sz, cur_pgsz, huge);
744 
745 		/* if we couldn't allocate memory with a specified page size,
746 		 * that doesn't mean we can't do it with other page sizes, so
747 		 * try another one.
748 		 */
749 		if (addr == NULL)
750 			continue;
751 
752 		/* store IOVA addresses for every page in this memory area */
753 		n_pages = mem_sz / cur_pgsz;
754 
755 		iovas = malloc(sizeof(*iovas) * n_pages);
756 
757 		if (iovas == NULL) {
758 			TESTPMD_LOG(ERR, "Cannot allocate memory for iova addresses\n");
759 			goto fail;
760 		}
761 		/* lock memory if it's not huge pages */
762 		if (!huge)
763 			mlock(addr, mem_sz);
764 
765 		/* populate IOVA addresses */
766 		for (cur_page = 0; cur_page < n_pages; cur_page++) {
767 			rte_iova_t iova;
768 			size_t offset;
769 			void *cur;
770 
771 			offset = cur_pgsz * cur_page;
772 			cur = RTE_PTR_ADD(addr, offset);
773 
774 			/* touch the page before getting its IOVA */
775 			*(volatile char *)cur = 0;
776 
777 			iova = rte_mem_virt2iova(cur);
778 
779 			iovas[cur_page] = iova;
780 		}
781 
782 		break;
783 	}
784 	/* if we couldn't allocate anything */
785 	if (iovas == NULL)
786 		return -1;
787 
788 	param->addr = addr;
789 	param->len = mem_sz;
790 	param->pgsz = cur_pgsz;
791 	param->iova_table = iovas;
792 	param->iova_table_len = n_pages;
793 
794 	return 0;
795 fail:
796 	if (iovas)
797 		free(iovas);
798 	if (addr)
799 		munmap(addr, mem_sz);
800 
801 	return -1;
802 }
803 
804 static int
805 setup_extmem(uint32_t nb_mbufs, uint32_t mbuf_sz, bool huge)
806 {
807 	struct extmem_param param;
808 	int socket_id, ret;
809 
810 	memset(&param, 0, sizeof(param));
811 
812 	/* check if our heap exists */
813 	socket_id = rte_malloc_heap_get_socket(EXTMEM_HEAP_NAME);
814 	if (socket_id < 0) {
815 		/* create our heap */
816 		ret = rte_malloc_heap_create(EXTMEM_HEAP_NAME);
817 		if (ret < 0) {
818 			TESTPMD_LOG(ERR, "Cannot create heap\n");
819 			return -1;
820 		}
821 	}
822 
823 	ret = create_extmem(nb_mbufs, mbuf_sz, &param, huge);
824 	if (ret < 0) {
825 		TESTPMD_LOG(ERR, "Cannot create memory area\n");
826 		return -1;
827 	}
828 
829 	/* we now have a valid memory area, so add it to heap */
830 	ret = rte_malloc_heap_memory_add(EXTMEM_HEAP_NAME,
831 			param.addr, param.len, param.iova_table,
832 			param.iova_table_len, param.pgsz);
833 
834 	/* when using VFIO, memory is automatically mapped for DMA by EAL */
835 
836 	/* not needed any more */
837 	free(param.iova_table);
838 
839 	if (ret < 0) {
840 		TESTPMD_LOG(ERR, "Cannot add memory to heap\n");
841 		munmap(param.addr, param.len);
842 		return -1;
843 	}
844 
845 	/* success */
846 
847 	TESTPMD_LOG(DEBUG, "Allocated %zuMB of external memory\n",
848 			param.len >> 20);
849 
850 	return 0;
851 }
852 static void
853 dma_unmap_cb(struct rte_mempool *mp __rte_unused, void *opaque __rte_unused,
854 	     struct rte_mempool_memhdr *memhdr, unsigned mem_idx __rte_unused)
855 {
856 	uint16_t pid = 0;
857 	int ret;
858 
859 	RTE_ETH_FOREACH_DEV(pid) {
860 		struct rte_eth_dev_info dev_info;
861 
862 		ret = eth_dev_info_get_print_err(pid, &dev_info);
863 		if (ret != 0) {
864 			TESTPMD_LOG(DEBUG,
865 				    "unable to get device info for port %d on addr 0x%p,"
866 				    "mempool unmapping will not be performed\n",
867 				    pid, memhdr->addr);
868 			continue;
869 		}
870 
871 		ret = rte_dev_dma_unmap(dev_info.device, memhdr->addr, 0, memhdr->len);
872 		if (ret) {
873 			TESTPMD_LOG(DEBUG,
874 				    "unable to DMA unmap addr 0x%p "
875 				    "for device %s\n",
876 				    memhdr->addr, dev_info.device->name);
877 		}
878 	}
879 	ret = rte_extmem_unregister(memhdr->addr, memhdr->len);
880 	if (ret) {
881 		TESTPMD_LOG(DEBUG,
882 			    "unable to un-register addr 0x%p\n", memhdr->addr);
883 	}
884 }
885 
886 static void
887 dma_map_cb(struct rte_mempool *mp __rte_unused, void *opaque __rte_unused,
888 	   struct rte_mempool_memhdr *memhdr, unsigned mem_idx __rte_unused)
889 {
890 	uint16_t pid = 0;
891 	size_t page_size = sysconf(_SC_PAGESIZE);
892 	int ret;
893 
894 	ret = rte_extmem_register(memhdr->addr, memhdr->len, NULL, 0,
895 				  page_size);
896 	if (ret) {
897 		TESTPMD_LOG(DEBUG,
898 			    "unable to register addr 0x%p\n", memhdr->addr);
899 		return;
900 	}
901 	RTE_ETH_FOREACH_DEV(pid) {
902 		struct rte_eth_dev_info dev_info;
903 
904 		ret = eth_dev_info_get_print_err(pid, &dev_info);
905 		if (ret != 0) {
906 			TESTPMD_LOG(DEBUG,
907 				    "unable to get device info for port %d on addr 0x%p,"
908 				    "mempool mapping will not be performed\n",
909 				    pid, memhdr->addr);
910 			continue;
911 		}
912 		ret = rte_dev_dma_map(dev_info.device, memhdr->addr, 0, memhdr->len);
913 		if (ret) {
914 			TESTPMD_LOG(DEBUG,
915 				    "unable to DMA map addr 0x%p "
916 				    "for device %s\n",
917 				    memhdr->addr, dev_info.device->name);
918 		}
919 	}
920 }
921 #endif
922 
923 static unsigned int
924 setup_extbuf(uint32_t nb_mbufs, uint16_t mbuf_sz, unsigned int socket_id,
925 	    char *pool_name, struct rte_pktmbuf_extmem **ext_mem)
926 {
927 	struct rte_pktmbuf_extmem *xmem;
928 	unsigned int ext_num, zone_num, elt_num;
929 	uint16_t elt_size;
930 
931 	elt_size = RTE_ALIGN_CEIL(mbuf_sz, RTE_CACHE_LINE_SIZE);
932 	elt_num = EXTBUF_ZONE_SIZE / elt_size;
933 	zone_num = (nb_mbufs + elt_num - 1) / elt_num;
934 
935 	xmem = malloc(sizeof(struct rte_pktmbuf_extmem) * zone_num);
936 	if (xmem == NULL) {
937 		TESTPMD_LOG(ERR, "Cannot allocate memory for "
938 				 "external buffer descriptors\n");
939 		*ext_mem = NULL;
940 		return 0;
941 	}
942 	for (ext_num = 0; ext_num < zone_num; ext_num++) {
943 		struct rte_pktmbuf_extmem *xseg = xmem + ext_num;
944 		const struct rte_memzone *mz;
945 		char mz_name[RTE_MEMZONE_NAMESIZE];
946 		int ret;
947 
948 		ret = snprintf(mz_name, sizeof(mz_name),
949 			RTE_MEMPOOL_MZ_FORMAT "_xb_%u", pool_name, ext_num);
950 		if (ret < 0 || ret >= (int)sizeof(mz_name)) {
951 			errno = ENAMETOOLONG;
952 			ext_num = 0;
953 			break;
954 		}
955 		mz = rte_memzone_reserve_aligned(mz_name, EXTBUF_ZONE_SIZE,
956 						 socket_id,
957 						 RTE_MEMZONE_IOVA_CONTIG |
958 						 RTE_MEMZONE_1GB |
959 						 RTE_MEMZONE_SIZE_HINT_ONLY,
960 						 EXTBUF_ZONE_SIZE);
961 		if (mz == NULL) {
962 			/*
963 			 * The caller exits on external buffer creation
964 			 * error, so there is no need to free memzones.
965 			 */
966 			errno = ENOMEM;
967 			ext_num = 0;
968 			break;
969 		}
970 		xseg->buf_ptr = mz->addr;
971 		xseg->buf_iova = mz->iova;
972 		xseg->buf_len = EXTBUF_ZONE_SIZE;
973 		xseg->elt_size = elt_size;
974 	}
975 	if (ext_num == 0 && xmem != NULL) {
976 		free(xmem);
977 		xmem = NULL;
978 	}
979 	*ext_mem = xmem;
980 	return ext_num;
981 }
982 
983 /*
984  * Configuration initialisation done once at init time.
985  */
986 static struct rte_mempool *
987 mbuf_pool_create(uint16_t mbuf_seg_size, unsigned nb_mbuf,
988 		 unsigned int socket_id, uint16_t size_idx)
989 {
990 	char pool_name[RTE_MEMPOOL_NAMESIZE];
991 	struct rte_mempool *rte_mp = NULL;
992 #ifndef RTE_EXEC_ENV_WINDOWS
993 	uint32_t mb_size;
994 
995 	mb_size = sizeof(struct rte_mbuf) + mbuf_seg_size;
996 #endif
997 	mbuf_poolname_build(socket_id, pool_name, sizeof(pool_name), size_idx);
998 
999 	TESTPMD_LOG(INFO,
1000 		"create a new mbuf pool <%s>: n=%u, size=%u, socket=%u\n",
1001 		pool_name, nb_mbuf, mbuf_seg_size, socket_id);
1002 
1003 	switch (mp_alloc_type) {
1004 	case MP_ALLOC_NATIVE:
1005 		{
1006 			/* wrapper to rte_mempool_create() */
1007 			TESTPMD_LOG(INFO, "preferred mempool ops selected: %s\n",
1008 					rte_mbuf_best_mempool_ops());
1009 			rte_mp = rte_pktmbuf_pool_create(pool_name, nb_mbuf,
1010 				mb_mempool_cache, 0, mbuf_seg_size, socket_id);
1011 			break;
1012 		}
1013 #ifndef RTE_EXEC_ENV_WINDOWS
1014 	case MP_ALLOC_ANON:
1015 		{
1016 			rte_mp = rte_mempool_create_empty(pool_name, nb_mbuf,
1017 				mb_size, (unsigned int) mb_mempool_cache,
1018 				sizeof(struct rte_pktmbuf_pool_private),
1019 				socket_id, mempool_flags);
1020 			if (rte_mp == NULL)
1021 				goto err;
1022 
1023 			if (rte_mempool_populate_anon(rte_mp) == 0) {
1024 				rte_mempool_free(rte_mp);
1025 				rte_mp = NULL;
1026 				goto err;
1027 			}
1028 			rte_pktmbuf_pool_init(rte_mp, NULL);
1029 			rte_mempool_obj_iter(rte_mp, rte_pktmbuf_init, NULL);
1030 			rte_mempool_mem_iter(rte_mp, dma_map_cb, NULL);
1031 			break;
1032 		}
1033 	case MP_ALLOC_XMEM:
1034 	case MP_ALLOC_XMEM_HUGE:
1035 		{
1036 			int heap_socket;
1037 			bool huge = mp_alloc_type == MP_ALLOC_XMEM_HUGE;
1038 
1039 			if (setup_extmem(nb_mbuf, mbuf_seg_size, huge) < 0)
1040 				rte_exit(EXIT_FAILURE, "Could not create external memory\n");
1041 
1042 			heap_socket =
1043 				rte_malloc_heap_get_socket(EXTMEM_HEAP_NAME);
1044 			if (heap_socket < 0)
1045 				rte_exit(EXIT_FAILURE, "Could not get external memory socket ID\n");
1046 
1047 			TESTPMD_LOG(INFO, "preferred mempool ops selected: %s\n",
1048 					rte_mbuf_best_mempool_ops());
1049 			rte_mp = rte_pktmbuf_pool_create(pool_name, nb_mbuf,
1050 					mb_mempool_cache, 0, mbuf_seg_size,
1051 					heap_socket);
1052 			break;
1053 		}
1054 #endif
1055 	case MP_ALLOC_XBUF:
1056 		{
1057 			struct rte_pktmbuf_extmem *ext_mem;
1058 			unsigned int ext_num;
1059 
1060 			ext_num = setup_extbuf(nb_mbuf,	mbuf_seg_size,
1061 					       socket_id, pool_name, &ext_mem);
1062 			if (ext_num == 0)
1063 				rte_exit(EXIT_FAILURE,
1064 					 "Can't create pinned data buffers\n");
1065 
1066 			TESTPMD_LOG(INFO, "preferred mempool ops selected: %s\n",
1067 					rte_mbuf_best_mempool_ops());
1068 			rte_mp = rte_pktmbuf_pool_create_extbuf
1069 					(pool_name, nb_mbuf, mb_mempool_cache,
1070 					 0, mbuf_seg_size, socket_id,
1071 					 ext_mem, ext_num);
1072 			free(ext_mem);
1073 			break;
1074 		}
1075 	default:
1076 		{
1077 			rte_exit(EXIT_FAILURE, "Invalid mempool creation mode\n");
1078 		}
1079 	}
1080 
1081 #ifndef RTE_EXEC_ENV_WINDOWS
1082 err:
1083 #endif
1084 	if (rte_mp == NULL) {
1085 		rte_exit(EXIT_FAILURE,
1086 			"Creation of mbuf pool for socket %u failed: %s\n",
1087 			socket_id, rte_strerror(rte_errno));
1088 	} else if (verbose_level > 0) {
1089 		rte_mempool_dump(stdout, rte_mp);
1090 	}
1091 	return rte_mp;
1092 }
1093 
1094 /*
1095  * Check given socket id is valid or not with NUMA mode,
1096  * if valid, return 0, else return -1
1097  */
1098 static int
1099 check_socket_id(const unsigned int socket_id)
1100 {
1101 	static int warning_once = 0;
1102 
1103 	if (new_socket_id(socket_id)) {
1104 		if (!warning_once && numa_support)
1105 			fprintf(stderr,
1106 				"Warning: NUMA should be configured manually by using --port-numa-config and --ring-numa-config parameters along with --numa.\n");
1107 		warning_once = 1;
1108 		return -1;
1109 	}
1110 	return 0;
1111 }
1112 
1113 /*
1114  * Get the allowed maximum number of RX queues.
1115  * *pid return the port id which has minimal value of
1116  * max_rx_queues in all ports.
1117  */
1118 queueid_t
1119 get_allowed_max_nb_rxq(portid_t *pid)
1120 {
1121 	queueid_t allowed_max_rxq = RTE_MAX_QUEUES_PER_PORT;
1122 	bool max_rxq_valid = false;
1123 	portid_t pi;
1124 	struct rte_eth_dev_info dev_info;
1125 
1126 	RTE_ETH_FOREACH_DEV(pi) {
1127 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1128 			continue;
1129 
1130 		max_rxq_valid = true;
1131 		if (dev_info.max_rx_queues < allowed_max_rxq) {
1132 			allowed_max_rxq = dev_info.max_rx_queues;
1133 			*pid = pi;
1134 		}
1135 	}
1136 	return max_rxq_valid ? allowed_max_rxq : 0;
1137 }
1138 
1139 /*
1140  * Check input rxq is valid or not.
1141  * If input rxq is not greater than any of maximum number
1142  * of RX queues of all ports, it is valid.
1143  * if valid, return 0, else return -1
1144  */
1145 int
1146 check_nb_rxq(queueid_t rxq)
1147 {
1148 	queueid_t allowed_max_rxq;
1149 	portid_t pid = 0;
1150 
1151 	allowed_max_rxq = get_allowed_max_nb_rxq(&pid);
1152 	if (rxq > allowed_max_rxq) {
1153 		fprintf(stderr,
1154 			"Fail: input rxq (%u) can't be greater than max_rx_queues (%u) of port %u\n",
1155 			rxq, allowed_max_rxq, pid);
1156 		return -1;
1157 	}
1158 	return 0;
1159 }
1160 
1161 /*
1162  * Get the allowed maximum number of TX queues.
1163  * *pid return the port id which has minimal value of
1164  * max_tx_queues in all ports.
1165  */
1166 queueid_t
1167 get_allowed_max_nb_txq(portid_t *pid)
1168 {
1169 	queueid_t allowed_max_txq = RTE_MAX_QUEUES_PER_PORT;
1170 	bool max_txq_valid = false;
1171 	portid_t pi;
1172 	struct rte_eth_dev_info dev_info;
1173 
1174 	RTE_ETH_FOREACH_DEV(pi) {
1175 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1176 			continue;
1177 
1178 		max_txq_valid = true;
1179 		if (dev_info.max_tx_queues < allowed_max_txq) {
1180 			allowed_max_txq = dev_info.max_tx_queues;
1181 			*pid = pi;
1182 		}
1183 	}
1184 	return max_txq_valid ? allowed_max_txq : 0;
1185 }
1186 
1187 /*
1188  * Check input txq is valid or not.
1189  * If input txq is not greater than any of maximum number
1190  * of TX queues of all ports, it is valid.
1191  * if valid, return 0, else return -1
1192  */
1193 int
1194 check_nb_txq(queueid_t txq)
1195 {
1196 	queueid_t allowed_max_txq;
1197 	portid_t pid = 0;
1198 
1199 	allowed_max_txq = get_allowed_max_nb_txq(&pid);
1200 	if (txq > allowed_max_txq) {
1201 		fprintf(stderr,
1202 			"Fail: input txq (%u) can't be greater than max_tx_queues (%u) of port %u\n",
1203 			txq, allowed_max_txq, pid);
1204 		return -1;
1205 	}
1206 	return 0;
1207 }
1208 
1209 /*
1210  * Get the allowed maximum number of RXDs of every rx queue.
1211  * *pid return the port id which has minimal value of
1212  * max_rxd in all queues of all ports.
1213  */
1214 static uint16_t
1215 get_allowed_max_nb_rxd(portid_t *pid)
1216 {
1217 	uint16_t allowed_max_rxd = UINT16_MAX;
1218 	portid_t pi;
1219 	struct rte_eth_dev_info dev_info;
1220 
1221 	RTE_ETH_FOREACH_DEV(pi) {
1222 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1223 			continue;
1224 
1225 		if (dev_info.rx_desc_lim.nb_max < allowed_max_rxd) {
1226 			allowed_max_rxd = dev_info.rx_desc_lim.nb_max;
1227 			*pid = pi;
1228 		}
1229 	}
1230 	return allowed_max_rxd;
1231 }
1232 
1233 /*
1234  * Get the allowed minimal number of RXDs of every rx queue.
1235  * *pid return the port id which has minimal value of
1236  * min_rxd in all queues of all ports.
1237  */
1238 static uint16_t
1239 get_allowed_min_nb_rxd(portid_t *pid)
1240 {
1241 	uint16_t allowed_min_rxd = 0;
1242 	portid_t pi;
1243 	struct rte_eth_dev_info dev_info;
1244 
1245 	RTE_ETH_FOREACH_DEV(pi) {
1246 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1247 			continue;
1248 
1249 		if (dev_info.rx_desc_lim.nb_min > allowed_min_rxd) {
1250 			allowed_min_rxd = dev_info.rx_desc_lim.nb_min;
1251 			*pid = pi;
1252 		}
1253 	}
1254 
1255 	return allowed_min_rxd;
1256 }
1257 
1258 /*
1259  * Check input rxd is valid or not.
1260  * If input rxd is not greater than any of maximum number
1261  * of RXDs of every Rx queues and is not less than any of
1262  * minimal number of RXDs of every Rx queues, it is valid.
1263  * if valid, return 0, else return -1
1264  */
1265 int
1266 check_nb_rxd(queueid_t rxd)
1267 {
1268 	uint16_t allowed_max_rxd;
1269 	uint16_t allowed_min_rxd;
1270 	portid_t pid = 0;
1271 
1272 	allowed_max_rxd = get_allowed_max_nb_rxd(&pid);
1273 	if (rxd > allowed_max_rxd) {
1274 		fprintf(stderr,
1275 			"Fail: input rxd (%u) can't be greater than max_rxds (%u) of port %u\n",
1276 			rxd, allowed_max_rxd, pid);
1277 		return -1;
1278 	}
1279 
1280 	allowed_min_rxd = get_allowed_min_nb_rxd(&pid);
1281 	if (rxd < allowed_min_rxd) {
1282 		fprintf(stderr,
1283 			"Fail: input rxd (%u) can't be less than min_rxds (%u) of port %u\n",
1284 			rxd, allowed_min_rxd, pid);
1285 		return -1;
1286 	}
1287 
1288 	return 0;
1289 }
1290 
1291 /*
1292  * Get the allowed maximum number of TXDs of every rx queues.
1293  * *pid return the port id which has minimal value of
1294  * max_txd in every tx queue.
1295  */
1296 static uint16_t
1297 get_allowed_max_nb_txd(portid_t *pid)
1298 {
1299 	uint16_t allowed_max_txd = UINT16_MAX;
1300 	portid_t pi;
1301 	struct rte_eth_dev_info dev_info;
1302 
1303 	RTE_ETH_FOREACH_DEV(pi) {
1304 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1305 			continue;
1306 
1307 		if (dev_info.tx_desc_lim.nb_max < allowed_max_txd) {
1308 			allowed_max_txd = dev_info.tx_desc_lim.nb_max;
1309 			*pid = pi;
1310 		}
1311 	}
1312 	return allowed_max_txd;
1313 }
1314 
1315 /*
1316  * Get the allowed maximum number of TXDs of every tx queues.
1317  * *pid return the port id which has minimal value of
1318  * min_txd in every tx queue.
1319  */
1320 static uint16_t
1321 get_allowed_min_nb_txd(portid_t *pid)
1322 {
1323 	uint16_t allowed_min_txd = 0;
1324 	portid_t pi;
1325 	struct rte_eth_dev_info dev_info;
1326 
1327 	RTE_ETH_FOREACH_DEV(pi) {
1328 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1329 			continue;
1330 
1331 		if (dev_info.tx_desc_lim.nb_min > allowed_min_txd) {
1332 			allowed_min_txd = dev_info.tx_desc_lim.nb_min;
1333 			*pid = pi;
1334 		}
1335 	}
1336 
1337 	return allowed_min_txd;
1338 }
1339 
1340 /*
1341  * Check input txd is valid or not.
1342  * If input txd is not greater than any of maximum number
1343  * of TXDs of every Rx queues, it is valid.
1344  * if valid, return 0, else return -1
1345  */
1346 int
1347 check_nb_txd(queueid_t txd)
1348 {
1349 	uint16_t allowed_max_txd;
1350 	uint16_t allowed_min_txd;
1351 	portid_t pid = 0;
1352 
1353 	allowed_max_txd = get_allowed_max_nb_txd(&pid);
1354 	if (txd > allowed_max_txd) {
1355 		fprintf(stderr,
1356 			"Fail: input txd (%u) can't be greater than max_txds (%u) of port %u\n",
1357 			txd, allowed_max_txd, pid);
1358 		return -1;
1359 	}
1360 
1361 	allowed_min_txd = get_allowed_min_nb_txd(&pid);
1362 	if (txd < allowed_min_txd) {
1363 		fprintf(stderr,
1364 			"Fail: input txd (%u) can't be less than min_txds (%u) of port %u\n",
1365 			txd, allowed_min_txd, pid);
1366 		return -1;
1367 	}
1368 	return 0;
1369 }
1370 
1371 
1372 /*
1373  * Get the allowed maximum number of hairpin queues.
1374  * *pid return the port id which has minimal value of
1375  * max_hairpin_queues in all ports.
1376  */
1377 queueid_t
1378 get_allowed_max_nb_hairpinq(portid_t *pid)
1379 {
1380 	queueid_t allowed_max_hairpinq = RTE_MAX_QUEUES_PER_PORT;
1381 	portid_t pi;
1382 	struct rte_eth_hairpin_cap cap;
1383 
1384 	RTE_ETH_FOREACH_DEV(pi) {
1385 		if (rte_eth_dev_hairpin_capability_get(pi, &cap) != 0) {
1386 			*pid = pi;
1387 			return 0;
1388 		}
1389 		if (cap.max_nb_queues < allowed_max_hairpinq) {
1390 			allowed_max_hairpinq = cap.max_nb_queues;
1391 			*pid = pi;
1392 		}
1393 	}
1394 	return allowed_max_hairpinq;
1395 }
1396 
1397 /*
1398  * Check input hairpin is valid or not.
1399  * If input hairpin is not greater than any of maximum number
1400  * of hairpin queues of all ports, it is valid.
1401  * if valid, return 0, else return -1
1402  */
1403 int
1404 check_nb_hairpinq(queueid_t hairpinq)
1405 {
1406 	queueid_t allowed_max_hairpinq;
1407 	portid_t pid = 0;
1408 
1409 	allowed_max_hairpinq = get_allowed_max_nb_hairpinq(&pid);
1410 	if (hairpinq > allowed_max_hairpinq) {
1411 		fprintf(stderr,
1412 			"Fail: input hairpin (%u) can't be greater than max_hairpin_queues (%u) of port %u\n",
1413 			hairpinq, allowed_max_hairpinq, pid);
1414 		return -1;
1415 	}
1416 	return 0;
1417 }
1418 
1419 static void
1420 init_config_port_offloads(portid_t pid, uint32_t socket_id)
1421 {
1422 	struct rte_port *port = &ports[pid];
1423 	uint16_t data_size;
1424 	int ret;
1425 	int i;
1426 
1427 	port->dev_conf.txmode = tx_mode;
1428 	port->dev_conf.rxmode = rx_mode;
1429 
1430 	ret = eth_dev_info_get_print_err(pid, &port->dev_info);
1431 	if (ret != 0)
1432 		rte_exit(EXIT_FAILURE, "rte_eth_dev_info_get() failed\n");
1433 
1434 	ret = update_jumbo_frame_offload(pid);
1435 	if (ret != 0)
1436 		fprintf(stderr,
1437 			"Updating jumbo frame offload failed for port %u\n",
1438 			pid);
1439 
1440 	if (!(port->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE))
1441 		port->dev_conf.txmode.offloads &=
1442 			~DEV_TX_OFFLOAD_MBUF_FAST_FREE;
1443 
1444 	/* Apply Rx offloads configuration */
1445 	for (i = 0; i < port->dev_info.max_rx_queues; i++)
1446 		port->rx_conf[i].offloads = port->dev_conf.rxmode.offloads;
1447 	/* Apply Tx offloads configuration */
1448 	for (i = 0; i < port->dev_info.max_tx_queues; i++)
1449 		port->tx_conf[i].offloads = port->dev_conf.txmode.offloads;
1450 
1451 	if (eth_link_speed)
1452 		port->dev_conf.link_speeds = eth_link_speed;
1453 
1454 	/* set flag to initialize port/queue */
1455 	port->need_reconfig = 1;
1456 	port->need_reconfig_queues = 1;
1457 	port->socket_id = socket_id;
1458 	port->tx_metadata = 0;
1459 
1460 	/*
1461 	 * Check for maximum number of segments per MTU.
1462 	 * Accordingly update the mbuf data size.
1463 	 */
1464 	if (port->dev_info.rx_desc_lim.nb_mtu_seg_max != UINT16_MAX &&
1465 	    port->dev_info.rx_desc_lim.nb_mtu_seg_max != 0) {
1466 		data_size = rx_mode.max_rx_pkt_len /
1467 			port->dev_info.rx_desc_lim.nb_mtu_seg_max;
1468 
1469 		if ((data_size + RTE_PKTMBUF_HEADROOM) > mbuf_data_size[0]) {
1470 			mbuf_data_size[0] = data_size + RTE_PKTMBUF_HEADROOM;
1471 			TESTPMD_LOG(WARNING,
1472 				    "Configured mbuf size of the first segment %hu\n",
1473 				    mbuf_data_size[0]);
1474 		}
1475 	}
1476 }
1477 
1478 static void
1479 init_config(void)
1480 {
1481 	portid_t pid;
1482 	struct rte_mempool *mbp;
1483 	unsigned int nb_mbuf_per_pool;
1484 	lcoreid_t  lc_id;
1485 	struct rte_gro_param gro_param;
1486 	uint32_t gso_types;
1487 
1488 	/* Configuration of logical cores. */
1489 	fwd_lcores = rte_zmalloc("testpmd: fwd_lcores",
1490 				sizeof(struct fwd_lcore *) * nb_lcores,
1491 				RTE_CACHE_LINE_SIZE);
1492 	if (fwd_lcores == NULL) {
1493 		rte_exit(EXIT_FAILURE, "rte_zmalloc(%d (struct fwd_lcore *)) "
1494 							"failed\n", nb_lcores);
1495 	}
1496 	for (lc_id = 0; lc_id < nb_lcores; lc_id++) {
1497 		fwd_lcores[lc_id] = rte_zmalloc("testpmd: struct fwd_lcore",
1498 					       sizeof(struct fwd_lcore),
1499 					       RTE_CACHE_LINE_SIZE);
1500 		if (fwd_lcores[lc_id] == NULL) {
1501 			rte_exit(EXIT_FAILURE, "rte_zmalloc(struct fwd_lcore) "
1502 								"failed\n");
1503 		}
1504 		fwd_lcores[lc_id]->cpuid_idx = lc_id;
1505 	}
1506 
1507 	RTE_ETH_FOREACH_DEV(pid) {
1508 		uint32_t socket_id;
1509 
1510 		if (numa_support) {
1511 			socket_id = port_numa[pid];
1512 			if (port_numa[pid] == NUMA_NO_CONFIG) {
1513 				socket_id = rte_eth_dev_socket_id(pid);
1514 
1515 				/*
1516 				 * if socket_id is invalid,
1517 				 * set to the first available socket.
1518 				 */
1519 				if (check_socket_id(socket_id) < 0)
1520 					socket_id = socket_ids[0];
1521 			}
1522 		} else {
1523 			socket_id = (socket_num == UMA_NO_CONFIG) ?
1524 				    0 : socket_num;
1525 		}
1526 		/* Apply default TxRx configuration for all ports */
1527 		init_config_port_offloads(pid, socket_id);
1528 	}
1529 	/*
1530 	 * Create pools of mbuf.
1531 	 * If NUMA support is disabled, create a single pool of mbuf in
1532 	 * socket 0 memory by default.
1533 	 * Otherwise, create a pool of mbuf in the memory of sockets 0 and 1.
1534 	 *
1535 	 * Use the maximum value of nb_rxd and nb_txd here, then nb_rxd and
1536 	 * nb_txd can be configured at run time.
1537 	 */
1538 	if (param_total_num_mbufs)
1539 		nb_mbuf_per_pool = param_total_num_mbufs;
1540 	else {
1541 		nb_mbuf_per_pool = RTE_TEST_RX_DESC_MAX +
1542 			(nb_lcores * mb_mempool_cache) +
1543 			RTE_TEST_TX_DESC_MAX + MAX_PKT_BURST;
1544 		nb_mbuf_per_pool *= RTE_MAX_ETHPORTS;
1545 	}
1546 
1547 	if (numa_support) {
1548 		uint8_t i, j;
1549 
1550 		for (i = 0; i < num_sockets; i++)
1551 			for (j = 0; j < mbuf_data_size_n; j++)
1552 				mempools[i * MAX_SEGS_BUFFER_SPLIT + j] =
1553 					mbuf_pool_create(mbuf_data_size[j],
1554 							  nb_mbuf_per_pool,
1555 							  socket_ids[i], j);
1556 	} else {
1557 		uint8_t i;
1558 
1559 		for (i = 0; i < mbuf_data_size_n; i++)
1560 			mempools[i] = mbuf_pool_create
1561 					(mbuf_data_size[i],
1562 					 nb_mbuf_per_pool,
1563 					 socket_num == UMA_NO_CONFIG ?
1564 					 0 : socket_num, i);
1565 	}
1566 
1567 	init_port_config();
1568 
1569 	gso_types = DEV_TX_OFFLOAD_TCP_TSO | DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
1570 		DEV_TX_OFFLOAD_GRE_TNL_TSO | DEV_TX_OFFLOAD_UDP_TSO;
1571 	/*
1572 	 * Records which Mbuf pool to use by each logical core, if needed.
1573 	 */
1574 	for (lc_id = 0; lc_id < nb_lcores; lc_id++) {
1575 		mbp = mbuf_pool_find(
1576 			rte_lcore_to_socket_id(fwd_lcores_cpuids[lc_id]), 0);
1577 
1578 		if (mbp == NULL)
1579 			mbp = mbuf_pool_find(0, 0);
1580 		fwd_lcores[lc_id]->mbp = mbp;
1581 		/* initialize GSO context */
1582 		fwd_lcores[lc_id]->gso_ctx.direct_pool = mbp;
1583 		fwd_lcores[lc_id]->gso_ctx.indirect_pool = mbp;
1584 		fwd_lcores[lc_id]->gso_ctx.gso_types = gso_types;
1585 		fwd_lcores[lc_id]->gso_ctx.gso_size = RTE_ETHER_MAX_LEN -
1586 			RTE_ETHER_CRC_LEN;
1587 		fwd_lcores[lc_id]->gso_ctx.flag = 0;
1588 	}
1589 
1590 	fwd_config_setup();
1591 
1592 	/* create a gro context for each lcore */
1593 	gro_param.gro_types = RTE_GRO_TCP_IPV4;
1594 	gro_param.max_flow_num = GRO_MAX_FLUSH_CYCLES;
1595 	gro_param.max_item_per_flow = MAX_PKT_BURST;
1596 	for (lc_id = 0; lc_id < nb_lcores; lc_id++) {
1597 		gro_param.socket_id = rte_lcore_to_socket_id(
1598 				fwd_lcores_cpuids[lc_id]);
1599 		fwd_lcores[lc_id]->gro_ctx = rte_gro_ctx_create(&gro_param);
1600 		if (fwd_lcores[lc_id]->gro_ctx == NULL) {
1601 			rte_exit(EXIT_FAILURE,
1602 					"rte_gro_ctx_create() failed\n");
1603 		}
1604 	}
1605 }
1606 
1607 
1608 void
1609 reconfig(portid_t new_port_id, unsigned socket_id)
1610 {
1611 	/* Reconfiguration of Ethernet ports. */
1612 	init_config_port_offloads(new_port_id, socket_id);
1613 	init_port_config();
1614 }
1615 
1616 
1617 int
1618 init_fwd_streams(void)
1619 {
1620 	portid_t pid;
1621 	struct rte_port *port;
1622 	streamid_t sm_id, nb_fwd_streams_new;
1623 	queueid_t q;
1624 
1625 	/* set socket id according to numa or not */
1626 	RTE_ETH_FOREACH_DEV(pid) {
1627 		port = &ports[pid];
1628 		if (nb_rxq > port->dev_info.max_rx_queues) {
1629 			fprintf(stderr,
1630 				"Fail: nb_rxq(%d) is greater than max_rx_queues(%d)\n",
1631 				nb_rxq, port->dev_info.max_rx_queues);
1632 			return -1;
1633 		}
1634 		if (nb_txq > port->dev_info.max_tx_queues) {
1635 			fprintf(stderr,
1636 				"Fail: nb_txq(%d) is greater than max_tx_queues(%d)\n",
1637 				nb_txq, port->dev_info.max_tx_queues);
1638 			return -1;
1639 		}
1640 		if (numa_support) {
1641 			if (port_numa[pid] != NUMA_NO_CONFIG)
1642 				port->socket_id = port_numa[pid];
1643 			else {
1644 				port->socket_id = rte_eth_dev_socket_id(pid);
1645 
1646 				/*
1647 				 * if socket_id is invalid,
1648 				 * set to the first available socket.
1649 				 */
1650 				if (check_socket_id(port->socket_id) < 0)
1651 					port->socket_id = socket_ids[0];
1652 			}
1653 		}
1654 		else {
1655 			if (socket_num == UMA_NO_CONFIG)
1656 				port->socket_id = 0;
1657 			else
1658 				port->socket_id = socket_num;
1659 		}
1660 	}
1661 
1662 	q = RTE_MAX(nb_rxq, nb_txq);
1663 	if (q == 0) {
1664 		fprintf(stderr,
1665 			"Fail: Cannot allocate fwd streams as number of queues is 0\n");
1666 		return -1;
1667 	}
1668 	nb_fwd_streams_new = (streamid_t)(nb_ports * q);
1669 	if (nb_fwd_streams_new == nb_fwd_streams)
1670 		return 0;
1671 	/* clear the old */
1672 	if (fwd_streams != NULL) {
1673 		for (sm_id = 0; sm_id < nb_fwd_streams; sm_id++) {
1674 			if (fwd_streams[sm_id] == NULL)
1675 				continue;
1676 			rte_free(fwd_streams[sm_id]);
1677 			fwd_streams[sm_id] = NULL;
1678 		}
1679 		rte_free(fwd_streams);
1680 		fwd_streams = NULL;
1681 	}
1682 
1683 	/* init new */
1684 	nb_fwd_streams = nb_fwd_streams_new;
1685 	if (nb_fwd_streams) {
1686 		fwd_streams = rte_zmalloc("testpmd: fwd_streams",
1687 			sizeof(struct fwd_stream *) * nb_fwd_streams,
1688 			RTE_CACHE_LINE_SIZE);
1689 		if (fwd_streams == NULL)
1690 			rte_exit(EXIT_FAILURE, "rte_zmalloc(%d"
1691 				 " (struct fwd_stream *)) failed\n",
1692 				 nb_fwd_streams);
1693 
1694 		for (sm_id = 0; sm_id < nb_fwd_streams; sm_id++) {
1695 			fwd_streams[sm_id] = rte_zmalloc("testpmd:"
1696 				" struct fwd_stream", sizeof(struct fwd_stream),
1697 				RTE_CACHE_LINE_SIZE);
1698 			if (fwd_streams[sm_id] == NULL)
1699 				rte_exit(EXIT_FAILURE, "rte_zmalloc"
1700 					 "(struct fwd_stream) failed\n");
1701 		}
1702 	}
1703 
1704 	return 0;
1705 }
1706 
1707 static void
1708 pkt_burst_stats_display(const char *rx_tx, struct pkt_burst_stats *pbs)
1709 {
1710 	uint64_t total_burst, sburst;
1711 	uint64_t nb_burst;
1712 	uint64_t burst_stats[4];
1713 	uint16_t pktnb_stats[4];
1714 	uint16_t nb_pkt;
1715 	int burst_percent[4], sburstp;
1716 	int i;
1717 
1718 	/*
1719 	 * First compute the total number of packet bursts and the
1720 	 * two highest numbers of bursts of the same number of packets.
1721 	 */
1722 	memset(&burst_stats, 0x0, sizeof(burst_stats));
1723 	memset(&pktnb_stats, 0x0, sizeof(pktnb_stats));
1724 
1725 	/* Show stats for 0 burst size always */
1726 	total_burst = pbs->pkt_burst_spread[0];
1727 	burst_stats[0] = pbs->pkt_burst_spread[0];
1728 	pktnb_stats[0] = 0;
1729 
1730 	/* Find the next 2 burst sizes with highest occurrences. */
1731 	for (nb_pkt = 1; nb_pkt < MAX_PKT_BURST; nb_pkt++) {
1732 		nb_burst = pbs->pkt_burst_spread[nb_pkt];
1733 
1734 		if (nb_burst == 0)
1735 			continue;
1736 
1737 		total_burst += nb_burst;
1738 
1739 		if (nb_burst > burst_stats[1]) {
1740 			burst_stats[2] = burst_stats[1];
1741 			pktnb_stats[2] = pktnb_stats[1];
1742 			burst_stats[1] = nb_burst;
1743 			pktnb_stats[1] = nb_pkt;
1744 		} else if (nb_burst > burst_stats[2]) {
1745 			burst_stats[2] = nb_burst;
1746 			pktnb_stats[2] = nb_pkt;
1747 		}
1748 	}
1749 	if (total_burst == 0)
1750 		return;
1751 
1752 	printf("  %s-bursts : %"PRIu64" [", rx_tx, total_burst);
1753 	for (i = 0, sburst = 0, sburstp = 0; i < 4; i++) {
1754 		if (i == 3) {
1755 			printf("%d%% of other]\n", 100 - sburstp);
1756 			return;
1757 		}
1758 
1759 		sburst += burst_stats[i];
1760 		if (sburst == total_burst) {
1761 			printf("%d%% of %d pkts]\n",
1762 				100 - sburstp, (int) pktnb_stats[i]);
1763 			return;
1764 		}
1765 
1766 		burst_percent[i] =
1767 			(double)burst_stats[i] / total_burst * 100;
1768 		printf("%d%% of %d pkts + ",
1769 			burst_percent[i], (int) pktnb_stats[i]);
1770 		sburstp += burst_percent[i];
1771 	}
1772 }
1773 
1774 static void
1775 fwd_stream_stats_display(streamid_t stream_id)
1776 {
1777 	struct fwd_stream *fs;
1778 	static const char *fwd_top_stats_border = "-------";
1779 
1780 	fs = fwd_streams[stream_id];
1781 	if ((fs->rx_packets == 0) && (fs->tx_packets == 0) &&
1782 	    (fs->fwd_dropped == 0))
1783 		return;
1784 	printf("\n  %s Forward Stats for RX Port=%2d/Queue=%2d -> "
1785 	       "TX Port=%2d/Queue=%2d %s\n",
1786 	       fwd_top_stats_border, fs->rx_port, fs->rx_queue,
1787 	       fs->tx_port, fs->tx_queue, fwd_top_stats_border);
1788 	printf("  RX-packets: %-14"PRIu64" TX-packets: %-14"PRIu64
1789 	       " TX-dropped: %-14"PRIu64,
1790 	       fs->rx_packets, fs->tx_packets, fs->fwd_dropped);
1791 
1792 	/* if checksum mode */
1793 	if (cur_fwd_eng == &csum_fwd_engine) {
1794 		printf("  RX- bad IP checksum: %-14"PRIu64
1795 		       "  Rx- bad L4 checksum: %-14"PRIu64
1796 		       " Rx- bad outer L4 checksum: %-14"PRIu64"\n",
1797 			fs->rx_bad_ip_csum, fs->rx_bad_l4_csum,
1798 			fs->rx_bad_outer_l4_csum);
1799 		printf(" RX- bad outer IP checksum: %-14"PRIu64"\n",
1800 			fs->rx_bad_outer_ip_csum);
1801 	} else {
1802 		printf("\n");
1803 	}
1804 
1805 	if (record_burst_stats) {
1806 		pkt_burst_stats_display("RX", &fs->rx_burst_stats);
1807 		pkt_burst_stats_display("TX", &fs->tx_burst_stats);
1808 	}
1809 }
1810 
1811 void
1812 fwd_stats_display(void)
1813 {
1814 	static const char *fwd_stats_border = "----------------------";
1815 	static const char *acc_stats_border = "+++++++++++++++";
1816 	struct {
1817 		struct fwd_stream *rx_stream;
1818 		struct fwd_stream *tx_stream;
1819 		uint64_t tx_dropped;
1820 		uint64_t rx_bad_ip_csum;
1821 		uint64_t rx_bad_l4_csum;
1822 		uint64_t rx_bad_outer_l4_csum;
1823 		uint64_t rx_bad_outer_ip_csum;
1824 	} ports_stats[RTE_MAX_ETHPORTS];
1825 	uint64_t total_rx_dropped = 0;
1826 	uint64_t total_tx_dropped = 0;
1827 	uint64_t total_rx_nombuf = 0;
1828 	struct rte_eth_stats stats;
1829 	uint64_t fwd_cycles = 0;
1830 	uint64_t total_recv = 0;
1831 	uint64_t total_xmit = 0;
1832 	struct rte_port *port;
1833 	streamid_t sm_id;
1834 	portid_t pt_id;
1835 	int i;
1836 
1837 	memset(ports_stats, 0, sizeof(ports_stats));
1838 
1839 	for (sm_id = 0; sm_id < cur_fwd_config.nb_fwd_streams; sm_id++) {
1840 		struct fwd_stream *fs = fwd_streams[sm_id];
1841 
1842 		if (cur_fwd_config.nb_fwd_streams >
1843 		    cur_fwd_config.nb_fwd_ports) {
1844 			fwd_stream_stats_display(sm_id);
1845 		} else {
1846 			ports_stats[fs->tx_port].tx_stream = fs;
1847 			ports_stats[fs->rx_port].rx_stream = fs;
1848 		}
1849 
1850 		ports_stats[fs->tx_port].tx_dropped += fs->fwd_dropped;
1851 
1852 		ports_stats[fs->rx_port].rx_bad_ip_csum += fs->rx_bad_ip_csum;
1853 		ports_stats[fs->rx_port].rx_bad_l4_csum += fs->rx_bad_l4_csum;
1854 		ports_stats[fs->rx_port].rx_bad_outer_l4_csum +=
1855 				fs->rx_bad_outer_l4_csum;
1856 		ports_stats[fs->rx_port].rx_bad_outer_ip_csum +=
1857 				fs->rx_bad_outer_ip_csum;
1858 
1859 		if (record_core_cycles)
1860 			fwd_cycles += fs->core_cycles;
1861 	}
1862 	for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
1863 		pt_id = fwd_ports_ids[i];
1864 		port = &ports[pt_id];
1865 
1866 		rte_eth_stats_get(pt_id, &stats);
1867 		stats.ipackets -= port->stats.ipackets;
1868 		stats.opackets -= port->stats.opackets;
1869 		stats.ibytes -= port->stats.ibytes;
1870 		stats.obytes -= port->stats.obytes;
1871 		stats.imissed -= port->stats.imissed;
1872 		stats.oerrors -= port->stats.oerrors;
1873 		stats.rx_nombuf -= port->stats.rx_nombuf;
1874 
1875 		total_recv += stats.ipackets;
1876 		total_xmit += stats.opackets;
1877 		total_rx_dropped += stats.imissed;
1878 		total_tx_dropped += ports_stats[pt_id].tx_dropped;
1879 		total_tx_dropped += stats.oerrors;
1880 		total_rx_nombuf  += stats.rx_nombuf;
1881 
1882 		printf("\n  %s Forward statistics for port %-2d %s\n",
1883 		       fwd_stats_border, pt_id, fwd_stats_border);
1884 
1885 		printf("  RX-packets: %-14"PRIu64" RX-dropped: %-14"PRIu64
1886 		       "RX-total: %-"PRIu64"\n", stats.ipackets, stats.imissed,
1887 		       stats.ipackets + stats.imissed);
1888 
1889 		if (cur_fwd_eng == &csum_fwd_engine) {
1890 			printf("  Bad-ipcsum: %-14"PRIu64
1891 			       " Bad-l4csum: %-14"PRIu64
1892 			       "Bad-outer-l4csum: %-14"PRIu64"\n",
1893 			       ports_stats[pt_id].rx_bad_ip_csum,
1894 			       ports_stats[pt_id].rx_bad_l4_csum,
1895 			       ports_stats[pt_id].rx_bad_outer_l4_csum);
1896 			printf("  Bad-outer-ipcsum: %-14"PRIu64"\n",
1897 			       ports_stats[pt_id].rx_bad_outer_ip_csum);
1898 		}
1899 		if (stats.ierrors + stats.rx_nombuf > 0) {
1900 			printf("  RX-error: %-"PRIu64"\n", stats.ierrors);
1901 			printf("  RX-nombufs: %-14"PRIu64"\n", stats.rx_nombuf);
1902 		}
1903 
1904 		printf("  TX-packets: %-14"PRIu64" TX-dropped: %-14"PRIu64
1905 		       "TX-total: %-"PRIu64"\n",
1906 		       stats.opackets, ports_stats[pt_id].tx_dropped,
1907 		       stats.opackets + ports_stats[pt_id].tx_dropped);
1908 
1909 		if (record_burst_stats) {
1910 			if (ports_stats[pt_id].rx_stream)
1911 				pkt_burst_stats_display("RX",
1912 					&ports_stats[pt_id].rx_stream->rx_burst_stats);
1913 			if (ports_stats[pt_id].tx_stream)
1914 				pkt_burst_stats_display("TX",
1915 				&ports_stats[pt_id].tx_stream->tx_burst_stats);
1916 		}
1917 
1918 		printf("  %s--------------------------------%s\n",
1919 		       fwd_stats_border, fwd_stats_border);
1920 	}
1921 
1922 	printf("\n  %s Accumulated forward statistics for all ports"
1923 	       "%s\n",
1924 	       acc_stats_border, acc_stats_border);
1925 	printf("  RX-packets: %-14"PRIu64" RX-dropped: %-14"PRIu64"RX-total: "
1926 	       "%-"PRIu64"\n"
1927 	       "  TX-packets: %-14"PRIu64" TX-dropped: %-14"PRIu64"TX-total: "
1928 	       "%-"PRIu64"\n",
1929 	       total_recv, total_rx_dropped, total_recv + total_rx_dropped,
1930 	       total_xmit, total_tx_dropped, total_xmit + total_tx_dropped);
1931 	if (total_rx_nombuf > 0)
1932 		printf("  RX-nombufs: %-14"PRIu64"\n", total_rx_nombuf);
1933 	printf("  %s++++++++++++++++++++++++++++++++++++++++++++++"
1934 	       "%s\n",
1935 	       acc_stats_border, acc_stats_border);
1936 	if (record_core_cycles) {
1937 #define CYC_PER_MHZ 1E6
1938 		if (total_recv > 0 || total_xmit > 0) {
1939 			uint64_t total_pkts = 0;
1940 			if (strcmp(cur_fwd_eng->fwd_mode_name, "txonly") == 0 ||
1941 			    strcmp(cur_fwd_eng->fwd_mode_name, "flowgen") == 0)
1942 				total_pkts = total_xmit;
1943 			else
1944 				total_pkts = total_recv;
1945 
1946 			printf("\n  CPU cycles/packet=%.2F (total cycles="
1947 			       "%"PRIu64" / total %s packets=%"PRIu64") at %"PRIu64
1948 			       " MHz Clock\n",
1949 			       (double) fwd_cycles / total_pkts,
1950 			       fwd_cycles, cur_fwd_eng->fwd_mode_name, total_pkts,
1951 			       (uint64_t)(rte_get_tsc_hz() / CYC_PER_MHZ));
1952 		}
1953 	}
1954 }
1955 
1956 void
1957 fwd_stats_reset(void)
1958 {
1959 	streamid_t sm_id;
1960 	portid_t pt_id;
1961 	int i;
1962 
1963 	for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
1964 		pt_id = fwd_ports_ids[i];
1965 		rte_eth_stats_get(pt_id, &ports[pt_id].stats);
1966 	}
1967 	for (sm_id = 0; sm_id < cur_fwd_config.nb_fwd_streams; sm_id++) {
1968 		struct fwd_stream *fs = fwd_streams[sm_id];
1969 
1970 		fs->rx_packets = 0;
1971 		fs->tx_packets = 0;
1972 		fs->fwd_dropped = 0;
1973 		fs->rx_bad_ip_csum = 0;
1974 		fs->rx_bad_l4_csum = 0;
1975 		fs->rx_bad_outer_l4_csum = 0;
1976 		fs->rx_bad_outer_ip_csum = 0;
1977 
1978 		memset(&fs->rx_burst_stats, 0, sizeof(fs->rx_burst_stats));
1979 		memset(&fs->tx_burst_stats, 0, sizeof(fs->tx_burst_stats));
1980 		fs->core_cycles = 0;
1981 	}
1982 }
1983 
1984 static void
1985 flush_fwd_rx_queues(void)
1986 {
1987 	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
1988 	portid_t  rxp;
1989 	portid_t port_id;
1990 	queueid_t rxq;
1991 	uint16_t  nb_rx;
1992 	uint16_t  i;
1993 	uint8_t   j;
1994 	uint64_t prev_tsc = 0, diff_tsc, cur_tsc, timer_tsc = 0;
1995 	uint64_t timer_period;
1996 
1997 	/* convert to number of cycles */
1998 	timer_period = rte_get_timer_hz(); /* 1 second timeout */
1999 
2000 	for (j = 0; j < 2; j++) {
2001 		for (rxp = 0; rxp < cur_fwd_config.nb_fwd_ports; rxp++) {
2002 			for (rxq = 0; rxq < nb_rxq; rxq++) {
2003 				port_id = fwd_ports_ids[rxp];
2004 				/**
2005 				* testpmd can stuck in the below do while loop
2006 				* if rte_eth_rx_burst() always returns nonzero
2007 				* packets. So timer is added to exit this loop
2008 				* after 1sec timer expiry.
2009 				*/
2010 				prev_tsc = rte_rdtsc();
2011 				do {
2012 					nb_rx = rte_eth_rx_burst(port_id, rxq,
2013 						pkts_burst, MAX_PKT_BURST);
2014 					for (i = 0; i < nb_rx; i++)
2015 						rte_pktmbuf_free(pkts_burst[i]);
2016 
2017 					cur_tsc = rte_rdtsc();
2018 					diff_tsc = cur_tsc - prev_tsc;
2019 					timer_tsc += diff_tsc;
2020 				} while ((nb_rx > 0) &&
2021 					(timer_tsc < timer_period));
2022 				timer_tsc = 0;
2023 			}
2024 		}
2025 		rte_delay_ms(10); /* wait 10 milli-seconds before retrying */
2026 	}
2027 }
2028 
2029 static void
2030 run_pkt_fwd_on_lcore(struct fwd_lcore *fc, packet_fwd_t pkt_fwd)
2031 {
2032 	struct fwd_stream **fsm;
2033 	streamid_t nb_fs;
2034 	streamid_t sm_id;
2035 #ifdef RTE_LIB_BITRATESTATS
2036 	uint64_t tics_per_1sec;
2037 	uint64_t tics_datum;
2038 	uint64_t tics_current;
2039 	uint16_t i, cnt_ports;
2040 
2041 	cnt_ports = nb_ports;
2042 	tics_datum = rte_rdtsc();
2043 	tics_per_1sec = rte_get_timer_hz();
2044 #endif
2045 	fsm = &fwd_streams[fc->stream_idx];
2046 	nb_fs = fc->stream_nb;
2047 	do {
2048 		for (sm_id = 0; sm_id < nb_fs; sm_id++)
2049 			(*pkt_fwd)(fsm[sm_id]);
2050 #ifdef RTE_LIB_BITRATESTATS
2051 		if (bitrate_enabled != 0 &&
2052 				bitrate_lcore_id == rte_lcore_id()) {
2053 			tics_current = rte_rdtsc();
2054 			if (tics_current - tics_datum >= tics_per_1sec) {
2055 				/* Periodic bitrate calculation */
2056 				for (i = 0; i < cnt_ports; i++)
2057 					rte_stats_bitrate_calc(bitrate_data,
2058 						ports_ids[i]);
2059 				tics_datum = tics_current;
2060 			}
2061 		}
2062 #endif
2063 #ifdef RTE_LIB_LATENCYSTATS
2064 		if (latencystats_enabled != 0 &&
2065 				latencystats_lcore_id == rte_lcore_id())
2066 			rte_latencystats_update();
2067 #endif
2068 
2069 	} while (! fc->stopped);
2070 }
2071 
2072 static int
2073 start_pkt_forward_on_core(void *fwd_arg)
2074 {
2075 	run_pkt_fwd_on_lcore((struct fwd_lcore *) fwd_arg,
2076 			     cur_fwd_config.fwd_eng->packet_fwd);
2077 	return 0;
2078 }
2079 
2080 /*
2081  * Run the TXONLY packet forwarding engine to send a single burst of packets.
2082  * Used to start communication flows in network loopback test configurations.
2083  */
2084 static int
2085 run_one_txonly_burst_on_core(void *fwd_arg)
2086 {
2087 	struct fwd_lcore *fwd_lc;
2088 	struct fwd_lcore tmp_lcore;
2089 
2090 	fwd_lc = (struct fwd_lcore *) fwd_arg;
2091 	tmp_lcore = *fwd_lc;
2092 	tmp_lcore.stopped = 1;
2093 	run_pkt_fwd_on_lcore(&tmp_lcore, tx_only_engine.packet_fwd);
2094 	return 0;
2095 }
2096 
2097 /*
2098  * Launch packet forwarding:
2099  *     - Setup per-port forwarding context.
2100  *     - launch logical cores with their forwarding configuration.
2101  */
2102 static void
2103 launch_packet_forwarding(lcore_function_t *pkt_fwd_on_lcore)
2104 {
2105 	port_fwd_begin_t port_fwd_begin;
2106 	unsigned int i;
2107 	unsigned int lc_id;
2108 	int diag;
2109 
2110 	port_fwd_begin = cur_fwd_config.fwd_eng->port_fwd_begin;
2111 	if (port_fwd_begin != NULL) {
2112 		for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++)
2113 			(*port_fwd_begin)(fwd_ports_ids[i]);
2114 	}
2115 	for (i = 0; i < cur_fwd_config.nb_fwd_lcores; i++) {
2116 		lc_id = fwd_lcores_cpuids[i];
2117 		if ((interactive == 0) || (lc_id != rte_lcore_id())) {
2118 			fwd_lcores[i]->stopped = 0;
2119 			diag = rte_eal_remote_launch(pkt_fwd_on_lcore,
2120 						     fwd_lcores[i], lc_id);
2121 			if (diag != 0)
2122 				fprintf(stderr,
2123 					"launch lcore %u failed - diag=%d\n",
2124 					lc_id, diag);
2125 		}
2126 	}
2127 }
2128 
2129 /*
2130  * Launch packet forwarding configuration.
2131  */
2132 void
2133 start_packet_forwarding(int with_tx_first)
2134 {
2135 	port_fwd_begin_t port_fwd_begin;
2136 	port_fwd_end_t  port_fwd_end;
2137 	unsigned int i;
2138 
2139 	if (strcmp(cur_fwd_eng->fwd_mode_name, "rxonly") == 0 && !nb_rxq)
2140 		rte_exit(EXIT_FAILURE, "rxq are 0, cannot use rxonly fwd mode\n");
2141 
2142 	if (strcmp(cur_fwd_eng->fwd_mode_name, "txonly") == 0 && !nb_txq)
2143 		rte_exit(EXIT_FAILURE, "txq are 0, cannot use txonly fwd mode\n");
2144 
2145 	if ((strcmp(cur_fwd_eng->fwd_mode_name, "rxonly") != 0 &&
2146 		strcmp(cur_fwd_eng->fwd_mode_name, "txonly") != 0) &&
2147 		(!nb_rxq || !nb_txq))
2148 		rte_exit(EXIT_FAILURE,
2149 			"Either rxq or txq are 0, cannot use %s fwd mode\n",
2150 			cur_fwd_eng->fwd_mode_name);
2151 
2152 	if (all_ports_started() == 0) {
2153 		fprintf(stderr, "Not all ports were started\n");
2154 		return;
2155 	}
2156 	if (test_done == 0) {
2157 		fprintf(stderr, "Packet forwarding already started\n");
2158 		return;
2159 	}
2160 	test_done = 0;
2161 
2162 	fwd_config_setup();
2163 
2164 	if(!no_flush_rx)
2165 		flush_fwd_rx_queues();
2166 
2167 	pkt_fwd_config_display(&cur_fwd_config);
2168 	rxtx_config_display();
2169 
2170 	fwd_stats_reset();
2171 	if (with_tx_first) {
2172 		port_fwd_begin = tx_only_engine.port_fwd_begin;
2173 		if (port_fwd_begin != NULL) {
2174 			for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++)
2175 				(*port_fwd_begin)(fwd_ports_ids[i]);
2176 		}
2177 		while (with_tx_first--) {
2178 			launch_packet_forwarding(
2179 					run_one_txonly_burst_on_core);
2180 			rte_eal_mp_wait_lcore();
2181 		}
2182 		port_fwd_end = tx_only_engine.port_fwd_end;
2183 		if (port_fwd_end != NULL) {
2184 			for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++)
2185 				(*port_fwd_end)(fwd_ports_ids[i]);
2186 		}
2187 	}
2188 	launch_packet_forwarding(start_pkt_forward_on_core);
2189 }
2190 
2191 void
2192 stop_packet_forwarding(void)
2193 {
2194 	port_fwd_end_t port_fwd_end;
2195 	lcoreid_t lc_id;
2196 	portid_t pt_id;
2197 	int i;
2198 
2199 	if (test_done) {
2200 		fprintf(stderr, "Packet forwarding not started\n");
2201 		return;
2202 	}
2203 	printf("Telling cores to stop...");
2204 	for (lc_id = 0; lc_id < cur_fwd_config.nb_fwd_lcores; lc_id++)
2205 		fwd_lcores[lc_id]->stopped = 1;
2206 	printf("\nWaiting for lcores to finish...\n");
2207 	rte_eal_mp_wait_lcore();
2208 	port_fwd_end = cur_fwd_config.fwd_eng->port_fwd_end;
2209 	if (port_fwd_end != NULL) {
2210 		for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
2211 			pt_id = fwd_ports_ids[i];
2212 			(*port_fwd_end)(pt_id);
2213 		}
2214 	}
2215 
2216 	fwd_stats_display();
2217 
2218 	printf("\nDone.\n");
2219 	test_done = 1;
2220 }
2221 
2222 void
2223 dev_set_link_up(portid_t pid)
2224 {
2225 	if (rte_eth_dev_set_link_up(pid) < 0)
2226 		fprintf(stderr, "\nSet link up fail.\n");
2227 }
2228 
2229 void
2230 dev_set_link_down(portid_t pid)
2231 {
2232 	if (rte_eth_dev_set_link_down(pid) < 0)
2233 		fprintf(stderr, "\nSet link down fail.\n");
2234 }
2235 
2236 static int
2237 all_ports_started(void)
2238 {
2239 	portid_t pi;
2240 	struct rte_port *port;
2241 
2242 	RTE_ETH_FOREACH_DEV(pi) {
2243 		port = &ports[pi];
2244 		/* Check if there is a port which is not started */
2245 		if ((port->port_status != RTE_PORT_STARTED) &&
2246 			(port->slave_flag == 0))
2247 			return 0;
2248 	}
2249 
2250 	/* No port is not started */
2251 	return 1;
2252 }
2253 
2254 int
2255 port_is_stopped(portid_t port_id)
2256 {
2257 	struct rte_port *port = &ports[port_id];
2258 
2259 	if ((port->port_status != RTE_PORT_STOPPED) &&
2260 	    (port->slave_flag == 0))
2261 		return 0;
2262 	return 1;
2263 }
2264 
2265 int
2266 all_ports_stopped(void)
2267 {
2268 	portid_t pi;
2269 
2270 	RTE_ETH_FOREACH_DEV(pi) {
2271 		if (!port_is_stopped(pi))
2272 			return 0;
2273 	}
2274 
2275 	return 1;
2276 }
2277 
2278 int
2279 port_is_started(portid_t port_id)
2280 {
2281 	if (port_id_is_invalid(port_id, ENABLED_WARN))
2282 		return 0;
2283 
2284 	if (ports[port_id].port_status != RTE_PORT_STARTED)
2285 		return 0;
2286 
2287 	return 1;
2288 }
2289 
2290 /* Configure the Rx and Tx hairpin queues for the selected port. */
2291 static int
2292 setup_hairpin_queues(portid_t pi, portid_t p_pi, uint16_t cnt_pi)
2293 {
2294 	queueid_t qi;
2295 	struct rte_eth_hairpin_conf hairpin_conf = {
2296 		.peer_count = 1,
2297 	};
2298 	int i;
2299 	int diag;
2300 	struct rte_port *port = &ports[pi];
2301 	uint16_t peer_rx_port = pi;
2302 	uint16_t peer_tx_port = pi;
2303 	uint32_t manual = 1;
2304 	uint32_t tx_exp = hairpin_mode & 0x10;
2305 
2306 	if (!(hairpin_mode & 0xf)) {
2307 		peer_rx_port = pi;
2308 		peer_tx_port = pi;
2309 		manual = 0;
2310 	} else if (hairpin_mode & 0x1) {
2311 		peer_tx_port = rte_eth_find_next_owned_by(pi + 1,
2312 						       RTE_ETH_DEV_NO_OWNER);
2313 		if (peer_tx_port >= RTE_MAX_ETHPORTS)
2314 			peer_tx_port = rte_eth_find_next_owned_by(0,
2315 						RTE_ETH_DEV_NO_OWNER);
2316 		if (p_pi != RTE_MAX_ETHPORTS) {
2317 			peer_rx_port = p_pi;
2318 		} else {
2319 			uint16_t next_pi;
2320 
2321 			/* Last port will be the peer RX port of the first. */
2322 			RTE_ETH_FOREACH_DEV(next_pi)
2323 				peer_rx_port = next_pi;
2324 		}
2325 		manual = 1;
2326 	} else if (hairpin_mode & 0x2) {
2327 		if (cnt_pi & 0x1) {
2328 			peer_rx_port = p_pi;
2329 		} else {
2330 			peer_rx_port = rte_eth_find_next_owned_by(pi + 1,
2331 						RTE_ETH_DEV_NO_OWNER);
2332 			if (peer_rx_port >= RTE_MAX_ETHPORTS)
2333 				peer_rx_port = pi;
2334 		}
2335 		peer_tx_port = peer_rx_port;
2336 		manual = 1;
2337 	}
2338 
2339 	for (qi = nb_txq, i = 0; qi < nb_hairpinq + nb_txq; qi++) {
2340 		hairpin_conf.peers[0].port = peer_rx_port;
2341 		hairpin_conf.peers[0].queue = i + nb_rxq;
2342 		hairpin_conf.manual_bind = !!manual;
2343 		hairpin_conf.tx_explicit = !!tx_exp;
2344 		diag = rte_eth_tx_hairpin_queue_setup
2345 			(pi, qi, nb_txd, &hairpin_conf);
2346 		i++;
2347 		if (diag == 0)
2348 			continue;
2349 
2350 		/* Fail to setup rx queue, return */
2351 		if (rte_atomic16_cmpset(&(port->port_status),
2352 					RTE_PORT_HANDLING,
2353 					RTE_PORT_STOPPED) == 0)
2354 			fprintf(stderr,
2355 				"Port %d can not be set back to stopped\n", pi);
2356 		fprintf(stderr, "Fail to configure port %d hairpin queues\n",
2357 			pi);
2358 		/* try to reconfigure queues next time */
2359 		port->need_reconfig_queues = 1;
2360 		return -1;
2361 	}
2362 	for (qi = nb_rxq, i = 0; qi < nb_hairpinq + nb_rxq; qi++) {
2363 		hairpin_conf.peers[0].port = peer_tx_port;
2364 		hairpin_conf.peers[0].queue = i + nb_txq;
2365 		hairpin_conf.manual_bind = !!manual;
2366 		hairpin_conf.tx_explicit = !!tx_exp;
2367 		diag = rte_eth_rx_hairpin_queue_setup
2368 			(pi, qi, nb_rxd, &hairpin_conf);
2369 		i++;
2370 		if (diag == 0)
2371 			continue;
2372 
2373 		/* Fail to setup rx queue, return */
2374 		if (rte_atomic16_cmpset(&(port->port_status),
2375 					RTE_PORT_HANDLING,
2376 					RTE_PORT_STOPPED) == 0)
2377 			fprintf(stderr,
2378 				"Port %d can not be set back to stopped\n", pi);
2379 		fprintf(stderr, "Fail to configure port %d hairpin queues\n",
2380 			pi);
2381 		/* try to reconfigure queues next time */
2382 		port->need_reconfig_queues = 1;
2383 		return -1;
2384 	}
2385 	return 0;
2386 }
2387 
2388 /* Configure the Rx with optional split. */
2389 int
2390 rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
2391 	       uint16_t nb_rx_desc, unsigned int socket_id,
2392 	       struct rte_eth_rxconf *rx_conf, struct rte_mempool *mp)
2393 {
2394 	union rte_eth_rxseg rx_useg[MAX_SEGS_BUFFER_SPLIT] = {};
2395 	unsigned int i, mp_n;
2396 	int ret;
2397 
2398 	if (rx_pkt_nb_segs <= 1 ||
2399 	    (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) == 0) {
2400 		rx_conf->rx_seg = NULL;
2401 		rx_conf->rx_nseg = 0;
2402 		ret = rte_eth_rx_queue_setup(port_id, rx_queue_id,
2403 					     nb_rx_desc, socket_id,
2404 					     rx_conf, mp);
2405 		return ret;
2406 	}
2407 	for (i = 0; i < rx_pkt_nb_segs; i++) {
2408 		struct rte_eth_rxseg_split *rx_seg = &rx_useg[i].split;
2409 		struct rte_mempool *mpx;
2410 		/*
2411 		 * Use last valid pool for the segments with number
2412 		 * exceeding the pool index.
2413 		 */
2414 		mp_n = (i > mbuf_data_size_n) ? mbuf_data_size_n - 1 : i;
2415 		mpx = mbuf_pool_find(socket_id, mp_n);
2416 		/* Handle zero as mbuf data buffer size. */
2417 		rx_seg->length = rx_pkt_seg_lengths[i] ?
2418 				   rx_pkt_seg_lengths[i] :
2419 				   mbuf_data_size[mp_n];
2420 		rx_seg->offset = i < rx_pkt_nb_offs ?
2421 				   rx_pkt_seg_offsets[i] : 0;
2422 		rx_seg->mp = mpx ? mpx : mp;
2423 	}
2424 	rx_conf->rx_nseg = rx_pkt_nb_segs;
2425 	rx_conf->rx_seg = rx_useg;
2426 	ret = rte_eth_rx_queue_setup(port_id, rx_queue_id, nb_rx_desc,
2427 				    socket_id, rx_conf, NULL);
2428 	rx_conf->rx_seg = NULL;
2429 	rx_conf->rx_nseg = 0;
2430 	return ret;
2431 }
2432 
2433 int
2434 start_port(portid_t pid)
2435 {
2436 	int diag, need_check_link_status = -1;
2437 	portid_t pi;
2438 	portid_t p_pi = RTE_MAX_ETHPORTS;
2439 	portid_t pl[RTE_MAX_ETHPORTS];
2440 	portid_t peer_pl[RTE_MAX_ETHPORTS];
2441 	uint16_t cnt_pi = 0;
2442 	uint16_t cfg_pi = 0;
2443 	int peer_pi;
2444 	queueid_t qi;
2445 	struct rte_port *port;
2446 	struct rte_eth_hairpin_cap cap;
2447 
2448 	if (port_id_is_invalid(pid, ENABLED_WARN))
2449 		return 0;
2450 
2451 	RTE_ETH_FOREACH_DEV(pi) {
2452 		if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
2453 			continue;
2454 
2455 		need_check_link_status = 0;
2456 		port = &ports[pi];
2457 		if (rte_atomic16_cmpset(&(port->port_status), RTE_PORT_STOPPED,
2458 						 RTE_PORT_HANDLING) == 0) {
2459 			fprintf(stderr, "Port %d is now not stopped\n", pi);
2460 			continue;
2461 		}
2462 
2463 		if (port->need_reconfig > 0) {
2464 			port->need_reconfig = 0;
2465 
2466 			if (flow_isolate_all) {
2467 				int ret = port_flow_isolate(pi, 1);
2468 				if (ret) {
2469 					fprintf(stderr,
2470 						"Failed to apply isolated mode on port %d\n",
2471 						pi);
2472 					return -1;
2473 				}
2474 			}
2475 			configure_rxtx_dump_callbacks(0);
2476 			printf("Configuring Port %d (socket %u)\n", pi,
2477 					port->socket_id);
2478 			if (nb_hairpinq > 0 &&
2479 			    rte_eth_dev_hairpin_capability_get(pi, &cap)) {
2480 				fprintf(stderr,
2481 					"Port %d doesn't support hairpin queues\n",
2482 					pi);
2483 				return -1;
2484 			}
2485 			/* configure port */
2486 			diag = rte_eth_dev_configure(pi, nb_rxq + nb_hairpinq,
2487 						     nb_txq + nb_hairpinq,
2488 						     &(port->dev_conf));
2489 			if (diag != 0) {
2490 				if (rte_atomic16_cmpset(&(port->port_status),
2491 				RTE_PORT_HANDLING, RTE_PORT_STOPPED) == 0)
2492 					fprintf(stderr,
2493 						"Port %d can not be set back to stopped\n",
2494 						pi);
2495 				fprintf(stderr, "Fail to configure port %d\n",
2496 					pi);
2497 				/* try to reconfigure port next time */
2498 				port->need_reconfig = 1;
2499 				return -1;
2500 			}
2501 		}
2502 		if (port->need_reconfig_queues > 0) {
2503 			port->need_reconfig_queues = 0;
2504 			/* setup tx queues */
2505 			for (qi = 0; qi < nb_txq; qi++) {
2506 				if ((numa_support) &&
2507 					(txring_numa[pi] != NUMA_NO_CONFIG))
2508 					diag = rte_eth_tx_queue_setup(pi, qi,
2509 						port->nb_tx_desc[qi],
2510 						txring_numa[pi],
2511 						&(port->tx_conf[qi]));
2512 				else
2513 					diag = rte_eth_tx_queue_setup(pi, qi,
2514 						port->nb_tx_desc[qi],
2515 						port->socket_id,
2516 						&(port->tx_conf[qi]));
2517 
2518 				if (diag == 0)
2519 					continue;
2520 
2521 				/* Fail to setup tx queue, return */
2522 				if (rte_atomic16_cmpset(&(port->port_status),
2523 							RTE_PORT_HANDLING,
2524 							RTE_PORT_STOPPED) == 0)
2525 					fprintf(stderr,
2526 						"Port %d can not be set back to stopped\n",
2527 						pi);
2528 				fprintf(stderr,
2529 					"Fail to configure port %d tx queues\n",
2530 					pi);
2531 				/* try to reconfigure queues next time */
2532 				port->need_reconfig_queues = 1;
2533 				return -1;
2534 			}
2535 			for (qi = 0; qi < nb_rxq; qi++) {
2536 				/* setup rx queues */
2537 				if ((numa_support) &&
2538 					(rxring_numa[pi] != NUMA_NO_CONFIG)) {
2539 					struct rte_mempool * mp =
2540 						mbuf_pool_find
2541 							(rxring_numa[pi], 0);
2542 					if (mp == NULL) {
2543 						fprintf(stderr,
2544 							"Failed to setup RX queue: No mempool allocation on the socket %d\n",
2545 							rxring_numa[pi]);
2546 						return -1;
2547 					}
2548 
2549 					diag = rx_queue_setup(pi, qi,
2550 					     port->nb_rx_desc[qi],
2551 					     rxring_numa[pi],
2552 					     &(port->rx_conf[qi]),
2553 					     mp);
2554 				} else {
2555 					struct rte_mempool *mp =
2556 						mbuf_pool_find
2557 							(port->socket_id, 0);
2558 					if (mp == NULL) {
2559 						fprintf(stderr,
2560 							"Failed to setup RX queue: No mempool allocation on the socket %d\n",
2561 							port->socket_id);
2562 						return -1;
2563 					}
2564 					diag = rx_queue_setup(pi, qi,
2565 					     port->nb_rx_desc[qi],
2566 					     port->socket_id,
2567 					     &(port->rx_conf[qi]),
2568 					     mp);
2569 				}
2570 				if (diag == 0)
2571 					continue;
2572 
2573 				/* Fail to setup rx queue, return */
2574 				if (rte_atomic16_cmpset(&(port->port_status),
2575 							RTE_PORT_HANDLING,
2576 							RTE_PORT_STOPPED) == 0)
2577 					fprintf(stderr,
2578 						"Port %d can not be set back to stopped\n",
2579 						pi);
2580 				fprintf(stderr,
2581 					"Fail to configure port %d rx queues\n",
2582 					pi);
2583 				/* try to reconfigure queues next time */
2584 				port->need_reconfig_queues = 1;
2585 				return -1;
2586 			}
2587 			/* setup hairpin queues */
2588 			if (setup_hairpin_queues(pi, p_pi, cnt_pi) != 0)
2589 				return -1;
2590 		}
2591 		configure_rxtx_dump_callbacks(verbose_level);
2592 		if (clear_ptypes) {
2593 			diag = rte_eth_dev_set_ptypes(pi, RTE_PTYPE_UNKNOWN,
2594 					NULL, 0);
2595 			if (diag < 0)
2596 				fprintf(stderr,
2597 					"Port %d: Failed to disable Ptype parsing\n",
2598 					pi);
2599 		}
2600 
2601 		p_pi = pi;
2602 		cnt_pi++;
2603 
2604 		/* start port */
2605 		diag = rte_eth_dev_start(pi);
2606 		if (diag < 0) {
2607 			fprintf(stderr, "Fail to start port %d: %s\n",
2608 				pi, rte_strerror(-diag));
2609 
2610 			/* Fail to setup rx queue, return */
2611 			if (rte_atomic16_cmpset(&(port->port_status),
2612 				RTE_PORT_HANDLING, RTE_PORT_STOPPED) == 0)
2613 				fprintf(stderr,
2614 					"Port %d can not be set back to stopped\n",
2615 					pi);
2616 			continue;
2617 		}
2618 
2619 		if (rte_atomic16_cmpset(&(port->port_status),
2620 			RTE_PORT_HANDLING, RTE_PORT_STARTED) == 0)
2621 			fprintf(stderr, "Port %d can not be set into started\n",
2622 				pi);
2623 
2624 		if (eth_macaddr_get_print_err(pi, &port->eth_addr) == 0)
2625 			printf("Port %d: %02X:%02X:%02X:%02X:%02X:%02X\n", pi,
2626 				port->eth_addr.addr_bytes[0],
2627 				port->eth_addr.addr_bytes[1],
2628 				port->eth_addr.addr_bytes[2],
2629 				port->eth_addr.addr_bytes[3],
2630 				port->eth_addr.addr_bytes[4],
2631 				port->eth_addr.addr_bytes[5]);
2632 
2633 		/* at least one port started, need checking link status */
2634 		need_check_link_status = 1;
2635 
2636 		pl[cfg_pi++] = pi;
2637 	}
2638 
2639 	if (need_check_link_status == 1 && !no_link_check)
2640 		check_all_ports_link_status(RTE_PORT_ALL);
2641 	else if (need_check_link_status == 0)
2642 		fprintf(stderr, "Please stop the ports first\n");
2643 
2644 	if (hairpin_mode & 0xf) {
2645 		uint16_t i;
2646 		int j;
2647 
2648 		/* bind all started hairpin ports */
2649 		for (i = 0; i < cfg_pi; i++) {
2650 			pi = pl[i];
2651 			/* bind current Tx to all peer Rx */
2652 			peer_pi = rte_eth_hairpin_get_peer_ports(pi, peer_pl,
2653 							RTE_MAX_ETHPORTS, 1);
2654 			if (peer_pi < 0)
2655 				return peer_pi;
2656 			for (j = 0; j < peer_pi; j++) {
2657 				if (!port_is_started(peer_pl[j]))
2658 					continue;
2659 				diag = rte_eth_hairpin_bind(pi, peer_pl[j]);
2660 				if (diag < 0) {
2661 					fprintf(stderr,
2662 						"Error during binding hairpin Tx port %u to %u: %s\n",
2663 						pi, peer_pl[j],
2664 						rte_strerror(-diag));
2665 					return -1;
2666 				}
2667 			}
2668 			/* bind all peer Tx to current Rx */
2669 			peer_pi = rte_eth_hairpin_get_peer_ports(pi, peer_pl,
2670 							RTE_MAX_ETHPORTS, 0);
2671 			if (peer_pi < 0)
2672 				return peer_pi;
2673 			for (j = 0; j < peer_pi; j++) {
2674 				if (!port_is_started(peer_pl[j]))
2675 					continue;
2676 				diag = rte_eth_hairpin_bind(peer_pl[j], pi);
2677 				if (diag < 0) {
2678 					fprintf(stderr,
2679 						"Error during binding hairpin Tx port %u to %u: %s\n",
2680 						peer_pl[j], pi,
2681 						rte_strerror(-diag));
2682 					return -1;
2683 				}
2684 			}
2685 		}
2686 	}
2687 
2688 	printf("Done\n");
2689 	return 0;
2690 }
2691 
2692 void
2693 stop_port(portid_t pid)
2694 {
2695 	portid_t pi;
2696 	struct rte_port *port;
2697 	int need_check_link_status = 0;
2698 	portid_t peer_pl[RTE_MAX_ETHPORTS];
2699 	int peer_pi;
2700 
2701 	if (port_id_is_invalid(pid, ENABLED_WARN))
2702 		return;
2703 
2704 	printf("Stopping ports...\n");
2705 
2706 	RTE_ETH_FOREACH_DEV(pi) {
2707 		if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
2708 			continue;
2709 
2710 		if (port_is_forwarding(pi) != 0 && test_done == 0) {
2711 			fprintf(stderr,
2712 				"Please remove port %d from forwarding configuration.\n",
2713 				pi);
2714 			continue;
2715 		}
2716 
2717 		if (port_is_bonding_slave(pi)) {
2718 			fprintf(stderr,
2719 				"Please remove port %d from bonded device.\n",
2720 				pi);
2721 			continue;
2722 		}
2723 
2724 		port = &ports[pi];
2725 		if (rte_atomic16_cmpset(&(port->port_status), RTE_PORT_STARTED,
2726 						RTE_PORT_HANDLING) == 0)
2727 			continue;
2728 
2729 		if (hairpin_mode & 0xf) {
2730 			int j;
2731 
2732 			rte_eth_hairpin_unbind(pi, RTE_MAX_ETHPORTS);
2733 			/* unbind all peer Tx from current Rx */
2734 			peer_pi = rte_eth_hairpin_get_peer_ports(pi, peer_pl,
2735 							RTE_MAX_ETHPORTS, 0);
2736 			if (peer_pi < 0)
2737 				continue;
2738 			for (j = 0; j < peer_pi; j++) {
2739 				if (!port_is_started(peer_pl[j]))
2740 					continue;
2741 				rte_eth_hairpin_unbind(peer_pl[j], pi);
2742 			}
2743 		}
2744 
2745 		if (port->flow_list)
2746 			port_flow_flush(pi);
2747 
2748 		if (rte_eth_dev_stop(pi) != 0)
2749 			RTE_LOG(ERR, EAL, "rte_eth_dev_stop failed for port %u\n",
2750 				pi);
2751 
2752 		if (rte_atomic16_cmpset(&(port->port_status),
2753 			RTE_PORT_HANDLING, RTE_PORT_STOPPED) == 0)
2754 			fprintf(stderr, "Port %d can not be set into stopped\n",
2755 				pi);
2756 		need_check_link_status = 1;
2757 	}
2758 	if (need_check_link_status && !no_link_check)
2759 		check_all_ports_link_status(RTE_PORT_ALL);
2760 
2761 	printf("Done\n");
2762 }
2763 
2764 static void
2765 remove_invalid_ports_in(portid_t *array, portid_t *total)
2766 {
2767 	portid_t i;
2768 	portid_t new_total = 0;
2769 
2770 	for (i = 0; i < *total; i++)
2771 		if (!port_id_is_invalid(array[i], DISABLED_WARN)) {
2772 			array[new_total] = array[i];
2773 			new_total++;
2774 		}
2775 	*total = new_total;
2776 }
2777 
2778 static void
2779 remove_invalid_ports(void)
2780 {
2781 	remove_invalid_ports_in(ports_ids, &nb_ports);
2782 	remove_invalid_ports_in(fwd_ports_ids, &nb_fwd_ports);
2783 	nb_cfg_ports = nb_fwd_ports;
2784 }
2785 
2786 void
2787 close_port(portid_t pid)
2788 {
2789 	portid_t pi;
2790 	struct rte_port *port;
2791 
2792 	if (port_id_is_invalid(pid, ENABLED_WARN))
2793 		return;
2794 
2795 	printf("Closing ports...\n");
2796 
2797 	RTE_ETH_FOREACH_DEV(pi) {
2798 		if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
2799 			continue;
2800 
2801 		if (port_is_forwarding(pi) != 0 && test_done == 0) {
2802 			fprintf(stderr,
2803 				"Please remove port %d from forwarding configuration.\n",
2804 				pi);
2805 			continue;
2806 		}
2807 
2808 		if (port_is_bonding_slave(pi)) {
2809 			fprintf(stderr,
2810 				"Please remove port %d from bonded device.\n",
2811 				pi);
2812 			continue;
2813 		}
2814 
2815 		port = &ports[pi];
2816 		if (rte_atomic16_cmpset(&(port->port_status),
2817 			RTE_PORT_CLOSED, RTE_PORT_CLOSED) == 1) {
2818 			fprintf(stderr, "Port %d is already closed\n", pi);
2819 			continue;
2820 		}
2821 
2822 		port_flow_flush(pi);
2823 		rte_eth_dev_close(pi);
2824 	}
2825 
2826 	remove_invalid_ports();
2827 	printf("Done\n");
2828 }
2829 
2830 void
2831 reset_port(portid_t pid)
2832 {
2833 	int diag;
2834 	portid_t pi;
2835 	struct rte_port *port;
2836 
2837 	if (port_id_is_invalid(pid, ENABLED_WARN))
2838 		return;
2839 
2840 	if ((pid == (portid_t)RTE_PORT_ALL && !all_ports_stopped()) ||
2841 		(pid != (portid_t)RTE_PORT_ALL && !port_is_stopped(pid))) {
2842 		fprintf(stderr,
2843 			"Can not reset port(s), please stop port(s) first.\n");
2844 		return;
2845 	}
2846 
2847 	printf("Resetting ports...\n");
2848 
2849 	RTE_ETH_FOREACH_DEV(pi) {
2850 		if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
2851 			continue;
2852 
2853 		if (port_is_forwarding(pi) != 0 && test_done == 0) {
2854 			fprintf(stderr,
2855 				"Please remove port %d from forwarding configuration.\n",
2856 				pi);
2857 			continue;
2858 		}
2859 
2860 		if (port_is_bonding_slave(pi)) {
2861 			fprintf(stderr,
2862 				"Please remove port %d from bonded device.\n",
2863 				pi);
2864 			continue;
2865 		}
2866 
2867 		diag = rte_eth_dev_reset(pi);
2868 		if (diag == 0) {
2869 			port = &ports[pi];
2870 			port->need_reconfig = 1;
2871 			port->need_reconfig_queues = 1;
2872 		} else {
2873 			fprintf(stderr, "Failed to reset port %d. diag=%d\n",
2874 				pi, diag);
2875 		}
2876 	}
2877 
2878 	printf("Done\n");
2879 }
2880 
2881 void
2882 attach_port(char *identifier)
2883 {
2884 	portid_t pi;
2885 	struct rte_dev_iterator iterator;
2886 
2887 	printf("Attaching a new port...\n");
2888 
2889 	if (identifier == NULL) {
2890 		fprintf(stderr, "Invalid parameters are specified\n");
2891 		return;
2892 	}
2893 
2894 	if (rte_dev_probe(identifier) < 0) {
2895 		TESTPMD_LOG(ERR, "Failed to attach port %s\n", identifier);
2896 		return;
2897 	}
2898 
2899 	/* first attach mode: event */
2900 	if (setup_on_probe_event) {
2901 		/* new ports are detected on RTE_ETH_EVENT_NEW event */
2902 		for (pi = 0; pi < RTE_MAX_ETHPORTS; pi++)
2903 			if (ports[pi].port_status == RTE_PORT_HANDLING &&
2904 					ports[pi].need_setup != 0)
2905 				setup_attached_port(pi);
2906 		return;
2907 	}
2908 
2909 	/* second attach mode: iterator */
2910 	RTE_ETH_FOREACH_MATCHING_DEV(pi, identifier, &iterator) {
2911 		/* setup ports matching the devargs used for probing */
2912 		if (port_is_forwarding(pi))
2913 			continue; /* port was already attached before */
2914 		setup_attached_port(pi);
2915 	}
2916 }
2917 
2918 static void
2919 setup_attached_port(portid_t pi)
2920 {
2921 	unsigned int socket_id;
2922 	int ret;
2923 
2924 	socket_id = (unsigned)rte_eth_dev_socket_id(pi);
2925 	/* if socket_id is invalid, set to the first available socket. */
2926 	if (check_socket_id(socket_id) < 0)
2927 		socket_id = socket_ids[0];
2928 	reconfig(pi, socket_id);
2929 	ret = rte_eth_promiscuous_enable(pi);
2930 	if (ret != 0)
2931 		fprintf(stderr,
2932 			"Error during enabling promiscuous mode for port %u: %s - ignore\n",
2933 			pi, rte_strerror(-ret));
2934 
2935 	ports_ids[nb_ports++] = pi;
2936 	fwd_ports_ids[nb_fwd_ports++] = pi;
2937 	nb_cfg_ports = nb_fwd_ports;
2938 	ports[pi].need_setup = 0;
2939 	ports[pi].port_status = RTE_PORT_STOPPED;
2940 
2941 	printf("Port %d is attached. Now total ports is %d\n", pi, nb_ports);
2942 	printf("Done\n");
2943 }
2944 
2945 static void
2946 detach_device(struct rte_device *dev)
2947 {
2948 	portid_t sibling;
2949 
2950 	if (dev == NULL) {
2951 		fprintf(stderr, "Device already removed\n");
2952 		return;
2953 	}
2954 
2955 	printf("Removing a device...\n");
2956 
2957 	RTE_ETH_FOREACH_DEV_OF(sibling, dev) {
2958 		if (ports[sibling].port_status != RTE_PORT_CLOSED) {
2959 			if (ports[sibling].port_status != RTE_PORT_STOPPED) {
2960 				fprintf(stderr, "Port %u not stopped\n",
2961 					sibling);
2962 				return;
2963 			}
2964 			port_flow_flush(sibling);
2965 		}
2966 	}
2967 
2968 	if (rte_dev_remove(dev) < 0) {
2969 		TESTPMD_LOG(ERR, "Failed to detach device %s\n", dev->name);
2970 		return;
2971 	}
2972 	remove_invalid_ports();
2973 
2974 	printf("Device is detached\n");
2975 	printf("Now total ports is %d\n", nb_ports);
2976 	printf("Done\n");
2977 	return;
2978 }
2979 
2980 void
2981 detach_port_device(portid_t port_id)
2982 {
2983 	int ret;
2984 	struct rte_eth_dev_info dev_info;
2985 
2986 	if (port_id_is_invalid(port_id, ENABLED_WARN))
2987 		return;
2988 
2989 	if (ports[port_id].port_status != RTE_PORT_CLOSED) {
2990 		if (ports[port_id].port_status != RTE_PORT_STOPPED) {
2991 			fprintf(stderr, "Port not stopped\n");
2992 			return;
2993 		}
2994 		fprintf(stderr, "Port was not closed\n");
2995 	}
2996 
2997 	ret = eth_dev_info_get_print_err(port_id, &dev_info);
2998 	if (ret != 0) {
2999 		TESTPMD_LOG(ERR,
3000 			"Failed to get device info for port %d, not detaching\n",
3001 			port_id);
3002 		return;
3003 	}
3004 	detach_device(dev_info.device);
3005 }
3006 
3007 void
3008 detach_devargs(char *identifier)
3009 {
3010 	struct rte_dev_iterator iterator;
3011 	struct rte_devargs da;
3012 	portid_t port_id;
3013 
3014 	printf("Removing a device...\n");
3015 
3016 	memset(&da, 0, sizeof(da));
3017 	if (rte_devargs_parsef(&da, "%s", identifier)) {
3018 		fprintf(stderr, "cannot parse identifier\n");
3019 		return;
3020 	}
3021 
3022 	RTE_ETH_FOREACH_MATCHING_DEV(port_id, identifier, &iterator) {
3023 		if (ports[port_id].port_status != RTE_PORT_CLOSED) {
3024 			if (ports[port_id].port_status != RTE_PORT_STOPPED) {
3025 				fprintf(stderr, "Port %u not stopped\n",
3026 					port_id);
3027 				rte_eth_iterator_cleanup(&iterator);
3028 				rte_devargs_reset(&da);
3029 				return;
3030 			}
3031 			port_flow_flush(port_id);
3032 		}
3033 	}
3034 
3035 	if (rte_eal_hotplug_remove(da.bus->name, da.name) != 0) {
3036 		TESTPMD_LOG(ERR, "Failed to detach device %s(%s)\n",
3037 			    da.name, da.bus->name);
3038 		rte_devargs_reset(&da);
3039 		return;
3040 	}
3041 
3042 	remove_invalid_ports();
3043 
3044 	printf("Device %s is detached\n", identifier);
3045 	printf("Now total ports is %d\n", nb_ports);
3046 	printf("Done\n");
3047 	rte_devargs_reset(&da);
3048 }
3049 
3050 void
3051 pmd_test_exit(void)
3052 {
3053 	portid_t pt_id;
3054 	unsigned int i;
3055 	int ret;
3056 
3057 	if (test_done == 0)
3058 		stop_packet_forwarding();
3059 
3060 #ifndef RTE_EXEC_ENV_WINDOWS
3061 	for (i = 0 ; i < RTE_DIM(mempools) ; i++) {
3062 		if (mempools[i]) {
3063 			if (mp_alloc_type == MP_ALLOC_ANON)
3064 				rte_mempool_mem_iter(mempools[i], dma_unmap_cb,
3065 						     NULL);
3066 		}
3067 	}
3068 #endif
3069 	if (ports != NULL) {
3070 		no_link_check = 1;
3071 		RTE_ETH_FOREACH_DEV(pt_id) {
3072 			printf("\nStopping port %d...\n", pt_id);
3073 			fflush(stdout);
3074 			stop_port(pt_id);
3075 		}
3076 		RTE_ETH_FOREACH_DEV(pt_id) {
3077 			printf("\nShutting down port %d...\n", pt_id);
3078 			fflush(stdout);
3079 			close_port(pt_id);
3080 		}
3081 	}
3082 
3083 	if (hot_plug) {
3084 		ret = rte_dev_event_monitor_stop();
3085 		if (ret) {
3086 			RTE_LOG(ERR, EAL,
3087 				"fail to stop device event monitor.");
3088 			return;
3089 		}
3090 
3091 		ret = rte_dev_event_callback_unregister(NULL,
3092 			dev_event_callback, NULL);
3093 		if (ret < 0) {
3094 			RTE_LOG(ERR, EAL,
3095 				"fail to unregister device event callback.\n");
3096 			return;
3097 		}
3098 
3099 		ret = rte_dev_hotplug_handle_disable();
3100 		if (ret) {
3101 			RTE_LOG(ERR, EAL,
3102 				"fail to disable hotplug handling.\n");
3103 			return;
3104 		}
3105 	}
3106 	for (i = 0 ; i < RTE_DIM(mempools) ; i++) {
3107 		if (mempools[i])
3108 			rte_mempool_free(mempools[i]);
3109 	}
3110 
3111 	printf("\nBye...\n");
3112 }
3113 
3114 typedef void (*cmd_func_t)(void);
3115 struct pmd_test_command {
3116 	const char *cmd_name;
3117 	cmd_func_t cmd_func;
3118 };
3119 
3120 /* Check the link status of all ports in up to 9s, and print them finally */
3121 static void
3122 check_all_ports_link_status(uint32_t port_mask)
3123 {
3124 #define CHECK_INTERVAL 100 /* 100ms */
3125 #define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
3126 	portid_t portid;
3127 	uint8_t count, all_ports_up, print_flag = 0;
3128 	struct rte_eth_link link;
3129 	int ret;
3130 	char link_status[RTE_ETH_LINK_MAX_STR_LEN];
3131 
3132 	printf("Checking link statuses...\n");
3133 	fflush(stdout);
3134 	for (count = 0; count <= MAX_CHECK_TIME; count++) {
3135 		all_ports_up = 1;
3136 		RTE_ETH_FOREACH_DEV(portid) {
3137 			if ((port_mask & (1 << portid)) == 0)
3138 				continue;
3139 			memset(&link, 0, sizeof(link));
3140 			ret = rte_eth_link_get_nowait(portid, &link);
3141 			if (ret < 0) {
3142 				all_ports_up = 0;
3143 				if (print_flag == 1)
3144 					fprintf(stderr,
3145 						"Port %u link get failed: %s\n",
3146 						portid, rte_strerror(-ret));
3147 				continue;
3148 			}
3149 			/* print link status if flag set */
3150 			if (print_flag == 1) {
3151 				rte_eth_link_to_str(link_status,
3152 					sizeof(link_status), &link);
3153 				printf("Port %d %s\n", portid, link_status);
3154 				continue;
3155 			}
3156 			/* clear all_ports_up flag if any link down */
3157 			if (link.link_status == ETH_LINK_DOWN) {
3158 				all_ports_up = 0;
3159 				break;
3160 			}
3161 		}
3162 		/* after finally printing all link status, get out */
3163 		if (print_flag == 1)
3164 			break;
3165 
3166 		if (all_ports_up == 0) {
3167 			fflush(stdout);
3168 			rte_delay_ms(CHECK_INTERVAL);
3169 		}
3170 
3171 		/* set the print_flag if all ports up or timeout */
3172 		if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
3173 			print_flag = 1;
3174 		}
3175 
3176 		if (lsc_interrupt)
3177 			break;
3178 	}
3179 }
3180 
3181 static void
3182 rmv_port_callback(void *arg)
3183 {
3184 	int need_to_start = 0;
3185 	int org_no_link_check = no_link_check;
3186 	portid_t port_id = (intptr_t)arg;
3187 	struct rte_eth_dev_info dev_info;
3188 	int ret;
3189 
3190 	RTE_ETH_VALID_PORTID_OR_RET(port_id);
3191 
3192 	if (!test_done && port_is_forwarding(port_id)) {
3193 		need_to_start = 1;
3194 		stop_packet_forwarding();
3195 	}
3196 	no_link_check = 1;
3197 	stop_port(port_id);
3198 	no_link_check = org_no_link_check;
3199 
3200 	ret = eth_dev_info_get_print_err(port_id, &dev_info);
3201 	if (ret != 0)
3202 		TESTPMD_LOG(ERR,
3203 			"Failed to get device info for port %d, not detaching\n",
3204 			port_id);
3205 	else {
3206 		struct rte_device *device = dev_info.device;
3207 		close_port(port_id);
3208 		detach_device(device); /* might be already removed or have more ports */
3209 	}
3210 	if (need_to_start)
3211 		start_packet_forwarding(0);
3212 }
3213 
3214 /* This function is used by the interrupt thread */
3215 static int
3216 eth_event_callback(portid_t port_id, enum rte_eth_event_type type, void *param,
3217 		  void *ret_param)
3218 {
3219 	RTE_SET_USED(param);
3220 	RTE_SET_USED(ret_param);
3221 
3222 	if (type >= RTE_ETH_EVENT_MAX) {
3223 		fprintf(stderr,
3224 			"\nPort %" PRIu16 ": %s called upon invalid event %d\n",
3225 			port_id, __func__, type);
3226 		fflush(stderr);
3227 	} else if (event_print_mask & (UINT32_C(1) << type)) {
3228 		printf("\nPort %" PRIu16 ": %s event\n", port_id,
3229 			eth_event_desc[type]);
3230 		fflush(stdout);
3231 	}
3232 
3233 	switch (type) {
3234 	case RTE_ETH_EVENT_NEW:
3235 		ports[port_id].need_setup = 1;
3236 		ports[port_id].port_status = RTE_PORT_HANDLING;
3237 		break;
3238 	case RTE_ETH_EVENT_INTR_RMV:
3239 		if (port_id_is_invalid(port_id, DISABLED_WARN))
3240 			break;
3241 		if (rte_eal_alarm_set(100000,
3242 				rmv_port_callback, (void *)(intptr_t)port_id))
3243 			fprintf(stderr,
3244 				"Could not set up deferred device removal\n");
3245 		break;
3246 	case RTE_ETH_EVENT_DESTROY:
3247 		ports[port_id].port_status = RTE_PORT_CLOSED;
3248 		printf("Port %u is closed\n", port_id);
3249 		break;
3250 	default:
3251 		break;
3252 	}
3253 	return 0;
3254 }
3255 
3256 static int
3257 register_eth_event_callback(void)
3258 {
3259 	int ret;
3260 	enum rte_eth_event_type event;
3261 
3262 	for (event = RTE_ETH_EVENT_UNKNOWN;
3263 			event < RTE_ETH_EVENT_MAX; event++) {
3264 		ret = rte_eth_dev_callback_register(RTE_ETH_ALL,
3265 				event,
3266 				eth_event_callback,
3267 				NULL);
3268 		if (ret != 0) {
3269 			TESTPMD_LOG(ERR, "Failed to register callback for "
3270 					"%s event\n", eth_event_desc[event]);
3271 			return -1;
3272 		}
3273 	}
3274 
3275 	return 0;
3276 }
3277 
3278 /* This function is used by the interrupt thread */
3279 static void
3280 dev_event_callback(const char *device_name, enum rte_dev_event_type type,
3281 			     __rte_unused void *arg)
3282 {
3283 	uint16_t port_id;
3284 	int ret;
3285 
3286 	if (type >= RTE_DEV_EVENT_MAX) {
3287 		fprintf(stderr, "%s called upon invalid event %d\n",
3288 			__func__, type);
3289 		fflush(stderr);
3290 	}
3291 
3292 	switch (type) {
3293 	case RTE_DEV_EVENT_REMOVE:
3294 		RTE_LOG(DEBUG, EAL, "The device: %s has been removed!\n",
3295 			device_name);
3296 		ret = rte_eth_dev_get_port_by_name(device_name, &port_id);
3297 		if (ret) {
3298 			RTE_LOG(ERR, EAL, "can not get port by device %s!\n",
3299 				device_name);
3300 			return;
3301 		}
3302 		/*
3303 		 * Because the user's callback is invoked in eal interrupt
3304 		 * callback, the interrupt callback need to be finished before
3305 		 * it can be unregistered when detaching device. So finish
3306 		 * callback soon and use a deferred removal to detach device
3307 		 * is need. It is a workaround, once the device detaching be
3308 		 * moved into the eal in the future, the deferred removal could
3309 		 * be deleted.
3310 		 */
3311 		if (rte_eal_alarm_set(100000,
3312 				rmv_port_callback, (void *)(intptr_t)port_id))
3313 			RTE_LOG(ERR, EAL,
3314 				"Could not set up deferred device removal\n");
3315 		break;
3316 	case RTE_DEV_EVENT_ADD:
3317 		RTE_LOG(ERR, EAL, "The device: %s has been added!\n",
3318 			device_name);
3319 		/* TODO: After finish kernel driver binding,
3320 		 * begin to attach port.
3321 		 */
3322 		break;
3323 	default:
3324 		break;
3325 	}
3326 }
3327 
3328 static void
3329 rxtx_port_config(struct rte_port *port)
3330 {
3331 	uint16_t qid;
3332 	uint64_t offloads;
3333 
3334 	for (qid = 0; qid < nb_rxq; qid++) {
3335 		offloads = port->rx_conf[qid].offloads;
3336 		port->rx_conf[qid] = port->dev_info.default_rxconf;
3337 		if (offloads != 0)
3338 			port->rx_conf[qid].offloads = offloads;
3339 
3340 		/* Check if any Rx parameters have been passed */
3341 		if (rx_pthresh != RTE_PMD_PARAM_UNSET)
3342 			port->rx_conf[qid].rx_thresh.pthresh = rx_pthresh;
3343 
3344 		if (rx_hthresh != RTE_PMD_PARAM_UNSET)
3345 			port->rx_conf[qid].rx_thresh.hthresh = rx_hthresh;
3346 
3347 		if (rx_wthresh != RTE_PMD_PARAM_UNSET)
3348 			port->rx_conf[qid].rx_thresh.wthresh = rx_wthresh;
3349 
3350 		if (rx_free_thresh != RTE_PMD_PARAM_UNSET)
3351 			port->rx_conf[qid].rx_free_thresh = rx_free_thresh;
3352 
3353 		if (rx_drop_en != RTE_PMD_PARAM_UNSET)
3354 			port->rx_conf[qid].rx_drop_en = rx_drop_en;
3355 
3356 		port->nb_rx_desc[qid] = nb_rxd;
3357 	}
3358 
3359 	for (qid = 0; qid < nb_txq; qid++) {
3360 		offloads = port->tx_conf[qid].offloads;
3361 		port->tx_conf[qid] = port->dev_info.default_txconf;
3362 		if (offloads != 0)
3363 			port->tx_conf[qid].offloads = offloads;
3364 
3365 		/* Check if any Tx parameters have been passed */
3366 		if (tx_pthresh != RTE_PMD_PARAM_UNSET)
3367 			port->tx_conf[qid].tx_thresh.pthresh = tx_pthresh;
3368 
3369 		if (tx_hthresh != RTE_PMD_PARAM_UNSET)
3370 			port->tx_conf[qid].tx_thresh.hthresh = tx_hthresh;
3371 
3372 		if (tx_wthresh != RTE_PMD_PARAM_UNSET)
3373 			port->tx_conf[qid].tx_thresh.wthresh = tx_wthresh;
3374 
3375 		if (tx_rs_thresh != RTE_PMD_PARAM_UNSET)
3376 			port->tx_conf[qid].tx_rs_thresh = tx_rs_thresh;
3377 
3378 		if (tx_free_thresh != RTE_PMD_PARAM_UNSET)
3379 			port->tx_conf[qid].tx_free_thresh = tx_free_thresh;
3380 
3381 		port->nb_tx_desc[qid] = nb_txd;
3382 	}
3383 }
3384 
3385 /*
3386  * Helper function to arrange max_rx_pktlen value and JUMBO_FRAME offload,
3387  * MTU is also aligned if JUMBO_FRAME offload is not set.
3388  *
3389  * port->dev_info should be set before calling this function.
3390  *
3391  * return 0 on success, negative on error
3392  */
3393 int
3394 update_jumbo_frame_offload(portid_t portid)
3395 {
3396 	struct rte_port *port = &ports[portid];
3397 	uint32_t eth_overhead;
3398 	uint64_t rx_offloads;
3399 	int ret;
3400 	bool on;
3401 
3402 	/* Update the max_rx_pkt_len to have MTU as RTE_ETHER_MTU */
3403 	if (port->dev_info.max_mtu != UINT16_MAX &&
3404 	    port->dev_info.max_rx_pktlen > port->dev_info.max_mtu)
3405 		eth_overhead = port->dev_info.max_rx_pktlen -
3406 				port->dev_info.max_mtu;
3407 	else
3408 		eth_overhead = RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN;
3409 
3410 	rx_offloads = port->dev_conf.rxmode.offloads;
3411 
3412 	/* Default config value is 0 to use PMD specific overhead */
3413 	if (port->dev_conf.rxmode.max_rx_pkt_len == 0)
3414 		port->dev_conf.rxmode.max_rx_pkt_len = RTE_ETHER_MTU + eth_overhead;
3415 
3416 	if (port->dev_conf.rxmode.max_rx_pkt_len <= RTE_ETHER_MTU + eth_overhead) {
3417 		rx_offloads &= ~DEV_RX_OFFLOAD_JUMBO_FRAME;
3418 		on = false;
3419 	} else {
3420 		if ((port->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_JUMBO_FRAME) == 0) {
3421 			fprintf(stderr,
3422 				"Frame size (%u) is not supported by port %u\n",
3423 				port->dev_conf.rxmode.max_rx_pkt_len,
3424 				portid);
3425 			return -1;
3426 		}
3427 		rx_offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME;
3428 		on = true;
3429 	}
3430 
3431 	if (rx_offloads != port->dev_conf.rxmode.offloads) {
3432 		uint16_t qid;
3433 
3434 		port->dev_conf.rxmode.offloads = rx_offloads;
3435 
3436 		/* Apply JUMBO_FRAME offload configuration to Rx queue(s) */
3437 		for (qid = 0; qid < port->dev_info.nb_rx_queues; qid++) {
3438 			if (on)
3439 				port->rx_conf[qid].offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME;
3440 			else
3441 				port->rx_conf[qid].offloads &= ~DEV_RX_OFFLOAD_JUMBO_FRAME;
3442 		}
3443 	}
3444 
3445 	/* If JUMBO_FRAME is set MTU conversion done by ethdev layer,
3446 	 * if unset do it here
3447 	 */
3448 	if ((rx_offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) == 0) {
3449 		ret = rte_eth_dev_set_mtu(portid,
3450 				port->dev_conf.rxmode.max_rx_pkt_len - eth_overhead);
3451 		if (ret)
3452 			fprintf(stderr,
3453 				"Failed to set MTU to %u for port %u\n",
3454 				port->dev_conf.rxmode.max_rx_pkt_len - eth_overhead,
3455 				portid);
3456 	}
3457 
3458 	return 0;
3459 }
3460 
3461 void
3462 init_port_config(void)
3463 {
3464 	portid_t pid;
3465 	struct rte_port *port;
3466 	int ret;
3467 
3468 	RTE_ETH_FOREACH_DEV(pid) {
3469 		port = &ports[pid];
3470 		port->dev_conf.fdir_conf = fdir_conf;
3471 
3472 		ret = eth_dev_info_get_print_err(pid, &port->dev_info);
3473 		if (ret != 0)
3474 			return;
3475 
3476 		if (nb_rxq > 1) {
3477 			port->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
3478 			port->dev_conf.rx_adv_conf.rss_conf.rss_hf =
3479 				rss_hf & port->dev_info.flow_type_rss_offloads;
3480 		} else {
3481 			port->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
3482 			port->dev_conf.rx_adv_conf.rss_conf.rss_hf = 0;
3483 		}
3484 
3485 		if (port->dcb_flag == 0) {
3486 			if( port->dev_conf.rx_adv_conf.rss_conf.rss_hf != 0)
3487 				port->dev_conf.rxmode.mq_mode =
3488 					(enum rte_eth_rx_mq_mode)
3489 						(rx_mq_mode & ETH_MQ_RX_RSS);
3490 			else
3491 				port->dev_conf.rxmode.mq_mode = ETH_MQ_RX_NONE;
3492 		}
3493 
3494 		rxtx_port_config(port);
3495 
3496 		ret = eth_macaddr_get_print_err(pid, &port->eth_addr);
3497 		if (ret != 0)
3498 			return;
3499 
3500 #if defined RTE_NET_IXGBE && defined RTE_LIBRTE_IXGBE_BYPASS
3501 		rte_pmd_ixgbe_bypass_init(pid);
3502 #endif
3503 
3504 		if (lsc_interrupt && (*port->dev_info.dev_flags & RTE_ETH_DEV_INTR_LSC))
3505 			port->dev_conf.intr_conf.lsc = 1;
3506 		if (rmv_interrupt && (*port->dev_info.dev_flags & RTE_ETH_DEV_INTR_RMV))
3507 			port->dev_conf.intr_conf.rmv = 1;
3508 	}
3509 }
3510 
3511 void set_port_slave_flag(portid_t slave_pid)
3512 {
3513 	struct rte_port *port;
3514 
3515 	port = &ports[slave_pid];
3516 	port->slave_flag = 1;
3517 }
3518 
3519 void clear_port_slave_flag(portid_t slave_pid)
3520 {
3521 	struct rte_port *port;
3522 
3523 	port = &ports[slave_pid];
3524 	port->slave_flag = 0;
3525 }
3526 
3527 uint8_t port_is_bonding_slave(portid_t slave_pid)
3528 {
3529 	struct rte_port *port;
3530 	struct rte_eth_dev_info dev_info;
3531 	int ret;
3532 
3533 	port = &ports[slave_pid];
3534 	ret = eth_dev_info_get_print_err(slave_pid, &dev_info);
3535 	if (ret != 0) {
3536 		TESTPMD_LOG(ERR,
3537 			"Failed to get device info for port id %d,"
3538 			"cannot determine if the port is a bonded slave",
3539 			slave_pid);
3540 		return 0;
3541 	}
3542 	if ((*dev_info.dev_flags & RTE_ETH_DEV_BONDED_SLAVE) || (port->slave_flag == 1))
3543 		return 1;
3544 	return 0;
3545 }
3546 
3547 const uint16_t vlan_tags[] = {
3548 		0,  1,  2,  3,  4,  5,  6,  7,
3549 		8,  9, 10, 11,  12, 13, 14, 15,
3550 		16, 17, 18, 19, 20, 21, 22, 23,
3551 		24, 25, 26, 27, 28, 29, 30, 31
3552 };
3553 
3554 static  int
3555 get_eth_dcb_conf(portid_t pid, struct rte_eth_conf *eth_conf,
3556 		 enum dcb_mode_enable dcb_mode,
3557 		 enum rte_eth_nb_tcs num_tcs,
3558 		 uint8_t pfc_en)
3559 {
3560 	uint8_t i;
3561 	int32_t rc;
3562 	struct rte_eth_rss_conf rss_conf;
3563 
3564 	/*
3565 	 * Builds up the correct configuration for dcb+vt based on the vlan tags array
3566 	 * given above, and the number of traffic classes available for use.
3567 	 */
3568 	if (dcb_mode == DCB_VT_ENABLED) {
3569 		struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
3570 				&eth_conf->rx_adv_conf.vmdq_dcb_conf;
3571 		struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3572 				&eth_conf->tx_adv_conf.vmdq_dcb_tx_conf;
3573 
3574 		/* VMDQ+DCB RX and TX configurations */
3575 		vmdq_rx_conf->enable_default_pool = 0;
3576 		vmdq_rx_conf->default_pool = 0;
3577 		vmdq_rx_conf->nb_queue_pools =
3578 			(num_tcs ==  ETH_4_TCS ? ETH_32_POOLS : ETH_16_POOLS);
3579 		vmdq_tx_conf->nb_queue_pools =
3580 			(num_tcs ==  ETH_4_TCS ? ETH_32_POOLS : ETH_16_POOLS);
3581 
3582 		vmdq_rx_conf->nb_pool_maps = vmdq_rx_conf->nb_queue_pools;
3583 		for (i = 0; i < vmdq_rx_conf->nb_pool_maps; i++) {
3584 			vmdq_rx_conf->pool_map[i].vlan_id = vlan_tags[i];
3585 			vmdq_rx_conf->pool_map[i].pools =
3586 				1 << (i % vmdq_rx_conf->nb_queue_pools);
3587 		}
3588 		for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3589 			vmdq_rx_conf->dcb_tc[i] = i % num_tcs;
3590 			vmdq_tx_conf->dcb_tc[i] = i % num_tcs;
3591 		}
3592 
3593 		/* set DCB mode of RX and TX of multiple queues */
3594 		eth_conf->rxmode.mq_mode =
3595 				(enum rte_eth_rx_mq_mode)
3596 					(rx_mq_mode & ETH_MQ_RX_VMDQ_DCB);
3597 		eth_conf->txmode.mq_mode = ETH_MQ_TX_VMDQ_DCB;
3598 	} else {
3599 		struct rte_eth_dcb_rx_conf *rx_conf =
3600 				&eth_conf->rx_adv_conf.dcb_rx_conf;
3601 		struct rte_eth_dcb_tx_conf *tx_conf =
3602 				&eth_conf->tx_adv_conf.dcb_tx_conf;
3603 
3604 		memset(&rss_conf, 0, sizeof(struct rte_eth_rss_conf));
3605 
3606 		rc = rte_eth_dev_rss_hash_conf_get(pid, &rss_conf);
3607 		if (rc != 0)
3608 			return rc;
3609 
3610 		rx_conf->nb_tcs = num_tcs;
3611 		tx_conf->nb_tcs = num_tcs;
3612 
3613 		for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3614 			rx_conf->dcb_tc[i] = i % num_tcs;
3615 			tx_conf->dcb_tc[i] = i % num_tcs;
3616 		}
3617 
3618 		eth_conf->rxmode.mq_mode =
3619 				(enum rte_eth_rx_mq_mode)
3620 					(rx_mq_mode & ETH_MQ_RX_DCB_RSS);
3621 		eth_conf->rx_adv_conf.rss_conf = rss_conf;
3622 		eth_conf->txmode.mq_mode = ETH_MQ_TX_DCB;
3623 	}
3624 
3625 	if (pfc_en)
3626 		eth_conf->dcb_capability_en =
3627 				ETH_DCB_PG_SUPPORT | ETH_DCB_PFC_SUPPORT;
3628 	else
3629 		eth_conf->dcb_capability_en = ETH_DCB_PG_SUPPORT;
3630 
3631 	return 0;
3632 }
3633 
3634 int
3635 init_port_dcb_config(portid_t pid,
3636 		     enum dcb_mode_enable dcb_mode,
3637 		     enum rte_eth_nb_tcs num_tcs,
3638 		     uint8_t pfc_en)
3639 {
3640 	struct rte_eth_conf port_conf;
3641 	struct rte_port *rte_port;
3642 	int retval;
3643 	uint16_t i;
3644 
3645 	rte_port = &ports[pid];
3646 
3647 	memset(&port_conf, 0, sizeof(struct rte_eth_conf));
3648 
3649 	port_conf.rxmode = rte_port->dev_conf.rxmode;
3650 	port_conf.txmode = rte_port->dev_conf.txmode;
3651 
3652 	/*set configuration of DCB in vt mode and DCB in non-vt mode*/
3653 	retval = get_eth_dcb_conf(pid, &port_conf, dcb_mode, num_tcs, pfc_en);
3654 	if (retval < 0)
3655 		return retval;
3656 	port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_VLAN_FILTER;
3657 
3658 	/* re-configure the device . */
3659 	retval = rte_eth_dev_configure(pid, nb_rxq, nb_rxq, &port_conf);
3660 	if (retval < 0)
3661 		return retval;
3662 
3663 	retval = eth_dev_info_get_print_err(pid, &rte_port->dev_info);
3664 	if (retval != 0)
3665 		return retval;
3666 
3667 	/* If dev_info.vmdq_pool_base is greater than 0,
3668 	 * the queue id of vmdq pools is started after pf queues.
3669 	 */
3670 	if (dcb_mode == DCB_VT_ENABLED &&
3671 	    rte_port->dev_info.vmdq_pool_base > 0) {
3672 		fprintf(stderr,
3673 			"VMDQ_DCB multi-queue mode is nonsensical for port %d.\n",
3674 			pid);
3675 		return -1;
3676 	}
3677 
3678 	/* Assume the ports in testpmd have the same dcb capability
3679 	 * and has the same number of rxq and txq in dcb mode
3680 	 */
3681 	if (dcb_mode == DCB_VT_ENABLED) {
3682 		if (rte_port->dev_info.max_vfs > 0) {
3683 			nb_rxq = rte_port->dev_info.nb_rx_queues;
3684 			nb_txq = rte_port->dev_info.nb_tx_queues;
3685 		} else {
3686 			nb_rxq = rte_port->dev_info.max_rx_queues;
3687 			nb_txq = rte_port->dev_info.max_tx_queues;
3688 		}
3689 	} else {
3690 		/*if vt is disabled, use all pf queues */
3691 		if (rte_port->dev_info.vmdq_pool_base == 0) {
3692 			nb_rxq = rte_port->dev_info.max_rx_queues;
3693 			nb_txq = rte_port->dev_info.max_tx_queues;
3694 		} else {
3695 			nb_rxq = (queueid_t)num_tcs;
3696 			nb_txq = (queueid_t)num_tcs;
3697 
3698 		}
3699 	}
3700 	rx_free_thresh = 64;
3701 
3702 	memcpy(&rte_port->dev_conf, &port_conf, sizeof(struct rte_eth_conf));
3703 
3704 	rxtx_port_config(rte_port);
3705 	/* VLAN filter */
3706 	rte_port->dev_conf.rxmode.offloads |= DEV_RX_OFFLOAD_VLAN_FILTER;
3707 	for (i = 0; i < RTE_DIM(vlan_tags); i++)
3708 		rx_vft_set(pid, vlan_tags[i], 1);
3709 
3710 	retval = eth_macaddr_get_print_err(pid, &rte_port->eth_addr);
3711 	if (retval != 0)
3712 		return retval;
3713 
3714 	rte_port->dcb_flag = 1;
3715 
3716 	/* Enter DCB configuration status */
3717 	dcb_config = 1;
3718 
3719 	return 0;
3720 }
3721 
3722 static void
3723 init_port(void)
3724 {
3725 	int i;
3726 
3727 	/* Configuration of Ethernet ports. */
3728 	ports = rte_zmalloc("testpmd: ports",
3729 			    sizeof(struct rte_port) * RTE_MAX_ETHPORTS,
3730 			    RTE_CACHE_LINE_SIZE);
3731 	if (ports == NULL) {
3732 		rte_exit(EXIT_FAILURE,
3733 				"rte_zmalloc(%d struct rte_port) failed\n",
3734 				RTE_MAX_ETHPORTS);
3735 	}
3736 	for (i = 0; i < RTE_MAX_ETHPORTS; i++)
3737 		LIST_INIT(&ports[i].flow_tunnel_list);
3738 	/* Initialize ports NUMA structures */
3739 	memset(port_numa, NUMA_NO_CONFIG, RTE_MAX_ETHPORTS);
3740 	memset(rxring_numa, NUMA_NO_CONFIG, RTE_MAX_ETHPORTS);
3741 	memset(txring_numa, NUMA_NO_CONFIG, RTE_MAX_ETHPORTS);
3742 }
3743 
3744 static void
3745 force_quit(void)
3746 {
3747 	pmd_test_exit();
3748 	prompt_exit();
3749 }
3750 
3751 static void
3752 print_stats(void)
3753 {
3754 	uint8_t i;
3755 	const char clr[] = { 27, '[', '2', 'J', '\0' };
3756 	const char top_left[] = { 27, '[', '1', ';', '1', 'H', '\0' };
3757 
3758 	/* Clear screen and move to top left */
3759 	printf("%s%s", clr, top_left);
3760 
3761 	printf("\nPort statistics ====================================");
3762 	for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++)
3763 		nic_stats_display(fwd_ports_ids[i]);
3764 
3765 	fflush(stdout);
3766 }
3767 
3768 static void
3769 signal_handler(int signum)
3770 {
3771 	if (signum == SIGINT || signum == SIGTERM) {
3772 		fprintf(stderr, "\nSignal %d received, preparing to exit...\n",
3773 			signum);
3774 #ifdef RTE_LIB_PDUMP
3775 		/* uninitialize packet capture framework */
3776 		rte_pdump_uninit();
3777 #endif
3778 #ifdef RTE_LIB_LATENCYSTATS
3779 		if (latencystats_enabled != 0)
3780 			rte_latencystats_uninit();
3781 #endif
3782 		force_quit();
3783 		/* Set flag to indicate the force termination. */
3784 		f_quit = 1;
3785 		/* exit with the expected status */
3786 #ifndef RTE_EXEC_ENV_WINDOWS
3787 		signal(signum, SIG_DFL);
3788 		kill(getpid(), signum);
3789 #endif
3790 	}
3791 }
3792 
3793 int
3794 main(int argc, char** argv)
3795 {
3796 	int diag;
3797 	portid_t port_id;
3798 	uint16_t count;
3799 	int ret;
3800 
3801 	signal(SIGINT, signal_handler);
3802 	signal(SIGTERM, signal_handler);
3803 
3804 	testpmd_logtype = rte_log_register("testpmd");
3805 	if (testpmd_logtype < 0)
3806 		rte_exit(EXIT_FAILURE, "Cannot register log type");
3807 	rte_log_set_level(testpmd_logtype, RTE_LOG_DEBUG);
3808 
3809 	diag = rte_eal_init(argc, argv);
3810 	if (diag < 0)
3811 		rte_exit(EXIT_FAILURE, "Cannot init EAL: %s\n",
3812 			 rte_strerror(rte_errno));
3813 
3814 	if (rte_eal_process_type() == RTE_PROC_SECONDARY)
3815 		rte_exit(EXIT_FAILURE,
3816 			 "Secondary process type not supported.\n");
3817 
3818 	ret = register_eth_event_callback();
3819 	if (ret != 0)
3820 		rte_exit(EXIT_FAILURE, "Cannot register for ethdev events");
3821 
3822 #ifdef RTE_LIB_PDUMP
3823 	/* initialize packet capture framework */
3824 	rte_pdump_init();
3825 #endif
3826 
3827 	count = 0;
3828 	RTE_ETH_FOREACH_DEV(port_id) {
3829 		ports_ids[count] = port_id;
3830 		count++;
3831 	}
3832 	nb_ports = (portid_t) count;
3833 	if (nb_ports == 0)
3834 		TESTPMD_LOG(WARNING, "No probed ethernet devices\n");
3835 
3836 	/* allocate port structures, and init them */
3837 	init_port();
3838 
3839 	set_def_fwd_config();
3840 	if (nb_lcores == 0)
3841 		rte_exit(EXIT_FAILURE, "No cores defined for forwarding\n"
3842 			 "Check the core mask argument\n");
3843 
3844 	/* Bitrate/latency stats disabled by default */
3845 #ifdef RTE_LIB_BITRATESTATS
3846 	bitrate_enabled = 0;
3847 #endif
3848 #ifdef RTE_LIB_LATENCYSTATS
3849 	latencystats_enabled = 0;
3850 #endif
3851 
3852 	/* on FreeBSD, mlockall() is disabled by default */
3853 #ifdef RTE_EXEC_ENV_FREEBSD
3854 	do_mlockall = 0;
3855 #else
3856 	do_mlockall = 1;
3857 #endif
3858 
3859 	argc -= diag;
3860 	argv += diag;
3861 	if (argc > 1)
3862 		launch_args_parse(argc, argv);
3863 
3864 #ifndef RTE_EXEC_ENV_WINDOWS
3865 	if (do_mlockall && mlockall(MCL_CURRENT | MCL_FUTURE)) {
3866 		TESTPMD_LOG(NOTICE, "mlockall() failed with error \"%s\"\n",
3867 			strerror(errno));
3868 	}
3869 #endif
3870 
3871 	if (tx_first && interactive)
3872 		rte_exit(EXIT_FAILURE, "--tx-first cannot be used on "
3873 				"interactive mode.\n");
3874 
3875 	if (tx_first && lsc_interrupt) {
3876 		fprintf(stderr,
3877 			"Warning: lsc_interrupt needs to be off when using tx_first. Disabling.\n");
3878 		lsc_interrupt = 0;
3879 	}
3880 
3881 	if (!nb_rxq && !nb_txq)
3882 		fprintf(stderr,
3883 			"Warning: Either rx or tx queues should be non-zero\n");
3884 
3885 	if (nb_rxq > 1 && nb_rxq > nb_txq)
3886 		fprintf(stderr,
3887 			"Warning: nb_rxq=%d enables RSS configuration, but nb_txq=%d will prevent to fully test it.\n",
3888 			nb_rxq, nb_txq);
3889 
3890 	init_config();
3891 
3892 	if (hot_plug) {
3893 		ret = rte_dev_hotplug_handle_enable();
3894 		if (ret) {
3895 			RTE_LOG(ERR, EAL,
3896 				"fail to enable hotplug handling.");
3897 			return -1;
3898 		}
3899 
3900 		ret = rte_dev_event_monitor_start();
3901 		if (ret) {
3902 			RTE_LOG(ERR, EAL,
3903 				"fail to start device event monitoring.");
3904 			return -1;
3905 		}
3906 
3907 		ret = rte_dev_event_callback_register(NULL,
3908 			dev_event_callback, NULL);
3909 		if (ret) {
3910 			RTE_LOG(ERR, EAL,
3911 				"fail  to register device event callback\n");
3912 			return -1;
3913 		}
3914 	}
3915 
3916 	if (!no_device_start && start_port(RTE_PORT_ALL) != 0)
3917 		rte_exit(EXIT_FAILURE, "Start ports failed\n");
3918 
3919 	/* set all ports to promiscuous mode by default */
3920 	RTE_ETH_FOREACH_DEV(port_id) {
3921 		ret = rte_eth_promiscuous_enable(port_id);
3922 		if (ret != 0)
3923 			fprintf(stderr,
3924 				"Error during enabling promiscuous mode for port %u: %s - ignore\n",
3925 				port_id, rte_strerror(-ret));
3926 	}
3927 
3928 	/* Init metrics library */
3929 	rte_metrics_init(rte_socket_id());
3930 
3931 #ifdef RTE_LIB_LATENCYSTATS
3932 	if (latencystats_enabled != 0) {
3933 		int ret = rte_latencystats_init(1, NULL);
3934 		if (ret)
3935 			fprintf(stderr,
3936 				"Warning: latencystats init() returned error %d\n",
3937 				ret);
3938 		fprintf(stderr, "Latencystats running on lcore %d\n",
3939 			latencystats_lcore_id);
3940 	}
3941 #endif
3942 
3943 	/* Setup bitrate stats */
3944 #ifdef RTE_LIB_BITRATESTATS
3945 	if (bitrate_enabled != 0) {
3946 		bitrate_data = rte_stats_bitrate_create();
3947 		if (bitrate_data == NULL)
3948 			rte_exit(EXIT_FAILURE,
3949 				"Could not allocate bitrate data.\n");
3950 		rte_stats_bitrate_reg(bitrate_data);
3951 	}
3952 #endif
3953 
3954 #ifdef RTE_LIB_CMDLINE
3955 	if (strlen(cmdline_filename) != 0)
3956 		cmdline_read_from_file(cmdline_filename);
3957 
3958 	if (interactive == 1) {
3959 		if (auto_start) {
3960 			printf("Start automatic packet forwarding\n");
3961 			start_packet_forwarding(0);
3962 		}
3963 		prompt();
3964 		pmd_test_exit();
3965 	} else
3966 #endif
3967 	{
3968 		char c;
3969 		int rc;
3970 
3971 		f_quit = 0;
3972 
3973 		printf("No commandline core given, start packet forwarding\n");
3974 		start_packet_forwarding(tx_first);
3975 		if (stats_period != 0) {
3976 			uint64_t prev_time = 0, cur_time, diff_time = 0;
3977 			uint64_t timer_period;
3978 
3979 			/* Convert to number of cycles */
3980 			timer_period = stats_period * rte_get_timer_hz();
3981 
3982 			while (f_quit == 0) {
3983 				cur_time = rte_get_timer_cycles();
3984 				diff_time += cur_time - prev_time;
3985 
3986 				if (diff_time >= timer_period) {
3987 					print_stats();
3988 					/* Reset the timer */
3989 					diff_time = 0;
3990 				}
3991 				/* Sleep to avoid unnecessary checks */
3992 				prev_time = cur_time;
3993 				rte_delay_us_sleep(US_PER_S);
3994 			}
3995 		}
3996 
3997 		printf("Press enter to exit\n");
3998 		rc = read(0, &c, 1);
3999 		pmd_test_exit();
4000 		if (rc < 0)
4001 			return 1;
4002 	}
4003 
4004 	ret = rte_eal_cleanup();
4005 	if (ret != 0)
4006 		rte_exit(EXIT_FAILURE,
4007 			 "EAL cleanup failed: %s\n", strerror(-ret));
4008 
4009 	return EXIT_SUCCESS;
4010 }
4011