xref: /dpdk/app/test-pmd/testpmd.c (revision 50db4db5785c4f7f5428f66c6487fd7f705c82b7)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2017 Intel Corporation
3  */
4 
5 #include <stdarg.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <signal.h>
9 #include <string.h>
10 #include <time.h>
11 #include <fcntl.h>
12 #include <sys/mman.h>
13 #include <sys/types.h>
14 #include <errno.h>
15 #include <stdbool.h>
16 
17 #include <sys/queue.h>
18 #include <sys/stat.h>
19 
20 #include <stdint.h>
21 #include <unistd.h>
22 #include <inttypes.h>
23 
24 #include <rte_common.h>
25 #include <rte_errno.h>
26 #include <rte_byteorder.h>
27 #include <rte_log.h>
28 #include <rte_debug.h>
29 #include <rte_cycles.h>
30 #include <rte_memory.h>
31 #include <rte_memcpy.h>
32 #include <rte_launch.h>
33 #include <rte_eal.h>
34 #include <rte_alarm.h>
35 #include <rte_per_lcore.h>
36 #include <rte_lcore.h>
37 #include <rte_atomic.h>
38 #include <rte_branch_prediction.h>
39 #include <rte_mempool.h>
40 #include <rte_malloc.h>
41 #include <rte_mbuf.h>
42 #include <rte_mbuf_pool_ops.h>
43 #include <rte_interrupts.h>
44 #include <rte_pci.h>
45 #include <rte_ether.h>
46 #include <rte_ethdev.h>
47 #include <rte_dev.h>
48 #include <rte_string_fns.h>
49 #ifdef RTE_NET_IXGBE
50 #include <rte_pmd_ixgbe.h>
51 #endif
52 #ifdef RTE_LIB_PDUMP
53 #include <rte_pdump.h>
54 #endif
55 #include <rte_flow.h>
56 #include <rte_metrics.h>
57 #ifdef RTE_LIB_BITRATESTATS
58 #include <rte_bitrate.h>
59 #endif
60 #ifdef RTE_LIB_LATENCYSTATS
61 #include <rte_latencystats.h>
62 #endif
63 
64 #include "testpmd.h"
65 
66 #ifndef MAP_HUGETLB
67 /* FreeBSD may not have MAP_HUGETLB (in fact, it probably doesn't) */
68 #define HUGE_FLAG (0x40000)
69 #else
70 #define HUGE_FLAG MAP_HUGETLB
71 #endif
72 
73 #ifndef MAP_HUGE_SHIFT
74 /* older kernels (or FreeBSD) will not have this define */
75 #define HUGE_SHIFT (26)
76 #else
77 #define HUGE_SHIFT MAP_HUGE_SHIFT
78 #endif
79 
80 #define EXTMEM_HEAP_NAME "extmem"
81 #define EXTBUF_ZONE_SIZE RTE_PGSIZE_2M
82 
83 uint16_t verbose_level = 0; /**< Silent by default. */
84 int testpmd_logtype; /**< Log type for testpmd logs */
85 
86 /* use main core for command line ? */
87 uint8_t interactive = 0;
88 uint8_t auto_start = 0;
89 uint8_t tx_first;
90 char cmdline_filename[PATH_MAX] = {0};
91 
92 /*
93  * NUMA support configuration.
94  * When set, the NUMA support attempts to dispatch the allocation of the
95  * RX and TX memory rings, and of the DMA memory buffers (mbufs) for the
96  * probed ports among the CPU sockets 0 and 1.
97  * Otherwise, all memory is allocated from CPU socket 0.
98  */
99 uint8_t numa_support = 1; /**< numa enabled by default */
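/* NUMA support is normally toggled at startup from the command line (the
 * --numa / --no-numa options parsed in parameters.c); the default above keeps it on.
 */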
100 
101 /*
102  * In UMA mode, all memory is allocated from socket 0 if --socket-num is
103  * not configured.
104  */
105 uint8_t socket_num = UMA_NO_CONFIG;
106 
107 /*
108  * Select mempool allocation type:
109  * - native: use regular DPDK memory
110  * - anon: use regular DPDK memory to create mempool, but populate using
111  *         anonymous memory (may not be IOVA-contiguous)
112  * - xmem: use externally allocated hugepage memory
113  */
114 uint8_t mp_alloc_type = MP_ALLOC_NATIVE;
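/* Normally selected at startup with the --mp-alloc command-line option
 * (native, anon, xmem, xmemhuge or xbuf).
 */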
115 
116 /*
117  * Store the specified NUMA sockets on which the memory pools used by the
118  * ports are allocated.
119  */
120 uint8_t port_numa[RTE_MAX_ETHPORTS];
121 
122 /*
123  * Store the specified NUMA sockets on which the RX rings used by the
124  * ports are allocated.
125  */
126 uint8_t rxring_numa[RTE_MAX_ETHPORTS];
127 
128 /*
129  * Store the specified NUMA sockets on which the TX rings used by the
130  * ports are allocated.
131  */
132 uint8_t txring_numa[RTE_MAX_ETHPORTS];
133 
134 /*
135  * Record the Ethernet address of peer target ports to which packets are
136  * forwarded.
137  * Must be instantiated with the Ethernet addresses of peer traffic generator
138  * ports.
139  */
140 struct rte_ether_addr peer_eth_addrs[RTE_MAX_ETHPORTS];
141 portid_t nb_peer_eth_addrs = 0;
142 
143 /*
144  * Probed Target Environment.
145  */
146 struct rte_port *ports;	       /**< For all probed ethernet ports. */
147 portid_t nb_ports;             /**< Number of probed ethernet ports. */
148 struct fwd_lcore **fwd_lcores; /**< For all probed logical cores. */
149 lcoreid_t nb_lcores;           /**< Number of probed logical cores. */
150 
151 portid_t ports_ids[RTE_MAX_ETHPORTS]; /**< Store all port ids. */
152 
153 /*
154  * Test Forwarding Configuration.
155  *    nb_fwd_lcores <= nb_cfg_lcores <= nb_lcores
156  *    nb_fwd_ports  <= nb_cfg_ports  <= nb_ports
157  */
158 lcoreid_t nb_cfg_lcores; /**< Number of configured logical cores. */
159 lcoreid_t nb_fwd_lcores; /**< Number of forwarding logical cores. */
160 portid_t  nb_cfg_ports;  /**< Number of configured ports. */
161 portid_t  nb_fwd_ports;  /**< Number of forwarding ports. */
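/* At run time the interactive "set nbcore <n>" and "set nbport <n>" commands
 * (assuming the standard testpmd CLI) adjust nb_fwd_lcores and nb_fwd_ports
 * within the limits above.
 */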
162 
163 unsigned int fwd_lcores_cpuids[RTE_MAX_LCORE]; /**< CPU ids configuration. */
164 portid_t fwd_ports_ids[RTE_MAX_ETHPORTS];      /**< Port ids configuration. */
165 
166 struct fwd_stream **fwd_streams; /**< For each RX queue of each port. */
167 streamid_t nb_fwd_streams;       /**< Is equal to (nb_ports * nb_rxq). */
168 
169 /*
170  * Forwarding engines.
171  */
172 struct fwd_engine *fwd_engines[] = {
173 	&io_fwd_engine,
174 	&mac_fwd_engine,
175 	&mac_swap_engine,
176 	&flow_gen_engine,
177 	&rx_only_engine,
178 	&tx_only_engine,
179 	&csum_fwd_engine,
180 	&icmp_echo_engine,
181 	&noisy_vnf_engine,
182 	&five_tuple_swap_fwd_engine,
183 #ifdef RTE_LIBRTE_IEEE1588
184 	&ieee1588_fwd_engine,
185 #endif
186 	NULL,
187 };
188 
189 struct rte_mempool *mempools[RTE_MAX_NUMA_NODES * MAX_SEGS_BUFFER_SPLIT];
190 uint16_t mempool_flags;
191 
192 struct fwd_config cur_fwd_config;
193 struct fwd_engine *cur_fwd_eng = &io_fwd_engine; /**< IO mode by default. */
194 uint32_t retry_enabled;
195 uint32_t burst_tx_delay_time = BURST_TX_WAIT_US;
196 uint32_t burst_tx_retry_num = BURST_TX_RETRIES;
197 
198 uint32_t mbuf_data_size_n = 1; /* Number of specified mbuf sizes. */
199 uint16_t mbuf_data_size[MAX_SEGS_BUFFER_SPLIT] = {
200 	DEFAULT_MBUF_DATA_SIZE
201 }; /**< Mbuf data space size. */
202 uint32_t param_total_num_mbufs = 0;  /**< number of mbufs in all pools - if
203                                       * specified on command-line. */
204 uint16_t stats_period; /**< Period to show statistics (disabled by default) */
205 
206 /*
207  * In a container, a process running with the 'stats-period' option cannot be
208  * stopped normally; set this flag to exit the stats period loop on SIGINT/SIGTERM.
209  */
210 uint8_t f_quit;
211 
212 /*
213  * Configuration of packet segments used to scatter received packets
214  * if any of the split features is configured.
215  */
216 uint16_t rx_pkt_seg_lengths[MAX_SEGS_BUFFER_SPLIT];
217 uint8_t  rx_pkt_nb_segs; /**< Number of segments to split */
218 uint16_t rx_pkt_seg_offsets[MAX_SEGS_BUFFER_SPLIT];
219 uint8_t  rx_pkt_nb_offs; /**< Number of specified offsets */
220 
221 /*
222  * Configuration of packet segments used by the "txonly" processing engine.
223  */
224 uint16_t tx_pkt_length = TXONLY_DEF_PACKET_LEN; /**< TXONLY packet length. */
225 uint16_t tx_pkt_seg_lengths[RTE_MAX_SEGS_PER_PKT] = {
226 	TXONLY_DEF_PACKET_LEN,
227 };
228 uint8_t  tx_pkt_nb_segs = 1; /**< Number of segments in TXONLY packets */
229 
230 enum tx_pkt_split tx_pkt_split = TX_PKT_SPLIT_OFF;
231 /**< Split policy for packets to TX. */
232 
233 uint8_t txonly_multi_flow;
234 /**< Whether multiple flows are generated in TXONLY mode. */
235 
236 uint32_t tx_pkt_times_inter;
237 /**< Timings for send scheduling in TXONLY mode, time between bursts. */
238 
239 uint32_t tx_pkt_times_intra;
240 /**< Timings for send scheduling in TXONLY mode, time between packets. */
241 
242 uint16_t nb_pkt_per_burst = DEF_PKT_BURST; /**< Number of packets per burst. */
243 uint16_t nb_pkt_flowgen_clones; /**< Number of Tx packet clones to send in flowgen mode. */
244 uint16_t mb_mempool_cache = DEF_MBUF_CACHE; /**< Size of mbuf mempool cache. */
245 
246 /* Whether the current configuration is in DCB mode; 0 means it is not. */
247 uint8_t dcb_config = 0;
248 
249 /*
250  * Configurable number of RX/TX queues.
251  */
252 queueid_t nb_hairpinq; /**< Number of hairpin queues per port. */
253 queueid_t nb_rxq = 1; /**< Number of RX queues per port. */
254 queueid_t nb_txq = 1; /**< Number of TX queues per port. */
255 
256 /*
257  * Configurable number of RX/TX ring descriptors.
258  * Defaults are supplied by drivers via ethdev.
259  */
260 #define RTE_TEST_RX_DESC_DEFAULT 0
261 #define RTE_TEST_TX_DESC_DEFAULT 0
262 uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT; /**< Number of RX descriptors. */
263 uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT; /**< Number of TX descriptors. */
264 
265 #define RTE_PMD_PARAM_UNSET -1
266 /*
267  * Configurable values of RX and TX ring threshold registers.
268  */
269 
270 int8_t rx_pthresh = RTE_PMD_PARAM_UNSET;
271 int8_t rx_hthresh = RTE_PMD_PARAM_UNSET;
272 int8_t rx_wthresh = RTE_PMD_PARAM_UNSET;
273 
274 int8_t tx_pthresh = RTE_PMD_PARAM_UNSET;
275 int8_t tx_hthresh = RTE_PMD_PARAM_UNSET;
276 int8_t tx_wthresh = RTE_PMD_PARAM_UNSET;
277 
278 /*
279  * Configurable value of RX free threshold.
280  */
281 int16_t rx_free_thresh = RTE_PMD_PARAM_UNSET;
282 
283 /*
284  * Configurable value of RX drop enable.
285  */
286 int8_t rx_drop_en = RTE_PMD_PARAM_UNSET;
287 
288 /*
289  * Configurable value of TX free threshold.
290  */
291 int16_t tx_free_thresh = RTE_PMD_PARAM_UNSET;
292 
293 /*
294  * Configurable value of TX RS bit threshold.
295  */
296 int16_t tx_rs_thresh = RTE_PMD_PARAM_UNSET;
297 
298 /*
299  * Configurable value of buffered packets before sending.
300  */
301 uint16_t noisy_tx_sw_bufsz;
302 
303 /*
304  * Configurable value of packet buffer timeout.
305  */
306 uint16_t noisy_tx_sw_buf_flush_time;
307 
308 /*
309  * Configurable value for size of VNF internal memory area
310  * used for simulating noisy neighbour behaviour
311  */
312 uint64_t noisy_lkup_mem_sz;
313 
314 /*
315  * Configurable value of number of random writes done in
316  * VNF simulation memory area.
317  */
318 uint64_t noisy_lkup_num_writes;
319 
320 /*
321  * Configurable value of number of random reads done in
322  * VNF simulation memory area.
323  */
324 uint64_t noisy_lkup_num_reads;
325 
326 /*
327  * Configurable value of number of random reads/writes done in
328  * VNF simulation memory area.
329  */
330 uint64_t noisy_lkup_num_reads_writes;
331 
332 /*
333  * Receive Side Scaling (RSS) configuration.
334  */
335 uint64_t rss_hf = ETH_RSS_IP; /* RSS IP by default. */
336 
337 /*
338  * Port topology configuration
339  */
340 uint16_t port_topology = PORT_TOPOLOGY_PAIRED; /* Ports are paired by default */
341 
342 /*
343  * Avoid flushing all the RX streams before starting forwarding.
344  */
345 uint8_t no_flush_rx = 0; /* flush by default */
346 
347 /*
348  * Flow API isolated mode.
349  */
350 uint8_t flow_isolate_all;
351 
352 /*
353  * Avoid checking the link status when starting/stopping a port.
354  */
355 uint8_t no_link_check = 0; /* check by default */
356 
357 /*
358  * Don't automatically start all ports in interactive mode.
359  */
360 uint8_t no_device_start = 0;
361 
362 /*
363  * Enable link status change notification
364  */
365 uint8_t lsc_interrupt = 1; /* enabled by default */
366 
367 /*
368  * Enable device removal notification.
369  */
370 uint8_t rmv_interrupt = 1; /* enabled by default */
371 
372 uint8_t hot_plug = 0; /**< hotplug disabled by default. */
373 
374 /* After attach, port setup is called on event or by iterator */
375 bool setup_on_probe_event = true;
376 
377 /* Clear ptypes on port initialization. */
378 uint8_t clear_ptypes = true;
379 
380 /* Hairpin ports configuration mode. */
381 uint16_t hairpin_mode;
382 
383 /* Pretty printing of ethdev events */
384 static const char * const eth_event_desc[] = {
385 	[RTE_ETH_EVENT_UNKNOWN] = "unknown",
386 	[RTE_ETH_EVENT_INTR_LSC] = "link state change",
387 	[RTE_ETH_EVENT_QUEUE_STATE] = "queue state",
388 	[RTE_ETH_EVENT_INTR_RESET] = "reset",
389 	[RTE_ETH_EVENT_VF_MBOX] = "VF mbox",
390 	[RTE_ETH_EVENT_IPSEC] = "IPsec",
391 	[RTE_ETH_EVENT_MACSEC] = "MACsec",
392 	[RTE_ETH_EVENT_INTR_RMV] = "device removal",
393 	[RTE_ETH_EVENT_NEW] = "device probed",
394 	[RTE_ETH_EVENT_DESTROY] = "device released",
395 	[RTE_ETH_EVENT_FLOW_AGED] = "flow aged",
396 	[RTE_ETH_EVENT_MAX] = NULL,
397 };
398 
399 /*
400  * Display or mask ethdev events.
401  * Default to all events except VF_MBOX, NEW and DESTROY.
402  */
403 uint32_t event_print_mask = (UINT32_C(1) << RTE_ETH_EVENT_UNKNOWN) |
404 			    (UINT32_C(1) << RTE_ETH_EVENT_INTR_LSC) |
405 			    (UINT32_C(1) << RTE_ETH_EVENT_QUEUE_STATE) |
406 			    (UINT32_C(1) << RTE_ETH_EVENT_INTR_RESET) |
407 			    (UINT32_C(1) << RTE_ETH_EVENT_IPSEC) |
408 			    (UINT32_C(1) << RTE_ETH_EVENT_MACSEC) |
409 			    (UINT32_C(1) << RTE_ETH_EVENT_INTR_RMV) |
410 			    (UINT32_C(1) << RTE_ETH_EVENT_FLOW_AGED);
411 /*
412  * Decide if all memory is locked for performance.
413  */
414 int do_mlockall = 0;
415 
416 /*
417  * NIC bypass mode configuration options.
418  */
419 
420 #if defined RTE_NET_IXGBE && defined RTE_LIBRTE_IXGBE_BYPASS
421 /* The NIC bypass watchdog timeout. */
422 uint32_t bypass_timeout = RTE_PMD_IXGBE_BYPASS_TMT_OFF;
423 #endif
424 
425 
426 #ifdef RTE_LIB_LATENCYSTATS
427 
428 /*
429  * Set when latency stats are enabled on the command line.
430  */
431 uint8_t latencystats_enabled;
432 
433 /*
434  * Lcore ID to serve latency statistics.
435  */
436 lcoreid_t latencystats_lcore_id = -1;
437 
438 #endif
439 
440 /*
441  * Ethernet device configuration.
442  */
443 struct rte_eth_rxmode rx_mode = {
444 	/* Default maximum frame length.
445 	 * Zero is converted to "RTE_ETHER_MTU + PMD Ethernet overhead"
446 	 * in init_config().
447 	 */
448 	.max_rx_pkt_len = 0,
449 };
450 
451 struct rte_eth_txmode tx_mode = {
452 	.offloads = DEV_TX_OFFLOAD_MBUF_FAST_FREE,
453 };
454 
455 struct rte_fdir_conf fdir_conf = {
456 	.mode = RTE_FDIR_MODE_NONE,
457 	.pballoc = RTE_FDIR_PBALLOC_64K,
458 	.status = RTE_FDIR_REPORT_STATUS,
459 	.mask = {
460 		.vlan_tci_mask = 0xFFEF,
461 		.ipv4_mask     = {
462 			.src_ip = 0xFFFFFFFF,
463 			.dst_ip = 0xFFFFFFFF,
464 		},
465 		.ipv6_mask     = {
466 			.src_ip = {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
467 			.dst_ip = {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
468 		},
469 		.src_port_mask = 0xFFFF,
470 		.dst_port_mask = 0xFFFF,
471 		.mac_addr_byte_mask = 0xFF,
472 		.tunnel_type_mask = 1,
473 		.tunnel_id_mask = 0xFFFFFFFF,
474 	},
475 	.drop_queue = 127,
476 };
477 
478 volatile int test_done = 1; /* stop packet forwarding when set to 1. */
479 
480 /*
481  * Display zero values by default for xstats
482  */
483 uint8_t xstats_hide_zero;
484 
485 /*
486  * Measure of CPU cycles disabled by default
487  */
488 uint8_t record_core_cycles;
489 
490 /*
491  * Display of RX and TX bursts disabled by default
492  */
493 uint8_t record_burst_stats;
494 
495 unsigned int num_sockets = 0;
496 unsigned int socket_ids[RTE_MAX_NUMA_NODES];
497 
498 #ifdef RTE_LIB_BITRATESTATS
499 /* Bitrate statistics */
500 struct rte_stats_bitrates *bitrate_data;
501 lcoreid_t bitrate_lcore_id;
502 uint8_t bitrate_enabled;
503 #endif
504 
505 struct gro_status gro_ports[RTE_MAX_ETHPORTS];
506 uint8_t gro_flush_cycles = GRO_DEFAULT_FLUSH_CYCLES;
507 
508 /*
509  * Hexadecimal bitmask of RX multi-queue modes that can be enabled.
510  */
511 enum rte_eth_rx_mq_mode rx_mq_mode = ETH_MQ_RX_VMDQ_DCB_RSS;
512 
513 /*
514  * Used to set forced link speed
515  */
516 uint32_t eth_link_speed;
517 
518 /* Forward function declarations */
519 static void setup_attached_port(portid_t pi);
520 static void check_all_ports_link_status(uint32_t port_mask);
521 static int eth_event_callback(portid_t port_id,
522 			      enum rte_eth_event_type type,
523 			      void *param, void *ret_param);
524 static void dev_event_callback(const char *device_name,
525 				enum rte_dev_event_type type,
526 				void *param);
527 
528 /*
529  * Check if all the ports are started.
530  * If yes, return positive value. If not, return zero.
531  */
532 static int all_ports_started(void);
533 
534 struct gso_status gso_ports[RTE_MAX_ETHPORTS];
535 uint16_t gso_max_segment_size = RTE_ETHER_MAX_LEN - RTE_ETHER_CRC_LEN;
536 
537 /* Holds the registered mbuf dynamic flags names. */
538 char dynf_names[64][RTE_MBUF_DYN_NAMESIZE];
539 
540 /*
541  * Helper function to check whether a socket id has already been discovered.
542  * Return zero if it has, or a positive value if the socket id is new.
543  */
544 int
545 new_socket_id(unsigned int socket_id)
546 {
547 	unsigned int i;
548 
549 	for (i = 0; i < num_sockets; i++) {
550 		if (socket_ids[i] == socket_id)
551 			return 0;
552 	}
553 	return 1;
554 }
555 
556 /*
557  * Setup default configuration.
558  */
559 static void
560 set_default_fwd_lcores_config(void)
561 {
562 	unsigned int i;
563 	unsigned int nb_lc;
564 	unsigned int sock_num;
565 
566 	nb_lc = 0;
567 	for (i = 0; i < RTE_MAX_LCORE; i++) {
568 		if (!rte_lcore_is_enabled(i))
569 			continue;
570 		sock_num = rte_lcore_to_socket_id(i);
571 		if (new_socket_id(sock_num)) {
572 			if (num_sockets >= RTE_MAX_NUMA_NODES) {
573 				rte_exit(EXIT_FAILURE,
574 					 "Total sockets greater than %u\n",
575 					 RTE_MAX_NUMA_NODES);
576 			}
577 			socket_ids[num_sockets++] = sock_num;
578 		}
579 		if (i == rte_get_main_lcore())
580 			continue;
581 		fwd_lcores_cpuids[nb_lc++] = i;
582 	}
583 	nb_lcores = (lcoreid_t) nb_lc;
584 	nb_cfg_lcores = nb_lcores;
585 	nb_fwd_lcores = 1;
586 }
587 
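/*
 * Default peer addresses are locally administered MACs of the form
 * 02:00:00:00:00:<port index>.
 */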
588 static void
589 set_def_peer_eth_addrs(void)
590 {
591 	portid_t i;
592 
593 	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
594 		peer_eth_addrs[i].addr_bytes[0] = RTE_ETHER_LOCAL_ADMIN_ADDR;
595 		peer_eth_addrs[i].addr_bytes[5] = i;
596 	}
597 }
598 
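/*
 * Build the default forwarding port list from all probed ports and record
 * the NUMA socket of each attached device.
 */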
599 static void
600 set_default_fwd_ports_config(void)
601 {
602 	portid_t pt_id;
603 	int i = 0;
604 
605 	RTE_ETH_FOREACH_DEV(pt_id) {
606 		fwd_ports_ids[i++] = pt_id;
607 
608 		/* Update sockets info according to the attached device */
609 		int socket_id = rte_eth_dev_socket_id(pt_id);
610 		if (socket_id >= 0 && new_socket_id(socket_id)) {
611 			if (num_sockets >= RTE_MAX_NUMA_NODES) {
612 				rte_exit(EXIT_FAILURE,
613 					 "Total sockets greater than %u\n",
614 					 RTE_MAX_NUMA_NODES);
615 			}
616 			socket_ids[num_sockets++] = socket_id;
617 		}
618 	}
619 
620 	nb_cfg_ports = nb_ports;
621 	nb_fwd_ports = nb_ports;
622 }
623 
624 void
625 set_def_fwd_config(void)
626 {
627 	set_default_fwd_lcores_config();
628 	set_def_peer_eth_addrs();
629 	set_default_fwd_ports_config();
630 }
631 
632 /* extremely pessimistic estimation of memory required to create a mempool */
633 static int
634 calc_mem_size(uint32_t nb_mbufs, uint32_t mbuf_sz, size_t pgsz, size_t *out)
635 {
636 	unsigned int n_pages, mbuf_per_pg, leftover;
637 	uint64_t total_mem, mbuf_mem, obj_sz;
638 
639 	/* there is no good way to predict how much space the mempool will
640 	 * occupy because it will allocate chunks on the fly, and some of those
641 	 * will come from default DPDK memory while some will come from our
642 	 * external memory, so just assume 128MB will be enough for everyone.
643 	 */
644 	uint64_t hdr_mem = 128 << 20;
645 
646 	/* account for possible non-contiguousness */
647 	obj_sz = rte_mempool_calc_obj_size(mbuf_sz, 0, NULL);
648 	if (obj_sz > pgsz) {
649 		TESTPMD_LOG(ERR, "Object size is bigger than page size\n");
650 		return -1;
651 	}
652 
653 	mbuf_per_pg = pgsz / obj_sz;
654 	leftover = (nb_mbufs % mbuf_per_pg) > 0;
655 	n_pages = (nb_mbufs / mbuf_per_pg) + leftover;
656 
657 	mbuf_mem = n_pages * pgsz;
658 
659 	total_mem = RTE_ALIGN(hdr_mem + mbuf_mem, pgsz);
660 
661 	if (total_mem > SIZE_MAX) {
662 		TESTPMD_LOG(ERR, "Memory size too big\n");
663 		return -1;
664 	}
665 	*out = (size_t)total_mem;
666 
667 	return 0;
668 }
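
/*
 * Illustrative sizing for calc_mem_size() above (assumed numbers, not taken
 * from a real run): with a 2 MB page size and an object size of ~2.5 KB,
 * mbuf_per_pg is 819; 171392 mbufs then need 210 pages (~420 MB), plus the
 * fixed 128 MB header allowance, i.e. roughly 548 MB after page alignment.
 */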
669 
670 static int
671 pagesz_flags(uint64_t page_sz)
672 {
673 	/* as per the mmap() manpage, the huge page size is passed as
674 	 * log2(page size) shifted left by MAP_HUGE_SHIFT
675 	 */
676 	int log2 = rte_log2_u64(page_sz);
677 
678 	return (log2 << HUGE_SHIFT);
679 }
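
/*
 * For example, a 2 MB page gives log2 = 21, so pagesz_flags() returns
 * (21 << MAP_HUGE_SHIFT), the value Linux defines as MAP_HUGE_2MB.
 */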
680 
681 static void *
682 alloc_mem(size_t memsz, size_t pgsz, bool huge)
683 {
684 	void *addr;
685 	int flags;
686 
687 	/* allocate anonymous memory, backed by hugepages if requested */
688 	flags = MAP_ANONYMOUS | MAP_PRIVATE;
689 	if (huge)
690 		flags |= HUGE_FLAG | pagesz_flags(pgsz);
691 
692 	addr = mmap(NULL, memsz, PROT_READ | PROT_WRITE, flags, -1, 0);
693 	if (addr == MAP_FAILED)
694 		return NULL;
695 
696 	return addr;
697 }
698 
699 struct extmem_param {
700 	void *addr;
701 	size_t len;
702 	size_t pgsz;
703 	rte_iova_t *iova_table;
704 	unsigned int iova_table_len;
705 };
706 
707 static int
708 create_extmem(uint32_t nb_mbufs, uint32_t mbuf_sz, struct extmem_param *param,
709 		bool huge)
710 {
711 	uint64_t pgsizes[] = {RTE_PGSIZE_2M, RTE_PGSIZE_1G, /* x86_64, ARM */
712 			RTE_PGSIZE_16M, RTE_PGSIZE_16G};    /* POWER */
713 	unsigned int cur_page, n_pages, pgsz_idx;
714 	size_t mem_sz, cur_pgsz;
715 	rte_iova_t *iovas = NULL;
716 	void *addr;
717 	int ret;
718 
719 	for (pgsz_idx = 0; pgsz_idx < RTE_DIM(pgsizes); pgsz_idx++) {
720 		/* skip anything that is too big */
721 		if (pgsizes[pgsz_idx] > SIZE_MAX)
722 			continue;
723 
724 		cur_pgsz = pgsizes[pgsz_idx];
725 
726 		/* if we were told not to allocate hugepages, override */
727 		if (!huge)
728 			cur_pgsz = sysconf(_SC_PAGESIZE);
729 
730 		ret = calc_mem_size(nb_mbufs, mbuf_sz, cur_pgsz, &mem_sz);
731 		if (ret < 0) {
732 			TESTPMD_LOG(ERR, "Cannot calculate memory size\n");
733 			return -1;
734 		}
735 
736 		/* allocate our memory */
737 		addr = alloc_mem(mem_sz, cur_pgsz, huge);
738 
739 		/* if we couldn't allocate memory with a specified page size,
740 		 * that doesn't mean we can't do it with other page sizes, so
741 		 * try another one.
742 		 */
743 		if (addr == NULL)
744 			continue;
745 
746 		/* store IOVA addresses for every page in this memory area */
747 		n_pages = mem_sz / cur_pgsz;
748 
749 		iovas = malloc(sizeof(*iovas) * n_pages);
750 
751 		if (iovas == NULL) {
752 			TESTPMD_LOG(ERR, "Cannot allocate memory for iova addresses\n");
753 			goto fail;
754 		}
755 		/* lock memory if it's not huge pages */
756 		if (!huge)
757 			mlock(addr, mem_sz);
758 
759 		/* populate IOVA addresses */
760 		for (cur_page = 0; cur_page < n_pages; cur_page++) {
761 			rte_iova_t iova;
762 			size_t offset;
763 			void *cur;
764 
765 			offset = cur_pgsz * cur_page;
766 			cur = RTE_PTR_ADD(addr, offset);
767 
768 			/* touch the page before getting its IOVA */
769 			*(volatile char *)cur = 0;
770 
771 			iova = rte_mem_virt2iova(cur);
772 
773 			iovas[cur_page] = iova;
774 		}
775 
776 		break;
777 	}
778 	/* if we couldn't allocate anything */
779 	if (iovas == NULL)
780 		return -1;
781 
782 	param->addr = addr;
783 	param->len = mem_sz;
784 	param->pgsz = cur_pgsz;
785 	param->iova_table = iovas;
786 	param->iova_table_len = n_pages;
787 
788 	return 0;
789 fail:
790 	if (iovas)
791 		free(iovas);
792 	if (addr)
793 		munmap(addr, mem_sz);
794 
795 	return -1;
796 }
797 
798 static int
799 setup_extmem(uint32_t nb_mbufs, uint32_t mbuf_sz, bool huge)
800 {
801 	struct extmem_param param;
802 	int socket_id, ret;
803 
804 	memset(&param, 0, sizeof(param));
805 
806 	/* check if our heap exists */
807 	socket_id = rte_malloc_heap_get_socket(EXTMEM_HEAP_NAME);
808 	if (socket_id < 0) {
809 		/* create our heap */
810 		ret = rte_malloc_heap_create(EXTMEM_HEAP_NAME);
811 		if (ret < 0) {
812 			TESTPMD_LOG(ERR, "Cannot create heap\n");
813 			return -1;
814 		}
815 	}
816 
817 	ret = create_extmem(nb_mbufs, mbuf_sz, &param, huge);
818 	if (ret < 0) {
819 		TESTPMD_LOG(ERR, "Cannot create memory area\n");
820 		return -1;
821 	}
822 
823 	/* we now have a valid memory area, so add it to heap */
824 	ret = rte_malloc_heap_memory_add(EXTMEM_HEAP_NAME,
825 			param.addr, param.len, param.iova_table,
826 			param.iova_table_len, param.pgsz);
827 
828 	/* when using VFIO, memory is automatically mapped for DMA by EAL */
829 
830 	/* not needed any more */
831 	free(param.iova_table);
832 
833 	if (ret < 0) {
834 		TESTPMD_LOG(ERR, "Cannot add memory to heap\n");
835 		munmap(param.addr, param.len);
836 		return -1;
837 	}
838 
839 	/* success */
840 
841 	TESTPMD_LOG(DEBUG, "Allocated %zuMB of external memory\n",
842 			param.len >> 20);
843 
844 	return 0;
845 }
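
/*
 * Mempool memory chunk callback (rte_mempool_mem_cb_t): DMA-unmap an
 * externally allocated chunk from every probed ethdev and un-register it
 * from EAL.
 */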
846 static void
847 dma_unmap_cb(struct rte_mempool *mp __rte_unused, void *opaque __rte_unused,
848 	     struct rte_mempool_memhdr *memhdr, unsigned mem_idx __rte_unused)
849 {
850 	uint16_t pid = 0;
851 	int ret;
852 
853 	RTE_ETH_FOREACH_DEV(pid) {
854 		struct rte_eth_dev *dev =
855 			&rte_eth_devices[pid];
856 
857 		ret = rte_dev_dma_unmap(dev->device, memhdr->addr, 0,
858 					memhdr->len);
859 		if (ret) {
860 			TESTPMD_LOG(DEBUG,
861 				    "unable to DMA unmap addr 0x%p "
862 				    "for device %s\n",
863 				    memhdr->addr, dev->data->name);
864 		}
865 	}
866 	ret = rte_extmem_unregister(memhdr->addr, memhdr->len);
867 	if (ret) {
868 		TESTPMD_LOG(DEBUG,
869 			    "unable to un-register addr 0x%p\n", memhdr->addr);
870 	}
871 }
872 
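/*
 * Mempool memory chunk callback: register an externally allocated chunk with
 * EAL and DMA-map it for every probed ethdev. Used via rte_mempool_mem_iter()
 * for anonymous mempools, see mbuf_pool_create().
 */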
873 static void
874 dma_map_cb(struct rte_mempool *mp __rte_unused, void *opaque __rte_unused,
875 	   struct rte_mempool_memhdr *memhdr, unsigned mem_idx __rte_unused)
876 {
877 	uint16_t pid = 0;
878 	size_t page_size = sysconf(_SC_PAGESIZE);
879 	int ret;
880 
881 	ret = rte_extmem_register(memhdr->addr, memhdr->len, NULL, 0,
882 				  page_size);
883 	if (ret) {
884 		TESTPMD_LOG(DEBUG,
885 			    "unable to register addr 0x%p\n", memhdr->addr);
886 		return;
887 	}
888 	RTE_ETH_FOREACH_DEV(pid) {
889 		struct rte_eth_dev *dev =
890 			&rte_eth_devices[pid];
891 
892 		ret = rte_dev_dma_map(dev->device, memhdr->addr, 0,
893 				      memhdr->len);
894 		if (ret) {
895 			TESTPMD_LOG(DEBUG,
896 				    "unable to DMA map addr 0x%p "
897 				    "for device %s\n",
898 				    memhdr->addr, dev->data->name);
899 		}
900 	}
901 }
902 
903 static unsigned int
904 setup_extbuf(uint32_t nb_mbufs, uint16_t mbuf_sz, unsigned int socket_id,
905 	    char *pool_name, struct rte_pktmbuf_extmem **ext_mem)
906 {
907 	struct rte_pktmbuf_extmem *xmem;
908 	unsigned int ext_num, zone_num, elt_num;
909 	uint16_t elt_size;
910 
911 	elt_size = RTE_ALIGN_CEIL(mbuf_sz, RTE_CACHE_LINE_SIZE);
912 	elt_num = EXTBUF_ZONE_SIZE / elt_size;
913 	zone_num = (nb_mbufs + elt_num - 1) / elt_num;
914 
915 	xmem = malloc(sizeof(struct rte_pktmbuf_extmem) * zone_num);
916 	if (xmem == NULL) {
917 		TESTPMD_LOG(ERR, "Cannot allocate memory for "
918 				 "external buffer descriptors\n");
919 		*ext_mem = NULL;
920 		return 0;
921 	}
922 	for (ext_num = 0; ext_num < zone_num; ext_num++) {
923 		struct rte_pktmbuf_extmem *xseg = xmem + ext_num;
924 		const struct rte_memzone *mz;
925 		char mz_name[RTE_MEMZONE_NAMESIZE];
926 		int ret;
927 
928 		ret = snprintf(mz_name, sizeof(mz_name),
929 			RTE_MEMPOOL_MZ_FORMAT "_xb_%u", pool_name, ext_num);
930 		if (ret < 0 || ret >= (int)sizeof(mz_name)) {
931 			errno = ENAMETOOLONG;
932 			ext_num = 0;
933 			break;
934 		}
935 		mz = rte_memzone_reserve_aligned(mz_name, EXTBUF_ZONE_SIZE,
936 						 socket_id,
937 						 RTE_MEMZONE_IOVA_CONTIG |
938 						 RTE_MEMZONE_1GB |
939 						 RTE_MEMZONE_SIZE_HINT_ONLY,
940 						 EXTBUF_ZONE_SIZE);
941 		if (mz == NULL) {
942 			/*
943 			 * The caller exits on external buffer creation
944 			 * error, so there is no need to free memzones.
945 			 */
946 			errno = ENOMEM;
947 			ext_num = 0;
948 			break;
949 		}
950 		xseg->buf_ptr = mz->addr;
951 		xseg->buf_iova = mz->iova;
952 		xseg->buf_len = EXTBUF_ZONE_SIZE;
953 		xseg->elt_size = elt_size;
954 	}
955 	if (ext_num == 0 && xmem != NULL) {
956 		free(xmem);
957 		xmem = NULL;
958 	}
959 	*ext_mem = xmem;
960 	return ext_num;
961 }
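
/*
 * Illustrative sizing for setup_extbuf() above (assumed values, not taken
 * from a real run): with mbuf_sz = 2176 the element size stays 2176 after
 * cache-line alignment, one 2 MB zone holds 963 elements, so 131072 mbufs
 * need 137 memzones.
 */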
962 
963 /*
964  * Mbuf pool creation; done once per socket and segment size at init time.
965  */
966 static struct rte_mempool *
967 mbuf_pool_create(uint16_t mbuf_seg_size, unsigned nb_mbuf,
968 		 unsigned int socket_id, uint16_t size_idx)
969 {
970 	char pool_name[RTE_MEMPOOL_NAMESIZE];
971 	struct rte_mempool *rte_mp = NULL;
972 	uint32_t mb_size;
973 
974 	mb_size = sizeof(struct rte_mbuf) + mbuf_seg_size;
975 	mbuf_poolname_build(socket_id, pool_name, sizeof(pool_name), size_idx);
976 
977 	TESTPMD_LOG(INFO,
978 		"create a new mbuf pool <%s>: n=%u, size=%u, socket=%u\n",
979 		pool_name, nb_mbuf, mbuf_seg_size, socket_id);
980 
981 	switch (mp_alloc_type) {
982 	case MP_ALLOC_NATIVE:
983 		{
984 			/* wrapper to rte_mempool_create() */
985 			TESTPMD_LOG(INFO, "preferred mempool ops selected: %s\n",
986 					rte_mbuf_best_mempool_ops());
987 			rte_mp = rte_pktmbuf_pool_create(pool_name, nb_mbuf,
988 				mb_mempool_cache, 0, mbuf_seg_size, socket_id);
989 			break;
990 		}
991 	case MP_ALLOC_ANON:
992 		{
993 			rte_mp = rte_mempool_create_empty(pool_name, nb_mbuf,
994 				mb_size, (unsigned int) mb_mempool_cache,
995 				sizeof(struct rte_pktmbuf_pool_private),
996 				socket_id, mempool_flags);
997 			if (rte_mp == NULL)
998 				goto err;
999 
1000 			if (rte_mempool_populate_anon(rte_mp) == 0) {
1001 				rte_mempool_free(rte_mp);
1002 				rte_mp = NULL;
1003 				goto err;
1004 			}
1005 			rte_pktmbuf_pool_init(rte_mp, NULL);
1006 			rte_mempool_obj_iter(rte_mp, rte_pktmbuf_init, NULL);
1007 			rte_mempool_mem_iter(rte_mp, dma_map_cb, NULL);
1008 			break;
1009 		}
1010 	case MP_ALLOC_XMEM:
1011 	case MP_ALLOC_XMEM_HUGE:
1012 		{
1013 			int heap_socket;
1014 			bool huge = mp_alloc_type == MP_ALLOC_XMEM_HUGE;
1015 
1016 			if (setup_extmem(nb_mbuf, mbuf_seg_size, huge) < 0)
1017 				rte_exit(EXIT_FAILURE, "Could not create external memory\n");
1018 
1019 			heap_socket =
1020 				rte_malloc_heap_get_socket(EXTMEM_HEAP_NAME);
1021 			if (heap_socket < 0)
1022 				rte_exit(EXIT_FAILURE, "Could not get external memory socket ID\n");
1023 
1024 			TESTPMD_LOG(INFO, "preferred mempool ops selected: %s\n",
1025 					rte_mbuf_best_mempool_ops());
1026 			rte_mp = rte_pktmbuf_pool_create(pool_name, nb_mbuf,
1027 					mb_mempool_cache, 0, mbuf_seg_size,
1028 					heap_socket);
1029 			break;
1030 		}
1031 	case MP_ALLOC_XBUF:
1032 		{
1033 			struct rte_pktmbuf_extmem *ext_mem;
1034 			unsigned int ext_num;
1035 
1036 			ext_num = setup_extbuf(nb_mbuf,	mbuf_seg_size,
1037 					       socket_id, pool_name, &ext_mem);
1038 			if (ext_num == 0)
1039 				rte_exit(EXIT_FAILURE,
1040 					 "Can't create pinned data buffers\n");
1041 
1042 			TESTPMD_LOG(INFO, "preferred mempool ops selected: %s\n",
1043 					rte_mbuf_best_mempool_ops());
1044 			rte_mp = rte_pktmbuf_pool_create_extbuf
1045 					(pool_name, nb_mbuf, mb_mempool_cache,
1046 					 0, mbuf_seg_size, socket_id,
1047 					 ext_mem, ext_num);
1048 			free(ext_mem);
1049 			break;
1050 		}
1051 	default:
1052 		{
1053 			rte_exit(EXIT_FAILURE, "Invalid mempool creation mode\n");
1054 		}
1055 	}
1056 
1057 err:
1058 	if (rte_mp == NULL) {
1059 		rte_exit(EXIT_FAILURE,
1060 			"Creation of mbuf pool for socket %u failed: %s\n",
1061 			socket_id, rte_strerror(rte_errno));
1062 	} else if (verbose_level > 0) {
1063 		rte_mempool_dump(stdout, rte_mp);
1064 	}
1065 	return rte_mp;
1066 }
1067 
1068 /*
1069  * Check whether the given socket id is valid in NUMA mode.
1070  * Return 0 if valid, -1 otherwise.
1071  */
1072 static int
1073 check_socket_id(const unsigned int socket_id)
1074 {
1075 	static int warning_once = 0;
1076 
1077 	if (new_socket_id(socket_id)) {
1078 		if (!warning_once && numa_support)
1079 			printf("Warning: NUMA should be configured manually by"
1080 			       " using --port-numa-config and"
1081 			       " --ring-numa-config parameters along with"
1082 			       " --numa.\n");
1083 		warning_once = 1;
1084 		return -1;
1085 	}
1086 	return 0;
1087 }
1088 
1089 /*
1090  * Get the allowed maximum number of RX queues.
1091  * *pid returns the port id which has the minimal value of
1092  * max_rx_queues among all ports.
1093  */
1094 queueid_t
1095 get_allowed_max_nb_rxq(portid_t *pid)
1096 {
1097 	queueid_t allowed_max_rxq = RTE_MAX_QUEUES_PER_PORT;
1098 	bool max_rxq_valid = false;
1099 	portid_t pi;
1100 	struct rte_eth_dev_info dev_info;
1101 
1102 	RTE_ETH_FOREACH_DEV(pi) {
1103 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1104 			continue;
1105 
1106 		max_rxq_valid = true;
1107 		if (dev_info.max_rx_queues < allowed_max_rxq) {
1108 			allowed_max_rxq = dev_info.max_rx_queues;
1109 			*pid = pi;
1110 		}
1111 	}
1112 	return max_rxq_valid ? allowed_max_rxq : 0;
1113 }
1114 
1115 /*
1116  * Check input rxq is valid or not.
1117  * If input rxq is not greater than any of maximum number
1118  * of RX queues of all ports, it is valid.
1119  * if valid, return 0, else return -1
1120  */
1121 int
1122 check_nb_rxq(queueid_t rxq)
1123 {
1124 	queueid_t allowed_max_rxq;
1125 	portid_t pid = 0;
1126 
1127 	allowed_max_rxq = get_allowed_max_nb_rxq(&pid);
1128 	if (rxq > allowed_max_rxq) {
1129 		printf("Fail: input rxq (%u) can't be greater "
1130 		       "than max_rx_queues (%u) of port %u\n",
1131 		       rxq,
1132 		       allowed_max_rxq,
1133 		       pid);
1134 		return -1;
1135 	}
1136 	return 0;
1137 }
1138 
1139 /*
1140  * Get the allowed maximum number of TX queues.
1141  * *pid returns the port id which has the minimal value of
1142  * max_tx_queues among all ports.
1143  */
1144 queueid_t
1145 get_allowed_max_nb_txq(portid_t *pid)
1146 {
1147 	queueid_t allowed_max_txq = RTE_MAX_QUEUES_PER_PORT;
1148 	bool max_txq_valid = false;
1149 	portid_t pi;
1150 	struct rte_eth_dev_info dev_info;
1151 
1152 	RTE_ETH_FOREACH_DEV(pi) {
1153 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1154 			continue;
1155 
1156 		max_txq_valid = true;
1157 		if (dev_info.max_tx_queues < allowed_max_txq) {
1158 			allowed_max_txq = dev_info.max_tx_queues;
1159 			*pid = pi;
1160 		}
1161 	}
1162 	return max_txq_valid ? allowed_max_txq : 0;
1163 }
1164 
1165 /*
1166  * Check input txq is valid or not.
1167  * If input txq is not greater than any of maximum number
1168  * of TX queues of all ports, it is valid.
1169  * if valid, return 0, else return -1
1170  */
1171 int
1172 check_nb_txq(queueid_t txq)
1173 {
1174 	queueid_t allowed_max_txq;
1175 	portid_t pid = 0;
1176 
1177 	allowed_max_txq = get_allowed_max_nb_txq(&pid);
1178 	if (txq > allowed_max_txq) {
1179 		printf("Fail: input txq (%u) can't be greater "
1180 		       "than max_tx_queues (%u) of port %u\n",
1181 		       txq,
1182 		       allowed_max_txq,
1183 		       pid);
1184 		return -1;
1185 	}
1186 	return 0;
1187 }
1188 
1189 /*
1190  * Get the allowed maximum number of RXDs of every rx queue.
1191  * *pid returns the port id which has the smallest value of
1192  * max_rxd among all ports.
1193  */
1194 static uint16_t
1195 get_allowed_max_nb_rxd(portid_t *pid)
1196 {
1197 	uint16_t allowed_max_rxd = UINT16_MAX;
1198 	portid_t pi;
1199 	struct rte_eth_dev_info dev_info;
1200 
1201 	RTE_ETH_FOREACH_DEV(pi) {
1202 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1203 			continue;
1204 
1205 		if (dev_info.rx_desc_lim.nb_max < allowed_max_rxd) {
1206 			allowed_max_rxd = dev_info.rx_desc_lim.nb_max;
1207 			*pid = pi;
1208 		}
1209 	}
1210 	return allowed_max_rxd;
1211 }
1212 
1213 /*
1214  * Get the allowed minimal number of RXDs of every rx queue.
1215  * *pid returns the port id which has the largest value of
1216  * min_rxd among all ports.
1217  */
1218 static uint16_t
1219 get_allowed_min_nb_rxd(portid_t *pid)
1220 {
1221 	uint16_t allowed_min_rxd = 0;
1222 	portid_t pi;
1223 	struct rte_eth_dev_info dev_info;
1224 
1225 	RTE_ETH_FOREACH_DEV(pi) {
1226 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1227 			continue;
1228 
1229 		if (dev_info.rx_desc_lim.nb_min > allowed_min_rxd) {
1230 			allowed_min_rxd = dev_info.rx_desc_lim.nb_min;
1231 			*pid = pi;
1232 		}
1233 	}
1234 
1235 	return allowed_min_rxd;
1236 }
1237 
1238 /*
1239  * Check input rxd is valid or not.
1240  * If input rxd is not greater than any of maximum number
1241  * of RXDs of every Rx queues and is not less than any of
1242  * minimal number of RXDs of every Rx queues, it is valid.
1243  * if valid, return 0, else return -1
1244  */
1245 int
1246 check_nb_rxd(queueid_t rxd)
1247 {
1248 	uint16_t allowed_max_rxd;
1249 	uint16_t allowed_min_rxd;
1250 	portid_t pid = 0;
1251 
1252 	allowed_max_rxd = get_allowed_max_nb_rxd(&pid);
1253 	if (rxd > allowed_max_rxd) {
1254 		printf("Fail: input rxd (%u) can't be greater "
1255 		       "than max_rxds (%u) of port %u\n",
1256 		       rxd,
1257 		       allowed_max_rxd,
1258 		       pid);
1259 		return -1;
1260 	}
1261 
1262 	allowed_min_rxd = get_allowed_min_nb_rxd(&pid);
1263 	if (rxd < allowed_min_rxd) {
1264 		printf("Fail: input rxd (%u) can't be less "
1265 		       "than min_rxds (%u) of port %u\n",
1266 		       rxd,
1267 		       allowed_min_rxd,
1268 		       pid);
1269 		return -1;
1270 	}
1271 
1272 	return 0;
1273 }
1274 
1275 /*
1276  * Get the allowed maximum number of TXDs of every tx queue.
1277  * *pid returns the port id which has the smallest value of
1278  * max_txd among all ports.
1279  */
1280 static uint16_t
1281 get_allowed_max_nb_txd(portid_t *pid)
1282 {
1283 	uint16_t allowed_max_txd = UINT16_MAX;
1284 	portid_t pi;
1285 	struct rte_eth_dev_info dev_info;
1286 
1287 	RTE_ETH_FOREACH_DEV(pi) {
1288 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1289 			continue;
1290 
1291 		if (dev_info.tx_desc_lim.nb_max < allowed_max_txd) {
1292 			allowed_max_txd = dev_info.tx_desc_lim.nb_max;
1293 			*pid = pi;
1294 		}
1295 	}
1296 	return allowed_max_txd;
1297 }
1298 
1299 /*
1300  * Get the allowed minimal number of TXDs of every tx queue.
1301  * *pid returns the port id which has the largest value of
1302  * min_txd among all ports.
1303  */
1304 static uint16_t
1305 get_allowed_min_nb_txd(portid_t *pid)
1306 {
1307 	uint16_t allowed_min_txd = 0;
1308 	portid_t pi;
1309 	struct rte_eth_dev_info dev_info;
1310 
1311 	RTE_ETH_FOREACH_DEV(pi) {
1312 		if (eth_dev_info_get_print_err(pi, &dev_info) != 0)
1313 			continue;
1314 
1315 		if (dev_info.tx_desc_lim.nb_min > allowed_min_txd) {
1316 			allowed_min_txd = dev_info.tx_desc_lim.nb_min;
1317 			*pid = pi;
1318 		}
1319 	}
1320 
1321 	return allowed_min_txd;
1322 }
1323 
1324 /*
1325  * Check input txd is valid or not.
1326  * If input txd is not greater than any of the maximum numbers of TXDs
1327  * of every TX queue and not less than any of the minimal numbers, it is valid.
1328  * if valid, return 0, else return -1
1329  */
1330 int
1331 check_nb_txd(queueid_t txd)
1332 {
1333 	uint16_t allowed_max_txd;
1334 	uint16_t allowed_min_txd;
1335 	portid_t pid = 0;
1336 
1337 	allowed_max_txd = get_allowed_max_nb_txd(&pid);
1338 	if (txd > allowed_max_txd) {
1339 		printf("Fail: input txd (%u) can't be greater "
1340 		       "than max_txds (%u) of port %u\n",
1341 		       txd,
1342 		       allowed_max_txd,
1343 		       pid);
1344 		return -1;
1345 	}
1346 
1347 	allowed_min_txd = get_allowed_min_nb_txd(&pid);
1348 	if (txd < allowed_min_txd) {
1349 		printf("Fail: input txd (%u) can't be less "
1350 		       "than min_txds (%u) of port %u\n",
1351 		       txd,
1352 		       allowed_min_txd,
1353 		       pid);
1354 		return -1;
1355 	}
1356 	return 0;
1357 }
1358 
1359 
1360 /*
1361  * Get the allowed maximum number of hairpin queues.
1362  * *pid returns the port id which has the minimal value of
1363  * max_hairpin_queues among all ports.
1364  */
1365 queueid_t
1366 get_allowed_max_nb_hairpinq(portid_t *pid)
1367 {
1368 	queueid_t allowed_max_hairpinq = RTE_MAX_QUEUES_PER_PORT;
1369 	portid_t pi;
1370 	struct rte_eth_hairpin_cap cap;
1371 
1372 	RTE_ETH_FOREACH_DEV(pi) {
1373 		if (rte_eth_dev_hairpin_capability_get(pi, &cap) != 0) {
1374 			*pid = pi;
1375 			return 0;
1376 		}
1377 		if (cap.max_nb_queues < allowed_max_hairpinq) {
1378 			allowed_max_hairpinq = cap.max_nb_queues;
1379 			*pid = pi;
1380 		}
1381 	}
1382 	return allowed_max_hairpinq;
1383 }
1384 
1385 /*
1386  * Check input hairpin is valid or not.
1387  * If input hairpin is not greater than any of maximum number
1388  * of hairpin queues of all ports, it is valid.
1389  * if valid, return 0, else return -1
1390  */
1391 int
1392 check_nb_hairpinq(queueid_t hairpinq)
1393 {
1394 	queueid_t allowed_max_hairpinq;
1395 	portid_t pid = 0;
1396 
1397 	allowed_max_hairpinq = get_allowed_max_nb_hairpinq(&pid);
1398 	if (hairpinq > allowed_max_hairpinq) {
1399 		printf("Fail: input hairpin (%u) can't be greater "
1400 		       "than max_hairpin_queues (%u) of port %u\n",
1401 		       hairpinq, allowed_max_hairpinq, pid);
1402 		return -1;
1403 	}
1404 	return 0;
1405 }
1406 
1407 static void
1408 init_config(void)
1409 {
1410 	portid_t pid;
1411 	struct rte_port *port;
1412 	struct rte_mempool *mbp;
1413 	unsigned int nb_mbuf_per_pool;
1414 	lcoreid_t  lc_id;
1415 	uint8_t port_per_socket[RTE_MAX_NUMA_NODES];
1416 	struct rte_gro_param gro_param;
1417 	uint32_t gso_types;
1418 	uint16_t data_size;
1419 	bool warning = 0;
1420 	int k;
1421 	int ret;
1422 
1423 	memset(port_per_socket, 0, RTE_MAX_NUMA_NODES);
1424 
1425 	/* Configuration of logical cores. */
1426 	fwd_lcores = rte_zmalloc("testpmd: fwd_lcores",
1427 				sizeof(struct fwd_lcore *) * nb_lcores,
1428 				RTE_CACHE_LINE_SIZE);
1429 	if (fwd_lcores == NULL) {
1430 		rte_exit(EXIT_FAILURE, "rte_zmalloc(%d (struct fwd_lcore *)) "
1431 							"failed\n", nb_lcores);
1432 	}
1433 	for (lc_id = 0; lc_id < nb_lcores; lc_id++) {
1434 		fwd_lcores[lc_id] = rte_zmalloc("testpmd: struct fwd_lcore",
1435 					       sizeof(struct fwd_lcore),
1436 					       RTE_CACHE_LINE_SIZE);
1437 		if (fwd_lcores[lc_id] == NULL) {
1438 			rte_exit(EXIT_FAILURE, "rte_zmalloc(struct fwd_lcore) "
1439 								"failed\n");
1440 		}
1441 		fwd_lcores[lc_id]->cpuid_idx = lc_id;
1442 	}
1443 
1444 	RTE_ETH_FOREACH_DEV(pid) {
1445 		port = &ports[pid];
1446 		/* Apply default TxRx configuration for all ports */
1447 		port->dev_conf.txmode = tx_mode;
1448 		port->dev_conf.rxmode = rx_mode;
1449 
1450 		ret = eth_dev_info_get_print_err(pid, &port->dev_info);
1451 		if (ret != 0)
1452 			rte_exit(EXIT_FAILURE,
1453 				 "rte_eth_dev_info_get() failed\n");
1454 
1455 		ret = update_jumbo_frame_offload(pid);
1456 		if (ret != 0)
1457 			printf("Updating jumbo frame offload failed for port %u\n",
1458 				pid);
1459 
1460 		if (!(port->dev_info.tx_offload_capa &
1461 		      DEV_TX_OFFLOAD_MBUF_FAST_FREE))
1462 			port->dev_conf.txmode.offloads &=
1463 				~DEV_TX_OFFLOAD_MBUF_FAST_FREE;
1464 		if (numa_support) {
1465 			if (port_numa[pid] != NUMA_NO_CONFIG)
1466 				port_per_socket[port_numa[pid]]++;
1467 			else {
1468 				uint32_t socket_id = rte_eth_dev_socket_id(pid);
1469 
1470 				/*
1471 				 * if socket_id is invalid,
1472 				 * set to the first available socket.
1473 				 */
1474 				if (check_socket_id(socket_id) < 0)
1475 					socket_id = socket_ids[0];
1476 				port_per_socket[socket_id]++;
1477 			}
1478 		}
1479 
1480 		/* Apply Rx offloads configuration */
1481 		for (k = 0; k < port->dev_info.max_rx_queues; k++)
1482 			port->rx_conf[k].offloads =
1483 				port->dev_conf.rxmode.offloads;
1484 		/* Apply Tx offloads configuration */
1485 		for (k = 0; k < port->dev_info.max_tx_queues; k++)
1486 			port->tx_conf[k].offloads =
1487 				port->dev_conf.txmode.offloads;
1488 
1489 		if (eth_link_speed)
1490 			port->dev_conf.link_speeds = eth_link_speed;
1491 
1492 		/* set flag to initialize port/queue */
1493 		port->need_reconfig = 1;
1494 		port->need_reconfig_queues = 1;
1495 		port->tx_metadata = 0;
1496 
1497 		/* Check for maximum number of segments per MTU. Accordingly
1498 		 * update the mbuf data size.
1499 		 */
1500 		if (port->dev_info.rx_desc_lim.nb_mtu_seg_max != UINT16_MAX &&
1501 				port->dev_info.rx_desc_lim.nb_mtu_seg_max != 0) {
1502 			data_size = rx_mode.max_rx_pkt_len /
1503 				port->dev_info.rx_desc_lim.nb_mtu_seg_max;
1504 
1505 			if ((data_size + RTE_PKTMBUF_HEADROOM) >
1506 							mbuf_data_size[0]) {
1507 				mbuf_data_size[0] = data_size +
1508 						 RTE_PKTMBUF_HEADROOM;
1509 				warning = 1;
1510 			}
1511 		}
1512 	}
1513 
1514 	if (warning)
1515 		TESTPMD_LOG(WARNING,
1516 			    "Configured mbuf size of the first segment %hu\n",
1517 			    mbuf_data_size[0]);
1518 	/*
1519 	 * Create mbuf pools.
1520 	 * If NUMA support is disabled, create a single pool of mbufs in
1521 	 * socket 0 memory by default.
1522 	 * Otherwise, create a pool of mbufs in the memory of each detected socket.
1523 	 *
1524 	 * Use the maximum value of nb_rxd and nb_txd here, so that nb_rxd and
1525 	 * nb_txd can be re-configured at run time.
1526 	 */
1527 	if (param_total_num_mbufs)
1528 		nb_mbuf_per_pool = param_total_num_mbufs;
1529 	else {
1530 		nb_mbuf_per_pool = RTE_TEST_RX_DESC_MAX +
1531 			(nb_lcores * mb_mempool_cache) +
1532 			RTE_TEST_TX_DESC_MAX + MAX_PKT_BURST;
1533 		nb_mbuf_per_pool *= RTE_MAX_ETHPORTS;
1534 	}
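	/*
	 * Rough illustration (assuming the testpmd.h defaults of 2048 RX/TX
	 * descriptors max, a 250-mbuf cache and a 512-packet burst): with
	 * 4 lcores the base count is 2048 + 4 * 250 + 2048 + 512 = 5608 mbufs,
	 * which is then multiplied by RTE_MAX_ETHPORTS.
	 */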
1535 
1536 	if (numa_support) {
1537 		uint8_t i, j;
1538 
1539 		for (i = 0; i < num_sockets; i++)
1540 			for (j = 0; j < mbuf_data_size_n; j++)
1541 				mempools[i * MAX_SEGS_BUFFER_SPLIT + j] =
1542 					mbuf_pool_create(mbuf_data_size[j],
1543 							  nb_mbuf_per_pool,
1544 							  socket_ids[i], j);
1545 	} else {
1546 		uint8_t i;
1547 
1548 		for (i = 0; i < mbuf_data_size_n; i++)
1549 			mempools[i] = mbuf_pool_create
1550 					(mbuf_data_size[i],
1551 					 nb_mbuf_per_pool,
1552 					 socket_num == UMA_NO_CONFIG ?
1553 					 0 : socket_num, i);
1554 	}
1555 
1556 	init_port_config();
1557 
1558 	gso_types = DEV_TX_OFFLOAD_TCP_TSO | DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
1559 		DEV_TX_OFFLOAD_GRE_TNL_TSO | DEV_TX_OFFLOAD_UDP_TSO;
1560 	/*
1561 	 * Record which mbuf pool each forwarding logical core uses, if needed.
1562 	 */
1563 	for (lc_id = 0; lc_id < nb_lcores; lc_id++) {
1564 		mbp = mbuf_pool_find(
1565 			rte_lcore_to_socket_id(fwd_lcores_cpuids[lc_id]), 0);
1566 
1567 		if (mbp == NULL)
1568 			mbp = mbuf_pool_find(0, 0);
1569 		fwd_lcores[lc_id]->mbp = mbp;
1570 		/* initialize GSO context */
1571 		fwd_lcores[lc_id]->gso_ctx.direct_pool = mbp;
1572 		fwd_lcores[lc_id]->gso_ctx.indirect_pool = mbp;
1573 		fwd_lcores[lc_id]->gso_ctx.gso_types = gso_types;
1574 		fwd_lcores[lc_id]->gso_ctx.gso_size = RTE_ETHER_MAX_LEN -
1575 			RTE_ETHER_CRC_LEN;
1576 		fwd_lcores[lc_id]->gso_ctx.flag = 0;
1577 	}
1578 
1579 	fwd_config_setup();
1580 
1581 	/* create a gro context for each lcore */
1582 	gro_param.gro_types = RTE_GRO_TCP_IPV4;
1583 	gro_param.max_flow_num = GRO_MAX_FLUSH_CYCLES;
1584 	gro_param.max_item_per_flow = MAX_PKT_BURST;
1585 	for (lc_id = 0; lc_id < nb_lcores; lc_id++) {
1586 		gro_param.socket_id = rte_lcore_to_socket_id(
1587 				fwd_lcores_cpuids[lc_id]);
1588 		fwd_lcores[lc_id]->gro_ctx = rte_gro_ctx_create(&gro_param);
1589 		if (fwd_lcores[lc_id]->gro_ctx == NULL) {
1590 			rte_exit(EXIT_FAILURE,
1591 					"rte_gro_ctx_create() failed\n");
1592 		}
1593 	}
1594 }
1595 
1596 
1597 void
1598 reconfig(portid_t new_port_id, unsigned socket_id)
1599 {
1600 	struct rte_port *port;
1601 	int ret;
1602 
1603 	/* Reconfiguration of Ethernet ports. */
1604 	port = &ports[new_port_id];
1605 
1606 	ret = eth_dev_info_get_print_err(new_port_id, &port->dev_info);
1607 	if (ret != 0)
1608 		return;
1609 
1610 	/* set flag to initialize port/queue */
1611 	port->need_reconfig = 1;
1612 	port->need_reconfig_queues = 1;
1613 	port->socket_id = socket_id;
1614 
1615 	init_port_config();
1616 }
1617 
1618 
1619 int
1620 init_fwd_streams(void)
1621 {
1622 	portid_t pid;
1623 	struct rte_port *port;
1624 	streamid_t sm_id, nb_fwd_streams_new;
1625 	queueid_t q;
1626 
1627 	/* set socket id according to the NUMA configuration (or lack of it) */
1628 	RTE_ETH_FOREACH_DEV(pid) {
1629 		port = &ports[pid];
1630 		if (nb_rxq > port->dev_info.max_rx_queues) {
1631 			printf("Fail: nb_rxq(%d) is greater than "
1632 				"max_rx_queues(%d)\n", nb_rxq,
1633 				port->dev_info.max_rx_queues);
1634 			return -1;
1635 		}
1636 		if (nb_txq > port->dev_info.max_tx_queues) {
1637 			printf("Fail: nb_txq(%d) is greater than "
1638 				"max_tx_queues(%d)\n", nb_txq,
1639 				port->dev_info.max_tx_queues);
1640 			return -1;
1641 		}
1642 		if (numa_support) {
1643 			if (port_numa[pid] != NUMA_NO_CONFIG)
1644 				port->socket_id = port_numa[pid];
1645 			else {
1646 				port->socket_id = rte_eth_dev_socket_id(pid);
1647 
1648 				/*
1649 				 * if socket_id is invalid,
1650 				 * set to the first available socket.
1651 				 */
1652 				if (check_socket_id(port->socket_id) < 0)
1653 					port->socket_id = socket_ids[0];
1654 			}
1655 		}
1656 		else {
1657 			if (socket_num == UMA_NO_CONFIG)
1658 				port->socket_id = 0;
1659 			else
1660 				port->socket_id = socket_num;
1661 		}
1662 	}
1663 
1664 	q = RTE_MAX(nb_rxq, nb_txq);
1665 	if (q == 0) {
1666 		printf("Fail: Cannot allocate fwd streams as number of queues is 0\n");
1667 		return -1;
1668 	}
1669 	nb_fwd_streams_new = (streamid_t)(nb_ports * q);
1670 	if (nb_fwd_streams_new == nb_fwd_streams)
1671 		return 0;
1672 	/* clear the old */
1673 	if (fwd_streams != NULL) {
1674 		for (sm_id = 0; sm_id < nb_fwd_streams; sm_id++) {
1675 			if (fwd_streams[sm_id] == NULL)
1676 				continue;
1677 			rte_free(fwd_streams[sm_id]);
1678 			fwd_streams[sm_id] = NULL;
1679 		}
1680 		rte_free(fwd_streams);
1681 		fwd_streams = NULL;
1682 	}
1683 
1684 	/* init new */
1685 	nb_fwd_streams = nb_fwd_streams_new;
1686 	if (nb_fwd_streams) {
1687 		fwd_streams = rte_zmalloc("testpmd: fwd_streams",
1688 			sizeof(struct fwd_stream *) * nb_fwd_streams,
1689 			RTE_CACHE_LINE_SIZE);
1690 		if (fwd_streams == NULL)
1691 			rte_exit(EXIT_FAILURE, "rte_zmalloc(%d"
1692 				 " (struct fwd_stream *)) failed\n",
1693 				 nb_fwd_streams);
1694 
1695 		for (sm_id = 0; sm_id < nb_fwd_streams; sm_id++) {
1696 			fwd_streams[sm_id] = rte_zmalloc("testpmd:"
1697 				" struct fwd_stream", sizeof(struct fwd_stream),
1698 				RTE_CACHE_LINE_SIZE);
1699 			if (fwd_streams[sm_id] == NULL)
1700 				rte_exit(EXIT_FAILURE, "rte_zmalloc"
1701 					 "(struct fwd_stream) failed\n");
1702 		}
1703 	}
1704 
1705 	return 0;
1706 }
1707 
1708 static void
1709 pkt_burst_stats_display(const char *rx_tx, struct pkt_burst_stats *pbs)
1710 {
1711 	uint64_t total_burst, sburst;
1712 	uint64_t nb_burst;
1713 	uint64_t burst_stats[4];
1714 	uint16_t pktnb_stats[4];
1715 	uint16_t nb_pkt;
1716 	int burst_percent[4], sburstp;
1717 	int i;
1718 
1719 	/*
1720 	 * First compute the total number of packet bursts and the
1721 	 * two highest numbers of bursts of the same number of packets.
1722 	 */
1723 	memset(&burst_stats, 0x0, sizeof(burst_stats));
1724 	memset(&pktnb_stats, 0x0, sizeof(pktnb_stats));
1725 
1726 	/* Show stats for 0 burst size always */
1727 	total_burst = pbs->pkt_burst_spread[0];
1728 	burst_stats[0] = pbs->pkt_burst_spread[0];
1729 	pktnb_stats[0] = 0;
1730 
1731 	/* Find the next 2 burst sizes with highest occurrences. */
1732 	for (nb_pkt = 1; nb_pkt < MAX_PKT_BURST; nb_pkt++) {
1733 		nb_burst = pbs->pkt_burst_spread[nb_pkt];
1734 
1735 		if (nb_burst == 0)
1736 			continue;
1737 
1738 		total_burst += nb_burst;
1739 
1740 		if (nb_burst > burst_stats[1]) {
1741 			burst_stats[2] = burst_stats[1];
1742 			pktnb_stats[2] = pktnb_stats[1];
1743 			burst_stats[1] = nb_burst;
1744 			pktnb_stats[1] = nb_pkt;
1745 		} else if (nb_burst > burst_stats[2]) {
1746 			burst_stats[2] = nb_burst;
1747 			pktnb_stats[2] = nb_pkt;
1748 		}
1749 	}
1750 	if (total_burst == 0)
1751 		return;
1752 
1753 	printf("  %s-bursts : %"PRIu64" [", rx_tx, total_burst);
1754 	for (i = 0, sburst = 0, sburstp = 0; i < 4; i++) {
1755 		if (i == 3) {
1756 			printf("%d%% of other]\n", 100 - sburstp);
1757 			return;
1758 		}
1759 
1760 		sburst += burst_stats[i];
1761 		if (sburst == total_burst) {
1762 			printf("%d%% of %d pkts]\n",
1763 				100 - sburstp, (int) pktnb_stats[i]);
1764 			return;
1765 		}
1766 
1767 		burst_percent[i] =
1768 			(double)burst_stats[i] / total_burst * 100;
1769 		printf("%d%% of %d pkts + ",
1770 			burst_percent[i], (int) pktnb_stats[i]);
1771 		sburstp += burst_percent[i];
1772 	}
1773 }
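/*
 * Example of the line printed by pkt_burst_stats_display() (made-up numbers):
 *   RX-bursts : 10000 [1% of 0 pkts + 85% of 32 pkts + 10% of 16 pkts + 4% of other]
 */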
1774 
1775 static void
1776 fwd_stream_stats_display(streamid_t stream_id)
1777 {
1778 	struct fwd_stream *fs;
1779 	static const char *fwd_top_stats_border = "-------";
1780 
1781 	fs = fwd_streams[stream_id];
1782 	if ((fs->rx_packets == 0) && (fs->tx_packets == 0) &&
1783 	    (fs->fwd_dropped == 0))
1784 		return;
1785 	printf("\n  %s Forward Stats for RX Port=%2d/Queue=%2d -> "
1786 	       "TX Port=%2d/Queue=%2d %s\n",
1787 	       fwd_top_stats_border, fs->rx_port, fs->rx_queue,
1788 	       fs->tx_port, fs->tx_queue, fwd_top_stats_border);
1789 	printf("  RX-packets: %-14"PRIu64" TX-packets: %-14"PRIu64
1790 	       " TX-dropped: %-14"PRIu64,
1791 	       fs->rx_packets, fs->tx_packets, fs->fwd_dropped);
1792 
1793 	/* if checksum mode */
1794 	if (cur_fwd_eng == &csum_fwd_engine) {
1795 		printf("  RX- bad IP checksum: %-14"PRIu64
1796 		       "  Rx- bad L4 checksum: %-14"PRIu64
1797 		       " Rx- bad outer L4 checksum: %-14"PRIu64"\n",
1798 			fs->rx_bad_ip_csum, fs->rx_bad_l4_csum,
1799 			fs->rx_bad_outer_l4_csum);
1800 		printf(" RX- bad outer IP checksum: %-14"PRIu64"\n",
1801 			fs->rx_bad_outer_ip_csum);
1802 	} else {
1803 		printf("\n");
1804 	}
1805 
1806 	if (record_burst_stats) {
1807 		pkt_burst_stats_display("RX", &fs->rx_burst_stats);
1808 		pkt_burst_stats_display("TX", &fs->tx_burst_stats);
1809 	}
1810 }
1811 
1812 void
1813 fwd_stats_display(void)
1814 {
1815 	static const char *fwd_stats_border = "----------------------";
1816 	static const char *acc_stats_border = "+++++++++++++++";
1817 	struct {
1818 		struct fwd_stream *rx_stream;
1819 		struct fwd_stream *tx_stream;
1820 		uint64_t tx_dropped;
1821 		uint64_t rx_bad_ip_csum;
1822 		uint64_t rx_bad_l4_csum;
1823 		uint64_t rx_bad_outer_l4_csum;
1824 		uint64_t rx_bad_outer_ip_csum;
1825 	} ports_stats[RTE_MAX_ETHPORTS];
1826 	uint64_t total_rx_dropped = 0;
1827 	uint64_t total_tx_dropped = 0;
1828 	uint64_t total_rx_nombuf = 0;
1829 	struct rte_eth_stats stats;
1830 	uint64_t fwd_cycles = 0;
1831 	uint64_t total_recv = 0;
1832 	uint64_t total_xmit = 0;
1833 	struct rte_port *port;
1834 	streamid_t sm_id;
1835 	portid_t pt_id;
1836 	int i;
1837 
1838 	memset(ports_stats, 0, sizeof(ports_stats));
1839 
1840 	for (sm_id = 0; sm_id < cur_fwd_config.nb_fwd_streams; sm_id++) {
1841 		struct fwd_stream *fs = fwd_streams[sm_id];
1842 
1843 		if (cur_fwd_config.nb_fwd_streams >
1844 		    cur_fwd_config.nb_fwd_ports) {
1845 			fwd_stream_stats_display(sm_id);
1846 		} else {
1847 			ports_stats[fs->tx_port].tx_stream = fs;
1848 			ports_stats[fs->rx_port].rx_stream = fs;
1849 		}
1850 
1851 		ports_stats[fs->tx_port].tx_dropped += fs->fwd_dropped;
1852 
1853 		ports_stats[fs->rx_port].rx_bad_ip_csum += fs->rx_bad_ip_csum;
1854 		ports_stats[fs->rx_port].rx_bad_l4_csum += fs->rx_bad_l4_csum;
1855 		ports_stats[fs->rx_port].rx_bad_outer_l4_csum +=
1856 				fs->rx_bad_outer_l4_csum;
1857 		ports_stats[fs->rx_port].rx_bad_outer_ip_csum +=
1858 				fs->rx_bad_outer_ip_csum;
1859 
1860 		if (record_core_cycles)
1861 			fwd_cycles += fs->core_cycles;
1862 	}
1863 	for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
1864 		pt_id = fwd_ports_ids[i];
1865 		port = &ports[pt_id];
1866 
1867 		rte_eth_stats_get(pt_id, &stats);
1868 		stats.ipackets -= port->stats.ipackets;
1869 		stats.opackets -= port->stats.opackets;
1870 		stats.ibytes -= port->stats.ibytes;
1871 		stats.obytes -= port->stats.obytes;
1872 		stats.imissed -= port->stats.imissed;
1873 		stats.oerrors -= port->stats.oerrors;
1874 		stats.rx_nombuf -= port->stats.rx_nombuf;
1875 
1876 		total_recv += stats.ipackets;
1877 		total_xmit += stats.opackets;
1878 		total_rx_dropped += stats.imissed;
1879 		total_tx_dropped += ports_stats[pt_id].tx_dropped;
1880 		total_tx_dropped += stats.oerrors;
1881 		total_rx_nombuf  += stats.rx_nombuf;
1882 
1883 		printf("\n  %s Forward statistics for port %-2d %s\n",
1884 		       fwd_stats_border, pt_id, fwd_stats_border);
1885 
1886 		printf("  RX-packets: %-14"PRIu64" RX-dropped: %-14"PRIu64
1887 		       "RX-total: %-"PRIu64"\n", stats.ipackets, stats.imissed,
1888 		       stats.ipackets + stats.imissed);
1889 
1890 		if (cur_fwd_eng == &csum_fwd_engine) {
1891 			printf("  Bad-ipcsum: %-14"PRIu64
1892 			       " Bad-l4csum: %-14"PRIu64
1893 			       "Bad-outer-l4csum: %-14"PRIu64"\n",
1894 			       ports_stats[pt_id].rx_bad_ip_csum,
1895 			       ports_stats[pt_id].rx_bad_l4_csum,
1896 			       ports_stats[pt_id].rx_bad_outer_l4_csum);
1897 			printf("  Bad-outer-ipcsum: %-14"PRIu64"\n",
1898 			       ports_stats[pt_id].rx_bad_outer_ip_csum);
1899 		}
1900 		if (stats.ierrors + stats.rx_nombuf > 0) {
1901 			printf("  RX-error: %-"PRIu64"\n", stats.ierrors);
1902 			printf("  RX-nombufs: %-14"PRIu64"\n", stats.rx_nombuf);
1903 		}
1904 
1905 		printf("  TX-packets: %-14"PRIu64" TX-dropped: %-14"PRIu64
1906 		       "TX-total: %-"PRIu64"\n",
1907 		       stats.opackets, ports_stats[pt_id].tx_dropped,
1908 		       stats.opackets + ports_stats[pt_id].tx_dropped);
1909 
1910 		if (record_burst_stats) {
1911 			if (ports_stats[pt_id].rx_stream)
1912 				pkt_burst_stats_display("RX",
1913 					&ports_stats[pt_id].rx_stream->rx_burst_stats);
1914 			if (ports_stats[pt_id].tx_stream)
1915 				pkt_burst_stats_display("TX",
1916 				&ports_stats[pt_id].tx_stream->tx_burst_stats);
1917 		}
1918 
1919 		printf("  %s--------------------------------%s\n",
1920 		       fwd_stats_border, fwd_stats_border);
1921 	}
1922 
1923 	printf("\n  %s Accumulated forward statistics for all ports"
1924 	       "%s\n",
1925 	       acc_stats_border, acc_stats_border);
1926 	printf("  RX-packets: %-14"PRIu64" RX-dropped: %-14"PRIu64"RX-total: "
1927 	       "%-"PRIu64"\n"
1928 	       "  TX-packets: %-14"PRIu64" TX-dropped: %-14"PRIu64"TX-total: "
1929 	       "%-"PRIu64"\n",
1930 	       total_recv, total_rx_dropped, total_recv + total_rx_dropped,
1931 	       total_xmit, total_tx_dropped, total_xmit + total_tx_dropped);
1932 	if (total_rx_nombuf > 0)
1933 		printf("  RX-nombufs: %-14"PRIu64"\n", total_rx_nombuf);
1934 	printf("  %s++++++++++++++++++++++++++++++++++++++++++++++"
1935 	       "%s\n",
1936 	       acc_stats_border, acc_stats_border);
1937 	if (record_core_cycles) {
1938 #define CYC_PER_MHZ 1E6
1939 		if (total_recv > 0 || total_xmit > 0) {
1940 			uint64_t total_pkts = 0;
1941 			if (strcmp(cur_fwd_eng->fwd_mode_name, "txonly") == 0 ||
1942 			    strcmp(cur_fwd_eng->fwd_mode_name, "flowgen") == 0)
1943 				total_pkts = total_xmit;
1944 			else
1945 				total_pkts = total_recv;
1946 
1947 			printf("\n  CPU cycles/packet=%.2F (total cycles="
1948 			       "%"PRIu64" / total %s packets=%"PRIu64") at %"PRIu64
1949 			       " MHz Clock\n",
1950 			       (double) fwd_cycles / total_pkts,
1951 			       fwd_cycles, cur_fwd_eng->fwd_mode_name, total_pkts,
1952 			       (uint64_t)(rte_get_tsc_hz() / CYC_PER_MHZ));
1953 		}
1954 	}
1955 }
1956 
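/*
 * Snapshot the current ethdev statistics of the forwarding ports and clear
 * all per-stream counters, so the next display reports only the new run.
 */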
1957 void
1958 fwd_stats_reset(void)
1959 {
1960 	streamid_t sm_id;
1961 	portid_t pt_id;
1962 	int i;
1963 
1964 	for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
1965 		pt_id = fwd_ports_ids[i];
1966 		rte_eth_stats_get(pt_id, &ports[pt_id].stats);
1967 	}
1968 	for (sm_id = 0; sm_id < cur_fwd_config.nb_fwd_streams; sm_id++) {
1969 		struct fwd_stream *fs = fwd_streams[sm_id];
1970 
1971 		fs->rx_packets = 0;
1972 		fs->tx_packets = 0;
1973 		fs->fwd_dropped = 0;
1974 		fs->rx_bad_ip_csum = 0;
1975 		fs->rx_bad_l4_csum = 0;
1976 		fs->rx_bad_outer_l4_csum = 0;
1977 		fs->rx_bad_outer_ip_csum = 0;
1978 
1979 		memset(&fs->rx_burst_stats, 0, sizeof(fs->rx_burst_stats));
1980 		memset(&fs->tx_burst_stats, 0, sizeof(fs->tx_burst_stats));
1981 		fs->core_cycles = 0;
1982 	}
1983 }
1984 
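/*
 * Drain and free any packets still pending in the Rx queues of the
 * forwarding ports before a new forwarding run is started.
 */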
1985 static void
1986 flush_fwd_rx_queues(void)
1987 {
1988 	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
1989 	portid_t  rxp;
1990 	portid_t port_id;
1991 	queueid_t rxq;
1992 	uint16_t  nb_rx;
1993 	uint16_t  i;
1994 	uint8_t   j;
1995 	uint64_t prev_tsc = 0, diff_tsc, cur_tsc, timer_tsc = 0;
1996 	uint64_t timer_period;
1997 
1998 	/* convert to number of cycles */
1999 	timer_period = rte_get_timer_hz(); /* 1 second timeout */
2000 
2001 	for (j = 0; j < 2; j++) {
2002 		for (rxp = 0; rxp < cur_fwd_config.nb_fwd_ports; rxp++) {
2003 			for (rxq = 0; rxq < nb_rxq; rxq++) {
2004 				port_id = fwd_ports_ids[rxp];
2005 				/*
2006 				 * testpmd can get stuck in the do-while loop
2007 				 * below if rte_eth_rx_burst() keeps returning
2008 				 * packets, so a 1 second timer is used to
2009 				 * force an exit from the loop.
2010 				 */
2011 				prev_tsc = rte_rdtsc();
2012 				do {
2013 					nb_rx = rte_eth_rx_burst(port_id, rxq,
2014 						pkts_burst, MAX_PKT_BURST);
2015 					for (i = 0; i < nb_rx; i++)
2016 						rte_pktmbuf_free(pkts_burst[i]);
2017 
2018 					cur_tsc = rte_rdtsc();
2019 					diff_tsc = cur_tsc - prev_tsc;
2020 					timer_tsc += diff_tsc;
2021 				} while ((nb_rx > 0) &&
2022 					(timer_tsc < timer_period));
2023 				timer_tsc = 0;
2024 			}
2025 		}
2026 		rte_delay_ms(10); /* wait 10 milliseconds before retrying */
2027 	}
2028 }
2029 
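/*
 * Forwarding loop of one logical core: repeatedly run the packet forwarding
 * callback on every stream assigned to this lcore until it is told to stop,
 * updating the optional bitrate and latency statistics along the way.
 */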
2030 static void
2031 run_pkt_fwd_on_lcore(struct fwd_lcore *fc, packet_fwd_t pkt_fwd)
2032 {
2033 	struct fwd_stream **fsm;
2034 	streamid_t nb_fs;
2035 	streamid_t sm_id;
2036 #ifdef RTE_LIB_BITRATESTATS
2037 	uint64_t tics_per_1sec;
2038 	uint64_t tics_datum;
2039 	uint64_t tics_current;
2040 	uint16_t i, cnt_ports;
2041 
2042 	cnt_ports = nb_ports;
2043 	tics_datum = rte_rdtsc();
2044 	tics_per_1sec = rte_get_timer_hz();
2045 #endif
2046 	fsm = &fwd_streams[fc->stream_idx];
2047 	nb_fs = fc->stream_nb;
2048 	do {
2049 		for (sm_id = 0; sm_id < nb_fs; sm_id++)
2050 			(*pkt_fwd)(fsm[sm_id]);
2051 #ifdef RTE_LIB_BITRATESTATS
2052 		if (bitrate_enabled != 0 &&
2053 				bitrate_lcore_id == rte_lcore_id()) {
2054 			tics_current = rte_rdtsc();
2055 			if (tics_current - tics_datum >= tics_per_1sec) {
2056 				/* Periodic bitrate calculation */
2057 				for (i = 0; i < cnt_ports; i++)
2058 					rte_stats_bitrate_calc(bitrate_data,
2059 						ports_ids[i]);
2060 				tics_datum = tics_current;
2061 			}
2062 		}
2063 #endif
2064 #ifdef RTE_LIB_LATENCYSTATS
2065 		if (latencystats_enabled != 0 &&
2066 				latencystats_lcore_id == rte_lcore_id())
2067 			rte_latencystats_update();
2068 #endif
2069 
2070 	} while (! fc->stopped);
2071 }
2072 
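/* Forwarding entry point launched on each lcore by launch_packet_forwarding(). */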
2073 static int
2074 start_pkt_forward_on_core(void *fwd_arg)
2075 {
2076 	run_pkt_fwd_on_lcore((struct fwd_lcore *) fwd_arg,
2077 			     cur_fwd_config.fwd_eng->packet_fwd);
2078 	return 0;
2079 }
2080 
2081 /*
2082  * Run the TXONLY packet forwarding engine to send a single burst of packets.
2083  * Used to start communication flows in network loopback test configurations.
2084  */
2085 static int
2086 run_one_txonly_burst_on_core(void *fwd_arg)
2087 {
2088 	struct fwd_lcore *fwd_lc;
2089 	struct fwd_lcore tmp_lcore;
2090 
2091 	fwd_lc = (struct fwd_lcore *) fwd_arg;
2092 	tmp_lcore = *fwd_lc;
2093 	tmp_lcore.stopped = 1;
2094 	run_pkt_fwd_on_lcore(&tmp_lcore, tx_only_engine.packet_fwd);
2095 	return 0;
2096 }
2097 
2098 /*
2099  * Launch packet forwarding:
2100  *     - Set up the per-port forwarding context.
2101  *     - Launch the logical cores with their forwarding configuration.
2102  */
2103 static void
2104 launch_packet_forwarding(lcore_function_t *pkt_fwd_on_lcore)
2105 {
2106 	port_fwd_begin_t port_fwd_begin;
2107 	unsigned int i;
2108 	unsigned int lc_id;
2109 	int diag;
2110 
2111 	port_fwd_begin = cur_fwd_config.fwd_eng->port_fwd_begin;
2112 	if (port_fwd_begin != NULL) {
2113 		for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++)
2114 			(*port_fwd_begin)(fwd_ports_ids[i]);
2115 	}
2116 	for (i = 0; i < cur_fwd_config.nb_fwd_lcores; i++) {
2117 		lc_id = fwd_lcores_cpuids[i];
2118 		if ((interactive == 0) || (lc_id != rte_lcore_id())) {
2119 			fwd_lcores[i]->stopped = 0;
2120 			diag = rte_eal_remote_launch(pkt_fwd_on_lcore,
2121 						     fwd_lcores[i], lc_id);
2122 			if (diag != 0)
2123 				printf("launch lcore %u failed - diag=%d\n",
2124 				       lc_id, diag);
2125 		}
2126 	}
2127 }
2128 
2129 /*
2130  * Launch packet forwarding configuration.
2131  */
2132 void
2133 start_packet_forwarding(int with_tx_first)
2134 {
2135 	port_fwd_begin_t port_fwd_begin;
2136 	port_fwd_end_t  port_fwd_end;
2137 	unsigned int i;
2138 
2139 	if (strcmp(cur_fwd_eng->fwd_mode_name, "rxonly") == 0 && !nb_rxq)
2140 		rte_exit(EXIT_FAILURE, "rxq are 0, cannot use rxonly fwd mode\n");
2141 
2142 	if (strcmp(cur_fwd_eng->fwd_mode_name, "txonly") == 0 && !nb_txq)
2143 		rte_exit(EXIT_FAILURE, "txq are 0, cannot use txonly fwd mode\n");
2144 
2145 	if ((strcmp(cur_fwd_eng->fwd_mode_name, "rxonly") != 0 &&
2146 		strcmp(cur_fwd_eng->fwd_mode_name, "txonly") != 0) &&
2147 		(!nb_rxq || !nb_txq))
2148 		rte_exit(EXIT_FAILURE,
2149 			"Either rxq or txq are 0, cannot use %s fwd mode\n",
2150 			cur_fwd_eng->fwd_mode_name);
2151 
2152 	if (all_ports_started() == 0) {
2153 		printf("Not all ports were started\n");
2154 		return;
2155 	}
2156 	if (test_done == 0) {
2157 		printf("Packet forwarding already started\n");
2158 		return;
2159 	}
2160 	test_done = 0;
2161 
2162 	fwd_config_setup();
2163 
2164 	if(!no_flush_rx)
2165 		flush_fwd_rx_queues();
2166 
2167 	pkt_fwd_config_display(&cur_fwd_config);
2168 	rxtx_config_display();
2169 
2170 	fwd_stats_reset();
2171 	if (with_tx_first) {
2172 		port_fwd_begin = tx_only_engine.port_fwd_begin;
2173 		if (port_fwd_begin != NULL) {
2174 			for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++)
2175 				(*port_fwd_begin)(fwd_ports_ids[i]);
2176 		}
2177 		while (with_tx_first--) {
2178 			launch_packet_forwarding(
2179 					run_one_txonly_burst_on_core);
2180 			rte_eal_mp_wait_lcore();
2181 		}
2182 		port_fwd_end = tx_only_engine.port_fwd_end;
2183 		if (port_fwd_end != NULL) {
2184 			for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++)
2185 				(*port_fwd_end)(fwd_ports_ids[i]);
2186 		}
2187 	}
2188 	launch_packet_forwarding(start_pkt_forward_on_core);
2189 }
2190 
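/*
 * Stop packet forwarding: ask all forwarding lcores to stop, wait for them,
 * run the engine's per-port end callback and display the final statistics.
 */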
2191 void
2192 stop_packet_forwarding(void)
2193 {
2194 	port_fwd_end_t port_fwd_end;
2195 	lcoreid_t lc_id;
2196 	portid_t pt_id;
2197 	int i;
2198 
2199 	if (test_done) {
2200 		printf("Packet forwarding not started\n");
2201 		return;
2202 	}
2203 	printf("Telling cores to stop...");
2204 	for (lc_id = 0; lc_id < cur_fwd_config.nb_fwd_lcores; lc_id++)
2205 		fwd_lcores[lc_id]->stopped = 1;
2206 	printf("\nWaiting for lcores to finish...\n");
2207 	rte_eal_mp_wait_lcore();
2208 	port_fwd_end = cur_fwd_config.fwd_eng->port_fwd_end;
2209 	if (port_fwd_end != NULL) {
2210 		for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
2211 			pt_id = fwd_ports_ids[i];
2212 			(*port_fwd_end)(pt_id);
2213 		}
2214 	}
2215 
2216 	fwd_stats_display();
2217 
2218 	printf("\nDone.\n");
2219 	test_done = 1;
2220 }
2221 
2222 void
2223 dev_set_link_up(portid_t pid)
2224 {
2225 	if (rte_eth_dev_set_link_up(pid) < 0)
2226 		printf("\nSet link up fail.\n");
2227 }
2228 
2229 void
2230 dev_set_link_down(portid_t pid)
2231 {
2232 	if (rte_eth_dev_set_link_down(pid) < 0)
2233 		printf("\nSet link down fail.\n");
2234 }
2235 
2236 static int
2237 all_ports_started(void)
2238 {
2239 	portid_t pi;
2240 	struct rte_port *port;
2241 
2242 	RTE_ETH_FOREACH_DEV(pi) {
2243 		port = &ports[pi];
2244 		/* Check if there is a port which is not started */
2245 		if ((port->port_status != RTE_PORT_STARTED) &&
2246 			(port->slave_flag == 0))
2247 			return 0;
2248 	}
2249 
2250 	/* All ports (other than bonding slaves) are started */
2251 	return 1;
2252 }
2253 
2254 int
2255 port_is_stopped(portid_t port_id)
2256 {
2257 	struct rte_port *port = &ports[port_id];
2258 
2259 	if ((port->port_status != RTE_PORT_STOPPED) &&
2260 	    (port->slave_flag == 0))
2261 		return 0;
2262 	return 1;
2263 }
2264 
2265 int
2266 all_ports_stopped(void)
2267 {
2268 	portid_t pi;
2269 
2270 	RTE_ETH_FOREACH_DEV(pi) {
2271 		if (!port_is_stopped(pi))
2272 			return 0;
2273 	}
2274 
2275 	return 1;
2276 }
2277 
2278 int
2279 port_is_started(portid_t port_id)
2280 {
2281 	if (port_id_is_invalid(port_id, ENABLED_WARN))
2282 		return 0;
2283 
2284 	if (ports[port_id].port_status != RTE_PORT_STARTED)
2285 		return 0;
2286 
2287 	return 1;
2288 }
2289 
2290 /* Configure the Rx and Tx hairpin queues for the selected port. */
2291 static int
2292 setup_hairpin_queues(portid_t pi, portid_t p_pi, uint16_t cnt_pi)
2293 {
2294 	queueid_t qi;
2295 	struct rte_eth_hairpin_conf hairpin_conf = {
2296 		.peer_count = 1,
2297 	};
2298 	int i;
2299 	int diag;
2300 	struct rte_port *port = &ports[pi];
2301 	uint16_t peer_rx_port = pi;
2302 	uint16_t peer_tx_port = pi;
2303 	uint32_t manual = 1;
2304 	uint32_t tx_exp = hairpin_mode & 0x10;
2305 
2306 	if (!(hairpin_mode & 0xf)) {
2307 		peer_rx_port = pi;
2308 		peer_tx_port = pi;
2309 		manual = 0;
2310 	} else if (hairpin_mode & 0x1) {
2311 		peer_tx_port = rte_eth_find_next_owned_by(pi + 1,
2312 						       RTE_ETH_DEV_NO_OWNER);
2313 		if (peer_tx_port >= RTE_MAX_ETHPORTS)
2314 			peer_tx_port = rte_eth_find_next_owned_by(0,
2315 						RTE_ETH_DEV_NO_OWNER);
2316 		if (p_pi != RTE_MAX_ETHPORTS) {
2317 			peer_rx_port = p_pi;
2318 		} else {
2319 			uint16_t next_pi;
2320 
2321 			/* Last port will be the peer RX port of the first. */
2322 			RTE_ETH_FOREACH_DEV(next_pi)
2323 				peer_rx_port = next_pi;
2324 		}
2325 		manual = 1;
2326 	} else if (hairpin_mode & 0x2) {
2327 		if (cnt_pi & 0x1) {
2328 			peer_rx_port = p_pi;
2329 		} else {
2330 			peer_rx_port = rte_eth_find_next_owned_by(pi + 1,
2331 						RTE_ETH_DEV_NO_OWNER);
2332 			if (peer_rx_port >= RTE_MAX_ETHPORTS)
2333 				peer_rx_port = pi;
2334 		}
2335 		peer_tx_port = peer_rx_port;
2336 		manual = 1;
2337 	}
2338 
2339 	for (qi = nb_txq, i = 0; qi < nb_hairpinq + nb_txq; qi++) {
2340 		hairpin_conf.peers[0].port = peer_rx_port;
2341 		hairpin_conf.peers[0].queue = i + nb_rxq;
2342 		hairpin_conf.manual_bind = !!manual;
2343 		hairpin_conf.tx_explicit = !!tx_exp;
2344 		diag = rte_eth_tx_hairpin_queue_setup
2345 			(pi, qi, nb_txd, &hairpin_conf);
2346 		i++;
2347 		if (diag == 0)
2348 			continue;
2349 
2350 		/* Failed to set up Tx hairpin queue, return */
2351 		if (rte_atomic16_cmpset(&(port->port_status),
2352 					RTE_PORT_HANDLING,
2353 					RTE_PORT_STOPPED) == 0)
2354 			printf("Port %d can not be set back "
2355 					"to stopped\n", pi);
2356 		printf("Fail to configure port %d hairpin "
2357 				"queues\n", pi);
2358 		/* try to reconfigure queues next time */
2359 		port->need_reconfig_queues = 1;
2360 		return -1;
2361 	}
2362 	for (qi = nb_rxq, i = 0; qi < nb_hairpinq + nb_rxq; qi++) {
2363 		hairpin_conf.peers[0].port = peer_tx_port;
2364 		hairpin_conf.peers[0].queue = i + nb_txq;
2365 		hairpin_conf.manual_bind = !!manual;
2366 		hairpin_conf.tx_explicit = !!tx_exp;
2367 		diag = rte_eth_rx_hairpin_queue_setup
2368 			(pi, qi, nb_rxd, &hairpin_conf);
2369 		i++;
2370 		if (diag == 0)
2371 			continue;
2372 
2373 		/* Failed to set up Rx hairpin queue, return */
2374 		if (rte_atomic16_cmpset(&(port->port_status),
2375 					RTE_PORT_HANDLING,
2376 					RTE_PORT_STOPPED) == 0)
2377 			printf("Port %d can not be set back "
2378 					"to stopped\n", pi);
2379 		printf("Fail to configure port %d hairpin "
2380 				"queues\n", pi);
2381 		/* try to reconfigure queues next time */
2382 		port->need_reconfig_queues = 1;
2383 		return -1;
2384 	}
2385 	return 0;
2386 }
2387 
2388 /* Configure an Rx queue, with an optional buffer split into multiple segments. */
2389 int
2390 rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
2391 	       uint16_t nb_rx_desc, unsigned int socket_id,
2392 	       struct rte_eth_rxconf *rx_conf, struct rte_mempool *mp)
2393 {
2394 	union rte_eth_rxseg rx_useg[MAX_SEGS_BUFFER_SPLIT] = {};
2395 	unsigned int i, mp_n;
2396 	int ret;
2397 
2398 	if (rx_pkt_nb_segs <= 1 ||
2399 	    (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) == 0) {
2400 		rx_conf->rx_seg = NULL;
2401 		rx_conf->rx_nseg = 0;
2402 		ret = rte_eth_rx_queue_setup(port_id, rx_queue_id,
2403 					     nb_rx_desc, socket_id,
2404 					     rx_conf, mp);
2405 		return ret;
2406 	}
2407 	for (i = 0; i < rx_pkt_nb_segs; i++) {
2408 		struct rte_eth_rxseg_split *rx_seg = &rx_useg[i].split;
2409 		struct rte_mempool *mpx;
2410 		/*
2411 		 * Use the last valid pool for the segments whose index
2412 		 * exceeds the number of configured mempools.
2413 		 */
2414 		mp_n = (i >= mbuf_data_size_n) ? mbuf_data_size_n - 1 : i;
2415 		mpx = mbuf_pool_find(socket_id, mp_n);
2416 		/* Handle zero as mbuf data buffer size. */
2417 		rx_seg->length = rx_pkt_seg_lengths[i] ?
2418 				   rx_pkt_seg_lengths[i] :
2419 				   mbuf_data_size[mp_n];
2420 		rx_seg->offset = i < rx_pkt_nb_offs ?
2421 				   rx_pkt_seg_offsets[i] : 0;
2422 		rx_seg->mp = mpx ? mpx : mp;
2423 	}
2424 	rx_conf->rx_nseg = rx_pkt_nb_segs;
2425 	rx_conf->rx_seg = rx_useg;
2426 	ret = rte_eth_rx_queue_setup(port_id, rx_queue_id, nb_rx_desc,
2427 				    socket_id, rx_conf, NULL);
2428 	rx_conf->rx_seg = NULL;
2429 	rx_conf->rx_nseg = 0;
2430 	return ret;
2431 }
2432 
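/*
 * Configure (when needed) and start the given port, or all ports when
 * pid == RTE_PORT_ALL, then optionally bind hairpin peer ports.
 * Returns 0 on success, a negative value on error.
 */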
2433 int
2434 start_port(portid_t pid)
2435 {
2436 	int diag, need_check_link_status = -1;
2437 	portid_t pi;
2438 	portid_t p_pi = RTE_MAX_ETHPORTS;
2439 	portid_t pl[RTE_MAX_ETHPORTS];
2440 	portid_t peer_pl[RTE_MAX_ETHPORTS];
2441 	uint16_t cnt_pi = 0;
2442 	uint16_t cfg_pi = 0;
2443 	int peer_pi;
2444 	queueid_t qi;
2445 	struct rte_port *port;
2446 	struct rte_ether_addr mac_addr;
2447 	struct rte_eth_hairpin_cap cap;
2448 
2449 	if (port_id_is_invalid(pid, ENABLED_WARN))
2450 		return 0;
2451 
2452 	RTE_ETH_FOREACH_DEV(pi) {
2453 		if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
2454 			continue;
2455 
2456 		need_check_link_status = 0;
2457 		port = &ports[pi];
2458 		if (rte_atomic16_cmpset(&(port->port_status), RTE_PORT_STOPPED,
2459 						 RTE_PORT_HANDLING) == 0) {
2460 			printf("Port %d is now not stopped\n", pi);
2461 			continue;
2462 		}
2463 
2464 		if (port->need_reconfig > 0) {
2465 			port->need_reconfig = 0;
2466 
2467 			if (flow_isolate_all) {
2468 				int ret = port_flow_isolate(pi, 1);
2469 				if (ret) {
2470 					printf("Failed to apply isolated"
2471 					       " mode on port %d\n", pi);
2472 					return -1;
2473 				}
2474 			}
2475 			configure_rxtx_dump_callbacks(0);
2476 			printf("Configuring Port %d (socket %u)\n", pi,
2477 					port->socket_id);
2478 			if (nb_hairpinq > 0 &&
2479 			    rte_eth_dev_hairpin_capability_get(pi, &cap)) {
2480 				printf("Port %d doesn't support hairpin "
2481 				       "queues\n", pi);
2482 				return -1;
2483 			}
2484 			/* configure port */
2485 			diag = rte_eth_dev_configure(pi, nb_rxq + nb_hairpinq,
2486 						     nb_txq + nb_hairpinq,
2487 						     &(port->dev_conf));
2488 			if (diag != 0) {
2489 				if (rte_atomic16_cmpset(&(port->port_status),
2490 				RTE_PORT_HANDLING, RTE_PORT_STOPPED) == 0)
2491 					printf("Port %d can not be set back "
2492 							"to stopped\n", pi);
2493 				printf("Fail to configure port %d\n", pi);
2494 				/* try to reconfigure port next time */
2495 				port->need_reconfig = 1;
2496 				return -1;
2497 			}
2498 		}
2499 		if (port->need_reconfig_queues > 0) {
2500 			port->need_reconfig_queues = 0;
2501 			/* setup tx queues */
2502 			for (qi = 0; qi < nb_txq; qi++) {
2503 				if ((numa_support) &&
2504 					(txring_numa[pi] != NUMA_NO_CONFIG))
2505 					diag = rte_eth_tx_queue_setup(pi, qi,
2506 						port->nb_tx_desc[qi],
2507 						txring_numa[pi],
2508 						&(port->tx_conf[qi]));
2509 				else
2510 					diag = rte_eth_tx_queue_setup(pi, qi,
2511 						port->nb_tx_desc[qi],
2512 						port->socket_id,
2513 						&(port->tx_conf[qi]));
2514 
2515 				if (diag == 0)
2516 					continue;
2517 
2518 				/* Fail to setup tx queue, return */
2519 				if (rte_atomic16_cmpset(&(port->port_status),
2520 							RTE_PORT_HANDLING,
2521 							RTE_PORT_STOPPED) == 0)
2522 					printf("Port %d can not be set back "
2523 							"to stopped\n", pi);
2524 				printf("Fail to configure port %d tx queues\n",
2525 				       pi);
2526 				/* try to reconfigure queues next time */
2527 				port->need_reconfig_queues = 1;
2528 				return -1;
2529 			}
2530 			for (qi = 0; qi < nb_rxq; qi++) {
2531 				/* setup rx queues */
2532 				if ((numa_support) &&
2533 					(rxring_numa[pi] != NUMA_NO_CONFIG)) {
2534 					struct rte_mempool * mp =
2535 						mbuf_pool_find
2536 							(rxring_numa[pi], 0);
2537 					if (mp == NULL) {
2538 						printf("Failed to setup RX queue: "
2539 							"No mempool allocation"
2540 							" on the socket %d\n",
2541 							rxring_numa[pi]);
2542 						return -1;
2543 					}
2544 
2545 					diag = rx_queue_setup(pi, qi,
2546 					     port->nb_rx_desc[qi],
2547 					     rxring_numa[pi],
2548 					     &(port->rx_conf[qi]),
2549 					     mp);
2550 				} else {
2551 					struct rte_mempool *mp =
2552 						mbuf_pool_find
2553 							(port->socket_id, 0);
2554 					if (mp == NULL) {
2555 						printf("Failed to setup RX queue: "
2556 							"No mempool allocation"
2557 							" on the socket %d\n",
2558 							port->socket_id);
2559 						return -1;
2560 					}
2561 					diag = rx_queue_setup(pi, qi,
2562 					     port->nb_rx_desc[qi],
2563 					     port->socket_id,
2564 					     &(port->rx_conf[qi]),
2565 					     mp);
2566 				}
2567 				if (diag == 0)
2568 					continue;
2569 
2570 				/* Fail to setup rx queue, return */
2571 				if (rte_atomic16_cmpset(&(port->port_status),
2572 							RTE_PORT_HANDLING,
2573 							RTE_PORT_STOPPED) == 0)
2574 					printf("Port %d can not be set back "
2575 							"to stopped\n", pi);
2576 				printf("Fail to configure port %d rx queues\n",
2577 				       pi);
2578 				/* try to reconfigure queues next time */
2579 				port->need_reconfig_queues = 1;
2580 				return -1;
2581 			}
2582 			/* setup hairpin queues */
2583 			if (setup_hairpin_queues(pi, p_pi, cnt_pi) != 0)
2584 				return -1;
2585 		}
2586 		configure_rxtx_dump_callbacks(verbose_level);
2587 		if (clear_ptypes) {
2588 			diag = rte_eth_dev_set_ptypes(pi, RTE_PTYPE_UNKNOWN,
2589 					NULL, 0);
2590 			if (diag < 0)
2591 				printf(
2592 				"Port %d: Failed to disable Ptype parsing\n",
2593 				pi);
2594 		}
2595 
2596 		p_pi = pi;
2597 		cnt_pi++;
2598 
2599 		/* start port */
2600 		diag = rte_eth_dev_start(pi);
2601 		if (diag < 0) {
2602 			printf("Fail to start port %d: %s\n", pi,
2603 			       rte_strerror(-diag));
2604 
2605 			/* Failed to start the port; set it back to stopped */
2606 			if (rte_atomic16_cmpset(&(port->port_status),
2607 				RTE_PORT_HANDLING, RTE_PORT_STOPPED) == 0)
2608 				printf("Port %d can not be set back to "
2609 							"stopped\n", pi);
2610 			continue;
2611 		}
2612 
2613 		if (rte_atomic16_cmpset(&(port->port_status),
2614 			RTE_PORT_HANDLING, RTE_PORT_STARTED) == 0)
2615 			printf("Port %d can not be set into started\n", pi);
2616 
2617 		if (eth_macaddr_get_print_err(pi, &mac_addr) == 0)
2618 			printf("Port %d: %02X:%02X:%02X:%02X:%02X:%02X\n", pi,
2619 				mac_addr.addr_bytes[0], mac_addr.addr_bytes[1],
2620 				mac_addr.addr_bytes[2], mac_addr.addr_bytes[3],
2621 				mac_addr.addr_bytes[4], mac_addr.addr_bytes[5]);
2622 
2623 		/* at least one port started, need checking link status */
2624 		need_check_link_status = 1;
2625 
2626 		pl[cfg_pi++] = pi;
2627 	}
2628 
2629 	if (need_check_link_status == 1 && !no_link_check)
2630 		check_all_ports_link_status(RTE_PORT_ALL);
2631 	else if (need_check_link_status == 0)
2632 		printf("Please stop the ports first\n");
2633 
2634 	if (hairpin_mode & 0xf) {
2635 		uint16_t i;
2636 		int j;
2637 
2638 		/* bind all started hairpin ports */
2639 		for (i = 0; i < cfg_pi; i++) {
2640 			pi = pl[i];
2641 			/* bind current Tx to all peer Rx */
2642 			peer_pi = rte_eth_hairpin_get_peer_ports(pi, peer_pl,
2643 							RTE_MAX_ETHPORTS, 1);
2644 			if (peer_pi < 0)
2645 				return peer_pi;
2646 			for (j = 0; j < peer_pi; j++) {
2647 				if (!port_is_started(peer_pl[j]))
2648 					continue;
2649 				diag = rte_eth_hairpin_bind(pi, peer_pl[j]);
2650 				if (diag < 0) {
2651 					printf("Error during binding hairpin"
2652 					       " Tx port %u to %u: %s\n",
2653 					       pi, peer_pl[j],
2654 					       rte_strerror(-diag));
2655 					return -1;
2656 				}
2657 			}
2658 			/* bind all peer Tx to current Rx */
2659 			peer_pi = rte_eth_hairpin_get_peer_ports(pi, peer_pl,
2660 							RTE_MAX_ETHPORTS, 0);
2661 			if (peer_pi < 0)
2662 				return peer_pi;
2663 			for (j = 0; j < peer_pi; j++) {
2664 				if (!port_is_started(peer_pl[j]))
2665 					continue;
2666 				diag = rte_eth_hairpin_bind(peer_pl[j], pi);
2667 				if (diag < 0) {
2668 					printf("Error during binding hairpin"
2669 					       " Tx port %u to %u: %s\n",
2670 					       peer_pl[j], pi,
2671 					       rte_strerror(-diag));
2672 					return -1;
2673 				}
2674 			}
2675 		}
2676 	}
2677 
2678 	printf("Done\n");
2679 	return 0;
2680 }
2681 
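/*
 * Stop the given port, or all ports when pid == RTE_PORT_ALL, unbinding
 * hairpin peers and flushing flow rules first.
 */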
2682 void
2683 stop_port(portid_t pid)
2684 {
2685 	portid_t pi;
2686 	struct rte_port *port;
2687 	int need_check_link_status = 0;
2688 	portid_t peer_pl[RTE_MAX_ETHPORTS];
2689 	int peer_pi;
2690 
2691 	if (port_id_is_invalid(pid, ENABLED_WARN))
2692 		return;
2693 
2694 	printf("Stopping ports...\n");
2695 
2696 	RTE_ETH_FOREACH_DEV(pi) {
2697 		if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
2698 			continue;
2699 
2700 		if (port_is_forwarding(pi) != 0 && test_done == 0) {
2701 			printf("Please remove port %d from forwarding configuration.\n", pi);
2702 			continue;
2703 		}
2704 
2705 		if (port_is_bonding_slave(pi)) {
2706 			printf("Please remove port %d from bonded device.\n", pi);
2707 			continue;
2708 		}
2709 
2710 		port = &ports[pi];
2711 		if (rte_atomic16_cmpset(&(port->port_status), RTE_PORT_STARTED,
2712 						RTE_PORT_HANDLING) == 0)
2713 			continue;
2714 
2715 		if (hairpin_mode & 0xf) {
2716 			int j;
2717 
2718 			rte_eth_hairpin_unbind(pi, RTE_MAX_ETHPORTS);
2719 			/* unbind all peer Tx from current Rx */
2720 			peer_pi = rte_eth_hairpin_get_peer_ports(pi, peer_pl,
2721 							RTE_MAX_ETHPORTS, 0);
2722 			if (peer_pi < 0)
2723 				continue;
2724 			for (j = 0; j < peer_pi; j++) {
2725 				if (!port_is_started(peer_pl[j]))
2726 					continue;
2727 				rte_eth_hairpin_unbind(peer_pl[j], pi);
2728 			}
2729 		}
2730 
2731 		if (port->flow_list)
2732 			port_flow_flush(pi);
2733 
2734 		if (rte_eth_dev_stop(pi) != 0)
2735 			RTE_LOG(ERR, EAL, "rte_eth_dev_stop failed for port %u\n",
2736 				pi);
2737 
2738 		if (rte_atomic16_cmpset(&(port->port_status),
2739 			RTE_PORT_HANDLING, RTE_PORT_STOPPED) == 0)
2740 			printf("Port %d can not be set into stopped\n", pi);
2741 		need_check_link_status = 1;
2742 	}
2743 	if (need_check_link_status && !no_link_check)
2744 		check_all_ports_link_status(RTE_PORT_ALL);
2745 
2746 	printf("Done\n");
2747 }
2748 
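/* Compact a port-id array in place, dropping entries that are no longer valid. */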
2749 static void
2750 remove_invalid_ports_in(portid_t *array, portid_t *total)
2751 {
2752 	portid_t i;
2753 	portid_t new_total = 0;
2754 
2755 	for (i = 0; i < *total; i++)
2756 		if (!port_id_is_invalid(array[i], DISABLED_WARN)) {
2757 			array[new_total] = array[i];
2758 			new_total++;
2759 		}
2760 	*total = new_total;
2761 }
2762 
2763 static void
2764 remove_invalid_ports(void)
2765 {
2766 	remove_invalid_ports_in(ports_ids, &nb_ports);
2767 	remove_invalid_ports_in(fwd_ports_ids, &nb_fwd_ports);
2768 	nb_cfg_ports = nb_fwd_ports;
2769 }
2770 
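/*
 * Close the given port, or all ports when pid == RTE_PORT_ALL, flushing its
 * flow rules first and dropping it from the port lists.
 */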
2771 void
2772 close_port(portid_t pid)
2773 {
2774 	portid_t pi;
2775 	struct rte_port *port;
2776 
2777 	if (port_id_is_invalid(pid, ENABLED_WARN))
2778 		return;
2779 
2780 	printf("Closing ports...\n");
2781 
2782 	RTE_ETH_FOREACH_DEV(pi) {
2783 		if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
2784 			continue;
2785 
2786 		if (port_is_forwarding(pi) != 0 && test_done == 0) {
2787 			printf("Please remove port %d from forwarding configuration.\n", pi);
2788 			continue;
2789 		}
2790 
2791 		if (port_is_bonding_slave(pi)) {
2792 			printf("Please remove port %d from bonded device.\n", pi);
2793 			continue;
2794 		}
2795 
2796 		port = &ports[pi];
2797 		if (rte_atomic16_cmpset(&(port->port_status),
2798 			RTE_PORT_CLOSED, RTE_PORT_CLOSED) == 1) {
2799 			printf("Port %d is already closed\n", pi);
2800 			continue;
2801 		}
2802 
2803 		port_flow_flush(pi);
2804 		rte_eth_dev_close(pi);
2805 	}
2806 
2807 	remove_invalid_ports();
2808 	printf("Done\n");
2809 }
2810 
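/*
 * Reset the given port, or all ports when pid == RTE_PORT_ALL; the port(s)
 * must be stopped first and will be reconfigured on the next start.
 */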
2811 void
2812 reset_port(portid_t pid)
2813 {
2814 	int diag;
2815 	portid_t pi;
2816 	struct rte_port *port;
2817 
2818 	if (port_id_is_invalid(pid, ENABLED_WARN))
2819 		return;
2820 
2821 	if ((pid == (portid_t)RTE_PORT_ALL && !all_ports_stopped()) ||
2822 		(pid != (portid_t)RTE_PORT_ALL && !port_is_stopped(pid))) {
2823 		printf("Can not reset port(s), please stop port(s) first.\n");
2824 		return;
2825 	}
2826 
2827 	printf("Resetting ports...\n");
2828 
2829 	RTE_ETH_FOREACH_DEV(pi) {
2830 		if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
2831 			continue;
2832 
2833 		if (port_is_forwarding(pi) != 0 && test_done == 0) {
2834 			printf("Please remove port %d from forwarding "
2835 			       "configuration.\n", pi);
2836 			continue;
2837 		}
2838 
2839 		if (port_is_bonding_slave(pi)) {
2840 			printf("Please remove port %d from bonded device.\n",
2841 			       pi);
2842 			continue;
2843 		}
2844 
2845 		diag = rte_eth_dev_reset(pi);
2846 		if (diag == 0) {
2847 			port = &ports[pi];
2848 			port->need_reconfig = 1;
2849 			port->need_reconfig_queues = 1;
2850 		} else {
2851 			printf("Failed to reset port %d. diag=%d\n", pi, diag);
2852 		}
2853 	}
2854 
2855 	printf("Done\n");
2856 }
2857 
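/*
 * Probe the device designated by the devargs identifier and set up the
 * ethdev port(s) it creates, either from the RTE_ETH_EVENT_NEW event or by
 * iterating over the matching devices.
 */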
2858 void
2859 attach_port(char *identifier)
2860 {
2861 	portid_t pi;
2862 	struct rte_dev_iterator iterator;
2863 
2864 	printf("Attaching a new port...\n");
2865 
2866 	if (identifier == NULL) {
2867 		printf("Invalid parameters are specified\n");
2868 		return;
2869 	}
2870 
2871 	if (rte_dev_probe(identifier) < 0) {
2872 		TESTPMD_LOG(ERR, "Failed to attach port %s\n", identifier);
2873 		return;
2874 	}
2875 
2876 	/* first attach mode: event */
2877 	if (setup_on_probe_event) {
2878 		/* new ports are detected on RTE_ETH_EVENT_NEW event */
2879 		for (pi = 0; pi < RTE_MAX_ETHPORTS; pi++)
2880 			if (ports[pi].port_status == RTE_PORT_HANDLING &&
2881 					ports[pi].need_setup != 0)
2882 				setup_attached_port(pi);
2883 		return;
2884 	}
2885 
2886 	/* second attach mode: iterator */
2887 	RTE_ETH_FOREACH_MATCHING_DEV(pi, identifier, &iterator) {
2888 		/* setup ports matching the devargs used for probing */
2889 		if (port_is_forwarding(pi))
2890 			continue; /* port was already attached before */
2891 		setup_attached_port(pi);
2892 	}
2893 }
2894 
2895 static void
2896 setup_attached_port(portid_t pi)
2897 {
2898 	unsigned int socket_id;
2899 	int ret;
2900 
2901 	socket_id = (unsigned)rte_eth_dev_socket_id(pi);
2902 	/* if socket_id is invalid, set to the first available socket. */
2903 	if (check_socket_id(socket_id) < 0)
2904 		socket_id = socket_ids[0];
2905 	reconfig(pi, socket_id);
2906 	ret = rte_eth_promiscuous_enable(pi);
2907 	if (ret != 0)
2908 		printf("Error during enabling promiscuous mode for port %u: %s - ignore\n",
2909 			pi, rte_strerror(-ret));
2910 
2911 	ports_ids[nb_ports++] = pi;
2912 	fwd_ports_ids[nb_fwd_ports++] = pi;
2913 	nb_cfg_ports = nb_fwd_ports;
2914 	ports[pi].need_setup = 0;
2915 	ports[pi].port_status = RTE_PORT_STOPPED;
2916 
2917 	printf("Port %d is attached. Now total ports is %d\n", pi, nb_ports);
2918 	printf("Done\n");
2919 }
2920 
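/*
 * Remove a device and all of its ethdev ports; every port must already be
 * stopped or closed, otherwise the removal is aborted.
 */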
2921 static void
2922 detach_device(struct rte_device *dev)
2923 {
2924 	portid_t sibling;
2925 
2926 	if (dev == NULL) {
2927 		printf("Device already removed\n");
2928 		return;
2929 	}
2930 
2931 	printf("Removing a device...\n");
2932 
2933 	RTE_ETH_FOREACH_DEV_OF(sibling, dev) {
2934 		if (ports[sibling].port_status != RTE_PORT_CLOSED) {
2935 			if (ports[sibling].port_status != RTE_PORT_STOPPED) {
2936 				printf("Port %u not stopped\n", sibling);
2937 				return;
2938 			}
2939 			port_flow_flush(sibling);
2940 		}
2941 	}
2942 
2943 	if (rte_dev_remove(dev) < 0) {
2944 		TESTPMD_LOG(ERR, "Failed to detach device %s\n", dev->name);
2945 		return;
2946 	}
2947 	remove_invalid_ports();
2948 
2949 	printf("Device is detached\n");
2950 	printf("Now total ports is %d\n", nb_ports);
2951 	printf("Done\n");
2952 	return;
2953 }
2954 
2955 void
2956 detach_port_device(portid_t port_id)
2957 {
2958 	if (port_id_is_invalid(port_id, ENABLED_WARN))
2959 		return;
2960 
2961 	if (ports[port_id].port_status != RTE_PORT_CLOSED) {
2962 		if (ports[port_id].port_status != RTE_PORT_STOPPED) {
2963 			printf("Port not stopped\n");
2964 			return;
2965 		}
2966 		printf("Port was not closed\n");
2967 	}
2968 
2969 	detach_device(rte_eth_devices[port_id].device);
2970 }
2971 
2972 void
2973 detach_devargs(char *identifier)
2974 {
2975 	struct rte_dev_iterator iterator;
2976 	struct rte_devargs da;
2977 	portid_t port_id;
2978 
2979 	printf("Removing a device...\n");
2980 
2981 	memset(&da, 0, sizeof(da));
2982 	if (rte_devargs_parsef(&da, "%s", identifier)) {
2983 		printf("cannot parse identifier\n");
2984 		return;
2985 	}
2986 
2987 	RTE_ETH_FOREACH_MATCHING_DEV(port_id, identifier, &iterator) {
2988 		if (ports[port_id].port_status != RTE_PORT_CLOSED) {
2989 			if (ports[port_id].port_status != RTE_PORT_STOPPED) {
2990 				printf("Port %u not stopped\n", port_id);
2991 				rte_eth_iterator_cleanup(&iterator);
2992 				rte_devargs_reset(&da);
2993 				return;
2994 			}
2995 			port_flow_flush(port_id);
2996 		}
2997 	}
2998 
2999 	if (rte_eal_hotplug_remove(da.bus->name, da.name) != 0) {
3000 		TESTPMD_LOG(ERR, "Failed to detach device %s(%s)\n",
3001 			    da.name, da.bus->name);
3002 		rte_devargs_reset(&da);
3003 		return;
3004 	}
3005 
3006 	remove_invalid_ports();
3007 
3008 	printf("Device %s is detached\n", identifier);
3009 	printf("Now total ports is %d\n", nb_ports);
3010 	printf("Done\n");
3011 	rte_devargs_reset(&da);
3012 }
3013 
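/*
 * Clean up on exit: stop forwarding if it is still running, stop and close
 * all ports, disable hotplug handling and free the mbuf mempools.
 */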
3014 void
3015 pmd_test_exit(void)
3016 {
3017 	portid_t pt_id;
3018 	unsigned int i;
3019 	int ret;
3020 
3021 	if (test_done == 0)
3022 		stop_packet_forwarding();
3023 
3024 	for (i = 0 ; i < RTE_DIM(mempools) ; i++) {
3025 		if (mempools[i]) {
3026 			if (mp_alloc_type == MP_ALLOC_ANON)
3027 				rte_mempool_mem_iter(mempools[i], dma_unmap_cb,
3028 						     NULL);
3029 		}
3030 	}
3031 	if (ports != NULL) {
3032 		no_link_check = 1;
3033 		RTE_ETH_FOREACH_DEV(pt_id) {
3034 			printf("\nStopping port %d...\n", pt_id);
3035 			fflush(stdout);
3036 			stop_port(pt_id);
3037 		}
3038 		RTE_ETH_FOREACH_DEV(pt_id) {
3039 			printf("\nShutting down port %d...\n", pt_id);
3040 			fflush(stdout);
3041 			close_port(pt_id);
3042 		}
3043 	}
3044 
3045 	if (hot_plug) {
3046 		ret = rte_dev_event_monitor_stop();
3047 		if (ret) {
3048 			RTE_LOG(ERR, EAL,
3049 				"fail to stop device event monitor.");
3050 			return;
3051 		}
3052 
3053 		ret = rte_dev_event_callback_unregister(NULL,
3054 			dev_event_callback, NULL);
3055 		if (ret < 0) {
3056 			RTE_LOG(ERR, EAL,
3057 				"fail to unregister device event callback.\n");
3058 			return;
3059 		}
3060 
3061 		ret = rte_dev_hotplug_handle_disable();
3062 		if (ret) {
3063 			RTE_LOG(ERR, EAL,
3064 				"fail to disable hotplug handling.\n");
3065 			return;
3066 		}
3067 	}
3068 	for (i = 0 ; i < RTE_DIM(mempools) ; i++) {
3069 		if (mempools[i])
3070 			rte_mempool_free(mempools[i]);
3071 	}
3072 
3073 	printf("\nBye...\n");
3074 }
3075 
3076 typedef void (*cmd_func_t)(void);
3077 struct pmd_test_command {
3078 	const char *cmd_name;
3079 	cmd_func_t cmd_func;
3080 };
3081 
3082 /* Check the link status of all ports for up to 9 s, then print the final status of each port */
3083 static void
3084 check_all_ports_link_status(uint32_t port_mask)
3085 {
3086 #define CHECK_INTERVAL 100 /* 100ms */
3087 #define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
3088 	portid_t portid;
3089 	uint8_t count, all_ports_up, print_flag = 0;
3090 	struct rte_eth_link link;
3091 	int ret;
3092 	char link_status[RTE_ETH_LINK_MAX_STR_LEN];
3093 
3094 	printf("Checking link statuses...\n");
3095 	fflush(stdout);
3096 	for (count = 0; count <= MAX_CHECK_TIME; count++) {
3097 		all_ports_up = 1;
3098 		RTE_ETH_FOREACH_DEV(portid) {
3099 			if ((port_mask & (1 << portid)) == 0)
3100 				continue;
3101 			memset(&link, 0, sizeof(link));
3102 			ret = rte_eth_link_get_nowait(portid, &link);
3103 			if (ret < 0) {
3104 				all_ports_up = 0;
3105 				if (print_flag == 1)
3106 					printf("Port %u link get failed: %s\n",
3107 						portid, rte_strerror(-ret));
3108 				continue;
3109 			}
3110 			/* print link status if flag set */
3111 			if (print_flag == 1) {
3112 				rte_eth_link_to_str(link_status,
3113 					sizeof(link_status), &link);
3114 				printf("Port %d %s\n", portid, link_status);
3115 				continue;
3116 			}
3117 			/* clear all_ports_up flag if any link down */
3118 			if (link.link_status == ETH_LINK_DOWN) {
3119 				all_ports_up = 0;
3120 				break;
3121 			}
3122 		}
3123 		/* after finally printing all link status, get out */
3124 		if (print_flag == 1)
3125 			break;
3126 
3127 		if (all_ports_up == 0) {
3128 			fflush(stdout);
3129 			rte_delay_ms(CHECK_INTERVAL);
3130 		}
3131 
3132 		/* set the print_flag if all ports up or timeout */
3133 		if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
3134 			print_flag = 1;
3135 		}
3136 
3137 		if (lsc_interrupt)
3138 			break;
3139 	}
3140 }
3141 
3142 static void
3143 rmv_port_callback(void *arg)
3144 {
3145 	int need_to_start = 0;
3146 	int org_no_link_check = no_link_check;
3147 	portid_t port_id = (intptr_t)arg;
3148 	struct rte_device *dev;
3149 
3150 	RTE_ETH_VALID_PORTID_OR_RET(port_id);
3151 
3152 	if (!test_done && port_is_forwarding(port_id)) {
3153 		need_to_start = 1;
3154 		stop_packet_forwarding();
3155 	}
3156 	no_link_check = 1;
3157 	stop_port(port_id);
3158 	no_link_check = org_no_link_check;
3159 
3160 	/* Save rte_device pointer before closing ethdev port */
3161 	dev = rte_eth_devices[port_id].device;
3162 	close_port(port_id);
3163 	detach_device(dev); /* might be already removed or have more ports */
3164 
3165 	if (need_to_start)
3166 		start_packet_forwarding(0);
3167 }
3168 
3169 /* This function is used by the interrupt thread */
3170 static int
3171 eth_event_callback(portid_t port_id, enum rte_eth_event_type type, void *param,
3172 		  void *ret_param)
3173 {
3174 	RTE_SET_USED(param);
3175 	RTE_SET_USED(ret_param);
3176 
3177 	if (type >= RTE_ETH_EVENT_MAX) {
3178 		fprintf(stderr, "\nPort %" PRIu16 ": %s called upon invalid event %d\n",
3179 			port_id, __func__, type);
3180 		fflush(stderr);
3181 	} else if (event_print_mask & (UINT32_C(1) << type)) {
3182 		printf("\nPort %" PRIu16 ": %s event\n", port_id,
3183 			eth_event_desc[type]);
3184 		fflush(stdout);
3185 	}
3186 
3187 	switch (type) {
3188 	case RTE_ETH_EVENT_NEW:
3189 		ports[port_id].need_setup = 1;
3190 		ports[port_id].port_status = RTE_PORT_HANDLING;
3191 		break;
3192 	case RTE_ETH_EVENT_INTR_RMV:
3193 		if (port_id_is_invalid(port_id, DISABLED_WARN))
3194 			break;
3195 		if (rte_eal_alarm_set(100000,
3196 				rmv_port_callback, (void *)(intptr_t)port_id))
3197 			fprintf(stderr, "Could not set up deferred device removal\n");
3198 		break;
3199 	case RTE_ETH_EVENT_DESTROY:
3200 		ports[port_id].port_status = RTE_PORT_CLOSED;
3201 		printf("Port %u is closed\n", port_id);
3202 		break;
3203 	default:
3204 		break;
3205 	}
3206 	return 0;
3207 }
3208 
3209 static int
3210 register_eth_event_callback(void)
3211 {
3212 	int ret;
3213 	enum rte_eth_event_type event;
3214 
3215 	for (event = RTE_ETH_EVENT_UNKNOWN;
3216 			event < RTE_ETH_EVENT_MAX; event++) {
3217 		ret = rte_eth_dev_callback_register(RTE_ETH_ALL,
3218 				event,
3219 				eth_event_callback,
3220 				NULL);
3221 		if (ret != 0) {
3222 			TESTPMD_LOG(ERR, "Failed to register callback for "
3223 					"%s event\n", eth_event_desc[event]);
3224 			return -1;
3225 		}
3226 	}
3227 
3228 	return 0;
3229 }
3230 
3231 /* This function is used by the interrupt thread */
3232 static void
3233 dev_event_callback(const char *device_name, enum rte_dev_event_type type,
3234 			     __rte_unused void *arg)
3235 {
3236 	uint16_t port_id;
3237 	int ret;
3238 
3239 	if (type >= RTE_DEV_EVENT_MAX) {
3240 		fprintf(stderr, "%s called upon invalid event %d\n",
3241 			__func__, type);
3242 		fflush(stderr);
3243 	}
3244 
3245 	switch (type) {
3246 	case RTE_DEV_EVENT_REMOVE:
3247 		RTE_LOG(DEBUG, EAL, "The device: %s has been removed!\n",
3248 			device_name);
3249 		ret = rte_eth_dev_get_port_by_name(device_name, &port_id);
3250 		if (ret) {
3251 			RTE_LOG(ERR, EAL, "can not get port by device %s!\n",
3252 				device_name);
3253 			return;
3254 		}
3255 		/*
3256 		 * Because the user's callback is invoked from the EAL
3257 		 * interrupt callback, the interrupt callback must finish
3258 		 * before it can be unregistered when detaching the device.
3259 		 * So the callback returns quickly here and the device is
3260 		 * detached later through a deferred removal (alarm). This
3261 		 * is a workaround; once device detaching is moved into the
3262 		 * EAL, the deferred removal can be deleted.
3263 		 */
3264 		if (rte_eal_alarm_set(100000,
3265 				rmv_port_callback, (void *)(intptr_t)port_id))
3266 			RTE_LOG(ERR, EAL,
3267 				"Could not set up deferred device removal\n");
3268 		break;
3269 	case RTE_DEV_EVENT_ADD:
3270 		RTE_LOG(ERR, EAL, "The device: %s has been added!\n",
3271 			device_name);
3272 		/* TODO: After finish kernel driver binding,
3273 		 * begin to attach port.
3274 		 */
3275 		break;
3276 	default:
3277 		break;
3278 	}
3279 }
3280 
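/*
 * Initialize the Rx/Tx queue configuration of a port from the PMD defaults,
 * then apply any thresholds and descriptor counts given on the command line.
 */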
3281 static void
3282 rxtx_port_config(struct rte_port *port)
3283 {
3284 	uint16_t qid;
3285 	uint64_t offloads;
3286 
3287 	for (qid = 0; qid < nb_rxq; qid++) {
3288 		offloads = port->rx_conf[qid].offloads;
3289 		port->rx_conf[qid] = port->dev_info.default_rxconf;
3290 		if (offloads != 0)
3291 			port->rx_conf[qid].offloads = offloads;
3292 
3293 		/* Check if any Rx parameters have been passed */
3294 		if (rx_pthresh != RTE_PMD_PARAM_UNSET)
3295 			port->rx_conf[qid].rx_thresh.pthresh = rx_pthresh;
3296 
3297 		if (rx_hthresh != RTE_PMD_PARAM_UNSET)
3298 			port->rx_conf[qid].rx_thresh.hthresh = rx_hthresh;
3299 
3300 		if (rx_wthresh != RTE_PMD_PARAM_UNSET)
3301 			port->rx_conf[qid].rx_thresh.wthresh = rx_wthresh;
3302 
3303 		if (rx_free_thresh != RTE_PMD_PARAM_UNSET)
3304 			port->rx_conf[qid].rx_free_thresh = rx_free_thresh;
3305 
3306 		if (rx_drop_en != RTE_PMD_PARAM_UNSET)
3307 			port->rx_conf[qid].rx_drop_en = rx_drop_en;
3308 
3309 		port->nb_rx_desc[qid] = nb_rxd;
3310 	}
3311 
3312 	for (qid = 0; qid < nb_txq; qid++) {
3313 		offloads = port->tx_conf[qid].offloads;
3314 		port->tx_conf[qid] = port->dev_info.default_txconf;
3315 		if (offloads != 0)
3316 			port->tx_conf[qid].offloads = offloads;
3317 
3318 		/* Check if any Tx parameters have been passed */
3319 		if (tx_pthresh != RTE_PMD_PARAM_UNSET)
3320 			port->tx_conf[qid].tx_thresh.pthresh = tx_pthresh;
3321 
3322 		if (tx_hthresh != RTE_PMD_PARAM_UNSET)
3323 			port->tx_conf[qid].tx_thresh.hthresh = tx_hthresh;
3324 
3325 		if (tx_wthresh != RTE_PMD_PARAM_UNSET)
3326 			port->tx_conf[qid].tx_thresh.wthresh = tx_wthresh;
3327 
3328 		if (tx_rs_thresh != RTE_PMD_PARAM_UNSET)
3329 			port->tx_conf[qid].tx_rs_thresh = tx_rs_thresh;
3330 
3331 		if (tx_free_thresh != RTE_PMD_PARAM_UNSET)
3332 			port->tx_conf[qid].tx_free_thresh = tx_free_thresh;
3333 
3334 		port->nb_tx_desc[qid] = nb_txd;
3335 	}
3336 }
3337 
3338 /*
3339  * Helper function to keep the max_rx_pkt_len value and the JUMBO_FRAME offload
3340  * consistent; the MTU is also adjusted when the JUMBO_FRAME offload is not set.
3341  *
3342  * port->dev_info should be set before calling this function.
3343  *
3344  * return 0 on success, negative on error
3345  */
3346 int
3347 update_jumbo_frame_offload(portid_t portid)
3348 {
3349 	struct rte_port *port = &ports[portid];
3350 	uint32_t eth_overhead;
3351 	uint64_t rx_offloads;
3352 	int ret;
3353 	bool on;
3354 
3355 	/* Derive the Ethernet overhead used to convert between MTU and max_rx_pkt_len */
3356 	if (port->dev_info.max_mtu != UINT16_MAX &&
3357 	    port->dev_info.max_rx_pktlen > port->dev_info.max_mtu)
3358 		eth_overhead = port->dev_info.max_rx_pktlen -
3359 				port->dev_info.max_mtu;
3360 	else
3361 		eth_overhead = RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN;
3362 
3363 	rx_offloads = port->dev_conf.rxmode.offloads;
3364 
3365 	/* Default config value is 0 to use PMD specific overhead */
3366 	if (port->dev_conf.rxmode.max_rx_pkt_len == 0)
3367 		port->dev_conf.rxmode.max_rx_pkt_len = RTE_ETHER_MTU + eth_overhead;
3368 
3369 	if (port->dev_conf.rxmode.max_rx_pkt_len <= RTE_ETHER_MTU + eth_overhead) {
3370 		rx_offloads &= ~DEV_RX_OFFLOAD_JUMBO_FRAME;
3371 		on = false;
3372 	} else {
3373 		if ((port->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_JUMBO_FRAME) == 0) {
3374 			printf("Frame size (%u) is not supported by port %u\n",
3375 				port->dev_conf.rxmode.max_rx_pkt_len,
3376 				portid);
3377 			return -1;
3378 		}
3379 		rx_offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME;
3380 		on = true;
3381 	}
3382 
3383 	if (rx_offloads != port->dev_conf.rxmode.offloads) {
3384 		uint16_t qid;
3385 
3386 		port->dev_conf.rxmode.offloads = rx_offloads;
3387 
3388 		/* Apply JUMBO_FRAME offload configuration to Rx queue(s) */
3389 		for (qid = 0; qid < port->dev_info.nb_rx_queues; qid++) {
3390 			if (on)
3391 				port->rx_conf[qid].offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME;
3392 			else
3393 				port->rx_conf[qid].offloads &= ~DEV_RX_OFFLOAD_JUMBO_FRAME;
3394 		}
3395 	}
3396 
3397 	/* If JUMBO_FRAME is set, the MTU conversion is done by the ethdev
3398 	 * layer; if it is unset, do it here.
3399 	 */
3400 	if ((rx_offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) == 0) {
3401 		ret = rte_eth_dev_set_mtu(portid,
3402 				port->dev_conf.rxmode.max_rx_pkt_len - eth_overhead);
3403 		if (ret)
3404 			printf("Failed to set MTU to %u for port %u\n",
3405 				port->dev_conf.rxmode.max_rx_pkt_len - eth_overhead,
3406 				portid);
3407 	}
3408 
3409 	return 0;
3410 }
3411 
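/*
 * Build the default configuration of every probed port: flow director, RSS,
 * queue configuration, MAC address and LSC/RMV interrupt settings.
 */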
3412 void
3413 init_port_config(void)
3414 {
3415 	portid_t pid;
3416 	struct rte_port *port;
3417 	int ret;
3418 
3419 	RTE_ETH_FOREACH_DEV(pid) {
3420 		port = &ports[pid];
3421 		port->dev_conf.fdir_conf = fdir_conf;
3422 
3423 		ret = eth_dev_info_get_print_err(pid, &port->dev_info);
3424 		if (ret != 0)
3425 			return;
3426 
3427 		if (nb_rxq > 1) {
3428 			port->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
3429 			port->dev_conf.rx_adv_conf.rss_conf.rss_hf =
3430 				rss_hf & port->dev_info.flow_type_rss_offloads;
3431 		} else {
3432 			port->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
3433 			port->dev_conf.rx_adv_conf.rss_conf.rss_hf = 0;
3434 		}
3435 
3436 		if (port->dcb_flag == 0) {
3437 			if( port->dev_conf.rx_adv_conf.rss_conf.rss_hf != 0)
3438 				port->dev_conf.rxmode.mq_mode =
3439 					(enum rte_eth_rx_mq_mode)
3440 						(rx_mq_mode & ETH_MQ_RX_RSS);
3441 			else
3442 				port->dev_conf.rxmode.mq_mode = ETH_MQ_RX_NONE;
3443 		}
3444 
3445 		rxtx_port_config(port);
3446 
3447 		ret = eth_macaddr_get_print_err(pid, &port->eth_addr);
3448 		if (ret != 0)
3449 			return;
3450 
3451 #if defined RTE_NET_IXGBE && defined RTE_LIBRTE_IXGBE_BYPASS
3452 		rte_pmd_ixgbe_bypass_init(pid);
3453 #endif
3454 
3455 		if (lsc_interrupt &&
3456 		    (rte_eth_devices[pid].data->dev_flags &
3457 		     RTE_ETH_DEV_INTR_LSC))
3458 			port->dev_conf.intr_conf.lsc = 1;
3459 		if (rmv_interrupt &&
3460 		    (rte_eth_devices[pid].data->dev_flags &
3461 		     RTE_ETH_DEV_INTR_RMV))
3462 			port->dev_conf.intr_conf.rmv = 1;
3463 	}
3464 }
3465 
3466 void set_port_slave_flag(portid_t slave_pid)
3467 {
3468 	struct rte_port *port;
3469 
3470 	port = &ports[slave_pid];
3471 	port->slave_flag = 1;
3472 }
3473 
3474 void clear_port_slave_flag(portid_t slave_pid)
3475 {
3476 	struct rte_port *port;
3477 
3478 	port = &ports[slave_pid];
3479 	port->slave_flag = 0;
3480 }
3481 
3482 uint8_t port_is_bonding_slave(portid_t slave_pid)
3483 {
3484 	struct rte_port *port;
3485 
3486 	port = &ports[slave_pid];
3487 	if ((rte_eth_devices[slave_pid].data->dev_flags &
3488 	    RTE_ETH_DEV_BONDED_SLAVE) || (port->slave_flag == 1))
3489 		return 1;
3490 	return 0;
3491 }
3492 
3493 const uint16_t vlan_tags[] = {
3494 		0,  1,  2,  3,  4,  5,  6,  7,
3495 		8,  9, 10, 11,  12, 13, 14, 15,
3496 		16, 17, 18, 19, 20, 21, 22, 23,
3497 		24, 25, 26, 27, 28, 29, 30, 31
3498 };
3499 
3500 static  int
3501 get_eth_dcb_conf(portid_t pid, struct rte_eth_conf *eth_conf,
3502 		 enum dcb_mode_enable dcb_mode,
3503 		 enum rte_eth_nb_tcs num_tcs,
3504 		 uint8_t pfc_en)
3505 {
3506 	uint8_t i;
3507 	int32_t rc;
3508 	struct rte_eth_rss_conf rss_conf;
3509 
3510 	/*
3511 	 * Builds up the correct configuration for dcb+vt based on the vlan tags array
3512 	 * given above, and the number of traffic classes available for use.
3513 	 */
3514 	if (dcb_mode == DCB_VT_ENABLED) {
3515 		struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
3516 				&eth_conf->rx_adv_conf.vmdq_dcb_conf;
3517 		struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3518 				&eth_conf->tx_adv_conf.vmdq_dcb_tx_conf;
3519 
3520 		/* VMDQ+DCB RX and TX configurations */
3521 		vmdq_rx_conf->enable_default_pool = 0;
3522 		vmdq_rx_conf->default_pool = 0;
3523 		vmdq_rx_conf->nb_queue_pools =
3524 			(num_tcs ==  ETH_4_TCS ? ETH_32_POOLS : ETH_16_POOLS);
3525 		vmdq_tx_conf->nb_queue_pools =
3526 			(num_tcs ==  ETH_4_TCS ? ETH_32_POOLS : ETH_16_POOLS);
3527 
3528 		vmdq_rx_conf->nb_pool_maps = vmdq_rx_conf->nb_queue_pools;
3529 		for (i = 0; i < vmdq_rx_conf->nb_pool_maps; i++) {
3530 			vmdq_rx_conf->pool_map[i].vlan_id = vlan_tags[i];
3531 			vmdq_rx_conf->pool_map[i].pools =
3532 				1 << (i % vmdq_rx_conf->nb_queue_pools);
3533 		}
3534 		for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3535 			vmdq_rx_conf->dcb_tc[i] = i % num_tcs;
3536 			vmdq_tx_conf->dcb_tc[i] = i % num_tcs;
3537 		}
3538 
3539 		/* set DCB mode of RX and TX of multiple queues */
3540 		eth_conf->rxmode.mq_mode =
3541 				(enum rte_eth_rx_mq_mode)
3542 					(rx_mq_mode & ETH_MQ_RX_VMDQ_DCB);
3543 		eth_conf->txmode.mq_mode = ETH_MQ_TX_VMDQ_DCB;
3544 	} else {
3545 		struct rte_eth_dcb_rx_conf *rx_conf =
3546 				&eth_conf->rx_adv_conf.dcb_rx_conf;
3547 		struct rte_eth_dcb_tx_conf *tx_conf =
3548 				&eth_conf->tx_adv_conf.dcb_tx_conf;
3549 
3550 		memset(&rss_conf, 0, sizeof(struct rte_eth_rss_conf));
3551 
3552 		rc = rte_eth_dev_rss_hash_conf_get(pid, &rss_conf);
3553 		if (rc != 0)
3554 			return rc;
3555 
3556 		rx_conf->nb_tcs = num_tcs;
3557 		tx_conf->nb_tcs = num_tcs;
3558 
3559 		for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3560 			rx_conf->dcb_tc[i] = i % num_tcs;
3561 			tx_conf->dcb_tc[i] = i % num_tcs;
3562 		}
3563 
3564 		eth_conf->rxmode.mq_mode =
3565 				(enum rte_eth_rx_mq_mode)
3566 					(rx_mq_mode & ETH_MQ_RX_DCB_RSS);
3567 		eth_conf->rx_adv_conf.rss_conf = rss_conf;
3568 		eth_conf->txmode.mq_mode = ETH_MQ_TX_DCB;
3569 	}
3570 
3571 	if (pfc_en)
3572 		eth_conf->dcb_capability_en =
3573 				ETH_DCB_PG_SUPPORT | ETH_DCB_PFC_SUPPORT;
3574 	else
3575 		eth_conf->dcb_capability_en = ETH_DCB_PG_SUPPORT;
3576 
3577 	return 0;
3578 }
3579 
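/*
 * Reconfigure a port for DCB (optionally with VT and priority flow control),
 * adjust the global rxq/txq counts and enable the VLAN filter.
 */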
3580 int
3581 init_port_dcb_config(portid_t pid,
3582 		     enum dcb_mode_enable dcb_mode,
3583 		     enum rte_eth_nb_tcs num_tcs,
3584 		     uint8_t pfc_en)
3585 {
3586 	struct rte_eth_conf port_conf;
3587 	struct rte_port *rte_port;
3588 	int retval;
3589 	uint16_t i;
3590 
3591 	rte_port = &ports[pid];
3592 
3593 	memset(&port_conf, 0, sizeof(struct rte_eth_conf));
3594 
3595 	port_conf.rxmode = rte_port->dev_conf.rxmode;
3596 	port_conf.txmode = rte_port->dev_conf.txmode;
3597 
3598 	/* Set the configuration of DCB in VT mode and DCB in non-VT mode */
3599 	retval = get_eth_dcb_conf(pid, &port_conf, dcb_mode, num_tcs, pfc_en);
3600 	if (retval < 0)
3601 		return retval;
3602 	port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_VLAN_FILTER;
3603 
3604 	/* Re-configure the device. */
3605 	retval = rte_eth_dev_configure(pid, nb_rxq, nb_rxq, &port_conf);
3606 	if (retval < 0)
3607 		return retval;
3608 
3609 	retval = eth_dev_info_get_print_err(pid, &rte_port->dev_info);
3610 	if (retval != 0)
3611 		return retval;
3612 
3613 	/* If dev_info.vmdq_pool_base is greater than 0,
3614 	 * the queue id of vmdq pools is started after pf queues.
3615 	 */
3616 	if (dcb_mode == DCB_VT_ENABLED &&
3617 	    rte_port->dev_info.vmdq_pool_base > 0) {
3618 		printf("VMDQ_DCB multi-queue mode is nonsensical"
3619 			" for port %d.", pid);
3620 		return -1;
3621 	}
3622 
3623 	/* Assume the ports in testpmd have the same DCB capability
3624 	 * and the same number of rxq and txq in DCB mode.
3625 	 */
3626 	if (dcb_mode == DCB_VT_ENABLED) {
3627 		if (rte_port->dev_info.max_vfs > 0) {
3628 			nb_rxq = rte_port->dev_info.nb_rx_queues;
3629 			nb_txq = rte_port->dev_info.nb_tx_queues;
3630 		} else {
3631 			nb_rxq = rte_port->dev_info.max_rx_queues;
3632 			nb_txq = rte_port->dev_info.max_tx_queues;
3633 		}
3634 	} else {
3635 		/* If VT is disabled, use all PF queues */
3636 		if (rte_port->dev_info.vmdq_pool_base == 0) {
3637 			nb_rxq = rte_port->dev_info.max_rx_queues;
3638 			nb_txq = rte_port->dev_info.max_tx_queues;
3639 		} else {
3640 			nb_rxq = (queueid_t)num_tcs;
3641 			nb_txq = (queueid_t)num_tcs;
3642 
3643 		}
3644 	}
3645 	rx_free_thresh = 64;
3646 
3647 	memcpy(&rte_port->dev_conf, &port_conf, sizeof(struct rte_eth_conf));
3648 
3649 	rxtx_port_config(rte_port);
3650 	/* VLAN filter */
3651 	rte_port->dev_conf.rxmode.offloads |= DEV_RX_OFFLOAD_VLAN_FILTER;
3652 	for (i = 0; i < RTE_DIM(vlan_tags); i++)
3653 		rx_vft_set(pid, vlan_tags[i], 1);
3654 
3655 	retval = eth_macaddr_get_print_err(pid, &rte_port->eth_addr);
3656 	if (retval != 0)
3657 		return retval;
3658 
3659 	rte_port->dcb_flag = 1;
3660 
3661 	/* Enter DCB configuration status */
3662 	dcb_config = 1;
3663 
3664 	return 0;
3665 }
3666 
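/* Allocate and initialize the array of port structures and the per-port NUMA mappings. */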
3667 static void
3668 init_port(void)
3669 {
3670 	int i;
3671 
3672 	/* Configuration of Ethernet ports. */
3673 	ports = rte_zmalloc("testpmd: ports",
3674 			    sizeof(struct rte_port) * RTE_MAX_ETHPORTS,
3675 			    RTE_CACHE_LINE_SIZE);
3676 	if (ports == NULL) {
3677 		rte_exit(EXIT_FAILURE,
3678 				"rte_zmalloc(%d struct rte_port) failed\n",
3679 				RTE_MAX_ETHPORTS);
3680 	}
3681 	for (i = 0; i < RTE_MAX_ETHPORTS; i++)
3682 		LIST_INIT(&ports[i].flow_tunnel_list);
3683 	/* Initialize ports NUMA structures */
3684 	memset(port_numa, NUMA_NO_CONFIG, RTE_MAX_ETHPORTS);
3685 	memset(rxring_numa, NUMA_NO_CONFIG, RTE_MAX_ETHPORTS);
3686 	memset(txring_numa, NUMA_NO_CONFIG, RTE_MAX_ETHPORTS);
3687 }
3688 
3689 static void
3690 force_quit(void)
3691 {
3692 	pmd_test_exit();
3693 	prompt_exit();
3694 }
3695 
3696 static void
3697 print_stats(void)
3698 {
3699 	uint8_t i;
3700 	const char clr[] = { 27, '[', '2', 'J', '\0' };
3701 	const char top_left[] = { 27, '[', '1', ';', '1', 'H', '\0' };
3702 
3703 	/* Clear screen and move to top left */
3704 	printf("%s%s", clr, top_left);
3705 
3706 	printf("\nPort statistics ====================================");
3707 	for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++)
3708 		nic_stats_display(fwd_ports_ids[i]);
3709 
3710 	fflush(stdout);
3711 }
3712 
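/*
 * SIGINT/SIGTERM handler: uninitialize the capture and latency frameworks,
 * force the application to quit and re-raise the signal with the default
 * handler so the expected exit status is reported.
 */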
3713 static void
3714 signal_handler(int signum)
3715 {
3716 	if (signum == SIGINT || signum == SIGTERM) {
3717 		printf("\nSignal %d received, preparing to exit...\n",
3718 				signum);
3719 #ifdef RTE_LIB_PDUMP
3720 		/* uninitialize packet capture framework */
3721 		rte_pdump_uninit();
3722 #endif
3723 #ifdef RTE_LIB_LATENCYSTATS
3724 		if (latencystats_enabled != 0)
3725 			rte_latencystats_uninit();
3726 #endif
3727 		force_quit();
3728 		/* Set flag to indicate the force termination. */
3729 		f_quit = 1;
3730 		/* exit with the expected status */
3731 		signal(signum, SIG_DFL);
3732 		kill(getpid(), signum);
3733 	}
3734 }
3735 
3736 int
3737 main(int argc, char** argv)
3738 {
3739 	int diag;
3740 	portid_t port_id;
3741 	uint16_t count;
3742 	int ret;
3743 
3744 	signal(SIGINT, signal_handler);
3745 	signal(SIGTERM, signal_handler);
3746 
3747 	testpmd_logtype = rte_log_register("testpmd");
3748 	if (testpmd_logtype < 0)
3749 		rte_exit(EXIT_FAILURE, "Cannot register log type");
3750 	rte_log_set_level(testpmd_logtype, RTE_LOG_DEBUG);
3751 
3752 	diag = rte_eal_init(argc, argv);
3753 	if (diag < 0)
3754 		rte_exit(EXIT_FAILURE, "Cannot init EAL: %s\n",
3755 			 rte_strerror(rte_errno));
3756 
3757 	if (rte_eal_process_type() == RTE_PROC_SECONDARY)
3758 		rte_exit(EXIT_FAILURE,
3759 			 "Secondary process type not supported.\n");
3760 
3761 	ret = register_eth_event_callback();
3762 	if (ret != 0)
3763 		rte_exit(EXIT_FAILURE, "Cannot register for ethdev events");
3764 
3765 #ifdef RTE_LIB_PDUMP
3766 	/* initialize packet capture framework */
3767 	rte_pdump_init();
3768 #endif
3769 
3770 	count = 0;
3771 	RTE_ETH_FOREACH_DEV(port_id) {
3772 		ports_ids[count] = port_id;
3773 		count++;
3774 	}
3775 	nb_ports = (portid_t) count;
3776 	if (nb_ports == 0)
3777 		TESTPMD_LOG(WARNING, "No probed ethernet devices\n");
3778 
3779 	/* allocate port structures, and init them */
3780 	init_port();
3781 
3782 	set_def_fwd_config();
3783 	if (nb_lcores == 0)
3784 		rte_exit(EXIT_FAILURE, "No cores defined for forwarding\n"
3785 			 "Check the core mask argument\n");
3786 
3787 	/* Bitrate/latency stats disabled by default */
3788 #ifdef RTE_LIB_BITRATESTATS
3789 	bitrate_enabled = 0;
3790 #endif
3791 #ifdef RTE_LIB_LATENCYSTATS
3792 	latencystats_enabled = 0;
3793 #endif
3794 
3795 	/* on FreeBSD, mlockall() is disabled by default */
3796 #ifdef RTE_EXEC_ENV_FREEBSD
3797 	do_mlockall = 0;
3798 #else
3799 	do_mlockall = 1;
3800 #endif
3801 
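	/*
	 * rte_eal_init() returns the number of arguments it consumed; skip
	 * past them so launch_args_parse() only sees the application
	 * arguments that follow the "--" separator.
	 */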
3802 	argc -= diag;
3803 	argv += diag;
3804 	if (argc > 1)
3805 		launch_args_parse(argc, argv);
3806 
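	/*
	 * Lock current and future pages in memory so the data path does not
	 * take page faults; do_mlockall may have been overridden above by a
	 * command-line option.
	 */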
3807 	if (do_mlockall && mlockall(MCL_CURRENT | MCL_FUTURE)) {
3808 		TESTPMD_LOG(NOTICE, "mlockall() failed with error \"%s\"\n",
3809 			strerror(errno));
3810 	}
3811 
3812 	if (tx_first && interactive)
3813 		rte_exit(EXIT_FAILURE, "--tx-first cannot be used in "
3814 				"interactive mode.\n");
3815 
3816 	if (tx_first && lsc_interrupt) {
3817 		printf("Warning: lsc_interrupt needs to be off when "
3818 				"using tx_first. Disabling.\n");
3819 		lsc_interrupt = 0;
3820 	}
3821 
3822 	if (!nb_rxq && !nb_txq)
3823 		printf("Warning: at least one of rx and tx queue counts should be non-zero\n");
3824 
3825 	if (nb_rxq > 1 && nb_rxq > nb_txq)
3826 		printf("Warning: nb_rxq=%d enables RSS configuration, "
3827 		       "but nb_txq=%d will prevent it from being fully tested.\n",
3828 		       nb_rxq, nb_txq);
3829 
3830 	init_config();
3831 
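	/*
	 * When hotplug is requested, enable the EAL device event machinery so
	 * that attach/detach notifications are delivered to
	 * dev_event_callback().
	 */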
3832 	if (hot_plug) {
3833 		ret = rte_dev_hotplug_handle_enable();
3834 		if (ret) {
3835 			RTE_LOG(ERR, EAL,
3836 				"Failed to enable hotplug handling.\n");
3837 			return -1;
3838 		}
3839 
3840 		ret = rte_dev_event_monitor_start();
3841 		if (ret) {
3842 			RTE_LOG(ERR, EAL,
3843 				"Failed to start device event monitoring.\n");
3844 			return -1;
3845 		}
3846 
3847 		ret = rte_dev_event_callback_register(NULL,
3848 			dev_event_callback, NULL);
3849 		if (ret) {
3850 			RTE_LOG(ERR, EAL,
3851 				"Failed to register device event callback.\n");
3852 			return -1;
3853 		}
3854 	}
3855 
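	/*
	 * Start all probed ports now, unless device start was disabled on the
	 * command line (no_device_start).
	 */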
3856 	if (!no_device_start && start_port(RTE_PORT_ALL) != 0)
3857 		rte_exit(EXIT_FAILURE, "Failed to start ports\n");
3858 
3859 	/* set all ports to promiscuous mode by default */
3860 	RTE_ETH_FOREACH_DEV(port_id) {
3861 		ret = rte_eth_promiscuous_enable(port_id);
3862 		if (ret != 0)
3863 			printf("Failed to enable promiscuous mode for port %u: %s - ignoring\n",
3864 				port_id, rte_strerror(-ret));
3865 	}
3866 
3867 	/* Init metrics library */
3868 	rte_metrics_init(rte_socket_id());
3869 
3870 #ifdef RTE_LIB_LATENCYSTATS
3871 	if (latencystats_enabled != 0) {
3872 		int ret = rte_latencystats_init(1, NULL);
3873 		if (ret)
3874 			printf("Warning: latencystats init() returned error %d\n",
3875 				ret);
3876 		printf("Latencystats running on lcore %d\n",
3877 			latencystats_lcore_id);
3878 	}
3879 #endif
3880 
3881 	/* Setup bitrate stats */
3882 #ifdef RTE_LIB_BITRATESTATS
3883 	if (bitrate_enabled != 0) {
3884 		bitrate_data = rte_stats_bitrate_create();
3885 		if (bitrate_data == NULL)
3886 			rte_exit(EXIT_FAILURE,
3887 				"Could not allocate bitrate data.\n");
3888 		rte_stats_bitrate_reg(bitrate_data);
3889 	}
3890 #endif
3891 
3892 #ifdef RTE_LIB_CMDLINE
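	/*
	 * Replay any commands from the file named by cmdline_filename before
	 * (optionally) dropping into the interactive prompt.
	 */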
3893 	if (strlen(cmdline_filename) != 0)
3894 		cmdline_read_from_file(cmdline_filename);
3895 
3896 	if (interactive == 1) {
3897 		if (auto_start) {
3898 			printf("Start automatic packet forwarding\n");
3899 			start_packet_forwarding(0);
3900 		}
3901 		prompt();
3902 		pmd_test_exit();
3903 	} else
3904 #endif
3905 	{
3906 		char c;
3907 		int rc;
3908 
3909 		f_quit = 0;
3910 
3911 		printf("No command-line core given, starting packet forwarding\n");
3912 		start_packet_forwarding(tx_first);
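		/*
		 * Non-interactive run: for example, an invocation along the
		 * lines of
		 *   dpdk-testpmd -l 0-3 -n 4 -- --stats-period 5
		 * (illustrative; the binary name and EAL options depend on the
		 * build and platform) lands here and prints per-port stats
		 * every stats_period seconds until a signal is received.
		 */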
3913 		if (stats_period != 0) {
3914 			uint64_t prev_time = 0, cur_time, diff_time = 0;
3915 			uint64_t timer_period;
3916 
3917 			/* Convert to number of cycles */
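			/* e.g. --stats-period 5 on a 2 GHz timer gives 10^10 cycles (illustrative figures) */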
3918 			timer_period = stats_period * rte_get_timer_hz();
3919 
3920 			while (f_quit == 0) {
3921 				cur_time = rte_get_timer_cycles();
3922 				diff_time += cur_time - prev_time;
3923 
3924 				if (diff_time >= timer_period) {
3925 					print_stats();
3926 					/* Reset the timer */
3927 					diff_time = 0;
3928 				}
3929 				prev_time = cur_time;
3930 				/* Sleep to avoid unnecessary checks */
3931 				sleep(1);
3932 			}
3933 		}
3934 
3935 		printf("Press enter to exit\n");
3936 		rc = read(0, &c, 1);
3937 		pmd_test_exit();
3938 		if (rc < 0)
3939 			return 1;
3940 	}
3941 
3942 	ret = rte_eal_cleanup();
3943 	if (ret != 0)
3944 		rte_exit(EXIT_FAILURE,
3945 			 "EAL cleanup failed: %s\n", strerror(-ret));
3946 
3947 	return EXIT_SUCCESS;
3948 }
3949