xref: /dpdk/examples/vhost/main.c (revision 4cc4f3ee12dad4b02dd1c00a8f418ddf8f135ce4)
13998e2a0SBruce Richardson /* SPDX-License-Identifier: BSD-3-Clause
23998e2a0SBruce Richardson  * Copyright(c) 2010-2017 Intel Corporation
3d19533e8SHuawei Xie  */
4d19533e8SHuawei Xie 
5d19533e8SHuawei Xie #include <arpa/inet.h>
6d19533e8SHuawei Xie #include <getopt.h>
7d19533e8SHuawei Xie #include <linux/if_ether.h>
8d19533e8SHuawei Xie #include <linux/if_vlan.h>
9d19533e8SHuawei Xie #include <linux/virtio_net.h>
10d19533e8SHuawei Xie #include <linux/virtio_ring.h>
11d19533e8SHuawei Xie #include <signal.h>
12d19533e8SHuawei Xie #include <stdint.h>
13d19533e8SHuawei Xie #include <sys/eventfd.h>
14d19533e8SHuawei Xie #include <sys/param.h>
15d19533e8SHuawei Xie #include <unistd.h>
16d19533e8SHuawei Xie 
17d19533e8SHuawei Xie #include <rte_cycles.h>
18d19533e8SHuawei Xie #include <rte_ethdev.h>
19d19533e8SHuawei Xie #include <rte_log.h>
20d19533e8SHuawei Xie #include <rte_string_fns.h>
21d19533e8SHuawei Xie #include <rte_malloc.h>
22ca7036b4SDavid Marchand #include <rte_net.h>
23a798beb4SYuanhan Liu #include <rte_vhost.h>
24691693c6SJijiang Liu #include <rte_ip.h>
259fd72e3cSJijiang Liu #include <rte_tcp.h>
26577329e6SJerin Jacob #include <rte_pause.h>
2753d3f477SJiayu Hu #include <rte_dmadev.h>
2853d3f477SJiayu Hu #include <rte_vhost_async.h>
29d19533e8SHuawei Xie 
30d19533e8SHuawei Xie #include "main.h"
31d19533e8SHuawei Xie 
#ifndef MAX_QUEUES
#define MAX_QUEUES 128
#endif

/* Default number of mbufs in all pools (0x24000 == 147456). */
#define NUM_MBUFS_DEFAULT 0x24000

/* the maximum number of external ports supported */
#define MAX_SUP_PORTS 1

#define MBUF_CACHE_SIZE	128
#define MBUF_DATA_SIZE	RTE_MBUF_DEFAULT_BUF_SIZE

#define BURST_TX_DRAIN_US 100	/* TX drain every ~100us */

#define BURST_RX_WAIT_US 15	/* Defines how long we wait between retries on RX */
#define BURST_RX_RETRIES 4		/* Number of retries on RX. */

#define JUMBO_FRAME_MAX_SIZE    0x2600
#define MAX_MTU (JUMBO_FRAME_MAX_SIZE - (RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN))

/* State of virtio device. */
#define DEVICE_MAC_LEARNING 0
#define DEVICE_RX			1
#define DEVICE_SAFE_REMOVE	2

/* Configurable number of RX/TX ring descriptors */
#define RTE_TEST_RX_DESC_DEFAULT 1024
#define RTE_TEST_TX_DESC_DEFAULT 512

#define INVALID_PORT_ID 0xFF
#define INVALID_DMA_ID -1

#define DMA_RING_SIZE 4096

/* Flags recording which datapath direction(s) of a vhost device use DMA. */
#define ASYNC_ENQUEUE_VHOST 1
#define ASYNC_DEQUEUE_VHOST 2

/* number of mbufs in all pools - if specified on command-line. */
static int total_num_mbufs = NUM_MBUFS_DEFAULT;

/* Per-socket DMA bindings, the DMA device ids in use, and their count. */
struct dma_for_vhost dma_bind[RTE_MAX_VHOST_DEVICE];
int16_t dmas_id[RTE_DMADEV_DEFAULT_MAX];
static int dma_count;

/* mask of enabled ports */
static uint32_t enabled_port_mask = 0;

/* Promiscuous mode */
static uint32_t promiscuous;

/* number of devices/queues to support*/
static uint32_t num_queues = 0;
static uint32_t num_devices;

/* Single mbuf pool shared by all ports/queues. */
static struct rte_mempool *mbuf_pool;
/* Non-zero when mergeable Rx buffers are enabled (--mergeable). */
static int mergeable;

/* Enable VM2VM communications. If this is disabled then the MAC address compare is skipped. */
typedef enum {
	VM2VM_DISABLED = 0,
	VM2VM_SOFTWARE = 1,
	VM2VM_HARDWARE = 2,
	VM2VM_LAST
} vm2vm_type;
static vm2vm_type vm2vm_mode = VM2VM_SOFTWARE;

/* Enable stats. */
static uint32_t enable_stats = 0;
/* Enable retries on RX. */
static uint32_t enable_retry = 1;

/* Disable TX checksum offload */
static uint32_t enable_tx_csum;

/* Disable TSO offload */
static uint32_t enable_tso;

/* Register vhost-user sockets in client mode (--client). */
static int client_mode;

/* Use the example's built-in virtio-net driver (--builtin-net-driver). */
static int builtin_net_driver;

/* Specify timeout (in useconds) between retries on RX. */
static uint32_t burst_rx_delay_time = BURST_RX_WAIT_US;
/* Specify the number of retries on RX. */
static uint32_t burst_rx_retry_num = BURST_RX_RETRIES;

/* Socket file paths. Can be set by user */
static char *socket_files;
static int nb_sockets;

/* Per-device rx/tx queue operations (sync or async datapath). */
static struct vhost_queue_ops vdev_queue_ops[RTE_MAX_VHOST_DEVICE];

/* empty VMDq configuration structure. Filled in programmatically */
static struct rte_eth_conf vmdq_conf_default = {
	.rxmode = {
		.mq_mode        = RTE_ETH_MQ_RX_VMDQ_ONLY,
		.split_hdr_size = 0,
		/*
		 * VLAN strip is necessary for 1G NIC such as I350,
		 * this fixes bug of ipv4 forwarding in guest can't
		 * forward packets from one virtio dev to another virtio dev.
		 */
		.offloads = RTE_ETH_RX_OFFLOAD_VLAN_STRIP,
	},

	.txmode = {
		.mq_mode = RTE_ETH_MQ_TX_NONE,
		.offloads = (RTE_ETH_TX_OFFLOAD_IPV4_CKSUM |
			     RTE_ETH_TX_OFFLOAD_TCP_CKSUM |
			     RTE_ETH_TX_OFFLOAD_VLAN_INSERT |
			     RTE_ETH_TX_OFFLOAD_MULTI_SEGS |
			     RTE_ETH_TX_OFFLOAD_TCP_TSO),
	},
	.rx_adv_conf = {
		/*
		 * should be overridden separately in code with
		 * appropriate values
		 */
		.vmdq_rx_conf = {
			.nb_queue_pools = RTE_ETH_8_POOLS,
			.enable_default_pool = 0,
			.default_pool = 0,
			.nb_pool_maps = 0,
			.pool_map = {{0, 0},},
		},
	},
};


static unsigned lcore_ids[RTE_MAX_LCORE];
static uint16_t ports[RTE_MAX_ETHPORTS];
static unsigned num_ports = 0; /**< The number of ports specified in command line */
static uint16_t num_pf_queues, num_vmdq_queues;
static uint16_t vmdq_pool_base, vmdq_queue_base;
static uint16_t queues_per_pool;

/* VLAN tag assigned to each VMDq pool, indexed by pool number. */
const uint16_t vlan_tags[] = {
	1000, 1001, 1002, 1003, 1004, 1005, 1006, 1007,
	1008, 1009, 1010, 1011,	1012, 1013, 1014, 1015,
	1016, 1017, 1018, 1019, 1020, 1021, 1022, 1023,
	1024, 1025, 1026, 1027, 1028, 1029, 1030, 1031,
	1032, 1033, 1034, 1035, 1036, 1037, 1038, 1039,
	1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047,
	1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055,
	1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063,
};

/* ethernet addresses of ports */
static struct rte_ether_addr vmdq_ports_eth_addr[RTE_MAX_ETHPORTS];

/* List of currently attached vhost devices. */
static struct vhost_dev_tailq_list vhost_dev_list =
	TAILQ_HEAD_INITIALIZER(vhost_dev_list);

static struct lcore_info lcore_info[RTE_MAX_LCORE];

/* Used for queueing bursts of TX packets. */
struct mbuf_table {
	unsigned len;
	unsigned txq_id;
	struct rte_mbuf *m_table[MAX_PKT_BURST];
};

/* Buffered packets (and the TSC of the last flush) for one vhost device. */
struct vhost_bufftable {
	uint32_t len;
	uint64_t pre_tsc;
	struct rte_mbuf *m_table[MAX_PKT_BURST];
};

/* TX queue for each data core. */
struct mbuf_table lcore_tx_queue[RTE_MAX_LCORE];

/*
 * Vhost TX buffer for each data core.
 * Every data core maintains a TX buffer for every vhost device,
 * which is used for batch pkts enqueue for higher performance.
 */
struct vhost_bufftable *vhost_txbuff[RTE_MAX_LCORE * RTE_MAX_VHOST_DEVICE];

#define MBUF_TABLE_DRAIN_TSC	((rte_get_tsc_hz() + US_PER_S - 1) \
				 / US_PER_S * BURST_TX_DRAIN_US)

/* Maps a vhost device id (vid) to the index of its socket file. */
static int vid2socketid[RTE_MAX_VHOST_DEVICE];
214a543dcb7SXuan Ding 
215a543dcb7SXuan Ding static inline uint32_t
216a543dcb7SXuan Ding get_async_flag_by_socketid(int socketid)
217a543dcb7SXuan Ding {
218a543dcb7SXuan Ding 	return dma_bind[socketid].async_flag;
219a543dcb7SXuan Ding }
220a543dcb7SXuan Ding 
221a543dcb7SXuan Ding static inline void
222a543dcb7SXuan Ding init_vid2socketid_array(int vid, int socketid)
223a543dcb7SXuan Ding {
224a543dcb7SXuan Ding 	vid2socketid[vid] = socketid;
225a543dcb7SXuan Ding }
226a543dcb7SXuan Ding 
22753d3f477SJiayu Hu static inline bool
22853d3f477SJiayu Hu is_dma_configured(int16_t dev_id)
22953d3f477SJiayu Hu {
23053d3f477SJiayu Hu 	int i;
23153d3f477SJiayu Hu 
23253d3f477SJiayu Hu 	for (i = 0; i < dma_count; i++)
23353d3f477SJiayu Hu 		if (dmas_id[i] == dev_id)
23453d3f477SJiayu Hu 			return true;
23553d3f477SJiayu Hu 	return false;
23653d3f477SJiayu Hu }
23753d3f477SJiayu Hu 
2383a04ecb2SCheng Jiang static inline int
2393a04ecb2SCheng Jiang open_dma(const char *value)
2403a04ecb2SCheng Jiang {
24153d3f477SJiayu Hu 	struct dma_for_vhost *dma_info = dma_bind;
24253d3f477SJiayu Hu 	char *input = strndup(value, strlen(value) + 1);
24353d3f477SJiayu Hu 	char *addrs = input;
24453d3f477SJiayu Hu 	char *ptrs[2];
24553d3f477SJiayu Hu 	char *start, *end, *substr;
246a543dcb7SXuan Ding 	int64_t socketid, vring_id;
2473a04ecb2SCheng Jiang 
24853d3f477SJiayu Hu 	struct rte_dma_info info;
24953d3f477SJiayu Hu 	struct rte_dma_conf dev_config = { .nb_vchans = 1 };
25053d3f477SJiayu Hu 	struct rte_dma_vchan_conf qconf = {
25153d3f477SJiayu Hu 		.direction = RTE_DMA_DIR_MEM_TO_MEM,
25253d3f477SJiayu Hu 		.nb_desc = DMA_RING_SIZE
25353d3f477SJiayu Hu 	};
25453d3f477SJiayu Hu 
25553d3f477SJiayu Hu 	int dev_id;
25653d3f477SJiayu Hu 	int ret = 0;
25753d3f477SJiayu Hu 	uint16_t i = 0;
25853d3f477SJiayu Hu 	char *dma_arg[RTE_MAX_VHOST_DEVICE];
25953d3f477SJiayu Hu 	int args_nr;
26053d3f477SJiayu Hu 
26153d3f477SJiayu Hu 	while (isblank(*addrs))
26253d3f477SJiayu Hu 		addrs++;
26353d3f477SJiayu Hu 	if (*addrs == '\0') {
26453d3f477SJiayu Hu 		ret = -1;
26553d3f477SJiayu Hu 		goto out;
26653d3f477SJiayu Hu 	}
26753d3f477SJiayu Hu 
26853d3f477SJiayu Hu 	/* process DMA devices within bracket. */
26953d3f477SJiayu Hu 	addrs++;
27053d3f477SJiayu Hu 	substr = strtok(addrs, ";]");
27153d3f477SJiayu Hu 	if (!substr) {
27253d3f477SJiayu Hu 		ret = -1;
27353d3f477SJiayu Hu 		goto out;
27453d3f477SJiayu Hu 	}
27553d3f477SJiayu Hu 
27653d3f477SJiayu Hu 	args_nr = rte_strsplit(substr, strlen(substr), dma_arg, RTE_MAX_VHOST_DEVICE, ',');
27753d3f477SJiayu Hu 	if (args_nr <= 0) {
27853d3f477SJiayu Hu 		ret = -1;
27953d3f477SJiayu Hu 		goto out;
28053d3f477SJiayu Hu 	}
28153d3f477SJiayu Hu 
28253d3f477SJiayu Hu 	while (i < args_nr) {
28353d3f477SJiayu Hu 		char *arg_temp = dma_arg[i];
284a543dcb7SXuan Ding 		char *txd, *rxd;
28553d3f477SJiayu Hu 		uint8_t sub_nr;
286a543dcb7SXuan Ding 		int async_flag;
28753d3f477SJiayu Hu 
28853d3f477SJiayu Hu 		sub_nr = rte_strsplit(arg_temp, strlen(arg_temp), ptrs, 2, '@');
28953d3f477SJiayu Hu 		if (sub_nr != 2) {
29053d3f477SJiayu Hu 			ret = -1;
29153d3f477SJiayu Hu 			goto out;
29253d3f477SJiayu Hu 		}
29353d3f477SJiayu Hu 
294a543dcb7SXuan Ding 		txd = strstr(ptrs[0], "txd");
295a543dcb7SXuan Ding 		rxd = strstr(ptrs[0], "rxd");
296a543dcb7SXuan Ding 		if (txd) {
297a543dcb7SXuan Ding 			start = txd;
298a543dcb7SXuan Ding 			vring_id = VIRTIO_RXQ;
299a543dcb7SXuan Ding 			async_flag = ASYNC_ENQUEUE_VHOST;
300a543dcb7SXuan Ding 		} else if (rxd) {
301a543dcb7SXuan Ding 			start = rxd;
302a543dcb7SXuan Ding 			vring_id = VIRTIO_TXQ;
303a543dcb7SXuan Ding 			async_flag = ASYNC_DEQUEUE_VHOST;
304a543dcb7SXuan Ding 		} else {
30553d3f477SJiayu Hu 			ret = -1;
30653d3f477SJiayu Hu 			goto out;
30753d3f477SJiayu Hu 		}
30853d3f477SJiayu Hu 
30953d3f477SJiayu Hu 		start += 3;
310a543dcb7SXuan Ding 		socketid = strtol(start, &end, 0);
31153d3f477SJiayu Hu 		if (end == start) {
31253d3f477SJiayu Hu 			ret = -1;
31353d3f477SJiayu Hu 			goto out;
31453d3f477SJiayu Hu 		}
31553d3f477SJiayu Hu 
31653d3f477SJiayu Hu 		dev_id = rte_dma_get_dev_id_by_name(ptrs[1]);
31753d3f477SJiayu Hu 		if (dev_id < 0) {
31853d3f477SJiayu Hu 			RTE_LOG(ERR, VHOST_CONFIG, "Fail to find DMA %s.\n", ptrs[1]);
31953d3f477SJiayu Hu 			ret = -1;
32053d3f477SJiayu Hu 			goto out;
32153d3f477SJiayu Hu 		}
32253d3f477SJiayu Hu 
32353d3f477SJiayu Hu 		/* DMA device is already configured, so skip */
32453d3f477SJiayu Hu 		if (is_dma_configured(dev_id))
32553d3f477SJiayu Hu 			goto done;
32653d3f477SJiayu Hu 
32753d3f477SJiayu Hu 		if (rte_dma_info_get(dev_id, &info) != 0) {
32853d3f477SJiayu Hu 			RTE_LOG(ERR, VHOST_CONFIG, "Error with rte_dma_info_get()\n");
32953d3f477SJiayu Hu 			ret = -1;
33053d3f477SJiayu Hu 			goto out;
33153d3f477SJiayu Hu 		}
33253d3f477SJiayu Hu 
33353d3f477SJiayu Hu 		if (info.max_vchans < 1) {
33453d3f477SJiayu Hu 			RTE_LOG(ERR, VHOST_CONFIG, "No channels available on device %d\n", dev_id);
33553d3f477SJiayu Hu 			ret = -1;
33653d3f477SJiayu Hu 			goto out;
33753d3f477SJiayu Hu 		}
33853d3f477SJiayu Hu 
33953d3f477SJiayu Hu 		if (rte_dma_configure(dev_id, &dev_config) != 0) {
34053d3f477SJiayu Hu 			RTE_LOG(ERR, VHOST_CONFIG, "Fail to configure DMA %d.\n", dev_id);
34153d3f477SJiayu Hu 			ret = -1;
34253d3f477SJiayu Hu 			goto out;
34353d3f477SJiayu Hu 		}
34453d3f477SJiayu Hu 
34553d3f477SJiayu Hu 		/* Check the max desc supported by DMA device */
34653d3f477SJiayu Hu 		rte_dma_info_get(dev_id, &info);
34753d3f477SJiayu Hu 		if (info.nb_vchans != 1) {
34853d3f477SJiayu Hu 			RTE_LOG(ERR, VHOST_CONFIG, "No configured queues reported by DMA %d.\n",
34953d3f477SJiayu Hu 					dev_id);
35053d3f477SJiayu Hu 			ret = -1;
35153d3f477SJiayu Hu 			goto out;
35253d3f477SJiayu Hu 		}
35353d3f477SJiayu Hu 
35453d3f477SJiayu Hu 		qconf.nb_desc = RTE_MIN(DMA_RING_SIZE, info.max_desc);
35553d3f477SJiayu Hu 
35653d3f477SJiayu Hu 		if (rte_dma_vchan_setup(dev_id, 0, &qconf) != 0) {
35753d3f477SJiayu Hu 			RTE_LOG(ERR, VHOST_CONFIG, "Fail to set up DMA %d.\n", dev_id);
35853d3f477SJiayu Hu 			ret = -1;
35953d3f477SJiayu Hu 			goto out;
36053d3f477SJiayu Hu 		}
36153d3f477SJiayu Hu 
36253d3f477SJiayu Hu 		if (rte_dma_start(dev_id) != 0) {
36353d3f477SJiayu Hu 			RTE_LOG(ERR, VHOST_CONFIG, "Fail to start DMA %u.\n", dev_id);
36453d3f477SJiayu Hu 			ret = -1;
36553d3f477SJiayu Hu 			goto out;
36653d3f477SJiayu Hu 		}
36753d3f477SJiayu Hu 
36853d3f477SJiayu Hu 		dmas_id[dma_count++] = dev_id;
36953d3f477SJiayu Hu 
37053d3f477SJiayu Hu done:
371a543dcb7SXuan Ding 		(dma_info + socketid)->dmas[vring_id].dev_id = dev_id;
372a543dcb7SXuan Ding 		(dma_info + socketid)->async_flag |= async_flag;
37353d3f477SJiayu Hu 		i++;
37453d3f477SJiayu Hu 	}
37553d3f477SJiayu Hu out:
37653d3f477SJiayu Hu 	free(input);
37753d3f477SJiayu Hu 	return ret;
3783a04ecb2SCheng Jiang }
3793a04ecb2SCheng Jiang 
380d19533e8SHuawei Xie /*
381d19533e8SHuawei Xie  * Builds up the correct configuration for VMDQ VLAN pool map
382d19533e8SHuawei Xie  * according to the pool & queue limits.
383d19533e8SHuawei Xie  */
384d19533e8SHuawei Xie static inline int
385d19533e8SHuawei Xie get_eth_conf(struct rte_eth_conf *eth_conf, uint32_t num_devices)
386d19533e8SHuawei Xie {
387d19533e8SHuawei Xie 	struct rte_eth_vmdq_rx_conf conf;
38890924cafSOuyang Changchun 	struct rte_eth_vmdq_rx_conf *def_conf =
38990924cafSOuyang Changchun 		&vmdq_conf_default.rx_adv_conf.vmdq_rx_conf;
390d19533e8SHuawei Xie 	unsigned i;
391d19533e8SHuawei Xie 
392d19533e8SHuawei Xie 	memset(&conf, 0, sizeof(conf));
393d19533e8SHuawei Xie 	conf.nb_queue_pools = (enum rte_eth_nb_pools)num_devices;
394d19533e8SHuawei Xie 	conf.nb_pool_maps = num_devices;
39590924cafSOuyang Changchun 	conf.enable_loop_back = def_conf->enable_loop_back;
39690924cafSOuyang Changchun 	conf.rx_mode = def_conf->rx_mode;
397d19533e8SHuawei Xie 
398d19533e8SHuawei Xie 	for (i = 0; i < conf.nb_pool_maps; i++) {
399d19533e8SHuawei Xie 		conf.pool_map[i].vlan_id = vlan_tags[ i ];
400d19533e8SHuawei Xie 		conf.pool_map[i].pools = (1UL << i);
401d19533e8SHuawei Xie 	}
402d19533e8SHuawei Xie 
403d19533e8SHuawei Xie 	(void)(rte_memcpy(eth_conf, &vmdq_conf_default, sizeof(*eth_conf)));
404d19533e8SHuawei Xie 	(void)(rte_memcpy(&eth_conf->rx_adv_conf.vmdq_rx_conf, &conf,
405d19533e8SHuawei Xie 		   sizeof(eth_conf->rx_adv_conf.vmdq_rx_conf)));
406d19533e8SHuawei Xie 	return 0;
407d19533e8SHuawei Xie }
408d19533e8SHuawei Xie 
/*
 * Initialises a given port using the global settings, with RX buffers
 * coming from the global mbuf_pool: configures the VMDq pools, sets up
 * all RX/TX queues and starts the port.  Returns 0 on success, a
 * negative value on failure.
 */
static inline int
port_init(uint16_t port)
{
	struct rte_eth_dev_info dev_info;
	struct rte_eth_conf port_conf;
	struct rte_eth_rxconf *rxconf;
	struct rte_eth_txconf *txconf;
	int16_t rx_rings, tx_rings;
	uint16_t rx_ring_size, tx_ring_size;
	int retval;
	uint16_t q;

	/* The max pool number from dev_info will be used to validate the pool number specified in cmd line */
	retval = rte_eth_dev_info_get(port, &dev_info);
	if (retval != 0) {
		RTE_LOG(ERR, VHOST_PORT,
			"Error during getting device (port %u) info: %s\n",
			port, strerror(-retval));

		return retval;
	}
	/* A port with no VMDq pools cannot host the per-device pools. */
	if (dev_info.max_vmdq_pools == 0) {
		RTE_LOG(ERR, VHOST_PORT, "Failed to get VMDq info.\n");
		return -1;
	}

	/* Tweak the device's default queue configs: drop on RX overflow. */
	rxconf = &dev_info.default_rxconf;
	txconf = &dev_info.default_txconf;
	rxconf->rx_drop_en = 1;

	/*configure the number of supported virtio devices based on VMDQ limits */
	num_devices = dev_info.max_vmdq_pools;

	rx_ring_size = RTE_TEST_RX_DESC_DEFAULT;
	tx_ring_size = RTE_TEST_TX_DESC_DEFAULT;

	/* One TX queue per lcore. */
	tx_rings = (uint16_t)rte_lcore_count();

	/* With mergeable Rx buffers, raise the MTU, capped at the device's
	 * reported max_mtu when one is advertised.
	 */
	if (mergeable) {
		if (dev_info.max_mtu != UINT16_MAX && dev_info.max_rx_pktlen > dev_info.max_mtu)
			vmdq_conf_default.rxmode.mtu = dev_info.max_mtu;
		else
			vmdq_conf_default.rxmode.mtu = MAX_MTU;
	}

	/* Get port configuration. */
	retval = get_eth_conf(&port_conf, num_devices);
	if (retval < 0)
		return retval;
	/* NIC queues are divided into pf queues and vmdq queues.  */
	num_pf_queues = dev_info.max_rx_queues - dev_info.vmdq_queue_num;
	queues_per_pool = dev_info.vmdq_queue_num / dev_info.max_vmdq_pools;
	num_vmdq_queues = num_devices * queues_per_pool;
	num_queues = num_pf_queues + num_vmdq_queues;
	vmdq_queue_base = dev_info.vmdq_queue_base;
	vmdq_pool_base  = dev_info.vmdq_pool_base;
	printf("pf queue num: %u, configured vmdq pool num: %u, each vmdq pool has %u queues\n",
		num_pf_queues, num_devices, queues_per_pool);

	/* NOTE(review): port validity is only checked here, after dev_info
	 * was already queried above.
	 */
	if (!rte_eth_dev_is_valid_port(port))
		return -1;

	rx_rings = (uint16_t)dev_info.max_rx_queues;
	if (dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE)
		port_conf.txmode.offloads |=
			RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE;
	/* Configure ethernet device. */
	retval = rte_eth_dev_configure(port, rx_rings, tx_rings, &port_conf);
	if (retval != 0) {
		RTE_LOG(ERR, VHOST_PORT, "Failed to configure port %u: %s.\n",
			port, strerror(-retval));
		return retval;
	}

	/* The driver may round the descriptor counts up or down. */
	retval = rte_eth_dev_adjust_nb_rx_tx_desc(port, &rx_ring_size,
		&tx_ring_size);
	if (retval != 0) {
		RTE_LOG(ERR, VHOST_PORT, "Failed to adjust number of descriptors "
			"for port %u: %s.\n", port, strerror(-retval));
		return retval;
	}
	if (rx_ring_size > RTE_TEST_RX_DESC_DEFAULT) {
		RTE_LOG(ERR, VHOST_PORT, "Mbuf pool has an insufficient size "
			"for Rx queues on port %u.\n", port);
		return -1;
	}

	/* Setup the queues. */
	rxconf->offloads = port_conf.rxmode.offloads;
	for (q = 0; q < rx_rings; q ++) {
		retval = rte_eth_rx_queue_setup(port, q, rx_ring_size,
						rte_eth_dev_socket_id(port),
						rxconf,
						mbuf_pool);
		if (retval < 0) {
			RTE_LOG(ERR, VHOST_PORT,
				"Failed to setup rx queue %u of port %u: %s.\n",
				q, port, strerror(-retval));
			return retval;
		}
	}
	txconf->offloads = port_conf.txmode.offloads;
	for (q = 0; q < tx_rings; q ++) {
		retval = rte_eth_tx_queue_setup(port, q, tx_ring_size,
						rte_eth_dev_socket_id(port),
						txconf);
		if (retval < 0) {
			RTE_LOG(ERR, VHOST_PORT,
				"Failed to setup tx queue %u of port %u: %s.\n",
				q, port, strerror(-retval));
			return retval;
		}
	}

	/* Start the device. */
	retval  = rte_eth_dev_start(port);
	if (retval < 0) {
		RTE_LOG(ERR, VHOST_PORT, "Failed to start port %u: %s\n",
			port, strerror(-retval));
		return retval;
	}

	if (promiscuous) {
		retval = rte_eth_promiscuous_enable(port);
		if (retval != 0) {
			RTE_LOG(ERR, VHOST_PORT,
				"Failed to enable promiscuous mode on port %u: %s\n",
				port, rte_strerror(-retval));
			return retval;
		}
	}

	/* Cache the port MAC address for later use in the datapath. */
	retval = rte_eth_macaddr_get(port, &vmdq_ports_eth_addr[port]);
	if (retval < 0) {
		RTE_LOG(ERR, VHOST_PORT,
			"Failed to get MAC address on port %u: %s\n",
			port, rte_strerror(-retval));
		return retval;
	}

	RTE_LOG(INFO, VHOST_PORT, "Max virtio devices supported: %u\n", num_devices);
	RTE_LOG(INFO, VHOST_PORT, "Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8
		" %02"PRIx8" %02"PRIx8" %02"PRIx8"\n",
		port, RTE_ETHER_ADDR_BYTES(&vmdq_ports_eth_addr[port]));

	return 0;
}
560d19533e8SHuawei Xie 
561d19533e8SHuawei Xie /*
562bde19a4dSJiayu Hu  * Set socket file path.
563d19533e8SHuawei Xie  */
564d19533e8SHuawei Xie static int
565bde19a4dSJiayu Hu us_vhost_parse_socket_path(const char *q_arg)
566d19533e8SHuawei Xie {
567d79035b7STiwei Bie 	char *old;
568d79035b7STiwei Bie 
569d19533e8SHuawei Xie 	/* parse number string */
570fa81d3b9SGang Jiang 	if (strnlen(q_arg, PATH_MAX) == PATH_MAX)
571d19533e8SHuawei Xie 		return -1;
572ad0eef4dSJiayu Hu 
573d79035b7STiwei Bie 	old = socket_files;
574ad0eef4dSJiayu Hu 	socket_files = realloc(socket_files, PATH_MAX * (nb_sockets + 1));
575d79035b7STiwei Bie 	if (socket_files == NULL) {
576d79035b7STiwei Bie 		free(old);
577d79035b7STiwei Bie 		return -1;
578d79035b7STiwei Bie 	}
579d79035b7STiwei Bie 
580f9acaf84SBruce Richardson 	strlcpy(socket_files + nb_sockets * PATH_MAX, q_arg, PATH_MAX);
581ad0eef4dSJiayu Hu 	nb_sockets++;
582d19533e8SHuawei Xie 
583d19533e8SHuawei Xie 	return 0;
584d19533e8SHuawei Xie }
585d19533e8SHuawei Xie 
/*
 * Parse the portmask provided at run time.
 *
 * Returns the mask value, or 0 when the string is empty, contains
 * non-hexadecimal characters or is out of range.
 */
static int
parse_portmask(const char *portmask)
{
	unsigned long mask;
	char *end = NULL;

	errno = 0;

	/* The mask is given as a hexadecimal string. */
	mask = strtoul(portmask, &end, 16);

	/* Reject empty input, trailing garbage and range errors. */
	if (portmask[0] == '\0' || end == NULL || *end != '\0' || errno != 0)
		return 0;

	return mask;
}
605d19533e8SHuawei Xie 
606d19533e8SHuawei Xie /*
607d19533e8SHuawei Xie  * Parse num options at run time.
608d19533e8SHuawei Xie  */
609d19533e8SHuawei Xie static int
610d19533e8SHuawei Xie parse_num_opt(const char *q_arg, uint32_t max_valid_value)
611d19533e8SHuawei Xie {
612d19533e8SHuawei Xie 	char *end = NULL;
613d19533e8SHuawei Xie 	unsigned long num;
614d19533e8SHuawei Xie 
615d19533e8SHuawei Xie 	errno = 0;
616d19533e8SHuawei Xie 
617d19533e8SHuawei Xie 	/* parse unsigned int string */
618d19533e8SHuawei Xie 	num = strtoul(q_arg, &end, 10);
619d19533e8SHuawei Xie 	if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0') || (errno != 0))
620d19533e8SHuawei Xie 		return -1;
621d19533e8SHuawei Xie 
622d19533e8SHuawei Xie 	if (num > max_valid_value)
623d19533e8SHuawei Xie 		return -1;
624d19533e8SHuawei Xie 
625d19533e8SHuawei Xie 	return num;
626d19533e8SHuawei Xie 
627d19533e8SHuawei Xie }
628d19533e8SHuawei Xie 
/*
 * Display usage
 *
 * Prints the supported command-line options and their meaning to the
 * VHOST_CONFIG log.
 */
static void
us_vhost_usage(const char *prgname)
{
	RTE_LOG(INFO, VHOST_CONFIG, "%s [EAL options] -- -p PORTMASK\n"
	"		--vm2vm [0|1|2]\n"
	"		--rx_retry [0|1] --mergeable [0|1] --stats [0-N]\n"
	"		--socket-file <path>\n"
	"		--nb-devices ND\n"
	"		-p PORTMASK: Set mask for ports to be used by application\n"
	"		--vm2vm [0|1|2]: disable/software(default)/hardware vm2vm comms\n"
	"		--rx-retry [0|1]: disable/enable(default) retries on Rx. Enable retry if destination queue is full\n"
	"		--rx-retry-delay [0-N]: timeout(in usecond) between retries on RX. This makes effect only if retries on rx enabled\n"
	"		--rx-retry-num [0-N]: the number of retries on rx. This makes effect only if retries on rx enabled\n"
	"		--mergeable [0|1]: disable(default)/enable RX mergeable buffers\n"
	"		--stats [0-N]: 0: Disable stats, N: Time in seconds to print stats\n"
	"		--socket-file: The path of the socket file.\n"
	"		--tx-csum [0|1] disable/enable TX checksum offload.\n"
	"		--tso [0|1] disable/enable TCP segment offload.\n"
	"		--client register a vhost-user socket as client mode.\n"
	"		--dmas register dma channel for specific vhost device.\n"
	"		--total-num-mbufs [0-N] set the number of mbufs to be allocated in mbuf pools, the default value is 147456.\n",
	       prgname);
}
655d19533e8SHuawei Xie 
/*
 * getopt_long() values for the long options, paired with the option
 * name strings they belong to.  Numbering starts at 256 so the values
 * cannot collide with printable short-option characters.
 */
enum {
#define OPT_VM2VM               "vm2vm"
	OPT_VM2VM_NUM = 256,
#define OPT_RX_RETRY            "rx-retry"
	OPT_RX_RETRY_NUM,
#define OPT_RX_RETRY_DELAY      "rx-retry-delay"
	OPT_RX_RETRY_DELAY_NUM,
#define OPT_RX_RETRY_NUMB       "rx-retry-num"
	OPT_RX_RETRY_NUMB_NUM,
#define OPT_MERGEABLE           "mergeable"
	OPT_MERGEABLE_NUM,
#define OPT_STATS               "stats"
	OPT_STATS_NUM,
#define OPT_SOCKET_FILE         "socket-file"
	OPT_SOCKET_FILE_NUM,
#define OPT_TX_CSUM             "tx-csum"
	OPT_TX_CSUM_NUM,
#define OPT_TSO                 "tso"
	OPT_TSO_NUM,
#define OPT_CLIENT              "client"
	OPT_CLIENT_NUM,
#define OPT_BUILTIN_NET_DRIVER  "builtin-net-driver"
	OPT_BUILTIN_NET_DRIVER_NUM,
#define OPT_NUM_MBUFS           "total-num-mbufs"
	OPT_NUM_MBUFS_NUM,
};
684965b06f0SIbtisam Tariq 
685d19533e8SHuawei Xie /*
686d19533e8SHuawei Xie  * Parse the arguments given in the command line of the application.
687d19533e8SHuawei Xie  */
688d19533e8SHuawei Xie static int
689d19533e8SHuawei Xie us_vhost_parse_args(int argc, char **argv)
690d19533e8SHuawei Xie {
691d19533e8SHuawei Xie 	int opt, ret;
692d19533e8SHuawei Xie 	int option_index;
693d19533e8SHuawei Xie 	unsigned i;
694d19533e8SHuawei Xie 	const char *prgname = argv[0];
695d19533e8SHuawei Xie 	static struct option long_option[] = {
696965b06f0SIbtisam Tariq 		{OPT_VM2VM, required_argument,
697965b06f0SIbtisam Tariq 				NULL, OPT_VM2VM_NUM},
698965b06f0SIbtisam Tariq 		{OPT_RX_RETRY, required_argument,
699965b06f0SIbtisam Tariq 				NULL, OPT_RX_RETRY_NUM},
700965b06f0SIbtisam Tariq 		{OPT_RX_RETRY_DELAY, required_argument,
701965b06f0SIbtisam Tariq 				NULL, OPT_RX_RETRY_DELAY_NUM},
702965b06f0SIbtisam Tariq 		{OPT_RX_RETRY_NUMB, required_argument,
703965b06f0SIbtisam Tariq 				NULL, OPT_RX_RETRY_NUMB_NUM},
704965b06f0SIbtisam Tariq 		{OPT_MERGEABLE, required_argument,
705965b06f0SIbtisam Tariq 				NULL, OPT_MERGEABLE_NUM},
706965b06f0SIbtisam Tariq 		{OPT_STATS, required_argument,
707965b06f0SIbtisam Tariq 				NULL, OPT_STATS_NUM},
708965b06f0SIbtisam Tariq 		{OPT_SOCKET_FILE, required_argument,
709965b06f0SIbtisam Tariq 				NULL, OPT_SOCKET_FILE_NUM},
710965b06f0SIbtisam Tariq 		{OPT_TX_CSUM, required_argument,
711965b06f0SIbtisam Tariq 				NULL, OPT_TX_CSUM_NUM},
712965b06f0SIbtisam Tariq 		{OPT_TSO, required_argument,
713965b06f0SIbtisam Tariq 				NULL, OPT_TSO_NUM},
714965b06f0SIbtisam Tariq 		{OPT_CLIENT, no_argument,
715965b06f0SIbtisam Tariq 				NULL, OPT_CLIENT_NUM},
716965b06f0SIbtisam Tariq 		{OPT_BUILTIN_NET_DRIVER, no_argument,
717965b06f0SIbtisam Tariq 				NULL, OPT_BUILTIN_NET_DRIVER_NUM},
718965b06f0SIbtisam Tariq 		{OPT_DMAS, required_argument,
719965b06f0SIbtisam Tariq 				NULL, OPT_DMAS_NUM},
720917229c2SWenwu Ma 		{OPT_NUM_MBUFS, required_argument,
721917229c2SWenwu Ma 				NULL, OPT_NUM_MBUFS_NUM},
722d19533e8SHuawei Xie 		{NULL, 0, 0, 0},
723d19533e8SHuawei Xie 	};
724d19533e8SHuawei Xie 
725d19533e8SHuawei Xie 	/* Parse command line */
72690924cafSOuyang Changchun 	while ((opt = getopt_long(argc, argv, "p:P",
72790924cafSOuyang Changchun 			long_option, &option_index)) != EOF) {
728d19533e8SHuawei Xie 		switch (opt) {
729d19533e8SHuawei Xie 		/* Portmask */
730d19533e8SHuawei Xie 		case 'p':
731d19533e8SHuawei Xie 			enabled_port_mask = parse_portmask(optarg);
732d19533e8SHuawei Xie 			if (enabled_port_mask == 0) {
733d19533e8SHuawei Xie 				RTE_LOG(INFO, VHOST_CONFIG, "Invalid portmask\n");
734d19533e8SHuawei Xie 				us_vhost_usage(prgname);
735d19533e8SHuawei Xie 				return -1;
736d19533e8SHuawei Xie 			}
737d19533e8SHuawei Xie 			break;
738d19533e8SHuawei Xie 
73990924cafSOuyang Changchun 		case 'P':
74090924cafSOuyang Changchun 			promiscuous = 1;
74190924cafSOuyang Changchun 			vmdq_conf_default.rx_adv_conf.vmdq_rx_conf.rx_mode =
742295968d1SFerruh Yigit 				RTE_ETH_VMDQ_ACCEPT_BROADCAST |
743295968d1SFerruh Yigit 				RTE_ETH_VMDQ_ACCEPT_MULTICAST;
74490924cafSOuyang Changchun 			break;
74590924cafSOuyang Changchun 
746965b06f0SIbtisam Tariq 		case OPT_VM2VM_NUM:
747d19533e8SHuawei Xie 			ret = parse_num_opt(optarg, (VM2VM_LAST - 1));
748d19533e8SHuawei Xie 			if (ret == -1) {
749d19533e8SHuawei Xie 				RTE_LOG(INFO, VHOST_CONFIG,
750d19533e8SHuawei Xie 					"Invalid argument for "
751d19533e8SHuawei Xie 					"vm2vm [0|1|2]\n");
752d19533e8SHuawei Xie 				us_vhost_usage(prgname);
753d19533e8SHuawei Xie 				return -1;
754965b06f0SIbtisam Tariq 			}
755d19533e8SHuawei Xie 			vm2vm_mode = (vm2vm_type)ret;
756965b06f0SIbtisam Tariq 			break;
757d19533e8SHuawei Xie 
758965b06f0SIbtisam Tariq 		case OPT_RX_RETRY_NUM:
759d19533e8SHuawei Xie 			ret = parse_num_opt(optarg, 1);
760d19533e8SHuawei Xie 			if (ret == -1) {
761d19533e8SHuawei Xie 				RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for rx-retry [0|1]\n");
762d19533e8SHuawei Xie 				us_vhost_usage(prgname);
763d19533e8SHuawei Xie 				return -1;
764965b06f0SIbtisam Tariq 			}
765d19533e8SHuawei Xie 			enable_retry = ret;
766965b06f0SIbtisam Tariq 			break;
767d19533e8SHuawei Xie 
768965b06f0SIbtisam Tariq 		case OPT_TX_CSUM_NUM:
7699fd72e3cSJijiang Liu 			ret = parse_num_opt(optarg, 1);
7709fd72e3cSJijiang Liu 			if (ret == -1) {
7719fd72e3cSJijiang Liu 				RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for tx-csum [0|1]\n");
7729fd72e3cSJijiang Liu 				us_vhost_usage(prgname);
7739fd72e3cSJijiang Liu 				return -1;
7749fd72e3cSJijiang Liu 			}
775965b06f0SIbtisam Tariq 			enable_tx_csum = ret;
776965b06f0SIbtisam Tariq 			break;
7779fd72e3cSJijiang Liu 
778965b06f0SIbtisam Tariq 		case OPT_TSO_NUM:
7799fd72e3cSJijiang Liu 			ret = parse_num_opt(optarg, 1);
7809fd72e3cSJijiang Liu 			if (ret == -1) {
7819fd72e3cSJijiang Liu 				RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for tso [0|1]\n");
7829fd72e3cSJijiang Liu 				us_vhost_usage(prgname);
7839fd72e3cSJijiang Liu 				return -1;
7849fd72e3cSJijiang Liu 			}
785965b06f0SIbtisam Tariq 			enable_tso = ret;
786965b06f0SIbtisam Tariq 			break;
7879fd72e3cSJijiang Liu 
788965b06f0SIbtisam Tariq 		case OPT_RX_RETRY_DELAY_NUM:
789d19533e8SHuawei Xie 			ret = parse_num_opt(optarg, INT32_MAX);
790d19533e8SHuawei Xie 			if (ret == -1) {
791d19533e8SHuawei Xie 				RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for rx-retry-delay [0-N]\n");
792d19533e8SHuawei Xie 				us_vhost_usage(prgname);
793d19533e8SHuawei Xie 				return -1;
794965b06f0SIbtisam Tariq 			}
795d19533e8SHuawei Xie 			burst_rx_delay_time = ret;
796965b06f0SIbtisam Tariq 			break;
797d19533e8SHuawei Xie 
798965b06f0SIbtisam Tariq 		case OPT_RX_RETRY_NUMB_NUM:
799d19533e8SHuawei Xie 			ret = parse_num_opt(optarg, INT32_MAX);
800d19533e8SHuawei Xie 			if (ret == -1) {
801d19533e8SHuawei Xie 				RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for rx-retry-num [0-N]\n");
802d19533e8SHuawei Xie 				us_vhost_usage(prgname);
803d19533e8SHuawei Xie 				return -1;
804965b06f0SIbtisam Tariq 			}
805d19533e8SHuawei Xie 			burst_rx_retry_num = ret;
806965b06f0SIbtisam Tariq 			break;
807d19533e8SHuawei Xie 
808965b06f0SIbtisam Tariq 		case OPT_MERGEABLE_NUM:
809d19533e8SHuawei Xie 			ret = parse_num_opt(optarg, 1);
810d19533e8SHuawei Xie 			if (ret == -1) {
811d19533e8SHuawei Xie 				RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for mergeable [0|1]\n");
812d19533e8SHuawei Xie 				us_vhost_usage(prgname);
813d19533e8SHuawei Xie 				return -1;
814965b06f0SIbtisam Tariq 			}
81528deb020SHuawei Xie 			mergeable = !!ret;
816965b06f0SIbtisam Tariq 			break;
817d19533e8SHuawei Xie 
818965b06f0SIbtisam Tariq 		case OPT_STATS_NUM:
819d19533e8SHuawei Xie 			ret = parse_num_opt(optarg, INT32_MAX);
820d19533e8SHuawei Xie 			if (ret == -1) {
821bde19a4dSJiayu Hu 				RTE_LOG(INFO, VHOST_CONFIG,
822bde19a4dSJiayu Hu 					"Invalid argument for stats [0..N]\n");
823d19533e8SHuawei Xie 				us_vhost_usage(prgname);
824d19533e8SHuawei Xie 				return -1;
825965b06f0SIbtisam Tariq 			}
826d19533e8SHuawei Xie 			enable_stats = ret;
827965b06f0SIbtisam Tariq 			break;
828d19533e8SHuawei Xie 
829bde19a4dSJiayu Hu 		/* Set socket file path. */
830965b06f0SIbtisam Tariq 		case OPT_SOCKET_FILE_NUM:
831bde19a4dSJiayu Hu 			if (us_vhost_parse_socket_path(optarg) == -1) {
832bde19a4dSJiayu Hu 				RTE_LOG(INFO, VHOST_CONFIG,
833bde19a4dSJiayu Hu 				"Invalid argument for socket name (Max %d characters)\n",
834bde19a4dSJiayu Hu 				PATH_MAX);
835d19533e8SHuawei Xie 				us_vhost_usage(prgname);
836d19533e8SHuawei Xie 				return -1;
837d19533e8SHuawei Xie 			}
838965b06f0SIbtisam Tariq 			break;
839d19533e8SHuawei Xie 
840965b06f0SIbtisam Tariq 		case OPT_DMAS_NUM:
8413a04ecb2SCheng Jiang 			if (open_dma(optarg) == -1) {
8423a04ecb2SCheng Jiang 				RTE_LOG(INFO, VHOST_CONFIG,
8433a04ecb2SCheng Jiang 					"Wrong DMA args\n");
8443a04ecb2SCheng Jiang 				us_vhost_usage(prgname);
8453a04ecb2SCheng Jiang 				return -1;
8463a04ecb2SCheng Jiang 			}
847965b06f0SIbtisam Tariq 			break;
8483a04ecb2SCheng Jiang 
849917229c2SWenwu Ma 		case OPT_NUM_MBUFS_NUM:
850917229c2SWenwu Ma 			ret = parse_num_opt(optarg, INT32_MAX);
851917229c2SWenwu Ma 			if (ret == -1) {
852917229c2SWenwu Ma 				RTE_LOG(INFO, VHOST_CONFIG,
853917229c2SWenwu Ma 					"Invalid argument for total-num-mbufs [0..N]\n");
854917229c2SWenwu Ma 				us_vhost_usage(prgname);
855917229c2SWenwu Ma 				return -1;
856917229c2SWenwu Ma 			}
857917229c2SWenwu Ma 
858917229c2SWenwu Ma 			if (total_num_mbufs < ret)
859917229c2SWenwu Ma 				total_num_mbufs = ret;
860917229c2SWenwu Ma 			break;
861917229c2SWenwu Ma 
862965b06f0SIbtisam Tariq 		case OPT_CLIENT_NUM:
863965b06f0SIbtisam Tariq 			client_mode = 1;
864965b06f0SIbtisam Tariq 			break;
865965b06f0SIbtisam Tariq 
866965b06f0SIbtisam Tariq 		case OPT_BUILTIN_NET_DRIVER_NUM:
867965b06f0SIbtisam Tariq 			builtin_net_driver = 1;
868d19533e8SHuawei Xie 			break;
869d19533e8SHuawei Xie 
870d19533e8SHuawei Xie 		/* Invalid option - print options. */
871d19533e8SHuawei Xie 		default:
872d19533e8SHuawei Xie 			us_vhost_usage(prgname);
873d19533e8SHuawei Xie 			return -1;
874d19533e8SHuawei Xie 		}
875d19533e8SHuawei Xie 	}
876d19533e8SHuawei Xie 
877d19533e8SHuawei Xie 	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
878d19533e8SHuawei Xie 		if (enabled_port_mask & (1 << i))
879f8244c63SZhiyong Yang 			ports[num_ports++] = i;
880d19533e8SHuawei Xie 	}
881d19533e8SHuawei Xie 
882d19533e8SHuawei Xie 	if ((num_ports ==  0) || (num_ports > MAX_SUP_PORTS)) {
883d19533e8SHuawei Xie 		RTE_LOG(INFO, VHOST_PORT, "Current enabled port number is %u,"
884d19533e8SHuawei Xie 			"but only %u port can be enabled\n",num_ports, MAX_SUP_PORTS);
885d19533e8SHuawei Xie 		return -1;
886d19533e8SHuawei Xie 	}
887d19533e8SHuawei Xie 
888d19533e8SHuawei Xie 	return 0;
889d19533e8SHuawei Xie }
890d19533e8SHuawei Xie 
891d19533e8SHuawei Xie /*
892d19533e8SHuawei Xie  * Update the global var NUM_PORTS and array PORTS according to system ports number
893d19533e8SHuawei Xie  * and return valid ports number
894d19533e8SHuawei Xie  */
895d19533e8SHuawei Xie static unsigned check_ports_num(unsigned nb_ports)
896d19533e8SHuawei Xie {
897d19533e8SHuawei Xie 	unsigned valid_num_ports = num_ports;
898d19533e8SHuawei Xie 	unsigned portid;
899d19533e8SHuawei Xie 
900d19533e8SHuawei Xie 	if (num_ports > nb_ports) {
901d19533e8SHuawei Xie 		RTE_LOG(INFO, VHOST_PORT, "\nSpecified port number(%u) exceeds total system port number(%u)\n",
902d19533e8SHuawei Xie 			num_ports, nb_ports);
903d19533e8SHuawei Xie 		num_ports = nb_ports;
904d19533e8SHuawei Xie 	}
905d19533e8SHuawei Xie 
906d19533e8SHuawei Xie 	for (portid = 0; portid < num_ports; portid ++) {
907a9dbe180SThomas Monjalon 		if (!rte_eth_dev_is_valid_port(ports[portid])) {
908a9dbe180SThomas Monjalon 			RTE_LOG(INFO, VHOST_PORT,
909a9dbe180SThomas Monjalon 				"\nSpecified port ID(%u) is not valid\n",
910a9dbe180SThomas Monjalon 				ports[portid]);
911d19533e8SHuawei Xie 			ports[portid] = INVALID_PORT_ID;
912d19533e8SHuawei Xie 			valid_num_ports--;
913d19533e8SHuawei Xie 		}
914d19533e8SHuawei Xie 	}
915d19533e8SHuawei Xie 	return valid_num_ports;
916d19533e8SHuawei Xie }
917d19533e8SHuawei Xie 
918c0583d98SJerin Jacob static __rte_always_inline struct vhost_dev *
9196d13ea8eSOlivier Matz find_vhost_dev(struct rte_ether_addr *mac)
92045657a5cSYuanhan Liu {
92145657a5cSYuanhan Liu 	struct vhost_dev *vdev;
92245657a5cSYuanhan Liu 
92397daf19eSYuanhan Liu 	TAILQ_FOREACH(vdev, &vhost_dev_list, global_vdev_entry) {
92445657a5cSYuanhan Liu 		if (vdev->ready == DEVICE_RX &&
925538da7a1SOlivier Matz 		    rte_is_same_ether_addr(mac, &vdev->mac_address))
92645657a5cSYuanhan Liu 			return vdev;
92745657a5cSYuanhan Liu 	}
92845657a5cSYuanhan Liu 
92945657a5cSYuanhan Liu 	return NULL;
93045657a5cSYuanhan Liu }
93145657a5cSYuanhan Liu 
932d19533e8SHuawei Xie /*
933d19533e8SHuawei Xie  * This function learns the MAC address of the device and registers this along with a
934d19533e8SHuawei Xie  * vlan tag to a VMDQ.
935d19533e8SHuawei Xie  */
936d19533e8SHuawei Xie static int
937e571e6b4SHuawei Xie link_vmdq(struct vhost_dev *vdev, struct rte_mbuf *m)
938d19533e8SHuawei Xie {
9396d13ea8eSOlivier Matz 	struct rte_ether_hdr *pkt_hdr;
940d19533e8SHuawei Xie 	int i, ret;
941d19533e8SHuawei Xie 
942d19533e8SHuawei Xie 	/* Learn MAC address of guest device from packet */
9436d13ea8eSOlivier Matz 	pkt_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
944d19533e8SHuawei Xie 
94504d43857SDmitry Kozlyuk 	if (find_vhost_dev(&pkt_hdr->src_addr)) {
94645657a5cSYuanhan Liu 		RTE_LOG(ERR, VHOST_DATA,
947c08a3490SYuanhan Liu 			"(%d) device is using a registered MAC!\n",
948e2a1dd12SYuanhan Liu 			vdev->vid);
949d19533e8SHuawei Xie 		return -1;
950d19533e8SHuawei Xie 	}
951d19533e8SHuawei Xie 
95235b2d13fSOlivier Matz 	for (i = 0; i < RTE_ETHER_ADDR_LEN; i++)
95304d43857SDmitry Kozlyuk 		vdev->mac_address.addr_bytes[i] =
95404d43857SDmitry Kozlyuk 			pkt_hdr->src_addr.addr_bytes[i];
955d19533e8SHuawei Xie 
956d19533e8SHuawei Xie 	/* vlan_tag currently uses the device_id. */
957e2a1dd12SYuanhan Liu 	vdev->vlan_tag = vlan_tags[vdev->vid];
958d19533e8SHuawei Xie 
959d19533e8SHuawei Xie 	/* Print out VMDQ registration info. */
960c08a3490SYuanhan Liu 	RTE_LOG(INFO, VHOST_DATA,
961c2c4f87bSAman Deep Singh 		"(%d) mac " RTE_ETHER_ADDR_PRT_FMT " and vlan %d registered\n",
962a7db3afcSAman Deep Singh 		vdev->vid, RTE_ETHER_ADDR_BYTES(&vdev->mac_address),
963e571e6b4SHuawei Xie 		vdev->vlan_tag);
964d19533e8SHuawei Xie 
965d19533e8SHuawei Xie 	/* Register the MAC address. */
96684b02d16SHuawei Xie 	ret = rte_eth_dev_mac_addr_add(ports[0], &vdev->mac_address,
967e2a1dd12SYuanhan Liu 				(uint32_t)vdev->vid + vmdq_pool_base);
968d19533e8SHuawei Xie 	if (ret)
969c08a3490SYuanhan Liu 		RTE_LOG(ERR, VHOST_DATA,
970c08a3490SYuanhan Liu 			"(%d) failed to add device MAC address to VMDQ\n",
971e2a1dd12SYuanhan Liu 			vdev->vid);
972d19533e8SHuawei Xie 
97365453928SJianfeng Tan 	rte_eth_dev_set_vlan_strip_on_queue(ports[0], vdev->vmdq_rx_q, 1);
974d19533e8SHuawei Xie 
975d19533e8SHuawei Xie 	/* Set device as ready for RX. */
976e571e6b4SHuawei Xie 	vdev->ready = DEVICE_RX;
977d19533e8SHuawei Xie 
978d19533e8SHuawei Xie 	return 0;
979d19533e8SHuawei Xie }
980d19533e8SHuawei Xie 
981d19533e8SHuawei Xie /*
982d19533e8SHuawei Xie  * Removes MAC address and vlan tag from VMDQ. Ensures that nothing is adding buffers to the RX
983d19533e8SHuawei Xie  * queue before disabling RX on the device.
984d19533e8SHuawei Xie  */
985d19533e8SHuawei Xie static inline void
986e571e6b4SHuawei Xie unlink_vmdq(struct vhost_dev *vdev)
987d19533e8SHuawei Xie {
988d19533e8SHuawei Xie 	unsigned i = 0;
989d19533e8SHuawei Xie 	unsigned rx_count;
990d19533e8SHuawei Xie 	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
991d19533e8SHuawei Xie 
992e571e6b4SHuawei Xie 	if (vdev->ready == DEVICE_RX) {
993d19533e8SHuawei Xie 		/*clear MAC and VLAN settings*/
994e571e6b4SHuawei Xie 		rte_eth_dev_mac_addr_remove(ports[0], &vdev->mac_address);
995d19533e8SHuawei Xie 		for (i = 0; i < 6; i++)
996e571e6b4SHuawei Xie 			vdev->mac_address.addr_bytes[i] = 0;
997d19533e8SHuawei Xie 
998e571e6b4SHuawei Xie 		vdev->vlan_tag = 0;
999d19533e8SHuawei Xie 
1000d19533e8SHuawei Xie 		/*Clear out the receive buffers*/
1001d19533e8SHuawei Xie 		rx_count = rte_eth_rx_burst(ports[0],
1002e571e6b4SHuawei Xie 					(uint16_t)vdev->vmdq_rx_q, pkts_burst, MAX_PKT_BURST);
1003d19533e8SHuawei Xie 
1004d19533e8SHuawei Xie 		while (rx_count) {
1005d19533e8SHuawei Xie 			for (i = 0; i < rx_count; i++)
1006d19533e8SHuawei Xie 				rte_pktmbuf_free(pkts_burst[i]);
1007d19533e8SHuawei Xie 
1008d19533e8SHuawei Xie 			rx_count = rte_eth_rx_burst(ports[0],
1009e571e6b4SHuawei Xie 					(uint16_t)vdev->vmdq_rx_q, pkts_burst, MAX_PKT_BURST);
1010d19533e8SHuawei Xie 		}
1011d19533e8SHuawei Xie 
1012e571e6b4SHuawei Xie 		vdev->ready = DEVICE_MAC_LEARNING;
1013d19533e8SHuawei Xie 	}
1014d19533e8SHuawei Xie }
1015d19533e8SHuawei Xie 
1016a68ba8e0SCheng Jiang static inline void
1017a68ba8e0SCheng Jiang free_pkts(struct rte_mbuf **pkts, uint16_t n)
1018a68ba8e0SCheng Jiang {
1019a68ba8e0SCheng Jiang 	while (n--)
1020a68ba8e0SCheng Jiang 		rte_pktmbuf_free(pkts[n]);
1021a68ba8e0SCheng Jiang }
1022a68ba8e0SCheng Jiang 
1023c0583d98SJerin Jacob static __rte_always_inline void
1024a68ba8e0SCheng Jiang complete_async_pkts(struct vhost_dev *vdev)
1025a68ba8e0SCheng Jiang {
1026a68ba8e0SCheng Jiang 	struct rte_mbuf *p_cpl[MAX_PKT_BURST];
1027a68ba8e0SCheng Jiang 	uint16_t complete_count;
1028a543dcb7SXuan Ding 	int16_t dma_id = dma_bind[vid2socketid[vdev->vid]].dmas[VIRTIO_RXQ].dev_id;
1029a68ba8e0SCheng Jiang 
1030a68ba8e0SCheng Jiang 	complete_count = rte_vhost_poll_enqueue_completed(vdev->vid,
103153d3f477SJiayu Hu 					VIRTIO_RXQ, p_cpl, MAX_PKT_BURST, dma_id, 0);
1032a548f7d5SXuan Ding 	if (complete_count)
1033a68ba8e0SCheng Jiang 		free_pkts(p_cpl, complete_count);
1034b9f23beeSCheng Jiang 
1035a68ba8e0SCheng Jiang }
1036a68ba8e0SCheng Jiang 
1037a68ba8e0SCheng Jiang static __rte_always_inline void
1038a68ba8e0SCheng Jiang sync_virtio_xmit(struct vhost_dev *dst_vdev, struct vhost_dev *src_vdev,
10399c5ef512SYuanhan Liu 	    struct rte_mbuf *m)
10409c5ef512SYuanhan Liu {
10419c5ef512SYuanhan Liu 	uint16_t ret;
10429c5ef512SYuanhan Liu 
1043ca059fa5SYuanhan Liu 	if (builtin_net_driver) {
1044ca059fa5SYuanhan Liu 		ret = vs_enqueue_pkts(dst_vdev, VIRTIO_RXQ, &m, 1);
1045ca059fa5SYuanhan Liu 	} else {
10464ecf22e3SYuanhan Liu 		ret = rte_vhost_enqueue_burst(dst_vdev->vid, VIRTIO_RXQ, &m, 1);
1047ca059fa5SYuanhan Liu 	}
1048ca059fa5SYuanhan Liu 
10499c5ef512SYuanhan Liu 	if (enable_stats) {
1050a68ba8e0SCheng Jiang 		__atomic_add_fetch(&dst_vdev->stats.rx_total_atomic, 1,
1051a68ba8e0SCheng Jiang 				__ATOMIC_SEQ_CST);
1052a68ba8e0SCheng Jiang 		__atomic_add_fetch(&dst_vdev->stats.rx_atomic, ret,
1053a68ba8e0SCheng Jiang 				__ATOMIC_SEQ_CST);
105456fe86f8SYuanhan Liu 		src_vdev->stats.tx_total++;
105556fe86f8SYuanhan Liu 		src_vdev->stats.tx += ret;
10569c5ef512SYuanhan Liu 	}
10579c5ef512SYuanhan Liu }
10589c5ef512SYuanhan Liu 
1059a68ba8e0SCheng Jiang static __rte_always_inline void
1060a68ba8e0SCheng Jiang drain_vhost(struct vhost_dev *vdev)
1061a68ba8e0SCheng Jiang {
1062a68ba8e0SCheng Jiang 	uint16_t ret;
106353d3f477SJiayu Hu 	uint32_t buff_idx = rte_lcore_id() * RTE_MAX_VHOST_DEVICE + vdev->vid;
1064a68ba8e0SCheng Jiang 	uint16_t nr_xmit = vhost_txbuff[buff_idx]->len;
1065a68ba8e0SCheng Jiang 	struct rte_mbuf **m = vhost_txbuff[buff_idx]->m_table;
1066a68ba8e0SCheng Jiang 
1067a543dcb7SXuan Ding 	ret = vdev_queue_ops[vdev->vid].enqueue_pkt_burst(vdev, VIRTIO_RXQ, m, nr_xmit);
1068a68ba8e0SCheng Jiang 
1069a68ba8e0SCheng Jiang 	if (enable_stats) {
1070a68ba8e0SCheng Jiang 		__atomic_add_fetch(&vdev->stats.rx_total_atomic, nr_xmit,
1071a68ba8e0SCheng Jiang 				__ATOMIC_SEQ_CST);
1072a68ba8e0SCheng Jiang 		__atomic_add_fetch(&vdev->stats.rx_atomic, ret,
1073a68ba8e0SCheng Jiang 				__ATOMIC_SEQ_CST);
1074a68ba8e0SCheng Jiang 	}
1075a68ba8e0SCheng Jiang 
1076a543dcb7SXuan Ding 	if (!dma_bind[vid2socketid[vdev->vid]].dmas[VIRTIO_RXQ].async_enabled)
1077a68ba8e0SCheng Jiang 		free_pkts(m, nr_xmit);
1078a68ba8e0SCheng Jiang }
1079a68ba8e0SCheng Jiang 
1080a68ba8e0SCheng Jiang static __rte_always_inline void
1081a68ba8e0SCheng Jiang drain_vhost_table(void)
1082a68ba8e0SCheng Jiang {
1083a68ba8e0SCheng Jiang 	uint16_t lcore_id = rte_lcore_id();
1084a68ba8e0SCheng Jiang 	struct vhost_bufftable *vhost_txq;
1085a68ba8e0SCheng Jiang 	struct vhost_dev *vdev;
1086a68ba8e0SCheng Jiang 	uint64_t cur_tsc;
1087a68ba8e0SCheng Jiang 
1088a68ba8e0SCheng Jiang 	TAILQ_FOREACH(vdev, &vhost_dev_list, global_vdev_entry) {
1089ad5050e4SWenwu Ma 		if (unlikely(vdev->remove == 1))
1090ad5050e4SWenwu Ma 			continue;
1091ad5050e4SWenwu Ma 
109253d3f477SJiayu Hu 		vhost_txq = vhost_txbuff[lcore_id * RTE_MAX_VHOST_DEVICE + vdev->vid];
1093a68ba8e0SCheng Jiang 
1094a68ba8e0SCheng Jiang 		cur_tsc = rte_rdtsc();
1095a68ba8e0SCheng Jiang 		if (unlikely(cur_tsc - vhost_txq->pre_tsc
1096a68ba8e0SCheng Jiang 				> MBUF_TABLE_DRAIN_TSC)) {
1097a68ba8e0SCheng Jiang 			RTE_LOG_DP(DEBUG, VHOST_DATA,
1098a68ba8e0SCheng Jiang 				"Vhost TX queue drained after timeout with burst size %u\n",
1099a68ba8e0SCheng Jiang 				vhost_txq->len);
1100a68ba8e0SCheng Jiang 			drain_vhost(vdev);
1101a68ba8e0SCheng Jiang 			vhost_txq->len = 0;
1102a68ba8e0SCheng Jiang 			vhost_txq->pre_tsc = cur_tsc;
1103a68ba8e0SCheng Jiang 		}
1104a68ba8e0SCheng Jiang 	}
1105a68ba8e0SCheng Jiang }
1106a68ba8e0SCheng Jiang 
1107d19533e8SHuawei Xie /*
1108d19533e8SHuawei Xie  * Check if the packet destination MAC address is for a local device. If so then put
1109d19533e8SHuawei Xie  * the packet on that devices RX queue. If not then return.
1110d19533e8SHuawei Xie  */
1111c0583d98SJerin Jacob static __rte_always_inline int
1112e571e6b4SHuawei Xie virtio_tx_local(struct vhost_dev *vdev, struct rte_mbuf *m)
1113d19533e8SHuawei Xie {
11146d13ea8eSOlivier Matz 	struct rte_ether_hdr *pkt_hdr;
111545657a5cSYuanhan Liu 	struct vhost_dev *dst_vdev;
1116a68ba8e0SCheng Jiang 	struct vhost_bufftable *vhost_txq;
1117a68ba8e0SCheng Jiang 	uint16_t lcore_id = rte_lcore_id();
11186d13ea8eSOlivier Matz 	pkt_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
1119d19533e8SHuawei Xie 
112004d43857SDmitry Kozlyuk 	dst_vdev = find_vhost_dev(&pkt_hdr->dst_addr);
112145657a5cSYuanhan Liu 	if (!dst_vdev)
1122d19533e8SHuawei Xie 		return -1;
112345657a5cSYuanhan Liu 
1124e2a1dd12SYuanhan Liu 	if (vdev->vid == dst_vdev->vid) {
11255d8f0bafSOlivier Matz 		RTE_LOG_DP(DEBUG, VHOST_DATA,
1126c08a3490SYuanhan Liu 			"(%d) TX: src and dst MAC is same. Dropping packet.\n",
1127e2a1dd12SYuanhan Liu 			vdev->vid);
112845657a5cSYuanhan Liu 		return 0;
112945657a5cSYuanhan Liu 	}
113045657a5cSYuanhan Liu 
11315d8f0bafSOlivier Matz 	RTE_LOG_DP(DEBUG, VHOST_DATA,
1132e2a1dd12SYuanhan Liu 		"(%d) TX: MAC address is local\n", dst_vdev->vid);
113345657a5cSYuanhan Liu 
113445657a5cSYuanhan Liu 	if (unlikely(dst_vdev->remove)) {
11355d8f0bafSOlivier Matz 		RTE_LOG_DP(DEBUG, VHOST_DATA,
1136e2a1dd12SYuanhan Liu 			"(%d) device is marked for removal\n", dst_vdev->vid);
113745657a5cSYuanhan Liu 		return 0;
113845657a5cSYuanhan Liu 	}
113945657a5cSYuanhan Liu 
114053d3f477SJiayu Hu 	vhost_txq = vhost_txbuff[lcore_id * RTE_MAX_VHOST_DEVICE + dst_vdev->vid];
1141a68ba8e0SCheng Jiang 	vhost_txq->m_table[vhost_txq->len++] = m;
1142a68ba8e0SCheng Jiang 
1143a68ba8e0SCheng Jiang 	if (enable_stats) {
1144a68ba8e0SCheng Jiang 		vdev->stats.tx_total++;
1145a68ba8e0SCheng Jiang 		vdev->stats.tx++;
1146a68ba8e0SCheng Jiang 	}
1147a68ba8e0SCheng Jiang 
1148a68ba8e0SCheng Jiang 	if (unlikely(vhost_txq->len == MAX_PKT_BURST)) {
1149a68ba8e0SCheng Jiang 		drain_vhost(dst_vdev);
1150a68ba8e0SCheng Jiang 		vhost_txq->len = 0;
1151a68ba8e0SCheng Jiang 		vhost_txq->pre_tsc = rte_rdtsc();
1152a68ba8e0SCheng Jiang 	}
115345657a5cSYuanhan Liu 	return 0;
1154d19533e8SHuawei Xie }
1155d19533e8SHuawei Xie 
1156d19533e8SHuawei Xie /*
115772ec8d77SOuyang Changchun  * Check if the destination MAC of a packet is one local VM,
115872ec8d77SOuyang Changchun  * and get its vlan tag, and offset if it is.
1159d19533e8SHuawei Xie  */
1160c0583d98SJerin Jacob static __rte_always_inline int
11617f262239SYuanhan Liu find_local_dest(struct vhost_dev *vdev, struct rte_mbuf *m,
116272ec8d77SOuyang Changchun 	uint32_t *offset, uint16_t *vlan_tag)
1163d19533e8SHuawei Xie {
116445657a5cSYuanhan Liu 	struct vhost_dev *dst_vdev;
11656d13ea8eSOlivier Matz 	struct rte_ether_hdr *pkt_hdr =
11666d13ea8eSOlivier Matz 		rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
1167d19533e8SHuawei Xie 
116804d43857SDmitry Kozlyuk 	dst_vdev = find_vhost_dev(&pkt_hdr->dst_addr);
116945657a5cSYuanhan Liu 	if (!dst_vdev)
117045657a5cSYuanhan Liu 		return 0;
117145657a5cSYuanhan Liu 
1172e2a1dd12SYuanhan Liu 	if (vdev->vid == dst_vdev->vid) {
11735d8f0bafSOlivier Matz 		RTE_LOG_DP(DEBUG, VHOST_DATA,
1174c08a3490SYuanhan Liu 			"(%d) TX: src and dst MAC is same. Dropping packet.\n",
1175e2a1dd12SYuanhan Liu 			vdev->vid);
117672ec8d77SOuyang Changchun 		return -1;
1177d19533e8SHuawei Xie 	}
1178e44fb8a4SOuyang Changchun 
1179e44fb8a4SOuyang Changchun 	/*
1180e44fb8a4SOuyang Changchun 	 * HW vlan strip will reduce the packet length
1181e44fb8a4SOuyang Changchun 	 * by minus length of vlan tag, so need restore
1182e44fb8a4SOuyang Changchun 	 * the packet length by plus it.
1183e44fb8a4SOuyang Changchun 	 */
118425cf2630SFerruh Yigit 	*offset  = RTE_VLAN_HLEN;
1185e2a1dd12SYuanhan Liu 	*vlan_tag = vlan_tags[vdev->vid];
1186d19533e8SHuawei Xie 
11875d8f0bafSOlivier Matz 	RTE_LOG_DP(DEBUG, VHOST_DATA,
11887f262239SYuanhan Liu 		"(%d) TX: pkt to local VM device id: (%d), vlan tag: %u.\n",
1189e2a1dd12SYuanhan Liu 		vdev->vid, dst_vdev->vid, *vlan_tag);
1190d19533e8SHuawei Xie 
119172ec8d77SOuyang Changchun 	return 0;
119272ec8d77SOuyang Changchun }
119372ec8d77SOuyang Changchun 
11949fd72e3cSJijiang Liu static void virtio_tx_offload(struct rte_mbuf *m)
11959fd72e3cSJijiang Liu {
1196ca7036b4SDavid Marchand 	struct rte_net_hdr_lens hdr_lens;
1197ca7036b4SDavid Marchand 	struct rte_ipv4_hdr *ipv4_hdr;
1198ca7036b4SDavid Marchand 	struct rte_tcp_hdr *tcp_hdr;
1199ca7036b4SDavid Marchand 	uint32_t ptype;
12009fd72e3cSJijiang Liu 	void *l3_hdr;
12019fd72e3cSJijiang Liu 
1202ca7036b4SDavid Marchand 	ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK);
1203ca7036b4SDavid Marchand 	m->l2_len = hdr_lens.l2_len;
1204ca7036b4SDavid Marchand 	m->l3_len = hdr_lens.l3_len;
1205ca7036b4SDavid Marchand 	m->l4_len = hdr_lens.l4_len;
12069fd72e3cSJijiang Liu 
1207ca7036b4SDavid Marchand 	l3_hdr = rte_pktmbuf_mtod_offset(m, void *, m->l2_len);
1208ca7036b4SDavid Marchand 	tcp_hdr = rte_pktmbuf_mtod_offset(m, struct rte_tcp_hdr *,
1209ca7036b4SDavid Marchand 		m->l2_len + m->l3_len);
1210ca7036b4SDavid Marchand 
1211daa02b5cSOlivier Matz 	m->ol_flags |= RTE_MBUF_F_TX_TCP_SEG;
1212ca7036b4SDavid Marchand 	if ((ptype & RTE_PTYPE_L3_MASK) == RTE_PTYPE_L3_IPV4) {
1213daa02b5cSOlivier Matz 		m->ol_flags |= RTE_MBUF_F_TX_IPV4;
1214daa02b5cSOlivier Matz 		m->ol_flags |= RTE_MBUF_F_TX_IP_CKSUM;
1215df40169aSYuanhan Liu 		ipv4_hdr = l3_hdr;
12169fd72e3cSJijiang Liu 		ipv4_hdr->hdr_checksum = 0;
1217ca7036b4SDavid Marchand 		tcp_hdr->cksum = rte_ipv4_phdr_cksum(l3_hdr, m->ol_flags);
1218ca7036b4SDavid Marchand 	} else { /* assume ethertype == RTE_ETHER_TYPE_IPV6 */
1219daa02b5cSOlivier Matz 		m->ol_flags |= RTE_MBUF_F_TX_IPV6;
1220ca7036b4SDavid Marchand 		tcp_hdr->cksum = rte_ipv6_phdr_cksum(l3_hdr, m->ol_flags);
1221df40169aSYuanhan Liu 	}
12229fd72e3cSJijiang Liu }
12239fd72e3cSJijiang Liu 
1224c0583d98SJerin Jacob static __rte_always_inline void
1225273ecdbcSYuanhan Liu do_drain_mbuf_table(struct mbuf_table *tx_q)
1226273ecdbcSYuanhan Liu {
1227273ecdbcSYuanhan Liu 	uint16_t count;
1228273ecdbcSYuanhan Liu 
1229273ecdbcSYuanhan Liu 	count = rte_eth_tx_burst(ports[0], tx_q->txq_id,
1230273ecdbcSYuanhan Liu 				 tx_q->m_table, tx_q->len);
1231273ecdbcSYuanhan Liu 	if (unlikely(count < tx_q->len))
1232273ecdbcSYuanhan Liu 		free_pkts(&tx_q->m_table[count], tx_q->len - count);
1233273ecdbcSYuanhan Liu 
1234273ecdbcSYuanhan Liu 	tx_q->len = 0;
1235273ecdbcSYuanhan Liu }
1236273ecdbcSYuanhan Liu 
123772ec8d77SOuyang Changchun /*
1238273ecdbcSYuanhan Liu  * This function routes the TX packet to the correct interface. This
1239273ecdbcSYuanhan Liu  * may be a local device or the physical port.
124072ec8d77SOuyang Changchun  */
1241c0583d98SJerin Jacob static __rte_always_inline void
124272ec8d77SOuyang Changchun virtio_tx_route(struct vhost_dev *vdev, struct rte_mbuf *m, uint16_t vlan_tag)
124372ec8d77SOuyang Changchun {
124472ec8d77SOuyang Changchun 	struct mbuf_table *tx_q;
1245273ecdbcSYuanhan Liu 	unsigned offset = 0;
124672ec8d77SOuyang Changchun 	const uint16_t lcore_id = rte_lcore_id();
12476d13ea8eSOlivier Matz 	struct rte_ether_hdr *nh;
124872ec8d77SOuyang Changchun 
12499c5ef512SYuanhan Liu 
12506d13ea8eSOlivier Matz 	nh = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
125104d43857SDmitry Kozlyuk 	if (unlikely(rte_is_broadcast_ether_addr(&nh->dst_addr))) {
12529c5ef512SYuanhan Liu 		struct vhost_dev *vdev2;
12539c5ef512SYuanhan Liu 
125497daf19eSYuanhan Liu 		TAILQ_FOREACH(vdev2, &vhost_dev_list, global_vdev_entry) {
1255a3fdb532SJunjie Chen 			if (vdev2 != vdev)
1256a68ba8e0SCheng Jiang 				sync_virtio_xmit(vdev2, vdev, m);
12579c5ef512SYuanhan Liu 		}
12589c5ef512SYuanhan Liu 		goto queue2nic;
12599c5ef512SYuanhan Liu 	}
12609c5ef512SYuanhan Liu 
126172ec8d77SOuyang Changchun 	/*check if destination is local VM*/
1262a68ba8e0SCheng Jiang 	if ((vm2vm_mode == VM2VM_SOFTWARE) && (virtio_tx_local(vdev, m) == 0))
126372ec8d77SOuyang Changchun 		return;
126472ec8d77SOuyang Changchun 
1265c2ab5162SOuyang Changchun 	if (unlikely(vm2vm_mode == VM2VM_HARDWARE)) {
12667f262239SYuanhan Liu 		if (unlikely(find_local_dest(vdev, m, &offset,
12677f262239SYuanhan Liu 					     &vlan_tag) != 0)) {
126872ec8d77SOuyang Changchun 			rte_pktmbuf_free(m);
126972ec8d77SOuyang Changchun 			return;
127072ec8d77SOuyang Changchun 		}
1271d19533e8SHuawei Xie 	}
1272d19533e8SHuawei Xie 
12735d8f0bafSOlivier Matz 	RTE_LOG_DP(DEBUG, VHOST_DATA,
1274e2a1dd12SYuanhan Liu 		"(%d) TX: MAC address is external\n", vdev->vid);
1275d19533e8SHuawei Xie 
12769c5ef512SYuanhan Liu queue2nic:
12779c5ef512SYuanhan Liu 
1278d19533e8SHuawei Xie 	/*Add packet to the port tx queue*/
1279d19533e8SHuawei Xie 	tx_q = &lcore_tx_queue[lcore_id];
1280d19533e8SHuawei Xie 
12816d13ea8eSOlivier Matz 	nh = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
128235b2d13fSOlivier Matz 	if (unlikely(nh->ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_VLAN))) {
12838b9bb988SOuyang Changchun 		/* Guest has inserted the vlan tag. */
12846d13ea8eSOlivier Matz 		struct rte_vlan_hdr *vh = (struct rte_vlan_hdr *) (nh + 1);
12858b9bb988SOuyang Changchun 		uint16_t vlan_tag_be = rte_cpu_to_be_16(vlan_tag);
12868b9bb988SOuyang Changchun 		if ((vm2vm_mode == VM2VM_HARDWARE) &&
12878b9bb988SOuyang Changchun 			(vh->vlan_tci != vlan_tag_be))
12888b9bb988SOuyang Changchun 			vh->vlan_tci = vlan_tag_be;
12898b9bb988SOuyang Changchun 	} else {
1290daa02b5cSOlivier Matz 		m->ol_flags |= RTE_MBUF_F_TX_VLAN;
1291e44fb8a4SOuyang Changchun 
1292c2ab5162SOuyang Changchun 		/*
1293c2ab5162SOuyang Changchun 		 * Find the right seg to adjust the data len when offset is
1294c2ab5162SOuyang Changchun 		 * bigger than tail room size.
1295c2ab5162SOuyang Changchun 		 */
1296c2ab5162SOuyang Changchun 		if (unlikely(vm2vm_mode == VM2VM_HARDWARE)) {
1297c2ab5162SOuyang Changchun 			if (likely(offset <= rte_pktmbuf_tailroom(m)))
12984d50b6acSHuawei Xie 				m->data_len += offset;
1299c2ab5162SOuyang Changchun 			else {
1300c2ab5162SOuyang Changchun 				struct rte_mbuf *seg = m;
1301c2ab5162SOuyang Changchun 
1302c2ab5162SOuyang Changchun 				while ((seg->next != NULL) &&
1303c2ab5162SOuyang Changchun 					(offset > rte_pktmbuf_tailroom(seg)))
1304c2ab5162SOuyang Changchun 					seg = seg->next;
1305c2ab5162SOuyang Changchun 
1306c2ab5162SOuyang Changchun 				seg->data_len += offset;
1307c2ab5162SOuyang Changchun 			}
1308e44fb8a4SOuyang Changchun 			m->pkt_len += offset;
1309c2ab5162SOuyang Changchun 		}
1310e44fb8a4SOuyang Changchun 
13114d50b6acSHuawei Xie 		m->vlan_tci = vlan_tag;
13128b9bb988SOuyang Changchun 	}
1313d19533e8SHuawei Xie 
1314daa02b5cSOlivier Matz 	if (m->ol_flags & RTE_MBUF_F_RX_LRO)
13159fd72e3cSJijiang Liu 		virtio_tx_offload(m);
13169fd72e3cSJijiang Liu 
1317273ecdbcSYuanhan Liu 	tx_q->m_table[tx_q->len++] = m;
1318d19533e8SHuawei Xie 	if (enable_stats) {
131956fe86f8SYuanhan Liu 		vdev->stats.tx_total++;
132056fe86f8SYuanhan Liu 		vdev->stats.tx++;
1321d19533e8SHuawei Xie 	}
1322d19533e8SHuawei Xie 
1323273ecdbcSYuanhan Liu 	if (unlikely(tx_q->len == MAX_PKT_BURST))
1324273ecdbcSYuanhan Liu 		do_drain_mbuf_table(tx_q);
1325d19533e8SHuawei Xie }
1326d19533e8SHuawei Xie 
1327d19533e8SHuawei Xie 
1328c0583d98SJerin Jacob static __rte_always_inline void
1329273ecdbcSYuanhan Liu drain_mbuf_table(struct mbuf_table *tx_q)
1330273ecdbcSYuanhan Liu {
1331273ecdbcSYuanhan Liu 	static uint64_t prev_tsc;
1332273ecdbcSYuanhan Liu 	uint64_t cur_tsc;
1333273ecdbcSYuanhan Liu 
1334273ecdbcSYuanhan Liu 	if (tx_q->len == 0)
1335d19533e8SHuawei Xie 		return;
1336273ecdbcSYuanhan Liu 
1337273ecdbcSYuanhan Liu 	cur_tsc = rte_rdtsc();
1338273ecdbcSYuanhan Liu 	if (unlikely(cur_tsc - prev_tsc > MBUF_TABLE_DRAIN_TSC)) {
1339273ecdbcSYuanhan Liu 		prev_tsc = cur_tsc;
1340273ecdbcSYuanhan Liu 
13415d8f0bafSOlivier Matz 		RTE_LOG_DP(DEBUG, VHOST_DATA,
1342273ecdbcSYuanhan Liu 			"TX queue drained after timeout with burst size %u\n",
1343273ecdbcSYuanhan Liu 			tx_q->len);
1344273ecdbcSYuanhan Liu 		do_drain_mbuf_table(tx_q);
1345d19533e8SHuawei Xie 	}
1346273ecdbcSYuanhan Liu }
1347273ecdbcSYuanhan Liu 
1348a543dcb7SXuan Ding uint16_t
1349a543dcb7SXuan Ding async_enqueue_pkts(struct vhost_dev *dev, uint16_t queue_id,
1350a543dcb7SXuan Ding 		struct rte_mbuf **pkts, uint32_t rx_count)
1351a543dcb7SXuan Ding {
1352a543dcb7SXuan Ding 	uint16_t enqueue_count;
1353a543dcb7SXuan Ding 	uint16_t enqueue_fail = 0;
1354a543dcb7SXuan Ding 	uint16_t dma_id = dma_bind[vid2socketid[dev->vid]].dmas[VIRTIO_RXQ].dev_id;
1355a543dcb7SXuan Ding 
1356a543dcb7SXuan Ding 	complete_async_pkts(dev);
1357a543dcb7SXuan Ding 	enqueue_count = rte_vhost_submit_enqueue_burst(dev->vid, queue_id,
1358a543dcb7SXuan Ding 					pkts, rx_count, dma_id, 0);
1359a543dcb7SXuan Ding 
1360a543dcb7SXuan Ding 	enqueue_fail = rx_count - enqueue_count;
1361a543dcb7SXuan Ding 	if (enqueue_fail)
1362a543dcb7SXuan Ding 		free_pkts(&pkts[enqueue_count], enqueue_fail);
1363a543dcb7SXuan Ding 
1364a543dcb7SXuan Ding 	return enqueue_count;
1365a543dcb7SXuan Ding }
1366a543dcb7SXuan Ding 
1367a543dcb7SXuan Ding uint16_t
1368a543dcb7SXuan Ding sync_enqueue_pkts(struct vhost_dev *dev, uint16_t queue_id,
1369a543dcb7SXuan Ding 		struct rte_mbuf **pkts, uint32_t rx_count)
1370a543dcb7SXuan Ding {
1371a543dcb7SXuan Ding 	return rte_vhost_enqueue_burst(dev->vid, queue_id, pkts, rx_count);
1372a543dcb7SXuan Ding }
1373a543dcb7SXuan Ding 
1374c0583d98SJerin Jacob static __rte_always_inline void
1375273ecdbcSYuanhan Liu drain_eth_rx(struct vhost_dev *vdev)
1376273ecdbcSYuanhan Liu {
1377273ecdbcSYuanhan Liu 	uint16_t rx_count, enqueue_count;
1378a68ba8e0SCheng Jiang 	struct rte_mbuf *pkts[MAX_PKT_BURST];
1379273ecdbcSYuanhan Liu 
1380273ecdbcSYuanhan Liu 	rx_count = rte_eth_rx_burst(ports[0], vdev->vmdq_rx_q,
1381273ecdbcSYuanhan Liu 				    pkts, MAX_PKT_BURST);
1382abec60e7SCheng Jiang 
1383273ecdbcSYuanhan Liu 	if (!rx_count)
1384273ecdbcSYuanhan Liu 		return;
1385273ecdbcSYuanhan Liu 
1386d19533e8SHuawei Xie 	/*
1387273ecdbcSYuanhan Liu 	 * When "enable_retry" is set, here we wait and retry when there
1388273ecdbcSYuanhan Liu 	 * is no enough free slots in the queue to hold @rx_count packets,
1389273ecdbcSYuanhan Liu 	 * to diminish packet loss.
1390273ecdbcSYuanhan Liu 	 */
1391273ecdbcSYuanhan Liu 	if (enable_retry &&
13924ecf22e3SYuanhan Liu 	    unlikely(rx_count > rte_vhost_avail_entries(vdev->vid,
1393273ecdbcSYuanhan Liu 			VIRTIO_RXQ))) {
1394273ecdbcSYuanhan Liu 		uint32_t retry;
1395273ecdbcSYuanhan Liu 
1396273ecdbcSYuanhan Liu 		for (retry = 0; retry < burst_rx_retry_num; retry++) {
1397273ecdbcSYuanhan Liu 			rte_delay_us(burst_rx_delay_time);
13984ecf22e3SYuanhan Liu 			if (rx_count <= rte_vhost_avail_entries(vdev->vid,
1399273ecdbcSYuanhan Liu 					VIRTIO_RXQ))
1400273ecdbcSYuanhan Liu 				break;
1401273ecdbcSYuanhan Liu 		}
1402273ecdbcSYuanhan Liu 	}
1403273ecdbcSYuanhan Liu 
1404a543dcb7SXuan Ding 	enqueue_count = vdev_queue_ops[vdev->vid].enqueue_pkt_burst(vdev,
1405a543dcb7SXuan Ding 					VIRTIO_RXQ, pkts, rx_count);
1406abec60e7SCheng Jiang 
1407273ecdbcSYuanhan Liu 	if (enable_stats) {
1408a68ba8e0SCheng Jiang 		__atomic_add_fetch(&vdev->stats.rx_total_atomic, rx_count,
1409a68ba8e0SCheng Jiang 				__ATOMIC_SEQ_CST);
1410a68ba8e0SCheng Jiang 		__atomic_add_fetch(&vdev->stats.rx_atomic, enqueue_count,
1411a68ba8e0SCheng Jiang 				__ATOMIC_SEQ_CST);
1412273ecdbcSYuanhan Liu 	}
1413273ecdbcSYuanhan Liu 
1414a543dcb7SXuan Ding 	if (!dma_bind[vid2socketid[vdev->vid]].dmas[VIRTIO_RXQ].async_enabled)
1415273ecdbcSYuanhan Liu 		free_pkts(pkts, rx_count);
1416273ecdbcSYuanhan Liu }
1417273ecdbcSYuanhan Liu 
1418a543dcb7SXuan Ding uint16_t async_dequeue_pkts(struct vhost_dev *dev, uint16_t queue_id,
1419a543dcb7SXuan Ding 			    struct rte_mempool *mbuf_pool,
1420a543dcb7SXuan Ding 			    struct rte_mbuf **pkts, uint16_t count)
1421a543dcb7SXuan Ding {
1422a543dcb7SXuan Ding 	int nr_inflight;
1423a543dcb7SXuan Ding 	uint16_t dequeue_count;
1424a543dcb7SXuan Ding 	int16_t dma_id = dma_bind[vid2socketid[dev->vid]].dmas[VIRTIO_TXQ].dev_id;
1425a543dcb7SXuan Ding 
1426a543dcb7SXuan Ding 	dequeue_count = rte_vhost_async_try_dequeue_burst(dev->vid, queue_id,
1427a543dcb7SXuan Ding 			mbuf_pool, pkts, count, &nr_inflight, dma_id, 0);
1428a543dcb7SXuan Ding 
1429a543dcb7SXuan Ding 	return dequeue_count;
1430a543dcb7SXuan Ding }
1431a543dcb7SXuan Ding 
1432a543dcb7SXuan Ding uint16_t sync_dequeue_pkts(struct vhost_dev *dev, uint16_t queue_id,
1433a543dcb7SXuan Ding 			   struct rte_mempool *mbuf_pool,
1434a543dcb7SXuan Ding 			   struct rte_mbuf **pkts, uint16_t count)
1435a543dcb7SXuan Ding {
1436a543dcb7SXuan Ding 	return rte_vhost_dequeue_burst(dev->vid, queue_id, mbuf_pool, pkts, count);
1437a543dcb7SXuan Ding }
1438a543dcb7SXuan Ding 
1439c0583d98SJerin Jacob static __rte_always_inline void
1440273ecdbcSYuanhan Liu drain_virtio_tx(struct vhost_dev *vdev)
1441273ecdbcSYuanhan Liu {
1442273ecdbcSYuanhan Liu 	struct rte_mbuf *pkts[MAX_PKT_BURST];
1443273ecdbcSYuanhan Liu 	uint16_t count;
1444273ecdbcSYuanhan Liu 	uint16_t i;
1445273ecdbcSYuanhan Liu 
1446a543dcb7SXuan Ding 	count = vdev_queue_ops[vdev->vid].dequeue_pkt_burst(vdev,
1447a543dcb7SXuan Ding 				VIRTIO_TXQ, mbuf_pool, pkts, MAX_PKT_BURST);
1448273ecdbcSYuanhan Liu 
1449273ecdbcSYuanhan Liu 	/* setup VMDq for the first packet */
1450273ecdbcSYuanhan Liu 	if (unlikely(vdev->ready == DEVICE_MAC_LEARNING) && count) {
1451273ecdbcSYuanhan Liu 		if (vdev->remove || link_vmdq(vdev, pkts[0]) == -1)
1452273ecdbcSYuanhan Liu 			free_pkts(pkts, count);
1453273ecdbcSYuanhan Liu 	}
1454273ecdbcSYuanhan Liu 
14557f262239SYuanhan Liu 	for (i = 0; i < count; ++i)
1456e2a1dd12SYuanhan Liu 		virtio_tx_route(vdev, pkts[i], vlan_tags[vdev->vid]);
1457273ecdbcSYuanhan Liu }
1458273ecdbcSYuanhan Liu 
1459273ecdbcSYuanhan Liu /*
1460273ecdbcSYuanhan Liu  * Main function of vhost-switch. It basically does:
1461273ecdbcSYuanhan Liu  *
1462273ecdbcSYuanhan Liu  * for each vhost device {
1463273ecdbcSYuanhan Liu  *    - drain_eth_rx()
1464273ecdbcSYuanhan Liu  *
1465273ecdbcSYuanhan Liu  *      Which drains the host eth Rx queue linked to the vhost device,
1466273ecdbcSYuanhan Liu  *      and deliver all of them to guest virito Rx ring associated with
1467273ecdbcSYuanhan Liu  *      this vhost device.
1468273ecdbcSYuanhan Liu  *
1469273ecdbcSYuanhan Liu  *    - drain_virtio_tx()
1470273ecdbcSYuanhan Liu  *
1471273ecdbcSYuanhan Liu  *      Which drains the guest virtio Tx queue and deliver all of them
1472273ecdbcSYuanhan Liu  *      to the target, which could be another vhost device, or the
1473273ecdbcSYuanhan Liu  *      physical eth dev. The route is done in function "virtio_tx_route".
1474273ecdbcSYuanhan Liu  * }
1475d19533e8SHuawei Xie  */
1476d19533e8SHuawei Xie static int
1477273ecdbcSYuanhan Liu switch_worker(void *arg __rte_unused)
1478d19533e8SHuawei Xie {
1479273ecdbcSYuanhan Liu 	unsigned i;
1480273ecdbcSYuanhan Liu 	unsigned lcore_id = rte_lcore_id();
1481273ecdbcSYuanhan Liu 	struct vhost_dev *vdev;
1482d19533e8SHuawei Xie 	struct mbuf_table *tx_q;
1483d19533e8SHuawei Xie 
14847be78d02SJosh Soref 	RTE_LOG(INFO, VHOST_DATA, "Processing on Core %u started\n", lcore_id);
1485d19533e8SHuawei Xie 
1486d19533e8SHuawei Xie 	tx_q = &lcore_tx_queue[lcore_id];
1487273ecdbcSYuanhan Liu 	for (i = 0; i < rte_lcore_count(); i++) {
1488d19533e8SHuawei Xie 		if (lcore_ids[i] == lcore_id) {
1489d19533e8SHuawei Xie 			tx_q->txq_id = i;
1490d19533e8SHuawei Xie 			break;
1491d19533e8SHuawei Xie 		}
1492d19533e8SHuawei Xie 	}
1493d19533e8SHuawei Xie 
1494d19533e8SHuawei Xie 	while(1) {
1495273ecdbcSYuanhan Liu 		drain_mbuf_table(tx_q);
1496a68ba8e0SCheng Jiang 		drain_vhost_table();
1497d19533e8SHuawei Xie 		/*
149845657a5cSYuanhan Liu 		 * Inform the configuration core that we have exited the
149945657a5cSYuanhan Liu 		 * linked list and that no devices are in use if requested.
1500d19533e8SHuawei Xie 		 */
150145657a5cSYuanhan Liu 		if (lcore_info[lcore_id].dev_removal_flag == REQUEST_DEV_REMOVAL)
150245657a5cSYuanhan Liu 			lcore_info[lcore_id].dev_removal_flag = ACK_DEV_REMOVAL;
1503d19533e8SHuawei Xie 
1504d19533e8SHuawei Xie 		/*
1505273ecdbcSYuanhan Liu 		 * Process vhost devices
1506d19533e8SHuawei Xie 		 */
150797daf19eSYuanhan Liu 		TAILQ_FOREACH(vdev, &lcore_info[lcore_id].vdev_list,
150897daf19eSYuanhan Liu 			      lcore_vdev_entry) {
1509364dddcdSHuawei Xie 			if (unlikely(vdev->remove)) {
1510e571e6b4SHuawei Xie 				unlink_vmdq(vdev);
1511e571e6b4SHuawei Xie 				vdev->ready = DEVICE_SAFE_REMOVE;
1512d19533e8SHuawei Xie 				continue;
1513d19533e8SHuawei Xie 			}
151445657a5cSYuanhan Liu 
1515273ecdbcSYuanhan Liu 			if (likely(vdev->ready == DEVICE_RX))
1516273ecdbcSYuanhan Liu 				drain_eth_rx(vdev);
1517d19533e8SHuawei Xie 
1518273ecdbcSYuanhan Liu 			if (likely(!vdev->remove))
1519273ecdbcSYuanhan Liu 				drain_virtio_tx(vdev);
1520d19533e8SHuawei Xie 		}
1521d19533e8SHuawei Xie 	}
1522d19533e8SHuawei Xie 
1523d19533e8SHuawei Xie 	return 0;
1524d19533e8SHuawei Xie }
1525d19533e8SHuawei Xie 
1526a543dcb7SXuan Ding static void
1527a543dcb7SXuan Ding vhost_clear_queue_thread_unsafe(struct vhost_dev *vdev, uint16_t queue_id)
1528a543dcb7SXuan Ding {
1529a543dcb7SXuan Ding 	uint16_t n_pkt = 0;
1530a543dcb7SXuan Ding 	int pkts_inflight;
1531a543dcb7SXuan Ding 
1532a543dcb7SXuan Ding 	int16_t dma_id = dma_bind[vid2socketid[vdev->vid]].dmas[queue_id].dev_id;
1533a543dcb7SXuan Ding 	pkts_inflight = rte_vhost_async_get_inflight_thread_unsafe(vdev->vid, queue_id);
1534a543dcb7SXuan Ding 
1535a543dcb7SXuan Ding 	struct rte_mbuf *m_cpl[pkts_inflight];
1536a543dcb7SXuan Ding 
1537a543dcb7SXuan Ding 	while (pkts_inflight) {
1538a543dcb7SXuan Ding 		n_pkt = rte_vhost_clear_queue_thread_unsafe(vdev->vid, queue_id, m_cpl,
1539a543dcb7SXuan Ding 							pkts_inflight, dma_id, 0);
1540a543dcb7SXuan Ding 		free_pkts(m_cpl, n_pkt);
1541a543dcb7SXuan Ding 		pkts_inflight = rte_vhost_async_get_inflight_thread_unsafe(vdev->vid,
1542a543dcb7SXuan Ding 									queue_id);
1543a543dcb7SXuan Ding 	}
1544a543dcb7SXuan Ding }
1545a543dcb7SXuan Ding 
1546*4cc4f3eeSYuan Wang static void
1547*4cc4f3eeSYuan Wang vhost_clear_queue(struct vhost_dev *vdev, uint16_t queue_id)
1548*4cc4f3eeSYuan Wang {
1549*4cc4f3eeSYuan Wang 	uint16_t n_pkt = 0;
1550*4cc4f3eeSYuan Wang 	int pkts_inflight;
1551*4cc4f3eeSYuan Wang 
1552*4cc4f3eeSYuan Wang 	int16_t dma_id = dma_bind[vid2socketid[vdev->vid]].dmas[queue_id].dev_id;
1553*4cc4f3eeSYuan Wang 	pkts_inflight = rte_vhost_async_get_inflight(vdev->vid, queue_id);
1554*4cc4f3eeSYuan Wang 
1555*4cc4f3eeSYuan Wang 	struct rte_mbuf *m_cpl[pkts_inflight];
1556*4cc4f3eeSYuan Wang 
1557*4cc4f3eeSYuan Wang 	while (pkts_inflight) {
1558*4cc4f3eeSYuan Wang 		n_pkt = rte_vhost_clear_queue(vdev->vid, queue_id, m_cpl,
1559*4cc4f3eeSYuan Wang 						pkts_inflight, dma_id, 0);
1560*4cc4f3eeSYuan Wang 		free_pkts(m_cpl, n_pkt);
1561*4cc4f3eeSYuan Wang 		pkts_inflight = rte_vhost_async_get_inflight(vdev->vid, queue_id);
1562*4cc4f3eeSYuan Wang 	}
1563*4cc4f3eeSYuan Wang }
1564*4cc4f3eeSYuan Wang 
1565d19533e8SHuawei Xie /*
156645657a5cSYuanhan Liu  * Remove a device from the specific data core linked list and from the
15677be78d02SJosh Soref  * main linked list. Synchronization  occurs through the use of the
156845657a5cSYuanhan Liu  * lcore dev_removal_flag. Device is made volatile here to avoid re-ordering
1569d19533e8SHuawei Xie  * of dev->remove=1 which can cause an infinite loop in the rte_pause loop.
1570d19533e8SHuawei Xie  */
1571d19533e8SHuawei Xie static void
15724ecf22e3SYuanhan Liu destroy_device(int vid)
1573d19533e8SHuawei Xie {
157416ae8abeSYuanhan Liu 	struct vhost_dev *vdev = NULL;
1575d19533e8SHuawei Xie 	int lcore;
1576a68ba8e0SCheng Jiang 	uint16_t i;
1577d19533e8SHuawei Xie 
157816ae8abeSYuanhan Liu 	TAILQ_FOREACH(vdev, &vhost_dev_list, global_vdev_entry) {
15794ecf22e3SYuanhan Liu 		if (vdev->vid == vid)
158016ae8abeSYuanhan Liu 			break;
158116ae8abeSYuanhan Liu 	}
158216ae8abeSYuanhan Liu 	if (!vdev)
158316ae8abeSYuanhan Liu 		return;
1584d19533e8SHuawei Xie 	/*set the remove flag. */
1585e571e6b4SHuawei Xie 	vdev->remove = 1;
1586e571e6b4SHuawei Xie 	while(vdev->ready != DEVICE_SAFE_REMOVE) {
1587d19533e8SHuawei Xie 		rte_pause();
1588d19533e8SHuawei Xie 	}
1589d19533e8SHuawei Xie 
1590a68ba8e0SCheng Jiang 	for (i = 0; i < RTE_MAX_LCORE; i++)
159153d3f477SJiayu Hu 		rte_free(vhost_txbuff[i * RTE_MAX_VHOST_DEVICE + vid]);
1592a68ba8e0SCheng Jiang 
1593ca059fa5SYuanhan Liu 	if (builtin_net_driver)
1594ca059fa5SYuanhan Liu 		vs_vhost_net_remove(vdev);
1595ca059fa5SYuanhan Liu 
159697daf19eSYuanhan Liu 	TAILQ_REMOVE(&lcore_info[vdev->coreid].vdev_list, vdev,
159797daf19eSYuanhan Liu 		     lcore_vdev_entry);
159897daf19eSYuanhan Liu 	TAILQ_REMOVE(&vhost_dev_list, vdev, global_vdev_entry);
159997daf19eSYuanhan Liu 
1600d19533e8SHuawei Xie 
1601d19533e8SHuawei Xie 	/* Set the dev_removal_flag on each lcore. */
1602cb056611SStephen Hemminger 	RTE_LCORE_FOREACH_WORKER(lcore)
160345657a5cSYuanhan Liu 		lcore_info[lcore].dev_removal_flag = REQUEST_DEV_REMOVAL;
1604d19533e8SHuawei Xie 
1605d19533e8SHuawei Xie 	/*
160645657a5cSYuanhan Liu 	 * Once each core has set the dev_removal_flag to ACK_DEV_REMOVAL
160745657a5cSYuanhan Liu 	 * we can be sure that they can no longer access the device removed
160845657a5cSYuanhan Liu 	 * from the linked lists and that the devices are no longer in use.
1609d19533e8SHuawei Xie 	 */
1610cb056611SStephen Hemminger 	RTE_LCORE_FOREACH_WORKER(lcore) {
161145657a5cSYuanhan Liu 		while (lcore_info[lcore].dev_removal_flag != ACK_DEV_REMOVAL)
1612d19533e8SHuawei Xie 			rte_pause();
1613d19533e8SHuawei Xie 	}
1614d19533e8SHuawei Xie 
161545657a5cSYuanhan Liu 	lcore_info[vdev->coreid].device_num--;
1616d19533e8SHuawei Xie 
161745657a5cSYuanhan Liu 	RTE_LOG(INFO, VHOST_DATA,
1618c08a3490SYuanhan Liu 		"(%d) device has been removed from data core\n",
1619e2a1dd12SYuanhan Liu 		vdev->vid);
1620d19533e8SHuawei Xie 
162153d3f477SJiayu Hu 	if (dma_bind[vid].dmas[VIRTIO_RXQ].async_enabled) {
1622*4cc4f3eeSYuan Wang 		vhost_clear_queue(vdev, VIRTIO_RXQ);
1623abec60e7SCheng Jiang 		rte_vhost_async_channel_unregister(vid, VIRTIO_RXQ);
162453d3f477SJiayu Hu 		dma_bind[vid].dmas[VIRTIO_RXQ].async_enabled = false;
1625b9f23beeSCheng Jiang 	}
1626abec60e7SCheng Jiang 
1627a543dcb7SXuan Ding 	if (dma_bind[vid].dmas[VIRTIO_TXQ].async_enabled) {
1628*4cc4f3eeSYuan Wang 		vhost_clear_queue(vdev, VIRTIO_TXQ);
1629a543dcb7SXuan Ding 		rte_vhost_async_channel_unregister(vid, VIRTIO_TXQ);
1630a543dcb7SXuan Ding 		dma_bind[vid].dmas[VIRTIO_TXQ].async_enabled = false;
1631a543dcb7SXuan Ding 	}
1632a543dcb7SXuan Ding 
1633e571e6b4SHuawei Xie 	rte_free(vdev);
1634d19533e8SHuawei Xie }
1635d19533e8SHuawei Xie 
1636a543dcb7SXuan Ding static inline int
1637a543dcb7SXuan Ding get_socketid_by_vid(int vid)
1638a543dcb7SXuan Ding {
1639a543dcb7SXuan Ding 	int i;
1640a543dcb7SXuan Ding 	char ifname[PATH_MAX];
1641a543dcb7SXuan Ding 	rte_vhost_get_ifname(vid, ifname, sizeof(ifname));
1642a543dcb7SXuan Ding 
1643a543dcb7SXuan Ding 	for (i = 0; i < nb_sockets; i++) {
1644a543dcb7SXuan Ding 		char *file = socket_files + i * PATH_MAX;
1645a543dcb7SXuan Ding 		if (strcmp(file, ifname) == 0)
1646a543dcb7SXuan Ding 			return i;
1647a543dcb7SXuan Ding 	}
1648a543dcb7SXuan Ding 
1649a543dcb7SXuan Ding 	return -1;
1650a543dcb7SXuan Ding }
1651a543dcb7SXuan Ding 
1652a543dcb7SXuan Ding static int
1653a543dcb7SXuan Ding init_vhost_queue_ops(int vid)
1654a543dcb7SXuan Ding {
1655a543dcb7SXuan Ding 	if (builtin_net_driver) {
1656a543dcb7SXuan Ding 		vdev_queue_ops[vid].enqueue_pkt_burst = builtin_enqueue_pkts;
1657a543dcb7SXuan Ding 		vdev_queue_ops[vid].dequeue_pkt_burst = builtin_dequeue_pkts;
1658a543dcb7SXuan Ding 	} else {
1659a543dcb7SXuan Ding 		if (dma_bind[vid2socketid[vid]].dmas[VIRTIO_RXQ].async_enabled)
1660a543dcb7SXuan Ding 			vdev_queue_ops[vid].enqueue_pkt_burst = async_enqueue_pkts;
1661a543dcb7SXuan Ding 		else
1662a543dcb7SXuan Ding 			vdev_queue_ops[vid].enqueue_pkt_burst = sync_enqueue_pkts;
1663a543dcb7SXuan Ding 
1664a543dcb7SXuan Ding 		if (dma_bind[vid2socketid[vid]].dmas[VIRTIO_TXQ].async_enabled)
1665a543dcb7SXuan Ding 			vdev_queue_ops[vid].dequeue_pkt_burst = async_dequeue_pkts;
1666a543dcb7SXuan Ding 		else
1667a543dcb7SXuan Ding 			vdev_queue_ops[vid].dequeue_pkt_burst = sync_dequeue_pkts;
1668a543dcb7SXuan Ding 	}
1669a543dcb7SXuan Ding 
1670a543dcb7SXuan Ding 	return 0;
1671a543dcb7SXuan Ding }
1672a543dcb7SXuan Ding 
1673a543dcb7SXuan Ding static inline int
1674a543dcb7SXuan Ding vhost_async_channel_register(int vid)
1675a543dcb7SXuan Ding {
1676a543dcb7SXuan Ding 	int rx_ret = 0, tx_ret = 0;
1677a543dcb7SXuan Ding 
1678a543dcb7SXuan Ding 	if (dma_bind[vid2socketid[vid]].dmas[VIRTIO_RXQ].dev_id != INVALID_DMA_ID) {
1679a543dcb7SXuan Ding 		rx_ret = rte_vhost_async_channel_register(vid, VIRTIO_RXQ);
1680a543dcb7SXuan Ding 		if (rx_ret == 0)
1681a543dcb7SXuan Ding 			dma_bind[vid2socketid[vid]].dmas[VIRTIO_RXQ].async_enabled = true;
1682a543dcb7SXuan Ding 	}
1683a543dcb7SXuan Ding 
1684a543dcb7SXuan Ding 	if (dma_bind[vid2socketid[vid]].dmas[VIRTIO_TXQ].dev_id != INVALID_DMA_ID) {
1685a543dcb7SXuan Ding 		tx_ret = rte_vhost_async_channel_register(vid, VIRTIO_TXQ);
1686a543dcb7SXuan Ding 		if (tx_ret == 0)
1687a543dcb7SXuan Ding 			dma_bind[vid2socketid[vid]].dmas[VIRTIO_TXQ].async_enabled = true;
1688a543dcb7SXuan Ding 	}
1689a543dcb7SXuan Ding 
1690a543dcb7SXuan Ding 	return rx_ret | tx_ret;
1691a543dcb7SXuan Ding }
1692a543dcb7SXuan Ding 
1693a543dcb7SXuan Ding 
1694a543dcb7SXuan Ding 
1695d19533e8SHuawei Xie /*
1696d19533e8SHuawei Xie  * A new device is added to a data core. First the device is added to the main linked list
169710b4270fSRami Rosen  * and then allocated to a specific data core.
1698d19533e8SHuawei Xie  */
1699d19533e8SHuawei Xie static int
17004ecf22e3SYuanhan Liu new_device(int vid)
1701d19533e8SHuawei Xie {
1702d19533e8SHuawei Xie 	int lcore, core_add = 0;
1703a68ba8e0SCheng Jiang 	uint16_t i;
1704d19533e8SHuawei Xie 	uint32_t device_num_min = num_devices;
1705e571e6b4SHuawei Xie 	struct vhost_dev *vdev;
1706a543dcb7SXuan Ding 	int ret;
1707a543dcb7SXuan Ding 
1708fdf20fa7SSergio Gonzalez Monroy 	vdev = rte_zmalloc("vhost device", sizeof(*vdev), RTE_CACHE_LINE_SIZE);
1709e571e6b4SHuawei Xie 	if (vdev == NULL) {
1710c08a3490SYuanhan Liu 		RTE_LOG(INFO, VHOST_DATA,
17117f262239SYuanhan Liu 			"(%d) couldn't allocate memory for vhost dev\n",
1712e2a1dd12SYuanhan Liu 			vid);
1713e571e6b4SHuawei Xie 		return -1;
1714e571e6b4SHuawei Xie 	}
1715e2a1dd12SYuanhan Liu 	vdev->vid = vid;
1716d19533e8SHuawei Xie 
1717a68ba8e0SCheng Jiang 	for (i = 0; i < RTE_MAX_LCORE; i++) {
171853d3f477SJiayu Hu 		vhost_txbuff[i * RTE_MAX_VHOST_DEVICE + vid]
1719a68ba8e0SCheng Jiang 			= rte_zmalloc("vhost bufftable",
1720a68ba8e0SCheng Jiang 				sizeof(struct vhost_bufftable),
1721a68ba8e0SCheng Jiang 				RTE_CACHE_LINE_SIZE);
1722a68ba8e0SCheng Jiang 
172353d3f477SJiayu Hu 		if (vhost_txbuff[i * RTE_MAX_VHOST_DEVICE + vid] == NULL) {
1724a68ba8e0SCheng Jiang 			RTE_LOG(INFO, VHOST_DATA,
1725a68ba8e0SCheng Jiang 			  "(%d) couldn't allocate memory for vhost TX\n", vid);
1726a68ba8e0SCheng Jiang 			return -1;
1727a68ba8e0SCheng Jiang 		}
1728a68ba8e0SCheng Jiang 	}
1729a68ba8e0SCheng Jiang 
1730a543dcb7SXuan Ding 	int socketid = get_socketid_by_vid(vid);
1731a543dcb7SXuan Ding 	if (socketid == -1)
1732a543dcb7SXuan Ding 		return -1;
1733a543dcb7SXuan Ding 
1734a543dcb7SXuan Ding 	init_vid2socketid_array(vid, socketid);
1735a543dcb7SXuan Ding 
1736a543dcb7SXuan Ding 	ret =  vhost_async_channel_register(vid);
1737a543dcb7SXuan Ding 
1738a543dcb7SXuan Ding 	if (init_vhost_queue_ops(vid) != 0)
1739a543dcb7SXuan Ding 		return -1;
1740a543dcb7SXuan Ding 
1741ca059fa5SYuanhan Liu 	if (builtin_net_driver)
1742ca059fa5SYuanhan Liu 		vs_vhost_net_setup(vdev);
1743ca059fa5SYuanhan Liu 
174497daf19eSYuanhan Liu 	TAILQ_INSERT_TAIL(&vhost_dev_list, vdev, global_vdev_entry);
1745e2a1dd12SYuanhan Liu 	vdev->vmdq_rx_q = vid * queues_per_pool + vmdq_queue_base;
1746d19533e8SHuawei Xie 
1747d19533e8SHuawei Xie 	/*reset ready flag*/
1748e571e6b4SHuawei Xie 	vdev->ready = DEVICE_MAC_LEARNING;
1749e571e6b4SHuawei Xie 	vdev->remove = 0;
1750d19533e8SHuawei Xie 
1751d19533e8SHuawei Xie 	/* Find a suitable lcore to add the device. */
1752cb056611SStephen Hemminger 	RTE_LCORE_FOREACH_WORKER(lcore) {
175345657a5cSYuanhan Liu 		if (lcore_info[lcore].device_num < device_num_min) {
175445657a5cSYuanhan Liu 			device_num_min = lcore_info[lcore].device_num;
1755d19533e8SHuawei Xie 			core_add = lcore;
1756d19533e8SHuawei Xie 		}
1757d19533e8SHuawei Xie 	}
1758e571e6b4SHuawei Xie 	vdev->coreid = core_add;
1759e571e6b4SHuawei Xie 
176097daf19eSYuanhan Liu 	TAILQ_INSERT_TAIL(&lcore_info[vdev->coreid].vdev_list, vdev,
176197daf19eSYuanhan Liu 			  lcore_vdev_entry);
176245657a5cSYuanhan Liu 	lcore_info[vdev->coreid].device_num++;
1763d19533e8SHuawei Xie 
1764d19533e8SHuawei Xie 	/* Disable notifications. */
17654ecf22e3SYuanhan Liu 	rte_vhost_enable_guest_notification(vid, VIRTIO_RXQ, 0);
17664ecf22e3SYuanhan Liu 	rte_vhost_enable_guest_notification(vid, VIRTIO_TXQ, 0);
1767d19533e8SHuawei Xie 
1768c08a3490SYuanhan Liu 	RTE_LOG(INFO, VHOST_DATA,
1769c08a3490SYuanhan Liu 		"(%d) device has been added to data core %d\n",
1770e2a1dd12SYuanhan Liu 		vid, vdev->coreid);
1771d19533e8SHuawei Xie 
177253d3f477SJiayu Hu 	return ret;
17736e9a9d2aSCheng Jiang }
1774abec60e7SCheng Jiang 
1775b9f23beeSCheng Jiang static int
1776b9f23beeSCheng Jiang vring_state_changed(int vid, uint16_t queue_id, int enable)
1777b9f23beeSCheng Jiang {
1778b9f23beeSCheng Jiang 	struct vhost_dev *vdev = NULL;
1779b9f23beeSCheng Jiang 
1780b9f23beeSCheng Jiang 	TAILQ_FOREACH(vdev, &vhost_dev_list, global_vdev_entry) {
1781b9f23beeSCheng Jiang 		if (vdev->vid == vid)
1782b9f23beeSCheng Jiang 			break;
1783b9f23beeSCheng Jiang 	}
1784b9f23beeSCheng Jiang 	if (!vdev)
1785b9f23beeSCheng Jiang 		return -1;
1786b9f23beeSCheng Jiang 
1787a543dcb7SXuan Ding 	if (dma_bind[vid2socketid[vid]].dmas[queue_id].async_enabled) {
1788a543dcb7SXuan Ding 		if (!enable)
1789a543dcb7SXuan Ding 			vhost_clear_queue_thread_unsafe(vdev, queue_id);
1790b9f23beeSCheng Jiang 	}
1791b9f23beeSCheng Jiang 
1792b9f23beeSCheng Jiang 	return 0;
1793b9f23beeSCheng Jiang }
1794b9f23beeSCheng Jiang 
1795d19533e8SHuawei Xie /*
1796d19533e8SHuawei Xie  * These callback allow devices to be added to the data core when configuration
1797d19533e8SHuawei Xie  * has been fully complete.
1798d19533e8SHuawei Xie  */
1799ab4bb424SMaxime Coquelin static const struct rte_vhost_device_ops virtio_net_device_ops =
1800d19533e8SHuawei Xie {
1801d19533e8SHuawei Xie 	.new_device =  new_device,
1802d19533e8SHuawei Xie 	.destroy_device = destroy_device,
1803b9f23beeSCheng Jiang 	.vring_state_changed = vring_state_changed,
1804d19533e8SHuawei Xie };
1805d19533e8SHuawei Xie 
1806d19533e8SHuawei Xie /*
1807d19533e8SHuawei Xie  * This is a thread will wake up after a period to print stats if the user has
1808d19533e8SHuawei Xie  * enabled them.
1809d19533e8SHuawei Xie  */
1810fa204854SOlivier Matz static void *
1811fa204854SOlivier Matz print_stats(__rte_unused void *arg)
1812d19533e8SHuawei Xie {
181345657a5cSYuanhan Liu 	struct vhost_dev *vdev;
1814d19533e8SHuawei Xie 	uint64_t tx_dropped, rx_dropped;
1815d19533e8SHuawei Xie 	uint64_t tx, tx_total, rx, rx_total;
1816d19533e8SHuawei Xie 	const char clr[] = { 27, '[', '2', 'J', '\0' };
1817d19533e8SHuawei Xie 	const char top_left[] = { 27, '[', '1', ';', '1', 'H','\0' };
1818d19533e8SHuawei Xie 
1819d19533e8SHuawei Xie 	while(1) {
1820d19533e8SHuawei Xie 		sleep(enable_stats);
1821d19533e8SHuawei Xie 
1822d19533e8SHuawei Xie 		/* Clear screen and move to top left */
182356fe86f8SYuanhan Liu 		printf("%s%s\n", clr, top_left);
182456fe86f8SYuanhan Liu 		printf("Device statistics =================================\n");
1825d19533e8SHuawei Xie 
182697daf19eSYuanhan Liu 		TAILQ_FOREACH(vdev, &vhost_dev_list, global_vdev_entry) {
182756fe86f8SYuanhan Liu 			tx_total   = vdev->stats.tx_total;
182856fe86f8SYuanhan Liu 			tx         = vdev->stats.tx;
1829d19533e8SHuawei Xie 			tx_dropped = tx_total - tx;
183056fe86f8SYuanhan Liu 
1831a68ba8e0SCheng Jiang 			rx_total = __atomic_load_n(&vdev->stats.rx_total_atomic,
1832a68ba8e0SCheng Jiang 				__ATOMIC_SEQ_CST);
1833a68ba8e0SCheng Jiang 			rx         = __atomic_load_n(&vdev->stats.rx_atomic,
1834a68ba8e0SCheng Jiang 				__ATOMIC_SEQ_CST);
1835d19533e8SHuawei Xie 			rx_dropped = rx_total - rx;
1836d19533e8SHuawei Xie 
1837c08a3490SYuanhan Liu 			printf("Statistics for device %d\n"
183856fe86f8SYuanhan Liu 				"-----------------------\n"
183956fe86f8SYuanhan Liu 				"TX total:              %" PRIu64 "\n"
184056fe86f8SYuanhan Liu 				"TX dropped:            %" PRIu64 "\n"
184156fe86f8SYuanhan Liu 				"TX successful:         %" PRIu64 "\n"
184256fe86f8SYuanhan Liu 				"RX total:              %" PRIu64 "\n"
184356fe86f8SYuanhan Liu 				"RX dropped:            %" PRIu64 "\n"
184456fe86f8SYuanhan Liu 				"RX successful:         %" PRIu64 "\n",
18454ecf22e3SYuanhan Liu 				vdev->vid,
184656fe86f8SYuanhan Liu 				tx_total, tx_dropped, tx,
184756fe86f8SYuanhan Liu 				rx_total, rx_dropped, rx);
1848d19533e8SHuawei Xie 		}
184956fe86f8SYuanhan Liu 
185056fe86f8SYuanhan Liu 		printf("===================================================\n");
18513ee6f706SGeorgiy Levashov 
18523ee6f706SGeorgiy Levashov 		fflush(stdout);
1853d19533e8SHuawei Xie 	}
1854fa204854SOlivier Matz 
1855fa204854SOlivier Matz 	return NULL;
1856d19533e8SHuawei Xie }
1857d19533e8SHuawei Xie 
1858ad0eef4dSJiayu Hu static void
1859ad0eef4dSJiayu Hu unregister_drivers(int socket_num)
1860ad0eef4dSJiayu Hu {
1861ad0eef4dSJiayu Hu 	int i, ret;
1862ad0eef4dSJiayu Hu 
1863ad0eef4dSJiayu Hu 	for (i = 0; i < socket_num; i++) {
1864ad0eef4dSJiayu Hu 		ret = rte_vhost_driver_unregister(socket_files + i * PATH_MAX);
1865ad0eef4dSJiayu Hu 		if (ret != 0)
1866ad0eef4dSJiayu Hu 			RTE_LOG(ERR, VHOST_CONFIG,
1867ad0eef4dSJiayu Hu 				"Fail to unregister vhost driver for %s.\n",
1868ad0eef4dSJiayu Hu 				socket_files + i * PATH_MAX);
1869ad0eef4dSJiayu Hu 	}
1870ad0eef4dSJiayu Hu }
1871ad0eef4dSJiayu Hu 
1872c83d2d00SOuyang Changchun /* When we receive a INT signal, unregister vhost driver */
1873c83d2d00SOuyang Changchun static void
1874c83d2d00SOuyang Changchun sigint_handler(__rte_unused int signum)
1875c83d2d00SOuyang Changchun {
1876c83d2d00SOuyang Changchun 	/* Unregister vhost driver. */
1877ad0eef4dSJiayu Hu 	unregister_drivers(nb_sockets);
1878ad0eef4dSJiayu Hu 
1879c83d2d00SOuyang Changchun 	exit(0);
1880c83d2d00SOuyang Changchun }
1881d19533e8SHuawei Xie 
188253d3f477SJiayu Hu static void
188353d3f477SJiayu Hu reset_dma(void)
188453d3f477SJiayu Hu {
188553d3f477SJiayu Hu 	int i;
188653d3f477SJiayu Hu 
188753d3f477SJiayu Hu 	for (i = 0; i < RTE_MAX_VHOST_DEVICE; i++) {
188853d3f477SJiayu Hu 		int j;
188953d3f477SJiayu Hu 
189053d3f477SJiayu Hu 		for (j = 0; j < RTE_MAX_QUEUES_PER_PORT * 2; j++) {
189153d3f477SJiayu Hu 			dma_bind[i].dmas[j].dev_id = INVALID_DMA_ID;
189253d3f477SJiayu Hu 			dma_bind[i].dmas[j].async_enabled = false;
189353d3f477SJiayu Hu 		}
189453d3f477SJiayu Hu 	}
189553d3f477SJiayu Hu 
189653d3f477SJiayu Hu 	for (i = 0; i < RTE_DMADEV_DEFAULT_MAX; i++)
189753d3f477SJiayu Hu 		dmas_id[i] = INVALID_DMA_ID;
189853d3f477SJiayu Hu }
189953d3f477SJiayu Hu 
1900bdb19b77SYuanhan Liu /*
1901164a601bSYuanhan Liu  * Main function, does initialisation and calls the per-lcore functions.
1902d19533e8SHuawei Xie  */
1903d19533e8SHuawei Xie int
190498a16481SDavid Marchand main(int argc, char *argv[])
1905d19533e8SHuawei Xie {
1906d19533e8SHuawei Xie 	unsigned lcore_id, core_id = 0;
1907d19533e8SHuawei Xie 	unsigned nb_ports, valid_num_ports;
1908ad0eef4dSJiayu Hu 	int ret, i;
1909f8244c63SZhiyong Yang 	uint16_t portid;
1910d19533e8SHuawei Xie 	static pthread_t tid;
1911ca7036b4SDavid Marchand 	uint64_t flags = RTE_VHOST_USER_NET_COMPLIANT_OL_FLAGS;
1912d19533e8SHuawei Xie 
1913c83d2d00SOuyang Changchun 	signal(SIGINT, sigint_handler);
1914c83d2d00SOuyang Changchun 
1915d19533e8SHuawei Xie 	/* init EAL */
1916d19533e8SHuawei Xie 	ret = rte_eal_init(argc, argv);
1917d19533e8SHuawei Xie 	if (ret < 0)
1918d19533e8SHuawei Xie 		rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
1919d19533e8SHuawei Xie 	argc -= ret;
1920d19533e8SHuawei Xie 	argv += ret;
1921d19533e8SHuawei Xie 
192253d3f477SJiayu Hu 	/* initialize dma structures */
192353d3f477SJiayu Hu 	reset_dma();
192453d3f477SJiayu Hu 
1925d19533e8SHuawei Xie 	/* parse app arguments */
1926d19533e8SHuawei Xie 	ret = us_vhost_parse_args(argc, argv);
1927d19533e8SHuawei Xie 	if (ret < 0)
1928d19533e8SHuawei Xie 		rte_exit(EXIT_FAILURE, "Invalid argument\n");
1929d19533e8SHuawei Xie 
1930b3bee7d8SYong Wang 	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
193145657a5cSYuanhan Liu 		TAILQ_INIT(&lcore_info[lcore_id].vdev_list);
193245657a5cSYuanhan Liu 
1933d19533e8SHuawei Xie 		if (rte_lcore_is_enabled(lcore_id))
1934d19533e8SHuawei Xie 			lcore_ids[core_id++] = lcore_id;
1935b3bee7d8SYong Wang 	}
1936d19533e8SHuawei Xie 
1937d19533e8SHuawei Xie 	if (rte_lcore_count() > RTE_MAX_LCORE)
1938d19533e8SHuawei Xie 		rte_exit(EXIT_FAILURE,"Not enough cores\n");
1939d19533e8SHuawei Xie 
1940d19533e8SHuawei Xie 	/* Get the number of physical ports. */
1941d9a42a69SThomas Monjalon 	nb_ports = rte_eth_dev_count_avail();
1942d19533e8SHuawei Xie 
1943d19533e8SHuawei Xie 	/*
1944d19533e8SHuawei Xie 	 * Update the global var NUM_PORTS and global array PORTS
1945d19533e8SHuawei Xie 	 * and get value of var VALID_NUM_PORTS according to system ports number
1946d19533e8SHuawei Xie 	 */
1947d19533e8SHuawei Xie 	valid_num_ports = check_ports_num(nb_ports);
1948d19533e8SHuawei Xie 
1949d19533e8SHuawei Xie 	if ((valid_num_ports ==  0) || (valid_num_ports > MAX_SUP_PORTS)) {
1950d19533e8SHuawei Xie 		RTE_LOG(INFO, VHOST_PORT, "Current enabled port number is %u,"
1951d19533e8SHuawei Xie 			"but only %u port can be enabled\n",num_ports, MAX_SUP_PORTS);
1952d19533e8SHuawei Xie 		return -1;
1953d19533e8SHuawei Xie 	}
1954d19533e8SHuawei Xie 
1955bdb19b77SYuanhan Liu 	/*
1956bdb19b77SYuanhan Liu 	 * FIXME: here we are trying to allocate mbufs big enough for
1957bdb19b77SYuanhan Liu 	 * @MAX_QUEUES, but the truth is we're never going to use that
1958bdb19b77SYuanhan Liu 	 * many queues here. We probably should only do allocation for
1959bdb19b77SYuanhan Liu 	 * those queues we are going to use.
1960bdb19b77SYuanhan Liu 	 */
1961917229c2SWenwu Ma 	mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL", total_num_mbufs,
1962917229c2SWenwu Ma 					    MBUF_CACHE_SIZE, 0, MBUF_DATA_SIZE,
1963917229c2SWenwu Ma 					    rte_socket_id());
1964917229c2SWenwu Ma 	if (mbuf_pool == NULL)
1965917229c2SWenwu Ma 		rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n");
1966d19533e8SHuawei Xie 
1967d19533e8SHuawei Xie 	if (vm2vm_mode == VM2VM_HARDWARE) {
1968d19533e8SHuawei Xie 		/* Enable VT loop back to let L2 switch to do it. */
1969d19533e8SHuawei Xie 		vmdq_conf_default.rx_adv_conf.vmdq_rx_conf.enable_loop_back = 1;
19701f49ec15SThomas Monjalon 		RTE_LOG(DEBUG, VHOST_CONFIG,
1971d19533e8SHuawei Xie 			"Enable loop back for L2 switch in vmdq.\n");
1972d19533e8SHuawei Xie 	}
1973d19533e8SHuawei Xie 
1974d19533e8SHuawei Xie 	/* initialize all ports */
19758728ccf3SThomas Monjalon 	RTE_ETH_FOREACH_DEV(portid) {
1976d19533e8SHuawei Xie 		/* skip ports that are not enabled */
1977d19533e8SHuawei Xie 		if ((enabled_port_mask & (1 << portid)) == 0) {
1978d19533e8SHuawei Xie 			RTE_LOG(INFO, VHOST_PORT,
1979d19533e8SHuawei Xie 				"Skipping disabled port %d\n", portid);
1980d19533e8SHuawei Xie 			continue;
1981d19533e8SHuawei Xie 		}
1982d19533e8SHuawei Xie 		if (port_init(portid) != 0)
1983d19533e8SHuawei Xie 			rte_exit(EXIT_FAILURE,
1984d19533e8SHuawei Xie 				"Cannot initialize network ports\n");
1985d19533e8SHuawei Xie 	}
1986d19533e8SHuawei Xie 
1987d19533e8SHuawei Xie 	/* Enable stats if the user option is set. */
198867b6d303SRavi Kerur 	if (enable_stats) {
1989fa204854SOlivier Matz 		ret = rte_ctrl_thread_create(&tid, "print-stats", NULL,
1990fa204854SOlivier Matz 					print_stats, NULL);
1991fa204854SOlivier Matz 		if (ret < 0)
199267b6d303SRavi Kerur 			rte_exit(EXIT_FAILURE,
199367b6d303SRavi Kerur 				"Cannot create print-stats thread\n");
199467b6d303SRavi Kerur 	}
1995d19533e8SHuawei Xie 
1996d19533e8SHuawei Xie 	/* Launch all data cores. */
1997cb056611SStephen Hemminger 	RTE_LCORE_FOREACH_WORKER(lcore_id)
199868363d85SYuanhan Liu 		rte_eal_remote_launch(switch_worker, NULL, lcore_id);
1999d19533e8SHuawei Xie 
20002345e3beSYuanhan Liu 	if (client_mode)
20012345e3beSYuanhan Liu 		flags |= RTE_VHOST_USER_CLIENT;
20022345e3beSYuanhan Liu 
200353d3f477SJiayu Hu 	for (i = 0; i < dma_count; i++) {
200453d3f477SJiayu Hu 		if (rte_vhost_async_dma_configure(dmas_id[i], 0) < 0) {
200553d3f477SJiayu Hu 			RTE_LOG(ERR, VHOST_PORT, "Failed to configure DMA in vhost.\n");
200653d3f477SJiayu Hu 			rte_exit(EXIT_FAILURE, "Cannot use given DMA device\n");
200753d3f477SJiayu Hu 		}
200853d3f477SJiayu Hu 	}
200953d3f477SJiayu Hu 
2010bde19a4dSJiayu Hu 	/* Register vhost user driver to handle vhost messages. */
2011ad0eef4dSJiayu Hu 	for (i = 0; i < nb_sockets; i++) {
20120917f9d1SYuanhan Liu 		char *file = socket_files + i * PATH_MAX;
2013a68ba8e0SCheng Jiang 
2014a543dcb7SXuan Ding 		if (dma_count && get_async_flag_by_socketid(i) != 0)
2015abec60e7SCheng Jiang 			flags = flags | RTE_VHOST_USER_ASYNC_COPY;
2016abec60e7SCheng Jiang 
20170917f9d1SYuanhan Liu 		ret = rte_vhost_driver_register(file, flags);
2018ad0eef4dSJiayu Hu 		if (ret != 0) {
2019ad0eef4dSJiayu Hu 			unregister_drivers(i);
2020ad0eef4dSJiayu Hu 			rte_exit(EXIT_FAILURE,
2021ad0eef4dSJiayu Hu 				"vhost driver register failure.\n");
2022ad0eef4dSJiayu Hu 		}
2023ca059fa5SYuanhan Liu 
2024ca059fa5SYuanhan Liu 		if (builtin_net_driver)
2025ca059fa5SYuanhan Liu 			rte_vhost_driver_set_features(file, VIRTIO_NET_FEATURES);
2026ca059fa5SYuanhan Liu 
20270917f9d1SYuanhan Liu 		if (mergeable == 0) {
20280917f9d1SYuanhan Liu 			rte_vhost_driver_disable_features(file,
20290917f9d1SYuanhan Liu 				1ULL << VIRTIO_NET_F_MRG_RXBUF);
20300917f9d1SYuanhan Liu 		}
20310917f9d1SYuanhan Liu 
20320917f9d1SYuanhan Liu 		if (enable_tx_csum == 0) {
20330917f9d1SYuanhan Liu 			rte_vhost_driver_disable_features(file,
20340917f9d1SYuanhan Liu 				1ULL << VIRTIO_NET_F_CSUM);
20350917f9d1SYuanhan Liu 		}
20360917f9d1SYuanhan Liu 
20370917f9d1SYuanhan Liu 		if (enable_tso == 0) {
20380917f9d1SYuanhan Liu 			rte_vhost_driver_disable_features(file,
20390917f9d1SYuanhan Liu 				1ULL << VIRTIO_NET_F_HOST_TSO4);
20400917f9d1SYuanhan Liu 			rte_vhost_driver_disable_features(file,
20410917f9d1SYuanhan Liu 				1ULL << VIRTIO_NET_F_HOST_TSO6);
20420917f9d1SYuanhan Liu 			rte_vhost_driver_disable_features(file,
20430917f9d1SYuanhan Liu 				1ULL << VIRTIO_NET_F_GUEST_TSO4);
20440917f9d1SYuanhan Liu 			rte_vhost_driver_disable_features(file,
20450917f9d1SYuanhan Liu 				1ULL << VIRTIO_NET_F_GUEST_TSO6);
20460917f9d1SYuanhan Liu 		}
20470917f9d1SYuanhan Liu 
20480917f9d1SYuanhan Liu 		if (promiscuous) {
20490917f9d1SYuanhan Liu 			rte_vhost_driver_enable_features(file,
20500917f9d1SYuanhan Liu 				1ULL << VIRTIO_NET_F_CTRL_RX);
20510917f9d1SYuanhan Liu 		}
2052d19533e8SHuawei Xie 
205393433b63SYuanhan Liu 		ret = rte_vhost_driver_callback_register(file,
205493433b63SYuanhan Liu 			&virtio_net_device_ops);
205593433b63SYuanhan Liu 		if (ret != 0) {
205693433b63SYuanhan Liu 			rte_exit(EXIT_FAILURE,
205793433b63SYuanhan Liu 				"failed to register vhost driver callbacks.\n");
205893433b63SYuanhan Liu 		}
2059af147591SYuanhan Liu 
2060af147591SYuanhan Liu 		if (rte_vhost_driver_start(file) < 0) {
2061af147591SYuanhan Liu 			rte_exit(EXIT_FAILURE,
2062af147591SYuanhan Liu 				"failed to start vhost driver.\n");
2063af147591SYuanhan Liu 		}
206493433b63SYuanhan Liu 	}
2065d19533e8SHuawei Xie 
2066cb056611SStephen Hemminger 	RTE_LCORE_FOREACH_WORKER(lcore_id)
2067af147591SYuanhan Liu 		rte_eal_wait_lcore(lcore_id);
2068af147591SYuanhan Liu 
206910aa3757SChengchang Tang 	/* clean up the EAL */
207010aa3757SChengchang Tang 	rte_eal_cleanup();
2071d19533e8SHuawei Xie 
207210aa3757SChengchang Tang 	return 0;
2073d19533e8SHuawei Xie }
2074