13998e2a0SBruce Richardson /* SPDX-License-Identifier: BSD-3-Clause 23998e2a0SBruce Richardson * Copyright(c) 2010-2017 Intel Corporation 3d19533e8SHuawei Xie */ 4d19533e8SHuawei Xie 5d19533e8SHuawei Xie #include <arpa/inet.h> 6d19533e8SHuawei Xie #include <getopt.h> 7d19533e8SHuawei Xie #include <linux/if_ether.h> 8d19533e8SHuawei Xie #include <linux/if_vlan.h> 9d19533e8SHuawei Xie #include <linux/virtio_net.h> 10d19533e8SHuawei Xie #include <linux/virtio_ring.h> 11d19533e8SHuawei Xie #include <signal.h> 12d19533e8SHuawei Xie #include <stdint.h> 13d19533e8SHuawei Xie #include <sys/eventfd.h> 14d19533e8SHuawei Xie #include <sys/param.h> 15d19533e8SHuawei Xie #include <unistd.h> 16d19533e8SHuawei Xie 17d19533e8SHuawei Xie #include <rte_cycles.h> 18d19533e8SHuawei Xie #include <rte_ethdev.h> 19d19533e8SHuawei Xie #include <rte_log.h> 20d19533e8SHuawei Xie #include <rte_string_fns.h> 21d19533e8SHuawei Xie #include <rte_malloc.h> 22ca7036b4SDavid Marchand #include <rte_net.h> 23a798beb4SYuanhan Liu #include <rte_vhost.h> 24691693c6SJijiang Liu #include <rte_ip.h> 259fd72e3cSJijiang Liu #include <rte_tcp.h> 26577329e6SJerin Jacob #include <rte_pause.h> 2753d3f477SJiayu Hu #include <rte_dmadev.h> 2853d3f477SJiayu Hu #include <rte_vhost_async.h> 29d19533e8SHuawei Xie 30d19533e8SHuawei Xie #include "main.h" 31d19533e8SHuawei Xie 32f17eb179SBernard Iremonger #ifndef MAX_QUEUES 33f17eb179SBernard Iremonger #define MAX_QUEUES 128 34f17eb179SBernard Iremonger #endif 35d19533e8SHuawei Xie 36*917229c2SWenwu Ma #define NUM_MBUFS_DEFAULT 0x24000 37*917229c2SWenwu Ma 38d19533e8SHuawei Xie /* the maximum number of external ports supported */ 39d19533e8SHuawei Xie #define MAX_SUP_PORTS 1 40d19533e8SHuawei Xie 41d19533e8SHuawei Xie #define MBUF_CACHE_SIZE 128 42824cb29cSKonstantin Ananyev #define MBUF_DATA_SIZE RTE_MBUF_DEFAULT_BUF_SIZE 43d19533e8SHuawei Xie 44d19533e8SHuawei Xie #define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */ 45d19533e8SHuawei Xie 46d19533e8SHuawei Xie 
#define BURST_RX_WAIT_US 15 /* Defines how long we wait between retries on RX */ 47d19533e8SHuawei Xie #define BURST_RX_RETRIES 4 /* Number of retries on RX. */ 48d19533e8SHuawei Xie 49d19533e8SHuawei Xie #define JUMBO_FRAME_MAX_SIZE 0x2600 501bb4a528SFerruh Yigit #define MAX_MTU (JUMBO_FRAME_MAX_SIZE - (RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN)) 51d19533e8SHuawei Xie 52d19533e8SHuawei Xie /* State of virtio device. */ 53d19533e8SHuawei Xie #define DEVICE_MAC_LEARNING 0 54d19533e8SHuawei Xie #define DEVICE_RX 1 55d19533e8SHuawei Xie #define DEVICE_SAFE_REMOVE 2 56d19533e8SHuawei Xie 57d19533e8SHuawei Xie /* Configurable number of RX/TX ring descriptors */ 58d19533e8SHuawei Xie #define RTE_TEST_RX_DESC_DEFAULT 1024 59d19533e8SHuawei Xie #define RTE_TEST_TX_DESC_DEFAULT 512 60d19533e8SHuawei Xie 61d19533e8SHuawei Xie #define INVALID_PORT_ID 0xFF 6253d3f477SJiayu Hu #define INVALID_DMA_ID -1 6353d3f477SJiayu Hu 6453d3f477SJiayu Hu #define DMA_RING_SIZE 4096 6553d3f477SJiayu Hu 66*917229c2SWenwu Ma /* number of mbufs in all pools - if specified on command-line. 
*/ 67*917229c2SWenwu Ma static int total_num_mbufs = NUM_MBUFS_DEFAULT; 68*917229c2SWenwu Ma 6953d3f477SJiayu Hu struct dma_for_vhost dma_bind[RTE_MAX_VHOST_DEVICE]; 7053d3f477SJiayu Hu int16_t dmas_id[RTE_DMADEV_DEFAULT_MAX]; 7153d3f477SJiayu Hu static int dma_count; 72d19533e8SHuawei Xie 73d19533e8SHuawei Xie /* mask of enabled ports */ 74d19533e8SHuawei Xie static uint32_t enabled_port_mask = 0; 75d19533e8SHuawei Xie 7690924cafSOuyang Changchun /* Promiscuous mode */ 7790924cafSOuyang Changchun static uint32_t promiscuous; 7890924cafSOuyang Changchun 79d19533e8SHuawei Xie /* number of devices/queues to support*/ 80d19533e8SHuawei Xie static uint32_t num_queues = 0; 81a981294bSHuawei Xie static uint32_t num_devices; 82d19533e8SHuawei Xie 8368363d85SYuanhan Liu static struct rte_mempool *mbuf_pool; 8428deb020SHuawei Xie static int mergeable; 85d19533e8SHuawei Xie 86d19533e8SHuawei Xie /* Enable VM2VM communications. If this is disabled then the MAC address compare is skipped. */ 87d19533e8SHuawei Xie typedef enum { 88d19533e8SHuawei Xie VM2VM_DISABLED = 0, 89d19533e8SHuawei Xie VM2VM_SOFTWARE = 1, 90d19533e8SHuawei Xie VM2VM_HARDWARE = 2, 91d19533e8SHuawei Xie VM2VM_LAST 92d19533e8SHuawei Xie } vm2vm_type; 93d19533e8SHuawei Xie static vm2vm_type vm2vm_mode = VM2VM_SOFTWARE; 94d19533e8SHuawei Xie 95d19533e8SHuawei Xie /* Enable stats. */ 96d19533e8SHuawei Xie static uint32_t enable_stats = 0; 97d19533e8SHuawei Xie /* Enable retries on RX. 
*/ 98d19533e8SHuawei Xie static uint32_t enable_retry = 1; 999fd72e3cSJijiang Liu 1009fd72e3cSJijiang Liu /* Disable TX checksum offload */ 1019fd72e3cSJijiang Liu static uint32_t enable_tx_csum; 1029fd72e3cSJijiang Liu 1039fd72e3cSJijiang Liu /* Disable TSO offload */ 1049fd72e3cSJijiang Liu static uint32_t enable_tso; 1059fd72e3cSJijiang Liu 1062345e3beSYuanhan Liu static int client_mode; 1072345e3beSYuanhan Liu 108ca059fa5SYuanhan Liu static int builtin_net_driver; 109ca059fa5SYuanhan Liu 110d19533e8SHuawei Xie /* Specify timeout (in useconds) between retries on RX. */ 111d19533e8SHuawei Xie static uint32_t burst_rx_delay_time = BURST_RX_WAIT_US; 112d19533e8SHuawei Xie /* Specify the number of retries on RX. */ 113d19533e8SHuawei Xie static uint32_t burst_rx_retry_num = BURST_RX_RETRIES; 114d19533e8SHuawei Xie 115ad0eef4dSJiayu Hu /* Socket file paths. Can be set by user */ 116ad0eef4dSJiayu Hu static char *socket_files; 117ad0eef4dSJiayu Hu static int nb_sockets; 118d19533e8SHuawei Xie 1197be78d02SJosh Soref /* empty VMDq configuration structure. Filled in programmatically */ 120d19533e8SHuawei Xie static struct rte_eth_conf vmdq_conf_default = { 121d19533e8SHuawei Xie .rxmode = { 122295968d1SFerruh Yigit .mq_mode = RTE_ETH_MQ_RX_VMDQ_ONLY, 123d19533e8SHuawei Xie .split_hdr_size = 0, 124d19533e8SHuawei Xie /* 125cc22d8caSShahaf Shuler * VLAN strip is necessary for 1G NIC such as I350, 126d19533e8SHuawei Xie * this fixes bug of ipv4 forwarding in guest can't 1277be78d02SJosh Soref * forward packets from one virtio dev to another virtio dev. 
128d19533e8SHuawei Xie */ 129295968d1SFerruh Yigit .offloads = RTE_ETH_RX_OFFLOAD_VLAN_STRIP, 130d19533e8SHuawei Xie }, 131d19533e8SHuawei Xie 132d19533e8SHuawei Xie .txmode = { 133295968d1SFerruh Yigit .mq_mode = RTE_ETH_MQ_TX_NONE, 134295968d1SFerruh Yigit .offloads = (RTE_ETH_TX_OFFLOAD_IPV4_CKSUM | 135295968d1SFerruh Yigit RTE_ETH_TX_OFFLOAD_TCP_CKSUM | 136295968d1SFerruh Yigit RTE_ETH_TX_OFFLOAD_VLAN_INSERT | 137295968d1SFerruh Yigit RTE_ETH_TX_OFFLOAD_MULTI_SEGS | 138295968d1SFerruh Yigit RTE_ETH_TX_OFFLOAD_TCP_TSO), 139d19533e8SHuawei Xie }, 140d19533e8SHuawei Xie .rx_adv_conf = { 141d19533e8SHuawei Xie /* 142d19533e8SHuawei Xie * should be overridden separately in code with 143d19533e8SHuawei Xie * appropriate values 144d19533e8SHuawei Xie */ 145d19533e8SHuawei Xie .vmdq_rx_conf = { 146295968d1SFerruh Yigit .nb_queue_pools = RTE_ETH_8_POOLS, 147d19533e8SHuawei Xie .enable_default_pool = 0, 148d19533e8SHuawei Xie .default_pool = 0, 149d19533e8SHuawei Xie .nb_pool_maps = 0, 150d19533e8SHuawei Xie .pool_map = {{0, 0},}, 151d19533e8SHuawei Xie }, 152d19533e8SHuawei Xie }, 153d19533e8SHuawei Xie }; 154d19533e8SHuawei Xie 155cc22d8caSShahaf Shuler 156d19533e8SHuawei Xie static unsigned lcore_ids[RTE_MAX_LCORE]; 157f8244c63SZhiyong Yang static uint16_t ports[RTE_MAX_ETHPORTS]; 158d19533e8SHuawei Xie static unsigned num_ports = 0; /**< The number of ports specified in command line */ 15984b02d16SHuawei Xie static uint16_t num_pf_queues, num_vmdq_queues; 16084b02d16SHuawei Xie static uint16_t vmdq_pool_base, vmdq_queue_base; 16184b02d16SHuawei Xie static uint16_t queues_per_pool; 162d19533e8SHuawei Xie 163d19533e8SHuawei Xie const uint16_t vlan_tags[] = { 164d19533e8SHuawei Xie 1000, 1001, 1002, 1003, 1004, 1005, 1006, 1007, 165d19533e8SHuawei Xie 1008, 1009, 1010, 1011, 1012, 1013, 1014, 1015, 166d19533e8SHuawei Xie 1016, 1017, 1018, 1019, 1020, 1021, 1022, 1023, 167d19533e8SHuawei Xie 1024, 1025, 1026, 1027, 1028, 1029, 1030, 1031, 168d19533e8SHuawei Xie 1032, 
1033, 1034, 1035, 1036, 1037, 1038, 1039, 169d19533e8SHuawei Xie 1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047, 170d19533e8SHuawei Xie 1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055, 171d19533e8SHuawei Xie 1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063, 172d19533e8SHuawei Xie }; 173d19533e8SHuawei Xie 174d19533e8SHuawei Xie /* ethernet addresses of ports */ 1756d13ea8eSOlivier Matz static struct rte_ether_addr vmdq_ports_eth_addr[RTE_MAX_ETHPORTS]; 176d19533e8SHuawei Xie 17745657a5cSYuanhan Liu static struct vhost_dev_tailq_list vhost_dev_list = 17845657a5cSYuanhan Liu TAILQ_HEAD_INITIALIZER(vhost_dev_list); 179d19533e8SHuawei Xie 180d19533e8SHuawei Xie static struct lcore_info lcore_info[RTE_MAX_LCORE]; 181d19533e8SHuawei Xie 182d19533e8SHuawei Xie /* Used for queueing bursts of TX packets. */ 183d19533e8SHuawei Xie struct mbuf_table { 184d19533e8SHuawei Xie unsigned len; 185d19533e8SHuawei Xie unsigned txq_id; 186d19533e8SHuawei Xie struct rte_mbuf *m_table[MAX_PKT_BURST]; 187d19533e8SHuawei Xie }; 188d19533e8SHuawei Xie 189a68ba8e0SCheng Jiang struct vhost_bufftable { 190a68ba8e0SCheng Jiang uint32_t len; 191a68ba8e0SCheng Jiang uint64_t pre_tsc; 192a68ba8e0SCheng Jiang struct rte_mbuf *m_table[MAX_PKT_BURST]; 193a68ba8e0SCheng Jiang }; 194a68ba8e0SCheng Jiang 195d19533e8SHuawei Xie /* TX queue for each data core. */ 196d19533e8SHuawei Xie struct mbuf_table lcore_tx_queue[RTE_MAX_LCORE]; 197d19533e8SHuawei Xie 198a68ba8e0SCheng Jiang /* 199a68ba8e0SCheng Jiang * Vhost TX buffer for each data core. 200a68ba8e0SCheng Jiang * Every data core maintains a TX buffer for every vhost device, 201a68ba8e0SCheng Jiang * which is used for batch pkts enqueue for higher performance. 
202a68ba8e0SCheng Jiang */ 20353d3f477SJiayu Hu struct vhost_bufftable *vhost_txbuff[RTE_MAX_LCORE * RTE_MAX_VHOST_DEVICE]; 204a68ba8e0SCheng Jiang 205273ecdbcSYuanhan Liu #define MBUF_TABLE_DRAIN_TSC ((rte_get_tsc_hz() + US_PER_S - 1) \ 206273ecdbcSYuanhan Liu / US_PER_S * BURST_TX_DRAIN_US) 207d19533e8SHuawei Xie 20853d3f477SJiayu Hu static inline bool 20953d3f477SJiayu Hu is_dma_configured(int16_t dev_id) 21053d3f477SJiayu Hu { 21153d3f477SJiayu Hu int i; 21253d3f477SJiayu Hu 21353d3f477SJiayu Hu for (i = 0; i < dma_count; i++) 21453d3f477SJiayu Hu if (dmas_id[i] == dev_id) 21553d3f477SJiayu Hu return true; 21653d3f477SJiayu Hu return false; 21753d3f477SJiayu Hu } 21853d3f477SJiayu Hu 2193a04ecb2SCheng Jiang static inline int 2203a04ecb2SCheng Jiang open_dma(const char *value) 2213a04ecb2SCheng Jiang { 22253d3f477SJiayu Hu struct dma_for_vhost *dma_info = dma_bind; 22353d3f477SJiayu Hu char *input = strndup(value, strlen(value) + 1); 22453d3f477SJiayu Hu char *addrs = input; 22553d3f477SJiayu Hu char *ptrs[2]; 22653d3f477SJiayu Hu char *start, *end, *substr; 22753d3f477SJiayu Hu int64_t vid; 2283a04ecb2SCheng Jiang 22953d3f477SJiayu Hu struct rte_dma_info info; 23053d3f477SJiayu Hu struct rte_dma_conf dev_config = { .nb_vchans = 1 }; 23153d3f477SJiayu Hu struct rte_dma_vchan_conf qconf = { 23253d3f477SJiayu Hu .direction = RTE_DMA_DIR_MEM_TO_MEM, 23353d3f477SJiayu Hu .nb_desc = DMA_RING_SIZE 23453d3f477SJiayu Hu }; 23553d3f477SJiayu Hu 23653d3f477SJiayu Hu int dev_id; 23753d3f477SJiayu Hu int ret = 0; 23853d3f477SJiayu Hu uint16_t i = 0; 23953d3f477SJiayu Hu char *dma_arg[RTE_MAX_VHOST_DEVICE]; 24053d3f477SJiayu Hu int args_nr; 24153d3f477SJiayu Hu 24253d3f477SJiayu Hu while (isblank(*addrs)) 24353d3f477SJiayu Hu addrs++; 24453d3f477SJiayu Hu if (*addrs == '\0') { 24553d3f477SJiayu Hu ret = -1; 24653d3f477SJiayu Hu goto out; 24753d3f477SJiayu Hu } 24853d3f477SJiayu Hu 24953d3f477SJiayu Hu /* process DMA devices within bracket. 
*/ 25053d3f477SJiayu Hu addrs++; 25153d3f477SJiayu Hu substr = strtok(addrs, ";]"); 25253d3f477SJiayu Hu if (!substr) { 25353d3f477SJiayu Hu ret = -1; 25453d3f477SJiayu Hu goto out; 25553d3f477SJiayu Hu } 25653d3f477SJiayu Hu 25753d3f477SJiayu Hu args_nr = rte_strsplit(substr, strlen(substr), dma_arg, RTE_MAX_VHOST_DEVICE, ','); 25853d3f477SJiayu Hu if (args_nr <= 0) { 25953d3f477SJiayu Hu ret = -1; 26053d3f477SJiayu Hu goto out; 26153d3f477SJiayu Hu } 26253d3f477SJiayu Hu 26353d3f477SJiayu Hu while (i < args_nr) { 26453d3f477SJiayu Hu char *arg_temp = dma_arg[i]; 26553d3f477SJiayu Hu uint8_t sub_nr; 26653d3f477SJiayu Hu 26753d3f477SJiayu Hu sub_nr = rte_strsplit(arg_temp, strlen(arg_temp), ptrs, 2, '@'); 26853d3f477SJiayu Hu if (sub_nr != 2) { 26953d3f477SJiayu Hu ret = -1; 27053d3f477SJiayu Hu goto out; 27153d3f477SJiayu Hu } 27253d3f477SJiayu Hu 27353d3f477SJiayu Hu start = strstr(ptrs[0], "txd"); 27453d3f477SJiayu Hu if (start == NULL) { 27553d3f477SJiayu Hu ret = -1; 27653d3f477SJiayu Hu goto out; 27753d3f477SJiayu Hu } 27853d3f477SJiayu Hu 27953d3f477SJiayu Hu start += 3; 28053d3f477SJiayu Hu vid = strtol(start, &end, 0); 28153d3f477SJiayu Hu if (end == start) { 28253d3f477SJiayu Hu ret = -1; 28353d3f477SJiayu Hu goto out; 28453d3f477SJiayu Hu } 28553d3f477SJiayu Hu 28653d3f477SJiayu Hu dev_id = rte_dma_get_dev_id_by_name(ptrs[1]); 28753d3f477SJiayu Hu if (dev_id < 0) { 28853d3f477SJiayu Hu RTE_LOG(ERR, VHOST_CONFIG, "Fail to find DMA %s.\n", ptrs[1]); 28953d3f477SJiayu Hu ret = -1; 29053d3f477SJiayu Hu goto out; 29153d3f477SJiayu Hu } 29253d3f477SJiayu Hu 29353d3f477SJiayu Hu /* DMA device is already configured, so skip */ 29453d3f477SJiayu Hu if (is_dma_configured(dev_id)) 29553d3f477SJiayu Hu goto done; 29653d3f477SJiayu Hu 29753d3f477SJiayu Hu if (rte_dma_info_get(dev_id, &info) != 0) { 29853d3f477SJiayu Hu RTE_LOG(ERR, VHOST_CONFIG, "Error with rte_dma_info_get()\n"); 29953d3f477SJiayu Hu ret = -1; 30053d3f477SJiayu Hu goto out; 30153d3f477SJiayu Hu } 
30253d3f477SJiayu Hu 30353d3f477SJiayu Hu if (info.max_vchans < 1) { 30453d3f477SJiayu Hu RTE_LOG(ERR, VHOST_CONFIG, "No channels available on device %d\n", dev_id); 30553d3f477SJiayu Hu ret = -1; 30653d3f477SJiayu Hu goto out; 30753d3f477SJiayu Hu } 30853d3f477SJiayu Hu 30953d3f477SJiayu Hu if (rte_dma_configure(dev_id, &dev_config) != 0) { 31053d3f477SJiayu Hu RTE_LOG(ERR, VHOST_CONFIG, "Fail to configure DMA %d.\n", dev_id); 31153d3f477SJiayu Hu ret = -1; 31253d3f477SJiayu Hu goto out; 31353d3f477SJiayu Hu } 31453d3f477SJiayu Hu 31553d3f477SJiayu Hu /* Check the max desc supported by DMA device */ 31653d3f477SJiayu Hu rte_dma_info_get(dev_id, &info); 31753d3f477SJiayu Hu if (info.nb_vchans != 1) { 31853d3f477SJiayu Hu RTE_LOG(ERR, VHOST_CONFIG, "No configured queues reported by DMA %d.\n", 31953d3f477SJiayu Hu dev_id); 32053d3f477SJiayu Hu ret = -1; 32153d3f477SJiayu Hu goto out; 32253d3f477SJiayu Hu } 32353d3f477SJiayu Hu 32453d3f477SJiayu Hu qconf.nb_desc = RTE_MIN(DMA_RING_SIZE, info.max_desc); 32553d3f477SJiayu Hu 32653d3f477SJiayu Hu if (rte_dma_vchan_setup(dev_id, 0, &qconf) != 0) { 32753d3f477SJiayu Hu RTE_LOG(ERR, VHOST_CONFIG, "Fail to set up DMA %d.\n", dev_id); 32853d3f477SJiayu Hu ret = -1; 32953d3f477SJiayu Hu goto out; 33053d3f477SJiayu Hu } 33153d3f477SJiayu Hu 33253d3f477SJiayu Hu if (rte_dma_start(dev_id) != 0) { 33353d3f477SJiayu Hu RTE_LOG(ERR, VHOST_CONFIG, "Fail to start DMA %u.\n", dev_id); 33453d3f477SJiayu Hu ret = -1; 33553d3f477SJiayu Hu goto out; 33653d3f477SJiayu Hu } 33753d3f477SJiayu Hu 33853d3f477SJiayu Hu dmas_id[dma_count++] = dev_id; 33953d3f477SJiayu Hu 34053d3f477SJiayu Hu done: 34153d3f477SJiayu Hu (dma_info + vid)->dmas[VIRTIO_RXQ].dev_id = dev_id; 34253d3f477SJiayu Hu i++; 34353d3f477SJiayu Hu } 34453d3f477SJiayu Hu out: 34553d3f477SJiayu Hu free(input); 34653d3f477SJiayu Hu return ret; 3473a04ecb2SCheng Jiang } 3483a04ecb2SCheng Jiang 349d19533e8SHuawei Xie /* 350d19533e8SHuawei Xie * Builds up the correct configuration 
for VMDQ VLAN pool map 351d19533e8SHuawei Xie * according to the pool & queue limits. 352d19533e8SHuawei Xie */ 353d19533e8SHuawei Xie static inline int 354d19533e8SHuawei Xie get_eth_conf(struct rte_eth_conf *eth_conf, uint32_t num_devices) 355d19533e8SHuawei Xie { 356d19533e8SHuawei Xie struct rte_eth_vmdq_rx_conf conf; 35790924cafSOuyang Changchun struct rte_eth_vmdq_rx_conf *def_conf = 35890924cafSOuyang Changchun &vmdq_conf_default.rx_adv_conf.vmdq_rx_conf; 359d19533e8SHuawei Xie unsigned i; 360d19533e8SHuawei Xie 361d19533e8SHuawei Xie memset(&conf, 0, sizeof(conf)); 362d19533e8SHuawei Xie conf.nb_queue_pools = (enum rte_eth_nb_pools)num_devices; 363d19533e8SHuawei Xie conf.nb_pool_maps = num_devices; 36490924cafSOuyang Changchun conf.enable_loop_back = def_conf->enable_loop_back; 36590924cafSOuyang Changchun conf.rx_mode = def_conf->rx_mode; 366d19533e8SHuawei Xie 367d19533e8SHuawei Xie for (i = 0; i < conf.nb_pool_maps; i++) { 368d19533e8SHuawei Xie conf.pool_map[i].vlan_id = vlan_tags[ i ]; 369d19533e8SHuawei Xie conf.pool_map[i].pools = (1UL << i); 370d19533e8SHuawei Xie } 371d19533e8SHuawei Xie 372d19533e8SHuawei Xie (void)(rte_memcpy(eth_conf, &vmdq_conf_default, sizeof(*eth_conf))); 373d19533e8SHuawei Xie (void)(rte_memcpy(ð_conf->rx_adv_conf.vmdq_rx_conf, &conf, 374d19533e8SHuawei Xie sizeof(eth_conf->rx_adv_conf.vmdq_rx_conf))); 375d19533e8SHuawei Xie return 0; 376d19533e8SHuawei Xie } 377d19533e8SHuawei Xie 378d19533e8SHuawei Xie /* 379d19533e8SHuawei Xie * Initialises a given port using global settings and with the rx buffers 380d19533e8SHuawei Xie * coming from the mbuf_pool passed as parameter 381d19533e8SHuawei Xie */ 382d19533e8SHuawei Xie static inline int 383f8244c63SZhiyong Yang port_init(uint16_t port) 384d19533e8SHuawei Xie { 385d19533e8SHuawei Xie struct rte_eth_dev_info dev_info; 386d19533e8SHuawei Xie struct rte_eth_conf port_conf; 387db4014f2SHuawei Xie struct rte_eth_rxconf *rxconf; 388db4014f2SHuawei Xie struct rte_eth_txconf 
*txconf; 389db4014f2SHuawei Xie int16_t rx_rings, tx_rings; 390d19533e8SHuawei Xie uint16_t rx_ring_size, tx_ring_size; 391d19533e8SHuawei Xie int retval; 392d19533e8SHuawei Xie uint16_t q; 393d19533e8SHuawei Xie 394d19533e8SHuawei Xie /* The max pool number from dev_info will be used to validate the pool number specified in cmd line */ 39537fb306cSIvan Ilchenko retval = rte_eth_dev_info_get(port, &dev_info); 39637fb306cSIvan Ilchenko if (retval != 0) { 39737fb306cSIvan Ilchenko RTE_LOG(ERR, VHOST_PORT, 39837fb306cSIvan Ilchenko "Error during getting device (port %u) info: %s\n", 39937fb306cSIvan Ilchenko port, strerror(-retval)); 40037fb306cSIvan Ilchenko 40137fb306cSIvan Ilchenko return retval; 40237fb306cSIvan Ilchenko } 403d19533e8SHuawei Xie 404db4014f2SHuawei Xie rxconf = &dev_info.default_rxconf; 405db4014f2SHuawei Xie txconf = &dev_info.default_txconf; 406db4014f2SHuawei Xie rxconf->rx_drop_en = 1; 407f0adccd4SOuyang Changchun 408d19533e8SHuawei Xie /*configure the number of supported virtio devices based on VMDQ limits */ 409d19533e8SHuawei Xie num_devices = dev_info.max_vmdq_pools; 410d19533e8SHuawei Xie 411d19533e8SHuawei Xie rx_ring_size = RTE_TEST_RX_DESC_DEFAULT; 412d19533e8SHuawei Xie tx_ring_size = RTE_TEST_TX_DESC_DEFAULT; 41300b8b706SYuanhan Liu 414d19533e8SHuawei Xie tx_rings = (uint16_t)rte_lcore_count(); 415d19533e8SHuawei Xie 4165932109aSChenbo Xia if (mergeable) { 4175932109aSChenbo Xia if (dev_info.max_mtu != UINT16_MAX && dev_info.max_rx_pktlen > dev_info.max_mtu) 4185932109aSChenbo Xia vmdq_conf_default.rxmode.mtu = dev_info.max_mtu; 4195932109aSChenbo Xia else 4205932109aSChenbo Xia vmdq_conf_default.rxmode.mtu = MAX_MTU; 4215932109aSChenbo Xia } 4225932109aSChenbo Xia 423d19533e8SHuawei Xie /* Get port configuration. 
*/ 424d19533e8SHuawei Xie retval = get_eth_conf(&port_conf, num_devices); 425d19533e8SHuawei Xie if (retval < 0) 426d19533e8SHuawei Xie return retval; 42784b02d16SHuawei Xie /* NIC queues are divided into pf queues and vmdq queues. */ 42884b02d16SHuawei Xie num_pf_queues = dev_info.max_rx_queues - dev_info.vmdq_queue_num; 42984b02d16SHuawei Xie queues_per_pool = dev_info.vmdq_queue_num / dev_info.max_vmdq_pools; 43084b02d16SHuawei Xie num_vmdq_queues = num_devices * queues_per_pool; 43184b02d16SHuawei Xie num_queues = num_pf_queues + num_vmdq_queues; 43284b02d16SHuawei Xie vmdq_queue_base = dev_info.vmdq_queue_base; 43384b02d16SHuawei Xie vmdq_pool_base = dev_info.vmdq_pool_base; 43484b02d16SHuawei Xie printf("pf queue num: %u, configured vmdq pool num: %u, each vmdq pool has %u queues\n", 43584b02d16SHuawei Xie num_pf_queues, num_devices, queues_per_pool); 436d19533e8SHuawei Xie 437a9dbe180SThomas Monjalon if (!rte_eth_dev_is_valid_port(port)) 438a9dbe180SThomas Monjalon return -1; 439d19533e8SHuawei Xie 44084b02d16SHuawei Xie rx_rings = (uint16_t)dev_info.max_rx_queues; 441295968d1SFerruh Yigit if (dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) 442cc22d8caSShahaf Shuler port_conf.txmode.offloads |= 443295968d1SFerruh Yigit RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE; 444d19533e8SHuawei Xie /* Configure ethernet device. 
*/ 445d19533e8SHuawei Xie retval = rte_eth_dev_configure(port, rx_rings, tx_rings, &port_conf); 446bb7085b4SJianfeng Tan if (retval != 0) { 447bb7085b4SJianfeng Tan RTE_LOG(ERR, VHOST_PORT, "Failed to configure port %u: %s.\n", 448bb7085b4SJianfeng Tan port, strerror(-retval)); 449d19533e8SHuawei Xie return retval; 450bb7085b4SJianfeng Tan } 451d19533e8SHuawei Xie 45260efb44fSRoman Zhukov retval = rte_eth_dev_adjust_nb_rx_tx_desc(port, &rx_ring_size, 45360efb44fSRoman Zhukov &tx_ring_size); 45460efb44fSRoman Zhukov if (retval != 0) { 45560efb44fSRoman Zhukov RTE_LOG(ERR, VHOST_PORT, "Failed to adjust number of descriptors " 45660efb44fSRoman Zhukov "for port %u: %s.\n", port, strerror(-retval)); 45760efb44fSRoman Zhukov return retval; 45860efb44fSRoman Zhukov } 45960efb44fSRoman Zhukov if (rx_ring_size > RTE_TEST_RX_DESC_DEFAULT) { 46060efb44fSRoman Zhukov RTE_LOG(ERR, VHOST_PORT, "Mbuf pool has an insufficient size " 46160efb44fSRoman Zhukov "for Rx queues on port %u.\n", port); 46260efb44fSRoman Zhukov return -1; 46360efb44fSRoman Zhukov } 46460efb44fSRoman Zhukov 465d19533e8SHuawei Xie /* Setup the queues. 
*/ 466cc22d8caSShahaf Shuler rxconf->offloads = port_conf.rxmode.offloads; 467d19533e8SHuawei Xie for (q = 0; q < rx_rings; q ++) { 468d19533e8SHuawei Xie retval = rte_eth_rx_queue_setup(port, q, rx_ring_size, 469db4014f2SHuawei Xie rte_eth_dev_socket_id(port), 470db4014f2SHuawei Xie rxconf, 47168363d85SYuanhan Liu mbuf_pool); 472bb7085b4SJianfeng Tan if (retval < 0) { 473bb7085b4SJianfeng Tan RTE_LOG(ERR, VHOST_PORT, 474bb7085b4SJianfeng Tan "Failed to setup rx queue %u of port %u: %s.\n", 475bb7085b4SJianfeng Tan q, port, strerror(-retval)); 476d19533e8SHuawei Xie return retval; 477d19533e8SHuawei Xie } 478bb7085b4SJianfeng Tan } 479cc22d8caSShahaf Shuler txconf->offloads = port_conf.txmode.offloads; 480d19533e8SHuawei Xie for (q = 0; q < tx_rings; q ++) { 481d19533e8SHuawei Xie retval = rte_eth_tx_queue_setup(port, q, tx_ring_size, 482db4014f2SHuawei Xie rte_eth_dev_socket_id(port), 483db4014f2SHuawei Xie txconf); 484bb7085b4SJianfeng Tan if (retval < 0) { 485bb7085b4SJianfeng Tan RTE_LOG(ERR, VHOST_PORT, 486bb7085b4SJianfeng Tan "Failed to setup tx queue %u of port %u: %s.\n", 487bb7085b4SJianfeng Tan q, port, strerror(-retval)); 488d19533e8SHuawei Xie return retval; 489d19533e8SHuawei Xie } 490bb7085b4SJianfeng Tan } 491d19533e8SHuawei Xie 492d19533e8SHuawei Xie /* Start the device. 
*/ 493d19533e8SHuawei Xie retval = rte_eth_dev_start(port); 494d19533e8SHuawei Xie if (retval < 0) { 495bb7085b4SJianfeng Tan RTE_LOG(ERR, VHOST_PORT, "Failed to start port %u: %s\n", 496bb7085b4SJianfeng Tan port, strerror(-retval)); 497d19533e8SHuawei Xie return retval; 498d19533e8SHuawei Xie } 499d19533e8SHuawei Xie 500f430bbceSIvan Ilchenko if (promiscuous) { 501f430bbceSIvan Ilchenko retval = rte_eth_promiscuous_enable(port); 502f430bbceSIvan Ilchenko if (retval != 0) { 503f430bbceSIvan Ilchenko RTE_LOG(ERR, VHOST_PORT, 504f430bbceSIvan Ilchenko "Failed to enable promiscuous mode on port %u: %s\n", 505f430bbceSIvan Ilchenko port, rte_strerror(-retval)); 506f430bbceSIvan Ilchenko return retval; 507f430bbceSIvan Ilchenko } 508f430bbceSIvan Ilchenko } 50990924cafSOuyang Changchun 51070febdcfSIgor Romanov retval = rte_eth_macaddr_get(port, &vmdq_ports_eth_addr[port]); 51170febdcfSIgor Romanov if (retval < 0) { 51270febdcfSIgor Romanov RTE_LOG(ERR, VHOST_PORT, 51370febdcfSIgor Romanov "Failed to get MAC address on port %u: %s\n", 51470febdcfSIgor Romanov port, rte_strerror(-retval)); 51570febdcfSIgor Romanov return retval; 51670febdcfSIgor Romanov } 51770febdcfSIgor Romanov 518d19533e8SHuawei Xie RTE_LOG(INFO, VHOST_PORT, "Max virtio devices supported: %u\n", num_devices); 519d19533e8SHuawei Xie RTE_LOG(INFO, VHOST_PORT, "Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8 520d19533e8SHuawei Xie " %02"PRIx8" %02"PRIx8" %02"PRIx8"\n", 521a7db3afcSAman Deep Singh port, RTE_ETHER_ADDR_BYTES(&vmdq_ports_eth_addr[port])); 522d19533e8SHuawei Xie 523d19533e8SHuawei Xie return 0; 524d19533e8SHuawei Xie } 525d19533e8SHuawei Xie 526d19533e8SHuawei Xie /* 527bde19a4dSJiayu Hu * Set socket file path. 
528d19533e8SHuawei Xie */ 529d19533e8SHuawei Xie static int 530bde19a4dSJiayu Hu us_vhost_parse_socket_path(const char *q_arg) 531d19533e8SHuawei Xie { 532d79035b7STiwei Bie char *old; 533d79035b7STiwei Bie 534d19533e8SHuawei Xie /* parse number string */ 535fa81d3b9SGang Jiang if (strnlen(q_arg, PATH_MAX) == PATH_MAX) 536d19533e8SHuawei Xie return -1; 537ad0eef4dSJiayu Hu 538d79035b7STiwei Bie old = socket_files; 539ad0eef4dSJiayu Hu socket_files = realloc(socket_files, PATH_MAX * (nb_sockets + 1)); 540d79035b7STiwei Bie if (socket_files == NULL) { 541d79035b7STiwei Bie free(old); 542d79035b7STiwei Bie return -1; 543d79035b7STiwei Bie } 544d79035b7STiwei Bie 545f9acaf84SBruce Richardson strlcpy(socket_files + nb_sockets * PATH_MAX, q_arg, PATH_MAX); 546ad0eef4dSJiayu Hu nb_sockets++; 547d19533e8SHuawei Xie 548d19533e8SHuawei Xie return 0; 549d19533e8SHuawei Xie } 550d19533e8SHuawei Xie 551d19533e8SHuawei Xie /* 552d19533e8SHuawei Xie * Parse the portmask provided at run time. 553d19533e8SHuawei Xie */ 554d19533e8SHuawei Xie static int 555d19533e8SHuawei Xie parse_portmask(const char *portmask) 556d19533e8SHuawei Xie { 557d19533e8SHuawei Xie char *end = NULL; 558d19533e8SHuawei Xie unsigned long pm; 559d19533e8SHuawei Xie 560d19533e8SHuawei Xie errno = 0; 561d19533e8SHuawei Xie 562d19533e8SHuawei Xie /* parse hexadecimal string */ 563d19533e8SHuawei Xie pm = strtoul(portmask, &end, 16); 564d19533e8SHuawei Xie if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0') || (errno != 0)) 565ce6b8c31SSarosh Arif return 0; 566d19533e8SHuawei Xie 567d19533e8SHuawei Xie return pm; 568d19533e8SHuawei Xie 569d19533e8SHuawei Xie } 570d19533e8SHuawei Xie 571d19533e8SHuawei Xie /* 572d19533e8SHuawei Xie * Parse num options at run time. 
573d19533e8SHuawei Xie */ 574d19533e8SHuawei Xie static int 575d19533e8SHuawei Xie parse_num_opt(const char *q_arg, uint32_t max_valid_value) 576d19533e8SHuawei Xie { 577d19533e8SHuawei Xie char *end = NULL; 578d19533e8SHuawei Xie unsigned long num; 579d19533e8SHuawei Xie 580d19533e8SHuawei Xie errno = 0; 581d19533e8SHuawei Xie 582d19533e8SHuawei Xie /* parse unsigned int string */ 583d19533e8SHuawei Xie num = strtoul(q_arg, &end, 10); 584d19533e8SHuawei Xie if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0') || (errno != 0)) 585d19533e8SHuawei Xie return -1; 586d19533e8SHuawei Xie 587d19533e8SHuawei Xie if (num > max_valid_value) 588d19533e8SHuawei Xie return -1; 589d19533e8SHuawei Xie 590d19533e8SHuawei Xie return num; 591d19533e8SHuawei Xie 592d19533e8SHuawei Xie } 593d19533e8SHuawei Xie 594d19533e8SHuawei Xie /* 595d19533e8SHuawei Xie * Display usage 596d19533e8SHuawei Xie */ 597d19533e8SHuawei Xie static void 598d19533e8SHuawei Xie us_vhost_usage(const char *prgname) 599d19533e8SHuawei Xie { 600d19533e8SHuawei Xie RTE_LOG(INFO, VHOST_CONFIG, "%s [EAL options] -- -p PORTMASK\n" 601d19533e8SHuawei Xie " --vm2vm [0|1|2]\n" 602d19533e8SHuawei Xie " --rx_retry [0|1] --mergeable [0|1] --stats [0-N]\n" 603bde19a4dSJiayu Hu " --socket-file <path>\n" 604d19533e8SHuawei Xie " --nb-devices ND\n" 605d19533e8SHuawei Xie " -p PORTMASK: Set mask for ports to be used by application\n" 606d19533e8SHuawei Xie " --vm2vm [0|1|2]: disable/software(default)/hardware vm2vm comms\n" 6077be78d02SJosh Soref " --rx-retry [0|1]: disable/enable(default) retries on Rx. Enable retry if destination queue is full\n" 608d19533e8SHuawei Xie " --rx-retry-delay [0-N]: timeout(in usecond) between retries on RX. This makes effect only if retries on rx enabled\n" 609d19533e8SHuawei Xie " --rx-retry-num [0-N]: the number of retries on rx. 
"  This makes effect only if retries on rx enabled\n"
"	--mergeable [0|1]: disable(default)/enable RX mergeable buffers\n"
"	--stats [0-N]: 0: Disable stats, N: Time in seconds to print stats\n"
"	--socket-file: The path of the socket file.\n"
"	--tx-csum [0|1] disable/enable TX checksum offload.\n"
"	--tso [0|1] disable/enable TCP segment offload.\n"
"	--client register a vhost-user socket as client mode.\n"
"	--dmas register dma channel for specific vhost device.\n"
"	--total-num-mbufs [0-N] set the number of mbufs to be allocated in mbuf pools, the default value is 147456.\n",
	       prgname);
}

/*
 * Long-option name strings and the getopt_long() values returned for them.
 * Values start at 256 so they can never collide with the short options
 * ('p', 'P') that getopt_long() returns as plain characters.
 */
enum {
#define OPT_VM2VM               "vm2vm"
	OPT_VM2VM_NUM = 256,
#define OPT_RX_RETRY            "rx-retry"
	OPT_RX_RETRY_NUM,
#define OPT_RX_RETRY_DELAY      "rx-retry-delay"
	OPT_RX_RETRY_DELAY_NUM,
#define OPT_RX_RETRY_NUMB       "rx-retry-num"
	OPT_RX_RETRY_NUMB_NUM,
#define OPT_MERGEABLE           "mergeable"
	OPT_MERGEABLE_NUM,
#define OPT_STATS               "stats"
	OPT_STATS_NUM,
#define OPT_SOCKET_FILE         "socket-file"
	OPT_SOCKET_FILE_NUM,
#define OPT_TX_CSUM             "tx-csum"
	OPT_TX_CSUM_NUM,
#define OPT_TSO                 "tso"
	OPT_TSO_NUM,
#define OPT_CLIENT              "client"
	OPT_CLIENT_NUM,
#define OPT_BUILTIN_NET_DRIVER  "builtin-net-driver"
	OPT_BUILTIN_NET_DRIVER_NUM,
#define OPT_DMAS                "dmas"
	OPT_DMAS_NUM,
#define OPT_NUM_MBUFS           "total-num-mbufs"
	OPT_NUM_MBUFS_NUM,
};

/*
 * Parse the arguments given in the command line of the application.
 *
 * Each accepted option validates its argument (usually via parse_num_opt())
 * and stores the result into the corresponding file-level global; on any
 * invalid argument the usage text is printed and -1 is returned.  On success
 * the ports[] array and num_ports are populated from enabled_port_mask and
 * 0 is returned.
 */
static int
us_vhost_parse_args(int argc, char **argv)
{
	int opt, ret;
	int option_index;
	unsigned i;
	const char *prgname = argv[0];
	static struct option long_option[] = {
		{OPT_VM2VM, required_argument,
				NULL, OPT_VM2VM_NUM},
		{OPT_RX_RETRY, required_argument,
				NULL, OPT_RX_RETRY_NUM},
		{OPT_RX_RETRY_DELAY, required_argument,
				NULL, OPT_RX_RETRY_DELAY_NUM},
		{OPT_RX_RETRY_NUMB, required_argument,
				NULL, OPT_RX_RETRY_NUMB_NUM},
		{OPT_MERGEABLE, required_argument,
				NULL, OPT_MERGEABLE_NUM},
		{OPT_STATS, required_argument,
				NULL, OPT_STATS_NUM},
		{OPT_SOCKET_FILE, required_argument,
				NULL, OPT_SOCKET_FILE_NUM},
		{OPT_TX_CSUM, required_argument,
				NULL, OPT_TX_CSUM_NUM},
		{OPT_TSO, required_argument,
				NULL, OPT_TSO_NUM},
		{OPT_CLIENT, no_argument,
				NULL, OPT_CLIENT_NUM},
		{OPT_BUILTIN_NET_DRIVER, no_argument,
				NULL, OPT_BUILTIN_NET_DRIVER_NUM},
		{OPT_DMAS, required_argument,
				NULL, OPT_DMAS_NUM},
		{OPT_NUM_MBUFS, required_argument,
				NULL, OPT_NUM_MBUFS_NUM},
		{NULL, 0, 0, 0},
	};

	/* Parse command line */
	while ((opt = getopt_long(argc, argv, "p:P",
			long_option, &option_index)) != EOF) {
		switch (opt) {
		/* Portmask */
		case 'p':
			enabled_port_mask = parse_portmask(optarg);
			if (enabled_port_mask == 0) {
				RTE_LOG(INFO, VHOST_CONFIG, "Invalid portmask\n");
				us_vhost_usage(prgname);
				return -1;
			}
			break;

		case 'P':
			/* Promiscuous mode: also accept broadcast/multicast in VMDQ. */
			promiscuous = 1;
			vmdq_conf_default.rx_adv_conf.vmdq_rx_conf.rx_mode =
				RTE_ETH_VMDQ_ACCEPT_BROADCAST |
				RTE_ETH_VMDQ_ACCEPT_MULTICAST;
			break;

		case OPT_VM2VM_NUM:
			/* VM-to-VM forwarding mode: 0=disabled, 1=software, 2=hardware. */
			ret = parse_num_opt(optarg, (VM2VM_LAST - 1));
			if (ret == -1) {
				RTE_LOG(INFO, VHOST_CONFIG,
					"Invalid argument for "
					"vm2vm [0|1|2]\n");
				us_vhost_usage(prgname);
				return -1;
			}
			vm2vm_mode = (vm2vm_type)ret;
			break;

		case OPT_RX_RETRY_NUM:
			ret = parse_num_opt(optarg, 1);
			if (ret == -1) {
				RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for rx-retry [0|1]\n");
				us_vhost_usage(prgname);
				return -1;
			}
			enable_retry = ret;
			break;

		case OPT_TX_CSUM_NUM:
			ret = parse_num_opt(optarg, 1);
			if (ret == -1) {
				RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for tx-csum [0|1]\n");
				us_vhost_usage(prgname);
				return -1;
			}
			enable_tx_csum = ret;
			break;

		case OPT_TSO_NUM:
			ret = parse_num_opt(optarg, 1);
			if (ret == -1) {
				RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for tso [0|1]\n");
				us_vhost_usage(prgname);
				return -1;
			}
			enable_tso = ret;
			break;

		case OPT_RX_RETRY_DELAY_NUM:
			/* Microseconds to wait between RX enqueue retries. */
			ret = parse_num_opt(optarg, INT32_MAX);
			if (ret == -1) {
				RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for rx-retry-delay [0-N]\n");
				us_vhost_usage(prgname);
				return -1;
			}
			burst_rx_delay_time = ret;
			break;

		case OPT_RX_RETRY_NUMB_NUM:
			/* Maximum number of RX enqueue retries. */
			ret = parse_num_opt(optarg, INT32_MAX);
			if (ret == -1) {
				RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for rx-retry-num [0-N]\n");
				us_vhost_usage(prgname);
				return -1;
			}
			burst_rx_retry_num = ret;
			break;

		case OPT_MERGEABLE_NUM:
			ret = parse_num_opt(optarg, 1);
			if (ret == -1) {
				RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for mergeable [0|1]\n");
				us_vhost_usage(prgname);
				return -1;
			}
			mergeable = !!ret;
			break;

		case OPT_STATS_NUM:
			/* 0 disables stats; N > 0 prints stats every N seconds. */
			ret = parse_num_opt(optarg, INT32_MAX);
			if (ret == -1) {
				RTE_LOG(INFO, VHOST_CONFIG,
					"Invalid argument for stats [0..N]\n");
				us_vhost_usage(prgname);
				return -1;
			}
			enable_stats = ret;
			break;

		/* Set socket file path. */
		case OPT_SOCKET_FILE_NUM:
			if (us_vhost_parse_socket_path(optarg) == -1) {
				RTE_LOG(INFO, VHOST_CONFIG,
				"Invalid argument for socket name (Max %d characters)\n",
				PATH_MAX);
				us_vhost_usage(prgname);
				return -1;
			}
			break;

		case OPT_DMAS_NUM:
			/* Bind DMA channels to vhost devices for async enqueue. */
			if (open_dma(optarg) == -1) {
				RTE_LOG(INFO, VHOST_CONFIG,
					"Wrong DMA args\n");
				us_vhost_usage(prgname);
				return -1;
			}
			break;

		case OPT_NUM_MBUFS_NUM:
			ret = parse_num_opt(optarg, INT32_MAX);
			if (ret == -1) {
				RTE_LOG(INFO, VHOST_CONFIG,
					"Invalid argument for total-num-mbufs [0..N]\n");
				us_vhost_usage(prgname);
				return -1;
			}

			/* Only allow growing the pool beyond the default. */
			if (total_num_mbufs < ret)
				total_num_mbufs = ret;
			break;

		case OPT_CLIENT_NUM:
			client_mode = 1;
			break;

		case OPT_BUILTIN_NET_DRIVER_NUM:
			builtin_net_driver = 1;
			break;

		/* Invalid option - print options. */
		default:
			us_vhost_usage(prgname);
			return -1;
		}
	}

	/* Expand the port mask into the ports[] array. */
	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
		if (enabled_port_mask & (1 << i))
			ports[num_ports++] = i;
	}

	if ((num_ports == 0) || (num_ports > MAX_SUP_PORTS)) {
		RTE_LOG(INFO, VHOST_PORT, "Current enabled port number is %u,"
			"but only %u port can be enabled\n",num_ports, MAX_SUP_PORTS);
		return -1;
	}

	return 0;
}

/*
 * Update the global var NUM_PORTS and array PORTS according to system ports number
 * and return valid ports number
 */
static unsigned check_ports_num(unsigned nb_ports)
{
	unsigned valid_num_ports = num_ports;
	unsigned portid;

	/* Never reference more ports than the system actually has. */
	if (num_ports > nb_ports) {
		RTE_LOG(INFO, VHOST_PORT, "\nSpecified port number(%u) exceeds total system port number(%u)\n",
			num_ports, nb_ports);
		num_ports = nb_ports;
	}

	/* Invalidate any configured port ID ethdev does not recognize. */
	for (portid = 0; portid < num_ports; portid ++) {
		if (!rte_eth_dev_is_valid_port(ports[portid])) {
			RTE_LOG(INFO, VHOST_PORT,
				"\nSpecified port ID(%u) is not valid\n",
				ports[portid]);
			ports[portid] = INVALID_PORT_ID;
			valid_num_ports--;
		}
	}
	return valid_num_ports;
}
Xie } 879d19533e8SHuawei Xie } 880d19533e8SHuawei Xie return valid_num_ports; 881d19533e8SHuawei Xie } 882d19533e8SHuawei Xie 883c0583d98SJerin Jacob static __rte_always_inline struct vhost_dev * 8846d13ea8eSOlivier Matz find_vhost_dev(struct rte_ether_addr *mac) 88545657a5cSYuanhan Liu { 88645657a5cSYuanhan Liu struct vhost_dev *vdev; 88745657a5cSYuanhan Liu 88897daf19eSYuanhan Liu TAILQ_FOREACH(vdev, &vhost_dev_list, global_vdev_entry) { 88945657a5cSYuanhan Liu if (vdev->ready == DEVICE_RX && 890538da7a1SOlivier Matz rte_is_same_ether_addr(mac, &vdev->mac_address)) 89145657a5cSYuanhan Liu return vdev; 89245657a5cSYuanhan Liu } 89345657a5cSYuanhan Liu 89445657a5cSYuanhan Liu return NULL; 89545657a5cSYuanhan Liu } 89645657a5cSYuanhan Liu 897d19533e8SHuawei Xie /* 898d19533e8SHuawei Xie * This function learns the MAC address of the device and registers this along with a 899d19533e8SHuawei Xie * vlan tag to a VMDQ. 900d19533e8SHuawei Xie */ 901d19533e8SHuawei Xie static int 902e571e6b4SHuawei Xie link_vmdq(struct vhost_dev *vdev, struct rte_mbuf *m) 903d19533e8SHuawei Xie { 9046d13ea8eSOlivier Matz struct rte_ether_hdr *pkt_hdr; 905d19533e8SHuawei Xie int i, ret; 906d19533e8SHuawei Xie 907d19533e8SHuawei Xie /* Learn MAC address of guest device from packet */ 9086d13ea8eSOlivier Matz pkt_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); 909d19533e8SHuawei Xie 91004d43857SDmitry Kozlyuk if (find_vhost_dev(&pkt_hdr->src_addr)) { 91145657a5cSYuanhan Liu RTE_LOG(ERR, VHOST_DATA, 912c08a3490SYuanhan Liu "(%d) device is using a registered MAC!\n", 913e2a1dd12SYuanhan Liu vdev->vid); 914d19533e8SHuawei Xie return -1; 915d19533e8SHuawei Xie } 916d19533e8SHuawei Xie 91735b2d13fSOlivier Matz for (i = 0; i < RTE_ETHER_ADDR_LEN; i++) 91804d43857SDmitry Kozlyuk vdev->mac_address.addr_bytes[i] = 91904d43857SDmitry Kozlyuk pkt_hdr->src_addr.addr_bytes[i]; 920d19533e8SHuawei Xie 921d19533e8SHuawei Xie /* vlan_tag currently uses the device_id. 
*/ 922e2a1dd12SYuanhan Liu vdev->vlan_tag = vlan_tags[vdev->vid]; 923d19533e8SHuawei Xie 924d19533e8SHuawei Xie /* Print out VMDQ registration info. */ 925c08a3490SYuanhan Liu RTE_LOG(INFO, VHOST_DATA, 926c2c4f87bSAman Deep Singh "(%d) mac " RTE_ETHER_ADDR_PRT_FMT " and vlan %d registered\n", 927a7db3afcSAman Deep Singh vdev->vid, RTE_ETHER_ADDR_BYTES(&vdev->mac_address), 928e571e6b4SHuawei Xie vdev->vlan_tag); 929d19533e8SHuawei Xie 930d19533e8SHuawei Xie /* Register the MAC address. */ 93184b02d16SHuawei Xie ret = rte_eth_dev_mac_addr_add(ports[0], &vdev->mac_address, 932e2a1dd12SYuanhan Liu (uint32_t)vdev->vid + vmdq_pool_base); 933d19533e8SHuawei Xie if (ret) 934c08a3490SYuanhan Liu RTE_LOG(ERR, VHOST_DATA, 935c08a3490SYuanhan Liu "(%d) failed to add device MAC address to VMDQ\n", 936e2a1dd12SYuanhan Liu vdev->vid); 937d19533e8SHuawei Xie 93865453928SJianfeng Tan rte_eth_dev_set_vlan_strip_on_queue(ports[0], vdev->vmdq_rx_q, 1); 939d19533e8SHuawei Xie 940d19533e8SHuawei Xie /* Set device as ready for RX. */ 941e571e6b4SHuawei Xie vdev->ready = DEVICE_RX; 942d19533e8SHuawei Xie 943d19533e8SHuawei Xie return 0; 944d19533e8SHuawei Xie } 945d19533e8SHuawei Xie 946d19533e8SHuawei Xie /* 947d19533e8SHuawei Xie * Removes MAC address and vlan tag from VMDQ. Ensures that nothing is adding buffers to the RX 948d19533e8SHuawei Xie * queue before disabling RX on the device. 
949d19533e8SHuawei Xie */ 950d19533e8SHuawei Xie static inline void 951e571e6b4SHuawei Xie unlink_vmdq(struct vhost_dev *vdev) 952d19533e8SHuawei Xie { 953d19533e8SHuawei Xie unsigned i = 0; 954d19533e8SHuawei Xie unsigned rx_count; 955d19533e8SHuawei Xie struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; 956d19533e8SHuawei Xie 957e571e6b4SHuawei Xie if (vdev->ready == DEVICE_RX) { 958d19533e8SHuawei Xie /*clear MAC and VLAN settings*/ 959e571e6b4SHuawei Xie rte_eth_dev_mac_addr_remove(ports[0], &vdev->mac_address); 960d19533e8SHuawei Xie for (i = 0; i < 6; i++) 961e571e6b4SHuawei Xie vdev->mac_address.addr_bytes[i] = 0; 962d19533e8SHuawei Xie 963e571e6b4SHuawei Xie vdev->vlan_tag = 0; 964d19533e8SHuawei Xie 965d19533e8SHuawei Xie /*Clear out the receive buffers*/ 966d19533e8SHuawei Xie rx_count = rte_eth_rx_burst(ports[0], 967e571e6b4SHuawei Xie (uint16_t)vdev->vmdq_rx_q, pkts_burst, MAX_PKT_BURST); 968d19533e8SHuawei Xie 969d19533e8SHuawei Xie while (rx_count) { 970d19533e8SHuawei Xie for (i = 0; i < rx_count; i++) 971d19533e8SHuawei Xie rte_pktmbuf_free(pkts_burst[i]); 972d19533e8SHuawei Xie 973d19533e8SHuawei Xie rx_count = rte_eth_rx_burst(ports[0], 974e571e6b4SHuawei Xie (uint16_t)vdev->vmdq_rx_q, pkts_burst, MAX_PKT_BURST); 975d19533e8SHuawei Xie } 976d19533e8SHuawei Xie 977e571e6b4SHuawei Xie vdev->ready = DEVICE_MAC_LEARNING; 978d19533e8SHuawei Xie } 979d19533e8SHuawei Xie } 980d19533e8SHuawei Xie 981a68ba8e0SCheng Jiang static inline void 982a68ba8e0SCheng Jiang free_pkts(struct rte_mbuf **pkts, uint16_t n) 983a68ba8e0SCheng Jiang { 984a68ba8e0SCheng Jiang while (n--) 985a68ba8e0SCheng Jiang rte_pktmbuf_free(pkts[n]); 986a68ba8e0SCheng Jiang } 987a68ba8e0SCheng Jiang 988c0583d98SJerin Jacob static __rte_always_inline void 989a68ba8e0SCheng Jiang complete_async_pkts(struct vhost_dev *vdev) 990a68ba8e0SCheng Jiang { 991a68ba8e0SCheng Jiang struct rte_mbuf *p_cpl[MAX_PKT_BURST]; 992a68ba8e0SCheng Jiang uint16_t complete_count; 99353d3f477SJiayu Hu int16_t dma_id = 
/*
 * Synchronously enqueue a single mbuf @m into @dst_vdev's RX ring
 * (VM-to-VM broadcast path).  RX stats of the destination are updated
 * atomically; TX stats of @src_vdev are per-core and updated directly.
 */
static __rte_always_inline void
sync_virtio_xmit(struct vhost_dev *dst_vdev, struct vhost_dev *src_vdev,
	    struct rte_mbuf *m)
{
	uint16_t ret;

	if (builtin_net_driver) {
		ret = vs_enqueue_pkts(dst_vdev, VIRTIO_RXQ, &m, 1);
	} else {
		ret = rte_vhost_enqueue_burst(dst_vdev->vid, VIRTIO_RXQ, &m, 1);
	}

	if (enable_stats) {
		__atomic_add_fetch(&dst_vdev->stats.rx_total_atomic, 1,
				__ATOMIC_SEQ_CST);
		__atomic_add_fetch(&dst_vdev->stats.rx_atomic, ret,
				__ATOMIC_SEQ_CST);
		src_vdev->stats.tx_total++;
		src_vdev->stats.tx += ret;
	}
}

/*
 * Flush this lcore's buffered TX table for @vdev into the device's RX ring,
 * via the builtin net driver, the async (DMA) path, or the plain sync path.
 *
 * Async path: packets successfully submitted stay owned by the DMA engine
 * (tracked in pkts_inflight, freed later by complete_async_pkts()); packets
 * that failed to be submitted are freed here.  Non-async paths copy into the
 * ring, so all packets are freed unconditionally at the end.
 */
static __rte_always_inline void
drain_vhost(struct vhost_dev *vdev)
{
	uint16_t ret;
	uint32_t buff_idx = rte_lcore_id() * RTE_MAX_VHOST_DEVICE + vdev->vid;
	uint16_t nr_xmit = vhost_txbuff[buff_idx]->len;
	struct rte_mbuf **m = vhost_txbuff[buff_idx]->m_table;

	if (builtin_net_driver) {
		ret = vs_enqueue_pkts(vdev, VIRTIO_RXQ, m, nr_xmit);
	} else if (dma_bind[vdev->vid].dmas[VIRTIO_RXQ].async_enabled) {
		uint16_t enqueue_fail = 0;
		int16_t dma_id = dma_bind[vdev->vid].dmas[VIRTIO_RXQ].dev_id;

		complete_async_pkts(vdev);
		ret = rte_vhost_submit_enqueue_burst(vdev->vid, VIRTIO_RXQ, m, nr_xmit, dma_id, 0);
		__atomic_add_fetch(&vdev->pkts_inflight, ret, __ATOMIC_SEQ_CST);

		/* Free only the tail that could not be submitted. */
		enqueue_fail = nr_xmit - ret;
		if (enqueue_fail)
			free_pkts(&m[ret], nr_xmit - ret);
	} else {
		ret = rte_vhost_enqueue_burst(vdev->vid, VIRTIO_RXQ,
						m, nr_xmit);
	}

	if (enable_stats) {
		__atomic_add_fetch(&vdev->stats.rx_total_atomic, nr_xmit,
				__ATOMIC_SEQ_CST);
		__atomic_add_fetch(&vdev->stats.rx_atomic, ret,
				__ATOMIC_SEQ_CST);
	}

	/* Sync paths have copied the data; the mbufs can all be released. */
	if (!dma_bind[vdev->vid].dmas[VIRTIO_RXQ].async_enabled)
		free_pkts(m, nr_xmit);
}
/*
 * Walk all vhost devices and flush this lcore's per-device TX buffer for any
 * device whose buffer has been idle longer than MBUF_TABLE_DRAIN_TSC, so
 * buffered packets are not stuck waiting for a full burst.
 */
static __rte_always_inline void
drain_vhost_table(void)
{
	uint16_t lcore_id = rte_lcore_id();
	struct vhost_bufftable *vhost_txq;
	struct vhost_dev *vdev;
	uint64_t cur_tsc;

	TAILQ_FOREACH(vdev, &vhost_dev_list, global_vdev_entry) {
		/* Skip devices being torn down. */
		if (unlikely(vdev->remove == 1))
			continue;

		vhost_txq = vhost_txbuff[lcore_id * RTE_MAX_VHOST_DEVICE + vdev->vid];

		cur_tsc = rte_rdtsc();
		if (unlikely(cur_tsc - vhost_txq->pre_tsc
				> MBUF_TABLE_DRAIN_TSC)) {
			RTE_LOG_DP(DEBUG, VHOST_DATA,
				"Vhost TX queue drained after timeout with burst size %u\n",
				vhost_txq->len);
			drain_vhost(vdev);
			vhost_txq->len = 0;
			vhost_txq->pre_tsc = cur_tsc;
		}
	}
}

/*
 * Check if the packet destination MAC address is for a local device. If so then put
 * the packet on that devices RX queue. If not then return.
 *
 * Returns -1 when the destination is not a local vhost device (caller keeps
 * ownership of @m); returns 0 when the packet was consumed here, which
 * includes the drop cases (same src/dst device, or destination being removed).
 */
static __rte_always_inline int
virtio_tx_local(struct vhost_dev *vdev, struct rte_mbuf *m)
{
	struct rte_ether_hdr *pkt_hdr;
	struct vhost_dev *dst_vdev;
	struct vhost_bufftable *vhost_txq;
	uint16_t lcore_id = rte_lcore_id();
	pkt_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);

	dst_vdev = find_vhost_dev(&pkt_hdr->dst_addr);
	if (!dst_vdev)
		return -1;

	if (vdev->vid == dst_vdev->vid) {
		RTE_LOG_DP(DEBUG, VHOST_DATA,
			"(%d) TX: src and dst MAC is same. Dropping packet.\n",
			vdev->vid);
		return 0;
	}

	RTE_LOG_DP(DEBUG, VHOST_DATA,
		"(%d) TX: MAC address is local\n", dst_vdev->vid);

	if (unlikely(dst_vdev->remove)) {
		RTE_LOG_DP(DEBUG, VHOST_DATA,
			"(%d) device is marked for removal\n", dst_vdev->vid);
		return 0;
	}

	/* Buffer the packet in this lcore's TX table for the destination. */
	vhost_txq = vhost_txbuff[lcore_id * RTE_MAX_VHOST_DEVICE + dst_vdev->vid];
	vhost_txq->m_table[vhost_txq->len++] = m;

	if (enable_stats) {
		vdev->stats.tx_total++;
		vdev->stats.tx++;
	}

	/* Flush immediately once a full burst has accumulated. */
	if (unlikely(vhost_txq->len == MAX_PKT_BURST)) {
		drain_vhost(dst_vdev);
		vhost_txq->len = 0;
		vhost_txq->pre_tsc = rte_rdtsc();
	}
	return 0;
}
/*
 * Check if the destination MAC of a packet is one local VM,
 * and get its vlan tag, and offset if it is.
 *
 * Returns 0 with *offset/*vlan_tag untouched when the destination is not
 * local; returns -1 (caller should drop) when src and dst are the same
 * device; otherwise returns 0 after setting *offset to RTE_VLAN_HLEN and
 * *vlan_tag to this device's tag.
 */
static __rte_always_inline int
find_local_dest(struct vhost_dev *vdev, struct rte_mbuf *m,
	uint32_t *offset, uint16_t *vlan_tag)
{
	struct vhost_dev *dst_vdev;
	struct rte_ether_hdr *pkt_hdr =
		rte_pktmbuf_mtod(m, struct rte_ether_hdr *);

	dst_vdev = find_vhost_dev(&pkt_hdr->dst_addr);
	if (!dst_vdev)
		return 0;

	if (vdev->vid == dst_vdev->vid) {
		RTE_LOG_DP(DEBUG, VHOST_DATA,
			"(%d) TX: src and dst MAC is same. Dropping packet.\n",
			vdev->vid);
		return -1;
	}

	/*
	 * HW vlan strip will reduce the packet length
	 * by minus length of vlan tag, so need restore
	 * the packet length by plus it.
	 */
	*offset = RTE_VLAN_HLEN;
	*vlan_tag = vlan_tags[vdev->vid];

	RTE_LOG_DP(DEBUG, VHOST_DATA,
		"(%d) TX: pkt to local VM device id: (%d), vlan tag: %u.\n",
		vdev->vid, dst_vdev->vid, *vlan_tag);

	return 0;
}

/*
 * Prepare TSO offload metadata for @m: classify the packet with
 * rte_net_get_ptype(), record the L2/L3/L4 header lengths, mark the mbuf for
 * TCP segmentation and set the pseudo-header checksum in the TCP header
 * (plus IP checksum offload for IPv4).
 */
static void virtio_tx_offload(struct rte_mbuf *m)
{
	struct rte_net_hdr_lens hdr_lens;
	struct rte_ipv4_hdr *ipv4_hdr;
	struct rte_tcp_hdr *tcp_hdr;
	uint32_t ptype;
	void *l3_hdr;

	ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK);
	m->l2_len = hdr_lens.l2_len;
	m->l3_len = hdr_lens.l3_len;
	m->l4_len = hdr_lens.l4_len;

	l3_hdr = rte_pktmbuf_mtod_offset(m, void *, m->l2_len);
	tcp_hdr = rte_pktmbuf_mtod_offset(m, struct rte_tcp_hdr *,
		m->l2_len + m->l3_len);

	m->ol_flags |= RTE_MBUF_F_TX_TCP_SEG;
	if ((ptype & RTE_PTYPE_L3_MASK) == RTE_PTYPE_L3_IPV4) {
		m->ol_flags |= RTE_MBUF_F_TX_IPV4;
		m->ol_flags |= RTE_MBUF_F_TX_IP_CKSUM;
		ipv4_hdr = l3_hdr;
		/* NIC computes the IP checksum; it must be zeroed first. */
		ipv4_hdr->hdr_checksum = 0;
		tcp_hdr->cksum = rte_ipv4_phdr_cksum(l3_hdr, m->ol_flags);
	} else { /* assume ethertype == RTE_ETHER_TYPE_IPV6 */
		m->ol_flags |= RTE_MBUF_F_TX_IPV6;
		tcp_hdr->cksum = rte_ipv6_phdr_cksum(l3_hdr, m->ol_flags);
	}
}
/*
 * Transmit this lcore's buffered packets out of the physical port and free
 * whatever the NIC did not accept.
 */
static __rte_always_inline void
do_drain_mbuf_table(struct mbuf_table *tx_q)
{
	uint16_t count;

	count = rte_eth_tx_burst(ports[0], tx_q->txq_id,
				 tx_q->m_table, tx_q->len);
	if (unlikely(count < tx_q->len))
		free_pkts(&tx_q->m_table[count], tx_q->len - count);

	tx_q->len = 0;
}

/*
 * This function routes the TX packet to the correct interface. This
 * may be a local device or the physical port.
 *
 * Broadcasts are replicated to every other vhost device and then also sent
 * to the NIC.  In VM2VM_SOFTWARE mode local destinations are delivered via
 * virtio_tx_local(); in VM2VM_HARDWARE mode locally-destined packets get a
 * VLAN tag/length fixup and go through the NIC.  Packets leaving via the NIC
 * are VLAN-tagged, optionally TSO-prepared, and buffered in the per-lcore
 * TX table (flushed when a full burst accumulates).
 */
static __rte_always_inline void
virtio_tx_route(struct vhost_dev *vdev, struct rte_mbuf *m, uint16_t vlan_tag)
{
	struct mbuf_table *tx_q;
	unsigned offset = 0;
	const uint16_t lcore_id = rte_lcore_id();
	struct rte_ether_hdr *nh;


	nh = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
	if (unlikely(rte_is_broadcast_ether_addr(&nh->dst_addr))) {
		struct vhost_dev *vdev2;

		/* Replicate the broadcast to every other vhost device. */
		TAILQ_FOREACH(vdev2, &vhost_dev_list, global_vdev_entry) {
			if (vdev2 != vdev)
				sync_virtio_xmit(vdev2, vdev, m);
		}
		goto queue2nic;
	}

	/*check if destination is local VM*/
	if ((vm2vm_mode == VM2VM_SOFTWARE) && (virtio_tx_local(vdev, m) == 0))
		return;

	if (unlikely(vm2vm_mode == VM2VM_HARDWARE)) {
		if (unlikely(find_local_dest(vdev, m, &offset,
					     &vlan_tag) != 0)) {
			rte_pktmbuf_free(m);
			return;
		}
	}

	RTE_LOG_DP(DEBUG, VHOST_DATA,
		"(%d) TX: MAC address is external\n", vdev->vid);

queue2nic:

	/*Add packet to the port tx queue*/
	tx_q = &lcore_tx_queue[lcore_id];

	nh = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
	if (unlikely(nh->ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_VLAN))) {
		/* Guest has inserted the vlan tag. */
		struct rte_vlan_hdr *vh = (struct rte_vlan_hdr *) (nh + 1);
		uint16_t vlan_tag_be = rte_cpu_to_be_16(vlan_tag);
		if ((vm2vm_mode == VM2VM_HARDWARE) &&
			(vh->vlan_tci != vlan_tag_be))
			vh->vlan_tci = vlan_tag_be;
	} else {
		/* Ask the NIC to insert the VLAN tag on transmit. */
		m->ol_flags |= RTE_MBUF_F_TX_VLAN;

		/*
		 * Find the right seg to adjust the data len when offset is
		 * bigger than tail room size.
		 */
		if (unlikely(vm2vm_mode == VM2VM_HARDWARE)) {
			if (likely(offset <= rte_pktmbuf_tailroom(m)))
				m->data_len += offset;
			else {
				struct rte_mbuf *seg = m;

				while ((seg->next != NULL) &&
					(offset > rte_pktmbuf_tailroom(seg)))
					seg = seg->next;

				seg->data_len += offset;
			}
			m->pkt_len += offset;
		}

		m->vlan_tci = vlan_tag;
	}

	/* Packets received via LRO must be re-segmented on the way out. */
	if (m->ol_flags & RTE_MBUF_F_RX_LRO)
		virtio_tx_offload(m);

	tx_q->m_table[tx_q->len++] = m;
	if (enable_stats) {
		vdev->stats.tx_total++;
		vdev->stats.tx++;
	}

	if (unlikely(tx_q->len == MAX_PKT_BURST))
		do_drain_mbuf_table(tx_q);
}
/*
 * Periodically flush this lcore's NIC TX buffer so partially filled bursts
 * are not delayed indefinitely.  prev_tsc is static: one TX timer per lcore
 * executing this function.
 */
static __rte_always_inline void
drain_mbuf_table(struct mbuf_table *tx_q)
{
	static uint64_t prev_tsc;
	uint64_t cur_tsc;

	if (tx_q->len == 0)
		return;

	cur_tsc = rte_rdtsc();
	if (unlikely(cur_tsc - prev_tsc > MBUF_TABLE_DRAIN_TSC)) {
		prev_tsc = cur_tsc;

		RTE_LOG_DP(DEBUG, VHOST_DATA,
			"TX queue drained after timeout with burst size %u\n",
			tx_q->len);
		do_drain_mbuf_table(tx_q);
	}
}

/*
 * Pull a burst from the NIC RX queue bound to @vdev and enqueue it into the
 * guest's RX ring (builtin driver, async/DMA, or plain sync path).
 */
static __rte_always_inline void
drain_eth_rx(struct vhost_dev *vdev)
{
	uint16_t rx_count, enqueue_count;
	struct rte_mbuf *pkts[MAX_PKT_BURST];

	rx_count = rte_eth_rx_burst(ports[0], vdev->vmdq_rx_q,
				    pkts, MAX_PKT_BURST);

	if (!rx_count)
		return;

	/*
	 * When "enable_retry" is set, here we wait and retry when there
	 * is no enough free slots in the queue to hold @rx_count packets,
	 * to diminish packet loss.
	 */
	if (enable_retry &&
	    unlikely(rx_count > rte_vhost_avail_entries(vdev->vid,
			VIRTIO_RXQ))) {
		uint32_t retry;

		for (retry = 0; retry < burst_rx_retry_num; retry++) {
			rte_delay_us(burst_rx_delay_time);
			if (rx_count <= rte_vhost_avail_entries(vdev->vid,
					VIRTIO_RXQ))
				break;
		}
	}

	if (builtin_net_driver) {
		enqueue_count = vs_enqueue_pkts(vdev, VIRTIO_RXQ,
						pkts, rx_count);
	} else if (dma_bind[vdev->vid].dmas[VIRTIO_RXQ].async_enabled) {
		uint16_t enqueue_fail = 0;
		int16_t dma_id = dma_bind[vdev->vid].dmas[VIRTIO_RXQ].dev_id;

		complete_async_pkts(vdev);
		enqueue_count = rte_vhost_submit_enqueue_burst(vdev->vid,
					VIRTIO_RXQ, pkts, rx_count, dma_id, 0);
		__atomic_add_fetch(&vdev->pkts_inflight, enqueue_count, __ATOMIC_SEQ_CST);

		/* Free only the packets the DMA engine did not accept. */
		enqueue_fail = rx_count - enqueue_count;
		if (enqueue_fail)
			free_pkts(&pkts[enqueue_count], enqueue_fail);

	} else {
		enqueue_count = rte_vhost_enqueue_burst(vdev->vid, VIRTIO_RXQ,
						pkts, rx_count);
	}

	if (enable_stats) {
		__atomic_add_fetch(&vdev->stats.rx_total_atomic, rx_count,
				__ATOMIC_SEQ_CST);
		__atomic_add_fetch(&vdev->stats.rx_atomic, enqueue_count,
				__ATOMIC_SEQ_CST);
	}

	/* Sync paths copied the data; async ones free on DMA completion. */
	if (!dma_bind[vdev->vid].dmas[VIRTIO_RXQ].async_enabled)
		free_pkts(pkts, rx_count);
}
__atomic_add_fetch(&vdev->stats.rx_atomic, enqueue_count, 1386a68ba8e0SCheng Jiang __ATOMIC_SEQ_CST); 1387273ecdbcSYuanhan Liu } 1388273ecdbcSYuanhan Liu 138953d3f477SJiayu Hu if (!dma_bind[vdev->vid].dmas[VIRTIO_RXQ].async_enabled) 1390273ecdbcSYuanhan Liu free_pkts(pkts, rx_count); 1391273ecdbcSYuanhan Liu } 1392273ecdbcSYuanhan Liu 1393c0583d98SJerin Jacob static __rte_always_inline void 1394273ecdbcSYuanhan Liu drain_virtio_tx(struct vhost_dev *vdev) 1395273ecdbcSYuanhan Liu { 1396273ecdbcSYuanhan Liu struct rte_mbuf *pkts[MAX_PKT_BURST]; 1397273ecdbcSYuanhan Liu uint16_t count; 1398273ecdbcSYuanhan Liu uint16_t i; 1399273ecdbcSYuanhan Liu 1400ca059fa5SYuanhan Liu if (builtin_net_driver) { 1401ca059fa5SYuanhan Liu count = vs_dequeue_pkts(vdev, VIRTIO_TXQ, mbuf_pool, 1402273ecdbcSYuanhan Liu pkts, MAX_PKT_BURST); 1403ca059fa5SYuanhan Liu } else { 1404ca059fa5SYuanhan Liu count = rte_vhost_dequeue_burst(vdev->vid, VIRTIO_TXQ, 1405ca059fa5SYuanhan Liu mbuf_pool, pkts, MAX_PKT_BURST); 1406ca059fa5SYuanhan Liu } 1407273ecdbcSYuanhan Liu 1408273ecdbcSYuanhan Liu /* setup VMDq for the first packet */ 1409273ecdbcSYuanhan Liu if (unlikely(vdev->ready == DEVICE_MAC_LEARNING) && count) { 1410273ecdbcSYuanhan Liu if (vdev->remove || link_vmdq(vdev, pkts[0]) == -1) 1411273ecdbcSYuanhan Liu free_pkts(pkts, count); 1412273ecdbcSYuanhan Liu } 1413273ecdbcSYuanhan Liu 14147f262239SYuanhan Liu for (i = 0; i < count; ++i) 1415e2a1dd12SYuanhan Liu virtio_tx_route(vdev, pkts[i], vlan_tags[vdev->vid]); 1416273ecdbcSYuanhan Liu } 1417273ecdbcSYuanhan Liu 1418273ecdbcSYuanhan Liu /* 1419273ecdbcSYuanhan Liu * Main function of vhost-switch. 
It basically does: 1420273ecdbcSYuanhan Liu * 1421273ecdbcSYuanhan Liu * for each vhost device { 1422273ecdbcSYuanhan Liu * - drain_eth_rx() 1423273ecdbcSYuanhan Liu * 1424273ecdbcSYuanhan Liu * Which drains the host eth Rx queue linked to the vhost device, 1425273ecdbcSYuanhan Liu * and deliver all of them to guest virito Rx ring associated with 1426273ecdbcSYuanhan Liu * this vhost device. 1427273ecdbcSYuanhan Liu * 1428273ecdbcSYuanhan Liu * - drain_virtio_tx() 1429273ecdbcSYuanhan Liu * 1430273ecdbcSYuanhan Liu * Which drains the guest virtio Tx queue and deliver all of them 1431273ecdbcSYuanhan Liu * to the target, which could be another vhost device, or the 1432273ecdbcSYuanhan Liu * physical eth dev. The route is done in function "virtio_tx_route". 1433273ecdbcSYuanhan Liu * } 1434d19533e8SHuawei Xie */ 1435d19533e8SHuawei Xie static int 1436273ecdbcSYuanhan Liu switch_worker(void *arg __rte_unused) 1437d19533e8SHuawei Xie { 1438273ecdbcSYuanhan Liu unsigned i; 1439273ecdbcSYuanhan Liu unsigned lcore_id = rte_lcore_id(); 1440273ecdbcSYuanhan Liu struct vhost_dev *vdev; 1441d19533e8SHuawei Xie struct mbuf_table *tx_q; 1442d19533e8SHuawei Xie 14437be78d02SJosh Soref RTE_LOG(INFO, VHOST_DATA, "Processing on Core %u started\n", lcore_id); 1444d19533e8SHuawei Xie 1445d19533e8SHuawei Xie tx_q = &lcore_tx_queue[lcore_id]; 1446273ecdbcSYuanhan Liu for (i = 0; i < rte_lcore_count(); i++) { 1447d19533e8SHuawei Xie if (lcore_ids[i] == lcore_id) { 1448d19533e8SHuawei Xie tx_q->txq_id = i; 1449d19533e8SHuawei Xie break; 1450d19533e8SHuawei Xie } 1451d19533e8SHuawei Xie } 1452d19533e8SHuawei Xie 1453d19533e8SHuawei Xie while(1) { 1454273ecdbcSYuanhan Liu drain_mbuf_table(tx_q); 1455a68ba8e0SCheng Jiang drain_vhost_table(); 1456d19533e8SHuawei Xie /* 145745657a5cSYuanhan Liu * Inform the configuration core that we have exited the 145845657a5cSYuanhan Liu * linked list and that no devices are in use if requested. 
1459d19533e8SHuawei Xie */ 146045657a5cSYuanhan Liu if (lcore_info[lcore_id].dev_removal_flag == REQUEST_DEV_REMOVAL) 146145657a5cSYuanhan Liu lcore_info[lcore_id].dev_removal_flag = ACK_DEV_REMOVAL; 1462d19533e8SHuawei Xie 1463d19533e8SHuawei Xie /* 1464273ecdbcSYuanhan Liu * Process vhost devices 1465d19533e8SHuawei Xie */ 146697daf19eSYuanhan Liu TAILQ_FOREACH(vdev, &lcore_info[lcore_id].vdev_list, 146797daf19eSYuanhan Liu lcore_vdev_entry) { 1468364dddcdSHuawei Xie if (unlikely(vdev->remove)) { 1469e571e6b4SHuawei Xie unlink_vmdq(vdev); 1470e571e6b4SHuawei Xie vdev->ready = DEVICE_SAFE_REMOVE; 1471d19533e8SHuawei Xie continue; 1472d19533e8SHuawei Xie } 147345657a5cSYuanhan Liu 1474273ecdbcSYuanhan Liu if (likely(vdev->ready == DEVICE_RX)) 1475273ecdbcSYuanhan Liu drain_eth_rx(vdev); 1476d19533e8SHuawei Xie 1477273ecdbcSYuanhan Liu if (likely(!vdev->remove)) 1478273ecdbcSYuanhan Liu drain_virtio_tx(vdev); 1479d19533e8SHuawei Xie } 1480d19533e8SHuawei Xie } 1481d19533e8SHuawei Xie 1482d19533e8SHuawei Xie return 0; 1483d19533e8SHuawei Xie } 1484d19533e8SHuawei Xie 1485d19533e8SHuawei Xie /* 148645657a5cSYuanhan Liu * Remove a device from the specific data core linked list and from the 14877be78d02SJosh Soref * main linked list. Synchronization occurs through the use of the 148845657a5cSYuanhan Liu * lcore dev_removal_flag. Device is made volatile here to avoid re-ordering 1489d19533e8SHuawei Xie * of dev->remove=1 which can cause an infinite loop in the rte_pause loop. 
1490d19533e8SHuawei Xie */ 1491d19533e8SHuawei Xie static void 14924ecf22e3SYuanhan Liu destroy_device(int vid) 1493d19533e8SHuawei Xie { 149416ae8abeSYuanhan Liu struct vhost_dev *vdev = NULL; 1495d19533e8SHuawei Xie int lcore; 1496a68ba8e0SCheng Jiang uint16_t i; 1497d19533e8SHuawei Xie 149816ae8abeSYuanhan Liu TAILQ_FOREACH(vdev, &vhost_dev_list, global_vdev_entry) { 14994ecf22e3SYuanhan Liu if (vdev->vid == vid) 150016ae8abeSYuanhan Liu break; 150116ae8abeSYuanhan Liu } 150216ae8abeSYuanhan Liu if (!vdev) 150316ae8abeSYuanhan Liu return; 1504d19533e8SHuawei Xie /*set the remove flag. */ 1505e571e6b4SHuawei Xie vdev->remove = 1; 1506e571e6b4SHuawei Xie while(vdev->ready != DEVICE_SAFE_REMOVE) { 1507d19533e8SHuawei Xie rte_pause(); 1508d19533e8SHuawei Xie } 1509d19533e8SHuawei Xie 1510a68ba8e0SCheng Jiang for (i = 0; i < RTE_MAX_LCORE; i++) 151153d3f477SJiayu Hu rte_free(vhost_txbuff[i * RTE_MAX_VHOST_DEVICE + vid]); 1512a68ba8e0SCheng Jiang 1513ca059fa5SYuanhan Liu if (builtin_net_driver) 1514ca059fa5SYuanhan Liu vs_vhost_net_remove(vdev); 1515ca059fa5SYuanhan Liu 151697daf19eSYuanhan Liu TAILQ_REMOVE(&lcore_info[vdev->coreid].vdev_list, vdev, 151797daf19eSYuanhan Liu lcore_vdev_entry); 151897daf19eSYuanhan Liu TAILQ_REMOVE(&vhost_dev_list, vdev, global_vdev_entry); 151997daf19eSYuanhan Liu 1520d19533e8SHuawei Xie 1521d19533e8SHuawei Xie /* Set the dev_removal_flag on each lcore. */ 1522cb056611SStephen Hemminger RTE_LCORE_FOREACH_WORKER(lcore) 152345657a5cSYuanhan Liu lcore_info[lcore].dev_removal_flag = REQUEST_DEV_REMOVAL; 1524d19533e8SHuawei Xie 1525d19533e8SHuawei Xie /* 152645657a5cSYuanhan Liu * Once each core has set the dev_removal_flag to ACK_DEV_REMOVAL 152745657a5cSYuanhan Liu * we can be sure that they can no longer access the device removed 152845657a5cSYuanhan Liu * from the linked lists and that the devices are no longer in use. 
1529d19533e8SHuawei Xie */ 1530cb056611SStephen Hemminger RTE_LCORE_FOREACH_WORKER(lcore) { 153145657a5cSYuanhan Liu while (lcore_info[lcore].dev_removal_flag != ACK_DEV_REMOVAL) 1532d19533e8SHuawei Xie rte_pause(); 1533d19533e8SHuawei Xie } 1534d19533e8SHuawei Xie 153545657a5cSYuanhan Liu lcore_info[vdev->coreid].device_num--; 1536d19533e8SHuawei Xie 153745657a5cSYuanhan Liu RTE_LOG(INFO, VHOST_DATA, 1538c08a3490SYuanhan Liu "(%d) device has been removed from data core\n", 1539e2a1dd12SYuanhan Liu vdev->vid); 1540d19533e8SHuawei Xie 154153d3f477SJiayu Hu if (dma_bind[vid].dmas[VIRTIO_RXQ].async_enabled) { 1542b9f23beeSCheng Jiang uint16_t n_pkt = 0; 154353d3f477SJiayu Hu int16_t dma_id = dma_bind[vid].dmas[VIRTIO_RXQ].dev_id; 1544b9f23beeSCheng Jiang struct rte_mbuf *m_cpl[vdev->pkts_inflight]; 1545b9f23beeSCheng Jiang 1546b9f23beeSCheng Jiang while (vdev->pkts_inflight) { 1547b9f23beeSCheng Jiang n_pkt = rte_vhost_clear_queue_thread_unsafe(vid, VIRTIO_RXQ, 154853d3f477SJiayu Hu m_cpl, vdev->pkts_inflight, dma_id, 0); 1549b9f23beeSCheng Jiang free_pkts(m_cpl, n_pkt); 1550b9f23beeSCheng Jiang __atomic_sub_fetch(&vdev->pkts_inflight, n_pkt, __ATOMIC_SEQ_CST); 1551b9f23beeSCheng Jiang } 1552b9f23beeSCheng Jiang 1553abec60e7SCheng Jiang rte_vhost_async_channel_unregister(vid, VIRTIO_RXQ); 155453d3f477SJiayu Hu dma_bind[vid].dmas[VIRTIO_RXQ].async_enabled = false; 1555b9f23beeSCheng Jiang } 1556abec60e7SCheng Jiang 1557e571e6b4SHuawei Xie rte_free(vdev); 1558d19533e8SHuawei Xie } 1559d19533e8SHuawei Xie 1560d19533e8SHuawei Xie /* 1561d19533e8SHuawei Xie * A new device is added to a data core. First the device is added to the main linked list 156210b4270fSRami Rosen * and then allocated to a specific data core. 
1563d19533e8SHuawei Xie */ 1564d19533e8SHuawei Xie static int 15654ecf22e3SYuanhan Liu new_device(int vid) 1566d19533e8SHuawei Xie { 1567d19533e8SHuawei Xie int lcore, core_add = 0; 1568a68ba8e0SCheng Jiang uint16_t i; 1569d19533e8SHuawei Xie uint32_t device_num_min = num_devices; 1570e571e6b4SHuawei Xie struct vhost_dev *vdev; 1571fdf20fa7SSergio Gonzalez Monroy vdev = rte_zmalloc("vhost device", sizeof(*vdev), RTE_CACHE_LINE_SIZE); 1572e571e6b4SHuawei Xie if (vdev == NULL) { 1573c08a3490SYuanhan Liu RTE_LOG(INFO, VHOST_DATA, 15747f262239SYuanhan Liu "(%d) couldn't allocate memory for vhost dev\n", 1575e2a1dd12SYuanhan Liu vid); 1576e571e6b4SHuawei Xie return -1; 1577e571e6b4SHuawei Xie } 1578e2a1dd12SYuanhan Liu vdev->vid = vid; 1579d19533e8SHuawei Xie 1580a68ba8e0SCheng Jiang for (i = 0; i < RTE_MAX_LCORE; i++) { 158153d3f477SJiayu Hu vhost_txbuff[i * RTE_MAX_VHOST_DEVICE + vid] 1582a68ba8e0SCheng Jiang = rte_zmalloc("vhost bufftable", 1583a68ba8e0SCheng Jiang sizeof(struct vhost_bufftable), 1584a68ba8e0SCheng Jiang RTE_CACHE_LINE_SIZE); 1585a68ba8e0SCheng Jiang 158653d3f477SJiayu Hu if (vhost_txbuff[i * RTE_MAX_VHOST_DEVICE + vid] == NULL) { 1587a68ba8e0SCheng Jiang RTE_LOG(INFO, VHOST_DATA, 1588a68ba8e0SCheng Jiang "(%d) couldn't allocate memory for vhost TX\n", vid); 1589a68ba8e0SCheng Jiang return -1; 1590a68ba8e0SCheng Jiang } 1591a68ba8e0SCheng Jiang } 1592a68ba8e0SCheng Jiang 1593ca059fa5SYuanhan Liu if (builtin_net_driver) 1594ca059fa5SYuanhan Liu vs_vhost_net_setup(vdev); 1595ca059fa5SYuanhan Liu 159697daf19eSYuanhan Liu TAILQ_INSERT_TAIL(&vhost_dev_list, vdev, global_vdev_entry); 1597e2a1dd12SYuanhan Liu vdev->vmdq_rx_q = vid * queues_per_pool + vmdq_queue_base; 1598d19533e8SHuawei Xie 1599d19533e8SHuawei Xie /*reset ready flag*/ 1600e571e6b4SHuawei Xie vdev->ready = DEVICE_MAC_LEARNING; 1601e571e6b4SHuawei Xie vdev->remove = 0; 1602d19533e8SHuawei Xie 1603d19533e8SHuawei Xie /* Find a suitable lcore to add the device. 
*/ 1604cb056611SStephen Hemminger RTE_LCORE_FOREACH_WORKER(lcore) { 160545657a5cSYuanhan Liu if (lcore_info[lcore].device_num < device_num_min) { 160645657a5cSYuanhan Liu device_num_min = lcore_info[lcore].device_num; 1607d19533e8SHuawei Xie core_add = lcore; 1608d19533e8SHuawei Xie } 1609d19533e8SHuawei Xie } 1610e571e6b4SHuawei Xie vdev->coreid = core_add; 1611e571e6b4SHuawei Xie 161297daf19eSYuanhan Liu TAILQ_INSERT_TAIL(&lcore_info[vdev->coreid].vdev_list, vdev, 161397daf19eSYuanhan Liu lcore_vdev_entry); 161445657a5cSYuanhan Liu lcore_info[vdev->coreid].device_num++; 1615d19533e8SHuawei Xie 1616d19533e8SHuawei Xie /* Disable notifications. */ 16174ecf22e3SYuanhan Liu rte_vhost_enable_guest_notification(vid, VIRTIO_RXQ, 0); 16184ecf22e3SYuanhan Liu rte_vhost_enable_guest_notification(vid, VIRTIO_TXQ, 0); 1619d19533e8SHuawei Xie 1620c08a3490SYuanhan Liu RTE_LOG(INFO, VHOST_DATA, 1621c08a3490SYuanhan Liu "(%d) device has been added to data core %d\n", 1622e2a1dd12SYuanhan Liu vid, vdev->coreid); 1623d19533e8SHuawei Xie 162453d3f477SJiayu Hu if (dma_bind[vid].dmas[VIRTIO_RXQ].dev_id != INVALID_DMA_ID) { 162553d3f477SJiayu Hu int ret; 1626a68ba8e0SCheng Jiang 162753d3f477SJiayu Hu ret = rte_vhost_async_channel_register(vid, VIRTIO_RXQ); 162853d3f477SJiayu Hu if (ret == 0) 162953d3f477SJiayu Hu dma_bind[vid].dmas[VIRTIO_RXQ].async_enabled = true; 163053d3f477SJiayu Hu return ret; 16316e9a9d2aSCheng Jiang } 1632abec60e7SCheng Jiang 1633d19533e8SHuawei Xie return 0; 1634d19533e8SHuawei Xie } 1635d19533e8SHuawei Xie 1636b9f23beeSCheng Jiang static int 1637b9f23beeSCheng Jiang vring_state_changed(int vid, uint16_t queue_id, int enable) 1638b9f23beeSCheng Jiang { 1639b9f23beeSCheng Jiang struct vhost_dev *vdev = NULL; 1640b9f23beeSCheng Jiang 1641b9f23beeSCheng Jiang TAILQ_FOREACH(vdev, &vhost_dev_list, global_vdev_entry) { 1642b9f23beeSCheng Jiang if (vdev->vid == vid) 1643b9f23beeSCheng Jiang break; 1644b9f23beeSCheng Jiang } 1645b9f23beeSCheng Jiang if (!vdev) 
1646b9f23beeSCheng Jiang return -1; 1647b9f23beeSCheng Jiang 1648b9f23beeSCheng Jiang if (queue_id != VIRTIO_RXQ) 1649b9f23beeSCheng Jiang return 0; 1650b9f23beeSCheng Jiang 165153d3f477SJiayu Hu if (dma_bind[vid].dmas[queue_id].async_enabled) { 1652b9f23beeSCheng Jiang if (!enable) { 1653b9f23beeSCheng Jiang uint16_t n_pkt = 0; 165453d3f477SJiayu Hu int16_t dma_id = dma_bind[vid].dmas[VIRTIO_RXQ].dev_id; 1655b9f23beeSCheng Jiang struct rte_mbuf *m_cpl[vdev->pkts_inflight]; 1656b9f23beeSCheng Jiang 1657b9f23beeSCheng Jiang while (vdev->pkts_inflight) { 1658b9f23beeSCheng Jiang n_pkt = rte_vhost_clear_queue_thread_unsafe(vid, queue_id, 165953d3f477SJiayu Hu m_cpl, vdev->pkts_inflight, dma_id, 0); 1660b9f23beeSCheng Jiang free_pkts(m_cpl, n_pkt); 1661b9f23beeSCheng Jiang __atomic_sub_fetch(&vdev->pkts_inflight, n_pkt, __ATOMIC_SEQ_CST); 1662b9f23beeSCheng Jiang } 1663b9f23beeSCheng Jiang } 1664b9f23beeSCheng Jiang } 1665b9f23beeSCheng Jiang 1666b9f23beeSCheng Jiang return 0; 1667b9f23beeSCheng Jiang } 1668b9f23beeSCheng Jiang 1669d19533e8SHuawei Xie /* 1670d19533e8SHuawei Xie * These callback allow devices to be added to the data core when configuration 1671d19533e8SHuawei Xie * has been fully complete. 1672d19533e8SHuawei Xie */ 1673ab4bb424SMaxime Coquelin static const struct rte_vhost_device_ops virtio_net_device_ops = 1674d19533e8SHuawei Xie { 1675d19533e8SHuawei Xie .new_device = new_device, 1676d19533e8SHuawei Xie .destroy_device = destroy_device, 1677b9f23beeSCheng Jiang .vring_state_changed = vring_state_changed, 1678d19533e8SHuawei Xie }; 1679d19533e8SHuawei Xie 1680d19533e8SHuawei Xie /* 1681d19533e8SHuawei Xie * This is a thread will wake up after a period to print stats if the user has 1682d19533e8SHuawei Xie * enabled them. 
1683d19533e8SHuawei Xie */ 1684fa204854SOlivier Matz static void * 1685fa204854SOlivier Matz print_stats(__rte_unused void *arg) 1686d19533e8SHuawei Xie { 168745657a5cSYuanhan Liu struct vhost_dev *vdev; 1688d19533e8SHuawei Xie uint64_t tx_dropped, rx_dropped; 1689d19533e8SHuawei Xie uint64_t tx, tx_total, rx, rx_total; 1690d19533e8SHuawei Xie const char clr[] = { 27, '[', '2', 'J', '\0' }; 1691d19533e8SHuawei Xie const char top_left[] = { 27, '[', '1', ';', '1', 'H','\0' }; 1692d19533e8SHuawei Xie 1693d19533e8SHuawei Xie while(1) { 1694d19533e8SHuawei Xie sleep(enable_stats); 1695d19533e8SHuawei Xie 1696d19533e8SHuawei Xie /* Clear screen and move to top left */ 169756fe86f8SYuanhan Liu printf("%s%s\n", clr, top_left); 169856fe86f8SYuanhan Liu printf("Device statistics =================================\n"); 1699d19533e8SHuawei Xie 170097daf19eSYuanhan Liu TAILQ_FOREACH(vdev, &vhost_dev_list, global_vdev_entry) { 170156fe86f8SYuanhan Liu tx_total = vdev->stats.tx_total; 170256fe86f8SYuanhan Liu tx = vdev->stats.tx; 1703d19533e8SHuawei Xie tx_dropped = tx_total - tx; 170456fe86f8SYuanhan Liu 1705a68ba8e0SCheng Jiang rx_total = __atomic_load_n(&vdev->stats.rx_total_atomic, 1706a68ba8e0SCheng Jiang __ATOMIC_SEQ_CST); 1707a68ba8e0SCheng Jiang rx = __atomic_load_n(&vdev->stats.rx_atomic, 1708a68ba8e0SCheng Jiang __ATOMIC_SEQ_CST); 1709d19533e8SHuawei Xie rx_dropped = rx_total - rx; 1710d19533e8SHuawei Xie 1711c08a3490SYuanhan Liu printf("Statistics for device %d\n" 171256fe86f8SYuanhan Liu "-----------------------\n" 171356fe86f8SYuanhan Liu "TX total: %" PRIu64 "\n" 171456fe86f8SYuanhan Liu "TX dropped: %" PRIu64 "\n" 171556fe86f8SYuanhan Liu "TX successful: %" PRIu64 "\n" 171656fe86f8SYuanhan Liu "RX total: %" PRIu64 "\n" 171756fe86f8SYuanhan Liu "RX dropped: %" PRIu64 "\n" 171856fe86f8SYuanhan Liu "RX successful: %" PRIu64 "\n", 17194ecf22e3SYuanhan Liu vdev->vid, 172056fe86f8SYuanhan Liu tx_total, tx_dropped, tx, 172156fe86f8SYuanhan Liu rx_total, rx_dropped, rx); 
1722d19533e8SHuawei Xie } 172356fe86f8SYuanhan Liu 172456fe86f8SYuanhan Liu printf("===================================================\n"); 17253ee6f706SGeorgiy Levashov 17263ee6f706SGeorgiy Levashov fflush(stdout); 1727d19533e8SHuawei Xie } 1728fa204854SOlivier Matz 1729fa204854SOlivier Matz return NULL; 1730d19533e8SHuawei Xie } 1731d19533e8SHuawei Xie 1732ad0eef4dSJiayu Hu static void 1733ad0eef4dSJiayu Hu unregister_drivers(int socket_num) 1734ad0eef4dSJiayu Hu { 1735ad0eef4dSJiayu Hu int i, ret; 1736ad0eef4dSJiayu Hu 1737ad0eef4dSJiayu Hu for (i = 0; i < socket_num; i++) { 1738ad0eef4dSJiayu Hu ret = rte_vhost_driver_unregister(socket_files + i * PATH_MAX); 1739ad0eef4dSJiayu Hu if (ret != 0) 1740ad0eef4dSJiayu Hu RTE_LOG(ERR, VHOST_CONFIG, 1741ad0eef4dSJiayu Hu "Fail to unregister vhost driver for %s.\n", 1742ad0eef4dSJiayu Hu socket_files + i * PATH_MAX); 1743ad0eef4dSJiayu Hu } 1744ad0eef4dSJiayu Hu } 1745ad0eef4dSJiayu Hu 1746c83d2d00SOuyang Changchun /* When we receive a INT signal, unregister vhost driver */ 1747c83d2d00SOuyang Changchun static void 1748c83d2d00SOuyang Changchun sigint_handler(__rte_unused int signum) 1749c83d2d00SOuyang Changchun { 1750c83d2d00SOuyang Changchun /* Unregister vhost driver. 
*/ 1751ad0eef4dSJiayu Hu unregister_drivers(nb_sockets); 1752ad0eef4dSJiayu Hu 1753c83d2d00SOuyang Changchun exit(0); 1754c83d2d00SOuyang Changchun } 1755d19533e8SHuawei Xie 175653d3f477SJiayu Hu static void 175753d3f477SJiayu Hu reset_dma(void) 175853d3f477SJiayu Hu { 175953d3f477SJiayu Hu int i; 176053d3f477SJiayu Hu 176153d3f477SJiayu Hu for (i = 0; i < RTE_MAX_VHOST_DEVICE; i++) { 176253d3f477SJiayu Hu int j; 176353d3f477SJiayu Hu 176453d3f477SJiayu Hu for (j = 0; j < RTE_MAX_QUEUES_PER_PORT * 2; j++) { 176553d3f477SJiayu Hu dma_bind[i].dmas[j].dev_id = INVALID_DMA_ID; 176653d3f477SJiayu Hu dma_bind[i].dmas[j].async_enabled = false; 176753d3f477SJiayu Hu } 176853d3f477SJiayu Hu } 176953d3f477SJiayu Hu 177053d3f477SJiayu Hu for (i = 0; i < RTE_DMADEV_DEFAULT_MAX; i++) 177153d3f477SJiayu Hu dmas_id[i] = INVALID_DMA_ID; 177253d3f477SJiayu Hu } 177353d3f477SJiayu Hu 1774bdb19b77SYuanhan Liu /* 1775164a601bSYuanhan Liu * Main function, does initialisation and calls the per-lcore functions. 
1776d19533e8SHuawei Xie */ 1777d19533e8SHuawei Xie int 177898a16481SDavid Marchand main(int argc, char *argv[]) 1779d19533e8SHuawei Xie { 1780d19533e8SHuawei Xie unsigned lcore_id, core_id = 0; 1781d19533e8SHuawei Xie unsigned nb_ports, valid_num_ports; 1782ad0eef4dSJiayu Hu int ret, i; 1783f8244c63SZhiyong Yang uint16_t portid; 1784d19533e8SHuawei Xie static pthread_t tid; 1785ca7036b4SDavid Marchand uint64_t flags = RTE_VHOST_USER_NET_COMPLIANT_OL_FLAGS; 1786d19533e8SHuawei Xie 1787c83d2d00SOuyang Changchun signal(SIGINT, sigint_handler); 1788c83d2d00SOuyang Changchun 1789d19533e8SHuawei Xie /* init EAL */ 1790d19533e8SHuawei Xie ret = rte_eal_init(argc, argv); 1791d19533e8SHuawei Xie if (ret < 0) 1792d19533e8SHuawei Xie rte_exit(EXIT_FAILURE, "Error with EAL initialization\n"); 1793d19533e8SHuawei Xie argc -= ret; 1794d19533e8SHuawei Xie argv += ret; 1795d19533e8SHuawei Xie 179653d3f477SJiayu Hu /* initialize dma structures */ 179753d3f477SJiayu Hu reset_dma(); 179853d3f477SJiayu Hu 1799d19533e8SHuawei Xie /* parse app arguments */ 1800d19533e8SHuawei Xie ret = us_vhost_parse_args(argc, argv); 1801d19533e8SHuawei Xie if (ret < 0) 1802d19533e8SHuawei Xie rte_exit(EXIT_FAILURE, "Invalid argument\n"); 1803d19533e8SHuawei Xie 1804b3bee7d8SYong Wang for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { 180545657a5cSYuanhan Liu TAILQ_INIT(&lcore_info[lcore_id].vdev_list); 180645657a5cSYuanhan Liu 1807d19533e8SHuawei Xie if (rte_lcore_is_enabled(lcore_id)) 1808d19533e8SHuawei Xie lcore_ids[core_id++] = lcore_id; 1809b3bee7d8SYong Wang } 1810d19533e8SHuawei Xie 1811d19533e8SHuawei Xie if (rte_lcore_count() > RTE_MAX_LCORE) 1812d19533e8SHuawei Xie rte_exit(EXIT_FAILURE,"Not enough cores\n"); 1813d19533e8SHuawei Xie 1814d19533e8SHuawei Xie /* Get the number of physical ports. 
*/ 1815d9a42a69SThomas Monjalon nb_ports = rte_eth_dev_count_avail(); 1816d19533e8SHuawei Xie 1817d19533e8SHuawei Xie /* 1818d19533e8SHuawei Xie * Update the global var NUM_PORTS and global array PORTS 1819d19533e8SHuawei Xie * and get value of var VALID_NUM_PORTS according to system ports number 1820d19533e8SHuawei Xie */ 1821d19533e8SHuawei Xie valid_num_ports = check_ports_num(nb_ports); 1822d19533e8SHuawei Xie 1823d19533e8SHuawei Xie if ((valid_num_ports == 0) || (valid_num_ports > MAX_SUP_PORTS)) { 1824d19533e8SHuawei Xie RTE_LOG(INFO, VHOST_PORT, "Current enabled port number is %u," 1825d19533e8SHuawei Xie "but only %u port can be enabled\n",num_ports, MAX_SUP_PORTS); 1826d19533e8SHuawei Xie return -1; 1827d19533e8SHuawei Xie } 1828d19533e8SHuawei Xie 1829bdb19b77SYuanhan Liu /* 1830bdb19b77SYuanhan Liu * FIXME: here we are trying to allocate mbufs big enough for 1831bdb19b77SYuanhan Liu * @MAX_QUEUES, but the truth is we're never going to use that 1832bdb19b77SYuanhan Liu * many queues here. We probably should only do allocation for 1833bdb19b77SYuanhan Liu * those queues we are going to use. 1834bdb19b77SYuanhan Liu */ 1835*917229c2SWenwu Ma mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL", total_num_mbufs, 1836*917229c2SWenwu Ma MBUF_CACHE_SIZE, 0, MBUF_DATA_SIZE, 1837*917229c2SWenwu Ma rte_socket_id()); 1838*917229c2SWenwu Ma if (mbuf_pool == NULL) 1839*917229c2SWenwu Ma rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n"); 1840d19533e8SHuawei Xie 1841d19533e8SHuawei Xie if (vm2vm_mode == VM2VM_HARDWARE) { 1842d19533e8SHuawei Xie /* Enable VT loop back to let L2 switch to do it. 
*/ 1843d19533e8SHuawei Xie vmdq_conf_default.rx_adv_conf.vmdq_rx_conf.enable_loop_back = 1; 18441f49ec15SThomas Monjalon RTE_LOG(DEBUG, VHOST_CONFIG, 1845d19533e8SHuawei Xie "Enable loop back for L2 switch in vmdq.\n"); 1846d19533e8SHuawei Xie } 1847d19533e8SHuawei Xie 1848d19533e8SHuawei Xie /* initialize all ports */ 18498728ccf3SThomas Monjalon RTE_ETH_FOREACH_DEV(portid) { 1850d19533e8SHuawei Xie /* skip ports that are not enabled */ 1851d19533e8SHuawei Xie if ((enabled_port_mask & (1 << portid)) == 0) { 1852d19533e8SHuawei Xie RTE_LOG(INFO, VHOST_PORT, 1853d19533e8SHuawei Xie "Skipping disabled port %d\n", portid); 1854d19533e8SHuawei Xie continue; 1855d19533e8SHuawei Xie } 1856d19533e8SHuawei Xie if (port_init(portid) != 0) 1857d19533e8SHuawei Xie rte_exit(EXIT_FAILURE, 1858d19533e8SHuawei Xie "Cannot initialize network ports\n"); 1859d19533e8SHuawei Xie } 1860d19533e8SHuawei Xie 1861d19533e8SHuawei Xie /* Enable stats if the user option is set. */ 186267b6d303SRavi Kerur if (enable_stats) { 1863fa204854SOlivier Matz ret = rte_ctrl_thread_create(&tid, "print-stats", NULL, 1864fa204854SOlivier Matz print_stats, NULL); 1865fa204854SOlivier Matz if (ret < 0) 186667b6d303SRavi Kerur rte_exit(EXIT_FAILURE, 186767b6d303SRavi Kerur "Cannot create print-stats thread\n"); 186867b6d303SRavi Kerur } 1869d19533e8SHuawei Xie 1870d19533e8SHuawei Xie /* Launch all data cores. 
*/ 1871cb056611SStephen Hemminger RTE_LCORE_FOREACH_WORKER(lcore_id) 187268363d85SYuanhan Liu rte_eal_remote_launch(switch_worker, NULL, lcore_id); 1873d19533e8SHuawei Xie 18742345e3beSYuanhan Liu if (client_mode) 18752345e3beSYuanhan Liu flags |= RTE_VHOST_USER_CLIENT; 18762345e3beSYuanhan Liu 187753d3f477SJiayu Hu for (i = 0; i < dma_count; i++) { 187853d3f477SJiayu Hu if (rte_vhost_async_dma_configure(dmas_id[i], 0) < 0) { 187953d3f477SJiayu Hu RTE_LOG(ERR, VHOST_PORT, "Failed to configure DMA in vhost.\n"); 188053d3f477SJiayu Hu rte_exit(EXIT_FAILURE, "Cannot use given DMA device\n"); 188153d3f477SJiayu Hu } 188253d3f477SJiayu Hu } 188353d3f477SJiayu Hu 1884bde19a4dSJiayu Hu /* Register vhost user driver to handle vhost messages. */ 1885ad0eef4dSJiayu Hu for (i = 0; i < nb_sockets; i++) { 18860917f9d1SYuanhan Liu char *file = socket_files + i * PATH_MAX; 1887a68ba8e0SCheng Jiang 188853d3f477SJiayu Hu if (dma_count) 1889abec60e7SCheng Jiang flags = flags | RTE_VHOST_USER_ASYNC_COPY; 1890abec60e7SCheng Jiang 18910917f9d1SYuanhan Liu ret = rte_vhost_driver_register(file, flags); 1892ad0eef4dSJiayu Hu if (ret != 0) { 1893ad0eef4dSJiayu Hu unregister_drivers(i); 1894ad0eef4dSJiayu Hu rte_exit(EXIT_FAILURE, 1895ad0eef4dSJiayu Hu "vhost driver register failure.\n"); 1896ad0eef4dSJiayu Hu } 1897ca059fa5SYuanhan Liu 1898ca059fa5SYuanhan Liu if (builtin_net_driver) 1899ca059fa5SYuanhan Liu rte_vhost_driver_set_features(file, VIRTIO_NET_FEATURES); 1900ca059fa5SYuanhan Liu 19010917f9d1SYuanhan Liu if (mergeable == 0) { 19020917f9d1SYuanhan Liu rte_vhost_driver_disable_features(file, 19030917f9d1SYuanhan Liu 1ULL << VIRTIO_NET_F_MRG_RXBUF); 19040917f9d1SYuanhan Liu } 19050917f9d1SYuanhan Liu 19060917f9d1SYuanhan Liu if (enable_tx_csum == 0) { 19070917f9d1SYuanhan Liu rte_vhost_driver_disable_features(file, 19080917f9d1SYuanhan Liu 1ULL << VIRTIO_NET_F_CSUM); 19090917f9d1SYuanhan Liu } 19100917f9d1SYuanhan Liu 19110917f9d1SYuanhan Liu if (enable_tso == 0) { 
19120917f9d1SYuanhan Liu rte_vhost_driver_disable_features(file, 19130917f9d1SYuanhan Liu 1ULL << VIRTIO_NET_F_HOST_TSO4); 19140917f9d1SYuanhan Liu rte_vhost_driver_disable_features(file, 19150917f9d1SYuanhan Liu 1ULL << VIRTIO_NET_F_HOST_TSO6); 19160917f9d1SYuanhan Liu rte_vhost_driver_disable_features(file, 19170917f9d1SYuanhan Liu 1ULL << VIRTIO_NET_F_GUEST_TSO4); 19180917f9d1SYuanhan Liu rte_vhost_driver_disable_features(file, 19190917f9d1SYuanhan Liu 1ULL << VIRTIO_NET_F_GUEST_TSO6); 19200917f9d1SYuanhan Liu } 19210917f9d1SYuanhan Liu 19220917f9d1SYuanhan Liu if (promiscuous) { 19230917f9d1SYuanhan Liu rte_vhost_driver_enable_features(file, 19240917f9d1SYuanhan Liu 1ULL << VIRTIO_NET_F_CTRL_RX); 19250917f9d1SYuanhan Liu } 1926d19533e8SHuawei Xie 192793433b63SYuanhan Liu ret = rte_vhost_driver_callback_register(file, 192893433b63SYuanhan Liu &virtio_net_device_ops); 192993433b63SYuanhan Liu if (ret != 0) { 193093433b63SYuanhan Liu rte_exit(EXIT_FAILURE, 193193433b63SYuanhan Liu "failed to register vhost driver callbacks.\n"); 193293433b63SYuanhan Liu } 1933af147591SYuanhan Liu 1934af147591SYuanhan Liu if (rte_vhost_driver_start(file) < 0) { 1935af147591SYuanhan Liu rte_exit(EXIT_FAILURE, 1936af147591SYuanhan Liu "failed to start vhost driver.\n"); 1937af147591SYuanhan Liu } 193893433b63SYuanhan Liu } 1939d19533e8SHuawei Xie 1940cb056611SStephen Hemminger RTE_LCORE_FOREACH_WORKER(lcore_id) 1941af147591SYuanhan Liu rte_eal_wait_lcore(lcore_id); 1942af147591SYuanhan Liu 194310aa3757SChengchang Tang /* clean up the EAL */ 194410aa3757SChengchang Tang rte_eal_cleanup(); 1945d19533e8SHuawei Xie 194610aa3757SChengchang Tang return 0; 1947d19533e8SHuawei Xie } 1948