/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2017 Intel Corporation
 */

#include <arpa/inet.h>
#include <getopt.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/virtio_net.h>
#include <linux/virtio_ring.h>
#include <signal.h>
#include <stdint.h>
#include <sys/eventfd.h>
#include <sys/param.h>
#include <unistd.h>

#include <rte_cycles.h>
#include <rte_ethdev.h>
#include <rte_log.h>
#include <rte_string_fns.h>
#include <rte_malloc.h>
#include <rte_net.h>
#include <rte_vhost.h>
#include <rte_ip.h>
#include <rte_tcp.h>
#include <rte_pause.h>

#include "ioat.h"
#include "main.h"

#ifndef MAX_QUEUES
#define MAX_QUEUES 128
#endif

/* the maximum number of external ports supported */
#define MAX_SUP_PORTS 1

#define MBUF_CACHE_SIZE	128
#define MBUF_DATA_SIZE	RTE_MBUF_DEFAULT_BUF_SIZE

#define BURST_TX_DRAIN_US 100	/* TX drain every ~100us */

#define BURST_RX_WAIT_US 15	/* Defines how long we wait between retries on RX */
#define BURST_RX_RETRIES 4	/* Number of retries on RX. */

#define JUMBO_FRAME_MAX_SIZE    0x2600
#define MAX_MTU (JUMBO_FRAME_MAX_SIZE - (RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN))

/* State of virtio device. */
#define DEVICE_MAC_LEARNING 0
#define DEVICE_RX		1
#define DEVICE_SAFE_REMOVE	2

/* Configurable number of RX/TX ring descriptors */
#define RTE_TEST_RX_DESC_DEFAULT 1024
#define RTE_TEST_TX_DESC_DEFAULT 512

#define INVALID_PORT_ID 0xFF

/* mask of enabled ports */
static uint32_t enabled_port_mask = 0;

/* Promiscuous mode: set by the -P command-line flag. */
static uint32_t promiscuous;

/* number of devices/queues to support; filled in by port_init() from VMDq limits */
static uint32_t num_queues = 0;
static uint32_t num_devices;

/* single mbuf pool shared by all RX queues; created outside this chunk */
static struct rte_mempool *mbuf_pool;
static int mergeable;

/* Enable VM2VM communications. If this is disabled then the MAC address compare is skipped. */
typedef enum {
	VM2VM_DISABLED = 0,
	VM2VM_SOFTWARE = 1,
	VM2VM_HARDWARE = 2,
	VM2VM_LAST
} vm2vm_type;
static vm2vm_type vm2vm_mode = VM2VM_SOFTWARE;

/* Enable stats. */
static uint32_t enable_stats = 0;
/* Enable retries on RX. */
static uint32_t enable_retry = 1;

/* Disable TX checksum offload */
static uint32_t enable_tx_csum;

/* Disable TSO offload */
static uint32_t enable_tso;

/* Register vhost-user sockets as clients instead of servers (--client). */
static int client_mode;

/* Use the example's built-in virtio-net datapath instead of the vhost library one. */
static int builtin_net_driver;

/* Non-zero once --dmas successfully registered a DMA channel. */
static int async_vhost_driver;

/* DMA backend name from --dma-type; only "ioat" is recognized. */
static char *dma_type;

/* Specify timeout (in useconds) between retries on RX. */
static uint32_t burst_rx_delay_time = BURST_RX_WAIT_US;
/* Specify the number of retries on RX. */
static uint32_t burst_rx_retry_num = BURST_RX_RETRIES;

/* Socket file paths. Can be set by user. Stored as nb_sockets fixed-size
 * PATH_MAX slots in one realloc'd buffer (see us_vhost_parse_socket_path). */
static char *socket_files;
static int nb_sockets;

/* empty vmdq configuration structure. Filled in programmatically */
static struct rte_eth_conf vmdq_conf_default = {
	.rxmode = {
		.mq_mode        = RTE_ETH_MQ_RX_VMDQ_ONLY,
		.split_hdr_size = 0,
		/*
		 * VLAN strip is necessary for 1G NIC such as I350,
		 * this fixes bug of ipv4 forwarding in guest can't
		 * forward packets from one virtio dev to another virtio dev.
		 */
		.offloads = RTE_ETH_RX_OFFLOAD_VLAN_STRIP,
	},

	.txmode = {
		.mq_mode = RTE_ETH_MQ_TX_NONE,
		.offloads = (RTE_ETH_TX_OFFLOAD_IPV4_CKSUM |
			     RTE_ETH_TX_OFFLOAD_TCP_CKSUM |
			     RTE_ETH_TX_OFFLOAD_VLAN_INSERT |
			     RTE_ETH_TX_OFFLOAD_MULTI_SEGS |
			     RTE_ETH_TX_OFFLOAD_TCP_TSO),
	},
	.rx_adv_conf = {
		/*
		 * should be overridden separately in code with
		 * appropriate values
		 */
		.vmdq_rx_conf = {
			.nb_queue_pools = RTE_ETH_8_POOLS,
			.enable_default_pool = 0,
			.default_pool = 0,
			.nb_pool_maps = 0,
			.pool_map = {{0, 0},},
		},
	},
};


static unsigned lcore_ids[RTE_MAX_LCORE];
static uint16_t ports[RTE_MAX_ETHPORTS];
static unsigned num_ports = 0; /**< The number of ports specified in command line */
static uint16_t num_pf_queues, num_vmdq_queues;
static uint16_t vmdq_pool_base, vmdq_queue_base;
static uint16_t queues_per_pool;

/* One VLAN tag per VMDq pool; pool i maps to vlan_tags[i]. */
const uint16_t vlan_tags[] = {
	1000, 1001, 1002, 1003, 1004, 1005, 1006, 1007,
	1008, 1009, 1010, 1011, 1012, 1013, 1014, 1015,
	1016, 1017, 1018, 1019, 1020, 1021, 1022, 1023,
	1024, 1025, 1026, 1027, 1028, 1029, 1030, 1031,
	1032, 1033, 1034, 1035, 1036, 1037, 1038, 1039,
	1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047,
	1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055,
	1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063,
};

/* ethernet addresses of ports */
static struct rte_ether_addr vmdq_ports_eth_addr[RTE_MAX_ETHPORTS];

static struct vhost_dev_tailq_list vhost_dev_list =
	TAILQ_HEAD_INITIALIZER(vhost_dev_list);

static struct lcore_info lcore_info[RTE_MAX_LCORE];

/* Used for queueing bursts of TX packets. */
struct mbuf_table {
	unsigned len;		/* number of valid entries in m_table */
	unsigned txq_id;	/* NIC TX queue this core drains to */
	struct rte_mbuf *m_table[MAX_PKT_BURST];
};

/* Per-(core, vhost device) TX batch buffer; see vhost_txbuff below. */
struct vhost_bufftable {
	uint32_t len;		/* number of pending mbufs */
	uint64_t pre_tsc;	/* TSC of last drain, for the periodic flush */
	struct rte_mbuf *m_table[MAX_PKT_BURST];
};

/* TX queue for each data core. */
struct mbuf_table lcore_tx_queue[RTE_MAX_LCORE];

/*
 * Vhost TX buffer for each data core.
 * Every data core maintains a TX buffer for every vhost device,
 * which is used for batch pkts enqueue for higher performance.
 */
struct vhost_bufftable *vhost_txbuff[RTE_MAX_LCORE * MAX_VHOST_DEVICE];

#define MBUF_TABLE_DRAIN_TSC	((rte_get_tsc_hz() + US_PER_S - 1) \
				 / US_PER_S * BURST_TX_DRAIN_US)
#define VLAN_HLEN       4

/*
 * Open the DMA channel(s) described by @value for the configured dma_type.
 * Only the "ioat" backend is supported; returns -1 for any other type.
 */
static inline int
open_dma(const char *value)
{
	if (dma_type != NULL && strncmp(dma_type, "ioat", 4) == 0)
		return open_ioat(value);

	return -1;
}

/*
 * Builds up the correct configuration for VMDQ VLAN pool map
 * according to the pool & queue limits.
 * Note: the num_devices parameter intentionally shadows the file-scope
 * global of the same name; callers pass the global in.
 */
static inline int
get_eth_conf(struct rte_eth_conf *eth_conf, uint32_t num_devices)
{
	struct rte_eth_vmdq_rx_conf conf;
	struct rte_eth_vmdq_rx_conf *def_conf =
		&vmdq_conf_default.rx_adv_conf.vmdq_rx_conf;
	unsigned i;

	memset(&conf, 0, sizeof(conf));
	conf.nb_queue_pools = (enum rte_eth_nb_pools)num_devices;
	conf.nb_pool_maps = num_devices;
	conf.enable_loop_back = def_conf->enable_loop_back;
	conf.rx_mode = def_conf->rx_mode;

	/* Pool i receives traffic tagged with vlan_tags[i]. */
	for (i = 0; i < conf.nb_pool_maps; i++) {
		conf.pool_map[i].vlan_id = vlan_tags[i];
		conf.pool_map[i].pools = (1UL << i);
	}

	/* Start from the defaults, then overlay the per-run VMDq map. */
	(void)(rte_memcpy(eth_conf, &vmdq_conf_default, sizeof(*eth_conf)));
	(void)(rte_memcpy(&eth_conf->rx_adv_conf.vmdq_rx_conf, &conf,
		   sizeof(eth_conf->rx_adv_conf.vmdq_rx_conf)));
	return 0;
}

/*
 * Initialises a given port using global settings and with the rx buffers
 * coming from the mbuf_pool passed as parameter.
 * Side effects: updates num_devices, num_queues, num_pf_queues,
 * num_vmdq_queues, vmdq_queue_base, vmdq_pool_base, queues_per_pool and
 * vmdq_ports_eth_addr[port]. Returns 0 on success, negative errno otherwise.
 */
static inline int
port_init(uint16_t port)
{
	struct rte_eth_dev_info dev_info;
	struct rte_eth_conf port_conf;
	struct rte_eth_rxconf *rxconf;
	struct rte_eth_txconf *txconf;
	int16_t rx_rings, tx_rings;
	uint16_t rx_ring_size, tx_ring_size;
	int retval;
	uint16_t q;

	/* The max pool number from dev_info will be used to validate the pool number specified in cmd line */
	retval = rte_eth_dev_info_get(port, &dev_info);
	if (retval != 0) {
		RTE_LOG(ERR, VHOST_PORT,
			"Error during getting device (port %u) info: %s\n",
			port, strerror(-retval));

		return retval;
	}

	/* Start from the driver's recommended queue configs; drop on no-descriptor. */
	rxconf = &dev_info.default_rxconf;
	txconf = &dev_info.default_txconf;
	rxconf->rx_drop_en = 1;

	/* configure the number of supported virtio devices based on VMDQ limits */
	num_devices = dev_info.max_vmdq_pools;

	rx_ring_size = RTE_TEST_RX_DESC_DEFAULT;
	tx_ring_size = RTE_TEST_TX_DESC_DEFAULT;

	/* One TX queue per lcore so cores never share a queue. */
	tx_rings = (uint16_t)rte_lcore_count();

	/* Get port configuration. */
	retval = get_eth_conf(&port_conf, num_devices);
	if (retval < 0)
		return retval;
	/* NIC queues are divided into pf queues and vmdq queues. */
	num_pf_queues = dev_info.max_rx_queues - dev_info.vmdq_queue_num;
	queues_per_pool = dev_info.vmdq_queue_num / dev_info.max_vmdq_pools;
	num_vmdq_queues = num_devices * queues_per_pool;
	num_queues = num_pf_queues + num_vmdq_queues;
	vmdq_queue_base = dev_info.vmdq_queue_base;
	vmdq_pool_base = dev_info.vmdq_pool_base;
	printf("pf queue num: %u, configured vmdq pool num: %u, each vmdq pool has %u queues\n",
		num_pf_queues, num_devices, queues_per_pool);

	if (!rte_eth_dev_is_valid_port(port))
		return -1;

	rx_rings = (uint16_t)dev_info.max_rx_queues;
	if (dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE)
		port_conf.txmode.offloads |=
			RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE;
	/* Configure ethernet device. */
	retval = rte_eth_dev_configure(port, rx_rings, tx_rings, &port_conf);
	if (retval != 0) {
		RTE_LOG(ERR, VHOST_PORT, "Failed to configure port %u: %s.\n",
			port, strerror(-retval));
		return retval;
	}

	/* The driver may clamp/raise the descriptor counts we asked for. */
	retval = rte_eth_dev_adjust_nb_rx_tx_desc(port, &rx_ring_size,
		&tx_ring_size);
	if (retval != 0) {
		RTE_LOG(ERR, VHOST_PORT, "Failed to adjust number of descriptors "
			"for port %u: %s.\n", port, strerror(-retval));
		return retval;
	}
	if (rx_ring_size > RTE_TEST_RX_DESC_DEFAULT) {
		/* mbuf_pool was sized for the default ring depth; a larger
		 * ring would exhaust it. */
		RTE_LOG(ERR, VHOST_PORT, "Mbuf pool has an insufficient size "
			"for Rx queues on port %u.\n", port);
		return -1;
	}

	/* Setup the queues. */
	rxconf->offloads = port_conf.rxmode.offloads;
	for (q = 0; q < rx_rings; q++) {
		retval = rte_eth_rx_queue_setup(port, q, rx_ring_size,
						rte_eth_dev_socket_id(port),
						rxconf,
						mbuf_pool);
		if (retval < 0) {
			RTE_LOG(ERR, VHOST_PORT,
				"Failed to setup rx queue %u of port %u: %s.\n",
				q, port, strerror(-retval));
			return retval;
		}
	}
	txconf->offloads = port_conf.txmode.offloads;
	for (q = 0; q < tx_rings; q++) {
		retval = rte_eth_tx_queue_setup(port, q, tx_ring_size,
						rte_eth_dev_socket_id(port),
						txconf);
		if (retval < 0) {
			RTE_LOG(ERR, VHOST_PORT,
				"Failed to setup tx queue %u of port %u: %s.\n",
				q, port, strerror(-retval));
			return retval;
		}
	}

	/* Start the device. */
	retval = rte_eth_dev_start(port);
	if (retval < 0) {
		RTE_LOG(ERR, VHOST_PORT, "Failed to start port %u: %s\n",
			port, strerror(-retval));
		return retval;
	}

	if (promiscuous) {
		retval = rte_eth_promiscuous_enable(port);
		if (retval != 0) {
			RTE_LOG(ERR, VHOST_PORT,
				"Failed to enable promiscuous mode on port %u: %s\n",
				port, rte_strerror(-retval));
			return retval;
		}
	}

	retval = rte_eth_macaddr_get(port, &vmdq_ports_eth_addr[port]);
	if (retval < 0) {
		RTE_LOG(ERR, VHOST_PORT,
			"Failed to get MAC address on port %u: %s\n",
			port, rte_strerror(-retval));
		return retval;
	}

	RTE_LOG(INFO, VHOST_PORT, "Max virtio devices supported: %u\n", num_devices);
	RTE_LOG(INFO, VHOST_PORT, "Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8
		" %02"PRIx8" %02"PRIx8" %02"PRIx8"\n",
		port, RTE_ETHER_ADDR_BYTES(&vmdq_ports_eth_addr[port]));

	return 0;
}

/*
 * Set socket file path.
381d19533e8SHuawei Xie */ 382d19533e8SHuawei Xie static int 383bde19a4dSJiayu Hu us_vhost_parse_socket_path(const char *q_arg) 384d19533e8SHuawei Xie { 385d79035b7STiwei Bie char *old; 386d79035b7STiwei Bie 387d19533e8SHuawei Xie /* parse number string */ 388fa81d3b9SGang Jiang if (strnlen(q_arg, PATH_MAX) == PATH_MAX) 389d19533e8SHuawei Xie return -1; 390ad0eef4dSJiayu Hu 391d79035b7STiwei Bie old = socket_files; 392ad0eef4dSJiayu Hu socket_files = realloc(socket_files, PATH_MAX * (nb_sockets + 1)); 393d79035b7STiwei Bie if (socket_files == NULL) { 394d79035b7STiwei Bie free(old); 395d79035b7STiwei Bie return -1; 396d79035b7STiwei Bie } 397d79035b7STiwei Bie 398f9acaf84SBruce Richardson strlcpy(socket_files + nb_sockets * PATH_MAX, q_arg, PATH_MAX); 399ad0eef4dSJiayu Hu nb_sockets++; 400d19533e8SHuawei Xie 401d19533e8SHuawei Xie return 0; 402d19533e8SHuawei Xie } 403d19533e8SHuawei Xie 404d19533e8SHuawei Xie /* 405d19533e8SHuawei Xie * Parse the portmask provided at run time. 406d19533e8SHuawei Xie */ 407d19533e8SHuawei Xie static int 408d19533e8SHuawei Xie parse_portmask(const char *portmask) 409d19533e8SHuawei Xie { 410d19533e8SHuawei Xie char *end = NULL; 411d19533e8SHuawei Xie unsigned long pm; 412d19533e8SHuawei Xie 413d19533e8SHuawei Xie errno = 0; 414d19533e8SHuawei Xie 415d19533e8SHuawei Xie /* parse hexadecimal string */ 416d19533e8SHuawei Xie pm = strtoul(portmask, &end, 16); 417d19533e8SHuawei Xie if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0') || (errno != 0)) 418ce6b8c31SSarosh Arif return 0; 419d19533e8SHuawei Xie 420d19533e8SHuawei Xie return pm; 421d19533e8SHuawei Xie 422d19533e8SHuawei Xie } 423d19533e8SHuawei Xie 424d19533e8SHuawei Xie /* 425d19533e8SHuawei Xie * Parse num options at run time. 
426d19533e8SHuawei Xie */ 427d19533e8SHuawei Xie static int 428d19533e8SHuawei Xie parse_num_opt(const char *q_arg, uint32_t max_valid_value) 429d19533e8SHuawei Xie { 430d19533e8SHuawei Xie char *end = NULL; 431d19533e8SHuawei Xie unsigned long num; 432d19533e8SHuawei Xie 433d19533e8SHuawei Xie errno = 0; 434d19533e8SHuawei Xie 435d19533e8SHuawei Xie /* parse unsigned int string */ 436d19533e8SHuawei Xie num = strtoul(q_arg, &end, 10); 437d19533e8SHuawei Xie if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0') || (errno != 0)) 438d19533e8SHuawei Xie return -1; 439d19533e8SHuawei Xie 440d19533e8SHuawei Xie if (num > max_valid_value) 441d19533e8SHuawei Xie return -1; 442d19533e8SHuawei Xie 443d19533e8SHuawei Xie return num; 444d19533e8SHuawei Xie 445d19533e8SHuawei Xie } 446d19533e8SHuawei Xie 447d19533e8SHuawei Xie /* 448d19533e8SHuawei Xie * Display usage 449d19533e8SHuawei Xie */ 450d19533e8SHuawei Xie static void 451d19533e8SHuawei Xie us_vhost_usage(const char *prgname) 452d19533e8SHuawei Xie { 453d19533e8SHuawei Xie RTE_LOG(INFO, VHOST_CONFIG, "%s [EAL options] -- -p PORTMASK\n" 454d19533e8SHuawei Xie " --vm2vm [0|1|2]\n" 455d19533e8SHuawei Xie " --rx_retry [0|1] --mergeable [0|1] --stats [0-N]\n" 456bde19a4dSJiayu Hu " --socket-file <path>\n" 457d19533e8SHuawei Xie " --nb-devices ND\n" 458d19533e8SHuawei Xie " -p PORTMASK: Set mask for ports to be used by application\n" 459d19533e8SHuawei Xie " --vm2vm [0|1|2]: disable/software(default)/hardware vm2vm comms\n" 460d19533e8SHuawei Xie " --rx-retry [0|1]: disable/enable(default) retries on rx. Enable retry if destintation queue is full\n" 461d19533e8SHuawei Xie " --rx-retry-delay [0-N]: timeout(in usecond) between retries on RX. This makes effect only if retries on rx enabled\n" 462d19533e8SHuawei Xie " --rx-retry-num [0-N]: the number of retries on rx. 
This makes effect only if retries on rx enabled\n" 463d19533e8SHuawei Xie " --mergeable [0|1]: disable(default)/enable RX mergeable buffers\n" 464d19533e8SHuawei Xie " --stats [0-N]: 0: Disable stats, N: Time in seconds to print stats\n" 465bde19a4dSJiayu Hu " --socket-file: The path of the socket file.\n" 4669fd72e3cSJijiang Liu " --tx-csum [0|1] disable/enable TX checksum offload.\n" 4672345e3beSYuanhan Liu " --tso [0|1] disable/enable TCP segment offload.\n" 4683a04ecb2SCheng Jiang " --client register a vhost-user socket as client mode.\n" 4693a04ecb2SCheng Jiang " --dma-type register dma type for your vhost async driver. For example \"ioat\" for now.\n" 4703a04ecb2SCheng Jiang " --dmas register dma channel for specific vhost device.\n", 471d19533e8SHuawei Xie prgname); 472d19533e8SHuawei Xie } 473d19533e8SHuawei Xie 474965b06f0SIbtisam Tariq enum { 475965b06f0SIbtisam Tariq #define OPT_VM2VM "vm2vm" 476965b06f0SIbtisam Tariq OPT_VM2VM_NUM = 256, 477965b06f0SIbtisam Tariq #define OPT_RX_RETRY "rx-retry" 478965b06f0SIbtisam Tariq OPT_RX_RETRY_NUM, 479965b06f0SIbtisam Tariq #define OPT_RX_RETRY_DELAY "rx-retry-delay" 480965b06f0SIbtisam Tariq OPT_RX_RETRY_DELAY_NUM, 481965b06f0SIbtisam Tariq #define OPT_RX_RETRY_NUMB "rx-retry-num" 482965b06f0SIbtisam Tariq OPT_RX_RETRY_NUMB_NUM, 483965b06f0SIbtisam Tariq #define OPT_MERGEABLE "mergeable" 484965b06f0SIbtisam Tariq OPT_MERGEABLE_NUM, 485965b06f0SIbtisam Tariq #define OPT_STATS "stats" 486965b06f0SIbtisam Tariq OPT_STATS_NUM, 487965b06f0SIbtisam Tariq #define OPT_SOCKET_FILE "socket-file" 488965b06f0SIbtisam Tariq OPT_SOCKET_FILE_NUM, 489965b06f0SIbtisam Tariq #define OPT_TX_CSUM "tx-csum" 490965b06f0SIbtisam Tariq OPT_TX_CSUM_NUM, 491965b06f0SIbtisam Tariq #define OPT_TSO "tso" 492965b06f0SIbtisam Tariq OPT_TSO_NUM, 493965b06f0SIbtisam Tariq #define OPT_CLIENT "client" 494965b06f0SIbtisam Tariq OPT_CLIENT_NUM, 495965b06f0SIbtisam Tariq #define OPT_BUILTIN_NET_DRIVER "builtin-net-driver" 496965b06f0SIbtisam Tariq 
	OPT_BUILTIN_NET_DRIVER_NUM,
#define OPT_DMA_TYPE            "dma-type"
	OPT_DMA_TYPE_NUM,
#define OPT_DMAS                "dmas"
	OPT_DMAS_NUM,
};

/*
 * Parse the arguments given in the command line of the application.
 * Fills in the file-scope configuration globals (enabled_port_mask,
 * vm2vm_mode, enable_retry, enable_tx_csum, enable_tso, mergeable,
 * enable_stats, burst_rx_* timings, socket_files, dma_type,
 * async_vhost_driver, client_mode, builtin_net_driver, ports[]).
 * Returns 0 on success, -1 on any invalid option (usage is printed).
 */
static int
us_vhost_parse_args(int argc, char **argv)
{
	int opt, ret;
	int option_index;
	unsigned i;
	const char *prgname = argv[0];
	static struct option long_option[] = {
		{OPT_VM2VM, required_argument,
				NULL, OPT_VM2VM_NUM},
		{OPT_RX_RETRY, required_argument,
				NULL, OPT_RX_RETRY_NUM},
		{OPT_RX_RETRY_DELAY, required_argument,
				NULL, OPT_RX_RETRY_DELAY_NUM},
		{OPT_RX_RETRY_NUMB, required_argument,
				NULL, OPT_RX_RETRY_NUMB_NUM},
		{OPT_MERGEABLE, required_argument,
				NULL, OPT_MERGEABLE_NUM},
		{OPT_STATS, required_argument,
				NULL, OPT_STATS_NUM},
		{OPT_SOCKET_FILE, required_argument,
				NULL, OPT_SOCKET_FILE_NUM},
		{OPT_TX_CSUM, required_argument,
				NULL, OPT_TX_CSUM_NUM},
		{OPT_TSO, required_argument,
				NULL, OPT_TSO_NUM},
		{OPT_CLIENT, no_argument,
				NULL, OPT_CLIENT_NUM},
		{OPT_BUILTIN_NET_DRIVER, no_argument,
				NULL, OPT_BUILTIN_NET_DRIVER_NUM},
		{OPT_DMA_TYPE, required_argument,
				NULL, OPT_DMA_TYPE_NUM},
		{OPT_DMAS, required_argument,
				NULL, OPT_DMAS_NUM},
		{NULL, 0, 0, 0},
	};

	/* Parse command line */
	while ((opt = getopt_long(argc, argv, "p:P",
			long_option, &option_index)) != EOF) {
		switch (opt) {
		/* Portmask */
		case 'p':
			enabled_port_mask = parse_portmask(optarg);
			if (enabled_port_mask == 0) {
				RTE_LOG(INFO, VHOST_CONFIG, "Invalid portmask\n");
				us_vhost_usage(prgname);
				return -1;
			}
			break;

		/* Promiscuous mode: also accept broadcast/multicast in VMDq. */
		case 'P':
			promiscuous = 1;
			vmdq_conf_default.rx_adv_conf.vmdq_rx_conf.rx_mode =
				RTE_ETH_VMDQ_ACCEPT_BROADCAST |
				RTE_ETH_VMDQ_ACCEPT_MULTICAST;
			break;

		case OPT_VM2VM_NUM:
			ret = parse_num_opt(optarg, (VM2VM_LAST - 1));
			if (ret == -1) {
				RTE_LOG(INFO, VHOST_CONFIG,
					"Invalid argument for "
					"vm2vm [0|1|2]\n");
				us_vhost_usage(prgname);
				return -1;
			}
			vm2vm_mode = (vm2vm_type)ret;
			break;

		case OPT_RX_RETRY_NUM:
			ret = parse_num_opt(optarg, 1);
			if (ret == -1) {
				RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for rx-retry [0|1]\n");
				us_vhost_usage(prgname);
				return -1;
			}
			enable_retry = ret;
			break;

		case OPT_TX_CSUM_NUM:
			ret = parse_num_opt(optarg, 1);
			if (ret == -1) {
				RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for tx-csum [0|1]\n");
				us_vhost_usage(prgname);
				return -1;
			}
			enable_tx_csum = ret;
			break;

		case OPT_TSO_NUM:
			ret = parse_num_opt(optarg, 1);
			if (ret == -1) {
				RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for tso [0|1]\n");
				us_vhost_usage(prgname);
				return -1;
			}
			enable_tso = ret;
			break;

		case OPT_RX_RETRY_DELAY_NUM:
			ret = parse_num_opt(optarg, INT32_MAX);
			if (ret == -1) {
				RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for rx-retry-delay [0-N]\n");
				us_vhost_usage(prgname);
				return -1;
			}
			burst_rx_delay_time = ret;
			break;

		case OPT_RX_RETRY_NUMB_NUM:
			ret = parse_num_opt(optarg, INT32_MAX);
			if (ret == -1) {
				RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for rx-retry-num [0-N]\n");
				us_vhost_usage(prgname);
				return -1;
			}
			burst_rx_retry_num = ret;
			break;

		case OPT_MERGEABLE_NUM:
			ret = parse_num_opt(optarg, 1);
			if (ret == -1) {
				RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for mergeable [0|1]\n");
				us_vhost_usage(prgname);
				return -1;
			}
			mergeable = !!ret;
			/* Mergeable RX buffers allow jumbo frames; raise the MTU. */
			if (ret)
				vmdq_conf_default.rxmode.mtu = MAX_MTU;
			break;

		case OPT_STATS_NUM:
			ret = parse_num_opt(optarg, INT32_MAX);
			if (ret == -1) {
				RTE_LOG(INFO, VHOST_CONFIG,
					"Invalid argument for stats [0..N]\n");
				us_vhost_usage(prgname);
				return -1;
			}
			enable_stats = ret;
			break;

		/* Set socket file path. */
		case OPT_SOCKET_FILE_NUM:
			if (us_vhost_parse_socket_path(optarg) == -1) {
				RTE_LOG(INFO, VHOST_CONFIG,
				"Invalid argument for socket name (Max %d characters)\n",
				PATH_MAX);
				us_vhost_usage(prgname);
				return -1;
			}
			break;

		/* NOTE: --dma-type must appear before --dmas on the command
		 * line; open_dma() consults dma_type. */
		case OPT_DMA_TYPE_NUM:
			dma_type = optarg;
			break;

		case OPT_DMAS_NUM:
			if (open_dma(optarg) == -1) {
				RTE_LOG(INFO, VHOST_CONFIG,
					"Wrong DMA args\n");
				us_vhost_usage(prgname);
				return -1;
			}
			async_vhost_driver = 1;
			break;

		case OPT_CLIENT_NUM:
			client_mode = 1;
			break;

		case OPT_BUILTIN_NET_DRIVER_NUM:
			builtin_net_driver = 1;
			break;

		/* Invalid option - print options. */
		default:
			us_vhost_usage(prgname);
			return -1;
		}
	}

	/* Translate the bitmask into the ports[] array. */
	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
		if (enabled_port_mask & (1 << i))
			ports[num_ports++] = i;
	}

	if ((num_ports == 0) || (num_ports > MAX_SUP_PORTS)) {
		RTE_LOG(INFO, VHOST_PORT, "Current enabled port number is %u,"
			"but only %u port can be enabled\n", num_ports, MAX_SUP_PORTS);
		return -1;
	}

	return 0;
}

/*
 * Update the global var NUM_PORTS and array PORTS according to system ports number
 * and return valid ports number
 * (definition continues past the end of this chunk)
 */
static unsigned check_ports_num(unsigned nb_ports)
{
	unsigned valid_num_ports = num_ports;
	unsigned portid;

	if (num_ports > nb_ports) {
		RTE_LOG(INFO, VHOST_PORT, "\nSpecified port number(%u) exceeds total system port number(%u)\n",
			num_ports, nb_ports);
		num_ports = nb_ports;
	}

	for (portid = 0; portid < num_ports; portid++) {
		if (!rte_eth_dev_is_valid_port(ports[portid])) {
			RTE_LOG(INFO, VHOST_PORT,
				"\nSpecified port ID(%u) is not valid\n",
				ports[portid]);
			ports[portid] = INVALID_PORT_ID;
			valid_num_ports--;
Xie } 726d19533e8SHuawei Xie } 727d19533e8SHuawei Xie return valid_num_ports; 728d19533e8SHuawei Xie } 729d19533e8SHuawei Xie 730c0583d98SJerin Jacob static __rte_always_inline struct vhost_dev * 7316d13ea8eSOlivier Matz find_vhost_dev(struct rte_ether_addr *mac) 73245657a5cSYuanhan Liu { 73345657a5cSYuanhan Liu struct vhost_dev *vdev; 73445657a5cSYuanhan Liu 73597daf19eSYuanhan Liu TAILQ_FOREACH(vdev, &vhost_dev_list, global_vdev_entry) { 73645657a5cSYuanhan Liu if (vdev->ready == DEVICE_RX && 737538da7a1SOlivier Matz rte_is_same_ether_addr(mac, &vdev->mac_address)) 73845657a5cSYuanhan Liu return vdev; 73945657a5cSYuanhan Liu } 74045657a5cSYuanhan Liu 74145657a5cSYuanhan Liu return NULL; 74245657a5cSYuanhan Liu } 74345657a5cSYuanhan Liu 744d19533e8SHuawei Xie /* 745d19533e8SHuawei Xie * This function learns the MAC address of the device and registers this along with a 746d19533e8SHuawei Xie * vlan tag to a VMDQ. 747d19533e8SHuawei Xie */ 748d19533e8SHuawei Xie static int 749e571e6b4SHuawei Xie link_vmdq(struct vhost_dev *vdev, struct rte_mbuf *m) 750d19533e8SHuawei Xie { 7516d13ea8eSOlivier Matz struct rte_ether_hdr *pkt_hdr; 752d19533e8SHuawei Xie int i, ret; 753d19533e8SHuawei Xie 754d19533e8SHuawei Xie /* Learn MAC address of guest device from packet */ 7556d13ea8eSOlivier Matz pkt_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); 756d19533e8SHuawei Xie 75704d43857SDmitry Kozlyuk if (find_vhost_dev(&pkt_hdr->src_addr)) { 75845657a5cSYuanhan Liu RTE_LOG(ERR, VHOST_DATA, 759c08a3490SYuanhan Liu "(%d) device is using a registered MAC!\n", 760e2a1dd12SYuanhan Liu vdev->vid); 761d19533e8SHuawei Xie return -1; 762d19533e8SHuawei Xie } 763d19533e8SHuawei Xie 76435b2d13fSOlivier Matz for (i = 0; i < RTE_ETHER_ADDR_LEN; i++) 76504d43857SDmitry Kozlyuk vdev->mac_address.addr_bytes[i] = 76604d43857SDmitry Kozlyuk pkt_hdr->src_addr.addr_bytes[i]; 767d19533e8SHuawei Xie 768d19533e8SHuawei Xie /* vlan_tag currently uses the device_id. 
*/ 769e2a1dd12SYuanhan Liu vdev->vlan_tag = vlan_tags[vdev->vid]; 770d19533e8SHuawei Xie 771d19533e8SHuawei Xie /* Print out VMDQ registration info. */ 772c08a3490SYuanhan Liu RTE_LOG(INFO, VHOST_DATA, 773c2c4f87bSAman Deep Singh "(%d) mac " RTE_ETHER_ADDR_PRT_FMT " and vlan %d registered\n", 774a7db3afcSAman Deep Singh vdev->vid, RTE_ETHER_ADDR_BYTES(&vdev->mac_address), 775e571e6b4SHuawei Xie vdev->vlan_tag); 776d19533e8SHuawei Xie 777d19533e8SHuawei Xie /* Register the MAC address. */ 77884b02d16SHuawei Xie ret = rte_eth_dev_mac_addr_add(ports[0], &vdev->mac_address, 779e2a1dd12SYuanhan Liu (uint32_t)vdev->vid + vmdq_pool_base); 780d19533e8SHuawei Xie if (ret) 781c08a3490SYuanhan Liu RTE_LOG(ERR, VHOST_DATA, 782c08a3490SYuanhan Liu "(%d) failed to add device MAC address to VMDQ\n", 783e2a1dd12SYuanhan Liu vdev->vid); 784d19533e8SHuawei Xie 78565453928SJianfeng Tan rte_eth_dev_set_vlan_strip_on_queue(ports[0], vdev->vmdq_rx_q, 1); 786d19533e8SHuawei Xie 787d19533e8SHuawei Xie /* Set device as ready for RX. */ 788e571e6b4SHuawei Xie vdev->ready = DEVICE_RX; 789d19533e8SHuawei Xie 790d19533e8SHuawei Xie return 0; 791d19533e8SHuawei Xie } 792d19533e8SHuawei Xie 793d19533e8SHuawei Xie /* 794d19533e8SHuawei Xie * Removes MAC address and vlan tag from VMDQ. Ensures that nothing is adding buffers to the RX 795d19533e8SHuawei Xie * queue before disabling RX on the device. 
796d19533e8SHuawei Xie */ 797d19533e8SHuawei Xie static inline void 798e571e6b4SHuawei Xie unlink_vmdq(struct vhost_dev *vdev) 799d19533e8SHuawei Xie { 800d19533e8SHuawei Xie unsigned i = 0; 801d19533e8SHuawei Xie unsigned rx_count; 802d19533e8SHuawei Xie struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; 803d19533e8SHuawei Xie 804e571e6b4SHuawei Xie if (vdev->ready == DEVICE_RX) { 805d19533e8SHuawei Xie /*clear MAC and VLAN settings*/ 806e571e6b4SHuawei Xie rte_eth_dev_mac_addr_remove(ports[0], &vdev->mac_address); 807d19533e8SHuawei Xie for (i = 0; i < 6; i++) 808e571e6b4SHuawei Xie vdev->mac_address.addr_bytes[i] = 0; 809d19533e8SHuawei Xie 810e571e6b4SHuawei Xie vdev->vlan_tag = 0; 811d19533e8SHuawei Xie 812d19533e8SHuawei Xie /*Clear out the receive buffers*/ 813d19533e8SHuawei Xie rx_count = rte_eth_rx_burst(ports[0], 814e571e6b4SHuawei Xie (uint16_t)vdev->vmdq_rx_q, pkts_burst, MAX_PKT_BURST); 815d19533e8SHuawei Xie 816d19533e8SHuawei Xie while (rx_count) { 817d19533e8SHuawei Xie for (i = 0; i < rx_count; i++) 818d19533e8SHuawei Xie rte_pktmbuf_free(pkts_burst[i]); 819d19533e8SHuawei Xie 820d19533e8SHuawei Xie rx_count = rte_eth_rx_burst(ports[0], 821e571e6b4SHuawei Xie (uint16_t)vdev->vmdq_rx_q, pkts_burst, MAX_PKT_BURST); 822d19533e8SHuawei Xie } 823d19533e8SHuawei Xie 824e571e6b4SHuawei Xie vdev->ready = DEVICE_MAC_LEARNING; 825d19533e8SHuawei Xie } 826d19533e8SHuawei Xie } 827d19533e8SHuawei Xie 828a68ba8e0SCheng Jiang static inline void 829a68ba8e0SCheng Jiang free_pkts(struct rte_mbuf **pkts, uint16_t n) 830a68ba8e0SCheng Jiang { 831a68ba8e0SCheng Jiang while (n--) 832a68ba8e0SCheng Jiang rte_pktmbuf_free(pkts[n]); 833a68ba8e0SCheng Jiang } 834a68ba8e0SCheng Jiang 835c0583d98SJerin Jacob static __rte_always_inline void 836a68ba8e0SCheng Jiang complete_async_pkts(struct vhost_dev *vdev) 837a68ba8e0SCheng Jiang { 838a68ba8e0SCheng Jiang struct rte_mbuf *p_cpl[MAX_PKT_BURST]; 839a68ba8e0SCheng Jiang uint16_t complete_count; 840a68ba8e0SCheng Jiang 
841a68ba8e0SCheng Jiang complete_count = rte_vhost_poll_enqueue_completed(vdev->vid, 842a68ba8e0SCheng Jiang VIRTIO_RXQ, p_cpl, MAX_PKT_BURST); 843b9f23beeSCheng Jiang if (complete_count) { 844a68ba8e0SCheng Jiang free_pkts(p_cpl, complete_count); 845b9f23beeSCheng Jiang __atomic_sub_fetch(&vdev->pkts_inflight, complete_count, __ATOMIC_SEQ_CST); 846b9f23beeSCheng Jiang } 847b9f23beeSCheng Jiang 848a68ba8e0SCheng Jiang } 849a68ba8e0SCheng Jiang 850a68ba8e0SCheng Jiang static __rte_always_inline void 851a68ba8e0SCheng Jiang sync_virtio_xmit(struct vhost_dev *dst_vdev, struct vhost_dev *src_vdev, 8529c5ef512SYuanhan Liu struct rte_mbuf *m) 8539c5ef512SYuanhan Liu { 8549c5ef512SYuanhan Liu uint16_t ret; 8559c5ef512SYuanhan Liu 856ca059fa5SYuanhan Liu if (builtin_net_driver) { 857ca059fa5SYuanhan Liu ret = vs_enqueue_pkts(dst_vdev, VIRTIO_RXQ, &m, 1); 858ca059fa5SYuanhan Liu } else { 8594ecf22e3SYuanhan Liu ret = rte_vhost_enqueue_burst(dst_vdev->vid, VIRTIO_RXQ, &m, 1); 860ca059fa5SYuanhan Liu } 861ca059fa5SYuanhan Liu 8629c5ef512SYuanhan Liu if (enable_stats) { 863a68ba8e0SCheng Jiang __atomic_add_fetch(&dst_vdev->stats.rx_total_atomic, 1, 864a68ba8e0SCheng Jiang __ATOMIC_SEQ_CST); 865a68ba8e0SCheng Jiang __atomic_add_fetch(&dst_vdev->stats.rx_atomic, ret, 866a68ba8e0SCheng Jiang __ATOMIC_SEQ_CST); 86756fe86f8SYuanhan Liu src_vdev->stats.tx_total++; 86856fe86f8SYuanhan Liu src_vdev->stats.tx += ret; 8699c5ef512SYuanhan Liu } 8709c5ef512SYuanhan Liu } 8719c5ef512SYuanhan Liu 872a68ba8e0SCheng Jiang static __rte_always_inline void 873a68ba8e0SCheng Jiang drain_vhost(struct vhost_dev *vdev) 874a68ba8e0SCheng Jiang { 875a68ba8e0SCheng Jiang uint16_t ret; 876ee6e451fSCheng Jiang uint32_t buff_idx = rte_lcore_id() * MAX_VHOST_DEVICE + vdev->vid; 877a68ba8e0SCheng Jiang uint16_t nr_xmit = vhost_txbuff[buff_idx]->len; 878a68ba8e0SCheng Jiang struct rte_mbuf **m = vhost_txbuff[buff_idx]->m_table; 879a68ba8e0SCheng Jiang 880a68ba8e0SCheng Jiang if (builtin_net_driver) { 
881a68ba8e0SCheng Jiang ret = vs_enqueue_pkts(vdev, VIRTIO_RXQ, m, nr_xmit); 882a68ba8e0SCheng Jiang } else if (async_vhost_driver) { 883a68ba8e0SCheng Jiang uint16_t enqueue_fail = 0; 884a68ba8e0SCheng Jiang 885a68ba8e0SCheng Jiang complete_async_pkts(vdev); 886abeb8652SJiayu Hu ret = rte_vhost_submit_enqueue_burst(vdev->vid, VIRTIO_RXQ, m, nr_xmit); 887abeb8652SJiayu Hu __atomic_add_fetch(&vdev->pkts_inflight, ret, __ATOMIC_SEQ_CST); 888a68ba8e0SCheng Jiang 889a68ba8e0SCheng Jiang enqueue_fail = nr_xmit - ret; 890a68ba8e0SCheng Jiang if (enqueue_fail) 891a68ba8e0SCheng Jiang free_pkts(&m[ret], nr_xmit - ret); 892a68ba8e0SCheng Jiang } else { 893a68ba8e0SCheng Jiang ret = rte_vhost_enqueue_burst(vdev->vid, VIRTIO_RXQ, 894a68ba8e0SCheng Jiang m, nr_xmit); 895a68ba8e0SCheng Jiang } 896a68ba8e0SCheng Jiang 897a68ba8e0SCheng Jiang if (enable_stats) { 898a68ba8e0SCheng Jiang __atomic_add_fetch(&vdev->stats.rx_total_atomic, nr_xmit, 899a68ba8e0SCheng Jiang __ATOMIC_SEQ_CST); 900a68ba8e0SCheng Jiang __atomic_add_fetch(&vdev->stats.rx_atomic, ret, 901a68ba8e0SCheng Jiang __ATOMIC_SEQ_CST); 902a68ba8e0SCheng Jiang } 903a68ba8e0SCheng Jiang 904a68ba8e0SCheng Jiang if (!async_vhost_driver) 905a68ba8e0SCheng Jiang free_pkts(m, nr_xmit); 906a68ba8e0SCheng Jiang } 907a68ba8e0SCheng Jiang 908a68ba8e0SCheng Jiang static __rte_always_inline void 909a68ba8e0SCheng Jiang drain_vhost_table(void) 910a68ba8e0SCheng Jiang { 911a68ba8e0SCheng Jiang uint16_t lcore_id = rte_lcore_id(); 912a68ba8e0SCheng Jiang struct vhost_bufftable *vhost_txq; 913a68ba8e0SCheng Jiang struct vhost_dev *vdev; 914a68ba8e0SCheng Jiang uint64_t cur_tsc; 915a68ba8e0SCheng Jiang 916a68ba8e0SCheng Jiang TAILQ_FOREACH(vdev, &vhost_dev_list, global_vdev_entry) { 917ad5050e4SWenwu Ma if (unlikely(vdev->remove == 1)) 918ad5050e4SWenwu Ma continue; 919ad5050e4SWenwu Ma 920a68ba8e0SCheng Jiang vhost_txq = vhost_txbuff[lcore_id * MAX_VHOST_DEVICE 921a68ba8e0SCheng Jiang + vdev->vid]; 922a68ba8e0SCheng Jiang 
923a68ba8e0SCheng Jiang cur_tsc = rte_rdtsc(); 924a68ba8e0SCheng Jiang if (unlikely(cur_tsc - vhost_txq->pre_tsc 925a68ba8e0SCheng Jiang > MBUF_TABLE_DRAIN_TSC)) { 926a68ba8e0SCheng Jiang RTE_LOG_DP(DEBUG, VHOST_DATA, 927a68ba8e0SCheng Jiang "Vhost TX queue drained after timeout with burst size %u\n", 928a68ba8e0SCheng Jiang vhost_txq->len); 929a68ba8e0SCheng Jiang drain_vhost(vdev); 930a68ba8e0SCheng Jiang vhost_txq->len = 0; 931a68ba8e0SCheng Jiang vhost_txq->pre_tsc = cur_tsc; 932a68ba8e0SCheng Jiang } 933a68ba8e0SCheng Jiang } 934a68ba8e0SCheng Jiang } 935a68ba8e0SCheng Jiang 936d19533e8SHuawei Xie /* 937d19533e8SHuawei Xie * Check if the packet destination MAC address is for a local device. If so then put 938d19533e8SHuawei Xie * the packet on that devices RX queue. If not then return. 939d19533e8SHuawei Xie */ 940c0583d98SJerin Jacob static __rte_always_inline int 941e571e6b4SHuawei Xie virtio_tx_local(struct vhost_dev *vdev, struct rte_mbuf *m) 942d19533e8SHuawei Xie { 9436d13ea8eSOlivier Matz struct rte_ether_hdr *pkt_hdr; 94445657a5cSYuanhan Liu struct vhost_dev *dst_vdev; 945a68ba8e0SCheng Jiang struct vhost_bufftable *vhost_txq; 946a68ba8e0SCheng Jiang uint16_t lcore_id = rte_lcore_id(); 9476d13ea8eSOlivier Matz pkt_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); 948d19533e8SHuawei Xie 94904d43857SDmitry Kozlyuk dst_vdev = find_vhost_dev(&pkt_hdr->dst_addr); 95045657a5cSYuanhan Liu if (!dst_vdev) 951d19533e8SHuawei Xie return -1; 95245657a5cSYuanhan Liu 953e2a1dd12SYuanhan Liu if (vdev->vid == dst_vdev->vid) { 9545d8f0bafSOlivier Matz RTE_LOG_DP(DEBUG, VHOST_DATA, 955c08a3490SYuanhan Liu "(%d) TX: src and dst MAC is same. 
Dropping packet.\n", 956e2a1dd12SYuanhan Liu vdev->vid); 95745657a5cSYuanhan Liu return 0; 95845657a5cSYuanhan Liu } 95945657a5cSYuanhan Liu 9605d8f0bafSOlivier Matz RTE_LOG_DP(DEBUG, VHOST_DATA, 961e2a1dd12SYuanhan Liu "(%d) TX: MAC address is local\n", dst_vdev->vid); 96245657a5cSYuanhan Liu 96345657a5cSYuanhan Liu if (unlikely(dst_vdev->remove)) { 9645d8f0bafSOlivier Matz RTE_LOG_DP(DEBUG, VHOST_DATA, 965e2a1dd12SYuanhan Liu "(%d) device is marked for removal\n", dst_vdev->vid); 96645657a5cSYuanhan Liu return 0; 96745657a5cSYuanhan Liu } 96845657a5cSYuanhan Liu 969a68ba8e0SCheng Jiang vhost_txq = vhost_txbuff[lcore_id * MAX_VHOST_DEVICE + dst_vdev->vid]; 970a68ba8e0SCheng Jiang vhost_txq->m_table[vhost_txq->len++] = m; 971a68ba8e0SCheng Jiang 972a68ba8e0SCheng Jiang if (enable_stats) { 973a68ba8e0SCheng Jiang vdev->stats.tx_total++; 974a68ba8e0SCheng Jiang vdev->stats.tx++; 975a68ba8e0SCheng Jiang } 976a68ba8e0SCheng Jiang 977a68ba8e0SCheng Jiang if (unlikely(vhost_txq->len == MAX_PKT_BURST)) { 978a68ba8e0SCheng Jiang drain_vhost(dst_vdev); 979a68ba8e0SCheng Jiang vhost_txq->len = 0; 980a68ba8e0SCheng Jiang vhost_txq->pre_tsc = rte_rdtsc(); 981a68ba8e0SCheng Jiang } 98245657a5cSYuanhan Liu return 0; 983d19533e8SHuawei Xie } 984d19533e8SHuawei Xie 985d19533e8SHuawei Xie /* 98672ec8d77SOuyang Changchun * Check if the destination MAC of a packet is one local VM, 98772ec8d77SOuyang Changchun * and get its vlan tag, and offset if it is. 
988d19533e8SHuawei Xie */ 989c0583d98SJerin Jacob static __rte_always_inline int 9907f262239SYuanhan Liu find_local_dest(struct vhost_dev *vdev, struct rte_mbuf *m, 99172ec8d77SOuyang Changchun uint32_t *offset, uint16_t *vlan_tag) 992d19533e8SHuawei Xie { 99345657a5cSYuanhan Liu struct vhost_dev *dst_vdev; 9946d13ea8eSOlivier Matz struct rte_ether_hdr *pkt_hdr = 9956d13ea8eSOlivier Matz rte_pktmbuf_mtod(m, struct rte_ether_hdr *); 996d19533e8SHuawei Xie 99704d43857SDmitry Kozlyuk dst_vdev = find_vhost_dev(&pkt_hdr->dst_addr); 99845657a5cSYuanhan Liu if (!dst_vdev) 99945657a5cSYuanhan Liu return 0; 100045657a5cSYuanhan Liu 1001e2a1dd12SYuanhan Liu if (vdev->vid == dst_vdev->vid) { 10025d8f0bafSOlivier Matz RTE_LOG_DP(DEBUG, VHOST_DATA, 1003c08a3490SYuanhan Liu "(%d) TX: src and dst MAC is same. Dropping packet.\n", 1004e2a1dd12SYuanhan Liu vdev->vid); 100572ec8d77SOuyang Changchun return -1; 1006d19533e8SHuawei Xie } 1007e44fb8a4SOuyang Changchun 1008e44fb8a4SOuyang Changchun /* 1009e44fb8a4SOuyang Changchun * HW vlan strip will reduce the packet length 1010e44fb8a4SOuyang Changchun * by minus length of vlan tag, so need restore 1011e44fb8a4SOuyang Changchun * the packet length by plus it. 
1012e44fb8a4SOuyang Changchun */ 101372ec8d77SOuyang Changchun *offset = VLAN_HLEN; 1014e2a1dd12SYuanhan Liu *vlan_tag = vlan_tags[vdev->vid]; 1015d19533e8SHuawei Xie 10165d8f0bafSOlivier Matz RTE_LOG_DP(DEBUG, VHOST_DATA, 10177f262239SYuanhan Liu "(%d) TX: pkt to local VM device id: (%d), vlan tag: %u.\n", 1018e2a1dd12SYuanhan Liu vdev->vid, dst_vdev->vid, *vlan_tag); 1019d19533e8SHuawei Xie 102072ec8d77SOuyang Changchun return 0; 102172ec8d77SOuyang Changchun } 102272ec8d77SOuyang Changchun 10239fd72e3cSJijiang Liu static void virtio_tx_offload(struct rte_mbuf *m) 10249fd72e3cSJijiang Liu { 1025ca7036b4SDavid Marchand struct rte_net_hdr_lens hdr_lens; 1026ca7036b4SDavid Marchand struct rte_ipv4_hdr *ipv4_hdr; 1027ca7036b4SDavid Marchand struct rte_tcp_hdr *tcp_hdr; 1028ca7036b4SDavid Marchand uint32_t ptype; 10299fd72e3cSJijiang Liu void *l3_hdr; 10309fd72e3cSJijiang Liu 1031ca7036b4SDavid Marchand ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK); 1032ca7036b4SDavid Marchand m->l2_len = hdr_lens.l2_len; 1033ca7036b4SDavid Marchand m->l3_len = hdr_lens.l3_len; 1034ca7036b4SDavid Marchand m->l4_len = hdr_lens.l4_len; 10359fd72e3cSJijiang Liu 1036ca7036b4SDavid Marchand l3_hdr = rte_pktmbuf_mtod_offset(m, void *, m->l2_len); 1037ca7036b4SDavid Marchand tcp_hdr = rte_pktmbuf_mtod_offset(m, struct rte_tcp_hdr *, 1038ca7036b4SDavid Marchand m->l2_len + m->l3_len); 1039ca7036b4SDavid Marchand 1040*daa02b5cSOlivier Matz m->ol_flags |= RTE_MBUF_F_TX_TCP_SEG; 1041ca7036b4SDavid Marchand if ((ptype & RTE_PTYPE_L3_MASK) == RTE_PTYPE_L3_IPV4) { 1042*daa02b5cSOlivier Matz m->ol_flags |= RTE_MBUF_F_TX_IPV4; 1043*daa02b5cSOlivier Matz m->ol_flags |= RTE_MBUF_F_TX_IP_CKSUM; 1044df40169aSYuanhan Liu ipv4_hdr = l3_hdr; 10459fd72e3cSJijiang Liu ipv4_hdr->hdr_checksum = 0; 1046ca7036b4SDavid Marchand tcp_hdr->cksum = rte_ipv4_phdr_cksum(l3_hdr, m->ol_flags); 1047ca7036b4SDavid Marchand } else { /* assume ethertype == RTE_ETHER_TYPE_IPV6 */ 1048*daa02b5cSOlivier Matz 
m->ol_flags |= RTE_MBUF_F_TX_IPV6; 1049ca7036b4SDavid Marchand tcp_hdr->cksum = rte_ipv6_phdr_cksum(l3_hdr, m->ol_flags); 1050df40169aSYuanhan Liu } 10519fd72e3cSJijiang Liu } 10529fd72e3cSJijiang Liu 1053c0583d98SJerin Jacob static __rte_always_inline void 1054273ecdbcSYuanhan Liu do_drain_mbuf_table(struct mbuf_table *tx_q) 1055273ecdbcSYuanhan Liu { 1056273ecdbcSYuanhan Liu uint16_t count; 1057273ecdbcSYuanhan Liu 1058273ecdbcSYuanhan Liu count = rte_eth_tx_burst(ports[0], tx_q->txq_id, 1059273ecdbcSYuanhan Liu tx_q->m_table, tx_q->len); 1060273ecdbcSYuanhan Liu if (unlikely(count < tx_q->len)) 1061273ecdbcSYuanhan Liu free_pkts(&tx_q->m_table[count], tx_q->len - count); 1062273ecdbcSYuanhan Liu 1063273ecdbcSYuanhan Liu tx_q->len = 0; 1064273ecdbcSYuanhan Liu } 1065273ecdbcSYuanhan Liu 106672ec8d77SOuyang Changchun /* 1067273ecdbcSYuanhan Liu * This function routes the TX packet to the correct interface. This 1068273ecdbcSYuanhan Liu * may be a local device or the physical port. 106972ec8d77SOuyang Changchun */ 1070c0583d98SJerin Jacob static __rte_always_inline void 107172ec8d77SOuyang Changchun virtio_tx_route(struct vhost_dev *vdev, struct rte_mbuf *m, uint16_t vlan_tag) 107272ec8d77SOuyang Changchun { 107372ec8d77SOuyang Changchun struct mbuf_table *tx_q; 1074273ecdbcSYuanhan Liu unsigned offset = 0; 107572ec8d77SOuyang Changchun const uint16_t lcore_id = rte_lcore_id(); 10766d13ea8eSOlivier Matz struct rte_ether_hdr *nh; 107772ec8d77SOuyang Changchun 10789c5ef512SYuanhan Liu 10796d13ea8eSOlivier Matz nh = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); 108004d43857SDmitry Kozlyuk if (unlikely(rte_is_broadcast_ether_addr(&nh->dst_addr))) { 10819c5ef512SYuanhan Liu struct vhost_dev *vdev2; 10829c5ef512SYuanhan Liu 108397daf19eSYuanhan Liu TAILQ_FOREACH(vdev2, &vhost_dev_list, global_vdev_entry) { 1084a3fdb532SJunjie Chen if (vdev2 != vdev) 1085a68ba8e0SCheng Jiang sync_virtio_xmit(vdev2, vdev, m); 10869c5ef512SYuanhan Liu } 10879c5ef512SYuanhan Liu goto 
queue2nic; 10889c5ef512SYuanhan Liu } 10899c5ef512SYuanhan Liu 109072ec8d77SOuyang Changchun /*check if destination is local VM*/ 1091a68ba8e0SCheng Jiang if ((vm2vm_mode == VM2VM_SOFTWARE) && (virtio_tx_local(vdev, m) == 0)) 109272ec8d77SOuyang Changchun return; 109372ec8d77SOuyang Changchun 1094c2ab5162SOuyang Changchun if (unlikely(vm2vm_mode == VM2VM_HARDWARE)) { 10957f262239SYuanhan Liu if (unlikely(find_local_dest(vdev, m, &offset, 10967f262239SYuanhan Liu &vlan_tag) != 0)) { 109772ec8d77SOuyang Changchun rte_pktmbuf_free(m); 109872ec8d77SOuyang Changchun return; 109972ec8d77SOuyang Changchun } 1100d19533e8SHuawei Xie } 1101d19533e8SHuawei Xie 11025d8f0bafSOlivier Matz RTE_LOG_DP(DEBUG, VHOST_DATA, 1103e2a1dd12SYuanhan Liu "(%d) TX: MAC address is external\n", vdev->vid); 1104d19533e8SHuawei Xie 11059c5ef512SYuanhan Liu queue2nic: 11069c5ef512SYuanhan Liu 1107d19533e8SHuawei Xie /*Add packet to the port tx queue*/ 1108d19533e8SHuawei Xie tx_q = &lcore_tx_queue[lcore_id]; 1109d19533e8SHuawei Xie 11106d13ea8eSOlivier Matz nh = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); 111135b2d13fSOlivier Matz if (unlikely(nh->ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_VLAN))) { 11128b9bb988SOuyang Changchun /* Guest has inserted the vlan tag. */ 11136d13ea8eSOlivier Matz struct rte_vlan_hdr *vh = (struct rte_vlan_hdr *) (nh + 1); 11148b9bb988SOuyang Changchun uint16_t vlan_tag_be = rte_cpu_to_be_16(vlan_tag); 11158b9bb988SOuyang Changchun if ((vm2vm_mode == VM2VM_HARDWARE) && 11168b9bb988SOuyang Changchun (vh->vlan_tci != vlan_tag_be)) 11178b9bb988SOuyang Changchun vh->vlan_tci = vlan_tag_be; 11188b9bb988SOuyang Changchun } else { 1119*daa02b5cSOlivier Matz m->ol_flags |= RTE_MBUF_F_TX_VLAN; 1120e44fb8a4SOuyang Changchun 1121c2ab5162SOuyang Changchun /* 1122c2ab5162SOuyang Changchun * Find the right seg to adjust the data len when offset is 1123c2ab5162SOuyang Changchun * bigger than tail room size. 
1124c2ab5162SOuyang Changchun */ 1125c2ab5162SOuyang Changchun if (unlikely(vm2vm_mode == VM2VM_HARDWARE)) { 1126c2ab5162SOuyang Changchun if (likely(offset <= rte_pktmbuf_tailroom(m))) 11274d50b6acSHuawei Xie m->data_len += offset; 1128c2ab5162SOuyang Changchun else { 1129c2ab5162SOuyang Changchun struct rte_mbuf *seg = m; 1130c2ab5162SOuyang Changchun 1131c2ab5162SOuyang Changchun while ((seg->next != NULL) && 1132c2ab5162SOuyang Changchun (offset > rte_pktmbuf_tailroom(seg))) 1133c2ab5162SOuyang Changchun seg = seg->next; 1134c2ab5162SOuyang Changchun 1135c2ab5162SOuyang Changchun seg->data_len += offset; 1136c2ab5162SOuyang Changchun } 1137e44fb8a4SOuyang Changchun m->pkt_len += offset; 1138c2ab5162SOuyang Changchun } 1139e44fb8a4SOuyang Changchun 11404d50b6acSHuawei Xie m->vlan_tci = vlan_tag; 11418b9bb988SOuyang Changchun } 1142d19533e8SHuawei Xie 1143*daa02b5cSOlivier Matz if (m->ol_flags & RTE_MBUF_F_RX_LRO) 11449fd72e3cSJijiang Liu virtio_tx_offload(m); 11459fd72e3cSJijiang Liu 1146273ecdbcSYuanhan Liu tx_q->m_table[tx_q->len++] = m; 1147d19533e8SHuawei Xie if (enable_stats) { 114856fe86f8SYuanhan Liu vdev->stats.tx_total++; 114956fe86f8SYuanhan Liu vdev->stats.tx++; 1150d19533e8SHuawei Xie } 1151d19533e8SHuawei Xie 1152273ecdbcSYuanhan Liu if (unlikely(tx_q->len == MAX_PKT_BURST)) 1153273ecdbcSYuanhan Liu do_drain_mbuf_table(tx_q); 1154d19533e8SHuawei Xie } 1155d19533e8SHuawei Xie 1156d19533e8SHuawei Xie 1157c0583d98SJerin Jacob static __rte_always_inline void 1158273ecdbcSYuanhan Liu drain_mbuf_table(struct mbuf_table *tx_q) 1159273ecdbcSYuanhan Liu { 1160273ecdbcSYuanhan Liu static uint64_t prev_tsc; 1161273ecdbcSYuanhan Liu uint64_t cur_tsc; 1162273ecdbcSYuanhan Liu 1163273ecdbcSYuanhan Liu if (tx_q->len == 0) 1164d19533e8SHuawei Xie return; 1165273ecdbcSYuanhan Liu 1166273ecdbcSYuanhan Liu cur_tsc = rte_rdtsc(); 1167273ecdbcSYuanhan Liu if (unlikely(cur_tsc - prev_tsc > MBUF_TABLE_DRAIN_TSC)) { 1168273ecdbcSYuanhan Liu prev_tsc = cur_tsc; 
1169273ecdbcSYuanhan Liu 11705d8f0bafSOlivier Matz RTE_LOG_DP(DEBUG, VHOST_DATA, 1171273ecdbcSYuanhan Liu "TX queue drained after timeout with burst size %u\n", 1172273ecdbcSYuanhan Liu tx_q->len); 1173273ecdbcSYuanhan Liu do_drain_mbuf_table(tx_q); 1174d19533e8SHuawei Xie } 1175273ecdbcSYuanhan Liu } 1176273ecdbcSYuanhan Liu 1177c0583d98SJerin Jacob static __rte_always_inline void 1178273ecdbcSYuanhan Liu drain_eth_rx(struct vhost_dev *vdev) 1179273ecdbcSYuanhan Liu { 1180273ecdbcSYuanhan Liu uint16_t rx_count, enqueue_count; 1181a68ba8e0SCheng Jiang struct rte_mbuf *pkts[MAX_PKT_BURST]; 1182273ecdbcSYuanhan Liu 1183273ecdbcSYuanhan Liu rx_count = rte_eth_rx_burst(ports[0], vdev->vmdq_rx_q, 1184273ecdbcSYuanhan Liu pkts, MAX_PKT_BURST); 1185abec60e7SCheng Jiang 1186273ecdbcSYuanhan Liu if (!rx_count) 1187273ecdbcSYuanhan Liu return; 1188273ecdbcSYuanhan Liu 1189d19533e8SHuawei Xie /* 1190273ecdbcSYuanhan Liu * When "enable_retry" is set, here we wait and retry when there 1191273ecdbcSYuanhan Liu * is no enough free slots in the queue to hold @rx_count packets, 1192273ecdbcSYuanhan Liu * to diminish packet loss. 
1193273ecdbcSYuanhan Liu */ 1194273ecdbcSYuanhan Liu if (enable_retry && 11954ecf22e3SYuanhan Liu unlikely(rx_count > rte_vhost_avail_entries(vdev->vid, 1196273ecdbcSYuanhan Liu VIRTIO_RXQ))) { 1197273ecdbcSYuanhan Liu uint32_t retry; 1198273ecdbcSYuanhan Liu 1199273ecdbcSYuanhan Liu for (retry = 0; retry < burst_rx_retry_num; retry++) { 1200273ecdbcSYuanhan Liu rte_delay_us(burst_rx_delay_time); 12014ecf22e3SYuanhan Liu if (rx_count <= rte_vhost_avail_entries(vdev->vid, 1202273ecdbcSYuanhan Liu VIRTIO_RXQ)) 1203273ecdbcSYuanhan Liu break; 1204273ecdbcSYuanhan Liu } 1205273ecdbcSYuanhan Liu } 1206273ecdbcSYuanhan Liu 1207ca059fa5SYuanhan Liu if (builtin_net_driver) { 1208ca059fa5SYuanhan Liu enqueue_count = vs_enqueue_pkts(vdev, VIRTIO_RXQ, 1209ca059fa5SYuanhan Liu pkts, rx_count); 1210abec60e7SCheng Jiang } else if (async_vhost_driver) { 1211a68ba8e0SCheng Jiang uint16_t enqueue_fail = 0; 1212a68ba8e0SCheng Jiang 1213a68ba8e0SCheng Jiang complete_async_pkts(vdev); 1214abec60e7SCheng Jiang enqueue_count = rte_vhost_submit_enqueue_burst(vdev->vid, 1215abeb8652SJiayu Hu VIRTIO_RXQ, pkts, rx_count); 1216abeb8652SJiayu Hu __atomic_add_fetch(&vdev->pkts_inflight, enqueue_count, __ATOMIC_SEQ_CST); 1217a68ba8e0SCheng Jiang 1218a68ba8e0SCheng Jiang enqueue_fail = rx_count - enqueue_count; 1219a68ba8e0SCheng Jiang if (enqueue_fail) 1220a68ba8e0SCheng Jiang free_pkts(&pkts[enqueue_count], enqueue_fail); 1221a68ba8e0SCheng Jiang 1222ca059fa5SYuanhan Liu } else { 12234ecf22e3SYuanhan Liu enqueue_count = rte_vhost_enqueue_burst(vdev->vid, VIRTIO_RXQ, 1224273ecdbcSYuanhan Liu pkts, rx_count); 1225ca059fa5SYuanhan Liu } 1226abec60e7SCheng Jiang 1227273ecdbcSYuanhan Liu if (enable_stats) { 1228a68ba8e0SCheng Jiang __atomic_add_fetch(&vdev->stats.rx_total_atomic, rx_count, 1229a68ba8e0SCheng Jiang __ATOMIC_SEQ_CST); 1230a68ba8e0SCheng Jiang __atomic_add_fetch(&vdev->stats.rx_atomic, enqueue_count, 1231a68ba8e0SCheng Jiang __ATOMIC_SEQ_CST); 1232273ecdbcSYuanhan Liu } 
1233273ecdbcSYuanhan Liu 1234abec60e7SCheng Jiang if (!async_vhost_driver) 1235273ecdbcSYuanhan Liu free_pkts(pkts, rx_count); 1236273ecdbcSYuanhan Liu } 1237273ecdbcSYuanhan Liu 1238c0583d98SJerin Jacob static __rte_always_inline void 1239273ecdbcSYuanhan Liu drain_virtio_tx(struct vhost_dev *vdev) 1240273ecdbcSYuanhan Liu { 1241273ecdbcSYuanhan Liu struct rte_mbuf *pkts[MAX_PKT_BURST]; 1242273ecdbcSYuanhan Liu uint16_t count; 1243273ecdbcSYuanhan Liu uint16_t i; 1244273ecdbcSYuanhan Liu 1245ca059fa5SYuanhan Liu if (builtin_net_driver) { 1246ca059fa5SYuanhan Liu count = vs_dequeue_pkts(vdev, VIRTIO_TXQ, mbuf_pool, 1247273ecdbcSYuanhan Liu pkts, MAX_PKT_BURST); 1248ca059fa5SYuanhan Liu } else { 1249ca059fa5SYuanhan Liu count = rte_vhost_dequeue_burst(vdev->vid, VIRTIO_TXQ, 1250ca059fa5SYuanhan Liu mbuf_pool, pkts, MAX_PKT_BURST); 1251ca059fa5SYuanhan Liu } 1252273ecdbcSYuanhan Liu 1253273ecdbcSYuanhan Liu /* setup VMDq for the first packet */ 1254273ecdbcSYuanhan Liu if (unlikely(vdev->ready == DEVICE_MAC_LEARNING) && count) { 1255273ecdbcSYuanhan Liu if (vdev->remove || link_vmdq(vdev, pkts[0]) == -1) 1256273ecdbcSYuanhan Liu free_pkts(pkts, count); 1257273ecdbcSYuanhan Liu } 1258273ecdbcSYuanhan Liu 12597f262239SYuanhan Liu for (i = 0; i < count; ++i) 1260e2a1dd12SYuanhan Liu virtio_tx_route(vdev, pkts[i], vlan_tags[vdev->vid]); 1261273ecdbcSYuanhan Liu } 1262273ecdbcSYuanhan Liu 1263273ecdbcSYuanhan Liu /* 1264273ecdbcSYuanhan Liu * Main function of vhost-switch. It basically does: 1265273ecdbcSYuanhan Liu * 1266273ecdbcSYuanhan Liu * for each vhost device { 1267273ecdbcSYuanhan Liu * - drain_eth_rx() 1268273ecdbcSYuanhan Liu * 1269273ecdbcSYuanhan Liu * Which drains the host eth Rx queue linked to the vhost device, 1270273ecdbcSYuanhan Liu * and deliver all of them to guest virito Rx ring associated with 1271273ecdbcSYuanhan Liu * this vhost device. 
1272273ecdbcSYuanhan Liu * 1273273ecdbcSYuanhan Liu * - drain_virtio_tx() 1274273ecdbcSYuanhan Liu * 1275273ecdbcSYuanhan Liu * Which drains the guest virtio Tx queue and deliver all of them 1276273ecdbcSYuanhan Liu * to the target, which could be another vhost device, or the 1277273ecdbcSYuanhan Liu * physical eth dev. The route is done in function "virtio_tx_route". 1278273ecdbcSYuanhan Liu * } 1279d19533e8SHuawei Xie */ 1280d19533e8SHuawei Xie static int 1281273ecdbcSYuanhan Liu switch_worker(void *arg __rte_unused) 1282d19533e8SHuawei Xie { 1283273ecdbcSYuanhan Liu unsigned i; 1284273ecdbcSYuanhan Liu unsigned lcore_id = rte_lcore_id(); 1285273ecdbcSYuanhan Liu struct vhost_dev *vdev; 1286d19533e8SHuawei Xie struct mbuf_table *tx_q; 1287d19533e8SHuawei Xie 1288d19533e8SHuawei Xie RTE_LOG(INFO, VHOST_DATA, "Procesing on Core %u started\n", lcore_id); 1289d19533e8SHuawei Xie 1290d19533e8SHuawei Xie tx_q = &lcore_tx_queue[lcore_id]; 1291273ecdbcSYuanhan Liu for (i = 0; i < rte_lcore_count(); i++) { 1292d19533e8SHuawei Xie if (lcore_ids[i] == lcore_id) { 1293d19533e8SHuawei Xie tx_q->txq_id = i; 1294d19533e8SHuawei Xie break; 1295d19533e8SHuawei Xie } 1296d19533e8SHuawei Xie } 1297d19533e8SHuawei Xie 1298d19533e8SHuawei Xie while(1) { 1299273ecdbcSYuanhan Liu drain_mbuf_table(tx_q); 1300a68ba8e0SCheng Jiang drain_vhost_table(); 1301d19533e8SHuawei Xie /* 130245657a5cSYuanhan Liu * Inform the configuration core that we have exited the 130345657a5cSYuanhan Liu * linked list and that no devices are in use if requested. 
1304d19533e8SHuawei Xie */ 130545657a5cSYuanhan Liu if (lcore_info[lcore_id].dev_removal_flag == REQUEST_DEV_REMOVAL) 130645657a5cSYuanhan Liu lcore_info[lcore_id].dev_removal_flag = ACK_DEV_REMOVAL; 1307d19533e8SHuawei Xie 1308d19533e8SHuawei Xie /* 1309273ecdbcSYuanhan Liu * Process vhost devices 1310d19533e8SHuawei Xie */ 131197daf19eSYuanhan Liu TAILQ_FOREACH(vdev, &lcore_info[lcore_id].vdev_list, 131297daf19eSYuanhan Liu lcore_vdev_entry) { 1313364dddcdSHuawei Xie if (unlikely(vdev->remove)) { 1314e571e6b4SHuawei Xie unlink_vmdq(vdev); 1315e571e6b4SHuawei Xie vdev->ready = DEVICE_SAFE_REMOVE; 1316d19533e8SHuawei Xie continue; 1317d19533e8SHuawei Xie } 131845657a5cSYuanhan Liu 1319273ecdbcSYuanhan Liu if (likely(vdev->ready == DEVICE_RX)) 1320273ecdbcSYuanhan Liu drain_eth_rx(vdev); 1321d19533e8SHuawei Xie 1322273ecdbcSYuanhan Liu if (likely(!vdev->remove)) 1323273ecdbcSYuanhan Liu drain_virtio_tx(vdev); 1324d19533e8SHuawei Xie } 1325d19533e8SHuawei Xie } 1326d19533e8SHuawei Xie 1327d19533e8SHuawei Xie return 0; 1328d19533e8SHuawei Xie } 1329d19533e8SHuawei Xie 1330d19533e8SHuawei Xie /* 133145657a5cSYuanhan Liu * Remove a device from the specific data core linked list and from the 133245657a5cSYuanhan Liu * main linked list. Synchonization occurs through the use of the 133345657a5cSYuanhan Liu * lcore dev_removal_flag. Device is made volatile here to avoid re-ordering 1334d19533e8SHuawei Xie * of dev->remove=1 which can cause an infinite loop in the rte_pause loop. 
1335d19533e8SHuawei Xie */ 1336d19533e8SHuawei Xie static void 13374ecf22e3SYuanhan Liu destroy_device(int vid) 1338d19533e8SHuawei Xie { 133916ae8abeSYuanhan Liu struct vhost_dev *vdev = NULL; 1340d19533e8SHuawei Xie int lcore; 1341a68ba8e0SCheng Jiang uint16_t i; 1342d19533e8SHuawei Xie 134316ae8abeSYuanhan Liu TAILQ_FOREACH(vdev, &vhost_dev_list, global_vdev_entry) { 13444ecf22e3SYuanhan Liu if (vdev->vid == vid) 134516ae8abeSYuanhan Liu break; 134616ae8abeSYuanhan Liu } 134716ae8abeSYuanhan Liu if (!vdev) 134816ae8abeSYuanhan Liu return; 1349d19533e8SHuawei Xie /*set the remove flag. */ 1350e571e6b4SHuawei Xie vdev->remove = 1; 1351e571e6b4SHuawei Xie while(vdev->ready != DEVICE_SAFE_REMOVE) { 1352d19533e8SHuawei Xie rte_pause(); 1353d19533e8SHuawei Xie } 1354d19533e8SHuawei Xie 1355a68ba8e0SCheng Jiang for (i = 0; i < RTE_MAX_LCORE; i++) 1356a68ba8e0SCheng Jiang rte_free(vhost_txbuff[i * MAX_VHOST_DEVICE + vid]); 1357a68ba8e0SCheng Jiang 1358ca059fa5SYuanhan Liu if (builtin_net_driver) 1359ca059fa5SYuanhan Liu vs_vhost_net_remove(vdev); 1360ca059fa5SYuanhan Liu 136197daf19eSYuanhan Liu TAILQ_REMOVE(&lcore_info[vdev->coreid].vdev_list, vdev, 136297daf19eSYuanhan Liu lcore_vdev_entry); 136397daf19eSYuanhan Liu TAILQ_REMOVE(&vhost_dev_list, vdev, global_vdev_entry); 136497daf19eSYuanhan Liu 1365d19533e8SHuawei Xie 1366d19533e8SHuawei Xie /* Set the dev_removal_flag on each lcore. */ 1367cb056611SStephen Hemminger RTE_LCORE_FOREACH_WORKER(lcore) 136845657a5cSYuanhan Liu lcore_info[lcore].dev_removal_flag = REQUEST_DEV_REMOVAL; 1369d19533e8SHuawei Xie 1370d19533e8SHuawei Xie /* 137145657a5cSYuanhan Liu * Once each core has set the dev_removal_flag to ACK_DEV_REMOVAL 137245657a5cSYuanhan Liu * we can be sure that they can no longer access the device removed 137345657a5cSYuanhan Liu * from the linked lists and that the devices are no longer in use. 
1374d19533e8SHuawei Xie */ 1375cb056611SStephen Hemminger RTE_LCORE_FOREACH_WORKER(lcore) { 137645657a5cSYuanhan Liu while (lcore_info[lcore].dev_removal_flag != ACK_DEV_REMOVAL) 1377d19533e8SHuawei Xie rte_pause(); 1378d19533e8SHuawei Xie } 1379d19533e8SHuawei Xie 138045657a5cSYuanhan Liu lcore_info[vdev->coreid].device_num--; 1381d19533e8SHuawei Xie 138245657a5cSYuanhan Liu RTE_LOG(INFO, VHOST_DATA, 1383c08a3490SYuanhan Liu "(%d) device has been removed from data core\n", 1384e2a1dd12SYuanhan Liu vdev->vid); 1385d19533e8SHuawei Xie 1386b9f23beeSCheng Jiang if (async_vhost_driver) { 1387b9f23beeSCheng Jiang uint16_t n_pkt = 0; 1388b9f23beeSCheng Jiang struct rte_mbuf *m_cpl[vdev->pkts_inflight]; 1389b9f23beeSCheng Jiang 1390b9f23beeSCheng Jiang while (vdev->pkts_inflight) { 1391b9f23beeSCheng Jiang n_pkt = rte_vhost_clear_queue_thread_unsafe(vid, VIRTIO_RXQ, 1392b9f23beeSCheng Jiang m_cpl, vdev->pkts_inflight); 1393b9f23beeSCheng Jiang free_pkts(m_cpl, n_pkt); 1394b9f23beeSCheng Jiang __atomic_sub_fetch(&vdev->pkts_inflight, n_pkt, __ATOMIC_SEQ_CST); 1395b9f23beeSCheng Jiang } 1396b9f23beeSCheng Jiang 1397abec60e7SCheng Jiang rte_vhost_async_channel_unregister(vid, VIRTIO_RXQ); 1398b9f23beeSCheng Jiang } 1399abec60e7SCheng Jiang 1400e571e6b4SHuawei Xie rte_free(vdev); 1401d19533e8SHuawei Xie } 1402d19533e8SHuawei Xie 1403d19533e8SHuawei Xie /* 1404d19533e8SHuawei Xie * A new device is added to a data core. First the device is added to the main linked list 140510b4270fSRami Rosen * and then allocated to a specific data core. 
1406d19533e8SHuawei Xie */ 1407d19533e8SHuawei Xie static int 14084ecf22e3SYuanhan Liu new_device(int vid) 1409d19533e8SHuawei Xie { 1410d19533e8SHuawei Xie int lcore, core_add = 0; 1411a68ba8e0SCheng Jiang uint16_t i; 1412d19533e8SHuawei Xie uint32_t device_num_min = num_devices; 1413e571e6b4SHuawei Xie struct vhost_dev *vdev; 1414fdf20fa7SSergio Gonzalez Monroy vdev = rte_zmalloc("vhost device", sizeof(*vdev), RTE_CACHE_LINE_SIZE); 1415e571e6b4SHuawei Xie if (vdev == NULL) { 1416c08a3490SYuanhan Liu RTE_LOG(INFO, VHOST_DATA, 14177f262239SYuanhan Liu "(%d) couldn't allocate memory for vhost dev\n", 1418e2a1dd12SYuanhan Liu vid); 1419e571e6b4SHuawei Xie return -1; 1420e571e6b4SHuawei Xie } 1421e2a1dd12SYuanhan Liu vdev->vid = vid; 1422d19533e8SHuawei Xie 1423a68ba8e0SCheng Jiang for (i = 0; i < RTE_MAX_LCORE; i++) { 1424a68ba8e0SCheng Jiang vhost_txbuff[i * MAX_VHOST_DEVICE + vid] 1425a68ba8e0SCheng Jiang = rte_zmalloc("vhost bufftable", 1426a68ba8e0SCheng Jiang sizeof(struct vhost_bufftable), 1427a68ba8e0SCheng Jiang RTE_CACHE_LINE_SIZE); 1428a68ba8e0SCheng Jiang 1429a68ba8e0SCheng Jiang if (vhost_txbuff[i * MAX_VHOST_DEVICE + vid] == NULL) { 1430a68ba8e0SCheng Jiang RTE_LOG(INFO, VHOST_DATA, 1431a68ba8e0SCheng Jiang "(%d) couldn't allocate memory for vhost TX\n", vid); 1432a68ba8e0SCheng Jiang return -1; 1433a68ba8e0SCheng Jiang } 1434a68ba8e0SCheng Jiang } 1435a68ba8e0SCheng Jiang 1436ca059fa5SYuanhan Liu if (builtin_net_driver) 1437ca059fa5SYuanhan Liu vs_vhost_net_setup(vdev); 1438ca059fa5SYuanhan Liu 143997daf19eSYuanhan Liu TAILQ_INSERT_TAIL(&vhost_dev_list, vdev, global_vdev_entry); 1440e2a1dd12SYuanhan Liu vdev->vmdq_rx_q = vid * queues_per_pool + vmdq_queue_base; 1441d19533e8SHuawei Xie 1442d19533e8SHuawei Xie /*reset ready flag*/ 1443e571e6b4SHuawei Xie vdev->ready = DEVICE_MAC_LEARNING; 1444e571e6b4SHuawei Xie vdev->remove = 0; 1445d19533e8SHuawei Xie 1446d19533e8SHuawei Xie /* Find a suitable lcore to add the device. 
*/ 1447cb056611SStephen Hemminger RTE_LCORE_FOREACH_WORKER(lcore) { 144845657a5cSYuanhan Liu if (lcore_info[lcore].device_num < device_num_min) { 144945657a5cSYuanhan Liu device_num_min = lcore_info[lcore].device_num; 1450d19533e8SHuawei Xie core_add = lcore; 1451d19533e8SHuawei Xie } 1452d19533e8SHuawei Xie } 1453e571e6b4SHuawei Xie vdev->coreid = core_add; 1454e571e6b4SHuawei Xie 145597daf19eSYuanhan Liu TAILQ_INSERT_TAIL(&lcore_info[vdev->coreid].vdev_list, vdev, 145697daf19eSYuanhan Liu lcore_vdev_entry); 145745657a5cSYuanhan Liu lcore_info[vdev->coreid].device_num++; 1458d19533e8SHuawei Xie 1459d19533e8SHuawei Xie /* Disable notifications. */ 14604ecf22e3SYuanhan Liu rte_vhost_enable_guest_notification(vid, VIRTIO_RXQ, 0); 14614ecf22e3SYuanhan Liu rte_vhost_enable_guest_notification(vid, VIRTIO_TXQ, 0); 1462d19533e8SHuawei Xie 1463c08a3490SYuanhan Liu RTE_LOG(INFO, VHOST_DATA, 1464c08a3490SYuanhan Liu "(%d) device has been added to data core %d\n", 1465e2a1dd12SYuanhan Liu vid, vdev->coreid); 1466d19533e8SHuawei Xie 1467abec60e7SCheng Jiang if (async_vhost_driver) { 1468acbc3888SJiayu Hu struct rte_vhost_async_config config = {0}; 14696e9a9d2aSCheng Jiang struct rte_vhost_async_channel_ops channel_ops; 1470a68ba8e0SCheng Jiang 147147afdbbeSCheng Jiang if (dma_type != NULL && strncmp(dma_type, "ioat", 4) == 0) { 14726e9a9d2aSCheng Jiang channel_ops.transfer_data = ioat_transfer_data_cb; 14736e9a9d2aSCheng Jiang channel_ops.check_completed_copies = 14746e9a9d2aSCheng Jiang ioat_check_completed_copies_cb; 1475a68ba8e0SCheng Jiang 1476acbc3888SJiayu Hu config.features = RTE_VHOST_ASYNC_INORDER; 1477a68ba8e0SCheng Jiang 1478abec60e7SCheng Jiang return rte_vhost_async_channel_register(vid, VIRTIO_RXQ, 1479acbc3888SJiayu Hu config, &channel_ops); 1480abec60e7SCheng Jiang } 14816e9a9d2aSCheng Jiang } 1482abec60e7SCheng Jiang 1483d19533e8SHuawei Xie return 0; 1484d19533e8SHuawei Xie } 1485d19533e8SHuawei Xie 1486b9f23beeSCheng Jiang static int 1487b9f23beeSCheng Jiang 
vring_state_changed(int vid, uint16_t queue_id, int enable) 1488b9f23beeSCheng Jiang { 1489b9f23beeSCheng Jiang struct vhost_dev *vdev = NULL; 1490b9f23beeSCheng Jiang 1491b9f23beeSCheng Jiang TAILQ_FOREACH(vdev, &vhost_dev_list, global_vdev_entry) { 1492b9f23beeSCheng Jiang if (vdev->vid == vid) 1493b9f23beeSCheng Jiang break; 1494b9f23beeSCheng Jiang } 1495b9f23beeSCheng Jiang if (!vdev) 1496b9f23beeSCheng Jiang return -1; 1497b9f23beeSCheng Jiang 1498b9f23beeSCheng Jiang if (queue_id != VIRTIO_RXQ) 1499b9f23beeSCheng Jiang return 0; 1500b9f23beeSCheng Jiang 1501b9f23beeSCheng Jiang if (async_vhost_driver) { 1502b9f23beeSCheng Jiang if (!enable) { 1503b9f23beeSCheng Jiang uint16_t n_pkt = 0; 1504b9f23beeSCheng Jiang struct rte_mbuf *m_cpl[vdev->pkts_inflight]; 1505b9f23beeSCheng Jiang 1506b9f23beeSCheng Jiang while (vdev->pkts_inflight) { 1507b9f23beeSCheng Jiang n_pkt = rte_vhost_clear_queue_thread_unsafe(vid, queue_id, 1508b9f23beeSCheng Jiang m_cpl, vdev->pkts_inflight); 1509b9f23beeSCheng Jiang free_pkts(m_cpl, n_pkt); 1510b9f23beeSCheng Jiang __atomic_sub_fetch(&vdev->pkts_inflight, n_pkt, __ATOMIC_SEQ_CST); 1511b9f23beeSCheng Jiang } 1512b9f23beeSCheng Jiang } 1513b9f23beeSCheng Jiang } 1514b9f23beeSCheng Jiang 1515b9f23beeSCheng Jiang return 0; 1516b9f23beeSCheng Jiang } 1517b9f23beeSCheng Jiang 1518d19533e8SHuawei Xie /* 1519d19533e8SHuawei Xie * These callback allow devices to be added to the data core when configuration 1520d19533e8SHuawei Xie * has been fully complete. 
1521d19533e8SHuawei Xie */ 15227c129037SYuanhan Liu static const struct vhost_device_ops virtio_net_device_ops = 1523d19533e8SHuawei Xie { 1524d19533e8SHuawei Xie .new_device = new_device, 1525d19533e8SHuawei Xie .destroy_device = destroy_device, 1526b9f23beeSCheng Jiang .vring_state_changed = vring_state_changed, 1527d19533e8SHuawei Xie }; 1528d19533e8SHuawei Xie 1529d19533e8SHuawei Xie /* 1530d19533e8SHuawei Xie * This is a thread will wake up after a period to print stats if the user has 1531d19533e8SHuawei Xie * enabled them. 1532d19533e8SHuawei Xie */ 1533fa204854SOlivier Matz static void * 1534fa204854SOlivier Matz print_stats(__rte_unused void *arg) 1535d19533e8SHuawei Xie { 153645657a5cSYuanhan Liu struct vhost_dev *vdev; 1537d19533e8SHuawei Xie uint64_t tx_dropped, rx_dropped; 1538d19533e8SHuawei Xie uint64_t tx, tx_total, rx, rx_total; 1539d19533e8SHuawei Xie const char clr[] = { 27, '[', '2', 'J', '\0' }; 1540d19533e8SHuawei Xie const char top_left[] = { 27, '[', '1', ';', '1', 'H','\0' }; 1541d19533e8SHuawei Xie 1542d19533e8SHuawei Xie while(1) { 1543d19533e8SHuawei Xie sleep(enable_stats); 1544d19533e8SHuawei Xie 1545d19533e8SHuawei Xie /* Clear screen and move to top left */ 154656fe86f8SYuanhan Liu printf("%s%s\n", clr, top_left); 154756fe86f8SYuanhan Liu printf("Device statistics =================================\n"); 1548d19533e8SHuawei Xie 154997daf19eSYuanhan Liu TAILQ_FOREACH(vdev, &vhost_dev_list, global_vdev_entry) { 155056fe86f8SYuanhan Liu tx_total = vdev->stats.tx_total; 155156fe86f8SYuanhan Liu tx = vdev->stats.tx; 1552d19533e8SHuawei Xie tx_dropped = tx_total - tx; 155356fe86f8SYuanhan Liu 1554a68ba8e0SCheng Jiang rx_total = __atomic_load_n(&vdev->stats.rx_total_atomic, 1555a68ba8e0SCheng Jiang __ATOMIC_SEQ_CST); 1556a68ba8e0SCheng Jiang rx = __atomic_load_n(&vdev->stats.rx_atomic, 1557a68ba8e0SCheng Jiang __ATOMIC_SEQ_CST); 1558d19533e8SHuawei Xie rx_dropped = rx_total - rx; 1559d19533e8SHuawei Xie 1560c08a3490SYuanhan Liu 
printf("Statistics for device %d\n" 156156fe86f8SYuanhan Liu "-----------------------\n" 156256fe86f8SYuanhan Liu "TX total: %" PRIu64 "\n" 156356fe86f8SYuanhan Liu "TX dropped: %" PRIu64 "\n" 156456fe86f8SYuanhan Liu "TX successful: %" PRIu64 "\n" 156556fe86f8SYuanhan Liu "RX total: %" PRIu64 "\n" 156656fe86f8SYuanhan Liu "RX dropped: %" PRIu64 "\n" 156756fe86f8SYuanhan Liu "RX successful: %" PRIu64 "\n", 15684ecf22e3SYuanhan Liu vdev->vid, 156956fe86f8SYuanhan Liu tx_total, tx_dropped, tx, 157056fe86f8SYuanhan Liu rx_total, rx_dropped, rx); 1571d19533e8SHuawei Xie } 157256fe86f8SYuanhan Liu 157356fe86f8SYuanhan Liu printf("===================================================\n"); 15743ee6f706SGeorgiy Levashov 15753ee6f706SGeorgiy Levashov fflush(stdout); 1576d19533e8SHuawei Xie } 1577fa204854SOlivier Matz 1578fa204854SOlivier Matz return NULL; 1579d19533e8SHuawei Xie } 1580d19533e8SHuawei Xie 1581ad0eef4dSJiayu Hu static void 1582ad0eef4dSJiayu Hu unregister_drivers(int socket_num) 1583ad0eef4dSJiayu Hu { 1584ad0eef4dSJiayu Hu int i, ret; 1585ad0eef4dSJiayu Hu 1586ad0eef4dSJiayu Hu for (i = 0; i < socket_num; i++) { 1587ad0eef4dSJiayu Hu ret = rte_vhost_driver_unregister(socket_files + i * PATH_MAX); 1588ad0eef4dSJiayu Hu if (ret != 0) 1589ad0eef4dSJiayu Hu RTE_LOG(ERR, VHOST_CONFIG, 1590ad0eef4dSJiayu Hu "Fail to unregister vhost driver for %s.\n", 1591ad0eef4dSJiayu Hu socket_files + i * PATH_MAX); 1592ad0eef4dSJiayu Hu } 1593ad0eef4dSJiayu Hu } 1594ad0eef4dSJiayu Hu 1595c83d2d00SOuyang Changchun /* When we receive a INT signal, unregister vhost driver */ 1596c83d2d00SOuyang Changchun static void 1597c83d2d00SOuyang Changchun sigint_handler(__rte_unused int signum) 1598c83d2d00SOuyang Changchun { 1599c83d2d00SOuyang Changchun /* Unregister vhost driver. 
*/ 1600ad0eef4dSJiayu Hu unregister_drivers(nb_sockets); 1601ad0eef4dSJiayu Hu 1602c83d2d00SOuyang Changchun exit(0); 1603c83d2d00SOuyang Changchun } 1604d19533e8SHuawei Xie 1605d19533e8SHuawei Xie /* 1606bdb19b77SYuanhan Liu * While creating an mbuf pool, one key thing is to figure out how 1607bdb19b77SYuanhan Liu * many mbuf entries is enough for our use. FYI, here are some 1608bdb19b77SYuanhan Liu * guidelines: 1609bdb19b77SYuanhan Liu * 1610bdb19b77SYuanhan Liu * - Each rx queue would reserve @nr_rx_desc mbufs at queue setup stage 1611bdb19b77SYuanhan Liu * 1612bdb19b77SYuanhan Liu * - For each switch core (A CPU core does the packet switch), we need 1613bdb19b77SYuanhan Liu * also make some reservation for receiving the packets from virtio 1614bdb19b77SYuanhan Liu * Tx queue. How many is enough depends on the usage. It's normally 1615bdb19b77SYuanhan Liu * a simple calculation like following: 1616bdb19b77SYuanhan Liu * 1617bdb19b77SYuanhan Liu * MAX_PKT_BURST * max packet size / mbuf size 1618bdb19b77SYuanhan Liu * 1619bdb19b77SYuanhan Liu * So, we definitely need allocate more mbufs when TSO is enabled. 1620bdb19b77SYuanhan Liu * 1621bdb19b77SYuanhan Liu * - Similarly, for each switching core, we should serve @nr_rx_desc 1622bdb19b77SYuanhan Liu * mbufs for receiving the packets from physical NIC device. 1623bdb19b77SYuanhan Liu * 1624bdb19b77SYuanhan Liu * - We also need make sure, for each switch core, we have allocated 1625bdb19b77SYuanhan Liu * enough mbufs to fill up the mbuf cache. 
1626bdb19b77SYuanhan Liu */ 1627bdb19b77SYuanhan Liu static void 1628bdb19b77SYuanhan Liu create_mbuf_pool(uint16_t nr_port, uint32_t nr_switch_core, uint32_t mbuf_size, 1629bdb19b77SYuanhan Liu uint32_t nr_queues, uint32_t nr_rx_desc, uint32_t nr_mbuf_cache) 1630bdb19b77SYuanhan Liu { 1631bdb19b77SYuanhan Liu uint32_t nr_mbufs; 1632bdb19b77SYuanhan Liu uint32_t nr_mbufs_per_core; 1633bdb19b77SYuanhan Liu uint32_t mtu = 1500; 1634bdb19b77SYuanhan Liu 1635bdb19b77SYuanhan Liu if (mergeable) 1636bdb19b77SYuanhan Liu mtu = 9000; 1637bdb19b77SYuanhan Liu if (enable_tso) 1638bdb19b77SYuanhan Liu mtu = 64 * 1024; 1639bdb19b77SYuanhan Liu 1640bdb19b77SYuanhan Liu nr_mbufs_per_core = (mtu + mbuf_size) * MAX_PKT_BURST / 164112ee45a3SYong Wang (mbuf_size - RTE_PKTMBUF_HEADROOM); 1642bdb19b77SYuanhan Liu nr_mbufs_per_core += nr_rx_desc; 1643bdb19b77SYuanhan Liu nr_mbufs_per_core = RTE_MAX(nr_mbufs_per_core, nr_mbuf_cache); 1644bdb19b77SYuanhan Liu 1645bdb19b77SYuanhan Liu nr_mbufs = nr_queues * nr_rx_desc; 1646bdb19b77SYuanhan Liu nr_mbufs += nr_mbufs_per_core * nr_switch_core; 1647bdb19b77SYuanhan Liu nr_mbufs *= nr_port; 1648bdb19b77SYuanhan Liu 1649bdb19b77SYuanhan Liu mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL", nr_mbufs, 1650bdb19b77SYuanhan Liu nr_mbuf_cache, 0, mbuf_size, 1651bdb19b77SYuanhan Liu rte_socket_id()); 1652bdb19b77SYuanhan Liu if (mbuf_pool == NULL) 1653bdb19b77SYuanhan Liu rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n"); 1654bdb19b77SYuanhan Liu } 1655bdb19b77SYuanhan Liu 1656bdb19b77SYuanhan Liu /* 1657164a601bSYuanhan Liu * Main function, does initialisation and calls the per-lcore functions. 
1658d19533e8SHuawei Xie */ 1659d19533e8SHuawei Xie int 166098a16481SDavid Marchand main(int argc, char *argv[]) 1661d19533e8SHuawei Xie { 1662d19533e8SHuawei Xie unsigned lcore_id, core_id = 0; 1663d19533e8SHuawei Xie unsigned nb_ports, valid_num_ports; 1664ad0eef4dSJiayu Hu int ret, i; 1665f8244c63SZhiyong Yang uint16_t portid; 1666d19533e8SHuawei Xie static pthread_t tid; 1667ca7036b4SDavid Marchand uint64_t flags = RTE_VHOST_USER_NET_COMPLIANT_OL_FLAGS; 1668d19533e8SHuawei Xie 1669c83d2d00SOuyang Changchun signal(SIGINT, sigint_handler); 1670c83d2d00SOuyang Changchun 1671d19533e8SHuawei Xie /* init EAL */ 1672d19533e8SHuawei Xie ret = rte_eal_init(argc, argv); 1673d19533e8SHuawei Xie if (ret < 0) 1674d19533e8SHuawei Xie rte_exit(EXIT_FAILURE, "Error with EAL initialization\n"); 1675d19533e8SHuawei Xie argc -= ret; 1676d19533e8SHuawei Xie argv += ret; 1677d19533e8SHuawei Xie 1678d19533e8SHuawei Xie /* parse app arguments */ 1679d19533e8SHuawei Xie ret = us_vhost_parse_args(argc, argv); 1680d19533e8SHuawei Xie if (ret < 0) 1681d19533e8SHuawei Xie rte_exit(EXIT_FAILURE, "Invalid argument\n"); 1682d19533e8SHuawei Xie 1683b3bee7d8SYong Wang for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { 168445657a5cSYuanhan Liu TAILQ_INIT(&lcore_info[lcore_id].vdev_list); 168545657a5cSYuanhan Liu 1686d19533e8SHuawei Xie if (rte_lcore_is_enabled(lcore_id)) 1687d19533e8SHuawei Xie lcore_ids[core_id++] = lcore_id; 1688b3bee7d8SYong Wang } 1689d19533e8SHuawei Xie 1690d19533e8SHuawei Xie if (rte_lcore_count() > RTE_MAX_LCORE) 1691d19533e8SHuawei Xie rte_exit(EXIT_FAILURE,"Not enough cores\n"); 1692d19533e8SHuawei Xie 1693d19533e8SHuawei Xie /* Get the number of physical ports. 
*/ 1694d9a42a69SThomas Monjalon nb_ports = rte_eth_dev_count_avail(); 1695d19533e8SHuawei Xie 1696d19533e8SHuawei Xie /* 1697d19533e8SHuawei Xie * Update the global var NUM_PORTS and global array PORTS 1698d19533e8SHuawei Xie * and get value of var VALID_NUM_PORTS according to system ports number 1699d19533e8SHuawei Xie */ 1700d19533e8SHuawei Xie valid_num_ports = check_ports_num(nb_ports); 1701d19533e8SHuawei Xie 1702d19533e8SHuawei Xie if ((valid_num_ports == 0) || (valid_num_ports > MAX_SUP_PORTS)) { 1703d19533e8SHuawei Xie RTE_LOG(INFO, VHOST_PORT, "Current enabled port number is %u," 1704d19533e8SHuawei Xie "but only %u port can be enabled\n",num_ports, MAX_SUP_PORTS); 1705d19533e8SHuawei Xie return -1; 1706d19533e8SHuawei Xie } 1707d19533e8SHuawei Xie 1708bdb19b77SYuanhan Liu /* 1709bdb19b77SYuanhan Liu * FIXME: here we are trying to allocate mbufs big enough for 1710bdb19b77SYuanhan Liu * @MAX_QUEUES, but the truth is we're never going to use that 1711bdb19b77SYuanhan Liu * many queues here. We probably should only do allocation for 1712bdb19b77SYuanhan Liu * those queues we are going to use. 1713bdb19b77SYuanhan Liu */ 1714bdb19b77SYuanhan Liu create_mbuf_pool(valid_num_ports, rte_lcore_count() - 1, MBUF_DATA_SIZE, 1715bdb19b77SYuanhan Liu MAX_QUEUES, RTE_TEST_RX_DESC_DEFAULT, MBUF_CACHE_SIZE); 1716d19533e8SHuawei Xie 1717d19533e8SHuawei Xie if (vm2vm_mode == VM2VM_HARDWARE) { 1718d19533e8SHuawei Xie /* Enable VT loop back to let L2 switch to do it. 
*/ 1719d19533e8SHuawei Xie vmdq_conf_default.rx_adv_conf.vmdq_rx_conf.enable_loop_back = 1; 17201f49ec15SThomas Monjalon RTE_LOG(DEBUG, VHOST_CONFIG, 1721d19533e8SHuawei Xie "Enable loop back for L2 switch in vmdq.\n"); 1722d19533e8SHuawei Xie } 1723d19533e8SHuawei Xie 1724d19533e8SHuawei Xie /* initialize all ports */ 17258728ccf3SThomas Monjalon RTE_ETH_FOREACH_DEV(portid) { 1726d19533e8SHuawei Xie /* skip ports that are not enabled */ 1727d19533e8SHuawei Xie if ((enabled_port_mask & (1 << portid)) == 0) { 1728d19533e8SHuawei Xie RTE_LOG(INFO, VHOST_PORT, 1729d19533e8SHuawei Xie "Skipping disabled port %d\n", portid); 1730d19533e8SHuawei Xie continue; 1731d19533e8SHuawei Xie } 1732d19533e8SHuawei Xie if (port_init(portid) != 0) 1733d19533e8SHuawei Xie rte_exit(EXIT_FAILURE, 1734d19533e8SHuawei Xie "Cannot initialize network ports\n"); 1735d19533e8SHuawei Xie } 1736d19533e8SHuawei Xie 1737d19533e8SHuawei Xie /* Enable stats if the user option is set. */ 173867b6d303SRavi Kerur if (enable_stats) { 1739fa204854SOlivier Matz ret = rte_ctrl_thread_create(&tid, "print-stats", NULL, 1740fa204854SOlivier Matz print_stats, NULL); 1741fa204854SOlivier Matz if (ret < 0) 174267b6d303SRavi Kerur rte_exit(EXIT_FAILURE, 174367b6d303SRavi Kerur "Cannot create print-stats thread\n"); 174467b6d303SRavi Kerur } 1745d19533e8SHuawei Xie 1746d19533e8SHuawei Xie /* Launch all data cores. */ 1747cb056611SStephen Hemminger RTE_LCORE_FOREACH_WORKER(lcore_id) 174868363d85SYuanhan Liu rte_eal_remote_launch(switch_worker, NULL, lcore_id); 1749d19533e8SHuawei Xie 17502345e3beSYuanhan Liu if (client_mode) 17512345e3beSYuanhan Liu flags |= RTE_VHOST_USER_CLIENT; 17522345e3beSYuanhan Liu 1753bde19a4dSJiayu Hu /* Register vhost user driver to handle vhost messages. 
*/ 1754ad0eef4dSJiayu Hu for (i = 0; i < nb_sockets; i++) { 17550917f9d1SYuanhan Liu char *file = socket_files + i * PATH_MAX; 1756a68ba8e0SCheng Jiang 1757abec60e7SCheng Jiang if (async_vhost_driver) 1758abec60e7SCheng Jiang flags = flags | RTE_VHOST_USER_ASYNC_COPY; 1759abec60e7SCheng Jiang 17600917f9d1SYuanhan Liu ret = rte_vhost_driver_register(file, flags); 1761ad0eef4dSJiayu Hu if (ret != 0) { 1762ad0eef4dSJiayu Hu unregister_drivers(i); 1763ad0eef4dSJiayu Hu rte_exit(EXIT_FAILURE, 1764ad0eef4dSJiayu Hu "vhost driver register failure.\n"); 1765ad0eef4dSJiayu Hu } 1766ca059fa5SYuanhan Liu 1767ca059fa5SYuanhan Liu if (builtin_net_driver) 1768ca059fa5SYuanhan Liu rte_vhost_driver_set_features(file, VIRTIO_NET_FEATURES); 1769ca059fa5SYuanhan Liu 17700917f9d1SYuanhan Liu if (mergeable == 0) { 17710917f9d1SYuanhan Liu rte_vhost_driver_disable_features(file, 17720917f9d1SYuanhan Liu 1ULL << VIRTIO_NET_F_MRG_RXBUF); 17730917f9d1SYuanhan Liu } 17740917f9d1SYuanhan Liu 17750917f9d1SYuanhan Liu if (enable_tx_csum == 0) { 17760917f9d1SYuanhan Liu rte_vhost_driver_disable_features(file, 17770917f9d1SYuanhan Liu 1ULL << VIRTIO_NET_F_CSUM); 17780917f9d1SYuanhan Liu } 17790917f9d1SYuanhan Liu 17800917f9d1SYuanhan Liu if (enable_tso == 0) { 17810917f9d1SYuanhan Liu rte_vhost_driver_disable_features(file, 17820917f9d1SYuanhan Liu 1ULL << VIRTIO_NET_F_HOST_TSO4); 17830917f9d1SYuanhan Liu rte_vhost_driver_disable_features(file, 17840917f9d1SYuanhan Liu 1ULL << VIRTIO_NET_F_HOST_TSO6); 17850917f9d1SYuanhan Liu rte_vhost_driver_disable_features(file, 17860917f9d1SYuanhan Liu 1ULL << VIRTIO_NET_F_GUEST_TSO4); 17870917f9d1SYuanhan Liu rte_vhost_driver_disable_features(file, 17880917f9d1SYuanhan Liu 1ULL << VIRTIO_NET_F_GUEST_TSO6); 17890917f9d1SYuanhan Liu } 17900917f9d1SYuanhan Liu 17910917f9d1SYuanhan Liu if (promiscuous) { 17920917f9d1SYuanhan Liu rte_vhost_driver_enable_features(file, 17930917f9d1SYuanhan Liu 1ULL << VIRTIO_NET_F_CTRL_RX); 17940917f9d1SYuanhan Liu } 
1795d19533e8SHuawei Xie 179693433b63SYuanhan Liu ret = rte_vhost_driver_callback_register(file, 179793433b63SYuanhan Liu &virtio_net_device_ops); 179893433b63SYuanhan Liu if (ret != 0) { 179993433b63SYuanhan Liu rte_exit(EXIT_FAILURE, 180093433b63SYuanhan Liu "failed to register vhost driver callbacks.\n"); 180193433b63SYuanhan Liu } 1802af147591SYuanhan Liu 1803af147591SYuanhan Liu if (rte_vhost_driver_start(file) < 0) { 1804af147591SYuanhan Liu rte_exit(EXIT_FAILURE, 1805af147591SYuanhan Liu "failed to start vhost driver.\n"); 1806af147591SYuanhan Liu } 180793433b63SYuanhan Liu } 1808d19533e8SHuawei Xie 1809cb056611SStephen Hemminger RTE_LCORE_FOREACH_WORKER(lcore_id) 1810af147591SYuanhan Liu rte_eal_wait_lcore(lcore_id); 1811af147591SYuanhan Liu 181210aa3757SChengchang Tang /* clean up the EAL */ 181310aa3757SChengchang Tang rte_eal_cleanup(); 1814d19533e8SHuawei Xie 181510aa3757SChengchang Tang return 0; 1816d19533e8SHuawei Xie } 1817