/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <arpa/inet.h>
#include <getopt.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/virtio_net.h>
#include <linux/virtio_ring.h>
#include <signal.h>
#include <stdint.h>
#include <sys/eventfd.h>
#include <sys/param.h>
#include <unistd.h>

#include <rte_atomic.h>
#include <rte_cycles.h>
#include <rte_ethdev.h>
#include <rte_log.h>
#include <rte_string_fns.h>
#include <rte_malloc.h>

#include "main.h"
#include "virtio-net.h"
#include "vhost-net-cdev.h"

#define MAX_QUEUES 128

/* the maximum number of external ports supported */
#define MAX_SUP_PORTS 1

/*
 * Calculate the number of buffers needed per port
 */
#define NUM_MBUFS_PER_PORT ((MAX_QUEUES*RTE_TEST_RX_DESC_DEFAULT) +	\
			(num_switching_cores*MAX_PKT_BURST) +		\
			(num_switching_cores*RTE_TEST_TX_DESC_DEFAULT) +\
			(num_switching_cores*MBUF_CACHE_SIZE))

#define MBUF_CACHE_SIZE 128
#define MBUF_SIZE (2048 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM)

/*
 * No frame data buffers allocated from the host are required for the zero
 * copy implementation; the guest allocates the frame data buffers and vhost
 * uses them directly.
 */
#define VIRTIO_DESCRIPTOR_LEN_ZCP 1518
#define MBUF_SIZE_ZCP (VIRTIO_DESCRIPTOR_LEN_ZCP + sizeof(struct rte_mbuf) \
	+ RTE_PKTMBUF_HEADROOM)
#define MBUF_CACHE_SIZE_ZCP 0

/*
 * RX and TX Prefetch, Host, and Write-back threshold values should be
 * carefully set for optimal performance. Consult the network
 * controller's datasheet and supporting DPDK documentation for guidance
 * on how these parameters should be set.
 */
#define RX_PTHRESH 8 /* Default values of RX prefetch threshold reg. */
#define RX_HTHRESH 8 /* Default values of RX host threshold reg. */
#define RX_WTHRESH 4 /* Default values of RX write-back threshold reg. */

/*
 * These default values are optimized for use with the Intel(R) 82599 10 GbE
 * Controller and the DPDK ixgbe PMD. Consider using other values for other
 * network controllers and/or network drivers.
 */
#define TX_PTHRESH 36 /* Default values of TX prefetch threshold reg. */
#define TX_HTHRESH 0  /* Default values of TX host threshold reg. */
#define TX_WTHRESH 0  /* Default values of TX write-back threshold reg. */

#define MAX_PKT_BURST 32	/* Max burst size for RX/TX */
#define MAX_MRG_PKT_BURST 16	/* Max burst for merge buffers. Set to 1 due to performance issue. */
#define BURST_TX_DRAIN_US 100	/* TX drain every ~100us */

#define BURST_RX_WAIT_US 15	/* Defines how long we wait between retries on RX */
#define BURST_RX_RETRIES 4	/* Number of retries on RX. */

#define JUMBO_FRAME_MAX_SIZE 0x2600

/* State of virtio device. */
#define DEVICE_MAC_LEARNING 0
#define DEVICE_RX 1
#define DEVICE_SAFE_REMOVE 2

/* Config_core_flag status definitions. */
#define REQUEST_DEV_REMOVAL 1
#define ACK_DEV_REMOVAL 0

/* Configurable number of RX/TX ring descriptors */
#define RTE_TEST_RX_DESC_DEFAULT 1024
#define RTE_TEST_TX_DESC_DEFAULT 512

/*
 * These two macros need refining for the legacy and DPDK based front ends:
 * take the max vring avail descriptors/entries from the guest minus
 * MAX_PKT_BURST, then adjust to a power of 2.
 */
/*
 * For the legacy front end, 128 descriptors:
 * half for the virtio header, the other half for the mbuf.
 */
#define RTE_TEST_RX_DESC_DEFAULT_ZCP 32 /* legacy: 32, DPDK virt FE: 128. */
#define RTE_TEST_TX_DESC_DEFAULT_ZCP 64 /* legacy: 64, DPDK virt FE: 64. */

/* Get first 4 bytes in mbuf headroom. */
#define MBUF_HEADROOM_UINT32(mbuf) (*(uint32_t *)((uint8_t *)(mbuf) \
		+ sizeof(struct rte_mbuf)))

/* true if x is a power of 2 */
#define POWEROF2(x) ((((x)-1) & (x)) == 0)

#define INVALID_PORT_ID 0xFF

/* Max number of devices. Limited by vmdq. */
#define MAX_DEVICES 64

/* Size of buffers used for snprintfs. */
#define MAX_PRINT_BUFF 6072

/* Maximum character device basename size. */
#define MAX_BASENAME_SZ 10

/* Maximum long option length for option parsing. */
#define MAX_LONG_OPT_SZ 64

/* Used to compare MAC addresses. */
#define MAC_ADDR_CMP 0xFFFFFFFFFFFFULL
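/*
 * Only the low 48 bits of a uint64_t are significant under this mask, i.e.
 * the six bytes of an Ethernet address, so two MAC addresses loaded into
 * 64-bit values can be compared with a single masked compare.
 */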

/* Number of descriptors per cacheline. */
#define DESC_PER_CACHELINE (CACHE_LINE_SIZE / sizeof(struct vring_desc))

/* mask of enabled ports */
static uint32_t enabled_port_mask = 0;

/* Number of switching cores enabled */
static uint32_t num_switching_cores = 0;

/* number of devices/queues to support */
static uint32_t num_queues = 0;
uint32_t num_devices = 0;

/*
 * Enable zero copy: packet buffers are DMA'd directly to/from the HW
 * descriptors. Disabled by default.
 */
static uint32_t zero_copy;

/* number of descriptors to apply */
static uint32_t num_rx_descriptor = RTE_TEST_RX_DESC_DEFAULT_ZCP;
static uint32_t num_tx_descriptor = RTE_TEST_TX_DESC_DEFAULT_ZCP;

/* max ring descriptor, ixgbe, i40e, e1000 all are 4096. */
#define MAX_RING_DESC 4096

struct vpool {
	struct rte_mempool *pool;
	struct rte_ring *ring;
	uint32_t buf_size;
} vpool_array[MAX_QUEUES+MAX_QUEUES];

/* Enable VM2VM communications. If this is disabled then the MAC address compare is skipped. */
typedef enum {
	VM2VM_DISABLED = 0,
	VM2VM_SOFTWARE = 1,
	VM2VM_HARDWARE = 2,
	VM2VM_LAST
} vm2vm_type;
static vm2vm_type vm2vm_mode = VM2VM_SOFTWARE;

/* The type of host physical address translated from guest physical address. */
typedef enum {
	PHYS_ADDR_CONTINUOUS = 0,
	PHYS_ADDR_CROSS_SUBREG = 1,
	PHYS_ADDR_INVALID = 2,
	PHYS_ADDR_LAST
} hpa_type;

/* Enable stats. */
static uint32_t enable_stats = 0;
/* Enable retries on RX. */
static uint32_t enable_retry = 1;
/* Specify timeout (in useconds) between retries on RX. */
static uint32_t burst_rx_delay_time = BURST_RX_WAIT_US;
/* Specify the number of retries on RX. */
static uint32_t burst_rx_retry_num = BURST_RX_RETRIES;

/* Character device basename. Can be set by user. */
static char dev_basename[MAX_BASENAME_SZ] = "vhost-net";

/* Character device index. Can be set by user. */
static uint32_t dev_index = 0;

/* This can be set by the user so it is made available here. */
extern uint64_t VHOST_FEATURES;

/* Default configuration for rx and tx thresholds etc. */
static struct rte_eth_rxconf rx_conf_default = {
	.rx_thresh = {
		.pthresh = RX_PTHRESH,
		.hthresh = RX_HTHRESH,
		.wthresh = RX_WTHRESH,
	},
	.rx_drop_en = 1,
};

/*
 * These default values are optimized for use with the Intel(R) 82599 10 GbE
 * Controller and the DPDK ixgbe/igb PMD. Consider using other values for other
 * network controllers and/or network drivers.
 */
static struct rte_eth_txconf tx_conf_default = {
	.tx_thresh = {
		.pthresh = TX_PTHRESH,
		.hthresh = TX_HTHRESH,
		.wthresh = TX_WTHRESH,
	},
	.tx_free_thresh = 0, /* Use PMD default values */
	.tx_rs_thresh = 0, /* Use PMD default values */
};

/* empty vmdq configuration structure. Filled in programmatically */
static struct rte_eth_conf vmdq_conf_default = {
	.rxmode = {
		.mq_mode        = ETH_MQ_RX_VMDQ_ONLY,
		.split_hdr_size = 0,
		.header_split   = 0, /**< Header Split disabled */
		.hw_ip_checksum = 0, /**< IP checksum offload disabled */
		.hw_vlan_filter = 0, /**< VLAN filtering disabled */
		/*
		 * This is necessary for 1G NICs such as the I350; it fixes a
		 * bug where IPv4 forwarding in the guest cannot forward
		 * packets from one virtio device to another.
		 */
		.hw_vlan_strip  = 1, /**< VLAN strip enabled. */
		.jumbo_frame    = 0, /**< Jumbo Frame Support disabled */
		.hw_strip_crc   = 0, /**< CRC stripped by hardware */
	},

	.txmode = {
		.mq_mode = ETH_MQ_TX_NONE,
	},
	.rx_adv_conf = {
		/*
		 * should be overridden separately in code with
		 * appropriate values
		 */
		.vmdq_rx_conf = {
			.nb_queue_pools = ETH_8_POOLS,
			.enable_default_pool = 0,
			.default_pool = 0,
			.nb_pool_maps = 0,
			.pool_map = {{0, 0},},
		},
	},
};

static unsigned lcore_ids[RTE_MAX_LCORE];
static uint8_t ports[RTE_MAX_ETHPORTS];
static unsigned num_ports = 0; /**< The number of ports specified in command line */

static const uint16_t external_pkt_default_vlan_tag = 2000;
const uint16_t vlan_tags[] = {
	1000, 1001, 1002, 1003, 1004, 1005, 1006, 1007,
	1008, 1009, 1010, 1011, 1012, 1013, 1014, 1015,
	1016, 1017, 1018, 1019, 1020, 1021, 1022, 1023,
	1024, 1025, 1026, 1027, 1028, 1029, 1030, 1031,
	1032, 1033, 1034, 1035, 1036, 1037, 1038, 1039,
	1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047,
	1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055,
	1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063,
};

/* ethernet addresses of ports */
static struct ether_addr vmdq_ports_eth_addr[RTE_MAX_ETHPORTS];

/* heads for the main used and free linked lists for the data path. */
static struct virtio_net_data_ll *ll_root_used = NULL;
static struct virtio_net_data_ll *ll_root_free = NULL;

/* Array of data core structures containing information on individual core linked lists. */
static struct lcore_info lcore_info[RTE_MAX_LCORE];

/* Used for queueing bursts of TX packets. */
struct mbuf_table {
	unsigned len;
	unsigned txq_id;
	struct rte_mbuf *m_table[MAX_PKT_BURST];
};

/* TX queue for each data core. */
struct mbuf_table lcore_tx_queue[RTE_MAX_LCORE];

/* TX queue for each virtio device for zero copy. */
struct mbuf_table tx_queue_zcp[MAX_QUEUES];

/* Vlan header struct used to insert vlan tags on TX. */
struct vlan_ethhdr {
	unsigned char h_dest[ETH_ALEN];
	unsigned char h_source[ETH_ALEN];
	__be16 h_vlan_proto;
	__be16 h_vlan_TCI;
	__be16 h_vlan_encapsulated_proto;
};

/* IPv4 Header */
struct ipv4_hdr {
	uint8_t  version_ihl;		/**< version and header length */
	uint8_t  type_of_service;	/**< type of service */
	uint16_t total_length;		/**< length of packet */
	uint16_t packet_id;		/**< packet ID */
	uint16_t fragment_offset;	/**< fragmentation offset */
	uint8_t  time_to_live;		/**< time to live */
	uint8_t  next_proto_id;		/**< protocol ID */
	uint16_t hdr_checksum;		/**< header checksum */
	uint32_t src_addr;		/**< source address */
	uint32_t dst_addr;		/**< destination address */
} __attribute__((__packed__));

/* Header lengths. */
#define VLAN_HLEN 4
#define VLAN_ETH_HLEN 18

/* Per-device statistics struct */
struct device_statistics {
	uint64_t tx_total;
	rte_atomic64_t rx_total_atomic;
	uint64_t rx_total;
	uint64_t tx;
	rte_atomic64_t rx_atomic;
	uint64_t rx;
} __rte_cache_aligned;
struct device_statistics dev_statistics[MAX_DEVICES];

/*
 * Builds up the correct configuration for VMDQ VLAN pool map
 * according to the pool & queue limits.
 */
static inline int
get_eth_conf(struct rte_eth_conf *eth_conf, uint32_t num_devices)
{
	struct rte_eth_vmdq_rx_conf conf;
	unsigned i;

	memset(&conf, 0, sizeof(conf));
	conf.nb_queue_pools = (enum rte_eth_nb_pools)num_devices;
	conf.nb_pool_maps = num_devices;
	conf.enable_loop_back =
		vmdq_conf_default.rx_adv_conf.vmdq_rx_conf.enable_loop_back;

	for (i = 0; i < conf.nb_pool_maps; i++) {
		conf.pool_map[i].vlan_id = vlan_tags[i];
		conf.pool_map[i].pools = (1UL << i);
	}

	(void)(rte_memcpy(eth_conf, &vmdq_conf_default, sizeof(*eth_conf)));
	(void)(rte_memcpy(&eth_conf->rx_adv_conf.vmdq_rx_conf, &conf,
		sizeof(eth_conf->rx_adv_conf.vmdq_rx_conf)));
	return 0;
}

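/*
 * Illustration of the mapping built above (no additional configuration):
 * with num_devices pools, VMDQ pool i accepts frames tagged with
 * vlan_tags[i], i.e. VLAN 1000 + i, and only bit i is set in its pool mask.
 * For example, pool 2 is reached with VLAN tag 1002 and pool mask
 * (1UL << 2); each pool is expected to back exactly one virtio device
 * (see validate_num_devices() below).
 */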
/*
 * Validate the device number according to the max pool number obtained from
 * dev_info. If the device number is invalid, give the error message and
 * return -1. Each device must have its own pool.
 */
static inline int
validate_num_devices(uint32_t max_nb_devices)
{
	if (num_devices > max_nb_devices) {
		RTE_LOG(ERR, VHOST_PORT, "invalid number of devices\n");
		return -1;
	}
	return 0;
}

/*
 * Initialises a given port using global settings and with the rx buffers
 * coming from the mbuf_pool passed as parameter
 */
static inline int
port_init(uint8_t port)
{
	struct rte_eth_dev_info dev_info;
	struct rte_eth_conf port_conf;
	uint16_t rx_rings, tx_rings;
	uint16_t rx_ring_size, tx_ring_size;
	int retval;
	uint16_t q;

	/* The max pool number from dev_info will be used to validate the pool number specified in cmd line */
	rte_eth_dev_info_get(port, &dev_info);

	/* Configure the number of supported virtio devices based on VMDQ limits */
	num_devices = dev_info.max_vmdq_pools;
	num_queues = dev_info.max_rx_queues;

	if (zero_copy) {
		rx_ring_size = num_rx_descriptor;
		tx_ring_size = num_tx_descriptor;
		tx_rings = dev_info.max_tx_queues;
	} else {
		rx_ring_size = RTE_TEST_RX_DESC_DEFAULT;
		tx_ring_size = RTE_TEST_TX_DESC_DEFAULT;
		tx_rings = (uint16_t)rte_lcore_count();
	}

	retval = validate_num_devices(MAX_DEVICES);
	if (retval < 0)
		return retval;

	/* Get port configuration. */
	retval = get_eth_conf(&port_conf, num_devices);
	if (retval < 0)
		return retval;

	if (port >= rte_eth_dev_count())
		return -1;

	rx_rings = (uint16_t)num_queues;
	/* Configure ethernet device. */
	retval = rte_eth_dev_configure(port, rx_rings, tx_rings, &port_conf);
	if (retval != 0)
		return retval;

	/* Setup the queues. */
	for (q = 0; q < rx_rings; q++) {
		retval = rte_eth_rx_queue_setup(port, q, rx_ring_size,
						rte_eth_dev_socket_id(port), &rx_conf_default,
						vpool_array[q].pool);
		if (retval < 0)
			return retval;
	}
	for (q = 0; q < tx_rings; q++) {
		retval = rte_eth_tx_queue_setup(port, q, tx_ring_size,
						rte_eth_dev_socket_id(port), &tx_conf_default);
		if (retval < 0)
			return retval;
	}

	/* Start the device. */
	retval = rte_eth_dev_start(port);
	if (retval < 0) {
		RTE_LOG(ERR, VHOST_DATA, "Failed to start the device.\n");
		return retval;
	}

	rte_eth_macaddr_get(port, &vmdq_ports_eth_addr[port]);
	RTE_LOG(INFO, VHOST_PORT, "Max virtio devices supported: %u\n", num_devices);
	RTE_LOG(INFO, VHOST_PORT, "Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8
			" %02"PRIx8" %02"PRIx8" %02"PRIx8"\n",
			(unsigned)port,
			vmdq_ports_eth_addr[port].addr_bytes[0],
			vmdq_ports_eth_addr[port].addr_bytes[1],
			vmdq_ports_eth_addr[port].addr_bytes[2],
			vmdq_ports_eth_addr[port].addr_bytes[3],
			vmdq_ports_eth_addr[port].addr_bytes[4],
			vmdq_ports_eth_addr[port].addr_bytes[5]);

	return 0;
}

/*
 * Set character device basename.
 */
static int
us_vhost_parse_basename(const char *q_arg)
{
	/* parse the basename string */

	if (strnlen(q_arg, MAX_BASENAME_SZ) >= MAX_BASENAME_SZ)
		return -1;
	else
		snprintf((char *)&dev_basename, MAX_BASENAME_SZ, "%s", q_arg);

	return 0;
}

/*
 * Parse the portmask provided at run time.
 */
static int
parse_portmask(const char *portmask)
{
	char *end = NULL;
	unsigned long pm;

	errno = 0;

	/* parse hexadecimal string */
	pm = strtoul(portmask, &end, 16);
	if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0') || (errno != 0))
		return -1;

	if (pm == 0)
		return -1;

	return pm;

}
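/*
 * Note: the portmask is parsed as hex, one bit per port; e.g. "-p 0x1"
 * enables port 0 only. Since MAX_SUP_PORTS is 1, masks that enable more
 * than one port are rejected later in us_vhost_parse_args().
 */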
/*
 * Parse num options at run time.
 */
static int
parse_num_opt(const char *q_arg, uint32_t max_valid_value)
{
	char *end = NULL;
	unsigned long num;

	errno = 0;

	/* parse unsigned int string */
	num = strtoul(q_arg, &end, 10);
	if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0') || (errno != 0))
		return -1;

	if (num > max_valid_value)
		return -1;

	return num;

}

/*
 * Display usage
 */
static void
us_vhost_usage(const char *prgname)
{
	RTE_LOG(INFO, VHOST_CONFIG, "%s [EAL options] -- -p PORTMASK\n"
	"	--vm2vm [0|1|2]\n"
	"	--rx-retry [0|1] --mergeable [0|1] --stats [0-N]\n"
	"	--dev-basename <name> --dev-index [0-N]\n"
	"	--nb-devices ND\n"
	"	-p PORTMASK: Set mask for ports to be used by application\n"
	"	--vm2vm [0|1|2]: disable/software(default)/hardware vm2vm comms\n"
	"	--rx-retry [0|1]: disable/enable(default) retries on rx. Enable retry if destination queue is full\n"
	"	--rx-retry-delay [0-N]: timeout(in useconds) between retries on RX. Takes effect only when retries on rx are enabled\n"
	"	--rx-retry-num [0-N]: the number of retries on rx. Takes effect only when retries on rx are enabled\n"
	"	--mergeable [0|1]: disable(default)/enable RX mergeable buffers\n"
	"	--stats [0-N]: 0: Disable stats, N: Time in seconds to print stats\n"
	"	--dev-basename: The basename to be used for the character device.\n"
	"	--dev-index [0-N]: Defaults to zero if not used. Index is appended to basename.\n"
	"	--zero-copy [0|1]: disable(default)/enable rx/tx "
		"zero copy\n"
	"	--rx-desc-num [0-N]: the number of descriptors on rx, "
		"used only when zero copy is enabled.\n"
	"	--tx-desc-num [0-N]: the number of descriptors on tx, "
		"used only when zero copy is enabled.\n",
	       prgname);
}

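/*
 * Example invocation (the binary name depends on how the example is built;
 * "vhost-switch" is assumed here):
 *   ./vhost-switch -c 0xf -n 4 -- -p 0x1 --vm2vm 1 --rx-retry 1 \
 *       --mergeable 0 --stats 2 --dev-basename vhost-net --dev-index 0
 * i.e. use port 0 only, software vm2vm switching, RX retries enabled,
 * mergeable buffers off, stats printed every 2 seconds, and the default
 * character device basename/index.
 */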
/*
 * Parse the arguments given in the command line of the application.
 */
static int
us_vhost_parse_args(int argc, char **argv)
{
	int opt, ret;
	int option_index;
	unsigned i;
	const char *prgname = argv[0];
	static struct option long_option[] = {
		{"vm2vm", required_argument, NULL, 0},
		{"rx-retry", required_argument, NULL, 0},
		{"rx-retry-delay", required_argument, NULL, 0},
		{"rx-retry-num", required_argument, NULL, 0},
		{"mergeable", required_argument, NULL, 0},
		{"stats", required_argument, NULL, 0},
		{"dev-basename", required_argument, NULL, 0},
		{"dev-index", required_argument, NULL, 0},
		{"zero-copy", required_argument, NULL, 0},
		{"rx-desc-num", required_argument, NULL, 0},
		{"tx-desc-num", required_argument, NULL, 0},
		{NULL, 0, 0, 0},
	};

	/* Parse command line */
	while ((opt = getopt_long(argc, argv, "p:", long_option, &option_index)) != EOF) {
		switch (opt) {
		/* Portmask */
		case 'p':
			enabled_port_mask = parse_portmask(optarg);
			if (enabled_port_mask == 0) {
				RTE_LOG(INFO, VHOST_CONFIG, "Invalid portmask\n");
				us_vhost_usage(prgname);
				return -1;
			}
			break;

		case 0:
			/* Enable/disable vm2vm comms. */
			if (!strncmp(long_option[option_index].name, "vm2vm",
				MAX_LONG_OPT_SZ)) {
				ret = parse_num_opt(optarg, (VM2VM_LAST - 1));
				if (ret == -1) {
					RTE_LOG(INFO, VHOST_CONFIG,
						"Invalid argument for "
						"vm2vm [0|1|2]\n");
					us_vhost_usage(prgname);
					return -1;
				} else {
					vm2vm_mode = (vm2vm_type)ret;
				}
			}

			/* Enable/disable retries on RX. */
			if (!strncmp(long_option[option_index].name, "rx-retry", MAX_LONG_OPT_SZ)) {
				ret = parse_num_opt(optarg, 1);
				if (ret == -1) {
					RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for rx-retry [0|1]\n");
					us_vhost_usage(prgname);
					return -1;
				} else {
					enable_retry = ret;
				}
			}

			/* Specify the retries delay time (in useconds) on RX. */
			if (!strncmp(long_option[option_index].name, "rx-retry-delay", MAX_LONG_OPT_SZ)) {
				ret = parse_num_opt(optarg, INT32_MAX);
				if (ret == -1) {
					RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for rx-retry-delay [0-N]\n");
					us_vhost_usage(prgname);
					return -1;
				} else {
					burst_rx_delay_time = ret;
				}
			}

			/* Specify the retries number on RX. */
			if (!strncmp(long_option[option_index].name, "rx-retry-num", MAX_LONG_OPT_SZ)) {
				ret = parse_num_opt(optarg, INT32_MAX);
				if (ret == -1) {
					RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for rx-retry-num [0-N]\n");
					us_vhost_usage(prgname);
					return -1;
				} else {
					burst_rx_retry_num = ret;
				}
			}

			/* Enable/disable RX mergeable buffers. */
			if (!strncmp(long_option[option_index].name, "mergeable", MAX_LONG_OPT_SZ)) {
				ret = parse_num_opt(optarg, 1);
				if (ret == -1) {
					RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for mergeable [0|1]\n");
					us_vhost_usage(prgname);
					return -1;
				} else {
					if (ret) {
						vmdq_conf_default.rxmode.jumbo_frame = 1;
						vmdq_conf_default.rxmode.max_rx_pkt_len
							= JUMBO_FRAME_MAX_SIZE;
						VHOST_FEATURES = (1ULL << VIRTIO_NET_F_MRG_RXBUF);
					}
				}
			}

			/* Enable/disable stats. */
			if (!strncmp(long_option[option_index].name, "stats", MAX_LONG_OPT_SZ)) {
				ret = parse_num_opt(optarg, INT32_MAX);
				if (ret == -1) {
					RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for stats [0..N]\n");
					us_vhost_usage(prgname);
					return -1;
				} else {
					enable_stats = ret;
				}
			}

			/* Set character device basename. */
			if (!strncmp(long_option[option_index].name, "dev-basename", MAX_LONG_OPT_SZ)) {
				if (us_vhost_parse_basename(optarg) == -1) {
					RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for character device basename (Max %d characters)\n", MAX_BASENAME_SZ);
					us_vhost_usage(prgname);
					return -1;
				}
			}

			/* Set character device index. */
			if (!strncmp(long_option[option_index].name, "dev-index", MAX_LONG_OPT_SZ)) {
				ret = parse_num_opt(optarg, INT32_MAX);
				if (ret == -1) {
					RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for character device index [0..N]\n");
					us_vhost_usage(prgname);
					return -1;
				} else
					dev_index = ret;
			}

			/* Enable/disable rx/tx zero copy. */
			if (!strncmp(long_option[option_index].name,
				"zero-copy", MAX_LONG_OPT_SZ)) {
				ret = parse_num_opt(optarg, 1);
				if (ret == -1) {
					RTE_LOG(INFO, VHOST_CONFIG,
						"Invalid argument"
						" for zero-copy [0|1]\n");
					us_vhost_usage(prgname);
					return -1;
				} else
					zero_copy = ret;

				if (zero_copy) {
#ifdef RTE_MBUF_REFCNT
					RTE_LOG(ERR, VHOST_CONFIG, "Before running "
					"zero copy vhost APP, please "
					"disable RTE_MBUF_REFCNT\n"
					"in config file and then rebuild DPDK "
					"core lib!\n"
					"Otherwise please disable zero copy "
					"flag in command line!\n");
					return -1;
#endif
				}
			}

			/* Specify the descriptor number on RX. */
			if (!strncmp(long_option[option_index].name,
				"rx-desc-num", MAX_LONG_OPT_SZ)) {
				ret = parse_num_opt(optarg, MAX_RING_DESC);
				if ((ret == -1) || (!POWEROF2(ret))) {
					RTE_LOG(INFO, VHOST_CONFIG,
					"Invalid argument for rx-desc-num[0-N],"
					"power of 2 required.\n");
					us_vhost_usage(prgname);
					return -1;
				} else {
					num_rx_descriptor = ret;
				}
			}

			/* Specify the descriptor number on TX. */
			if (!strncmp(long_option[option_index].name,
				"tx-desc-num", MAX_LONG_OPT_SZ)) {
				ret = parse_num_opt(optarg, MAX_RING_DESC);
				if ((ret == -1) || (!POWEROF2(ret))) {
					RTE_LOG(INFO, VHOST_CONFIG,
					"Invalid argument for tx-desc-num [0-N],"
					"power of 2 required.\n");
					us_vhost_usage(prgname);
					return -1;
				} else {
					num_tx_descriptor = ret;
				}
			}

			break;

			/* Invalid option - print options. */
		default:
			us_vhost_usage(prgname);
			return -1;
		}
	}

	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
		if (enabled_port_mask & (1 << i))
			ports[num_ports++] = (uint8_t)i;
	}

	if ((num_ports == 0) || (num_ports > MAX_SUP_PORTS)) {
		RTE_LOG(INFO, VHOST_PORT, "Current enabled port number is %u,"
			"but only %u port can be enabled\n", num_ports, MAX_SUP_PORTS);
		return -1;
	}

	if ((zero_copy == 1) && (vm2vm_mode == VM2VM_SOFTWARE)) {
		RTE_LOG(INFO, VHOST_PORT,
			"Vhost zero copy doesn't support software vm2vm,"
			"please specify 'vm2vm 2' to use hardware vm2vm.\n");
		return -1;
	}

	if ((zero_copy == 1) && (vmdq_conf_default.rxmode.jumbo_frame == 1)) {
		RTE_LOG(INFO, VHOST_PORT,
			"Vhost zero copy doesn't support jumbo frame,"
			"please specify '--mergeable 0' to disable the "
			"mergeable feature.\n");
		return -1;
	}

	return 0;
}

/*
 * Update the global vars num_ports and ports according to the number of
 * ports in the system and return the number of valid ports.
 */
static unsigned check_ports_num(unsigned nb_ports)
{
	unsigned valid_num_ports = num_ports;
	unsigned portid;

	if (num_ports > nb_ports) {
		RTE_LOG(INFO, VHOST_PORT, "\nSpecified port number(%u) exceeds total system port number(%u)\n",
			num_ports, nb_ports);
		num_ports = nb_ports;
	}

	for (portid = 0; portid < num_ports; portid++) {
		if (ports[portid] >= nb_ports) {
			RTE_LOG(INFO, VHOST_PORT, "\nSpecified port ID(%u) exceeds max system port ID(%u)\n",
				ports[portid], (nb_ports - 1));
			ports[portid] = INVALID_PORT_ID;
			valid_num_ports--;
		}
	}
	return valid_num_ports;
}

/*
 * Macro to print out packet contents. Wrapped in debug define so that the
 * data path is not affected when debug is disabled.
 */
#ifdef DEBUG
#define PRINT_PACKET(device, addr, size, header) do {				\
	char *pkt_addr = (char *)(addr);					\
	unsigned int index;							\
	char packet[MAX_PRINT_BUFF];						\
										\
	if ((header))								\
		snprintf(packet, MAX_PRINT_BUFF, "(%"PRIu64") Header size %d: ", (device->device_fh), (size)); \
	else									\
		snprintf(packet, MAX_PRINT_BUFF, "(%"PRIu64") Packet size %d: ", (device->device_fh), (size)); \
	for (index = 0; index < (size); index++) {				\
		snprintf(packet + strnlen(packet, MAX_PRINT_BUFF), MAX_PRINT_BUFF - strnlen(packet, MAX_PRINT_BUFF), \
			"%02hhx ", pkt_addr[index]);				\
	}									\
	snprintf(packet + strnlen(packet, MAX_PRINT_BUFF), MAX_PRINT_BUFF - strnlen(packet, MAX_PRINT_BUFF), "\n"); \
										\
	LOG_DEBUG(VHOST_DATA, "%s", packet);					\
} while (0)
#else
#define PRINT_PACKET(device, addr, size, header) do {} while (0)
#endif

/*
 * Function to convert guest physical addresses to vhost virtual addresses. This
 * is used to convert virtio buffer addresses.
 */
static inline uint64_t __attribute__((always_inline))
gpa_to_vva(struct virtio_net *dev, uint64_t guest_pa)
{
	struct virtio_memory_regions *region;
	uint32_t regionidx;
	uint64_t vhost_va = 0;

	for (regionidx = 0; regionidx < dev->mem->nregions; regionidx++) {
		region = &dev->mem->regions[regionidx];
		if ((guest_pa >= region->guest_phys_address) &&
			(guest_pa <= region->guest_phys_address_end)) {
			vhost_va = region->address_offset + guest_pa;
			break;
		}
	}
	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") GPA %p| VVA %p\n",
		dev->device_fh, (void *)(uintptr_t)guest_pa, (void *)(uintptr_t)vhost_va);

	return vhost_va;
}

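/*
 * Example of the translation above: if the guest maps a region whose
 * guest_phys_address is 0x0 at host virtual base B, then address_offset
 * equals B and a guest physical address of 0x1000 becomes the vhost
 * virtual address B + 0x1000. A GPA outside every region yields 0.
 */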
/*
 * Function to convert guest physical addresses to vhost physical addresses.
 * This is used to convert virtio buffer addresses.
 */
static inline uint64_t __attribute__((always_inline))
gpa_to_hpa(struct virtio_net *dev, uint64_t guest_pa,
	uint32_t buf_len, hpa_type *addr_type)
{
	struct virtio_memory_regions_hpa *region;
	uint32_t regionidx;
	uint64_t vhost_pa = 0;

	*addr_type = PHYS_ADDR_INVALID;

	for (regionidx = 0; regionidx < dev->mem->nregions_hpa; regionidx++) {
		region = &dev->mem->regions_hpa[regionidx];
		if ((guest_pa >= region->guest_phys_address) &&
			(guest_pa <= region->guest_phys_address_end)) {
			vhost_pa = region->host_phys_addr_offset + guest_pa;
			if (likely((guest_pa + buf_len - 1)
				<= region->guest_phys_address_end))
				*addr_type = PHYS_ADDR_CONTINUOUS;
			else
				*addr_type = PHYS_ADDR_CROSS_SUBREG;
			break;
		}
	}

	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") GPA %p| HPA %p\n",
		dev->device_fh, (void *)(uintptr_t)guest_pa,
		(void *)(uintptr_t)vhost_pa);

	return vhost_pa;
}

/*
 * This function adds buffers to the virtio devices RX virtqueue. Buffers can
 * be received from the physical port or from another virtio device. A packet
 * count is returned to indicate the number of packets that were successfully
 * added to the RX queue. This function works when mergeable is disabled.
 */
static inline uint32_t __attribute__((always_inline))
virtio_dev_rx(struct virtio_net *dev, struct rte_mbuf **pkts, uint32_t count)
{
	struct vhost_virtqueue *vq;
	struct vring_desc *desc;
	struct rte_mbuf *buff;
	/* The virtio_hdr is initialised to 0. */
	struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
	uint64_t buff_addr = 0;
	uint64_t buff_hdr_addr = 0;
	uint32_t head[MAX_PKT_BURST], packet_len = 0;
	uint32_t head_idx, packet_success = 0;
	uint32_t retry = 0;
	uint16_t avail_idx, res_cur_idx;
	uint16_t res_base_idx, res_end_idx;
	uint16_t free_entries;
	uint8_t success = 0;

	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_rx()\n", dev->device_fh);
	vq = dev->virtqueue[VIRTIO_RXQ];
	count = (count > MAX_PKT_BURST) ? MAX_PKT_BURST : count;

	/*
	 * As many data cores may want access to available buffers,
	 * they need to be reserved.
	 */
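	/*
	 * The reservation works by atomically advancing
	 * vq->last_used_idx_res with a compare-and-set: each core claims the
	 * range [res_base_idx, res_end_idx) of available entries, fills the
	 * corresponding used-ring slots, and then waits for its turn
	 * (last_used_idx == res_base_idx) before publishing used->idx, so
	 * entries are exposed to the guest in reservation order.
	 */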
	do {
		res_base_idx = vq->last_used_idx_res;
		avail_idx = *((volatile uint16_t *)&vq->avail->idx);

		free_entries = (avail_idx - res_base_idx);
		/* If retry is enabled and the queue is full then we wait and retry to avoid packet loss. */
		if (enable_retry && unlikely(count > free_entries)) {
			for (retry = 0; retry < burst_rx_retry_num; retry++) {
				rte_delay_us(burst_rx_delay_time);
				avail_idx =
					*((volatile uint16_t *)&vq->avail->idx);
				free_entries = (avail_idx - res_base_idx);
				if (count <= free_entries)
					break;
			}
		}

		/* check that we have enough buffers */
		if (unlikely(count > free_entries))
			count = free_entries;

		if (count == 0)
			return 0;

		res_end_idx = res_base_idx + count;
		/* vq->last_used_idx_res is atomically updated. */
		success = rte_atomic16_cmpset(&vq->last_used_idx_res, res_base_idx,
						res_end_idx);
	} while (unlikely(success == 0));
	res_cur_idx = res_base_idx;
	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Current Index %d| End Index %d\n", dev->device_fh, res_cur_idx, res_end_idx);

	/* Prefetch available ring to retrieve indexes. */
	rte_prefetch0(&vq->avail->ring[res_cur_idx & (vq->size - 1)]);

	/* Retrieve all of the head indexes first to avoid caching issues. */
	for (head_idx = 0; head_idx < count; head_idx++)
		head[head_idx] = vq->avail->ring[(res_cur_idx + head_idx) & (vq->size - 1)];

	/* Prefetch descriptor index. */
	rte_prefetch0(&vq->desc[head[packet_success]]);

	while (res_cur_idx != res_end_idx) {
		/* Get descriptor from available ring */
		desc = &vq->desc[head[packet_success]];

		buff = pkts[packet_success];

		/* Convert from gpa to vva (guest physical addr -> vhost virtual addr) */
		buff_addr = gpa_to_vva(dev, desc->addr);
		/* Prefetch buffer address. */
		rte_prefetch0((void *)(uintptr_t)buff_addr);

		/* Copy virtio_hdr to packet and increment buffer address */
		buff_hdr_addr = buff_addr;
		packet_len = rte_pktmbuf_data_len(buff) + vq->vhost_hlen;

		/*
		 * If the descriptors are chained the header and data are
		 * placed in separate buffers.
		 */
		if (desc->flags & VRING_DESC_F_NEXT) {
			desc->len = vq->vhost_hlen;
			desc = &vq->desc[desc->next];
			/* Buffer address translation. */
			buff_addr = gpa_to_vva(dev, desc->addr);
			desc->len = rte_pktmbuf_data_len(buff);
		} else {
			buff_addr += vq->vhost_hlen;
			desc->len = packet_len;
		}

		/* Update used ring with desc information */
		vq->used->ring[res_cur_idx & (vq->size - 1)].id = head[packet_success];
		vq->used->ring[res_cur_idx & (vq->size - 1)].len = packet_len;

		/* Copy mbuf data to buffer */
		rte_memcpy((void *)(uintptr_t)buff_addr,
			rte_pktmbuf_mtod(buff, const void *),
			rte_pktmbuf_data_len(buff));
		PRINT_PACKET(dev, (uintptr_t)buff_addr,
			rte_pktmbuf_data_len(buff), 0);

		res_cur_idx++;
		packet_success++;

		rte_memcpy((void *)(uintptr_t)buff_hdr_addr,
			(const void *)&virtio_hdr, vq->vhost_hlen);

		PRINT_PACKET(dev, (uintptr_t)buff_hdr_addr, vq->vhost_hlen, 1);

		if (res_cur_idx < res_end_idx) {
			/* Prefetch descriptor index. */
			rte_prefetch0(&vq->desc[head[packet_success]]);
		}
	}

	rte_compiler_barrier();

	/* Wait until it's our turn to add our buffer to the used ring. */
	while (unlikely(vq->last_used_idx != res_base_idx))
		rte_pause();

	*(volatile uint16_t *)&vq->used->idx += count;
	vq->last_used_idx = res_end_idx;

	/* Kick the guest if necessary. */
	if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
		eventfd_write((int)vq->kickfd, 1);
	return count;
}

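/*
 * Copies a single (possibly multi-segment) mbuf into one or more guest
 * buffers described by vq->buf_vec, for the RX path used when mergeable
 * buffers are negotiated: the virtio header's num_buffers field records how
 * many vring entries the packet consumes, and a used-ring entry is filled
 * in for each guest buffer that is completed.
 */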
*/ 1079*d19533e8SHuawei Xie struct virtio_net_hdr_mrg_rxbuf virtio_hdr = { 1080*d19533e8SHuawei Xie {0, 0, 0, 0, 0, 0}, 0}; 1081*d19533e8SHuawei Xie uint16_t cur_idx = res_base_idx; 1082*d19533e8SHuawei Xie uint64_t vb_addr = 0; 1083*d19533e8SHuawei Xie uint64_t vb_hdr_addr = 0; 1084*d19533e8SHuawei Xie uint32_t seg_offset = 0; 1085*d19533e8SHuawei Xie uint32_t vb_offset = 0; 1086*d19533e8SHuawei Xie uint32_t seg_avail; 1087*d19533e8SHuawei Xie uint32_t vb_avail; 1088*d19533e8SHuawei Xie uint32_t cpy_len, entry_len; 1089*d19533e8SHuawei Xie 1090*d19533e8SHuawei Xie if (pkt == NULL) 1091*d19533e8SHuawei Xie return 0; 1092*d19533e8SHuawei Xie 1093*d19533e8SHuawei Xie LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Current Index %d| " 1094*d19533e8SHuawei Xie "End Index %d\n", 1095*d19533e8SHuawei Xie dev->device_fh, cur_idx, res_end_idx); 1096*d19533e8SHuawei Xie 1097*d19533e8SHuawei Xie /* 1098*d19533e8SHuawei Xie * Convert from gpa to vva 1099*d19533e8SHuawei Xie * (guest physical addr -> vhost virtual addr) 1100*d19533e8SHuawei Xie */ 1101*d19533e8SHuawei Xie vq = dev->virtqueue[VIRTIO_RXQ]; 1102*d19533e8SHuawei Xie vb_addr = 1103*d19533e8SHuawei Xie gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr); 1104*d19533e8SHuawei Xie vb_hdr_addr = vb_addr; 1105*d19533e8SHuawei Xie 1106*d19533e8SHuawei Xie /* Prefetch buffer address. */ 1107*d19533e8SHuawei Xie rte_prefetch0((void *)(uintptr_t)vb_addr); 1108*d19533e8SHuawei Xie 1109*d19533e8SHuawei Xie virtio_hdr.num_buffers = res_end_idx - res_base_idx; 1110*d19533e8SHuawei Xie 1111*d19533e8SHuawei Xie LOG_DEBUG(VHOST_DATA, "(%"PRIu64") RX: Num merge buffers %d\n", 1112*d19533e8SHuawei Xie dev->device_fh, virtio_hdr.num_buffers); 1113*d19533e8SHuawei Xie 1114*d19533e8SHuawei Xie rte_memcpy((void *)(uintptr_t)vb_hdr_addr, 1115*d19533e8SHuawei Xie (const void *)&virtio_hdr, vq->vhost_hlen); 1116*d19533e8SHuawei Xie 1117*d19533e8SHuawei Xie PRINT_PACKET(dev, (uintptr_t)vb_hdr_addr, vq->vhost_hlen, 1); 1118*d19533e8SHuawei Xie 1119*d19533e8SHuawei Xie seg_avail = rte_pktmbuf_data_len(pkt); 1120*d19533e8SHuawei Xie vb_offset = vq->vhost_hlen; 1121*d19533e8SHuawei Xie vb_avail = 1122*d19533e8SHuawei Xie vq->buf_vec[vec_idx].buf_len - vq->vhost_hlen; 1123*d19533e8SHuawei Xie 1124*d19533e8SHuawei Xie entry_len = vq->vhost_hlen; 1125*d19533e8SHuawei Xie 1126*d19533e8SHuawei Xie if (vb_avail == 0) { 1127*d19533e8SHuawei Xie uint32_t desc_idx = 1128*d19533e8SHuawei Xie vq->buf_vec[vec_idx].desc_idx; 1129*d19533e8SHuawei Xie vq->desc[desc_idx].len = vq->vhost_hlen; 1130*d19533e8SHuawei Xie 1131*d19533e8SHuawei Xie if ((vq->desc[desc_idx].flags 1132*d19533e8SHuawei Xie & VRING_DESC_F_NEXT) == 0) { 1133*d19533e8SHuawei Xie /* Update used ring with desc information */ 1134*d19533e8SHuawei Xie vq->used->ring[cur_idx & (vq->size - 1)].id 1135*d19533e8SHuawei Xie = vq->buf_vec[vec_idx].desc_idx; 1136*d19533e8SHuawei Xie vq->used->ring[cur_idx & (vq->size - 1)].len 1137*d19533e8SHuawei Xie = entry_len; 1138*d19533e8SHuawei Xie 1139*d19533e8SHuawei Xie entry_len = 0; 1140*d19533e8SHuawei Xie cur_idx++; 1141*d19533e8SHuawei Xie entry_success++; 1142*d19533e8SHuawei Xie } 1143*d19533e8SHuawei Xie 1144*d19533e8SHuawei Xie vec_idx++; 1145*d19533e8SHuawei Xie vb_addr = 1146*d19533e8SHuawei Xie gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr); 1147*d19533e8SHuawei Xie 1148*d19533e8SHuawei Xie /* Prefetch buffer address. 
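 * Prefetching the translated guest buffer hides part of the memory-access
 * latency of the copy that follows.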
         */
        rte_prefetch0((void *)(uintptr_t)vb_addr);
        vb_offset = 0;
        vb_avail = vq->buf_vec[vec_idx].buf_len;
    }

    cpy_len = RTE_MIN(vb_avail, seg_avail);

    while (cpy_len > 0) {
        /* Copy mbuf data to vring buffer */
        rte_memcpy((void *)(uintptr_t)(vb_addr + vb_offset),
            (const void *)(rte_pktmbuf_mtod(pkt, char *) + seg_offset),
            cpy_len);

        PRINT_PACKET(dev,
            (uintptr_t)(vb_addr + vb_offset),
            cpy_len, 0);

        seg_offset += cpy_len;
        vb_offset += cpy_len;
        seg_avail -= cpy_len;
        vb_avail -= cpy_len;
        entry_len += cpy_len;

        if (seg_avail != 0) {
            /*
             * The virtio buffer in this vring entry has been
             * used up, but the current mbuf segment is not yet
             * fully copied.
             */
            if ((vq->desc[vq->buf_vec[vec_idx].desc_idx].flags &
                VRING_DESC_F_NEXT) == 0) {
                /* Update used ring with desc information */
                vq->used->ring[cur_idx & (vq->size - 1)].id
                    = vq->buf_vec[vec_idx].desc_idx;
                vq->used->ring[cur_idx & (vq->size - 1)].len
                    = entry_len;
                entry_len = 0;
                cur_idx++;
                entry_success++;
            }

            vec_idx++;
            vb_addr = gpa_to_vva(dev,
                vq->buf_vec[vec_idx].buf_addr);
            vb_offset = 0;
            vb_avail = vq->buf_vec[vec_idx].buf_len;
            cpy_len = RTE_MIN(vb_avail, seg_avail);
        } else {
            /*
             * The current mbuf segment is complete; continue to
             * check whether the whole packet is complete or not.
             */
            pkt = pkt->next;
            if (pkt != NULL) {
                /* There are more segments. */
                if (vb_avail == 0) {
                    /*
                     * The current vring buffer has been
                     * used up; fetch the next buffer
                     * from buf_vec.
1211*d19533e8SHuawei Xie */ 1212*d19533e8SHuawei Xie uint32_t desc_idx = 1213*d19533e8SHuawei Xie vq->buf_vec[vec_idx].desc_idx; 1214*d19533e8SHuawei Xie vq->desc[desc_idx].len = vb_offset; 1215*d19533e8SHuawei Xie 1216*d19533e8SHuawei Xie if ((vq->desc[desc_idx].flags & 1217*d19533e8SHuawei Xie VRING_DESC_F_NEXT) == 0) { 1218*d19533e8SHuawei Xie uint16_t wrapped_idx = 1219*d19533e8SHuawei Xie cur_idx & (vq->size - 1); 1220*d19533e8SHuawei Xie /* 1221*d19533e8SHuawei Xie * Update used ring with the 1222*d19533e8SHuawei Xie * descriptor information 1223*d19533e8SHuawei Xie */ 1224*d19533e8SHuawei Xie vq->used->ring[wrapped_idx].id 1225*d19533e8SHuawei Xie = desc_idx; 1226*d19533e8SHuawei Xie vq->used->ring[wrapped_idx].len 1227*d19533e8SHuawei Xie = entry_len; 1228*d19533e8SHuawei Xie entry_success++; 1229*d19533e8SHuawei Xie entry_len = 0; 1230*d19533e8SHuawei Xie cur_idx++; 1231*d19533e8SHuawei Xie } 1232*d19533e8SHuawei Xie 1233*d19533e8SHuawei Xie /* Get next buffer from buf_vec. */ 1234*d19533e8SHuawei Xie vec_idx++; 1235*d19533e8SHuawei Xie vb_addr = gpa_to_vva(dev, 1236*d19533e8SHuawei Xie vq->buf_vec[vec_idx].buf_addr); 1237*d19533e8SHuawei Xie vb_avail = 1238*d19533e8SHuawei Xie vq->buf_vec[vec_idx].buf_len; 1239*d19533e8SHuawei Xie vb_offset = 0; 1240*d19533e8SHuawei Xie } 1241*d19533e8SHuawei Xie 1242*d19533e8SHuawei Xie seg_offset = 0; 1243*d19533e8SHuawei Xie seg_avail = rte_pktmbuf_data_len(pkt); 1244*d19533e8SHuawei Xie cpy_len = RTE_MIN(vb_avail, seg_avail); 1245*d19533e8SHuawei Xie } else { 1246*d19533e8SHuawei Xie /* 1247*d19533e8SHuawei Xie * This whole packet completes. 1248*d19533e8SHuawei Xie */ 1249*d19533e8SHuawei Xie uint32_t desc_idx = 1250*d19533e8SHuawei Xie vq->buf_vec[vec_idx].desc_idx; 1251*d19533e8SHuawei Xie vq->desc[desc_idx].len = vb_offset; 1252*d19533e8SHuawei Xie 1253*d19533e8SHuawei Xie while (vq->desc[desc_idx].flags & 1254*d19533e8SHuawei Xie VRING_DESC_F_NEXT) { 1255*d19533e8SHuawei Xie desc_idx = vq->desc[desc_idx].next; 1256*d19533e8SHuawei Xie vq->desc[desc_idx].len = 0; 1257*d19533e8SHuawei Xie } 1258*d19533e8SHuawei Xie 1259*d19533e8SHuawei Xie /* Update used ring with desc information */ 1260*d19533e8SHuawei Xie vq->used->ring[cur_idx & (vq->size - 1)].id 1261*d19533e8SHuawei Xie = vq->buf_vec[vec_idx].desc_idx; 1262*d19533e8SHuawei Xie vq->used->ring[cur_idx & (vq->size - 1)].len 1263*d19533e8SHuawei Xie = entry_len; 1264*d19533e8SHuawei Xie entry_len = 0; 1265*d19533e8SHuawei Xie cur_idx++; 1266*d19533e8SHuawei Xie entry_success++; 1267*d19533e8SHuawei Xie seg_avail = 0; 1268*d19533e8SHuawei Xie cpy_len = RTE_MIN(vb_avail, seg_avail); 1269*d19533e8SHuawei Xie } 1270*d19533e8SHuawei Xie } 1271*d19533e8SHuawei Xie } 1272*d19533e8SHuawei Xie 1273*d19533e8SHuawei Xie return entry_success; 1274*d19533e8SHuawei Xie } 1275*d19533e8SHuawei Xie 1276*d19533e8SHuawei Xie /* 1277*d19533e8SHuawei Xie * This function adds buffers to the virtio devices RX virtqueue. Buffers can 1278*d19533e8SHuawei Xie * be received from the physical port or from another virtio device. A packet 1279*d19533e8SHuawei Xie * count is returned to indicate the number of packets that were succesfully 1280*d19533e8SHuawei Xie * added to the RX queue. This function works for mergeable RX. 
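 * Unlike the non-mergeable path, a single packet may span several descriptor
 * chains here; the buf_vec array filled in below records every guest buffer
 * the packet will occupy.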
1281*d19533e8SHuawei Xie */ 1282*d19533e8SHuawei Xie static inline uint32_t __attribute__((always_inline)) 1283*d19533e8SHuawei Xie virtio_dev_merge_rx(struct virtio_net *dev, struct rte_mbuf **pkts, 1284*d19533e8SHuawei Xie uint32_t count) 1285*d19533e8SHuawei Xie { 1286*d19533e8SHuawei Xie struct vhost_virtqueue *vq; 1287*d19533e8SHuawei Xie uint32_t pkt_idx = 0, entry_success = 0; 1288*d19533e8SHuawei Xie uint32_t retry = 0; 1289*d19533e8SHuawei Xie uint16_t avail_idx, res_cur_idx; 1290*d19533e8SHuawei Xie uint16_t res_base_idx, res_end_idx; 1291*d19533e8SHuawei Xie uint8_t success = 0; 1292*d19533e8SHuawei Xie 1293*d19533e8SHuawei Xie LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_merge_rx()\n", 1294*d19533e8SHuawei Xie dev->device_fh); 1295*d19533e8SHuawei Xie vq = dev->virtqueue[VIRTIO_RXQ]; 1296*d19533e8SHuawei Xie count = RTE_MIN((uint32_t)MAX_PKT_BURST, count); 1297*d19533e8SHuawei Xie 1298*d19533e8SHuawei Xie if (count == 0) 1299*d19533e8SHuawei Xie return 0; 1300*d19533e8SHuawei Xie 1301*d19533e8SHuawei Xie for (pkt_idx = 0; pkt_idx < count; pkt_idx++) { 1302*d19533e8SHuawei Xie uint32_t secure_len = 0; 1303*d19533e8SHuawei Xie uint16_t need_cnt; 1304*d19533e8SHuawei Xie uint32_t vec_idx = 0; 1305*d19533e8SHuawei Xie uint32_t pkt_len = pkts[pkt_idx]->pkt_len + vq->vhost_hlen; 1306*d19533e8SHuawei Xie uint16_t i, id; 1307*d19533e8SHuawei Xie 1308*d19533e8SHuawei Xie do { 1309*d19533e8SHuawei Xie /* 1310*d19533e8SHuawei Xie * As many data cores may want access to available 1311*d19533e8SHuawei Xie * buffers, they need to be reserved. 1312*d19533e8SHuawei Xie */ 1313*d19533e8SHuawei Xie res_base_idx = vq->last_used_idx_res; 1314*d19533e8SHuawei Xie res_cur_idx = res_base_idx; 1315*d19533e8SHuawei Xie 1316*d19533e8SHuawei Xie do { 1317*d19533e8SHuawei Xie avail_idx = *((volatile uint16_t *)&vq->avail->idx); 1318*d19533e8SHuawei Xie if (unlikely(res_cur_idx == avail_idx)) { 1319*d19533e8SHuawei Xie /* 1320*d19533e8SHuawei Xie * If retry is enabled and the queue is 1321*d19533e8SHuawei Xie * full then we wait and retry to avoid 1322*d19533e8SHuawei Xie * packet loss. 
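 * The reservation loop below walks the available descriptor chains until it
 * has secured enough guest buffer space (secure_len) for the whole packet
 * (pkt_len).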
1323*d19533e8SHuawei Xie */ 1324*d19533e8SHuawei Xie if (enable_retry) { 1325*d19533e8SHuawei Xie uint8_t cont = 0; 1326*d19533e8SHuawei Xie for (retry = 0; retry < burst_rx_retry_num; retry++) { 1327*d19533e8SHuawei Xie rte_delay_us(burst_rx_delay_time); 1328*d19533e8SHuawei Xie avail_idx = 1329*d19533e8SHuawei Xie *((volatile uint16_t *)&vq->avail->idx); 1330*d19533e8SHuawei Xie if (likely(res_cur_idx != avail_idx)) { 1331*d19533e8SHuawei Xie cont = 1; 1332*d19533e8SHuawei Xie break; 1333*d19533e8SHuawei Xie } 1334*d19533e8SHuawei Xie } 1335*d19533e8SHuawei Xie if (cont == 1) 1336*d19533e8SHuawei Xie continue; 1337*d19533e8SHuawei Xie } 1338*d19533e8SHuawei Xie 1339*d19533e8SHuawei Xie LOG_DEBUG(VHOST_DATA, 1340*d19533e8SHuawei Xie "(%"PRIu64") Failed " 1341*d19533e8SHuawei Xie "to get enough desc from " 1342*d19533e8SHuawei Xie "vring\n", 1343*d19533e8SHuawei Xie dev->device_fh); 1344*d19533e8SHuawei Xie return pkt_idx; 1345*d19533e8SHuawei Xie } else { 1346*d19533e8SHuawei Xie uint16_t wrapped_idx = 1347*d19533e8SHuawei Xie (res_cur_idx) & (vq->size - 1); 1348*d19533e8SHuawei Xie uint32_t idx = 1349*d19533e8SHuawei Xie vq->avail->ring[wrapped_idx]; 1350*d19533e8SHuawei Xie uint8_t next_desc; 1351*d19533e8SHuawei Xie 1352*d19533e8SHuawei Xie do { 1353*d19533e8SHuawei Xie next_desc = 0; 1354*d19533e8SHuawei Xie secure_len += vq->desc[idx].len; 1355*d19533e8SHuawei Xie if (vq->desc[idx].flags & 1356*d19533e8SHuawei Xie VRING_DESC_F_NEXT) { 1357*d19533e8SHuawei Xie idx = vq->desc[idx].next; 1358*d19533e8SHuawei Xie next_desc = 1; 1359*d19533e8SHuawei Xie } 1360*d19533e8SHuawei Xie } while (next_desc); 1361*d19533e8SHuawei Xie 1362*d19533e8SHuawei Xie res_cur_idx++; 1363*d19533e8SHuawei Xie } 1364*d19533e8SHuawei Xie } while (pkt_len > secure_len); 1365*d19533e8SHuawei Xie 1366*d19533e8SHuawei Xie /* vq->last_used_idx_res is atomically updated. 
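 * The compare-and-set succeeds only if no other core reserved entries in the
 * meantime; otherwise the reservation is recomputed and retried.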
*/ 1367*d19533e8SHuawei Xie success = rte_atomic16_cmpset(&vq->last_used_idx_res, 1368*d19533e8SHuawei Xie res_base_idx, 1369*d19533e8SHuawei Xie res_cur_idx); 1370*d19533e8SHuawei Xie } while (success == 0); 1371*d19533e8SHuawei Xie 1372*d19533e8SHuawei Xie id = res_base_idx; 1373*d19533e8SHuawei Xie need_cnt = res_cur_idx - res_base_idx; 1374*d19533e8SHuawei Xie 1375*d19533e8SHuawei Xie for (i = 0; i < need_cnt; i++, id++) { 1376*d19533e8SHuawei Xie uint16_t wrapped_idx = id & (vq->size - 1); 1377*d19533e8SHuawei Xie uint32_t idx = vq->avail->ring[wrapped_idx]; 1378*d19533e8SHuawei Xie uint8_t next_desc; 1379*d19533e8SHuawei Xie do { 1380*d19533e8SHuawei Xie next_desc = 0; 1381*d19533e8SHuawei Xie vq->buf_vec[vec_idx].buf_addr = 1382*d19533e8SHuawei Xie vq->desc[idx].addr; 1383*d19533e8SHuawei Xie vq->buf_vec[vec_idx].buf_len = 1384*d19533e8SHuawei Xie vq->desc[idx].len; 1385*d19533e8SHuawei Xie vq->buf_vec[vec_idx].desc_idx = idx; 1386*d19533e8SHuawei Xie vec_idx++; 1387*d19533e8SHuawei Xie 1388*d19533e8SHuawei Xie if (vq->desc[idx].flags & VRING_DESC_F_NEXT) { 1389*d19533e8SHuawei Xie idx = vq->desc[idx].next; 1390*d19533e8SHuawei Xie next_desc = 1; 1391*d19533e8SHuawei Xie } 1392*d19533e8SHuawei Xie } while (next_desc); 1393*d19533e8SHuawei Xie } 1394*d19533e8SHuawei Xie 1395*d19533e8SHuawei Xie res_end_idx = res_cur_idx; 1396*d19533e8SHuawei Xie 1397*d19533e8SHuawei Xie entry_success = copy_from_mbuf_to_vring(dev, res_base_idx, 1398*d19533e8SHuawei Xie res_end_idx, pkts[pkt_idx]); 1399*d19533e8SHuawei Xie 1400*d19533e8SHuawei Xie rte_compiler_barrier(); 1401*d19533e8SHuawei Xie 1402*d19533e8SHuawei Xie /* 1403*d19533e8SHuawei Xie * Wait until it's our turn to add our buffer 1404*d19533e8SHuawei Xie * to the used ring. 1405*d19533e8SHuawei Xie */ 1406*d19533e8SHuawei Xie while (unlikely(vq->last_used_idx != res_base_idx)) 1407*d19533e8SHuawei Xie rte_pause(); 1408*d19533e8SHuawei Xie 1409*d19533e8SHuawei Xie *(volatile uint16_t *)&vq->used->idx += entry_success; 1410*d19533e8SHuawei Xie vq->last_used_idx = res_end_idx; 1411*d19533e8SHuawei Xie 1412*d19533e8SHuawei Xie /* Kick the guest if necessary. */ 1413*d19533e8SHuawei Xie if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) 1414*d19533e8SHuawei Xie eventfd_write((int)vq->kickfd, 1); 1415*d19533e8SHuawei Xie } 1416*d19533e8SHuawei Xie 1417*d19533e8SHuawei Xie return count; 1418*d19533e8SHuawei Xie } 1419*d19533e8SHuawei Xie 1420*d19533e8SHuawei Xie /* 1421*d19533e8SHuawei Xie * Compares a packet destination MAC address to a device MAC address. 1422*d19533e8SHuawei Xie */ 1423*d19533e8SHuawei Xie static inline int __attribute__((always_inline)) 1424*d19533e8SHuawei Xie ether_addr_cmp(struct ether_addr *ea, struct ether_addr *eb) 1425*d19533e8SHuawei Xie { 1426*d19533e8SHuawei Xie return (((*(uint64_t *)ea ^ *(uint64_t *)eb) & MAC_ADDR_CMP) == 0); 1427*d19533e8SHuawei Xie } 1428*d19533e8SHuawei Xie 1429*d19533e8SHuawei Xie /* 1430*d19533e8SHuawei Xie * This function learns the MAC address of the device and registers this along with a 1431*d19533e8SHuawei Xie * vlan tag to a VMDQ. 
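 * The MAC is taken from the source address of the first packet the guest
 * transmits; a device that reuses an already registered MAC is rejected.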
1432*d19533e8SHuawei Xie */ 1433*d19533e8SHuawei Xie static int 1434*d19533e8SHuawei Xie link_vmdq(struct virtio_net *dev, struct rte_mbuf *m) 1435*d19533e8SHuawei Xie { 1436*d19533e8SHuawei Xie struct ether_hdr *pkt_hdr; 1437*d19533e8SHuawei Xie struct virtio_net_data_ll *dev_ll; 1438*d19533e8SHuawei Xie int i, ret; 1439*d19533e8SHuawei Xie 1440*d19533e8SHuawei Xie /* Learn MAC address of guest device from packet */ 1441*d19533e8SHuawei Xie pkt_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *); 1442*d19533e8SHuawei Xie 1443*d19533e8SHuawei Xie dev_ll = ll_root_used; 1444*d19533e8SHuawei Xie 1445*d19533e8SHuawei Xie while (dev_ll != NULL) { 1446*d19533e8SHuawei Xie if (ether_addr_cmp(&(pkt_hdr->s_addr), &dev_ll->dev->mac_address)) { 1447*d19533e8SHuawei Xie RTE_LOG(INFO, VHOST_DATA, "(%"PRIu64") WARNING: This device is using an existing MAC address and has not been registered.\n", dev->device_fh); 1448*d19533e8SHuawei Xie return -1; 1449*d19533e8SHuawei Xie } 1450*d19533e8SHuawei Xie dev_ll = dev_ll->next; 1451*d19533e8SHuawei Xie } 1452*d19533e8SHuawei Xie 1453*d19533e8SHuawei Xie for (i = 0; i < ETHER_ADDR_LEN; i++) 1454*d19533e8SHuawei Xie dev->mac_address.addr_bytes[i] = pkt_hdr->s_addr.addr_bytes[i]; 1455*d19533e8SHuawei Xie 1456*d19533e8SHuawei Xie /* vlan_tag currently uses the device_id. */ 1457*d19533e8SHuawei Xie dev->vlan_tag = vlan_tags[dev->device_fh]; 1458*d19533e8SHuawei Xie 1459*d19533e8SHuawei Xie /* Print out VMDQ registration info. */ 1460*d19533e8SHuawei Xie RTE_LOG(INFO, VHOST_DATA, "(%"PRIu64") MAC_ADDRESS %02x:%02x:%02x:%02x:%02x:%02x and VLAN_TAG %d registered\n", 1461*d19533e8SHuawei Xie dev->device_fh, 1462*d19533e8SHuawei Xie dev->mac_address.addr_bytes[0], dev->mac_address.addr_bytes[1], 1463*d19533e8SHuawei Xie dev->mac_address.addr_bytes[2], dev->mac_address.addr_bytes[3], 1464*d19533e8SHuawei Xie dev->mac_address.addr_bytes[4], dev->mac_address.addr_bytes[5], 1465*d19533e8SHuawei Xie dev->vlan_tag); 1466*d19533e8SHuawei Xie 1467*d19533e8SHuawei Xie /* Register the MAC address. */ 1468*d19533e8SHuawei Xie ret = rte_eth_dev_mac_addr_add(ports[0], &dev->mac_address, (uint32_t)dev->device_fh); 1469*d19533e8SHuawei Xie if (ret) 1470*d19533e8SHuawei Xie RTE_LOG(ERR, VHOST_DATA, "(%"PRIu64") Failed to add device MAC address to VMDQ\n", 1471*d19533e8SHuawei Xie dev->device_fh); 1472*d19533e8SHuawei Xie 1473*d19533e8SHuawei Xie /* Enable stripping of the vlan tag as we handle routing. */ 1474*d19533e8SHuawei Xie rte_eth_dev_set_vlan_strip_on_queue(ports[0], (uint16_t)dev->vmdq_rx_q, 1); 1475*d19533e8SHuawei Xie 1476*d19533e8SHuawei Xie /* Set device as ready for RX. */ 1477*d19533e8SHuawei Xie dev->ready = DEVICE_RX; 1478*d19533e8SHuawei Xie 1479*d19533e8SHuawei Xie return 0; 1480*d19533e8SHuawei Xie } 1481*d19533e8SHuawei Xie 1482*d19533e8SHuawei Xie /* 1483*d19533e8SHuawei Xie * Removes MAC address and vlan tag from VMDQ. Ensures that nothing is adding buffers to the RX 1484*d19533e8SHuawei Xie * queue before disabling RX on the device. 
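 * Any packets still sitting in the device's VMDQ RX queue are drained and
 * freed before the device is put back into MAC-learning state.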
1485*d19533e8SHuawei Xie */ 1486*d19533e8SHuawei Xie static inline void 1487*d19533e8SHuawei Xie unlink_vmdq(struct virtio_net *dev) 1488*d19533e8SHuawei Xie { 1489*d19533e8SHuawei Xie unsigned i = 0; 1490*d19533e8SHuawei Xie unsigned rx_count; 1491*d19533e8SHuawei Xie struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; 1492*d19533e8SHuawei Xie 1493*d19533e8SHuawei Xie if (dev->ready == DEVICE_RX) { 1494*d19533e8SHuawei Xie /*clear MAC and VLAN settings*/ 1495*d19533e8SHuawei Xie rte_eth_dev_mac_addr_remove(ports[0], &dev->mac_address); 1496*d19533e8SHuawei Xie for (i = 0; i < 6; i++) 1497*d19533e8SHuawei Xie dev->mac_address.addr_bytes[i] = 0; 1498*d19533e8SHuawei Xie 1499*d19533e8SHuawei Xie dev->vlan_tag = 0; 1500*d19533e8SHuawei Xie 1501*d19533e8SHuawei Xie /*Clear out the receive buffers*/ 1502*d19533e8SHuawei Xie rx_count = rte_eth_rx_burst(ports[0], 1503*d19533e8SHuawei Xie (uint16_t)dev->vmdq_rx_q, pkts_burst, MAX_PKT_BURST); 1504*d19533e8SHuawei Xie 1505*d19533e8SHuawei Xie while (rx_count) { 1506*d19533e8SHuawei Xie for (i = 0; i < rx_count; i++) 1507*d19533e8SHuawei Xie rte_pktmbuf_free(pkts_burst[i]); 1508*d19533e8SHuawei Xie 1509*d19533e8SHuawei Xie rx_count = rte_eth_rx_burst(ports[0], 1510*d19533e8SHuawei Xie (uint16_t)dev->vmdq_rx_q, pkts_burst, MAX_PKT_BURST); 1511*d19533e8SHuawei Xie } 1512*d19533e8SHuawei Xie 1513*d19533e8SHuawei Xie dev->ready = DEVICE_MAC_LEARNING; 1514*d19533e8SHuawei Xie } 1515*d19533e8SHuawei Xie } 1516*d19533e8SHuawei Xie 1517*d19533e8SHuawei Xie /* 1518*d19533e8SHuawei Xie * Check if the packet destination MAC address is for a local device. If so then put 1519*d19533e8SHuawei Xie * the packet on that devices RX queue. If not then return. 1520*d19533e8SHuawei Xie */ 1521*d19533e8SHuawei Xie static inline unsigned __attribute__((always_inline)) 1522*d19533e8SHuawei Xie virtio_tx_local(struct virtio_net *dev, struct rte_mbuf *m) 1523*d19533e8SHuawei Xie { 1524*d19533e8SHuawei Xie struct virtio_net_data_ll *dev_ll; 1525*d19533e8SHuawei Xie struct ether_hdr *pkt_hdr; 1526*d19533e8SHuawei Xie uint64_t ret = 0; 1527*d19533e8SHuawei Xie 1528*d19533e8SHuawei Xie pkt_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *); 1529*d19533e8SHuawei Xie 1530*d19533e8SHuawei Xie /*get the used devices list*/ 1531*d19533e8SHuawei Xie dev_ll = ll_root_used; 1532*d19533e8SHuawei Xie 1533*d19533e8SHuawei Xie while (dev_ll != NULL) { 1534*d19533e8SHuawei Xie if ((dev_ll->dev->ready == DEVICE_RX) && ether_addr_cmp(&(pkt_hdr->d_addr), 1535*d19533e8SHuawei Xie &dev_ll->dev->mac_address)) { 1536*d19533e8SHuawei Xie 1537*d19533e8SHuawei Xie /* Drop the packet if the TX packet is destined for the TX device. */ 1538*d19533e8SHuawei Xie if (dev_ll->dev->device_fh == dev->device_fh) { 1539*d19533e8SHuawei Xie LOG_DEBUG(VHOST_DATA, "(%"PRIu64") TX: Source and destination MAC addresses are the same. 
Dropping packet.\n", 1540*d19533e8SHuawei Xie dev_ll->dev->device_fh); 1541*d19533e8SHuawei Xie return 0; 1542*d19533e8SHuawei Xie } 1543*d19533e8SHuawei Xie 1544*d19533e8SHuawei Xie 1545*d19533e8SHuawei Xie LOG_DEBUG(VHOST_DATA, "(%"PRIu64") TX: MAC address is local\n", dev_ll->dev->device_fh); 1546*d19533e8SHuawei Xie 1547*d19533e8SHuawei Xie if (dev_ll->dev->remove) { 1548*d19533e8SHuawei Xie /*drop the packet if the device is marked for removal*/ 1549*d19533e8SHuawei Xie LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Device is marked for removal\n", dev_ll->dev->device_fh); 1550*d19533e8SHuawei Xie } else { 1551*d19533e8SHuawei Xie uint32_t mergeable = 1552*d19533e8SHuawei Xie dev_ll->dev->features & 1553*d19533e8SHuawei Xie (1 << VIRTIO_NET_F_MRG_RXBUF); 1554*d19533e8SHuawei Xie 1555*d19533e8SHuawei Xie /*send the packet to the local virtio device*/ 1556*d19533e8SHuawei Xie if (likely(mergeable == 0)) 1557*d19533e8SHuawei Xie ret = virtio_dev_rx(dev_ll->dev, &m, 1); 1558*d19533e8SHuawei Xie else 1559*d19533e8SHuawei Xie ret = virtio_dev_merge_rx(dev_ll->dev, 1560*d19533e8SHuawei Xie &m, 1); 1561*d19533e8SHuawei Xie 1562*d19533e8SHuawei Xie if (enable_stats) { 1563*d19533e8SHuawei Xie rte_atomic64_add( 1564*d19533e8SHuawei Xie &dev_statistics[dev_ll->dev->device_fh].rx_total_atomic, 1565*d19533e8SHuawei Xie 1); 1566*d19533e8SHuawei Xie rte_atomic64_add( 1567*d19533e8SHuawei Xie &dev_statistics[dev_ll->dev->device_fh].rx_atomic, 1568*d19533e8SHuawei Xie ret); 1569*d19533e8SHuawei Xie dev_statistics[dev->device_fh].tx_total++; 1570*d19533e8SHuawei Xie dev_statistics[dev->device_fh].tx += ret; 1571*d19533e8SHuawei Xie } 1572*d19533e8SHuawei Xie } 1573*d19533e8SHuawei Xie 1574*d19533e8SHuawei Xie return 0; 1575*d19533e8SHuawei Xie } 1576*d19533e8SHuawei Xie dev_ll = dev_ll->next; 1577*d19533e8SHuawei Xie } 1578*d19533e8SHuawei Xie 1579*d19533e8SHuawei Xie return -1; 1580*d19533e8SHuawei Xie } 1581*d19533e8SHuawei Xie 1582*d19533e8SHuawei Xie /* 1583*d19533e8SHuawei Xie * This function routes the TX packet to the correct interface. This may be a local device 1584*d19533e8SHuawei Xie * or the physical port. 
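 * In VM2VM software mode the packet is handed straight to the destination
 * device's RX virtqueue; in VM2VM hardware mode it is tagged with the
 * destination VM's VLAN and sent out through the physical port so that the
 * forwarding is done in hardware.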
1585*d19533e8SHuawei Xie */ 1586*d19533e8SHuawei Xie static inline void __attribute__((always_inline)) 1587*d19533e8SHuawei Xie virtio_tx_route(struct virtio_net* dev, struct rte_mbuf *m, struct rte_mempool *mbuf_pool, uint16_t vlan_tag) 1588*d19533e8SHuawei Xie { 1589*d19533e8SHuawei Xie struct mbuf_table *tx_q; 1590*d19533e8SHuawei Xie struct vlan_ethhdr *vlan_hdr; 1591*d19533e8SHuawei Xie struct rte_mbuf **m_table; 1592*d19533e8SHuawei Xie struct rte_mbuf *mbuf, *prev; 1593*d19533e8SHuawei Xie unsigned len, ret, offset = 0; 1594*d19533e8SHuawei Xie const uint16_t lcore_id = rte_lcore_id(); 1595*d19533e8SHuawei Xie struct virtio_net_data_ll *dev_ll = ll_root_used; 1596*d19533e8SHuawei Xie struct ether_hdr *pkt_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *); 1597*d19533e8SHuawei Xie 1598*d19533e8SHuawei Xie /*check if destination is local VM*/ 1599*d19533e8SHuawei Xie if ((vm2vm_mode == VM2VM_SOFTWARE) && (virtio_tx_local(dev, m) == 0)) 1600*d19533e8SHuawei Xie return; 1601*d19533e8SHuawei Xie 1602*d19533e8SHuawei Xie if (vm2vm_mode == VM2VM_HARDWARE) { 1603*d19533e8SHuawei Xie while (dev_ll != NULL) { 1604*d19533e8SHuawei Xie if ((dev_ll->dev->ready == DEVICE_RX) 1605*d19533e8SHuawei Xie && ether_addr_cmp(&(pkt_hdr->d_addr), 1606*d19533e8SHuawei Xie &dev_ll->dev->mac_address)) { 1607*d19533e8SHuawei Xie /* 1608*d19533e8SHuawei Xie * Drop the packet if the TX packet is 1609*d19533e8SHuawei Xie * destined for the TX device. 1610*d19533e8SHuawei Xie */ 1611*d19533e8SHuawei Xie if (dev_ll->dev->device_fh == dev->device_fh) { 1612*d19533e8SHuawei Xie LOG_DEBUG(VHOST_DATA, 1613*d19533e8SHuawei Xie "(%"PRIu64") TX: Source and destination" 1614*d19533e8SHuawei Xie " MAC addresses are the same. Dropping " 1615*d19533e8SHuawei Xie "packet.\n", 1616*d19533e8SHuawei Xie dev_ll->dev->device_fh); 1617*d19533e8SHuawei Xie return; 1618*d19533e8SHuawei Xie } 1619*d19533e8SHuawei Xie offset = 4; 1620*d19533e8SHuawei Xie vlan_tag = 1621*d19533e8SHuawei Xie (uint16_t) 1622*d19533e8SHuawei Xie vlan_tags[(uint16_t)dev_ll->dev->device_fh]; 1623*d19533e8SHuawei Xie 1624*d19533e8SHuawei Xie LOG_DEBUG(VHOST_DATA, 1625*d19533e8SHuawei Xie "(%"PRIu64") TX: pkt to local VM device id:" 1626*d19533e8SHuawei Xie "(%"PRIu64") vlan tag: %d.\n", 1627*d19533e8SHuawei Xie dev->device_fh, dev_ll->dev->device_fh, 1628*d19533e8SHuawei Xie vlan_tag); 1629*d19533e8SHuawei Xie 1630*d19533e8SHuawei Xie break; 1631*d19533e8SHuawei Xie } 1632*d19533e8SHuawei Xie dev_ll = dev_ll->next; 1633*d19533e8SHuawei Xie } 1634*d19533e8SHuawei Xie } 1635*d19533e8SHuawei Xie 1636*d19533e8SHuawei Xie LOG_DEBUG(VHOST_DATA, "(%"PRIu64") TX: MAC address is external\n", dev->device_fh); 1637*d19533e8SHuawei Xie 1638*d19533e8SHuawei Xie /*Add packet to the port tx queue*/ 1639*d19533e8SHuawei Xie tx_q = &lcore_tx_queue[lcore_id]; 1640*d19533e8SHuawei Xie len = tx_q->len; 1641*d19533e8SHuawei Xie 1642*d19533e8SHuawei Xie /* Allocate an mbuf and populate the structure. 
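 * The guest frame is copied into a freshly allocated mbuf chain and an
 * 802.1Q header is inserted between the Ethernet header and the payload
 * before the packet is queued on this core's TX queue for the physical port.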
*/ 1643*d19533e8SHuawei Xie mbuf = rte_pktmbuf_alloc(mbuf_pool); 1644*d19533e8SHuawei Xie if (unlikely(mbuf == NULL)) { 1645*d19533e8SHuawei Xie RTE_LOG(ERR, VHOST_DATA, 1646*d19533e8SHuawei Xie "Failed to allocate memory for mbuf.\n"); 1647*d19533e8SHuawei Xie return; 1648*d19533e8SHuawei Xie } 1649*d19533e8SHuawei Xie 1650*d19533e8SHuawei Xie mbuf->data_len = m->data_len + VLAN_HLEN + offset; 1651*d19533e8SHuawei Xie mbuf->pkt_len = m->pkt_len + VLAN_HLEN + offset; 1652*d19533e8SHuawei Xie mbuf->nb_segs = m->nb_segs; 1653*d19533e8SHuawei Xie 1654*d19533e8SHuawei Xie /* Copy ethernet header to mbuf. */ 1655*d19533e8SHuawei Xie rte_memcpy(rte_pktmbuf_mtod(mbuf, void *), 1656*d19533e8SHuawei Xie rte_pktmbuf_mtod(m, const void *), 1657*d19533e8SHuawei Xie ETH_HLEN); 1658*d19533e8SHuawei Xie 1659*d19533e8SHuawei Xie 1660*d19533e8SHuawei Xie /* Setup vlan header. Bytes need to be re-ordered for network with htons()*/ 1661*d19533e8SHuawei Xie vlan_hdr = rte_pktmbuf_mtod(mbuf, struct vlan_ethhdr *); 1662*d19533e8SHuawei Xie vlan_hdr->h_vlan_encapsulated_proto = vlan_hdr->h_vlan_proto; 1663*d19533e8SHuawei Xie vlan_hdr->h_vlan_proto = htons(ETH_P_8021Q); 1664*d19533e8SHuawei Xie vlan_hdr->h_vlan_TCI = htons(vlan_tag); 1665*d19533e8SHuawei Xie 1666*d19533e8SHuawei Xie /* Copy the remaining packet contents to the mbuf. */ 1667*d19533e8SHuawei Xie rte_memcpy((void *)(rte_pktmbuf_mtod(mbuf, uint8_t *) + VLAN_ETH_HLEN), 1668*d19533e8SHuawei Xie (const void *)(rte_pktmbuf_mtod(m, uint8_t *) + ETH_HLEN), 1669*d19533e8SHuawei Xie (m->data_len - ETH_HLEN)); 1670*d19533e8SHuawei Xie 1671*d19533e8SHuawei Xie /* Copy the remaining segments for the whole packet. */ 1672*d19533e8SHuawei Xie prev = mbuf; 1673*d19533e8SHuawei Xie while (m->next) { 1674*d19533e8SHuawei Xie /* Allocate an mbuf and populate the structure. */ 1675*d19533e8SHuawei Xie struct rte_mbuf *next_mbuf = rte_pktmbuf_alloc(mbuf_pool); 1676*d19533e8SHuawei Xie if (unlikely(next_mbuf == NULL)) { 1677*d19533e8SHuawei Xie rte_pktmbuf_free(mbuf); 1678*d19533e8SHuawei Xie RTE_LOG(ERR, VHOST_DATA, 1679*d19533e8SHuawei Xie "Failed to allocate memory for mbuf.\n"); 1680*d19533e8SHuawei Xie return; 1681*d19533e8SHuawei Xie } 1682*d19533e8SHuawei Xie 1683*d19533e8SHuawei Xie m = m->next; 1684*d19533e8SHuawei Xie prev->next = next_mbuf; 1685*d19533e8SHuawei Xie prev = next_mbuf; 1686*d19533e8SHuawei Xie next_mbuf->data_len = m->data_len; 1687*d19533e8SHuawei Xie 1688*d19533e8SHuawei Xie /* Copy data to next mbuf. */ 1689*d19533e8SHuawei Xie rte_memcpy(rte_pktmbuf_mtod(next_mbuf, void *), 1690*d19533e8SHuawei Xie rte_pktmbuf_mtod(m, const void *), m->data_len); 1691*d19533e8SHuawei Xie } 1692*d19533e8SHuawei Xie 1693*d19533e8SHuawei Xie tx_q->m_table[len] = mbuf; 1694*d19533e8SHuawei Xie len++; 1695*d19533e8SHuawei Xie if (enable_stats) { 1696*d19533e8SHuawei Xie dev_statistics[dev->device_fh].tx_total++; 1697*d19533e8SHuawei Xie dev_statistics[dev->device_fh].tx++; 1698*d19533e8SHuawei Xie } 1699*d19533e8SHuawei Xie 1700*d19533e8SHuawei Xie if (unlikely(len == MAX_PKT_BURST)) { 1701*d19533e8SHuawei Xie m_table = (struct rte_mbuf **)tx_q->m_table; 1702*d19533e8SHuawei Xie ret = rte_eth_tx_burst(ports[0], (uint16_t)tx_q->txq_id, m_table, (uint16_t) len); 1703*d19533e8SHuawei Xie /* Free any buffers not handled by TX and update the port stats. 
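 * rte_eth_tx_burst() may transmit fewer packets than requested, so any mbufs
 * it did not consume must be freed here to avoid leaking them.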
*/ 1704*d19533e8SHuawei Xie if (unlikely(ret < len)) { 1705*d19533e8SHuawei Xie do { 1706*d19533e8SHuawei Xie rte_pktmbuf_free(m_table[ret]); 1707*d19533e8SHuawei Xie } while (++ret < len); 1708*d19533e8SHuawei Xie } 1709*d19533e8SHuawei Xie 1710*d19533e8SHuawei Xie len = 0; 1711*d19533e8SHuawei Xie } 1712*d19533e8SHuawei Xie 1713*d19533e8SHuawei Xie tx_q->len = len; 1714*d19533e8SHuawei Xie return; 1715*d19533e8SHuawei Xie } 1716*d19533e8SHuawei Xie 1717*d19533e8SHuawei Xie static inline void __attribute__((always_inline)) 1718*d19533e8SHuawei Xie virtio_dev_tx(struct virtio_net* dev, struct rte_mempool *mbuf_pool) 1719*d19533e8SHuawei Xie { 1720*d19533e8SHuawei Xie struct rte_mbuf m; 1721*d19533e8SHuawei Xie struct vhost_virtqueue *vq; 1722*d19533e8SHuawei Xie struct vring_desc *desc; 1723*d19533e8SHuawei Xie uint64_t buff_addr = 0; 1724*d19533e8SHuawei Xie uint32_t head[MAX_PKT_BURST]; 1725*d19533e8SHuawei Xie uint32_t used_idx; 1726*d19533e8SHuawei Xie uint32_t i; 1727*d19533e8SHuawei Xie uint16_t free_entries, packet_success = 0; 1728*d19533e8SHuawei Xie uint16_t avail_idx; 1729*d19533e8SHuawei Xie 1730*d19533e8SHuawei Xie vq = dev->virtqueue[VIRTIO_TXQ]; 1731*d19533e8SHuawei Xie avail_idx = *((volatile uint16_t *)&vq->avail->idx); 1732*d19533e8SHuawei Xie 1733*d19533e8SHuawei Xie /* If there are no available buffers then return. */ 1734*d19533e8SHuawei Xie if (vq->last_used_idx == avail_idx) 1735*d19533e8SHuawei Xie return; 1736*d19533e8SHuawei Xie 1737*d19533e8SHuawei Xie LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_tx()\n", dev->device_fh); 1738*d19533e8SHuawei Xie 1739*d19533e8SHuawei Xie /* Prefetch available ring to retrieve head indexes. */ 1740*d19533e8SHuawei Xie rte_prefetch0(&vq->avail->ring[vq->last_used_idx & (vq->size - 1)]); 1741*d19533e8SHuawei Xie 1742*d19533e8SHuawei Xie /*get the number of free entries in the ring*/ 1743*d19533e8SHuawei Xie free_entries = (avail_idx - vq->last_used_idx); 1744*d19533e8SHuawei Xie 1745*d19533e8SHuawei Xie /* Limit to MAX_PKT_BURST. */ 1746*d19533e8SHuawei Xie if (free_entries > MAX_PKT_BURST) 1747*d19533e8SHuawei Xie free_entries = MAX_PKT_BURST; 1748*d19533e8SHuawei Xie 1749*d19533e8SHuawei Xie LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Buffers available %d\n", dev->device_fh, free_entries); 1750*d19533e8SHuawei Xie /* Retrieve all of the head indexes first to avoid caching issues. */ 1751*d19533e8SHuawei Xie for (i = 0; i < free_entries; i++) 1752*d19533e8SHuawei Xie head[i] = vq->avail->ring[(vq->last_used_idx + i) & (vq->size - 1)]; 1753*d19533e8SHuawei Xie 1754*d19533e8SHuawei Xie /* Prefetch descriptor index. */ 1755*d19533e8SHuawei Xie rte_prefetch0(&vq->desc[head[packet_success]]); 1756*d19533e8SHuawei Xie rte_prefetch0(&vq->used->ring[vq->last_used_idx & (vq->size - 1)]); 1757*d19533e8SHuawei Xie 1758*d19533e8SHuawei Xie while (packet_success < free_entries) { 1759*d19533e8SHuawei Xie desc = &vq->desc[head[packet_success]]; 1760*d19533e8SHuawei Xie 1761*d19533e8SHuawei Xie /* Discard first buffer as it is the virtio header */ 1762*d19533e8SHuawei Xie desc = &vq->desc[desc->next]; 1763*d19533e8SHuawei Xie 1764*d19533e8SHuawei Xie /* Buffer address translation. */ 1765*d19533e8SHuawei Xie buff_addr = gpa_to_vva(dev, desc->addr); 1766*d19533e8SHuawei Xie /* Prefetch buffer address. 
*/ 1767*d19533e8SHuawei Xie rte_prefetch0((void*)(uintptr_t)buff_addr); 1768*d19533e8SHuawei Xie 1769*d19533e8SHuawei Xie used_idx = vq->last_used_idx & (vq->size - 1); 1770*d19533e8SHuawei Xie 1771*d19533e8SHuawei Xie if (packet_success < (free_entries - 1)) { 1772*d19533e8SHuawei Xie /* Prefetch descriptor index. */ 1773*d19533e8SHuawei Xie rte_prefetch0(&vq->desc[head[packet_success+1]]); 1774*d19533e8SHuawei Xie rte_prefetch0(&vq->used->ring[(used_idx + 1) & (vq->size - 1)]); 1775*d19533e8SHuawei Xie } 1776*d19533e8SHuawei Xie 1777*d19533e8SHuawei Xie /* Update used index buffer information. */ 1778*d19533e8SHuawei Xie vq->used->ring[used_idx].id = head[packet_success]; 1779*d19533e8SHuawei Xie vq->used->ring[used_idx].len = 0; 1780*d19533e8SHuawei Xie 1781*d19533e8SHuawei Xie /* Setup dummy mbuf. This is copied to a real mbuf if transmitted out the physical port. */ 1782*d19533e8SHuawei Xie m.data_len = desc->len; 1783*d19533e8SHuawei Xie m.pkt_len = desc->len; 1784*d19533e8SHuawei Xie m.data_off = 0; 1785*d19533e8SHuawei Xie 1786*d19533e8SHuawei Xie PRINT_PACKET(dev, (uintptr_t)buff_addr, desc->len, 0); 1787*d19533e8SHuawei Xie 1788*d19533e8SHuawei Xie /* If this is the first received packet we need to learn the MAC and setup VMDQ */ 1789*d19533e8SHuawei Xie if (dev->ready == DEVICE_MAC_LEARNING) { 1790*d19533e8SHuawei Xie if (dev->remove || (link_vmdq(dev, &m) == -1)) { 1791*d19533e8SHuawei Xie /*discard frame if device is scheduled for removal or a duplicate MAC address is found. */ 1792*d19533e8SHuawei Xie packet_success += free_entries; 1793*d19533e8SHuawei Xie vq->last_used_idx += packet_success; 1794*d19533e8SHuawei Xie break; 1795*d19533e8SHuawei Xie } 1796*d19533e8SHuawei Xie } 1797*d19533e8SHuawei Xie virtio_tx_route(dev, &m, mbuf_pool, (uint16_t)dev->device_fh); 1798*d19533e8SHuawei Xie 1799*d19533e8SHuawei Xie vq->last_used_idx++; 1800*d19533e8SHuawei Xie packet_success++; 1801*d19533e8SHuawei Xie } 1802*d19533e8SHuawei Xie 1803*d19533e8SHuawei Xie rte_compiler_barrier(); 1804*d19533e8SHuawei Xie vq->used->idx += packet_success; 1805*d19533e8SHuawei Xie /* Kick guest if required. */ 1806*d19533e8SHuawei Xie if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) 1807*d19533e8SHuawei Xie eventfd_write((int)vq->kickfd, 1); 1808*d19533e8SHuawei Xie } 1809*d19533e8SHuawei Xie 1810*d19533e8SHuawei Xie /* This function works for TX packets with mergeable feature enabled. */ 1811*d19533e8SHuawei Xie static inline void __attribute__((always_inline)) 1812*d19533e8SHuawei Xie virtio_dev_merge_tx(struct virtio_net *dev, struct rte_mempool *mbuf_pool) 1813*d19533e8SHuawei Xie { 1814*d19533e8SHuawei Xie struct rte_mbuf *m, *prev; 1815*d19533e8SHuawei Xie struct vhost_virtqueue *vq; 1816*d19533e8SHuawei Xie struct vring_desc *desc; 1817*d19533e8SHuawei Xie uint64_t vb_addr = 0; 1818*d19533e8SHuawei Xie uint32_t head[MAX_PKT_BURST]; 1819*d19533e8SHuawei Xie uint32_t used_idx; 1820*d19533e8SHuawei Xie uint32_t i; 1821*d19533e8SHuawei Xie uint16_t free_entries, entry_success = 0; 1822*d19533e8SHuawei Xie uint16_t avail_idx; 1823*d19533e8SHuawei Xie uint32_t buf_size = MBUF_SIZE - (sizeof(struct rte_mbuf) 1824*d19533e8SHuawei Xie + RTE_PKTMBUF_HEADROOM); 1825*d19533e8SHuawei Xie 1826*d19533e8SHuawei Xie vq = dev->virtqueue[VIRTIO_TXQ]; 1827*d19533e8SHuawei Xie avail_idx = *((volatile uint16_t *)&vq->avail->idx); 1828*d19533e8SHuawei Xie 1829*d19533e8SHuawei Xie /* If there are no available buffers then return. 
*/ 1830*d19533e8SHuawei Xie if (vq->last_used_idx == avail_idx) 1831*d19533e8SHuawei Xie return; 1832*d19533e8SHuawei Xie 1833*d19533e8SHuawei Xie LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_merge_tx()\n", 1834*d19533e8SHuawei Xie dev->device_fh); 1835*d19533e8SHuawei Xie 1836*d19533e8SHuawei Xie /* Prefetch available ring to retrieve head indexes. */ 1837*d19533e8SHuawei Xie rte_prefetch0(&vq->avail->ring[vq->last_used_idx & (vq->size - 1)]); 1838*d19533e8SHuawei Xie 1839*d19533e8SHuawei Xie /*get the number of free entries in the ring*/ 1840*d19533e8SHuawei Xie free_entries = (avail_idx - vq->last_used_idx); 1841*d19533e8SHuawei Xie 1842*d19533e8SHuawei Xie /* Limit to MAX_PKT_BURST. */ 1843*d19533e8SHuawei Xie free_entries = RTE_MIN(free_entries, MAX_PKT_BURST); 1844*d19533e8SHuawei Xie 1845*d19533e8SHuawei Xie LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Buffers available %d\n", 1846*d19533e8SHuawei Xie dev->device_fh, free_entries); 1847*d19533e8SHuawei Xie /* Retrieve all of the head indexes first to avoid caching issues. */ 1848*d19533e8SHuawei Xie for (i = 0; i < free_entries; i++) 1849*d19533e8SHuawei Xie head[i] = vq->avail->ring[(vq->last_used_idx + i) & (vq->size - 1)]; 1850*d19533e8SHuawei Xie 1851*d19533e8SHuawei Xie /* Prefetch descriptor index. */ 1852*d19533e8SHuawei Xie rte_prefetch0(&vq->desc[head[entry_success]]); 1853*d19533e8SHuawei Xie rte_prefetch0(&vq->used->ring[vq->last_used_idx & (vq->size - 1)]); 1854*d19533e8SHuawei Xie 1855*d19533e8SHuawei Xie while (entry_success < free_entries) { 1856*d19533e8SHuawei Xie uint32_t vb_avail, vb_offset; 1857*d19533e8SHuawei Xie uint32_t seg_avail, seg_offset; 1858*d19533e8SHuawei Xie uint32_t cpy_len; 1859*d19533e8SHuawei Xie uint32_t seg_num = 0; 1860*d19533e8SHuawei Xie struct rte_mbuf *cur; 1861*d19533e8SHuawei Xie uint8_t alloc_err = 0; 1862*d19533e8SHuawei Xie 1863*d19533e8SHuawei Xie desc = &vq->desc[head[entry_success]]; 1864*d19533e8SHuawei Xie 1865*d19533e8SHuawei Xie /* Discard first buffer as it is the virtio header */ 1866*d19533e8SHuawei Xie desc = &vq->desc[desc->next]; 1867*d19533e8SHuawei Xie 1868*d19533e8SHuawei Xie /* Buffer address translation. */ 1869*d19533e8SHuawei Xie vb_addr = gpa_to_vva(dev, desc->addr); 1870*d19533e8SHuawei Xie /* Prefetch buffer address. */ 1871*d19533e8SHuawei Xie rte_prefetch0((void *)(uintptr_t)vb_addr); 1872*d19533e8SHuawei Xie 1873*d19533e8SHuawei Xie used_idx = vq->last_used_idx & (vq->size - 1); 1874*d19533e8SHuawei Xie 1875*d19533e8SHuawei Xie if (entry_success < (free_entries - 1)) { 1876*d19533e8SHuawei Xie /* Prefetch descriptor index. */ 1877*d19533e8SHuawei Xie rte_prefetch0(&vq->desc[head[entry_success+1]]); 1878*d19533e8SHuawei Xie rte_prefetch0(&vq->used->ring[(used_idx + 1) & (vq->size - 1)]); 1879*d19533e8SHuawei Xie } 1880*d19533e8SHuawei Xie 1881*d19533e8SHuawei Xie /* Update used index buffer information. */ 1882*d19533e8SHuawei Xie vq->used->ring[used_idx].id = head[entry_success]; 1883*d19533e8SHuawei Xie vq->used->ring[used_idx].len = 0; 1884*d19533e8SHuawei Xie 1885*d19533e8SHuawei Xie vb_offset = 0; 1886*d19533e8SHuawei Xie vb_avail = desc->len; 1887*d19533e8SHuawei Xie seg_offset = 0; 1888*d19533e8SHuawei Xie seg_avail = buf_size; 1889*d19533e8SHuawei Xie cpy_len = RTE_MIN(vb_avail, seg_avail); 1890*d19533e8SHuawei Xie 1891*d19533e8SHuawei Xie PRINT_PACKET(dev, (uintptr_t)vb_addr, desc->len, 0); 1892*d19533e8SHuawei Xie 1893*d19533e8SHuawei Xie /* Allocate an mbuf and populate the structure. 
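 * The guest TX buffers, possibly chained, are copied into a chain of mbufs;
 * a new segment of up to buf_size bytes is appended whenever the current one
 * fills up.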
         */
        m = rte_pktmbuf_alloc(mbuf_pool);
        if (unlikely(m == NULL)) {
            RTE_LOG(ERR, VHOST_DATA,
                "Failed to allocate memory for mbuf.\n");
            return;
        }

        seg_num++;
        cur = m;
        prev = m;
        while (cpy_len != 0) {
            rte_memcpy((void *)(rte_pktmbuf_mtod(cur, char *) + seg_offset),
                (void *)((uintptr_t)(vb_addr + vb_offset)),
                cpy_len);

            seg_offset += cpy_len;
            vb_offset += cpy_len;
            vb_avail -= cpy_len;
            seg_avail -= cpy_len;

            if (vb_avail != 0) {
                /*
                 * The current mbuf segment is full, while the
                 * virtio buffer in the TX vring still has more
                 * data to be copied.
                 */
                cur->data_len = seg_offset;
                m->pkt_len += seg_offset;
                /* Allocate mbuf and populate the structure. */
                cur = rte_pktmbuf_alloc(mbuf_pool);
                if (unlikely(cur == NULL)) {
                    RTE_LOG(ERR, VHOST_DATA, "Failed to "
                        "allocate memory for mbuf.\n");
                    rte_pktmbuf_free(m);
                    alloc_err = 1;
                    break;
                }

                seg_num++;
                prev->next = cur;
                prev = cur;
                seg_offset = 0;
                seg_avail = buf_size;
            } else {
                if (desc->flags & VRING_DESC_F_NEXT) {
                    /*
                     * There are more virtio buffers in
                     * the same vring entry that need to
                     * be copied.
                     */
                    if (seg_avail == 0) {
                        /*
                         * The current segment has no
                         * room left to accommodate
                         * more data.
                         */
                        cur->data_len = seg_offset;
                        m->pkt_len += seg_offset;
                        /*
                         * Allocate an mbuf and
                         * populate the structure.
1954*d19533e8SHuawei Xie */ 1955*d19533e8SHuawei Xie cur = rte_pktmbuf_alloc(mbuf_pool); 1956*d19533e8SHuawei Xie if (unlikely(cur == NULL)) { 1957*d19533e8SHuawei Xie RTE_LOG(ERR, 1958*d19533e8SHuawei Xie VHOST_DATA, 1959*d19533e8SHuawei Xie "Failed to " 1960*d19533e8SHuawei Xie "allocate memory " 1961*d19533e8SHuawei Xie "for mbuf\n"); 1962*d19533e8SHuawei Xie rte_pktmbuf_free(m); 1963*d19533e8SHuawei Xie alloc_err = 1; 1964*d19533e8SHuawei Xie break; 1965*d19533e8SHuawei Xie } 1966*d19533e8SHuawei Xie seg_num++; 1967*d19533e8SHuawei Xie prev->next = cur; 1968*d19533e8SHuawei Xie prev = cur; 1969*d19533e8SHuawei Xie seg_offset = 0; 1970*d19533e8SHuawei Xie seg_avail = buf_size; 1971*d19533e8SHuawei Xie } 1972*d19533e8SHuawei Xie 1973*d19533e8SHuawei Xie desc = &vq->desc[desc->next]; 1974*d19533e8SHuawei Xie 1975*d19533e8SHuawei Xie /* Buffer address translation. */ 1976*d19533e8SHuawei Xie vb_addr = gpa_to_vva(dev, desc->addr); 1977*d19533e8SHuawei Xie /* Prefetch buffer address. */ 1978*d19533e8SHuawei Xie rte_prefetch0((void *)(uintptr_t)vb_addr); 1979*d19533e8SHuawei Xie vb_offset = 0; 1980*d19533e8SHuawei Xie vb_avail = desc->len; 1981*d19533e8SHuawei Xie 1982*d19533e8SHuawei Xie PRINT_PACKET(dev, (uintptr_t)vb_addr, 1983*d19533e8SHuawei Xie desc->len, 0); 1984*d19533e8SHuawei Xie } else { 1985*d19533e8SHuawei Xie /* The whole packet completes. */ 1986*d19533e8SHuawei Xie cur->data_len = seg_offset; 1987*d19533e8SHuawei Xie m->pkt_len += seg_offset; 1988*d19533e8SHuawei Xie vb_avail = 0; 1989*d19533e8SHuawei Xie } 1990*d19533e8SHuawei Xie } 1991*d19533e8SHuawei Xie 1992*d19533e8SHuawei Xie cpy_len = RTE_MIN(vb_avail, seg_avail); 1993*d19533e8SHuawei Xie } 1994*d19533e8SHuawei Xie 1995*d19533e8SHuawei Xie if (unlikely(alloc_err == 1)) 1996*d19533e8SHuawei Xie break; 1997*d19533e8SHuawei Xie 1998*d19533e8SHuawei Xie m->nb_segs = seg_num; 1999*d19533e8SHuawei Xie 2000*d19533e8SHuawei Xie /* 2001*d19533e8SHuawei Xie * If this is the first received packet we need to learn 2002*d19533e8SHuawei Xie * the MAC and setup VMDQ 2003*d19533e8SHuawei Xie */ 2004*d19533e8SHuawei Xie if (dev->ready == DEVICE_MAC_LEARNING) { 2005*d19533e8SHuawei Xie if (dev->remove || (link_vmdq(dev, m) == -1)) { 2006*d19533e8SHuawei Xie /* 2007*d19533e8SHuawei Xie * Discard frame if device is scheduled for 2008*d19533e8SHuawei Xie * removal or a duplicate MAC address is found. 2009*d19533e8SHuawei Xie */ 2010*d19533e8SHuawei Xie entry_success = free_entries; 2011*d19533e8SHuawei Xie vq->last_used_idx += entry_success; 2012*d19533e8SHuawei Xie rte_pktmbuf_free(m); 2013*d19533e8SHuawei Xie break; 2014*d19533e8SHuawei Xie } 2015*d19533e8SHuawei Xie } 2016*d19533e8SHuawei Xie 2017*d19533e8SHuawei Xie virtio_tx_route(dev, m, mbuf_pool, (uint16_t)dev->device_fh); 2018*d19533e8SHuawei Xie vq->last_used_idx++; 2019*d19533e8SHuawei Xie entry_success++; 2020*d19533e8SHuawei Xie rte_pktmbuf_free(m); 2021*d19533e8SHuawei Xie } 2022*d19533e8SHuawei Xie 2023*d19533e8SHuawei Xie rte_compiler_barrier(); 2024*d19533e8SHuawei Xie vq->used->idx += entry_success; 2025*d19533e8SHuawei Xie /* Kick guest if required. */ 2026*d19533e8SHuawei Xie if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) 2027*d19533e8SHuawei Xie eventfd_write((int)vq->kickfd, 1); 2028*d19533e8SHuawei Xie 2029*d19533e8SHuawei Xie } 2030*d19533e8SHuawei Xie 2031*d19533e8SHuawei Xie /* 2032*d19533e8SHuawei Xie * This function is called by each data core. It handles all RX/TX registered with the 2033*d19533e8SHuawei Xie * core. 
 * For TX the specific lcore linked list is used. For RX, MAC addresses are compared
 * with all devices in the main linked list.
 */
static int
switch_worker(__attribute__((unused)) void *arg)
{
    struct rte_mempool *mbuf_pool = arg;
    struct virtio_net *dev = NULL;
    struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
    struct virtio_net_data_ll *dev_ll;
    struct mbuf_table *tx_q;
    volatile struct lcore_ll_info *lcore_ll;
    const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * BURST_TX_DRAIN_US;
    uint64_t prev_tsc, diff_tsc, cur_tsc, ret_count = 0;
    unsigned ret, i;
    const uint16_t lcore_id = rte_lcore_id();
    const uint16_t num_cores = (uint16_t)rte_lcore_count();
    uint16_t rx_count = 0;
    uint32_t mergeable = 0;

    RTE_LOG(INFO, VHOST_DATA, "Processing on Core %u started\n", lcore_id);
    lcore_ll = lcore_info[lcore_id].lcore_ll;
    prev_tsc = 0;

    tx_q = &lcore_tx_queue[lcore_id];
    for (i = 0; i < num_cores; i++) {
        if (lcore_ids[i] == lcore_id) {
            tx_q->txq_id = i;
            break;
        }
    }

    while (1) {
        cur_tsc = rte_rdtsc();

        /* TX burst queue drain */
        diff_tsc = cur_tsc - prev_tsc;
        if (unlikely(diff_tsc > drain_tsc)) {

            if (tx_q->len) {
                LOG_DEBUG(VHOST_DATA, "TX queue drained after timeout with burst size %u\n", tx_q->len);

                /* Tx any packets in the queue */
                ret = rte_eth_tx_burst(ports[0], (uint16_t)tx_q->txq_id,
                    (struct rte_mbuf **)tx_q->m_table,
                    (uint16_t)tx_q->len);
                if (unlikely(ret < tx_q->len)) {
                    do {
                        rte_pktmbuf_free(tx_q->m_table[ret]);
                    } while (++ret < tx_q->len);
                }

                tx_q->len = 0;
            }

            prev_tsc = cur_tsc;

        }

        rte_prefetch0(lcore_ll->ll_root_used);
        /*
         * Inform the configuration core that we have exited the linked list and that no devices are
         * in use if requested.
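         * The dev_removal_flag handshake below (REQUEST_DEV_REMOVAL ->
         * ACK_DEV_REMOVAL) tells the configuration core when it is safe to
         * free a device that this core has stopped using.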
         */
        if (lcore_ll->dev_removal_flag == REQUEST_DEV_REMOVAL)
            lcore_ll->dev_removal_flag = ACK_DEV_REMOVAL;

        /* Process devices */
        dev_ll = lcore_ll->ll_root_used;

        while (dev_ll != NULL) {
            /* Get the virtio device. */
            dev = dev_ll->dev;
            mergeable =
                dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF);

            if (dev->remove) {
                dev_ll = dev_ll->next;
                unlink_vmdq(dev);
                dev->ready = DEVICE_SAFE_REMOVE;
                continue;
            }
            if (likely(dev->ready == DEVICE_RX)) {
                /* Handle guest RX */
                rx_count = rte_eth_rx_burst(ports[0],
                    (uint16_t)dev->vmdq_rx_q, pkts_burst, MAX_PKT_BURST);

                if (rx_count) {
                    if (likely(mergeable == 0))
                        ret_count =
                            virtio_dev_rx(dev,
                                pkts_burst, rx_count);
                    else
                        ret_count =
                            virtio_dev_merge_rx(dev,
                                pkts_burst, rx_count);

                    if (enable_stats) {
                        rte_atomic64_add(
                            &dev_statistics[dev_ll->dev->device_fh].rx_total_atomic,
                            rx_count);
                        rte_atomic64_add(
                            &dev_statistics[dev_ll->dev->device_fh].rx_atomic, ret_count);
                    }
                    while (likely(rx_count)) {
                        rx_count--;
                        rte_pktmbuf_free(pkts_burst[rx_count]);
                    }

                }
            }

            if (!dev->remove) {
                /* Handle guest TX */
                if (likely(mergeable == 0))
                    virtio_dev_tx(dev, mbuf_pool);
                else
                    virtio_dev_merge_tx(dev, mbuf_pool);
            }

            /* Move to the next device in the list. */
            dev_ll = dev_ll->next;
        }
    }

    return 0;
}

/*
 * This function gets the number of available ring entries for zero copy RX.
 * Only one thread will call this function for a particular virtio device,
 * so it is designed as a non-thread-safe function.
 */
static inline uint32_t __attribute__((always_inline))
get_available_ring_num_zcp(struct virtio_net *dev)
{
    struct vhost_virtqueue *vq = dev->virtqueue[VIRTIO_RXQ];
    uint16_t avail_idx;

    avail_idx = *((volatile uint16_t *)&vq->avail->idx);
    return (uint32_t)(avail_idx - vq->last_used_idx_res);
}

/*
 * This function gets available ring indexes for zero copy RX;
 * it retries up to 'burst_rx_retry_num' times until it gets enough indexes.
 * Only one thread will call this function for a particular virtio device,
 * so it is designed as a non-thread-safe function.
 */
static inline uint32_t __attribute__((always_inline))
get_available_ring_index_zcp(struct virtio_net *dev,
    uint16_t *res_base_idx, uint32_t count)
{
    struct vhost_virtqueue *vq = dev->virtqueue[VIRTIO_RXQ];
    uint16_t avail_idx;
    uint32_t retry = 0;
    uint16_t free_entries;

    *res_base_idx = vq->last_used_idx_res;
    avail_idx = *((volatile uint16_t *)&vq->avail->idx);
    free_entries = (avail_idx - *res_base_idx);

    LOG_DEBUG(VHOST_DATA, "(%"PRIu64") in get_available_ring_index_zcp: "
        "avail idx: %d, "
        "res base idx:%d, free entries:%d\n",
        dev->device_fh, avail_idx, *res_base_idx,
        free_entries);

    /*
     * If retry is enabled and the queue is full then we wait
     * and retry to avoid packet loss.
     */
    if (enable_retry && unlikely(count > free_entries)) {
        for (retry = 0; retry < burst_rx_retry_num; retry++) {
            rte_delay_us(burst_rx_delay_time);
            avail_idx = *((volatile uint16_t *)&vq->avail->idx);
            free_entries = (avail_idx - *res_base_idx);
            if (count <= free_entries)
                break;
        }
    }

    /* Check that we have enough buffers. */
    if (unlikely(count > free_entries))
        count = free_entries;

    if (unlikely(count == 0)) {
        LOG_DEBUG(VHOST_DATA,
            "(%"PRIu64") Fail in get_available_ring_index_zcp: "
            "avail idx: %d, res base idx:%d, free entries:%d\n",
            dev->device_fh, avail_idx,
            *res_base_idx, free_entries);
        return 0;
    }

    vq->last_used_idx_res = *res_base_idx + count;

    return count;
}

/*
 * This function puts a descriptor back on the used list.
 */
static inline void __attribute__((always_inline))
put_desc_to_used_list_zcp(struct vhost_virtqueue *vq, uint16_t desc_idx)
{
    uint16_t res_cur_idx = vq->last_used_idx;
    vq->used->ring[res_cur_idx & (vq->size - 1)].id = (uint32_t)desc_idx;
    vq->used->ring[res_cur_idx & (vq->size - 1)].len = 0;
    rte_compiler_barrier();
    *(volatile uint16_t *)&vq->used->idx += 1;
    vq->last_used_idx += 1;

    /* Kick the guest if necessary. */
    if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
        eventfd_write((int)vq->kickfd, 1);
}

/*
 * This function gets an available descriptor from the virtio vring and an
 * unattached mbuf from vpool->ring, and then attaches them together. It needs
 * to adjust the offsets for buff_addr and phys_addr according to the PMD
 * implementation, otherwise the frame data may be placed at the wrong
 * location in the mbuf.
2259*d19533e8SHuawei Xie */ 2260*d19533e8SHuawei Xie static inline void __attribute__((always_inline)) 2261*d19533e8SHuawei Xie attach_rxmbuf_zcp(struct virtio_net *dev) 2262*d19533e8SHuawei Xie { 2263*d19533e8SHuawei Xie uint16_t res_base_idx, desc_idx; 2264*d19533e8SHuawei Xie uint64_t buff_addr, phys_addr; 2265*d19533e8SHuawei Xie struct vhost_virtqueue *vq; 2266*d19533e8SHuawei Xie struct vring_desc *desc; 2267*d19533e8SHuawei Xie struct rte_mbuf *mbuf = NULL; 2268*d19533e8SHuawei Xie struct vpool *vpool; 2269*d19533e8SHuawei Xie hpa_type addr_type; 2270*d19533e8SHuawei Xie 2271*d19533e8SHuawei Xie vpool = &vpool_array[dev->vmdq_rx_q]; 2272*d19533e8SHuawei Xie vq = dev->virtqueue[VIRTIO_RXQ]; 2273*d19533e8SHuawei Xie 2274*d19533e8SHuawei Xie do { 2275*d19533e8SHuawei Xie if (unlikely(get_available_ring_index_zcp(dev, &res_base_idx, 2276*d19533e8SHuawei Xie 1) != 1)) 2277*d19533e8SHuawei Xie return; 2278*d19533e8SHuawei Xie desc_idx = vq->avail->ring[(res_base_idx) & (vq->size - 1)]; 2279*d19533e8SHuawei Xie 2280*d19533e8SHuawei Xie desc = &vq->desc[desc_idx]; 2281*d19533e8SHuawei Xie if (desc->flags & VRING_DESC_F_NEXT) { 2282*d19533e8SHuawei Xie desc = &vq->desc[desc->next]; 2283*d19533e8SHuawei Xie buff_addr = gpa_to_vva(dev, desc->addr); 2284*d19533e8SHuawei Xie phys_addr = gpa_to_hpa(dev, desc->addr, desc->len, 2285*d19533e8SHuawei Xie &addr_type); 2286*d19533e8SHuawei Xie } else { 2287*d19533e8SHuawei Xie buff_addr = gpa_to_vva(dev, 2288*d19533e8SHuawei Xie desc->addr + vq->vhost_hlen); 2289*d19533e8SHuawei Xie phys_addr = gpa_to_hpa(dev, 2290*d19533e8SHuawei Xie desc->addr + vq->vhost_hlen, 2291*d19533e8SHuawei Xie desc->len, &addr_type); 2292*d19533e8SHuawei Xie } 2293*d19533e8SHuawei Xie 2294*d19533e8SHuawei Xie if (unlikely(addr_type == PHYS_ADDR_INVALID)) { 2295*d19533e8SHuawei Xie RTE_LOG(ERR, VHOST_DATA, "(%"PRIu64") Invalid frame buffer" 2296*d19533e8SHuawei Xie " address found when attaching RX frame buffer" 2297*d19533e8SHuawei Xie " address!\n", dev->device_fh); 2298*d19533e8SHuawei Xie put_desc_to_used_list_zcp(vq, desc_idx); 2299*d19533e8SHuawei Xie continue; 2300*d19533e8SHuawei Xie } 2301*d19533e8SHuawei Xie 2302*d19533e8SHuawei Xie /* 2303*d19533e8SHuawei Xie * Check if the frame buffer address from guest crosses 2304*d19533e8SHuawei Xie * sub-region or not. 
		 */
		if (unlikely(addr_type == PHYS_ADDR_CROSS_SUBREG)) {
			RTE_LOG(ERR, VHOST_DATA,
				"(%"PRIu64") Frame buffer address cross "
				"sub-region found when attaching RX frame "
				"buffer address!\n",
				dev->device_fh);
			put_desc_to_used_list_zcp(vq, desc_idx);
			continue;
		}
	} while (unlikely(phys_addr == 0));

	rte_ring_sc_dequeue(vpool->ring, (void **)&mbuf);
	if (unlikely(mbuf == NULL)) {
		LOG_DEBUG(VHOST_DATA,
			"(%"PRIu64") in attach_rxmbuf_zcp: "
			"ring_sc_dequeue fail.\n",
			dev->device_fh);
		put_desc_to_used_list_zcp(vq, desc_idx);
		return;
	}

	if (unlikely(vpool->buf_size > desc->len)) {
		LOG_DEBUG(VHOST_DATA,
			"(%"PRIu64") in attach_rxmbuf_zcp: frame buffer "
			"length(%d) of descriptor idx: %d less than room "
			"size required: %d\n",
			dev->device_fh, desc->len, desc_idx, vpool->buf_size);
		put_desc_to_used_list_zcp(vq, desc_idx);
		rte_ring_sp_enqueue(vpool->ring, (void *)mbuf);
		return;
	}

	mbuf->buf_addr = (void *)(uintptr_t)(buff_addr - RTE_PKTMBUF_HEADROOM);
	mbuf->data_off = RTE_PKTMBUF_HEADROOM;
	mbuf->buf_physaddr = phys_addr - RTE_PKTMBUF_HEADROOM;
	mbuf->data_len = desc->len;
	MBUF_HEADROOM_UINT32(mbuf) = (uint32_t)desc_idx;

	LOG_DEBUG(VHOST_DATA,
		"(%"PRIu64") in attach_rxmbuf_zcp: res base idx:%d, "
		"descriptor idx:%d\n",
		dev->device_fh, res_base_idx, desc_idx);

	__rte_mbuf_raw_free(mbuf);

	return;
}

/*
 * Detach an attached packet mbuf -
 *  - restore original mbuf address and length values.
 *  - reset pktmbuf data and data_len to their default values.
 *  All other fields of the given packet mbuf will be left intact.
 *
 * @param m
 *   The attached packet mbuf.
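 *
 * A brief note on why this exists (an interpretation of the code below, not
 * additional behaviour): while attached, buf_addr and buf_physaddr point into
 * a guest descriptor's buffer. Detaching recomputes them from the mempool
 * element that holds the mbuf header itself (the area just after
 * struct rte_mbuf), so the mbuf can safely be recycled.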
 */
static inline void pktmbuf_detach_zcp(struct rte_mbuf *m)
{
	const struct rte_mempool *mp = m->pool;
	void *buf = RTE_MBUF_TO_BADDR(m);
	uint32_t buf_ofs;
	uint32_t buf_len = mp->elt_size - sizeof(*m);
	m->buf_physaddr = rte_mempool_virt2phy(mp, m) + sizeof(*m);

	m->buf_addr = buf;
	m->buf_len = (uint16_t)buf_len;

	buf_ofs = (RTE_PKTMBUF_HEADROOM <= m->buf_len) ?
			RTE_PKTMBUF_HEADROOM : m->buf_len;
	m->data_off = buf_ofs;

	m->data_len = 0;
}

/*
 * This function is called after packets have been transmitted. It fetches
 * mbufs from vpool->pool, detaches them and puts them into vpool->ring. It
 * also updates the used index and kicks the guest if necessary.
 */
static inline uint32_t __attribute__((always_inline))
txmbuf_clean_zcp(struct virtio_net *dev, struct vpool *vpool)
{
	struct rte_mbuf *mbuf;
	struct vhost_virtqueue *vq = dev->virtqueue[VIRTIO_TXQ];
	uint32_t used_idx = vq->last_used_idx & (vq->size - 1);
	uint32_t index = 0;
	uint32_t mbuf_count = rte_mempool_count(vpool->pool);

	LOG_DEBUG(VHOST_DATA,
		"(%"PRIu64") in txmbuf_clean_zcp: mbuf count in mempool before "
		"clean is: %d\n",
		dev->device_fh, mbuf_count);
	LOG_DEBUG(VHOST_DATA,
		"(%"PRIu64") in txmbuf_clean_zcp: mbuf count in ring before "
		"clean is : %d\n",
		dev->device_fh, rte_ring_count(vpool->ring));

	for (index = 0; index < mbuf_count; index++) {
		mbuf = __rte_mbuf_raw_alloc(vpool->pool);
		if (likely(RTE_MBUF_INDIRECT(mbuf)))
			pktmbuf_detach_zcp(mbuf);
		rte_ring_sp_enqueue(vpool->ring, mbuf);

		/* Update used index buffer information.
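		 * MBUF_HEADROOM_UINT32(mbuf) holds the guest descriptor index
		 * stashed in the mbuf headroom by virtio_tx_route_zcp(), so
		 * writing it into the used ring here hands that descriptor
		 * back to the guest.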
		 */
		vq->used->ring[used_idx].id = MBUF_HEADROOM_UINT32(mbuf);
		vq->used->ring[used_idx].len = 0;

		used_idx = (used_idx + 1) & (vq->size - 1);
	}

	LOG_DEBUG(VHOST_DATA,
		"(%"PRIu64") in txmbuf_clean_zcp: mbuf count in mempool after "
		"clean is: %d\n",
		dev->device_fh, rte_mempool_count(vpool->pool));
	LOG_DEBUG(VHOST_DATA,
		"(%"PRIu64") in txmbuf_clean_zcp: mbuf count in ring after "
		"clean is : %d\n",
		dev->device_fh, rte_ring_count(vpool->ring));
	LOG_DEBUG(VHOST_DATA,
		"(%"PRIu64") in txmbuf_clean_zcp: before updated "
		"vq->last_used_idx:%d\n",
		dev->device_fh, vq->last_used_idx);

	vq->last_used_idx += mbuf_count;

	LOG_DEBUG(VHOST_DATA,
		"(%"PRIu64") in txmbuf_clean_zcp: after updated "
		"vq->last_used_idx:%d\n",
		dev->device_fh, vq->last_used_idx);

	rte_compiler_barrier();

	*(volatile uint16_t *)&vq->used->idx += mbuf_count;

	/* Kick guest if required. */
	if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
		eventfd_write((int)vq->kickfd, 1);

	return 0;
}

/*
 * This function is called when a virtio device is destroyed. It fetches
 * mbufs from vpool->pool, detaches them and puts them into vpool->ring.
 */
static void mbuf_destroy_zcp(struct vpool *vpool)
{
	struct rte_mbuf *mbuf = NULL;
	uint32_t index, mbuf_count = rte_mempool_count(vpool->pool);

	LOG_DEBUG(VHOST_CONFIG,
		"in mbuf_destroy_zcp: mbuf count in mempool before "
		"mbuf_destroy_zcp is: %d\n",
		mbuf_count);
	LOG_DEBUG(VHOST_CONFIG,
		"in mbuf_destroy_zcp: mbuf count in ring before "
		"mbuf_destroy_zcp is : %d\n",
		rte_ring_count(vpool->ring));

	for (index = 0; index < mbuf_count; index++) {
		mbuf = __rte_mbuf_raw_alloc(vpool->pool);
		if (likely(mbuf != NULL)) {
			if (likely(RTE_MBUF_INDIRECT(mbuf)))
				pktmbuf_detach_zcp(mbuf);
			rte_ring_sp_enqueue(vpool->ring, (void *)mbuf);
		}
	}

	LOG_DEBUG(VHOST_CONFIG,
		"in mbuf_destroy_zcp: mbuf count in mempool after "
		"mbuf_destroy_zcp is: %d\n",
		rte_mempool_count(vpool->pool));
	LOG_DEBUG(VHOST_CONFIG,
		"in mbuf_destroy_zcp: mbuf count in ring after "
		"mbuf_destroy_zcp is : %d\n",
		rte_ring_count(vpool->ring));
}

/*
 * This function completes guest RX for the zero copy path: for each received
 * packet it writes the (zeroed) virtio header into the guest buffer and
 * updates the used ring with the reserved descriptor index and length.
 */
static inline uint32_t __attribute__((always_inline))
virtio_dev_rx_zcp(struct virtio_net *dev, struct rte_mbuf **pkts,
	uint32_t count)
{
	struct vhost_virtqueue *vq;
	struct vring_desc *desc;
	struct rte_mbuf *buff;
	/* The virtio_hdr is initialised to 0. */
	struct virtio_net_hdr_mrg_rxbuf virtio_hdr
		= {{0, 0, 0, 0, 0, 0}, 0};
	uint64_t buff_hdr_addr = 0;
	uint32_t head[MAX_PKT_BURST], packet_len = 0;
	uint32_t head_idx, packet_success = 0;
	uint16_t res_cur_idx;

	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_rx()\n", dev->device_fh);

	if (count == 0)
		return 0;

	vq = dev->virtqueue[VIRTIO_RXQ];
	count = (count > MAX_PKT_BURST) ? MAX_PKT_BURST : count;

	res_cur_idx = vq->last_used_idx;
	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Current Index %d| End Index %d\n",
		dev->device_fh, res_cur_idx, res_cur_idx + count);

	/* Retrieve all of the head indexes first to avoid caching issues.
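	 * For this zero copy path the head indexes are not read from the
	 * avail ring here: attach_rxmbuf_zcp() already reserved a descriptor
	 * per mbuf and stashed its index in the mbuf headroom, so
	 * MBUF_HEADROOM_UINT32(pkts[i]) is read back instead.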
*/ 2516*d19533e8SHuawei Xie for (head_idx = 0; head_idx < count; head_idx++) 2517*d19533e8SHuawei Xie head[head_idx] = MBUF_HEADROOM_UINT32(pkts[head_idx]); 2518*d19533e8SHuawei Xie 2519*d19533e8SHuawei Xie /*Prefetch descriptor index. */ 2520*d19533e8SHuawei Xie rte_prefetch0(&vq->desc[head[packet_success]]); 2521*d19533e8SHuawei Xie 2522*d19533e8SHuawei Xie while (packet_success != count) { 2523*d19533e8SHuawei Xie /* Get descriptor from available ring */ 2524*d19533e8SHuawei Xie desc = &vq->desc[head[packet_success]]; 2525*d19533e8SHuawei Xie 2526*d19533e8SHuawei Xie buff = pkts[packet_success]; 2527*d19533e8SHuawei Xie LOG_DEBUG(VHOST_DATA, 2528*d19533e8SHuawei Xie "(%"PRIu64") in dev_rx_zcp: update the used idx for " 2529*d19533e8SHuawei Xie "pkt[%d] descriptor idx: %d\n", 2530*d19533e8SHuawei Xie dev->device_fh, packet_success, 2531*d19533e8SHuawei Xie MBUF_HEADROOM_UINT32(buff)); 2532*d19533e8SHuawei Xie 2533*d19533e8SHuawei Xie PRINT_PACKET(dev, 2534*d19533e8SHuawei Xie (uintptr_t)(((uint64_t)(uintptr_t)buff->buf_addr) 2535*d19533e8SHuawei Xie + RTE_PKTMBUF_HEADROOM), 2536*d19533e8SHuawei Xie rte_pktmbuf_data_len(buff), 0); 2537*d19533e8SHuawei Xie 2538*d19533e8SHuawei Xie /* Buffer address translation for virtio header. */ 2539*d19533e8SHuawei Xie buff_hdr_addr = gpa_to_vva(dev, desc->addr); 2540*d19533e8SHuawei Xie packet_len = rte_pktmbuf_data_len(buff) + vq->vhost_hlen; 2541*d19533e8SHuawei Xie 2542*d19533e8SHuawei Xie /* 2543*d19533e8SHuawei Xie * If the descriptors are chained the header and data are 2544*d19533e8SHuawei Xie * placed in separate buffers. 2545*d19533e8SHuawei Xie */ 2546*d19533e8SHuawei Xie if (desc->flags & VRING_DESC_F_NEXT) { 2547*d19533e8SHuawei Xie desc->len = vq->vhost_hlen; 2548*d19533e8SHuawei Xie desc = &vq->desc[desc->next]; 2549*d19533e8SHuawei Xie desc->len = rte_pktmbuf_data_len(buff); 2550*d19533e8SHuawei Xie } else { 2551*d19533e8SHuawei Xie desc->len = packet_len; 2552*d19533e8SHuawei Xie } 2553*d19533e8SHuawei Xie 2554*d19533e8SHuawei Xie /* Update used ring with desc information */ 2555*d19533e8SHuawei Xie vq->used->ring[res_cur_idx & (vq->size - 1)].id 2556*d19533e8SHuawei Xie = head[packet_success]; 2557*d19533e8SHuawei Xie vq->used->ring[res_cur_idx & (vq->size - 1)].len 2558*d19533e8SHuawei Xie = packet_len; 2559*d19533e8SHuawei Xie res_cur_idx++; 2560*d19533e8SHuawei Xie packet_success++; 2561*d19533e8SHuawei Xie 2562*d19533e8SHuawei Xie /* A header is required per buffer. */ 2563*d19533e8SHuawei Xie rte_memcpy((void *)(uintptr_t)buff_hdr_addr, 2564*d19533e8SHuawei Xie (const void *)&virtio_hdr, vq->vhost_hlen); 2565*d19533e8SHuawei Xie 2566*d19533e8SHuawei Xie PRINT_PACKET(dev, (uintptr_t)buff_hdr_addr, vq->vhost_hlen, 1); 2567*d19533e8SHuawei Xie 2568*d19533e8SHuawei Xie if (likely(packet_success < count)) { 2569*d19533e8SHuawei Xie /* Prefetch descriptor index. 
*/ 2570*d19533e8SHuawei Xie rte_prefetch0(&vq->desc[head[packet_success]]); 2571*d19533e8SHuawei Xie } 2572*d19533e8SHuawei Xie } 2573*d19533e8SHuawei Xie 2574*d19533e8SHuawei Xie rte_compiler_barrier(); 2575*d19533e8SHuawei Xie 2576*d19533e8SHuawei Xie LOG_DEBUG(VHOST_DATA, 2577*d19533e8SHuawei Xie "(%"PRIu64") in dev_rx_zcp: before update used idx: " 2578*d19533e8SHuawei Xie "vq.last_used_idx: %d, vq->used->idx: %d\n", 2579*d19533e8SHuawei Xie dev->device_fh, vq->last_used_idx, vq->used->idx); 2580*d19533e8SHuawei Xie 2581*d19533e8SHuawei Xie *(volatile uint16_t *)&vq->used->idx += count; 2582*d19533e8SHuawei Xie vq->last_used_idx += count; 2583*d19533e8SHuawei Xie 2584*d19533e8SHuawei Xie LOG_DEBUG(VHOST_DATA, 2585*d19533e8SHuawei Xie "(%"PRIu64") in dev_rx_zcp: after update used idx: " 2586*d19533e8SHuawei Xie "vq.last_used_idx: %d, vq->used->idx: %d\n", 2587*d19533e8SHuawei Xie dev->device_fh, vq->last_used_idx, vq->used->idx); 2588*d19533e8SHuawei Xie 2589*d19533e8SHuawei Xie /* Kick the guest if necessary. */ 2590*d19533e8SHuawei Xie if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) 2591*d19533e8SHuawei Xie eventfd_write((int)vq->kickfd, 1); 2592*d19533e8SHuawei Xie 2593*d19533e8SHuawei Xie return count; 2594*d19533e8SHuawei Xie } 2595*d19533e8SHuawei Xie 2596*d19533e8SHuawei Xie /* 2597*d19533e8SHuawei Xie * This function routes the TX packet to the correct interface. 2598*d19533e8SHuawei Xie * This may be a local device or the physical port. 2599*d19533e8SHuawei Xie */ 2600*d19533e8SHuawei Xie static inline void __attribute__((always_inline)) 2601*d19533e8SHuawei Xie virtio_tx_route_zcp(struct virtio_net *dev, struct rte_mbuf *m, 2602*d19533e8SHuawei Xie uint32_t desc_idx, uint8_t need_copy) 2603*d19533e8SHuawei Xie { 2604*d19533e8SHuawei Xie struct mbuf_table *tx_q; 2605*d19533e8SHuawei Xie struct rte_mbuf **m_table; 2606*d19533e8SHuawei Xie struct rte_mbuf *mbuf = NULL; 2607*d19533e8SHuawei Xie unsigned len, ret, offset = 0; 2608*d19533e8SHuawei Xie struct vpool *vpool; 2609*d19533e8SHuawei Xie struct virtio_net_data_ll *dev_ll = ll_root_used; 2610*d19533e8SHuawei Xie struct ether_hdr *pkt_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *); 2611*d19533e8SHuawei Xie uint16_t vlan_tag = (uint16_t)vlan_tags[(uint16_t)dev->device_fh]; 2612*d19533e8SHuawei Xie 2613*d19533e8SHuawei Xie /*Add packet to the port tx queue*/ 2614*d19533e8SHuawei Xie tx_q = &tx_queue_zcp[(uint16_t)dev->vmdq_rx_q]; 2615*d19533e8SHuawei Xie len = tx_q->len; 2616*d19533e8SHuawei Xie 2617*d19533e8SHuawei Xie /* Allocate an mbuf and populate the structure. 
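	 * As a worked example of the pool layout assumed here: RX pools sit
	 * at vpool_array[0 .. MAX_QUEUES - 1], indexed by the device's VMDq
	 * queue, and the matching TX pools live MAX_QUEUES entries further
	 * up. With MAX_QUEUES of 128, a device with vmdq_rx_q == 3 would
	 * therefore dequeue its TX mbuf from vpool_array[131] below.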
	 */
	vpool = &vpool_array[MAX_QUEUES + (uint16_t)dev->vmdq_rx_q];
	rte_ring_sc_dequeue(vpool->ring, (void **)&mbuf);
	if (unlikely(mbuf == NULL)) {
		struct vhost_virtqueue *vq = dev->virtqueue[VIRTIO_TXQ];
		RTE_LOG(ERR, VHOST_DATA,
			"(%"PRIu64") Failed to allocate memory for mbuf.\n",
			dev->device_fh);
		put_desc_to_used_list_zcp(vq, desc_idx);
		return;
	}

	if (vm2vm_mode == VM2VM_HARDWARE) {
		/*
		 * Avoid using a VLAN tag from any VM for an external packet,
		 * such as vlan_tags[dev->device_fh]; otherwise it conflicts
		 * with pool selection: the MAC address marks it as an
		 * external packet that should go out to the network, while
		 * the VLAN tag marks it as a VM-to-VM packet that should be
		 * forwarded to another VM. The hardware cannot resolve such
		 * an ambiguous situation, so the packet would be lost.
		 */
		vlan_tag = external_pkt_default_vlan_tag;
		while (dev_ll != NULL) {
			if (likely(dev_ll->dev->ready == DEVICE_RX) &&
				ether_addr_cmp(&(pkt_hdr->d_addr),
				&dev_ll->dev->mac_address)) {

				/*
				 * Drop the packet if the TX packet is destined
				 * for the TX device.
				 */
				if (unlikely(dev_ll->dev->device_fh
					== dev->device_fh)) {
					LOG_DEBUG(VHOST_DATA,
					"(%"PRIu64") TX: Source and destination "
					"MAC addresses are the same. Dropping "
					"packet.\n",
					dev_ll->dev->device_fh);
					MBUF_HEADROOM_UINT32(mbuf)
						= (uint32_t)desc_idx;
					__rte_mbuf_raw_free(mbuf);
					return;
				}

				/*
				 * Offset the packet length by 4 bytes for the
				 * HW VLAN strip when the packet is L2-switched
				 * back.
				 */
				offset = 4;
				vlan_tag =
					(uint16_t)
					vlan_tags[(uint16_t)dev_ll->dev->device_fh];

				LOG_DEBUG(VHOST_DATA,
				"(%"PRIu64") TX: pkt to local VM device id:"
				"(%"PRIu64") vlan tag: %d.\n",
				dev->device_fh, dev_ll->dev->device_fh,
				vlan_tag);

				break;
			}
			dev_ll = dev_ll->next;
		}
	}

	mbuf->nb_segs = m->nb_segs;
	mbuf->next = m->next;
	mbuf->data_len = m->data_len + offset;
	mbuf->pkt_len = mbuf->data_len;
	if (unlikely(need_copy)) {
		/* Copy the packet contents to the mbuf.
		 */
		rte_memcpy(rte_pktmbuf_mtod(mbuf, void *),
			rte_pktmbuf_mtod(m, void *),
			m->data_len);
	} else {
		mbuf->data_off = m->data_off;
		mbuf->buf_physaddr = m->buf_physaddr;
		mbuf->buf_addr = m->buf_addr;
	}
	mbuf->ol_flags = PKT_TX_VLAN_PKT;
	mbuf->vlan_tci = vlan_tag;
	mbuf->l2_len = sizeof(struct ether_hdr);
	mbuf->l3_len = sizeof(struct ipv4_hdr);
	MBUF_HEADROOM_UINT32(mbuf) = (uint32_t)desc_idx;

	tx_q->m_table[len] = mbuf;
	len++;

	LOG_DEBUG(VHOST_DATA,
		"(%"PRIu64") in tx_route_zcp: pkt: nb_seg: %d, next:%s\n",
		dev->device_fh,
		mbuf->nb_segs,
		(mbuf->next == NULL) ? "null" : "non-null");

	if (enable_stats) {
		dev_statistics[dev->device_fh].tx_total++;
		dev_statistics[dev->device_fh].tx++;
	}

	if (unlikely(len == MAX_PKT_BURST)) {
		m_table = (struct rte_mbuf **)tx_q->m_table;
		ret = rte_eth_tx_burst(ports[0],
			(uint16_t)tx_q->txq_id, m_table, (uint16_t) len);

		/*
		 * Free any buffers not handled by TX and update
		 * the port stats.
		 */
		if (unlikely(ret < len)) {
			do {
				rte_pktmbuf_free(m_table[ret]);
			} while (++ret < len);
		}

		len = 0;
		txmbuf_clean_zcp(dev, vpool);
	}

	tx_q->len = len;

	return;
}

/*
 * This function transmits all available packets in the virtio TX queue of
 * one virtio-net device. If it is the first packet, it learns the MAC
 * address and sets up the VMDq queue.
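 *
 * A rough outline of the steps below (a reading aid, not new behaviour):
 *   1. snapshot avail->idx and reserve up to MAX_PKT_BURST heads;
 *   2. for each head, skip the virtio header descriptor and translate the
 *      data descriptor's guest address with gpa_to_vva() and gpa_to_hpa();
 *   3. wrap the guest buffer in a stack-allocated mbuf and hand it to
 *      virtio_tx_route_zcp(), requesting a copy only when the buffer
 *      crosses a physical sub-region.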
2743*d19533e8SHuawei Xie */ 2744*d19533e8SHuawei Xie static inline void __attribute__((always_inline)) 2745*d19533e8SHuawei Xie virtio_dev_tx_zcp(struct virtio_net *dev) 2746*d19533e8SHuawei Xie { 2747*d19533e8SHuawei Xie struct rte_mbuf m; 2748*d19533e8SHuawei Xie struct vhost_virtqueue *vq; 2749*d19533e8SHuawei Xie struct vring_desc *desc; 2750*d19533e8SHuawei Xie uint64_t buff_addr = 0, phys_addr; 2751*d19533e8SHuawei Xie uint32_t head[MAX_PKT_BURST]; 2752*d19533e8SHuawei Xie uint32_t i; 2753*d19533e8SHuawei Xie uint16_t free_entries, packet_success = 0; 2754*d19533e8SHuawei Xie uint16_t avail_idx; 2755*d19533e8SHuawei Xie uint8_t need_copy = 0; 2756*d19533e8SHuawei Xie hpa_type addr_type; 2757*d19533e8SHuawei Xie 2758*d19533e8SHuawei Xie vq = dev->virtqueue[VIRTIO_TXQ]; 2759*d19533e8SHuawei Xie avail_idx = *((volatile uint16_t *)&vq->avail->idx); 2760*d19533e8SHuawei Xie 2761*d19533e8SHuawei Xie /* If there are no available buffers then return. */ 2762*d19533e8SHuawei Xie if (vq->last_used_idx_res == avail_idx) 2763*d19533e8SHuawei Xie return; 2764*d19533e8SHuawei Xie 2765*d19533e8SHuawei Xie LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_tx()\n", dev->device_fh); 2766*d19533e8SHuawei Xie 2767*d19533e8SHuawei Xie /* Prefetch available ring to retrieve head indexes. */ 2768*d19533e8SHuawei Xie rte_prefetch0(&vq->avail->ring[vq->last_used_idx_res & (vq->size - 1)]); 2769*d19533e8SHuawei Xie 2770*d19533e8SHuawei Xie /* Get the number of free entries in the ring */ 2771*d19533e8SHuawei Xie free_entries = (avail_idx - vq->last_used_idx_res); 2772*d19533e8SHuawei Xie 2773*d19533e8SHuawei Xie /* Limit to MAX_PKT_BURST. */ 2774*d19533e8SHuawei Xie free_entries 2775*d19533e8SHuawei Xie = (free_entries > MAX_PKT_BURST) ? MAX_PKT_BURST : free_entries; 2776*d19533e8SHuawei Xie 2777*d19533e8SHuawei Xie LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Buffers available %d\n", 2778*d19533e8SHuawei Xie dev->device_fh, free_entries); 2779*d19533e8SHuawei Xie 2780*d19533e8SHuawei Xie /* Retrieve all of the head indexes first to avoid caching issues. */ 2781*d19533e8SHuawei Xie for (i = 0; i < free_entries; i++) 2782*d19533e8SHuawei Xie head[i] 2783*d19533e8SHuawei Xie = vq->avail->ring[(vq->last_used_idx_res + i) 2784*d19533e8SHuawei Xie & (vq->size - 1)]; 2785*d19533e8SHuawei Xie 2786*d19533e8SHuawei Xie vq->last_used_idx_res += free_entries; 2787*d19533e8SHuawei Xie 2788*d19533e8SHuawei Xie /* Prefetch descriptor index. */ 2789*d19533e8SHuawei Xie rte_prefetch0(&vq->desc[head[packet_success]]); 2790*d19533e8SHuawei Xie rte_prefetch0(&vq->used->ring[vq->last_used_idx & (vq->size - 1)]); 2791*d19533e8SHuawei Xie 2792*d19533e8SHuawei Xie while (packet_success < free_entries) { 2793*d19533e8SHuawei Xie desc = &vq->desc[head[packet_success]]; 2794*d19533e8SHuawei Xie 2795*d19533e8SHuawei Xie /* Discard first buffer as it is the virtio header */ 2796*d19533e8SHuawei Xie desc = &vq->desc[desc->next]; 2797*d19533e8SHuawei Xie 2798*d19533e8SHuawei Xie /* Buffer address translation. */ 2799*d19533e8SHuawei Xie buff_addr = gpa_to_vva(dev, desc->addr); 2800*d19533e8SHuawei Xie phys_addr = gpa_to_hpa(dev, desc->addr, desc->len, &addr_type); 2801*d19533e8SHuawei Xie 2802*d19533e8SHuawei Xie if (likely(packet_success < (free_entries - 1))) 2803*d19533e8SHuawei Xie /* Prefetch descriptor index. 
*/ 2804*d19533e8SHuawei Xie rte_prefetch0(&vq->desc[head[packet_success + 1]]); 2805*d19533e8SHuawei Xie 2806*d19533e8SHuawei Xie if (unlikely(addr_type == PHYS_ADDR_INVALID)) { 2807*d19533e8SHuawei Xie RTE_LOG(ERR, VHOST_DATA, 2808*d19533e8SHuawei Xie "(%"PRIu64") Invalid frame buffer address found" 2809*d19533e8SHuawei Xie "when TX packets!\n", 2810*d19533e8SHuawei Xie dev->device_fh); 2811*d19533e8SHuawei Xie packet_success++; 2812*d19533e8SHuawei Xie continue; 2813*d19533e8SHuawei Xie } 2814*d19533e8SHuawei Xie 2815*d19533e8SHuawei Xie /* Prefetch buffer address. */ 2816*d19533e8SHuawei Xie rte_prefetch0((void *)(uintptr_t)buff_addr); 2817*d19533e8SHuawei Xie 2818*d19533e8SHuawei Xie /* 2819*d19533e8SHuawei Xie * Setup dummy mbuf. This is copied to a real mbuf if 2820*d19533e8SHuawei Xie * transmitted out the physical port. 2821*d19533e8SHuawei Xie */ 2822*d19533e8SHuawei Xie m.data_len = desc->len; 2823*d19533e8SHuawei Xie m.nb_segs = 1; 2824*d19533e8SHuawei Xie m.next = NULL; 2825*d19533e8SHuawei Xie m.data_off = 0; 2826*d19533e8SHuawei Xie m.buf_addr = (void *)(uintptr_t)buff_addr; 2827*d19533e8SHuawei Xie m.buf_physaddr = phys_addr; 2828*d19533e8SHuawei Xie 2829*d19533e8SHuawei Xie /* 2830*d19533e8SHuawei Xie * Check if the frame buffer address from guest crosses 2831*d19533e8SHuawei Xie * sub-region or not. 2832*d19533e8SHuawei Xie */ 2833*d19533e8SHuawei Xie if (unlikely(addr_type == PHYS_ADDR_CROSS_SUBREG)) { 2834*d19533e8SHuawei Xie RTE_LOG(ERR, VHOST_DATA, 2835*d19533e8SHuawei Xie "(%"PRIu64") Frame buffer address cross " 2836*d19533e8SHuawei Xie "sub-regioin found when attaching TX frame " 2837*d19533e8SHuawei Xie "buffer address!\n", 2838*d19533e8SHuawei Xie dev->device_fh); 2839*d19533e8SHuawei Xie need_copy = 1; 2840*d19533e8SHuawei Xie } else 2841*d19533e8SHuawei Xie need_copy = 0; 2842*d19533e8SHuawei Xie 2843*d19533e8SHuawei Xie PRINT_PACKET(dev, (uintptr_t)buff_addr, desc->len, 0); 2844*d19533e8SHuawei Xie 2845*d19533e8SHuawei Xie /* 2846*d19533e8SHuawei Xie * If this is the first received packet we need to learn 2847*d19533e8SHuawei Xie * the MAC and setup VMDQ 2848*d19533e8SHuawei Xie */ 2849*d19533e8SHuawei Xie if (unlikely(dev->ready == DEVICE_MAC_LEARNING)) { 2850*d19533e8SHuawei Xie if (dev->remove || (link_vmdq(dev, &m) == -1)) { 2851*d19533e8SHuawei Xie /* 2852*d19533e8SHuawei Xie * Discard frame if device is scheduled for 2853*d19533e8SHuawei Xie * removal or a duplicate MAC address is found. 2854*d19533e8SHuawei Xie */ 2855*d19533e8SHuawei Xie packet_success += free_entries; 2856*d19533e8SHuawei Xie vq->last_used_idx += packet_success; 2857*d19533e8SHuawei Xie break; 2858*d19533e8SHuawei Xie } 2859*d19533e8SHuawei Xie } 2860*d19533e8SHuawei Xie 2861*d19533e8SHuawei Xie virtio_tx_route_zcp(dev, &m, head[packet_success], need_copy); 2862*d19533e8SHuawei Xie packet_success++; 2863*d19533e8SHuawei Xie } 2864*d19533e8SHuawei Xie } 2865*d19533e8SHuawei Xie 2866*d19533e8SHuawei Xie /* 2867*d19533e8SHuawei Xie * This function is called by each data core. It handles all RX/TX registered 2868*d19533e8SHuawei Xie * with the core. For TX the specific lcore linked list is used. For RX, MAC 2869*d19533e8SHuawei Xie * addresses are compared with all devices in the main linked list. 
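 *
 * For a sense of the TX drain period computed below (illustrative numbers
 * only): with a 2 GHz TSC and a BURST_TX_DRAIN_US of 100, drain_tsc comes
 * out at roughly 200,000 cycles, so queued TX packets are flushed about
 * every 100 us even when a burst never fills up.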
2870*d19533e8SHuawei Xie */ 2871*d19533e8SHuawei Xie static int 2872*d19533e8SHuawei Xie switch_worker_zcp(__attribute__((unused)) void *arg) 2873*d19533e8SHuawei Xie { 2874*d19533e8SHuawei Xie struct virtio_net *dev = NULL; 2875*d19533e8SHuawei Xie struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; 2876*d19533e8SHuawei Xie struct virtio_net_data_ll *dev_ll; 2877*d19533e8SHuawei Xie struct mbuf_table *tx_q; 2878*d19533e8SHuawei Xie volatile struct lcore_ll_info *lcore_ll; 2879*d19533e8SHuawei Xie const uint64_t drain_tsc 2880*d19533e8SHuawei Xie = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S 2881*d19533e8SHuawei Xie * BURST_TX_DRAIN_US; 2882*d19533e8SHuawei Xie uint64_t prev_tsc, diff_tsc, cur_tsc, ret_count = 0; 2883*d19533e8SHuawei Xie unsigned ret; 2884*d19533e8SHuawei Xie const uint16_t lcore_id = rte_lcore_id(); 2885*d19533e8SHuawei Xie uint16_t count_in_ring, rx_count = 0; 2886*d19533e8SHuawei Xie 2887*d19533e8SHuawei Xie RTE_LOG(INFO, VHOST_DATA, "Procesing on Core %u started\n", lcore_id); 2888*d19533e8SHuawei Xie 2889*d19533e8SHuawei Xie lcore_ll = lcore_info[lcore_id].lcore_ll; 2890*d19533e8SHuawei Xie prev_tsc = 0; 2891*d19533e8SHuawei Xie 2892*d19533e8SHuawei Xie while (1) { 2893*d19533e8SHuawei Xie cur_tsc = rte_rdtsc(); 2894*d19533e8SHuawei Xie 2895*d19533e8SHuawei Xie /* TX burst queue drain */ 2896*d19533e8SHuawei Xie diff_tsc = cur_tsc - prev_tsc; 2897*d19533e8SHuawei Xie if (unlikely(diff_tsc > drain_tsc)) { 2898*d19533e8SHuawei Xie /* 2899*d19533e8SHuawei Xie * Get mbuf from vpool.pool and detach mbuf and 2900*d19533e8SHuawei Xie * put back into vpool.ring. 2901*d19533e8SHuawei Xie */ 2902*d19533e8SHuawei Xie dev_ll = lcore_ll->ll_root_used; 2903*d19533e8SHuawei Xie while ((dev_ll != NULL) && (dev_ll->dev != NULL)) { 2904*d19533e8SHuawei Xie /* Get virtio device ID */ 2905*d19533e8SHuawei Xie dev = dev_ll->dev; 2906*d19533e8SHuawei Xie 2907*d19533e8SHuawei Xie if (likely(!dev->remove)) { 2908*d19533e8SHuawei Xie tx_q = &tx_queue_zcp[(uint16_t)dev->vmdq_rx_q]; 2909*d19533e8SHuawei Xie if (tx_q->len) { 2910*d19533e8SHuawei Xie LOG_DEBUG(VHOST_DATA, 2911*d19533e8SHuawei Xie "TX queue drained after timeout" 2912*d19533e8SHuawei Xie " with burst size %u\n", 2913*d19533e8SHuawei Xie tx_q->len); 2914*d19533e8SHuawei Xie 2915*d19533e8SHuawei Xie /* 2916*d19533e8SHuawei Xie * Tx any packets in the queue 2917*d19533e8SHuawei Xie */ 2918*d19533e8SHuawei Xie ret = rte_eth_tx_burst( 2919*d19533e8SHuawei Xie ports[0], 2920*d19533e8SHuawei Xie (uint16_t)tx_q->txq_id, 2921*d19533e8SHuawei Xie (struct rte_mbuf **) 2922*d19533e8SHuawei Xie tx_q->m_table, 2923*d19533e8SHuawei Xie (uint16_t)tx_q->len); 2924*d19533e8SHuawei Xie if (unlikely(ret < tx_q->len)) { 2925*d19533e8SHuawei Xie do { 2926*d19533e8SHuawei Xie rte_pktmbuf_free( 2927*d19533e8SHuawei Xie tx_q->m_table[ret]); 2928*d19533e8SHuawei Xie } while (++ret < tx_q->len); 2929*d19533e8SHuawei Xie } 2930*d19533e8SHuawei Xie tx_q->len = 0; 2931*d19533e8SHuawei Xie 2932*d19533e8SHuawei Xie txmbuf_clean_zcp(dev, 2933*d19533e8SHuawei Xie &vpool_array[MAX_QUEUES+dev->vmdq_rx_q]); 2934*d19533e8SHuawei Xie } 2935*d19533e8SHuawei Xie } 2936*d19533e8SHuawei Xie dev_ll = dev_ll->next; 2937*d19533e8SHuawei Xie } 2938*d19533e8SHuawei Xie prev_tsc = cur_tsc; 2939*d19533e8SHuawei Xie } 2940*d19533e8SHuawei Xie 2941*d19533e8SHuawei Xie rte_prefetch0(lcore_ll->ll_root_used); 2942*d19533e8SHuawei Xie 2943*d19533e8SHuawei Xie /* 2944*d19533e8SHuawei Xie * Inform the configuration core that we have exited the linked 2945*d19533e8SHuawei Xie * list and that no 
devices are in use if requested. 2946*d19533e8SHuawei Xie */ 2947*d19533e8SHuawei Xie if (lcore_ll->dev_removal_flag == REQUEST_DEV_REMOVAL) 2948*d19533e8SHuawei Xie lcore_ll->dev_removal_flag = ACK_DEV_REMOVAL; 2949*d19533e8SHuawei Xie 2950*d19533e8SHuawei Xie /* Process devices */ 2951*d19533e8SHuawei Xie dev_ll = lcore_ll->ll_root_used; 2952*d19533e8SHuawei Xie 2953*d19533e8SHuawei Xie while ((dev_ll != NULL) && (dev_ll->dev != NULL)) { 2954*d19533e8SHuawei Xie dev = dev_ll->dev; 2955*d19533e8SHuawei Xie if (unlikely(dev->remove)) { 2956*d19533e8SHuawei Xie dev_ll = dev_ll->next; 2957*d19533e8SHuawei Xie unlink_vmdq(dev); 2958*d19533e8SHuawei Xie dev->ready = DEVICE_SAFE_REMOVE; 2959*d19533e8SHuawei Xie continue; 2960*d19533e8SHuawei Xie } 2961*d19533e8SHuawei Xie 2962*d19533e8SHuawei Xie if (likely(dev->ready == DEVICE_RX)) { 2963*d19533e8SHuawei Xie uint32_t index = dev->vmdq_rx_q; 2964*d19533e8SHuawei Xie uint16_t i; 2965*d19533e8SHuawei Xie count_in_ring 2966*d19533e8SHuawei Xie = rte_ring_count(vpool_array[index].ring); 2967*d19533e8SHuawei Xie uint16_t free_entries 2968*d19533e8SHuawei Xie = (uint16_t)get_available_ring_num_zcp(dev); 2969*d19533e8SHuawei Xie 2970*d19533e8SHuawei Xie /* 2971*d19533e8SHuawei Xie * Attach all mbufs in vpool.ring and put back 2972*d19533e8SHuawei Xie * into vpool.pool. 2973*d19533e8SHuawei Xie */ 2974*d19533e8SHuawei Xie for (i = 0; 2975*d19533e8SHuawei Xie i < RTE_MIN(free_entries, 2976*d19533e8SHuawei Xie RTE_MIN(count_in_ring, MAX_PKT_BURST)); 2977*d19533e8SHuawei Xie i++) 2978*d19533e8SHuawei Xie attach_rxmbuf_zcp(dev); 2979*d19533e8SHuawei Xie 2980*d19533e8SHuawei Xie /* Handle guest RX */ 2981*d19533e8SHuawei Xie rx_count = rte_eth_rx_burst(ports[0], 2982*d19533e8SHuawei Xie (uint16_t)dev->vmdq_rx_q, pkts_burst, 2983*d19533e8SHuawei Xie MAX_PKT_BURST); 2984*d19533e8SHuawei Xie 2985*d19533e8SHuawei Xie if (rx_count) { 2986*d19533e8SHuawei Xie ret_count = virtio_dev_rx_zcp(dev, 2987*d19533e8SHuawei Xie pkts_burst, rx_count); 2988*d19533e8SHuawei Xie if (enable_stats) { 2989*d19533e8SHuawei Xie dev_statistics[dev->device_fh].rx_total 2990*d19533e8SHuawei Xie += rx_count; 2991*d19533e8SHuawei Xie dev_statistics[dev->device_fh].rx 2992*d19533e8SHuawei Xie += ret_count; 2993*d19533e8SHuawei Xie } 2994*d19533e8SHuawei Xie while (likely(rx_count)) { 2995*d19533e8SHuawei Xie rx_count--; 2996*d19533e8SHuawei Xie pktmbuf_detach_zcp( 2997*d19533e8SHuawei Xie pkts_burst[rx_count]); 2998*d19533e8SHuawei Xie rte_ring_sp_enqueue( 2999*d19533e8SHuawei Xie vpool_array[index].ring, 3000*d19533e8SHuawei Xie (void *)pkts_burst[rx_count]); 3001*d19533e8SHuawei Xie } 3002*d19533e8SHuawei Xie } 3003*d19533e8SHuawei Xie } 3004*d19533e8SHuawei Xie 3005*d19533e8SHuawei Xie if (likely(!dev->remove)) 3006*d19533e8SHuawei Xie /* Handle guest TX */ 3007*d19533e8SHuawei Xie virtio_dev_tx_zcp(dev); 3008*d19533e8SHuawei Xie 3009*d19533e8SHuawei Xie /* Move to the next device in the list */ 3010*d19533e8SHuawei Xie dev_ll = dev_ll->next; 3011*d19533e8SHuawei Xie } 3012*d19533e8SHuawei Xie } 3013*d19533e8SHuawei Xie 3014*d19533e8SHuawei Xie return 0; 3015*d19533e8SHuawei Xie } 3016*d19533e8SHuawei Xie 3017*d19533e8SHuawei Xie 3018*d19533e8SHuawei Xie /* 3019*d19533e8SHuawei Xie * Add an entry to a used linked list. 
A free entry must first be found 3020*d19533e8SHuawei Xie * in the free linked list using get_data_ll_free_entry(); 3021*d19533e8SHuawei Xie */ 3022*d19533e8SHuawei Xie static void 3023*d19533e8SHuawei Xie add_data_ll_entry(struct virtio_net_data_ll **ll_root_addr, 3024*d19533e8SHuawei Xie struct virtio_net_data_ll *ll_dev) 3025*d19533e8SHuawei Xie { 3026*d19533e8SHuawei Xie struct virtio_net_data_ll *ll = *ll_root_addr; 3027*d19533e8SHuawei Xie 3028*d19533e8SHuawei Xie /* Set next as NULL and use a compiler barrier to avoid reordering. */ 3029*d19533e8SHuawei Xie ll_dev->next = NULL; 3030*d19533e8SHuawei Xie rte_compiler_barrier(); 3031*d19533e8SHuawei Xie 3032*d19533e8SHuawei Xie /* If ll == NULL then this is the first device. */ 3033*d19533e8SHuawei Xie if (ll) { 3034*d19533e8SHuawei Xie /* Increment to the tail of the linked list. */ 3035*d19533e8SHuawei Xie while ((ll->next != NULL) ) 3036*d19533e8SHuawei Xie ll = ll->next; 3037*d19533e8SHuawei Xie 3038*d19533e8SHuawei Xie ll->next = ll_dev; 3039*d19533e8SHuawei Xie } else { 3040*d19533e8SHuawei Xie *ll_root_addr = ll_dev; 3041*d19533e8SHuawei Xie } 3042*d19533e8SHuawei Xie } 3043*d19533e8SHuawei Xie 3044*d19533e8SHuawei Xie /* 3045*d19533e8SHuawei Xie * Remove an entry from a used linked list. The entry must then be added to 3046*d19533e8SHuawei Xie * the free linked list using put_data_ll_free_entry(). 3047*d19533e8SHuawei Xie */ 3048*d19533e8SHuawei Xie static void 3049*d19533e8SHuawei Xie rm_data_ll_entry(struct virtio_net_data_ll **ll_root_addr, 3050*d19533e8SHuawei Xie struct virtio_net_data_ll *ll_dev, 3051*d19533e8SHuawei Xie struct virtio_net_data_ll *ll_dev_last) 3052*d19533e8SHuawei Xie { 3053*d19533e8SHuawei Xie struct virtio_net_data_ll *ll = *ll_root_addr; 3054*d19533e8SHuawei Xie 3055*d19533e8SHuawei Xie if (unlikely((ll == NULL) || (ll_dev == NULL))) 3056*d19533e8SHuawei Xie return; 3057*d19533e8SHuawei Xie 3058*d19533e8SHuawei Xie if (ll_dev == ll) 3059*d19533e8SHuawei Xie *ll_root_addr = ll_dev->next; 3060*d19533e8SHuawei Xie else 3061*d19533e8SHuawei Xie if (likely(ll_dev_last != NULL)) 3062*d19533e8SHuawei Xie ll_dev_last->next = ll_dev->next; 3063*d19533e8SHuawei Xie else 3064*d19533e8SHuawei Xie RTE_LOG(ERR, VHOST_CONFIG, "Remove entry form ll failed.\n"); 3065*d19533e8SHuawei Xie } 3066*d19533e8SHuawei Xie 3067*d19533e8SHuawei Xie /* 3068*d19533e8SHuawei Xie * Find and return an entry from the free linked list. 3069*d19533e8SHuawei Xie */ 3070*d19533e8SHuawei Xie static struct virtio_net_data_ll * 3071*d19533e8SHuawei Xie get_data_ll_free_entry(struct virtio_net_data_ll **ll_root_addr) 3072*d19533e8SHuawei Xie { 3073*d19533e8SHuawei Xie struct virtio_net_data_ll *ll_free = *ll_root_addr; 3074*d19533e8SHuawei Xie struct virtio_net_data_ll *ll_dev; 3075*d19533e8SHuawei Xie 3076*d19533e8SHuawei Xie if (ll_free == NULL) 3077*d19533e8SHuawei Xie return NULL; 3078*d19533e8SHuawei Xie 3079*d19533e8SHuawei Xie ll_dev = ll_free; 3080*d19533e8SHuawei Xie *ll_root_addr = ll_free->next; 3081*d19533e8SHuawei Xie 3082*d19533e8SHuawei Xie return ll_dev; 3083*d19533e8SHuawei Xie } 3084*d19533e8SHuawei Xie 3085*d19533e8SHuawei Xie /* 3086*d19533e8SHuawei Xie * Place an entry back on to the free linked list. 
3087*d19533e8SHuawei Xie */ 3088*d19533e8SHuawei Xie static void 3089*d19533e8SHuawei Xie put_data_ll_free_entry(struct virtio_net_data_ll **ll_root_addr, 3090*d19533e8SHuawei Xie struct virtio_net_data_ll *ll_dev) 3091*d19533e8SHuawei Xie { 3092*d19533e8SHuawei Xie struct virtio_net_data_ll *ll_free = *ll_root_addr; 3093*d19533e8SHuawei Xie 3094*d19533e8SHuawei Xie if (ll_dev == NULL) 3095*d19533e8SHuawei Xie return; 3096*d19533e8SHuawei Xie 3097*d19533e8SHuawei Xie ll_dev->next = ll_free; 3098*d19533e8SHuawei Xie *ll_root_addr = ll_dev; 3099*d19533e8SHuawei Xie } 3100*d19533e8SHuawei Xie 3101*d19533e8SHuawei Xie /* 3102*d19533e8SHuawei Xie * Creates a linked list of a given size. 3103*d19533e8SHuawei Xie */ 3104*d19533e8SHuawei Xie static struct virtio_net_data_ll * 3105*d19533e8SHuawei Xie alloc_data_ll(uint32_t size) 3106*d19533e8SHuawei Xie { 3107*d19533e8SHuawei Xie struct virtio_net_data_ll *ll_new; 3108*d19533e8SHuawei Xie uint32_t i; 3109*d19533e8SHuawei Xie 3110*d19533e8SHuawei Xie /* Malloc and then chain the linked list. */ 3111*d19533e8SHuawei Xie ll_new = malloc(size * sizeof(struct virtio_net_data_ll)); 3112*d19533e8SHuawei Xie if (ll_new == NULL) { 3113*d19533e8SHuawei Xie RTE_LOG(ERR, VHOST_CONFIG, "Failed to allocate memory for ll_new.\n"); 3114*d19533e8SHuawei Xie return NULL; 3115*d19533e8SHuawei Xie } 3116*d19533e8SHuawei Xie 3117*d19533e8SHuawei Xie for (i = 0; i < size - 1; i++) { 3118*d19533e8SHuawei Xie ll_new[i].dev = NULL; 3119*d19533e8SHuawei Xie ll_new[i].next = &ll_new[i+1]; 3120*d19533e8SHuawei Xie } 3121*d19533e8SHuawei Xie ll_new[i].next = NULL; 3122*d19533e8SHuawei Xie 3123*d19533e8SHuawei Xie return (ll_new); 3124*d19533e8SHuawei Xie } 3125*d19533e8SHuawei Xie 3126*d19533e8SHuawei Xie /* 3127*d19533e8SHuawei Xie * Create the main linked list along with each individual cores linked list. A used and a free list 3128*d19533e8SHuawei Xie * are created to manage entries. 3129*d19533e8SHuawei Xie */ 3130*d19533e8SHuawei Xie static int 3131*d19533e8SHuawei Xie init_data_ll (void) 3132*d19533e8SHuawei Xie { 3133*d19533e8SHuawei Xie int lcore; 3134*d19533e8SHuawei Xie 3135*d19533e8SHuawei Xie RTE_LCORE_FOREACH_SLAVE(lcore) { 3136*d19533e8SHuawei Xie lcore_info[lcore].lcore_ll = malloc(sizeof(struct lcore_ll_info)); 3137*d19533e8SHuawei Xie if (lcore_info[lcore].lcore_ll == NULL) { 3138*d19533e8SHuawei Xie RTE_LOG(ERR, VHOST_CONFIG, "Failed to allocate memory for lcore_ll.\n"); 3139*d19533e8SHuawei Xie return -1; 3140*d19533e8SHuawei Xie } 3141*d19533e8SHuawei Xie 3142*d19533e8SHuawei Xie lcore_info[lcore].lcore_ll->device_num = 0; 3143*d19533e8SHuawei Xie lcore_info[lcore].lcore_ll->dev_removal_flag = ACK_DEV_REMOVAL; 3144*d19533e8SHuawei Xie lcore_info[lcore].lcore_ll->ll_root_used = NULL; 3145*d19533e8SHuawei Xie if (num_devices % num_switching_cores) 3146*d19533e8SHuawei Xie lcore_info[lcore].lcore_ll->ll_root_free = alloc_data_ll((num_devices / num_switching_cores) + 1); 3147*d19533e8SHuawei Xie else 3148*d19533e8SHuawei Xie lcore_info[lcore].lcore_ll->ll_root_free = alloc_data_ll(num_devices / num_switching_cores); 3149*d19533e8SHuawei Xie } 3150*d19533e8SHuawei Xie 3151*d19533e8SHuawei Xie /* Allocate devices up to a maximum of MAX_DEVICES. */ 3152*d19533e8SHuawei Xie ll_root_free = alloc_data_ll(MIN((num_devices), MAX_DEVICES)); 3153*d19533e8SHuawei Xie 3154*d19533e8SHuawei Xie return 0; 3155*d19533e8SHuawei Xie } 3156*d19533e8SHuawei Xie 3157*d19533e8SHuawei Xie /* 3158*d19533e8SHuawei Xie * Set virtqueue flags so that we do not receive interrupts. 
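 *
 * Setting VRING_USED_F_NO_NOTIFY in each used ring tells the guest that
 * this application polls the rings and does not need to be notified
 * (kicked) when new buffers are made available.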
3159*d19533e8SHuawei Xie */ 3160*d19533e8SHuawei Xie static void 3161*d19533e8SHuawei Xie set_irq_status (struct virtio_net *dev) 3162*d19533e8SHuawei Xie { 3163*d19533e8SHuawei Xie dev->virtqueue[VIRTIO_RXQ]->used->flags = VRING_USED_F_NO_NOTIFY; 3164*d19533e8SHuawei Xie dev->virtqueue[VIRTIO_TXQ]->used->flags = VRING_USED_F_NO_NOTIFY; 3165*d19533e8SHuawei Xie } 3166*d19533e8SHuawei Xie 3167*d19533e8SHuawei Xie /* 3168*d19533e8SHuawei Xie * Remove a device from the specific data core linked list and from the main linked list. Synchonization 3169*d19533e8SHuawei Xie * occurs through the use of the lcore dev_removal_flag. Device is made volatile here to avoid re-ordering 3170*d19533e8SHuawei Xie * of dev->remove=1 which can cause an infinite loop in the rte_pause loop. 3171*d19533e8SHuawei Xie */ 3172*d19533e8SHuawei Xie static void 3173*d19533e8SHuawei Xie destroy_device (volatile struct virtio_net *dev) 3174*d19533e8SHuawei Xie { 3175*d19533e8SHuawei Xie struct virtio_net_data_ll *ll_lcore_dev_cur; 3176*d19533e8SHuawei Xie struct virtio_net_data_ll *ll_main_dev_cur; 3177*d19533e8SHuawei Xie struct virtio_net_data_ll *ll_lcore_dev_last = NULL; 3178*d19533e8SHuawei Xie struct virtio_net_data_ll *ll_main_dev_last = NULL; 3179*d19533e8SHuawei Xie int lcore; 3180*d19533e8SHuawei Xie 3181*d19533e8SHuawei Xie dev->flags &= ~VIRTIO_DEV_RUNNING; 3182*d19533e8SHuawei Xie 3183*d19533e8SHuawei Xie /*set the remove flag. */ 3184*d19533e8SHuawei Xie dev->remove = 1; 3185*d19533e8SHuawei Xie 3186*d19533e8SHuawei Xie while(dev->ready != DEVICE_SAFE_REMOVE) { 3187*d19533e8SHuawei Xie rte_pause(); 3188*d19533e8SHuawei Xie } 3189*d19533e8SHuawei Xie 3190*d19533e8SHuawei Xie /* Search for entry to be removed from lcore ll */ 3191*d19533e8SHuawei Xie ll_lcore_dev_cur = lcore_info[dev->coreid].lcore_ll->ll_root_used; 3192*d19533e8SHuawei Xie while (ll_lcore_dev_cur != NULL) { 3193*d19533e8SHuawei Xie if (ll_lcore_dev_cur->dev == dev) { 3194*d19533e8SHuawei Xie break; 3195*d19533e8SHuawei Xie } else { 3196*d19533e8SHuawei Xie ll_lcore_dev_last = ll_lcore_dev_cur; 3197*d19533e8SHuawei Xie ll_lcore_dev_cur = ll_lcore_dev_cur->next; 3198*d19533e8SHuawei Xie } 3199*d19533e8SHuawei Xie } 3200*d19533e8SHuawei Xie 3201*d19533e8SHuawei Xie if (ll_lcore_dev_cur == NULL) { 3202*d19533e8SHuawei Xie RTE_LOG(ERR, VHOST_CONFIG, 3203*d19533e8SHuawei Xie "(%"PRIu64") Failed to find the dev to be destroy.\n", 3204*d19533e8SHuawei Xie dev->device_fh); 3205*d19533e8SHuawei Xie return; 3206*d19533e8SHuawei Xie } 3207*d19533e8SHuawei Xie 3208*d19533e8SHuawei Xie /* Search for entry to be removed from main ll */ 3209*d19533e8SHuawei Xie ll_main_dev_cur = ll_root_used; 3210*d19533e8SHuawei Xie ll_main_dev_last = NULL; 3211*d19533e8SHuawei Xie while (ll_main_dev_cur != NULL) { 3212*d19533e8SHuawei Xie if (ll_main_dev_cur->dev == dev) { 3213*d19533e8SHuawei Xie break; 3214*d19533e8SHuawei Xie } else { 3215*d19533e8SHuawei Xie ll_main_dev_last = ll_main_dev_cur; 3216*d19533e8SHuawei Xie ll_main_dev_cur = ll_main_dev_cur->next; 3217*d19533e8SHuawei Xie } 3218*d19533e8SHuawei Xie } 3219*d19533e8SHuawei Xie 3220*d19533e8SHuawei Xie /* Remove entries from the lcore and main ll. */ 3221*d19533e8SHuawei Xie rm_data_ll_entry(&lcore_info[ll_lcore_dev_cur->dev->coreid].lcore_ll->ll_root_used, ll_lcore_dev_cur, ll_lcore_dev_last); 3222*d19533e8SHuawei Xie rm_data_ll_entry(&ll_root_used, ll_main_dev_cur, ll_main_dev_last); 3223*d19533e8SHuawei Xie 3224*d19533e8SHuawei Xie /* Set the dev_removal_flag on each lcore. 
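	 * This starts a simple request/acknowledge handshake: each data core
	 * resets the flag to ACK_DEV_REMOVAL once it has finished walking the
	 * linked list for the current iteration, so after every core has
	 * acknowledged below it is safe to recycle the removed entries.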
*/ 3225*d19533e8SHuawei Xie RTE_LCORE_FOREACH_SLAVE(lcore) { 3226*d19533e8SHuawei Xie lcore_info[lcore].lcore_ll->dev_removal_flag = REQUEST_DEV_REMOVAL; 3227*d19533e8SHuawei Xie } 3228*d19533e8SHuawei Xie 3229*d19533e8SHuawei Xie /* 3230*d19533e8SHuawei Xie * Once each core has set the dev_removal_flag to ACK_DEV_REMOVAL we can be sure that 3231*d19533e8SHuawei Xie * they can no longer access the device removed from the linked lists and that the devices 3232*d19533e8SHuawei Xie * are no longer in use. 3233*d19533e8SHuawei Xie */ 3234*d19533e8SHuawei Xie RTE_LCORE_FOREACH_SLAVE(lcore) { 3235*d19533e8SHuawei Xie while (lcore_info[lcore].lcore_ll->dev_removal_flag != ACK_DEV_REMOVAL) { 3236*d19533e8SHuawei Xie rte_pause(); 3237*d19533e8SHuawei Xie } 3238*d19533e8SHuawei Xie } 3239*d19533e8SHuawei Xie 3240*d19533e8SHuawei Xie /* Add the entries back to the lcore and main free ll.*/ 3241*d19533e8SHuawei Xie put_data_ll_free_entry(&lcore_info[ll_lcore_dev_cur->dev->coreid].lcore_ll->ll_root_free, ll_lcore_dev_cur); 3242*d19533e8SHuawei Xie put_data_ll_free_entry(&ll_root_free, ll_main_dev_cur); 3243*d19533e8SHuawei Xie 3244*d19533e8SHuawei Xie /* Decrement number of device on the lcore. */ 3245*d19533e8SHuawei Xie lcore_info[ll_lcore_dev_cur->dev->coreid].lcore_ll->device_num--; 3246*d19533e8SHuawei Xie 3247*d19533e8SHuawei Xie RTE_LOG(INFO, VHOST_DATA, "(%"PRIu64") Device has been removed from data core\n", dev->device_fh); 3248*d19533e8SHuawei Xie 3249*d19533e8SHuawei Xie if (zero_copy) { 3250*d19533e8SHuawei Xie struct vpool *vpool = &vpool_array[dev->vmdq_rx_q]; 3251*d19533e8SHuawei Xie 3252*d19533e8SHuawei Xie /* Stop the RX queue. */ 3253*d19533e8SHuawei Xie if (rte_eth_dev_rx_queue_stop(ports[0], dev->vmdq_rx_q) != 0) { 3254*d19533e8SHuawei Xie LOG_DEBUG(VHOST_CONFIG, 3255*d19533e8SHuawei Xie "(%"PRIu64") In destroy_device: Failed to stop " 3256*d19533e8SHuawei Xie "rx queue:%d\n", 3257*d19533e8SHuawei Xie dev->device_fh, 3258*d19533e8SHuawei Xie dev->vmdq_rx_q); 3259*d19533e8SHuawei Xie } 3260*d19533e8SHuawei Xie 3261*d19533e8SHuawei Xie LOG_DEBUG(VHOST_CONFIG, 3262*d19533e8SHuawei Xie "(%"PRIu64") in destroy_device: Start put mbuf in " 3263*d19533e8SHuawei Xie "mempool back to ring for RX queue: %d\n", 3264*d19533e8SHuawei Xie dev->device_fh, dev->vmdq_rx_q); 3265*d19533e8SHuawei Xie 3266*d19533e8SHuawei Xie mbuf_destroy_zcp(vpool); 3267*d19533e8SHuawei Xie 3268*d19533e8SHuawei Xie /* Stop the TX queue. */ 3269*d19533e8SHuawei Xie if (rte_eth_dev_tx_queue_stop(ports[0], dev->vmdq_rx_q) != 0) { 3270*d19533e8SHuawei Xie LOG_DEBUG(VHOST_CONFIG, 3271*d19533e8SHuawei Xie "(%"PRIu64") In destroy_device: Failed to " 3272*d19533e8SHuawei Xie "stop tx queue:%d\n", 3273*d19533e8SHuawei Xie dev->device_fh, dev->vmdq_rx_q); 3274*d19533e8SHuawei Xie } 3275*d19533e8SHuawei Xie 3276*d19533e8SHuawei Xie vpool = &vpool_array[dev->vmdq_rx_q + MAX_QUEUES]; 3277*d19533e8SHuawei Xie 3278*d19533e8SHuawei Xie LOG_DEBUG(VHOST_CONFIG, 3279*d19533e8SHuawei Xie "(%"PRIu64") destroy_device: Start put mbuf in mempool " 3280*d19533e8SHuawei Xie "back to ring for TX queue: %d, dev:(%"PRIu64")\n", 3281*d19533e8SHuawei Xie dev->device_fh, (dev->vmdq_rx_q + MAX_QUEUES), 3282*d19533e8SHuawei Xie dev->device_fh); 3283*d19533e8SHuawei Xie 3284*d19533e8SHuawei Xie mbuf_destroy_zcp(vpool); 3285*d19533e8SHuawei Xie } 3286*d19533e8SHuawei Xie 3287*d19533e8SHuawei Xie } 3288*d19533e8SHuawei Xie 3289*d19533e8SHuawei Xie /* 3290*d19533e8SHuawei Xie * A new device is added to a data core. 
 * First the device is added to the main linked list
 * and then allocated to a specific data core.
 */
static int
new_device (struct virtio_net *dev)
{
	struct virtio_net_data_ll *ll_dev;
	int lcore, core_add = 0;
	uint32_t device_num_min = num_devices;

	/* Add device to main ll */
	ll_dev = get_data_ll_free_entry(&ll_root_free);
	if (ll_dev == NULL) {
		RTE_LOG(INFO, VHOST_DATA, "(%"PRIu64") No free entry found in linked list. Device limit "
			"of %d devices per core has been reached\n",
			dev->device_fh, num_devices);
		return -1;
	}
	ll_dev->dev = dev;
	add_data_ll_entry(&ll_root_used, ll_dev);
	ll_dev->dev->vmdq_rx_q
		= ll_dev->dev->device_fh * (num_queues / num_devices);

	if (zero_copy) {
		uint32_t index = ll_dev->dev->vmdq_rx_q;
		uint32_t count_in_ring, i;
		struct mbuf_table *tx_q;

		count_in_ring = rte_ring_count(vpool_array[index].ring);

		LOG_DEBUG(VHOST_CONFIG,
			"(%"PRIu64") in new_device: mbuf count in mempool "
			"before attach is: %d\n",
			dev->device_fh,
			rte_mempool_count(vpool_array[index].pool));
		LOG_DEBUG(VHOST_CONFIG,
			"(%"PRIu64") in new_device: mbuf count in ring "
			"before attach is : %d\n",
			dev->device_fh, count_in_ring);

		/*
		 * Attach all mbufs in vpool.ring and put them back into
		 * vpool.pool.
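		 *
		 * This primes the zero copy RX path before the queue is
		 * started: every free mbuf is bound to a guest RX descriptor
		 * up front, so the NIC RX queue can be filled entirely with
		 * guest buffers.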
3332*d19533e8SHuawei Xie */ 3333*d19533e8SHuawei Xie for (i = 0; i < count_in_ring; i++) 3334*d19533e8SHuawei Xie attach_rxmbuf_zcp(dev); 3335*d19533e8SHuawei Xie 3336*d19533e8SHuawei Xie LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") in new_device: mbuf count in " 3337*d19533e8SHuawei Xie "mempool after attach is: %d\n", 3338*d19533e8SHuawei Xie dev->device_fh, 3339*d19533e8SHuawei Xie rte_mempool_count(vpool_array[index].pool)); 3340*d19533e8SHuawei Xie LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") in new_device: mbuf count in " 3341*d19533e8SHuawei Xie "ring after attach is : %d\n", 3342*d19533e8SHuawei Xie dev->device_fh, 3343*d19533e8SHuawei Xie rte_ring_count(vpool_array[index].ring)); 3344*d19533e8SHuawei Xie 3345*d19533e8SHuawei Xie tx_q = &tx_queue_zcp[(uint16_t)dev->vmdq_rx_q]; 3346*d19533e8SHuawei Xie tx_q->txq_id = dev->vmdq_rx_q; 3347*d19533e8SHuawei Xie 3348*d19533e8SHuawei Xie if (rte_eth_dev_tx_queue_start(ports[0], dev->vmdq_rx_q) != 0) { 3349*d19533e8SHuawei Xie struct vpool *vpool = &vpool_array[dev->vmdq_rx_q]; 3350*d19533e8SHuawei Xie 3351*d19533e8SHuawei Xie LOG_DEBUG(VHOST_CONFIG, 3352*d19533e8SHuawei Xie "(%"PRIu64") In new_device: Failed to start " 3353*d19533e8SHuawei Xie "tx queue:%d\n", 3354*d19533e8SHuawei Xie dev->device_fh, dev->vmdq_rx_q); 3355*d19533e8SHuawei Xie 3356*d19533e8SHuawei Xie mbuf_destroy_zcp(vpool); 3357*d19533e8SHuawei Xie return -1; 3358*d19533e8SHuawei Xie } 3359*d19533e8SHuawei Xie 3360*d19533e8SHuawei Xie if (rte_eth_dev_rx_queue_start(ports[0], dev->vmdq_rx_q) != 0) { 3361*d19533e8SHuawei Xie struct vpool *vpool = &vpool_array[dev->vmdq_rx_q]; 3362*d19533e8SHuawei Xie 3363*d19533e8SHuawei Xie LOG_DEBUG(VHOST_CONFIG, 3364*d19533e8SHuawei Xie "(%"PRIu64") In new_device: Failed to start " 3365*d19533e8SHuawei Xie "rx queue:%d\n", 3366*d19533e8SHuawei Xie dev->device_fh, dev->vmdq_rx_q); 3367*d19533e8SHuawei Xie 3368*d19533e8SHuawei Xie /* Stop the TX queue. */ 3369*d19533e8SHuawei Xie if (rte_eth_dev_tx_queue_stop(ports[0], 3370*d19533e8SHuawei Xie dev->vmdq_rx_q) != 0) { 3371*d19533e8SHuawei Xie LOG_DEBUG(VHOST_CONFIG, 3372*d19533e8SHuawei Xie "(%"PRIu64") In new_device: Failed to " 3373*d19533e8SHuawei Xie "stop tx queue:%d\n", 3374*d19533e8SHuawei Xie dev->device_fh, dev->vmdq_rx_q); 3375*d19533e8SHuawei Xie } 3376*d19533e8SHuawei Xie 3377*d19533e8SHuawei Xie mbuf_destroy_zcp(vpool); 3378*d19533e8SHuawei Xie return -1; 3379*d19533e8SHuawei Xie } 3380*d19533e8SHuawei Xie 3381*d19533e8SHuawei Xie } 3382*d19533e8SHuawei Xie 3383*d19533e8SHuawei Xie /*reset ready flag*/ 3384*d19533e8SHuawei Xie dev->ready = DEVICE_MAC_LEARNING; 3385*d19533e8SHuawei Xie dev->remove = 0; 3386*d19533e8SHuawei Xie 3387*d19533e8SHuawei Xie /* Find a suitable lcore to add the device. 
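	 * Placement is a simple least-loaded choice: the slave lcore with the
	 * smallest device_num wins. device_num_min starts at num_devices, so
	 * the first slave lcore examined always qualifies.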
	/* Find a suitable lcore to add the device. */
	RTE_LCORE_FOREACH_SLAVE(lcore) {
		if (lcore_info[lcore].lcore_ll->device_num < device_num_min) {
			device_num_min = lcore_info[lcore].lcore_ll->device_num;
			core_add = lcore;
		}
	}
	/* Add device to lcore ll */
	ll_dev->dev->coreid = core_add;
	ll_dev = get_data_ll_free_entry(&lcore_info[ll_dev->dev->coreid].lcore_ll->ll_root_free);
	if (ll_dev == NULL) {
		RTE_LOG(INFO, VHOST_DATA, "(%"PRIu64") Failed to add device to data core\n", dev->device_fh);
		dev->ready = DEVICE_SAFE_REMOVE;
		destroy_device(dev);
		return -1;
	}
	ll_dev->dev = dev;
	add_data_ll_entry(&lcore_info[ll_dev->dev->coreid].lcore_ll->ll_root_used, ll_dev);

	/* Initialize device stats */
	memset(&dev_statistics[dev->device_fh], 0, sizeof(struct device_statistics));

	/* Disable notifications. */
	set_irq_status(dev);
	lcore_info[ll_dev->dev->coreid].lcore_ll->device_num++;
	dev->flags |= VIRTIO_DEV_RUNNING;

	RTE_LOG(INFO, VHOST_DATA, "(%"PRIu64") Device has been added to data core %d\n", dev->device_fh, dev->coreid);

	return 0;
}

/*
 * These callbacks allow devices to be added to the data core when
 * configuration has been fully completed.
 */
static const struct virtio_net_device_ops virtio_net_device_ops = {
	.new_device = new_device,
	.destroy_device = destroy_device,
};
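/*
 * These ops are registered with the vhost library through
 * init_virtio_net(&virtio_net_device_ops) in MAIN below; the library invokes
 * new_device()/destroy_device() when a guest virtio device comes up or is
 * removed.
 */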
/*
 * This is a thread that will wake up periodically to print statistics if the
 * user has enabled them.
 */
static void
print_stats(void)
{
	struct virtio_net_data_ll *dev_ll;
	uint64_t tx_dropped, rx_dropped;
	uint64_t tx, tx_total, rx, rx_total;
	uint32_t device_fh;
	const char clr[] = { 27, '[', '2', 'J', '\0' };
	const char top_left[] = { 27, '[', '1', ';', '1', 'H', '\0' };

	while (1) {
		sleep(enable_stats);

		/* Clear screen and move to top left */
		printf("%s%s", clr, top_left);

		printf("\nDevice statistics ====================================");

		dev_ll = ll_root_used;
		while (dev_ll != NULL) {
			device_fh = (uint32_t)dev_ll->dev->device_fh;
			tx_total = dev_statistics[device_fh].tx_total;
			tx = dev_statistics[device_fh].tx;
			tx_dropped = tx_total - tx;
			if (zero_copy == 0) {
				rx_total = rte_atomic64_read(
					&dev_statistics[device_fh].rx_total_atomic);
				rx = rte_atomic64_read(
					&dev_statistics[device_fh].rx_atomic);
			} else {
				rx_total = dev_statistics[device_fh].rx_total;
				rx = dev_statistics[device_fh].rx;
			}
			rx_dropped = rx_total - rx;

			printf("\nStatistics for device %"PRIu32" ------------------------------"
				"\nTX total:      %"PRIu64""
				"\nTX dropped:    %"PRIu64""
				"\nTX successful: %"PRIu64""
				"\nRX total:      %"PRIu64""
				"\nRX dropped:    %"PRIu64""
				"\nRX successful: %"PRIu64"",
				device_fh,
				tx_total,
				tx_dropped,
				tx,
				rx_total,
				rx_dropped,
				rx);

			dev_ll = dev_ll->next;
		}
		printf("\n======================================================\n");
	}
}
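/*
 * For one zero-copy queue, set up a dedicated mempool plus an rte_ring that
 * stages mbufs which are not yet attached to guest buffers; buf_size records
 * the usable data room, i.e. the descriptor length without the mbuf headroom.
 */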
static void
setup_mempool_tbl(int socket, uint32_t index, char *pool_name,
	char *ring_name, uint32_t nb_mbuf)
{
	uint16_t roomsize = VIRTIO_DESCRIPTOR_LEN_ZCP + RTE_PKTMBUF_HEADROOM;

	vpool_array[index].pool
		= rte_mempool_create(pool_name, nb_mbuf, MBUF_SIZE_ZCP,
			MBUF_CACHE_SIZE_ZCP, sizeof(struct rte_pktmbuf_pool_private),
			rte_pktmbuf_pool_init, (void *)(uintptr_t)roomsize,
			rte_pktmbuf_init, NULL, socket, 0);
	if (vpool_array[index].pool != NULL) {
		vpool_array[index].ring
			= rte_ring_create(ring_name,
				rte_align32pow2(nb_mbuf + 1),
				socket, RING_F_SP_ENQ | RING_F_SC_DEQ);
		if (likely(vpool_array[index].ring != NULL)) {
			LOG_DEBUG(VHOST_CONFIG,
				"in setup_mempool_tbl: mbuf count in "
				"mempool is: %d\n",
				rte_mempool_count(vpool_array[index].pool));
			LOG_DEBUG(VHOST_CONFIG,
				"in setup_mempool_tbl: mbuf count in "
				"ring is: %d\n",
				rte_ring_count(vpool_array[index].ring));
		} else {
			rte_exit(EXIT_FAILURE, "ring_create(%s) failed",
				ring_name);
		}

		/* Need to account for the headroom. */
		vpool_array[index].buf_size = roomsize - RTE_PKTMBUF_HEADROOM;
	} else {
		rte_exit(EXIT_FAILURE, "mempool_create(%s) failed", pool_name);
	}
}

/*
 * Main function, does initialisation and calls the per-lcore functions. The
 * CUSE device is also registered here to handle the IOCTLs.
 */
int
MAIN(int argc, char *argv[])
{
	struct rte_mempool *mbuf_pool = NULL;
	unsigned lcore_id, core_id = 0;
	unsigned nb_ports, valid_num_ports;
	int ret;
	uint8_t portid, queue_id = 0;
	static pthread_t tid;

	/* init EAL */
	ret = rte_eal_init(argc, argv);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
	argc -= ret;
	argv += ret;

	/* parse app arguments */
	ret = us_vhost_parse_args(argc, argv);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "Invalid argument\n");

	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
		if (rte_lcore_is_enabled(lcore_id))
			lcore_ids[core_id++] = lcore_id;

	if (rte_lcore_count() > RTE_MAX_LCORE)
		rte_exit(EXIT_FAILURE, "Not enough cores\n");

	/* Set the number of switching cores available. */
	num_switching_cores = rte_lcore_count() - 1;
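	/*
	 * One lcore (the master) runs the main thread; only the slave lcores
	 * launched further below run the packet-switching loops, hence the
	 * "- 1" above.
	 */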
	/* Get the number of physical ports. */
	nb_ports = rte_eth_dev_count();
	if (nb_ports > RTE_MAX_ETHPORTS)
		nb_ports = RTE_MAX_ETHPORTS;

	/*
	 * Update the global variable num_ports and the global array ports[],
	 * and determine how many of the system's ports are valid for this
	 * application.
	 */
	valid_num_ports = check_ports_num(nb_ports);

	if ((valid_num_ports == 0) || (valid_num_ports > MAX_SUP_PORTS)) {
		RTE_LOG(INFO, VHOST_PORT, "Current enabled port number is %u, "
			"but only %u port can be enabled\n", num_ports, MAX_SUP_PORTS);
		return -1;
	}

	if (zero_copy == 0) {
		/* Create the mbuf pool. */
		mbuf_pool = rte_mempool_create(
				"MBUF_POOL",
				NUM_MBUFS_PER_PORT * valid_num_ports,
				MBUF_SIZE, MBUF_CACHE_SIZE,
				sizeof(struct rte_pktmbuf_pool_private),
				rte_pktmbuf_pool_init, NULL,
				rte_pktmbuf_init, NULL,
				rte_socket_id(), 0);
		if (mbuf_pool == NULL)
			rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n");

		for (queue_id = 0; queue_id < MAX_QUEUES + 1; queue_id++)
			vpool_array[queue_id].pool = mbuf_pool;

		if (vm2vm_mode == VM2VM_HARDWARE) {
			/* Enable VT loopback so the NIC's L2 switch handles VM-to-VM traffic. */
			vmdq_conf_default.rx_adv_conf.vmdq_rx_conf.enable_loop_back = 1;
			LOG_DEBUG(VHOST_CONFIG,
				"Enable loop back for L2 switch in vmdq.\n");
		}
	} else {
		uint32_t nb_mbuf;
		char pool_name[RTE_MEMPOOL_NAMESIZE];
		char ring_name[RTE_MEMPOOL_NAMESIZE];

		/*
		 * Zero copy defers queue RX/TX start to the time when the
		 * guest finishes its startup and packet buffers from that
		 * guest are available.
		 */
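		/*
		 * The deferred queues are started later, in new_device()
		 * above, via rte_eth_dev_rx_queue_start() and
		 * rte_eth_dev_tx_queue_start(), once the guest's buffers
		 * have been attached.
		 */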
		rx_conf_default.rx_deferred_start = (uint8_t)zero_copy;
		rx_conf_default.rx_drop_en = 0;
		tx_conf_default.tx_deferred_start = (uint8_t)zero_copy;

		nb_mbuf = num_rx_descriptor
			+ num_switching_cores * MBUF_CACHE_SIZE_ZCP
			+ num_switching_cores * MAX_PKT_BURST;

		for (queue_id = 0; queue_id < MAX_QUEUES; queue_id++) {
			snprintf(pool_name, sizeof(pool_name),
				"rxmbuf_pool_%u", queue_id);
			snprintf(ring_name, sizeof(ring_name),
				"rxmbuf_ring_%u", queue_id);
			setup_mempool_tbl(rte_socket_id(), queue_id,
				pool_name, ring_name, nb_mbuf);
		}

		nb_mbuf = num_tx_descriptor
			+ num_switching_cores * MBUF_CACHE_SIZE_ZCP
			+ num_switching_cores * MAX_PKT_BURST;

		for (queue_id = 0; queue_id < MAX_QUEUES; queue_id++) {
			snprintf(pool_name, sizeof(pool_name),
				"txmbuf_pool_%u", queue_id);
			snprintf(ring_name, sizeof(ring_name),
				"txmbuf_ring_%u", queue_id);
			setup_mempool_tbl(rte_socket_id(),
				(queue_id + MAX_QUEUES),
				pool_name, ring_name, nb_mbuf);
		}

		if (vm2vm_mode == VM2VM_HARDWARE) {
			/* Enable VT loopback so the NIC's L2 switch handles VM-to-VM traffic. */
			vmdq_conf_default.rx_adv_conf.vmdq_rx_conf.enable_loop_back = 1;
			LOG_DEBUG(VHOST_CONFIG,
				"Enable loop back for L2 switch in vmdq.\n");
		}
	}
	/* Set log level. */
	rte_set_log_level(LOG_LEVEL);

	/* initialize all ports */
	for (portid = 0; portid < nb_ports; portid++) {
		/* skip ports that are not enabled */
		if ((enabled_port_mask & (1 << portid)) == 0) {
			RTE_LOG(INFO, VHOST_PORT,
				"Skipping disabled port %d\n", portid);
			continue;
		}
		if (port_init(portid) != 0)
			rte_exit(EXIT_FAILURE,
				"Cannot initialize network ports\n");
	}

	/* Initialise all linked lists. */
	if (init_data_ll() == -1)
		rte_exit(EXIT_FAILURE, "Failed to initialize linked list\n");

	/* Initialize device stats */
	memset(&dev_statistics, 0, sizeof(dev_statistics));
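	/*
	 * The stats printer below runs as an ordinary pthread rather than on
	 * a DPDK lcore, so it does not occupy one of the switching cores.
	 */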
	/* Enable stats if the user option is set. */
	if (enable_stats)
		pthread_create(&tid, NULL, (void *)print_stats, NULL);

	/* Launch all data cores. */
	if (zero_copy == 0) {
		RTE_LCORE_FOREACH_SLAVE(lcore_id) {
			rte_eal_remote_launch(switch_worker,
				mbuf_pool, lcore_id);
		}
	} else {
		uint32_t count_in_mempool, index, i;

		for (index = 0; index < 2 * MAX_QUEUES; index++) {
			/* For all RX and TX queues. */
			count_in_mempool
				= rte_mempool_count(vpool_array[index].pool);

			/*
			 * Transfer all un-attached mbufs from vpool.pool
			 * to vpool.ring.
			 */
			for (i = 0; i < count_in_mempool; i++) {
				struct rte_mbuf *mbuf
					= __rte_mbuf_raw_alloc(
						vpool_array[index].pool);
				rte_ring_sp_enqueue(vpool_array[index].ring,
					(void *)mbuf);
			}

			LOG_DEBUG(VHOST_CONFIG,
				"in MAIN: mbuf count in mempool at initial "
				"is: %d\n", count_in_mempool);
			LOG_DEBUG(VHOST_CONFIG,
				"in MAIN: mbuf count in ring at initial is: "
				"%d\n",
				rte_ring_count(vpool_array[index].ring));
		}

		RTE_LCORE_FOREACH_SLAVE(lcore_id)
			rte_eal_remote_launch(switch_worker_zcp, NULL,
				lcore_id);
	}

	/* Register CUSE device to handle IOCTLs. */
	ret = register_cuse_device((char *)&dev_basename, dev_index,
		get_virtio_net_callbacks());
	if (ret != 0)
		rte_exit(EXIT_FAILURE, "CUSE device setup failure.\n");

	init_virtio_net(&virtio_net_device_ops);

	/* Start CUSE session. */
	start_cuse_session_loop();
	return 0;
}