1*6f9cba8fSJoseph Mingrone /* 2*6f9cba8fSJoseph Mingrone * Copyright (C) 2018 jingle YANG. All rights reserved. 3*6f9cba8fSJoseph Mingrone * 4*6f9cba8fSJoseph Mingrone * Redistribution and use in source and binary forms, with or without 5*6f9cba8fSJoseph Mingrone * modification, are permitted provided that the following conditions 6*6f9cba8fSJoseph Mingrone * are met: 7*6f9cba8fSJoseph Mingrone * 8*6f9cba8fSJoseph Mingrone * 1. Redistributions of source code must retain the above copyright 9*6f9cba8fSJoseph Mingrone * notice, this list of conditions and the following disclaimer. 10*6f9cba8fSJoseph Mingrone * 2. Redistributions in binary form must reproduce the above copyright 11*6f9cba8fSJoseph Mingrone * notice, this list of conditions and the following disclaimer in the 12*6f9cba8fSJoseph Mingrone * documentation and/or other materials provided with the distribution. 13*6f9cba8fSJoseph Mingrone * 14*6f9cba8fSJoseph Mingrone * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''AND 15*6f9cba8fSJoseph Mingrone * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16*6f9cba8fSJoseph Mingrone * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17*6f9cba8fSJoseph Mingrone * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18*6f9cba8fSJoseph Mingrone * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19*6f9cba8fSJoseph Mingrone * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20*6f9cba8fSJoseph Mingrone * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21*6f9cba8fSJoseph Mingrone * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22*6f9cba8fSJoseph Mingrone * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23*6f9cba8fSJoseph Mingrone * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24*6f9cba8fSJoseph Mingrone * SUCH DAMAGE. 
25*6f9cba8fSJoseph Mingrone */ 26*6f9cba8fSJoseph Mingrone 27*6f9cba8fSJoseph Mingrone /* 28*6f9cba8fSJoseph Mingrone Date: Dec 16, 2018 29*6f9cba8fSJoseph Mingrone 30*6f9cba8fSJoseph Mingrone Description: 31*6f9cba8fSJoseph Mingrone 1. Pcap-dpdk provides libpcap the ability to use DPDK with the device name as dpdk:{portid}, such as dpdk:0. 32*6f9cba8fSJoseph Mingrone 2. DPDK is a set of libraries and drivers for fast packet processing. (https://www.dpdk.org/) 33*6f9cba8fSJoseph Mingrone 3. The testprogs/capturetest provides 6.4Gbps/800,000 pps on Intel 10-Gigabit X540-AT2 with DPDK 18.11. 34*6f9cba8fSJoseph Mingrone 35*6f9cba8fSJoseph Mingrone Limitations: 36*6f9cba8fSJoseph Mingrone 1. DPDK support will be on if DPDK is available. Please set DIR for --with-dpdk[=DIR] with ./configure or -DDPDK_DIR[=DIR] with cmake if DPDK is installed manually. 37*6f9cba8fSJoseph Mingrone 2. Only support link libdpdk.so dynamically, because the libdpdk.a will not work correctly. 38*6f9cba8fSJoseph Mingrone 3. Only support read operation, and packet injection has not been supported yet. 39*6f9cba8fSJoseph Mingrone 40*6f9cba8fSJoseph Mingrone Usage: 41*6f9cba8fSJoseph Mingrone 1. Compile DPDK as shared library and install.(https://github.com/DPDK/dpdk.git) 42*6f9cba8fSJoseph Mingrone 43*6f9cba8fSJoseph Mingrone You shall modify the file $RTE_SDK/$RTE_TARGET/.config and set: 44*6f9cba8fSJoseph Mingrone CONFIG_RTE_BUILD_SHARED_LIB=y 45*6f9cba8fSJoseph Mingrone By the following command: 46*6f9cba8fSJoseph Mingrone sed -i 's/CONFIG_RTE_BUILD_SHARED_LIB=n/CONFIG_RTE_BUILD_SHARED_LIB=y/' $RTE_SDK/$RTE_TARGET/.config 47*6f9cba8fSJoseph Mingrone 48*6f9cba8fSJoseph Mingrone 2. Launch l2fwd that is one of DPDK examples correctly, and get device information. 49*6f9cba8fSJoseph Mingrone 50*6f9cba8fSJoseph Mingrone You shall learn how to bind nic with DPDK-compatible driver by $RTE_SDK/usertools/dpdk-devbind.py, such as igb_uio. 
51*6f9cba8fSJoseph Mingrone And enable hugepages by dpdk-setup.sh 52*6f9cba8fSJoseph Mingrone 53*6f9cba8fSJoseph Mingrone Then launch the l2fwd with dynamic driver support. For example: 54*6f9cba8fSJoseph Mingrone $RTE_SDK/examples/l2fwd/$RTE_TARGET/l2fwd -dlibrte_pmd_e1000.so -dlibrte_pmd_ixgbe.so -dlibrte_mempool_ring.so -- -p 0x1 55*6f9cba8fSJoseph Mingrone 56*6f9cba8fSJoseph Mingrone 3. Compile libpcap with dpdk options. 57*6f9cba8fSJoseph Mingrone 58*6f9cba8fSJoseph Mingrone If DPDK has not been found automatically, you shall export DPDK environment variable which are used for compiling DPDK. And then pass $RTE_SDK/$RTE_TARGET to --with-dpdk or -DDPDK_DIR 59*6f9cba8fSJoseph Mingrone 60*6f9cba8fSJoseph Mingrone export RTE_SDK={your DPDK base directory} 61*6f9cba8fSJoseph Mingrone export RTE_TARGET={your target name} 62*6f9cba8fSJoseph Mingrone 63*6f9cba8fSJoseph Mingrone 3.1 With configure 64*6f9cba8fSJoseph Mingrone 65*6f9cba8fSJoseph Mingrone ./configure --with-dpdk=$RTE_SDK/$RTE_TARGET && make -s all && make -s testprogs && make install 66*6f9cba8fSJoseph Mingrone 67*6f9cba8fSJoseph Mingrone 3.2 With cmake 68*6f9cba8fSJoseph Mingrone 69*6f9cba8fSJoseph Mingrone mkdir -p build && cd build && cmake -DDPDK_DIR=$RTE_SDK/$RTE_TARGET ../ && make -s all && make -s testprogs && make install 70*6f9cba8fSJoseph Mingrone 71*6f9cba8fSJoseph Mingrone 4. Link your own program with libpcap, and use DPDK with the device name as dpdk:{portid}, such as dpdk:0. 
72*6f9cba8fSJoseph Mingrone And you shall set DPDK configure options by environment variable DPDK_CFG 73*6f9cba8fSJoseph Mingrone For example, the testprogs/capturetest could be launched by: 74*6f9cba8fSJoseph Mingrone 75*6f9cba8fSJoseph Mingrone env DPDK_CFG="--log-level=debug -l0 -dlibrte_pmd_e1000.so -dlibrte_pmd_ixgbe.so -dlibrte_mempool_ring.so" ./capturetest -i dpdk:0 76*6f9cba8fSJoseph Mingrone */ 77*6f9cba8fSJoseph Mingrone 78*6f9cba8fSJoseph Mingrone #ifdef HAVE_CONFIG_H 79*6f9cba8fSJoseph Mingrone #include <config.h> 80*6f9cba8fSJoseph Mingrone #endif 81*6f9cba8fSJoseph Mingrone 82*6f9cba8fSJoseph Mingrone #include <errno.h> 83*6f9cba8fSJoseph Mingrone #include <netdb.h> 84*6f9cba8fSJoseph Mingrone #include <stdio.h> 85*6f9cba8fSJoseph Mingrone #include <stdlib.h> 86*6f9cba8fSJoseph Mingrone #include <string.h> 87*6f9cba8fSJoseph Mingrone #include <unistd.h> 88*6f9cba8fSJoseph Mingrone #include <limits.h> /* for INT_MAX */ 89*6f9cba8fSJoseph Mingrone #include <time.h> 90*6f9cba8fSJoseph Mingrone 91*6f9cba8fSJoseph Mingrone #include <sys/time.h> 92*6f9cba8fSJoseph Mingrone 93*6f9cba8fSJoseph Mingrone //header for calling dpdk 94*6f9cba8fSJoseph Mingrone #include <rte_config.h> 95*6f9cba8fSJoseph Mingrone #include <rte_common.h> 96*6f9cba8fSJoseph Mingrone #include <rte_errno.h> 97*6f9cba8fSJoseph Mingrone #include <rte_log.h> 98*6f9cba8fSJoseph Mingrone #include <rte_malloc.h> 99*6f9cba8fSJoseph Mingrone #include <rte_memory.h> 100*6f9cba8fSJoseph Mingrone #include <rte_eal.h> 101*6f9cba8fSJoseph Mingrone #include <rte_launch.h> 102*6f9cba8fSJoseph Mingrone #include <rte_atomic.h> 103*6f9cba8fSJoseph Mingrone #include <rte_cycles.h> 104*6f9cba8fSJoseph Mingrone #include <rte_lcore.h> 105*6f9cba8fSJoseph Mingrone #include <rte_per_lcore.h> 106*6f9cba8fSJoseph Mingrone #include <rte_branch_prediction.h> 107*6f9cba8fSJoseph Mingrone #include <rte_interrupts.h> 108*6f9cba8fSJoseph Mingrone #include <rte_random.h> 109*6f9cba8fSJoseph Mingrone #include 
<rte_debug.h> 110*6f9cba8fSJoseph Mingrone #include <rte_ether.h> 111*6f9cba8fSJoseph Mingrone #include <rte_ethdev.h> 112*6f9cba8fSJoseph Mingrone #include <rte_mempool.h> 113*6f9cba8fSJoseph Mingrone #include <rte_mbuf.h> 114*6f9cba8fSJoseph Mingrone #include <rte_bus.h> 115*6f9cba8fSJoseph Mingrone 116*6f9cba8fSJoseph Mingrone #include "pcap-int.h" 117*6f9cba8fSJoseph Mingrone #include "pcap-dpdk.h" 118*6f9cba8fSJoseph Mingrone 119*6f9cba8fSJoseph Mingrone /* 120*6f9cba8fSJoseph Mingrone * Deal with API changes that break source compatibility. 121*6f9cba8fSJoseph Mingrone */ 122*6f9cba8fSJoseph Mingrone 123*6f9cba8fSJoseph Mingrone #ifdef HAVE_STRUCT_RTE_ETHER_ADDR 124*6f9cba8fSJoseph Mingrone #define ETHER_ADDR_TYPE struct rte_ether_addr 125*6f9cba8fSJoseph Mingrone #else 126*6f9cba8fSJoseph Mingrone #define ETHER_ADDR_TYPE struct ether_addr 127*6f9cba8fSJoseph Mingrone #endif 128*6f9cba8fSJoseph Mingrone 129*6f9cba8fSJoseph Mingrone #define DPDK_DEF_LOG_LEV RTE_LOG_ERR 130*6f9cba8fSJoseph Mingrone // 131*6f9cba8fSJoseph Mingrone // This is set to 0 if we haven't initialized DPDK yet, 1 if we've 132*6f9cba8fSJoseph Mingrone // successfully initialized it, a negative value, which is the negative 133*6f9cba8fSJoseph Mingrone // of the rte_errno from rte_eal_init(), if we tried to initialize it 134*6f9cba8fSJoseph Mingrone // and got an error. 
//
static int is_dpdk_pre_inited=0;

// Identification strings.
#define DPDK_LIB_NAME "libpcap_dpdk"
#define DPDK_DESC "Data Plane Development Kit (DPDK) Interface"
#define DPDK_ERR_PERM_MSG "permission denied, DPDK needs root permission"
#define DPDK_PREFIX "dpdk:"

// EAL configuration: name of the environment variable that carries the
// options, the default option string, and the limits used when the
// options are split into an argv-style vector.
#define DPDK_CFG_ENV_NAME "DPDK_CFG"
#define DPDK_DEF_CFG "--log-level=error -l0 -dlibrte_pmd_e1000.so -dlibrte_pmd_ixgbe.so -dlibrte_mempool_ring.so"
#define DPDK_ARGC_MAX 64
#define DPDK_CFG_MAX_LEN 1024
// Writable copy of the configuration string.
static char dpdk_cfg_buf[DPDK_CFG_MAX_LEN];

// Sizes of device name/description and address string buffers.
#define DPDK_DEV_NAME_MAX 32
#define DPDK_DEV_DESC_MAX 512
#define DPDK_MAC_ADDR_SIZE 32
#define DPDK_PCI_ADDR_SIZE 16
// MAC address string used when no address is available.
#define DPDK_DEF_MAC_ADDR "00:00:00:00:00:00"

// Upper bound for port ids; also used as the "no such port" value.
#define DPDK_PORTID_MAX 65535U

// Polling interval, in milliseconds, of the blocking read loop.
#define DPDK_DEF_MIN_SLEEP_MS 1

// Names of DPDK objects.
#define MBUF_POOL_NAME "mbuf_pool"
#define DPDK_TX_BUF_NAME "tx_buffer"
//The number of elements in the mbuf pool.
156*6f9cba8fSJoseph Mingrone #define DPDK_NB_MBUFS 8192U 157*6f9cba8fSJoseph Mingrone #define MEMPOOL_CACHE_SIZE 256 158*6f9cba8fSJoseph Mingrone #define MAX_PKT_BURST 32 159*6f9cba8fSJoseph Mingrone // Configurable number of RX/TX ring descriptors 160*6f9cba8fSJoseph Mingrone #define RTE_TEST_RX_DESC_DEFAULT 1024 161*6f9cba8fSJoseph Mingrone #define RTE_TEST_TX_DESC_DEFAULT 1024 162*6f9cba8fSJoseph Mingrone 163*6f9cba8fSJoseph Mingrone static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT; 164*6f9cba8fSJoseph Mingrone static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT; 165*6f9cba8fSJoseph Mingrone 166*6f9cba8fSJoseph Mingrone #ifdef RTE_ETHER_MAX_JUMBO_FRAME_LEN 167*6f9cba8fSJoseph Mingrone #define RTE_ETH_PCAP_SNAPLEN RTE_ETHER_MAX_JUMBO_FRAME_LEN 168*6f9cba8fSJoseph Mingrone #else 169*6f9cba8fSJoseph Mingrone #define RTE_ETH_PCAP_SNAPLEN ETHER_MAX_JUMBO_FRAME_LEN 170*6f9cba8fSJoseph Mingrone #endif 171*6f9cba8fSJoseph Mingrone 172*6f9cba8fSJoseph Mingrone static struct rte_eth_dev_tx_buffer *tx_buffer; 173*6f9cba8fSJoseph Mingrone 174*6f9cba8fSJoseph Mingrone struct dpdk_ts_helper{ 175*6f9cba8fSJoseph Mingrone struct timeval start_time; 176*6f9cba8fSJoseph Mingrone uint64_t start_cycles; 177*6f9cba8fSJoseph Mingrone uint64_t hz; 178*6f9cba8fSJoseph Mingrone }; 179*6f9cba8fSJoseph Mingrone struct pcap_dpdk{ 180*6f9cba8fSJoseph Mingrone pcap_t * orig; 181*6f9cba8fSJoseph Mingrone uint16_t portid; // portid of DPDK 182*6f9cba8fSJoseph Mingrone int must_clear_promisc; 183*6f9cba8fSJoseph Mingrone uint64_t bpf_drop; 184*6f9cba8fSJoseph Mingrone int nonblock; 185*6f9cba8fSJoseph Mingrone struct timeval required_select_timeout; 186*6f9cba8fSJoseph Mingrone struct timeval prev_ts; 187*6f9cba8fSJoseph Mingrone struct rte_eth_stats prev_stats; 188*6f9cba8fSJoseph Mingrone struct timeval curr_ts; 189*6f9cba8fSJoseph Mingrone struct rte_eth_stats curr_stats; 190*6f9cba8fSJoseph Mingrone uint64_t pps; 191*6f9cba8fSJoseph Mingrone uint64_t bps; 192*6f9cba8fSJoseph Mingrone struct 
rte_mempool * pktmbuf_pool; 193*6f9cba8fSJoseph Mingrone struct dpdk_ts_helper ts_helper; 194*6f9cba8fSJoseph Mingrone ETHER_ADDR_TYPE eth_addr; 195*6f9cba8fSJoseph Mingrone char mac_addr[DPDK_MAC_ADDR_SIZE]; 196*6f9cba8fSJoseph Mingrone char pci_addr[DPDK_PCI_ADDR_SIZE]; 197*6f9cba8fSJoseph Mingrone unsigned char pcap_tmp_buf[RTE_ETH_PCAP_SNAPLEN]; 198*6f9cba8fSJoseph Mingrone }; 199*6f9cba8fSJoseph Mingrone 200*6f9cba8fSJoseph Mingrone static struct rte_eth_conf port_conf = { 201*6f9cba8fSJoseph Mingrone .rxmode = { 202*6f9cba8fSJoseph Mingrone .split_hdr_size = 0, 203*6f9cba8fSJoseph Mingrone }, 204*6f9cba8fSJoseph Mingrone .txmode = { 205*6f9cba8fSJoseph Mingrone .mq_mode = ETH_MQ_TX_NONE, 206*6f9cba8fSJoseph Mingrone }, 207*6f9cba8fSJoseph Mingrone }; 208*6f9cba8fSJoseph Mingrone 209*6f9cba8fSJoseph Mingrone static void dpdk_fmt_errmsg_for_rte_errno(char *, size_t, int, 210*6f9cba8fSJoseph Mingrone PCAP_FORMAT_STRING(const char *), ...) PCAP_PRINTFLIKE(4, 5); 211*6f9cba8fSJoseph Mingrone 212*6f9cba8fSJoseph Mingrone /* 213*6f9cba8fSJoseph Mingrone * Generate an error message based on a format, arguments, and an 214*6f9cba8fSJoseph Mingrone * rte_errno, with a message for the rte_errno after the formatted output. 215*6f9cba8fSJoseph Mingrone */ 216*6f9cba8fSJoseph Mingrone static void dpdk_fmt_errmsg_for_rte_errno(char *errbuf, size_t errbuflen, 217*6f9cba8fSJoseph Mingrone int errnum, const char *fmt, ...) 
218*6f9cba8fSJoseph Mingrone { 219*6f9cba8fSJoseph Mingrone va_list ap; 220*6f9cba8fSJoseph Mingrone size_t msglen; 221*6f9cba8fSJoseph Mingrone char *p; 222*6f9cba8fSJoseph Mingrone size_t errbuflen_remaining; 223*6f9cba8fSJoseph Mingrone 224*6f9cba8fSJoseph Mingrone va_start(ap, fmt); 225*6f9cba8fSJoseph Mingrone vsnprintf(errbuf, errbuflen, fmt, ap); 226*6f9cba8fSJoseph Mingrone va_end(ap); 227*6f9cba8fSJoseph Mingrone msglen = strlen(errbuf); 228*6f9cba8fSJoseph Mingrone 229*6f9cba8fSJoseph Mingrone /* 230*6f9cba8fSJoseph Mingrone * Do we have enough space to append ": "? 231*6f9cba8fSJoseph Mingrone * Including the terminating '\0', that's 3 bytes. 232*6f9cba8fSJoseph Mingrone */ 233*6f9cba8fSJoseph Mingrone if (msglen + 3 > errbuflen) { 234*6f9cba8fSJoseph Mingrone /* No - just give them what we've produced. */ 235*6f9cba8fSJoseph Mingrone return; 236*6f9cba8fSJoseph Mingrone } 237*6f9cba8fSJoseph Mingrone p = errbuf + msglen; 238*6f9cba8fSJoseph Mingrone errbuflen_remaining = errbuflen - msglen; 239*6f9cba8fSJoseph Mingrone *p++ = ':'; 240*6f9cba8fSJoseph Mingrone *p++ = ' '; 241*6f9cba8fSJoseph Mingrone *p = '\0'; 242*6f9cba8fSJoseph Mingrone msglen += 2; 243*6f9cba8fSJoseph Mingrone errbuflen_remaining -= 2; 244*6f9cba8fSJoseph Mingrone 245*6f9cba8fSJoseph Mingrone /* 246*6f9cba8fSJoseph Mingrone * Now append the string for the error code. 247*6f9cba8fSJoseph Mingrone * rte_strerror() is thread-safe, at least as of dpdk 18.11, 248*6f9cba8fSJoseph Mingrone * unlike strerror() - it uses strerror_r() rather than strerror() 249*6f9cba8fSJoseph Mingrone * for UN*X errno values, and prints to what I assume is a per-thread 250*6f9cba8fSJoseph Mingrone * buffer (based on the "PER_LCORE" in "RTE_DEFINE_PER_LCORE" used 251*6f9cba8fSJoseph Mingrone * to declare the buffers statically) for DPDK errors. 
252*6f9cba8fSJoseph Mingrone */ 253*6f9cba8fSJoseph Mingrone snprintf(p, errbuflen_remaining, "%s", rte_strerror(errnum)); 254*6f9cba8fSJoseph Mingrone } 255*6f9cba8fSJoseph Mingrone 256*6f9cba8fSJoseph Mingrone static int dpdk_init_timer(struct pcap_dpdk *pd){ 257*6f9cba8fSJoseph Mingrone gettimeofday(&(pd->ts_helper.start_time),NULL); 258*6f9cba8fSJoseph Mingrone pd->ts_helper.start_cycles = rte_get_timer_cycles(); 259*6f9cba8fSJoseph Mingrone pd->ts_helper.hz = rte_get_timer_hz(); 260*6f9cba8fSJoseph Mingrone if (pd->ts_helper.hz == 0){ 261*6f9cba8fSJoseph Mingrone return -1; 262*6f9cba8fSJoseph Mingrone } 263*6f9cba8fSJoseph Mingrone return 0; 264*6f9cba8fSJoseph Mingrone } 265*6f9cba8fSJoseph Mingrone static inline void calculate_timestamp(struct dpdk_ts_helper *helper,struct timeval *ts) 266*6f9cba8fSJoseph Mingrone { 267*6f9cba8fSJoseph Mingrone uint64_t cycles; 268*6f9cba8fSJoseph Mingrone // delta 269*6f9cba8fSJoseph Mingrone struct timeval cur_time; 270*6f9cba8fSJoseph Mingrone cycles = rte_get_timer_cycles() - helper->start_cycles; 271*6f9cba8fSJoseph Mingrone cur_time.tv_sec = (time_t)(cycles/helper->hz); 272*6f9cba8fSJoseph Mingrone cur_time.tv_usec = (suseconds_t)((cycles%helper->hz)*1e6/helper->hz); 273*6f9cba8fSJoseph Mingrone timeradd(&(helper->start_time), &cur_time, ts); 274*6f9cba8fSJoseph Mingrone } 275*6f9cba8fSJoseph Mingrone 276*6f9cba8fSJoseph Mingrone static uint32_t dpdk_gather_data(unsigned char *data, uint32_t len, struct rte_mbuf *mbuf) 277*6f9cba8fSJoseph Mingrone { 278*6f9cba8fSJoseph Mingrone uint32_t total_len = 0; 279*6f9cba8fSJoseph Mingrone while (mbuf && (total_len+mbuf->data_len) < len ){ 280*6f9cba8fSJoseph Mingrone rte_memcpy(data+total_len, rte_pktmbuf_mtod(mbuf,void *),mbuf->data_len); 281*6f9cba8fSJoseph Mingrone total_len+=mbuf->data_len; 282*6f9cba8fSJoseph Mingrone mbuf=mbuf->next; 283*6f9cba8fSJoseph Mingrone } 284*6f9cba8fSJoseph Mingrone return total_len; 285*6f9cba8fSJoseph Mingrone } 286*6f9cba8fSJoseph Mingrone 
287*6f9cba8fSJoseph Mingrone 288*6f9cba8fSJoseph Mingrone static int dpdk_read_with_timeout(pcap_t *p, struct rte_mbuf **pkts_burst, const uint16_t burst_cnt){ 289*6f9cba8fSJoseph Mingrone struct pcap_dpdk *pd = (struct pcap_dpdk*)(p->priv); 290*6f9cba8fSJoseph Mingrone int nb_rx = 0; 291*6f9cba8fSJoseph Mingrone int timeout_ms = p->opt.timeout; 292*6f9cba8fSJoseph Mingrone int sleep_ms = 0; 293*6f9cba8fSJoseph Mingrone if (pd->nonblock){ 294*6f9cba8fSJoseph Mingrone // In non-blocking mode, just read once, no matter how many packets are captured. 295*6f9cba8fSJoseph Mingrone nb_rx = (int)rte_eth_rx_burst(pd->portid, 0, pkts_burst, burst_cnt); 296*6f9cba8fSJoseph Mingrone }else{ 297*6f9cba8fSJoseph Mingrone // In blocking mode, read many times until packets are captured or timeout or break_loop is set. 298*6f9cba8fSJoseph Mingrone // if timeout_ms == 0, it may be blocked forever. 299*6f9cba8fSJoseph Mingrone while (timeout_ms == 0 || sleep_ms < timeout_ms){ 300*6f9cba8fSJoseph Mingrone nb_rx = (int)rte_eth_rx_burst(pd->portid, 0, pkts_burst, burst_cnt); 301*6f9cba8fSJoseph Mingrone if (nb_rx){ // got packets within timeout_ms 302*6f9cba8fSJoseph Mingrone break; 303*6f9cba8fSJoseph Mingrone }else{ // no packet arrives at this round. 304*6f9cba8fSJoseph Mingrone if (p->break_loop){ 305*6f9cba8fSJoseph Mingrone break; 306*6f9cba8fSJoseph Mingrone } 307*6f9cba8fSJoseph Mingrone // sleep for a very short while. 308*6f9cba8fSJoseph Mingrone // block sleep is the only choice, since usleep() will impact performance dramatically. 
309*6f9cba8fSJoseph Mingrone rte_delay_us_block(DPDK_DEF_MIN_SLEEP_MS*1000); 310*6f9cba8fSJoseph Mingrone sleep_ms += DPDK_DEF_MIN_SLEEP_MS; 311*6f9cba8fSJoseph Mingrone } 312*6f9cba8fSJoseph Mingrone } 313*6f9cba8fSJoseph Mingrone } 314*6f9cba8fSJoseph Mingrone return nb_rx; 315*6f9cba8fSJoseph Mingrone } 316*6f9cba8fSJoseph Mingrone 317*6f9cba8fSJoseph Mingrone static int pcap_dpdk_dispatch(pcap_t *p, int max_cnt, pcap_handler cb, u_char *cb_arg) 318*6f9cba8fSJoseph Mingrone { 319*6f9cba8fSJoseph Mingrone struct pcap_dpdk *pd = (struct pcap_dpdk*)(p->priv); 320*6f9cba8fSJoseph Mingrone int burst_cnt = 0; 321*6f9cba8fSJoseph Mingrone int nb_rx = 0; 322*6f9cba8fSJoseph Mingrone struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; 323*6f9cba8fSJoseph Mingrone struct rte_mbuf *m; 324*6f9cba8fSJoseph Mingrone struct pcap_pkthdr pcap_header; 325*6f9cba8fSJoseph Mingrone // In DPDK, pkt_len is sum of lengths for all segments. And data_len is for one segment 326*6f9cba8fSJoseph Mingrone uint32_t pkt_len = 0; 327*6f9cba8fSJoseph Mingrone uint32_t caplen = 0; 328*6f9cba8fSJoseph Mingrone u_char *bp = NULL; 329*6f9cba8fSJoseph Mingrone int i=0; 330*6f9cba8fSJoseph Mingrone unsigned int gather_len =0; 331*6f9cba8fSJoseph Mingrone int pkt_cnt = 0; 332*6f9cba8fSJoseph Mingrone u_char *large_buffer=NULL; 333*6f9cba8fSJoseph Mingrone int timeout_ms = p->opt.timeout; 334*6f9cba8fSJoseph Mingrone 335*6f9cba8fSJoseph Mingrone /* 336*6f9cba8fSJoseph Mingrone * This can conceivably process more than INT_MAX packets, 337*6f9cba8fSJoseph Mingrone * which would overflow the packet count, causing it either 338*6f9cba8fSJoseph Mingrone * to look like a negative number, and thus cause us to 339*6f9cba8fSJoseph Mingrone * return a value that looks like an error, or overflow 340*6f9cba8fSJoseph Mingrone * back into positive territory, and thus cause us to 341*6f9cba8fSJoseph Mingrone * return a too-low count. 
342*6f9cba8fSJoseph Mingrone * 343*6f9cba8fSJoseph Mingrone * Therefore, if the packet count is unlimited, we clip 344*6f9cba8fSJoseph Mingrone * it at INT_MAX; this routine is not expected to 345*6f9cba8fSJoseph Mingrone * process packets indefinitely, so that's not an issue. 346*6f9cba8fSJoseph Mingrone */ 347*6f9cba8fSJoseph Mingrone if (PACKET_COUNT_IS_UNLIMITED(max_cnt)) 348*6f9cba8fSJoseph Mingrone max_cnt = INT_MAX; 349*6f9cba8fSJoseph Mingrone 350*6f9cba8fSJoseph Mingrone if (max_cnt < MAX_PKT_BURST){ 351*6f9cba8fSJoseph Mingrone burst_cnt = max_cnt; 352*6f9cba8fSJoseph Mingrone }else{ 353*6f9cba8fSJoseph Mingrone burst_cnt = MAX_PKT_BURST; 354*6f9cba8fSJoseph Mingrone } 355*6f9cba8fSJoseph Mingrone 356*6f9cba8fSJoseph Mingrone while( pkt_cnt < max_cnt){ 357*6f9cba8fSJoseph Mingrone if (p->break_loop){ 358*6f9cba8fSJoseph Mingrone p->break_loop = 0; 359*6f9cba8fSJoseph Mingrone return PCAP_ERROR_BREAK; 360*6f9cba8fSJoseph Mingrone } 361*6f9cba8fSJoseph Mingrone // read once in non-blocking mode, or try many times waiting for timeout_ms. 362*6f9cba8fSJoseph Mingrone // if timeout_ms == 0, it will be blocked until one packet arrives or break_loop is set. 
363*6f9cba8fSJoseph Mingrone nb_rx = dpdk_read_with_timeout(p, pkts_burst, burst_cnt); 364*6f9cba8fSJoseph Mingrone if (nb_rx == 0){ 365*6f9cba8fSJoseph Mingrone if (pd->nonblock){ 366*6f9cba8fSJoseph Mingrone RTE_LOG(DEBUG, USER1, "dpdk: no packets available in non-blocking mode.\n"); 367*6f9cba8fSJoseph Mingrone }else{ 368*6f9cba8fSJoseph Mingrone if (p->break_loop){ 369*6f9cba8fSJoseph Mingrone RTE_LOG(DEBUG, USER1, "dpdk: no packets available and break_loop is set in blocking mode.\n"); 370*6f9cba8fSJoseph Mingrone p->break_loop = 0; 371*6f9cba8fSJoseph Mingrone return PCAP_ERROR_BREAK; 372*6f9cba8fSJoseph Mingrone 373*6f9cba8fSJoseph Mingrone } 374*6f9cba8fSJoseph Mingrone RTE_LOG(DEBUG, USER1, "dpdk: no packets available for timeout %d ms in blocking mode.\n", timeout_ms); 375*6f9cba8fSJoseph Mingrone } 376*6f9cba8fSJoseph Mingrone // break if dpdk reads 0 packet, no matter in blocking(timeout) or non-blocking mode. 377*6f9cba8fSJoseph Mingrone break; 378*6f9cba8fSJoseph Mingrone } 379*6f9cba8fSJoseph Mingrone pkt_cnt += nb_rx; 380*6f9cba8fSJoseph Mingrone for ( i = 0; i < nb_rx; i++) { 381*6f9cba8fSJoseph Mingrone m = pkts_burst[i]; 382*6f9cba8fSJoseph Mingrone calculate_timestamp(&(pd->ts_helper),&(pcap_header.ts)); 383*6f9cba8fSJoseph Mingrone pkt_len = rte_pktmbuf_pkt_len(m); 384*6f9cba8fSJoseph Mingrone // caplen = min(pkt_len, p->snapshot); 385*6f9cba8fSJoseph Mingrone // caplen will not be changed, no matter how long the rte_pktmbuf 386*6f9cba8fSJoseph Mingrone caplen = pkt_len < (uint32_t)p->snapshot ? 
pkt_len: (uint32_t)p->snapshot; 387*6f9cba8fSJoseph Mingrone pcap_header.caplen = caplen; 388*6f9cba8fSJoseph Mingrone pcap_header.len = pkt_len; 389*6f9cba8fSJoseph Mingrone // volatile prefetch 390*6f9cba8fSJoseph Mingrone rte_prefetch0(rte_pktmbuf_mtod(m, void *)); 391*6f9cba8fSJoseph Mingrone bp = NULL; 392*6f9cba8fSJoseph Mingrone if (m->nb_segs == 1) 393*6f9cba8fSJoseph Mingrone { 394*6f9cba8fSJoseph Mingrone bp = rte_pktmbuf_mtod(m, u_char *); 395*6f9cba8fSJoseph Mingrone }else{ 396*6f9cba8fSJoseph Mingrone // use fast buffer pcap_tmp_buf if pkt_len is small, no need to call malloc and free 397*6f9cba8fSJoseph Mingrone if ( pkt_len <= RTE_ETH_PCAP_SNAPLEN) 398*6f9cba8fSJoseph Mingrone { 399*6f9cba8fSJoseph Mingrone gather_len = dpdk_gather_data(pd->pcap_tmp_buf, RTE_ETH_PCAP_SNAPLEN, m); 400*6f9cba8fSJoseph Mingrone bp = pd->pcap_tmp_buf; 401*6f9cba8fSJoseph Mingrone }else{ 402*6f9cba8fSJoseph Mingrone // need call free later 403*6f9cba8fSJoseph Mingrone large_buffer = (u_char *)malloc(caplen*sizeof(u_char)); 404*6f9cba8fSJoseph Mingrone gather_len = dpdk_gather_data(large_buffer, caplen, m); 405*6f9cba8fSJoseph Mingrone bp = large_buffer; 406*6f9cba8fSJoseph Mingrone } 407*6f9cba8fSJoseph Mingrone 408*6f9cba8fSJoseph Mingrone } 409*6f9cba8fSJoseph Mingrone if (bp){ 410*6f9cba8fSJoseph Mingrone if (p->fcode.bf_insns==NULL || pcap_filter(p->fcode.bf_insns, bp, pcap_header.len, pcap_header.caplen)){ 411*6f9cba8fSJoseph Mingrone cb(cb_arg, &pcap_header, bp); 412*6f9cba8fSJoseph Mingrone }else{ 413*6f9cba8fSJoseph Mingrone pd->bpf_drop++; 414*6f9cba8fSJoseph Mingrone } 415*6f9cba8fSJoseph Mingrone } 416*6f9cba8fSJoseph Mingrone //free all pktmbuf 417*6f9cba8fSJoseph Mingrone rte_pktmbuf_free(m); 418*6f9cba8fSJoseph Mingrone if (large_buffer){ 419*6f9cba8fSJoseph Mingrone free(large_buffer); 420*6f9cba8fSJoseph Mingrone large_buffer=NULL; 421*6f9cba8fSJoseph Mingrone } 422*6f9cba8fSJoseph Mingrone } 423*6f9cba8fSJoseph Mingrone } 424*6f9cba8fSJoseph Mingrone 
return pkt_cnt; 425*6f9cba8fSJoseph Mingrone } 426*6f9cba8fSJoseph Mingrone 427*6f9cba8fSJoseph Mingrone static int pcap_dpdk_inject(pcap_t *p, const void *buf _U_, int size _U_) 428*6f9cba8fSJoseph Mingrone { 429*6f9cba8fSJoseph Mingrone //not implemented yet 430*6f9cba8fSJoseph Mingrone pcap_strlcpy(p->errbuf, 431*6f9cba8fSJoseph Mingrone "dpdk error: Inject function has not been implemented yet", 432*6f9cba8fSJoseph Mingrone PCAP_ERRBUF_SIZE); 433*6f9cba8fSJoseph Mingrone return PCAP_ERROR; 434*6f9cba8fSJoseph Mingrone } 435*6f9cba8fSJoseph Mingrone 436*6f9cba8fSJoseph Mingrone static void pcap_dpdk_close(pcap_t *p) 437*6f9cba8fSJoseph Mingrone { 438*6f9cba8fSJoseph Mingrone struct pcap_dpdk *pd = p->priv; 439*6f9cba8fSJoseph Mingrone if (pd==NULL) 440*6f9cba8fSJoseph Mingrone { 441*6f9cba8fSJoseph Mingrone return; 442*6f9cba8fSJoseph Mingrone } 443*6f9cba8fSJoseph Mingrone if (pd->must_clear_promisc) 444*6f9cba8fSJoseph Mingrone { 445*6f9cba8fSJoseph Mingrone rte_eth_promiscuous_disable(pd->portid); 446*6f9cba8fSJoseph Mingrone } 447*6f9cba8fSJoseph Mingrone rte_eth_dev_stop(pd->portid); 448*6f9cba8fSJoseph Mingrone rte_eth_dev_close(pd->portid); 449*6f9cba8fSJoseph Mingrone pcap_cleanup_live_common(p); 450*6f9cba8fSJoseph Mingrone } 451*6f9cba8fSJoseph Mingrone 452*6f9cba8fSJoseph Mingrone static void nic_stats_display(struct pcap_dpdk *pd) 453*6f9cba8fSJoseph Mingrone { 454*6f9cba8fSJoseph Mingrone uint16_t portid = pd->portid; 455*6f9cba8fSJoseph Mingrone struct rte_eth_stats stats; 456*6f9cba8fSJoseph Mingrone rte_eth_stats_get(portid, &stats); 457*6f9cba8fSJoseph Mingrone RTE_LOG(INFO,USER1, "portid:%d, RX-packets: %-10"PRIu64" RX-errors: %-10"PRIu64 458*6f9cba8fSJoseph Mingrone " RX-bytes: %-10"PRIu64" RX-Imissed: %-10"PRIu64"\n", portid, stats.ipackets, stats.ierrors, 459*6f9cba8fSJoseph Mingrone stats.ibytes,stats.imissed); 460*6f9cba8fSJoseph Mingrone RTE_LOG(INFO,USER1, "portid:%d, RX-PPS: %-10"PRIu64" RX-Mbps: %.2lf\n", portid, pd->pps, pd->bps/1e6f 
); 461*6f9cba8fSJoseph Mingrone } 462*6f9cba8fSJoseph Mingrone 463*6f9cba8fSJoseph Mingrone static int pcap_dpdk_stats(pcap_t *p, struct pcap_stat *ps) 464*6f9cba8fSJoseph Mingrone { 465*6f9cba8fSJoseph Mingrone struct pcap_dpdk *pd = p->priv; 466*6f9cba8fSJoseph Mingrone calculate_timestamp(&(pd->ts_helper), &(pd->curr_ts)); 467*6f9cba8fSJoseph Mingrone rte_eth_stats_get(pd->portid,&(pd->curr_stats)); 468*6f9cba8fSJoseph Mingrone if (ps){ 469*6f9cba8fSJoseph Mingrone ps->ps_recv = pd->curr_stats.ipackets; 470*6f9cba8fSJoseph Mingrone ps->ps_drop = pd->curr_stats.ierrors; 471*6f9cba8fSJoseph Mingrone ps->ps_drop += pd->bpf_drop; 472*6f9cba8fSJoseph Mingrone ps->ps_ifdrop = pd->curr_stats.imissed; 473*6f9cba8fSJoseph Mingrone } 474*6f9cba8fSJoseph Mingrone uint64_t delta_pkt = pd->curr_stats.ipackets - pd->prev_stats.ipackets; 475*6f9cba8fSJoseph Mingrone struct timeval delta_tm; 476*6f9cba8fSJoseph Mingrone timersub(&(pd->curr_ts),&(pd->prev_ts), &delta_tm); 477*6f9cba8fSJoseph Mingrone uint64_t delta_usec = delta_tm.tv_sec*1e6+delta_tm.tv_usec; 478*6f9cba8fSJoseph Mingrone uint64_t delta_bit = (pd->curr_stats.ibytes-pd->prev_stats.ibytes)*8; 479*6f9cba8fSJoseph Mingrone RTE_LOG(DEBUG, USER1, "delta_usec: %-10"PRIu64" delta_pkt: %-10"PRIu64" delta_bit: %-10"PRIu64"\n", delta_usec, delta_pkt, delta_bit); 480*6f9cba8fSJoseph Mingrone pd->pps = (uint64_t)(delta_pkt*1e6f/delta_usec); 481*6f9cba8fSJoseph Mingrone pd->bps = (uint64_t)(delta_bit*1e6f/delta_usec); 482*6f9cba8fSJoseph Mingrone nic_stats_display(pd); 483*6f9cba8fSJoseph Mingrone pd->prev_stats = pd->curr_stats; 484*6f9cba8fSJoseph Mingrone pd->prev_ts = pd->curr_ts; 485*6f9cba8fSJoseph Mingrone return 0; 486*6f9cba8fSJoseph Mingrone } 487*6f9cba8fSJoseph Mingrone 488*6f9cba8fSJoseph Mingrone static int pcap_dpdk_setnonblock(pcap_t *p, int nonblock){ 489*6f9cba8fSJoseph Mingrone struct pcap_dpdk *pd = (struct pcap_dpdk*)(p->priv); 490*6f9cba8fSJoseph Mingrone pd->nonblock = nonblock; 491*6f9cba8fSJoseph 
Mingrone return 0; 492*6f9cba8fSJoseph Mingrone } 493*6f9cba8fSJoseph Mingrone 494*6f9cba8fSJoseph Mingrone static int pcap_dpdk_getnonblock(pcap_t *p){ 495*6f9cba8fSJoseph Mingrone struct pcap_dpdk *pd = (struct pcap_dpdk*)(p->priv); 496*6f9cba8fSJoseph Mingrone return pd->nonblock; 497*6f9cba8fSJoseph Mingrone } 498*6f9cba8fSJoseph Mingrone static int check_link_status(uint16_t portid, struct rte_eth_link *plink) 499*6f9cba8fSJoseph Mingrone { 500*6f9cba8fSJoseph Mingrone // wait up to 9 seconds to get link status 501*6f9cba8fSJoseph Mingrone rte_eth_link_get(portid, plink); 502*6f9cba8fSJoseph Mingrone return plink->link_status == ETH_LINK_UP; 503*6f9cba8fSJoseph Mingrone } 504*6f9cba8fSJoseph Mingrone static void eth_addr_str(ETHER_ADDR_TYPE *addrp, char* mac_str, int len) 505*6f9cba8fSJoseph Mingrone { 506*6f9cba8fSJoseph Mingrone int offset=0; 507*6f9cba8fSJoseph Mingrone if (addrp == NULL){ 508*6f9cba8fSJoseph Mingrone snprintf(mac_str, len-1, DPDK_DEF_MAC_ADDR); 509*6f9cba8fSJoseph Mingrone return; 510*6f9cba8fSJoseph Mingrone } 511*6f9cba8fSJoseph Mingrone for (int i=0; i<6; i++) 512*6f9cba8fSJoseph Mingrone { 513*6f9cba8fSJoseph Mingrone if (offset >= len) 514*6f9cba8fSJoseph Mingrone { // buffer overflow 515*6f9cba8fSJoseph Mingrone return; 516*6f9cba8fSJoseph Mingrone } 517*6f9cba8fSJoseph Mingrone if (i==0) 518*6f9cba8fSJoseph Mingrone { 519*6f9cba8fSJoseph Mingrone snprintf(mac_str+offset, len-1-offset, "%02X",addrp->addr_bytes[i]); 520*6f9cba8fSJoseph Mingrone offset+=2; // FF 521*6f9cba8fSJoseph Mingrone }else{ 522*6f9cba8fSJoseph Mingrone snprintf(mac_str+offset, len-1-offset, ":%02X", addrp->addr_bytes[i]); 523*6f9cba8fSJoseph Mingrone offset+=3; // :FF 524*6f9cba8fSJoseph Mingrone } 525*6f9cba8fSJoseph Mingrone } 526*6f9cba8fSJoseph Mingrone return; 527*6f9cba8fSJoseph Mingrone } 528*6f9cba8fSJoseph Mingrone // return portid by device name, otherwise return -1 529*6f9cba8fSJoseph Mingrone static uint16_t portid_by_device(char * device) 
530*6f9cba8fSJoseph Mingrone { 531*6f9cba8fSJoseph Mingrone uint16_t ret = DPDK_PORTID_MAX; 532*6f9cba8fSJoseph Mingrone int len = strlen(device); 533*6f9cba8fSJoseph Mingrone int prefix_len = strlen(DPDK_PREFIX); 534*6f9cba8fSJoseph Mingrone unsigned long ret_ul = 0L; 535*6f9cba8fSJoseph Mingrone char *pEnd; 536*6f9cba8fSJoseph Mingrone if (len<=prefix_len || strncmp(device, DPDK_PREFIX, prefix_len)) // check prefix dpdk: 537*6f9cba8fSJoseph Mingrone { 538*6f9cba8fSJoseph Mingrone return ret; 539*6f9cba8fSJoseph Mingrone } 540*6f9cba8fSJoseph Mingrone //check all chars are digital 541*6f9cba8fSJoseph Mingrone for (int i=prefix_len; device[i]; i++){ 542*6f9cba8fSJoseph Mingrone if (device[i]<'0' || device[i]>'9'){ 543*6f9cba8fSJoseph Mingrone return ret; 544*6f9cba8fSJoseph Mingrone } 545*6f9cba8fSJoseph Mingrone } 546*6f9cba8fSJoseph Mingrone ret_ul = strtoul(&(device[prefix_len]), &pEnd, 10); 547*6f9cba8fSJoseph Mingrone if (pEnd == &(device[prefix_len]) || *pEnd != '\0'){ 548*6f9cba8fSJoseph Mingrone return ret; 549*6f9cba8fSJoseph Mingrone } 550*6f9cba8fSJoseph Mingrone // too large for portid 551*6f9cba8fSJoseph Mingrone if (ret_ul >= DPDK_PORTID_MAX){ 552*6f9cba8fSJoseph Mingrone return ret; 553*6f9cba8fSJoseph Mingrone } 554*6f9cba8fSJoseph Mingrone ret = (uint16_t)ret_ul; 555*6f9cba8fSJoseph Mingrone return ret; 556*6f9cba8fSJoseph Mingrone } 557*6f9cba8fSJoseph Mingrone 558*6f9cba8fSJoseph Mingrone static int parse_dpdk_cfg(char* dpdk_cfg,char** dargv) 559*6f9cba8fSJoseph Mingrone { 560*6f9cba8fSJoseph Mingrone int cnt=0; 561*6f9cba8fSJoseph Mingrone memset(dargv,0,sizeof(dargv[0])*DPDK_ARGC_MAX); 562*6f9cba8fSJoseph Mingrone //current process name 563*6f9cba8fSJoseph Mingrone int skip_space = 1; 564*6f9cba8fSJoseph Mingrone int i=0; 565*6f9cba8fSJoseph Mingrone RTE_LOG(INFO, USER1,"dpdk cfg: %s\n",dpdk_cfg); 566*6f9cba8fSJoseph Mingrone // find first non space char 567*6f9cba8fSJoseph Mingrone // The last opt is NULL 568*6f9cba8fSJoseph Mingrone for 
(i=0;dpdk_cfg[i] && cnt<DPDK_ARGC_MAX-1;i++){ 569*6f9cba8fSJoseph Mingrone if (skip_space && dpdk_cfg[i]!=' '){ // not space 570*6f9cba8fSJoseph Mingrone skip_space=!skip_space; // skip normal char 571*6f9cba8fSJoseph Mingrone dargv[cnt++] = dpdk_cfg+i; 572*6f9cba8fSJoseph Mingrone } 573*6f9cba8fSJoseph Mingrone if (!skip_space && dpdk_cfg[i]==' '){ // fint a space 574*6f9cba8fSJoseph Mingrone dpdk_cfg[i]=0x00; // end of this opt 575*6f9cba8fSJoseph Mingrone skip_space=!skip_space; // skip space char 576*6f9cba8fSJoseph Mingrone } 577*6f9cba8fSJoseph Mingrone } 578*6f9cba8fSJoseph Mingrone dargv[cnt]=NULL; 579*6f9cba8fSJoseph Mingrone return cnt; 580*6f9cba8fSJoseph Mingrone } 581*6f9cba8fSJoseph Mingrone 582*6f9cba8fSJoseph Mingrone // only called once 583*6f9cba8fSJoseph Mingrone // Returns: 584*6f9cba8fSJoseph Mingrone // 585*6f9cba8fSJoseph Mingrone // 1 on success; 586*6f9cba8fSJoseph Mingrone // 587*6f9cba8fSJoseph Mingrone // 0 if "the EAL cannot initialize on this system", which we treat as 588*6f9cba8fSJoseph Mingrone // meaning "DPDK isn't available"; 589*6f9cba8fSJoseph Mingrone // 590*6f9cba8fSJoseph Mingrone // a PCAP_ERROR_ code for other errors. 591*6f9cba8fSJoseph Mingrone // 592*6f9cba8fSJoseph Mingrone // If eaccess_not_fatal is non-zero, treat "a permissions issue" the way 593*6f9cba8fSJoseph Mingrone // we treat "the EAL cannot initialize on this system". We use that 594*6f9cba8fSJoseph Mingrone // when trying to find DPDK devices, as we don't want to fail to return 595*6f9cba8fSJoseph Mingrone // *any* devices just because we can't support DPDK; when we're trying 596*6f9cba8fSJoseph Mingrone // to open a device, we need to return a permissions error in that case. 
597*6f9cba8fSJoseph Mingrone static int dpdk_pre_init(char * ebuf, int eaccess_not_fatal) 598*6f9cba8fSJoseph Mingrone { 599*6f9cba8fSJoseph Mingrone int dargv_cnt=0; 600*6f9cba8fSJoseph Mingrone char *dargv[DPDK_ARGC_MAX]; 601*6f9cba8fSJoseph Mingrone char *ptr_dpdk_cfg = NULL; 602*6f9cba8fSJoseph Mingrone int ret; 603*6f9cba8fSJoseph Mingrone // globale var 604*6f9cba8fSJoseph Mingrone if (is_dpdk_pre_inited != 0) 605*6f9cba8fSJoseph Mingrone { 606*6f9cba8fSJoseph Mingrone // already inited; did that succeed? 607*6f9cba8fSJoseph Mingrone if (is_dpdk_pre_inited < 0) 608*6f9cba8fSJoseph Mingrone { 609*6f9cba8fSJoseph Mingrone // failed 610*6f9cba8fSJoseph Mingrone goto error; 611*6f9cba8fSJoseph Mingrone } 612*6f9cba8fSJoseph Mingrone else 613*6f9cba8fSJoseph Mingrone { 614*6f9cba8fSJoseph Mingrone // succeeded 615*6f9cba8fSJoseph Mingrone return 1; 616*6f9cba8fSJoseph Mingrone } 617*6f9cba8fSJoseph Mingrone } 618*6f9cba8fSJoseph Mingrone // init EAL 619*6f9cba8fSJoseph Mingrone ptr_dpdk_cfg = getenv(DPDK_CFG_ENV_NAME); 620*6f9cba8fSJoseph Mingrone // set default log level to debug 621*6f9cba8fSJoseph Mingrone rte_log_set_global_level(DPDK_DEF_LOG_LEV); 622*6f9cba8fSJoseph Mingrone if (ptr_dpdk_cfg == NULL) 623*6f9cba8fSJoseph Mingrone { 624*6f9cba8fSJoseph Mingrone RTE_LOG(INFO,USER1,"env $DPDK_CFG is unset, so using default: %s\n",DPDK_DEF_CFG); 625*6f9cba8fSJoseph Mingrone ptr_dpdk_cfg = DPDK_DEF_CFG; 626*6f9cba8fSJoseph Mingrone } 627*6f9cba8fSJoseph Mingrone memset(dpdk_cfg_buf,0,sizeof(dpdk_cfg_buf)); 628*6f9cba8fSJoseph Mingrone snprintf(dpdk_cfg_buf,DPDK_CFG_MAX_LEN-1,"%s %s",DPDK_LIB_NAME,ptr_dpdk_cfg); 629*6f9cba8fSJoseph Mingrone dargv_cnt = parse_dpdk_cfg(dpdk_cfg_buf,dargv); 630*6f9cba8fSJoseph Mingrone ret = rte_eal_init(dargv_cnt,dargv); 631*6f9cba8fSJoseph Mingrone if (ret == -1) 632*6f9cba8fSJoseph Mingrone { 633*6f9cba8fSJoseph Mingrone // Indicate that we've called rte_eal_init() by setting 634*6f9cba8fSJoseph Mingrone // is_dpdk_pre_inited to 
the negative of the error code, 635*6f9cba8fSJoseph Mingrone // and process the error. 636*6f9cba8fSJoseph Mingrone is_dpdk_pre_inited = -rte_errno; 637*6f9cba8fSJoseph Mingrone goto error; 638*6f9cba8fSJoseph Mingrone } 639*6f9cba8fSJoseph Mingrone // init succeeded, so we do not need to do it again later. 640*6f9cba8fSJoseph Mingrone is_dpdk_pre_inited = 1; 641*6f9cba8fSJoseph Mingrone return 1; 642*6f9cba8fSJoseph Mingrone 643*6f9cba8fSJoseph Mingrone error: 644*6f9cba8fSJoseph Mingrone switch (-is_dpdk_pre_inited) 645*6f9cba8fSJoseph Mingrone { 646*6f9cba8fSJoseph Mingrone case EACCES: 647*6f9cba8fSJoseph Mingrone // This "indicates a permissions issue.". 648*6f9cba8fSJoseph Mingrone RTE_LOG(ERR, USER1, "%s\n", DPDK_ERR_PERM_MSG); 649*6f9cba8fSJoseph Mingrone // If we were told to treat this as just meaning 650*6f9cba8fSJoseph Mingrone // DPDK isn't available, do so. 651*6f9cba8fSJoseph Mingrone if (eaccess_not_fatal) 652*6f9cba8fSJoseph Mingrone return 0; 653*6f9cba8fSJoseph Mingrone // Otherwise report a fatal error. 654*6f9cba8fSJoseph Mingrone snprintf(ebuf, PCAP_ERRBUF_SIZE, 655*6f9cba8fSJoseph Mingrone "DPDK requires that it run as root"); 656*6f9cba8fSJoseph Mingrone return PCAP_ERROR_PERM_DENIED; 657*6f9cba8fSJoseph Mingrone 658*6f9cba8fSJoseph Mingrone case EAGAIN: 659*6f9cba8fSJoseph Mingrone // This "indicates either a bus or system 660*6f9cba8fSJoseph Mingrone // resource was not available, setup may 661*6f9cba8fSJoseph Mingrone // be attempted again." 662*6f9cba8fSJoseph Mingrone // There's no such error in pcap, so I'm 663*6f9cba8fSJoseph Mingrone // not sure what we should do here. 
664*6f9cba8fSJoseph Mingrone snprintf(ebuf, PCAP_ERRBUF_SIZE, 665*6f9cba8fSJoseph Mingrone "Bus or system resource was not available"); 666*6f9cba8fSJoseph Mingrone break; 667*6f9cba8fSJoseph Mingrone 668*6f9cba8fSJoseph Mingrone case EALREADY: 669*6f9cba8fSJoseph Mingrone // This "indicates that the rte_eal_init 670*6f9cba8fSJoseph Mingrone // function has already been called, and 671*6f9cba8fSJoseph Mingrone // cannot be called again." 672*6f9cba8fSJoseph Mingrone // That's not an error; set the "we've 673*6f9cba8fSJoseph Mingrone // been here before" flag and return 674*6f9cba8fSJoseph Mingrone // success. 675*6f9cba8fSJoseph Mingrone is_dpdk_pre_inited = 1; 676*6f9cba8fSJoseph Mingrone return 1; 677*6f9cba8fSJoseph Mingrone 678*6f9cba8fSJoseph Mingrone case EFAULT: 679*6f9cba8fSJoseph Mingrone // This "indicates the tailq configuration 680*6f9cba8fSJoseph Mingrone // name was not found in memory configuration." 681*6f9cba8fSJoseph Mingrone snprintf(ebuf, PCAP_ERRBUF_SIZE, 682*6f9cba8fSJoseph Mingrone "The tailq configuration name was not found in the memory configuration"); 683*6f9cba8fSJoseph Mingrone return PCAP_ERROR; 684*6f9cba8fSJoseph Mingrone 685*6f9cba8fSJoseph Mingrone case EINVAL: 686*6f9cba8fSJoseph Mingrone // This "indicates invalid parameters were 687*6f9cba8fSJoseph Mingrone // passed as argv/argc." Those came from 688*6f9cba8fSJoseph Mingrone // the configuration file. 689*6f9cba8fSJoseph Mingrone snprintf(ebuf, PCAP_ERRBUF_SIZE, 690*6f9cba8fSJoseph Mingrone "The configuration file has invalid parameters"); 691*6f9cba8fSJoseph Mingrone break; 692*6f9cba8fSJoseph Mingrone 693*6f9cba8fSJoseph Mingrone case ENOMEM: 694*6f9cba8fSJoseph Mingrone // This "indicates failure likely caused by 695*6f9cba8fSJoseph Mingrone // an out-of-memory condition." 
696*6f9cba8fSJoseph Mingrone snprintf(ebuf, PCAP_ERRBUF_SIZE, 697*6f9cba8fSJoseph Mingrone "Out of memory"); 698*6f9cba8fSJoseph Mingrone break; 699*6f9cba8fSJoseph Mingrone 700*6f9cba8fSJoseph Mingrone case ENODEV: 701*6f9cba8fSJoseph Mingrone // This "indicates memory setup issues." 702*6f9cba8fSJoseph Mingrone snprintf(ebuf, PCAP_ERRBUF_SIZE, 703*6f9cba8fSJoseph Mingrone "An error occurred setting up memory"); 704*6f9cba8fSJoseph Mingrone break; 705*6f9cba8fSJoseph Mingrone 706*6f9cba8fSJoseph Mingrone case ENOTSUP: 707*6f9cba8fSJoseph Mingrone // This "indicates that the EAL cannot 708*6f9cba8fSJoseph Mingrone // initialize on this system." We treat 709*6f9cba8fSJoseph Mingrone // that as meaning DPDK isn't available 710*6f9cba8fSJoseph Mingrone // on this machine, rather than as a 711*6f9cba8fSJoseph Mingrone // fatal error, and let our caller decide 712*6f9cba8fSJoseph Mingrone // whether that's a fatal error (if trying 713*6f9cba8fSJoseph Mingrone // to activate a DPDK device) or not (if 714*6f9cba8fSJoseph Mingrone // trying to enumerate devices). 715*6f9cba8fSJoseph Mingrone return 0; 716*6f9cba8fSJoseph Mingrone 717*6f9cba8fSJoseph Mingrone case EPROTO: 718*6f9cba8fSJoseph Mingrone // This "indicates that the PCI bus is 719*6f9cba8fSJoseph Mingrone // either not present, or is not readable 720*6f9cba8fSJoseph Mingrone // by the eal." Does "the PCI bus is not 721*6f9cba8fSJoseph Mingrone // present" mean "this machine has no PCI 722*6f9cba8fSJoseph Mingrone // bus", which strikes me as a "not available" 723*6f9cba8fSJoseph Mingrone // case? If so, should "is not readable by 724*6f9cba8fSJoseph Mingrone // the EAL" also something we should treat 725*6f9cba8fSJoseph Mingrone // as a "not available" case? If not, we 726*6f9cba8fSJoseph Mingrone // can't distinguish between the two, so 727*6f9cba8fSJoseph Mingrone // we're stuck. 
728*6f9cba8fSJoseph Mingrone snprintf(ebuf, PCAP_ERRBUF_SIZE, 729*6f9cba8fSJoseph Mingrone "PCI bus is not present or not readable by the EAL"); 730*6f9cba8fSJoseph Mingrone break; 731*6f9cba8fSJoseph Mingrone 732*6f9cba8fSJoseph Mingrone case ENOEXEC: 733*6f9cba8fSJoseph Mingrone // This "indicates that a service core 734*6f9cba8fSJoseph Mingrone // failed to launch successfully." 735*6f9cba8fSJoseph Mingrone snprintf(ebuf, PCAP_ERRBUF_SIZE, 736*6f9cba8fSJoseph Mingrone "A service core failed to launch successfully"); 737*6f9cba8fSJoseph Mingrone break; 738*6f9cba8fSJoseph Mingrone 739*6f9cba8fSJoseph Mingrone default: 740*6f9cba8fSJoseph Mingrone // 741*6f9cba8fSJoseph Mingrone // That's not in the list of errors in 742*6f9cba8fSJoseph Mingrone // the documentation; let it be reported 743*6f9cba8fSJoseph Mingrone // as an error. 744*6f9cba8fSJoseph Mingrone // 745*6f9cba8fSJoseph Mingrone dpdk_fmt_errmsg_for_rte_errno(ebuf, 746*6f9cba8fSJoseph Mingrone PCAP_ERRBUF_SIZE, -is_dpdk_pre_inited, 747*6f9cba8fSJoseph Mingrone "dpdk error: dpdk_pre_init failed"); 748*6f9cba8fSJoseph Mingrone break; 749*6f9cba8fSJoseph Mingrone } 750*6f9cba8fSJoseph Mingrone // Error. 
751*6f9cba8fSJoseph Mingrone return PCAP_ERROR; 752*6f9cba8fSJoseph Mingrone } 753*6f9cba8fSJoseph Mingrone 754*6f9cba8fSJoseph Mingrone static int pcap_dpdk_activate(pcap_t *p) 755*6f9cba8fSJoseph Mingrone { 756*6f9cba8fSJoseph Mingrone struct pcap_dpdk *pd = p->priv; 757*6f9cba8fSJoseph Mingrone pd->orig = p; 758*6f9cba8fSJoseph Mingrone int ret = PCAP_ERROR; 759*6f9cba8fSJoseph Mingrone uint16_t nb_ports=0; 760*6f9cba8fSJoseph Mingrone uint16_t portid= DPDK_PORTID_MAX; 761*6f9cba8fSJoseph Mingrone unsigned nb_mbufs = DPDK_NB_MBUFS; 762*6f9cba8fSJoseph Mingrone struct rte_eth_rxconf rxq_conf; 763*6f9cba8fSJoseph Mingrone struct rte_eth_txconf txq_conf; 764*6f9cba8fSJoseph Mingrone struct rte_eth_conf local_port_conf = port_conf; 765*6f9cba8fSJoseph Mingrone struct rte_eth_dev_info dev_info; 766*6f9cba8fSJoseph Mingrone int is_port_up = 0; 767*6f9cba8fSJoseph Mingrone struct rte_eth_link link; 768*6f9cba8fSJoseph Mingrone do{ 769*6f9cba8fSJoseph Mingrone //init EAL; fail if we have insufficient permission 770*6f9cba8fSJoseph Mingrone char dpdk_pre_init_errbuf[PCAP_ERRBUF_SIZE]; 771*6f9cba8fSJoseph Mingrone ret = dpdk_pre_init(dpdk_pre_init_errbuf, 0); 772*6f9cba8fSJoseph Mingrone if (ret < 0) 773*6f9cba8fSJoseph Mingrone { 774*6f9cba8fSJoseph Mingrone // This returns a negative value on an error. 775*6f9cba8fSJoseph Mingrone snprintf(p->errbuf, PCAP_ERRBUF_SIZE, 776*6f9cba8fSJoseph Mingrone "Can't open device %s: %s", 777*6f9cba8fSJoseph Mingrone p->opt.device, dpdk_pre_init_errbuf); 778*6f9cba8fSJoseph Mingrone // ret is set to the correct error 779*6f9cba8fSJoseph Mingrone break; 780*6f9cba8fSJoseph Mingrone } 781*6f9cba8fSJoseph Mingrone if (ret == 0) 782*6f9cba8fSJoseph Mingrone { 783*6f9cba8fSJoseph Mingrone // This means DPDK isn't available on this machine. 
784*6f9cba8fSJoseph Mingrone snprintf(p->errbuf, PCAP_ERRBUF_SIZE, 785*6f9cba8fSJoseph Mingrone "Can't open device %s: DPDK is not available on this machine", 786*6f9cba8fSJoseph Mingrone p->opt.device); 787*6f9cba8fSJoseph Mingrone return PCAP_ERROR_NO_SUCH_DEVICE; 788*6f9cba8fSJoseph Mingrone } 789*6f9cba8fSJoseph Mingrone 790*6f9cba8fSJoseph Mingrone ret = dpdk_init_timer(pd); 791*6f9cba8fSJoseph Mingrone if (ret<0) 792*6f9cba8fSJoseph Mingrone { 793*6f9cba8fSJoseph Mingrone snprintf(p->errbuf, PCAP_ERRBUF_SIZE, 794*6f9cba8fSJoseph Mingrone "dpdk error: Init timer is zero with device %s", 795*6f9cba8fSJoseph Mingrone p->opt.device); 796*6f9cba8fSJoseph Mingrone ret = PCAP_ERROR; 797*6f9cba8fSJoseph Mingrone break; 798*6f9cba8fSJoseph Mingrone } 799*6f9cba8fSJoseph Mingrone 800*6f9cba8fSJoseph Mingrone nb_ports = rte_eth_dev_count_avail(); 801*6f9cba8fSJoseph Mingrone if (nb_ports == 0) 802*6f9cba8fSJoseph Mingrone { 803*6f9cba8fSJoseph Mingrone snprintf(p->errbuf, PCAP_ERRBUF_SIZE, 804*6f9cba8fSJoseph Mingrone "dpdk error: No Ethernet ports"); 805*6f9cba8fSJoseph Mingrone ret = PCAP_ERROR; 806*6f9cba8fSJoseph Mingrone break; 807*6f9cba8fSJoseph Mingrone } 808*6f9cba8fSJoseph Mingrone 809*6f9cba8fSJoseph Mingrone portid = portid_by_device(p->opt.device); 810*6f9cba8fSJoseph Mingrone if (portid == DPDK_PORTID_MAX){ 811*6f9cba8fSJoseph Mingrone snprintf(p->errbuf, PCAP_ERRBUF_SIZE, 812*6f9cba8fSJoseph Mingrone "dpdk error: portid is invalid. 
device %s", 813*6f9cba8fSJoseph Mingrone p->opt.device); 814*6f9cba8fSJoseph Mingrone ret = PCAP_ERROR_NO_SUCH_DEVICE; 815*6f9cba8fSJoseph Mingrone break; 816*6f9cba8fSJoseph Mingrone } 817*6f9cba8fSJoseph Mingrone 818*6f9cba8fSJoseph Mingrone pd->portid = portid; 819*6f9cba8fSJoseph Mingrone 820*6f9cba8fSJoseph Mingrone if (p->snapshot <= 0 || p->snapshot > MAXIMUM_SNAPLEN) 821*6f9cba8fSJoseph Mingrone { 822*6f9cba8fSJoseph Mingrone p->snapshot = MAXIMUM_SNAPLEN; 823*6f9cba8fSJoseph Mingrone } 824*6f9cba8fSJoseph Mingrone // create the mbuf pool 825*6f9cba8fSJoseph Mingrone pd->pktmbuf_pool = rte_pktmbuf_pool_create(MBUF_POOL_NAME, nb_mbufs, 826*6f9cba8fSJoseph Mingrone MEMPOOL_CACHE_SIZE, 0, RTE_MBUF_DEFAULT_BUF_SIZE, 827*6f9cba8fSJoseph Mingrone rte_socket_id()); 828*6f9cba8fSJoseph Mingrone if (pd->pktmbuf_pool == NULL) 829*6f9cba8fSJoseph Mingrone { 830*6f9cba8fSJoseph Mingrone dpdk_fmt_errmsg_for_rte_errno(p->errbuf, 831*6f9cba8fSJoseph Mingrone PCAP_ERRBUF_SIZE, rte_errno, 832*6f9cba8fSJoseph Mingrone "dpdk error: Cannot init mbuf pool"); 833*6f9cba8fSJoseph Mingrone ret = PCAP_ERROR; 834*6f9cba8fSJoseph Mingrone break; 835*6f9cba8fSJoseph Mingrone } 836*6f9cba8fSJoseph Mingrone // config dev 837*6f9cba8fSJoseph Mingrone rte_eth_dev_info_get(portid, &dev_info); 838*6f9cba8fSJoseph Mingrone if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE) 839*6f9cba8fSJoseph Mingrone { 840*6f9cba8fSJoseph Mingrone local_port_conf.txmode.offloads |=DEV_TX_OFFLOAD_MBUF_FAST_FREE; 841*6f9cba8fSJoseph Mingrone } 842*6f9cba8fSJoseph Mingrone // only support 1 queue 843*6f9cba8fSJoseph Mingrone ret = rte_eth_dev_configure(portid, 1, 1, &local_port_conf); 844*6f9cba8fSJoseph Mingrone if (ret < 0) 845*6f9cba8fSJoseph Mingrone { 846*6f9cba8fSJoseph Mingrone dpdk_fmt_errmsg_for_rte_errno(p->errbuf, 847*6f9cba8fSJoseph Mingrone PCAP_ERRBUF_SIZE, -ret, 848*6f9cba8fSJoseph Mingrone "dpdk error: Cannot configure device: port=%u", 849*6f9cba8fSJoseph Mingrone portid); 
850*6f9cba8fSJoseph Mingrone ret = PCAP_ERROR; 851*6f9cba8fSJoseph Mingrone break; 852*6f9cba8fSJoseph Mingrone } 853*6f9cba8fSJoseph Mingrone // adjust rx tx 854*6f9cba8fSJoseph Mingrone ret = rte_eth_dev_adjust_nb_rx_tx_desc(portid, &nb_rxd, &nb_txd); 855*6f9cba8fSJoseph Mingrone if (ret < 0) 856*6f9cba8fSJoseph Mingrone { 857*6f9cba8fSJoseph Mingrone dpdk_fmt_errmsg_for_rte_errno(p->errbuf, 858*6f9cba8fSJoseph Mingrone PCAP_ERRBUF_SIZE, -ret, 859*6f9cba8fSJoseph Mingrone "dpdk error: Cannot adjust number of descriptors: port=%u", 860*6f9cba8fSJoseph Mingrone portid); 861*6f9cba8fSJoseph Mingrone ret = PCAP_ERROR; 862*6f9cba8fSJoseph Mingrone break; 863*6f9cba8fSJoseph Mingrone } 864*6f9cba8fSJoseph Mingrone // get MAC addr 865*6f9cba8fSJoseph Mingrone rte_eth_macaddr_get(portid, &(pd->eth_addr)); 866*6f9cba8fSJoseph Mingrone eth_addr_str(&(pd->eth_addr), pd->mac_addr, DPDK_MAC_ADDR_SIZE-1); 867*6f9cba8fSJoseph Mingrone 868*6f9cba8fSJoseph Mingrone // init one RX queue 869*6f9cba8fSJoseph Mingrone rxq_conf = dev_info.default_rxconf; 870*6f9cba8fSJoseph Mingrone rxq_conf.offloads = local_port_conf.rxmode.offloads; 871*6f9cba8fSJoseph Mingrone ret = rte_eth_rx_queue_setup(portid, 0, nb_rxd, 872*6f9cba8fSJoseph Mingrone rte_eth_dev_socket_id(portid), 873*6f9cba8fSJoseph Mingrone &rxq_conf, 874*6f9cba8fSJoseph Mingrone pd->pktmbuf_pool); 875*6f9cba8fSJoseph Mingrone if (ret < 0) 876*6f9cba8fSJoseph Mingrone { 877*6f9cba8fSJoseph Mingrone dpdk_fmt_errmsg_for_rte_errno(p->errbuf, 878*6f9cba8fSJoseph Mingrone PCAP_ERRBUF_SIZE, -ret, 879*6f9cba8fSJoseph Mingrone "dpdk error: rte_eth_rx_queue_setup:port=%u", 880*6f9cba8fSJoseph Mingrone portid); 881*6f9cba8fSJoseph Mingrone ret = PCAP_ERROR; 882*6f9cba8fSJoseph Mingrone break; 883*6f9cba8fSJoseph Mingrone } 884*6f9cba8fSJoseph Mingrone 885*6f9cba8fSJoseph Mingrone // init one TX queue 886*6f9cba8fSJoseph Mingrone txq_conf = dev_info.default_txconf; 887*6f9cba8fSJoseph Mingrone txq_conf.offloads = 
local_port_conf.txmode.offloads; 888*6f9cba8fSJoseph Mingrone ret = rte_eth_tx_queue_setup(portid, 0, nb_txd, 889*6f9cba8fSJoseph Mingrone rte_eth_dev_socket_id(portid), 890*6f9cba8fSJoseph Mingrone &txq_conf); 891*6f9cba8fSJoseph Mingrone if (ret < 0) 892*6f9cba8fSJoseph Mingrone { 893*6f9cba8fSJoseph Mingrone dpdk_fmt_errmsg_for_rte_errno(p->errbuf, 894*6f9cba8fSJoseph Mingrone PCAP_ERRBUF_SIZE, -ret, 895*6f9cba8fSJoseph Mingrone "dpdk error: rte_eth_tx_queue_setup:port=%u", 896*6f9cba8fSJoseph Mingrone portid); 897*6f9cba8fSJoseph Mingrone ret = PCAP_ERROR; 898*6f9cba8fSJoseph Mingrone break; 899*6f9cba8fSJoseph Mingrone } 900*6f9cba8fSJoseph Mingrone // Initialize TX buffers 901*6f9cba8fSJoseph Mingrone tx_buffer = rte_zmalloc_socket(DPDK_TX_BUF_NAME, 902*6f9cba8fSJoseph Mingrone RTE_ETH_TX_BUFFER_SIZE(MAX_PKT_BURST), 0, 903*6f9cba8fSJoseph Mingrone rte_eth_dev_socket_id(portid)); 904*6f9cba8fSJoseph Mingrone if (tx_buffer == NULL) 905*6f9cba8fSJoseph Mingrone { 906*6f9cba8fSJoseph Mingrone snprintf(p->errbuf, PCAP_ERRBUF_SIZE, 907*6f9cba8fSJoseph Mingrone "dpdk error: Cannot allocate buffer for tx on port %u", portid); 908*6f9cba8fSJoseph Mingrone ret = PCAP_ERROR; 909*6f9cba8fSJoseph Mingrone break; 910*6f9cba8fSJoseph Mingrone } 911*6f9cba8fSJoseph Mingrone rte_eth_tx_buffer_init(tx_buffer, MAX_PKT_BURST); 912*6f9cba8fSJoseph Mingrone // Start device 913*6f9cba8fSJoseph Mingrone ret = rte_eth_dev_start(portid); 914*6f9cba8fSJoseph Mingrone if (ret < 0) 915*6f9cba8fSJoseph Mingrone { 916*6f9cba8fSJoseph Mingrone dpdk_fmt_errmsg_for_rte_errno(p->errbuf, 917*6f9cba8fSJoseph Mingrone PCAP_ERRBUF_SIZE, -ret, 918*6f9cba8fSJoseph Mingrone "dpdk error: rte_eth_dev_start:port=%u", 919*6f9cba8fSJoseph Mingrone portid); 920*6f9cba8fSJoseph Mingrone ret = PCAP_ERROR; 921*6f9cba8fSJoseph Mingrone break; 922*6f9cba8fSJoseph Mingrone } 923*6f9cba8fSJoseph Mingrone // set promiscuous mode 924*6f9cba8fSJoseph Mingrone if (p->opt.promisc){ 925*6f9cba8fSJoseph Mingrone 
pd->must_clear_promisc=1; 926*6f9cba8fSJoseph Mingrone rte_eth_promiscuous_enable(portid); 927*6f9cba8fSJoseph Mingrone } 928*6f9cba8fSJoseph Mingrone // check link status 929*6f9cba8fSJoseph Mingrone is_port_up = check_link_status(portid, &link); 930*6f9cba8fSJoseph Mingrone if (!is_port_up){ 931*6f9cba8fSJoseph Mingrone snprintf(p->errbuf, PCAP_ERRBUF_SIZE, 932*6f9cba8fSJoseph Mingrone "dpdk error: link is down, port=%u",portid); 933*6f9cba8fSJoseph Mingrone ret = PCAP_ERROR_IFACE_NOT_UP; 934*6f9cba8fSJoseph Mingrone break; 935*6f9cba8fSJoseph Mingrone } 936*6f9cba8fSJoseph Mingrone // reset statistics 937*6f9cba8fSJoseph Mingrone rte_eth_stats_reset(pd->portid); 938*6f9cba8fSJoseph Mingrone calculate_timestamp(&(pd->ts_helper), &(pd->prev_ts)); 939*6f9cba8fSJoseph Mingrone rte_eth_stats_get(pd->portid,&(pd->prev_stats)); 940*6f9cba8fSJoseph Mingrone // format pcap_t 941*6f9cba8fSJoseph Mingrone pd->portid = portid; 942*6f9cba8fSJoseph Mingrone p->fd = pd->portid; 943*6f9cba8fSJoseph Mingrone if (p->snapshot <=0 || p->snapshot> MAXIMUM_SNAPLEN) 944*6f9cba8fSJoseph Mingrone { 945*6f9cba8fSJoseph Mingrone p->snapshot = MAXIMUM_SNAPLEN; 946*6f9cba8fSJoseph Mingrone } 947*6f9cba8fSJoseph Mingrone p->linktype = DLT_EN10MB; // Ethernet, the 10MB is historical. 948*6f9cba8fSJoseph Mingrone p->selectable_fd = p->fd; 949*6f9cba8fSJoseph Mingrone p->read_op = pcap_dpdk_dispatch; 950*6f9cba8fSJoseph Mingrone p->inject_op = pcap_dpdk_inject; 951*6f9cba8fSJoseph Mingrone // using pcap_filter currently, though DPDK provides their own BPF function. Because DPDK BPF needs load a ELF file as a filter. 
952*6f9cba8fSJoseph Mingrone p->setfilter_op = install_bpf_program; 953*6f9cba8fSJoseph Mingrone p->setdirection_op = NULL; 954*6f9cba8fSJoseph Mingrone p->set_datalink_op = NULL; 955*6f9cba8fSJoseph Mingrone p->getnonblock_op = pcap_dpdk_getnonblock; 956*6f9cba8fSJoseph Mingrone p->setnonblock_op = pcap_dpdk_setnonblock; 957*6f9cba8fSJoseph Mingrone p->stats_op = pcap_dpdk_stats; 958*6f9cba8fSJoseph Mingrone p->cleanup_op = pcap_dpdk_close; 959*6f9cba8fSJoseph Mingrone p->breakloop_op = pcap_breakloop_common; 960*6f9cba8fSJoseph Mingrone // set default timeout 961*6f9cba8fSJoseph Mingrone pd->required_select_timeout.tv_sec = 0; 962*6f9cba8fSJoseph Mingrone pd->required_select_timeout.tv_usec = DPDK_DEF_MIN_SLEEP_MS*1000; 963*6f9cba8fSJoseph Mingrone p->required_select_timeout = &pd->required_select_timeout; 964*6f9cba8fSJoseph Mingrone ret = 0; // OK 965*6f9cba8fSJoseph Mingrone }while(0); 966*6f9cba8fSJoseph Mingrone 967*6f9cba8fSJoseph Mingrone if (ret <= PCAP_ERROR) // all kinds of error code 968*6f9cba8fSJoseph Mingrone { 969*6f9cba8fSJoseph Mingrone pcap_cleanup_live_common(p); 970*6f9cba8fSJoseph Mingrone }else{ 971*6f9cba8fSJoseph Mingrone rte_eth_dev_get_name_by_port(portid,pd->pci_addr); 972*6f9cba8fSJoseph Mingrone RTE_LOG(INFO, USER1,"Port %d device: %s, MAC:%s, PCI:%s\n", portid, p->opt.device, pd->mac_addr, pd->pci_addr); 973*6f9cba8fSJoseph Mingrone RTE_LOG(INFO, USER1,"Port %d Link Up. Speed %u Mbps - %s\n", 974*6f9cba8fSJoseph Mingrone portid, link.link_speed, 975*6f9cba8fSJoseph Mingrone (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? 
976*6f9cba8fSJoseph Mingrone ("full-duplex") : ("half-duplex\n")); 977*6f9cba8fSJoseph Mingrone } 978*6f9cba8fSJoseph Mingrone return ret; 979*6f9cba8fSJoseph Mingrone } 980*6f9cba8fSJoseph Mingrone 981*6f9cba8fSJoseph Mingrone // device name for dpdk should be in the form as dpdk:number, such as dpdk:0 982*6f9cba8fSJoseph Mingrone pcap_t * pcap_dpdk_create(const char *device, char *ebuf, int *is_ours) 983*6f9cba8fSJoseph Mingrone { 984*6f9cba8fSJoseph Mingrone pcap_t *p=NULL; 985*6f9cba8fSJoseph Mingrone *is_ours = 0; 986*6f9cba8fSJoseph Mingrone 987*6f9cba8fSJoseph Mingrone *is_ours = !strncmp(device, "dpdk:", 5); 988*6f9cba8fSJoseph Mingrone if (! *is_ours) 989*6f9cba8fSJoseph Mingrone return NULL; 990*6f9cba8fSJoseph Mingrone //memset will happen 991*6f9cba8fSJoseph Mingrone p = PCAP_CREATE_COMMON(ebuf, struct pcap_dpdk); 992*6f9cba8fSJoseph Mingrone 993*6f9cba8fSJoseph Mingrone if (p == NULL) 994*6f9cba8fSJoseph Mingrone return NULL; 995*6f9cba8fSJoseph Mingrone p->activate_op = pcap_dpdk_activate; 996*6f9cba8fSJoseph Mingrone return p; 997*6f9cba8fSJoseph Mingrone } 998*6f9cba8fSJoseph Mingrone 999*6f9cba8fSJoseph Mingrone int pcap_dpdk_findalldevs(pcap_if_list_t *devlistp, char *ebuf) 1000*6f9cba8fSJoseph Mingrone { 1001*6f9cba8fSJoseph Mingrone int ret=0; 1002*6f9cba8fSJoseph Mingrone unsigned int nb_ports = 0; 1003*6f9cba8fSJoseph Mingrone char dpdk_name[DPDK_DEV_NAME_MAX]; 1004*6f9cba8fSJoseph Mingrone char dpdk_desc[DPDK_DEV_DESC_MAX]; 1005*6f9cba8fSJoseph Mingrone ETHER_ADDR_TYPE eth_addr; 1006*6f9cba8fSJoseph Mingrone char mac_addr[DPDK_MAC_ADDR_SIZE]; 1007*6f9cba8fSJoseph Mingrone char pci_addr[DPDK_PCI_ADDR_SIZE]; 1008*6f9cba8fSJoseph Mingrone do{ 1009*6f9cba8fSJoseph Mingrone // init EAL; return "DPDK not available" if we 1010*6f9cba8fSJoseph Mingrone // have insufficient permission 1011*6f9cba8fSJoseph Mingrone char dpdk_pre_init_errbuf[PCAP_ERRBUF_SIZE]; 1012*6f9cba8fSJoseph Mingrone ret = dpdk_pre_init(dpdk_pre_init_errbuf, 1); 
1013*6f9cba8fSJoseph Mingrone if (ret < 0) 1014*6f9cba8fSJoseph Mingrone { 1015*6f9cba8fSJoseph Mingrone // This returns a negative value on an error. 1016*6f9cba8fSJoseph Mingrone snprintf(ebuf, PCAP_ERRBUF_SIZE, 1017*6f9cba8fSJoseph Mingrone "Can't look for DPDK devices: %s", 1018*6f9cba8fSJoseph Mingrone dpdk_pre_init_errbuf); 1019*6f9cba8fSJoseph Mingrone ret = PCAP_ERROR; 1020*6f9cba8fSJoseph Mingrone break; 1021*6f9cba8fSJoseph Mingrone } 1022*6f9cba8fSJoseph Mingrone if (ret == 0) 1023*6f9cba8fSJoseph Mingrone { 1024*6f9cba8fSJoseph Mingrone // This means DPDK isn't available on this machine. 1025*6f9cba8fSJoseph Mingrone // That just means "don't return any devices". 1026*6f9cba8fSJoseph Mingrone break; 1027*6f9cba8fSJoseph Mingrone } 1028*6f9cba8fSJoseph Mingrone nb_ports = rte_eth_dev_count_avail(); 1029*6f9cba8fSJoseph Mingrone if (nb_ports == 0) 1030*6f9cba8fSJoseph Mingrone { 1031*6f9cba8fSJoseph Mingrone // That just means "don't return any devices". 1032*6f9cba8fSJoseph Mingrone ret = 0; 1033*6f9cba8fSJoseph Mingrone break; 1034*6f9cba8fSJoseph Mingrone } 1035*6f9cba8fSJoseph Mingrone for (unsigned int i=0; i<nb_ports; i++){ 1036*6f9cba8fSJoseph Mingrone snprintf(dpdk_name, DPDK_DEV_NAME_MAX-1, 1037*6f9cba8fSJoseph Mingrone "%s%u", DPDK_PREFIX, i); 1038*6f9cba8fSJoseph Mingrone // mac addr 1039*6f9cba8fSJoseph Mingrone rte_eth_macaddr_get(i, ð_addr); 1040*6f9cba8fSJoseph Mingrone eth_addr_str(ð_addr,mac_addr,DPDK_MAC_ADDR_SIZE); 1041*6f9cba8fSJoseph Mingrone // PCI addr 1042*6f9cba8fSJoseph Mingrone rte_eth_dev_get_name_by_port(i,pci_addr); 1043*6f9cba8fSJoseph Mingrone snprintf(dpdk_desc,DPDK_DEV_DESC_MAX-1,"%s %s, MAC:%s, PCI:%s", DPDK_DESC, dpdk_name, mac_addr, pci_addr); 1044*6f9cba8fSJoseph Mingrone if (add_dev(devlistp, dpdk_name, 0, dpdk_desc, ebuf)==NULL){ 1045*6f9cba8fSJoseph Mingrone ret = PCAP_ERROR; 1046*6f9cba8fSJoseph Mingrone break; 1047*6f9cba8fSJoseph Mingrone } 1048*6f9cba8fSJoseph Mingrone } 1049*6f9cba8fSJoseph Mingrone }while(0); 
1050*6f9cba8fSJoseph Mingrone return ret; 1051*6f9cba8fSJoseph Mingrone } 1052*6f9cba8fSJoseph Mingrone 1053*6f9cba8fSJoseph Mingrone #ifdef DPDK_ONLY 1054*6f9cba8fSJoseph Mingrone /* 1055*6f9cba8fSJoseph Mingrone * This libpcap build supports only DPDK, not regular network interfaces. 1056*6f9cba8fSJoseph Mingrone */ 1057*6f9cba8fSJoseph Mingrone 1058*6f9cba8fSJoseph Mingrone /* 1059*6f9cba8fSJoseph Mingrone * There are no regular interfaces, just DPDK interfaces. 1060*6f9cba8fSJoseph Mingrone */ 1061*6f9cba8fSJoseph Mingrone int 1062*6f9cba8fSJoseph Mingrone pcap_platform_finddevs(pcap_if_list_t *devlistp _U_, char *errbuf) 1063*6f9cba8fSJoseph Mingrone { 1064*6f9cba8fSJoseph Mingrone return (0); 1065*6f9cba8fSJoseph Mingrone } 1066*6f9cba8fSJoseph Mingrone 1067*6f9cba8fSJoseph Mingrone /* 1068*6f9cba8fSJoseph Mingrone * Attempts to open a regular interface fail. 1069*6f9cba8fSJoseph Mingrone */ 1070*6f9cba8fSJoseph Mingrone pcap_t * 1071*6f9cba8fSJoseph Mingrone pcap_create_interface(const char *device, char *errbuf) 1072*6f9cba8fSJoseph Mingrone { 1073*6f9cba8fSJoseph Mingrone snprintf(errbuf, PCAP_ERRBUF_SIZE, 1074*6f9cba8fSJoseph Mingrone "This version of libpcap only supports DPDK"); 1075*6f9cba8fSJoseph Mingrone return NULL; 1076*6f9cba8fSJoseph Mingrone } 1077*6f9cba8fSJoseph Mingrone 1078*6f9cba8fSJoseph Mingrone /* 1079*6f9cba8fSJoseph Mingrone * Libpcap version string. 1080*6f9cba8fSJoseph Mingrone */ 1081*6f9cba8fSJoseph Mingrone const char * 1082*6f9cba8fSJoseph Mingrone pcap_lib_version(void) 1083*6f9cba8fSJoseph Mingrone { 1084*6f9cba8fSJoseph Mingrone return (PCAP_VERSION_STRING " (DPDK-only)"); 1085*6f9cba8fSJoseph Mingrone } 1086*6f9cba8fSJoseph Mingrone #endif 1087