16f9cba8fSJoseph Mingrone /* 26f9cba8fSJoseph Mingrone * Copyright (C) 2018 jingle YANG. All rights reserved. 36f9cba8fSJoseph Mingrone * 46f9cba8fSJoseph Mingrone * Redistribution and use in source and binary forms, with or without 56f9cba8fSJoseph Mingrone * modification, are permitted provided that the following conditions 66f9cba8fSJoseph Mingrone * are met: 76f9cba8fSJoseph Mingrone * 86f9cba8fSJoseph Mingrone * 1. Redistributions of source code must retain the above copyright 96f9cba8fSJoseph Mingrone * notice, this list of conditions and the following disclaimer. 106f9cba8fSJoseph Mingrone * 2. Redistributions in binary form must reproduce the above copyright 116f9cba8fSJoseph Mingrone * notice, this list of conditions and the following disclaimer in the 126f9cba8fSJoseph Mingrone * documentation and/or other materials provided with the distribution. 136f9cba8fSJoseph Mingrone * 146f9cba8fSJoseph Mingrone * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''AND 156f9cba8fSJoseph Mingrone * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 166f9cba8fSJoseph Mingrone * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 176f9cba8fSJoseph Mingrone * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 186f9cba8fSJoseph Mingrone * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 196f9cba8fSJoseph Mingrone * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 206f9cba8fSJoseph Mingrone * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 216f9cba8fSJoseph Mingrone * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 226f9cba8fSJoseph Mingrone * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 236f9cba8fSJoseph Mingrone * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 246f9cba8fSJoseph Mingrone * SUCH DAMAGE. 256f9cba8fSJoseph Mingrone */ 266f9cba8fSJoseph Mingrone 276f9cba8fSJoseph Mingrone /* 286f9cba8fSJoseph Mingrone Date: Dec 16, 2018 296f9cba8fSJoseph Mingrone 306f9cba8fSJoseph Mingrone Description: 316f9cba8fSJoseph Mingrone 1. Pcap-dpdk provides libpcap the ability to use DPDK with the device name as dpdk:{portid}, such as dpdk:0. 326f9cba8fSJoseph Mingrone 2. DPDK is a set of libraries and drivers for fast packet processing. (https://www.dpdk.org/) 336f9cba8fSJoseph Mingrone 3. The testprogs/capturetest provides 6.4Gbps/800,000 pps on Intel 10-Gigabit X540-AT2 with DPDK 18.11. 346f9cba8fSJoseph Mingrone 356f9cba8fSJoseph Mingrone Limitations: 366f9cba8fSJoseph Mingrone 1. DPDK support will be on if DPDK is available. Please set DIR for --with-dpdk[=DIR] with ./configure or -DDPDK_DIR[=DIR] with cmake if DPDK is installed manually. 376f9cba8fSJoseph Mingrone 2. Only support link libdpdk.so dynamically, because the libdpdk.a will not work correctly. 386f9cba8fSJoseph Mingrone 3. Only support read operation, and packet injection has not been supported yet. 396f9cba8fSJoseph Mingrone 406f9cba8fSJoseph Mingrone Usage: 416f9cba8fSJoseph Mingrone 1. Compile DPDK as shared library and install.(https://github.com/DPDK/dpdk.git) 426f9cba8fSJoseph Mingrone 436f9cba8fSJoseph Mingrone You shall modify the file $RTE_SDK/$RTE_TARGET/.config and set: 446f9cba8fSJoseph Mingrone CONFIG_RTE_BUILD_SHARED_LIB=y 456f9cba8fSJoseph Mingrone By the following command: 466f9cba8fSJoseph Mingrone sed -i 's/CONFIG_RTE_BUILD_SHARED_LIB=n/CONFIG_RTE_BUILD_SHARED_LIB=y/' $RTE_SDK/$RTE_TARGET/.config 476f9cba8fSJoseph Mingrone 486f9cba8fSJoseph Mingrone 2. Launch l2fwd that is one of DPDK examples correctly, and get device information. 496f9cba8fSJoseph Mingrone 506f9cba8fSJoseph Mingrone You shall learn how to bind nic with DPDK-compatible driver by $RTE_SDK/usertools/dpdk-devbind.py, such as igb_uio. 516f9cba8fSJoseph Mingrone And enable hugepages by dpdk-setup.sh 526f9cba8fSJoseph Mingrone 536f9cba8fSJoseph Mingrone Then launch the l2fwd with dynamic driver support. For example: 546f9cba8fSJoseph Mingrone $RTE_SDK/examples/l2fwd/$RTE_TARGET/l2fwd -dlibrte_pmd_e1000.so -dlibrte_pmd_ixgbe.so -dlibrte_mempool_ring.so -- -p 0x1 556f9cba8fSJoseph Mingrone 566f9cba8fSJoseph Mingrone 3. Compile libpcap with dpdk options. 576f9cba8fSJoseph Mingrone 586f9cba8fSJoseph Mingrone If DPDK has not been found automatically, you shall export DPDK environment variable which are used for compiling DPDK. And then pass $RTE_SDK/$RTE_TARGET to --with-dpdk or -DDPDK_DIR 596f9cba8fSJoseph Mingrone 606f9cba8fSJoseph Mingrone export RTE_SDK={your DPDK base directory} 616f9cba8fSJoseph Mingrone export RTE_TARGET={your target name} 626f9cba8fSJoseph Mingrone 636f9cba8fSJoseph Mingrone 3.1 With configure 646f9cba8fSJoseph Mingrone 656f9cba8fSJoseph Mingrone ./configure --with-dpdk=$RTE_SDK/$RTE_TARGET && make -s all && make -s testprogs && make install 666f9cba8fSJoseph Mingrone 676f9cba8fSJoseph Mingrone 3.2 With cmake 686f9cba8fSJoseph Mingrone 696f9cba8fSJoseph Mingrone mkdir -p build && cd build && cmake -DDPDK_DIR=$RTE_SDK/$RTE_TARGET ../ && make -s all && make -s testprogs && make install 706f9cba8fSJoseph Mingrone 716f9cba8fSJoseph Mingrone 4. Link your own program with libpcap, and use DPDK with the device name as dpdk:{portid}, such as dpdk:0. 726f9cba8fSJoseph Mingrone And you shall set DPDK configure options by environment variable DPDK_CFG 73*afdbf109SJoseph Mingrone For example, the testprogs/capturetest could be launched by: 746f9cba8fSJoseph Mingrone 756f9cba8fSJoseph Mingrone env DPDK_CFG="--log-level=debug -l0 -dlibrte_pmd_e1000.so -dlibrte_pmd_ixgbe.so -dlibrte_mempool_ring.so" ./capturetest -i dpdk:0 766f9cba8fSJoseph Mingrone */ 776f9cba8fSJoseph Mingrone 786f9cba8fSJoseph Mingrone #include <config.h> 796f9cba8fSJoseph Mingrone 806f9cba8fSJoseph Mingrone #include <errno.h> 816f9cba8fSJoseph Mingrone #include <netdb.h> 826f9cba8fSJoseph Mingrone #include <stdio.h> 836f9cba8fSJoseph Mingrone #include <stdlib.h> 846f9cba8fSJoseph Mingrone #include <string.h> 856f9cba8fSJoseph Mingrone #include <unistd.h> 866f9cba8fSJoseph Mingrone #include <limits.h> /* for INT_MAX */ 876f9cba8fSJoseph Mingrone #include <time.h> 886f9cba8fSJoseph Mingrone 896f9cba8fSJoseph Mingrone #include <sys/time.h> 906f9cba8fSJoseph Mingrone 916f9cba8fSJoseph Mingrone //header for calling dpdk 926f9cba8fSJoseph Mingrone #include <rte_config.h> 936f9cba8fSJoseph Mingrone #include <rte_common.h> 946f9cba8fSJoseph Mingrone #include <rte_errno.h> 956f9cba8fSJoseph Mingrone #include <rte_log.h> 966f9cba8fSJoseph Mingrone #include <rte_malloc.h> 976f9cba8fSJoseph Mingrone #include <rte_memory.h> 986f9cba8fSJoseph Mingrone #include <rte_eal.h> 996f9cba8fSJoseph Mingrone #include <rte_launch.h> 1006f9cba8fSJoseph Mingrone #include <rte_atomic.h> 1016f9cba8fSJoseph Mingrone #include <rte_cycles.h> 1026f9cba8fSJoseph Mingrone #include <rte_lcore.h> 1036f9cba8fSJoseph Mingrone #include <rte_per_lcore.h> 1046f9cba8fSJoseph Mingrone #include <rte_branch_prediction.h> 1056f9cba8fSJoseph Mingrone #include <rte_interrupts.h> 1066f9cba8fSJoseph Mingrone #include <rte_random.h> 1076f9cba8fSJoseph Mingrone #include <rte_debug.h> 1086f9cba8fSJoseph Mingrone #include <rte_ether.h> 1096f9cba8fSJoseph Mingrone #include <rte_ethdev.h> 1106f9cba8fSJoseph Mingrone #include <rte_mempool.h> 1116f9cba8fSJoseph Mingrone #include <rte_mbuf.h> 1126f9cba8fSJoseph Mingrone #include <rte_bus.h> 1136f9cba8fSJoseph Mingrone 1146f9cba8fSJoseph Mingrone #include "pcap-int.h" 1156f9cba8fSJoseph Mingrone #include "pcap-dpdk.h" 1166f9cba8fSJoseph Mingrone 1176f9cba8fSJoseph Mingrone /* 1186f9cba8fSJoseph Mingrone * Deal with API changes that break source compatibility. 1196f9cba8fSJoseph Mingrone */ 1206f9cba8fSJoseph Mingrone 1216f9cba8fSJoseph Mingrone #ifdef HAVE_STRUCT_RTE_ETHER_ADDR 1226f9cba8fSJoseph Mingrone #define ETHER_ADDR_TYPE struct rte_ether_addr 1236f9cba8fSJoseph Mingrone #else 1246f9cba8fSJoseph Mingrone #define ETHER_ADDR_TYPE struct ether_addr 1256f9cba8fSJoseph Mingrone #endif 1266f9cba8fSJoseph Mingrone 1276f9cba8fSJoseph Mingrone #define DPDK_DEF_LOG_LEV RTE_LOG_ERR 1286f9cba8fSJoseph Mingrone // 1296f9cba8fSJoseph Mingrone // This is set to 0 if we haven't initialized DPDK yet, 1 if we've 1306f9cba8fSJoseph Mingrone // successfully initialized it, a negative value, which is the negative 1316f9cba8fSJoseph Mingrone // of the rte_errno from rte_eal_init(), if we tried to initialize it 1326f9cba8fSJoseph Mingrone // and got an error. 1336f9cba8fSJoseph Mingrone // 1346f9cba8fSJoseph Mingrone static int is_dpdk_pre_inited=0; 1356f9cba8fSJoseph Mingrone #define DPDK_LIB_NAME "libpcap_dpdk" 1366f9cba8fSJoseph Mingrone #define DPDK_DESC "Data Plane Development Kit (DPDK) Interface" 1376f9cba8fSJoseph Mingrone #define DPDK_ERR_PERM_MSG "permission denied, DPDK needs root permission" 1386f9cba8fSJoseph Mingrone #define DPDK_ARGC_MAX 64 1396f9cba8fSJoseph Mingrone #define DPDK_CFG_MAX_LEN 1024 1406f9cba8fSJoseph Mingrone #define DPDK_DEV_NAME_MAX 32 1416f9cba8fSJoseph Mingrone #define DPDK_DEV_DESC_MAX 512 1426f9cba8fSJoseph Mingrone #define DPDK_CFG_ENV_NAME "DPDK_CFG" 1436f9cba8fSJoseph Mingrone #define DPDK_DEF_MIN_SLEEP_MS 1 1446f9cba8fSJoseph Mingrone static char dpdk_cfg_buf[DPDK_CFG_MAX_LEN]; 1456f9cba8fSJoseph Mingrone #define DPDK_MAC_ADDR_SIZE 32 1466f9cba8fSJoseph Mingrone #define DPDK_DEF_MAC_ADDR "00:00:00:00:00:00" 1476f9cba8fSJoseph Mingrone #define DPDK_PCI_ADDR_SIZE 16 1486f9cba8fSJoseph Mingrone #define DPDK_DEF_CFG "--log-level=error -l0 -dlibrte_pmd_e1000.so -dlibrte_pmd_ixgbe.so -dlibrte_mempool_ring.so" 1496f9cba8fSJoseph Mingrone #define DPDK_PREFIX "dpdk:" 1506f9cba8fSJoseph Mingrone #define DPDK_PORTID_MAX 65535U 1516f9cba8fSJoseph Mingrone #define MBUF_POOL_NAME "mbuf_pool" 1526f9cba8fSJoseph Mingrone #define DPDK_TX_BUF_NAME "tx_buffer" 1536f9cba8fSJoseph Mingrone //The number of elements in the mbuf pool. 1546f9cba8fSJoseph Mingrone #define DPDK_NB_MBUFS 8192U 1556f9cba8fSJoseph Mingrone #define MEMPOOL_CACHE_SIZE 256 1566f9cba8fSJoseph Mingrone #define MAX_PKT_BURST 32 1576f9cba8fSJoseph Mingrone // Configurable number of RX/TX ring descriptors 1586f9cba8fSJoseph Mingrone #define RTE_TEST_RX_DESC_DEFAULT 1024 1596f9cba8fSJoseph Mingrone #define RTE_TEST_TX_DESC_DEFAULT 1024 1606f9cba8fSJoseph Mingrone 1616f9cba8fSJoseph Mingrone static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT; 1626f9cba8fSJoseph Mingrone static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT; 1636f9cba8fSJoseph Mingrone 1646f9cba8fSJoseph Mingrone #ifdef RTE_ETHER_MAX_JUMBO_FRAME_LEN 1656f9cba8fSJoseph Mingrone #define RTE_ETH_PCAP_SNAPLEN RTE_ETHER_MAX_JUMBO_FRAME_LEN 1666f9cba8fSJoseph Mingrone #else 1676f9cba8fSJoseph Mingrone #define RTE_ETH_PCAP_SNAPLEN ETHER_MAX_JUMBO_FRAME_LEN 1686f9cba8fSJoseph Mingrone #endif 1696f9cba8fSJoseph Mingrone 1706f9cba8fSJoseph Mingrone static struct rte_eth_dev_tx_buffer *tx_buffer; 1716f9cba8fSJoseph Mingrone 1726f9cba8fSJoseph Mingrone struct dpdk_ts_helper{ 1736f9cba8fSJoseph Mingrone struct timeval start_time; 1746f9cba8fSJoseph Mingrone uint64_t start_cycles; 1756f9cba8fSJoseph Mingrone uint64_t hz; 1766f9cba8fSJoseph Mingrone }; 1776f9cba8fSJoseph Mingrone struct pcap_dpdk{ 1786f9cba8fSJoseph Mingrone pcap_t * orig; 1796f9cba8fSJoseph Mingrone uint16_t portid; // portid of DPDK 1806f9cba8fSJoseph Mingrone int must_clear_promisc; 1816f9cba8fSJoseph Mingrone uint64_t bpf_drop; 1826f9cba8fSJoseph Mingrone int nonblock; 1836f9cba8fSJoseph Mingrone struct timeval required_select_timeout; 1846f9cba8fSJoseph Mingrone struct timeval prev_ts; 1856f9cba8fSJoseph Mingrone struct rte_eth_stats prev_stats; 1866f9cba8fSJoseph Mingrone struct timeval curr_ts; 1876f9cba8fSJoseph Mingrone struct rte_eth_stats curr_stats; 1886f9cba8fSJoseph Mingrone uint64_t pps; 1896f9cba8fSJoseph Mingrone uint64_t bps; 1906f9cba8fSJoseph Mingrone struct rte_mempool * pktmbuf_pool; 1916f9cba8fSJoseph Mingrone struct dpdk_ts_helper ts_helper; 1926f9cba8fSJoseph Mingrone ETHER_ADDR_TYPE eth_addr; 1936f9cba8fSJoseph Mingrone char mac_addr[DPDK_MAC_ADDR_SIZE]; 1946f9cba8fSJoseph Mingrone char pci_addr[DPDK_PCI_ADDR_SIZE]; 1956f9cba8fSJoseph Mingrone unsigned char pcap_tmp_buf[RTE_ETH_PCAP_SNAPLEN]; 1966f9cba8fSJoseph Mingrone }; 1976f9cba8fSJoseph Mingrone 1986f9cba8fSJoseph Mingrone static struct rte_eth_conf port_conf = { 1996f9cba8fSJoseph Mingrone .rxmode = { 2006f9cba8fSJoseph Mingrone .split_hdr_size = 0, 2016f9cba8fSJoseph Mingrone }, 2026f9cba8fSJoseph Mingrone .txmode = { 2036f9cba8fSJoseph Mingrone .mq_mode = ETH_MQ_TX_NONE, 2046f9cba8fSJoseph Mingrone }, 2056f9cba8fSJoseph Mingrone }; 2066f9cba8fSJoseph Mingrone 2076f9cba8fSJoseph Mingrone static void dpdk_fmt_errmsg_for_rte_errno(char *, size_t, int, 2086f9cba8fSJoseph Mingrone PCAP_FORMAT_STRING(const char *), ...) PCAP_PRINTFLIKE(4, 5); 2096f9cba8fSJoseph Mingrone 2106f9cba8fSJoseph Mingrone /* 2116f9cba8fSJoseph Mingrone * Generate an error message based on a format, arguments, and an 2126f9cba8fSJoseph Mingrone * rte_errno, with a message for the rte_errno after the formatted output. 2136f9cba8fSJoseph Mingrone */ 2146f9cba8fSJoseph Mingrone static void dpdk_fmt_errmsg_for_rte_errno(char *errbuf, size_t errbuflen, 2156f9cba8fSJoseph Mingrone int errnum, const char *fmt, ...) 2166f9cba8fSJoseph Mingrone { 2176f9cba8fSJoseph Mingrone va_list ap; 2186f9cba8fSJoseph Mingrone size_t msglen; 2196f9cba8fSJoseph Mingrone char *p; 2206f9cba8fSJoseph Mingrone size_t errbuflen_remaining; 2216f9cba8fSJoseph Mingrone 2226f9cba8fSJoseph Mingrone va_start(ap, fmt); 2236f9cba8fSJoseph Mingrone vsnprintf(errbuf, errbuflen, fmt, ap); 2246f9cba8fSJoseph Mingrone va_end(ap); 2256f9cba8fSJoseph Mingrone msglen = strlen(errbuf); 2266f9cba8fSJoseph Mingrone 2276f9cba8fSJoseph Mingrone /* 2286f9cba8fSJoseph Mingrone * Do we have enough space to append ": "? 2296f9cba8fSJoseph Mingrone * Including the terminating '\0', that's 3 bytes. 2306f9cba8fSJoseph Mingrone */ 2316f9cba8fSJoseph Mingrone if (msglen + 3 > errbuflen) { 2326f9cba8fSJoseph Mingrone /* No - just give them what we've produced. */ 2336f9cba8fSJoseph Mingrone return; 2346f9cba8fSJoseph Mingrone } 2356f9cba8fSJoseph Mingrone p = errbuf + msglen; 2366f9cba8fSJoseph Mingrone errbuflen_remaining = errbuflen - msglen; 2376f9cba8fSJoseph Mingrone *p++ = ':'; 2386f9cba8fSJoseph Mingrone *p++ = ' '; 2396f9cba8fSJoseph Mingrone *p = '\0'; 2406f9cba8fSJoseph Mingrone msglen += 2; 2416f9cba8fSJoseph Mingrone errbuflen_remaining -= 2; 2426f9cba8fSJoseph Mingrone 2436f9cba8fSJoseph Mingrone /* 2446f9cba8fSJoseph Mingrone * Now append the string for the error code. 2456f9cba8fSJoseph Mingrone * rte_strerror() is thread-safe, at least as of dpdk 18.11, 2466f9cba8fSJoseph Mingrone * unlike strerror() - it uses strerror_r() rather than strerror() 2476f9cba8fSJoseph Mingrone * for UN*X errno values, and prints to what I assume is a per-thread 2486f9cba8fSJoseph Mingrone * buffer (based on the "PER_LCORE" in "RTE_DEFINE_PER_LCORE" used 2496f9cba8fSJoseph Mingrone * to declare the buffers statically) for DPDK errors. 2506f9cba8fSJoseph Mingrone */ 2516f9cba8fSJoseph Mingrone snprintf(p, errbuflen_remaining, "%s", rte_strerror(errnum)); 2526f9cba8fSJoseph Mingrone } 2536f9cba8fSJoseph Mingrone 2546f9cba8fSJoseph Mingrone static int dpdk_init_timer(struct pcap_dpdk *pd){ 2556f9cba8fSJoseph Mingrone gettimeofday(&(pd->ts_helper.start_time),NULL); 2566f9cba8fSJoseph Mingrone pd->ts_helper.start_cycles = rte_get_timer_cycles(); 2576f9cba8fSJoseph Mingrone pd->ts_helper.hz = rte_get_timer_hz(); 2586f9cba8fSJoseph Mingrone if (pd->ts_helper.hz == 0){ 2596f9cba8fSJoseph Mingrone return -1; 2606f9cba8fSJoseph Mingrone } 2616f9cba8fSJoseph Mingrone return 0; 2626f9cba8fSJoseph Mingrone } 2636f9cba8fSJoseph Mingrone static inline void calculate_timestamp(struct dpdk_ts_helper *helper,struct timeval *ts) 2646f9cba8fSJoseph Mingrone { 2656f9cba8fSJoseph Mingrone uint64_t cycles; 2666f9cba8fSJoseph Mingrone // delta 2676f9cba8fSJoseph Mingrone struct timeval cur_time; 2686f9cba8fSJoseph Mingrone cycles = rte_get_timer_cycles() - helper->start_cycles; 2696f9cba8fSJoseph Mingrone cur_time.tv_sec = (time_t)(cycles/helper->hz); 2706f9cba8fSJoseph Mingrone cur_time.tv_usec = (suseconds_t)((cycles%helper->hz)*1e6/helper->hz); 2716f9cba8fSJoseph Mingrone timeradd(&(helper->start_time), &cur_time, ts); 2726f9cba8fSJoseph Mingrone } 2736f9cba8fSJoseph Mingrone 2746f9cba8fSJoseph Mingrone static uint32_t dpdk_gather_data(unsigned char *data, uint32_t len, struct rte_mbuf *mbuf) 2756f9cba8fSJoseph Mingrone { 2766f9cba8fSJoseph Mingrone uint32_t total_len = 0; 2776f9cba8fSJoseph Mingrone while (mbuf && (total_len+mbuf->data_len) < len ){ 2786f9cba8fSJoseph Mingrone rte_memcpy(data+total_len, rte_pktmbuf_mtod(mbuf,void *),mbuf->data_len); 2796f9cba8fSJoseph Mingrone total_len+=mbuf->data_len; 2806f9cba8fSJoseph Mingrone mbuf=mbuf->next; 2816f9cba8fSJoseph Mingrone } 2826f9cba8fSJoseph Mingrone return total_len; 2836f9cba8fSJoseph Mingrone } 2846f9cba8fSJoseph Mingrone 2856f9cba8fSJoseph Mingrone 2866f9cba8fSJoseph Mingrone static int dpdk_read_with_timeout(pcap_t *p, struct rte_mbuf **pkts_burst, const uint16_t burst_cnt){ 2876f9cba8fSJoseph Mingrone struct pcap_dpdk *pd = (struct pcap_dpdk*)(p->priv); 2886f9cba8fSJoseph Mingrone int nb_rx = 0; 2896f9cba8fSJoseph Mingrone int timeout_ms = p->opt.timeout; 2906f9cba8fSJoseph Mingrone int sleep_ms = 0; 2916f9cba8fSJoseph Mingrone if (pd->nonblock){ 2926f9cba8fSJoseph Mingrone // In non-blocking mode, just read once, no matter how many packets are captured. 2936f9cba8fSJoseph Mingrone nb_rx = (int)rte_eth_rx_burst(pd->portid, 0, pkts_burst, burst_cnt); 2946f9cba8fSJoseph Mingrone }else{ 2956f9cba8fSJoseph Mingrone // In blocking mode, read many times until packets are captured or timeout or break_loop is set. 2966f9cba8fSJoseph Mingrone // if timeout_ms == 0, it may be blocked forever. 2976f9cba8fSJoseph Mingrone while (timeout_ms == 0 || sleep_ms < timeout_ms){ 2986f9cba8fSJoseph Mingrone nb_rx = (int)rte_eth_rx_burst(pd->portid, 0, pkts_burst, burst_cnt); 2996f9cba8fSJoseph Mingrone if (nb_rx){ // got packets within timeout_ms 3006f9cba8fSJoseph Mingrone break; 3016f9cba8fSJoseph Mingrone }else{ // no packet arrives at this round. 3026f9cba8fSJoseph Mingrone if (p->break_loop){ 3036f9cba8fSJoseph Mingrone break; 3046f9cba8fSJoseph Mingrone } 3056f9cba8fSJoseph Mingrone // sleep for a very short while. 3066f9cba8fSJoseph Mingrone // block sleep is the only choice, since usleep() will impact performance dramatically. 3076f9cba8fSJoseph Mingrone rte_delay_us_block(DPDK_DEF_MIN_SLEEP_MS*1000); 3086f9cba8fSJoseph Mingrone sleep_ms += DPDK_DEF_MIN_SLEEP_MS; 3096f9cba8fSJoseph Mingrone } 3106f9cba8fSJoseph Mingrone } 3116f9cba8fSJoseph Mingrone } 3126f9cba8fSJoseph Mingrone return nb_rx; 3136f9cba8fSJoseph Mingrone } 3146f9cba8fSJoseph Mingrone 3156f9cba8fSJoseph Mingrone static int pcap_dpdk_dispatch(pcap_t *p, int max_cnt, pcap_handler cb, u_char *cb_arg) 3166f9cba8fSJoseph Mingrone { 3176f9cba8fSJoseph Mingrone struct pcap_dpdk *pd = (struct pcap_dpdk*)(p->priv); 3186f9cba8fSJoseph Mingrone int burst_cnt = 0; 3196f9cba8fSJoseph Mingrone int nb_rx = 0; 3206f9cba8fSJoseph Mingrone struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; 3216f9cba8fSJoseph Mingrone struct rte_mbuf *m; 3226f9cba8fSJoseph Mingrone struct pcap_pkthdr pcap_header; 3236f9cba8fSJoseph Mingrone // In DPDK, pkt_len is sum of lengths for all segments. And data_len is for one segment 3246f9cba8fSJoseph Mingrone uint32_t pkt_len = 0; 3256f9cba8fSJoseph Mingrone uint32_t caplen = 0; 3266f9cba8fSJoseph Mingrone u_char *bp = NULL; 3276f9cba8fSJoseph Mingrone int i=0; 3286f9cba8fSJoseph Mingrone unsigned int gather_len =0; 3296f9cba8fSJoseph Mingrone int pkt_cnt = 0; 3306f9cba8fSJoseph Mingrone u_char *large_buffer=NULL; 3316f9cba8fSJoseph Mingrone int timeout_ms = p->opt.timeout; 3326f9cba8fSJoseph Mingrone 3336f9cba8fSJoseph Mingrone /* 3346f9cba8fSJoseph Mingrone * This can conceivably process more than INT_MAX packets, 3356f9cba8fSJoseph Mingrone * which would overflow the packet count, causing it either 3366f9cba8fSJoseph Mingrone * to look like a negative number, and thus cause us to 3376f9cba8fSJoseph Mingrone * return a value that looks like an error, or overflow 3386f9cba8fSJoseph Mingrone * back into positive territory, and thus cause us to 3396f9cba8fSJoseph Mingrone * return a too-low count. 3406f9cba8fSJoseph Mingrone * 3416f9cba8fSJoseph Mingrone * Therefore, if the packet count is unlimited, we clip 3426f9cba8fSJoseph Mingrone * it at INT_MAX; this routine is not expected to 3436f9cba8fSJoseph Mingrone * process packets indefinitely, so that's not an issue. 3446f9cba8fSJoseph Mingrone */ 3456f9cba8fSJoseph Mingrone if (PACKET_COUNT_IS_UNLIMITED(max_cnt)) 3466f9cba8fSJoseph Mingrone max_cnt = INT_MAX; 3476f9cba8fSJoseph Mingrone 3486f9cba8fSJoseph Mingrone if (max_cnt < MAX_PKT_BURST){ 3496f9cba8fSJoseph Mingrone burst_cnt = max_cnt; 3506f9cba8fSJoseph Mingrone }else{ 3516f9cba8fSJoseph Mingrone burst_cnt = MAX_PKT_BURST; 3526f9cba8fSJoseph Mingrone } 3536f9cba8fSJoseph Mingrone 3546f9cba8fSJoseph Mingrone while( pkt_cnt < max_cnt){ 3556f9cba8fSJoseph Mingrone if (p->break_loop){ 3566f9cba8fSJoseph Mingrone p->break_loop = 0; 3576f9cba8fSJoseph Mingrone return PCAP_ERROR_BREAK; 3586f9cba8fSJoseph Mingrone } 3596f9cba8fSJoseph Mingrone // read once in non-blocking mode, or try many times waiting for timeout_ms. 3606f9cba8fSJoseph Mingrone // if timeout_ms == 0, it will be blocked until one packet arrives or break_loop is set. 3616f9cba8fSJoseph Mingrone nb_rx = dpdk_read_with_timeout(p, pkts_burst, burst_cnt); 3626f9cba8fSJoseph Mingrone if (nb_rx == 0){ 3636f9cba8fSJoseph Mingrone if (pd->nonblock){ 3646f9cba8fSJoseph Mingrone RTE_LOG(DEBUG, USER1, "dpdk: no packets available in non-blocking mode.\n"); 3656f9cba8fSJoseph Mingrone }else{ 3666f9cba8fSJoseph Mingrone if (p->break_loop){ 3676f9cba8fSJoseph Mingrone RTE_LOG(DEBUG, USER1, "dpdk: no packets available and break_loop is set in blocking mode.\n"); 3686f9cba8fSJoseph Mingrone p->break_loop = 0; 3696f9cba8fSJoseph Mingrone return PCAP_ERROR_BREAK; 3706f9cba8fSJoseph Mingrone 3716f9cba8fSJoseph Mingrone } 3726f9cba8fSJoseph Mingrone RTE_LOG(DEBUG, USER1, "dpdk: no packets available for timeout %d ms in blocking mode.\n", timeout_ms); 3736f9cba8fSJoseph Mingrone } 3746f9cba8fSJoseph Mingrone // break if dpdk reads 0 packet, no matter in blocking(timeout) or non-blocking mode. 3756f9cba8fSJoseph Mingrone break; 3766f9cba8fSJoseph Mingrone } 3776f9cba8fSJoseph Mingrone pkt_cnt += nb_rx; 3786f9cba8fSJoseph Mingrone for ( i = 0; i < nb_rx; i++) { 3796f9cba8fSJoseph Mingrone m = pkts_burst[i]; 3806f9cba8fSJoseph Mingrone calculate_timestamp(&(pd->ts_helper),&(pcap_header.ts)); 3816f9cba8fSJoseph Mingrone pkt_len = rte_pktmbuf_pkt_len(m); 3826f9cba8fSJoseph Mingrone // caplen = min(pkt_len, p->snapshot); 3836f9cba8fSJoseph Mingrone // caplen will not be changed, no matter how long the rte_pktmbuf 3846f9cba8fSJoseph Mingrone caplen = pkt_len < (uint32_t)p->snapshot ? pkt_len: (uint32_t)p->snapshot; 3856f9cba8fSJoseph Mingrone pcap_header.caplen = caplen; 3866f9cba8fSJoseph Mingrone pcap_header.len = pkt_len; 3876f9cba8fSJoseph Mingrone // volatile prefetch 3886f9cba8fSJoseph Mingrone rte_prefetch0(rte_pktmbuf_mtod(m, void *)); 3896f9cba8fSJoseph Mingrone bp = NULL; 3906f9cba8fSJoseph Mingrone if (m->nb_segs == 1) 3916f9cba8fSJoseph Mingrone { 3926f9cba8fSJoseph Mingrone bp = rte_pktmbuf_mtod(m, u_char *); 3936f9cba8fSJoseph Mingrone }else{ 3946f9cba8fSJoseph Mingrone // use fast buffer pcap_tmp_buf if pkt_len is small, no need to call malloc and free 3956f9cba8fSJoseph Mingrone if ( pkt_len <= RTE_ETH_PCAP_SNAPLEN) 3966f9cba8fSJoseph Mingrone { 3976f9cba8fSJoseph Mingrone gather_len = dpdk_gather_data(pd->pcap_tmp_buf, RTE_ETH_PCAP_SNAPLEN, m); 3986f9cba8fSJoseph Mingrone bp = pd->pcap_tmp_buf; 3996f9cba8fSJoseph Mingrone }else{ 4006f9cba8fSJoseph Mingrone // need call free later 4016f9cba8fSJoseph Mingrone large_buffer = (u_char *)malloc(caplen*sizeof(u_char)); 4026f9cba8fSJoseph Mingrone gather_len = dpdk_gather_data(large_buffer, caplen, m); 4036f9cba8fSJoseph Mingrone bp = large_buffer; 4046f9cba8fSJoseph Mingrone } 4056f9cba8fSJoseph Mingrone 4066f9cba8fSJoseph Mingrone } 4076f9cba8fSJoseph Mingrone if (bp){ 408*afdbf109SJoseph Mingrone if (p->fcode.bf_insns==NULL || pcapint_filter(p->fcode.bf_insns, bp, pcap_header.len, pcap_header.caplen)){ 4096f9cba8fSJoseph Mingrone cb(cb_arg, &pcap_header, bp); 4106f9cba8fSJoseph Mingrone }else{ 4116f9cba8fSJoseph Mingrone pd->bpf_drop++; 4126f9cba8fSJoseph Mingrone } 4136f9cba8fSJoseph Mingrone } 4146f9cba8fSJoseph Mingrone //free all pktmbuf 4156f9cba8fSJoseph Mingrone rte_pktmbuf_free(m); 4166f9cba8fSJoseph Mingrone if (large_buffer){ 4176f9cba8fSJoseph Mingrone free(large_buffer); 4186f9cba8fSJoseph Mingrone large_buffer=NULL; 4196f9cba8fSJoseph Mingrone } 4206f9cba8fSJoseph Mingrone } 4216f9cba8fSJoseph Mingrone } 4226f9cba8fSJoseph Mingrone return pkt_cnt; 4236f9cba8fSJoseph Mingrone } 4246f9cba8fSJoseph Mingrone 4256f9cba8fSJoseph Mingrone static int pcap_dpdk_inject(pcap_t *p, const void *buf _U_, int size _U_) 4266f9cba8fSJoseph Mingrone { 4276f9cba8fSJoseph Mingrone //not implemented yet 428*afdbf109SJoseph Mingrone pcapint_strlcpy(p->errbuf, 4296f9cba8fSJoseph Mingrone "dpdk error: Inject function has not been implemented yet", 4306f9cba8fSJoseph Mingrone PCAP_ERRBUF_SIZE); 4316f9cba8fSJoseph Mingrone return PCAP_ERROR; 4326f9cba8fSJoseph Mingrone } 4336f9cba8fSJoseph Mingrone 4346f9cba8fSJoseph Mingrone static void pcap_dpdk_close(pcap_t *p) 4356f9cba8fSJoseph Mingrone { 4366f9cba8fSJoseph Mingrone struct pcap_dpdk *pd = p->priv; 4376f9cba8fSJoseph Mingrone if (pd==NULL) 4386f9cba8fSJoseph Mingrone { 4396f9cba8fSJoseph Mingrone return; 4406f9cba8fSJoseph Mingrone } 4416f9cba8fSJoseph Mingrone if (pd->must_clear_promisc) 4426f9cba8fSJoseph Mingrone { 4436f9cba8fSJoseph Mingrone rte_eth_promiscuous_disable(pd->portid); 4446f9cba8fSJoseph Mingrone } 4456f9cba8fSJoseph Mingrone rte_eth_dev_stop(pd->portid); 4466f9cba8fSJoseph Mingrone rte_eth_dev_close(pd->portid); 447*afdbf109SJoseph Mingrone pcapint_cleanup_live_common(p); 4486f9cba8fSJoseph Mingrone } 4496f9cba8fSJoseph Mingrone 4506f9cba8fSJoseph Mingrone static void nic_stats_display(struct pcap_dpdk *pd) 4516f9cba8fSJoseph Mingrone { 4526f9cba8fSJoseph Mingrone uint16_t portid = pd->portid; 4536f9cba8fSJoseph Mingrone struct rte_eth_stats stats; 4546f9cba8fSJoseph Mingrone rte_eth_stats_get(portid, &stats); 4556f9cba8fSJoseph Mingrone RTE_LOG(INFO,USER1, "portid:%d, RX-packets: %-10"PRIu64" RX-errors: %-10"PRIu64 4566f9cba8fSJoseph Mingrone " RX-bytes: %-10"PRIu64" RX-Imissed: %-10"PRIu64"\n", portid, stats.ipackets, stats.ierrors, 4576f9cba8fSJoseph Mingrone stats.ibytes,stats.imissed); 4586f9cba8fSJoseph Mingrone RTE_LOG(INFO,USER1, "portid:%d, RX-PPS: %-10"PRIu64" RX-Mbps: %.2lf\n", portid, pd->pps, pd->bps/1e6f ); 4596f9cba8fSJoseph Mingrone } 4606f9cba8fSJoseph Mingrone 4616f9cba8fSJoseph Mingrone static int pcap_dpdk_stats(pcap_t *p, struct pcap_stat *ps) 4626f9cba8fSJoseph Mingrone { 4636f9cba8fSJoseph Mingrone struct pcap_dpdk *pd = p->priv; 4646f9cba8fSJoseph Mingrone calculate_timestamp(&(pd->ts_helper), &(pd->curr_ts)); 4656f9cba8fSJoseph Mingrone rte_eth_stats_get(pd->portid,&(pd->curr_stats)); 4666f9cba8fSJoseph Mingrone if (ps){ 4676f9cba8fSJoseph Mingrone ps->ps_recv = pd->curr_stats.ipackets; 4686f9cba8fSJoseph Mingrone ps->ps_drop = pd->curr_stats.ierrors; 4696f9cba8fSJoseph Mingrone ps->ps_drop += pd->bpf_drop; 4706f9cba8fSJoseph Mingrone ps->ps_ifdrop = pd->curr_stats.imissed; 4716f9cba8fSJoseph Mingrone } 4726f9cba8fSJoseph Mingrone uint64_t delta_pkt = pd->curr_stats.ipackets - pd->prev_stats.ipackets; 4736f9cba8fSJoseph Mingrone struct timeval delta_tm; 4746f9cba8fSJoseph Mingrone timersub(&(pd->curr_ts),&(pd->prev_ts), &delta_tm); 4756f9cba8fSJoseph Mingrone uint64_t delta_usec = delta_tm.tv_sec*1e6+delta_tm.tv_usec; 4766f9cba8fSJoseph Mingrone uint64_t delta_bit = (pd->curr_stats.ibytes-pd->prev_stats.ibytes)*8; 4776f9cba8fSJoseph Mingrone RTE_LOG(DEBUG, USER1, "delta_usec: %-10"PRIu64" delta_pkt: %-10"PRIu64" delta_bit: %-10"PRIu64"\n", delta_usec, delta_pkt, delta_bit); 4786f9cba8fSJoseph Mingrone pd->pps = (uint64_t)(delta_pkt*1e6f/delta_usec); 4796f9cba8fSJoseph Mingrone pd->bps = (uint64_t)(delta_bit*1e6f/delta_usec); 4806f9cba8fSJoseph Mingrone nic_stats_display(pd); 4816f9cba8fSJoseph Mingrone pd->prev_stats = pd->curr_stats; 4826f9cba8fSJoseph Mingrone pd->prev_ts = pd->curr_ts; 4836f9cba8fSJoseph Mingrone return 0; 4846f9cba8fSJoseph Mingrone } 4856f9cba8fSJoseph Mingrone 4866f9cba8fSJoseph Mingrone static int pcap_dpdk_setnonblock(pcap_t *p, int nonblock){ 4876f9cba8fSJoseph Mingrone struct pcap_dpdk *pd = (struct pcap_dpdk*)(p->priv); 4886f9cba8fSJoseph Mingrone pd->nonblock = nonblock; 4896f9cba8fSJoseph Mingrone return 0; 4906f9cba8fSJoseph Mingrone } 4916f9cba8fSJoseph Mingrone 4926f9cba8fSJoseph Mingrone static int pcap_dpdk_getnonblock(pcap_t *p){ 4936f9cba8fSJoseph Mingrone struct pcap_dpdk *pd = (struct pcap_dpdk*)(p->priv); 4946f9cba8fSJoseph Mingrone return pd->nonblock; 4956f9cba8fSJoseph Mingrone } 4966f9cba8fSJoseph Mingrone static int check_link_status(uint16_t portid, struct rte_eth_link *plink) 4976f9cba8fSJoseph Mingrone { 4986f9cba8fSJoseph Mingrone // wait up to 9 seconds to get link status 4996f9cba8fSJoseph Mingrone rte_eth_link_get(portid, plink); 5006f9cba8fSJoseph Mingrone return plink->link_status == ETH_LINK_UP; 5016f9cba8fSJoseph Mingrone } 5026f9cba8fSJoseph Mingrone static void eth_addr_str(ETHER_ADDR_TYPE *addrp, char* mac_str, int len) 5036f9cba8fSJoseph Mingrone { 5046f9cba8fSJoseph Mingrone int offset=0; 5056f9cba8fSJoseph Mingrone if (addrp == NULL){ 5066f9cba8fSJoseph Mingrone snprintf(mac_str, len-1, DPDK_DEF_MAC_ADDR); 5076f9cba8fSJoseph Mingrone return; 5086f9cba8fSJoseph Mingrone } 5096f9cba8fSJoseph Mingrone for (int i=0; i<6; i++) 5106f9cba8fSJoseph Mingrone { 5116f9cba8fSJoseph Mingrone if (offset >= len) 5126f9cba8fSJoseph Mingrone { // buffer overflow 5136f9cba8fSJoseph Mingrone return; 5146f9cba8fSJoseph Mingrone } 5156f9cba8fSJoseph Mingrone if (i==0) 5166f9cba8fSJoseph Mingrone { 5176f9cba8fSJoseph Mingrone snprintf(mac_str+offset, len-1-offset, "%02X",addrp->addr_bytes[i]); 5186f9cba8fSJoseph Mingrone offset+=2; // FF 5196f9cba8fSJoseph Mingrone }else{ 5206f9cba8fSJoseph Mingrone snprintf(mac_str+offset, len-1-offset, ":%02X", addrp->addr_bytes[i]); 5216f9cba8fSJoseph Mingrone offset+=3; // :FF 5226f9cba8fSJoseph Mingrone } 5236f9cba8fSJoseph Mingrone } 5246f9cba8fSJoseph Mingrone return; 5256f9cba8fSJoseph Mingrone } 5266f9cba8fSJoseph Mingrone // return portid by device name, otherwise return -1 5276f9cba8fSJoseph Mingrone static uint16_t portid_by_device(char * device) 5286f9cba8fSJoseph Mingrone { 5296f9cba8fSJoseph Mingrone uint16_t ret = DPDK_PORTID_MAX; 530*afdbf109SJoseph Mingrone size_t len = strlen(device); 531*afdbf109SJoseph Mingrone size_t prefix_len = strlen(DPDK_PREFIX); 5326f9cba8fSJoseph Mingrone unsigned long ret_ul = 0L; 5336f9cba8fSJoseph Mingrone char *pEnd; 5346f9cba8fSJoseph Mingrone if (len<=prefix_len || strncmp(device, DPDK_PREFIX, prefix_len)) // check prefix dpdk: 5356f9cba8fSJoseph Mingrone { 5366f9cba8fSJoseph Mingrone return ret; 5376f9cba8fSJoseph Mingrone } 5386f9cba8fSJoseph Mingrone //check all chars are digital 5396f9cba8fSJoseph Mingrone for (int i=prefix_len; device[i]; i++){ 5406f9cba8fSJoseph Mingrone if (device[i]<'0' || device[i]>'9'){ 5416f9cba8fSJoseph Mingrone return ret; 5426f9cba8fSJoseph Mingrone } 5436f9cba8fSJoseph Mingrone } 5446f9cba8fSJoseph Mingrone ret_ul = strtoul(&(device[prefix_len]), &pEnd, 10); 5456f9cba8fSJoseph Mingrone if (pEnd == &(device[prefix_len]) || *pEnd != '\0'){ 5466f9cba8fSJoseph Mingrone return ret; 5476f9cba8fSJoseph Mingrone } 5486f9cba8fSJoseph Mingrone // too large for portid 5496f9cba8fSJoseph Mingrone if (ret_ul >= DPDK_PORTID_MAX){ 5506f9cba8fSJoseph Mingrone return ret; 5516f9cba8fSJoseph Mingrone } 5526f9cba8fSJoseph Mingrone ret = (uint16_t)ret_ul; 5536f9cba8fSJoseph Mingrone return ret; 5546f9cba8fSJoseph Mingrone } 5556f9cba8fSJoseph Mingrone 5566f9cba8fSJoseph Mingrone static int parse_dpdk_cfg(char* dpdk_cfg,char** dargv) 5576f9cba8fSJoseph Mingrone { 5586f9cba8fSJoseph Mingrone int cnt=0; 5596f9cba8fSJoseph Mingrone memset(dargv,0,sizeof(dargv[0])*DPDK_ARGC_MAX); 5606f9cba8fSJoseph Mingrone //current process name 5616f9cba8fSJoseph Mingrone int skip_space = 1; 5626f9cba8fSJoseph Mingrone int i=0; 5636f9cba8fSJoseph Mingrone RTE_LOG(INFO, USER1,"dpdk cfg: %s\n",dpdk_cfg); 5646f9cba8fSJoseph Mingrone // find first non space char 5656f9cba8fSJoseph Mingrone // The last opt is NULL 5666f9cba8fSJoseph Mingrone for (i=0;dpdk_cfg[i] && cnt<DPDK_ARGC_MAX-1;i++){ 5676f9cba8fSJoseph Mingrone if (skip_space && dpdk_cfg[i]!=' '){ // not space 5686f9cba8fSJoseph Mingrone skip_space=!skip_space; // skip normal char 5696f9cba8fSJoseph Mingrone dargv[cnt++] = dpdk_cfg+i; 5706f9cba8fSJoseph Mingrone } 571*afdbf109SJoseph Mingrone if (!skip_space && dpdk_cfg[i]==' '){ // find a space 5726f9cba8fSJoseph Mingrone dpdk_cfg[i]=0x00; // end of this opt 5736f9cba8fSJoseph Mingrone skip_space=!skip_space; // skip space char 5746f9cba8fSJoseph Mingrone } 5756f9cba8fSJoseph Mingrone } 5766f9cba8fSJoseph Mingrone dargv[cnt]=NULL; 5776f9cba8fSJoseph Mingrone return cnt; 5786f9cba8fSJoseph Mingrone } 5796f9cba8fSJoseph Mingrone 5806f9cba8fSJoseph Mingrone // only called once 5816f9cba8fSJoseph Mingrone // Returns: 5826f9cba8fSJoseph Mingrone // 5836f9cba8fSJoseph Mingrone // 1 on success; 5846f9cba8fSJoseph Mingrone // 5856f9cba8fSJoseph Mingrone // 0 if "the EAL cannot initialize on this system", which we treat as 5866f9cba8fSJoseph Mingrone // meaning "DPDK isn't available"; 5876f9cba8fSJoseph Mingrone // 5886f9cba8fSJoseph Mingrone // a PCAP_ERROR_ code for other errors. 5896f9cba8fSJoseph Mingrone // 5906f9cba8fSJoseph Mingrone // If eaccess_not_fatal is non-zero, treat "a permissions issue" the way 5916f9cba8fSJoseph Mingrone // we treat "the EAL cannot initialize on this system". We use that 5926f9cba8fSJoseph Mingrone // when trying to find DPDK devices, as we don't want to fail to return 5936f9cba8fSJoseph Mingrone // *any* devices just because we can't support DPDK; when we're trying 5946f9cba8fSJoseph Mingrone // to open a device, we need to return a permissions error in that case. 5956f9cba8fSJoseph Mingrone static int dpdk_pre_init(char * ebuf, int eaccess_not_fatal) 5966f9cba8fSJoseph Mingrone { 5976f9cba8fSJoseph Mingrone int dargv_cnt=0; 5986f9cba8fSJoseph Mingrone char *dargv[DPDK_ARGC_MAX]; 5996f9cba8fSJoseph Mingrone char *ptr_dpdk_cfg = NULL; 6006f9cba8fSJoseph Mingrone int ret; 601*afdbf109SJoseph Mingrone // global var 6026f9cba8fSJoseph Mingrone if (is_dpdk_pre_inited != 0) 6036f9cba8fSJoseph Mingrone { 6046f9cba8fSJoseph Mingrone // already inited; did that succeed? 6056f9cba8fSJoseph Mingrone if (is_dpdk_pre_inited < 0) 6066f9cba8fSJoseph Mingrone { 6076f9cba8fSJoseph Mingrone // failed 6086f9cba8fSJoseph Mingrone goto error; 6096f9cba8fSJoseph Mingrone } 6106f9cba8fSJoseph Mingrone else 6116f9cba8fSJoseph Mingrone { 6126f9cba8fSJoseph Mingrone // succeeded 6136f9cba8fSJoseph Mingrone return 1; 6146f9cba8fSJoseph Mingrone } 6156f9cba8fSJoseph Mingrone } 6166f9cba8fSJoseph Mingrone // init EAL 6176f9cba8fSJoseph Mingrone ptr_dpdk_cfg = getenv(DPDK_CFG_ENV_NAME); 6186f9cba8fSJoseph Mingrone // set default log level to debug 6196f9cba8fSJoseph Mingrone rte_log_set_global_level(DPDK_DEF_LOG_LEV); 6206f9cba8fSJoseph Mingrone if (ptr_dpdk_cfg == NULL) 6216f9cba8fSJoseph Mingrone { 6226f9cba8fSJoseph Mingrone RTE_LOG(INFO,USER1,"env $DPDK_CFG is unset, so using default: %s\n",DPDK_DEF_CFG); 6236f9cba8fSJoseph Mingrone ptr_dpdk_cfg = DPDK_DEF_CFG; 6246f9cba8fSJoseph Mingrone } 6256f9cba8fSJoseph Mingrone memset(dpdk_cfg_buf,0,sizeof(dpdk_cfg_buf)); 6266f9cba8fSJoseph Mingrone snprintf(dpdk_cfg_buf,DPDK_CFG_MAX_LEN-1,"%s %s",DPDK_LIB_NAME,ptr_dpdk_cfg); 6276f9cba8fSJoseph Mingrone dargv_cnt = parse_dpdk_cfg(dpdk_cfg_buf,dargv); 6286f9cba8fSJoseph Mingrone ret = rte_eal_init(dargv_cnt,dargv); 6296f9cba8fSJoseph Mingrone if (ret == -1) 6306f9cba8fSJoseph Mingrone { 6316f9cba8fSJoseph Mingrone // Indicate that we've called rte_eal_init() by setting 6326f9cba8fSJoseph Mingrone // is_dpdk_pre_inited to the negative of the error code, 6336f9cba8fSJoseph Mingrone // and process the error. 6346f9cba8fSJoseph Mingrone is_dpdk_pre_inited = -rte_errno; 6356f9cba8fSJoseph Mingrone goto error; 6366f9cba8fSJoseph Mingrone } 6376f9cba8fSJoseph Mingrone // init succeeded, so we do not need to do it again later. 6386f9cba8fSJoseph Mingrone is_dpdk_pre_inited = 1; 6396f9cba8fSJoseph Mingrone return 1; 6406f9cba8fSJoseph Mingrone 6416f9cba8fSJoseph Mingrone error: 6426f9cba8fSJoseph Mingrone switch (-is_dpdk_pre_inited) 6436f9cba8fSJoseph Mingrone { 6446f9cba8fSJoseph Mingrone case EACCES: 6456f9cba8fSJoseph Mingrone // This "indicates a permissions issue.". 6466f9cba8fSJoseph Mingrone RTE_LOG(ERR, USER1, "%s\n", DPDK_ERR_PERM_MSG); 6476f9cba8fSJoseph Mingrone // If we were told to treat this as just meaning 6486f9cba8fSJoseph Mingrone // DPDK isn't available, do so. 6496f9cba8fSJoseph Mingrone if (eaccess_not_fatal) 6506f9cba8fSJoseph Mingrone return 0; 6516f9cba8fSJoseph Mingrone // Otherwise report a fatal error. 6526f9cba8fSJoseph Mingrone snprintf(ebuf, PCAP_ERRBUF_SIZE, 6536f9cba8fSJoseph Mingrone "DPDK requires that it run as root"); 6546f9cba8fSJoseph Mingrone return PCAP_ERROR_PERM_DENIED; 6556f9cba8fSJoseph Mingrone 6566f9cba8fSJoseph Mingrone case EAGAIN: 6576f9cba8fSJoseph Mingrone // This "indicates either a bus or system 6586f9cba8fSJoseph Mingrone // resource was not available, setup may 6596f9cba8fSJoseph Mingrone // be attempted again." 6606f9cba8fSJoseph Mingrone // There's no such error in pcap, so I'm 6616f9cba8fSJoseph Mingrone // not sure what we should do here. 6626f9cba8fSJoseph Mingrone snprintf(ebuf, PCAP_ERRBUF_SIZE, 6636f9cba8fSJoseph Mingrone "Bus or system resource was not available"); 6646f9cba8fSJoseph Mingrone break; 6656f9cba8fSJoseph Mingrone 6666f9cba8fSJoseph Mingrone case EALREADY: 6676f9cba8fSJoseph Mingrone // This "indicates that the rte_eal_init 6686f9cba8fSJoseph Mingrone // function has already been called, and 6696f9cba8fSJoseph Mingrone // cannot be called again." 6706f9cba8fSJoseph Mingrone // That's not an error; set the "we've 6716f9cba8fSJoseph Mingrone // been here before" flag and return 6726f9cba8fSJoseph Mingrone // success. 6736f9cba8fSJoseph Mingrone is_dpdk_pre_inited = 1; 6746f9cba8fSJoseph Mingrone return 1; 6756f9cba8fSJoseph Mingrone 6766f9cba8fSJoseph Mingrone case EFAULT: 6776f9cba8fSJoseph Mingrone // This "indicates the tailq configuration 6786f9cba8fSJoseph Mingrone // name was not found in memory configuration." 6796f9cba8fSJoseph Mingrone snprintf(ebuf, PCAP_ERRBUF_SIZE, 6806f9cba8fSJoseph Mingrone "The tailq configuration name was not found in the memory configuration"); 6816f9cba8fSJoseph Mingrone return PCAP_ERROR; 6826f9cba8fSJoseph Mingrone 6836f9cba8fSJoseph Mingrone case EINVAL: 6846f9cba8fSJoseph Mingrone // This "indicates invalid parameters were 6856f9cba8fSJoseph Mingrone // passed as argv/argc." Those came from 6866f9cba8fSJoseph Mingrone // the configuration file. 6876f9cba8fSJoseph Mingrone snprintf(ebuf, PCAP_ERRBUF_SIZE, 6886f9cba8fSJoseph Mingrone "The configuration file has invalid parameters"); 6896f9cba8fSJoseph Mingrone break; 6906f9cba8fSJoseph Mingrone 6916f9cba8fSJoseph Mingrone case ENOMEM: 6926f9cba8fSJoseph Mingrone // This "indicates failure likely caused by 6936f9cba8fSJoseph Mingrone // an out-of-memory condition." 6946f9cba8fSJoseph Mingrone snprintf(ebuf, PCAP_ERRBUF_SIZE, 6956f9cba8fSJoseph Mingrone "Out of memory"); 6966f9cba8fSJoseph Mingrone break; 6976f9cba8fSJoseph Mingrone 6986f9cba8fSJoseph Mingrone case ENODEV: 6996f9cba8fSJoseph Mingrone // This "indicates memory setup issues." 7006f9cba8fSJoseph Mingrone snprintf(ebuf, PCAP_ERRBUF_SIZE, 7016f9cba8fSJoseph Mingrone "An error occurred setting up memory"); 7026f9cba8fSJoseph Mingrone break; 7036f9cba8fSJoseph Mingrone 7046f9cba8fSJoseph Mingrone case ENOTSUP: 7056f9cba8fSJoseph Mingrone // This "indicates that the EAL cannot 7066f9cba8fSJoseph Mingrone // initialize on this system." We treat 7076f9cba8fSJoseph Mingrone // that as meaning DPDK isn't available 7086f9cba8fSJoseph Mingrone // on this machine, rather than as a 7096f9cba8fSJoseph Mingrone // fatal error, and let our caller decide 7106f9cba8fSJoseph Mingrone // whether that's a fatal error (if trying 7116f9cba8fSJoseph Mingrone // to activate a DPDK device) or not (if 7126f9cba8fSJoseph Mingrone // trying to enumerate devices). 7136f9cba8fSJoseph Mingrone return 0; 7146f9cba8fSJoseph Mingrone 7156f9cba8fSJoseph Mingrone case EPROTO: 7166f9cba8fSJoseph Mingrone // This "indicates that the PCI bus is 7176f9cba8fSJoseph Mingrone // either not present, or is not readable 7186f9cba8fSJoseph Mingrone // by the eal." Does "the PCI bus is not 7196f9cba8fSJoseph Mingrone // present" mean "this machine has no PCI 7206f9cba8fSJoseph Mingrone // bus", which strikes me as a "not available" 7216f9cba8fSJoseph Mingrone // case? If so, should "is not readable by 7226f9cba8fSJoseph Mingrone // the EAL" also something we should treat 7236f9cba8fSJoseph Mingrone // as a "not available" case? If not, we 7246f9cba8fSJoseph Mingrone // can't distinguish between the two, so 7256f9cba8fSJoseph Mingrone // we're stuck. 7266f9cba8fSJoseph Mingrone snprintf(ebuf, PCAP_ERRBUF_SIZE, 7276f9cba8fSJoseph Mingrone "PCI bus is not present or not readable by the EAL"); 7286f9cba8fSJoseph Mingrone break; 7296f9cba8fSJoseph Mingrone 7306f9cba8fSJoseph Mingrone case ENOEXEC: 7316f9cba8fSJoseph Mingrone // This "indicates that a service core 7326f9cba8fSJoseph Mingrone // failed to launch successfully." 7336f9cba8fSJoseph Mingrone snprintf(ebuf, PCAP_ERRBUF_SIZE, 7346f9cba8fSJoseph Mingrone "A service core failed to launch successfully"); 7356f9cba8fSJoseph Mingrone break; 7366f9cba8fSJoseph Mingrone 7376f9cba8fSJoseph Mingrone default: 7386f9cba8fSJoseph Mingrone // 7396f9cba8fSJoseph Mingrone // That's not in the list of errors in 7406f9cba8fSJoseph Mingrone // the documentation; let it be reported 7416f9cba8fSJoseph Mingrone // as an error. 7426f9cba8fSJoseph Mingrone // 7436f9cba8fSJoseph Mingrone dpdk_fmt_errmsg_for_rte_errno(ebuf, 7446f9cba8fSJoseph Mingrone PCAP_ERRBUF_SIZE, -is_dpdk_pre_inited, 7456f9cba8fSJoseph Mingrone "dpdk error: dpdk_pre_init failed"); 7466f9cba8fSJoseph Mingrone break; 7476f9cba8fSJoseph Mingrone } 7486f9cba8fSJoseph Mingrone // Error. 7496f9cba8fSJoseph Mingrone return PCAP_ERROR; 7506f9cba8fSJoseph Mingrone } 7516f9cba8fSJoseph Mingrone 7526f9cba8fSJoseph Mingrone static int pcap_dpdk_activate(pcap_t *p) 7536f9cba8fSJoseph Mingrone { 7546f9cba8fSJoseph Mingrone struct pcap_dpdk *pd = p->priv; 7556f9cba8fSJoseph Mingrone pd->orig = p; 7566f9cba8fSJoseph Mingrone int ret = PCAP_ERROR; 7576f9cba8fSJoseph Mingrone uint16_t nb_ports=0; 7586f9cba8fSJoseph Mingrone uint16_t portid= DPDK_PORTID_MAX; 7596f9cba8fSJoseph Mingrone unsigned nb_mbufs = DPDK_NB_MBUFS; 7606f9cba8fSJoseph Mingrone struct rte_eth_rxconf rxq_conf; 7616f9cba8fSJoseph Mingrone struct rte_eth_txconf txq_conf; 7626f9cba8fSJoseph Mingrone struct rte_eth_conf local_port_conf = port_conf; 7636f9cba8fSJoseph Mingrone struct rte_eth_dev_info dev_info; 7646f9cba8fSJoseph Mingrone int is_port_up = 0; 7656f9cba8fSJoseph Mingrone struct rte_eth_link link; 7666f9cba8fSJoseph Mingrone do{ 7676f9cba8fSJoseph Mingrone //init EAL; fail if we have insufficient permission 7686f9cba8fSJoseph Mingrone char dpdk_pre_init_errbuf[PCAP_ERRBUF_SIZE]; 7696f9cba8fSJoseph Mingrone ret = dpdk_pre_init(dpdk_pre_init_errbuf, 0); 7706f9cba8fSJoseph Mingrone if (ret < 0) 7716f9cba8fSJoseph Mingrone { 7726f9cba8fSJoseph Mingrone // This returns a negative value on an error. 7736f9cba8fSJoseph Mingrone snprintf(p->errbuf, PCAP_ERRBUF_SIZE, 7746f9cba8fSJoseph Mingrone "Can't open device %s: %s", 7756f9cba8fSJoseph Mingrone p->opt.device, dpdk_pre_init_errbuf); 7766f9cba8fSJoseph Mingrone // ret is set to the correct error 7776f9cba8fSJoseph Mingrone break; 7786f9cba8fSJoseph Mingrone } 7796f9cba8fSJoseph Mingrone if (ret == 0) 7806f9cba8fSJoseph Mingrone { 7816f9cba8fSJoseph Mingrone // This means DPDK isn't available on this machine. 7826f9cba8fSJoseph Mingrone snprintf(p->errbuf, PCAP_ERRBUF_SIZE, 7836f9cba8fSJoseph Mingrone "Can't open device %s: DPDK is not available on this machine", 7846f9cba8fSJoseph Mingrone p->opt.device); 7856f9cba8fSJoseph Mingrone return PCAP_ERROR_NO_SUCH_DEVICE; 7866f9cba8fSJoseph Mingrone } 7876f9cba8fSJoseph Mingrone 7886f9cba8fSJoseph Mingrone ret = dpdk_init_timer(pd); 7896f9cba8fSJoseph Mingrone if (ret<0) 7906f9cba8fSJoseph Mingrone { 7916f9cba8fSJoseph Mingrone snprintf(p->errbuf, PCAP_ERRBUF_SIZE, 7926f9cba8fSJoseph Mingrone "dpdk error: Init timer is zero with device %s", 7936f9cba8fSJoseph Mingrone p->opt.device); 7946f9cba8fSJoseph Mingrone ret = PCAP_ERROR; 7956f9cba8fSJoseph Mingrone break; 7966f9cba8fSJoseph Mingrone } 7976f9cba8fSJoseph Mingrone 7986f9cba8fSJoseph Mingrone nb_ports = rte_eth_dev_count_avail(); 7996f9cba8fSJoseph Mingrone if (nb_ports == 0) 8006f9cba8fSJoseph Mingrone { 8016f9cba8fSJoseph Mingrone snprintf(p->errbuf, PCAP_ERRBUF_SIZE, 8026f9cba8fSJoseph Mingrone "dpdk error: No Ethernet ports"); 8036f9cba8fSJoseph Mingrone ret = PCAP_ERROR; 8046f9cba8fSJoseph Mingrone break; 8056f9cba8fSJoseph Mingrone } 8066f9cba8fSJoseph Mingrone 8076f9cba8fSJoseph Mingrone portid = portid_by_device(p->opt.device); 8086f9cba8fSJoseph Mingrone if (portid == DPDK_PORTID_MAX){ 8096f9cba8fSJoseph Mingrone snprintf(p->errbuf, PCAP_ERRBUF_SIZE, 8106f9cba8fSJoseph Mingrone "dpdk error: portid is invalid. device %s", 8116f9cba8fSJoseph Mingrone p->opt.device); 8126f9cba8fSJoseph Mingrone ret = PCAP_ERROR_NO_SUCH_DEVICE; 8136f9cba8fSJoseph Mingrone break; 8146f9cba8fSJoseph Mingrone } 8156f9cba8fSJoseph Mingrone 8166f9cba8fSJoseph Mingrone pd->portid = portid; 8176f9cba8fSJoseph Mingrone 8186f9cba8fSJoseph Mingrone if (p->snapshot <= 0 || p->snapshot > MAXIMUM_SNAPLEN) 8196f9cba8fSJoseph Mingrone { 8206f9cba8fSJoseph Mingrone p->snapshot = MAXIMUM_SNAPLEN; 8216f9cba8fSJoseph Mingrone } 8226f9cba8fSJoseph Mingrone // create the mbuf pool 8236f9cba8fSJoseph Mingrone pd->pktmbuf_pool = rte_pktmbuf_pool_create(MBUF_POOL_NAME, nb_mbufs, 8246f9cba8fSJoseph Mingrone MEMPOOL_CACHE_SIZE, 0, RTE_MBUF_DEFAULT_BUF_SIZE, 8256f9cba8fSJoseph Mingrone rte_socket_id()); 8266f9cba8fSJoseph Mingrone if (pd->pktmbuf_pool == NULL) 8276f9cba8fSJoseph Mingrone { 8286f9cba8fSJoseph Mingrone dpdk_fmt_errmsg_for_rte_errno(p->errbuf, 8296f9cba8fSJoseph Mingrone PCAP_ERRBUF_SIZE, rte_errno, 8306f9cba8fSJoseph Mingrone "dpdk error: Cannot init mbuf pool"); 8316f9cba8fSJoseph Mingrone ret = PCAP_ERROR; 8326f9cba8fSJoseph Mingrone break; 8336f9cba8fSJoseph Mingrone } 8346f9cba8fSJoseph Mingrone // config dev 8356f9cba8fSJoseph Mingrone rte_eth_dev_info_get(portid, &dev_info); 8366f9cba8fSJoseph Mingrone if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE) 8376f9cba8fSJoseph Mingrone { 8386f9cba8fSJoseph Mingrone local_port_conf.txmode.offloads |=DEV_TX_OFFLOAD_MBUF_FAST_FREE; 8396f9cba8fSJoseph Mingrone } 8406f9cba8fSJoseph Mingrone // only support 1 queue 8416f9cba8fSJoseph Mingrone ret = rte_eth_dev_configure(portid, 1, 1, &local_port_conf); 8426f9cba8fSJoseph Mingrone if (ret < 0) 8436f9cba8fSJoseph Mingrone { 8446f9cba8fSJoseph Mingrone dpdk_fmt_errmsg_for_rte_errno(p->errbuf, 8456f9cba8fSJoseph Mingrone PCAP_ERRBUF_SIZE, -ret, 8466f9cba8fSJoseph Mingrone "dpdk error: Cannot configure device: port=%u", 8476f9cba8fSJoseph Mingrone portid); 8486f9cba8fSJoseph Mingrone ret = PCAP_ERROR; 8496f9cba8fSJoseph Mingrone break; 8506f9cba8fSJoseph Mingrone } 8516f9cba8fSJoseph Mingrone // adjust rx tx 8526f9cba8fSJoseph Mingrone ret = rte_eth_dev_adjust_nb_rx_tx_desc(portid, &nb_rxd, &nb_txd); 8536f9cba8fSJoseph Mingrone if (ret < 0) 8546f9cba8fSJoseph Mingrone { 8556f9cba8fSJoseph Mingrone dpdk_fmt_errmsg_for_rte_errno(p->errbuf, 8566f9cba8fSJoseph Mingrone PCAP_ERRBUF_SIZE, -ret, 8576f9cba8fSJoseph Mingrone "dpdk error: Cannot adjust number of descriptors: port=%u", 8586f9cba8fSJoseph Mingrone portid); 8596f9cba8fSJoseph Mingrone ret = PCAP_ERROR; 8606f9cba8fSJoseph Mingrone break; 8616f9cba8fSJoseph Mingrone } 8626f9cba8fSJoseph Mingrone // get MAC addr 8636f9cba8fSJoseph Mingrone rte_eth_macaddr_get(portid, &(pd->eth_addr)); 8646f9cba8fSJoseph Mingrone eth_addr_str(&(pd->eth_addr), pd->mac_addr, DPDK_MAC_ADDR_SIZE-1); 8656f9cba8fSJoseph Mingrone 8666f9cba8fSJoseph Mingrone // init one RX queue 8676f9cba8fSJoseph Mingrone rxq_conf = dev_info.default_rxconf; 8686f9cba8fSJoseph Mingrone rxq_conf.offloads = local_port_conf.rxmode.offloads; 8696f9cba8fSJoseph Mingrone ret = rte_eth_rx_queue_setup(portid, 0, nb_rxd, 8706f9cba8fSJoseph Mingrone rte_eth_dev_socket_id(portid), 8716f9cba8fSJoseph Mingrone &rxq_conf, 8726f9cba8fSJoseph Mingrone pd->pktmbuf_pool); 8736f9cba8fSJoseph Mingrone if (ret < 0) 8746f9cba8fSJoseph Mingrone { 8756f9cba8fSJoseph Mingrone dpdk_fmt_errmsg_for_rte_errno(p->errbuf, 8766f9cba8fSJoseph Mingrone PCAP_ERRBUF_SIZE, -ret, 8776f9cba8fSJoseph Mingrone "dpdk error: rte_eth_rx_queue_setup:port=%u", 8786f9cba8fSJoseph Mingrone portid); 8796f9cba8fSJoseph Mingrone ret = PCAP_ERROR; 8806f9cba8fSJoseph Mingrone break; 8816f9cba8fSJoseph Mingrone } 8826f9cba8fSJoseph Mingrone 8836f9cba8fSJoseph Mingrone // init one TX queue 8846f9cba8fSJoseph Mingrone txq_conf = dev_info.default_txconf; 8856f9cba8fSJoseph Mingrone txq_conf.offloads = local_port_conf.txmode.offloads; 8866f9cba8fSJoseph Mingrone ret = rte_eth_tx_queue_setup(portid, 0, nb_txd, 8876f9cba8fSJoseph Mingrone rte_eth_dev_socket_id(portid), 8886f9cba8fSJoseph Mingrone &txq_conf); 8896f9cba8fSJoseph Mingrone if (ret < 0) 8906f9cba8fSJoseph Mingrone { 8916f9cba8fSJoseph Mingrone dpdk_fmt_errmsg_for_rte_errno(p->errbuf, 8926f9cba8fSJoseph Mingrone PCAP_ERRBUF_SIZE, -ret, 8936f9cba8fSJoseph Mingrone "dpdk error: rte_eth_tx_queue_setup:port=%u", 8946f9cba8fSJoseph Mingrone portid); 8956f9cba8fSJoseph Mingrone ret = PCAP_ERROR; 8966f9cba8fSJoseph Mingrone break; 8976f9cba8fSJoseph Mingrone } 8986f9cba8fSJoseph Mingrone // Initialize TX buffers 8996f9cba8fSJoseph Mingrone tx_buffer = rte_zmalloc_socket(DPDK_TX_BUF_NAME, 9006f9cba8fSJoseph Mingrone RTE_ETH_TX_BUFFER_SIZE(MAX_PKT_BURST), 0, 9016f9cba8fSJoseph Mingrone rte_eth_dev_socket_id(portid)); 9026f9cba8fSJoseph Mingrone if (tx_buffer == NULL) 9036f9cba8fSJoseph Mingrone { 9046f9cba8fSJoseph Mingrone snprintf(p->errbuf, PCAP_ERRBUF_SIZE, 9056f9cba8fSJoseph Mingrone "dpdk error: Cannot allocate buffer for tx on port %u", portid); 9066f9cba8fSJoseph Mingrone ret = PCAP_ERROR; 9076f9cba8fSJoseph Mingrone break; 9086f9cba8fSJoseph Mingrone } 9096f9cba8fSJoseph Mingrone rte_eth_tx_buffer_init(tx_buffer, MAX_PKT_BURST); 9106f9cba8fSJoseph Mingrone // Start device 9116f9cba8fSJoseph Mingrone ret = rte_eth_dev_start(portid); 9126f9cba8fSJoseph Mingrone if (ret < 0) 9136f9cba8fSJoseph Mingrone { 9146f9cba8fSJoseph Mingrone dpdk_fmt_errmsg_for_rte_errno(p->errbuf, 9156f9cba8fSJoseph Mingrone PCAP_ERRBUF_SIZE, -ret, 9166f9cba8fSJoseph Mingrone "dpdk error: rte_eth_dev_start:port=%u", 9176f9cba8fSJoseph Mingrone portid); 9186f9cba8fSJoseph Mingrone ret = PCAP_ERROR; 9196f9cba8fSJoseph Mingrone break; 9206f9cba8fSJoseph Mingrone } 9216f9cba8fSJoseph Mingrone // set promiscuous mode 9226f9cba8fSJoseph Mingrone if (p->opt.promisc){ 9236f9cba8fSJoseph Mingrone pd->must_clear_promisc=1; 9246f9cba8fSJoseph Mingrone rte_eth_promiscuous_enable(portid); 9256f9cba8fSJoseph Mingrone } 9266f9cba8fSJoseph Mingrone // check link status 9276f9cba8fSJoseph Mingrone is_port_up = check_link_status(portid, &link); 9286f9cba8fSJoseph Mingrone if (!is_port_up){ 9296f9cba8fSJoseph Mingrone snprintf(p->errbuf, PCAP_ERRBUF_SIZE, 9306f9cba8fSJoseph Mingrone "dpdk error: link is down, port=%u",portid); 9316f9cba8fSJoseph Mingrone ret = PCAP_ERROR_IFACE_NOT_UP; 9326f9cba8fSJoseph Mingrone break; 9336f9cba8fSJoseph Mingrone } 9346f9cba8fSJoseph Mingrone // reset statistics 9356f9cba8fSJoseph Mingrone rte_eth_stats_reset(pd->portid); 9366f9cba8fSJoseph Mingrone calculate_timestamp(&(pd->ts_helper), &(pd->prev_ts)); 9376f9cba8fSJoseph Mingrone rte_eth_stats_get(pd->portid,&(pd->prev_stats)); 9386f9cba8fSJoseph Mingrone // format pcap_t 9396f9cba8fSJoseph Mingrone pd->portid = portid; 9406f9cba8fSJoseph Mingrone p->fd = pd->portid; 9416f9cba8fSJoseph Mingrone if (p->snapshot <=0 || p->snapshot> MAXIMUM_SNAPLEN) 9426f9cba8fSJoseph Mingrone { 9436f9cba8fSJoseph Mingrone p->snapshot = MAXIMUM_SNAPLEN; 9446f9cba8fSJoseph Mingrone } 9456f9cba8fSJoseph Mingrone p->linktype = DLT_EN10MB; // Ethernet, the 10MB is historical. 9466f9cba8fSJoseph Mingrone p->selectable_fd = p->fd; 9476f9cba8fSJoseph Mingrone p->read_op = pcap_dpdk_dispatch; 9486f9cba8fSJoseph Mingrone p->inject_op = pcap_dpdk_inject; 949*afdbf109SJoseph Mingrone // using pcapint_filter currently, though DPDK provides their own BPF function. Because DPDK BPF needs load a ELF file as a filter. 950*afdbf109SJoseph Mingrone p->setfilter_op = pcapint_install_bpf_program; 9516f9cba8fSJoseph Mingrone p->setdirection_op = NULL; 9526f9cba8fSJoseph Mingrone p->set_datalink_op = NULL; 9536f9cba8fSJoseph Mingrone p->getnonblock_op = pcap_dpdk_getnonblock; 9546f9cba8fSJoseph Mingrone p->setnonblock_op = pcap_dpdk_setnonblock; 9556f9cba8fSJoseph Mingrone p->stats_op = pcap_dpdk_stats; 9566f9cba8fSJoseph Mingrone p->cleanup_op = pcap_dpdk_close; 957*afdbf109SJoseph Mingrone p->breakloop_op = pcapint_breakloop_common; 9586f9cba8fSJoseph Mingrone // set default timeout 9596f9cba8fSJoseph Mingrone pd->required_select_timeout.tv_sec = 0; 9606f9cba8fSJoseph Mingrone pd->required_select_timeout.tv_usec = DPDK_DEF_MIN_SLEEP_MS*1000; 9616f9cba8fSJoseph Mingrone p->required_select_timeout = &pd->required_select_timeout; 9626f9cba8fSJoseph Mingrone ret = 0; // OK 9636f9cba8fSJoseph Mingrone }while(0); 9646f9cba8fSJoseph Mingrone 9656f9cba8fSJoseph Mingrone if (ret <= PCAP_ERROR) // all kinds of error code 9666f9cba8fSJoseph Mingrone { 967*afdbf109SJoseph Mingrone pcapint_cleanup_live_common(p); 9686f9cba8fSJoseph Mingrone }else{ 9696f9cba8fSJoseph Mingrone rte_eth_dev_get_name_by_port(portid,pd->pci_addr); 9706f9cba8fSJoseph Mingrone RTE_LOG(INFO, USER1,"Port %d device: %s, MAC:%s, PCI:%s\n", portid, p->opt.device, pd->mac_addr, pd->pci_addr); 9716f9cba8fSJoseph Mingrone RTE_LOG(INFO, USER1,"Port %d Link Up. Speed %u Mbps - %s\n", 9726f9cba8fSJoseph Mingrone portid, link.link_speed, 9736f9cba8fSJoseph Mingrone (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? 9746f9cba8fSJoseph Mingrone ("full-duplex") : ("half-duplex\n")); 9756f9cba8fSJoseph Mingrone } 9766f9cba8fSJoseph Mingrone return ret; 9776f9cba8fSJoseph Mingrone } 9786f9cba8fSJoseph Mingrone 9796f9cba8fSJoseph Mingrone // device name for dpdk should be in the form as dpdk:number, such as dpdk:0 9806f9cba8fSJoseph Mingrone pcap_t * pcap_dpdk_create(const char *device, char *ebuf, int *is_ours) 9816f9cba8fSJoseph Mingrone { 9826f9cba8fSJoseph Mingrone pcap_t *p=NULL; 9836f9cba8fSJoseph Mingrone *is_ours = 0; 9846f9cba8fSJoseph Mingrone 9856f9cba8fSJoseph Mingrone *is_ours = !strncmp(device, "dpdk:", 5); 9866f9cba8fSJoseph Mingrone if (! *is_ours) 9876f9cba8fSJoseph Mingrone return NULL; 9886f9cba8fSJoseph Mingrone //memset will happen 9896f9cba8fSJoseph Mingrone p = PCAP_CREATE_COMMON(ebuf, struct pcap_dpdk); 9906f9cba8fSJoseph Mingrone 9916f9cba8fSJoseph Mingrone if (p == NULL) 9926f9cba8fSJoseph Mingrone return NULL; 9936f9cba8fSJoseph Mingrone p->activate_op = pcap_dpdk_activate; 9946f9cba8fSJoseph Mingrone return p; 9956f9cba8fSJoseph Mingrone } 9966f9cba8fSJoseph Mingrone 9976f9cba8fSJoseph Mingrone int pcap_dpdk_findalldevs(pcap_if_list_t *devlistp, char *ebuf) 9986f9cba8fSJoseph Mingrone { 9996f9cba8fSJoseph Mingrone int ret=0; 10006f9cba8fSJoseph Mingrone unsigned int nb_ports = 0; 10016f9cba8fSJoseph Mingrone char dpdk_name[DPDK_DEV_NAME_MAX]; 10026f9cba8fSJoseph Mingrone char dpdk_desc[DPDK_DEV_DESC_MAX]; 10036f9cba8fSJoseph Mingrone ETHER_ADDR_TYPE eth_addr; 10046f9cba8fSJoseph Mingrone char mac_addr[DPDK_MAC_ADDR_SIZE]; 10056f9cba8fSJoseph Mingrone char pci_addr[DPDK_PCI_ADDR_SIZE]; 10066f9cba8fSJoseph Mingrone do{ 10076f9cba8fSJoseph Mingrone // init EAL; return "DPDK not available" if we 10086f9cba8fSJoseph Mingrone // have insufficient permission 10096f9cba8fSJoseph Mingrone char dpdk_pre_init_errbuf[PCAP_ERRBUF_SIZE]; 10106f9cba8fSJoseph Mingrone ret = dpdk_pre_init(dpdk_pre_init_errbuf, 1); 10116f9cba8fSJoseph Mingrone if (ret < 0) 10126f9cba8fSJoseph Mingrone { 10136f9cba8fSJoseph Mingrone // This returns a negative value on an error. 10146f9cba8fSJoseph Mingrone snprintf(ebuf, PCAP_ERRBUF_SIZE, 10156f9cba8fSJoseph Mingrone "Can't look for DPDK devices: %s", 10166f9cba8fSJoseph Mingrone dpdk_pre_init_errbuf); 10176f9cba8fSJoseph Mingrone ret = PCAP_ERROR; 10186f9cba8fSJoseph Mingrone break; 10196f9cba8fSJoseph Mingrone } 10206f9cba8fSJoseph Mingrone if (ret == 0) 10216f9cba8fSJoseph Mingrone { 10226f9cba8fSJoseph Mingrone // This means DPDK isn't available on this machine. 10236f9cba8fSJoseph Mingrone // That just means "don't return any devices". 10246f9cba8fSJoseph Mingrone break; 10256f9cba8fSJoseph Mingrone } 10266f9cba8fSJoseph Mingrone nb_ports = rte_eth_dev_count_avail(); 10276f9cba8fSJoseph Mingrone if (nb_ports == 0) 10286f9cba8fSJoseph Mingrone { 10296f9cba8fSJoseph Mingrone // That just means "don't return any devices". 10306f9cba8fSJoseph Mingrone ret = 0; 10316f9cba8fSJoseph Mingrone break; 10326f9cba8fSJoseph Mingrone } 10336f9cba8fSJoseph Mingrone for (unsigned int i=0; i<nb_ports; i++){ 10346f9cba8fSJoseph Mingrone snprintf(dpdk_name, DPDK_DEV_NAME_MAX-1, 10356f9cba8fSJoseph Mingrone "%s%u", DPDK_PREFIX, i); 10366f9cba8fSJoseph Mingrone // mac addr 10376f9cba8fSJoseph Mingrone rte_eth_macaddr_get(i, ð_addr); 10386f9cba8fSJoseph Mingrone eth_addr_str(ð_addr,mac_addr,DPDK_MAC_ADDR_SIZE); 10396f9cba8fSJoseph Mingrone // PCI addr 10406f9cba8fSJoseph Mingrone rte_eth_dev_get_name_by_port(i,pci_addr); 10416f9cba8fSJoseph Mingrone snprintf(dpdk_desc,DPDK_DEV_DESC_MAX-1,"%s %s, MAC:%s, PCI:%s", DPDK_DESC, dpdk_name, mac_addr, pci_addr); 1042*afdbf109SJoseph Mingrone if (pcapint_add_dev(devlistp, dpdk_name, 0, dpdk_desc, ebuf)==NULL){ 10436f9cba8fSJoseph Mingrone ret = PCAP_ERROR; 10446f9cba8fSJoseph Mingrone break; 10456f9cba8fSJoseph Mingrone } 10466f9cba8fSJoseph Mingrone } 10476f9cba8fSJoseph Mingrone }while(0); 10486f9cba8fSJoseph Mingrone return ret; 10496f9cba8fSJoseph Mingrone } 10506f9cba8fSJoseph Mingrone 10516f9cba8fSJoseph Mingrone #ifdef DPDK_ONLY 10526f9cba8fSJoseph Mingrone /* 10536f9cba8fSJoseph Mingrone * This libpcap build supports only DPDK, not regular network interfaces. 10546f9cba8fSJoseph Mingrone */ 10556f9cba8fSJoseph Mingrone 10566f9cba8fSJoseph Mingrone /* 10576f9cba8fSJoseph Mingrone * There are no regular interfaces, just DPDK interfaces. 10586f9cba8fSJoseph Mingrone */ 10596f9cba8fSJoseph Mingrone int 1060*afdbf109SJoseph Mingrone pcapint_platform_finddevs(pcap_if_list_t *devlistp _U_, char *errbuf) 10616f9cba8fSJoseph Mingrone { 10626f9cba8fSJoseph Mingrone return (0); 10636f9cba8fSJoseph Mingrone } 10646f9cba8fSJoseph Mingrone 10656f9cba8fSJoseph Mingrone /* 10666f9cba8fSJoseph Mingrone * Attempts to open a regular interface fail. 10676f9cba8fSJoseph Mingrone */ 10686f9cba8fSJoseph Mingrone pcap_t * 1069*afdbf109SJoseph Mingrone pcapint_create_interface(const char *device, char *errbuf) 10706f9cba8fSJoseph Mingrone { 10716f9cba8fSJoseph Mingrone snprintf(errbuf, PCAP_ERRBUF_SIZE, 10726f9cba8fSJoseph Mingrone "This version of libpcap only supports DPDK"); 10736f9cba8fSJoseph Mingrone return NULL; 10746f9cba8fSJoseph Mingrone } 10756f9cba8fSJoseph Mingrone 10766f9cba8fSJoseph Mingrone /* 10776f9cba8fSJoseph Mingrone * Libpcap version string. 10786f9cba8fSJoseph Mingrone */ 10796f9cba8fSJoseph Mingrone const char * 10806f9cba8fSJoseph Mingrone pcap_lib_version(void) 10816f9cba8fSJoseph Mingrone { 10826f9cba8fSJoseph Mingrone return (PCAP_VERSION_STRING " (DPDK-only)"); 10836f9cba8fSJoseph Mingrone } 10846f9cba8fSJoseph Mingrone #endif 1085