xref: /freebsd-src/contrib/libpcap/pcap-dpdk.c (revision 6f9cba8f8b5efd16249633e52483ea351876b67b)
1*6f9cba8fSJoseph Mingrone /*
2*6f9cba8fSJoseph Mingrone  * Copyright (C) 2018 jingle YANG. All rights reserved.
3*6f9cba8fSJoseph Mingrone  *
4*6f9cba8fSJoseph Mingrone  * Redistribution and use in source and binary forms, with or without
5*6f9cba8fSJoseph Mingrone  * modification, are permitted provided that the following conditions
6*6f9cba8fSJoseph Mingrone  * are met:
7*6f9cba8fSJoseph Mingrone  *
8*6f9cba8fSJoseph Mingrone  *   1. Redistributions of source code must retain the above copyright
9*6f9cba8fSJoseph Mingrone  *      notice, this list of conditions and the following disclaimer.
10*6f9cba8fSJoseph Mingrone  *   2. Redistributions in binary form must reproduce the above copyright
11*6f9cba8fSJoseph Mingrone  *      notice, this list of conditions and the following disclaimer in the
12*6f9cba8fSJoseph Mingrone  *      documentation and/or other materials provided with the distribution.
13*6f9cba8fSJoseph Mingrone  *
14*6f9cba8fSJoseph Mingrone  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''AND
15*6f9cba8fSJoseph Mingrone  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16*6f9cba8fSJoseph Mingrone  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17*6f9cba8fSJoseph Mingrone  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18*6f9cba8fSJoseph Mingrone  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19*6f9cba8fSJoseph Mingrone  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20*6f9cba8fSJoseph Mingrone  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21*6f9cba8fSJoseph Mingrone  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22*6f9cba8fSJoseph Mingrone  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23*6f9cba8fSJoseph Mingrone  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24*6f9cba8fSJoseph Mingrone  * SUCH DAMAGE.
25*6f9cba8fSJoseph Mingrone  */
26*6f9cba8fSJoseph Mingrone 
27*6f9cba8fSJoseph Mingrone /*
28*6f9cba8fSJoseph Mingrone Date: Dec 16, 2018
29*6f9cba8fSJoseph Mingrone 
30*6f9cba8fSJoseph Mingrone Description:
31*6f9cba8fSJoseph Mingrone 1. Pcap-dpdk provides libpcap the ability to use DPDK with the device name as dpdk:{portid}, such as dpdk:0.
32*6f9cba8fSJoseph Mingrone 2. DPDK is a set of libraries and drivers for fast packet processing. (https://www.dpdk.org/)
33*6f9cba8fSJoseph Mingrone 3. The testprogs/capturetest provides 6.4Gbps/800,000 pps on Intel 10-Gigabit X540-AT2 with DPDK 18.11.
34*6f9cba8fSJoseph Mingrone 
35*6f9cba8fSJoseph Mingrone Limitations:
36*6f9cba8fSJoseph Mingrone 1. DPDK support will be on if DPDK is available. Please set DIR for --with-dpdk[=DIR] with ./configure or -DDPDK_DIR[=DIR] with cmake if DPDK is installed manually.
37*6f9cba8fSJoseph Mingrone 2. Only dynamic linking against libdpdk.so is supported, because libdpdk.a will not work correctly.
38*6f9cba8fSJoseph Mingrone 3. Only reading is supported; packet injection is not supported yet.
39*6f9cba8fSJoseph Mingrone 
40*6f9cba8fSJoseph Mingrone Usage:
41*6f9cba8fSJoseph Mingrone 1. Compile DPDK as shared library and install.(https://github.com/DPDK/dpdk.git)
42*6f9cba8fSJoseph Mingrone 
43*6f9cba8fSJoseph Mingrone You shall modify the file $RTE_SDK/$RTE_TARGET/.config and set:
44*6f9cba8fSJoseph Mingrone CONFIG_RTE_BUILD_SHARED_LIB=y
45*6f9cba8fSJoseph Mingrone By the following command:
46*6f9cba8fSJoseph Mingrone sed -i 's/CONFIG_RTE_BUILD_SHARED_LIB=n/CONFIG_RTE_BUILD_SHARED_LIB=y/' $RTE_SDK/$RTE_TARGET/.config
47*6f9cba8fSJoseph Mingrone 
48*6f9cba8fSJoseph Mingrone 2. Launch l2fwd that is one of DPDK examples correctly, and get device information.
49*6f9cba8fSJoseph Mingrone 
50*6f9cba8fSJoseph Mingrone You shall learn how to bind nic with DPDK-compatible driver by $RTE_SDK/usertools/dpdk-devbind.py, such as igb_uio.
51*6f9cba8fSJoseph Mingrone And enable hugepages by dpdk-setup.sh
52*6f9cba8fSJoseph Mingrone 
53*6f9cba8fSJoseph Mingrone Then launch the l2fwd with dynamic driver support. For example:
54*6f9cba8fSJoseph Mingrone $RTE_SDK/examples/l2fwd/$RTE_TARGET/l2fwd -dlibrte_pmd_e1000.so -dlibrte_pmd_ixgbe.so -dlibrte_mempool_ring.so -- -p 0x1
55*6f9cba8fSJoseph Mingrone 
56*6f9cba8fSJoseph Mingrone 3. Compile libpcap with dpdk options.
57*6f9cba8fSJoseph Mingrone 
58*6f9cba8fSJoseph Mingrone If DPDK has not been found automatically, you shall export the DPDK environment variables that were used for compiling DPDK, and then pass $RTE_SDK/$RTE_TARGET to --with-dpdk or -DDPDK_DIR.
59*6f9cba8fSJoseph Mingrone 
60*6f9cba8fSJoseph Mingrone export RTE_SDK={your DPDK base directory}
61*6f9cba8fSJoseph Mingrone export RTE_TARGET={your target name}
62*6f9cba8fSJoseph Mingrone 
63*6f9cba8fSJoseph Mingrone 3.1 With configure
64*6f9cba8fSJoseph Mingrone 
65*6f9cba8fSJoseph Mingrone ./configure --with-dpdk=$RTE_SDK/$RTE_TARGET && make -s all && make -s testprogs && make install
66*6f9cba8fSJoseph Mingrone 
67*6f9cba8fSJoseph Mingrone 3.2 With cmake
68*6f9cba8fSJoseph Mingrone 
69*6f9cba8fSJoseph Mingrone mkdir -p build && cd build && cmake -DDPDK_DIR=$RTE_SDK/$RTE_TARGET ../ && make -s all && make -s testprogs && make install
70*6f9cba8fSJoseph Mingrone 
71*6f9cba8fSJoseph Mingrone 4. Link your own program with libpcap, and use DPDK with the device name as dpdk:{portid}, such as dpdk:0.
72*6f9cba8fSJoseph Mingrone And you shall set the DPDK configuration options through the environment variable DPDK_CFG.
73*6f9cba8fSJoseph Mingrone For example, testprogs/capturetest could be launched by:
74*6f9cba8fSJoseph Mingrone 
75*6f9cba8fSJoseph Mingrone env DPDK_CFG="--log-level=debug -l0 -dlibrte_pmd_e1000.so -dlibrte_pmd_ixgbe.so -dlibrte_mempool_ring.so" ./capturetest -i dpdk:0
76*6f9cba8fSJoseph Mingrone */
77*6f9cba8fSJoseph Mingrone 
78*6f9cba8fSJoseph Mingrone #ifdef HAVE_CONFIG_H
79*6f9cba8fSJoseph Mingrone #include <config.h>
80*6f9cba8fSJoseph Mingrone #endif
81*6f9cba8fSJoseph Mingrone 
82*6f9cba8fSJoseph Mingrone #include <errno.h>
83*6f9cba8fSJoseph Mingrone #include <netdb.h>
84*6f9cba8fSJoseph Mingrone #include <stdio.h>
85*6f9cba8fSJoseph Mingrone #include <stdlib.h>
86*6f9cba8fSJoseph Mingrone #include <string.h>
87*6f9cba8fSJoseph Mingrone #include <unistd.h>
88*6f9cba8fSJoseph Mingrone #include <limits.h> /* for INT_MAX */
89*6f9cba8fSJoseph Mingrone #include <time.h>
90*6f9cba8fSJoseph Mingrone 
91*6f9cba8fSJoseph Mingrone #include <sys/time.h>
92*6f9cba8fSJoseph Mingrone 
93*6f9cba8fSJoseph Mingrone //header for calling dpdk
94*6f9cba8fSJoseph Mingrone #include <rte_config.h>
95*6f9cba8fSJoseph Mingrone #include <rte_common.h>
96*6f9cba8fSJoseph Mingrone #include <rte_errno.h>
97*6f9cba8fSJoseph Mingrone #include <rte_log.h>
98*6f9cba8fSJoseph Mingrone #include <rte_malloc.h>
99*6f9cba8fSJoseph Mingrone #include <rte_memory.h>
100*6f9cba8fSJoseph Mingrone #include <rte_eal.h>
101*6f9cba8fSJoseph Mingrone #include <rte_launch.h>
102*6f9cba8fSJoseph Mingrone #include <rte_atomic.h>
103*6f9cba8fSJoseph Mingrone #include <rte_cycles.h>
104*6f9cba8fSJoseph Mingrone #include <rte_lcore.h>
105*6f9cba8fSJoseph Mingrone #include <rte_per_lcore.h>
106*6f9cba8fSJoseph Mingrone #include <rte_branch_prediction.h>
107*6f9cba8fSJoseph Mingrone #include <rte_interrupts.h>
108*6f9cba8fSJoseph Mingrone #include <rte_random.h>
109*6f9cba8fSJoseph Mingrone #include <rte_debug.h>
110*6f9cba8fSJoseph Mingrone #include <rte_ether.h>
111*6f9cba8fSJoseph Mingrone #include <rte_ethdev.h>
112*6f9cba8fSJoseph Mingrone #include <rte_mempool.h>
113*6f9cba8fSJoseph Mingrone #include <rte_mbuf.h>
114*6f9cba8fSJoseph Mingrone #include <rte_bus.h>
115*6f9cba8fSJoseph Mingrone 
116*6f9cba8fSJoseph Mingrone #include "pcap-int.h"
117*6f9cba8fSJoseph Mingrone #include "pcap-dpdk.h"
118*6f9cba8fSJoseph Mingrone 
/*
 * Deal with API changes that break source compatibility.
 */

// The Ethernet address structure is spelled differently depending on the
// DPDK headers in use; HAVE_STRUCT_RTE_ETHER_ADDR selects the spelling.
#ifdef HAVE_STRUCT_RTE_ETHER_ADDR
#define ETHER_ADDR_TYPE	struct rte_ether_addr
#else
#define ETHER_ADDR_TYPE	struct ether_addr
#endif

// Default DPDK log level.
#define DPDK_DEF_LOG_LEV RTE_LOG_ERR
//
// This is set to 0 if we haven't initialized DPDK yet, 1 if we've
// successfully initialized it, a negative value, which is the negative
// of the rte_errno from rte_eal_init(), if we tried to initialize it
// and got an error.
//
static int is_dpdk_pre_inited=0;
#define DPDK_LIB_NAME "libpcap_dpdk"
#define DPDK_DESC "Data Plane Development Kit (DPDK) Interface"
#define DPDK_ERR_PERM_MSG "permission denied, DPDK needs root permission"
// Number of argv slots cleared by parse_dpdk_cfg().
#define DPDK_ARGC_MAX 64
// Size, in bytes, of dpdk_cfg_buf below.
#define DPDK_CFG_MAX_LEN 1024
#define DPDK_DEV_NAME_MAX 32
#define DPDK_DEV_DESC_MAX 512
// Name of the environment variable that carries the DPDK options
// (see the usage notes at the top of this file).
#define DPDK_CFG_ENV_NAME "DPDK_CFG"
// Interval, in milliseconds, between receive attempts while
// dpdk_read_with_timeout() waits for packets in blocking mode.
#define DPDK_DEF_MIN_SLEEP_MS 1
// NOTE(review): presumably holds the configuration string that is split
// into arguments - confirm against the parsing code.
static char dpdk_cfg_buf[DPDK_CFG_MAX_LEN];
// Size of the mac_addr text buffer in struct pcap_dpdk.
#define DPDK_MAC_ADDR_SIZE 32
// Text written by eth_addr_str() when it is given no address.
#define DPDK_DEF_MAC_ADDR "00:00:00:00:00:00"
// Size of the pci_addr text buffer in struct pcap_dpdk.
#define DPDK_PCI_ADDR_SIZE 16
// NOTE(review): looks like the option string used when DPDK_CFG is not
// set - confirm against the initialization code.
#define DPDK_DEF_CFG "--log-level=error -l0 -dlibrte_pmd_e1000.so -dlibrte_pmd_ixgbe.so -dlibrte_mempool_ring.so"
// Device names handled here have the form DPDK_PREFIX followed by a
// decimal port id, e.g. "dpdk:0".
#define DPDK_PREFIX "dpdk:"
// Exclusive upper bound for a port id; portid_by_device() also returns
// this value to signal an invalid device name.
#define DPDK_PORTID_MAX 65535U
#define MBUF_POOL_NAME "mbuf_pool"
#define DPDK_TX_BUF_NAME "tx_buffer"
//The number of elements in the mbuf pool.
#define DPDK_NB_MBUFS 8192U
#define MEMPOOL_CACHE_SIZE 256
// Capacity of the mbuf array that pcap_dpdk_dispatch() passes to each
// receive call.
#define MAX_PKT_BURST 32
// Configurable number of RX/TX ring descriptors
#define RTE_TEST_RX_DESC_DEFAULT 1024
#define RTE_TEST_TX_DESC_DEFAULT 1024

static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT;
static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;

// Size of the per-handle scratch buffer (pcap_tmp_buf) used to linearize
// multi-segment packets; the jumbo frame length macro is spelled
// differently depending on the DPDK headers in use.
#ifdef RTE_ETHER_MAX_JUMBO_FRAME_LEN
#define RTE_ETH_PCAP_SNAPLEN RTE_ETHER_MAX_JUMBO_FRAME_LEN
#else
#define RTE_ETH_PCAP_SNAPLEN ETHER_MAX_JUMBO_FRAME_LEN
#endif

// NOTE(review): not referenced by the receive path in this part of the
// file - check the rest of the file for its use.
static struct rte_eth_dev_tx_buffer *tx_buffer;
173*6f9cba8fSJoseph Mingrone 
// Reference point used to turn DPDK timer cycles into wall-clock
// timestamps; filled in by dpdk_init_timer() and read by
// calculate_timestamp().
struct dpdk_ts_helper{
	struct timeval start_time;	// gettimeofday() value taken at init
	uint64_t start_cycles;		// rte_get_timer_cycles() value taken at init
	uint64_t hz;			// rte_get_timer_hz(): timer cycles per second
};
// Per-handle private state for a DPDK capture (reached through p->priv).
struct pcap_dpdk{
	pcap_t * orig;	// NOTE(review): presumably the owning pcap_t - confirm where it is set
	uint16_t portid; // portid of DPDK
	int must_clear_promisc;	// non-zero: pcap_dpdk_close() disables promiscuous mode
	uint64_t bpf_drop;	// packets rejected by the filter in pcap_dpdk_dispatch()
	int nonblock;		// value stored by pcap_dpdk_setnonblock()
	struct timeval required_select_timeout;	// not used in the code visible here
	struct timeval prev_ts;			// timestamp of the previous pcap_dpdk_stats() call
	struct rte_eth_stats prev_stats;	// port counters at the previous pcap_dpdk_stats() call
	struct timeval curr_ts;			// timestamp of the latest pcap_dpdk_stats() call
	struct rte_eth_stats curr_stats;	// port counters at the latest pcap_dpdk_stats() call
	uint64_t pps;	// packets per second between the last two stats calls
	uint64_t bps;	// bits per second between the last two stats calls
	struct rte_mempool * pktmbuf_pool;	// NOTE(review): presumably the mbuf pool of the RX queue - confirm
	struct dpdk_ts_helper ts_helper;	// cycle-to-wall-clock reference, see dpdk_init_timer()
	ETHER_ADDR_TYPE eth_addr;		// NOTE(review): presumably the port's MAC address - confirm
	char mac_addr[DPDK_MAC_ADDR_SIZE];	// NOTE(review): presumably eth_addr as text - confirm
	char pci_addr[DPDK_PCI_ADDR_SIZE];	// NOTE(review): presumably the port's PCI address as text - confirm
	// Scratch buffer in which pcap_dpdk_dispatch() linearizes multi-segment
	// packets of at most RTE_ETH_PCAP_SNAPLEN bytes.
	unsigned char pcap_tmp_buf[RTE_ETH_PCAP_SNAPLEN];
};
199*6f9cba8fSJoseph Mingrone 
// Ethernet port configuration: no header splitting on receive and no
// multi-queue mode on transmit; every other field is zero-initialized.
// NOTE(review): presumably handed to the port configuration call when the
// device is activated - confirm in the activation code.
static struct rte_eth_conf port_conf = {
	.rxmode = {
		.split_hdr_size = 0,
	},
	.txmode = {
		.mq_mode = ETH_MQ_TX_NONE,
	},
};
208*6f9cba8fSJoseph Mingrone 
209*6f9cba8fSJoseph Mingrone static void	dpdk_fmt_errmsg_for_rte_errno(char *, size_t, int,
210*6f9cba8fSJoseph Mingrone     PCAP_FORMAT_STRING(const char *), ...) PCAP_PRINTFLIKE(4, 5);
211*6f9cba8fSJoseph Mingrone 
212*6f9cba8fSJoseph Mingrone /*
213*6f9cba8fSJoseph Mingrone  * Generate an error message based on a format, arguments, and an
214*6f9cba8fSJoseph Mingrone  * rte_errno, with a message for the rte_errno after the formatted output.
215*6f9cba8fSJoseph Mingrone  */
216*6f9cba8fSJoseph Mingrone static void dpdk_fmt_errmsg_for_rte_errno(char *errbuf, size_t errbuflen,
217*6f9cba8fSJoseph Mingrone     int errnum, const char *fmt, ...)
218*6f9cba8fSJoseph Mingrone {
219*6f9cba8fSJoseph Mingrone 	va_list ap;
220*6f9cba8fSJoseph Mingrone 	size_t msglen;
221*6f9cba8fSJoseph Mingrone 	char *p;
222*6f9cba8fSJoseph Mingrone 	size_t errbuflen_remaining;
223*6f9cba8fSJoseph Mingrone 
224*6f9cba8fSJoseph Mingrone 	va_start(ap, fmt);
225*6f9cba8fSJoseph Mingrone 	vsnprintf(errbuf, errbuflen, fmt, ap);
226*6f9cba8fSJoseph Mingrone 	va_end(ap);
227*6f9cba8fSJoseph Mingrone 	msglen = strlen(errbuf);
228*6f9cba8fSJoseph Mingrone 
229*6f9cba8fSJoseph Mingrone 	/*
230*6f9cba8fSJoseph Mingrone 	 * Do we have enough space to append ": "?
231*6f9cba8fSJoseph Mingrone 	 * Including the terminating '\0', that's 3 bytes.
232*6f9cba8fSJoseph Mingrone 	 */
233*6f9cba8fSJoseph Mingrone 	if (msglen + 3 > errbuflen) {
234*6f9cba8fSJoseph Mingrone 		/* No - just give them what we've produced. */
235*6f9cba8fSJoseph Mingrone 		return;
236*6f9cba8fSJoseph Mingrone 	}
237*6f9cba8fSJoseph Mingrone 	p = errbuf + msglen;
238*6f9cba8fSJoseph Mingrone 	errbuflen_remaining = errbuflen - msglen;
239*6f9cba8fSJoseph Mingrone 	*p++ = ':';
240*6f9cba8fSJoseph Mingrone 	*p++ = ' ';
241*6f9cba8fSJoseph Mingrone 	*p = '\0';
242*6f9cba8fSJoseph Mingrone 	msglen += 2;
243*6f9cba8fSJoseph Mingrone 	errbuflen_remaining -= 2;
244*6f9cba8fSJoseph Mingrone 
245*6f9cba8fSJoseph Mingrone 	/*
246*6f9cba8fSJoseph Mingrone 	 * Now append the string for the error code.
247*6f9cba8fSJoseph Mingrone 	 * rte_strerror() is thread-safe, at least as of dpdk 18.11,
248*6f9cba8fSJoseph Mingrone 	 * unlike strerror() - it uses strerror_r() rather than strerror()
249*6f9cba8fSJoseph Mingrone 	 * for UN*X errno values, and prints to what I assume is a per-thread
250*6f9cba8fSJoseph Mingrone 	 * buffer (based on the "PER_LCORE" in "RTE_DEFINE_PER_LCORE" used
251*6f9cba8fSJoseph Mingrone 	 * to declare the buffers statically) for DPDK errors.
252*6f9cba8fSJoseph Mingrone 	 */
253*6f9cba8fSJoseph Mingrone 	snprintf(p, errbuflen_remaining, "%s", rte_strerror(errnum));
254*6f9cba8fSJoseph Mingrone }
255*6f9cba8fSJoseph Mingrone 
256*6f9cba8fSJoseph Mingrone static int dpdk_init_timer(struct pcap_dpdk *pd){
257*6f9cba8fSJoseph Mingrone 	gettimeofday(&(pd->ts_helper.start_time),NULL);
258*6f9cba8fSJoseph Mingrone 	pd->ts_helper.start_cycles = rte_get_timer_cycles();
259*6f9cba8fSJoseph Mingrone 	pd->ts_helper.hz = rte_get_timer_hz();
260*6f9cba8fSJoseph Mingrone 	if (pd->ts_helper.hz == 0){
261*6f9cba8fSJoseph Mingrone 		return -1;
262*6f9cba8fSJoseph Mingrone 	}
263*6f9cba8fSJoseph Mingrone 	return 0;
264*6f9cba8fSJoseph Mingrone }
265*6f9cba8fSJoseph Mingrone static inline void calculate_timestamp(struct dpdk_ts_helper *helper,struct timeval *ts)
266*6f9cba8fSJoseph Mingrone {
267*6f9cba8fSJoseph Mingrone 	uint64_t cycles;
268*6f9cba8fSJoseph Mingrone 	// delta
269*6f9cba8fSJoseph Mingrone 	struct timeval cur_time;
270*6f9cba8fSJoseph Mingrone 	cycles = rte_get_timer_cycles() - helper->start_cycles;
271*6f9cba8fSJoseph Mingrone 	cur_time.tv_sec = (time_t)(cycles/helper->hz);
272*6f9cba8fSJoseph Mingrone 	cur_time.tv_usec = (suseconds_t)((cycles%helper->hz)*1e6/helper->hz);
273*6f9cba8fSJoseph Mingrone 	timeradd(&(helper->start_time), &cur_time, ts);
274*6f9cba8fSJoseph Mingrone }
275*6f9cba8fSJoseph Mingrone 
276*6f9cba8fSJoseph Mingrone static uint32_t dpdk_gather_data(unsigned char *data, uint32_t len, struct rte_mbuf *mbuf)
277*6f9cba8fSJoseph Mingrone {
278*6f9cba8fSJoseph Mingrone 	uint32_t total_len = 0;
279*6f9cba8fSJoseph Mingrone 	while (mbuf && (total_len+mbuf->data_len) < len ){
280*6f9cba8fSJoseph Mingrone 		rte_memcpy(data+total_len, rte_pktmbuf_mtod(mbuf,void *),mbuf->data_len);
281*6f9cba8fSJoseph Mingrone 		total_len+=mbuf->data_len;
282*6f9cba8fSJoseph Mingrone 		mbuf=mbuf->next;
283*6f9cba8fSJoseph Mingrone 	}
284*6f9cba8fSJoseph Mingrone 	return total_len;
285*6f9cba8fSJoseph Mingrone }
286*6f9cba8fSJoseph Mingrone 
287*6f9cba8fSJoseph Mingrone 
288*6f9cba8fSJoseph Mingrone static int dpdk_read_with_timeout(pcap_t *p, struct rte_mbuf **pkts_burst, const uint16_t burst_cnt){
289*6f9cba8fSJoseph Mingrone 	struct pcap_dpdk *pd = (struct pcap_dpdk*)(p->priv);
290*6f9cba8fSJoseph Mingrone 	int nb_rx = 0;
291*6f9cba8fSJoseph Mingrone 	int timeout_ms = p->opt.timeout;
292*6f9cba8fSJoseph Mingrone 	int sleep_ms = 0;
293*6f9cba8fSJoseph Mingrone 	if (pd->nonblock){
294*6f9cba8fSJoseph Mingrone 		// In non-blocking mode, just read once, no matter how many packets are captured.
295*6f9cba8fSJoseph Mingrone 		nb_rx = (int)rte_eth_rx_burst(pd->portid, 0, pkts_burst, burst_cnt);
296*6f9cba8fSJoseph Mingrone 	}else{
297*6f9cba8fSJoseph Mingrone 		// In blocking mode, read many times until packets are captured or timeout or break_loop is set.
298*6f9cba8fSJoseph Mingrone 		// if timeout_ms == 0, it may be blocked forever.
299*6f9cba8fSJoseph Mingrone 		while (timeout_ms == 0 || sleep_ms < timeout_ms){
300*6f9cba8fSJoseph Mingrone 			nb_rx = (int)rte_eth_rx_burst(pd->portid, 0, pkts_burst, burst_cnt);
301*6f9cba8fSJoseph Mingrone 			if (nb_rx){ // got packets within timeout_ms
302*6f9cba8fSJoseph Mingrone 				break;
303*6f9cba8fSJoseph Mingrone 			}else{ // no packet arrives at this round.
304*6f9cba8fSJoseph Mingrone 				if (p->break_loop){
305*6f9cba8fSJoseph Mingrone 					break;
306*6f9cba8fSJoseph Mingrone 				}
307*6f9cba8fSJoseph Mingrone 				// sleep for a very short while.
308*6f9cba8fSJoseph Mingrone 				// block sleep is the only choice, since usleep() will impact performance dramatically.
309*6f9cba8fSJoseph Mingrone 				rte_delay_us_block(DPDK_DEF_MIN_SLEEP_MS*1000);
310*6f9cba8fSJoseph Mingrone 				sleep_ms += DPDK_DEF_MIN_SLEEP_MS;
311*6f9cba8fSJoseph Mingrone 			}
312*6f9cba8fSJoseph Mingrone 		}
313*6f9cba8fSJoseph Mingrone 	}
314*6f9cba8fSJoseph Mingrone 	return nb_rx;
315*6f9cba8fSJoseph Mingrone }
316*6f9cba8fSJoseph Mingrone 
317*6f9cba8fSJoseph Mingrone static int pcap_dpdk_dispatch(pcap_t *p, int max_cnt, pcap_handler cb, u_char *cb_arg)
318*6f9cba8fSJoseph Mingrone {
319*6f9cba8fSJoseph Mingrone 	struct pcap_dpdk *pd = (struct pcap_dpdk*)(p->priv);
320*6f9cba8fSJoseph Mingrone 	int burst_cnt = 0;
321*6f9cba8fSJoseph Mingrone 	int nb_rx = 0;
322*6f9cba8fSJoseph Mingrone 	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
323*6f9cba8fSJoseph Mingrone 	struct rte_mbuf *m;
324*6f9cba8fSJoseph Mingrone 	struct pcap_pkthdr pcap_header;
325*6f9cba8fSJoseph Mingrone 	// In DPDK, pkt_len is sum of lengths for all segments. And data_len is for one segment
326*6f9cba8fSJoseph Mingrone 	uint32_t pkt_len = 0;
327*6f9cba8fSJoseph Mingrone 	uint32_t caplen = 0;
328*6f9cba8fSJoseph Mingrone 	u_char *bp = NULL;
329*6f9cba8fSJoseph Mingrone 	int i=0;
330*6f9cba8fSJoseph Mingrone 	unsigned int gather_len =0;
331*6f9cba8fSJoseph Mingrone 	int pkt_cnt = 0;
332*6f9cba8fSJoseph Mingrone 	u_char *large_buffer=NULL;
333*6f9cba8fSJoseph Mingrone 	int timeout_ms = p->opt.timeout;
334*6f9cba8fSJoseph Mingrone 
335*6f9cba8fSJoseph Mingrone 	/*
336*6f9cba8fSJoseph Mingrone 	 * This can conceivably process more than INT_MAX packets,
337*6f9cba8fSJoseph Mingrone 	 * which would overflow the packet count, causing it either
338*6f9cba8fSJoseph Mingrone 	 * to look like a negative number, and thus cause us to
339*6f9cba8fSJoseph Mingrone 	 * return a value that looks like an error, or overflow
340*6f9cba8fSJoseph Mingrone 	 * back into positive territory, and thus cause us to
341*6f9cba8fSJoseph Mingrone 	 * return a too-low count.
342*6f9cba8fSJoseph Mingrone 	 *
343*6f9cba8fSJoseph Mingrone 	 * Therefore, if the packet count is unlimited, we clip
344*6f9cba8fSJoseph Mingrone 	 * it at INT_MAX; this routine is not expected to
345*6f9cba8fSJoseph Mingrone 	 * process packets indefinitely, so that's not an issue.
346*6f9cba8fSJoseph Mingrone 	 */
347*6f9cba8fSJoseph Mingrone 	if (PACKET_COUNT_IS_UNLIMITED(max_cnt))
348*6f9cba8fSJoseph Mingrone 		max_cnt = INT_MAX;
349*6f9cba8fSJoseph Mingrone 
350*6f9cba8fSJoseph Mingrone 	if (max_cnt < MAX_PKT_BURST){
351*6f9cba8fSJoseph Mingrone 		burst_cnt = max_cnt;
352*6f9cba8fSJoseph Mingrone 	}else{
353*6f9cba8fSJoseph Mingrone 		burst_cnt = MAX_PKT_BURST;
354*6f9cba8fSJoseph Mingrone 	}
355*6f9cba8fSJoseph Mingrone 
356*6f9cba8fSJoseph Mingrone 	while( pkt_cnt < max_cnt){
357*6f9cba8fSJoseph Mingrone 		if (p->break_loop){
358*6f9cba8fSJoseph Mingrone 			p->break_loop = 0;
359*6f9cba8fSJoseph Mingrone 			return PCAP_ERROR_BREAK;
360*6f9cba8fSJoseph Mingrone 		}
361*6f9cba8fSJoseph Mingrone 		// read once in non-blocking mode, or try many times waiting for timeout_ms.
362*6f9cba8fSJoseph Mingrone 		// if timeout_ms == 0, it will be blocked until one packet arrives or break_loop is set.
363*6f9cba8fSJoseph Mingrone 		nb_rx = dpdk_read_with_timeout(p, pkts_burst, burst_cnt);
364*6f9cba8fSJoseph Mingrone 		if (nb_rx == 0){
365*6f9cba8fSJoseph Mingrone 			if (pd->nonblock){
366*6f9cba8fSJoseph Mingrone 				RTE_LOG(DEBUG, USER1, "dpdk: no packets available in non-blocking mode.\n");
367*6f9cba8fSJoseph Mingrone 			}else{
368*6f9cba8fSJoseph Mingrone 				if (p->break_loop){
369*6f9cba8fSJoseph Mingrone 					RTE_LOG(DEBUG, USER1, "dpdk: no packets available and break_loop is set in blocking mode.\n");
370*6f9cba8fSJoseph Mingrone 					p->break_loop = 0;
371*6f9cba8fSJoseph Mingrone 					return PCAP_ERROR_BREAK;
372*6f9cba8fSJoseph Mingrone 
373*6f9cba8fSJoseph Mingrone 				}
374*6f9cba8fSJoseph Mingrone 				RTE_LOG(DEBUG, USER1, "dpdk: no packets available for timeout %d ms in blocking mode.\n", timeout_ms);
375*6f9cba8fSJoseph Mingrone 			}
376*6f9cba8fSJoseph Mingrone 			// break if dpdk reads 0 packet, no matter in blocking(timeout) or non-blocking mode.
377*6f9cba8fSJoseph Mingrone 			break;
378*6f9cba8fSJoseph Mingrone 		}
379*6f9cba8fSJoseph Mingrone 		pkt_cnt += nb_rx;
380*6f9cba8fSJoseph Mingrone 		for ( i = 0; i < nb_rx; i++) {
381*6f9cba8fSJoseph Mingrone 			m = pkts_burst[i];
382*6f9cba8fSJoseph Mingrone 			calculate_timestamp(&(pd->ts_helper),&(pcap_header.ts));
383*6f9cba8fSJoseph Mingrone 			pkt_len = rte_pktmbuf_pkt_len(m);
384*6f9cba8fSJoseph Mingrone 			// caplen = min(pkt_len, p->snapshot);
385*6f9cba8fSJoseph Mingrone 			// caplen will not be changed, no matter how long the rte_pktmbuf
386*6f9cba8fSJoseph Mingrone 			caplen = pkt_len < (uint32_t)p->snapshot ? pkt_len: (uint32_t)p->snapshot;
387*6f9cba8fSJoseph Mingrone 			pcap_header.caplen = caplen;
388*6f9cba8fSJoseph Mingrone 			pcap_header.len = pkt_len;
389*6f9cba8fSJoseph Mingrone 			// volatile prefetch
390*6f9cba8fSJoseph Mingrone 			rte_prefetch0(rte_pktmbuf_mtod(m, void *));
391*6f9cba8fSJoseph Mingrone 			bp = NULL;
392*6f9cba8fSJoseph Mingrone 			if (m->nb_segs == 1)
393*6f9cba8fSJoseph Mingrone 			{
394*6f9cba8fSJoseph Mingrone 				bp = rte_pktmbuf_mtod(m, u_char *);
395*6f9cba8fSJoseph Mingrone 			}else{
396*6f9cba8fSJoseph Mingrone 				// use fast buffer pcap_tmp_buf if pkt_len is small, no need to call malloc and free
397*6f9cba8fSJoseph Mingrone 				if ( pkt_len <= RTE_ETH_PCAP_SNAPLEN)
398*6f9cba8fSJoseph Mingrone 				{
399*6f9cba8fSJoseph Mingrone 					gather_len = dpdk_gather_data(pd->pcap_tmp_buf, RTE_ETH_PCAP_SNAPLEN, m);
400*6f9cba8fSJoseph Mingrone 					bp = pd->pcap_tmp_buf;
401*6f9cba8fSJoseph Mingrone 				}else{
402*6f9cba8fSJoseph Mingrone 					// need call free later
403*6f9cba8fSJoseph Mingrone 					large_buffer = (u_char *)malloc(caplen*sizeof(u_char));
404*6f9cba8fSJoseph Mingrone 					gather_len = dpdk_gather_data(large_buffer, caplen, m);
405*6f9cba8fSJoseph Mingrone 					bp = large_buffer;
406*6f9cba8fSJoseph Mingrone 				}
407*6f9cba8fSJoseph Mingrone 
408*6f9cba8fSJoseph Mingrone 			}
409*6f9cba8fSJoseph Mingrone 			if (bp){
410*6f9cba8fSJoseph Mingrone 				if (p->fcode.bf_insns==NULL || pcap_filter(p->fcode.bf_insns, bp, pcap_header.len, pcap_header.caplen)){
411*6f9cba8fSJoseph Mingrone 					cb(cb_arg, &pcap_header, bp);
412*6f9cba8fSJoseph Mingrone 				}else{
413*6f9cba8fSJoseph Mingrone 					pd->bpf_drop++;
414*6f9cba8fSJoseph Mingrone 				}
415*6f9cba8fSJoseph Mingrone 			}
416*6f9cba8fSJoseph Mingrone 			//free all pktmbuf
417*6f9cba8fSJoseph Mingrone 			rte_pktmbuf_free(m);
418*6f9cba8fSJoseph Mingrone 			if (large_buffer){
419*6f9cba8fSJoseph Mingrone 				free(large_buffer);
420*6f9cba8fSJoseph Mingrone 				large_buffer=NULL;
421*6f9cba8fSJoseph Mingrone 			}
422*6f9cba8fSJoseph Mingrone 		}
423*6f9cba8fSJoseph Mingrone 	}
424*6f9cba8fSJoseph Mingrone 	return pkt_cnt;
425*6f9cba8fSJoseph Mingrone }
426*6f9cba8fSJoseph Mingrone 
427*6f9cba8fSJoseph Mingrone static int pcap_dpdk_inject(pcap_t *p, const void *buf _U_, int size _U_)
428*6f9cba8fSJoseph Mingrone {
429*6f9cba8fSJoseph Mingrone 	//not implemented yet
430*6f9cba8fSJoseph Mingrone 	pcap_strlcpy(p->errbuf,
431*6f9cba8fSJoseph Mingrone 	    "dpdk error: Inject function has not been implemented yet",
432*6f9cba8fSJoseph Mingrone 	    PCAP_ERRBUF_SIZE);
433*6f9cba8fSJoseph Mingrone 	return PCAP_ERROR;
434*6f9cba8fSJoseph Mingrone }
435*6f9cba8fSJoseph Mingrone 
436*6f9cba8fSJoseph Mingrone static void pcap_dpdk_close(pcap_t *p)
437*6f9cba8fSJoseph Mingrone {
438*6f9cba8fSJoseph Mingrone 	struct pcap_dpdk *pd = p->priv;
439*6f9cba8fSJoseph Mingrone 	if (pd==NULL)
440*6f9cba8fSJoseph Mingrone 	{
441*6f9cba8fSJoseph Mingrone 		return;
442*6f9cba8fSJoseph Mingrone 	}
443*6f9cba8fSJoseph Mingrone 	if (pd->must_clear_promisc)
444*6f9cba8fSJoseph Mingrone 	{
445*6f9cba8fSJoseph Mingrone 		rte_eth_promiscuous_disable(pd->portid);
446*6f9cba8fSJoseph Mingrone 	}
447*6f9cba8fSJoseph Mingrone 	rte_eth_dev_stop(pd->portid);
448*6f9cba8fSJoseph Mingrone 	rte_eth_dev_close(pd->portid);
449*6f9cba8fSJoseph Mingrone 	pcap_cleanup_live_common(p);
450*6f9cba8fSJoseph Mingrone }
451*6f9cba8fSJoseph Mingrone 
452*6f9cba8fSJoseph Mingrone static void nic_stats_display(struct pcap_dpdk *pd)
453*6f9cba8fSJoseph Mingrone {
454*6f9cba8fSJoseph Mingrone 	uint16_t portid = pd->portid;
455*6f9cba8fSJoseph Mingrone 	struct rte_eth_stats stats;
456*6f9cba8fSJoseph Mingrone 	rte_eth_stats_get(portid, &stats);
457*6f9cba8fSJoseph Mingrone 	RTE_LOG(INFO,USER1, "portid:%d, RX-packets: %-10"PRIu64"  RX-errors:  %-10"PRIu64
458*6f9cba8fSJoseph Mingrone 	       "  RX-bytes:  %-10"PRIu64"  RX-Imissed:  %-10"PRIu64"\n", portid, stats.ipackets, stats.ierrors,
459*6f9cba8fSJoseph Mingrone 	       stats.ibytes,stats.imissed);
460*6f9cba8fSJoseph Mingrone 	RTE_LOG(INFO,USER1, "portid:%d, RX-PPS: %-10"PRIu64" RX-Mbps: %.2lf\n", portid, pd->pps, pd->bps/1e6f );
461*6f9cba8fSJoseph Mingrone }
462*6f9cba8fSJoseph Mingrone 
463*6f9cba8fSJoseph Mingrone static int pcap_dpdk_stats(pcap_t *p, struct pcap_stat *ps)
464*6f9cba8fSJoseph Mingrone {
465*6f9cba8fSJoseph Mingrone 	struct pcap_dpdk *pd = p->priv;
466*6f9cba8fSJoseph Mingrone 	calculate_timestamp(&(pd->ts_helper), &(pd->curr_ts));
467*6f9cba8fSJoseph Mingrone 	rte_eth_stats_get(pd->portid,&(pd->curr_stats));
468*6f9cba8fSJoseph Mingrone 	if (ps){
469*6f9cba8fSJoseph Mingrone 		ps->ps_recv = pd->curr_stats.ipackets;
470*6f9cba8fSJoseph Mingrone 		ps->ps_drop = pd->curr_stats.ierrors;
471*6f9cba8fSJoseph Mingrone 		ps->ps_drop += pd->bpf_drop;
472*6f9cba8fSJoseph Mingrone 		ps->ps_ifdrop = pd->curr_stats.imissed;
473*6f9cba8fSJoseph Mingrone 	}
474*6f9cba8fSJoseph Mingrone 	uint64_t delta_pkt = pd->curr_stats.ipackets - pd->prev_stats.ipackets;
475*6f9cba8fSJoseph Mingrone 	struct timeval delta_tm;
476*6f9cba8fSJoseph Mingrone 	timersub(&(pd->curr_ts),&(pd->prev_ts), &delta_tm);
477*6f9cba8fSJoseph Mingrone 	uint64_t delta_usec = delta_tm.tv_sec*1e6+delta_tm.tv_usec;
478*6f9cba8fSJoseph Mingrone 	uint64_t delta_bit = (pd->curr_stats.ibytes-pd->prev_stats.ibytes)*8;
479*6f9cba8fSJoseph Mingrone 	RTE_LOG(DEBUG, USER1, "delta_usec: %-10"PRIu64" delta_pkt: %-10"PRIu64" delta_bit: %-10"PRIu64"\n", delta_usec, delta_pkt, delta_bit);
480*6f9cba8fSJoseph Mingrone 	pd->pps = (uint64_t)(delta_pkt*1e6f/delta_usec);
481*6f9cba8fSJoseph Mingrone 	pd->bps = (uint64_t)(delta_bit*1e6f/delta_usec);
482*6f9cba8fSJoseph Mingrone 	nic_stats_display(pd);
483*6f9cba8fSJoseph Mingrone 	pd->prev_stats = pd->curr_stats;
484*6f9cba8fSJoseph Mingrone 	pd->prev_ts = pd->curr_ts;
485*6f9cba8fSJoseph Mingrone 	return 0;
486*6f9cba8fSJoseph Mingrone }
487*6f9cba8fSJoseph Mingrone 
488*6f9cba8fSJoseph Mingrone static int pcap_dpdk_setnonblock(pcap_t *p, int nonblock){
489*6f9cba8fSJoseph Mingrone 	struct pcap_dpdk *pd = (struct pcap_dpdk*)(p->priv);
490*6f9cba8fSJoseph Mingrone 	pd->nonblock = nonblock;
491*6f9cba8fSJoseph Mingrone 	return 0;
492*6f9cba8fSJoseph Mingrone }
493*6f9cba8fSJoseph Mingrone 
494*6f9cba8fSJoseph Mingrone static int pcap_dpdk_getnonblock(pcap_t *p){
495*6f9cba8fSJoseph Mingrone 	struct pcap_dpdk *pd = (struct pcap_dpdk*)(p->priv);
496*6f9cba8fSJoseph Mingrone 	return pd->nonblock;
497*6f9cba8fSJoseph Mingrone }
498*6f9cba8fSJoseph Mingrone static int check_link_status(uint16_t portid, struct rte_eth_link *plink)
499*6f9cba8fSJoseph Mingrone {
500*6f9cba8fSJoseph Mingrone 	// wait up to 9 seconds to get link status
501*6f9cba8fSJoseph Mingrone 	rte_eth_link_get(portid, plink);
502*6f9cba8fSJoseph Mingrone 	return plink->link_status == ETH_LINK_UP;
503*6f9cba8fSJoseph Mingrone }
504*6f9cba8fSJoseph Mingrone static void eth_addr_str(ETHER_ADDR_TYPE *addrp, char* mac_str, int len)
505*6f9cba8fSJoseph Mingrone {
506*6f9cba8fSJoseph Mingrone 	int offset=0;
507*6f9cba8fSJoseph Mingrone 	if (addrp == NULL){
508*6f9cba8fSJoseph Mingrone 		snprintf(mac_str, len-1, DPDK_DEF_MAC_ADDR);
509*6f9cba8fSJoseph Mingrone 		return;
510*6f9cba8fSJoseph Mingrone 	}
511*6f9cba8fSJoseph Mingrone 	for (int i=0; i<6; i++)
512*6f9cba8fSJoseph Mingrone 	{
513*6f9cba8fSJoseph Mingrone 		if (offset >= len)
514*6f9cba8fSJoseph Mingrone 		{ // buffer overflow
515*6f9cba8fSJoseph Mingrone 			return;
516*6f9cba8fSJoseph Mingrone 		}
517*6f9cba8fSJoseph Mingrone 		if (i==0)
518*6f9cba8fSJoseph Mingrone 		{
519*6f9cba8fSJoseph Mingrone 			snprintf(mac_str+offset, len-1-offset, "%02X",addrp->addr_bytes[i]);
520*6f9cba8fSJoseph Mingrone 			offset+=2; // FF
521*6f9cba8fSJoseph Mingrone 		}else{
522*6f9cba8fSJoseph Mingrone 			snprintf(mac_str+offset, len-1-offset, ":%02X", addrp->addr_bytes[i]);
523*6f9cba8fSJoseph Mingrone 			offset+=3; // :FF
524*6f9cba8fSJoseph Mingrone 		}
525*6f9cba8fSJoseph Mingrone 	}
526*6f9cba8fSJoseph Mingrone 	return;
527*6f9cba8fSJoseph Mingrone }
528*6f9cba8fSJoseph Mingrone // return portid by device name, otherwise return -1
529*6f9cba8fSJoseph Mingrone static uint16_t portid_by_device(char * device)
530*6f9cba8fSJoseph Mingrone {
531*6f9cba8fSJoseph Mingrone 	uint16_t ret = DPDK_PORTID_MAX;
532*6f9cba8fSJoseph Mingrone 	int len = strlen(device);
533*6f9cba8fSJoseph Mingrone 	int prefix_len = strlen(DPDK_PREFIX);
534*6f9cba8fSJoseph Mingrone 	unsigned long ret_ul = 0L;
535*6f9cba8fSJoseph Mingrone 	char *pEnd;
536*6f9cba8fSJoseph Mingrone 	if (len<=prefix_len || strncmp(device, DPDK_PREFIX, prefix_len)) // check prefix dpdk:
537*6f9cba8fSJoseph Mingrone 	{
538*6f9cba8fSJoseph Mingrone 		return ret;
539*6f9cba8fSJoseph Mingrone 	}
540*6f9cba8fSJoseph Mingrone 	//check all chars are digital
541*6f9cba8fSJoseph Mingrone 	for (int i=prefix_len; device[i]; i++){
542*6f9cba8fSJoseph Mingrone 		if (device[i]<'0' || device[i]>'9'){
543*6f9cba8fSJoseph Mingrone 			return ret;
544*6f9cba8fSJoseph Mingrone 		}
545*6f9cba8fSJoseph Mingrone 	}
546*6f9cba8fSJoseph Mingrone 	ret_ul = strtoul(&(device[prefix_len]), &pEnd, 10);
547*6f9cba8fSJoseph Mingrone 	if (pEnd == &(device[prefix_len]) || *pEnd != '\0'){
548*6f9cba8fSJoseph Mingrone 		return ret;
549*6f9cba8fSJoseph Mingrone 	}
550*6f9cba8fSJoseph Mingrone 	// too large for portid
551*6f9cba8fSJoseph Mingrone 	if (ret_ul >= DPDK_PORTID_MAX){
552*6f9cba8fSJoseph Mingrone 		return ret;
553*6f9cba8fSJoseph Mingrone 	}
554*6f9cba8fSJoseph Mingrone 	ret = (uint16_t)ret_ul;
555*6f9cba8fSJoseph Mingrone 	return ret;
556*6f9cba8fSJoseph Mingrone }
557*6f9cba8fSJoseph Mingrone 
558*6f9cba8fSJoseph Mingrone static int parse_dpdk_cfg(char* dpdk_cfg,char** dargv)
559*6f9cba8fSJoseph Mingrone {
560*6f9cba8fSJoseph Mingrone 	int cnt=0;
561*6f9cba8fSJoseph Mingrone 	memset(dargv,0,sizeof(dargv[0])*DPDK_ARGC_MAX);
562*6f9cba8fSJoseph Mingrone 	//current process name
563*6f9cba8fSJoseph Mingrone 	int skip_space = 1;
564*6f9cba8fSJoseph Mingrone 	int i=0;
565*6f9cba8fSJoseph Mingrone 	RTE_LOG(INFO, USER1,"dpdk cfg: %s\n",dpdk_cfg);
566*6f9cba8fSJoseph Mingrone 	// find first non space char
567*6f9cba8fSJoseph Mingrone 	// The last opt is NULL
568*6f9cba8fSJoseph Mingrone 	for (i=0;dpdk_cfg[i] && cnt<DPDK_ARGC_MAX-1;i++){
569*6f9cba8fSJoseph Mingrone 		if (skip_space && dpdk_cfg[i]!=' '){ // not space
570*6f9cba8fSJoseph Mingrone 			skip_space=!skip_space; // skip normal char
571*6f9cba8fSJoseph Mingrone 			dargv[cnt++] = dpdk_cfg+i;
572*6f9cba8fSJoseph Mingrone 		}
573*6f9cba8fSJoseph Mingrone 		if (!skip_space && dpdk_cfg[i]==' '){ // fint a space
574*6f9cba8fSJoseph Mingrone 			dpdk_cfg[i]=0x00; // end of this opt
575*6f9cba8fSJoseph Mingrone 			skip_space=!skip_space; // skip space char
576*6f9cba8fSJoseph Mingrone 		}
577*6f9cba8fSJoseph Mingrone 	}
578*6f9cba8fSJoseph Mingrone 	dargv[cnt]=NULL;
579*6f9cba8fSJoseph Mingrone 	return cnt;
580*6f9cba8fSJoseph Mingrone }
581*6f9cba8fSJoseph Mingrone 
582*6f9cba8fSJoseph Mingrone // only called once
583*6f9cba8fSJoseph Mingrone // Returns:
584*6f9cba8fSJoseph Mingrone //
585*6f9cba8fSJoseph Mingrone //    1 on success;
586*6f9cba8fSJoseph Mingrone //
587*6f9cba8fSJoseph Mingrone //    0 if "the EAL cannot initialize on this system", which we treat as
588*6f9cba8fSJoseph Mingrone //    meaning "DPDK isn't available";
589*6f9cba8fSJoseph Mingrone //
590*6f9cba8fSJoseph Mingrone //    a PCAP_ERROR_ code for other errors.
591*6f9cba8fSJoseph Mingrone //
592*6f9cba8fSJoseph Mingrone // If eaccess_not_fatal is non-zero, treat "a permissions issue" the way
593*6f9cba8fSJoseph Mingrone // we treat "the EAL cannot initialize on this system".  We use that
594*6f9cba8fSJoseph Mingrone // when trying to find DPDK devices, as we don't want to fail to return
595*6f9cba8fSJoseph Mingrone // *any* devices just because we can't support DPDK; when we're trying
596*6f9cba8fSJoseph Mingrone // to open a device, we need to return a permissions error in that case.
597*6f9cba8fSJoseph Mingrone static int dpdk_pre_init(char * ebuf, int eaccess_not_fatal)
598*6f9cba8fSJoseph Mingrone {
599*6f9cba8fSJoseph Mingrone 	int dargv_cnt=0;
600*6f9cba8fSJoseph Mingrone 	char *dargv[DPDK_ARGC_MAX];
601*6f9cba8fSJoseph Mingrone 	char *ptr_dpdk_cfg = NULL;
602*6f9cba8fSJoseph Mingrone 	int ret;
603*6f9cba8fSJoseph Mingrone 	// globale var
604*6f9cba8fSJoseph Mingrone 	if (is_dpdk_pre_inited != 0)
605*6f9cba8fSJoseph Mingrone 	{
606*6f9cba8fSJoseph Mingrone 		// already inited; did that succeed?
607*6f9cba8fSJoseph Mingrone 		if (is_dpdk_pre_inited < 0)
608*6f9cba8fSJoseph Mingrone 		{
609*6f9cba8fSJoseph Mingrone 			// failed
610*6f9cba8fSJoseph Mingrone 			goto error;
611*6f9cba8fSJoseph Mingrone 		}
612*6f9cba8fSJoseph Mingrone 		else
613*6f9cba8fSJoseph Mingrone 		{
614*6f9cba8fSJoseph Mingrone 			// succeeded
615*6f9cba8fSJoseph Mingrone 			return 1;
616*6f9cba8fSJoseph Mingrone 		}
617*6f9cba8fSJoseph Mingrone 	}
618*6f9cba8fSJoseph Mingrone 	// init EAL
619*6f9cba8fSJoseph Mingrone 	ptr_dpdk_cfg = getenv(DPDK_CFG_ENV_NAME);
620*6f9cba8fSJoseph Mingrone 	// set default log level to debug
621*6f9cba8fSJoseph Mingrone 	rte_log_set_global_level(DPDK_DEF_LOG_LEV);
622*6f9cba8fSJoseph Mingrone 	if (ptr_dpdk_cfg == NULL)
623*6f9cba8fSJoseph Mingrone 	{
624*6f9cba8fSJoseph Mingrone 		RTE_LOG(INFO,USER1,"env $DPDK_CFG is unset, so using default: %s\n",DPDK_DEF_CFG);
625*6f9cba8fSJoseph Mingrone 		ptr_dpdk_cfg = DPDK_DEF_CFG;
626*6f9cba8fSJoseph Mingrone 	}
627*6f9cba8fSJoseph Mingrone 	memset(dpdk_cfg_buf,0,sizeof(dpdk_cfg_buf));
628*6f9cba8fSJoseph Mingrone 	snprintf(dpdk_cfg_buf,DPDK_CFG_MAX_LEN-1,"%s %s",DPDK_LIB_NAME,ptr_dpdk_cfg);
629*6f9cba8fSJoseph Mingrone 	dargv_cnt = parse_dpdk_cfg(dpdk_cfg_buf,dargv);
630*6f9cba8fSJoseph Mingrone 	ret = rte_eal_init(dargv_cnt,dargv);
631*6f9cba8fSJoseph Mingrone 	if (ret == -1)
632*6f9cba8fSJoseph Mingrone 	{
633*6f9cba8fSJoseph Mingrone 		// Indicate that we've called rte_eal_init() by setting
634*6f9cba8fSJoseph Mingrone 		// is_dpdk_pre_inited to the negative of the error code,
635*6f9cba8fSJoseph Mingrone 		// and process the error.
636*6f9cba8fSJoseph Mingrone 		is_dpdk_pre_inited = -rte_errno;
637*6f9cba8fSJoseph Mingrone 		goto error;
638*6f9cba8fSJoseph Mingrone 	}
639*6f9cba8fSJoseph Mingrone 	// init succeeded, so we do not need to do it again later.
640*6f9cba8fSJoseph Mingrone 	is_dpdk_pre_inited = 1;
641*6f9cba8fSJoseph Mingrone 	return 1;
642*6f9cba8fSJoseph Mingrone 
643*6f9cba8fSJoseph Mingrone error:
644*6f9cba8fSJoseph Mingrone 	switch (-is_dpdk_pre_inited)
645*6f9cba8fSJoseph Mingrone 	{
646*6f9cba8fSJoseph Mingrone 		case EACCES:
647*6f9cba8fSJoseph Mingrone 			// This "indicates a permissions issue.".
648*6f9cba8fSJoseph Mingrone 			RTE_LOG(ERR, USER1, "%s\n", DPDK_ERR_PERM_MSG);
649*6f9cba8fSJoseph Mingrone 			// If we were told to treat this as just meaning
650*6f9cba8fSJoseph Mingrone 			// DPDK isn't available, do so.
651*6f9cba8fSJoseph Mingrone 			if (eaccess_not_fatal)
652*6f9cba8fSJoseph Mingrone 				return 0;
653*6f9cba8fSJoseph Mingrone 			// Otherwise report a fatal error.
654*6f9cba8fSJoseph Mingrone 			snprintf(ebuf, PCAP_ERRBUF_SIZE,
655*6f9cba8fSJoseph Mingrone 			    "DPDK requires that it run as root");
656*6f9cba8fSJoseph Mingrone 			return PCAP_ERROR_PERM_DENIED;
657*6f9cba8fSJoseph Mingrone 
658*6f9cba8fSJoseph Mingrone 		case EAGAIN:
659*6f9cba8fSJoseph Mingrone 			// This "indicates either a bus or system
660*6f9cba8fSJoseph Mingrone 			// resource was not available, setup may
661*6f9cba8fSJoseph Mingrone 			// be attempted again."
662*6f9cba8fSJoseph Mingrone 			// There's no such error in pcap, so I'm
663*6f9cba8fSJoseph Mingrone 			// not sure what we should do here.
664*6f9cba8fSJoseph Mingrone 			snprintf(ebuf, PCAP_ERRBUF_SIZE,
665*6f9cba8fSJoseph Mingrone 			    "Bus or system resource was not available");
666*6f9cba8fSJoseph Mingrone 			break;
667*6f9cba8fSJoseph Mingrone 
668*6f9cba8fSJoseph Mingrone 		case EALREADY:
669*6f9cba8fSJoseph Mingrone 			// This "indicates that the rte_eal_init
670*6f9cba8fSJoseph Mingrone 			// function has already been called, and
671*6f9cba8fSJoseph Mingrone 			// cannot be called again."
672*6f9cba8fSJoseph Mingrone 			// That's not an error; set the "we've
673*6f9cba8fSJoseph Mingrone 			// been here before" flag and return
674*6f9cba8fSJoseph Mingrone 			// success.
675*6f9cba8fSJoseph Mingrone 			is_dpdk_pre_inited = 1;
676*6f9cba8fSJoseph Mingrone 			return 1;
677*6f9cba8fSJoseph Mingrone 
678*6f9cba8fSJoseph Mingrone 		case EFAULT:
679*6f9cba8fSJoseph Mingrone 			// This "indicates the tailq configuration
680*6f9cba8fSJoseph Mingrone 			// name was not found in memory configuration."
681*6f9cba8fSJoseph Mingrone 			snprintf(ebuf, PCAP_ERRBUF_SIZE,
682*6f9cba8fSJoseph Mingrone 			    "The tailq configuration name was not found in the memory configuration");
683*6f9cba8fSJoseph Mingrone 			return PCAP_ERROR;
684*6f9cba8fSJoseph Mingrone 
685*6f9cba8fSJoseph Mingrone 		case EINVAL:
686*6f9cba8fSJoseph Mingrone 			// This "indicates invalid parameters were
687*6f9cba8fSJoseph Mingrone 			// passed as argv/argc."  Those came from
688*6f9cba8fSJoseph Mingrone 			// the configuration file.
689*6f9cba8fSJoseph Mingrone 			snprintf(ebuf, PCAP_ERRBUF_SIZE,
690*6f9cba8fSJoseph Mingrone 			    "The configuration file has invalid parameters");
691*6f9cba8fSJoseph Mingrone 			break;
692*6f9cba8fSJoseph Mingrone 
693*6f9cba8fSJoseph Mingrone 		case ENOMEM:
694*6f9cba8fSJoseph Mingrone 			// This "indicates failure likely caused by
695*6f9cba8fSJoseph Mingrone 			// an out-of-memory condition."
696*6f9cba8fSJoseph Mingrone 			snprintf(ebuf, PCAP_ERRBUF_SIZE,
697*6f9cba8fSJoseph Mingrone 			    "Out of memory");
698*6f9cba8fSJoseph Mingrone 			break;
699*6f9cba8fSJoseph Mingrone 
700*6f9cba8fSJoseph Mingrone 		case ENODEV:
701*6f9cba8fSJoseph Mingrone 			// This "indicates memory setup issues."
702*6f9cba8fSJoseph Mingrone 			snprintf(ebuf, PCAP_ERRBUF_SIZE,
703*6f9cba8fSJoseph Mingrone 			    "An error occurred setting up memory");
704*6f9cba8fSJoseph Mingrone 			break;
705*6f9cba8fSJoseph Mingrone 
706*6f9cba8fSJoseph Mingrone 		case ENOTSUP:
707*6f9cba8fSJoseph Mingrone 			// This "indicates that the EAL cannot
708*6f9cba8fSJoseph Mingrone 			// initialize on this system."  We treat
709*6f9cba8fSJoseph Mingrone 			// that as meaning DPDK isn't available
710*6f9cba8fSJoseph Mingrone 			// on this machine, rather than as a
711*6f9cba8fSJoseph Mingrone 			// fatal error, and let our caller decide
712*6f9cba8fSJoseph Mingrone 			// whether that's a fatal error (if trying
713*6f9cba8fSJoseph Mingrone 			// to activate a DPDK device) or not (if
714*6f9cba8fSJoseph Mingrone 			// trying to enumerate devices).
715*6f9cba8fSJoseph Mingrone 			return 0;
716*6f9cba8fSJoseph Mingrone 
717*6f9cba8fSJoseph Mingrone 		case EPROTO:
718*6f9cba8fSJoseph Mingrone 			// This "indicates that the PCI bus is
719*6f9cba8fSJoseph Mingrone 			// either not present, or is not readable
720*6f9cba8fSJoseph Mingrone 			// by the eal."  Does "the PCI bus is not
721*6f9cba8fSJoseph Mingrone 			// present" mean "this machine has no PCI
722*6f9cba8fSJoseph Mingrone 			// bus", which strikes me as a "not available"
723*6f9cba8fSJoseph Mingrone 			// case?  If so, should "is not readable by
724*6f9cba8fSJoseph Mingrone 			// the EAL" also something we should treat
725*6f9cba8fSJoseph Mingrone 			// as a "not available" case?  If not, we
726*6f9cba8fSJoseph Mingrone 			// can't distinguish between the two, so
727*6f9cba8fSJoseph Mingrone 			// we're stuck.
728*6f9cba8fSJoseph Mingrone 			snprintf(ebuf, PCAP_ERRBUF_SIZE,
729*6f9cba8fSJoseph Mingrone 			    "PCI bus is not present or not readable by the EAL");
730*6f9cba8fSJoseph Mingrone 			break;
731*6f9cba8fSJoseph Mingrone 
732*6f9cba8fSJoseph Mingrone 		case ENOEXEC:
733*6f9cba8fSJoseph Mingrone 			// This "indicates that a service core
734*6f9cba8fSJoseph Mingrone 			// failed to launch successfully."
735*6f9cba8fSJoseph Mingrone 			snprintf(ebuf, PCAP_ERRBUF_SIZE,
736*6f9cba8fSJoseph Mingrone 			    "A service core failed to launch successfully");
737*6f9cba8fSJoseph Mingrone 			break;
738*6f9cba8fSJoseph Mingrone 
739*6f9cba8fSJoseph Mingrone 		default:
740*6f9cba8fSJoseph Mingrone 			//
741*6f9cba8fSJoseph Mingrone 			// That's not in the list of errors in
742*6f9cba8fSJoseph Mingrone 			// the documentation; let it be reported
743*6f9cba8fSJoseph Mingrone 			// as an error.
744*6f9cba8fSJoseph Mingrone 			//
745*6f9cba8fSJoseph Mingrone 			dpdk_fmt_errmsg_for_rte_errno(ebuf,
746*6f9cba8fSJoseph Mingrone 			    PCAP_ERRBUF_SIZE, -is_dpdk_pre_inited,
747*6f9cba8fSJoseph Mingrone 			    "dpdk error: dpdk_pre_init failed");
748*6f9cba8fSJoseph Mingrone 			break;
749*6f9cba8fSJoseph Mingrone 	}
750*6f9cba8fSJoseph Mingrone 	// Error.
751*6f9cba8fSJoseph Mingrone 	return PCAP_ERROR;
752*6f9cba8fSJoseph Mingrone }
753*6f9cba8fSJoseph Mingrone 
754*6f9cba8fSJoseph Mingrone static int pcap_dpdk_activate(pcap_t *p)
755*6f9cba8fSJoseph Mingrone {
756*6f9cba8fSJoseph Mingrone 	struct pcap_dpdk *pd = p->priv;
757*6f9cba8fSJoseph Mingrone 	pd->orig = p;
758*6f9cba8fSJoseph Mingrone 	int ret = PCAP_ERROR;
759*6f9cba8fSJoseph Mingrone 	uint16_t nb_ports=0;
760*6f9cba8fSJoseph Mingrone 	uint16_t portid= DPDK_PORTID_MAX;
761*6f9cba8fSJoseph Mingrone 	unsigned nb_mbufs = DPDK_NB_MBUFS;
762*6f9cba8fSJoseph Mingrone 	struct rte_eth_rxconf rxq_conf;
763*6f9cba8fSJoseph Mingrone 	struct rte_eth_txconf txq_conf;
764*6f9cba8fSJoseph Mingrone 	struct rte_eth_conf local_port_conf = port_conf;
765*6f9cba8fSJoseph Mingrone 	struct rte_eth_dev_info dev_info;
766*6f9cba8fSJoseph Mingrone 	int is_port_up = 0;
767*6f9cba8fSJoseph Mingrone 	struct rte_eth_link link;
768*6f9cba8fSJoseph Mingrone 	do{
769*6f9cba8fSJoseph Mingrone 		//init EAL; fail if we have insufficient permission
770*6f9cba8fSJoseph Mingrone 		char dpdk_pre_init_errbuf[PCAP_ERRBUF_SIZE];
771*6f9cba8fSJoseph Mingrone 		ret = dpdk_pre_init(dpdk_pre_init_errbuf, 0);
772*6f9cba8fSJoseph Mingrone 		if (ret < 0)
773*6f9cba8fSJoseph Mingrone 		{
774*6f9cba8fSJoseph Mingrone 			// This returns a negative value on an error.
775*6f9cba8fSJoseph Mingrone 			snprintf(p->errbuf, PCAP_ERRBUF_SIZE,
776*6f9cba8fSJoseph Mingrone 			    "Can't open device %s: %s",
777*6f9cba8fSJoseph Mingrone 			    p->opt.device, dpdk_pre_init_errbuf);
778*6f9cba8fSJoseph Mingrone 			// ret is set to the correct error
779*6f9cba8fSJoseph Mingrone 			break;
780*6f9cba8fSJoseph Mingrone 		}
781*6f9cba8fSJoseph Mingrone 		if (ret == 0)
782*6f9cba8fSJoseph Mingrone 		{
783*6f9cba8fSJoseph Mingrone 			// This means DPDK isn't available on this machine.
784*6f9cba8fSJoseph Mingrone 			snprintf(p->errbuf, PCAP_ERRBUF_SIZE,
785*6f9cba8fSJoseph Mingrone 			    "Can't open device %s: DPDK is not available on this machine",
786*6f9cba8fSJoseph Mingrone 			    p->opt.device);
787*6f9cba8fSJoseph Mingrone 			return PCAP_ERROR_NO_SUCH_DEVICE;
788*6f9cba8fSJoseph Mingrone 		}
789*6f9cba8fSJoseph Mingrone 
790*6f9cba8fSJoseph Mingrone 		ret = dpdk_init_timer(pd);
791*6f9cba8fSJoseph Mingrone 		if (ret<0)
792*6f9cba8fSJoseph Mingrone 		{
793*6f9cba8fSJoseph Mingrone 			snprintf(p->errbuf, PCAP_ERRBUF_SIZE,
794*6f9cba8fSJoseph Mingrone 				"dpdk error: Init timer is zero with device %s",
795*6f9cba8fSJoseph Mingrone 				p->opt.device);
796*6f9cba8fSJoseph Mingrone 			ret = PCAP_ERROR;
797*6f9cba8fSJoseph Mingrone 			break;
798*6f9cba8fSJoseph Mingrone 		}
799*6f9cba8fSJoseph Mingrone 
800*6f9cba8fSJoseph Mingrone 		nb_ports = rte_eth_dev_count_avail();
801*6f9cba8fSJoseph Mingrone 		if (nb_ports == 0)
802*6f9cba8fSJoseph Mingrone 		{
803*6f9cba8fSJoseph Mingrone 			snprintf(p->errbuf, PCAP_ERRBUF_SIZE,
804*6f9cba8fSJoseph Mingrone 			    "dpdk error: No Ethernet ports");
805*6f9cba8fSJoseph Mingrone 			ret = PCAP_ERROR;
806*6f9cba8fSJoseph Mingrone 			break;
807*6f9cba8fSJoseph Mingrone 		}
808*6f9cba8fSJoseph Mingrone 
809*6f9cba8fSJoseph Mingrone 		portid = portid_by_device(p->opt.device);
810*6f9cba8fSJoseph Mingrone 		if (portid == DPDK_PORTID_MAX){
811*6f9cba8fSJoseph Mingrone 			snprintf(p->errbuf, PCAP_ERRBUF_SIZE,
812*6f9cba8fSJoseph Mingrone 			    "dpdk error: portid is invalid. device %s",
813*6f9cba8fSJoseph Mingrone 			    p->opt.device);
814*6f9cba8fSJoseph Mingrone 			ret = PCAP_ERROR_NO_SUCH_DEVICE;
815*6f9cba8fSJoseph Mingrone 			break;
816*6f9cba8fSJoseph Mingrone 		}
817*6f9cba8fSJoseph Mingrone 
818*6f9cba8fSJoseph Mingrone 		pd->portid = portid;
819*6f9cba8fSJoseph Mingrone 
820*6f9cba8fSJoseph Mingrone 		if (p->snapshot <= 0 || p->snapshot > MAXIMUM_SNAPLEN)
821*6f9cba8fSJoseph Mingrone 		{
822*6f9cba8fSJoseph Mingrone 			p->snapshot = MAXIMUM_SNAPLEN;
823*6f9cba8fSJoseph Mingrone 		}
824*6f9cba8fSJoseph Mingrone 		// create the mbuf pool
825*6f9cba8fSJoseph Mingrone 		pd->pktmbuf_pool = rte_pktmbuf_pool_create(MBUF_POOL_NAME, nb_mbufs,
826*6f9cba8fSJoseph Mingrone 			MEMPOOL_CACHE_SIZE, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
827*6f9cba8fSJoseph Mingrone 			rte_socket_id());
828*6f9cba8fSJoseph Mingrone 		if (pd->pktmbuf_pool == NULL)
829*6f9cba8fSJoseph Mingrone 		{
830*6f9cba8fSJoseph Mingrone 			dpdk_fmt_errmsg_for_rte_errno(p->errbuf,
831*6f9cba8fSJoseph Mingrone 			    PCAP_ERRBUF_SIZE, rte_errno,
832*6f9cba8fSJoseph Mingrone 			    "dpdk error: Cannot init mbuf pool");
833*6f9cba8fSJoseph Mingrone 			ret = PCAP_ERROR;
834*6f9cba8fSJoseph Mingrone 			break;
835*6f9cba8fSJoseph Mingrone 		}
836*6f9cba8fSJoseph Mingrone 		// config dev
837*6f9cba8fSJoseph Mingrone 		rte_eth_dev_info_get(portid, &dev_info);
838*6f9cba8fSJoseph Mingrone 		if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE)
839*6f9cba8fSJoseph Mingrone 		{
840*6f9cba8fSJoseph Mingrone 			local_port_conf.txmode.offloads |=DEV_TX_OFFLOAD_MBUF_FAST_FREE;
841*6f9cba8fSJoseph Mingrone 		}
842*6f9cba8fSJoseph Mingrone 		// only support 1 queue
843*6f9cba8fSJoseph Mingrone 		ret = rte_eth_dev_configure(portid, 1, 1, &local_port_conf);
844*6f9cba8fSJoseph Mingrone 		if (ret < 0)
845*6f9cba8fSJoseph Mingrone 		{
846*6f9cba8fSJoseph Mingrone 			dpdk_fmt_errmsg_for_rte_errno(p->errbuf,
847*6f9cba8fSJoseph Mingrone 			    PCAP_ERRBUF_SIZE, -ret,
848*6f9cba8fSJoseph Mingrone 			    "dpdk error: Cannot configure device: port=%u",
849*6f9cba8fSJoseph Mingrone 			    portid);
850*6f9cba8fSJoseph Mingrone 			ret = PCAP_ERROR;
851*6f9cba8fSJoseph Mingrone 			break;
852*6f9cba8fSJoseph Mingrone 		}
853*6f9cba8fSJoseph Mingrone 		// adjust rx tx
854*6f9cba8fSJoseph Mingrone 		ret = rte_eth_dev_adjust_nb_rx_tx_desc(portid, &nb_rxd, &nb_txd);
855*6f9cba8fSJoseph Mingrone 		if (ret < 0)
856*6f9cba8fSJoseph Mingrone 		{
857*6f9cba8fSJoseph Mingrone 			dpdk_fmt_errmsg_for_rte_errno(p->errbuf,
858*6f9cba8fSJoseph Mingrone 			    PCAP_ERRBUF_SIZE, -ret,
859*6f9cba8fSJoseph Mingrone 			    "dpdk error: Cannot adjust number of descriptors: port=%u",
860*6f9cba8fSJoseph Mingrone 			    portid);
861*6f9cba8fSJoseph Mingrone 			ret = PCAP_ERROR;
862*6f9cba8fSJoseph Mingrone 			break;
863*6f9cba8fSJoseph Mingrone 		}
864*6f9cba8fSJoseph Mingrone 		// get MAC addr
865*6f9cba8fSJoseph Mingrone 		rte_eth_macaddr_get(portid, &(pd->eth_addr));
866*6f9cba8fSJoseph Mingrone 		eth_addr_str(&(pd->eth_addr), pd->mac_addr, DPDK_MAC_ADDR_SIZE-1);
867*6f9cba8fSJoseph Mingrone 
868*6f9cba8fSJoseph Mingrone 		// init one RX queue
869*6f9cba8fSJoseph Mingrone 		rxq_conf = dev_info.default_rxconf;
870*6f9cba8fSJoseph Mingrone 		rxq_conf.offloads = local_port_conf.rxmode.offloads;
871*6f9cba8fSJoseph Mingrone 		ret = rte_eth_rx_queue_setup(portid, 0, nb_rxd,
872*6f9cba8fSJoseph Mingrone 					     rte_eth_dev_socket_id(portid),
873*6f9cba8fSJoseph Mingrone 					     &rxq_conf,
874*6f9cba8fSJoseph Mingrone 					     pd->pktmbuf_pool);
875*6f9cba8fSJoseph Mingrone 		if (ret < 0)
876*6f9cba8fSJoseph Mingrone 		{
877*6f9cba8fSJoseph Mingrone 			dpdk_fmt_errmsg_for_rte_errno(p->errbuf,
878*6f9cba8fSJoseph Mingrone 			    PCAP_ERRBUF_SIZE, -ret,
879*6f9cba8fSJoseph Mingrone 			    "dpdk error: rte_eth_rx_queue_setup:port=%u",
880*6f9cba8fSJoseph Mingrone 			    portid);
881*6f9cba8fSJoseph Mingrone 			ret = PCAP_ERROR;
882*6f9cba8fSJoseph Mingrone 			break;
883*6f9cba8fSJoseph Mingrone 		}
884*6f9cba8fSJoseph Mingrone 
885*6f9cba8fSJoseph Mingrone 		// init one TX queue
886*6f9cba8fSJoseph Mingrone 		txq_conf = dev_info.default_txconf;
887*6f9cba8fSJoseph Mingrone 		txq_conf.offloads = local_port_conf.txmode.offloads;
888*6f9cba8fSJoseph Mingrone 		ret = rte_eth_tx_queue_setup(portid, 0, nb_txd,
889*6f9cba8fSJoseph Mingrone 				rte_eth_dev_socket_id(portid),
890*6f9cba8fSJoseph Mingrone 				&txq_conf);
891*6f9cba8fSJoseph Mingrone 		if (ret < 0)
892*6f9cba8fSJoseph Mingrone 		{
893*6f9cba8fSJoseph Mingrone 			dpdk_fmt_errmsg_for_rte_errno(p->errbuf,
894*6f9cba8fSJoseph Mingrone 			    PCAP_ERRBUF_SIZE, -ret,
895*6f9cba8fSJoseph Mingrone 			    "dpdk error: rte_eth_tx_queue_setup:port=%u",
896*6f9cba8fSJoseph Mingrone 			    portid);
897*6f9cba8fSJoseph Mingrone 			ret = PCAP_ERROR;
898*6f9cba8fSJoseph Mingrone 			break;
899*6f9cba8fSJoseph Mingrone 		}
900*6f9cba8fSJoseph Mingrone 		// Initialize TX buffers
901*6f9cba8fSJoseph Mingrone 		tx_buffer = rte_zmalloc_socket(DPDK_TX_BUF_NAME,
902*6f9cba8fSJoseph Mingrone 				RTE_ETH_TX_BUFFER_SIZE(MAX_PKT_BURST), 0,
903*6f9cba8fSJoseph Mingrone 				rte_eth_dev_socket_id(portid));
904*6f9cba8fSJoseph Mingrone 		if (tx_buffer == NULL)
905*6f9cba8fSJoseph Mingrone 		{
906*6f9cba8fSJoseph Mingrone 			snprintf(p->errbuf, PCAP_ERRBUF_SIZE,
907*6f9cba8fSJoseph Mingrone 			    "dpdk error: Cannot allocate buffer for tx on port %u", portid);
908*6f9cba8fSJoseph Mingrone 			ret = PCAP_ERROR;
909*6f9cba8fSJoseph Mingrone 			break;
910*6f9cba8fSJoseph Mingrone 		}
911*6f9cba8fSJoseph Mingrone 		rte_eth_tx_buffer_init(tx_buffer, MAX_PKT_BURST);
912*6f9cba8fSJoseph Mingrone 		// Start device
913*6f9cba8fSJoseph Mingrone 		ret = rte_eth_dev_start(portid);
914*6f9cba8fSJoseph Mingrone 		if (ret < 0)
915*6f9cba8fSJoseph Mingrone 		{
916*6f9cba8fSJoseph Mingrone 			dpdk_fmt_errmsg_for_rte_errno(p->errbuf,
917*6f9cba8fSJoseph Mingrone 			    PCAP_ERRBUF_SIZE, -ret,
918*6f9cba8fSJoseph Mingrone 			    "dpdk error: rte_eth_dev_start:port=%u",
919*6f9cba8fSJoseph Mingrone 			    portid);
920*6f9cba8fSJoseph Mingrone 			ret = PCAP_ERROR;
921*6f9cba8fSJoseph Mingrone 			break;
922*6f9cba8fSJoseph Mingrone 		}
923*6f9cba8fSJoseph Mingrone 		// set promiscuous mode
924*6f9cba8fSJoseph Mingrone 		if (p->opt.promisc){
925*6f9cba8fSJoseph Mingrone 			pd->must_clear_promisc=1;
926*6f9cba8fSJoseph Mingrone 			rte_eth_promiscuous_enable(portid);
927*6f9cba8fSJoseph Mingrone 		}
928*6f9cba8fSJoseph Mingrone 		// check link status
929*6f9cba8fSJoseph Mingrone 		is_port_up = check_link_status(portid, &link);
930*6f9cba8fSJoseph Mingrone 		if (!is_port_up){
931*6f9cba8fSJoseph Mingrone 			snprintf(p->errbuf, PCAP_ERRBUF_SIZE,
932*6f9cba8fSJoseph Mingrone 			    "dpdk error: link is down, port=%u",portid);
933*6f9cba8fSJoseph Mingrone 			ret = PCAP_ERROR_IFACE_NOT_UP;
934*6f9cba8fSJoseph Mingrone 			break;
935*6f9cba8fSJoseph Mingrone 		}
936*6f9cba8fSJoseph Mingrone 		// reset statistics
937*6f9cba8fSJoseph Mingrone 		rte_eth_stats_reset(pd->portid);
938*6f9cba8fSJoseph Mingrone 		calculate_timestamp(&(pd->ts_helper), &(pd->prev_ts));
939*6f9cba8fSJoseph Mingrone 		rte_eth_stats_get(pd->portid,&(pd->prev_stats));
940*6f9cba8fSJoseph Mingrone 		// format pcap_t
941*6f9cba8fSJoseph Mingrone 		pd->portid = portid;
942*6f9cba8fSJoseph Mingrone 		p->fd = pd->portid;
943*6f9cba8fSJoseph Mingrone 		if (p->snapshot <=0 || p->snapshot> MAXIMUM_SNAPLEN)
944*6f9cba8fSJoseph Mingrone 		{
945*6f9cba8fSJoseph Mingrone 			p->snapshot = MAXIMUM_SNAPLEN;
946*6f9cba8fSJoseph Mingrone 		}
947*6f9cba8fSJoseph Mingrone 		p->linktype = DLT_EN10MB; // Ethernet, the 10MB is historical.
948*6f9cba8fSJoseph Mingrone 		p->selectable_fd = p->fd;
949*6f9cba8fSJoseph Mingrone 		p->read_op = pcap_dpdk_dispatch;
950*6f9cba8fSJoseph Mingrone 		p->inject_op = pcap_dpdk_inject;
951*6f9cba8fSJoseph Mingrone 		// using pcap_filter currently, though DPDK provides their own BPF function. Because DPDK BPF needs load a ELF file as a filter.
952*6f9cba8fSJoseph Mingrone 		p->setfilter_op = install_bpf_program;
953*6f9cba8fSJoseph Mingrone 		p->setdirection_op = NULL;
954*6f9cba8fSJoseph Mingrone 		p->set_datalink_op = NULL;
955*6f9cba8fSJoseph Mingrone 		p->getnonblock_op = pcap_dpdk_getnonblock;
956*6f9cba8fSJoseph Mingrone 		p->setnonblock_op = pcap_dpdk_setnonblock;
957*6f9cba8fSJoseph Mingrone 		p->stats_op = pcap_dpdk_stats;
958*6f9cba8fSJoseph Mingrone 		p->cleanup_op = pcap_dpdk_close;
959*6f9cba8fSJoseph Mingrone 		p->breakloop_op = pcap_breakloop_common;
960*6f9cba8fSJoseph Mingrone 		// set default timeout
961*6f9cba8fSJoseph Mingrone 		pd->required_select_timeout.tv_sec = 0;
962*6f9cba8fSJoseph Mingrone 		pd->required_select_timeout.tv_usec = DPDK_DEF_MIN_SLEEP_MS*1000;
963*6f9cba8fSJoseph Mingrone 		p->required_select_timeout = &pd->required_select_timeout;
964*6f9cba8fSJoseph Mingrone 		ret = 0; // OK
965*6f9cba8fSJoseph Mingrone 	}while(0);
966*6f9cba8fSJoseph Mingrone 
967*6f9cba8fSJoseph Mingrone 	if (ret <= PCAP_ERROR) // all kinds of error code
968*6f9cba8fSJoseph Mingrone 	{
969*6f9cba8fSJoseph Mingrone 		pcap_cleanup_live_common(p);
970*6f9cba8fSJoseph Mingrone 	}else{
971*6f9cba8fSJoseph Mingrone 		rte_eth_dev_get_name_by_port(portid,pd->pci_addr);
972*6f9cba8fSJoseph Mingrone 		RTE_LOG(INFO, USER1,"Port %d device: %s, MAC:%s, PCI:%s\n", portid, p->opt.device, pd->mac_addr, pd->pci_addr);
973*6f9cba8fSJoseph Mingrone 		RTE_LOG(INFO, USER1,"Port %d Link Up. Speed %u Mbps - %s\n",
974*6f9cba8fSJoseph Mingrone 							portid, link.link_speed,
975*6f9cba8fSJoseph Mingrone 					(link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
976*6f9cba8fSJoseph Mingrone 						("full-duplex") : ("half-duplex\n"));
977*6f9cba8fSJoseph Mingrone 	}
978*6f9cba8fSJoseph Mingrone 	return ret;
979*6f9cba8fSJoseph Mingrone }
980*6f9cba8fSJoseph Mingrone 
981*6f9cba8fSJoseph Mingrone // device name for dpdk should be in the form as dpdk:number, such as dpdk:0
982*6f9cba8fSJoseph Mingrone pcap_t * pcap_dpdk_create(const char *device, char *ebuf, int *is_ours)
983*6f9cba8fSJoseph Mingrone {
984*6f9cba8fSJoseph Mingrone 	pcap_t *p=NULL;
985*6f9cba8fSJoseph Mingrone 	*is_ours = 0;
986*6f9cba8fSJoseph Mingrone 
987*6f9cba8fSJoseph Mingrone 	*is_ours = !strncmp(device, "dpdk:", 5);
988*6f9cba8fSJoseph Mingrone 	if (! *is_ours)
989*6f9cba8fSJoseph Mingrone 		return NULL;
990*6f9cba8fSJoseph Mingrone 	//memset will happen
991*6f9cba8fSJoseph Mingrone 	p = PCAP_CREATE_COMMON(ebuf, struct pcap_dpdk);
992*6f9cba8fSJoseph Mingrone 
993*6f9cba8fSJoseph Mingrone 	if (p == NULL)
994*6f9cba8fSJoseph Mingrone 		return NULL;
995*6f9cba8fSJoseph Mingrone 	p->activate_op = pcap_dpdk_activate;
996*6f9cba8fSJoseph Mingrone 	return p;
997*6f9cba8fSJoseph Mingrone }
998*6f9cba8fSJoseph Mingrone 
999*6f9cba8fSJoseph Mingrone int pcap_dpdk_findalldevs(pcap_if_list_t *devlistp, char *ebuf)
1000*6f9cba8fSJoseph Mingrone {
1001*6f9cba8fSJoseph Mingrone 	int ret=0;
1002*6f9cba8fSJoseph Mingrone 	unsigned int nb_ports = 0;
1003*6f9cba8fSJoseph Mingrone 	char dpdk_name[DPDK_DEV_NAME_MAX];
1004*6f9cba8fSJoseph Mingrone 	char dpdk_desc[DPDK_DEV_DESC_MAX];
1005*6f9cba8fSJoseph Mingrone 	ETHER_ADDR_TYPE eth_addr;
1006*6f9cba8fSJoseph Mingrone 	char mac_addr[DPDK_MAC_ADDR_SIZE];
1007*6f9cba8fSJoseph Mingrone 	char pci_addr[DPDK_PCI_ADDR_SIZE];
1008*6f9cba8fSJoseph Mingrone 	do{
1009*6f9cba8fSJoseph Mingrone 		// init EAL; return "DPDK not available" if we
1010*6f9cba8fSJoseph Mingrone 		// have insufficient permission
1011*6f9cba8fSJoseph Mingrone 		char dpdk_pre_init_errbuf[PCAP_ERRBUF_SIZE];
1012*6f9cba8fSJoseph Mingrone 		ret = dpdk_pre_init(dpdk_pre_init_errbuf, 1);
1013*6f9cba8fSJoseph Mingrone 		if (ret < 0)
1014*6f9cba8fSJoseph Mingrone 		{
1015*6f9cba8fSJoseph Mingrone 			// This returns a negative value on an error.
1016*6f9cba8fSJoseph Mingrone 			snprintf(ebuf, PCAP_ERRBUF_SIZE,
1017*6f9cba8fSJoseph Mingrone 			    "Can't look for DPDK devices: %s",
1018*6f9cba8fSJoseph Mingrone 			    dpdk_pre_init_errbuf);
1019*6f9cba8fSJoseph Mingrone 			ret = PCAP_ERROR;
1020*6f9cba8fSJoseph Mingrone 			break;
1021*6f9cba8fSJoseph Mingrone 		}
1022*6f9cba8fSJoseph Mingrone 		if (ret == 0)
1023*6f9cba8fSJoseph Mingrone 		{
1024*6f9cba8fSJoseph Mingrone 			// This means DPDK isn't available on this machine.
1025*6f9cba8fSJoseph Mingrone 			// That just means "don't return any devices".
1026*6f9cba8fSJoseph Mingrone 			break;
1027*6f9cba8fSJoseph Mingrone 		}
1028*6f9cba8fSJoseph Mingrone 		nb_ports = rte_eth_dev_count_avail();
1029*6f9cba8fSJoseph Mingrone 		if (nb_ports == 0)
1030*6f9cba8fSJoseph Mingrone 		{
1031*6f9cba8fSJoseph Mingrone 			// That just means "don't return any devices".
1032*6f9cba8fSJoseph Mingrone 			ret = 0;
1033*6f9cba8fSJoseph Mingrone 			break;
1034*6f9cba8fSJoseph Mingrone 		}
1035*6f9cba8fSJoseph Mingrone 		for (unsigned int i=0; i<nb_ports; i++){
1036*6f9cba8fSJoseph Mingrone 			snprintf(dpdk_name, DPDK_DEV_NAME_MAX-1,
1037*6f9cba8fSJoseph Mingrone 			    "%s%u", DPDK_PREFIX, i);
1038*6f9cba8fSJoseph Mingrone 			// mac addr
1039*6f9cba8fSJoseph Mingrone 			rte_eth_macaddr_get(i, &eth_addr);
1040*6f9cba8fSJoseph Mingrone 			eth_addr_str(&eth_addr,mac_addr,DPDK_MAC_ADDR_SIZE);
1041*6f9cba8fSJoseph Mingrone 			// PCI addr
1042*6f9cba8fSJoseph Mingrone 			rte_eth_dev_get_name_by_port(i,pci_addr);
1043*6f9cba8fSJoseph Mingrone 			snprintf(dpdk_desc,DPDK_DEV_DESC_MAX-1,"%s %s, MAC:%s, PCI:%s", DPDK_DESC, dpdk_name, mac_addr, pci_addr);
1044*6f9cba8fSJoseph Mingrone 			if (add_dev(devlistp, dpdk_name, 0, dpdk_desc, ebuf)==NULL){
1045*6f9cba8fSJoseph Mingrone 				ret = PCAP_ERROR;
1046*6f9cba8fSJoseph Mingrone 				break;
1047*6f9cba8fSJoseph Mingrone 			}
1048*6f9cba8fSJoseph Mingrone 		}
1049*6f9cba8fSJoseph Mingrone 	}while(0);
1050*6f9cba8fSJoseph Mingrone 	return ret;
1051*6f9cba8fSJoseph Mingrone }
1052*6f9cba8fSJoseph Mingrone 
1053*6f9cba8fSJoseph Mingrone #ifdef DPDK_ONLY
1054*6f9cba8fSJoseph Mingrone /*
1055*6f9cba8fSJoseph Mingrone  * This libpcap build supports only DPDK, not regular network interfaces.
1056*6f9cba8fSJoseph Mingrone  */
1057*6f9cba8fSJoseph Mingrone 
1058*6f9cba8fSJoseph Mingrone /*
1059*6f9cba8fSJoseph Mingrone  * There are no regular interfaces, just DPDK interfaces.
1060*6f9cba8fSJoseph Mingrone  */
1061*6f9cba8fSJoseph Mingrone int
1062*6f9cba8fSJoseph Mingrone pcap_platform_finddevs(pcap_if_list_t *devlistp _U_, char *errbuf)
1063*6f9cba8fSJoseph Mingrone {
1064*6f9cba8fSJoseph Mingrone 	return (0);
1065*6f9cba8fSJoseph Mingrone }
1066*6f9cba8fSJoseph Mingrone 
1067*6f9cba8fSJoseph Mingrone /*
1068*6f9cba8fSJoseph Mingrone  * Attempts to open a regular interface fail.
1069*6f9cba8fSJoseph Mingrone  */
1070*6f9cba8fSJoseph Mingrone pcap_t *
1071*6f9cba8fSJoseph Mingrone pcap_create_interface(const char *device, char *errbuf)
1072*6f9cba8fSJoseph Mingrone {
1073*6f9cba8fSJoseph Mingrone 	snprintf(errbuf, PCAP_ERRBUF_SIZE,
1074*6f9cba8fSJoseph Mingrone 	    "This version of libpcap only supports DPDK");
1075*6f9cba8fSJoseph Mingrone 	return NULL;
1076*6f9cba8fSJoseph Mingrone }
1077*6f9cba8fSJoseph Mingrone 
1078*6f9cba8fSJoseph Mingrone /*
1079*6f9cba8fSJoseph Mingrone  * Libpcap version string.
1080*6f9cba8fSJoseph Mingrone  */
1081*6f9cba8fSJoseph Mingrone const char *
1082*6f9cba8fSJoseph Mingrone pcap_lib_version(void)
1083*6f9cba8fSJoseph Mingrone {
1084*6f9cba8fSJoseph Mingrone 	return (PCAP_VERSION_STRING " (DPDK-only)");
1085*6f9cba8fSJoseph Mingrone }
1086*6f9cba8fSJoseph Mingrone #endif
1087