xref: /dpdk/drivers/net/af_xdp/rte_eth_af_xdp.c (revision 2b843cac232eb3f2fa79e4254e21766817e2019f)
1f1debd77SXiaolong Ye /* SPDX-License-Identifier: BSD-3-Clause
274b46340SCiara Loftus  * Copyright(c) 2019-2020 Intel Corporation.
3f1debd77SXiaolong Ye  */
4f1debd77SXiaolong Ye #include <unistd.h>
5f1debd77SXiaolong Ye #include <errno.h>
6f1debd77SXiaolong Ye #include <stdlib.h>
7f1debd77SXiaolong Ye #include <string.h>
8f1debd77SXiaolong Ye #include <netinet/in.h>
9f1debd77SXiaolong Ye #include <net/if.h>
107fc6ae50SShibin Koikkara Reeny #include <sys/un.h>
11f1debd77SXiaolong Ye #include <sys/socket.h>
12f1debd77SXiaolong Ye #include <sys/ioctl.h>
13f1debd77SXiaolong Ye #include <linux/if_ether.h>
14f1debd77SXiaolong Ye #include <linux/if_xdp.h>
15f1debd77SXiaolong Ye #include <linux/if_link.h>
16339b88c6SXiaolong Ye #include <linux/ethtool.h>
17339b88c6SXiaolong Ye #include <linux/sockios.h>
18f1debd77SXiaolong Ye #include "af_xdp_deps.h"
19f1debd77SXiaolong Ye 
20f1debd77SXiaolong Ye #include <rte_ethdev.h>
21df96fd0dSBruce Richardson #include <ethdev_driver.h>
22df96fd0dSBruce Richardson #include <ethdev_vdev.h>
23f1debd77SXiaolong Ye #include <rte_kvargs.h>
244851ef2bSDavid Marchand #include <bus_vdev_driver.h>
25f1debd77SXiaolong Ye #include <rte_string_fns.h>
26f1debd77SXiaolong Ye #include <rte_branch_prediction.h>
27f1debd77SXiaolong Ye #include <rte_common.h>
281acb7f54SDavid Marchand #include <dev_driver.h>
29f1debd77SXiaolong Ye #include <rte_eal.h>
30f1debd77SXiaolong Ye #include <rte_ether.h>
31f1debd77SXiaolong Ye #include <rte_lcore.h>
32f1debd77SXiaolong Ye #include <rte_log.h>
33f1debd77SXiaolong Ye #include <rte_memory.h>
34f1debd77SXiaolong Ye #include <rte_memzone.h>
35b79ae90cSCiara Loftus #include <rte_mempool.h>
36f1debd77SXiaolong Ye #include <rte_mbuf.h>
37f1debd77SXiaolong Ye #include <rte_malloc.h>
38f1debd77SXiaolong Ye #include <rte_ring.h>
3974b46340SCiara Loftus #include <rte_spinlock.h>
4043fb6eeaSAnatoly Burakov #include <rte_power_intrinsics.h>
4174b46340SCiara Loftus 
4274b46340SCiara Loftus #include "compat.h"
433d28387cSFrank Du #include "eal_filesystem.h"
4474b46340SCiara Loftus 
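/* Fallback definitions for the preferred busy-polling socket options so the
 * PMD still builds against kernel headers that predate them; busy polling
 * itself requires a kernel >= 5.11 (see the recvfrom() note in the RX path).
 */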
45055a3936SCiara Loftus #ifndef SO_PREFER_BUSY_POLL
46055a3936SCiara Loftus #define SO_PREFER_BUSY_POLL 69
47055a3936SCiara Loftus #endif
48055a3936SCiara Loftus #ifndef SO_BUSY_POLL_BUDGET
49055a3936SCiara Loftus #define SO_BUSY_POLL_BUDGET 70
50055a3936SCiara Loftus #endif
51055a3936SCiara Loftus 
52f1debd77SXiaolong Ye 
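/* Likewise, provide the AF_XDP socket level and address/protocol family
 * constants when building against headers without AF_XDP support.
 */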
53f1debd77SXiaolong Ye #ifndef SOL_XDP
54f1debd77SXiaolong Ye #define SOL_XDP 283
55f1debd77SXiaolong Ye #endif
56f1debd77SXiaolong Ye 
57f1debd77SXiaolong Ye #ifndef AF_XDP
58f1debd77SXiaolong Ye #define AF_XDP 44
59f1debd77SXiaolong Ye #endif
60f1debd77SXiaolong Ye 
61f1debd77SXiaolong Ye #ifndef PF_XDP
62f1debd77SXiaolong Ye #define PF_XDP AF_XDP
63f1debd77SXiaolong Ye #endif
64f1debd77SXiaolong Ye 
65eeded204SDavid Marchand RTE_LOG_REGISTER_DEFAULT(af_xdp_logtype, NOTICE);
66e99981afSDavid Marchand #define RTE_LOGTYPE_NET_AF_XDP af_xdp_logtype
67f1debd77SXiaolong Ye 
68*2b843cacSDavid Marchand #define AF_XDP_LOG_LINE(level, ...) \
69*2b843cacSDavid Marchand 	RTE_LOG_LINE_PREFIX(level, NET_AF_XDP, "%s(): ", __func__, __VA_ARGS__)
70f1debd77SXiaolong Ye 
713e912e39SXiaolong Ye #define ETH_AF_XDP_FRAME_SIZE		2048
72f1debd77SXiaolong Ye #define ETH_AF_XDP_NUM_BUFFERS		4096
73f1debd77SXiaolong Ye #define ETH_AF_XDP_DFLT_NUM_DESCS	XSK_RING_CONS__DEFAULT_NUM_DESCS
74339b88c6SXiaolong Ye #define ETH_AF_XDP_DFLT_START_QUEUE_IDX	0
75339b88c6SXiaolong Ye #define ETH_AF_XDP_DFLT_QUEUE_COUNT	1
76055a3936SCiara Loftus #define ETH_AF_XDP_DFLT_BUSY_BUDGET	64
77055a3936SCiara Loftus #define ETH_AF_XDP_DFLT_BUSY_TIMEOUT	20
78f1debd77SXiaolong Ye 
79d96394eaSCiara Loftus #define ETH_AF_XDP_RX_BATCH_SIZE	XSK_RING_CONS__DEFAULT_NUM_DESCS
80d96394eaSCiara Loftus #define ETH_AF_XDP_TX_BATCH_SIZE	XSK_RING_CONS__DEFAULT_NUM_DESCS
81f1debd77SXiaolong Ye 
82985e7673SCiara Loftus #define ETH_AF_XDP_ETH_OVERHEAD		(RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN)
83f1debd77SXiaolong Ye 
849876cf83SCiara Loftus #define ETH_AF_XDP_MP_KEY "afxdp_mp_send_fds"
859876cf83SCiara Loftus 
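/* The DP_* and UDS_* strings below make up the control protocol spoken over
 * a Unix domain socket with an external AF_XDP device plugin (used with the
 * use_cni, use_pinned_map and dp_path devargs); judging by the message names,
 * a connect/version handshake is followed by transfer of the XSK map file
 * descriptor which the XDP program needs to redirect traffic to this PMD.
 */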
869c132373SMaryam Tahhan #define DP_BASE_PATH			"/tmp/afxdp_dp"
879c132373SMaryam Tahhan #define DP_UDS_SOCK			"afxdp.sock"
888a324b1cSMaryam Tahhan #define DP_XSK_MAP				"xsks_map"
897fc6ae50SShibin Koikkara Reeny #define MAX_LONG_OPT_SZ			64
907fc6ae50SShibin Koikkara Reeny #define UDS_MAX_FD_NUM			2
917fc6ae50SShibin Koikkara Reeny #define UDS_MAX_CMD_LEN			64
927fc6ae50SShibin Koikkara Reeny #define UDS_MAX_CMD_RESP		128
937fc6ae50SShibin Koikkara Reeny #define UDS_XSK_MAP_FD_MSG		"/xsk_map_fd"
947fc6ae50SShibin Koikkara Reeny #define UDS_CONNECT_MSG			"/connect"
957fc6ae50SShibin Koikkara Reeny #define UDS_HOST_OK_MSG			"/host_ok"
967fc6ae50SShibin Koikkara Reeny #define UDS_HOST_NAK_MSG		"/host_nak"
977fc6ae50SShibin Koikkara Reeny #define UDS_VERSION_MSG			"/version"
997fc6ae50SShibin Koikkara Reeny #define UDS_XSK_SOCKET_MSG		"/xsk_socket"
1007fc6ae50SShibin Koikkara Reeny #define UDS_FD_ACK_MSG			"/fd_ack"
1017fc6ae50SShibin Koikkara Reeny #define UDS_FD_NAK_MSG			"/fd_nak"
1027fc6ae50SShibin Koikkara Reeny #define UDS_FIN_MSG			"/fin"
1037fc6ae50SShibin Koikkara Reeny #define UDS_FIN_ACK_MSG			"/fin_ack"
1047fc6ae50SShibin Koikkara Reeny 
1059876cf83SCiara Loftus static int afxdp_dev_count;
1069876cf83SCiara Loftus 
1079876cf83SCiara Loftus /* Message header to synchronize fds via IPC */
1089876cf83SCiara Loftus struct ipc_hdr {
1099876cf83SCiara Loftus 	char port_name[RTE_DEV_NAME_MAX_LEN];
1109876cf83SCiara Loftus 	/* The file descriptors themselves are carried in the ancillary part
1119876cf83SCiara Loftus 	 * of the Unix domain socket message and are translated by the kernel.
1129876cf83SCiara Loftus 	 */
1139876cf83SCiara Loftus };
1149876cf83SCiara Loftus 
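/* UMEM bookkeeping. Two layouts exist: with XDP_UMEM_UNALIGNED_CHUNK_FLAG
 * the UMEM is laid directly over an rte_mempool (mb_pool/buffer, zero-copy),
 * otherwise a dedicated memzone backs it and free chunk addresses are kept
 * in buf_ring (copy mode). refcnt counts the sockets sharing this UMEM.
 */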
115f1debd77SXiaolong Ye struct xsk_umem_info {
116f1debd77SXiaolong Ye 	struct xsk_umem *umem;
117f1debd77SXiaolong Ye 	struct rte_ring *buf_ring;
118f1debd77SXiaolong Ye 	const struct rte_memzone *mz;
119d8a21077SCiara Loftus 	struct rte_mempool *mb_pool;
120d8a21077SCiara Loftus 	void *buffer;
121e12a0166STyler Retzlaff 	RTE_ATOMIC(uint8_t) refcnt;
12274b46340SCiara Loftus 	uint32_t max_xsks;
123f1debd77SXiaolong Ye };
124f1debd77SXiaolong Ye 
125f1debd77SXiaolong Ye struct rx_stats {
126f1debd77SXiaolong Ye 	uint64_t rx_pkts;
127f1debd77SXiaolong Ye 	uint64_t rx_bytes;
1283577b1eeSCiara Loftus 	uint64_t imissed_offset;
129f1debd77SXiaolong Ye };
130f1debd77SXiaolong Ye 
131f1debd77SXiaolong Ye struct pkt_rx_queue {
132f1debd77SXiaolong Ye 	struct xsk_ring_cons rx;
133f1debd77SXiaolong Ye 	struct xsk_umem_info *umem;
134f1debd77SXiaolong Ye 	struct xsk_socket *xsk;
135f1debd77SXiaolong Ye 	struct rte_mempool *mb_pool;
1369bab1d26SCiara Loftus 	uint16_t port;
137f1debd77SXiaolong Ye 
138f1debd77SXiaolong Ye 	struct rx_stats stats;
139f1debd77SXiaolong Ye 
14074b46340SCiara Loftus 	struct xsk_ring_prod fq;
14174b46340SCiara Loftus 	struct xsk_ring_cons cq;
14274b46340SCiara Loftus 
143f1debd77SXiaolong Ye 	struct pkt_tx_queue *pair;
14445bba02cSXiaolong Ye 	struct pollfd fds[1];
145339b88c6SXiaolong Ye 	int xsk_queue_idx;
146055a3936SCiara Loftus 	int busy_budget;
147f1debd77SXiaolong Ye };
148f1debd77SXiaolong Ye 
149f1debd77SXiaolong Ye struct tx_stats {
150f1debd77SXiaolong Ye 	uint64_t tx_pkts;
151f1debd77SXiaolong Ye 	uint64_t tx_bytes;
152d8a21077SCiara Loftus 	uint64_t tx_dropped;
153f1debd77SXiaolong Ye };
154f1debd77SXiaolong Ye 
155f1debd77SXiaolong Ye struct pkt_tx_queue {
156f1debd77SXiaolong Ye 	struct xsk_ring_prod tx;
157d8a21077SCiara Loftus 	struct xsk_umem_info *umem;
158f1debd77SXiaolong Ye 
159f1debd77SXiaolong Ye 	struct tx_stats stats;
160f1debd77SXiaolong Ye 
161f1debd77SXiaolong Ye 	struct pkt_rx_queue *pair;
162339b88c6SXiaolong Ye 	int xsk_queue_idx;
163f1debd77SXiaolong Ye };
164f1debd77SXiaolong Ye 
165f1debd77SXiaolong Ye struct pmd_internals {
166f1debd77SXiaolong Ye 	int if_index;
167f1debd77SXiaolong Ye 	char if_name[IFNAMSIZ];
168339b88c6SXiaolong Ye 	int start_queue_idx;
169339b88c6SXiaolong Ye 	int queue_cnt;
170339b88c6SXiaolong Ye 	int max_queue_cnt;
171339b88c6SXiaolong Ye 	int combined_queue_cnt;
17274b46340SCiara Loftus 	bool shared_umem;
173288a85aeSCiara Loftus 	char prog_path[PATH_MAX];
174288a85aeSCiara Loftus 	bool custom_prog_configured;
175b275e298SXiaoyun Li 	bool force_copy;
1767fc6ae50SShibin Koikkara Reeny 	bool use_cni;
1778a324b1cSMaryam Tahhan 	bool use_pinned_map;
1789c132373SMaryam Tahhan 	char dp_path[PATH_MAX];
17901fa83c9SCiara Loftus 	struct bpf_map *map;
180339b88c6SXiaolong Ye 
1816d13ea8eSOlivier Matz 	struct rte_ether_addr eth_addr;
182f1debd77SXiaolong Ye 
183339b88c6SXiaolong Ye 	struct pkt_rx_queue *rx_queues;
184339b88c6SXiaolong Ye 	struct pkt_tx_queue *tx_queues;
185f1debd77SXiaolong Ye };
186f1debd77SXiaolong Ye 
1879876cf83SCiara Loftus struct pmd_process_private {
1889876cf83SCiara Loftus 	int rxq_xsk_fds[RTE_MAX_QUEUES_PER_PORT];
1899876cf83SCiara Loftus };
1909876cf83SCiara Loftus 
191f1debd77SXiaolong Ye #define ETH_AF_XDP_IFACE_ARG			"iface"
192339b88c6SXiaolong Ye #define ETH_AF_XDP_START_QUEUE_ARG		"start_queue"
193339b88c6SXiaolong Ye #define ETH_AF_XDP_QUEUE_COUNT_ARG		"queue_count"
19474b46340SCiara Loftus #define ETH_AF_XDP_SHARED_UMEM_ARG		"shared_umem"
195288a85aeSCiara Loftus #define ETH_AF_XDP_PROG_ARG			"xdp_prog"
196055a3936SCiara Loftus #define ETH_AF_XDP_BUDGET_ARG			"busy_budget"
197b275e298SXiaoyun Li #define ETH_AF_XDP_FORCE_COPY_ARG		"force_copy"
1987fc6ae50SShibin Koikkara Reeny #define ETH_AF_XDP_USE_CNI_ARG			"use_cni"
1998a324b1cSMaryam Tahhan #define ETH_AF_XDP_USE_PINNED_MAP_ARG	"use_pinned_map"
2009c132373SMaryam Tahhan #define ETH_AF_XDP_DP_PATH_ARG			"dp_path"
201f1debd77SXiaolong Ye 
202f1debd77SXiaolong Ye static const char * const valid_arguments[] = {
203f1debd77SXiaolong Ye 	ETH_AF_XDP_IFACE_ARG,
204339b88c6SXiaolong Ye 	ETH_AF_XDP_START_QUEUE_ARG,
205339b88c6SXiaolong Ye 	ETH_AF_XDP_QUEUE_COUNT_ARG,
20674b46340SCiara Loftus 	ETH_AF_XDP_SHARED_UMEM_ARG,
207288a85aeSCiara Loftus 	ETH_AF_XDP_PROG_ARG,
208055a3936SCiara Loftus 	ETH_AF_XDP_BUDGET_ARG,
209b275e298SXiaoyun Li 	ETH_AF_XDP_FORCE_COPY_ARG,
2107fc6ae50SShibin Koikkara Reeny 	ETH_AF_XDP_USE_CNI_ARG,
2118a324b1cSMaryam Tahhan 	ETH_AF_XDP_USE_PINNED_MAP_ARG,
2129c132373SMaryam Tahhan 	ETH_AF_XDP_DP_PATH_ARG,
213f1debd77SXiaolong Ye 	NULL
214f1debd77SXiaolong Ye };
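
/*
 * Illustrative (not authoritative) example of probing this PMD with the
 * devargs above from the EAL command line; the vdev instance name and
 * interface are placeholders:
 *
 *   --vdev=net_af_xdp0,iface=eth0,start_queue=0,queue_count=1,busy_budget=64
 */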
215f1debd77SXiaolong Ye 
216f1debd77SXiaolong Ye static const struct rte_eth_link pmd_link = {
217295968d1SFerruh Yigit 	.link_speed = RTE_ETH_SPEED_NUM_10G,
218295968d1SFerruh Yigit 	.link_duplex = RTE_ETH_LINK_FULL_DUPLEX,
219295968d1SFerruh Yigit 	.link_status = RTE_ETH_LINK_DOWN,
220295968d1SFerruh Yigit 	.link_autoneg = RTE_ETH_LINK_AUTONEG
221f1debd77SXiaolong Ye };
222f1debd77SXiaolong Ye 
22374b46340SCiara Loftus /* List which tracks PMDs to facilitate sharing UMEMs across them. */
22474b46340SCiara Loftus struct internal_list {
22574b46340SCiara Loftus 	TAILQ_ENTRY(internal_list) next;
22674b46340SCiara Loftus 	struct rte_eth_dev *eth_dev;
22774b46340SCiara Loftus };
22874b46340SCiara Loftus 
22974b46340SCiara Loftus TAILQ_HEAD(internal_list_head, internal_list);
23074b46340SCiara Loftus static struct internal_list_head internal_list =
23174b46340SCiara Loftus 	TAILQ_HEAD_INITIALIZER(internal_list);
23274b46340SCiara Loftus 
23374b46340SCiara Loftus static pthread_mutex_t internal_list_lock = PTHREAD_MUTEX_INITIALIZER;
23474b46340SCiara Loftus 
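/* Fill queue replenishment. In zero-copy mode the fill descriptors carry the
 * UMEM offset of freshly allocated mbufs (mbuf address relative to the UMEM
 * base, minus the mempool header); in copy mode fixed chunk addresses are
 * dequeued from buf_ring instead. If the fill ring cannot be reserved the
 * buffers are returned to their source.
 */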
235d8a21077SCiara Loftus #if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
236f1debd77SXiaolong Ye static inline int
237d8a21077SCiara Loftus reserve_fill_queue_zc(struct xsk_umem_info *umem, uint16_t reserve_size,
23874b46340SCiara Loftus 		      struct rte_mbuf **bufs, struct xsk_ring_prod *fq)
239d8a21077SCiara Loftus {
240d8a21077SCiara Loftus 	uint32_t idx;
241d8a21077SCiara Loftus 	uint16_t i;
242d8a21077SCiara Loftus 
243d8a21077SCiara Loftus 	if (unlikely(!xsk_ring_prod__reserve(fq, reserve_size, &idx))) {
244d8a21077SCiara Loftus 		for (i = 0; i < reserve_size; i++)
245d8a21077SCiara Loftus 			rte_pktmbuf_free(bufs[i]);
246e99981afSDavid Marchand 		AF_XDP_LOG_LINE(DEBUG, "Failed to reserve enough fq descs.");
247d8a21077SCiara Loftus 		return -1;
248d8a21077SCiara Loftus 	}
249d8a21077SCiara Loftus 
250d8a21077SCiara Loftus 	for (i = 0; i < reserve_size; i++) {
251d8a21077SCiara Loftus 		__u64 *fq_addr;
252d8a21077SCiara Loftus 		uint64_t addr;
253d8a21077SCiara Loftus 
254d8a21077SCiara Loftus 		fq_addr = xsk_ring_prod__fill_addr(fq, idx++);
25596d8ae99SCiara Loftus 		addr = (uint64_t)bufs[i] - (uint64_t)umem->buffer -
25696d8ae99SCiara Loftus 				umem->mb_pool->header_size;
257d8a21077SCiara Loftus 		*fq_addr = addr;
258d8a21077SCiara Loftus 	}
259d8a21077SCiara Loftus 
260d8a21077SCiara Loftus 	xsk_ring_prod__submit(fq, reserve_size);
261d8a21077SCiara Loftus 
262d8a21077SCiara Loftus 	return 0;
263d8a21077SCiara Loftus }
264d8a21077SCiara Loftus #else
265d8a21077SCiara Loftus static inline int
266d8a21077SCiara Loftus reserve_fill_queue_cp(struct xsk_umem_info *umem, uint16_t reserve_size,
26774b46340SCiara Loftus 		      struct rte_mbuf **bufs __rte_unused,
26874b46340SCiara Loftus 		      struct xsk_ring_prod *fq)
269f1debd77SXiaolong Ye {
27010edf857SXiaolong Ye 	void *addrs[reserve_size];
271f1debd77SXiaolong Ye 	uint32_t idx;
27210edf857SXiaolong Ye 	uint16_t i;
273f1debd77SXiaolong Ye 
27410edf857SXiaolong Ye 	if (rte_ring_dequeue_bulk(umem->buf_ring, addrs, reserve_size, NULL)
27510edf857SXiaolong Ye 		    != reserve_size) {
276e99981afSDavid Marchand 		AF_XDP_LOG_LINE(DEBUG, "Failed to get enough buffers for fq.");
27710edf857SXiaolong Ye 		return -1;
27810edf857SXiaolong Ye 	}
27910edf857SXiaolong Ye 
28010edf857SXiaolong Ye 	if (unlikely(!xsk_ring_prod__reserve(fq, reserve_size, &idx))) {
281e99981afSDavid Marchand 		AF_XDP_LOG_LINE(DEBUG, "Failed to reserve enough fq descs.");
28210edf857SXiaolong Ye 		rte_ring_enqueue_bulk(umem->buf_ring, addrs,
28310edf857SXiaolong Ye 				reserve_size, NULL);
28410edf857SXiaolong Ye 		return -1;
285f1debd77SXiaolong Ye 	}
286f1debd77SXiaolong Ye 
287f1debd77SXiaolong Ye 	for (i = 0; i < reserve_size; i++) {
288f1debd77SXiaolong Ye 		__u64 *fq_addr;
28910edf857SXiaolong Ye 
290f1debd77SXiaolong Ye 		fq_addr = xsk_ring_prod__fill_addr(fq, idx++);
29110edf857SXiaolong Ye 		*fq_addr = (uint64_t)addrs[i];
292f1debd77SXiaolong Ye 	}
293f1debd77SXiaolong Ye 
29410edf857SXiaolong Ye 	xsk_ring_prod__submit(fq, reserve_size);
295f1debd77SXiaolong Ye 
296f1debd77SXiaolong Ye 	return 0;
297f1debd77SXiaolong Ye }
298d8a21077SCiara Loftus #endif
299f1debd77SXiaolong Ye 
300d8a21077SCiara Loftus static inline int
301d8a21077SCiara Loftus reserve_fill_queue(struct xsk_umem_info *umem, uint16_t reserve_size,
30274b46340SCiara Loftus 		   struct rte_mbuf **bufs, struct xsk_ring_prod *fq)
303e9ff8bb7SXiaolong Ye {
304d8a21077SCiara Loftus #if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
30574b46340SCiara Loftus 	return reserve_fill_queue_zc(umem, reserve_size, bufs, fq);
306d8a21077SCiara Loftus #else
30774b46340SCiara Loftus 	return reserve_fill_queue_cp(umem, reserve_size, bufs, fq);
308d8a21077SCiara Loftus #endif
309e9ff8bb7SXiaolong Ye }
310e9ff8bb7SXiaolong Ye 
311d8a21077SCiara Loftus #if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
312f1debd77SXiaolong Ye static uint16_t
313d8a21077SCiara Loftus af_xdp_rx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
314d8a21077SCiara Loftus {
315d8a21077SCiara Loftus 	struct pkt_rx_queue *rxq = queue;
316d8a21077SCiara Loftus 	struct xsk_ring_cons *rx = &rxq->rx;
31774b46340SCiara Loftus 	struct xsk_ring_prod *fq = &rxq->fq;
318d8a21077SCiara Loftus 	struct xsk_umem_info *umem = rxq->umem;
319d8a21077SCiara Loftus 	uint32_t idx_rx = 0;
320d8a21077SCiara Loftus 	unsigned long rx_bytes = 0;
321543e64d2SRongQing Li 	int i;
322d8a21077SCiara Loftus 	struct rte_mbuf *fq_bufs[ETH_AF_XDP_RX_BATCH_SIZE];
323f294405aSCiara Loftus 	struct rte_eth_dev *dev = &rte_eth_devices[rxq->port];
324d8a21077SCiara Loftus 
325543e64d2SRongQing Li 	nb_pkts = xsk_ring_cons__peek(rx, nb_pkts, &idx_rx);
326d8a21077SCiara Loftus 
327543e64d2SRongQing Li 	if (nb_pkts == 0) {
32889233c08SCiara Loftus 		/* we can assume a kernel >= 5.11 is in use if busy polling is
32989233c08SCiara Loftus 		 * enabled and thus we can safely use the recvfrom() syscall
33089233c08SCiara Loftus 		 * which is only supported for AF_XDP sockets in kernels >=
33189233c08SCiara Loftus 		 * 5.11.
33289233c08SCiara Loftus 		 */
33389233c08SCiara Loftus 		if (rxq->busy_budget) {
33489233c08SCiara Loftus 			(void)recvfrom(xsk_socket__fd(rxq->xsk), NULL, 0,
33589233c08SCiara Loftus 				       MSG_DONTWAIT, NULL, NULL);
33689233c08SCiara Loftus 		} else if (xsk_ring_prod__needs_wakeup(fq)) {
33789233c08SCiara Loftus 			(void)poll(&rxq->fds[0], 1, 1000);
33889233c08SCiara Loftus 		}
33989233c08SCiara Loftus 
340543e64d2SRongQing Li 		return 0;
341d8a21077SCiara Loftus 	}
342d8a21077SCiara Loftus 
343543e64d2SRongQing Li 	/* allocate bufs for fill queue replenishment after rx */
344543e64d2SRongQing Li 	if (rte_pktmbuf_alloc_bulk(umem->mb_pool, fq_bufs, nb_pkts)) {
345e99981afSDavid Marchand 		AF_XDP_LOG_LINE(DEBUG,
346e99981afSDavid Marchand 			"Failed to get enough buffers for fq.");
347543e64d2SRongQing Li 		/* roll back cached_cons, which was advanced by
348543e64d2SRongQing Li 		 * xsk_ring_cons__peek
349543e64d2SRongQing Li 		 */
350543e64d2SRongQing Li 		rx->cached_cons -= nb_pkts;
351f294405aSCiara Loftus 		dev->data->rx_mbuf_alloc_failed += nb_pkts;
352f294405aSCiara Loftus 
353543e64d2SRongQing Li 		return 0;
354543e64d2SRongQing Li 	}
355543e64d2SRongQing Li 
356543e64d2SRongQing Li 	for (i = 0; i < nb_pkts; i++) {
357d8a21077SCiara Loftus 		const struct xdp_desc *desc;
358d8a21077SCiara Loftus 		uint64_t addr;
359d8a21077SCiara Loftus 		uint32_t len;
360d8a21077SCiara Loftus 		uint64_t offset;
361d8a21077SCiara Loftus 
362d8a21077SCiara Loftus 		desc = xsk_ring_cons__rx_desc(rx, idx_rx++);
363d8a21077SCiara Loftus 		addr = desc->addr;
364d8a21077SCiara Loftus 		len = desc->len;
365d8a21077SCiara Loftus 
366d8a21077SCiara Loftus 		offset = xsk_umem__extract_offset(addr);
367d8a21077SCiara Loftus 		addr = xsk_umem__extract_addr(addr);
368d8a21077SCiara Loftus 
369d8a21077SCiara Loftus 		bufs[i] = (struct rte_mbuf *)
37096d8ae99SCiara Loftus 				xsk_umem__get_data(umem->buffer, addr +
37196d8ae99SCiara Loftus 					umem->mb_pool->header_size);
37296d8ae99SCiara Loftus 		bufs[i]->data_off = offset - sizeof(struct rte_mbuf) -
37396d8ae99SCiara Loftus 			rte_pktmbuf_priv_size(umem->mb_pool) -
37496d8ae99SCiara Loftus 			umem->mb_pool->header_size;
3759bab1d26SCiara Loftus 		bufs[i]->port = rxq->port;
376d8a21077SCiara Loftus 
377d8a21077SCiara Loftus 		rte_pktmbuf_pkt_len(bufs[i]) = len;
378d8a21077SCiara Loftus 		rte_pktmbuf_data_len(bufs[i]) = len;
379d8a21077SCiara Loftus 		rx_bytes += len;
380d8a21077SCiara Loftus 	}
381d8a21077SCiara Loftus 
382543e64d2SRongQing Li 	xsk_ring_cons__release(rx, nb_pkts);
383543e64d2SRongQing Li 	(void)reserve_fill_queue(umem, nb_pkts, fq_bufs, fq);
384d8a21077SCiara Loftus 
385d8a21077SCiara Loftus 	/* statistics */
386543e64d2SRongQing Li 	rxq->stats.rx_pkts += nb_pkts;
387d8a21077SCiara Loftus 	rxq->stats.rx_bytes += rx_bytes;
388d8a21077SCiara Loftus 
389543e64d2SRongQing Li 	return nb_pkts;
390d8a21077SCiara Loftus }
391d8a21077SCiara Loftus #else
392d8a21077SCiara Loftus static uint16_t
393d8a21077SCiara Loftus af_xdp_rx_cp(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
394f1debd77SXiaolong Ye {
395f1debd77SXiaolong Ye 	struct pkt_rx_queue *rxq = queue;
396f1debd77SXiaolong Ye 	struct xsk_ring_cons *rx = &rxq->rx;
397f1debd77SXiaolong Ye 	struct xsk_umem_info *umem = rxq->umem;
39874b46340SCiara Loftus 	struct xsk_ring_prod *fq = &rxq->fq;
399f1debd77SXiaolong Ye 	uint32_t idx_rx = 0;
400f1debd77SXiaolong Ye 	unsigned long rx_bytes = 0;
401543e64d2SRongQing Li 	int i;
402d8a21077SCiara Loftus 	uint32_t free_thresh = fq->size >> 1;
403d8a21077SCiara Loftus 	struct rte_mbuf *mbufs[ETH_AF_XDP_RX_BATCH_SIZE];
404f294405aSCiara Loftus 	struct rte_eth_dev *dev = &rte_eth_devices[rxq->port];
405f1debd77SXiaolong Ye 
406ab7ed23aSRongQing Li 	if (xsk_prod_nb_free(fq, free_thresh) >= free_thresh)
407d96394eaSCiara Loftus 		(void)reserve_fill_queue(umem, nb_pkts, NULL, fq);
408ab7ed23aSRongQing Li 
409543e64d2SRongQing Li 	nb_pkts = xsk_ring_cons__peek(rx, nb_pkts, &idx_rx);
410543e64d2SRongQing Li 	if (nb_pkts == 0) {
41145bba02cSXiaolong Ye #if defined(XDP_USE_NEED_WAKEUP)
41245bba02cSXiaolong Ye 		if (xsk_ring_prod__needs_wakeup(fq))
413b42cffabSCiara Loftus 			(void)poll(rxq->fds, 1, 1000);
41445bba02cSXiaolong Ye #endif
415543e64d2SRongQing Li 		return 0;
41645bba02cSXiaolong Ye 	}
417f1debd77SXiaolong Ye 
418543e64d2SRongQing Li 	if (unlikely(rte_pktmbuf_alloc_bulk(rxq->mb_pool, mbufs, nb_pkts))) {
419543e64d2SRongQing Li 		/* roll back cached_cons, which was advanced by
420543e64d2SRongQing Li 		 * xsk_ring_cons__peek
421543e64d2SRongQing Li 		 */
422543e64d2SRongQing Li 		rx->cached_cons -= nb_pkts;
423f294405aSCiara Loftus 		dev->data->rx_mbuf_alloc_failed += nb_pkts;
424543e64d2SRongQing Li 		return 0;
425543e64d2SRongQing Li 	}
426543e64d2SRongQing Li 
427543e64d2SRongQing Li 	for (i = 0; i < nb_pkts; i++) {
428f1debd77SXiaolong Ye 		const struct xdp_desc *desc;
429f1debd77SXiaolong Ye 		uint64_t addr;
430f1debd77SXiaolong Ye 		uint32_t len;
431f1debd77SXiaolong Ye 		void *pkt;
432f1debd77SXiaolong Ye 
433f1debd77SXiaolong Ye 		desc = xsk_ring_cons__rx_desc(rx, idx_rx++);
434f1debd77SXiaolong Ye 		addr = desc->addr;
435f1debd77SXiaolong Ye 		len = desc->len;
436f1debd77SXiaolong Ye 		pkt = xsk_umem__get_data(rxq->umem->mz->addr, addr);
437f1debd77SXiaolong Ye 
438d8a21077SCiara Loftus 		rte_memcpy(rte_pktmbuf_mtod(mbufs[i], void *), pkt, len);
439e9ff8bb7SXiaolong Ye 		rte_ring_enqueue(umem->buf_ring, (void *)addr);
440f1debd77SXiaolong Ye 		rte_pktmbuf_pkt_len(mbufs[i]) = len;
441f1debd77SXiaolong Ye 		rte_pktmbuf_data_len(mbufs[i]) = len;
442f1debd77SXiaolong Ye 		rx_bytes += len;
44310edf857SXiaolong Ye 		bufs[i] = mbufs[i];
4449bab1d26SCiara Loftus 		bufs[i]->port = rxq->port;
445f1debd77SXiaolong Ye 	}
446f1debd77SXiaolong Ye 
447543e64d2SRongQing Li 	xsk_ring_cons__release(rx, nb_pkts);
448f1debd77SXiaolong Ye 
449f1debd77SXiaolong Ye 	/* statistics */
450543e64d2SRongQing Li 	rxq->stats.rx_pkts += nb_pkts;
451f1debd77SXiaolong Ye 	rxq->stats.rx_bytes += rx_bytes;
452f1debd77SXiaolong Ye 
453543e64d2SRongQing Li 	return nb_pkts;
454f1debd77SXiaolong Ye }
455d8a21077SCiara Loftus #endif
456d8a21077SCiara Loftus 
457d8a21077SCiara Loftus static uint16_t
458d96394eaSCiara Loftus af_xdp_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
459d8a21077SCiara Loftus {
460d8a21077SCiara Loftus #if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
461d8a21077SCiara Loftus 	return af_xdp_rx_zc(queue, bufs, nb_pkts);
462d8a21077SCiara Loftus #else
463d8a21077SCiara Loftus 	return af_xdp_rx_cp(queue, bufs, nb_pkts);
464d8a21077SCiara Loftus #endif
465d8a21077SCiara Loftus }
466f1debd77SXiaolong Ye 
467d96394eaSCiara Loftus static uint16_t
468d96394eaSCiara Loftus eth_af_xdp_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
469d96394eaSCiara Loftus {
470d96394eaSCiara Loftus 	uint16_t nb_rx;
471d96394eaSCiara Loftus 
472d96394eaSCiara Loftus 	if (likely(nb_pkts <= ETH_AF_XDP_RX_BATCH_SIZE))
473d96394eaSCiara Loftus 		return af_xdp_rx(queue, bufs, nb_pkts);
474d96394eaSCiara Loftus 
475d96394eaSCiara Loftus 	/* Split larger batch into smaller batches of size
476d96394eaSCiara Loftus 	 * ETH_AF_XDP_RX_BATCH_SIZE or less.
477d96394eaSCiara Loftus 	 */
478d96394eaSCiara Loftus 	nb_rx = 0;
479d96394eaSCiara Loftus 	while (nb_pkts) {
480d96394eaSCiara Loftus 		uint16_t ret, n;
481d96394eaSCiara Loftus 
482d96394eaSCiara Loftus 		n = (uint16_t)RTE_MIN(nb_pkts, ETH_AF_XDP_RX_BATCH_SIZE);
483d96394eaSCiara Loftus 		ret = af_xdp_rx(queue, &bufs[nb_rx], n);
484d96394eaSCiara Loftus 		nb_rx = (uint16_t)(nb_rx + ret);
485d96394eaSCiara Loftus 		nb_pkts = (uint16_t)(nb_pkts - ret);
486d96394eaSCiara Loftus 		if (ret < n)
487d96394eaSCiara Loftus 			break;
488d96394eaSCiara Loftus 	}
489d96394eaSCiara Loftus 
490d96394eaSCiara Loftus 	return nb_rx;
491d96394eaSCiara Loftus }
492d96394eaSCiara Loftus 
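/* Reclaim completed TX descriptors from the completion ring: in zero-copy
 * mode each completion address is converted back to its mbuf and freed, in
 * copy mode the chunk address is returned to buf_ring for reuse.
 */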
493f1debd77SXiaolong Ye static void
49474b46340SCiara Loftus pull_umem_cq(struct xsk_umem_info *umem, int size, struct xsk_ring_cons *cq)
495f1debd77SXiaolong Ye {
496f1debd77SXiaolong Ye 	size_t i, n;
497f1debd77SXiaolong Ye 	uint32_t idx_cq = 0;
498f1debd77SXiaolong Ye 
499f1debd77SXiaolong Ye 	n = xsk_ring_cons__peek(cq, size, &idx_cq);
500f1debd77SXiaolong Ye 
501f1debd77SXiaolong Ye 	for (i = 0; i < n; i++) {
502f1debd77SXiaolong Ye 		uint64_t addr;
503f1debd77SXiaolong Ye 		addr = *xsk_ring_cons__comp_addr(cq, idx_cq++);
504d8a21077SCiara Loftus #if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
505d8a21077SCiara Loftus 		addr = xsk_umem__extract_addr(addr);
506d8a21077SCiara Loftus 		rte_pktmbuf_free((struct rte_mbuf *)
50796d8ae99SCiara Loftus 					xsk_umem__get_data(umem->buffer,
50896d8ae99SCiara Loftus 					addr + umem->mb_pool->header_size));
509d8a21077SCiara Loftus #else
510f1debd77SXiaolong Ye 		rte_ring_enqueue(umem->buf_ring, (void *)addr);
511d8a21077SCiara Loftus #endif
512f1debd77SXiaolong Ye 	}
513f1debd77SXiaolong Ye 
514f1debd77SXiaolong Ye 	xsk_ring_cons__release(cq, n);
515f1debd77SXiaolong Ye }
516f1debd77SXiaolong Ye 
517f1debd77SXiaolong Ye static void
51874b46340SCiara Loftus kick_tx(struct pkt_tx_queue *txq, struct xsk_ring_cons *cq)
519f1debd77SXiaolong Ye {
520d8a21077SCiara Loftus 	struct xsk_umem_info *umem = txq->umem;
521f1debd77SXiaolong Ye 
52274b46340SCiara Loftus 	pull_umem_cq(umem, XSK_RING_CONS__DEFAULT_NUM_DESCS, cq);
523a63d4fcaSCiara Loftus 
5242aa51cddSCiara Loftus 	if (tx_syscall_needed(&txq->tx))
525f1debd77SXiaolong Ye 		while (send(xsk_socket__fd(txq->pair->xsk), NULL,
526f1debd77SXiaolong Ye 			    0, MSG_DONTWAIT) < 0) {
527f1debd77SXiaolong Ye 			/* something unexpected happened */
528f1debd77SXiaolong Ye 			if (errno != EBUSY && errno != EAGAIN && errno != EINTR)
529f1debd77SXiaolong Ye 				break;
530f1debd77SXiaolong Ye 
531f1debd77SXiaolong Ye 			/* pull from completion queue to leave more space */
532f1debd77SXiaolong Ye 			if (errno == EAGAIN)
533a63d4fcaSCiara Loftus 				pull_umem_cq(umem,
53474b46340SCiara Loftus 					     XSK_RING_CONS__DEFAULT_NUM_DESCS,
53574b46340SCiara Loftus 					     cq);
536f1debd77SXiaolong Ye 		}
537f1debd77SXiaolong Ye }
538f1debd77SXiaolong Ye 
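/* Zero-copy TX: mbufs that come from the UMEM mempool are attached to TX
 * descriptors in place; mbufs from any other pool are first copied into a
 * freshly allocated UMEM mbuf. Descriptor addresses encode the UMEM offset
 * plus a data offset shifted by XSK_UNALIGNED_BUF_OFFSET_SHIFT.
 */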
539d8a21077SCiara Loftus #if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
540f1debd77SXiaolong Ye static uint16_t
541d8a21077SCiara Loftus af_xdp_tx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
542f1debd77SXiaolong Ye {
543f1debd77SXiaolong Ye 	struct pkt_tx_queue *txq = queue;
544d8a21077SCiara Loftus 	struct xsk_umem_info *umem = txq->umem;
545f1debd77SXiaolong Ye 	struct rte_mbuf *mbuf;
546d8a21077SCiara Loftus 	unsigned long tx_bytes = 0;
547d8a21077SCiara Loftus 	int i;
548d8a21077SCiara Loftus 	uint32_t idx_tx;
549d8a21077SCiara Loftus 	uint16_t count = 0;
550d8a21077SCiara Loftus 	struct xdp_desc *desc;
551d8a21077SCiara Loftus 	uint64_t addr, offset;
55274b46340SCiara Loftus 	struct xsk_ring_cons *cq = &txq->pair->cq;
55374b46340SCiara Loftus 	uint32_t free_thresh = cq->size >> 1;
554d8a21077SCiara Loftus 
55574b46340SCiara Loftus 	if (xsk_cons_nb_avail(cq, free_thresh) >= free_thresh)
55674b46340SCiara Loftus 		pull_umem_cq(umem, XSK_RING_CONS__DEFAULT_NUM_DESCS, cq);
557d8a21077SCiara Loftus 
558d8a21077SCiara Loftus 	for (i = 0; i < nb_pkts; i++) {
559d8a21077SCiara Loftus 		mbuf = bufs[i];
560d8a21077SCiara Loftus 
561d8a21077SCiara Loftus 		if (mbuf->pool == umem->mb_pool) {
562d8a21077SCiara Loftus 			if (!xsk_ring_prod__reserve(&txq->tx, 1, &idx_tx)) {
56374b46340SCiara Loftus 				kick_tx(txq, cq);
564a63d4fcaSCiara Loftus 				if (!xsk_ring_prod__reserve(&txq->tx, 1,
565a63d4fcaSCiara Loftus 							    &idx_tx))
566d8a21077SCiara Loftus 					goto out;
567d8a21077SCiara Loftus 			}
568d8a21077SCiara Loftus 			desc = xsk_ring_prod__tx_desc(&txq->tx, idx_tx);
569d8a21077SCiara Loftus 			desc->len = mbuf->pkt_len;
57096d8ae99SCiara Loftus 			addr = (uint64_t)mbuf - (uint64_t)umem->buffer -
57196d8ae99SCiara Loftus 					umem->mb_pool->header_size;
572d8a21077SCiara Loftus 			offset = rte_pktmbuf_mtod(mbuf, uint64_t) -
57396d8ae99SCiara Loftus 					(uint64_t)mbuf +
57496d8ae99SCiara Loftus 					umem->mb_pool->header_size;
575d8a21077SCiara Loftus 			offset = offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT;
576d8a21077SCiara Loftus 			desc->addr = addr | offset;
577d8a21077SCiara Loftus 			count++;
578d8a21077SCiara Loftus 		} else {
579d8a21077SCiara Loftus 			struct rte_mbuf *local_mbuf =
580d8a21077SCiara Loftus 					rte_pktmbuf_alloc(umem->mb_pool);
581d8a21077SCiara Loftus 			void *pkt;
582d8a21077SCiara Loftus 
583d8a21077SCiara Loftus 			if (local_mbuf == NULL)
584d8a21077SCiara Loftus 				goto out;
585d8a21077SCiara Loftus 
586d8a21077SCiara Loftus 			if (!xsk_ring_prod__reserve(&txq->tx, 1, &idx_tx)) {
587d8a21077SCiara Loftus 				rte_pktmbuf_free(local_mbuf);
588d8a21077SCiara Loftus 				goto out;
589d8a21077SCiara Loftus 			}
590d8a21077SCiara Loftus 
591d8a21077SCiara Loftus 			desc = xsk_ring_prod__tx_desc(&txq->tx, idx_tx);
592d8a21077SCiara Loftus 			desc->len = mbuf->pkt_len;
593d8a21077SCiara Loftus 
59496d8ae99SCiara Loftus 			addr = (uint64_t)local_mbuf - (uint64_t)umem->buffer -
59596d8ae99SCiara Loftus 					umem->mb_pool->header_size;
596d8a21077SCiara Loftus 			offset = rte_pktmbuf_mtod(local_mbuf, uint64_t) -
59796d8ae99SCiara Loftus 					(uint64_t)local_mbuf +
59896d8ae99SCiara Loftus 					umem->mb_pool->header_size;
599d8a21077SCiara Loftus 			pkt = xsk_umem__get_data(umem->buffer, addr + offset);
600d8a21077SCiara Loftus 			offset = offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT;
601d8a21077SCiara Loftus 			desc->addr = addr | offset;
602d8a21077SCiara Loftus 			rte_memcpy(pkt, rte_pktmbuf_mtod(mbuf, void *),
603d8a21077SCiara Loftus 					desc->len);
604d8a21077SCiara Loftus 			rte_pktmbuf_free(mbuf);
605d8a21077SCiara Loftus 			count++;
606d8a21077SCiara Loftus 		}
607d8a21077SCiara Loftus 
608d8a21077SCiara Loftus 		tx_bytes += mbuf->pkt_len;
609d8a21077SCiara Loftus 	}
610d8a21077SCiara Loftus 
611d8a21077SCiara Loftus out:
612d8a21077SCiara Loftus 	xsk_ring_prod__submit(&txq->tx, count);
6130f2ffb49SBaruch Siach 	kick_tx(txq, cq);
614d8a21077SCiara Loftus 
615d8a21077SCiara Loftus 	txq->stats.tx_pkts += count;
616d8a21077SCiara Loftus 	txq->stats.tx_bytes += tx_bytes;
617d8a21077SCiara Loftus 	txq->stats.tx_dropped += nb_pkts - count;
618d8a21077SCiara Loftus 
619d8a21077SCiara Loftus 	return count;
620d8a21077SCiara Loftus }
621d8a21077SCiara Loftus #else
622d8a21077SCiara Loftus static uint16_t
623d8a21077SCiara Loftus af_xdp_tx_cp(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
624d8a21077SCiara Loftus {
625d8a21077SCiara Loftus 	struct pkt_tx_queue *txq = queue;
626d8a21077SCiara Loftus 	struct xsk_umem_info *umem = txq->umem;
627d8a21077SCiara Loftus 	struct rte_mbuf *mbuf;
628f1debd77SXiaolong Ye 	void *addrs[ETH_AF_XDP_TX_BATCH_SIZE];
629f1debd77SXiaolong Ye 	unsigned long tx_bytes = 0;
63010edf857SXiaolong Ye 	int i;
631f1debd77SXiaolong Ye 	uint32_t idx_tx;
63274b46340SCiara Loftus 	struct xsk_ring_cons *cq = &txq->pair->cq;
633f1debd77SXiaolong Ye 
63474b46340SCiara Loftus 	pull_umem_cq(umem, nb_pkts, cq);
635f1debd77SXiaolong Ye 
636f1debd77SXiaolong Ye 	nb_pkts = rte_ring_dequeue_bulk(umem->buf_ring, addrs,
637f1debd77SXiaolong Ye 					nb_pkts, NULL);
638f1debd77SXiaolong Ye 	if (nb_pkts == 0)
639f1debd77SXiaolong Ye 		return 0;
640f1debd77SXiaolong Ye 
641f1debd77SXiaolong Ye 	if (xsk_ring_prod__reserve(&txq->tx, nb_pkts, &idx_tx) != nb_pkts) {
64274b46340SCiara Loftus 		kick_tx(txq, cq);
643f57ea89cSXiaolong Ye 		rte_ring_enqueue_bulk(umem->buf_ring, addrs, nb_pkts, NULL);
644f1debd77SXiaolong Ye 		return 0;
645f1debd77SXiaolong Ye 	}
646f1debd77SXiaolong Ye 
647f1debd77SXiaolong Ye 	for (i = 0; i < nb_pkts; i++) {
648f1debd77SXiaolong Ye 		struct xdp_desc *desc;
649f1debd77SXiaolong Ye 		void *pkt;
65010edf857SXiaolong Ye 
651f1debd77SXiaolong Ye 		desc = xsk_ring_prod__tx_desc(&txq->tx, idx_tx + i);
652f1debd77SXiaolong Ye 		mbuf = bufs[i];
653f1debd77SXiaolong Ye 		desc->len = mbuf->pkt_len;
654e9ff8bb7SXiaolong Ye 
655e9ff8bb7SXiaolong Ye 		desc->addr = (uint64_t)addrs[i];
656f1debd77SXiaolong Ye 		pkt = xsk_umem__get_data(umem->mz->addr,
657f1debd77SXiaolong Ye 					 desc->addr);
658d8a21077SCiara Loftus 		rte_memcpy(pkt, rte_pktmbuf_mtod(mbuf, void *), desc->len);
659f1debd77SXiaolong Ye 		tx_bytes += mbuf->pkt_len;
660d8a21077SCiara Loftus 		rte_pktmbuf_free(mbuf);
661f1debd77SXiaolong Ye 	}
662f1debd77SXiaolong Ye 
663f1debd77SXiaolong Ye 	xsk_ring_prod__submit(&txq->tx, nb_pkts);
664f1debd77SXiaolong Ye 
66574b46340SCiara Loftus 	kick_tx(txq, cq);
666f1debd77SXiaolong Ye 
66710edf857SXiaolong Ye 	txq->stats.tx_pkts += nb_pkts;
668f1debd77SXiaolong Ye 	txq->stats.tx_bytes += tx_bytes;
669f1debd77SXiaolong Ye 
670f1debd77SXiaolong Ye 	return nb_pkts;
671f1debd77SXiaolong Ye }
672d96394eaSCiara Loftus 
673d96394eaSCiara Loftus static uint16_t
674d96394eaSCiara Loftus af_xdp_tx_cp_batch(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
675d96394eaSCiara Loftus {
676d96394eaSCiara Loftus 	uint16_t nb_tx;
677d96394eaSCiara Loftus 
678d96394eaSCiara Loftus 	if (likely(nb_pkts <= ETH_AF_XDP_TX_BATCH_SIZE))
679d96394eaSCiara Loftus 		return af_xdp_tx_cp(queue, bufs, nb_pkts);
680d96394eaSCiara Loftus 
681d96394eaSCiara Loftus 	nb_tx = 0;
682d96394eaSCiara Loftus 	while (nb_pkts) {
683d96394eaSCiara Loftus 		uint16_t ret, n;
684d96394eaSCiara Loftus 
685d96394eaSCiara Loftus 		/* Split larger batch into smaller batches of size
686d96394eaSCiara Loftus 		 * ETH_AF_XDP_TX_BATCH_SIZE or less.
687d96394eaSCiara Loftus 		 */
688d96394eaSCiara Loftus 		n = (uint16_t)RTE_MIN(nb_pkts, ETH_AF_XDP_TX_BATCH_SIZE);
689d96394eaSCiara Loftus 		ret = af_xdp_tx_cp(queue, &bufs[nb_tx], n);
690d96394eaSCiara Loftus 		nb_tx = (uint16_t)(nb_tx + ret);
691d96394eaSCiara Loftus 		nb_pkts = (uint16_t)(nb_pkts - ret);
692d96394eaSCiara Loftus 		if (ret < n)
693d96394eaSCiara Loftus 			break;
694d96394eaSCiara Loftus 	}
695d96394eaSCiara Loftus 
696d96394eaSCiara Loftus 	return nb_tx;
697d96394eaSCiara Loftus }
698d8a21077SCiara Loftus #endif
699d8a21077SCiara Loftus 
700d8a21077SCiara Loftus static uint16_t
701d8a21077SCiara Loftus eth_af_xdp_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
702d8a21077SCiara Loftus {
703d8a21077SCiara Loftus #if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
704d8a21077SCiara Loftus 	return af_xdp_tx_zc(queue, bufs, nb_pkts);
705d8a21077SCiara Loftus #else
706d96394eaSCiara Loftus 	return af_xdp_tx_cp_batch(queue, bufs, nb_pkts);
707d8a21077SCiara Loftus #endif
708d8a21077SCiara Loftus }
709f1debd77SXiaolong Ye 
710f1debd77SXiaolong Ye static int
711f1debd77SXiaolong Ye eth_dev_start(struct rte_eth_dev *dev)
712f1debd77SXiaolong Ye {
7139a204f7eSJie Hai 	uint16_t i;
7149a204f7eSJie Hai 
715295968d1SFerruh Yigit 	dev->data->dev_link.link_status = RTE_ETH_LINK_UP;
7169a204f7eSJie Hai 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
7179a204f7eSJie Hai 		dev->data->rx_queue_state[i] = RTE_ETH_QUEUE_STATE_STARTED;
7189a204f7eSJie Hai 		dev->data->tx_queue_state[i] = RTE_ETH_QUEUE_STATE_STARTED;
7199a204f7eSJie Hai 	}
720f1debd77SXiaolong Ye 
721f1debd77SXiaolong Ye 	return 0;
722f1debd77SXiaolong Ye }
723f1debd77SXiaolong Ye 
724f1debd77SXiaolong Ye /* This function gets called when the current port gets stopped. */
72562024eb8SIvan Ilchenko static int
726f1debd77SXiaolong Ye eth_dev_stop(struct rte_eth_dev *dev)
727f1debd77SXiaolong Ye {
7289a204f7eSJie Hai 	uint16_t i;
7299a204f7eSJie Hai 
730295968d1SFerruh Yigit 	dev->data->dev_link.link_status = RTE_ETH_LINK_DOWN;
7319a204f7eSJie Hai 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
7329a204f7eSJie Hai 		dev->data->rx_queue_state[i] = RTE_ETH_QUEUE_STATE_STOPPED;
7339a204f7eSJie Hai 		dev->data->tx_queue_state[i] = RTE_ETH_QUEUE_STATE_STOPPED;
7349a204f7eSJie Hai 	}
7359a204f7eSJie Hai 
73662024eb8SIvan Ilchenko 	return 0;
737f1debd77SXiaolong Ye }
738f1debd77SXiaolong Ye 
73974b46340SCiara Loftus /* Find ethdev in list */
74074b46340SCiara Loftus static inline struct internal_list *
74174b46340SCiara Loftus find_internal_resource(struct pmd_internals *port_int)
74274b46340SCiara Loftus {
74374b46340SCiara Loftus 	int found = 0;
74474b46340SCiara Loftus 	struct internal_list *list = NULL;
74574b46340SCiara Loftus 
74674b46340SCiara Loftus 	if (port_int == NULL)
74774b46340SCiara Loftus 		return NULL;
74874b46340SCiara Loftus 
74974b46340SCiara Loftus 	pthread_mutex_lock(&internal_list_lock);
75074b46340SCiara Loftus 
75174b46340SCiara Loftus 	TAILQ_FOREACH(list, &internal_list, next) {
75274b46340SCiara Loftus 		struct pmd_internals *list_int =
75374b46340SCiara Loftus 				list->eth_dev->data->dev_private;
75474b46340SCiara Loftus 		if (list_int == port_int) {
75574b46340SCiara Loftus 			found = 1;
75674b46340SCiara Loftus 			break;
75774b46340SCiara Loftus 		}
75874b46340SCiara Loftus 	}
75974b46340SCiara Loftus 
76074b46340SCiara Loftus 	pthread_mutex_unlock(&internal_list_lock);
76174b46340SCiara Loftus 
76274b46340SCiara Loftus 	if (!found)
76374b46340SCiara Loftus 		return NULL;
76474b46340SCiara Loftus 
76574b46340SCiara Loftus 	return list;
76674b46340SCiara Loftus }
76774b46340SCiara Loftus 
768f1debd77SXiaolong Ye static int
769f1debd77SXiaolong Ye eth_dev_configure(struct rte_eth_dev *dev)
770f1debd77SXiaolong Ye {
77174b46340SCiara Loftus 	struct pmd_internals *internal = dev->data->dev_private;
77274b46340SCiara Loftus 
773f1debd77SXiaolong Ye 	/* rx/tx must be paired */
774f1debd77SXiaolong Ye 	if (dev->data->nb_rx_queues != dev->data->nb_tx_queues)
775f1debd77SXiaolong Ye 		return -EINVAL;
776f1debd77SXiaolong Ye 
77774b46340SCiara Loftus 	if (internal->shared_umem) {
77874b46340SCiara Loftus 		struct internal_list *list = NULL;
77974b46340SCiara Loftus 		const char *name = dev->device->name;
78074b46340SCiara Loftus 
78174b46340SCiara Loftus 		/* Ensure PMD is not already inserted into the list */
78274b46340SCiara Loftus 		list = find_internal_resource(internal);
78374b46340SCiara Loftus 		if (list)
78474b46340SCiara Loftus 			return 0;
78574b46340SCiara Loftus 
78674b46340SCiara Loftus 		list = rte_zmalloc_socket(name, sizeof(*list), 0,
78774b46340SCiara Loftus 					dev->device->numa_node);
78874b46340SCiara Loftus 		if (list == NULL)
78974b46340SCiara Loftus 			return -1;
79074b46340SCiara Loftus 
79174b46340SCiara Loftus 		list->eth_dev = dev;
79274b46340SCiara Loftus 		pthread_mutex_lock(&internal_list_lock);
79374b46340SCiara Loftus 		TAILQ_INSERT_TAIL(&internal_list, list, next);
79474b46340SCiara Loftus 		pthread_mutex_unlock(&internal_list_lock);
79574b46340SCiara Loftus 	}
79674b46340SCiara Loftus 
797f1debd77SXiaolong Ye 	return 0;
798f1debd77SXiaolong Ye }
799f1debd77SXiaolong Ye 
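/* rte_power monitor support: sleep on the RX ring's kernel producer index
 * and wake once it moves past the cached value; only the low 32 bits are
 * compared since the AF_XDP producer index is 32-bit.
 */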
80043fb6eeaSAnatoly Burakov #define CLB_VAL_IDX 0
80143fb6eeaSAnatoly Burakov static int
80243fb6eeaSAnatoly Burakov eth_monitor_callback(const uint64_t value,
80343fb6eeaSAnatoly Burakov 		const uint64_t opaque[RTE_POWER_MONITOR_OPAQUE_SZ])
80443fb6eeaSAnatoly Burakov {
80543fb6eeaSAnatoly Burakov 	const uint64_t v = opaque[CLB_VAL_IDX];
80643fb6eeaSAnatoly Burakov 	const uint64_t m = (uint32_t)~0;
80743fb6eeaSAnatoly Burakov 
80843fb6eeaSAnatoly Burakov 	/* if the value has changed, abort entering power optimized state */
80943fb6eeaSAnatoly Burakov 	return (value & m) == v ? 0 : -1;
81043fb6eeaSAnatoly Burakov }
81143fb6eeaSAnatoly Burakov 
81243fb6eeaSAnatoly Burakov static int
81343fb6eeaSAnatoly Burakov eth_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc)
81443fb6eeaSAnatoly Burakov {
81543fb6eeaSAnatoly Burakov 	struct pkt_rx_queue *rxq = rx_queue;
81643fb6eeaSAnatoly Burakov 	unsigned int *prod = rxq->rx.producer;
81743fb6eeaSAnatoly Burakov 	const uint32_t cur_val = rxq->rx.cached_prod; /* use cached value */
81843fb6eeaSAnatoly Burakov 
81943fb6eeaSAnatoly Burakov 	/* watch for changes in producer ring */
82043fb6eeaSAnatoly Burakov 	pmc->addr = (void *)prod;
82143fb6eeaSAnatoly Burakov 
82243fb6eeaSAnatoly Burakov 	/* store current value */
82343fb6eeaSAnatoly Burakov 	pmc->opaque[CLB_VAL_IDX] = cur_val;
82443fb6eeaSAnatoly Burakov 	pmc->fn = eth_monitor_callback;
82543fb6eeaSAnatoly Burakov 
82643fb6eeaSAnatoly Burakov 	/* AF_XDP producer ring index is 32-bit */
82743fb6eeaSAnatoly Burakov 	pmc->size = sizeof(uint32_t);
82843fb6eeaSAnatoly Burakov 
82943fb6eeaSAnatoly Burakov 	return 0;
83043fb6eeaSAnatoly Burakov }
83143fb6eeaSAnatoly Burakov 
832bdad90d1SIvan Ilchenko static int
833f1debd77SXiaolong Ye eth_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
834f1debd77SXiaolong Ye {
835f1debd77SXiaolong Ye 	struct pmd_internals *internals = dev->data->dev_private;
836f1debd77SXiaolong Ye 
837f1debd77SXiaolong Ye 	dev_info->if_index = internals->if_index;
838f1debd77SXiaolong Ye 	dev_info->max_mac_addrs = 1;
839339b88c6SXiaolong Ye 	dev_info->max_rx_queues = internals->queue_cnt;
840339b88c6SXiaolong Ye 	dev_info->max_tx_queues = internals->queue_cnt;
841f1debd77SXiaolong Ye 
84235b2d13fSOlivier Matz 	dev_info->min_mtu = RTE_ETHER_MIN_MTU;
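	/* In zero-copy mode a packet must fit in one UMEM chunk, whose size
	 * cannot exceed a page, so the maximum frame length is a page minus
	 * the mempool object header, mbuf metadata and headrooms.
	 */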
8431668e87dSCiara Loftus #if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
844985e7673SCiara Loftus 	dev_info->max_rx_pktlen = getpagesize() -
8451668e87dSCiara Loftus 				  sizeof(struct rte_mempool_objhdr) -
8461668e87dSCiara Loftus 				  sizeof(struct rte_mbuf) -
8471668e87dSCiara Loftus 				  RTE_PKTMBUF_HEADROOM - XDP_PACKET_HEADROOM;
8481668e87dSCiara Loftus #else
849985e7673SCiara Loftus 	dev_info->max_rx_pktlen = ETH_AF_XDP_FRAME_SIZE - XDP_PACKET_HEADROOM;
8501668e87dSCiara Loftus #endif
851985e7673SCiara Loftus 	dev_info->max_mtu = dev_info->max_rx_pktlen - ETH_AF_XDP_ETH_OVERHEAD;
852fb6c5f2aSXiaolong Ye 
853055a3936SCiara Loftus 	dev_info->default_rxportconf.burst_size = ETH_AF_XDP_DFLT_BUSY_BUDGET;
854055a3936SCiara Loftus 	dev_info->default_txportconf.burst_size = ETH_AF_XDP_DFLT_BUSY_BUDGET;
855f1debd77SXiaolong Ye 	dev_info->default_rxportconf.nb_queues = 1;
856f1debd77SXiaolong Ye 	dev_info->default_txportconf.nb_queues = 1;
857f1debd77SXiaolong Ye 	dev_info->default_rxportconf.ring_size = ETH_AF_XDP_DFLT_NUM_DESCS;
858f1debd77SXiaolong Ye 	dev_info->default_txportconf.ring_size = ETH_AF_XDP_DFLT_NUM_DESCS;
859bdad90d1SIvan Ilchenko 
860bdad90d1SIvan Ilchenko 	return 0;
861f1debd77SXiaolong Ye }
862f1debd77SXiaolong Ye 
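/* Packet/byte counters are kept in software per queue; kernel-side drops are
 * read via getsockopt(XDP_STATISTICS) on each RX socket fd stored in the
 * per-process private data, with imissed_offset recording the value at the
 * last stats reset.
 */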
863f1debd77SXiaolong Ye static int
864f1debd77SXiaolong Ye eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
865f1debd77SXiaolong Ye {
866f1debd77SXiaolong Ye 	struct pmd_internals *internals = dev->data->dev_private;
8679876cf83SCiara Loftus 	struct pmd_process_private *process_private = dev->process_private;
868f1debd77SXiaolong Ye 	struct xdp_statistics xdp_stats;
869f1debd77SXiaolong Ye 	struct pkt_rx_queue *rxq;
870339b88c6SXiaolong Ye 	struct pkt_tx_queue *txq;
871f1debd77SXiaolong Ye 	socklen_t optlen;
8729876cf83SCiara Loftus 	int i, ret, fd;
873f1debd77SXiaolong Ye 
874f1debd77SXiaolong Ye 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
875f1debd77SXiaolong Ye 		optlen = sizeof(struct xdp_statistics);
876f1debd77SXiaolong Ye 		rxq = &internals->rx_queues[i];
877339b88c6SXiaolong Ye 		txq = rxq->pair;
878339b88c6SXiaolong Ye 		stats->q_ipackets[i] = rxq->stats.rx_pkts;
879339b88c6SXiaolong Ye 		stats->q_ibytes[i] = rxq->stats.rx_bytes;
880f1debd77SXiaolong Ye 
881339b88c6SXiaolong Ye 		stats->q_opackets[i] = txq->stats.tx_pkts;
882339b88c6SXiaolong Ye 		stats->q_obytes[i] = txq->stats.tx_bytes;
883f1debd77SXiaolong Ye 
884f1debd77SXiaolong Ye 		stats->ipackets += stats->q_ipackets[i];
885f1debd77SXiaolong Ye 		stats->ibytes += stats->q_ibytes[i];
886d8a21077SCiara Loftus 		stats->oerrors += txq->stats.tx_dropped;
8879876cf83SCiara Loftus 		fd = process_private->rxq_xsk_fds[i];
8889876cf83SCiara Loftus 		ret = fd >= 0 ? getsockopt(fd, SOL_XDP, XDP_STATISTICS,
8899876cf83SCiara Loftus 					   &xdp_stats, &optlen) : -1;
890f1debd77SXiaolong Ye 		if (ret != 0) {
891e99981afSDavid Marchand 			AF_XDP_LOG_LINE(ERR, "getsockopt() failed for XDP_STATISTICS.");
892f1debd77SXiaolong Ye 			return -1;
893f1debd77SXiaolong Ye 		}
8943577b1eeSCiara Loftus 		stats->imissed += xdp_stats.rx_dropped - rxq->stats.imissed_offset;
895f1debd77SXiaolong Ye 
896f1debd77SXiaolong Ye 		stats->opackets += stats->q_opackets[i];
897f1debd77SXiaolong Ye 		stats->obytes += stats->q_obytes[i];
898f1debd77SXiaolong Ye 	}
899f1debd77SXiaolong Ye 
900f1debd77SXiaolong Ye 	return 0;
901f1debd77SXiaolong Ye }
902f1debd77SXiaolong Ye 
9039970a9adSIgor Romanov static int
904f1debd77SXiaolong Ye eth_stats_reset(struct rte_eth_dev *dev)
905f1debd77SXiaolong Ye {
906f1debd77SXiaolong Ye 	struct pmd_internals *internals = dev->data->dev_private;
9073577b1eeSCiara Loftus 	struct pmd_process_private *process_private = dev->process_private;
9083577b1eeSCiara Loftus 	struct xdp_statistics xdp_stats;
9093577b1eeSCiara Loftus 	socklen_t optlen;
9103577b1eeSCiara Loftus 	int i, ret, fd;
911f1debd77SXiaolong Ye 
912339b88c6SXiaolong Ye 	for (i = 0; i < internals->queue_cnt; i++) {
913f1debd77SXiaolong Ye 		memset(&internals->rx_queues[i].stats, 0,
914f1debd77SXiaolong Ye 					sizeof(struct rx_stats));
915f1debd77SXiaolong Ye 		memset(&internals->tx_queues[i].stats, 0,
916f1debd77SXiaolong Ye 					sizeof(struct tx_stats));
9173577b1eeSCiara Loftus 		fd = process_private->rxq_xsk_fds[i];
9183577b1eeSCiara Loftus 		optlen = sizeof(struct xdp_statistics);
9193577b1eeSCiara Loftus 		ret = fd >= 0 ? getsockopt(fd, SOL_XDP, XDP_STATISTICS,
9203577b1eeSCiara Loftus 					   &xdp_stats, &optlen) : -1;
9213577b1eeSCiara Loftus 		if (ret != 0) {
922e99981afSDavid Marchand 			AF_XDP_LOG_LINE(ERR, "getsockopt() failed for XDP_STATISTICS.");
9233577b1eeSCiara Loftus 			return -1;
9243577b1eeSCiara Loftus 		}
9253577b1eeSCiara Loftus 		internals->rx_queues[i].stats.imissed_offset = xdp_stats.rx_dropped;
926f1debd77SXiaolong Ye 	}
9279970a9adSIgor Romanov 
9289970a9adSIgor Romanov 	return 0;
929f1debd77SXiaolong Ye }
930f1debd77SXiaolong Ye 
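/* XDP program attach/detach compatibility: newer libbpf provides
 * bpf_xdp_attach()/bpf_xdp_detach()/bpf_xdp_query_id(), older releases only
 * the bpf_set_link_xdp_fd()/bpf_get_link_xdp_id() pair, hence the two
 * implementations selected by RTE_NET_AF_XDP_LIBBPF_XDP_ATTACH.
 */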
9311eb1846bSCiara Loftus #ifdef RTE_NET_AF_XDP_LIBBPF_XDP_ATTACH
9321eb1846bSCiara Loftus 
9331eb1846bSCiara Loftus static int link_xdp_prog_with_dev(int ifindex, int fd, __u32 flags)
9341eb1846bSCiara Loftus {
9351eb1846bSCiara Loftus 	return bpf_xdp_attach(ifindex, fd, flags, NULL);
9361eb1846bSCiara Loftus }
9371eb1846bSCiara Loftus 
9381eb1846bSCiara Loftus static int
9391eb1846bSCiara Loftus remove_xdp_program(struct pmd_internals *internals)
9401eb1846bSCiara Loftus {
9411eb1846bSCiara Loftus 	uint32_t curr_prog_id = 0;
9421eb1846bSCiara Loftus 	int ret;
9431eb1846bSCiara Loftus 
9441eb1846bSCiara Loftus 	ret = bpf_xdp_query_id(internals->if_index, XDP_FLAGS_UPDATE_IF_NOEXIST,
9451eb1846bSCiara Loftus 			       &curr_prog_id);
9461eb1846bSCiara Loftus 	if (ret != 0) {
947e99981afSDavid Marchand 		AF_XDP_LOG_LINE(ERR, "bpf_xdp_query_id failed");
9481eb1846bSCiara Loftus 		return ret;
9491eb1846bSCiara Loftus 	}
9501eb1846bSCiara Loftus 
9511eb1846bSCiara Loftus 	ret = bpf_xdp_detach(internals->if_index, XDP_FLAGS_UPDATE_IF_NOEXIST,
9521eb1846bSCiara Loftus 			     NULL);
9531eb1846bSCiara Loftus 	if (ret != 0)
954e99981afSDavid Marchand 		AF_XDP_LOG_LINE(ERR, "bpf_xdp_detach failed");
9551eb1846bSCiara Loftus 	return ret;
9561eb1846bSCiara Loftus }
9571eb1846bSCiara Loftus 
9581eb1846bSCiara Loftus #else
9591eb1846bSCiara Loftus 
9601eb1846bSCiara Loftus static int link_xdp_prog_with_dev(int ifindex, int fd, __u32 flags)
9611eb1846bSCiara Loftus {
9621eb1846bSCiara Loftus 	return bpf_set_link_xdp_fd(ifindex, fd, flags);
9631eb1846bSCiara Loftus }
9641eb1846bSCiara Loftus 
9655ff3dbe6SAndrew Rybchenko static int
966f1debd77SXiaolong Ye remove_xdp_program(struct pmd_internals *internals)
967f1debd77SXiaolong Ye {
968f1debd77SXiaolong Ye 	uint32_t curr_prog_id = 0;
9695ff3dbe6SAndrew Rybchenko 	int ret;
970f1debd77SXiaolong Ye 
9715ff3dbe6SAndrew Rybchenko 	ret = bpf_get_link_xdp_id(internals->if_index, &curr_prog_id,
972f1debd77SXiaolong Ye 				  XDP_FLAGS_UPDATE_IF_NOEXIST);
9735ff3dbe6SAndrew Rybchenko 	if (ret != 0) {
974e99981afSDavid Marchand 		AF_XDP_LOG_LINE(ERR, "bpf_get_link_xdp_id failed");
9755ff3dbe6SAndrew Rybchenko 		return ret;
9765ff3dbe6SAndrew Rybchenko 	}
9775ff3dbe6SAndrew Rybchenko 
9785ff3dbe6SAndrew Rybchenko 	ret = bpf_set_link_xdp_fd(internals->if_index, -1,
9795ff3dbe6SAndrew Rybchenko 				  XDP_FLAGS_UPDATE_IF_NOEXIST);
9805ff3dbe6SAndrew Rybchenko 	if (ret != 0)
981e99981afSDavid Marchand 		AF_XDP_LOG_LINE(ERR, "bpf_set_link_xdp_fd failed");
9825ff3dbe6SAndrew Rybchenko 	return ret;
983f1debd77SXiaolong Ye }
984f1debd77SXiaolong Ye 
9851eb1846bSCiara Loftus #endif
9861eb1846bSCiara Loftus 
987f1debd77SXiaolong Ye static void
988f0ce7af0SXiaolong Ye xdp_umem_destroy(struct xsk_umem_info *umem)
989f0ce7af0SXiaolong Ye {
990955acb95SYunjian Wang 	(void)xsk_umem__delete(umem->umem);
991955acb95SYunjian Wang 	umem->umem = NULL;
992955acb95SYunjian Wang 
993d8a21077SCiara Loftus #if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
994d8a21077SCiara Loftus 	umem->mb_pool = NULL;
995d8a21077SCiara Loftus #else
996f0ce7af0SXiaolong Ye 	rte_memzone_free(umem->mz);
997f0ce7af0SXiaolong Ye 	umem->mz = NULL;
998f0ce7af0SXiaolong Ye 
999f0ce7af0SXiaolong Ye 	rte_ring_free(umem->buf_ring);
1000f0ce7af0SXiaolong Ye 	umem->buf_ring = NULL;
1001d8a21077SCiara Loftus #endif
1002f0ce7af0SXiaolong Ye 
1003f0ce7af0SXiaolong Ye 	rte_free(umem);
1004f0ce7af0SXiaolong Ye }
1005f0ce7af0SXiaolong Ye 
1006b142387bSThomas Monjalon static int
1007f1debd77SXiaolong Ye eth_dev_close(struct rte_eth_dev *dev)
1008f1debd77SXiaolong Ye {
1009f1debd77SXiaolong Ye 	struct pmd_internals *internals = dev->data->dev_private;
1010f1debd77SXiaolong Ye 	struct pkt_rx_queue *rxq;
1011f1debd77SXiaolong Ye 	int i;
1012f1debd77SXiaolong Ye 
101330410493SThomas Monjalon 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
10149876cf83SCiara Loftus 		goto out;
101530410493SThomas Monjalon 
1016e99981afSDavid Marchand 	AF_XDP_LOG_LINE(INFO, "Closing AF_XDP ethdev on numa socket %u",
1017f1debd77SXiaolong Ye 		rte_socket_id());
1018f1debd77SXiaolong Ye 
1019339b88c6SXiaolong Ye 	for (i = 0; i < internals->queue_cnt; i++) {
1020f1debd77SXiaolong Ye 		rxq = &internals->rx_queues[i];
1021f1debd77SXiaolong Ye 		if (rxq->umem == NULL)
1022f1debd77SXiaolong Ye 			break;
1023f1debd77SXiaolong Ye 		xsk_socket__delete(rxq->xsk);
102474b46340SCiara Loftus 
1025e12a0166STyler Retzlaff 		if (rte_atomic_fetch_sub_explicit(&rxq->umem->refcnt, 1,
1026e12a0166STyler Retzlaff 				rte_memory_order_acquire) - 1 == 0)
1027e9ff8bb7SXiaolong Ye 			xdp_umem_destroy(rxq->umem);
1028339b88c6SXiaolong Ye 
1029339b88c6SXiaolong Ye 		/* free pkt_tx_queue */
1030339b88c6SXiaolong Ye 		rte_free(rxq->pair);
1031339b88c6SXiaolong Ye 		rte_free(rxq);
1032f1debd77SXiaolong Ye 	}
1033f1debd77SXiaolong Ye 
1034f0ce7af0SXiaolong Ye 	/*
1035f0ce7af0SXiaolong Ye 	 * The MAC address is not allocated dynamically; set it to NULL so that
1036f0ce7af0SXiaolong Ye 	 * rte_eth_dev_release_port() does not attempt to free it.
1037f0ce7af0SXiaolong Ye 	 */
1038f0ce7af0SXiaolong Ye 	dev->data->mac_addrs = NULL;
1039f0ce7af0SXiaolong Ye 
10405ff3dbe6SAndrew Rybchenko 	if (remove_xdp_program(internals) != 0)
1041e99981afSDavid Marchand 		AF_XDP_LOG_LINE(ERR, "Error while removing XDP program.");
104274b46340SCiara Loftus 
104374b46340SCiara Loftus 	if (internals->shared_umem) {
104474b46340SCiara Loftus 		struct internal_list *list;
104574b46340SCiara Loftus 
104674b46340SCiara Loftus 		/* Remove ethdev from list used to track and share UMEMs */
104774b46340SCiara Loftus 		list = find_internal_resource(internals);
104874b46340SCiara Loftus 		if (list) {
104974b46340SCiara Loftus 			pthread_mutex_lock(&internal_list_lock);
105074b46340SCiara Loftus 			TAILQ_REMOVE(&internal_list, list, next);
105174b46340SCiara Loftus 			pthread_mutex_unlock(&internal_list_lock);
105274b46340SCiara Loftus 			rte_free(list);
105374b46340SCiara Loftus 		}
105474b46340SCiara Loftus 	}
1055b142387bSThomas Monjalon 
10569876cf83SCiara Loftus out:
10579876cf83SCiara Loftus 	rte_free(dev->process_private);
10589876cf83SCiara Loftus 
1059b142387bSThomas Monjalon 	return 0;
1060f1debd77SXiaolong Ye }
1061f1debd77SXiaolong Ye 
1062f1debd77SXiaolong Ye static int
1063f1debd77SXiaolong Ye eth_link_update(struct rte_eth_dev *dev __rte_unused,
1064f1debd77SXiaolong Ye 		int wait_to_complete __rte_unused)
1065f1debd77SXiaolong Ye {
1066f1debd77SXiaolong Ye 	return 0;
1067f1debd77SXiaolong Ye }
1068f1debd77SXiaolong Ye 
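/* Shared UMEM support (zero-copy mode): queues configured with the same
 * mempool may share one UMEM as long as the same netdev/queue pair is not
 * registered twice; get_shared_umem() returns an existing UMEM that still
 * has a non-zero refcnt and ctx_exists() guards against duplicates.
 */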
1069d8a21077SCiara Loftus #if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
1070af8d89a0SCiara Loftus /* Check if the netdev,qid context already exists */
1071af8d89a0SCiara Loftus static inline bool
1072af8d89a0SCiara Loftus ctx_exists(struct pkt_rx_queue *rxq, const char *ifname,
1073af8d89a0SCiara Loftus 		struct pkt_rx_queue *list_rxq, const char *list_ifname)
1074af8d89a0SCiara Loftus {
1075af8d89a0SCiara Loftus 	bool exists = false;
1076af8d89a0SCiara Loftus 
1077af8d89a0SCiara Loftus 	if (rxq->xsk_queue_idx == list_rxq->xsk_queue_idx &&
1078af8d89a0SCiara Loftus 			!strncmp(ifname, list_ifname, IFNAMSIZ)) {
1079e99981afSDavid Marchand 		AF_XDP_LOG_LINE(ERR, "ctx %s,%i already exists, cannot share umem",
1080af8d89a0SCiara Loftus 					ifname, rxq->xsk_queue_idx);
1081af8d89a0SCiara Loftus 		exists = true;
1082af8d89a0SCiara Loftus 	}
1083af8d89a0SCiara Loftus 
1084af8d89a0SCiara Loftus 	return exists;
1085af8d89a0SCiara Loftus }
1086af8d89a0SCiara Loftus 
1087af8d89a0SCiara Loftus /* Get a pointer to an existing UMEM which overlays the rxq's mb_pool */
1088af8d89a0SCiara Loftus static inline int
1089af8d89a0SCiara Loftus get_shared_umem(struct pkt_rx_queue *rxq, const char *ifname,
1090af8d89a0SCiara Loftus 			struct xsk_umem_info **umem)
1091af8d89a0SCiara Loftus {
1092af8d89a0SCiara Loftus 	struct internal_list *list;
1093af8d89a0SCiara Loftus 	struct pmd_internals *internals;
1094af8d89a0SCiara Loftus 	int i = 0, ret = 0;
1095af8d89a0SCiara Loftus 	struct rte_mempool *mb_pool = rxq->mb_pool;
1096af8d89a0SCiara Loftus 
1097af8d89a0SCiara Loftus 	if (mb_pool == NULL)
1098af8d89a0SCiara Loftus 		return ret;
1099af8d89a0SCiara Loftus 
1100af8d89a0SCiara Loftus 	pthread_mutex_lock(&internal_list_lock);
1101af8d89a0SCiara Loftus 
1102af8d89a0SCiara Loftus 	TAILQ_FOREACH(list, &internal_list, next) {
1103af8d89a0SCiara Loftus 		internals = list->eth_dev->data->dev_private;
1104af8d89a0SCiara Loftus 		for (i = 0; i < internals->queue_cnt; i++) {
1105af8d89a0SCiara Loftus 			struct pkt_rx_queue *list_rxq =
1106af8d89a0SCiara Loftus 						&internals->rx_queues[i];
1107af8d89a0SCiara Loftus 			if (rxq == list_rxq)
1108af8d89a0SCiara Loftus 				continue;
1109af8d89a0SCiara Loftus 			if (mb_pool == internals->rx_queues[i].mb_pool) {
1110af8d89a0SCiara Loftus 				if (ctx_exists(rxq, ifname, list_rxq,
1111af8d89a0SCiara Loftus 						internals->if_name)) {
1112af8d89a0SCiara Loftus 					ret = -1;
1113af8d89a0SCiara Loftus 					goto out;
1114af8d89a0SCiara Loftus 				}
1115e12a0166STyler Retzlaff 				if (rte_atomic_load_explicit(&internals->rx_queues[i].umem->refcnt,
1116e12a0166STyler Retzlaff 						    rte_memory_order_acquire)) {
1117af8d89a0SCiara Loftus 					*umem = internals->rx_queues[i].umem;
1118af8d89a0SCiara Loftus 					goto out;
1119af8d89a0SCiara Loftus 				}
1120af8d89a0SCiara Loftus 			}
1121af8d89a0SCiara Loftus 		}
1122af8d89a0SCiara Loftus 	}
1123af8d89a0SCiara Loftus 
1124af8d89a0SCiara Loftus out:
1125af8d89a0SCiara Loftus 	pthread_mutex_unlock(&internal_list_lock);
1126af8d89a0SCiara Loftus 
1127af8d89a0SCiara Loftus 	return ret;
1128af8d89a0SCiara Loftus }
1129af8d89a0SCiara Loftus 
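/* Build the UMEM directly on top of the rxq's mempool: the frame size is the
 * mempool object size and the frame headroom skips the object header, the
 * rte_mbuf structure, its private area and RTE_PKTMBUF_HEADROOM, so XSK
 * descriptor addresses point straight at mbuf data. The mapped region is the
 * mempool's contiguous memory range aligned down to a page boundary.
 */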
1130d8a21077SCiara Loftus static struct
113174b46340SCiara Loftus xsk_umem_info *xdp_umem_configure(struct pmd_internals *internals,
1132d8a21077SCiara Loftus 				  struct pkt_rx_queue *rxq)
1133d8a21077SCiara Loftus {
113474b46340SCiara Loftus 	struct xsk_umem_info *umem = NULL;
1135d8a21077SCiara Loftus 	int ret;
1136d8a21077SCiara Loftus 	struct xsk_umem_config usr_config = {
1137a63d4fcaSCiara Loftus 		.fill_size = ETH_AF_XDP_DFLT_NUM_DESCS * 2,
1138d8a21077SCiara Loftus 		.comp_size = ETH_AF_XDP_DFLT_NUM_DESCS,
1139d8a21077SCiara Loftus 		.flags = XDP_UMEM_UNALIGNED_CHUNK_FLAG};
1140d8a21077SCiara Loftus 	struct rte_mempool *mb_pool = rxq->mb_pool;
114197039941SFrank Du 	void *aligned_addr;
114297039941SFrank Du 	uint64_t umem_size;
114397039941SFrank Du 	struct rte_mempool_mem_range_info range;
1144d8a21077SCiara Loftus 
114574b46340SCiara Loftus 	if (internals->shared_umem) {
114653a73b7bSCiara Loftus 		if (get_shared_umem(rxq, internals->if_name, &umem) < 0)
114753a73b7bSCiara Loftus 			return NULL;
114853a73b7bSCiara Loftus 
114974b46340SCiara Loftus 		if (umem != NULL &&
1150e12a0166STyler Retzlaff 			rte_atomic_load_explicit(&umem->refcnt, rte_memory_order_acquire) <
115174b46340SCiara Loftus 					umem->max_xsks) {
1152e99981afSDavid Marchand 			AF_XDP_LOG_LINE(INFO, "%s,qid%i sharing UMEM",
115374b46340SCiara Loftus 					internals->if_name, rxq->xsk_queue_idx);
1154e12a0166STyler Retzlaff 			rte_atomic_fetch_add_explicit(&umem->refcnt, 1, rte_memory_order_acquire);
115574b46340SCiara Loftus 		}
115674b46340SCiara Loftus 	}
115774b46340SCiara Loftus 
115874b46340SCiara Loftus 	if (umem == NULL) {
115974b46340SCiara Loftus 		usr_config.frame_size =
116074b46340SCiara Loftus 			rte_mempool_calc_obj_size(mb_pool->elt_size,
116174b46340SCiara Loftus 						  mb_pool->flags, NULL);
1162b79ae90cSCiara Loftus 		usr_config.frame_headroom = mb_pool->header_size +
1163b79ae90cSCiara Loftus 						sizeof(struct rte_mbuf) +
1164b79ae90cSCiara Loftus 						rte_pktmbuf_priv_size(mb_pool) +
1165b79ae90cSCiara Loftus 						RTE_PKTMBUF_HEADROOM;
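		/*
		 * Illustrative note: each UMEM frame overlays one mempool object, so the
		 * headroom above skips the mempool object header, the rte_mbuf struct, its
		 * private area and the standard mbuf headroom. XSK descriptors therefore
		 * point straight into the mbuf data area and no copy is needed.
		 */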
1166d8a21077SCiara Loftus 
116774b46340SCiara Loftus 		umem = rte_zmalloc_socket("umem", sizeof(*umem), 0,
116874b46340SCiara Loftus 					  rte_socket_id());
1169d8a21077SCiara Loftus 		if (umem == NULL) {
1170e99981afSDavid Marchand 			AF_XDP_LOG_LINE(ERR, "Failed to allocate umem info");
1171d8a21077SCiara Loftus 			return NULL;
1172d8a21077SCiara Loftus 		}
1173d8a21077SCiara Loftus 
1174d8a21077SCiara Loftus 		umem->mb_pool = mb_pool;
117597039941SFrank Du 		ret = rte_mempool_get_mem_range(mb_pool, &range);
117697039941SFrank Du 		if (ret < 0) {
1177e99981afSDavid Marchand 			AF_XDP_LOG_LINE(ERR, "Failed (%d) to get memory range from mempool", ret);
117897039941SFrank Du 			goto err;
117997039941SFrank Du 		}
118097039941SFrank Du 		if (!range.is_contiguous) {
1181e99981afSDavid Marchand 			AF_XDP_LOG_LINE(ERR, "Can't map to umem as mempool is not contiguous");
118297039941SFrank Du 			goto err;
118397039941SFrank Du 		}
118497039941SFrank Du 		/*
118597039941SFrank Du 		 * umem requires the memory area to be page aligned; it is safe to map a larger area
118697039941SFrank Du 		 * as the memory pointer for each XSK TX/RX descriptor is derived from the mbuf data area.
118797039941SFrank Du 		 */
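		/*
		 * Worked example with hypothetical numbers: for range.start = 0x7f0000001800
		 * and a 4 KiB page size, aligned_addr becomes 0x7f0000001000 and umem_size is
		 * range.length plus the 0x800 bytes floored off, keeping the mapping page aligned.
		 */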
118897039941SFrank Du 		aligned_addr = (void *)RTE_ALIGN_FLOOR((uintptr_t)range.start, getpagesize());
118997039941SFrank Du 		umem_size = range.length + RTE_PTR_DIFF(range.start, aligned_addr);
119097039941SFrank Du 		ret = xsk_umem__create(&umem->umem, aligned_addr, umem_size,
119174b46340SCiara Loftus 				&rxq->fq, &rxq->cq, &usr_config);
1192d8a21077SCiara Loftus 		if (ret) {
1193e99981afSDavid Marchand 			AF_XDP_LOG_LINE(ERR, "Failed to create umem [%d]: [%s]",
11947fc6ae50SShibin Koikkara Reeny 				   errno, strerror(errno));
1195d8a21077SCiara Loftus 			goto err;
1196d8a21077SCiara Loftus 		}
119797039941SFrank Du 		umem->buffer = aligned_addr;
1198d8a21077SCiara Loftus 
119974b46340SCiara Loftus 		if (internals->shared_umem) {
120074b46340SCiara Loftus 			umem->max_xsks = mb_pool->populated_size /
120174b46340SCiara Loftus 						ETH_AF_XDP_NUM_BUFFERS;
1202e99981afSDavid Marchand 			AF_XDP_LOG_LINE(INFO, "Max xsks for UMEM %s: %u",
120374b46340SCiara Loftus 						mb_pool->name, umem->max_xsks);
120474b46340SCiara Loftus 		}
120574b46340SCiara Loftus 
1206e12a0166STyler Retzlaff 		rte_atomic_store_explicit(&umem->refcnt, 1, rte_memory_order_release);
120774b46340SCiara Loftus 	}
120874b46340SCiara Loftus 
1209e1543baeSHaiyue Wang 	return umem;
1210e1543baeSHaiyue Wang 
1211e1543baeSHaiyue Wang err:
1212e1543baeSHaiyue Wang 	xdp_umem_destroy(umem);
1213e1543baeSHaiyue Wang 	return NULL;
1214e1543baeSHaiyue Wang }
1215d8a21077SCiara Loftus #else
1216f1debd77SXiaolong Ye static struct
1217339b88c6SXiaolong Ye xsk_umem_info *xdp_umem_configure(struct pmd_internals *internals,
1218339b88c6SXiaolong Ye 				  struct pkt_rx_queue *rxq)
1219f1debd77SXiaolong Ye {
1220f1debd77SXiaolong Ye 	struct xsk_umem_info *umem;
1221f1debd77SXiaolong Ye 	const struct rte_memzone *mz;
1222f1debd77SXiaolong Ye 	struct xsk_umem_config usr_config = {
1223f1debd77SXiaolong Ye 		.fill_size = ETH_AF_XDP_DFLT_NUM_DESCS,
1224f1debd77SXiaolong Ye 		.comp_size = ETH_AF_XDP_DFLT_NUM_DESCS,
1225f1debd77SXiaolong Ye 		.frame_size = ETH_AF_XDP_FRAME_SIZE,
12261668e87dSCiara Loftus 		.frame_headroom = 0 };
12271d630c95SXiaolong Ye 	char ring_name[RTE_RING_NAMESIZE];
12281d630c95SXiaolong Ye 	char mz_name[RTE_MEMZONE_NAMESIZE];
1229f1debd77SXiaolong Ye 	int ret;
1230f1debd77SXiaolong Ye 	uint64_t i;
1231f1debd77SXiaolong Ye 
1232f1debd77SXiaolong Ye 	umem = rte_zmalloc_socket("umem", sizeof(*umem), 0, rte_socket_id());
1233f1debd77SXiaolong Ye 	if (umem == NULL) {
1234e99981afSDavid Marchand 		AF_XDP_LOG_LINE(ERR, "Failed to allocate umem info");
1235f1debd77SXiaolong Ye 		return NULL;
1236f1debd77SXiaolong Ye 	}
1237f1debd77SXiaolong Ye 
12381d630c95SXiaolong Ye 	snprintf(ring_name, sizeof(ring_name), "af_xdp_ring_%s_%u",
1239339b88c6SXiaolong Ye 		       internals->if_name, rxq->xsk_queue_idx);
12401d630c95SXiaolong Ye 	umem->buf_ring = rte_ring_create(ring_name,
1241f1debd77SXiaolong Ye 					 ETH_AF_XDP_NUM_BUFFERS,
1242f1debd77SXiaolong Ye 					 rte_socket_id(),
12435b2baff9SCiara Loftus 					 0x0);
1244f1debd77SXiaolong Ye 	if (umem->buf_ring == NULL) {
1245e99981afSDavid Marchand 		AF_XDP_LOG_LINE(ERR, "Failed to create rte_ring");
1246f1debd77SXiaolong Ye 		goto err;
1247f1debd77SXiaolong Ye 	}
1248f1debd77SXiaolong Ye 
1249f1debd77SXiaolong Ye 	for (i = 0; i < ETH_AF_XDP_NUM_BUFFERS; i++)
1250f1debd77SXiaolong Ye 		rte_ring_enqueue(umem->buf_ring,
12511668e87dSCiara Loftus 				 (void *)(i * ETH_AF_XDP_FRAME_SIZE));
1252f1debd77SXiaolong Ye 
12531d630c95SXiaolong Ye 	snprintf(mz_name, sizeof(mz_name), "af_xdp_umem_%s_%u",
1254339b88c6SXiaolong Ye 		       internals->if_name, rxq->xsk_queue_idx);
12551d630c95SXiaolong Ye 	mz = rte_memzone_reserve_aligned(mz_name,
1256f1debd77SXiaolong Ye 			ETH_AF_XDP_NUM_BUFFERS * ETH_AF_XDP_FRAME_SIZE,
1257f1debd77SXiaolong Ye 			rte_socket_id(), RTE_MEMZONE_IOVA_CONTIG,
1258f1debd77SXiaolong Ye 			getpagesize());
1259f1debd77SXiaolong Ye 	if (mz == NULL) {
1260e99981afSDavid Marchand 		AF_XDP_LOG_LINE(ERR, "Failed to reserve memzone for af_xdp umem.");
1261f1debd77SXiaolong Ye 		goto err;
1262f1debd77SXiaolong Ye 	}
1263960775fcSYunjian Wang 	umem->mz = mz;
1264f1debd77SXiaolong Ye 
1265f1debd77SXiaolong Ye 	ret = xsk_umem__create(&umem->umem, mz->addr,
1266f1debd77SXiaolong Ye 			       ETH_AF_XDP_NUM_BUFFERS * ETH_AF_XDP_FRAME_SIZE,
126774b46340SCiara Loftus 			       &rxq->fq, &rxq->cq,
1268f1debd77SXiaolong Ye 			       &usr_config);
1269f1debd77SXiaolong Ye 
1270f1debd77SXiaolong Ye 	if (ret) {
1271e99981afSDavid Marchand 		AF_XDP_LOG_LINE(ERR, "Failed to create umem");
1272f1debd77SXiaolong Ye 		goto err;
1273f1debd77SXiaolong Ye 	}
1274f1debd77SXiaolong Ye 
1275f1debd77SXiaolong Ye 	return umem;
1276f1debd77SXiaolong Ye 
1277f1debd77SXiaolong Ye err:
1278f1debd77SXiaolong Ye 	xdp_umem_destroy(umem);
1279f1debd77SXiaolong Ye 	return NULL;
1280f1debd77SXiaolong Ye }
1281e1543baeSHaiyue Wang #endif
1282f1debd77SXiaolong Ye 
1283f1debd77SXiaolong Ye static int
12848a324b1cSMaryam Tahhan get_pinned_map(const char *dp_path, int *map_fd)
12858a324b1cSMaryam Tahhan {
12868a324b1cSMaryam Tahhan 	*map_fd = bpf_obj_get(dp_path);
12878a324b1cSMaryam Tahhan 	if (*map_fd < 0) {
1288e99981afSDavid Marchand 		AF_XDP_LOG_LINE(ERR, "Failed to find xsks_map in %s", dp_path);
12898a324b1cSMaryam Tahhan 		return -1;
12908a324b1cSMaryam Tahhan 	}
12918a324b1cSMaryam Tahhan 
1292e99981afSDavid Marchand 	AF_XDP_LOG_LINE(INFO, "Successfully retrieved map %s with fd %d",
12938a324b1cSMaryam Tahhan 				dp_path, *map_fd);
12948a324b1cSMaryam Tahhan 
12958a324b1cSMaryam Tahhan 	return 0;
12968a324b1cSMaryam Tahhan }
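/*
 * Illustrative usage (hypothetical path): when the pinned-map devarg is used,
 * dp_path points at an xsks map pinned by an external loader, so the call above
 * behaves like bpf_obj_get("/sys/fs/bpf/ens786f0/xsks_map") and the returned fd
 * is later handed to update_xskmap().
 */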
12978a324b1cSMaryam Tahhan 
12988a324b1cSMaryam Tahhan static int
129901fa83c9SCiara Loftus load_custom_xdp_prog(const char *prog_path, int if_index, struct bpf_map **map)
1300288a85aeSCiara Loftus {
13018d3d9c72SCiara Loftus 	int ret, prog_fd;
1302288a85aeSCiara Loftus 	struct bpf_object *obj;
1303288a85aeSCiara Loftus 
13048d3d9c72SCiara Loftus 	prog_fd = load_program(prog_path, &obj);
13058d3d9c72SCiara Loftus 	if (prog_fd < 0) {
1306e99981afSDavid Marchand 		AF_XDP_LOG_LINE(ERR, "Failed to load program %s", prog_path);
13078d3d9c72SCiara Loftus 		return -1;
1308288a85aeSCiara Loftus 	}
1309288a85aeSCiara Loftus 
1310288a85aeSCiara Loftus 	/*
1311288a85aeSCiara Loftus 	 * The loaded program must provision for a map of xsks, such that some
131201fa83c9SCiara Loftus 	 * traffic can be redirected to userspace.
1313288a85aeSCiara Loftus 	 */
131401fa83c9SCiara Loftus 	*map = bpf_object__find_map_by_name(obj, "xsks_map");
131501fa83c9SCiara Loftus 	if (!*map) {
1316e99981afSDavid Marchand 		AF_XDP_LOG_LINE(ERR, "Failed to find xsks_map in %s", prog_path);
1317288a85aeSCiara Loftus 		return -1;
1318288a85aeSCiara Loftus 	}
1319288a85aeSCiara Loftus 
1320288a85aeSCiara Loftus 	/* Link the program with the given network device */
13211eb1846bSCiara Loftus 	ret = link_xdp_prog_with_dev(if_index, prog_fd,
13229f394004SCiara Loftus 					XDP_FLAGS_UPDATE_IF_NOEXIST);
1323288a85aeSCiara Loftus 	if (ret) {
1324e99981afSDavid Marchand 		AF_XDP_LOG_LINE(ERR, "Failed to set prog fd %d on interface",
1325288a85aeSCiara Loftus 				prog_fd);
1326288a85aeSCiara Loftus 		return -1;
1327288a85aeSCiara Loftus 	}
1328288a85aeSCiara Loftus 
1329e99981afSDavid Marchand 	AF_XDP_LOG_LINE(INFO, "Successfully loaded XDP program %s with fd %d",
1330288a85aeSCiara Loftus 				prog_path, prog_fd);
1331288a85aeSCiara Loftus 
1332288a85aeSCiara Loftus 	return 0;
1333288a85aeSCiara Loftus }
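/*
 * Minimal sketch (an assumption, not shipped with this driver) of a custom XDP
 * program that satisfies the requirement above: it exposes an XSKMAP named
 * "xsks_map" and redirects received traffic into it.
 *
 *   struct {
 *       __uint(type, BPF_MAP_TYPE_XSKMAP);
 *       __uint(max_entries, 64);
 *       __uint(key_size, sizeof(int));
 *       __uint(value_size, sizeof(int));
 *   } xsks_map SEC(".maps");
 *
 *   SEC("xdp")
 *   int xdp_sock_prog(struct xdp_md *ctx)
 *   {
 *       return bpf_redirect_map(&xsks_map, ctx->rx_queue_index, XDP_PASS);
 *   }
 */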
1334288a85aeSCiara Loftus 
1335055a3936SCiara Loftus /* Detect support for busy polling through setsockopt(). */
1336055a3936SCiara Loftus static int
1337055a3936SCiara Loftus configure_preferred_busy_poll(struct pkt_rx_queue *rxq)
1338055a3936SCiara Loftus {
1339055a3936SCiara Loftus 	int sock_opt = 1;
1340055a3936SCiara Loftus 	int fd = xsk_socket__fd(rxq->xsk);
1341055a3936SCiara Loftus 	int ret = 0;
1342055a3936SCiara Loftus 
1343055a3936SCiara Loftus 	ret = setsockopt(fd, SOL_SOCKET, SO_PREFER_BUSY_POLL,
1344055a3936SCiara Loftus 			(void *)&sock_opt, sizeof(sock_opt));
1345055a3936SCiara Loftus 	if (ret < 0) {
1346e99981afSDavid Marchand 		AF_XDP_LOG_LINE(DEBUG, "Failed to set SO_PREFER_BUSY_POLL");
1347055a3936SCiara Loftus 		goto err_prefer;
1348055a3936SCiara Loftus 	}
1349055a3936SCiara Loftus 
1350055a3936SCiara Loftus 	sock_opt = ETH_AF_XDP_DFLT_BUSY_TIMEOUT;
1351055a3936SCiara Loftus 	ret = setsockopt(fd, SOL_SOCKET, SO_BUSY_POLL, (void *)&sock_opt,
1352055a3936SCiara Loftus 			sizeof(sock_opt));
1353055a3936SCiara Loftus 	if (ret < 0) {
1354e99981afSDavid Marchand 		AF_XDP_LOG_LINE(DEBUG, "Failed to set SO_BUSY_POLL");
1355055a3936SCiara Loftus 		goto err_timeout;
1356055a3936SCiara Loftus 	}
1357055a3936SCiara Loftus 
1358055a3936SCiara Loftus 	sock_opt = rxq->busy_budget;
1359055a3936SCiara Loftus 	ret = setsockopt(fd, SOL_SOCKET, SO_BUSY_POLL_BUDGET,
1360055a3936SCiara Loftus 			(void *)&sock_opt, sizeof(sock_opt));
1361055a3936SCiara Loftus 	if (ret < 0) {
1362e99981afSDavid Marchand 		AF_XDP_LOG_LINE(DEBUG, "Failed to set SO_BUSY_POLL_BUDGET");
1363055a3936SCiara Loftus 	} else {
1364e99981afSDavid Marchand 		AF_XDP_LOG_LINE(INFO, "Busy polling budget set to: %u",
1365055a3936SCiara Loftus 					rxq->busy_budget);
1366055a3936SCiara Loftus 		return 0;
1367055a3936SCiara Loftus 	}
1368055a3936SCiara Loftus 
1369055a3936SCiara Loftus 	/* setsockopt failure - attempt to restore xsk to default state and
1370055a3936SCiara Loftus 	 * proceed without busy polling support.
1371055a3936SCiara Loftus 	 */
1372055a3936SCiara Loftus 	sock_opt = 0;
1373055a3936SCiara Loftus 	ret = setsockopt(fd, SOL_SOCKET, SO_BUSY_POLL, (void *)&sock_opt,
1374055a3936SCiara Loftus 			sizeof(sock_opt));
1375055a3936SCiara Loftus 	if (ret < 0) {
1376e99981afSDavid Marchand 		AF_XDP_LOG_LINE(ERR, "Failed to unset SO_BUSY_POLL");
1377055a3936SCiara Loftus 		return -1;
1378055a3936SCiara Loftus 	}
1379055a3936SCiara Loftus 
1380055a3936SCiara Loftus err_timeout:
1381055a3936SCiara Loftus 	sock_opt = 0;
1382055a3936SCiara Loftus 	ret = setsockopt(fd, SOL_SOCKET, SO_PREFER_BUSY_POLL,
1383055a3936SCiara Loftus 			(void *)&sock_opt, sizeof(sock_opt));
1384055a3936SCiara Loftus 	if (ret < 0) {
1385e99981afSDavid Marchand 		AF_XDP_LOG_LINE(ERR, "Failed to unset SO_PREFER_BUSY_POLL");
1386055a3936SCiara Loftus 		return -1;
1387055a3936SCiara Loftus 	}
1388055a3936SCiara Loftus 
1389055a3936SCiara Loftus err_prefer:
1390055a3936SCiara Loftus 	rxq->busy_budget = 0;
1391055a3936SCiara Loftus 	return 0;
1392055a3936SCiara Loftus }
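/*
 * Illustrative note: SO_PREFER_BUSY_POLL and SO_BUSY_POLL_BUDGET are only honoured
 * by kernels v5.11 and newer; on older kernels the setsockopt() calls above fail
 * and the driver falls back to regular polling. Assuming the devarg string is
 * "busy_budget", a hypothetical invocation requesting a larger budget could be:
 *
 *   --vdev net_af_xdp0,iface=ens786f0,busy_budget=256
 */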
1393055a3936SCiara Loftus 
1394288a85aeSCiara Loftus static int
13959c132373SMaryam Tahhan init_uds_sock(struct sockaddr_un *server, const char *dp_path)
13967fc6ae50SShibin Koikkara Reeny {
13977fc6ae50SShibin Koikkara Reeny 	int sock;
13987fc6ae50SShibin Koikkara Reeny 
13997fc6ae50SShibin Koikkara Reeny 	sock = socket(AF_UNIX, SOCK_SEQPACKET, 0);
14007fc6ae50SShibin Koikkara Reeny 	if (sock < 0) {
1401e99981afSDavid Marchand 		AF_XDP_LOG_LINE(ERR, "Failed to open stream socket");
14027fc6ae50SShibin Koikkara Reeny 		return -1;
14037fc6ae50SShibin Koikkara Reeny 	}
14047fc6ae50SShibin Koikkara Reeny 
14057fc6ae50SShibin Koikkara Reeny 	server->sun_family = AF_UNIX;
14069c132373SMaryam Tahhan 	strlcpy(server->sun_path, dp_path, sizeof(server->sun_path));
14077fc6ae50SShibin Koikkara Reeny 
14087fc6ae50SShibin Koikkara Reeny 	if (connect(sock, (struct sockaddr *)server, sizeof(struct sockaddr_un)) < 0) {
14097fc6ae50SShibin Koikkara Reeny 		close(sock);
1410e99981afSDavid Marchand 		AF_XDP_LOG_LINE(ERR, "Error connecting stream socket, errno = [%d]: [%s]",
14117fc6ae50SShibin Koikkara Reeny 			   errno, strerror(errno));
14127fc6ae50SShibin Koikkara Reeny 		return -1;
14137fc6ae50SShibin Koikkara Reeny 	}
14147fc6ae50SShibin Koikkara Reeny 
14157fc6ae50SShibin Koikkara Reeny 	return sock;
14167fc6ae50SShibin Koikkara Reeny }
14177fc6ae50SShibin Koikkara Reeny 
14187fc6ae50SShibin Koikkara Reeny struct msg_internal {
14197fc6ae50SShibin Koikkara Reeny 	char response[UDS_MAX_CMD_RESP];
14207fc6ae50SShibin Koikkara Reeny 	int len_param;
14217fc6ae50SShibin Koikkara Reeny 	int num_fds;
14227fc6ae50SShibin Koikkara Reeny 	int fds[UDS_MAX_FD_NUM];
14237fc6ae50SShibin Koikkara Reeny };
14247fc6ae50SShibin Koikkara Reeny 
14257fc6ae50SShibin Koikkara Reeny static int
14269c132373SMaryam Tahhan send_msg(int sock, char *request, int *fd, const char *dp_path)
14277fc6ae50SShibin Koikkara Reeny {
14287fc6ae50SShibin Koikkara Reeny 	int snd;
14297fc6ae50SShibin Koikkara Reeny 	struct iovec iov;
14307fc6ae50SShibin Koikkara Reeny 	struct msghdr msgh;
14317fc6ae50SShibin Koikkara Reeny 	struct cmsghdr *cmsg;
14327fc6ae50SShibin Koikkara Reeny 	struct sockaddr_un dst;
14337fc6ae50SShibin Koikkara Reeny 	char control[CMSG_SPACE(sizeof(*fd))];
14347fc6ae50SShibin Koikkara Reeny 
14357fc6ae50SShibin Koikkara Reeny 	memset(&dst, 0, sizeof(dst));
14367fc6ae50SShibin Koikkara Reeny 	dst.sun_family = AF_UNIX;
14379c132373SMaryam Tahhan 	strlcpy(dst.sun_path, dp_path, sizeof(dst.sun_path));
14387fc6ae50SShibin Koikkara Reeny 
14397fc6ae50SShibin Koikkara Reeny 	/* Initialize message header structure */
14407fc6ae50SShibin Koikkara Reeny 	memset(&msgh, 0, sizeof(msgh));
14417fc6ae50SShibin Koikkara Reeny 	memset(control, 0, sizeof(control));
14427fc6ae50SShibin Koikkara Reeny 	iov.iov_base = request;
14437fc6ae50SShibin Koikkara Reeny 	iov.iov_len = strlen(request);
14447fc6ae50SShibin Koikkara Reeny 
14457fc6ae50SShibin Koikkara Reeny 	msgh.msg_name = &dst;
14467fc6ae50SShibin Koikkara Reeny 	msgh.msg_namelen = sizeof(dst);
14477fc6ae50SShibin Koikkara Reeny 	msgh.msg_iov = &iov;
14487fc6ae50SShibin Koikkara Reeny 	msgh.msg_iovlen = 1;
14497fc6ae50SShibin Koikkara Reeny 	msgh.msg_control = control;
14507fc6ae50SShibin Koikkara Reeny 	msgh.msg_controllen = sizeof(control);
14517fc6ae50SShibin Koikkara Reeny 
14527fc6ae50SShibin Koikkara Reeny 	/* Translate the FD. */
14537fc6ae50SShibin Koikkara Reeny 	cmsg = CMSG_FIRSTHDR(&msgh);
14547fc6ae50SShibin Koikkara Reeny 	cmsg->cmsg_len = CMSG_LEN(sizeof(*fd));
14557fc6ae50SShibin Koikkara Reeny 	cmsg->cmsg_level = SOL_SOCKET;
14567fc6ae50SShibin Koikkara Reeny 	cmsg->cmsg_type = SCM_RIGHTS;
14577fc6ae50SShibin Koikkara Reeny 	memcpy(CMSG_DATA(cmsg), fd, sizeof(*fd));
14587fc6ae50SShibin Koikkara Reeny 
14597fc6ae50SShibin Koikkara Reeny 	/* Send the request message. */
14607fc6ae50SShibin Koikkara Reeny 	do {
14617fc6ae50SShibin Koikkara Reeny 		snd = sendmsg(sock, &msgh, 0);
14627fc6ae50SShibin Koikkara Reeny 	} while (snd < 0 && errno == EINTR);
14637fc6ae50SShibin Koikkara Reeny 
14647fc6ae50SShibin Koikkara Reeny 	return snd;
14657fc6ae50SShibin Koikkara Reeny }
14667fc6ae50SShibin Koikkara Reeny 
14677fc6ae50SShibin Koikkara Reeny static int
14687fc6ae50SShibin Koikkara Reeny read_msg(int sock, char *response, struct sockaddr_un *s, int *fd)
14697fc6ae50SShibin Koikkara Reeny {
14707fc6ae50SShibin Koikkara Reeny 	int msglen;
14717fc6ae50SShibin Koikkara Reeny 	struct msghdr msgh;
14727fc6ae50SShibin Koikkara Reeny 	struct iovec iov;
14737fc6ae50SShibin Koikkara Reeny 	char control[CMSG_SPACE(sizeof(*fd))];
14747fc6ae50SShibin Koikkara Reeny 	struct cmsghdr *cmsg;
14757fc6ae50SShibin Koikkara Reeny 
14767fc6ae50SShibin Koikkara Reeny 	/* Initialize message header structure */
14777fc6ae50SShibin Koikkara Reeny 	memset(&msgh, 0, sizeof(msgh));
14787fc6ae50SShibin Koikkara Reeny 	iov.iov_base = response;
14797fc6ae50SShibin Koikkara Reeny 	iov.iov_len = UDS_MAX_CMD_RESP;
14807fc6ae50SShibin Koikkara Reeny 
14817fc6ae50SShibin Koikkara Reeny 	msgh.msg_name = s;
14827fc6ae50SShibin Koikkara Reeny 	msgh.msg_namelen = sizeof(*s);
14837fc6ae50SShibin Koikkara Reeny 	msgh.msg_iov = &iov;
14847fc6ae50SShibin Koikkara Reeny 	msgh.msg_iovlen = 1;
14857fc6ae50SShibin Koikkara Reeny 	msgh.msg_control = control;
14867fc6ae50SShibin Koikkara Reeny 	msgh.msg_controllen = sizeof(control);
14877fc6ae50SShibin Koikkara Reeny 
14887fc6ae50SShibin Koikkara Reeny 	msglen = recvmsg(sock, &msgh, 0);
14897fc6ae50SShibin Koikkara Reeny 
14907fc6ae50SShibin Koikkara Reeny 	/* zero length message means socket was closed */
14917fc6ae50SShibin Koikkara Reeny 	if (msglen == 0)
14927fc6ae50SShibin Koikkara Reeny 		return 0;
14937fc6ae50SShibin Koikkara Reeny 
14947fc6ae50SShibin Koikkara Reeny 	if (msglen < 0) {
1495e99981afSDavid Marchand 		AF_XDP_LOG_LINE(ERR, "recvmsg failed, %s", strerror(errno));
14967fc6ae50SShibin Koikkara Reeny 		return -1;
14977fc6ae50SShibin Koikkara Reeny 	}
14987fc6ae50SShibin Koikkara Reeny 
14997fc6ae50SShibin Koikkara Reeny 	/* read auxiliary FDs if any */
15007fc6ae50SShibin Koikkara Reeny 	for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL;
15017fc6ae50SShibin Koikkara Reeny 			cmsg = CMSG_NXTHDR(&msgh, cmsg)) {
15027fc6ae50SShibin Koikkara Reeny 		if (cmsg->cmsg_level == SOL_SOCKET &&
15037fc6ae50SShibin Koikkara Reeny 				cmsg->cmsg_type == SCM_RIGHTS) {
15047fc6ae50SShibin Koikkara Reeny 			memcpy(fd, CMSG_DATA(cmsg), sizeof(*fd));
15057fc6ae50SShibin Koikkara Reeny 			break;
15067fc6ae50SShibin Koikkara Reeny 		}
15077fc6ae50SShibin Koikkara Reeny 	}
15087fc6ae50SShibin Koikkara Reeny 
15097fc6ae50SShibin Koikkara Reeny 	response[msglen] = '\0';
15107fc6ae50SShibin Koikkara Reeny 	return msglen;
15117fc6ae50SShibin Koikkara Reeny }
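/*
 * Illustrative note: the map fd travels as SCM_RIGHTS ancillary data, so the
 * kernel installs a duplicate of the sender's descriptor in this process; the
 * numeric value received here can differ from the one the Device Plugin sent.
 */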
15127fc6ae50SShibin Koikkara Reeny 
15137fc6ae50SShibin Koikkara Reeny static int
15149c132373SMaryam Tahhan make_request_dp(int sock, struct sockaddr_un *server, char *request,
15159c132373SMaryam Tahhan 		 int *req_fd, char *response, int *out_fd, const char *dp_path)
15167fc6ae50SShibin Koikkara Reeny {
15177fc6ae50SShibin Koikkara Reeny 	int rval;
15187fc6ae50SShibin Koikkara Reeny 
1519e99981afSDavid Marchand 	AF_XDP_LOG_LINE(DEBUG, "Request: [%s]", request);
15207fc6ae50SShibin Koikkara Reeny 
15217fc6ae50SShibin Koikkara Reeny 	/* If there is no file descriptor to send, write directly to the socket;
15227fc6ae50SShibin Koikkara Reeny 	 * otherwise use sendmsg() to pass the file descriptor along.
15237fc6ae50SShibin Koikkara Reeny 	 */
15247fc6ae50SShibin Koikkara Reeny 	if (req_fd == NULL)
15257fc6ae50SShibin Koikkara Reeny 		rval = write(sock, request, strlen(request));
15267fc6ae50SShibin Koikkara Reeny 	else
15279c132373SMaryam Tahhan 		rval = send_msg(sock, request, req_fd, dp_path);
15287fc6ae50SShibin Koikkara Reeny 
15297fc6ae50SShibin Koikkara Reeny 	if (rval < 0) {
1530e99981afSDavid Marchand 		AF_XDP_LOG_LINE(ERR, "Write error %s", strerror(errno));
15317fc6ae50SShibin Koikkara Reeny 		return -1;
15327fc6ae50SShibin Koikkara Reeny 	}
15337fc6ae50SShibin Koikkara Reeny 
15347fc6ae50SShibin Koikkara Reeny 	rval = read_msg(sock, response, server, out_fd);
15357fc6ae50SShibin Koikkara Reeny 	if (rval <= 0) {
1536e99981afSDavid Marchand 		AF_XDP_LOG_LINE(ERR, "Read error %d", rval);
15377fc6ae50SShibin Koikkara Reeny 		return -1;
15387fc6ae50SShibin Koikkara Reeny 	}
1539e99981afSDavid Marchand 	AF_XDP_LOG_LINE(DEBUG, "Response: [%s]", response);
15407fc6ae50SShibin Koikkara Reeny 
15417fc6ae50SShibin Koikkara Reeny 	return 0;
15427fc6ae50SShibin Koikkara Reeny }
15437fc6ae50SShibin Koikkara Reeny 
15447fc6ae50SShibin Koikkara Reeny static int
15457fc6ae50SShibin Koikkara Reeny check_response(char *response, char *exp_resp, long size)
15467fc6ae50SShibin Koikkara Reeny {
15477fc6ae50SShibin Koikkara Reeny 	return strncmp(response, exp_resp, size);
15487fc6ae50SShibin Koikkara Reeny }
15497fc6ae50SShibin Koikkara Reeny 
15507fc6ae50SShibin Koikkara Reeny static int
15519c132373SMaryam Tahhan uds_get_xskmap_fd(char *if_name, const char *dp_path)
15527fc6ae50SShibin Koikkara Reeny {
15537fc6ae50SShibin Koikkara Reeny 	char request[UDS_MAX_CMD_LEN], response[UDS_MAX_CMD_RESP];
15547fc6ae50SShibin Koikkara Reeny 	char hostname[MAX_LONG_OPT_SZ], exp_resp[UDS_MAX_CMD_RESP];
15557fc6ae50SShibin Koikkara Reeny 	struct sockaddr_un server;
15567fc6ae50SShibin Koikkara Reeny 	int xsk_map_fd = -1, out_fd = 0;
15577fc6ae50SShibin Koikkara Reeny 	int sock, err;
15587fc6ae50SShibin Koikkara Reeny 
15597fc6ae50SShibin Koikkara Reeny 	err = gethostname(hostname, MAX_LONG_OPT_SZ - 1);
15607fc6ae50SShibin Koikkara Reeny 	if (err)
15617fc6ae50SShibin Koikkara Reeny 		return -1;
15627fc6ae50SShibin Koikkara Reeny 
15637fc6ae50SShibin Koikkara Reeny 	memset(&server, 0, sizeof(server));
15649c132373SMaryam Tahhan 	sock = init_uds_sock(&server, dp_path);
1565cd2729e4SShibin Koikkara Reeny 	if (sock < 0)
1566cd2729e4SShibin Koikkara Reeny 		return -1;
15677fc6ae50SShibin Koikkara Reeny 
15689c132373SMaryam Tahhan 	/* Initiate the handshake with the AF_XDP Device Plugin: send /connect,hostname */
15697fc6ae50SShibin Koikkara Reeny 	snprintf(request, sizeof(request), "%s,%s", UDS_CONNECT_MSG, hostname);
15707fc6ae50SShibin Koikkara Reeny 	memset(response, 0, sizeof(response));
15719c132373SMaryam Tahhan 	if (make_request_dp(sock, &server, request, NULL, response, &out_fd, dp_path) < 0) {
1572e99981afSDavid Marchand 		AF_XDP_LOG_LINE(ERR, "Error in processing cmd [%s]", request);
15737fc6ae50SShibin Koikkara Reeny 		goto err_close;
15747fc6ae50SShibin Koikkara Reeny 	}
15757fc6ae50SShibin Koikkara Reeny 
15767fc6ae50SShibin Koikkara Reeny 	/* Expect /host_ok */
15777fc6ae50SShibin Koikkara Reeny 	strlcpy(exp_resp, UDS_HOST_OK_MSG, UDS_MAX_CMD_LEN);
15787fc6ae50SShibin Koikkara Reeny 	if (check_response(response, exp_resp, strlen(exp_resp)) < 0) {
1579e99981afSDavid Marchand 		AF_XDP_LOG_LINE(ERR, "Unexpected response [%s]", response);
15807fc6ae50SShibin Koikkara Reeny 		goto err_close;
15817fc6ae50SShibin Koikkara Reeny 	}
15827fc6ae50SShibin Koikkara Reeny 	/* Request for "/version" */
15837fc6ae50SShibin Koikkara Reeny 	strlcpy(request, UDS_VERSION_MSG, UDS_MAX_CMD_LEN);
15847fc6ae50SShibin Koikkara Reeny 	memset(response, 0, sizeof(response));
15859c132373SMaryam Tahhan 	if (make_request_dp(sock, &server, request, NULL, response, &out_fd, dp_path) < 0) {
1586e99981afSDavid Marchand 		AF_XDP_LOG_LINE(ERR, "Error in processing cmd [%s]", request);
15877fc6ae50SShibin Koikkara Reeny 		goto err_close;
15887fc6ae50SShibin Koikkara Reeny 	}
15897fc6ae50SShibin Koikkara Reeny 
15907fc6ae50SShibin Koikkara Reeny 	/* Request the xsk map file descriptor for the netdev name */
15917fc6ae50SShibin Koikkara Reeny 	snprintf(request, sizeof(request), "%s,%s", UDS_XSK_MAP_FD_MSG, if_name);
15927fc6ae50SShibin Koikkara Reeny 	memset(response, 0, sizeof(response));
15939c132373SMaryam Tahhan 	if (make_request_dp(sock, &server, request, NULL, response, &out_fd, dp_path) < 0) {
1594e99981afSDavid Marchand 		AF_XDP_LOG_LINE(ERR, "Error in processing cmd [%s]", request);
15957fc6ae50SShibin Koikkara Reeny 		goto err_close;
15967fc6ae50SShibin Koikkara Reeny 	}
15977fc6ae50SShibin Koikkara Reeny 
15987fc6ae50SShibin Koikkara Reeny 	if (out_fd < 0) {
1599e99981afSDavid Marchand 		AF_XDP_LOG_LINE(ERR, "Error in processing cmd [%s]", request);
16007fc6ae50SShibin Koikkara Reeny 		goto err_close;
16017fc6ae50SShibin Koikkara Reeny 	}
16027fc6ae50SShibin Koikkara Reeny 
16037fc6ae50SShibin Koikkara Reeny 	xsk_map_fd = out_fd;
16047fc6ae50SShibin Koikkara Reeny 
16057fc6ae50SShibin Koikkara Reeny 	/* Expect fd_ack with file descriptor */
16067fc6ae50SShibin Koikkara Reeny 	strlcpy(exp_resp, UDS_FD_ACK_MSG, UDS_MAX_CMD_LEN);
16077fc6ae50SShibin Koikkara Reeny 	if (check_response(response, exp_resp, strlen(exp_resp)) < 0) {
1608e99981afSDavid Marchand 		AF_XDP_LOG_LINE(ERR, "Unexpected response [%s]", response);
16097fc6ae50SShibin Koikkara Reeny 		goto err_close;
16107fc6ae50SShibin Koikkara Reeny 	}
16117fc6ae50SShibin Koikkara Reeny 
16127fc6ae50SShibin Koikkara Reeny 	/* Initiate close connection */
16137fc6ae50SShibin Koikkara Reeny 	strlcpy(request, UDS_FIN_MSG, UDS_MAX_CMD_LEN);
16147fc6ae50SShibin Koikkara Reeny 	memset(response, 0, sizeof(response));
16159c132373SMaryam Tahhan 	if (make_request_dp(sock, &server, request, NULL, response, &out_fd, dp_path) < 0) {
1616e99981afSDavid Marchand 		AF_XDP_LOG_LINE(ERR, "Error in processing cmd [%s]", request);
16177fc6ae50SShibin Koikkara Reeny 		goto err_close;
16187fc6ae50SShibin Koikkara Reeny 	}
16197fc6ae50SShibin Koikkara Reeny 
16207fc6ae50SShibin Koikkara Reeny 	/* Connection close */
16217fc6ae50SShibin Koikkara Reeny 	strlcpy(exp_resp, UDS_FIN_ACK_MSG, UDS_MAX_CMD_LEN);
16227fc6ae50SShibin Koikkara Reeny 	if (check_response(response, exp_resp, strlen(exp_resp)) < 0) {
1623e99981afSDavid Marchand 		AF_XDP_LOG_LINE(ERR, "Unexpected response [%s]", response);
16247fc6ae50SShibin Koikkara Reeny 		goto err_close;
16257fc6ae50SShibin Koikkara Reeny 	}
16267fc6ae50SShibin Koikkara Reeny 	close(sock);
16277fc6ae50SShibin Koikkara Reeny 
16287fc6ae50SShibin Koikkara Reeny 	return xsk_map_fd;
16297fc6ae50SShibin Koikkara Reeny 
16307fc6ae50SShibin Koikkara Reeny err_close:
16317fc6ae50SShibin Koikkara Reeny 	close(sock);
16327fc6ae50SShibin Koikkara Reeny 	return -1;
16337fc6ae50SShibin Koikkara Reeny }
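/*
 * Summary of the handshake implemented above (message strings come from the
 * UDS_* macros used in this function):
 *   UDS_CONNECT_MSG,<hostname>   -> expect UDS_HOST_OK_MSG
 *   UDS_VERSION_MSG              -> version response (not validated here)
 *   UDS_XSK_MAP_FD_MSG,<ifname>  -> expect UDS_FD_ACK_MSG plus the map fd via SCM_RIGHTS
 *   UDS_FIN_MSG                  -> expect UDS_FIN_ACK_MSG, then close the socket
 */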
16347fc6ae50SShibin Koikkara Reeny 
16357fc6ae50SShibin Koikkara Reeny static int
1636f1debd77SXiaolong Ye xsk_configure(struct pmd_internals *internals, struct pkt_rx_queue *rxq,
1637f1debd77SXiaolong Ye 	      int ring_size)
1638f1debd77SXiaolong Ye {
1639f1debd77SXiaolong Ye 	struct xsk_socket_config cfg;
1640f1debd77SXiaolong Ye 	struct pkt_tx_queue *txq = rxq->pair;
1641f1debd77SXiaolong Ye 	int ret = 0;
1642a63d4fcaSCiara Loftus 	int reserve_size = ETH_AF_XDP_DFLT_NUM_DESCS;
1643d8a21077SCiara Loftus 	struct rte_mbuf *fq_bufs[reserve_size];
16446dd3286fSCiara Loftus 	bool reserve_before;
1645f1debd77SXiaolong Ye 
1646339b88c6SXiaolong Ye 	rxq->umem = xdp_umem_configure(internals, rxq);
1647f1debd77SXiaolong Ye 	if (rxq->umem == NULL)
1648f1debd77SXiaolong Ye 		return -ENOMEM;
1649d8a21077SCiara Loftus 	txq->umem = rxq->umem;
1650e12a0166STyler Retzlaff 	reserve_before = rte_atomic_load_explicit(&rxq->umem->refcnt,
1651e12a0166STyler Retzlaff 			rte_memory_order_acquire) <= 1;
1652f1debd77SXiaolong Ye 
165381fe6720SCiara Loftus #if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
165481fe6720SCiara Loftus 	ret = rte_pktmbuf_alloc_bulk(rxq->umem->mb_pool, fq_bufs, reserve_size);
165581fe6720SCiara Loftus 	if (ret) {
1656e99981afSDavid Marchand 		AF_XDP_LOG_LINE(DEBUG, "Failed to get enough buffers for fq.");
165781fe6720SCiara Loftus 		goto out_umem;
165881fe6720SCiara Loftus 	}
165981fe6720SCiara Loftus #endif
166081fe6720SCiara Loftus 
16616dd3286fSCiara Loftus 	/* reserve fill queue of queues not (yet) sharing UMEM */
16626dd3286fSCiara Loftus 	if (reserve_before) {
166381fe6720SCiara Loftus 		ret = reserve_fill_queue(rxq->umem, reserve_size, fq_bufs, &rxq->fq);
166481fe6720SCiara Loftus 		if (ret) {
1665e99981afSDavid Marchand 			AF_XDP_LOG_LINE(ERR, "Failed to reserve fill queue.");
166681fe6720SCiara Loftus 			goto out_umem;
166781fe6720SCiara Loftus 		}
16686dd3286fSCiara Loftus 	}
166981fe6720SCiara Loftus 
1670f1debd77SXiaolong Ye 	cfg.rx_size = ring_size;
1671f1debd77SXiaolong Ye 	cfg.tx_size = ring_size;
1672f1debd77SXiaolong Ye 	cfg.libbpf_flags = 0;
1673f1debd77SXiaolong Ye 	cfg.xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
1674f1debd77SXiaolong Ye 	cfg.bind_flags = 0;
167545bba02cSXiaolong Ye 
1676b275e298SXiaoyun Li 	/* Force AF_XDP socket into copy mode when users want it */
1677b275e298SXiaoyun Li 	if (internals->force_copy)
1678b275e298SXiaoyun Li 		cfg.bind_flags |= XDP_COPY;
1679b275e298SXiaoyun Li 
168045bba02cSXiaolong Ye #if defined(XDP_USE_NEED_WAKEUP)
168145bba02cSXiaolong Ye 	cfg.bind_flags |= XDP_USE_NEED_WAKEUP;
168245bba02cSXiaolong Ye #endif
168345bba02cSXiaolong Ye 
16847fc6ae50SShibin Koikkara Reeny 	/* Prevent libbpf from loading a default XDP program */
16858a324b1cSMaryam Tahhan 	if (internals->use_cni || internals->use_pinned_map)
16867fc6ae50SShibin Koikkara Reeny 		cfg.libbpf_flags |= XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD;
16877fc6ae50SShibin Koikkara Reeny 
16886f6134c3SJunxiao Shi 	if (strnlen(internals->prog_path, PATH_MAX)) {
16896f6134c3SJunxiao Shi 		if (!internals->custom_prog_configured) {
1690288a85aeSCiara Loftus 			ret = load_custom_xdp_prog(internals->prog_path,
169101fa83c9SCiara Loftus 							internals->if_index,
169201fa83c9SCiara Loftus 							&internals->map);
1693288a85aeSCiara Loftus 			if (ret) {
1694e99981afSDavid Marchand 				AF_XDP_LOG_LINE(ERR, "Failed to load custom XDP program %s",
1695288a85aeSCiara Loftus 						internals->prog_path);
1696b26431a6SCiara Loftus 				goto out_umem;
1697288a85aeSCiara Loftus 			}
1698288a85aeSCiara Loftus 			internals->custom_prog_configured = 1;
16996f6134c3SJunxiao Shi 		}
17006f6134c3SJunxiao Shi 		cfg.libbpf_flags |= XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD;
1701288a85aeSCiara Loftus 	}
1702288a85aeSCiara Loftus 
170374b46340SCiara Loftus 	if (internals->shared_umem)
170474b46340SCiara Loftus 		ret = create_shared_socket(&rxq->xsk, internals->if_name,
170574b46340SCiara Loftus 				rxq->xsk_queue_idx, rxq->umem->umem, &rxq->rx,
170674b46340SCiara Loftus 				&txq->tx, &rxq->fq, &rxq->cq, &cfg);
170774b46340SCiara Loftus 	else
1708f1debd77SXiaolong Ye 		ret = xsk_socket__create(&rxq->xsk, internals->if_name,
1709339b88c6SXiaolong Ye 				rxq->xsk_queue_idx, rxq->umem->umem, &rxq->rx,
1710f1debd77SXiaolong Ye 				&txq->tx, &cfg);
171174b46340SCiara Loftus 
1712f1debd77SXiaolong Ye 	if (ret) {
1713e99981afSDavid Marchand 		AF_XDP_LOG_LINE(ERR, "Failed to create xsk socket.");
1714b26431a6SCiara Loftus 		goto out_umem;
1715f1debd77SXiaolong Ye 	}
1716f1debd77SXiaolong Ye 
17176dd3286fSCiara Loftus 	if (!reserve_before) {
17186dd3286fSCiara Loftus 		/* reserve fill queue of queues sharing UMEM */
17196dd3286fSCiara Loftus 		ret = reserve_fill_queue(rxq->umem, reserve_size, fq_bufs, &rxq->fq);
17206dd3286fSCiara Loftus 		if (ret) {
1721e99981afSDavid Marchand 			AF_XDP_LOG_LINE(ERR, "Failed to reserve fill queue.");
17226dd3286fSCiara Loftus 			goto out_xsk;
17236dd3286fSCiara Loftus 		}
17246dd3286fSCiara Loftus 	}
17256dd3286fSCiara Loftus 
172601fa83c9SCiara Loftus 	/* insert the xsk into the xsks_map */
172701fa83c9SCiara Loftus 	if (internals->custom_prog_configured) {
172801fa83c9SCiara Loftus 		int err, fd;
172901fa83c9SCiara Loftus 
173001fa83c9SCiara Loftus 		fd = xsk_socket__fd(rxq->xsk);
173101fa83c9SCiara Loftus 		err = bpf_map_update_elem(bpf_map__fd(internals->map),
173201fa83c9SCiara Loftus 					  &rxq->xsk_queue_idx, &fd, 0);
173301fa83c9SCiara Loftus 		if (err) {
1734e99981afSDavid Marchand 			AF_XDP_LOG_LINE(ERR, "Failed to insert xsk in map.");
1735b26431a6SCiara Loftus 			goto out_xsk;
173601fa83c9SCiara Loftus 		}
173701fa83c9SCiara Loftus 	}
173801fa83c9SCiara Loftus 
17398a324b1cSMaryam Tahhan 	if (internals->use_cni || internals->use_pinned_map) {
17409c132373SMaryam Tahhan 		int err, map_fd;
17417fc6ae50SShibin Koikkara Reeny 
17428a324b1cSMaryam Tahhan 		if (internals->use_cni) {
17439c132373SMaryam Tahhan 			/* get the xsk map fd from the AF_XDP Device Plugin */
17449c132373SMaryam Tahhan 			map_fd = uds_get_xskmap_fd(internals->if_name, internals->dp_path);
17457fc6ae50SShibin Koikkara Reeny 			if (map_fd < 0) {
1746e99981afSDavid Marchand 				AF_XDP_LOG_LINE(ERR, "Failed to receive xskmap fd from AF_XDP Device Plugin");
17477fc6ae50SShibin Koikkara Reeny 				goto out_xsk;
17487fc6ae50SShibin Koikkara Reeny 			}
17498a324b1cSMaryam Tahhan 		} else {
17508a324b1cSMaryam Tahhan 			/* get the xsk map fd from the pinned map path */
17518a324b1cSMaryam Tahhan 			err = get_pinned_map(internals->dp_path, &map_fd);
17528a324b1cSMaryam Tahhan 			if (err < 0 || map_fd < 0) {
1753e99981afSDavid Marchand 				AF_XDP_LOG_LINE(ERR, "Failed to retrieve pinned map fd");
17548a324b1cSMaryam Tahhan 				goto out_xsk;
17558a324b1cSMaryam Tahhan 			}
17568a324b1cSMaryam Tahhan 		}
17579c132373SMaryam Tahhan 
17589c132373SMaryam Tahhan 		err = update_xskmap(rxq->xsk, map_fd, rxq->xsk_queue_idx);
17597fc6ae50SShibin Koikkara Reeny 		if (err) {
1760e99981afSDavid Marchand 			AF_XDP_LOG_LINE(ERR, "Failed to insert xsk in map.");
17617fc6ae50SShibin Koikkara Reeny 			goto out_xsk;
17627fc6ae50SShibin Koikkara Reeny 		}
17639c132373SMaryam Tahhan 
17647fc6ae50SShibin Koikkara Reeny 	} else if (rxq->busy_budget) {
1765055a3936SCiara Loftus 		ret = configure_preferred_busy_poll(rxq);
1766055a3936SCiara Loftus 		if (ret) {
1767e99981afSDavid Marchand 			AF_XDP_LOG_LINE(ERR, "Failed to configure busy polling.");
1768b26431a6SCiara Loftus 			goto out_xsk;
1769055a3936SCiara Loftus 		}
1770055a3936SCiara Loftus 	}
1771055a3936SCiara Loftus 
1772f1debd77SXiaolong Ye 	return 0;
1773f1debd77SXiaolong Ye 
1774b26431a6SCiara Loftus out_xsk:
1775b26431a6SCiara Loftus 	xsk_socket__delete(rxq->xsk);
1776b26431a6SCiara Loftus out_umem:
1777e12a0166STyler Retzlaff 	if (rte_atomic_fetch_sub_explicit(&rxq->umem->refcnt, 1, rte_memory_order_acquire) - 1 == 0)
1778f1debd77SXiaolong Ye 		xdp_umem_destroy(rxq->umem);
1779f1debd77SXiaolong Ye 
1780f1debd77SXiaolong Ye 	return ret;
1781f1debd77SXiaolong Ye }
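/*
 * Setup order implemented above: configure or reuse the UMEM, pre-fill the fill
 * ring for a first (non-sharing) queue, create the xsk socket (shared or
 * exclusive), fill the ring for sharing queues, insert the socket into the xsks
 * map when a custom program or the Device Plugin is used, then optionally enable
 * preferred busy polling.
 */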
1782f1debd77SXiaolong Ye 
1783f1debd77SXiaolong Ye static int
1784f1debd77SXiaolong Ye eth_rx_queue_setup(struct rte_eth_dev *dev,
1785f1debd77SXiaolong Ye 		   uint16_t rx_queue_id,
1786f1debd77SXiaolong Ye 		   uint16_t nb_rx_desc,
1787f1debd77SXiaolong Ye 		   unsigned int socket_id __rte_unused,
1788f1debd77SXiaolong Ye 		   const struct rte_eth_rxconf *rx_conf __rte_unused,
1789f1debd77SXiaolong Ye 		   struct rte_mempool *mb_pool)
1790f1debd77SXiaolong Ye {
1791f1debd77SXiaolong Ye 	struct pmd_internals *internals = dev->data->dev_private;
17929876cf83SCiara Loftus 	struct pmd_process_private *process_private = dev->process_private;
1793f1debd77SXiaolong Ye 	struct pkt_rx_queue *rxq;
1794f1debd77SXiaolong Ye 	int ret;
1795f1debd77SXiaolong Ye 
1796f1debd77SXiaolong Ye 	rxq = &internals->rx_queues[rx_queue_id];
1797f1debd77SXiaolong Ye 
1798e99981afSDavid Marchand 	AF_XDP_LOG_LINE(INFO, "Set up rx queue, rx queue id: %d, xsk queue id: %d",
1799339b88c6SXiaolong Ye 		   rx_queue_id, rxq->xsk_queue_idx);
1800d8a21077SCiara Loftus 
1801d8a21077SCiara Loftus #ifndef XDP_UMEM_UNALIGNED_CHUNK_FLAG
1802d8a21077SCiara Loftus 	uint32_t buf_size, data_size;
1803d8a21077SCiara Loftus 
1804f1debd77SXiaolong Ye 	/* Now get the space available for data in the mbuf */
1805f1debd77SXiaolong Ye 	buf_size = rte_pktmbuf_data_room_size(mb_pool) -
1806f1debd77SXiaolong Ye 		RTE_PKTMBUF_HEADROOM;
18071668e87dSCiara Loftus 	data_size = ETH_AF_XDP_FRAME_SIZE;
1808f1debd77SXiaolong Ye 
1809f1debd77SXiaolong Ye 	if (data_size > buf_size) {
1810e99981afSDavid Marchand 		AF_XDP_LOG_LINE(ERR, "%s: %d bytes will not fit in mbuf (%d bytes)",
1811f1debd77SXiaolong Ye 			dev->device->name, data_size, buf_size);
1812f1debd77SXiaolong Ye 		ret = -ENOMEM;
1813f1debd77SXiaolong Ye 		goto err;
1814f1debd77SXiaolong Ye 	}
1815d8a21077SCiara Loftus #endif
1816f1debd77SXiaolong Ye 
1817f1debd77SXiaolong Ye 	rxq->mb_pool = mb_pool;
1818f1debd77SXiaolong Ye 
1819f1debd77SXiaolong Ye 	if (xsk_configure(internals, rxq, nb_rx_desc)) {
1820e99981afSDavid Marchand 		AF_XDP_LOG_LINE(ERR, "Failed to configure xdp socket");
1821f1debd77SXiaolong Ye 		ret = -EINVAL;
1822f1debd77SXiaolong Ye 		goto err;
1823f1debd77SXiaolong Ye 	}
1824f1debd77SXiaolong Ye 
1825055a3936SCiara Loftus 	if (!rxq->busy_budget)
1826e99981afSDavid Marchand 		AF_XDP_LOG_LINE(DEBUG, "Preferred busy polling not enabled");
1827055a3936SCiara Loftus 
182845bba02cSXiaolong Ye 	rxq->fds[0].fd = xsk_socket__fd(rxq->xsk);
182945bba02cSXiaolong Ye 	rxq->fds[0].events = POLLIN;
183045bba02cSXiaolong Ye 
18319876cf83SCiara Loftus 	process_private->rxq_xsk_fds[rx_queue_id] = rxq->fds[0].fd;
18329876cf83SCiara Loftus 
18339bab1d26SCiara Loftus 	rxq->port = dev->data->port_id;
18349bab1d26SCiara Loftus 
1835f1debd77SXiaolong Ye 	dev->data->rx_queues[rx_queue_id] = rxq;
1836f1debd77SXiaolong Ye 	return 0;
1837f1debd77SXiaolong Ye 
1838f1debd77SXiaolong Ye err:
1839f1debd77SXiaolong Ye 	return ret;
1840f1debd77SXiaolong Ye }
1841f1debd77SXiaolong Ye 
1842f1debd77SXiaolong Ye static int
1843f1debd77SXiaolong Ye eth_tx_queue_setup(struct rte_eth_dev *dev,
1844f1debd77SXiaolong Ye 		   uint16_t tx_queue_id,
1845f1debd77SXiaolong Ye 		   uint16_t nb_tx_desc __rte_unused,
1846f1debd77SXiaolong Ye 		   unsigned int socket_id __rte_unused,
1847f1debd77SXiaolong Ye 		   const struct rte_eth_txconf *tx_conf __rte_unused)
1848f1debd77SXiaolong Ye {
1849f1debd77SXiaolong Ye 	struct pmd_internals *internals = dev->data->dev_private;
1850f1debd77SXiaolong Ye 	struct pkt_tx_queue *txq;
1851f1debd77SXiaolong Ye 
1852f1debd77SXiaolong Ye 	txq = &internals->tx_queues[tx_queue_id];
1853f1debd77SXiaolong Ye 
1854f1debd77SXiaolong Ye 	dev->data->tx_queues[tx_queue_id] = txq;
1855f1debd77SXiaolong Ye 	return 0;
1856f1debd77SXiaolong Ye }
1857f1debd77SXiaolong Ye 
1858f1debd77SXiaolong Ye static int
1859f1debd77SXiaolong Ye eth_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
1860f1debd77SXiaolong Ye {
1861f1debd77SXiaolong Ye 	struct pmd_internals *internals = dev->data->dev_private;
1862f1debd77SXiaolong Ye 	struct ifreq ifr = { .ifr_mtu = mtu };
1863f1debd77SXiaolong Ye 	int ret;
1864f1debd77SXiaolong Ye 	int s;
1865f1debd77SXiaolong Ye 
1866f1debd77SXiaolong Ye 	s = socket(PF_INET, SOCK_DGRAM, 0);
1867f1debd77SXiaolong Ye 	if (s < 0)
1868f1debd77SXiaolong Ye 		return -EINVAL;
1869f1debd77SXiaolong Ye 
1870f1debd77SXiaolong Ye 	strlcpy(ifr.ifr_name, internals->if_name, IFNAMSIZ);
1871f1debd77SXiaolong Ye 	ret = ioctl(s, SIOCSIFMTU, &ifr);
1872f1debd77SXiaolong Ye 	close(s);
1873f1debd77SXiaolong Ye 
1874f1debd77SXiaolong Ye 	return (ret < 0) ? -errno : 0;
1875f1debd77SXiaolong Ye }
1876f1debd77SXiaolong Ye 
18779039c812SAndrew Rybchenko static int
1878f1debd77SXiaolong Ye eth_dev_change_flags(char *if_name, uint32_t flags, uint32_t mask)
1879f1debd77SXiaolong Ye {
1880f1debd77SXiaolong Ye 	struct ifreq ifr;
18819039c812SAndrew Rybchenko 	int ret = 0;
1882f1debd77SXiaolong Ye 	int s;
1883f1debd77SXiaolong Ye 
1884f1debd77SXiaolong Ye 	s = socket(PF_INET, SOCK_DGRAM, 0);
1885f1debd77SXiaolong Ye 	if (s < 0)
18869039c812SAndrew Rybchenko 		return -errno;
1887f1debd77SXiaolong Ye 
1888f1debd77SXiaolong Ye 	strlcpy(ifr.ifr_name, if_name, IFNAMSIZ);
18899039c812SAndrew Rybchenko 	if (ioctl(s, SIOCGIFFLAGS, &ifr) < 0) {
18909039c812SAndrew Rybchenko 		ret = -errno;
1891f1debd77SXiaolong Ye 		goto out;
18929039c812SAndrew Rybchenko 	}
1893f1debd77SXiaolong Ye 	ifr.ifr_flags &= mask;
1894f1debd77SXiaolong Ye 	ifr.ifr_flags |= flags;
18959039c812SAndrew Rybchenko 	if (ioctl(s, SIOCSIFFLAGS, &ifr) < 0) {
18969039c812SAndrew Rybchenko 		ret = -errno;
1897f1debd77SXiaolong Ye 		goto out;
18989039c812SAndrew Rybchenko 	}
1899f1debd77SXiaolong Ye out:
1900f1debd77SXiaolong Ye 	close(s);
19019039c812SAndrew Rybchenko 	return ret;
1902f1debd77SXiaolong Ye }
1903f1debd77SXiaolong Ye 
19049039c812SAndrew Rybchenko static int
1905f1debd77SXiaolong Ye eth_dev_promiscuous_enable(struct rte_eth_dev *dev)
1906f1debd77SXiaolong Ye {
1907f1debd77SXiaolong Ye 	struct pmd_internals *internals = dev->data->dev_private;
1908f1debd77SXiaolong Ye 
19099039c812SAndrew Rybchenko 	return eth_dev_change_flags(internals->if_name, IFF_PROMISC, ~0);
1910f1debd77SXiaolong Ye }
1911f1debd77SXiaolong Ye 
19129039c812SAndrew Rybchenko static int
1913f1debd77SXiaolong Ye eth_dev_promiscuous_disable(struct rte_eth_dev *dev)
1914f1debd77SXiaolong Ye {
1915f1debd77SXiaolong Ye 	struct pmd_internals *internals = dev->data->dev_private;
1916f1debd77SXiaolong Ye 
19179039c812SAndrew Rybchenko 	return eth_dev_change_flags(internals->if_name, 0, ~IFF_PROMISC);
1918f1debd77SXiaolong Ye }
1919f1debd77SXiaolong Ye 
1920f1debd77SXiaolong Ye static const struct eth_dev_ops ops = {
1921f1debd77SXiaolong Ye 	.dev_start = eth_dev_start,
1922f1debd77SXiaolong Ye 	.dev_stop = eth_dev_stop,
1923f1debd77SXiaolong Ye 	.dev_close = eth_dev_close,
1924f1debd77SXiaolong Ye 	.dev_configure = eth_dev_configure,
1925f1debd77SXiaolong Ye 	.dev_infos_get = eth_dev_info,
1926f1debd77SXiaolong Ye 	.mtu_set = eth_dev_mtu_set,
1927f1debd77SXiaolong Ye 	.promiscuous_enable = eth_dev_promiscuous_enable,
1928f1debd77SXiaolong Ye 	.promiscuous_disable = eth_dev_promiscuous_disable,
1929f1debd77SXiaolong Ye 	.rx_queue_setup = eth_rx_queue_setup,
1930f1debd77SXiaolong Ye 	.tx_queue_setup = eth_tx_queue_setup,
1931f1debd77SXiaolong Ye 	.link_update = eth_link_update,
1932f1debd77SXiaolong Ye 	.stats_get = eth_stats_get,
1933f1debd77SXiaolong Ye 	.stats_reset = eth_stats_reset,
193443fb6eeaSAnatoly Burakov 	.get_monitor_addr = eth_get_monitor_addr,
1935f1debd77SXiaolong Ye };
1936f1debd77SXiaolong Ye 
19379c132373SMaryam Tahhan /* The AF_XDP Device Plugin option works in unprivileged
19389c132373SMaryam Tahhan  * container environments, where ethernet device functionality
19399c132373SMaryam Tahhan  * is reduced. An additional customised eth_dev_ops struct is
19409c132373SMaryam Tahhan  * therefore needed for the Device Plugin: promiscuous enable
19419c132373SMaryam Tahhan  * and disable functionality is removed.
19427fc6ae50SShibin Koikkara Reeny  **/
19439c132373SMaryam Tahhan static const struct eth_dev_ops ops_afxdp_dp = {
19447fc6ae50SShibin Koikkara Reeny 	.dev_start = eth_dev_start,
19457fc6ae50SShibin Koikkara Reeny 	.dev_stop = eth_dev_stop,
19467fc6ae50SShibin Koikkara Reeny 	.dev_close = eth_dev_close,
19477fc6ae50SShibin Koikkara Reeny 	.dev_configure = eth_dev_configure,
19487fc6ae50SShibin Koikkara Reeny 	.dev_infos_get = eth_dev_info,
19497fc6ae50SShibin Koikkara Reeny 	.mtu_set = eth_dev_mtu_set,
19507fc6ae50SShibin Koikkara Reeny 	.rx_queue_setup = eth_rx_queue_setup,
19517fc6ae50SShibin Koikkara Reeny 	.tx_queue_setup = eth_tx_queue_setup,
19527fc6ae50SShibin Koikkara Reeny 	.link_update = eth_link_update,
19537fc6ae50SShibin Koikkara Reeny 	.stats_get = eth_stats_get,
19547fc6ae50SShibin Koikkara Reeny 	.stats_reset = eth_stats_reset,
19557fc6ae50SShibin Koikkara Reeny 	.get_monitor_addr = eth_get_monitor_addr,
19567fc6ae50SShibin Koikkara Reeny };
19577fc6ae50SShibin Koikkara Reeny 
1958055a3936SCiara Loftus /** parse busy_budget argument */
1959055a3936SCiara Loftus static int
1960055a3936SCiara Loftus parse_budget_arg(const char *key __rte_unused,
1961055a3936SCiara Loftus 		  const char *value, void *extra_args)
1962055a3936SCiara Loftus {
1963055a3936SCiara Loftus 	int *i = (int *)extra_args;
1964055a3936SCiara Loftus 	char *end;
1965055a3936SCiara Loftus 
1966055a3936SCiara Loftus 	*i = strtol(value, &end, 10);
1967055a3936SCiara Loftus 	if (*i < 0 || *i > UINT16_MAX) {
1968e99981afSDavid Marchand 		AF_XDP_LOG_LINE(ERR, "Invalid busy_budget %i, must be >= 0 and <= %u",
1969055a3936SCiara Loftus 				*i, UINT16_MAX);
1970055a3936SCiara Loftus 		return -EINVAL;
1971055a3936SCiara Loftus 	}
1972055a3936SCiara Loftus 
1973055a3936SCiara Loftus 	return 0;
1974055a3936SCiara Loftus }
1975055a3936SCiara Loftus 
1976f1debd77SXiaolong Ye /** parse integer from integer argument */
1977f1debd77SXiaolong Ye static int
1978f1debd77SXiaolong Ye parse_integer_arg(const char *key __rte_unused,
1979f1debd77SXiaolong Ye 		  const char *value, void *extra_args)
1980f1debd77SXiaolong Ye {
1981f1debd77SXiaolong Ye 	int *i = (int *)extra_args;
1982f1debd77SXiaolong Ye 	char *end;
1983f1debd77SXiaolong Ye 
1984f1debd77SXiaolong Ye 	*i = strtol(value, &end, 10);
1985f1debd77SXiaolong Ye 	if (*i < 0) {
1986e99981afSDavid Marchand 		AF_XDP_LOG_LINE(ERR, "Argument has to be non-negative.");
1987f1debd77SXiaolong Ye 		return -EINVAL;
1988f1debd77SXiaolong Ye 	}
1989f1debd77SXiaolong Ye 
1990f1debd77SXiaolong Ye 	return 0;
1991f1debd77SXiaolong Ye }
1992f1debd77SXiaolong Ye 
1993f1debd77SXiaolong Ye /** parse name argument */
1994f1debd77SXiaolong Ye static int
1995f1debd77SXiaolong Ye parse_name_arg(const char *key __rte_unused,
1996f1debd77SXiaolong Ye 	       const char *value, void *extra_args)
1997f1debd77SXiaolong Ye {
1998f1debd77SXiaolong Ye 	char *name = extra_args;
1999f1debd77SXiaolong Ye 
2000f1debd77SXiaolong Ye 	if (strnlen(value, IFNAMSIZ) > IFNAMSIZ - 1) {
2001e99981afSDavid Marchand 		AF_XDP_LOG_LINE(ERR, "Invalid name %s, should be less than %u bytes.",
2002f1debd77SXiaolong Ye 			   value, IFNAMSIZ);
2003f1debd77SXiaolong Ye 		return -EINVAL;
2004f1debd77SXiaolong Ye 	}
2005f1debd77SXiaolong Ye 
2006f1debd77SXiaolong Ye 	strlcpy(name, value, IFNAMSIZ);
2007f1debd77SXiaolong Ye 
2008f1debd77SXiaolong Ye 	return 0;
2009f1debd77SXiaolong Ye }
2010f1debd77SXiaolong Ye 
2011288a85aeSCiara Loftus /** parse xdp prog argument */
2012288a85aeSCiara Loftus static int
2013288a85aeSCiara Loftus parse_prog_arg(const char *key __rte_unused,
2014288a85aeSCiara Loftus 	       const char *value, void *extra_args)
2015288a85aeSCiara Loftus {
2016288a85aeSCiara Loftus 	char *path = extra_args;
2017288a85aeSCiara Loftus 
2018288a85aeSCiara Loftus 	if (strnlen(value, PATH_MAX) == PATH_MAX) {
2019e99981afSDavid Marchand 		AF_XDP_LOG_LINE(ERR, "Invalid path %s, should be less than %u bytes.",
2020288a85aeSCiara Loftus 			   value, PATH_MAX);
2021288a85aeSCiara Loftus 		return -EINVAL;
2022288a85aeSCiara Loftus 	}
2023288a85aeSCiara Loftus 
2024288a85aeSCiara Loftus 	if (access(value, F_OK) != 0) {
2025e99981afSDavid Marchand 		AF_XDP_LOG_LINE(ERR, "Error accessing %s: %s",
2026288a85aeSCiara Loftus 			   value, strerror(errno));
2027288a85aeSCiara Loftus 		return -EINVAL;
2028288a85aeSCiara Loftus 	}
2029288a85aeSCiara Loftus 
2030288a85aeSCiara Loftus 	strlcpy(path, value, PATH_MAX);
2031288a85aeSCiara Loftus 
2032288a85aeSCiara Loftus 	return 0;
2033288a85aeSCiara Loftus }
2034288a85aeSCiara Loftus 
2035f1debd77SXiaolong Ye static int
2036339b88c6SXiaolong Ye xdp_get_channels_info(const char *if_name, int *max_queues,
2037339b88c6SXiaolong Ye 				int *combined_queues)
2038339b88c6SXiaolong Ye {
2039339b88c6SXiaolong Ye 	struct ethtool_channels channels;
2040339b88c6SXiaolong Ye 	struct ifreq ifr;
2041339b88c6SXiaolong Ye 	int fd, ret;
2042339b88c6SXiaolong Ye 
2043339b88c6SXiaolong Ye 	fd = socket(AF_INET, SOCK_DGRAM, 0);
2044339b88c6SXiaolong Ye 	if (fd < 0)
2045339b88c6SXiaolong Ye 		return -1;
2046339b88c6SXiaolong Ye 
2047339b88c6SXiaolong Ye 	channels.cmd = ETHTOOL_GCHANNELS;
2048339b88c6SXiaolong Ye 	ifr.ifr_data = (void *)&channels;
20496d3c595dSCiara Loftus 	strlcpy(ifr.ifr_name, if_name, IFNAMSIZ);
2050339b88c6SXiaolong Ye 	ret = ioctl(fd, SIOCETHTOOL, &ifr);
205106bb59f6SJúlius Milan 	if (ret) {
205206bb59f6SJúlius Milan 		if (errno == EOPNOTSUPP) {
205306bb59f6SJúlius Milan 			ret = 0;
205406bb59f6SJúlius Milan 		} else {
2055339b88c6SXiaolong Ye 			ret = -errno;
2056339b88c6SXiaolong Ye 			goto out;
2057339b88c6SXiaolong Ye 		}
205806bb59f6SJúlius Milan 	}
2059339b88c6SXiaolong Ye 
2060339b88c6SXiaolong Ye 	if (channels.max_combined == 0 || errno == EOPNOTSUPP) {
2061339b88c6SXiaolong Ye 		/* If the device says it has no channels, then all traffic
2062339b88c6SXiaolong Ye 		 * is sent to a single stream, so max queues = 1.
2063339b88c6SXiaolong Ye 		 */
2064339b88c6SXiaolong Ye 		*max_queues = 1;
2065339b88c6SXiaolong Ye 		*combined_queues = 1;
2066339b88c6SXiaolong Ye 	} else {
2067339b88c6SXiaolong Ye 		*max_queues = channels.max_combined;
2068339b88c6SXiaolong Ye 		*combined_queues = channels.combined_count;
2069339b88c6SXiaolong Ye 	}
2070339b88c6SXiaolong Ye 
2071339b88c6SXiaolong Ye  out:
2072339b88c6SXiaolong Ye 	close(fd);
2073339b88c6SXiaolong Ye 	return ret;
2074339b88c6SXiaolong Ye }
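/*
 * Illustrative note: this queries the same data as "ethtool -l <iface>"; a NIC
 * reporting "Combined: 8" allows up to 8 xsk queues here, while drivers without
 * channel support (EOPNOTSUPP) are treated as single-queue devices.
 */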
2075339b88c6SXiaolong Ye 
2076339b88c6SXiaolong Ye static int
2077339b88c6SXiaolong Ye parse_parameters(struct rte_kvargs *kvlist, char *if_name, int *start_queue,
2078055a3936SCiara Loftus 		 int *queue_cnt, int *shared_umem, char *prog_path,
20799c132373SMaryam Tahhan 		 int *busy_budget, int *force_copy, int *use_cni,
20808a324b1cSMaryam Tahhan 		 int *use_pinned_map, char *dp_path)
2081f1debd77SXiaolong Ye {
2082f1debd77SXiaolong Ye 	int ret;
2083f1debd77SXiaolong Ye 
2084f1debd77SXiaolong Ye 	ret = rte_kvargs_process(kvlist, ETH_AF_XDP_IFACE_ARG,
2085f1debd77SXiaolong Ye 				 &parse_name_arg, if_name);
2086f1debd77SXiaolong Ye 	if (ret < 0)
2087f1debd77SXiaolong Ye 		goto free_kvlist;
2088f1debd77SXiaolong Ye 
2089339b88c6SXiaolong Ye 	ret = rte_kvargs_process(kvlist, ETH_AF_XDP_START_QUEUE_ARG,
2090339b88c6SXiaolong Ye 				 &parse_integer_arg, start_queue);
2091f1debd77SXiaolong Ye 	if (ret < 0)
2092f1debd77SXiaolong Ye 		goto free_kvlist;
2093f1debd77SXiaolong Ye 
2094339b88c6SXiaolong Ye 	ret = rte_kvargs_process(kvlist, ETH_AF_XDP_QUEUE_COUNT_ARG,
2095339b88c6SXiaolong Ye 				 &parse_integer_arg, queue_cnt);
2096339b88c6SXiaolong Ye 	if (ret < 0 || *queue_cnt <= 0) {
2097339b88c6SXiaolong Ye 		ret = -EINVAL;
2098339b88c6SXiaolong Ye 		goto free_kvlist;
2099339b88c6SXiaolong Ye 	}
2100339b88c6SXiaolong Ye 
210174b46340SCiara Loftus 	ret = rte_kvargs_process(kvlist, ETH_AF_XDP_SHARED_UMEM_ARG,
210274b46340SCiara Loftus 				&parse_integer_arg, shared_umem);
210374b46340SCiara Loftus 	if (ret < 0)
210474b46340SCiara Loftus 		goto free_kvlist;
210574b46340SCiara Loftus 
2106288a85aeSCiara Loftus 	ret = rte_kvargs_process(kvlist, ETH_AF_XDP_PROG_ARG,
2107288a85aeSCiara Loftus 				 &parse_prog_arg, prog_path);
2108288a85aeSCiara Loftus 	if (ret < 0)
2109288a85aeSCiara Loftus 		goto free_kvlist;
2110288a85aeSCiara Loftus 
2111055a3936SCiara Loftus 	ret = rte_kvargs_process(kvlist, ETH_AF_XDP_BUDGET_ARG,
2112055a3936SCiara Loftus 				&parse_budget_arg, busy_budget);
2113055a3936SCiara Loftus 	if (ret < 0)
2114055a3936SCiara Loftus 		goto free_kvlist;
2115055a3936SCiara Loftus 
2116b275e298SXiaoyun Li 	ret = rte_kvargs_process(kvlist, ETH_AF_XDP_FORCE_COPY_ARG,
2117b275e298SXiaoyun Li 				&parse_integer_arg, force_copy);
2118b275e298SXiaoyun Li 	if (ret < 0)
2119b275e298SXiaoyun Li 		goto free_kvlist;
2120b275e298SXiaoyun Li 
21217fc6ae50SShibin Koikkara Reeny 	ret = rte_kvargs_process(kvlist, ETH_AF_XDP_USE_CNI_ARG,
21227fc6ae50SShibin Koikkara Reeny 				 &parse_integer_arg, use_cni);
21237fc6ae50SShibin Koikkara Reeny 	if (ret < 0)
21247fc6ae50SShibin Koikkara Reeny 		goto free_kvlist;
21257fc6ae50SShibin Koikkara Reeny 
21268a324b1cSMaryam Tahhan 	ret = rte_kvargs_process(kvlist, ETH_AF_XDP_USE_PINNED_MAP_ARG,
21278a324b1cSMaryam Tahhan 				 &parse_integer_arg, use_pinned_map);
21288a324b1cSMaryam Tahhan 	if (ret < 0)
21298a324b1cSMaryam Tahhan 		goto free_kvlist;
21308a324b1cSMaryam Tahhan 
21319c132373SMaryam Tahhan 	ret = rte_kvargs_process(kvlist, ETH_AF_XDP_DP_PATH_ARG,
21329c132373SMaryam Tahhan 				 &parse_prog_arg, dp_path);
21339c132373SMaryam Tahhan 	if (ret < 0)
21349c132373SMaryam Tahhan 		goto free_kvlist;
21359c132373SMaryam Tahhan 
2136f1debd77SXiaolong Ye free_kvlist:
2137f1debd77SXiaolong Ye 	rte_kvargs_free(kvlist);
2138f1debd77SXiaolong Ye 	return ret;
2139f1debd77SXiaolong Ye }
2140f1debd77SXiaolong Ye 
2141f1debd77SXiaolong Ye static int
2142f1debd77SXiaolong Ye get_iface_info(const char *if_name,
21436d13ea8eSOlivier Matz 	       struct rte_ether_addr *eth_addr,
2144f1debd77SXiaolong Ye 	       int *if_index)
2145f1debd77SXiaolong Ye {
2146f1debd77SXiaolong Ye 	struct ifreq ifr;
2147f1debd77SXiaolong Ye 	int sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_IP);
2148f1debd77SXiaolong Ye 
2149f1debd77SXiaolong Ye 	if (sock < 0)
2150f1debd77SXiaolong Ye 		return -1;
2151f1debd77SXiaolong Ye 
2152f1debd77SXiaolong Ye 	strlcpy(ifr.ifr_name, if_name, IFNAMSIZ);
2153f1debd77SXiaolong Ye 	if (ioctl(sock, SIOCGIFINDEX, &ifr))
2154f1debd77SXiaolong Ye 		goto error;
2155f1debd77SXiaolong Ye 
2156f1debd77SXiaolong Ye 	*if_index = ifr.ifr_ifindex;
2157f1debd77SXiaolong Ye 
2158f1debd77SXiaolong Ye 	if (ioctl(sock, SIOCGIFHWADDR, &ifr))
2159f1debd77SXiaolong Ye 		goto error;
2160f1debd77SXiaolong Ye 
216135b2d13fSOlivier Matz 	rte_memcpy(eth_addr, ifr.ifr_hwaddr.sa_data, RTE_ETHER_ADDR_LEN);
2162f1debd77SXiaolong Ye 
2163f1debd77SXiaolong Ye 	close(sock);
2164f1debd77SXiaolong Ye 	return 0;
2165f1debd77SXiaolong Ye 
2166f1debd77SXiaolong Ye error:
2167f1debd77SXiaolong Ye 	close(sock);
2168f1debd77SXiaolong Ye 	return -1;
2169f1debd77SXiaolong Ye }
2170f1debd77SXiaolong Ye 
2171f1debd77SXiaolong Ye static struct rte_eth_dev *
2172339b88c6SXiaolong Ye init_internals(struct rte_vdev_device *dev, const char *if_name,
2173288a85aeSCiara Loftus 	       int start_queue_idx, int queue_cnt, int shared_umem,
21747fc6ae50SShibin Koikkara Reeny 	       const char *prog_path, int busy_budget, int force_copy,
21758a324b1cSMaryam Tahhan 	       int use_cni, int use_pinned_map, const char *dp_path)
2176f1debd77SXiaolong Ye {
2177f1debd77SXiaolong Ye 	const char *name = rte_vdev_device_name(dev);
2178f1debd77SXiaolong Ye 	const unsigned int numa_node = dev->device.numa_node;
21799876cf83SCiara Loftus 	struct pmd_process_private *process_private;
2180f1debd77SXiaolong Ye 	struct pmd_internals *internals;
2181f1debd77SXiaolong Ye 	struct rte_eth_dev *eth_dev;
2182f1debd77SXiaolong Ye 	int ret;
2183f1debd77SXiaolong Ye 	int i;
2184f1debd77SXiaolong Ye 
2185f1debd77SXiaolong Ye 	internals = rte_zmalloc_socket(name, sizeof(*internals), 0, numa_node);
2186f1debd77SXiaolong Ye 	if (internals == NULL)
2187f1debd77SXiaolong Ye 		return NULL;
2188f1debd77SXiaolong Ye 
2189339b88c6SXiaolong Ye 	internals->start_queue_idx = start_queue_idx;
2190339b88c6SXiaolong Ye 	internals->queue_cnt = queue_cnt;
2191f1debd77SXiaolong Ye 	strlcpy(internals->if_name, if_name, IFNAMSIZ);
2192288a85aeSCiara Loftus 	strlcpy(internals->prog_path, prog_path, PATH_MAX);
2193288a85aeSCiara Loftus 	internals->custom_prog_configured = 0;
2194f1debd77SXiaolong Ye 
219574b46340SCiara Loftus #ifndef ETH_AF_XDP_SHARED_UMEM
219674b46340SCiara Loftus 	if (shared_umem) {
2197e99981afSDavid Marchand 		AF_XDP_LOG_LINE(ERR, "Shared UMEM feature not available. "
2198e99981afSDavid Marchand 				"Check kernel and libbpf version");
219974b46340SCiara Loftus 		goto err_free_internals;
220074b46340SCiara Loftus 	}
220174b46340SCiara Loftus #endif
220274b46340SCiara Loftus 	internals->shared_umem = shared_umem;
2203b275e298SXiaoyun Li 	internals->force_copy = force_copy;
22047fc6ae50SShibin Koikkara Reeny 	internals->use_cni = use_cni;
22058a324b1cSMaryam Tahhan 	internals->use_pinned_map = use_pinned_map;
22069c132373SMaryam Tahhan 	strlcpy(internals->dp_path, dp_path, PATH_MAX);
220774b46340SCiara Loftus 
2208339b88c6SXiaolong Ye 	if (xdp_get_channels_info(if_name, &internals->max_queue_cnt,
2209339b88c6SXiaolong Ye 				  &internals->combined_queue_cnt)) {
2210e99981afSDavid Marchand 		AF_XDP_LOG_LINE(ERR, "Failed to get channel info of interface: %s",
2211339b88c6SXiaolong Ye 				if_name);
2212339b88c6SXiaolong Ye 		goto err_free_internals;
2213339b88c6SXiaolong Ye 	}
2214339b88c6SXiaolong Ye 
2215339b88c6SXiaolong Ye 	if (queue_cnt > internals->combined_queue_cnt) {
2216e99981afSDavid Marchand 		AF_XDP_LOG_LINE(ERR, "Specified queue count %d is larger than combined queue count %d.",
2217339b88c6SXiaolong Ye 				queue_cnt, internals->combined_queue_cnt);
2218339b88c6SXiaolong Ye 		goto err_free_internals;
2219339b88c6SXiaolong Ye 	}
2220339b88c6SXiaolong Ye 
2221339b88c6SXiaolong Ye 	internals->rx_queues = rte_zmalloc_socket(NULL,
2222339b88c6SXiaolong Ye 					sizeof(struct pkt_rx_queue) * queue_cnt,
2223339b88c6SXiaolong Ye 					0, numa_node);
2224339b88c6SXiaolong Ye 	if (internals->rx_queues == NULL) {
2225e99981afSDavid Marchand 		AF_XDP_LOG_LINE(ERR, "Failed to allocate memory for rx queues.");
2226339b88c6SXiaolong Ye 		goto err_free_internals;
2227339b88c6SXiaolong Ye 	}
2228339b88c6SXiaolong Ye 
2229339b88c6SXiaolong Ye 	internals->tx_queues = rte_zmalloc_socket(NULL,
2230339b88c6SXiaolong Ye 					sizeof(struct pkt_tx_queue) * queue_cnt,
2231339b88c6SXiaolong Ye 					0, numa_node);
2232339b88c6SXiaolong Ye 	if (internals->tx_queues == NULL) {
2233e99981afSDavid Marchand 		AF_XDP_LOG_LINE(ERR, "Failed to allocate memory for tx queues.");
2234339b88c6SXiaolong Ye 		goto err_free_rx;
2235339b88c6SXiaolong Ye 	}
2236339b88c6SXiaolong Ye 	for (i = 0; i < queue_cnt; i++) {
2237f1debd77SXiaolong Ye 		internals->tx_queues[i].pair = &internals->rx_queues[i];
2238f1debd77SXiaolong Ye 		internals->rx_queues[i].pair = &internals->tx_queues[i];
2239339b88c6SXiaolong Ye 		internals->rx_queues[i].xsk_queue_idx = start_queue_idx + i;
2240339b88c6SXiaolong Ye 		internals->tx_queues[i].xsk_queue_idx = start_queue_idx + i;
2241055a3936SCiara Loftus 		internals->rx_queues[i].busy_budget = busy_budget;
2242f1debd77SXiaolong Ye 	}
2243f1debd77SXiaolong Ye 
2244f1debd77SXiaolong Ye 	ret = get_iface_info(if_name, &internals->eth_addr,
2245f1debd77SXiaolong Ye 			     &internals->if_index);
2246f1debd77SXiaolong Ye 	if (ret)
2247339b88c6SXiaolong Ye 		goto err_free_tx;
2248f1debd77SXiaolong Ye 
22499876cf83SCiara Loftus 	process_private = (struct pmd_process_private *)
22509876cf83SCiara Loftus 		rte_zmalloc_socket(name, sizeof(struct pmd_process_private),
22519876cf83SCiara Loftus 				   RTE_CACHE_LINE_SIZE, numa_node);
22529876cf83SCiara Loftus 	if (process_private == NULL) {
2253e99981afSDavid Marchand 		AF_XDP_LOG_LINE(ERR, "Failed to alloc memory for process private");
22549876cf83SCiara Loftus 		goto err_free_tx;
22559876cf83SCiara Loftus 	}
22569876cf83SCiara Loftus 
2257f1debd77SXiaolong Ye 	eth_dev = rte_eth_vdev_allocate(dev, 0);
2258f1debd77SXiaolong Ye 	if (eth_dev == NULL)
22599876cf83SCiara Loftus 		goto err_free_pp;
2260f1debd77SXiaolong Ye 
2261f1debd77SXiaolong Ye 	eth_dev->data->dev_private = internals;
2262f1debd77SXiaolong Ye 	eth_dev->data->dev_link = pmd_link;
2263f1debd77SXiaolong Ye 	eth_dev->data->mac_addrs = &internals->eth_addr;
2264f30e69b4SFerruh Yigit 	eth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;
22658a324b1cSMaryam Tahhan 	if (!internals->use_cni && !internals->use_pinned_map)
2266f1debd77SXiaolong Ye 		eth_dev->dev_ops = &ops;
22677fc6ae50SShibin Koikkara Reeny 	else
22689c132373SMaryam Tahhan 		eth_dev->dev_ops = &ops_afxdp_dp;
22697fc6ae50SShibin Koikkara Reeny 
2270f1debd77SXiaolong Ye 	eth_dev->rx_pkt_burst = eth_af_xdp_rx;
2271f1debd77SXiaolong Ye 	eth_dev->tx_pkt_burst = eth_af_xdp_tx;
22729876cf83SCiara Loftus 	eth_dev->process_private = process_private;
22739876cf83SCiara Loftus 
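	/* Mark every per-queue xsk fd as invalid for now; they are filled in
	 * once the AF_XDP sockets are actually created, and secondary
	 * processes later fetch them over IPC (see the handlers below).
	 */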
22749876cf83SCiara Loftus 	for (i = 0; i < queue_cnt; i++)
22759876cf83SCiara Loftus 		process_private->rxq_xsk_fds[i] = -1;
2276f1debd77SXiaolong Ye 
2277d8a21077SCiara Loftus #if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
2278e99981afSDavid Marchand 	AF_XDP_LOG_LINE(INFO, "Zero copy between umem and mbuf enabled.");
2279d8a21077SCiara Loftus #endif
2280e9ff8bb7SXiaolong Ye 
2281f1debd77SXiaolong Ye 	return eth_dev;
2282f1debd77SXiaolong Ye 
22839876cf83SCiara Loftus err_free_pp:
22849876cf83SCiara Loftus 	rte_free(process_private);
2285339b88c6SXiaolong Ye err_free_tx:
2286339b88c6SXiaolong Ye 	rte_free(internals->tx_queues);
2287339b88c6SXiaolong Ye err_free_rx:
2288339b88c6SXiaolong Ye 	rte_free(internals->rx_queues);
2289339b88c6SXiaolong Ye err_free_internals:
2290f1debd77SXiaolong Ye 	rte_free(internals);
2291f1debd77SXiaolong Ye 	return NULL;
2292f1debd77SXiaolong Ye }
2293f1debd77SXiaolong Ye 
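/*
 * The xsk fds live in per-process private data and, being file descriptors,
 * cannot be shared through shared memory alone.  The two handlers below
 * exchange them over the EAL multi-process IPC channel: a secondary process
 * requests the fds for a given port and the primary replies with one fd per
 * rx queue.
 */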
22949876cf83SCiara Loftus /* Secondary process requests rxq fds from primary. */
22959876cf83SCiara Loftus static int
22969876cf83SCiara Loftus afxdp_mp_request_fds(const char *name, struct rte_eth_dev *dev)
22979876cf83SCiara Loftus {
22989876cf83SCiara Loftus 	struct pmd_process_private *process_private = dev->process_private;
22999876cf83SCiara Loftus 	struct timespec timeout = {.tv_sec = 1, .tv_nsec = 0};
23009876cf83SCiara Loftus 	struct rte_mp_msg request, *reply;
23019876cf83SCiara Loftus 	struct rte_mp_reply replies;
23029876cf83SCiara Loftus 	struct ipc_hdr *request_param = (struct ipc_hdr *)request.param;
23039876cf83SCiara Loftus 	int i, ret;
23049876cf83SCiara Loftus 
23059876cf83SCiara Loftus 	/* Prepare the request */
23069876cf83SCiara Loftus 	memset(&request, 0, sizeof(request));
23079876cf83SCiara Loftus 	strlcpy(request.name, ETH_AF_XDP_MP_KEY, sizeof(request.name));
23089876cf83SCiara Loftus 	strlcpy(request_param->port_name, name,
23099876cf83SCiara Loftus 		sizeof(request_param->port_name));
23109876cf83SCiara Loftus 	request.len_param = sizeof(*request_param);
23119876cf83SCiara Loftus 
23129876cf83SCiara Loftus 	/* Send the request and receive the reply */
2313e99981afSDavid Marchand 	AF_XDP_LOG_LINE(DEBUG, "Sending multi-process IPC request for %s", name);
23149876cf83SCiara Loftus 	ret = rte_mp_request_sync(&request, &replies, &timeout);
23159876cf83SCiara Loftus 	if (ret < 0 || replies.nb_received != 1) {
2316e99981afSDavid Marchand 		AF_XDP_LOG_LINE(ERR, "Failed to request fds from primary: %d",
23179876cf83SCiara Loftus 			   rte_errno);
23189876cf83SCiara Loftus 		return -1;
23199876cf83SCiara Loftus 	}
23209876cf83SCiara Loftus 	reply = replies.msgs;
2321e99981afSDavid Marchand 	AF_XDP_LOG_LINE(DEBUG, "Received multi-process IPC reply for %s", name);
23229876cf83SCiara Loftus 	if (dev->data->nb_rx_queues != reply->num_fds) {
2323e99981afSDavid Marchand 		AF_XDP_LOG_LINE(ERR, "Incorrect number of fds received: %d != %d",
23249876cf83SCiara Loftus 			   reply->num_fds, dev->data->nb_rx_queues);
		/* replies.msgs is allocated by rte_mp_request_sync() and must
		 * be freed by the caller, including on this error path.
		 */
		free(reply);
23259876cf83SCiara Loftus 		return -EINVAL;
23269876cf83SCiara Loftus 	}
23279876cf83SCiara Loftus 
23289876cf83SCiara Loftus 	for (i = 0; i < reply->num_fds; i++)
23299876cf83SCiara Loftus 		process_private->rxq_xsk_fds[i] = reply->fds[i];
23309876cf83SCiara Loftus 
23319876cf83SCiara Loftus 	free(reply);
23329876cf83SCiara Loftus 	return 0;
23339876cf83SCiara Loftus }
23349876cf83SCiara Loftus 
23359876cf83SCiara Loftus /* Primary process sends rxq fds to secondary. */
23369876cf83SCiara Loftus static int
23379876cf83SCiara Loftus afxdp_mp_send_fds(const struct rte_mp_msg *request, const void *peer)
23389876cf83SCiara Loftus {
23399876cf83SCiara Loftus 	struct rte_eth_dev *dev;
23409876cf83SCiara Loftus 	struct pmd_process_private *process_private;
23419876cf83SCiara Loftus 	struct rte_mp_msg reply;
23429876cf83SCiara Loftus 	const struct ipc_hdr *request_param =
23439876cf83SCiara Loftus 		(const struct ipc_hdr *)request->param;
23449876cf83SCiara Loftus 	struct ipc_hdr *reply_param =
23459876cf83SCiara Loftus 		(struct ipc_hdr *)reply.param;
23469876cf83SCiara Loftus 	const char *request_name = request_param->port_name;
23479876cf83SCiara Loftus 	int i;
23489876cf83SCiara Loftus 
2349e99981afSDavid Marchand 	AF_XDP_LOG_LINE(DEBUG, "Received multi-process IPC request for %s",
23509876cf83SCiara Loftus 		   request_name);
23519876cf83SCiara Loftus 
23529876cf83SCiara Loftus 	/* Find the requested port */
23539876cf83SCiara Loftus 	dev = rte_eth_dev_get_by_name(request_name);
23549876cf83SCiara Loftus 	if (!dev) {
2355e99981afSDavid Marchand 		AF_XDP_LOG_LINE(ERR, "Failed to get port id for %s", request_name);
23569876cf83SCiara Loftus 		return -1;
23579876cf83SCiara Loftus 	}
23589876cf83SCiara Loftus 	process_private = dev->process_private;
23599876cf83SCiara Loftus 
23609876cf83SCiara Loftus 	/* Populate the reply with the xsk fd for each queue */
23619876cf83SCiara Loftus 	reply.num_fds = 0;
23629876cf83SCiara Loftus 	if (dev->data->nb_rx_queues > RTE_MP_MAX_FD_NUM) {
2363e99981afSDavid Marchand 		AF_XDP_LOG_LINE(ERR, "Number of rx queues (%d) exceeds max number of fds (%d)",
23649876cf83SCiara Loftus 			   dev->data->nb_rx_queues, RTE_MP_MAX_FD_NUM);
23659876cf83SCiara Loftus 		return -EINVAL;
23669876cf83SCiara Loftus 	}
23679876cf83SCiara Loftus 
23689876cf83SCiara Loftus 	for (i = 0; i < dev->data->nb_rx_queues; i++)
23699876cf83SCiara Loftus 		reply.fds[reply.num_fds++] = process_private->rxq_xsk_fds[i];
23709876cf83SCiara Loftus 
23719876cf83SCiara Loftus 	/* Send the reply */
23729876cf83SCiara Loftus 	strlcpy(reply.name, request->name, sizeof(reply.name));
23739876cf83SCiara Loftus 	strlcpy(reply_param->port_name, request_name,
23749876cf83SCiara Loftus 		sizeof(reply_param->port_name));
23759876cf83SCiara Loftus 	reply.len_param = sizeof(*reply_param);
2376e99981afSDavid Marchand 	AF_XDP_LOG_LINE(DEBUG, "Sending multi-process IPC reply for %s",
23779876cf83SCiara Loftus 		   reply_param->port_name);
23789876cf83SCiara Loftus 	if (rte_mp_reply(&reply, peer) < 0) {
2379e99981afSDavid Marchand 		AF_XDP_LOG_LINE(ERR, "Failed to reply to multi-process IPC request");
23809876cf83SCiara Loftus 		return -1;
23819876cf83SCiara Loftus 	}
23829876cf83SCiara Loftus 	return 0;
23839876cf83SCiara Loftus }
23849876cf83SCiara Loftus 
2385f1debd77SXiaolong Ye static int
2386f1debd77SXiaolong Ye rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
2387f1debd77SXiaolong Ye {
2388f1debd77SXiaolong Ye 	struct rte_kvargs *kvlist;
2389f1debd77SXiaolong Ye 	char if_name[IFNAMSIZ] = {'\0'};
2390339b88c6SXiaolong Ye 	int xsk_start_queue_idx = ETH_AF_XDP_DFLT_START_QUEUE_IDX;
2391339b88c6SXiaolong Ye 	int xsk_queue_cnt = ETH_AF_XDP_DFLT_QUEUE_COUNT;
239274b46340SCiara Loftus 	int shared_umem = 0;
2393288a85aeSCiara Loftus 	char prog_path[PATH_MAX] = {'\0'};
23949876cf83SCiara Loftus 	int busy_budget = -1, ret;
2395b275e298SXiaoyun Li 	int force_copy = 0;
23967fc6ae50SShibin Koikkara Reeny 	int use_cni = 0;
23978a324b1cSMaryam Tahhan 	int use_pinned_map = 0;
23989c132373SMaryam Tahhan 	char dp_path[PATH_MAX] = {'\0'};
2399f1debd77SXiaolong Ye 	struct rte_eth_dev *eth_dev = NULL;
24009876cf83SCiara Loftus 	const char *name = rte_vdev_device_name(dev);
2401f1debd77SXiaolong Ye 
2402e99981afSDavid Marchand 	AF_XDP_LOG_LINE(INFO, "Initializing pmd_af_xdp for %s", name);
2403f1debd77SXiaolong Ye 
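	/* A secondary process only attaches to the port created by the
	 * primary, installs dummy burst functions, allocates its own
	 * process-private area and then asks the primary for the xsk fds.
	 */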
24040668d829SCiara Loftus 	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
24059876cf83SCiara Loftus 		eth_dev = rte_eth_dev_attach_secondary(name);
24069876cf83SCiara Loftus 		if (eth_dev == NULL) {
2407e99981afSDavid Marchand 			AF_XDP_LOG_LINE(ERR, "Failed to probe %s", name);
24089876cf83SCiara Loftus 			return -EINVAL;
24099876cf83SCiara Loftus 		}
24109876cf83SCiara Loftus 		eth_dev->dev_ops = &ops;
24119876cf83SCiara Loftus 		eth_dev->device = &dev->device;
2412a41f593fSFerruh Yigit 		eth_dev->rx_pkt_burst = rte_eth_pkt_burst_dummy;
2413a41f593fSFerruh Yigit 		eth_dev->tx_pkt_burst = rte_eth_pkt_burst_dummy;
24149876cf83SCiara Loftus 		eth_dev->process_private = (struct pmd_process_private *)
24159876cf83SCiara Loftus 			rte_zmalloc_socket(name,
24169876cf83SCiara Loftus 					   sizeof(struct pmd_process_private),
24179876cf83SCiara Loftus 					   RTE_CACHE_LINE_SIZE,
24189876cf83SCiara Loftus 					   eth_dev->device->numa_node);
24199876cf83SCiara Loftus 		if (eth_dev->process_private == NULL) {
2420e99981afSDavid Marchand 			AF_XDP_LOG_LINE(ERR,
2421e99981afSDavid Marchand 				"Failed to alloc memory for process private");
24229876cf83SCiara Loftus 			return -ENOMEM;
24239876cf83SCiara Loftus 		}
24249876cf83SCiara Loftus 
24259876cf83SCiara Loftus 		/* Obtain the xsk fds from the primary process. */
24269876cf83SCiara Loftus 		if (afxdp_mp_request_fds(name, eth_dev))
24279876cf83SCiara Loftus 			return -1;
24289876cf83SCiara Loftus 
24299876cf83SCiara Loftus 		rte_eth_dev_probing_finish(eth_dev);
24309876cf83SCiara Loftus 		return 0;
2431f1debd77SXiaolong Ye 	}
2432f1debd77SXiaolong Ye 
2433f1debd77SXiaolong Ye 	kvlist = rte_kvargs_parse(rte_vdev_device_args(dev), valid_arguments);
2434f1debd77SXiaolong Ye 	if (kvlist == NULL) {
2435e99981afSDavid Marchand 		AF_XDP_LOG_LINE(ERR, "Invalid kvargs key");
2436f1debd77SXiaolong Ye 		return -EINVAL;
2437f1debd77SXiaolong Ye 	}
2438f1debd77SXiaolong Ye 
2439339b88c6SXiaolong Ye 	if (parse_parameters(kvlist, if_name, &xsk_start_queue_idx,
2440055a3936SCiara Loftus 			     &xsk_queue_cnt, &shared_umem, prog_path,
24418a324b1cSMaryam Tahhan 			     &busy_budget, &force_copy, &use_cni, &use_pinned_map,
24428a324b1cSMaryam Tahhan 			     dp_path) < 0) {
2443e99981afSDavid Marchand 		AF_XDP_LOG_LINE(ERR, "Invalid kvargs value");
2444f1debd77SXiaolong Ye 		return -EINVAL;
2445f1debd77SXiaolong Ye 	}
2446f1debd77SXiaolong Ye 
24478a324b1cSMaryam Tahhan 	if (use_cni && use_pinned_map) {
2448e99981afSDavid Marchand 		AF_XDP_LOG_LINE(ERR, "When '%s' parameter is used, '%s' parameter is not valid",
24498a324b1cSMaryam Tahhan 			ETH_AF_XDP_USE_CNI_ARG, ETH_AF_XDP_USE_PINNED_MAP_ARG);
24507fc6ae50SShibin Koikkara Reeny 		return -EINVAL;
24517fc6ae50SShibin Koikkara Reeny 	}
24527fc6ae50SShibin Koikkara Reeny 
24538a324b1cSMaryam Tahhan 	if ((use_cni || use_pinned_map) && busy_budget > 0) {
2454e99981afSDavid Marchand 		AF_XDP_LOG_LINE(ERR, "When '%s' or '%s' parameter is used, '%s' parameter is not valid",
24558a324b1cSMaryam Tahhan 			ETH_AF_XDP_USE_CNI_ARG, ETH_AF_XDP_USE_PINNED_MAP_ARG,
24568a324b1cSMaryam Tahhan 			ETH_AF_XDP_BUDGET_ARG);
24578a324b1cSMaryam Tahhan 		return -EINVAL;
24588a324b1cSMaryam Tahhan 	}
24598a324b1cSMaryam Tahhan 
24608a324b1cSMaryam Tahhan 	if ((use_cni || use_pinned_map) && strnlen(prog_path, PATH_MAX)) {
2461e99981afSDavid Marchand 		AF_XDP_LOG_LINE(ERR, "When '%s' or '%s' parameter is used, '%s' parameter is not valid",
24628a324b1cSMaryam Tahhan 			ETH_AF_XDP_USE_CNI_ARG, ETH_AF_XDP_USE_PINNED_MAP_ARG,
24638a324b1cSMaryam Tahhan 			ETH_AF_XDP_PROG_ARG);
24647fc6ae50SShibin Koikkara Reeny 		return -EINVAL;
24657fc6ae50SShibin Koikkara Reeny 	}
24667fc6ae50SShibin Koikkara Reeny 
24679c132373SMaryam Tahhan 	if (use_cni && !strnlen(dp_path, PATH_MAX)) {
24689c132373SMaryam Tahhan 		snprintf(dp_path, sizeof(dp_path), "%s/%s/%s", DP_BASE_PATH, if_name, DP_UDS_SOCK);
2469e99981afSDavid Marchand 		AF_XDP_LOG_LINE(INFO, "'%s' parameter not provided, setting value to '%s'",
24709c132373SMaryam Tahhan 			ETH_AF_XDP_DP_PATH_ARG, dp_path);
24719c132373SMaryam Tahhan 	}
24729c132373SMaryam Tahhan 
24738a324b1cSMaryam Tahhan 	if (use_pinned_map && !strnlen(dp_path, PATH_MAX)) {
24748a324b1cSMaryam Tahhan 		snprintf(dp_path, sizeof(dp_path), "%s/%s/%s", DP_BASE_PATH, if_name, DP_XSK_MAP);
2475e99981afSDavid Marchand 		AF_XDP_LOG_LINE(INFO, "'%s' parameter not provided, setting value to '%s'",
24768a324b1cSMaryam Tahhan 			ETH_AF_XDP_DP_PATH_ARG, dp_path);
24778a324b1cSMaryam Tahhan 	}
24788a324b1cSMaryam Tahhan 
24798a324b1cSMaryam Tahhan 	if ((!use_cni && !use_pinned_map) && strnlen(dp_path, PATH_MAX)) {
2480e99981afSDavid Marchand 		AF_XDP_LOG_LINE(ERR, "'%s' parameter is set, but neither '%s' nor '%s' is enabled",
24818a324b1cSMaryam Tahhan 			ETH_AF_XDP_DP_PATH_ARG, ETH_AF_XDP_USE_CNI_ARG,
24828a324b1cSMaryam Tahhan 			ETH_AF_XDP_USE_PINNED_MAP_ARG);
24839c132373SMaryam Tahhan 		return -EINVAL;
24849c132373SMaryam Tahhan 	}
24859c132373SMaryam Tahhan 
2486f1debd77SXiaolong Ye 	if (strlen(if_name) == 0) {
2487e99981afSDavid Marchand 		AF_XDP_LOG_LINE(ERR, "Network interface must be specified");
2488f1debd77SXiaolong Ye 		return -EINVAL;
2489f1debd77SXiaolong Ye 	}
2490f1debd77SXiaolong Ye 
24913d28387cSFrank Du 	/* get numa node id from net sysfs */
24923d28387cSFrank Du 	if (dev->device.numa_node == SOCKET_ID_ANY) {
24933d28387cSFrank Du 		unsigned long numa = 0;
24943d28387cSFrank Du 		char numa_path[PATH_MAX];
24953d28387cSFrank Du 
24963d28387cSFrank Du 		snprintf(numa_path, sizeof(numa_path), "/sys/class/net/%s/device/numa_node",
24973d28387cSFrank Du 			 if_name);
2498140bc789SDavid Marchand 		if (access(numa_path, R_OK) != 0 || eal_parse_sysfs_value(numa_path, &numa) != 0)
24993d28387cSFrank Du 			dev->device.numa_node = rte_socket_id();
25003d28387cSFrank Du 		else
25013d28387cSFrank Du 			dev->device.numa_node = numa;
25023d28387cSFrank Du 	}
25033d28387cSFrank Du 
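	/* A busy_budget of -1 means the kvarg was not supplied; fall back to
	 * the driver default.
	 */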
2504055a3936SCiara Loftus 	busy_budget = busy_budget == -1 ? ETH_AF_XDP_DFLT_BUSY_BUDGET :
2505055a3936SCiara Loftus 					busy_budget;
2506055a3936SCiara Loftus 
2507339b88c6SXiaolong Ye 	eth_dev = init_internals(dev, if_name, xsk_start_queue_idx,
2508055a3936SCiara Loftus 				 xsk_queue_cnt, shared_umem, prog_path,
25098a324b1cSMaryam Tahhan 				 busy_budget, force_copy, use_cni, use_pinned_map,
25108a324b1cSMaryam Tahhan 				 dp_path);
2511f1debd77SXiaolong Ye 	if (eth_dev == NULL) {
2512e99981afSDavid Marchand 		AF_XDP_LOG_LINE(ERR, "Failed to init internals");
2513f1debd77SXiaolong Ye 		return -1;
2514f1debd77SXiaolong Ye 	}
2515f1debd77SXiaolong Ye 
25169876cf83SCiara Loftus 	/* Register IPC callback which shares xsk fds from primary to secondary */
25179876cf83SCiara Loftus 	if (!afxdp_dev_count) {
25189876cf83SCiara Loftus 		ret = rte_mp_action_register(ETH_AF_XDP_MP_KEY, afxdp_mp_send_fds);
251917ec9678SJunxiao Shi 		if (ret < 0 && rte_errno != ENOTSUP) {
2520e99981afSDavid Marchand 			AF_XDP_LOG_LINE(ERR, "%s: Failed to register multi-process IPC callback: %s",
25219876cf83SCiara Loftus 				   name, strerror(rte_errno));
25229876cf83SCiara Loftus 			return -1;
25239876cf83SCiara Loftus 		}
25249876cf83SCiara Loftus 	}
25259876cf83SCiara Loftus 	afxdp_dev_count++;
25269876cf83SCiara Loftus 
2527f1debd77SXiaolong Ye 	rte_eth_dev_probing_finish(eth_dev);
2528f1debd77SXiaolong Ye 
2529f1debd77SXiaolong Ye 	return 0;
2530f1debd77SXiaolong Ye }
2531f1debd77SXiaolong Ye 
2532f1debd77SXiaolong Ye static int
2533f1debd77SXiaolong Ye rte_pmd_af_xdp_remove(struct rte_vdev_device *dev)
2534f1debd77SXiaolong Ye {
2535f1debd77SXiaolong Ye 	struct rte_eth_dev *eth_dev = NULL;
2536f1debd77SXiaolong Ye 
2537e99981afSDavid Marchand 	AF_XDP_LOG_LINE(INFO, "Removing AF_XDP ethdev on numa socket %u",
2538f1debd77SXiaolong Ye 		rte_socket_id());
2539f1debd77SXiaolong Ye 
2540f1debd77SXiaolong Ye 	if (dev == NULL)
2541f1debd77SXiaolong Ye 		return -1;
2542f1debd77SXiaolong Ye 
2543f1debd77SXiaolong Ye 	/* find the ethdev entry */
2544f1debd77SXiaolong Ye 	eth_dev = rte_eth_dev_allocated(rte_vdev_device_name(dev));
2545f1debd77SXiaolong Ye 	if (eth_dev == NULL)
2546bf2644cdSWilliam Tu 		return 0;
2547f1debd77SXiaolong Ye 
2548f0ce7af0SXiaolong Ye 	eth_dev_close(eth_dev);
25499876cf83SCiara Loftus 	if (afxdp_dev_count == 1)
25509876cf83SCiara Loftus 		rte_mp_action_unregister(ETH_AF_XDP_MP_KEY);
25519876cf83SCiara Loftus 	afxdp_dev_count--;
2552f1debd77SXiaolong Ye 	rte_eth_dev_release_port(eth_dev);
2553f1debd77SXiaolong Ye 
2554f1debd77SXiaolong Ye 	return 0;
2555f1debd77SXiaolong Ye }
2556f1debd77SXiaolong Ye 
2557f1debd77SXiaolong Ye static struct rte_vdev_driver pmd_af_xdp_drv = {
2558f1debd77SXiaolong Ye 	.probe = rte_pmd_af_xdp_probe,
2559f1debd77SXiaolong Ye 	.remove = rte_pmd_af_xdp_remove,
2560f1debd77SXiaolong Ye };
2561f1debd77SXiaolong Ye 
2562f1debd77SXiaolong Ye RTE_PMD_REGISTER_VDEV(net_af_xdp, pmd_af_xdp_drv);
2563f1debd77SXiaolong Ye RTE_PMD_REGISTER_PARAM_STRING(net_af_xdp,
2564f1debd77SXiaolong Ye 			      "iface=<string> "
2565339b88c6SXiaolong Ye 			      "start_queue=<int> "
256674b46340SCiara Loftus 			      "queue_count=<int> "
2567288a85aeSCiara Loftus 			      "shared_umem=<int> "
2568055a3936SCiara Loftus 			      "xdp_prog=<string> "
2569b275e298SXiaoyun Li 			      "busy_budget=<int> "
25707fc6ae50SShibin Koikkara Reeny 			      "force_copy=<int> "
25719c132373SMaryam Tahhan 			      "use_cni=<int> "
25728a324b1cSMaryam Tahhan 			      "use_pinned_map=<int> "
25739c132373SMaryam Tahhan 			      "dp_path=<string> ");
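/*
 * Illustrative usage only (interface name and values are placeholders):
 * the PMD is instantiated from the EAL command line with the kvargs
 * registered above, e.g.
 *   --vdev=net_af_xdp0,iface=eth0,start_queue=0,queue_count=1,busy_budget=64
 */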
2574