xref: /dpdk/drivers/net/af_packet/rte_eth_af_packet.c (revision 6b3246245507e5257a17c1684daed27d8a82fc7a)
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2014 John W. Linville <linville@tuxdriver.com>
 * Originally based upon librte_pmd_pcap code:
 * Copyright(c) 2010-2015 Intel Corporation.
 * Copyright(c) 2014 6WIND S.A.
 * All rights reserved.
 */
8df65e967SBruce Richardson 
9c7a2ce0cSMattias Rönnblom #include <rte_common.h>
106723c0fcSBruce Richardson #include <rte_string_fns.h>
11df65e967SBruce Richardson #include <rte_mbuf.h>
12df96fd0dSBruce Richardson #include <ethdev_driver.h>
13df96fd0dSBruce Richardson #include <ethdev_vdev.h>
14df65e967SBruce Richardson #include <rte_malloc.h>
15df65e967SBruce Richardson #include <rte_kvargs.h>
164851ef2bSDavid Marchand #include <bus_vdev_driver.h>
17df65e967SBruce Richardson 
1889178d62SKrzysztof Kanas #include <errno.h>
19df65e967SBruce Richardson #include <linux/if_ether.h>
20df65e967SBruce Richardson #include <linux/if_packet.h>
21df65e967SBruce Richardson #include <arpa/inet.h>
22df65e967SBruce Richardson #include <net/if.h>
23c524527dSStephen Hemminger #include <net/if_arp.h>
24df65e967SBruce Richardson #include <sys/types.h>
25df65e967SBruce Richardson #include <sys/socket.h>
26df65e967SBruce Richardson #include <sys/ioctl.h>
2772b452c5SDmitry Kozlyuk #include <stdlib.h>
2889178d62SKrzysztof Kanas #include <string.h>
29df65e967SBruce Richardson #include <sys/mman.h>
30df65e967SBruce Richardson #include <unistd.h>
31df65e967SBruce Richardson #include <poll.h>
32df65e967SBruce Richardson 
/* Argument key names; each one is listed in valid_arguments[]. */
#define ETH_AF_PACKET_IFACE_ARG		"iface"
#define ETH_AF_PACKET_NUM_Q_ARG		"qpairs"
#define ETH_AF_PACKET_BLOCKSIZE_ARG	"blocksz"
#define ETH_AF_PACKET_FRAMESIZE_ARG	"framesz"
#define ETH_AF_PACKET_FRAMECOUNT_ARG	"framecnt"
#define ETH_AF_PACKET_QDISC_BYPASS_ARG	"qdisc_bypass"
#define ETH_AF_PACKET_FANOUT_MODE_ARG	"fanout_mode"

/* Default frame size (2048) and default frame count (512). */
#define DFLT_FRAME_SIZE		(1 << 11)
#define DFLT_FRAME_COUNT	(1 << 9)

/*
 * mbuf dynamic flag and dynamic field offset used for Rx timestamps.
 * Both are filled in by rte_mbuf_dyn_rx_timestamp_register() in
 * eth_dev_start() when timestamp offloading is enabled.
 */
static uint64_t timestamp_dynflag;
static int timestamp_dynfield_offset = -1;
46be10211cSStefan Laesser 
47c7a2ce0cSMattias Rönnblom struct __rte_cache_aligned pkt_rx_queue {
48df65e967SBruce Richardson 	int sockfd;
49df65e967SBruce Richardson 
50df65e967SBruce Richardson 	struct iovec *rd;
51df65e967SBruce Richardson 	uint8_t *map;
52df65e967SBruce Richardson 	unsigned int framecount;
53df65e967SBruce Richardson 	unsigned int framenum;
54df65e967SBruce Richardson 
55df65e967SBruce Richardson 	struct rte_mempool *mb_pool;
56f8244c63SZhiyong Yang 	uint16_t in_port;
57d41d39bcSTudor Cornea 	uint8_t vlan_strip;
58be10211cSStefan Laesser 	uint8_t timestamp_offloading;
59df65e967SBruce Richardson 
60df65e967SBruce Richardson 	volatile unsigned long rx_pkts;
61330f11adSRich Lane 	volatile unsigned long rx_bytes;
62*6b324624SStefan Laesser 	volatile unsigned long rx_nombuf;
63*6b324624SStefan Laesser 	volatile unsigned long rx_dropped_pkts;
64df65e967SBruce Richardson };
65df65e967SBruce Richardson 
66c7a2ce0cSMattias Rönnblom struct __rte_cache_aligned pkt_tx_queue {
67df65e967SBruce Richardson 	int sockfd;
68f4ff17a4SMichał Mirosław 	unsigned int frame_data_size;
69df65e967SBruce Richardson 
70df65e967SBruce Richardson 	struct iovec *rd;
71df65e967SBruce Richardson 	uint8_t *map;
72df65e967SBruce Richardson 	unsigned int framecount;
73df65e967SBruce Richardson 	unsigned int framenum;
74df65e967SBruce Richardson 
75df65e967SBruce Richardson 	volatile unsigned long tx_pkts;
76df65e967SBruce Richardson 	volatile unsigned long err_pkts;
77330f11adSRich Lane 	volatile unsigned long tx_bytes;
78df65e967SBruce Richardson };
79df65e967SBruce Richardson 
80df65e967SBruce Richardson struct pmd_internals {
81df65e967SBruce Richardson 	unsigned nb_queues;
82df65e967SBruce Richardson 
83df65e967SBruce Richardson 	int if_index;
841b93c2aaSChas Williams 	char *if_name;
856d13ea8eSOlivier Matz 	struct rte_ether_addr eth_addr;
86df65e967SBruce Richardson 
87df65e967SBruce Richardson 	struct tpacket_req req;
88df65e967SBruce Richardson 
89ccd37d34SStephen Hemminger 	struct pkt_rx_queue *rx_queue;
90ccd37d34SStephen Hemminger 	struct pkt_tx_queue *tx_queue;
91d41d39bcSTudor Cornea 	uint8_t vlan_strip;
92be10211cSStefan Laesser 	uint8_t timestamp_offloading;
93df65e967SBruce Richardson };
94df65e967SBruce Richardson 
95df65e967SBruce Richardson static const char *valid_arguments[] = {
96df65e967SBruce Richardson 	ETH_AF_PACKET_IFACE_ARG,
97df65e967SBruce Richardson 	ETH_AF_PACKET_NUM_Q_ARG,
98df65e967SBruce Richardson 	ETH_AF_PACKET_BLOCKSIZE_ARG,
99df65e967SBruce Richardson 	ETH_AF_PACKET_FRAMESIZE_ARG,
100df65e967SBruce Richardson 	ETH_AF_PACKET_FRAMECOUNT_ARG,
101d97de5b5SChas Williams 	ETH_AF_PACKET_QDISC_BYPASS_ARG,
102d3bc77abSTudor Cornea 	ETH_AF_PACKET_FANOUT_MODE_ARG,
103df65e967SBruce Richardson 	NULL
104df65e967SBruce Richardson };
105df65e967SBruce Richardson 
106df65e967SBruce Richardson static struct rte_eth_link pmd_link = {
107295968d1SFerruh Yigit 	.link_speed = RTE_ETH_SPEED_NUM_10G,
108295968d1SFerruh Yigit 	.link_duplex = RTE_ETH_LINK_FULL_DUPLEX,
109295968d1SFerruh Yigit 	.link_status = RTE_ETH_LINK_DOWN,
110295968d1SFerruh Yigit 	.link_autoneg = RTE_ETH_LINK_FIXED,
111df65e967SBruce Richardson };
112df65e967SBruce Richardson 
/* Driver log type; default level is NOTICE. */
RTE_LOG_REGISTER_DEFAULT(af_packet_logtype, NOTICE);
#define RTE_LOGTYPE_AFPACKET af_packet_logtype

/* Log one line prefixed with the name of the calling function. */
#define PMD_LOG(level, ...) \
	RTE_LOG_LINE_PREFIX(level, AFPACKET, "%s(): ", __func__, __VA_ARGS__)

/* Same as PMD_LOG(), with strerror(errno) appended after a colon. */
#define PMD_LOG_ERRNO(level, fmt, ...) \
	RTE_LOG_LINE(level, AFPACKET, "%s(): " fmt ":%s", __func__, \
		## __VA_ARGS__, strerror(errno))
12289178d62SKrzysztof Kanas 
123df65e967SBruce Richardson static uint16_t
124df65e967SBruce Richardson eth_af_packet_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
125df65e967SBruce Richardson {
126df65e967SBruce Richardson 	unsigned i;
127df65e967SBruce Richardson 	struct tpacket2_hdr *ppd;
128df65e967SBruce Richardson 	struct rte_mbuf *mbuf;
129df65e967SBruce Richardson 	uint8_t *pbuf;
130df65e967SBruce Richardson 	struct pkt_rx_queue *pkt_q = queue;
131df65e967SBruce Richardson 	uint16_t num_rx = 0;
132330f11adSRich Lane 	unsigned long num_rx_bytes = 0;
133df65e967SBruce Richardson 	unsigned int framecount, framenum;
134df65e967SBruce Richardson 
135df65e967SBruce Richardson 	if (unlikely(nb_pkts == 0))
136df65e967SBruce Richardson 		return 0;
137df65e967SBruce Richardson 
138df65e967SBruce Richardson 	/*
139df65e967SBruce Richardson 	 * Reads the given number of packets from the AF_PACKET socket one by
140df65e967SBruce Richardson 	 * one and copies the packet data into a newly allocated mbuf.
141df65e967SBruce Richardson 	 */
142df65e967SBruce Richardson 	framecount = pkt_q->framecount;
143df65e967SBruce Richardson 	framenum = pkt_q->framenum;
144df65e967SBruce Richardson 	for (i = 0; i < nb_pkts; i++) {
145df65e967SBruce Richardson 		/* point at the next incoming frame */
146df65e967SBruce Richardson 		ppd = (struct tpacket2_hdr *) pkt_q->rd[framenum].iov_base;
147df65e967SBruce Richardson 		if ((ppd->tp_status & TP_STATUS_USER) == 0)
148df65e967SBruce Richardson 			break;
149df65e967SBruce Richardson 
150df65e967SBruce Richardson 		/* allocate the next mbuf */
151df65e967SBruce Richardson 		mbuf = rte_pktmbuf_alloc(pkt_q->mb_pool);
152*6b324624SStefan Laesser 		if (unlikely(mbuf == NULL)) {
153*6b324624SStefan Laesser 			pkt_q->rx_nombuf++;
154df65e967SBruce Richardson 			break;
155*6b324624SStefan Laesser 		}
156df65e967SBruce Richardson 
157df65e967SBruce Richardson 		/* packet will fit in the mbuf, go ahead and receive it */
158df65e967SBruce Richardson 		rte_pktmbuf_pkt_len(mbuf) = rte_pktmbuf_data_len(mbuf) = ppd->tp_snaplen;
159df65e967SBruce Richardson 		pbuf = (uint8_t *) ppd + ppd->tp_mac;
160df65e967SBruce Richardson 		memcpy(rte_pktmbuf_mtod(mbuf, void *), pbuf, rte_pktmbuf_data_len(mbuf));
161df65e967SBruce Richardson 
16223deeebfSChas Williams 		/* check for vlan info */
16323deeebfSChas Williams 		if (ppd->tp_status & TP_STATUS_VLAN_VALID) {
16423deeebfSChas Williams 			mbuf->vlan_tci = ppd->tp_vlan_tci;
165daa02b5cSOlivier Matz 			mbuf->ol_flags |= (RTE_MBUF_F_RX_VLAN | RTE_MBUF_F_RX_VLAN_STRIPPED);
166d41d39bcSTudor Cornea 
167d41d39bcSTudor Cornea 			if (!pkt_q->vlan_strip && rte_vlan_insert(&mbuf))
168d41d39bcSTudor Cornea 				PMD_LOG(ERR, "Failed to reinsert VLAN tag");
16923deeebfSChas Williams 		}
17023deeebfSChas Williams 
171be10211cSStefan Laesser 		/* add kernel provided timestamp when offloading is enabled */
172be10211cSStefan Laesser 		if (pkt_q->timestamp_offloading) {
173be10211cSStefan Laesser 			/* since TPACKET_V2 timestamps are provided in nanoseconds resolution */
174be10211cSStefan Laesser 			*RTE_MBUF_DYNFIELD(mbuf, timestamp_dynfield_offset,
175be10211cSStefan Laesser 				rte_mbuf_timestamp_t *) =
176be10211cSStefan Laesser 					(uint64_t)ppd->tp_sec * 1000000000 + ppd->tp_nsec;
177be10211cSStefan Laesser 
178be10211cSStefan Laesser 			mbuf->ol_flags |= timestamp_dynflag;
179be10211cSStefan Laesser 		}
180be10211cSStefan Laesser 
181df65e967SBruce Richardson 		/* release incoming frame and advance ring buffer */
182df65e967SBruce Richardson 		ppd->tp_status = TP_STATUS_KERNEL;
183df65e967SBruce Richardson 		if (++framenum >= framecount)
184df65e967SBruce Richardson 			framenum = 0;
185e01993bfSPavel Krauz 		mbuf->port = pkt_q->in_port;
186df65e967SBruce Richardson 
187df65e967SBruce Richardson 		/* account for the receive frame */
188df65e967SBruce Richardson 		bufs[i] = mbuf;
189df65e967SBruce Richardson 		num_rx++;
190330f11adSRich Lane 		num_rx_bytes += mbuf->pkt_len;
191df65e967SBruce Richardson 	}
192df65e967SBruce Richardson 	pkt_q->framenum = framenum;
193df65e967SBruce Richardson 	pkt_q->rx_pkts += num_rx;
194330f11adSRich Lane 	pkt_q->rx_bytes += num_rx_bytes;
195df65e967SBruce Richardson 	return num_rx;
196df65e967SBruce Richardson }
197df65e967SBruce Richardson 
/*
 * Tell whether a Tx ring frame with the given status word is free for use.
 *
 * The timestamp status bits are ignored.  They should only matter when
 * timestamping is enabled on the socket, but a kernel bug (fixed in newer
 * releases) can leave them set regardless.
 *
 * See the following kernel commit for reference:
 *     commit 171c3b151118a2fe0fc1e2a9d1b5a1570cfe82d2
 *     net: packetmmap: fix only tx timestamp on request
 */
static inline bool
tx_ring_status_available(uint32_t tp_status)
{
	const uint32_t ts_bits =
		TP_STATUS_TS_SOFTWARE | TP_STATUS_TS_RAW_HARDWARE;

	return (tp_status & ~ts_bits) == TP_STATUS_AVAILABLE;
}
21784b3e455STudor Cornea 
21884b3e455STudor Cornea /*
219df65e967SBruce Richardson  * Callback to handle sending packets through a real NIC.
220df65e967SBruce Richardson  */
221df65e967SBruce Richardson static uint16_t
222df65e967SBruce Richardson eth_af_packet_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
223df65e967SBruce Richardson {
224df65e967SBruce Richardson 	struct tpacket2_hdr *ppd;
225df65e967SBruce Richardson 	struct rte_mbuf *mbuf;
226df65e967SBruce Richardson 	uint8_t *pbuf;
227df65e967SBruce Richardson 	unsigned int framecount, framenum;
228df65e967SBruce Richardson 	struct pollfd pfd;
229df65e967SBruce Richardson 	struct pkt_tx_queue *pkt_q = queue;
230df65e967SBruce Richardson 	uint16_t num_tx = 0;
231330f11adSRich Lane 	unsigned long num_tx_bytes = 0;
232df65e967SBruce Richardson 	int i;
233df65e967SBruce Richardson 
234df65e967SBruce Richardson 	if (unlikely(nb_pkts == 0))
235df65e967SBruce Richardson 		return 0;
236df65e967SBruce Richardson 
237df65e967SBruce Richardson 	memset(&pfd, 0, sizeof(pfd));
238df65e967SBruce Richardson 	pfd.fd = pkt_q->sockfd;
239df65e967SBruce Richardson 	pfd.events = POLLOUT;
240df65e967SBruce Richardson 	pfd.revents = 0;
241df65e967SBruce Richardson 
242df65e967SBruce Richardson 	framecount = pkt_q->framecount;
243df65e967SBruce Richardson 	framenum = pkt_q->framenum;
244df65e967SBruce Richardson 	ppd = (struct tpacket2_hdr *) pkt_q->rd[framenum].iov_base;
245df65e967SBruce Richardson 	for (i = 0; i < nb_pkts; i++) {
246f4ff17a4SMichał Mirosław 		mbuf = *bufs++;
247f4ff17a4SMichał Mirosław 
248f4ff17a4SMichał Mirosław 		/* drop oversized packets */
249ef6cb930SWenfeng Liu 		if (mbuf->pkt_len > pkt_q->frame_data_size) {
250f4ff17a4SMichał Mirosław 			rte_pktmbuf_free(mbuf);
251f4ff17a4SMichał Mirosław 			continue;
252f4ff17a4SMichał Mirosław 		}
253f4ff17a4SMichał Mirosław 
25423deeebfSChas Williams 		/* insert vlan info if necessary */
255daa02b5cSOlivier Matz 		if (mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) {
25623deeebfSChas Williams 			if (rte_vlan_insert(&mbuf)) {
25723deeebfSChas Williams 				rte_pktmbuf_free(mbuf);
25823deeebfSChas Williams 				continue;
25923deeebfSChas Williams 			}
26023deeebfSChas Williams 		}
26123deeebfSChas Williams 
262df65e967SBruce Richardson 		/* point at the next incoming frame */
263f86d553cSTudor Cornea 		if (!tx_ring_status_available(ppd->tp_status)) {
264f86d553cSTudor Cornea 			if (poll(&pfd, 1, -1) < 0)
265f86d553cSTudor Cornea 				break;
266f86d553cSTudor Cornea 
267f86d553cSTudor Cornea 			/* poll() can return POLLERR if the interface is down */
268f86d553cSTudor Cornea 			if (pfd.revents & POLLERR)
269f86d553cSTudor Cornea 				break;
270f86d553cSTudor Cornea 		}
271f86d553cSTudor Cornea 
272f86d553cSTudor Cornea 		/*
273f86d553cSTudor Cornea 		 * poll() will almost always return POLLOUT, even if there
274f86d553cSTudor Cornea 		 * are no extra buffers available
275f86d553cSTudor Cornea 		 *
276f86d553cSTudor Cornea 		 * This happens, because packet_poll() calls datagram_poll()
277f86d553cSTudor Cornea 		 * which checks the space left in the socket buffer and,
278f86d553cSTudor Cornea 		 * in the case of packet_mmap, the default socket buffer length
279f86d553cSTudor Cornea 		 * doesn't match the requested size for the tx_ring.
280f86d553cSTudor Cornea 		 * As such, there is almost always space left in socket buffer,
281f86d553cSTudor Cornea 		 * which doesn't seem to be correlated to the requested size
282f86d553cSTudor Cornea 		 * for the tx_ring in packet_mmap.
283f86d553cSTudor Cornea 		 *
284f86d553cSTudor Cornea 		 * This results in poll() returning POLLOUT.
285f86d553cSTudor Cornea 		 */
286f86d553cSTudor Cornea 		if (!tx_ring_status_available(ppd->tp_status))
287f4ff17a4SMichał Mirosław 			break;
288df65e967SBruce Richardson 
289df65e967SBruce Richardson 		/* copy the tx frame data */
290df65e967SBruce Richardson 		pbuf = (uint8_t *) ppd + TPACKET2_HDRLEN -
291df65e967SBruce Richardson 			sizeof(struct sockaddr_ll);
292ef6cb930SWenfeng Liu 
293ef6cb930SWenfeng Liu 		struct rte_mbuf *tmp_mbuf = mbuf;
294ef6cb930SWenfeng Liu 		while (tmp_mbuf) {
295ef6cb930SWenfeng Liu 			uint16_t data_len = rte_pktmbuf_data_len(tmp_mbuf);
296ef6cb930SWenfeng Liu 			memcpy(pbuf, rte_pktmbuf_mtod(tmp_mbuf, void*), data_len);
297ef6cb930SWenfeng Liu 			pbuf += data_len;
298ef6cb930SWenfeng Liu 			tmp_mbuf = tmp_mbuf->next;
299ef6cb930SWenfeng Liu 		}
300ef6cb930SWenfeng Liu 
301ef6cb930SWenfeng Liu 		ppd->tp_len = mbuf->pkt_len;
302ef6cb930SWenfeng Liu 		ppd->tp_snaplen = mbuf->pkt_len;
303df65e967SBruce Richardson 
304df65e967SBruce Richardson 		/* release incoming frame and advance ring buffer */
305df65e967SBruce Richardson 		ppd->tp_status = TP_STATUS_SEND_REQUEST;
306df65e967SBruce Richardson 		if (++framenum >= framecount)
307df65e967SBruce Richardson 			framenum = 0;
308df65e967SBruce Richardson 		ppd = (struct tpacket2_hdr *) pkt_q->rd[framenum].iov_base;
309df65e967SBruce Richardson 
310df65e967SBruce Richardson 		num_tx++;
311330f11adSRich Lane 		num_tx_bytes += mbuf->pkt_len;
312df65e967SBruce Richardson 		rte_pktmbuf_free(mbuf);
313df65e967SBruce Richardson 	}
314df65e967SBruce Richardson 
315df65e967SBruce Richardson 	/* kick-off transmits */
316d4bda0abSFlavia Musatescu 	if (sendto(pkt_q->sockfd, NULL, 0, MSG_DONTWAIT, NULL, 0) == -1 &&
317d4bda0abSFlavia Musatescu 			errno != ENOBUFS && errno != EAGAIN) {
318d4bda0abSFlavia Musatescu 		/*
319d4bda0abSFlavia Musatescu 		 * In case of a ENOBUFS/EAGAIN error all of the enqueued
320d4bda0abSFlavia Musatescu 		 * packets will be considered successful even though only some
321d4bda0abSFlavia Musatescu 		 * are sent.
322d4bda0abSFlavia Musatescu 		 */
323d4bda0abSFlavia Musatescu 
32474b7fc0aSChas Williams 		num_tx = 0;
32574b7fc0aSChas Williams 		num_tx_bytes = 0;
32674b7fc0aSChas Williams 	}
327df65e967SBruce Richardson 
328df65e967SBruce Richardson 	pkt_q->framenum = framenum;
329df65e967SBruce Richardson 	pkt_q->tx_pkts += num_tx;
330f4ff17a4SMichał Mirosław 	pkt_q->err_pkts += i - num_tx;
331330f11adSRich Lane 	pkt_q->tx_bytes += num_tx_bytes;
332f4ff17a4SMichał Mirosław 	return i;
333df65e967SBruce Richardson }
334df65e967SBruce Richardson 
335df65e967SBruce Richardson static int
336df65e967SBruce Richardson eth_dev_start(struct rte_eth_dev *dev)
337df65e967SBruce Richardson {
338a059e463SJie Hai 	struct pmd_internals *internals = dev->data->dev_private;
339a059e463SJie Hai 	uint16_t i;
340a059e463SJie Hai 
341be10211cSStefan Laesser 	if (internals->timestamp_offloading) {
342be10211cSStefan Laesser 		/* Register mbuf field and flag for Rx timestamp */
343be10211cSStefan Laesser 		int rc = rte_mbuf_dyn_rx_timestamp_register(&timestamp_dynfield_offset,
344be10211cSStefan Laesser 				&timestamp_dynflag);
345be10211cSStefan Laesser 		if (rc) {
346be10211cSStefan Laesser 			PMD_LOG(ERR, "Cannot register mbuf field/flag for timestamp");
347be10211cSStefan Laesser 			return rc;
348be10211cSStefan Laesser 		}
349be10211cSStefan Laesser 	}
350be10211cSStefan Laesser 
351295968d1SFerruh Yigit 	dev->data->dev_link.link_status = RTE_ETH_LINK_UP;
352a059e463SJie Hai 	for (i = 0; i < internals->nb_queues; i++) {
353a059e463SJie Hai 		dev->data->rx_queue_state[i] = RTE_ETH_QUEUE_STATE_STARTED;
354a059e463SJie Hai 		dev->data->tx_queue_state[i] = RTE_ETH_QUEUE_STATE_STARTED;
355a059e463SJie Hai 	}
356df65e967SBruce Richardson 	return 0;
357df65e967SBruce Richardson }
358df65e967SBruce Richardson 
359df65e967SBruce Richardson /*
360df65e967SBruce Richardson  * This function gets called when the current port gets stopped.
361df65e967SBruce Richardson  */
36262024eb8SIvan Ilchenko static int
363df65e967SBruce Richardson eth_dev_stop(struct rte_eth_dev *dev)
364df65e967SBruce Richardson {
365df65e967SBruce Richardson 	unsigned i;
366df65e967SBruce Richardson 	int sockfd;
367df65e967SBruce Richardson 	struct pmd_internals *internals = dev->data->dev_private;
368df65e967SBruce Richardson 
369df65e967SBruce Richardson 	for (i = 0; i < internals->nb_queues; i++) {
370df65e967SBruce Richardson 		sockfd = internals->rx_queue[i].sockfd;
371df65e967SBruce Richardson 		if (sockfd != -1)
372df65e967SBruce Richardson 			close(sockfd);
3735d16a43cSTimmons C. Player 
3745d16a43cSTimmons C. Player 		/* Prevent use after free in case tx fd == rx fd */
3755d16a43cSTimmons C. Player 		if (sockfd != internals->tx_queue[i].sockfd) {
376df65e967SBruce Richardson 			sockfd = internals->tx_queue[i].sockfd;
377df65e967SBruce Richardson 			if (sockfd != -1)
378df65e967SBruce Richardson 				close(sockfd);
379df65e967SBruce Richardson 		}
380df65e967SBruce Richardson 
3815d16a43cSTimmons C. Player 		internals->rx_queue[i].sockfd = -1;
3825d16a43cSTimmons C. Player 		internals->tx_queue[i].sockfd = -1;
383a059e463SJie Hai 		dev->data->rx_queue_state[i] = RTE_ETH_QUEUE_STATE_STOPPED;
384a059e463SJie Hai 		dev->data->tx_queue_state[i] = RTE_ETH_QUEUE_STATE_STOPPED;
3855d16a43cSTimmons C. Player 	}
3865d16a43cSTimmons C. Player 
387295968d1SFerruh Yigit 	dev->data->dev_link.link_status = RTE_ETH_LINK_DOWN;
38862024eb8SIvan Ilchenko 	return 0;
389df65e967SBruce Richardson }
390df65e967SBruce Richardson 
391df65e967SBruce Richardson static int
392df65e967SBruce Richardson eth_dev_configure(struct rte_eth_dev *dev __rte_unused)
393df65e967SBruce Richardson {
394d41d39bcSTudor Cornea 	struct rte_eth_conf *dev_conf = &dev->data->dev_conf;
395d41d39bcSTudor Cornea 	const struct rte_eth_rxmode *rxmode = &dev_conf->rxmode;
396d41d39bcSTudor Cornea 	struct pmd_internals *internals = dev->data->dev_private;
397d41d39bcSTudor Cornea 
398295968d1SFerruh Yigit 	internals->vlan_strip = !!(rxmode->offloads & RTE_ETH_RX_OFFLOAD_VLAN_STRIP);
399be10211cSStefan Laesser 	internals->timestamp_offloading = !!(rxmode->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP);
400df65e967SBruce Richardson 	return 0;
401df65e967SBruce Richardson }
402df65e967SBruce Richardson 
403bdad90d1SIvan Ilchenko static int
404df65e967SBruce Richardson eth_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
405df65e967SBruce Richardson {
406df65e967SBruce Richardson 	struct pmd_internals *internals = dev->data->dev_private;
407df65e967SBruce Richardson 
408df65e967SBruce Richardson 	dev_info->if_index = internals->if_index;
409df65e967SBruce Richardson 	dev_info->max_mac_addrs = 1;
4104e8a9107SFerruh Yigit 	dev_info->max_rx_pktlen = RTE_ETHER_MAX_LEN;
411df65e967SBruce Richardson 	dev_info->max_rx_queues = (uint16_t)internals->nb_queues;
412df65e967SBruce Richardson 	dev_info->max_tx_queues = (uint16_t)internals->nb_queues;
413df65e967SBruce Richardson 	dev_info->min_rx_bufsize = 0;
414295968d1SFerruh Yigit 	dev_info->tx_offload_capa = RTE_ETH_TX_OFFLOAD_MULTI_SEGS |
415295968d1SFerruh Yigit 		RTE_ETH_TX_OFFLOAD_VLAN_INSERT;
416be10211cSStefan Laesser 	dev_info->rx_offload_capa = RTE_ETH_RX_OFFLOAD_VLAN_STRIP |
417be10211cSStefan Laesser 		RTE_ETH_RX_OFFLOAD_TIMESTAMP;
418bdad90d1SIvan Ilchenko 
419bdad90d1SIvan Ilchenko 	return 0;
420df65e967SBruce Richardson }
421df65e967SBruce Richardson 
422*6b324624SStefan Laesser 
/*
 * Query dropped packets counter from socket.
 * Reading drop count clears the value of the socket!
 *
 * Returns the number of drops reported by PACKET_STATISTICS, or 0 when
 * sockfd is -1 or the query fails.
 */
static unsigned int
packet_drop_count(int sockfd)
{
	struct tpacket_stats pkt_stats = { 0 };
	socklen_t pkt_stats_len = sizeof(pkt_stats);

	if (sockfd == -1)
		return 0;

	/* getsockopt() returns -1 on failure; pkt_stats is not valid then */
	if (getsockopt(sockfd, SOL_PACKET, PACKET_STATISTICS, &pkt_stats,
		&pkt_stats_len) < 0)
		return 0;

	return pkt_stats.tp_drops;
}
442*6b324624SStefan Laesser 
443*6b324624SStefan Laesser static int
444*6b324624SStefan Laesser eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
445*6b324624SStefan Laesser {
446*6b324624SStefan Laesser 	unsigned int i;
447*6b324624SStefan Laesser 	unsigned long rx_total = 0, rx_dropped_total = 0, rx_nombuf_total = 0;
448*6b324624SStefan Laesser 	unsigned long tx_total = 0, tx_err_total = 0;
449330f11adSRich Lane 	unsigned long rx_bytes_total = 0, tx_bytes_total = 0;
450df65e967SBruce Richardson 	const struct pmd_internals *internal = dev->data->dev_private;
451df65e967SBruce Richardson 
452*6b324624SStefan Laesser 	for (i = 0; i < internal->nb_queues; i++) {
453*6b324624SStefan Laesser 		/* reading drop count clears the value, therefore keep total value */
454*6b324624SStefan Laesser 		internal->rx_queue[i].rx_dropped_pkts +=
455*6b324624SStefan Laesser 			packet_drop_count(internal->rx_queue[i].sockfd);
456df65e967SBruce Richardson 
457*6b324624SStefan Laesser 		rx_total += internal->rx_queue[i].rx_pkts;
458*6b324624SStefan Laesser 		rx_bytes_total += internal->rx_queue[i].rx_bytes;
459*6b324624SStefan Laesser 		rx_dropped_total += internal->rx_queue[i].rx_dropped_pkts;
460*6b324624SStefan Laesser 		rx_nombuf_total += internal->rx_queue[i].rx_nombuf;
461*6b324624SStefan Laesser 
462*6b324624SStefan Laesser 		tx_total += internal->tx_queue[i].tx_pkts;
4633b79ed8aSDavid Marchand 		tx_err_total += internal->tx_queue[i].err_pkts;
464*6b324624SStefan Laesser 		tx_bytes_total += internal->tx_queue[i].tx_bytes;
465*6b324624SStefan Laesser 
466*6b324624SStefan Laesser 		if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
467*6b324624SStefan Laesser 			stats->q_ipackets[i] = internal->rx_queue[i].rx_pkts;
468*6b324624SStefan Laesser 			stats->q_ibytes[i] = internal->rx_queue[i].rx_bytes;
469*6b324624SStefan Laesser 			stats->q_opackets[i] = internal->tx_queue[i].tx_pkts;
470*6b324624SStefan Laesser 			stats->q_obytes[i] = internal->tx_queue[i].tx_bytes;
471*6b324624SStefan Laesser 		}
472df65e967SBruce Richardson 	}
473df65e967SBruce Richardson 
474*6b324624SStefan Laesser 	stats->ipackets = rx_total;
475*6b324624SStefan Laesser 	stats->ibytes = rx_bytes_total;
476*6b324624SStefan Laesser 	stats->imissed = rx_dropped_total;
477*6b324624SStefan Laesser 	stats->rx_nombuf = rx_nombuf_total;
478*6b324624SStefan Laesser 	stats->opackets = tx_total;
479*6b324624SStefan Laesser 	stats->oerrors = tx_err_total;
480*6b324624SStefan Laesser 	stats->obytes = tx_bytes_total;
481d5b0924bSMatan Azrad 	return 0;
482df65e967SBruce Richardson }
483df65e967SBruce Richardson 
4849970a9adSIgor Romanov static int
485df65e967SBruce Richardson eth_stats_reset(struct rte_eth_dev *dev)
486df65e967SBruce Richardson {
487df65e967SBruce Richardson 	unsigned i;
488df65e967SBruce Richardson 	struct pmd_internals *internal = dev->data->dev_private;
489df65e967SBruce Richardson 
490330f11adSRich Lane 	for (i = 0; i < internal->nb_queues; i++) {
491*6b324624SStefan Laesser 		/* clear socket counter */
492*6b324624SStefan Laesser 		packet_drop_count(internal->rx_queue[i].sockfd);
493*6b324624SStefan Laesser 
494df65e967SBruce Richardson 		internal->rx_queue[i].rx_pkts = 0;
495330f11adSRich Lane 		internal->rx_queue[i].rx_bytes = 0;
496*6b324624SStefan Laesser 		internal->rx_queue[i].rx_nombuf = 0;
497*6b324624SStefan Laesser 		internal->rx_queue[i].rx_dropped_pkts = 0;
498df65e967SBruce Richardson 
499df65e967SBruce Richardson 		internal->tx_queue[i].tx_pkts = 0;
500df65e967SBruce Richardson 		internal->tx_queue[i].err_pkts = 0;
501330f11adSRich Lane 		internal->tx_queue[i].tx_bytes = 0;
502df65e967SBruce Richardson 	}
5039970a9adSIgor Romanov 
5049970a9adSIgor Romanov 	return 0;
505df65e967SBruce Richardson }
506df65e967SBruce Richardson 
507b142387bSThomas Monjalon static int
508dc89abe5SThomas Monjalon eth_dev_close(struct rte_eth_dev *dev)
509df65e967SBruce Richardson {
510dc89abe5SThomas Monjalon 	struct pmd_internals *internals;
511dc89abe5SThomas Monjalon 	struct tpacket_req *req;
512dc89abe5SThomas Monjalon 	unsigned int q;
513dc89abe5SThomas Monjalon 
514dc89abe5SThomas Monjalon 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
515dc89abe5SThomas Monjalon 		return 0;
516dc89abe5SThomas Monjalon 
517dc89abe5SThomas Monjalon 	PMD_LOG(INFO, "Closing AF_PACKET ethdev on NUMA socket %u",
518dc89abe5SThomas Monjalon 		rte_socket_id());
519dc89abe5SThomas Monjalon 
520dc89abe5SThomas Monjalon 	internals = dev->data->dev_private;
521dc89abe5SThomas Monjalon 	req = &internals->req;
522dc89abe5SThomas Monjalon 	for (q = 0; q < internals->nb_queues; q++) {
523dc89abe5SThomas Monjalon 		munmap(internals->rx_queue[q].map,
524dc89abe5SThomas Monjalon 			2 * req->tp_block_size * req->tp_block_nr);
525dc89abe5SThomas Monjalon 		rte_free(internals->rx_queue[q].rd);
526dc89abe5SThomas Monjalon 		rte_free(internals->tx_queue[q].rd);
527dc89abe5SThomas Monjalon 	}
528dc89abe5SThomas Monjalon 	free(internals->if_name);
529dc89abe5SThomas Monjalon 	rte_free(internals->rx_queue);
530dc89abe5SThomas Monjalon 	rte_free(internals->tx_queue);
531dc89abe5SThomas Monjalon 
532dc89abe5SThomas Monjalon 	/* mac_addrs must not be freed alone because part of dev_private */
533dc89abe5SThomas Monjalon 	dev->data->mac_addrs = NULL;
534b142387bSThomas Monjalon 	return 0;
535df65e967SBruce Richardson }
536df65e967SBruce Richardson 
537df65e967SBruce Richardson static int
538dcb035b0SGur Stavi eth_link_update(struct rte_eth_dev *dev,
539df65e967SBruce Richardson                 int wait_to_complete __rte_unused)
540df65e967SBruce Richardson {
541dcb035b0SGur Stavi 	const struct pmd_internals *internals = dev->data->dev_private;
542dcb035b0SGur Stavi 	struct rte_eth_link *dev_link = &dev->data->dev_link;
543dcb035b0SGur Stavi 	int sockfd = internals->rx_queue[0].sockfd;
544dcb035b0SGur Stavi 	struct ifreq ifr = { };
545dcb035b0SGur Stavi 
546dcb035b0SGur Stavi 	if (sockfd == -1)
547dcb035b0SGur Stavi 		return 0;
548dcb035b0SGur Stavi 
549dcb035b0SGur Stavi 	strlcpy(ifr.ifr_name, internals->if_name, IFNAMSIZ);
550dcb035b0SGur Stavi 	if (ioctl(sockfd, SIOCGIFFLAGS, &ifr) < 0)
551dcb035b0SGur Stavi 		return -errno;
552dcb035b0SGur Stavi 	dev_link->link_status = (ifr.ifr_flags & IFF_RUNNING) ?
553dcb035b0SGur Stavi 		RTE_ETH_LINK_UP : RTE_ETH_LINK_DOWN;
554df65e967SBruce Richardson 	return 0;
555df65e967SBruce Richardson }
556df65e967SBruce Richardson 
557df65e967SBruce Richardson static int
558df65e967SBruce Richardson eth_rx_queue_setup(struct rte_eth_dev *dev,
559df65e967SBruce Richardson                    uint16_t rx_queue_id,
560df65e967SBruce Richardson                    uint16_t nb_rx_desc __rte_unused,
561df65e967SBruce Richardson                    unsigned int socket_id __rte_unused,
562df65e967SBruce Richardson                    const struct rte_eth_rxconf *rx_conf __rte_unused,
563df65e967SBruce Richardson                    struct rte_mempool *mb_pool)
564df65e967SBruce Richardson {
565df65e967SBruce Richardson 	struct pmd_internals *internals = dev->data->dev_private;
566df65e967SBruce Richardson 	struct pkt_rx_queue *pkt_q = &internals->rx_queue[rx_queue_id];
567b5a8868bSMichał Mirosław 	unsigned int buf_size, data_size;
568df65e967SBruce Richardson 
569df65e967SBruce Richardson 	pkt_q->mb_pool = mb_pool;
570df65e967SBruce Richardson 
571df65e967SBruce Richardson 	/* Now get the space available for data in the mbuf */
572b5a8868bSMichał Mirosław 	buf_size = rte_pktmbuf_data_room_size(pkt_q->mb_pool) -
573b5a8868bSMichał Mirosław 		RTE_PKTMBUF_HEADROOM;
574b5a8868bSMichał Mirosław 	data_size = internals->req.tp_frame_size;
575b5a8868bSMichał Mirosław 	data_size -= TPACKET2_HDRLEN - sizeof(struct sockaddr_ll);
576df65e967SBruce Richardson 
577b5a8868bSMichał Mirosław 	if (data_size > buf_size) {
5788c54a3c6SStephen Hemminger 		PMD_LOG(ERR,
5798c54a3c6SStephen Hemminger 			"%s: %d bytes will not fit in mbuf (%d bytes)",
5804be4659aSFerruh Yigit 			dev->device->name, data_size, buf_size);
581df65e967SBruce Richardson 		return -ENOMEM;
582df65e967SBruce Richardson 	}
583df65e967SBruce Richardson 
584df65e967SBruce Richardson 	dev->data->rx_queues[rx_queue_id] = pkt_q;
585e01993bfSPavel Krauz 	pkt_q->in_port = dev->data->port_id;
586d41d39bcSTudor Cornea 	pkt_q->vlan_strip = internals->vlan_strip;
587be10211cSStefan Laesser 	pkt_q->timestamp_offloading = internals->timestamp_offloading;
588df65e967SBruce Richardson 
589df65e967SBruce Richardson 	return 0;
590df65e967SBruce Richardson }
591df65e967SBruce Richardson 
592df65e967SBruce Richardson static int
593df65e967SBruce Richardson eth_tx_queue_setup(struct rte_eth_dev *dev,
594df65e967SBruce Richardson                    uint16_t tx_queue_id,
595df65e967SBruce Richardson                    uint16_t nb_tx_desc __rte_unused,
596df65e967SBruce Richardson                    unsigned int socket_id __rte_unused,
597df65e967SBruce Richardson                    const struct rte_eth_txconf *tx_conf __rte_unused)
598df65e967SBruce Richardson {
599df65e967SBruce Richardson 
600df65e967SBruce Richardson 	struct pmd_internals *internals = dev->data->dev_private;
601df65e967SBruce Richardson 
602df65e967SBruce Richardson 	dev->data->tx_queues[tx_queue_id] = &internals->tx_queue[tx_queue_id];
603df65e967SBruce Richardson 	return 0;
604df65e967SBruce Richardson }
605df65e967SBruce Richardson 
606cc68ac48SChas Williams static int
607cc68ac48SChas Williams eth_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
608cc68ac48SChas Williams {
609cc68ac48SChas Williams 	struct pmd_internals *internals = dev->data->dev_private;
610cc68ac48SChas Williams 	struct ifreq ifr = { .ifr_mtu = mtu };
611cc68ac48SChas Williams 	int ret;
612cc68ac48SChas Williams 	int s;
613cc68ac48SChas Williams 	unsigned int data_size = internals->req.tp_frame_size -
61480205738STiago Lam 				 TPACKET2_HDRLEN;
615cc68ac48SChas Williams 
616cc68ac48SChas Williams 	if (mtu > data_size)
617cc68ac48SChas Williams 		return -EINVAL;
618cc68ac48SChas Williams 
619cc68ac48SChas Williams 	s = socket(PF_INET, SOCK_DGRAM, 0);
620cc68ac48SChas Williams 	if (s < 0)
621cc68ac48SChas Williams 		return -EINVAL;
622cc68ac48SChas Williams 
6236723c0fcSBruce Richardson 	strlcpy(ifr.ifr_name, internals->if_name, IFNAMSIZ);
624cc68ac48SChas Williams 	ret = ioctl(s, SIOCSIFMTU, &ifr);
625cc68ac48SChas Williams 	close(s);
626cc68ac48SChas Williams 
627cc68ac48SChas Williams 	if (ret < 0)
628cc68ac48SChas Williams 		return -EINVAL;
629cc68ac48SChas Williams 
630cc68ac48SChas Williams 	return 0;
631cc68ac48SChas Williams }
632cc68ac48SChas Williams 
6339039c812SAndrew Rybchenko static int
634c524527dSStephen Hemminger eth_dev_macaddr_set(struct rte_eth_dev *dev, struct rte_ether_addr *addr)
635c524527dSStephen Hemminger {
636c524527dSStephen Hemminger 	struct pmd_internals *internals = dev->data->dev_private;
637c524527dSStephen Hemminger 	struct ifreq ifr = { };
638c524527dSStephen Hemminger 	int sockfd = internals->rx_queue[0].sockfd;
639c524527dSStephen Hemminger 	int ret;
640c524527dSStephen Hemminger 
641c524527dSStephen Hemminger 	if (sockfd == -1) {
642c524527dSStephen Hemminger 		PMD_LOG(ERR, "receive socket not found");
643c524527dSStephen Hemminger 		return -EINVAL;
644c524527dSStephen Hemminger 	}
645c524527dSStephen Hemminger 
646c524527dSStephen Hemminger 	strlcpy(ifr.ifr_name, internals->if_name, IFNAMSIZ);
647c524527dSStephen Hemminger 	ifr.ifr_hwaddr.sa_family = ARPHRD_ETHER;
648c524527dSStephen Hemminger 	memcpy(ifr.ifr_hwaddr.sa_data, addr, sizeof(*addr));
649c524527dSStephen Hemminger 	ret = ioctl(sockfd, SIOCSIFHWADDR, &ifr);
650c524527dSStephen Hemminger 
651c524527dSStephen Hemminger 	if (ret < 0) {
652c524527dSStephen Hemminger 		PMD_LOG_ERRNO(ERR, "ioctl(SIOCSIFHWADDR) failed");
653c524527dSStephen Hemminger 		return -EINVAL;
654c524527dSStephen Hemminger 	}
655c524527dSStephen Hemminger 
656c524527dSStephen Hemminger 	return 0;
657c524527dSStephen Hemminger }
658c524527dSStephen Hemminger 
659c524527dSStephen Hemminger static int
66021825959SChas Williams eth_dev_change_flags(char *if_name, uint32_t flags, uint32_t mask)
66121825959SChas Williams {
66221825959SChas Williams 	struct ifreq ifr;
6639039c812SAndrew Rybchenko 	int ret = 0;
66421825959SChas Williams 	int s;
66521825959SChas Williams 
66621825959SChas Williams 	s = socket(PF_INET, SOCK_DGRAM, 0);
66721825959SChas Williams 	if (s < 0)
6689039c812SAndrew Rybchenko 		return -errno;
66921825959SChas Williams 
6706723c0fcSBruce Richardson 	strlcpy(ifr.ifr_name, if_name, IFNAMSIZ);
6719039c812SAndrew Rybchenko 	if (ioctl(s, SIOCGIFFLAGS, &ifr) < 0) {
6729039c812SAndrew Rybchenko 		ret = -errno;
67321825959SChas Williams 		goto out;
6749039c812SAndrew Rybchenko 	}
67521825959SChas Williams 	ifr.ifr_flags &= mask;
67621825959SChas Williams 	ifr.ifr_flags |= flags;
6779039c812SAndrew Rybchenko 	if (ioctl(s, SIOCSIFFLAGS, &ifr) < 0) {
6789039c812SAndrew Rybchenko 		ret = -errno;
67921825959SChas Williams 		goto out;
6809039c812SAndrew Rybchenko 	}
68121825959SChas Williams out:
68221825959SChas Williams 	close(s);
6839039c812SAndrew Rybchenko 	return ret;
68421825959SChas Williams }
68521825959SChas Williams 
6869039c812SAndrew Rybchenko static int
68721825959SChas Williams eth_dev_promiscuous_enable(struct rte_eth_dev *dev)
68821825959SChas Williams {
68921825959SChas Williams 	struct pmd_internals *internals = dev->data->dev_private;
69021825959SChas Williams 
6919039c812SAndrew Rybchenko 	return eth_dev_change_flags(internals->if_name, IFF_PROMISC, ~0);
69221825959SChas Williams }
69321825959SChas Williams 
6949039c812SAndrew Rybchenko static int
69521825959SChas Williams eth_dev_promiscuous_disable(struct rte_eth_dev *dev)
69621825959SChas Williams {
69721825959SChas Williams 	struct pmd_internals *internals = dev->data->dev_private;
69821825959SChas Williams 
6999039c812SAndrew Rybchenko 	return eth_dev_change_flags(internals->if_name, 0, ~IFF_PROMISC);
70021825959SChas Williams }
70121825959SChas Williams 
702df65e967SBruce Richardson static const struct eth_dev_ops ops = {
703df65e967SBruce Richardson 	.dev_start = eth_dev_start,
704df65e967SBruce Richardson 	.dev_stop = eth_dev_stop,
705df65e967SBruce Richardson 	.dev_close = eth_dev_close,
706df65e967SBruce Richardson 	.dev_configure = eth_dev_configure,
707df65e967SBruce Richardson 	.dev_infos_get = eth_dev_info,
708c524527dSStephen Hemminger 	.mac_addr_set = eth_dev_macaddr_set,
709cc68ac48SChas Williams 	.mtu_set = eth_dev_mtu_set,
71021825959SChas Williams 	.promiscuous_enable = eth_dev_promiscuous_enable,
71121825959SChas Williams 	.promiscuous_disable = eth_dev_promiscuous_disable,
712df65e967SBruce Richardson 	.rx_queue_setup = eth_rx_queue_setup,
713df65e967SBruce Richardson 	.tx_queue_setup = eth_tx_queue_setup,
714df65e967SBruce Richardson 	.link_update = eth_link_update,
715df65e967SBruce Richardson 	.stats_get = eth_stats_get,
716df65e967SBruce Richardson 	.stats_reset = eth_stats_reset,
717df65e967SBruce Richardson };
718df65e967SBruce Richardson 
719df65e967SBruce Richardson /*
720df65e967SBruce Richardson  * Opens an AF_PACKET socket
721df65e967SBruce Richardson  */
722df65e967SBruce Richardson static int
723df65e967SBruce Richardson open_packet_iface(const char *key __rte_unused,
724df65e967SBruce Richardson                   const char *value __rte_unused,
725df65e967SBruce Richardson                   void *extra_args)
726df65e967SBruce Richardson {
727df65e967SBruce Richardson 	int *sockfd = extra_args;
728df65e967SBruce Richardson 
729df65e967SBruce Richardson 	/* Open an AF_PACKET socket... */
7305b81eac5SGur Stavi 	*sockfd = socket(AF_PACKET, SOCK_RAW, 0);
731df65e967SBruce Richardson 	if (*sockfd == -1) {
7328c54a3c6SStephen Hemminger 		PMD_LOG(ERR, "Could not open AF_PACKET socket");
733df65e967SBruce Richardson 		return -1;
734df65e967SBruce Richardson 	}
735df65e967SBruce Richardson 
736df65e967SBruce Richardson 	return 0;
737df65e967SBruce Richardson }
738df65e967SBruce Richardson 
/* Sentinel returned by the fanout helpers for an unrecognised mode name. */
#define PACKET_FANOUT_INVALID (-1)

/*
 * Derive a 16-bit fanout group id from the process id and interface index.
 */
static int
get_fanout_group_id(int if_index)
{
	return (getpid() ^ if_index) & 0xffff;
}

/*
 * Translate a fanout mode name into the PACKET_FANOUT type/flags value.
 *
 * The DEFRAG and ROLLOVER flags are always set. A NULL name selects the
 * default (hash). Returns PACKET_FANOUT_INVALID for an unknown name.
 */
static int
get_fanout_mode(const char *fanout_mode)
{
	int load_balance = PACKET_FANOUT_FLAG_DEFRAG |
			   PACKET_FANOUT_FLAG_ROLLOVER;

	if (!fanout_mode) {
		/* Default */
		load_balance |= PACKET_FANOUT_HASH;
	} else if (!strcmp(fanout_mode, "hash")) {
		load_balance |= PACKET_FANOUT_HASH;
	} else if (!strcmp(fanout_mode, "lb")) {
		load_balance |= PACKET_FANOUT_LB;
	} else if (!strcmp(fanout_mode, "cpu")) {
		load_balance |= PACKET_FANOUT_CPU;
	} else if (!strcmp(fanout_mode, "rollover")) {
		load_balance |= PACKET_FANOUT_ROLLOVER;
	} else if (!strcmp(fanout_mode, "rnd")) {
		load_balance |= PACKET_FANOUT_RND;
	} else if (!strcmp(fanout_mode, "qm")) {
		load_balance |= PACKET_FANOUT_QM;
	} else {
		/* Invalid Fanout Mode */
		load_balance = PACKET_FANOUT_INVALID;
	}

	return load_balance;
}

/*
 * Build the PACKET_FANOUT socket option argument: group id in the low
 * 16 bits, mode and flags in the high 16 bits.
 *
 * Returns PACKET_FANOUT_INVALID when the mode name is not recognised.
 */
static int
get_fanout(const char *fanout_mode, int if_index)
{
	int load_balance = get_fanout_mode(fanout_mode);
	uint32_t arg;

	if (load_balance == PACKET_FANOUT_INVALID)
		return PACKET_FANOUT_INVALID;

	/*
	 * The flags occupy bit 15 of the upper half, so shifting them as a
	 * signed int would move a set bit into the sign bit (undefined
	 * behaviour). Compose the value as unsigned instead.
	 */
	arg = (uint32_t)get_fanout_group_id(if_index) |
	      ((uint32_t)load_balance << 16);

	return (int)arg;
}
785d3bc77abSTudor Cornea 
786df65e967SBruce Richardson static int
787050fe6e9SJan Blunck rte_pmd_init_internals(struct rte_vdev_device *dev,
788df65e967SBruce Richardson                        const int sockfd,
789df65e967SBruce Richardson                        const unsigned nb_queues,
790df65e967SBruce Richardson                        unsigned int blocksize,
791df65e967SBruce Richardson                        unsigned int blockcnt,
792df65e967SBruce Richardson                        unsigned int framesize,
793df65e967SBruce Richardson                        unsigned int framecnt,
794d97de5b5SChas Williams 		       unsigned int qdisc_bypass,
795d3bc77abSTudor Cornea 		       const char *fanout_mode,
796df65e967SBruce Richardson                        struct pmd_internals **internals,
797df65e967SBruce Richardson                        struct rte_eth_dev **eth_dev,
798df65e967SBruce Richardson                        struct rte_kvargs *kvlist)
799df65e967SBruce Richardson {
800050fe6e9SJan Blunck 	const char *name = rte_vdev_device_name(dev);
801050fe6e9SJan Blunck 	const unsigned int numa_node = dev->device.numa_node;
802df65e967SBruce Richardson 	struct rte_eth_dev_data *data = NULL;
803df65e967SBruce Richardson 	struct rte_kvargs_pair *pair = NULL;
804df65e967SBruce Richardson 	struct ifreq ifr;
805df65e967SBruce Richardson 	size_t ifnamelen;
806df65e967SBruce Richardson 	unsigned k_idx;
807df65e967SBruce Richardson 	struct sockaddr_ll sockaddr;
808df65e967SBruce Richardson 	struct tpacket_req *req;
809df65e967SBruce Richardson 	struct pkt_rx_queue *rx_queue;
810df65e967SBruce Richardson 	struct pkt_tx_queue *tx_queue;
811df65e967SBruce Richardson 	int rc, tpver, discard;
812df65e967SBruce Richardson 	int qsockfd = -1;
813df65e967SBruce Richardson 	unsigned int i, q, rdsize;
814dd2c630aSFerruh Yigit 	int fanout_arg;
815df65e967SBruce Richardson 
816df65e967SBruce Richardson 	for (k_idx = 0; k_idx < kvlist->count; k_idx++) {
817df65e967SBruce Richardson 		pair = &kvlist->pairs[k_idx];
818df65e967SBruce Richardson 		if (strstr(pair->key, ETH_AF_PACKET_IFACE_ARG) != NULL)
819df65e967SBruce Richardson 			break;
820df65e967SBruce Richardson 	}
821df65e967SBruce Richardson 	if (pair == NULL) {
8228c54a3c6SStephen Hemminger 		PMD_LOG(ERR,
8238c54a3c6SStephen Hemminger 			"%s: no interface specified for AF_PACKET ethdev",
824df65e967SBruce Richardson 		        name);
8255f19dee6SJianfeng Tan 		return -1;
826df65e967SBruce Richardson 	}
827df65e967SBruce Richardson 
8288c54a3c6SStephen Hemminger 	PMD_LOG(INFO,
8298c54a3c6SStephen Hemminger 		"%s: creating AF_PACKET-backed ethdev on numa socket %u",
830df65e967SBruce Richardson 		name, numa_node);
831df65e967SBruce Richardson 
832df65e967SBruce Richardson 	*internals = rte_zmalloc_socket(name, sizeof(**internals),
833df65e967SBruce Richardson 	                                0, numa_node);
834df65e967SBruce Richardson 	if (*internals == NULL)
8355f19dee6SJianfeng Tan 		return -1;
836df65e967SBruce Richardson 
837ccd37d34SStephen Hemminger 
838ccd37d34SStephen Hemminger 	(*internals)->rx_queue = rte_calloc_socket("af_packet_rx",
839ccd37d34SStephen Hemminger 						nb_queues,
840ccd37d34SStephen Hemminger 						sizeof(struct pkt_rx_queue),
841ccd37d34SStephen Hemminger 						0, numa_node);
842ccd37d34SStephen Hemminger 	(*internals)->tx_queue = rte_calloc_socket("af_packet_tx",
843ccd37d34SStephen Hemminger 						nb_queues,
844ccd37d34SStephen Hemminger 						sizeof(struct pkt_tx_queue),
845ccd37d34SStephen Hemminger 						0, numa_node);
846ccd37d34SStephen Hemminger 	if (!(*internals)->rx_queue || !(*internals)->tx_queue) {
84734001489SYunjian Wang 		goto free_internals;
848ccd37d34SStephen Hemminger 	}
849ccd37d34SStephen Hemminger 
850df65e967SBruce Richardson 	for (q = 0; q < nb_queues; q++) {
851df65e967SBruce Richardson 		(*internals)->rx_queue[q].map = MAP_FAILED;
852df65e967SBruce Richardson 		(*internals)->tx_queue[q].map = MAP_FAILED;
853c6d1a552SYunjian Wang 		(*internals)->rx_queue[q].sockfd = -1;
854c6d1a552SYunjian Wang 		(*internals)->tx_queue[q].sockfd = -1;
855df65e967SBruce Richardson 	}
856df65e967SBruce Richardson 
857df65e967SBruce Richardson 	req = &((*internals)->req);
858df65e967SBruce Richardson 
859df65e967SBruce Richardson 	req->tp_block_size = blocksize;
860df65e967SBruce Richardson 	req->tp_block_nr = blockcnt;
861df65e967SBruce Richardson 	req->tp_frame_size = framesize;
862df65e967SBruce Richardson 	req->tp_frame_nr = framecnt;
863df65e967SBruce Richardson 
864df65e967SBruce Richardson 	ifnamelen = strlen(pair->value);
865df65e967SBruce Richardson 	if (ifnamelen < sizeof(ifr.ifr_name)) {
866df65e967SBruce Richardson 		memcpy(ifr.ifr_name, pair->value, ifnamelen);
867df65e967SBruce Richardson 		ifr.ifr_name[ifnamelen] = '\0';
868df65e967SBruce Richardson 	} else {
8698c54a3c6SStephen Hemminger 		PMD_LOG(ERR,
8708c54a3c6SStephen Hemminger 			"%s: I/F name too long (%s)",
871df65e967SBruce Richardson 			name, pair->value);
87234001489SYunjian Wang 		goto free_internals;
873df65e967SBruce Richardson 	}
874df65e967SBruce Richardson 	if (ioctl(sockfd, SIOCGIFINDEX, &ifr) == -1) {
87589178d62SKrzysztof Kanas 		PMD_LOG_ERRNO(ERR, "%s: ioctl failed (SIOCGIFINDEX)", name);
87634001489SYunjian Wang 		goto free_internals;
877df65e967SBruce Richardson 	}
8781b93c2aaSChas Williams 	(*internals)->if_name = strdup(pair->value);
87992656e96SChas Williams 	if ((*internals)->if_name == NULL)
88034001489SYunjian Wang 		goto free_internals;
881df65e967SBruce Richardson 	(*internals)->if_index = ifr.ifr_ifindex;
882df65e967SBruce Richardson 
883df65e967SBruce Richardson 	if (ioctl(sockfd, SIOCGIFHWADDR, &ifr) == -1) {
88489178d62SKrzysztof Kanas 		PMD_LOG_ERRNO(ERR, "%s: ioctl failed (SIOCGIFHWADDR)", name);
88534001489SYunjian Wang 		goto free_internals;
886df65e967SBruce Richardson 	}
887df65e967SBruce Richardson 	memcpy(&(*internals)->eth_addr, ifr.ifr_hwaddr.sa_data, ETH_ALEN);
888df65e967SBruce Richardson 
889df65e967SBruce Richardson 	memset(&sockaddr, 0, sizeof(sockaddr));
890df65e967SBruce Richardson 	sockaddr.sll_family = AF_PACKET;
891df65e967SBruce Richardson 	sockaddr.sll_protocol = htons(ETH_P_ALL);
892df65e967SBruce Richardson 	sockaddr.sll_ifindex = (*internals)->if_index;
893df65e967SBruce Richardson 
894d3bc77abSTudor Cornea 	fanout_arg = get_fanout(fanout_mode, (*internals)->if_index);
895d3bc77abSTudor Cornea 	if (fanout_arg == PACKET_FANOUT_INVALID) {
896d3bc77abSTudor Cornea 		PMD_LOG(ERR, "Invalid fanout mode: %s", fanout_mode);
897d3bc77abSTudor Cornea 		goto error;
898d3bc77abSTudor Cornea 	}
899df65e967SBruce Richardson 
900df65e967SBruce Richardson 	for (q = 0; q < nb_queues; q++) {
901df65e967SBruce Richardson 		/* Open an AF_PACKET socket for this queue... */
9025b81eac5SGur Stavi 		qsockfd = socket(AF_PACKET, SOCK_RAW, 0);
903df65e967SBruce Richardson 		if (qsockfd == -1) {
90489178d62SKrzysztof Kanas 			PMD_LOG_ERRNO(ERR,
9058c54a3c6SStephen Hemminger 				"%s: could not open AF_PACKET socket",
906df65e967SBruce Richardson 				name);
90734001489SYunjian Wang 			goto error;
908df65e967SBruce Richardson 		}
909df65e967SBruce Richardson 
910df65e967SBruce Richardson 		tpver = TPACKET_V2;
911df65e967SBruce Richardson 		rc = setsockopt(qsockfd, SOL_PACKET, PACKET_VERSION,
912df65e967SBruce Richardson 				&tpver, sizeof(tpver));
913df65e967SBruce Richardson 		if (rc == -1) {
91489178d62SKrzysztof Kanas 			PMD_LOG_ERRNO(ERR,
9158c54a3c6SStephen Hemminger 				"%s: could not set PACKET_VERSION on AF_PACKET socket for %s",
9168c54a3c6SStephen Hemminger 				name, pair->value);
917df65e967SBruce Richardson 			goto error;
918df65e967SBruce Richardson 		}
919df65e967SBruce Richardson 
920df65e967SBruce Richardson 		discard = 1;
921df65e967SBruce Richardson 		rc = setsockopt(qsockfd, SOL_PACKET, PACKET_LOSS,
922df65e967SBruce Richardson 				&discard, sizeof(discard));
923df65e967SBruce Richardson 		if (rc == -1) {
92489178d62SKrzysztof Kanas 			PMD_LOG_ERRNO(ERR,
9258c54a3c6SStephen Hemminger 				"%s: could not set PACKET_LOSS on AF_PACKET socket for %s",
9268c54a3c6SStephen Hemminger 				name, pair->value);
927df65e967SBruce Richardson 			goto error;
928df65e967SBruce Richardson 		}
929df65e967SBruce Richardson 
9308089aa75STudor Cornea 		if (qdisc_bypass) {
931df65e967SBruce Richardson #if defined(PACKET_QDISC_BYPASS)
932df65e967SBruce Richardson 			rc = setsockopt(qsockfd, SOL_PACKET, PACKET_QDISC_BYPASS,
933d97de5b5SChas Williams 					&qdisc_bypass, sizeof(qdisc_bypass));
934df65e967SBruce Richardson 			if (rc == -1) {
93589178d62SKrzysztof Kanas 				PMD_LOG_ERRNO(ERR,
9368c54a3c6SStephen Hemminger 					"%s: could not set PACKET_QDISC_BYPASS on AF_PACKET socket for %s",
9378c54a3c6SStephen Hemminger 					name, pair->value);
938df65e967SBruce Richardson 				goto error;
939df65e967SBruce Richardson 			}
940df65e967SBruce Richardson #endif
9418089aa75STudor Cornea 		}
942df65e967SBruce Richardson 
943df65e967SBruce Richardson 		rc = setsockopt(qsockfd, SOL_PACKET, PACKET_RX_RING, req, sizeof(*req));
944df65e967SBruce Richardson 		if (rc == -1) {
94589178d62SKrzysztof Kanas 			PMD_LOG_ERRNO(ERR,
9468c54a3c6SStephen Hemminger 				"%s: could not set PACKET_RX_RING on AF_PACKET socket for %s",
9478c54a3c6SStephen Hemminger 				name, pair->value);
948df65e967SBruce Richardson 			goto error;
949df65e967SBruce Richardson 		}
950df65e967SBruce Richardson 
951df65e967SBruce Richardson 		rc = setsockopt(qsockfd, SOL_PACKET, PACKET_TX_RING, req, sizeof(*req));
952df65e967SBruce Richardson 		if (rc == -1) {
95389178d62SKrzysztof Kanas 			PMD_LOG_ERRNO(ERR,
954df65e967SBruce Richardson 				"%s: could not set PACKET_TX_RING on AF_PACKET "
9558c54a3c6SStephen Hemminger 				"socket for %s", name, pair->value);
956df65e967SBruce Richardson 			goto error;
957df65e967SBruce Richardson 		}
958df65e967SBruce Richardson 
959df65e967SBruce Richardson 		rx_queue = &((*internals)->rx_queue[q]);
960df65e967SBruce Richardson 		rx_queue->framecount = req->tp_frame_nr;
961df65e967SBruce Richardson 
962df65e967SBruce Richardson 		rx_queue->map = mmap(NULL, 2 * req->tp_block_size * req->tp_block_nr,
963df65e967SBruce Richardson 				    PROT_READ | PROT_WRITE, MAP_SHARED | MAP_LOCKED,
964df65e967SBruce Richardson 				    qsockfd, 0);
965df65e967SBruce Richardson 		if (rx_queue->map == MAP_FAILED) {
96689178d62SKrzysztof Kanas 			PMD_LOG_ERRNO(ERR,
9678c54a3c6SStephen Hemminger 				"%s: call to mmap failed on AF_PACKET socket for %s",
968df65e967SBruce Richardson 				name, pair->value);
969df65e967SBruce Richardson 			goto error;
970df65e967SBruce Richardson 		}
971df65e967SBruce Richardson 
972df65e967SBruce Richardson 		/* rdsize is same for both Tx and Rx */
973df65e967SBruce Richardson 		rdsize = req->tp_frame_nr * sizeof(*(rx_queue->rd));
974df65e967SBruce Richardson 
975df65e967SBruce Richardson 		rx_queue->rd = rte_zmalloc_socket(name, rdsize, 0, numa_node);
976df65e967SBruce Richardson 		if (rx_queue->rd == NULL)
977df65e967SBruce Richardson 			goto error;
978df65e967SBruce Richardson 		for (i = 0; i < req->tp_frame_nr; ++i) {
979df65e967SBruce Richardson 			rx_queue->rd[i].iov_base = rx_queue->map + (i * framesize);
980df65e967SBruce Richardson 			rx_queue->rd[i].iov_len = req->tp_frame_size;
981df65e967SBruce Richardson 		}
982df65e967SBruce Richardson 		rx_queue->sockfd = qsockfd;
983df65e967SBruce Richardson 
984df65e967SBruce Richardson 		tx_queue = &((*internals)->tx_queue[q]);
985df65e967SBruce Richardson 		tx_queue->framecount = req->tp_frame_nr;
986f4ff17a4SMichał Mirosław 		tx_queue->frame_data_size = req->tp_frame_size;
987f4ff17a4SMichał Mirosław 		tx_queue->frame_data_size -= TPACKET2_HDRLEN -
988f4ff17a4SMichał Mirosław 			sizeof(struct sockaddr_ll);
989df65e967SBruce Richardson 
990df65e967SBruce Richardson 		tx_queue->map = rx_queue->map + req->tp_block_size * req->tp_block_nr;
991df65e967SBruce Richardson 
992df65e967SBruce Richardson 		tx_queue->rd = rte_zmalloc_socket(name, rdsize, 0, numa_node);
993df65e967SBruce Richardson 		if (tx_queue->rd == NULL)
994df65e967SBruce Richardson 			goto error;
995df65e967SBruce Richardson 		for (i = 0; i < req->tp_frame_nr; ++i) {
996df65e967SBruce Richardson 			tx_queue->rd[i].iov_base = tx_queue->map + (i * framesize);
997df65e967SBruce Richardson 			tx_queue->rd[i].iov_len = req->tp_frame_size;
998df65e967SBruce Richardson 		}
999df65e967SBruce Richardson 		tx_queue->sockfd = qsockfd;
1000df65e967SBruce Richardson 
1001df65e967SBruce Richardson 		rc = bind(qsockfd, (const struct sockaddr*)&sockaddr, sizeof(sockaddr));
1002df65e967SBruce Richardson 		if (rc == -1) {
100389178d62SKrzysztof Kanas 			PMD_LOG_ERRNO(ERR,
10048c54a3c6SStephen Hemminger 				"%s: could not bind AF_PACKET socket to %s",
1005df65e967SBruce Richardson 				name, pair->value);
1006df65e967SBruce Richardson 			goto error;
1007df65e967SBruce Richardson 		}
1008df65e967SBruce Richardson 
1009d3bc77abSTudor Cornea 		if (nb_queues > 1) {
1010df65e967SBruce Richardson 			rc = setsockopt(qsockfd, SOL_PACKET, PACKET_FANOUT,
1011df65e967SBruce Richardson 					&fanout_arg, sizeof(fanout_arg));
1012df65e967SBruce Richardson 			if (rc == -1) {
101389178d62SKrzysztof Kanas 				PMD_LOG_ERRNO(ERR,
1014d3bc77abSTudor Cornea 					"%s: could not set PACKET_FANOUT "
1015d3bc77abSTudor Cornea 					"on AF_PACKET socket for %s",
101689178d62SKrzysztof Kanas 					name, pair->value);
1017df65e967SBruce Richardson 				goto error;
1018df65e967SBruce Richardson 			}
1019d3bc77abSTudor Cornea 		}
1020df65e967SBruce Richardson 	}
1021df65e967SBruce Richardson 
1022df65e967SBruce Richardson 	/* reserve an ethdev entry */
1023050fe6e9SJan Blunck 	*eth_dev = rte_eth_vdev_allocate(dev, 0);
1024df65e967SBruce Richardson 	if (*eth_dev == NULL)
1025df65e967SBruce Richardson 		goto error;
1026df65e967SBruce Richardson 
1027df65e967SBruce Richardson 	/*
1028df65e967SBruce Richardson 	 * now put it all together
1029df65e967SBruce Richardson 	 * - store queue data in internals,
10308fb9e2bbSBernard Iremonger 	 * - store numa_node in eth_dev
10318fb9e2bbSBernard Iremonger 	 * - point eth_dev_data to internals
1032df65e967SBruce Richardson 	 * - and point eth_dev structure to new eth_dev_data structure
1033df65e967SBruce Richardson 	 */
1034df65e967SBruce Richardson 
1035df65e967SBruce Richardson 	(*internals)->nb_queues = nb_queues;
1036df65e967SBruce Richardson 
10375f19dee6SJianfeng Tan 	data = (*eth_dev)->data;
1038df65e967SBruce Richardson 	data->dev_private = *internals;
1039df65e967SBruce Richardson 	data->nb_rx_queues = (uint16_t)nb_queues;
1040df65e967SBruce Richardson 	data->nb_tx_queues = (uint16_t)nb_queues;
1041df65e967SBruce Richardson 	data->dev_link = pmd_link;
1042df65e967SBruce Richardson 	data->mac_addrs = &(*internals)->eth_addr;
1043f30e69b4SFerruh Yigit 	data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;
1044df65e967SBruce Richardson 
1045df65e967SBruce Richardson 	(*eth_dev)->dev_ops = &ops;
1046df65e967SBruce Richardson 
1047df65e967SBruce Richardson 	return 0;
1048df65e967SBruce Richardson 
1049df65e967SBruce Richardson error:
105043254a33SJohn W. Linville 	if (qsockfd != -1)
105143254a33SJohn W. Linville 		close(qsockfd);
1052df65e967SBruce Richardson 	for (q = 0; q < nb_queues; q++) {
1053b02e1742SYunjian Wang 		if ((*internals)->rx_queue[q].map != MAP_FAILED)
1054df65e967SBruce Richardson 			munmap((*internals)->rx_queue[q].map,
1055df65e967SBruce Richardson 			       2 * req->tp_block_size * req->tp_block_nr);
1056df65e967SBruce Richardson 
1057df65e967SBruce Richardson 		rte_free((*internals)->rx_queue[q].rd);
1058df65e967SBruce Richardson 		rte_free((*internals)->tx_queue[q].rd);
1059c6d1a552SYunjian Wang 		if (((*internals)->rx_queue[q].sockfd >= 0) &&
1060df65e967SBruce Richardson 			((*internals)->rx_queue[q].sockfd != qsockfd))
1061df65e967SBruce Richardson 			close((*internals)->rx_queue[q].sockfd);
1062df65e967SBruce Richardson 	}
106334001489SYunjian Wang free_internals:
106434001489SYunjian Wang 	rte_free((*internals)->rx_queue);
106534001489SYunjian Wang 	rte_free((*internals)->tx_queue);
10661b93c2aaSChas Williams 	free((*internals)->if_name);
1067df65e967SBruce Richardson 	rte_free(*internals);
1068df65e967SBruce Richardson 	return -1;
1069df65e967SBruce Richardson }
1070df65e967SBruce Richardson 
1071df65e967SBruce Richardson static int
1072050fe6e9SJan Blunck rte_eth_from_packet(struct rte_vdev_device *dev,
1073df65e967SBruce Richardson                     int const *sockfd,
1074df65e967SBruce Richardson                     struct rte_kvargs *kvlist)
1075df65e967SBruce Richardson {
1076050fe6e9SJan Blunck 	const char *name = rte_vdev_device_name(dev);
1077df65e967SBruce Richardson 	struct pmd_internals *internals = NULL;
1078df65e967SBruce Richardson 	struct rte_eth_dev *eth_dev = NULL;
1079df65e967SBruce Richardson 	struct rte_kvargs_pair *pair = NULL;
1080df65e967SBruce Richardson 	unsigned k_idx;
1081df65e967SBruce Richardson 	unsigned int blockcount;
10824f538fa2SKrzysztof Kanas 	unsigned int blocksize;
1083df65e967SBruce Richardson 	unsigned int framesize = DFLT_FRAME_SIZE;
1084df65e967SBruce Richardson 	unsigned int framecount = DFLT_FRAME_COUNT;
1085df65e967SBruce Richardson 	unsigned int qpairs = 1;
1086d97de5b5SChas Williams 	unsigned int qdisc_bypass = 1;
1087d3bc77abSTudor Cornea 	const char *fanout_mode = NULL;
1088df65e967SBruce Richardson 
1089df65e967SBruce Richardson 	/* do some parameter checking */
1090df65e967SBruce Richardson 	if (*sockfd < 0)
1091df65e967SBruce Richardson 		return -1;
1092df65e967SBruce Richardson 
10934f538fa2SKrzysztof Kanas 	blocksize = getpagesize();
10944f538fa2SKrzysztof Kanas 
1095df65e967SBruce Richardson 	/*
1096df65e967SBruce Richardson 	 * Walk arguments for configurable settings
1097df65e967SBruce Richardson 	 */
1098df65e967SBruce Richardson 	for (k_idx = 0; k_idx < kvlist->count; k_idx++) {
1099df65e967SBruce Richardson 		pair = &kvlist->pairs[k_idx];
1100df65e967SBruce Richardson 		if (strstr(pair->key, ETH_AF_PACKET_NUM_Q_ARG) != NULL) {
1101df65e967SBruce Richardson 			qpairs = atoi(pair->value);
1102ccd37d34SStephen Hemminger 			if (qpairs < 1) {
11038c54a3c6SStephen Hemminger 				PMD_LOG(ERR,
11048c54a3c6SStephen Hemminger 					"%s: invalid qpairs value",
1105df65e967SBruce Richardson 				        name);
1106df65e967SBruce Richardson 				return -1;
1107df65e967SBruce Richardson 			}
1108df65e967SBruce Richardson 			continue;
1109df65e967SBruce Richardson 		}
1110df65e967SBruce Richardson 		if (strstr(pair->key, ETH_AF_PACKET_BLOCKSIZE_ARG) != NULL) {
1111df65e967SBruce Richardson 			blocksize = atoi(pair->value);
1112df65e967SBruce Richardson 			if (!blocksize) {
11138c54a3c6SStephen Hemminger 				PMD_LOG(ERR,
11148c54a3c6SStephen Hemminger 					"%s: invalid blocksize value",
1115df65e967SBruce Richardson 				        name);
1116df65e967SBruce Richardson 				return -1;
1117df65e967SBruce Richardson 			}
1118df65e967SBruce Richardson 			continue;
1119df65e967SBruce Richardson 		}
1120df65e967SBruce Richardson 		if (strstr(pair->key, ETH_AF_PACKET_FRAMESIZE_ARG) != NULL) {
1121df65e967SBruce Richardson 			framesize = atoi(pair->value);
1122df65e967SBruce Richardson 			if (!framesize) {
11238c54a3c6SStephen Hemminger 				PMD_LOG(ERR,
11248c54a3c6SStephen Hemminger 					"%s: invalid framesize value",
1125df65e967SBruce Richardson 				        name);
1126df65e967SBruce Richardson 				return -1;
1127df65e967SBruce Richardson 			}
1128df65e967SBruce Richardson 			continue;
1129df65e967SBruce Richardson 		}
1130df65e967SBruce Richardson 		if (strstr(pair->key, ETH_AF_PACKET_FRAMECOUNT_ARG) != NULL) {
1131df65e967SBruce Richardson 			framecount = atoi(pair->value);
1132df65e967SBruce Richardson 			if (!framecount) {
11338c54a3c6SStephen Hemminger 				PMD_LOG(ERR,
11348c54a3c6SStephen Hemminger 					"%s: invalid framecount value",
1135df65e967SBruce Richardson 				        name);
1136df65e967SBruce Richardson 				return -1;
1137df65e967SBruce Richardson 			}
1138df65e967SBruce Richardson 			continue;
1139df65e967SBruce Richardson 		}
1140d97de5b5SChas Williams 		if (strstr(pair->key, ETH_AF_PACKET_QDISC_BYPASS_ARG) != NULL) {
1141d97de5b5SChas Williams 			qdisc_bypass = atoi(pair->value);
1142d97de5b5SChas Williams 			if (qdisc_bypass > 1) {
11438c54a3c6SStephen Hemminger 				PMD_LOG(ERR,
11448c54a3c6SStephen Hemminger 					"%s: invalid bypass value",
1145d97de5b5SChas Williams 					name);
1146d97de5b5SChas Williams 				return -1;
1147d97de5b5SChas Williams 			}
1148d97de5b5SChas Williams 			continue;
1149d97de5b5SChas Williams 		}
1150d3bc77abSTudor Cornea 		if (strstr(pair->key, ETH_AF_PACKET_FANOUT_MODE_ARG) != NULL) {
1151d3bc77abSTudor Cornea 			fanout_mode = pair->value;
1152d3bc77abSTudor Cornea 			continue;
1153d3bc77abSTudor Cornea 		}
1154df65e967SBruce Richardson 	}
1155df65e967SBruce Richardson 
1156df65e967SBruce Richardson 	if (framesize > blocksize) {
11578c54a3c6SStephen Hemminger 		PMD_LOG(ERR,
11588c54a3c6SStephen Hemminger 			"%s: AF_PACKET MMAP frame size exceeds block size!",
1159df65e967SBruce Richardson 		        name);
1160df65e967SBruce Richardson 		return -1;
1161df65e967SBruce Richardson 	}
1162df65e967SBruce Richardson 
1163df65e967SBruce Richardson 	blockcount = framecount / (blocksize / framesize);
1164df65e967SBruce Richardson 	if (!blockcount) {
11658c54a3c6SStephen Hemminger 		PMD_LOG(ERR,
11668c54a3c6SStephen Hemminger 			"%s: invalid AF_PACKET MMAP parameters", name);
1167df65e967SBruce Richardson 		return -1;
1168df65e967SBruce Richardson 	}
1169df65e967SBruce Richardson 
11708c54a3c6SStephen Hemminger 	PMD_LOG(INFO, "%s: AF_PACKET MMAP parameters:", name);
11718c54a3c6SStephen Hemminger 	PMD_LOG(INFO, "%s:\tblock size %d", name, blocksize);
11728c54a3c6SStephen Hemminger 	PMD_LOG(INFO, "%s:\tblock count %d", name, blockcount);
11738c54a3c6SStephen Hemminger 	PMD_LOG(INFO, "%s:\tframe size %d", name, framesize);
11748c54a3c6SStephen Hemminger 	PMD_LOG(INFO, "%s:\tframe count %d", name, framecount);
1175df65e967SBruce Richardson 
1176050fe6e9SJan Blunck 	if (rte_pmd_init_internals(dev, *sockfd, qpairs,
1177df65e967SBruce Richardson 				   blocksize, blockcount,
1178df65e967SBruce Richardson 				   framesize, framecount,
1179d97de5b5SChas Williams 				   qdisc_bypass,
1180d3bc77abSTudor Cornea 				   fanout_mode,
1181050fe6e9SJan Blunck 				   &internals, &eth_dev,
1182df65e967SBruce Richardson 				   kvlist) < 0)
1183df65e967SBruce Richardson 		return -1;
1184df65e967SBruce Richardson 
1185df65e967SBruce Richardson 	eth_dev->rx_pkt_burst = eth_af_packet_rx;
1186df65e967SBruce Richardson 	eth_dev->tx_pkt_burst = eth_af_packet_tx;
1187df65e967SBruce Richardson 
1188fbe90cddSThomas Monjalon 	rte_eth_dev_probing_finish(eth_dev);
1189df65e967SBruce Richardson 	return 0;
1190df65e967SBruce Richardson }
1191df65e967SBruce Richardson 
1192e6ee4db0SWojciech Zmuda static int
11935d2aa461SJan Blunck rte_pmd_af_packet_probe(struct rte_vdev_device *dev)
1194df65e967SBruce Richardson {
1195df65e967SBruce Richardson 	int ret = 0;
1196df65e967SBruce Richardson 	struct rte_kvargs *kvlist;
1197df65e967SBruce Richardson 	int sockfd = -1;
1198ee27edbeSJianfeng Tan 	struct rte_eth_dev *eth_dev;
1199ee27edbeSJianfeng Tan 	const char *name = rte_vdev_device_name(dev);
1200df65e967SBruce Richardson 
12018c54a3c6SStephen Hemminger 	PMD_LOG(INFO, "Initializing pmd_af_packet for %s", name);
1202ee27edbeSJianfeng Tan 
12034852aa8fSQi Zhang 	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
1204ee27edbeSJianfeng Tan 		eth_dev = rte_eth_dev_attach_secondary(name);
1205ee27edbeSJianfeng Tan 		if (!eth_dev) {
12068c54a3c6SStephen Hemminger 			PMD_LOG(ERR, "Failed to probe %s", name);
1207ee27edbeSJianfeng Tan 			return -1;
1208ee27edbeSJianfeng Tan 		}
1209ee27edbeSJianfeng Tan 		/* TODO: request info from primary to set up Rx and Tx */
1210ee27edbeSJianfeng Tan 		eth_dev->dev_ops = &ops;
1211d1c3ab22SFerruh Yigit 		eth_dev->device = &dev->device;
1212fbe90cddSThomas Monjalon 		rte_eth_dev_probing_finish(eth_dev);
1213ee27edbeSJianfeng Tan 		return 0;
1214ee27edbeSJianfeng Tan 	}
1215df65e967SBruce Richardson 
12165d2aa461SJan Blunck 	kvlist = rte_kvargs_parse(rte_vdev_device_args(dev), valid_arguments);
1217df65e967SBruce Richardson 	if (kvlist == NULL) {
1218df65e967SBruce Richardson 		ret = -1;
1219df65e967SBruce Richardson 		goto exit;
1220df65e967SBruce Richardson 	}
1221df65e967SBruce Richardson 
1222df65e967SBruce Richardson 	/*
1223df65e967SBruce Richardson 	 * If iface argument is passed we open the NICs and use them for
1224df65e967SBruce Richardson 	 * reading / writing
1225df65e967SBruce Richardson 	 */
1226df65e967SBruce Richardson 	if (rte_kvargs_count(kvlist, ETH_AF_PACKET_IFACE_ARG) == 1) {
1227df65e967SBruce Richardson 
1228df65e967SBruce Richardson 		ret = rte_kvargs_process(kvlist, ETH_AF_PACKET_IFACE_ARG,
1229df65e967SBruce Richardson 		                         &open_packet_iface, &sockfd);
1230df65e967SBruce Richardson 		if (ret < 0)
1231df65e967SBruce Richardson 			goto exit;
1232df65e967SBruce Richardson 	}
1233df65e967SBruce Richardson 
1234050fe6e9SJan Blunck 	if (dev->device.numa_node == SOCKET_ID_ANY)
1235050fe6e9SJan Blunck 		dev->device.numa_node = rte_socket_id();
1236050fe6e9SJan Blunck 
1237050fe6e9SJan Blunck 	ret = rte_eth_from_packet(dev, &sockfd, kvlist);
1238df65e967SBruce Richardson 	close(sockfd); /* no longer needed */
1239df65e967SBruce Richardson 
1240df65e967SBruce Richardson exit:
1241df65e967SBruce Richardson 	rte_kvargs_free(kvlist);
1242df65e967SBruce Richardson 	return ret;
1243df65e967SBruce Richardson }
1244df65e967SBruce Richardson 
/*
 * Remove callback of the af_packet virtual device driver.
 *
 * Looks up the ethdev registered under the vdev's name; when one exists
 * it is closed with eth_dev_close() and its port is released.
 *
 * Returns -1 when dev is NULL, 0 otherwise (including the case where no
 * ethdev is found because the port was already released).
 */
static int
rte_pmd_af_packet_remove(struct rte_vdev_device *dev)
{
	struct rte_eth_dev *port;
	const char *port_name;

	if (!dev)
		return -1;

	/* locate the ethdev entry by the vdev name */
	port_name = rte_vdev_device_name(dev);
	port = rte_eth_dev_allocated(port_name);
	if (port != NULL) {
		eth_dev_close(port);
		rte_eth_dev_release_port(port);
	}
	/* a missing entry means the port was already released */

	return 0;
}
1263e6ee4db0SWojciech Zmuda 
1264fe363dd4SJan Viktorin static struct rte_vdev_driver pmd_af_packet_drv = {
126550a3345fSShreyansh Jain 	.probe = rte_pmd_af_packet_probe,
126650a3345fSShreyansh Jain 	.remove = rte_pmd_af_packet_remove,
1267df65e967SBruce Richardson };
1268df65e967SBruce Richardson 
126901f19227SShreyansh Jain RTE_PMD_REGISTER_VDEV(net_af_packet, pmd_af_packet_drv);
12709fa80cb2SJan Blunck RTE_PMD_REGISTER_ALIAS(net_af_packet, eth_af_packet);
127101f19227SShreyansh Jain RTE_PMD_REGISTER_PARAM_STRING(net_af_packet,
127265eca099SPablo de Lara 	"iface=<string> "
127365eca099SPablo de Lara 	"qpairs=<int> "
127465eca099SPablo de Lara 	"blocksz=<int> "
127565eca099SPablo de Lara 	"framesz=<int> "
1276d97de5b5SChas Williams 	"framecnt=<int> "
1277d97de5b5SChas Williams 	"qdisc_bypass=<0|1>");
1278