1 /* SPDX-License-Identifier: BSD-3-Clause
2  *
3  * Copyright(c) 2019-2021 Xilinx, Inc.
4  * Copyright(c) 2018-2019 Solarflare Communications Inc.
5  *
6  * This software was jointly developed between OKTET Labs (under contract
7  * for Solarflare) and Solarflare Communications, Inc.
8  */
9 
10 /* EF100 native datapath implementation */
11 
12 #include <stdbool.h>
13 
14 #include <rte_byteorder.h>
15 #include <rte_mbuf_ptype.h>
16 #include <rte_mbuf.h>
17 #include <rte_io.h>
18 
19 #include "efx_types.h"
20 #include "efx_regs_ef100.h"
21 #include "efx.h"
22 
23 #include "sfc_debug.h"
24 #include "sfc_tweak.h"
25 #include "sfc_dp_rx.h"
26 #include "sfc_kvargs.h"
27 #include "sfc_ef100.h"
28 
29 
30 #define sfc_ef100_rx_err(_rxq, ...) \
31 	SFC_DP_LOG(SFC_KVARG_DATAPATH_EF100, ERR, &(_rxq)->dp.dpq, __VA_ARGS__)
32 
33 #define sfc_ef100_rx_debug(_rxq, ...) \
34 	SFC_DP_LOG(SFC_KVARG_DATAPATH_EF100, DEBUG, &(_rxq)->dp.dpq, \
35 		   __VA_ARGS__)
36 
37 /**
38  * Maximum number of descriptors/buffers in the Rx ring.
39  * It should guarantee that the corresponding event queue never overfills.
40  * EF100 native datapath uses an event queue of the same size as the Rx queue.
41  * Maximum number of events on datapath can be estimated as number of
42  * Rx queue entries (one event per Rx buffer in the worst case) plus
43  * Rx error and flush events.
44  */
45 #define SFC_EF100_RXQ_LIMIT(_ndesc) \
46 	((_ndesc) - 1 /* head must not step on tail */ - \
47 	 1 /* Rx error */ - 1 /* flush */)
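
/*
 * For example, with a 4096 entry ring SFC_EF100_RXQ_LIMIT(4096) evaluates
 * to 4096 - 1 - 1 - 1 = 4093 usable Rx buffers; sfc_ef100_rx_qsize_up_rings()
 * below uses this value to cap the maximum fill level.
 */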
48 
49 /** Invalid user mark value when the mark should be treated as unset */
50 #define SFC_EF100_USER_MARK_INVALID	0
51 
52 struct sfc_ef100_rx_sw_desc {
53 	struct rte_mbuf			*mbuf;
54 };
55 
56 struct sfc_ef100_rxq {
57 	/* Used on data path */
58 	unsigned int			flags;
59 #define SFC_EF100_RXQ_STARTED		0x1
60 #define SFC_EF100_RXQ_NOT_RUNNING	0x2
61 #define SFC_EF100_RXQ_EXCEPTION		0x4
62 #define SFC_EF100_RXQ_RSS_HASH		0x10
63 #define SFC_EF100_RXQ_USER_MARK		0x20
64 #define SFC_EF100_RXQ_FLAG_INTR_EN	0x40
65 	unsigned int			ptr_mask;
66 	unsigned int			evq_phase_bit_shift;
67 	unsigned int			ready_pkts;
68 	unsigned int			completed;
69 	unsigned int			evq_read_ptr;
70 	unsigned int			evq_read_ptr_primed;
71 	volatile efx_qword_t		*evq_hw_ring;
72 	struct sfc_ef100_rx_sw_desc	*sw_ring;
73 	uint64_t			rearm_data;
74 	uint16_t			buf_size;
75 	uint16_t			prefix_size;
76 
77 	unsigned int			evq_hw_index;
78 	volatile void			*evq_prime;
79 
80 	/* Used on refill */
81 	unsigned int			added;
82 	unsigned int			max_fill_level;
83 	unsigned int			refill_threshold;
84 	struct rte_mempool		*refill_mb_pool;
85 	efx_qword_t			*rxq_hw_ring;
86 	volatile void			*doorbell;
87 
88 	/* Datapath receive queue anchor */
89 	struct sfc_dp_rxq		dp;
90 };
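
/*
 * Note: ring sizes are powers of two (see sfc_ef100_rx_qsize_up_rings()),
 * so "completed", "added" and "evq_read_ptr" are free-running counters that
 * are reduced with "& ptr_mask" only when a ring entry is actually accessed.
 */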
91 
92 static inline struct sfc_ef100_rxq *
93 sfc_ef100_rxq_by_dp_rxq(struct sfc_dp_rxq *dp_rxq)
94 {
95 	return container_of(dp_rxq, struct sfc_ef100_rxq, dp);
96 }
97 
98 static void
99 sfc_ef100_rx_qprime(struct sfc_ef100_rxq *rxq)
100 {
101 	sfc_ef100_evq_prime(rxq->evq_prime, rxq->evq_hw_index,
102 			    rxq->evq_read_ptr & rxq->ptr_mask);
103 	rxq->evq_read_ptr_primed = rxq->evq_read_ptr;
104 }
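
/*
 * evq_read_ptr_primed remembers the last primed position so that
 * sfc_ef100_recv_pkts() re-arms the event queue interrupt only when the
 * read pointer has actually advanced since the previous prime.
 */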
105 
106 static inline void
107 sfc_ef100_rx_qpush(struct sfc_ef100_rxq *rxq, unsigned int added)
108 {
109 	efx_dword_t dword;
110 
111 	EFX_POPULATE_DWORD_1(dword, ERF_GZ_RX_RING_PIDX, added & rxq->ptr_mask);
112 
113 	/* DMA sync to device is not required */
114 
115 	/*
116 	 * rte_write32() has rte_io_wmb() which guarantees that the STORE
117 	 * operations (i.e. Rx and event descriptor updates) that precede
118 	 * the rte_io_wmb() call are visible to the NIC before the STORE
119 	 * operations that follow it (i.e. the doorbell write).
120 	 */
121 	rte_write32(dword.ed_u32[0], rxq->doorbell);
122 	rxq->dp.dpq.rx_dbells++;
123 
124 	sfc_ef100_rx_debug(rxq, "RxQ pushed doorbell at pidx %u (added=%u)",
125 			   EFX_DWORD_FIELD(dword, ERF_GZ_RX_RING_PIDX),
126 			   added);
127 }
128 
129 static void
130 sfc_ef100_rx_qrefill(struct sfc_ef100_rxq *rxq)
131 {
132 	const unsigned int ptr_mask = rxq->ptr_mask;
133 	unsigned int free_space;
134 	unsigned int bulks;
135 	void *objs[SFC_RX_REFILL_BULK];
136 	unsigned int added = rxq->added;
137 
138 	free_space = rxq->max_fill_level - (added - rxq->completed);
139 
140 	if (free_space < rxq->refill_threshold)
141 		return;
142 
143 	bulks = free_space / RTE_DIM(objs);
144 	/* refill_threshold guarantees that bulks is positive */
145 	SFC_ASSERT(bulks > 0);
146 
147 	do {
148 		unsigned int id;
149 		unsigned int i;
150 
151 		if (unlikely(rte_mempool_get_bulk(rxq->refill_mb_pool, objs,
152 						  RTE_DIM(objs)) < 0)) {
153 			struct rte_eth_dev_data *dev_data =
154 				rte_eth_devices[rxq->dp.dpq.port_id].data;
155 
156 			/*
157 			 * It is hardly a safe way to increment the counter
158 			 * from different contexts, but all PMDs do it.
159 			 */
160 			dev_data->rx_mbuf_alloc_failed += RTE_DIM(objs);
161 			/* Return if we have posted nothing yet */
162 			if (added == rxq->added)
163 				return;
164 			/* Push posted */
165 			break;
166 		}
167 
168 		for (i = 0, id = added & ptr_mask;
169 		     i < RTE_DIM(objs);
170 		     ++i, ++id) {
171 			struct rte_mbuf *m = objs[i];
172 			struct sfc_ef100_rx_sw_desc *rxd;
173 			rte_iova_t phys_addr;
174 
175 			__rte_mbuf_raw_sanity_check(m);
176 
177 			SFC_ASSERT((id & ~ptr_mask) == 0);
178 			rxd = &rxq->sw_ring[id];
179 			rxd->mbuf = m;
180 
181 			/*
182 			 * Avoid writing to mbuf. It is cheaper to do it
183 			 * when we receive packet and fill in nearby
184 			 * structure members.
185 			 */
186 
187 			phys_addr = rte_mbuf_data_iova_default(m);
188 			EFX_POPULATE_QWORD_1(rxq->rxq_hw_ring[id],
189 			    ESF_GZ_RX_BUF_ADDR, phys_addr);
190 		}
191 
192 		added += RTE_DIM(objs);
193 	} while (--bulks > 0);
194 
195 	SFC_ASSERT(rxq->added != added);
196 	rxq->added = added;
197 	sfc_ef100_rx_qpush(rxq, added);
198 }
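
/*
 * Refill arithmetic, for illustration: with max_fill_level 4093, completed
 * 100 and added 1000, free_space is 4093 - 900 = 3193; with a refill bulk
 * of, say, 8 mbufs that is 399 whole bulks, and the doorbell is rung once
 * at the end for everything posted.
 */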
199 
200 static inline uint64_t
201 sfc_ef100_rx_nt_or_inner_l4_csum(const efx_word_t class)
202 {
203 	return EFX_WORD_FIELD(class,
204 			      ESF_GZ_RX_PREFIX_HCLASS_NT_OR_INNER_L4_CSUM) ==
205 		ESE_GZ_RH_HCLASS_L4_CSUM_GOOD ?
206 		PKT_RX_L4_CKSUM_GOOD : PKT_RX_L4_CKSUM_BAD;
207 }
208 
209 static inline uint64_t
210 sfc_ef100_rx_tun_outer_l4_csum(const efx_word_t class)
211 {
212 	return EFX_WORD_FIELD(class,
213 			      ESF_GZ_RX_PREFIX_HCLASS_TUN_OUTER_L4_CSUM) ==
214 		ESE_GZ_RH_HCLASS_L4_CSUM_GOOD ?
215 		PKT_RX_OUTER_L4_CKSUM_GOOD : PKT_RX_OUTER_L4_CKSUM_BAD;
216 }
217 
218 static uint32_t
219 sfc_ef100_rx_class_decode(const efx_word_t class, uint64_t *ol_flags)
220 {
221 	uint32_t ptype;
222 	bool no_tunnel = false;
223 
224 	if (unlikely(EFX_WORD_FIELD(class, ESF_GZ_RX_PREFIX_HCLASS_L2_CLASS) !=
225 		     ESE_GZ_RH_HCLASS_L2_CLASS_E2_0123VLAN))
226 		return 0;
227 
228 	switch (EFX_WORD_FIELD(class, ESF_GZ_RX_PREFIX_HCLASS_L2_N_VLAN)) {
229 	case 0:
230 		ptype = RTE_PTYPE_L2_ETHER;
231 		break;
232 	case 1:
233 		ptype = RTE_PTYPE_L2_ETHER_VLAN;
234 		break;
235 	default:
236 		ptype = RTE_PTYPE_L2_ETHER_QINQ;
237 		break;
238 	}
239 
240 	switch (EFX_WORD_FIELD(class, ESF_GZ_RX_PREFIX_HCLASS_TUNNEL_CLASS)) {
241 	case ESE_GZ_RH_HCLASS_TUNNEL_CLASS_NONE:
242 		no_tunnel = true;
243 		break;
244 	case ESE_GZ_RH_HCLASS_TUNNEL_CLASS_VXLAN:
245 		ptype |= RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP;
246 		*ol_flags |= sfc_ef100_rx_tun_outer_l4_csum(class);
247 		break;
248 	case ESE_GZ_RH_HCLASS_TUNNEL_CLASS_NVGRE:
249 		ptype |= RTE_PTYPE_TUNNEL_NVGRE;
250 		break;
251 	case ESE_GZ_RH_HCLASS_TUNNEL_CLASS_GENEVE:
252 		ptype |= RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L4_UDP;
253 		*ol_flags |= sfc_ef100_rx_tun_outer_l4_csum(class);
254 		break;
255 	default:
256 		/*
257 		 * The driver does not know the tunnel type, but it is
258 		 * still a tunnel and the NT_OR_INNER fields refer to the
259 		 * inner frame.
260 		 */
261 		no_tunnel = false;
262 	}
263 
264 	if (no_tunnel) {
265 		bool l4_valid = true;
266 
267 		switch (EFX_WORD_FIELD(class,
268 			ESF_GZ_RX_PREFIX_HCLASS_NT_OR_INNER_L3_CLASS)) {
269 		case ESE_GZ_RH_HCLASS_L3_CLASS_IP4GOOD:
270 			ptype |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
271 			*ol_flags |= PKT_RX_IP_CKSUM_GOOD;
272 			break;
273 		case ESE_GZ_RH_HCLASS_L3_CLASS_IP4BAD:
274 			ptype |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
275 			*ol_flags |= PKT_RX_IP_CKSUM_BAD;
276 			break;
277 		case ESE_GZ_RH_HCLASS_L3_CLASS_IP6:
278 			ptype |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
279 			break;
280 		default:
281 			l4_valid = false;
282 		}
283 
284 		if (l4_valid) {
285 			switch (EFX_WORD_FIELD(class,
286 				ESF_GZ_RX_PREFIX_HCLASS_NT_OR_INNER_L4_CLASS)) {
287 			case ESE_GZ_RH_HCLASS_L4_CLASS_TCP:
288 				ptype |= RTE_PTYPE_L4_TCP;
289 				*ol_flags |=
290 					sfc_ef100_rx_nt_or_inner_l4_csum(class);
291 				break;
292 			case ESE_GZ_RH_HCLASS_L4_CLASS_UDP:
293 				ptype |= RTE_PTYPE_L4_UDP;
294 				*ol_flags |=
295 					sfc_ef100_rx_nt_or_inner_l4_csum(class);
296 				break;
297 			case ESE_GZ_RH_HCLASS_L4_CLASS_FRAG:
298 				ptype |= RTE_PTYPE_L4_FRAG;
299 				break;
300 			}
301 		}
302 	} else {
303 		bool l4_valid = true;
304 
305 		switch (EFX_WORD_FIELD(class,
306 			ESF_GZ_RX_PREFIX_HCLASS_TUN_OUTER_L3_CLASS)) {
307 		case ESE_GZ_RH_HCLASS_L3_CLASS_IP4GOOD:
308 			ptype |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
309 			break;
310 		case ESE_GZ_RH_HCLASS_L3_CLASS_IP4BAD:
311 			ptype |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
312 			*ol_flags |= PKT_RX_OUTER_IP_CKSUM_BAD;
313 			break;
314 		case ESE_GZ_RH_HCLASS_L3_CLASS_IP6:
315 			ptype |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
316 			break;
317 		}
318 
319 		switch (EFX_WORD_FIELD(class,
320 			ESF_GZ_RX_PREFIX_HCLASS_NT_OR_INNER_L3_CLASS)) {
321 		case ESE_GZ_RH_HCLASS_L3_CLASS_IP4GOOD:
322 			ptype |= RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN;
323 			*ol_flags |= PKT_RX_IP_CKSUM_GOOD;
324 			break;
325 		case ESE_GZ_RH_HCLASS_L3_CLASS_IP4BAD:
326 			ptype |= RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN;
327 			*ol_flags |= PKT_RX_IP_CKSUM_BAD;
328 			break;
329 		case ESE_GZ_RH_HCLASS_L3_CLASS_IP6:
330 			ptype |= RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN;
331 			break;
332 		default:
333 			l4_valid = false;
334 			break;
335 		}
336 
337 		if (l4_valid) {
338 			switch (EFX_WORD_FIELD(class,
339 				ESF_GZ_RX_PREFIX_HCLASS_NT_OR_INNER_L4_CLASS)) {
340 			case ESE_GZ_RH_HCLASS_L4_CLASS_TCP:
341 				ptype |= RTE_PTYPE_INNER_L4_TCP;
342 				*ol_flags |=
343 					sfc_ef100_rx_nt_or_inner_l4_csum(class);
344 				break;
345 			case ESE_GZ_RH_HCLASS_L4_CLASS_UDP:
346 				ptype |= RTE_PTYPE_INNER_L4_UDP;
347 				*ol_flags |=
348 					sfc_ef100_rx_nt_or_inner_l4_csum(class);
349 				break;
350 			case ESE_GZ_RH_HCLASS_L4_CLASS_FRAG:
351 				ptype |= RTE_PTYPE_INNER_L4_FRAG;
352 				break;
353 			}
354 		}
355 	}
356 
357 	return ptype;
358 }
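
/*
 * Example outcome: for a single-VLAN VXLAN packet over IPv4/UDP carrying a
 * TCP/IPv4 inner frame with all checksums good, the function returns
 * RTE_PTYPE_L2_ETHER_VLAN | RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
 * RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
 * RTE_PTYPE_INNER_L4_TCP and sets PKT_RX_OUTER_L4_CKSUM_GOOD,
 * PKT_RX_IP_CKSUM_GOOD and PKT_RX_L4_CKSUM_GOOD in ol_flags.
 */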
359 
360 /*
361  * The function below relies on the following fields in the Rx prefix.
362  * Some fields are mandatory, some fields are optional.
363  * See sfc_ef100_rx_qstart() below.
364  */
365 static const efx_rx_prefix_layout_t sfc_ef100_rx_prefix_layout = {
366 	.erpl_fields	= {
367 #define	SFC_EF100_RX_PREFIX_FIELD(_name, _big_endian) \
368 	EFX_RX_PREFIX_FIELD(_name, ESF_GZ_RX_PREFIX_ ## _name, _big_endian)
369 
370 		SFC_EF100_RX_PREFIX_FIELD(LENGTH, B_FALSE),
371 		SFC_EF100_RX_PREFIX_FIELD(RSS_HASH_VALID, B_FALSE),
372 		SFC_EF100_RX_PREFIX_FIELD(CLASS, B_FALSE),
373 		SFC_EF100_RX_PREFIX_FIELD(RSS_HASH, B_FALSE),
374 		SFC_EF100_RX_PREFIX_FIELD(USER_MARK, B_FALSE),
375 
376 #undef	SFC_EF100_RX_PREFIX_FIELD
377 	}
378 };
379 
380 static bool
381 sfc_ef100_rx_prefix_to_offloads(const struct sfc_ef100_rxq *rxq,
382 				const efx_oword_t *rx_prefix,
383 				struct rte_mbuf *m)
384 {
385 	const efx_word_t *class;
386 	uint64_t ol_flags = 0;
387 
388 	RTE_BUILD_BUG_ON(EFX_LOW_BIT(ESF_GZ_RX_PREFIX_CLASS) % CHAR_BIT != 0);
389 	RTE_BUILD_BUG_ON(EFX_WIDTH(ESF_GZ_RX_PREFIX_CLASS) % CHAR_BIT != 0);
390 	RTE_BUILD_BUG_ON(EFX_WIDTH(ESF_GZ_RX_PREFIX_CLASS) / CHAR_BIT !=
391 			 sizeof(*class));
392 	class = (const efx_word_t *)((const uint8_t *)rx_prefix +
393 		EFX_LOW_BIT(ESF_GZ_RX_PREFIX_CLASS) / CHAR_BIT);
394 	if (unlikely(EFX_WORD_FIELD(*class,
395 				    ESF_GZ_RX_PREFIX_HCLASS_L2_STATUS) !=
396 		     ESE_GZ_RH_HCLASS_L2_STATUS_OK))
397 		return false;
398 
399 	m->packet_type = sfc_ef100_rx_class_decode(*class, &ol_flags);
400 
401 	if ((rxq->flags & SFC_EF100_RXQ_RSS_HASH) &&
402 	    EFX_TEST_OWORD_BIT(rx_prefix[0],
403 			       ESF_GZ_RX_PREFIX_RSS_HASH_VALID_LBN)) {
404 		ol_flags |= PKT_RX_RSS_HASH;
405 		/* EFX_OWORD_FIELD converts little-endian to CPU */
406 		m->hash.rss = EFX_OWORD_FIELD(rx_prefix[0],
407 					      ESF_GZ_RX_PREFIX_RSS_HASH);
408 	}
409 
410 	if (rxq->flags & SFC_EF100_RXQ_USER_MARK) {
411 		uint32_t user_mark;
412 
413 		/* EFX_OWORD_FIELD converts little-endian to CPU */
414 		user_mark = EFX_OWORD_FIELD(rx_prefix[0],
415 					    ESF_GZ_RX_PREFIX_USER_MARK);
416 		if (user_mark != SFC_EF100_USER_MARK_INVALID) {
417 			ol_flags |= PKT_RX_FDIR_ID;
418 			m->hash.fdir.hi = user_mark;
419 		}
420 	}
421 
422 	m->ol_flags = ol_flags;
423 	return true;
424 }
425 
426 static const uint8_t *
427 sfc_ef100_rx_pkt_prefix(const struct rte_mbuf *m)
428 {
429 	return (const uint8_t *)m->buf_addr + RTE_PKTMBUF_HEADROOM;
430 }
431 
432 static struct rte_mbuf *
433 sfc_ef100_rx_next_mbuf(struct sfc_ef100_rxq *rxq)
434 {
435 	struct rte_mbuf *m;
436 	unsigned int id;
437 
438 	/* mbuf associated with current Rx descriptor */
439 	m = rxq->sw_ring[rxq->completed++ & rxq->ptr_mask].mbuf;
440 
441 	/* completed is already moved to the next one */
442 	if (unlikely(rxq->completed == rxq->added))
443 		goto done;
444 
445 	/*
446 	 * Prefetch Rx prefix of the next packet.
447 	 * If the current packet is scattered and the next mbuf is its
448 	 * fragment, this simply prefetches some data - no harm since the
449 	 * packet rate should not be high if scatter is used.
450 	 */
451 	id = rxq->completed & rxq->ptr_mask;
452 	rte_prefetch0(sfc_ef100_rx_pkt_prefix(rxq->sw_ring[id].mbuf));
453 
454 	if (unlikely(rxq->completed + 1 == rxq->added))
455 		goto done;
456 
457 	/*
458 	 * Prefetch mbuf control structure of the next after next Rx
459 	 * descriptor.
460 	 */
461 	id = (id == rxq->ptr_mask) ? 0 : (id + 1);
462 	rte_mbuf_prefetch_part1(rxq->sw_ring[id].mbuf);
463 
464 	/*
465 	 * If the SW Rx descriptor needed next time comes from the next
466 	 * cache line, try to make sure that it is already in cache.
467 	 */
468 	if ((id & 0x7) == 0x7)
469 		rte_prefetch0(&rxq->sw_ring[(id + 1) & rxq->ptr_mask]);
470 
471 done:
472 	return m;
473 }
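
/*
 * The (id & 0x7) == 0x7 check above assumes eight SW descriptors per cache
 * line (an 8 byte struct sfc_ef100_rx_sw_desc in a 64 byte line on 64-bit
 * targets), so the next line is prefetched just before it is first used.
 */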
474 
475 static struct rte_mbuf **
476 sfc_ef100_rx_process_ready_pkts(struct sfc_ef100_rxq *rxq,
477 				struct rte_mbuf **rx_pkts,
478 				struct rte_mbuf ** const rx_pkts_end)
479 {
480 	while (rxq->ready_pkts > 0 && rx_pkts != rx_pkts_end) {
481 		struct rte_mbuf *pkt;
482 		struct rte_mbuf *lastseg;
483 		const efx_oword_t *rx_prefix;
484 		uint16_t pkt_len;
485 		uint16_t seg_len;
486 		bool deliver;
487 
488 		rxq->ready_pkts--;
489 
490 		pkt = sfc_ef100_rx_next_mbuf(rxq);
491 		__rte_mbuf_raw_sanity_check(pkt);
492 
493 		RTE_BUILD_BUG_ON(sizeof(pkt->rearm_data[0]) !=
494 				 sizeof(rxq->rearm_data));
495 		pkt->rearm_data[0] = rxq->rearm_data;
496 
497 		/* data_off already moved past Rx prefix */
498 		rx_prefix = (const efx_oword_t *)sfc_ef100_rx_pkt_prefix(pkt);
499 
500 		pkt_len = EFX_OWORD_FIELD(rx_prefix[0],
501 					  ESF_GZ_RX_PREFIX_LENGTH);
502 		SFC_ASSERT(pkt_len > 0);
503 		rte_pktmbuf_pkt_len(pkt) = pkt_len;
504 
505 		seg_len = RTE_MIN(pkt_len, rxq->buf_size - rxq->prefix_size);
506 		rte_pktmbuf_data_len(pkt) = seg_len;
507 
508 		deliver = sfc_ef100_rx_prefix_to_offloads(rxq, rx_prefix, pkt);
509 
510 		lastseg = pkt;
511 		while ((pkt_len -= seg_len) > 0) {
512 			struct rte_mbuf *seg;
513 
514 			seg = sfc_ef100_rx_next_mbuf(rxq);
515 			__rte_mbuf_raw_sanity_check(seg);
516 
517 			seg->data_off = RTE_PKTMBUF_HEADROOM;
518 
519 			seg_len = RTE_MIN(pkt_len, rxq->buf_size);
520 			rte_pktmbuf_data_len(seg) = seg_len;
521 			rte_pktmbuf_pkt_len(seg) = seg_len;
522 
523 			pkt->nb_segs++;
524 			lastseg->next = seg;
525 			lastseg = seg;
526 		}
527 
528 		if (likely(deliver))
529 			*rx_pkts++ = pkt;
530 		else
531 			rte_pktmbuf_free(pkt);
532 	}
533 
534 	return rx_pkts;
535 }
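
/*
 * Segment length math, for illustration: with 2048 byte Rx buffers and,
 * say, a 22 byte Rx prefix, the first segment of a scattered packet holds
 * up to 2026 bytes of data (the prefix shares the first buffer with the
 * data), while every further segment holds up to the full 2048 bytes since
 * only the first buffer carries a prefix.
 */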
536 
537 static bool
538 sfc_ef100_rx_get_event(struct sfc_ef100_rxq *rxq, efx_qword_t *ev)
539 {
540 	*ev = rxq->evq_hw_ring[rxq->evq_read_ptr & rxq->ptr_mask];
541 
542 	if (!sfc_ef100_ev_present(ev,
543 			(rxq->evq_read_ptr >> rxq->evq_phase_bit_shift) & 1))
544 		return false;
545 
546 	if (unlikely(!sfc_ef100_ev_type_is(ev, ESE_GZ_EF100_EV_RX_PKTS))) {
547 		/*
548 		 * Do not move read_ptr to keep the event for exception
549 		 * handling by the control path.
550 		 */
551 		rxq->flags |= SFC_EF100_RXQ_EXCEPTION;
552 		sfc_ef100_rx_err(rxq,
553 			"RxQ exception at EvQ ptr %u(%#x), event %08x:%08x",
554 			rxq->evq_read_ptr, rxq->evq_read_ptr & rxq->ptr_mask,
555 			EFX_QWORD_FIELD(*ev, EFX_DWORD_1),
556 			EFX_QWORD_FIELD(*ev, EFX_DWORD_0));
557 		return false;
558 	}
559 
560 	sfc_ef100_rx_debug(rxq, "RxQ got event %08x:%08x at %u (%#x)",
561 			   EFX_QWORD_FIELD(*ev, EFX_DWORD_1),
562 			   EFX_QWORD_FIELD(*ev, EFX_DWORD_0),
563 			   rxq->evq_read_ptr,
564 			   rxq->evq_read_ptr & rxq->ptr_mask);
565 
566 	rxq->evq_read_ptr++;
567 	return true;
568 }
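
/*
 * Phase bit handling: evq_phase_bit_shift is log2 of the event queue size,
 * so (evq_read_ptr >> shift) & 1 flips each time the free-running read
 * pointer wraps the ring. An event slot is taken as valid only when the
 * phase bit written by the NIC matches this expected value, which lets new
 * events be told apart from stale ones without clearing the ring memory.
 */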
569 
570 static uint16_t
571 sfc_ef100_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
572 {
573 	struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(rx_queue);
574 	struct rte_mbuf ** const rx_pkts_end = &rx_pkts[nb_pkts];
575 	efx_qword_t rx_ev;
576 
577 	rx_pkts = sfc_ef100_rx_process_ready_pkts(rxq, rx_pkts, rx_pkts_end);
578 
579 	if (unlikely(rxq->flags &
580 		     (SFC_EF100_RXQ_NOT_RUNNING | SFC_EF100_RXQ_EXCEPTION)))
581 		goto done;
582 
583 	while (rx_pkts != rx_pkts_end && sfc_ef100_rx_get_event(rxq, &rx_ev)) {
584 		rxq->ready_pkts =
585 			EFX_QWORD_FIELD(rx_ev, ESF_GZ_EV_RXPKTS_NUM_PKT);
586 		rx_pkts = sfc_ef100_rx_process_ready_pkts(rxq, rx_pkts,
587 							  rx_pkts_end);
588 	}
589 
590 	/* It is not a problem if we refill in case of an exception */
591 	sfc_ef100_rx_qrefill(rxq);
592 
593 	if ((rxq->flags & SFC_EF100_RXQ_FLAG_INTR_EN) &&
594 	    rxq->evq_read_ptr_primed != rxq->evq_read_ptr)
595 		sfc_ef100_rx_qprime(rxq);
596 
597 done:
598 	return nb_pkts - (rx_pkts_end - rx_pkts);
599 }
600 
601 static const uint32_t *
602 sfc_ef100_supported_ptypes_get(__rte_unused uint32_t tunnel_encaps)
603 {
604 	static const uint32_t ef100_native_ptypes[] = {
605 		RTE_PTYPE_L2_ETHER,
606 		RTE_PTYPE_L2_ETHER_VLAN,
607 		RTE_PTYPE_L2_ETHER_QINQ,
608 		RTE_PTYPE_L3_IPV4_EXT_UNKNOWN,
609 		RTE_PTYPE_L3_IPV6_EXT_UNKNOWN,
610 		RTE_PTYPE_L4_TCP,
611 		RTE_PTYPE_L4_UDP,
612 		RTE_PTYPE_L4_FRAG,
613 		RTE_PTYPE_TUNNEL_VXLAN,
614 		RTE_PTYPE_TUNNEL_NVGRE,
615 		RTE_PTYPE_TUNNEL_GENEVE,
616 		RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN,
617 		RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN,
618 		RTE_PTYPE_INNER_L4_TCP,
619 		RTE_PTYPE_INNER_L4_UDP,
620 		RTE_PTYPE_INNER_L4_FRAG,
621 		RTE_PTYPE_UNKNOWN
622 	};
623 
624 	return ef100_native_ptypes;
625 }
626 
627 static sfc_dp_rx_qdesc_npending_t sfc_ef100_rx_qdesc_npending;
628 static unsigned int
629 sfc_ef100_rx_qdesc_npending(__rte_unused struct sfc_dp_rxq *dp_rxq)
630 {
631 	return 0;
632 }
633 
634 static sfc_dp_rx_qdesc_status_t sfc_ef100_rx_qdesc_status;
635 static int
636 sfc_ef100_rx_qdesc_status(__rte_unused struct sfc_dp_rxq *dp_rxq,
637 			  __rte_unused uint16_t offset)
638 {
639 	return -ENOTSUP;
640 }
641 
642 
643 static sfc_dp_rx_get_dev_info_t sfc_ef100_rx_get_dev_info;
644 static void
645 sfc_ef100_rx_get_dev_info(struct rte_eth_dev_info *dev_info)
646 {
647 	/*
648 	 * The number of descriptors just defines the maximum number of
649 	 * pushed descriptors (fill level).
650 	 */
651 	dev_info->rx_desc_lim.nb_min = SFC_RX_REFILL_BULK;
652 	dev_info->rx_desc_lim.nb_align = SFC_RX_REFILL_BULK;
653 }
654 
655 
656 static sfc_dp_rx_qsize_up_rings_t sfc_ef100_rx_qsize_up_rings;
657 static int
658 sfc_ef100_rx_qsize_up_rings(uint16_t nb_rx_desc,
659 			   struct sfc_dp_rx_hw_limits *limits,
660 			   __rte_unused struct rte_mempool *mb_pool,
661 			   unsigned int *rxq_entries,
662 			   unsigned int *evq_entries,
663 			   unsigned int *rxq_max_fill_level)
664 {
665 	/*
666 	 * rte_ethdev API guarantees that the number meets min, max and
667 	 * alignment requirements.
668 	 */
669 	if (nb_rx_desc <= limits->rxq_min_entries)
670 		*rxq_entries = limits->rxq_min_entries;
671 	else
672 		*rxq_entries = rte_align32pow2(nb_rx_desc);
673 
674 	*evq_entries = *rxq_entries;
675 
676 	*rxq_max_fill_level = RTE_MIN(nb_rx_desc,
677 				      SFC_EF100_RXQ_LIMIT(*evq_entries));
678 	return 0;
679 }
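
/*
 * For example, a request for 1000 Rx descriptors (assuming it exceeds the
 * hardware minimum) is rounded up to a 1024 entry ring, the event queue
 * gets the same 1024 entries, and the maximum fill level becomes
 * RTE_MIN(1000, SFC_EF100_RXQ_LIMIT(1024)) = RTE_MIN(1000, 1021) = 1000.
 */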
680 
681 
682 static uint64_t
683 sfc_ef100_mk_mbuf_rearm_data(uint16_t port_id, uint16_t prefix_size)
684 {
685 	struct rte_mbuf m;
686 
687 	memset(&m, 0, sizeof(m));
688 
689 	rte_mbuf_refcnt_set(&m, 1);
690 	m.data_off = RTE_PKTMBUF_HEADROOM + prefix_size;
691 	m.nb_segs = 1;
692 	m.port = port_id;
693 
694 	/* rearm_data covers structure members filled in above */
695 	rte_compiler_barrier();
696 	RTE_BUILD_BUG_ON(sizeof(m.rearm_data[0]) != sizeof(uint64_t));
697 	return m.rearm_data[0];
698 }
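
/*
 * The value built above lets sfc_ef100_rx_process_ready_pkts() reinitialize
 * data_off, refcnt, nb_segs and port of a received mbuf with a single
 * 64-bit store (pkt->rearm_data[0] = rxq->rearm_data) instead of four
 * separate writes; it relies on rte_mbuf keeping these fields together in
 * the rearm_data marker area.
 */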
699 
700 static sfc_dp_rx_qcreate_t sfc_ef100_rx_qcreate;
701 static int
702 sfc_ef100_rx_qcreate(uint16_t port_id, uint16_t queue_id,
703 		    const struct rte_pci_addr *pci_addr, int socket_id,
704 		    const struct sfc_dp_rx_qcreate_info *info,
705 		    struct sfc_dp_rxq **dp_rxqp)
706 {
707 	struct sfc_ef100_rxq *rxq;
708 	int rc;
709 
710 	rc = EINVAL;
711 	if (info->rxq_entries != info->evq_entries)
712 		goto fail_rxq_args;
713 
714 	rc = ENOMEM;
715 	rxq = rte_zmalloc_socket("sfc-ef100-rxq", sizeof(*rxq),
716 				 RTE_CACHE_LINE_SIZE, socket_id);
717 	if (rxq == NULL)
718 		goto fail_rxq_alloc;
719 
720 	sfc_dp_queue_init(&rxq->dp.dpq, port_id, queue_id, pci_addr);
721 
722 	rc = ENOMEM;
723 	rxq->sw_ring = rte_calloc_socket("sfc-ef100-rxq-sw_ring",
724 					 info->rxq_entries,
725 					 sizeof(*rxq->sw_ring),
726 					 RTE_CACHE_LINE_SIZE, socket_id);
727 	if (rxq->sw_ring == NULL)
728 		goto fail_desc_alloc;
729 
730 	rxq->flags |= SFC_EF100_RXQ_NOT_RUNNING;
731 	rxq->ptr_mask = info->rxq_entries - 1;
732 	rxq->evq_phase_bit_shift = rte_bsf32(info->evq_entries);
733 	rxq->evq_hw_ring = info->evq_hw_ring;
734 	rxq->max_fill_level = info->max_fill_level;
735 	rxq->refill_threshold = info->refill_threshold;
736 	rxq->prefix_size = info->prefix_size;
737 	rxq->buf_size = info->buf_size;
738 	rxq->refill_mb_pool = info->refill_mb_pool;
739 	rxq->rxq_hw_ring = info->rxq_hw_ring;
740 	rxq->doorbell = (volatile uint8_t *)info->mem_bar +
741 			ER_GZ_RX_RING_DOORBELL_OFST +
742 			(info->hw_index << info->vi_window_shift);
743 
744 	rxq->evq_hw_index = info->evq_hw_index;
745 	rxq->evq_prime = (volatile uint8_t *)info->mem_bar +
746 			 info->fcw_offset +
747 			 ER_GZ_EVQ_INT_PRIME_OFST;
748 
749 	sfc_ef100_rx_debug(rxq, "RxQ doorbell is %p", rxq->doorbell);
750 
751 	*dp_rxqp = &rxq->dp;
752 	return 0;
753 
754 fail_desc_alloc:
755 	rte_free(rxq);
756 
757 fail_rxq_alloc:
758 fail_rxq_args:
759 	return rc;
760 }
761 
762 static sfc_dp_rx_qdestroy_t sfc_ef100_rx_qdestroy;
763 static void
764 sfc_ef100_rx_qdestroy(struct sfc_dp_rxq *dp_rxq)
765 {
766 	struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);
767 
768 	rte_free(rxq->sw_ring);
769 	rte_free(rxq);
770 }
771 
772 static sfc_dp_rx_qstart_t sfc_ef100_rx_qstart;
773 static int
774 sfc_ef100_rx_qstart(struct sfc_dp_rxq *dp_rxq, unsigned int evq_read_ptr,
775 		    const efx_rx_prefix_layout_t *pinfo)
776 {
777 	struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);
778 	uint32_t unsup_rx_prefix_fields;
779 
780 	SFC_ASSERT(rxq->completed == 0);
781 	SFC_ASSERT(rxq->added == 0);
782 
783 	/* Prefix must fit into reserved Rx buffer space */
784 	if (pinfo->erpl_length > rxq->prefix_size)
785 		return ENOTSUP;
786 
787 	unsup_rx_prefix_fields =
788 		efx_rx_prefix_layout_check(pinfo, &sfc_ef100_rx_prefix_layout);
789 
790 	/* LENGTH and CLASS fields must always be present */
791 	if ((unsup_rx_prefix_fields &
792 	     ((1U << EFX_RX_PREFIX_FIELD_LENGTH) |
793 	      (1U << EFX_RX_PREFIX_FIELD_CLASS))) != 0)
794 		return ENOTSUP;
795 
796 	if ((unsup_rx_prefix_fields &
797 	     ((1U << EFX_RX_PREFIX_FIELD_RSS_HASH_VALID) |
798 	      (1U << EFX_RX_PREFIX_FIELD_RSS_HASH))) == 0)
799 		rxq->flags |= SFC_EF100_RXQ_RSS_HASH;
800 	else
801 		rxq->flags &= ~SFC_EF100_RXQ_RSS_HASH;
802 
803 	if ((unsup_rx_prefix_fields &
804 	     (1U << EFX_RX_PREFIX_FIELD_USER_MARK)) == 0)
805 		rxq->flags |= SFC_EF100_RXQ_USER_MARK;
806 	else
807 		rxq->flags &= ~SFC_EF100_RXQ_USER_MARK;
808 
809 	rxq->prefix_size = pinfo->erpl_length;
810 	rxq->rearm_data = sfc_ef100_mk_mbuf_rearm_data(rxq->dp.dpq.port_id,
811 						       rxq->prefix_size);
812 
813 	sfc_ef100_rx_qrefill(rxq);
814 
815 	rxq->evq_read_ptr = evq_read_ptr;
816 
817 	rxq->flags |= SFC_EF100_RXQ_STARTED;
818 	rxq->flags &= ~(SFC_EF100_RXQ_NOT_RUNNING | SFC_EF100_RXQ_EXCEPTION);
819 
820 	if (rxq->flags & SFC_EF100_RXQ_FLAG_INTR_EN)
821 		sfc_ef100_rx_qprime(rxq);
822 
823 	return 0;
824 }
825 
826 static sfc_dp_rx_qstop_t sfc_ef100_rx_qstop;
827 static void
828 sfc_ef100_rx_qstop(struct sfc_dp_rxq *dp_rxq, unsigned int *evq_read_ptr)
829 {
830 	struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);
831 
832 	rxq->flags |= SFC_EF100_RXQ_NOT_RUNNING;
833 
834 	*evq_read_ptr = rxq->evq_read_ptr;
835 }
836 
837 static sfc_dp_rx_qrx_ev_t sfc_ef100_rx_qrx_ev;
838 static bool
839 sfc_ef100_rx_qrx_ev(struct sfc_dp_rxq *dp_rxq, __rte_unused unsigned int id)
840 {
841 	__rte_unused struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);
842 
843 	SFC_ASSERT(rxq->flags & SFC_EF100_RXQ_NOT_RUNNING);
844 
845 	/*
846 	 * It is safe to ignore Rx event since we free all mbufs on
847 	 * queue purge anyway.
848 	 */
849 
850 	return false;
851 }
852 
853 static sfc_dp_rx_qpurge_t sfc_ef100_rx_qpurge;
854 static void
855 sfc_ef100_rx_qpurge(struct sfc_dp_rxq *dp_rxq)
856 {
857 	struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);
858 	unsigned int i;
859 	struct sfc_ef100_rx_sw_desc *rxd;
860 
861 	for (i = rxq->completed; i != rxq->added; ++i) {
862 		rxd = &rxq->sw_ring[i & rxq->ptr_mask];
863 		rte_mbuf_raw_free(rxd->mbuf);
864 		rxd->mbuf = NULL;
865 	}
866 
867 	rxq->completed = rxq->added = 0;
868 	rxq->ready_pkts = 0;
869 
870 	rxq->flags &= ~SFC_EF100_RXQ_STARTED;
871 }
872 
873 static sfc_dp_rx_intr_enable_t sfc_ef100_rx_intr_enable;
874 static int
875 sfc_ef100_rx_intr_enable(struct sfc_dp_rxq *dp_rxq)
876 {
877 	struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);
878 
879 	rxq->flags |= SFC_EF100_RXQ_FLAG_INTR_EN;
880 	if (rxq->flags & SFC_EF100_RXQ_STARTED)
881 		sfc_ef100_rx_qprime(rxq);
882 	return 0;
883 }
884 
885 static sfc_dp_rx_intr_disable_t sfc_ef100_rx_intr_disable;
886 static int
887 sfc_ef100_rx_intr_disable(struct sfc_dp_rxq *dp_rxq)
888 {
889 	struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);
890 
891 	/* Cannot disarm, just disable rearm */
892 	rxq->flags &= ~SFC_EF100_RXQ_FLAG_INTR_EN;
893 	return 0;
894 }
895 
896 static sfc_dp_rx_get_pushed_t sfc_ef100_rx_get_pushed;
897 static unsigned int
898 sfc_ef100_rx_get_pushed(struct sfc_dp_rxq *dp_rxq)
899 {
900 	struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);
901 
902 	/*
903 	 * The datapath keeps track only of added descriptors, since
904 	 * the number of pushed descriptors always equals the number
905 	 * of added descriptors due to enforced alignment.
906 	 */
907 	return rxq->added;
908 }
909 
910 struct sfc_dp_rx sfc_ef100_rx = {
911 	.dp = {
912 		.name		= SFC_KVARG_DATAPATH_EF100,
913 		.type		= SFC_DP_RX,
914 		.hw_fw_caps	= SFC_DP_HW_FW_CAP_EF100,
915 	},
916 	.features		= SFC_DP_RX_FEAT_MULTI_PROCESS |
917 				  SFC_DP_RX_FEAT_INTR,
918 	.dev_offload_capa	= 0,
919 	.queue_offload_capa	= DEV_RX_OFFLOAD_CHECKSUM |
920 				  DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM |
921 				  DEV_RX_OFFLOAD_OUTER_UDP_CKSUM |
922 				  DEV_RX_OFFLOAD_SCATTER |
923 				  DEV_RX_OFFLOAD_RSS_HASH,
924 	.get_dev_info		= sfc_ef100_rx_get_dev_info,
925 	.qsize_up_rings		= sfc_ef100_rx_qsize_up_rings,
926 	.qcreate		= sfc_ef100_rx_qcreate,
927 	.qdestroy		= sfc_ef100_rx_qdestroy,
928 	.qstart			= sfc_ef100_rx_qstart,
929 	.qstop			= sfc_ef100_rx_qstop,
930 	.qrx_ev			= sfc_ef100_rx_qrx_ev,
931 	.qpurge			= sfc_ef100_rx_qpurge,
932 	.supported_ptypes_get	= sfc_ef100_supported_ptypes_get,
933 	.qdesc_npending		= sfc_ef100_rx_qdesc_npending,
934 	.qdesc_status		= sfc_ef100_rx_qdesc_status,
935 	.intr_enable		= sfc_ef100_rx_intr_enable,
936 	.intr_disable		= sfc_ef100_rx_intr_disable,
937 	.get_pushed		= sfc_ef100_rx_get_pushed,
938 	.pkt_burst		= sfc_ef100_recv_pkts,
939 };
940