xref: /dpdk/drivers/net/sfc/sfc_rx.c (revision 89f0711f9ddfb5822da9d34f384b92f72a61c4dc)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  *
3  * Copyright (c) 2016-2018 Solarflare Communications Inc.
4  * All rights reserved.
5  *
6  * This software was jointly developed between OKTET Labs (under contract
7  * for Solarflare) and Solarflare Communications, Inc.
8  */
9 
10 #include <rte_mempool.h>
11 
12 #include "efx.h"
13 
14 #include "sfc.h"
15 #include "sfc_debug.h"
16 #include "sfc_log.h"
17 #include "sfc_ev.h"
18 #include "sfc_rx.h"
19 #include "sfc_kvargs.h"
20 #include "sfc_tweak.h"
21 
22 /*
23  * Maximum number of Rx queue flush attempts in the case of flush failure or
24  * flush timeout
25  */
26 #define SFC_RX_QFLUSH_ATTEMPTS		(3)
27 
28 /*
29  * Time to wait between event queue polling attempts when waiting for Rx
30  * queue flush done or failed events.
31  */
32 #define SFC_RX_QFLUSH_POLL_WAIT_MS	(1)
33 
34 /*
35  * Maximum number of event queue polling attempts when waiting for Rx queue
36  * flush done or failed events. It defines Rx queue flush attempt timeout
37  * together with SFC_RX_QFLUSH_POLL_WAIT_MS.
38  */
39 #define SFC_RX_QFLUSH_POLL_ATTEMPTS	(2000)
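/*
 * Together these limits bound the flush wait: each attempt polls for at
 * most SFC_RX_QFLUSH_POLL_ATTEMPTS * SFC_RX_QFLUSH_POLL_WAIT_MS = 2000 ms,
 * so SFC_RX_QFLUSH_ATTEMPTS retries give a worst case of about 6 seconds
 * (see sfc_rx_qflush()).
 */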
40 
41 void
42 sfc_rx_qflush_done(struct sfc_rxq *rxq)
43 {
44 	rxq->state |= SFC_RXQ_FLUSHED;
45 	rxq->state &= ~SFC_RXQ_FLUSHING;
46 }
47 
48 void
49 sfc_rx_qflush_failed(struct sfc_rxq *rxq)
50 {
51 	rxq->state |= SFC_RXQ_FLUSH_FAILED;
52 	rxq->state &= ~SFC_RXQ_FLUSHING;
53 }
54 
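/*
 * Refill the Rx ring with mbufs from the refill mempool in bulks of
 * SFC_RX_REFILL_BULK buffers. Returns early if the free space is below
 * the refill threshold; if the mempool runs dry, whatever has been
 * posted so far is pushed to the NIC.
 */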
55 static void
56 sfc_efx_rx_qrefill(struct sfc_efx_rxq *rxq)
57 {
58 	unsigned int free_space;
59 	unsigned int bulks;
60 	void *objs[SFC_RX_REFILL_BULK];
61 	efsys_dma_addr_t addr[RTE_DIM(objs)];
62 	unsigned int added = rxq->added;
63 	unsigned int id;
64 	unsigned int i;
65 	struct sfc_efx_rx_sw_desc *rxd;
66 	struct rte_mbuf *m;
67 	uint16_t port_id = rxq->dp.dpq.port_id;
68 
69 	free_space = rxq->max_fill_level - (added - rxq->completed);
70 
71 	if (free_space < rxq->refill_threshold)
72 		return;
73 
74 	bulks = free_space / RTE_DIM(objs);
75 	/* refill_threshold guarantees that bulks is positive */
76 	SFC_ASSERT(bulks > 0);
77 
78 	id = added & rxq->ptr_mask;
79 	do {
80 		if (unlikely(rte_mempool_get_bulk(rxq->refill_mb_pool, objs,
81 						  RTE_DIM(objs)) < 0)) {
82 			/*
83 			 * It is hardly a safe way to increment the counter
84 			 * from different contexts, but all PMDs do it.
85 			 */
86 			rxq->evq->sa->eth_dev->data->rx_mbuf_alloc_failed +=
87 				RTE_DIM(objs);
88 			/* Return if we have posted nothing yet */
89 			if (added == rxq->added)
90 				return;
91 			/* Push posted */
92 			break;
93 		}
94 
95 		for (i = 0; i < RTE_DIM(objs);
96 		     ++i, id = (id + 1) & rxq->ptr_mask) {
97 			m = objs[i];
98 
99 			rxd = &rxq->sw_desc[id];
100 			rxd->mbuf = m;
101 
102 			SFC_ASSERT(rte_mbuf_refcnt_read(m) == 1);
103 			m->data_off = RTE_PKTMBUF_HEADROOM;
104 			SFC_ASSERT(m->next == NULL);
105 			SFC_ASSERT(m->nb_segs == 1);
106 			m->port = port_id;
107 
108 			addr[i] = rte_pktmbuf_iova(m);
109 		}
110 
111 		efx_rx_qpost(rxq->common, addr, rxq->buf_size,
112 			     RTE_DIM(objs), rxq->completed, added);
113 		added += RTE_DIM(objs);
114 	} while (--bulks > 0);
115 
116 	SFC_ASSERT(added != rxq->added);
117 	rxq->added = added;
118 	efx_rx_qpush(rxq->common, added, &rxq->pushed);
119 }
120 
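/* Convert libefx Rx descriptor flags to mbuf offload flags (ol_flags) */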
121 static uint64_t
122 sfc_efx_rx_desc_flags_to_offload_flags(const unsigned int desc_flags)
123 {
124 	uint64_t mbuf_flags = 0;
125 
126 	switch (desc_flags & (EFX_PKT_IPV4 | EFX_CKSUM_IPV4)) {
127 	case (EFX_PKT_IPV4 | EFX_CKSUM_IPV4):
128 		mbuf_flags |= PKT_RX_IP_CKSUM_GOOD;
129 		break;
130 	case EFX_PKT_IPV4:
131 		mbuf_flags |= PKT_RX_IP_CKSUM_BAD;
132 		break;
133 	default:
134 		RTE_BUILD_BUG_ON(PKT_RX_IP_CKSUM_UNKNOWN != 0);
135 		SFC_ASSERT((mbuf_flags & PKT_RX_IP_CKSUM_MASK) ==
136 			   PKT_RX_IP_CKSUM_UNKNOWN);
137 		break;
138 	}
139 
140 	switch ((desc_flags &
141 		 (EFX_PKT_TCP | EFX_PKT_UDP | EFX_CKSUM_TCPUDP))) {
142 	case (EFX_PKT_TCP | EFX_CKSUM_TCPUDP):
143 	case (EFX_PKT_UDP | EFX_CKSUM_TCPUDP):
144 		mbuf_flags |= PKT_RX_L4_CKSUM_GOOD;
145 		break;
146 	case EFX_PKT_TCP:
147 	case EFX_PKT_UDP:
148 		mbuf_flags |= PKT_RX_L4_CKSUM_BAD;
149 		break;
150 	default:
151 		RTE_BUILD_BUG_ON(PKT_RX_L4_CKSUM_UNKNOWN != 0);
152 		SFC_ASSERT((mbuf_flags & PKT_RX_L4_CKSUM_MASK) ==
153 			   PKT_RX_L4_CKSUM_UNKNOWN);
154 		break;
155 	}
156 
157 	return mbuf_flags;
158 }
159 
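/* Derive the mbuf packet type from libefx Rx descriptor flags */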
160 static uint32_t
161 sfc_efx_rx_desc_flags_to_packet_type(const unsigned int desc_flags)
162 {
163 	return RTE_PTYPE_L2_ETHER |
164 		((desc_flags & EFX_PKT_IPV4) ?
165 			RTE_PTYPE_L3_IPV4_EXT_UNKNOWN : 0) |
166 		((desc_flags & EFX_PKT_IPV6) ?
167 			RTE_PTYPE_L3_IPV6_EXT_UNKNOWN : 0) |
168 		((desc_flags & EFX_PKT_TCP) ? RTE_PTYPE_L4_TCP : 0) |
169 		((desc_flags & EFX_PKT_UDP) ? RTE_PTYPE_L4_UDP : 0);
170 }
171 
172 static const uint32_t *
173 sfc_efx_supported_ptypes_get(__rte_unused uint32_t tunnel_encaps)
174 {
175 	static const uint32_t ptypes[] = {
176 		RTE_PTYPE_L2_ETHER,
177 		RTE_PTYPE_L3_IPV4_EXT_UNKNOWN,
178 		RTE_PTYPE_L3_IPV6_EXT_UNKNOWN,
179 		RTE_PTYPE_L4_TCP,
180 		RTE_PTYPE_L4_UDP,
181 		RTE_PTYPE_UNKNOWN
182 	};
183 
184 	return ptypes;
185 }
186 
187 #if EFSYS_OPT_RX_SCALE
188 static void
189 sfc_efx_rx_set_rss_hash(struct sfc_efx_rxq *rxq, unsigned int flags,
190 			struct rte_mbuf *m)
191 {
192 	uint8_t *mbuf_data;
193 
194 
195 	if ((rxq->flags & SFC_EFX_RXQ_FLAG_RSS_HASH) == 0)
196 		return;
197 
198 	mbuf_data = rte_pktmbuf_mtod(m, uint8_t *);
199 
200 	if (flags & (EFX_PKT_IPV4 | EFX_PKT_IPV6)) {
201 		m->hash.rss = efx_pseudo_hdr_hash_get(rxq->common,
202 						      EFX_RX_HASHALG_TOEPLITZ,
203 						      mbuf_data);
204 
205 		m->ol_flags |= PKT_RX_RSS_HASH;
206 	}
207 }
208 #else
209 static void
210 sfc_efx_rx_set_rss_hash(__rte_unused struct sfc_efx_rxq *rxq,
211 			__rte_unused unsigned int flags,
212 			__rte_unused struct rte_mbuf *m)
213 {
214 }
215 #endif
216 
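/*
 * libefx-based datapath receive burst callback: poll the event queue,
 * complete pending descriptors (chaining scattered fragments and dropping
 * packets marked for discard), fill in offload flags, packet type and RSS
 * hash, and refill the Rx ring afterwards.
 */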
217 static uint16_t
218 sfc_efx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
219 {
220 	struct sfc_dp_rxq *dp_rxq = rx_queue;
221 	struct sfc_efx_rxq *rxq = sfc_efx_rxq_by_dp_rxq(dp_rxq);
222 	unsigned int completed;
223 	unsigned int prefix_size = rxq->prefix_size;
224 	unsigned int done_pkts = 0;
225 	boolean_t discard_next = B_FALSE;
226 	struct rte_mbuf *scatter_pkt = NULL;
227 
228 	if (unlikely((rxq->flags & SFC_EFX_RXQ_FLAG_RUNNING) == 0))
229 		return 0;
230 
231 	sfc_ev_qpoll(rxq->evq);
232 
233 	completed = rxq->completed;
234 	while (completed != rxq->pending && done_pkts < nb_pkts) {
235 		unsigned int id;
236 		struct sfc_efx_rx_sw_desc *rxd;
237 		struct rte_mbuf *m;
238 		unsigned int seg_len;
239 		unsigned int desc_flags;
240 
241 		id = completed++ & rxq->ptr_mask;
242 		rxd = &rxq->sw_desc[id];
243 		m = rxd->mbuf;
244 		desc_flags = rxd->flags;
245 
246 		if (discard_next)
247 			goto discard;
248 
249 		if (desc_flags & (EFX_ADDR_MISMATCH | EFX_DISCARD))
250 			goto discard;
251 
252 		if (desc_flags & EFX_PKT_PREFIX_LEN) {
253 			uint16_t tmp_size;
254 			int rc __rte_unused;
255 
256 			rc = efx_pseudo_hdr_pkt_length_get(rxq->common,
257 				rte_pktmbuf_mtod(m, uint8_t *), &tmp_size);
258 			SFC_ASSERT(rc == 0);
259 			seg_len = tmp_size;
260 		} else {
261 			seg_len = rxd->size - prefix_size;
262 		}
263 
264 		rte_pktmbuf_data_len(m) = seg_len;
265 		rte_pktmbuf_pkt_len(m) = seg_len;
266 
267 		if (scatter_pkt != NULL) {
268 			if (rte_pktmbuf_chain(scatter_pkt, m) != 0) {
269 				rte_pktmbuf_free(scatter_pkt);
270 				goto discard;
271 			}
272 			/* The packet to deliver */
273 			m = scatter_pkt;
274 		}
275 
276 		if (desc_flags & EFX_PKT_CONT) {
277 			/* The packet is scattered, more fragments to come */
278 			scatter_pkt = m;
279 			/* Further fragments have no prefix */
280 			prefix_size = 0;
281 			continue;
282 		}
283 
284 		/* Scattered packet is done */
285 		scatter_pkt = NULL;
286 		/* The first fragment of the packet has the prefix */
287 		prefix_size = rxq->prefix_size;
288 
289 		m->ol_flags =
290 			sfc_efx_rx_desc_flags_to_offload_flags(desc_flags);
291 		m->packet_type =
292 			sfc_efx_rx_desc_flags_to_packet_type(desc_flags);
293 
294 		/*
295 		 * Extract RSS hash from the packet prefix and
296 		 * set the corresponding field (if needed and possible)
297 		 */
298 		sfc_efx_rx_set_rss_hash(rxq, desc_flags, m);
299 
300 		m->data_off += prefix_size;
301 
302 		*rx_pkts++ = m;
303 		done_pkts++;
304 		continue;
305 
306 discard:
307 		discard_next = ((desc_flags & EFX_PKT_CONT) != 0);
308 		rte_mempool_put(rxq->refill_mb_pool, m);
309 		rxd->mbuf = NULL;
310 	}
311 
312 	/* pending is only moved when the entire packet is received */
313 	SFC_ASSERT(scatter_pkt == NULL);
314 
315 	rxq->completed = completed;
316 
317 	sfc_efx_rx_qrefill(rxq);
318 
319 	return done_pkts;
320 }
321 
322 static sfc_dp_rx_qdesc_npending_t sfc_efx_rx_qdesc_npending;
323 static unsigned int
324 sfc_efx_rx_qdesc_npending(struct sfc_dp_rxq *dp_rxq)
325 {
326 	struct sfc_efx_rxq *rxq = sfc_efx_rxq_by_dp_rxq(dp_rxq);
327 
328 	if ((rxq->flags & SFC_EFX_RXQ_FLAG_RUNNING) == 0)
329 		return 0;
330 
331 	sfc_ev_qpoll(rxq->evq);
332 
333 	return rxq->pending - rxq->completed;
334 }
335 
336 static sfc_dp_rx_qdesc_status_t sfc_efx_rx_qdesc_status;
337 static int
338 sfc_efx_rx_qdesc_status(struct sfc_dp_rxq *dp_rxq, uint16_t offset)
339 {
340 	struct sfc_efx_rxq *rxq = sfc_efx_rxq_by_dp_rxq(dp_rxq);
341 
342 	if (unlikely(offset > rxq->ptr_mask))
343 		return -EINVAL;
344 
345 	/*
346 	 * Poll the EvQ to derive an up-to-date 'rxq->pending' figure;
347 	 * the queue is required to be running, but the check is
348 	 * omitted because the API design assumes that it is the
349 	 * caller's duty to satisfy all conditions
350 	 */
351 	SFC_ASSERT((rxq->flags & SFC_EFX_RXQ_FLAG_RUNNING) ==
352 		   SFC_EFX_RXQ_FLAG_RUNNING);
353 	sfc_ev_qpoll(rxq->evq);
354 
355 	/*
356 	 * There is a handful of reserved entries in the ring,
357 	 * but an explicit check whether the offset points to
358 	 * a reserved entry is omitted since the two checks
359 	 * below rely on figures which take the HW limits into
360 	 * account; thus, if an entry is reserved, both checks
361 	 * fail and the UNAVAIL code is returned
362 	 */
363 
364 	if (offset < (rxq->pending - rxq->completed))
365 		return RTE_ETH_RX_DESC_DONE;
366 
367 	if (offset < (rxq->added - rxq->completed))
368 		return RTE_ETH_RX_DESC_AVAIL;
369 
370 	return RTE_ETH_RX_DESC_UNAVAIL;
371 }
372 
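/*
 * Find the control path Rx queue which corresponds to the given datapath
 * Rx queue using the port and queue IDs kept in the generic queue data.
 */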
373 struct sfc_rxq *
374 sfc_rxq_by_dp_rxq(const struct sfc_dp_rxq *dp_rxq)
375 {
376 	const struct sfc_dp_queue *dpq = &dp_rxq->dpq;
377 	struct rte_eth_dev *eth_dev;
378 	struct sfc_adapter *sa;
379 	struct sfc_rxq *rxq;
380 
381 	SFC_ASSERT(rte_eth_dev_is_valid_port(dpq->port_id));
382 	eth_dev = &rte_eth_devices[dpq->port_id];
383 
384 	sa = eth_dev->data->dev_private;
385 
386 	SFC_ASSERT(dpq->queue_id < sa->rxq_count);
387 	rxq = sa->rxq_info[dpq->queue_id].rxq;
388 
389 	SFC_ASSERT(rxq != NULL);
390 	return rxq;
391 }
392 
393 static sfc_dp_rx_qsize_up_rings_t sfc_efx_rx_qsize_up_rings;
394 static int
395 sfc_efx_rx_qsize_up_rings(uint16_t nb_rx_desc,
396 			  unsigned int *rxq_entries,
397 			  unsigned int *evq_entries,
398 			  unsigned int *rxq_max_fill_level)
399 {
400 	*rxq_entries = nb_rx_desc;
401 	*evq_entries = nb_rx_desc;
402 	*rxq_max_fill_level = EFX_RXQ_LIMIT(*rxq_entries);
403 	return 0;
404 }
405 
406 static sfc_dp_rx_qcreate_t sfc_efx_rx_qcreate;
407 static int
408 sfc_efx_rx_qcreate(uint16_t port_id, uint16_t queue_id,
409 		   const struct rte_pci_addr *pci_addr, int socket_id,
410 		   const struct sfc_dp_rx_qcreate_info *info,
411 		   struct sfc_dp_rxq **dp_rxqp)
412 {
413 	struct sfc_efx_rxq *rxq;
414 	int rc;
415 
416 	rc = ENOMEM;
417 	rxq = rte_zmalloc_socket("sfc-efx-rxq", sizeof(*rxq),
418 				 RTE_CACHE_LINE_SIZE, socket_id);
419 	if (rxq == NULL)
420 		goto fail_rxq_alloc;
421 
422 	sfc_dp_queue_init(&rxq->dp.dpq, port_id, queue_id, pci_addr);
423 
424 	rc = ENOMEM;
425 	rxq->sw_desc = rte_calloc_socket("sfc-efx-rxq-sw_desc",
426 					 info->rxq_entries,
427 					 sizeof(*rxq->sw_desc),
428 					 RTE_CACHE_LINE_SIZE, socket_id);
429 	if (rxq->sw_desc == NULL)
430 		goto fail_desc_alloc;
431 
432 	/* libefx-based datapath is bound to the libefx-based control path */
433 	rxq->evq = sfc_rxq_by_dp_rxq(&rxq->dp)->evq;
434 	if (info->flags & SFC_RXQ_FLAG_RSS_HASH)
435 		rxq->flags |= SFC_EFX_RXQ_FLAG_RSS_HASH;
436 	rxq->ptr_mask = info->rxq_entries - 1;
437 	rxq->batch_max = info->batch_max;
438 	rxq->prefix_size = info->prefix_size;
439 	rxq->max_fill_level = info->max_fill_level;
440 	rxq->refill_threshold = info->refill_threshold;
441 	rxq->buf_size = info->buf_size;
442 	rxq->refill_mb_pool = info->refill_mb_pool;
443 
444 	*dp_rxqp = &rxq->dp;
445 	return 0;
446 
447 fail_desc_alloc:
448 	rte_free(rxq);
449 
450 fail_rxq_alloc:
451 	return rc;
452 }
453 
454 static sfc_dp_rx_qdestroy_t sfc_efx_rx_qdestroy;
455 static void
456 sfc_efx_rx_qdestroy(struct sfc_dp_rxq *dp_rxq)
457 {
458 	struct sfc_efx_rxq *rxq = sfc_efx_rxq_by_dp_rxq(dp_rxq);
459 
460 	rte_free(rxq->sw_desc);
461 	rte_free(rxq);
462 }
463 
464 static sfc_dp_rx_qstart_t sfc_efx_rx_qstart;
465 static int
466 sfc_efx_rx_qstart(struct sfc_dp_rxq *dp_rxq,
467 		  __rte_unused unsigned int evq_read_ptr)
468 {
469 	/* libefx-based datapath is specific to libefx-based PMD */
470 	struct sfc_efx_rxq *rxq = sfc_efx_rxq_by_dp_rxq(dp_rxq);
471 	struct sfc_rxq *crxq = sfc_rxq_by_dp_rxq(dp_rxq);
472 
473 	rxq->common = crxq->common;
474 
475 	rxq->pending = rxq->completed = rxq->added = rxq->pushed = 0;
476 
477 	sfc_efx_rx_qrefill(rxq);
478 
479 	rxq->flags |= (SFC_EFX_RXQ_FLAG_STARTED | SFC_EFX_RXQ_FLAG_RUNNING);
480 
481 	return 0;
482 }
483 
484 static sfc_dp_rx_qstop_t sfc_efx_rx_qstop;
485 static void
486 sfc_efx_rx_qstop(struct sfc_dp_rxq *dp_rxq,
487 		 __rte_unused unsigned int *evq_read_ptr)
488 {
489 	struct sfc_efx_rxq *rxq = sfc_efx_rxq_by_dp_rxq(dp_rxq);
490 
491 	rxq->flags &= ~SFC_EFX_RXQ_FLAG_RUNNING;
492 
493 	/* libefx-based datapath is bound to libefx-based PMD and uses
494 	 * event queue structure directly. So, there is no need to
495 	 * return the EvQ read pointer.
496 	 */
497 }
498 
499 static sfc_dp_rx_qpurge_t sfc_efx_rx_qpurge;
500 static void
501 sfc_efx_rx_qpurge(struct sfc_dp_rxq *dp_rxq)
502 {
503 	struct sfc_efx_rxq *rxq = sfc_efx_rxq_by_dp_rxq(dp_rxq);
504 	unsigned int i;
505 	struct sfc_efx_rx_sw_desc *rxd;
506 
507 	for (i = rxq->completed; i != rxq->added; ++i) {
508 		rxd = &rxq->sw_desc[i & rxq->ptr_mask];
509 		rte_mempool_put(rxq->refill_mb_pool, rxd->mbuf);
510 		rxd->mbuf = NULL;
511 		/* Packed stream relies on 0 in inactive SW desc.
512 		 * Rx queue stop is not performance critical, so
513 		 * there is no harm in doing it always.
514 		 */
515 		rxd->flags = 0;
516 		rxd->size = 0;
517 	}
518 
519 	rxq->flags &= ~SFC_EFX_RXQ_FLAG_STARTED;
520 }
521 
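/* libefx-based datapath Rx operations */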
522 struct sfc_dp_rx sfc_efx_rx = {
523 	.dp = {
524 		.name		= SFC_KVARG_DATAPATH_EFX,
525 		.type		= SFC_DP_RX,
526 		.hw_fw_caps	= 0,
527 	},
528 	.features		= SFC_DP_RX_FEAT_SCATTER,
529 	.qsize_up_rings		= sfc_efx_rx_qsize_up_rings,
530 	.qcreate		= sfc_efx_rx_qcreate,
531 	.qdestroy		= sfc_efx_rx_qdestroy,
532 	.qstart			= sfc_efx_rx_qstart,
533 	.qstop			= sfc_efx_rx_qstop,
534 	.qpurge			= sfc_efx_rx_qpurge,
535 	.supported_ptypes_get	= sfc_efx_supported_ptypes_get,
536 	.qdesc_npending		= sfc_efx_rx_qdesc_npending,
537 	.qdesc_status		= sfc_efx_rx_qdesc_status,
538 	.pkt_burst		= sfc_efx_recv_pkts,
539 };
540 
541 unsigned int
542 sfc_rx_qdesc_npending(struct sfc_adapter *sa, unsigned int sw_index)
543 {
544 	struct sfc_rxq *rxq;
545 
546 	SFC_ASSERT(sw_index < sa->rxq_count);
547 	rxq = sa->rxq_info[sw_index].rxq;
548 
549 	if (rxq == NULL || (rxq->state & SFC_RXQ_STARTED) == 0)
550 		return 0;
551 
552 	return sa->dp_rx->qdesc_npending(rxq->dp);
553 }
554 
555 int
556 sfc_rx_qdesc_done(struct sfc_dp_rxq *dp_rxq, unsigned int offset)
557 {
558 	struct sfc_rxq *rxq = sfc_rxq_by_dp_rxq(dp_rxq);
559 
560 	return offset < rxq->evq->sa->dp_rx->qdesc_npending(dp_rxq);
561 }
562 
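/*
 * Flush the Rx queue and wait for the flush done or flush failed event,
 * retrying up to SFC_RX_QFLUSH_ATTEMPTS times; buffers left in the ring
 * are returned to the mempool in any case.
 */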
563 static void
564 sfc_rx_qflush(struct sfc_adapter *sa, unsigned int sw_index)
565 {
566 	struct sfc_rxq *rxq;
567 	unsigned int retry_count;
568 	unsigned int wait_count;
569 	int rc;
570 
571 	rxq = sa->rxq_info[sw_index].rxq;
572 	SFC_ASSERT(rxq->state & SFC_RXQ_STARTED);
573 
574 	/*
575 	 * Retry Rx queue flushing in the case of flush failure or
576 	 * timeout. In the worst case it can delay for 6 seconds.
577 	 */
578 	for (retry_count = 0;
579 	     ((rxq->state & SFC_RXQ_FLUSHED) == 0) &&
580 	     (retry_count < SFC_RX_QFLUSH_ATTEMPTS);
581 	     ++retry_count) {
582 		rc = efx_rx_qflush(rxq->common);
583 		if (rc != 0) {
584 			rxq->state |= (rc == EALREADY) ?
585 				SFC_RXQ_FLUSHED : SFC_RXQ_FLUSH_FAILED;
586 			break;
587 		}
588 		rxq->state &= ~SFC_RXQ_FLUSH_FAILED;
589 		rxq->state |= SFC_RXQ_FLUSHING;
590 
591 		/*
592 		 * Wait for the Rx queue flush done or failed event for at
593 		 * least SFC_RX_QFLUSH_POLL_WAIT_MS milliseconds and not more
594 		 * than 2 seconds (SFC_RX_QFLUSH_POLL_WAIT_MS multiplied
595 		 * by SFC_RX_QFLUSH_POLL_ATTEMPTS).
596 		 */
597 		wait_count = 0;
598 		do {
599 			rte_delay_ms(SFC_RX_QFLUSH_POLL_WAIT_MS);
600 			sfc_ev_qpoll(rxq->evq);
601 		} while ((rxq->state & SFC_RXQ_FLUSHING) &&
602 			 (wait_count++ < SFC_RX_QFLUSH_POLL_ATTEMPTS));
603 
604 		if (rxq->state & SFC_RXQ_FLUSHING)
605 			sfc_err(sa, "RxQ %u flush timed out", sw_index);
606 
607 		if (rxq->state & SFC_RXQ_FLUSH_FAILED)
608 			sfc_err(sa, "RxQ %u flush failed", sw_index);
609 
610 		if (rxq->state & SFC_RXQ_FLUSHED)
611 			sfc_info(sa, "RxQ %u flushed", sw_index);
612 	}
613 
614 	sa->dp_rx->qpurge(rxq->dp);
615 }
616 
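/*
 * Direct traffic which does not match any installed filter to the given
 * Rx queue (spread with RSS if multiple Rx channels are configured);
 * retry with promiscuous/all-multicast disabled if the HW rejects them.
 */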
617 static int
618 sfc_rx_default_rxq_set_filter(struct sfc_adapter *sa, struct sfc_rxq *rxq)
619 {
620 	boolean_t rss = (sa->rss_channels > 0) ? B_TRUE : B_FALSE;
621 	struct sfc_port *port = &sa->port;
622 	int rc;
623 
624 	/*
625 	 * If promiscuous or all-multicast mode has been requested, setting
626 	 * the filter for the default Rx queue might fail, in particular
627 	 * when running over a PCI function which is not a member of the
628 	 * corresponding privilege groups; if this occurs, a few retries
629 	 * are made without the promiscuous and all-multicast flags set
630 	 */
631 retry:
632 	rc = efx_mac_filter_default_rxq_set(sa->nic, rxq->common, rss);
633 	if (rc == 0)
634 		return 0;
635 	else if (rc != EOPNOTSUPP)
636 		return rc;
637 
638 	if (port->promisc) {
639 		sfc_warn(sa, "promiscuous mode has been requested, "
640 			     "but the HW rejects it");
641 		sfc_warn(sa, "promiscuous mode will be disabled");
642 
643 		port->promisc = B_FALSE;
644 		rc = sfc_set_rx_mode(sa);
645 		if (rc != 0)
646 			return rc;
647 
648 		goto retry;
649 	}
650 
651 	if (port->allmulti) {
652 		sfc_warn(sa, "all-multicast mode has been requested, "
653 			     "but the HW rejects it");
654 		sfc_warn(sa, "all-multicast mode will be disabled");
655 
656 		port->allmulti = B_FALSE;
657 		rc = sfc_set_rx_mode(sa);
658 		if (rc != 0)
659 			return rc;
660 
661 		goto retry;
662 	}
663 
664 	return rc;
665 }
666 
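/*
 * Start an Rx queue: start its event queue, create and enable the HW Rx
 * queue, start the datapath queue and, for queue 0 of a non-isolated
 * port, install the default Rx queue MAC filter.
 */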
667 int
668 sfc_rx_qstart(struct sfc_adapter *sa, unsigned int sw_index)
669 {
670 	struct sfc_port *port = &sa->port;
671 	struct sfc_rxq_info *rxq_info;
672 	struct sfc_rxq *rxq;
673 	struct sfc_evq *evq;
674 	int rc;
675 
676 	sfc_log_init(sa, "sw_index=%u", sw_index);
677 
678 	SFC_ASSERT(sw_index < sa->rxq_count);
679 
680 	rxq_info = &sa->rxq_info[sw_index];
681 	rxq = rxq_info->rxq;
682 	SFC_ASSERT(rxq->state == SFC_RXQ_INITIALIZED);
683 
684 	evq = rxq->evq;
685 
686 	rc = sfc_ev_qstart(evq, sfc_evq_index_by_rxq_sw_index(sa, sw_index));
687 	if (rc != 0)
688 		goto fail_ev_qstart;
689 
690 	rc = efx_rx_qcreate(sa->nic, rxq->hw_index, 0, rxq_info->type,
691 			    &rxq->mem, rxq_info->entries,
692 			    0 /* not used on EF10 */, rxq_info->type_flags,
693 			    evq->common, &rxq->common);
694 	if (rc != 0)
695 		goto fail_rx_qcreate;
696 
697 	efx_rx_qenable(rxq->common);
698 
699 	rc = sa->dp_rx->qstart(rxq->dp, evq->read_ptr);
700 	if (rc != 0)
701 		goto fail_dp_qstart;
702 
703 	rxq->state |= SFC_RXQ_STARTED;
704 
705 	if ((sw_index == 0) && !port->isolated) {
706 		rc = sfc_rx_default_rxq_set_filter(sa, rxq);
707 		if (rc != 0)
708 			goto fail_mac_filter_default_rxq_set;
709 	}
710 
711 	/* It seems to be used by DPDK for debug purposes only ('rte_ether') */
712 	sa->eth_dev->data->rx_queue_state[sw_index] =
713 		RTE_ETH_QUEUE_STATE_STARTED;
714 
715 	return 0;
716 
717 fail_mac_filter_default_rxq_set:
718 	sa->dp_rx->qstop(rxq->dp, &rxq->evq->read_ptr);
719 
720 fail_dp_qstart:
721 	sfc_rx_qflush(sa, sw_index);
722 
723 fail_rx_qcreate:
724 	sfc_ev_qstop(evq);
725 
726 fail_ev_qstart:
727 	return rc;
728 }
729 
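/*
 * Stop an Rx queue: stop the datapath queue, clear the default Rx queue
 * MAC filter for queue 0, flush and destroy the HW Rx queue and stop the
 * corresponding event queue.
 */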
730 void
731 sfc_rx_qstop(struct sfc_adapter *sa, unsigned int sw_index)
732 {
733 	struct sfc_rxq_info *rxq_info;
734 	struct sfc_rxq *rxq;
735 
736 	sfc_log_init(sa, "sw_index=%u", sw_index);
737 
738 	SFC_ASSERT(sw_index < sa->rxq_count);
739 
740 	rxq_info = &sa->rxq_info[sw_index];
741 	rxq = rxq_info->rxq;
742 
743 	if (rxq->state == SFC_RXQ_INITIALIZED)
744 		return;
745 	SFC_ASSERT(rxq->state & SFC_RXQ_STARTED);
746 
747 	/* It seems to be used by DPDK for debug purposes only ('rte_ether') */
748 	sa->eth_dev->data->rx_queue_state[sw_index] =
749 		RTE_ETH_QUEUE_STATE_STOPPED;
750 
751 	sa->dp_rx->qstop(rxq->dp, &rxq->evq->read_ptr);
752 
753 	if (sw_index == 0)
754 		efx_mac_filter_default_rxq_clear(sa->nic);
755 
756 	sfc_rx_qflush(sa, sw_index);
757 
758 	rxq->state = SFC_RXQ_INITIALIZED;
759 
760 	efx_rx_qdestroy(rxq->common);
761 
762 	sfc_ev_qstop(rxq->evq);
763 }
764 
765 uint64_t
766 sfc_rx_get_dev_offload_caps(struct sfc_adapter *sa)
767 {
768 	const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
769 	uint64_t caps = 0;
770 
771 	caps |= DEV_RX_OFFLOAD_JUMBO_FRAME;
772 	caps |= DEV_RX_OFFLOAD_CRC_STRIP;
773 	caps |= DEV_RX_OFFLOAD_IPV4_CKSUM;
774 	caps |= DEV_RX_OFFLOAD_UDP_CKSUM;
775 	caps |= DEV_RX_OFFLOAD_TCP_CKSUM;
776 
777 	if (encp->enc_tunnel_encapsulations_supported &&
778 	    (sa->dp_rx->features & SFC_DP_RX_FEAT_TUNNELS))
779 		caps |= DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM;
780 
781 	return caps;
782 }
783 
784 uint64_t
785 sfc_rx_get_queue_offload_caps(struct sfc_adapter *sa)
786 {
787 	uint64_t caps = 0;
788 
789 	if (sa->dp_rx->features & SFC_DP_RX_FEAT_SCATTER)
790 		caps |= DEV_RX_OFFLOAD_SCATTER;
791 
792 	return caps;
793 }
794 
795 static void
796 sfc_rx_log_offloads(struct sfc_adapter *sa, const char *offload_group,
797 		    const char *verdict, uint64_t offloads)
798 {
799 	unsigned long long bit;
800 
801 	while ((bit = __builtin_ffsll(offloads)) != 0) {
802 		uint64_t flag = (1ULL << --bit);
803 
804 		sfc_err(sa, "Rx %s offload %s %s", offload_group,
805 			rte_eth_dev_rx_offload_name(flag), verdict);
806 
807 		offloads &= ~flag;
808 	}
809 }
810 
811 static boolean_t
812 sfc_rx_queue_offloads_mismatch(struct sfc_adapter *sa, uint64_t requested)
813 {
814 	uint64_t mandatory = sa->eth_dev->data->dev_conf.rxmode.offloads;
815 	uint64_t supported = sfc_rx_get_dev_offload_caps(sa) |
816 			     sfc_rx_get_queue_offload_caps(sa);
817 	uint64_t rejected = requested & ~supported;
818 	uint64_t missing = (requested & mandatory) ^ mandatory;
819 	boolean_t mismatch = B_FALSE;
820 
821 	if (rejected) {
822 		sfc_rx_log_offloads(sa, "queue", "is unsupported", rejected);
823 		mismatch = B_TRUE;
824 	}
825 
826 	if (missing) {
827 		sfc_rx_log_offloads(sa, "queue", "must be set", missing);
828 		mismatch = B_TRUE;
829 	}
830 
831 	return mismatch;
832 }
833 
834 static int
835 sfc_rx_qcheck_conf(struct sfc_adapter *sa, unsigned int rxq_max_fill_level,
836 		   const struct rte_eth_rxconf *rx_conf)
837 {
838 	uint64_t offloads_supported = sfc_rx_get_dev_offload_caps(sa) |
839 				      sfc_rx_get_queue_offload_caps(sa);
840 	int rc = 0;
841 
842 	if (rx_conf->rx_thresh.pthresh != 0 ||
843 	    rx_conf->rx_thresh.hthresh != 0 ||
844 	    rx_conf->rx_thresh.wthresh != 0) {
845 		sfc_warn(sa,
846 			"RxQ prefetch/host/writeback thresholds are not supported");
847 	}
848 
849 	if (rx_conf->rx_free_thresh > rxq_max_fill_level) {
850 		sfc_err(sa,
851 			"RxQ free threshold too large: %u vs maximum %u",
852 			rx_conf->rx_free_thresh, rxq_max_fill_level);
853 		rc = EINVAL;
854 	}
855 
856 	if (rx_conf->rx_drop_en == 0) {
857 		sfc_err(sa, "RxQ drop disable is not supported");
858 		rc = EINVAL;
859 	}
860 
861 	if ((rx_conf->offloads & DEV_RX_OFFLOAD_CHECKSUM) !=
862 	    DEV_RX_OFFLOAD_CHECKSUM)
863 		sfc_warn(sa, "Rx checksum offloads cannot be disabled - always on (IPv4/TCP/UDP)");
864 
865 	if ((offloads_supported & DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM) &&
866 	    (~rx_conf->offloads & DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM))
867 		sfc_warn(sa, "Rx outer IPv4 checksum offload cannot be disabled - always on");
868 
869 	if (sfc_rx_queue_offloads_mismatch(sa, rx_conf->offloads))
870 		rc = EINVAL;
871 
872 	return rc;
873 }
874 
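/*
 * Calculate the data start alignment guaranteed for mbufs from the given
 * mempool, based on the mbuf object alignment and the fixed offset of the
 * data area (mbuf header, private area and headroom).
 */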
875 static unsigned int
876 sfc_rx_mbuf_data_alignment(struct rte_mempool *mb_pool)
877 {
878 	uint32_t data_off;
879 	uint32_t order;
880 
881 	/* The mbuf object itself is always cache line aligned */
882 	order = rte_bsf32(RTE_CACHE_LINE_SIZE);
883 
884 	/* Data offset from mbuf object start */
885 	data_off = sizeof(struct rte_mbuf) + rte_pktmbuf_priv_size(mb_pool) +
886 		RTE_PKTMBUF_HEADROOM;
887 
888 	order = MIN(order, rte_bsf32(data_off));
889 
890 	return 1u << (order - 1);
891 }
892 
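/*
 * Calculate the Rx buffer size which may be posted to the NIC for mbufs
 * from the given mempool: take the data room size, subtract the headroom
 * and reserve space required by the NIC buffer start and end padding
 * alignment constraints. Returns 0 if the objects are too small.
 */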
893 static uint16_t
894 sfc_rx_mb_pool_buf_size(struct sfc_adapter *sa, struct rte_mempool *mb_pool)
895 {
896 	const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
897 	const uint32_t nic_align_start = MAX(1, encp->enc_rx_buf_align_start);
898 	const uint32_t nic_align_end = MAX(1, encp->enc_rx_buf_align_end);
899 	uint16_t buf_size;
900 	unsigned int buf_aligned;
901 	unsigned int start_alignment;
902 	unsigned int end_padding_alignment;
903 
904 	/* Below it is assumed that both alignments are powers of 2 */
905 	SFC_ASSERT(rte_is_power_of_2(nic_align_start));
906 	SFC_ASSERT(rte_is_power_of_2(nic_align_end));
907 
908 	/*
909 	 * The mbuf is always cache line aligned; double-check
910 	 * that it meets the Rx buffer start alignment requirements.
911 	 */
912 
913 	/* Start from mbuf pool data room size */
914 	buf_size = rte_pktmbuf_data_room_size(mb_pool);
915 
916 	/* Remove headroom */
917 	if (buf_size <= RTE_PKTMBUF_HEADROOM) {
918 		sfc_err(sa,
919 			"RxQ mbuf pool %s object data room size %u is smaller than headroom %u",
920 			mb_pool->name, buf_size, RTE_PKTMBUF_HEADROOM);
921 		return 0;
922 	}
923 	buf_size -= RTE_PKTMBUF_HEADROOM;
924 
925 	/* Calculate guaranteed data start alignment */
926 	buf_aligned = sfc_rx_mbuf_data_alignment(mb_pool);
927 
928 	/* Reserve space for start alignment */
929 	if (buf_aligned < nic_align_start) {
930 		start_alignment = nic_align_start - buf_aligned;
931 		if (buf_size <= start_alignment) {
932 			sfc_err(sa,
933 				"RxQ mbuf pool %s object data room size %u is insufficient for headroom %u and buffer start alignment %u required by NIC",
934 				mb_pool->name,
935 				rte_pktmbuf_data_room_size(mb_pool),
936 				RTE_PKTMBUF_HEADROOM, start_alignment);
937 			return 0;
938 		}
939 		buf_aligned = nic_align_start;
940 		buf_size -= start_alignment;
941 	} else {
942 		start_alignment = 0;
943 	}
944 
945 	/* Make sure that end padding does not write beyond the buffer */
946 	if (buf_aligned < nic_align_end) {
947 		/*
948 		 * Estimate the space which can be lost. If the guaranteed
949 		 * buffer size is odd, the lost space is (nic_align_end - 1).
950 		 * A more accurate formula is used below.
951 		 */
952 		end_padding_alignment = nic_align_end -
953 			MIN(buf_aligned, 1u << (rte_bsf32(buf_size) - 1));
954 		if (buf_size <= end_padding_alignment) {
955 			sfc_err(sa,
956 				"RxQ mbuf pool %s object data room size %u is insufficient for headroom %u, buffer start alignment %u and end padding alignment %u required by NIC",
957 				mb_pool->name,
958 				rte_pktmbuf_data_room_size(mb_pool),
959 				RTE_PKTMBUF_HEADROOM, start_alignment,
960 				end_padding_alignment);
961 			return 0;
962 		}
963 		buf_size -= end_padding_alignment;
964 	} else {
965 		/*
966 		 * The start is aligned the same as or better than the end,
967 		 * so just align the length.
968 		 */
969 		buf_size = P2ALIGN(buf_size, nic_align_end);
970 	}
971 
972 	return buf_size;
973 }
974 
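/*
 * Initialize an Rx queue: validate the configuration, size up the Rx and
 * event queue rings, check that the mempool buffer size is sufficient
 * (unless Rx scatter is enabled), allocate control path structures and
 * DMA memory, and create the datapath Rx queue.
 */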
975 int
976 sfc_rx_qinit(struct sfc_adapter *sa, unsigned int sw_index,
977 	     uint16_t nb_rx_desc, unsigned int socket_id,
978 	     const struct rte_eth_rxconf *rx_conf,
979 	     struct rte_mempool *mb_pool)
980 {
981 	const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
982 	int rc;
983 	unsigned int rxq_entries;
984 	unsigned int evq_entries;
985 	unsigned int rxq_max_fill_level;
986 	uint16_t buf_size;
987 	struct sfc_rxq_info *rxq_info;
988 	struct sfc_evq *evq;
989 	struct sfc_rxq *rxq;
990 	struct sfc_dp_rx_qcreate_info info;
991 
992 	rc = sa->dp_rx->qsize_up_rings(nb_rx_desc, &rxq_entries, &evq_entries,
993 				       &rxq_max_fill_level);
994 	if (rc != 0)
995 		goto fail_size_up_rings;
996 	SFC_ASSERT(rxq_entries >= EFX_RXQ_MINNDESCS);
997 	SFC_ASSERT(rxq_entries <= EFX_RXQ_MAXNDESCS);
998 	SFC_ASSERT(rxq_entries >= nb_rx_desc);
999 	SFC_ASSERT(rxq_max_fill_level <= nb_rx_desc);
1000 
1001 	rc = sfc_rx_qcheck_conf(sa, rxq_max_fill_level, rx_conf);
1002 	if (rc != 0)
1003 		goto fail_bad_conf;
1004 
1005 	buf_size = sfc_rx_mb_pool_buf_size(sa, mb_pool);
1006 	if (buf_size == 0) {
1007 		sfc_err(sa, "RxQ %u mbuf pool object size is too small",
1008 			sw_index);
1009 		rc = EINVAL;
1010 		goto fail_bad_conf;
1011 	}
1012 
1013 	if ((buf_size < sa->port.pdu + encp->enc_rx_prefix_size) &&
1014 	    (~rx_conf->offloads & DEV_RX_OFFLOAD_SCATTER)) {
1015 		sfc_err(sa, "Rx scatter is disabled and RxQ %u mbuf pool "
1016 			"object size is too small", sw_index);
1017 		sfc_err(sa, "RxQ %u calculated Rx buffer size is %u vs "
1018 			"PDU size %u plus Rx prefix %u bytes",
1019 			sw_index, buf_size, (unsigned int)sa->port.pdu,
1020 			encp->enc_rx_prefix_size);
1021 		rc = EINVAL;
1022 		goto fail_bad_conf;
1023 	}
1024 
1025 	SFC_ASSERT(sw_index < sa->rxq_count);
1026 	rxq_info = &sa->rxq_info[sw_index];
1027 
1028 	SFC_ASSERT(rxq_entries <= rxq_info->max_entries);
1029 	rxq_info->entries = rxq_entries;
1030 	rxq_info->type = EFX_RXQ_TYPE_DEFAULT;
1031 	rxq_info->type_flags =
1032 		(rx_conf->offloads & DEV_RX_OFFLOAD_SCATTER) ?
1033 		EFX_RXQ_FLAG_SCATTER : EFX_RXQ_FLAG_NONE;
1034 
1035 	if ((encp->enc_tunnel_encapsulations_supported != 0) &&
1036 	    (sa->dp_rx->features & SFC_DP_RX_FEAT_TUNNELS))
1037 		rxq_info->type_flags |= EFX_RXQ_FLAG_INNER_CLASSES;
1038 
1039 	rc = sfc_ev_qinit(sa, SFC_EVQ_TYPE_RX, sw_index,
1040 			  evq_entries, socket_id, &evq);
1041 	if (rc != 0)
1042 		goto fail_ev_qinit;
1043 
1044 	rc = ENOMEM;
1045 	rxq = rte_zmalloc_socket("sfc-rxq", sizeof(*rxq), RTE_CACHE_LINE_SIZE,
1046 				 socket_id);
1047 	if (rxq == NULL)
1048 		goto fail_rxq_alloc;
1049 
1050 	rxq_info->rxq = rxq;
1051 
1052 	rxq->evq = evq;
1053 	rxq->hw_index = sw_index;
1054 	rxq->refill_threshold =
1055 		RTE_MAX(rx_conf->rx_free_thresh, SFC_RX_REFILL_BULK);
1056 	rxq->refill_mb_pool = mb_pool;
1057 
1058 	rc = sfc_dma_alloc(sa, "rxq", sw_index, EFX_RXQ_SIZE(rxq_info->entries),
1059 			   socket_id, &rxq->mem);
1060 	if (rc != 0)
1061 		goto fail_dma_alloc;
1062 
1063 	memset(&info, 0, sizeof(info));
1064 	info.refill_mb_pool = rxq->refill_mb_pool;
1065 	info.max_fill_level = rxq_max_fill_level;
1066 	info.refill_threshold = rxq->refill_threshold;
1067 	info.buf_size = buf_size;
1068 	info.batch_max = encp->enc_rx_batch_max;
1069 	info.prefix_size = encp->enc_rx_prefix_size;
1070 
1071 #if EFSYS_OPT_RX_SCALE
1072 	if (sa->hash_support == EFX_RX_HASH_AVAILABLE && sa->rss_channels > 0)
1073 		info.flags |= SFC_RXQ_FLAG_RSS_HASH;
1074 #endif
1075 
1076 	info.rxq_entries = rxq_info->entries;
1077 	info.rxq_hw_ring = rxq->mem.esm_base;
1078 	info.evq_entries = evq_entries;
1079 	info.evq_hw_ring = evq->mem.esm_base;
1080 	info.hw_index = rxq->hw_index;
1081 	info.mem_bar = sa->mem_bar.esb_base;
1082 
1083 	rc = sa->dp_rx->qcreate(sa->eth_dev->data->port_id, sw_index,
1084 				&RTE_ETH_DEV_TO_PCI(sa->eth_dev)->addr,
1085 				socket_id, &info, &rxq->dp);
1086 	if (rc != 0)
1087 		goto fail_dp_rx_qcreate;
1088 
1089 	evq->dp_rxq = rxq->dp;
1090 
1091 	rxq->state = SFC_RXQ_INITIALIZED;
1092 
1093 	rxq_info->deferred_start = (rx_conf->rx_deferred_start != 0);
1094 
1095 	return 0;
1096 
1097 fail_dp_rx_qcreate:
1098 	sfc_dma_free(sa, &rxq->mem);
1099 
1100 fail_dma_alloc:
1101 	rxq_info->rxq = NULL;
1102 	rte_free(rxq);
1103 
1104 fail_rxq_alloc:
1105 	sfc_ev_qfini(evq);
1106 
1107 fail_ev_qinit:
1108 	rxq_info->entries = 0;
1109 
1110 fail_bad_conf:
1111 fail_size_up_rings:
1112 	sfc_log_init(sa, "failed %d", rc);
1113 	return rc;
1114 }
1115 
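/*
 * Destroy the datapath Rx queue and release the control path resources:
 * DMA memory, the event queue and the queue structure itself.
 */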
1116 void
1117 sfc_rx_qfini(struct sfc_adapter *sa, unsigned int sw_index)
1118 {
1119 	struct sfc_rxq_info *rxq_info;
1120 	struct sfc_rxq *rxq;
1121 
1122 	SFC_ASSERT(sw_index < sa->rxq_count);
1123 
1124 	rxq_info = &sa->rxq_info[sw_index];
1125 
1126 	rxq = rxq_info->rxq;
1127 	SFC_ASSERT(rxq->state == SFC_RXQ_INITIALIZED);
1128 
1129 	sa->dp_rx->qdestroy(rxq->dp);
1130 	rxq->dp = NULL;
1131 
1132 	rxq_info->rxq = NULL;
1133 	rxq_info->entries = 0;
1134 
1135 	sfc_dma_free(sa, &rxq->mem);
1136 
1137 	sfc_ev_qfini(rxq->evq);
1138 	rxq->evq = NULL;
1139 
1140 	rte_free(rxq);
1141 }
1142 
1143 #if EFSYS_OPT_RX_SCALE
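/* Convert DPDK RSS hash field selection (ETH_RSS_*) to libefx hash types */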
1144 efx_rx_hash_type_t
1145 sfc_rte_to_efx_hash_type(uint64_t rss_hf)
1146 {
1147 	efx_rx_hash_type_t efx_hash_types = 0;
1148 
1149 	if ((rss_hf & (ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
1150 		       ETH_RSS_NONFRAG_IPV4_OTHER)) != 0)
1151 		efx_hash_types |= EFX_RX_HASH_IPV4;
1152 
1153 	if ((rss_hf & ETH_RSS_NONFRAG_IPV4_TCP) != 0)
1154 		efx_hash_types |= EFX_RX_HASH_TCPIPV4;
1155 
1156 	if ((rss_hf & (ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
1157 			ETH_RSS_NONFRAG_IPV6_OTHER | ETH_RSS_IPV6_EX)) != 0)
1158 		efx_hash_types |= EFX_RX_HASH_IPV6;
1159 
1160 	if ((rss_hf & (ETH_RSS_NONFRAG_IPV6_TCP | ETH_RSS_IPV6_TCP_EX)) != 0)
1161 		efx_hash_types |= EFX_RX_HASH_TCPIPV6;
1162 
1163 	return efx_hash_types;
1164 }
1165 
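/* Convert libefx hash types to DPDK RSS hash field selection (ETH_RSS_*) */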
1166 uint64_t
1167 sfc_efx_to_rte_hash_type(efx_rx_hash_type_t efx_hash_types)
1168 {
1169 	uint64_t rss_hf = 0;
1170 
1171 	if ((efx_hash_types & EFX_RX_HASH_IPV4) != 0)
1172 		rss_hf |= (ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
1173 			   ETH_RSS_NONFRAG_IPV4_OTHER);
1174 
1175 	if ((efx_hash_types & EFX_RX_HASH_TCPIPV4) != 0)
1176 		rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP;
1177 
1178 	if ((efx_hash_types & EFX_RX_HASH_IPV6) != 0)
1179 		rss_hf |= (ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
1180 			   ETH_RSS_NONFRAG_IPV6_OTHER | ETH_RSS_IPV6_EX);
1181 
1182 	if ((efx_hash_types & EFX_RX_HASH_TCPIPV6) != 0)
1183 		rss_hf |= (ETH_RSS_NONFRAG_IPV6_TCP | ETH_RSS_IPV6_TCP_EX);
1184 
1185 	return rss_hf;
1186 }
1187 #endif
1188 
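/*
 * Apply the adapter RSS configuration to the NIC: Toeplitz hashing with
 * the configured hash types, RSS key and indirection table. No-op if
 * RSS is not used.
 */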
1189 #if EFSYS_OPT_RX_SCALE
1190 static int
1191 sfc_rx_rss_config(struct sfc_adapter *sa)
1192 {
1193 	int rc = 0;
1194 
1195 	if (sa->rss_channels > 0) {
1196 		rc = efx_rx_scale_mode_set(sa->nic, EFX_RSS_CONTEXT_DEFAULT,
1197 					   EFX_RX_HASHALG_TOEPLITZ,
1198 					   sa->rss_hash_types, B_TRUE);
1199 		if (rc != 0)
1200 			goto finish;
1201 
1202 		rc = efx_rx_scale_key_set(sa->nic, EFX_RSS_CONTEXT_DEFAULT,
1203 					  sa->rss_key,
1204 					  sizeof(sa->rss_key));
1205 		if (rc != 0)
1206 			goto finish;
1207 
1208 		rc = efx_rx_scale_tbl_set(sa->nic, EFX_RSS_CONTEXT_DEFAULT,
1209 					  sa->rss_tbl, RTE_DIM(sa->rss_tbl));
1210 	}
1211 
1212 finish:
1213 	return rc;
1214 }
1215 #else
1216 static int
1217 sfc_rx_rss_config(__rte_unused struct sfc_adapter *sa)
1218 {
1219 	return 0;
1220 }
1221 #endif
1222 
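/*
 * Start the Rx subsystem: initialize the NIC Rx module, apply the RSS
 * configuration and start all Rx queues which do not have deferred start
 * requested (or which have already been started explicitly before).
 */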
1223 int
1224 sfc_rx_start(struct sfc_adapter *sa)
1225 {
1226 	unsigned int sw_index;
1227 	int rc;
1228 
1229 	sfc_log_init(sa, "rxq_count=%u", sa->rxq_count);
1230 
1231 	rc = efx_rx_init(sa->nic);
1232 	if (rc != 0)
1233 		goto fail_rx_init;
1234 
1235 	rc = sfc_rx_rss_config(sa);
1236 	if (rc != 0)
1237 		goto fail_rss_config;
1238 
1239 	for (sw_index = 0; sw_index < sa->rxq_count; ++sw_index) {
1240 		if ((!sa->rxq_info[sw_index].deferred_start ||
1241 		     sa->rxq_info[sw_index].deferred_started)) {
1242 			rc = sfc_rx_qstart(sa, sw_index);
1243 			if (rc != 0)
1244 				goto fail_rx_qstart;
1245 		}
1246 	}
1247 
1248 	return 0;
1249 
1250 fail_rx_qstart:
1251 	while (sw_index-- > 0)
1252 		sfc_rx_qstop(sa, sw_index);
1253 
1254 fail_rss_config:
1255 	efx_rx_fini(sa->nic);
1256 
1257 fail_rx_init:
1258 	sfc_log_init(sa, "failed %d", rc);
1259 	return rc;
1260 }
1261 
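/* Stop all initialized Rx queues and shut down the NIC Rx module */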
1262 void
1263 sfc_rx_stop(struct sfc_adapter *sa)
1264 {
1265 	unsigned int sw_index;
1266 
1267 	sfc_log_init(sa, "rxq_count=%u", sa->rxq_count);
1268 
1269 	sw_index = sa->rxq_count;
1270 	while (sw_index-- > 0) {
1271 		if (sa->rxq_info[sw_index].rxq != NULL)
1272 			sfc_rx_qstop(sa, sw_index);
1273 	}
1274 
1275 	efx_rx_fini(sa->nic);
1276 }
1277 
1278 static int
1279 sfc_rx_qinit_info(struct sfc_adapter *sa, unsigned int sw_index)
1280 {
1281 	struct sfc_rxq_info *rxq_info = &sa->rxq_info[sw_index];
1282 	unsigned int max_entries;
1283 
1284 	max_entries = EFX_RXQ_MAXNDESCS;
1285 	SFC_ASSERT(rte_is_power_of_2(max_entries));
1286 
1287 	rxq_info->max_entries = max_entries;
1288 
1289 	return 0;
1290 }
1291 
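/*
 * Check the device-level Rx mode: multi-queue mode, requested offloads
 * and FCS stripping which cannot be disabled.
 */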
1292 static int
1293 sfc_rx_check_mode(struct sfc_adapter *sa, struct rte_eth_rxmode *rxmode)
1294 {
1295 	uint64_t offloads_supported = sfc_rx_get_dev_offload_caps(sa) |
1296 				      sfc_rx_get_queue_offload_caps(sa);
1297 	uint64_t offloads_rejected = rxmode->offloads & ~offloads_supported;
1298 	int rc = 0;
1299 
1300 	switch (rxmode->mq_mode) {
1301 	case ETH_MQ_RX_NONE:
1302 		/* No special checks are required */
1303 		break;
1304 #if EFSYS_OPT_RX_SCALE
1305 	case ETH_MQ_RX_RSS:
1306 		if (sa->rss_support == EFX_RX_SCALE_UNAVAILABLE) {
1307 			sfc_err(sa, "RSS is not available");
1308 			rc = EINVAL;
1309 		}
1310 		break;
1311 #endif
1312 	default:
1313 		sfc_err(sa, "Rx multi-queue mode %u not supported",
1314 			rxmode->mq_mode);
1315 		rc = EINVAL;
1316 	}
1317 
1318 	if (offloads_rejected) {
1319 		sfc_rx_log_offloads(sa, "device", "is unsupported",
1320 				    offloads_rejected);
1321 		rc = EINVAL;
1322 	}
1323 
1324 	if (~rxmode->offloads & DEV_RX_OFFLOAD_CRC_STRIP) {
1325 		sfc_warn(sa, "FCS stripping cannot be disabled - always on");
1326 		rxmode->offloads |= DEV_RX_OFFLOAD_CRC_STRIP;
1327 		rxmode->hw_strip_crc = 1;
1328 	}
1329 
1330 	return rc;
1331 }
1332 
1333 /**
1334  * Destroy excess queues that are no longer needed after reconfiguration
1335  * or complete close.
1336  */
1337 static void
1338 sfc_rx_fini_queues(struct sfc_adapter *sa, unsigned int nb_rx_queues)
1339 {
1340 	int sw_index;
1341 
1342 	SFC_ASSERT(nb_rx_queues <= sa->rxq_count);
1343 
1344 	sw_index = sa->rxq_count;
1345 	while (--sw_index >= (int)nb_rx_queues) {
1346 		if (sa->rxq_info[sw_index].rxq != NULL)
1347 			sfc_rx_qfini(sa, sw_index);
1348 	}
1349 
1350 	sa->rxq_count = nb_rx_queues;
1351 }
1352 
1353 /**
1354  * Initialize Rx subsystem.
1355  *
1356  * Called at device (re)configuration stage when number of receive queues is
1357  * specified together with other device level receive configuration.
1358  *
1359  * It should be used to allocate NUMA-unaware resources.
1360  */
1361 int
1362 sfc_rx_configure(struct sfc_adapter *sa)
1363 {
1364 	struct rte_eth_conf *dev_conf = &sa->eth_dev->data->dev_conf;
1365 	const unsigned int nb_rx_queues = sa->eth_dev->data->nb_rx_queues;
1366 	int rc;
1367 
1368 	sfc_log_init(sa, "nb_rx_queues=%u (old %u)",
1369 		     nb_rx_queues, sa->rxq_count);
1370 
1371 	rc = sfc_rx_check_mode(sa, &dev_conf->rxmode);
1372 	if (rc != 0)
1373 		goto fail_check_mode;
1374 
1375 	if (nb_rx_queues == sa->rxq_count)
1376 		goto done;
1377 
1378 	if (sa->rxq_info == NULL) {
1379 		rc = ENOMEM;
1380 		sa->rxq_info = rte_calloc_socket("sfc-rxqs", nb_rx_queues,
1381 						 sizeof(sa->rxq_info[0]), 0,
1382 						 sa->socket_id);
1383 		if (sa->rxq_info == NULL)
1384 			goto fail_rxqs_alloc;
1385 	} else {
1386 		struct sfc_rxq_info *new_rxq_info;
1387 
1388 		if (nb_rx_queues < sa->rxq_count)
1389 			sfc_rx_fini_queues(sa, nb_rx_queues);
1390 
1391 		rc = ENOMEM;
1392 		new_rxq_info =
1393 			rte_realloc(sa->rxq_info,
1394 				    nb_rx_queues * sizeof(sa->rxq_info[0]), 0);
1395 		if (new_rxq_info == NULL && nb_rx_queues > 0)
1396 			goto fail_rxqs_realloc;
1397 
1398 		sa->rxq_info = new_rxq_info;
1399 		if (nb_rx_queues > sa->rxq_count)
1400 			memset(&sa->rxq_info[sa->rxq_count], 0,
1401 			       (nb_rx_queues - sa->rxq_count) *
1402 			       sizeof(sa->rxq_info[0]));
1403 	}
1404 
1405 	while (sa->rxq_count < nb_rx_queues) {
1406 		rc = sfc_rx_qinit_info(sa, sa->rxq_count);
1407 		if (rc != 0)
1408 			goto fail_rx_qinit_info;
1409 
1410 		sa->rxq_count++;
1411 	}
1412 
1413 #if EFSYS_OPT_RX_SCALE
1414 	sa->rss_channels = (dev_conf->rxmode.mq_mode == ETH_MQ_RX_RSS) ?
1415 			   MIN(sa->rxq_count, EFX_MAXRSS) : 0;
1416 
1417 	if (sa->rss_channels > 0) {
1418 		unsigned int sw_index;
1419 
1420 		for (sw_index = 0; sw_index < EFX_RSS_TBL_SIZE; ++sw_index)
1421 			sa->rss_tbl[sw_index] = sw_index % sa->rss_channels;
1422 	}
1423 #endif
1424 
1425 done:
1426 	return 0;
1427 
1428 fail_rx_qinit_info:
1429 fail_rxqs_realloc:
1430 fail_rxqs_alloc:
1431 	sfc_rx_close(sa);
1432 
1433 fail_check_mode:
1434 	sfc_log_init(sa, "failed %d", rc);
1435 	return rc;
1436 }
1437 
1438 /**
1439  * Shutdown Rx subsystem.
1440  *
1441  * Called at device close stage, for example, before device shutdown.
1442  */
1443 void
1444 sfc_rx_close(struct sfc_adapter *sa)
1445 {
1446 	sfc_rx_fini_queues(sa, 0);
1447 
1448 	sa->rss_channels = 0;
1449 
1450 	rte_free(sa->rxq_info);
1451 	sa->rxq_info = NULL;
1452 }
1453