xref: /dpdk/drivers/net/sfc/sfc_ethdev.c (revision daa02b5cddbb8e11b31d41e2bf7bb1ae64dcae2f)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  *
3  * Copyright(c) 2019-2021 Xilinx, Inc.
4  * Copyright(c) 2016-2019 Solarflare Communications Inc.
5  *
6  * This software was jointly developed between OKTET Labs (under contract
7  * for Solarflare) and Solarflare Communications, Inc.
8  */
9 
10 #include <rte_dev.h>
11 #include <ethdev_driver.h>
12 #include <ethdev_pci.h>
13 #include <rte_pci.h>
14 #include <rte_bus_pci.h>
15 #include <rte_errno.h>
16 #include <rte_string_fns.h>
17 #include <rte_ether.h>
18 
19 #include "efx.h"
20 
21 #include "sfc.h"
22 #include "sfc_debug.h"
23 #include "sfc_log.h"
24 #include "sfc_kvargs.h"
25 #include "sfc_ev.h"
26 #include "sfc_rx.h"
27 #include "sfc_tx.h"
28 #include "sfc_flow.h"
29 #include "sfc_flow_tunnel.h"
30 #include "sfc_dp.h"
31 #include "sfc_dp_rx.h"
32 #include "sfc_repr.h"
33 #include "sfc_sw_stats.h"
34 #include "sfc_switch.h"
35 
/*
 * Placeholder written to every slot of a by-ID values array before it is
 * filled in; a slot still holding it afterwards marks an invalid ID.
 */
#define SFC_XSTAT_ID_INVALID_VAL  UINT64_MAX
/*
 * Placeholder written to the first character of every by-ID name before
 * names are filled in; a name still starting with it marks an invalid ID.
 */
#define SFC_XSTAT_ID_INVALID_NAME '\0'

/* NOTE(review): presumably the driver log type ID registered elsewhere */
uint32_t sfc_logtype_driver;

/* NOTE(review): presumably the list of registered datapaths - confirm */
static struct sfc_dp_list sfc_dp_head =
	TAILQ_HEAD_INITIALIZER(sfc_dp_head);


/* Forward declaration: sfc_dev_close() calls it ahead of the definition */
static void sfc_eth_dev_clear_ops(struct rte_eth_dev *dev);
46 
47 
48 static int
49 sfc_fw_version_get(struct rte_eth_dev *dev, char *fw_version, size_t fw_size)
50 {
51 	struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
52 	efx_nic_fw_info_t enfi;
53 	int ret;
54 	int rc;
55 
56 	rc = efx_nic_get_fw_version(sa->nic, &enfi);
57 	if (rc != 0)
58 		return -rc;
59 
60 	ret = snprintf(fw_version, fw_size,
61 		       "%" PRIu16 ".%" PRIu16 ".%" PRIu16 ".%" PRIu16,
62 		       enfi.enfi_mc_fw_version[0], enfi.enfi_mc_fw_version[1],
63 		       enfi.enfi_mc_fw_version[2], enfi.enfi_mc_fw_version[3]);
64 	if (ret < 0)
65 		return ret;
66 
67 	if (enfi.enfi_dpcpu_fw_ids_valid) {
68 		size_t dpcpu_fw_ids_offset = MIN(fw_size - 1, (size_t)ret);
69 		int ret_extra;
70 
71 		ret_extra = snprintf(fw_version + dpcpu_fw_ids_offset,
72 				     fw_size - dpcpu_fw_ids_offset,
73 				     " rx%" PRIx16 " tx%" PRIx16,
74 				     enfi.enfi_rx_dpcpu_fw_id,
75 				     enfi.enfi_tx_dpcpu_fw_id);
76 		if (ret_extra < 0)
77 			return ret_extra;
78 
79 		ret += ret_extra;
80 	}
81 
82 	if (fw_size < (size_t)(++ret))
83 		return ret;
84 	else
85 		return 0;
86 }
87 
88 static int
89 sfc_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
90 {
91 	const struct sfc_adapter_priv *sap = sfc_adapter_priv_by_eth_dev(dev);
92 	struct sfc_adapter_shared *sas = sfc_adapter_shared_by_eth_dev(dev);
93 	struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
94 	struct sfc_rss *rss = &sas->rss;
95 	struct sfc_mae *mae = &sa->mae;
96 	uint64_t txq_offloads_def = 0;
97 
98 	sfc_log_init(sa, "entry");
99 
100 	dev_info->min_mtu = RTE_ETHER_MIN_MTU;
101 	dev_info->max_mtu = EFX_MAC_SDU_MAX;
102 
103 	dev_info->max_rx_pktlen = EFX_MAC_PDU_MAX;
104 
105 	dev_info->max_vfs = sa->sriov.num_vfs;
106 
107 	/* Autonegotiation may be disabled */
108 	dev_info->speed_capa = RTE_ETH_LINK_SPEED_FIXED;
109 	if (sa->port.phy_adv_cap_mask & (1u << EFX_PHY_CAP_1000FDX))
110 		dev_info->speed_capa |= RTE_ETH_LINK_SPEED_1G;
111 	if (sa->port.phy_adv_cap_mask & (1u << EFX_PHY_CAP_10000FDX))
112 		dev_info->speed_capa |= RTE_ETH_LINK_SPEED_10G;
113 	if (sa->port.phy_adv_cap_mask & (1u << EFX_PHY_CAP_25000FDX))
114 		dev_info->speed_capa |= RTE_ETH_LINK_SPEED_25G;
115 	if (sa->port.phy_adv_cap_mask & (1u << EFX_PHY_CAP_40000FDX))
116 		dev_info->speed_capa |= RTE_ETH_LINK_SPEED_40G;
117 	if (sa->port.phy_adv_cap_mask & (1u << EFX_PHY_CAP_50000FDX))
118 		dev_info->speed_capa |= RTE_ETH_LINK_SPEED_50G;
119 	if (sa->port.phy_adv_cap_mask & (1u << EFX_PHY_CAP_100000FDX))
120 		dev_info->speed_capa |= RTE_ETH_LINK_SPEED_100G;
121 
122 	dev_info->max_rx_queues = sa->rxq_max;
123 	dev_info->max_tx_queues = sa->txq_max;
124 
125 	/* By default packets are dropped if no descriptors are available */
126 	dev_info->default_rxconf.rx_drop_en = 1;
127 
128 	dev_info->rx_queue_offload_capa = sfc_rx_get_queue_offload_caps(sa);
129 
130 	/*
131 	 * rx_offload_capa includes both device and queue offloads since
132 	 * the latter may be requested on a per device basis which makes
133 	 * sense when some offloads are needed to be set on all queues.
134 	 */
135 	dev_info->rx_offload_capa = sfc_rx_get_dev_offload_caps(sa) |
136 				    dev_info->rx_queue_offload_capa;
137 
138 	dev_info->tx_queue_offload_capa = sfc_tx_get_queue_offload_caps(sa);
139 
140 	/*
141 	 * tx_offload_capa includes both device and queue offloads since
142 	 * the latter may be requested on a per device basis which makes
143 	 * sense when some offloads are needed to be set on all queues.
144 	 */
145 	dev_info->tx_offload_capa = sfc_tx_get_dev_offload_caps(sa) |
146 				    dev_info->tx_queue_offload_capa;
147 
148 	if (dev_info->tx_offload_capa & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE)
149 		txq_offloads_def |= RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE;
150 
151 	dev_info->default_txconf.offloads |= txq_offloads_def;
152 
153 	if (rss->context_type != EFX_RX_SCALE_UNAVAILABLE) {
154 		uint64_t rte_hf = 0;
155 		unsigned int i;
156 
157 		for (i = 0; i < rss->hf_map_nb_entries; ++i)
158 			rte_hf |= rss->hf_map[i].rte;
159 
160 		dev_info->reta_size = EFX_RSS_TBL_SIZE;
161 		dev_info->hash_key_size = EFX_RSS_KEY_SIZE;
162 		dev_info->flow_type_rss_offloads = rte_hf;
163 	}
164 
165 	/* Initialize to hardware limits */
166 	dev_info->rx_desc_lim.nb_max = sa->rxq_max_entries;
167 	dev_info->rx_desc_lim.nb_min = sa->rxq_min_entries;
168 	/* The RXQ hardware requires that the descriptor count is a power
169 	 * of 2, but rx_desc_lim cannot properly describe that constraint.
170 	 */
171 	dev_info->rx_desc_lim.nb_align = sa->rxq_min_entries;
172 
173 	/* Initialize to hardware limits */
174 	dev_info->tx_desc_lim.nb_max = sa->txq_max_entries;
175 	dev_info->tx_desc_lim.nb_min = sa->txq_min_entries;
176 	/*
177 	 * The TXQ hardware requires that the descriptor count is a power
178 	 * of 2, but tx_desc_lim cannot properly describe that constraint
179 	 */
180 	dev_info->tx_desc_lim.nb_align = sa->txq_min_entries;
181 
182 	if (sap->dp_rx->get_dev_info != NULL)
183 		sap->dp_rx->get_dev_info(dev_info);
184 	if (sap->dp_tx->get_dev_info != NULL)
185 		sap->dp_tx->get_dev_info(dev_info);
186 
187 	dev_info->dev_capa = RTE_ETH_DEV_CAPA_RUNTIME_RX_QUEUE_SETUP |
188 			     RTE_ETH_DEV_CAPA_RUNTIME_TX_QUEUE_SETUP;
189 
190 	if (mae->status == SFC_MAE_STATUS_SUPPORTED ||
191 	    mae->status == SFC_MAE_STATUS_ADMIN) {
192 		dev_info->switch_info.name = dev->device->driver->name;
193 		dev_info->switch_info.domain_id = mae->switch_domain_id;
194 		dev_info->switch_info.port_id = mae->switch_port_id;
195 	}
196 
197 	return 0;
198 }
199 
200 static const uint32_t *
201 sfc_dev_supported_ptypes_get(struct rte_eth_dev *dev)
202 {
203 	const struct sfc_adapter_priv *sap = sfc_adapter_priv_by_eth_dev(dev);
204 
205 	return sap->dp_rx->supported_ptypes_get(sap->shared->tunnel_encaps);
206 }
207 
208 static int
209 sfc_dev_configure(struct rte_eth_dev *dev)
210 {
211 	struct rte_eth_dev_data *dev_data = dev->data;
212 	struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
213 	int rc;
214 
215 	sfc_log_init(sa, "entry n_rxq=%u n_txq=%u",
216 		     dev_data->nb_rx_queues, dev_data->nb_tx_queues);
217 
218 	sfc_adapter_lock(sa);
219 	switch (sa->state) {
220 	case SFC_ETHDEV_CONFIGURED:
221 		/* FALLTHROUGH */
222 	case SFC_ETHDEV_INITIALIZED:
223 		rc = sfc_configure(sa);
224 		break;
225 	default:
226 		sfc_err(sa, "unexpected adapter state %u to configure",
227 			sa->state);
228 		rc = EINVAL;
229 		break;
230 	}
231 	sfc_adapter_unlock(sa);
232 
233 	sfc_log_init(sa, "done %d", rc);
234 	SFC_ASSERT(rc >= 0);
235 	return -rc;
236 }
237 
/*
 * Start the device: run sfc_start() under the adapter lock.
 *
 * Returns the negated sfc_start() result (0 on success).
 */
static int
sfc_dev_start(struct rte_eth_dev *dev)
{
	struct sfc_adapter *adapter = sfc_adapter_by_eth_dev(dev);
	int start_rc;

	sfc_log_init(adapter, "entry");

	sfc_adapter_lock(adapter);
	start_rc = sfc_start(adapter);
	sfc_adapter_unlock(adapter);

	sfc_log_init(adapter, "done %d", start_rc);

	SFC_ASSERT(start_rc >= 0);
	return -start_rc;
}
254 
255 static int
256 sfc_dev_link_update(struct rte_eth_dev *dev, int wait_to_complete)
257 {
258 	struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
259 	struct rte_eth_link current_link;
260 	int ret;
261 
262 	sfc_log_init(sa, "entry");
263 
264 	if (sa->state != SFC_ETHDEV_STARTED) {
265 		sfc_port_link_mode_to_info(EFX_LINK_UNKNOWN, &current_link);
266 	} else if (wait_to_complete) {
267 		efx_link_mode_t link_mode;
268 
269 		if (efx_port_poll(sa->nic, &link_mode) != 0)
270 			link_mode = EFX_LINK_UNKNOWN;
271 		sfc_port_link_mode_to_info(link_mode, &current_link);
272 
273 	} else {
274 		sfc_ev_mgmt_qpoll(sa);
275 		rte_eth_linkstatus_get(dev, &current_link);
276 	}
277 
278 	ret = rte_eth_linkstatus_set(dev, &current_link);
279 	if (ret == 0)
280 		sfc_notice(sa, "Link status is %s",
281 			   current_link.link_status ? "UP" : "DOWN");
282 
283 	return ret;
284 }
285 
/*
 * Stop the device: run sfc_stop() under the adapter lock.
 *
 * Always returns 0.
 */
static int
sfc_dev_stop(struct rte_eth_dev *dev)
{
	struct sfc_adapter *adapter = sfc_adapter_by_eth_dev(dev);

	sfc_log_init(adapter, "entry");

	sfc_adapter_lock(adapter);
	sfc_stop(adapter);
	sfc_adapter_unlock(adapter);

	sfc_log_init(adapter, "done");

	return 0;
}
301 
/*
 * Set the link up. It is implemented as starting the adapter under the
 * adapter lock.
 *
 * Returns the negated sfc_start() result (0 on success).
 */
static int
sfc_dev_set_link_up(struct rte_eth_dev *dev)
{
	struct sfc_adapter *adapter = sfc_adapter_by_eth_dev(dev);
	int start_rc;

	sfc_log_init(adapter, "entry");

	sfc_adapter_lock(adapter);
	start_rc = sfc_start(adapter);
	sfc_adapter_unlock(adapter);

	SFC_ASSERT(start_rc >= 0);
	return -start_rc;
}
317 
/*
 * Set the link down. It is implemented as stopping the adapter under the
 * adapter lock.
 *
 * Always returns 0.
 */
static int
sfc_dev_set_link_down(struct rte_eth_dev *dev)
{
	struct sfc_adapter *adapter = sfc_adapter_by_eth_dev(dev);

	sfc_log_init(adapter, "entry");

	sfc_adapter_lock(adapter);
	sfc_stop(adapter);
	sfc_adapter_unlock(adapter);

	return 0;
}
331 
332 static void
333 sfc_eth_dev_secondary_clear_ops(struct rte_eth_dev *dev)
334 {
335 	free(dev->process_private);
336 	rte_eth_dev_release_port(dev);
337 }
338 
/*
 * Close the device.
 *
 * In a non-primary process only sfc_eth_dev_secondary_clear_ops() is
 * called. Otherwise the adapter is brought down step by step from its
 * current state, detached, unprobed and freed.
 *
 * Always returns 0.
 */
static int
sfc_dev_close(struct rte_eth_dev *dev)
{
	struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);

	sfc_log_init(sa, "entry");

	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
		sfc_eth_dev_secondary_clear_ops(dev);
		return 0;
	}

	/* Called before the adapter lock is taken */
	sfc_pre_detach(sa);

	sfc_adapter_lock(sa);
	/*
	 * Walk down the state ladder: each case undoes one level and falls
	 * through to the next one until the initialized state is reached.
	 */
	switch (sa->state) {
	case SFC_ETHDEV_STARTED:
		sfc_stop(sa);
		SFC_ASSERT(sa->state == SFC_ETHDEV_CONFIGURED);
		/* FALLTHROUGH */
	case SFC_ETHDEV_CONFIGURED:
		sfc_close(sa);
		SFC_ASSERT(sa->state == SFC_ETHDEV_INITIALIZED);
		/* FALLTHROUGH */
	case SFC_ETHDEV_INITIALIZED:
		break;
	default:
		/* Logged only; cleanup below is done regardless */
		sfc_err(sa, "unexpected adapter state %u on close", sa->state);
		break;
	}

	/*
	 * Cleanup all resources.
	 * Rollback primary process sfc_eth_dev_init() below.
	 */

	sfc_eth_dev_clear_ops(dev);

	sfc_detach(sa);
	sfc_unprobe(sa);

	sfc_kvargs_cleanup(sa);

	sfc_adapter_unlock(sa);
	sfc_adapter_lock_fini(sa);

	sfc_log_init(sa, "done");

	/* Required for logging, so cleanup last */
	sa->eth_dev = NULL;

	/* The adapter structure itself is released here */
	free(sa);

	return 0;
}
394 
395 static int
396 sfc_dev_filter_set(struct rte_eth_dev *dev, enum sfc_dev_filter_mode mode,
397 		   boolean_t enabled)
398 {
399 	struct sfc_port *port;
400 	boolean_t *toggle;
401 	struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
402 	boolean_t allmulti = (mode == SFC_DEV_FILTER_MODE_ALLMULTI);
403 	const char *desc = (allmulti) ? "all-multi" : "promiscuous";
404 	int rc = 0;
405 
406 	sfc_adapter_lock(sa);
407 
408 	port = &sa->port;
409 	toggle = (allmulti) ? (&port->allmulti) : (&port->promisc);
410 
411 	if (*toggle != enabled) {
412 		*toggle = enabled;
413 
414 		if (sfc_sa2shared(sa)->isolated) {
415 			sfc_warn(sa, "isolated mode is active on the port");
416 			sfc_warn(sa, "the change is to be applied on the next "
417 				     "start provided that isolated mode is "
418 				     "disabled prior the next start");
419 		} else if ((sa->state == SFC_ETHDEV_STARTED) &&
420 			   ((rc = sfc_set_rx_mode(sa)) != 0)) {
421 			*toggle = !(enabled);
422 			sfc_warn(sa, "Failed to %s %s mode, rc = %d",
423 				 ((enabled) ? "enable" : "disable"), desc, rc);
424 
425 			/*
426 			 * For promiscuous and all-multicast filters a
427 			 * permission failure should be reported as an
428 			 * unsupported filter.
429 			 */
430 			if (rc == EPERM)
431 				rc = ENOTSUP;
432 		}
433 	}
434 
435 	sfc_adapter_unlock(sa);
436 	return rc;
437 }
438 
439 static int
440 sfc_dev_promisc_enable(struct rte_eth_dev *dev)
441 {
442 	int rc = sfc_dev_filter_set(dev, SFC_DEV_FILTER_MODE_PROMISC, B_TRUE);
443 
444 	SFC_ASSERT(rc >= 0);
445 	return -rc;
446 }
447 
448 static int
449 sfc_dev_promisc_disable(struct rte_eth_dev *dev)
450 {
451 	int rc = sfc_dev_filter_set(dev, SFC_DEV_FILTER_MODE_PROMISC, B_FALSE);
452 
453 	SFC_ASSERT(rc >= 0);
454 	return -rc;
455 }
456 
457 static int
458 sfc_dev_allmulti_enable(struct rte_eth_dev *dev)
459 {
460 	int rc = sfc_dev_filter_set(dev, SFC_DEV_FILTER_MODE_ALLMULTI, B_TRUE);
461 
462 	SFC_ASSERT(rc >= 0);
463 	return -rc;
464 }
465 
466 static int
467 sfc_dev_allmulti_disable(struct rte_eth_dev *dev)
468 {
469 	int rc = sfc_dev_filter_set(dev, SFC_DEV_FILTER_MODE_ALLMULTI, B_FALSE);
470 
471 	SFC_ASSERT(rc >= 0);
472 	return -rc;
473 }
474 
475 static int
476 sfc_rx_queue_setup(struct rte_eth_dev *dev, uint16_t ethdev_qid,
477 		   uint16_t nb_rx_desc, unsigned int socket_id,
478 		   const struct rte_eth_rxconf *rx_conf,
479 		   struct rte_mempool *mb_pool)
480 {
481 	struct sfc_adapter_shared *sas = sfc_adapter_shared_by_eth_dev(dev);
482 	struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
483 	sfc_ethdev_qid_t sfc_ethdev_qid = ethdev_qid;
484 	struct sfc_rxq_info *rxq_info;
485 	sfc_sw_index_t sw_index;
486 	int rc;
487 
488 	sfc_log_init(sa, "RxQ=%u nb_rx_desc=%u socket_id=%u",
489 		     ethdev_qid, nb_rx_desc, socket_id);
490 
491 	sfc_adapter_lock(sa);
492 
493 	sw_index = sfc_rxq_sw_index_by_ethdev_rx_qid(sas, sfc_ethdev_qid);
494 	rc = sfc_rx_qinit(sa, sw_index, nb_rx_desc, socket_id,
495 			  rx_conf, mb_pool);
496 	if (rc != 0)
497 		goto fail_rx_qinit;
498 
499 	rxq_info = sfc_rxq_info_by_ethdev_qid(sas, sfc_ethdev_qid);
500 	dev->data->rx_queues[ethdev_qid] = rxq_info->dp;
501 
502 	sfc_adapter_unlock(sa);
503 
504 	return 0;
505 
506 fail_rx_qinit:
507 	sfc_adapter_unlock(sa);
508 	SFC_ASSERT(rc > 0);
509 	return -rc;
510 }
511 
512 static void
513 sfc_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
514 {
515 	struct sfc_dp_rxq *dp_rxq = dev->data->rx_queues[qid];
516 	struct sfc_rxq *rxq;
517 	struct sfc_adapter *sa;
518 	sfc_sw_index_t sw_index;
519 
520 	if (dp_rxq == NULL)
521 		return;
522 
523 	rxq = sfc_rxq_by_dp_rxq(dp_rxq);
524 	sa = rxq->evq->sa;
525 	sfc_adapter_lock(sa);
526 
527 	sw_index = dp_rxq->dpq.queue_id;
528 
529 	sfc_log_init(sa, "RxQ=%u", sw_index);
530 
531 	sfc_rx_qfini(sa, sw_index);
532 
533 	sfc_adapter_unlock(sa);
534 }
535 
536 static int
537 sfc_tx_queue_setup(struct rte_eth_dev *dev, uint16_t ethdev_qid,
538 		   uint16_t nb_tx_desc, unsigned int socket_id,
539 		   const struct rte_eth_txconf *tx_conf)
540 {
541 	struct sfc_adapter_shared *sas = sfc_adapter_shared_by_eth_dev(dev);
542 	struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
543 	struct sfc_txq_info *txq_info;
544 	sfc_sw_index_t sw_index;
545 	int rc;
546 
547 	sfc_log_init(sa, "TxQ = %u, nb_tx_desc = %u, socket_id = %u",
548 		     ethdev_qid, nb_tx_desc, socket_id);
549 
550 	sfc_adapter_lock(sa);
551 
552 	sw_index = sfc_txq_sw_index_by_ethdev_tx_qid(sas, ethdev_qid);
553 	rc = sfc_tx_qinit(sa, sw_index, nb_tx_desc, socket_id, tx_conf);
554 	if (rc != 0)
555 		goto fail_tx_qinit;
556 
557 	txq_info = sfc_txq_info_by_ethdev_qid(sas, ethdev_qid);
558 	dev->data->tx_queues[ethdev_qid] = txq_info->dp;
559 
560 	sfc_adapter_unlock(sa);
561 	return 0;
562 
563 fail_tx_qinit:
564 	sfc_adapter_unlock(sa);
565 	SFC_ASSERT(rc > 0);
566 	return -rc;
567 }
568 
569 static void
570 sfc_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
571 {
572 	struct sfc_dp_txq *dp_txq = dev->data->tx_queues[qid];
573 	struct sfc_txq *txq;
574 	sfc_sw_index_t sw_index;
575 	struct sfc_adapter *sa;
576 
577 	if (dp_txq == NULL)
578 		return;
579 
580 	txq = sfc_txq_by_dp_txq(dp_txq);
581 	sw_index = dp_txq->dpq.queue_id;
582 
583 	SFC_ASSERT(txq->evq != NULL);
584 	sa = txq->evq->sa;
585 
586 	sfc_log_init(sa, "TxQ = %u", sw_index);
587 
588 	sfc_adapter_lock(sa);
589 
590 	sfc_tx_qfini(sa, sw_index);
591 
592 	sfc_adapter_unlock(sa);
593 }
594 
595 static void
596 sfc_stats_get_dp_rx(struct sfc_adapter *sa, uint64_t *pkts, uint64_t *bytes)
597 {
598 	struct sfc_adapter_shared *sas = sfc_sa2shared(sa);
599 	uint64_t pkts_sum = 0;
600 	uint64_t bytes_sum = 0;
601 	unsigned int i;
602 
603 	for (i = 0; i < sas->ethdev_rxq_count; ++i) {
604 		struct sfc_rxq_info *rxq_info;
605 
606 		rxq_info = sfc_rxq_info_by_ethdev_qid(sas, i);
607 		if (rxq_info->state & SFC_RXQ_INITIALIZED) {
608 			union sfc_pkts_bytes qstats;
609 
610 			sfc_pkts_bytes_get(&rxq_info->dp->dpq.stats, &qstats);
611 			pkts_sum += qstats.pkts -
612 					sa->sw_stats.reset_rx_pkts[i];
613 			bytes_sum += qstats.bytes -
614 					sa->sw_stats.reset_rx_bytes[i];
615 		}
616 	}
617 
618 	*pkts = pkts_sum;
619 	*bytes = bytes_sum;
620 }
621 
622 static void
623 sfc_stats_get_dp_tx(struct sfc_adapter *sa, uint64_t *pkts, uint64_t *bytes)
624 {
625 	struct sfc_adapter_shared *sas = sfc_sa2shared(sa);
626 	uint64_t pkts_sum = 0;
627 	uint64_t bytes_sum = 0;
628 	unsigned int i;
629 
630 	for (i = 0; i < sas->ethdev_txq_count; ++i) {
631 		struct sfc_txq_info *txq_info;
632 
633 		txq_info = sfc_txq_info_by_ethdev_qid(sas, i);
634 		if (txq_info->state & SFC_TXQ_INITIALIZED) {
635 			union sfc_pkts_bytes qstats;
636 
637 			sfc_pkts_bytes_get(&txq_info->dp->dpq.stats, &qstats);
638 			pkts_sum += qstats.pkts -
639 					sa->sw_stats.reset_tx_pkts[i];
640 			bytes_sum += qstats.bytes -
641 					sa->sw_stats.reset_tx_bytes[i];
642 		}
643 	}
644 
645 	*pkts = pkts_sum;
646 	*bytes = bytes_sum;
647 }
648 
/*
 * Update a statistic which is computed as a difference A - B of values
 * that both grow monotonically with hardware counters read at different
 * moments.
 *
 * Since A and B are not sampled atomically, a packet may be already
 * accounted in one of them, but not in the other yet. As a result the
 * computed difference may be slightly above or slightly below the real
 * value and may even step back between two consecutive reads.
 *
 * A statistic going backward is considered to be worse than a slightly
 * inaccurate one, so the stored value is replaced only if the new value
 * is ahead of it (in terms of wrap-around safe signed difference).
 * The only exception is zero which is always accepted to follow MAC
 * stats zeroed as a result of a NIC reset.
 */
static void
sfc_update_diff_stat(uint64_t *stat, uint64_t newval)
{
	const int64_t delta = (int64_t)(newval - *stat);

	/* Ignore non-zero values which do not move the statistic forward */
	if (newval != 0 && delta <= 0)
		return;

	*stat = newval;
}
671 
672 static int
673 sfc_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
674 {
675 	const struct sfc_adapter_priv *sap = sfc_adapter_priv_by_eth_dev(dev);
676 	bool have_dp_rx_stats = sap->dp_rx->features & SFC_DP_RX_FEAT_STATS;
677 	bool have_dp_tx_stats = sap->dp_tx->features & SFC_DP_TX_FEAT_STATS;
678 	struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
679 	struct sfc_port *port = &sa->port;
680 	uint64_t *mac_stats;
681 	int ret;
682 
683 	sfc_adapter_lock(sa);
684 
685 	if (have_dp_rx_stats)
686 		sfc_stats_get_dp_rx(sa, &stats->ipackets, &stats->ibytes);
687 	if (have_dp_tx_stats)
688 		sfc_stats_get_dp_tx(sa, &stats->opackets, &stats->obytes);
689 
690 	ret = sfc_port_update_mac_stats(sa, B_FALSE);
691 	if (ret != 0)
692 		goto unlock;
693 
694 	mac_stats = port->mac_stats_buf;
695 
696 	if (EFX_MAC_STAT_SUPPORTED(port->mac_stats_mask,
697 				   EFX_MAC_VADAPTER_RX_UNICAST_PACKETS)) {
698 		if (!have_dp_rx_stats) {
699 			stats->ipackets =
700 				mac_stats[EFX_MAC_VADAPTER_RX_UNICAST_PACKETS] +
701 				mac_stats[EFX_MAC_VADAPTER_RX_MULTICAST_PACKETS] +
702 				mac_stats[EFX_MAC_VADAPTER_RX_BROADCAST_PACKETS];
703 			stats->ibytes =
704 				mac_stats[EFX_MAC_VADAPTER_RX_UNICAST_BYTES] +
705 				mac_stats[EFX_MAC_VADAPTER_RX_MULTICAST_BYTES] +
706 				mac_stats[EFX_MAC_VADAPTER_RX_BROADCAST_BYTES];
707 
708 			/* CRC is included in these stats, but shouldn't be */
709 			stats->ibytes -= stats->ipackets * RTE_ETHER_CRC_LEN;
710 		}
711 		if (!have_dp_tx_stats) {
712 			stats->opackets =
713 				mac_stats[EFX_MAC_VADAPTER_TX_UNICAST_PACKETS] +
714 				mac_stats[EFX_MAC_VADAPTER_TX_MULTICAST_PACKETS] +
715 				mac_stats[EFX_MAC_VADAPTER_TX_BROADCAST_PACKETS];
716 			stats->obytes =
717 				mac_stats[EFX_MAC_VADAPTER_TX_UNICAST_BYTES] +
718 				mac_stats[EFX_MAC_VADAPTER_TX_MULTICAST_BYTES] +
719 				mac_stats[EFX_MAC_VADAPTER_TX_BROADCAST_BYTES];
720 
721 			/* CRC is included in these stats, but shouldn't be */
722 			stats->obytes -= stats->opackets * RTE_ETHER_CRC_LEN;
723 		}
724 		stats->imissed = mac_stats[EFX_MAC_VADAPTER_RX_BAD_PACKETS];
725 		stats->oerrors = mac_stats[EFX_MAC_VADAPTER_TX_BAD_PACKETS];
726 	} else {
727 		if (!have_dp_tx_stats) {
728 			stats->opackets = mac_stats[EFX_MAC_TX_PKTS];
729 			stats->obytes = mac_stats[EFX_MAC_TX_OCTETS] -
730 				mac_stats[EFX_MAC_TX_PKTS] * RTE_ETHER_CRC_LEN;
731 		}
732 
733 		/*
734 		 * Take into account stats which are whenever supported
735 		 * on EF10. If some stat is not supported by current
736 		 * firmware variant or HW revision, it is guaranteed
737 		 * to be zero in mac_stats.
738 		 */
739 		stats->imissed =
740 			mac_stats[EFX_MAC_RX_NODESC_DROP_CNT] +
741 			mac_stats[EFX_MAC_PM_TRUNC_BB_OVERFLOW] +
742 			mac_stats[EFX_MAC_PM_DISCARD_BB_OVERFLOW] +
743 			mac_stats[EFX_MAC_PM_TRUNC_VFIFO_FULL] +
744 			mac_stats[EFX_MAC_PM_DISCARD_VFIFO_FULL] +
745 			mac_stats[EFX_MAC_PM_TRUNC_QBB] +
746 			mac_stats[EFX_MAC_PM_DISCARD_QBB] +
747 			mac_stats[EFX_MAC_PM_DISCARD_MAPPING] +
748 			mac_stats[EFX_MAC_RXDP_Q_DISABLED_PKTS] +
749 			mac_stats[EFX_MAC_RXDP_DI_DROPPED_PKTS];
750 		stats->ierrors =
751 			mac_stats[EFX_MAC_RX_FCS_ERRORS] +
752 			mac_stats[EFX_MAC_RX_ALIGN_ERRORS] +
753 			mac_stats[EFX_MAC_RX_JABBER_PKTS];
754 		/* no oerrors counters supported on EF10 */
755 
756 		if (!have_dp_rx_stats) {
757 			/* Exclude missed, errors and pauses from Rx packets */
758 			sfc_update_diff_stat(&port->ipackets,
759 				mac_stats[EFX_MAC_RX_PKTS] -
760 				mac_stats[EFX_MAC_RX_PAUSE_PKTS] -
761 				stats->imissed - stats->ierrors);
762 			stats->ipackets = port->ipackets;
763 			stats->ibytes = mac_stats[EFX_MAC_RX_OCTETS] -
764 				mac_stats[EFX_MAC_RX_PKTS] * RTE_ETHER_CRC_LEN;
765 		}
766 	}
767 
768 unlock:
769 	sfc_adapter_unlock(sa);
770 	SFC_ASSERT(ret >= 0);
771 	return -ret;
772 }
773 
774 static int
775 sfc_stats_reset(struct rte_eth_dev *dev)
776 {
777 	struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
778 	struct sfc_port *port = &sa->port;
779 	int rc;
780 
781 	sfc_adapter_lock(sa);
782 
783 	if (sa->state != SFC_ETHDEV_STARTED) {
784 		/*
785 		 * The operation cannot be done if port is not started; it
786 		 * will be scheduled to be done during the next port start
787 		 */
788 		port->mac_stats_reset_pending = B_TRUE;
789 		sfc_adapter_unlock(sa);
790 		return 0;
791 	}
792 
793 	rc = sfc_port_reset_mac_stats(sa);
794 	if (rc != 0)
795 		sfc_err(sa, "failed to reset statistics (rc = %d)", rc);
796 
797 	sfc_sw_xstats_reset(sa);
798 
799 	sfc_adapter_unlock(sa);
800 
801 	SFC_ASSERT(rc >= 0);
802 	return -rc;
803 }
804 
805 static unsigned int
806 sfc_xstats_get_nb_supported(struct sfc_adapter *sa)
807 {
808 	struct sfc_port *port = &sa->port;
809 	unsigned int nb_supported;
810 
811 	sfc_adapter_lock(sa);
812 	nb_supported = port->mac_stats_nb_supported +
813 		       sfc_sw_xstats_get_nb_supported(sa);
814 	sfc_adapter_unlock(sa);
815 
816 	return nb_supported;
817 }
818 
/*
 * Get extended statistics values.
 *
 * If no output array is provided, just the number of supported xstats
 * is returned. Otherwise MAC statistics are retrieved first and are
 * followed by software xstats.
 *
 * Returns the number of supported xstats as updated by the helpers or
 * a negative sfc_port_get_mac_stats() result.
 */
static int
sfc_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
	       unsigned int xstats_count)
{
	struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
	unsigned int written = 0;
	unsigned int supported = 0;
	int mac_rc;

	if (unlikely(xstats == NULL))
		return sfc_xstats_get_nb_supported(sa);

	mac_rc = sfc_port_get_mac_stats(sa, xstats, xstats_count, &written);
	if (mac_rc < 0)
		return mac_rc;

	/* Non-negative result is the number of supported MAC stats */
	supported = mac_rc;

	sfc_sw_xstats_get_vals(sa, xstats, xstats_count, &written,
			       &supported);

	return supported;
}
841 
842 static int
843 sfc_xstats_get_names(struct rte_eth_dev *dev,
844 		     struct rte_eth_xstat_name *xstats_names,
845 		     unsigned int xstats_count)
846 {
847 	struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
848 	struct sfc_port *port = &sa->port;
849 	unsigned int i;
850 	unsigned int nstats = 0;
851 	unsigned int nb_written = 0;
852 	int ret;
853 
854 	if (unlikely(xstats_names == NULL))
855 		return sfc_xstats_get_nb_supported(sa);
856 
857 	for (i = 0; i < EFX_MAC_NSTATS; ++i) {
858 		if (EFX_MAC_STAT_SUPPORTED(port->mac_stats_mask, i)) {
859 			if (nstats < xstats_count) {
860 				strlcpy(xstats_names[nstats].name,
861 					efx_mac_stat_name(sa->nic, i),
862 					sizeof(xstats_names[0].name));
863 				nb_written++;
864 			}
865 			nstats++;
866 		}
867 	}
868 
869 	ret = sfc_sw_xstats_get_names(sa, xstats_names, xstats_count,
870 				      &nb_written, &nstats);
871 	if (ret != 0) {
872 		SFC_ASSERT(ret < 0);
873 		return ret;
874 	}
875 
876 	return nstats;
877 }
878 
879 static int
880 sfc_xstats_get_by_id(struct rte_eth_dev *dev, const uint64_t *ids,
881 		     uint64_t *values, unsigned int n)
882 {
883 	struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
884 	struct sfc_port *port = &sa->port;
885 	unsigned int nb_supported;
886 	unsigned int i;
887 	int rc;
888 
889 	if (unlikely(ids == NULL || values == NULL))
890 		return -EINVAL;
891 
892 	/*
893 	 * Values array could be filled in nonsequential order. Fill values with
894 	 * constant indicating invalid ID first.
895 	 */
896 	for (i = 0; i < n; i++)
897 		values[i] = SFC_XSTAT_ID_INVALID_VAL;
898 
899 	rc = sfc_port_get_mac_stats_by_id(sa, ids, values, n);
900 	if (rc != 0)
901 		return rc;
902 
903 	nb_supported = port->mac_stats_nb_supported;
904 	sfc_sw_xstats_get_vals_by_id(sa, ids, values, n, &nb_supported);
905 
906 	/* Return number of written stats before invalid ID is encountered. */
907 	for (i = 0; i < n; i++) {
908 		if (values[i] == SFC_XSTAT_ID_INVALID_VAL)
909 			return i;
910 	}
911 
912 	return n;
913 }
914 
915 static int
916 sfc_xstats_get_names_by_id(struct rte_eth_dev *dev,
917 			   const uint64_t *ids,
918 			   struct rte_eth_xstat_name *xstats_names,
919 			   unsigned int size)
920 {
921 	struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
922 	struct sfc_port *port = &sa->port;
923 	unsigned int nb_supported;
924 	unsigned int i;
925 	int ret;
926 
927 	if (unlikely(xstats_names == NULL && ids != NULL) ||
928 	    unlikely(xstats_names != NULL && ids == NULL))
929 		return -EINVAL;
930 
931 	if (unlikely(xstats_names == NULL && ids == NULL))
932 		return sfc_xstats_get_nb_supported(sa);
933 
934 	/*
935 	 * Names array could be filled in nonsequential order. Fill names with
936 	 * string indicating invalid ID first.
937 	 */
938 	for (i = 0; i < size; i++)
939 		xstats_names[i].name[0] = SFC_XSTAT_ID_INVALID_NAME;
940 
941 	sfc_adapter_lock(sa);
942 
943 	SFC_ASSERT(port->mac_stats_nb_supported <=
944 		   RTE_DIM(port->mac_stats_by_id));
945 
946 	for (i = 0; i < size; i++) {
947 		if (ids[i] < port->mac_stats_nb_supported) {
948 			strlcpy(xstats_names[i].name,
949 				efx_mac_stat_name(sa->nic,
950 						 port->mac_stats_by_id[ids[i]]),
951 				sizeof(xstats_names[0].name));
952 		}
953 	}
954 
955 	nb_supported = port->mac_stats_nb_supported;
956 
957 	sfc_adapter_unlock(sa);
958 
959 	ret = sfc_sw_xstats_get_names_by_id(sa, ids, xstats_names, size,
960 					    &nb_supported);
961 	if (ret != 0) {
962 		SFC_ASSERT(ret < 0);
963 		return ret;
964 	}
965 
966 	/* Return number of written names before invalid ID is encountered. */
967 	for (i = 0; i < size; i++) {
968 		if (xstats_names[i].name[0] == SFC_XSTAT_ID_INVALID_NAME)
969 			return i;
970 	}
971 
972 	return size;
973 }
974 
975 static int
976 sfc_flow_ctrl_get(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf)
977 {
978 	struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
979 	unsigned int wanted_fc, link_fc;
980 
981 	memset(fc_conf, 0, sizeof(*fc_conf));
982 
983 	sfc_adapter_lock(sa);
984 
985 	if (sa->state == SFC_ETHDEV_STARTED)
986 		efx_mac_fcntl_get(sa->nic, &wanted_fc, &link_fc);
987 	else
988 		link_fc = sa->port.flow_ctrl;
989 
990 	switch (link_fc) {
991 	case 0:
992 		fc_conf->mode = RTE_ETH_FC_NONE;
993 		break;
994 	case EFX_FCNTL_RESPOND:
995 		fc_conf->mode = RTE_ETH_FC_RX_PAUSE;
996 		break;
997 	case EFX_FCNTL_GENERATE:
998 		fc_conf->mode = RTE_ETH_FC_TX_PAUSE;
999 		break;
1000 	case (EFX_FCNTL_RESPOND | EFX_FCNTL_GENERATE):
1001 		fc_conf->mode = RTE_ETH_FC_FULL;
1002 		break;
1003 	default:
1004 		sfc_err(sa, "%s: unexpected flow control value %#x",
1005 			__func__, link_fc);
1006 	}
1007 
1008 	fc_conf->autoneg = sa->port.flow_ctrl_autoneg;
1009 
1010 	sfc_adapter_unlock(sa);
1011 
1012 	return 0;
1013 }
1014 
1015 static int
1016 sfc_flow_ctrl_set(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf)
1017 {
1018 	struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
1019 	struct sfc_port *port = &sa->port;
1020 	unsigned int fcntl;
1021 	int rc;
1022 
1023 	if (fc_conf->high_water != 0 || fc_conf->low_water != 0 ||
1024 	    fc_conf->pause_time != 0 || fc_conf->send_xon != 0 ||
1025 	    fc_conf->mac_ctrl_frame_fwd != 0) {
1026 		sfc_err(sa, "unsupported flow control settings specified");
1027 		rc = EINVAL;
1028 		goto fail_inval;
1029 	}
1030 
1031 	switch (fc_conf->mode) {
1032 	case RTE_ETH_FC_NONE:
1033 		fcntl = 0;
1034 		break;
1035 	case RTE_ETH_FC_RX_PAUSE:
1036 		fcntl = EFX_FCNTL_RESPOND;
1037 		break;
1038 	case RTE_ETH_FC_TX_PAUSE:
1039 		fcntl = EFX_FCNTL_GENERATE;
1040 		break;
1041 	case RTE_ETH_FC_FULL:
1042 		fcntl = EFX_FCNTL_RESPOND | EFX_FCNTL_GENERATE;
1043 		break;
1044 	default:
1045 		rc = EINVAL;
1046 		goto fail_inval;
1047 	}
1048 
1049 	sfc_adapter_lock(sa);
1050 
1051 	if (sa->state == SFC_ETHDEV_STARTED) {
1052 		rc = efx_mac_fcntl_set(sa->nic, fcntl, fc_conf->autoneg);
1053 		if (rc != 0)
1054 			goto fail_mac_fcntl_set;
1055 	}
1056 
1057 	port->flow_ctrl = fcntl;
1058 	port->flow_ctrl_autoneg = fc_conf->autoneg;
1059 
1060 	sfc_adapter_unlock(sa);
1061 
1062 	return 0;
1063 
1064 fail_mac_fcntl_set:
1065 	sfc_adapter_unlock(sa);
1066 fail_inval:
1067 	SFC_ASSERT(rc > 0);
1068 	return -rc;
1069 }
1070 
1071 static int
1072 sfc_check_scatter_on_all_rx_queues(struct sfc_adapter *sa, size_t pdu)
1073 {
1074 	struct sfc_adapter_shared * const sas = sfc_sa2shared(sa);
1075 	const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
1076 	boolean_t scatter_enabled;
1077 	const char *error;
1078 	unsigned int i;
1079 
1080 	for (i = 0; i < sas->rxq_count; i++) {
1081 		if ((sas->rxq_info[i].state & SFC_RXQ_INITIALIZED) == 0)
1082 			continue;
1083 
1084 		scatter_enabled = (sas->rxq_info[i].type_flags &
1085 				   EFX_RXQ_FLAG_SCATTER);
1086 
1087 		if (!sfc_rx_check_scatter(pdu, sa->rxq_ctrl[i].buf_size,
1088 					  encp->enc_rx_prefix_size,
1089 					  scatter_enabled,
1090 					  encp->enc_rx_scatter_max, &error)) {
1091 			sfc_err(sa, "MTU check for RxQ %u failed: %s", i,
1092 				error);
1093 			return EINVAL;
1094 		}
1095 	}
1096 
1097 	return 0;
1098 }
1099 
/*
 * Set the MTU of the port.
 *
 * The MTU is converted to the MAC PDU size which is checked against
 * EFX_MAC_PDU_MIN and EFX_MAC_PDU_MAX and, under the adapter lock,
 * against all initialized Rx queues. If the resulting PDU differs from
 * the current one and the port is started, the port is stopped and
 * started again with the new PDU; if the port is not started, the new
 * PDU is only stored in sa->port.pdu.
 *
 * Returns 0 on success and a negative errno value on failure.
 */
static int
sfc_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu)
{
	struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
	size_t pdu = EFX_MAC_PDU(mtu);
	size_t old_pdu;
	int rc;

	sfc_log_init(sa, "mtu=%u", mtu);

	/* Both range checks below fail with EINVAL */
	rc = EINVAL;
	if (pdu < EFX_MAC_PDU_MIN) {
		sfc_err(sa, "too small MTU %u (PDU size %u less than min %u)",
			(unsigned int)mtu, (unsigned int)pdu,
			EFX_MAC_PDU_MIN);
		goto fail_inval;
	}
	if (pdu > EFX_MAC_PDU_MAX) {
		sfc_err(sa, "too big MTU %u (PDU size %u greater than max %u)",
			(unsigned int)mtu, (unsigned int)pdu,
			(unsigned int)EFX_MAC_PDU_MAX);
		goto fail_inval;
	}

	sfc_adapter_lock(sa);

	rc = sfc_check_scatter_on_all_rx_queues(sa, pdu);
	if (rc != 0)
		goto fail_check_scatter;

	if (pdu != sa->port.pdu) {
		if (sa->state == SFC_ETHDEV_STARTED) {
			sfc_stop(sa);

			/* Remember the old PDU to roll back if start fails */
			old_pdu = sa->port.pdu;
			sa->port.pdu = pdu;
			rc = sfc_start(sa);
			if (rc != 0)
				goto fail_start;
		} else {
			sa->port.pdu = pdu;
		}
	}

	sfc_adapter_unlock(sa);

	sfc_log_init(sa, "done");
	return 0;

fail_start:
	/*
	 * Try to bring the port up again with the previous PDU;
	 * rc still holds the error of the failed start with the new PDU.
	 */
	sa->port.pdu = old_pdu;
	if (sfc_start(sa) != 0)
		sfc_err(sa, "cannot start with neither new (%u) nor old (%u) "
			"PDU max size - port is stopped",
			(unsigned int)pdu, (unsigned int)old_pdu);

fail_check_scatter:
	sfc_adapter_unlock(sa);

fail_inval:
	sfc_log_init(sa, "failed %d", rc);
	/* rc is a positive errno here; callers get it negated */
	SFC_ASSERT(rc > 0);
	return -rc;
}
/*
 * Set the default (primary) MAC address of the port.
 *
 * The new address is always saved in the port private data first.
 * It is applied to the hardware immediately only if the port is
 * started and isolated mode is not active: either directly (when the
 * NIC allows changing the MAC address with filters installed) or by
 * restarting the adapter.
 *
 * Returns 0 on success and a negative errno value on failure; on
 * failure the address saved in the port private data is rolled back
 * to dev->data->mac_addrs[0].
 */
static int
sfc_mac_addr_set(struct rte_eth_dev *dev, struct rte_ether_addr *mac_addr)
{
	struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
	const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
	struct sfc_port *port = &sa->port;
	struct rte_ether_addr *old_addr = &dev->data->mac_addrs[0];
	int rc = 0;

	sfc_adapter_lock(sa);

	/* Nothing to do if the requested address is already the saved one */
	if (rte_is_same_ether_addr(mac_addr, &port->default_mac_addr))
		goto unlock;

	/*
	 * Copy the address to the device private data so that
	 * it could be recalled in the case of adapter restart.
	 */
	rte_ether_addr_copy(mac_addr, &port->default_mac_addr);

	/*
	 * Neither of the two following checks can return
	 * an error. The new MAC address is preserved in
	 * the device private data and can be activated
	 * on the next port start if the user prevents
	 * isolated mode from being enabled.
	 */
	if (sfc_sa2shared(sa)->isolated) {
		sfc_warn(sa, "isolated mode is active on the port");
		sfc_warn(sa, "will not set MAC address");
		goto unlock;
	}

	if (sa->state != SFC_ETHDEV_STARTED) {
		sfc_notice(sa, "the port is not started");
		sfc_notice(sa, "the new MAC address will be set on port start");

		goto unlock;
	}

	if (encp->enc_allow_set_mac_with_installed_filters) {
		rc = efx_mac_addr_set(sa->nic, mac_addr->addr_bytes);
		if (rc != 0) {
			sfc_err(sa, "cannot set MAC address (rc = %u)", rc);
			goto unlock;
		}

		/*
		 * Changing the MAC address by means of MCDI request
		 * has no effect on received traffic, therefore
		 * we also need to update unicast filters
		 */
		rc = sfc_set_rx_mode_unchecked(sa);
		if (rc != 0) {
			sfc_err(sa, "cannot set filter (rc = %u)", rc);
			/* Rollback the old address */
			(void)efx_mac_addr_set(sa->nic, old_addr->addr_bytes);
			(void)sfc_set_rx_mode_unchecked(sa);
		}
	} else {
		sfc_warn(sa, "cannot set MAC address with filters installed");
		sfc_warn(sa, "adapter will be restarted to pick the new MAC");
		sfc_warn(sa, "(some traffic may be dropped)");

		/*
		 * Since setting MAC address with filters installed is not
		 * allowed on the adapter, the new MAC address will be set
		 * by means of adapter restart. sfc_start() shall retrieve
		 * the new address from the device private data and set it.
		 */
		sfc_stop(sa);
		rc = sfc_start(sa);
		if (rc != 0)
			sfc_err(sa, "cannot restart adapter (rc = %u)", rc);
	}

unlock:
	/* On any failure forget the new address saved above */
	if (rc != 0)
		rte_ether_addr_copy(old_addr, &port->default_mac_addr);

	sfc_adapter_unlock(sa);

	/* rc is zero or a positive errno; callers get it negated */
	SFC_ASSERT(rc >= 0);
	return -rc;
}
1249 
1250 
1251 static int
1252 sfc_set_mc_addr_list(struct rte_eth_dev *dev,
1253 		struct rte_ether_addr *mc_addr_set, uint32_t nb_mc_addr)
1254 {
1255 	struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
1256 	struct sfc_port *port = &sa->port;
1257 	uint8_t *mc_addrs = port->mcast_addrs;
1258 	int rc;
1259 	unsigned int i;
1260 
1261 	if (sfc_sa2shared(sa)->isolated) {
1262 		sfc_err(sa, "isolated mode is active on the port");
1263 		sfc_err(sa, "will not set multicast address list");
1264 		return -ENOTSUP;
1265 	}
1266 
1267 	if (mc_addrs == NULL)
1268 		return -ENOBUFS;
1269 
1270 	if (nb_mc_addr > port->max_mcast_addrs) {
1271 		sfc_err(sa, "too many multicast addresses: %u > %u",
1272 			 nb_mc_addr, port->max_mcast_addrs);
1273 		return -EINVAL;
1274 	}
1275 
1276 	for (i = 0; i < nb_mc_addr; ++i) {
1277 		rte_memcpy(mc_addrs, mc_addr_set[i].addr_bytes,
1278 				 EFX_MAC_ADDR_LEN);
1279 		mc_addrs += EFX_MAC_ADDR_LEN;
1280 	}
1281 
1282 	port->nb_mcast_addrs = nb_mc_addr;
1283 
1284 	if (sa->state != SFC_ETHDEV_STARTED)
1285 		return 0;
1286 
1287 	rc = efx_mac_multicast_list_set(sa->nic, port->mcast_addrs,
1288 					port->nb_mcast_addrs);
1289 	if (rc != 0)
1290 		sfc_err(sa, "cannot set multicast address list (rc = %u)", rc);
1291 
1292 	SFC_ASSERT(rc >= 0);
1293 	return -rc;
1294 }
1295 
1296 /*
1297  * The function is used by the secondary process as well. It must not
1298  * use any process-local pointers from the adapter data.
1299  */
1300 static void
1301 sfc_rx_queue_info_get(struct rte_eth_dev *dev, uint16_t ethdev_qid,
1302 		      struct rte_eth_rxq_info *qinfo)
1303 {
1304 	struct sfc_adapter_shared *sas = sfc_adapter_shared_by_eth_dev(dev);
1305 	sfc_ethdev_qid_t sfc_ethdev_qid = ethdev_qid;
1306 	struct sfc_rxq_info *rxq_info;
1307 
1308 	rxq_info = sfc_rxq_info_by_ethdev_qid(sas, sfc_ethdev_qid);
1309 
1310 	qinfo->mp = rxq_info->refill_mb_pool;
1311 	qinfo->conf.rx_free_thresh = rxq_info->refill_threshold;
1312 	qinfo->conf.rx_drop_en = 1;
1313 	qinfo->conf.rx_deferred_start = rxq_info->deferred_start;
1314 	qinfo->conf.offloads = dev->data->dev_conf.rxmode.offloads;
1315 	if (rxq_info->type_flags & EFX_RXQ_FLAG_SCATTER) {
1316 		qinfo->conf.offloads |= RTE_ETH_RX_OFFLOAD_SCATTER;
1317 		qinfo->scattered_rx = 1;
1318 	}
1319 	qinfo->nb_desc = rxq_info->entries;
1320 }
1321 
1322 /*
1323  * The function is used by the secondary process as well. It must not
1324  * use any process-local pointers from the adapter data.
1325  */
1326 static void
1327 sfc_tx_queue_info_get(struct rte_eth_dev *dev, uint16_t ethdev_qid,
1328 		      struct rte_eth_txq_info *qinfo)
1329 {
1330 	struct sfc_adapter_shared *sas = sfc_adapter_shared_by_eth_dev(dev);
1331 	struct sfc_txq_info *txq_info;
1332 
1333 	SFC_ASSERT(ethdev_qid < sas->ethdev_txq_count);
1334 
1335 	txq_info = sfc_txq_info_by_ethdev_qid(sas, ethdev_qid);
1336 
1337 	memset(qinfo, 0, sizeof(*qinfo));
1338 
1339 	qinfo->conf.offloads = txq_info->offloads;
1340 	qinfo->conf.tx_free_thresh = txq_info->free_thresh;
1341 	qinfo->conf.tx_deferred_start = txq_info->deferred_start;
1342 	qinfo->nb_desc = txq_info->entries;
1343 }
1344 
1345 /*
1346  * The function is used by the secondary process as well. It must not
1347  * use any process-local pointers from the adapter data.
1348  */
1349 static uint32_t
1350 sfc_rx_queue_count(void *rx_queue)
1351 {
1352 	struct sfc_dp_rxq *dp_rxq = rx_queue;
1353 	const struct sfc_dp_rx *dp_rx;
1354 	struct sfc_rxq_info *rxq_info;
1355 
1356 	dp_rx = sfc_dp_rx_by_dp_rxq(dp_rxq);
1357 	rxq_info = sfc_rxq_info_by_dp_rxq(dp_rxq);
1358 
1359 	if ((rxq_info->state & SFC_RXQ_STARTED) == 0)
1360 		return 0;
1361 
1362 	return dp_rx->qdesc_npending(dp_rxq);
1363 }
1364 
1365 /*
1366  * The function is used by the secondary process as well. It must not
1367  * use any process-local pointers from the adapter data.
1368  */
1369 static int
1370 sfc_rx_descriptor_status(void *queue, uint16_t offset)
1371 {
1372 	struct sfc_dp_rxq *dp_rxq = queue;
1373 	const struct sfc_dp_rx *dp_rx;
1374 
1375 	dp_rx = sfc_dp_rx_by_dp_rxq(dp_rxq);
1376 
1377 	return dp_rx->qdesc_status(dp_rxq, offset);
1378 }
1379 
1380 /*
1381  * The function is used by the secondary process as well. It must not
1382  * use any process-local pointers from the adapter data.
1383  */
1384 static int
1385 sfc_tx_descriptor_status(void *queue, uint16_t offset)
1386 {
1387 	struct sfc_dp_txq *dp_txq = queue;
1388 	const struct sfc_dp_tx *dp_tx;
1389 
1390 	dp_tx = sfc_dp_tx_by_dp_txq(dp_txq);
1391 
1392 	return dp_tx->qdesc_status(dp_txq, offset);
1393 }
1394 
1395 static int
1396 sfc_rx_queue_start(struct rte_eth_dev *dev, uint16_t ethdev_qid)
1397 {
1398 	struct sfc_adapter_shared *sas = sfc_adapter_shared_by_eth_dev(dev);
1399 	struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
1400 	sfc_ethdev_qid_t sfc_ethdev_qid = ethdev_qid;
1401 	struct sfc_rxq_info *rxq_info;
1402 	sfc_sw_index_t sw_index;
1403 	int rc;
1404 
1405 	sfc_log_init(sa, "RxQ=%u", ethdev_qid);
1406 
1407 	sfc_adapter_lock(sa);
1408 
1409 	rc = EINVAL;
1410 	if (sa->state != SFC_ETHDEV_STARTED)
1411 		goto fail_not_started;
1412 
1413 	rxq_info = sfc_rxq_info_by_ethdev_qid(sas, sfc_ethdev_qid);
1414 	if (rxq_info->state != SFC_RXQ_INITIALIZED)
1415 		goto fail_not_setup;
1416 
1417 	sw_index = sfc_rxq_sw_index_by_ethdev_rx_qid(sas, sfc_ethdev_qid);
1418 	rc = sfc_rx_qstart(sa, sw_index);
1419 	if (rc != 0)
1420 		goto fail_rx_qstart;
1421 
1422 	rxq_info->deferred_started = B_TRUE;
1423 
1424 	sfc_adapter_unlock(sa);
1425 
1426 	return 0;
1427 
1428 fail_rx_qstart:
1429 fail_not_setup:
1430 fail_not_started:
1431 	sfc_adapter_unlock(sa);
1432 	SFC_ASSERT(rc > 0);
1433 	return -rc;
1434 }
1435 
1436 static int
1437 sfc_rx_queue_stop(struct rte_eth_dev *dev, uint16_t ethdev_qid)
1438 {
1439 	struct sfc_adapter_shared *sas = sfc_adapter_shared_by_eth_dev(dev);
1440 	struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
1441 	sfc_ethdev_qid_t sfc_ethdev_qid = ethdev_qid;
1442 	struct sfc_rxq_info *rxq_info;
1443 	sfc_sw_index_t sw_index;
1444 
1445 	sfc_log_init(sa, "RxQ=%u", ethdev_qid);
1446 
1447 	sfc_adapter_lock(sa);
1448 
1449 	sw_index = sfc_rxq_sw_index_by_ethdev_rx_qid(sas, sfc_ethdev_qid);
1450 	sfc_rx_qstop(sa, sw_index);
1451 
1452 	rxq_info = sfc_rxq_info_by_ethdev_qid(sas, sfc_ethdev_qid);
1453 	rxq_info->deferred_started = B_FALSE;
1454 
1455 	sfc_adapter_unlock(sa);
1456 
1457 	return 0;
1458 }
1459 
1460 static int
1461 sfc_tx_queue_start(struct rte_eth_dev *dev, uint16_t ethdev_qid)
1462 {
1463 	struct sfc_adapter_shared *sas = sfc_adapter_shared_by_eth_dev(dev);
1464 	struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
1465 	struct sfc_txq_info *txq_info;
1466 	sfc_sw_index_t sw_index;
1467 	int rc;
1468 
1469 	sfc_log_init(sa, "TxQ = %u", ethdev_qid);
1470 
1471 	sfc_adapter_lock(sa);
1472 
1473 	rc = EINVAL;
1474 	if (sa->state != SFC_ETHDEV_STARTED)
1475 		goto fail_not_started;
1476 
1477 	txq_info = sfc_txq_info_by_ethdev_qid(sas, ethdev_qid);
1478 	if (txq_info->state != SFC_TXQ_INITIALIZED)
1479 		goto fail_not_setup;
1480 
1481 	sw_index = sfc_txq_sw_index_by_ethdev_tx_qid(sas, ethdev_qid);
1482 	rc = sfc_tx_qstart(sa, sw_index);
1483 	if (rc != 0)
1484 		goto fail_tx_qstart;
1485 
1486 	txq_info->deferred_started = B_TRUE;
1487 
1488 	sfc_adapter_unlock(sa);
1489 	return 0;
1490 
1491 fail_tx_qstart:
1492 
1493 fail_not_setup:
1494 fail_not_started:
1495 	sfc_adapter_unlock(sa);
1496 	SFC_ASSERT(rc > 0);
1497 	return -rc;
1498 }
1499 
1500 static int
1501 sfc_tx_queue_stop(struct rte_eth_dev *dev, uint16_t ethdev_qid)
1502 {
1503 	struct sfc_adapter_shared *sas = sfc_adapter_shared_by_eth_dev(dev);
1504 	struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
1505 	struct sfc_txq_info *txq_info;
1506 	sfc_sw_index_t sw_index;
1507 
1508 	sfc_log_init(sa, "TxQ = %u", ethdev_qid);
1509 
1510 	sfc_adapter_lock(sa);
1511 
1512 	sw_index = sfc_txq_sw_index_by_ethdev_tx_qid(sas, ethdev_qid);
1513 	sfc_tx_qstop(sa, sw_index);
1514 
1515 	txq_info = sfc_txq_info_by_ethdev_qid(sas, ethdev_qid);
1516 	txq_info->deferred_started = B_FALSE;
1517 
1518 	sfc_adapter_unlock(sa);
1519 	return 0;
1520 }
1521 
1522 static efx_tunnel_protocol_t
1523 sfc_tunnel_rte_type_to_efx_udp_proto(enum rte_eth_tunnel_type rte_type)
1524 {
1525 	switch (rte_type) {
1526 	case RTE_ETH_TUNNEL_TYPE_VXLAN:
1527 		return EFX_TUNNEL_PROTOCOL_VXLAN;
1528 	case RTE_ETH_TUNNEL_TYPE_GENEVE:
1529 		return EFX_TUNNEL_PROTOCOL_GENEVE;
1530 	default:
1531 		return EFX_TUNNEL_NPROTOS;
1532 	}
1533 }
1534 
/* Operation to be done by sfc_dev_udp_tunnel_op() */
enum sfc_udp_tunnel_op_e {
	/* Add a UDP port to the tunnel configuration */
	SFC_UDP_TUNNEL_ADD_PORT,
	/* Remove a UDP port from the tunnel configuration */
	SFC_UDP_TUNNEL_DEL_PORT,
};
1539 
/*
 * Add or delete a UDP tunnel port.
 *
 * The tunnel type is mapped to the libefx tunnel protocol first
 * (unsupported types fail with -ENOTSUP). Then, under the adapter
 * lock, the port is added to or removed from the libefx tunnel
 * configuration and, if the adapter is started, the configuration
 * is applied by efx_tunnel_reconfigure(). If applying fails, the
 * change made to the configuration is reverted.
 *
 * Returns 0 on success and a negative errno value on failure.
 */
static int
sfc_dev_udp_tunnel_op(struct rte_eth_dev *dev,
		      struct rte_eth_udp_tunnel *tunnel_udp,
		      enum sfc_udp_tunnel_op_e op)
{
	struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
	efx_tunnel_protocol_t tunnel_proto;
	int rc;

	sfc_log_init(sa, "%s udp_port=%u prot_type=%u",
		     (op == SFC_UDP_TUNNEL_ADD_PORT) ? "add" :
		     (op == SFC_UDP_TUNNEL_DEL_PORT) ? "delete" : "unknown",
		     tunnel_udp->udp_port, tunnel_udp->prot_type);

	tunnel_proto =
		sfc_tunnel_rte_type_to_efx_udp_proto(tunnel_udp->prot_type);
	/* EFX_TUNNEL_NPROTOS is returned for unsupported tunnel types */
	if (tunnel_proto >= EFX_TUNNEL_NPROTOS) {
		rc = ENOTSUP;
		goto fail_bad_proto;
	}

	sfc_adapter_lock(sa);

	switch (op) {
	case SFC_UDP_TUNNEL_ADD_PORT:
		rc = efx_tunnel_config_udp_add(sa->nic,
					       tunnel_udp->udp_port,
					       tunnel_proto);
		break;
	case SFC_UDP_TUNNEL_DEL_PORT:
		rc = efx_tunnel_config_udp_remove(sa->nic,
						  tunnel_udp->udp_port,
						  tunnel_proto);
		break;
	default:
		rc = EINVAL;
		goto fail_bad_op;
	}

	if (rc != 0)
		goto fail_op;

	/* The change is applied right away only on a started adapter */
	if (sa->state == SFC_ETHDEV_STARTED) {
		rc = efx_tunnel_reconfigure(sa->nic);
		if (rc == EAGAIN) {
			/*
			 * Configuration is accepted by FW and MC reboot
			 * is initiated to apply the changes. MC reboot
			 * will be handled in a usual way (MC reboot
			 * event on management event queue and adapter
			 * restart).
			 */
			rc = 0;
		} else if (rc != 0) {
			goto fail_reconfigure;
		}
	}

	sfc_adapter_unlock(sa);
	return 0;

fail_reconfigure:
	/* Remove/restore entry since the change makes the trouble */
	switch (op) {
	case SFC_UDP_TUNNEL_ADD_PORT:
		(void)efx_tunnel_config_udp_remove(sa->nic,
						   tunnel_udp->udp_port,
						   tunnel_proto);
		break;
	case SFC_UDP_TUNNEL_DEL_PORT:
		(void)efx_tunnel_config_udp_add(sa->nic,
						tunnel_udp->udp_port,
						tunnel_proto);
		break;
	}

fail_op:
fail_bad_op:
	sfc_adapter_unlock(sa);

fail_bad_proto:
	/* rc is a positive errno here; callers get it negated */
	SFC_ASSERT(rc > 0);
	return -rc;
}
1624 
1625 static int
1626 sfc_dev_udp_tunnel_port_add(struct rte_eth_dev *dev,
1627 			    struct rte_eth_udp_tunnel *tunnel_udp)
1628 {
1629 	return sfc_dev_udp_tunnel_op(dev, tunnel_udp, SFC_UDP_TUNNEL_ADD_PORT);
1630 }
1631 
1632 static int
1633 sfc_dev_udp_tunnel_port_del(struct rte_eth_dev *dev,
1634 			    struct rte_eth_udp_tunnel *tunnel_udp)
1635 {
1636 	return sfc_dev_udp_tunnel_op(dev, tunnel_udp, SFC_UDP_TUNNEL_DEL_PORT);
1637 }
1638 
1639 /*
1640  * The function is used by the secondary process as well. It must not
1641  * use any process-local pointers from the adapter data.
1642  */
1643 static int
1644 sfc_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
1645 			  struct rte_eth_rss_conf *rss_conf)
1646 {
1647 	struct sfc_adapter_shared *sas = sfc_adapter_shared_by_eth_dev(dev);
1648 	struct sfc_rss *rss = &sas->rss;
1649 
1650 	if (rss->context_type != EFX_RX_SCALE_EXCLUSIVE)
1651 		return -ENOTSUP;
1652 
1653 	/*
1654 	 * Mapping of hash configuration between RTE and EFX is not one-to-one,
1655 	 * hence, conversion is done here to derive a correct set of RTE_ETH_RSS
1656 	 * flags which corresponds to the active EFX configuration stored
1657 	 * locally in 'sfc_adapter' and kept up-to-date
1658 	 */
1659 	rss_conf->rss_hf = sfc_rx_hf_efx_to_rte(rss, rss->hash_types);
1660 	rss_conf->rss_key_len = EFX_RSS_KEY_SIZE;
1661 	if (rss_conf->rss_key != NULL)
1662 		rte_memcpy(rss_conf->rss_key, rss->key, EFX_RSS_KEY_SIZE);
1663 
1664 	return 0;
1665 }
1666 
1667 static int
1668 sfc_dev_rss_hash_update(struct rte_eth_dev *dev,
1669 			struct rte_eth_rss_conf *rss_conf)
1670 {
1671 	struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
1672 	struct sfc_rss *rss = &sfc_sa2shared(sa)->rss;
1673 	unsigned int efx_hash_types;
1674 	uint32_t contexts[] = {EFX_RSS_CONTEXT_DEFAULT, rss->dummy_rss_context};
1675 	unsigned int n_contexts;
1676 	unsigned int mode_i = 0;
1677 	unsigned int key_i = 0;
1678 	unsigned int i = 0;
1679 	int rc = 0;
1680 
1681 	n_contexts = rss->dummy_rss_context == EFX_RSS_CONTEXT_DEFAULT ? 1 : 2;
1682 
1683 	if (sfc_sa2shared(sa)->isolated)
1684 		return -ENOTSUP;
1685 
1686 	if (rss->context_type != EFX_RX_SCALE_EXCLUSIVE) {
1687 		sfc_err(sa, "RSS is not available");
1688 		return -ENOTSUP;
1689 	}
1690 
1691 	if (rss->channels == 0) {
1692 		sfc_err(sa, "RSS is not configured");
1693 		return -EINVAL;
1694 	}
1695 
1696 	if ((rss_conf->rss_key != NULL) &&
1697 	    (rss_conf->rss_key_len != sizeof(rss->key))) {
1698 		sfc_err(sa, "RSS key size is wrong (should be %zu)",
1699 			sizeof(rss->key));
1700 		return -EINVAL;
1701 	}
1702 
1703 	sfc_adapter_lock(sa);
1704 
1705 	rc = sfc_rx_hf_rte_to_efx(sa, rss_conf->rss_hf, &efx_hash_types);
1706 	if (rc != 0)
1707 		goto fail_rx_hf_rte_to_efx;
1708 
1709 	for (mode_i = 0; mode_i < n_contexts; mode_i++) {
1710 		rc = efx_rx_scale_mode_set(sa->nic, contexts[mode_i],
1711 					   rss->hash_alg, efx_hash_types,
1712 					   B_TRUE);
1713 		if (rc != 0)
1714 			goto fail_scale_mode_set;
1715 	}
1716 
1717 	if (rss_conf->rss_key != NULL) {
1718 		if (sa->state == SFC_ETHDEV_STARTED) {
1719 			for (key_i = 0; key_i < n_contexts; key_i++) {
1720 				rc = efx_rx_scale_key_set(sa->nic,
1721 							  contexts[key_i],
1722 							  rss_conf->rss_key,
1723 							  sizeof(rss->key));
1724 				if (rc != 0)
1725 					goto fail_scale_key_set;
1726 			}
1727 		}
1728 
1729 		rte_memcpy(rss->key, rss_conf->rss_key, sizeof(rss->key));
1730 	}
1731 
1732 	rss->hash_types = efx_hash_types;
1733 
1734 	sfc_adapter_unlock(sa);
1735 
1736 	return 0;
1737 
1738 fail_scale_key_set:
1739 	for (i = 0; i < key_i; i++) {
1740 		if (efx_rx_scale_key_set(sa->nic, contexts[i], rss->key,
1741 					 sizeof(rss->key)) != 0)
1742 			sfc_err(sa, "failed to restore RSS key");
1743 	}
1744 
1745 fail_scale_mode_set:
1746 	for (i = 0; i < mode_i; i++) {
1747 		if (efx_rx_scale_mode_set(sa->nic, contexts[i],
1748 					  EFX_RX_HASHALG_TOEPLITZ,
1749 					  rss->hash_types, B_TRUE) != 0)
1750 			sfc_err(sa, "failed to restore RSS mode");
1751 	}
1752 
1753 fail_rx_hf_rte_to_efx:
1754 	sfc_adapter_unlock(sa);
1755 	return -rc;
1756 }
1757 
1758 /*
1759  * The function is used by the secondary process as well. It must not
1760  * use any process-local pointers from the adapter data.
1761  */
1762 static int
1763 sfc_dev_rss_reta_query(struct rte_eth_dev *dev,
1764 		       struct rte_eth_rss_reta_entry64 *reta_conf,
1765 		       uint16_t reta_size)
1766 {
1767 	struct sfc_adapter_shared *sas = sfc_adapter_shared_by_eth_dev(dev);
1768 	struct sfc_rss *rss = &sas->rss;
1769 	int entry;
1770 
1771 	if (rss->context_type != EFX_RX_SCALE_EXCLUSIVE || sas->isolated)
1772 		return -ENOTSUP;
1773 
1774 	if (rss->channels == 0)
1775 		return -EINVAL;
1776 
1777 	if (reta_size != EFX_RSS_TBL_SIZE)
1778 		return -EINVAL;
1779 
1780 	for (entry = 0; entry < reta_size; entry++) {
1781 		int grp = entry / RTE_ETH_RETA_GROUP_SIZE;
1782 		int grp_idx = entry % RTE_ETH_RETA_GROUP_SIZE;
1783 
1784 		if ((reta_conf[grp].mask >> grp_idx) & 1)
1785 			reta_conf[grp].reta[grp_idx] = rss->tbl[entry];
1786 	}
1787 
1788 	return 0;
1789 }
1790 
1791 static int
1792 sfc_dev_rss_reta_update(struct rte_eth_dev *dev,
1793 			struct rte_eth_rss_reta_entry64 *reta_conf,
1794 			uint16_t reta_size)
1795 {
1796 	struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
1797 	struct sfc_rss *rss = &sfc_sa2shared(sa)->rss;
1798 	unsigned int *rss_tbl_new;
1799 	uint16_t entry;
1800 	int rc = 0;
1801 
1802 
1803 	if (sfc_sa2shared(sa)->isolated)
1804 		return -ENOTSUP;
1805 
1806 	if (rss->context_type != EFX_RX_SCALE_EXCLUSIVE) {
1807 		sfc_err(sa, "RSS is not available");
1808 		return -ENOTSUP;
1809 	}
1810 
1811 	if (rss->channels == 0) {
1812 		sfc_err(sa, "RSS is not configured");
1813 		return -EINVAL;
1814 	}
1815 
1816 	if (reta_size != EFX_RSS_TBL_SIZE) {
1817 		sfc_err(sa, "RETA size is wrong (should be %u)",
1818 			EFX_RSS_TBL_SIZE);
1819 		return -EINVAL;
1820 	}
1821 
1822 	rss_tbl_new = rte_zmalloc("rss_tbl_new", sizeof(rss->tbl), 0);
1823 	if (rss_tbl_new == NULL)
1824 		return -ENOMEM;
1825 
1826 	sfc_adapter_lock(sa);
1827 
1828 	rte_memcpy(rss_tbl_new, rss->tbl, sizeof(rss->tbl));
1829 
1830 	for (entry = 0; entry < reta_size; entry++) {
1831 		int grp_idx = entry % RTE_ETH_RETA_GROUP_SIZE;
1832 		struct rte_eth_rss_reta_entry64 *grp;
1833 
1834 		grp = &reta_conf[entry / RTE_ETH_RETA_GROUP_SIZE];
1835 
1836 		if (grp->mask & (1ull << grp_idx)) {
1837 			if (grp->reta[grp_idx] >= rss->channels) {
1838 				rc = EINVAL;
1839 				goto bad_reta_entry;
1840 			}
1841 			rss_tbl_new[entry] = grp->reta[grp_idx];
1842 		}
1843 	}
1844 
1845 	if (sa->state == SFC_ETHDEV_STARTED) {
1846 		rc = efx_rx_scale_tbl_set(sa->nic, EFX_RSS_CONTEXT_DEFAULT,
1847 					  rss_tbl_new, EFX_RSS_TBL_SIZE);
1848 		if (rc != 0)
1849 			goto fail_scale_tbl_set;
1850 	}
1851 
1852 	rte_memcpy(rss->tbl, rss_tbl_new, sizeof(rss->tbl));
1853 
1854 fail_scale_tbl_set:
1855 bad_reta_entry:
1856 	sfc_adapter_unlock(sa);
1857 
1858 	rte_free(rss_tbl_new);
1859 
1860 	SFC_ASSERT(rc >= 0);
1861 	return -rc;
1862 }
1863 
1864 static int
1865 sfc_dev_flow_ops_get(struct rte_eth_dev *dev __rte_unused,
1866 		     const struct rte_flow_ops **ops)
1867 {
1868 	*ops = &sfc_flow_ops;
1869 	return 0;
1870 }
1871 
1872 static int
1873 sfc_pool_ops_supported(struct rte_eth_dev *dev, const char *pool)
1874 {
1875 	const struct sfc_adapter_priv *sap = sfc_adapter_priv_by_eth_dev(dev);
1876 
1877 	/*
1878 	 * If Rx datapath does not provide callback to check mempool,
1879 	 * all pools are supported.
1880 	 */
1881 	if (sap->dp_rx->pool_ops_supported == NULL)
1882 		return 1;
1883 
1884 	return sap->dp_rx->pool_ops_supported(pool);
1885 }
1886 
1887 static int
1888 sfc_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t ethdev_qid)
1889 {
1890 	const struct sfc_adapter_priv *sap = sfc_adapter_priv_by_eth_dev(dev);
1891 	struct sfc_adapter_shared *sas = sfc_adapter_shared_by_eth_dev(dev);
1892 	sfc_ethdev_qid_t sfc_ethdev_qid = ethdev_qid;
1893 	struct sfc_rxq_info *rxq_info;
1894 
1895 	rxq_info = sfc_rxq_info_by_ethdev_qid(sas, sfc_ethdev_qid);
1896 
1897 	return sap->dp_rx->intr_enable(rxq_info->dp);
1898 }
1899 
1900 static int
1901 sfc_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t ethdev_qid)
1902 {
1903 	const struct sfc_adapter_priv *sap = sfc_adapter_priv_by_eth_dev(dev);
1904 	struct sfc_adapter_shared *sas = sfc_adapter_shared_by_eth_dev(dev);
1905 	sfc_ethdev_qid_t sfc_ethdev_qid = ethdev_qid;
1906 	struct sfc_rxq_info *rxq_info;
1907 
1908 	rxq_info = sfc_rxq_info_by_ethdev_qid(sas, sfc_ethdev_qid);
1909 
1910 	return sap->dp_rx->intr_disable(rxq_info->dp);
1911 }
1912 
/* Context passed to the callback which processes MAE mport journal entries */
struct sfc_mport_journal_ctx {
	/* Adapter the journal is read for (used for logging and lock check) */
	struct sfc_adapter		*sa;
	/* MAE switch domain in which switch ports are looked up/assigned */
	uint16_t			switch_domain_id;
	/* Own MCDI client handle; mports having this handle are skipped */
	uint32_t			mcdi_handle;
	/*
	 * True if the switch domain already has a controller mapping;
	 * in this case no controllers are collected below.
	 */
	bool				controllers_assigned;
	/* Controllers (PCIe interfaces) collected from journal entries */
	efx_pcie_interface_t		*controllers;
	/* Number of elements in the controllers array */
	size_t				nb_controllers;
};
1921 
1922 static int
1923 sfc_journal_ctx_add_controller(struct sfc_mport_journal_ctx *ctx,
1924 			       efx_pcie_interface_t intf)
1925 {
1926 	efx_pcie_interface_t *new_controllers;
1927 	size_t i, target;
1928 	size_t new_size;
1929 
1930 	if (ctx->controllers == NULL) {
1931 		ctx->controllers = rte_malloc("sfc_controller_mapping",
1932 					      sizeof(ctx->controllers[0]), 0);
1933 		if (ctx->controllers == NULL)
1934 			return ENOMEM;
1935 
1936 		ctx->controllers[0] = intf;
1937 		ctx->nb_controllers = 1;
1938 
1939 		return 0;
1940 	}
1941 
1942 	for (i = 0; i < ctx->nb_controllers; i++) {
1943 		if (ctx->controllers[i] == intf)
1944 			return 0;
1945 		if (ctx->controllers[i] > intf)
1946 			break;
1947 	}
1948 	target = i;
1949 
1950 	ctx->nb_controllers += 1;
1951 	new_size = ctx->nb_controllers * sizeof(ctx->controllers[0]);
1952 
1953 	new_controllers = rte_realloc(ctx->controllers, new_size, 0);
1954 	if (new_controllers == NULL) {
1955 		rte_free(ctx->controllers);
1956 		return ENOMEM;
1957 	}
1958 	ctx->controllers = new_controllers;
1959 
1960 	for (i = target + 1; i < ctx->nb_controllers; i++)
1961 		ctx->controllers[i] = ctx->controllers[i - 1];
1962 
1963 	ctx->controllers[target] = intf;
1964 
1965 	return 0;
1966 }
1967 
1968 static efx_rc_t
1969 sfc_process_mport_journal_entry(struct sfc_mport_journal_ctx *ctx,
1970 				efx_mport_desc_t *mport)
1971 {
1972 	struct sfc_mae_switch_port_request req;
1973 	efx_mport_sel_t entity_selector;
1974 	efx_mport_sel_t ethdev_mport;
1975 	uint16_t switch_port_id;
1976 	efx_rc_t efx_rc;
1977 	int rc;
1978 
1979 	sfc_dbg(ctx->sa,
1980 		"processing mport id %u (controller %u pf %u vf %u)",
1981 		mport->emd_id.id, mport->emd_vnic.ev_intf,
1982 		mport->emd_vnic.ev_pf, mport->emd_vnic.ev_vf);
1983 	efx_mae_mport_invalid(&ethdev_mport);
1984 
1985 	if (!ctx->controllers_assigned) {
1986 		rc = sfc_journal_ctx_add_controller(ctx,
1987 						    mport->emd_vnic.ev_intf);
1988 		if (rc != 0)
1989 			return rc;
1990 	}
1991 
1992 	/* Build Mport selector */
1993 	efx_rc = efx_mae_mport_by_pcie_mh_function(mport->emd_vnic.ev_intf,
1994 						mport->emd_vnic.ev_pf,
1995 						mport->emd_vnic.ev_vf,
1996 						&entity_selector);
1997 	if (efx_rc != 0) {
1998 		sfc_err(ctx->sa, "failed to build entity mport selector for c%upf%uvf%u",
1999 			mport->emd_vnic.ev_intf,
2000 			mport->emd_vnic.ev_pf,
2001 			mport->emd_vnic.ev_vf);
2002 		return efx_rc;
2003 	}
2004 
2005 	rc = sfc_mae_switch_port_id_by_entity(ctx->switch_domain_id,
2006 					      &entity_selector,
2007 					      SFC_MAE_SWITCH_PORT_REPRESENTOR,
2008 					      &switch_port_id);
2009 	switch (rc) {
2010 	case 0:
2011 		/* Already registered */
2012 		break;
2013 	case ENOENT:
2014 		/*
2015 		 * No representor has been created for this entity.
2016 		 * Create a dummy switch registry entry with an invalid ethdev
2017 		 * mport selector. When a corresponding representor is created,
2018 		 * this entry will be updated.
2019 		 */
2020 		req.type = SFC_MAE_SWITCH_PORT_REPRESENTOR;
2021 		req.entity_mportp = &entity_selector;
2022 		req.ethdev_mportp = &ethdev_mport;
2023 		req.ethdev_port_id = RTE_MAX_ETHPORTS;
2024 		req.port_data.repr.intf = mport->emd_vnic.ev_intf;
2025 		req.port_data.repr.pf = mport->emd_vnic.ev_pf;
2026 		req.port_data.repr.vf = mport->emd_vnic.ev_vf;
2027 
2028 		rc = sfc_mae_assign_switch_port(ctx->switch_domain_id,
2029 						&req, &switch_port_id);
2030 		if (rc != 0) {
2031 			sfc_err(ctx->sa,
2032 				"failed to assign MAE switch port for c%upf%uvf%u: %s",
2033 				mport->emd_vnic.ev_intf,
2034 				mport->emd_vnic.ev_pf,
2035 				mport->emd_vnic.ev_vf,
2036 				rte_strerror(rc));
2037 			return rc;
2038 		}
2039 		break;
2040 	default:
2041 		sfc_err(ctx->sa, "failed to find MAE switch port for c%upf%uvf%u: %s",
2042 			mport->emd_vnic.ev_intf,
2043 			mport->emd_vnic.ev_pf,
2044 			mport->emd_vnic.ev_vf,
2045 			rte_strerror(rc));
2046 		return rc;
2047 	}
2048 
2049 	return 0;
2050 }
2051 
2052 static efx_rc_t
2053 sfc_process_mport_journal_cb(void *data, efx_mport_desc_t *mport,
2054 			     size_t mport_len)
2055 {
2056 	struct sfc_mport_journal_ctx *ctx = data;
2057 
2058 	if (ctx == NULL || ctx->sa == NULL) {
2059 		sfc_err(ctx->sa, "received NULL context or SFC adapter");
2060 		return EINVAL;
2061 	}
2062 
2063 	if (mport_len != sizeof(*mport)) {
2064 		sfc_err(ctx->sa, "actual and expected mport buffer sizes differ");
2065 		return EINVAL;
2066 	}
2067 
2068 	SFC_ASSERT(sfc_adapter_is_locked(ctx->sa));
2069 
2070 	/*
2071 	 * If a zombie flag is set, it means the mport has been marked for
2072 	 * deletion and cannot be used for any new operations. The mport will
2073 	 * be destroyed completely once all references to it are released.
2074 	 */
2075 	if (mport->emd_zombie) {
2076 		sfc_dbg(ctx->sa, "mport is a zombie, skipping");
2077 		return 0;
2078 	}
2079 	if (mport->emd_type != EFX_MPORT_TYPE_VNIC) {
2080 		sfc_dbg(ctx->sa, "mport is not a VNIC, skipping");
2081 		return 0;
2082 	}
2083 	if (mport->emd_vnic.ev_client_type != EFX_MPORT_VNIC_CLIENT_FUNCTION) {
2084 		sfc_dbg(ctx->sa, "mport is not a function, skipping");
2085 		return 0;
2086 	}
2087 	if (mport->emd_vnic.ev_handle == ctx->mcdi_handle) {
2088 		sfc_dbg(ctx->sa, "mport is this driver instance, skipping");
2089 		return 0;
2090 	}
2091 
2092 	return sfc_process_mport_journal_entry(ctx, mport);
2093 }
2094 
2095 static int
2096 sfc_process_mport_journal(struct sfc_adapter *sa)
2097 {
2098 	struct sfc_mport_journal_ctx ctx;
2099 	const efx_pcie_interface_t *controllers;
2100 	size_t nb_controllers;
2101 	efx_rc_t efx_rc;
2102 	int rc;
2103 
2104 	memset(&ctx, 0, sizeof(ctx));
2105 	ctx.sa = sa;
2106 	ctx.switch_domain_id = sa->mae.switch_domain_id;
2107 
2108 	efx_rc = efx_mcdi_get_own_client_handle(sa->nic, &ctx.mcdi_handle);
2109 	if (efx_rc != 0) {
2110 		sfc_err(sa, "failed to get own MCDI handle");
2111 		SFC_ASSERT(efx_rc > 0);
2112 		return efx_rc;
2113 	}
2114 
2115 	rc = sfc_mae_switch_domain_controllers(ctx.switch_domain_id,
2116 					       &controllers, &nb_controllers);
2117 	if (rc != 0) {
2118 		sfc_err(sa, "failed to get controller mapping");
2119 		return rc;
2120 	}
2121 
2122 	ctx.controllers_assigned = controllers != NULL;
2123 	ctx.controllers = NULL;
2124 	ctx.nb_controllers = 0;
2125 
2126 	efx_rc = efx_mae_read_mport_journal(sa->nic,
2127 					    sfc_process_mport_journal_cb, &ctx);
2128 	if (efx_rc != 0) {
2129 		sfc_err(sa, "failed to process MAE mport journal");
2130 		SFC_ASSERT(efx_rc > 0);
2131 		return efx_rc;
2132 	}
2133 
2134 	if (controllers == NULL) {
2135 		rc = sfc_mae_switch_domain_map_controllers(ctx.switch_domain_id,
2136 							   ctx.controllers,
2137 							   ctx.nb_controllers);
2138 		if (rc != 0)
2139 			return rc;
2140 	}
2141 
2142 	return 0;
2143 }
2144 
2145 static void
2146 sfc_count_representors_cb(enum sfc_mae_switch_port_type type,
2147 			  const efx_mport_sel_t *ethdev_mportp __rte_unused,
2148 			  uint16_t ethdev_port_id __rte_unused,
2149 			  const efx_mport_sel_t *entity_mportp __rte_unused,
2150 			  uint16_t switch_port_id __rte_unused,
2151 			  union sfc_mae_switch_port_data *port_datap
2152 				__rte_unused,
2153 			  void *user_datap)
2154 {
2155 	int *counter = user_datap;
2156 
2157 	SFC_ASSERT(counter != NULL);
2158 
2159 	if (type == SFC_MAE_SWITCH_PORT_REPRESENTOR)
2160 		(*counter)++;
2161 }
2162 
2163 struct sfc_get_representors_ctx {
2164 	struct rte_eth_representor_info	*info;
2165 	struct sfc_adapter		*sa;
2166 	uint16_t			switch_domain_id;
2167 	const efx_pcie_interface_t	*controllers;
2168 	size_t				nb_controllers;
2169 };
2170 
2171 static void
2172 sfc_get_representors_cb(enum sfc_mae_switch_port_type type,
2173 			const efx_mport_sel_t *ethdev_mportp __rte_unused,
2174 			uint16_t ethdev_port_id __rte_unused,
2175 			const efx_mport_sel_t *entity_mportp __rte_unused,
2176 			uint16_t switch_port_id,
2177 			union sfc_mae_switch_port_data *port_datap,
2178 			void *user_datap)
2179 {
2180 	struct sfc_get_representors_ctx *ctx = user_datap;
2181 	struct rte_eth_representor_range *range;
2182 	int ret;
2183 	int rc;
2184 
2185 	SFC_ASSERT(ctx != NULL);
2186 	SFC_ASSERT(ctx->info != NULL);
2187 	SFC_ASSERT(ctx->sa != NULL);
2188 
2189 	if (type != SFC_MAE_SWITCH_PORT_REPRESENTOR) {
2190 		sfc_dbg(ctx->sa, "not a representor, skipping");
2191 		return;
2192 	}
2193 	if (ctx->info->nb_ranges >= ctx->info->nb_ranges_alloc) {
2194 		sfc_dbg(ctx->sa, "info structure is full already");
2195 		return;
2196 	}
2197 
2198 	range = &ctx->info->ranges[ctx->info->nb_ranges];
2199 	rc = sfc_mae_switch_controller_from_mapping(ctx->controllers,
2200 						    ctx->nb_controllers,
2201 						    port_datap->repr.intf,
2202 						    &range->controller);
2203 	if (rc != 0) {
2204 		sfc_err(ctx->sa, "invalid representor controller: %d",
2205 			port_datap->repr.intf);
2206 		range->controller = -1;
2207 	}
2208 	range->pf = port_datap->repr.pf;
2209 	range->id_base = switch_port_id;
2210 	range->id_end = switch_port_id;
2211 
2212 	if (port_datap->repr.vf != EFX_PCI_VF_INVALID) {
2213 		range->type = RTE_ETH_REPRESENTOR_VF;
2214 		range->vf = port_datap->repr.vf;
2215 		ret = snprintf(range->name, RTE_DEV_NAME_MAX_LEN,
2216 			       "c%dpf%dvf%d", range->controller, range->pf,
2217 			       range->vf);
2218 	} else {
2219 		range->type = RTE_ETH_REPRESENTOR_PF;
2220 		ret = snprintf(range->name, RTE_DEV_NAME_MAX_LEN,
2221 			 "c%dpf%d", range->controller, range->pf);
2222 	}
2223 	if (ret >= RTE_DEV_NAME_MAX_LEN) {
2224 		sfc_err(ctx->sa, "representor name has been truncated: %s",
2225 			range->name);
2226 	}
2227 
2228 	ctx->info->nb_ranges++;
2229 }
2230 
2231 static int
2232 sfc_representor_info_get(struct rte_eth_dev *dev,
2233 			 struct rte_eth_representor_info *info)
2234 {
2235 	struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
2236 	struct sfc_get_representors_ctx get_repr_ctx;
2237 	const efx_nic_cfg_t *nic_cfg;
2238 	uint16_t switch_domain_id;
2239 	uint32_t nb_repr;
2240 	int controller;
2241 	int rc;
2242 
2243 	sfc_adapter_lock(sa);
2244 
2245 	if (sa->mae.status != SFC_MAE_STATUS_ADMIN) {
2246 		sfc_adapter_unlock(sa);
2247 		return -ENOTSUP;
2248 	}
2249 
2250 	rc = sfc_process_mport_journal(sa);
2251 	if (rc != 0) {
2252 		sfc_adapter_unlock(sa);
2253 		SFC_ASSERT(rc > 0);
2254 		return -rc;
2255 	}
2256 
2257 	switch_domain_id = sa->mae.switch_domain_id;
2258 
2259 	nb_repr = 0;
2260 	rc = sfc_mae_switch_ports_iterate(switch_domain_id,
2261 					  sfc_count_representors_cb,
2262 					  &nb_repr);
2263 	if (rc != 0) {
2264 		sfc_adapter_unlock(sa);
2265 		SFC_ASSERT(rc > 0);
2266 		return -rc;
2267 	}
2268 
2269 	if (info == NULL) {
2270 		sfc_adapter_unlock(sa);
2271 		return nb_repr;
2272 	}
2273 
2274 	rc = sfc_mae_switch_domain_controllers(switch_domain_id,
2275 					       &get_repr_ctx.controllers,
2276 					       &get_repr_ctx.nb_controllers);
2277 	if (rc != 0) {
2278 		sfc_adapter_unlock(sa);
2279 		SFC_ASSERT(rc > 0);
2280 		return -rc;
2281 	}
2282 
2283 	nic_cfg = efx_nic_cfg_get(sa->nic);
2284 
2285 	rc = sfc_mae_switch_domain_get_controller(switch_domain_id,
2286 						  nic_cfg->enc_intf,
2287 						  &controller);
2288 	if (rc != 0) {
2289 		sfc_err(sa, "invalid controller: %d", nic_cfg->enc_intf);
2290 		controller = -1;
2291 	}
2292 
2293 	info->controller = controller;
2294 	info->pf = nic_cfg->enc_pf;
2295 
2296 	get_repr_ctx.info = info;
2297 	get_repr_ctx.sa = sa;
2298 	get_repr_ctx.switch_domain_id = switch_domain_id;
2299 	rc = sfc_mae_switch_ports_iterate(switch_domain_id,
2300 					  sfc_get_representors_cb,
2301 					  &get_repr_ctx);
2302 	if (rc != 0) {
2303 		sfc_adapter_unlock(sa);
2304 		SFC_ASSERT(rc > 0);
2305 		return -rc;
2306 	}
2307 
2308 	sfc_adapter_unlock(sa);
2309 	return nb_repr;
2310 }
2311 
2312 static int
2313 sfc_rx_metadata_negotiate(struct rte_eth_dev *dev, uint64_t *features)
2314 {
2315 	struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
2316 	uint64_t supported = 0;
2317 
2318 	sfc_adapter_lock(sa);
2319 
2320 	if ((sa->priv.dp_rx->features & SFC_DP_RX_FEAT_FLOW_FLAG) != 0)
2321 		supported |= RTE_ETH_RX_METADATA_USER_FLAG;
2322 
2323 	if ((sa->priv.dp_rx->features & SFC_DP_RX_FEAT_FLOW_MARK) != 0)
2324 		supported |= RTE_ETH_RX_METADATA_USER_MARK;
2325 
2326 	if (sfc_flow_tunnel_is_supported(sa))
2327 		supported |= RTE_ETH_RX_METADATA_TUNNEL_ID;
2328 
2329 	sa->negotiated_rx_metadata = supported & *features;
2330 	*features = sa->negotiated_rx_metadata;
2331 
2332 	sfc_adapter_unlock(sa);
2333 
2334 	return 0;
2335 }
2336 
2337 static const struct eth_dev_ops sfc_eth_dev_ops = {
2338 	.dev_configure			= sfc_dev_configure,
2339 	.dev_start			= sfc_dev_start,
2340 	.dev_stop			= sfc_dev_stop,
2341 	.dev_set_link_up		= sfc_dev_set_link_up,
2342 	.dev_set_link_down		= sfc_dev_set_link_down,
2343 	.dev_close			= sfc_dev_close,
2344 	.promiscuous_enable		= sfc_dev_promisc_enable,
2345 	.promiscuous_disable		= sfc_dev_promisc_disable,
2346 	.allmulticast_enable		= sfc_dev_allmulti_enable,
2347 	.allmulticast_disable		= sfc_dev_allmulti_disable,
2348 	.link_update			= sfc_dev_link_update,
2349 	.stats_get			= sfc_stats_get,
2350 	.stats_reset			= sfc_stats_reset,
2351 	.xstats_get			= sfc_xstats_get,
2352 	.xstats_reset			= sfc_stats_reset,
2353 	.xstats_get_names		= sfc_xstats_get_names,
2354 	.dev_infos_get			= sfc_dev_infos_get,
2355 	.dev_supported_ptypes_get	= sfc_dev_supported_ptypes_get,
2356 	.mtu_set			= sfc_dev_set_mtu,
2357 	.rx_queue_start			= sfc_rx_queue_start,
2358 	.rx_queue_stop			= sfc_rx_queue_stop,
2359 	.tx_queue_start			= sfc_tx_queue_start,
2360 	.tx_queue_stop			= sfc_tx_queue_stop,
2361 	.rx_queue_setup			= sfc_rx_queue_setup,
2362 	.rx_queue_release		= sfc_rx_queue_release,
2363 	.rx_queue_intr_enable		= sfc_rx_queue_intr_enable,
2364 	.rx_queue_intr_disable		= sfc_rx_queue_intr_disable,
2365 	.tx_queue_setup			= sfc_tx_queue_setup,
2366 	.tx_queue_release		= sfc_tx_queue_release,
2367 	.flow_ctrl_get			= sfc_flow_ctrl_get,
2368 	.flow_ctrl_set			= sfc_flow_ctrl_set,
2369 	.mac_addr_set			= sfc_mac_addr_set,
2370 	.udp_tunnel_port_add		= sfc_dev_udp_tunnel_port_add,
2371 	.udp_tunnel_port_del		= sfc_dev_udp_tunnel_port_del,
2372 	.reta_update			= sfc_dev_rss_reta_update,
2373 	.reta_query			= sfc_dev_rss_reta_query,
2374 	.rss_hash_update		= sfc_dev_rss_hash_update,
2375 	.rss_hash_conf_get		= sfc_dev_rss_hash_conf_get,
2376 	.flow_ops_get			= sfc_dev_flow_ops_get,
2377 	.set_mc_addr_list		= sfc_set_mc_addr_list,
2378 	.rxq_info_get			= sfc_rx_queue_info_get,
2379 	.txq_info_get			= sfc_tx_queue_info_get,
2380 	.fw_version_get			= sfc_fw_version_get,
2381 	.xstats_get_by_id		= sfc_xstats_get_by_id,
2382 	.xstats_get_names_by_id		= sfc_xstats_get_names_by_id,
2383 	.pool_ops_supported		= sfc_pool_ops_supported,
2384 	.representor_info_get		= sfc_representor_info_get,
2385 	.rx_metadata_negotiate		= sfc_rx_metadata_negotiate,
2386 };
2387 
/* Parameters passed from PCI probe to sfc_eth_dev_init() */
struct sfc_ethdev_init_data {
	/*
	 * Number of representor ports requested in device arguments or,
	 * if there are none, the number of ports (see sfc_eth_dev_pci_probe())
	 */
	uint16_t		nb_representors;
};
2391 
/**
 * Make a copy of a string in memory obtained from rte_malloc(), which is
 * what multi-process support needs if the string is to be shared.
 *
 * strdup() is not suitable since it allocates from process-local memory.
 *
 * Returns the copy, or NULL if @p str is NULL or the allocation fails.
 * The copy is released using rte_free() elsewhere in this file.
 */
static char *
sfc_strdup(const char *str)
{
	size_t nbytes;
	char *dup;

	if (str == NULL)
		return NULL;

	/* Include terminating NUL */
	nbytes = strlen(str) + 1;

	dup = rte_malloc(__func__, nbytes, 0);
	if (dup == NULL)
		return NULL;

	rte_memcpy(dup, str, nbytes);

	return dup;
}
2414 
2415 static int
2416 sfc_eth_dev_set_ops(struct rte_eth_dev *dev)
2417 {
2418 	struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
2419 	struct sfc_adapter_shared *sas = sfc_adapter_shared_by_eth_dev(dev);
2420 	const struct sfc_dp_rx *dp_rx;
2421 	const struct sfc_dp_tx *dp_tx;
2422 	const efx_nic_cfg_t *encp;
2423 	unsigned int avail_caps = 0;
2424 	const char *rx_name = NULL;
2425 	const char *tx_name = NULL;
2426 	int rc;
2427 
2428 	switch (sa->family) {
2429 	case EFX_FAMILY_HUNTINGTON:
2430 	case EFX_FAMILY_MEDFORD:
2431 	case EFX_FAMILY_MEDFORD2:
2432 		avail_caps |= SFC_DP_HW_FW_CAP_EF10;
2433 		avail_caps |= SFC_DP_HW_FW_CAP_RX_EFX;
2434 		avail_caps |= SFC_DP_HW_FW_CAP_TX_EFX;
2435 		break;
2436 	case EFX_FAMILY_RIVERHEAD:
2437 		avail_caps |= SFC_DP_HW_FW_CAP_EF100;
2438 		break;
2439 	default:
2440 		break;
2441 	}
2442 
2443 	encp = efx_nic_cfg_get(sa->nic);
2444 	if (encp->enc_rx_es_super_buffer_supported)
2445 		avail_caps |= SFC_DP_HW_FW_CAP_RX_ES_SUPER_BUFFER;
2446 
2447 	rc = sfc_kvargs_process(sa, SFC_KVARG_RX_DATAPATH,
2448 				sfc_kvarg_string_handler, &rx_name);
2449 	if (rc != 0)
2450 		goto fail_kvarg_rx_datapath;
2451 
2452 	if (rx_name != NULL) {
2453 		dp_rx = sfc_dp_find_rx_by_name(&sfc_dp_head, rx_name);
2454 		if (dp_rx == NULL) {
2455 			sfc_err(sa, "Rx datapath %s not found", rx_name);
2456 			rc = ENOENT;
2457 			goto fail_dp_rx;
2458 		}
2459 		if (!sfc_dp_match_hw_fw_caps(&dp_rx->dp, avail_caps)) {
2460 			sfc_err(sa,
2461 				"Insufficient Hw/FW capabilities to use Rx datapath %s",
2462 				rx_name);
2463 			rc = EINVAL;
2464 			goto fail_dp_rx_caps;
2465 		}
2466 	} else {
2467 		dp_rx = sfc_dp_find_rx_by_caps(&sfc_dp_head, avail_caps);
2468 		if (dp_rx == NULL) {
2469 			sfc_err(sa, "Rx datapath by caps %#x not found",
2470 				avail_caps);
2471 			rc = ENOENT;
2472 			goto fail_dp_rx;
2473 		}
2474 	}
2475 
2476 	sas->dp_rx_name = sfc_strdup(dp_rx->dp.name);
2477 	if (sas->dp_rx_name == NULL) {
2478 		rc = ENOMEM;
2479 		goto fail_dp_rx_name;
2480 	}
2481 
2482 	if (strcmp(dp_rx->dp.name, SFC_KVARG_DATAPATH_EF10_ESSB) == 0) {
2483 		/* FLAG and MARK are always available from Rx prefix. */
2484 		sa->negotiated_rx_metadata |= RTE_ETH_RX_METADATA_USER_FLAG;
2485 		sa->negotiated_rx_metadata |= RTE_ETH_RX_METADATA_USER_MARK;
2486 	}
2487 
2488 	sfc_notice(sa, "use %s Rx datapath", sas->dp_rx_name);
2489 
2490 	rc = sfc_kvargs_process(sa, SFC_KVARG_TX_DATAPATH,
2491 				sfc_kvarg_string_handler, &tx_name);
2492 	if (rc != 0)
2493 		goto fail_kvarg_tx_datapath;
2494 
2495 	if (tx_name != NULL) {
2496 		dp_tx = sfc_dp_find_tx_by_name(&sfc_dp_head, tx_name);
2497 		if (dp_tx == NULL) {
2498 			sfc_err(sa, "Tx datapath %s not found", tx_name);
2499 			rc = ENOENT;
2500 			goto fail_dp_tx;
2501 		}
2502 		if (!sfc_dp_match_hw_fw_caps(&dp_tx->dp, avail_caps)) {
2503 			sfc_err(sa,
2504 				"Insufficient Hw/FW capabilities to use Tx datapath %s",
2505 				tx_name);
2506 			rc = EINVAL;
2507 			goto fail_dp_tx_caps;
2508 		}
2509 	} else {
2510 		dp_tx = sfc_dp_find_tx_by_caps(&sfc_dp_head, avail_caps);
2511 		if (dp_tx == NULL) {
2512 			sfc_err(sa, "Tx datapath by caps %#x not found",
2513 				avail_caps);
2514 			rc = ENOENT;
2515 			goto fail_dp_tx;
2516 		}
2517 	}
2518 
2519 	sas->dp_tx_name = sfc_strdup(dp_tx->dp.name);
2520 	if (sas->dp_tx_name == NULL) {
2521 		rc = ENOMEM;
2522 		goto fail_dp_tx_name;
2523 	}
2524 
2525 	sfc_notice(sa, "use %s Tx datapath", sas->dp_tx_name);
2526 
2527 	sa->priv.dp_rx = dp_rx;
2528 	sa->priv.dp_tx = dp_tx;
2529 
2530 	dev->rx_pkt_burst = dp_rx->pkt_burst;
2531 	dev->tx_pkt_prepare = dp_tx->pkt_prepare;
2532 	dev->tx_pkt_burst = dp_tx->pkt_burst;
2533 
2534 	dev->rx_queue_count = sfc_rx_queue_count;
2535 	dev->rx_descriptor_status = sfc_rx_descriptor_status;
2536 	dev->tx_descriptor_status = sfc_tx_descriptor_status;
2537 	dev->dev_ops = &sfc_eth_dev_ops;
2538 
2539 	return 0;
2540 
2541 fail_dp_tx_name:
2542 fail_dp_tx_caps:
2543 fail_dp_tx:
2544 fail_kvarg_tx_datapath:
2545 	rte_free(sas->dp_rx_name);
2546 	sas->dp_rx_name = NULL;
2547 
2548 fail_dp_rx_name:
2549 fail_dp_rx_caps:
2550 fail_dp_rx:
2551 fail_kvarg_rx_datapath:
2552 	return rc;
2553 }
2554 
2555 static void
2556 sfc_eth_dev_clear_ops(struct rte_eth_dev *dev)
2557 {
2558 	struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
2559 	struct sfc_adapter_shared *sas = sfc_adapter_shared_by_eth_dev(dev);
2560 
2561 	dev->dev_ops = NULL;
2562 	dev->tx_pkt_prepare = NULL;
2563 	dev->rx_pkt_burst = NULL;
2564 	dev->tx_pkt_burst = NULL;
2565 
2566 	rte_free(sas->dp_tx_name);
2567 	sas->dp_tx_name = NULL;
2568 	sa->priv.dp_tx = NULL;
2569 
2570 	rte_free(sas->dp_rx_name);
2571 	sas->dp_rx_name = NULL;
2572 	sa->priv.dp_rx = NULL;
2573 }
2574 
2575 static const struct eth_dev_ops sfc_eth_dev_secondary_ops = {
2576 	.dev_supported_ptypes_get	= sfc_dev_supported_ptypes_get,
2577 	.reta_query			= sfc_dev_rss_reta_query,
2578 	.rss_hash_conf_get		= sfc_dev_rss_hash_conf_get,
2579 	.rxq_info_get			= sfc_rx_queue_info_get,
2580 	.txq_info_get			= sfc_tx_queue_info_get,
2581 };
2582 
2583 static int
2584 sfc_eth_dev_secondary_init(struct rte_eth_dev *dev, uint32_t logtype_main)
2585 {
2586 	struct sfc_adapter_shared *sas = sfc_adapter_shared_by_eth_dev(dev);
2587 	struct sfc_adapter_priv *sap;
2588 	const struct sfc_dp_rx *dp_rx;
2589 	const struct sfc_dp_tx *dp_tx;
2590 	int rc;
2591 
2592 	/*
2593 	 * Allocate process private data from heap, since it should not
2594 	 * be located in shared memory allocated using rte_malloc() API.
2595 	 */
2596 	sap = calloc(1, sizeof(*sap));
2597 	if (sap == NULL) {
2598 		rc = ENOMEM;
2599 		goto fail_alloc_priv;
2600 	}
2601 
2602 	sap->logtype_main = logtype_main;
2603 
2604 	dp_rx = sfc_dp_find_rx_by_name(&sfc_dp_head, sas->dp_rx_name);
2605 	if (dp_rx == NULL) {
2606 		SFC_LOG(sas, RTE_LOG_ERR, logtype_main,
2607 			"cannot find %s Rx datapath", sas->dp_rx_name);
2608 		rc = ENOENT;
2609 		goto fail_dp_rx;
2610 	}
2611 	if (~dp_rx->features & SFC_DP_RX_FEAT_MULTI_PROCESS) {
2612 		SFC_LOG(sas, RTE_LOG_ERR, logtype_main,
2613 			"%s Rx datapath does not support multi-process",
2614 			sas->dp_rx_name);
2615 		rc = EINVAL;
2616 		goto fail_dp_rx_multi_process;
2617 	}
2618 
2619 	dp_tx = sfc_dp_find_tx_by_name(&sfc_dp_head, sas->dp_tx_name);
2620 	if (dp_tx == NULL) {
2621 		SFC_LOG(sas, RTE_LOG_ERR, logtype_main,
2622 			"cannot find %s Tx datapath", sas->dp_tx_name);
2623 		rc = ENOENT;
2624 		goto fail_dp_tx;
2625 	}
2626 	if (~dp_tx->features & SFC_DP_TX_FEAT_MULTI_PROCESS) {
2627 		SFC_LOG(sas, RTE_LOG_ERR, logtype_main,
2628 			"%s Tx datapath does not support multi-process",
2629 			sas->dp_tx_name);
2630 		rc = EINVAL;
2631 		goto fail_dp_tx_multi_process;
2632 	}
2633 
2634 	sap->dp_rx = dp_rx;
2635 	sap->dp_tx = dp_tx;
2636 
2637 	dev->process_private = sap;
2638 	dev->rx_pkt_burst = dp_rx->pkt_burst;
2639 	dev->tx_pkt_prepare = dp_tx->pkt_prepare;
2640 	dev->tx_pkt_burst = dp_tx->pkt_burst;
2641 	dev->rx_queue_count = sfc_rx_queue_count;
2642 	dev->rx_descriptor_status = sfc_rx_descriptor_status;
2643 	dev->tx_descriptor_status = sfc_tx_descriptor_status;
2644 	dev->dev_ops = &sfc_eth_dev_secondary_ops;
2645 
2646 	return 0;
2647 
2648 fail_dp_tx_multi_process:
2649 fail_dp_tx:
2650 fail_dp_rx_multi_process:
2651 fail_dp_rx:
2652 	free(sap);
2653 
2654 fail_alloc_priv:
2655 	return rc;
2656 }
2657 
2658 static void
2659 sfc_register_dp(void)
2660 {
2661 	/* Register once */
2662 	if (TAILQ_EMPTY(&sfc_dp_head)) {
2663 		/* Prefer EF10 datapath */
2664 		sfc_dp_register(&sfc_dp_head, &sfc_ef100_rx.dp);
2665 		sfc_dp_register(&sfc_dp_head, &sfc_ef10_essb_rx.dp);
2666 		sfc_dp_register(&sfc_dp_head, &sfc_ef10_rx.dp);
2667 		sfc_dp_register(&sfc_dp_head, &sfc_efx_rx.dp);
2668 
2669 		sfc_dp_register(&sfc_dp_head, &sfc_ef100_tx.dp);
2670 		sfc_dp_register(&sfc_dp_head, &sfc_ef10_tx.dp);
2671 		sfc_dp_register(&sfc_dp_head, &sfc_efx_tx.dp);
2672 		sfc_dp_register(&sfc_dp_head, &sfc_ef10_simple_tx.dp);
2673 	}
2674 }
2675 
2676 static int
2677 sfc_parse_switch_mode(struct sfc_adapter *sa, bool has_representors)
2678 {
2679 	const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
2680 	const char *switch_mode = NULL;
2681 	int rc;
2682 
2683 	sfc_log_init(sa, "entry");
2684 
2685 	rc = sfc_kvargs_process(sa, SFC_KVARG_SWITCH_MODE,
2686 				sfc_kvarg_string_handler, &switch_mode);
2687 	if (rc != 0)
2688 		goto fail_kvargs;
2689 
2690 	if (switch_mode == NULL) {
2691 		sa->switchdev = encp->enc_mae_admin &&
2692 				(!encp->enc_datapath_cap_evb ||
2693 				 has_representors);
2694 	} else if (strcasecmp(switch_mode, SFC_KVARG_SWITCH_MODE_LEGACY) == 0) {
2695 		sa->switchdev = false;
2696 	} else if (strcasecmp(switch_mode,
2697 			      SFC_KVARG_SWITCH_MODE_SWITCHDEV) == 0) {
2698 		sa->switchdev = true;
2699 	} else {
2700 		sfc_err(sa, "invalid switch mode device argument '%s'",
2701 			switch_mode);
2702 		rc = EINVAL;
2703 		goto fail_mode;
2704 	}
2705 
2706 	sfc_log_init(sa, "done");
2707 
2708 	return 0;
2709 
2710 fail_mode:
2711 fail_kvargs:
2712 	sfc_log_init(sa, "failed: %s", rte_strerror(rc));
2713 
2714 	return rc;
2715 }
2716 
2717 static int
2718 sfc_eth_dev_init(struct rte_eth_dev *dev, void *init_params)
2719 {
2720 	struct sfc_adapter_shared *sas = sfc_adapter_shared_by_eth_dev(dev);
2721 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
2722 	struct sfc_ethdev_init_data *init_data = init_params;
2723 	uint32_t logtype_main;
2724 	struct sfc_adapter *sa;
2725 	int rc;
2726 	const efx_nic_cfg_t *encp;
2727 	const struct rte_ether_addr *from;
2728 	int ret;
2729 
2730 	if (sfc_efx_dev_class_get(pci_dev->device.devargs) !=
2731 			SFC_EFX_DEV_CLASS_NET) {
2732 		SFC_GENERIC_LOG(DEBUG,
2733 			"Incompatible device class: skip probing, should be probed by other sfc driver.");
2734 		return 1;
2735 	}
2736 
2737 	rc = sfc_dp_mport_register();
2738 	if (rc != 0)
2739 		return rc;
2740 
2741 	sfc_register_dp();
2742 
2743 	logtype_main = sfc_register_logtype(&pci_dev->addr,
2744 					    SFC_LOGTYPE_MAIN_STR,
2745 					    RTE_LOG_NOTICE);
2746 
2747 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
2748 		return -sfc_eth_dev_secondary_init(dev, logtype_main);
2749 
2750 	/* Required for logging */
2751 	ret = snprintf(sas->log_prefix, sizeof(sas->log_prefix),
2752 			"PMD: sfc_efx " PCI_PRI_FMT " #%" PRIu16 ": ",
2753 			pci_dev->addr.domain, pci_dev->addr.bus,
2754 			pci_dev->addr.devid, pci_dev->addr.function,
2755 			dev->data->port_id);
2756 	if (ret < 0 || ret >= (int)sizeof(sas->log_prefix)) {
2757 		SFC_GENERIC_LOG(ERR,
2758 			"reserved log prefix is too short for " PCI_PRI_FMT,
2759 			pci_dev->addr.domain, pci_dev->addr.bus,
2760 			pci_dev->addr.devid, pci_dev->addr.function);
2761 		return -EINVAL;
2762 	}
2763 	sas->pci_addr = pci_dev->addr;
2764 	sas->port_id = dev->data->port_id;
2765 
2766 	/*
2767 	 * Allocate process private data from heap, since it should not
2768 	 * be located in shared memory allocated using rte_malloc() API.
2769 	 */
2770 	sa = calloc(1, sizeof(*sa));
2771 	if (sa == NULL) {
2772 		rc = ENOMEM;
2773 		goto fail_alloc_sa;
2774 	}
2775 
2776 	dev->process_private = sa;
2777 
2778 	/* Required for logging */
2779 	sa->priv.shared = sas;
2780 	sa->priv.logtype_main = logtype_main;
2781 
2782 	sa->eth_dev = dev;
2783 
2784 	/* Copy PCI device info to the dev->data */
2785 	rte_eth_copy_pci_info(dev, pci_dev);
2786 	dev->data->dev_flags |= RTE_ETH_DEV_FLOW_OPS_THREAD_SAFE;
2787 
2788 	rc = sfc_kvargs_parse(sa);
2789 	if (rc != 0)
2790 		goto fail_kvargs_parse;
2791 
2792 	sfc_log_init(sa, "entry");
2793 
2794 	dev->data->mac_addrs = rte_zmalloc("sfc", RTE_ETHER_ADDR_LEN, 0);
2795 	if (dev->data->mac_addrs == NULL) {
2796 		rc = ENOMEM;
2797 		goto fail_mac_addrs;
2798 	}
2799 
2800 	sfc_adapter_lock_init(sa);
2801 	sfc_adapter_lock(sa);
2802 
2803 	sfc_log_init(sa, "probing");
2804 	rc = sfc_probe(sa);
2805 	if (rc != 0)
2806 		goto fail_probe;
2807 
2808 	/*
2809 	 * Selecting a default switch mode requires the NIC to be probed and
2810 	 * to have its capabilities filled in.
2811 	 */
2812 	rc = sfc_parse_switch_mode(sa, init_data->nb_representors > 0);
2813 	if (rc != 0)
2814 		goto fail_switch_mode;
2815 
2816 	sfc_log_init(sa, "set device ops");
2817 	rc = sfc_eth_dev_set_ops(dev);
2818 	if (rc != 0)
2819 		goto fail_set_ops;
2820 
2821 	sfc_log_init(sa, "attaching");
2822 	rc = sfc_attach(sa);
2823 	if (rc != 0)
2824 		goto fail_attach;
2825 
2826 	if (sa->switchdev && sa->mae.status != SFC_MAE_STATUS_ADMIN) {
2827 		sfc_err(sa,
2828 			"failed to enable switchdev mode without admin MAE privilege");
2829 		rc = ENOTSUP;
2830 		goto fail_switchdev_no_mae;
2831 	}
2832 
2833 	encp = efx_nic_cfg_get(sa->nic);
2834 
2835 	/*
2836 	 * The arguments are really reverse order in comparison to
2837 	 * Linux kernel. Copy from NIC config to Ethernet device data.
2838 	 */
2839 	from = (const struct rte_ether_addr *)(encp->enc_mac_addr);
2840 	rte_ether_addr_copy(from, &dev->data->mac_addrs[0]);
2841 
2842 	sfc_adapter_unlock(sa);
2843 
2844 	sfc_log_init(sa, "done");
2845 	return 0;
2846 
2847 fail_switchdev_no_mae:
2848 	sfc_detach(sa);
2849 
2850 fail_attach:
2851 	sfc_eth_dev_clear_ops(dev);
2852 
2853 fail_set_ops:
2854 fail_switch_mode:
2855 	sfc_unprobe(sa);
2856 
2857 fail_probe:
2858 	sfc_adapter_unlock(sa);
2859 	sfc_adapter_lock_fini(sa);
2860 	rte_free(dev->data->mac_addrs);
2861 	dev->data->mac_addrs = NULL;
2862 
2863 fail_mac_addrs:
2864 	sfc_kvargs_cleanup(sa);
2865 
2866 fail_kvargs_parse:
2867 	sfc_log_init(sa, "failed %d", rc);
2868 	dev->process_private = NULL;
2869 	free(sa);
2870 
2871 fail_alloc_sa:
2872 	SFC_ASSERT(rc > 0);
2873 	return -rc;
2874 }
2875 
/*
 * Ethernet device uninit callback: close the device. The result of
 * sfc_dev_close() is not propagated, 0 is always returned.
 */
static int
sfc_eth_dev_uninit(struct rte_eth_dev *dev)
{
	(void)sfc_dev_close(dev);

	return 0;
}
2883 
2884 static const struct rte_pci_id pci_id_sfc_efx_map[] = {
2885 	{ RTE_PCI_DEVICE(EFX_PCI_VENID_SFC, EFX_PCI_DEVID_FARMINGDALE) },
2886 	{ RTE_PCI_DEVICE(EFX_PCI_VENID_SFC, EFX_PCI_DEVID_FARMINGDALE_VF) },
2887 	{ RTE_PCI_DEVICE(EFX_PCI_VENID_SFC, EFX_PCI_DEVID_GREENPORT) },
2888 	{ RTE_PCI_DEVICE(EFX_PCI_VENID_SFC, EFX_PCI_DEVID_GREENPORT_VF) },
2889 	{ RTE_PCI_DEVICE(EFX_PCI_VENID_SFC, EFX_PCI_DEVID_MEDFORD) },
2890 	{ RTE_PCI_DEVICE(EFX_PCI_VENID_SFC, EFX_PCI_DEVID_MEDFORD_VF) },
2891 	{ RTE_PCI_DEVICE(EFX_PCI_VENID_SFC, EFX_PCI_DEVID_MEDFORD2) },
2892 	{ RTE_PCI_DEVICE(EFX_PCI_VENID_SFC, EFX_PCI_DEVID_MEDFORD2_VF) },
2893 	{ RTE_PCI_DEVICE(EFX_PCI_VENID_XILINX, EFX_PCI_DEVID_RIVERHEAD) },
2894 	{ .vendor_id = 0 /* sentinel */ }
2895 };
2896 
2897 static int
2898 sfc_parse_rte_devargs(const char *args, struct rte_eth_devargs *devargs)
2899 {
2900 	struct rte_eth_devargs eth_da = { .nb_representor_ports = 0 };
2901 	int rc;
2902 
2903 	if (args != NULL) {
2904 		rc = rte_eth_devargs_parse(args, &eth_da);
2905 		if (rc != 0) {
2906 			SFC_GENERIC_LOG(ERR,
2907 					"Failed to parse generic devargs '%s'",
2908 					args);
2909 			return rc;
2910 		}
2911 	}
2912 
2913 	*devargs = eth_da;
2914 
2915 	return 0;
2916 }
2917 
2918 static int
2919 sfc_eth_dev_find_or_create(struct rte_pci_device *pci_dev,
2920 			   struct sfc_ethdev_init_data *init_data,
2921 			   struct rte_eth_dev **devp,
2922 			   bool *dev_created)
2923 {
2924 	struct rte_eth_dev *dev;
2925 	bool created = false;
2926 	int rc;
2927 
2928 	dev = rte_eth_dev_allocated(pci_dev->device.name);
2929 	if (dev == NULL) {
2930 		rc = rte_eth_dev_create(&pci_dev->device, pci_dev->device.name,
2931 					sizeof(struct sfc_adapter_shared),
2932 					eth_dev_pci_specific_init, pci_dev,
2933 					sfc_eth_dev_init, init_data);
2934 		if (rc != 0) {
2935 			SFC_GENERIC_LOG(ERR, "Failed to create sfc ethdev '%s'",
2936 					pci_dev->device.name);
2937 			return rc;
2938 		}
2939 
2940 		created = true;
2941 
2942 		dev = rte_eth_dev_allocated(pci_dev->device.name);
2943 		if (dev == NULL) {
2944 			SFC_GENERIC_LOG(ERR,
2945 				"Failed to find allocated sfc ethdev '%s'",
2946 				pci_dev->device.name);
2947 			return -ENODEV;
2948 		}
2949 	}
2950 
2951 	*devp = dev;
2952 	*dev_created = created;
2953 
2954 	return 0;
2955 }
2956 
2957 static int
2958 sfc_eth_dev_create_repr(struct sfc_adapter *sa,
2959 			efx_pcie_interface_t controller,
2960 			uint16_t port,
2961 			uint16_t repr_port,
2962 			enum rte_eth_representor_type type)
2963 {
2964 	struct sfc_repr_entity_info entity;
2965 	efx_mport_sel_t mport_sel;
2966 	int rc;
2967 
2968 	switch (type) {
2969 	case RTE_ETH_REPRESENTOR_NONE:
2970 		return 0;
2971 	case RTE_ETH_REPRESENTOR_VF:
2972 	case RTE_ETH_REPRESENTOR_PF:
2973 		break;
2974 	case RTE_ETH_REPRESENTOR_SF:
2975 		sfc_err(sa, "SF representors are not supported");
2976 		return ENOTSUP;
2977 	default:
2978 		sfc_err(sa, "unknown representor type: %d", type);
2979 		return ENOTSUP;
2980 	}
2981 
2982 	rc = efx_mae_mport_by_pcie_mh_function(controller,
2983 					       port,
2984 					       repr_port,
2985 					       &mport_sel);
2986 	if (rc != 0) {
2987 		sfc_err(sa,
2988 			"failed to get m-port selector for controller %u port %u repr_port %u: %s",
2989 			controller, port, repr_port, rte_strerror(-rc));
2990 		return rc;
2991 	}
2992 
2993 	memset(&entity, 0, sizeof(entity));
2994 	entity.type = type;
2995 	entity.intf = controller;
2996 	entity.pf = port;
2997 	entity.vf = repr_port;
2998 
2999 	rc = sfc_repr_create(sa->eth_dev, &entity, sa->mae.switch_domain_id,
3000 			     &mport_sel);
3001 	if (rc != 0) {
3002 		sfc_err(sa,
3003 			"failed to create representor for controller %u port %u repr_port %u: %s",
3004 			controller, port, repr_port, rte_strerror(-rc));
3005 		return rc;
3006 	}
3007 
3008 	return 0;
3009 }
3010 
3011 static int
3012 sfc_eth_dev_create_repr_port(struct sfc_adapter *sa,
3013 			     const struct rte_eth_devargs *eth_da,
3014 			     efx_pcie_interface_t controller,
3015 			     uint16_t port)
3016 {
3017 	int first_error = 0;
3018 	uint16_t i;
3019 	int rc;
3020 
3021 	if (eth_da->type == RTE_ETH_REPRESENTOR_PF) {
3022 		return sfc_eth_dev_create_repr(sa, controller, port,
3023 					       EFX_PCI_VF_INVALID,
3024 					       eth_da->type);
3025 	}
3026 
3027 	for (i = 0; i < eth_da->nb_representor_ports; i++) {
3028 		rc = sfc_eth_dev_create_repr(sa, controller, port,
3029 					     eth_da->representor_ports[i],
3030 					     eth_da->type);
3031 		if (rc != 0 && first_error == 0)
3032 			first_error = rc;
3033 	}
3034 
3035 	return first_error;
3036 }
3037 
3038 static int
3039 sfc_eth_dev_create_repr_controller(struct sfc_adapter *sa,
3040 				   const struct rte_eth_devargs *eth_da,
3041 				   efx_pcie_interface_t controller)
3042 {
3043 	const efx_nic_cfg_t *encp;
3044 	int first_error = 0;
3045 	uint16_t default_port;
3046 	uint16_t i;
3047 	int rc;
3048 
3049 	if (eth_da->nb_ports == 0) {
3050 		encp = efx_nic_cfg_get(sa->nic);
3051 		default_port = encp->enc_intf == controller ? encp->enc_pf : 0;
3052 		return sfc_eth_dev_create_repr_port(sa, eth_da, controller,
3053 						    default_port);
3054 	}
3055 
3056 	for (i = 0; i < eth_da->nb_ports; i++) {
3057 		rc = sfc_eth_dev_create_repr_port(sa, eth_da, controller,
3058 						  eth_da->ports[i]);
3059 		if (rc != 0 && first_error == 0)
3060 			first_error = rc;
3061 	}
3062 
3063 	return first_error;
3064 }
3065 
3066 static int
3067 sfc_eth_dev_create_representors(struct rte_eth_dev *dev,
3068 				const struct rte_eth_devargs *eth_da)
3069 {
3070 	efx_pcie_interface_t intf;
3071 	const efx_nic_cfg_t *encp;
3072 	struct sfc_adapter *sa;
3073 	uint16_t switch_domain_id;
3074 	uint16_t i;
3075 	int rc;
3076 
3077 	sa = sfc_adapter_by_eth_dev(dev);
3078 	switch_domain_id = sa->mae.switch_domain_id;
3079 
3080 	switch (eth_da->type) {
3081 	case RTE_ETH_REPRESENTOR_NONE:
3082 		return 0;
3083 	case RTE_ETH_REPRESENTOR_PF:
3084 	case RTE_ETH_REPRESENTOR_VF:
3085 		break;
3086 	case RTE_ETH_REPRESENTOR_SF:
3087 		sfc_err(sa, "SF representors are not supported");
3088 		return -ENOTSUP;
3089 	default:
3090 		sfc_err(sa, "unknown representor type: %d",
3091 			eth_da->type);
3092 		return -ENOTSUP;
3093 	}
3094 
3095 	if (!sa->switchdev) {
3096 		sfc_err(sa, "cannot create representors in non-switchdev mode");
3097 		return -EINVAL;
3098 	}
3099 
3100 	if (!sfc_repr_available(sfc_sa2shared(sa))) {
3101 		sfc_err(sa, "cannot create representors: unsupported");
3102 
3103 		return -ENOTSUP;
3104 	}
3105 
3106 	/*
3107 	 * This is needed to construct the DPDK controller -> EFX interface
3108 	 * mapping.
3109 	 */
3110 	sfc_adapter_lock(sa);
3111 	rc = sfc_process_mport_journal(sa);
3112 	sfc_adapter_unlock(sa);
3113 	if (rc != 0) {
3114 		SFC_ASSERT(rc > 0);
3115 		return -rc;
3116 	}
3117 
3118 	if (eth_da->nb_mh_controllers > 0) {
3119 		for (i = 0; i < eth_da->nb_mh_controllers; i++) {
3120 			rc = sfc_mae_switch_domain_get_intf(switch_domain_id,
3121 						eth_da->mh_controllers[i],
3122 						&intf);
3123 			if (rc != 0) {
3124 				sfc_err(sa, "failed to get representor");
3125 				continue;
3126 			}
3127 			sfc_eth_dev_create_repr_controller(sa, eth_da, intf);
3128 		}
3129 	} else {
3130 		encp = efx_nic_cfg_get(sa->nic);
3131 		sfc_eth_dev_create_repr_controller(sa, eth_da, encp->enc_intf);
3132 	}
3133 
3134 	return 0;
3135 }
3136 
3137 static int sfc_eth_dev_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
3138 	struct rte_pci_device *pci_dev)
3139 {
3140 	struct sfc_ethdev_init_data init_data;
3141 	struct rte_eth_devargs eth_da;
3142 	struct rte_eth_dev *dev;
3143 	bool dev_created;
3144 	int rc;
3145 
3146 	if (pci_dev->device.devargs != NULL) {
3147 		rc = sfc_parse_rte_devargs(pci_dev->device.devargs->args,
3148 					   &eth_da);
3149 		if (rc != 0)
3150 			return rc;
3151 	} else {
3152 		memset(&eth_da, 0, sizeof(eth_da));
3153 	}
3154 
3155 	/* If no VF representors specified, check for PF ones */
3156 	if (eth_da.nb_representor_ports > 0)
3157 		init_data.nb_representors = eth_da.nb_representor_ports;
3158 	else
3159 		init_data.nb_representors = eth_da.nb_ports;
3160 
3161 	if (init_data.nb_representors > 0 &&
3162 	    rte_eal_process_type() != RTE_PROC_PRIMARY) {
3163 		SFC_GENERIC_LOG(ERR,
3164 			"Create representors from secondary process not supported, dev '%s'",
3165 			pci_dev->device.name);
3166 		return -ENOTSUP;
3167 	}
3168 
3169 	/*
3170 	 * Driver supports RTE_PCI_DRV_PROBE_AGAIN. Hence create device only
3171 	 * if it does not already exist. Re-probing an existing device is
3172 	 * expected to allow additional representors to be configured.
3173 	 */
3174 	rc = sfc_eth_dev_find_or_create(pci_dev, &init_data, &dev,
3175 					&dev_created);
3176 	if (rc != 0)
3177 		return rc;
3178 
3179 	rc = sfc_eth_dev_create_representors(dev, &eth_da);
3180 	if (rc != 0) {
3181 		if (dev_created)
3182 			(void)rte_eth_dev_destroy(dev, sfc_eth_dev_uninit);
3183 
3184 		return rc;
3185 	}
3186 
3187 	return 0;
3188 }
3189 
3190 static int sfc_eth_dev_pci_remove(struct rte_pci_device *pci_dev)
3191 {
3192 	return rte_eth_dev_pci_generic_remove(pci_dev, sfc_eth_dev_uninit);
3193 }
3194 
3195 static struct rte_pci_driver sfc_efx_pmd = {
3196 	.id_table = pci_id_sfc_efx_map,
3197 	.drv_flags =
3198 		RTE_PCI_DRV_INTR_LSC |
3199 		RTE_PCI_DRV_NEED_MAPPING |
3200 		RTE_PCI_DRV_PROBE_AGAIN,
3201 	.probe = sfc_eth_dev_pci_probe,
3202 	.remove = sfc_eth_dev_pci_remove,
3203 };
3204 
3205 RTE_PMD_REGISTER_PCI(net_sfc_efx, sfc_efx_pmd);
3206 RTE_PMD_REGISTER_PCI_TABLE(net_sfc_efx, pci_id_sfc_efx_map);
3207 RTE_PMD_REGISTER_KMOD_DEP(net_sfc_efx, "* igb_uio | uio_pci_generic | vfio-pci");
3208 RTE_PMD_REGISTER_PARAM_STRING(net_sfc_efx,
3209 	SFC_KVARG_SWITCH_MODE "=" SFC_KVARG_VALUES_SWITCH_MODE " "
3210 	SFC_KVARG_RX_DATAPATH "=" SFC_KVARG_VALUES_RX_DATAPATH " "
3211 	SFC_KVARG_TX_DATAPATH "=" SFC_KVARG_VALUES_TX_DATAPATH " "
3212 	SFC_KVARG_PERF_PROFILE "=" SFC_KVARG_VALUES_PERF_PROFILE " "
3213 	SFC_KVARG_FW_VARIANT "=" SFC_KVARG_VALUES_FW_VARIANT " "
3214 	SFC_KVARG_RXD_WAIT_TIMEOUT_NS "=<long> "
3215 	SFC_KVARG_STATS_UPDATE_PERIOD_MS "=<long>");
3216 
3217 RTE_INIT(sfc_driver_register_logtype)
3218 {
3219 	int ret;
3220 
3221 	ret = rte_log_register_type_and_pick_level(SFC_LOGTYPE_PREFIX "driver",
3222 						   RTE_LOG_NOTICE);
3223 	sfc_logtype_driver = (ret < 0) ? RTE_LOGTYPE_PMD : ret;
3224 }
3225