xref: /dpdk/drivers/net/sfc/sfc_ethdev.c (revision 8b8036a66e3d59ffa58afb8d96fa2c73262155a7)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  *
3  * Copyright(c) 2019-2021 Xilinx, Inc.
4  * Copyright(c) 2016-2019 Solarflare Communications Inc.
5  *
6  * This software was jointly developed between OKTET Labs (under contract
7  * for Solarflare) and Solarflare Communications, Inc.
8  */
9 
10 #include <rte_dev.h>
11 #include <ethdev_driver.h>
12 #include <ethdev_pci.h>
13 #include <rte_pci.h>
14 #include <rte_bus_pci.h>
15 #include <rte_errno.h>
16 #include <rte_string_fns.h>
17 #include <rte_ether.h>
18 
19 #include "efx.h"
20 
21 #include "sfc.h"
22 #include "sfc_debug.h"
23 #include "sfc_log.h"
24 #include "sfc_kvargs.h"
25 #include "sfc_ev.h"
26 #include "sfc_rx.h"
27 #include "sfc_tx.h"
28 #include "sfc_flow.h"
29 #include "sfc_flow_tunnel.h"
30 #include "sfc_dp.h"
31 #include "sfc_dp_rx.h"
32 #include "sfc_repr.h"
33 #include "sfc_sw_stats.h"
34 #include "sfc_switch.h"
35 
36 #define SFC_XSTAT_ID_INVALID_VAL  UINT64_MAX
37 #define SFC_XSTAT_ID_INVALID_NAME '\0'
38 
39 uint32_t sfc_logtype_driver;
40 
41 static struct sfc_dp_list sfc_dp_head =
42 	TAILQ_HEAD_INITIALIZER(sfc_dp_head);
43 
44 
45 static void sfc_eth_dev_clear_ops(struct rte_eth_dev *dev);
46 
47 
48 static int
49 sfc_fw_version_get(struct rte_eth_dev *dev, char *fw_version, size_t fw_size)
50 {
51 	struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
52 	efx_nic_fw_info_t enfi;
53 	int ret;
54 	int rc;
55 
56 	rc = efx_nic_get_fw_version(sa->nic, &enfi);
57 	if (rc != 0)
58 		return -rc;
59 
60 	ret = snprintf(fw_version, fw_size,
61 		       "%" PRIu16 ".%" PRIu16 ".%" PRIu16 ".%" PRIu16,
62 		       enfi.enfi_mc_fw_version[0], enfi.enfi_mc_fw_version[1],
63 		       enfi.enfi_mc_fw_version[2], enfi.enfi_mc_fw_version[3]);
64 	if (ret < 0)
65 		return ret;
66 
67 	if (enfi.enfi_dpcpu_fw_ids_valid) {
68 		size_t dpcpu_fw_ids_offset = MIN(fw_size - 1, (size_t)ret);
69 		int ret_extra;
70 
71 		ret_extra = snprintf(fw_version + dpcpu_fw_ids_offset,
72 				     fw_size - dpcpu_fw_ids_offset,
73 				     " rx%" PRIx16 " tx%" PRIx16,
74 				     enfi.enfi_rx_dpcpu_fw_id,
75 				     enfi.enfi_tx_dpcpu_fw_id);
76 		if (ret_extra < 0)
77 			return ret_extra;
78 
79 		ret += ret_extra;
80 	}
81 
82 	if (fw_size < (size_t)(++ret))
83 		return ret;
84 	else
85 		return 0;
86 }
87 
88 static int
89 sfc_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
90 {
91 	const struct sfc_adapter_priv *sap = sfc_adapter_priv_by_eth_dev(dev);
92 	struct sfc_adapter_shared *sas = sfc_adapter_shared_by_eth_dev(dev);
93 	struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
94 	struct sfc_rss *rss = &sas->rss;
95 	struct sfc_mae *mae = &sa->mae;
96 	uint64_t txq_offloads_def = 0;
97 
98 	sfc_log_init(sa, "entry");
99 
100 	dev_info->min_mtu = RTE_ETHER_MIN_MTU;
101 	dev_info->max_mtu = EFX_MAC_SDU_MAX;
102 
103 	dev_info->max_rx_pktlen = EFX_MAC_PDU_MAX;
104 
105 	dev_info->max_vfs = sa->sriov.num_vfs;
106 
107 	/* Autonegotiation may be disabled */
108 	dev_info->speed_capa = RTE_ETH_LINK_SPEED_FIXED;
109 	if (sa->port.phy_adv_cap_mask & (1u << EFX_PHY_CAP_1000FDX))
110 		dev_info->speed_capa |= RTE_ETH_LINK_SPEED_1G;
111 	if (sa->port.phy_adv_cap_mask & (1u << EFX_PHY_CAP_10000FDX))
112 		dev_info->speed_capa |= RTE_ETH_LINK_SPEED_10G;
113 	if (sa->port.phy_adv_cap_mask & (1u << EFX_PHY_CAP_25000FDX))
114 		dev_info->speed_capa |= RTE_ETH_LINK_SPEED_25G;
115 	if (sa->port.phy_adv_cap_mask & (1u << EFX_PHY_CAP_40000FDX))
116 		dev_info->speed_capa |= RTE_ETH_LINK_SPEED_40G;
117 	if (sa->port.phy_adv_cap_mask & (1u << EFX_PHY_CAP_50000FDX))
118 		dev_info->speed_capa |= RTE_ETH_LINK_SPEED_50G;
119 	if (sa->port.phy_adv_cap_mask & (1u << EFX_PHY_CAP_100000FDX))
120 		dev_info->speed_capa |= RTE_ETH_LINK_SPEED_100G;
121 
122 	dev_info->max_rx_queues = sa->rxq_max;
123 	dev_info->max_tx_queues = sa->txq_max;
124 
125 	/* By default packets are dropped if no descriptors are available */
126 	dev_info->default_rxconf.rx_drop_en = 1;
127 
128 	dev_info->rx_queue_offload_capa = sfc_rx_get_queue_offload_caps(sa);
129 
130 	/*
131 	 * rx_offload_capa includes both device and queue offloads since
132 	 * the latter may be requested on a per device basis which makes
133 	 * sense when some offloads are needed to be set on all queues.
134 	 */
135 	dev_info->rx_offload_capa = sfc_rx_get_dev_offload_caps(sa) |
136 				    dev_info->rx_queue_offload_capa;
137 
138 	dev_info->tx_queue_offload_capa = sfc_tx_get_queue_offload_caps(sa);
139 
140 	/*
141 	 * tx_offload_capa includes both device and queue offloads since
142 	 * the latter may be requested on a per device basis which makes
143 	 * sense when some offloads are needed to be set on all queues.
144 	 */
145 	dev_info->tx_offload_capa = sfc_tx_get_dev_offload_caps(sa) |
146 				    dev_info->tx_queue_offload_capa;
147 
148 	if (dev_info->tx_offload_capa & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE)
149 		txq_offloads_def |= RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE;
150 
151 	dev_info->default_txconf.offloads |= txq_offloads_def;
152 
153 	if (rss->context_type != EFX_RX_SCALE_UNAVAILABLE) {
154 		uint64_t rte_hf = 0;
155 		unsigned int i;
156 
157 		for (i = 0; i < rss->hf_map_nb_entries; ++i)
158 			rte_hf |= rss->hf_map[i].rte;
159 
160 		dev_info->reta_size = EFX_RSS_TBL_SIZE;
161 		dev_info->hash_key_size = EFX_RSS_KEY_SIZE;
162 		dev_info->flow_type_rss_offloads = rte_hf;
163 	}
164 
165 	/* Initialize to hardware limits */
166 	dev_info->rx_desc_lim.nb_max = sa->rxq_max_entries;
167 	dev_info->rx_desc_lim.nb_min = sa->rxq_min_entries;
168 	/* The RXQ hardware requires that the descriptor count is a power
169 	 * of 2, but rx_desc_lim cannot properly describe that constraint.
170 	 */
171 	dev_info->rx_desc_lim.nb_align = sa->rxq_min_entries;
172 
173 	/* Initialize to hardware limits */
174 	dev_info->tx_desc_lim.nb_max = sa->txq_max_entries;
175 	dev_info->tx_desc_lim.nb_min = sa->txq_min_entries;
176 	/*
177 	 * The TXQ hardware requires that the descriptor count is a power
178 	 * of 2, but tx_desc_lim cannot properly describe that constraint
179 	 */
180 	dev_info->tx_desc_lim.nb_align = sa->txq_min_entries;
181 
182 	if (sap->dp_rx->get_dev_info != NULL)
183 		sap->dp_rx->get_dev_info(dev_info);
184 	if (sap->dp_tx->get_dev_info != NULL)
185 		sap->dp_tx->get_dev_info(dev_info);
186 
187 	dev_info->dev_capa = RTE_ETH_DEV_CAPA_RUNTIME_RX_QUEUE_SETUP |
188 			     RTE_ETH_DEV_CAPA_RUNTIME_TX_QUEUE_SETUP;
189 	dev_info->dev_capa &= ~RTE_ETH_DEV_CAPA_FLOW_RULE_KEEP;
190 
191 	if (mae->status == SFC_MAE_STATUS_SUPPORTED ||
192 	    mae->status == SFC_MAE_STATUS_ADMIN) {
193 		dev_info->switch_info.name = dev->device->driver->name;
194 		dev_info->switch_info.domain_id = mae->switch_domain_id;
195 		dev_info->switch_info.port_id = mae->switch_port_id;
196 	}
197 
198 	return 0;
199 }
200 
201 static const uint32_t *
202 sfc_dev_supported_ptypes_get(struct rte_eth_dev *dev)
203 {
204 	const struct sfc_adapter_priv *sap = sfc_adapter_priv_by_eth_dev(dev);
205 
206 	return sap->dp_rx->supported_ptypes_get(sap->shared->tunnel_encaps);
207 }
208 
209 static int
210 sfc_dev_configure(struct rte_eth_dev *dev)
211 {
212 	struct rte_eth_dev_data *dev_data = dev->data;
213 	struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
214 	int rc;
215 
216 	sfc_log_init(sa, "entry n_rxq=%u n_txq=%u",
217 		     dev_data->nb_rx_queues, dev_data->nb_tx_queues);
218 
219 	sfc_adapter_lock(sa);
220 	switch (sa->state) {
221 	case SFC_ETHDEV_CONFIGURED:
222 		/* FALLTHROUGH */
223 	case SFC_ETHDEV_INITIALIZED:
224 		rc = sfc_configure(sa);
225 		break;
226 	default:
227 		sfc_err(sa, "unexpected adapter state %u to configure",
228 			sa->state);
229 		rc = EINVAL;
230 		break;
231 	}
232 	sfc_adapter_unlock(sa);
233 
234 	sfc_log_init(sa, "done %d", rc);
235 	SFC_ASSERT(rc >= 0);
236 	return -rc;
237 }
238 
/*
 * ethdev start callback: start the adapter under the adapter lock.
 * Returns 0 on success or a negative errno value.
 */
static int
sfc_dev_start(struct rte_eth_dev *dev)
{
	struct sfc_adapter *adapter = sfc_adapter_by_eth_dev(dev);
	int err;

	sfc_log_init(adapter, "entry");

	sfc_adapter_lock(adapter);
	err = sfc_start(adapter);
	sfc_adapter_unlock(adapter);

	sfc_log_init(adapter, "done %d", err);

	/* sfc_start() reports positive errno; negate it for ethdev */
	SFC_ASSERT(err >= 0);
	return -err;
}
255 
256 static int
257 sfc_dev_link_update(struct rte_eth_dev *dev, int wait_to_complete)
258 {
259 	struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
260 	struct rte_eth_link current_link;
261 	int ret;
262 
263 	sfc_log_init(sa, "entry");
264 
265 	if (sa->state != SFC_ETHDEV_STARTED) {
266 		sfc_port_link_mode_to_info(EFX_LINK_UNKNOWN, &current_link);
267 	} else if (wait_to_complete) {
268 		efx_link_mode_t link_mode;
269 
270 		if (efx_port_poll(sa->nic, &link_mode) != 0)
271 			link_mode = EFX_LINK_UNKNOWN;
272 		sfc_port_link_mode_to_info(link_mode, &current_link);
273 
274 	} else {
275 		sfc_ev_mgmt_qpoll(sa);
276 		rte_eth_linkstatus_get(dev, &current_link);
277 	}
278 
279 	ret = rte_eth_linkstatus_set(dev, &current_link);
280 	if (ret == 0)
281 		sfc_notice(sa, "Link status is %s",
282 			   current_link.link_status ? "UP" : "DOWN");
283 
284 	return ret;
285 }
286 
/*
 * ethdev stop callback: stop the adapter under the adapter lock.
 * Always returns 0.
 */
static int
sfc_dev_stop(struct rte_eth_dev *dev)
{
	struct sfc_adapter *adapter = sfc_adapter_by_eth_dev(dev);

	sfc_log_init(adapter, "entry");

	sfc_adapter_lock(adapter);
	sfc_stop(adapter);
	sfc_adapter_unlock(adapter);

	sfc_log_init(adapter, "done");
	return 0;
}
302 
/*
 * ethdev set-link-up callback, implemented by starting the adapter.
 * Returns 0 on success or a negative errno value.
 */
static int
sfc_dev_set_link_up(struct rte_eth_dev *dev)
{
	struct sfc_adapter *adapter = sfc_adapter_by_eth_dev(dev);
	int err;

	sfc_log_init(adapter, "entry");

	sfc_adapter_lock(adapter);
	err = sfc_start(adapter);
	sfc_adapter_unlock(adapter);

	/* sfc_start() reports positive errno; negate it for ethdev */
	SFC_ASSERT(err >= 0);
	return -err;
}
318 
/*
 * ethdev set-link-down callback, implemented by stopping the adapter.
 * Always returns 0.
 */
static int
sfc_dev_set_link_down(struct rte_eth_dev *dev)
{
	struct sfc_adapter *adapter = sfc_adapter_by_eth_dev(dev);

	sfc_log_init(adapter, "entry");

	sfc_adapter_lock(adapter);
	sfc_stop(adapter);
	sfc_adapter_unlock(adapter);

	return 0;
}
332 
333 static void
334 sfc_eth_dev_secondary_clear_ops(struct rte_eth_dev *dev)
335 {
336 	free(dev->process_private);
337 	rte_eth_dev_release_port(dev);
338 }
339 
340 static int
341 sfc_dev_close(struct rte_eth_dev *dev)
342 {
343 	struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
344 
345 	sfc_log_init(sa, "entry");
346 
347 	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
348 		sfc_eth_dev_secondary_clear_ops(dev);
349 		return 0;
350 	}
351 
352 	sfc_pre_detach(sa);
353 
354 	sfc_adapter_lock(sa);
355 	switch (sa->state) {
356 	case SFC_ETHDEV_STARTED:
357 		sfc_stop(sa);
358 		SFC_ASSERT(sa->state == SFC_ETHDEV_CONFIGURED);
359 		/* FALLTHROUGH */
360 	case SFC_ETHDEV_CONFIGURED:
361 		sfc_close(sa);
362 		SFC_ASSERT(sa->state == SFC_ETHDEV_INITIALIZED);
363 		/* FALLTHROUGH */
364 	case SFC_ETHDEV_INITIALIZED:
365 		break;
366 	default:
367 		sfc_err(sa, "unexpected adapter state %u on close", sa->state);
368 		break;
369 	}
370 
371 	/*
372 	 * Cleanup all resources.
373 	 * Rollback primary process sfc_eth_dev_init() below.
374 	 */
375 
376 	sfc_eth_dev_clear_ops(dev);
377 
378 	sfc_detach(sa);
379 	sfc_unprobe(sa);
380 
381 	sfc_kvargs_cleanup(sa);
382 
383 	sfc_adapter_unlock(sa);
384 	sfc_adapter_lock_fini(sa);
385 
386 	sfc_log_init(sa, "done");
387 
388 	/* Required for logging, so cleanup last */
389 	sa->eth_dev = NULL;
390 
391 	free(sa);
392 
393 	return 0;
394 }
395 
396 static int
397 sfc_dev_filter_set(struct rte_eth_dev *dev, enum sfc_dev_filter_mode mode,
398 		   boolean_t enabled)
399 {
400 	struct sfc_port *port;
401 	boolean_t *toggle;
402 	struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
403 	boolean_t allmulti = (mode == SFC_DEV_FILTER_MODE_ALLMULTI);
404 	const char *desc = (allmulti) ? "all-multi" : "promiscuous";
405 	int rc = 0;
406 
407 	sfc_adapter_lock(sa);
408 
409 	port = &sa->port;
410 	toggle = (allmulti) ? (&port->allmulti) : (&port->promisc);
411 
412 	if (*toggle != enabled) {
413 		*toggle = enabled;
414 
415 		if (sfc_sa2shared(sa)->isolated) {
416 			sfc_warn(sa, "isolated mode is active on the port");
417 			sfc_warn(sa, "the change is to be applied on the next "
418 				     "start provided that isolated mode is "
419 				     "disabled prior the next start");
420 		} else if ((sa->state == SFC_ETHDEV_STARTED) &&
421 			   ((rc = sfc_set_rx_mode(sa)) != 0)) {
422 			*toggle = !(enabled);
423 			sfc_warn(sa, "Failed to %s %s mode, rc = %d",
424 				 ((enabled) ? "enable" : "disable"), desc, rc);
425 
426 			/*
427 			 * For promiscuous and all-multicast filters a
428 			 * permission failure should be reported as an
429 			 * unsupported filter.
430 			 */
431 			if (rc == EPERM)
432 				rc = ENOTSUP;
433 		}
434 	}
435 
436 	sfc_adapter_unlock(sa);
437 	return rc;
438 }
439 
440 static int
441 sfc_dev_promisc_enable(struct rte_eth_dev *dev)
442 {
443 	int rc = sfc_dev_filter_set(dev, SFC_DEV_FILTER_MODE_PROMISC, B_TRUE);
444 
445 	SFC_ASSERT(rc >= 0);
446 	return -rc;
447 }
448 
449 static int
450 sfc_dev_promisc_disable(struct rte_eth_dev *dev)
451 {
452 	int rc = sfc_dev_filter_set(dev, SFC_DEV_FILTER_MODE_PROMISC, B_FALSE);
453 
454 	SFC_ASSERT(rc >= 0);
455 	return -rc;
456 }
457 
458 static int
459 sfc_dev_allmulti_enable(struct rte_eth_dev *dev)
460 {
461 	int rc = sfc_dev_filter_set(dev, SFC_DEV_FILTER_MODE_ALLMULTI, B_TRUE);
462 
463 	SFC_ASSERT(rc >= 0);
464 	return -rc;
465 }
466 
467 static int
468 sfc_dev_allmulti_disable(struct rte_eth_dev *dev)
469 {
470 	int rc = sfc_dev_filter_set(dev, SFC_DEV_FILTER_MODE_ALLMULTI, B_FALSE);
471 
472 	SFC_ASSERT(rc >= 0);
473 	return -rc;
474 }
475 
476 static int
477 sfc_rx_queue_setup(struct rte_eth_dev *dev, uint16_t ethdev_qid,
478 		   uint16_t nb_rx_desc, unsigned int socket_id,
479 		   const struct rte_eth_rxconf *rx_conf,
480 		   struct rte_mempool *mb_pool)
481 {
482 	struct sfc_adapter_shared *sas = sfc_adapter_shared_by_eth_dev(dev);
483 	struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
484 	sfc_ethdev_qid_t sfc_ethdev_qid = ethdev_qid;
485 	struct sfc_rxq_info *rxq_info;
486 	sfc_sw_index_t sw_index;
487 	int rc;
488 
489 	sfc_log_init(sa, "RxQ=%u nb_rx_desc=%u socket_id=%u",
490 		     ethdev_qid, nb_rx_desc, socket_id);
491 
492 	sfc_adapter_lock(sa);
493 
494 	sw_index = sfc_rxq_sw_index_by_ethdev_rx_qid(sas, sfc_ethdev_qid);
495 	rc = sfc_rx_qinit(sa, sw_index, nb_rx_desc, socket_id,
496 			  rx_conf, mb_pool);
497 	if (rc != 0)
498 		goto fail_rx_qinit;
499 
500 	rxq_info = sfc_rxq_info_by_ethdev_qid(sas, sfc_ethdev_qid);
501 	dev->data->rx_queues[ethdev_qid] = rxq_info->dp;
502 
503 	sfc_adapter_unlock(sa);
504 
505 	return 0;
506 
507 fail_rx_qinit:
508 	sfc_adapter_unlock(sa);
509 	SFC_ASSERT(rc > 0);
510 	return -rc;
511 }
512 
513 static void
514 sfc_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
515 {
516 	struct sfc_dp_rxq *dp_rxq = dev->data->rx_queues[qid];
517 	struct sfc_rxq *rxq;
518 	struct sfc_adapter *sa;
519 	sfc_sw_index_t sw_index;
520 
521 	if (dp_rxq == NULL)
522 		return;
523 
524 	rxq = sfc_rxq_by_dp_rxq(dp_rxq);
525 	sa = rxq->evq->sa;
526 	sfc_adapter_lock(sa);
527 
528 	sw_index = dp_rxq->dpq.queue_id;
529 
530 	sfc_log_init(sa, "RxQ=%u", sw_index);
531 
532 	sfc_rx_qfini(sa, sw_index);
533 
534 	sfc_adapter_unlock(sa);
535 }
536 
537 static int
538 sfc_tx_queue_setup(struct rte_eth_dev *dev, uint16_t ethdev_qid,
539 		   uint16_t nb_tx_desc, unsigned int socket_id,
540 		   const struct rte_eth_txconf *tx_conf)
541 {
542 	struct sfc_adapter_shared *sas = sfc_adapter_shared_by_eth_dev(dev);
543 	struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
544 	struct sfc_txq_info *txq_info;
545 	sfc_sw_index_t sw_index;
546 	int rc;
547 
548 	sfc_log_init(sa, "TxQ = %u, nb_tx_desc = %u, socket_id = %u",
549 		     ethdev_qid, nb_tx_desc, socket_id);
550 
551 	sfc_adapter_lock(sa);
552 
553 	sw_index = sfc_txq_sw_index_by_ethdev_tx_qid(sas, ethdev_qid);
554 	rc = sfc_tx_qinit(sa, sw_index, nb_tx_desc, socket_id, tx_conf);
555 	if (rc != 0)
556 		goto fail_tx_qinit;
557 
558 	txq_info = sfc_txq_info_by_ethdev_qid(sas, ethdev_qid);
559 	dev->data->tx_queues[ethdev_qid] = txq_info->dp;
560 
561 	sfc_adapter_unlock(sa);
562 	return 0;
563 
564 fail_tx_qinit:
565 	sfc_adapter_unlock(sa);
566 	SFC_ASSERT(rc > 0);
567 	return -rc;
568 }
569 
570 static void
571 sfc_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
572 {
573 	struct sfc_dp_txq *dp_txq = dev->data->tx_queues[qid];
574 	struct sfc_txq *txq;
575 	sfc_sw_index_t sw_index;
576 	struct sfc_adapter *sa;
577 
578 	if (dp_txq == NULL)
579 		return;
580 
581 	txq = sfc_txq_by_dp_txq(dp_txq);
582 	sw_index = dp_txq->dpq.queue_id;
583 
584 	SFC_ASSERT(txq->evq != NULL);
585 	sa = txq->evq->sa;
586 
587 	sfc_log_init(sa, "TxQ = %u", sw_index);
588 
589 	sfc_adapter_lock(sa);
590 
591 	sfc_tx_qfini(sa, sw_index);
592 
593 	sfc_adapter_unlock(sa);
594 }
595 
596 static void
597 sfc_stats_get_dp_rx(struct sfc_adapter *sa, uint64_t *pkts, uint64_t *bytes)
598 {
599 	struct sfc_adapter_shared *sas = sfc_sa2shared(sa);
600 	uint64_t pkts_sum = 0;
601 	uint64_t bytes_sum = 0;
602 	unsigned int i;
603 
604 	for (i = 0; i < sas->ethdev_rxq_count; ++i) {
605 		struct sfc_rxq_info *rxq_info;
606 
607 		rxq_info = sfc_rxq_info_by_ethdev_qid(sas, i);
608 		if (rxq_info->state & SFC_RXQ_INITIALIZED) {
609 			union sfc_pkts_bytes qstats;
610 
611 			sfc_pkts_bytes_get(&rxq_info->dp->dpq.stats, &qstats);
612 			pkts_sum += qstats.pkts -
613 					sa->sw_stats.reset_rx_pkts[i];
614 			bytes_sum += qstats.bytes -
615 					sa->sw_stats.reset_rx_bytes[i];
616 		}
617 	}
618 
619 	*pkts = pkts_sum;
620 	*bytes = bytes_sum;
621 }
622 
623 static void
624 sfc_stats_get_dp_tx(struct sfc_adapter *sa, uint64_t *pkts, uint64_t *bytes)
625 {
626 	struct sfc_adapter_shared *sas = sfc_sa2shared(sa);
627 	uint64_t pkts_sum = 0;
628 	uint64_t bytes_sum = 0;
629 	unsigned int i;
630 
631 	for (i = 0; i < sas->ethdev_txq_count; ++i) {
632 		struct sfc_txq_info *txq_info;
633 
634 		txq_info = sfc_txq_info_by_ethdev_qid(sas, i);
635 		if (txq_info->state & SFC_TXQ_INITIALIZED) {
636 			union sfc_pkts_bytes qstats;
637 
638 			sfc_pkts_bytes_get(&txq_info->dp->dpq.stats, &qstats);
639 			pkts_sum += qstats.pkts -
640 					sa->sw_stats.reset_tx_pkts[i];
641 			bytes_sum += qstats.bytes -
642 					sa->sw_stats.reset_tx_bytes[i];
643 		}
644 	}
645 
646 	*pkts = pkts_sum;
647 	*bytes = bytes_sum;
648 }
649 
/*
 * Update a statistic which is computed as a difference A - B, where A and
 * B both grow monotonically with hardware counters that are read
 * asynchronously.
 *
 * Because of the asynchronous reads, a packet may be already counted in A
 * but not yet in B (the difference is too big) or the other way round (the
 * difference is too small). A slightly inaccurate value is tolerable, but
 * a counter that goes backwards is not, so the stored value is replaced
 * only if the new one is ahead of it (in wrap-around arithmetic). A new
 * value of zero is always accepted to cover MAC stats being zeroed, e.g.
 * as a result of a NIC reset.
 */
static void
sfc_update_diff_stat(uint64_t *stat, uint64_t newval)
{
	const int64_t delta = (int64_t)(newval - *stat);

	/* Keep the stored value unless moving forward or restarting at 0 */
	if (newval != 0 && delta <= 0)
		return;

	*stat = newval;
}
672 
673 static int
674 sfc_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
675 {
676 	const struct sfc_adapter_priv *sap = sfc_adapter_priv_by_eth_dev(dev);
677 	bool have_dp_rx_stats = sap->dp_rx->features & SFC_DP_RX_FEAT_STATS;
678 	bool have_dp_tx_stats = sap->dp_tx->features & SFC_DP_TX_FEAT_STATS;
679 	struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
680 	struct sfc_port *port = &sa->port;
681 	uint64_t *mac_stats;
682 	int ret;
683 
684 	sfc_adapter_lock(sa);
685 
686 	if (have_dp_rx_stats)
687 		sfc_stats_get_dp_rx(sa, &stats->ipackets, &stats->ibytes);
688 	if (have_dp_tx_stats)
689 		sfc_stats_get_dp_tx(sa, &stats->opackets, &stats->obytes);
690 
691 	ret = sfc_port_update_mac_stats(sa, B_FALSE);
692 	if (ret != 0)
693 		goto unlock;
694 
695 	mac_stats = port->mac_stats_buf;
696 
697 	if (EFX_MAC_STAT_SUPPORTED(port->mac_stats_mask,
698 				   EFX_MAC_VADAPTER_RX_UNICAST_PACKETS)) {
699 		if (!have_dp_rx_stats) {
700 			stats->ipackets =
701 				mac_stats[EFX_MAC_VADAPTER_RX_UNICAST_PACKETS] +
702 				mac_stats[EFX_MAC_VADAPTER_RX_MULTICAST_PACKETS] +
703 				mac_stats[EFX_MAC_VADAPTER_RX_BROADCAST_PACKETS];
704 			stats->ibytes =
705 				mac_stats[EFX_MAC_VADAPTER_RX_UNICAST_BYTES] +
706 				mac_stats[EFX_MAC_VADAPTER_RX_MULTICAST_BYTES] +
707 				mac_stats[EFX_MAC_VADAPTER_RX_BROADCAST_BYTES];
708 
709 			/* CRC is included in these stats, but shouldn't be */
710 			stats->ibytes -= stats->ipackets * RTE_ETHER_CRC_LEN;
711 		}
712 		if (!have_dp_tx_stats) {
713 			stats->opackets =
714 				mac_stats[EFX_MAC_VADAPTER_TX_UNICAST_PACKETS] +
715 				mac_stats[EFX_MAC_VADAPTER_TX_MULTICAST_PACKETS] +
716 				mac_stats[EFX_MAC_VADAPTER_TX_BROADCAST_PACKETS];
717 			stats->obytes =
718 				mac_stats[EFX_MAC_VADAPTER_TX_UNICAST_BYTES] +
719 				mac_stats[EFX_MAC_VADAPTER_TX_MULTICAST_BYTES] +
720 				mac_stats[EFX_MAC_VADAPTER_TX_BROADCAST_BYTES];
721 
722 			/* CRC is included in these stats, but shouldn't be */
723 			stats->obytes -= stats->opackets * RTE_ETHER_CRC_LEN;
724 		}
725 		stats->imissed = mac_stats[EFX_MAC_VADAPTER_RX_BAD_PACKETS];
726 		stats->oerrors = mac_stats[EFX_MAC_VADAPTER_TX_BAD_PACKETS];
727 	} else {
728 		if (!have_dp_tx_stats) {
729 			stats->opackets = mac_stats[EFX_MAC_TX_PKTS];
730 			stats->obytes = mac_stats[EFX_MAC_TX_OCTETS] -
731 				mac_stats[EFX_MAC_TX_PKTS] * RTE_ETHER_CRC_LEN;
732 		}
733 
734 		/*
735 		 * Take into account stats which are whenever supported
736 		 * on EF10. If some stat is not supported by current
737 		 * firmware variant or HW revision, it is guaranteed
738 		 * to be zero in mac_stats.
739 		 */
740 		stats->imissed =
741 			mac_stats[EFX_MAC_RX_NODESC_DROP_CNT] +
742 			mac_stats[EFX_MAC_PM_TRUNC_BB_OVERFLOW] +
743 			mac_stats[EFX_MAC_PM_DISCARD_BB_OVERFLOW] +
744 			mac_stats[EFX_MAC_PM_TRUNC_VFIFO_FULL] +
745 			mac_stats[EFX_MAC_PM_DISCARD_VFIFO_FULL] +
746 			mac_stats[EFX_MAC_PM_TRUNC_QBB] +
747 			mac_stats[EFX_MAC_PM_DISCARD_QBB] +
748 			mac_stats[EFX_MAC_PM_DISCARD_MAPPING] +
749 			mac_stats[EFX_MAC_RXDP_Q_DISABLED_PKTS] +
750 			mac_stats[EFX_MAC_RXDP_DI_DROPPED_PKTS];
751 		stats->ierrors =
752 			mac_stats[EFX_MAC_RX_FCS_ERRORS] +
753 			mac_stats[EFX_MAC_RX_ALIGN_ERRORS] +
754 			mac_stats[EFX_MAC_RX_JABBER_PKTS];
755 		/* no oerrors counters supported on EF10 */
756 
757 		if (!have_dp_rx_stats) {
758 			/* Exclude missed, errors and pauses from Rx packets */
759 			sfc_update_diff_stat(&port->ipackets,
760 				mac_stats[EFX_MAC_RX_PKTS] -
761 				mac_stats[EFX_MAC_RX_PAUSE_PKTS] -
762 				stats->imissed - stats->ierrors);
763 			stats->ipackets = port->ipackets;
764 			stats->ibytes = mac_stats[EFX_MAC_RX_OCTETS] -
765 				mac_stats[EFX_MAC_RX_PKTS] * RTE_ETHER_CRC_LEN;
766 		}
767 	}
768 
769 unlock:
770 	sfc_adapter_unlock(sa);
771 	SFC_ASSERT(ret >= 0);
772 	return -ret;
773 }
774 
775 static int
776 sfc_stats_reset(struct rte_eth_dev *dev)
777 {
778 	struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
779 	struct sfc_port *port = &sa->port;
780 	int rc;
781 
782 	sfc_adapter_lock(sa);
783 
784 	if (sa->state != SFC_ETHDEV_STARTED) {
785 		/*
786 		 * The operation cannot be done if port is not started; it
787 		 * will be scheduled to be done during the next port start
788 		 */
789 		port->mac_stats_reset_pending = B_TRUE;
790 		sfc_adapter_unlock(sa);
791 		return 0;
792 	}
793 
794 	rc = sfc_port_reset_mac_stats(sa);
795 	if (rc != 0)
796 		sfc_err(sa, "failed to reset statistics (rc = %d)", rc);
797 
798 	sfc_sw_xstats_reset(sa);
799 
800 	sfc_adapter_unlock(sa);
801 
802 	SFC_ASSERT(rc >= 0);
803 	return -rc;
804 }
805 
806 static unsigned int
807 sfc_xstats_get_nb_supported(struct sfc_adapter *sa)
808 {
809 	struct sfc_port *port = &sa->port;
810 	unsigned int nb_supported;
811 
812 	sfc_adapter_lock(sa);
813 	nb_supported = port->mac_stats_nb_supported +
814 		       sfc_sw_xstats_get_nb_supported(sa);
815 	sfc_adapter_unlock(sa);
816 
817 	return nb_supported;
818 }
819 
/*
 * ethdev xstats get callback.
 *
 * With a NULL xstats array only the number of supported statistics is
 * returned. Otherwise MAC statistics are retrieved first, followed by
 * software xstats; the return value is the number of supported
 * statistics, or the negative value reported by the MAC statistics
 * retrieval on failure.
 */
static int
sfc_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
	       unsigned int xstats_count)
{
	struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
	unsigned int written = 0;
	unsigned int supported;
	int mac_rc;

	if (unlikely(xstats == NULL))
		return sfc_xstats_get_nb_supported(sa);

	mac_rc = sfc_port_get_mac_stats(sa, xstats, xstats_count, &written);
	if (mac_rc < 0)
		return mac_rc;

	/* A non-negative result is the number of supported MAC stats */
	supported = mac_rc;
	sfc_sw_xstats_get_vals(sa, xstats, xstats_count, &written, &supported);

	return supported;
}
842 
843 static int
844 sfc_xstats_get_names(struct rte_eth_dev *dev,
845 		     struct rte_eth_xstat_name *xstats_names,
846 		     unsigned int xstats_count)
847 {
848 	struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
849 	struct sfc_port *port = &sa->port;
850 	unsigned int i;
851 	unsigned int nstats = 0;
852 	unsigned int nb_written = 0;
853 	int ret;
854 
855 	if (unlikely(xstats_names == NULL))
856 		return sfc_xstats_get_nb_supported(sa);
857 
858 	for (i = 0; i < EFX_MAC_NSTATS; ++i) {
859 		if (EFX_MAC_STAT_SUPPORTED(port->mac_stats_mask, i)) {
860 			if (nstats < xstats_count) {
861 				strlcpy(xstats_names[nstats].name,
862 					efx_mac_stat_name(sa->nic, i),
863 					sizeof(xstats_names[0].name));
864 				nb_written++;
865 			}
866 			nstats++;
867 		}
868 	}
869 
870 	ret = sfc_sw_xstats_get_names(sa, xstats_names, xstats_count,
871 				      &nb_written, &nstats);
872 	if (ret != 0) {
873 		SFC_ASSERT(ret < 0);
874 		return ret;
875 	}
876 
877 	return nstats;
878 }
879 
880 static int
881 sfc_xstats_get_by_id(struct rte_eth_dev *dev, const uint64_t *ids,
882 		     uint64_t *values, unsigned int n)
883 {
884 	struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
885 	struct sfc_port *port = &sa->port;
886 	unsigned int nb_supported;
887 	unsigned int i;
888 	int rc;
889 
890 	if (unlikely(ids == NULL || values == NULL))
891 		return -EINVAL;
892 
893 	/*
894 	 * Values array could be filled in nonsequential order. Fill values with
895 	 * constant indicating invalid ID first.
896 	 */
897 	for (i = 0; i < n; i++)
898 		values[i] = SFC_XSTAT_ID_INVALID_VAL;
899 
900 	rc = sfc_port_get_mac_stats_by_id(sa, ids, values, n);
901 	if (rc != 0)
902 		return rc;
903 
904 	nb_supported = port->mac_stats_nb_supported;
905 	sfc_sw_xstats_get_vals_by_id(sa, ids, values, n, &nb_supported);
906 
907 	/* Return number of written stats before invalid ID is encountered. */
908 	for (i = 0; i < n; i++) {
909 		if (values[i] == SFC_XSTAT_ID_INVALID_VAL)
910 			return i;
911 	}
912 
913 	return n;
914 }
915 
916 static int
917 sfc_xstats_get_names_by_id(struct rte_eth_dev *dev,
918 			   const uint64_t *ids,
919 			   struct rte_eth_xstat_name *xstats_names,
920 			   unsigned int size)
921 {
922 	struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
923 	struct sfc_port *port = &sa->port;
924 	unsigned int nb_supported;
925 	unsigned int i;
926 	int ret;
927 
928 	if (unlikely(xstats_names == NULL && ids != NULL) ||
929 	    unlikely(xstats_names != NULL && ids == NULL))
930 		return -EINVAL;
931 
932 	if (unlikely(xstats_names == NULL && ids == NULL))
933 		return sfc_xstats_get_nb_supported(sa);
934 
935 	/*
936 	 * Names array could be filled in nonsequential order. Fill names with
937 	 * string indicating invalid ID first.
938 	 */
939 	for (i = 0; i < size; i++)
940 		xstats_names[i].name[0] = SFC_XSTAT_ID_INVALID_NAME;
941 
942 	sfc_adapter_lock(sa);
943 
944 	SFC_ASSERT(port->mac_stats_nb_supported <=
945 		   RTE_DIM(port->mac_stats_by_id));
946 
947 	for (i = 0; i < size; i++) {
948 		if (ids[i] < port->mac_stats_nb_supported) {
949 			strlcpy(xstats_names[i].name,
950 				efx_mac_stat_name(sa->nic,
951 						 port->mac_stats_by_id[ids[i]]),
952 				sizeof(xstats_names[0].name));
953 		}
954 	}
955 
956 	nb_supported = port->mac_stats_nb_supported;
957 
958 	sfc_adapter_unlock(sa);
959 
960 	ret = sfc_sw_xstats_get_names_by_id(sa, ids, xstats_names, size,
961 					    &nb_supported);
962 	if (ret != 0) {
963 		SFC_ASSERT(ret < 0);
964 		return ret;
965 	}
966 
967 	/* Return number of written names before invalid ID is encountered. */
968 	for (i = 0; i < size; i++) {
969 		if (xstats_names[i].name[0] == SFC_XSTAT_ID_INVALID_NAME)
970 			return i;
971 	}
972 
973 	return size;
974 }
975 
976 static int
977 sfc_flow_ctrl_get(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf)
978 {
979 	struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
980 	unsigned int wanted_fc, link_fc;
981 
982 	memset(fc_conf, 0, sizeof(*fc_conf));
983 
984 	sfc_adapter_lock(sa);
985 
986 	if (sa->state == SFC_ETHDEV_STARTED)
987 		efx_mac_fcntl_get(sa->nic, &wanted_fc, &link_fc);
988 	else
989 		link_fc = sa->port.flow_ctrl;
990 
991 	switch (link_fc) {
992 	case 0:
993 		fc_conf->mode = RTE_ETH_FC_NONE;
994 		break;
995 	case EFX_FCNTL_RESPOND:
996 		fc_conf->mode = RTE_ETH_FC_RX_PAUSE;
997 		break;
998 	case EFX_FCNTL_GENERATE:
999 		fc_conf->mode = RTE_ETH_FC_TX_PAUSE;
1000 		break;
1001 	case (EFX_FCNTL_RESPOND | EFX_FCNTL_GENERATE):
1002 		fc_conf->mode = RTE_ETH_FC_FULL;
1003 		break;
1004 	default:
1005 		sfc_err(sa, "%s: unexpected flow control value %#x",
1006 			__func__, link_fc);
1007 	}
1008 
1009 	fc_conf->autoneg = sa->port.flow_ctrl_autoneg;
1010 
1011 	sfc_adapter_unlock(sa);
1012 
1013 	return 0;
1014 }
1015 
1016 static int
1017 sfc_flow_ctrl_set(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf)
1018 {
1019 	struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
1020 	struct sfc_port *port = &sa->port;
1021 	unsigned int fcntl;
1022 	int rc;
1023 
1024 	if (fc_conf->high_water != 0 || fc_conf->low_water != 0 ||
1025 	    fc_conf->pause_time != 0 || fc_conf->send_xon != 0 ||
1026 	    fc_conf->mac_ctrl_frame_fwd != 0) {
1027 		sfc_err(sa, "unsupported flow control settings specified");
1028 		rc = EINVAL;
1029 		goto fail_inval;
1030 	}
1031 
1032 	switch (fc_conf->mode) {
1033 	case RTE_ETH_FC_NONE:
1034 		fcntl = 0;
1035 		break;
1036 	case RTE_ETH_FC_RX_PAUSE:
1037 		fcntl = EFX_FCNTL_RESPOND;
1038 		break;
1039 	case RTE_ETH_FC_TX_PAUSE:
1040 		fcntl = EFX_FCNTL_GENERATE;
1041 		break;
1042 	case RTE_ETH_FC_FULL:
1043 		fcntl = EFX_FCNTL_RESPOND | EFX_FCNTL_GENERATE;
1044 		break;
1045 	default:
1046 		rc = EINVAL;
1047 		goto fail_inval;
1048 	}
1049 
1050 	sfc_adapter_lock(sa);
1051 
1052 	if (sa->state == SFC_ETHDEV_STARTED) {
1053 		rc = efx_mac_fcntl_set(sa->nic, fcntl, fc_conf->autoneg);
1054 		if (rc != 0)
1055 			goto fail_mac_fcntl_set;
1056 	}
1057 
1058 	port->flow_ctrl = fcntl;
1059 	port->flow_ctrl_autoneg = fc_conf->autoneg;
1060 
1061 	sfc_adapter_unlock(sa);
1062 
1063 	return 0;
1064 
1065 fail_mac_fcntl_set:
1066 	sfc_adapter_unlock(sa);
1067 fail_inval:
1068 	SFC_ASSERT(rc > 0);
1069 	return -rc;
1070 }
1071 
1072 static int
1073 sfc_check_scatter_on_all_rx_queues(struct sfc_adapter *sa, size_t pdu)
1074 {
1075 	struct sfc_adapter_shared * const sas = sfc_sa2shared(sa);
1076 	const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
1077 	boolean_t scatter_enabled;
1078 	const char *error;
1079 	unsigned int i;
1080 
1081 	for (i = 0; i < sas->rxq_count; i++) {
1082 		if ((sas->rxq_info[i].state & SFC_RXQ_INITIALIZED) == 0)
1083 			continue;
1084 
1085 		scatter_enabled = (sas->rxq_info[i].type_flags &
1086 				   EFX_RXQ_FLAG_SCATTER);
1087 
1088 		if (!sfc_rx_check_scatter(pdu, sa->rxq_ctrl[i].buf_size,
1089 					  encp->enc_rx_prefix_size,
1090 					  scatter_enabled,
1091 					  encp->enc_rx_scatter_max, &error)) {
1092 			sfc_err(sa, "MTU check for RxQ %u failed: %s", i,
1093 				error);
1094 			return EINVAL;
1095 		}
1096 	}
1097 
1098 	return 0;
1099 }
1100 
1101 static int
1102 sfc_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu)
1103 {
1104 	struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
1105 	size_t pdu = EFX_MAC_PDU(mtu);
1106 	size_t old_pdu;
1107 	int rc;
1108 
1109 	sfc_log_init(sa, "mtu=%u", mtu);
1110 
1111 	rc = EINVAL;
1112 	if (pdu < EFX_MAC_PDU_MIN) {
1113 		sfc_err(sa, "too small MTU %u (PDU size %u less than min %u)",
1114 			(unsigned int)mtu, (unsigned int)pdu,
1115 			EFX_MAC_PDU_MIN);
1116 		goto fail_inval;
1117 	}
1118 	if (pdu > EFX_MAC_PDU_MAX) {
1119 		sfc_err(sa, "too big MTU %u (PDU size %u greater than max %u)",
1120 			(unsigned int)mtu, (unsigned int)pdu,
1121 			(unsigned int)EFX_MAC_PDU_MAX);
1122 		goto fail_inval;
1123 	}
1124 
1125 	sfc_adapter_lock(sa);
1126 
1127 	rc = sfc_check_scatter_on_all_rx_queues(sa, pdu);
1128 	if (rc != 0)
1129 		goto fail_check_scatter;
1130 
1131 	if (pdu != sa->port.pdu) {
1132 		if (sa->state == SFC_ETHDEV_STARTED) {
1133 			sfc_stop(sa);
1134 
1135 			old_pdu = sa->port.pdu;
1136 			sa->port.pdu = pdu;
1137 			rc = sfc_start(sa);
1138 			if (rc != 0)
1139 				goto fail_start;
1140 		} else {
1141 			sa->port.pdu = pdu;
1142 		}
1143 	}
1144 
1145 	sfc_adapter_unlock(sa);
1146 
1147 	sfc_log_init(sa, "done");
1148 	return 0;
1149 
1150 fail_start:
1151 	sa->port.pdu = old_pdu;
1152 	if (sfc_start(sa) != 0)
1153 		sfc_err(sa, "cannot start with neither new (%u) nor old (%u) "
1154 			"PDU max size - port is stopped",
1155 			(unsigned int)pdu, (unsigned int)old_pdu);
1156 
1157 fail_check_scatter:
1158 	sfc_adapter_unlock(sa);
1159 
1160 fail_inval:
1161 	sfc_log_init(sa, "failed %d", rc);
1162 	SFC_ASSERT(rc > 0);
1163 	return -rc;
1164 }
1165 static int
1166 sfc_mac_addr_set(struct rte_eth_dev *dev, struct rte_ether_addr *mac_addr)
1167 {
1168 	struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
1169 	const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
1170 	struct sfc_port *port = &sa->port;
1171 	struct rte_ether_addr *old_addr = &dev->data->mac_addrs[0];
1172 	int rc = 0;
1173 
1174 	sfc_adapter_lock(sa);
1175 
1176 	if (rte_is_same_ether_addr(mac_addr, &port->default_mac_addr))
1177 		goto unlock;
1178 
1179 	/*
1180 	 * Copy the address to the device private data so that
1181 	 * it could be recalled in the case of adapter restart.
1182 	 */
1183 	rte_ether_addr_copy(mac_addr, &port->default_mac_addr);
1184 
1185 	/*
1186 	 * Neither of the two following checks can return
1187 	 * an error. The new MAC address is preserved in
1188 	 * the device private data and can be activated
1189 	 * on the next port start if the user prevents
1190 	 * isolated mode from being enabled.
1191 	 */
1192 	if (sfc_sa2shared(sa)->isolated) {
1193 		sfc_warn(sa, "isolated mode is active on the port");
1194 		sfc_warn(sa, "will not set MAC address");
1195 		goto unlock;
1196 	}
1197 
1198 	if (sa->state != SFC_ETHDEV_STARTED) {
1199 		sfc_notice(sa, "the port is not started");
1200 		sfc_notice(sa, "the new MAC address will be set on port start");
1201 
1202 		goto unlock;
1203 	}
1204 
1205 	if (encp->enc_allow_set_mac_with_installed_filters) {
1206 		rc = efx_mac_addr_set(sa->nic, mac_addr->addr_bytes);
1207 		if (rc != 0) {
1208 			sfc_err(sa, "cannot set MAC address (rc = %u)", rc);
1209 			goto unlock;
1210 		}
1211 
1212 		/*
1213 		 * Changing the MAC address by means of MCDI request
1214 		 * has no effect on received traffic, therefore
1215 		 * we also need to update unicast filters
1216 		 */
1217 		rc = sfc_set_rx_mode_unchecked(sa);
1218 		if (rc != 0) {
1219 			sfc_err(sa, "cannot set filter (rc = %u)", rc);
1220 			/* Rollback the old address */
1221 			(void)efx_mac_addr_set(sa->nic, old_addr->addr_bytes);
1222 			(void)sfc_set_rx_mode_unchecked(sa);
1223 		}
1224 	} else {
1225 		sfc_warn(sa, "cannot set MAC address with filters installed");
1226 		sfc_warn(sa, "adapter will be restarted to pick the new MAC");
1227 		sfc_warn(sa, "(some traffic may be dropped)");
1228 
1229 		/*
1230 		 * Since setting MAC address with filters installed is not
1231 		 * allowed on the adapter, the new MAC address will be set
1232 		 * by means of adapter restart. sfc_start() shall retrieve
1233 		 * the new address from the device private data and set it.
1234 		 */
1235 		sfc_stop(sa);
1236 		rc = sfc_start(sa);
1237 		if (rc != 0)
1238 			sfc_err(sa, "cannot restart adapter (rc = %u)", rc);
1239 	}
1240 
1241 unlock:
1242 	if (rc != 0)
1243 		rte_ether_addr_copy(old_addr, &port->default_mac_addr);
1244 
1245 	sfc_adapter_unlock(sa);
1246 
1247 	SFC_ASSERT(rc >= 0);
1248 	return -rc;
1249 }
1250 
1251 
1252 static int
1253 sfc_set_mc_addr_list(struct rte_eth_dev *dev,
1254 		struct rte_ether_addr *mc_addr_set, uint32_t nb_mc_addr)
1255 {
1256 	struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
1257 	struct sfc_port *port = &sa->port;
1258 	uint8_t *mc_addrs = port->mcast_addrs;
1259 	int rc;
1260 	unsigned int i;
1261 
1262 	if (sfc_sa2shared(sa)->isolated) {
1263 		sfc_err(sa, "isolated mode is active on the port");
1264 		sfc_err(sa, "will not set multicast address list");
1265 		return -ENOTSUP;
1266 	}
1267 
1268 	if (mc_addrs == NULL)
1269 		return -ENOBUFS;
1270 
1271 	if (nb_mc_addr > port->max_mcast_addrs) {
1272 		sfc_err(sa, "too many multicast addresses: %u > %u",
1273 			 nb_mc_addr, port->max_mcast_addrs);
1274 		return -EINVAL;
1275 	}
1276 
1277 	for (i = 0; i < nb_mc_addr; ++i) {
1278 		rte_memcpy(mc_addrs, mc_addr_set[i].addr_bytes,
1279 				 EFX_MAC_ADDR_LEN);
1280 		mc_addrs += EFX_MAC_ADDR_LEN;
1281 	}
1282 
1283 	port->nb_mcast_addrs = nb_mc_addr;
1284 
1285 	if (sa->state != SFC_ETHDEV_STARTED)
1286 		return 0;
1287 
1288 	rc = efx_mac_multicast_list_set(sa->nic, port->mcast_addrs,
1289 					port->nb_mcast_addrs);
1290 	if (rc != 0)
1291 		sfc_err(sa, "cannot set multicast address list (rc = %u)", rc);
1292 
1293 	SFC_ASSERT(rc >= 0);
1294 	return -rc;
1295 }
1296 
1297 /*
1298  * The function is used by the secondary process as well. It must not
1299  * use any process-local pointers from the adapter data.
1300  */
1301 static void
1302 sfc_rx_queue_info_get(struct rte_eth_dev *dev, uint16_t ethdev_qid,
1303 		      struct rte_eth_rxq_info *qinfo)
1304 {
1305 	struct sfc_adapter_shared *sas = sfc_adapter_shared_by_eth_dev(dev);
1306 	sfc_ethdev_qid_t sfc_ethdev_qid = ethdev_qid;
1307 	struct sfc_rxq_info *rxq_info;
1308 
1309 	rxq_info = sfc_rxq_info_by_ethdev_qid(sas, sfc_ethdev_qid);
1310 
1311 	qinfo->mp = rxq_info->refill_mb_pool;
1312 	qinfo->conf.rx_free_thresh = rxq_info->refill_threshold;
1313 	qinfo->conf.rx_drop_en = 1;
1314 	qinfo->conf.rx_deferred_start = rxq_info->deferred_start;
1315 	qinfo->conf.offloads = dev->data->dev_conf.rxmode.offloads;
1316 	if (rxq_info->type_flags & EFX_RXQ_FLAG_SCATTER) {
1317 		qinfo->conf.offloads |= RTE_ETH_RX_OFFLOAD_SCATTER;
1318 		qinfo->scattered_rx = 1;
1319 	}
1320 	qinfo->nb_desc = rxq_info->entries;
1321 }
1322 
1323 /*
1324  * The function is used by the secondary process as well. It must not
1325  * use any process-local pointers from the adapter data.
1326  */
1327 static void
1328 sfc_tx_queue_info_get(struct rte_eth_dev *dev, uint16_t ethdev_qid,
1329 		      struct rte_eth_txq_info *qinfo)
1330 {
1331 	struct sfc_adapter_shared *sas = sfc_adapter_shared_by_eth_dev(dev);
1332 	struct sfc_txq_info *txq_info;
1333 
1334 	SFC_ASSERT(ethdev_qid < sas->ethdev_txq_count);
1335 
1336 	txq_info = sfc_txq_info_by_ethdev_qid(sas, ethdev_qid);
1337 
1338 	memset(qinfo, 0, sizeof(*qinfo));
1339 
1340 	qinfo->conf.offloads = txq_info->offloads;
1341 	qinfo->conf.tx_free_thresh = txq_info->free_thresh;
1342 	qinfo->conf.tx_deferred_start = txq_info->deferred_start;
1343 	qinfo->nb_desc = txq_info->entries;
1344 }
1345 
1346 /*
1347  * The function is used by the secondary process as well. It must not
1348  * use any process-local pointers from the adapter data.
1349  */
1350 static uint32_t
1351 sfc_rx_queue_count(void *rx_queue)
1352 {
1353 	struct sfc_dp_rxq *dp_rxq = rx_queue;
1354 	const struct sfc_dp_rx *dp_rx;
1355 	struct sfc_rxq_info *rxq_info;
1356 
1357 	dp_rx = sfc_dp_rx_by_dp_rxq(dp_rxq);
1358 	rxq_info = sfc_rxq_info_by_dp_rxq(dp_rxq);
1359 
1360 	if ((rxq_info->state & SFC_RXQ_STARTED) == 0)
1361 		return 0;
1362 
1363 	return dp_rx->qdesc_npending(dp_rxq);
1364 }
1365 
1366 /*
1367  * The function is used by the secondary process as well. It must not
1368  * use any process-local pointers from the adapter data.
1369  */
1370 static int
1371 sfc_rx_descriptor_status(void *queue, uint16_t offset)
1372 {
1373 	struct sfc_dp_rxq *dp_rxq = queue;
1374 	const struct sfc_dp_rx *dp_rx;
1375 
1376 	dp_rx = sfc_dp_rx_by_dp_rxq(dp_rxq);
1377 
1378 	return dp_rx->qdesc_status(dp_rxq, offset);
1379 }
1380 
1381 /*
1382  * The function is used by the secondary process as well. It must not
1383  * use any process-local pointers from the adapter data.
1384  */
1385 static int
1386 sfc_tx_descriptor_status(void *queue, uint16_t offset)
1387 {
1388 	struct sfc_dp_txq *dp_txq = queue;
1389 	const struct sfc_dp_tx *dp_tx;
1390 
1391 	dp_tx = sfc_dp_tx_by_dp_txq(dp_txq);
1392 
1393 	return dp_tx->qdesc_status(dp_txq, offset);
1394 }
1395 
1396 static int
1397 sfc_rx_queue_start(struct rte_eth_dev *dev, uint16_t ethdev_qid)
1398 {
1399 	struct sfc_adapter_shared *sas = sfc_adapter_shared_by_eth_dev(dev);
1400 	struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
1401 	sfc_ethdev_qid_t sfc_ethdev_qid = ethdev_qid;
1402 	struct sfc_rxq_info *rxq_info;
1403 	sfc_sw_index_t sw_index;
1404 	int rc;
1405 
1406 	sfc_log_init(sa, "RxQ=%u", ethdev_qid);
1407 
1408 	sfc_adapter_lock(sa);
1409 
1410 	rc = EINVAL;
1411 	if (sa->state != SFC_ETHDEV_STARTED)
1412 		goto fail_not_started;
1413 
1414 	rxq_info = sfc_rxq_info_by_ethdev_qid(sas, sfc_ethdev_qid);
1415 	if (rxq_info->state != SFC_RXQ_INITIALIZED)
1416 		goto fail_not_setup;
1417 
1418 	sw_index = sfc_rxq_sw_index_by_ethdev_rx_qid(sas, sfc_ethdev_qid);
1419 	rc = sfc_rx_qstart(sa, sw_index);
1420 	if (rc != 0)
1421 		goto fail_rx_qstart;
1422 
1423 	rxq_info->deferred_started = B_TRUE;
1424 
1425 	sfc_adapter_unlock(sa);
1426 
1427 	return 0;
1428 
1429 fail_rx_qstart:
1430 fail_not_setup:
1431 fail_not_started:
1432 	sfc_adapter_unlock(sa);
1433 	SFC_ASSERT(rc > 0);
1434 	return -rc;
1435 }
1436 
1437 static int
1438 sfc_rx_queue_stop(struct rte_eth_dev *dev, uint16_t ethdev_qid)
1439 {
1440 	struct sfc_adapter_shared *sas = sfc_adapter_shared_by_eth_dev(dev);
1441 	struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
1442 	sfc_ethdev_qid_t sfc_ethdev_qid = ethdev_qid;
1443 	struct sfc_rxq_info *rxq_info;
1444 	sfc_sw_index_t sw_index;
1445 
1446 	sfc_log_init(sa, "RxQ=%u", ethdev_qid);
1447 
1448 	sfc_adapter_lock(sa);
1449 
1450 	sw_index = sfc_rxq_sw_index_by_ethdev_rx_qid(sas, sfc_ethdev_qid);
1451 	sfc_rx_qstop(sa, sw_index);
1452 
1453 	rxq_info = sfc_rxq_info_by_ethdev_qid(sas, sfc_ethdev_qid);
1454 	rxq_info->deferred_started = B_FALSE;
1455 
1456 	sfc_adapter_unlock(sa);
1457 
1458 	return 0;
1459 }
1460 
1461 static int
1462 sfc_tx_queue_start(struct rte_eth_dev *dev, uint16_t ethdev_qid)
1463 {
1464 	struct sfc_adapter_shared *sas = sfc_adapter_shared_by_eth_dev(dev);
1465 	struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
1466 	struct sfc_txq_info *txq_info;
1467 	sfc_sw_index_t sw_index;
1468 	int rc;
1469 
1470 	sfc_log_init(sa, "TxQ = %u", ethdev_qid);
1471 
1472 	sfc_adapter_lock(sa);
1473 
1474 	rc = EINVAL;
1475 	if (sa->state != SFC_ETHDEV_STARTED)
1476 		goto fail_not_started;
1477 
1478 	txq_info = sfc_txq_info_by_ethdev_qid(sas, ethdev_qid);
1479 	if (txq_info->state != SFC_TXQ_INITIALIZED)
1480 		goto fail_not_setup;
1481 
1482 	sw_index = sfc_txq_sw_index_by_ethdev_tx_qid(sas, ethdev_qid);
1483 	rc = sfc_tx_qstart(sa, sw_index);
1484 	if (rc != 0)
1485 		goto fail_tx_qstart;
1486 
1487 	txq_info->deferred_started = B_TRUE;
1488 
1489 	sfc_adapter_unlock(sa);
1490 	return 0;
1491 
1492 fail_tx_qstart:
1493 
1494 fail_not_setup:
1495 fail_not_started:
1496 	sfc_adapter_unlock(sa);
1497 	SFC_ASSERT(rc > 0);
1498 	return -rc;
1499 }
1500 
1501 static int
1502 sfc_tx_queue_stop(struct rte_eth_dev *dev, uint16_t ethdev_qid)
1503 {
1504 	struct sfc_adapter_shared *sas = sfc_adapter_shared_by_eth_dev(dev);
1505 	struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
1506 	struct sfc_txq_info *txq_info;
1507 	sfc_sw_index_t sw_index;
1508 
1509 	sfc_log_init(sa, "TxQ = %u", ethdev_qid);
1510 
1511 	sfc_adapter_lock(sa);
1512 
1513 	sw_index = sfc_txq_sw_index_by_ethdev_tx_qid(sas, ethdev_qid);
1514 	sfc_tx_qstop(sa, sw_index);
1515 
1516 	txq_info = sfc_txq_info_by_ethdev_qid(sas, ethdev_qid);
1517 	txq_info->deferred_started = B_FALSE;
1518 
1519 	sfc_adapter_unlock(sa);
1520 	return 0;
1521 }
1522 
1523 static efx_tunnel_protocol_t
1524 sfc_tunnel_rte_type_to_efx_udp_proto(enum rte_eth_tunnel_type rte_type)
1525 {
1526 	switch (rte_type) {
1527 	case RTE_ETH_TUNNEL_TYPE_VXLAN:
1528 		return EFX_TUNNEL_PROTOCOL_VXLAN;
1529 	case RTE_ETH_TUNNEL_TYPE_GENEVE:
1530 		return EFX_TUNNEL_PROTOCOL_GENEVE;
1531 	default:
1532 		return EFX_TUNNEL_NPROTOS;
1533 	}
1534 }
1535 
/* Operation to be performed by sfc_dev_udp_tunnel_op() */
enum sfc_udp_tunnel_op_e {
	/* Add a UDP tunnel port entry */
	SFC_UDP_TUNNEL_ADD_PORT,
	/* Remove a UDP tunnel port entry */
	SFC_UDP_TUNNEL_DEL_PORT,
};
1540 
1541 static int
1542 sfc_dev_udp_tunnel_op(struct rte_eth_dev *dev,
1543 		      struct rte_eth_udp_tunnel *tunnel_udp,
1544 		      enum sfc_udp_tunnel_op_e op)
1545 {
1546 	struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
1547 	efx_tunnel_protocol_t tunnel_proto;
1548 	int rc;
1549 
1550 	sfc_log_init(sa, "%s udp_port=%u prot_type=%u",
1551 		     (op == SFC_UDP_TUNNEL_ADD_PORT) ? "add" :
1552 		     (op == SFC_UDP_TUNNEL_DEL_PORT) ? "delete" : "unknown",
1553 		     tunnel_udp->udp_port, tunnel_udp->prot_type);
1554 
1555 	tunnel_proto =
1556 		sfc_tunnel_rte_type_to_efx_udp_proto(tunnel_udp->prot_type);
1557 	if (tunnel_proto >= EFX_TUNNEL_NPROTOS) {
1558 		rc = ENOTSUP;
1559 		goto fail_bad_proto;
1560 	}
1561 
1562 	sfc_adapter_lock(sa);
1563 
1564 	switch (op) {
1565 	case SFC_UDP_TUNNEL_ADD_PORT:
1566 		rc = efx_tunnel_config_udp_add(sa->nic,
1567 					       tunnel_udp->udp_port,
1568 					       tunnel_proto);
1569 		break;
1570 	case SFC_UDP_TUNNEL_DEL_PORT:
1571 		rc = efx_tunnel_config_udp_remove(sa->nic,
1572 						  tunnel_udp->udp_port,
1573 						  tunnel_proto);
1574 		break;
1575 	default:
1576 		rc = EINVAL;
1577 		goto fail_bad_op;
1578 	}
1579 
1580 	if (rc != 0)
1581 		goto fail_op;
1582 
1583 	if (sa->state == SFC_ETHDEV_STARTED) {
1584 		rc = efx_tunnel_reconfigure(sa->nic);
1585 		if (rc == EAGAIN) {
1586 			/*
1587 			 * Configuration is accepted by FW and MC reboot
1588 			 * is initiated to apply the changes. MC reboot
1589 			 * will be handled in a usual way (MC reboot
1590 			 * event on management event queue and adapter
1591 			 * restart).
1592 			 */
1593 			rc = 0;
1594 		} else if (rc != 0) {
1595 			goto fail_reconfigure;
1596 		}
1597 	}
1598 
1599 	sfc_adapter_unlock(sa);
1600 	return 0;
1601 
1602 fail_reconfigure:
1603 	/* Remove/restore entry since the change makes the trouble */
1604 	switch (op) {
1605 	case SFC_UDP_TUNNEL_ADD_PORT:
1606 		(void)efx_tunnel_config_udp_remove(sa->nic,
1607 						   tunnel_udp->udp_port,
1608 						   tunnel_proto);
1609 		break;
1610 	case SFC_UDP_TUNNEL_DEL_PORT:
1611 		(void)efx_tunnel_config_udp_add(sa->nic,
1612 						tunnel_udp->udp_port,
1613 						tunnel_proto);
1614 		break;
1615 	}
1616 
1617 fail_op:
1618 fail_bad_op:
1619 	sfc_adapter_unlock(sa);
1620 
1621 fail_bad_proto:
1622 	SFC_ASSERT(rc > 0);
1623 	return -rc;
1624 }
1625 
1626 static int
1627 sfc_dev_udp_tunnel_port_add(struct rte_eth_dev *dev,
1628 			    struct rte_eth_udp_tunnel *tunnel_udp)
1629 {
1630 	return sfc_dev_udp_tunnel_op(dev, tunnel_udp, SFC_UDP_TUNNEL_ADD_PORT);
1631 }
1632 
1633 static int
1634 sfc_dev_udp_tunnel_port_del(struct rte_eth_dev *dev,
1635 			    struct rte_eth_udp_tunnel *tunnel_udp)
1636 {
1637 	return sfc_dev_udp_tunnel_op(dev, tunnel_udp, SFC_UDP_TUNNEL_DEL_PORT);
1638 }
1639 
1640 /*
1641  * The function is used by the secondary process as well. It must not
1642  * use any process-local pointers from the adapter data.
1643  */
1644 static int
1645 sfc_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
1646 			  struct rte_eth_rss_conf *rss_conf)
1647 {
1648 	struct sfc_adapter_shared *sas = sfc_adapter_shared_by_eth_dev(dev);
1649 	struct sfc_rss *rss = &sas->rss;
1650 
1651 	if (rss->context_type != EFX_RX_SCALE_EXCLUSIVE)
1652 		return -ENOTSUP;
1653 
1654 	/*
1655 	 * Mapping of hash configuration between RTE and EFX is not one-to-one,
1656 	 * hence, conversion is done here to derive a correct set of RTE_ETH_RSS
1657 	 * flags which corresponds to the active EFX configuration stored
1658 	 * locally in 'sfc_adapter' and kept up-to-date
1659 	 */
1660 	rss_conf->rss_hf = sfc_rx_hf_efx_to_rte(rss, rss->hash_types);
1661 	rss_conf->rss_key_len = EFX_RSS_KEY_SIZE;
1662 	if (rss_conf->rss_key != NULL)
1663 		rte_memcpy(rss_conf->rss_key, rss->key, EFX_RSS_KEY_SIZE);
1664 
1665 	return 0;
1666 }
1667 
1668 static int
1669 sfc_dev_rss_hash_update(struct rte_eth_dev *dev,
1670 			struct rte_eth_rss_conf *rss_conf)
1671 {
1672 	struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
1673 	struct sfc_rss *rss = &sfc_sa2shared(sa)->rss;
1674 	unsigned int efx_hash_types;
1675 	uint32_t contexts[] = {EFX_RSS_CONTEXT_DEFAULT, rss->dummy_rss_context};
1676 	unsigned int n_contexts;
1677 	unsigned int mode_i = 0;
1678 	unsigned int key_i = 0;
1679 	unsigned int i = 0;
1680 	int rc = 0;
1681 
1682 	n_contexts = rss->dummy_rss_context == EFX_RSS_CONTEXT_DEFAULT ? 1 : 2;
1683 
1684 	if (sfc_sa2shared(sa)->isolated)
1685 		return -ENOTSUP;
1686 
1687 	if (rss->context_type != EFX_RX_SCALE_EXCLUSIVE) {
1688 		sfc_err(sa, "RSS is not available");
1689 		return -ENOTSUP;
1690 	}
1691 
1692 	if (rss->channels == 0) {
1693 		sfc_err(sa, "RSS is not configured");
1694 		return -EINVAL;
1695 	}
1696 
1697 	if ((rss_conf->rss_key != NULL) &&
1698 	    (rss_conf->rss_key_len != sizeof(rss->key))) {
1699 		sfc_err(sa, "RSS key size is wrong (should be %zu)",
1700 			sizeof(rss->key));
1701 		return -EINVAL;
1702 	}
1703 
1704 	sfc_adapter_lock(sa);
1705 
1706 	rc = sfc_rx_hf_rte_to_efx(sa, rss_conf->rss_hf, &efx_hash_types);
1707 	if (rc != 0)
1708 		goto fail_rx_hf_rte_to_efx;
1709 
1710 	for (mode_i = 0; mode_i < n_contexts; mode_i++) {
1711 		rc = efx_rx_scale_mode_set(sa->nic, contexts[mode_i],
1712 					   rss->hash_alg, efx_hash_types,
1713 					   B_TRUE);
1714 		if (rc != 0)
1715 			goto fail_scale_mode_set;
1716 	}
1717 
1718 	if (rss_conf->rss_key != NULL) {
1719 		if (sa->state == SFC_ETHDEV_STARTED) {
1720 			for (key_i = 0; key_i < n_contexts; key_i++) {
1721 				rc = efx_rx_scale_key_set(sa->nic,
1722 							  contexts[key_i],
1723 							  rss_conf->rss_key,
1724 							  sizeof(rss->key));
1725 				if (rc != 0)
1726 					goto fail_scale_key_set;
1727 			}
1728 		}
1729 
1730 		rte_memcpy(rss->key, rss_conf->rss_key, sizeof(rss->key));
1731 	}
1732 
1733 	rss->hash_types = efx_hash_types;
1734 
1735 	sfc_adapter_unlock(sa);
1736 
1737 	return 0;
1738 
1739 fail_scale_key_set:
1740 	for (i = 0; i < key_i; i++) {
1741 		if (efx_rx_scale_key_set(sa->nic, contexts[i], rss->key,
1742 					 sizeof(rss->key)) != 0)
1743 			sfc_err(sa, "failed to restore RSS key");
1744 	}
1745 
1746 fail_scale_mode_set:
1747 	for (i = 0; i < mode_i; i++) {
1748 		if (efx_rx_scale_mode_set(sa->nic, contexts[i],
1749 					  EFX_RX_HASHALG_TOEPLITZ,
1750 					  rss->hash_types, B_TRUE) != 0)
1751 			sfc_err(sa, "failed to restore RSS mode");
1752 	}
1753 
1754 fail_rx_hf_rte_to_efx:
1755 	sfc_adapter_unlock(sa);
1756 	return -rc;
1757 }
1758 
1759 /*
1760  * The function is used by the secondary process as well. It must not
1761  * use any process-local pointers from the adapter data.
1762  */
1763 static int
1764 sfc_dev_rss_reta_query(struct rte_eth_dev *dev,
1765 		       struct rte_eth_rss_reta_entry64 *reta_conf,
1766 		       uint16_t reta_size)
1767 {
1768 	struct sfc_adapter_shared *sas = sfc_adapter_shared_by_eth_dev(dev);
1769 	struct sfc_rss *rss = &sas->rss;
1770 	int entry;
1771 
1772 	if (rss->context_type != EFX_RX_SCALE_EXCLUSIVE || sas->isolated)
1773 		return -ENOTSUP;
1774 
1775 	if (rss->channels == 0)
1776 		return -EINVAL;
1777 
1778 	if (reta_size != EFX_RSS_TBL_SIZE)
1779 		return -EINVAL;
1780 
1781 	for (entry = 0; entry < reta_size; entry++) {
1782 		int grp = entry / RTE_ETH_RETA_GROUP_SIZE;
1783 		int grp_idx = entry % RTE_ETH_RETA_GROUP_SIZE;
1784 
1785 		if ((reta_conf[grp].mask >> grp_idx) & 1)
1786 			reta_conf[grp].reta[grp_idx] = rss->tbl[entry];
1787 	}
1788 
1789 	return 0;
1790 }
1791 
1792 static int
1793 sfc_dev_rss_reta_update(struct rte_eth_dev *dev,
1794 			struct rte_eth_rss_reta_entry64 *reta_conf,
1795 			uint16_t reta_size)
1796 {
1797 	struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
1798 	struct sfc_rss *rss = &sfc_sa2shared(sa)->rss;
1799 	unsigned int *rss_tbl_new;
1800 	uint16_t entry;
1801 	int rc = 0;
1802 
1803 
1804 	if (sfc_sa2shared(sa)->isolated)
1805 		return -ENOTSUP;
1806 
1807 	if (rss->context_type != EFX_RX_SCALE_EXCLUSIVE) {
1808 		sfc_err(sa, "RSS is not available");
1809 		return -ENOTSUP;
1810 	}
1811 
1812 	if (rss->channels == 0) {
1813 		sfc_err(sa, "RSS is not configured");
1814 		return -EINVAL;
1815 	}
1816 
1817 	if (reta_size != EFX_RSS_TBL_SIZE) {
1818 		sfc_err(sa, "RETA size is wrong (should be %u)",
1819 			EFX_RSS_TBL_SIZE);
1820 		return -EINVAL;
1821 	}
1822 
1823 	rss_tbl_new = rte_zmalloc("rss_tbl_new", sizeof(rss->tbl), 0);
1824 	if (rss_tbl_new == NULL)
1825 		return -ENOMEM;
1826 
1827 	sfc_adapter_lock(sa);
1828 
1829 	rte_memcpy(rss_tbl_new, rss->tbl, sizeof(rss->tbl));
1830 
1831 	for (entry = 0; entry < reta_size; entry++) {
1832 		int grp_idx = entry % RTE_ETH_RETA_GROUP_SIZE;
1833 		struct rte_eth_rss_reta_entry64 *grp;
1834 
1835 		grp = &reta_conf[entry / RTE_ETH_RETA_GROUP_SIZE];
1836 
1837 		if (grp->mask & (1ull << grp_idx)) {
1838 			if (grp->reta[grp_idx] >= rss->channels) {
1839 				rc = EINVAL;
1840 				goto bad_reta_entry;
1841 			}
1842 			rss_tbl_new[entry] = grp->reta[grp_idx];
1843 		}
1844 	}
1845 
1846 	if (sa->state == SFC_ETHDEV_STARTED) {
1847 		rc = efx_rx_scale_tbl_set(sa->nic, EFX_RSS_CONTEXT_DEFAULT,
1848 					  rss_tbl_new, EFX_RSS_TBL_SIZE);
1849 		if (rc != 0)
1850 			goto fail_scale_tbl_set;
1851 	}
1852 
1853 	rte_memcpy(rss->tbl, rss_tbl_new, sizeof(rss->tbl));
1854 
1855 fail_scale_tbl_set:
1856 bad_reta_entry:
1857 	sfc_adapter_unlock(sa);
1858 
1859 	rte_free(rss_tbl_new);
1860 
1861 	SFC_ASSERT(rc >= 0);
1862 	return -rc;
1863 }
1864 
1865 static int
1866 sfc_dev_flow_ops_get(struct rte_eth_dev *dev __rte_unused,
1867 		     const struct rte_flow_ops **ops)
1868 {
1869 	*ops = &sfc_flow_ops;
1870 	return 0;
1871 }
1872 
1873 static int
1874 sfc_pool_ops_supported(struct rte_eth_dev *dev, const char *pool)
1875 {
1876 	const struct sfc_adapter_priv *sap = sfc_adapter_priv_by_eth_dev(dev);
1877 
1878 	/*
1879 	 * If Rx datapath does not provide callback to check mempool,
1880 	 * all pools are supported.
1881 	 */
1882 	if (sap->dp_rx->pool_ops_supported == NULL)
1883 		return 1;
1884 
1885 	return sap->dp_rx->pool_ops_supported(pool);
1886 }
1887 
1888 static int
1889 sfc_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t ethdev_qid)
1890 {
1891 	const struct sfc_adapter_priv *sap = sfc_adapter_priv_by_eth_dev(dev);
1892 	struct sfc_adapter_shared *sas = sfc_adapter_shared_by_eth_dev(dev);
1893 	sfc_ethdev_qid_t sfc_ethdev_qid = ethdev_qid;
1894 	struct sfc_rxq_info *rxq_info;
1895 
1896 	rxq_info = sfc_rxq_info_by_ethdev_qid(sas, sfc_ethdev_qid);
1897 
1898 	return sap->dp_rx->intr_enable(rxq_info->dp);
1899 }
1900 
1901 static int
1902 sfc_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t ethdev_qid)
1903 {
1904 	const struct sfc_adapter_priv *sap = sfc_adapter_priv_by_eth_dev(dev);
1905 	struct sfc_adapter_shared *sas = sfc_adapter_shared_by_eth_dev(dev);
1906 	sfc_ethdev_qid_t sfc_ethdev_qid = ethdev_qid;
1907 	struct sfc_rxq_info *rxq_info;
1908 
1909 	rxq_info = sfc_rxq_info_by_ethdev_qid(sas, sfc_ethdev_qid);
1910 
1911 	return sap->dp_rx->intr_disable(rxq_info->dp);
1912 }
1913 
/* Context passed to the callback which processes MAE mport journal entries */
struct sfc_mport_journal_ctx {
	/* Adapter on behalf of which the journal is processed */
	struct sfc_adapter		*sa;
	/* MAE switch domain ID of the adapter */
	uint16_t			switch_domain_id;
	/* Own MCDI client handle, used to skip the mport of this driver */
	uint32_t			mcdi_handle;
	/* True if the switch domain already has a controller mapping */
	bool				controllers_assigned;
	/* Sorted array of controllers collected while reading the journal */
	efx_pcie_interface_t		*controllers;
	/* Number of elements in the controllers array */
	size_t				nb_controllers;
};
1922 
1923 static int
1924 sfc_journal_ctx_add_controller(struct sfc_mport_journal_ctx *ctx,
1925 			       efx_pcie_interface_t intf)
1926 {
1927 	efx_pcie_interface_t *new_controllers;
1928 	size_t i, target;
1929 	size_t new_size;
1930 
1931 	if (ctx->controllers == NULL) {
1932 		ctx->controllers = rte_malloc("sfc_controller_mapping",
1933 					      sizeof(ctx->controllers[0]), 0);
1934 		if (ctx->controllers == NULL)
1935 			return ENOMEM;
1936 
1937 		ctx->controllers[0] = intf;
1938 		ctx->nb_controllers = 1;
1939 
1940 		return 0;
1941 	}
1942 
1943 	for (i = 0; i < ctx->nb_controllers; i++) {
1944 		if (ctx->controllers[i] == intf)
1945 			return 0;
1946 		if (ctx->controllers[i] > intf)
1947 			break;
1948 	}
1949 	target = i;
1950 
1951 	ctx->nb_controllers += 1;
1952 	new_size = ctx->nb_controllers * sizeof(ctx->controllers[0]);
1953 
1954 	new_controllers = rte_realloc(ctx->controllers, new_size, 0);
1955 	if (new_controllers == NULL) {
1956 		rte_free(ctx->controllers);
1957 		return ENOMEM;
1958 	}
1959 	ctx->controllers = new_controllers;
1960 
1961 	for (i = target + 1; i < ctx->nb_controllers; i++)
1962 		ctx->controllers[i] = ctx->controllers[i - 1];
1963 
1964 	ctx->controllers[target] = intf;
1965 
1966 	return 0;
1967 }
1968 
1969 static efx_rc_t
1970 sfc_process_mport_journal_entry(struct sfc_mport_journal_ctx *ctx,
1971 				efx_mport_desc_t *mport)
1972 {
1973 	struct sfc_mae_switch_port_request req;
1974 	efx_mport_sel_t entity_selector;
1975 	efx_mport_sel_t ethdev_mport;
1976 	uint16_t switch_port_id;
1977 	efx_rc_t efx_rc;
1978 	int rc;
1979 
1980 	sfc_dbg(ctx->sa,
1981 		"processing mport id %u (controller %u pf %u vf %u)",
1982 		mport->emd_id.id, mport->emd_vnic.ev_intf,
1983 		mport->emd_vnic.ev_pf, mport->emd_vnic.ev_vf);
1984 	efx_mae_mport_invalid(&ethdev_mport);
1985 
1986 	if (!ctx->controllers_assigned) {
1987 		rc = sfc_journal_ctx_add_controller(ctx,
1988 						    mport->emd_vnic.ev_intf);
1989 		if (rc != 0)
1990 			return rc;
1991 	}
1992 
1993 	/* Build Mport selector */
1994 	efx_rc = efx_mae_mport_by_pcie_mh_function(mport->emd_vnic.ev_intf,
1995 						mport->emd_vnic.ev_pf,
1996 						mport->emd_vnic.ev_vf,
1997 						&entity_selector);
1998 	if (efx_rc != 0) {
1999 		sfc_err(ctx->sa, "failed to build entity mport selector for c%upf%uvf%u",
2000 			mport->emd_vnic.ev_intf,
2001 			mport->emd_vnic.ev_pf,
2002 			mport->emd_vnic.ev_vf);
2003 		return efx_rc;
2004 	}
2005 
2006 	rc = sfc_mae_switch_port_id_by_entity(ctx->switch_domain_id,
2007 					      &entity_selector,
2008 					      SFC_MAE_SWITCH_PORT_REPRESENTOR,
2009 					      &switch_port_id);
2010 	switch (rc) {
2011 	case 0:
2012 		/* Already registered */
2013 		break;
2014 	case ENOENT:
2015 		/*
2016 		 * No representor has been created for this entity.
2017 		 * Create a dummy switch registry entry with an invalid ethdev
2018 		 * mport selector. When a corresponding representor is created,
2019 		 * this entry will be updated.
2020 		 */
2021 		req.type = SFC_MAE_SWITCH_PORT_REPRESENTOR;
2022 		req.entity_mportp = &entity_selector;
2023 		req.ethdev_mportp = &ethdev_mport;
2024 		req.ethdev_port_id = RTE_MAX_ETHPORTS;
2025 		req.port_data.repr.intf = mport->emd_vnic.ev_intf;
2026 		req.port_data.repr.pf = mport->emd_vnic.ev_pf;
2027 		req.port_data.repr.vf = mport->emd_vnic.ev_vf;
2028 
2029 		rc = sfc_mae_assign_switch_port(ctx->switch_domain_id,
2030 						&req, &switch_port_id);
2031 		if (rc != 0) {
2032 			sfc_err(ctx->sa,
2033 				"failed to assign MAE switch port for c%upf%uvf%u: %s",
2034 				mport->emd_vnic.ev_intf,
2035 				mport->emd_vnic.ev_pf,
2036 				mport->emd_vnic.ev_vf,
2037 				rte_strerror(rc));
2038 			return rc;
2039 		}
2040 		break;
2041 	default:
2042 		sfc_err(ctx->sa, "failed to find MAE switch port for c%upf%uvf%u: %s",
2043 			mport->emd_vnic.ev_intf,
2044 			mport->emd_vnic.ev_pf,
2045 			mport->emd_vnic.ev_vf,
2046 			rte_strerror(rc));
2047 		return rc;
2048 	}
2049 
2050 	return 0;
2051 }
2052 
2053 static efx_rc_t
2054 sfc_process_mport_journal_cb(void *data, efx_mport_desc_t *mport,
2055 			     size_t mport_len)
2056 {
2057 	struct sfc_mport_journal_ctx *ctx = data;
2058 
2059 	if (ctx == NULL || ctx->sa == NULL) {
2060 		sfc_err(ctx->sa, "received NULL context or SFC adapter");
2061 		return EINVAL;
2062 	}
2063 
2064 	if (mport_len != sizeof(*mport)) {
2065 		sfc_err(ctx->sa, "actual and expected mport buffer sizes differ");
2066 		return EINVAL;
2067 	}
2068 
2069 	SFC_ASSERT(sfc_adapter_is_locked(ctx->sa));
2070 
2071 	/*
2072 	 * If a zombie flag is set, it means the mport has been marked for
2073 	 * deletion and cannot be used for any new operations. The mport will
2074 	 * be destroyed completely once all references to it are released.
2075 	 */
2076 	if (mport->emd_zombie) {
2077 		sfc_dbg(ctx->sa, "mport is a zombie, skipping");
2078 		return 0;
2079 	}
2080 	if (mport->emd_type != EFX_MPORT_TYPE_VNIC) {
2081 		sfc_dbg(ctx->sa, "mport is not a VNIC, skipping");
2082 		return 0;
2083 	}
2084 	if (mport->emd_vnic.ev_client_type != EFX_MPORT_VNIC_CLIENT_FUNCTION) {
2085 		sfc_dbg(ctx->sa, "mport is not a function, skipping");
2086 		return 0;
2087 	}
2088 	if (mport->emd_vnic.ev_handle == ctx->mcdi_handle) {
2089 		sfc_dbg(ctx->sa, "mport is this driver instance, skipping");
2090 		return 0;
2091 	}
2092 
2093 	return sfc_process_mport_journal_entry(ctx, mport);
2094 }
2095 
2096 static int
2097 sfc_process_mport_journal(struct sfc_adapter *sa)
2098 {
2099 	struct sfc_mport_journal_ctx ctx;
2100 	const efx_pcie_interface_t *controllers;
2101 	size_t nb_controllers;
2102 	efx_rc_t efx_rc;
2103 	int rc;
2104 
2105 	memset(&ctx, 0, sizeof(ctx));
2106 	ctx.sa = sa;
2107 	ctx.switch_domain_id = sa->mae.switch_domain_id;
2108 
2109 	efx_rc = efx_mcdi_get_own_client_handle(sa->nic, &ctx.mcdi_handle);
2110 	if (efx_rc != 0) {
2111 		sfc_err(sa, "failed to get own MCDI handle");
2112 		SFC_ASSERT(efx_rc > 0);
2113 		return efx_rc;
2114 	}
2115 
2116 	rc = sfc_mae_switch_domain_controllers(ctx.switch_domain_id,
2117 					       &controllers, &nb_controllers);
2118 	if (rc != 0) {
2119 		sfc_err(sa, "failed to get controller mapping");
2120 		return rc;
2121 	}
2122 
2123 	ctx.controllers_assigned = controllers != NULL;
2124 	ctx.controllers = NULL;
2125 	ctx.nb_controllers = 0;
2126 
2127 	efx_rc = efx_mae_read_mport_journal(sa->nic,
2128 					    sfc_process_mport_journal_cb, &ctx);
2129 	if (efx_rc != 0) {
2130 		sfc_err(sa, "failed to process MAE mport journal");
2131 		SFC_ASSERT(efx_rc > 0);
2132 		return efx_rc;
2133 	}
2134 
2135 	if (controllers == NULL) {
2136 		rc = sfc_mae_switch_domain_map_controllers(ctx.switch_domain_id,
2137 							   ctx.controllers,
2138 							   ctx.nb_controllers);
2139 		if (rc != 0)
2140 			return rc;
2141 	}
2142 
2143 	return 0;
2144 }
2145 
2146 static void
2147 sfc_count_representors_cb(enum sfc_mae_switch_port_type type,
2148 			  const efx_mport_sel_t *ethdev_mportp __rte_unused,
2149 			  uint16_t ethdev_port_id __rte_unused,
2150 			  const efx_mport_sel_t *entity_mportp __rte_unused,
2151 			  uint16_t switch_port_id __rte_unused,
2152 			  union sfc_mae_switch_port_data *port_datap
2153 				__rte_unused,
2154 			  void *user_datap)
2155 {
2156 	int *counter = user_datap;
2157 
2158 	SFC_ASSERT(counter != NULL);
2159 
2160 	if (type == SFC_MAE_SWITCH_PORT_REPRESENTOR)
2161 		(*counter)++;
2162 }
2163 
2164 struct sfc_get_representors_ctx {
2165 	struct rte_eth_representor_info	*info;
2166 	struct sfc_adapter		*sa;
2167 	uint16_t			switch_domain_id;
2168 	const efx_pcie_interface_t	*controllers;
2169 	size_t				nb_controllers;
2170 };
2171 
2172 static void
2173 sfc_get_representors_cb(enum sfc_mae_switch_port_type type,
2174 			const efx_mport_sel_t *ethdev_mportp __rte_unused,
2175 			uint16_t ethdev_port_id __rte_unused,
2176 			const efx_mport_sel_t *entity_mportp __rte_unused,
2177 			uint16_t switch_port_id,
2178 			union sfc_mae_switch_port_data *port_datap,
2179 			void *user_datap)
2180 {
2181 	struct sfc_get_representors_ctx *ctx = user_datap;
2182 	struct rte_eth_representor_range *range;
2183 	int ret;
2184 	int rc;
2185 
2186 	SFC_ASSERT(ctx != NULL);
2187 	SFC_ASSERT(ctx->info != NULL);
2188 	SFC_ASSERT(ctx->sa != NULL);
2189 
2190 	if (type != SFC_MAE_SWITCH_PORT_REPRESENTOR) {
2191 		sfc_dbg(ctx->sa, "not a representor, skipping");
2192 		return;
2193 	}
2194 	if (ctx->info->nb_ranges >= ctx->info->nb_ranges_alloc) {
2195 		sfc_dbg(ctx->sa, "info structure is full already");
2196 		return;
2197 	}
2198 
2199 	range = &ctx->info->ranges[ctx->info->nb_ranges];
2200 	rc = sfc_mae_switch_controller_from_mapping(ctx->controllers,
2201 						    ctx->nb_controllers,
2202 						    port_datap->repr.intf,
2203 						    &range->controller);
2204 	if (rc != 0) {
2205 		sfc_err(ctx->sa, "invalid representor controller: %d",
2206 			port_datap->repr.intf);
2207 		range->controller = -1;
2208 	}
2209 	range->pf = port_datap->repr.pf;
2210 	range->id_base = switch_port_id;
2211 	range->id_end = switch_port_id;
2212 
2213 	if (port_datap->repr.vf != EFX_PCI_VF_INVALID) {
2214 		range->type = RTE_ETH_REPRESENTOR_VF;
2215 		range->vf = port_datap->repr.vf;
2216 		ret = snprintf(range->name, RTE_DEV_NAME_MAX_LEN,
2217 			       "c%dpf%dvf%d", range->controller, range->pf,
2218 			       range->vf);
2219 	} else {
2220 		range->type = RTE_ETH_REPRESENTOR_PF;
2221 		ret = snprintf(range->name, RTE_DEV_NAME_MAX_LEN,
2222 			 "c%dpf%d", range->controller, range->pf);
2223 	}
2224 	if (ret >= RTE_DEV_NAME_MAX_LEN) {
2225 		sfc_err(ctx->sa, "representor name has been truncated: %s",
2226 			range->name);
2227 	}
2228 
2229 	ctx->info->nb_ranges++;
2230 }
2231 
2232 static int
2233 sfc_representor_info_get(struct rte_eth_dev *dev,
2234 			 struct rte_eth_representor_info *info)
2235 {
2236 	struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
2237 	struct sfc_get_representors_ctx get_repr_ctx;
2238 	const efx_nic_cfg_t *nic_cfg;
2239 	uint16_t switch_domain_id;
2240 	uint32_t nb_repr;
2241 	int controller;
2242 	int rc;
2243 
2244 	sfc_adapter_lock(sa);
2245 
2246 	if (sa->mae.status != SFC_MAE_STATUS_ADMIN) {
2247 		sfc_adapter_unlock(sa);
2248 		return -ENOTSUP;
2249 	}
2250 
2251 	rc = sfc_process_mport_journal(sa);
2252 	if (rc != 0) {
2253 		sfc_adapter_unlock(sa);
2254 		SFC_ASSERT(rc > 0);
2255 		return -rc;
2256 	}
2257 
2258 	switch_domain_id = sa->mae.switch_domain_id;
2259 
2260 	nb_repr = 0;
2261 	rc = sfc_mae_switch_ports_iterate(switch_domain_id,
2262 					  sfc_count_representors_cb,
2263 					  &nb_repr);
2264 	if (rc != 0) {
2265 		sfc_adapter_unlock(sa);
2266 		SFC_ASSERT(rc > 0);
2267 		return -rc;
2268 	}
2269 
2270 	if (info == NULL) {
2271 		sfc_adapter_unlock(sa);
2272 		return nb_repr;
2273 	}
2274 
2275 	rc = sfc_mae_switch_domain_controllers(switch_domain_id,
2276 					       &get_repr_ctx.controllers,
2277 					       &get_repr_ctx.nb_controllers);
2278 	if (rc != 0) {
2279 		sfc_adapter_unlock(sa);
2280 		SFC_ASSERT(rc > 0);
2281 		return -rc;
2282 	}
2283 
2284 	nic_cfg = efx_nic_cfg_get(sa->nic);
2285 
2286 	rc = sfc_mae_switch_domain_get_controller(switch_domain_id,
2287 						  nic_cfg->enc_intf,
2288 						  &controller);
2289 	if (rc != 0) {
2290 		sfc_err(sa, "invalid controller: %d", nic_cfg->enc_intf);
2291 		controller = -1;
2292 	}
2293 
2294 	info->controller = controller;
2295 	info->pf = nic_cfg->enc_pf;
2296 
2297 	get_repr_ctx.info = info;
2298 	get_repr_ctx.sa = sa;
2299 	get_repr_ctx.switch_domain_id = switch_domain_id;
2300 	rc = sfc_mae_switch_ports_iterate(switch_domain_id,
2301 					  sfc_get_representors_cb,
2302 					  &get_repr_ctx);
2303 	if (rc != 0) {
2304 		sfc_adapter_unlock(sa);
2305 		SFC_ASSERT(rc > 0);
2306 		return -rc;
2307 	}
2308 
2309 	sfc_adapter_unlock(sa);
2310 	return nb_repr;
2311 }
2312 
2313 static int
2314 sfc_rx_metadata_negotiate(struct rte_eth_dev *dev, uint64_t *features)
2315 {
2316 	struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
2317 	uint64_t supported = 0;
2318 
2319 	sfc_adapter_lock(sa);
2320 
2321 	if ((sa->priv.dp_rx->features & SFC_DP_RX_FEAT_FLOW_FLAG) != 0)
2322 		supported |= RTE_ETH_RX_METADATA_USER_FLAG;
2323 
2324 	if ((sa->priv.dp_rx->features & SFC_DP_RX_FEAT_FLOW_MARK) != 0)
2325 		supported |= RTE_ETH_RX_METADATA_USER_MARK;
2326 
2327 	if (sfc_flow_tunnel_is_supported(sa))
2328 		supported |= RTE_ETH_RX_METADATA_TUNNEL_ID;
2329 
2330 	sa->negotiated_rx_metadata = supported & *features;
2331 	*features = sa->negotiated_rx_metadata;
2332 
2333 	sfc_adapter_unlock(sa);
2334 
2335 	return 0;
2336 }
2337 
2338 static const struct eth_dev_ops sfc_eth_dev_ops = {
2339 	.dev_configure			= sfc_dev_configure,
2340 	.dev_start			= sfc_dev_start,
2341 	.dev_stop			= sfc_dev_stop,
2342 	.dev_set_link_up		= sfc_dev_set_link_up,
2343 	.dev_set_link_down		= sfc_dev_set_link_down,
2344 	.dev_close			= sfc_dev_close,
2345 	.promiscuous_enable		= sfc_dev_promisc_enable,
2346 	.promiscuous_disable		= sfc_dev_promisc_disable,
2347 	.allmulticast_enable		= sfc_dev_allmulti_enable,
2348 	.allmulticast_disable		= sfc_dev_allmulti_disable,
2349 	.link_update			= sfc_dev_link_update,
2350 	.stats_get			= sfc_stats_get,
2351 	.stats_reset			= sfc_stats_reset,
2352 	.xstats_get			= sfc_xstats_get,
2353 	.xstats_reset			= sfc_stats_reset,
2354 	.xstats_get_names		= sfc_xstats_get_names,
2355 	.dev_infos_get			= sfc_dev_infos_get,
2356 	.dev_supported_ptypes_get	= sfc_dev_supported_ptypes_get,
2357 	.mtu_set			= sfc_dev_set_mtu,
2358 	.rx_queue_start			= sfc_rx_queue_start,
2359 	.rx_queue_stop			= sfc_rx_queue_stop,
2360 	.tx_queue_start			= sfc_tx_queue_start,
2361 	.tx_queue_stop			= sfc_tx_queue_stop,
2362 	.rx_queue_setup			= sfc_rx_queue_setup,
2363 	.rx_queue_release		= sfc_rx_queue_release,
2364 	.rx_queue_intr_enable		= sfc_rx_queue_intr_enable,
2365 	.rx_queue_intr_disable		= sfc_rx_queue_intr_disable,
2366 	.tx_queue_setup			= sfc_tx_queue_setup,
2367 	.tx_queue_release		= sfc_tx_queue_release,
2368 	.flow_ctrl_get			= sfc_flow_ctrl_get,
2369 	.flow_ctrl_set			= sfc_flow_ctrl_set,
2370 	.mac_addr_set			= sfc_mac_addr_set,
2371 	.udp_tunnel_port_add		= sfc_dev_udp_tunnel_port_add,
2372 	.udp_tunnel_port_del		= sfc_dev_udp_tunnel_port_del,
2373 	.reta_update			= sfc_dev_rss_reta_update,
2374 	.reta_query			= sfc_dev_rss_reta_query,
2375 	.rss_hash_update		= sfc_dev_rss_hash_update,
2376 	.rss_hash_conf_get		= sfc_dev_rss_hash_conf_get,
2377 	.flow_ops_get			= sfc_dev_flow_ops_get,
2378 	.set_mc_addr_list		= sfc_set_mc_addr_list,
2379 	.rxq_info_get			= sfc_rx_queue_info_get,
2380 	.txq_info_get			= sfc_tx_queue_info_get,
2381 	.fw_version_get			= sfc_fw_version_get,
2382 	.xstats_get_by_id		= sfc_xstats_get_by_id,
2383 	.xstats_get_names_by_id		= sfc_xstats_get_names_by_id,
2384 	.pool_ops_supported		= sfc_pool_ops_supported,
2385 	.representor_info_get		= sfc_representor_info_get,
2386 	.rx_metadata_negotiate		= sfc_rx_metadata_negotiate,
2387 };
2388 
/* Parameters passed from the PCI probe to sfc_eth_dev_init(). */
struct sfc_ethdev_init_data {
	/* Number of representors requested in device arguments */
	uint16_t nb_representors;
};
2392 
/**
 * Make a copy of a string using rte_malloc() so that the copy lives in
 * potentially shared memory, as required for multi-process support
 * (strdup() would allocate from the process-local heap instead).
 *
 * Returns NULL if @p str is NULL or if the allocation fails. The copy
 * must be released with rte_free().
 */
static char *
sfc_strdup(const char *str)
{
	size_t nb_bytes;
	char *dup;

	if (str == NULL)
		return NULL;

	/* Include the terminating NUL */
	nb_bytes = strlen(str) + 1;

	dup = rte_malloc(__func__, nb_bytes, 0);
	if (dup == NULL)
		return NULL;

	rte_memcpy(dup, str, nb_bytes);

	return dup;
}
2415 
2416 static int
2417 sfc_eth_dev_set_ops(struct rte_eth_dev *dev)
2418 {
2419 	struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
2420 	struct sfc_adapter_shared *sas = sfc_adapter_shared_by_eth_dev(dev);
2421 	const struct sfc_dp_rx *dp_rx;
2422 	const struct sfc_dp_tx *dp_tx;
2423 	const efx_nic_cfg_t *encp;
2424 	unsigned int avail_caps = 0;
2425 	const char *rx_name = NULL;
2426 	const char *tx_name = NULL;
2427 	int rc;
2428 
2429 	switch (sa->family) {
2430 	case EFX_FAMILY_HUNTINGTON:
2431 	case EFX_FAMILY_MEDFORD:
2432 	case EFX_FAMILY_MEDFORD2:
2433 		avail_caps |= SFC_DP_HW_FW_CAP_EF10;
2434 		avail_caps |= SFC_DP_HW_FW_CAP_RX_EFX;
2435 		avail_caps |= SFC_DP_HW_FW_CAP_TX_EFX;
2436 		break;
2437 	case EFX_FAMILY_RIVERHEAD:
2438 		avail_caps |= SFC_DP_HW_FW_CAP_EF100;
2439 		break;
2440 	default:
2441 		break;
2442 	}
2443 
2444 	encp = efx_nic_cfg_get(sa->nic);
2445 	if (encp->enc_rx_es_super_buffer_supported)
2446 		avail_caps |= SFC_DP_HW_FW_CAP_RX_ES_SUPER_BUFFER;
2447 
2448 	rc = sfc_kvargs_process(sa, SFC_KVARG_RX_DATAPATH,
2449 				sfc_kvarg_string_handler, &rx_name);
2450 	if (rc != 0)
2451 		goto fail_kvarg_rx_datapath;
2452 
2453 	if (rx_name != NULL) {
2454 		dp_rx = sfc_dp_find_rx_by_name(&sfc_dp_head, rx_name);
2455 		if (dp_rx == NULL) {
2456 			sfc_err(sa, "Rx datapath %s not found", rx_name);
2457 			rc = ENOENT;
2458 			goto fail_dp_rx;
2459 		}
2460 		if (!sfc_dp_match_hw_fw_caps(&dp_rx->dp, avail_caps)) {
2461 			sfc_err(sa,
2462 				"Insufficient Hw/FW capabilities to use Rx datapath %s",
2463 				rx_name);
2464 			rc = EINVAL;
2465 			goto fail_dp_rx_caps;
2466 		}
2467 	} else {
2468 		dp_rx = sfc_dp_find_rx_by_caps(&sfc_dp_head, avail_caps);
2469 		if (dp_rx == NULL) {
2470 			sfc_err(sa, "Rx datapath by caps %#x not found",
2471 				avail_caps);
2472 			rc = ENOENT;
2473 			goto fail_dp_rx;
2474 		}
2475 	}
2476 
2477 	sas->dp_rx_name = sfc_strdup(dp_rx->dp.name);
2478 	if (sas->dp_rx_name == NULL) {
2479 		rc = ENOMEM;
2480 		goto fail_dp_rx_name;
2481 	}
2482 
2483 	if (strcmp(dp_rx->dp.name, SFC_KVARG_DATAPATH_EF10_ESSB) == 0) {
2484 		/* FLAG and MARK are always available from Rx prefix. */
2485 		sa->negotiated_rx_metadata |= RTE_ETH_RX_METADATA_USER_FLAG;
2486 		sa->negotiated_rx_metadata |= RTE_ETH_RX_METADATA_USER_MARK;
2487 	}
2488 
2489 	sfc_notice(sa, "use %s Rx datapath", sas->dp_rx_name);
2490 
2491 	rc = sfc_kvargs_process(sa, SFC_KVARG_TX_DATAPATH,
2492 				sfc_kvarg_string_handler, &tx_name);
2493 	if (rc != 0)
2494 		goto fail_kvarg_tx_datapath;
2495 
2496 	if (tx_name != NULL) {
2497 		dp_tx = sfc_dp_find_tx_by_name(&sfc_dp_head, tx_name);
2498 		if (dp_tx == NULL) {
2499 			sfc_err(sa, "Tx datapath %s not found", tx_name);
2500 			rc = ENOENT;
2501 			goto fail_dp_tx;
2502 		}
2503 		if (!sfc_dp_match_hw_fw_caps(&dp_tx->dp, avail_caps)) {
2504 			sfc_err(sa,
2505 				"Insufficient Hw/FW capabilities to use Tx datapath %s",
2506 				tx_name);
2507 			rc = EINVAL;
2508 			goto fail_dp_tx_caps;
2509 		}
2510 	} else {
2511 		dp_tx = sfc_dp_find_tx_by_caps(&sfc_dp_head, avail_caps);
2512 		if (dp_tx == NULL) {
2513 			sfc_err(sa, "Tx datapath by caps %#x not found",
2514 				avail_caps);
2515 			rc = ENOENT;
2516 			goto fail_dp_tx;
2517 		}
2518 	}
2519 
2520 	sas->dp_tx_name = sfc_strdup(dp_tx->dp.name);
2521 	if (sas->dp_tx_name == NULL) {
2522 		rc = ENOMEM;
2523 		goto fail_dp_tx_name;
2524 	}
2525 
2526 	sfc_notice(sa, "use %s Tx datapath", sas->dp_tx_name);
2527 
2528 	sa->priv.dp_rx = dp_rx;
2529 	sa->priv.dp_tx = dp_tx;
2530 
2531 	dev->rx_pkt_burst = dp_rx->pkt_burst;
2532 	dev->tx_pkt_prepare = dp_tx->pkt_prepare;
2533 	dev->tx_pkt_burst = dp_tx->pkt_burst;
2534 
2535 	dev->rx_queue_count = sfc_rx_queue_count;
2536 	dev->rx_descriptor_status = sfc_rx_descriptor_status;
2537 	dev->tx_descriptor_status = sfc_tx_descriptor_status;
2538 	dev->dev_ops = &sfc_eth_dev_ops;
2539 
2540 	return 0;
2541 
2542 fail_dp_tx_name:
2543 fail_dp_tx_caps:
2544 fail_dp_tx:
2545 fail_kvarg_tx_datapath:
2546 	rte_free(sas->dp_rx_name);
2547 	sas->dp_rx_name = NULL;
2548 
2549 fail_dp_rx_name:
2550 fail_dp_rx_caps:
2551 fail_dp_rx:
2552 fail_kvarg_rx_datapath:
2553 	return rc;
2554 }
2555 
2556 static void
2557 sfc_eth_dev_clear_ops(struct rte_eth_dev *dev)
2558 {
2559 	struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev);
2560 	struct sfc_adapter_shared *sas = sfc_adapter_shared_by_eth_dev(dev);
2561 
2562 	dev->dev_ops = NULL;
2563 	dev->tx_pkt_prepare = NULL;
2564 	dev->rx_pkt_burst = NULL;
2565 	dev->tx_pkt_burst = NULL;
2566 
2567 	rte_free(sas->dp_tx_name);
2568 	sas->dp_tx_name = NULL;
2569 	sa->priv.dp_tx = NULL;
2570 
2571 	rte_free(sas->dp_rx_name);
2572 	sas->dp_rx_name = NULL;
2573 	sa->priv.dp_rx = NULL;
2574 }
2575 
2576 static const struct eth_dev_ops sfc_eth_dev_secondary_ops = {
2577 	.dev_supported_ptypes_get	= sfc_dev_supported_ptypes_get,
2578 	.reta_query			= sfc_dev_rss_reta_query,
2579 	.rss_hash_conf_get		= sfc_dev_rss_hash_conf_get,
2580 	.rxq_info_get			= sfc_rx_queue_info_get,
2581 	.txq_info_get			= sfc_tx_queue_info_get,
2582 };
2583 
2584 static int
2585 sfc_eth_dev_secondary_init(struct rte_eth_dev *dev, uint32_t logtype_main)
2586 {
2587 	struct sfc_adapter_shared *sas = sfc_adapter_shared_by_eth_dev(dev);
2588 	struct sfc_adapter_priv *sap;
2589 	const struct sfc_dp_rx *dp_rx;
2590 	const struct sfc_dp_tx *dp_tx;
2591 	int rc;
2592 
2593 	/*
2594 	 * Allocate process private data from heap, since it should not
2595 	 * be located in shared memory allocated using rte_malloc() API.
2596 	 */
2597 	sap = calloc(1, sizeof(*sap));
2598 	if (sap == NULL) {
2599 		rc = ENOMEM;
2600 		goto fail_alloc_priv;
2601 	}
2602 
2603 	sap->logtype_main = logtype_main;
2604 
2605 	dp_rx = sfc_dp_find_rx_by_name(&sfc_dp_head, sas->dp_rx_name);
2606 	if (dp_rx == NULL) {
2607 		SFC_LOG(sas, RTE_LOG_ERR, logtype_main,
2608 			"cannot find %s Rx datapath", sas->dp_rx_name);
2609 		rc = ENOENT;
2610 		goto fail_dp_rx;
2611 	}
2612 	if (~dp_rx->features & SFC_DP_RX_FEAT_MULTI_PROCESS) {
2613 		SFC_LOG(sas, RTE_LOG_ERR, logtype_main,
2614 			"%s Rx datapath does not support multi-process",
2615 			sas->dp_rx_name);
2616 		rc = EINVAL;
2617 		goto fail_dp_rx_multi_process;
2618 	}
2619 
2620 	dp_tx = sfc_dp_find_tx_by_name(&sfc_dp_head, sas->dp_tx_name);
2621 	if (dp_tx == NULL) {
2622 		SFC_LOG(sas, RTE_LOG_ERR, logtype_main,
2623 			"cannot find %s Tx datapath", sas->dp_tx_name);
2624 		rc = ENOENT;
2625 		goto fail_dp_tx;
2626 	}
2627 	if (~dp_tx->features & SFC_DP_TX_FEAT_MULTI_PROCESS) {
2628 		SFC_LOG(sas, RTE_LOG_ERR, logtype_main,
2629 			"%s Tx datapath does not support multi-process",
2630 			sas->dp_tx_name);
2631 		rc = EINVAL;
2632 		goto fail_dp_tx_multi_process;
2633 	}
2634 
2635 	sap->dp_rx = dp_rx;
2636 	sap->dp_tx = dp_tx;
2637 
2638 	dev->process_private = sap;
2639 	dev->rx_pkt_burst = dp_rx->pkt_burst;
2640 	dev->tx_pkt_prepare = dp_tx->pkt_prepare;
2641 	dev->tx_pkt_burst = dp_tx->pkt_burst;
2642 	dev->rx_queue_count = sfc_rx_queue_count;
2643 	dev->rx_descriptor_status = sfc_rx_descriptor_status;
2644 	dev->tx_descriptor_status = sfc_tx_descriptor_status;
2645 	dev->dev_ops = &sfc_eth_dev_secondary_ops;
2646 
2647 	return 0;
2648 
2649 fail_dp_tx_multi_process:
2650 fail_dp_tx:
2651 fail_dp_rx_multi_process:
2652 fail_dp_rx:
2653 	free(sap);
2654 
2655 fail_alloc_priv:
2656 	return rc;
2657 }
2658 
2659 static void
2660 sfc_register_dp(void)
2661 {
2662 	/* Register once */
2663 	if (TAILQ_EMPTY(&sfc_dp_head)) {
2664 		/* Prefer EF10 datapath */
2665 		sfc_dp_register(&sfc_dp_head, &sfc_ef100_rx.dp);
2666 		sfc_dp_register(&sfc_dp_head, &sfc_ef10_essb_rx.dp);
2667 		sfc_dp_register(&sfc_dp_head, &sfc_ef10_rx.dp);
2668 		sfc_dp_register(&sfc_dp_head, &sfc_efx_rx.dp);
2669 
2670 		sfc_dp_register(&sfc_dp_head, &sfc_ef100_tx.dp);
2671 		sfc_dp_register(&sfc_dp_head, &sfc_ef10_tx.dp);
2672 		sfc_dp_register(&sfc_dp_head, &sfc_efx_tx.dp);
2673 		sfc_dp_register(&sfc_dp_head, &sfc_ef10_simple_tx.dp);
2674 	}
2675 }
2676 
2677 static int
2678 sfc_parse_switch_mode(struct sfc_adapter *sa, bool has_representors)
2679 {
2680 	const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
2681 	const char *switch_mode = NULL;
2682 	int rc;
2683 
2684 	sfc_log_init(sa, "entry");
2685 
2686 	rc = sfc_kvargs_process(sa, SFC_KVARG_SWITCH_MODE,
2687 				sfc_kvarg_string_handler, &switch_mode);
2688 	if (rc != 0)
2689 		goto fail_kvargs;
2690 
2691 	if (switch_mode == NULL) {
2692 		sa->switchdev = encp->enc_mae_admin &&
2693 				(!encp->enc_datapath_cap_evb ||
2694 				 has_representors);
2695 	} else if (strcasecmp(switch_mode, SFC_KVARG_SWITCH_MODE_LEGACY) == 0) {
2696 		sa->switchdev = false;
2697 	} else if (strcasecmp(switch_mode,
2698 			      SFC_KVARG_SWITCH_MODE_SWITCHDEV) == 0) {
2699 		sa->switchdev = true;
2700 	} else {
2701 		sfc_err(sa, "invalid switch mode device argument '%s'",
2702 			switch_mode);
2703 		rc = EINVAL;
2704 		goto fail_mode;
2705 	}
2706 
2707 	sfc_log_init(sa, "done");
2708 
2709 	return 0;
2710 
2711 fail_mode:
2712 fail_kvargs:
2713 	sfc_log_init(sa, "failed: %s", rte_strerror(rc));
2714 
2715 	return rc;
2716 }
2717 
2718 static int
2719 sfc_eth_dev_init(struct rte_eth_dev *dev, void *init_params)
2720 {
2721 	struct sfc_adapter_shared *sas = sfc_adapter_shared_by_eth_dev(dev);
2722 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
2723 	struct sfc_ethdev_init_data *init_data = init_params;
2724 	uint32_t logtype_main;
2725 	struct sfc_adapter *sa;
2726 	int rc;
2727 	const efx_nic_cfg_t *encp;
2728 	const struct rte_ether_addr *from;
2729 	int ret;
2730 
2731 	if (sfc_efx_dev_class_get(pci_dev->device.devargs) !=
2732 			SFC_EFX_DEV_CLASS_NET) {
2733 		SFC_GENERIC_LOG(DEBUG,
2734 			"Incompatible device class: skip probing, should be probed by other sfc driver.");
2735 		return 1;
2736 	}
2737 
2738 	rc = sfc_dp_mport_register();
2739 	if (rc != 0)
2740 		return rc;
2741 
2742 	sfc_register_dp();
2743 
2744 	logtype_main = sfc_register_logtype(&pci_dev->addr,
2745 					    SFC_LOGTYPE_MAIN_STR,
2746 					    RTE_LOG_NOTICE);
2747 
2748 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
2749 		return -sfc_eth_dev_secondary_init(dev, logtype_main);
2750 
2751 	/* Required for logging */
2752 	ret = snprintf(sas->log_prefix, sizeof(sas->log_prefix),
2753 			"PMD: sfc_efx " PCI_PRI_FMT " #%" PRIu16 ": ",
2754 			pci_dev->addr.domain, pci_dev->addr.bus,
2755 			pci_dev->addr.devid, pci_dev->addr.function,
2756 			dev->data->port_id);
2757 	if (ret < 0 || ret >= (int)sizeof(sas->log_prefix)) {
2758 		SFC_GENERIC_LOG(ERR,
2759 			"reserved log prefix is too short for " PCI_PRI_FMT,
2760 			pci_dev->addr.domain, pci_dev->addr.bus,
2761 			pci_dev->addr.devid, pci_dev->addr.function);
2762 		return -EINVAL;
2763 	}
2764 	sas->pci_addr = pci_dev->addr;
2765 	sas->port_id = dev->data->port_id;
2766 
2767 	/*
2768 	 * Allocate process private data from heap, since it should not
2769 	 * be located in shared memory allocated using rte_malloc() API.
2770 	 */
2771 	sa = calloc(1, sizeof(*sa));
2772 	if (sa == NULL) {
2773 		rc = ENOMEM;
2774 		goto fail_alloc_sa;
2775 	}
2776 
2777 	dev->process_private = sa;
2778 
2779 	/* Required for logging */
2780 	sa->priv.shared = sas;
2781 	sa->priv.logtype_main = logtype_main;
2782 
2783 	sa->eth_dev = dev;
2784 
2785 	/* Copy PCI device info to the dev->data */
2786 	rte_eth_copy_pci_info(dev, pci_dev);
2787 	dev->data->dev_flags |= RTE_ETH_DEV_FLOW_OPS_THREAD_SAFE;
2788 
2789 	rc = sfc_kvargs_parse(sa);
2790 	if (rc != 0)
2791 		goto fail_kvargs_parse;
2792 
2793 	sfc_log_init(sa, "entry");
2794 
2795 	dev->data->mac_addrs = rte_zmalloc("sfc", RTE_ETHER_ADDR_LEN, 0);
2796 	if (dev->data->mac_addrs == NULL) {
2797 		rc = ENOMEM;
2798 		goto fail_mac_addrs;
2799 	}
2800 
2801 	sfc_adapter_lock_init(sa);
2802 	sfc_adapter_lock(sa);
2803 
2804 	sfc_log_init(sa, "probing");
2805 	rc = sfc_probe(sa);
2806 	if (rc != 0)
2807 		goto fail_probe;
2808 
2809 	/*
2810 	 * Selecting a default switch mode requires the NIC to be probed and
2811 	 * to have its capabilities filled in.
2812 	 */
2813 	rc = sfc_parse_switch_mode(sa, init_data->nb_representors > 0);
2814 	if (rc != 0)
2815 		goto fail_switch_mode;
2816 
2817 	sfc_log_init(sa, "set device ops");
2818 	rc = sfc_eth_dev_set_ops(dev);
2819 	if (rc != 0)
2820 		goto fail_set_ops;
2821 
2822 	sfc_log_init(sa, "attaching");
2823 	rc = sfc_attach(sa);
2824 	if (rc != 0)
2825 		goto fail_attach;
2826 
2827 	if (sa->switchdev && sa->mae.status != SFC_MAE_STATUS_ADMIN) {
2828 		sfc_err(sa,
2829 			"failed to enable switchdev mode without admin MAE privilege");
2830 		rc = ENOTSUP;
2831 		goto fail_switchdev_no_mae;
2832 	}
2833 
2834 	encp = efx_nic_cfg_get(sa->nic);
2835 
2836 	/*
2837 	 * The arguments are really reverse order in comparison to
2838 	 * Linux kernel. Copy from NIC config to Ethernet device data.
2839 	 */
2840 	from = (const struct rte_ether_addr *)(encp->enc_mac_addr);
2841 	rte_ether_addr_copy(from, &dev->data->mac_addrs[0]);
2842 
2843 	sfc_adapter_unlock(sa);
2844 
2845 	sfc_log_init(sa, "done");
2846 	return 0;
2847 
2848 fail_switchdev_no_mae:
2849 	sfc_detach(sa);
2850 
2851 fail_attach:
2852 	sfc_eth_dev_clear_ops(dev);
2853 
2854 fail_set_ops:
2855 fail_switch_mode:
2856 	sfc_unprobe(sa);
2857 
2858 fail_probe:
2859 	sfc_adapter_unlock(sa);
2860 	sfc_adapter_lock_fini(sa);
2861 	rte_free(dev->data->mac_addrs);
2862 	dev->data->mac_addrs = NULL;
2863 
2864 fail_mac_addrs:
2865 	sfc_kvargs_cleanup(sa);
2866 
2867 fail_kvargs_parse:
2868 	sfc_log_init(sa, "failed %d", rc);
2869 	dev->process_private = NULL;
2870 	free(sa);
2871 
2872 fail_alloc_sa:
2873 	SFC_ASSERT(rc > 0);
2874 	return -rc;
2875 }
2876 
/*
 * Ethdev uninit callback: close the device. The outcome of the close is
 * not propagated; 0 is always returned.
 */
static int
sfc_eth_dev_uninit(struct rte_eth_dev *dev)
{
	(void)sfc_dev_close(dev);

	return 0;
}
2884 
2885 static const struct rte_pci_id pci_id_sfc_efx_map[] = {
2886 	{ RTE_PCI_DEVICE(EFX_PCI_VENID_SFC, EFX_PCI_DEVID_FARMINGDALE) },
2887 	{ RTE_PCI_DEVICE(EFX_PCI_VENID_SFC, EFX_PCI_DEVID_FARMINGDALE_VF) },
2888 	{ RTE_PCI_DEVICE(EFX_PCI_VENID_SFC, EFX_PCI_DEVID_GREENPORT) },
2889 	{ RTE_PCI_DEVICE(EFX_PCI_VENID_SFC, EFX_PCI_DEVID_GREENPORT_VF) },
2890 	{ RTE_PCI_DEVICE(EFX_PCI_VENID_SFC, EFX_PCI_DEVID_MEDFORD) },
2891 	{ RTE_PCI_DEVICE(EFX_PCI_VENID_SFC, EFX_PCI_DEVID_MEDFORD_VF) },
2892 	{ RTE_PCI_DEVICE(EFX_PCI_VENID_SFC, EFX_PCI_DEVID_MEDFORD2) },
2893 	{ RTE_PCI_DEVICE(EFX_PCI_VENID_SFC, EFX_PCI_DEVID_MEDFORD2_VF) },
2894 	{ RTE_PCI_DEVICE(EFX_PCI_VENID_XILINX, EFX_PCI_DEVID_RIVERHEAD) },
2895 	{ RTE_PCI_DEVICE(EFX_PCI_VENID_XILINX, EFX_PCI_DEVID_RIVERHEAD_VF) },
2896 	{ .vendor_id = 0 /* sentinel */ }
2897 };
2898 
2899 static int
2900 sfc_parse_rte_devargs(const char *args, struct rte_eth_devargs *devargs)
2901 {
2902 	struct rte_eth_devargs eth_da = { .nb_representor_ports = 0 };
2903 	int rc;
2904 
2905 	if (args != NULL) {
2906 		rc = rte_eth_devargs_parse(args, &eth_da);
2907 		if (rc != 0) {
2908 			SFC_GENERIC_LOG(ERR,
2909 					"Failed to parse generic devargs '%s'",
2910 					args);
2911 			return rc;
2912 		}
2913 	}
2914 
2915 	*devargs = eth_da;
2916 
2917 	return 0;
2918 }
2919 
2920 static int
2921 sfc_eth_dev_find_or_create(struct rte_pci_device *pci_dev,
2922 			   struct sfc_ethdev_init_data *init_data,
2923 			   struct rte_eth_dev **devp,
2924 			   bool *dev_created)
2925 {
2926 	struct rte_eth_dev *dev;
2927 	bool created = false;
2928 	int rc;
2929 
2930 	dev = rte_eth_dev_allocated(pci_dev->device.name);
2931 	if (dev == NULL) {
2932 		rc = rte_eth_dev_create(&pci_dev->device, pci_dev->device.name,
2933 					sizeof(struct sfc_adapter_shared),
2934 					eth_dev_pci_specific_init, pci_dev,
2935 					sfc_eth_dev_init, init_data);
2936 		if (rc != 0) {
2937 			SFC_GENERIC_LOG(ERR, "Failed to create sfc ethdev '%s'",
2938 					pci_dev->device.name);
2939 			return rc;
2940 		}
2941 
2942 		created = true;
2943 
2944 		dev = rte_eth_dev_allocated(pci_dev->device.name);
2945 		if (dev == NULL) {
2946 			SFC_GENERIC_LOG(ERR,
2947 				"Failed to find allocated sfc ethdev '%s'",
2948 				pci_dev->device.name);
2949 			return -ENODEV;
2950 		}
2951 	}
2952 
2953 	*devp = dev;
2954 	*dev_created = created;
2955 
2956 	return 0;
2957 }
2958 
2959 static int
2960 sfc_eth_dev_create_repr(struct sfc_adapter *sa,
2961 			efx_pcie_interface_t controller,
2962 			uint16_t port,
2963 			uint16_t repr_port,
2964 			enum rte_eth_representor_type type)
2965 {
2966 	struct sfc_repr_entity_info entity;
2967 	efx_mport_sel_t mport_sel;
2968 	int rc;
2969 
2970 	switch (type) {
2971 	case RTE_ETH_REPRESENTOR_NONE:
2972 		return 0;
2973 	case RTE_ETH_REPRESENTOR_VF:
2974 	case RTE_ETH_REPRESENTOR_PF:
2975 		break;
2976 	case RTE_ETH_REPRESENTOR_SF:
2977 		sfc_err(sa, "SF representors are not supported");
2978 		return ENOTSUP;
2979 	default:
2980 		sfc_err(sa, "unknown representor type: %d", type);
2981 		return ENOTSUP;
2982 	}
2983 
2984 	rc = efx_mae_mport_by_pcie_mh_function(controller,
2985 					       port,
2986 					       repr_port,
2987 					       &mport_sel);
2988 	if (rc != 0) {
2989 		sfc_err(sa,
2990 			"failed to get m-port selector for controller %u port %u repr_port %u: %s",
2991 			controller, port, repr_port, rte_strerror(-rc));
2992 		return rc;
2993 	}
2994 
2995 	memset(&entity, 0, sizeof(entity));
2996 	entity.type = type;
2997 	entity.intf = controller;
2998 	entity.pf = port;
2999 	entity.vf = repr_port;
3000 
3001 	rc = sfc_repr_create(sa->eth_dev, &entity, sa->mae.switch_domain_id,
3002 			     &mport_sel);
3003 	if (rc != 0) {
3004 		sfc_err(sa,
3005 			"failed to create representor for controller %u port %u repr_port %u: %s",
3006 			controller, port, repr_port, rte_strerror(-rc));
3007 		return rc;
3008 	}
3009 
3010 	return 0;
3011 }
3012 
3013 static int
3014 sfc_eth_dev_create_repr_port(struct sfc_adapter *sa,
3015 			     const struct rte_eth_devargs *eth_da,
3016 			     efx_pcie_interface_t controller,
3017 			     uint16_t port)
3018 {
3019 	int first_error = 0;
3020 	uint16_t i;
3021 	int rc;
3022 
3023 	if (eth_da->type == RTE_ETH_REPRESENTOR_PF) {
3024 		return sfc_eth_dev_create_repr(sa, controller, port,
3025 					       EFX_PCI_VF_INVALID,
3026 					       eth_da->type);
3027 	}
3028 
3029 	for (i = 0; i < eth_da->nb_representor_ports; i++) {
3030 		rc = sfc_eth_dev_create_repr(sa, controller, port,
3031 					     eth_da->representor_ports[i],
3032 					     eth_da->type);
3033 		if (rc != 0 && first_error == 0)
3034 			first_error = rc;
3035 	}
3036 
3037 	return first_error;
3038 }
3039 
3040 static int
3041 sfc_eth_dev_create_repr_controller(struct sfc_adapter *sa,
3042 				   const struct rte_eth_devargs *eth_da,
3043 				   efx_pcie_interface_t controller)
3044 {
3045 	const efx_nic_cfg_t *encp;
3046 	int first_error = 0;
3047 	uint16_t default_port;
3048 	uint16_t i;
3049 	int rc;
3050 
3051 	if (eth_da->nb_ports == 0) {
3052 		encp = efx_nic_cfg_get(sa->nic);
3053 		default_port = encp->enc_intf == controller ? encp->enc_pf : 0;
3054 		return sfc_eth_dev_create_repr_port(sa, eth_da, controller,
3055 						    default_port);
3056 	}
3057 
3058 	for (i = 0; i < eth_da->nb_ports; i++) {
3059 		rc = sfc_eth_dev_create_repr_port(sa, eth_da, controller,
3060 						  eth_da->ports[i]);
3061 		if (rc != 0 && first_error == 0)
3062 			first_error = rc;
3063 	}
3064 
3065 	return first_error;
3066 }
3067 
3068 static int
3069 sfc_eth_dev_create_representors(struct rte_eth_dev *dev,
3070 				const struct rte_eth_devargs *eth_da)
3071 {
3072 	efx_pcie_interface_t intf;
3073 	const efx_nic_cfg_t *encp;
3074 	struct sfc_adapter *sa;
3075 	uint16_t switch_domain_id;
3076 	uint16_t i;
3077 	int rc;
3078 
3079 	sa = sfc_adapter_by_eth_dev(dev);
3080 	switch_domain_id = sa->mae.switch_domain_id;
3081 
3082 	switch (eth_da->type) {
3083 	case RTE_ETH_REPRESENTOR_NONE:
3084 		return 0;
3085 	case RTE_ETH_REPRESENTOR_PF:
3086 	case RTE_ETH_REPRESENTOR_VF:
3087 		break;
3088 	case RTE_ETH_REPRESENTOR_SF:
3089 		sfc_err(sa, "SF representors are not supported");
3090 		return -ENOTSUP;
3091 	default:
3092 		sfc_err(sa, "unknown representor type: %d",
3093 			eth_da->type);
3094 		return -ENOTSUP;
3095 	}
3096 
3097 	if (!sa->switchdev) {
3098 		sfc_err(sa, "cannot create representors in non-switchdev mode");
3099 		return -EINVAL;
3100 	}
3101 
3102 	if (!sfc_repr_available(sfc_sa2shared(sa))) {
3103 		sfc_err(sa, "cannot create representors: unsupported");
3104 
3105 		return -ENOTSUP;
3106 	}
3107 
3108 	/*
3109 	 * This is needed to construct the DPDK controller -> EFX interface
3110 	 * mapping.
3111 	 */
3112 	sfc_adapter_lock(sa);
3113 	rc = sfc_process_mport_journal(sa);
3114 	sfc_adapter_unlock(sa);
3115 	if (rc != 0) {
3116 		SFC_ASSERT(rc > 0);
3117 		return -rc;
3118 	}
3119 
3120 	if (eth_da->nb_mh_controllers > 0) {
3121 		for (i = 0; i < eth_da->nb_mh_controllers; i++) {
3122 			rc = sfc_mae_switch_domain_get_intf(switch_domain_id,
3123 						eth_da->mh_controllers[i],
3124 						&intf);
3125 			if (rc != 0) {
3126 				sfc_err(sa, "failed to get representor");
3127 				continue;
3128 			}
3129 			sfc_eth_dev_create_repr_controller(sa, eth_da, intf);
3130 		}
3131 	} else {
3132 		encp = efx_nic_cfg_get(sa->nic);
3133 		sfc_eth_dev_create_repr_controller(sa, eth_da, encp->enc_intf);
3134 	}
3135 
3136 	return 0;
3137 }
3138 
3139 static int sfc_eth_dev_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
3140 	struct rte_pci_device *pci_dev)
3141 {
3142 	struct sfc_ethdev_init_data init_data;
3143 	struct rte_eth_devargs eth_da;
3144 	struct rte_eth_dev *dev;
3145 	bool dev_created;
3146 	int rc;
3147 
3148 	if (pci_dev->device.devargs != NULL) {
3149 		rc = sfc_parse_rte_devargs(pci_dev->device.devargs->args,
3150 					   &eth_da);
3151 		if (rc != 0)
3152 			return rc;
3153 	} else {
3154 		memset(&eth_da, 0, sizeof(eth_da));
3155 	}
3156 
3157 	/* If no VF representors specified, check for PF ones */
3158 	if (eth_da.nb_representor_ports > 0)
3159 		init_data.nb_representors = eth_da.nb_representor_ports;
3160 	else
3161 		init_data.nb_representors = eth_da.nb_ports;
3162 
3163 	if (init_data.nb_representors > 0 &&
3164 	    rte_eal_process_type() != RTE_PROC_PRIMARY) {
3165 		SFC_GENERIC_LOG(ERR,
3166 			"Create representors from secondary process not supported, dev '%s'",
3167 			pci_dev->device.name);
3168 		return -ENOTSUP;
3169 	}
3170 
3171 	/*
3172 	 * Driver supports RTE_PCI_DRV_PROBE_AGAIN. Hence create device only
3173 	 * if it does not already exist. Re-probing an existing device is
3174 	 * expected to allow additional representors to be configured.
3175 	 */
3176 	rc = sfc_eth_dev_find_or_create(pci_dev, &init_data, &dev,
3177 					&dev_created);
3178 	if (rc != 0)
3179 		return rc;
3180 
3181 	rc = sfc_eth_dev_create_representors(dev, &eth_da);
3182 	if (rc != 0) {
3183 		if (dev_created)
3184 			(void)rte_eth_dev_destroy(dev, sfc_eth_dev_uninit);
3185 
3186 		return rc;
3187 	}
3188 
3189 	return 0;
3190 }
3191 
3192 static int sfc_eth_dev_pci_remove(struct rte_pci_device *pci_dev)
3193 {
3194 	return rte_eth_dev_pci_generic_remove(pci_dev, sfc_eth_dev_uninit);
3195 }
3196 
3197 static struct rte_pci_driver sfc_efx_pmd = {
3198 	.id_table = pci_id_sfc_efx_map,
3199 	.drv_flags =
3200 		RTE_PCI_DRV_INTR_LSC |
3201 		RTE_PCI_DRV_NEED_MAPPING |
3202 		RTE_PCI_DRV_PROBE_AGAIN,
3203 	.probe = sfc_eth_dev_pci_probe,
3204 	.remove = sfc_eth_dev_pci_remove,
3205 };
3206 
3207 RTE_PMD_REGISTER_PCI(net_sfc_efx, sfc_efx_pmd);
3208 RTE_PMD_REGISTER_PCI_TABLE(net_sfc_efx, pci_id_sfc_efx_map);
3209 RTE_PMD_REGISTER_KMOD_DEP(net_sfc_efx, "* igb_uio | uio_pci_generic | vfio-pci");
3210 RTE_PMD_REGISTER_PARAM_STRING(net_sfc_efx,
3211 	SFC_KVARG_SWITCH_MODE "=" SFC_KVARG_VALUES_SWITCH_MODE " "
3212 	SFC_KVARG_RX_DATAPATH "=" SFC_KVARG_VALUES_RX_DATAPATH " "
3213 	SFC_KVARG_TX_DATAPATH "=" SFC_KVARG_VALUES_TX_DATAPATH " "
3214 	SFC_KVARG_PERF_PROFILE "=" SFC_KVARG_VALUES_PERF_PROFILE " "
3215 	SFC_KVARG_FW_VARIANT "=" SFC_KVARG_VALUES_FW_VARIANT " "
3216 	SFC_KVARG_RXD_WAIT_TIMEOUT_NS "=<long> "
3217 	SFC_KVARG_STATS_UPDATE_PERIOD_MS "=<long>");
3218 
3219 RTE_INIT(sfc_driver_register_logtype)
3220 {
3221 	int ret;
3222 
3223 	ret = rte_log_register_type_and_pick_level(SFC_LOGTYPE_PREFIX "driver",
3224 						   RTE_LOG_NOTICE);
3225 	sfc_logtype_driver = (ret < 0) ? RTE_LOGTYPE_PMD : ret;
3226 }
3227