xref: /dpdk/drivers/net/sfc/sfc.c (revision 68a03efeed657e6e05f281479b33b51102797e15)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  *
3  * Copyright(c) 2019-2021 Xilinx, Inc.
4  * Copyright(c) 2016-2019 Solarflare Communications Inc.
5  *
6  * This software was jointly developed between OKTET Labs (under contract
7  * for Solarflare) and Solarflare Communications, Inc.
8  */
9 
10 /* sysconf() */
11 #include <unistd.h>
12 
13 #include <rte_errno.h>
14 #include <rte_alarm.h>
15 
16 #include "efx.h"
17 
18 #include "sfc.h"
19 #include "sfc_debug.h"
20 #include "sfc_log.h"
21 #include "sfc_ev.h"
22 #include "sfc_rx.h"
23 #include "sfc_tx.h"
24 #include "sfc_kvargs.h"
25 #include "sfc_tweak.h"
26 
27 
28 int
29 sfc_dma_alloc(const struct sfc_adapter *sa, const char *name, uint16_t id,
30 	      size_t len, int socket_id, efsys_mem_t *esmp)
31 {
32 	const struct rte_memzone *mz;
33 
34 	sfc_log_init(sa, "name=%s id=%u len=%zu socket_id=%d",
35 		     name, id, len, socket_id);
36 
37 	mz = rte_eth_dma_zone_reserve(sa->eth_dev, name, id, len,
38 				      sysconf(_SC_PAGESIZE), socket_id);
39 	if (mz == NULL) {
40 		sfc_err(sa, "cannot reserve DMA zone for %s:%u %#x@%d: %s",
41 			name, (unsigned int)id, (unsigned int)len, socket_id,
42 			rte_strerror(rte_errno));
43 		return ENOMEM;
44 	}
45 
46 	esmp->esm_addr = mz->iova;
47 	if (esmp->esm_addr == RTE_BAD_IOVA) {
48 		(void)rte_memzone_free(mz);
49 		return EFAULT;
50 	}
51 
52 	esmp->esm_mz = mz;
53 	esmp->esm_base = mz->addr;
54 
55 	sfc_info(sa,
56 		 "DMA name=%s id=%u len=%zu socket_id=%d => virt=%p iova=%lx",
57 		 name, id, len, socket_id, esmp->esm_base,
58 		 (unsigned long)esmp->esm_addr);
59 
60 	return 0;
61 }
62 
63 void
64 sfc_dma_free(const struct sfc_adapter *sa, efsys_mem_t *esmp)
65 {
66 	int rc;
67 
68 	sfc_log_init(sa, "name=%s", esmp->esm_mz->name);
69 
70 	rc = rte_memzone_free(esmp->esm_mz);
71 	if (rc != 0)
72 		sfc_err(sa, "rte_memzone_free() failed: %d", rc);
73 
74 	memset(esmp, 0, sizeof(*esmp));
75 }
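/*
 * Illustrative usage sketch (not part of the driver build, hence #if 0):
 * how a queue module pairs sfc_dma_alloc() with sfc_dma_free(). The
 * helper name and descriptor size below are hypothetical; the real
 * callers live in sfc_ev.c, sfc_rx.c and sfc_tx.c.
 */
#if 0
static int
example_dma_usage(struct sfc_adapter *sa, uint16_t id, size_t nb_desc)
{
	efsys_mem_t mem;
	int rc;

	/* A positive errno value is returned on failure */
	rc = sfc_dma_alloc(sa, "example", id, nb_desc * sizeof(efx_qword_t),
			   sa->socket_id, &mem);
	if (rc != 0)
		return rc;

	/* Hardware uses mem.esm_addr (IOVA); the CPU uses mem.esm_base */

	sfc_dma_free(sa, &mem);
	return 0;
}
#endif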
76 
77 static uint32_t
78 sfc_phy_cap_from_link_speeds(uint32_t speeds)
79 {
80 	uint32_t phy_caps = 0;
81 
82 	if (~speeds & ETH_LINK_SPEED_FIXED) {
83 		phy_caps |= (1 << EFX_PHY_CAP_AN);
84 		/*
85 		 * If no speeds are specified in the mask, any supported
86 		 * speed may be negotiated
87 		 */
88 		if (speeds == ETH_LINK_SPEED_AUTONEG)
89 			phy_caps |=
90 				(1 << EFX_PHY_CAP_1000FDX) |
91 				(1 << EFX_PHY_CAP_10000FDX) |
92 				(1 << EFX_PHY_CAP_25000FDX) |
93 				(1 << EFX_PHY_CAP_40000FDX) |
94 				(1 << EFX_PHY_CAP_50000FDX) |
95 				(1 << EFX_PHY_CAP_100000FDX);
96 	}
97 	if (speeds & ETH_LINK_SPEED_1G)
98 		phy_caps |= (1 << EFX_PHY_CAP_1000FDX);
99 	if (speeds & ETH_LINK_SPEED_10G)
100 		phy_caps |= (1 << EFX_PHY_CAP_10000FDX);
101 	if (speeds & ETH_LINK_SPEED_25G)
102 		phy_caps |= (1 << EFX_PHY_CAP_25000FDX);
103 	if (speeds & ETH_LINK_SPEED_40G)
104 		phy_caps |= (1 << EFX_PHY_CAP_40000FDX);
105 	if (speeds & ETH_LINK_SPEED_50G)
106 		phy_caps |= (1 << EFX_PHY_CAP_50000FDX);
107 	if (speeds & ETH_LINK_SPEED_100G)
108 		phy_caps |= (1 << EFX_PHY_CAP_100000FDX);
109 
110 	return phy_caps;
111 }
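/*
 * Worked example (illustrative): speeds = ETH_LINK_SPEED_10G |
 * ETH_LINK_SPEED_25G leaves ETH_LINK_SPEED_FIXED clear, so the result is
 * (1 << EFX_PHY_CAP_AN) | (1 << EFX_PHY_CAP_10000FDX) |
 * (1 << EFX_PHY_CAP_25000FDX): autonegotiation restricted to 10G/25G.
 * With speeds = ETH_LINK_SPEED_AUTONEG (i.e. 0), all of the full-duplex
 * capabilities listed above are advertised instead.
 */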
112 
113 /*
114  * Check requested device level configuration.
115  * Receive and transmit configuration is checked in corresponding
116  * modules.
117  */
118 static int
119 sfc_check_conf(struct sfc_adapter *sa)
120 {
121 	const struct rte_eth_conf *conf = &sa->eth_dev->data->dev_conf;
122 	int rc = 0;
123 
124 	sa->port.phy_adv_cap =
125 		sfc_phy_cap_from_link_speeds(conf->link_speeds) &
126 		sa->port.phy_adv_cap_mask;
127 	if ((sa->port.phy_adv_cap & ~(1 << EFX_PHY_CAP_AN)) == 0) {
128 		sfc_err(sa, "No link speeds from mask %#x are supported",
129 			conf->link_speeds);
130 		rc = EINVAL;
131 	}
132 
133 #if !EFSYS_OPT_LOOPBACK
134 	if (conf->lpbk_mode != 0) {
135 		sfc_err(sa, "Loopback not supported");
136 		rc = EINVAL;
137 	}
138 #endif
139 
140 	if (conf->dcb_capability_en != 0) {
141 		sfc_err(sa, "Priority-based flow control not supported");
142 		rc = EINVAL;
143 	}
144 
145 	if (conf->fdir_conf.mode != RTE_FDIR_MODE_NONE) {
146 		sfc_err(sa, "Flow Director not supported");
147 		rc = EINVAL;
148 	}
149 
150 	if ((conf->intr_conf.lsc != 0) &&
151 	    (sa->intr.type != EFX_INTR_LINE) &&
152 	    (sa->intr.type != EFX_INTR_MESSAGE)) {
153 		sfc_err(sa, "Link status change interrupt not supported");
154 		rc = EINVAL;
155 	}
156 
157 	if (conf->intr_conf.rxq != 0 &&
158 	    (sa->priv.dp_rx->features & SFC_DP_RX_FEAT_INTR) == 0) {
159 		sfc_err(sa, "Receive queue interrupt not supported");
160 		rc = EINVAL;
161 	}
162 
163 	return rc;
164 }
165 
166 /*
167  * Find out maximum number of receive and transmit queues which could be
168  * advertised.
169  *
170  * NIC is kept initialized on success to allow other modules acquire
171  * defaults and capabilities.
172  */
173 static int
174 sfc_estimate_resource_limits(struct sfc_adapter *sa)
175 {
176 	const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
177 	efx_drv_limits_t limits;
178 	int rc;
179 	uint32_t evq_allocated;
180 	uint32_t rxq_allocated;
181 	uint32_t txq_allocated;
182 
183 	memset(&limits, 0, sizeof(limits));
184 
185 	/* Request at least one Rx and Tx queue */
186 	limits.edl_min_rxq_count = 1;
187 	limits.edl_min_txq_count = 1;
188 	/* Management event queue plus event queue for each Tx and Rx queue */
189 	limits.edl_min_evq_count =
190 		1 + limits.edl_min_rxq_count + limits.edl_min_txq_count;
191 
192 	/* Divide by the number of functions to guarantee that all
193 	 * functions will get the promised resources
194 	 */
195 	/* FIXME Divide by number of functions (not 2) below */
196 	limits.edl_max_evq_count = encp->enc_evq_limit / 2;
197 	SFC_ASSERT(limits.edl_max_evq_count >= limits.edl_min_evq_count);
198 
199 	/* Split equally between receive and transmit */
200 	limits.edl_max_rxq_count =
201 		MIN(encp->enc_rxq_limit, (limits.edl_max_evq_count - 1) / 2);
202 	SFC_ASSERT(limits.edl_max_rxq_count >= limits.edl_min_rxq_count);
203 
204 	limits.edl_max_txq_count =
205 		MIN(encp->enc_txq_limit,
206 		    limits.edl_max_evq_count - 1 - limits.edl_max_rxq_count);
207 
208 	if (sa->tso && encp->enc_fw_assisted_tso_v2_enabled)
209 		limits.edl_max_txq_count =
210 			MIN(limits.edl_max_txq_count,
211 			    encp->enc_fw_assisted_tso_v2_n_contexts /
212 			    encp->enc_hw_pf_count);
213 
214 	SFC_ASSERT(limits.edl_max_txq_count >= limits.edl_min_txq_count);
215 
216 	/* Configure the minimum required resources needed for the
217 	 * driver to operate, and the maximum desired resources that the
218 	 * driver is capable of using.
219 	 */
220 	efx_nic_set_drv_limits(sa->nic, &limits);
221 
222 	sfc_log_init(sa, "init nic");
223 	rc = efx_nic_init(sa->nic);
224 	if (rc != 0)
225 		goto fail_nic_init;
226 
227 	/* Find resource dimensions assigned by firmware to this function */
228 	rc = efx_nic_get_vi_pool(sa->nic, &evq_allocated, &rxq_allocated,
229 				 &txq_allocated);
230 	if (rc != 0)
231 		goto fail_get_vi_pool;
232 
233 	/* Firmware may still allocate more than the maximum; enforce the limits */
234 	evq_allocated = MIN(evq_allocated, limits.edl_max_evq_count);
235 	rxq_allocated = MIN(rxq_allocated, limits.edl_max_rxq_count);
236 	txq_allocated = MIN(txq_allocated, limits.edl_max_txq_count);
237 
238 	/* Subtract management EVQ not used for traffic */
239 	SFC_ASSERT(evq_allocated > 0);
240 	evq_allocated--;
241 
242 	/* Right now we use separate EVQ for Rx and Tx */
243 	sa->rxq_max = MIN(rxq_allocated, evq_allocated / 2);
244 	sa->txq_max = MIN(txq_allocated, evq_allocated - sa->rxq_max);
245 
246 	/* Keep NIC initialized */
247 	return 0;
248 
249 fail_get_vi_pool:
250 	efx_nic_fini(sa->nic);
251 fail_nic_init:
252 	return rc;
253 }
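/*
 * Worked example (illustrative numbers): with enc_evq_limit = 1024,
 * enc_rxq_limit = 1024 and enc_txq_limit = 1024 the limits above are
 * edl_max_evq_count = 512, edl_max_rxq_count = MIN(1024, 511 / 2) = 255
 * and edl_max_txq_count = MIN(1024, 512 - 1 - 255) = 256. If firmware
 * then grants, say, 64 EVQs, 64 RxQs and 64 TxQs, the management EVQ
 * is subtracted and rxq_max = MIN(64, 63 / 2) = 31 while
 * txq_max = MIN(64, 63 - 31) = 32.
 */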
254 
255 static int
256 sfc_set_drv_limits(struct sfc_adapter *sa)
257 {
258 	const struct rte_eth_dev_data *data = sa->eth_dev->data;
259 	efx_drv_limits_t lim;
260 
261 	memset(&lim, 0, sizeof(lim));
262 
263 	/* Limits are strict since they take the initial estimation into account */
264 	lim.edl_min_evq_count = lim.edl_max_evq_count =
265 		1 + data->nb_rx_queues + data->nb_tx_queues;
266 	lim.edl_min_rxq_count = lim.edl_max_rxq_count = data->nb_rx_queues;
267 	lim.edl_min_txq_count = lim.edl_max_txq_count = data->nb_tx_queues;
268 
269 	return efx_nic_set_drv_limits(sa->nic, &lim);
270 }
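/*
 * Example (illustrative): a port configured with nb_rx_queues = 4 and
 * nb_tx_queues = 4 requests exactly 1 + 4 + 4 = 9 event queues here:
 * one per Rx queue, one per Tx queue, plus the management event queue.
 */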
271 
272 static int
273 sfc_set_fw_subvariant(struct sfc_adapter *sa)
274 {
275 	struct sfc_adapter_shared *sas = sfc_sa2shared(sa);
276 	const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
277 	uint64_t tx_offloads = sa->eth_dev->data->dev_conf.txmode.offloads;
278 	unsigned int txq_index;
279 	efx_nic_fw_subvariant_t req_fw_subvariant;
280 	efx_nic_fw_subvariant_t cur_fw_subvariant;
281 	int rc;
282 
283 	if (!encp->enc_fw_subvariant_no_tx_csum_supported) {
284 		sfc_info(sa, "no-Tx-checksum subvariant not supported");
285 		return 0;
286 	}
287 
288 	for (txq_index = 0; txq_index < sas->txq_count; ++txq_index) {
289 		struct sfc_txq_info *txq_info = &sas->txq_info[txq_index];
290 
291 		if (txq_info->state & SFC_TXQ_INITIALIZED)
292 			tx_offloads |= txq_info->offloads;
293 	}
294 
295 	if (tx_offloads & (DEV_TX_OFFLOAD_IPV4_CKSUM |
296 			   DEV_TX_OFFLOAD_TCP_CKSUM |
297 			   DEV_TX_OFFLOAD_UDP_CKSUM |
298 			   DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM))
299 		req_fw_subvariant = EFX_NIC_FW_SUBVARIANT_DEFAULT;
300 	else
301 		req_fw_subvariant = EFX_NIC_FW_SUBVARIANT_NO_TX_CSUM;
302 
303 	rc = efx_nic_get_fw_subvariant(sa->nic, &cur_fw_subvariant);
304 	if (rc != 0) {
305 		sfc_err(sa, "failed to get FW subvariant: %d", rc);
306 		return rc;
307 	}
308 	sfc_info(sa, "FW subvariant is %u vs required %u",
309 		 cur_fw_subvariant, req_fw_subvariant);
310 
311 	if (cur_fw_subvariant == req_fw_subvariant)
312 		return 0;
313 
314 	rc = efx_nic_set_fw_subvariant(sa->nic, req_fw_subvariant);
315 	if (rc != 0) {
316 		sfc_err(sa, "failed to set FW subvariant %u: %d",
317 			req_fw_subvariant, rc);
318 		return rc;
319 	}
320 	sfc_info(sa, "FW subvariant set to %u", req_fw_subvariant);
321 
322 	return 0;
323 }
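/*
 * Example (illustrative): with txmode.offloads == 0 and no initialized
 * Tx queue requesting a DEV_TX_OFFLOAD_*_CKSUM offload, the function
 * requests EFX_NIC_FW_SUBVARIANT_NO_TX_CSUM; a single queue configured
 * with, e.g., DEV_TX_OFFLOAD_TCP_CKSUM is enough to require the default
 * subvariant.
 */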
324 
325 static int
326 sfc_try_start(struct sfc_adapter *sa)
327 {
328 	const efx_nic_cfg_t *encp;
329 	int rc;
330 
331 	sfc_log_init(sa, "entry");
332 
333 	SFC_ASSERT(sfc_adapter_is_locked(sa));
334 	SFC_ASSERT(sa->state == SFC_ADAPTER_STARTING);
335 
336 	sfc_log_init(sa, "set FW subvariant");
337 	rc = sfc_set_fw_subvariant(sa);
338 	if (rc != 0)
339 		goto fail_set_fw_subvariant;
340 
341 	sfc_log_init(sa, "set resource limits");
342 	rc = sfc_set_drv_limits(sa);
343 	if (rc != 0)
344 		goto fail_set_drv_limits;
345 
346 	sfc_log_init(sa, "init nic");
347 	rc = efx_nic_init(sa->nic);
348 	if (rc != 0)
349 		goto fail_nic_init;
350 
351 	encp = efx_nic_cfg_get(sa->nic);
352 
353 	/*
354 	 * Refresh (since it may change on NIC reset/restart) a copy of
355 	 * supported tunnel encapsulations in shared memory to be used
356 	 * when the supported Rx packet type classes are queried.
357 	 */
358 	sa->priv.shared->tunnel_encaps =
359 		encp->enc_tunnel_encapsulations_supported;
360 
361 	if (encp->enc_tunnel_encapsulations_supported != 0) {
362 		sfc_log_init(sa, "apply tunnel config");
363 		rc = efx_tunnel_reconfigure(sa->nic);
364 		if (rc != 0)
365 			goto fail_tunnel_reconfigure;
366 	}
367 
368 	rc = sfc_intr_start(sa);
369 	if (rc != 0)
370 		goto fail_intr_start;
371 
372 	rc = sfc_ev_start(sa);
373 	if (rc != 0)
374 		goto fail_ev_start;
375 
376 	rc = sfc_port_start(sa);
377 	if (rc != 0)
378 		goto fail_port_start;
379 
380 	rc = sfc_rx_start(sa);
381 	if (rc != 0)
382 		goto fail_rx_start;
383 
384 	rc = sfc_tx_start(sa);
385 	if (rc != 0)
386 		goto fail_tx_start;
387 
388 	rc = sfc_flow_start(sa);
389 	if (rc != 0)
390 		goto fail_flows_insert;
391 
392 	sfc_log_init(sa, "done");
393 	return 0;
394 
395 fail_flows_insert:
396 	sfc_tx_stop(sa);
397 
398 fail_tx_start:
399 	sfc_rx_stop(sa);
400 
401 fail_rx_start:
402 	sfc_port_stop(sa);
403 
404 fail_port_start:
405 	sfc_ev_stop(sa);
406 
407 fail_ev_start:
408 	sfc_intr_stop(sa);
409 
410 fail_intr_start:
411 fail_tunnel_reconfigure:
412 	efx_nic_fini(sa->nic);
413 
414 fail_nic_init:
415 fail_set_drv_limits:
416 fail_set_fw_subvariant:
417 	sfc_log_init(sa, "failed %d", rc);
418 	return rc;
419 }
420 
421 int
422 sfc_start(struct sfc_adapter *sa)
423 {
424 	unsigned int start_tries = 3;
425 	int rc;
426 
427 	sfc_log_init(sa, "entry");
428 
429 	SFC_ASSERT(sfc_adapter_is_locked(sa));
430 
431 	switch (sa->state) {
432 	case SFC_ADAPTER_CONFIGURED:
433 		break;
434 	case SFC_ADAPTER_STARTED:
435 		sfc_notice(sa, "already started");
436 		return 0;
437 	default:
438 		rc = EINVAL;
439 		goto fail_bad_state;
440 	}
441 
442 	sa->state = SFC_ADAPTER_STARTING;
443 
444 	rc = 0;
445 	do {
446 		/*
447 		 * FIXME Try to recreate vSwitch on start retry.
448 		 * vSwitch is absent after MC-reboot-like events and
449 		 * must be recreated. Maybe we need a proper
450 		 * indication instead of guessing.
451 		 */
452 		if (rc != 0) {
453 			sfc_sriov_vswitch_destroy(sa);
454 			rc = sfc_sriov_vswitch_create(sa);
455 			if (rc != 0)
456 				goto fail_sriov_vswitch_create;
457 		}
458 		rc = sfc_try_start(sa);
459 	} while ((--start_tries > 0) &&
460 		 (rc == EIO || rc == EAGAIN || rc == ENOENT || rc == EINVAL));
461 
462 	if (rc != 0)
463 		goto fail_try_start;
464 
465 	sa->state = SFC_ADAPTER_STARTED;
466 	sfc_log_init(sa, "done");
467 	return 0;
468 
469 fail_try_start:
470 fail_sriov_vswitch_create:
471 	sa->state = SFC_ADAPTER_CONFIGURED;
472 fail_bad_state:
473 	sfc_log_init(sa, "failed %d", rc);
474 	return rc;
475 }
476 
477 void
478 sfc_stop(struct sfc_adapter *sa)
479 {
480 	sfc_log_init(sa, "entry");
481 
482 	SFC_ASSERT(sfc_adapter_is_locked(sa));
483 
484 	switch (sa->state) {
485 	case SFC_ADAPTER_STARTED:
486 		break;
487 	case SFC_ADAPTER_CONFIGURED:
488 		sfc_notice(sa, "already stopped");
489 		return;
490 	default:
491 		sfc_err(sa, "stop in unexpected state %u", sa->state);
492 		SFC_ASSERT(B_FALSE);
493 		return;
494 	}
495 
496 	sa->state = SFC_ADAPTER_STOPPING;
497 
498 	sfc_flow_stop(sa);
499 	sfc_tx_stop(sa);
500 	sfc_rx_stop(sa);
501 	sfc_port_stop(sa);
502 	sfc_ev_stop(sa);
503 	sfc_intr_stop(sa);
504 	efx_nic_fini(sa->nic);
505 
506 	sa->state = SFC_ADAPTER_CONFIGURED;
507 	sfc_log_init(sa, "done");
508 }
509 
510 static int
511 sfc_restart(struct sfc_adapter *sa)
512 {
513 	int rc;
514 
515 	SFC_ASSERT(sfc_adapter_is_locked(sa));
516 
517 	if (sa->state != SFC_ADAPTER_STARTED)
518 		return EINVAL;
519 
520 	sfc_stop(sa);
521 
522 	rc = sfc_start(sa);
523 	if (rc != 0)
524 		sfc_err(sa, "restart failed");
525 
526 	return rc;
527 }
528 
529 static void
530 sfc_restart_if_required(void *arg)
531 {
532 	struct sfc_adapter *sa = arg;
533 
534 	/* If restart is scheduled, clear the flag and do it */
535 	if (rte_atomic32_cmpset((volatile uint32_t *)&sa->restart_required,
536 				1, 0)) {
537 		sfc_adapter_lock(sa);
538 		if (sa->state == SFC_ADAPTER_STARTED)
539 			(void)sfc_restart(sa);
540 		sfc_adapter_unlock(sa);
541 	}
542 }
543 
544 void
545 sfc_schedule_restart(struct sfc_adapter *sa)
546 {
547 	int rc;
548 
549 	/* Schedule restart alarm if it is not scheduled yet */
550 	if (!rte_atomic32_test_and_set(&sa->restart_required))
551 		return;
552 
553 	rc = rte_eal_alarm_set(1, sfc_restart_if_required, sa);
554 	if (rc == -ENOTSUP)
555 		sfc_warn(sa, "alarms are not supported, restart is pending");
556 	else if (rc != 0)
557 		sfc_err(sa, "cannot arm restart alarm (rc=%d)", rc);
558 	else
559 		sfc_notice(sa, "restart scheduled");
560 }
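/*
 * Call site sketch (hypothetical; real triggers live in the event and
 * MCDI handling paths): a context which detects a condition requiring
 * reinitialization, e.g. an MC reboot, simply calls
 * sfc_schedule_restart(sa) and returns. The restart itself runs later
 * from the alarm callback with the adapter lock taken, so the detecting
 * context never blocks on a full stop/start cycle.
 */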
561 
562 int
563 sfc_configure(struct sfc_adapter *sa)
564 {
565 	int rc;
566 
567 	sfc_log_init(sa, "entry");
568 
569 	SFC_ASSERT(sfc_adapter_is_locked(sa));
570 
571 	SFC_ASSERT(sa->state == SFC_ADAPTER_INITIALIZED ||
572 		   sa->state == SFC_ADAPTER_CONFIGURED);
573 	sa->state = SFC_ADAPTER_CONFIGURING;
574 
575 	rc = sfc_check_conf(sa);
576 	if (rc != 0)
577 		goto fail_check_conf;
578 
579 	rc = sfc_intr_configure(sa);
580 	if (rc != 0)
581 		goto fail_intr_configure;
582 
583 	rc = sfc_port_configure(sa);
584 	if (rc != 0)
585 		goto fail_port_configure;
586 
587 	rc = sfc_rx_configure(sa);
588 	if (rc != 0)
589 		goto fail_rx_configure;
590 
591 	rc = sfc_tx_configure(sa);
592 	if (rc != 0)
593 		goto fail_tx_configure;
594 
595 	sa->state = SFC_ADAPTER_CONFIGURED;
596 	sfc_log_init(sa, "done");
597 	return 0;
598 
599 fail_tx_configure:
600 	sfc_rx_close(sa);
601 
602 fail_rx_configure:
603 	sfc_port_close(sa);
604 
605 fail_port_configure:
606 	sfc_intr_close(sa);
607 
608 fail_intr_configure:
609 fail_check_conf:
610 	sa->state = SFC_ADAPTER_INITIALIZED;
611 	sfc_log_init(sa, "failed %d", rc);
612 	return rc;
613 }
614 
615 void
616 sfc_close(struct sfc_adapter *sa)
617 {
618 	sfc_log_init(sa, "entry");
619 
620 	SFC_ASSERT(sfc_adapter_is_locked(sa));
621 
622 	SFC_ASSERT(sa->state == SFC_ADAPTER_CONFIGURED);
623 	sa->state = SFC_ADAPTER_CLOSING;
624 
625 	sfc_tx_close(sa);
626 	sfc_rx_close(sa);
627 	sfc_port_close(sa);
628 	sfc_intr_close(sa);
629 
630 	sa->state = SFC_ADAPTER_INITIALIZED;
631 	sfc_log_init(sa, "done");
632 }
633 
634 static int
635 sfc_mem_bar_init(struct sfc_adapter *sa, const efx_bar_region_t *mem_ebrp)
636 {
637 	struct rte_eth_dev *eth_dev = sa->eth_dev;
638 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
639 	efsys_bar_t *ebp = &sa->mem_bar;
640 	struct rte_mem_resource *res =
641 		&pci_dev->mem_resource[mem_ebrp->ebr_index];
642 
643 	SFC_BAR_LOCK_INIT(ebp, eth_dev->data->name);
644 	ebp->esb_rid = mem_ebrp->ebr_index;
645 	ebp->esb_dev = pci_dev;
646 	ebp->esb_base = res->addr;
647 
648 	sa->fcw_offset = mem_ebrp->ebr_offset;
649 
650 	return 0;
651 }
652 
653 static void
654 sfc_mem_bar_fini(struct sfc_adapter *sa)
655 {
656 	efsys_bar_t *ebp = &sa->mem_bar;
657 
658 	SFC_BAR_LOCK_DESTROY(ebp);
659 	memset(ebp, 0, sizeof(*ebp));
660 }
661 
662 /*
663  * A fixed RSS key which has a property of being symmetric
664  * (symmetrical flows are distributed to the same CPU)
665  * and also known to give a uniform distribution
666  * (a good distribution of traffic between different CPUs)
667  */
668 static const uint8_t default_rss_key[EFX_RSS_KEY_SIZE] = {
669 	0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
670 	0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
671 	0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
672 	0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
673 	0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
674 };
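/*
 * Why the key above is symmetric (a sketch, not a proof): the Toeplitz
 * hash XORs together 32-bit key windows selected by the set bits of the
 * input tuple. Because the 0x6d, 0x5a byte pair repeats with a 16-bit
 * period, the key window seen at the source field offsets equals the
 * one seen at the corresponding destination field offsets, so swapping
 * source and destination addresses/ports leaves the hash unchanged.
 */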
675 
676 static int
677 sfc_rss_attach(struct sfc_adapter *sa)
678 {
679 	struct sfc_rss *rss = &sfc_sa2shared(sa)->rss;
680 	int rc;
681 
682 	rc = efx_intr_init(sa->nic, sa->intr.type, NULL);
683 	if (rc != 0)
684 		goto fail_intr_init;
685 
686 	rc = efx_ev_init(sa->nic);
687 	if (rc != 0)
688 		goto fail_ev_init;
689 
690 	rc = efx_rx_init(sa->nic);
691 	if (rc != 0)
692 		goto fail_rx_init;
693 
694 	rc = efx_rx_scale_default_support_get(sa->nic, &rss->context_type);
695 	if (rc != 0)
696 		goto fail_scale_support_get;
697 
698 	rc = efx_rx_hash_default_support_get(sa->nic, &rss->hash_support);
699 	if (rc != 0)
700 		goto fail_hash_support_get;
701 
702 	rc = sfc_rx_hash_init(sa);
703 	if (rc != 0)
704 		goto fail_rx_hash_init;
705 
706 	efx_rx_fini(sa->nic);
707 	efx_ev_fini(sa->nic);
708 	efx_intr_fini(sa->nic);
709 
710 	rte_memcpy(rss->key, default_rss_key, sizeof(rss->key));
711 	rss->dummy_rss_context = EFX_RSS_CONTEXT_DEFAULT;
712 
713 	return 0;
714 
715 fail_rx_hash_init:
716 fail_hash_support_get:
717 fail_scale_support_get:
718 	efx_rx_fini(sa->nic);
719 
720 fail_rx_init:
721 	efx_ev_fini(sa->nic);
722 
723 fail_ev_init:
724 	efx_intr_fini(sa->nic);
725 
726 fail_intr_init:
727 	return rc;
728 }
729 
730 static void
731 sfc_rss_detach(struct sfc_adapter *sa)
732 {
733 	sfc_rx_hash_fini(sa);
734 }
735 
736 int
737 sfc_attach(struct sfc_adapter *sa)
738 {
739 	const efx_nic_cfg_t *encp;
740 	efx_nic_t *enp = sa->nic;
741 	int rc;
742 
743 	sfc_log_init(sa, "entry");
744 
745 	SFC_ASSERT(sfc_adapter_is_locked(sa));
746 
747 	efx_mcdi_new_epoch(enp);
748 
749 	sfc_log_init(sa, "reset nic");
750 	rc = efx_nic_reset(enp);
751 	if (rc != 0)
752 		goto fail_nic_reset;
753 
754 	rc = sfc_sriov_attach(sa);
755 	if (rc != 0)
756 		goto fail_sriov_attach;
757 
758 	/*
759 	 * Probed NIC is sufficient for tunnel init.
760 	 * Initialize tunnel support to be able to use libefx
761 	 * efx_tunnel_config_udp_{add,remove}() in any state and
762 	 * efx_tunnel_reconfigure() on start up.
763 	 */
764 	rc = efx_tunnel_init(enp);
765 	if (rc != 0)
766 		goto fail_tunnel_init;
767 
768 	encp = efx_nic_cfg_get(sa->nic);
769 
770 	/*
771 	 * Make a copy of supported tunnel encapsulations in shared
772 	 * memory to be used when supported Rx packet type classes are queried.
773 	 */
774 	sa->priv.shared->tunnel_encaps =
775 		encp->enc_tunnel_encapsulations_supported;
776 
777 	if (sfc_dp_tx_offload_capa(sa->priv.dp_tx) & DEV_TX_OFFLOAD_TCP_TSO) {
778 		sa->tso = encp->enc_fw_assisted_tso_v2_enabled ||
779 			  encp->enc_tso_v3_enabled;
780 		if (!sa->tso)
781 			sfc_info(sa, "TSO support isn't available on this adapter");
782 	}
783 
784 	if (sa->tso &&
785 	    (sfc_dp_tx_offload_capa(sa->priv.dp_tx) &
786 	     (DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
787 	      DEV_TX_OFFLOAD_GENEVE_TNL_TSO)) != 0) {
788 		sa->tso_encap = encp->enc_fw_assisted_tso_v2_encap_enabled ||
789 				encp->enc_tso_v3_enabled;
790 		if (!sa->tso_encap)
791 			sfc_info(sa, "Encapsulated TSO support isn't available on this adapter");
792 	}
793 
794 	sfc_log_init(sa, "estimate resource limits");
795 	rc = sfc_estimate_resource_limits(sa);
796 	if (rc != 0)
797 		goto fail_estimate_rsrc_limits;
798 
799 	sa->evq_max_entries = encp->enc_evq_max_nevs;
800 	SFC_ASSERT(rte_is_power_of_2(sa->evq_max_entries));
801 
802 	sa->evq_min_entries = encp->enc_evq_min_nevs;
803 	SFC_ASSERT(rte_is_power_of_2(sa->evq_min_entries));
804 
805 	sa->rxq_max_entries = encp->enc_rxq_max_ndescs;
806 	SFC_ASSERT(rte_is_power_of_2(sa->rxq_max_entries));
807 
808 	sa->rxq_min_entries = encp->enc_rxq_min_ndescs;
809 	SFC_ASSERT(rte_is_power_of_2(sa->rxq_min_entries));
810 
811 	sa->txq_max_entries = encp->enc_txq_max_ndescs;
812 	SFC_ASSERT(rte_is_power_of_2(sa->txq_max_entries));
813 
814 	sa->txq_min_entries = encp->enc_txq_min_ndescs;
815 	SFC_ASSERT(rte_is_power_of_2(sa->txq_min_entries));
816 
817 	rc = sfc_intr_attach(sa);
818 	if (rc != 0)
819 		goto fail_intr_attach;
820 
821 	rc = sfc_ev_attach(sa);
822 	if (rc != 0)
823 		goto fail_ev_attach;
824 
825 	rc = sfc_port_attach(sa);
826 	if (rc != 0)
827 		goto fail_port_attach;
828 
829 	rc = sfc_rss_attach(sa);
830 	if (rc != 0)
831 		goto fail_rss_attach;
832 
833 	rc = sfc_filter_attach(sa);
834 	if (rc != 0)
835 		goto fail_filter_attach;
836 
837 	rc = sfc_mae_attach(sa);
838 	if (rc != 0)
839 		goto fail_mae_attach;
840 
841 	sfc_log_init(sa, "fini nic");
842 	efx_nic_fini(enp);
843 
844 	sfc_flow_init(sa);
845 
846 	/*
847 	 * Create vSwitch to be able to use VFs when PF is not started yet
848 	 * as DPDK port. VFs should be able to talk to each other even
849 	 * if PF is down.
850 	 */
851 	rc = sfc_sriov_vswitch_create(sa);
852 	if (rc != 0)
853 		goto fail_sriov_vswitch_create;
854 
855 	sa->state = SFC_ADAPTER_INITIALIZED;
856 
857 	sfc_log_init(sa, "done");
858 	return 0;
859 
860 fail_sriov_vswitch_create:
861 	sfc_flow_fini(sa);
862 	sfc_mae_detach(sa);
863 
864 fail_mae_attach:
865 	sfc_filter_detach(sa);
866 
867 fail_filter_attach:
868 	sfc_rss_detach(sa);
869 
870 fail_rss_attach:
871 	sfc_port_detach(sa);
872 
873 fail_port_attach:
874 	sfc_ev_detach(sa);
875 
876 fail_ev_attach:
877 	sfc_intr_detach(sa);
878 
879 fail_intr_attach:
880 	efx_nic_fini(sa->nic);
881 
882 fail_estimate_rsrc_limits:
883 fail_tunnel_init:
884 	efx_tunnel_fini(sa->nic);
885 	sfc_sriov_detach(sa);
886 
887 fail_sriov_attach:
888 fail_nic_reset:
889 
890 	sfc_log_init(sa, "failed %d", rc);
891 	return rc;
892 }
893 
894 void
895 sfc_detach(struct sfc_adapter *sa)
896 {
897 	sfc_log_init(sa, "entry");
898 
899 	SFC_ASSERT(sfc_adapter_is_locked(sa));
900 
901 	sfc_sriov_vswitch_destroy(sa);
902 
903 	sfc_flow_fini(sa);
904 
905 	sfc_mae_detach(sa);
906 	sfc_filter_detach(sa);
907 	sfc_rss_detach(sa);
908 	sfc_port_detach(sa);
909 	sfc_ev_detach(sa);
910 	sfc_intr_detach(sa);
911 	efx_tunnel_fini(sa->nic);
912 	sfc_sriov_detach(sa);
913 
914 	sa->state = SFC_ADAPTER_UNINITIALIZED;
915 }
916 
917 static int
918 sfc_kvarg_fv_variant_handler(__rte_unused const char *key,
919 			     const char *value_str, void *opaque)
920 {
921 	uint32_t *value = opaque;
922 
923 	if (strcasecmp(value_str, SFC_KVARG_FW_VARIANT_DONT_CARE) == 0)
924 		*value = EFX_FW_VARIANT_DONT_CARE;
925 	else if (strcasecmp(value_str, SFC_KVARG_FW_VARIANT_FULL_FEATURED) == 0)
926 		*value = EFX_FW_VARIANT_FULL_FEATURED;
927 	else if (strcasecmp(value_str, SFC_KVARG_FW_VARIANT_LOW_LATENCY) == 0)
928 		*value = EFX_FW_VARIANT_LOW_LATENCY;
929 	else if (strcasecmp(value_str, SFC_KVARG_FW_VARIANT_PACKED_STREAM) == 0)
930 		*value = EFX_FW_VARIANT_PACKED_STREAM;
931 	else if (strcasecmp(value_str, SFC_KVARG_FW_VARIANT_DPDK) == 0)
932 		*value = EFX_FW_VARIANT_DPDK;
933 	else
934 		return -EINVAL;
935 
936 	return 0;
937 }
938 
939 static int
940 sfc_get_fw_variant(struct sfc_adapter *sa, efx_fw_variant_t *efv)
941 {
942 	efx_nic_fw_info_t enfi;
943 	int rc;
944 
945 	rc = efx_nic_get_fw_version(sa->nic, &enfi);
946 	if (rc != 0)
947 		return rc;
948 	else if (!enfi.enfi_dpcpu_fw_ids_valid)
949 		return ENOTSUP;
950 
951 	/*
952 	 * Firmware variant can be uniquely identified by the RxDPCPU
953 	 * firmware id
954 	 */
955 	switch (enfi.enfi_rx_dpcpu_fw_id) {
956 	case EFX_RXDP_FULL_FEATURED_FW_ID:
957 		*efv = EFX_FW_VARIANT_FULL_FEATURED;
958 		break;
959 
960 	case EFX_RXDP_LOW_LATENCY_FW_ID:
961 		*efv = EFX_FW_VARIANT_LOW_LATENCY;
962 		break;
963 
964 	case EFX_RXDP_PACKED_STREAM_FW_ID:
965 		*efv = EFX_FW_VARIANT_PACKED_STREAM;
966 		break;
967 
968 	case EFX_RXDP_DPDK_FW_ID:
969 		*efv = EFX_FW_VARIANT_DPDK;
970 		break;
971 
972 	default:
973 		/*
974 		 * Other firmware variants are not considered, since they are
975 		 * not supported in the device parameters
976 		 */
977 		*efv = EFX_FW_VARIANT_DONT_CARE;
978 		break;
979 	}
980 
981 	return 0;
982 }
983 
984 static const char *
985 sfc_fw_variant2str(efx_fw_variant_t efv)
986 {
987 	switch (efv) {
988 	case EFX_FW_VARIANT_FULL_FEATURED:
989 		return SFC_KVARG_FW_VARIANT_FULL_FEATURED;
990 	case EFX_FW_VARIANT_LOW_LATENCY:
991 		return SFC_KVARG_FW_VARIANT_LOW_LATENCY;
992 	case EFX_FW_VARIANT_PACKED_STREAM:
993 		return SFC_KVARG_FW_VARIANT_PACKED_STREAM;
994 	case EFX_FW_VARIANT_DPDK:
995 		return SFC_KVARG_FW_VARIANT_DPDK;
996 	default:
997 		return "unknown";
998 	}
999 }
1000 
1001 static int
1002 sfc_kvarg_rxd_wait_timeout_ns(struct sfc_adapter *sa)
1003 {
1004 	int rc;
1005 	long value;
1006 
1007 	value = SFC_RXD_WAIT_TIMEOUT_NS_DEF;
1008 
1009 	rc = sfc_kvargs_process(sa, SFC_KVARG_RXD_WAIT_TIMEOUT_NS,
1010 				sfc_kvarg_long_handler, &value);
1011 	if (rc != 0)
1012 		return rc;
1013 
1014 	if (value < 0 ||
1015 	    (unsigned long)value > EFX_RXQ_ES_SUPER_BUFFER_HOL_BLOCK_MAX) {
1016 		sfc_err(sa, "wrong '" SFC_KVARG_RXD_WAIT_TIMEOUT_NS "' "
1017 			    "was set (%ld);", value);
1018 		sfc_err(sa, "it must not be less than 0 or greater than %u",
1019 			    EFX_RXQ_ES_SUPER_BUFFER_HOL_BLOCK_MAX);
1020 		return EINVAL;
1021 	}
1022 
1023 	sa->rxd_wait_timeout_ns = value;
1024 	return 0;
1025 }
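/*
 * Usage sketch (hypothetical PCI address): the value arrives as a DPDK
 * device argument, e.g.
 *
 *	dpdk-testpmd -a 0000:01:00.0,rxd_wait_timeout_ns=200 -- -i
 *
 * SFC_KVARG_RXD_WAIT_TIMEOUT_NS names the "rxd_wait_timeout_ns" key;
 * the parsed value is validated against the equal-stride super-buffer
 * head-of-line blocking maximum above.
 */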
1026 
1027 static int
1028 sfc_nic_probe(struct sfc_adapter *sa)
1029 {
1030 	efx_nic_t *enp = sa->nic;
1031 	efx_fw_variant_t preferred_efv;
1032 	efx_fw_variant_t efv;
1033 	int rc;
1034 
1035 	preferred_efv = EFX_FW_VARIANT_DONT_CARE;
1036 	rc = sfc_kvargs_process(sa, SFC_KVARG_FW_VARIANT,
1037 				sfc_kvarg_fv_variant_handler,
1038 				&preferred_efv);
1039 	if (rc != 0) {
1040 		sfc_err(sa, "invalid %s parameter value", SFC_KVARG_FW_VARIANT);
1041 		return rc;
1042 	}
1043 
1044 	rc = sfc_kvarg_rxd_wait_timeout_ns(sa);
1045 	if (rc != 0)
1046 		return rc;
1047 
1048 	rc = efx_nic_probe(enp, preferred_efv);
1049 	if (rc == EACCES) {
1050 		/* Unprivileged functions cannot set FW variant */
1051 		rc = efx_nic_probe(enp, EFX_FW_VARIANT_DONT_CARE);
1052 	}
1053 	if (rc != 0)
1054 		return rc;
1055 
1056 	rc = sfc_get_fw_variant(sa, &efv);
1057 	if (rc == ENOTSUP) {
1058 		sfc_warn(sa, "FW variant cannot be obtained");
1059 		return 0;
1060 	}
1061 	if (rc != 0)
1062 		return rc;
1063 
1064 	/* Check that firmware variant was changed to the requested one */
1065 	if (preferred_efv != EFX_FW_VARIANT_DONT_CARE && preferred_efv != efv) {
1066 		sfc_warn(sa, "FW variant has not changed to the requested %s",
1067 			 sfc_fw_variant2str(preferred_efv));
1068 	}
1069 
1070 	sfc_notice(sa, "running FW variant is %s", sfc_fw_variant2str(efv));
1071 
1072 	return 0;
1073 }
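/*
 * Usage sketch (hypothetical PCI address; the accepted values are the
 * SFC_KVARG_FW_VARIANT_* strings defined in sfc_kvargs.h):
 *
 *	dpdk-testpmd -a 0000:01:00.0,fw_variant=dont-care -- -i
 *
 * Note that unprivileged functions cannot change the variant, so the
 * probe falls back to EFX_FW_VARIANT_DONT_CARE and only warns if the
 * running variant differs from the requested one.
 */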
1074 
1075 int
1076 sfc_probe(struct sfc_adapter *sa)
1077 {
1078 	efx_bar_region_t mem_ebrp;
1079 	struct rte_eth_dev *eth_dev = sa->eth_dev;
1080 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
1081 	efx_nic_t *enp;
1082 	int rc;
1083 
1084 	sfc_log_init(sa, "entry");
1085 
1086 	SFC_ASSERT(sfc_adapter_is_locked(sa));
1087 
1088 	sa->socket_id = rte_socket_id();
1089 	rte_atomic32_init(&sa->restart_required);
1090 
1091 	sfc_log_init(sa, "get family");
1092 	rc = sfc_efx_family(pci_dev, &mem_ebrp, &sa->family);
1093 
1094 	if (rc != 0)
1095 		goto fail_family;
1096 	sfc_log_init(sa,
1097 		     "family is %u, membar is %u, function control window offset is %" PRIu64,
1098 		     sa->family, mem_ebrp.ebr_index, mem_ebrp.ebr_offset);
1099 
1100 	sfc_log_init(sa, "init mem bar");
1101 	rc = sfc_mem_bar_init(sa, &mem_ebrp);
1102 	if (rc != 0)
1103 		goto fail_mem_bar_init;
1104 
1105 	sfc_log_init(sa, "create nic");
1106 	rte_spinlock_init(&sa->nic_lock);
1107 	rc = efx_nic_create(sa->family, (efsys_identifier_t *)sa,
1108 			    &sa->mem_bar, mem_ebrp.ebr_offset,
1109 			    &sa->nic_lock, &enp);
1110 	if (rc != 0)
1111 		goto fail_nic_create;
1112 	sa->nic = enp;
1113 
1114 	rc = sfc_mcdi_init(sa);
1115 	if (rc != 0)
1116 		goto fail_mcdi_init;
1117 
1118 	sfc_log_init(sa, "probe nic");
1119 	rc = sfc_nic_probe(sa);
1120 	if (rc != 0)
1121 		goto fail_nic_probe;
1122 
1123 	sfc_log_init(sa, "done");
1124 	return 0;
1125 
1126 fail_nic_probe:
1127 	sfc_mcdi_fini(sa);
1128 
1129 fail_mcdi_init:
1130 	sfc_log_init(sa, "destroy nic");
1131 	sa->nic = NULL;
1132 	efx_nic_destroy(enp);
1133 
1134 fail_nic_create:
1135 	sfc_mem_bar_fini(sa);
1136 
1137 fail_mem_bar_init:
1138 fail_family:
1139 	sfc_log_init(sa, "failed %d", rc);
1140 	return rc;
1141 }
1142 
1143 void
1144 sfc_unprobe(struct sfc_adapter *sa)
1145 {
1146 	efx_nic_t *enp = sa->nic;
1147 
1148 	sfc_log_init(sa, "entry");
1149 
1150 	SFC_ASSERT(sfc_adapter_is_locked(sa));
1151 
1152 	sfc_log_init(sa, "unprobe nic");
1153 	efx_nic_unprobe(enp);
1154 
1155 	sfc_mcdi_fini(sa);
1156 
1157 	/*
1158 	 * Make sure there is no pending alarm to restart since we are
1159 	 * going to free the device private data which is passed as the
1160 	 * callback opaque data. A new alarm cannot be scheduled since MCDI is
1161 	 * shut down.
1162 	 */
1163 	rte_eal_alarm_cancel(sfc_restart_if_required, sa);
1164 
1165 	sfc_log_init(sa, "destroy nic");
1166 	sa->nic = NULL;
1167 	efx_nic_destroy(enp);
1168 
1169 	sfc_mem_bar_fini(sa);
1170 
1171 	sfc_flow_fini(sa);
1172 	sa->state = SFC_ADAPTER_UNINITIALIZED;
1173 }
1174 
1175 uint32_t
1176 sfc_register_logtype(const struct rte_pci_addr *pci_addr,
1177 		     const char *lt_prefix_str, uint32_t ll_default)
1178 {
1179 	size_t lt_prefix_str_size = strlen(lt_prefix_str);
1180 	size_t lt_str_size_max;
1181 	char *lt_str = NULL;
1182 	int ret;
1183 
1184 	if (SIZE_MAX - PCI_PRI_STR_SIZE - 1 > lt_prefix_str_size) {
1185 		++lt_prefix_str_size; /* Reserve space for prefix separator */
1186 		lt_str_size_max = lt_prefix_str_size + PCI_PRI_STR_SIZE + 1;
1187 	} else {
1188 		return sfc_logtype_driver;
1189 	}
1190 
1191 	lt_str = rte_zmalloc("logtype_str", lt_str_size_max, 0);
1192 	if (lt_str == NULL)
1193 		return sfc_logtype_driver;
1194 
1195 	strncpy(lt_str, lt_prefix_str, lt_prefix_str_size);
1196 	lt_str[lt_prefix_str_size - 1] = '.';
1197 	rte_pci_device_name(pci_addr, lt_str + lt_prefix_str_size,
1198 			    lt_str_size_max - lt_prefix_str_size);
1199 	lt_str[lt_str_size_max - 1] = '\0';
1200 
1201 	ret = rte_log_register_type_and_pick_level(lt_str, ll_default);
1202 	rte_free(lt_str);
1203 
1204 	if (ret < 0)
1205 		return sfc_logtype_driver;
1206 
1207 	return ret;
1208 }
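/*
 * Sizing example (illustrative prefix and address): for a prefix such
 * as "pmd.net.sfc.main" the buffer holds the prefix, one '.' separator
 * (written over the copied NUL terminator), a PCI_PRI_STR_SIZE-sized
 * device name and a trailing NUL, yielding a log type name like
 * "pmd.net.sfc.main.0000:01:00.0".
 */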
1209 
1210 struct sfc_hw_switch_id {
1211 	char	board_sn[RTE_SIZEOF_FIELD(efx_nic_board_info_t, enbi_serial)];
1212 };
1213 
1214 int
1215 sfc_hw_switch_id_init(struct sfc_adapter *sa,
1216 		      struct sfc_hw_switch_id **idp)
1217 {
1218 	efx_nic_board_info_t board_info;
1219 	struct sfc_hw_switch_id *id;
1220 	int rc;
1221 
1222 	if (idp == NULL)
1223 		return EINVAL;
1224 
225 	rc = efx_nic_get_board_info(sa->nic, &board_info);
226 	if (rc != 0)
227 		return rc;
228 
229 	id = rte_zmalloc("sfc_hw_switch_id", sizeof(*id), 0);
230 	if (id == NULL)
231 		return ENOMEM;
1232 
1233 	memcpy(id->board_sn, board_info.enbi_serial, sizeof(id->board_sn));
1234 
1235 	*idp = id;
1236 
1237 	return 0;
1238 }
1239 
1240 void
1241 sfc_hw_switch_id_fini(__rte_unused struct sfc_adapter *sa,
1242 		      struct sfc_hw_switch_id *id)
1243 {
1244 	rte_free(id);
1245 }
1246 
1247 bool
1248 sfc_hw_switch_ids_equal(const struct sfc_hw_switch_id *left,
1249 			const struct sfc_hw_switch_id *right)
1250 {
1251 	return strncmp(left->board_sn, right->board_sn,
1252 		       sizeof(left->board_sn)) == 0;
1253 }
1254