xref: /dpdk/drivers/net/sfc/sfc.c (revision 8809f78c7dd9f33a44a4f89c58fc91ded34296ed)
/* SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright(c) 2019-2020 Xilinx, Inc.
 * Copyright(c) 2016-2019 Solarflare Communications Inc.
 *
 * This software was jointly developed between OKTET Labs (under contract
 * for Solarflare) and Solarflare Communications, Inc.
 */

/* sysconf() */
#include <unistd.h>
/* PRIu64 (see sfc_probe() logging) */
#include <inttypes.h>

#include <rte_errno.h>
#include <rte_alarm.h>

#include "efx.h"

#include "sfc.h"
#include "sfc_debug.h"
#include "sfc_log.h"
#include "sfc_ev.h"
#include "sfc_rx.h"
#include "sfc_tx.h"
#include "sfc_kvargs.h"
#include "sfc_tweak.h"


int
sfc_dma_alloc(const struct sfc_adapter *sa, const char *name, uint16_t id,
	      size_t len, int socket_id, efsys_mem_t *esmp)
{
	const struct rte_memzone *mz;

	sfc_log_init(sa, "name=%s id=%u len=%zu socket_id=%d",
		     name, id, len, socket_id);

	mz = rte_eth_dma_zone_reserve(sa->eth_dev, name, id, len,
				      sysconf(_SC_PAGESIZE), socket_id);
	if (mz == NULL) {
		sfc_err(sa, "cannot reserve DMA zone for %s:%u %#x@%d: %s",
			name, (unsigned int)id, (unsigned int)len, socket_id,
			rte_strerror(rte_errno));
		return ENOMEM;
	}

	esmp->esm_addr = mz->iova;
	if (esmp->esm_addr == RTE_BAD_IOVA) {
		(void)rte_memzone_free(mz);
		return EFAULT;
	}

	esmp->esm_mz = mz;
	esmp->esm_base = mz->addr;

	sfc_info(sa,
		 "DMA name=%s id=%u len=%zu socket_id=%d => virt=%p iova=%lx",
		 name, id, len, socket_id, esmp->esm_base,
		 (unsigned long)esmp->esm_addr);

	return 0;
}

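/*
 * Usage sketch (illustrative, not part of the driver): a queue module
 * would typically reserve its DMA ring like this and hand the
 * efsys_mem_t to libefx. The "txq" name and ring_size are hypothetical.
 *
 *	efsys_mem_t mem;
 *	int rc;
 *
 *	rc = sfc_dma_alloc(sa, "txq", sw_index, ring_size,
 *			   sa->socket_id, &mem);
 *	if (rc != 0)
 *		return rc;
 *	...
 *	sfc_dma_free(sa, &mem);
 *
 * Note that rc is a positive errno, following the convention used
 * throughout this file.
 */
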
void
sfc_dma_free(const struct sfc_adapter *sa, efsys_mem_t *esmp)
{
	int rc;

	sfc_log_init(sa, "name=%s", esmp->esm_mz->name);

	rc = rte_memzone_free(esmp->esm_mz);
	if (rc != 0)
		sfc_err(sa, "rte_memzone_free() failed: %d", rc);

	memset(esmp, 0, sizeof(*esmp));
}

static uint32_t
sfc_phy_cap_from_link_speeds(uint32_t speeds)
{
	uint32_t phy_caps = 0;

	if (~speeds & ETH_LINK_SPEED_FIXED) {
		phy_caps |= (1 << EFX_PHY_CAP_AN);
		/*
		 * If no speeds are specified in the mask, any supported
		 * speed may be negotiated
		 */
		if (speeds == ETH_LINK_SPEED_AUTONEG)
			phy_caps |=
				(1 << EFX_PHY_CAP_1000FDX) |
				(1 << EFX_PHY_CAP_10000FDX) |
				(1 << EFX_PHY_CAP_25000FDX) |
				(1 << EFX_PHY_CAP_40000FDX) |
				(1 << EFX_PHY_CAP_50000FDX) |
				(1 << EFX_PHY_CAP_100000FDX);
	}
	if (speeds & ETH_LINK_SPEED_1G)
		phy_caps |= (1 << EFX_PHY_CAP_1000FDX);
	if (speeds & ETH_LINK_SPEED_10G)
		phy_caps |= (1 << EFX_PHY_CAP_10000FDX);
	if (speeds & ETH_LINK_SPEED_25G)
		phy_caps |= (1 << EFX_PHY_CAP_25000FDX);
	if (speeds & ETH_LINK_SPEED_40G)
		phy_caps |= (1 << EFX_PHY_CAP_40000FDX);
	if (speeds & ETH_LINK_SPEED_50G)
		phy_caps |= (1 << EFX_PHY_CAP_50000FDX);
	if (speeds & ETH_LINK_SPEED_100G)
		phy_caps |= (1 << EFX_PHY_CAP_100000FDX);

	return phy_caps;
}

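/*
 * Worked example: with ETH_LINK_SPEED_FIXED clear, the autonegotiation
 * capability is always advertised, so
 *
 *	sfc_phy_cap_from_link_speeds(ETH_LINK_SPEED_10G |
 *				     ETH_LINK_SPEED_25G)
 *
 * returns (1 << EFX_PHY_CAP_AN) | (1 << EFX_PHY_CAP_10000FDX) |
 * (1 << EFX_PHY_CAP_25000FDX), whereas plain ETH_LINK_SPEED_AUTONEG (0)
 * expands to AN plus every full-duplex capability listed above.
 */
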
/*
 * Check requested device level configuration.
 * Receive and transmit configuration is checked in corresponding
 * modules.
 */
static int
sfc_check_conf(struct sfc_adapter *sa)
{
	const struct rte_eth_conf *conf = &sa->eth_dev->data->dev_conf;
	int rc = 0;

	sa->port.phy_adv_cap =
		sfc_phy_cap_from_link_speeds(conf->link_speeds) &
		sa->port.phy_adv_cap_mask;
	if ((sa->port.phy_adv_cap & ~(1 << EFX_PHY_CAP_AN)) == 0) {
		sfc_err(sa, "No link speeds from mask %#x are supported",
			conf->link_speeds);
		rc = EINVAL;
	}

#if !EFSYS_OPT_LOOPBACK
	if (conf->lpbk_mode != 0) {
		sfc_err(sa, "Loopback not supported");
		rc = EINVAL;
	}
#endif

	if (conf->dcb_capability_en != 0) {
		sfc_err(sa, "Priority-based flow control not supported");
		rc = EINVAL;
	}

	if (conf->fdir_conf.mode != RTE_FDIR_MODE_NONE) {
		sfc_err(sa, "Flow Director not supported");
		rc = EINVAL;
	}

	if ((conf->intr_conf.lsc != 0) &&
	    (sa->intr.type != EFX_INTR_LINE) &&
	    (sa->intr.type != EFX_INTR_MESSAGE)) {
		sfc_err(sa, "Link status change interrupt not supported");
		rc = EINVAL;
	}

	if (conf->intr_conf.rxq != 0 &&
	    (sa->priv.dp_rx->features & SFC_DP_RX_FEAT_INTR) == 0) {
		sfc_err(sa, "Receive queue interrupt not supported");
		rc = EINVAL;
	}

	return rc;
}

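/*
 * Note that the checks above accumulate into rc instead of returning
 * early, so a single sfc_check_conf() call logs every offending field.
 * For instance (hypothetical configuration), lpbk_mode = 1 on a build
 * without EFSYS_OPT_LOOPBACK together with fdir_conf.mode =
 * RTE_FDIR_MODE_PERFECT produces both error messages and one EINVAL.
 */
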
/*
 * Find out the maximum number of receive and transmit queues which
 * could be advertised.
 *
 * NIC is kept initialized on success to allow other modules to acquire
 * defaults and capabilities.
 */
static int
sfc_estimate_resource_limits(struct sfc_adapter *sa)
{
	const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
	efx_drv_limits_t limits;
	int rc;
	uint32_t evq_allocated;
	uint32_t rxq_allocated;
	uint32_t txq_allocated;

	memset(&limits, 0, sizeof(limits));

	/* Request at least one Rx and Tx queue */
	limits.edl_min_rxq_count = 1;
	limits.edl_min_txq_count = 1;
	/* Management event queue plus event queue for each Tx and Rx queue */
	limits.edl_min_evq_count =
		1 + limits.edl_min_rxq_count + limits.edl_min_txq_count;

	/* Divide by the number of functions to guarantee that all functions
	 * will get the promised resources
	 */
	/* FIXME Divide by number of functions (not 2) below */
	limits.edl_max_evq_count = encp->enc_evq_limit / 2;
	SFC_ASSERT(limits.edl_max_evq_count >= limits.edl_min_rxq_count);

	/* Split equally between receive and transmit */
	limits.edl_max_rxq_count =
		MIN(encp->enc_rxq_limit, (limits.edl_max_evq_count - 1) / 2);
	SFC_ASSERT(limits.edl_max_rxq_count >= limits.edl_min_rxq_count);

	limits.edl_max_txq_count =
		MIN(encp->enc_txq_limit,
		    limits.edl_max_evq_count - 1 - limits.edl_max_rxq_count);

	if (sa->tso && encp->enc_fw_assisted_tso_v2_enabled)
		limits.edl_max_txq_count =
			MIN(limits.edl_max_txq_count,
			    encp->enc_fw_assisted_tso_v2_n_contexts /
			    encp->enc_hw_pf_count);

	SFC_ASSERT(limits.edl_max_txq_count >= limits.edl_min_rxq_count);

	/* Configure the minimum required resources needed for the
	 * driver to operate, and the maximum desired resources that the
	 * driver is capable of using.
	 */
	efx_nic_set_drv_limits(sa->nic, &limits);

	sfc_log_init(sa, "init nic");
	rc = efx_nic_init(sa->nic);
	if (rc != 0)
		goto fail_nic_init;

	/* Find resource dimensions assigned by firmware to this function */
	rc = efx_nic_get_vi_pool(sa->nic, &evq_allocated, &rxq_allocated,
				 &txq_allocated);
	if (rc != 0)
		goto fail_get_vi_pool;

	/* The firmware may still allocate more than the maximum; enforce
	 * the limits
	 */
	evq_allocated = MIN(evq_allocated, limits.edl_max_evq_count);
	rxq_allocated = MIN(rxq_allocated, limits.edl_max_rxq_count);
	txq_allocated = MIN(txq_allocated, limits.edl_max_txq_count);

	/* Subtract the management EVQ, which is not used for traffic */
	SFC_ASSERT(evq_allocated > 0);
	evq_allocated--;

	/* Right now we use a separate EVQ for each Rx and Tx queue */
	sa->rxq_max = MIN(rxq_allocated, evq_allocated / 2);
	sa->txq_max = MIN(txq_allocated, evq_allocated - sa->rxq_max);

	/* Keep NIC initialized */
	return 0;

fail_get_vi_pool:
	efx_nic_fini(sa->nic);
fail_nic_init:
	return rc;
}

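/*
 * Worked example (hypothetical numbers): with enc_evq_limit = 1024,
 * enc_rxq_limit = 512 and enc_txq_limit = 512, the limits above become
 * edl_max_evq_count = 512, edl_max_rxq_count = MIN(512, (512 - 1) / 2)
 * = 255 and edl_max_txq_count = MIN(512, 512 - 1 - 255) = 256. If the
 * firmware then grants 128 EVQs, 128 RxQs and 128 TxQs, one EVQ is
 * reserved for management, giving rxq_max = MIN(128, 127 / 2) = 63 and
 * txq_max = MIN(128, 127 - 63) = 64.
 */
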
static int
sfc_set_drv_limits(struct sfc_adapter *sa)
{
	const struct rte_eth_dev_data *data = sa->eth_dev->data;
	efx_drv_limits_t lim;

	memset(&lim, 0, sizeof(lim));

	/* Limits are strict since they take the initial estimation into
	 * account
	 */
	lim.edl_min_evq_count = lim.edl_max_evq_count =
		1 + data->nb_rx_queues + data->nb_tx_queues;
	lim.edl_min_rxq_count = lim.edl_max_rxq_count = data->nb_rx_queues;
	lim.edl_min_txq_count = lim.edl_max_txq_count = data->nb_tx_queues;

	return efx_nic_set_drv_limits(sa->nic, &lim);
}

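/*
 * For example (illustrative): a port configured with 4 Rx and 4 Tx
 * queues pins every limit exactly: 4 RxQs, 4 TxQs and 9 EVQs (one per
 * queue plus the management EVQ). With such strict limits a firmware
 * shortfall surfaces as an efx_nic_init() failure in sfc_try_start()
 * rather than later, during individual queue setup.
 */
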
static int
sfc_set_fw_subvariant(struct sfc_adapter *sa)
{
	struct sfc_adapter_shared *sas = sfc_sa2shared(sa);
	const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
	uint64_t tx_offloads = sa->eth_dev->data->dev_conf.txmode.offloads;
	unsigned int txq_index;
	efx_nic_fw_subvariant_t req_fw_subvariant;
	efx_nic_fw_subvariant_t cur_fw_subvariant;
	int rc;

	if (!encp->enc_fw_subvariant_no_tx_csum_supported) {
		sfc_info(sa, "no-Tx-checksum subvariant not supported");
		return 0;
	}

	for (txq_index = 0; txq_index < sas->txq_count; ++txq_index) {
		struct sfc_txq_info *txq_info = &sas->txq_info[txq_index];

		if (txq_info->state & SFC_TXQ_INITIALIZED)
			tx_offloads |= txq_info->offloads;
	}

	if (tx_offloads & (DEV_TX_OFFLOAD_IPV4_CKSUM |
			   DEV_TX_OFFLOAD_TCP_CKSUM |
			   DEV_TX_OFFLOAD_UDP_CKSUM |
			   DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM))
		req_fw_subvariant = EFX_NIC_FW_SUBVARIANT_DEFAULT;
	else
		req_fw_subvariant = EFX_NIC_FW_SUBVARIANT_NO_TX_CSUM;

	rc = efx_nic_get_fw_subvariant(sa->nic, &cur_fw_subvariant);
	if (rc != 0) {
		sfc_err(sa, "failed to get FW subvariant: %d", rc);
		return rc;
	}
	sfc_info(sa, "FW subvariant is %u vs required %u",
		 cur_fw_subvariant, req_fw_subvariant);

	if (cur_fw_subvariant == req_fw_subvariant)
		return 0;

	rc = efx_nic_set_fw_subvariant(sa->nic, req_fw_subvariant);
	if (rc != 0) {
		sfc_err(sa, "failed to set FW subvariant %u: %d",
			req_fw_subvariant, rc);
		return rc;
	}
	sfc_info(sa, "FW subvariant set to %u", req_fw_subvariant);

	return 0;
}

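/*
 * Decision summary: if neither the device-level Tx mode nor any
 * initialized TxQ requests an IPv4/TCP/UDP/outer-IPv4 checksum offload,
 * the no-Tx-checksum firmware subvariant is requested, presumably
 * letting the NIC run a leaner datapath; any checksum offload forces
 * the default subvariant.
 */
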
static int
sfc_try_start(struct sfc_adapter *sa)
{
	const efx_nic_cfg_t *encp;
	int rc;

	sfc_log_init(sa, "entry");

	SFC_ASSERT(sfc_adapter_is_locked(sa));
	SFC_ASSERT(sa->state == SFC_ADAPTER_STARTING);

	sfc_log_init(sa, "set FW subvariant");
	rc = sfc_set_fw_subvariant(sa);
	if (rc != 0)
		goto fail_set_fw_subvariant;

	sfc_log_init(sa, "set resource limits");
	rc = sfc_set_drv_limits(sa);
	if (rc != 0)
		goto fail_set_drv_limits;

	sfc_log_init(sa, "init nic");
	rc = efx_nic_init(sa->nic);
	if (rc != 0)
		goto fail_nic_init;

	encp = efx_nic_cfg_get(sa->nic);

	/*
	 * Refresh (since it may change on NIC reset/restart) the copy of
	 * supported tunnel encapsulations in shared memory to be used
	 * when supported Rx packet type classes are retrieved.
	 */
	sa->priv.shared->tunnel_encaps =
		encp->enc_tunnel_encapsulations_supported;

	if (encp->enc_tunnel_encapsulations_supported != 0) {
		sfc_log_init(sa, "apply tunnel config");
		rc = efx_tunnel_reconfigure(sa->nic);
		if (rc != 0)
			goto fail_tunnel_reconfigure;
	}

	rc = sfc_intr_start(sa);
	if (rc != 0)
		goto fail_intr_start;

	rc = sfc_ev_start(sa);
	if (rc != 0)
		goto fail_ev_start;

	rc = sfc_port_start(sa);
	if (rc != 0)
		goto fail_port_start;

	rc = sfc_rx_start(sa);
	if (rc != 0)
		goto fail_rx_start;

	rc = sfc_tx_start(sa);
	if (rc != 0)
		goto fail_tx_start;

	rc = sfc_flow_start(sa);
	if (rc != 0)
		goto fail_flows_insert;

	sfc_log_init(sa, "done");
	return 0;

fail_flows_insert:
	sfc_tx_stop(sa);

fail_tx_start:
	sfc_rx_stop(sa);

fail_rx_start:
	sfc_port_stop(sa);

fail_port_start:
	sfc_ev_stop(sa);

fail_ev_start:
	sfc_intr_stop(sa);

fail_intr_start:
fail_tunnel_reconfigure:
	efx_nic_fini(sa->nic);

fail_nic_init:
fail_set_drv_limits:
fail_set_fw_subvariant:
	sfc_log_init(sa, "failed %d", rc);
	return rc;
}

int
sfc_start(struct sfc_adapter *sa)
{
	unsigned int start_tries = 3;
	int rc;

	sfc_log_init(sa, "entry");

	SFC_ASSERT(sfc_adapter_is_locked(sa));

	switch (sa->state) {
	case SFC_ADAPTER_CONFIGURED:
		break;
	case SFC_ADAPTER_STARTED:
		sfc_notice(sa, "already started");
		return 0;
	default:
		rc = EINVAL;
		goto fail_bad_state;
	}

	sa->state = SFC_ADAPTER_STARTING;

	rc = 0;
	do {
		/*
		 * FIXME Try to recreate vSwitch on start retry.
		 * The vSwitch is absent after MC-reboot-like events and
		 * we should recreate it. Maybe we need a proper
		 * indication instead of guessing.
		 */
		if (rc != 0) {
			sfc_sriov_vswitch_destroy(sa);
			rc = sfc_sriov_vswitch_create(sa);
			if (rc != 0)
				goto fail_sriov_vswitch_create;
		}
		rc = sfc_try_start(sa);
	} while ((--start_tries > 0) &&
		 (rc == EIO || rc == EAGAIN || rc == ENOENT || rc == EINVAL));

	if (rc != 0)
		goto fail_try_start;

	sa->state = SFC_ADAPTER_STARTED;
	sfc_log_init(sa, "done");
	return 0;

fail_try_start:
fail_sriov_vswitch_create:
	sa->state = SFC_ADAPTER_CONFIGURED;
fail_bad_state:
	sfc_log_init(sa, "failed %d", rc);
	return rc;
}

void
sfc_stop(struct sfc_adapter *sa)
{
	sfc_log_init(sa, "entry");

	SFC_ASSERT(sfc_adapter_is_locked(sa));

	switch (sa->state) {
	case SFC_ADAPTER_STARTED:
		break;
	case SFC_ADAPTER_CONFIGURED:
		sfc_notice(sa, "already stopped");
		return;
	default:
		sfc_err(sa, "stop in unexpected state %u", sa->state);
		SFC_ASSERT(B_FALSE);
		return;
	}

	sa->state = SFC_ADAPTER_STOPPING;

	sfc_flow_stop(sa);
	sfc_tx_stop(sa);
	sfc_rx_stop(sa);
	sfc_port_stop(sa);
	sfc_ev_stop(sa);
	sfc_intr_stop(sa);
	efx_nic_fini(sa->nic);

	sa->state = SFC_ADAPTER_CONFIGURED;
	sfc_log_init(sa, "done");
}

static int
sfc_restart(struct sfc_adapter *sa)
{
	int rc;

	SFC_ASSERT(sfc_adapter_is_locked(sa));

	if (sa->state != SFC_ADAPTER_STARTED)
		return EINVAL;

	sfc_stop(sa);

	rc = sfc_start(sa);
	if (rc != 0)
		sfc_err(sa, "restart failed");

	return rc;
}

static void
sfc_restart_if_required(void *arg)
{
	struct sfc_adapter *sa = arg;

	/* If restart is scheduled, clear the flag and do it */
	if (rte_atomic32_cmpset((volatile uint32_t *)&sa->restart_required,
				1, 0)) {
		sfc_adapter_lock(sa);
		if (sa->state == SFC_ADAPTER_STARTED)
			(void)sfc_restart(sa);
		sfc_adapter_unlock(sa);
	}
}

void
sfc_schedule_restart(struct sfc_adapter *sa)
{
	int rc;

	/* Schedule restart alarm if it is not scheduled yet */
	if (!rte_atomic32_test_and_set(&sa->restart_required))
		return;

	rc = rte_eal_alarm_set(1, sfc_restart_if_required, sa);
	if (rc == -ENOTSUP)
		sfc_warn(sa, "alarms are not supported, restart is pending");
	else if (rc != 0)
		sfc_err(sa, "cannot arm restart alarm (rc=%d)", rc);
	else
		sfc_notice(sa, "restart scheduled");
}

int
sfc_configure(struct sfc_adapter *sa)
{
	int rc;

	sfc_log_init(sa, "entry");

	SFC_ASSERT(sfc_adapter_is_locked(sa));

	SFC_ASSERT(sa->state == SFC_ADAPTER_INITIALIZED ||
		   sa->state == SFC_ADAPTER_CONFIGURED);
	sa->state = SFC_ADAPTER_CONFIGURING;

	rc = sfc_check_conf(sa);
	if (rc != 0)
		goto fail_check_conf;

	rc = sfc_intr_configure(sa);
	if (rc != 0)
		goto fail_intr_configure;

	rc = sfc_port_configure(sa);
	if (rc != 0)
		goto fail_port_configure;

	rc = sfc_rx_configure(sa);
	if (rc != 0)
		goto fail_rx_configure;

	rc = sfc_tx_configure(sa);
	if (rc != 0)
		goto fail_tx_configure;

	sa->state = SFC_ADAPTER_CONFIGURED;
	sfc_log_init(sa, "done");
	return 0;

fail_tx_configure:
	sfc_rx_close(sa);

fail_rx_configure:
	sfc_port_close(sa);

fail_port_configure:
	sfc_intr_close(sa);

fail_intr_configure:
fail_check_conf:
	sa->state = SFC_ADAPTER_INITIALIZED;
	sfc_log_init(sa, "failed %d", rc);
	return rc;
}

void
sfc_close(struct sfc_adapter *sa)
{
	sfc_log_init(sa, "entry");

	SFC_ASSERT(sfc_adapter_is_locked(sa));

	SFC_ASSERT(sa->state == SFC_ADAPTER_CONFIGURED);
	sa->state = SFC_ADAPTER_CLOSING;

	sfc_tx_close(sa);
	sfc_rx_close(sa);
	sfc_port_close(sa);
	sfc_intr_close(sa);

	sa->state = SFC_ADAPTER_INITIALIZED;
	sfc_log_init(sa, "done");
}

static efx_rc_t
sfc_find_mem_bar(efsys_pci_config_t *configp, int bar_index,
		 efsys_bar_t *barp)
{
	efsys_bar_t result;
	struct rte_pci_device *dev;

	memset(&result, 0, sizeof(result));

	if (bar_index < 0 || bar_index >= PCI_MAX_RESOURCE)
		return EINVAL;

	dev = configp->espc_dev;

	result.esb_rid = bar_index;
	result.esb_dev = dev;
	result.esb_base = dev->mem_resource[bar_index].addr;

	*barp = result;

	return 0;
}

static int
sfc_mem_bar_init(struct sfc_adapter *sa, const efx_bar_region_t *mem_ebrp)
{
	struct rte_eth_dev *eth_dev = sa->eth_dev;
	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
	efsys_bar_t *ebp = &sa->mem_bar;
	struct rte_mem_resource *res =
		&pci_dev->mem_resource[mem_ebrp->ebr_index];

	SFC_BAR_LOCK_INIT(ebp, eth_dev->data->name);
	ebp->esb_rid = mem_ebrp->ebr_index;
	ebp->esb_dev = pci_dev;
	ebp->esb_base = res->addr;

	sa->fcw_offset = mem_ebrp->ebr_offset;

	return 0;
}

static void
sfc_mem_bar_fini(struct sfc_adapter *sa)
{
	efsys_bar_t *ebp = &sa->mem_bar;

	SFC_BAR_LOCK_DESTROY(ebp);
	memset(ebp, 0, sizeof(*ebp));
}

/*
 * A fixed RSS key which has a property of being symmetric
 * (symmetrical flows are distributed to the same CPU)
 * and also known to give a uniform distribution
 * (a good distribution of traffic between different CPUs)
 */
static const uint8_t default_rss_key[EFX_RSS_KEY_SIZE] = {
	0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
	0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
	0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
	0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
	0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
};

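/*
 * Background (informal): this is the widely used symmetric Toeplitz key
 * built from the repeating 0x6d, 0x5a byte pair. With this key,
 * hash(src addr, dst addr, src port, dst port) equals
 * hash(dst addr, src addr, dst port, src port), so both directions of a
 * flow are steered to the same queue, while the distribution across
 * queues remains reasonably uniform.
 */
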
static int
sfc_rss_attach(struct sfc_adapter *sa)
{
	struct sfc_rss *rss = &sfc_sa2shared(sa)->rss;
	int rc;

	rc = efx_intr_init(sa->nic, sa->intr.type, NULL);
	if (rc != 0)
		goto fail_intr_init;

	rc = efx_ev_init(sa->nic);
	if (rc != 0)
		goto fail_ev_init;

	rc = efx_rx_init(sa->nic);
	if (rc != 0)
		goto fail_rx_init;

	rc = efx_rx_scale_default_support_get(sa->nic, &rss->context_type);
	if (rc != 0)
		goto fail_scale_support_get;

	rc = efx_rx_hash_default_support_get(sa->nic, &rss->hash_support);
	if (rc != 0)
		goto fail_hash_support_get;

	rc = sfc_rx_hash_init(sa);
	if (rc != 0)
		goto fail_rx_hash_init;

	efx_rx_fini(sa->nic);
	efx_ev_fini(sa->nic);
	efx_intr_fini(sa->nic);

	rte_memcpy(rss->key, default_rss_key, sizeof(rss->key));
	rss->dummy_rss_context = EFX_RSS_CONTEXT_DEFAULT;

	return 0;

fail_rx_hash_init:
fail_hash_support_get:
fail_scale_support_get:
	efx_rx_fini(sa->nic);

fail_rx_init:
	efx_ev_fini(sa->nic);

fail_ev_init:
	efx_intr_fini(sa->nic);

fail_intr_init:
	return rc;
}

static void
sfc_rss_detach(struct sfc_adapter *sa)
{
	sfc_rx_hash_fini(sa);
}

int
sfc_attach(struct sfc_adapter *sa)
{
	const efx_nic_cfg_t *encp;
	efx_nic_t *enp = sa->nic;
	int rc;

	sfc_log_init(sa, "entry");

	SFC_ASSERT(sfc_adapter_is_locked(sa));

	efx_mcdi_new_epoch(enp);

	sfc_log_init(sa, "reset nic");
	rc = efx_nic_reset(enp);
	if (rc != 0)
		goto fail_nic_reset;

	rc = sfc_sriov_attach(sa);
	if (rc != 0)
		goto fail_sriov_attach;

	/*
	 * A probed NIC is sufficient for tunnel init.
	 * Initialize tunnel support to be able to use libefx
	 * efx_tunnel_config_udp_{add,remove}() in any state and
	 * efx_tunnel_reconfigure() on start up.
	 */
	rc = efx_tunnel_init(enp);
	if (rc != 0)
		goto fail_tunnel_init;

	encp = efx_nic_cfg_get(sa->nic);

	/*
	 * Make a copy of supported tunnel encapsulations in shared
	 * memory to be used when supported Rx packet type classes are
	 * retrieved.
	 */
	sa->priv.shared->tunnel_encaps =
		encp->enc_tunnel_encapsulations_supported;

	if (sfc_dp_tx_offload_capa(sa->priv.dp_tx) & DEV_TX_OFFLOAD_TCP_TSO) {
		sa->tso = encp->enc_fw_assisted_tso_v2_enabled ||
			  encp->enc_tso_v3_enabled;
		if (!sa->tso)
			sfc_info(sa, "TSO support isn't available on this adapter");
	}

	if (sa->tso &&
	    (sfc_dp_tx_offload_capa(sa->priv.dp_tx) &
	     (DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
	      DEV_TX_OFFLOAD_GENEVE_TNL_TSO)) != 0) {
		sa->tso_encap = encp->enc_fw_assisted_tso_v2_encap_enabled ||
				encp->enc_tso_v3_enabled;
		if (!sa->tso_encap)
			sfc_info(sa, "Encapsulated TSO support isn't available on this adapter");
	}

	sfc_log_init(sa, "estimate resource limits");
	rc = sfc_estimate_resource_limits(sa);
	if (rc != 0)
		goto fail_estimate_rsrc_limits;

	sa->evq_max_entries = encp->enc_evq_max_nevs;
	SFC_ASSERT(rte_is_power_of_2(sa->evq_max_entries));

	sa->evq_min_entries = encp->enc_evq_min_nevs;
	SFC_ASSERT(rte_is_power_of_2(sa->evq_min_entries));

	sa->rxq_max_entries = encp->enc_rxq_max_ndescs;
	SFC_ASSERT(rte_is_power_of_2(sa->rxq_max_entries));

	sa->rxq_min_entries = encp->enc_rxq_min_ndescs;
	SFC_ASSERT(rte_is_power_of_2(sa->rxq_min_entries));

	sa->txq_max_entries = encp->enc_txq_max_ndescs;
	SFC_ASSERT(rte_is_power_of_2(sa->txq_max_entries));

	sa->txq_min_entries = encp->enc_txq_min_ndescs;
	SFC_ASSERT(rte_is_power_of_2(sa->txq_min_entries));

	rc = sfc_intr_attach(sa);
	if (rc != 0)
		goto fail_intr_attach;

	rc = sfc_ev_attach(sa);
	if (rc != 0)
		goto fail_ev_attach;

	rc = sfc_port_attach(sa);
	if (rc != 0)
		goto fail_port_attach;

	rc = sfc_rss_attach(sa);
	if (rc != 0)
		goto fail_rss_attach;

	rc = sfc_filter_attach(sa);
	if (rc != 0)
		goto fail_filter_attach;

	sfc_log_init(sa, "fini nic");
	efx_nic_fini(enp);

	sfc_flow_init(sa);

	/*
	 * Create vSwitch to be able to use VFs when PF is not started yet
	 * as DPDK port. VFs should be able to talk to each other even
	 * if PF is down.
	 */
	rc = sfc_sriov_vswitch_create(sa);
	if (rc != 0)
		goto fail_sriov_vswitch_create;

	sa->state = SFC_ADAPTER_INITIALIZED;

	sfc_log_init(sa, "done");
	return 0;

fail_sriov_vswitch_create:
	sfc_flow_fini(sa);
	sfc_filter_detach(sa);

fail_filter_attach:
	sfc_rss_detach(sa);

fail_rss_attach:
	sfc_port_detach(sa);

fail_port_attach:
	sfc_ev_detach(sa);

fail_ev_attach:
	sfc_intr_detach(sa);

fail_intr_attach:
	efx_nic_fini(sa->nic);

fail_estimate_rsrc_limits:
fail_tunnel_init:
	efx_tunnel_fini(sa->nic);
	sfc_sriov_detach(sa);

fail_sriov_attach:
fail_nic_reset:

	sfc_log_init(sa, "failed %d", rc);
	return rc;
}

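/*
 * Lifecycle note (as implied by the state checks in this file, not a
 * documented contract): sfc_probe() -> sfc_attach() -> sfc_configure()
 * -> sfc_start() ... sfc_stop() -> sfc_close() -> sfc_detach() ->
 * sfc_unprobe(), with sfc_restart() wrapping a stop/start pair under
 * the adapter lock.
 */
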
void
sfc_detach(struct sfc_adapter *sa)
{
	sfc_log_init(sa, "entry");

	SFC_ASSERT(sfc_adapter_is_locked(sa));

	sfc_sriov_vswitch_destroy(sa);

	sfc_flow_fini(sa);

	sfc_filter_detach(sa);
	sfc_rss_detach(sa);
	sfc_port_detach(sa);
	sfc_ev_detach(sa);
	sfc_intr_detach(sa);
	efx_tunnel_fini(sa->nic);
	sfc_sriov_detach(sa);

	sa->state = SFC_ADAPTER_UNINITIALIZED;
}

static int
sfc_kvarg_fv_variant_handler(__rte_unused const char *key,
			     const char *value_str, void *opaque)
{
	uint32_t *value = opaque;

	if (strcasecmp(value_str, SFC_KVARG_FW_VARIANT_DONT_CARE) == 0)
		*value = EFX_FW_VARIANT_DONT_CARE;
	else if (strcasecmp(value_str, SFC_KVARG_FW_VARIANT_FULL_FEATURED) == 0)
		*value = EFX_FW_VARIANT_FULL_FEATURED;
	else if (strcasecmp(value_str, SFC_KVARG_FW_VARIANT_LOW_LATENCY) == 0)
		*value = EFX_FW_VARIANT_LOW_LATENCY;
	else if (strcasecmp(value_str, SFC_KVARG_FW_VARIANT_PACKED_STREAM) == 0)
		*value = EFX_FW_VARIANT_PACKED_STREAM;
	else if (strcasecmp(value_str, SFC_KVARG_FW_VARIANT_DPDK) == 0)
		*value = EFX_FW_VARIANT_DPDK;
	else
		return -EINVAL;

	return 0;
}

static int
sfc_get_fw_variant(struct sfc_adapter *sa, efx_fw_variant_t *efv)
{
	efx_nic_fw_info_t enfi;
	int rc;

	rc = efx_nic_get_fw_version(sa->nic, &enfi);
	if (rc != 0)
		return rc;
	else if (!enfi.enfi_dpcpu_fw_ids_valid)
		return ENOTSUP;

	/*
	 * Firmware variant can be uniquely identified by the RxDPCPU
	 * firmware id
	 */
	switch (enfi.enfi_rx_dpcpu_fw_id) {
	case EFX_RXDP_FULL_FEATURED_FW_ID:
		*efv = EFX_FW_VARIANT_FULL_FEATURED;
		break;

	case EFX_RXDP_LOW_LATENCY_FW_ID:
		*efv = EFX_FW_VARIANT_LOW_LATENCY;
		break;

	case EFX_RXDP_PACKED_STREAM_FW_ID:
		*efv = EFX_FW_VARIANT_PACKED_STREAM;
		break;

	case EFX_RXDP_DPDK_FW_ID:
		*efv = EFX_FW_VARIANT_DPDK;
		break;

	default:
		/*
		 * Other firmware variants are not considered, since they are
		 * not supported in the device parameters
		 */
		*efv = EFX_FW_VARIANT_DONT_CARE;
		break;
	}

	return 0;
}

static const char *
sfc_fw_variant2str(efx_fw_variant_t efv)
{
	switch (efv) {
	case EFX_FW_VARIANT_FULL_FEATURED:
		return SFC_KVARG_FW_VARIANT_FULL_FEATURED;
	case EFX_FW_VARIANT_LOW_LATENCY:
		return SFC_KVARG_FW_VARIANT_LOW_LATENCY;
	case EFX_FW_VARIANT_PACKED_STREAM:
		return SFC_KVARG_FW_VARIANT_PACKED_STREAM;
	case EFX_FW_VARIANT_DPDK:
		return SFC_KVARG_FW_VARIANT_DPDK;
	default:
		return "unknown";
	}
}

static int
sfc_kvarg_rxd_wait_timeout_ns(struct sfc_adapter *sa)
{
	int rc;
	long value;

	value = SFC_RXD_WAIT_TIMEOUT_NS_DEF;

	rc = sfc_kvargs_process(sa, SFC_KVARG_RXD_WAIT_TIMEOUT_NS,
				sfc_kvarg_long_handler, &value);
	if (rc != 0)
		return rc;

	if (value < 0 ||
	    (unsigned long)value > EFX_RXQ_ES_SUPER_BUFFER_HOL_BLOCK_MAX) {
		sfc_err(sa, "wrong '" SFC_KVARG_RXD_WAIT_TIMEOUT_NS "' "
			    "was set (%ld);", value);
		sfc_err(sa, "it must not be less than 0 or greater than %u",
			    EFX_RXQ_ES_SUPER_BUFFER_HOL_BLOCK_MAX);
		return EINVAL;
	}

	sa->rxd_wait_timeout_ns = value;
	return 0;
}

static int
sfc_nic_probe(struct sfc_adapter *sa)
{
	efx_nic_t *enp = sa->nic;
	efx_fw_variant_t preferred_efv;
	efx_fw_variant_t efv;
	int rc;

	preferred_efv = EFX_FW_VARIANT_DONT_CARE;
	rc = sfc_kvargs_process(sa, SFC_KVARG_FW_VARIANT,
				sfc_kvarg_fv_variant_handler,
				&preferred_efv);
	if (rc != 0) {
		sfc_err(sa, "invalid %s parameter value", SFC_KVARG_FW_VARIANT);
		return rc;
	}

	rc = sfc_kvarg_rxd_wait_timeout_ns(sa);
	if (rc != 0)
		return rc;

	rc = efx_nic_probe(enp, preferred_efv);
	if (rc == EACCES) {
		/* Unprivileged functions cannot set the FW variant */
		rc = efx_nic_probe(enp, EFX_FW_VARIANT_DONT_CARE);
	}
	if (rc != 0)
		return rc;

	rc = sfc_get_fw_variant(sa, &efv);
	if (rc == ENOTSUP) {
		sfc_warn(sa, "FW variant cannot be obtained");
		return 0;
	}
	if (rc != 0)
		return rc;

	/* Check that the firmware variant was changed to the requested one */
	if (preferred_efv != EFX_FW_VARIANT_DONT_CARE && preferred_efv != efv) {
		sfc_warn(sa, "FW variant has not changed to the requested %s",
			 sfc_fw_variant2str(preferred_efv));
	}

	sfc_notice(sa, "running FW variant is %s", sfc_fw_variant2str(efv));

	return 0;
}

static efx_rc_t
sfc_pci_config_readd(efsys_pci_config_t *configp, uint32_t offset,
		     efx_dword_t *edp)
{
	int rc;

	rc = rte_pci_read_config(configp->espc_dev, edp->ed_u32, sizeof(*edp),
				 offset);

	return (rc < 0 || rc != sizeof(*edp)) ? EIO : 0;
}

static int
sfc_family(struct sfc_adapter *sa, efx_bar_region_t *mem_ebrp)
{
	struct rte_eth_dev *eth_dev = sa->eth_dev;
	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
	efsys_pci_config_t espcp;
	static const efx_pci_ops_t ops = {
		.epo_config_readd = sfc_pci_config_readd,
		.epo_find_mem_bar = sfc_find_mem_bar,
	};
	int rc;

	espcp.espc_dev = pci_dev;

	rc = efx_family_probe_bar(pci_dev->id.vendor_id,
				  pci_dev->id.device_id,
				  &espcp, &ops, &sa->family, mem_ebrp);

	return rc;
}

int
sfc_probe(struct sfc_adapter *sa)
{
	efx_bar_region_t mem_ebrp;
	efx_nic_t *enp;
	int rc;

	sfc_log_init(sa, "entry");

	SFC_ASSERT(sfc_adapter_is_locked(sa));

	sa->socket_id = rte_socket_id();
	rte_atomic32_init(&sa->restart_required);

	sfc_log_init(sa, "get family");
	rc = sfc_family(sa, &mem_ebrp);
	if (rc != 0)
		goto fail_family;
	sfc_log_init(sa,
		     "family is %u, membar is %d, function control window offset is %" PRIu64,
		     sa->family, mem_ebrp.ebr_index, mem_ebrp.ebr_offset);

	sfc_log_init(sa, "init mem bar");
	rc = sfc_mem_bar_init(sa, &mem_ebrp);
	if (rc != 0)
		goto fail_mem_bar_init;

	sfc_log_init(sa, "create nic");
	rte_spinlock_init(&sa->nic_lock);
	rc = efx_nic_create(sa->family, (efsys_identifier_t *)sa,
			    &sa->mem_bar, mem_ebrp.ebr_offset,
			    &sa->nic_lock, &enp);
	if (rc != 0)
		goto fail_nic_create;
	sa->nic = enp;

	rc = sfc_mcdi_init(sa);
	if (rc != 0)
		goto fail_mcdi_init;

	sfc_log_init(sa, "probe nic");
	rc = sfc_nic_probe(sa);
	if (rc != 0)
		goto fail_nic_probe;

	sfc_log_init(sa, "done");
	return 0;

fail_nic_probe:
	sfc_mcdi_fini(sa);

fail_mcdi_init:
	sfc_log_init(sa, "destroy nic");
	sa->nic = NULL;
	efx_nic_destroy(enp);

fail_nic_create:
	sfc_mem_bar_fini(sa);

fail_mem_bar_init:
fail_family:
	sfc_log_init(sa, "failed %d", rc);
	return rc;
}

void
sfc_unprobe(struct sfc_adapter *sa)
{
	efx_nic_t *enp = sa->nic;

	sfc_log_init(sa, "entry");

	SFC_ASSERT(sfc_adapter_is_locked(sa));

	sfc_log_init(sa, "unprobe nic");
	efx_nic_unprobe(enp);

	sfc_mcdi_fini(sa);

	/*
	 * Make sure there is no pending alarm to restart, since we are
	 * going to free the device-private data which is passed as the
	 * alarm callback opaque data. A new alarm cannot be scheduled
	 * since MCDI is shut down.
	 */
	rte_eal_alarm_cancel(sfc_restart_if_required, sa);

	sfc_log_init(sa, "destroy nic");
	sa->nic = NULL;
	efx_nic_destroy(enp);

	sfc_mem_bar_fini(sa);

	sfc_flow_fini(sa);
	sa->state = SFC_ADAPTER_UNINITIALIZED;
}

uint32_t
sfc_register_logtype(const struct rte_pci_addr *pci_addr,
		     const char *lt_prefix_str, uint32_t ll_default)
{
	size_t lt_prefix_str_size = strlen(lt_prefix_str);
	size_t lt_str_size_max;
	char *lt_str = NULL;
	int ret;

	if (SIZE_MAX - PCI_PRI_STR_SIZE - 1 > lt_prefix_str_size) {
		++lt_prefix_str_size; /* Reserve space for prefix separator */
		lt_str_size_max = lt_prefix_str_size + PCI_PRI_STR_SIZE + 1;
	} else {
		return sfc_logtype_driver;
	}

	lt_str = rte_zmalloc("logtype_str", lt_str_size_max, 0);
	if (lt_str == NULL)
		return sfc_logtype_driver;

	strncpy(lt_str, lt_prefix_str, lt_prefix_str_size);
	lt_str[lt_prefix_str_size - 1] = '.';
	rte_pci_device_name(pci_addr, lt_str + lt_prefix_str_size,
			    lt_str_size_max - lt_prefix_str_size);
	lt_str[lt_str_size_max - 1] = '\0';

	ret = rte_log_register_type_and_pick_level(lt_str, ll_default);
	rte_free(lt_str);

	if (ret < 0)
		return sfc_logtype_driver;

	return ret;
}
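
/*
 * Usage sketch (hypothetical names): a caller could register a
 * per-device log type with a driver-chosen default level as
 *
 *	uint32_t lt = sfc_register_logtype(&pci_dev->addr, "sfc_efx.mcdi",
 *					   RTE_LOG_NOTICE);
 *
 * which yields a log type named like "sfc_efx.mcdi.0000:01:00.0" whose
 * level can be overridden via the EAL --log-level option. On any
 * failure the generic sfc_logtype_driver is returned as a fallback.
 */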