/* SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright(c) 2019-2020 Xilinx, Inc.
 * Copyright(c) 2016-2019 Solarflare Communications Inc.
 *
 * This software was jointly developed between OKTET Labs (under contract
 * for Solarflare) and Solarflare Communications, Inc.
 */

/* sysconf() */
#include <unistd.h>

#include <rte_errno.h>
#include <rte_alarm.h>

#include "efx.h"

#include "sfc.h"
#include "sfc_debug.h"
#include "sfc_log.h"
#include "sfc_ev.h"
#include "sfc_rx.h"
#include "sfc_tx.h"
#include "sfc_kvargs.h"
#include "sfc_tweak.h"


int
sfc_dma_alloc(const struct sfc_adapter *sa, const char *name, uint16_t id,
	      size_t len, int socket_id, efsys_mem_t *esmp)
{
	const struct rte_memzone *mz;

	sfc_log_init(sa, "name=%s id=%u len=%zu socket_id=%d",
		     name, id, len, socket_id);

	mz = rte_eth_dma_zone_reserve(sa->eth_dev, name, id, len,
				      sysconf(_SC_PAGESIZE), socket_id);
	if (mz == NULL) {
		sfc_err(sa, "cannot reserve DMA zone for %s:%u %#x@%d: %s",
			name, (unsigned int)id, (unsigned int)len, socket_id,
			rte_strerror(rte_errno));
		return ENOMEM;
	}

	esmp->esm_addr = mz->iova;
	if (esmp->esm_addr == RTE_BAD_IOVA) {
		(void)rte_memzone_free(mz);
		return EFAULT;
	}

	esmp->esm_mz = mz;
	esmp->esm_base = mz->addr;

	return 0;
}
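
/*
 * Illustrative usage sketch (not part of the driver): a hypothetical
 * caller allocating a 4 KiB DMA-safe buffer with the helpers above.
 * The "scratch" name and id 0 are assumptions for the example only;
 * note that, as everywhere in this driver, a positive errno is returned.
 *
 *	efsys_mem_t esm;
 *	int rc;
 *
 *	rc = sfc_dma_alloc(sa, "scratch", 0, 4096, sa->socket_id, &esm);
 *	if (rc != 0)
 *		return rc;
 *	... use esm.esm_base (virtual address) and esm.esm_addr (IOVA) ...
 *	sfc_dma_free(sa, &esm);
 */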

void
sfc_dma_free(const struct sfc_adapter *sa, efsys_mem_t *esmp)
{
	int rc;

	sfc_log_init(sa, "name=%s", esmp->esm_mz->name);

	rc = rte_memzone_free(esmp->esm_mz);
	if (rc != 0)
		sfc_err(sa, "rte_memzone_free() failed: %d", rc);

	memset(esmp, 0, sizeof(*esmp));
}

static uint32_t
sfc_phy_cap_from_link_speeds(uint32_t speeds)
{
	uint32_t phy_caps = 0;

	if (~speeds & ETH_LINK_SPEED_FIXED) {
		phy_caps |= (1 << EFX_PHY_CAP_AN);
		/*
		 * If no speeds are specified in the mask, any supported
		 * speed may be negotiated
		 */
		if (speeds == ETH_LINK_SPEED_AUTONEG)
			phy_caps |=
				(1 << EFX_PHY_CAP_1000FDX) |
				(1 << EFX_PHY_CAP_10000FDX) |
				(1 << EFX_PHY_CAP_25000FDX) |
				(1 << EFX_PHY_CAP_40000FDX) |
				(1 << EFX_PHY_CAP_50000FDX) |
				(1 << EFX_PHY_CAP_100000FDX);
	}
	if (speeds & ETH_LINK_SPEED_1G)
		phy_caps |= (1 << EFX_PHY_CAP_1000FDX);
	if (speeds & ETH_LINK_SPEED_10G)
		phy_caps |= (1 << EFX_PHY_CAP_10000FDX);
	if (speeds & ETH_LINK_SPEED_25G)
		phy_caps |= (1 << EFX_PHY_CAP_25000FDX);
	if (speeds & ETH_LINK_SPEED_40G)
		phy_caps |= (1 << EFX_PHY_CAP_40000FDX);
	if (speeds & ETH_LINK_SPEED_50G)
		phy_caps |= (1 << EFX_PHY_CAP_50000FDX);
	if (speeds & ETH_LINK_SPEED_100G)
		phy_caps |= (1 << EFX_PHY_CAP_100000FDX);

	return phy_caps;
}
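
/*
 * Worked example for the mapping above (illustrative only): the mask
 * ETH_LINK_SPEED_10G | ETH_LINK_SPEED_25G with ETH_LINK_SPEED_FIXED
 * clear yields
 *
 *	(1 << EFX_PHY_CAP_AN) |
 *	(1 << EFX_PHY_CAP_10000FDX) |
 *	(1 << EFX_PHY_CAP_25000FDX)
 *
 * i.e. autonegotiation restricted to 10G and 25G full duplex.
 */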

/*
 * Check requested device level configuration.
 * Receive and transmit configuration is checked in corresponding
 * modules.
 */
static int
sfc_check_conf(struct sfc_adapter *sa)
{
	const struct rte_eth_conf *conf = &sa->eth_dev->data->dev_conf;
	int rc = 0;

	sa->port.phy_adv_cap =
		sfc_phy_cap_from_link_speeds(conf->link_speeds) &
		sa->port.phy_adv_cap_mask;
	if ((sa->port.phy_adv_cap & ~(1 << EFX_PHY_CAP_AN)) == 0) {
		sfc_err(sa, "No link speeds from mask %#x are supported",
			conf->link_speeds);
		rc = EINVAL;
	}

#if !EFSYS_OPT_LOOPBACK
	if (conf->lpbk_mode != 0) {
		sfc_err(sa, "Loopback not supported");
		rc = EINVAL;
	}
#endif

	if (conf->dcb_capability_en != 0) {
		sfc_err(sa, "Priority-based flow control not supported");
		rc = EINVAL;
	}

	if (conf->fdir_conf.mode != RTE_FDIR_MODE_NONE) {
		sfc_err(sa, "Flow Director not supported");
		rc = EINVAL;
	}

	if ((conf->intr_conf.lsc != 0) &&
	    (sa->intr.type != EFX_INTR_LINE) &&
	    (sa->intr.type != EFX_INTR_MESSAGE)) {
		sfc_err(sa, "Link status change interrupt not supported");
		rc = EINVAL;
	}

	if (conf->intr_conf.rxq != 0 &&
	    (sa->priv.dp_rx->features & SFC_DP_RX_FEAT_INTR) == 0) {
		sfc_err(sa, "Receive queue interrupt not supported");
		rc = EINVAL;
	}

	return rc;
}

/*
 * Find out the maximum number of receive and transmit queues which
 * could be advertised.
 *
 * NIC is kept initialized on success to allow other modules to acquire
 * defaults and capabilities.
 */
static int
sfc_estimate_resource_limits(struct sfc_adapter *sa)
{
	const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
	efx_drv_limits_t limits;
	int rc;
	uint32_t evq_allocated;
	uint32_t rxq_allocated;
	uint32_t txq_allocated;

	memset(&limits, 0, sizeof(limits));

	/* Request at least one Rx and Tx queue */
	limits.edl_min_rxq_count = 1;
	limits.edl_min_txq_count = 1;
	/* Management event queue plus event queue for each Tx and Rx queue */
	limits.edl_min_evq_count =
		1 + limits.edl_min_rxq_count + limits.edl_min_txq_count;

	/* Divide by number of functions to guarantee that all functions
	 * will get promised resources
	 */
	/* FIXME Divide by number of functions (not 2) below */
	limits.edl_max_evq_count = encp->enc_evq_limit / 2;
	SFC_ASSERT(limits.edl_max_evq_count >= limits.edl_min_rxq_count);

	/* Split equally between receive and transmit */
	limits.edl_max_rxq_count =
		MIN(encp->enc_rxq_limit, (limits.edl_max_evq_count - 1) / 2);
	SFC_ASSERT(limits.edl_max_rxq_count >= limits.edl_min_rxq_count);

	limits.edl_max_txq_count =
		MIN(encp->enc_txq_limit,
		    limits.edl_max_evq_count - 1 - limits.edl_max_rxq_count);

	if (sa->tso)
		limits.edl_max_txq_count =
			MIN(limits.edl_max_txq_count,
			    encp->enc_fw_assisted_tso_v2_n_contexts /
			    encp->enc_hw_pf_count);

	SFC_ASSERT(limits.edl_max_txq_count >= limits.edl_min_rxq_count);

	/* Configure the minimum required resources needed for the
	 * driver to operate, and the maximum desired resources that the
	 * driver is capable of using.
	 */
	efx_nic_set_drv_limits(sa->nic, &limits);

	sfc_log_init(sa, "init nic");
	rc = efx_nic_init(sa->nic);
	if (rc != 0)
		goto fail_nic_init;

	/* Find resource dimensions assigned by firmware to this function */
	rc = efx_nic_get_vi_pool(sa->nic, &evq_allocated, &rxq_allocated,
				 &txq_allocated);
	if (rc != 0)
		goto fail_get_vi_pool;

	/* Firmware may still allocate more than the maximum; enforce limits */
	evq_allocated = MIN(evq_allocated, limits.edl_max_evq_count);
	rxq_allocated = MIN(rxq_allocated, limits.edl_max_rxq_count);
	txq_allocated = MIN(txq_allocated, limits.edl_max_txq_count);

	/* Subtract the management EVQ, which is not used for traffic */
	SFC_ASSERT(evq_allocated > 0);
	evq_allocated--;

	/* Right now we use a separate EVQ for each Rx and Tx queue */
	sa->rxq_max = MIN(rxq_allocated, evq_allocated / 2);
	sa->txq_max = MIN(txq_allocated, evq_allocated - sa->rxq_max);

	/* Keep NIC initialized */
	return 0;

fail_get_vi_pool:
	efx_nic_fini(sa->nic);
fail_nic_init:
	return rc;
}
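
/*
 * Numeric walk-through of the sizing above (figures are assumed for the
 * example): with enc_evq_limit = 64 at most 32 EVQs are requested; one
 * EVQ is reserved for management, so up to MIN(enc_rxq_limit, 15) Rx
 * queues and up to MIN(enc_txq_limit, 16) Tx queues are requested, and
 * the firmware-assigned VI pool may clamp the result further.
 */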

static int
sfc_set_drv_limits(struct sfc_adapter *sa)
{
	const struct rte_eth_dev_data *data = sa->eth_dev->data;
	efx_drv_limits_t lim;

	memset(&lim, 0, sizeof(lim));

	/* Limits are strict since they take the initial estimation into
	 * account
	 */
	lim.edl_min_evq_count = lim.edl_max_evq_count =
		1 + data->nb_rx_queues + data->nb_tx_queues;
	lim.edl_min_rxq_count = lim.edl_max_rxq_count = data->nb_rx_queues;
	lim.edl_min_txq_count = lim.edl_max_txq_count = data->nb_tx_queues;

	return efx_nic_set_drv_limits(sa->nic, &lim);
}

static int
sfc_set_fw_subvariant(struct sfc_adapter *sa)
{
	struct sfc_adapter_shared *sas = sfc_sa2shared(sa);
	const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
	uint64_t tx_offloads = sa->eth_dev->data->dev_conf.txmode.offloads;
	unsigned int txq_index;
	efx_nic_fw_subvariant_t req_fw_subvariant;
	efx_nic_fw_subvariant_t cur_fw_subvariant;
	int rc;

	if (!encp->enc_fw_subvariant_no_tx_csum_supported) {
		sfc_info(sa, "no-Tx-checksum subvariant not supported");
		return 0;
	}

	for (txq_index = 0; txq_index < sas->txq_count; ++txq_index) {
		struct sfc_txq_info *txq_info = &sas->txq_info[txq_index];

		if (txq_info->state & SFC_TXQ_INITIALIZED)
			tx_offloads |= txq_info->offloads;
	}

	if (tx_offloads & (DEV_TX_OFFLOAD_IPV4_CKSUM |
			   DEV_TX_OFFLOAD_TCP_CKSUM |
			   DEV_TX_OFFLOAD_UDP_CKSUM |
			   DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM))
		req_fw_subvariant = EFX_NIC_FW_SUBVARIANT_DEFAULT;
	else
		req_fw_subvariant = EFX_NIC_FW_SUBVARIANT_NO_TX_CSUM;

	rc = efx_nic_get_fw_subvariant(sa->nic, &cur_fw_subvariant);
	if (rc != 0) {
		sfc_err(sa, "failed to get FW subvariant: %d", rc);
		return rc;
	}
	sfc_info(sa, "FW subvariant is %u vs required %u",
		 cur_fw_subvariant, req_fw_subvariant);

	if (cur_fw_subvariant == req_fw_subvariant)
		return 0;

	rc = efx_nic_set_fw_subvariant(sa->nic, req_fw_subvariant);
	if (rc != 0) {
		sfc_err(sa, "failed to set FW subvariant %u: %d",
			req_fw_subvariant, rc);
		return rc;
	}
	sfc_info(sa, "FW subvariant set to %u", req_fw_subvariant);

	return 0;
}
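
/*
 * Example (illustrative): if the device-level txmode.offloads and all
 * initialized Tx queues carry only DEV_TX_OFFLOAD_MBUF_FAST_FREE, no
 * checksum offload bit is set and EFX_NIC_FW_SUBVARIANT_NO_TX_CSUM is
 * requested; enabling DEV_TX_OFFLOAD_TCP_CKSUM on any queue switches
 * the request back to EFX_NIC_FW_SUBVARIANT_DEFAULT.
 */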

static int
sfc_try_start(struct sfc_adapter *sa)
{
	const efx_nic_cfg_t *encp;
	int rc;

	sfc_log_init(sa, "entry");

	SFC_ASSERT(sfc_adapter_is_locked(sa));
	SFC_ASSERT(sa->state == SFC_ADAPTER_STARTING);

	sfc_log_init(sa, "set FW subvariant");
	rc = sfc_set_fw_subvariant(sa);
	if (rc != 0)
		goto fail_set_fw_subvariant;

	sfc_log_init(sa, "set resource limits");
	rc = sfc_set_drv_limits(sa);
	if (rc != 0)
		goto fail_set_drv_limits;

	sfc_log_init(sa, "init nic");
	rc = efx_nic_init(sa->nic);
	if (rc != 0)
		goto fail_nic_init;

	encp = efx_nic_cfg_get(sa->nic);

	/*
	 * Refresh (since it may change on NIC reset/restart) the copy of
	 * supported tunnel encapsulations in shared memory to be used
	 * when the supported Rx packet type classes are queried.
	 */
	sa->priv.shared->tunnel_encaps =
		encp->enc_tunnel_encapsulations_supported;

	if (encp->enc_tunnel_encapsulations_supported != 0) {
		sfc_log_init(sa, "apply tunnel config");
		rc = efx_tunnel_reconfigure(sa->nic);
		if (rc != 0)
			goto fail_tunnel_reconfigure;
	}

	rc = sfc_intr_start(sa);
	if (rc != 0)
		goto fail_intr_start;

	rc = sfc_ev_start(sa);
	if (rc != 0)
		goto fail_ev_start;

	rc = sfc_port_start(sa);
	if (rc != 0)
		goto fail_port_start;

	rc = sfc_rx_start(sa);
	if (rc != 0)
		goto fail_rx_start;

	rc = sfc_tx_start(sa);
	if (rc != 0)
		goto fail_tx_start;

	rc = sfc_flow_start(sa);
	if (rc != 0)
		goto fail_flows_insert;

	sfc_log_init(sa, "done");
	return 0;

fail_flows_insert:
	sfc_tx_stop(sa);

fail_tx_start:
	sfc_rx_stop(sa);

fail_rx_start:
	sfc_port_stop(sa);

fail_port_start:
	sfc_ev_stop(sa);

fail_ev_start:
	sfc_intr_stop(sa);

fail_intr_start:
fail_tunnel_reconfigure:
	efx_nic_fini(sa->nic);

fail_nic_init:
fail_set_drv_limits:
fail_set_fw_subvariant:
	sfc_log_init(sa, "failed %d", rc);
	return rc;
}

int
sfc_start(struct sfc_adapter *sa)
{
	unsigned int start_tries = 3;
	int rc;

	sfc_log_init(sa, "entry");

	SFC_ASSERT(sfc_adapter_is_locked(sa));

	switch (sa->state) {
	case SFC_ADAPTER_CONFIGURED:
		break;
	case SFC_ADAPTER_STARTED:
		sfc_notice(sa, "already started");
		return 0;
	default:
		rc = EINVAL;
		goto fail_bad_state;
	}

	sa->state = SFC_ADAPTER_STARTING;

	rc = 0;
	do {
		/*
		 * FIXME Try to recreate vSwitch on start retry.
		 * vSwitch is absent after MC-reboot-like events and
		 * we should recreate it. Maybe we need a proper
		 * indication instead of guessing.
		 */
		if (rc != 0) {
			sfc_sriov_vswitch_destroy(sa);
			rc = sfc_sriov_vswitch_create(sa);
			if (rc != 0)
				goto fail_sriov_vswitch_create;
		}
		rc = sfc_try_start(sa);
	} while ((--start_tries > 0) &&
		 (rc == EIO || rc == EAGAIN || rc == ENOENT || rc == EINVAL));

	if (rc != 0)
		goto fail_try_start;

	sa->state = SFC_ADAPTER_STARTED;
	sfc_log_init(sa, "done");
	return 0;

fail_try_start:
fail_sriov_vswitch_create:
	sa->state = SFC_ADAPTER_CONFIGURED;
fail_bad_state:
	sfc_log_init(sa, "failed %d", rc);
	return rc;
}
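
/*
 * Illustrative only: callers must hold the adapter lock. A hypothetical
 * ethdev-level wrapper would look like the sketch below (ethdev expects
 * a negative errno, while sfc_start() returns a positive one):
 *
 *	sfc_adapter_lock(sa);
 *	rc = sfc_start(sa);
 *	sfc_adapter_unlock(sa);
 *	return -rc;
 */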

void
sfc_stop(struct sfc_adapter *sa)
{
	sfc_log_init(sa, "entry");

	SFC_ASSERT(sfc_adapter_is_locked(sa));

	switch (sa->state) {
	case SFC_ADAPTER_STARTED:
		break;
	case SFC_ADAPTER_CONFIGURED:
		sfc_notice(sa, "already stopped");
		return;
	default:
		sfc_err(sa, "stop in unexpected state %u", sa->state);
		SFC_ASSERT(B_FALSE);
		return;
	}

	sa->state = SFC_ADAPTER_STOPPING;

	sfc_flow_stop(sa);
	sfc_tx_stop(sa);
	sfc_rx_stop(sa);
	sfc_port_stop(sa);
	sfc_ev_stop(sa);
	sfc_intr_stop(sa);
	efx_nic_fini(sa->nic);

	sa->state = SFC_ADAPTER_CONFIGURED;
	sfc_log_init(sa, "done");
}

static int
sfc_restart(struct sfc_adapter *sa)
{
	int rc;

	SFC_ASSERT(sfc_adapter_is_locked(sa));

	if (sa->state != SFC_ADAPTER_STARTED)
		return EINVAL;

	sfc_stop(sa);

	rc = sfc_start(sa);
	if (rc != 0)
		sfc_err(sa, "restart failed");

	return rc;
}

static void
sfc_restart_if_required(void *arg)
{
	struct sfc_adapter *sa = arg;

	/* If restart is scheduled, clear the flag and do it */
	if (rte_atomic32_cmpset((volatile uint32_t *)&sa->restart_required,
				1, 0)) {
		sfc_adapter_lock(sa);
		if (sa->state == SFC_ADAPTER_STARTED)
			(void)sfc_restart(sa);
		sfc_adapter_unlock(sa);
	}
}

void
sfc_schedule_restart(struct sfc_adapter *sa)
{
	int rc;

	/* Schedule restart alarm if it is not scheduled yet */
	if (!rte_atomic32_test_and_set(&sa->restart_required))
		return;

	rc = rte_eal_alarm_set(1, sfc_restart_if_required, sa);
	if (rc == -ENOTSUP)
		sfc_warn(sa, "alarms are not supported, restart is pending");
	else if (rc != 0)
		sfc_err(sa, "cannot arm restart alarm (rc=%d)", rc);
	else
		sfc_notice(sa, "restart scheduled");
}
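
/*
 * Illustrative only: sfc_schedule_restart() is intended for contexts
 * that must not block or take the adapter lock, e.g. a hypothetical
 * event handler detecting an MC reboot would just call
 *
 *	sfc_schedule_restart(sa);
 *
 * and return; the restart itself then runs from the EAL alarm callback
 * sfc_restart_if_required() with the adapter lock taken.
 */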

int
sfc_configure(struct sfc_adapter *sa)
{
	int rc;

	sfc_log_init(sa, "entry");

	SFC_ASSERT(sfc_adapter_is_locked(sa));

	SFC_ASSERT(sa->state == SFC_ADAPTER_INITIALIZED ||
		   sa->state == SFC_ADAPTER_CONFIGURED);
	sa->state = SFC_ADAPTER_CONFIGURING;

	rc = sfc_check_conf(sa);
	if (rc != 0)
		goto fail_check_conf;

	rc = sfc_intr_configure(sa);
	if (rc != 0)
		goto fail_intr_configure;

	rc = sfc_port_configure(sa);
	if (rc != 0)
		goto fail_port_configure;

	rc = sfc_rx_configure(sa);
	if (rc != 0)
		goto fail_rx_configure;

	rc = sfc_tx_configure(sa);
	if (rc != 0)
		goto fail_tx_configure;

	sa->state = SFC_ADAPTER_CONFIGURED;
	sfc_log_init(sa, "done");
	return 0;

fail_tx_configure:
	sfc_rx_close(sa);

fail_rx_configure:
	sfc_port_close(sa);

fail_port_configure:
	sfc_intr_close(sa);

fail_intr_configure:
fail_check_conf:
	sa->state = SFC_ADAPTER_INITIALIZED;
	sfc_log_init(sa, "failed %d", rc);
	return rc;
}

void
sfc_close(struct sfc_adapter *sa)
{
	sfc_log_init(sa, "entry");

	SFC_ASSERT(sfc_adapter_is_locked(sa));

	SFC_ASSERT(sa->state == SFC_ADAPTER_CONFIGURED);
	sa->state = SFC_ADAPTER_CLOSING;

	sfc_tx_close(sa);
	sfc_rx_close(sa);
	sfc_port_close(sa);
	sfc_intr_close(sa);

	sa->state = SFC_ADAPTER_INITIALIZED;
	sfc_log_init(sa, "done");
}

static int
sfc_mem_bar_init(struct sfc_adapter *sa, unsigned int membar)
{
	struct rte_eth_dev *eth_dev = sa->eth_dev;
	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
	efsys_bar_t *ebp = &sa->mem_bar;
	struct rte_mem_resource *res = &pci_dev->mem_resource[membar];

	SFC_BAR_LOCK_INIT(ebp, eth_dev->data->name);
	ebp->esb_rid = membar;
	ebp->esb_dev = pci_dev;
	ebp->esb_base = res->addr;
	return 0;
}

static void
sfc_mem_bar_fini(struct sfc_adapter *sa)
{
	efsys_bar_t *ebp = &sa->mem_bar;

	SFC_BAR_LOCK_DESTROY(ebp);
	memset(ebp, 0, sizeof(*ebp));
}

/*
 * A fixed RSS key which has the property of being symmetric
 * (symmetrical flows are distributed to the same CPU)
 * and is also known to give a uniform distribution
 * (a good distribution of traffic between different CPUs)
 */
static const uint8_t default_rss_key[EFX_RSS_KEY_SIZE] = {
	0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
	0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
	0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
	0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
	0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
};
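
/*
 * Illustrative property of the key above: since it is the 16-bit pattern
 * 0x6d5a repeated, the Toeplitz hash of a flow is unchanged when source
 * and destination addresses/ports are swapped, so both directions of a
 * TCP or UDP connection are steered to the same Rx queue.
 */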

static int
sfc_rss_attach(struct sfc_adapter *sa)
{
	struct sfc_rss *rss = &sfc_sa2shared(sa)->rss;
	int rc;

	rc = efx_intr_init(sa->nic, sa->intr.type, NULL);
	if (rc != 0)
		goto fail_intr_init;

	rc = efx_ev_init(sa->nic);
	if (rc != 0)
		goto fail_ev_init;

	rc = efx_rx_init(sa->nic);
	if (rc != 0)
		goto fail_rx_init;

	rc = efx_rx_scale_default_support_get(sa->nic, &rss->context_type);
	if (rc != 0)
		goto fail_scale_support_get;

	rc = efx_rx_hash_default_support_get(sa->nic, &rss->hash_support);
	if (rc != 0)
		goto fail_hash_support_get;

	rc = sfc_rx_hash_init(sa);
	if (rc != 0)
		goto fail_rx_hash_init;

	efx_rx_fini(sa->nic);
	efx_ev_fini(sa->nic);
	efx_intr_fini(sa->nic);

	rte_memcpy(rss->key, default_rss_key, sizeof(rss->key));
	rss->dummy_rss_context = EFX_RSS_CONTEXT_DEFAULT;

	return 0;

fail_rx_hash_init:
fail_hash_support_get:
fail_scale_support_get:
	efx_rx_fini(sa->nic);

fail_rx_init:
	efx_ev_fini(sa->nic);

fail_ev_init:
	efx_intr_fini(sa->nic);

fail_intr_init:
	return rc;
}

static void
sfc_rss_detach(struct sfc_adapter *sa)
{
	sfc_rx_hash_fini(sa);
}

int
sfc_attach(struct sfc_adapter *sa)
{
	const efx_nic_cfg_t *encp;
	efx_nic_t *enp = sa->nic;
	int rc;

	sfc_log_init(sa, "entry");

	SFC_ASSERT(sfc_adapter_is_locked(sa));

	efx_mcdi_new_epoch(enp);

	sfc_log_init(sa, "reset nic");
	rc = efx_nic_reset(enp);
	if (rc != 0)
		goto fail_nic_reset;

	rc = sfc_sriov_attach(sa);
	if (rc != 0)
		goto fail_sriov_attach;

	/*
	 * Probed NIC is sufficient for tunnel init.
	 * Initialize tunnel support to be able to use libefx
	 * efx_tunnel_config_udp_{add,remove}() in any state and
	 * efx_tunnel_reconfigure() on start up.
	 */
	rc = efx_tunnel_init(enp);
	if (rc != 0)
		goto fail_tunnel_init;

	encp = efx_nic_cfg_get(sa->nic);

	/*
	 * Make a copy of supported tunnel encapsulations in shared
	 * memory to be used when the supported Rx packet type classes
	 * are queried.
	 */
	sa->priv.shared->tunnel_encaps =
		encp->enc_tunnel_encapsulations_supported;

	if (sfc_dp_tx_offload_capa(sa->priv.dp_tx) & DEV_TX_OFFLOAD_TCP_TSO) {
		sa->tso = encp->enc_fw_assisted_tso_v2_enabled;
		if (!sa->tso)
			sfc_info(sa, "TSO support isn't available on this adapter");
	}

	if (sa->tso &&
	    (sfc_dp_tx_offload_capa(sa->priv.dp_tx) &
	     (DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
	      DEV_TX_OFFLOAD_GENEVE_TNL_TSO)) != 0) {
		sa->tso_encap = encp->enc_fw_assisted_tso_v2_encap_enabled;
		if (!sa->tso_encap)
			sfc_info(sa, "Encapsulated TSO support isn't available on this adapter");
	}

	sfc_log_init(sa, "estimate resource limits");
	rc = sfc_estimate_resource_limits(sa);
	if (rc != 0)
		goto fail_estimate_rsrc_limits;

	sa->evq_max_entries = encp->enc_evq_max_nevs;
	SFC_ASSERT(rte_is_power_of_2(sa->evq_max_entries));

	sa->evq_min_entries = encp->enc_evq_min_nevs;
	SFC_ASSERT(rte_is_power_of_2(sa->evq_min_entries));

	sa->rxq_max_entries = encp->enc_rxq_max_ndescs;
	SFC_ASSERT(rte_is_power_of_2(sa->rxq_max_entries));

	sa->rxq_min_entries = encp->enc_rxq_min_ndescs;
	SFC_ASSERT(rte_is_power_of_2(sa->rxq_min_entries));

	sa->txq_max_entries = encp->enc_txq_max_ndescs;
	SFC_ASSERT(rte_is_power_of_2(sa->txq_max_entries));

	sa->txq_min_entries = encp->enc_txq_min_ndescs;
	SFC_ASSERT(rte_is_power_of_2(sa->txq_min_entries));

	rc = sfc_intr_attach(sa);
	if (rc != 0)
		goto fail_intr_attach;

	rc = sfc_ev_attach(sa);
	if (rc != 0)
		goto fail_ev_attach;

	rc = sfc_port_attach(sa);
	if (rc != 0)
		goto fail_port_attach;

	rc = sfc_rss_attach(sa);
	if (rc != 0)
		goto fail_rss_attach;

	rc = sfc_filter_attach(sa);
	if (rc != 0)
		goto fail_filter_attach;

	sfc_log_init(sa, "fini nic");
	efx_nic_fini(enp);

	sfc_flow_init(sa);

	/*
	 * Create the vSwitch to be able to use VFs when the PF is not
	 * started yet as a DPDK port. VFs should be able to talk to each
	 * other even if the PF is down.
	 */
	rc = sfc_sriov_vswitch_create(sa);
	if (rc != 0)
		goto fail_sriov_vswitch_create;

	sa->state = SFC_ADAPTER_INITIALIZED;

	sfc_log_init(sa, "done");
	return 0;

fail_sriov_vswitch_create:
	sfc_flow_fini(sa);
	sfc_filter_detach(sa);

fail_filter_attach:
	sfc_rss_detach(sa);

fail_rss_attach:
	sfc_port_detach(sa);

fail_port_attach:
	sfc_ev_detach(sa);

fail_ev_attach:
	sfc_intr_detach(sa);

fail_intr_attach:
	efx_nic_fini(sa->nic);

fail_estimate_rsrc_limits:
fail_tunnel_init:
	efx_tunnel_fini(sa->nic);
	sfc_sriov_detach(sa);

fail_sriov_attach:
fail_nic_reset:

	sfc_log_init(sa, "failed %d", rc);
	return rc;
}

void
sfc_detach(struct sfc_adapter *sa)
{
	sfc_log_init(sa, "entry");

	SFC_ASSERT(sfc_adapter_is_locked(sa));

	sfc_sriov_vswitch_destroy(sa);

	sfc_flow_fini(sa);

	sfc_filter_detach(sa);
	sfc_rss_detach(sa);
	sfc_port_detach(sa);
	sfc_ev_detach(sa);
	sfc_intr_detach(sa);
	efx_tunnel_fini(sa->nic);
	sfc_sriov_detach(sa);

	sa->state = SFC_ADAPTER_UNINITIALIZED;
}

static int
sfc_kvarg_fv_variant_handler(__rte_unused const char *key,
			     const char *value_str, void *opaque)
{
	uint32_t *value = opaque;

	if (strcasecmp(value_str, SFC_KVARG_FW_VARIANT_DONT_CARE) == 0)
		*value = EFX_FW_VARIANT_DONT_CARE;
	else if (strcasecmp(value_str, SFC_KVARG_FW_VARIANT_FULL_FEATURED) == 0)
		*value = EFX_FW_VARIANT_FULL_FEATURED;
	else if (strcasecmp(value_str, SFC_KVARG_FW_VARIANT_LOW_LATENCY) == 0)
		*value = EFX_FW_VARIANT_LOW_LATENCY;
	else if (strcasecmp(value_str, SFC_KVARG_FW_VARIANT_PACKED_STREAM) == 0)
		*value = EFX_FW_VARIANT_PACKED_STREAM;
	else if (strcasecmp(value_str, SFC_KVARG_FW_VARIANT_DPDK) == 0)
		*value = EFX_FW_VARIANT_DPDK;
	else
		return -EINVAL;

	return 0;
}
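
/*
 * Example (illustrative): the handler above parses the SFC_KVARG_FW_VARIANT
 * device argument, passed in the PCI device arguments of a hypothetical
 * device, e.g.
 *
 *	0000:02:00.0,fw_variant=<value>
 *
 * where <value> is one of the SFC_KVARG_FW_VARIANT_* strings, e.g.
 * SFC_KVARG_FW_VARIANT_LOW_LATENCY selects EFX_FW_VARIANT_LOW_LATENCY.
 */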

static int
sfc_get_fw_variant(struct sfc_adapter *sa, efx_fw_variant_t *efv)
{
	efx_nic_fw_info_t enfi;
	int rc;

	rc = efx_nic_get_fw_version(sa->nic, &enfi);
	if (rc != 0)
		return rc;
	else if (!enfi.enfi_dpcpu_fw_ids_valid)
		return ENOTSUP;

	/*
	 * Firmware variant can be uniquely identified by the RxDPCPU
	 * firmware id
	 */
	switch (enfi.enfi_rx_dpcpu_fw_id) {
	case EFX_RXDP_FULL_FEATURED_FW_ID:
		*efv = EFX_FW_VARIANT_FULL_FEATURED;
		break;

	case EFX_RXDP_LOW_LATENCY_FW_ID:
		*efv = EFX_FW_VARIANT_LOW_LATENCY;
		break;

	case EFX_RXDP_PACKED_STREAM_FW_ID:
		*efv = EFX_FW_VARIANT_PACKED_STREAM;
		break;

	case EFX_RXDP_DPDK_FW_ID:
		*efv = EFX_FW_VARIANT_DPDK;
		break;

	default:
		/*
		 * Other firmware variants are not considered, since they are
		 * not supported in the device parameters
		 */
		*efv = EFX_FW_VARIANT_DONT_CARE;
		break;
	}

	return 0;
}

static const char *
sfc_fw_variant2str(efx_fw_variant_t efv)
{
	switch (efv) {
	case EFX_FW_VARIANT_FULL_FEATURED:
		return SFC_KVARG_FW_VARIANT_FULL_FEATURED;
	case EFX_FW_VARIANT_LOW_LATENCY:
		return SFC_KVARG_FW_VARIANT_LOW_LATENCY;
	case EFX_FW_VARIANT_PACKED_STREAM:
		return SFC_KVARG_FW_VARIANT_PACKED_STREAM;
	case EFX_FW_VARIANT_DPDK:
		return SFC_KVARG_FW_VARIANT_DPDK;
	default:
		return "unknown";
	}
}

static int
sfc_kvarg_rxd_wait_timeout_ns(struct sfc_adapter *sa)
{
	int rc;
	long value;

	value = SFC_RXD_WAIT_TIMEOUT_NS_DEF;

	rc = sfc_kvargs_process(sa, SFC_KVARG_RXD_WAIT_TIMEOUT_NS,
				sfc_kvarg_long_handler, &value);
	if (rc != 0)
		return rc;

	if (value < 0 ||
	    (unsigned long)value > EFX_RXQ_ES_SUPER_BUFFER_HOL_BLOCK_MAX) {
		sfc_err(sa, "wrong '" SFC_KVARG_RXD_WAIT_TIMEOUT_NS "' "
			    "was set (%ld)", value);
		sfc_err(sa, "it must not be less than 0 or greater than %u",
			    EFX_RXQ_ES_SUPER_BUFFER_HOL_BLOCK_MAX);
		return EINVAL;
	}

	sa->rxd_wait_timeout_ns = value;
	return 0;
}
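
/*
 * Example (illustrative): the timeout is supplied as a device argument
 * of a hypothetical device, e.g.
 *
 *	0000:02:00.0,rxd_wait_timeout_ns=200000
 *
 * Values outside [0, EFX_RXQ_ES_SUPER_BUFFER_HOL_BLOCK_MAX] are rejected
 * with EINVAL, and SFC_RXD_WAIT_TIMEOUT_NS_DEF is used when the argument
 * is absent.
 */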

static int
sfc_nic_probe(struct sfc_adapter *sa)
{
	efx_nic_t *enp = sa->nic;
	efx_fw_variant_t preferred_efv;
	efx_fw_variant_t efv;
	int rc;

	preferred_efv = EFX_FW_VARIANT_DONT_CARE;
	rc = sfc_kvargs_process(sa, SFC_KVARG_FW_VARIANT,
				sfc_kvarg_fv_variant_handler,
				&preferred_efv);
	if (rc != 0) {
		sfc_err(sa, "invalid %s parameter value", SFC_KVARG_FW_VARIANT);
		return rc;
	}

	rc = sfc_kvarg_rxd_wait_timeout_ns(sa);
	if (rc != 0)
		return rc;

	rc = efx_nic_probe(enp, preferred_efv);
	if (rc == EACCES) {
		/* Unprivileged functions cannot set FW variant */
		rc = efx_nic_probe(enp, EFX_FW_VARIANT_DONT_CARE);
	}
	if (rc != 0)
		return rc;

	rc = sfc_get_fw_variant(sa, &efv);
	if (rc == ENOTSUP) {
		sfc_warn(sa, "FW variant cannot be obtained");
		return 0;
	}
	if (rc != 0)
		return rc;

	/* Check that the firmware variant was changed to the requested one */
	if (preferred_efv != EFX_FW_VARIANT_DONT_CARE && preferred_efv != efv) {
		sfc_warn(sa, "FW variant has not changed to the requested %s",
			 sfc_fw_variant2str(preferred_efv));
	}

	sfc_notice(sa, "running FW variant is %s", sfc_fw_variant2str(efv));

	return 0;
}

int
sfc_probe(struct sfc_adapter *sa)
{
	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(sa->eth_dev);
	unsigned int membar;
	efx_nic_t *enp;
	int rc;

	sfc_log_init(sa, "entry");

	SFC_ASSERT(sfc_adapter_is_locked(sa));

	sa->socket_id = rte_socket_id();
	rte_atomic32_init(&sa->restart_required);

	sfc_log_init(sa, "get family");
	rc = efx_family(pci_dev->id.vendor_id, pci_dev->id.device_id,
			&sa->family, &membar);
	if (rc != 0)
		goto fail_family;
	sfc_log_init(sa, "family is %u, membar is %u", sa->family, membar);

	sfc_log_init(sa, "init mem bar");
	rc = sfc_mem_bar_init(sa, membar);
	if (rc != 0)
		goto fail_mem_bar_init;

	sfc_log_init(sa, "create nic");
	rte_spinlock_init(&sa->nic_lock);
	rc = efx_nic_create(sa->family, (efsys_identifier_t *)sa,
			    &sa->mem_bar, 0,
			    &sa->nic_lock, &enp);
	if (rc != 0)
		goto fail_nic_create;
	sa->nic = enp;

	rc = sfc_mcdi_init(sa);
	if (rc != 0)
		goto fail_mcdi_init;

	sfc_log_init(sa, "probe nic");
	rc = sfc_nic_probe(sa);
	if (rc != 0)
		goto fail_nic_probe;

	sfc_log_init(sa, "done");
	return 0;

fail_nic_probe:
	sfc_mcdi_fini(sa);

fail_mcdi_init:
	sfc_log_init(sa, "destroy nic");
	sa->nic = NULL;
	efx_nic_destroy(enp);

fail_nic_create:
	sfc_mem_bar_fini(sa);

fail_mem_bar_init:
fail_family:
	sfc_log_init(sa, "failed %d", rc);
	return rc;
}

void
sfc_unprobe(struct sfc_adapter *sa)
{
	efx_nic_t *enp = sa->nic;

	sfc_log_init(sa, "entry");

	SFC_ASSERT(sfc_adapter_is_locked(sa));

	sfc_log_init(sa, "unprobe nic");
	efx_nic_unprobe(enp);

	sfc_mcdi_fini(sa);

	/*
	 * Make sure there is no pending alarm to restart since we are
	 * going to free the device private data which is passed as the
	 * callback opaque data. A new alarm cannot be scheduled since
	 * MCDI is shut down.
	 */
	rte_eal_alarm_cancel(sfc_restart_if_required, sa);

	sfc_log_init(sa, "destroy nic");
	sa->nic = NULL;
	efx_nic_destroy(enp);

	sfc_mem_bar_fini(sa);

	sfc_flow_fini(sa);
	sa->state = SFC_ADAPTER_UNINITIALIZED;
}

uint32_t
sfc_register_logtype(const struct rte_pci_addr *pci_addr,
		     const char *lt_prefix_str, uint32_t ll_default)
{
	size_t lt_prefix_str_size = strlen(lt_prefix_str);
	size_t lt_str_size_max;
	char *lt_str = NULL;
	int ret;

	if (SIZE_MAX - PCI_PRI_STR_SIZE - 1 > lt_prefix_str_size) {
		++lt_prefix_str_size; /* Reserve space for prefix separator */
		lt_str_size_max = lt_prefix_str_size + PCI_PRI_STR_SIZE + 1;
	} else {
		return sfc_logtype_driver;
	}

	lt_str = rte_zmalloc("logtype_str", lt_str_size_max, 0);
	if (lt_str == NULL)
		return sfc_logtype_driver;

	strncpy(lt_str, lt_prefix_str, lt_prefix_str_size);
	lt_str[lt_prefix_str_size - 1] = '.';
	rte_pci_device_name(pci_addr, lt_str + lt_prefix_str_size,
			    lt_str_size_max - lt_prefix_str_size);
	lt_str[lt_str_size_max - 1] = '\0';

	ret = rte_log_register_type_and_pick_level(lt_str, ll_default);
	rte_free(lt_str);

	if (ret < 0)
		return sfc_logtype_driver;

	return ret;
}
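
/*
 * Example (illustrative): for an assumed prefix "pmd.net.sfc.main" and a
 * device at 0000:02:00.0, the function above registers the logtype
 * "pmd.net.sfc.main.0000:02:00.0" with the given default level, falling
 * back to the generic sfc_logtype_driver on allocation or registration
 * failure.
 */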