/* SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright(c) 2019-2021 Xilinx, Inc.
 * Copyright(c) 2016-2019 Solarflare Communications Inc.
 *
 * This software was jointly developed between OKTET Labs (under contract
 * for Solarflare) and Solarflare Communications, Inc.
 */

/* sysconf() */
#include <unistd.h>

#include <rte_errno.h>
#include <rte_alarm.h>

#include "efx.h"

#include "sfc.h"
#include "sfc_debug.h"
#include "sfc_log.h"
#include "sfc_ev.h"
#include "sfc_rx.h"
#include "sfc_mae_counter.h"
#include "sfc_tx.h"
#include "sfc_kvargs.h"
#include "sfc_tweak.h"
#include "sfc_sw_stats.h"


int
sfc_dma_alloc(const struct sfc_adapter *sa, const char *name, uint16_t id,
	      size_t len, int socket_id, efsys_mem_t *esmp)
{
	const struct rte_memzone *mz;

	sfc_log_init(sa, "name=%s id=%u len=%zu socket_id=%d",
		     name, id, len, socket_id);

	mz = rte_eth_dma_zone_reserve(sa->eth_dev, name, id, len,
				      sysconf(_SC_PAGESIZE), socket_id);
	if (mz == NULL) {
		sfc_err(sa, "cannot reserve DMA zone for %s:%u %#x@%d: %s",
			name, (unsigned int)id, (unsigned int)len, socket_id,
			rte_strerror(rte_errno));
		return ENOMEM;
	}

	esmp->esm_addr = mz->iova;
	if (esmp->esm_addr == RTE_BAD_IOVA) {
		(void)rte_memzone_free(mz);
		return EFAULT;
	}

	esmp->esm_mz = mz;
	esmp->esm_base = mz->addr;

	sfc_info(sa,
		 "DMA name=%s id=%u len=%zu socket_id=%d => virt=%p iova=%lx",
		 name, id, len, socket_id, esmp->esm_base,
		 (unsigned long)esmp->esm_addr);

	return 0;
}

void
sfc_dma_free(const struct sfc_adapter *sa, efsys_mem_t *esmp)
{
	int rc;

	sfc_log_init(sa, "name=%s", esmp->esm_mz->name);

	rc = rte_memzone_free(esmp->esm_mz);
	if (rc != 0)
		sfc_err(sa, "rte_memzone_free() failed: %d", rc);

	memset(esmp, 0, sizeof(*esmp));
}

static uint32_t
sfc_phy_cap_from_link_speeds(uint32_t speeds)
{
	uint32_t phy_caps = 0;

	if (~speeds & ETH_LINK_SPEED_FIXED) {
		phy_caps |= (1 << EFX_PHY_CAP_AN);
		/*
		 * If no speeds are specified in the mask, any supported
		 * speed may be negotiated.
		 */
		if (speeds == ETH_LINK_SPEED_AUTONEG)
			phy_caps |=
				(1 << EFX_PHY_CAP_1000FDX) |
				(1 << EFX_PHY_CAP_10000FDX) |
				(1 << EFX_PHY_CAP_25000FDX) |
				(1 << EFX_PHY_CAP_40000FDX) |
				(1 << EFX_PHY_CAP_50000FDX) |
				(1 << EFX_PHY_CAP_100000FDX);
	}
	if (speeds & ETH_LINK_SPEED_1G)
		phy_caps |= (1 << EFX_PHY_CAP_1000FDX);
	if (speeds & ETH_LINK_SPEED_10G)
		phy_caps |= (1 << EFX_PHY_CAP_10000FDX);
	if (speeds & ETH_LINK_SPEED_25G)
		phy_caps |= (1 << EFX_PHY_CAP_25000FDX);
	if (speeds & ETH_LINK_SPEED_40G)
		phy_caps |= (1 << EFX_PHY_CAP_40000FDX);
	if (speeds & ETH_LINK_SPEED_50G)
		phy_caps |= (1 << EFX_PHY_CAP_50000FDX);
	if (speeds & ETH_LINK_SPEED_100G)
		phy_caps |= (1 << EFX_PHY_CAP_100000FDX);

	return phy_caps;
}
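/*
 * Worked example (illustrative): link_speeds = ETH_LINK_SPEED_10G |
 * ETH_LINK_SPEED_25G leaves ETH_LINK_SPEED_FIXED clear, so the function
 * above returns (1 << EFX_PHY_CAP_AN) | (1 << EFX_PHY_CAP_10000FDX) |
 * (1 << EFX_PHY_CAP_25000FDX), i.e. autonegotiation restricted to
 * 10G/25G. With ETH_LINK_SPEED_FIXED set, EFX_PHY_CAP_AN is not added.
 */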
/*
 * Check requested device-level configuration.
 * Receive and transmit configuration is checked in corresponding
 * modules.
 */
static int
sfc_check_conf(struct sfc_adapter *sa)
{
	const struct rte_eth_conf *conf = &sa->eth_dev->data->dev_conf;
	int rc = 0;

	sa->port.phy_adv_cap =
		sfc_phy_cap_from_link_speeds(conf->link_speeds) &
		sa->port.phy_adv_cap_mask;
	if ((sa->port.phy_adv_cap & ~(1 << EFX_PHY_CAP_AN)) == 0) {
		sfc_err(sa, "No link speeds from mask %#x are supported",
			conf->link_speeds);
		rc = EINVAL;
	}

#if !EFSYS_OPT_LOOPBACK
	if (conf->lpbk_mode != 0) {
		sfc_err(sa, "Loopback not supported");
		rc = EINVAL;
	}
#endif

	if (conf->dcb_capability_en != 0) {
		sfc_err(sa, "Priority-based flow control not supported");
		rc = EINVAL;
	}

	if (conf->fdir_conf.mode != RTE_FDIR_MODE_NONE) {
		sfc_err(sa, "Flow Director not supported");
		rc = EINVAL;
	}

	if ((conf->intr_conf.lsc != 0) &&
	    (sa->intr.type != EFX_INTR_LINE) &&
	    (sa->intr.type != EFX_INTR_MESSAGE)) {
		sfc_err(sa, "Link status change interrupt not supported");
		rc = EINVAL;
	}

	if (conf->intr_conf.rxq != 0 &&
	    (sa->priv.dp_rx->features & SFC_DP_RX_FEAT_INTR) == 0) {
		sfc_err(sa, "Receive queue interrupt not supported");
		rc = EINVAL;
	}

	return rc;
}
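/*
 * For instance (hypothetical adapter), requesting only ETH_LINK_SPEED_1G
 * on a NIC whose phy_adv_cap_mask has no 1G capability leaves nothing but
 * EFX_PHY_CAP_AN advertised, so sfc_check_conf() reports the link speed
 * mask as unsupported and returns EINVAL.
 */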
/*
 * Find out the maximum number of receive and transmit queues which
 * could be advertised.
 *
 * NIC is kept initialized on success to allow other modules to acquire
 * defaults and capabilities.
 */
static int
sfc_estimate_resource_limits(struct sfc_adapter *sa)
{
	const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
	struct sfc_adapter_shared *sas = sfc_sa2shared(sa);
	efx_drv_limits_t limits;
	int rc;
	uint32_t evq_allocated;
	uint32_t rxq_allocated;
	uint32_t txq_allocated;

	memset(&limits, 0, sizeof(limits));

	/* Request at least one Rx and Tx queue */
	limits.edl_min_rxq_count = 1;
	limits.edl_min_txq_count = 1;
	/* Management event queue plus event queue for each Tx and Rx queue */
	limits.edl_min_evq_count =
		1 + limits.edl_min_rxq_count + limits.edl_min_txq_count;

	/* Divide by number of functions to guarantee that all functions
	 * will get promised resources
	 */
	/* FIXME Divide by number of functions (not 2) below */
	limits.edl_max_evq_count = encp->enc_evq_limit / 2;
	SFC_ASSERT(limits.edl_max_evq_count >= limits.edl_min_evq_count);

	/* Split equally between receive and transmit */
	limits.edl_max_rxq_count =
		MIN(encp->enc_rxq_limit, (limits.edl_max_evq_count - 1) / 2);
	SFC_ASSERT(limits.edl_max_rxq_count >= limits.edl_min_rxq_count);

	limits.edl_max_txq_count =
		MIN(encp->enc_txq_limit,
		    limits.edl_max_evq_count - 1 - limits.edl_max_rxq_count);

	if (sa->tso && encp->enc_fw_assisted_tso_v2_enabled)
		limits.edl_max_txq_count =
			MIN(limits.edl_max_txq_count,
			    encp->enc_fw_assisted_tso_v2_n_contexts /
			    encp->enc_hw_pf_count);

	SFC_ASSERT(limits.edl_max_txq_count >= limits.edl_min_txq_count);

	/* Configure the minimum required resources needed for the
	 * driver to operate, and the maximum desired resources that the
	 * driver is capable of using.
	 */
	efx_nic_set_drv_limits(sa->nic, &limits);

	sfc_log_init(sa, "init nic");
	rc = efx_nic_init(sa->nic);
	if (rc != 0)
		goto fail_nic_init;

	/* Find resource dimensions assigned by firmware to this function */
	rc = efx_nic_get_vi_pool(sa->nic, &evq_allocated, &rxq_allocated,
				 &txq_allocated);
	if (rc != 0)
		goto fail_get_vi_pool;

	/* Firmware may still allocate more than the maximum; enforce limits */
	evq_allocated = MIN(evq_allocated, limits.edl_max_evq_count);
	rxq_allocated = MIN(rxq_allocated, limits.edl_max_rxq_count);
	txq_allocated = MIN(txq_allocated, limits.edl_max_txq_count);

	/*
	 * Subtract the management EVQ, which is not used for traffic.
	 * The resource allocation strategy is as follows:
	 * - one EVQ for management
	 * - one EVQ for each ethdev RXQ
	 * - one EVQ for each ethdev TXQ
	 * - one EVQ and one RXQ for optional MAE counters.
	 */
	if (evq_allocated == 0) {
		sfc_err(sa, "count of allocated EvQ is 0");
		rc = ENOMEM;
		goto fail_allocate_evq;
	}
	evq_allocated--;

	/*
	 * Reserve the absolutely required minimum.
	 * Right now we use a separate EVQ for Rx and for Tx.
	 */
	if (rxq_allocated > 0 && evq_allocated > 0) {
		sa->rxq_max = 1;
		rxq_allocated--;
		evq_allocated--;
	}
	if (txq_allocated > 0 && evq_allocated > 0) {
		sa->txq_max = 1;
		txq_allocated--;
		evq_allocated--;
	}

	if (sfc_mae_counter_rxq_required(sa) &&
	    rxq_allocated > 0 && evq_allocated > 0) {
		rxq_allocated--;
		evq_allocated--;
		sas->counters_rxq_allocated = true;
	} else {
		sas->counters_rxq_allocated = false;
	}

	/* Add remaining allocated queues */
	sa->rxq_max += MIN(rxq_allocated, evq_allocated / 2);
	sa->txq_max += MIN(txq_allocated, evq_allocated - sa->rxq_max);

	/* Keep NIC initialized */
	return 0;

fail_allocate_evq:
fail_get_vi_pool:
	efx_nic_fini(sa->nic);
fail_nic_init:
	return rc;
}
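/*
 * Worked example (illustrative numbers only): with enc_evq_limit = 64,
 * edl_max_evq_count = 64 / 2 = 32. One EVQ is set aside for management,
 * so edl_max_rxq_count = MIN(enc_rxq_limit, (32 - 1) / 2) = 15 and
 * edl_max_txq_count = MIN(enc_txq_limit, 32 - 1 - 15) = 16, assuming
 * enc_rxq_limit/enc_txq_limit and the TSO context limit do not clamp
 * the result further.
 */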
static int
sfc_set_drv_limits(struct sfc_adapter *sa)
{
	const struct rte_eth_dev_data *data = sa->eth_dev->data;
	uint32_t rxq_reserved = sfc_nb_reserved_rxq(sfc_sa2shared(sa));
	efx_drv_limits_t lim;

	memset(&lim, 0, sizeof(lim));

	/*
	 * Limits are strict since they take into account the initial
	 * estimation. The resource allocation strategy is described in
	 * sfc_estimate_resource_limits().
	 */
	lim.edl_min_evq_count = lim.edl_max_evq_count =
		1 + data->nb_rx_queues + data->nb_tx_queues + rxq_reserved;
	lim.edl_min_rxq_count = lim.edl_max_rxq_count =
		data->nb_rx_queues + rxq_reserved;
	lim.edl_min_txq_count = lim.edl_max_txq_count = data->nb_tx_queues;

	return efx_nic_set_drv_limits(sa->nic, &lim);
}

static int
sfc_set_fw_subvariant(struct sfc_adapter *sa)
{
	struct sfc_adapter_shared *sas = sfc_sa2shared(sa);
	const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
	uint64_t tx_offloads = sa->eth_dev->data->dev_conf.txmode.offloads;
	unsigned int txq_index;
	efx_nic_fw_subvariant_t req_fw_subvariant;
	efx_nic_fw_subvariant_t cur_fw_subvariant;
	int rc;

	if (!encp->enc_fw_subvariant_no_tx_csum_supported) {
		sfc_info(sa, "no-Tx-checksum subvariant not supported");
		return 0;
	}

	for (txq_index = 0; txq_index < sas->txq_count; ++txq_index) {
		struct sfc_txq_info *txq_info = &sas->txq_info[txq_index];

		if (txq_info->state & SFC_TXQ_INITIALIZED)
			tx_offloads |= txq_info->offloads;
	}

	if (tx_offloads & (DEV_TX_OFFLOAD_IPV4_CKSUM |
			   DEV_TX_OFFLOAD_TCP_CKSUM |
			   DEV_TX_OFFLOAD_UDP_CKSUM |
			   DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM))
		req_fw_subvariant = EFX_NIC_FW_SUBVARIANT_DEFAULT;
	else
		req_fw_subvariant = EFX_NIC_FW_SUBVARIANT_NO_TX_CSUM;

	rc = efx_nic_get_fw_subvariant(sa->nic, &cur_fw_subvariant);
	if (rc != 0) {
		sfc_err(sa, "failed to get FW subvariant: %d", rc);
		return rc;
	}
	sfc_info(sa, "FW subvariant is %u vs required %u",
		 cur_fw_subvariant, req_fw_subvariant);

	if (cur_fw_subvariant == req_fw_subvariant)
		return 0;

	rc = efx_nic_set_fw_subvariant(sa->nic, req_fw_subvariant);
	if (rc != 0) {
		sfc_err(sa, "failed to set FW subvariant %u: %d",
			req_fw_subvariant, rc);
		return rc;
	}
	sfc_info(sa, "FW subvariant set to %u", req_fw_subvariant);

	return 0;
}
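/*
 * Example: if neither the device-level Tx configuration nor any
 * initialized TxQ requests one of the DEV_TX_OFFLOAD_*_CKSUM offloads,
 * sfc_set_fw_subvariant() above requests the
 * EFX_NIC_FW_SUBVARIANT_NO_TX_CSUM firmware subvariant; otherwise the
 * default subvariant with Tx checksumming is kept.
 */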
static int
sfc_try_start(struct sfc_adapter *sa)
{
	const efx_nic_cfg_t *encp;
	int rc;

	sfc_log_init(sa, "entry");

	SFC_ASSERT(sfc_adapter_is_locked(sa));
	SFC_ASSERT(sa->state == SFC_ADAPTER_STARTING);

	sfc_log_init(sa, "set FW subvariant");
	rc = sfc_set_fw_subvariant(sa);
	if (rc != 0)
		goto fail_set_fw_subvariant;

	sfc_log_init(sa, "set resource limits");
	rc = sfc_set_drv_limits(sa);
	if (rc != 0)
		goto fail_set_drv_limits;

	sfc_log_init(sa, "init nic");
	rc = efx_nic_init(sa->nic);
	if (rc != 0)
		goto fail_nic_init;

	encp = efx_nic_cfg_get(sa->nic);

	/*
	 * Refresh (since it may change on NIC reset/restart) the copy of
	 * supported tunnel encapsulations in shared memory to be used
	 * when supported Rx packet type classes are reported.
	 */
	sa->priv.shared->tunnel_encaps =
		encp->enc_tunnel_encapsulations_supported;

	if (encp->enc_tunnel_encapsulations_supported != 0) {
		sfc_log_init(sa, "apply tunnel config");
		rc = efx_tunnel_reconfigure(sa->nic);
		if (rc != 0)
			goto fail_tunnel_reconfigure;
	}

	rc = sfc_intr_start(sa);
	if (rc != 0)
		goto fail_intr_start;

	rc = sfc_ev_start(sa);
	if (rc != 0)
		goto fail_ev_start;

	rc = sfc_port_start(sa);
	if (rc != 0)
		goto fail_port_start;

	rc = sfc_rx_start(sa);
	if (rc != 0)
		goto fail_rx_start;

	rc = sfc_tx_start(sa);
	if (rc != 0)
		goto fail_tx_start;

	rc = sfc_flow_start(sa);
	if (rc != 0)
		goto fail_flows_insert;

	sfc_log_init(sa, "done");
	return 0;

fail_flows_insert:
	sfc_tx_stop(sa);

fail_tx_start:
	sfc_rx_stop(sa);

fail_rx_start:
	sfc_port_stop(sa);

fail_port_start:
	sfc_ev_stop(sa);

fail_ev_start:
	sfc_intr_stop(sa);

fail_intr_start:
fail_tunnel_reconfigure:
	efx_nic_fini(sa->nic);

fail_nic_init:
fail_set_drv_limits:
fail_set_fw_subvariant:
	sfc_log_init(sa, "failed %d", rc);
	return rc;
}
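/*
 * sfc_try_start() returns positive errno values; sfc_start() below treats
 * EIO, EAGAIN, ENOENT and EINVAL as potentially transient (e.g. seen
 * around MC reboot) and retries up to three times, recreating the vSwitch
 * between attempts.
 */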
int
sfc_start(struct sfc_adapter *sa)
{
	unsigned int start_tries = 3;
	int rc;

	sfc_log_init(sa, "entry");

	SFC_ASSERT(sfc_adapter_is_locked(sa));

	switch (sa->state) {
	case SFC_ADAPTER_CONFIGURED:
		break;
	case SFC_ADAPTER_STARTED:
		sfc_notice(sa, "already started");
		return 0;
	default:
		rc = EINVAL;
		goto fail_bad_state;
	}

	sa->state = SFC_ADAPTER_STARTING;

	rc = 0;
	do {
		/*
		 * FIXME Try to recreate vSwitch on start retry.
		 * vSwitch is absent after MC-reboot-like events and
		 * we should recreate it. Maybe we need a proper
		 * indication instead of guessing.
		 */
		if (rc != 0) {
			sfc_sriov_vswitch_destroy(sa);
			rc = sfc_sriov_vswitch_create(sa);
			if (rc != 0)
				goto fail_sriov_vswitch_create;
		}
		rc = sfc_try_start(sa);
	} while ((--start_tries > 0) &&
		 (rc == EIO || rc == EAGAIN || rc == ENOENT || rc == EINVAL));

	if (rc != 0)
		goto fail_try_start;

	sa->state = SFC_ADAPTER_STARTED;
	sfc_log_init(sa, "done");
	return 0;

fail_try_start:
fail_sriov_vswitch_create:
	sa->state = SFC_ADAPTER_CONFIGURED;
fail_bad_state:
	sfc_log_init(sa, "failed %d", rc);
	return rc;
}

void
sfc_stop(struct sfc_adapter *sa)
{
	sfc_log_init(sa, "entry");

	SFC_ASSERT(sfc_adapter_is_locked(sa));

	switch (sa->state) {
	case SFC_ADAPTER_STARTED:
		break;
	case SFC_ADAPTER_CONFIGURED:
		sfc_notice(sa, "already stopped");
		return;
	default:
		sfc_err(sa, "stop in unexpected state %u", sa->state);
		SFC_ASSERT(B_FALSE);
		return;
	}

	sa->state = SFC_ADAPTER_STOPPING;

	sfc_flow_stop(sa);
	sfc_tx_stop(sa);
	sfc_rx_stop(sa);
	sfc_port_stop(sa);
	sfc_ev_stop(sa);
	sfc_intr_stop(sa);
	efx_nic_fini(sa->nic);

	sa->state = SFC_ADAPTER_CONFIGURED;
	sfc_log_init(sa, "done");
}

static int
sfc_restart(struct sfc_adapter *sa)
{
	int rc;

	SFC_ASSERT(sfc_adapter_is_locked(sa));

	if (sa->state != SFC_ADAPTER_STARTED)
		return EINVAL;

	sfc_stop(sa);

	rc = sfc_start(sa);
	if (rc != 0)
		sfc_err(sa, "restart failed");

	return rc;
}

static void
sfc_restart_if_required(void *arg)
{
	struct sfc_adapter *sa = arg;

	/* If restart is scheduled, clear the flag and do it */
	if (rte_atomic32_cmpset((volatile uint32_t *)&sa->restart_required,
				1, 0)) {
		sfc_adapter_lock(sa);
		if (sa->state == SFC_ADAPTER_STARTED)
			(void)sfc_restart(sa);
		sfc_adapter_unlock(sa);
	}
}

void
sfc_schedule_restart(struct sfc_adapter *sa)
{
	int rc;

	/* Schedule restart alarm if it is not scheduled yet */
	if (!rte_atomic32_test_and_set(&sa->restart_required))
		return;

	rc = rte_eal_alarm_set(1, sfc_restart_if_required, sa);
	if (rc == -ENOTSUP)
		sfc_warn(sa, "alarms are not supported, restart is pending");
	else if (rc != 0)
		sfc_err(sa, "cannot arm restart alarm (rc=%d)", rc);
	else
		sfc_notice(sa, "restart scheduled");
}
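/*
 * Usage sketch (hypothetical caller): an event handler that detects an
 * MC reboot cannot restart the adapter synchronously from the event
 * path, so it calls sfc_schedule_restart(sa); the 1 us alarm then runs
 * sfc_restart_if_required(), which takes the adapter lock and performs
 * sfc_stop()/sfc_start().
 */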
int
sfc_configure(struct sfc_adapter *sa)
{
	int rc;

	sfc_log_init(sa, "entry");

	SFC_ASSERT(sfc_adapter_is_locked(sa));

	SFC_ASSERT(sa->state == SFC_ADAPTER_INITIALIZED ||
		   sa->state == SFC_ADAPTER_CONFIGURED);
	sa->state = SFC_ADAPTER_CONFIGURING;

	rc = sfc_check_conf(sa);
	if (rc != 0)
		goto fail_check_conf;

	rc = sfc_intr_configure(sa);
	if (rc != 0)
		goto fail_intr_configure;

	rc = sfc_port_configure(sa);
	if (rc != 0)
		goto fail_port_configure;

	rc = sfc_rx_configure(sa);
	if (rc != 0)
		goto fail_rx_configure;

	rc = sfc_tx_configure(sa);
	if (rc != 0)
		goto fail_tx_configure;

	rc = sfc_sw_xstats_configure(sa);
	if (rc != 0)
		goto fail_sw_xstats_configure;

	sa->state = SFC_ADAPTER_CONFIGURED;
	sfc_log_init(sa, "done");
	return 0;

fail_sw_xstats_configure:
	sfc_tx_close(sa);

fail_tx_configure:
	sfc_rx_close(sa);

fail_rx_configure:
	sfc_port_close(sa);

fail_port_configure:
	sfc_intr_close(sa);

fail_intr_configure:
fail_check_conf:
	sa->state = SFC_ADAPTER_INITIALIZED;
	sfc_log_init(sa, "failed %d", rc);
	return rc;
}

void
sfc_close(struct sfc_adapter *sa)
{
	sfc_log_init(sa, "entry");

	SFC_ASSERT(sfc_adapter_is_locked(sa));

	SFC_ASSERT(sa->state == SFC_ADAPTER_CONFIGURED);
	sa->state = SFC_ADAPTER_CLOSING;

	sfc_sw_xstats_close(sa);
	sfc_tx_close(sa);
	sfc_rx_close(sa);
	sfc_port_close(sa);
	sfc_intr_close(sa);

	sa->state = SFC_ADAPTER_INITIALIZED;
	sfc_log_init(sa, "done");
}

static int
sfc_mem_bar_init(struct sfc_adapter *sa, const efx_bar_region_t *mem_ebrp)
{
	struct rte_eth_dev *eth_dev = sa->eth_dev;
	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
	efsys_bar_t *ebp = &sa->mem_bar;
	struct rte_mem_resource *res =
		&pci_dev->mem_resource[mem_ebrp->ebr_index];

	SFC_BAR_LOCK_INIT(ebp, eth_dev->data->name);
	ebp->esb_rid = mem_ebrp->ebr_index;
	ebp->esb_dev = pci_dev;
	ebp->esb_base = res->addr;

	sa->fcw_offset = mem_ebrp->ebr_offset;

	return 0;
}

static void
sfc_mem_bar_fini(struct sfc_adapter *sa)
{
	efsys_bar_t *ebp = &sa->mem_bar;

	SFC_BAR_LOCK_DESTROY(ebp);
	memset(ebp, 0, sizeof(*ebp));
}

/*
 * A fixed RSS key which has a property of being symmetric
 * (symmetrical flows are distributed to the same CPU)
 * and also known to give a uniform distribution
 * (a good distribution of traffic between different CPUs)
 */
static const uint8_t default_rss_key[EFX_RSS_KEY_SIZE] = {
	0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
	0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
	0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
	0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
	0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
};
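/*
 * The repeating 0x6d, 0x5a byte pattern above is a known symmetric
 * Toeplitz key: hashing with it yields the same value when source and
 * destination addresses/ports are swapped, so both directions of a
 * connection are steered to the same Rx queue.
 */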
static int
sfc_rss_attach(struct sfc_adapter *sa)
{
	struct sfc_rss *rss = &sfc_sa2shared(sa)->rss;
	int rc;

	rc = efx_intr_init(sa->nic, sa->intr.type, NULL);
	if (rc != 0)
		goto fail_intr_init;

	rc = efx_ev_init(sa->nic);
	if (rc != 0)
		goto fail_ev_init;

	rc = efx_rx_init(sa->nic);
	if (rc != 0)
		goto fail_rx_init;

	rc = efx_rx_scale_default_support_get(sa->nic, &rss->context_type);
	if (rc != 0)
		goto fail_scale_support_get;

	rc = efx_rx_hash_default_support_get(sa->nic, &rss->hash_support);
	if (rc != 0)
		goto fail_hash_support_get;

	rc = sfc_rx_hash_init(sa);
	if (rc != 0)
		goto fail_rx_hash_init;

	efx_rx_fini(sa->nic);
	efx_ev_fini(sa->nic);
	efx_intr_fini(sa->nic);

	rte_memcpy(rss->key, default_rss_key, sizeof(rss->key));
	rss->dummy_rss_context = EFX_RSS_CONTEXT_DEFAULT;

	return 0;

fail_rx_hash_init:
fail_hash_support_get:
fail_scale_support_get:
	efx_rx_fini(sa->nic);

fail_rx_init:
	efx_ev_fini(sa->nic);

fail_ev_init:
	efx_intr_fini(sa->nic);

fail_intr_init:
	return rc;
}

static void
sfc_rss_detach(struct sfc_adapter *sa)
{
	sfc_rx_hash_fini(sa);
}
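/*
 * Note that sfc_rss_attach() brings up the interrupt, event and Rx
 * modules only long enough to query RSS scale and hash support and then
 * shuts them down again; the long-lived initialization is done on
 * device start.
 */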
int
sfc_attach(struct sfc_adapter *sa)
{
	const efx_nic_cfg_t *encp;
	efx_nic_t *enp = sa->nic;
	int rc;

	sfc_log_init(sa, "entry");

	SFC_ASSERT(sfc_adapter_is_locked(sa));

	efx_mcdi_new_epoch(enp);

	sfc_log_init(sa, "reset nic");
	rc = efx_nic_reset(enp);
	if (rc != 0)
		goto fail_nic_reset;

	rc = sfc_sriov_attach(sa);
	if (rc != 0)
		goto fail_sriov_attach;

	/*
	 * Probed NIC is sufficient for tunnel init.
	 * Initialize tunnel support to be able to use libefx
	 * efx_tunnel_config_udp_{add,remove}() in any state and
	 * efx_tunnel_reconfigure() on start up.
	 */
	rc = efx_tunnel_init(enp);
	if (rc != 0)
		goto fail_tunnel_init;

	encp = efx_nic_cfg_get(sa->nic);

	/*
	 * Make a copy of supported tunnel encapsulations in shared
	 * memory to be used when supported Rx packet type classes are
	 * reported.
	 */
	sa->priv.shared->tunnel_encaps =
		encp->enc_tunnel_encapsulations_supported;

	if (sfc_dp_tx_offload_capa(sa->priv.dp_tx) & DEV_TX_OFFLOAD_TCP_TSO) {
		sa->tso = encp->enc_fw_assisted_tso_v2_enabled ||
			  encp->enc_tso_v3_enabled;
		if (!sa->tso)
			sfc_info(sa, "TSO support isn't available on this adapter");
	}

	if (sa->tso &&
	    (sfc_dp_tx_offload_capa(sa->priv.dp_tx) &
	     (DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
	      DEV_TX_OFFLOAD_GENEVE_TNL_TSO)) != 0) {
		sa->tso_encap = encp->enc_fw_assisted_tso_v2_encap_enabled ||
				encp->enc_tso_v3_enabled;
		if (!sa->tso_encap)
			sfc_info(sa, "Encapsulated TSO support isn't available on this adapter");
	}

	sfc_log_init(sa, "estimate resource limits");
	rc = sfc_estimate_resource_limits(sa);
	if (rc != 0)
		goto fail_estimate_rsrc_limits;

	sa->evq_max_entries = encp->enc_evq_max_nevs;
	SFC_ASSERT(rte_is_power_of_2(sa->evq_max_entries));

	sa->evq_min_entries = encp->enc_evq_min_nevs;
	SFC_ASSERT(rte_is_power_of_2(sa->evq_min_entries));

	sa->rxq_max_entries = encp->enc_rxq_max_ndescs;
	SFC_ASSERT(rte_is_power_of_2(sa->rxq_max_entries));

	sa->rxq_min_entries = encp->enc_rxq_min_ndescs;
	SFC_ASSERT(rte_is_power_of_2(sa->rxq_min_entries));

	sa->txq_max_entries = encp->enc_txq_max_ndescs;
	SFC_ASSERT(rte_is_power_of_2(sa->txq_max_entries));

	sa->txq_min_entries = encp->enc_txq_min_ndescs;
	SFC_ASSERT(rte_is_power_of_2(sa->txq_min_entries));

	rc = sfc_intr_attach(sa);
	if (rc != 0)
		goto fail_intr_attach;

	rc = sfc_ev_attach(sa);
	if (rc != 0)
		goto fail_ev_attach;

	rc = sfc_port_attach(sa);
	if (rc != 0)
		goto fail_port_attach;

	rc = sfc_rss_attach(sa);
	if (rc != 0)
		goto fail_rss_attach;

	rc = sfc_filter_attach(sa);
	if (rc != 0)
		goto fail_filter_attach;

	rc = sfc_mae_counter_rxq_attach(sa);
	if (rc != 0)
		goto fail_mae_counter_rxq_attach;

	rc = sfc_mae_attach(sa);
	if (rc != 0)
		goto fail_mae_attach;

	sfc_log_init(sa, "fini nic");
	efx_nic_fini(enp);

	sfc_flow_init(sa);

	rc = sfc_sw_xstats_init(sa);
	if (rc != 0)
		goto fail_sw_xstats_init;

	/*
	 * Create vSwitch to be able to use VFs when PF is not started yet
	 * as DPDK port. VFs should be able to talk to each other even
	 * if PF is down.
	 */
	rc = sfc_sriov_vswitch_create(sa);
	if (rc != 0)
		goto fail_sriov_vswitch_create;

	sa->state = SFC_ADAPTER_INITIALIZED;

	sfc_log_init(sa, "done");
	return 0;

fail_sriov_vswitch_create:
	sfc_sw_xstats_close(sa);

fail_sw_xstats_init:
	sfc_flow_fini(sa);
	sfc_mae_detach(sa);

fail_mae_attach:
	sfc_mae_counter_rxq_detach(sa);

fail_mae_counter_rxq_attach:
	sfc_filter_detach(sa);

fail_filter_attach:
	sfc_rss_detach(sa);

fail_rss_attach:
	sfc_port_detach(sa);

fail_port_attach:
	sfc_ev_detach(sa);

fail_ev_attach:
	sfc_intr_detach(sa);

fail_intr_attach:
	efx_nic_fini(sa->nic);

fail_estimate_rsrc_limits:
fail_tunnel_init:
	efx_tunnel_fini(sa->nic);
	sfc_sriov_detach(sa);

fail_sriov_attach:
fail_nic_reset:

	sfc_log_init(sa, "failed %d", rc);
	return rc;
}

void
sfc_detach(struct sfc_adapter *sa)
{
	sfc_log_init(sa, "entry");

	SFC_ASSERT(sfc_adapter_is_locked(sa));

	sfc_sriov_vswitch_destroy(sa);

	sfc_flow_fini(sa);

	sfc_mae_detach(sa);
	sfc_mae_counter_rxq_detach(sa);
	sfc_filter_detach(sa);
	sfc_rss_detach(sa);
	sfc_port_detach(sa);
	sfc_ev_detach(sa);
	sfc_intr_detach(sa);
	efx_tunnel_fini(sa->nic);
	sfc_sriov_detach(sa);

	sa->state = SFC_ADAPTER_UNINITIALIZED;
}
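/*
 * Example (hypothetical EAL command line; value strings are defined by
 * the SFC_KVARG_FW_VARIANT_* macros in sfc_kvargs.h):
 *	dpdk-testpmd -a 0000:01:00.0,fw_variant=ultra-low-latency -- -i
 * The handler below maps such a string to the corresponding
 * efx_fw_variant_t value.
 */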
static int
sfc_kvarg_fv_variant_handler(__rte_unused const char *key,
			     const char *value_str, void *opaque)
{
	uint32_t *value = opaque;

	if (strcasecmp(value_str, SFC_KVARG_FW_VARIANT_DONT_CARE) == 0)
		*value = EFX_FW_VARIANT_DONT_CARE;
	else if (strcasecmp(value_str, SFC_KVARG_FW_VARIANT_FULL_FEATURED) == 0)
		*value = EFX_FW_VARIANT_FULL_FEATURED;
	else if (strcasecmp(value_str, SFC_KVARG_FW_VARIANT_LOW_LATENCY) == 0)
		*value = EFX_FW_VARIANT_LOW_LATENCY;
	else if (strcasecmp(value_str, SFC_KVARG_FW_VARIANT_PACKED_STREAM) == 0)
		*value = EFX_FW_VARIANT_PACKED_STREAM;
	else if (strcasecmp(value_str, SFC_KVARG_FW_VARIANT_DPDK) == 0)
		*value = EFX_FW_VARIANT_DPDK;
	else
		return -EINVAL;

	return 0;
}

static int
sfc_get_fw_variant(struct sfc_adapter *sa, efx_fw_variant_t *efv)
{
	efx_nic_fw_info_t enfi;
	int rc;

	rc = efx_nic_get_fw_version(sa->nic, &enfi);
	if (rc != 0)
		return rc;
	else if (!enfi.enfi_dpcpu_fw_ids_valid)
		return ENOTSUP;

	/*
	 * Firmware variant can be uniquely identified by the RxDPCPU
	 * firmware id
	 */
	switch (enfi.enfi_rx_dpcpu_fw_id) {
	case EFX_RXDP_FULL_FEATURED_FW_ID:
		*efv = EFX_FW_VARIANT_FULL_FEATURED;
		break;

	case EFX_RXDP_LOW_LATENCY_FW_ID:
		*efv = EFX_FW_VARIANT_LOW_LATENCY;
		break;

	case EFX_RXDP_PACKED_STREAM_FW_ID:
		*efv = EFX_FW_VARIANT_PACKED_STREAM;
		break;

	case EFX_RXDP_DPDK_FW_ID:
		*efv = EFX_FW_VARIANT_DPDK;
		break;

	default:
		/*
		 * Other firmware variants are not considered, since they are
		 * not supported in the device parameters
		 */
		*efv = EFX_FW_VARIANT_DONT_CARE;
		break;
	}

	return 0;
}

static const char *
sfc_fw_variant2str(efx_fw_variant_t efv)
{
	switch (efv) {
	case EFX_FW_VARIANT_FULL_FEATURED:
		return SFC_KVARG_FW_VARIANT_FULL_FEATURED;
	case EFX_FW_VARIANT_LOW_LATENCY:
		return SFC_KVARG_FW_VARIANT_LOW_LATENCY;
	case EFX_FW_VARIANT_PACKED_STREAM:
		return SFC_KVARG_FW_VARIANT_PACKED_STREAM;
	case EFX_FW_VARIANT_DPDK:
		return SFC_KVARG_FW_VARIANT_DPDK;
	default:
		return "unknown";
	}
}

static int
sfc_kvarg_rxd_wait_timeout_ns(struct sfc_adapter *sa)
{
	int rc;
	long value;

	value = SFC_RXD_WAIT_TIMEOUT_NS_DEF;

	rc = sfc_kvargs_process(sa, SFC_KVARG_RXD_WAIT_TIMEOUT_NS,
				sfc_kvarg_long_handler, &value);
	if (rc != 0)
		return rc;

	if (value < 0 ||
	    (unsigned long)value > EFX_RXQ_ES_SUPER_BUFFER_HOL_BLOCK_MAX) {
		sfc_err(sa, "wrong '" SFC_KVARG_RXD_WAIT_TIMEOUT_NS "' "
			    "was set (%ld);", value);
		sfc_err(sa, "it must not be less than 0 or greater than %u",
			EFX_RXQ_ES_SUPER_BUFFER_HOL_BLOCK_MAX);
		return EINVAL;
	}

	sa->rxd_wait_timeout_ns = value;
	return 0;
}
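/*
 * Example (hypothetical devarg): rxd_wait_timeout_ns=100000 caps the
 * head-of-line blocking timeout used by equal-stride super-buffer Rx
 * queues at 100 us; values outside
 * [0, EFX_RXQ_ES_SUPER_BUFFER_HOL_BLOCK_MAX] are rejected above with
 * EINVAL.
 */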
static int
sfc_nic_probe(struct sfc_adapter *sa)
{
	efx_nic_t *enp = sa->nic;
	efx_fw_variant_t preferred_efv;
	efx_fw_variant_t efv;
	int rc;

	preferred_efv = EFX_FW_VARIANT_DONT_CARE;
	rc = sfc_kvargs_process(sa, SFC_KVARG_FW_VARIANT,
				sfc_kvarg_fv_variant_handler,
				&preferred_efv);
	if (rc != 0) {
		sfc_err(sa, "invalid %s parameter value", SFC_KVARG_FW_VARIANT);
		return rc;
	}

	rc = sfc_kvarg_rxd_wait_timeout_ns(sa);
	if (rc != 0)
		return rc;

	rc = efx_nic_probe(enp, preferred_efv);
	if (rc == EACCES) {
		/* Unprivileged functions cannot set FW variant */
		rc = efx_nic_probe(enp, EFX_FW_VARIANT_DONT_CARE);
	}
	if (rc != 0)
		return rc;

	rc = sfc_get_fw_variant(sa, &efv);
	if (rc == ENOTSUP) {
		sfc_warn(sa, "FW variant can not be obtained");
		return 0;
	}
	if (rc != 0)
		return rc;

	/* Check that firmware variant was changed to the requested one */
	if (preferred_efv != EFX_FW_VARIANT_DONT_CARE &&
	    preferred_efv != efv) {
		sfc_warn(sa, "FW variant has not changed to the requested %s",
			 sfc_fw_variant2str(preferred_efv));
	}

	sfc_notice(sa, "running FW variant is %s", sfc_fw_variant2str(efv));

	return 0;
}

int
sfc_probe(struct sfc_adapter *sa)
{
	efx_bar_region_t mem_ebrp;
	struct rte_eth_dev *eth_dev = sa->eth_dev;
	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
	efx_nic_t *enp;
	int rc;

	sfc_log_init(sa, "entry");

	SFC_ASSERT(sfc_adapter_is_locked(sa));

	sa->socket_id = rte_socket_id();
	rte_atomic32_init(&sa->restart_required);

	sfc_log_init(sa, "get family");
	rc = sfc_efx_family(pci_dev, &mem_ebrp, &sa->family);
	if (rc != 0)
		goto fail_family;
	sfc_log_init(sa,
		     "family is %u, membar is %u, function control window offset is %lu",
		     sa->family, mem_ebrp.ebr_index, mem_ebrp.ebr_offset);

	sfc_log_init(sa, "init mem bar");
	rc = sfc_mem_bar_init(sa, &mem_ebrp);
	if (rc != 0)
		goto fail_mem_bar_init;

	sfc_log_init(sa, "create nic");
	rte_spinlock_init(&sa->nic_lock);
	rc = efx_nic_create(sa->family, (efsys_identifier_t *)sa,
			    &sa->mem_bar, mem_ebrp.ebr_offset,
			    &sa->nic_lock, &enp);
	if (rc != 0)
		goto fail_nic_create;
	sa->nic = enp;

	rc = sfc_mcdi_init(sa);
	if (rc != 0)
		goto fail_mcdi_init;

	sfc_log_init(sa, "probe nic");
	rc = sfc_nic_probe(sa);
	if (rc != 0)
		goto fail_nic_probe;

	sfc_log_init(sa, "done");
	return 0;

fail_nic_probe:
	sfc_mcdi_fini(sa);

fail_mcdi_init:
	sfc_log_init(sa, "destroy nic");
	sa->nic = NULL;
	efx_nic_destroy(enp);

fail_nic_create:
	sfc_mem_bar_fini(sa);

fail_mem_bar_init:
fail_family:
	sfc_log_init(sa, "failed %d", rc);
	return rc;
}

void
sfc_unprobe(struct sfc_adapter *sa)
{
	efx_nic_t *enp = sa->nic;

	sfc_log_init(sa, "entry");

	SFC_ASSERT(sfc_adapter_is_locked(sa));

	sfc_log_init(sa, "unprobe nic");
	efx_nic_unprobe(enp);

	sfc_mcdi_fini(sa);

	/*
	 * Make sure there is no pending alarm to restart since we are
	 * going to free device private which is passed as the callback
	 * opaque data. A new alarm cannot be scheduled since MCDI is
	 * shut down.
	 */
	rte_eal_alarm_cancel(sfc_restart_if_required, sa);

	sfc_log_init(sa, "destroy nic");
	sa->nic = NULL;
	efx_nic_destroy(enp);

	sfc_mem_bar_fini(sa);

	sfc_flow_fini(sa);
	sa->state = SFC_ADAPTER_UNINITIALIZED;
}

uint32_t
sfc_register_logtype(const struct rte_pci_addr *pci_addr,
		     const char *lt_prefix_str, uint32_t ll_default)
{
	size_t lt_prefix_str_size = strlen(lt_prefix_str);
	size_t lt_str_size_max;
	char *lt_str = NULL;
	int ret;

	if (SIZE_MAX - PCI_PRI_STR_SIZE - 1 > lt_prefix_str_size) {
		++lt_prefix_str_size; /* Reserve space for prefix separator */
		lt_str_size_max = lt_prefix_str_size + PCI_PRI_STR_SIZE + 1;
	} else {
		return sfc_logtype_driver;
	}

	lt_str = rte_zmalloc("logtype_str", lt_str_size_max, 0);
	if (lt_str == NULL)
		return sfc_logtype_driver;

	strncpy(lt_str, lt_prefix_str, lt_prefix_str_size);
	lt_str[lt_prefix_str_size - 1] = '.';
	rte_pci_device_name(pci_addr, lt_str + lt_prefix_str_size,
			    lt_str_size_max - lt_prefix_str_size);
	lt_str[lt_str_size_max - 1] = '\0';

	ret = rte_log_register_type_and_pick_level(lt_str, ll_default);
	rte_free(lt_str);

	if (ret < 0)
		return sfc_logtype_driver;

	return ret;
}

struct sfc_hw_switch_id {
	char board_sn[RTE_SIZEOF_FIELD(efx_nic_board_info_t, enbi_serial)];
};

int
sfc_hw_switch_id_init(struct sfc_adapter *sa,
		      struct sfc_hw_switch_id **idp)
{
	efx_nic_board_info_t board_info;
	struct sfc_hw_switch_id *id;
	int rc;

	if (idp == NULL)
		return EINVAL;

	id = rte_zmalloc("sfc_hw_switch_id", sizeof(*id), 0);
	if (id == NULL)
		return ENOMEM;

	rc = efx_nic_get_board_info(sa->nic, &board_info);
	if (rc != 0) {
		rte_free(id);
		return rc;
	}

	memcpy(id->board_sn, board_info.enbi_serial, sizeof(id->board_sn));

	*idp = id;

	return 0;
}

void
sfc_hw_switch_id_fini(__rte_unused struct sfc_adapter *sa,
		      struct sfc_hw_switch_id *id)
{
	rte_free(id);
}

bool
sfc_hw_switch_ids_equal(const struct sfc_hw_switch_id *left,
			const struct sfc_hw_switch_id *right)
{
	return strncmp(left->board_sn, right->board_sn,
		       sizeof(left->board_sn)) == 0;
}
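/*
 * Usage sketch (hypothetical caller): code that needs to know whether two
 * adapters sit behind the same HW switch can compare their IDs, e.g.:
 *
 *	struct sfc_hw_switch_id *ida, *idb;
 *
 *	if (sfc_hw_switch_id_init(sa_a, &ida) == 0 &&
 *	    sfc_hw_switch_id_init(sa_b, &idb) == 0 &&
 *	    sfc_hw_switch_ids_equal(ida, idb))
 *		same_switch = true;
 *
 * IDs are compared by board serial number, so equality means both ports
 * belong to the same physical board.
 */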