/* SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright(c) 2019-2020 Xilinx, Inc.
 * Copyright(c) 2016-2019 Solarflare Communications Inc.
 *
 * This software was jointly developed between OKTET Labs (under contract
 * for Solarflare) and Solarflare Communications, Inc.
 */

/* sysconf() */
#include <unistd.h>

#include <rte_errno.h>
#include <rte_alarm.h>

#include "efx.h"

#include "sfc.h"
#include "sfc_debug.h"
#include "sfc_log.h"
#include "sfc_ev.h"
#include "sfc_rx.h"
#include "sfc_tx.h"
#include "sfc_kvargs.h"
#include "sfc_tweak.h"


int
sfc_dma_alloc(const struct sfc_adapter *sa, const char *name, uint16_t id,
	      size_t len, int socket_id, efsys_mem_t *esmp)
{
	const struct rte_memzone *mz;

	sfc_log_init(sa, "name=%s id=%u len=%zu socket_id=%d",
		     name, id, len, socket_id);

	mz = rte_eth_dma_zone_reserve(sa->eth_dev, name, id, len,
				      sysconf(_SC_PAGESIZE), socket_id);
	if (mz == NULL) {
		sfc_err(sa, "cannot reserve DMA zone for %s:%u %#x@%d: %s",
			name, (unsigned int)id, (unsigned int)len, socket_id,
			rte_strerror(rte_errno));
		return ENOMEM;
	}

	esmp->esm_addr = mz->iova;
	if (esmp->esm_addr == RTE_BAD_IOVA) {
		(void)rte_memzone_free(mz);
		return EFAULT;
	}

	esmp->esm_mz = mz;
	esmp->esm_base = mz->addr;

	sfc_info(sa,
		 "DMA name=%s id=%u len=%zu socket_id=%d => virt=%p iova=%lx",
		 name, id, len, socket_id, esmp->esm_base,
		 (unsigned long)esmp->esm_addr);

	return 0;
}

void
sfc_dma_free(const struct sfc_adapter *sa, efsys_mem_t *esmp)
{
	int rc;

	sfc_log_init(sa, "name=%s", esmp->esm_mz->name);

	rc = rte_memzone_free(esmp->esm_mz);
	if (rc != 0)
		sfc_err(sa, "rte_memzone_free() failed: %d", rc);

	memset(esmp, 0, sizeof(*esmp));
}

static uint32_t
sfc_phy_cap_from_link_speeds(uint32_t speeds)
{
	uint32_t phy_caps = 0;

	if (~speeds & ETH_LINK_SPEED_FIXED) {
		phy_caps |= (1 << EFX_PHY_CAP_AN);
		/*
		 * If no speeds are specified in the mask, any supported
		 * speed may be negotiated.
		 */
		if (speeds == ETH_LINK_SPEED_AUTONEG)
			phy_caps |=
				(1 << EFX_PHY_CAP_1000FDX) |
				(1 << EFX_PHY_CAP_10000FDX) |
				(1 << EFX_PHY_CAP_25000FDX) |
				(1 << EFX_PHY_CAP_40000FDX) |
				(1 << EFX_PHY_CAP_50000FDX) |
				(1 << EFX_PHY_CAP_100000FDX);
	}
	if (speeds & ETH_LINK_SPEED_1G)
		phy_caps |= (1 << EFX_PHY_CAP_1000FDX);
	if (speeds & ETH_LINK_SPEED_10G)
		phy_caps |= (1 << EFX_PHY_CAP_10000FDX);
	if (speeds & ETH_LINK_SPEED_25G)
		phy_caps |= (1 << EFX_PHY_CAP_25000FDX);
	if (speeds & ETH_LINK_SPEED_40G)
		phy_caps |= (1 << EFX_PHY_CAP_40000FDX);
	if (speeds & ETH_LINK_SPEED_50G)
		phy_caps |= (1 << EFX_PHY_CAP_50000FDX);
	if (speeds & ETH_LINK_SPEED_100G)
		phy_caps |= (1 << EFX_PHY_CAP_100000FDX);

	return phy_caps;
}

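/*
 * Illustrative example of the mapping above: link_speeds ==
 * (ETH_LINK_SPEED_10G | ETH_LINK_SPEED_25G) without ETH_LINK_SPEED_FIXED
 * yields (1 << EFX_PHY_CAP_AN) | (1 << EFX_PHY_CAP_10000FDX) |
 * (1 << EFX_PHY_CAP_25000FDX), i.e. auto-negotiation restricted to the
 * two requested speeds, whereas ETH_LINK_SPEED_FIXED suppresses the
 * auto-negotiation capability bit.
 */
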
/*
 * Check requested device level configuration.
 * Receive and transmit configuration is checked in the corresponding
 * modules.
 */
static int
sfc_check_conf(struct sfc_adapter *sa)
{
	const struct rte_eth_conf *conf = &sa->eth_dev->data->dev_conf;
	int rc = 0;

	sa->port.phy_adv_cap =
		sfc_phy_cap_from_link_speeds(conf->link_speeds) &
		sa->port.phy_adv_cap_mask;
	if ((sa->port.phy_adv_cap & ~(1 << EFX_PHY_CAP_AN)) == 0) {
		sfc_err(sa, "No link speeds from mask %#x are supported",
			conf->link_speeds);
		rc = EINVAL;
	}

#if !EFSYS_OPT_LOOPBACK
	if (conf->lpbk_mode != 0) {
		sfc_err(sa, "Loopback not supported");
		rc = EINVAL;
	}
#endif

	if (conf->dcb_capability_en != 0) {
		sfc_err(sa, "Priority-based flow control not supported");
		rc = EINVAL;
	}

	if (conf->fdir_conf.mode != RTE_FDIR_MODE_NONE) {
		sfc_err(sa, "Flow Director not supported");
		rc = EINVAL;
	}

	if ((conf->intr_conf.lsc != 0) &&
	    (sa->intr.type != EFX_INTR_LINE) &&
	    (sa->intr.type != EFX_INTR_MESSAGE)) {
		sfc_err(sa, "Link status change interrupt not supported");
		rc = EINVAL;
	}

	if (conf->intr_conf.rxq != 0 &&
	    (sa->priv.dp_rx->features & SFC_DP_RX_FEAT_INTR) == 0) {
		sfc_err(sa, "Receive queue interrupt not supported");
		rc = EINVAL;
	}

	return rc;
}

/*
 * Find out the maximum number of receive and transmit queues which
 * could be advertised.
 *
 * The NIC is kept initialized on success to allow other modules to
 * acquire defaults and capabilities.
 */
static int
sfc_estimate_resource_limits(struct sfc_adapter *sa)
{
	const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
	efx_drv_limits_t limits;
	int rc;
	uint32_t evq_allocated;
	uint32_t rxq_allocated;
	uint32_t txq_allocated;

	memset(&limits, 0, sizeof(limits));

	/* Request at least one Rx and Tx queue */
	limits.edl_min_rxq_count = 1;
	limits.edl_min_txq_count = 1;
	/* Management event queue plus event queue for each Tx and Rx queue */
	limits.edl_min_evq_count =
		1 + limits.edl_min_rxq_count + limits.edl_min_txq_count;

	/*
	 * Divide by the number of functions to guarantee that all
	 * functions will get the promised resources.
	 */
	/* FIXME Divide by number of functions (not 2) below */
	limits.edl_max_evq_count = encp->enc_evq_limit / 2;
	SFC_ASSERT(limits.edl_max_evq_count >= limits.edl_min_evq_count);

	/* Split equally between receive and transmit */
	limits.edl_max_rxq_count =
		MIN(encp->enc_rxq_limit, (limits.edl_max_evq_count - 1) / 2);
	SFC_ASSERT(limits.edl_max_rxq_count >= limits.edl_min_rxq_count);

	limits.edl_max_txq_count =
		MIN(encp->enc_txq_limit,
		    limits.edl_max_evq_count - 1 - limits.edl_max_rxq_count);

	if (sa->tso && encp->enc_fw_assisted_tso_v2_enabled)
		limits.edl_max_txq_count =
			MIN(limits.edl_max_txq_count,
			    encp->enc_fw_assisted_tso_v2_n_contexts /
			    encp->enc_hw_pf_count);

	SFC_ASSERT(limits.edl_max_txq_count >= limits.edl_min_txq_count);

	/*
	 * Configure the minimum resources required for the driver to
	 * operate and the maximum resources the driver is capable of
	 * using.
	 */
	efx_nic_set_drv_limits(sa->nic, &limits);

	sfc_log_init(sa, "init nic");
	rc = efx_nic_init(sa->nic);
	if (rc != 0)
		goto fail_nic_init;

	/* Find resource dimensions assigned by firmware to this function */
	rc = efx_nic_get_vi_pool(sa->nic, &evq_allocated, &rxq_allocated,
				 &txq_allocated);
	if (rc != 0)
		goto fail_get_vi_pool;

	/*
	 * The firmware may still allocate more than the maximum;
	 * enforce the limit.
	 */
	evq_allocated = MIN(evq_allocated, limits.edl_max_evq_count);
	rxq_allocated = MIN(rxq_allocated, limits.edl_max_rxq_count);
	txq_allocated = MIN(txq_allocated, limits.edl_max_txq_count);

	/* Subtract the management EVQ which is not used for traffic */
	SFC_ASSERT(evq_allocated > 0);
	evq_allocated--;

	/* Right now we use a separate EVQ for each Rx and Tx queue */
	sa->rxq_max = MIN(rxq_allocated, evq_allocated / 2);
	sa->txq_max = MIN(txq_allocated, evq_allocated - sa->rxq_max);

	/* Keep NIC initialized */
	return 0;

fail_get_vi_pool:
	efx_nic_fini(sa->nic);
fail_nic_init:
	return rc;
}

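/*
 * Worked example of the estimation above (illustrative numbers only):
 * with enc_evq_limit = 64 the interim EVQ maximum is 32; one EVQ is
 * reserved for management, so up to 15 Rx queues ((32 - 1) / 2) and up
 * to 16 Tx queues (32 - 1 - 15) can be advertised, subject to
 * enc_rxq_limit, enc_txq_limit, the TSOv2 context cap and whatever
 * firmware actually grants via efx_nic_get_vi_pool().
 */
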
static int
sfc_set_drv_limits(struct sfc_adapter *sa)
{
	const struct rte_eth_dev_data *data = sa->eth_dev->data;
	efx_drv_limits_t lim;

	memset(&lim, 0, sizeof(lim));

	/*
	 * Limits are strict since they take the initial estimation
	 * into account.
	 */
	lim.edl_min_evq_count = lim.edl_max_evq_count =
		1 + data->nb_rx_queues + data->nb_tx_queues;
	lim.edl_min_rxq_count = lim.edl_max_rxq_count = data->nb_rx_queues;
	lim.edl_min_txq_count = lim.edl_max_txq_count = data->nb_tx_queues;

	return efx_nic_set_drv_limits(sa->nic, &lim);
}

static int
sfc_set_fw_subvariant(struct sfc_adapter *sa)
{
	struct sfc_adapter_shared *sas = sfc_sa2shared(sa);
	const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
	uint64_t tx_offloads = sa->eth_dev->data->dev_conf.txmode.offloads;
	unsigned int txq_index;
	efx_nic_fw_subvariant_t req_fw_subvariant;
	efx_nic_fw_subvariant_t cur_fw_subvariant;
	int rc;

	if (!encp->enc_fw_subvariant_no_tx_csum_supported) {
		sfc_info(sa, "no-Tx-checksum subvariant not supported");
		return 0;
	}

	for (txq_index = 0; txq_index < sas->txq_count; ++txq_index) {
		struct sfc_txq_info *txq_info = &sas->txq_info[txq_index];

		if (txq_info->state & SFC_TXQ_INITIALIZED)
			tx_offloads |= txq_info->offloads;
	}

	if (tx_offloads & (DEV_TX_OFFLOAD_IPV4_CKSUM |
			   DEV_TX_OFFLOAD_TCP_CKSUM |
			   DEV_TX_OFFLOAD_UDP_CKSUM |
			   DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM))
		req_fw_subvariant = EFX_NIC_FW_SUBVARIANT_DEFAULT;
	else
		req_fw_subvariant = EFX_NIC_FW_SUBVARIANT_NO_TX_CSUM;

	rc = efx_nic_get_fw_subvariant(sa->nic, &cur_fw_subvariant);
	if (rc != 0) {
		sfc_err(sa, "failed to get FW subvariant: %d", rc);
		return rc;
	}
	sfc_info(sa, "FW subvariant is %u vs required %u",
		 cur_fw_subvariant, req_fw_subvariant);

	if (cur_fw_subvariant == req_fw_subvariant)
		return 0;

	rc = efx_nic_set_fw_subvariant(sa->nic, req_fw_subvariant);
	if (rc != 0) {
		sfc_err(sa, "failed to set FW subvariant %u: %d",
			req_fw_subvariant, rc);
		return rc;
	}
	sfc_info(sa, "FW subvariant set to %u", req_fw_subvariant);

	return 0;
}

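/*
 * Single attempt to start the adapter: apply the configured limits,
 * (re)initialize the NIC and bring the datapath modules up in order
 * (interrupts, events, port, Rx, Tx, flows). On failure, the modules
 * already started are torn down in reverse order.
 */
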
static int
sfc_try_start(struct sfc_adapter *sa)
{
	const efx_nic_cfg_t *encp;
	int rc;

	sfc_log_init(sa, "entry");

	SFC_ASSERT(sfc_adapter_is_locked(sa));
	SFC_ASSERT(sa->state == SFC_ADAPTER_STARTING);

	sfc_log_init(sa, "set FW subvariant");
	rc = sfc_set_fw_subvariant(sa);
	if (rc != 0)
		goto fail_set_fw_subvariant;

	sfc_log_init(sa, "set resource limits");
	rc = sfc_set_drv_limits(sa);
	if (rc != 0)
		goto fail_set_drv_limits;

	sfc_log_init(sa, "init nic");
	rc = efx_nic_init(sa->nic);
	if (rc != 0)
		goto fail_nic_init;

	encp = efx_nic_cfg_get(sa->nic);

	/*
	 * Refresh the copy of supported tunnel encapsulations in shared
	 * memory (it may change on NIC reset/restart); it is used when
	 * the supported Rx packet type classes are queried.
	 */
	sa->priv.shared->tunnel_encaps =
		encp->enc_tunnel_encapsulations_supported;

	if (encp->enc_tunnel_encapsulations_supported != 0) {
		sfc_log_init(sa, "apply tunnel config");
		rc = efx_tunnel_reconfigure(sa->nic);
		if (rc != 0)
			goto fail_tunnel_reconfigure;
	}

	rc = sfc_intr_start(sa);
	if (rc != 0)
		goto fail_intr_start;

	rc = sfc_ev_start(sa);
	if (rc != 0)
		goto fail_ev_start;

	rc = sfc_port_start(sa);
	if (rc != 0)
		goto fail_port_start;

	rc = sfc_rx_start(sa);
	if (rc != 0)
		goto fail_rx_start;

	rc = sfc_tx_start(sa);
	if (rc != 0)
		goto fail_tx_start;

	rc = sfc_flow_start(sa);
	if (rc != 0)
		goto fail_flows_insert;

	sfc_log_init(sa, "done");
	return 0;

fail_flows_insert:
	sfc_tx_stop(sa);

fail_tx_start:
	sfc_rx_stop(sa);

fail_rx_start:
	sfc_port_stop(sa);

fail_port_start:
	sfc_ev_stop(sa);

fail_ev_start:
	sfc_intr_stop(sa);

fail_intr_start:
fail_tunnel_reconfigure:
	efx_nic_fini(sa->nic);

fail_nic_init:
fail_set_drv_limits:
fail_set_fw_subvariant:
	sfc_log_init(sa, "failed %d", rc);
	return rc;
}

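/*
 * Start the adapter with a small number of retries: some of the
 * failures seen here (e.g. right after an MC reboot) are transient,
 * and the retry loop below recreates the vSwitch before trying again.
 */
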
int
sfc_start(struct sfc_adapter *sa)
{
	unsigned int start_tries = 3;
	int rc;

	sfc_log_init(sa, "entry");

	SFC_ASSERT(sfc_adapter_is_locked(sa));

	switch (sa->state) {
	case SFC_ADAPTER_CONFIGURED:
		break;
	case SFC_ADAPTER_STARTED:
		sfc_notice(sa, "already started");
		return 0;
	default:
		rc = EINVAL;
		goto fail_bad_state;
	}

	sa->state = SFC_ADAPTER_STARTING;

	rc = 0;
	do {
		/*
		 * FIXME Try to recreate vSwitch on start retry.
		 * vSwitch is absent after MC-reboot-like events and
		 * we should recreate it. Maybe we need a proper
		 * indication instead of guessing.
		 */
		if (rc != 0) {
			sfc_sriov_vswitch_destroy(sa);
			rc = sfc_sriov_vswitch_create(sa);
			if (rc != 0)
				goto fail_sriov_vswitch_create;
		}
		rc = sfc_try_start(sa);
	} while ((--start_tries > 0) &&
		 (rc == EIO || rc == EAGAIN || rc == ENOENT || rc == EINVAL));

	if (rc != 0)
		goto fail_try_start;

	sa->state = SFC_ADAPTER_STARTED;
	sfc_log_init(sa, "done");
	return 0;

fail_try_start:
fail_sriov_vswitch_create:
	sa->state = SFC_ADAPTER_CONFIGURED;
fail_bad_state:
	sfc_log_init(sa, "failed %d", rc);
	return rc;
}

void
sfc_stop(struct sfc_adapter *sa)
{
	sfc_log_init(sa, "entry");

	SFC_ASSERT(sfc_adapter_is_locked(sa));

	switch (sa->state) {
	case SFC_ADAPTER_STARTED:
		break;
	case SFC_ADAPTER_CONFIGURED:
		sfc_notice(sa, "already stopped");
		return;
	default:
		sfc_err(sa, "stop in unexpected state %u", sa->state);
		SFC_ASSERT(B_FALSE);
		return;
	}

	sa->state = SFC_ADAPTER_STOPPING;

	sfc_flow_stop(sa);
	sfc_tx_stop(sa);
	sfc_rx_stop(sa);
	sfc_port_stop(sa);
	sfc_ev_stop(sa);
	sfc_intr_stop(sa);
	efx_nic_fini(sa->nic);

	sa->state = SFC_ADAPTER_CONFIGURED;
	sfc_log_init(sa, "done");
}

static int
sfc_restart(struct sfc_adapter *sa)
{
	int rc;

	SFC_ASSERT(sfc_adapter_is_locked(sa));

	if (sa->state != SFC_ADAPTER_STARTED)
		return EINVAL;

	sfc_stop(sa);

	rc = sfc_start(sa);
	if (rc != 0)
		sfc_err(sa, "restart failed");

	return rc;
}

static void
sfc_restart_if_required(void *arg)
{
	struct sfc_adapter *sa = arg;

	/* If a restart is scheduled, clear the flag and do it */
	if (rte_atomic32_cmpset((volatile uint32_t *)&sa->restart_required,
				1, 0)) {
		sfc_adapter_lock(sa);
		if (sa->state == SFC_ADAPTER_STARTED)
			(void)sfc_restart(sa);
		sfc_adapter_unlock(sa);
	}
}

void
sfc_schedule_restart(struct sfc_adapter *sa)
{
	int rc;

	/* Schedule the restart alarm if it is not scheduled yet */
	if (!rte_atomic32_test_and_set(&sa->restart_required))
		return;

	rc = rte_eal_alarm_set(1, sfc_restart_if_required, sa);
	if (rc == -ENOTSUP)
		sfc_warn(sa, "alarms are not supported, restart is pending");
	else if (rc != 0)
		sfc_err(sa, "cannot arm restart alarm (rc=%d)", rc);
	else
		sfc_notice(sa, "restart scheduled");
}

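/*
 * Usage sketch (hypothetical caller): code which detects a condition
 * requiring a full restart, e.g. an MC reboot event, simply calls
 * sfc_schedule_restart(sa). The restart itself runs later from the
 * alarm callback above with the adapter lock taken, so scheduling is
 * safe from contexts which cannot take the lock themselves.
 */
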
int
sfc_configure(struct sfc_adapter *sa)
{
	int rc;

	sfc_log_init(sa, "entry");

	SFC_ASSERT(sfc_adapter_is_locked(sa));

	SFC_ASSERT(sa->state == SFC_ADAPTER_INITIALIZED ||
		   sa->state == SFC_ADAPTER_CONFIGURED);
	sa->state = SFC_ADAPTER_CONFIGURING;

	rc = sfc_check_conf(sa);
	if (rc != 0)
		goto fail_check_conf;

	rc = sfc_intr_configure(sa);
	if (rc != 0)
		goto fail_intr_configure;

	rc = sfc_port_configure(sa);
	if (rc != 0)
		goto fail_port_configure;

	rc = sfc_rx_configure(sa);
	if (rc != 0)
		goto fail_rx_configure;

	rc = sfc_tx_configure(sa);
	if (rc != 0)
		goto fail_tx_configure;

	sa->state = SFC_ADAPTER_CONFIGURED;
	sfc_log_init(sa, "done");
	return 0;

fail_tx_configure:
	sfc_rx_close(sa);

fail_rx_configure:
	sfc_port_close(sa);

fail_port_configure:
	sfc_intr_close(sa);

fail_intr_configure:
fail_check_conf:
	sa->state = SFC_ADAPTER_INITIALIZED;
	sfc_log_init(sa, "failed %d", rc);
	return rc;
}

void
sfc_close(struct sfc_adapter *sa)
{
	sfc_log_init(sa, "entry");

	SFC_ASSERT(sfc_adapter_is_locked(sa));

	SFC_ASSERT(sa->state == SFC_ADAPTER_CONFIGURED);
	sa->state = SFC_ADAPTER_CLOSING;

	sfc_tx_close(sa);
	sfc_rx_close(sa);
	sfc_port_close(sa);
	sfc_intr_close(sa);

	sa->state = SFC_ADAPTER_INITIALIZED;
	sfc_log_init(sa, "done");
}

static efx_rc_t
sfc_find_mem_bar(efsys_pci_config_t *configp, int bar_index,
		 efsys_bar_t *barp)
{
	efsys_bar_t result;
	struct rte_pci_device *dev;

	memset(&result, 0, sizeof(result));

	if (bar_index < 0 || bar_index >= PCI_MAX_RESOURCE)
		return EINVAL;

	dev = configp->espc_dev;

	result.esb_rid = bar_index;
	result.esb_dev = dev;
	result.esb_base = dev->mem_resource[bar_index].addr;

	*barp = result;

	return 0;
}

static int
sfc_mem_bar_init(struct sfc_adapter *sa, const efx_bar_region_t *mem_ebrp)
{
	struct rte_eth_dev *eth_dev = sa->eth_dev;
	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
	efsys_bar_t *ebp = &sa->mem_bar;
	struct rte_mem_resource *res =
		&pci_dev->mem_resource[mem_ebrp->ebr_index];

	SFC_BAR_LOCK_INIT(ebp, eth_dev->data->name);
	ebp->esb_rid = mem_ebrp->ebr_index;
	ebp->esb_dev = pci_dev;
	ebp->esb_base = res->addr;

	sa->fcw_offset = mem_ebrp->ebr_offset;

	return 0;
}

static void
sfc_mem_bar_fini(struct sfc_adapter *sa)
{
	efsys_bar_t *ebp = &sa->mem_bar;

	SFC_BAR_LOCK_DESTROY(ebp);
	memset(ebp, 0, sizeof(*ebp));
}

/*
 * A fixed RSS key which is symmetric (symmetrical flows are distributed
 * to the same CPU) and is known to give a uniform distribution of
 * traffic between different CPUs.
 */
static const uint8_t default_rss_key[EFX_RSS_KEY_SIZE] = {
	0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
	0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
	0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
	0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
	0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
};

static int
sfc_rss_attach(struct sfc_adapter *sa)
{
	struct sfc_rss *rss = &sfc_sa2shared(sa)->rss;
	int rc;

	rc = efx_intr_init(sa->nic, sa->intr.type, NULL);
	if (rc != 0)
		goto fail_intr_init;

	rc = efx_ev_init(sa->nic);
	if (rc != 0)
		goto fail_ev_init;

	rc = efx_rx_init(sa->nic);
	if (rc != 0)
		goto fail_rx_init;

	rc = efx_rx_scale_default_support_get(sa->nic, &rss->context_type);
	if (rc != 0)
		goto fail_scale_support_get;

	rc = efx_rx_hash_default_support_get(sa->nic, &rss->hash_support);
	if (rc != 0)
		goto fail_hash_support_get;

	rc = sfc_rx_hash_init(sa);
	if (rc != 0)
		goto fail_rx_hash_init;

	efx_rx_fini(sa->nic);
	efx_ev_fini(sa->nic);
	efx_intr_fini(sa->nic);

	rte_memcpy(rss->key, default_rss_key, sizeof(rss->key));
	rss->dummy_rss_context = EFX_RSS_CONTEXT_DEFAULT;

	return 0;

fail_rx_hash_init:
fail_hash_support_get:
fail_scale_support_get:
	efx_rx_fini(sa->nic);

fail_rx_init:
	efx_ev_fini(sa->nic);

fail_ev_init:
	efx_intr_fini(sa->nic);

fail_intr_init:
	return rc;
}

static void
sfc_rss_detach(struct sfc_adapter *sa)
{
	sfc_rx_hash_fini(sa);
}

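/*
 * Attach the driver to the NIC: reset it, probe resource limits, take
 * copies of firmware capabilities, and attach all modules (interrupts,
 * events, port, RSS, filters). The NIC is finalized at the end of
 * attach; it is re-initialized on start.
 */
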
int
sfc_attach(struct sfc_adapter *sa)
{
	const efx_nic_cfg_t *encp;
	efx_nic_t *enp = sa->nic;
	int rc;

	sfc_log_init(sa, "entry");

	SFC_ASSERT(sfc_adapter_is_locked(sa));

	efx_mcdi_new_epoch(enp);

	sfc_log_init(sa, "reset nic");
	rc = efx_nic_reset(enp);
	if (rc != 0)
		goto fail_nic_reset;

	rc = sfc_sriov_attach(sa);
	if (rc != 0)
		goto fail_sriov_attach;

	/*
	 * A probed NIC is sufficient for tunnel init.
	 * Initialize tunnel support here to be able to use libefx
	 * efx_tunnel_config_udp_{add,remove}() in any state and
	 * efx_tunnel_reconfigure() on start up.
	 */
	rc = efx_tunnel_init(enp);
	if (rc != 0)
		goto fail_tunnel_init;

	encp = efx_nic_cfg_get(sa->nic);

	/*
	 * Make a copy of supported tunnel encapsulations in shared
	 * memory; it is used when the supported Rx packet type classes
	 * are queried.
	 */
	sa->priv.shared->tunnel_encaps =
		encp->enc_tunnel_encapsulations_supported;

	if (sfc_dp_tx_offload_capa(sa->priv.dp_tx) & DEV_TX_OFFLOAD_TCP_TSO) {
		sa->tso = encp->enc_fw_assisted_tso_v2_enabled ||
			  encp->enc_tso_v3_enabled;
		if (!sa->tso)
			sfc_info(sa, "TSO support isn't available on this adapter");
	}

	if (sa->tso &&
	    (sfc_dp_tx_offload_capa(sa->priv.dp_tx) &
	     (DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
	      DEV_TX_OFFLOAD_GENEVE_TNL_TSO)) != 0) {
		sa->tso_encap = encp->enc_fw_assisted_tso_v2_encap_enabled ||
				encp->enc_tso_v3_enabled;
		if (!sa->tso_encap)
			sfc_info(sa, "Encapsulated TSO support isn't available on this adapter");
	}

	sfc_log_init(sa, "estimate resource limits");
	rc = sfc_estimate_resource_limits(sa);
	if (rc != 0)
		goto fail_estimate_rsrc_limits;

	sa->evq_max_entries = encp->enc_evq_max_nevs;
	SFC_ASSERT(rte_is_power_of_2(sa->evq_max_entries));

	sa->evq_min_entries = encp->enc_evq_min_nevs;
	SFC_ASSERT(rte_is_power_of_2(sa->evq_min_entries));

	sa->rxq_max_entries = encp->enc_rxq_max_ndescs;
	SFC_ASSERT(rte_is_power_of_2(sa->rxq_max_entries));

	sa->rxq_min_entries = encp->enc_rxq_min_ndescs;
	SFC_ASSERT(rte_is_power_of_2(sa->rxq_min_entries));

	sa->txq_max_entries = encp->enc_txq_max_ndescs;
	SFC_ASSERT(rte_is_power_of_2(sa->txq_max_entries));

	sa->txq_min_entries = encp->enc_txq_min_ndescs;
	SFC_ASSERT(rte_is_power_of_2(sa->txq_min_entries));

	rc = sfc_intr_attach(sa);
	if (rc != 0)
		goto fail_intr_attach;

	rc = sfc_ev_attach(sa);
	if (rc != 0)
		goto fail_ev_attach;

	rc = sfc_port_attach(sa);
	if (rc != 0)
		goto fail_port_attach;

	rc = sfc_rss_attach(sa);
	if (rc != 0)
		goto fail_rss_attach;

	rc = sfc_filter_attach(sa);
	if (rc != 0)
		goto fail_filter_attach;

	sfc_log_init(sa, "fini nic");
	efx_nic_fini(enp);

	sfc_flow_init(sa);

	/*
	 * Create the vSwitch to be able to use VFs when the PF is not
	 * yet started as a DPDK port. VFs should be able to talk to
	 * each other even if the PF is down.
	 */
	rc = sfc_sriov_vswitch_create(sa);
	if (rc != 0)
		goto fail_sriov_vswitch_create;

	sa->state = SFC_ADAPTER_INITIALIZED;

	sfc_log_init(sa, "done");
	return 0;

fail_sriov_vswitch_create:
	sfc_flow_fini(sa);
	sfc_filter_detach(sa);

fail_filter_attach:
	sfc_rss_detach(sa);

fail_rss_attach:
	sfc_port_detach(sa);

fail_port_attach:
	sfc_ev_detach(sa);

fail_ev_attach:
	sfc_intr_detach(sa);

fail_intr_attach:
	efx_nic_fini(sa->nic);

fail_estimate_rsrc_limits:
fail_tunnel_init:
	efx_tunnel_fini(sa->nic);
	sfc_sriov_detach(sa);

fail_sriov_attach:
fail_nic_reset:
	sfc_log_init(sa, "failed %d", rc);
	return rc;
}

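/*
 * Reverse of sfc_attach(): detach all modules and release NIC
 * resources. Must be called with the adapter lock held.
 */
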
void
sfc_detach(struct sfc_adapter *sa)
{
	sfc_log_init(sa, "entry");

	SFC_ASSERT(sfc_adapter_is_locked(sa));

	sfc_sriov_vswitch_destroy(sa);

	sfc_flow_fini(sa);

	sfc_filter_detach(sa);
	sfc_rss_detach(sa);
	sfc_port_detach(sa);
	sfc_ev_detach(sa);
	sfc_intr_detach(sa);
	efx_tunnel_fini(sa->nic);
	sfc_sriov_detach(sa);

	sa->state = SFC_ADAPTER_UNINITIALIZED;
}

static int
sfc_kvarg_fv_variant_handler(__rte_unused const char *key,
			     const char *value_str, void *opaque)
{
	uint32_t *value = opaque;

	if (strcasecmp(value_str, SFC_KVARG_FW_VARIANT_DONT_CARE) == 0)
		*value = EFX_FW_VARIANT_DONT_CARE;
	else if (strcasecmp(value_str, SFC_KVARG_FW_VARIANT_FULL_FEATURED) == 0)
		*value = EFX_FW_VARIANT_FULL_FEATURED;
	else if (strcasecmp(value_str, SFC_KVARG_FW_VARIANT_LOW_LATENCY) == 0)
		*value = EFX_FW_VARIANT_LOW_LATENCY;
	else if (strcasecmp(value_str, SFC_KVARG_FW_VARIANT_PACKED_STREAM) == 0)
		*value = EFX_FW_VARIANT_PACKED_STREAM;
	else if (strcasecmp(value_str, SFC_KVARG_FW_VARIANT_DPDK) == 0)
		*value = EFX_FW_VARIANT_DPDK;
	else
		return -EINVAL;

	return 0;
}

static int
sfc_get_fw_variant(struct sfc_adapter *sa, efx_fw_variant_t *efv)
{
	efx_nic_fw_info_t enfi;
	int rc;

	rc = efx_nic_get_fw_version(sa->nic, &enfi);
	if (rc != 0)
		return rc;
	else if (!enfi.enfi_dpcpu_fw_ids_valid)
		return ENOTSUP;

	/*
	 * The firmware variant can be uniquely identified by the
	 * RxDPCPU firmware id.
	 */
	switch (enfi.enfi_rx_dpcpu_fw_id) {
	case EFX_RXDP_FULL_FEATURED_FW_ID:
		*efv = EFX_FW_VARIANT_FULL_FEATURED;
		break;

	case EFX_RXDP_LOW_LATENCY_FW_ID:
		*efv = EFX_FW_VARIANT_LOW_LATENCY;
		break;

	case EFX_RXDP_PACKED_STREAM_FW_ID:
		*efv = EFX_FW_VARIANT_PACKED_STREAM;
		break;

	case EFX_RXDP_DPDK_FW_ID:
		*efv = EFX_FW_VARIANT_DPDK;
		break;

	default:
		/*
		 * Other firmware variants are not considered, since they
		 * are not supported in the device parameters.
		 */
		*efv = EFX_FW_VARIANT_DONT_CARE;
		break;
	}

	return 0;
}

static const char *
sfc_fw_variant2str(efx_fw_variant_t efv)
{
	switch (efv) {
	case EFX_RXDP_FULL_FEATURED_FW_ID:
		return SFC_KVARG_FW_VARIANT_FULL_FEATURED;
	case EFX_RXDP_LOW_LATENCY_FW_ID:
		return SFC_KVARG_FW_VARIANT_LOW_LATENCY;
	case EFX_RXDP_PACKED_STREAM_FW_ID:
		return SFC_KVARG_FW_VARIANT_PACKED_STREAM;
	case EFX_RXDP_DPDK_FW_ID:
		return SFC_KVARG_FW_VARIANT_DPDK;
	default:
		return "unknown";
	}
}

static int
sfc_kvarg_rxd_wait_timeout_ns(struct sfc_adapter *sa)
{
	int rc;
	long value;

	value = SFC_RXD_WAIT_TIMEOUT_NS_DEF;

	rc = sfc_kvargs_process(sa, SFC_KVARG_RXD_WAIT_TIMEOUT_NS,
				sfc_kvarg_long_handler, &value);
	if (rc != 0)
		return rc;

	if (value < 0 ||
	    (unsigned long)value > EFX_RXQ_ES_SUPER_BUFFER_HOL_BLOCK_MAX) {
		sfc_err(sa, "wrong '" SFC_KVARG_RXD_WAIT_TIMEOUT_NS "' "
			    "was set (%ld);", value);
		sfc_err(sa, "it must not be less than 0 or greater than %u",
			EFX_RXQ_ES_SUPER_BUFFER_HOL_BLOCK_MAX);
		return EINVAL;
	}

	sa->rxd_wait_timeout_ns = value;
	return 0;
}

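/*
 * Example (hypothetical command line, assuming the usual kvarg string
 * values): the parameters handled above come from device arguments,
 * e.g.
 *   -a 0000:01:00.0,fw_variant=dpdk,rxd_wait_timeout_ns=200
 * fw_variant is applied in sfc_nic_probe() below; unknown values are
 * rejected by sfc_kvarg_fv_variant_handler().
 */
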
static int
sfc_nic_probe(struct sfc_adapter *sa)
{
	efx_nic_t *enp = sa->nic;
	efx_fw_variant_t preferred_efv;
	efx_fw_variant_t efv;
	int rc;

	preferred_efv = EFX_FW_VARIANT_DONT_CARE;
	rc = sfc_kvargs_process(sa, SFC_KVARG_FW_VARIANT,
				sfc_kvarg_fv_variant_handler,
				&preferred_efv);
	if (rc != 0) {
		sfc_err(sa, "invalid %s parameter value", SFC_KVARG_FW_VARIANT);
		return rc;
	}

	rc = sfc_kvarg_rxd_wait_timeout_ns(sa);
	if (rc != 0)
		return rc;

	rc = efx_nic_probe(enp, preferred_efv);
	if (rc == EACCES) {
		/* Unprivileged functions cannot set the FW variant */
		rc = efx_nic_probe(enp, EFX_FW_VARIANT_DONT_CARE);
	}
	if (rc != 0)
		return rc;

	rc = sfc_get_fw_variant(sa, &efv);
	if (rc == ENOTSUP) {
		sfc_warn(sa, "FW variant can not be obtained");
		return 0;
	}
	if (rc != 0)
		return rc;

	/* Check that the firmware variant was changed to the requested one */
	if (preferred_efv != EFX_FW_VARIANT_DONT_CARE && preferred_efv != efv) {
		sfc_warn(sa, "FW variant has not changed to the requested %s",
			 sfc_fw_variant2str(preferred_efv));
	}

	sfc_notice(sa, "running FW variant is %s", sfc_fw_variant2str(efv));

	return 0;
}

static efx_rc_t
sfc_pci_config_readd(efsys_pci_config_t *configp, uint32_t offset,
		     efx_dword_t *edp)
{
	int rc;

	rc = rte_pci_read_config(configp->espc_dev, edp->ed_u32, sizeof(*edp),
				 offset);

	return (rc < 0 || rc != sizeof(*edp)) ? EIO : 0;
}

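/*
 * Determine the controller family by reading PCI config space and
 * probing the memory BAR layout via libefx.
 */
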
static int
sfc_family(struct sfc_adapter *sa, efx_bar_region_t *mem_ebrp)
{
	struct rte_eth_dev *eth_dev = sa->eth_dev;
	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
	efsys_pci_config_t espcp;
	static const efx_pci_ops_t ops = {
		.epo_config_readd = sfc_pci_config_readd,
		.epo_find_mem_bar = sfc_find_mem_bar,
	};
	int rc;

	espcp.espc_dev = pci_dev;

	rc = efx_family_probe_bar(pci_dev->id.vendor_id,
				  pci_dev->id.device_id,
				  &espcp, &ops, &sa->family, mem_ebrp);

	return rc;
}

int
sfc_probe(struct sfc_adapter *sa)
{
	efx_bar_region_t mem_ebrp;
	efx_nic_t *enp;
	int rc;

	sfc_log_init(sa, "entry");

	SFC_ASSERT(sfc_adapter_is_locked(sa));

	sa->socket_id = rte_socket_id();
	rte_atomic32_init(&sa->restart_required);

	sfc_log_init(sa, "get family");
	rc = sfc_family(sa, &mem_ebrp);
	if (rc != 0)
		goto fail_family;
	sfc_log_init(sa,
		     "family is %u, membar is %u, function control window offset is %lu",
		     sa->family, mem_ebrp.ebr_index,
		     (unsigned long)mem_ebrp.ebr_offset);

	sfc_log_init(sa, "init mem bar");
	rc = sfc_mem_bar_init(sa, &mem_ebrp);
	if (rc != 0)
		goto fail_mem_bar_init;

	sfc_log_init(sa, "create nic");
	rte_spinlock_init(&sa->nic_lock);
	rc = efx_nic_create(sa->family, (efsys_identifier_t *)sa,
			    &sa->mem_bar, mem_ebrp.ebr_offset,
			    &sa->nic_lock, &enp);
	if (rc != 0)
		goto fail_nic_create;
	sa->nic = enp;

	rc = sfc_mcdi_init(sa);
	if (rc != 0)
		goto fail_mcdi_init;

	sfc_log_init(sa, "probe nic");
	rc = sfc_nic_probe(sa);
	if (rc != 0)
		goto fail_nic_probe;

	sfc_log_init(sa, "done");
	return 0;

fail_nic_probe:
	sfc_mcdi_fini(sa);

fail_mcdi_init:
	sfc_log_init(sa, "destroy nic");
	sa->nic = NULL;
	efx_nic_destroy(enp);

fail_nic_create:
	sfc_mem_bar_fini(sa);

fail_mem_bar_init:
fail_family:
	sfc_log_init(sa, "failed %d", rc);
	return rc;
}

void
sfc_unprobe(struct sfc_adapter *sa)
{
	efx_nic_t *enp = sa->nic;

	sfc_log_init(sa, "entry");

	SFC_ASSERT(sfc_adapter_is_locked(sa));

	sfc_log_init(sa, "unprobe nic");
	efx_nic_unprobe(enp);

	sfc_mcdi_fini(sa);

	/*
	 * Make sure there is no pending alarm to restart, since we are
	 * going to free the device private data which is passed as the
	 * callback opaque data. A new alarm cannot be scheduled since
	 * MCDI is shut down.
	 */
	rte_eal_alarm_cancel(sfc_restart_if_required, sa);

	sfc_log_init(sa, "destroy nic");
	sa->nic = NULL;
	efx_nic_destroy(enp);

	sfc_mem_bar_fini(sa);

	sfc_flow_fini(sa);
	sa->state = SFC_ADAPTER_UNINITIALIZED;
}

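/*
 * Register a dynamic log type named "<prefix>.<PCI address>" and pick
 * up the log level configured for it; fall back to the generic driver
 * log type on any failure.
 */
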
uint32_t
sfc_register_logtype(const struct rte_pci_addr *pci_addr,
		     const char *lt_prefix_str, uint32_t ll_default)
{
	size_t lt_prefix_str_size = strlen(lt_prefix_str);
	size_t lt_str_size_max;
	char *lt_str = NULL;
	int ret;

	if (SIZE_MAX - PCI_PRI_STR_SIZE - 1 > lt_prefix_str_size) {
		++lt_prefix_str_size; /* Reserve space for prefix separator */
		lt_str_size_max = lt_prefix_str_size + PCI_PRI_STR_SIZE + 1;
	} else {
		return sfc_logtype_driver;
	}

	lt_str = rte_zmalloc("logtype_str", lt_str_size_max, 0);
	if (lt_str == NULL)
		return sfc_logtype_driver;

	strncpy(lt_str, lt_prefix_str, lt_prefix_str_size);
	lt_str[lt_prefix_str_size - 1] = '.';
	rte_pci_device_name(pci_addr, lt_str + lt_prefix_str_size,
			    lt_str_size_max - lt_prefix_str_size);
	lt_str[lt_str_size_max - 1] = '\0';

	ret = rte_log_register_type_and_pick_level(lt_str, ll_default);
	rte_free(lt_str);

	if (ret < 0)
		return sfc_logtype_driver;

	return ret;
}