/* SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 2016-2018 Solarflare Communications Inc.
 * All rights reserved.
 *
 * This software was jointly developed between OKTET Labs (under contract
 * for Solarflare) and Solarflare Communications, Inc.
 */

#include "sfc.h"
#include "sfc_debug.h"
#include "sfc_log.h"
#include "sfc_ev.h"
#include "sfc_tx.h"
#include "sfc_tweak.h"
#include "sfc_kvargs.h"

/*
 * Maximum number of TX queue flush attempts in case of
 * failure or flush timeout
 */
#define SFC_TX_QFLUSH_ATTEMPTS		(3)

/*
 * Time to wait between event queue polling attempts when waiting for TX
 * queue flush done or flush failed events
 */
#define SFC_TX_QFLUSH_POLL_WAIT_MS	(1)

/*
 * Maximum number of event queue polling attempts when waiting for TX queue
 * flush done or flush failed events; it defines TX queue flush attempt timeout
 * together with SFC_TX_QFLUSH_POLL_WAIT_MS
 */
#define SFC_TX_QFLUSH_POLL_ATTEMPTS	(2000)

uint64_t
sfc_tx_get_dev_offload_caps(struct sfc_adapter *sa)
{
	const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
	uint64_t caps = 0;

	if ((sa->priv.dp_tx->features & SFC_DP_TX_FEAT_VLAN_INSERT) &&
	    encp->enc_hw_tx_insert_vlan_enabled)
		caps |= DEV_TX_OFFLOAD_VLAN_INSERT;

	if (sa->priv.dp_tx->features & SFC_DP_TX_FEAT_MULTI_SEG)
		caps |= DEV_TX_OFFLOAD_MULTI_SEGS;

	if ((~sa->priv.dp_tx->features & SFC_DP_TX_FEAT_MULTI_POOL) &&
	    (~sa->priv.dp_tx->features & SFC_DP_TX_FEAT_REFCNT))
		caps |= DEV_TX_OFFLOAD_MBUF_FAST_FREE;

	return caps;
}

uint64_t
sfc_tx_get_queue_offload_caps(struct sfc_adapter *sa)
{
	const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
	uint64_t caps = 0;

	caps |= DEV_TX_OFFLOAD_IPV4_CKSUM;
	caps |= DEV_TX_OFFLOAD_UDP_CKSUM;
	caps |= DEV_TX_OFFLOAD_TCP_CKSUM;

	if (encp->enc_tunnel_encapsulations_supported)
		caps |= DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM;

	if (sa->tso)
		caps |= DEV_TX_OFFLOAD_TCP_TSO;

	if (sa->tso_encap)
		caps |= (DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
			 DEV_TX_OFFLOAD_GENEVE_TNL_TSO);

	return caps;
}

static int
sfc_tx_qcheck_conf(struct sfc_adapter *sa, unsigned int txq_max_fill_level,
		   const struct rte_eth_txconf *tx_conf,
		   uint64_t offloads)
{
	int rc = 0;

	if (tx_conf->tx_rs_thresh != 0) {
		sfc_err(sa, "RS bit in transmit descriptor is not supported");
		rc = EINVAL;
	}

	if (tx_conf->tx_free_thresh > txq_max_fill_level) {
		sfc_err(sa,
			"TxQ free threshold too large: %u vs maximum %u",
			tx_conf->tx_free_thresh, txq_max_fill_level);
		rc = EINVAL;
	}

	if (tx_conf->tx_thresh.pthresh != 0 ||
	    tx_conf->tx_thresh.hthresh != 0 ||
	    tx_conf->tx_thresh.wthresh != 0) {
		sfc_warn(sa,
			"prefetch/host/writeback thresholds are not supported");
	}

	/* We either perform both TCP and UDP offload, or no offload at all */
	if (((offloads & DEV_TX_OFFLOAD_TCP_CKSUM) == 0) !=
	    ((offloads & DEV_TX_OFFLOAD_UDP_CKSUM) == 0)) {
		sfc_err(sa, "TCP and UDP offloads can't be set independently");
		rc = EINVAL;
	}

	return rc;
}

void
sfc_tx_qflush_done(struct sfc_txq_info *txq_info)
{
	txq_info->state |= SFC_TXQ_FLUSHED;
	txq_info->state &= ~SFC_TXQ_FLUSHING;
}
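
/*
 * Set up a single transmit queue: size the rings via the datapath
 * callback, validate the configuration, initialize the corresponding
 * event queue, allocate DMA memory for the hardware ring and create
 * the datapath transmit queue.
 */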
int
sfc_tx_qinit(struct sfc_adapter *sa, unsigned int sw_index,
	     uint16_t nb_tx_desc, unsigned int socket_id,
	     const struct rte_eth_txconf *tx_conf)
{
	const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
	unsigned int txq_entries;
	unsigned int evq_entries;
	unsigned int txq_max_fill_level;
	struct sfc_txq_info *txq_info;
	struct sfc_evq *evq;
	struct sfc_txq *txq;
	int rc = 0;
	struct sfc_dp_tx_qcreate_info info;
	uint64_t offloads;
	struct sfc_dp_tx_hw_limits hw_limits;

	sfc_log_init(sa, "TxQ = %u", sw_index);

	memset(&hw_limits, 0, sizeof(hw_limits));
	hw_limits.txq_max_entries = sa->txq_max_entries;
	hw_limits.txq_min_entries = sa->txq_min_entries;

	rc = sa->priv.dp_tx->qsize_up_rings(nb_tx_desc, &hw_limits,
					    &txq_entries, &evq_entries,
					    &txq_max_fill_level);
	if (rc != 0)
		goto fail_size_up_rings;
	SFC_ASSERT(txq_entries >= sa->txq_min_entries);
	SFC_ASSERT(txq_entries <= sa->txq_max_entries);
	SFC_ASSERT(txq_entries >= nb_tx_desc);
	SFC_ASSERT(txq_max_fill_level <= nb_tx_desc);

	offloads = tx_conf->offloads |
		sa->eth_dev->data->dev_conf.txmode.offloads;
	rc = sfc_tx_qcheck_conf(sa, txq_max_fill_level, tx_conf, offloads);
	if (rc != 0)
		goto fail_bad_conf;

	SFC_ASSERT(sw_index < sfc_sa2shared(sa)->txq_count);
	txq_info = &sfc_sa2shared(sa)->txq_info[sw_index];

	txq_info->entries = txq_entries;

	rc = sfc_ev_qinit(sa, SFC_EVQ_TYPE_TX, sw_index,
			  evq_entries, socket_id, &evq);
	if (rc != 0)
		goto fail_ev_qinit;

	txq = &sa->txq_ctrl[sw_index];
	txq->hw_index = sw_index;
	txq->evq = evq;
	txq_info->free_thresh =
		(tx_conf->tx_free_thresh) ? tx_conf->tx_free_thresh :
		SFC_TX_DEFAULT_FREE_THRESH;
	txq_info->offloads = offloads;

	rc = sfc_dma_alloc(sa, "txq", sw_index,
			   efx_txq_size(sa->nic, txq_info->entries),
			   socket_id, &txq->mem);
	if (rc != 0)
		goto fail_dma_alloc;

	memset(&info, 0, sizeof(info));
	info.max_fill_level = txq_max_fill_level;
	info.free_thresh = txq_info->free_thresh;
	info.offloads = offloads;
	info.txq_entries = txq_info->entries;
	info.dma_desc_size_max = encp->enc_tx_dma_desc_size_max;
	info.txq_hw_ring = txq->mem.esm_base;
	info.evq_entries = evq_entries;
	info.evq_hw_ring = evq->mem.esm_base;
	info.hw_index = txq->hw_index;
	info.mem_bar = sa->mem_bar.esb_base;
	info.vi_window_shift = encp->enc_vi_window_shift;
	info.tso_tcp_header_offset_limit =
		encp->enc_tx_tso_tcp_header_offset_limit;

	rc = sa->priv.dp_tx->qcreate(sa->eth_dev->data->port_id, sw_index,
				     &RTE_ETH_DEV_TO_PCI(sa->eth_dev)->addr,
				     socket_id, &info, &txq_info->dp);
	if (rc != 0)
		goto fail_dp_tx_qinit;

	evq->dp_txq = txq_info->dp;

	txq_info->state = SFC_TXQ_INITIALIZED;

	txq_info->deferred_start = (tx_conf->tx_deferred_start != 0);

	return 0;

fail_dp_tx_qinit:
	sfc_dma_free(sa, &txq->mem);

fail_dma_alloc:
	sfc_ev_qfini(evq);

fail_ev_qinit:
	txq_info->entries = 0;

fail_bad_conf:
fail_size_up_rings:
	sfc_log_init(sa, "failed (TxQ = %u, rc = %d)", sw_index, rc);
	return rc;
}
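
/*
 * Tear down a transmit queue: destroy the datapath queue, release the
 * DMA memory of the hardware ring and finalize the bound event queue.
 * The queue must be in the initialized (not started) state.
 */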
void
sfc_tx_qfini(struct sfc_adapter *sa, unsigned int sw_index)
{
	struct sfc_txq_info *txq_info;
	struct sfc_txq *txq;

	sfc_log_init(sa, "TxQ = %u", sw_index);

	SFC_ASSERT(sw_index < sfc_sa2shared(sa)->txq_count);
	sa->eth_dev->data->tx_queues[sw_index] = NULL;

	txq_info = &sfc_sa2shared(sa)->txq_info[sw_index];

	SFC_ASSERT(txq_info->state == SFC_TXQ_INITIALIZED);

	sa->priv.dp_tx->qdestroy(txq_info->dp);
	txq_info->dp = NULL;

	txq_info->state &= ~SFC_TXQ_INITIALIZED;
	txq_info->entries = 0;

	txq = &sa->txq_ctrl[sw_index];

	sfc_dma_free(sa, &txq->mem);

	sfc_ev_qfini(txq->evq);
	txq->evq = NULL;
}

static int
sfc_tx_qinit_info(struct sfc_adapter *sa, unsigned int sw_index)
{
	sfc_log_init(sa, "TxQ = %u", sw_index);

	return 0;
}

static int
sfc_tx_check_mode(struct sfc_adapter *sa, const struct rte_eth_txmode *txmode)
{
	int rc = 0;

	switch (txmode->mq_mode) {
	case ETH_MQ_TX_NONE:
		break;
	default:
		sfc_err(sa, "Tx multi-queue mode %u not supported",
			txmode->mq_mode);
		rc = EINVAL;
	}

	/*
	 * These features are claimed to be i40e-specific,
	 * but it does make sense to double-check their absence
	 */
	if (txmode->hw_vlan_reject_tagged) {
		sfc_err(sa, "Rejecting tagged packets not supported");
		rc = EINVAL;
	}

	if (txmode->hw_vlan_reject_untagged) {
		sfc_err(sa, "Rejecting untagged packets not supported");
		rc = EINVAL;
	}

	if (txmode->hw_vlan_insert_pvid) {
		sfc_err(sa, "Port-based VLAN insertion not supported");
		rc = EINVAL;
	}

	return rc;
}

/**
 * Destroy excess queues that are no longer needed after reconfiguration
 * or complete close.
 */
static void
sfc_tx_fini_queues(struct sfc_adapter *sa, unsigned int nb_tx_queues)
{
	struct sfc_adapter_shared * const sas = sfc_sa2shared(sa);
	int sw_index;

	SFC_ASSERT(nb_tx_queues <= sas->txq_count);

	sw_index = sas->txq_count;
	while (--sw_index >= (int)nb_tx_queues) {
		if (sas->txq_info[sw_index].state & SFC_TXQ_INITIALIZED)
			sfc_tx_qfini(sa, sw_index);
	}

	sas->txq_count = nb_tx_queues;
}
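
/*
 * (Re)configure the set of transmit queues to match the number requested
 * by the application: destroy excess queues, (re)allocate the shared
 * queue info array and the primary process only control array, and
 * initialize info for any newly added queues.
 */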
int
sfc_tx_configure(struct sfc_adapter *sa)
{
	struct sfc_adapter_shared * const sas = sfc_sa2shared(sa);
	const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
	const struct rte_eth_conf *dev_conf = &sa->eth_dev->data->dev_conf;
	const unsigned int nb_tx_queues = sa->eth_dev->data->nb_tx_queues;
	int rc = 0;

	sfc_log_init(sa, "nb_tx_queues=%u (old %u)",
		     nb_tx_queues, sas->txq_count);

	/*
	 * The datapath implementation assumes absence of boundary
	 * limits on Tx DMA descriptors. Addition of these checks on
	 * datapath would simply make the datapath slower.
	 */
	if (encp->enc_tx_dma_desc_boundary != 0) {
		rc = ENOTSUP;
		goto fail_tx_dma_desc_boundary;
	}

	rc = sfc_tx_check_mode(sa, &dev_conf->txmode);
	if (rc != 0)
		goto fail_check_mode;

	if (nb_tx_queues == sas->txq_count)
		goto done;

	if (sas->txq_info == NULL) {
		rc = ENOMEM;
		sas->txq_info = rte_calloc_socket("sfc-txqs", nb_tx_queues,
						  sizeof(sas->txq_info[0]), 0,
						  sa->socket_id);
		if (sas->txq_info == NULL)
			goto fail_txqs_alloc;

		/*
		 * Allocate primary process only TxQ control from heap
		 * since it should not be shared.
		 */
		rc = ENOMEM;
		sa->txq_ctrl = calloc(nb_tx_queues, sizeof(sa->txq_ctrl[0]));
		if (sa->txq_ctrl == NULL)
			goto fail_txqs_ctrl_alloc;
	} else {
		struct sfc_txq_info *new_txq_info;
		struct sfc_txq *new_txq_ctrl;

		if (nb_tx_queues < sas->txq_count)
			sfc_tx_fini_queues(sa, nb_tx_queues);

		rc = ENOMEM;
		new_txq_info =
			rte_realloc(sas->txq_info,
				    nb_tx_queues * sizeof(sas->txq_info[0]), 0);
		if (new_txq_info == NULL && nb_tx_queues > 0)
			goto fail_txqs_realloc;

		new_txq_ctrl = realloc(sa->txq_ctrl,
				       nb_tx_queues * sizeof(sa->txq_ctrl[0]));
		if (new_txq_ctrl == NULL && nb_tx_queues > 0)
			goto fail_txqs_ctrl_realloc;

		sas->txq_info = new_txq_info;
		sa->txq_ctrl = new_txq_ctrl;
		if (nb_tx_queues > sas->txq_count) {
			memset(&sas->txq_info[sas->txq_count], 0,
			       (nb_tx_queues - sas->txq_count) *
			       sizeof(sas->txq_info[0]));
			memset(&sa->txq_ctrl[sas->txq_count], 0,
			       (nb_tx_queues - sas->txq_count) *
			       sizeof(sa->txq_ctrl[0]));
		}
	}

	while (sas->txq_count < nb_tx_queues) {
		rc = sfc_tx_qinit_info(sa, sas->txq_count);
		if (rc != 0)
			goto fail_tx_qinit_info;

		sas->txq_count++;
	}

done:
	return 0;

fail_tx_qinit_info:
fail_txqs_ctrl_realloc:
fail_txqs_realloc:
fail_txqs_ctrl_alloc:
fail_txqs_alloc:
	sfc_tx_close(sa);

fail_check_mode:
fail_tx_dma_desc_boundary:
	sfc_log_init(sa, "failed (rc = %d)", rc);
	return rc;
}

void
sfc_tx_close(struct sfc_adapter *sa)
{
	sfc_tx_fini_queues(sa, 0);

	free(sa->txq_ctrl);
	sa->txq_ctrl = NULL;

	rte_free(sfc_sa2shared(sa)->txq_info);
	sfc_sa2shared(sa)->txq_info = NULL;
}
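
/*
 * Start a transmit queue: start its event queue, create and enable the
 * hardware transmit queue with checksum/TSO flags derived from the
 * enabled offloads, then hand the queue over to the datapath.
 */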
int
sfc_tx_qstart(struct sfc_adapter *sa, unsigned int sw_index)
{
	struct sfc_adapter_shared * const sas = sfc_sa2shared(sa);
	uint64_t offloads_supported = sfc_tx_get_dev_offload_caps(sa) |
				      sfc_tx_get_queue_offload_caps(sa);
	struct rte_eth_dev_data *dev_data;
	struct sfc_txq_info *txq_info;
	struct sfc_txq *txq;
	struct sfc_evq *evq;
	uint16_t flags = 0;
	unsigned int desc_index;
	int rc = 0;

	sfc_log_init(sa, "TxQ = %u", sw_index);

	SFC_ASSERT(sw_index < sas->txq_count);
	txq_info = &sas->txq_info[sw_index];

	SFC_ASSERT(txq_info->state == SFC_TXQ_INITIALIZED);

	txq = &sa->txq_ctrl[sw_index];
	evq = txq->evq;

	rc = sfc_ev_qstart(evq, sfc_evq_index_by_txq_sw_index(sa, sw_index));
	if (rc != 0)
		goto fail_ev_qstart;

	if (txq_info->offloads & DEV_TX_OFFLOAD_IPV4_CKSUM)
		flags |= EFX_TXQ_CKSUM_IPV4;

	if (txq_info->offloads & DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM)
		flags |= EFX_TXQ_CKSUM_INNER_IPV4;

	if ((txq_info->offloads & DEV_TX_OFFLOAD_TCP_CKSUM) ||
	    (txq_info->offloads & DEV_TX_OFFLOAD_UDP_CKSUM)) {
		flags |= EFX_TXQ_CKSUM_TCPUDP;

		if (offloads_supported & DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM)
			flags |= EFX_TXQ_CKSUM_INNER_TCPUDP;
	}

	if (txq_info->offloads & (DEV_TX_OFFLOAD_TCP_TSO |
				  DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
				  DEV_TX_OFFLOAD_GENEVE_TNL_TSO))
		flags |= EFX_TXQ_FATSOV2;

	rc = efx_tx_qcreate(sa->nic, txq->hw_index, 0, &txq->mem,
			    txq_info->entries, 0 /* not used on EF10 */,
			    flags, evq->common,
			    &txq->common, &desc_index);
	if (rc != 0) {
		if (sa->tso && (rc == ENOSPC))
			sfc_err(sa, "ran out of TSO contexts");

		goto fail_tx_qcreate;
	}

	efx_tx_qenable(txq->common);

	txq_info->state |= SFC_TXQ_STARTED;

	rc = sa->priv.dp_tx->qstart(txq_info->dp, evq->read_ptr, desc_index);
	if (rc != 0)
		goto fail_dp_qstart;

	/*
	 * It seems to be used by DPDK for debug purposes only ('rte_ether')
	 */
	dev_data = sa->eth_dev->data;
	dev_data->tx_queue_state[sw_index] = RTE_ETH_QUEUE_STATE_STARTED;

	return 0;

fail_dp_qstart:
	txq_info->state = SFC_TXQ_INITIALIZED;
	efx_tx_qdestroy(txq->common);

fail_tx_qcreate:
	sfc_ev_qstop(evq);

fail_ev_qstart:
	return rc;
}

void
sfc_tx_qstop(struct sfc_adapter *sa, unsigned int sw_index)
{
	struct sfc_adapter_shared * const sas = sfc_sa2shared(sa);
	struct rte_eth_dev_data *dev_data;
	struct sfc_txq_info *txq_info;
	struct sfc_txq *txq;
	unsigned int retry_count;
	unsigned int wait_count;
	int rc;

	sfc_log_init(sa, "TxQ = %u", sw_index);

	SFC_ASSERT(sw_index < sas->txq_count);
	txq_info = &sas->txq_info[sw_index];

	if (txq_info->state == SFC_TXQ_INITIALIZED)
		return;

	SFC_ASSERT(txq_info->state & SFC_TXQ_STARTED);

	txq = &sa->txq_ctrl[sw_index];
	sa->priv.dp_tx->qstop(txq_info->dp, &txq->evq->read_ptr);

	/*
	 * Retry TX queue flushing in the case of flush failure or
	 * timeout; in the worst case it can delay for 6 seconds
	 */
	for (retry_count = 0;
	     ((txq_info->state & SFC_TXQ_FLUSHED) == 0) &&
	     (retry_count < SFC_TX_QFLUSH_ATTEMPTS);
	     ++retry_count) {
		rc = efx_tx_qflush(txq->common);
		if (rc != 0) {
			txq_info->state |= (rc == EALREADY) ?
				SFC_TXQ_FLUSHED : SFC_TXQ_FLUSH_FAILED;
			break;
		}

		/*
		 * Wait for TX queue flush done or flush failed event at least
		 * SFC_TX_QFLUSH_POLL_WAIT_MS milliseconds and not more
		 * than 2 seconds (SFC_TX_QFLUSH_POLL_WAIT_MS multiplied
		 * by SFC_TX_QFLUSH_POLL_ATTEMPTS)
		 */
		wait_count = 0;
		do {
			rte_delay_ms(SFC_TX_QFLUSH_POLL_WAIT_MS);
			sfc_ev_qpoll(txq->evq);
		} while ((txq_info->state & SFC_TXQ_FLUSHING) &&
			 wait_count++ < SFC_TX_QFLUSH_POLL_ATTEMPTS);

		if (txq_info->state & SFC_TXQ_FLUSHING)
			sfc_err(sa, "TxQ %u flush timed out", sw_index);

		if (txq_info->state & SFC_TXQ_FLUSHED)
			sfc_notice(sa, "TxQ %u flushed", sw_index);
	}

	sa->priv.dp_tx->qreap(txq_info->dp);

	txq_info->state = SFC_TXQ_INITIALIZED;

	efx_tx_qdestroy(txq->common);

	sfc_ev_qstop(txq->evq);

	/*
	 * It seems to be used by DPDK for debug purposes only ('rte_ether')
	 */
	dev_data = sa->eth_dev->data;
	dev_data->tx_queue_state[sw_index] = RTE_ETH_QUEUE_STATE_STOPPED;
}
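
/*
 * Start all initialized transmit queues which are not marked for
 * deferred start (or have already been started explicitly). TSO support
 * is re-checked against the current NIC capabilities and dropped if the
 * firmware no longer provides FATSOv2.
 */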
int
sfc_tx_start(struct sfc_adapter *sa)
{
	struct sfc_adapter_shared * const sas = sfc_sa2shared(sa);
	const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
	unsigned int sw_index;
	int rc = 0;

	sfc_log_init(sa, "txq_count = %u", sas->txq_count);

	if (sa->tso) {
		if (!encp->enc_fw_assisted_tso_v2_enabled) {
			sfc_warn(sa, "TSO support could not be restored");
			sa->tso = B_FALSE;
			sa->tso_encap = B_FALSE;
		}
	}

	if (sa->tso_encap && !encp->enc_fw_assisted_tso_v2_encap_enabled) {
		sfc_warn(sa, "Encapsulated TSO support could not be restored");
		sa->tso_encap = B_FALSE;
	}

	rc = efx_tx_init(sa->nic);
	if (rc != 0)
		goto fail_efx_tx_init;

	for (sw_index = 0; sw_index < sas->txq_count; ++sw_index) {
		if (sas->txq_info[sw_index].state == SFC_TXQ_INITIALIZED &&
		    (!(sas->txq_info[sw_index].deferred_start) ||
		     sas->txq_info[sw_index].deferred_started)) {
			rc = sfc_tx_qstart(sa, sw_index);
			if (rc != 0)
				goto fail_tx_qstart;
		}
	}

	return 0;

fail_tx_qstart:
	while (sw_index-- > 0)
		sfc_tx_qstop(sa, sw_index);

	efx_tx_fini(sa->nic);

fail_efx_tx_init:
	sfc_log_init(sa, "failed (rc = %d)", rc);
	return rc;
}

void
sfc_tx_stop(struct sfc_adapter *sa)
{
	struct sfc_adapter_shared * const sas = sfc_sa2shared(sa);
	unsigned int sw_index;

	sfc_log_init(sa, "txq_count = %u", sas->txq_count);

	sw_index = sas->txq_count;
	while (sw_index-- > 0) {
		if (sas->txq_info[sw_index].state & SFC_TXQ_STARTED)
			sfc_tx_qstop(sa, sw_index);
	}

	efx_tx_fini(sa->nic);
}

static void
sfc_efx_tx_reap(struct sfc_efx_txq *txq)
{
	unsigned int completed;

	sfc_ev_qpoll(txq->evq);

	for (completed = txq->completed;
	     completed != txq->pending; completed++) {
		struct sfc_efx_tx_sw_desc *txd;

		txd = &txq->sw_ring[completed & txq->ptr_mask];

		if (txd->mbuf != NULL) {
			rte_pktmbuf_free(txd->mbuf);
			txd->mbuf = NULL;
		}
	}

	txq->completed = completed;
}

/*
 * The function is used to insert or update a VLAN tag;
 * the firmware keeps per-TxQ state of the tag to insert
 * (controlled by option descriptors), hence, if the tag of the
 * packet to be sent differs from the one remembered by the firmware,
 * the function updates it
 */
static unsigned int
sfc_efx_tx_maybe_insert_tag(struct sfc_efx_txq *txq, struct rte_mbuf *m,
			    efx_desc_t **pend)
{
	uint16_t this_tag = ((m->ol_flags & PKT_TX_VLAN_PKT) ?
			     m->vlan_tci : 0);

	if (this_tag == txq->hw_vlan_tci)
		return 0;

	/*
	 * The expression inside SFC_ASSERT() is not checked in a non-debug
	 * build because it might be too expensive on the data path
	 */
	SFC_ASSERT(efx_nic_cfg_get(txq->evq->sa->nic)->enc_hw_tx_insert_vlan_enabled);

	efx_tx_qdesc_vlantci_create(txq->common, rte_cpu_to_be_16(this_tag),
				    *pend);
	(*pend)++;
	txq->hw_vlan_tci = this_tag;

	return 1;
}
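
/*
 * libefx-based datapath Tx prepare callback: validate each packet
 * against the TSO header offset limit, the queue maximum fill level and
 * the number of extra option descriptors the datapath may add.
 */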
static uint16_t
sfc_efx_prepare_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
		     uint16_t nb_pkts)
{
	struct sfc_dp_txq *dp_txq = tx_queue;
	struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq);
	const efx_nic_cfg_t *encp = efx_nic_cfg_get(txq->evq->sa->nic);
	uint16_t i;

	for (i = 0; i < nb_pkts; i++) {
		int ret;

		/*
		 * The EFX Tx datapath may need an extra VLAN option
		 * descriptor, so account for one regardless of whether
		 * the VLAN insertion offload is requested/supported.
		 */
		ret = sfc_dp_tx_prepare_pkt(tx_pkts[i],
				encp->enc_tx_tso_tcp_header_offset_limit,
				txq->max_fill_level, EFX_TX_FATSOV2_OPT_NDESCS,
				1);
		if (unlikely(ret != 0)) {
			rte_errno = ret;
			break;
		}
	}

	return i;
}
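
/*
 * libefx-based datapath transmit burst callback: reap completed
 * descriptors when the queue is getting full, build VLAN/TSO option
 * descriptors and DMA descriptors for each packet segment, then post
 * and push the new descriptors to the hardware ring.
 */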
static uint16_t
sfc_efx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
	struct sfc_dp_txq *dp_txq = (struct sfc_dp_txq *)tx_queue;
	struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq);
	unsigned int added = txq->added;
	unsigned int pushed = added;
	unsigned int pkts_sent = 0;
	efx_desc_t *pend = &txq->pend_desc[0];
	const unsigned int hard_max_fill = txq->max_fill_level;
	const unsigned int soft_max_fill = hard_max_fill - txq->free_thresh;
	unsigned int fill_level = added - txq->completed;
	boolean_t reap_done;
	int rc __rte_unused;
	struct rte_mbuf **pktp;

	if (unlikely((txq->flags & SFC_EFX_TXQ_FLAG_RUNNING) == 0))
		goto done;

	/*
	 * If there is insufficient space for even a single packet, reap;
	 * otherwise, avoid reaping on every call to keep latency low
	 */
	reap_done = (fill_level > soft_max_fill);

	if (reap_done) {
		sfc_efx_tx_reap(txq);
		/*
		 * Recalculate fill level since 'txq->completed'
		 * might have changed on reap
		 */
		fill_level = added - txq->completed;
	}

	for (pkts_sent = 0, pktp = &tx_pkts[0];
	     (pkts_sent < nb_pkts) && (fill_level <= soft_max_fill);
	     pkts_sent++, pktp++) {
		uint16_t hw_vlan_tci_prev = txq->hw_vlan_tci;
		struct rte_mbuf *m_seg = *pktp;
		size_t pkt_len = m_seg->pkt_len;
		unsigned int pkt_descs = 0;
		size_t in_off = 0;

		/*
		 * Here the VLAN TCI is expected to be zero if the
		 * DEV_TX_OFFLOAD_VLAN_INSERT capability is not advertised;
		 * if the calling app ignores the absence of
		 * DEV_TX_OFFLOAD_VLAN_INSERT and pushes a VLAN TCI, then
		 * a TX_ERROR will occur
		 */
		pkt_descs += sfc_efx_tx_maybe_insert_tag(txq, m_seg, &pend);

		if (m_seg->ol_flags & PKT_TX_TCP_SEG) {
			/*
			 * We expect the 'pkt->l[2, 3, 4]_len' values
			 * to be set correctly by the caller
			 */
			if (sfc_efx_tso_do(txq, added, &m_seg, &in_off, &pend,
					   &pkt_descs, &pkt_len) != 0) {
				/*
				 * We may have reached this place if packet
				 * header linearization is needed but the
				 * header length is greater than
				 * SFC_TSOH_STD_LEN.
				 *
				 * We will deceive RTE saying that we have sent
				 * the packet, but we will actually drop it.
				 * Hence, we should revert 'pend' to the
				 * previous state (in case we have added a
				 * VLAN descriptor) and move on to the next
				 * packet. The original mbuf shouldn't be
				 * orphaned
				 */
				pend -= pkt_descs;
				txq->hw_vlan_tci = hw_vlan_tci_prev;

				rte_pktmbuf_free(*pktp);

				continue;
			}

			/*
			 * We've only added 2 FATSOv2 option descriptors
			 * and 1 descriptor for the linearized packet header.
			 * The outstanding work will be done in the same manner
			 * as for the usual non-TSO path
			 */
		}

		for (; m_seg != NULL; m_seg = m_seg->next) {
			efsys_dma_addr_t next_frag;
			size_t seg_len;

			seg_len = m_seg->data_len;
			next_frag = rte_mbuf_data_iova(m_seg);

			/*
			 * If we've started a TSO transaction a few steps
			 * earlier, we'll skip the packet header using an
			 * offset in the current segment (which has been set
			 * to the first one containing payload)
			 */
			seg_len -= in_off;
			next_frag += in_off;
			in_off = 0;

			do {
				efsys_dma_addr_t frag_addr = next_frag;
				size_t frag_len;

				/*
				 * It is assumed here that there is no
				 * limitation on address boundary
				 * crossing by DMA descriptor.
				 */
				frag_len = MIN(seg_len, txq->dma_desc_size_max);
				next_frag += frag_len;
				seg_len -= frag_len;
				pkt_len -= frag_len;

				efx_tx_qdesc_dma_create(txq->common,
							frag_addr, frag_len,
							(pkt_len == 0),
							pend++);

				pkt_descs++;
			} while (seg_len != 0);
		}

		added += pkt_descs;

		fill_level += pkt_descs;
		if (unlikely(fill_level > hard_max_fill)) {
			/*
			 * Our estimate of the maximum number of descriptors
			 * required to send a packet seems to be wrong.
			 * Try to reap (if we haven't yet).
			 */
			if (!reap_done) {
				sfc_efx_tx_reap(txq);
				reap_done = B_TRUE;
				fill_level = added - txq->completed;
				if (fill_level > hard_max_fill) {
					pend -= pkt_descs;
					txq->hw_vlan_tci = hw_vlan_tci_prev;
					break;
				}
			} else {
				pend -= pkt_descs;
				txq->hw_vlan_tci = hw_vlan_tci_prev;
				break;
			}
		}

		/* Assign mbuf to the last used desc */
		txq->sw_ring[(added - 1) & txq->ptr_mask].mbuf = *pktp;
	}

	if (likely(pkts_sent > 0)) {
		rc = efx_tx_qdesc_post(txq->common, txq->pend_desc,
				       pend - &txq->pend_desc[0],
				       txq->completed, &txq->added);
		SFC_ASSERT(rc == 0);

		if (likely(pushed != txq->added))
			efx_tx_qpush(txq->common, txq->added, pushed);
	}

#if SFC_TX_XMIT_PKTS_REAP_AT_LEAST_ONCE
	if (!reap_done)
		sfc_efx_tx_reap(txq);
#endif

done:
	return pkts_sent;
}

const struct sfc_dp_tx *
sfc_dp_tx_by_dp_txq(const struct sfc_dp_txq *dp_txq)
{
	const struct sfc_dp_queue *dpq = &dp_txq->dpq;
	struct rte_eth_dev *eth_dev;
	struct sfc_adapter_priv *sap;

	SFC_ASSERT(rte_eth_dev_is_valid_port(dpq->port_id));
	eth_dev = &rte_eth_devices[dpq->port_id];

	sap = sfc_adapter_priv_by_eth_dev(eth_dev);

	return sap->dp_tx;
}

struct sfc_txq_info *
sfc_txq_info_by_dp_txq(const struct sfc_dp_txq *dp_txq)
{
	const struct sfc_dp_queue *dpq = &dp_txq->dpq;
	struct rte_eth_dev *eth_dev;
	struct sfc_adapter_shared *sas;

	SFC_ASSERT(rte_eth_dev_is_valid_port(dpq->port_id));
	eth_dev = &rte_eth_devices[dpq->port_id];

	sas = sfc_adapter_shared_by_eth_dev(eth_dev);

	SFC_ASSERT(dpq->queue_id < sas->txq_count);
	return &sas->txq_info[dpq->queue_id];
}

struct sfc_txq *
sfc_txq_by_dp_txq(const struct sfc_dp_txq *dp_txq)
{
	const struct sfc_dp_queue *dpq = &dp_txq->dpq;
	struct rte_eth_dev *eth_dev;
	struct sfc_adapter *sa;

	SFC_ASSERT(rte_eth_dev_is_valid_port(dpq->port_id));
	eth_dev = &rte_eth_devices[dpq->port_id];

	sa = sfc_adapter_by_eth_dev(eth_dev);

	SFC_ASSERT(dpq->queue_id < sfc_sa2shared(sa)->txq_count);
	return &sa->txq_ctrl[dpq->queue_id];
}
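
/*
 * libefx-based datapath callback to size up the rings: the event queue
 * mirrors the Tx ring size and the maximum fill level is bounded by
 * EFX_TXQ_LIMIT().
 */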
static sfc_dp_tx_qsize_up_rings_t sfc_efx_tx_qsize_up_rings;
static int
sfc_efx_tx_qsize_up_rings(uint16_t nb_tx_desc,
			  __rte_unused struct sfc_dp_tx_hw_limits *limits,
			  unsigned int *txq_entries,
			  unsigned int *evq_entries,
			  unsigned int *txq_max_fill_level)
{
	*txq_entries = nb_tx_desc;
	*evq_entries = nb_tx_desc;
	*txq_max_fill_level = EFX_TXQ_LIMIT(*txq_entries);
	return 0;
}

static sfc_dp_tx_qcreate_t sfc_efx_tx_qcreate;
static int
sfc_efx_tx_qcreate(uint16_t port_id, uint16_t queue_id,
		   const struct rte_pci_addr *pci_addr,
		   int socket_id,
		   const struct sfc_dp_tx_qcreate_info *info,
		   struct sfc_dp_txq **dp_txqp)
{
	struct sfc_efx_txq *txq;
	struct sfc_txq *ctrl_txq;
	int rc;

	rc = ENOMEM;
	txq = rte_zmalloc_socket("sfc-efx-txq", sizeof(*txq),
				 RTE_CACHE_LINE_SIZE, socket_id);
	if (txq == NULL)
		goto fail_txq_alloc;

	sfc_dp_queue_init(&txq->dp.dpq, port_id, queue_id, pci_addr);

	rc = ENOMEM;
	txq->pend_desc = rte_calloc_socket("sfc-efx-txq-pend-desc",
					   EFX_TXQ_LIMIT(info->txq_entries),
					   sizeof(*txq->pend_desc), 0,
					   socket_id);
	if (txq->pend_desc == NULL)
		goto fail_pend_desc_alloc;

	rc = ENOMEM;
	txq->sw_ring = rte_calloc_socket("sfc-efx-txq-sw_ring",
					 info->txq_entries,
					 sizeof(*txq->sw_ring),
					 RTE_CACHE_LINE_SIZE, socket_id);
	if (txq->sw_ring == NULL)
		goto fail_sw_ring_alloc;

	ctrl_txq = sfc_txq_by_dp_txq(&txq->dp);
	if (ctrl_txq->evq->sa->tso) {
		rc = sfc_efx_tso_alloc_tsoh_objs(txq->sw_ring,
						 info->txq_entries, socket_id);
		if (rc != 0)
			goto fail_alloc_tsoh_objs;
	}

	txq->evq = ctrl_txq->evq;
	txq->ptr_mask = info->txq_entries - 1;
	txq->max_fill_level = info->max_fill_level;
	txq->free_thresh = info->free_thresh;
	txq->dma_desc_size_max = info->dma_desc_size_max;

	*dp_txqp = &txq->dp;
	return 0;

fail_alloc_tsoh_objs:
	rte_free(txq->sw_ring);

fail_sw_ring_alloc:
	rte_free(txq->pend_desc);

fail_pend_desc_alloc:
	rte_free(txq);

fail_txq_alloc:
	return rc;
}

static sfc_dp_tx_qdestroy_t sfc_efx_tx_qdestroy;
static void
sfc_efx_tx_qdestroy(struct sfc_dp_txq *dp_txq)
{
	struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq);

	sfc_efx_tso_free_tsoh_objs(txq->sw_ring, txq->ptr_mask + 1);
	rte_free(txq->sw_ring);
	rte_free(txq->pend_desc);
	rte_free(txq);
}

static sfc_dp_tx_qstart_t sfc_efx_tx_qstart;
static int
sfc_efx_tx_qstart(struct sfc_dp_txq *dp_txq,
		  __rte_unused unsigned int evq_read_ptr,
		  unsigned int txq_desc_index)
{
	/* libefx-based datapath is specific to libefx-based PMD */
	struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq);
	struct sfc_txq *ctrl_txq = sfc_txq_by_dp_txq(dp_txq);

	txq->common = ctrl_txq->common;

	txq->pending = txq->completed = txq->added = txq_desc_index;
	txq->hw_vlan_tci = 0;

	txq->flags |= (SFC_EFX_TXQ_FLAG_STARTED | SFC_EFX_TXQ_FLAG_RUNNING);

	return 0;
}

static sfc_dp_tx_qstop_t sfc_efx_tx_qstop;
static void
sfc_efx_tx_qstop(struct sfc_dp_txq *dp_txq,
		 __rte_unused unsigned int *evq_read_ptr)
{
	struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq);

	txq->flags &= ~SFC_EFX_TXQ_FLAG_RUNNING;
}
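
/*
 * libefx-based datapath queue reap callback: release all mbufs still
 * referenced by the software ring once the queue has been stopped.
 */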
static sfc_dp_tx_qreap_t sfc_efx_tx_qreap;
static void
sfc_efx_tx_qreap(struct sfc_dp_txq *dp_txq)
{
	struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq);
	unsigned int txds;

	sfc_efx_tx_reap(txq);

	for (txds = 0; txds <= txq->ptr_mask; txds++) {
		if (txq->sw_ring[txds].mbuf != NULL) {
			rte_pktmbuf_free(txq->sw_ring[txds].mbuf);
			txq->sw_ring[txds].mbuf = NULL;
		}
	}

	txq->flags &= ~SFC_EFX_TXQ_FLAG_STARTED;
}

static sfc_dp_tx_qdesc_status_t sfc_efx_tx_qdesc_status;
static int
sfc_efx_tx_qdesc_status(struct sfc_dp_txq *dp_txq, uint16_t offset)
{
	struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq);

	if (unlikely(offset > txq->ptr_mask))
		return -EINVAL;

	if (unlikely(offset >= txq->max_fill_level))
		return RTE_ETH_TX_DESC_UNAVAIL;

	/*
	 * Poll EvQ to derive up-to-date 'txq->pending' figure;
	 * it is required for the queue to be running, but the
	 * check is omitted because API design assumes that it
	 * is the duty of the caller to satisfy all conditions
	 */
	SFC_ASSERT((txq->flags & SFC_EFX_TXQ_FLAG_RUNNING) ==
		   SFC_EFX_TXQ_FLAG_RUNNING);
	sfc_ev_qpoll(txq->evq);

	/*
	 * Ring tail is 'txq->pending', and although descriptors
	 * between 'txq->completed' and 'txq->pending' are still
	 * in use by the driver, they should be reported as DONE
	 */
	if (unlikely(offset < (txq->added - txq->pending)))
		return RTE_ETH_TX_DESC_FULL;

	/*
	 * There is no separate return value for unused descriptors;
	 * the latter will be reported as DONE because genuine DONE
	 * descriptors will be freed anyway in SW on the next burst
	 */
	return RTE_ETH_TX_DESC_DONE;
}

struct sfc_dp_tx sfc_efx_tx = {
	.dp = {
		.name = SFC_KVARG_DATAPATH_EFX,
		.type = SFC_DP_TX,
		.hw_fw_caps = 0,
	},
	.features = SFC_DP_TX_FEAT_VLAN_INSERT |
		    SFC_DP_TX_FEAT_TSO |
		    SFC_DP_TX_FEAT_MULTI_POOL |
		    SFC_DP_TX_FEAT_REFCNT |
		    SFC_DP_TX_FEAT_MULTI_SEG,
	.qsize_up_rings = sfc_efx_tx_qsize_up_rings,
	.qcreate = sfc_efx_tx_qcreate,
	.qdestroy = sfc_efx_tx_qdestroy,
	.qstart = sfc_efx_tx_qstart,
	.qstop = sfc_efx_tx_qstop,
	.qreap = sfc_efx_tx_qreap,
	.qdesc_status = sfc_efx_tx_qdesc_status,
	.pkt_prepare = sfc_efx_prepare_pkts,
	.pkt_burst = sfc_efx_xmit_pkts,
};