/* SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 2016-2018 Solarflare Communications Inc.
 * All rights reserved.
 *
 * This software was jointly developed between OKTET Labs (under contract
 * for Solarflare) and Solarflare Communications, Inc.
 */

#include "sfc.h"
#include "sfc_debug.h"
#include "sfc_log.h"
#include "sfc_ev.h"
#include "sfc_tx.h"
#include "sfc_tweak.h"
#include "sfc_kvargs.h"

/*
 * Maximum number of TX queue flush attempts in case of
 * failure or flush timeout
 */
#define SFC_TX_QFLUSH_ATTEMPTS		(3)

/*
 * Time to wait between event queue polling attempts when waiting for TX
 * queue flush done or flush failed events
 */
#define SFC_TX_QFLUSH_POLL_WAIT_MS	(1)

/*
 * Maximum number of event queue polling attempts when waiting for TX queue
 * flush done or flush failed events; it defines TX queue flush attempt timeout
 * together with SFC_TX_QFLUSH_POLL_WAIT_MS
 */
#define SFC_TX_QFLUSH_POLL_ATTEMPTS	(2000)

static int
sfc_tx_qcheck_conf(struct sfc_adapter *sa, unsigned int txq_max_fill_level,
		   const struct rte_eth_txconf *tx_conf)
{
	unsigned int flags = tx_conf->txq_flags;
	const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
	int rc = 0;

	if (tx_conf->tx_rs_thresh != 0) {
		sfc_err(sa, "RS bit in transmit descriptor is not supported");
		rc = EINVAL;
	}

	if (tx_conf->tx_free_thresh > txq_max_fill_level) {
		sfc_err(sa,
			"TxQ free threshold too large: %u vs maximum %u",
			tx_conf->tx_free_thresh, txq_max_fill_level);
		rc = EINVAL;
	}

	if (tx_conf->tx_thresh.pthresh != 0 ||
	    tx_conf->tx_thresh.hthresh != 0 ||
	    tx_conf->tx_thresh.wthresh != 0) {
		sfc_warn(sa,
			"prefetch/host/writeback thresholds are not supported");
	}

	if (((flags & ETH_TXQ_FLAGS_NOMULTSEGS) == 0) &&
	    (~sa->dp_tx->features & SFC_DP_TX_FEAT_MULTI_SEG)) {
		sfc_err(sa, "Multi-segment is not supported by %s datapath",
			sa->dp_tx->dp.name);
		rc = EINVAL;
	}

	if (((flags & ETH_TXQ_FLAGS_NOMULTMEMP) == 0) &&
	    (~sa->dp_tx->features & SFC_DP_TX_FEAT_MULTI_POOL)) {
		sfc_err(sa, "multi-mempool is not supported by %s datapath",
			sa->dp_tx->dp.name);
		rc = EINVAL;
	}

	if (((flags & ETH_TXQ_FLAGS_NOREFCOUNT) == 0) &&
	    (~sa->dp_tx->features & SFC_DP_TX_FEAT_REFCNT)) {
		sfc_err(sa,
			"mbuf reference counters are neglected by %s datapath",
			sa->dp_tx->dp.name);
		rc = EINVAL;
	}

	if ((flags & ETH_TXQ_FLAGS_NOVLANOFFL) == 0) {
		if (!encp->enc_hw_tx_insert_vlan_enabled) {
			sfc_err(sa, "VLAN offload is not supported");
			rc = EINVAL;
		} else if (~sa->dp_tx->features & SFC_DP_TX_FEAT_VLAN_INSERT) {
			sfc_err(sa,
				"VLAN offload is not supported by %s datapath",
				sa->dp_tx->dp.name);
			rc = EINVAL;
		}
	}

	if ((flags & ETH_TXQ_FLAGS_NOXSUMSCTP) == 0) {
		sfc_err(sa, "SCTP offload is not supported");
		rc = EINVAL;
	}

	/* We either perform both TCP and UDP offload, or no offload at all */
	if (((flags & ETH_TXQ_FLAGS_NOXSUMTCP) == 0) !=
	    ((flags & ETH_TXQ_FLAGS_NOXSUMUDP) == 0)) {
		sfc_err(sa, "TCP and UDP offloads can't be set independently");
		rc = EINVAL;
	}

	return rc;
}

void
sfc_tx_qflush_done(struct sfc_txq *txq)
{
	txq->state |= SFC_TXQ_FLUSHED;
	txq->state &= ~SFC_TXQ_FLUSHING;
}

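/*
 * Initialise a transmit queue: size up the rings via the datapath callback,
 * validate the configuration, initialise the associated event queue,
 * allocate DMA-mapped memory for the descriptor ring and create the
 * datapath transmit queue.
 */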
int
sfc_tx_qinit(struct sfc_adapter *sa, unsigned int sw_index,
	     uint16_t nb_tx_desc, unsigned int socket_id,
	     const struct rte_eth_txconf *tx_conf)
{
	const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
	unsigned int txq_entries;
	unsigned int evq_entries;
	unsigned int txq_max_fill_level;
	struct sfc_txq_info *txq_info;
	struct sfc_evq *evq;
	struct sfc_txq *txq;
	int rc = 0;
	struct sfc_dp_tx_qcreate_info info;

	sfc_log_init(sa, "TxQ = %u", sw_index);

	rc = sa->dp_tx->qsize_up_rings(nb_tx_desc, &txq_entries, &evq_entries,
				       &txq_max_fill_level);
	if (rc != 0)
		goto fail_size_up_rings;
	SFC_ASSERT(txq_entries >= EFX_TXQ_MINNDESCS);
	SFC_ASSERT(txq_entries <= sa->txq_max_entries);
	SFC_ASSERT(txq_entries >= nb_tx_desc);
	SFC_ASSERT(txq_max_fill_level <= nb_tx_desc);

	rc = sfc_tx_qcheck_conf(sa, txq_max_fill_level, tx_conf);
	if (rc != 0)
		goto fail_bad_conf;

	SFC_ASSERT(sw_index < sa->txq_count);
	txq_info = &sa->txq_info[sw_index];

	txq_info->entries = txq_entries;

	rc = sfc_ev_qinit(sa, SFC_EVQ_TYPE_TX, sw_index,
			  evq_entries, socket_id, &evq);
	if (rc != 0)
		goto fail_ev_qinit;

	rc = ENOMEM;
	txq = rte_zmalloc_socket("sfc-txq", sizeof(*txq), 0, socket_id);
	if (txq == NULL)
		goto fail_txq_alloc;

	txq_info->txq = txq;

	txq->hw_index = sw_index;
	txq->evq = evq;
	txq->free_thresh =
		(tx_conf->tx_free_thresh) ? tx_conf->tx_free_thresh :
		SFC_TX_DEFAULT_FREE_THRESH;
	txq->flags = tx_conf->txq_flags;

	rc = sfc_dma_alloc(sa, "txq", sw_index, EFX_TXQ_SIZE(txq_info->entries),
			   socket_id, &txq->mem);
	if (rc != 0)
		goto fail_dma_alloc;

	memset(&info, 0, sizeof(info));
	info.max_fill_level = txq_max_fill_level;
	info.free_thresh = txq->free_thresh;
	info.flags = tx_conf->txq_flags;
	info.txq_entries = txq_info->entries;
	info.dma_desc_size_max = encp->enc_tx_dma_desc_size_max;
	info.txq_hw_ring = txq->mem.esm_base;
	info.evq_entries = evq_entries;
	info.evq_hw_ring = evq->mem.esm_base;
	info.hw_index = txq->hw_index;
	info.mem_bar = sa->mem_bar.esb_base;

	rc = sa->dp_tx->qcreate(sa->eth_dev->data->port_id, sw_index,
				&RTE_ETH_DEV_TO_PCI(sa->eth_dev)->addr,
				socket_id, &info, &txq->dp);
	if (rc != 0)
		goto fail_dp_tx_qinit;

	evq->dp_txq = txq->dp;

	txq->state = SFC_TXQ_INITIALIZED;

	txq_info->deferred_start = (tx_conf->tx_deferred_start != 0);

	return 0;

fail_dp_tx_qinit:
	sfc_dma_free(sa, &txq->mem);

fail_dma_alloc:
	txq_info->txq = NULL;
	rte_free(txq);

fail_txq_alloc:
	sfc_ev_qfini(evq);

fail_ev_qinit:
	txq_info->entries = 0;

fail_bad_conf:
fail_size_up_rings:
	sfc_log_init(sa, "failed (TxQ = %u, rc = %d)", sw_index, rc);
	return rc;
}

void
sfc_tx_qfini(struct sfc_adapter *sa, unsigned int sw_index)
{
	struct sfc_txq_info *txq_info;
	struct sfc_txq *txq;

	sfc_log_init(sa, "TxQ = %u", sw_index);

	SFC_ASSERT(sw_index < sa->txq_count);
	txq_info = &sa->txq_info[sw_index];

	txq = txq_info->txq;
	SFC_ASSERT(txq != NULL);
	SFC_ASSERT(txq->state == SFC_TXQ_INITIALIZED);

	sa->dp_tx->qdestroy(txq->dp);
	txq->dp = NULL;

	txq_info->txq = NULL;
	txq_info->entries = 0;

	sfc_dma_free(sa, &txq->mem);

	sfc_ev_qfini(txq->evq);
	txq->evq = NULL;

	rte_free(txq);
}

static int
sfc_tx_qinit_info(struct sfc_adapter *sa, unsigned int sw_index)
{
	sfc_log_init(sa, "TxQ = %u", sw_index);

	return 0;
}

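/*
 * Check that the requested Tx mode is supported: only ETH_MQ_TX_NONE is
 * accepted and VLAN-related txmode settings are rejected.
 */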
static int
sfc_tx_check_mode(struct sfc_adapter *sa, const struct rte_eth_txmode *txmode)
{
	int rc = 0;

	switch (txmode->mq_mode) {
	case ETH_MQ_TX_NONE:
		break;
	default:
		sfc_err(sa, "Tx multi-queue mode %u not supported",
			txmode->mq_mode);
		rc = EINVAL;
	}

	/*
	 * These features are claimed to be i40e-specific,
	 * but it does make sense to double-check their absence
	 */
	if (txmode->hw_vlan_reject_tagged) {
		sfc_err(sa, "Rejecting tagged packets not supported");
		rc = EINVAL;
	}

	if (txmode->hw_vlan_reject_untagged) {
		sfc_err(sa, "Rejecting untagged packets not supported");
		rc = EINVAL;
	}

	if (txmode->hw_vlan_insert_pvid) {
		sfc_err(sa, "Port-based VLAN insertion not supported");
		rc = EINVAL;
	}

	return rc;
}

/**
 * Destroy excess queues that are no longer needed after reconfiguration
 * or complete close.
 */
static void
sfc_tx_fini_queues(struct sfc_adapter *sa, unsigned int nb_tx_queues)
{
	int sw_index;

	SFC_ASSERT(nb_tx_queues <= sa->txq_count);

	sw_index = sa->txq_count;
	while (--sw_index >= (int)nb_tx_queues) {
		if (sa->txq_info[sw_index].txq != NULL)
			sfc_tx_qfini(sa, sw_index);
	}

	sa->txq_count = nb_tx_queues;
}

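/*
 * Configure the transmit subsystem according to the current device
 * configuration: validate the Tx mode, (re)allocate the per-queue info
 * array and initialise entries for any newly added queues.
 */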
int
sfc_tx_configure(struct sfc_adapter *sa)
{
	const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
	const struct rte_eth_conf *dev_conf = &sa->eth_dev->data->dev_conf;
	const unsigned int nb_tx_queues = sa->eth_dev->data->nb_tx_queues;
	int rc = 0;

	sfc_log_init(sa, "nb_tx_queues=%u (old %u)",
		     nb_tx_queues, sa->txq_count);

	/*
	 * The datapath implementation assumes absence of boundary
	 * limits on Tx DMA descriptors. Addition of these checks on
	 * datapath would simply make the datapath slower.
	 */
	if (encp->enc_tx_dma_desc_boundary != 0) {
		rc = ENOTSUP;
		goto fail_tx_dma_desc_boundary;
	}

	rc = sfc_tx_check_mode(sa, &dev_conf->txmode);
	if (rc != 0)
		goto fail_check_mode;

	if (nb_tx_queues == sa->txq_count)
		goto done;

	if (sa->txq_info == NULL) {
		sa->txq_info = rte_calloc_socket("sfc-txqs", nb_tx_queues,
						 sizeof(sa->txq_info[0]), 0,
						 sa->socket_id);
		if (sa->txq_info == NULL)
			goto fail_txqs_alloc;
	} else {
		struct sfc_txq_info *new_txq_info;

		if (nb_tx_queues < sa->txq_count)
			sfc_tx_fini_queues(sa, nb_tx_queues);

		new_txq_info =
			rte_realloc(sa->txq_info,
				    nb_tx_queues * sizeof(sa->txq_info[0]), 0);
		if (new_txq_info == NULL && nb_tx_queues > 0)
			goto fail_txqs_realloc;

		sa->txq_info = new_txq_info;
		if (nb_tx_queues > sa->txq_count)
			memset(&sa->txq_info[sa->txq_count], 0,
			       (nb_tx_queues - sa->txq_count) *
			       sizeof(sa->txq_info[0]));
	}

	while (sa->txq_count < nb_tx_queues) {
		rc = sfc_tx_qinit_info(sa, sa->txq_count);
		if (rc != 0)
			goto fail_tx_qinit_info;

		sa->txq_count++;
	}

done:
	return 0;

fail_tx_qinit_info:
fail_txqs_realloc:
fail_txqs_alloc:
	sfc_tx_close(sa);

fail_check_mode:
fail_tx_dma_desc_boundary:
	sfc_log_init(sa, "failed (rc = %d)", rc);
	return rc;
}

void
sfc_tx_close(struct sfc_adapter *sa)
{
	sfc_tx_fini_queues(sa, 0);

	rte_free(sa->txq_info);
	sa->txq_info = NULL;
}

int
sfc_tx_qstart(struct sfc_adapter *sa, unsigned int sw_index)
{
	const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
	struct rte_eth_dev_data *dev_data;
	struct sfc_txq_info *txq_info;
	struct sfc_txq *txq;
	struct sfc_evq *evq;
	uint16_t flags;
	unsigned int desc_index;
	int rc = 0;

	sfc_log_init(sa, "TxQ = %u", sw_index);

	SFC_ASSERT(sw_index < sa->txq_count);
	txq_info = &sa->txq_info[sw_index];

	txq = txq_info->txq;

	SFC_ASSERT(txq->state == SFC_TXQ_INITIALIZED);

	evq = txq->evq;

	rc = sfc_ev_qstart(evq, sfc_evq_index_by_txq_sw_index(sa, sw_index));
	if (rc != 0)
		goto fail_ev_qstart;

	/*
	 * It seems that DPDK has no controls regarding IPv4 offloads,
	 * hence, we always enable it here
	 */
	if ((txq->flags & ETH_TXQ_FLAGS_NOXSUMTCP) ||
	    (txq->flags & ETH_TXQ_FLAGS_NOXSUMUDP)) {
		flags = EFX_TXQ_CKSUM_IPV4;

		if (encp->enc_tunnel_encapsulations_supported != 0)
			flags |= EFX_TXQ_CKSUM_INNER_IPV4;
	} else {
		flags = EFX_TXQ_CKSUM_IPV4 | EFX_TXQ_CKSUM_TCPUDP;

		if (encp->enc_tunnel_encapsulations_supported != 0)
			flags |= EFX_TXQ_CKSUM_INNER_IPV4 |
				 EFX_TXQ_CKSUM_INNER_TCPUDP;

		if (sa->tso)
			flags |= EFX_TXQ_FATSOV2;
	}

	rc = efx_tx_qcreate(sa->nic, sw_index, 0, &txq->mem,
			    txq_info->entries, 0 /* not used on EF10 */,
			    flags, evq->common,
			    &txq->common, &desc_index);
	if (rc != 0) {
		if (sa->tso && (rc == ENOSPC))
			sfc_err(sa, "ran out of TSO contexts");

		goto fail_tx_qcreate;
	}

	efx_tx_qenable(txq->common);

	txq->state |= SFC_TXQ_STARTED;

	rc = sa->dp_tx->qstart(txq->dp, evq->read_ptr, desc_index);
	if (rc != 0)
		goto fail_dp_qstart;

	/*
	 * It seems to be used by DPDK for debug purposes only ('rte_ether')
	 */
	dev_data = sa->eth_dev->data;
	dev_data->tx_queue_state[sw_index] = RTE_ETH_QUEUE_STATE_STARTED;

	return 0;

fail_dp_qstart:
	txq->state = SFC_TXQ_INITIALIZED;
	efx_tx_qdestroy(txq->common);

fail_tx_qcreate:
	sfc_ev_qstop(evq);

fail_ev_qstart:
	return rc;
}

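/*
 * Stop a transmit queue: stop the datapath queue, flush the hardware
 * queue (retrying on flush failure or timeout), reap remaining
 * descriptors, destroy the common queue and stop the associated
 * event queue.
 */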
void
sfc_tx_qstop(struct sfc_adapter *sa, unsigned int sw_index)
{
	struct rte_eth_dev_data *dev_data;
	struct sfc_txq_info *txq_info;
	struct sfc_txq *txq;
	unsigned int retry_count;
	unsigned int wait_count;
	int rc;

	sfc_log_init(sa, "TxQ = %u", sw_index);

	SFC_ASSERT(sw_index < sa->txq_count);
	txq_info = &sa->txq_info[sw_index];

	txq = txq_info->txq;

	if (txq->state == SFC_TXQ_INITIALIZED)
		return;

	SFC_ASSERT(txq->state & SFC_TXQ_STARTED);

	sa->dp_tx->qstop(txq->dp, &txq->evq->read_ptr);

	/*
	 * Retry TX queue flushing in case of flush failed or
	 * timeout; in the worst case it can delay for 6 seconds
	 */
	for (retry_count = 0;
	     ((txq->state & SFC_TXQ_FLUSHED) == 0) &&
	     (retry_count < SFC_TX_QFLUSH_ATTEMPTS);
	     ++retry_count) {
		rc = efx_tx_qflush(txq->common);
		if (rc != 0) {
			txq->state |= (rc == EALREADY) ?
				SFC_TXQ_FLUSHED : SFC_TXQ_FLUSH_FAILED;
			break;
		}

		/*
		 * Wait for TX queue flush done or flush failed event at least
		 * SFC_TX_QFLUSH_POLL_WAIT_MS milliseconds and not more
		 * than 2 seconds (SFC_TX_QFLUSH_POLL_WAIT_MS multiplied
		 * by SFC_TX_QFLUSH_POLL_ATTEMPTS)
		 */
		wait_count = 0;
		do {
			rte_delay_ms(SFC_TX_QFLUSH_POLL_WAIT_MS);
			sfc_ev_qpoll(txq->evq);
		} while ((txq->state & SFC_TXQ_FLUSHING) &&
			 wait_count++ < SFC_TX_QFLUSH_POLL_ATTEMPTS);

		if (txq->state & SFC_TXQ_FLUSHING)
			sfc_err(sa, "TxQ %u flush timed out", sw_index);

		if (txq->state & SFC_TXQ_FLUSHED)
			sfc_info(sa, "TxQ %u flushed", sw_index);
	}

	sa->dp_tx->qreap(txq->dp);

	txq->state = SFC_TXQ_INITIALIZED;

	efx_tx_qdestroy(txq->common);

	sfc_ev_qstop(txq->evq);

	/*
	 * It seems to be used by DPDK for debug purposes only ('rte_ether')
	 */
	dev_data = sa->eth_dev->data;
	dev_data->tx_queue_state[sw_index] = RTE_ETH_QUEUE_STATE_STOPPED;
}

int
sfc_tx_start(struct sfc_adapter *sa)
{
	unsigned int sw_index;
	int rc = 0;

	sfc_log_init(sa, "txq_count = %u", sa->txq_count);

	if (sa->tso) {
		if (!efx_nic_cfg_get(sa->nic)->enc_fw_assisted_tso_v2_enabled) {
			sfc_warn(sa, "TSO support was unable to be restored");
			sa->tso = B_FALSE;
		}
	}

	rc = efx_tx_init(sa->nic);
	if (rc != 0)
		goto fail_efx_tx_init;

	for (sw_index = 0; sw_index < sa->txq_count; ++sw_index) {
		if (!(sa->txq_info[sw_index].deferred_start) ||
		    sa->txq_info[sw_index].deferred_started) {
			rc = sfc_tx_qstart(sa, sw_index);
			if (rc != 0)
				goto fail_tx_qstart;
		}
	}

	return 0;

fail_tx_qstart:
	while (sw_index-- > 0)
		sfc_tx_qstop(sa, sw_index);

	efx_tx_fini(sa->nic);

fail_efx_tx_init:
	sfc_log_init(sa, "failed (rc = %d)", rc);
	return rc;
}

void
sfc_tx_stop(struct sfc_adapter *sa)
{
	unsigned int sw_index;

	sfc_log_init(sa, "txq_count = %u", sa->txq_count);

	sw_index = sa->txq_count;
	while (sw_index-- > 0) {
		if (sa->txq_info[sw_index].txq != NULL)
			sfc_tx_qstop(sa, sw_index);
	}

	efx_tx_fini(sa->nic);
}

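/*
 * Reap completed descriptors on the libefx datapath: poll the event queue
 * to advance 'txq->pending' and free the mbufs attached to all descriptors
 * completed since the previous reap.
 */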
static void
sfc_efx_tx_reap(struct sfc_efx_txq *txq)
{
	unsigned int completed;

	sfc_ev_qpoll(txq->evq);

	for (completed = txq->completed;
	     completed != txq->pending; completed++) {
		struct sfc_efx_tx_sw_desc *txd;

		txd = &txq->sw_ring[completed & txq->ptr_mask];

		if (txd->mbuf != NULL) {
			rte_pktmbuf_free(txd->mbuf);
			txd->mbuf = NULL;
		}
	}

	txq->completed = completed;
}

/*
 * The function is used to insert or update a VLAN tag;
 * the firmware keeps track of the VLAN tag to insert per TxQ
 * (controlled by option descriptors), hence, if the tag of the
 * packet to be sent differs from the one remembered by the firmware,
 * the function will update it
 */
static unsigned int
sfc_efx_tx_maybe_insert_tag(struct sfc_efx_txq *txq, struct rte_mbuf *m,
			    efx_desc_t **pend)
{
	uint16_t this_tag = ((m->ol_flags & PKT_TX_VLAN_PKT) ?
			     m->vlan_tci : 0);

	if (this_tag == txq->hw_vlan_tci)
		return 0;

	/*
	 * The check is done inside SFC_ASSERT() so that it is not evaluated
	 * in a non-debug build, where it might be too expensive on the
	 * data path
	 */
	SFC_ASSERT(efx_nic_cfg_get(txq->evq->sa->nic)->enc_hw_tx_insert_vlan_enabled);

	efx_tx_qdesc_vlantci_create(txq->common, rte_cpu_to_be_16(this_tag),
				    *pend);
	(*pend)++;
	txq->hw_vlan_tci = this_tag;

	return 1;
}

static uint16_t
sfc_efx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
	struct sfc_dp_txq *dp_txq = (struct sfc_dp_txq *)tx_queue;
	struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq);
	unsigned int added = txq->added;
	unsigned int pushed = added;
	unsigned int pkts_sent = 0;
	efx_desc_t *pend = &txq->pend_desc[0];
	const unsigned int hard_max_fill = txq->max_fill_level;
	const unsigned int soft_max_fill = hard_max_fill - txq->free_thresh;
	unsigned int fill_level = added - txq->completed;
	boolean_t reap_done;
	int rc __rte_unused;
	struct rte_mbuf **pktp;

	if (unlikely((txq->flags & SFC_EFX_TXQ_FLAG_RUNNING) == 0))
		goto done;

	/*
	 * If there is not enough space for a single packet, we should
	 * reap; otherwise, we should not do that all the time
	 * to avoid latency increase
	 */
	reap_done = (fill_level > soft_max_fill);

	if (reap_done) {
		sfc_efx_tx_reap(txq);
		/*
		 * Recalculate fill level since 'txq->completed'
		 * might have changed on reap
		 */
		fill_level = added - txq->completed;
	}

	for (pkts_sent = 0, pktp = &tx_pkts[0];
	     (pkts_sent < nb_pkts) && (fill_level <= soft_max_fill);
	     pkts_sent++, pktp++) {
		struct rte_mbuf *m_seg = *pktp;
		size_t pkt_len = m_seg->pkt_len;
		unsigned int pkt_descs = 0;
		size_t in_off = 0;

		/*
		 * Here VLAN TCI is expected to be zero if no
		 * DEV_TX_VLAN_OFFLOAD capability is advertised;
		 * if the calling app ignores the absence of
		 * DEV_TX_VLAN_OFFLOAD and pushes VLAN TCI, then
		 * TX_ERROR will occur
		 */
		pkt_descs += sfc_efx_tx_maybe_insert_tag(txq, m_seg, &pend);

		if (m_seg->ol_flags & PKT_TX_TCP_SEG) {
			/*
			 * We expect 'pkt->l[2, 3, 4]_len' values
			 * to be set correctly by the caller
			 */
			if (sfc_efx_tso_do(txq, added, &m_seg, &in_off, &pend,
					   &pkt_descs, &pkt_len) != 0) {
				/*
				 * We may have reached this place for
				 * one of the following reasons:
				 *
				 * 1) Packet header length is greater
				 *    than SFC_TSOH_STD_LEN
				 * 2) TCP header starts at more than
				 *    208 bytes into the frame
				 *
				 * We will deceive RTE saying that we
				 * have sent the packet, but we will
				 * actually drop it. Hence, we should
				 * revert 'pend' to the previous state
				 * (in case we have added a VLAN
				 * descriptor) and start processing
				 * another packet. The original mbuf,
				 * however, shouldn't be orphaned
				 */
				pend -= pkt_descs;

				rte_pktmbuf_free(*pktp);

				continue;
			}

			/*
			 * We've only added 2 FATSOv2 option descriptors
			 * and 1 descriptor for the linearized packet header.
			 * The outstanding work will be done in the same manner
			 * as for the usual non-TSO path
			 */
		}

		for (; m_seg != NULL; m_seg = m_seg->next) {
			efsys_dma_addr_t next_frag;
			size_t seg_len;

			seg_len = m_seg->data_len;
			next_frag = rte_mbuf_data_iova(m_seg);

			/*
			 * If we've started a TSO transaction a few steps
			 * earlier, we'll skip the packet header using an
			 * offset in the current segment (which has been
			 * set to the first one containing payload)
			 */
			seg_len -= in_off;
			next_frag += in_off;
			in_off = 0;

			do {
				efsys_dma_addr_t frag_addr = next_frag;
				size_t frag_len;

				/*
				 * It is assumed here that there is no
				 * limitation on address boundary
				 * crossing by DMA descriptor.
				 */
				frag_len = MIN(seg_len, txq->dma_desc_size_max);
				next_frag += frag_len;
				seg_len -= frag_len;
				pkt_len -= frag_len;

				efx_tx_qdesc_dma_create(txq->common,
							frag_addr, frag_len,
							(pkt_len == 0),
							pend++);

				pkt_descs++;
			} while (seg_len != 0);
		}

		added += pkt_descs;

		fill_level += pkt_descs;
		if (unlikely(fill_level > hard_max_fill)) {
			/*
			 * Our estimation for maximum number of descriptors
			 * required to send a packet seems to be wrong.
			 * Try to reap (if we haven't yet).
			 */
			if (!reap_done) {
				sfc_efx_tx_reap(txq);
				reap_done = B_TRUE;
				fill_level = added - txq->completed;
				if (fill_level > hard_max_fill) {
					pend -= pkt_descs;
					break;
				}
			} else {
				pend -= pkt_descs;
				break;
			}
		}

		/* Assign mbuf to the last used desc */
		txq->sw_ring[(added - 1) & txq->ptr_mask].mbuf = *pktp;
	}

	if (likely(pkts_sent > 0)) {
		rc = efx_tx_qdesc_post(txq->common, txq->pend_desc,
				       pend - &txq->pend_desc[0],
				       txq->completed, &txq->added);
		SFC_ASSERT(rc == 0);

		if (likely(pushed != txq->added))
			efx_tx_qpush(txq->common, txq->added, pushed);
	}

#if SFC_TX_XMIT_PKTS_REAP_AT_LEAST_ONCE
	if (!reap_done)
		sfc_efx_tx_reap(txq);
#endif

done:
	return pkts_sent;
}

struct sfc_txq *
sfc_txq_by_dp_txq(const struct sfc_dp_txq *dp_txq)
{
	const struct sfc_dp_queue *dpq = &dp_txq->dpq;
	struct rte_eth_dev *eth_dev;
	struct sfc_adapter *sa;
	struct sfc_txq *txq;

	SFC_ASSERT(rte_eth_dev_is_valid_port(dpq->port_id));
	eth_dev = &rte_eth_devices[dpq->port_id];

	sa = eth_dev->data->dev_private;

	SFC_ASSERT(dpq->queue_id < sa->txq_count);
	txq = sa->txq_info[dpq->queue_id].txq;

	SFC_ASSERT(txq != NULL);
	return txq;
}

static sfc_dp_tx_qsize_up_rings_t sfc_efx_tx_qsize_up_rings;
static int
sfc_efx_tx_qsize_up_rings(uint16_t nb_tx_desc,
			  unsigned int *txq_entries,
			  unsigned int *evq_entries,
			  unsigned int *txq_max_fill_level)
{
	*txq_entries = nb_tx_desc;
	*evq_entries = nb_tx_desc;
	*txq_max_fill_level = EFX_TXQ_LIMIT(*txq_entries);
	return 0;
}

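/*
 * Create a libefx datapath Tx queue: allocate the queue structure, the
 * pending descriptor array and the software ring, and set up TSO header
 * buffers if TSO is enabled on the adapter.
 */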
static sfc_dp_tx_qcreate_t sfc_efx_tx_qcreate;
static int
sfc_efx_tx_qcreate(uint16_t port_id, uint16_t queue_id,
		   const struct rte_pci_addr *pci_addr,
		   int socket_id,
		   const struct sfc_dp_tx_qcreate_info *info,
		   struct sfc_dp_txq **dp_txqp)
{
	struct sfc_efx_txq *txq;
	struct sfc_txq *ctrl_txq;
	int rc;

	rc = ENOMEM;
	txq = rte_zmalloc_socket("sfc-efx-txq", sizeof(*txq),
				 RTE_CACHE_LINE_SIZE, socket_id);
	if (txq == NULL)
		goto fail_txq_alloc;

	sfc_dp_queue_init(&txq->dp.dpq, port_id, queue_id, pci_addr);

	rc = ENOMEM;
	txq->pend_desc = rte_calloc_socket("sfc-efx-txq-pend-desc",
					   EFX_TXQ_LIMIT(info->txq_entries),
					   sizeof(*txq->pend_desc), 0,
					   socket_id);
	if (txq->pend_desc == NULL)
		goto fail_pend_desc_alloc;

	rc = ENOMEM;
	txq->sw_ring = rte_calloc_socket("sfc-efx-txq-sw_ring",
					 info->txq_entries,
					 sizeof(*txq->sw_ring),
					 RTE_CACHE_LINE_SIZE, socket_id);
	if (txq->sw_ring == NULL)
		goto fail_sw_ring_alloc;

	ctrl_txq = sfc_txq_by_dp_txq(&txq->dp);
	if (ctrl_txq->evq->sa->tso) {
		rc = sfc_efx_tso_alloc_tsoh_objs(txq->sw_ring,
						 info->txq_entries, socket_id);
		if (rc != 0)
			goto fail_alloc_tsoh_objs;
	}

	txq->evq = ctrl_txq->evq;
	txq->ptr_mask = info->txq_entries - 1;
	txq->max_fill_level = info->max_fill_level;
	txq->free_thresh = info->free_thresh;
	txq->dma_desc_size_max = info->dma_desc_size_max;

	*dp_txqp = &txq->dp;
	return 0;

fail_alloc_tsoh_objs:
	rte_free(txq->sw_ring);

fail_sw_ring_alloc:
	rte_free(txq->pend_desc);

fail_pend_desc_alloc:
	rte_free(txq);

fail_txq_alloc:
	return rc;
}

static sfc_dp_tx_qdestroy_t sfc_efx_tx_qdestroy;
static void
sfc_efx_tx_qdestroy(struct sfc_dp_txq *dp_txq)
{
	struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq);

	sfc_efx_tso_free_tsoh_objs(txq->sw_ring, txq->ptr_mask + 1);
	rte_free(txq->sw_ring);
	rte_free(txq->pend_desc);
	rte_free(txq);
}

static sfc_dp_tx_qstart_t sfc_efx_tx_qstart;
static int
sfc_efx_tx_qstart(struct sfc_dp_txq *dp_txq,
		  __rte_unused unsigned int evq_read_ptr,
		  unsigned int txq_desc_index)
{
	/* libefx-based datapath is specific to libefx-based PMD */
	struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq);
	struct sfc_txq *ctrl_txq = sfc_txq_by_dp_txq(dp_txq);

	txq->common = ctrl_txq->common;

	txq->pending = txq->completed = txq->added = txq_desc_index;
	txq->hw_vlan_tci = 0;

	txq->flags |= (SFC_EFX_TXQ_FLAG_STARTED | SFC_EFX_TXQ_FLAG_RUNNING);

	return 0;
}

static sfc_dp_tx_qstop_t sfc_efx_tx_qstop;
static void
sfc_efx_tx_qstop(struct sfc_dp_txq *dp_txq,
		 __rte_unused unsigned int *evq_read_ptr)
{
	struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq);

	txq->flags &= ~SFC_EFX_TXQ_FLAG_RUNNING;
}

static sfc_dp_tx_qreap_t sfc_efx_tx_qreap;
static void
sfc_efx_tx_qreap(struct sfc_dp_txq *dp_txq)
{
	struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq);
	unsigned int txds;

	sfc_efx_tx_reap(txq);

	for (txds = 0; txds <= txq->ptr_mask; txds++) {
		if (txq->sw_ring[txds].mbuf != NULL) {
			rte_pktmbuf_free(txq->sw_ring[txds].mbuf);
			txq->sw_ring[txds].mbuf = NULL;
		}
	}

	txq->flags &= ~SFC_EFX_TXQ_FLAG_STARTED;
}

static sfc_dp_tx_qdesc_status_t sfc_efx_tx_qdesc_status;
static int
sfc_efx_tx_qdesc_status(struct sfc_dp_txq *dp_txq, uint16_t offset)
{
	struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq);

	if (unlikely(offset > txq->ptr_mask))
		return -EINVAL;

	if (unlikely(offset >= txq->max_fill_level))
		return RTE_ETH_TX_DESC_UNAVAIL;

	/*
	 * Poll EvQ to derive up-to-date 'txq->pending' figure;
	 * it is required for the queue to be running, but the
	 * check is omitted because API design assumes that it
	 * is the duty of the caller to satisfy all conditions
	 */
	SFC_ASSERT((txq->flags & SFC_EFX_TXQ_FLAG_RUNNING) ==
		   SFC_EFX_TXQ_FLAG_RUNNING);
	sfc_ev_qpoll(txq->evq);

	/*
	 * Ring tail is 'txq->pending', and although descriptors
	 * between 'txq->completed' and 'txq->pending' are still
	 * in use by the driver, they should be reported as DONE
	 */
	if (unlikely(offset < (txq->added - txq->pending)))
		return RTE_ETH_TX_DESC_FULL;

	/*
	 * There is no separate return value for unused descriptors;
	 * the latter will be reported as DONE because genuine DONE
	 * descriptors will be freed anyway in SW on the next burst
	 */
	return RTE_ETH_TX_DESC_DONE;
}

struct sfc_dp_tx sfc_efx_tx = {
	.dp = {
		.name = SFC_KVARG_DATAPATH_EFX,
		.type = SFC_DP_TX,
		.hw_fw_caps = 0,
	},
	.features = SFC_DP_TX_FEAT_VLAN_INSERT |
		    SFC_DP_TX_FEAT_TSO |
		    SFC_DP_TX_FEAT_MULTI_POOL |
		    SFC_DP_TX_FEAT_REFCNT |
		    SFC_DP_TX_FEAT_MULTI_SEG,
	.qsize_up_rings = sfc_efx_tx_qsize_up_rings,
	.qcreate = sfc_efx_tx_qcreate,
	.qdestroy = sfc_efx_tx_qdestroy,
	.qstart = sfc_efx_tx_qstart,
	.qstop = sfc_efx_tx_qstop,
	.qreap = sfc_efx_tx_qreap,
	.qdesc_status = sfc_efx_tx_qdesc_status,
	.pkt_burst = sfc_efx_xmit_pkts,
};