/* SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 2016-2018 Solarflare Communications Inc.
 * All rights reserved.
 *
 * This software was jointly developed between OKTET Labs (under contract
 * for Solarflare) and Solarflare Communications, Inc.
 */

#include "sfc.h"
#include "sfc_debug.h"
#include "sfc_log.h"
#include "sfc_ev.h"
#include "sfc_tx.h"
#include "sfc_tweak.h"
#include "sfc_kvargs.h"

/*
 * Maximum number of TX queue flush attempts in case of
 * failure or flush timeout
 */
#define SFC_TX_QFLUSH_ATTEMPTS		(3)

/*
 * Time to wait between event queue polling attempts when waiting for TX
 * queue flush done or flush failed events
 */
#define SFC_TX_QFLUSH_POLL_WAIT_MS	(1)

/*
 * Maximum number of event queue polling attempts when waiting for TX queue
 * flush done or flush failed events; it defines TX queue flush attempt timeout
 * together with SFC_TX_QFLUSH_POLL_WAIT_MS
 */
#define SFC_TX_QFLUSH_POLL_ATTEMPTS	(2000)

uint64_t
sfc_tx_get_dev_offload_caps(struct sfc_adapter *sa)
{
	const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
	uint64_t caps = 0;

	if ((sa->dp_tx->features & SFC_DP_TX_FEAT_VLAN_INSERT) &&
	    encp->enc_hw_tx_insert_vlan_enabled)
		caps |= DEV_TX_OFFLOAD_VLAN_INSERT;

	if (sa->dp_tx->features & SFC_DP_TX_FEAT_MULTI_SEG)
		caps |= DEV_TX_OFFLOAD_MULTI_SEGS;

	if ((~sa->dp_tx->features & SFC_DP_TX_FEAT_MULTI_POOL) &&
	    (~sa->dp_tx->features & SFC_DP_TX_FEAT_REFCNT))
		caps |= DEV_TX_OFFLOAD_MBUF_FAST_FREE;

	return caps;
}

uint64_t
sfc_tx_get_queue_offload_caps(struct sfc_adapter *sa)
{
	const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
	uint64_t caps = 0;

	caps |= DEV_TX_OFFLOAD_IPV4_CKSUM;
	caps |= DEV_TX_OFFLOAD_UDP_CKSUM;
	caps |= DEV_TX_OFFLOAD_TCP_CKSUM;

	if (encp->enc_tunnel_encapsulations_supported)
		caps |= DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM;

	if (sa->tso)
		caps |= DEV_TX_OFFLOAD_TCP_TSO;

	return caps;
}

static int
sfc_tx_qcheck_conf(struct sfc_adapter *sa, unsigned int txq_max_fill_level,
		   const struct rte_eth_txconf *tx_conf,
		   uint64_t offloads)
{
	int rc = 0;

	if (tx_conf->tx_rs_thresh != 0) {
		sfc_err(sa, "RS bit in transmit descriptor is not supported");
		rc = EINVAL;
	}

	if (tx_conf->tx_free_thresh > txq_max_fill_level) {
		sfc_err(sa,
			"TxQ free threshold too large: %u vs maximum %u",
			tx_conf->tx_free_thresh, txq_max_fill_level);
		rc = EINVAL;
	}

	if (tx_conf->tx_thresh.pthresh != 0 ||
	    tx_conf->tx_thresh.hthresh != 0 ||
	    tx_conf->tx_thresh.wthresh != 0) {
		sfc_warn(sa,
			"prefetch/host/writeback thresholds are not supported");
	}

	/* We either perform both TCP and UDP offload, or no offload at all */
	if (((offloads & DEV_TX_OFFLOAD_TCP_CKSUM) == 0) !=
	    ((offloads & DEV_TX_OFFLOAD_UDP_CKSUM) == 0)) {
		sfc_err(sa, "TCP and UDP offloads can't be set independently");
		rc = EINVAL;
	}

	return rc;
}

void
sfc_tx_qflush_done(struct sfc_txq *txq)
{
	txq->state |= SFC_TXQ_FLUSHED;
	txq->state &= ~SFC_TXQ_FLUSHING;
}

int
sfc_tx_qinit(struct sfc_adapter *sa, unsigned int sw_index,
	     uint16_t nb_tx_desc, unsigned int socket_id,
	     const struct rte_eth_txconf *tx_conf)
{
	const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
	unsigned int txq_entries;
	unsigned int evq_entries;
	unsigned int txq_max_fill_level;
	struct sfc_txq_info *txq_info;
	struct sfc_evq *evq;
	struct sfc_txq *txq;
	int rc = 0;
	struct sfc_dp_tx_qcreate_info info;
	uint64_t offloads;

	sfc_log_init(sa, "TxQ = %u", sw_index);

	rc = sa->dp_tx->qsize_up_rings(nb_tx_desc, &txq_entries, &evq_entries,
				       &txq_max_fill_level);
	if (rc != 0)
		goto fail_size_up_rings;
	SFC_ASSERT(txq_entries >= EFX_TXQ_MINNDESCS);
	SFC_ASSERT(txq_entries <= sa->txq_max_entries);
	SFC_ASSERT(txq_entries >= nb_tx_desc);
	SFC_ASSERT(txq_max_fill_level <= nb_tx_desc);

	offloads = tx_conf->offloads |
		sa->eth_dev->data->dev_conf.txmode.offloads;
	rc = sfc_tx_qcheck_conf(sa, txq_max_fill_level, tx_conf, offloads);
	if (rc != 0)
		goto fail_bad_conf;

	SFC_ASSERT(sw_index < sa->txq_count);
	txq_info = &sa->txq_info[sw_index];

	txq_info->entries = txq_entries;

	rc = sfc_ev_qinit(sa, SFC_EVQ_TYPE_TX, sw_index,
			  evq_entries, socket_id, &evq);
	if (rc != 0)
		goto fail_ev_qinit;

	rc = ENOMEM;
	txq = rte_zmalloc_socket("sfc-txq", sizeof(*txq), 0, socket_id);
	if (txq == NULL)
		goto fail_txq_alloc;

	txq_info->txq = txq;

	txq->hw_index = sw_index;
	txq->evq = evq;
	txq->free_thresh =
		(tx_conf->tx_free_thresh) ? tx_conf->tx_free_thresh :
		SFC_TX_DEFAULT_FREE_THRESH;
	txq->offloads = offloads;

	rc = sfc_dma_alloc(sa, "txq", sw_index, EFX_TXQ_SIZE(txq_info->entries),
			   socket_id, &txq->mem);
	if (rc != 0)
		goto fail_dma_alloc;

	memset(&info, 0, sizeof(info));
	info.max_fill_level = txq_max_fill_level;
	info.free_thresh = txq->free_thresh;
	info.offloads = offloads;
	info.txq_entries = txq_info->entries;
	info.dma_desc_size_max = encp->enc_tx_dma_desc_size_max;
	info.txq_hw_ring = txq->mem.esm_base;
	info.evq_entries = evq_entries;
	info.evq_hw_ring = evq->mem.esm_base;
	info.hw_index = txq->hw_index;
	info.mem_bar = sa->mem_bar.esb_base;
	info.vi_window_shift = encp->enc_vi_window_shift;

	rc = sa->dp_tx->qcreate(sa->eth_dev->data->port_id, sw_index,
				&RTE_ETH_DEV_TO_PCI(sa->eth_dev)->addr,
				socket_id, &info, &txq->dp);
	if (rc != 0)
		goto fail_dp_tx_qinit;

	evq->dp_txq = txq->dp;

	txq->state = SFC_TXQ_INITIALIZED;

	txq_info->deferred_start = (tx_conf->tx_deferred_start != 0);

	return 0;

fail_dp_tx_qinit:
	sfc_dma_free(sa, &txq->mem);

fail_dma_alloc:
	txq_info->txq = NULL;
	rte_free(txq);

fail_txq_alloc:
	sfc_ev_qfini(evq);

fail_ev_qinit:
	txq_info->entries = 0;

fail_bad_conf:
fail_size_up_rings:
	sfc_log_init(sa, "failed (TxQ = %u, rc = %d)", sw_index, rc);
	return rc;
}

void
sfc_tx_qfini(struct sfc_adapter *sa, unsigned int sw_index)
{
	struct sfc_txq_info *txq_info;
	struct sfc_txq *txq;

	sfc_log_init(sa, "TxQ = %u", sw_index);

	SFC_ASSERT(sw_index < sa->txq_count);
	sa->eth_dev->data->tx_queues[sw_index] = NULL;

	txq_info = &sa->txq_info[sw_index];

	txq = txq_info->txq;
	SFC_ASSERT(txq != NULL);
	SFC_ASSERT(txq->state == SFC_TXQ_INITIALIZED);

	sa->dp_tx->qdestroy(txq->dp);
	txq->dp = NULL;

	txq_info->txq = NULL;
	txq_info->entries = 0;

	sfc_dma_free(sa, &txq->mem);

	sfc_ev_qfini(txq->evq);
	txq->evq = NULL;

	rte_free(txq);
}

static int
sfc_tx_qinit_info(struct sfc_adapter *sa, unsigned int sw_index)
{
	sfc_log_init(sa, "TxQ = %u", sw_index);

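	/*
	 * Nothing else to do here so far: the zeroed txq_info entry is
	 * filled in later by sfc_tx_qinit()
	 */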
	return 0;
}

static int
sfc_tx_check_mode(struct sfc_adapter *sa, const struct rte_eth_txmode *txmode)
{
	int rc = 0;

	switch (txmode->mq_mode) {
	case ETH_MQ_TX_NONE:
		break;
	default:
		sfc_err(sa, "Tx multi-queue mode %u not supported",
			txmode->mq_mode);
		rc = EINVAL;
	}

	/*
	 * These features are claimed to be i40e-specific,
	 * but it does make sense to double-check their absence
	 */
	if (txmode->hw_vlan_reject_tagged) {
		sfc_err(sa, "Rejecting tagged packets not supported");
		rc = EINVAL;
	}

	if (txmode->hw_vlan_reject_untagged) {
		sfc_err(sa, "Rejecting untagged packets not supported");
		rc = EINVAL;
	}

	if (txmode->hw_vlan_insert_pvid) {
		sfc_err(sa, "Port-based VLAN insertion not supported");
		rc = EINVAL;
	}

	return rc;
}

/**
 * Destroy excess queues that are no longer needed after reconfiguration
 * or complete close.
 */
static void
sfc_tx_fini_queues(struct sfc_adapter *sa, unsigned int nb_tx_queues)
{
	int sw_index;

	SFC_ASSERT(nb_tx_queues <= sa->txq_count);

	sw_index = sa->txq_count;
	while (--sw_index >= (int)nb_tx_queues) {
		if (sa->txq_info[sw_index].txq != NULL)
			sfc_tx_qfini(sa, sw_index);
	}

	sa->txq_count = nb_tx_queues;
}

int
sfc_tx_configure(struct sfc_adapter *sa)
{
	const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
	const struct rte_eth_conf *dev_conf = &sa->eth_dev->data->dev_conf;
	const unsigned int nb_tx_queues = sa->eth_dev->data->nb_tx_queues;
	int rc = 0;

	sfc_log_init(sa, "nb_tx_queues=%u (old %u)",
		     nb_tx_queues, sa->txq_count);

	/*
	 * The datapath implementation assumes absence of boundary
	 * limits on Tx DMA descriptors. Adding such checks to the
	 * datapath would simply make it slower.
	 */
	if (encp->enc_tx_dma_desc_boundary != 0) {
		rc = ENOTSUP;
		goto fail_tx_dma_desc_boundary;
	}

	rc = sfc_tx_check_mode(sa, &dev_conf->txmode);
	if (rc != 0)
		goto fail_check_mode;

	if (nb_tx_queues == sa->txq_count)
		goto done;

	if (sa->txq_info == NULL) {
		sa->txq_info = rte_calloc_socket("sfc-txqs", nb_tx_queues,
						 sizeof(sa->txq_info[0]), 0,
						 sa->socket_id);
		if (sa->txq_info == NULL)
			goto fail_txqs_alloc;
	} else {
		struct sfc_txq_info *new_txq_info;

		if (nb_tx_queues < sa->txq_count)
			sfc_tx_fini_queues(sa, nb_tx_queues);

		new_txq_info =
			rte_realloc(sa->txq_info,
				    nb_tx_queues * sizeof(sa->txq_info[0]), 0);
		if (new_txq_info == NULL && nb_tx_queues > 0)
			goto fail_txqs_realloc;

		sa->txq_info = new_txq_info;
		if (nb_tx_queues > sa->txq_count)
			memset(&sa->txq_info[sa->txq_count], 0,
			       (nb_tx_queues - sa->txq_count) *
			       sizeof(sa->txq_info[0]));
	}

	while (sa->txq_count < nb_tx_queues) {
		rc = sfc_tx_qinit_info(sa, sa->txq_count);
		if (rc != 0)
			goto fail_tx_qinit_info;

		sa->txq_count++;
	}

done:
	return 0;

fail_tx_qinit_info:
fail_txqs_realloc:
fail_txqs_alloc:
	sfc_tx_close(sa);

fail_check_mode:
fail_tx_dma_desc_boundary:
	sfc_log_init(sa, "failed (rc = %d)", rc);
	return rc;
}

void
sfc_tx_close(struct sfc_adapter *sa)
{
	sfc_tx_fini_queues(sa, 0);

	rte_free(sa->txq_info);
	sa->txq_info = NULL;
}

int
sfc_tx_qstart(struct sfc_adapter *sa, unsigned int sw_index)
{
	uint64_t offloads_supported = sfc_tx_get_dev_offload_caps(sa) |
				      sfc_tx_get_queue_offload_caps(sa);
	struct rte_eth_dev_data *dev_data;
	struct sfc_txq_info *txq_info;
	struct sfc_txq *txq;
	struct sfc_evq *evq;
	uint16_t flags = 0;
	unsigned int desc_index;
	int rc = 0;

	sfc_log_init(sa, "TxQ = %u", sw_index);

	SFC_ASSERT(sw_index < sa->txq_count);
	txq_info = &sa->txq_info[sw_index];

	txq = txq_info->txq;

	SFC_ASSERT(txq != NULL);
	SFC_ASSERT(txq->state == SFC_TXQ_INITIALIZED);

	evq = txq->evq;

	rc = sfc_ev_qstart(evq, sfc_evq_index_by_txq_sw_index(sa, sw_index));
	if (rc != 0)
		goto fail_ev_qstart;

	if (txq->offloads & DEV_TX_OFFLOAD_IPV4_CKSUM)
		flags |= EFX_TXQ_CKSUM_IPV4;

	if (txq->offloads & DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM)
		flags |= EFX_TXQ_CKSUM_INNER_IPV4;

	if ((txq->offloads & DEV_TX_OFFLOAD_TCP_CKSUM) ||
	    (txq->offloads & DEV_TX_OFFLOAD_UDP_CKSUM)) {
		flags |= EFX_TXQ_CKSUM_TCPUDP;

		if (offloads_supported & DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM)
			flags |= EFX_TXQ_CKSUM_INNER_TCPUDP;
	}

	if (txq->offloads & DEV_TX_OFFLOAD_TCP_TSO)
		flags |= EFX_TXQ_FATSOV2;

	rc = efx_tx_qcreate(sa->nic, sw_index, 0, &txq->mem,
			    txq_info->entries, 0 /* not used on EF10 */,
			    flags, evq->common,
			    &txq->common, &desc_index);
	if (rc != 0) {
		if (sa->tso && (rc == ENOSPC))
			sfc_err(sa, "ran out of TSO contexts");

		goto fail_tx_qcreate;
	}

	efx_tx_qenable(txq->common);

	txq->state |= SFC_TXQ_STARTED;

	rc = sa->dp_tx->qstart(txq->dp, evq->read_ptr, desc_index);
	if (rc != 0)
		goto fail_dp_qstart;

	/*
	 * It seems to be used by DPDK for debug purposes only ('rte_ether')
	 */
	dev_data = sa->eth_dev->data;
	dev_data->tx_queue_state[sw_index] = RTE_ETH_QUEUE_STATE_STARTED;

	return 0;
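
/*
 * On failure, undo the successfully completed setup steps above in
 * reverse order
 */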

fail_dp_qstart:
	txq->state = SFC_TXQ_INITIALIZED;
	efx_tx_qdestroy(txq->common);

fail_tx_qcreate:
	sfc_ev_qstop(evq);

fail_ev_qstart:
	return rc;
}

void
sfc_tx_qstop(struct sfc_adapter *sa, unsigned int sw_index)
{
	struct rte_eth_dev_data *dev_data;
	struct sfc_txq_info *txq_info;
	struct sfc_txq *txq;
	unsigned int retry_count;
	unsigned int wait_count;
	int rc;

	sfc_log_init(sa, "TxQ = %u", sw_index);

	SFC_ASSERT(sw_index < sa->txq_count);
	txq_info = &sa->txq_info[sw_index];

	txq = txq_info->txq;

	if (txq == NULL || txq->state == SFC_TXQ_INITIALIZED)
		return;

	SFC_ASSERT(txq->state & SFC_TXQ_STARTED);

	sa->dp_tx->qstop(txq->dp, &txq->evq->read_ptr);

	/*
	 * Retry TX queue flushing in case of flush failure or
	 * timeout; in the worst case it can delay for 6 seconds
	 */
	for (retry_count = 0;
	     ((txq->state & SFC_TXQ_FLUSHED) == 0) &&
	     (retry_count < SFC_TX_QFLUSH_ATTEMPTS);
	     ++retry_count) {
		rc = efx_tx_qflush(txq->common);
		if (rc != 0) {
			txq->state |= (rc == EALREADY) ?
				SFC_TXQ_FLUSHED : SFC_TXQ_FLUSH_FAILED;
			break;
		}

		/*
		 * Wait for TX queue flush done or flush failed event at least
		 * SFC_TX_QFLUSH_POLL_WAIT_MS milliseconds and not more
		 * than 2 seconds (SFC_TX_QFLUSH_POLL_WAIT_MS multiplied
		 * by SFC_TX_QFLUSH_POLL_ATTEMPTS)
		 */
		wait_count = 0;
		do {
			rte_delay_ms(SFC_TX_QFLUSH_POLL_WAIT_MS);
			sfc_ev_qpoll(txq->evq);
		} while ((txq->state & SFC_TXQ_FLUSHING) &&
			 wait_count++ < SFC_TX_QFLUSH_POLL_ATTEMPTS);

		if (txq->state & SFC_TXQ_FLUSHING)
			sfc_err(sa, "TxQ %u flush timed out", sw_index);

		if (txq->state & SFC_TXQ_FLUSHED)
			sfc_notice(sa, "TxQ %u flushed", sw_index);
	}

	sa->dp_tx->qreap(txq->dp);

	txq->state = SFC_TXQ_INITIALIZED;

	efx_tx_qdestroy(txq->common);

	sfc_ev_qstop(txq->evq);

	/*
	 * It seems to be used by DPDK for debug purposes only ('rte_ether')
	 */
	dev_data = sa->eth_dev->data;
	dev_data->tx_queue_state[sw_index] = RTE_ETH_QUEUE_STATE_STOPPED;
}

int
sfc_tx_start(struct sfc_adapter *sa)
{
	unsigned int sw_index;
	int rc = 0;

	sfc_log_init(sa, "txq_count = %u", sa->txq_count);

	if (sa->tso) {
		if (!efx_nic_cfg_get(sa->nic)->enc_fw_assisted_tso_v2_enabled) {
			sfc_warn(sa, "TSO support was unable to be restored");
			sa->tso = B_FALSE;
		}
	}

	rc = efx_tx_init(sa->nic);
	if (rc != 0)
		goto fail_efx_tx_init;

	for (sw_index = 0; sw_index < sa->txq_count; ++sw_index) {
		if (sa->txq_info[sw_index].txq != NULL &&
		    (!(sa->txq_info[sw_index].deferred_start) ||
		     sa->txq_info[sw_index].deferred_started)) {
			rc = sfc_tx_qstart(sa, sw_index);
			if (rc != 0)
				goto fail_tx_qstart;
		}
	}

	return 0;

fail_tx_qstart:
	while (sw_index-- > 0)
		sfc_tx_qstop(sa, sw_index);

	efx_tx_fini(sa->nic);

fail_efx_tx_init:
	sfc_log_init(sa, "failed (rc = %d)", rc);
	return rc;
}

void
sfc_tx_stop(struct sfc_adapter *sa)
{
	unsigned int sw_index;

	sfc_log_init(sa, "txq_count = %u", sa->txq_count);

	sw_index = sa->txq_count;
	while (sw_index-- > 0) {
		if (sa->txq_info[sw_index].txq != NULL)
			sfc_tx_qstop(sa, sw_index);
	}

	efx_tx_fini(sa->nic);
}

static void
sfc_efx_tx_reap(struct sfc_efx_txq *txq)
{
	unsigned int completed;

	sfc_ev_qpoll(txq->evq);

	for (completed = txq->completed;
	     completed != txq->pending; completed++) {
		struct sfc_efx_tx_sw_desc *txd;

		txd = &txq->sw_ring[completed & txq->ptr_mask];

		if (txd->mbuf != NULL) {
			rte_pktmbuf_free(txd->mbuf);
			txd->mbuf = NULL;
		}
	}

	txq->completed = completed;
}

/*
 * The function is used to insert or update a VLAN tag;
 * the firmware keeps per-TxQ state of the tag to insert
 * (controlled by option descriptors), hence, if the tag of the
 * packet to be sent differs from the one remembered by the firmware,
 * the function will update it
 */
static unsigned int
sfc_efx_tx_maybe_insert_tag(struct sfc_efx_txq *txq, struct rte_mbuf *m,
			    efx_desc_t **pend)
{
	uint16_t this_tag = ((m->ol_flags & PKT_TX_VLAN_PKT) ?
			     m->vlan_tci : 0);

	if (this_tag == txq->hw_vlan_tci)
		return 0;

	/*
	 * The expression inside SFC_ASSERT() is deliberately not checked in
	 * a non-debug build because it might be too expensive on the data path
	 */
	SFC_ASSERT(efx_nic_cfg_get(txq->evq->sa->nic)->enc_hw_tx_insert_vlan_enabled);

	efx_tx_qdesc_vlantci_create(txq->common, rte_cpu_to_be_16(this_tag),
				    *pend);
	(*pend)++;
	txq->hw_vlan_tci = this_tag;

	return 1;
}

static uint16_t
sfc_efx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
	struct sfc_dp_txq *dp_txq = (struct sfc_dp_txq *)tx_queue;
	struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq);
	unsigned int added = txq->added;
	unsigned int pushed = added;
	unsigned int pkts_sent = 0;
	efx_desc_t *pend = &txq->pend_desc[0];
	const unsigned int hard_max_fill = txq->max_fill_level;
	const unsigned int soft_max_fill = hard_max_fill - txq->free_thresh;
	unsigned int fill_level = added - txq->completed;
	boolean_t reap_done;
	int rc __rte_unused;
	struct rte_mbuf **pktp;

	if (unlikely((txq->flags & SFC_EFX_TXQ_FLAG_RUNNING) == 0))
		goto done;

	/*
	 * If there is insufficient space for even a single packet,
	 * we should reap; otherwise, we shouldn't reap all the time
	 * to avoid latency increase
	 */
	reap_done = (fill_level > soft_max_fill);

	if (reap_done) {
		sfc_efx_tx_reap(txq);
		/*
		 * Recalculate fill level since 'txq->completed'
		 * might have changed on reap
		 */
		fill_level = added - txq->completed;
	}

	for (pkts_sent = 0, pktp = &tx_pkts[0];
	     (pkts_sent < nb_pkts) && (fill_level <= soft_max_fill);
	     pkts_sent++, pktp++) {
		struct rte_mbuf *m_seg = *pktp;
		size_t pkt_len = m_seg->pkt_len;
		unsigned int pkt_descs = 0;
		size_t in_off = 0;

		/*
		 * Here VLAN TCI is expected to be zero if the
		 * DEV_TX_OFFLOAD_VLAN_INSERT capability is not advertised;
		 * if the calling app ignores the absence of
		 * DEV_TX_OFFLOAD_VLAN_INSERT and pushes VLAN TCI, then
		 * TX_ERROR will occur
		 */
		pkt_descs += sfc_efx_tx_maybe_insert_tag(txq, m_seg, &pend);

		if (m_seg->ol_flags & PKT_TX_TCP_SEG) {
			/*
			 * We expect 'pkt->l[2, 3, 4]_len' values
			 * to be set correctly by the caller
			 */
			if (sfc_efx_tso_do(txq, added, &m_seg, &in_off, &pend,
					   &pkt_descs, &pkt_len) != 0) {
				/* We may have reached this place for
				 * one of the following reasons:
				 *
				 * 1) Packet header length is greater
				 *    than SFC_TSOH_STD_LEN
				 * 2) TCP header starts at more than
				 *    208 bytes into the frame
				 *
				 * We will deceive RTE saying that we have sent
				 * the packet, but we will actually drop it.
				 * Hence, we should revert 'pend' to the
				 * previous state (in case we have added a
				 * VLAN descriptor) and start processing
				 * another packet. But the original
				 * mbuf shouldn't be orphaned
				 */
				pend -= pkt_descs;

				rte_pktmbuf_free(*pktp);

				continue;
			}

			/*
			 * We've only added 2 FATSOv2 option descriptors
			 * and 1 descriptor for the linearized packet header.
			 * The remaining work will be done in the same manner
			 * as for the usual non-TSO path
			 */
		}

		for (; m_seg != NULL; m_seg = m_seg->next) {
			efsys_dma_addr_t next_frag;
			size_t seg_len;

			seg_len = m_seg->data_len;
			next_frag = rte_mbuf_data_iova(m_seg);

			/*
			 * If we've started a TSO transaction a few steps
			 * earlier, we'll skip the packet header using an
			 * offset in the current segment (which has been set
			 * to the first one containing payload)
			 */
			seg_len -= in_off;
			next_frag += in_off;
			in_off = 0;

			do {
				efsys_dma_addr_t frag_addr = next_frag;
				size_t frag_len;

				/*
				 * It is assumed here that there is no
				 * limitation on address boundary
				 * crossing by DMA descriptor.
				 */
				frag_len = MIN(seg_len, txq->dma_desc_size_max);
				next_frag += frag_len;
				seg_len -= frag_len;
				pkt_len -= frag_len;

				efx_tx_qdesc_dma_create(txq->common,
							frag_addr, frag_len,
							(pkt_len == 0),
							pend++);

				pkt_descs++;
			} while (seg_len != 0);
		}

		added += pkt_descs;

		fill_level += pkt_descs;
		if (unlikely(fill_level > hard_max_fill)) {
			/*
			 * Our estimate of the maximum number of descriptors
			 * required to send a packet seems to be wrong.
			 * Try to reap (if we haven't yet).
			 */
			if (!reap_done) {
				sfc_efx_tx_reap(txq);
				reap_done = B_TRUE;
				fill_level = added - txq->completed;
				if (fill_level > hard_max_fill) {
					pend -= pkt_descs;
					break;
				}
			} else {
				pend -= pkt_descs;
				break;
			}
		}

		/* Assign mbuf to the last used desc */
		txq->sw_ring[(added - 1) & txq->ptr_mask].mbuf = *pktp;
	}

	if (likely(pkts_sent > 0)) {
		rc = efx_tx_qdesc_post(txq->common, txq->pend_desc,
				       pend - &txq->pend_desc[0],
				       txq->completed, &txq->added);
		SFC_ASSERT(rc == 0);

		if (likely(pushed != txq->added))
			efx_tx_qpush(txq->common, txq->added, pushed);
	}

#if SFC_TX_XMIT_PKTS_REAP_AT_LEAST_ONCE
	if (!reap_done)
		sfc_efx_tx_reap(txq);
#endif

done:
	return pkts_sent;
}

struct sfc_txq *
sfc_txq_by_dp_txq(const struct sfc_dp_txq *dp_txq)
{
	const struct sfc_dp_queue *dpq = &dp_txq->dpq;
	struct rte_eth_dev *eth_dev;
	struct sfc_adapter *sa;
	struct sfc_txq *txq;

	SFC_ASSERT(rte_eth_dev_is_valid_port(dpq->port_id));
	eth_dev = &rte_eth_devices[dpq->port_id];

	sa = eth_dev->data->dev_private;

	SFC_ASSERT(dpq->queue_id < sa->txq_count);
	txq = sa->txq_info[dpq->queue_id].txq;

	SFC_ASSERT(txq != NULL);
	return txq;
}

static sfc_dp_tx_qsize_up_rings_t sfc_efx_tx_qsize_up_rings;
static int
sfc_efx_tx_qsize_up_rings(uint16_t nb_tx_desc,
			  unsigned int *txq_entries,
			  unsigned int *evq_entries,
			  unsigned int *txq_max_fill_level)
{
	*txq_entries = nb_tx_desc;
	*evq_entries = nb_tx_desc;
	*txq_max_fill_level = EFX_TXQ_LIMIT(*txq_entries);
	return 0;
}

static sfc_dp_tx_qcreate_t sfc_efx_tx_qcreate;
static int
sfc_efx_tx_qcreate(uint16_t port_id, uint16_t queue_id,
		   const struct rte_pci_addr *pci_addr,
		   int socket_id,
		   const struct sfc_dp_tx_qcreate_info *info,
		   struct sfc_dp_txq **dp_txqp)
{
	struct sfc_efx_txq *txq;
	struct sfc_txq *ctrl_txq;
	int rc;

	rc = ENOMEM;
	txq = rte_zmalloc_socket("sfc-efx-txq", sizeof(*txq),
				 RTE_CACHE_LINE_SIZE, socket_id);
	if (txq == NULL)
		goto fail_txq_alloc;

	sfc_dp_queue_init(&txq->dp.dpq, port_id, queue_id, pci_addr);

	rc = ENOMEM;
	txq->pend_desc = rte_calloc_socket("sfc-efx-txq-pend-desc",
					   EFX_TXQ_LIMIT(info->txq_entries),
					   sizeof(*txq->pend_desc), 0,
					   socket_id);
	if (txq->pend_desc == NULL)
		goto fail_pend_desc_alloc;

	rc = ENOMEM;
	txq->sw_ring = rte_calloc_socket("sfc-efx-txq-sw_ring",
					 info->txq_entries,
					 sizeof(*txq->sw_ring),
					 RTE_CACHE_LINE_SIZE, socket_id);
	if (txq->sw_ring == NULL)
		goto fail_sw_ring_alloc;

	ctrl_txq = sfc_txq_by_dp_txq(&txq->dp);
	if (ctrl_txq->evq->sa->tso) {
		rc = sfc_efx_tso_alloc_tsoh_objs(txq->sw_ring,
						 info->txq_entries, socket_id);
		if (rc != 0)
			goto fail_alloc_tsoh_objs;
	}

	txq->evq = ctrl_txq->evq;
	txq->ptr_mask = info->txq_entries - 1;
	txq->max_fill_level = info->max_fill_level;
	txq->free_thresh = info->free_thresh;
	txq->dma_desc_size_max = info->dma_desc_size_max;

	*dp_txqp = &txq->dp;
	return 0;

fail_alloc_tsoh_objs:
	rte_free(txq->sw_ring);

fail_sw_ring_alloc:
	rte_free(txq->pend_desc);

fail_pend_desc_alloc:
	rte_free(txq);

fail_txq_alloc:
	return rc;
}

static sfc_dp_tx_qdestroy_t sfc_efx_tx_qdestroy;
static void
sfc_efx_tx_qdestroy(struct sfc_dp_txq *dp_txq)
{
	struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq);

	sfc_efx_tso_free_tsoh_objs(txq->sw_ring, txq->ptr_mask + 1);
	rte_free(txq->sw_ring);
	rte_free(txq->pend_desc);
	rte_free(txq);
}

static sfc_dp_tx_qstart_t sfc_efx_tx_qstart;
static int
sfc_efx_tx_qstart(struct sfc_dp_txq *dp_txq,
		  __rte_unused unsigned int evq_read_ptr,
		  unsigned int txq_desc_index)
{
	/* libefx-based datapath is specific to libefx-based PMD */
	struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq);
	struct sfc_txq *ctrl_txq = sfc_txq_by_dp_txq(dp_txq);

	txq->common = ctrl_txq->common;

	txq->pending = txq->completed = txq->added = txq_desc_index;
	txq->hw_vlan_tci = 0;

	txq->flags |= (SFC_EFX_TXQ_FLAG_STARTED | SFC_EFX_TXQ_FLAG_RUNNING);

	return 0;
}

static sfc_dp_tx_qstop_t sfc_efx_tx_qstop;
static void
sfc_efx_tx_qstop(struct sfc_dp_txq *dp_txq,
		 __rte_unused unsigned int *evq_read_ptr)
{
	struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq);

	txq->flags &= ~SFC_EFX_TXQ_FLAG_RUNNING;
}

static sfc_dp_tx_qreap_t sfc_efx_tx_qreap;
static void
sfc_efx_tx_qreap(struct sfc_dp_txq *dp_txq)
{
	struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq);
	unsigned int txds;

	sfc_efx_tx_reap(txq);

	for (txds = 0; txds <= txq->ptr_mask; txds++) {
		if (txq->sw_ring[txds].mbuf != NULL) {
			rte_pktmbuf_free(txq->sw_ring[txds].mbuf);
			txq->sw_ring[txds].mbuf = NULL;
		}
	}

	txq->flags &= ~SFC_EFX_TXQ_FLAG_STARTED;
}

static sfc_dp_tx_qdesc_status_t sfc_efx_tx_qdesc_status;
static int
sfc_efx_tx_qdesc_status(struct sfc_dp_txq *dp_txq, uint16_t offset)
{
	struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq);

	if (unlikely(offset > txq->ptr_mask))
		return -EINVAL;

	if (unlikely(offset >= txq->max_fill_level))
		return RTE_ETH_TX_DESC_UNAVAIL;

	/*
	 * Poll EvQ to derive the up-to-date 'txq->pending' figure;
	 * the queue is required to be running, but the check is
	 * omitted because the API design assumes that it is the
	 * duty of the caller to satisfy all conditions
	 */
	SFC_ASSERT((txq->flags & SFC_EFX_TXQ_FLAG_RUNNING) ==
		   SFC_EFX_TXQ_FLAG_RUNNING);
	sfc_ev_qpoll(txq->evq);

	/*
	 * Ring tail is 'txq->pending', and although descriptors
	 * between 'txq->completed' and 'txq->pending' are still
	 * in use by the driver, they should be reported as DONE
	 */
	if (unlikely(offset < (txq->added - txq->pending)))
		return RTE_ETH_TX_DESC_FULL;

	/*
	 * There is no separate return value for unused descriptors;
	 * the latter will be reported as DONE because genuine DONE
	 * descriptors will be freed anyway in SW on the next burst
	 */
	return RTE_ETH_TX_DESC_DONE;
}

struct sfc_dp_tx sfc_efx_tx = {
	.dp = {
		.name = SFC_KVARG_DATAPATH_EFX,
		.type = SFC_DP_TX,
		.hw_fw_caps = 0,
	},
	.features = SFC_DP_TX_FEAT_VLAN_INSERT |
		    SFC_DP_TX_FEAT_TSO |
		    SFC_DP_TX_FEAT_MULTI_POOL |
		    SFC_DP_TX_FEAT_REFCNT |
		    SFC_DP_TX_FEAT_MULTI_SEG,
	.qsize_up_rings = sfc_efx_tx_qsize_up_rings,
	.qcreate = sfc_efx_tx_qcreate,
	.qdestroy = sfc_efx_tx_qdestroy,
	.qstart = sfc_efx_tx_qstart,
	.qstop = sfc_efx_tx_qstop,
	.qreap = sfc_efx_tx_qreap,
	.qdesc_status = sfc_efx_tx_qdesc_status,
	.pkt_burst = sfc_efx_xmit_pkts,
};
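
/*
 * Illustrative sketch only, compiled out by a hypothetical guard macro
 * (SFC_TX_RING_ARITHMETIC_EXAMPLE is not defined anywhere in the driver):
 * it demonstrates the ring bookkeeping used by sfc_efx_xmit_pkts() above,
 * where 'added' and 'completed' are free-running counters, the fill level
 * is their difference, a reap is triggered once the soft limit
 * (max_fill_level - free_thresh) is exceeded, and a counter is masked by
 * 'ptr_mask' to obtain a ring slot.
 */
#ifdef SFC_TX_RING_ARITHMETIC_EXAMPLE
struct sfc_tx_ring_example {
	unsigned int added;		/* descriptors ever placed on the ring */
	unsigned int completed;		/* descriptors ever reaped */
	unsigned int max_fill_level;	/* hard limit on in-flight descriptors */
	unsigned int free_thresh;	/* headroom that triggers reaping */
	unsigned int ptr_mask;		/* ring size minus one (power of two) */
};

static boolean_t
sfc_tx_ring_example_need_reap(const struct sfc_tx_ring_example *ring)
{
	unsigned int fill_level = ring->added - ring->completed;
	unsigned int soft_max_fill = ring->max_fill_level - ring->free_thresh;

	/* Reap only when fewer than free_thresh descriptors remain free */
	return (fill_level > soft_max_fill) ? B_TRUE : B_FALSE;
}

static unsigned int
sfc_tx_ring_example_slot(const struct sfc_tx_ring_example *ring,
			 unsigned int counter)
{
	/* Unsigned wrap-around keeps both the difference and the mask valid */
	return counter & ring->ptr_mask;
}
#endif /* SFC_TX_RING_ARITHMETIC_EXAMPLE */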