/*-
 * BSD LICENSE
 *
 * Copyright (c) 2016-2017 Solarflare Communications Inc.
 * All rights reserved.
 *
 * This software was jointly developed between OKTET Labs (under contract
 * for Solarflare) and Solarflare Communications, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "sfc.h"
#include "sfc_debug.h"
#include "sfc_log.h"
#include "sfc_ev.h"
#include "sfc_tx.h"
#include "sfc_tweak.h"
#include "sfc_kvargs.h"

/*
 * Maximum number of TX queue flush attempts in case of
 * failure or flush timeout
 */
#define SFC_TX_QFLUSH_ATTEMPTS		(3)

/*
 * Time to wait between event queue polling attempts when waiting for TX
 * queue flush done or flush failed events
 */
#define SFC_TX_QFLUSH_POLL_WAIT_MS	(1)

/*
 * Maximum number of event queue polling attempts when waiting for TX queue
 * flush done or flush failed events; it defines TX queue flush attempt timeout
 * together with SFC_TX_QFLUSH_POLL_WAIT_MS
 */
#define SFC_TX_QFLUSH_POLL_ATTEMPTS	(2000)
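
/*
 * Worst-case TxQ stop latency implied by the values above:
 * SFC_TX_QFLUSH_ATTEMPTS * SFC_TX_QFLUSH_POLL_ATTEMPTS *
 * SFC_TX_QFLUSH_POLL_WAIT_MS = 3 * 2000 * 1 ms = 6 seconds
 * (see the retry loop in sfc_tx_qstop())
 */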

static int
sfc_tx_qcheck_conf(struct sfc_adapter *sa, uint16_t nb_tx_desc,
                   const struct rte_eth_txconf *tx_conf)
{
        unsigned int flags = tx_conf->txq_flags;
        const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
        int rc = 0;

        if (tx_conf->tx_rs_thresh != 0) {
                sfc_err(sa, "RS bit in transmit descriptor is not supported");
                rc = EINVAL;
        }

        if (tx_conf->tx_free_thresh > EFX_TXQ_LIMIT(nb_tx_desc)) {
                sfc_err(sa,
                        "TxQ free threshold too large: %u vs maximum %u",
                        tx_conf->tx_free_thresh, EFX_TXQ_LIMIT(nb_tx_desc));
                rc = EINVAL;
        }

        if (tx_conf->tx_thresh.pthresh != 0 ||
            tx_conf->tx_thresh.hthresh != 0 ||
            tx_conf->tx_thresh.wthresh != 0) {
                sfc_err(sa,
                        "prefetch/host/writeback thresholds are not supported");
                rc = EINVAL;
        }

        if (((flags & ETH_TXQ_FLAGS_NOMULTSEGS) == 0) &&
            (~sa->dp_tx->features & SFC_DP_TX_FEAT_MULTI_SEG)) {
                sfc_err(sa, "Multi-segment is not supported by %s datapath",
                        sa->dp_tx->dp.name);
                rc = EINVAL;
        }

        if ((flags & ETH_TXQ_FLAGS_NOVLANOFFL) == 0) {
                if (!encp->enc_hw_tx_insert_vlan_enabled) {
                        sfc_err(sa, "VLAN offload is not supported");
                        rc = EINVAL;
                } else if (~sa->dp_tx->features & SFC_DP_TX_FEAT_VLAN_INSERT) {
                        sfc_err(sa,
                                "VLAN offload is not supported by %s datapath",
                                sa->dp_tx->dp.name);
                        rc = EINVAL;
                }
        }

        if ((flags & ETH_TXQ_FLAGS_NOXSUMSCTP) == 0) {
                sfc_err(sa, "SCTP offload is not supported");
                rc = EINVAL;
        }

        /* We either perform both TCP and UDP offload, or no offload at all */
        if (((flags & ETH_TXQ_FLAGS_NOXSUMTCP) == 0) !=
            ((flags & ETH_TXQ_FLAGS_NOXSUMUDP) == 0)) {
                sfc_err(sa, "TCP and UDP offloads can't be set independently");
                rc = EINVAL;
        }

        return rc;
}
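
/*
 * Illustrative sketch (not part of the driver): an application-side queue
 * setup that passes the checks in sfc_tx_qcheck_conf() above; 'port_id' and
 * 'socket_id' are assumed to exist in the caller's context.  Multi-segment,
 * VLAN insertion and SCTP checksum offloads are disabled, TCP and UDP
 * checksum offloads are left enabled together, and the RS bit and
 * prefetch/host/writeback thresholds are left at zero:
 *
 *        struct rte_eth_txconf tx_conf = {
 *                .txq_flags = ETH_TXQ_FLAGS_NOMULTSEGS |
 *                             ETH_TXQ_FLAGS_NOVLANOFFL |
 *                             ETH_TXQ_FLAGS_NOXSUMSCTP,
 *                .tx_free_thresh = 0,
 *        };
 *
 *        rte_eth_tx_queue_setup(port_id, 0, 512, socket_id, &tx_conf);
 */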

void
sfc_tx_qflush_done(struct sfc_txq *txq)
{
        txq->state |= SFC_TXQ_FLUSHED;
        txq->state &= ~SFC_TXQ_FLUSHING;
}

int
sfc_tx_qinit(struct sfc_adapter *sa, unsigned int sw_index,
             uint16_t nb_tx_desc, unsigned int socket_id,
             const struct rte_eth_txconf *tx_conf)
{
        const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
        struct sfc_txq_info *txq_info;
        struct sfc_evq *evq;
        struct sfc_txq *txq;
        int rc = 0;
        struct sfc_dp_tx_qcreate_info info;

        sfc_log_init(sa, "TxQ = %u", sw_index);

        rc = sfc_tx_qcheck_conf(sa, nb_tx_desc, tx_conf);
        if (rc != 0)
                goto fail_bad_conf;

        SFC_ASSERT(sw_index < sa->txq_count);
        txq_info = &sa->txq_info[sw_index];

        SFC_ASSERT(nb_tx_desc <= sa->txq_max_entries);
        txq_info->entries = nb_tx_desc;

        rc = sfc_ev_qinit(sa, SFC_EVQ_TYPE_TX, sw_index,
                          txq_info->entries, socket_id, &evq);
        if (rc != 0)
                goto fail_ev_qinit;

        rc = ENOMEM;
        txq = rte_zmalloc_socket("sfc-txq", sizeof(*txq), 0, socket_id);
        if (txq == NULL)
                goto fail_txq_alloc;

        txq_info->txq = txq;

        txq->hw_index = sw_index;
        txq->evq = evq;
        txq->free_thresh =
                (tx_conf->tx_free_thresh) ? tx_conf->tx_free_thresh :
                SFC_TX_DEFAULT_FREE_THRESH;
        txq->flags = tx_conf->txq_flags;

        rc = sfc_dma_alloc(sa, "txq", sw_index, EFX_TXQ_SIZE(txq_info->entries),
                           socket_id, &txq->mem);
        if (rc != 0)
                goto fail_dma_alloc;

        memset(&info, 0, sizeof(info));
        info.free_thresh = txq->free_thresh;
        info.flags = tx_conf->txq_flags;
        info.txq_entries = txq_info->entries;
        info.dma_desc_size_max = encp->enc_tx_dma_desc_size_max;
        info.txq_hw_ring = txq->mem.esm_base;
        info.evq_entries = txq_info->entries;
        info.evq_hw_ring = evq->mem.esm_base;
        info.hw_index = txq->hw_index;
        info.mem_bar = sa->mem_bar.esb_base;

        rc = sa->dp_tx->qcreate(sa->eth_dev->data->port_id, sw_index,
                                &SFC_DEV_TO_PCI(sa->eth_dev)->addr,
                                socket_id, &info, &txq->dp);
        if (rc != 0)
                goto fail_dp_tx_qinit;

        evq->dp_txq = txq->dp;

        txq->state = SFC_TXQ_INITIALIZED;

        txq_info->deferred_start = (tx_conf->tx_deferred_start != 0);

        return 0;

fail_dp_tx_qinit:
        sfc_dma_free(sa, &txq->mem);

fail_dma_alloc:
        txq_info->txq = NULL;
        rte_free(txq);

fail_txq_alloc:
        sfc_ev_qfini(evq);

fail_ev_qinit:
        txq_info->entries = 0;

fail_bad_conf:
        sfc_log_init(sa, "failed (TxQ = %u, rc = %d)", sw_index, rc);
        return rc;
}
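
/*
 * Illustrative sketch (not part of the driver): a queue configured with
 * tx_conf.tx_deferred_start = 1 is recorded above but skipped by
 * sfc_tx_start(); the application is then expected to start it explicitly
 * (hypothetical 'port_id', queue 0):
 *
 *        rte_eth_tx_queue_setup(port_id, 0, 512, socket_id, &tx_conf);
 *        rte_eth_dev_start(port_id);
 *        ...
 *        rte_eth_dev_tx_queue_start(port_id, 0);
 */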

void
sfc_tx_qfini(struct sfc_adapter *sa, unsigned int sw_index)
{
        struct sfc_txq_info *txq_info;
        struct sfc_txq *txq;

        sfc_log_init(sa, "TxQ = %u", sw_index);

        SFC_ASSERT(sw_index < sa->txq_count);
        txq_info = &sa->txq_info[sw_index];

        txq = txq_info->txq;
        SFC_ASSERT(txq != NULL);
        SFC_ASSERT(txq->state == SFC_TXQ_INITIALIZED);

        sa->dp_tx->qdestroy(txq->dp);
        txq->dp = NULL;

        txq_info->txq = NULL;
        txq_info->entries = 0;

        sfc_dma_free(sa, &txq->mem);

        sfc_ev_qfini(txq->evq);
        txq->evq = NULL;

        rte_free(txq);
}

static int
sfc_tx_qinit_info(struct sfc_adapter *sa, unsigned int sw_index)
{
        sfc_log_init(sa, "TxQ = %u", sw_index);

        return 0;
}

static int
sfc_tx_check_mode(struct sfc_adapter *sa, const struct rte_eth_txmode *txmode)
{
        int rc = 0;

        switch (txmode->mq_mode) {
        case ETH_MQ_TX_NONE:
                break;
        default:
                sfc_err(sa, "Tx multi-queue mode %u not supported",
                        txmode->mq_mode);
                rc = EINVAL;
        }

        /*
         * These features are claimed to be i40e-specific,
         * but it does make sense to double-check their absence
         */
        if (txmode->hw_vlan_reject_tagged) {
                sfc_err(sa, "Rejecting tagged packets not supported");
                rc = EINVAL;
        }

        if (txmode->hw_vlan_reject_untagged) {
                sfc_err(sa, "Rejecting untagged packets not supported");
                rc = EINVAL;
        }

        if (txmode->hw_vlan_insert_pvid) {
                sfc_err(sa, "Port-based VLAN insertion not supported");
                rc = EINVAL;
        }

        return rc;
}

/**
 * Destroy excess queues that are no longer needed after reconfiguration
 * or complete close.
 */
static void
sfc_tx_fini_queues(struct sfc_adapter *sa, unsigned int nb_tx_queues)
{
        int sw_index;

        SFC_ASSERT(nb_tx_queues <= sa->txq_count);

        sw_index = sa->txq_count;
        while (--sw_index >= (int)nb_tx_queues) {
                if (sa->txq_info[sw_index].txq != NULL)
                        sfc_tx_qfini(sa, sw_index);
        }

        sa->txq_count = nb_tx_queues;
}

int
sfc_tx_configure(struct sfc_adapter *sa)
{
        const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
        const struct rte_eth_conf *dev_conf = &sa->eth_dev->data->dev_conf;
        const unsigned int nb_tx_queues = sa->eth_dev->data->nb_tx_queues;
        int rc = 0;

        sfc_log_init(sa, "nb_tx_queues=%u (old %u)",
                     nb_tx_queues, sa->txq_count);

        /*
         * The datapath implementation assumes absence of boundary
         * limits on Tx DMA descriptors. Adding such checks to the
         * datapath would simply make it slower.
         */
        if (encp->enc_tx_dma_desc_boundary != 0) {
                rc = ENOTSUP;
                goto fail_tx_dma_desc_boundary;
        }

        rc = sfc_tx_check_mode(sa, &dev_conf->txmode);
        if (rc != 0)
                goto fail_check_mode;

        if (nb_tx_queues == sa->txq_count)
                goto done;

        if (sa->txq_info == NULL) {
                sa->txq_info = rte_calloc_socket("sfc-txqs", nb_tx_queues,
                                                 sizeof(sa->txq_info[0]), 0,
                                                 sa->socket_id);
                if (sa->txq_info == NULL)
                        goto fail_txqs_alloc;
        } else {
                struct sfc_txq_info *new_txq_info;

                if (nb_tx_queues < sa->txq_count)
                        sfc_tx_fini_queues(sa, nb_tx_queues);

                new_txq_info =
                        rte_realloc(sa->txq_info,
                                    nb_tx_queues * sizeof(sa->txq_info[0]), 0);
                if (new_txq_info == NULL && nb_tx_queues > 0)
                        goto fail_txqs_realloc;

                sa->txq_info = new_txq_info;
                if (nb_tx_queues > sa->txq_count)
                        memset(&sa->txq_info[sa->txq_count], 0,
                               (nb_tx_queues - sa->txq_count) *
                               sizeof(sa->txq_info[0]));
        }

        while (sa->txq_count < nb_tx_queues) {
                rc = sfc_tx_qinit_info(sa, sa->txq_count);
                if (rc != 0)
                        goto fail_tx_qinit_info;

                sa->txq_count++;
        }

done:
        return 0;

fail_tx_qinit_info:
fail_txqs_realloc:
fail_txqs_alloc:
        sfc_tx_close(sa);

fail_check_mode:
fail_tx_dma_desc_boundary:
        sfc_log_init(sa, "failed (rc = %d)", rc);
        return rc;
}

void
sfc_tx_close(struct sfc_adapter *sa)
{
        sfc_tx_fini_queues(sa, 0);

        rte_free(sa->txq_info);
        sa->txq_info = NULL;
}
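
/*
 * Queue lifecycle summary for the two functions below: sfc_tx_qstart()
 * takes an INITIALIZED queue to STARTED; sfc_tx_qstop() stops the
 * datapath, flushes and reaps the queue and returns it to INITIALIZED.
 * SFC_TXQ_FLUSHED is set (and SFC_TXQ_FLUSHING cleared) by
 * sfc_tx_qflush_done(), which is expected to be called from event queue
 * processing while sfc_tx_qstop() polls the event queue via sfc_ev_qpoll().
 */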

int
sfc_tx_qstart(struct sfc_adapter *sa, unsigned int sw_index)
{
        struct rte_eth_dev_data *dev_data;
        struct sfc_txq_info *txq_info;
        struct sfc_txq *txq;
        struct sfc_evq *evq;
        uint16_t flags;
        unsigned int desc_index;
        int rc = 0;

        sfc_log_init(sa, "TxQ = %u", sw_index);

        SFC_ASSERT(sw_index < sa->txq_count);
        txq_info = &sa->txq_info[sw_index];

        txq = txq_info->txq;

        SFC_ASSERT(txq->state == SFC_TXQ_INITIALIZED);

        evq = txq->evq;

        rc = sfc_ev_qstart(evq, sfc_evq_index_by_txq_sw_index(sa, sw_index));
        if (rc != 0)
                goto fail_ev_qstart;

        /*
         * It seems that DPDK has no controls regarding IPv4 offloads,
         * hence, we always enable it here
         */
        if ((txq->flags & ETH_TXQ_FLAGS_NOXSUMTCP) ||
            (txq->flags & ETH_TXQ_FLAGS_NOXSUMUDP)) {
                flags = EFX_TXQ_CKSUM_IPV4;
        } else {
                flags = EFX_TXQ_CKSUM_IPV4 | EFX_TXQ_CKSUM_TCPUDP;

                if (sa->tso)
                        flags |= EFX_TXQ_FATSOV2;
        }

        rc = efx_tx_qcreate(sa->nic, sw_index, 0, &txq->mem,
                            txq_info->entries, 0 /* not used on EF10 */,
                            flags, evq->common,
                            &txq->common, &desc_index);
        if (rc != 0) {
                if (sa->tso && (rc == ENOSPC))
                        sfc_err(sa, "ran out of TSO contexts");

                goto fail_tx_qcreate;
        }

        efx_tx_qenable(txq->common);

        txq->state |= SFC_TXQ_STARTED;

        rc = sa->dp_tx->qstart(txq->dp, evq->read_ptr, desc_index);
        if (rc != 0)
                goto fail_dp_qstart;

        /*
         * It seems to be used by DPDK for debug purposes only ('rte_ether')
         */
        dev_data = sa->eth_dev->data;
        dev_data->tx_queue_state[sw_index] = RTE_ETH_QUEUE_STATE_STARTED;

        return 0;

fail_dp_qstart:
        txq->state = SFC_TXQ_INITIALIZED;
        efx_tx_qdestroy(txq->common);

fail_tx_qcreate:
        sfc_ev_qstop(evq);

fail_ev_qstart:
        return rc;
}

void
sfc_tx_qstop(struct sfc_adapter *sa, unsigned int sw_index)
{
        struct rte_eth_dev_data *dev_data;
        struct sfc_txq_info *txq_info;
        struct sfc_txq *txq;
        unsigned int retry_count;
        unsigned int wait_count;

        sfc_log_init(sa, "TxQ = %u", sw_index);

        SFC_ASSERT(sw_index < sa->txq_count);
        txq_info = &sa->txq_info[sw_index];

        txq = txq_info->txq;

        if (txq->state == SFC_TXQ_INITIALIZED)
                return;

        SFC_ASSERT(txq->state & SFC_TXQ_STARTED);

        sa->dp_tx->qstop(txq->dp, &txq->evq->read_ptr);

        /*
         * Retry TX queue flushing in case of flush failed or
         * timeout; in the worst case it can delay for 6 seconds
         */
        for (retry_count = 0;
             ((txq->state & SFC_TXQ_FLUSHED) == 0) &&
             (retry_count < SFC_TX_QFLUSH_ATTEMPTS);
             ++retry_count) {
                if (efx_tx_qflush(txq->common) != 0) {
                        txq->state |= SFC_TXQ_FLUSHING;
                        break;
                }

                /*
                 * Wait for TX queue flush done or flush failed event at least
                 * SFC_TX_QFLUSH_POLL_WAIT_MS milliseconds and not more
                 * than 2 seconds (SFC_TX_QFLUSH_POLL_WAIT_MS multiplied
                 * by SFC_TX_QFLUSH_POLL_ATTEMPTS)
                 */
                wait_count = 0;
                do {
                        rte_delay_ms(SFC_TX_QFLUSH_POLL_WAIT_MS);
                        sfc_ev_qpoll(txq->evq);
                } while ((txq->state & SFC_TXQ_FLUSHING) &&
                         wait_count++ < SFC_TX_QFLUSH_POLL_ATTEMPTS);

                if (txq->state & SFC_TXQ_FLUSHING)
                        sfc_err(sa, "TxQ %u flush timed out", sw_index);

                if (txq->state & SFC_TXQ_FLUSHED)
                        sfc_info(sa, "TxQ %u flushed", sw_index);
        }

        sa->dp_tx->qreap(txq->dp);

        txq->state = SFC_TXQ_INITIALIZED;

        efx_tx_qdestroy(txq->common);

        sfc_ev_qstop(txq->evq);

        /*
         * It seems to be used by DPDK for debug purposes only ('rte_ether')
         */
        dev_data = sa->eth_dev->data;
        dev_data->tx_queue_state[sw_index] = RTE_ETH_QUEUE_STATE_STOPPED;
}

int
sfc_tx_start(struct sfc_adapter *sa)
{
        unsigned int sw_index;
        int rc = 0;

        sfc_log_init(sa, "txq_count = %u", sa->txq_count);

        if (sa->tso) {
                if (!efx_nic_cfg_get(sa->nic)->enc_fw_assisted_tso_v2_enabled) {
                        sfc_warn(sa, "TSO support was unable to be restored");
                        sa->tso = B_FALSE;
                }
        }

        rc = efx_tx_init(sa->nic);
        if (rc != 0)
                goto fail_efx_tx_init;

        for (sw_index = 0; sw_index < sa->txq_count; ++sw_index) {
                if (!(sa->txq_info[sw_index].deferred_start) ||
                    sa->txq_info[sw_index].deferred_started) {
                        rc = sfc_tx_qstart(sa, sw_index);
                        if (rc != 0)
                                goto fail_tx_qstart;
                }
        }

        return 0;

fail_tx_qstart:
        while (sw_index-- > 0)
                sfc_tx_qstop(sa, sw_index);

        efx_tx_fini(sa->nic);

fail_efx_tx_init:
        sfc_log_init(sa, "failed (rc = %d)", rc);
        return rc;
}

void
sfc_tx_stop(struct sfc_adapter *sa)
{
        unsigned int sw_index;

        sfc_log_init(sa, "txq_count = %u", sa->txq_count);

        sw_index = sa->txq_count;
        while (sw_index-- > 0) {
                if (sa->txq_info[sw_index].txq != NULL)
                        sfc_tx_qstop(sa, sw_index);
        }

        efx_tx_fini(sa->nic);
}

static void
sfc_efx_tx_reap(struct sfc_efx_txq *txq)
{
        unsigned int completed;

        sfc_ev_qpoll(txq->evq);

        for (completed = txq->completed;
             completed != txq->pending; completed++) {
                struct sfc_efx_tx_sw_desc *txd;

                txd = &txq->sw_ring[completed & txq->ptr_mask];

                if (txd->mbuf != NULL) {
                        rte_pktmbuf_free(txd->mbuf);
                        txd->mbuf = NULL;
                }
        }

        txq->completed = completed;
}

/*
 * The function is used to insert or update VLAN tag;
 * the firmware keeps per-TxQ state of the tag to insert
 * (controlled by option descriptors), hence, if the tag of the
 * packet to be sent is different from one remembered by the firmware,
 * the function will update it
 */
static unsigned int
sfc_efx_tx_maybe_insert_tag(struct sfc_efx_txq *txq, struct rte_mbuf *m,
                            efx_desc_t **pend)
{
        uint16_t this_tag = ((m->ol_flags & PKT_TX_VLAN_PKT) ?
                             m->vlan_tci : 0);

        if (this_tag == txq->hw_vlan_tci)
                return 0;

        /*
         * The expression inside SFC_ASSERT() is not desired to be checked in
         * a non-debug build because it might be too expensive on the data path
         */
        SFC_ASSERT(efx_nic_cfg_get(txq->evq->sa->nic)->enc_hw_tx_insert_vlan_enabled);

        efx_tx_qdesc_vlantci_create(txq->common, rte_cpu_to_be_16(this_tag),
                                    *pend);
        (*pend)++;
        txq->hw_vlan_tci = this_tag;

        return 1;
}
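
/*
 * Transmit path note (a summary of sfc_efx_xmit_pkts() below): descriptors
 * are reclaimed lazily; sfc_efx_tx_reap() is called only when the fill
 * level (added - completed) exceeds the soft limit
 * EFX_TXQ_LIMIT(ring size) - free_thresh, or when a packet would push the
 * fill level above the hard limit EFX_TXQ_LIMIT(ring size).
 */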

static uint16_t
sfc_efx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
        struct sfc_dp_txq *dp_txq = (struct sfc_dp_txq *)tx_queue;
        struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq);
        unsigned int added = txq->added;
        unsigned int pushed = added;
        unsigned int pkts_sent = 0;
        efx_desc_t *pend = &txq->pend_desc[0];
        const unsigned int hard_max_fill = EFX_TXQ_LIMIT(txq->ptr_mask + 1);
        const unsigned int soft_max_fill = hard_max_fill - txq->free_thresh;
        unsigned int fill_level = added - txq->completed;
        boolean_t reap_done;
        int rc __rte_unused;
        struct rte_mbuf **pktp;

        if (unlikely((txq->flags & SFC_EFX_TXQ_FLAG_RUNNING) == 0))
                goto done;

        /*
         * If insufficient space for a single packet is present,
         * we should reap; otherwise, we shouldn't do that all the time
         * to avoid latency increase
         */
        reap_done = (fill_level > soft_max_fill);

        if (reap_done) {
                sfc_efx_tx_reap(txq);
                /*
                 * Recalculate fill level since 'txq->completed'
                 * might have changed on reap
                 */
                fill_level = added - txq->completed;
        }

        for (pkts_sent = 0, pktp = &tx_pkts[0];
             (pkts_sent < nb_pkts) && (fill_level <= soft_max_fill);
             pkts_sent++, pktp++) {
                struct rte_mbuf *m_seg = *pktp;
                size_t pkt_len = m_seg->pkt_len;
                unsigned int pkt_descs = 0;
                size_t in_off = 0;

                /*
                 * Here VLAN TCI is expected to be zero if no
                 * DEV_TX_VLAN_OFFLOAD capability is advertised;
                 * if the calling app ignores the absence of
                 * DEV_TX_VLAN_OFFLOAD and pushes VLAN TCI, then
                 * TX_ERROR will occur
                 */
                pkt_descs += sfc_efx_tx_maybe_insert_tag(txq, m_seg, &pend);

                if (m_seg->ol_flags & PKT_TX_TCP_SEG) {
                        /*
                         * We expect 'pkt->l[2, 3, 4]_len' values
                         * to be set correctly by the caller
                         */
                        if (sfc_efx_tso_do(txq, added, &m_seg, &in_off, &pend,
                                           &pkt_descs, &pkt_len) != 0) {
                                /* We may have reached this place for
                                 * one of the following reasons:
                                 *
                                 * 1) Packet header length is greater
                                 *    than SFC_TSOH_STD_LEN
                                 * 2) TCP header starts at more than
                                 *    208 bytes into the frame
                                 *
                                 * We will deceive RTE saying that we have sent
                                 * the packet, but we will actually drop it.
                                 * Hence, we should revert 'pend' to the
                                 * previous state (in case we have added
                                 * VLAN descriptor) and start processing
                                 * the next packet. But the original
                                 * mbuf shouldn't be orphaned
                                 */
                                pend -= pkt_descs;

                                rte_pktmbuf_free(*pktp);

                                continue;
                        }

                        /*
                         * We've only added 2 FATSOv2 option descriptors
                         * and 1 descriptor for the linearized packet header.
                         * The outstanding work will be done in the same manner
                         * as for the usual non-TSO path
                         */
                }

                for (; m_seg != NULL; m_seg = m_seg->next) {
                        efsys_dma_addr_t next_frag;
                        size_t seg_len;

                        seg_len = m_seg->data_len;
                        next_frag = rte_mbuf_data_dma_addr(m_seg);

                        /*
                         * If we've started a TSO transaction a few steps
                         * earlier, we'll skip the packet header using an
                         * offset in the current segment (which has been
                         * set to the first one containing payload)
                         */
                        seg_len -= in_off;
                        next_frag += in_off;
                        in_off = 0;

                        do {
                                efsys_dma_addr_t frag_addr = next_frag;
                                size_t frag_len;

                                /*
                                 * It is assumed here that there is no
                                 * limitation on address boundary
                                 * crossing by DMA descriptor.
                                 */
                                frag_len = MIN(seg_len, txq->dma_desc_size_max);
                                next_frag += frag_len;
                                seg_len -= frag_len;
                                pkt_len -= frag_len;

                                efx_tx_qdesc_dma_create(txq->common,
                                                        frag_addr, frag_len,
                                                        (pkt_len == 0),
                                                        pend++);

                                pkt_descs++;
                        } while (seg_len != 0);
                }

                added += pkt_descs;

                fill_level += pkt_descs;
                if (unlikely(fill_level > hard_max_fill)) {
                        /*
                         * Our estimation for maximum number of descriptors
                         * required to send a packet seems to be wrong.
                         * Try to reap (if we haven't yet).
                         */
                        if (!reap_done) {
                                sfc_efx_tx_reap(txq);
                                reap_done = B_TRUE;
                                fill_level = added - txq->completed;
                                if (fill_level > hard_max_fill) {
                                        pend -= pkt_descs;
                                        break;
                                }
                        } else {
                                pend -= pkt_descs;
                                break;
                        }
                }

                /* Assign mbuf to the last used desc */
                txq->sw_ring[(added - 1) & txq->ptr_mask].mbuf = *pktp;
        }

        if (likely(pkts_sent > 0)) {
                rc = efx_tx_qdesc_post(txq->common, txq->pend_desc,
                                       pend - &txq->pend_desc[0],
                                       txq->completed, &txq->added);
                SFC_ASSERT(rc == 0);

                if (likely(pushed != txq->added))
                        efx_tx_qpush(txq->common, txq->added, pushed);
        }

#if SFC_TX_XMIT_PKTS_REAP_AT_LEAST_ONCE
        if (!reap_done)
                sfc_efx_tx_reap(txq);
#endif

done:
        return pkts_sent;
}
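
/*
 * sfc_efx_xmit_pkts() above is exported via the .pkt_burst member of
 * struct sfc_dp_tx at the end of this file; applications are expected to
 * reach it through rte_eth_tx_burst() once the PMD has installed it as the
 * device transmit handler (done outside this file).
 */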

struct sfc_txq *
sfc_txq_by_dp_txq(const struct sfc_dp_txq *dp_txq)
{
        const struct sfc_dp_queue *dpq = &dp_txq->dpq;
        struct rte_eth_dev *eth_dev;
        struct sfc_adapter *sa;
        struct sfc_txq *txq;

        SFC_ASSERT(rte_eth_dev_is_valid_port(dpq->port_id));
        eth_dev = &rte_eth_devices[dpq->port_id];

        sa = eth_dev->data->dev_private;

        SFC_ASSERT(dpq->queue_id < sa->txq_count);
        txq = sa->txq_info[dpq->queue_id].txq;

        SFC_ASSERT(txq != NULL);
        return txq;
}

static sfc_dp_tx_qcreate_t sfc_efx_tx_qcreate;
static int
sfc_efx_tx_qcreate(uint16_t port_id, uint16_t queue_id,
                   const struct rte_pci_addr *pci_addr,
                   int socket_id,
                   const struct sfc_dp_tx_qcreate_info *info,
                   struct sfc_dp_txq **dp_txqp)
{
        struct sfc_efx_txq *txq;
        struct sfc_txq *ctrl_txq;
        int rc;

        rc = ENOMEM;
        txq = rte_zmalloc_socket("sfc-efx-txq", sizeof(*txq),
                                 RTE_CACHE_LINE_SIZE, socket_id);
        if (txq == NULL)
                goto fail_txq_alloc;

        sfc_dp_queue_init(&txq->dp.dpq, port_id, queue_id, pci_addr);

        rc = ENOMEM;
        txq->pend_desc = rte_calloc_socket("sfc-efx-txq-pend-desc",
                                           EFX_TXQ_LIMIT(info->txq_entries),
                                           sizeof(*txq->pend_desc), 0,
                                           socket_id);
        if (txq->pend_desc == NULL)
                goto fail_pend_desc_alloc;

        rc = ENOMEM;
        txq->sw_ring = rte_calloc_socket("sfc-efx-txq-sw_ring",
                                         info->txq_entries,
                                         sizeof(*txq->sw_ring),
                                         RTE_CACHE_LINE_SIZE, socket_id);
        if (txq->sw_ring == NULL)
                goto fail_sw_ring_alloc;

        ctrl_txq = sfc_txq_by_dp_txq(&txq->dp);
        if (ctrl_txq->evq->sa->tso) {
                rc = sfc_efx_tso_alloc_tsoh_objs(txq->sw_ring,
                                                 info->txq_entries, socket_id);
                if (rc != 0)
                        goto fail_alloc_tsoh_objs;
        }

        txq->evq = ctrl_txq->evq;
        txq->ptr_mask = info->txq_entries - 1;
        txq->free_thresh = info->free_thresh;
        txq->dma_desc_size_max = info->dma_desc_size_max;

        *dp_txqp = &txq->dp;
        return 0;

fail_alloc_tsoh_objs:
        rte_free(txq->sw_ring);

fail_sw_ring_alloc:
        rte_free(txq->pend_desc);

fail_pend_desc_alloc:
        rte_free(txq);

fail_txq_alloc:
        return rc;
}

static sfc_dp_tx_qdestroy_t sfc_efx_tx_qdestroy;
static void
sfc_efx_tx_qdestroy(struct sfc_dp_txq *dp_txq)
{
        struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq);

        sfc_efx_tso_free_tsoh_objs(txq->sw_ring, txq->ptr_mask + 1);
        rte_free(txq->sw_ring);
        rte_free(txq->pend_desc);
        rte_free(txq);
}
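
/*
 * Datapath queue flag lifecycle for the functions below:
 * sfc_efx_tx_qstart() sets both SFC_EFX_TXQ_FLAG_STARTED and
 * SFC_EFX_TXQ_FLAG_RUNNING, sfc_efx_tx_qstop() clears only RUNNING so that
 * sfc_efx_xmit_pkts() stops accepting packets, and sfc_efx_tx_qreap()
 * clears STARTED once the remaining mbufs have been released.
 */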

static sfc_dp_tx_qstart_t sfc_efx_tx_qstart;
static int
sfc_efx_tx_qstart(struct sfc_dp_txq *dp_txq,
                  __rte_unused unsigned int evq_read_ptr,
                  unsigned int txq_desc_index)
{
        /* libefx-based datapath is specific to libefx-based PMD */
        struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq);
        struct sfc_txq *ctrl_txq = sfc_txq_by_dp_txq(dp_txq);

        txq->common = ctrl_txq->common;

        txq->pending = txq->completed = txq->added = txq_desc_index;
        txq->hw_vlan_tci = 0;

        txq->flags |= (SFC_EFX_TXQ_FLAG_STARTED | SFC_EFX_TXQ_FLAG_RUNNING);

        return 0;
}

static sfc_dp_tx_qstop_t sfc_efx_tx_qstop;
static void
sfc_efx_tx_qstop(struct sfc_dp_txq *dp_txq,
                 __rte_unused unsigned int *evq_read_ptr)
{
        struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq);

        txq->flags &= ~SFC_EFX_TXQ_FLAG_RUNNING;
}

static sfc_dp_tx_qreap_t sfc_efx_tx_qreap;
static void
sfc_efx_tx_qreap(struct sfc_dp_txq *dp_txq)
{
        struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq);
        unsigned int txds;

        sfc_efx_tx_reap(txq);

        for (txds = 0; txds <= txq->ptr_mask; txds++) {
                if (txq->sw_ring[txds].mbuf != NULL) {
                        rte_pktmbuf_free(txq->sw_ring[txds].mbuf);
                        txq->sw_ring[txds].mbuf = NULL;
                }
        }

        txq->flags &= ~SFC_EFX_TXQ_FLAG_STARTED;
}

struct sfc_dp_tx sfc_efx_tx = {
        .dp = {
                .name = SFC_KVARG_DATAPATH_EFX,
                .type = SFC_DP_TX,
                .hw_fw_caps = 0,
        },
        .features = SFC_DP_TX_FEAT_VLAN_INSERT |
                    SFC_DP_TX_FEAT_TSO |
                    SFC_DP_TX_FEAT_MULTI_SEG,
        .qcreate = sfc_efx_tx_qcreate,
        .qdestroy = sfc_efx_tx_qdestroy,
        .qstart = sfc_efx_tx_qstart,
        .qstop = sfc_efx_tx_qstop,
        .qreap = sfc_efx_tx_qreap,
        .pkt_burst = sfc_efx_xmit_pkts,
};
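
/*
 * Note: this libefx-based datapath is registered under the name
 * SFC_KVARG_DATAPATH_EFX; the datapath to use is presumably selected via
 * the corresponding device argument handled elsewhere in the PMD (see
 * sfc_kvargs.h), and alternative datapaths provide their own struct
 * sfc_dp_tx instances with the same set of callbacks.
 */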