/*-
 * BSD LICENSE
 *
 * Copyright (c) 2016-2017 Solarflare Communications Inc.
 * All rights reserved.
 *
 * This software was jointly developed between OKTET Labs (under contract
 * for Solarflare) and Solarflare Communications, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "sfc.h"
#include "sfc_debug.h"
#include "sfc_log.h"
#include "sfc_ev.h"
#include "sfc_tx.h"
#include "sfc_tweak.h"
#include "sfc_kvargs.h"

/*
 * Maximum number of TX queue flush attempts in case of
 * failure or flush timeout
 */
#define SFC_TX_QFLUSH_ATTEMPTS		(3)

/*
 * Time to wait between event queue polling attempts when waiting for TX
 * queue flush done or flush failed events
 */
#define SFC_TX_QFLUSH_POLL_WAIT_MS	(1)

/*
 * Maximum number of event queue polling attempts when waiting for TX queue
 * flush done or flush failed events; it defines TX queue flush attempt timeout
 * together with SFC_TX_QFLUSH_POLL_WAIT_MS
 */
#define SFC_TX_QFLUSH_POLL_ATTEMPTS	(2000)

static int
sfc_tx_qcheck_conf(struct sfc_adapter *sa, uint16_t nb_tx_desc,
		   const struct rte_eth_txconf *tx_conf)
{
	unsigned int flags = tx_conf->txq_flags;
	const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
	int rc = 0;

	if (tx_conf->tx_rs_thresh != 0) {
		sfc_err(sa, "RS bit in transmit descriptor is not supported");
		rc = EINVAL;
	}

	if (tx_conf->tx_free_thresh > EFX_TXQ_LIMIT(nb_tx_desc)) {
		sfc_err(sa,
			"TxQ free threshold too large: %u vs maximum %u",
			tx_conf->tx_free_thresh, EFX_TXQ_LIMIT(nb_tx_desc));
		rc = EINVAL;
	}

	if (tx_conf->tx_thresh.pthresh != 0 ||
	    tx_conf->tx_thresh.hthresh != 0 ||
	    tx_conf->tx_thresh.wthresh != 0) {
		sfc_warn(sa,
			"prefetch/host/writeback thresholds are not supported");
	}

	if (((flags & ETH_TXQ_FLAGS_NOMULTSEGS) == 0) &&
	    (~sa->dp_tx->features & SFC_DP_TX_FEAT_MULTI_SEG)) {
		sfc_err(sa, "Multi-segment is not supported by %s datapath",
			sa->dp_tx->dp.name);
		rc = EINVAL;
	}

	if (((flags & ETH_TXQ_FLAGS_NOMULTMEMP) == 0) &&
	    (~sa->dp_tx->features & SFC_DP_TX_FEAT_MULTI_POOL)) {
		sfc_err(sa, "multi-mempool is not supported by %s datapath",
			sa->dp_tx->dp.name);
		rc = EINVAL;
	}

	if (((flags & ETH_TXQ_FLAGS_NOREFCOUNT) == 0) &&
	    (~sa->dp_tx->features & SFC_DP_TX_FEAT_REFCNT)) {
		sfc_err(sa,
			"mbuf reference counters are neglected by %s datapath",
			sa->dp_tx->dp.name);
		rc = EINVAL;
	}

	if ((flags & ETH_TXQ_FLAGS_NOVLANOFFL) == 0) {
		if (!encp->enc_hw_tx_insert_vlan_enabled) {
			sfc_err(sa, "VLAN offload is not supported");
			rc = EINVAL;
		} else if (~sa->dp_tx->features & SFC_DP_TX_FEAT_VLAN_INSERT) {
			sfc_err(sa,
				"VLAN offload is not supported by %s datapath",
				sa->dp_tx->dp.name);
			rc = EINVAL;
		}
	}

	if ((flags & ETH_TXQ_FLAGS_NOXSUMSCTP) == 0) {
		sfc_err(sa, "SCTP offload is not supported");
		rc = EINVAL;
	}

	/* We either perform both TCP and UDP offload, or no offload at all */
	if (((flags & ETH_TXQ_FLAGS_NOXSUMTCP) == 0) !=
	    ((flags & ETH_TXQ_FLAGS_NOXSUMUDP) == 0)) {
		sfc_err(sa, "TCP and UDP offloads can't be set independently");
		rc = EINVAL;
	}

	return rc;
}
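
/*
 * Illustrative sketch only (not used by the driver): a TX queue
 * configuration that satisfies the checks above on the libefx datapath
 * when the application does not rely on HW VLAN insertion; port_id,
 * queue_id and nb_tx_desc are placeholders.
 *
 *	static const struct rte_eth_txconf example_tx_conf = {
 *		.txq_flags = ETH_TXQ_FLAGS_NOVLANOFFL |
 *			     ETH_TXQ_FLAGS_NOXSUMSCTP,
 *		.tx_free_thresh = 0,
 *	};
 *	rte_eth_tx_queue_setup(port_id, queue_id, nb_tx_desc,
 *			       rte_eth_dev_socket_id(port_id),
 *			       &example_tx_conf);
 *
 * Both TCP and UDP checksum offloads stay enabled (neither NOXSUM flag is
 * set), which satisfies the "both or neither" rule checked above.
 */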

void
sfc_tx_qflush_done(struct sfc_txq *txq)
{
	txq->state |= SFC_TXQ_FLUSHED;
	txq->state &= ~SFC_TXQ_FLUSHING;
}

int
sfc_tx_qinit(struct sfc_adapter *sa, unsigned int sw_index,
	     uint16_t nb_tx_desc, unsigned int socket_id,
	     const struct rte_eth_txconf *tx_conf)
{
	const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
	struct sfc_txq_info *txq_info;
	struct sfc_evq *evq;
	struct sfc_txq *txq;
	int rc = 0;
	struct sfc_dp_tx_qcreate_info info;

	sfc_log_init(sa, "TxQ = %u", sw_index);

	rc = sfc_tx_qcheck_conf(sa, nb_tx_desc, tx_conf);
	if (rc != 0)
		goto fail_bad_conf;

	SFC_ASSERT(sw_index < sa->txq_count);
	txq_info = &sa->txq_info[sw_index];

	SFC_ASSERT(nb_tx_desc <= sa->txq_max_entries);
	txq_info->entries = nb_tx_desc;

	rc = sfc_ev_qinit(sa, SFC_EVQ_TYPE_TX, sw_index,
			  txq_info->entries, socket_id, &evq);
	if (rc != 0)
		goto fail_ev_qinit;

	rc = ENOMEM;
	txq = rte_zmalloc_socket("sfc-txq", sizeof(*txq), 0, socket_id);
	if (txq == NULL)
		goto fail_txq_alloc;

	txq_info->txq = txq;

	txq->hw_index = sw_index;
	txq->evq = evq;
	txq->free_thresh =
		(tx_conf->tx_free_thresh) ? tx_conf->tx_free_thresh :
		SFC_TX_DEFAULT_FREE_THRESH;
	txq->flags = tx_conf->txq_flags;

	rc = sfc_dma_alloc(sa, "txq", sw_index, EFX_TXQ_SIZE(txq_info->entries),
			   socket_id, &txq->mem);
	if (rc != 0)
		goto fail_dma_alloc;

	memset(&info, 0, sizeof(info));
	info.free_thresh = txq->free_thresh;
	info.flags = tx_conf->txq_flags;
	info.txq_entries = txq_info->entries;
	info.dma_desc_size_max = encp->enc_tx_dma_desc_size_max;
	info.txq_hw_ring = txq->mem.esm_base;
	info.evq_entries = txq_info->entries;
	info.evq_hw_ring = evq->mem.esm_base;
	info.hw_index = txq->hw_index;
	info.mem_bar = sa->mem_bar.esb_base;

	rc = sa->dp_tx->qcreate(sa->eth_dev->data->port_id, sw_index,
				&RTE_ETH_DEV_TO_PCI(sa->eth_dev)->addr,
				socket_id, &info, &txq->dp);
	if (rc != 0)
		goto fail_dp_tx_qinit;

	evq->dp_txq = txq->dp;

	txq->state = SFC_TXQ_INITIALIZED;

	txq_info->deferred_start = (tx_conf->tx_deferred_start != 0);

	return 0;

fail_dp_tx_qinit:
	sfc_dma_free(sa, &txq->mem);

fail_dma_alloc:
	txq_info->txq = NULL;
	rte_free(txq);

fail_txq_alloc:
	sfc_ev_qfini(evq);

fail_ev_qinit:
	txq_info->entries = 0;

fail_bad_conf:
	sfc_log_init(sa, "failed (TxQ = %u, rc = %d)", sw_index, rc);
	return rc;
}
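
/*
 * Note on the error handling above: resources are acquired in the order
 * EvQ init -> TxQ control structure -> DMA ring -> datapath queue, and the
 * fail_* labels release them in exactly the reverse order, so a failure at
 * any step leaves no partially initialized queue behind.
 */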

void
sfc_tx_qfini(struct sfc_adapter *sa, unsigned int sw_index)
{
	struct sfc_txq_info *txq_info;
	struct sfc_txq *txq;

	sfc_log_init(sa, "TxQ = %u", sw_index);

	SFC_ASSERT(sw_index < sa->txq_count);
	txq_info = &sa->txq_info[sw_index];

	txq = txq_info->txq;
	SFC_ASSERT(txq != NULL);
	SFC_ASSERT(txq->state == SFC_TXQ_INITIALIZED);

	sa->dp_tx->qdestroy(txq->dp);
	txq->dp = NULL;

	txq_info->txq = NULL;
	txq_info->entries = 0;

	sfc_dma_free(sa, &txq->mem);

	sfc_ev_qfini(txq->evq);
	txq->evq = NULL;

	rte_free(txq);
}

static int
sfc_tx_qinit_info(struct sfc_adapter *sa, unsigned int sw_index)
{
	sfc_log_init(sa, "TxQ = %u", sw_index);

	return 0;
}

static int
sfc_tx_check_mode(struct sfc_adapter *sa, const struct rte_eth_txmode *txmode)
{
	int rc = 0;

	switch (txmode->mq_mode) {
	case ETH_MQ_TX_NONE:
		break;
	default:
		sfc_err(sa, "Tx multi-queue mode %u not supported",
			txmode->mq_mode);
		rc = EINVAL;
	}

	/*
	 * These features are claimed to be i40e-specific,
	 * but it does make sense to double-check their absence
	 */
	if (txmode->hw_vlan_reject_tagged) {
		sfc_err(sa, "Rejecting tagged packets not supported");
		rc = EINVAL;
	}

	if (txmode->hw_vlan_reject_untagged) {
		sfc_err(sa, "Rejecting untagged packets not supported");
		rc = EINVAL;
	}

	if (txmode->hw_vlan_insert_pvid) {
		sfc_err(sa, "Port-based VLAN insertion not supported");
		rc = EINVAL;
	}

	return rc;
}

/**
 * Destroy excess queues that are no longer needed after reconfiguration
 * or complete close.
 */
static void
sfc_tx_fini_queues(struct sfc_adapter *sa, unsigned int nb_tx_queues)
{
	int sw_index;

	SFC_ASSERT(nb_tx_queues <= sa->txq_count);

	sw_index = sa->txq_count;
	while (--sw_index >= (int)nb_tx_queues) {
		if (sa->txq_info[sw_index].txq != NULL)
			sfc_tx_qfini(sa, sw_index);
	}

	sa->txq_count = nb_tx_queues;
}
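
/*
 * Reconfiguration below is in-place: if the requested number of queues
 * shrinks, excess queues are finalized via sfc_tx_fini_queues(); if it
 * grows, the txq_info array is reallocated and the new tail entries are
 * zeroed before being initialized one by one.
 */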

int
sfc_tx_configure(struct sfc_adapter *sa)
{
	const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
	const struct rte_eth_conf *dev_conf = &sa->eth_dev->data->dev_conf;
	const unsigned int nb_tx_queues = sa->eth_dev->data->nb_tx_queues;
	int rc = 0;

	sfc_log_init(sa, "nb_tx_queues=%u (old %u)",
		     nb_tx_queues, sa->txq_count);

	/*
	 * The datapath implementation assumes absence of boundary
	 * limits on Tx DMA descriptors. Adding such checks to the
	 * datapath would simply make it slower.
	 */
	if (encp->enc_tx_dma_desc_boundary != 0) {
		rc = ENOTSUP;
		goto fail_tx_dma_desc_boundary;
	}

	rc = sfc_tx_check_mode(sa, &dev_conf->txmode);
	if (rc != 0)
		goto fail_check_mode;

	if (nb_tx_queues == sa->txq_count)
		goto done;

	if (sa->txq_info == NULL) {
		sa->txq_info = rte_calloc_socket("sfc-txqs", nb_tx_queues,
						 sizeof(sa->txq_info[0]), 0,
						 sa->socket_id);
		if (sa->txq_info == NULL) {
			rc = ENOMEM;
			goto fail_txqs_alloc;
		}
	} else {
		struct sfc_txq_info *new_txq_info;

		if (nb_tx_queues < sa->txq_count)
			sfc_tx_fini_queues(sa, nb_tx_queues);

		new_txq_info =
			rte_realloc(sa->txq_info,
				    nb_tx_queues * sizeof(sa->txq_info[0]), 0);
		if (new_txq_info == NULL && nb_tx_queues > 0) {
			rc = ENOMEM;
			goto fail_txqs_realloc;
		}

		sa->txq_info = new_txq_info;
		if (nb_tx_queues > sa->txq_count)
			memset(&sa->txq_info[sa->txq_count], 0,
			       (nb_tx_queues - sa->txq_count) *
			       sizeof(sa->txq_info[0]));
	}

	while (sa->txq_count < nb_tx_queues) {
		rc = sfc_tx_qinit_info(sa, sa->txq_count);
		if (rc != 0)
			goto fail_tx_qinit_info;

		sa->txq_count++;
	}

done:
	return 0;

fail_tx_qinit_info:
fail_txqs_realloc:
fail_txqs_alloc:
	sfc_tx_close(sa);

fail_check_mode:
fail_tx_dma_desc_boundary:
	sfc_log_init(sa, "failed (rc = %d)", rc);
	return rc;
}

void
sfc_tx_close(struct sfc_adapter *sa)
{
	sfc_tx_fini_queues(sa, 0);

	rte_free(sa->txq_info);
	sa->txq_info = NULL;
}
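
/*
 * Starting a TxQ below proceeds in a fixed order: the companion EvQ is
 * started first, then the common-code TxQ is created with the selected
 * checksum (and, optionally, FATSOv2) flags and enabled, and finally the
 * datapath-specific queue state is started on top of it.
 */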

int
sfc_tx_qstart(struct sfc_adapter *sa, unsigned int sw_index)
{
	const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
	struct rte_eth_dev_data *dev_data;
	struct sfc_txq_info *txq_info;
	struct sfc_txq *txq;
	struct sfc_evq *evq;
	uint16_t flags;
	unsigned int desc_index;
	int rc = 0;

	sfc_log_init(sa, "TxQ = %u", sw_index);

	SFC_ASSERT(sw_index < sa->txq_count);
	txq_info = &sa->txq_info[sw_index];

	txq = txq_info->txq;

	SFC_ASSERT(txq->state == SFC_TXQ_INITIALIZED);

	evq = txq->evq;

	rc = sfc_ev_qstart(evq, sfc_evq_index_by_txq_sw_index(sa, sw_index));
	if (rc != 0)
		goto fail_ev_qstart;

	/*
	 * It seems that DPDK has no controls regarding IPv4 offloads,
	 * hence, we always enable it here
	 */
	if ((txq->flags & ETH_TXQ_FLAGS_NOXSUMTCP) ||
	    (txq->flags & ETH_TXQ_FLAGS_NOXSUMUDP)) {
		flags = EFX_TXQ_CKSUM_IPV4;

		if (encp->enc_tunnel_encapsulations_supported != 0)
			flags |= EFX_TXQ_CKSUM_INNER_IPV4;
	} else {
		flags = EFX_TXQ_CKSUM_IPV4 | EFX_TXQ_CKSUM_TCPUDP;

		if (encp->enc_tunnel_encapsulations_supported != 0)
			flags |= EFX_TXQ_CKSUM_INNER_IPV4 |
				 EFX_TXQ_CKSUM_INNER_TCPUDP;

		if (sa->tso)
			flags |= EFX_TXQ_FATSOV2;
	}

	rc = efx_tx_qcreate(sa->nic, sw_index, 0, &txq->mem,
			    txq_info->entries, 0 /* not used on EF10 */,
			    flags, evq->common,
			    &txq->common, &desc_index);
	if (rc != 0) {
		if (sa->tso && (rc == ENOSPC))
			sfc_err(sa, "ran out of TSO contexts");

		goto fail_tx_qcreate;
	}

	efx_tx_qenable(txq->common);

	txq->state |= SFC_TXQ_STARTED;

	rc = sa->dp_tx->qstart(txq->dp, evq->read_ptr, desc_index);
	if (rc != 0)
		goto fail_dp_qstart;

	/*
	 * It seems to be used by DPDK for debug purposes only ('rte_ether')
	 */
	dev_data = sa->eth_dev->data;
	dev_data->tx_queue_state[sw_index] = RTE_ETH_QUEUE_STATE_STARTED;

	return 0;

fail_dp_qstart:
	txq->state = SFC_TXQ_INITIALIZED;
	efx_tx_qdestroy(txq->common);

fail_tx_qcreate:
	sfc_ev_qstop(evq);

fail_ev_qstart:
	return rc;
}
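
/*
 * Worst-case flush wait referenced in the comment inside sfc_tx_qstop():
 * SFC_TX_QFLUSH_ATTEMPTS * SFC_TX_QFLUSH_POLL_ATTEMPTS *
 * SFC_TX_QFLUSH_POLL_WAIT_MS = 3 * 2000 * 1 ms = 6 seconds.
 */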

void
sfc_tx_qstop(struct sfc_adapter *sa, unsigned int sw_index)
{
	struct rte_eth_dev_data *dev_data;
	struct sfc_txq_info *txq_info;
	struct sfc_txq *txq;
	unsigned int retry_count;
	unsigned int wait_count;
	int rc;

	sfc_log_init(sa, "TxQ = %u", sw_index);

	SFC_ASSERT(sw_index < sa->txq_count);
	txq_info = &sa->txq_info[sw_index];

	txq = txq_info->txq;

	if (txq->state == SFC_TXQ_INITIALIZED)
		return;

	SFC_ASSERT(txq->state & SFC_TXQ_STARTED);

	sa->dp_tx->qstop(txq->dp, &txq->evq->read_ptr);

	/*
	 * Retry TX queue flushing in case of flush failed or
	 * timeout; in the worst case it can delay for 6 seconds
	 */
	for (retry_count = 0;
	     ((txq->state & SFC_TXQ_FLUSHED) == 0) &&
	     (retry_count < SFC_TX_QFLUSH_ATTEMPTS);
	     ++retry_count) {
		rc = efx_tx_qflush(txq->common);
		if (rc != 0) {
			txq->state |= (rc == EALREADY) ?
				SFC_TXQ_FLUSHED : SFC_TXQ_FLUSH_FAILED;
			break;
		}

		/*
		 * Wait for TX queue flush done or flush failed event at least
		 * SFC_TX_QFLUSH_POLL_WAIT_MS milliseconds and not more
		 * than 2 seconds (SFC_TX_QFLUSH_POLL_WAIT_MS multiplied
		 * by SFC_TX_QFLUSH_POLL_ATTEMPTS)
		 */
		wait_count = 0;
		do {
			rte_delay_ms(SFC_TX_QFLUSH_POLL_WAIT_MS);
			sfc_ev_qpoll(txq->evq);
		} while ((txq->state & SFC_TXQ_FLUSHING) &&
			 wait_count++ < SFC_TX_QFLUSH_POLL_ATTEMPTS);

		if (txq->state & SFC_TXQ_FLUSHING)
			sfc_err(sa, "TxQ %u flush timed out", sw_index);

		if (txq->state & SFC_TXQ_FLUSHED)
			sfc_info(sa, "TxQ %u flushed", sw_index);
	}

	sa->dp_tx->qreap(txq->dp);

	txq->state = SFC_TXQ_INITIALIZED;

	efx_tx_qdestroy(txq->common);

	sfc_ev_qstop(txq->evq);

	/*
	 * It seems to be used by DPDK for debug purposes only ('rte_ether')
	 */
	dev_data = sa->eth_dev->data;
	dev_data->tx_queue_state[sw_index] = RTE_ETH_QUEUE_STATE_STOPPED;
}

int
sfc_tx_start(struct sfc_adapter *sa)
{
	unsigned int sw_index;
	int rc = 0;

	sfc_log_init(sa, "txq_count = %u", sa->txq_count);

	if (sa->tso) {
		if (!efx_nic_cfg_get(sa->nic)->enc_fw_assisted_tso_v2_enabled) {
			sfc_warn(sa, "TSO support was unable to be restored");
			sa->tso = B_FALSE;
		}
	}

	rc = efx_tx_init(sa->nic);
	if (rc != 0)
		goto fail_efx_tx_init;

	for (sw_index = 0; sw_index < sa->txq_count; ++sw_index) {
		if (!(sa->txq_info[sw_index].deferred_start) ||
		    sa->txq_info[sw_index].deferred_started) {
			rc = sfc_tx_qstart(sa, sw_index);
			if (rc != 0)
				goto fail_tx_qstart;
		}
	}

	return 0;

fail_tx_qstart:
	while (sw_index-- > 0)
		sfc_tx_qstop(sa, sw_index);

	efx_tx_fini(sa->nic);

fail_efx_tx_init:
	sfc_log_init(sa, "failed (rc = %d)", rc);
	return rc;
}

void
sfc_tx_stop(struct sfc_adapter *sa)
{
	unsigned int sw_index;

	sfc_log_init(sa, "txq_count = %u", sa->txq_count);

	sw_index = sa->txq_count;
	while (sw_index-- > 0) {
		if (sa->txq_info[sw_index].txq != NULL)
			sfc_tx_qstop(sa, sw_index);
	}

	efx_tx_fini(sa->nic);
}
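
/*
 * Ring bookkeeping used by the libefx datapath below (a summary derived
 * from how the fields are used in this file): 'added' counts descriptors
 * queued by the xmit path, 'pending' is advanced by event processing as
 * the NIC reports completions, and 'completed' trails 'pending' until
 * sfc_efx_tx_reap() frees the associated mbufs.
 */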

static void
sfc_efx_tx_reap(struct sfc_efx_txq *txq)
{
	unsigned int completed;

	sfc_ev_qpoll(txq->evq);

	for (completed = txq->completed;
	     completed != txq->pending; completed++) {
		struct sfc_efx_tx_sw_desc *txd;

		txd = &txq->sw_ring[completed & txq->ptr_mask];

		if (txd->mbuf != NULL) {
			rte_pktmbuf_free(txd->mbuf);
			txd->mbuf = NULL;
		}
	}

	txq->completed = completed;
}

/*
 * The function is used to insert or update VLAN tag;
 * the firmware keeps per-TxQ state of the tag to insert
 * (controlled by option descriptors), hence, if the tag of the
 * packet to be sent differs from the one remembered by the firmware,
 * the function will update it
 */
static unsigned int
sfc_efx_tx_maybe_insert_tag(struct sfc_efx_txq *txq, struct rte_mbuf *m,
			    efx_desc_t **pend)
{
	uint16_t this_tag = ((m->ol_flags & PKT_TX_VLAN_PKT) ?
			     m->vlan_tci : 0);

	if (this_tag == txq->hw_vlan_tci)
		return 0;

	/*
	 * The expression inside SFC_ASSERT() is not desired to be checked in
	 * a non-debug build because it might be too expensive on the data path
	 */
	SFC_ASSERT(efx_nic_cfg_get(txq->evq->sa->nic)->enc_hw_tx_insert_vlan_enabled);

	efx_tx_qdesc_vlantci_create(txq->common, rte_cpu_to_be_16(this_tag),
				    *pend);
	(*pend)++;
	txq->hw_vlan_tci = this_tag;

	return 1;
}
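
/*
 * Fill-level accounting in the burst function below: the hard limit is
 * EFX_TXQ_LIMIT(ring size) and the soft limit is the hard limit minus
 * 'free_thresh'. For example, assuming a 1024-entry ring and a free
 * threshold of 64 descriptors, reaping is triggered once more than
 * EFX_TXQ_LIMIT(1024) - 64 descriptors are in flight; the exact value of
 * EFX_TXQ_LIMIT() is defined by libefx.
 */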

static uint16_t
sfc_efx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
	struct sfc_dp_txq *dp_txq = (struct sfc_dp_txq *)tx_queue;
	struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq);
	unsigned int added = txq->added;
	unsigned int pushed = added;
	unsigned int pkts_sent = 0;
	efx_desc_t *pend = &txq->pend_desc[0];
	const unsigned int hard_max_fill = EFX_TXQ_LIMIT(txq->ptr_mask + 1);
	const unsigned int soft_max_fill = hard_max_fill - txq->free_thresh;
	unsigned int fill_level = added - txq->completed;
	boolean_t reap_done;
	int rc __rte_unused;
	struct rte_mbuf **pktp;

	if (unlikely((txq->flags & SFC_EFX_TXQ_FLAG_RUNNING) == 0))
		goto done;

	/*
	 * If insufficient space for a single packet is present,
	 * we should reap; otherwise, we shouldn't do that all the time
	 * to avoid latency increase
	 */
	reap_done = (fill_level > soft_max_fill);

	if (reap_done) {
		sfc_efx_tx_reap(txq);
		/*
		 * Recalculate fill level since 'txq->completed'
		 * might have changed on reap
		 */
		fill_level = added - txq->completed;
	}

	for (pkts_sent = 0, pktp = &tx_pkts[0];
	     (pkts_sent < nb_pkts) && (fill_level <= soft_max_fill);
	     pkts_sent++, pktp++) {
		struct rte_mbuf *m_seg = *pktp;
		size_t pkt_len = m_seg->pkt_len;
		unsigned int pkt_descs = 0;
		size_t in_off = 0;

		/*
		 * Here VLAN TCI is expected to be zero if no
		 * DEV_TX_VLAN_OFFLOAD capability is advertised;
		 * if the calling app ignores the absence of
		 * DEV_TX_VLAN_OFFLOAD and pushes VLAN TCI, then
		 * TX_ERROR will occur
		 */
		pkt_descs += sfc_efx_tx_maybe_insert_tag(txq, m_seg, &pend);

		if (m_seg->ol_flags & PKT_TX_TCP_SEG) {
			/*
			 * We expect 'pkt->l[2, 3, 4]_len' values
			 * to be set correctly by the caller
			 */
			if (sfc_efx_tso_do(txq, added, &m_seg, &in_off, &pend,
					   &pkt_descs, &pkt_len) != 0) {
				/* We may have reached this place for
				 * one of the following reasons:
				 *
				 * 1) Packet header length is greater
				 *    than SFC_TSOH_STD_LEN
				 * 2) TCP header starts at more than
				 *    208 bytes into the frame
				 *
				 * We will deceive RTE saying that we have sent
				 * the packet, but we will actually drop it.
				 * Hence, we should revert 'pend' to the
				 * previous state (in case we have added
				 * a VLAN descriptor) and start processing
				 * the next packet. The original mbuf is
				 * freed rather than left orphaned.
				 */
				pend -= pkt_descs;

				rte_pktmbuf_free(*pktp);

				continue;
			}

			/*
			 * We've only added 2 FATSOv2 option descriptors
			 * and 1 descriptor for the linearized packet header.
			 * The remaining work will be done in the same manner
			 * as for the usual non-TSO path
			 */
		}

		for (; m_seg != NULL; m_seg = m_seg->next) {
			efsys_dma_addr_t next_frag;
			size_t seg_len;

			seg_len = m_seg->data_len;
			next_frag = rte_mbuf_data_iova(m_seg);

			/*
			 * If we've started a TSO transaction a few steps
			 * earlier, we'll skip the packet header using an
			 * offset in the current segment (which has been set
			 * to the first one containing payload)
			 */
			seg_len -= in_off;
			next_frag += in_off;
			in_off = 0;

			do {
				efsys_dma_addr_t frag_addr = next_frag;
				size_t frag_len;

				/*
				 * It is assumed here that there is no
				 * limitation on address boundary
				 * crossing by DMA descriptor.
				 */
				frag_len = MIN(seg_len, txq->dma_desc_size_max);
				next_frag += frag_len;
				seg_len -= frag_len;
				pkt_len -= frag_len;

				efx_tx_qdesc_dma_create(txq->common,
							frag_addr, frag_len,
							(pkt_len == 0),
							pend++);

				pkt_descs++;
			} while (seg_len != 0);
		}

		added += pkt_descs;

		fill_level += pkt_descs;
		if (unlikely(fill_level > hard_max_fill)) {
			/*
			 * Our estimate of the maximum number of descriptors
			 * required to send a packet seems to be wrong.
			 * Try to reap (if we haven't yet).
			 */
			if (!reap_done) {
				sfc_efx_tx_reap(txq);
				reap_done = B_TRUE;
				fill_level = added - txq->completed;
				if (fill_level > hard_max_fill) {
					pend -= pkt_descs;
					break;
				}
			} else {
				pend -= pkt_descs;
				break;
			}
		}

		/* Assign mbuf to the last used desc */
		txq->sw_ring[(added - 1) & txq->ptr_mask].mbuf = *pktp;
	}

	if (likely(pkts_sent > 0)) {
		rc = efx_tx_qdesc_post(txq->common, txq->pend_desc,
				       pend - &txq->pend_desc[0],
				       txq->completed, &txq->added);
		SFC_ASSERT(rc == 0);

		if (likely(pushed != txq->added))
			efx_tx_qpush(txq->common, txq->added, pushed);
	}

#if SFC_TX_XMIT_PKTS_REAP_AT_LEAST_ONCE
	if (!reap_done)
		sfc_efx_tx_reap(txq);
#endif

done:
	return pkts_sent;
}

struct sfc_txq *
sfc_txq_by_dp_txq(const struct sfc_dp_txq *dp_txq)
{
	const struct sfc_dp_queue *dpq = &dp_txq->dpq;
	struct rte_eth_dev *eth_dev;
	struct sfc_adapter *sa;
	struct sfc_txq *txq;

	SFC_ASSERT(rte_eth_dev_is_valid_port(dpq->port_id));
	eth_dev = &rte_eth_devices[dpq->port_id];

	sa = eth_dev->data->dev_private;

	SFC_ASSERT(dpq->queue_id < sa->txq_count);
	txq = sa->txq_info[dpq->queue_id].txq;

	SFC_ASSERT(txq != NULL);
	return txq;
}
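
/*
 * The callbacks below implement the libefx ("efx") TX datapath; they are
 * tied together by the sfc_dp_tx descriptor at the end of this file and
 * are invoked through the generic sa->dp_tx hooks used by the control
 * path above.
 */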

static sfc_dp_tx_qcreate_t sfc_efx_tx_qcreate;
static int
sfc_efx_tx_qcreate(uint16_t port_id, uint16_t queue_id,
		   const struct rte_pci_addr *pci_addr,
		   int socket_id,
		   const struct sfc_dp_tx_qcreate_info *info,
		   struct sfc_dp_txq **dp_txqp)
{
	struct sfc_efx_txq *txq;
	struct sfc_txq *ctrl_txq;
	int rc;

	rc = ENOMEM;
	txq = rte_zmalloc_socket("sfc-efx-txq", sizeof(*txq),
				 RTE_CACHE_LINE_SIZE, socket_id);
	if (txq == NULL)
		goto fail_txq_alloc;

	sfc_dp_queue_init(&txq->dp.dpq, port_id, queue_id, pci_addr);

	rc = ENOMEM;
	txq->pend_desc = rte_calloc_socket("sfc-efx-txq-pend-desc",
					   EFX_TXQ_LIMIT(info->txq_entries),
					   sizeof(*txq->pend_desc), 0,
					   socket_id);
	if (txq->pend_desc == NULL)
		goto fail_pend_desc_alloc;

	rc = ENOMEM;
	txq->sw_ring = rte_calloc_socket("sfc-efx-txq-sw_ring",
					 info->txq_entries,
					 sizeof(*txq->sw_ring),
					 RTE_CACHE_LINE_SIZE, socket_id);
	if (txq->sw_ring == NULL)
		goto fail_sw_ring_alloc;

	ctrl_txq = sfc_txq_by_dp_txq(&txq->dp);
	if (ctrl_txq->evq->sa->tso) {
		rc = sfc_efx_tso_alloc_tsoh_objs(txq->sw_ring,
						 info->txq_entries, socket_id);
		if (rc != 0)
			goto fail_alloc_tsoh_objs;
	}

	txq->evq = ctrl_txq->evq;
	txq->ptr_mask = info->txq_entries - 1;
	txq->free_thresh = info->free_thresh;
	txq->dma_desc_size_max = info->dma_desc_size_max;

	*dp_txqp = &txq->dp;
	return 0;

fail_alloc_tsoh_objs:
	rte_free(txq->sw_ring);

fail_sw_ring_alloc:
	rte_free(txq->pend_desc);

fail_pend_desc_alloc:
	rte_free(txq);

fail_txq_alloc:
	return rc;
}

static sfc_dp_tx_qdestroy_t sfc_efx_tx_qdestroy;
static void
sfc_efx_tx_qdestroy(struct sfc_dp_txq *dp_txq)
{
	struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq);

	sfc_efx_tso_free_tsoh_objs(txq->sw_ring, txq->ptr_mask + 1);
	rte_free(txq->sw_ring);
	rte_free(txq->pend_desc);
	rte_free(txq);
}

static sfc_dp_tx_qstart_t sfc_efx_tx_qstart;
static int
sfc_efx_tx_qstart(struct sfc_dp_txq *dp_txq,
		  __rte_unused unsigned int evq_read_ptr,
		  unsigned int txq_desc_index)
{
	/* libefx-based datapath is specific to libefx-based PMD */
	struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq);
	struct sfc_txq *ctrl_txq = sfc_txq_by_dp_txq(dp_txq);

	txq->common = ctrl_txq->common;

	txq->pending = txq->completed = txq->added = txq_desc_index;
	txq->hw_vlan_tci = 0;

	txq->flags |= (SFC_EFX_TXQ_FLAG_STARTED | SFC_EFX_TXQ_FLAG_RUNNING);

	return 0;
}

static sfc_dp_tx_qstop_t sfc_efx_tx_qstop;
static void
sfc_efx_tx_qstop(struct sfc_dp_txq *dp_txq,
		 __rte_unused unsigned int *evq_read_ptr)
{
	struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq);

	txq->flags &= ~SFC_EFX_TXQ_FLAG_RUNNING;
}

static sfc_dp_tx_qreap_t sfc_efx_tx_qreap;
static void
sfc_efx_tx_qreap(struct sfc_dp_txq *dp_txq)
{
	struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq);
	unsigned int txds;

	sfc_efx_tx_reap(txq);

	for (txds = 0; txds <= txq->ptr_mask; txds++) {
		if (txq->sw_ring[txds].mbuf != NULL) {
			rte_pktmbuf_free(txq->sw_ring[txds].mbuf);
			txq->sw_ring[txds].mbuf = NULL;
		}
	}

	txq->flags &= ~SFC_EFX_TXQ_FLAG_STARTED;
}
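
/*
 * Descriptor status callback; it presumably backs the ethdev
 * rte_eth_tx_descriptor_status() API through the qdesc_status hook of
 * struct sfc_dp_tx.
 */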

static sfc_dp_tx_qdesc_status_t sfc_efx_tx_qdesc_status;
static int
sfc_efx_tx_qdesc_status(struct sfc_dp_txq *dp_txq, uint16_t offset)
{
	struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq);

	if (unlikely(offset > txq->ptr_mask))
		return -EINVAL;

	if (unlikely(offset >= EFX_TXQ_LIMIT(txq->ptr_mask + 1)))
		return RTE_ETH_TX_DESC_UNAVAIL;

	/*
	 * Poll EvQ to derive up-to-date 'txq->pending' figure;
	 * it is required for the queue to be running, but the
	 * check is omitted because API design assumes that it
	 * is the duty of the caller to satisfy all conditions
	 */
	SFC_ASSERT((txq->flags & SFC_EFX_TXQ_FLAG_RUNNING) ==
		   SFC_EFX_TXQ_FLAG_RUNNING);
	sfc_ev_qpoll(txq->evq);

	/*
	 * Ring tail is 'txq->pending', and although descriptors
	 * between 'txq->completed' and 'txq->pending' are still
	 * in use by the driver, they should be reported as DONE
	 */
	if (unlikely(offset < (txq->added - txq->pending)))
		return RTE_ETH_TX_DESC_FULL;

	/*
	 * There is no separate return value for unused descriptors;
	 * the latter will be reported as DONE because genuine DONE
	 * descriptors will be freed anyway in SW on the next burst
	 */
	return RTE_ETH_TX_DESC_DONE;
}

struct sfc_dp_tx sfc_efx_tx = {
	.dp = {
		.name = SFC_KVARG_DATAPATH_EFX,
		.type = SFC_DP_TX,
		.hw_fw_caps = 0,
	},
	.features = SFC_DP_TX_FEAT_VLAN_INSERT |
		    SFC_DP_TX_FEAT_TSO |
		    SFC_DP_TX_FEAT_MULTI_POOL |
		    SFC_DP_TX_FEAT_REFCNT |
		    SFC_DP_TX_FEAT_MULTI_SEG,
	.qcreate = sfc_efx_tx_qcreate,
	.qdestroy = sfc_efx_tx_qdestroy,
	.qstart = sfc_efx_tx_qstart,
	.qstop = sfc_efx_tx_qstop,
	.qreap = sfc_efx_tx_qreap,
	.qdesc_status = sfc_efx_tx_qdesc_status,
	.pkt_burst = sfc_efx_xmit_pkts,
};