1 /* SPDX-License-Identifier: BSD-3-Clause 2 * 3 * Copyright (c) 2016-2018 Solarflare Communications Inc. 4 * All rights reserved. 5 * 6 * This software was jointly developed between OKTET Labs (under contract 7 * for Solarflare) and Solarflare Communications, Inc. 8 */ 9 10 #include "sfc.h" 11 #include "sfc_debug.h" 12 #include "sfc_log.h" 13 #include "sfc_ev.h" 14 #include "sfc_tx.h" 15 #include "sfc_tweak.h" 16 #include "sfc_kvargs.h" 17 18 /* 19 * Maximum number of TX queue flush attempts in case of 20 * failure or flush timeout 21 */ 22 #define SFC_TX_QFLUSH_ATTEMPTS (3) 23 24 /* 25 * Time to wait between event queue polling attempts when waiting for TX 26 * queue flush done or flush failed events 27 */ 28 #define SFC_TX_QFLUSH_POLL_WAIT_MS (1) 29 30 /* 31 * Maximum number of event queue polling attempts when waiting for TX queue 32 * flush done or flush failed events; it defines TX queue flush attempt timeout 33 * together with SFC_TX_QFLUSH_POLL_WAIT_MS 34 */ 35 #define SFC_TX_QFLUSH_POLL_ATTEMPTS (2000) 36 37 uint64_t 38 sfc_tx_get_dev_offload_caps(struct sfc_adapter *sa) 39 { 40 const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic); 41 uint64_t caps = 0; 42 43 if ((sa->dp_tx->features & SFC_DP_TX_FEAT_VLAN_INSERT) && 44 encp->enc_hw_tx_insert_vlan_enabled) 45 caps |= DEV_TX_OFFLOAD_VLAN_INSERT; 46 47 if (sa->dp_tx->features & SFC_DP_TX_FEAT_MULTI_SEG) 48 caps |= DEV_TX_OFFLOAD_MULTI_SEGS; 49 50 if ((~sa->dp_tx->features & SFC_DP_TX_FEAT_MULTI_POOL) && 51 (~sa->dp_tx->features & SFC_DP_TX_FEAT_REFCNT)) 52 caps |= DEV_TX_OFFLOAD_MBUF_FAST_FREE; 53 54 return caps; 55 } 56 57 uint64_t 58 sfc_tx_get_queue_offload_caps(struct sfc_adapter *sa) 59 { 60 const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic); 61 uint64_t caps = 0; 62 63 caps |= DEV_TX_OFFLOAD_IPV4_CKSUM; 64 caps |= DEV_TX_OFFLOAD_UDP_CKSUM; 65 caps |= DEV_TX_OFFLOAD_TCP_CKSUM; 66 67 if (encp->enc_tunnel_encapsulations_supported) 68 caps |= DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM; 69 70 if (sa->tso) 71 caps |= DEV_TX_OFFLOAD_TCP_TSO; 72 73 return caps; 74 } 75 76 static int 77 sfc_tx_qcheck_conf(struct sfc_adapter *sa, unsigned int txq_max_fill_level, 78 const struct rte_eth_txconf *tx_conf, 79 uint64_t offloads) 80 { 81 int rc = 0; 82 83 if (tx_conf->tx_rs_thresh != 0) { 84 sfc_err(sa, "RS bit in transmit descriptor is not supported"); 85 rc = EINVAL; 86 } 87 88 if (tx_conf->tx_free_thresh > txq_max_fill_level) { 89 sfc_err(sa, 90 "TxQ free threshold too large: %u vs maximum %u", 91 tx_conf->tx_free_thresh, txq_max_fill_level); 92 rc = EINVAL; 93 } 94 95 if (tx_conf->tx_thresh.pthresh != 0 || 96 tx_conf->tx_thresh.hthresh != 0 || 97 tx_conf->tx_thresh.wthresh != 0) { 98 sfc_warn(sa, 99 "prefetch/host/writeback thresholds are not supported"); 100 } 101 102 /* We either perform both TCP and UDP offload, or no offload at all */ 103 if (((offloads & DEV_TX_OFFLOAD_TCP_CKSUM) == 0) != 104 ((offloads & DEV_TX_OFFLOAD_UDP_CKSUM) == 0)) { 105 sfc_err(sa, "TCP and UDP offloads can't be set independently"); 106 rc = EINVAL; 107 } 108 109 return rc; 110 } 111 112 void 113 sfc_tx_qflush_done(struct sfc_txq *txq) 114 { 115 txq->state |= SFC_TXQ_FLUSHED; 116 txq->state &= ~SFC_TXQ_FLUSHING; 117 } 118 119 int 120 sfc_tx_qinit(struct sfc_adapter *sa, unsigned int sw_index, 121 uint16_t nb_tx_desc, unsigned int socket_id, 122 const struct rte_eth_txconf *tx_conf) 123 { 124 const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic); 125 unsigned int txq_entries; 126 unsigned int evq_entries; 127 unsigned int txq_max_fill_level; 128 struct sfc_txq_info *txq_info; 129 struct sfc_evq *evq; 130 struct sfc_txq *txq; 131 int rc = 0; 132 struct sfc_dp_tx_qcreate_info info; 133 uint64_t offloads; 134 135 sfc_log_init(sa, "TxQ = %u", sw_index); 136 137 rc = sa->dp_tx->qsize_up_rings(nb_tx_desc, &txq_entries, &evq_entries, 138 &txq_max_fill_level); 139 if (rc != 0) 140 goto fail_size_up_rings; 141 SFC_ASSERT(txq_entries >= EFX_TXQ_MINNDESCS); 142 SFC_ASSERT(txq_entries <= sa->txq_max_entries); 143 SFC_ASSERT(txq_entries >= nb_tx_desc); 144 SFC_ASSERT(txq_max_fill_level <= nb_tx_desc); 145 146 offloads = tx_conf->offloads | 147 sa->eth_dev->data->dev_conf.txmode.offloads; 148 rc = sfc_tx_qcheck_conf(sa, txq_max_fill_level, tx_conf, offloads); 149 if (rc != 0) 150 goto fail_bad_conf; 151 152 SFC_ASSERT(sw_index < sa->txq_count); 153 txq_info = &sa->txq_info[sw_index]; 154 155 txq_info->entries = txq_entries; 156 157 rc = sfc_ev_qinit(sa, SFC_EVQ_TYPE_TX, sw_index, 158 evq_entries, socket_id, &evq); 159 if (rc != 0) 160 goto fail_ev_qinit; 161 162 rc = ENOMEM; 163 txq = rte_zmalloc_socket("sfc-txq", sizeof(*txq), 0, socket_id); 164 if (txq == NULL) 165 goto fail_txq_alloc; 166 167 txq_info->txq = txq; 168 169 txq->hw_index = sw_index; 170 txq->evq = evq; 171 txq->free_thresh = 172 (tx_conf->tx_free_thresh) ? tx_conf->tx_free_thresh : 173 SFC_TX_DEFAULT_FREE_THRESH; 174 txq->offloads = offloads; 175 176 rc = sfc_dma_alloc(sa, "txq", sw_index, EFX_TXQ_SIZE(txq_info->entries), 177 socket_id, &txq->mem); 178 if (rc != 0) 179 goto fail_dma_alloc; 180 181 memset(&info, 0, sizeof(info)); 182 info.max_fill_level = txq_max_fill_level; 183 info.free_thresh = txq->free_thresh; 184 info.offloads = offloads; 185 info.txq_entries = txq_info->entries; 186 info.dma_desc_size_max = encp->enc_tx_dma_desc_size_max; 187 info.txq_hw_ring = txq->mem.esm_base; 188 info.evq_entries = evq_entries; 189 info.evq_hw_ring = evq->mem.esm_base; 190 info.hw_index = txq->hw_index; 191 info.mem_bar = sa->mem_bar.esb_base; 192 info.vi_window_shift = encp->enc_vi_window_shift; 193 info.tso_tcp_header_offset_limit = 194 encp->enc_tx_tso_tcp_header_offset_limit; 195 196 rc = sa->dp_tx->qcreate(sa->eth_dev->data->port_id, sw_index, 197 &RTE_ETH_DEV_TO_PCI(sa->eth_dev)->addr, 198 socket_id, &info, &txq->dp); 199 if (rc != 0) 200 goto fail_dp_tx_qinit; 201 202 evq->dp_txq = txq->dp; 203 204 txq->state = SFC_TXQ_INITIALIZED; 205 206 txq_info->deferred_start = (tx_conf->tx_deferred_start != 0); 207 208 return 0; 209 210 fail_dp_tx_qinit: 211 sfc_dma_free(sa, &txq->mem); 212 213 fail_dma_alloc: 214 txq_info->txq = NULL; 215 rte_free(txq); 216 217 fail_txq_alloc: 218 sfc_ev_qfini(evq); 219 220 fail_ev_qinit: 221 txq_info->entries = 0; 222 223 fail_bad_conf: 224 fail_size_up_rings: 225 sfc_log_init(sa, "failed (TxQ = %u, rc = %d)", sw_index, rc); 226 return rc; 227 } 228 229 void 230 sfc_tx_qfini(struct sfc_adapter *sa, unsigned int sw_index) 231 { 232 struct sfc_txq_info *txq_info; 233 struct sfc_txq *txq; 234 235 sfc_log_init(sa, "TxQ = %u", sw_index); 236 237 SFC_ASSERT(sw_index < sa->txq_count); 238 sa->eth_dev->data->tx_queues[sw_index] = NULL; 239 240 txq_info = &sa->txq_info[sw_index]; 241 242 txq = txq_info->txq; 243 SFC_ASSERT(txq != NULL); 244 SFC_ASSERT(txq->state == SFC_TXQ_INITIALIZED); 245 246 sa->dp_tx->qdestroy(txq->dp); 247 txq->dp = NULL; 248 249 txq_info->txq = NULL; 250 txq_info->entries = 0; 251 252 sfc_dma_free(sa, &txq->mem); 253 254 sfc_ev_qfini(txq->evq); 255 txq->evq = NULL; 256 257 rte_free(txq); 258 } 259 260 static int 261 sfc_tx_qinit_info(struct sfc_adapter *sa, unsigned int sw_index) 262 { 263 sfc_log_init(sa, "TxQ = %u", sw_index); 264 265 return 0; 266 } 267 268 static int 269 sfc_tx_check_mode(struct sfc_adapter *sa, const struct rte_eth_txmode *txmode) 270 { 271 int rc = 0; 272 273 switch (txmode->mq_mode) { 274 case ETH_MQ_TX_NONE: 275 break; 276 default: 277 sfc_err(sa, "Tx multi-queue mode %u not supported", 278 txmode->mq_mode); 279 rc = EINVAL; 280 } 281 282 /* 283 * These features are claimed to be i40e-specific, 284 * but it does make sense to double-check their absence 285 */ 286 if (txmode->hw_vlan_reject_tagged) { 287 sfc_err(sa, "Rejecting tagged packets not supported"); 288 rc = EINVAL; 289 } 290 291 if (txmode->hw_vlan_reject_untagged) { 292 sfc_err(sa, "Rejecting untagged packets not supported"); 293 rc = EINVAL; 294 } 295 296 if (txmode->hw_vlan_insert_pvid) { 297 sfc_err(sa, "Port-based VLAN insertion not supported"); 298 rc = EINVAL; 299 } 300 301 return rc; 302 } 303 304 /** 305 * Destroy excess queues that are no longer needed after reconfiguration 306 * or complete close. 307 */ 308 static void 309 sfc_tx_fini_queues(struct sfc_adapter *sa, unsigned int nb_tx_queues) 310 { 311 int sw_index; 312 313 SFC_ASSERT(nb_tx_queues <= sa->txq_count); 314 315 sw_index = sa->txq_count; 316 while (--sw_index >= (int)nb_tx_queues) { 317 if (sa->txq_info[sw_index].txq != NULL) 318 sfc_tx_qfini(sa, sw_index); 319 } 320 321 sa->txq_count = nb_tx_queues; 322 } 323 324 int 325 sfc_tx_configure(struct sfc_adapter *sa) 326 { 327 const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic); 328 const struct rte_eth_conf *dev_conf = &sa->eth_dev->data->dev_conf; 329 const unsigned int nb_tx_queues = sa->eth_dev->data->nb_tx_queues; 330 int rc = 0; 331 332 sfc_log_init(sa, "nb_tx_queues=%u (old %u)", 333 nb_tx_queues, sa->txq_count); 334 335 /* 336 * The datapath implementation assumes absence of boundary 337 * limits on Tx DMA descriptors. Addition of these checks on 338 * datapath would simply make the datapath slower. 339 */ 340 if (encp->enc_tx_dma_desc_boundary != 0) { 341 rc = ENOTSUP; 342 goto fail_tx_dma_desc_boundary; 343 } 344 345 rc = sfc_tx_check_mode(sa, &dev_conf->txmode); 346 if (rc != 0) 347 goto fail_check_mode; 348 349 if (nb_tx_queues == sa->txq_count) 350 goto done; 351 352 if (sa->txq_info == NULL) { 353 sa->txq_info = rte_calloc_socket("sfc-txqs", nb_tx_queues, 354 sizeof(sa->txq_info[0]), 0, 355 sa->socket_id); 356 if (sa->txq_info == NULL) 357 goto fail_txqs_alloc; 358 } else { 359 struct sfc_txq_info *new_txq_info; 360 361 if (nb_tx_queues < sa->txq_count) 362 sfc_tx_fini_queues(sa, nb_tx_queues); 363 364 new_txq_info = 365 rte_realloc(sa->txq_info, 366 nb_tx_queues * sizeof(sa->txq_info[0]), 0); 367 if (new_txq_info == NULL && nb_tx_queues > 0) 368 goto fail_txqs_realloc; 369 370 sa->txq_info = new_txq_info; 371 if (nb_tx_queues > sa->txq_count) 372 memset(&sa->txq_info[sa->txq_count], 0, 373 (nb_tx_queues - sa->txq_count) * 374 sizeof(sa->txq_info[0])); 375 } 376 377 while (sa->txq_count < nb_tx_queues) { 378 rc = sfc_tx_qinit_info(sa, sa->txq_count); 379 if (rc != 0) 380 goto fail_tx_qinit_info; 381 382 sa->txq_count++; 383 } 384 385 done: 386 return 0; 387 388 fail_tx_qinit_info: 389 fail_txqs_realloc: 390 fail_txqs_alloc: 391 sfc_tx_close(sa); 392 393 fail_check_mode: 394 fail_tx_dma_desc_boundary: 395 sfc_log_init(sa, "failed (rc = %d)", rc); 396 return rc; 397 } 398 399 void 400 sfc_tx_close(struct sfc_adapter *sa) 401 { 402 sfc_tx_fini_queues(sa, 0); 403 404 rte_free(sa->txq_info); 405 sa->txq_info = NULL; 406 } 407 408 int 409 sfc_tx_qstart(struct sfc_adapter *sa, unsigned int sw_index) 410 { 411 uint64_t offloads_supported = sfc_tx_get_dev_offload_caps(sa) | 412 sfc_tx_get_queue_offload_caps(sa); 413 struct rte_eth_dev_data *dev_data; 414 struct sfc_txq_info *txq_info; 415 struct sfc_txq *txq; 416 struct sfc_evq *evq; 417 uint16_t flags = 0; 418 unsigned int desc_index; 419 int rc = 0; 420 421 sfc_log_init(sa, "TxQ = %u", sw_index); 422 423 SFC_ASSERT(sw_index < sa->txq_count); 424 txq_info = &sa->txq_info[sw_index]; 425 426 txq = txq_info->txq; 427 428 SFC_ASSERT(txq != NULL); 429 SFC_ASSERT(txq->state == SFC_TXQ_INITIALIZED); 430 431 evq = txq->evq; 432 433 rc = sfc_ev_qstart(evq, sfc_evq_index_by_txq_sw_index(sa, sw_index)); 434 if (rc != 0) 435 goto fail_ev_qstart; 436 437 if (txq->offloads & DEV_TX_OFFLOAD_IPV4_CKSUM) 438 flags |= EFX_TXQ_CKSUM_IPV4; 439 440 if (txq->offloads & DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM) 441 flags |= EFX_TXQ_CKSUM_INNER_IPV4; 442 443 if ((txq->offloads & DEV_TX_OFFLOAD_TCP_CKSUM) || 444 (txq->offloads & DEV_TX_OFFLOAD_UDP_CKSUM)) { 445 flags |= EFX_TXQ_CKSUM_TCPUDP; 446 447 if (offloads_supported & DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM) 448 flags |= EFX_TXQ_CKSUM_INNER_TCPUDP; 449 } 450 451 if (txq->offloads & DEV_TX_OFFLOAD_TCP_TSO) 452 flags |= EFX_TXQ_FATSOV2; 453 454 rc = efx_tx_qcreate(sa->nic, txq->hw_index, 0, &txq->mem, 455 txq_info->entries, 0 /* not used on EF10 */, 456 flags, evq->common, 457 &txq->common, &desc_index); 458 if (rc != 0) { 459 if (sa->tso && (rc == ENOSPC)) 460 sfc_err(sa, "ran out of TSO contexts"); 461 462 goto fail_tx_qcreate; 463 } 464 465 efx_tx_qenable(txq->common); 466 467 txq->state |= SFC_TXQ_STARTED; 468 469 rc = sa->dp_tx->qstart(txq->dp, evq->read_ptr, desc_index); 470 if (rc != 0) 471 goto fail_dp_qstart; 472 473 /* 474 * It seems to be used by DPDK for debug purposes only ('rte_ether') 475 */ 476 dev_data = sa->eth_dev->data; 477 dev_data->tx_queue_state[sw_index] = RTE_ETH_QUEUE_STATE_STARTED; 478 479 return 0; 480 481 fail_dp_qstart: 482 txq->state = SFC_TXQ_INITIALIZED; 483 efx_tx_qdestroy(txq->common); 484 485 fail_tx_qcreate: 486 sfc_ev_qstop(evq); 487 488 fail_ev_qstart: 489 return rc; 490 } 491 492 void 493 sfc_tx_qstop(struct sfc_adapter *sa, unsigned int sw_index) 494 { 495 struct rte_eth_dev_data *dev_data; 496 struct sfc_txq_info *txq_info; 497 struct sfc_txq *txq; 498 unsigned int retry_count; 499 unsigned int wait_count; 500 int rc; 501 502 sfc_log_init(sa, "TxQ = %u", sw_index); 503 504 SFC_ASSERT(sw_index < sa->txq_count); 505 txq_info = &sa->txq_info[sw_index]; 506 507 txq = txq_info->txq; 508 509 if (txq == NULL || txq->state == SFC_TXQ_INITIALIZED) 510 return; 511 512 SFC_ASSERT(txq->state & SFC_TXQ_STARTED); 513 514 sa->dp_tx->qstop(txq->dp, &txq->evq->read_ptr); 515 516 /* 517 * Retry TX queue flushing in case of flush failed or 518 * timeout; in the worst case it can delay for 6 seconds 519 */ 520 for (retry_count = 0; 521 ((txq->state & SFC_TXQ_FLUSHED) == 0) && 522 (retry_count < SFC_TX_QFLUSH_ATTEMPTS); 523 ++retry_count) { 524 rc = efx_tx_qflush(txq->common); 525 if (rc != 0) { 526 txq->state |= (rc == EALREADY) ? 527 SFC_TXQ_FLUSHED : SFC_TXQ_FLUSH_FAILED; 528 break; 529 } 530 531 /* 532 * Wait for TX queue flush done or flush failed event at least 533 * SFC_TX_QFLUSH_POLL_WAIT_MS milliseconds and not more 534 * than 2 seconds (SFC_TX_QFLUSH_POLL_WAIT_MS multiplied 535 * by SFC_TX_QFLUSH_POLL_ATTEMPTS) 536 */ 537 wait_count = 0; 538 do { 539 rte_delay_ms(SFC_TX_QFLUSH_POLL_WAIT_MS); 540 sfc_ev_qpoll(txq->evq); 541 } while ((txq->state & SFC_TXQ_FLUSHING) && 542 wait_count++ < SFC_TX_QFLUSH_POLL_ATTEMPTS); 543 544 if (txq->state & SFC_TXQ_FLUSHING) 545 sfc_err(sa, "TxQ %u flush timed out", sw_index); 546 547 if (txq->state & SFC_TXQ_FLUSHED) 548 sfc_notice(sa, "TxQ %u flushed", sw_index); 549 } 550 551 sa->dp_tx->qreap(txq->dp); 552 553 txq->state = SFC_TXQ_INITIALIZED; 554 555 efx_tx_qdestroy(txq->common); 556 557 sfc_ev_qstop(txq->evq); 558 559 /* 560 * It seems to be used by DPDK for debug purposes only ('rte_ether') 561 */ 562 dev_data = sa->eth_dev->data; 563 dev_data->tx_queue_state[sw_index] = RTE_ETH_QUEUE_STATE_STOPPED; 564 } 565 566 int 567 sfc_tx_start(struct sfc_adapter *sa) 568 { 569 unsigned int sw_index; 570 int rc = 0; 571 572 sfc_log_init(sa, "txq_count = %u", sa->txq_count); 573 574 if (sa->tso) { 575 if (!efx_nic_cfg_get(sa->nic)->enc_fw_assisted_tso_v2_enabled) { 576 sfc_warn(sa, "TSO support was unable to be restored"); 577 sa->tso = B_FALSE; 578 } 579 } 580 581 rc = efx_tx_init(sa->nic); 582 if (rc != 0) 583 goto fail_efx_tx_init; 584 585 for (sw_index = 0; sw_index < sa->txq_count; ++sw_index) { 586 if (sa->txq_info[sw_index].txq != NULL && 587 (!(sa->txq_info[sw_index].deferred_start) || 588 sa->txq_info[sw_index].deferred_started)) { 589 rc = sfc_tx_qstart(sa, sw_index); 590 if (rc != 0) 591 goto fail_tx_qstart; 592 } 593 } 594 595 return 0; 596 597 fail_tx_qstart: 598 while (sw_index-- > 0) 599 sfc_tx_qstop(sa, sw_index); 600 601 efx_tx_fini(sa->nic); 602 603 fail_efx_tx_init: 604 sfc_log_init(sa, "failed (rc = %d)", rc); 605 return rc; 606 } 607 608 void 609 sfc_tx_stop(struct sfc_adapter *sa) 610 { 611 unsigned int sw_index; 612 613 sfc_log_init(sa, "txq_count = %u", sa->txq_count); 614 615 sw_index = sa->txq_count; 616 while (sw_index-- > 0) { 617 if (sa->txq_info[sw_index].txq != NULL) 618 sfc_tx_qstop(sa, sw_index); 619 } 620 621 efx_tx_fini(sa->nic); 622 } 623 624 static void 625 sfc_efx_tx_reap(struct sfc_efx_txq *txq) 626 { 627 unsigned int completed; 628 629 sfc_ev_qpoll(txq->evq); 630 631 for (completed = txq->completed; 632 completed != txq->pending; completed++) { 633 struct sfc_efx_tx_sw_desc *txd; 634 635 txd = &txq->sw_ring[completed & txq->ptr_mask]; 636 637 if (txd->mbuf != NULL) { 638 rte_pktmbuf_free(txd->mbuf); 639 txd->mbuf = NULL; 640 } 641 } 642 643 txq->completed = completed; 644 } 645 646 /* 647 * The function is used to insert or update VLAN tag; 648 * the firmware has state of the firmware tag to insert per TxQ 649 * (controlled by option descriptors), hence, if the tag of the 650 * packet to be sent is different from one remembered by the firmware, 651 * the function will update it 652 */ 653 static unsigned int 654 sfc_efx_tx_maybe_insert_tag(struct sfc_efx_txq *txq, struct rte_mbuf *m, 655 efx_desc_t **pend) 656 { 657 uint16_t this_tag = ((m->ol_flags & PKT_TX_VLAN_PKT) ? 658 m->vlan_tci : 0); 659 660 if (this_tag == txq->hw_vlan_tci) 661 return 0; 662 663 /* 664 * The expression inside SFC_ASSERT() is not desired to be checked in 665 * a non-debug build because it might be too expensive on the data path 666 */ 667 SFC_ASSERT(efx_nic_cfg_get(txq->evq->sa->nic)->enc_hw_tx_insert_vlan_enabled); 668 669 efx_tx_qdesc_vlantci_create(txq->common, rte_cpu_to_be_16(this_tag), 670 *pend); 671 (*pend)++; 672 txq->hw_vlan_tci = this_tag; 673 674 return 1; 675 } 676 677 static uint16_t 678 sfc_efx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) 679 { 680 struct sfc_dp_txq *dp_txq = (struct sfc_dp_txq *)tx_queue; 681 struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq); 682 unsigned int added = txq->added; 683 unsigned int pushed = added; 684 unsigned int pkts_sent = 0; 685 efx_desc_t *pend = &txq->pend_desc[0]; 686 const unsigned int hard_max_fill = txq->max_fill_level; 687 const unsigned int soft_max_fill = hard_max_fill - txq->free_thresh; 688 unsigned int fill_level = added - txq->completed; 689 boolean_t reap_done; 690 int rc __rte_unused; 691 struct rte_mbuf **pktp; 692 693 if (unlikely((txq->flags & SFC_EFX_TXQ_FLAG_RUNNING) == 0)) 694 goto done; 695 696 /* 697 * If insufficient space for a single packet is present, 698 * we should reap; otherwise, we shouldn't do that all the time 699 * to avoid latency increase 700 */ 701 reap_done = (fill_level > soft_max_fill); 702 703 if (reap_done) { 704 sfc_efx_tx_reap(txq); 705 /* 706 * Recalculate fill level since 'txq->completed' 707 * might have changed on reap 708 */ 709 fill_level = added - txq->completed; 710 } 711 712 for (pkts_sent = 0, pktp = &tx_pkts[0]; 713 (pkts_sent < nb_pkts) && (fill_level <= soft_max_fill); 714 pkts_sent++, pktp++) { 715 uint16_t hw_vlan_tci_prev = txq->hw_vlan_tci; 716 struct rte_mbuf *m_seg = *pktp; 717 size_t pkt_len = m_seg->pkt_len; 718 unsigned int pkt_descs = 0; 719 size_t in_off = 0; 720 721 /* 722 * Here VLAN TCI is expected to be zero in case if no 723 * DEV_TX_OFFLOAD_VLAN_INSERT capability is advertised; 724 * if the calling app ignores the absence of 725 * DEV_TX_OFFLOAD_VLAN_INSERT and pushes VLAN TCI, then 726 * TX_ERROR will occur 727 */ 728 pkt_descs += sfc_efx_tx_maybe_insert_tag(txq, m_seg, &pend); 729 730 if (m_seg->ol_flags & PKT_TX_TCP_SEG) { 731 /* 732 * We expect correct 'pkt->l[2, 3, 4]_len' values 733 * to be set correctly by the caller 734 */ 735 if (sfc_efx_tso_do(txq, added, &m_seg, &in_off, &pend, 736 &pkt_descs, &pkt_len) != 0) { 737 /* We may have reached this place for 738 * one of the following reasons: 739 * 740 * 1) Packet header length is greater 741 * than SFC_TSOH_STD_LEN 742 * 2) TCP header starts at more then 743 * 208 bytes into the frame 744 * 745 * We will deceive RTE saying that we have sent 746 * the packet, but we will actually drop it. 747 * Hence, we should revert 'pend' to the 748 * previous state (in case we have added 749 * VLAN descriptor) and start processing 750 * another one packet. But the original 751 * mbuf shouldn't be orphaned 752 */ 753 pend -= pkt_descs; 754 txq->hw_vlan_tci = hw_vlan_tci_prev; 755 756 rte_pktmbuf_free(*pktp); 757 758 continue; 759 } 760 761 /* 762 * We've only added 2 FATSOv2 option descriptors 763 * and 1 descriptor for the linearized packet header. 764 * The outstanding work will be done in the same manner 765 * as for the usual non-TSO path 766 */ 767 } 768 769 for (; m_seg != NULL; m_seg = m_seg->next) { 770 efsys_dma_addr_t next_frag; 771 size_t seg_len; 772 773 seg_len = m_seg->data_len; 774 next_frag = rte_mbuf_data_iova(m_seg); 775 776 /* 777 * If we've started TSO transaction few steps earlier, 778 * we'll skip packet header using an offset in the 779 * current segment (which has been set to the 780 * first one containing payload) 781 */ 782 seg_len -= in_off; 783 next_frag += in_off; 784 in_off = 0; 785 786 do { 787 efsys_dma_addr_t frag_addr = next_frag; 788 size_t frag_len; 789 790 /* 791 * It is assumed here that there is no 792 * limitation on address boundary 793 * crossing by DMA descriptor. 794 */ 795 frag_len = MIN(seg_len, txq->dma_desc_size_max); 796 next_frag += frag_len; 797 seg_len -= frag_len; 798 pkt_len -= frag_len; 799 800 efx_tx_qdesc_dma_create(txq->common, 801 frag_addr, frag_len, 802 (pkt_len == 0), 803 pend++); 804 805 pkt_descs++; 806 } while (seg_len != 0); 807 } 808 809 added += pkt_descs; 810 811 fill_level += pkt_descs; 812 if (unlikely(fill_level > hard_max_fill)) { 813 /* 814 * Our estimation for maximum number of descriptors 815 * required to send a packet seems to be wrong. 816 * Try to reap (if we haven't yet). 817 */ 818 if (!reap_done) { 819 sfc_efx_tx_reap(txq); 820 reap_done = B_TRUE; 821 fill_level = added - txq->completed; 822 if (fill_level > hard_max_fill) { 823 pend -= pkt_descs; 824 txq->hw_vlan_tci = hw_vlan_tci_prev; 825 break; 826 } 827 } else { 828 pend -= pkt_descs; 829 txq->hw_vlan_tci = hw_vlan_tci_prev; 830 break; 831 } 832 } 833 834 /* Assign mbuf to the last used desc */ 835 txq->sw_ring[(added - 1) & txq->ptr_mask].mbuf = *pktp; 836 } 837 838 if (likely(pkts_sent > 0)) { 839 rc = efx_tx_qdesc_post(txq->common, txq->pend_desc, 840 pend - &txq->pend_desc[0], 841 txq->completed, &txq->added); 842 SFC_ASSERT(rc == 0); 843 844 if (likely(pushed != txq->added)) 845 efx_tx_qpush(txq->common, txq->added, pushed); 846 } 847 848 #if SFC_TX_XMIT_PKTS_REAP_AT_LEAST_ONCE 849 if (!reap_done) 850 sfc_efx_tx_reap(txq); 851 #endif 852 853 done: 854 return pkts_sent; 855 } 856 857 struct sfc_txq * 858 sfc_txq_by_dp_txq(const struct sfc_dp_txq *dp_txq) 859 { 860 const struct sfc_dp_queue *dpq = &dp_txq->dpq; 861 struct rte_eth_dev *eth_dev; 862 struct sfc_adapter *sa; 863 struct sfc_txq *txq; 864 865 SFC_ASSERT(rte_eth_dev_is_valid_port(dpq->port_id)); 866 eth_dev = &rte_eth_devices[dpq->port_id]; 867 868 sa = eth_dev->data->dev_private; 869 870 SFC_ASSERT(dpq->queue_id < sa->txq_count); 871 txq = sa->txq_info[dpq->queue_id].txq; 872 873 SFC_ASSERT(txq != NULL); 874 return txq; 875 } 876 877 static sfc_dp_tx_qsize_up_rings_t sfc_efx_tx_qsize_up_rings; 878 static int 879 sfc_efx_tx_qsize_up_rings(uint16_t nb_tx_desc, 880 unsigned int *txq_entries, 881 unsigned int *evq_entries, 882 unsigned int *txq_max_fill_level) 883 { 884 *txq_entries = nb_tx_desc; 885 *evq_entries = nb_tx_desc; 886 *txq_max_fill_level = EFX_TXQ_LIMIT(*txq_entries); 887 return 0; 888 } 889 890 static sfc_dp_tx_qcreate_t sfc_efx_tx_qcreate; 891 static int 892 sfc_efx_tx_qcreate(uint16_t port_id, uint16_t queue_id, 893 const struct rte_pci_addr *pci_addr, 894 int socket_id, 895 const struct sfc_dp_tx_qcreate_info *info, 896 struct sfc_dp_txq **dp_txqp) 897 { 898 struct sfc_efx_txq *txq; 899 struct sfc_txq *ctrl_txq; 900 int rc; 901 902 rc = ENOMEM; 903 txq = rte_zmalloc_socket("sfc-efx-txq", sizeof(*txq), 904 RTE_CACHE_LINE_SIZE, socket_id); 905 if (txq == NULL) 906 goto fail_txq_alloc; 907 908 sfc_dp_queue_init(&txq->dp.dpq, port_id, queue_id, pci_addr); 909 910 rc = ENOMEM; 911 txq->pend_desc = rte_calloc_socket("sfc-efx-txq-pend-desc", 912 EFX_TXQ_LIMIT(info->txq_entries), 913 sizeof(*txq->pend_desc), 0, 914 socket_id); 915 if (txq->pend_desc == NULL) 916 goto fail_pend_desc_alloc; 917 918 rc = ENOMEM; 919 txq->sw_ring = rte_calloc_socket("sfc-efx-txq-sw_ring", 920 info->txq_entries, 921 sizeof(*txq->sw_ring), 922 RTE_CACHE_LINE_SIZE, socket_id); 923 if (txq->sw_ring == NULL) 924 goto fail_sw_ring_alloc; 925 926 ctrl_txq = sfc_txq_by_dp_txq(&txq->dp); 927 if (ctrl_txq->evq->sa->tso) { 928 rc = sfc_efx_tso_alloc_tsoh_objs(txq->sw_ring, 929 info->txq_entries, socket_id); 930 if (rc != 0) 931 goto fail_alloc_tsoh_objs; 932 } 933 934 txq->evq = ctrl_txq->evq; 935 txq->ptr_mask = info->txq_entries - 1; 936 txq->max_fill_level = info->max_fill_level; 937 txq->free_thresh = info->free_thresh; 938 txq->dma_desc_size_max = info->dma_desc_size_max; 939 940 *dp_txqp = &txq->dp; 941 return 0; 942 943 fail_alloc_tsoh_objs: 944 rte_free(txq->sw_ring); 945 946 fail_sw_ring_alloc: 947 rte_free(txq->pend_desc); 948 949 fail_pend_desc_alloc: 950 rte_free(txq); 951 952 fail_txq_alloc: 953 return rc; 954 } 955 956 static sfc_dp_tx_qdestroy_t sfc_efx_tx_qdestroy; 957 static void 958 sfc_efx_tx_qdestroy(struct sfc_dp_txq *dp_txq) 959 { 960 struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq); 961 962 sfc_efx_tso_free_tsoh_objs(txq->sw_ring, txq->ptr_mask + 1); 963 rte_free(txq->sw_ring); 964 rte_free(txq->pend_desc); 965 rte_free(txq); 966 } 967 968 static sfc_dp_tx_qstart_t sfc_efx_tx_qstart; 969 static int 970 sfc_efx_tx_qstart(struct sfc_dp_txq *dp_txq, 971 __rte_unused unsigned int evq_read_ptr, 972 unsigned int txq_desc_index) 973 { 974 /* libefx-based datapath is specific to libefx-based PMD */ 975 struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq); 976 struct sfc_txq *ctrl_txq = sfc_txq_by_dp_txq(dp_txq); 977 978 txq->common = ctrl_txq->common; 979 980 txq->pending = txq->completed = txq->added = txq_desc_index; 981 txq->hw_vlan_tci = 0; 982 983 txq->flags |= (SFC_EFX_TXQ_FLAG_STARTED | SFC_EFX_TXQ_FLAG_RUNNING); 984 985 return 0; 986 } 987 988 static sfc_dp_tx_qstop_t sfc_efx_tx_qstop; 989 static void 990 sfc_efx_tx_qstop(struct sfc_dp_txq *dp_txq, 991 __rte_unused unsigned int *evq_read_ptr) 992 { 993 struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq); 994 995 txq->flags &= ~SFC_EFX_TXQ_FLAG_RUNNING; 996 } 997 998 static sfc_dp_tx_qreap_t sfc_efx_tx_qreap; 999 static void 1000 sfc_efx_tx_qreap(struct sfc_dp_txq *dp_txq) 1001 { 1002 struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq); 1003 unsigned int txds; 1004 1005 sfc_efx_tx_reap(txq); 1006 1007 for (txds = 0; txds <= txq->ptr_mask; txds++) { 1008 if (txq->sw_ring[txds].mbuf != NULL) { 1009 rte_pktmbuf_free(txq->sw_ring[txds].mbuf); 1010 txq->sw_ring[txds].mbuf = NULL; 1011 } 1012 } 1013 1014 txq->flags &= ~SFC_EFX_TXQ_FLAG_STARTED; 1015 } 1016 1017 static sfc_dp_tx_qdesc_status_t sfc_efx_tx_qdesc_status; 1018 static int 1019 sfc_efx_tx_qdesc_status(struct sfc_dp_txq *dp_txq, uint16_t offset) 1020 { 1021 struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq); 1022 1023 if (unlikely(offset > txq->ptr_mask)) 1024 return -EINVAL; 1025 1026 if (unlikely(offset >= txq->max_fill_level)) 1027 return RTE_ETH_TX_DESC_UNAVAIL; 1028 1029 /* 1030 * Poll EvQ to derive up-to-date 'txq->pending' figure; 1031 * it is required for the queue to be running, but the 1032 * check is omitted because API design assumes that it 1033 * is the duty of the caller to satisfy all conditions 1034 */ 1035 SFC_ASSERT((txq->flags & SFC_EFX_TXQ_FLAG_RUNNING) == 1036 SFC_EFX_TXQ_FLAG_RUNNING); 1037 sfc_ev_qpoll(txq->evq); 1038 1039 /* 1040 * Ring tail is 'txq->pending', and although descriptors 1041 * between 'txq->completed' and 'txq->pending' are still 1042 * in use by the driver, they should be reported as DONE 1043 */ 1044 if (unlikely(offset < (txq->added - txq->pending))) 1045 return RTE_ETH_TX_DESC_FULL; 1046 1047 /* 1048 * There is no separate return value for unused descriptors; 1049 * the latter will be reported as DONE because genuine DONE 1050 * descriptors will be freed anyway in SW on the next burst 1051 */ 1052 return RTE_ETH_TX_DESC_DONE; 1053 } 1054 1055 struct sfc_dp_tx sfc_efx_tx = { 1056 .dp = { 1057 .name = SFC_KVARG_DATAPATH_EFX, 1058 .type = SFC_DP_TX, 1059 .hw_fw_caps = 0, 1060 }, 1061 .features = SFC_DP_TX_FEAT_VLAN_INSERT | 1062 SFC_DP_TX_FEAT_TSO | 1063 SFC_DP_TX_FEAT_MULTI_POOL | 1064 SFC_DP_TX_FEAT_REFCNT | 1065 SFC_DP_TX_FEAT_MULTI_SEG, 1066 .qsize_up_rings = sfc_efx_tx_qsize_up_rings, 1067 .qcreate = sfc_efx_tx_qcreate, 1068 .qdestroy = sfc_efx_tx_qdestroy, 1069 .qstart = sfc_efx_tx_qstart, 1070 .qstop = sfc_efx_tx_qstop, 1071 .qreap = sfc_efx_tx_qreap, 1072 .qdesc_status = sfc_efx_tx_qdesc_status, 1073 .pkt_burst = sfc_efx_xmit_pkts, 1074 }; 1075