1 /* SPDX-License-Identifier: BSD-3-Clause 2 * 3 * Copyright (c) 2016-2018 Solarflare Communications Inc. 4 * All rights reserved. 5 * 6 * This software was jointly developed between OKTET Labs (under contract 7 * for Solarflare) and Solarflare Communications, Inc. 8 */ 9 10 #include "sfc.h" 11 #include "sfc_debug.h" 12 #include "sfc_log.h" 13 #include "sfc_ev.h" 14 #include "sfc_tx.h" 15 #include "sfc_tweak.h" 16 #include "sfc_kvargs.h" 17 18 /* 19 * Maximum number of TX queue flush attempts in case of 20 * failure or flush timeout 21 */ 22 #define SFC_TX_QFLUSH_ATTEMPTS (3) 23 24 /* 25 * Time to wait between event queue polling attempts when waiting for TX 26 * queue flush done or flush failed events 27 */ 28 #define SFC_TX_QFLUSH_POLL_WAIT_MS (1) 29 30 /* 31 * Maximum number of event queue polling attempts when waiting for TX queue 32 * flush done or flush failed events; it defines TX queue flush attempt timeout 33 * together with SFC_TX_QFLUSH_POLL_WAIT_MS 34 */ 35 #define SFC_TX_QFLUSH_POLL_ATTEMPTS (2000) 36 37 uint64_t 38 sfc_tx_get_dev_offload_caps(struct sfc_adapter *sa) 39 { 40 const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic); 41 uint64_t caps = 0; 42 43 if ((sa->priv.dp_tx->features & SFC_DP_TX_FEAT_VLAN_INSERT) && 44 encp->enc_hw_tx_insert_vlan_enabled) 45 caps |= DEV_TX_OFFLOAD_VLAN_INSERT; 46 47 if (sa->priv.dp_tx->features & SFC_DP_TX_FEAT_MULTI_SEG) 48 caps |= DEV_TX_OFFLOAD_MULTI_SEGS; 49 50 if ((~sa->priv.dp_tx->features & SFC_DP_TX_FEAT_MULTI_POOL) && 51 (~sa->priv.dp_tx->features & SFC_DP_TX_FEAT_REFCNT)) 52 caps |= DEV_TX_OFFLOAD_MBUF_FAST_FREE; 53 54 return caps; 55 } 56 57 uint64_t 58 sfc_tx_get_queue_offload_caps(struct sfc_adapter *sa) 59 { 60 const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic); 61 uint64_t caps = 0; 62 63 caps |= DEV_TX_OFFLOAD_IPV4_CKSUM; 64 caps |= DEV_TX_OFFLOAD_UDP_CKSUM; 65 caps |= DEV_TX_OFFLOAD_TCP_CKSUM; 66 67 if (encp->enc_tunnel_encapsulations_supported) 68 caps |= DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM; 69 70 if (sa->tso) 71 caps |= DEV_TX_OFFLOAD_TCP_TSO; 72 73 return caps; 74 } 75 76 static int 77 sfc_tx_qcheck_conf(struct sfc_adapter *sa, unsigned int txq_max_fill_level, 78 const struct rte_eth_txconf *tx_conf, 79 uint64_t offloads) 80 { 81 int rc = 0; 82 83 if (tx_conf->tx_rs_thresh != 0) { 84 sfc_err(sa, "RS bit in transmit descriptor is not supported"); 85 rc = EINVAL; 86 } 87 88 if (tx_conf->tx_free_thresh > txq_max_fill_level) { 89 sfc_err(sa, 90 "TxQ free threshold too large: %u vs maximum %u", 91 tx_conf->tx_free_thresh, txq_max_fill_level); 92 rc = EINVAL; 93 } 94 95 if (tx_conf->tx_thresh.pthresh != 0 || 96 tx_conf->tx_thresh.hthresh != 0 || 97 tx_conf->tx_thresh.wthresh != 0) { 98 sfc_warn(sa, 99 "prefetch/host/writeback thresholds are not supported"); 100 } 101 102 /* We either perform both TCP and UDP offload, or no offload at all */ 103 if (((offloads & DEV_TX_OFFLOAD_TCP_CKSUM) == 0) != 104 ((offloads & DEV_TX_OFFLOAD_UDP_CKSUM) == 0)) { 105 sfc_err(sa, "TCP and UDP offloads can't be set independently"); 106 rc = EINVAL; 107 } 108 109 return rc; 110 } 111 112 void 113 sfc_tx_qflush_done(struct sfc_txq_info *txq_info) 114 { 115 txq_info->state |= SFC_TXQ_FLUSHED; 116 txq_info->state &= ~SFC_TXQ_FLUSHING; 117 } 118 119 int 120 sfc_tx_qinit(struct sfc_adapter *sa, unsigned int sw_index, 121 uint16_t nb_tx_desc, unsigned int socket_id, 122 const struct rte_eth_txconf *tx_conf) 123 { 124 const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic); 125 unsigned int txq_entries; 126 unsigned int evq_entries; 127 unsigned int txq_max_fill_level; 128 struct sfc_txq_info *txq_info; 129 struct sfc_evq *evq; 130 struct sfc_txq *txq; 131 int rc = 0; 132 struct sfc_dp_tx_qcreate_info info; 133 uint64_t offloads; 134 struct sfc_dp_tx_hw_limits hw_limits; 135 136 sfc_log_init(sa, "TxQ = %u", sw_index); 137 138 memset(&hw_limits, 0, sizeof(hw_limits)); 139 hw_limits.txq_max_entries = sa->txq_max_entries; 140 hw_limits.txq_min_entries = sa->txq_min_entries; 141 142 rc = sa->priv.dp_tx->qsize_up_rings(nb_tx_desc, &hw_limits, 143 &txq_entries, &evq_entries, 144 &txq_max_fill_level); 145 if (rc != 0) 146 goto fail_size_up_rings; 147 SFC_ASSERT(txq_entries >= sa->txq_min_entries); 148 SFC_ASSERT(txq_entries <= sa->txq_max_entries); 149 SFC_ASSERT(txq_entries >= nb_tx_desc); 150 SFC_ASSERT(txq_max_fill_level <= nb_tx_desc); 151 152 offloads = tx_conf->offloads | 153 sa->eth_dev->data->dev_conf.txmode.offloads; 154 rc = sfc_tx_qcheck_conf(sa, txq_max_fill_level, tx_conf, offloads); 155 if (rc != 0) 156 goto fail_bad_conf; 157 158 SFC_ASSERT(sw_index < sfc_sa2shared(sa)->txq_count); 159 txq_info = &sfc_sa2shared(sa)->txq_info[sw_index]; 160 161 txq_info->entries = txq_entries; 162 163 rc = sfc_ev_qinit(sa, SFC_EVQ_TYPE_TX, sw_index, 164 evq_entries, socket_id, &evq); 165 if (rc != 0) 166 goto fail_ev_qinit; 167 168 txq = &sa->txq_ctrl[sw_index]; 169 txq->hw_index = sw_index; 170 txq->evq = evq; 171 txq_info->free_thresh = 172 (tx_conf->tx_free_thresh) ? tx_conf->tx_free_thresh : 173 SFC_TX_DEFAULT_FREE_THRESH; 174 txq_info->offloads = offloads; 175 176 rc = sfc_dma_alloc(sa, "txq", sw_index, 177 efx_txq_size(sa->nic, txq_info->entries), 178 socket_id, &txq->mem); 179 if (rc != 0) 180 goto fail_dma_alloc; 181 182 memset(&info, 0, sizeof(info)); 183 info.max_fill_level = txq_max_fill_level; 184 info.free_thresh = txq_info->free_thresh; 185 info.offloads = offloads; 186 info.txq_entries = txq_info->entries; 187 info.dma_desc_size_max = encp->enc_tx_dma_desc_size_max; 188 info.txq_hw_ring = txq->mem.esm_base; 189 info.evq_entries = evq_entries; 190 info.evq_hw_ring = evq->mem.esm_base; 191 info.hw_index = txq->hw_index; 192 info.mem_bar = sa->mem_bar.esb_base; 193 info.vi_window_shift = encp->enc_vi_window_shift; 194 info.tso_tcp_header_offset_limit = 195 encp->enc_tx_tso_tcp_header_offset_limit; 196 197 rc = sa->priv.dp_tx->qcreate(sa->eth_dev->data->port_id, sw_index, 198 &RTE_ETH_DEV_TO_PCI(sa->eth_dev)->addr, 199 socket_id, &info, &txq_info->dp); 200 if (rc != 0) 201 goto fail_dp_tx_qinit; 202 203 evq->dp_txq = txq_info->dp; 204 205 txq_info->state = SFC_TXQ_INITIALIZED; 206 207 txq_info->deferred_start = (tx_conf->tx_deferred_start != 0); 208 209 return 0; 210 211 fail_dp_tx_qinit: 212 sfc_dma_free(sa, &txq->mem); 213 214 fail_dma_alloc: 215 sfc_ev_qfini(evq); 216 217 fail_ev_qinit: 218 txq_info->entries = 0; 219 220 fail_bad_conf: 221 fail_size_up_rings: 222 sfc_log_init(sa, "failed (TxQ = %u, rc = %d)", sw_index, rc); 223 return rc; 224 } 225 226 void 227 sfc_tx_qfini(struct sfc_adapter *sa, unsigned int sw_index) 228 { 229 struct sfc_txq_info *txq_info; 230 struct sfc_txq *txq; 231 232 sfc_log_init(sa, "TxQ = %u", sw_index); 233 234 SFC_ASSERT(sw_index < sfc_sa2shared(sa)->txq_count); 235 sa->eth_dev->data->tx_queues[sw_index] = NULL; 236 237 txq_info = &sfc_sa2shared(sa)->txq_info[sw_index]; 238 239 SFC_ASSERT(txq_info->state == SFC_TXQ_INITIALIZED); 240 241 sa->priv.dp_tx->qdestroy(txq_info->dp); 242 txq_info->dp = NULL; 243 244 txq_info->state &= ~SFC_TXQ_INITIALIZED; 245 txq_info->entries = 0; 246 247 txq = &sa->txq_ctrl[sw_index]; 248 249 sfc_dma_free(sa, &txq->mem); 250 251 sfc_ev_qfini(txq->evq); 252 txq->evq = NULL; 253 } 254 255 static int 256 sfc_tx_qinit_info(struct sfc_adapter *sa, unsigned int sw_index) 257 { 258 sfc_log_init(sa, "TxQ = %u", sw_index); 259 260 return 0; 261 } 262 263 static int 264 sfc_tx_check_mode(struct sfc_adapter *sa, const struct rte_eth_txmode *txmode) 265 { 266 int rc = 0; 267 268 switch (txmode->mq_mode) { 269 case ETH_MQ_TX_NONE: 270 break; 271 default: 272 sfc_err(sa, "Tx multi-queue mode %u not supported", 273 txmode->mq_mode); 274 rc = EINVAL; 275 } 276 277 /* 278 * These features are claimed to be i40e-specific, 279 * but it does make sense to double-check their absence 280 */ 281 if (txmode->hw_vlan_reject_tagged) { 282 sfc_err(sa, "Rejecting tagged packets not supported"); 283 rc = EINVAL; 284 } 285 286 if (txmode->hw_vlan_reject_untagged) { 287 sfc_err(sa, "Rejecting untagged packets not supported"); 288 rc = EINVAL; 289 } 290 291 if (txmode->hw_vlan_insert_pvid) { 292 sfc_err(sa, "Port-based VLAN insertion not supported"); 293 rc = EINVAL; 294 } 295 296 return rc; 297 } 298 299 /** 300 * Destroy excess queues that are no longer needed after reconfiguration 301 * or complete close. 302 */ 303 static void 304 sfc_tx_fini_queues(struct sfc_adapter *sa, unsigned int nb_tx_queues) 305 { 306 struct sfc_adapter_shared * const sas = sfc_sa2shared(sa); 307 int sw_index; 308 309 SFC_ASSERT(nb_tx_queues <= sas->txq_count); 310 311 sw_index = sas->txq_count; 312 while (--sw_index >= (int)nb_tx_queues) { 313 if (sas->txq_info[sw_index].state & SFC_TXQ_INITIALIZED) 314 sfc_tx_qfini(sa, sw_index); 315 } 316 317 sas->txq_count = nb_tx_queues; 318 } 319 320 int 321 sfc_tx_configure(struct sfc_adapter *sa) 322 { 323 struct sfc_adapter_shared * const sas = sfc_sa2shared(sa); 324 const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic); 325 const struct rte_eth_conf *dev_conf = &sa->eth_dev->data->dev_conf; 326 const unsigned int nb_tx_queues = sa->eth_dev->data->nb_tx_queues; 327 int rc = 0; 328 329 sfc_log_init(sa, "nb_tx_queues=%u (old %u)", 330 nb_tx_queues, sas->txq_count); 331 332 /* 333 * The datapath implementation assumes absence of boundary 334 * limits on Tx DMA descriptors. Addition of these checks on 335 * datapath would simply make the datapath slower. 336 */ 337 if (encp->enc_tx_dma_desc_boundary != 0) { 338 rc = ENOTSUP; 339 goto fail_tx_dma_desc_boundary; 340 } 341 342 rc = sfc_tx_check_mode(sa, &dev_conf->txmode); 343 if (rc != 0) 344 goto fail_check_mode; 345 346 if (nb_tx_queues == sas->txq_count) 347 goto done; 348 349 if (sas->txq_info == NULL) { 350 sas->txq_info = rte_calloc_socket("sfc-txqs", nb_tx_queues, 351 sizeof(sas->txq_info[0]), 0, 352 sa->socket_id); 353 if (sas->txq_info == NULL) 354 goto fail_txqs_alloc; 355 356 /* 357 * Allocate primary process only TxQ control from heap 358 * since it should not be shared. 359 */ 360 rc = ENOMEM; 361 sa->txq_ctrl = calloc(nb_tx_queues, sizeof(sa->txq_ctrl[0])); 362 if (sa->txq_ctrl == NULL) 363 goto fail_txqs_ctrl_alloc; 364 } else { 365 struct sfc_txq_info *new_txq_info; 366 struct sfc_txq *new_txq_ctrl; 367 368 if (nb_tx_queues < sas->txq_count) 369 sfc_tx_fini_queues(sa, nb_tx_queues); 370 371 new_txq_info = 372 rte_realloc(sas->txq_info, 373 nb_tx_queues * sizeof(sas->txq_info[0]), 0); 374 if (new_txq_info == NULL && nb_tx_queues > 0) 375 goto fail_txqs_realloc; 376 377 new_txq_ctrl = realloc(sa->txq_ctrl, 378 nb_tx_queues * sizeof(sa->txq_ctrl[0])); 379 if (new_txq_ctrl == NULL && nb_tx_queues > 0) 380 goto fail_txqs_ctrl_realloc; 381 382 sas->txq_info = new_txq_info; 383 sa->txq_ctrl = new_txq_ctrl; 384 if (nb_tx_queues > sas->txq_count) { 385 memset(&sas->txq_info[sas->txq_count], 0, 386 (nb_tx_queues - sas->txq_count) * 387 sizeof(sas->txq_info[0])); 388 memset(&sa->txq_ctrl[sas->txq_count], 0, 389 (nb_tx_queues - sas->txq_count) * 390 sizeof(sa->txq_ctrl[0])); 391 } 392 } 393 394 while (sas->txq_count < nb_tx_queues) { 395 rc = sfc_tx_qinit_info(sa, sas->txq_count); 396 if (rc != 0) 397 goto fail_tx_qinit_info; 398 399 sas->txq_count++; 400 } 401 402 done: 403 return 0; 404 405 fail_tx_qinit_info: 406 fail_txqs_ctrl_realloc: 407 fail_txqs_realloc: 408 fail_txqs_ctrl_alloc: 409 fail_txqs_alloc: 410 sfc_tx_close(sa); 411 412 fail_check_mode: 413 fail_tx_dma_desc_boundary: 414 sfc_log_init(sa, "failed (rc = %d)", rc); 415 return rc; 416 } 417 418 void 419 sfc_tx_close(struct sfc_adapter *sa) 420 { 421 sfc_tx_fini_queues(sa, 0); 422 423 free(sa->txq_ctrl); 424 sa->txq_ctrl = NULL; 425 426 rte_free(sfc_sa2shared(sa)->txq_info); 427 sfc_sa2shared(sa)->txq_info = NULL; 428 } 429 430 int 431 sfc_tx_qstart(struct sfc_adapter *sa, unsigned int sw_index) 432 { 433 struct sfc_adapter_shared * const sas = sfc_sa2shared(sa); 434 uint64_t offloads_supported = sfc_tx_get_dev_offload_caps(sa) | 435 sfc_tx_get_queue_offload_caps(sa); 436 struct rte_eth_dev_data *dev_data; 437 struct sfc_txq_info *txq_info; 438 struct sfc_txq *txq; 439 struct sfc_evq *evq; 440 uint16_t flags = 0; 441 unsigned int desc_index; 442 int rc = 0; 443 444 sfc_log_init(sa, "TxQ = %u", sw_index); 445 446 SFC_ASSERT(sw_index < sas->txq_count); 447 txq_info = &sas->txq_info[sw_index]; 448 449 SFC_ASSERT(txq_info->state == SFC_TXQ_INITIALIZED); 450 451 txq = &sa->txq_ctrl[sw_index]; 452 evq = txq->evq; 453 454 rc = sfc_ev_qstart(evq, sfc_evq_index_by_txq_sw_index(sa, sw_index)); 455 if (rc != 0) 456 goto fail_ev_qstart; 457 458 if (txq_info->offloads & DEV_TX_OFFLOAD_IPV4_CKSUM) 459 flags |= EFX_TXQ_CKSUM_IPV4; 460 461 if (txq_info->offloads & DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM) 462 flags |= EFX_TXQ_CKSUM_INNER_IPV4; 463 464 if ((txq_info->offloads & DEV_TX_OFFLOAD_TCP_CKSUM) || 465 (txq_info->offloads & DEV_TX_OFFLOAD_UDP_CKSUM)) { 466 flags |= EFX_TXQ_CKSUM_TCPUDP; 467 468 if (offloads_supported & DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM) 469 flags |= EFX_TXQ_CKSUM_INNER_TCPUDP; 470 } 471 472 if (txq_info->offloads & DEV_TX_OFFLOAD_TCP_TSO) 473 flags |= EFX_TXQ_FATSOV2; 474 475 rc = efx_tx_qcreate(sa->nic, txq->hw_index, 0, &txq->mem, 476 txq_info->entries, 0 /* not used on EF10 */, 477 flags, evq->common, 478 &txq->common, &desc_index); 479 if (rc != 0) { 480 if (sa->tso && (rc == ENOSPC)) 481 sfc_err(sa, "ran out of TSO contexts"); 482 483 goto fail_tx_qcreate; 484 } 485 486 efx_tx_qenable(txq->common); 487 488 txq_info->state |= SFC_TXQ_STARTED; 489 490 rc = sa->priv.dp_tx->qstart(txq_info->dp, evq->read_ptr, desc_index); 491 if (rc != 0) 492 goto fail_dp_qstart; 493 494 /* 495 * It seems to be used by DPDK for debug purposes only ('rte_ether') 496 */ 497 dev_data = sa->eth_dev->data; 498 dev_data->tx_queue_state[sw_index] = RTE_ETH_QUEUE_STATE_STARTED; 499 500 return 0; 501 502 fail_dp_qstart: 503 txq_info->state = SFC_TXQ_INITIALIZED; 504 efx_tx_qdestroy(txq->common); 505 506 fail_tx_qcreate: 507 sfc_ev_qstop(evq); 508 509 fail_ev_qstart: 510 return rc; 511 } 512 513 void 514 sfc_tx_qstop(struct sfc_adapter *sa, unsigned int sw_index) 515 { 516 struct sfc_adapter_shared * const sas = sfc_sa2shared(sa); 517 struct rte_eth_dev_data *dev_data; 518 struct sfc_txq_info *txq_info; 519 struct sfc_txq *txq; 520 unsigned int retry_count; 521 unsigned int wait_count; 522 int rc; 523 524 sfc_log_init(sa, "TxQ = %u", sw_index); 525 526 SFC_ASSERT(sw_index < sas->txq_count); 527 txq_info = &sas->txq_info[sw_index]; 528 529 if (txq_info->state == SFC_TXQ_INITIALIZED) 530 return; 531 532 SFC_ASSERT(txq_info->state & SFC_TXQ_STARTED); 533 534 txq = &sa->txq_ctrl[sw_index]; 535 sa->priv.dp_tx->qstop(txq_info->dp, &txq->evq->read_ptr); 536 537 /* 538 * Retry TX queue flushing in case of flush failed or 539 * timeout; in the worst case it can delay for 6 seconds 540 */ 541 for (retry_count = 0; 542 ((txq_info->state & SFC_TXQ_FLUSHED) == 0) && 543 (retry_count < SFC_TX_QFLUSH_ATTEMPTS); 544 ++retry_count) { 545 rc = efx_tx_qflush(txq->common); 546 if (rc != 0) { 547 txq_info->state |= (rc == EALREADY) ? 548 SFC_TXQ_FLUSHED : SFC_TXQ_FLUSH_FAILED; 549 break; 550 } 551 552 /* 553 * Wait for TX queue flush done or flush failed event at least 554 * SFC_TX_QFLUSH_POLL_WAIT_MS milliseconds and not more 555 * than 2 seconds (SFC_TX_QFLUSH_POLL_WAIT_MS multiplied 556 * by SFC_TX_QFLUSH_POLL_ATTEMPTS) 557 */ 558 wait_count = 0; 559 do { 560 rte_delay_ms(SFC_TX_QFLUSH_POLL_WAIT_MS); 561 sfc_ev_qpoll(txq->evq); 562 } while ((txq_info->state & SFC_TXQ_FLUSHING) && 563 wait_count++ < SFC_TX_QFLUSH_POLL_ATTEMPTS); 564 565 if (txq_info->state & SFC_TXQ_FLUSHING) 566 sfc_err(sa, "TxQ %u flush timed out", sw_index); 567 568 if (txq_info->state & SFC_TXQ_FLUSHED) 569 sfc_notice(sa, "TxQ %u flushed", sw_index); 570 } 571 572 sa->priv.dp_tx->qreap(txq_info->dp); 573 574 txq_info->state = SFC_TXQ_INITIALIZED; 575 576 efx_tx_qdestroy(txq->common); 577 578 sfc_ev_qstop(txq->evq); 579 580 /* 581 * It seems to be used by DPDK for debug purposes only ('rte_ether') 582 */ 583 dev_data = sa->eth_dev->data; 584 dev_data->tx_queue_state[sw_index] = RTE_ETH_QUEUE_STATE_STOPPED; 585 } 586 587 int 588 sfc_tx_start(struct sfc_adapter *sa) 589 { 590 struct sfc_adapter_shared * const sas = sfc_sa2shared(sa); 591 unsigned int sw_index; 592 int rc = 0; 593 594 sfc_log_init(sa, "txq_count = %u", sas->txq_count); 595 596 if (sa->tso) { 597 if (!efx_nic_cfg_get(sa->nic)->enc_fw_assisted_tso_v2_enabled) { 598 sfc_warn(sa, "TSO support was unable to be restored"); 599 sa->tso = B_FALSE; 600 } 601 } 602 603 rc = efx_tx_init(sa->nic); 604 if (rc != 0) 605 goto fail_efx_tx_init; 606 607 for (sw_index = 0; sw_index < sas->txq_count; ++sw_index) { 608 if (sas->txq_info[sw_index].state == SFC_TXQ_INITIALIZED && 609 (!(sas->txq_info[sw_index].deferred_start) || 610 sas->txq_info[sw_index].deferred_started)) { 611 rc = sfc_tx_qstart(sa, sw_index); 612 if (rc != 0) 613 goto fail_tx_qstart; 614 } 615 } 616 617 return 0; 618 619 fail_tx_qstart: 620 while (sw_index-- > 0) 621 sfc_tx_qstop(sa, sw_index); 622 623 efx_tx_fini(sa->nic); 624 625 fail_efx_tx_init: 626 sfc_log_init(sa, "failed (rc = %d)", rc); 627 return rc; 628 } 629 630 void 631 sfc_tx_stop(struct sfc_adapter *sa) 632 { 633 struct sfc_adapter_shared * const sas = sfc_sa2shared(sa); 634 unsigned int sw_index; 635 636 sfc_log_init(sa, "txq_count = %u", sas->txq_count); 637 638 sw_index = sas->txq_count; 639 while (sw_index-- > 0) { 640 if (sas->txq_info[sw_index].state & SFC_TXQ_STARTED) 641 sfc_tx_qstop(sa, sw_index); 642 } 643 644 efx_tx_fini(sa->nic); 645 } 646 647 static void 648 sfc_efx_tx_reap(struct sfc_efx_txq *txq) 649 { 650 unsigned int completed; 651 652 sfc_ev_qpoll(txq->evq); 653 654 for (completed = txq->completed; 655 completed != txq->pending; completed++) { 656 struct sfc_efx_tx_sw_desc *txd; 657 658 txd = &txq->sw_ring[completed & txq->ptr_mask]; 659 660 if (txd->mbuf != NULL) { 661 rte_pktmbuf_free(txd->mbuf); 662 txd->mbuf = NULL; 663 } 664 } 665 666 txq->completed = completed; 667 } 668 669 /* 670 * The function is used to insert or update VLAN tag; 671 * the firmware has state of the firmware tag to insert per TxQ 672 * (controlled by option descriptors), hence, if the tag of the 673 * packet to be sent is different from one remembered by the firmware, 674 * the function will update it 675 */ 676 static unsigned int 677 sfc_efx_tx_maybe_insert_tag(struct sfc_efx_txq *txq, struct rte_mbuf *m, 678 efx_desc_t **pend) 679 { 680 uint16_t this_tag = ((m->ol_flags & PKT_TX_VLAN_PKT) ? 681 m->vlan_tci : 0); 682 683 if (this_tag == txq->hw_vlan_tci) 684 return 0; 685 686 /* 687 * The expression inside SFC_ASSERT() is not desired to be checked in 688 * a non-debug build because it might be too expensive on the data path 689 */ 690 SFC_ASSERT(efx_nic_cfg_get(txq->evq->sa->nic)->enc_hw_tx_insert_vlan_enabled); 691 692 efx_tx_qdesc_vlantci_create(txq->common, rte_cpu_to_be_16(this_tag), 693 *pend); 694 (*pend)++; 695 txq->hw_vlan_tci = this_tag; 696 697 return 1; 698 } 699 700 static uint16_t 701 sfc_efx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) 702 { 703 struct sfc_dp_txq *dp_txq = (struct sfc_dp_txq *)tx_queue; 704 struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq); 705 unsigned int added = txq->added; 706 unsigned int pushed = added; 707 unsigned int pkts_sent = 0; 708 efx_desc_t *pend = &txq->pend_desc[0]; 709 const unsigned int hard_max_fill = txq->max_fill_level; 710 const unsigned int soft_max_fill = hard_max_fill - txq->free_thresh; 711 unsigned int fill_level = added - txq->completed; 712 boolean_t reap_done; 713 int rc __rte_unused; 714 struct rte_mbuf **pktp; 715 716 if (unlikely((txq->flags & SFC_EFX_TXQ_FLAG_RUNNING) == 0)) 717 goto done; 718 719 /* 720 * If insufficient space for a single packet is present, 721 * we should reap; otherwise, we shouldn't do that all the time 722 * to avoid latency increase 723 */ 724 reap_done = (fill_level > soft_max_fill); 725 726 if (reap_done) { 727 sfc_efx_tx_reap(txq); 728 /* 729 * Recalculate fill level since 'txq->completed' 730 * might have changed on reap 731 */ 732 fill_level = added - txq->completed; 733 } 734 735 for (pkts_sent = 0, pktp = &tx_pkts[0]; 736 (pkts_sent < nb_pkts) && (fill_level <= soft_max_fill); 737 pkts_sent++, pktp++) { 738 uint16_t hw_vlan_tci_prev = txq->hw_vlan_tci; 739 struct rte_mbuf *m_seg = *pktp; 740 size_t pkt_len = m_seg->pkt_len; 741 unsigned int pkt_descs = 0; 742 size_t in_off = 0; 743 744 /* 745 * Here VLAN TCI is expected to be zero in case if no 746 * DEV_TX_OFFLOAD_VLAN_INSERT capability is advertised; 747 * if the calling app ignores the absence of 748 * DEV_TX_OFFLOAD_VLAN_INSERT and pushes VLAN TCI, then 749 * TX_ERROR will occur 750 */ 751 pkt_descs += sfc_efx_tx_maybe_insert_tag(txq, m_seg, &pend); 752 753 if (m_seg->ol_flags & PKT_TX_TCP_SEG) { 754 /* 755 * We expect correct 'pkt->l[2, 3, 4]_len' values 756 * to be set correctly by the caller 757 */ 758 if (sfc_efx_tso_do(txq, added, &m_seg, &in_off, &pend, 759 &pkt_descs, &pkt_len) != 0) { 760 /* We may have reached this place for 761 * one of the following reasons: 762 * 763 * 1) Packet header length is greater 764 * than SFC_TSOH_STD_LEN 765 * 2) TCP header starts at more then 766 * 208 bytes into the frame 767 * 768 * We will deceive RTE saying that we have sent 769 * the packet, but we will actually drop it. 770 * Hence, we should revert 'pend' to the 771 * previous state (in case we have added 772 * VLAN descriptor) and start processing 773 * another one packet. But the original 774 * mbuf shouldn't be orphaned 775 */ 776 pend -= pkt_descs; 777 txq->hw_vlan_tci = hw_vlan_tci_prev; 778 779 rte_pktmbuf_free(*pktp); 780 781 continue; 782 } 783 784 /* 785 * We've only added 2 FATSOv2 option descriptors 786 * and 1 descriptor for the linearized packet header. 787 * The outstanding work will be done in the same manner 788 * as for the usual non-TSO path 789 */ 790 } 791 792 for (; m_seg != NULL; m_seg = m_seg->next) { 793 efsys_dma_addr_t next_frag; 794 size_t seg_len; 795 796 seg_len = m_seg->data_len; 797 next_frag = rte_mbuf_data_iova(m_seg); 798 799 /* 800 * If we've started TSO transaction few steps earlier, 801 * we'll skip packet header using an offset in the 802 * current segment (which has been set to the 803 * first one containing payload) 804 */ 805 seg_len -= in_off; 806 next_frag += in_off; 807 in_off = 0; 808 809 do { 810 efsys_dma_addr_t frag_addr = next_frag; 811 size_t frag_len; 812 813 /* 814 * It is assumed here that there is no 815 * limitation on address boundary 816 * crossing by DMA descriptor. 817 */ 818 frag_len = MIN(seg_len, txq->dma_desc_size_max); 819 next_frag += frag_len; 820 seg_len -= frag_len; 821 pkt_len -= frag_len; 822 823 efx_tx_qdesc_dma_create(txq->common, 824 frag_addr, frag_len, 825 (pkt_len == 0), 826 pend++); 827 828 pkt_descs++; 829 } while (seg_len != 0); 830 } 831 832 added += pkt_descs; 833 834 fill_level += pkt_descs; 835 if (unlikely(fill_level > hard_max_fill)) { 836 /* 837 * Our estimation for maximum number of descriptors 838 * required to send a packet seems to be wrong. 839 * Try to reap (if we haven't yet). 840 */ 841 if (!reap_done) { 842 sfc_efx_tx_reap(txq); 843 reap_done = B_TRUE; 844 fill_level = added - txq->completed; 845 if (fill_level > hard_max_fill) { 846 pend -= pkt_descs; 847 txq->hw_vlan_tci = hw_vlan_tci_prev; 848 break; 849 } 850 } else { 851 pend -= pkt_descs; 852 txq->hw_vlan_tci = hw_vlan_tci_prev; 853 break; 854 } 855 } 856 857 /* Assign mbuf to the last used desc */ 858 txq->sw_ring[(added - 1) & txq->ptr_mask].mbuf = *pktp; 859 } 860 861 if (likely(pkts_sent > 0)) { 862 rc = efx_tx_qdesc_post(txq->common, txq->pend_desc, 863 pend - &txq->pend_desc[0], 864 txq->completed, &txq->added); 865 SFC_ASSERT(rc == 0); 866 867 if (likely(pushed != txq->added)) 868 efx_tx_qpush(txq->common, txq->added, pushed); 869 } 870 871 #if SFC_TX_XMIT_PKTS_REAP_AT_LEAST_ONCE 872 if (!reap_done) 873 sfc_efx_tx_reap(txq); 874 #endif 875 876 done: 877 return pkts_sent; 878 } 879 880 const struct sfc_dp_tx * 881 sfc_dp_tx_by_dp_txq(const struct sfc_dp_txq *dp_txq) 882 { 883 const struct sfc_dp_queue *dpq = &dp_txq->dpq; 884 struct rte_eth_dev *eth_dev; 885 struct sfc_adapter_priv *sap; 886 887 SFC_ASSERT(rte_eth_dev_is_valid_port(dpq->port_id)); 888 eth_dev = &rte_eth_devices[dpq->port_id]; 889 890 sap = sfc_adapter_priv_by_eth_dev(eth_dev); 891 892 return sap->dp_tx; 893 } 894 895 struct sfc_txq_info * 896 sfc_txq_info_by_dp_txq(const struct sfc_dp_txq *dp_txq) 897 { 898 const struct sfc_dp_queue *dpq = &dp_txq->dpq; 899 struct rte_eth_dev *eth_dev; 900 struct sfc_adapter_shared *sas; 901 902 SFC_ASSERT(rte_eth_dev_is_valid_port(dpq->port_id)); 903 eth_dev = &rte_eth_devices[dpq->port_id]; 904 905 sas = sfc_adapter_shared_by_eth_dev(eth_dev); 906 907 SFC_ASSERT(dpq->queue_id < sas->txq_count); 908 return &sas->txq_info[dpq->queue_id]; 909 } 910 911 struct sfc_txq * 912 sfc_txq_by_dp_txq(const struct sfc_dp_txq *dp_txq) 913 { 914 const struct sfc_dp_queue *dpq = &dp_txq->dpq; 915 struct rte_eth_dev *eth_dev; 916 struct sfc_adapter *sa; 917 918 SFC_ASSERT(rte_eth_dev_is_valid_port(dpq->port_id)); 919 eth_dev = &rte_eth_devices[dpq->port_id]; 920 921 sa = sfc_adapter_by_eth_dev(eth_dev); 922 923 SFC_ASSERT(dpq->queue_id < sfc_sa2shared(sa)->txq_count); 924 return &sa->txq_ctrl[dpq->queue_id]; 925 } 926 927 static sfc_dp_tx_qsize_up_rings_t sfc_efx_tx_qsize_up_rings; 928 static int 929 sfc_efx_tx_qsize_up_rings(uint16_t nb_tx_desc, 930 __rte_unused struct sfc_dp_tx_hw_limits *limits, 931 unsigned int *txq_entries, 932 unsigned int *evq_entries, 933 unsigned int *txq_max_fill_level) 934 { 935 *txq_entries = nb_tx_desc; 936 *evq_entries = nb_tx_desc; 937 *txq_max_fill_level = EFX_TXQ_LIMIT(*txq_entries); 938 return 0; 939 } 940 941 static sfc_dp_tx_qcreate_t sfc_efx_tx_qcreate; 942 static int 943 sfc_efx_tx_qcreate(uint16_t port_id, uint16_t queue_id, 944 const struct rte_pci_addr *pci_addr, 945 int socket_id, 946 const struct sfc_dp_tx_qcreate_info *info, 947 struct sfc_dp_txq **dp_txqp) 948 { 949 struct sfc_efx_txq *txq; 950 struct sfc_txq *ctrl_txq; 951 int rc; 952 953 rc = ENOMEM; 954 txq = rte_zmalloc_socket("sfc-efx-txq", sizeof(*txq), 955 RTE_CACHE_LINE_SIZE, socket_id); 956 if (txq == NULL) 957 goto fail_txq_alloc; 958 959 sfc_dp_queue_init(&txq->dp.dpq, port_id, queue_id, pci_addr); 960 961 rc = ENOMEM; 962 txq->pend_desc = rte_calloc_socket("sfc-efx-txq-pend-desc", 963 EFX_TXQ_LIMIT(info->txq_entries), 964 sizeof(*txq->pend_desc), 0, 965 socket_id); 966 if (txq->pend_desc == NULL) 967 goto fail_pend_desc_alloc; 968 969 rc = ENOMEM; 970 txq->sw_ring = rte_calloc_socket("sfc-efx-txq-sw_ring", 971 info->txq_entries, 972 sizeof(*txq->sw_ring), 973 RTE_CACHE_LINE_SIZE, socket_id); 974 if (txq->sw_ring == NULL) 975 goto fail_sw_ring_alloc; 976 977 ctrl_txq = sfc_txq_by_dp_txq(&txq->dp); 978 if (ctrl_txq->evq->sa->tso) { 979 rc = sfc_efx_tso_alloc_tsoh_objs(txq->sw_ring, 980 info->txq_entries, socket_id); 981 if (rc != 0) 982 goto fail_alloc_tsoh_objs; 983 } 984 985 txq->evq = ctrl_txq->evq; 986 txq->ptr_mask = info->txq_entries - 1; 987 txq->max_fill_level = info->max_fill_level; 988 txq->free_thresh = info->free_thresh; 989 txq->dma_desc_size_max = info->dma_desc_size_max; 990 991 *dp_txqp = &txq->dp; 992 return 0; 993 994 fail_alloc_tsoh_objs: 995 rte_free(txq->sw_ring); 996 997 fail_sw_ring_alloc: 998 rte_free(txq->pend_desc); 999 1000 fail_pend_desc_alloc: 1001 rte_free(txq); 1002 1003 fail_txq_alloc: 1004 return rc; 1005 } 1006 1007 static sfc_dp_tx_qdestroy_t sfc_efx_tx_qdestroy; 1008 static void 1009 sfc_efx_tx_qdestroy(struct sfc_dp_txq *dp_txq) 1010 { 1011 struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq); 1012 1013 sfc_efx_tso_free_tsoh_objs(txq->sw_ring, txq->ptr_mask + 1); 1014 rte_free(txq->sw_ring); 1015 rte_free(txq->pend_desc); 1016 rte_free(txq); 1017 } 1018 1019 static sfc_dp_tx_qstart_t sfc_efx_tx_qstart; 1020 static int 1021 sfc_efx_tx_qstart(struct sfc_dp_txq *dp_txq, 1022 __rte_unused unsigned int evq_read_ptr, 1023 unsigned int txq_desc_index) 1024 { 1025 /* libefx-based datapath is specific to libefx-based PMD */ 1026 struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq); 1027 struct sfc_txq *ctrl_txq = sfc_txq_by_dp_txq(dp_txq); 1028 1029 txq->common = ctrl_txq->common; 1030 1031 txq->pending = txq->completed = txq->added = txq_desc_index; 1032 txq->hw_vlan_tci = 0; 1033 1034 txq->flags |= (SFC_EFX_TXQ_FLAG_STARTED | SFC_EFX_TXQ_FLAG_RUNNING); 1035 1036 return 0; 1037 } 1038 1039 static sfc_dp_tx_qstop_t sfc_efx_tx_qstop; 1040 static void 1041 sfc_efx_tx_qstop(struct sfc_dp_txq *dp_txq, 1042 __rte_unused unsigned int *evq_read_ptr) 1043 { 1044 struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq); 1045 1046 txq->flags &= ~SFC_EFX_TXQ_FLAG_RUNNING; 1047 } 1048 1049 static sfc_dp_tx_qreap_t sfc_efx_tx_qreap; 1050 static void 1051 sfc_efx_tx_qreap(struct sfc_dp_txq *dp_txq) 1052 { 1053 struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq); 1054 unsigned int txds; 1055 1056 sfc_efx_tx_reap(txq); 1057 1058 for (txds = 0; txds <= txq->ptr_mask; txds++) { 1059 if (txq->sw_ring[txds].mbuf != NULL) { 1060 rte_pktmbuf_free(txq->sw_ring[txds].mbuf); 1061 txq->sw_ring[txds].mbuf = NULL; 1062 } 1063 } 1064 1065 txq->flags &= ~SFC_EFX_TXQ_FLAG_STARTED; 1066 } 1067 1068 static sfc_dp_tx_qdesc_status_t sfc_efx_tx_qdesc_status; 1069 static int 1070 sfc_efx_tx_qdesc_status(struct sfc_dp_txq *dp_txq, uint16_t offset) 1071 { 1072 struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq); 1073 1074 if (unlikely(offset > txq->ptr_mask)) 1075 return -EINVAL; 1076 1077 if (unlikely(offset >= txq->max_fill_level)) 1078 return RTE_ETH_TX_DESC_UNAVAIL; 1079 1080 /* 1081 * Poll EvQ to derive up-to-date 'txq->pending' figure; 1082 * it is required for the queue to be running, but the 1083 * check is omitted because API design assumes that it 1084 * is the duty of the caller to satisfy all conditions 1085 */ 1086 SFC_ASSERT((txq->flags & SFC_EFX_TXQ_FLAG_RUNNING) == 1087 SFC_EFX_TXQ_FLAG_RUNNING); 1088 sfc_ev_qpoll(txq->evq); 1089 1090 /* 1091 * Ring tail is 'txq->pending', and although descriptors 1092 * between 'txq->completed' and 'txq->pending' are still 1093 * in use by the driver, they should be reported as DONE 1094 */ 1095 if (unlikely(offset < (txq->added - txq->pending))) 1096 return RTE_ETH_TX_DESC_FULL; 1097 1098 /* 1099 * There is no separate return value for unused descriptors; 1100 * the latter will be reported as DONE because genuine DONE 1101 * descriptors will be freed anyway in SW on the next burst 1102 */ 1103 return RTE_ETH_TX_DESC_DONE; 1104 } 1105 1106 struct sfc_dp_tx sfc_efx_tx = { 1107 .dp = { 1108 .name = SFC_KVARG_DATAPATH_EFX, 1109 .type = SFC_DP_TX, 1110 .hw_fw_caps = 0, 1111 }, 1112 .features = SFC_DP_TX_FEAT_VLAN_INSERT | 1113 SFC_DP_TX_FEAT_TSO | 1114 SFC_DP_TX_FEAT_MULTI_POOL | 1115 SFC_DP_TX_FEAT_REFCNT | 1116 SFC_DP_TX_FEAT_MULTI_SEG, 1117 .qsize_up_rings = sfc_efx_tx_qsize_up_rings, 1118 .qcreate = sfc_efx_tx_qcreate, 1119 .qdestroy = sfc_efx_tx_qdestroy, 1120 .qstart = sfc_efx_tx_qstart, 1121 .qstop = sfc_efx_tx_qstop, 1122 .qreap = sfc_efx_tx_qreap, 1123 .qdesc_status = sfc_efx_tx_qdesc_status, 1124 .pkt_burst = sfc_efx_xmit_pkts, 1125 }; 1126