1 /* SPDX-License-Identifier: BSD-3-Clause 2 * 3 * Copyright (c) 2016-2018 Solarflare Communications Inc. 4 * All rights reserved. 5 * 6 * This software was jointly developed between OKTET Labs (under contract 7 * for Solarflare) and Solarflare Communications, Inc. 8 */ 9 10 #include "sfc.h" 11 #include "sfc_debug.h" 12 #include "sfc_log.h" 13 #include "sfc_ev.h" 14 #include "sfc_tx.h" 15 #include "sfc_tweak.h" 16 #include "sfc_kvargs.h" 17 18 /* 19 * Maximum number of TX queue flush attempts in case of 20 * failure or flush timeout 21 */ 22 #define SFC_TX_QFLUSH_ATTEMPTS (3) 23 24 /* 25 * Time to wait between event queue polling attempts when waiting for TX 26 * queue flush done or flush failed events 27 */ 28 #define SFC_TX_QFLUSH_POLL_WAIT_MS (1) 29 30 /* 31 * Maximum number of event queue polling attempts when waiting for TX queue 32 * flush done or flush failed events; it defines TX queue flush attempt timeout 33 * together with SFC_TX_QFLUSH_POLL_WAIT_MS 34 */ 35 #define SFC_TX_QFLUSH_POLL_ATTEMPTS (2000) 36 37 static uint64_t 38 sfc_tx_get_offload_mask(struct sfc_adapter *sa) 39 { 40 const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic); 41 uint64_t no_caps = 0; 42 43 if (!encp->enc_hw_tx_insert_vlan_enabled) 44 no_caps |= DEV_TX_OFFLOAD_VLAN_INSERT; 45 46 if (!encp->enc_tunnel_encapsulations_supported) 47 no_caps |= DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM; 48 49 if (!sa->tso) 50 no_caps |= DEV_TX_OFFLOAD_TCP_TSO; 51 52 if (!sa->tso_encap) 53 no_caps |= (DEV_TX_OFFLOAD_VXLAN_TNL_TSO | 54 DEV_TX_OFFLOAD_GENEVE_TNL_TSO); 55 56 return ~no_caps; 57 } 58 59 uint64_t 60 sfc_tx_get_dev_offload_caps(struct sfc_adapter *sa) 61 { 62 return sa->priv.dp_tx->dev_offload_capa & sfc_tx_get_offload_mask(sa); 63 } 64 65 uint64_t 66 sfc_tx_get_queue_offload_caps(struct sfc_adapter *sa) 67 { 68 return sa->priv.dp_tx->queue_offload_capa & sfc_tx_get_offload_mask(sa); 69 } 70 71 static int 72 sfc_tx_qcheck_conf(struct sfc_adapter *sa, unsigned int txq_max_fill_level, 73 const struct rte_eth_txconf *tx_conf, 74 uint64_t offloads) 75 { 76 int rc = 0; 77 78 if (tx_conf->tx_rs_thresh != 0) { 79 sfc_err(sa, "RS bit in transmit descriptor is not supported"); 80 rc = EINVAL; 81 } 82 83 if (tx_conf->tx_free_thresh > txq_max_fill_level) { 84 sfc_err(sa, 85 "TxQ free threshold too large: %u vs maximum %u", 86 tx_conf->tx_free_thresh, txq_max_fill_level); 87 rc = EINVAL; 88 } 89 90 if (tx_conf->tx_thresh.pthresh != 0 || 91 tx_conf->tx_thresh.hthresh != 0 || 92 tx_conf->tx_thresh.wthresh != 0) { 93 sfc_warn(sa, 94 "prefetch/host/writeback thresholds are not supported"); 95 } 96 97 /* We either perform both TCP and UDP offload, or no offload at all */ 98 if (((offloads & DEV_TX_OFFLOAD_TCP_CKSUM) == 0) != 99 ((offloads & DEV_TX_OFFLOAD_UDP_CKSUM) == 0)) { 100 sfc_err(sa, "TCP and UDP offloads can't be set independently"); 101 rc = EINVAL; 102 } 103 104 return rc; 105 } 106 107 void 108 sfc_tx_qflush_done(struct sfc_txq_info *txq_info) 109 { 110 txq_info->state |= SFC_TXQ_FLUSHED; 111 txq_info->state &= ~SFC_TXQ_FLUSHING; 112 } 113 114 int 115 sfc_tx_qinit(struct sfc_adapter *sa, unsigned int sw_index, 116 uint16_t nb_tx_desc, unsigned int socket_id, 117 const struct rte_eth_txconf *tx_conf) 118 { 119 const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic); 120 unsigned int txq_entries; 121 unsigned int evq_entries; 122 unsigned int txq_max_fill_level; 123 struct sfc_txq_info *txq_info; 124 struct sfc_evq *evq; 125 struct sfc_txq *txq; 126 int rc = 0; 127 struct sfc_dp_tx_qcreate_info info; 128 uint64_t offloads; 129 struct sfc_dp_tx_hw_limits hw_limits; 130 131 sfc_log_init(sa, "TxQ = %u", sw_index); 132 133 memset(&hw_limits, 0, sizeof(hw_limits)); 134 hw_limits.txq_max_entries = sa->txq_max_entries; 135 hw_limits.txq_min_entries = sa->txq_min_entries; 136 137 rc = sa->priv.dp_tx->qsize_up_rings(nb_tx_desc, &hw_limits, 138 &txq_entries, &evq_entries, 139 &txq_max_fill_level); 140 if (rc != 0) 141 goto fail_size_up_rings; 142 SFC_ASSERT(txq_entries >= sa->txq_min_entries); 143 SFC_ASSERT(txq_entries <= sa->txq_max_entries); 144 SFC_ASSERT(txq_entries >= nb_tx_desc); 145 SFC_ASSERT(txq_max_fill_level <= nb_tx_desc); 146 147 offloads = tx_conf->offloads | 148 sa->eth_dev->data->dev_conf.txmode.offloads; 149 rc = sfc_tx_qcheck_conf(sa, txq_max_fill_level, tx_conf, offloads); 150 if (rc != 0) 151 goto fail_bad_conf; 152 153 SFC_ASSERT(sw_index < sfc_sa2shared(sa)->txq_count); 154 txq_info = &sfc_sa2shared(sa)->txq_info[sw_index]; 155 156 txq_info->entries = txq_entries; 157 158 rc = sfc_ev_qinit(sa, SFC_EVQ_TYPE_TX, sw_index, 159 evq_entries, socket_id, &evq); 160 if (rc != 0) 161 goto fail_ev_qinit; 162 163 txq = &sa->txq_ctrl[sw_index]; 164 txq->hw_index = sw_index; 165 txq->evq = evq; 166 txq_info->free_thresh = 167 (tx_conf->tx_free_thresh) ? tx_conf->tx_free_thresh : 168 SFC_TX_DEFAULT_FREE_THRESH; 169 txq_info->offloads = offloads; 170 171 rc = sfc_dma_alloc(sa, "txq", sw_index, 172 efx_txq_size(sa->nic, txq_info->entries), 173 socket_id, &txq->mem); 174 if (rc != 0) 175 goto fail_dma_alloc; 176 177 memset(&info, 0, sizeof(info)); 178 info.max_fill_level = txq_max_fill_level; 179 info.free_thresh = txq_info->free_thresh; 180 info.offloads = offloads; 181 info.txq_entries = txq_info->entries; 182 info.dma_desc_size_max = encp->enc_tx_dma_desc_size_max; 183 info.txq_hw_ring = txq->mem.esm_base; 184 info.evq_entries = evq_entries; 185 info.evq_hw_ring = evq->mem.esm_base; 186 info.hw_index = txq->hw_index; 187 info.mem_bar = sa->mem_bar.esb_base; 188 info.vi_window_shift = encp->enc_vi_window_shift; 189 info.tso_tcp_header_offset_limit = 190 encp->enc_tx_tso_tcp_header_offset_limit; 191 192 rc = sa->priv.dp_tx->qcreate(sa->eth_dev->data->port_id, sw_index, 193 &RTE_ETH_DEV_TO_PCI(sa->eth_dev)->addr, 194 socket_id, &info, &txq_info->dp); 195 if (rc != 0) 196 goto fail_dp_tx_qinit; 197 198 evq->dp_txq = txq_info->dp; 199 200 txq_info->state = SFC_TXQ_INITIALIZED; 201 202 txq_info->deferred_start = (tx_conf->tx_deferred_start != 0); 203 204 return 0; 205 206 fail_dp_tx_qinit: 207 sfc_dma_free(sa, &txq->mem); 208 209 fail_dma_alloc: 210 sfc_ev_qfini(evq); 211 212 fail_ev_qinit: 213 txq_info->entries = 0; 214 215 fail_bad_conf: 216 fail_size_up_rings: 217 sfc_log_init(sa, "failed (TxQ = %u, rc = %d)", sw_index, rc); 218 return rc; 219 } 220 221 void 222 sfc_tx_qfini(struct sfc_adapter *sa, unsigned int sw_index) 223 { 224 struct sfc_txq_info *txq_info; 225 struct sfc_txq *txq; 226 227 sfc_log_init(sa, "TxQ = %u", sw_index); 228 229 SFC_ASSERT(sw_index < sfc_sa2shared(sa)->txq_count); 230 sa->eth_dev->data->tx_queues[sw_index] = NULL; 231 232 txq_info = &sfc_sa2shared(sa)->txq_info[sw_index]; 233 234 SFC_ASSERT(txq_info->state == SFC_TXQ_INITIALIZED); 235 236 sa->priv.dp_tx->qdestroy(txq_info->dp); 237 txq_info->dp = NULL; 238 239 txq_info->state &= ~SFC_TXQ_INITIALIZED; 240 txq_info->entries = 0; 241 242 txq = &sa->txq_ctrl[sw_index]; 243 244 sfc_dma_free(sa, &txq->mem); 245 246 sfc_ev_qfini(txq->evq); 247 txq->evq = NULL; 248 } 249 250 static int 251 sfc_tx_qinit_info(struct sfc_adapter *sa, unsigned int sw_index) 252 { 253 sfc_log_init(sa, "TxQ = %u", sw_index); 254 255 return 0; 256 } 257 258 static int 259 sfc_tx_check_mode(struct sfc_adapter *sa, const struct rte_eth_txmode *txmode) 260 { 261 int rc = 0; 262 263 switch (txmode->mq_mode) { 264 case ETH_MQ_TX_NONE: 265 break; 266 default: 267 sfc_err(sa, "Tx multi-queue mode %u not supported", 268 txmode->mq_mode); 269 rc = EINVAL; 270 } 271 272 /* 273 * These features are claimed to be i40e-specific, 274 * but it does make sense to double-check their absence 275 */ 276 if (txmode->hw_vlan_reject_tagged) { 277 sfc_err(sa, "Rejecting tagged packets not supported"); 278 rc = EINVAL; 279 } 280 281 if (txmode->hw_vlan_reject_untagged) { 282 sfc_err(sa, "Rejecting untagged packets not supported"); 283 rc = EINVAL; 284 } 285 286 if (txmode->hw_vlan_insert_pvid) { 287 sfc_err(sa, "Port-based VLAN insertion not supported"); 288 rc = EINVAL; 289 } 290 291 return rc; 292 } 293 294 /** 295 * Destroy excess queues that are no longer needed after reconfiguration 296 * or complete close. 297 */ 298 static void 299 sfc_tx_fini_queues(struct sfc_adapter *sa, unsigned int nb_tx_queues) 300 { 301 struct sfc_adapter_shared * const sas = sfc_sa2shared(sa); 302 int sw_index; 303 304 SFC_ASSERT(nb_tx_queues <= sas->txq_count); 305 306 sw_index = sas->txq_count; 307 while (--sw_index >= (int)nb_tx_queues) { 308 if (sas->txq_info[sw_index].state & SFC_TXQ_INITIALIZED) 309 sfc_tx_qfini(sa, sw_index); 310 } 311 312 sas->txq_count = nb_tx_queues; 313 } 314 315 int 316 sfc_tx_configure(struct sfc_adapter *sa) 317 { 318 struct sfc_adapter_shared * const sas = sfc_sa2shared(sa); 319 const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic); 320 const struct rte_eth_conf *dev_conf = &sa->eth_dev->data->dev_conf; 321 const unsigned int nb_tx_queues = sa->eth_dev->data->nb_tx_queues; 322 int rc = 0; 323 324 sfc_log_init(sa, "nb_tx_queues=%u (old %u)", 325 nb_tx_queues, sas->txq_count); 326 327 /* 328 * The datapath implementation assumes absence of boundary 329 * limits on Tx DMA descriptors. Addition of these checks on 330 * datapath would simply make the datapath slower. 331 */ 332 if (encp->enc_tx_dma_desc_boundary != 0) { 333 rc = ENOTSUP; 334 goto fail_tx_dma_desc_boundary; 335 } 336 337 rc = sfc_tx_check_mode(sa, &dev_conf->txmode); 338 if (rc != 0) 339 goto fail_check_mode; 340 341 if (nb_tx_queues == sas->txq_count) 342 goto done; 343 344 if (sas->txq_info == NULL) { 345 sas->txq_info = rte_calloc_socket("sfc-txqs", nb_tx_queues, 346 sizeof(sas->txq_info[0]), 0, 347 sa->socket_id); 348 if (sas->txq_info == NULL) 349 goto fail_txqs_alloc; 350 351 /* 352 * Allocate primary process only TxQ control from heap 353 * since it should not be shared. 354 */ 355 rc = ENOMEM; 356 sa->txq_ctrl = calloc(nb_tx_queues, sizeof(sa->txq_ctrl[0])); 357 if (sa->txq_ctrl == NULL) 358 goto fail_txqs_ctrl_alloc; 359 } else { 360 struct sfc_txq_info *new_txq_info; 361 struct sfc_txq *new_txq_ctrl; 362 363 if (nb_tx_queues < sas->txq_count) 364 sfc_tx_fini_queues(sa, nb_tx_queues); 365 366 new_txq_info = 367 rte_realloc(sas->txq_info, 368 nb_tx_queues * sizeof(sas->txq_info[0]), 0); 369 if (new_txq_info == NULL && nb_tx_queues > 0) 370 goto fail_txqs_realloc; 371 372 new_txq_ctrl = realloc(sa->txq_ctrl, 373 nb_tx_queues * sizeof(sa->txq_ctrl[0])); 374 if (new_txq_ctrl == NULL && nb_tx_queues > 0) 375 goto fail_txqs_ctrl_realloc; 376 377 sas->txq_info = new_txq_info; 378 sa->txq_ctrl = new_txq_ctrl; 379 if (nb_tx_queues > sas->txq_count) { 380 memset(&sas->txq_info[sas->txq_count], 0, 381 (nb_tx_queues - sas->txq_count) * 382 sizeof(sas->txq_info[0])); 383 memset(&sa->txq_ctrl[sas->txq_count], 0, 384 (nb_tx_queues - sas->txq_count) * 385 sizeof(sa->txq_ctrl[0])); 386 } 387 } 388 389 while (sas->txq_count < nb_tx_queues) { 390 rc = sfc_tx_qinit_info(sa, sas->txq_count); 391 if (rc != 0) 392 goto fail_tx_qinit_info; 393 394 sas->txq_count++; 395 } 396 397 done: 398 return 0; 399 400 fail_tx_qinit_info: 401 fail_txqs_ctrl_realloc: 402 fail_txqs_realloc: 403 fail_txqs_ctrl_alloc: 404 fail_txqs_alloc: 405 sfc_tx_close(sa); 406 407 fail_check_mode: 408 fail_tx_dma_desc_boundary: 409 sfc_log_init(sa, "failed (rc = %d)", rc); 410 return rc; 411 } 412 413 void 414 sfc_tx_close(struct sfc_adapter *sa) 415 { 416 sfc_tx_fini_queues(sa, 0); 417 418 free(sa->txq_ctrl); 419 sa->txq_ctrl = NULL; 420 421 rte_free(sfc_sa2shared(sa)->txq_info); 422 sfc_sa2shared(sa)->txq_info = NULL; 423 } 424 425 int 426 sfc_tx_qstart(struct sfc_adapter *sa, unsigned int sw_index) 427 { 428 struct sfc_adapter_shared * const sas = sfc_sa2shared(sa); 429 uint64_t offloads_supported = sfc_tx_get_dev_offload_caps(sa) | 430 sfc_tx_get_queue_offload_caps(sa); 431 struct rte_eth_dev_data *dev_data; 432 struct sfc_txq_info *txq_info; 433 struct sfc_txq *txq; 434 struct sfc_evq *evq; 435 uint16_t flags = 0; 436 unsigned int desc_index; 437 int rc = 0; 438 439 sfc_log_init(sa, "TxQ = %u", sw_index); 440 441 SFC_ASSERT(sw_index < sas->txq_count); 442 txq_info = &sas->txq_info[sw_index]; 443 444 SFC_ASSERT(txq_info->state == SFC_TXQ_INITIALIZED); 445 446 txq = &sa->txq_ctrl[sw_index]; 447 evq = txq->evq; 448 449 rc = sfc_ev_qstart(evq, sfc_evq_index_by_txq_sw_index(sa, sw_index)); 450 if (rc != 0) 451 goto fail_ev_qstart; 452 453 if (txq_info->offloads & DEV_TX_OFFLOAD_IPV4_CKSUM) 454 flags |= EFX_TXQ_CKSUM_IPV4; 455 456 if (txq_info->offloads & DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM) 457 flags |= EFX_TXQ_CKSUM_INNER_IPV4; 458 459 if ((txq_info->offloads & DEV_TX_OFFLOAD_TCP_CKSUM) || 460 (txq_info->offloads & DEV_TX_OFFLOAD_UDP_CKSUM)) { 461 flags |= EFX_TXQ_CKSUM_TCPUDP; 462 463 if (offloads_supported & DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM) 464 flags |= EFX_TXQ_CKSUM_INNER_TCPUDP; 465 } 466 467 if (txq_info->offloads & (DEV_TX_OFFLOAD_TCP_TSO | 468 DEV_TX_OFFLOAD_VXLAN_TNL_TSO | 469 DEV_TX_OFFLOAD_GENEVE_TNL_TSO)) 470 flags |= EFX_TXQ_FATSOV2; 471 472 rc = efx_tx_qcreate(sa->nic, txq->hw_index, 0, &txq->mem, 473 txq_info->entries, 0 /* not used on EF10 */, 474 flags, evq->common, 475 &txq->common, &desc_index); 476 if (rc != 0) { 477 if (sa->tso && (rc == ENOSPC)) 478 sfc_err(sa, "ran out of TSO contexts"); 479 480 goto fail_tx_qcreate; 481 } 482 483 efx_tx_qenable(txq->common); 484 485 txq_info->state |= SFC_TXQ_STARTED; 486 487 rc = sa->priv.dp_tx->qstart(txq_info->dp, evq->read_ptr, desc_index); 488 if (rc != 0) 489 goto fail_dp_qstart; 490 491 /* 492 * It seems to be used by DPDK for debug purposes only ('rte_ether') 493 */ 494 dev_data = sa->eth_dev->data; 495 dev_data->tx_queue_state[sw_index] = RTE_ETH_QUEUE_STATE_STARTED; 496 497 return 0; 498 499 fail_dp_qstart: 500 txq_info->state = SFC_TXQ_INITIALIZED; 501 efx_tx_qdestroy(txq->common); 502 503 fail_tx_qcreate: 504 sfc_ev_qstop(evq); 505 506 fail_ev_qstart: 507 return rc; 508 } 509 510 void 511 sfc_tx_qstop(struct sfc_adapter *sa, unsigned int sw_index) 512 { 513 struct sfc_adapter_shared * const sas = sfc_sa2shared(sa); 514 struct rte_eth_dev_data *dev_data; 515 struct sfc_txq_info *txq_info; 516 struct sfc_txq *txq; 517 unsigned int retry_count; 518 unsigned int wait_count; 519 int rc; 520 521 sfc_log_init(sa, "TxQ = %u", sw_index); 522 523 SFC_ASSERT(sw_index < sas->txq_count); 524 txq_info = &sas->txq_info[sw_index]; 525 526 if (txq_info->state == SFC_TXQ_INITIALIZED) 527 return; 528 529 SFC_ASSERT(txq_info->state & SFC_TXQ_STARTED); 530 531 txq = &sa->txq_ctrl[sw_index]; 532 sa->priv.dp_tx->qstop(txq_info->dp, &txq->evq->read_ptr); 533 534 /* 535 * Retry TX queue flushing in case of flush failed or 536 * timeout; in the worst case it can delay for 6 seconds 537 */ 538 for (retry_count = 0; 539 ((txq_info->state & SFC_TXQ_FLUSHED) == 0) && 540 (retry_count < SFC_TX_QFLUSH_ATTEMPTS); 541 ++retry_count) { 542 rc = efx_tx_qflush(txq->common); 543 if (rc != 0) { 544 txq_info->state |= (rc == EALREADY) ? 545 SFC_TXQ_FLUSHED : SFC_TXQ_FLUSH_FAILED; 546 break; 547 } 548 549 /* 550 * Wait for TX queue flush done or flush failed event at least 551 * SFC_TX_QFLUSH_POLL_WAIT_MS milliseconds and not more 552 * than 2 seconds (SFC_TX_QFLUSH_POLL_WAIT_MS multiplied 553 * by SFC_TX_QFLUSH_POLL_ATTEMPTS) 554 */ 555 wait_count = 0; 556 do { 557 rte_delay_ms(SFC_TX_QFLUSH_POLL_WAIT_MS); 558 sfc_ev_qpoll(txq->evq); 559 } while ((txq_info->state & SFC_TXQ_FLUSHING) && 560 wait_count++ < SFC_TX_QFLUSH_POLL_ATTEMPTS); 561 562 if (txq_info->state & SFC_TXQ_FLUSHING) 563 sfc_err(sa, "TxQ %u flush timed out", sw_index); 564 565 if (txq_info->state & SFC_TXQ_FLUSHED) 566 sfc_notice(sa, "TxQ %u flushed", sw_index); 567 } 568 569 sa->priv.dp_tx->qreap(txq_info->dp); 570 571 txq_info->state = SFC_TXQ_INITIALIZED; 572 573 efx_tx_qdestroy(txq->common); 574 575 sfc_ev_qstop(txq->evq); 576 577 /* 578 * It seems to be used by DPDK for debug purposes only ('rte_ether') 579 */ 580 dev_data = sa->eth_dev->data; 581 dev_data->tx_queue_state[sw_index] = RTE_ETH_QUEUE_STATE_STOPPED; 582 } 583 584 int 585 sfc_tx_start(struct sfc_adapter *sa) 586 { 587 struct sfc_adapter_shared * const sas = sfc_sa2shared(sa); 588 const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic); 589 unsigned int sw_index; 590 int rc = 0; 591 592 sfc_log_init(sa, "txq_count = %u", sas->txq_count); 593 594 if (sa->tso) { 595 if (!encp->enc_fw_assisted_tso_v2_enabled) { 596 sfc_warn(sa, "TSO support was unable to be restored"); 597 sa->tso = B_FALSE; 598 sa->tso_encap = B_FALSE; 599 } 600 } 601 602 if (sa->tso_encap && !encp->enc_fw_assisted_tso_v2_encap_enabled) { 603 sfc_warn(sa, "Encapsulated TSO support was unable to be restored"); 604 sa->tso_encap = B_FALSE; 605 } 606 607 rc = efx_tx_init(sa->nic); 608 if (rc != 0) 609 goto fail_efx_tx_init; 610 611 for (sw_index = 0; sw_index < sas->txq_count; ++sw_index) { 612 if (sas->txq_info[sw_index].state == SFC_TXQ_INITIALIZED && 613 (!(sas->txq_info[sw_index].deferred_start) || 614 sas->txq_info[sw_index].deferred_started)) { 615 rc = sfc_tx_qstart(sa, sw_index); 616 if (rc != 0) 617 goto fail_tx_qstart; 618 } 619 } 620 621 return 0; 622 623 fail_tx_qstart: 624 while (sw_index-- > 0) 625 sfc_tx_qstop(sa, sw_index); 626 627 efx_tx_fini(sa->nic); 628 629 fail_efx_tx_init: 630 sfc_log_init(sa, "failed (rc = %d)", rc); 631 return rc; 632 } 633 634 void 635 sfc_tx_stop(struct sfc_adapter *sa) 636 { 637 struct sfc_adapter_shared * const sas = sfc_sa2shared(sa); 638 unsigned int sw_index; 639 640 sfc_log_init(sa, "txq_count = %u", sas->txq_count); 641 642 sw_index = sas->txq_count; 643 while (sw_index-- > 0) { 644 if (sas->txq_info[sw_index].state & SFC_TXQ_STARTED) 645 sfc_tx_qstop(sa, sw_index); 646 } 647 648 efx_tx_fini(sa->nic); 649 } 650 651 static void 652 sfc_efx_tx_reap(struct sfc_efx_txq *txq) 653 { 654 unsigned int completed; 655 656 sfc_ev_qpoll(txq->evq); 657 658 for (completed = txq->completed; 659 completed != txq->pending; completed++) { 660 struct sfc_efx_tx_sw_desc *txd; 661 662 txd = &txq->sw_ring[completed & txq->ptr_mask]; 663 664 if (txd->mbuf != NULL) { 665 rte_pktmbuf_free(txd->mbuf); 666 txd->mbuf = NULL; 667 } 668 } 669 670 txq->completed = completed; 671 } 672 673 /* 674 * The function is used to insert or update VLAN tag; 675 * the firmware has state of the firmware tag to insert per TxQ 676 * (controlled by option descriptors), hence, if the tag of the 677 * packet to be sent is different from one remembered by the firmware, 678 * the function will update it 679 */ 680 static unsigned int 681 sfc_efx_tx_maybe_insert_tag(struct sfc_efx_txq *txq, struct rte_mbuf *m, 682 efx_desc_t **pend) 683 { 684 uint16_t this_tag = ((m->ol_flags & PKT_TX_VLAN_PKT) ? 685 m->vlan_tci : 0); 686 687 if (this_tag == txq->hw_vlan_tci) 688 return 0; 689 690 /* 691 * The expression inside SFC_ASSERT() is not desired to be checked in 692 * a non-debug build because it might be too expensive on the data path 693 */ 694 SFC_ASSERT(efx_nic_cfg_get(txq->evq->sa->nic)->enc_hw_tx_insert_vlan_enabled); 695 696 efx_tx_qdesc_vlantci_create(txq->common, rte_cpu_to_be_16(this_tag), 697 *pend); 698 (*pend)++; 699 txq->hw_vlan_tci = this_tag; 700 701 return 1; 702 } 703 704 static uint16_t 705 sfc_efx_prepare_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, 706 uint16_t nb_pkts) 707 { 708 struct sfc_dp_txq *dp_txq = tx_queue; 709 struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq); 710 const efx_nic_cfg_t *encp = efx_nic_cfg_get(txq->evq->sa->nic); 711 uint16_t i; 712 713 for (i = 0; i < nb_pkts; i++) { 714 int ret; 715 716 /* 717 * EFX Tx datapath may require extra VLAN descriptor if VLAN 718 * insertion offload is requested regardless the offload 719 * requested/supported. 720 */ 721 ret = sfc_dp_tx_prepare_pkt(tx_pkts[i], 722 encp->enc_tx_tso_tcp_header_offset_limit, 723 txq->max_fill_level, EFX_TX_FATSOV2_OPT_NDESCS, 724 1); 725 if (unlikely(ret != 0)) { 726 rte_errno = ret; 727 break; 728 } 729 } 730 731 return i; 732 } 733 734 static uint16_t 735 sfc_efx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) 736 { 737 struct sfc_dp_txq *dp_txq = (struct sfc_dp_txq *)tx_queue; 738 struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq); 739 unsigned int added = txq->added; 740 unsigned int pushed = added; 741 unsigned int pkts_sent = 0; 742 efx_desc_t *pend = &txq->pend_desc[0]; 743 const unsigned int hard_max_fill = txq->max_fill_level; 744 const unsigned int soft_max_fill = hard_max_fill - txq->free_thresh; 745 unsigned int fill_level = added - txq->completed; 746 boolean_t reap_done; 747 int rc __rte_unused; 748 struct rte_mbuf **pktp; 749 750 if (unlikely((txq->flags & SFC_EFX_TXQ_FLAG_RUNNING) == 0)) 751 goto done; 752 753 /* 754 * If insufficient space for a single packet is present, 755 * we should reap; otherwise, we shouldn't do that all the time 756 * to avoid latency increase 757 */ 758 reap_done = (fill_level > soft_max_fill); 759 760 if (reap_done) { 761 sfc_efx_tx_reap(txq); 762 /* 763 * Recalculate fill level since 'txq->completed' 764 * might have changed on reap 765 */ 766 fill_level = added - txq->completed; 767 } 768 769 for (pkts_sent = 0, pktp = &tx_pkts[0]; 770 (pkts_sent < nb_pkts) && (fill_level <= soft_max_fill); 771 pkts_sent++, pktp++) { 772 uint16_t hw_vlan_tci_prev = txq->hw_vlan_tci; 773 struct rte_mbuf *m_seg = *pktp; 774 size_t pkt_len = m_seg->pkt_len; 775 unsigned int pkt_descs = 0; 776 size_t in_off = 0; 777 778 /* 779 * Here VLAN TCI is expected to be zero in case if no 780 * DEV_TX_OFFLOAD_VLAN_INSERT capability is advertised; 781 * if the calling app ignores the absence of 782 * DEV_TX_OFFLOAD_VLAN_INSERT and pushes VLAN TCI, then 783 * TX_ERROR will occur 784 */ 785 pkt_descs += sfc_efx_tx_maybe_insert_tag(txq, m_seg, &pend); 786 787 if (m_seg->ol_flags & PKT_TX_TCP_SEG) { 788 /* 789 * We expect correct 'pkt->l[2, 3, 4]_len' values 790 * to be set correctly by the caller 791 */ 792 if (sfc_efx_tso_do(txq, added, &m_seg, &in_off, &pend, 793 &pkt_descs, &pkt_len) != 0) { 794 /* We may have reached this place if packet 795 * header linearization is needed but the 796 * header length is greater than 797 * SFC_TSOH_STD_LEN 798 * 799 * We will deceive RTE saying that we have sent 800 * the packet, but we will actually drop it. 801 * Hence, we should revert 'pend' to the 802 * previous state (in case we have added 803 * VLAN descriptor) and start processing 804 * another one packet. But the original 805 * mbuf shouldn't be orphaned 806 */ 807 pend -= pkt_descs; 808 txq->hw_vlan_tci = hw_vlan_tci_prev; 809 810 rte_pktmbuf_free(*pktp); 811 812 continue; 813 } 814 815 /* 816 * We've only added 2 FATSOv2 option descriptors 817 * and 1 descriptor for the linearized packet header. 818 * The outstanding work will be done in the same manner 819 * as for the usual non-TSO path 820 */ 821 } 822 823 for (; m_seg != NULL; m_seg = m_seg->next) { 824 efsys_dma_addr_t next_frag; 825 size_t seg_len; 826 827 seg_len = m_seg->data_len; 828 next_frag = rte_mbuf_data_iova(m_seg); 829 830 /* 831 * If we've started TSO transaction few steps earlier, 832 * we'll skip packet header using an offset in the 833 * current segment (which has been set to the 834 * first one containing payload) 835 */ 836 seg_len -= in_off; 837 next_frag += in_off; 838 in_off = 0; 839 840 do { 841 efsys_dma_addr_t frag_addr = next_frag; 842 size_t frag_len; 843 844 /* 845 * It is assumed here that there is no 846 * limitation on address boundary 847 * crossing by DMA descriptor. 848 */ 849 frag_len = MIN(seg_len, txq->dma_desc_size_max); 850 next_frag += frag_len; 851 seg_len -= frag_len; 852 pkt_len -= frag_len; 853 854 efx_tx_qdesc_dma_create(txq->common, 855 frag_addr, frag_len, 856 (pkt_len == 0), 857 pend++); 858 859 pkt_descs++; 860 } while (seg_len != 0); 861 } 862 863 added += pkt_descs; 864 865 fill_level += pkt_descs; 866 if (unlikely(fill_level > hard_max_fill)) { 867 /* 868 * Our estimation for maximum number of descriptors 869 * required to send a packet seems to be wrong. 870 * Try to reap (if we haven't yet). 871 */ 872 if (!reap_done) { 873 sfc_efx_tx_reap(txq); 874 reap_done = B_TRUE; 875 fill_level = added - txq->completed; 876 if (fill_level > hard_max_fill) { 877 pend -= pkt_descs; 878 txq->hw_vlan_tci = hw_vlan_tci_prev; 879 break; 880 } 881 } else { 882 pend -= pkt_descs; 883 txq->hw_vlan_tci = hw_vlan_tci_prev; 884 break; 885 } 886 } 887 888 /* Assign mbuf to the last used desc */ 889 txq->sw_ring[(added - 1) & txq->ptr_mask].mbuf = *pktp; 890 } 891 892 if (likely(pkts_sent > 0)) { 893 rc = efx_tx_qdesc_post(txq->common, txq->pend_desc, 894 pend - &txq->pend_desc[0], 895 txq->completed, &txq->added); 896 SFC_ASSERT(rc == 0); 897 898 if (likely(pushed != txq->added)) 899 efx_tx_qpush(txq->common, txq->added, pushed); 900 } 901 902 #if SFC_TX_XMIT_PKTS_REAP_AT_LEAST_ONCE 903 if (!reap_done) 904 sfc_efx_tx_reap(txq); 905 #endif 906 907 done: 908 return pkts_sent; 909 } 910 911 const struct sfc_dp_tx * 912 sfc_dp_tx_by_dp_txq(const struct sfc_dp_txq *dp_txq) 913 { 914 const struct sfc_dp_queue *dpq = &dp_txq->dpq; 915 struct rte_eth_dev *eth_dev; 916 struct sfc_adapter_priv *sap; 917 918 SFC_ASSERT(rte_eth_dev_is_valid_port(dpq->port_id)); 919 eth_dev = &rte_eth_devices[dpq->port_id]; 920 921 sap = sfc_adapter_priv_by_eth_dev(eth_dev); 922 923 return sap->dp_tx; 924 } 925 926 struct sfc_txq_info * 927 sfc_txq_info_by_dp_txq(const struct sfc_dp_txq *dp_txq) 928 { 929 const struct sfc_dp_queue *dpq = &dp_txq->dpq; 930 struct rte_eth_dev *eth_dev; 931 struct sfc_adapter_shared *sas; 932 933 SFC_ASSERT(rte_eth_dev_is_valid_port(dpq->port_id)); 934 eth_dev = &rte_eth_devices[dpq->port_id]; 935 936 sas = sfc_adapter_shared_by_eth_dev(eth_dev); 937 938 SFC_ASSERT(dpq->queue_id < sas->txq_count); 939 return &sas->txq_info[dpq->queue_id]; 940 } 941 942 struct sfc_txq * 943 sfc_txq_by_dp_txq(const struct sfc_dp_txq *dp_txq) 944 { 945 const struct sfc_dp_queue *dpq = &dp_txq->dpq; 946 struct rte_eth_dev *eth_dev; 947 struct sfc_adapter *sa; 948 949 SFC_ASSERT(rte_eth_dev_is_valid_port(dpq->port_id)); 950 eth_dev = &rte_eth_devices[dpq->port_id]; 951 952 sa = sfc_adapter_by_eth_dev(eth_dev); 953 954 SFC_ASSERT(dpq->queue_id < sfc_sa2shared(sa)->txq_count); 955 return &sa->txq_ctrl[dpq->queue_id]; 956 } 957 958 static sfc_dp_tx_qsize_up_rings_t sfc_efx_tx_qsize_up_rings; 959 static int 960 sfc_efx_tx_qsize_up_rings(uint16_t nb_tx_desc, 961 __rte_unused struct sfc_dp_tx_hw_limits *limits, 962 unsigned int *txq_entries, 963 unsigned int *evq_entries, 964 unsigned int *txq_max_fill_level) 965 { 966 *txq_entries = nb_tx_desc; 967 *evq_entries = nb_tx_desc; 968 *txq_max_fill_level = EFX_TXQ_LIMIT(*txq_entries); 969 return 0; 970 } 971 972 static sfc_dp_tx_qcreate_t sfc_efx_tx_qcreate; 973 static int 974 sfc_efx_tx_qcreate(uint16_t port_id, uint16_t queue_id, 975 const struct rte_pci_addr *pci_addr, 976 int socket_id, 977 const struct sfc_dp_tx_qcreate_info *info, 978 struct sfc_dp_txq **dp_txqp) 979 { 980 struct sfc_efx_txq *txq; 981 struct sfc_txq *ctrl_txq; 982 int rc; 983 984 rc = ENOMEM; 985 txq = rte_zmalloc_socket("sfc-efx-txq", sizeof(*txq), 986 RTE_CACHE_LINE_SIZE, socket_id); 987 if (txq == NULL) 988 goto fail_txq_alloc; 989 990 sfc_dp_queue_init(&txq->dp.dpq, port_id, queue_id, pci_addr); 991 992 rc = ENOMEM; 993 txq->pend_desc = rte_calloc_socket("sfc-efx-txq-pend-desc", 994 EFX_TXQ_LIMIT(info->txq_entries), 995 sizeof(*txq->pend_desc), 0, 996 socket_id); 997 if (txq->pend_desc == NULL) 998 goto fail_pend_desc_alloc; 999 1000 rc = ENOMEM; 1001 txq->sw_ring = rte_calloc_socket("sfc-efx-txq-sw_ring", 1002 info->txq_entries, 1003 sizeof(*txq->sw_ring), 1004 RTE_CACHE_LINE_SIZE, socket_id); 1005 if (txq->sw_ring == NULL) 1006 goto fail_sw_ring_alloc; 1007 1008 ctrl_txq = sfc_txq_by_dp_txq(&txq->dp); 1009 if (ctrl_txq->evq->sa->tso) { 1010 rc = sfc_efx_tso_alloc_tsoh_objs(txq->sw_ring, 1011 info->txq_entries, socket_id); 1012 if (rc != 0) 1013 goto fail_alloc_tsoh_objs; 1014 } 1015 1016 txq->evq = ctrl_txq->evq; 1017 txq->ptr_mask = info->txq_entries - 1; 1018 txq->max_fill_level = info->max_fill_level; 1019 txq->free_thresh = info->free_thresh; 1020 txq->dma_desc_size_max = info->dma_desc_size_max; 1021 1022 *dp_txqp = &txq->dp; 1023 return 0; 1024 1025 fail_alloc_tsoh_objs: 1026 rte_free(txq->sw_ring); 1027 1028 fail_sw_ring_alloc: 1029 rte_free(txq->pend_desc); 1030 1031 fail_pend_desc_alloc: 1032 rte_free(txq); 1033 1034 fail_txq_alloc: 1035 return rc; 1036 } 1037 1038 static sfc_dp_tx_qdestroy_t sfc_efx_tx_qdestroy; 1039 static void 1040 sfc_efx_tx_qdestroy(struct sfc_dp_txq *dp_txq) 1041 { 1042 struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq); 1043 1044 sfc_efx_tso_free_tsoh_objs(txq->sw_ring, txq->ptr_mask + 1); 1045 rte_free(txq->sw_ring); 1046 rte_free(txq->pend_desc); 1047 rte_free(txq); 1048 } 1049 1050 static sfc_dp_tx_qstart_t sfc_efx_tx_qstart; 1051 static int 1052 sfc_efx_tx_qstart(struct sfc_dp_txq *dp_txq, 1053 __rte_unused unsigned int evq_read_ptr, 1054 unsigned int txq_desc_index) 1055 { 1056 /* libefx-based datapath is specific to libefx-based PMD */ 1057 struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq); 1058 struct sfc_txq *ctrl_txq = sfc_txq_by_dp_txq(dp_txq); 1059 1060 txq->common = ctrl_txq->common; 1061 1062 txq->pending = txq->completed = txq->added = txq_desc_index; 1063 txq->hw_vlan_tci = 0; 1064 1065 txq->flags |= (SFC_EFX_TXQ_FLAG_STARTED | SFC_EFX_TXQ_FLAG_RUNNING); 1066 1067 return 0; 1068 } 1069 1070 static sfc_dp_tx_qstop_t sfc_efx_tx_qstop; 1071 static void 1072 sfc_efx_tx_qstop(struct sfc_dp_txq *dp_txq, 1073 __rte_unused unsigned int *evq_read_ptr) 1074 { 1075 struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq); 1076 1077 txq->flags &= ~SFC_EFX_TXQ_FLAG_RUNNING; 1078 } 1079 1080 static sfc_dp_tx_qreap_t sfc_efx_tx_qreap; 1081 static void 1082 sfc_efx_tx_qreap(struct sfc_dp_txq *dp_txq) 1083 { 1084 struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq); 1085 unsigned int txds; 1086 1087 sfc_efx_tx_reap(txq); 1088 1089 for (txds = 0; txds <= txq->ptr_mask; txds++) { 1090 if (txq->sw_ring[txds].mbuf != NULL) { 1091 rte_pktmbuf_free(txq->sw_ring[txds].mbuf); 1092 txq->sw_ring[txds].mbuf = NULL; 1093 } 1094 } 1095 1096 txq->flags &= ~SFC_EFX_TXQ_FLAG_STARTED; 1097 } 1098 1099 static sfc_dp_tx_qdesc_status_t sfc_efx_tx_qdesc_status; 1100 static int 1101 sfc_efx_tx_qdesc_status(struct sfc_dp_txq *dp_txq, uint16_t offset) 1102 { 1103 struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq); 1104 1105 if (unlikely(offset > txq->ptr_mask)) 1106 return -EINVAL; 1107 1108 if (unlikely(offset >= txq->max_fill_level)) 1109 return RTE_ETH_TX_DESC_UNAVAIL; 1110 1111 /* 1112 * Poll EvQ to derive up-to-date 'txq->pending' figure; 1113 * it is required for the queue to be running, but the 1114 * check is omitted because API design assumes that it 1115 * is the duty of the caller to satisfy all conditions 1116 */ 1117 SFC_ASSERT((txq->flags & SFC_EFX_TXQ_FLAG_RUNNING) == 1118 SFC_EFX_TXQ_FLAG_RUNNING); 1119 sfc_ev_qpoll(txq->evq); 1120 1121 /* 1122 * Ring tail is 'txq->pending', and although descriptors 1123 * between 'txq->completed' and 'txq->pending' are still 1124 * in use by the driver, they should be reported as DONE 1125 */ 1126 if (unlikely(offset < (txq->added - txq->pending))) 1127 return RTE_ETH_TX_DESC_FULL; 1128 1129 /* 1130 * There is no separate return value for unused descriptors; 1131 * the latter will be reported as DONE because genuine DONE 1132 * descriptors will be freed anyway in SW on the next burst 1133 */ 1134 return RTE_ETH_TX_DESC_DONE; 1135 } 1136 1137 struct sfc_dp_tx sfc_efx_tx = { 1138 .dp = { 1139 .name = SFC_KVARG_DATAPATH_EFX, 1140 .type = SFC_DP_TX, 1141 .hw_fw_caps = 0, 1142 }, 1143 .features = 0, 1144 .dev_offload_capa = DEV_TX_OFFLOAD_VLAN_INSERT | 1145 DEV_TX_OFFLOAD_MULTI_SEGS, 1146 .queue_offload_capa = DEV_TX_OFFLOAD_IPV4_CKSUM | 1147 DEV_TX_OFFLOAD_UDP_CKSUM | 1148 DEV_TX_OFFLOAD_TCP_CKSUM | 1149 DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM | 1150 DEV_TX_OFFLOAD_TCP_TSO, 1151 .qsize_up_rings = sfc_efx_tx_qsize_up_rings, 1152 .qcreate = sfc_efx_tx_qcreate, 1153 .qdestroy = sfc_efx_tx_qdestroy, 1154 .qstart = sfc_efx_tx_qstart, 1155 .qstop = sfc_efx_tx_qstop, 1156 .qreap = sfc_efx_tx_qreap, 1157 .qdesc_status = sfc_efx_tx_qdesc_status, 1158 .pkt_prepare = sfc_efx_prepare_pkts, 1159 .pkt_burst = sfc_efx_xmit_pkts, 1160 }; 1161