/* SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright(c) 2019-2021 Xilinx, Inc.
 * Copyright(c) 2016-2019 Solarflare Communications Inc.
 *
 * This software was jointly developed between OKTET Labs (under contract
 * for Solarflare) and Solarflare Communications, Inc.
 */

#include "sfc.h"
#include "sfc_debug.h"
#include "sfc_log.h"
#include "sfc_ev.h"
#include "sfc_tx.h"
#include "sfc_tweak.h"
#include "sfc_kvargs.h"

/*
 * Maximum number of TX queue flush attempts in case of
 * failure or flush timeout
 */
#define SFC_TX_QFLUSH_ATTEMPTS		(3)

/*
 * Time to wait between event queue polling attempts when waiting for TX
 * queue flush done or flush failed events
 */
#define SFC_TX_QFLUSH_POLL_WAIT_MS	(1)

/*
 * Maximum number of event queue polling attempts when waiting for TX queue
 * flush done or flush failed events; it defines TX queue flush attempt timeout
 * together with SFC_TX_QFLUSH_POLL_WAIT_MS
 */
#define SFC_TX_QFLUSH_POLL_ATTEMPTS	(2000)

static uint64_t
sfc_tx_get_offload_mask(struct sfc_adapter *sa)
{
	const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
	uint64_t no_caps = 0;

	if (!encp->enc_hw_tx_insert_vlan_enabled)
		no_caps |= DEV_TX_OFFLOAD_VLAN_INSERT;

	if (!encp->enc_tunnel_encapsulations_supported)
		no_caps |= DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM;

	if (!sa->tso)
		no_caps |= DEV_TX_OFFLOAD_TCP_TSO;

	if (!sa->tso_encap ||
	    (encp->enc_tunnel_encapsulations_supported &
	     (1u << EFX_TUNNEL_PROTOCOL_VXLAN)) == 0)
		no_caps |= DEV_TX_OFFLOAD_VXLAN_TNL_TSO;

	if (!sa->tso_encap ||
	    (encp->enc_tunnel_encapsulations_supported &
	     (1u << EFX_TUNNEL_PROTOCOL_GENEVE)) == 0)
		no_caps |= DEV_TX_OFFLOAD_GENEVE_TNL_TSO;

	return ~no_caps;
}

uint64_t
sfc_tx_get_dev_offload_caps(struct sfc_adapter *sa)
{
	return sa->priv.dp_tx->dev_offload_capa & sfc_tx_get_offload_mask(sa);
}

uint64_t
sfc_tx_get_queue_offload_caps(struct sfc_adapter *sa)
{
	return sa->priv.dp_tx->queue_offload_capa & sfc_tx_get_offload_mask(sa);
}

static int
sfc_tx_qcheck_conf(struct sfc_adapter *sa, unsigned int txq_max_fill_level,
		   const struct rte_eth_txconf *tx_conf,
		   uint64_t offloads)
{
	int rc = 0;

	if (tx_conf->tx_rs_thresh != 0) {
		sfc_err(sa, "RS bit in transmit descriptor is not supported");
		rc = EINVAL;
	}

	if (tx_conf->tx_free_thresh > txq_max_fill_level) {
		sfc_err(sa,
			"TxQ free threshold too large: %u vs maximum %u",
			tx_conf->tx_free_thresh, txq_max_fill_level);
		rc = EINVAL;
	}

	if (tx_conf->tx_thresh.pthresh != 0 ||
	    tx_conf->tx_thresh.hthresh != 0 ||
	    tx_conf->tx_thresh.wthresh != 0) {
		sfc_warn(sa,
			"prefetch/host/writeback thresholds are not supported");
	}

	/* We either perform both TCP and UDP offload, or no offload at all */
	if (((offloads & DEV_TX_OFFLOAD_TCP_CKSUM) == 0) !=
	    ((offloads & DEV_TX_OFFLOAD_UDP_CKSUM) == 0)) {
		sfc_err(sa, "TCP and UDP offloads can't be set independently");
		rc = EINVAL;
	}

	return rc;
}

void
sfc_tx_qflush_done(struct sfc_txq_info *txq_info)
{
	txq_info->state |= SFC_TXQ_FLUSHED;
	txq_info->state &= ~SFC_TXQ_FLUSHING;
}

int
sfc_tx_qinit(struct sfc_adapter *sa, unsigned int sw_index,
	     uint16_t nb_tx_desc, unsigned int socket_id,
	     const struct rte_eth_txconf *tx_conf)
{
	const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
	unsigned int txq_entries;
	unsigned int evq_entries;
	unsigned int txq_max_fill_level;
	struct sfc_txq_info *txq_info;
	struct sfc_evq *evq;
	struct sfc_txq *txq;
	int rc = 0;
	struct sfc_dp_tx_qcreate_info info;
	uint64_t offloads;
	struct sfc_dp_tx_hw_limits hw_limits;

	sfc_log_init(sa, "TxQ = %u", sw_index);

	memset(&hw_limits, 0, sizeof(hw_limits));
	hw_limits.txq_max_entries = sa->txq_max_entries;
	hw_limits.txq_min_entries = sa->txq_min_entries;

	rc = sa->priv.dp_tx->qsize_up_rings(nb_tx_desc, &hw_limits,
					    &txq_entries, &evq_entries,
					    &txq_max_fill_level);
	if (rc != 0)
		goto fail_size_up_rings;
	SFC_ASSERT(txq_entries >= sa->txq_min_entries);
	SFC_ASSERT(txq_entries <= sa->txq_max_entries);
	SFC_ASSERT(txq_entries >= nb_tx_desc);
	SFC_ASSERT(txq_max_fill_level <= nb_tx_desc);

	offloads = tx_conf->offloads |
		   sa->eth_dev->data->dev_conf.txmode.offloads;
	rc = sfc_tx_qcheck_conf(sa, txq_max_fill_level, tx_conf, offloads);
	if (rc != 0)
		goto fail_bad_conf;

	SFC_ASSERT(sw_index < sfc_sa2shared(sa)->txq_count);
	txq_info = &sfc_sa2shared(sa)->txq_info[sw_index];

	txq_info->entries = txq_entries;

	rc = sfc_ev_qinit(sa, SFC_EVQ_TYPE_TX, sw_index,
			  evq_entries, socket_id, &evq);
	if (rc != 0)
		goto fail_ev_qinit;

	txq = &sa->txq_ctrl[sw_index];
	txq->hw_index = sw_index;
	txq->evq = evq;
	txq_info->free_thresh =
		(tx_conf->tx_free_thresh) ? tx_conf->tx_free_thresh :
		SFC_TX_DEFAULT_FREE_THRESH;
	txq_info->offloads = offloads;

	rc = sfc_dma_alloc(sa, "txq", sw_index,
			   efx_txq_size(sa->nic, txq_info->entries),
			   socket_id, &txq->mem);
	if (rc != 0)
		goto fail_dma_alloc;

	memset(&info, 0, sizeof(info));
	info.max_fill_level = txq_max_fill_level;
	info.free_thresh = txq_info->free_thresh;
	info.offloads = offloads;
	info.txq_entries = txq_info->entries;
	info.dma_desc_size_max = encp->enc_tx_dma_desc_size_max;
	info.txq_hw_ring = txq->mem.esm_base;
	info.evq_entries = evq_entries;
	info.evq_hw_ring = evq->mem.esm_base;
	info.hw_index = txq->hw_index;
	info.mem_bar = sa->mem_bar.esb_base;
	info.vi_window_shift = encp->enc_vi_window_shift;
	info.tso_tcp_header_offset_limit =
		encp->enc_tx_tso_tcp_header_offset_limit;
	info.tso_max_nb_header_descs =
		RTE_MIN(encp->enc_tx_tso_max_header_ndescs,
			(uint32_t)UINT16_MAX);
	info.tso_max_header_len =
		RTE_MIN(encp->enc_tx_tso_max_header_length,
			(uint32_t)UINT16_MAX);
	info.tso_max_nb_payload_descs =
		RTE_MIN(encp->enc_tx_tso_max_payload_ndescs,
			(uint32_t)UINT16_MAX);
	info.tso_max_payload_len = encp->enc_tx_tso_max_payload_length;
	info.tso_max_nb_outgoing_frames = encp->enc_tx_tso_max_nframes;

	rc = sa->priv.dp_tx->qcreate(sa->eth_dev->data->port_id, sw_index,
				     &RTE_ETH_DEV_TO_PCI(sa->eth_dev)->addr,
				     socket_id, &info, &txq_info->dp);
	if (rc != 0)
		goto fail_dp_tx_qinit;

	evq->dp_txq = txq_info->dp;

	txq_info->state = SFC_TXQ_INITIALIZED;

	txq_info->deferred_start = (tx_conf->tx_deferred_start != 0);

	return 0;

fail_dp_tx_qinit:
	sfc_dma_free(sa, &txq->mem);

fail_dma_alloc:
	sfc_ev_qfini(evq);

fail_ev_qinit:
	txq_info->entries = 0;

fail_bad_conf:
fail_size_up_rings:
	sfc_log_init(sa, "failed (TxQ = %u, rc = %d)", sw_index, rc);
	return rc;
}

void
sfc_tx_qfini(struct sfc_adapter *sa, unsigned int sw_index)
{
	struct sfc_txq_info *txq_info;
	struct sfc_txq *txq;

	sfc_log_init(sa, "TxQ = %u", sw_index);

	SFC_ASSERT(sw_index < sfc_sa2shared(sa)->txq_count);
	sa->eth_dev->data->tx_queues[sw_index] = NULL;

	txq_info = &sfc_sa2shared(sa)->txq_info[sw_index];

	SFC_ASSERT(txq_info->state == SFC_TXQ_INITIALIZED);

	sa->priv.dp_tx->qdestroy(txq_info->dp);
	txq_info->dp = NULL;

	txq_info->state &= ~SFC_TXQ_INITIALIZED;
	txq_info->entries = 0;

	txq = &sa->txq_ctrl[sw_index];

	sfc_dma_free(sa, &txq->mem);

	sfc_ev_qfini(txq->evq);
	txq->evq = NULL;
}

static int
sfc_tx_qinit_info(struct sfc_adapter *sa, unsigned int sw_index)
{
	sfc_log_init(sa, "TxQ = %u", sw_index);

	return 0;
}

static int
sfc_tx_check_mode(struct sfc_adapter *sa, const struct rte_eth_txmode *txmode)
{
	int rc = 0;

	switch (txmode->mq_mode) {
	case ETH_MQ_TX_NONE:
		break;
	default:
		sfc_err(sa, "Tx multi-queue mode %u not supported",
			txmode->mq_mode);
		rc = EINVAL;
	}

	/*
	 * These features are claimed to be i40e-specific,
	 * but it does make sense to double-check their absence
	 */
	if (txmode->hw_vlan_reject_tagged) {
		sfc_err(sa, "Rejecting tagged packets not supported");
		rc = EINVAL;
	}

	if (txmode->hw_vlan_reject_untagged) {
		sfc_err(sa, "Rejecting untagged packets not supported");
		rc = EINVAL;
	}

	if (txmode->hw_vlan_insert_pvid) {
		sfc_err(sa, "Port-based VLAN insertion not supported");
		rc = EINVAL;
	}

	return rc;
}

/**
 * Destroy excess queues that are no longer needed after reconfiguration
 * or complete close.
 */
static void
sfc_tx_fini_queues(struct sfc_adapter *sa, unsigned int nb_tx_queues)
{
	struct sfc_adapter_shared * const sas = sfc_sa2shared(sa);
	int sw_index;

	SFC_ASSERT(nb_tx_queues <= sas->txq_count);

	sw_index = sas->txq_count;
	while (--sw_index >= (int)nb_tx_queues) {
		if (sas->txq_info[sw_index].state & SFC_TXQ_INITIALIZED)
			sfc_tx_qfini(sa, sw_index);
	}

	sas->txq_count = nb_tx_queues;
}

int
sfc_tx_configure(struct sfc_adapter *sa)
{
	struct sfc_adapter_shared * const sas = sfc_sa2shared(sa);
	const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
	const struct rte_eth_conf *dev_conf = &sa->eth_dev->data->dev_conf;
	const unsigned int nb_tx_queues = sa->eth_dev->data->nb_tx_queues;
	int rc = 0;

	sfc_log_init(sa, "nb_tx_queues=%u (old %u)",
		     nb_tx_queues, sas->txq_count);

	/*
	 * The datapath implementation assumes absence of boundary
	 * limits on Tx DMA descriptors. Addition of these checks on
	 * datapath would simply make the datapath slower.
	 */
	if (encp->enc_tx_dma_desc_boundary != 0) {
		rc = ENOTSUP;
		goto fail_tx_dma_desc_boundary;
	}

	rc = sfc_tx_check_mode(sa, &dev_conf->txmode);
	if (rc != 0)
		goto fail_check_mode;

	if (nb_tx_queues == sas->txq_count)
		goto done;

	if (sas->txq_info == NULL) {
		rc = ENOMEM;
		sas->txq_info = rte_calloc_socket("sfc-txqs", nb_tx_queues,
						  sizeof(sas->txq_info[0]), 0,
						  sa->socket_id);
		if (sas->txq_info == NULL)
			goto fail_txqs_alloc;

		/*
		 * Allocate primary process only TxQ control from heap
		 * since it should not be shared.
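		 * rte_calloc_socket() memory lives in DPDK shared memory and
		 * would be visible to secondary processes, so plain calloc()
		 * is used for the primary-process-only control structures.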
		 */
		rc = ENOMEM;
		sa->txq_ctrl = calloc(nb_tx_queues, sizeof(sa->txq_ctrl[0]));
		if (sa->txq_ctrl == NULL)
			goto fail_txqs_ctrl_alloc;
	} else {
		struct sfc_txq_info *new_txq_info;
		struct sfc_txq *new_txq_ctrl;

		if (nb_tx_queues < sas->txq_count)
			sfc_tx_fini_queues(sa, nb_tx_queues);

		rc = ENOMEM;
		new_txq_info =
			rte_realloc(sas->txq_info,
				    nb_tx_queues * sizeof(sas->txq_info[0]), 0);
		if (new_txq_info == NULL && nb_tx_queues > 0)
			goto fail_txqs_realloc;

		new_txq_ctrl = realloc(sa->txq_ctrl,
				       nb_tx_queues * sizeof(sa->txq_ctrl[0]));
		if (new_txq_ctrl == NULL && nb_tx_queues > 0)
			goto fail_txqs_ctrl_realloc;

		sas->txq_info = new_txq_info;
		sa->txq_ctrl = new_txq_ctrl;
		if (nb_tx_queues > sas->txq_count) {
			memset(&sas->txq_info[sas->txq_count], 0,
			       (nb_tx_queues - sas->txq_count) *
			       sizeof(sas->txq_info[0]));
			memset(&sa->txq_ctrl[sas->txq_count], 0,
			       (nb_tx_queues - sas->txq_count) *
			       sizeof(sa->txq_ctrl[0]));
		}
	}

	while (sas->txq_count < nb_tx_queues) {
		rc = sfc_tx_qinit_info(sa, sas->txq_count);
		if (rc != 0)
			goto fail_tx_qinit_info;

		sas->txq_count++;
	}

done:
	return 0;

fail_tx_qinit_info:
fail_txqs_ctrl_realloc:
fail_txqs_realloc:
fail_txqs_ctrl_alloc:
fail_txqs_alloc:
	sfc_tx_close(sa);

fail_check_mode:
fail_tx_dma_desc_boundary:
	sfc_log_init(sa, "failed (rc = %d)", rc);
	return rc;
}

void
sfc_tx_close(struct sfc_adapter *sa)
{
	sfc_tx_fini_queues(sa, 0);

	free(sa->txq_ctrl);
	sa->txq_ctrl = NULL;

	rte_free(sfc_sa2shared(sa)->txq_info);
	sfc_sa2shared(sa)->txq_info = NULL;
}

int
sfc_tx_qstart(struct sfc_adapter *sa, unsigned int sw_index)
{
	struct sfc_adapter_shared * const sas = sfc_sa2shared(sa);
	uint64_t offloads_supported = sfc_tx_get_dev_offload_caps(sa) |
				      sfc_tx_get_queue_offload_caps(sa);
	struct rte_eth_dev_data *dev_data;
	struct sfc_txq_info *txq_info;
	struct sfc_txq *txq;
	struct sfc_evq *evq;
	uint16_t flags = 0;
	unsigned int desc_index;
	int rc = 0;

	sfc_log_init(sa, "TxQ = %u", sw_index);

	SFC_ASSERT(sw_index < sas->txq_count);
	txq_info = &sas->txq_info[sw_index];

	SFC_ASSERT(txq_info->state == SFC_TXQ_INITIALIZED);

	txq = &sa->txq_ctrl[sw_index];
	evq = txq->evq;

	rc = sfc_ev_qstart(evq, sfc_evq_index_by_txq_sw_index(sa, sw_index));
	if (rc != 0)
		goto fail_ev_qstart;

	if (txq_info->offloads & DEV_TX_OFFLOAD_IPV4_CKSUM)
		flags |= EFX_TXQ_CKSUM_IPV4;

	if (txq_info->offloads & DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM)
		flags |= EFX_TXQ_CKSUM_INNER_IPV4;

	if ((txq_info->offloads & DEV_TX_OFFLOAD_TCP_CKSUM) ||
	    (txq_info->offloads & DEV_TX_OFFLOAD_UDP_CKSUM)) {
		flags |= EFX_TXQ_CKSUM_TCPUDP;

		if (offloads_supported & DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM)
			flags |= EFX_TXQ_CKSUM_INNER_TCPUDP;
	}

	if (txq_info->offloads & (DEV_TX_OFFLOAD_TCP_TSO |
				  DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
				  DEV_TX_OFFLOAD_GENEVE_TNL_TSO))
		flags |= EFX_TXQ_FATSOV2;

	rc = efx_tx_qcreate(sa->nic, txq->hw_index, 0, &txq->mem,
			    txq_info->entries, 0 /* not used on EF10 */,
			    flags, evq->common,
			    &txq->common, &desc_index);
	if (rc != 0) {
		if (sa->tso && (rc == ENOSPC))
			sfc_err(sa, "ran out of TSO contexts");

		goto fail_tx_qcreate;
	}

	efx_tx_qenable(txq->common);

	txq_info->state |= SFC_TXQ_STARTED;

	rc = sa->priv.dp_tx->qstart(txq_info->dp, evq->read_ptr, desc_index);
	if (rc != 0)
		goto fail_dp_qstart;

	/*
	 * It seems to be used by DPDK for debug purposes only ('rte_ether')
	 */
	dev_data = sa->eth_dev->data;
	dev_data->tx_queue_state[sw_index] = RTE_ETH_QUEUE_STATE_STARTED;

	return 0;

fail_dp_qstart:
	txq_info->state = SFC_TXQ_INITIALIZED;
	efx_tx_qdestroy(txq->common);

fail_tx_qcreate:
	sfc_ev_qstop(evq);

fail_ev_qstart:
	return rc;
}

void
sfc_tx_qstop(struct sfc_adapter *sa, unsigned int sw_index)
{
	struct sfc_adapter_shared * const sas = sfc_sa2shared(sa);
	struct rte_eth_dev_data *dev_data;
	struct sfc_txq_info *txq_info;
	struct sfc_txq *txq;
	unsigned int retry_count;
	unsigned int wait_count;
	int rc;

	sfc_log_init(sa, "TxQ = %u", sw_index);

	SFC_ASSERT(sw_index < sas->txq_count);
	txq_info = &sas->txq_info[sw_index];

	if (txq_info->state == SFC_TXQ_INITIALIZED)
		return;

	SFC_ASSERT(txq_info->state & SFC_TXQ_STARTED);

	txq = &sa->txq_ctrl[sw_index];
	sa->priv.dp_tx->qstop(txq_info->dp, &txq->evq->read_ptr);

	/*
	 * Retry TX queue flushing in case of a flush failure or
	 * timeout; in the worst case it can delay for 6 seconds
	 */
	for (retry_count = 0;
	     ((txq_info->state & SFC_TXQ_FLUSHED) == 0) &&
	     (retry_count < SFC_TX_QFLUSH_ATTEMPTS);
	     ++retry_count) {
		rc = efx_tx_qflush(txq->common);
		if (rc != 0) {
			txq_info->state |= (rc == EALREADY) ?
				SFC_TXQ_FLUSHED : SFC_TXQ_FLUSH_FAILED;
			break;
		}

		/*
		 * Wait for TX queue flush done or flush failed event at least
		 * SFC_TX_QFLUSH_POLL_WAIT_MS milliseconds and not more
		 * than 2 seconds (SFC_TX_QFLUSH_POLL_WAIT_MS multiplied
		 * by SFC_TX_QFLUSH_POLL_ATTEMPTS)
		 */
		wait_count = 0;
		do {
			rte_delay_ms(SFC_TX_QFLUSH_POLL_WAIT_MS);
			sfc_ev_qpoll(txq->evq);
		} while ((txq_info->state & SFC_TXQ_FLUSHING) &&
			 wait_count++ < SFC_TX_QFLUSH_POLL_ATTEMPTS);

		if (txq_info->state & SFC_TXQ_FLUSHING)
			sfc_err(sa, "TxQ %u flush timed out", sw_index);

		if (txq_info->state & SFC_TXQ_FLUSHED)
			sfc_notice(sa, "TxQ %u flushed", sw_index);
	}

	sa->priv.dp_tx->qreap(txq_info->dp);

	txq_info->state = SFC_TXQ_INITIALIZED;

	efx_tx_qdestroy(txq->common);

	sfc_ev_qstop(txq->evq);

	/*
	 * It seems to be used by DPDK for debug purposes only ('rte_ether')
	 */
	dev_data = sa->eth_dev->data;
	dev_data->tx_queue_state[sw_index] = RTE_ETH_QUEUE_STATE_STOPPED;
}

int
sfc_tx_start(struct sfc_adapter *sa)
{
	struct sfc_adapter_shared * const sas = sfc_sa2shared(sa);
	const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
	unsigned int sw_index;
	int rc = 0;

	sfc_log_init(sa, "txq_count = %u", sas->txq_count);

	if (sa->tso) {
		if (!encp->enc_fw_assisted_tso_v2_enabled &&
		    !encp->enc_tso_v3_enabled) {
			sfc_warn(sa, "TSO support was unable to be restored");
			sa->tso = B_FALSE;
			sa->tso_encap = B_FALSE;
		}
	}

	if (sa->tso_encap && !encp->enc_fw_assisted_tso_v2_encap_enabled &&
	    !encp->enc_tso_v3_enabled) {
		sfc_warn(sa, "Encapsulated TSO support was unable to be restored");
		sa->tso_encap = B_FALSE;
	}

	rc = efx_tx_init(sa->nic);
	if (rc != 0)
		goto fail_efx_tx_init;

	for (sw_index = 0; sw_index < sas->txq_count; ++sw_index) {
		if (sas->txq_info[sw_index].state == SFC_TXQ_INITIALIZED &&
		    (!(sas->txq_info[sw_index].deferred_start) ||
		     sas->txq_info[sw_index].deferred_started)) {
			rc = sfc_tx_qstart(sa, sw_index);
			if (rc != 0)
				goto fail_tx_qstart;
		}
	}

	return 0;

fail_tx_qstart:
	while (sw_index-- > 0)
		sfc_tx_qstop(sa, sw_index);

	efx_tx_fini(sa->nic);

fail_efx_tx_init:
	sfc_log_init(sa, "failed (rc = %d)", rc);
	return rc;
}

void
sfc_tx_stop(struct sfc_adapter *sa)
{
	struct sfc_adapter_shared * const sas = sfc_sa2shared(sa);
	unsigned int sw_index;

	sfc_log_init(sa, "txq_count = %u", sas->txq_count);

	sw_index = sas->txq_count;
	while (sw_index-- > 0) {
		if (sas->txq_info[sw_index].state & SFC_TXQ_STARTED)
			sfc_tx_qstop(sa, sw_index);
	}

	efx_tx_fini(sa->nic);
}

static void
sfc_efx_tx_reap(struct sfc_efx_txq *txq)
{
	unsigned int completed;

	sfc_ev_qpoll(txq->evq);

	for (completed = txq->completed;
	     completed != txq->pending; completed++) {
		struct sfc_efx_tx_sw_desc *txd;

		txd = &txq->sw_ring[completed & txq->ptr_mask];

		if (txd->mbuf != NULL) {
			rte_pktmbuf_free(txd->mbuf);
			txd->mbuf = NULL;
		}
	}

	txq->completed = completed;
}

/*
 * The function is used to insert or update a VLAN tag;
 * the firmware keeps per-TxQ state of the tag to insert
 * (controlled by option descriptors), hence, if the tag of the
 * packet to be sent differs from the one remembered by the firmware,
 * the function will update it
 */
static unsigned int
sfc_efx_tx_maybe_insert_tag(struct sfc_efx_txq *txq, struct rte_mbuf *m,
			    efx_desc_t **pend)
{
	uint16_t this_tag = ((m->ol_flags & PKT_TX_VLAN_PKT) ?
			     m->vlan_tci : 0);

	if (this_tag == txq->hw_vlan_tci)
		return 0;

	/*
	 * The expression inside SFC_ASSERT() is not desired to be checked in
	 * a non-debug build because it might be too expensive on the data path
	 */
	SFC_ASSERT(efx_nic_cfg_get(txq->evq->sa->nic)->enc_hw_tx_insert_vlan_enabled);

	efx_tx_qdesc_vlantci_create(txq->common, rte_cpu_to_be_16(this_tag),
				    *pend);
	(*pend)++;
	txq->hw_vlan_tci = this_tag;

	return 1;
}

static uint16_t
sfc_efx_prepare_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
		     uint16_t nb_pkts)
{
	struct sfc_dp_txq *dp_txq = tx_queue;
	struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq);
	const efx_nic_cfg_t *encp = efx_nic_cfg_get(txq->evq->sa->nic);
	uint16_t i;

	for (i = 0; i < nb_pkts; i++) {
		int ret;

		/*
		 * EFX Tx datapath may require an extra VLAN descriptor if VLAN
		 * insertion offload is requested, regardless of the offload
		 * requested/supported.
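		 * That is why one possible VLAN option descriptor is always
		 * accounted for in the descriptor budget passed below.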
		 */
		ret = sfc_dp_tx_prepare_pkt(tx_pkts[i], 0, SFC_TSOH_STD_LEN,
				encp->enc_tx_tso_tcp_header_offset_limit,
				txq->max_fill_level, EFX_TX_FATSOV2_OPT_NDESCS,
				1);
		if (unlikely(ret != 0)) {
			rte_errno = ret;
			break;
		}
	}

	return i;
}

static uint16_t
sfc_efx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
	struct sfc_dp_txq *dp_txq = (struct sfc_dp_txq *)tx_queue;
	struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq);
	unsigned int added = txq->added;
	unsigned int pushed = added;
	unsigned int pkts_sent = 0;
	efx_desc_t *pend = &txq->pend_desc[0];
	const unsigned int hard_max_fill = txq->max_fill_level;
	const unsigned int soft_max_fill = hard_max_fill - txq->free_thresh;
	unsigned int fill_level = added - txq->completed;
	boolean_t reap_done;
	int rc __rte_unused;
	struct rte_mbuf **pktp;

	if (unlikely((txq->flags & SFC_EFX_TXQ_FLAG_RUNNING) == 0))
		goto done;

	/*
	 * If insufficient space for a single packet is present,
	 * we should reap; otherwise, we shouldn't do that all the time
	 * to avoid latency increase
	 */
	reap_done = (fill_level > soft_max_fill);

	if (reap_done) {
		sfc_efx_tx_reap(txq);
		/*
		 * Recalculate fill level since 'txq->completed'
		 * might have changed on reap
		 */
		fill_level = added - txq->completed;
	}

	for (pkts_sent = 0, pktp = &tx_pkts[0];
	     (pkts_sent < nb_pkts) && (fill_level <= soft_max_fill);
	     pkts_sent++, pktp++) {
		uint16_t hw_vlan_tci_prev = txq->hw_vlan_tci;
		struct rte_mbuf *m_seg = *pktp;
		size_t pkt_len = m_seg->pkt_len;
		unsigned int pkt_descs = 0;
		size_t in_off = 0;

		/*
		 * Here VLAN TCI is expected to be zero in case no
		 * DEV_TX_OFFLOAD_VLAN_INSERT capability is advertised;
		 * if the calling app ignores the absence of
		 * DEV_TX_OFFLOAD_VLAN_INSERT and pushes VLAN TCI, then
		 * TX_ERROR will occur
		 */
		pkt_descs += sfc_efx_tx_maybe_insert_tag(txq, m_seg, &pend);

		if (m_seg->ol_flags & PKT_TX_TCP_SEG) {
			/*
			 * We expect 'pkt->l[2, 3, 4]_len' values
			 * to be set correctly by the caller
			 */
			if (sfc_efx_tso_do(txq, added, &m_seg, &in_off, &pend,
					   &pkt_descs, &pkt_len) != 0) {
				/* We may have reached this place if packet
				 * header linearization is needed but the
				 * header length is greater than
				 * SFC_TSOH_STD_LEN
				 *
				 * We will deceive RTE saying that we have sent
				 * the packet, but we will actually drop it.
				 * Hence, we should revert 'pend' to the
				 * previous state (in case we have added a
				 * VLAN descriptor) and start processing
				 * the next packet. But the original
				 * mbuf shouldn't be orphaned
				 */
				pend -= pkt_descs;
				txq->hw_vlan_tci = hw_vlan_tci_prev;

				rte_pktmbuf_free(*pktp);

				continue;
			}

			/*
			 * We've only added 2 FATSOv2 option descriptors
			 * and 1 descriptor for the linearized packet header.
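			 * (all of them have already been counted in
			 * 'pkt_descs' by sfc_efx_tso_do() above)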
			 * The outstanding work will be done in the same manner
			 * as for the usual non-TSO path
			 */
		}

		for (; m_seg != NULL; m_seg = m_seg->next) {
			efsys_dma_addr_t next_frag;
			size_t seg_len;

			seg_len = m_seg->data_len;
			next_frag = rte_mbuf_data_iova(m_seg);

			/*
			 * If we've started a TSO transaction a few steps
			 * earlier, we'll skip the packet header using an
			 * offset in the current segment (which has been set
			 * to the first one containing payload)
			 */
			seg_len -= in_off;
			next_frag += in_off;
			in_off = 0;

			do {
				efsys_dma_addr_t frag_addr = next_frag;
				size_t frag_len;

				/*
				 * It is assumed here that there is no
				 * limitation on address boundary
				 * crossing by DMA descriptor.
				 */
				frag_len = MIN(seg_len, txq->dma_desc_size_max);
				next_frag += frag_len;
				seg_len -= frag_len;
				pkt_len -= frag_len;

				efx_tx_qdesc_dma_create(txq->common,
							frag_addr, frag_len,
							(pkt_len == 0),
							pend++);

				pkt_descs++;
			} while (seg_len != 0);
		}

		added += pkt_descs;

		fill_level += pkt_descs;
		if (unlikely(fill_level > hard_max_fill)) {
			/*
			 * Our estimation for maximum number of descriptors
			 * required to send a packet seems to be wrong.
			 * Try to reap (if we haven't yet).
			 */
			if (!reap_done) {
				sfc_efx_tx_reap(txq);
				reap_done = B_TRUE;
				fill_level = added - txq->completed;
				if (fill_level > hard_max_fill) {
					pend -= pkt_descs;
					txq->hw_vlan_tci = hw_vlan_tci_prev;
					break;
				}
			} else {
				pend -= pkt_descs;
				txq->hw_vlan_tci = hw_vlan_tci_prev;
				break;
			}
		}

		/* Assign mbuf to the last used desc */
		txq->sw_ring[(added - 1) & txq->ptr_mask].mbuf = *pktp;
	}

	if (likely(pkts_sent > 0)) {
		rc = efx_tx_qdesc_post(txq->common, txq->pend_desc,
				       pend - &txq->pend_desc[0],
				       txq->completed, &txq->added);
		SFC_ASSERT(rc == 0);

		if (likely(pushed != txq->added))
			efx_tx_qpush(txq->common, txq->added, pushed);
	}

#if SFC_TX_XMIT_PKTS_REAP_AT_LEAST_ONCE
	if (!reap_done)
		sfc_efx_tx_reap(txq);
#endif

done:
	return pkts_sent;
}

const struct sfc_dp_tx *
sfc_dp_tx_by_dp_txq(const struct sfc_dp_txq *dp_txq)
{
	const struct sfc_dp_queue *dpq = &dp_txq->dpq;
	struct rte_eth_dev *eth_dev;
	struct sfc_adapter_priv *sap;

	SFC_ASSERT(rte_eth_dev_is_valid_port(dpq->port_id));
	eth_dev = &rte_eth_devices[dpq->port_id];

	sap = sfc_adapter_priv_by_eth_dev(eth_dev);

	return sap->dp_tx;
}

struct sfc_txq_info *
sfc_txq_info_by_dp_txq(const struct sfc_dp_txq *dp_txq)
{
	const struct sfc_dp_queue *dpq = &dp_txq->dpq;
	struct rte_eth_dev *eth_dev;
	struct sfc_adapter_shared *sas;

	SFC_ASSERT(rte_eth_dev_is_valid_port(dpq->port_id));
	eth_dev = &rte_eth_devices[dpq->port_id];

	sas = sfc_adapter_shared_by_eth_dev(eth_dev);

	SFC_ASSERT(dpq->queue_id < sas->txq_count);
	return &sas->txq_info[dpq->queue_id];
}

struct sfc_txq *
sfc_txq_by_dp_txq(const struct sfc_dp_txq *dp_txq)
{
	const struct sfc_dp_queue *dpq = &dp_txq->dpq;
	struct rte_eth_dev *eth_dev;
	struct sfc_adapter *sa;

	SFC_ASSERT(rte_eth_dev_is_valid_port(dpq->port_id));
	eth_dev = &rte_eth_devices[dpq->port_id];

	sa = sfc_adapter_by_eth_dev(eth_dev);

	SFC_ASSERT(dpq->queue_id < sfc_sa2shared(sa)->txq_count);
	return &sa->txq_ctrl[dpq->queue_id];
}

static sfc_dp_tx_qsize_up_rings_t sfc_efx_tx_qsize_up_rings;
static int
sfc_efx_tx_qsize_up_rings(uint16_t nb_tx_desc,
			  __rte_unused struct sfc_dp_tx_hw_limits *limits,
			  unsigned int *txq_entries,
			  unsigned int *evq_entries,
			  unsigned int *txq_max_fill_level)
{
	*txq_entries = nb_tx_desc;
	*evq_entries = nb_tx_desc;
	*txq_max_fill_level = EFX_TXQ_LIMIT(*txq_entries);
	return 0;
}

static sfc_dp_tx_qcreate_t sfc_efx_tx_qcreate;
static int
sfc_efx_tx_qcreate(uint16_t port_id, uint16_t queue_id,
		   const struct rte_pci_addr *pci_addr,
		   int socket_id,
		   const struct sfc_dp_tx_qcreate_info *info,
		   struct sfc_dp_txq **dp_txqp)
{
	struct sfc_efx_txq *txq;
	struct sfc_txq *ctrl_txq;
	int rc;

	rc = ENOMEM;
	txq = rte_zmalloc_socket("sfc-efx-txq", sizeof(*txq),
				 RTE_CACHE_LINE_SIZE, socket_id);
	if (txq == NULL)
		goto fail_txq_alloc;

	sfc_dp_queue_init(&txq->dp.dpq, port_id, queue_id, pci_addr);

	rc = ENOMEM;
	txq->pend_desc = rte_calloc_socket("sfc-efx-txq-pend-desc",
					   EFX_TXQ_LIMIT(info->txq_entries),
					   sizeof(*txq->pend_desc), 0,
					   socket_id);
	if (txq->pend_desc == NULL)
		goto fail_pend_desc_alloc;

	rc = ENOMEM;
	txq->sw_ring = rte_calloc_socket("sfc-efx-txq-sw_ring",
					 info->txq_entries,
					 sizeof(*txq->sw_ring),
					 RTE_CACHE_LINE_SIZE, socket_id);
	if (txq->sw_ring == NULL)
		goto fail_sw_ring_alloc;

	ctrl_txq = sfc_txq_by_dp_txq(&txq->dp);
	if (ctrl_txq->evq->sa->tso) {
		rc = sfc_efx_tso_alloc_tsoh_objs(txq->sw_ring,
						 info->txq_entries, socket_id);
		if (rc != 0)
			goto fail_alloc_tsoh_objs;
	}

	txq->evq = ctrl_txq->evq;
	txq->ptr_mask = info->txq_entries - 1;
	txq->max_fill_level = info->max_fill_level;
	txq->free_thresh = info->free_thresh;
	txq->dma_desc_size_max = info->dma_desc_size_max;

	*dp_txqp = &txq->dp;
	return 0;

fail_alloc_tsoh_objs:
	rte_free(txq->sw_ring);

fail_sw_ring_alloc:
	rte_free(txq->pend_desc);

fail_pend_desc_alloc:
	rte_free(txq);

fail_txq_alloc:
	return rc;
}

static sfc_dp_tx_qdestroy_t sfc_efx_tx_qdestroy;
static void
sfc_efx_tx_qdestroy(struct sfc_dp_txq *dp_txq)
{
	struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq);

	sfc_efx_tso_free_tsoh_objs(txq->sw_ring, txq->ptr_mask + 1);
	rte_free(txq->sw_ring);
	rte_free(txq->pend_desc);
	rte_free(txq);
}

static sfc_dp_tx_qstart_t sfc_efx_tx_qstart;
static int
sfc_efx_tx_qstart(struct sfc_dp_txq *dp_txq,
		  __rte_unused unsigned int evq_read_ptr,
		  unsigned int txq_desc_index)
{
	/* libefx-based datapath is specific to libefx-based PMD */
	struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq);
	struct sfc_txq *ctrl_txq = sfc_txq_by_dp_txq(dp_txq);

	txq->common = ctrl_txq->common;

	txq->pending = txq->completed = txq->added = txq_desc_index;
	txq->hw_vlan_tci = 0;

	txq->flags |= (SFC_EFX_TXQ_FLAG_STARTED | SFC_EFX_TXQ_FLAG_RUNNING);

	return 0;
}

static sfc_dp_tx_qstop_t sfc_efx_tx_qstop;
static void
sfc_efx_tx_qstop(struct sfc_dp_txq *dp_txq,
		 __rte_unused unsigned int *evq_read_ptr)
{
	struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq);

	txq->flags &= ~SFC_EFX_TXQ_FLAG_RUNNING;
}

static sfc_dp_tx_qreap_t sfc_efx_tx_qreap;
static void
sfc_efx_tx_qreap(struct sfc_dp_txq *dp_txq)
{
	struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq);
	unsigned int txds;

	sfc_efx_tx_reap(txq);

	for (txds = 0; txds <= txq->ptr_mask; txds++) {
		if (txq->sw_ring[txds].mbuf != NULL) {
			rte_pktmbuf_free(txq->sw_ring[txds].mbuf);
			txq->sw_ring[txds].mbuf = NULL;
		}
	}

	txq->flags &= ~SFC_EFX_TXQ_FLAG_STARTED;
}

static sfc_dp_tx_qdesc_status_t sfc_efx_tx_qdesc_status;
static int
sfc_efx_tx_qdesc_status(struct sfc_dp_txq *dp_txq, uint16_t offset)
{
	struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq);

	if (unlikely(offset > txq->ptr_mask))
		return -EINVAL;

	if (unlikely(offset >= txq->max_fill_level))
		return RTE_ETH_TX_DESC_UNAVAIL;

	/*
	 * Poll EvQ to derive up-to-date 'txq->pending' figure;
	 * it is required for the queue to be running, but the
	 * check is omitted because API design assumes that it
	 * is the duty of the caller to satisfy all conditions
	 */
	SFC_ASSERT((txq->flags & SFC_EFX_TXQ_FLAG_RUNNING) ==
		   SFC_EFX_TXQ_FLAG_RUNNING);
	sfc_ev_qpoll(txq->evq);

	/*
	 * Ring tail is 'txq->pending', and although descriptors
	 * between 'txq->completed' and 'txq->pending' are still
	 * in use by the driver, they should be reported as DONE
	 */
	if (unlikely(offset < (txq->added - txq->pending)))
		return RTE_ETH_TX_DESC_FULL;

	/*
	 * There is no separate return value for unused descriptors;
	 * the latter will be reported as DONE because genuine DONE
	 * descriptors will be freed anyway in SW on the next burst
	 */
	return RTE_ETH_TX_DESC_DONE;
}

struct sfc_dp_tx sfc_efx_tx = {
	.dp = {
		.name		= SFC_KVARG_DATAPATH_EFX,
		.type		= SFC_DP_TX,
		.hw_fw_caps	= SFC_DP_HW_FW_CAP_TX_EFX,
	},
	.features		= 0,
	.dev_offload_capa	= DEV_TX_OFFLOAD_VLAN_INSERT |
				  DEV_TX_OFFLOAD_MULTI_SEGS,
	.queue_offload_capa	= DEV_TX_OFFLOAD_IPV4_CKSUM |
				  DEV_TX_OFFLOAD_UDP_CKSUM |
				  DEV_TX_OFFLOAD_TCP_CKSUM |
				  DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM |
				  DEV_TX_OFFLOAD_TCP_TSO,
	.qsize_up_rings		= sfc_efx_tx_qsize_up_rings,
	.qcreate		= sfc_efx_tx_qcreate,
	.qdestroy		= sfc_efx_tx_qdestroy,
	.qstart			= sfc_efx_tx_qstart,
	.qstop			= sfc_efx_tx_qstop,
	.qreap			= sfc_efx_tx_qreap,
	.qdesc_status		= sfc_efx_tx_qdesc_status,
	.pkt_prepare		= sfc_efx_prepare_pkts,
	.pkt_burst		= sfc_efx_xmit_pkts,
};