1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2015 6WIND S.A. 3 * Copyright 2015 Mellanox Technologies, Ltd 4 */ 5 6 #include <stddef.h> 7 #include <errno.h> 8 #include <string.h> 9 #include <stdint.h> 10 #include <unistd.h> 11 #include <inttypes.h> 12 13 #include <rte_mbuf.h> 14 #include <rte_malloc.h> 15 #include <ethdev_driver.h> 16 #include <bus_pci_driver.h> 17 #include <rte_common.h> 18 #include <rte_eal_paging.h> 19 20 #include <mlx5_common.h> 21 #include <mlx5_common_mr.h> 22 #include <mlx5_malloc.h> 23 24 #include "mlx5_defs.h" 25 #include "mlx5_utils.h" 26 #include "mlx5.h" 27 #include "mlx5_tx.h" 28 #include "mlx5_rxtx.h" 29 #include "mlx5_autoconf.h" 30 #include "rte_pmd_mlx5.h" 31 #include "mlx5_flow.h" 32 33 /** 34 * Allocate TX queue elements. 35 * 36 * @param txq_ctrl 37 * Pointer to TX queue structure. 38 */ 39 void 40 txq_alloc_elts(struct mlx5_txq_ctrl *txq_ctrl) 41 { 42 const unsigned int elts_n = 1 << txq_ctrl->txq.elts_n; 43 unsigned int i; 44 45 for (i = 0; (i != elts_n); ++i) 46 txq_ctrl->txq.elts[i] = NULL; 47 DRV_LOG(DEBUG, "port %u Tx queue %u allocated and configured %u WRs", 48 PORT_ID(txq_ctrl->priv), txq_ctrl->txq.idx, elts_n); 49 txq_ctrl->txq.elts_head = 0; 50 txq_ctrl->txq.elts_tail = 0; 51 txq_ctrl->txq.elts_comp = 0; 52 } 53 54 /** 55 * Free TX queue elements. 56 * 57 * @param txq_ctrl 58 * Pointer to TX queue structure. 59 */ 60 void 61 txq_free_elts(struct mlx5_txq_ctrl *txq_ctrl) 62 { 63 const uint16_t elts_n = 1 << txq_ctrl->txq.elts_n; 64 const uint16_t elts_m = elts_n - 1; 65 uint16_t elts_head = txq_ctrl->txq.elts_head; 66 uint16_t elts_tail = txq_ctrl->txq.elts_tail; 67 struct rte_mbuf *(*elts)[elts_n] = &txq_ctrl->txq.elts; 68 69 DRV_LOG(DEBUG, "port %u Tx queue %u freeing WRs", 70 PORT_ID(txq_ctrl->priv), txq_ctrl->txq.idx); 71 txq_ctrl->txq.elts_head = 0; 72 txq_ctrl->txq.elts_tail = 0; 73 txq_ctrl->txq.elts_comp = 0; 74 75 while (elts_tail != elts_head) { 76 struct rte_mbuf *elt = (*elts)[elts_tail & elts_m]; 77 78 MLX5_ASSERT(elt != NULL); 79 rte_pktmbuf_free_seg(elt); 80 #ifdef RTE_LIBRTE_MLX5_DEBUG 81 /* Poisoning. */ 82 memset(&(*elts)[elts_tail & elts_m], 83 0x77, 84 sizeof((*elts)[elts_tail & elts_m])); 85 #endif 86 ++elts_tail; 87 } 88 } 89 90 /** 91 * Returns the per-port supported offloads. 92 * 93 * @param dev 94 * Pointer to Ethernet device. 95 * 96 * @return 97 * Supported Tx offloads. 
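*
* Usage note (a sketch, assuming the usual ethdev configuration flow;
* port_id is a placeholder): the mask computed here is what applications
* typically see as dev_info.tx_offload_capa, so a caller would check it
* before requesting an offload, e.g.:
*
*    struct rte_eth_dev_info dev_info;
*    struct rte_eth_conf conf = {0};
*
*    rte_eth_dev_info_get(port_id, &dev_info);
*    if (dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_TCP_TSO)
*            conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_TCP_TSO;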
98 */ 99 uint64_t 100 mlx5_get_tx_port_offloads(struct rte_eth_dev *dev) 101 { 102 struct mlx5_priv *priv = dev->data->dev_private; 103 uint64_t offloads = (RTE_ETH_TX_OFFLOAD_MULTI_SEGS | 104 RTE_ETH_TX_OFFLOAD_VLAN_INSERT); 105 struct mlx5_port_config *config = &priv->config; 106 struct mlx5_dev_cap *dev_cap = &priv->sh->dev_cap; 107 108 if (dev_cap->hw_csum) 109 offloads |= (RTE_ETH_TX_OFFLOAD_IPV4_CKSUM | 110 RTE_ETH_TX_OFFLOAD_UDP_CKSUM | 111 RTE_ETH_TX_OFFLOAD_TCP_CKSUM); 112 if (dev_cap->tso) 113 offloads |= RTE_ETH_TX_OFFLOAD_TCP_TSO; 114 if (priv->sh->config.tx_pp || 115 priv->sh->cdev->config.hca_attr.wait_on_time) 116 offloads |= RTE_ETH_TX_OFFLOAD_SEND_ON_TIMESTAMP; 117 if (dev_cap->swp) { 118 if (dev_cap->swp & MLX5_SW_PARSING_CSUM_CAP) 119 offloads |= RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM; 120 if (dev_cap->swp & MLX5_SW_PARSING_TSO_CAP) 121 offloads |= (RTE_ETH_TX_OFFLOAD_IP_TNL_TSO | 122 RTE_ETH_TX_OFFLOAD_UDP_TNL_TSO); 123 } 124 if (dev_cap->tunnel_en) { 125 if (dev_cap->hw_csum) 126 offloads |= RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM; 127 if (dev_cap->tso) { 128 if (dev_cap->tunnel_en & 129 MLX5_TUNNELED_OFFLOADS_VXLAN_CAP) 130 offloads |= RTE_ETH_TX_OFFLOAD_VXLAN_TNL_TSO; 131 if (dev_cap->tunnel_en & 132 MLX5_TUNNELED_OFFLOADS_GRE_CAP) 133 offloads |= RTE_ETH_TX_OFFLOAD_GRE_TNL_TSO; 134 if (dev_cap->tunnel_en & 135 MLX5_TUNNELED_OFFLOADS_GENEVE_CAP) 136 offloads |= RTE_ETH_TX_OFFLOAD_GENEVE_TNL_TSO; 137 } 138 } 139 if (!config->mprq.enabled) 140 offloads |= RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE; 141 return offloads; 142 } 143 144 /* Fetches and drops all SW-owned and error CQEs to synchronize CQ. */ 145 static void 146 txq_sync_cq(struct mlx5_txq_data *txq) 147 { 148 volatile struct mlx5_cqe *cqe; 149 int ret, i; 150 151 i = txq->cqe_s; 152 do { 153 cqe = &txq->cqes[txq->cq_ci & txq->cqe_m]; 154 ret = check_cqe(cqe, txq->cqe_s, txq->cq_ci); 155 if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) { 156 if (likely(ret != MLX5_CQE_STATUS_ERR)) { 157 /* No new CQEs in completion queue. */ 158 MLX5_ASSERT(ret == MLX5_CQE_STATUS_HW_OWN); 159 break; 160 } 161 } 162 ++txq->cq_ci; 163 } while (--i); 164 /* Move all CQEs to HW ownership. */ 165 for (i = 0; i < txq->cqe_s; i++) { 166 cqe = &txq->cqes[i]; 167 cqe->op_own = MLX5_CQE_INVALIDATE; 168 } 169 /* Resync CQE and WQE (WQ in reset state). */ 170 rte_io_wmb(); 171 *txq->cq_db = rte_cpu_to_be_32(txq->cq_ci); 172 txq->cq_pi = txq->cq_ci; 173 rte_io_wmb(); 174 } 175 176 /** 177 * Tx queue stop. Device queue goes to the idle state, 178 * all involved mbufs are freed from elts/WQ. 179 * 180 * @param dev 181 * Pointer to Ethernet device structure. 182 * @param idx 183 * Tx queue index. 184 * 185 * @return 186 * 0 on success, a negative errno value otherwise and rte_errno is set. 187 */ 188 int 189 mlx5_tx_queue_stop_primary(struct rte_eth_dev *dev, uint16_t idx) 190 { 191 struct mlx5_priv *priv = dev->data->dev_private; 192 struct mlx5_txq_data *txq = (*priv->txqs)[idx]; 193 struct mlx5_txq_ctrl *txq_ctrl = 194 container_of(txq, struct mlx5_txq_ctrl, txq); 195 int ret; 196 197 MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY); 198 /* Move QP to RESET state. */ 199 ret = priv->obj_ops.txq_obj_modify(txq_ctrl->obj, MLX5_TXQ_MOD_RDY2RST, 200 (uint8_t)priv->dev_port); 201 if (ret) 202 return ret; 203 /* Handle all send completions. */ 204 txq_sync_cq(txq); 205 /* Free elts stored in the SQ. 
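* The CQ was synchronized by txq_sync_cq() just above, so all completions
* still owned by software have been consumed and the remaining mbufs
* referenced by elts[] can be returned to their pools before the WQ
* indices are reset below.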
*/
206 txq_free_elts(txq_ctrl);
207 /* Prevent writing new pkts to SQ by setting no free WQE. */
208 txq->wqe_ci = txq->wqe_s;
209 txq->wqe_pi = 0;
210 txq->elts_comp = 0;
211 /* Set the actual queue state. */
212 dev->data->tx_queue_state[idx] = RTE_ETH_QUEUE_STATE_STOPPED;
213 return 0;
214 }
215
216 /**
217 * Tx queue stop. Device queue goes to the idle state,
218 * all involved mbufs are freed from elts/WQ.
219 *
220 * @param dev
221 * Pointer to Ethernet device structure.
222 * @param idx
223 * Tx queue index.
224 *
225 * @return
226 * 0 on success, a negative errno value otherwise and rte_errno is set.
227 */
228 int
229 mlx5_tx_queue_stop(struct rte_eth_dev *dev, uint16_t idx)
230 {
231 int ret;
232
233 if (rte_eth_dev_is_tx_hairpin_queue(dev, idx)) {
234 DRV_LOG(ERR, "Hairpin queue can't be stopped");
235 rte_errno = EINVAL;
236 return -EINVAL;
237 }
238 if (dev->data->tx_queue_state[idx] == RTE_ETH_QUEUE_STATE_STOPPED)
239 return 0;
240 if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
241 ret = mlx5_mp_os_req_queue_control(dev, idx,
242 MLX5_MP_REQ_QUEUE_TX_STOP);
243 } else {
244 ret = mlx5_tx_queue_stop_primary(dev, idx);
245 }
246 return ret;
247 }
248
249 /**
250 * Tx queue start. Device queue goes to the ready state,
251 * the WQ pointers are reset and transmission can be resumed.
252 *
253 * @param dev
254 * Pointer to Ethernet device structure.
255 * @param idx
256 * Tx queue index.
257 *
258 * @return
259 * 0 on success, a negative errno value otherwise and rte_errno is set.
260 */
261 int
262 mlx5_tx_queue_start_primary(struct rte_eth_dev *dev, uint16_t idx)
263 {
264 struct mlx5_priv *priv = dev->data->dev_private;
265 struct mlx5_txq_data *txq = (*priv->txqs)[idx];
266 struct mlx5_txq_ctrl *txq_ctrl =
267 container_of(txq, struct mlx5_txq_ctrl, txq);
268 int ret;
269
270 MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
271 ret = priv->obj_ops.txq_obj_modify(txq_ctrl->obj,
272 MLX5_TXQ_MOD_RST2RDY,
273 (uint8_t)priv->dev_port);
274 if (ret)
275 return ret;
276 txq_ctrl->txq.wqe_ci = 0;
277 txq_ctrl->txq.wqe_pi = 0;
278 txq_ctrl->txq.elts_comp = 0;
279 /* Set the actual queue state. */
280 dev->data->tx_queue_state[idx] = RTE_ETH_QUEUE_STATE_STARTED;
281 return 0;
282 }
283
284 /**
285 * Tx queue start. Device queue goes to the ready state,
286 * the WQ pointers are reset and transmission can be resumed.
287 *
288 * @param dev
289 * Pointer to Ethernet device structure.
290 * @param idx
291 * Tx queue index.
292 *
293 * @return
294 * 0 on success, a negative errno value otherwise and rte_errno is set.
295 */
296 int
297 mlx5_tx_queue_start(struct rte_eth_dev *dev, uint16_t idx)
298 {
299 int ret;
300
301 if (rte_eth_dev_is_tx_hairpin_queue(dev, idx)) {
302 DRV_LOG(ERR, "Hairpin queue can't be started");
303 rte_errno = EINVAL;
304 return -EINVAL;
305 }
306 if (dev->data->tx_queue_state[idx] == RTE_ETH_QUEUE_STATE_STARTED)
307 return 0;
308 if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
309 ret = mlx5_mp_os_req_queue_control(dev, idx,
310 MLX5_MP_REQ_QUEUE_TX_START);
311 } else {
312 ret = mlx5_tx_queue_start_primary(dev, idx);
313 }
314 return ret;
315 }
316
317 /**
318 * Tx queue presetup checks.
319 *
320 * @param dev
321 * Pointer to Ethernet device structure.
322 * @param idx
323 * Tx queue index.
324 * @param desc
325 * Number of descriptors to configure in queue.
326 *
327 * @return
328 * 0 on success, a negative errno value otherwise and rte_errno is set.
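*
* Worked example (assuming the default MLX5_TX_COMP_THRESH of 32):
* a request of desc = 20 is first raised to 33 (threshold + 1) and then
* rounded up to the next power of two, so the queue ends up with 64
* descriptors; a request of desc = 100 is simply rounded up to 128.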
329 */ 330 static int 331 mlx5_tx_queue_pre_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t *desc) 332 { 333 struct mlx5_priv *priv = dev->data->dev_private; 334 335 if (*desc <= MLX5_TX_COMP_THRESH) { 336 DRV_LOG(WARNING, 337 "port %u number of descriptors requested for Tx queue" 338 " %u must be higher than MLX5_TX_COMP_THRESH, using %u" 339 " instead of %u", dev->data->port_id, idx, 340 MLX5_TX_COMP_THRESH + 1, *desc); 341 *desc = MLX5_TX_COMP_THRESH + 1; 342 } 343 if (!rte_is_power_of_2(*desc)) { 344 *desc = 1 << log2above(*desc); 345 DRV_LOG(WARNING, 346 "port %u increased number of descriptors in Tx queue" 347 " %u to the next power of two (%d)", 348 dev->data->port_id, idx, *desc); 349 } 350 DRV_LOG(DEBUG, "port %u configuring queue %u for %u descriptors", 351 dev->data->port_id, idx, *desc); 352 if (idx >= priv->txqs_n) { 353 DRV_LOG(ERR, "port %u Tx queue index out of range (%u >= %u)", 354 dev->data->port_id, idx, priv->txqs_n); 355 rte_errno = EOVERFLOW; 356 return -rte_errno; 357 } 358 if (!mlx5_txq_releasable(dev, idx)) { 359 rte_errno = EBUSY; 360 DRV_LOG(ERR, "port %u unable to release queue index %u", 361 dev->data->port_id, idx); 362 return -rte_errno; 363 } 364 mlx5_txq_release(dev, idx); 365 return 0; 366 } 367 368 /** 369 * DPDK callback to configure a TX queue. 370 * 371 * @param dev 372 * Pointer to Ethernet device structure. 373 * @param idx 374 * TX queue index. 375 * @param desc 376 * Number of descriptors to configure in queue. 377 * @param socket 378 * NUMA socket on which memory must be allocated. 379 * @param[in] conf 380 * Thresholds parameters. 381 * 382 * @return 383 * 0 on success, a negative errno value otherwise and rte_errno is set. 384 */ 385 int 386 mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, 387 unsigned int socket, const struct rte_eth_txconf *conf) 388 { 389 struct mlx5_priv *priv = dev->data->dev_private; 390 struct mlx5_txq_data *txq = (*priv->txqs)[idx]; 391 struct mlx5_txq_ctrl *txq_ctrl = 392 container_of(txq, struct mlx5_txq_ctrl, txq); 393 int res; 394 395 res = mlx5_tx_queue_pre_setup(dev, idx, &desc); 396 if (res) 397 return res; 398 txq_ctrl = mlx5_txq_new(dev, idx, desc, socket, conf); 399 if (!txq_ctrl) { 400 DRV_LOG(ERR, "port %u unable to allocate queue index %u", 401 dev->data->port_id, idx); 402 return -rte_errno; 403 } 404 DRV_LOG(DEBUG, "port %u adding Tx queue %u to list", 405 dev->data->port_id, idx); 406 (*priv->txqs)[idx] = &txq_ctrl->txq; 407 return 0; 408 } 409 410 /** 411 * DPDK callback to configure a TX hairpin queue. 412 * 413 * @param dev 414 * Pointer to Ethernet device structure. 415 * @param idx 416 * TX queue index. 417 * @param desc 418 * Number of descriptors to configure in queue. 419 * @param[in] hairpin_conf 420 * The hairpin binding configuration. 421 * 422 * @return 423 * 0 on success, a negative errno value otherwise and rte_errno is set. 
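*
* Minimal configuration sketch (peer_port_id and rx_queue_id are
* placeholders, not symbols from this file); for a peer on a different
* port both manual_bind and tx_explicit must be set, as checked below:
*
*    struct rte_eth_hairpin_conf conf = {
*            .peer_count = 1,
*            .manual_bind = 1,
*            .tx_explicit = 1,
*    };
*
*    conf.peers[0].port = peer_port_id;
*    conf.peers[0].queue = rx_queue_id;
*    rte_eth_tx_hairpin_queue_setup(port_id, idx, desc, &conf);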
424 */
425 int
426 mlx5_tx_hairpin_queue_setup(struct rte_eth_dev *dev, uint16_t idx,
427 uint16_t desc,
428 const struct rte_eth_hairpin_conf *hairpin_conf)
429 {
430 struct mlx5_priv *priv = dev->data->dev_private;
431 struct mlx5_txq_data *txq = (*priv->txqs)[idx];
432 struct mlx5_txq_ctrl *txq_ctrl =
433 container_of(txq, struct mlx5_txq_ctrl, txq);
434 int res;
435
436 res = mlx5_tx_queue_pre_setup(dev, idx, &desc);
437 if (res)
438 return res;
439 if (hairpin_conf->peer_count != 1) {
440 rte_errno = EINVAL;
441 DRV_LOG(ERR, "port %u unable to setup Tx hairpin queue index %u"
442 " peer count is %u", dev->data->port_id,
443 idx, hairpin_conf->peer_count);
444 return -rte_errno;
445 }
446 if (hairpin_conf->peers[0].port == dev->data->port_id) {
447 if (hairpin_conf->peers[0].queue >= priv->rxqs_n) {
448 rte_errno = EINVAL;
449 DRV_LOG(ERR, "port %u unable to setup Tx hairpin queue"
450 " index %u, Rx %u is larger than %u",
451 dev->data->port_id, idx,
452 hairpin_conf->peers[0].queue, priv->rxqs_n);
453 return -rte_errno;
454 }
455 } else {
456 if (hairpin_conf->manual_bind == 0 ||
457 hairpin_conf->tx_explicit == 0) {
458 rte_errno = EINVAL;
459 DRV_LOG(ERR, "port %u unable to setup Tx hairpin queue"
460 " index %u peer port %u with attributes %u %u",
461 dev->data->port_id, idx,
462 hairpin_conf->peers[0].port,
463 hairpin_conf->manual_bind,
464 hairpin_conf->tx_explicit);
465 return -rte_errno;
466 }
467 }
468 txq_ctrl = mlx5_txq_hairpin_new(dev, idx, desc, hairpin_conf);
469 if (!txq_ctrl) {
470 DRV_LOG(ERR, "port %u unable to allocate queue index %u",
471 dev->data->port_id, idx);
472 return -rte_errno;
473 }
474 DRV_LOG(DEBUG, "port %u adding Tx queue %u to list",
475 dev->data->port_id, idx);
476 (*priv->txqs)[idx] = &txq_ctrl->txq;
477 dev->data->tx_queue_state[idx] = RTE_ETH_QUEUE_STATE_HAIRPIN;
478 return 0;
479 }
480
481 /**
482 * DPDK callback to release a TX queue.
483 *
484 * @param dev
485 * Pointer to Ethernet device structure.
486 * @param qid
487 * Transmit queue index.
488 */
489 void
490 mlx5_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
491 {
492 struct mlx5_txq_data *txq = dev->data->tx_queues[qid];
493
494 if (txq == NULL)
495 return;
496 DRV_LOG(DEBUG, "port %u removing Tx queue %u from list",
497 dev->data->port_id, qid);
498 mlx5_txq_release(dev, qid);
499 }
500
501 /**
502 * Remap UAR register of a Tx queue for secondary process.
503 *
504 * Remapped address is stored in the table in the process private structure of
505 * the device, indexed by queue index.
506 *
507 * @param txq_ctrl
508 * Pointer to Tx queue control structure.
509 * @param fd
510 * Verbs file descriptor to map UAR pages.
511 *
512 * @return
513 * 0 on success, a negative errno value otherwise and rte_errno is set.
514 */
515 static int
516 txq_uar_init_secondary(struct mlx5_txq_ctrl *txq_ctrl, int fd)
517 {
518 struct mlx5_priv *priv = txq_ctrl->priv;
519 struct mlx5_proc_priv *ppriv = MLX5_PROC_PRIV(PORT_ID(priv));
520 struct mlx5_proc_priv *primary_ppriv = priv->sh->pppriv;
521 struct mlx5_txq_data *txq = &txq_ctrl->txq;
522 void *addr;
523 uintptr_t uar_va;
524 uintptr_t offset;
525 const size_t page_size = rte_mem_page_size();
526 if (page_size == (size_t)-1) {
527 DRV_LOG(ERR, "Failed to get mem page size");
528 rte_errno = ENOMEM;
529 return -rte_errno;
530 }
531
532 if (txq_ctrl->is_hairpin)
533 return 0;
534 MLX5_ASSERT(ppriv);
535 /*
536 * As in rdma-core, UARs are mapped in units of the OS
537 * page size.
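* The UAR virtual address taken from the primary process below is used
* only to recover the offset inside that page; the secondary process
* maps the same page through the Verbs file descriptor at
* uar_mmap_offset and then applies the identical offset.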
Ref to libmlx5 function: mlx5_init_context() 538 */ 539 uar_va = (uintptr_t)primary_ppriv->uar_table[txq->idx].db; 540 offset = uar_va & (page_size - 1); /* Offset in page. */ 541 addr = rte_mem_map(NULL, page_size, RTE_PROT_WRITE, RTE_MAP_SHARED, 542 fd, txq_ctrl->uar_mmap_offset); 543 if (!addr) { 544 DRV_LOG(ERR, "Port %u mmap failed for BF reg of txq %u.", 545 txq->port_id, txq->idx); 546 rte_errno = ENXIO; 547 return -rte_errno; 548 } 549 addr = RTE_PTR_ADD(addr, offset); 550 ppriv->uar_table[txq->idx].db = addr; 551 #ifndef RTE_ARCH_64 552 ppriv->uar_table[txq->idx].sl_p = 553 primary_ppriv->uar_table[txq->idx].sl_p; 554 #endif 555 return 0; 556 } 557 558 /** 559 * Unmap UAR register of a Tx queue for secondary process. 560 * 561 * @param txq_ctrl 562 * Pointer to Tx queue control structure. 563 */ 564 static void 565 txq_uar_uninit_secondary(struct mlx5_txq_ctrl *txq_ctrl) 566 { 567 struct mlx5_proc_priv *ppriv = MLX5_PROC_PRIV(PORT_ID(txq_ctrl->priv)); 568 void *addr; 569 const size_t page_size = rte_mem_page_size(); 570 if (page_size == (size_t)-1) { 571 DRV_LOG(ERR, "Failed to get mem page size"); 572 rte_errno = ENOMEM; 573 } 574 575 if (txq_ctrl->is_hairpin) 576 return; 577 addr = ppriv->uar_table[txq_ctrl->txq.idx].db; 578 rte_mem_unmap(RTE_PTR_ALIGN_FLOOR(addr, page_size), page_size); 579 } 580 581 /** 582 * Deinitialize Tx UAR registers for secondary process. 583 * 584 * @param dev 585 * Pointer to Ethernet device. 586 */ 587 void 588 mlx5_tx_uar_uninit_secondary(struct rte_eth_dev *dev) 589 { 590 struct mlx5_proc_priv *ppriv = (struct mlx5_proc_priv *) 591 dev->process_private; 592 const size_t page_size = rte_mem_page_size(); 593 void *addr; 594 unsigned int i; 595 596 if (page_size == (size_t)-1) { 597 DRV_LOG(ERR, "Failed to get mem page size"); 598 return; 599 } 600 MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_SECONDARY); 601 for (i = 0; i != ppriv->uar_table_sz; ++i) { 602 if (!ppriv->uar_table[i].db) 603 continue; 604 addr = ppriv->uar_table[i].db; 605 rte_mem_unmap(RTE_PTR_ALIGN_FLOOR(addr, page_size), page_size); 606 607 } 608 } 609 610 /** 611 * Initialize Tx UAR registers for secondary process. 612 * 613 * @param dev 614 * Pointer to Ethernet device. 615 * @param fd 616 * Verbs file descriptor to map UAR pages. 617 * 618 * @return 619 * 0 on success, a negative errno value otherwise and rte_errno is set. 620 */ 621 int 622 mlx5_tx_uar_init_secondary(struct rte_eth_dev *dev, int fd) 623 { 624 struct mlx5_priv *priv = dev->data->dev_private; 625 struct mlx5_txq_data *txq; 626 struct mlx5_txq_ctrl *txq_ctrl; 627 unsigned int i; 628 int ret; 629 630 MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_SECONDARY); 631 for (i = 0; i != priv->txqs_n; ++i) { 632 if (!(*priv->txqs)[i]) 633 continue; 634 txq = (*priv->txqs)[i]; 635 txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq); 636 if (txq_ctrl->is_hairpin) 637 continue; 638 MLX5_ASSERT(txq->idx == (uint16_t)i); 639 ret = txq_uar_init_secondary(txq_ctrl, fd); 640 if (ret) 641 goto error; 642 } 643 return 0; 644 error: 645 /* Rollback. */ 646 do { 647 if (!(*priv->txqs)[i]) 648 continue; 649 txq = (*priv->txqs)[i]; 650 txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq); 651 txq_uar_uninit_secondary(txq_ctrl); 652 } while (i--); 653 return -rte_errno; 654 } 655 656 /** 657 * Verify the Verbs Tx queue list is empty 658 * 659 * @param dev 660 * Pointer to Ethernet device. 661 * 662 * @return 663 * The number of object not released. 
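*
* Typically used on the device close path: a non-zero return means some
* Tx queue objects are still referenced and would be leaked by the
* teardown.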
664 */
665 int
666 mlx5_txq_obj_verify(struct rte_eth_dev *dev)
667 {
668 struct mlx5_priv *priv = dev->data->dev_private;
669 int ret = 0;
670 struct mlx5_txq_obj *txq_obj;
671
672 LIST_FOREACH(txq_obj, &priv->txqsobj, next) {
673 DRV_LOG(DEBUG, "port %u Verbs Tx queue %u still referenced",
674 dev->data->port_id, txq_obj->txq_ctrl->txq.idx);
675 ++ret;
676 }
677 return ret;
678 }
679
680 /**
681 * Calculate the total number of WQEBB for Tx queue.
682 *
683 * Simplified version of calc_sq_size() in rdma-core.
684 *
685 * @param txq_ctrl
686 * Pointer to Tx queue control structure.
687 *
688 * @return
689 * The number of WQEBB.
690 */
691 static int
692 txq_calc_wqebb_cnt(struct mlx5_txq_ctrl *txq_ctrl)
693 {
694 unsigned int wqe_size;
695 const unsigned int desc = 1 << txq_ctrl->txq.elts_n;
696
697 wqe_size = MLX5_WQE_CSEG_SIZE +
698 MLX5_WQE_ESEG_SIZE +
699 MLX5_WSEG_SIZE -
700 MLX5_ESEG_MIN_INLINE_SIZE +
701 txq_ctrl->max_inline_data;
702 return rte_align32pow2(wqe_size * desc) / MLX5_WQE_SIZE;
703 }
704
705 /**
706 * Calculate the maximal inline data size for Tx queue.
707 *
708 * @param txq_ctrl
709 * Pointer to Tx queue control structure.
710 *
711 * @return
712 * The maximal inline data size.
713 */
714 static unsigned int
715 txq_calc_inline_max(struct mlx5_txq_ctrl *txq_ctrl)
716 {
717 const unsigned int desc = 1 << txq_ctrl->txq.elts_n;
718 struct mlx5_priv *priv = txq_ctrl->priv;
719 unsigned int wqe_size;
720
721 wqe_size = priv->sh->dev_cap.max_qp_wr / desc;
722 if (!wqe_size)
723 return 0;
724 /*
725 * This calculation is derived from the source of
726 * mlx5_calc_send_wqe() in the rdma-core library.
727 */
728 wqe_size = wqe_size * MLX5_WQE_SIZE -
729 MLX5_WQE_CSEG_SIZE -
730 MLX5_WQE_ESEG_SIZE -
731 MLX5_WSEG_SIZE -
732 MLX5_WSEG_SIZE +
733 MLX5_DSEG_MIN_INLINE_SIZE;
734 return wqe_size;
735 }
736
737 /**
738 * Set Tx queue parameters from device configuration.
739 *
740 * @param txq_ctrl
741 * Pointer to Tx queue control structure.
742 */
743 static void
744 txq_set_params(struct mlx5_txq_ctrl *txq_ctrl)
745 {
746 struct mlx5_priv *priv = txq_ctrl->priv;
747 struct mlx5_port_config *config = &priv->config;
748 struct mlx5_dev_cap *dev_cap = &priv->sh->dev_cap;
749 unsigned int inlen_send; /* Inline data for ordinary SEND. */
750 unsigned int inlen_empw; /* Inline data for enhanced MPW. */
751 unsigned int inlen_mode; /* Minimal required Inline data. */
752 unsigned int txqs_inline; /* Min Tx queues to enable inline. */
753 uint64_t dev_txoff = priv->dev_data->dev_conf.txmode.offloads;
754 bool tso = txq_ctrl->txq.offloads & (RTE_ETH_TX_OFFLOAD_TCP_TSO |
755 RTE_ETH_TX_OFFLOAD_VXLAN_TNL_TSO |
756 RTE_ETH_TX_OFFLOAD_GRE_TNL_TSO |
757 RTE_ETH_TX_OFFLOAD_IP_TNL_TSO |
758 RTE_ETH_TX_OFFLOAD_UDP_TNL_TSO);
759 bool vlan_inline;
760 unsigned int temp;
761
762 txq_ctrl->txq.fast_free =
763 !!((txq_ctrl->txq.offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) &&
764 !(txq_ctrl->txq.offloads & RTE_ETH_TX_OFFLOAD_MULTI_SEGS) &&
765 !config->mprq.enabled);
766 if (config->txqs_inline == MLX5_ARG_UNSET)
767 txqs_inline =
768 #if defined(RTE_ARCH_ARM64)
769 (priv->pci_dev && priv->pci_dev->id.device_id ==
770 PCI_DEVICE_ID_MELLANOX_BLUEFIELD) ?
771 MLX5_INLINE_MAX_TXQS_BLUEFIELD :
772 #endif
773 MLX5_INLINE_MAX_TXQS;
774 else
775 txqs_inline = (unsigned int)config->txqs_inline;
776 inlen_send = (config->txq_inline_max == MLX5_ARG_UNSET) ?
777 MLX5_SEND_DEF_INLINE_LEN :
778 (unsigned int)config->txq_inline_max;
779 inlen_empw = (config->txq_inline_mpw == MLX5_ARG_UNSET) ?
780 MLX5_EMPW_DEF_INLINE_LEN : 781 (unsigned int)config->txq_inline_mpw; 782 inlen_mode = (config->txq_inline_min == MLX5_ARG_UNSET) ? 783 0 : (unsigned int)config->txq_inline_min; 784 if (config->mps != MLX5_MPW_ENHANCED && config->mps != MLX5_MPW) 785 inlen_empw = 0; 786 /* 787 * If there is requested minimal amount of data to inline 788 * we MUST enable inlining. This is a case for ConnectX-4 789 * which usually requires L2 inlined for correct operating 790 * and ConnectX-4 Lx which requires L2-L4 inlined to 791 * support E-Switch Flows. 792 */ 793 if (inlen_mode) { 794 if (inlen_mode <= MLX5_ESEG_MIN_INLINE_SIZE) { 795 /* 796 * Optimize minimal inlining for single 797 * segment packets to fill one WQEBB 798 * without gaps. 799 */ 800 temp = MLX5_ESEG_MIN_INLINE_SIZE; 801 } else { 802 temp = inlen_mode - MLX5_ESEG_MIN_INLINE_SIZE; 803 temp = RTE_ALIGN(temp, MLX5_WSEG_SIZE) + 804 MLX5_ESEG_MIN_INLINE_SIZE; 805 temp = RTE_MIN(temp, MLX5_SEND_MAX_INLINE_LEN); 806 } 807 if (temp != inlen_mode) { 808 DRV_LOG(INFO, 809 "port %u minimal required inline setting" 810 " aligned from %u to %u", 811 PORT_ID(priv), inlen_mode, temp); 812 inlen_mode = temp; 813 } 814 } 815 /* 816 * If port is configured to support VLAN insertion and device 817 * does not support this feature by HW (for NICs before ConnectX-5 818 * or in case of wqe_vlan_insert flag is not set) we must enable 819 * data inline on all queues because it is supported by single 820 * tx_burst routine. 821 */ 822 txq_ctrl->txq.vlan_en = config->hw_vlan_insert; 823 vlan_inline = (dev_txoff & RTE_ETH_TX_OFFLOAD_VLAN_INSERT) && 824 !config->hw_vlan_insert; 825 /* 826 * If there are few Tx queues it is prioritized 827 * to save CPU cycles and disable data inlining at all. 828 */ 829 if (inlen_send && priv->txqs_n >= txqs_inline) { 830 /* 831 * The data sent with ordinal MLX5_OPCODE_SEND 832 * may be inlined in Ethernet Segment, align the 833 * length accordingly to fit entire WQEBBs. 834 */ 835 temp = RTE_MAX(inlen_send, 836 MLX5_ESEG_MIN_INLINE_SIZE + MLX5_WQE_DSEG_SIZE); 837 temp -= MLX5_ESEG_MIN_INLINE_SIZE + MLX5_WQE_DSEG_SIZE; 838 temp = RTE_ALIGN(temp, MLX5_WQE_SIZE); 839 temp += MLX5_ESEG_MIN_INLINE_SIZE + MLX5_WQE_DSEG_SIZE; 840 temp = RTE_MIN(temp, MLX5_WQE_SIZE_MAX + 841 MLX5_ESEG_MIN_INLINE_SIZE - 842 MLX5_WQE_CSEG_SIZE - 843 MLX5_WQE_ESEG_SIZE - 844 MLX5_WQE_DSEG_SIZE * 2); 845 temp = RTE_MIN(temp, MLX5_SEND_MAX_INLINE_LEN); 846 temp = RTE_MAX(temp, inlen_mode); 847 if (temp != inlen_send) { 848 DRV_LOG(INFO, 849 "port %u ordinary send inline setting" 850 " aligned from %u to %u", 851 PORT_ID(priv), inlen_send, temp); 852 inlen_send = temp; 853 } 854 /* 855 * Not aligned to cache lines, but to WQEs. 856 * First bytes of data (initial alignment) 857 * is going to be copied explicitly at the 858 * beginning of inlining buffer in Ethernet 859 * Segment. 860 */ 861 MLX5_ASSERT(inlen_send >= MLX5_ESEG_MIN_INLINE_SIZE); 862 MLX5_ASSERT(inlen_send <= MLX5_WQE_SIZE_MAX + 863 MLX5_ESEG_MIN_INLINE_SIZE - 864 MLX5_WQE_CSEG_SIZE - 865 MLX5_WQE_ESEG_SIZE - 866 MLX5_WQE_DSEG_SIZE * 2); 867 } else if (inlen_mode) { 868 /* 869 * If minimal inlining is requested we must 870 * enable inlining in general, despite the 871 * number of configured queues. Ignore the 872 * txq_inline_max devarg, this is not 873 * full-featured inline. 874 */ 875 inlen_send = inlen_mode; 876 inlen_empw = 0; 877 } else if (vlan_inline) { 878 /* 879 * Hardware does not report offload for 880 * VLAN insertion, we must enable data inline 881 * to implement feature by software. 
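* In that case at least the L2 header (MLX5_ESEG_MIN_INLINE_SIZE bytes)
* must be copied into the WQE Ethernet Segment so that the tx_burst
* routine can place the VLAN tag there, which is what the assignment
* below enables.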
882 */ 883 inlen_send = MLX5_ESEG_MIN_INLINE_SIZE; 884 inlen_empw = 0; 885 } else { 886 inlen_send = 0; 887 inlen_empw = 0; 888 } 889 txq_ctrl->txq.inlen_send = inlen_send; 890 txq_ctrl->txq.inlen_mode = inlen_mode; 891 txq_ctrl->txq.inlen_empw = 0; 892 if (inlen_send && inlen_empw && priv->txqs_n >= txqs_inline) { 893 /* 894 * The data sent with MLX5_OPCODE_ENHANCED_MPSW 895 * may be inlined in Data Segment, align the 896 * length accordingly to fit entire WQEBBs. 897 */ 898 temp = RTE_MAX(inlen_empw, 899 MLX5_WQE_SIZE + MLX5_DSEG_MIN_INLINE_SIZE); 900 temp -= MLX5_DSEG_MIN_INLINE_SIZE; 901 temp = RTE_ALIGN(temp, MLX5_WQE_SIZE); 902 temp += MLX5_DSEG_MIN_INLINE_SIZE; 903 temp = RTE_MIN(temp, MLX5_WQE_SIZE_MAX + 904 MLX5_DSEG_MIN_INLINE_SIZE - 905 MLX5_WQE_CSEG_SIZE - 906 MLX5_WQE_ESEG_SIZE - 907 MLX5_WQE_DSEG_SIZE); 908 temp = RTE_MIN(temp, MLX5_EMPW_MAX_INLINE_LEN); 909 if (temp != inlen_empw) { 910 DRV_LOG(INFO, 911 "port %u enhanced empw inline setting" 912 " aligned from %u to %u", 913 PORT_ID(priv), inlen_empw, temp); 914 inlen_empw = temp; 915 } 916 MLX5_ASSERT(inlen_empw >= MLX5_ESEG_MIN_INLINE_SIZE); 917 MLX5_ASSERT(inlen_empw <= MLX5_WQE_SIZE_MAX + 918 MLX5_DSEG_MIN_INLINE_SIZE - 919 MLX5_WQE_CSEG_SIZE - 920 MLX5_WQE_ESEG_SIZE - 921 MLX5_WQE_DSEG_SIZE); 922 txq_ctrl->txq.inlen_empw = inlen_empw; 923 } 924 txq_ctrl->max_inline_data = RTE_MAX(inlen_send, inlen_empw); 925 if (tso) { 926 txq_ctrl->max_tso_header = MLX5_MAX_TSO_HEADER; 927 txq_ctrl->max_inline_data = RTE_MAX(txq_ctrl->max_inline_data, 928 MLX5_MAX_TSO_HEADER); 929 txq_ctrl->txq.tso_en = 1; 930 } 931 if (((RTE_ETH_TX_OFFLOAD_VXLAN_TNL_TSO & txq_ctrl->txq.offloads) && 932 (dev_cap->tunnel_en & MLX5_TUNNELED_OFFLOADS_VXLAN_CAP)) | 933 ((RTE_ETH_TX_OFFLOAD_GRE_TNL_TSO & txq_ctrl->txq.offloads) && 934 (dev_cap->tunnel_en & MLX5_TUNNELED_OFFLOADS_GRE_CAP)) | 935 ((RTE_ETH_TX_OFFLOAD_GENEVE_TNL_TSO & txq_ctrl->txq.offloads) && 936 (dev_cap->tunnel_en & MLX5_TUNNELED_OFFLOADS_GENEVE_CAP)) | 937 (dev_cap->swp & MLX5_SW_PARSING_TSO_CAP)) 938 txq_ctrl->txq.tunnel_en = 1; 939 txq_ctrl->txq.swp_en = (((RTE_ETH_TX_OFFLOAD_IP_TNL_TSO | 940 RTE_ETH_TX_OFFLOAD_UDP_TNL_TSO) & 941 txq_ctrl->txq.offloads) && (dev_cap->swp & 942 MLX5_SW_PARSING_TSO_CAP)) | 943 ((RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM & 944 txq_ctrl->txq.offloads) && (dev_cap->swp & 945 MLX5_SW_PARSING_CSUM_CAP)); 946 } 947 948 /** 949 * Adjust Tx queue data inline parameters for large queue sizes. 950 * The data inline feature requires multiple WQEs to fit the packets, 951 * and if the large amount of Tx descriptors is requested by application 952 * the total WQE amount may exceed the hardware capabilities. If the 953 * default inline setting are used we can try to adjust these ones and 954 * meet the hardware requirements and not exceed the queue size. 955 * 956 * @param txq_ctrl 957 * Pointer to Tx queue control structure. 958 * 959 * @return 960 * Zero on success, otherwise the parameters can not be adjusted. 961 */ 962 static int 963 txq_adjust_params(struct mlx5_txq_ctrl *txq_ctrl) 964 { 965 struct mlx5_priv *priv = txq_ctrl->priv; 966 struct mlx5_port_config *config = &priv->config; 967 unsigned int max_inline; 968 969 max_inline = txq_calc_inline_max(txq_ctrl); 970 if (!txq_ctrl->txq.inlen_send) { 971 /* 972 * Inline data feature is not engaged at all. 973 * There is nothing to adjust. 974 */ 975 return 0; 976 } 977 if (txq_ctrl->max_inline_data <= max_inline) { 978 /* 979 * The requested inline data length does not 980 * exceed queue capabilities. 
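* That is, the WQE count implied by the configured inline lengths still
* fits into dev_cap.max_qp_wr for the requested descriptor number, so
* the configured values are kept as they are.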
981 */ 982 return 0; 983 } 984 if (txq_ctrl->txq.inlen_mode > max_inline) { 985 DRV_LOG(ERR, 986 "minimal data inline requirements (%u) are not" 987 " satisfied (%u) on port %u, try the smaller" 988 " Tx queue size (%d)", 989 txq_ctrl->txq.inlen_mode, max_inline, 990 priv->dev_data->port_id, priv->sh->dev_cap.max_qp_wr); 991 goto error; 992 } 993 if (txq_ctrl->txq.inlen_send > max_inline && 994 config->txq_inline_max != MLX5_ARG_UNSET && 995 config->txq_inline_max > (int)max_inline) { 996 DRV_LOG(ERR, 997 "txq_inline_max requirements (%u) are not" 998 " satisfied (%u) on port %u, try the smaller" 999 " Tx queue size (%d)", 1000 txq_ctrl->txq.inlen_send, max_inline, 1001 priv->dev_data->port_id, priv->sh->dev_cap.max_qp_wr); 1002 goto error; 1003 } 1004 if (txq_ctrl->txq.inlen_empw > max_inline && 1005 config->txq_inline_mpw != MLX5_ARG_UNSET && 1006 config->txq_inline_mpw > (int)max_inline) { 1007 DRV_LOG(ERR, 1008 "txq_inline_mpw requirements (%u) are not" 1009 " satisfied (%u) on port %u, try the smaller" 1010 " Tx queue size (%d)", 1011 txq_ctrl->txq.inlen_empw, max_inline, 1012 priv->dev_data->port_id, priv->sh->dev_cap.max_qp_wr); 1013 goto error; 1014 } 1015 if (txq_ctrl->txq.tso_en && max_inline < MLX5_MAX_TSO_HEADER) { 1016 DRV_LOG(ERR, 1017 "tso header inline requirements (%u) are not" 1018 " satisfied (%u) on port %u, try the smaller" 1019 " Tx queue size (%d)", 1020 MLX5_MAX_TSO_HEADER, max_inline, 1021 priv->dev_data->port_id, priv->sh->dev_cap.max_qp_wr); 1022 goto error; 1023 } 1024 if (txq_ctrl->txq.inlen_send > max_inline) { 1025 DRV_LOG(WARNING, 1026 "adjust txq_inline_max (%u->%u)" 1027 " due to large Tx queue on port %u", 1028 txq_ctrl->txq.inlen_send, max_inline, 1029 priv->dev_data->port_id); 1030 txq_ctrl->txq.inlen_send = max_inline; 1031 } 1032 if (txq_ctrl->txq.inlen_empw > max_inline) { 1033 DRV_LOG(WARNING, 1034 "adjust txq_inline_mpw (%u->%u)" 1035 "due to large Tx queue on port %u", 1036 txq_ctrl->txq.inlen_empw, max_inline, 1037 priv->dev_data->port_id); 1038 txq_ctrl->txq.inlen_empw = max_inline; 1039 } 1040 txq_ctrl->max_inline_data = RTE_MAX(txq_ctrl->txq.inlen_send, 1041 txq_ctrl->txq.inlen_empw); 1042 MLX5_ASSERT(txq_ctrl->max_inline_data <= max_inline); 1043 MLX5_ASSERT(txq_ctrl->txq.inlen_mode <= max_inline); 1044 MLX5_ASSERT(txq_ctrl->txq.inlen_mode <= txq_ctrl->txq.inlen_send); 1045 MLX5_ASSERT(txq_ctrl->txq.inlen_mode <= txq_ctrl->txq.inlen_empw || 1046 !txq_ctrl->txq.inlen_empw); 1047 return 0; 1048 error: 1049 rte_errno = ENOMEM; 1050 return -ENOMEM; 1051 } 1052 1053 /** 1054 * Create a DPDK Tx queue. 1055 * 1056 * @param dev 1057 * Pointer to Ethernet device. 1058 * @param idx 1059 * TX queue index. 1060 * @param desc 1061 * Number of descriptors to configure in queue. 1062 * @param socket 1063 * NUMA socket on which memory must be allocated. 1064 * @param[in] conf 1065 * Thresholds parameters. 1066 * 1067 * @return 1068 * A DPDK queue object on success, NULL otherwise and rte_errno is set. 
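*
* Note: the control structure and the elts[] mbuf pointer array are
* allocated as a single block (sizeof(*tmpl) + desc * sizeof(struct
* rte_mbuf *)), so e.g. a 1024-descriptor queue adds 8 KB of pointers
* on a 64-bit build.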
1069 */ 1070 struct mlx5_txq_ctrl * 1071 mlx5_txq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, 1072 unsigned int socket, const struct rte_eth_txconf *conf) 1073 { 1074 struct mlx5_priv *priv = dev->data->dev_private; 1075 struct mlx5_txq_ctrl *tmpl; 1076 1077 tmpl = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, sizeof(*tmpl) + 1078 desc * sizeof(struct rte_mbuf *), 0, socket); 1079 if (!tmpl) { 1080 rte_errno = ENOMEM; 1081 return NULL; 1082 } 1083 if (mlx5_mr_ctrl_init(&tmpl->txq.mr_ctrl, 1084 &priv->sh->cdev->mr_scache.dev_gen, socket)) { 1085 /* rte_errno is already set. */ 1086 goto error; 1087 } 1088 MLX5_ASSERT(desc > MLX5_TX_COMP_THRESH); 1089 tmpl->txq.offloads = conf->offloads | 1090 dev->data->dev_conf.txmode.offloads; 1091 tmpl->priv = priv; 1092 tmpl->socket = socket; 1093 tmpl->txq.elts_n = log2above(desc); 1094 tmpl->txq.elts_s = desc; 1095 tmpl->txq.elts_m = desc - 1; 1096 tmpl->txq.port_id = dev->data->port_id; 1097 tmpl->txq.idx = idx; 1098 txq_set_params(tmpl); 1099 if (txq_adjust_params(tmpl)) 1100 goto error; 1101 if (txq_calc_wqebb_cnt(tmpl) > 1102 priv->sh->dev_cap.max_qp_wr) { 1103 DRV_LOG(ERR, 1104 "port %u Tx WQEBB count (%d) exceeds the limit (%d)," 1105 " try smaller queue size", 1106 dev->data->port_id, txq_calc_wqebb_cnt(tmpl), 1107 priv->sh->dev_cap.max_qp_wr); 1108 rte_errno = ENOMEM; 1109 goto error; 1110 } 1111 __atomic_fetch_add(&tmpl->refcnt, 1, __ATOMIC_RELAXED); 1112 tmpl->is_hairpin = false; 1113 LIST_INSERT_HEAD(&priv->txqsctrl, tmpl, next); 1114 return tmpl; 1115 error: 1116 mlx5_mr_btree_free(&tmpl->txq.mr_ctrl.cache_bh); 1117 mlx5_free(tmpl); 1118 return NULL; 1119 } 1120 1121 /** 1122 * Create a DPDK Tx hairpin queue. 1123 * 1124 * @param dev 1125 * Pointer to Ethernet device. 1126 * @param idx 1127 * TX queue index. 1128 * @param desc 1129 * Number of descriptors to configure in queue. 1130 * @param hairpin_conf 1131 * The hairpin configuration. 1132 * 1133 * @return 1134 * A DPDK queue object on success, NULL otherwise and rte_errno is set. 1135 */ 1136 struct mlx5_txq_ctrl * 1137 mlx5_txq_hairpin_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, 1138 const struct rte_eth_hairpin_conf *hairpin_conf) 1139 { 1140 struct mlx5_priv *priv = dev->data->dev_private; 1141 struct mlx5_txq_ctrl *tmpl; 1142 1143 tmpl = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, sizeof(*tmpl), 0, 1144 SOCKET_ID_ANY); 1145 if (!tmpl) { 1146 rte_errno = ENOMEM; 1147 return NULL; 1148 } 1149 tmpl->priv = priv; 1150 tmpl->socket = SOCKET_ID_ANY; 1151 tmpl->txq.elts_n = log2above(desc); 1152 tmpl->txq.port_id = dev->data->port_id; 1153 tmpl->txq.idx = idx; 1154 tmpl->hairpin_conf = *hairpin_conf; 1155 tmpl->is_hairpin = true; 1156 __atomic_fetch_add(&tmpl->refcnt, 1, __ATOMIC_RELAXED); 1157 LIST_INSERT_HEAD(&priv->txqsctrl, tmpl, next); 1158 return tmpl; 1159 } 1160 1161 /** 1162 * Get a Tx queue. 1163 * 1164 * @param dev 1165 * Pointer to Ethernet device. 1166 * @param idx 1167 * TX queue index. 1168 * 1169 * @return 1170 * A pointer to the queue if it exists. 1171 */ 1172 struct mlx5_txq_ctrl * 1173 mlx5_txq_get(struct rte_eth_dev *dev, uint16_t idx) 1174 { 1175 struct mlx5_priv *priv = dev->data->dev_private; 1176 struct mlx5_txq_data *txq_data = (*priv->txqs)[idx]; 1177 struct mlx5_txq_ctrl *ctrl = NULL; 1178 1179 if (txq_data) { 1180 ctrl = container_of(txq_data, struct mlx5_txq_ctrl, txq); 1181 __atomic_fetch_add(&ctrl->refcnt, 1, __ATOMIC_RELAXED); 1182 } 1183 return ctrl; 1184 } 1185 1186 /** 1187 * Release a Tx queue. 
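*
* Drops one reference taken by mlx5_txq_get() or at queue creation;
* the underlying DevX/Verbs object is released as the count falls and
* the control structure itself is freed only when it reaches zero.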
1188 * 1189 * @param dev 1190 * Pointer to Ethernet device. 1191 * @param idx 1192 * TX queue index. 1193 * 1194 * @return 1195 * 1 while a reference on it exists, 0 when freed. 1196 */ 1197 int 1198 mlx5_txq_release(struct rte_eth_dev *dev, uint16_t idx) 1199 { 1200 struct mlx5_priv *priv = dev->data->dev_private; 1201 struct mlx5_txq_ctrl *txq_ctrl; 1202 1203 if (priv->txqs == NULL || (*priv->txqs)[idx] == NULL) 1204 return 0; 1205 txq_ctrl = container_of((*priv->txqs)[idx], struct mlx5_txq_ctrl, txq); 1206 if (__atomic_fetch_sub(&txq_ctrl->refcnt, 1, __ATOMIC_RELAXED) - 1 > 1) 1207 return 1; 1208 if (txq_ctrl->obj) { 1209 priv->obj_ops.txq_obj_release(txq_ctrl->obj); 1210 LIST_REMOVE(txq_ctrl->obj, next); 1211 mlx5_free(txq_ctrl->obj); 1212 txq_ctrl->obj = NULL; 1213 } 1214 if (!txq_ctrl->is_hairpin) { 1215 if (txq_ctrl->txq.fcqs) { 1216 mlx5_free(txq_ctrl->txq.fcqs); 1217 txq_ctrl->txq.fcqs = NULL; 1218 } 1219 txq_free_elts(txq_ctrl); 1220 dev->data->tx_queue_state[idx] = RTE_ETH_QUEUE_STATE_STOPPED; 1221 } 1222 if (!__atomic_load_n(&txq_ctrl->refcnt, __ATOMIC_RELAXED)) { 1223 if (!txq_ctrl->is_hairpin) 1224 mlx5_mr_btree_free(&txq_ctrl->txq.mr_ctrl.cache_bh); 1225 LIST_REMOVE(txq_ctrl, next); 1226 mlx5_free(txq_ctrl); 1227 (*priv->txqs)[idx] = NULL; 1228 } 1229 return 0; 1230 } 1231 1232 /** 1233 * Verify if the queue can be released. 1234 * 1235 * @param dev 1236 * Pointer to Ethernet device. 1237 * @param idx 1238 * TX queue index. 1239 * 1240 * @return 1241 * 1 if the queue can be released. 1242 */ 1243 int 1244 mlx5_txq_releasable(struct rte_eth_dev *dev, uint16_t idx) 1245 { 1246 struct mlx5_priv *priv = dev->data->dev_private; 1247 struct mlx5_txq_ctrl *txq; 1248 1249 if (!(*priv->txqs)[idx]) 1250 return -1; 1251 txq = container_of((*priv->txqs)[idx], struct mlx5_txq_ctrl, txq); 1252 return (__atomic_load_n(&txq->refcnt, __ATOMIC_RELAXED) == 1); 1253 } 1254 1255 /** 1256 * Verify the Tx Queue list is empty 1257 * 1258 * @param dev 1259 * Pointer to Ethernet device. 1260 * 1261 * @return 1262 * The number of object not released. 1263 */ 1264 int 1265 mlx5_txq_verify(struct rte_eth_dev *dev) 1266 { 1267 struct mlx5_priv *priv = dev->data->dev_private; 1268 struct mlx5_txq_ctrl *txq_ctrl; 1269 int ret = 0; 1270 1271 LIST_FOREACH(txq_ctrl, &priv->txqsctrl, next) { 1272 DRV_LOG(DEBUG, "port %u Tx queue %u still referenced", 1273 dev->data->port_id, txq_ctrl->txq.idx); 1274 ++ret; 1275 } 1276 return ret; 1277 } 1278 1279 int 1280 mlx5_txq_get_sqn(struct mlx5_txq_ctrl *txq) 1281 { 1282 return txq->is_hairpin ? 
txq->obj->sq->id : txq->obj->sq_obj.sq->id;
1283 }
1284
1285 int
1286 rte_pmd_mlx5_external_sq_enable(uint16_t port_id, uint32_t sq_num)
1287 {
1288 struct rte_eth_dev *dev;
1289 struct mlx5_priv *priv;
1290 uint32_t flow;
1291
1292 if (!rte_eth_dev_is_valid_port(port_id)) {
1293 DRV_LOG(ERR, "There is no Ethernet device for port %u.",
1294 port_id);
1295 rte_errno = ENODEV;
1296 return -rte_errno;
1297 }
1298 dev = &rte_eth_devices[port_id];
1299 priv = dev->data->dev_private;
1300 if ((!priv->representor && !priv->master) ||
1301 !priv->sh->config.dv_esw_en) {
1302 DRV_LOG(ERR, "Port %u must be representor or master port in E-Switch mode.",
1303 port_id);
1304 rte_errno = EINVAL;
1305 return -rte_errno;
1306 }
1307 if (sq_num == 0) {
1308 DRV_LOG(ERR, "Invalid SQ number.");
1309 rte_errno = EINVAL;
1310 return -rte_errno;
1311 }
1312 #ifdef HAVE_MLX5_HWS_SUPPORT
1313 if (priv->sh->config.dv_flow_en == 2) {
1314 if (mlx5_flow_hw_esw_create_sq_miss_flow(dev, sq_num, true))
1315 return -rte_errno;
1316 if (priv->sh->config.repr_matching &&
1317 mlx5_flow_hw_tx_repr_matching_flow(dev, sq_num, true)) {
1318 mlx5_flow_hw_esw_destroy_sq_miss_flow(dev, sq_num);
1319 return -rte_errno;
1320 }
1321 return 0;
1322 }
1323 #endif
1324 flow = mlx5_flow_create_devx_sq_miss_flow(dev, sq_num);
1325 if (flow > 0)
1326 return 0;
1327 DRV_LOG(ERR, "Port %u failed to create default miss flow for SQ %u.",
1328 port_id, sq_num);
1329 return -rte_errno;
1330 }
1331
1332 /**
1333 * Set the Tx queue dynamic timestamp (mask and offset).
1334 *
1335 * @param[in] dev
1336 * Pointer to the Ethernet device structure.
1337 */
1338 void
1339 mlx5_txq_dynf_timestamp_set(struct rte_eth_dev *dev)
1340 {
1341 struct mlx5_priv *priv = dev->data->dev_private;
1342 struct mlx5_dev_ctx_shared *sh = priv->sh;
1343 struct mlx5_txq_data *data;
1344 int off, nbit;
1345 unsigned int i;
1346 uint64_t mask = 0;
1347 uint64_t ts_mask;
1348
1349 if (sh->dev_cap.rt_timestamp ||
1350 !sh->cdev->config.hca_attr.dev_freq_khz)
1351 ts_mask = MLX5_TS_MASK_SECS << 32;
1352 else
1353 ts_mask = rte_align64pow2(MLX5_TS_MASK_SECS * 1000ull *
1354 sh->cdev->config.hca_attr.dev_freq_khz);
1355 ts_mask = rte_cpu_to_be_64(ts_mask - 1ull);
1356 nbit = rte_mbuf_dynflag_lookup
1357 (RTE_MBUF_DYNFLAG_TX_TIMESTAMP_NAME, NULL);
1358 off = rte_mbuf_dynfield_lookup
1359 (RTE_MBUF_DYNFIELD_TIMESTAMP_NAME, NULL);
1360 if (nbit >= 0 && off >= 0 &&
1361 (sh->txpp.refcnt || priv->sh->cdev->config.hca_attr.wait_on_time))
1362 mask = 1ULL << nbit;
1363 for (i = 0; i != priv->txqs_n; ++i) {
1364 data = (*priv->txqs)[i];
1365 if (!data)
1366 continue;
1367 data->sh = sh;
1368 data->ts_mask = mask;
1369 data->ts_offset = off;
1370 data->rt_timestamp = sh->dev_cap.rt_timestamp;
1371 data->rt_timemask = (data->offloads &
1372 RTE_ETH_TX_OFFLOAD_SEND_ON_TIMESTAMP) ?
1373 ts_mask : 0; 1374 } 1375 } 1376 1377 int mlx5_count_aggr_ports(struct rte_eth_dev *dev) 1378 { 1379 struct mlx5_priv *priv = dev->data->dev_private; 1380 1381 return priv->sh->bond.n_port; 1382 } 1383 1384 int mlx5_map_aggr_tx_affinity(struct rte_eth_dev *dev, uint16_t tx_queue_id, 1385 uint8_t affinity) 1386 { 1387 struct mlx5_txq_ctrl *txq_ctrl; 1388 struct mlx5_txq_data *txq; 1389 struct mlx5_priv *priv; 1390 1391 priv = dev->data->dev_private; 1392 if (!mlx5_devx_obj_ops_en(priv->sh)) { 1393 DRV_LOG(ERR, "Tx affinity mapping isn't supported by Verbs API."); 1394 rte_errno = ENOTSUP; 1395 return -rte_errno; 1396 } 1397 txq = (*priv->txqs)[tx_queue_id]; 1398 if (!txq) 1399 return -1; 1400 txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq); 1401 if (tx_queue_id >= priv->txqs_n) { 1402 DRV_LOG(ERR, "port %u Tx queue index out of range (%u >= %u)", 1403 dev->data->port_id, tx_queue_id, priv->txqs_n); 1404 rte_errno = EOVERFLOW; 1405 return -rte_errno; 1406 } 1407 if (affinity > priv->num_lag_ports) { 1408 DRV_LOG(ERR, "port %u unable to setup Tx queue index %u" 1409 " affinity is %u exceeds the maximum %u", dev->data->port_id, 1410 tx_queue_id, affinity, priv->num_lag_ports); 1411 rte_errno = EINVAL; 1412 return -rte_errno; 1413 } 1414 DRV_LOG(DEBUG, "port %u configuring queue %u for aggregated affinity %u", 1415 dev->data->port_id, tx_queue_id, affinity); 1416 txq_ctrl->txq.tx_aggr_affinity = affinity; 1417 return 0; 1418 } 1419
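/*
 * Usage sketch (relies on the generic ethdev API, not on symbols from this
 * file): mlx5_count_aggr_ports() and mlx5_map_aggr_tx_affinity() back
 * rte_eth_dev_count_aggr_ports() and rte_eth_dev_map_aggr_tx_affinity(),
 * so an application pinning Tx queue 0 to the first physical port of a
 * bonding device would do roughly:
 *
 *	int n = rte_eth_dev_count_aggr_ports(port_id);
 *
 *	if (n > 0)
 *		rte_eth_dev_map_aggr_tx_affinity(port_id, 0, 1);
 */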