/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2015 6WIND S.A.
 * Copyright 2015 Mellanox Technologies, Ltd
 */

#include <stddef.h>
#include <errno.h>
#include <string.h>
#include <stdint.h>
#include <unistd.h>
#include <inttypes.h>

#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <ethdev_driver.h>
#include <bus_pci_driver.h>
#include <rte_common.h>
#include <rte_eal_paging.h>

#include <mlx5_common.h>
#include <mlx5_common_mr.h>
#include <mlx5_malloc.h>

#include "mlx5_defs.h"
#include "mlx5_utils.h"
#include "mlx5.h"
#include "mlx5_tx.h"
#include "mlx5_rxtx.h"
#include "mlx5_autoconf.h"
#include "mlx5_devx.h"
#include "rte_pmd_mlx5.h"
#include "mlx5_flow.h"

/**
 * Allocate TX queue elements.
 *
 * @param txq_ctrl
 *   Pointer to TX queue structure.
 */
void
txq_alloc_elts(struct mlx5_txq_ctrl *txq_ctrl)
{
	const unsigned int elts_n = 1 << txq_ctrl->txq.elts_n;
	unsigned int i;

	for (i = 0; (i != elts_n); ++i)
		txq_ctrl->txq.elts[i] = NULL;
	DRV_LOG(DEBUG, "port %u Tx queue %u allocated and configured %u WRs",
		PORT_ID(txq_ctrl->priv), txq_ctrl->txq.idx, elts_n);
	txq_ctrl->txq.elts_head = 0;
	txq_ctrl->txq.elts_tail = 0;
	txq_ctrl->txq.elts_comp = 0;
}

/**
 * Free TX queue elements.
 *
 * @param txq_ctrl
 *   Pointer to TX queue structure.
 */
void
txq_free_elts(struct mlx5_txq_ctrl *txq_ctrl)
{
	const uint16_t elts_n = 1 << txq_ctrl->txq.elts_n;
	const uint16_t elts_m = elts_n - 1;
	uint16_t elts_head = txq_ctrl->txq.elts_head;
	uint16_t elts_tail = txq_ctrl->txq.elts_tail;
	struct rte_mbuf *(*elts)[] = &txq_ctrl->txq.elts;

	DRV_LOG(DEBUG, "port %u Tx queue %u freeing WRs",
		PORT_ID(txq_ctrl->priv), txq_ctrl->txq.idx);
	txq_ctrl->txq.elts_head = 0;
	txq_ctrl->txq.elts_tail = 0;
	txq_ctrl->txq.elts_comp = 0;

	while (elts_tail != elts_head) {
		struct rte_mbuf *elt = (*elts)[elts_tail & elts_m];

		MLX5_ASSERT(elt != NULL);
		rte_pktmbuf_free_seg(elt);
#ifdef RTE_LIBRTE_MLX5_DEBUG
		/* Poisoning. */
		memset(&(*elts)[elts_tail & elts_m],
		       0x77,
		       sizeof((*elts)[elts_tail & elts_m]));
#endif
		++elts_tail;
	}
}

/**
 * Returns the per-port supported offloads.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   Supported Tx offloads.
 */
uint64_t
mlx5_get_tx_port_offloads(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	uint64_t offloads = (RTE_ETH_TX_OFFLOAD_MULTI_SEGS |
			     RTE_ETH_TX_OFFLOAD_VLAN_INSERT);
	struct mlx5_port_config *config = &priv->config;
	struct mlx5_dev_cap *dev_cap = &priv->sh->dev_cap;

	if (dev_cap->hw_csum)
		offloads |= (RTE_ETH_TX_OFFLOAD_IPV4_CKSUM |
			     RTE_ETH_TX_OFFLOAD_UDP_CKSUM |
			     RTE_ETH_TX_OFFLOAD_TCP_CKSUM);
	if (dev_cap->tso)
		offloads |= RTE_ETH_TX_OFFLOAD_TCP_TSO;
	if (priv->sh->config.tx_pp ||
	    priv->sh->cdev->config.hca_attr.wait_on_time)
		offloads |= RTE_ETH_TX_OFFLOAD_SEND_ON_TIMESTAMP;
	if (dev_cap->swp) {
		if (dev_cap->swp & MLX5_SW_PARSING_CSUM_CAP)
			offloads |= RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM;
		if (dev_cap->swp & MLX5_SW_PARSING_TSO_CAP)
			offloads |= (RTE_ETH_TX_OFFLOAD_IP_TNL_TSO |
				     RTE_ETH_TX_OFFLOAD_UDP_TNL_TSO);
	}
	if (dev_cap->tunnel_en) {
		if (dev_cap->hw_csum)
			offloads |= RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM;
		if (dev_cap->tso) {
			if (dev_cap->tunnel_en &
			    MLX5_TUNNELED_OFFLOADS_VXLAN_CAP)
				offloads |= RTE_ETH_TX_OFFLOAD_VXLAN_TNL_TSO;
			if (dev_cap->tunnel_en &
			    MLX5_TUNNELED_OFFLOADS_GRE_CAP)
				offloads |= RTE_ETH_TX_OFFLOAD_GRE_TNL_TSO;
			if (dev_cap->tunnel_en &
			    MLX5_TUNNELED_OFFLOADS_GENEVE_CAP)
				offloads |= RTE_ETH_TX_OFFLOAD_GENEVE_TNL_TSO;
		}
	}
	if (!config->mprq.enabled)
		offloads |= RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE;
	return offloads;
}

/* Fetches and drops all SW-owned and error CQEs to synchronize CQ. */
static void
txq_sync_cq(struct mlx5_txq_data *txq)
{
	volatile struct mlx5_cqe *cqe;
	int ret, i;

	i = txq->cqe_s;
	do {
		cqe = &txq->cqes[txq->cq_ci & txq->cqe_m];
		ret = check_cqe(cqe, txq->cqe_s, txq->cq_ci);
		if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) {
			if (likely(ret != MLX5_CQE_STATUS_ERR)) {
				/* No new CQEs in completion queue. */
				MLX5_ASSERT(ret == MLX5_CQE_STATUS_HW_OWN);
				break;
			}
		}
		++txq->cq_ci;
	} while (--i);
	/* Move all CQEs to HW ownership. */
	for (i = 0; i < txq->cqe_s; i++) {
		cqe = &txq->cqes[i];
		cqe->op_own = MLX5_CQE_INVALIDATE;
	}
	/* Resync CQE and WQE (WQ in reset state). */
	rte_io_wmb();
	*txq->cq_db = rte_cpu_to_be_32(txq->cq_ci);
	txq->cq_pi = txq->cq_ci;
	rte_io_wmb();
}

/**
 * Tx queue stop. Device queue goes to the idle state,
 * all involved mbufs are freed from elts/WQ.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param idx
 *   Tx queue index.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_tx_queue_stop_primary(struct rte_eth_dev *dev, uint16_t idx)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_data *txq = (*priv->txqs)[idx];
	struct mlx5_txq_ctrl *txq_ctrl =
			container_of(txq, struct mlx5_txq_ctrl, txq);
	int ret;

	MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
	/* Move QP to RESET state. */
	ret = priv->obj_ops.txq_obj_modify(txq_ctrl->obj, MLX5_TXQ_MOD_RDY2RST,
					   (uint8_t)priv->dev_port);
	if (ret)
		return ret;
	/* Handle all send completions. */
	txq_sync_cq(txq);
	/* Free elts stored in the SQ. */
	txq_free_elts(txq_ctrl);
	/* Prevent writing new pkts to SQ by setting no free WQE. */
	txq->wqe_ci = txq->wqe_s;
	txq->wqe_pi = 0;
	txq->elts_comp = 0;
	/* Set the actual queue state. */
	dev->data->tx_queue_state[idx] = RTE_ETH_QUEUE_STATE_STOPPED;
	return 0;
}

/**
 * Tx queue stop. Device queue goes to the idle state,
 * all involved mbufs are freed from elts/WQ.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param idx
 *   Tx queue index.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_tx_queue_stop(struct rte_eth_dev *dev, uint16_t idx)
{
	int ret;

	if (rte_eth_dev_is_tx_hairpin_queue(dev, idx)) {
		DRV_LOG(ERR, "Hairpin queue can't be stopped");
		rte_errno = EINVAL;
		return -EINVAL;
	}
	if (dev->data->tx_queue_state[idx] == RTE_ETH_QUEUE_STATE_STOPPED)
		return 0;
	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
		ret = mlx5_mp_os_req_queue_control(dev, idx,
						   MLX5_MP_REQ_QUEUE_TX_STOP);
	} else {
		ret = mlx5_tx_queue_stop_primary(dev, idx);
	}
	return ret;
}

/**
 * Tx queue start. Device queue goes to the ready state,
 * queue indices are reset and the queue can transmit again.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param idx
 *   Tx queue index.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_tx_queue_start_primary(struct rte_eth_dev *dev, uint16_t idx)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_data *txq = (*priv->txqs)[idx];
	struct mlx5_txq_ctrl *txq_ctrl =
			container_of(txq, struct mlx5_txq_ctrl, txq);
	int ret;

	MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
	ret = priv->obj_ops.txq_obj_modify(txq_ctrl->obj,
					   MLX5_TXQ_MOD_RST2RDY,
					   (uint8_t)priv->dev_port);
	if (ret)
		return ret;
	txq_ctrl->txq.wqe_ci = 0;
	txq_ctrl->txq.wqe_pi = 0;
	txq_ctrl->txq.elts_comp = 0;
	/* Set the actual queue state. */
	dev->data->tx_queue_state[idx] = RTE_ETH_QUEUE_STATE_STARTED;
	return 0;
}

/**
 * Tx queue start. Device queue goes to the ready state,
 * queue indices are reset and the queue can transmit again.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param idx
 *   Tx queue index.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_tx_queue_start(struct rte_eth_dev *dev, uint16_t idx)
{
	int ret;

	if (rte_eth_dev_is_tx_hairpin_queue(dev, idx)) {
		DRV_LOG(ERR, "Hairpin queue can't be started");
		rte_errno = EINVAL;
		return -EINVAL;
	}
	if (dev->data->tx_queue_state[idx] == RTE_ETH_QUEUE_STATE_STARTED)
		return 0;
	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
		ret = mlx5_mp_os_req_queue_control(dev, idx,
						   MLX5_MP_REQ_QUEUE_TX_START);
	} else {
		ret = mlx5_tx_queue_start_primary(dev, idx);
	}
	return ret;
}
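
/*
 * Illustrative sketch (not part of the driver): the stop/start callbacks above
 * are reached through the generic ethdev API. A hypothetical application would
 * quiesce and restart a Tx queue roughly as follows; error handling is reduced
 * to the minimum and the port/queue numbers are placeholders.
 *
 *	#include <rte_ethdev.h>
 *
 *	static int
 *	restart_tx_queue(uint16_t port_id, uint16_t queue_id)
 *	{
 *		int ret;
 *
 *		ret = rte_eth_dev_tx_queue_stop(port_id, queue_id);
 *		if (ret != 0)
 *			return ret;
 *		// ... application-level bookkeeping, if any ...
 *		return rte_eth_dev_tx_queue_start(port_id, queue_id);
 *	}
 *
 * In a secondary process the request is forwarded to the primary over the
 * multi-process channel, as implemented above.
 */
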
/**
 * Tx queue presetup checks.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param idx
 *   Tx queue index.
 * @param desc
 *   Number of descriptors to configure in queue.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_tx_queue_pre_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t *desc)
{
	struct mlx5_priv *priv = dev->data->dev_private;

	if (*desc <= MLX5_TX_COMP_THRESH) {
		DRV_LOG(WARNING,
			"port %u number of descriptors requested for Tx queue"
			" %u must be higher than MLX5_TX_COMP_THRESH, using %u"
			" instead of %u", dev->data->port_id, idx,
			MLX5_TX_COMP_THRESH + 1, *desc);
		*desc = MLX5_TX_COMP_THRESH + 1;
	}
	if (!rte_is_power_of_2(*desc)) {
		*desc = 1 << log2above(*desc);
		DRV_LOG(WARNING,
			"port %u increased number of descriptors in Tx queue"
			" %u to the next power of two (%d)",
			dev->data->port_id, idx, *desc);
	}
	DRV_LOG(DEBUG, "port %u configuring queue %u for %u descriptors",
		dev->data->port_id, idx, *desc);
	if (idx >= priv->txqs_n) {
		DRV_LOG(ERR, "port %u Tx queue index out of range (%u >= %u)",
			dev->data->port_id, idx, priv->txqs_n);
		rte_errno = EOVERFLOW;
		return -rte_errno;
	}
	if (!mlx5_txq_releasable(dev, idx)) {
		rte_errno = EBUSY;
		DRV_LOG(ERR, "port %u unable to release queue index %u",
			dev->data->port_id, idx);
		return -rte_errno;
	}
	mlx5_txq_release(dev, idx);
	return 0;
}

/**
 * DPDK callback to configure a TX queue.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param idx
 *   TX queue index.
 * @param desc
 *   Number of descriptors to configure in queue.
 * @param socket
 *   NUMA socket on which memory must be allocated.
 * @param[in] conf
 *   Thresholds parameters.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
		    unsigned int socket, const struct rte_eth_txconf *conf)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_data *txq = (*priv->txqs)[idx];
	struct mlx5_txq_ctrl *txq_ctrl =
		container_of(txq, struct mlx5_txq_ctrl, txq);
	int res;

	res = mlx5_tx_queue_pre_setup(dev, idx, &desc);
	if (res)
		return res;
	txq_ctrl = mlx5_txq_new(dev, idx, desc, socket, conf);
	if (!txq_ctrl) {
		DRV_LOG(ERR, "port %u unable to allocate queue index %u",
			dev->data->port_id, idx);
		return -rte_errno;
	}
	DRV_LOG(DEBUG, "port %u adding Tx queue %u to list",
		dev->data->port_id, idx);
	(*priv->txqs)[idx] = &txq_ctrl->txq;
	return 0;
}
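
/*
 * Illustrative sketch (not part of the driver): mlx5_tx_queue_setup() is the
 * backend of rte_eth_tx_queue_setup(). A hypothetical application configures a
 * Tx queue like this; the descriptor count is a placeholder and will be rounded
 * up by mlx5_tx_queue_pre_setup() if it is too small or not a power of two.
 *
 *	#include <rte_ethdev.h>
 *
 *	static int
 *	setup_one_txq(uint16_t port_id, uint16_t queue_id)
 *	{
 *		struct rte_eth_dev_info info;
 *		struct rte_eth_txconf txconf;
 *		int ret;
 *
 *		ret = rte_eth_dev_info_get(port_id, &info);
 *		if (ret != 0)
 *			return ret;
 *		txconf = info.default_txconf;
 *		// 512 descriptors, memory on the port's NUMA socket.
 *		return rte_eth_tx_queue_setup(port_id, queue_id, 512,
 *					      rte_eth_dev_socket_id(port_id),
 *					      &txconf);
 *	}
 */
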
/**
 * DPDK callback to configure a TX hairpin queue.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param idx
 *   TX queue index.
 * @param desc
 *   Number of descriptors to configure in queue.
 * @param[in] hairpin_conf
 *   The hairpin binding configuration.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_tx_hairpin_queue_setup(struct rte_eth_dev *dev, uint16_t idx,
			    uint16_t desc,
			    const struct rte_eth_hairpin_conf *hairpin_conf)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_data *txq = (*priv->txqs)[idx];
	struct mlx5_txq_ctrl *txq_ctrl =
		container_of(txq, struct mlx5_txq_ctrl, txq);
	int res;

	res = mlx5_tx_queue_pre_setup(dev, idx, &desc);
	if (res)
		return res;
	if (hairpin_conf->peer_count != 1) {
		rte_errno = EINVAL;
		DRV_LOG(ERR, "port %u unable to setup Tx hairpin queue index %u"
			" peer count is %u", dev->data->port_id,
			idx, hairpin_conf->peer_count);
		return -rte_errno;
	}
	if (hairpin_conf->peers[0].port == dev->data->port_id) {
		if (hairpin_conf->peers[0].queue >= priv->rxqs_n) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u unable to setup Tx hairpin queue"
				" index %u, Rx queue %u is larger than %u",
				dev->data->port_id, idx,
				hairpin_conf->peers[0].queue, priv->rxqs_n);
			return -rte_errno;
		}
	} else {
		if (hairpin_conf->manual_bind == 0 ||
		    hairpin_conf->tx_explicit == 0) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u unable to setup Tx hairpin queue"
				" index %u peer port %u with attributes %u %u",
				dev->data->port_id, idx,
				hairpin_conf->peers[0].port,
				hairpin_conf->manual_bind,
				hairpin_conf->tx_explicit);
			return -rte_errno;
		}
	}
	txq_ctrl = mlx5_txq_hairpin_new(dev, idx, desc, hairpin_conf);
	if (!txq_ctrl) {
		DRV_LOG(ERR, "port %u unable to allocate queue index %u",
			dev->data->port_id, idx);
		return -rte_errno;
	}
	DRV_LOG(DEBUG, "port %u adding Tx queue %u to list",
		dev->data->port_id, idx);
	(*priv->txqs)[idx] = &txq_ctrl->txq;
	dev->data->tx_queue_state[idx] = RTE_ETH_QUEUE_STATE_HAIRPIN;
	return 0;
}

/**
 * DPDK callback to release a TX queue.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param qid
 *   Transmit queue index.
 */
void
mlx5_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
{
	struct mlx5_txq_data *txq = dev->data->tx_queues[qid];

	if (txq == NULL)
		return;
	DRV_LOG(DEBUG, "port %u removing Tx queue %u from list",
		dev->data->port_id, qid);
	mlx5_txq_release(dev, qid);
}

/**
 * Remap UAR register of a Tx queue for secondary process.
 *
 * Remapped address is stored at the table in the process private structure of
 * the device, indexed by queue index.
 *
 * @param txq_ctrl
 *   Pointer to Tx queue control structure.
 * @param fd
 *   Verbs file descriptor to map UAR pages.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
txq_uar_init_secondary(struct mlx5_txq_ctrl *txq_ctrl, int fd)
{
	struct mlx5_priv *priv = txq_ctrl->priv;
	struct mlx5_proc_priv *ppriv = MLX5_PROC_PRIV(PORT_ID(priv));
	struct mlx5_proc_priv *primary_ppriv = priv->sh->pppriv;
	struct mlx5_txq_data *txq = &txq_ctrl->txq;
	void *addr;
	uintptr_t uar_va;
	uintptr_t offset;
	const size_t page_size = rte_mem_page_size();
	if (page_size == (size_t)-1) {
		DRV_LOG(ERR, "Failed to get mem page size");
		rte_errno = ENOMEM;
		return -rte_errno;
	}

	if (txq_ctrl->is_hairpin)
		return 0;
	MLX5_ASSERT(ppriv);
	/*
	 * As in rdma-core, UARs are mapped at OS page size granularity.
	 * Ref to libmlx5 function: mlx5_init_context()
	 */
	uar_va = (uintptr_t)primary_ppriv->uar_table[txq->idx].db;
	offset = uar_va & (page_size - 1); /* Offset in page. */
	addr = rte_mem_map(NULL, page_size, RTE_PROT_WRITE, RTE_MAP_SHARED,
			   fd, txq_ctrl->uar_mmap_offset);
	if (!addr) {
		DRV_LOG(ERR, "Port %u mmap failed for BF reg of txq %u.",
			txq->port_id, txq->idx);
		rte_errno = ENXIO;
		return -rte_errno;
	}
	addr = RTE_PTR_ADD(addr, offset);
	ppriv->uar_table[txq->idx].db = addr;
#ifndef RTE_ARCH_64
	ppriv->uar_table[txq->idx].sl_p =
			primary_ppriv->uar_table[txq->idx].sl_p;
#endif
	return 0;
}

/**
 * Unmap UAR register of a Tx queue for secondary process.
 *
 * @param txq_ctrl
 *   Pointer to Tx queue control structure.
 */
static void
txq_uar_uninit_secondary(struct mlx5_txq_ctrl *txq_ctrl)
{
	struct mlx5_proc_priv *ppriv = MLX5_PROC_PRIV(PORT_ID(txq_ctrl->priv));
	void *addr;
	const size_t page_size = rte_mem_page_size();
	if (page_size == (size_t)-1) {
		DRV_LOG(ERR, "Failed to get mem page size");
		rte_errno = ENOMEM;
		return;
	}

	if (txq_ctrl->is_hairpin)
		return;
	addr = ppriv->uar_table[txq_ctrl->txq.idx].db;
	rte_mem_unmap(RTE_PTR_ALIGN_FLOOR(addr, page_size), page_size);
}

/**
 * Deinitialize Tx UAR registers for secondary process.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
void
mlx5_tx_uar_uninit_secondary(struct rte_eth_dev *dev)
{
	struct mlx5_proc_priv *ppriv = (struct mlx5_proc_priv *)
					dev->process_private;
	const size_t page_size = rte_mem_page_size();
	void *addr;
	unsigned int i;

	if (page_size == (size_t)-1) {
		DRV_LOG(ERR, "Failed to get mem page size");
		return;
	}
	MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_SECONDARY);
	for (i = 0; i != ppriv->uar_table_sz; ++i) {
		if (!ppriv->uar_table[i].db)
			continue;
		addr = ppriv->uar_table[i].db;
		rte_mem_unmap(RTE_PTR_ALIGN_FLOOR(addr, page_size), page_size);
	}
}

/**
 * Initialize Tx UAR registers for secondary process.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fd
 *   Verbs file descriptor to map UAR pages.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_tx_uar_init_secondary(struct rte_eth_dev *dev, int fd)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_data *txq;
	struct mlx5_txq_ctrl *txq_ctrl;
	unsigned int i;
	int ret;

	MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_SECONDARY);
	for (i = 0; i != priv->txqs_n; ++i) {
		if (!(*priv->txqs)[i])
			continue;
		txq = (*priv->txqs)[i];
		txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
		if (txq_ctrl->is_hairpin)
			continue;
		MLX5_ASSERT(txq->idx == (uint16_t)i);
		ret = txq_uar_init_secondary(txq_ctrl, fd);
		if (ret)
			goto error;
	}
	return 0;
error:
	/* Rollback. */
	do {
		if (!(*priv->txqs)[i])
			continue;
		txq = (*priv->txqs)[i];
		txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
		txq_uar_uninit_secondary(txq_ctrl);
	} while (i--);
	return -rte_errno;
}

/**
 * Verify the Verbs Tx queue list is empty.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   The number of objects not released.
 */
int
mlx5_txq_obj_verify(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	int ret = 0;
	struct mlx5_txq_obj *txq_obj;

	LIST_FOREACH(txq_obj, &priv->txqsobj, next) {
		DRV_LOG(DEBUG, "port %u Verbs Tx queue %u still referenced",
			dev->data->port_id, txq_obj->txq_ctrl->txq.idx);
		++ret;
	}
	return ret;
}

/**
 * Calculate the total number of WQEBB for Tx queue.
 *
 * Simplified version of calc_sq_size() in rdma-core.
 *
 * @param txq_ctrl
 *   Pointer to Tx queue control structure.
 *
 * @return
 *   The number of WQEBB.
 */
static int
txq_calc_wqebb_cnt(struct mlx5_txq_ctrl *txq_ctrl)
{
	unsigned int wqe_size;
	const unsigned int desc = 1 << txq_ctrl->txq.elts_n;

	wqe_size = MLX5_WQE_CSEG_SIZE +
		   MLX5_WQE_ESEG_SIZE +
		   MLX5_WSEG_SIZE -
		   MLX5_ESEG_MIN_INLINE_SIZE +
		   txq_ctrl->max_inline_data;
	return rte_align32pow2(wqe_size * desc) / MLX5_WQE_SIZE;
}

/**
 * Calculate the maximal inline data size for Tx queue.
 *
 * @param txq_ctrl
 *   Pointer to Tx queue control structure.
 *
 * @return
 *   The maximal inline data size.
 */
static unsigned int
txq_calc_inline_max(struct mlx5_txq_ctrl *txq_ctrl)
{
	const unsigned int desc = 1 << txq_ctrl->txq.elts_n;
	struct mlx5_priv *priv = txq_ctrl->priv;
	unsigned int wqe_size;

	wqe_size = priv->sh->dev_cap.max_qp_wr / desc;
	if (!wqe_size)
		return 0;
	/*
	 * This calculation is derived from the source of
	 * mlx5_calc_send_wqe() in the rdma-core library.
	 */
	wqe_size = wqe_size * MLX5_WQE_SIZE -
		   MLX5_WQE_CSEG_SIZE -
		   MLX5_WQE_ESEG_SIZE -
		   MLX5_WSEG_SIZE -
		   MLX5_WSEG_SIZE +
		   MLX5_DSEG_MIN_INLINE_SIZE;
	return wqe_size;
}
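
/*
 * Worked example (illustrative, not used by the code): assuming the usual PRM
 * constants of a 64-byte WQEBB (MLX5_WQE_SIZE), 16-byte control/Ethernet/data
 * segments and an 18-byte minimal Ethernet-segment inline part, a queue of
 * 1024 descriptors with data inlining disabled (max_inline_data == 0) gives
 * in txq_calc_wqebb_cnt():
 *
 *	wqe_size = 16 + 16 + 16 - 18 + 0 = 30 bytes per descriptor
 *	rte_align32pow2(30 * 1024) = 32768
 *	32768 / 64 = 512 WQEBBs
 *
 * Conversely, txq_calc_inline_max() bounds the inline data length so that the
 * resulting WQEBB count stays within dev_cap.max_qp_wr; txq_adjust_params()
 * below relies on that bound when trimming the inline settings.
 */
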
/**
 * Set Tx queue parameters from device configuration.
 *
 * @param txq_ctrl
 *   Pointer to Tx queue control structure.
 */
static void
txq_set_params(struct mlx5_txq_ctrl *txq_ctrl)
{
	struct mlx5_priv *priv = txq_ctrl->priv;
	struct mlx5_port_config *config = &priv->config;
	struct mlx5_dev_cap *dev_cap = &priv->sh->dev_cap;
	unsigned int inlen_send; /* Inline data for ordinary SEND.*/
	unsigned int inlen_empw; /* Inline data for enhanced MPW. */
	unsigned int inlen_mode; /* Minimal required Inline data. */
	unsigned int txqs_inline; /* Min Tx queues to enable inline. */
	uint64_t dev_txoff = priv->dev_data->dev_conf.txmode.offloads;
	bool tso = txq_ctrl->txq.offloads & (RTE_ETH_TX_OFFLOAD_TCP_TSO |
					     RTE_ETH_TX_OFFLOAD_VXLAN_TNL_TSO |
					     RTE_ETH_TX_OFFLOAD_GRE_TNL_TSO |
					     RTE_ETH_TX_OFFLOAD_IP_TNL_TSO |
					     RTE_ETH_TX_OFFLOAD_UDP_TNL_TSO);
	bool vlan_inline;
	unsigned int temp;

	txq_ctrl->txq.fast_free =
		!!((txq_ctrl->txq.offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) &&
		   !(txq_ctrl->txq.offloads & RTE_ETH_TX_OFFLOAD_MULTI_SEGS) &&
		   !config->mprq.enabled);
	if (config->txqs_inline == MLX5_ARG_UNSET)
		txqs_inline =
#if defined(RTE_ARCH_ARM64)
		(priv->pci_dev && priv->pci_dev->id.device_id ==
			PCI_DEVICE_ID_MELLANOX_BLUEFIELD) ?
			MLX5_INLINE_MAX_TXQS_BLUEFIELD :
#endif
			MLX5_INLINE_MAX_TXQS;
	else
		txqs_inline = (unsigned int)config->txqs_inline;
	inlen_send = (config->txq_inline_max == MLX5_ARG_UNSET) ?
		     MLX5_SEND_DEF_INLINE_LEN :
		     (unsigned int)config->txq_inline_max;
	inlen_empw = (config->txq_inline_mpw == MLX5_ARG_UNSET) ?
		     MLX5_EMPW_DEF_INLINE_LEN :
		     (unsigned int)config->txq_inline_mpw;
	inlen_mode = (config->txq_inline_min == MLX5_ARG_UNSET) ?
		     0 : (unsigned int)config->txq_inline_min;
	if (config->mps != MLX5_MPW_ENHANCED && config->mps != MLX5_MPW)
		inlen_empw = 0;
	/*
	 * If a minimal amount of data to inline is requested,
	 * we MUST enable inlining. This is the case for ConnectX-4,
	 * which usually requires L2 inlined for correct operation,
	 * and for ConnectX-4 Lx, which requires L2-L4 inlined to
	 * support E-Switch Flows.
	 */
	if (inlen_mode) {
		if (inlen_mode <= MLX5_ESEG_MIN_INLINE_SIZE) {
			/*
			 * Optimize minimal inlining for single
			 * segment packets to fill one WQEBB
			 * without gaps.
			 */
			temp = MLX5_ESEG_MIN_INLINE_SIZE;
		} else {
			temp = inlen_mode - MLX5_ESEG_MIN_INLINE_SIZE;
			temp = RTE_ALIGN(temp, MLX5_WSEG_SIZE) +
			       MLX5_ESEG_MIN_INLINE_SIZE;
			temp = RTE_MIN(temp, MLX5_SEND_MAX_INLINE_LEN);
		}
		if (temp != inlen_mode) {
			DRV_LOG(INFO,
				"port %u minimal required inline setting"
				" aligned from %u to %u",
				PORT_ID(priv), inlen_mode, temp);
			inlen_mode = temp;
		}
	}
	/*
	 * If the port is configured to support VLAN insertion and the device
	 * does not support this feature in HW (for NICs before ConnectX-5
	 * or when the wqe_vlan_insert flag is not set), we must enable
	 * data inline on all queues because it is supported by a single
	 * tx_burst routine.
	 */
	txq_ctrl->txq.vlan_en = config->hw_vlan_insert;
	vlan_inline = (dev_txoff & RTE_ETH_TX_OFFLOAD_VLAN_INSERT) &&
		      !config->hw_vlan_insert;
	/*
	 * If there are only a few Tx queues, it is preferable to save
	 * CPU cycles and disable data inlining entirely.
	 */
	if (inlen_send && priv->txqs_n >= txqs_inline) {
		/*
		 * The data sent with ordinary MLX5_OPCODE_SEND
		 * may be inlined in the Ethernet Segment, align the
		 * length accordingly to fit entire WQEBBs.
		 */
		temp = RTE_MAX(inlen_send,
			       MLX5_ESEG_MIN_INLINE_SIZE + MLX5_WQE_DSEG_SIZE);
		temp -= MLX5_ESEG_MIN_INLINE_SIZE + MLX5_WQE_DSEG_SIZE;
		temp = RTE_ALIGN(temp, MLX5_WQE_SIZE);
		temp += MLX5_ESEG_MIN_INLINE_SIZE + MLX5_WQE_DSEG_SIZE;
		temp = RTE_MIN(temp, MLX5_WQE_SIZE_MAX +
				     MLX5_ESEG_MIN_INLINE_SIZE -
				     MLX5_WQE_CSEG_SIZE -
				     MLX5_WQE_ESEG_SIZE -
				     MLX5_WQE_DSEG_SIZE * 2);
		temp = RTE_MIN(temp, MLX5_SEND_MAX_INLINE_LEN);
		temp = RTE_MAX(temp, inlen_mode);
		if (temp != inlen_send) {
			DRV_LOG(INFO,
				"port %u ordinary send inline setting"
				" aligned from %u to %u",
				PORT_ID(priv), inlen_send, temp);
			inlen_send = temp;
		}
		/*
		 * Not aligned to cache lines, but to WQEs.
		 * The first bytes of data (initial alignment)
		 * are copied explicitly at the beginning of the
		 * inlining buffer in the Ethernet Segment.
		 */
		MLX5_ASSERT(inlen_send >= MLX5_ESEG_MIN_INLINE_SIZE);
		MLX5_ASSERT(inlen_send <= MLX5_WQE_SIZE_MAX +
					  MLX5_ESEG_MIN_INLINE_SIZE -
					  MLX5_WQE_CSEG_SIZE -
					  MLX5_WQE_ESEG_SIZE -
					  MLX5_WQE_DSEG_SIZE * 2);
	} else if (inlen_mode) {
		/*
		 * If minimal inlining is requested we must
		 * enable inlining in general, despite the
		 * number of configured queues. Ignore the
		 * txq_inline_max devarg, this is not
		 * full-featured inline.
		 */
		inlen_send = inlen_mode;
		inlen_empw = 0;
	} else if (vlan_inline) {
		/*
		 * Hardware does not report offload for
		 * VLAN insertion, we must enable data inline
		 * to implement the feature in software.
		 */
		inlen_send = MLX5_ESEG_MIN_INLINE_SIZE;
		inlen_empw = 0;
	} else {
		inlen_send = 0;
		inlen_empw = 0;
	}
	txq_ctrl->txq.inlen_send = inlen_send;
	txq_ctrl->txq.inlen_mode = inlen_mode;
	txq_ctrl->txq.inlen_empw = 0;
	if (inlen_send && inlen_empw && priv->txqs_n >= txqs_inline) {
		/*
		 * The data sent with MLX5_OPCODE_ENHANCED_MPSW
		 * may be inlined in the Data Segment, align the
		 * length accordingly to fit entire WQEBBs.
		 */
		temp = RTE_MAX(inlen_empw,
			       MLX5_WQE_SIZE + MLX5_DSEG_MIN_INLINE_SIZE);
		temp -= MLX5_DSEG_MIN_INLINE_SIZE;
		temp = RTE_ALIGN(temp, MLX5_WQE_SIZE);
		temp += MLX5_DSEG_MIN_INLINE_SIZE;
		temp = RTE_MIN(temp, MLX5_WQE_SIZE_MAX +
				     MLX5_DSEG_MIN_INLINE_SIZE -
				     MLX5_WQE_CSEG_SIZE -
				     MLX5_WQE_ESEG_SIZE -
				     MLX5_WQE_DSEG_SIZE);
		temp = RTE_MIN(temp, MLX5_EMPW_MAX_INLINE_LEN);
		if (temp != inlen_empw) {
			DRV_LOG(INFO,
				"port %u enhanced empw inline setting"
				" aligned from %u to %u",
				PORT_ID(priv), inlen_empw, temp);
			inlen_empw = temp;
		}
		MLX5_ASSERT(inlen_empw >= MLX5_ESEG_MIN_INLINE_SIZE);
		MLX5_ASSERT(inlen_empw <= MLX5_WQE_SIZE_MAX +
					  MLX5_DSEG_MIN_INLINE_SIZE -
					  MLX5_WQE_CSEG_SIZE -
					  MLX5_WQE_ESEG_SIZE -
					  MLX5_WQE_DSEG_SIZE);
		txq_ctrl->txq.inlen_empw = inlen_empw;
	}
	txq_ctrl->max_inline_data = RTE_MAX(inlen_send, inlen_empw);
	if (tso) {
		txq_ctrl->max_tso_header = MLX5_MAX_TSO_HEADER;
		txq_ctrl->max_inline_data = RTE_MAX(txq_ctrl->max_inline_data,
						    MLX5_MAX_TSO_HEADER);
		txq_ctrl->txq.tso_en = 1;
	}
	if (((RTE_ETH_TX_OFFLOAD_VXLAN_TNL_TSO & txq_ctrl->txq.offloads) &&
	    (dev_cap->tunnel_en & MLX5_TUNNELED_OFFLOADS_VXLAN_CAP)) |
	   ((RTE_ETH_TX_OFFLOAD_GRE_TNL_TSO & txq_ctrl->txq.offloads) &&
	    (dev_cap->tunnel_en & MLX5_TUNNELED_OFFLOADS_GRE_CAP)) |
	   ((RTE_ETH_TX_OFFLOAD_GENEVE_TNL_TSO & txq_ctrl->txq.offloads) &&
	    (dev_cap->tunnel_en & MLX5_TUNNELED_OFFLOADS_GENEVE_CAP)) |
	   (dev_cap->swp & MLX5_SW_PARSING_TSO_CAP))
		txq_ctrl->txq.tunnel_en = 1;
	txq_ctrl->txq.swp_en = (((RTE_ETH_TX_OFFLOAD_IP_TNL_TSO |
				  RTE_ETH_TX_OFFLOAD_UDP_TNL_TSO) &
				  txq_ctrl->txq.offloads) && (dev_cap->swp &
				  MLX5_SW_PARSING_TSO_CAP)) |
				((RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM &
				  txq_ctrl->txq.offloads) && (dev_cap->swp &
				  MLX5_SW_PARSING_CSUM_CAP));
}
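
/*
 * Illustrative note (not used by the code): the inline parameters consumed by
 * txq_set_params() come from the mlx5 device arguments. A hypothetical EAL
 * command line overriding them could look like the following; the PCI address
 * and values are placeholders and the built-in defaults are usually fine.
 *
 *	dpdk-testpmd -a 0000:3b:00.0,txq_inline_max=128,txq_inline_mpw=64,txqs_min_inline=4 -- -i
 *
 * txq_inline_max maps to config->txq_inline_max (ordinary SEND inline),
 * txq_inline_mpw to config->txq_inline_mpw (enhanced MPW inline),
 * txq_inline_min to config->txq_inline_min (mandatory minimal inline) and
 * txqs_min_inline to config->txqs_inline (the queue-count threshold above).
 */
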
/**
 * Adjust Tx queue data inline parameters for large queue sizes.
 * The data inline feature requires multiple WQEs to fit the packets,
 * and if a large number of Tx descriptors is requested by the application
 * the total WQE amount may exceed the hardware capabilities. If the
 * default inline settings are used we can try to adjust them to
 * meet the hardware requirements without exceeding the queue size.
 *
 * @param txq_ctrl
 *   Pointer to Tx queue control structure.
 *
 * @return
 *   Zero on success, otherwise the parameters can not be adjusted.
 */
static int
txq_adjust_params(struct mlx5_txq_ctrl *txq_ctrl)
{
	struct mlx5_priv *priv = txq_ctrl->priv;
	struct mlx5_port_config *config = &priv->config;
	unsigned int max_inline;

	max_inline = txq_calc_inline_max(txq_ctrl);
	if (!txq_ctrl->txq.inlen_send) {
		/*
		 * Inline data feature is not engaged at all.
		 * There is nothing to adjust.
		 */
		return 0;
	}
	if (txq_ctrl->max_inline_data <= max_inline) {
		/*
		 * The requested inline data length does not
		 * exceed queue capabilities.
		 */
		return 0;
	}
	if (txq_ctrl->txq.inlen_mode > max_inline) {
		DRV_LOG(ERR,
			"minimal data inline requirements (%u) are not"
			" satisfied (%u) on port %u, try the smaller"
			" Tx queue size (%d)",
			txq_ctrl->txq.inlen_mode, max_inline,
			priv->dev_data->port_id, priv->sh->dev_cap.max_qp_wr);
		goto error;
	}
	if (txq_ctrl->txq.inlen_send > max_inline &&
	    config->txq_inline_max != MLX5_ARG_UNSET &&
	    config->txq_inline_max > (int)max_inline) {
		DRV_LOG(ERR,
			"txq_inline_max requirements (%u) are not"
			" satisfied (%u) on port %u, try the smaller"
			" Tx queue size (%d)",
			txq_ctrl->txq.inlen_send, max_inline,
			priv->dev_data->port_id, priv->sh->dev_cap.max_qp_wr);
		goto error;
	}
	if (txq_ctrl->txq.inlen_empw > max_inline &&
	    config->txq_inline_mpw != MLX5_ARG_UNSET &&
	    config->txq_inline_mpw > (int)max_inline) {
		DRV_LOG(ERR,
			"txq_inline_mpw requirements (%u) are not"
			" satisfied (%u) on port %u, try the smaller"
			" Tx queue size (%d)",
			txq_ctrl->txq.inlen_empw, max_inline,
			priv->dev_data->port_id, priv->sh->dev_cap.max_qp_wr);
		goto error;
	}
	if (txq_ctrl->txq.tso_en && max_inline < MLX5_MAX_TSO_HEADER) {
		DRV_LOG(ERR,
			"tso header inline requirements (%u) are not"
			" satisfied (%u) on port %u, try the smaller"
			" Tx queue size (%d)",
			MLX5_MAX_TSO_HEADER, max_inline,
			priv->dev_data->port_id, priv->sh->dev_cap.max_qp_wr);
		goto error;
	}
	if (txq_ctrl->txq.inlen_send > max_inline) {
		DRV_LOG(WARNING,
			"adjust txq_inline_max (%u->%u)"
			" due to large Tx queue on port %u",
			txq_ctrl->txq.inlen_send, max_inline,
			priv->dev_data->port_id);
		txq_ctrl->txq.inlen_send = max_inline;
	}
	if (txq_ctrl->txq.inlen_empw > max_inline) {
		DRV_LOG(WARNING,
			"adjust txq_inline_mpw (%u->%u)"
			" due to large Tx queue on port %u",
			txq_ctrl->txq.inlen_empw, max_inline,
			priv->dev_data->port_id);
		txq_ctrl->txq.inlen_empw = max_inline;
	}
	txq_ctrl->max_inline_data = RTE_MAX(txq_ctrl->txq.inlen_send,
					    txq_ctrl->txq.inlen_empw);
	MLX5_ASSERT(txq_ctrl->max_inline_data <= max_inline);
	MLX5_ASSERT(txq_ctrl->txq.inlen_mode <= max_inline);
	MLX5_ASSERT(txq_ctrl->txq.inlen_mode <= txq_ctrl->txq.inlen_send);
	MLX5_ASSERT(txq_ctrl->txq.inlen_mode <= txq_ctrl->txq.inlen_empw ||
		    !txq_ctrl->txq.inlen_empw);
	return 0;
error:
	rte_errno = ENOMEM;
	return -ENOMEM;
}

/**
 * Create a DPDK Tx queue.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param idx
 *   TX queue index.
 * @param desc
 *   Number of descriptors to configure in queue.
 * @param socket
 *   NUMA socket on which memory must be allocated.
 * @param[in] conf
 *   Thresholds parameters.
 *
 * @return
 *   A DPDK queue object on success, NULL otherwise and rte_errno is set.
 */
struct mlx5_txq_ctrl *
mlx5_txq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
	     unsigned int socket, const struct rte_eth_txconf *conf)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_ctrl *tmpl;

	tmpl = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, sizeof(*tmpl) +
			   desc * sizeof(struct rte_mbuf *), 0, socket);
	if (!tmpl) {
		rte_errno = ENOMEM;
		return NULL;
	}
	if (mlx5_mr_ctrl_init(&tmpl->txq.mr_ctrl,
			      &priv->sh->cdev->mr_scache.dev_gen, socket)) {
		/* rte_errno is already set. */
		goto error;
	}
	MLX5_ASSERT(desc > MLX5_TX_COMP_THRESH);
	tmpl->txq.offloads = conf->offloads |
			     dev->data->dev_conf.txmode.offloads;
	tmpl->priv = priv;
	tmpl->socket = socket;
	tmpl->txq.elts_n = log2above(desc);
	tmpl->txq.elts_s = desc;
	tmpl->txq.elts_m = desc - 1;
	tmpl->txq.port_id = dev->data->port_id;
	tmpl->txq.idx = idx;
	txq_set_params(tmpl);
	if (txq_adjust_params(tmpl))
		goto error;
	if (txq_calc_wqebb_cnt(tmpl) >
	    priv->sh->dev_cap.max_qp_wr) {
		DRV_LOG(ERR,
			"port %u Tx WQEBB count (%d) exceeds the limit (%d),"
			" try smaller queue size",
			dev->data->port_id, txq_calc_wqebb_cnt(tmpl),
			priv->sh->dev_cap.max_qp_wr);
		rte_errno = ENOMEM;
		goto error;
	}
	rte_atomic_fetch_add_explicit(&tmpl->refcnt, 1, rte_memory_order_relaxed);
	tmpl->is_hairpin = false;
	LIST_INSERT_HEAD(&priv->txqsctrl, tmpl, next);
	return tmpl;
error:
	mlx5_mr_btree_free(&tmpl->txq.mr_ctrl.cache_bh);
	mlx5_free(tmpl);
	return NULL;
}

/**
 * Create a DPDK Tx hairpin queue.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param idx
 *   TX queue index.
 * @param desc
 *   Number of descriptors to configure in queue.
 * @param hairpin_conf
 *   The hairpin configuration.
 *
 * @return
 *   A DPDK queue object on success, NULL otherwise and rte_errno is set.
 */
struct mlx5_txq_ctrl *
mlx5_txq_hairpin_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
		     const struct rte_eth_hairpin_conf *hairpin_conf)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_ctrl *tmpl;

	tmpl = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, sizeof(*tmpl), 0,
			   SOCKET_ID_ANY);
	if (!tmpl) {
		rte_errno = ENOMEM;
		return NULL;
	}
	tmpl->priv = priv;
	tmpl->socket = SOCKET_ID_ANY;
	tmpl->txq.elts_n = log2above(desc);
	tmpl->txq.port_id = dev->data->port_id;
	tmpl->txq.idx = idx;
	tmpl->hairpin_conf = *hairpin_conf;
	tmpl->is_hairpin = true;
	rte_atomic_fetch_add_explicit(&tmpl->refcnt, 1, rte_memory_order_relaxed);
	LIST_INSERT_HEAD(&priv->txqsctrl, tmpl, next);
	return tmpl;
}

/**
 * Get a Tx queue.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param idx
 *   TX queue index.
 *
 * @return
 *   A pointer to the queue if it exists.
 */
struct mlx5_txq_ctrl *
mlx5_txq_get(struct rte_eth_dev *dev, uint16_t idx)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_data *txq_data = (*priv->txqs)[idx];
	struct mlx5_txq_ctrl *ctrl = NULL;

	if (txq_data) {
		ctrl = container_of(txq_data, struct mlx5_txq_ctrl, txq);
		rte_atomic_fetch_add_explicit(&ctrl->refcnt, 1, rte_memory_order_relaxed);
	}
	return ctrl;
}

/**
 * Get an external Tx queue.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param idx
 *   External Tx queue index.
 *
 * @return
 *   A pointer to the queue if it exists, NULL otherwise.
 */
struct mlx5_external_q *
mlx5_ext_txq_get(struct rte_eth_dev *dev, uint16_t idx)
{
	struct mlx5_priv *priv = dev->data->dev_private;

	MLX5_ASSERT(mlx5_is_external_txq(dev, idx));
	return &priv->ext_txqs[idx - MLX5_EXTERNAL_TX_QUEUE_ID_MIN];
}

/**
 * Verify the external Tx Queue list is empty.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   The number of objects not released.
 */
int
mlx5_ext_txq_verify(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_external_q *txq;
	uint32_t i;
	int ret = 0;

	if (priv->ext_txqs == NULL)
		return 0;

	for (i = MLX5_EXTERNAL_TX_QUEUE_ID_MIN; i <= UINT16_MAX; ++i) {
		txq = mlx5_ext_txq_get(dev, i);
		if (txq->refcnt < 2)
			continue;
		DRV_LOG(DEBUG, "Port %u external TxQ %u still referenced.",
			dev->data->port_id, i);
		++ret;
	}
	return ret;
}

/**
 * Release a Tx queue.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param idx
 *   TX queue index.
 *
 * @return
 *   1 while a reference on it exists, 0 when freed.
 */
int
mlx5_txq_release(struct rte_eth_dev *dev, uint16_t idx)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_ctrl *txq_ctrl;

	if (priv->txqs == NULL || (*priv->txqs)[idx] == NULL)
		return 0;
	txq_ctrl = container_of((*priv->txqs)[idx], struct mlx5_txq_ctrl, txq);
	if (rte_atomic_fetch_sub_explicit(&txq_ctrl->refcnt, 1, rte_memory_order_relaxed) - 1 > 1)
		return 1;
	if (txq_ctrl->obj) {
		priv->obj_ops.txq_obj_release(txq_ctrl->obj);
		LIST_REMOVE(txq_ctrl->obj, next);
		mlx5_free(txq_ctrl->obj);
		txq_ctrl->obj = NULL;
	}
	if (!txq_ctrl->is_hairpin) {
		if (txq_ctrl->txq.fcqs) {
			mlx5_free(txq_ctrl->txq.fcqs);
			txq_ctrl->txq.fcqs = NULL;
		}
		txq_free_elts(txq_ctrl);
		dev->data->tx_queue_state[idx] = RTE_ETH_QUEUE_STATE_STOPPED;
	}
	if (!rte_atomic_load_explicit(&txq_ctrl->refcnt, rte_memory_order_relaxed)) {
		if (!txq_ctrl->is_hairpin)
			mlx5_mr_btree_free(&txq_ctrl->txq.mr_ctrl.cache_bh);
		LIST_REMOVE(txq_ctrl, next);
		mlx5_free(txq_ctrl);
		(*priv->txqs)[idx] = NULL;
	}
	return 0;
}

/**
 * Verify if the queue can be released.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param idx
 *   TX queue index.
 *
 * @return
 *   1 if the queue can be released.
 */
int
mlx5_txq_releasable(struct rte_eth_dev *dev, uint16_t idx)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_ctrl *txq;

	if (!(*priv->txqs)[idx])
		return -1;
	txq = container_of((*priv->txqs)[idx], struct mlx5_txq_ctrl, txq);
	return (rte_atomic_load_explicit(&txq->refcnt, rte_memory_order_relaxed) == 1);
}

/**
 * Verify the Tx Queue list is empty.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   The number of objects not released.
 */
int
mlx5_txq_verify(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_ctrl *txq_ctrl;
	int ret = 0;

	LIST_FOREACH(txq_ctrl, &priv->txqsctrl, next) {
		DRV_LOG(DEBUG, "port %u Tx queue %u still referenced",
			dev->data->port_id, txq_ctrl->txq.idx);
		++ret;
	}
	return ret;
}

int
mlx5_txq_get_sqn(struct mlx5_txq_ctrl *txq)
{
	return txq->is_hairpin ? txq->obj->sq->id : txq->obj->sq_obj.sq->id;
}

int
rte_pmd_mlx5_external_sq_enable(uint16_t port_id, uint32_t sq_num)
{
	struct rte_eth_dev *dev;
	struct mlx5_priv *priv;
	uint32_t flow;

	if (!rte_eth_dev_is_valid_port(port_id)) {
		DRV_LOG(ERR, "There is no Ethernet device for port %u.",
			port_id);
		rte_errno = ENODEV;
		return -rte_errno;
	}
	dev = &rte_eth_devices[port_id];
	priv = dev->data->dev_private;
	if ((!priv->representor && !priv->master) ||
	    !priv->sh->config.dv_esw_en) {
		DRV_LOG(ERR, "Port %u must be a representor or master port in E-Switch mode.",
			port_id);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	if (sq_num == 0) {
		DRV_LOG(ERR, "Invalid SQ number.");
		rte_errno = EINVAL;
		return -rte_errno;
	}
#ifdef HAVE_MLX5_HWS_SUPPORT
	if (priv->sh->config.dv_flow_en == 2) {
		bool sq_miss_created = false;

		if (priv->sh->config.fdb_def_rule) {
			if (mlx5_flow_hw_esw_create_sq_miss_flow(dev, sq_num, true))
				return -rte_errno;
			sq_miss_created = true;
		}

		if (priv->sh->config.repr_matching &&
		    mlx5_flow_hw_tx_repr_matching_flow(dev, sq_num, true)) {
			if (sq_miss_created)
				mlx5_flow_hw_esw_destroy_sq_miss_flow(dev, sq_num);
			return -rte_errno;
		}
		return 0;
	}
#endif
	flow = mlx5_flow_create_devx_sq_miss_flow(dev, sq_num);
	if (flow > 0)
		return 0;
	DRV_LOG(ERR, "Port %u failed to create default miss flow for SQ %u.",
		port_id, sq_num);
	return -rte_errno;
}
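
/*
 * Illustrative sketch (not part of the driver):
 * rte_pmd_mlx5_external_sq_enable() is meant for applications that create
 * their own SQs (e.g. via DevX) on a representor or master port and want the
 * default miss flows installed for them. A hypothetical caller, with
 * "sq_number" obtained from its own queue object:
 *
 *	#include <rte_pmd_mlx5.h>
 *
 *	if (rte_pmd_mlx5_external_sq_enable(port_id, sq_number) != 0)
 *		rte_panic("cannot enable external SQ %u: %s\n",
 *			  sq_number, rte_strerror(rte_errno));
 *
 * The port must have been probed in E-Switch mode with dv_esw_en enabled,
 * as checked above.
 */
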
/**
 * Set the Tx queue dynamic timestamp (mask and offset).
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 */
void
mlx5_txq_dynf_timestamp_set(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_dev_ctx_shared *sh = priv->sh;
	struct mlx5_txq_data *data;
	int off, nbit;
	unsigned int i;
	uint64_t mask = 0;
	uint64_t ts_mask;

	if (sh->dev_cap.rt_timestamp ||
	    !sh->cdev->config.hca_attr.dev_freq_khz)
		ts_mask = MLX5_TS_MASK_SECS << 32;
	else
		ts_mask = rte_align64pow2(MLX5_TS_MASK_SECS * 1000ull *
					  sh->cdev->config.hca_attr.dev_freq_khz);
	ts_mask = rte_cpu_to_be_64(ts_mask - 1ull);
	nbit = rte_mbuf_dynflag_lookup
				(RTE_MBUF_DYNFLAG_TX_TIMESTAMP_NAME, NULL);
	off = rte_mbuf_dynfield_lookup
				(RTE_MBUF_DYNFIELD_TIMESTAMP_NAME, NULL);
	if (nbit >= 0 && off >= 0 &&
	    (sh->txpp.refcnt || priv->sh->cdev->config.hca_attr.wait_on_time))
		mask = 1ULL << nbit;
	for (i = 0; i != priv->txqs_n; ++i) {
		data = (*priv->txqs)[i];
		if (!data)
			continue;
		data->sh = sh;
		data->ts_mask = mask;
		data->ts_offset = off;
		data->rt_timestamp = sh->dev_cap.rt_timestamp;
		data->rt_timemask = (data->offloads &
				     RTE_ETH_TX_OFFLOAD_SEND_ON_TIMESTAMP) ?
				     ts_mask : 0;
	}
}

int mlx5_count_aggr_ports(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;

	return priv->sh->bond.n_port;
}

int mlx5_map_aggr_tx_affinity(struct rte_eth_dev *dev, uint16_t tx_queue_id,
			      uint8_t affinity)
{
	struct mlx5_txq_ctrl *txq_ctrl;
	struct mlx5_txq_data *txq;
	struct mlx5_priv *priv;

	priv = dev->data->dev_private;
	if (!mlx5_devx_obj_ops_en(priv->sh)) {
		DRV_LOG(ERR, "Tx affinity mapping isn't supported by Verbs API.");
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	txq = (*priv->txqs)[tx_queue_id];
	if (!txq)
		return -1;
	txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
	if (tx_queue_id >= priv->txqs_n) {
		DRV_LOG(ERR, "port %u Tx queue index out of range (%u >= %u)",
			dev->data->port_id, tx_queue_id, priv->txqs_n);
		rte_errno = EOVERFLOW;
		return -rte_errno;
	}
	if (affinity > priv->num_lag_ports) {
		DRV_LOG(ERR, "port %u unable to setup Tx queue index %u,"
			" affinity %u exceeds the maximum %u",
			dev->data->port_id, tx_queue_id, affinity,
			priv->num_lag_ports);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	DRV_LOG(DEBUG, "port %u configuring queue %u for aggregated affinity %u",
		dev->data->port_id, tx_queue_id, affinity);
	txq_ctrl->txq.tx_aggr_affinity = affinity;
	return 0;
}
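
/*
 * Illustrative sketch (not part of the driver): mlx5_txq_dynf_timestamp_set()
 * only resolves the dynamic mbuf field/flag that an application registers for
 * scheduled sending. A hypothetical sender using the standard dynamic
 * timestamp names could do roughly the following; "when" is a placeholder
 * value in the device clock domain.
 *
 *	#include <rte_mbuf_dyn.h>
 *	#include <rte_bitops.h>
 *
 *	static int ts_off;
 *	static uint64_t ts_flag;
 *
 *	// Once, after the field/flag have been registered and before start:
 *	ts_off = rte_mbuf_dynfield_lookup(RTE_MBUF_DYNFIELD_TIMESTAMP_NAME, NULL);
 *	int bit = rte_mbuf_dynflag_lookup(RTE_MBUF_DYNFLAG_TX_TIMESTAMP_NAME, NULL);
 *	if (ts_off >= 0 && bit >= 0)
 *		ts_flag = RTE_BIT64(bit);
 *
 *	// Per packet:
 *	*RTE_MBUF_DYNFIELD(mbuf, ts_off, uint64_t *) = when;
 *	mbuf->ol_flags |= ts_flag;
 *
 * The port additionally needs RTE_ETH_TX_OFFLOAD_SEND_ON_TIMESTAMP (or the
 * tx_pp devarg) for the driver to honor the timestamp, as reflected in the
 * mask computed above.
 */
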
/**
 * Validate the given external TxQ rte_flow index, and get a pointer to the
 * concurrent external TxQ object to map/unmap.
 *
 * @param[in] port_id
 *   The port identifier of the Ethernet device.
 * @param[in] dpdk_idx
 *   Tx Queue index in rte_flow.
 *
 * @return
 *   Pointer to concurrent external TxQ on success,
 *   NULL otherwise and rte_errno is set.
 */
static struct mlx5_external_q *
mlx5_external_tx_queue_get_validate(uint16_t port_id, uint16_t dpdk_idx)
{
	struct rte_eth_dev *dev;
	struct mlx5_priv *priv;
	int ret;

	if (dpdk_idx < MLX5_EXTERNAL_TX_QUEUE_ID_MIN) {
		DRV_LOG(ERR, "Queue index %u should be in range: [%u, %u].",
			dpdk_idx, MLX5_EXTERNAL_TX_QUEUE_ID_MIN, UINT16_MAX);
		rte_errno = EINVAL;
		return NULL;
	}
	ret = mlx5_devx_extq_port_validate(port_id);
	if (unlikely(ret))
		return NULL;
	dev = &rte_eth_devices[port_id];
	priv = dev->data->dev_private;
	/*
	 * When the user configures remote PD and CTX and the device creates
	 * TxQs by DevX, the external TxQ array is allocated.
	 */
	MLX5_ASSERT(priv->ext_txqs != NULL);
	return &priv->ext_txqs[dpdk_idx - MLX5_EXTERNAL_TX_QUEUE_ID_MIN];
}

int
rte_pmd_mlx5_external_tx_queue_id_map(uint16_t port_id, uint16_t dpdk_idx,
				      uint32_t hw_idx)
{
	struct mlx5_external_q *ext_txq;
	uint32_t unmapped = 0;

	ext_txq = mlx5_external_tx_queue_get_validate(port_id, dpdk_idx);
	if (ext_txq == NULL)
		return -rte_errno;
	if (!rte_atomic_compare_exchange_strong_explicit(&ext_txq->refcnt, &unmapped, 1,
				rte_memory_order_relaxed, rte_memory_order_relaxed)) {
		if (ext_txq->hw_id != hw_idx) {
			DRV_LOG(ERR, "Port %u external TxQ index %u "
				"is already mapped to HW index (requesting is "
				"%u, existing is %u).",
				port_id, dpdk_idx, hw_idx, ext_txq->hw_id);
			rte_errno = EEXIST;
			return -rte_errno;
		}
		DRV_LOG(WARNING, "Port %u external TxQ index %u "
			"is already mapped to the requested HW index (%u)",
			port_id, dpdk_idx, hw_idx);
	} else {
		ext_txq->hw_id = hw_idx;
		DRV_LOG(DEBUG, "Port %u external TxQ index %u "
			"is successfully mapped to the requested HW index (%u)",
			port_id, dpdk_idx, hw_idx);
	}
	return 0;
}

int
rte_pmd_mlx5_external_tx_queue_id_unmap(uint16_t port_id, uint16_t dpdk_idx)
{
	struct mlx5_external_q *ext_txq;
	uint32_t mapped = 1;

	ext_txq = mlx5_external_tx_queue_get_validate(port_id, dpdk_idx);
	if (ext_txq == NULL)
		return -rte_errno;
	if (ext_txq->refcnt > 1) {
		DRV_LOG(ERR, "Port %u external TxQ index %u still referenced.",
			port_id, dpdk_idx);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	if (!rte_atomic_compare_exchange_strong_explicit(&ext_txq->refcnt, &mapped, 0,
				rte_memory_order_relaxed, rte_memory_order_relaxed)) {
		DRV_LOG(ERR, "Port %u external TxQ index %u doesn't exist.",
			port_id, dpdk_idx);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	DRV_LOG(DEBUG,
		"Port %u external TxQ index %u is successfully unmapped.",
		port_id, dpdk_idx);
	return 0;
}
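
/*
 * Illustrative sketch (not part of the driver): the two public functions above
 * let an application associate an rte_flow Tx queue index with the HW index of
 * a queue it created outside of ethdev. A hypothetical sequence, where "sqn"
 * is the HW SQ number owned by the application:
 *
 *	#include <rte_pmd_mlx5.h>
 *
 *	// The index must be in the external range checked by
 *	// mlx5_external_tx_queue_get_validate() above; UINT16_MAX is always
 *	// within that range.
 *	uint16_t ext_idx = UINT16_MAX;
 *
 *	if (rte_pmd_mlx5_external_tx_queue_id_map(port_id, ext_idx, sqn) != 0)
 *		return -rte_errno;
 *	// ... use ext_idx as a queue index in rte_flow rules ...
 *	(void)rte_pmd_mlx5_external_tx_queue_id_unmap(port_id, ext_idx);
 *
 * The port must have been configured with a remote PD/CTX and DevX-created
 * queues, otherwise the validation helper above fails and sets rte_errno.
 */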