/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2015 6WIND S.A.
 * Copyright 2015 Mellanox Technologies, Ltd
 */

#include <stddef.h>
#include <errno.h>
#include <string.h>
#include <stdint.h>
#include <unistd.h>
#include <inttypes.h>

#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <ethdev_driver.h>
#include <bus_pci_driver.h>
#include <rte_common.h>
#include <rte_eal_paging.h>

#include <mlx5_common.h>
#include <mlx5_common_mr.h>
#include <mlx5_malloc.h>

#include "mlx5_defs.h"
#include "mlx5_utils.h"
#include "mlx5.h"
#include "mlx5_tx.h"
#include "mlx5_rxtx.h"
#include "mlx5_autoconf.h"
#include "mlx5_devx.h"
#include "rte_pmd_mlx5.h"
#include "mlx5_flow.h"

/**
 * Allocate TX queue elements.
 *
 * @param txq_ctrl
 *   Pointer to TX queue structure.
 */
void
txq_alloc_elts(struct mlx5_txq_ctrl *txq_ctrl)
{
	const unsigned int elts_n = 1 << txq_ctrl->txq.elts_n;
	unsigned int i;

	for (i = 0; (i != elts_n); ++i)
		txq_ctrl->txq.elts[i] = NULL;
	DRV_LOG(DEBUG, "port %u Tx queue %u allocated and configured %u WRs",
		PORT_ID(txq_ctrl->priv), txq_ctrl->txq.idx, elts_n);
	txq_ctrl->txq.elts_head = 0;
	txq_ctrl->txq.elts_tail = 0;
	txq_ctrl->txq.elts_comp = 0;
}

/**
 * Free TX queue elements.
 *
 * @param txq_ctrl
 *   Pointer to TX queue structure.
 */
void
txq_free_elts(struct mlx5_txq_ctrl *txq_ctrl)
{
	const uint16_t elts_n = 1 << txq_ctrl->txq.elts_n;
	const uint16_t elts_m = elts_n - 1;
	uint16_t elts_head = txq_ctrl->txq.elts_head;
	uint16_t elts_tail = txq_ctrl->txq.elts_tail;
	struct rte_mbuf *(*elts)[] = &txq_ctrl->txq.elts;

	DRV_LOG(DEBUG, "port %u Tx queue %u freeing WRs",
		PORT_ID(txq_ctrl->priv), txq_ctrl->txq.idx);
	txq_ctrl->txq.elts_head = 0;
	txq_ctrl->txq.elts_tail = 0;
	txq_ctrl->txq.elts_comp = 0;

	while (elts_tail != elts_head) {
		struct rte_mbuf *elt = (*elts)[elts_tail & elts_m];

		MLX5_ASSERT(elt != NULL);
		rte_pktmbuf_free_seg(elt);
#ifdef RTE_LIBRTE_MLX5_DEBUG
		/* Poisoning. */
		memset(&(*elts)[elts_tail & elts_m],
		       0x77,
		       sizeof((*elts)[elts_tail & elts_m]));
#endif
		++elts_tail;
	}
}

/**
 * Returns the per-port supported offloads.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   Supported Tx offloads.
 */
uint64_t
mlx5_get_tx_port_offloads(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	uint64_t offloads = (RTE_ETH_TX_OFFLOAD_MULTI_SEGS |
			     RTE_ETH_TX_OFFLOAD_VLAN_INSERT);
	struct mlx5_port_config *config = &priv->config;
	struct mlx5_dev_cap *dev_cap = &priv->sh->dev_cap;

	if (dev_cap->hw_csum)
		offloads |= (RTE_ETH_TX_OFFLOAD_IPV4_CKSUM |
			     RTE_ETH_TX_OFFLOAD_UDP_CKSUM |
			     RTE_ETH_TX_OFFLOAD_TCP_CKSUM);
	if (dev_cap->tso)
		offloads |= RTE_ETH_TX_OFFLOAD_TCP_TSO;
	if (priv->sh->config.tx_pp ||
	    priv->sh->cdev->config.hca_attr.wait_on_time)
		offloads |= RTE_ETH_TX_OFFLOAD_SEND_ON_TIMESTAMP;
	if (dev_cap->swp) {
		if (dev_cap->swp & MLX5_SW_PARSING_CSUM_CAP)
			offloads |= RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM;
		if (dev_cap->swp & MLX5_SW_PARSING_TSO_CAP)
			offloads |= (RTE_ETH_TX_OFFLOAD_IP_TNL_TSO |
				     RTE_ETH_TX_OFFLOAD_UDP_TNL_TSO);
	}
	if (dev_cap->tunnel_en) {
		if (dev_cap->hw_csum)
			offloads |= RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM;
		if (dev_cap->tso) {
			if (dev_cap->tunnel_en &
			    MLX5_TUNNELED_OFFLOADS_VXLAN_CAP)
				offloads |= RTE_ETH_TX_OFFLOAD_VXLAN_TNL_TSO;
			if (dev_cap->tunnel_en &
			    MLX5_TUNNELED_OFFLOADS_GRE_CAP)
				offloads |= RTE_ETH_TX_OFFLOAD_GRE_TNL_TSO;
			if (dev_cap->tunnel_en &
			    MLX5_TUNNELED_OFFLOADS_GENEVE_CAP)
				offloads |= RTE_ETH_TX_OFFLOAD_GENEVE_TNL_TSO;
		}
	}
	if (!config->mprq.enabled)
		offloads |= RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE;
	return offloads;
}

/* Fetches and drops all SW-owned and error CQEs to synchronize CQ. */
static void
txq_sync_cq(struct mlx5_txq_data *txq)
{
	volatile struct mlx5_cqe *cqe;
	int ret, i;

	i = txq->cqe_s;
	do {
		cqe = &txq->cqes[txq->cq_ci & txq->cqe_m];
		ret = check_cqe(cqe, txq->cqe_s, txq->cq_ci);
		if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) {
			if (likely(ret != MLX5_CQE_STATUS_ERR)) {
				/* No new CQEs in completion queue. */
				MLX5_ASSERT(ret == MLX5_CQE_STATUS_HW_OWN);
				break;
			}
		}
		++txq->cq_ci;
	} while (--i);
	/* Move all CQEs to HW ownership. */
	for (i = 0; i < txq->cqe_s; i++) {
		cqe = &txq->cqes[i];
		cqe->op_own = MLX5_CQE_INVALIDATE;
	}
	/* Resync CQE and WQE (WQ in reset state). */
	rte_io_wmb();
	*txq->cq_db = rte_cpu_to_be_32(txq->cq_ci);
	txq->cq_pi = txq->cq_ci;
	rte_io_wmb();
}

/**
 * Tx queue stop. Device queue goes to the idle state,
 * all involved mbufs are freed from elts/WQ.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param idx
 *   Tx queue index.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_tx_queue_stop_primary(struct rte_eth_dev *dev, uint16_t idx)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_data *txq = (*priv->txqs)[idx];
	struct mlx5_txq_ctrl *txq_ctrl =
			container_of(txq, struct mlx5_txq_ctrl, txq);
	int ret;

	MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
	/* Move QP to RESET state. */
	ret = priv->obj_ops.txq_obj_modify(txq_ctrl->obj, MLX5_TXQ_MOD_RDY2RST,
					   (uint8_t)priv->dev_port);
	if (ret)
		return ret;
	/* Handle all send completions. */
	txq_sync_cq(txq);
	/* Free elts stored in the SQ. */
	txq_free_elts(txq_ctrl);
	/* Prevent writing new pkts to SQ by setting no free WQE. */
	txq->wqe_ci = txq->wqe_s;
	txq->wqe_pi = 0;
	txq->elts_comp = 0;
	/* Set the actual queue state. */
	dev->data->tx_queue_state[idx] = RTE_ETH_QUEUE_STATE_STOPPED;
	return 0;
}

/**
 * Tx queue stop. Device queue goes to the idle state,
 * all involved mbufs are freed from elts/WQ.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param idx
 *   Tx queue index.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_tx_queue_stop(struct rte_eth_dev *dev, uint16_t idx)
{
	int ret;

	if (rte_eth_dev_is_tx_hairpin_queue(dev, idx)) {
		DRV_LOG(ERR, "Hairpin queue can't be stopped");
		rte_errno = EINVAL;
		return -EINVAL;
	}
	if (dev->data->tx_queue_state[idx] == RTE_ETH_QUEUE_STATE_STOPPED)
		return 0;
	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
		ret = mlx5_mp_os_req_queue_control(dev, idx,
						   MLX5_MP_REQ_QUEUE_TX_STOP);
	} else {
		ret = mlx5_tx_queue_stop_primary(dev, idx);
	}
	return ret;
}

/**
 * Tx queue start. Device queue goes to the ready state
 * and transmission over the queue is resumed.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param idx
 *   Tx queue index.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_tx_queue_start_primary(struct rte_eth_dev *dev, uint16_t idx)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_data *txq = (*priv->txqs)[idx];
	struct mlx5_txq_ctrl *txq_ctrl =
			container_of(txq, struct mlx5_txq_ctrl, txq);
	int ret;

	MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
	ret = priv->obj_ops.txq_obj_modify(txq_ctrl->obj,
					   MLX5_TXQ_MOD_RST2RDY,
					   (uint8_t)priv->dev_port);
	if (ret)
		return ret;
	txq_ctrl->txq.wqe_ci = 0;
	txq_ctrl->txq.wqe_pi = 0;
	txq_ctrl->txq.elts_comp = 0;
	/* Set the actual queue state. */
	dev->data->tx_queue_state[idx] = RTE_ETH_QUEUE_STATE_STARTED;
	return 0;
}

/**
 * Tx queue start. Device queue goes to the ready state
 * and transmission over the queue is resumed.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param idx
 *   Tx queue index.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_tx_queue_start(struct rte_eth_dev *dev, uint16_t idx)
{
	int ret;

	if (rte_eth_dev_is_tx_hairpin_queue(dev, idx)) {
		DRV_LOG(ERR, "Hairpin queue can't be started");
		rte_errno = EINVAL;
		return -EINVAL;
	}
	if (dev->data->tx_queue_state[idx] == RTE_ETH_QUEUE_STATE_STARTED)
		return 0;
	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
		ret = mlx5_mp_os_req_queue_control(dev, idx,
						   MLX5_MP_REQ_QUEUE_TX_START);
	} else {
		ret = mlx5_tx_queue_start_primary(dev, idx);
	}
	return ret;
}
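
/*
 * Editorial usage sketch (not part of the driver): the stop/start callbacks
 * above back the generic ethdev API, so an application restarts a Tx queue
 * roughly as below. The port and queue numbers are hypothetical.
 *
 *	uint16_t port_id = 0, queue_id = 0;
 *	int ret;
 *
 *	ret = rte_eth_dev_tx_queue_stop(port_id, queue_id);
 *	if (ret == 0)
 *		ret = rte_eth_dev_tx_queue_start(port_id, queue_id);
 *	if (ret != 0)
 *		printf("Tx queue %u restart failed: %s\n",
 *		       queue_id, rte_strerror(-ret));
 */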

/**
 * Tx queue presetup checks.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param idx
 *   Tx queue index.
 * @param desc
 *   Number of descriptors to configure in queue.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_tx_queue_pre_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t *desc)
{
	struct mlx5_priv *priv = dev->data->dev_private;

	if (*desc > 1 << priv->sh->cdev->config.hca_attr.log_max_wq_sz) {
		DRV_LOG(ERR,
			"port %u number of descriptors requested for Tx queue"
			" %u is more than supported",
			dev->data->port_id, idx);
		rte_errno = EINVAL;
		return -EINVAL;
	}
	if (*desc <= MLX5_TX_COMP_THRESH) {
		DRV_LOG(WARNING,
			"port %u number of descriptors requested for Tx queue"
			" %u must be higher than MLX5_TX_COMP_THRESH, using %u"
			" instead of %u", dev->data->port_id, idx,
			MLX5_TX_COMP_THRESH + 1, *desc);
		*desc = MLX5_TX_COMP_THRESH + 1;
	}
	if (!rte_is_power_of_2(*desc)) {
		*desc = 1 << log2above(*desc);
		DRV_LOG(WARNING,
			"port %u increased number of descriptors in Tx queue"
			" %u to the next power of two (%d)",
			dev->data->port_id, idx, *desc);
	}
	DRV_LOG(DEBUG, "port %u configuring queue %u for %u descriptors",
		dev->data->port_id, idx, *desc);
	if (idx >= priv->txqs_n) {
		DRV_LOG(ERR, "port %u Tx queue index out of range (%u >= %u)",
			dev->data->port_id, idx, priv->txqs_n);
		rte_errno = EOVERFLOW;
		return -rte_errno;
	}
	if (!mlx5_txq_releasable(dev, idx)) {
		rte_errno = EBUSY;
		DRV_LOG(ERR, "port %u unable to release queue index %u",
			dev->data->port_id, idx);
		return -rte_errno;
	}
	mlx5_txq_release(dev, idx);
	return 0;
}

/**
 * DPDK callback to configure a TX queue.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param idx
 *   TX queue index.
 * @param desc
 *   Number of descriptors to configure in queue.
 * @param socket
 *   NUMA socket on which memory must be allocated.
 * @param[in] conf
 *   Thresholds parameters.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
		    unsigned int socket, const struct rte_eth_txconf *conf)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_data *txq = (*priv->txqs)[idx];
	struct mlx5_txq_ctrl *txq_ctrl =
			container_of(txq, struct mlx5_txq_ctrl, txq);
	int res;

	res = mlx5_tx_queue_pre_setup(dev, idx, &desc);
	if (res)
		return res;
	txq_ctrl = mlx5_txq_new(dev, idx, desc, socket, conf);
	if (!txq_ctrl) {
		DRV_LOG(ERR, "port %u unable to allocate queue index %u",
			dev->data->port_id, idx);
		return -rte_errno;
	}
	DRV_LOG(DEBUG, "port %u adding Tx queue %u to list",
		dev->data->port_id, idx);
	(*priv->txqs)[idx] = &txq_ctrl->txq;
	return 0;
}
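
/*
 * Editorial usage sketch (not part of the driver): mlx5_get_tx_port_offloads()
 * and mlx5_tx_queue_setup() are reached through the generic ethdev calls shown
 * below; the descriptor count and offload choice are hypothetical.
 *
 *	struct rte_eth_dev_info dev_info;
 *	struct rte_eth_txconf txconf;
 *	uint16_t port_id = 0, queue_id = 0, nb_desc = 1024;
 *
 *	rte_eth_dev_info_get(port_id, &dev_info);
 *	txconf = dev_info.default_txconf;
 *	if (dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE)
 *		txconf.offloads |= RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE;
 *	rte_eth_tx_queue_setup(port_id, queue_id, nb_desc,
 *			       rte_eth_dev_socket_id(port_id), &txconf);
 */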

/**
 * DPDK callback to configure a TX hairpin queue.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param idx
 *   TX queue index.
 * @param desc
 *   Number of descriptors to configure in queue.
 * @param[in] hairpin_conf
 *   The hairpin binding configuration.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_tx_hairpin_queue_setup(struct rte_eth_dev *dev, uint16_t idx,
			    uint16_t desc,
			    const struct rte_eth_hairpin_conf *hairpin_conf)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_data *txq = (*priv->txqs)[idx];
	struct mlx5_txq_ctrl *txq_ctrl =
			container_of(txq, struct mlx5_txq_ctrl, txq);
	int res;

	res = mlx5_tx_queue_pre_setup(dev, idx, &desc);
	if (res)
		return res;
	if (hairpin_conf->peer_count != 1) {
		rte_errno = EINVAL;
		DRV_LOG(ERR, "port %u unable to setup Tx hairpin queue index %u"
			" peer count is %u", dev->data->port_id,
			idx, hairpin_conf->peer_count);
		return -rte_errno;
	}
	if (hairpin_conf->peers[0].port == dev->data->port_id) {
		if (hairpin_conf->peers[0].queue >= priv->rxqs_n) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u unable to setup Tx hairpin queue"
				" index %u, Rx %u is larger than %u",
				dev->data->port_id, idx,
				hairpin_conf->peers[0].queue, priv->rxqs_n);
			return -rte_errno;
		}
	} else {
		if (hairpin_conf->manual_bind == 0 ||
		    hairpin_conf->tx_explicit == 0) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u unable to setup Tx hairpin queue"
				" index %u peer port %u with attributes %u %u",
				dev->data->port_id, idx,
				hairpin_conf->peers[0].port,
				hairpin_conf->manual_bind,
				hairpin_conf->tx_explicit);
			return -rte_errno;
		}
	}
	txq_ctrl = mlx5_txq_hairpin_new(dev, idx, desc, hairpin_conf);
	if (!txq_ctrl) {
		DRV_LOG(ERR, "port %u unable to allocate queue index %u",
			dev->data->port_id, idx);
		return -rte_errno;
	}
	DRV_LOG(DEBUG, "port %u adding Tx queue %u to list",
		dev->data->port_id, idx);
	(*priv->txqs)[idx] = &txq_ctrl->txq;
	dev->data->tx_queue_state[idx] = RTE_ETH_QUEUE_STATE_HAIRPIN;
	return 0;
}

/**
 * DPDK callback to release a TX queue.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param qid
 *   Transmit queue index.
 */
void
mlx5_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
{
	struct mlx5_txq_data *txq = dev->data->tx_queues[qid];

	if (txq == NULL)
		return;
	DRV_LOG(DEBUG, "port %u removing Tx queue %u from list",
		dev->data->port_id, qid);
	mlx5_txq_release(dev, qid);
}

/**
 * Remap UAR register of a Tx queue for secondary process.
 *
 * The remapped address is stored in a table in the process-private structure
 * of the device, indexed by queue index.
 *
 * @param txq_ctrl
 *   Pointer to Tx queue control structure.
 * @param fd
 *   Verbs file descriptor to map UAR pages.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
txq_uar_init_secondary(struct mlx5_txq_ctrl *txq_ctrl, int fd)
{
	struct mlx5_priv *priv = txq_ctrl->priv;
	struct mlx5_proc_priv *ppriv = MLX5_PROC_PRIV(PORT_ID(priv));
	struct mlx5_proc_priv *primary_ppriv = priv->sh->pppriv;
	struct mlx5_txq_data *txq = &txq_ctrl->txq;
	void *addr;
	uintptr_t uar_va;
	uintptr_t offset;
	const size_t page_size = rte_mem_page_size();
	if (page_size == (size_t)-1) {
		DRV_LOG(ERR, "Failed to get mem page size");
		rte_errno = ENOMEM;
		return -rte_errno;
	}

	if (txq_ctrl->is_hairpin)
		return 0;
	MLX5_ASSERT(ppriv);
	/*
	 * As in rdma-core, UARs are mapped at OS page size granularity.
	 * Refer to the libmlx5 function mlx5_init_context().
	 */
	uar_va = (uintptr_t)primary_ppriv->uar_table[txq->idx].db;
	offset = uar_va & (page_size - 1); /* Offset in page. */
	addr = rte_mem_map(NULL, page_size, RTE_PROT_WRITE, RTE_MAP_SHARED,
			   fd, txq_ctrl->uar_mmap_offset);
	if (!addr) {
		DRV_LOG(ERR, "Port %u mmap failed for BF reg of txq %u.",
			txq->port_id, txq->idx);
		rte_errno = ENXIO;
		return -rte_errno;
	}
	addr = RTE_PTR_ADD(addr, offset);
	ppriv->uar_table[txq->idx].db = addr;
#ifndef RTE_ARCH_64
	ppriv->uar_table[txq->idx].sl_p =
			primary_ppriv->uar_table[txq->idx].sl_p;
#endif
	return 0;
}

/**
 * Unmap UAR register of a Tx queue for secondary process.
 *
 * @param txq_ctrl
 *   Pointer to Tx queue control structure.
 */
static void
txq_uar_uninit_secondary(struct mlx5_txq_ctrl *txq_ctrl)
{
	struct mlx5_proc_priv *ppriv = MLX5_PROC_PRIV(PORT_ID(txq_ctrl->priv));
	void *addr;
	const size_t page_size = rte_mem_page_size();
	if (page_size == (size_t)-1) {
		DRV_LOG(ERR, "Failed to get mem page size");
		rte_errno = ENOMEM;
	}

	if (txq_ctrl->is_hairpin)
		return;
	addr = ppriv->uar_table[txq_ctrl->txq.idx].db;
	rte_mem_unmap(RTE_PTR_ALIGN_FLOOR(addr, page_size), page_size);
}

/**
 * Deinitialize Tx UAR registers for secondary process.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
void
mlx5_tx_uar_uninit_secondary(struct rte_eth_dev *dev)
{
	struct mlx5_proc_priv *ppriv = (struct mlx5_proc_priv *)
					dev->process_private;
	const size_t page_size = rte_mem_page_size();
	void *addr;
	unsigned int i;

	if (page_size == (size_t)-1) {
		DRV_LOG(ERR, "Failed to get mem page size");
		return;
	}
	MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_SECONDARY);
	for (i = 0; i != ppriv->uar_table_sz; ++i) {
		if (!ppriv->uar_table[i].db)
			continue;
		addr = ppriv->uar_table[i].db;
		rte_mem_unmap(RTE_PTR_ALIGN_FLOOR(addr, page_size), page_size);
	}
}

/**
 * Initialize Tx UAR registers for secondary process.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fd
 *   Verbs file descriptor to map UAR pages.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_tx_uar_init_secondary(struct rte_eth_dev *dev, int fd)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_data *txq;
	struct mlx5_txq_ctrl *txq_ctrl;
	unsigned int i;
	int ret;

	MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_SECONDARY);
	for (i = 0; i != priv->txqs_n; ++i) {
		if (!(*priv->txqs)[i])
			continue;
		txq = (*priv->txqs)[i];
		txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
		if (txq_ctrl->is_hairpin)
			continue;
		MLX5_ASSERT(txq->idx == (uint16_t)i);
		ret = txq_uar_init_secondary(txq_ctrl, fd);
		if (ret)
			goto error;
	}
	return 0;
error:
	/* Rollback. */
	do {
		if (!(*priv->txqs)[i])
			continue;
		txq = (*priv->txqs)[i];
		txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
		txq_uar_uninit_secondary(txq_ctrl);
	} while (i--);
	return -rte_errno;
}

/**
 * Verify that the Verbs Tx queue list is empty.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   The number of objects not released.
 */
int
mlx5_txq_obj_verify(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	int ret = 0;
	struct mlx5_txq_obj *txq_obj;

	LIST_FOREACH(txq_obj, &priv->txqsobj, next) {
		DRV_LOG(DEBUG, "port %u Verbs Tx queue %u still referenced",
			dev->data->port_id, txq_obj->txq_ctrl->txq.idx);
		++ret;
	}
	return ret;
}

/**
 * Calculate the total number of WQEBB for Tx queue.
 *
 * Simplified version of calc_sq_size() in rdma-core.
 *
 * @param txq_ctrl
 *   Pointer to Tx queue control structure.
 *
 * @return
 *   The number of WQEBB.
 */
static int
txq_calc_wqebb_cnt(struct mlx5_txq_ctrl *txq_ctrl)
{
	unsigned int wqe_size;
	const unsigned int desc = 1 << txq_ctrl->txq.elts_n;

	wqe_size = MLX5_WQE_CSEG_SIZE +
		   MLX5_WQE_ESEG_SIZE +
		   MLX5_WSEG_SIZE -
		   MLX5_ESEG_MIN_INLINE_SIZE +
		   txq_ctrl->max_inline_data;
	return rte_align32pow2(wqe_size * desc) / MLX5_WQE_SIZE;
}

/**
 * Calculate the maximal inline data size for Tx queue.
 *
 * @param txq_ctrl
 *   Pointer to Tx queue control structure.
 *
 * @return
 *   The maximal inline data size.
 */
static unsigned int
txq_calc_inline_max(struct mlx5_txq_ctrl *txq_ctrl)
{
	const unsigned int desc = 1 << txq_ctrl->txq.elts_n;
	struct mlx5_priv *priv = txq_ctrl->priv;
	unsigned int wqe_size;

	wqe_size = priv->sh->dev_cap.max_qp_wr / desc;
	if (!wqe_size)
		return 0;
	/*
	 * This calculation is derived from the source of
	 * mlx5_calc_send_wqe() in the rdma-core library.
	 */
	wqe_size = wqe_size * MLX5_WQE_SIZE -
		   MLX5_WQE_CSEG_SIZE -
		   MLX5_WQE_ESEG_SIZE -
		   MLX5_WSEG_SIZE -
		   MLX5_WSEG_SIZE +
		   MLX5_DSEG_MIN_INLINE_SIZE;
	return wqe_size;
}

/**
 * Set Tx queue parameters from device configuration.
 *
 * @param txq_ctrl
 *   Pointer to Tx queue control structure.
 */
static void
txq_set_params(struct mlx5_txq_ctrl *txq_ctrl)
{
	struct mlx5_priv *priv = txq_ctrl->priv;
	struct mlx5_port_config *config = &priv->config;
	struct mlx5_dev_cap *dev_cap = &priv->sh->dev_cap;
	unsigned int inlen_send; /* Inline data for ordinary SEND. */
	unsigned int inlen_empw; /* Inline data for enhanced MPW. */
	unsigned int inlen_mode; /* Minimal required Inline data. */
	unsigned int txqs_inline; /* Min Tx queues to enable inline. */
	uint64_t dev_txoff = priv->dev_data->dev_conf.txmode.offloads;
	bool tso = txq_ctrl->txq.offloads & (RTE_ETH_TX_OFFLOAD_TCP_TSO |
					     RTE_ETH_TX_OFFLOAD_VXLAN_TNL_TSO |
					     RTE_ETH_TX_OFFLOAD_GRE_TNL_TSO |
					     RTE_ETH_TX_OFFLOAD_IP_TNL_TSO |
					     RTE_ETH_TX_OFFLOAD_UDP_TNL_TSO);
	bool vlan_inline;
	unsigned int temp;

	txq_ctrl->txq.fast_free =
		!!((txq_ctrl->txq.offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) &&
		   !(txq_ctrl->txq.offloads & RTE_ETH_TX_OFFLOAD_MULTI_SEGS) &&
		   !config->mprq.enabled);
	if (config->txqs_inline == MLX5_ARG_UNSET)
		txqs_inline =
#if defined(RTE_ARCH_ARM64)
		(priv->pci_dev && priv->pci_dev->id.device_id ==
			PCI_DEVICE_ID_MELLANOX_BLUEFIELD) ?
			MLX5_INLINE_MAX_TXQS_BLUEFIELD :
#endif
			MLX5_INLINE_MAX_TXQS;
	else
		txqs_inline = (unsigned int)config->txqs_inline;
	inlen_send = (config->txq_inline_max == MLX5_ARG_UNSET) ?
		     MLX5_SEND_DEF_INLINE_LEN :
		     (unsigned int)config->txq_inline_max;
	inlen_empw = (config->txq_inline_mpw == MLX5_ARG_UNSET) ?
		     MLX5_EMPW_DEF_INLINE_LEN :
		     (unsigned int)config->txq_inline_mpw;
	inlen_mode = (config->txq_inline_min == MLX5_ARG_UNSET) ?
		     0 : (unsigned int)config->txq_inline_min;
	if (config->mps != MLX5_MPW_ENHANCED && config->mps != MLX5_MPW)
		inlen_empw = 0;
	/*
	 * If a minimal amount of data to inline is requested,
	 * we MUST enable inlining. This is the case for ConnectX-4,
	 * which usually requires the L2 headers to be inlined for
	 * correct operation, and for ConnectX-4 Lx, which requires
	 * the L2-L4 headers to be inlined to support E-Switch flows.
	 */
	if (inlen_mode) {
		if (inlen_mode <= MLX5_ESEG_MIN_INLINE_SIZE) {
			/*
			 * Optimize minimal inlining for single
			 * segment packets to fill one WQEBB
			 * without gaps.
			 */
			temp = MLX5_ESEG_MIN_INLINE_SIZE;
		} else {
			temp = inlen_mode - MLX5_ESEG_MIN_INLINE_SIZE;
			temp = RTE_ALIGN(temp, MLX5_WSEG_SIZE) +
			       MLX5_ESEG_MIN_INLINE_SIZE;
			temp = RTE_MIN(temp, MLX5_SEND_MAX_INLINE_LEN);
		}
		if (temp != inlen_mode) {
			DRV_LOG(INFO,
				"port %u minimal required inline setting"
				" aligned from %u to %u",
				PORT_ID(priv), inlen_mode, temp);
			inlen_mode = temp;
		}
	}
	/*
	 * If the port is configured to support VLAN insertion and the device
	 * does not support this feature in hardware (for NICs before
	 * ConnectX-5, or in case the wqe_vlan_insert flag is not set), we
	 * must enable data inlining on all queues because the feature is
	 * implemented by a single tx_burst routine.
	 */
	txq_ctrl->txq.vlan_en = config->hw_vlan_insert;
	vlan_inline = (dev_txoff & RTE_ETH_TX_OFFLOAD_VLAN_INSERT) &&
		      !config->hw_vlan_insert;
	/*
	 * If there are few Tx queues, it is preferable to save CPU cycles
	 * and disable data inlining altogether.
	 */
	if (inlen_send && priv->txqs_n >= txqs_inline) {
		/*
		 * The data sent with an ordinary MLX5_OPCODE_SEND
		 * may be inlined in the Ethernet Segment, align the
		 * length accordingly to fit entire WQEBBs.
		 */
		temp = RTE_MAX(inlen_send,
			       MLX5_ESEG_MIN_INLINE_SIZE + MLX5_WQE_DSEG_SIZE);
		temp -= MLX5_ESEG_MIN_INLINE_SIZE + MLX5_WQE_DSEG_SIZE;
		temp = RTE_ALIGN(temp, MLX5_WQE_SIZE);
		temp += MLX5_ESEG_MIN_INLINE_SIZE + MLX5_WQE_DSEG_SIZE;
		temp = RTE_MIN(temp, MLX5_WQE_SIZE_MAX +
				     MLX5_ESEG_MIN_INLINE_SIZE -
				     MLX5_WQE_CSEG_SIZE -
				     MLX5_WQE_ESEG_SIZE -
				     MLX5_WQE_DSEG_SIZE * 2);
		temp = RTE_MIN(temp, MLX5_SEND_MAX_INLINE_LEN);
		temp = RTE_MAX(temp, inlen_mode);
		if (temp != inlen_send) {
			DRV_LOG(INFO,
				"port %u ordinary send inline setting"
				" aligned from %u to %u",
				PORT_ID(priv), inlen_send, temp);
			inlen_send = temp;
		}
		/*
		 * Not aligned to cache lines, but to WQEs.
		 * The first bytes of data (initial alignment)
		 * are going to be copied explicitly at the
		 * beginning of the inlining buffer in the
		 * Ethernet Segment.
		 */
		MLX5_ASSERT(inlen_send >= MLX5_ESEG_MIN_INLINE_SIZE);
		MLX5_ASSERT(inlen_send <= MLX5_WQE_SIZE_MAX +
					  MLX5_ESEG_MIN_INLINE_SIZE -
					  MLX5_WQE_CSEG_SIZE -
					  MLX5_WQE_ESEG_SIZE -
					  MLX5_WQE_DSEG_SIZE * 2);
	} else if (inlen_mode) {
		/*
		 * If minimal inlining is requested we must
		 * enable inlining in general, despite the
		 * number of configured queues. Ignore the
		 * txq_inline_max devarg, this is not
		 * full-featured inline.
		 */
		inlen_send = inlen_mode;
		inlen_empw = 0;
	} else if (vlan_inline) {
		/*
		 * Hardware does not report the VLAN insertion offload,
		 * we must enable data inlining to implement the
		 * feature in software.
		 */
		inlen_send = MLX5_ESEG_MIN_INLINE_SIZE;
		inlen_empw = 0;
	} else {
		inlen_send = 0;
		inlen_empw = 0;
	}
	txq_ctrl->txq.inlen_send = inlen_send;
	txq_ctrl->txq.inlen_mode = inlen_mode;
	txq_ctrl->txq.inlen_empw = 0;
	if (inlen_send && inlen_empw && priv->txqs_n >= txqs_inline) {
		/*
		 * The data sent with MLX5_OPCODE_ENHANCED_MPSW
		 * may be inlined in the Data Segment, align the
		 * length accordingly to fit entire WQEBBs.
		 */
		temp = RTE_MAX(inlen_empw,
			       MLX5_WQE_SIZE + MLX5_DSEG_MIN_INLINE_SIZE);
		temp -= MLX5_DSEG_MIN_INLINE_SIZE;
		temp = RTE_ALIGN(temp, MLX5_WQE_SIZE);
		temp += MLX5_DSEG_MIN_INLINE_SIZE;
		temp = RTE_MIN(temp, MLX5_WQE_SIZE_MAX +
				     MLX5_DSEG_MIN_INLINE_SIZE -
				     MLX5_WQE_CSEG_SIZE -
				     MLX5_WQE_ESEG_SIZE -
				     MLX5_WQE_DSEG_SIZE);
		temp = RTE_MIN(temp, MLX5_EMPW_MAX_INLINE_LEN);
		if (temp != inlen_empw) {
			DRV_LOG(INFO,
				"port %u enhanced empw inline setting"
				" aligned from %u to %u",
				PORT_ID(priv), inlen_empw, temp);
			inlen_empw = temp;
		}
		MLX5_ASSERT(inlen_empw >= MLX5_ESEG_MIN_INLINE_SIZE);
		MLX5_ASSERT(inlen_empw <= MLX5_WQE_SIZE_MAX +
					  MLX5_DSEG_MIN_INLINE_SIZE -
					  MLX5_WQE_CSEG_SIZE -
					  MLX5_WQE_ESEG_SIZE -
					  MLX5_WQE_DSEG_SIZE);
		txq_ctrl->txq.inlen_empw = inlen_empw;
	}
	txq_ctrl->max_inline_data = RTE_MAX(inlen_send, inlen_empw);
	if (tso) {
		txq_ctrl->max_tso_header = MLX5_MAX_TSO_HEADER;
		txq_ctrl->max_inline_data = RTE_MAX(txq_ctrl->max_inline_data,
						    MLX5_MAX_TSO_HEADER);
		txq_ctrl->txq.tso_en = 1;
	}
	if (((RTE_ETH_TX_OFFLOAD_VXLAN_TNL_TSO & txq_ctrl->txq.offloads) &&
	    (dev_cap->tunnel_en & MLX5_TUNNELED_OFFLOADS_VXLAN_CAP)) |
	   ((RTE_ETH_TX_OFFLOAD_GRE_TNL_TSO & txq_ctrl->txq.offloads) &&
	    (dev_cap->tunnel_en & MLX5_TUNNELED_OFFLOADS_GRE_CAP)) |
	   ((RTE_ETH_TX_OFFLOAD_GENEVE_TNL_TSO & txq_ctrl->txq.offloads) &&
	    (dev_cap->tunnel_en & MLX5_TUNNELED_OFFLOADS_GENEVE_CAP)) |
	   (dev_cap->swp & MLX5_SW_PARSING_TSO_CAP))
		txq_ctrl->txq.tunnel_en = 1;
	txq_ctrl->txq.swp_en = (((RTE_ETH_TX_OFFLOAD_IP_TNL_TSO |
				  RTE_ETH_TX_OFFLOAD_UDP_TNL_TSO) &
				  txq_ctrl->txq.offloads) && (dev_cap->swp &
				  MLX5_SW_PARSING_TSO_CAP)) |
				((RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM &
				 txq_ctrl->txq.offloads) && (dev_cap->swp &
				 MLX5_SW_PARSING_CSUM_CAP));
}

/**
 * Adjust Tx queue data inline parameters for large queue sizes.
 * The data inline feature requires multiple WQEs to fit the packets,
 * and if a large number of Tx descriptors is requested by the application,
 * the total WQE count may exceed the hardware capabilities. If the
 * default inline settings are used, we can try to adjust them to
 * meet the hardware requirements without exceeding the queue size.
 *
 * @param txq_ctrl
 *   Pointer to Tx queue control structure.
 *
 * @return
 *   Zero on success, otherwise the parameters cannot be adjusted.
 */
static int
txq_adjust_params(struct mlx5_txq_ctrl *txq_ctrl)
{
	struct mlx5_priv *priv = txq_ctrl->priv;
	struct mlx5_port_config *config = &priv->config;
	unsigned int max_inline;

	max_inline = txq_calc_inline_max(txq_ctrl);
	if (!txq_ctrl->txq.inlen_send) {
		/*
		 * The inline data feature is not engaged at all.
		 * There is nothing to adjust.
		 */
		return 0;
	}
	if (txq_ctrl->max_inline_data <= max_inline) {
		/*
		 * The requested inline data length does not
		 * exceed queue capabilities.
		 */
		return 0;
	}
	if (txq_ctrl->txq.inlen_mode > max_inline) {
		DRV_LOG(ERR,
			"minimal data inline requirements (%u) are not"
			" satisfied (%u) on port %u, try the smaller"
			" Tx queue size (%d)",
			txq_ctrl->txq.inlen_mode, max_inline,
			priv->dev_data->port_id, priv->sh->dev_cap.max_qp_wr);
		goto error;
	}
	if (txq_ctrl->txq.inlen_send > max_inline &&
	    config->txq_inline_max != MLX5_ARG_UNSET &&
	    config->txq_inline_max > (int)max_inline) {
		DRV_LOG(ERR,
			"txq_inline_max requirements (%u) are not"
			" satisfied (%u) on port %u, try the smaller"
			" Tx queue size (%d)",
			txq_ctrl->txq.inlen_send, max_inline,
			priv->dev_data->port_id, priv->sh->dev_cap.max_qp_wr);
		goto error;
	}
	if (txq_ctrl->txq.inlen_empw > max_inline &&
	    config->txq_inline_mpw != MLX5_ARG_UNSET &&
	    config->txq_inline_mpw > (int)max_inline) {
		DRV_LOG(ERR,
			"txq_inline_mpw requirements (%u) are not"
			" satisfied (%u) on port %u, try the smaller"
			" Tx queue size (%d)",
			txq_ctrl->txq.inlen_empw, max_inline,
			priv->dev_data->port_id, priv->sh->dev_cap.max_qp_wr);
		goto error;
	}
	if (txq_ctrl->txq.tso_en && max_inline < MLX5_MAX_TSO_HEADER) {
		DRV_LOG(ERR,
			"tso header inline requirements (%u) are not"
			" satisfied (%u) on port %u, try the smaller"
			" Tx queue size (%d)",
			MLX5_MAX_TSO_HEADER, max_inline,
			priv->dev_data->port_id, priv->sh->dev_cap.max_qp_wr);
		goto error;
	}
	if (txq_ctrl->txq.inlen_send > max_inline) {
		DRV_LOG(WARNING,
			"adjust txq_inline_max (%u->%u)"
			" due to large Tx queue on port %u",
			txq_ctrl->txq.inlen_send, max_inline,
			priv->dev_data->port_id);
		txq_ctrl->txq.inlen_send = max_inline;
	}
	if (txq_ctrl->txq.inlen_empw > max_inline) {
		DRV_LOG(WARNING,
			"adjust txq_inline_mpw (%u->%u)"
			" due to large Tx queue on port %u",
			txq_ctrl->txq.inlen_empw, max_inline,
			priv->dev_data->port_id);
		txq_ctrl->txq.inlen_empw = max_inline;
	}
	txq_ctrl->max_inline_data = RTE_MAX(txq_ctrl->txq.inlen_send,
					    txq_ctrl->txq.inlen_empw);
	MLX5_ASSERT(txq_ctrl->max_inline_data <= max_inline);
	MLX5_ASSERT(txq_ctrl->txq.inlen_mode <= max_inline);
	MLX5_ASSERT(txq_ctrl->txq.inlen_mode <= txq_ctrl->txq.inlen_send);
	MLX5_ASSERT(txq_ctrl->txq.inlen_mode <= txq_ctrl->txq.inlen_empw ||
		    !txq_ctrl->txq.inlen_empw);
	return 0;
error:
	rte_errno = ENOMEM;
	return -ENOMEM;
}

/**
 * Create a DPDK Tx queue.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param idx
 *   TX queue index.
 * @param desc
 *   Number of descriptors to configure in queue.
 * @param socket
 *   NUMA socket on which memory must be allocated.
 * @param[in] conf
 *   Thresholds parameters.
 *
 * @return
 *   A DPDK queue object on success, NULL otherwise and rte_errno is set.
 */
struct mlx5_txq_ctrl *
mlx5_txq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
	     unsigned int socket, const struct rte_eth_txconf *conf)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_ctrl *tmpl;

	tmpl = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, sizeof(*tmpl) +
			   desc * sizeof(struct rte_mbuf *), 0, socket);
	if (!tmpl) {
		rte_errno = ENOMEM;
		return NULL;
	}
	if (mlx5_mr_ctrl_init(&tmpl->txq.mr_ctrl,
			      &priv->sh->cdev->mr_scache.dev_gen, socket)) {
		/* rte_errno is already set. */
		goto error;
	}
	MLX5_ASSERT(desc > MLX5_TX_COMP_THRESH);
	tmpl->txq.offloads = conf->offloads |
			     dev->data->dev_conf.txmode.offloads;
	tmpl->priv = priv;
	tmpl->socket = socket;
	tmpl->txq.elts_n = log2above(desc);
	tmpl->txq.elts_s = desc;
	tmpl->txq.elts_m = desc - 1;
	tmpl->txq.port_id = dev->data->port_id;
	tmpl->txq.idx = idx;
	txq_set_params(tmpl);
	if (txq_adjust_params(tmpl))
		goto error;
	if (txq_calc_wqebb_cnt(tmpl) >
	    priv->sh->dev_cap.max_qp_wr) {
		DRV_LOG(ERR,
			"port %u Tx WQEBB count (%d) exceeds the limit (%d),"
			" try smaller queue size",
			dev->data->port_id, txq_calc_wqebb_cnt(tmpl),
			priv->sh->dev_cap.max_qp_wr);
		rte_errno = ENOMEM;
		goto error;
	}
	rte_atomic_fetch_add_explicit(&tmpl->refcnt, 1, rte_memory_order_relaxed);
	tmpl->is_hairpin = false;
	LIST_INSERT_HEAD(&priv->txqsctrl, tmpl, next);
	return tmpl;
error:
	mlx5_mr_btree_free(&tmpl->txq.mr_ctrl.cache_bh);
	mlx5_free(tmpl);
	return NULL;
}

/**
 * Create a DPDK Tx hairpin queue.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param idx
 *   TX queue index.
 * @param desc
 *   Number of descriptors to configure in queue.
 * @param hairpin_conf
 *   The hairpin configuration.
 *
 * @return
 *   A DPDK queue object on success, NULL otherwise and rte_errno is set.
 */
struct mlx5_txq_ctrl *
mlx5_txq_hairpin_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
		     const struct rte_eth_hairpin_conf *hairpin_conf)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_ctrl *tmpl;

	tmpl = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, sizeof(*tmpl), 0,
			   SOCKET_ID_ANY);
	if (!tmpl) {
		rte_errno = ENOMEM;
		return NULL;
	}
	tmpl->priv = priv;
	tmpl->socket = SOCKET_ID_ANY;
	tmpl->txq.elts_n = log2above(desc);
	tmpl->txq.port_id = dev->data->port_id;
	tmpl->txq.idx = idx;
	tmpl->hairpin_conf = *hairpin_conf;
	tmpl->is_hairpin = true;
	rte_atomic_fetch_add_explicit(&tmpl->refcnt, 1, rte_memory_order_relaxed);
	LIST_INSERT_HEAD(&priv->txqsctrl, tmpl, next);
	return tmpl;
}

/**
 * Get a Tx queue.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param idx
 *   TX queue index.
 *
 * @return
 *   A pointer to the queue if it exists.
 */
struct mlx5_txq_ctrl *
mlx5_txq_get(struct rte_eth_dev *dev, uint16_t idx)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_data *txq_data = (*priv->txqs)[idx];
	struct mlx5_txq_ctrl *ctrl = NULL;

	if (txq_data) {
		ctrl = container_of(txq_data, struct mlx5_txq_ctrl, txq);
		rte_atomic_fetch_add_explicit(&ctrl->refcnt, 1, rte_memory_order_relaxed);
	}
	return ctrl;
}

/**
 * Get an external Tx queue.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param idx
 *   External Tx queue index.
 *
 * @return
 *   A pointer to the queue if it exists, NULL otherwise.
 */
struct mlx5_external_q *
mlx5_ext_txq_get(struct rte_eth_dev *dev, uint16_t idx)
{
	struct mlx5_priv *priv = dev->data->dev_private;

	MLX5_ASSERT(mlx5_is_external_txq(dev, idx));
	return &priv->ext_txqs[idx - MLX5_EXTERNAL_TX_QUEUE_ID_MIN];
}

/**
 * Verify that the external Tx queue list is empty.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   The number of objects not released.
 */
int
mlx5_ext_txq_verify(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_external_q *txq;
	uint32_t i;
	int ret = 0;

	if (priv->ext_txqs == NULL)
		return 0;

	for (i = MLX5_EXTERNAL_TX_QUEUE_ID_MIN; i <= UINT16_MAX; ++i) {
		txq = mlx5_ext_txq_get(dev, i);
		if (txq->refcnt < 2)
			continue;
		DRV_LOG(DEBUG, "Port %u external TxQ %u still referenced.",
			dev->data->port_id, i);
		++ret;
	}
	return ret;
}

/**
 * Release a Tx queue.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param idx
 *   TX queue index.
 *
 * @return
 *   1 while a reference on it exists, 0 when freed.
 */
int
mlx5_txq_release(struct rte_eth_dev *dev, uint16_t idx)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_ctrl *txq_ctrl;

	if (priv->txqs == NULL || (*priv->txqs)[idx] == NULL)
		return 0;
	txq_ctrl = container_of((*priv->txqs)[idx], struct mlx5_txq_ctrl, txq);
	if (rte_atomic_fetch_sub_explicit(&txq_ctrl->refcnt, 1, rte_memory_order_relaxed) - 1 > 1)
		return 1;
	if (txq_ctrl->obj) {
		priv->obj_ops.txq_obj_release(txq_ctrl->obj);
		LIST_REMOVE(txq_ctrl->obj, next);
		mlx5_free(txq_ctrl->obj);
		txq_ctrl->obj = NULL;
	}
	if (!txq_ctrl->is_hairpin) {
		if (txq_ctrl->txq.fcqs) {
			mlx5_free(txq_ctrl->txq.fcqs);
			txq_ctrl->txq.fcqs = NULL;
		}
		txq_free_elts(txq_ctrl);
		dev->data->tx_queue_state[idx] = RTE_ETH_QUEUE_STATE_STOPPED;
	}
	if (!rte_atomic_load_explicit(&txq_ctrl->refcnt, rte_memory_order_relaxed)) {
		if (!txq_ctrl->is_hairpin)
			mlx5_mr_btree_free(&txq_ctrl->txq.mr_ctrl.cache_bh);
		LIST_REMOVE(txq_ctrl, next);
		mlx5_free(txq_ctrl);
		(*priv->txqs)[idx] = NULL;
	}
	return 0;
}

/**
 * Verify if the queue can be released.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param idx
 *   TX queue index.
 *
 * @return
 *   1 if the queue can be released.
 */
int
mlx5_txq_releasable(struct rte_eth_dev *dev, uint16_t idx)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_ctrl *txq;

	if (!(*priv->txqs)[idx])
		return -1;
	txq = container_of((*priv->txqs)[idx], struct mlx5_txq_ctrl, txq);
	return (rte_atomic_load_explicit(&txq->refcnt, rte_memory_order_relaxed) == 1);
}

/**
 * Verify that the Tx queue list is empty.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   The number of objects not released.
 */
int
mlx5_txq_verify(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_ctrl *txq_ctrl;
	int ret = 0;

	LIST_FOREACH(txq_ctrl, &priv->txqsctrl, next) {
		DRV_LOG(DEBUG, "port %u Tx queue %u still referenced",
			dev->data->port_id, txq_ctrl->txq.idx);
		++ret;
	}
	return ret;
}

int
mlx5_txq_get_sqn(struct mlx5_txq_ctrl *txq)
{
	return txq->is_hairpin ? txq->obj->sq->id : txq->obj->sq_obj.sq->id;
}

int
rte_pmd_mlx5_external_sq_enable(uint16_t port_id, uint32_t sq_num)
{
	struct rte_eth_dev *dev;
	struct mlx5_priv *priv;
	uint32_t flow;

	if (!rte_eth_dev_is_valid_port(port_id)) {
		DRV_LOG(ERR, "There is no Ethernet device for port %u.",
			port_id);
		rte_errno = ENODEV;
		return -rte_errno;
	}
	dev = &rte_eth_devices[port_id];
	priv = dev->data->dev_private;
	if ((!priv->representor && !priv->master) ||
	    !priv->sh->config.dv_esw_en) {
		DRV_LOG(ERR, "Port %u must be a representor or master port in E-Switch mode.",
			port_id);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	if (sq_num == 0) {
		DRV_LOG(ERR, "Invalid SQ number.");
		rte_errno = EINVAL;
		return -rte_errno;
	}
#ifdef HAVE_MLX5_HWS_SUPPORT
	if (priv->sh->config.dv_flow_en == 2) {
		bool sq_miss_created = false;

		if (priv->sh->config.fdb_def_rule) {
			if (mlx5_flow_hw_esw_create_sq_miss_flow(dev, sq_num, true))
				return -rte_errno;
			sq_miss_created = true;
		}

		if (priv->sh->config.repr_matching &&
		    mlx5_flow_hw_tx_repr_matching_flow(dev, sq_num, true)) {
			if (sq_miss_created)
				mlx5_flow_hw_esw_destroy_sq_miss_flow(dev, sq_num);
			return -rte_errno;
		}
		return 0;
	}
#endif
	flow = mlx5_flow_create_devx_sq_miss_flow(dev, sq_num);
	if (flow > 0)
		return 0;
	DRV_LOG(ERR, "Port %u failed to create default miss flow for SQ %u.",
		port_id, sq_num);
	return -rte_errno;
}
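
/*
 * Editorial usage sketch (not part of the driver): an application that creates
 * its own DevX SQ can ask the PMD to install the default miss flows for it
 * through the public API implemented above. The port and SQ number are
 * hypothetical values taken from the application's own objects.
 *
 *	uint16_t repr_port_id = 0;
 *	uint32_t devx_sq_number = 0x1234;
 *
 *	if (rte_pmd_mlx5_external_sq_enable(repr_port_id, devx_sq_number) < 0)
 *		printf("SQ enable failed: %s\n", rte_strerror(rte_errno));
 */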

/**
 * Set the Tx queue dynamic timestamp (mask and offset).
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 */
void
mlx5_txq_dynf_timestamp_set(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_dev_ctx_shared *sh = priv->sh;
	struct mlx5_txq_data *data;
	int off, nbit;
	unsigned int i;
	uint64_t mask = 0;
	uint64_t ts_mask;

	if (sh->dev_cap.rt_timestamp ||
	    !sh->cdev->config.hca_attr.dev_freq_khz)
		ts_mask = MLX5_TS_MASK_SECS << 32;
	else
		ts_mask = rte_align64pow2(MLX5_TS_MASK_SECS * 1000ull *
					  sh->cdev->config.hca_attr.dev_freq_khz);
	ts_mask = rte_cpu_to_be_64(ts_mask - 1ull);
	nbit = rte_mbuf_dynflag_lookup
				(RTE_MBUF_DYNFLAG_TX_TIMESTAMP_NAME, NULL);
	off = rte_mbuf_dynfield_lookup
				(RTE_MBUF_DYNFIELD_TIMESTAMP_NAME, NULL);
	if (nbit >= 0 && off >= 0 &&
	    (sh->txpp.refcnt || priv->sh->cdev->config.hca_attr.wait_on_time))
		mask = 1ULL << nbit;
	for (i = 0; i != priv->txqs_n; ++i) {
		data = (*priv->txqs)[i];
		if (!data)
			continue;
		data->sh = sh;
		data->ts_mask = mask;
		data->ts_offset = off;
		data->rt_timestamp = sh->dev_cap.rt_timestamp;
		data->rt_timemask = (data->offloads &
				     RTE_ETH_TX_OFFLOAD_SEND_ON_TIMESTAMP) ?
				     ts_mask : 0;
	}
}

int mlx5_count_aggr_ports(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;

	return priv->sh->bond.n_port;
}

int mlx5_map_aggr_tx_affinity(struct rte_eth_dev *dev, uint16_t tx_queue_id,
			      uint8_t affinity)
{
	struct mlx5_txq_ctrl *txq_ctrl;
	struct mlx5_txq_data *txq;
	struct mlx5_priv *priv;

	priv = dev->data->dev_private;
	if (!mlx5_devx_obj_ops_en(priv->sh)) {
		DRV_LOG(ERR, "Tx affinity mapping isn't supported by Verbs API.");
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	if (tx_queue_id >= priv->txqs_n) {
		DRV_LOG(ERR, "port %u Tx queue index out of range (%u >= %u)",
			dev->data->port_id, tx_queue_id, priv->txqs_n);
		rte_errno = EOVERFLOW;
		return -rte_errno;
	}
	txq = (*priv->txqs)[tx_queue_id];
	if (!txq)
		return -1;
	txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
	if (affinity > priv->num_lag_ports) {
		DRV_LOG(ERR, "port %u unable to setup Tx queue index %u,"
			" affinity %u exceeds the maximum %u", dev->data->port_id,
			tx_queue_id, affinity, priv->num_lag_ports);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	DRV_LOG(DEBUG, "port %u configuring queue %u for aggregated affinity %u",
		dev->data->port_id, tx_queue_id, affinity);
	txq_ctrl->txq.tx_aggr_affinity = affinity;
	return 0;
}
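
/*
 * Editorial usage sketch (not part of the driver): mlx5_txq_dynf_timestamp_set()
 * consumes the mbuf dynamic field/flag used for
 * RTE_ETH_TX_OFFLOAD_SEND_ON_TIMESTAMP. Assuming the field and flag have been
 * registered, an application would typically schedule a packet as below;
 * "mbuf" and "desired_tx_time" stand for the application's packet and chosen
 * transmit time and are hypothetical.
 *
 *	int ts_off = rte_mbuf_dynfield_lookup(RTE_MBUF_DYNFIELD_TIMESTAMP_NAME,
 *					      NULL);
 *	int ts_bit = rte_mbuf_dynflag_lookup(RTE_MBUF_DYNFLAG_TX_TIMESTAMP_NAME,
 *					     NULL);
 *
 *	if (ts_off >= 0 && ts_bit >= 0) {
 *		*RTE_MBUF_DYNFIELD(mbuf, ts_off, uint64_t *) = desired_tx_time;
 *		mbuf->ol_flags |= 1ULL << ts_bit;
 *	}
 */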

/**
 * Validate the given external TxQ rte_flow index, and get a pointer to the
 * concurrent external TxQ object to map/unmap.
 *
 * @param[in] port_id
 *   The port identifier of the Ethernet device.
 * @param[in] dpdk_idx
 *   Tx Queue index in rte_flow.
 *
 * @return
 *   Pointer to concurrent external TxQ on success,
 *   NULL otherwise and rte_errno is set.
 */
static struct mlx5_external_q *
mlx5_external_tx_queue_get_validate(uint16_t port_id, uint16_t dpdk_idx)
{
	struct rte_eth_dev *dev;
	struct mlx5_priv *priv;
	int ret;

	if (dpdk_idx < MLX5_EXTERNAL_TX_QUEUE_ID_MIN) {
		DRV_LOG(ERR, "Queue index %u should be in range: [%u, %u].",
			dpdk_idx, MLX5_EXTERNAL_TX_QUEUE_ID_MIN, UINT16_MAX);
		rte_errno = EINVAL;
		return NULL;
	}
	ret = mlx5_devx_extq_port_validate(port_id);
	if (unlikely(ret))
		return NULL;
	dev = &rte_eth_devices[port_id];
	priv = dev->data->dev_private;
	/*
	 * When the user configures a remote PD and CTX and the device creates
	 * TxQs through DevX, the external TxQs array is allocated.
	 */
	MLX5_ASSERT(priv->ext_txqs != NULL);
	return &priv->ext_txqs[dpdk_idx - MLX5_EXTERNAL_TX_QUEUE_ID_MIN];
}

int
rte_pmd_mlx5_external_tx_queue_id_map(uint16_t port_id, uint16_t dpdk_idx,
				      uint32_t hw_idx)
{
	struct mlx5_external_q *ext_txq;
	uint32_t unmapped = 0;

	ext_txq = mlx5_external_tx_queue_get_validate(port_id, dpdk_idx);
	if (ext_txq == NULL)
		return -rte_errno;
	if (!rte_atomic_compare_exchange_strong_explicit(&ext_txq->refcnt, &unmapped, 1,
				rte_memory_order_relaxed, rte_memory_order_relaxed)) {
		if (ext_txq->hw_id != hw_idx) {
			DRV_LOG(ERR, "Port %u external TxQ index %u "
				"is already mapped to HW index (requesting is "
				"%u, existing is %u).",
				port_id, dpdk_idx, hw_idx, ext_txq->hw_id);
			rte_errno = EEXIST;
			return -rte_errno;
		}
		DRV_LOG(WARNING, "Port %u external TxQ index %u "
			"is already mapped to the requested HW index (%u)",
			port_id, dpdk_idx, hw_idx);
	} else {
		ext_txq->hw_id = hw_idx;
		DRV_LOG(DEBUG, "Port %u external TxQ index %u "
			"is successfully mapped to the requested HW index (%u)",
			port_id, dpdk_idx, hw_idx);
	}
	return 0;
}

int
rte_pmd_mlx5_external_tx_queue_id_unmap(uint16_t port_id, uint16_t dpdk_idx)
{
	struct mlx5_external_q *ext_txq;
	uint32_t mapped = 1;

	ext_txq = mlx5_external_tx_queue_get_validate(port_id, dpdk_idx);
	if (ext_txq == NULL)
		return -rte_errno;
	if (ext_txq->refcnt > 1) {
		DRV_LOG(ERR, "Port %u external TxQ index %u still referenced.",
			port_id, dpdk_idx);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	if (!rte_atomic_compare_exchange_strong_explicit(&ext_txq->refcnt, &mapped, 0,
				rte_memory_order_relaxed, rte_memory_order_relaxed)) {
		DRV_LOG(ERR, "Port %u external TxQ index %u doesn't exist.",
			port_id, dpdk_idx);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	DRV_LOG(DEBUG,
		"Port %u external TxQ index %u is successfully unmapped.",
		port_id, dpdk_idx);
	return 0;
}
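
/*
 * Editorial usage sketch (not part of the driver): mapping and later unmapping
 * an external SQ to a Tx queue index usable in rte_flow rules. The rte_flow
 * index and HW SQ number are hypothetical; the index must not be below
 * MLX5_EXTERNAL_TX_QUEUE_ID_MIN (applications use the equivalent public
 * definition from rte_pmd_mlx5.h).
 *
 *	uint16_t port_id = 0;
 *	uint16_t flow_queue_idx = MLX5_EXTERNAL_TX_QUEUE_ID_MIN;
 *	uint32_t devx_sq_number = 0x1234;
 *
 *	if (rte_pmd_mlx5_external_tx_queue_id_map(port_id, flow_queue_idx,
 *						  devx_sq_number) < 0)
 *		printf("map failed: %s\n", rte_strerror(rte_errno));
 *	else
 *		rte_pmd_mlx5_external_tx_queue_id_unmap(port_id,
 *							flow_queue_idx);
 */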