/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2015 6WIND S.A.
 * Copyright 2015 Mellanox Technologies, Ltd
 */

#include <stddef.h>
#include <errno.h>
#include <string.h>
#include <stdint.h>
#include <unistd.h>
#include <inttypes.h>

#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <rte_ethdev_driver.h>
#include <rte_common.h>
#include <rte_eal_paging.h>

#include <mlx5_glue.h>
#include <mlx5_devx_cmds.h>
#include <mlx5_common.h>
#include <mlx5_common_mr.h>
#include <mlx5_common_os.h>
#include <mlx5_malloc.h>

#include "mlx5_defs.h"
#include "mlx5_utils.h"
#include "mlx5.h"
#include "mlx5_rxtx.h"
#include "mlx5_autoconf.h"

/**
 * Allocate TX queue elements.
 *
 * @param txq_ctrl
 *   Pointer to TX queue structure.
 */
void
txq_alloc_elts(struct mlx5_txq_ctrl *txq_ctrl)
{
	const unsigned int elts_n = 1 << txq_ctrl->txq.elts_n;
	unsigned int i;

	for (i = 0; (i != elts_n); ++i)
		txq_ctrl->txq.elts[i] = NULL;
	DRV_LOG(DEBUG, "port %u Tx queue %u allocated and configured %u WRs",
		PORT_ID(txq_ctrl->priv), txq_ctrl->txq.idx, elts_n);
	txq_ctrl->txq.elts_head = 0;
	txq_ctrl->txq.elts_tail = 0;
	txq_ctrl->txq.elts_comp = 0;
}

/**
 * Free TX queue elements.
 *
 * @param txq_ctrl
 *   Pointer to TX queue structure.
 */
void
txq_free_elts(struct mlx5_txq_ctrl *txq_ctrl)
{
	const uint16_t elts_n = 1 << txq_ctrl->txq.elts_n;
	const uint16_t elts_m = elts_n - 1;
	uint16_t elts_head = txq_ctrl->txq.elts_head;
	uint16_t elts_tail = txq_ctrl->txq.elts_tail;
	struct rte_mbuf *(*elts)[elts_n] = &txq_ctrl->txq.elts;

	DRV_LOG(DEBUG, "port %u Tx queue %u freeing WRs",
		PORT_ID(txq_ctrl->priv), txq_ctrl->txq.idx);
	txq_ctrl->txq.elts_head = 0;
	txq_ctrl->txq.elts_tail = 0;
	txq_ctrl->txq.elts_comp = 0;

	while (elts_tail != elts_head) {
		struct rte_mbuf *elt = (*elts)[elts_tail & elts_m];

		MLX5_ASSERT(elt != NULL);
		rte_pktmbuf_free_seg(elt);
#ifdef RTE_LIBRTE_MLX5_DEBUG
		/* Poisoning. */
		memset(&(*elts)[elts_tail & elts_m],
		       0x77,
		       sizeof((*elts)[elts_tail & elts_m]));
#endif
		++elts_tail;
	}
}

/**
 * Returns the per-port supported offloads.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   Supported Tx offloads.
 */
uint64_t
mlx5_get_tx_port_offloads(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	uint64_t offloads = (DEV_TX_OFFLOAD_MULTI_SEGS |
			     DEV_TX_OFFLOAD_VLAN_INSERT);
	struct mlx5_dev_config *config = &priv->config;

	if (config->hw_csum)
		offloads |= (DEV_TX_OFFLOAD_IPV4_CKSUM |
			     DEV_TX_OFFLOAD_UDP_CKSUM |
			     DEV_TX_OFFLOAD_TCP_CKSUM);
	if (config->tso)
		offloads |= DEV_TX_OFFLOAD_TCP_TSO;
	if (config->tx_pp)
		offloads |= DEV_TX_OFFLOAD_SEND_ON_TIMESTAMP;
	if (config->swp) {
		if (config->hw_csum)
			offloads |= DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM;
		if (config->tso)
			offloads |= (DEV_TX_OFFLOAD_IP_TNL_TSO |
				     DEV_TX_OFFLOAD_UDP_TNL_TSO);
	}
	if (config->tunnel_en) {
		if (config->hw_csum)
			offloads |= DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM;
		if (config->tso)
			offloads |= (DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
				     DEV_TX_OFFLOAD_GRE_TNL_TSO |
				     DEV_TX_OFFLOAD_GENEVE_TNL_TSO);
	}
	return offloads;
}
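
/*
 * Usage note (sketch, illustrative only): applications normally discover
 * these capabilities through the generic ethdev layer rather than calling
 * this function directly, e.g.:
 *
 *	struct rte_eth_dev_info dev_info;
 *
 *	rte_eth_dev_info_get(port_id, &dev_info);
 *	if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_TCP_TSO)
 *		tx_conf_offloads |= DEV_TX_OFFLOAD_TCP_TSO;
 *
 * The port_id/tx_conf_offloads names above are hypothetical.
 */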

/* Fetches and drops all SW-owned and error CQEs to synchronize CQ. */
static void
txq_sync_cq(struct mlx5_txq_data *txq)
{
	volatile struct mlx5_cqe *cqe;
	int ret, i;

	i = txq->cqe_s;
	do {
		cqe = &txq->cqes[txq->cq_ci & txq->cqe_m];
		ret = check_cqe(cqe, txq->cqe_s, txq->cq_ci);
		if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) {
			if (likely(ret != MLX5_CQE_STATUS_ERR)) {
				/* No new CQEs in completion queue. */
				MLX5_ASSERT(ret == MLX5_CQE_STATUS_HW_OWN);
				break;
			}
		}
		++txq->cq_ci;
	} while (--i);
	/* Move all CQEs to HW ownership. */
	for (i = 0; i < txq->cqe_s; i++) {
		cqe = &txq->cqes[i];
		cqe->op_own = MLX5_CQE_INVALIDATE;
	}
	/* Resync CQE and WQE (WQ in reset state). */
	rte_cio_wmb();
	*txq->cq_db = rte_cpu_to_be_32(txq->cq_ci);
	rte_cio_wmb();
}

/**
 * Tx queue stop. Device queue goes to the idle state,
 * all involved mbufs are freed from elts/WQ.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param idx
 *   Tx queue index.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_tx_queue_stop_primary(struct rte_eth_dev *dev, uint16_t idx)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_data *txq = (*priv->txqs)[idx];
	struct mlx5_txq_ctrl *txq_ctrl =
			container_of(txq, struct mlx5_txq_ctrl, txq);
	int ret;

	MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
	/* Move QP to RESET state. */
	if (txq_ctrl->obj->type == MLX5_TXQ_OBJ_TYPE_DEVX_SQ) {
		struct mlx5_devx_modify_sq_attr msq_attr = { 0 };

		/* Change queue state to reset with DevX. */
		msq_attr.sq_state = MLX5_SQC_STATE_RDY;
		msq_attr.state = MLX5_SQC_STATE_RST;
		ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq_devx,
					      &msq_attr);
		if (ret) {
			DRV_LOG(ERR, "Cannot change the "
				"Tx QP state to RESET %s",
				strerror(errno));
			rte_errno = errno;
			return ret;
		}
	} else {
		struct ibv_qp_attr mod = {
			.qp_state = IBV_QPS_RESET,
			.port_num = (uint8_t)priv->dev_port,
		};
		struct ibv_qp *qp = txq_ctrl->obj->qp;

		/* Change queue state to reset with Verbs. */
		ret = mlx5_glue->modify_qp(qp, &mod, IBV_QP_STATE);
		if (ret) {
			DRV_LOG(ERR, "Cannot change the Tx QP state to RESET "
				"%s", strerror(errno));
			rte_errno = errno;
			return ret;
		}
	}
	/* Handle all send completions. */
	txq_sync_cq(txq);
	/* Free elts stored in the SQ. */
	txq_free_elts(txq_ctrl);
	/* Prevent writing new pkts to SQ by setting no free WQE. */
	txq->wqe_ci = txq->wqe_s;
	txq->wqe_pi = 0;
	txq->elts_comp = 0;
	/* Set the actual queue state. */
	dev->data->tx_queue_state[idx] = RTE_ETH_QUEUE_STATE_STOPPED;
	return 0;
}
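
/*
 * Usage note (sketch): the stop/start handlers in this file are reached
 * through the generic ethdev API, e.g. rte_eth_dev_tx_queue_stop(port_id,
 * queue_id) and rte_eth_dev_tx_queue_start(port_id, queue_id). In a
 * secondary process the request is forwarded to the primary over the mlx5
 * MP channel, see mlx5_mp_os_req_queue_control() below.
 */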

/**
 * Tx queue stop. Device queue goes to the idle state,
 * all involved mbufs are freed from elts/WQ.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param idx
 *   Tx queue index.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_tx_queue_stop(struct rte_eth_dev *dev, uint16_t idx)
{
	int ret;

	if (dev->data->tx_queue_state[idx] == RTE_ETH_QUEUE_STATE_HAIRPIN) {
		DRV_LOG(ERR, "Hairpin queue can't be stopped");
		rte_errno = EINVAL;
		return -EINVAL;
	}
	if (dev->data->tx_queue_state[idx] == RTE_ETH_QUEUE_STATE_STOPPED)
		return 0;
	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
		ret = mlx5_mp_os_req_queue_control(dev, idx,
						   MLX5_MP_REQ_QUEUE_TX_STOP);
	} else {
		ret = mlx5_tx_queue_stop_primary(dev, idx);
	}
	return ret;
}

/**
 * Tx queue start. Device queue goes to the ready state,
 * the underlying QP/SQ is moved back to the ready state.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param idx
 *   Tx queue index.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_tx_queue_start_primary(struct rte_eth_dev *dev, uint16_t idx)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_data *txq = (*priv->txqs)[idx];
	struct mlx5_txq_ctrl *txq_ctrl =
			container_of(txq, struct mlx5_txq_ctrl, txq);
	int ret;

	MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
	if (txq_ctrl->obj->type == MLX5_TXQ_OBJ_TYPE_DEVX_SQ) {
		struct mlx5_devx_modify_sq_attr msq_attr = { 0 };
		struct mlx5_txq_obj *obj = txq_ctrl->obj;

		msq_attr.sq_state = MLX5_SQC_STATE_RDY;
		msq_attr.state = MLX5_SQC_STATE_RST;
		ret = mlx5_devx_cmd_modify_sq(obj->sq_devx, &msq_attr);
		if (ret) {
			rte_errno = errno;
			DRV_LOG(ERR,
				"Cannot change the Tx QP state to RESET "
				"%s", strerror(errno));
			return ret;
		}
		msq_attr.sq_state = MLX5_SQC_STATE_RST;
		msq_attr.state = MLX5_SQC_STATE_RDY;
		ret = mlx5_devx_cmd_modify_sq(obj->sq_devx, &msq_attr);
		if (ret) {
			rte_errno = errno;
			DRV_LOG(ERR,
				"Cannot change the Tx QP state to READY "
				"%s", strerror(errno));
			return ret;
		}
	} else {
		struct ibv_qp_attr mod = {
			.qp_state = IBV_QPS_RESET,
			.port_num = (uint8_t)priv->dev_port,
		};
		struct ibv_qp *qp = txq_ctrl->obj->qp;

		ret = mlx5_glue->modify_qp(qp, &mod, IBV_QP_STATE);
		if (ret) {
			DRV_LOG(ERR, "Cannot change the Tx QP state to RESET "
				"%s", strerror(errno));
			rte_errno = errno;
			return ret;
		}
		mod.qp_state = IBV_QPS_INIT;
		ret = mlx5_glue->modify_qp(qp, &mod,
					   (IBV_QP_STATE | IBV_QP_PORT));
		if (ret) {
			DRV_LOG(ERR, "Cannot change Tx QP state to INIT %s",
				strerror(errno));
			rte_errno = errno;
			return ret;
		}
		mod.qp_state = IBV_QPS_RTR;
		ret = mlx5_glue->modify_qp(qp, &mod, IBV_QP_STATE);
		if (ret) {
			DRV_LOG(ERR, "Cannot change Tx QP state to RTR %s",
				strerror(errno));
			rte_errno = errno;
			return ret;
		}
		mod.qp_state = IBV_QPS_RTS;
		ret = mlx5_glue->modify_qp(qp, &mod, IBV_QP_STATE);
		if (ret) {
			DRV_LOG(ERR, "Cannot change Tx QP state to RTS %s",
				strerror(errno));
			rte_errno = errno;
			return ret;
		}
	}
	txq_ctrl->txq.wqe_ci = 0;
	txq_ctrl->txq.wqe_pi = 0;
	txq_ctrl->txq.elts_comp = 0;
	/* Set the actual queue state. */
	dev->data->tx_queue_state[idx] = RTE_ETH_QUEUE_STATE_STARTED;
	return 0;
}
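
/*
 * Restart sequence summary (informational): with DevX the SQ is cycled
 * RDY -> RST -> RDY above, while with Verbs the QP is walked through
 * RESET -> INIT -> RTR -> RTS. Both paths then clear the WQE indices so
 * the queue restarts from a clean producer/consumer state.
 */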

/**
 * Tx queue start. Device queue goes to the ready state,
 * the underlying QP/SQ is moved back to the ready state.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param idx
 *   Tx queue index.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_tx_queue_start(struct rte_eth_dev *dev, uint16_t idx)
{
	int ret;

	if (dev->data->tx_queue_state[idx] == RTE_ETH_QUEUE_STATE_HAIRPIN) {
		DRV_LOG(ERR, "Hairpin queue can't be started");
		rte_errno = EINVAL;
		return -EINVAL;
	}
	if (dev->data->tx_queue_state[idx] == RTE_ETH_QUEUE_STATE_STARTED)
		return 0;
	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
		ret = mlx5_mp_os_req_queue_control(dev, idx,
						   MLX5_MP_REQ_QUEUE_TX_START);
	} else {
		ret = mlx5_tx_queue_start_primary(dev, idx);
	}
	return ret;
}

/**
 * Tx queue presetup checks.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param idx
 *   Tx queue index.
 * @param desc
 *   Number of descriptors to configure in queue.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_tx_queue_pre_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t *desc)
{
	struct mlx5_priv *priv = dev->data->dev_private;

	if (*desc <= MLX5_TX_COMP_THRESH) {
		DRV_LOG(WARNING,
			"port %u number of descriptors requested for Tx queue"
			" %u must be higher than MLX5_TX_COMP_THRESH, using %u"
			" instead of %u", dev->data->port_id, idx,
			MLX5_TX_COMP_THRESH + 1, *desc);
		*desc = MLX5_TX_COMP_THRESH + 1;
	}
	if (!rte_is_power_of_2(*desc)) {
		*desc = 1 << log2above(*desc);
		DRV_LOG(WARNING,
			"port %u increased number of descriptors in Tx queue"
			" %u to the next power of two (%d)",
			dev->data->port_id, idx, *desc);
	}
	DRV_LOG(DEBUG, "port %u configuring queue %u for %u descriptors",
		dev->data->port_id, idx, *desc);
	if (idx >= priv->txqs_n) {
		DRV_LOG(ERR, "port %u Tx queue index out of range (%u >= %u)",
			dev->data->port_id, idx, priv->txqs_n);
		rte_errno = EOVERFLOW;
		return -rte_errno;
	}
	if (!mlx5_txq_releasable(dev, idx)) {
		rte_errno = EBUSY;
		DRV_LOG(ERR, "port %u unable to release queue index %u",
			dev->data->port_id, idx);
		return -rte_errno;
	}
	mlx5_txq_release(dev, idx);
	return 0;
}

/**
 * DPDK callback to configure a TX queue.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param idx
 *   TX queue index.
 * @param desc
 *   Number of descriptors to configure in queue.
 * @param socket
 *   NUMA socket on which memory must be allocated.
 * @param[in] conf
 *   Thresholds parameters.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
		    unsigned int socket, const struct rte_eth_txconf *conf)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_data *txq = (*priv->txqs)[idx];
	struct mlx5_txq_ctrl *txq_ctrl =
		container_of(txq, struct mlx5_txq_ctrl, txq);
	int res;

	res = mlx5_tx_queue_pre_setup(dev, idx, &desc);
	if (res)
		return res;
	txq_ctrl = mlx5_txq_new(dev, idx, desc, socket, conf);
	if (!txq_ctrl) {
		DRV_LOG(ERR, "port %u unable to allocate queue index %u",
			dev->data->port_id, idx);
		return -rte_errno;
	}
	DRV_LOG(DEBUG, "port %u adding Tx queue %u to list",
		dev->data->port_id, idx);
	(*priv->txqs)[idx] = &txq_ctrl->txq;
	dev->data->tx_queue_state[idx] = RTE_ETH_QUEUE_STATE_STARTED;
	return 0;
}

/**
 * DPDK callback to configure a TX hairpin queue.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param idx
 *   TX queue index.
 * @param desc
 *   Number of descriptors to configure in queue.
 * @param[in] hairpin_conf
 *   The hairpin binding configuration.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_tx_hairpin_queue_setup(struct rte_eth_dev *dev, uint16_t idx,
			    uint16_t desc,
			    const struct rte_eth_hairpin_conf *hairpin_conf)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_data *txq = (*priv->txqs)[idx];
	struct mlx5_txq_ctrl *txq_ctrl =
		container_of(txq, struct mlx5_txq_ctrl, txq);
	int res;

	res = mlx5_tx_queue_pre_setup(dev, idx, &desc);
	if (res)
		return res;
	if (hairpin_conf->peer_count != 1 ||
	    hairpin_conf->peers[0].port != dev->data->port_id ||
	    hairpin_conf->peers[0].queue >= priv->rxqs_n) {
		DRV_LOG(ERR, "port %u unable to setup hairpin queue index %u:"
			" invalid hairpin configuration", dev->data->port_id,
			idx);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	txq_ctrl = mlx5_txq_hairpin_new(dev, idx, desc, hairpin_conf);
	if (!txq_ctrl) {
		DRV_LOG(ERR, "port %u unable to allocate queue index %u",
			dev->data->port_id, idx);
		return -rte_errno;
	}
	DRV_LOG(DEBUG, "port %u adding Tx queue %u to list",
		dev->data->port_id, idx);
	(*priv->txqs)[idx] = &txq_ctrl->txq;
	dev->data->tx_queue_state[idx] = RTE_ETH_QUEUE_STATE_HAIRPIN;
	return 0;
}

/**
 * DPDK callback to release a TX queue.
 *
 * @param dpdk_txq
 *   Generic TX queue pointer.
 */
void
mlx5_tx_queue_release(void *dpdk_txq)
{
	struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
	struct mlx5_txq_ctrl *txq_ctrl;
	struct mlx5_priv *priv;
	unsigned int i;

	if (txq == NULL)
		return;
	txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
	priv = txq_ctrl->priv;
	for (i = 0; (i != priv->txqs_n); ++i)
		if ((*priv->txqs)[i] == txq) {
			DRV_LOG(DEBUG, "port %u removing Tx queue %u from list",
				PORT_ID(priv), txq->idx);
			mlx5_txq_release(ETH_DEV(priv), i);
			break;
		}
}

/**
 * Configure the doorbell register non-cached attribute.
 *
 * @param txq_ctrl
 *   Pointer to Tx queue control structure.
 * @param page_size
 *   System page size.
 */
static void
txq_uar_ncattr_init(struct mlx5_txq_ctrl *txq_ctrl, size_t page_size)
{
	struct mlx5_priv *priv = txq_ctrl->priv;
	off_t cmd;

	txq_ctrl->txq.db_heu = priv->config.dbnc == MLX5_TXDB_HEURISTIC;
	txq_ctrl->txq.db_nc = 0;
	/* Check the doorbell register mapping type. */
	cmd = txq_ctrl->uar_mmap_offset / page_size;
	cmd >>= MLX5_UAR_MMAP_CMD_SHIFT;
	cmd &= MLX5_UAR_MMAP_CMD_MASK;
	if (cmd == MLX5_MMAP_GET_NC_PAGES_CMD)
		txq_ctrl->txq.db_nc = 1;
}

/**
 * Initialize Tx UAR registers for primary process.
 *
 * @param txq_ctrl
 *   Pointer to Tx queue control structure.
 */
static void
txq_uar_init(struct mlx5_txq_ctrl *txq_ctrl)
{
	struct mlx5_priv *priv = txq_ctrl->priv;
	struct mlx5_proc_priv *ppriv = MLX5_PROC_PRIV(PORT_ID(priv));
#ifndef RTE_ARCH_64
	unsigned int lock_idx;
#endif
	const size_t page_size = rte_mem_page_size();
	if (page_size == (size_t)-1) {
		DRV_LOG(ERR, "Failed to get mem page size");
		rte_errno = ENOMEM;
	}

	if (txq_ctrl->type != MLX5_TXQ_TYPE_STANDARD)
		return;
	MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
	MLX5_ASSERT(ppriv);
	ppriv->uar_table[txq_ctrl->txq.idx] = txq_ctrl->bf_reg;
	txq_uar_ncattr_init(txq_ctrl, page_size);
#ifndef RTE_ARCH_64
	/* Assign a UAR lock according to UAR page number. */
	lock_idx = (txq_ctrl->uar_mmap_offset / page_size) &
		   MLX5_UAR_PAGE_NUM_MASK;
	txq_ctrl->txq.uar_lock = &priv->sh->uar_lock[lock_idx];
#endif
}

/**
 * Remap UAR register of a Tx queue for secondary process.
 *
 * Remapped address is stored at the table in the process private structure of
 * the device, indexed by queue index.
 *
 * @param txq_ctrl
 *   Pointer to Tx queue control structure.
 * @param fd
 *   Verbs file descriptor to map UAR pages.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
txq_uar_init_secondary(struct mlx5_txq_ctrl *txq_ctrl, int fd)
{
	struct mlx5_priv *priv = txq_ctrl->priv;
	struct mlx5_proc_priv *ppriv = MLX5_PROC_PRIV(PORT_ID(priv));
	struct mlx5_txq_data *txq = &txq_ctrl->txq;
	void *addr;
	uintptr_t uar_va;
	uintptr_t offset;
	const size_t page_size = rte_mem_page_size();
	if (page_size == (size_t)-1) {
		DRV_LOG(ERR, "Failed to get mem page size");
		rte_errno = ENOMEM;
		return -rte_errno;
	}

	if (txq_ctrl->type != MLX5_TXQ_TYPE_STANDARD)
		return 0;
	MLX5_ASSERT(ppriv);
	/*
	 * As in rdma-core, UARs are mapped at the granularity of the OS page
	 * size. Ref to libmlx5 function: mlx5_init_context().
	 */
	uar_va = (uintptr_t)txq_ctrl->bf_reg;
	offset = uar_va & (page_size - 1); /* Offset in page. */
	addr = rte_mem_map(NULL, page_size, RTE_PROT_WRITE, RTE_MAP_SHARED,
			   fd, txq_ctrl->uar_mmap_offset);
	if (!addr) {
		DRV_LOG(ERR,
			"port %u mmap failed for BF reg of txq %u",
			txq->port_id, txq->idx);
		rte_errno = ENXIO;
		return -rte_errno;
	}
	addr = RTE_PTR_ADD(addr, offset);
	ppriv->uar_table[txq->idx] = addr;
	txq_uar_ncattr_init(txq_ctrl, page_size);
	return 0;
}

/**
 * Unmap UAR register of a Tx queue for secondary process.
 *
 * @param txq_ctrl
 *   Pointer to Tx queue control structure.
 */
static void
txq_uar_uninit_secondary(struct mlx5_txq_ctrl *txq_ctrl)
{
	struct mlx5_proc_priv *ppriv = MLX5_PROC_PRIV(PORT_ID(txq_ctrl->priv));
	void *addr;
	const size_t page_size = rte_mem_page_size();
	if (page_size == (size_t)-1) {
		DRV_LOG(ERR, "Failed to get mem page size");
		rte_errno = ENOMEM;
	}

	if (txq_ctrl->type != MLX5_TXQ_TYPE_STANDARD)
		return;
	addr = ppriv->uar_table[txq_ctrl->txq.idx];
	rte_mem_unmap(RTE_PTR_ALIGN_FLOOR(addr, page_size), page_size);
}

/**
 * Deinitialize Tx UAR registers for secondary process.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
void
mlx5_tx_uar_uninit_secondary(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_data *txq;
	struct mlx5_txq_ctrl *txq_ctrl;
	unsigned int i;

	MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_SECONDARY);
	for (i = 0; i != priv->txqs_n; ++i) {
		if (!(*priv->txqs)[i])
			continue;
		txq = (*priv->txqs)[i];
		txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
		txq_uar_uninit_secondary(txq_ctrl);
	}
}

/**
 * Initialize Tx UAR registers for secondary process.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fd
 *   Verbs file descriptor to map UAR pages.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_tx_uar_init_secondary(struct rte_eth_dev *dev, int fd)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_data *txq;
	struct mlx5_txq_ctrl *txq_ctrl;
	unsigned int i;
	int ret;

	MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_SECONDARY);
	for (i = 0; i != priv->txqs_n; ++i) {
		if (!(*priv->txqs)[i])
			continue;
		txq = (*priv->txqs)[i];
		txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
		if (txq_ctrl->type != MLX5_TXQ_TYPE_STANDARD)
			continue;
		MLX5_ASSERT(txq->idx == (uint16_t)i);
		ret = txq_uar_init_secondary(txq_ctrl, fd);
		if (ret)
			goto error;
	}
	return 0;
error:
	/* Rollback. */
	do {
		if (!(*priv->txqs)[i])
			continue;
		txq = (*priv->txqs)[i];
		txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
		txq_uar_uninit_secondary(txq_ctrl);
	} while (i--);
	return -rte_errno;
}

/**
 * Create the Tx hairpin queue object.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param idx
 *   Queue index in DPDK Tx queue array.
 *
 * @return
 *   The hairpin DevX object initialised, NULL otherwise and rte_errno is set.
 */
static struct mlx5_txq_obj *
mlx5_txq_obj_hairpin_new(struct rte_eth_dev *dev, uint16_t idx)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_data *txq_data = (*priv->txqs)[idx];
	struct mlx5_txq_ctrl *txq_ctrl =
		container_of(txq_data, struct mlx5_txq_ctrl, txq);
	struct mlx5_devx_create_sq_attr attr = { 0 };
	struct mlx5_txq_obj *tmpl = NULL;
	uint32_t max_wq_data;

	MLX5_ASSERT(txq_data);
	MLX5_ASSERT(!txq_ctrl->obj);
	tmpl = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, sizeof(*tmpl), 0,
			   txq_ctrl->socket);
	if (!tmpl) {
		DRV_LOG(ERR,
			"port %u Tx queue %u cannot allocate memory resources",
			dev->data->port_id, txq_data->idx);
		rte_errno = ENOMEM;
		return NULL;
	}
	tmpl->type = MLX5_TXQ_OBJ_TYPE_DEVX_HAIRPIN;
	tmpl->txq_ctrl = txq_ctrl;
	attr.hairpin = 1;
	attr.tis_lst_sz = 1;
	max_wq_data = priv->config.hca_attr.log_max_hairpin_wq_data_sz;
	/* Jumbo frames > 9KB should be supported, and more packets. */
	if (priv->config.log_hp_size != (uint32_t)MLX5_ARG_UNSET) {
		if (priv->config.log_hp_size > max_wq_data) {
			DRV_LOG(ERR, "total data size %u power of 2 is "
				"too large for hairpin",
				priv->config.log_hp_size);
			mlx5_free(tmpl);
			rte_errno = ERANGE;
			return NULL;
		}
		attr.wq_attr.log_hairpin_data_sz = priv->config.log_hp_size;
	} else {
		attr.wq_attr.log_hairpin_data_sz =
				(max_wq_data < MLX5_HAIRPIN_JUMBO_LOG_SIZE) ?
				 max_wq_data : MLX5_HAIRPIN_JUMBO_LOG_SIZE;
	}
	/* Set the packets number to the maximum value for performance. */
	attr.wq_attr.log_hairpin_num_packets =
			attr.wq_attr.log_hairpin_data_sz -
			MLX5_HAIRPIN_QUEUE_STRIDE;
	attr.tis_num = priv->sh->tis->id;
	tmpl->sq = mlx5_devx_cmd_create_sq(priv->sh->ctx, &attr);
	if (!tmpl->sq) {
		DRV_LOG(ERR,
			"port %u tx hairpin queue %u can't create SQ object",
			dev->data->port_id, idx);
		mlx5_free(tmpl);
		rte_errno = errno;
		return NULL;
	}
	DRV_LOG(DEBUG, "port %u txq %u updated with %p", dev->data->port_id,
		idx, (void *)&tmpl);
	rte_atomic32_inc(&tmpl->refcnt);
	LIST_INSERT_HEAD(&priv->txqsobj, tmpl, next);
	return tmpl;
}

/**
 * Destroy the Tx queue DevX object.
 *
 * @param txq_obj
 *   Txq object to destroy.
 */
static void
txq_release_sq_resources(struct mlx5_txq_obj *txq_obj)
{
	MLX5_ASSERT(txq_obj->type == MLX5_TXQ_OBJ_TYPE_DEVX_SQ);

	if (txq_obj->sq_devx)
		claim_zero(mlx5_devx_cmd_destroy(txq_obj->sq_devx));
	if (txq_obj->sq_dbrec_page)
		claim_zero(mlx5_release_dbr
				(&txq_obj->txq_ctrl->priv->dbrpgs,
				 mlx5_os_get_umem_id
					(txq_obj->sq_dbrec_page->umem),
				 txq_obj->sq_dbrec_offset));
	if (txq_obj->sq_umem)
		claim_zero(mlx5_glue->devx_umem_dereg(txq_obj->sq_umem));
	if (txq_obj->sq_buf)
		mlx5_free(txq_obj->sq_buf);
	if (txq_obj->cq_devx)
		claim_zero(mlx5_devx_cmd_destroy(txq_obj->cq_devx));
	if (txq_obj->cq_dbrec_page)
		claim_zero(mlx5_release_dbr
				(&txq_obj->txq_ctrl->priv->dbrpgs,
				 mlx5_os_get_umem_id
					(txq_obj->cq_dbrec_page->umem),
				 txq_obj->cq_dbrec_offset));
	if (txq_obj->cq_umem)
		claim_zero(mlx5_glue->devx_umem_dereg(txq_obj->cq_umem));
	if (txq_obj->cq_buf)
		mlx5_free(txq_obj->cq_buf);
}
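
/*
 * Sizing note (informational, assuming the default MLX5_TX_COMP_THRESH of 32
 * from mlx5_defs.h): the DevX object created below requests roughly
 * desc / MLX5_TX_COMP_THRESH + 1 + MLX5_TX_COMP_THRESH_INLINE_DIV CQEs and
 * rounds that up to a power of two, so for a 1024-descriptor queue the CQ
 * ends up far smaller than the SQ because a completion is requested only
 * about once per MLX5_TX_COMP_THRESH packets.
 */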

/**
 * Create the Tx queue DevX object.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param idx
 *   Queue index in DPDK Tx queue array.
 *
 * @return
 *   The DevX object initialised, NULL otherwise and rte_errno is set.
 */
static struct mlx5_txq_obj *
mlx5_txq_obj_devx_new(struct rte_eth_dev *dev, uint16_t idx)
{
#ifndef HAVE_MLX5DV_DEVX_UAR_OFFSET
	DRV_LOG(ERR, "port %u Tx queue %u cannot create with DevX, no UAR",
		     dev->data->port_id, idx);
	rte_errno = ENOMEM;
	return NULL;
#else
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_dev_ctx_shared *sh = priv->sh;
	struct mlx5_txq_data *txq_data = (*priv->txqs)[idx];
	struct mlx5_txq_ctrl *txq_ctrl =
		container_of(txq_data, struct mlx5_txq_ctrl, txq);
	struct mlx5_devx_create_sq_attr sq_attr = { 0 };
	struct mlx5_devx_modify_sq_attr msq_attr = { 0 };
	struct mlx5_devx_cq_attr cq_attr = { 0 };
	struct mlx5_txq_obj *txq_obj = NULL;
	size_t page_size;
	struct mlx5_cqe *cqe;
	uint32_t i, nqe;
	size_t alignment = (size_t)-1;
	int ret = 0;

	MLX5_ASSERT(txq_data);
	MLX5_ASSERT(!txq_ctrl->obj);
	page_size = rte_mem_page_size();
	if (page_size == (size_t)-1) {
		DRV_LOG(ERR, "Failed to get mem page size");
		rte_errno = ENOMEM;
		return NULL;
	}
	txq_obj = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
			      sizeof(struct mlx5_txq_obj), 0,
			      txq_ctrl->socket);
	if (!txq_obj) {
		DRV_LOG(ERR,
			"port %u Tx queue %u cannot allocate memory resources",
			dev->data->port_id, txq_data->idx);
		rte_errno = ENOMEM;
		return NULL;
	}
	txq_obj->type = MLX5_TXQ_OBJ_TYPE_DEVX_SQ;
	txq_obj->txq_ctrl = txq_ctrl;
	txq_obj->dev = dev;
	/* Create the Completion Queue. */
	nqe = (1UL << txq_data->elts_n) / MLX5_TX_COMP_THRESH +
	      1 + MLX5_TX_COMP_THRESH_INLINE_DIV;
	nqe = 1UL << log2above(nqe);
	if (nqe > UINT16_MAX) {
		DRV_LOG(ERR,
			"port %u Tx queue %u requests too many CQEs %u",
			dev->data->port_id, txq_data->idx, nqe);
		rte_errno = EINVAL;
		goto error;
	}
	/* Allocate memory buffer for CQEs. */
	alignment = MLX5_CQE_BUF_ALIGNMENT;
	if (alignment == (size_t)-1) {
		DRV_LOG(ERR, "Failed to get mem page size");
		rte_errno = ENOMEM;
		goto error;
	}
	txq_obj->cq_buf = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
				      nqe * sizeof(struct mlx5_cqe),
				      alignment,
				      sh->numa_node);
	if (!txq_obj->cq_buf) {
		DRV_LOG(ERR,
			"port %u Tx queue %u cannot allocate memory (CQ)",
			dev->data->port_id, txq_data->idx);
		rte_errno = ENOMEM;
		goto error;
	}
	txq_data->cqe_n = log2above(nqe);
	txq_data->cqe_s = 1 << txq_data->cqe_n;
	txq_data->cqe_m = txq_data->cqe_s - 1;
	txq_data->cqes = (volatile struct mlx5_cqe *)txq_obj->cq_buf;
	txq_data->cq_ci = 0;
	txq_data->cq_pi = 0;
	/* Register allocated buffer in user space with DevX. */
	txq_obj->cq_umem = mlx5_glue->devx_umem_reg
					(sh->ctx,
					 (void *)txq_obj->cq_buf,
					 nqe * sizeof(struct mlx5_cqe),
					 IBV_ACCESS_LOCAL_WRITE);
	if (!txq_obj->cq_umem) {
		rte_errno = errno;
		DRV_LOG(ERR,
			"port %u Tx queue %u cannot register memory (CQ)",
			dev->data->port_id, txq_data->idx);
		goto error;
	}
	/* Allocate doorbell record for completion queue. */
	txq_obj->cq_dbrec_offset = mlx5_get_dbr(sh->ctx,
						&priv->dbrpgs,
						&txq_obj->cq_dbrec_page);
	if (txq_obj->cq_dbrec_offset < 0)
		goto error;
	txq_data->cq_db = (volatile uint32_t *)(txq_obj->cq_dbrec_page->dbrs +
						txq_obj->cq_dbrec_offset);
	*txq_data->cq_db = 0;
	/* Create completion queue object with DevX. */
	cq_attr.cqe_size = (sizeof(struct mlx5_cqe) == 128) ?
			    MLX5_CQE_SIZE_128B : MLX5_CQE_SIZE_64B;
	cq_attr.uar_page_id = sh->tx_uar->page_id;
	cq_attr.eqn = sh->txpp.eqn;
	cq_attr.q_umem_valid = 1;
	cq_attr.q_umem_offset = (uintptr_t)txq_obj->cq_buf % page_size;
	cq_attr.q_umem_id = txq_obj->cq_umem->umem_id;
	cq_attr.db_umem_valid = 1;
	cq_attr.db_umem_offset = txq_obj->cq_dbrec_offset;
	cq_attr.db_umem_id = mlx5_os_get_umem_id(txq_obj->cq_dbrec_page->umem);
	cq_attr.log_cq_size = rte_log2_u32(nqe);
	cq_attr.log_page_size = rte_log2_u32(page_size);
	txq_obj->cq_devx = mlx5_devx_cmd_create_cq(sh->ctx, &cq_attr);
	if (!txq_obj->cq_devx) {
		rte_errno = errno;
		DRV_LOG(ERR, "port %u Tx queue %u CQ creation failure",
			dev->data->port_id, idx);
		goto error;
	}
	/* Initial fill CQ buffer with invalid CQE opcode. */
	cqe = (struct mlx5_cqe *)txq_obj->cq_buf;
	for (i = 0; i < txq_data->cqe_s; i++) {
		cqe->op_own = (MLX5_CQE_INVALID << 4) | MLX5_CQE_OWNER_MASK;
		++cqe;
	}
	/* Create the Work Queue. */
	nqe = RTE_MIN(1UL << txq_data->elts_n,
		      (uint32_t)sh->device_attr.max_qp_wr);
	txq_obj->sq_buf = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
				      nqe * sizeof(struct mlx5_wqe),
				      page_size, sh->numa_node);
	if (!txq_obj->sq_buf) {
		DRV_LOG(ERR,
			"port %u Tx queue %u cannot allocate memory (SQ)",
			dev->data->port_id, txq_data->idx);
		rte_errno = ENOMEM;
		goto error;
	}
	txq_data->wqe_n = log2above(nqe);
	txq_data->wqe_s = 1 << txq_data->wqe_n;
	txq_data->wqe_m = txq_data->wqe_s - 1;
	txq_data->wqes = (struct mlx5_wqe *)txq_obj->sq_buf;
	txq_data->wqes_end = txq_data->wqes + txq_data->wqe_s;
	txq_data->wqe_ci = 0;
	txq_data->wqe_pi = 0;
	txq_data->wqe_comp = 0;
	txq_data->wqe_thres = txq_data->wqe_s / MLX5_TX_COMP_THRESH_INLINE_DIV;
	/* Register allocated buffer in user space with DevX. */
	txq_obj->sq_umem = mlx5_glue->devx_umem_reg
					(sh->ctx,
					 (void *)txq_obj->sq_buf,
					 nqe * sizeof(struct mlx5_wqe),
					 IBV_ACCESS_LOCAL_WRITE);
	if (!txq_obj->sq_umem) {
		rte_errno = errno;
		DRV_LOG(ERR,
			"port %u Tx queue %u cannot register memory (SQ)",
			dev->data->port_id, txq_data->idx);
		goto error;
	}
	/* Allocate doorbell record for send queue. */
	txq_obj->sq_dbrec_offset = mlx5_get_dbr(sh->ctx,
						&priv->dbrpgs,
						&txq_obj->sq_dbrec_page);
	if (txq_obj->sq_dbrec_offset < 0)
		goto error;
	txq_data->qp_db = (volatile uint32_t *)
				(txq_obj->sq_dbrec_page->dbrs +
				 txq_obj->sq_dbrec_offset +
				 MLX5_SND_DBR * sizeof(uint32_t));
	*txq_data->qp_db = 0;
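	/*
	 * Note (informational): a doorbell record is a small host-memory
	 * word the HCA reads to learn the software producer/consumer index.
	 * The record allocated above is believed to hold a send/receive
	 * counter pair, with MLX5_SND_DBR selecting the send counter; the CQ
	 * uses the separate record set up through cq_db earlier.
	 */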
	/* Create Send Queue object with DevX. */
	sq_attr.tis_lst_sz = 1;
	sq_attr.tis_num = sh->tis->id;
	sq_attr.state = MLX5_SQC_STATE_RST;
	sq_attr.cqn = txq_obj->cq_devx->id;
	sq_attr.flush_in_error_en = 1;
	sq_attr.allow_multi_pkt_send_wqe = !!priv->config.mps;
	sq_attr.allow_swp = !!priv->config.swp;
	sq_attr.min_wqe_inline_mode = priv->config.hca_attr.vport_inline_mode;
	sq_attr.wq_attr.uar_page = sh->tx_uar->page_id;
	sq_attr.wq_attr.wq_type = MLX5_WQ_TYPE_CYCLIC;
	sq_attr.wq_attr.pd = sh->pdn;
	sq_attr.wq_attr.log_wq_stride = rte_log2_u32(MLX5_WQE_SIZE);
	sq_attr.wq_attr.log_wq_sz = txq_data->wqe_n;
	sq_attr.wq_attr.dbr_umem_valid = 1;
	sq_attr.wq_attr.dbr_addr = txq_obj->sq_dbrec_offset;
	sq_attr.wq_attr.dbr_umem_id =
			mlx5_os_get_umem_id(txq_obj->sq_dbrec_page->umem);
	sq_attr.wq_attr.wq_umem_valid = 1;
	sq_attr.wq_attr.wq_umem_id = txq_obj->sq_umem->umem_id;
	sq_attr.wq_attr.wq_umem_offset = (uintptr_t)txq_obj->sq_buf % page_size;
	txq_obj->sq_devx = mlx5_devx_cmd_create_sq(sh->ctx, &sq_attr);
	if (!txq_obj->sq_devx) {
		rte_errno = errno;
		DRV_LOG(ERR, "port %u Tx queue %u SQ creation failure",
			dev->data->port_id, idx);
		goto error;
	}
	txq_data->qp_num_8s = txq_obj->sq_devx->id << 8;
	/* Change Send Queue state to Ready-to-Send. */
	msq_attr.sq_state = MLX5_SQC_STATE_RST;
	msq_attr.state = MLX5_SQC_STATE_RDY;
	ret = mlx5_devx_cmd_modify_sq(txq_obj->sq_devx, &msq_attr);
	if (ret) {
		rte_errno = errno;
		DRV_LOG(ERR,
			"port %u Tx queue %u SQ state to SQC_STATE_RDY failed",
			dev->data->port_id, idx);
		goto error;
	}
	txq_data->fcqs = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
				     txq_data->cqe_s * sizeof(*txq_data->fcqs),
				     RTE_CACHE_LINE_SIZE,
				     txq_ctrl->socket);
	if (!txq_data->fcqs) {
		DRV_LOG(ERR, "port %u Tx queue %u cannot allocate memory (FCQ)",
			dev->data->port_id, idx);
		rte_errno = ENOMEM;
		goto error;
	}
#ifdef HAVE_IBV_FLOW_DV_SUPPORT
	/*
	 * If using DevX need to query and store TIS transport domain value.
	 * This is done once per port.
	 * Will use this value on Rx, when creating matching TIR.
	 */
	if (priv->config.devx && !priv->sh->tdn)
		priv->sh->tdn = priv->sh->td->id;
#endif
	MLX5_ASSERT(sh->tx_uar);
	MLX5_ASSERT(sh->tx_uar->reg_addr);
	txq_ctrl->bf_reg = sh->tx_uar->reg_addr;
	txq_ctrl->uar_mmap_offset = sh->tx_uar->mmap_off;
	rte_atomic32_set(&txq_obj->refcnt, 1);
	txq_uar_init(txq_ctrl);
	LIST_INSERT_HEAD(&priv->txqsobj, txq_obj, next);
	return txq_obj;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	txq_release_sq_resources(txq_obj);
	if (txq_data->fcqs) {
		mlx5_free(txq_data->fcqs);
		txq_data->fcqs = NULL;
	}
	mlx5_free(txq_obj);
	rte_errno = ret; /* Restore rte_errno. */
	return NULL;
#endif
}

/**
 * Create the Tx queue Verbs object.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param idx
 *   Queue index in DPDK Tx queue array.
 * @param type
 *   Type of the Tx queue object to create.
 *
 * @return
 *   The Verbs object initialised, NULL otherwise and rte_errno is set.
 */
struct mlx5_txq_obj *
mlx5_txq_obj_new(struct rte_eth_dev *dev, uint16_t idx,
		 enum mlx5_txq_obj_type type)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_data *txq_data = (*priv->txqs)[idx];
	struct mlx5_txq_ctrl *txq_ctrl =
		container_of(txq_data, struct mlx5_txq_ctrl, txq);
	struct mlx5_txq_obj tmpl;
	struct mlx5_txq_obj *txq_obj = NULL;
	union {
		struct ibv_qp_init_attr_ex init;
		struct ibv_cq_init_attr_ex cq;
		struct ibv_qp_attr mod;
	} attr;
	unsigned int cqe_n;
	struct mlx5dv_qp qp = { .comp_mask = MLX5DV_QP_MASK_UAR_MMAP_OFFSET };
	struct mlx5dv_cq cq_info;
	struct mlx5dv_obj obj;
	const int desc = 1 << txq_data->elts_n;
	int ret = 0;

	if (type == MLX5_TXQ_OBJ_TYPE_DEVX_HAIRPIN)
		return mlx5_txq_obj_hairpin_new(dev, idx);
	if (type == MLX5_TXQ_OBJ_TYPE_DEVX_SQ)
		return mlx5_txq_obj_devx_new(dev, idx);
#ifdef HAVE_IBV_FLOW_DV_SUPPORT
	/* If using DevX, need additional mask to read tisn value. */
	if (priv->config.devx && !priv->sh->tdn)
		qp.comp_mask |= MLX5DV_QP_MASK_RAW_QP_HANDLES;
#endif
	MLX5_ASSERT(txq_data);
	priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_TX_QUEUE;
	priv->verbs_alloc_ctx.obj = txq_ctrl;
	if (mlx5_getenv_int("MLX5_ENABLE_CQE_COMPRESSION")) {
		DRV_LOG(ERR,
			"port %u MLX5_ENABLE_CQE_COMPRESSION must never be set",
			dev->data->port_id);
		rte_errno = EINVAL;
		return NULL;
	}
	memset(&tmpl, 0, sizeof(struct mlx5_txq_obj));
	attr.cq = (struct ibv_cq_init_attr_ex){
		.comp_mask = 0,
	};
	cqe_n = desc / MLX5_TX_COMP_THRESH +
		1 + MLX5_TX_COMP_THRESH_INLINE_DIV;
	tmpl.cq = mlx5_glue->create_cq(priv->sh->ctx, cqe_n, NULL, NULL, 0);
	if (tmpl.cq == NULL) {
		DRV_LOG(ERR, "port %u Tx queue %u CQ creation failure",
			dev->data->port_id, idx);
		rte_errno = errno;
		goto error;
	}
	attr.init = (struct ibv_qp_init_attr_ex){
		/* CQ to be associated with the send queue. */
		.send_cq = tmpl.cq,
		/* CQ to be associated with the receive queue. */
		.recv_cq = tmpl.cq,
		.cap = {
			/* Max number of outstanding WRs. */
			.max_send_wr =
				((priv->sh->device_attr.max_qp_wr <
				  desc) ?
				  priv->sh->device_attr.max_qp_wr :
				  desc),
			/*
			 * Max number of scatter/gather elements in a WR,
			 * must be 1 to prevent libmlx5 from trying to affect
			 * too much memory. TX gather is not impacted by the
			 * device_attr.max_sge limit and will still work
			 * properly.
			 */
			.max_send_sge = 1,
		},
		.qp_type = IBV_QPT_RAW_PACKET,
		/*
		 * Do *NOT* enable this, completion events are managed per
		 * Tx burst.
		 */
		.sq_sig_all = 0,
		.pd = priv->sh->pd,
		.comp_mask = IBV_QP_INIT_ATTR_PD,
	};
	if (txq_data->inlen_send)
		attr.init.cap.max_inline_data = txq_ctrl->max_inline_data;
	if (txq_data->tso_en) {
		attr.init.max_tso_header = txq_ctrl->max_tso_header;
		attr.init.comp_mask |= IBV_QP_INIT_ATTR_MAX_TSO_HEADER;
	}
	tmpl.qp = mlx5_glue->create_qp_ex(priv->sh->ctx, &attr.init);
	if (tmpl.qp == NULL) {
		DRV_LOG(ERR, "port %u Tx queue %u QP creation failure",
			dev->data->port_id, idx);
		rte_errno = errno;
		goto error;
	}
	attr.mod = (struct ibv_qp_attr){
		/* Move the QP to this state. */
		.qp_state = IBV_QPS_INIT,
		/* IB device port number. */
		.port_num = (uint8_t)priv->dev_port,
	};
	ret = mlx5_glue->modify_qp(tmpl.qp, &attr.mod,
				   (IBV_QP_STATE | IBV_QP_PORT));
	if (ret) {
		DRV_LOG(ERR,
			"port %u Tx queue %u QP state to IBV_QPS_INIT failed",
			dev->data->port_id, idx);
		rte_errno = errno;
		goto error;
	}
	attr.mod = (struct ibv_qp_attr){
		.qp_state = IBV_QPS_RTR
	};
	ret = mlx5_glue->modify_qp(tmpl.qp, &attr.mod, IBV_QP_STATE);
	if (ret) {
		DRV_LOG(ERR,
			"port %u Tx queue %u QP state to IBV_QPS_RTR failed",
			dev->data->port_id, idx);
		rte_errno = errno;
		goto error;
	}
	attr.mod.qp_state = IBV_QPS_RTS;
	ret = mlx5_glue->modify_qp(tmpl.qp, &attr.mod, IBV_QP_STATE);
	if (ret) {
		DRV_LOG(ERR,
			"port %u Tx queue %u QP state to IBV_QPS_RTS failed",
			dev->data->port_id, idx);
		rte_errno = errno;
		goto error;
	}
	txq_obj = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
			      sizeof(struct mlx5_txq_obj), 0,
			      txq_ctrl->socket);
	if (!txq_obj) {
		DRV_LOG(ERR, "port %u Tx queue %u cannot allocate memory",
			dev->data->port_id, idx);
		rte_errno = ENOMEM;
		goto error;
	}
	obj.cq.in = tmpl.cq;
	obj.cq.out = &cq_info;
	obj.qp.in = tmpl.qp;
	obj.qp.out = &qp;
	ret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_CQ | MLX5DV_OBJ_QP);
	if (ret != 0) {
		rte_errno = errno;
		goto error;
	}
	if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) {
		DRV_LOG(ERR,
			"port %u wrong MLX5_CQE_SIZE environment variable"
			" value: it should be set to %u",
			dev->data->port_id, RTE_CACHE_LINE_SIZE);
		rte_errno = EINVAL;
		goto error;
	}
	txq_data->cqe_n = log2above(cq_info.cqe_cnt);
	txq_data->cqe_s = 1 << txq_data->cqe_n;
	txq_data->cqe_m = txq_data->cqe_s - 1;
	txq_data->qp_num_8s = tmpl.qp->qp_num << 8;
	txq_data->wqes = qp.sq.buf;
	txq_data->wqe_n = log2above(qp.sq.wqe_cnt);
	txq_data->wqe_s = 1 << txq_data->wqe_n;
	txq_data->wqe_m = txq_data->wqe_s - 1;
	txq_data->wqes_end = txq_data->wqes + txq_data->wqe_s;
	txq_data->qp_db = &qp.dbrec[MLX5_SND_DBR];
	txq_data->cq_db = cq_info.dbrec;
	txq_data->cqes = (volatile struct mlx5_cqe *)cq_info.buf;
	txq_data->cq_ci = 0;
	txq_data->cq_pi = 0;
	txq_data->wqe_ci = 0;
	txq_data->wqe_pi = 0;
	txq_data->wqe_comp = 0;
	txq_data->wqe_thres = txq_data->wqe_s / MLX5_TX_COMP_THRESH_INLINE_DIV;
	txq_data->fcqs = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
				     txq_data->cqe_s * sizeof(*txq_data->fcqs),
				     RTE_CACHE_LINE_SIZE, txq_ctrl->socket);
	if (!txq_data->fcqs) {
		DRV_LOG(ERR, "port %u Tx queue %u cannot allocate memory (FCQ)",
			dev->data->port_id, idx);
		rte_errno = ENOMEM;
		goto error;
	}
#ifdef HAVE_IBV_FLOW_DV_SUPPORT
	/*
	 * If using DevX need to query and store TIS transport domain value.
	 * This is done once per port.
	 * Will use this value on Rx, when creating matching TIR.
	 */
	if (priv->config.devx && !priv->sh->tdn) {
		ret = mlx5_devx_cmd_qp_query_tis_td(tmpl.qp, qp.tisn,
						    &priv->sh->tdn);
		if (ret) {
			DRV_LOG(ERR, "Failed to query port %u Tx queue %u QP TIS "
				"transport domain", dev->data->port_id, idx);
			rte_errno = EINVAL;
			goto error;
		} else {
			DRV_LOG(DEBUG, "port %u Tx queue %u TIS number %d "
				"transport domain %d", dev->data->port_id,
				idx, qp.tisn, priv->sh->tdn);
		}
	}
#endif
	txq_obj->qp = tmpl.qp;
	txq_obj->cq = tmpl.cq;
	rte_atomic32_inc(&txq_obj->refcnt);
	txq_ctrl->bf_reg = qp.bf.reg;
	if (qp.comp_mask & MLX5DV_QP_MASK_UAR_MMAP_OFFSET) {
		txq_ctrl->uar_mmap_offset = qp.uar_mmap_offset;
		DRV_LOG(DEBUG, "port %u: uar_mmap_offset 0x%"PRIx64,
			dev->data->port_id, txq_ctrl->uar_mmap_offset);
	} else {
		DRV_LOG(ERR,
			"port %u failed to retrieve UAR info, invalid"
			" libmlx5.so",
			dev->data->port_id);
		rte_errno = EINVAL;
		goto error;
	}
	txq_uar_init(txq_ctrl);
	LIST_INSERT_HEAD(&priv->txqsobj, txq_obj, next);
	txq_obj->txq_ctrl = txq_ctrl;
	priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE;
	return txq_obj;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	if (tmpl.cq)
		claim_zero(mlx5_glue->destroy_cq(tmpl.cq));
	if (tmpl.qp)
		claim_zero(mlx5_glue->destroy_qp(tmpl.qp));
	if (txq_data && txq_data->fcqs) {
		mlx5_free(txq_data->fcqs);
		txq_data->fcqs = NULL;
	}
	if (txq_obj)
		mlx5_free(txq_obj);
	priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE;
	rte_errno = ret; /* Restore rte_errno. */
	return NULL;
}

/**
 * Get a Tx queue Verbs object.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param idx
 *   Queue index in DPDK Tx queue array.
 *
 * @return
 *   The Verbs object if it exists.
 */
struct mlx5_txq_obj *
mlx5_txq_obj_get(struct rte_eth_dev *dev, uint16_t idx)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_ctrl *txq_ctrl;

	if (idx >= priv->txqs_n)
		return NULL;
	if (!(*priv->txqs)[idx])
		return NULL;
	txq_ctrl = container_of((*priv->txqs)[idx], struct mlx5_txq_ctrl, txq);
	if (txq_ctrl->obj)
		rte_atomic32_inc(&txq_ctrl->obj->refcnt);
	return txq_ctrl->obj;
}

/**
 * Release a Tx Verbs queue object.
 *
 * @param txq_obj
 *   Verbs Tx queue object.
 *
 * @return
 *   1 while a reference on it exists, 0 when freed.
 */
int
mlx5_txq_obj_release(struct mlx5_txq_obj *txq_obj)
{
	MLX5_ASSERT(txq_obj);
	if (rte_atomic32_dec_and_test(&txq_obj->refcnt)) {
		if (txq_obj->type == MLX5_TXQ_OBJ_TYPE_DEVX_HAIRPIN) {
			if (txq_obj->tis)
				claim_zero(mlx5_devx_cmd_destroy(txq_obj->tis));
		} else if (txq_obj->type == MLX5_TXQ_OBJ_TYPE_DEVX_SQ) {
			txq_release_sq_resources(txq_obj);
		} else {
			claim_zero(mlx5_glue->destroy_qp(txq_obj->qp));
			claim_zero(mlx5_glue->destroy_cq(txq_obj->cq));
		}
		if (txq_obj->txq_ctrl->txq.fcqs) {
			mlx5_free(txq_obj->txq_ctrl->txq.fcqs);
			txq_obj->txq_ctrl->txq.fcqs = NULL;
		}
		LIST_REMOVE(txq_obj, next);
		mlx5_free(txq_obj);
		return 0;
	}
	return 1;
}
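
/*
 * Lifecycle note (informational): mlx5_txq_obj_get() above only bumps the
 * reference counter of an already created object; the object and its
 * DevX/Verbs resources are destroyed by mlx5_txq_obj_release() when the
 * last reference is dropped, with the cleanup path selected by the object
 * type.
 */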
1466 * 1467 * @return 1468 * The number of object not released. 1469 */ 1470 int 1471 mlx5_txq_obj_verify(struct rte_eth_dev *dev) 1472 { 1473 struct mlx5_priv *priv = dev->data->dev_private; 1474 int ret = 0; 1475 struct mlx5_txq_obj *txq_obj; 1476 1477 LIST_FOREACH(txq_obj, &priv->txqsobj, next) { 1478 DRV_LOG(DEBUG, "port %u Verbs Tx queue %u still referenced", 1479 dev->data->port_id, txq_obj->txq_ctrl->txq.idx); 1480 ++ret; 1481 } 1482 return ret; 1483 } 1484 1485 /** 1486 * Calculate the total number of WQEBB for Tx queue. 1487 * 1488 * Simplified version of calc_sq_size() in rdma-core. 1489 * 1490 * @param txq_ctrl 1491 * Pointer to Tx queue control structure. 1492 * 1493 * @return 1494 * The number of WQEBB. 1495 */ 1496 static int 1497 txq_calc_wqebb_cnt(struct mlx5_txq_ctrl *txq_ctrl) 1498 { 1499 unsigned int wqe_size; 1500 const unsigned int desc = 1 << txq_ctrl->txq.elts_n; 1501 1502 wqe_size = MLX5_WQE_CSEG_SIZE + 1503 MLX5_WQE_ESEG_SIZE + 1504 MLX5_WSEG_SIZE - 1505 MLX5_ESEG_MIN_INLINE_SIZE + 1506 txq_ctrl->max_inline_data; 1507 return rte_align32pow2(wqe_size * desc) / MLX5_WQE_SIZE; 1508 } 1509 1510 /** 1511 * Calculate the maximal inline data size for Tx queue. 1512 * 1513 * @param txq_ctrl 1514 * Pointer to Tx queue control structure. 1515 * 1516 * @return 1517 * The maximal inline data size. 1518 */ 1519 static unsigned int 1520 txq_calc_inline_max(struct mlx5_txq_ctrl *txq_ctrl) 1521 { 1522 const unsigned int desc = 1 << txq_ctrl->txq.elts_n; 1523 struct mlx5_priv *priv = txq_ctrl->priv; 1524 unsigned int wqe_size; 1525 1526 wqe_size = priv->sh->device_attr.max_qp_wr / desc; 1527 if (!wqe_size) 1528 return 0; 1529 /* 1530 * This calculation is derived from tthe source of 1531 * mlx5_calc_send_wqe() in rdma_core library. 1532 */ 1533 wqe_size = wqe_size * MLX5_WQE_SIZE - 1534 MLX5_WQE_CSEG_SIZE - 1535 MLX5_WQE_ESEG_SIZE - 1536 MLX5_WSEG_SIZE - 1537 MLX5_WSEG_SIZE + 1538 MLX5_DSEG_MIN_INLINE_SIZE; 1539 return wqe_size; 1540 } 1541 1542 /** 1543 * Set Tx queue parameters from device configuration. 1544 * 1545 * @param txq_ctrl 1546 * Pointer to Tx queue control structure. 1547 */ 1548 static void 1549 txq_set_params(struct mlx5_txq_ctrl *txq_ctrl) 1550 { 1551 struct mlx5_priv *priv = txq_ctrl->priv; 1552 struct mlx5_dev_config *config = &priv->config; 1553 unsigned int inlen_send; /* Inline data for ordinary SEND.*/ 1554 unsigned int inlen_empw; /* Inline data for enhanced MPW. */ 1555 unsigned int inlen_mode; /* Minimal required Inline data. */ 1556 unsigned int txqs_inline; /* Min Tx queues to enable inline. */ 1557 uint64_t dev_txoff = priv->dev_data->dev_conf.txmode.offloads; 1558 bool tso = txq_ctrl->txq.offloads & (DEV_TX_OFFLOAD_TCP_TSO | 1559 DEV_TX_OFFLOAD_VXLAN_TNL_TSO | 1560 DEV_TX_OFFLOAD_GRE_TNL_TSO | 1561 DEV_TX_OFFLOAD_IP_TNL_TSO | 1562 DEV_TX_OFFLOAD_UDP_TNL_TSO); 1563 bool vlan_inline; 1564 unsigned int temp; 1565 1566 if (config->txqs_inline == MLX5_ARG_UNSET) 1567 txqs_inline = 1568 #if defined(RTE_ARCH_ARM64) 1569 (priv->pci_dev->id.device_id == 1570 PCI_DEVICE_ID_MELLANOX_CONNECTX5BF) ? 1571 MLX5_INLINE_MAX_TXQS_BLUEFIELD : 1572 #endif 1573 MLX5_INLINE_MAX_TXQS; 1574 else 1575 txqs_inline = (unsigned int)config->txqs_inline; 1576 inlen_send = (config->txq_inline_max == MLX5_ARG_UNSET) ? 1577 MLX5_SEND_DEF_INLINE_LEN : 1578 (unsigned int)config->txq_inline_max; 1579 inlen_empw = (config->txq_inline_mpw == MLX5_ARG_UNSET) ? 
1580 MLX5_EMPW_DEF_INLINE_LEN : 1581 (unsigned int)config->txq_inline_mpw; 1582 inlen_mode = (config->txq_inline_min == MLX5_ARG_UNSET) ? 1583 0 : (unsigned int)config->txq_inline_min; 1584 if (config->mps != MLX5_MPW_ENHANCED && config->mps != MLX5_MPW) 1585 inlen_empw = 0; 1586 /* 1587 * If there is requested minimal amount of data to inline 1588 * we MUST enable inlining. This is a case for ConnectX-4 1589 * which usually requires L2 inlined for correct operating 1590 * and ConnectX-4 Lx which requires L2-L4 inlined to 1591 * support E-Switch Flows. 1592 */ 1593 if (inlen_mode) { 1594 if (inlen_mode <= MLX5_ESEG_MIN_INLINE_SIZE) { 1595 /* 1596 * Optimize minimal inlining for single 1597 * segment packets to fill one WQEBB 1598 * without gaps. 1599 */ 1600 temp = MLX5_ESEG_MIN_INLINE_SIZE; 1601 } else { 1602 temp = inlen_mode - MLX5_ESEG_MIN_INLINE_SIZE; 1603 temp = RTE_ALIGN(temp, MLX5_WSEG_SIZE) + 1604 MLX5_ESEG_MIN_INLINE_SIZE; 1605 temp = RTE_MIN(temp, MLX5_SEND_MAX_INLINE_LEN); 1606 } 1607 if (temp != inlen_mode) { 1608 DRV_LOG(INFO, 1609 "port %u minimal required inline setting" 1610 " aligned from %u to %u", 1611 PORT_ID(priv), inlen_mode, temp); 1612 inlen_mode = temp; 1613 } 1614 } 1615 /* 1616 * If port is configured to support VLAN insertion and device 1617 * does not support this feature by HW (for NICs before ConnectX-5 1618 * or in case of wqe_vlan_insert flag is not set) we must enable 1619 * data inline on all queues because it is supported by single 1620 * tx_burst routine. 1621 */ 1622 txq_ctrl->txq.vlan_en = config->hw_vlan_insert; 1623 vlan_inline = (dev_txoff & DEV_TX_OFFLOAD_VLAN_INSERT) && 1624 !config->hw_vlan_insert; 1625 /* 1626 * If there are few Tx queues it is prioritized 1627 * to save CPU cycles and disable data inlining at all. 1628 */ 1629 if (inlen_send && priv->txqs_n >= txqs_inline) { 1630 /* 1631 * The data sent with ordinal MLX5_OPCODE_SEND 1632 * may be inlined in Ethernet Segment, align the 1633 * length accordingly to fit entire WQEBBs. 1634 */ 1635 temp = RTE_MAX(inlen_send, 1636 MLX5_ESEG_MIN_INLINE_SIZE + MLX5_WQE_DSEG_SIZE); 1637 temp -= MLX5_ESEG_MIN_INLINE_SIZE + MLX5_WQE_DSEG_SIZE; 1638 temp = RTE_ALIGN(temp, MLX5_WQE_SIZE); 1639 temp += MLX5_ESEG_MIN_INLINE_SIZE + MLX5_WQE_DSEG_SIZE; 1640 temp = RTE_MIN(temp, MLX5_WQE_SIZE_MAX + 1641 MLX5_ESEG_MIN_INLINE_SIZE - 1642 MLX5_WQE_CSEG_SIZE - 1643 MLX5_WQE_ESEG_SIZE - 1644 MLX5_WQE_DSEG_SIZE * 2); 1645 temp = RTE_MIN(temp, MLX5_SEND_MAX_INLINE_LEN); 1646 temp = RTE_MAX(temp, inlen_mode); 1647 if (temp != inlen_send) { 1648 DRV_LOG(INFO, 1649 "port %u ordinary send inline setting" 1650 " aligned from %u to %u", 1651 PORT_ID(priv), inlen_send, temp); 1652 inlen_send = temp; 1653 } 1654 /* 1655 * Not aligned to cache lines, but to WQEs. 1656 * First bytes of data (initial alignment) 1657 * is going to be copied explicitly at the 1658 * beginning of inlining buffer in Ethernet 1659 * Segment. 1660 */ 1661 MLX5_ASSERT(inlen_send >= MLX5_ESEG_MIN_INLINE_SIZE); 1662 MLX5_ASSERT(inlen_send <= MLX5_WQE_SIZE_MAX + 1663 MLX5_ESEG_MIN_INLINE_SIZE - 1664 MLX5_WQE_CSEG_SIZE - 1665 MLX5_WQE_ESEG_SIZE - 1666 MLX5_WQE_DSEG_SIZE * 2); 1667 } else if (inlen_mode) { 1668 /* 1669 * If minimal inlining is requested we must 1670 * enable inlining in general, despite the 1671 * number of configured queues. Ignore the 1672 * txq_inline_max devarg, this is not 1673 * full-featured inline. 
1674 */ 1675 inlen_send = inlen_mode; 1676 inlen_empw = 0; 1677 } else if (vlan_inline) { 1678 /* 1679 * Hardware does not report offload for 1680 * VLAN insertion, we must enable data inline 1681 * to implement feature by software. 1682 */ 1683 inlen_send = MLX5_ESEG_MIN_INLINE_SIZE; 1684 inlen_empw = 0; 1685 } else { 1686 inlen_send = 0; 1687 inlen_empw = 0; 1688 } 1689 txq_ctrl->txq.inlen_send = inlen_send; 1690 txq_ctrl->txq.inlen_mode = inlen_mode; 1691 txq_ctrl->txq.inlen_empw = 0; 1692 if (inlen_send && inlen_empw && priv->txqs_n >= txqs_inline) { 1693 /* 1694 * The data sent with MLX5_OPCODE_ENHANCED_MPSW 1695 * may be inlined in Data Segment, align the 1696 * length accordingly to fit entire WQEBBs. 1697 */ 1698 temp = RTE_MAX(inlen_empw, 1699 MLX5_WQE_SIZE + MLX5_DSEG_MIN_INLINE_SIZE); 1700 temp -= MLX5_DSEG_MIN_INLINE_SIZE; 1701 temp = RTE_ALIGN(temp, MLX5_WQE_SIZE); 1702 temp += MLX5_DSEG_MIN_INLINE_SIZE; 1703 temp = RTE_MIN(temp, MLX5_WQE_SIZE_MAX + 1704 MLX5_DSEG_MIN_INLINE_SIZE - 1705 MLX5_WQE_CSEG_SIZE - 1706 MLX5_WQE_ESEG_SIZE - 1707 MLX5_WQE_DSEG_SIZE); 1708 temp = RTE_MIN(temp, MLX5_EMPW_MAX_INLINE_LEN); 1709 if (temp != inlen_empw) { 1710 DRV_LOG(INFO, 1711 "port %u enhanced empw inline setting" 1712 " aligned from %u to %u", 1713 PORT_ID(priv), inlen_empw, temp); 1714 inlen_empw = temp; 1715 } 1716 MLX5_ASSERT(inlen_empw >= MLX5_ESEG_MIN_INLINE_SIZE); 1717 MLX5_ASSERT(inlen_empw <= MLX5_WQE_SIZE_MAX + 1718 MLX5_DSEG_MIN_INLINE_SIZE - 1719 MLX5_WQE_CSEG_SIZE - 1720 MLX5_WQE_ESEG_SIZE - 1721 MLX5_WQE_DSEG_SIZE); 1722 txq_ctrl->txq.inlen_empw = inlen_empw; 1723 } 1724 txq_ctrl->max_inline_data = RTE_MAX(inlen_send, inlen_empw); 1725 if (tso) { 1726 txq_ctrl->max_tso_header = MLX5_MAX_TSO_HEADER; 1727 txq_ctrl->max_inline_data = RTE_MAX(txq_ctrl->max_inline_data, 1728 MLX5_MAX_TSO_HEADER); 1729 txq_ctrl->txq.tso_en = 1; 1730 } 1731 txq_ctrl->txq.tunnel_en = config->tunnel_en | config->swp; 1732 txq_ctrl->txq.swp_en = ((DEV_TX_OFFLOAD_IP_TNL_TSO | 1733 DEV_TX_OFFLOAD_UDP_TNL_TSO | 1734 DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM) & 1735 txq_ctrl->txq.offloads) && config->swp; 1736 } 1737 1738 /** 1739 * Adjust Tx queue data inline parameters for large queue sizes. 1740 * The data inline feature requires multiple WQEs to fit the packets, 1741 * and if the large amount of Tx descriptors is requested by application 1742 * the total WQE amount may exceed the hardware capabilities. If the 1743 * default inline setting are used we can try to adjust these ones and 1744 * meet the hardware requirements and not exceed the queue size. 1745 * 1746 * @param txq_ctrl 1747 * Pointer to Tx queue control structure. 1748 * 1749 * @return 1750 * Zero on success, otherwise the parameters can not be adjusted. 1751 */ 1752 static int 1753 txq_adjust_params(struct mlx5_txq_ctrl *txq_ctrl) 1754 { 1755 struct mlx5_priv *priv = txq_ctrl->priv; 1756 struct mlx5_dev_config *config = &priv->config; 1757 unsigned int max_inline; 1758 1759 max_inline = txq_calc_inline_max(txq_ctrl); 1760 if (!txq_ctrl->txq.inlen_send) { 1761 /* 1762 * Inline data feature is not engaged at all. 1763 * There is nothing to adjust. 1764 */ 1765 return 0; 1766 } 1767 if (txq_ctrl->max_inline_data <= max_inline) { 1768 /* 1769 * The requested inline data length does not 1770 * exceed queue capabilities. 
1771 */ 1772 return 0; 1773 } 1774 if (txq_ctrl->txq.inlen_mode > max_inline) { 1775 DRV_LOG(ERR, 1776 "minimal data inline requirements (%u) are not" 1777 " satisfied (%u) on port %u, try the smaller" 1778 " Tx queue size (%d)", 1779 txq_ctrl->txq.inlen_mode, max_inline, 1780 priv->dev_data->port_id, 1781 priv->sh->device_attr.max_qp_wr); 1782 goto error; 1783 } 1784 if (txq_ctrl->txq.inlen_send > max_inline && 1785 config->txq_inline_max != MLX5_ARG_UNSET && 1786 config->txq_inline_max > (int)max_inline) { 1787 DRV_LOG(ERR, 1788 "txq_inline_max requirements (%u) are not" 1789 " satisfied (%u) on port %u, try the smaller" 1790 " Tx queue size (%d)", 1791 txq_ctrl->txq.inlen_send, max_inline, 1792 priv->dev_data->port_id, 1793 priv->sh->device_attr.max_qp_wr); 1794 goto error; 1795 } 1796 if (txq_ctrl->txq.inlen_empw > max_inline && 1797 config->txq_inline_mpw != MLX5_ARG_UNSET && 1798 config->txq_inline_mpw > (int)max_inline) { 1799 DRV_LOG(ERR, 1800 "txq_inline_mpw requirements (%u) are not" 1801 " satisfied (%u) on port %u, try the smaller" 1802 " Tx queue size (%d)", 1803 txq_ctrl->txq.inlen_empw, max_inline, 1804 priv->dev_data->port_id, 1805 priv->sh->device_attr.max_qp_wr); 1806 goto error; 1807 } 1808 if (txq_ctrl->txq.tso_en && max_inline < MLX5_MAX_TSO_HEADER) { 1809 DRV_LOG(ERR, 1810 "tso header inline requirements (%u) are not" 1811 " satisfied (%u) on port %u, try the smaller" 1812 " Tx queue size (%d)", 1813 MLX5_MAX_TSO_HEADER, max_inline, 1814 priv->dev_data->port_id, 1815 priv->sh->device_attr.max_qp_wr); 1816 goto error; 1817 } 1818 if (txq_ctrl->txq.inlen_send > max_inline) { 1819 DRV_LOG(WARNING, 1820 "adjust txq_inline_max (%u->%u)" 1821 " due to large Tx queue on port %u", 1822 txq_ctrl->txq.inlen_send, max_inline, 1823 priv->dev_data->port_id); 1824 txq_ctrl->txq.inlen_send = max_inline; 1825 } 1826 if (txq_ctrl->txq.inlen_empw > max_inline) { 1827 DRV_LOG(WARNING, 1828 "adjust txq_inline_mpw (%u->%u)" 1829 "due to large Tx queue on port %u", 1830 txq_ctrl->txq.inlen_empw, max_inline, 1831 priv->dev_data->port_id); 1832 txq_ctrl->txq.inlen_empw = max_inline; 1833 } 1834 txq_ctrl->max_inline_data = RTE_MAX(txq_ctrl->txq.inlen_send, 1835 txq_ctrl->txq.inlen_empw); 1836 MLX5_ASSERT(txq_ctrl->max_inline_data <= max_inline); 1837 MLX5_ASSERT(txq_ctrl->txq.inlen_mode <= max_inline); 1838 MLX5_ASSERT(txq_ctrl->txq.inlen_mode <= txq_ctrl->txq.inlen_send); 1839 MLX5_ASSERT(txq_ctrl->txq.inlen_mode <= txq_ctrl->txq.inlen_empw || 1840 !txq_ctrl->txq.inlen_empw); 1841 return 0; 1842 error: 1843 rte_errno = ENOMEM; 1844 return -ENOMEM; 1845 } 1846 1847 /** 1848 * Create a DPDK Tx queue. 1849 * 1850 * @param dev 1851 * Pointer to Ethernet device. 1852 * @param idx 1853 * TX queue index. 1854 * @param desc 1855 * Number of descriptors to configure in queue. 1856 * @param socket 1857 * NUMA socket on which memory must be allocated. 1858 * @param[in] conf 1859 * Thresholds parameters. 1860 * 1861 * @return 1862 * A DPDK queue object on success, NULL otherwise and rte_errno is set. 
/**
 * Create a DPDK Tx queue.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param idx
 *   TX queue index.
 * @param desc
 *   Number of descriptors to configure in queue.
 * @param socket
 *   NUMA socket on which memory must be allocated.
 * @param[in] conf
 *   Thresholds parameters.
 *
 * @return
 *   A DPDK queue object on success, NULL otherwise and rte_errno is set.
 */
struct mlx5_txq_ctrl *
mlx5_txq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
	     unsigned int socket, const struct rte_eth_txconf *conf)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_ctrl *tmpl;

	tmpl = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, sizeof(*tmpl) +
			   desc * sizeof(struct rte_mbuf *), 0, socket);
	if (!tmpl) {
		rte_errno = ENOMEM;
		return NULL;
	}
	if (mlx5_mr_btree_init(&tmpl->txq.mr_ctrl.cache_bh,
			       MLX5_MR_BTREE_CACHE_N, socket)) {
		/* rte_errno is already set. */
		goto error;
	}
	/* Save pointer of global generation number to check memory event. */
	tmpl->txq.mr_ctrl.dev_gen_ptr = &priv->sh->share_cache.dev_gen;
	MLX5_ASSERT(desc > MLX5_TX_COMP_THRESH);
	tmpl->txq.offloads = conf->offloads |
			     dev->data->dev_conf.txmode.offloads;
	tmpl->priv = priv;
	tmpl->socket = socket;
	tmpl->txq.elts_n = log2above(desc);
	tmpl->txq.elts_s = desc;
	tmpl->txq.elts_m = desc - 1;
	tmpl->txq.port_id = dev->data->port_id;
	tmpl->txq.idx = idx;
	txq_set_params(tmpl);
	if (txq_adjust_params(tmpl))
		goto error;
	if (txq_calc_wqebb_cnt(tmpl) >
	    priv->sh->device_attr.max_qp_wr) {
		DRV_LOG(ERR,
			"port %u Tx WQEBB count (%d) exceeds the limit (%d),"
			" try smaller queue size",
			dev->data->port_id, txq_calc_wqebb_cnt(tmpl),
			priv->sh->device_attr.max_qp_wr);
		rte_errno = ENOMEM;
		goto error;
	}
	rte_atomic32_inc(&tmpl->refcnt);
	tmpl->type = MLX5_TXQ_TYPE_STANDARD;
	LIST_INSERT_HEAD(&priv->txqsctrl, tmpl, next);
	return tmpl;
error:
	mlx5_free(tmpl);
	return NULL;
}

/**
 * Create a DPDK Tx hairpin queue.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param idx
 *   TX queue index.
 * @param desc
 *   Number of descriptors to configure in queue.
 * @param hairpin_conf
 *   The hairpin configuration.
 *
 * @return
 *   A DPDK queue object on success, NULL otherwise and rte_errno is set.
 */
struct mlx5_txq_ctrl *
mlx5_txq_hairpin_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
		     const struct rte_eth_hairpin_conf *hairpin_conf)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_ctrl *tmpl;

	tmpl = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, sizeof(*tmpl), 0,
			   SOCKET_ID_ANY);
	if (!tmpl) {
		rte_errno = ENOMEM;
		return NULL;
	}
	tmpl->priv = priv;
	tmpl->socket = SOCKET_ID_ANY;
	tmpl->txq.elts_n = log2above(desc);
	tmpl->txq.port_id = dev->data->port_id;
	tmpl->txq.idx = idx;
	tmpl->hairpin_conf = *hairpin_conf;
	tmpl->type = MLX5_TXQ_TYPE_HAIRPIN;
	rte_atomic32_inc(&tmpl->refcnt);
	LIST_INSERT_HEAD(&priv->txqsctrl, tmpl, next);
	return tmpl;
}
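/*
 * Illustrative sketch only (application side, not driver code): binding
 * a hairpin Tx queue to an Rx queue of the same port through the ethdev
 * API, which eventually reaches mlx5_txq_hairpin_new() above. The
 * function name and the port/queue index parameters are placeholders.
 */
static __rte_unused int
example_setup_hairpin_txq(uint16_t port_id, uint16_t tx_queue_id,
			  uint16_t peer_rx_queue_id, uint16_t nb_desc)
{
	struct rte_eth_hairpin_conf conf = {
		.peer_count = 1,
	};

	conf.peers[0].port = port_id;
	conf.peers[0].queue = peer_rx_queue_id;
	/* Must be called after rte_eth_dev_configure() and before start. */
	return rte_eth_tx_hairpin_queue_setup(port_id, tx_queue_id,
					      nb_desc, &conf);
}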
/**
 * Get a Tx queue.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param idx
 *   TX queue index.
 *
 * @return
 *   A pointer to the queue if it exists.
 */
struct mlx5_txq_ctrl *
mlx5_txq_get(struct rte_eth_dev *dev, uint16_t idx)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_ctrl *ctrl = NULL;

	if ((*priv->txqs)[idx]) {
		ctrl = container_of((*priv->txqs)[idx], struct mlx5_txq_ctrl,
				    txq);
		mlx5_txq_obj_get(dev, idx);
		rte_atomic32_inc(&ctrl->refcnt);
	}
	return ctrl;
}

/**
 * Release a Tx queue.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param idx
 *   TX queue index.
 *
 * @return
 *   1 while a reference on it exists, 0 when freed.
 */
int
mlx5_txq_release(struct rte_eth_dev *dev, uint16_t idx)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_ctrl *txq;

	if (!(*priv->txqs)[idx])
		return 0;
	txq = container_of((*priv->txqs)[idx], struct mlx5_txq_ctrl, txq);
	if (txq->obj && !mlx5_txq_obj_release(txq->obj))
		txq->obj = NULL;
	if (rte_atomic32_dec_and_test(&txq->refcnt)) {
		txq_free_elts(txq);
		mlx5_mr_btree_free(&txq->txq.mr_ctrl.cache_bh);
		LIST_REMOVE(txq, next);
		mlx5_free(txq);
		(*priv->txqs)[idx] = NULL;
		dev->data->tx_queue_state[idx] = RTE_ETH_QUEUE_STATE_STOPPED;
		return 0;
	}
	return 1;
}

/**
 * Verify if the queue can be released.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param idx
 *   TX queue index.
 *
 * @return
 *   1 if the queue can be released, 0 if it is still referenced,
 *   -1 if the queue does not exist.
 */
int
mlx5_txq_releasable(struct rte_eth_dev *dev, uint16_t idx)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_ctrl *txq;

	if (!(*priv->txqs)[idx])
		return -1;
	txq = container_of((*priv->txqs)[idx], struct mlx5_txq_ctrl, txq);
	return (rte_atomic32_read(&txq->refcnt) == 1);
}

/**
 * Verify that the Tx queue list is empty.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   The number of objects not released.
 */
int
mlx5_txq_verify(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_ctrl *txq_ctrl;
	int ret = 0;

	LIST_FOREACH(txq_ctrl, &priv->txqsctrl, next) {
		DRV_LOG(DEBUG, "port %u Tx queue %u still referenced",
			dev->data->port_id, txq_ctrl->txq.idx);
		++ret;
	}
	return ret;
}

/**
 * Set the Tx queue dynamic timestamp (mask and offset).
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 */
void
mlx5_txq_dynf_timestamp_set(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_dev_ctx_shared *sh = priv->sh;
	struct mlx5_txq_data *data;
	int off, nbit;
	unsigned int i;
	uint64_t mask = 0;

	nbit = rte_mbuf_dynflag_lookup
				(RTE_MBUF_DYNFLAG_TX_TIMESTAMP_NAME, NULL);
	off = rte_mbuf_dynfield_lookup
				(RTE_MBUF_DYNFIELD_TIMESTAMP_NAME, NULL);
	if (nbit > 0 && off >= 0 && sh->txpp.refcnt)
		mask = 1ULL << nbit;
	for (i = 0; i != priv->txqs_n; ++i) {
		data = (*priv->txqs)[i];
		if (!data)
			continue;
		data->sh = sh;
		data->ts_mask = mask;
		data->ts_offset = off;
	}
}
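/*
 * Illustrative sketch only (application side, not driver code): looking
 * up the dynamic timestamp field and flag consumed above and stamping an
 * mbuf before transmission. Registration of the field/flag (e.g. via the
 * mlx5 Tx packet pacing support) is assumed to have happened already;
 * the helper name is hypothetical.
 */
static __rte_unused int
example_stamp_tx_timestamp(struct rte_mbuf *mbuf, uint64_t ts_ns)
{
	int off = rte_mbuf_dynfield_lookup(RTE_MBUF_DYNFIELD_TIMESTAMP_NAME,
					   NULL);
	int nbit = rte_mbuf_dynflag_lookup(RTE_MBUF_DYNFLAG_TX_TIMESTAMP_NAME,
					   NULL);

	if (off < 0 || nbit < 0)
		return -rte_errno;
	/* Store the desired send time and mark the mbuf accordingly. */
	*RTE_MBUF_DYNFIELD(mbuf, off, uint64_t *) = ts_ns;
	mbuf->ol_flags |= 1ULL << nbit;
	return 0;
}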