/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2015 6WIND S.A.
 * Copyright 2015 Mellanox Technologies, Ltd
 */

#include <stddef.h>
#include <errno.h>
#include <string.h>
#include <stdint.h>
#include <unistd.h>
#include <inttypes.h>

#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <rte_ethdev_driver.h>
#include <rte_common.h>
#include <rte_eal_paging.h>

#include <mlx5_glue.h>
#include <mlx5_devx_cmds.h>
#include <mlx5_common.h>
#include <mlx5_common_mr.h>
#include <mlx5_common_os.h>
#include <mlx5_malloc.h>

#include "mlx5_defs.h"
#include "mlx5_utils.h"
#include "mlx5.h"
#include "mlx5_rxtx.h"
#include "mlx5_autoconf.h"

/**
 * Allocate TX queue elements.
 *
 * @param txq_ctrl
 *   Pointer to TX queue structure.
 */
void
txq_alloc_elts(struct mlx5_txq_ctrl *txq_ctrl)
{
	const unsigned int elts_n = 1 << txq_ctrl->txq.elts_n;
	unsigned int i;

	for (i = 0; (i != elts_n); ++i)
		txq_ctrl->txq.elts[i] = NULL;
	DRV_LOG(DEBUG, "port %u Tx queue %u allocated and configured %u WRs",
		PORT_ID(txq_ctrl->priv), txq_ctrl->txq.idx, elts_n);
	txq_ctrl->txq.elts_head = 0;
	txq_ctrl->txq.elts_tail = 0;
	txq_ctrl->txq.elts_comp = 0;
}

/**
 * Free TX queue elements.
 *
 * @param txq_ctrl
 *   Pointer to TX queue structure.
 */
void
txq_free_elts(struct mlx5_txq_ctrl *txq_ctrl)
{
	const uint16_t elts_n = 1 << txq_ctrl->txq.elts_n;
	const uint16_t elts_m = elts_n - 1;
	uint16_t elts_head = txq_ctrl->txq.elts_head;
	uint16_t elts_tail = txq_ctrl->txq.elts_tail;
	struct rte_mbuf *(*elts)[elts_n] = &txq_ctrl->txq.elts;

	DRV_LOG(DEBUG, "port %u Tx queue %u freeing WRs",
		PORT_ID(txq_ctrl->priv), txq_ctrl->txq.idx);
	txq_ctrl->txq.elts_head = 0;
	txq_ctrl->txq.elts_tail = 0;
	txq_ctrl->txq.elts_comp = 0;

	while (elts_tail != elts_head) {
		struct rte_mbuf *elt = (*elts)[elts_tail & elts_m];

		MLX5_ASSERT(elt != NULL);
		rte_pktmbuf_free_seg(elt);
#ifdef RTE_LIBRTE_MLX5_DEBUG
		/* Poisoning. */
		memset(&(*elts)[elts_tail & elts_m],
		       0x77,
		       sizeof((*elts)[elts_tail & elts_m]));
#endif
		++elts_tail;
	}
}

/**
 * Returns the per-port supported offloads.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   Supported Tx offloads.
 */
uint64_t
mlx5_get_tx_port_offloads(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	uint64_t offloads = (DEV_TX_OFFLOAD_MULTI_SEGS |
			     DEV_TX_OFFLOAD_VLAN_INSERT);
	struct mlx5_dev_config *config = &priv->config;

	if (config->hw_csum)
		offloads |= (DEV_TX_OFFLOAD_IPV4_CKSUM |
			     DEV_TX_OFFLOAD_UDP_CKSUM |
			     DEV_TX_OFFLOAD_TCP_CKSUM);
	if (config->tso)
		offloads |= DEV_TX_OFFLOAD_TCP_TSO;
	if (config->tx_pp)
		offloads |= DEV_TX_OFFLOAD_SEND_ON_TIMESTAMP;
	if (config->swp) {
		if (config->hw_csum)
			offloads |= DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM;
		if (config->tso)
			offloads |= (DEV_TX_OFFLOAD_IP_TNL_TSO |
				     DEV_TX_OFFLOAD_UDP_TNL_TSO);
	}
	if (config->tunnel_en) {
		if (config->hw_csum)
			offloads |= DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM;
		if (config->tso)
			offloads |= (DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
				     DEV_TX_OFFLOAD_GRE_TNL_TSO |
				     DEV_TX_OFFLOAD_GENEVE_TNL_TSO);
	}
	return offloads;
}

/* Fetches and drops all SW-owned and error CQEs to synchronize CQ. */
static void
txq_sync_cq(struct mlx5_txq_data *txq)
{
	volatile struct mlx5_cqe *cqe;
	int ret, i;

	i = txq->cqe_s;
	do {
		cqe = &txq->cqes[txq->cq_ci & txq->cqe_m];
		ret = check_cqe(cqe, txq->cqe_s, txq->cq_ci);
		if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) {
			if (likely(ret != MLX5_CQE_STATUS_ERR)) {
				/* No new CQEs in completion queue. */
				MLX5_ASSERT(ret == MLX5_CQE_STATUS_HW_OWN);
				break;
			}
		}
		++txq->cq_ci;
	} while (--i);
	/* Move all CQEs to HW ownership. */
	for (i = 0; i < txq->cqe_s; i++) {
		cqe = &txq->cqes[i];
		cqe->op_own = MLX5_CQE_INVALIDATE;
	}
	/* Resync CQE and WQE (WQ in reset state). */
	rte_io_wmb();
	*txq->cq_db = rte_cpu_to_be_32(txq->cq_ci);
	rte_io_wmb();
}

/**
 * Tx queue stop. Device queue goes to the idle state,
 * all involved mbufs are freed from elts/WQ.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param idx
 *   Tx queue index.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_tx_queue_stop_primary(struct rte_eth_dev *dev, uint16_t idx)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_data *txq = (*priv->txqs)[idx];
	struct mlx5_txq_ctrl *txq_ctrl =
			container_of(txq, struct mlx5_txq_ctrl, txq);
	int ret;

	MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
	/* Move QP to RESET state. */
	if (txq_ctrl->obj->type == MLX5_TXQ_OBJ_TYPE_DEVX_SQ) {
		struct mlx5_devx_modify_sq_attr msq_attr = { 0 };

		/* Change queue state to reset with DevX. */
		msq_attr.sq_state = MLX5_SQC_STATE_RDY;
		msq_attr.state = MLX5_SQC_STATE_RST;
		ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq_devx,
					      &msq_attr);
		if (ret) {
			DRV_LOG(ERR, "Cannot change the "
				"Tx QP state to RESET %s",
				strerror(errno));
			rte_errno = errno;
			return ret;
		}
	} else {
		struct ibv_qp_attr mod = {
			.qp_state = IBV_QPS_RESET,
			.port_num = (uint8_t)priv->dev_port,
		};
		struct ibv_qp *qp = txq_ctrl->obj->qp;

		/* Change queue state to reset with Verbs. */
		ret = mlx5_glue->modify_qp(qp, &mod, IBV_QP_STATE);
		if (ret) {
			DRV_LOG(ERR, "Cannot change the Tx QP state to RESET "
				"%s", strerror(errno));
			rte_errno = errno;
			return ret;
		}
	}
	/* Handle all send completions. */
	txq_sync_cq(txq);
	/* Free elts stored in the SQ. */
	txq_free_elts(txq_ctrl);
	/* Prevent writing new pkts to SQ by setting no free WQE. */
	txq->wqe_ci = txq->wqe_s;
	txq->wqe_pi = 0;
	txq->elts_comp = 0;
	/* Set the actual queue state. */
	dev->data->tx_queue_state[idx] = RTE_ETH_QUEUE_STATE_STOPPED;
	return 0;
}

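/*
 * In a secondary process the queue state cannot be changed directly:
 * the start/stop wrappers below forward the request to the primary
 * process over the multi-process channel (mlx5_mp_os_req_queue_control)
 * and only the primary runs mlx5_tx_queue_stop_primary() /
 * mlx5_tx_queue_start_primary().
 */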
/**
 * Tx queue stop. Device queue goes to the idle state,
 * all involved mbufs are freed from elts/WQ.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param idx
 *   Tx queue index.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_tx_queue_stop(struct rte_eth_dev *dev, uint16_t idx)
{
	int ret;

	if (rte_eth_dev_is_tx_hairpin_queue(dev, idx)) {
		DRV_LOG(ERR, "Hairpin queue can't be stopped");
		rte_errno = EINVAL;
		return -EINVAL;
	}
	if (dev->data->tx_queue_state[idx] == RTE_ETH_QUEUE_STATE_STOPPED)
		return 0;
	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
		ret = mlx5_mp_os_req_queue_control(dev, idx,
						   MLX5_MP_REQ_QUEUE_TX_STOP);
	} else {
		ret = mlx5_tx_queue_stop_primary(dev, idx);
	}
	return ret;
}

/**
 * Tx queue start. Device queue goes to the ready state,
 * transmission can be resumed.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param idx
 *   Tx queue index.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_tx_queue_start_primary(struct rte_eth_dev *dev, uint16_t idx)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_data *txq = (*priv->txqs)[idx];
	struct mlx5_txq_ctrl *txq_ctrl =
			container_of(txq, struct mlx5_txq_ctrl, txq);
	int ret;

	MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
	if (txq_ctrl->obj->type == MLX5_TXQ_OBJ_TYPE_DEVX_SQ) {
		struct mlx5_devx_modify_sq_attr msq_attr = { 0 };
		struct mlx5_txq_obj *obj = txq_ctrl->obj;

		msq_attr.sq_state = MLX5_SQC_STATE_RDY;
		msq_attr.state = MLX5_SQC_STATE_RST;
		ret = mlx5_devx_cmd_modify_sq(obj->sq_devx, &msq_attr);
		if (ret) {
			rte_errno = errno;
			DRV_LOG(ERR,
				"Cannot change the Tx QP state to RESET "
				"%s", strerror(errno));
			return ret;
		}
		msq_attr.sq_state = MLX5_SQC_STATE_RST;
		msq_attr.state = MLX5_SQC_STATE_RDY;
		ret = mlx5_devx_cmd_modify_sq(obj->sq_devx, &msq_attr);
		if (ret) {
			rte_errno = errno;
			DRV_LOG(ERR,
				"Cannot change the Tx QP state to READY "
				"%s", strerror(errno));
			return ret;
		}
	} else {
		struct ibv_qp_attr mod = {
			.qp_state = IBV_QPS_RESET,
			.port_num = (uint8_t)priv->dev_port,
		};
		struct ibv_qp *qp = txq_ctrl->obj->qp;

		ret = mlx5_glue->modify_qp(qp, &mod, IBV_QP_STATE);
		if (ret) {
			DRV_LOG(ERR, "Cannot change the Tx QP state to RESET "
				"%s", strerror(errno));
			rte_errno = errno;
			return ret;
		}
		mod.qp_state = IBV_QPS_INIT;
		ret = mlx5_glue->modify_qp(qp, &mod,
					   (IBV_QP_STATE | IBV_QP_PORT));
		if (ret) {
			DRV_LOG(ERR, "Cannot change Tx QP state to INIT %s",
				strerror(errno));
			rte_errno = errno;
			return ret;
		}
		mod.qp_state = IBV_QPS_RTR;
		ret = mlx5_glue->modify_qp(qp, &mod, IBV_QP_STATE);
		if (ret) {
			DRV_LOG(ERR, "Cannot change Tx QP state to RTR %s",
				strerror(errno));
			rte_errno = errno;
			return ret;
		}
		mod.qp_state = IBV_QPS_RTS;
		ret = mlx5_glue->modify_qp(qp, &mod, IBV_QP_STATE);
		if (ret) {
			DRV_LOG(ERR, "Cannot change Tx QP state to RTS %s",
				strerror(errno));
			rte_errno = errno;
			return ret;
		}
	}
	txq_ctrl->txq.wqe_ci = 0;
	txq_ctrl->txq.wqe_pi = 0;
	txq_ctrl->txq.elts_comp = 0;
	/* Set the actual queue state. */
	dev->data->tx_queue_state[idx] = RTE_ETH_QUEUE_STATE_STARTED;
	return 0;
}

/**
 * Tx queue start. Device queue goes to the ready state,
 * transmission can be resumed.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param idx
 *   Tx queue index.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_tx_queue_start(struct rte_eth_dev *dev, uint16_t idx)
{
	int ret;

	if (rte_eth_dev_is_tx_hairpin_queue(dev, idx)) {
		DRV_LOG(ERR, "Hairpin queue can't be started");
		rte_errno = EINVAL;
		return -EINVAL;
	}
	if (dev->data->tx_queue_state[idx] == RTE_ETH_QUEUE_STATE_STARTED)
		return 0;
	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
		ret = mlx5_mp_os_req_queue_control(dev, idx,
						   MLX5_MP_REQ_QUEUE_TX_START);
	} else {
		ret = mlx5_tx_queue_start_primary(dev, idx);
	}
	return ret;
}

/**
 * Tx queue presetup checks.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param idx
 *   Tx queue index.
 * @param desc
 *   Number of descriptors to configure in queue.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_tx_queue_pre_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t *desc)
{
	struct mlx5_priv *priv = dev->data->dev_private;

	if (*desc <= MLX5_TX_COMP_THRESH) {
		DRV_LOG(WARNING,
			"port %u number of descriptors requested for Tx queue"
			" %u must be higher than MLX5_TX_COMP_THRESH, using %u"
			" instead of %u", dev->data->port_id, idx,
			MLX5_TX_COMP_THRESH + 1, *desc);
		*desc = MLX5_TX_COMP_THRESH + 1;
	}
	if (!rte_is_power_of_2(*desc)) {
		*desc = 1 << log2above(*desc);
		DRV_LOG(WARNING,
			"port %u increased number of descriptors in Tx queue"
			" %u to the next power of two (%d)",
			dev->data->port_id, idx, *desc);
	}
	DRV_LOG(DEBUG, "port %u configuring queue %u for %u descriptors",
		dev->data->port_id, idx, *desc);
	if (idx >= priv->txqs_n) {
		DRV_LOG(ERR, "port %u Tx queue index out of range (%u >= %u)",
			dev->data->port_id, idx, priv->txqs_n);
		rte_errno = EOVERFLOW;
		return -rte_errno;
	}
	if (!mlx5_txq_releasable(dev, idx)) {
		rte_errno = EBUSY;
		DRV_LOG(ERR, "port %u unable to release queue index %u",
			dev->data->port_id, idx);
		return -rte_errno;
	}
	mlx5_txq_release(dev, idx);
	return 0;
}

/**
 * DPDK callback to configure a TX queue.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param idx
 *   TX queue index.
 * @param desc
 *   Number of descriptors to configure in queue.
 * @param socket
 *   NUMA socket on which memory must be allocated.
 * @param[in] conf
 *   Thresholds parameters.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
		    unsigned int socket, const struct rte_eth_txconf *conf)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_data *txq = (*priv->txqs)[idx];
	struct mlx5_txq_ctrl *txq_ctrl =
		container_of(txq, struct mlx5_txq_ctrl, txq);
	int res;

	res = mlx5_tx_queue_pre_setup(dev, idx, &desc);
	if (res)
		return res;
	txq_ctrl = mlx5_txq_new(dev, idx, desc, socket, conf);
	if (!txq_ctrl) {
		DRV_LOG(ERR, "port %u unable to allocate queue index %u",
			dev->data->port_id, idx);
		return -rte_errno;
	}
	DRV_LOG(DEBUG, "port %u adding Tx queue %u to list",
		dev->data->port_id, idx);
	(*priv->txqs)[idx] = &txq_ctrl->txq;
	dev->data->tx_queue_state[idx] = RTE_ETH_QUEUE_STATE_STARTED;
	return 0;
}

/**
 * DPDK callback to configure a TX hairpin queue.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param idx
 *   TX queue index.
 * @param desc
 *   Number of descriptors to configure in queue.
 * @param[in] hairpin_conf
 *   The hairpin binding configuration.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_tx_hairpin_queue_setup(struct rte_eth_dev *dev, uint16_t idx,
			    uint16_t desc,
			    const struct rte_eth_hairpin_conf *hairpin_conf)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_data *txq = (*priv->txqs)[idx];
	struct mlx5_txq_ctrl *txq_ctrl =
		container_of(txq, struct mlx5_txq_ctrl, txq);
	int res;

	res = mlx5_tx_queue_pre_setup(dev, idx, &desc);
	if (res)
		return res;
	if (hairpin_conf->peer_count != 1 ||
	    hairpin_conf->peers[0].port != dev->data->port_id ||
	    hairpin_conf->peers[0].queue >= priv->rxqs_n) {
		DRV_LOG(ERR, "port %u unable to setup hairpin queue index %u:"
			" invalid hairpin configuration", dev->data->port_id,
			idx);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	txq_ctrl = mlx5_txq_hairpin_new(dev, idx, desc, hairpin_conf);
	if (!txq_ctrl) {
		DRV_LOG(ERR, "port %u unable to allocate queue index %u",
			dev->data->port_id, idx);
		return -rte_errno;
	}
	DRV_LOG(DEBUG, "port %u adding Tx queue %u to list",
		dev->data->port_id, idx);
	(*priv->txqs)[idx] = &txq_ctrl->txq;
	dev->data->tx_queue_state[idx] = RTE_ETH_QUEUE_STATE_HAIRPIN;
	return 0;
}

/**
 * DPDK callback to release a TX queue.
 *
 * @param dpdk_txq
 *   Generic TX queue pointer.
 */
void
mlx5_tx_queue_release(void *dpdk_txq)
{
	struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
	struct mlx5_txq_ctrl *txq_ctrl;
	struct mlx5_priv *priv;
	unsigned int i;

	if (txq == NULL)
		return;
	txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
	priv = txq_ctrl->priv;
	for (i = 0; (i != priv->txqs_n); ++i)
		if ((*priv->txqs)[i] == txq) {
			DRV_LOG(DEBUG, "port %u removing Tx queue %u from list",
				PORT_ID(priv), txq->idx);
			mlx5_txq_release(ETH_DEV(priv), i);
			break;
		}
}

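/*
 * The UAR mmap offset encodes a command field
 * (MLX5_UAR_MMAP_CMD_SHIFT/MLX5_UAR_MMAP_CMD_MASK) describing how the
 * doorbell page was mapped. txq_uar_ncattr_init() below decodes it to
 * detect a non-cached mapping (MLX5_MMAP_GET_NC_PAGES_CMD) and sets
 * txq.db_nc; txq.db_heu is set when the heuristic doorbell write mode
 * is configured (MLX5_TXDB_HEURISTIC).
 */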
/**
 * Configure the doorbell register non-cached attribute.
 *
 * @param txq_ctrl
 *   Pointer to Tx queue control structure.
 * @param page_size
 *   System page size.
 */
static void
txq_uar_ncattr_init(struct mlx5_txq_ctrl *txq_ctrl, size_t page_size)
{
	struct mlx5_priv *priv = txq_ctrl->priv;
	off_t cmd;

	txq_ctrl->txq.db_heu = priv->config.dbnc == MLX5_TXDB_HEURISTIC;
	txq_ctrl->txq.db_nc = 0;
	/* Check the doorbell register mapping type. */
	cmd = txq_ctrl->uar_mmap_offset / page_size;
	cmd >>= MLX5_UAR_MMAP_CMD_SHIFT;
	cmd &= MLX5_UAR_MMAP_CMD_MASK;
	if (cmd == MLX5_MMAP_GET_NC_PAGES_CMD)
		txq_ctrl->txq.db_nc = 1;
}

/**
 * Initialize Tx UAR registers for primary process.
 *
 * @param txq_ctrl
 *   Pointer to Tx queue control structure.
 */
static void
txq_uar_init(struct mlx5_txq_ctrl *txq_ctrl)
{
	struct mlx5_priv *priv = txq_ctrl->priv;
	struct mlx5_proc_priv *ppriv = MLX5_PROC_PRIV(PORT_ID(priv));
#ifndef RTE_ARCH_64
	unsigned int lock_idx;
#endif
	const size_t page_size = rte_mem_page_size();
	if (page_size == (size_t)-1) {
		DRV_LOG(ERR, "Failed to get mem page size");
		rte_errno = ENOMEM;
	}

	if (txq_ctrl->type != MLX5_TXQ_TYPE_STANDARD)
		return;
	MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
	MLX5_ASSERT(ppriv);
	ppriv->uar_table[txq_ctrl->txq.idx] = txq_ctrl->bf_reg;
	txq_uar_ncattr_init(txq_ctrl, page_size);
#ifndef RTE_ARCH_64
	/* Assign an UAR lock according to UAR page number. */
	lock_idx = (txq_ctrl->uar_mmap_offset / page_size) &
		   MLX5_UAR_PAGE_NUM_MASK;
	txq_ctrl->txq.uar_lock = &priv->sh->uar_lock[lock_idx];
#endif
}

/**
 * Remap UAR register of a Tx queue for secondary process.
 *
 * Remapped address is stored at the table in the process private structure of
 * the device, indexed by queue index.
 *
 * @param txq_ctrl
 *   Pointer to Tx queue control structure.
 * @param fd
 *   Verbs file descriptor to map UAR pages.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
txq_uar_init_secondary(struct mlx5_txq_ctrl *txq_ctrl, int fd)
{
	struct mlx5_priv *priv = txq_ctrl->priv;
	struct mlx5_proc_priv *ppriv = MLX5_PROC_PRIV(PORT_ID(priv));
	struct mlx5_txq_data *txq = &txq_ctrl->txq;
	void *addr;
	uintptr_t uar_va;
	uintptr_t offset;
	const size_t page_size = rte_mem_page_size();
	if (page_size == (size_t)-1) {
		DRV_LOG(ERR, "Failed to get mem page size");
		rte_errno = ENOMEM;
		return -rte_errno;
	}

	if (txq_ctrl->type != MLX5_TXQ_TYPE_STANDARD)
		return 0;
	MLX5_ASSERT(ppriv);
	/*
	 * As rdma-core, UARs are mapped in size of OS page
	 * size. Ref to libmlx5 function: mlx5_init_context()
	 */
	uar_va = (uintptr_t)txq_ctrl->bf_reg;
	offset = uar_va & (page_size - 1); /* Offset in page. */
	addr = rte_mem_map(NULL, page_size, RTE_PROT_WRITE, RTE_MAP_SHARED,
			   fd, txq_ctrl->uar_mmap_offset);
	if (!addr) {
		DRV_LOG(ERR,
			"port %u mmap failed for BF reg of txq %u",
			txq->port_id, txq->idx);
		rte_errno = ENXIO;
		return -rte_errno;
	}
	addr = RTE_PTR_ADD(addr, offset);
	ppriv->uar_table[txq->idx] = addr;
	txq_uar_ncattr_init(txq_ctrl, page_size);
	return 0;
}

/**
 * Unmap UAR register of a Tx queue for secondary process.
 *
 * @param txq_ctrl
 *   Pointer to Tx queue control structure.
 */
static void
txq_uar_uninit_secondary(struct mlx5_txq_ctrl *txq_ctrl)
{
	struct mlx5_proc_priv *ppriv = MLX5_PROC_PRIV(PORT_ID(txq_ctrl->priv));
	void *addr;
	const size_t page_size = rte_mem_page_size();
	if (page_size == (size_t)-1) {
		DRV_LOG(ERR, "Failed to get mem page size");
		rte_errno = ENOMEM;
	}

	if (txq_ctrl->type != MLX5_TXQ_TYPE_STANDARD)
		return;
	addr = ppriv->uar_table[txq_ctrl->txq.idx];
	rte_mem_unmap(RTE_PTR_ALIGN_FLOOR(addr, page_size), page_size);
}

/**
 * Deinitialize Tx UAR registers for secondary process.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
void
mlx5_tx_uar_uninit_secondary(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_data *txq;
	struct mlx5_txq_ctrl *txq_ctrl;
	unsigned int i;

	MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_SECONDARY);
	for (i = 0; i != priv->txqs_n; ++i) {
		if (!(*priv->txqs)[i])
			continue;
		txq = (*priv->txqs)[i];
		txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
		txq_uar_uninit_secondary(txq_ctrl);
	}
}

/**
 * Initialize Tx UAR registers for secondary process.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fd
 *   Verbs file descriptor to map UAR pages.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_tx_uar_init_secondary(struct rte_eth_dev *dev, int fd)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_data *txq;
	struct mlx5_txq_ctrl *txq_ctrl;
	unsigned int i;
	int ret;

	MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_SECONDARY);
	for (i = 0; i != priv->txqs_n; ++i) {
		if (!(*priv->txqs)[i])
			continue;
		txq = (*priv->txqs)[i];
		txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
		if (txq_ctrl->type != MLX5_TXQ_TYPE_STANDARD)
			continue;
		MLX5_ASSERT(txq->idx == (uint16_t)i);
		ret = txq_uar_init_secondary(txq_ctrl, fd);
		if (ret)
			goto error;
	}
	return 0;
error:
	/* Rollback. */
	do {
		if (!(*priv->txqs)[i])
			continue;
		txq = (*priv->txqs)[i];
		txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
		txq_uar_uninit_secondary(txq_ctrl);
	} while (i--);
	return -rte_errno;
}

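/*
 * Hairpin Tx queues are backed by a DevX SQ created with the hairpin
 * attribute set: the data path is handled by the hardware itself, so no
 * WQ/CQ buffers are allocated in host memory and only the hairpin data
 * and packet-count sizes (log_hairpin_data_sz/log_hairpin_num_packets)
 * need to be chosen below.
 */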
/**
 * Create the Tx hairpin queue object.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param idx
 *   Queue index in DPDK Tx queue array.
 *
 * @return
 *   The hairpin DevX object initialised, NULL otherwise and rte_errno is set.
 */
static struct mlx5_txq_obj *
mlx5_txq_obj_hairpin_new(struct rte_eth_dev *dev, uint16_t idx)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_data *txq_data = (*priv->txqs)[idx];
	struct mlx5_txq_ctrl *txq_ctrl =
		container_of(txq_data, struct mlx5_txq_ctrl, txq);
	struct mlx5_devx_create_sq_attr attr = { 0 };
	struct mlx5_txq_obj *tmpl = NULL;
	uint32_t max_wq_data;

	MLX5_ASSERT(txq_data);
	MLX5_ASSERT(!txq_ctrl->obj);
	tmpl = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, sizeof(*tmpl), 0,
			   txq_ctrl->socket);
	if (!tmpl) {
		DRV_LOG(ERR,
			"port %u Tx queue %u cannot allocate memory resources",
			dev->data->port_id, txq_data->idx);
		rte_errno = ENOMEM;
		return NULL;
	}
	tmpl->type = MLX5_TXQ_OBJ_TYPE_DEVX_HAIRPIN;
	tmpl->txq_ctrl = txq_ctrl;
	attr.hairpin = 1;
	attr.tis_lst_sz = 1;
	max_wq_data = priv->config.hca_attr.log_max_hairpin_wq_data_sz;
	/* Jumbo frames > 9KB should be supported, and more packets. */
	if (priv->config.log_hp_size != (uint32_t)MLX5_ARG_UNSET) {
		if (priv->config.log_hp_size > max_wq_data) {
			DRV_LOG(ERR, "total data size %u power of 2 is "
				"too large for hairpin",
				priv->config.log_hp_size);
			mlx5_free(tmpl);
			rte_errno = ERANGE;
			return NULL;
		}
		attr.wq_attr.log_hairpin_data_sz = priv->config.log_hp_size;
	} else {
		attr.wq_attr.log_hairpin_data_sz =
				(max_wq_data < MLX5_HAIRPIN_JUMBO_LOG_SIZE) ?
				 max_wq_data : MLX5_HAIRPIN_JUMBO_LOG_SIZE;
	}
	/* Set the packets number to the maximum value for performance. */
	attr.wq_attr.log_hairpin_num_packets =
			attr.wq_attr.log_hairpin_data_sz -
			MLX5_HAIRPIN_QUEUE_STRIDE;
	attr.tis_num = priv->sh->tis->id;
	tmpl->sq = mlx5_devx_cmd_create_sq(priv->sh->ctx, &attr);
	if (!tmpl->sq) {
		DRV_LOG(ERR,
			"port %u Tx hairpin queue %u can't create SQ object",
			dev->data->port_id, idx);
		mlx5_free(tmpl);
		rte_errno = errno;
		return NULL;
	}
	DRV_LOG(DEBUG, "port %u Tx queue %u updated with %p",
		dev->data->port_id, idx, (void *)&tmpl);
	rte_atomic32_inc(&tmpl->refcnt);
	LIST_INSERT_HEAD(&priv->txqsobj, tmpl, next);
	return tmpl;
}

/**
 * Destroy the Tx queue DevX object.
 *
 * @param txq_obj
 *   Txq object to destroy.
 */
static void
txq_release_sq_resources(struct mlx5_txq_obj *txq_obj)
{
	MLX5_ASSERT(txq_obj->type == MLX5_TXQ_OBJ_TYPE_DEVX_SQ);

	if (txq_obj->sq_devx)
		claim_zero(mlx5_devx_cmd_destroy(txq_obj->sq_devx));
	if (txq_obj->sq_dbrec_page)
		claim_zero(mlx5_release_dbr
				(&txq_obj->txq_ctrl->priv->dbrpgs,
				 mlx5_os_get_umem_id
					(txq_obj->sq_dbrec_page->umem),
				 txq_obj->sq_dbrec_offset));
	if (txq_obj->sq_umem)
		claim_zero(mlx5_glue->devx_umem_dereg(txq_obj->sq_umem));
	if (txq_obj->sq_buf)
		mlx5_free(txq_obj->sq_buf);
	if (txq_obj->cq_devx)
		claim_zero(mlx5_devx_cmd_destroy(txq_obj->cq_devx));
	if (txq_obj->cq_dbrec_page)
		claim_zero(mlx5_release_dbr
				(&txq_obj->txq_ctrl->priv->dbrpgs,
				 mlx5_os_get_umem_id
					(txq_obj->cq_dbrec_page->umem),
				 txq_obj->cq_dbrec_offset));
	if (txq_obj->cq_umem)
		claim_zero(mlx5_glue->devx_umem_dereg(txq_obj->cq_umem));
	if (txq_obj->cq_buf)
		mlx5_free(txq_obj->cq_buf);
}

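/*
 * mlx5_txq_obj_devx_new() below builds the Tx queue entirely through
 * DevX commands: CQE and WQE rings are allocated in host memory,
 * registered as umem, paired with doorbell records from the shared
 * doorbell pages, wrapped into CQ and SQ objects, and finally the SQ
 * is moved from RST to RDY state.
 */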
/**
 * Create the Tx queue DevX object.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param idx
 *   Queue index in DPDK Tx queue array.
 *
 * @return
 *   The DevX object initialised, NULL otherwise and rte_errno is set.
 */
static struct mlx5_txq_obj *
mlx5_txq_obj_devx_new(struct rte_eth_dev *dev, uint16_t idx)
{
#ifndef HAVE_MLX5DV_DEVX_UAR_OFFSET
	DRV_LOG(ERR, "port %u Tx queue %u cannot create with DevX, no UAR",
		     dev->data->port_id, idx);
	rte_errno = ENOMEM;
	return NULL;
#else
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_dev_ctx_shared *sh = priv->sh;
	struct mlx5_txq_data *txq_data = (*priv->txqs)[idx];
	struct mlx5_txq_ctrl *txq_ctrl =
		container_of(txq_data, struct mlx5_txq_ctrl, txq);
	struct mlx5_devx_create_sq_attr sq_attr = { 0 };
	struct mlx5_devx_modify_sq_attr msq_attr = { 0 };
	struct mlx5_devx_cq_attr cq_attr = { 0 };
	struct mlx5_txq_obj *txq_obj = NULL;
	size_t page_size;
	struct mlx5_cqe *cqe;
	uint32_t i, nqe;
	void *reg_addr;
	size_t alignment = (size_t)-1;
	int ret = 0;

	MLX5_ASSERT(txq_data);
	MLX5_ASSERT(!txq_ctrl->obj);
	page_size = rte_mem_page_size();
	if (page_size == (size_t)-1) {
		DRV_LOG(ERR, "Failed to get mem page size");
		rte_errno = ENOMEM;
		return NULL;
	}
	txq_obj = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
			      sizeof(struct mlx5_txq_obj), 0,
			      txq_ctrl->socket);
	if (!txq_obj) {
		DRV_LOG(ERR,
			"port %u Tx queue %u cannot allocate memory resources",
			dev->data->port_id, txq_data->idx);
		rte_errno = ENOMEM;
		return NULL;
	}
	txq_obj->type = MLX5_TXQ_OBJ_TYPE_DEVX_SQ;
	txq_obj->txq_ctrl = txq_ctrl;
	txq_obj->dev = dev;
	/* Create the Completion Queue. */
	nqe = (1UL << txq_data->elts_n) / MLX5_TX_COMP_THRESH +
	      1 + MLX5_TX_COMP_THRESH_INLINE_DIV;
	nqe = 1UL << log2above(nqe);
	if (nqe > UINT16_MAX) {
		DRV_LOG(ERR,
			"port %u Tx queue %u requests too many CQEs %u",
			dev->data->port_id, txq_data->idx, nqe);
		rte_errno = EINVAL;
		goto error;
	}
	/* Allocate memory buffer for CQEs. */
	alignment = MLX5_CQE_BUF_ALIGNMENT;
	if (alignment == (size_t)-1) {
		DRV_LOG(ERR, "Failed to get mem page size");
		rte_errno = ENOMEM;
		goto error;
	}
	txq_obj->cq_buf = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
				      nqe * sizeof(struct mlx5_cqe),
				      alignment,
				      sh->numa_node);
	if (!txq_obj->cq_buf) {
		DRV_LOG(ERR,
			"port %u Tx queue %u cannot allocate memory (CQ)",
			dev->data->port_id, txq_data->idx);
		rte_errno = ENOMEM;
		goto error;
	}
	txq_data->cqe_n = log2above(nqe);
	txq_data->cqe_s = 1 << txq_data->cqe_n;
	txq_data->cqe_m = txq_data->cqe_s - 1;
	txq_data->cqes = (volatile struct mlx5_cqe *)txq_obj->cq_buf;
	txq_data->cq_ci = 0;
	txq_data->cq_pi = 0;
	/* Register allocated buffer in user space with DevX. */
	txq_obj->cq_umem = mlx5_glue->devx_umem_reg
					(sh->ctx,
					 (void *)txq_obj->cq_buf,
					 nqe * sizeof(struct mlx5_cqe),
					 IBV_ACCESS_LOCAL_WRITE);
	if (!txq_obj->cq_umem) {
		rte_errno = errno;
		DRV_LOG(ERR,
			"port %u Tx queue %u cannot register memory (CQ)",
			dev->data->port_id, txq_data->idx);
		goto error;
	}
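	/*
	 * The doorbell record is a small slot inside a doorbell page shared
	 * by several queues; mlx5_get_dbr() returns the page (as a umem)
	 * plus the offset of the slot reserved for this CQ, and both are
	 * passed to the CQ create command below.
	 */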
	/* Allocate doorbell record for completion queue. */
	txq_obj->cq_dbrec_offset = mlx5_get_dbr(sh->ctx,
						&priv->dbrpgs,
						&txq_obj->cq_dbrec_page);
	if (txq_obj->cq_dbrec_offset < 0)
		goto error;
	txq_data->cq_db = (volatile uint32_t *)(txq_obj->cq_dbrec_page->dbrs +
						txq_obj->cq_dbrec_offset);
	*txq_data->cq_db = 0;
	/* Create completion queue object with DevX. */
	cq_attr.cqe_size = (sizeof(struct mlx5_cqe) == 128) ?
			    MLX5_CQE_SIZE_128B : MLX5_CQE_SIZE_64B;
	cq_attr.uar_page_id = mlx5_os_get_devx_uar_page_id(sh->tx_uar);
	cq_attr.eqn = sh->txpp.eqn;
	cq_attr.q_umem_valid = 1;
	cq_attr.q_umem_offset = (uintptr_t)txq_obj->cq_buf % page_size;
	cq_attr.q_umem_id = mlx5_os_get_umem_id(txq_obj->cq_umem);
	cq_attr.db_umem_valid = 1;
	cq_attr.db_umem_offset = txq_obj->cq_dbrec_offset;
	cq_attr.db_umem_id = mlx5_os_get_umem_id(txq_obj->cq_dbrec_page->umem);
	cq_attr.log_cq_size = rte_log2_u32(nqe);
	cq_attr.log_page_size = rte_log2_u32(page_size);
	txq_obj->cq_devx = mlx5_devx_cmd_create_cq(sh->ctx, &cq_attr);
	if (!txq_obj->cq_devx) {
		rte_errno = errno;
		DRV_LOG(ERR, "port %u Tx queue %u CQ creation failure",
			dev->data->port_id, idx);
		goto error;
	}
	/* Initial fill CQ buffer with invalid CQE opcode. */
	cqe = (struct mlx5_cqe *)txq_obj->cq_buf;
	for (i = 0; i < txq_data->cqe_s; i++) {
		cqe->op_own = (MLX5_CQE_INVALID << 4) | MLX5_CQE_OWNER_MASK;
		++cqe;
	}
	/* Create the Work Queue. */
	nqe = RTE_MIN(1UL << txq_data->elts_n,
		      (uint32_t)sh->device_attr.max_qp_wr);
	txq_obj->sq_buf = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
				      nqe * sizeof(struct mlx5_wqe),
				      page_size, sh->numa_node);
	if (!txq_obj->sq_buf) {
		DRV_LOG(ERR,
			"port %u Tx queue %u cannot allocate memory (SQ)",
			dev->data->port_id, txq_data->idx);
		rte_errno = ENOMEM;
		goto error;
	}
	txq_data->wqe_n = log2above(nqe);
	txq_data->wqe_s = 1 << txq_data->wqe_n;
	txq_data->wqe_m = txq_data->wqe_s - 1;
	txq_data->wqes = (struct mlx5_wqe *)txq_obj->sq_buf;
	txq_data->wqes_end = txq_data->wqes + txq_data->wqe_s;
	txq_data->wqe_ci = 0;
	txq_data->wqe_pi = 0;
	txq_data->wqe_comp = 0;
	txq_data->wqe_thres = txq_data->wqe_s / MLX5_TX_COMP_THRESH_INLINE_DIV;
	/* Register allocated buffer in user space with DevX. */
	txq_obj->sq_umem = mlx5_glue->devx_umem_reg
					(sh->ctx,
					 (void *)txq_obj->sq_buf,
					 nqe * sizeof(struct mlx5_wqe),
					 IBV_ACCESS_LOCAL_WRITE);
	if (!txq_obj->sq_umem) {
		rte_errno = errno;
		DRV_LOG(ERR,
			"port %u Tx queue %u cannot register memory (SQ)",
			dev->data->port_id, txq_data->idx);
		goto error;
	}
	/* Allocate doorbell record for send queue. */
	txq_obj->sq_dbrec_offset = mlx5_get_dbr(sh->ctx,
						&priv->dbrpgs,
						&txq_obj->sq_dbrec_page);
	if (txq_obj->sq_dbrec_offset < 0)
		goto error;
	txq_data->qp_db = (volatile uint32_t *)
				(txq_obj->sq_dbrec_page->dbrs +
				 txq_obj->sq_dbrec_offset +
				 MLX5_SND_DBR * sizeof(uint32_t));
	*txq_data->qp_db = 0;
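	/*
	 * The SQ attributes below link the send queue to the CQ created
	 * above (cqn), to the registered WQE ring (wq_umem_*) and to its
	 * doorbell record (dbr_*); the queue is created in RST state and
	 * switched to RDY once everything is set up.
	 */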
	/* Create Send Queue object with DevX. */
	sq_attr.tis_lst_sz = 1;
	sq_attr.tis_num = sh->tis->id;
	sq_attr.state = MLX5_SQC_STATE_RST;
	sq_attr.cqn = txq_obj->cq_devx->id;
	sq_attr.flush_in_error_en = 1;
	sq_attr.allow_multi_pkt_send_wqe = !!priv->config.mps;
	sq_attr.allow_swp = !!priv->config.swp;
	sq_attr.min_wqe_inline_mode = priv->config.hca_attr.vport_inline_mode;
	sq_attr.wq_attr.uar_page = mlx5_os_get_devx_uar_page_id(sh->tx_uar);
	sq_attr.wq_attr.wq_type = MLX5_WQ_TYPE_CYCLIC;
	sq_attr.wq_attr.pd = sh->pdn;
	sq_attr.wq_attr.log_wq_stride = rte_log2_u32(MLX5_WQE_SIZE);
	sq_attr.wq_attr.log_wq_sz = txq_data->wqe_n;
	sq_attr.wq_attr.dbr_umem_valid = 1;
	sq_attr.wq_attr.dbr_addr = txq_obj->sq_dbrec_offset;
	sq_attr.wq_attr.dbr_umem_id =
			mlx5_os_get_umem_id(txq_obj->sq_dbrec_page->umem);
	sq_attr.wq_attr.wq_umem_valid = 1;
	sq_attr.wq_attr.wq_umem_id = mlx5_os_get_umem_id(txq_obj->sq_umem);
	sq_attr.wq_attr.wq_umem_offset = (uintptr_t)txq_obj->sq_buf % page_size;
	txq_obj->sq_devx = mlx5_devx_cmd_create_sq(sh->ctx, &sq_attr);
	if (!txq_obj->sq_devx) {
		rte_errno = errno;
		DRV_LOG(ERR, "port %u Tx queue %u SQ creation failure",
			dev->data->port_id, idx);
		goto error;
	}
	txq_data->qp_num_8s = txq_obj->sq_devx->id << 8;
	/* Change Send Queue state to Ready-to-Send. */
	msq_attr.sq_state = MLX5_SQC_STATE_RST;
	msq_attr.state = MLX5_SQC_STATE_RDY;
	ret = mlx5_devx_cmd_modify_sq(txq_obj->sq_devx, &msq_attr);
	if (ret) {
		rte_errno = errno;
		DRV_LOG(ERR,
			"port %u Tx queue %u SQ state to SQC_STATE_RDY failed",
			dev->data->port_id, idx);
		goto error;
	}
	txq_data->fcqs = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
				     txq_data->cqe_s * sizeof(*txq_data->fcqs),
				     RTE_CACHE_LINE_SIZE,
				     txq_ctrl->socket);
	if (!txq_data->fcqs) {
		DRV_LOG(ERR, "port %u Tx queue %u cannot allocate memory (FCQ)",
			dev->data->port_id, idx);
		rte_errno = ENOMEM;
		goto error;
	}
#ifdef HAVE_IBV_FLOW_DV_SUPPORT
	/*
	 * If using DevX need to query and store TIS transport domain value.
	 * This is done once per port.
	 * Will use this value on Rx, when creating matching TIR.
	 */
	if (priv->config.devx && !priv->sh->tdn)
		priv->sh->tdn = priv->sh->td->id;
#endif
	MLX5_ASSERT(sh->tx_uar);
	reg_addr = mlx5_os_get_devx_uar_reg_addr(sh->tx_uar);
	MLX5_ASSERT(reg_addr);
	txq_ctrl->bf_reg = reg_addr;
	txq_ctrl->uar_mmap_offset =
			mlx5_os_get_devx_uar_mmap_offset(sh->tx_uar);
	rte_atomic32_set(&txq_obj->refcnt, 1);
	txq_uar_init(txq_ctrl);
	LIST_INSERT_HEAD(&priv->txqsobj, txq_obj, next);
	return txq_obj;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	txq_release_sq_resources(txq_obj);
	if (txq_data->fcqs) {
		mlx5_free(txq_data->fcqs);
		txq_data->fcqs = NULL;
	}
	mlx5_free(txq_obj);
	rte_errno = ret; /* Restore rte_errno. */
	return NULL;
#endif
}

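/*
 * mlx5_txq_obj_new() dispatches on the requested object type: hairpin
 * queues and DevX SQs are handled by the helpers above, while the
 * default path below builds the queue through Verbs
 * (mlx5_glue->create_cq()/create_qp_ex()) and extracts the low-level
 * queue layout with mlx5dv.
 */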
/**
 * Create the Tx queue Verbs object.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param idx
 *   Queue index in DPDK Tx queue array.
 * @param type
 *   Type of the Tx queue object to create.
 *
 * @return
 *   The Verbs object initialised, NULL otherwise and rte_errno is set.
 */
struct mlx5_txq_obj *
mlx5_txq_obj_new(struct rte_eth_dev *dev, uint16_t idx,
		 enum mlx5_txq_obj_type type)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_data *txq_data = (*priv->txqs)[idx];
	struct mlx5_txq_ctrl *txq_ctrl =
		container_of(txq_data, struct mlx5_txq_ctrl, txq);
	struct mlx5_txq_obj tmpl;
	struct mlx5_txq_obj *txq_obj = NULL;
	union {
		struct ibv_qp_init_attr_ex init;
		struct ibv_cq_init_attr_ex cq;
		struct ibv_qp_attr mod;
	} attr;
	unsigned int cqe_n;
	struct mlx5dv_qp qp = { .comp_mask = MLX5DV_QP_MASK_UAR_MMAP_OFFSET };
	struct mlx5dv_cq cq_info;
	struct mlx5dv_obj obj;
	const int desc = 1 << txq_data->elts_n;
	int ret = 0;

	if (type == MLX5_TXQ_OBJ_TYPE_DEVX_HAIRPIN)
		return mlx5_txq_obj_hairpin_new(dev, idx);
	if (type == MLX5_TXQ_OBJ_TYPE_DEVX_SQ)
		return mlx5_txq_obj_devx_new(dev, idx);
#ifdef HAVE_IBV_FLOW_DV_SUPPORT
	/* If using DevX, need additional mask to read tisn value. */
	if (priv->config.devx && !priv->sh->tdn)
		qp.comp_mask |= MLX5DV_QP_MASK_RAW_QP_HANDLES;
#endif
	MLX5_ASSERT(txq_data);
	priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_TX_QUEUE;
	priv->verbs_alloc_ctx.obj = txq_ctrl;
	if (mlx5_getenv_int("MLX5_ENABLE_CQE_COMPRESSION")) {
		DRV_LOG(ERR,
			"port %u MLX5_ENABLE_CQE_COMPRESSION must never be set",
			dev->data->port_id);
		rte_errno = EINVAL;
		return NULL;
	}
	memset(&tmpl, 0, sizeof(struct mlx5_txq_obj));
	attr.cq = (struct ibv_cq_init_attr_ex){
		.comp_mask = 0,
	};
	cqe_n = desc / MLX5_TX_COMP_THRESH +
		1 + MLX5_TX_COMP_THRESH_INLINE_DIV;
	tmpl.cq = mlx5_glue->create_cq(priv->sh->ctx, cqe_n, NULL, NULL, 0);
	if (tmpl.cq == NULL) {
		DRV_LOG(ERR, "port %u Tx queue %u CQ creation failure",
			dev->data->port_id, idx);
		rte_errno = errno;
		goto error;
	}
	attr.init = (struct ibv_qp_init_attr_ex){
		/* CQ to be associated with the send queue. */
		.send_cq = tmpl.cq,
		/* CQ to be associated with the receive queue. */
		.recv_cq = tmpl.cq,
		.cap = {
			/* Max number of outstanding WRs. */
			.max_send_wr =
				((priv->sh->device_attr.max_qp_wr <
				  desc) ?
				 priv->sh->device_attr.max_qp_wr :
				 desc),
			/*
			 * Max number of scatter/gather elements in a WR,
			 * must be 1 to prevent libmlx5 from trying to affect
			 * too much memory. TX gather is not impacted by the
			 * device_attr.max_sge limit and will still work
			 * properly.
			 */
			.max_send_sge = 1,
		},
		.qp_type = IBV_QPT_RAW_PACKET,
		/*
		 * Do *NOT* enable this, completion events are managed per
		 * Tx burst.
		 */
		.sq_sig_all = 0,
		.pd = priv->sh->pd,
		.comp_mask = IBV_QP_INIT_ATTR_PD,
	};
	if (txq_data->inlen_send)
		attr.init.cap.max_inline_data = txq_ctrl->max_inline_data;
	if (txq_data->tso_en) {
		attr.init.max_tso_header = txq_ctrl->max_tso_header;
		attr.init.comp_mask |= IBV_QP_INIT_ATTR_MAX_TSO_HEADER;
	}
	tmpl.qp = mlx5_glue->create_qp_ex(priv->sh->ctx, &attr.init);
	if (tmpl.qp == NULL) {
		DRV_LOG(ERR, "port %u Tx queue %u QP creation failure",
			dev->data->port_id, idx);
		rte_errno = errno;
		goto error;
	}
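	/*
	 * A freshly created Verbs QP starts in the RESET state; for a raw
	 * packet QP it has to be walked through INIT -> RTR -> RTS below
	 * before send work requests can be posted.
	 */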
	attr.mod = (struct ibv_qp_attr){
		/* Move the QP to this state. */
		.qp_state = IBV_QPS_INIT,
		/* IB device port number. */
		.port_num = (uint8_t)priv->dev_port,
	};
	ret = mlx5_glue->modify_qp(tmpl.qp, &attr.mod,
				   (IBV_QP_STATE | IBV_QP_PORT));
	if (ret) {
		DRV_LOG(ERR,
			"port %u Tx queue %u QP state to IBV_QPS_INIT failed",
			dev->data->port_id, idx);
		rte_errno = errno;
		goto error;
	}
	attr.mod = (struct ibv_qp_attr){
		.qp_state = IBV_QPS_RTR
	};
	ret = mlx5_glue->modify_qp(tmpl.qp, &attr.mod, IBV_QP_STATE);
	if (ret) {
		DRV_LOG(ERR,
			"port %u Tx queue %u QP state to IBV_QPS_RTR failed",
			dev->data->port_id, idx);
		rte_errno = errno;
		goto error;
	}
	attr.mod.qp_state = IBV_QPS_RTS;
	ret = mlx5_glue->modify_qp(tmpl.qp, &attr.mod, IBV_QP_STATE);
	if (ret) {
		DRV_LOG(ERR,
			"port %u Tx queue %u QP state to IBV_QPS_RTS failed",
			dev->data->port_id, idx);
		rte_errno = errno;
		goto error;
	}
	txq_obj = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
			      sizeof(struct mlx5_txq_obj), 0,
			      txq_ctrl->socket);
	if (!txq_obj) {
		DRV_LOG(ERR, "port %u Tx queue %u cannot allocate memory",
			dev->data->port_id, idx);
		rte_errno = ENOMEM;
		goto error;
	}
	obj.cq.in = tmpl.cq;
	obj.cq.out = &cq_info;
	obj.qp.in = tmpl.qp;
	obj.qp.out = &qp;
	ret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_CQ | MLX5DV_OBJ_QP);
	if (ret != 0) {
		rte_errno = errno;
		goto error;
	}
	if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) {
		DRV_LOG(ERR,
			"port %u wrong MLX5_CQE_SIZE environment variable"
			" value: it should be set to %u",
			dev->data->port_id, RTE_CACHE_LINE_SIZE);
		rte_errno = EINVAL;
		goto error;
	}
	txq_data->cqe_n = log2above(cq_info.cqe_cnt);
	txq_data->cqe_s = 1 << txq_data->cqe_n;
	txq_data->cqe_m = txq_data->cqe_s - 1;
	txq_data->qp_num_8s = ((struct ibv_qp *)tmpl.qp)->qp_num << 8;
	txq_data->wqes = qp.sq.buf;
	txq_data->wqe_n = log2above(qp.sq.wqe_cnt);
	txq_data->wqe_s = 1 << txq_data->wqe_n;
	txq_data->wqe_m = txq_data->wqe_s - 1;
	txq_data->wqes_end = txq_data->wqes + txq_data->wqe_s;
	txq_data->qp_db = &qp.dbrec[MLX5_SND_DBR];
	txq_data->cq_db = cq_info.dbrec;
	txq_data->cqes = (volatile struct mlx5_cqe *)cq_info.buf;
	txq_data->cq_ci = 0;
	txq_data->cq_pi = 0;
	txq_data->wqe_ci = 0;
	txq_data->wqe_pi = 0;
	txq_data->wqe_comp = 0;
	txq_data->wqe_thres = txq_data->wqe_s / MLX5_TX_COMP_THRESH_INLINE_DIV;
	txq_data->fcqs = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
				     txq_data->cqe_s * sizeof(*txq_data->fcqs),
				     RTE_CACHE_LINE_SIZE, txq_ctrl->socket);
	if (!txq_data->fcqs) {
		DRV_LOG(ERR, "port %u Tx queue %u cannot allocate memory (FCQ)",
			dev->data->port_id, idx);
		rte_errno = ENOMEM;
		goto error;
	}
#ifdef HAVE_IBV_FLOW_DV_SUPPORT
	/*
	 * If using DevX need to query and store TIS transport domain value.
	 * This is done once per port.
	 * Will use this value on Rx, when creating matching TIR.
	 */
	if (priv->config.devx && !priv->sh->tdn) {
		ret = mlx5_devx_cmd_qp_query_tis_td(tmpl.qp, qp.tisn,
						    &priv->sh->tdn);
		if (ret) {
			DRV_LOG(ERR, "Failed to query port %u Tx queue %u QP "
				"TIS transport domain", dev->data->port_id,
				idx);
			rte_errno = EINVAL;
			goto error;
		} else {
			DRV_LOG(DEBUG, "port %u Tx queue %u TIS number %d "
				"transport domain %d", dev->data->port_id,
				idx, qp.tisn, priv->sh->tdn);
		}
	}
#endif
	txq_obj->qp = tmpl.qp;
	txq_obj->cq = tmpl.cq;
	rte_atomic32_inc(&txq_obj->refcnt);
	txq_ctrl->bf_reg = qp.bf.reg;
	if (qp.comp_mask & MLX5DV_QP_MASK_UAR_MMAP_OFFSET) {
		txq_ctrl->uar_mmap_offset = qp.uar_mmap_offset;
		DRV_LOG(DEBUG, "port %u: uar_mmap_offset 0x%"PRIx64,
			dev->data->port_id, txq_ctrl->uar_mmap_offset);
	} else {
		DRV_LOG(ERR,
			"port %u failed to retrieve UAR info, invalid"
			" libmlx5.so",
			dev->data->port_id);
		rte_errno = EINVAL;
		goto error;
	}
	txq_uar_init(txq_ctrl);
	LIST_INSERT_HEAD(&priv->txqsobj, txq_obj, next);
	txq_obj->txq_ctrl = txq_ctrl;
	priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE;
	return txq_obj;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	if (tmpl.cq)
		claim_zero(mlx5_glue->destroy_cq(tmpl.cq));
	if (tmpl.qp)
		claim_zero(mlx5_glue->destroy_qp(tmpl.qp));
	if (txq_data && txq_data->fcqs) {
		mlx5_free(txq_data->fcqs);
		txq_data->fcqs = NULL;
	}
	if (txq_obj)
		mlx5_free(txq_obj);
	priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE;
	rte_errno = ret; /* Restore rte_errno. */
	return NULL;
}

/**
 * Get a Tx queue Verbs object.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param idx
 *   Queue index in DPDK Tx queue array.
 *
 * @return
 *   The Verbs object if it exists.
 */
struct mlx5_txq_obj *
mlx5_txq_obj_get(struct rte_eth_dev *dev, uint16_t idx)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_ctrl *txq_ctrl;

	if (idx >= priv->txqs_n)
		return NULL;
	if (!(*priv->txqs)[idx])
		return NULL;
	txq_ctrl = container_of((*priv->txqs)[idx], struct mlx5_txq_ctrl, txq);
	if (txq_ctrl->obj)
		rte_atomic32_inc(&txq_ctrl->obj->refcnt);
	return txq_ctrl->obj;
}

/**
 * Release a Tx queue Verbs object.
 *
 * @param txq_obj
 *   Verbs Tx queue object.
 *
 * @return
 *   1 while a reference on it exists, 0 when freed.
 */
int
mlx5_txq_obj_release(struct mlx5_txq_obj *txq_obj)
{
	MLX5_ASSERT(txq_obj);
	if (rte_atomic32_dec_and_test(&txq_obj->refcnt)) {
		if (txq_obj->type == MLX5_TXQ_OBJ_TYPE_DEVX_HAIRPIN) {
			if (txq_obj->tis)
				claim_zero(mlx5_devx_cmd_destroy(txq_obj->tis));
		} else if (txq_obj->type == MLX5_TXQ_OBJ_TYPE_DEVX_SQ) {
			txq_release_sq_resources(txq_obj);
		} else {
			claim_zero(mlx5_glue->destroy_qp(txq_obj->qp));
			claim_zero(mlx5_glue->destroy_cq(txq_obj->cq));
		}
		if (txq_obj->txq_ctrl->txq.fcqs) {
			mlx5_free(txq_obj->txq_ctrl->txq.fcqs);
			txq_obj->txq_ctrl->txq.fcqs = NULL;
		}
		LIST_REMOVE(txq_obj, next);
		mlx5_free(txq_obj);
		return 0;
	}
	return 1;
}

/**
 * Verify the Verbs Tx queue list is empty.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   The number of objects not released.
 */
int
mlx5_txq_obj_verify(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	int ret = 0;
	struct mlx5_txq_obj *txq_obj;

	LIST_FOREACH(txq_obj, &priv->txqsobj, next) {
		DRV_LOG(DEBUG, "port %u Verbs Tx queue %u still referenced",
			dev->data->port_id, txq_obj->txq_ctrl->txq.idx);
		++ret;
	}
	return ret;
}

/**
 * Calculate the total number of WQEBB for Tx queue.
 *
 * Simplified version of calc_sq_size() in rdma-core.
 *
 * @param txq_ctrl
 *   Pointer to Tx queue control structure.
 *
 * @return
 *   The number of WQEBB.
 */
static int
txq_calc_wqebb_cnt(struct mlx5_txq_ctrl *txq_ctrl)
{
	unsigned int wqe_size;
	const unsigned int desc = 1 << txq_ctrl->txq.elts_n;

	wqe_size = MLX5_WQE_CSEG_SIZE +
		   MLX5_WQE_ESEG_SIZE +
		   MLX5_WSEG_SIZE -
		   MLX5_ESEG_MIN_INLINE_SIZE +
		   txq_ctrl->max_inline_data;
	return rte_align32pow2(wqe_size * desc) / MLX5_WQE_SIZE;
}

/**
 * Calculate the maximal inline data size for Tx queue.
 *
 * @param txq_ctrl
 *   Pointer to Tx queue control structure.
 *
 * @return
 *   The maximal inline data size.
 */
static unsigned int
txq_calc_inline_max(struct mlx5_txq_ctrl *txq_ctrl)
{
	const unsigned int desc = 1 << txq_ctrl->txq.elts_n;
	struct mlx5_priv *priv = txq_ctrl->priv;
	unsigned int wqe_size;

	wqe_size = priv->sh->device_attr.max_qp_wr / desc;
	if (!wqe_size)
		return 0;
	/*
	 * This calculation is derived from the source of
	 * mlx5_calc_send_wqe() in the rdma-core library.
	 */
	wqe_size = wqe_size * MLX5_WQE_SIZE -
		   MLX5_WQE_CSEG_SIZE -
		   MLX5_WQE_ESEG_SIZE -
		   MLX5_WSEG_SIZE -
		   MLX5_WSEG_SIZE +
		   MLX5_DSEG_MIN_INLINE_SIZE;
	return wqe_size;
}

/**
 * Set Tx queue parameters from device configuration.
 *
 * @param txq_ctrl
 *   Pointer to Tx queue control structure.
 */
static void
txq_set_params(struct mlx5_txq_ctrl *txq_ctrl)
{
	struct mlx5_priv *priv = txq_ctrl->priv;
	struct mlx5_dev_config *config = &priv->config;
	unsigned int inlen_send; /* Inline data for ordinary SEND. */
	unsigned int inlen_empw; /* Inline data for enhanced MPW. */
	unsigned int inlen_mode; /* Minimal required Inline data. */
	unsigned int txqs_inline; /* Min Tx queues to enable inline. */
	uint64_t dev_txoff = priv->dev_data->dev_conf.txmode.offloads;
	bool tso = txq_ctrl->txq.offloads & (DEV_TX_OFFLOAD_TCP_TSO |
					     DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
					     DEV_TX_OFFLOAD_GRE_TNL_TSO |
					     DEV_TX_OFFLOAD_IP_TNL_TSO |
					     DEV_TX_OFFLOAD_UDP_TNL_TSO);
	bool vlan_inline;
	unsigned int temp;

	if (config->txqs_inline == MLX5_ARG_UNSET)
		txqs_inline =
#if defined(RTE_ARCH_ARM64)
		(priv->pci_dev->id.device_id ==
			PCI_DEVICE_ID_MELLANOX_CONNECTX5BF) ?
			MLX5_INLINE_MAX_TXQS_BLUEFIELD :
#endif
			MLX5_INLINE_MAX_TXQS;
	else
		txqs_inline = (unsigned int)config->txqs_inline;
	inlen_send = (config->txq_inline_max == MLX5_ARG_UNSET) ?
		     MLX5_SEND_DEF_INLINE_LEN :
		     (unsigned int)config->txq_inline_max;
	inlen_empw = (config->txq_inline_mpw == MLX5_ARG_UNSET) ?
		     MLX5_EMPW_DEF_INLINE_LEN :
		     (unsigned int)config->txq_inline_mpw;
	inlen_mode = (config->txq_inline_min == MLX5_ARG_UNSET) ?
		     0 : (unsigned int)config->txq_inline_min;
	if (config->mps != MLX5_MPW_ENHANCED && config->mps != MLX5_MPW)
		inlen_empw = 0;
	/*
	 * If there is requested minimal amount of data to inline
	 * we MUST enable inlining. This is a case for ConnectX-4
	 * which usually requires L2 inlined for correct operating
	 * and ConnectX-4 Lx which requires L2-L4 inlined to
	 * support E-Switch Flows.
	 */
	if (inlen_mode) {
		if (inlen_mode <= MLX5_ESEG_MIN_INLINE_SIZE) {
			/*
			 * Optimize minimal inlining for single
			 * segment packets to fill one WQEBB
			 * without gaps.
			 */
			temp = MLX5_ESEG_MIN_INLINE_SIZE;
		} else {
			temp = inlen_mode - MLX5_ESEG_MIN_INLINE_SIZE;
			temp = RTE_ALIGN(temp, MLX5_WSEG_SIZE) +
			       MLX5_ESEG_MIN_INLINE_SIZE;
			temp = RTE_MIN(temp, MLX5_SEND_MAX_INLINE_LEN);
		}
		if (temp != inlen_mode) {
			DRV_LOG(INFO,
				"port %u minimal required inline setting"
				" aligned from %u to %u",
				PORT_ID(priv), inlen_mode, temp);
			inlen_mode = temp;
		}
	}
	/*
	 * If the port is configured to support VLAN insertion and the device
	 * does not support this feature in HW (for NICs before ConnectX-5
	 * or in case the wqe_vlan_insert flag is not set) we must enable
	 * data inline on all queues because it is supported by a single
	 * tx_burst routine.
	 */
	txq_ctrl->txq.vlan_en = config->hw_vlan_insert;
	vlan_inline = (dev_txoff & DEV_TX_OFFLOAD_VLAN_INSERT) &&
		      !config->hw_vlan_insert;
	/*
	 * If there are few Tx queues, saving CPU cycles is prioritized
	 * and data inlining is disabled entirely.
	 */
	if (inlen_send && priv->txqs_n >= txqs_inline) {
		/*
		 * The data sent with ordinary MLX5_OPCODE_SEND
		 * may be inlined in Ethernet Segment, align the
		 * length accordingly to fit entire WQEBBs.
		 */
		temp = RTE_MAX(inlen_send,
			       MLX5_ESEG_MIN_INLINE_SIZE + MLX5_WQE_DSEG_SIZE);
		temp -= MLX5_ESEG_MIN_INLINE_SIZE + MLX5_WQE_DSEG_SIZE;
		temp = RTE_ALIGN(temp, MLX5_WQE_SIZE);
		temp += MLX5_ESEG_MIN_INLINE_SIZE + MLX5_WQE_DSEG_SIZE;
		temp = RTE_MIN(temp, MLX5_WQE_SIZE_MAX +
				     MLX5_ESEG_MIN_INLINE_SIZE -
				     MLX5_WQE_CSEG_SIZE -
				     MLX5_WQE_ESEG_SIZE -
				     MLX5_WQE_DSEG_SIZE * 2);
		temp = RTE_MIN(temp, MLX5_SEND_MAX_INLINE_LEN);
		temp = RTE_MAX(temp, inlen_mode);
		if (temp != inlen_send) {
			DRV_LOG(INFO,
				"port %u ordinary send inline setting"
				" aligned from %u to %u",
				PORT_ID(priv), inlen_send, temp);
			inlen_send = temp;
		}
		/*
		 * Not aligned to cache lines, but to WQEs.
		 * First bytes of data (initial alignment)
		 * is going to be copied explicitly at the
		 * beginning of inlining buffer in Ethernet
		 * Segment.
		 */
		MLX5_ASSERT(inlen_send >= MLX5_ESEG_MIN_INLINE_SIZE);
		MLX5_ASSERT(inlen_send <= MLX5_WQE_SIZE_MAX +
					  MLX5_ESEG_MIN_INLINE_SIZE -
					  MLX5_WQE_CSEG_SIZE -
					  MLX5_WQE_ESEG_SIZE -
					  MLX5_WQE_DSEG_SIZE * 2);
	} else if (inlen_mode) {
		/*
		 * If minimal inlining is requested we must
		 * enable inlining in general, despite the
		 * number of configured queues. Ignore the
		 * txq_inline_max devarg, this is not
		 * full-featured inline.
		 */
		inlen_send = inlen_mode;
		inlen_empw = 0;
	} else if (vlan_inline) {
		/*
		 * Hardware does not report offload for
		 * VLAN insertion, we must enable data inline
		 * to implement feature by software.
		 */
		inlen_send = MLX5_ESEG_MIN_INLINE_SIZE;
		inlen_empw = 0;
	} else {
		inlen_send = 0;
		inlen_empw = 0;
	}
	txq_ctrl->txq.inlen_send = inlen_send;
	txq_ctrl->txq.inlen_mode = inlen_mode;
	txq_ctrl->txq.inlen_empw = 0;
	if (inlen_send && inlen_empw && priv->txqs_n >= txqs_inline) {
		/*
		 * The data sent with MLX5_OPCODE_ENHANCED_MPSW
		 * may be inlined in Data Segment, align the
		 * length accordingly to fit entire WQEBBs.
		 */
		temp = RTE_MAX(inlen_empw,
			       MLX5_WQE_SIZE + MLX5_DSEG_MIN_INLINE_SIZE);
		temp -= MLX5_DSEG_MIN_INLINE_SIZE;
		temp = RTE_ALIGN(temp, MLX5_WQE_SIZE);
		temp += MLX5_DSEG_MIN_INLINE_SIZE;
		temp = RTE_MIN(temp, MLX5_WQE_SIZE_MAX +
				     MLX5_DSEG_MIN_INLINE_SIZE -
				     MLX5_WQE_CSEG_SIZE -
				     MLX5_WQE_ESEG_SIZE -
				     MLX5_WQE_DSEG_SIZE);
		temp = RTE_MIN(temp, MLX5_EMPW_MAX_INLINE_LEN);
		if (temp != inlen_empw) {
			DRV_LOG(INFO,
				"port %u enhanced empw inline setting"
				" aligned from %u to %u",
				PORT_ID(priv), inlen_empw, temp);
			inlen_empw = temp;
		}
		MLX5_ASSERT(inlen_empw >= MLX5_ESEG_MIN_INLINE_SIZE);
		MLX5_ASSERT(inlen_empw <= MLX5_WQE_SIZE_MAX +
					  MLX5_DSEG_MIN_INLINE_SIZE -
					  MLX5_WQE_CSEG_SIZE -
					  MLX5_WQE_ESEG_SIZE -
					  MLX5_WQE_DSEG_SIZE);
		txq_ctrl->txq.inlen_empw = inlen_empw;
	}
	txq_ctrl->max_inline_data = RTE_MAX(inlen_send, inlen_empw);
	if (tso) {
		txq_ctrl->max_tso_header = MLX5_MAX_TSO_HEADER;
		txq_ctrl->max_inline_data = RTE_MAX(txq_ctrl->max_inline_data,
						    MLX5_MAX_TSO_HEADER);
		txq_ctrl->txq.tso_en = 1;
	}
	txq_ctrl->txq.tunnel_en = config->tunnel_en | config->swp;
	txq_ctrl->txq.swp_en = ((DEV_TX_OFFLOAD_IP_TNL_TSO |
				 DEV_TX_OFFLOAD_UDP_TNL_TSO |
				 DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM) &
				txq_ctrl->txq.offloads) && config->swp;
}

/**
 * Adjust Tx queue data inline parameters for large queue sizes.
 * The data inline feature requires multiple WQEs to fit the packets,
 * and if a large number of Tx descriptors is requested by the application
 * the total WQE amount may exceed the hardware capabilities. If the
 * default inline settings are used we can try to adjust them to meet
 * the hardware requirements without exceeding the queue size.
 *
 * @param txq_ctrl
 *   Pointer to Tx queue control structure.
 *
 * @return
 *   Zero on success, otherwise the parameters cannot be adjusted.
 */
static int
txq_adjust_params(struct mlx5_txq_ctrl *txq_ctrl)
{
	struct mlx5_priv *priv = txq_ctrl->priv;
	struct mlx5_dev_config *config = &priv->config;
	unsigned int max_inline;

	max_inline = txq_calc_inline_max(txq_ctrl);
	if (!txq_ctrl->txq.inlen_send) {
		/*
		 * Inline data feature is not engaged at all.
		 * There is nothing to adjust.
		 */
		return 0;
	}
	if (txq_ctrl->max_inline_data <= max_inline) {
		/*
		 * The requested inline data length does not
		 * exceed queue capabilities.
		 */
		return 0;
	}
	if (txq_ctrl->txq.inlen_mode > max_inline) {
		DRV_LOG(ERR,
			"minimal data inline requirements (%u) are not"
			" satisfied (%u) on port %u, try the smaller"
			" Tx queue size (%d)",
			txq_ctrl->txq.inlen_mode, max_inline,
			priv->dev_data->port_id,
			priv->sh->device_attr.max_qp_wr);
		goto error;
	}
	if (txq_ctrl->txq.inlen_send > max_inline &&
	    config->txq_inline_max != MLX5_ARG_UNSET &&
	    config->txq_inline_max > (int)max_inline) {
		DRV_LOG(ERR,
			"txq_inline_max requirements (%u) are not"
			" satisfied (%u) on port %u, try the smaller"
			" Tx queue size (%d)",
			txq_ctrl->txq.inlen_send, max_inline,
			priv->dev_data->port_id,
			priv->sh->device_attr.max_qp_wr);
		goto error;
	}
	if (txq_ctrl->txq.inlen_empw > max_inline &&
	    config->txq_inline_mpw != MLX5_ARG_UNSET &&
	    config->txq_inline_mpw > (int)max_inline) {
		DRV_LOG(ERR,
			"txq_inline_mpw requirements (%u) are not"
			" satisfied (%u) on port %u, try the smaller"
			" Tx queue size (%d)",
			txq_ctrl->txq.inlen_empw, max_inline,
			priv->dev_data->port_id,
			priv->sh->device_attr.max_qp_wr);
		goto error;
	}
	if (txq_ctrl->txq.tso_en && max_inline < MLX5_MAX_TSO_HEADER) {
		DRV_LOG(ERR,
			"tso header inline requirements (%u) are not"
			" satisfied (%u) on port %u, try the smaller"
			" Tx queue size (%d)",
			MLX5_MAX_TSO_HEADER, max_inline,
			priv->dev_data->port_id,
			priv->sh->device_attr.max_qp_wr);
		goto error;
	}
	if (txq_ctrl->txq.inlen_send > max_inline) {
		DRV_LOG(WARNING,
			"adjust txq_inline_max (%u->%u)"
			" due to large Tx queue on port %u",
			txq_ctrl->txq.inlen_send, max_inline,
			priv->dev_data->port_id);
		txq_ctrl->txq.inlen_send = max_inline;
	}
	if (txq_ctrl->txq.inlen_empw > max_inline) {
		DRV_LOG(WARNING,
			"adjust txq_inline_mpw (%u->%u)"
			" due to large Tx queue on port %u",
			txq_ctrl->txq.inlen_empw, max_inline,
			priv->dev_data->port_id);
		txq_ctrl->txq.inlen_empw = max_inline;
	}
	txq_ctrl->max_inline_data = RTE_MAX(txq_ctrl->txq.inlen_send,
					    txq_ctrl->txq.inlen_empw);
	MLX5_ASSERT(txq_ctrl->max_inline_data <= max_inline);
	MLX5_ASSERT(txq_ctrl->txq.inlen_mode <= max_inline);
	MLX5_ASSERT(txq_ctrl->txq.inlen_mode <= txq_ctrl->txq.inlen_send);
	MLX5_ASSERT(txq_ctrl->txq.inlen_mode <= txq_ctrl->txq.inlen_empw ||
		    !txq_ctrl->txq.inlen_empw);
	return 0;
error:
	rte_errno = ENOMEM;
	return -ENOMEM;
}

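/*
 * mlx5_txq_new() below combines the steps above: it derives the inline
 * parameters from the device configuration (txq_set_params), shrinks
 * them when the requested descriptor count would not fit the device
 * limits (txq_adjust_params) and finally rejects the queue if even the
 * adjusted WQEBB count exceeds device_attr.max_qp_wr.
 */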

/**
 * Create a DPDK Tx queue.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param idx
 *   TX queue index.
 * @param desc
 *   Number of descriptors to configure in queue.
 * @param socket
 *   NUMA socket on which memory must be allocated.
 * @param[in] conf
 *   Thresholds parameters.
 *
 * @return
 *   A DPDK queue object on success, NULL otherwise and rte_errno is set.
 */
struct mlx5_txq_ctrl *
mlx5_txq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
	     unsigned int socket, const struct rte_eth_txconf *conf)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_ctrl *tmpl;

	tmpl = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, sizeof(*tmpl) +
			   desc * sizeof(struct rte_mbuf *), 0, socket);
	if (!tmpl) {
		rte_errno = ENOMEM;
		return NULL;
	}
	if (mlx5_mr_btree_init(&tmpl->txq.mr_ctrl.cache_bh,
			       MLX5_MR_BTREE_CACHE_N, socket)) {
		/* rte_errno is already set. */
		goto error;
	}
	/* Save pointer of global generation number to check memory event. */
	tmpl->txq.mr_ctrl.dev_gen_ptr = &priv->sh->share_cache.dev_gen;
	MLX5_ASSERT(desc > MLX5_TX_COMP_THRESH);
	tmpl->txq.offloads = conf->offloads |
			     dev->data->dev_conf.txmode.offloads;
	tmpl->priv = priv;
	tmpl->socket = socket;
	tmpl->txq.elts_n = log2above(desc);
	tmpl->txq.elts_s = desc;
	tmpl->txq.elts_m = desc - 1;
	tmpl->txq.port_id = dev->data->port_id;
	tmpl->txq.idx = idx;
	txq_set_params(tmpl);
	if (txq_adjust_params(tmpl))
		goto error;
	if (txq_calc_wqebb_cnt(tmpl) >
	    priv->sh->device_attr.max_qp_wr) {
		DRV_LOG(ERR,
			"port %u Tx WQEBB count (%d) exceeds the limit (%d),"
			" try smaller queue size",
			dev->data->port_id, txq_calc_wqebb_cnt(tmpl),
			priv->sh->device_attr.max_qp_wr);
		rte_errno = ENOMEM;
		goto error;
	}
	rte_atomic32_inc(&tmpl->refcnt);
	tmpl->type = MLX5_TXQ_TYPE_STANDARD;
	LIST_INSERT_HEAD(&priv->txqsctrl, tmpl, next);
	return tmpl;
error:
	mlx5_free(tmpl);
	return NULL;
}

/**
 * Create a DPDK Tx hairpin queue.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param idx
 *   TX queue index.
 * @param desc
 *   Number of descriptors to configure in queue.
 * @param hairpin_conf
 *   The hairpin configuration.
 *
 * @return
 *   A DPDK queue object on success, NULL otherwise and rte_errno is set.
 */
struct mlx5_txq_ctrl *
mlx5_txq_hairpin_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
		     const struct rte_eth_hairpin_conf *hairpin_conf)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_ctrl *tmpl;

	tmpl = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, sizeof(*tmpl), 0,
			   SOCKET_ID_ANY);
	if (!tmpl) {
		rte_errno = ENOMEM;
		return NULL;
	}
	tmpl->priv = priv;
	tmpl->socket = SOCKET_ID_ANY;
	tmpl->txq.elts_n = log2above(desc);
	tmpl->txq.port_id = dev->data->port_id;
	tmpl->txq.idx = idx;
	tmpl->hairpin_conf = *hairpin_conf;
	tmpl->type = MLX5_TXQ_TYPE_HAIRPIN;
	rte_atomic32_inc(&tmpl->refcnt);
	LIST_INSERT_HEAD(&priv->txqsctrl, tmpl, next);
	return tmpl;
}
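
/*
 * Illustrative reference-counting usage of mlx5_txq_get()/mlx5_txq_release()
 * defined below (a hypothetical caller sketch, not actual code from this
 * file):
 *
 *   struct mlx5_txq_ctrl *ctrl = mlx5_txq_get(dev, idx);
 *
 *   if (ctrl != NULL) {
 *           ... work with ctrl->txq ...
 *           mlx5_txq_release(dev, idx);
 *   }
 *
 * Every successful mlx5_txq_get() must be balanced by mlx5_txq_release();
 * the queue is actually freed only when the last reference is dropped.
 */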

/**
 * Get a Tx queue.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param idx
 *   TX queue index.
 *
 * @return
 *   A pointer to the queue if it exists.
 */
struct mlx5_txq_ctrl *
mlx5_txq_get(struct rte_eth_dev *dev, uint16_t idx)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_ctrl *ctrl = NULL;

	if ((*priv->txqs)[idx]) {
		ctrl = container_of((*priv->txqs)[idx], struct mlx5_txq_ctrl,
				    txq);
		mlx5_txq_obj_get(dev, idx);
		rte_atomic32_inc(&ctrl->refcnt);
	}
	return ctrl;
}

/**
 * Release a Tx queue.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param idx
 *   TX queue index.
 *
 * @return
 *   1 while a reference on it exists, 0 when freed.
 */
int
mlx5_txq_release(struct rte_eth_dev *dev, uint16_t idx)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_ctrl *txq;

	if (!(*priv->txqs)[idx])
		return 0;
	txq = container_of((*priv->txqs)[idx], struct mlx5_txq_ctrl, txq);
	if (txq->obj && !mlx5_txq_obj_release(txq->obj))
		txq->obj = NULL;
	if (rte_atomic32_dec_and_test(&txq->refcnt)) {
		txq_free_elts(txq);
		mlx5_mr_btree_free(&txq->txq.mr_ctrl.cache_bh);
		LIST_REMOVE(txq, next);
		mlx5_free(txq);
		(*priv->txqs)[idx] = NULL;
		dev->data->tx_queue_state[idx] = RTE_ETH_QUEUE_STATE_STOPPED;
		return 0;
	}
	return 1;
}

/**
 * Verify if the queue can be released.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param idx
 *   TX queue index.
 *
 * @return
 *   1 if the queue can be released.
 */
int
mlx5_txq_releasable(struct rte_eth_dev *dev, uint16_t idx)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_ctrl *txq;

	if (!(*priv->txqs)[idx])
		return -1;
	txq = container_of((*priv->txqs)[idx], struct mlx5_txq_ctrl, txq);
	return (rte_atomic32_read(&txq->refcnt) == 1);
}

/**
 * Verify that the Tx queue list is empty.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   The number of objects not released.
 */
int
mlx5_txq_verify(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_ctrl *txq_ctrl;
	int ret = 0;

	LIST_FOREACH(txq_ctrl, &priv->txqsctrl, next) {
		DRV_LOG(DEBUG, "port %u Tx queue %u still referenced",
			dev->data->port_id, txq_ctrl->txq.idx);
		++ret;
	}
	return ret;
}

/**
 * Set the Tx queue dynamic timestamp (mask and offset).
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 */
void
mlx5_txq_dynf_timestamp_set(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_dev_ctx_shared *sh = priv->sh;
	struct mlx5_txq_data *data;
	int off, nbit;
	unsigned int i;
	uint64_t mask = 0;

	nbit = rte_mbuf_dynflag_lookup
				(RTE_MBUF_DYNFLAG_TX_TIMESTAMP_NAME, NULL);
	off = rte_mbuf_dynfield_lookup
				(RTE_MBUF_DYNFIELD_TIMESTAMP_NAME, NULL);
	if (nbit >= 0 && off >= 0 && sh->txpp.refcnt)
		mask = 1ULL << nbit;
	for (i = 0; i != priv->txqs_n; ++i) {
		data = (*priv->txqs)[i];
		if (!data)
			continue;
		data->sh = sh;
		data->ts_mask = mask;
		data->ts_offset = off;
	}
}
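
/*
 * Illustrative application-side sketch (editor's note, not driver code):
 * to use scheduled sending, the application looks up the same dynamic
 * mbuf field/flag as mlx5_txq_dynf_timestamp_set() above and stamps each
 * packet before rte_eth_tx_burst(), for example:
 *
 *   int off = rte_mbuf_dynfield_lookup(RTE_MBUF_DYNFIELD_TIMESTAMP_NAME,
 *                                      NULL);
 *   int bit = rte_mbuf_dynflag_lookup(RTE_MBUF_DYNFLAG_TX_TIMESTAMP_NAME,
 *                                     NULL);
 *
 *   if (off >= 0 && bit >= 0) {
 *           *RTE_MBUF_DYNFIELD(mbuf, off, uint64_t *) = desired_tx_time;
 *           mbuf->ol_flags |= 1ULL << bit;
 *   }
 *
 * "mbuf" and "desired_tx_time" are placeholders; registration of the
 * dynamic field/flag is assumed to have been done elsewhere.
 */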