1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2015 6WIND S.A. 3 * Copyright 2015 Mellanox Technologies, Ltd 4 */ 5 6 #include <stddef.h> 7 #include <errno.h> 8 #include <string.h> 9 #include <stdint.h> 10 #include <unistd.h> 11 #include <sys/mman.h> 12 #include <inttypes.h> 13 14 /* Verbs header. */ 15 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */ 16 #ifdef PEDANTIC 17 #pragma GCC diagnostic ignored "-Wpedantic" 18 #endif 19 #include <infiniband/verbs.h> 20 #include <infiniband/mlx5dv.h> 21 #ifdef PEDANTIC 22 #pragma GCC diagnostic error "-Wpedantic" 23 #endif 24 25 #include <rte_mbuf.h> 26 #include <rte_malloc.h> 27 #include <rte_ethdev_driver.h> 28 #include <rte_common.h> 29 30 #include <mlx5_glue.h> 31 #include <mlx5_devx_cmds.h> 32 #include <mlx5_common.h> 33 34 #include "mlx5_defs.h" 35 #include "mlx5_utils.h" 36 #include "mlx5.h" 37 #include "mlx5_rxtx.h" 38 #include "mlx5_autoconf.h" 39 40 /** 41 * Allocate TX queue elements. 42 * 43 * @param txq_ctrl 44 * Pointer to TX queue structure. 45 */ 46 void 47 txq_alloc_elts(struct mlx5_txq_ctrl *txq_ctrl) 48 { 49 const unsigned int elts_n = 1 << txq_ctrl->txq.elts_n; 50 unsigned int i; 51 52 for (i = 0; (i != elts_n); ++i) 53 txq_ctrl->txq.elts[i] = NULL; 54 DRV_LOG(DEBUG, "port %u Tx queue %u allocated and configured %u WRs", 55 PORT_ID(txq_ctrl->priv), txq_ctrl->txq.idx, elts_n); 56 txq_ctrl->txq.elts_head = 0; 57 txq_ctrl->txq.elts_tail = 0; 58 txq_ctrl->txq.elts_comp = 0; 59 } 60 61 /** 62 * Free TX queue elements. 63 * 64 * @param txq_ctrl 65 * Pointer to TX queue structure. 66 */ 67 void 68 txq_free_elts(struct mlx5_txq_ctrl *txq_ctrl) 69 { 70 const uint16_t elts_n = 1 << txq_ctrl->txq.elts_n; 71 const uint16_t elts_m = elts_n - 1; 72 uint16_t elts_head = txq_ctrl->txq.elts_head; 73 uint16_t elts_tail = txq_ctrl->txq.elts_tail; 74 struct rte_mbuf *(*elts)[elts_n] = &txq_ctrl->txq.elts; 75 76 DRV_LOG(DEBUG, "port %u Tx queue %u freeing WRs", 77 PORT_ID(txq_ctrl->priv), txq_ctrl->txq.idx); 78 txq_ctrl->txq.elts_head = 0; 79 txq_ctrl->txq.elts_tail = 0; 80 txq_ctrl->txq.elts_comp = 0; 81 82 while (elts_tail != elts_head) { 83 struct rte_mbuf *elt = (*elts)[elts_tail & elts_m]; 84 85 MLX5_ASSERT(elt != NULL); 86 rte_pktmbuf_free_seg(elt); 87 #ifdef RTE_LIBRTE_MLX5_DEBUG 88 /* Poisoning. */ 89 memset(&(*elts)[elts_tail & elts_m], 90 0x77, 91 sizeof((*elts)[elts_tail & elts_m])); 92 #endif 93 ++elts_tail; 94 } 95 } 96 97 /** 98 * Returns the per-port supported offloads. 99 * 100 * @param dev 101 * Pointer to Ethernet device. 102 * 103 * @return 104 * Supported Tx offloads. 105 */ 106 uint64_t 107 mlx5_get_tx_port_offloads(struct rte_eth_dev *dev) 108 { 109 struct mlx5_priv *priv = dev->data->dev_private; 110 uint64_t offloads = (DEV_TX_OFFLOAD_MULTI_SEGS | 111 DEV_TX_OFFLOAD_VLAN_INSERT); 112 struct mlx5_dev_config *config = &priv->config; 113 114 if (config->hw_csum) 115 offloads |= (DEV_TX_OFFLOAD_IPV4_CKSUM | 116 DEV_TX_OFFLOAD_UDP_CKSUM | 117 DEV_TX_OFFLOAD_TCP_CKSUM); 118 if (config->tso) 119 offloads |= DEV_TX_OFFLOAD_TCP_TSO; 120 if (config->swp) { 121 if (config->hw_csum) 122 offloads |= DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM; 123 if (config->tso) 124 offloads |= (DEV_TX_OFFLOAD_IP_TNL_TSO | 125 DEV_TX_OFFLOAD_UDP_TNL_TSO); 126 } 127 if (config->tunnel_en) { 128 if (config->hw_csum) 129 offloads |= DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM; 130 if (config->tso) 131 offloads |= (DEV_TX_OFFLOAD_VXLAN_TNL_TSO | 132 DEV_TX_OFFLOAD_GRE_TNL_TSO | 133 DEV_TX_OFFLOAD_GENEVE_TNL_TSO); 134 } 135 return offloads; 136 } 137 138 /** 139 * Tx queue presetup checks. 140 * 141 * @param dev 142 * Pointer to Ethernet device structure. 143 * @param idx 144 * Tx queue index. 145 * @param desc 146 * Number of descriptors to configure in queue. 147 * 148 * @return 149 * 0 on success, a negative errno value otherwise and rte_errno is set. 150 */ 151 static int 152 mlx5_tx_queue_pre_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc) 153 { 154 struct mlx5_priv *priv = dev->data->dev_private; 155 156 if (desc <= MLX5_TX_COMP_THRESH) { 157 DRV_LOG(WARNING, 158 "port %u number of descriptors requested for Tx queue" 159 " %u must be higher than MLX5_TX_COMP_THRESH, using %u" 160 " instead of %u", 161 dev->data->port_id, idx, MLX5_TX_COMP_THRESH + 1, desc); 162 desc = MLX5_TX_COMP_THRESH + 1; 163 } 164 if (!rte_is_power_of_2(desc)) { 165 desc = 1 << log2above(desc); 166 DRV_LOG(WARNING, 167 "port %u increased number of descriptors in Tx queue" 168 " %u to the next power of two (%d)", 169 dev->data->port_id, idx, desc); 170 } 171 DRV_LOG(DEBUG, "port %u configuring queue %u for %u descriptors", 172 dev->data->port_id, idx, desc); 173 if (idx >= priv->txqs_n) { 174 DRV_LOG(ERR, "port %u Tx queue index out of range (%u >= %u)", 175 dev->data->port_id, idx, priv->txqs_n); 176 rte_errno = EOVERFLOW; 177 return -rte_errno; 178 } 179 if (!mlx5_txq_releasable(dev, idx)) { 180 rte_errno = EBUSY; 181 DRV_LOG(ERR, "port %u unable to release queue index %u", 182 dev->data->port_id, idx); 183 return -rte_errno; 184 } 185 mlx5_txq_release(dev, idx); 186 return 0; 187 } 188 /** 189 * DPDK callback to configure a TX queue. 190 * 191 * @param dev 192 * Pointer to Ethernet device structure. 193 * @param idx 194 * TX queue index. 195 * @param desc 196 * Number of descriptors to configure in queue. 197 * @param socket 198 * NUMA socket on which memory must be allocated. 199 * @param[in] conf 200 * Thresholds parameters. 201 * 202 * @return 203 * 0 on success, a negative errno value otherwise and rte_errno is set. 204 */ 205 int 206 mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, 207 unsigned int socket, const struct rte_eth_txconf *conf) 208 { 209 struct mlx5_priv *priv = dev->data->dev_private; 210 struct mlx5_txq_data *txq = (*priv->txqs)[idx]; 211 struct mlx5_txq_ctrl *txq_ctrl = 212 container_of(txq, struct mlx5_txq_ctrl, txq); 213 int res; 214 215 res = mlx5_tx_queue_pre_setup(dev, idx, desc); 216 if (res) 217 return res; 218 txq_ctrl = mlx5_txq_new(dev, idx, desc, socket, conf); 219 if (!txq_ctrl) { 220 DRV_LOG(ERR, "port %u unable to allocate queue index %u", 221 dev->data->port_id, idx); 222 return -rte_errno; 223 } 224 DRV_LOG(DEBUG, "port %u adding Tx queue %u to list", 225 dev->data->port_id, idx); 226 (*priv->txqs)[idx] = &txq_ctrl->txq; 227 return 0; 228 } 229 230 /** 231 * DPDK callback to configure a TX hairpin queue. 232 * 233 * @param dev 234 * Pointer to Ethernet device structure. 235 * @param idx 236 * TX queue index. 237 * @param desc 238 * Number of descriptors to configure in queue. 239 * @param[in] hairpin_conf 240 * The hairpin binding configuration. 241 * 242 * @return 243 * 0 on success, a negative errno value otherwise and rte_errno is set. 244 */ 245 int 246 mlx5_tx_hairpin_queue_setup(struct rte_eth_dev *dev, uint16_t idx, 247 uint16_t desc, 248 const struct rte_eth_hairpin_conf *hairpin_conf) 249 { 250 struct mlx5_priv *priv = dev->data->dev_private; 251 struct mlx5_txq_data *txq = (*priv->txqs)[idx]; 252 struct mlx5_txq_ctrl *txq_ctrl = 253 container_of(txq, struct mlx5_txq_ctrl, txq); 254 int res; 255 256 res = mlx5_tx_queue_pre_setup(dev, idx, desc); 257 if (res) 258 return res; 259 if (hairpin_conf->peer_count != 1 || 260 hairpin_conf->peers[0].port != dev->data->port_id || 261 hairpin_conf->peers[0].queue >= priv->rxqs_n) { 262 DRV_LOG(ERR, "port %u unable to setup hairpin queue index %u " 263 " invalid hairpind configuration", dev->data->port_id, 264 idx); 265 rte_errno = EINVAL; 266 return -rte_errno; 267 } 268 txq_ctrl = mlx5_txq_hairpin_new(dev, idx, desc, hairpin_conf); 269 if (!txq_ctrl) { 270 DRV_LOG(ERR, "port %u unable to allocate queue index %u", 271 dev->data->port_id, idx); 272 return -rte_errno; 273 } 274 DRV_LOG(DEBUG, "port %u adding Tx queue %u to list", 275 dev->data->port_id, idx); 276 (*priv->txqs)[idx] = &txq_ctrl->txq; 277 return 0; 278 } 279 280 /** 281 * DPDK callback to release a TX queue. 282 * 283 * @param dpdk_txq 284 * Generic TX queue pointer. 285 */ 286 void 287 mlx5_tx_queue_release(void *dpdk_txq) 288 { 289 struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq; 290 struct mlx5_txq_ctrl *txq_ctrl; 291 struct mlx5_priv *priv; 292 unsigned int i; 293 294 if (txq == NULL) 295 return; 296 txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq); 297 priv = txq_ctrl->priv; 298 for (i = 0; (i != priv->txqs_n); ++i) 299 if ((*priv->txqs)[i] == txq) { 300 mlx5_txq_release(ETH_DEV(priv), i); 301 DRV_LOG(DEBUG, "port %u removing Tx queue %u from list", 302 PORT_ID(priv), txq->idx); 303 break; 304 } 305 } 306 307 /** 308 * Configure the doorbell register non-cached attribute. 309 * 310 * @param txq_ctrl 311 * Pointer to Tx queue control structure. 312 * @param page_size 313 * Systme page size 314 */ 315 static void 316 txq_uar_ncattr_init(struct mlx5_txq_ctrl *txq_ctrl, size_t page_size) 317 { 318 struct mlx5_priv *priv = txq_ctrl->priv; 319 off_t cmd; 320 321 txq_ctrl->txq.db_heu = priv->config.dbnc == MLX5_TXDB_HEURISTIC; 322 txq_ctrl->txq.db_nc = 0; 323 /* Check the doorbell register mapping type. */ 324 cmd = txq_ctrl->uar_mmap_offset / page_size; 325 cmd >>= MLX5_UAR_MMAP_CMD_SHIFT; 326 cmd &= MLX5_UAR_MMAP_CMD_MASK; 327 if (cmd == MLX5_MMAP_GET_NC_PAGES_CMD) 328 txq_ctrl->txq.db_nc = 1; 329 } 330 331 /** 332 * Initialize Tx UAR registers for primary process. 333 * 334 * @param txq_ctrl 335 * Pointer to Tx queue control structure. 336 */ 337 static void 338 txq_uar_init(struct mlx5_txq_ctrl *txq_ctrl) 339 { 340 struct mlx5_priv *priv = txq_ctrl->priv; 341 struct mlx5_proc_priv *ppriv = MLX5_PROC_PRIV(PORT_ID(priv)); 342 const size_t page_size = sysconf(_SC_PAGESIZE); 343 #ifndef RTE_ARCH_64 344 unsigned int lock_idx; 345 #endif 346 347 if (txq_ctrl->type != MLX5_TXQ_TYPE_STANDARD) 348 return; 349 MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY); 350 MLX5_ASSERT(ppriv); 351 ppriv->uar_table[txq_ctrl->txq.idx] = txq_ctrl->bf_reg; 352 txq_uar_ncattr_init(txq_ctrl, page_size); 353 #ifndef RTE_ARCH_64 354 /* Assign an UAR lock according to UAR page number */ 355 lock_idx = (txq_ctrl->uar_mmap_offset / page_size) & 356 MLX5_UAR_PAGE_NUM_MASK; 357 txq_ctrl->txq.uar_lock = &priv->uar_lock[lock_idx]; 358 #endif 359 } 360 361 /** 362 * Remap UAR register of a Tx queue for secondary process. 363 * 364 * Remapped address is stored at the table in the process private structure of 365 * the device, indexed by queue index. 366 * 367 * @param txq_ctrl 368 * Pointer to Tx queue control structure. 369 * @param fd 370 * Verbs file descriptor to map UAR pages. 371 * 372 * @return 373 * 0 on success, a negative errno value otherwise and rte_errno is set. 374 */ 375 static int 376 txq_uar_init_secondary(struct mlx5_txq_ctrl *txq_ctrl, int fd) 377 { 378 struct mlx5_priv *priv = txq_ctrl->priv; 379 struct mlx5_proc_priv *ppriv = MLX5_PROC_PRIV(PORT_ID(priv)); 380 struct mlx5_txq_data *txq = &txq_ctrl->txq; 381 void *addr; 382 uintptr_t uar_va; 383 uintptr_t offset; 384 const size_t page_size = sysconf(_SC_PAGESIZE); 385 386 if (txq_ctrl->type != MLX5_TXQ_TYPE_STANDARD) 387 return 0; 388 MLX5_ASSERT(ppriv); 389 /* 390 * As rdma-core, UARs are mapped in size of OS page 391 * size. Ref to libmlx5 function: mlx5_init_context() 392 */ 393 uar_va = (uintptr_t)txq_ctrl->bf_reg; 394 offset = uar_va & (page_size - 1); /* Offset in page. */ 395 addr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, fd, 396 txq_ctrl->uar_mmap_offset); 397 if (addr == MAP_FAILED) { 398 DRV_LOG(ERR, 399 "port %u mmap failed for BF reg of txq %u", 400 txq->port_id, txq->idx); 401 rte_errno = ENXIO; 402 return -rte_errno; 403 } 404 addr = RTE_PTR_ADD(addr, offset); 405 ppriv->uar_table[txq->idx] = addr; 406 txq_uar_ncattr_init(txq_ctrl, page_size); 407 return 0; 408 } 409 410 /** 411 * Unmap UAR register of a Tx queue for secondary process. 412 * 413 * @param txq_ctrl 414 * Pointer to Tx queue control structure. 415 */ 416 static void 417 txq_uar_uninit_secondary(struct mlx5_txq_ctrl *txq_ctrl) 418 { 419 struct mlx5_proc_priv *ppriv = MLX5_PROC_PRIV(PORT_ID(txq_ctrl->priv)); 420 const size_t page_size = sysconf(_SC_PAGESIZE); 421 void *addr; 422 423 if (txq_ctrl->type != MLX5_TXQ_TYPE_STANDARD) 424 return; 425 addr = ppriv->uar_table[txq_ctrl->txq.idx]; 426 munmap(RTE_PTR_ALIGN_FLOOR(addr, page_size), page_size); 427 } 428 429 /** 430 * Initialize Tx UAR registers for secondary process. 431 * 432 * @param dev 433 * Pointer to Ethernet device. 434 * @param fd 435 * Verbs file descriptor to map UAR pages. 436 * 437 * @return 438 * 0 on success, a negative errno value otherwise and rte_errno is set. 439 */ 440 int 441 mlx5_tx_uar_init_secondary(struct rte_eth_dev *dev, int fd) 442 { 443 struct mlx5_priv *priv = dev->data->dev_private; 444 struct mlx5_txq_data *txq; 445 struct mlx5_txq_ctrl *txq_ctrl; 446 unsigned int i; 447 int ret; 448 449 MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_SECONDARY); 450 for (i = 0; i != priv->txqs_n; ++i) { 451 if (!(*priv->txqs)[i]) 452 continue; 453 txq = (*priv->txqs)[i]; 454 txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq); 455 if (txq_ctrl->type != MLX5_TXQ_TYPE_STANDARD) 456 continue; 457 MLX5_ASSERT(txq->idx == (uint16_t)i); 458 ret = txq_uar_init_secondary(txq_ctrl, fd); 459 if (ret) 460 goto error; 461 } 462 return 0; 463 error: 464 /* Rollback. */ 465 do { 466 if (!(*priv->txqs)[i]) 467 continue; 468 txq = (*priv->txqs)[i]; 469 txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq); 470 txq_uar_uninit_secondary(txq_ctrl); 471 } while (i--); 472 return -rte_errno; 473 } 474 475 /** 476 * Create the Tx hairpin queue object. 477 * 478 * @param dev 479 * Pointer to Ethernet device. 480 * @param idx 481 * Queue index in DPDK Tx queue array 482 * 483 * @return 484 * The hairpin DevX object initialised, NULL otherwise and rte_errno is set. 485 */ 486 static struct mlx5_txq_obj * 487 mlx5_txq_obj_hairpin_new(struct rte_eth_dev *dev, uint16_t idx) 488 { 489 struct mlx5_priv *priv = dev->data->dev_private; 490 struct mlx5_txq_data *txq_data = (*priv->txqs)[idx]; 491 struct mlx5_txq_ctrl *txq_ctrl = 492 container_of(txq_data, struct mlx5_txq_ctrl, txq); 493 struct mlx5_devx_create_sq_attr attr = { 0 }; 494 struct mlx5_txq_obj *tmpl = NULL; 495 int ret = 0; 496 uint32_t max_wq_data; 497 498 MLX5_ASSERT(txq_data); 499 MLX5_ASSERT(!txq_ctrl->obj); 500 tmpl = rte_calloc_socket(__func__, 1, sizeof(*tmpl), 0, 501 txq_ctrl->socket); 502 if (!tmpl) { 503 DRV_LOG(ERR, 504 "port %u Tx queue %u cannot allocate memory resources", 505 dev->data->port_id, txq_data->idx); 506 rte_errno = ENOMEM; 507 goto error; 508 } 509 tmpl->type = MLX5_TXQ_OBJ_TYPE_DEVX_HAIRPIN; 510 tmpl->txq_ctrl = txq_ctrl; 511 attr.hairpin = 1; 512 attr.tis_lst_sz = 1; 513 max_wq_data = priv->config.hca_attr.log_max_hairpin_wq_data_sz; 514 /* Jumbo frames > 9KB should be supported, and more packets. */ 515 attr.wq_attr.log_hairpin_data_sz = 516 (max_wq_data < MLX5_HAIRPIN_JUMBO_LOG_SIZE) ? 517 max_wq_data : MLX5_HAIRPIN_JUMBO_LOG_SIZE; 518 /* Set the packets number to the maximum value for performance. */ 519 attr.wq_attr.log_hairpin_num_packets = 520 attr.wq_attr.log_hairpin_data_sz - 521 MLX5_HAIRPIN_QUEUE_STRIDE; 522 attr.tis_num = priv->sh->tis->id; 523 tmpl->sq = mlx5_devx_cmd_create_sq(priv->sh->ctx, &attr); 524 if (!tmpl->sq) { 525 DRV_LOG(ERR, 526 "port %u tx hairpin queue %u can't create sq object", 527 dev->data->port_id, idx); 528 rte_errno = errno; 529 goto error; 530 } 531 DRV_LOG(DEBUG, "port %u sxq %u updated with %p", dev->data->port_id, 532 idx, (void *)&tmpl); 533 rte_atomic32_inc(&tmpl->refcnt); 534 LIST_INSERT_HEAD(&priv->txqsobj, tmpl, next); 535 return tmpl; 536 error: 537 ret = rte_errno; /* Save rte_errno before cleanup. */ 538 if (tmpl->tis) 539 mlx5_devx_cmd_destroy(tmpl->tis); 540 if (tmpl->sq) 541 mlx5_devx_cmd_destroy(tmpl->sq); 542 rte_errno = ret; /* Restore rte_errno. */ 543 return NULL; 544 } 545 546 /** 547 * Create the Tx queue Verbs object. 548 * 549 * @param dev 550 * Pointer to Ethernet device. 551 * @param idx 552 * Queue index in DPDK Tx queue array. 553 * @param type 554 * Type of the Tx queue object to create. 555 * 556 * @return 557 * The Verbs object initialised, NULL otherwise and rte_errno is set. 558 */ 559 struct mlx5_txq_obj * 560 mlx5_txq_obj_new(struct rte_eth_dev *dev, uint16_t idx, 561 enum mlx5_txq_obj_type type) 562 { 563 struct mlx5_priv *priv = dev->data->dev_private; 564 struct mlx5_txq_data *txq_data = (*priv->txqs)[idx]; 565 struct mlx5_txq_ctrl *txq_ctrl = 566 container_of(txq_data, struct mlx5_txq_ctrl, txq); 567 struct mlx5_txq_obj tmpl; 568 struct mlx5_txq_obj *txq_obj = NULL; 569 union { 570 struct ibv_qp_init_attr_ex init; 571 struct ibv_cq_init_attr_ex cq; 572 struct ibv_qp_attr mod; 573 } attr; 574 unsigned int cqe_n; 575 struct mlx5dv_qp qp = { .comp_mask = MLX5DV_QP_MASK_UAR_MMAP_OFFSET }; 576 struct mlx5dv_cq cq_info; 577 struct mlx5dv_obj obj; 578 const int desc = 1 << txq_data->elts_n; 579 int ret = 0; 580 581 if (type == MLX5_TXQ_OBJ_TYPE_DEVX_HAIRPIN) 582 return mlx5_txq_obj_hairpin_new(dev, idx); 583 #ifdef HAVE_IBV_FLOW_DV_SUPPORT 584 /* If using DevX, need additional mask to read tisn value. */ 585 if (priv->config.devx && !priv->sh->tdn) 586 qp.comp_mask |= MLX5DV_QP_MASK_RAW_QP_HANDLES; 587 #endif 588 MLX5_ASSERT(txq_data); 589 priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_TX_QUEUE; 590 priv->verbs_alloc_ctx.obj = txq_ctrl; 591 if (mlx5_getenv_int("MLX5_ENABLE_CQE_COMPRESSION")) { 592 DRV_LOG(ERR, 593 "port %u MLX5_ENABLE_CQE_COMPRESSION must never be set", 594 dev->data->port_id); 595 rte_errno = EINVAL; 596 return NULL; 597 } 598 memset(&tmpl, 0, sizeof(struct mlx5_txq_obj)); 599 attr.cq = (struct ibv_cq_init_attr_ex){ 600 .comp_mask = 0, 601 }; 602 cqe_n = desc / MLX5_TX_COMP_THRESH + 603 1 + MLX5_TX_COMP_THRESH_INLINE_DIV; 604 tmpl.cq = mlx5_glue->create_cq(priv->sh->ctx, cqe_n, NULL, NULL, 0); 605 if (tmpl.cq == NULL) { 606 DRV_LOG(ERR, "port %u Tx queue %u CQ creation failure", 607 dev->data->port_id, idx); 608 rte_errno = errno; 609 goto error; 610 } 611 attr.init = (struct ibv_qp_init_attr_ex){ 612 /* CQ to be associated with the send queue. */ 613 .send_cq = tmpl.cq, 614 /* CQ to be associated with the receive queue. */ 615 .recv_cq = tmpl.cq, 616 .cap = { 617 /* Max number of outstanding WRs. */ 618 .max_send_wr = 619 ((priv->sh->device_attr.orig_attr.max_qp_wr < 620 desc) ? 621 priv->sh->device_attr.orig_attr.max_qp_wr : 622 desc), 623 /* 624 * Max number of scatter/gather elements in a WR, 625 * must be 1 to prevent libmlx5 from trying to affect 626 * too much memory. TX gather is not impacted by the 627 * device_attr.max_sge limit and will still work 628 * properly. 629 */ 630 .max_send_sge = 1, 631 }, 632 .qp_type = IBV_QPT_RAW_PACKET, 633 /* 634 * Do *NOT* enable this, completions events are managed per 635 * Tx burst. 636 */ 637 .sq_sig_all = 0, 638 .pd = priv->sh->pd, 639 .comp_mask = IBV_QP_INIT_ATTR_PD, 640 }; 641 if (txq_data->inlen_send) 642 attr.init.cap.max_inline_data = txq_ctrl->max_inline_data; 643 if (txq_data->tso_en) { 644 attr.init.max_tso_header = txq_ctrl->max_tso_header; 645 attr.init.comp_mask |= IBV_QP_INIT_ATTR_MAX_TSO_HEADER; 646 } 647 tmpl.qp = mlx5_glue->create_qp_ex(priv->sh->ctx, &attr.init); 648 if (tmpl.qp == NULL) { 649 DRV_LOG(ERR, "port %u Tx queue %u QP creation failure", 650 dev->data->port_id, idx); 651 rte_errno = errno; 652 goto error; 653 } 654 attr.mod = (struct ibv_qp_attr){ 655 /* Move the QP to this state. */ 656 .qp_state = IBV_QPS_INIT, 657 /* IB device port number. */ 658 .port_num = (uint8_t)priv->ibv_port, 659 }; 660 ret = mlx5_glue->modify_qp(tmpl.qp, &attr.mod, 661 (IBV_QP_STATE | IBV_QP_PORT)); 662 if (ret) { 663 DRV_LOG(ERR, 664 "port %u Tx queue %u QP state to IBV_QPS_INIT failed", 665 dev->data->port_id, idx); 666 rte_errno = errno; 667 goto error; 668 } 669 attr.mod = (struct ibv_qp_attr){ 670 .qp_state = IBV_QPS_RTR 671 }; 672 ret = mlx5_glue->modify_qp(tmpl.qp, &attr.mod, IBV_QP_STATE); 673 if (ret) { 674 DRV_LOG(ERR, 675 "port %u Tx queue %u QP state to IBV_QPS_RTR failed", 676 dev->data->port_id, idx); 677 rte_errno = errno; 678 goto error; 679 } 680 attr.mod.qp_state = IBV_QPS_RTS; 681 ret = mlx5_glue->modify_qp(tmpl.qp, &attr.mod, IBV_QP_STATE); 682 if (ret) { 683 DRV_LOG(ERR, 684 "port %u Tx queue %u QP state to IBV_QPS_RTS failed", 685 dev->data->port_id, idx); 686 rte_errno = errno; 687 goto error; 688 } 689 txq_obj = rte_calloc_socket(__func__, 1, sizeof(struct mlx5_txq_obj), 0, 690 txq_ctrl->socket); 691 if (!txq_obj) { 692 DRV_LOG(ERR, "port %u Tx queue %u cannot allocate memory", 693 dev->data->port_id, idx); 694 rte_errno = ENOMEM; 695 goto error; 696 } 697 obj.cq.in = tmpl.cq; 698 obj.cq.out = &cq_info; 699 obj.qp.in = tmpl.qp; 700 obj.qp.out = &qp; 701 ret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_CQ | MLX5DV_OBJ_QP); 702 if (ret != 0) { 703 rte_errno = errno; 704 goto error; 705 } 706 if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) { 707 DRV_LOG(ERR, 708 "port %u wrong MLX5_CQE_SIZE environment variable" 709 " value: it should be set to %u", 710 dev->data->port_id, RTE_CACHE_LINE_SIZE); 711 rte_errno = EINVAL; 712 goto error; 713 } 714 txq_data->cqe_n = log2above(cq_info.cqe_cnt); 715 txq_data->cqe_s = 1 << txq_data->cqe_n; 716 txq_data->cqe_m = txq_data->cqe_s - 1; 717 txq_data->qp_num_8s = tmpl.qp->qp_num << 8; 718 txq_data->wqes = qp.sq.buf; 719 txq_data->wqe_n = log2above(qp.sq.wqe_cnt); 720 txq_data->wqe_s = 1 << txq_data->wqe_n; 721 txq_data->wqe_m = txq_data->wqe_s - 1; 722 txq_data->wqes_end = txq_data->wqes + txq_data->wqe_s; 723 txq_data->qp_db = &qp.dbrec[MLX5_SND_DBR]; 724 txq_data->cq_db = cq_info.dbrec; 725 txq_data->cqes = (volatile struct mlx5_cqe *)cq_info.buf; 726 txq_data->cq_ci = 0; 727 txq_data->cq_pi = 0; 728 txq_data->wqe_ci = 0; 729 txq_data->wqe_pi = 0; 730 txq_data->wqe_comp = 0; 731 txq_data->wqe_thres = txq_data->wqe_s / MLX5_TX_COMP_THRESH_INLINE_DIV; 732 txq_data->fcqs = rte_calloc_socket(__func__, 733 txq_data->cqe_s, 734 sizeof(*txq_data->fcqs), 735 RTE_CACHE_LINE_SIZE, 736 txq_ctrl->socket); 737 if (!txq_data->fcqs) { 738 DRV_LOG(ERR, "port %u Tx queue %u cannot allocate memory (FCQ)", 739 dev->data->port_id, idx); 740 rte_errno = ENOMEM; 741 goto error; 742 } 743 #ifdef HAVE_IBV_FLOW_DV_SUPPORT 744 /* 745 * If using DevX need to query and store TIS transport domain value. 746 * This is done once per port. 747 * Will use this value on Rx, when creating matching TIR. 748 */ 749 if (priv->config.devx && !priv->sh->tdn) { 750 ret = mlx5_devx_cmd_qp_query_tis_td(tmpl.qp, qp.tisn, 751 &priv->sh->tdn); 752 if (ret) { 753 DRV_LOG(ERR, "Fail to query port %u Tx queue %u QP TIS " 754 "transport domain", dev->data->port_id, idx); 755 rte_errno = EINVAL; 756 goto error; 757 } else { 758 DRV_LOG(DEBUG, "port %u Tx queue %u TIS number %d " 759 "transport domain %d", dev->data->port_id, 760 idx, qp.tisn, priv->sh->tdn); 761 } 762 } 763 #endif 764 txq_obj->qp = tmpl.qp; 765 txq_obj->cq = tmpl.cq; 766 rte_atomic32_inc(&txq_obj->refcnt); 767 txq_ctrl->bf_reg = qp.bf.reg; 768 if (qp.comp_mask & MLX5DV_QP_MASK_UAR_MMAP_OFFSET) { 769 txq_ctrl->uar_mmap_offset = qp.uar_mmap_offset; 770 DRV_LOG(DEBUG, "port %u: uar_mmap_offset 0x%"PRIx64, 771 dev->data->port_id, txq_ctrl->uar_mmap_offset); 772 } else { 773 DRV_LOG(ERR, 774 "port %u failed to retrieve UAR info, invalid" 775 " libmlx5.so", 776 dev->data->port_id); 777 rte_errno = EINVAL; 778 goto error; 779 } 780 txq_uar_init(txq_ctrl); 781 LIST_INSERT_HEAD(&priv->txqsobj, txq_obj, next); 782 txq_obj->txq_ctrl = txq_ctrl; 783 priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE; 784 return txq_obj; 785 error: 786 ret = rte_errno; /* Save rte_errno before cleanup. */ 787 if (tmpl.cq) 788 claim_zero(mlx5_glue->destroy_cq(tmpl.cq)); 789 if (tmpl.qp) 790 claim_zero(mlx5_glue->destroy_qp(tmpl.qp)); 791 if (txq_data && txq_data->fcqs) 792 rte_free(txq_data->fcqs); 793 if (txq_obj) 794 rte_free(txq_obj); 795 priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE; 796 rte_errno = ret; /* Restore rte_errno. */ 797 return NULL; 798 } 799 800 /** 801 * Get an Tx queue Verbs object. 802 * 803 * @param dev 804 * Pointer to Ethernet device. 805 * @param idx 806 * Queue index in DPDK Tx queue array. 807 * 808 * @return 809 * The Verbs object if it exists. 810 */ 811 struct mlx5_txq_obj * 812 mlx5_txq_obj_get(struct rte_eth_dev *dev, uint16_t idx) 813 { 814 struct mlx5_priv *priv = dev->data->dev_private; 815 struct mlx5_txq_ctrl *txq_ctrl; 816 817 if (idx >= priv->txqs_n) 818 return NULL; 819 if (!(*priv->txqs)[idx]) 820 return NULL; 821 txq_ctrl = container_of((*priv->txqs)[idx], struct mlx5_txq_ctrl, txq); 822 if (txq_ctrl->obj) 823 rte_atomic32_inc(&txq_ctrl->obj->refcnt); 824 return txq_ctrl->obj; 825 } 826 827 /** 828 * Release an Tx verbs queue object. 829 * 830 * @param txq_obj 831 * Verbs Tx queue object. 832 * 833 * @return 834 * 1 while a reference on it exists, 0 when freed. 835 */ 836 int 837 mlx5_txq_obj_release(struct mlx5_txq_obj *txq_obj) 838 { 839 MLX5_ASSERT(txq_obj); 840 if (rte_atomic32_dec_and_test(&txq_obj->refcnt)) { 841 if (txq_obj->type == MLX5_TXQ_OBJ_TYPE_DEVX_HAIRPIN) { 842 if (txq_obj->tis) 843 claim_zero(mlx5_devx_cmd_destroy(txq_obj->tis)); 844 } else { 845 claim_zero(mlx5_glue->destroy_qp(txq_obj->qp)); 846 claim_zero(mlx5_glue->destroy_cq(txq_obj->cq)); 847 if (txq_obj->txq_ctrl->txq.fcqs) 848 rte_free(txq_obj->txq_ctrl->txq.fcqs); 849 } 850 LIST_REMOVE(txq_obj, next); 851 rte_free(txq_obj); 852 return 0; 853 } 854 return 1; 855 } 856 857 /** 858 * Verify the Verbs Tx queue list is empty 859 * 860 * @param dev 861 * Pointer to Ethernet device. 862 * 863 * @return 864 * The number of object not released. 865 */ 866 int 867 mlx5_txq_obj_verify(struct rte_eth_dev *dev) 868 { 869 struct mlx5_priv *priv = dev->data->dev_private; 870 int ret = 0; 871 struct mlx5_txq_obj *txq_obj; 872 873 LIST_FOREACH(txq_obj, &priv->txqsobj, next) { 874 DRV_LOG(DEBUG, "port %u Verbs Tx queue %u still referenced", 875 dev->data->port_id, txq_obj->txq_ctrl->txq.idx); 876 ++ret; 877 } 878 return ret; 879 } 880 881 /** 882 * Calculate the total number of WQEBB for Tx queue. 883 * 884 * Simplified version of calc_sq_size() in rdma-core. 885 * 886 * @param txq_ctrl 887 * Pointer to Tx queue control structure. 888 * 889 * @return 890 * The number of WQEBB. 891 */ 892 static int 893 txq_calc_wqebb_cnt(struct mlx5_txq_ctrl *txq_ctrl) 894 { 895 unsigned int wqe_size; 896 const unsigned int desc = 1 << txq_ctrl->txq.elts_n; 897 898 wqe_size = MLX5_WQE_CSEG_SIZE + 899 MLX5_WQE_ESEG_SIZE + 900 MLX5_WSEG_SIZE - 901 MLX5_ESEG_MIN_INLINE_SIZE + 902 txq_ctrl->max_inline_data; 903 return rte_align32pow2(wqe_size * desc) / MLX5_WQE_SIZE; 904 } 905 906 /** 907 * Calculate the maximal inline data size for Tx queue. 908 * 909 * @param txq_ctrl 910 * Pointer to Tx queue control structure. 911 * 912 * @return 913 * The maximal inline data size. 914 */ 915 static unsigned int 916 txq_calc_inline_max(struct mlx5_txq_ctrl *txq_ctrl) 917 { 918 const unsigned int desc = 1 << txq_ctrl->txq.elts_n; 919 struct mlx5_priv *priv = txq_ctrl->priv; 920 unsigned int wqe_size; 921 922 wqe_size = priv->sh->device_attr.orig_attr.max_qp_wr / desc; 923 if (!wqe_size) 924 return 0; 925 /* 926 * This calculation is derived from tthe source of 927 * mlx5_calc_send_wqe() in rdma_core library. 928 */ 929 wqe_size = wqe_size * MLX5_WQE_SIZE - 930 MLX5_WQE_CSEG_SIZE - 931 MLX5_WQE_ESEG_SIZE - 932 MLX5_WSEG_SIZE - 933 MLX5_WSEG_SIZE + 934 MLX5_DSEG_MIN_INLINE_SIZE; 935 return wqe_size; 936 } 937 938 /** 939 * Set Tx queue parameters from device configuration. 940 * 941 * @param txq_ctrl 942 * Pointer to Tx queue control structure. 943 */ 944 static void 945 txq_set_params(struct mlx5_txq_ctrl *txq_ctrl) 946 { 947 struct mlx5_priv *priv = txq_ctrl->priv; 948 struct mlx5_dev_config *config = &priv->config; 949 unsigned int inlen_send; /* Inline data for ordinary SEND.*/ 950 unsigned int inlen_empw; /* Inline data for enhanced MPW. */ 951 unsigned int inlen_mode; /* Minimal required Inline data. */ 952 unsigned int txqs_inline; /* Min Tx queues to enable inline. */ 953 uint64_t dev_txoff = priv->dev_data->dev_conf.txmode.offloads; 954 bool tso = txq_ctrl->txq.offloads & (DEV_TX_OFFLOAD_TCP_TSO | 955 DEV_TX_OFFLOAD_VXLAN_TNL_TSO | 956 DEV_TX_OFFLOAD_GRE_TNL_TSO | 957 DEV_TX_OFFLOAD_IP_TNL_TSO | 958 DEV_TX_OFFLOAD_UDP_TNL_TSO); 959 bool vlan_inline; 960 unsigned int temp; 961 962 if (config->txqs_inline == MLX5_ARG_UNSET) 963 txqs_inline = 964 #if defined(RTE_ARCH_ARM64) 965 (priv->pci_dev->id.device_id == 966 PCI_DEVICE_ID_MELLANOX_CONNECTX5BF) ? 967 MLX5_INLINE_MAX_TXQS_BLUEFIELD : 968 #endif 969 MLX5_INLINE_MAX_TXQS; 970 else 971 txqs_inline = (unsigned int)config->txqs_inline; 972 inlen_send = (config->txq_inline_max == MLX5_ARG_UNSET) ? 973 MLX5_SEND_DEF_INLINE_LEN : 974 (unsigned int)config->txq_inline_max; 975 inlen_empw = (config->txq_inline_mpw == MLX5_ARG_UNSET) ? 976 MLX5_EMPW_DEF_INLINE_LEN : 977 (unsigned int)config->txq_inline_mpw; 978 inlen_mode = (config->txq_inline_min == MLX5_ARG_UNSET) ? 979 0 : (unsigned int)config->txq_inline_min; 980 if (config->mps != MLX5_MPW_ENHANCED && config->mps != MLX5_MPW) 981 inlen_empw = 0; 982 /* 983 * If there is requested minimal amount of data to inline 984 * we MUST enable inlining. This is a case for ConnectX-4 985 * which usually requires L2 inlined for correct operating 986 * and ConnectX-4 Lx which requires L2-L4 inlined to 987 * support E-Switch Flows. 988 */ 989 if (inlen_mode) { 990 if (inlen_mode <= MLX5_ESEG_MIN_INLINE_SIZE) { 991 /* 992 * Optimize minimal inlining for single 993 * segment packets to fill one WQEBB 994 * without gaps. 995 */ 996 temp = MLX5_ESEG_MIN_INLINE_SIZE; 997 } else { 998 temp = inlen_mode - MLX5_ESEG_MIN_INLINE_SIZE; 999 temp = RTE_ALIGN(temp, MLX5_WSEG_SIZE) + 1000 MLX5_ESEG_MIN_INLINE_SIZE; 1001 temp = RTE_MIN(temp, MLX5_SEND_MAX_INLINE_LEN); 1002 } 1003 if (temp != inlen_mode) { 1004 DRV_LOG(INFO, 1005 "port %u minimal required inline setting" 1006 " aligned from %u to %u", 1007 PORT_ID(priv), inlen_mode, temp); 1008 inlen_mode = temp; 1009 } 1010 } 1011 /* 1012 * If port is configured to support VLAN insertion and device 1013 * does not support this feature by HW (for NICs before ConnectX-5 1014 * or in case of wqe_vlan_insert flag is not set) we must enable 1015 * data inline on all queues because it is supported by single 1016 * tx_burst routine. 1017 */ 1018 txq_ctrl->txq.vlan_en = config->hw_vlan_insert; 1019 vlan_inline = (dev_txoff & DEV_TX_OFFLOAD_VLAN_INSERT) && 1020 !config->hw_vlan_insert; 1021 /* 1022 * If there are few Tx queues it is prioritized 1023 * to save CPU cycles and disable data inlining at all. 1024 */ 1025 if (inlen_send && priv->txqs_n >= txqs_inline) { 1026 /* 1027 * The data sent with ordinal MLX5_OPCODE_SEND 1028 * may be inlined in Ethernet Segment, align the 1029 * length accordingly to fit entire WQEBBs. 1030 */ 1031 temp = RTE_MAX(inlen_send, 1032 MLX5_ESEG_MIN_INLINE_SIZE + MLX5_WQE_DSEG_SIZE); 1033 temp -= MLX5_ESEG_MIN_INLINE_SIZE + MLX5_WQE_DSEG_SIZE; 1034 temp = RTE_ALIGN(temp, MLX5_WQE_SIZE); 1035 temp += MLX5_ESEG_MIN_INLINE_SIZE + MLX5_WQE_DSEG_SIZE; 1036 temp = RTE_MIN(temp, MLX5_WQE_SIZE_MAX + 1037 MLX5_ESEG_MIN_INLINE_SIZE - 1038 MLX5_WQE_CSEG_SIZE - 1039 MLX5_WQE_ESEG_SIZE - 1040 MLX5_WQE_DSEG_SIZE * 2); 1041 temp = RTE_MIN(temp, MLX5_SEND_MAX_INLINE_LEN); 1042 temp = RTE_MAX(temp, inlen_mode); 1043 if (temp != inlen_send) { 1044 DRV_LOG(INFO, 1045 "port %u ordinary send inline setting" 1046 " aligned from %u to %u", 1047 PORT_ID(priv), inlen_send, temp); 1048 inlen_send = temp; 1049 } 1050 /* 1051 * Not aligned to cache lines, but to WQEs. 1052 * First bytes of data (initial alignment) 1053 * is going to be copied explicitly at the 1054 * beginning of inlining buffer in Ethernet 1055 * Segment. 1056 */ 1057 MLX5_ASSERT(inlen_send >= MLX5_ESEG_MIN_INLINE_SIZE); 1058 MLX5_ASSERT(inlen_send <= MLX5_WQE_SIZE_MAX + 1059 MLX5_ESEG_MIN_INLINE_SIZE - 1060 MLX5_WQE_CSEG_SIZE - 1061 MLX5_WQE_ESEG_SIZE - 1062 MLX5_WQE_DSEG_SIZE * 2); 1063 } else if (inlen_mode) { 1064 /* 1065 * If minimal inlining is requested we must 1066 * enable inlining in general, despite the 1067 * number of configured queues. Ignore the 1068 * txq_inline_max devarg, this is not 1069 * full-featured inline. 1070 */ 1071 inlen_send = inlen_mode; 1072 inlen_empw = 0; 1073 } else if (vlan_inline) { 1074 /* 1075 * Hardware does not report offload for 1076 * VLAN insertion, we must enable data inline 1077 * to implement feature by software. 1078 */ 1079 inlen_send = MLX5_ESEG_MIN_INLINE_SIZE; 1080 inlen_empw = 0; 1081 } else { 1082 inlen_send = 0; 1083 inlen_empw = 0; 1084 } 1085 txq_ctrl->txq.inlen_send = inlen_send; 1086 txq_ctrl->txq.inlen_mode = inlen_mode; 1087 txq_ctrl->txq.inlen_empw = 0; 1088 if (inlen_send && inlen_empw && priv->txqs_n >= txqs_inline) { 1089 /* 1090 * The data sent with MLX5_OPCODE_ENHANCED_MPSW 1091 * may be inlined in Data Segment, align the 1092 * length accordingly to fit entire WQEBBs. 1093 */ 1094 temp = RTE_MAX(inlen_empw, 1095 MLX5_WQE_SIZE + MLX5_DSEG_MIN_INLINE_SIZE); 1096 temp -= MLX5_DSEG_MIN_INLINE_SIZE; 1097 temp = RTE_ALIGN(temp, MLX5_WQE_SIZE); 1098 temp += MLX5_DSEG_MIN_INLINE_SIZE; 1099 temp = RTE_MIN(temp, MLX5_WQE_SIZE_MAX + 1100 MLX5_DSEG_MIN_INLINE_SIZE - 1101 MLX5_WQE_CSEG_SIZE - 1102 MLX5_WQE_ESEG_SIZE - 1103 MLX5_WQE_DSEG_SIZE); 1104 temp = RTE_MIN(temp, MLX5_EMPW_MAX_INLINE_LEN); 1105 if (temp != inlen_empw) { 1106 DRV_LOG(INFO, 1107 "port %u enhanced empw inline setting" 1108 " aligned from %u to %u", 1109 PORT_ID(priv), inlen_empw, temp); 1110 inlen_empw = temp; 1111 } 1112 MLX5_ASSERT(inlen_empw >= MLX5_ESEG_MIN_INLINE_SIZE); 1113 MLX5_ASSERT(inlen_empw <= MLX5_WQE_SIZE_MAX + 1114 MLX5_DSEG_MIN_INLINE_SIZE - 1115 MLX5_WQE_CSEG_SIZE - 1116 MLX5_WQE_ESEG_SIZE - 1117 MLX5_WQE_DSEG_SIZE); 1118 txq_ctrl->txq.inlen_empw = inlen_empw; 1119 } 1120 txq_ctrl->max_inline_data = RTE_MAX(inlen_send, inlen_empw); 1121 if (tso) { 1122 txq_ctrl->max_tso_header = MLX5_MAX_TSO_HEADER; 1123 txq_ctrl->max_inline_data = RTE_MAX(txq_ctrl->max_inline_data, 1124 MLX5_MAX_TSO_HEADER); 1125 txq_ctrl->txq.tso_en = 1; 1126 } 1127 txq_ctrl->txq.tunnel_en = config->tunnel_en | config->swp; 1128 txq_ctrl->txq.swp_en = ((DEV_TX_OFFLOAD_IP_TNL_TSO | 1129 DEV_TX_OFFLOAD_UDP_TNL_TSO | 1130 DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM) & 1131 txq_ctrl->txq.offloads) && config->swp; 1132 } 1133 1134 /** 1135 * Adjust Tx queue data inline parameters for large queue sizes. 1136 * The data inline feature requires multiple WQEs to fit the packets, 1137 * and if the large amount of Tx descriptors is requested by application 1138 * the total WQE amount may exceed the hardware capabilities. If the 1139 * default inline setting are used we can try to adjust these ones and 1140 * meet the hardware requirements and not exceed the queue size. 1141 * 1142 * @param txq_ctrl 1143 * Pointer to Tx queue control structure. 1144 * 1145 * @return 1146 * Zero on success, otherwise the parameters can not be adjusted. 1147 */ 1148 static int 1149 txq_adjust_params(struct mlx5_txq_ctrl *txq_ctrl) 1150 { 1151 struct mlx5_priv *priv = txq_ctrl->priv; 1152 struct mlx5_dev_config *config = &priv->config; 1153 unsigned int max_inline; 1154 1155 max_inline = txq_calc_inline_max(txq_ctrl); 1156 if (!txq_ctrl->txq.inlen_send) { 1157 /* 1158 * Inline data feature is not engaged at all. 1159 * There is nothing to adjust. 1160 */ 1161 return 0; 1162 } 1163 if (txq_ctrl->max_inline_data <= max_inline) { 1164 /* 1165 * The requested inline data length does not 1166 * exceed queue capabilities. 1167 */ 1168 return 0; 1169 } 1170 if (txq_ctrl->txq.inlen_mode > max_inline) { 1171 DRV_LOG(ERR, 1172 "minimal data inline requirements (%u) are not" 1173 " satisfied (%u) on port %u, try the smaller" 1174 " Tx queue size (%d)", 1175 txq_ctrl->txq.inlen_mode, max_inline, 1176 priv->dev_data->port_id, 1177 priv->sh->device_attr.orig_attr.max_qp_wr); 1178 goto error; 1179 } 1180 if (txq_ctrl->txq.inlen_send > max_inline && 1181 config->txq_inline_max != MLX5_ARG_UNSET && 1182 config->txq_inline_max > (int)max_inline) { 1183 DRV_LOG(ERR, 1184 "txq_inline_max requirements (%u) are not" 1185 " satisfied (%u) on port %u, try the smaller" 1186 " Tx queue size (%d)", 1187 txq_ctrl->txq.inlen_send, max_inline, 1188 priv->dev_data->port_id, 1189 priv->sh->device_attr.orig_attr.max_qp_wr); 1190 goto error; 1191 } 1192 if (txq_ctrl->txq.inlen_empw > max_inline && 1193 config->txq_inline_mpw != MLX5_ARG_UNSET && 1194 config->txq_inline_mpw > (int)max_inline) { 1195 DRV_LOG(ERR, 1196 "txq_inline_mpw requirements (%u) are not" 1197 " satisfied (%u) on port %u, try the smaller" 1198 " Tx queue size (%d)", 1199 txq_ctrl->txq.inlen_empw, max_inline, 1200 priv->dev_data->port_id, 1201 priv->sh->device_attr.orig_attr.max_qp_wr); 1202 goto error; 1203 } 1204 if (txq_ctrl->txq.tso_en && max_inline < MLX5_MAX_TSO_HEADER) { 1205 DRV_LOG(ERR, 1206 "tso header inline requirements (%u) are not" 1207 " satisfied (%u) on port %u, try the smaller" 1208 " Tx queue size (%d)", 1209 MLX5_MAX_TSO_HEADER, max_inline, 1210 priv->dev_data->port_id, 1211 priv->sh->device_attr.orig_attr.max_qp_wr); 1212 goto error; 1213 } 1214 if (txq_ctrl->txq.inlen_send > max_inline) { 1215 DRV_LOG(WARNING, 1216 "adjust txq_inline_max (%u->%u)" 1217 " due to large Tx queue on port %u", 1218 txq_ctrl->txq.inlen_send, max_inline, 1219 priv->dev_data->port_id); 1220 txq_ctrl->txq.inlen_send = max_inline; 1221 } 1222 if (txq_ctrl->txq.inlen_empw > max_inline) { 1223 DRV_LOG(WARNING, 1224 "adjust txq_inline_mpw (%u->%u)" 1225 "due to large Tx queue on port %u", 1226 txq_ctrl->txq.inlen_empw, max_inline, 1227 priv->dev_data->port_id); 1228 txq_ctrl->txq.inlen_empw = max_inline; 1229 } 1230 txq_ctrl->max_inline_data = RTE_MAX(txq_ctrl->txq.inlen_send, 1231 txq_ctrl->txq.inlen_empw); 1232 MLX5_ASSERT(txq_ctrl->max_inline_data <= max_inline); 1233 MLX5_ASSERT(txq_ctrl->txq.inlen_mode <= max_inline); 1234 MLX5_ASSERT(txq_ctrl->txq.inlen_mode <= txq_ctrl->txq.inlen_send); 1235 MLX5_ASSERT(txq_ctrl->txq.inlen_mode <= txq_ctrl->txq.inlen_empw || 1236 !txq_ctrl->txq.inlen_empw); 1237 return 0; 1238 error: 1239 rte_errno = ENOMEM; 1240 return -ENOMEM; 1241 } 1242 1243 /** 1244 * Create a DPDK Tx queue. 1245 * 1246 * @param dev 1247 * Pointer to Ethernet device. 1248 * @param idx 1249 * TX queue index. 1250 * @param desc 1251 * Number of descriptors to configure in queue. 1252 * @param socket 1253 * NUMA socket on which memory must be allocated. 1254 * @param[in] conf 1255 * Thresholds parameters. 1256 * 1257 * @return 1258 * A DPDK queue object on success, NULL otherwise and rte_errno is set. 1259 */ 1260 struct mlx5_txq_ctrl * 1261 mlx5_txq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, 1262 unsigned int socket, const struct rte_eth_txconf *conf) 1263 { 1264 struct mlx5_priv *priv = dev->data->dev_private; 1265 struct mlx5_txq_ctrl *tmpl; 1266 1267 tmpl = rte_calloc_socket("TXQ", 1, 1268 sizeof(*tmpl) + 1269 desc * sizeof(struct rte_mbuf *), 1270 0, socket); 1271 if (!tmpl) { 1272 rte_errno = ENOMEM; 1273 return NULL; 1274 } 1275 if (mlx5_mr_btree_init(&tmpl->txq.mr_ctrl.cache_bh, 1276 MLX5_MR_BTREE_CACHE_N, socket)) { 1277 /* rte_errno is already set. */ 1278 goto error; 1279 } 1280 /* Save pointer of global generation number to check memory event. */ 1281 tmpl->txq.mr_ctrl.dev_gen_ptr = &priv->sh->mr.dev_gen; 1282 MLX5_ASSERT(desc > MLX5_TX_COMP_THRESH); 1283 tmpl->txq.offloads = conf->offloads | 1284 dev->data->dev_conf.txmode.offloads; 1285 tmpl->priv = priv; 1286 tmpl->socket = socket; 1287 tmpl->txq.elts_n = log2above(desc); 1288 tmpl->txq.elts_s = desc; 1289 tmpl->txq.elts_m = desc - 1; 1290 tmpl->txq.port_id = dev->data->port_id; 1291 tmpl->txq.idx = idx; 1292 txq_set_params(tmpl); 1293 if (txq_adjust_params(tmpl)) 1294 goto error; 1295 if (txq_calc_wqebb_cnt(tmpl) > 1296 priv->sh->device_attr.orig_attr.max_qp_wr) { 1297 DRV_LOG(ERR, 1298 "port %u Tx WQEBB count (%d) exceeds the limit (%d)," 1299 " try smaller queue size", 1300 dev->data->port_id, txq_calc_wqebb_cnt(tmpl), 1301 priv->sh->device_attr.orig_attr.max_qp_wr); 1302 rte_errno = ENOMEM; 1303 goto error; 1304 } 1305 rte_atomic32_inc(&tmpl->refcnt); 1306 tmpl->type = MLX5_TXQ_TYPE_STANDARD; 1307 LIST_INSERT_HEAD(&priv->txqsctrl, tmpl, next); 1308 return tmpl; 1309 error: 1310 rte_free(tmpl); 1311 return NULL; 1312 } 1313 1314 /** 1315 * Create a DPDK Tx hairpin queue. 1316 * 1317 * @param dev 1318 * Pointer to Ethernet device. 1319 * @param idx 1320 * TX queue index. 1321 * @param desc 1322 * Number of descriptors to configure in queue. 1323 * @param hairpin_conf 1324 * The hairpin configuration. 1325 * 1326 * @return 1327 * A DPDK queue object on success, NULL otherwise and rte_errno is set. 1328 */ 1329 struct mlx5_txq_ctrl * 1330 mlx5_txq_hairpin_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, 1331 const struct rte_eth_hairpin_conf *hairpin_conf) 1332 { 1333 struct mlx5_priv *priv = dev->data->dev_private; 1334 struct mlx5_txq_ctrl *tmpl; 1335 1336 tmpl = rte_calloc_socket("TXQ", 1, 1337 sizeof(*tmpl), 0, SOCKET_ID_ANY); 1338 if (!tmpl) { 1339 rte_errno = ENOMEM; 1340 return NULL; 1341 } 1342 tmpl->priv = priv; 1343 tmpl->socket = SOCKET_ID_ANY; 1344 tmpl->txq.elts_n = log2above(desc); 1345 tmpl->txq.port_id = dev->data->port_id; 1346 tmpl->txq.idx = idx; 1347 tmpl->hairpin_conf = *hairpin_conf; 1348 tmpl->type = MLX5_TXQ_TYPE_HAIRPIN; 1349 rte_atomic32_inc(&tmpl->refcnt); 1350 LIST_INSERT_HEAD(&priv->txqsctrl, tmpl, next); 1351 return tmpl; 1352 } 1353 1354 /** 1355 * Get a Tx queue. 1356 * 1357 * @param dev 1358 * Pointer to Ethernet device. 1359 * @param idx 1360 * TX queue index. 1361 * 1362 * @return 1363 * A pointer to the queue if it exists. 1364 */ 1365 struct mlx5_txq_ctrl * 1366 mlx5_txq_get(struct rte_eth_dev *dev, uint16_t idx) 1367 { 1368 struct mlx5_priv *priv = dev->data->dev_private; 1369 struct mlx5_txq_ctrl *ctrl = NULL; 1370 1371 if ((*priv->txqs)[idx]) { 1372 ctrl = container_of((*priv->txqs)[idx], struct mlx5_txq_ctrl, 1373 txq); 1374 mlx5_txq_obj_get(dev, idx); 1375 rte_atomic32_inc(&ctrl->refcnt); 1376 } 1377 return ctrl; 1378 } 1379 1380 /** 1381 * Release a Tx queue. 1382 * 1383 * @param dev 1384 * Pointer to Ethernet device. 1385 * @param idx 1386 * TX queue index. 1387 * 1388 * @return 1389 * 1 while a reference on it exists, 0 when freed. 1390 */ 1391 int 1392 mlx5_txq_release(struct rte_eth_dev *dev, uint16_t idx) 1393 { 1394 struct mlx5_priv *priv = dev->data->dev_private; 1395 struct mlx5_txq_ctrl *txq; 1396 1397 if (!(*priv->txqs)[idx]) 1398 return 0; 1399 txq = container_of((*priv->txqs)[idx], struct mlx5_txq_ctrl, txq); 1400 if (txq->obj && !mlx5_txq_obj_release(txq->obj)) 1401 txq->obj = NULL; 1402 if (rte_atomic32_dec_and_test(&txq->refcnt)) { 1403 txq_free_elts(txq); 1404 mlx5_mr_btree_free(&txq->txq.mr_ctrl.cache_bh); 1405 LIST_REMOVE(txq, next); 1406 rte_free(txq); 1407 (*priv->txqs)[idx] = NULL; 1408 return 0; 1409 } 1410 return 1; 1411 } 1412 1413 /** 1414 * Verify if the queue can be released. 1415 * 1416 * @param dev 1417 * Pointer to Ethernet device. 1418 * @param idx 1419 * TX queue index. 1420 * 1421 * @return 1422 * 1 if the queue can be released. 1423 */ 1424 int 1425 mlx5_txq_releasable(struct rte_eth_dev *dev, uint16_t idx) 1426 { 1427 struct mlx5_priv *priv = dev->data->dev_private; 1428 struct mlx5_txq_ctrl *txq; 1429 1430 if (!(*priv->txqs)[idx]) 1431 return -1; 1432 txq = container_of((*priv->txqs)[idx], struct mlx5_txq_ctrl, txq); 1433 return (rte_atomic32_read(&txq->refcnt) == 1); 1434 } 1435 1436 /** 1437 * Verify the Tx Queue list is empty 1438 * 1439 * @param dev 1440 * Pointer to Ethernet device. 1441 * 1442 * @return 1443 * The number of object not released. 1444 */ 1445 int 1446 mlx5_txq_verify(struct rte_eth_dev *dev) 1447 { 1448 struct mlx5_priv *priv = dev->data->dev_private; 1449 struct mlx5_txq_ctrl *txq_ctrl; 1450 int ret = 0; 1451 1452 LIST_FOREACH(txq_ctrl, &priv->txqsctrl, next) { 1453 DRV_LOG(DEBUG, "port %u Tx queue %u still referenced", 1454 dev->data->port_id, txq_ctrl->txq.idx); 1455 ++ret; 1456 } 1457 return ret; 1458 } 1459