/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2015 6WIND S.A.
 * Copyright 2015 Mellanox Technologies, Ltd
 */

#include <stddef.h>
#include <errno.h>
#include <string.h>
#include <stdint.h>
#include <unistd.h>
#include <sys/mman.h>
#include <inttypes.h>

/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
#include <infiniband/mlx5dv.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif

#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <rte_ethdev_driver.h>
#include <rte_common.h>

#include <mlx5_glue.h>
#include <mlx5_devx_cmds.h>
#include <mlx5_common.h>

#include "mlx5_defs.h"
#include "mlx5_utils.h"
#include "mlx5.h"
#include "mlx5_rxtx.h"
#include "mlx5_autoconf.h"

/**
 * Allocate TX queue elements.
 *
 * @param txq_ctrl
 *   Pointer to TX queue structure.
 */
void
txq_alloc_elts(struct mlx5_txq_ctrl *txq_ctrl)
{
	const unsigned int elts_n = 1 << txq_ctrl->txq.elts_n;
	unsigned int i;

	for (i = 0; (i != elts_n); ++i)
		txq_ctrl->txq.elts[i] = NULL;
	DRV_LOG(DEBUG, "port %u Tx queue %u allocated and configured %u WRs",
		PORT_ID(txq_ctrl->priv), txq_ctrl->txq.idx, elts_n);
	txq_ctrl->txq.elts_head = 0;
	txq_ctrl->txq.elts_tail = 0;
	txq_ctrl->txq.elts_comp = 0;
}

/**
 * Free TX queue elements.
 *
 * @param txq_ctrl
 *   Pointer to TX queue structure.
 */
void
txq_free_elts(struct mlx5_txq_ctrl *txq_ctrl)
{
	const uint16_t elts_n = 1 << txq_ctrl->txq.elts_n;
	const uint16_t elts_m = elts_n - 1;
	uint16_t elts_head = txq_ctrl->txq.elts_head;
	uint16_t elts_tail = txq_ctrl->txq.elts_tail;
	struct rte_mbuf *(*elts)[elts_n] = &txq_ctrl->txq.elts;

	DRV_LOG(DEBUG, "port %u Tx queue %u freeing WRs",
		PORT_ID(txq_ctrl->priv), txq_ctrl->txq.idx);
	txq_ctrl->txq.elts_head = 0;
	txq_ctrl->txq.elts_tail = 0;
	txq_ctrl->txq.elts_comp = 0;

	while (elts_tail != elts_head) {
		struct rte_mbuf *elt = (*elts)[elts_tail & elts_m];

		MLX5_ASSERT(elt != NULL);
		rte_pktmbuf_free_seg(elt);
#ifdef RTE_LIBRTE_MLX5_DEBUG
		/* Poisoning. */
		memset(&(*elts)[elts_tail & elts_m],
		       0x77,
		       sizeof((*elts)[elts_tail & elts_m]));
#endif
		++elts_tail;
	}
}
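
/*
 * Note on the element ring used above: elts_head and elts_tail are
 * free-running 16-bit counters and only (counter & elts_m) addresses the
 * elts[] array, which is why txq_free_elts() compares the raw counters and
 * masks on every access. Illustrative example (values hypothetical): with
 * elts_n = 256 (elts_m = 255), elts_tail = 260 and elts_head = 300, the
 * loop frees slots 260 & 255 == 4 through 299 & 255 == 43, i.e. 40 mbufs
 * still held by the queue.
 */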

/**
 * Returns the per-port supported offloads.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   Supported Tx offloads.
 */
uint64_t
mlx5_get_tx_port_offloads(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	uint64_t offloads = (DEV_TX_OFFLOAD_MULTI_SEGS |
			     DEV_TX_OFFLOAD_VLAN_INSERT);
	struct mlx5_dev_config *config = &priv->config;

	if (config->hw_csum)
		offloads |= (DEV_TX_OFFLOAD_IPV4_CKSUM |
			     DEV_TX_OFFLOAD_UDP_CKSUM |
			     DEV_TX_OFFLOAD_TCP_CKSUM);
	if (config->tso)
		offloads |= DEV_TX_OFFLOAD_TCP_TSO;
	if (config->swp) {
		if (config->hw_csum)
			offloads |= DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM;
		if (config->tso)
			offloads |= (DEV_TX_OFFLOAD_IP_TNL_TSO |
				     DEV_TX_OFFLOAD_UDP_TNL_TSO);
	}
	if (config->tunnel_en) {
		if (config->hw_csum)
			offloads |= DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM;
		if (config->tso)
			offloads |= (DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
				     DEV_TX_OFFLOAD_GRE_TNL_TSO |
				     DEV_TX_OFFLOAD_GENEVE_TNL_TSO);
	}
	return offloads;
}

/**
 * Tx queue presetup checks.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param idx
 *   Tx queue index.
 * @param desc
 *   Number of descriptors to configure in queue; adjusted in place when it
 *   does not meet the driver constraints.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_tx_queue_pre_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t *desc)
{
	struct mlx5_priv *priv = dev->data->dev_private;

	if (*desc <= MLX5_TX_COMP_THRESH) {
		DRV_LOG(WARNING,
			"port %u number of descriptors requested for Tx queue"
			" %u must be higher than MLX5_TX_COMP_THRESH, using %u"
			" instead of %u",
			dev->data->port_id, idx, MLX5_TX_COMP_THRESH + 1,
			*desc);
		*desc = MLX5_TX_COMP_THRESH + 1;
	}
	if (!rte_is_power_of_2(*desc)) {
		*desc = 1 << log2above(*desc);
		DRV_LOG(WARNING,
			"port %u increased number of descriptors in Tx queue"
			" %u to the next power of two (%d)",
			dev->data->port_id, idx, *desc);
	}
	DRV_LOG(DEBUG, "port %u configuring queue %u for %u descriptors",
		dev->data->port_id, idx, *desc);
	if (idx >= priv->txqs_n) {
		DRV_LOG(ERR, "port %u Tx queue index out of range (%u >= %u)",
			dev->data->port_id, idx, priv->txqs_n);
		rte_errno = EOVERFLOW;
		return -rte_errno;
	}
	if (!mlx5_txq_releasable(dev, idx)) {
		rte_errno = EBUSY;
		DRV_LOG(ERR, "port %u unable to release queue index %u",
			dev->data->port_id, idx);
		return -rte_errno;
	}
	mlx5_txq_release(dev, idx);
	return 0;
}
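
/*
 * Illustrative usage sketch (not part of the driver, identifiers are
 * hypothetical): an application reaches mlx5_tx_queue_setup() below through
 * the generic ethdev API, e.g.
 *
 *	struct rte_eth_dev_info dev_info;
 *	struct rte_eth_txconf txconf;
 *
 *	rte_eth_dev_info_get(port_id, &dev_info);
 *	txconf = dev_info.default_txconf;
 *	txconf.offloads = tx_offloads; // subset of the port Tx offload capa
 *	rte_eth_tx_queue_setup(port_id, queue_id, 512, socket_id, &txconf);
 *
 * The requested descriptor count is then validated and possibly adjusted by
 * mlx5_tx_queue_pre_setup() above.
 */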

/**
 * DPDK callback to configure a TX queue.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param idx
 *   TX queue index.
 * @param desc
 *   Number of descriptors to configure in queue.
 * @param socket
 *   NUMA socket on which memory must be allocated.
 * @param[in] conf
 *   Thresholds parameters.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
		    unsigned int socket, const struct rte_eth_txconf *conf)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_data *txq = (*priv->txqs)[idx];
	struct mlx5_txq_ctrl *txq_ctrl =
		container_of(txq, struct mlx5_txq_ctrl, txq);
	int res;

	res = mlx5_tx_queue_pre_setup(dev, idx, &desc);
	if (res)
		return res;
	txq_ctrl = mlx5_txq_new(dev, idx, desc, socket, conf);
	if (!txq_ctrl) {
		DRV_LOG(ERR, "port %u unable to allocate queue index %u",
			dev->data->port_id, idx);
		return -rte_errno;
	}
	DRV_LOG(DEBUG, "port %u adding Tx queue %u to list",
		dev->data->port_id, idx);
	(*priv->txqs)[idx] = &txq_ctrl->txq;
	return 0;
}

/**
 * DPDK callback to configure a TX hairpin queue.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param idx
 *   TX queue index.
 * @param desc
 *   Number of descriptors to configure in queue.
 * @param[in] hairpin_conf
 *   The hairpin binding configuration.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_tx_hairpin_queue_setup(struct rte_eth_dev *dev, uint16_t idx,
			    uint16_t desc,
			    const struct rte_eth_hairpin_conf *hairpin_conf)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_data *txq = (*priv->txqs)[idx];
	struct mlx5_txq_ctrl *txq_ctrl =
		container_of(txq, struct mlx5_txq_ctrl, txq);
	int res;

	res = mlx5_tx_queue_pre_setup(dev, idx, &desc);
	if (res)
		return res;
	if (hairpin_conf->peer_count != 1 ||
	    hairpin_conf->peers[0].port != dev->data->port_id ||
	    hairpin_conf->peers[0].queue >= priv->rxqs_n) {
		DRV_LOG(ERR, "port %u unable to setup hairpin queue index %u:"
			" invalid hairpin configuration", dev->data->port_id,
			idx);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	txq_ctrl = mlx5_txq_hairpin_new(dev, idx, desc, hairpin_conf);
	if (!txq_ctrl) {
		DRV_LOG(ERR, "port %u unable to allocate queue index %u",
			dev->data->port_id, idx);
		return -rte_errno;
	}
	DRV_LOG(DEBUG, "port %u adding Tx queue %u to list",
		dev->data->port_id, idx);
	(*priv->txqs)[idx] = &txq_ctrl->txq;
	return 0;
}

/**
 * DPDK callback to release a TX queue.
 *
 * @param dpdk_txq
 *   Generic TX queue pointer.
 */
void
mlx5_tx_queue_release(void *dpdk_txq)
{
	struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
	struct mlx5_txq_ctrl *txq_ctrl;
	struct mlx5_priv *priv;
	unsigned int i;

	if (txq == NULL)
		return;
	txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
	priv = txq_ctrl->priv;
	for (i = 0; (i != priv->txqs_n); ++i)
		if ((*priv->txqs)[i] == txq) {
			DRV_LOG(DEBUG, "port %u removing Tx queue %u from list",
				PORT_ID(priv), txq->idx);
			mlx5_txq_release(ETH_DEV(priv), i);
			break;
		}
}
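
/*
 * Note on the doorbell (UAR) helpers below: the UAR mmap offset supplied by
 * rdma-core encodes a command number in its upper bits. txq_uar_ncattr_init()
 * extracts that command to detect whether the doorbell register is mapped
 * non-cached (txq.db_nc) and records the heuristic doorbell mode (txq.db_heu)
 * from the dbnc configuration; the Tx data path consults these flags when
 * ringing the doorbell.
 */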

/**
 * Configure the doorbell register non-cached attribute.
 *
 * @param txq_ctrl
 *   Pointer to Tx queue control structure.
 * @param page_size
 *   System page size.
 */
static void
txq_uar_ncattr_init(struct mlx5_txq_ctrl *txq_ctrl, size_t page_size)
{
	struct mlx5_priv *priv = txq_ctrl->priv;
	off_t cmd;

	txq_ctrl->txq.db_heu = priv->config.dbnc == MLX5_TXDB_HEURISTIC;
	txq_ctrl->txq.db_nc = 0;
	/* Check the doorbell register mapping type. */
	cmd = txq_ctrl->uar_mmap_offset / page_size;
	cmd >>= MLX5_UAR_MMAP_CMD_SHIFT;
	cmd &= MLX5_UAR_MMAP_CMD_MASK;
	if (cmd == MLX5_MMAP_GET_NC_PAGES_CMD)
		txq_ctrl->txq.db_nc = 1;
}

/**
 * Initialize Tx UAR registers for primary process.
 *
 * @param txq_ctrl
 *   Pointer to Tx queue control structure.
 */
static void
txq_uar_init(struct mlx5_txq_ctrl *txq_ctrl)
{
	struct mlx5_priv *priv = txq_ctrl->priv;
	struct mlx5_proc_priv *ppriv = MLX5_PROC_PRIV(PORT_ID(priv));
	const size_t page_size = sysconf(_SC_PAGESIZE);
#ifndef RTE_ARCH_64
	unsigned int lock_idx;
#endif

	if (txq_ctrl->type != MLX5_TXQ_TYPE_STANDARD)
		return;
	MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
	MLX5_ASSERT(ppriv);
	ppriv->uar_table[txq_ctrl->txq.idx] = txq_ctrl->bf_reg;
	txq_uar_ncattr_init(txq_ctrl, page_size);
#ifndef RTE_ARCH_64
	/* Assign a UAR lock according to UAR page number. */
	lock_idx = (txq_ctrl->uar_mmap_offset / page_size) &
		   MLX5_UAR_PAGE_NUM_MASK;
	txq_ctrl->txq.uar_lock = &priv->uar_lock[lock_idx];
#endif
}

/**
 * Remap UAR register of a Tx queue for secondary process.
 *
 * The remapped address is stored in a table of the process private structure
 * of the device, indexed by queue index.
 *
 * @param txq_ctrl
 *   Pointer to Tx queue control structure.
 * @param fd
 *   Verbs file descriptor to map UAR pages.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
txq_uar_init_secondary(struct mlx5_txq_ctrl *txq_ctrl, int fd)
{
	struct mlx5_priv *priv = txq_ctrl->priv;
	struct mlx5_proc_priv *ppriv = MLX5_PROC_PRIV(PORT_ID(priv));
	struct mlx5_txq_data *txq = &txq_ctrl->txq;
	void *addr;
	uintptr_t uar_va;
	uintptr_t offset;
	const size_t page_size = sysconf(_SC_PAGESIZE);

	if (txq_ctrl->type != MLX5_TXQ_TYPE_STANDARD)
		return 0;
	MLX5_ASSERT(ppriv);
	/*
	 * UARs are mapped by rdma-core with OS page size granularity,
	 * see the libmlx5 function mlx5_init_context().
	 */
	uar_va = (uintptr_t)txq_ctrl->bf_reg;
	offset = uar_va & (page_size - 1); /* Offset in page. */
	addr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, fd,
		    txq_ctrl->uar_mmap_offset);
	if (addr == MAP_FAILED) {
		DRV_LOG(ERR,
			"port %u mmap failed for BF reg of txq %u",
			txq->port_id, txq->idx);
		rte_errno = ENXIO;
		return -rte_errno;
	}
	addr = RTE_PTR_ADD(addr, offset);
	ppriv->uar_table[txq->idx] = addr;
	txq_uar_ncattr_init(txq_ctrl, page_size);
	return 0;
}

/**
 * Unmap UAR register of a Tx queue for secondary process.
 *
 * @param txq_ctrl
 *   Pointer to Tx queue control structure.
 */
static void
txq_uar_uninit_secondary(struct mlx5_txq_ctrl *txq_ctrl)
{
	struct mlx5_proc_priv *ppriv = MLX5_PROC_PRIV(PORT_ID(txq_ctrl->priv));
	const size_t page_size = sysconf(_SC_PAGESIZE);
	void *addr;

	if (txq_ctrl->type != MLX5_TXQ_TYPE_STANDARD)
		return;
	addr = ppriv->uar_table[txq_ctrl->txq.idx];
	munmap(RTE_PTR_ALIGN_FLOOR(addr, page_size), page_size);
}
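
/*
 * Note: UAR virtual addresses are per process, so the primary process keeps
 * its mapped doorbell addresses in its own uar_table while each secondary
 * process must mmap() the same UAR pages through the Verbs file descriptor
 * and store the result in its private table. mlx5_tx_uar_init_secondary()
 * below walks every configured Tx queue to perform that remapping and rolls
 * back already remapped queues on failure.
 */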

/**
 * Initialize Tx UAR registers for secondary process.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fd
 *   Verbs file descriptor to map UAR pages.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_tx_uar_init_secondary(struct rte_eth_dev *dev, int fd)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_data *txq;
	struct mlx5_txq_ctrl *txq_ctrl;
	unsigned int i;
	int ret;

	MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_SECONDARY);
	for (i = 0; i != priv->txqs_n; ++i) {
		if (!(*priv->txqs)[i])
			continue;
		txq = (*priv->txqs)[i];
		txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
		if (txq_ctrl->type != MLX5_TXQ_TYPE_STANDARD)
			continue;
		MLX5_ASSERT(txq->idx == (uint16_t)i);
		ret = txq_uar_init_secondary(txq_ctrl, fd);
		if (ret)
			goto error;
	}
	return 0;
error:
	/* Rollback. */
	do {
		if (!(*priv->txqs)[i])
			continue;
		txq = (*priv->txqs)[i];
		txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
		txq_uar_uninit_secondary(txq_ctrl);
	} while (i--);
	return -rte_errno;
}

/**
 * Create the Tx hairpin queue object.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param idx
 *   Queue index in DPDK Tx queue array.
 *
 * @return
 *   The hairpin DevX object initialised, NULL otherwise and rte_errno is set.
 */
static struct mlx5_txq_obj *
mlx5_txq_obj_hairpin_new(struct rte_eth_dev *dev, uint16_t idx)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_data *txq_data = (*priv->txqs)[idx];
	struct mlx5_txq_ctrl *txq_ctrl =
		container_of(txq_data, struct mlx5_txq_ctrl, txq);
	struct mlx5_devx_create_sq_attr attr = { 0 };
	struct mlx5_txq_obj *tmpl = NULL;
	int ret = 0;

	MLX5_ASSERT(txq_data);
	MLX5_ASSERT(!txq_ctrl->obj);
	tmpl = rte_calloc_socket(__func__, 1, sizeof(*tmpl), 0,
				 txq_ctrl->socket);
	if (!tmpl) {
		DRV_LOG(ERR,
			"port %u Tx queue %u cannot allocate memory resources",
			dev->data->port_id, txq_data->idx);
		rte_errno = ENOMEM;
		return NULL;
	}
	tmpl->type = MLX5_TXQ_OBJ_TYPE_DEVX_HAIRPIN;
	tmpl->txq_ctrl = txq_ctrl;
	attr.hairpin = 1;
	attr.tis_lst_sz = 1;
	/* Workaround for hairpin startup. */
	attr.wq_attr.log_hairpin_num_packets = log2above(32);
	/* Workaround for packets larger than 1KB. */
	attr.wq_attr.log_hairpin_data_sz =
			priv->config.hca_attr.log_max_hairpin_wq_data_sz;
	attr.tis_num = priv->sh->tis->id;
	tmpl->sq = mlx5_devx_cmd_create_sq(priv->sh->ctx, &attr);
	if (!tmpl->sq) {
		DRV_LOG(ERR,
			"port %u Tx hairpin queue %u can't create SQ object",
			dev->data->port_id, idx);
		rte_errno = errno;
		goto error;
	}
	DRV_LOG(DEBUG, "port %u Tx hairpin queue %u updated with %p",
		dev->data->port_id, idx, (void *)tmpl);
	rte_atomic32_inc(&tmpl->refcnt);
	LIST_INSERT_HEAD(&priv->txqsobj, tmpl, next);
	return tmpl;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	if (tmpl->tis)
		mlx5_devx_cmd_destroy(tmpl->tis);
	if (tmpl->sq)
		mlx5_devx_cmd_destroy(tmpl->sq);
	rte_free(tmpl);
	rte_errno = ret; /* Restore rte_errno. */
	return NULL;
}
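
/*
 * Overview of mlx5_txq_obj_new() below: for a standard queue it creates the
 * CQ, creates a RAW_PACKET QP bound to it, moves the QP through the
 * INIT -> RTR -> RTS states, queries the low-level CQ/QP layout through
 * mlx5dv and finally publishes the ring addresses, doorbell records and UAR
 * mapping into the mlx5_txq_data used by the datapath. Any failure unwinds
 * through the common error label.
 */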

/**
 * Create the Tx queue Verbs object.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param idx
 *   Queue index in DPDK Tx queue array.
 * @param type
 *   Type of the Tx queue object to create.
 *
 * @return
 *   The Verbs object initialised, NULL otherwise and rte_errno is set.
 */
struct mlx5_txq_obj *
mlx5_txq_obj_new(struct rte_eth_dev *dev, uint16_t idx,
		 enum mlx5_txq_obj_type type)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_data *txq_data = (*priv->txqs)[idx];
	struct mlx5_txq_ctrl *txq_ctrl =
		container_of(txq_data, struct mlx5_txq_ctrl, txq);
	struct mlx5_txq_obj tmpl;
	struct mlx5_txq_obj *txq_obj = NULL;
	union {
		struct ibv_qp_init_attr_ex init;
		struct ibv_cq_init_attr_ex cq;
		struct ibv_qp_attr mod;
	} attr;
	unsigned int cqe_n;
	struct mlx5dv_qp qp = { .comp_mask = MLX5DV_QP_MASK_UAR_MMAP_OFFSET };
	struct mlx5dv_cq cq_info;
	struct mlx5dv_obj obj;
	const int desc = 1 << txq_data->elts_n;
	int ret = 0;

	if (type == MLX5_TXQ_OBJ_TYPE_DEVX_HAIRPIN)
		return mlx5_txq_obj_hairpin_new(dev, idx);
#ifdef HAVE_IBV_FLOW_DV_SUPPORT
	/* If using DevX, need additional mask to read tisn value. */
	if (priv->config.devx && !priv->sh->tdn)
		qp.comp_mask |= MLX5DV_QP_MASK_RAW_QP_HANDLES;
#endif
	MLX5_ASSERT(txq_data);
	priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_TX_QUEUE;
	priv->verbs_alloc_ctx.obj = txq_ctrl;
	if (mlx5_getenv_int("MLX5_ENABLE_CQE_COMPRESSION")) {
		DRV_LOG(ERR,
			"port %u MLX5_ENABLE_CQE_COMPRESSION must never be set",
			dev->data->port_id);
		rte_errno = EINVAL;
		return NULL;
	}
	memset(&tmpl, 0, sizeof(struct mlx5_txq_obj));
	attr.cq = (struct ibv_cq_init_attr_ex){
		.comp_mask = 0,
	};
	cqe_n = desc / MLX5_TX_COMP_THRESH +
		1 + MLX5_TX_COMP_THRESH_INLINE_DIV;
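	/*
	 * The CQ created below is sized from the above formula: roughly one
	 * completion per MLX5_TX_COMP_THRESH descriptors, plus one spare
	 * entry and the MLX5_TX_COMP_THRESH_INLINE_DIV margin, so the CQ is
	 * much smaller than the send queue itself.
	 */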
	tmpl.cq = mlx5_glue->create_cq(priv->sh->ctx, cqe_n, NULL, NULL, 0);
	if (tmpl.cq == NULL) {
		DRV_LOG(ERR, "port %u Tx queue %u CQ creation failure",
			dev->data->port_id, idx);
		rte_errno = errno;
		goto error;
	}
	attr.init = (struct ibv_qp_init_attr_ex){
		/* CQ to be associated with the send queue. */
		.send_cq = tmpl.cq,
		/* CQ to be associated with the receive queue. */
		.recv_cq = tmpl.cq,
		.cap = {
			/* Max number of outstanding WRs. */
			.max_send_wr =
				((priv->sh->device_attr.orig_attr.max_qp_wr <
				  desc) ?
				 priv->sh->device_attr.orig_attr.max_qp_wr :
				 desc),
			/*
			 * Max number of scatter/gather elements in a WR,
			 * must be 1 to prevent libmlx5 from trying to affect
			 * too much memory. TX gather is not impacted by the
			 * device_attr.max_sge limit and will still work
			 * properly.
			 */
			.max_send_sge = 1,
		},
		.qp_type = IBV_QPT_RAW_PACKET,
		/*
		 * Do *NOT* enable this, completion events are managed per
		 * Tx burst.
		 */
		.sq_sig_all = 0,
		.pd = priv->sh->pd,
		.comp_mask = IBV_QP_INIT_ATTR_PD,
	};
	if (txq_data->inlen_send)
		attr.init.cap.max_inline_data = txq_ctrl->max_inline_data;
	if (txq_data->tso_en) {
		attr.init.max_tso_header = txq_ctrl->max_tso_header;
		attr.init.comp_mask |= IBV_QP_INIT_ATTR_MAX_TSO_HEADER;
	}
	tmpl.qp = mlx5_glue->create_qp_ex(priv->sh->ctx, &attr.init);
	if (tmpl.qp == NULL) {
		DRV_LOG(ERR, "port %u Tx queue %u QP creation failure",
			dev->data->port_id, idx);
		rte_errno = errno;
		goto error;
	}
	attr.mod = (struct ibv_qp_attr){
		/* Move the QP to this state. */
		.qp_state = IBV_QPS_INIT,
		/* IB device port number. */
		.port_num = (uint8_t)priv->ibv_port,
	};
	ret = mlx5_glue->modify_qp(tmpl.qp, &attr.mod,
				   (IBV_QP_STATE | IBV_QP_PORT));
	if (ret) {
		DRV_LOG(ERR,
			"port %u Tx queue %u QP state to IBV_QPS_INIT failed",
			dev->data->port_id, idx);
		rte_errno = errno;
		goto error;
	}
	attr.mod = (struct ibv_qp_attr){
		.qp_state = IBV_QPS_RTR
	};
	ret = mlx5_glue->modify_qp(tmpl.qp, &attr.mod, IBV_QP_STATE);
	if (ret) {
		DRV_LOG(ERR,
			"port %u Tx queue %u QP state to IBV_QPS_RTR failed",
			dev->data->port_id, idx);
		rte_errno = errno;
		goto error;
	}
	attr.mod.qp_state = IBV_QPS_RTS;
	ret = mlx5_glue->modify_qp(tmpl.qp, &attr.mod, IBV_QP_STATE);
	if (ret) {
		DRV_LOG(ERR,
			"port %u Tx queue %u QP state to IBV_QPS_RTS failed",
			dev->data->port_id, idx);
		rte_errno = errno;
		goto error;
	}
	txq_obj = rte_calloc_socket(__func__, 1, sizeof(struct mlx5_txq_obj), 0,
				    txq_ctrl->socket);
	if (!txq_obj) {
		DRV_LOG(ERR, "port %u Tx queue %u cannot allocate memory",
			dev->data->port_id, idx);
		rte_errno = ENOMEM;
		goto error;
	}
	obj.cq.in = tmpl.cq;
	obj.cq.out = &cq_info;
	obj.qp.in = tmpl.qp;
	obj.qp.out = &qp;
	ret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_CQ | MLX5DV_OBJ_QP);
	if (ret != 0) {
		rte_errno = errno;
		goto error;
	}
	if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) {
		DRV_LOG(ERR,
			"port %u wrong MLX5_CQE_SIZE environment variable"
			" value: it should be set to %u",
			dev->data->port_id, RTE_CACHE_LINE_SIZE);
		rte_errno = EINVAL;
		goto error;
	}
	txq_data->cqe_n = log2above(cq_info.cqe_cnt);
	txq_data->cqe_s = 1 << txq_data->cqe_n;
	txq_data->cqe_m = txq_data->cqe_s - 1;
	txq_data->qp_num_8s = tmpl.qp->qp_num << 8;
	txq_data->wqes = qp.sq.buf;
	txq_data->wqe_n = log2above(qp.sq.wqe_cnt);
	txq_data->wqe_s = 1 << txq_data->wqe_n;
	txq_data->wqe_m = txq_data->wqe_s - 1;
	txq_data->wqes_end = txq_data->wqes + txq_data->wqe_s;
	txq_data->qp_db = &qp.dbrec[MLX5_SND_DBR];
	txq_data->cq_db = cq_info.dbrec;
	txq_data->cqes = (volatile struct mlx5_cqe *)cq_info.buf;
	txq_data->cq_ci = 0;
	txq_data->cq_pi = 0;
	txq_data->wqe_ci = 0;
	txq_data->wqe_pi = 0;
	txq_data->wqe_comp = 0;
	txq_data->wqe_thres = txq_data->wqe_s / MLX5_TX_COMP_THRESH_INLINE_DIV;
	txq_data->fcqs = rte_calloc_socket(__func__,
					   txq_data->cqe_s,
					   sizeof(*txq_data->fcqs),
					   RTE_CACHE_LINE_SIZE,
					   txq_ctrl->socket);
	if (!txq_data->fcqs) {
		DRV_LOG(ERR, "port %u Tx queue %u cannot allocate memory (FCQ)",
			dev->data->port_id, idx);
		rte_errno = ENOMEM;
		goto error;
	}
#ifdef HAVE_IBV_FLOW_DV_SUPPORT
	/*
	 * If using DevX, need to query and store the TIS transport domain
	 * value. This is done once per port and will be used later when
	 * creating the matching TIR on the Rx side.
	 */
	if (priv->config.devx && !priv->sh->tdn) {
		ret = mlx5_devx_cmd_qp_query_tis_td(tmpl.qp, qp.tisn,
						    &priv->sh->tdn);
		if (ret) {
			DRV_LOG(ERR, "Failed to query port %u Tx queue %u QP"
				" TIS transport domain", dev->data->port_id,
				idx);
			rte_errno = EINVAL;
			goto error;
		} else {
			DRV_LOG(DEBUG, "port %u Tx queue %u TIS number %d"
				" transport domain %d", dev->data->port_id,
				idx, qp.tisn, priv->sh->tdn);
		}
	}
#endif
	txq_obj->qp = tmpl.qp;
	txq_obj->cq = tmpl.cq;
	rte_atomic32_inc(&txq_obj->refcnt);
	txq_ctrl->bf_reg = qp.bf.reg;
	if (qp.comp_mask & MLX5DV_QP_MASK_UAR_MMAP_OFFSET) {
		txq_ctrl->uar_mmap_offset = qp.uar_mmap_offset;
		DRV_LOG(DEBUG, "port %u: uar_mmap_offset 0x%"PRIx64,
			dev->data->port_id, txq_ctrl->uar_mmap_offset);
	} else {
		DRV_LOG(ERR,
			"port %u failed to retrieve UAR info, invalid"
			" libmlx5.so",
			dev->data->port_id);
		rte_errno = EINVAL;
		goto error;
	}
	txq_uar_init(txq_ctrl);
	LIST_INSERT_HEAD(&priv->txqsobj, txq_obj, next);
	txq_obj->txq_ctrl = txq_ctrl;
	priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE;
	return txq_obj;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	if (tmpl.cq)
		claim_zero(mlx5_glue->destroy_cq(tmpl.cq));
	if (tmpl.qp)
		claim_zero(mlx5_glue->destroy_qp(tmpl.qp));
	if (txq_data && txq_data->fcqs)
		rte_free(txq_data->fcqs);
	if (txq_obj)
		rte_free(txq_obj);
	priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE;
	rte_errno = ret; /* Restore rte_errno. */
	return NULL;
}

/**
 * Get a Tx queue Verbs object.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param idx
 *   Queue index in DPDK Tx queue array.
 *
 * @return
 *   The Verbs object if it exists.
 */
struct mlx5_txq_obj *
mlx5_txq_obj_get(struct rte_eth_dev *dev, uint16_t idx)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_ctrl *txq_ctrl;

	if (idx >= priv->txqs_n)
		return NULL;
	if (!(*priv->txqs)[idx])
		return NULL;
	txq_ctrl = container_of((*priv->txqs)[idx], struct mlx5_txq_ctrl, txq);
	if (txq_ctrl->obj)
		rte_atomic32_inc(&txq_ctrl->obj->refcnt);
	return txq_ctrl->obj;
}

/**
 * Release a Tx queue Verbs object.
 *
 * @param txq_obj
 *   Verbs Tx queue object.
 *
 * @return
 *   1 while a reference on it exists, 0 when freed.
 */
int
mlx5_txq_obj_release(struct mlx5_txq_obj *txq_obj)
{
	MLX5_ASSERT(txq_obj);
	if (rte_atomic32_dec_and_test(&txq_obj->refcnt)) {
		if (txq_obj->type == MLX5_TXQ_OBJ_TYPE_DEVX_HAIRPIN) {
			if (txq_obj->tis)
				claim_zero(mlx5_devx_cmd_destroy(txq_obj->tis));
		} else {
			claim_zero(mlx5_glue->destroy_qp(txq_obj->qp));
			claim_zero(mlx5_glue->destroy_cq(txq_obj->cq));
			if (txq_obj->txq_ctrl->txq.fcqs)
				rte_free(txq_obj->txq_ctrl->txq.fcqs);
		}
		LIST_REMOVE(txq_obj, next);
		rte_free(txq_obj);
		return 0;
	}
	return 1;
}
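
/*
 * Note: Verbs/DevX Tx queue objects are reference counted. mlx5_txq_obj_get()
 * takes an extra reference when the object already exists, and
 * mlx5_txq_obj_release() destroys the hardware resources and frees the object
 * only when the last reference is dropped, so every successful get must be
 * balanced by a release.
 */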

/**
 * Verify the Verbs Tx queue list is empty.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   The number of objects not released.
 */
int
mlx5_txq_obj_verify(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	int ret = 0;
	struct mlx5_txq_obj *txq_obj;

	LIST_FOREACH(txq_obj, &priv->txqsobj, next) {
		DRV_LOG(DEBUG, "port %u Verbs Tx queue %u still referenced",
			dev->data->port_id, txq_obj->txq_ctrl->txq.idx);
		++ret;
	}
	return ret;
}

/**
 * Calculate the total number of WQEBB for Tx queue.
 *
 * Simplified version of calc_sq_size() in rdma-core.
 *
 * @param txq_ctrl
 *   Pointer to Tx queue control structure.
 *
 * @return
 *   The number of WQEBB.
 */
static int
txq_calc_wqebb_cnt(struct mlx5_txq_ctrl *txq_ctrl)
{
	unsigned int wqe_size;
	const unsigned int desc = 1 << txq_ctrl->txq.elts_n;

	wqe_size = MLX5_WQE_CSEG_SIZE +
		   MLX5_WQE_ESEG_SIZE +
		   MLX5_WSEG_SIZE -
		   MLX5_ESEG_MIN_INLINE_SIZE +
		   txq_ctrl->max_inline_data;
	return rte_align32pow2(wqe_size * desc) / MLX5_WQE_SIZE;
}

/**
 * Calculate the maximal inline data size for Tx queue.
 *
 * @param txq_ctrl
 *   Pointer to Tx queue control structure.
 *
 * @return
 *   The maximal inline data size.
 */
static unsigned int
txq_calc_inline_max(struct mlx5_txq_ctrl *txq_ctrl)
{
	const unsigned int desc = 1 << txq_ctrl->txq.elts_n;
	struct mlx5_priv *priv = txq_ctrl->priv;
	unsigned int wqe_size;

	wqe_size = priv->sh->device_attr.orig_attr.max_qp_wr / desc;
	if (!wqe_size)
		return 0;
	/*
	 * This calculation is derived from the source of
	 * mlx5_calc_send_wqe() in the rdma-core library.
	 */
	wqe_size = wqe_size * MLX5_WQE_SIZE -
		   MLX5_WQE_CSEG_SIZE -
		   MLX5_WQE_ESEG_SIZE -
		   MLX5_WSEG_SIZE -
		   MLX5_WSEG_SIZE +
		   MLX5_DSEG_MIN_INLINE_SIZE;
	return wqe_size;
}
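
/*
 * Illustrative example for the two helpers above (numbers hypothetical, real
 * values come from the device capabilities): with max_qp_wr = 32768 and a
 * 1024-descriptor queue, each descriptor may occupy up to 32768 / 1024 = 32
 * WQEBBs; txq_calc_inline_max() converts that budget into bytes and subtracts
 * the control/Ethernet/data segment overhead. txq_calc_wqebb_cnt() checks the
 * reverse direction: the per-descriptor WQE size implied by the chosen inline
 * settings, multiplied by the descriptor count, must still fit max_qp_wr.
 */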

/**
 * Set Tx queue parameters from device configuration.
 *
 * @param txq_ctrl
 *   Pointer to Tx queue control structure.
 */
static void
txq_set_params(struct mlx5_txq_ctrl *txq_ctrl)
{
	struct mlx5_priv *priv = txq_ctrl->priv;
	struct mlx5_dev_config *config = &priv->config;
	unsigned int inlen_send; /* Inline data for ordinary SEND. */
	unsigned int inlen_empw; /* Inline data for enhanced MPW. */
	unsigned int inlen_mode; /* Minimal required Inline data. */
	unsigned int txqs_inline; /* Min Tx queues to enable inline. */
	uint64_t dev_txoff = priv->dev_data->dev_conf.txmode.offloads;
	bool tso = txq_ctrl->txq.offloads & (DEV_TX_OFFLOAD_TCP_TSO |
					     DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
					     DEV_TX_OFFLOAD_GRE_TNL_TSO |
					     DEV_TX_OFFLOAD_IP_TNL_TSO |
					     DEV_TX_OFFLOAD_UDP_TNL_TSO);
	bool vlan_inline;
	unsigned int temp;

	if (config->txqs_inline == MLX5_ARG_UNSET)
		txqs_inline =
#if defined(RTE_ARCH_ARM64)
			(priv->pci_dev->id.device_id ==
				PCI_DEVICE_ID_MELLANOX_CONNECTX5BF) ?
				MLX5_INLINE_MAX_TXQS_BLUEFIELD :
#endif
				MLX5_INLINE_MAX_TXQS;
	else
		txqs_inline = (unsigned int)config->txqs_inline;
	inlen_send = (config->txq_inline_max == MLX5_ARG_UNSET) ?
		     MLX5_SEND_DEF_INLINE_LEN :
		     (unsigned int)config->txq_inline_max;
	inlen_empw = (config->txq_inline_mpw == MLX5_ARG_UNSET) ?
		     MLX5_EMPW_DEF_INLINE_LEN :
		     (unsigned int)config->txq_inline_mpw;
	inlen_mode = (config->txq_inline_min == MLX5_ARG_UNSET) ?
		     0 : (unsigned int)config->txq_inline_min;
	if (config->mps != MLX5_MPW_ENHANCED && config->mps != MLX5_MPW)
		inlen_empw = 0;
	/*
	 * If a minimal amount of data to inline is requested,
	 * inlining MUST be enabled. This is the case for ConnectX-4,
	 * which usually requires L2 headers to be inlined to operate
	 * correctly, and for ConnectX-4 Lx, which requires L2-L4 headers
	 * to be inlined to support E-Switch flows.
	 */
	if (inlen_mode) {
		if (inlen_mode <= MLX5_ESEG_MIN_INLINE_SIZE) {
			/*
			 * Optimize minimal inlining for single
			 * segment packets to fill one WQEBB
			 * without gaps.
			 */
			temp = MLX5_ESEG_MIN_INLINE_SIZE;
		} else {
			temp = inlen_mode - MLX5_ESEG_MIN_INLINE_SIZE;
			temp = RTE_ALIGN(temp, MLX5_WSEG_SIZE) +
			       MLX5_ESEG_MIN_INLINE_SIZE;
			temp = RTE_MIN(temp, MLX5_SEND_MAX_INLINE_LEN);
		}
		if (temp != inlen_mode) {
			DRV_LOG(INFO,
				"port %u minimal required inline setting"
				" aligned from %u to %u",
				PORT_ID(priv), inlen_mode, temp);
			inlen_mode = temp;
		}
	}
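	/*
	 * Illustrative example for the alignment above (assuming the usual
	 * PRM constants of an 18-byte minimal Ethernet segment inline and a
	 * 16-byte WSEG): a requested txq_inline_min of 20 becomes
	 * 18 + RTE_ALIGN(2, 16) = 34 bytes, i.e. the part beyond the built-in
	 * 18 bytes is padded to a whole segment so the WQE has no gaps.
	 */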
	/*
	 * If the port is configured to support VLAN insertion and the device
	 * does not support this feature in hardware (NICs before ConnectX-5,
	 * or the wqe_vlan_insert flag is not set), data inline must be
	 * enabled on all queues because the feature is implemented by the
	 * single tx_burst routine.
	 */
	txq_ctrl->txq.vlan_en = config->hw_vlan_insert;
	vlan_inline = (dev_txoff & DEV_TX_OFFLOAD_VLAN_INSERT) &&
		      !config->hw_vlan_insert;
	/*
	 * Data inlining is enabled only if the number of configured Tx
	 * queues is not smaller than txqs_inline; with fewer queues it is
	 * disabled entirely to save CPU cycles.
	 */
	if (inlen_send && priv->txqs_n >= txqs_inline) {
		/*
		 * The data sent with ordinary MLX5_OPCODE_SEND
		 * may be inlined in Ethernet Segment, align the
		 * length accordingly to fit entire WQEBBs.
		 */
		temp = RTE_MAX(inlen_send,
			       MLX5_ESEG_MIN_INLINE_SIZE + MLX5_WQE_DSEG_SIZE);
		temp -= MLX5_ESEG_MIN_INLINE_SIZE + MLX5_WQE_DSEG_SIZE;
		temp = RTE_ALIGN(temp, MLX5_WQE_SIZE);
		temp += MLX5_ESEG_MIN_INLINE_SIZE + MLX5_WQE_DSEG_SIZE;
		temp = RTE_MIN(temp, MLX5_WQE_SIZE_MAX +
				     MLX5_ESEG_MIN_INLINE_SIZE -
				     MLX5_WQE_CSEG_SIZE -
				     MLX5_WQE_ESEG_SIZE -
				     MLX5_WQE_DSEG_SIZE * 2);
		temp = RTE_MIN(temp, MLX5_SEND_MAX_INLINE_LEN);
		temp = RTE_MAX(temp, inlen_mode);
		if (temp != inlen_send) {
			DRV_LOG(INFO,
				"port %u ordinary send inline setting"
				" aligned from %u to %u",
				PORT_ID(priv), inlen_send, temp);
			inlen_send = temp;
		}
		/*
		 * Not aligned to cache lines, but to WQEs.
		 * The first bytes of data (initial alignment)
		 * are copied explicitly at the beginning of the
		 * inlining buffer in the Ethernet Segment.
		 */
		MLX5_ASSERT(inlen_send >= MLX5_ESEG_MIN_INLINE_SIZE);
		MLX5_ASSERT(inlen_send <= MLX5_WQE_SIZE_MAX +
					  MLX5_ESEG_MIN_INLINE_SIZE -
					  MLX5_WQE_CSEG_SIZE -
					  MLX5_WQE_ESEG_SIZE -
					  MLX5_WQE_DSEG_SIZE * 2);
	} else if (inlen_mode) {
		/*
		 * If minimal inlining is requested we must
		 * enable inlining in general, despite the
		 * number of configured queues. Ignore the
		 * txq_inline_max devarg, this is not
		 * full-featured inline.
		 */
		inlen_send = inlen_mode;
		inlen_empw = 0;
	} else if (vlan_inline) {
		/*
		 * Hardware does not report offload for
		 * VLAN insertion, we must enable data inline
		 * to implement the feature in software.
		 */
		inlen_send = MLX5_ESEG_MIN_INLINE_SIZE;
		inlen_empw = 0;
	} else {
		inlen_send = 0;
		inlen_empw = 0;
	}
	txq_ctrl->txq.inlen_send = inlen_send;
	txq_ctrl->txq.inlen_mode = inlen_mode;
	txq_ctrl->txq.inlen_empw = 0;
	if (inlen_send && inlen_empw && priv->txqs_n >= txqs_inline) {
		/*
		 * The data sent with MLX5_OPCODE_ENHANCED_MPSW
		 * may be inlined in Data Segment, align the
		 * length accordingly to fit entire WQEBBs.
		 */
		temp = RTE_MAX(inlen_empw,
			       MLX5_WQE_SIZE + MLX5_DSEG_MIN_INLINE_SIZE);
		temp -= MLX5_DSEG_MIN_INLINE_SIZE;
		temp = RTE_ALIGN(temp, MLX5_WQE_SIZE);
		temp += MLX5_DSEG_MIN_INLINE_SIZE;
		temp = RTE_MIN(temp, MLX5_WQE_SIZE_MAX +
				     MLX5_DSEG_MIN_INLINE_SIZE -
				     MLX5_WQE_CSEG_SIZE -
				     MLX5_WQE_ESEG_SIZE -
				     MLX5_WQE_DSEG_SIZE);
		temp = RTE_MIN(temp, MLX5_EMPW_MAX_INLINE_LEN);
		if (temp != inlen_empw) {
			DRV_LOG(INFO,
				"port %u enhanced empw inline setting"
				" aligned from %u to %u",
				PORT_ID(priv), inlen_empw, temp);
			inlen_empw = temp;
		}
		MLX5_ASSERT(inlen_empw >= MLX5_ESEG_MIN_INLINE_SIZE);
		MLX5_ASSERT(inlen_empw <= MLX5_WQE_SIZE_MAX +
					  MLX5_DSEG_MIN_INLINE_SIZE -
					  MLX5_WQE_CSEG_SIZE -
					  MLX5_WQE_ESEG_SIZE -
					  MLX5_WQE_DSEG_SIZE);
		txq_ctrl->txq.inlen_empw = inlen_empw;
	}
	txq_ctrl->max_inline_data = RTE_MAX(inlen_send, inlen_empw);
	if (tso) {
		txq_ctrl->max_tso_header = MLX5_MAX_TSO_HEADER;
		txq_ctrl->max_inline_data = RTE_MAX(txq_ctrl->max_inline_data,
						    MLX5_MAX_TSO_HEADER);
		txq_ctrl->txq.tso_en = 1;
	}
	txq_ctrl->txq.tunnel_en = config->tunnel_en | config->swp;
	txq_ctrl->txq.swp_en = ((DEV_TX_OFFLOAD_IP_TNL_TSO |
				 DEV_TX_OFFLOAD_UDP_TNL_TSO |
				 DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM) &
				txq_ctrl->txq.offloads) && config->swp;
}
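
/*
 * Summary of the resulting inline parameters: txq_set_params() leaves the
 * queue with inlen_mode <= inlen_send and, when enhanced MPW is active,
 * inlen_mode <= inlen_empw; max_inline_data is the maximum of the per-opcode
 * lengths and at least MLX5_MAX_TSO_HEADER when TSO is enabled.
 * txq_adjust_params() below may later shrink these values to fit the device
 * limits.
 */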

/**
 * Adjust Tx queue data inline parameters for large queue sizes.
 * The data inline feature requires multiple WQEs to fit the packets,
 * and if a large number of Tx descriptors is requested by the application
 * the total WQE amount may exceed the hardware capabilities. If the
 * default inline settings are used we can try to adjust them to meet
 * the hardware requirements without exceeding the queue size.
 *
 * @param txq_ctrl
 *   Pointer to Tx queue control structure.
 *
 * @return
 *   Zero on success, otherwise the parameters cannot be adjusted.
 */
static int
txq_adjust_params(struct mlx5_txq_ctrl *txq_ctrl)
{
	struct mlx5_priv *priv = txq_ctrl->priv;
	struct mlx5_dev_config *config = &priv->config;
	unsigned int max_inline;

	max_inline = txq_calc_inline_max(txq_ctrl);
	if (!txq_ctrl->txq.inlen_send) {
		/*
		 * Inline data feature is not engaged at all.
		 * There is nothing to adjust.
		 */
		return 0;
	}
	if (txq_ctrl->max_inline_data <= max_inline) {
		/*
		 * The requested inline data length does not
		 * exceed queue capabilities.
		 */
		return 0;
	}
	if (txq_ctrl->txq.inlen_mode > max_inline) {
		DRV_LOG(ERR,
			"minimal data inline requirements (%u) are not"
			" satisfied (%u) on port %u, try a smaller"
			" Tx queue size (%d)",
			txq_ctrl->txq.inlen_mode, max_inline,
			priv->dev_data->port_id,
			priv->sh->device_attr.orig_attr.max_qp_wr);
		goto error;
	}
	if (txq_ctrl->txq.inlen_send > max_inline &&
	    config->txq_inline_max != MLX5_ARG_UNSET &&
	    config->txq_inline_max > (int)max_inline) {
		DRV_LOG(ERR,
			"txq_inline_max requirements (%u) are not"
			" satisfied (%u) on port %u, try a smaller"
			" Tx queue size (%d)",
			txq_ctrl->txq.inlen_send, max_inline,
			priv->dev_data->port_id,
			priv->sh->device_attr.orig_attr.max_qp_wr);
		goto error;
	}
	if (txq_ctrl->txq.inlen_empw > max_inline &&
	    config->txq_inline_mpw != MLX5_ARG_UNSET &&
	    config->txq_inline_mpw > (int)max_inline) {
		DRV_LOG(ERR,
			"txq_inline_mpw requirements (%u) are not"
			" satisfied (%u) on port %u, try a smaller"
			" Tx queue size (%d)",
			txq_ctrl->txq.inlen_empw, max_inline,
			priv->dev_data->port_id,
			priv->sh->device_attr.orig_attr.max_qp_wr);
		goto error;
	}
	if (txq_ctrl->txq.tso_en && max_inline < MLX5_MAX_TSO_HEADER) {
		DRV_LOG(ERR,
			"tso header inline requirements (%u) are not"
			" satisfied (%u) on port %u, try a smaller"
			" Tx queue size (%d)",
			MLX5_MAX_TSO_HEADER, max_inline,
			priv->dev_data->port_id,
			priv->sh->device_attr.orig_attr.max_qp_wr);
		goto error;
	}
	if (txq_ctrl->txq.inlen_send > max_inline) {
		DRV_LOG(WARNING,
			"adjust txq_inline_max (%u->%u)"
			" due to large Tx queue on port %u",
			txq_ctrl->txq.inlen_send, max_inline,
			priv->dev_data->port_id);
		txq_ctrl->txq.inlen_send = max_inline;
	}
	if (txq_ctrl->txq.inlen_empw > max_inline) {
		DRV_LOG(WARNING,
			"adjust txq_inline_mpw (%u->%u)"
			" due to large Tx queue on port %u",
			txq_ctrl->txq.inlen_empw, max_inline,
			priv->dev_data->port_id);
		txq_ctrl->txq.inlen_empw = max_inline;
	}
	txq_ctrl->max_inline_data = RTE_MAX(txq_ctrl->txq.inlen_send,
					    txq_ctrl->txq.inlen_empw);
	MLX5_ASSERT(txq_ctrl->max_inline_data <= max_inline);
	MLX5_ASSERT(txq_ctrl->txq.inlen_mode <= max_inline);
	MLX5_ASSERT(txq_ctrl->txq.inlen_mode <= txq_ctrl->txq.inlen_send);
	MLX5_ASSERT(txq_ctrl->txq.inlen_mode <= txq_ctrl->txq.inlen_empw ||
		    !txq_ctrl->txq.inlen_empw);
	return 0;
error:
	rte_errno = ENOMEM;
	return -ENOMEM;
}
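
/*
 * Illustrative note for the WQEBB check in mlx5_txq_new() below (numbers
 * hypothetical): the WQEBB count from txq_calc_wqebb_cnt() grows with both
 * the descriptor count and max_inline_data and must not exceed
 * device_attr.orig_attr.max_qp_wr. A 64-byte WQE per descriptor on a
 * 1024-descriptor queue needs 1024 WQEBBs, while enabling a few hundred
 * bytes of inline data multiplies that requirement; exceeding the limit is
 * reported as ENOMEM with a hint to use a smaller queue.
 */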

/**
 * Create a DPDK Tx queue.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param idx
 *   TX queue index.
 * @param desc
 *   Number of descriptors to configure in queue.
 * @param socket
 *   NUMA socket on which memory must be allocated.
 * @param[in] conf
 *   Thresholds parameters.
 *
 * @return
 *   A DPDK queue object on success, NULL otherwise and rte_errno is set.
 */
struct mlx5_txq_ctrl *
mlx5_txq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
	     unsigned int socket, const struct rte_eth_txconf *conf)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_ctrl *tmpl;

	tmpl = rte_calloc_socket("TXQ", 1,
				 sizeof(*tmpl) +
				 desc * sizeof(struct rte_mbuf *),
				 0, socket);
	if (!tmpl) {
		rte_errno = ENOMEM;
		return NULL;
	}
	if (mlx5_mr_btree_init(&tmpl->txq.mr_ctrl.cache_bh,
			       MLX5_MR_BTREE_CACHE_N, socket)) {
		/* rte_errno is already set. */
		goto error;
	}
	/* Save pointer of global generation number to check memory event. */
	tmpl->txq.mr_ctrl.dev_gen_ptr = &priv->sh->mr.dev_gen;
	MLX5_ASSERT(desc > MLX5_TX_COMP_THRESH);
	tmpl->txq.offloads = conf->offloads |
			     dev->data->dev_conf.txmode.offloads;
	tmpl->priv = priv;
	tmpl->socket = socket;
	tmpl->txq.elts_n = log2above(desc);
	tmpl->txq.elts_s = desc;
	tmpl->txq.elts_m = desc - 1;
	tmpl->txq.port_id = dev->data->port_id;
	tmpl->txq.idx = idx;
	txq_set_params(tmpl);
	if (txq_adjust_params(tmpl))
		goto error;
	if (txq_calc_wqebb_cnt(tmpl) >
	    priv->sh->device_attr.orig_attr.max_qp_wr) {
		DRV_LOG(ERR,
			"port %u Tx WQEBB count (%d) exceeds the limit (%d),"
			" try smaller queue size",
			dev->data->port_id, txq_calc_wqebb_cnt(tmpl),
			priv->sh->device_attr.orig_attr.max_qp_wr);
		rte_errno = ENOMEM;
		goto error;
	}
	rte_atomic32_inc(&tmpl->refcnt);
	tmpl->type = MLX5_TXQ_TYPE_STANDARD;
	LIST_INSERT_HEAD(&priv->txqsctrl, tmpl, next);
	return tmpl;
error:
	rte_free(tmpl);
	return NULL;
}

/**
 * Create a DPDK Tx hairpin queue.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param idx
 *   TX queue index.
 * @param desc
 *   Number of descriptors to configure in queue.
 * @param hairpin_conf
 *   The hairpin configuration.
 *
 * @return
 *   A DPDK queue object on success, NULL otherwise and rte_errno is set.
 */
struct mlx5_txq_ctrl *
mlx5_txq_hairpin_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
		     const struct rte_eth_hairpin_conf *hairpin_conf)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_ctrl *tmpl;

	tmpl = rte_calloc_socket("TXQ", 1,
				 sizeof(*tmpl), 0, SOCKET_ID_ANY);
	if (!tmpl) {
		rte_errno = ENOMEM;
		return NULL;
	}
	tmpl->priv = priv;
	tmpl->socket = SOCKET_ID_ANY;
	tmpl->txq.elts_n = log2above(desc);
	tmpl->txq.port_id = dev->data->port_id;
	tmpl->txq.idx = idx;
	tmpl->hairpin_conf = *hairpin_conf;
	tmpl->type = MLX5_TXQ_TYPE_HAIRPIN;
	rte_atomic32_inc(&tmpl->refcnt);
	LIST_INSERT_HEAD(&priv->txqsctrl, tmpl, next);
	return tmpl;
}
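
/*
 * Note: both constructors above return a control structure with one
 * reference held. mlx5_txq_get() below adds a reference (plus one on the
 * underlying Verbs/DevX object) and mlx5_txq_release() frees the elements,
 * the MR btree and the structure itself only when the last reference goes
 * away, which is what allows the pre-setup path to release a queue that is
 * merely being reconfigured.
 */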

/**
 * Get a Tx queue.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param idx
 *   TX queue index.
 *
 * @return
 *   A pointer to the queue if it exists.
 */
struct mlx5_txq_ctrl *
mlx5_txq_get(struct rte_eth_dev *dev, uint16_t idx)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_ctrl *ctrl = NULL;

	if ((*priv->txqs)[idx]) {
		ctrl = container_of((*priv->txqs)[idx], struct mlx5_txq_ctrl,
				    txq);
		mlx5_txq_obj_get(dev, idx);
		rte_atomic32_inc(&ctrl->refcnt);
	}
	return ctrl;
}

/**
 * Release a Tx queue.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param idx
 *   TX queue index.
 *
 * @return
 *   1 while a reference on it exists, 0 when freed.
 */
int
mlx5_txq_release(struct rte_eth_dev *dev, uint16_t idx)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_ctrl *txq;

	if (!(*priv->txqs)[idx])
		return 0;
	txq = container_of((*priv->txqs)[idx], struct mlx5_txq_ctrl, txq);
	if (txq->obj && !mlx5_txq_obj_release(txq->obj))
		txq->obj = NULL;
	if (rte_atomic32_dec_and_test(&txq->refcnt)) {
		txq_free_elts(txq);
		mlx5_mr_btree_free(&txq->txq.mr_ctrl.cache_bh);
		LIST_REMOVE(txq, next);
		rte_free(txq);
		(*priv->txqs)[idx] = NULL;
		return 0;
	}
	return 1;
}

/**
 * Verify if the queue can be released.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param idx
 *   TX queue index.
 *
 * @return
 *   1 if the queue can be released.
 */
int
mlx5_txq_releasable(struct rte_eth_dev *dev, uint16_t idx)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_ctrl *txq;

	if (!(*priv->txqs)[idx])
		return -1;
	txq = container_of((*priv->txqs)[idx], struct mlx5_txq_ctrl, txq);
	return (rte_atomic32_read(&txq->refcnt) == 1);
}

/**
 * Verify the Tx queue list is empty.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   The number of objects not released.
 */
int
mlx5_txq_verify(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_ctrl *txq_ctrl;
	int ret = 0;

	LIST_FOREACH(txq_ctrl, &priv->txqsctrl, next) {
		DRV_LOG(DEBUG, "port %u Tx queue %u still referenced",
			dev->data->port_id, txq_ctrl->txq.idx);
		++ret;
	}
	return ret;
}