/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2015 6WIND S.A.
 * Copyright 2015 Mellanox Technologies, Ltd
 */

#include <stddef.h>
#include <assert.h>
#include <errno.h>
#include <string.h>
#include <stdint.h>
#include <unistd.h>
#include <sys/mman.h>
#include <inttypes.h>

/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
#include <infiniband/mlx5dv.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif

#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <rte_ethdev_driver.h>
#include <rte_common.h>

#include "mlx5_utils.h"
#include "mlx5_defs.h"
#include "mlx5.h"
#include "mlx5_rxtx.h"
#include "mlx5_autoconf.h"
#include "mlx5_glue.h"

/**
 * Allocate TX queue elements.
 *
 * @param txq_ctrl
 *   Pointer to TX queue structure.
 */
void
txq_alloc_elts(struct mlx5_txq_ctrl *txq_ctrl)
{
	const unsigned int elts_n = 1 << txq_ctrl->txq.elts_n;
	unsigned int i;

	for (i = 0; (i != elts_n); ++i)
		txq_ctrl->txq.elts[i] = NULL;
	DRV_LOG(DEBUG, "port %u Tx queue %u allocated and configured %u WRs",
		PORT_ID(txq_ctrl->priv), txq_ctrl->txq.idx, elts_n);
	txq_ctrl->txq.elts_head = 0;
	txq_ctrl->txq.elts_tail = 0;
	txq_ctrl->txq.elts_comp = 0;
}

/**
 * Free TX queue elements.
 *
 * @param txq_ctrl
 *   Pointer to TX queue structure.
 */
static void
txq_free_elts(struct mlx5_txq_ctrl *txq_ctrl)
{
	const uint16_t elts_n = 1 << txq_ctrl->txq.elts_n;
	const uint16_t elts_m = elts_n - 1;
	uint16_t elts_head = txq_ctrl->txq.elts_head;
	uint16_t elts_tail = txq_ctrl->txq.elts_tail;
	struct rte_mbuf *(*elts)[elts_n] = &txq_ctrl->txq.elts;

	DRV_LOG(DEBUG, "port %u Tx queue %u freeing WRs",
		PORT_ID(txq_ctrl->priv), txq_ctrl->txq.idx);
	txq_ctrl->txq.elts_head = 0;
	txq_ctrl->txq.elts_tail = 0;
	txq_ctrl->txq.elts_comp = 0;

	while (elts_tail != elts_head) {
		struct rte_mbuf *elt = (*elts)[elts_tail & elts_m];

		assert(elt != NULL);
		rte_pktmbuf_free_seg(elt);
#ifndef NDEBUG
		/* Poisoning. */
		memset(&(*elts)[elts_tail & elts_m],
		       0x77,
		       sizeof((*elts)[elts_tail & elts_m]));
#endif
		++elts_tail;
	}
}

/**
 * Returns the per-port supported offloads.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   Supported Tx offloads.
 */
uint64_t
mlx5_get_tx_port_offloads(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	uint64_t offloads = (DEV_TX_OFFLOAD_MULTI_SEGS |
			     DEV_TX_OFFLOAD_VLAN_INSERT);
	struct mlx5_dev_config *config = &priv->config;

	if (config->hw_csum)
		offloads |= (DEV_TX_OFFLOAD_IPV4_CKSUM |
			     DEV_TX_OFFLOAD_UDP_CKSUM |
			     DEV_TX_OFFLOAD_TCP_CKSUM);
	if (config->tso)
		offloads |= DEV_TX_OFFLOAD_TCP_TSO;
	if (config->swp) {
		if (config->hw_csum)
			offloads |= DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM;
		if (config->tso)
			offloads |= (DEV_TX_OFFLOAD_IP_TNL_TSO |
				     DEV_TX_OFFLOAD_UDP_TNL_TSO);
	}
	if (config->tunnel_en) {
		if (config->hw_csum)
			offloads |= DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM;
		if (config->tso)
			offloads |= (DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
				     DEV_TX_OFFLOAD_GRE_TNL_TSO);
	}
	return offloads;
}
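
/*
 * Illustrative sketch (not part of the driver): the mask built above feeds
 * the Tx offload capabilities reported through rte_eth_dev_info_get(), so an
 * application would typically validate its requested offloads against it
 * before setting up queues. txmode_offloads below is a hypothetical
 * application-side variable:
 *
 *	struct rte_eth_dev_info dev_info;
 *
 *	rte_eth_dev_info_get(port_id, &dev_info);
 *	if ((txmode_offloads & dev_info.tx_offload_capa) != txmode_offloads)
 *		rte_exit(EXIT_FAILURE, "unsupported Tx offloads requested\n");
 */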

/**
 * Tx queue presetup checks.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param idx
 *   Tx queue index.
 * @param desc
 *   Number of descriptors to configure in queue.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_tx_queue_pre_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc)
{
	struct mlx5_priv *priv = dev->data->dev_private;

	if (desc <= MLX5_TX_COMP_THRESH) {
		DRV_LOG(WARNING,
			"port %u number of descriptors requested for Tx queue"
			" %u must be higher than MLX5_TX_COMP_THRESH, using %u"
			" instead of %u",
			dev->data->port_id, idx, MLX5_TX_COMP_THRESH + 1, desc);
		desc = MLX5_TX_COMP_THRESH + 1;
	}
	if (!rte_is_power_of_2(desc)) {
		desc = 1 << log2above(desc);
		DRV_LOG(WARNING,
			"port %u increased number of descriptors in Tx queue"
			" %u to the next power of two (%d)",
			dev->data->port_id, idx, desc);
	}
	DRV_LOG(DEBUG, "port %u configuring queue %u for %u descriptors",
		dev->data->port_id, idx, desc);
	if (idx >= priv->txqs_n) {
		DRV_LOG(ERR, "port %u Tx queue index out of range (%u >= %u)",
			dev->data->port_id, idx, priv->txqs_n);
		rte_errno = EOVERFLOW;
		return -rte_errno;
	}
	if (!mlx5_txq_releasable(dev, idx)) {
		rte_errno = EBUSY;
		DRV_LOG(ERR, "port %u unable to release queue index %u",
			dev->data->port_id, idx);
		return -rte_errno;
	}
	mlx5_txq_release(dev, idx);
	return 0;
}

/**
 * DPDK callback to configure a TX queue.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param idx
 *   TX queue index.
 * @param desc
 *   Number of descriptors to configure in queue.
 * @param socket
 *   NUMA socket on which memory must be allocated.
 * @param[in] conf
 *   Thresholds parameters.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
		    unsigned int socket, const struct rte_eth_txconf *conf)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_data *txq = (*priv->txqs)[idx];
	struct mlx5_txq_ctrl *txq_ctrl =
		container_of(txq, struct mlx5_txq_ctrl, txq);
	int res;

	res = mlx5_tx_queue_pre_setup(dev, idx, desc);
	if (res)
		return res;
	txq_ctrl = mlx5_txq_new(dev, idx, desc, socket, conf);
	if (!txq_ctrl) {
		DRV_LOG(ERR, "port %u unable to allocate queue index %u",
			dev->data->port_id, idx);
		return -rte_errno;
	}
	DRV_LOG(DEBUG, "port %u adding Tx queue %u to list",
		dev->data->port_id, idx);
	(*priv->txqs)[idx] = &txq_ctrl->txq;
	return 0;
}
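
/*
 * Illustrative sketch (not part of the driver): the callback above is not
 * called directly, it is reached through the generic ethdev API, e.g.:
 *
 *	struct rte_eth_txconf txconf = dev_info.default_txconf;
 *	int ret;
 *
 *	ret = rte_eth_tx_queue_setup(port_id, queue_id, 512,
 *				     rte_eth_dev_socket_id(port_id), &txconf);
 *	if (ret < 0)
 *		... handle error, the negative return code carries the errno ...
 */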

/**
 * DPDK callback to configure a TX hairpin queue.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param idx
 *   TX queue index.
 * @param desc
 *   Number of descriptors to configure in queue.
 * @param[in] hairpin_conf
 *   The hairpin binding configuration.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_tx_hairpin_queue_setup(struct rte_eth_dev *dev, uint16_t idx,
			    uint16_t desc,
			    const struct rte_eth_hairpin_conf *hairpin_conf)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_data *txq = (*priv->txqs)[idx];
	struct mlx5_txq_ctrl *txq_ctrl =
		container_of(txq, struct mlx5_txq_ctrl, txq);
	int res;

	res = mlx5_tx_queue_pre_setup(dev, idx, desc);
	if (res)
		return res;
	if (hairpin_conf->peer_count != 1 ||
	    hairpin_conf->peers[0].port != dev->data->port_id ||
	    hairpin_conf->peers[0].queue >= priv->rxqs_n) {
		DRV_LOG(ERR, "port %u unable to setup hairpin queue index %u:"
			" invalid hairpin configuration", dev->data->port_id,
			idx);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	txq_ctrl = mlx5_txq_hairpin_new(dev, idx, desc, hairpin_conf);
	if (!txq_ctrl) {
		DRV_LOG(ERR, "port %u unable to allocate queue index %u",
			dev->data->port_id, idx);
		return -rte_errno;
	}
	DRV_LOG(DEBUG, "port %u adding Tx queue %u to list",
		dev->data->port_id, idx);
	(*priv->txqs)[idx] = &txq_ctrl->txq;
	txq_ctrl->type = MLX5_TXQ_TYPE_HAIRPIN;
	return 0;
}

/**
 * DPDK callback to release a TX queue.
 *
 * @param dpdk_txq
 *   Generic TX queue pointer.
 */
void
mlx5_tx_queue_release(void *dpdk_txq)
{
	struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
	struct mlx5_txq_ctrl *txq_ctrl;
	struct mlx5_priv *priv;
	unsigned int i;

	if (txq == NULL)
		return;
	txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
	priv = txq_ctrl->priv;
	for (i = 0; (i != priv->txqs_n); ++i)
		if ((*priv->txqs)[i] == txq) {
			mlx5_txq_release(ETH_DEV(priv), i);
			DRV_LOG(DEBUG, "port %u removing Tx queue %u from list",
				PORT_ID(priv), txq->idx);
			break;
		}
}

/**
 * Configure the doorbell register non-cached attribute.
 *
 * @param txq_ctrl
 *   Pointer to Tx queue control structure.
 * @param page_size
 *   System page size.
 */
static void
txq_uar_ncattr_init(struct mlx5_txq_ctrl *txq_ctrl, size_t page_size)
{
	unsigned int cmd;

	txq_ctrl->txq.db_nc = 0;
	/* Check the doorbell register mapping type. */
	cmd = txq_ctrl->uar_mmap_offset / page_size;
	cmd >>= MLX5_UAR_MMAP_CMD_SHIFT;
	cmd &= MLX5_UAR_MMAP_CMD_MASK;
	if (cmd == MLX5_MMAP_GET_NC_PAGES_CMD)
		txq_ctrl->txq.db_nc = 1;
}

/**
 * Initialize Tx UAR registers for primary process.
 *
 * @param txq_ctrl
 *   Pointer to Tx queue control structure.
 */
static void
txq_uar_init(struct mlx5_txq_ctrl *txq_ctrl)
{
	struct mlx5_priv *priv = txq_ctrl->priv;
	struct mlx5_proc_priv *ppriv = MLX5_PROC_PRIV(PORT_ID(priv));
	const size_t page_size = sysconf(_SC_PAGESIZE);
#ifndef RTE_ARCH_64
	unsigned int lock_idx;
#endif

	if (txq_ctrl->type != MLX5_TXQ_TYPE_STANDARD)
		return;
	assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
	assert(ppriv);
	ppriv->uar_table[txq_ctrl->txq.idx] = txq_ctrl->bf_reg;
	txq_uar_ncattr_init(txq_ctrl, page_size);
#ifndef RTE_ARCH_64
	/* Assign a UAR lock according to UAR page number. */
	lock_idx = (txq_ctrl->uar_mmap_offset / page_size) &
		   MLX5_UAR_PAGE_NUM_MASK;
	txq_ctrl->txq.uar_lock = &priv->uar_lock[lock_idx];
#endif
}
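
/*
 * Illustrative decode of the UAR mmap offset consumed above (a sketch; the
 * command encoding itself comes from rdma-core and the kernel driver):
 *
 *	uint64_t page = uar_mmap_offset / page_size;
 *	unsigned int cmd = (page >> MLX5_UAR_MMAP_CMD_SHIFT) &
 *			   MLX5_UAR_MMAP_CMD_MASK;
 *
 * When cmd equals MLX5_MMAP_GET_NC_PAGES_CMD the doorbell page was mapped
 * non-cached, so txq->db_nc is set and the data path treats the doorbell as
 * non-cached rather than write-combining memory.
 */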

/**
 * Remap UAR register of a Tx queue for secondary process.
 *
 * Remapped address is stored at the table in the process private structure of
 * the device, indexed by queue index.
 *
 * @param txq_ctrl
 *   Pointer to Tx queue control structure.
 * @param fd
 *   Verbs file descriptor to map UAR pages.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
txq_uar_init_secondary(struct mlx5_txq_ctrl *txq_ctrl, int fd)
{
	struct mlx5_priv *priv = txq_ctrl->priv;
	struct mlx5_proc_priv *ppriv = MLX5_PROC_PRIV(PORT_ID(priv));
	struct mlx5_txq_data *txq = &txq_ctrl->txq;
	void *addr;
	uintptr_t uar_va;
	uintptr_t offset;
	const size_t page_size = sysconf(_SC_PAGESIZE);

	if (txq_ctrl->type != MLX5_TXQ_TYPE_STANDARD)
		return 0;
	assert(ppriv);
	/*
	 * UARs are mapped at OS page size granularity, as done by rdma-core;
	 * see the libmlx5 function mlx5_init_context().
	 */
	uar_va = (uintptr_t)txq_ctrl->bf_reg;
	offset = uar_va & (page_size - 1); /* Offset in page. */
	addr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, fd,
		    txq_ctrl->uar_mmap_offset);
	if (addr == MAP_FAILED) {
		DRV_LOG(ERR,
			"port %u mmap failed for BF reg of txq %u",
			txq->port_id, txq->idx);
		rte_errno = ENXIO;
		return -rte_errno;
	}
	addr = RTE_PTR_ADD(addr, offset);
	ppriv->uar_table[txq->idx] = addr;
	txq_uar_ncattr_init(txq_ctrl, page_size);
	return 0;
}

/**
 * Unmap UAR register of a Tx queue for secondary process.
 *
 * @param txq_ctrl
 *   Pointer to Tx queue control structure.
 */
static void
txq_uar_uninit_secondary(struct mlx5_txq_ctrl *txq_ctrl)
{
	struct mlx5_proc_priv *ppriv = MLX5_PROC_PRIV(PORT_ID(txq_ctrl->priv));
	const size_t page_size = sysconf(_SC_PAGESIZE);
	void *addr;

	if (txq_ctrl->type != MLX5_TXQ_TYPE_STANDARD)
		return;
	addr = ppriv->uar_table[txq_ctrl->txq.idx];
	munmap(RTE_PTR_ALIGN_FLOOR(addr, page_size), page_size);
}

/**
 * Initialize Tx UAR registers for secondary process.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fd
 *   Verbs file descriptor to map UAR pages.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_tx_uar_init_secondary(struct rte_eth_dev *dev, int fd)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_data *txq;
	struct mlx5_txq_ctrl *txq_ctrl;
	unsigned int i;
	int ret;

	assert(rte_eal_process_type() == RTE_PROC_SECONDARY);
	for (i = 0; i != priv->txqs_n; ++i) {
		if (!(*priv->txqs)[i])
			continue;
		txq = (*priv->txqs)[i];
		txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
		if (txq_ctrl->type != MLX5_TXQ_TYPE_STANDARD)
			continue;
		assert(txq->idx == (uint16_t)i);
		ret = txq_uar_init_secondary(txq_ctrl, fd);
		if (ret)
			goto error;
	}
	return 0;
error:
	/* Rollback. */
	do {
		if (!(*priv->txqs)[i])
			continue;
		txq = (*priv->txqs)[i];
		txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
		txq_uar_uninit_secondary(txq_ctrl);
	} while (i--);
	return -rte_errno;
}

/**
 * Create the Tx hairpin queue object.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param idx
 *   Queue index in DPDK Tx queue array.
 *
 * @return
 *   The hairpin DevX object initialised, NULL otherwise and rte_errno is set.
 */
static struct mlx5_txq_obj *
mlx5_txq_obj_hairpin_new(struct rte_eth_dev *dev, uint16_t idx)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_data *txq_data = (*priv->txqs)[idx];
	struct mlx5_txq_ctrl *txq_ctrl =
		container_of(txq_data, struct mlx5_txq_ctrl, txq);
	struct mlx5_devx_create_sq_attr attr = { 0 };
	struct mlx5_txq_obj *tmpl = NULL;
	int ret = 0;

	assert(txq_data);
	assert(!txq_ctrl->obj);
	tmpl = rte_calloc_socket(__func__, 1, sizeof(*tmpl), 0,
				 txq_ctrl->socket);
	if (!tmpl) {
		DRV_LOG(ERR,
			"port %u Tx queue %u cannot allocate memory resources",
			dev->data->port_id, txq_data->idx);
		rte_errno = ENOMEM;
		return NULL;
	}
	tmpl->type = MLX5_TXQ_OBJ_TYPE_DEVX_HAIRPIN;
	tmpl->txq_ctrl = txq_ctrl;
	attr.hairpin = 1;
	attr.tis_lst_sz = 1;
	/* Workaround for hairpin startup. */
	attr.wq_attr.log_hairpin_num_packets = log2above(32);
	/* Workaround for packets larger than 1KB. */
	attr.wq_attr.log_hairpin_data_sz =
			priv->config.hca_attr.log_max_hairpin_wq_data_sz;
	attr.tis_num = priv->sh->tis->id;
	tmpl->sq = mlx5_devx_cmd_create_sq(priv->sh->ctx, &attr);
	if (!tmpl->sq) {
		DRV_LOG(ERR,
			"port %u tx hairpin queue %u can't create sq object",
			dev->data->port_id, idx);
		rte_errno = errno;
		goto error;
	}
	DRV_LOG(DEBUG, "port %u sq %u updated with %p", dev->data->port_id,
		idx, (void *)tmpl);
	rte_atomic32_inc(&tmpl->refcnt);
	LIST_INSERT_HEAD(&priv->txqsobj, tmpl, next);
	return tmpl;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	if (tmpl->tis)
		mlx5_devx_cmd_destroy(tmpl->tis);
	if (tmpl->sq)
		mlx5_devx_cmd_destroy(tmpl->sq);
	rte_errno = ret; /* Restore rte_errno. */
	return NULL;
}

/**
 * Create the Tx queue Verbs object.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param idx
 *   Queue index in DPDK Tx queue array.
 * @param type
 *   Type of the Tx queue object to create.
 *
 * @return
 *   The Verbs object initialised, NULL otherwise and rte_errno is set.
 */
struct mlx5_txq_obj *
mlx5_txq_obj_new(struct rte_eth_dev *dev, uint16_t idx,
		 enum mlx5_txq_obj_type type)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_data *txq_data = (*priv->txqs)[idx];
	struct mlx5_txq_ctrl *txq_ctrl =
		container_of(txq_data, struct mlx5_txq_ctrl, txq);
	struct mlx5_txq_obj tmpl;
	struct mlx5_txq_obj *txq_obj = NULL;
	union {
		struct ibv_qp_init_attr_ex init;
		struct ibv_cq_init_attr_ex cq;
		struct ibv_qp_attr mod;
	} attr;
	unsigned int cqe_n;
	struct mlx5dv_qp qp = { .comp_mask = MLX5DV_QP_MASK_UAR_MMAP_OFFSET };
	struct mlx5dv_cq cq_info;
	struct mlx5dv_obj obj;
	const int desc = 1 << txq_data->elts_n;
	int ret = 0;

	if (type == MLX5_TXQ_OBJ_TYPE_DEVX_HAIRPIN)
		return mlx5_txq_obj_hairpin_new(dev, idx);
#ifdef HAVE_IBV_FLOW_DV_SUPPORT
	/* If using DevX, need additional mask to read tisn value. */
	if (priv->config.devx && !priv->sh->tdn)
		qp.comp_mask |= MLX5DV_QP_MASK_RAW_QP_HANDLES;
#endif
	assert(txq_data);
	priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_TX_QUEUE;
	priv->verbs_alloc_ctx.obj = txq_ctrl;
	if (mlx5_getenv_int("MLX5_ENABLE_CQE_COMPRESSION")) {
		DRV_LOG(ERR,
			"port %u MLX5_ENABLE_CQE_COMPRESSION must never be set",
			dev->data->port_id);
		rte_errno = EINVAL;
		return NULL;
	}
	memset(&tmpl, 0, sizeof(struct mlx5_txq_obj));
	attr.cq = (struct ibv_cq_init_attr_ex){
		.comp_mask = 0,
	};
	cqe_n = desc / MLX5_TX_COMP_THRESH +
		1 + MLX5_TX_COMP_THRESH_INLINE_DIV;
	tmpl.cq = mlx5_glue->create_cq(priv->sh->ctx, cqe_n, NULL, NULL, 0);
	if (tmpl.cq == NULL) {
		DRV_LOG(ERR, "port %u Tx queue %u CQ creation failure",
			dev->data->port_id, idx);
		rte_errno = errno;
		goto error;
	}
	attr.init = (struct ibv_qp_init_attr_ex){
		/* CQ to be associated with the send queue. */
		.send_cq = tmpl.cq,
		/* CQ to be associated with the receive queue. */
		.recv_cq = tmpl.cq,
		.cap = {
			/* Max number of outstanding WRs. */
			.max_send_wr =
				((priv->sh->device_attr.orig_attr.max_qp_wr <
				  desc) ?
				 priv->sh->device_attr.orig_attr.max_qp_wr :
				 desc),
			/*
			 * Max number of scatter/gather elements in a WR,
			 * must be 1 to prevent libmlx5 from trying to affect
			 * too much memory. TX gather is not impacted by the
			 * device_attr.max_sge limit and will still work
			 * properly.
			 */
			.max_send_sge = 1,
		},
		.qp_type = IBV_QPT_RAW_PACKET,
		/*
		 * Do *NOT* enable this, completion events are managed per
		 * Tx burst.
		 */
		.sq_sig_all = 0,
		.pd = priv->sh->pd,
		.comp_mask = IBV_QP_INIT_ATTR_PD,
	};
	if (txq_data->inlen_send)
		attr.init.cap.max_inline_data = txq_ctrl->max_inline_data;
	if (txq_data->tso_en) {
		attr.init.max_tso_header = txq_ctrl->max_tso_header;
		attr.init.comp_mask |= IBV_QP_INIT_ATTR_MAX_TSO_HEADER;
	}
	tmpl.qp = mlx5_glue->create_qp_ex(priv->sh->ctx, &attr.init);
	if (tmpl.qp == NULL) {
		DRV_LOG(ERR, "port %u Tx queue %u QP creation failure",
			dev->data->port_id, idx);
		rte_errno = errno;
		goto error;
	}
	attr.mod = (struct ibv_qp_attr){
		/* Move the QP to this state. */
		.qp_state = IBV_QPS_INIT,
		/* IB device port number. */
		.port_num = (uint8_t)priv->ibv_port,
	};
	ret = mlx5_glue->modify_qp(tmpl.qp, &attr.mod,
				   (IBV_QP_STATE | IBV_QP_PORT));
	if (ret) {
		DRV_LOG(ERR,
			"port %u Tx queue %u QP state to IBV_QPS_INIT failed",
			dev->data->port_id, idx);
		rte_errno = errno;
		goto error;
	}
	attr.mod = (struct ibv_qp_attr){
		.qp_state = IBV_QPS_RTR
	};
	ret = mlx5_glue->modify_qp(tmpl.qp, &attr.mod, IBV_QP_STATE);
	if (ret) {
		DRV_LOG(ERR,
			"port %u Tx queue %u QP state to IBV_QPS_RTR failed",
			dev->data->port_id, idx);
		rte_errno = errno;
		goto error;
	}
	attr.mod.qp_state = IBV_QPS_RTS;
	ret = mlx5_glue->modify_qp(tmpl.qp, &attr.mod, IBV_QP_STATE);
	if (ret) {
		DRV_LOG(ERR,
			"port %u Tx queue %u QP state to IBV_QPS_RTS failed",
			dev->data->port_id, idx);
		rte_errno = errno;
		goto error;
	}
	txq_obj = rte_calloc_socket(__func__, 1, sizeof(struct mlx5_txq_obj), 0,
				    txq_ctrl->socket);
	if (!txq_obj) {
		DRV_LOG(ERR, "port %u Tx queue %u cannot allocate memory",
			dev->data->port_id, idx);
		rte_errno = ENOMEM;
		goto error;
	}
	obj.cq.in = tmpl.cq;
	obj.cq.out = &cq_info;
	obj.qp.in = tmpl.qp;
	obj.qp.out = &qp;
	ret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_CQ | MLX5DV_OBJ_QP);
	if (ret != 0) {
		rte_errno = errno;
		goto error;
	}
	if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) {
		DRV_LOG(ERR,
			"port %u wrong MLX5_CQE_SIZE environment variable"
			" value: it should be set to %u",
			dev->data->port_id, RTE_CACHE_LINE_SIZE);
		rte_errno = EINVAL;
		goto error;
	}
	txq_data->cqe_n = log2above(cq_info.cqe_cnt);
	txq_data->cqe_s = 1 << txq_data->cqe_n;
	txq_data->cqe_m = txq_data->cqe_s - 1;
	txq_data->qp_num_8s = tmpl.qp->qp_num << 8;
	txq_data->wqes = qp.sq.buf;
	txq_data->wqe_n = log2above(qp.sq.wqe_cnt);
	txq_data->wqe_s = 1 << txq_data->wqe_n;
	txq_data->wqe_m = txq_data->wqe_s - 1;
	txq_data->wqes_end = txq_data->wqes + txq_data->wqe_s;
	txq_data->qp_db = &qp.dbrec[MLX5_SND_DBR];
	txq_data->cq_db = cq_info.dbrec;
	txq_data->cqes = (volatile struct mlx5_cqe *)cq_info.buf;
	txq_data->cq_ci = 0;
#ifndef NDEBUG
	txq_data->cq_pi = 0;
#endif
	txq_data->wqe_ci = 0;
	txq_data->wqe_pi = 0;
	txq_data->wqe_comp = 0;
	txq_data->wqe_thres = txq_data->wqe_s / MLX5_TX_COMP_THRESH_INLINE_DIV;
#ifdef HAVE_IBV_FLOW_DV_SUPPORT
	/*
	 * If using DevX need to query and store TIS transport domain value.
	 * This is done once per port.
	 * Will use this value on Rx, when creating matching TIR.
	 */
	if (priv->config.devx && !priv->sh->tdn) {
		ret = mlx5_devx_cmd_qp_query_tis_td(tmpl.qp, qp.tisn,
						    &priv->sh->tdn);
		if (ret) {
			DRV_LOG(ERR, "Failed to query port %u Tx queue %u QP TIS "
				"transport domain", dev->data->port_id, idx);
			rte_errno = EINVAL;
			goto error;
		} else {
			DRV_LOG(DEBUG, "port %u Tx queue %u TIS number %d "
				"transport domain %d", dev->data->port_id,
				idx, qp.tisn, priv->sh->tdn);
		}
	}
#endif
	txq_obj->qp = tmpl.qp;
	txq_obj->cq = tmpl.cq;
	rte_atomic32_inc(&txq_obj->refcnt);
	txq_ctrl->bf_reg = qp.bf.reg;
	if (qp.comp_mask & MLX5DV_QP_MASK_UAR_MMAP_OFFSET) {
		txq_ctrl->uar_mmap_offset = qp.uar_mmap_offset;
		DRV_LOG(DEBUG, "port %u: uar_mmap_offset 0x%"PRIx64,
			dev->data->port_id, txq_ctrl->uar_mmap_offset);
	} else {
		DRV_LOG(ERR,
			"port %u failed to retrieve UAR info, invalid"
			" libmlx5.so",
			dev->data->port_id);
		rte_errno = EINVAL;
		goto error;
	}
	txq_uar_init(txq_ctrl);
	LIST_INSERT_HEAD(&priv->txqsobj, txq_obj, next);
	txq_obj->txq_ctrl = txq_ctrl;
	priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE;
	return txq_obj;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	if (tmpl.cq)
		claim_zero(mlx5_glue->destroy_cq(tmpl.cq));
	if (tmpl.qp)
		claim_zero(mlx5_glue->destroy_qp(tmpl.qp));
	if (txq_obj)
		rte_free(txq_obj);
	priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE;
	rte_errno = ret; /* Restore rte_errno. */
	return NULL;
}

/**
 * Get a Tx queue Verbs object.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param idx
 *   Queue index in DPDK Tx queue array.
 *
 * @return
 *   The Verbs object if it exists.
 */
struct mlx5_txq_obj *
mlx5_txq_obj_get(struct rte_eth_dev *dev, uint16_t idx)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_ctrl *txq_ctrl;

	if (idx >= priv->txqs_n)
		return NULL;
	if (!(*priv->txqs)[idx])
		return NULL;
	txq_ctrl = container_of((*priv->txqs)[idx], struct mlx5_txq_ctrl, txq);
	if (txq_ctrl->obj)
		rte_atomic32_inc(&txq_ctrl->obj->refcnt);
	return txq_ctrl->obj;
}

/**
 * Release a Tx Verbs queue object.
 *
 * @param txq_obj
 *   Verbs Tx queue object.
 *
 * @return
 *   1 while a reference on it exists, 0 when freed.
 */
int
mlx5_txq_obj_release(struct mlx5_txq_obj *txq_obj)
{
	assert(txq_obj);
	if (rte_atomic32_dec_and_test(&txq_obj->refcnt)) {
		if (txq_obj->type == MLX5_TXQ_OBJ_TYPE_DEVX_HAIRPIN) {
			if (txq_obj->tis)
				claim_zero(mlx5_devx_cmd_destroy(txq_obj->tis));
		} else {
			claim_zero(mlx5_glue->destroy_qp(txq_obj->qp));
			claim_zero(mlx5_glue->destroy_cq(txq_obj->cq));
		}
		LIST_REMOVE(txq_obj, next);
		rte_free(txq_obj);
		return 0;
	}
	return 1;
}

/**
 * Verify the Verbs Tx queue list is empty.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   The number of objects not released.
 */
int
mlx5_txq_obj_verify(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	int ret = 0;
	struct mlx5_txq_obj *txq_obj;

	LIST_FOREACH(txq_obj, &priv->txqsobj, next) {
		DRV_LOG(DEBUG, "port %u Verbs Tx queue %u still referenced",
			dev->data->port_id, txq_obj->txq_ctrl->txq.idx);
		++ret;
	}
	return ret;
}

/**
 * Calculate the total number of WQEBB for Tx queue.
 *
 * Simplified version of calc_sq_size() in rdma-core.
 *
 * @param txq_ctrl
 *   Pointer to Tx queue control structure.
 *
 * @return
 *   The number of WQEBB.
 */
static int
txq_calc_wqebb_cnt(struct mlx5_txq_ctrl *txq_ctrl)
{
	unsigned int wqe_size;
	const unsigned int desc = 1 << txq_ctrl->txq.elts_n;

	wqe_size = MLX5_WQE_CSEG_SIZE +
		   MLX5_WQE_ESEG_SIZE +
		   MLX5_WSEG_SIZE -
		   MLX5_ESEG_MIN_INLINE_SIZE +
		   txq_ctrl->max_inline_data;
	return rte_align32pow2(wqe_size * desc) / MLX5_WQE_SIZE;
}

/**
 * Calculate the maximal inline data size for Tx queue.
 *
 * @param txq_ctrl
 *   Pointer to Tx queue control structure.
 *
 * @return
 *   The maximal inline data size.
 */
static unsigned int
txq_calc_inline_max(struct mlx5_txq_ctrl *txq_ctrl)
{
	const unsigned int desc = 1 << txq_ctrl->txq.elts_n;
	struct mlx5_priv *priv = txq_ctrl->priv;
	unsigned int wqe_size;

	wqe_size = priv->sh->device_attr.orig_attr.max_qp_wr / desc;
	if (!wqe_size)
		return 0;
	/*
	 * This calculation is derived from the source of
	 * mlx5_calc_send_wqe() in the rdma-core library.
	 */
	wqe_size = wqe_size * MLX5_WQE_SIZE -
		   MLX5_WQE_CSEG_SIZE -
		   MLX5_WQE_ESEG_SIZE -
		   MLX5_WSEG_SIZE -
		   MLX5_WSEG_SIZE +
		   MLX5_DSEG_MIN_INLINE_SIZE;
	return wqe_size;
}
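
/*
 * Worked example for the two helpers above (a sketch; it assumes the usual
 * 16-byte control/Ethernet/data segment sizes, an 18-byte minimal Ethernet
 * segment inline, a 12-byte minimal data segment inline and a 64-byte WQEBB,
 * and a hypothetical max_qp_wr of 32768):
 *
 * With 512 descriptors and max_inline_data of 0, txq_calc_wqebb_cnt() gives
 * wqe_size = 16 + 16 + 16 - 18 = 30 bytes per descriptor, so
 * rte_align32pow2(30 * 512) / 64 = 16384 / 64 = 256 WQEBBs.
 *
 * Conversely, txq_calc_inline_max() allows 32768 / 512 = 64 WQEBBs per
 * descriptor, i.e. 64 * 64 - 16 - 16 - 16 - 16 + 12 = 4044 bytes of inline
 * room at most.
 */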

/**
 * Set Tx queue parameters from device configuration.
 *
 * @param txq_ctrl
 *   Pointer to Tx queue control structure.
 */
static void
txq_set_params(struct mlx5_txq_ctrl *txq_ctrl)
{
	struct mlx5_priv *priv = txq_ctrl->priv;
	struct mlx5_dev_config *config = &priv->config;
	unsigned int inlen_send; /* Inline data for ordinary SEND. */
	unsigned int inlen_empw; /* Inline data for enhanced MPW. */
	unsigned int inlen_mode; /* Minimal required Inline data. */
	unsigned int txqs_inline; /* Min Tx queues to enable inline. */
	uint64_t dev_txoff = priv->dev_data->dev_conf.txmode.offloads;
	bool tso = txq_ctrl->txq.offloads & (DEV_TX_OFFLOAD_TCP_TSO |
					     DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
					     DEV_TX_OFFLOAD_GRE_TNL_TSO |
					     DEV_TX_OFFLOAD_IP_TNL_TSO |
					     DEV_TX_OFFLOAD_UDP_TNL_TSO);
	bool vlan_inline;
	unsigned int temp;

	if (config->txqs_inline == MLX5_ARG_UNSET)
		txqs_inline =
#if defined(RTE_ARCH_ARM64)
		(priv->pci_dev->id.device_id ==
			PCI_DEVICE_ID_MELLANOX_CONNECTX5BF) ?
			MLX5_INLINE_MAX_TXQS_BLUEFIELD :
#endif
			MLX5_INLINE_MAX_TXQS;
	else
		txqs_inline = (unsigned int)config->txqs_inline;
	inlen_send = (config->txq_inline_max == MLX5_ARG_UNSET) ?
		     MLX5_SEND_DEF_INLINE_LEN :
		     (unsigned int)config->txq_inline_max;
	inlen_empw = (config->txq_inline_mpw == MLX5_ARG_UNSET) ?
		     MLX5_EMPW_DEF_INLINE_LEN :
		     (unsigned int)config->txq_inline_mpw;
	inlen_mode = (config->txq_inline_min == MLX5_ARG_UNSET) ?
		     0 : (unsigned int)config->txq_inline_min;
	if (config->mps != MLX5_MPW_ENHANCED)
		inlen_empw = 0;
	/*
	 * If a minimal amount of data to inline is requested
	 * we MUST enable inlining. This is the case for ConnectX-4,
	 * which usually requires L2 headers inlined for correct
	 * operation, and for ConnectX-4 Lx, which requires L2-L4
	 * headers inlined to support E-Switch flows.
	 */
	if (inlen_mode) {
		if (inlen_mode <= MLX5_ESEG_MIN_INLINE_SIZE) {
			/*
			 * Optimize minimal inlining for single
			 * segment packets to fill one WQEBB
			 * without gaps.
			 */
			temp = MLX5_ESEG_MIN_INLINE_SIZE;
		} else {
			temp = inlen_mode - MLX5_ESEG_MIN_INLINE_SIZE;
			temp = RTE_ALIGN(temp, MLX5_WSEG_SIZE) +
			       MLX5_ESEG_MIN_INLINE_SIZE;
			temp = RTE_MIN(temp, MLX5_SEND_MAX_INLINE_LEN);
		}
		if (temp != inlen_mode) {
			DRV_LOG(INFO,
				"port %u minimal required inline setting"
				" aligned from %u to %u",
				PORT_ID(priv), inlen_mode, temp);
			inlen_mode = temp;
		}
	}
	/*
	 * If the port is configured to support VLAN insertion and the device
	 * does not support this feature in hardware (on NICs before ConnectX-5
	 * or when the wqe_vlan_insert flag is not set) we must enable data
	 * inlining on all queues, because all queues share a single tx_burst
	 * routine.
	 */
	txq_ctrl->txq.vlan_en = config->hw_vlan_insert;
	vlan_inline = (dev_txoff & DEV_TX_OFFLOAD_VLAN_INSERT) &&
		      !config->hw_vlan_insert;
	/*
	 * If there are few Tx queues, saving CPU cycles is prioritized
	 * and data inlining is disabled entirely.
	 */
	if (inlen_send && priv->txqs_n >= txqs_inline) {
		/*
		 * The data sent with ordinary MLX5_OPCODE_SEND
		 * may be inlined in Ethernet Segment, align the
		 * length accordingly to fit entire WQEBBs.
		 */
		temp = RTE_MAX(inlen_send,
			       MLX5_ESEG_MIN_INLINE_SIZE + MLX5_WQE_DSEG_SIZE);
		temp -= MLX5_ESEG_MIN_INLINE_SIZE + MLX5_WQE_DSEG_SIZE;
		temp = RTE_ALIGN(temp, MLX5_WQE_SIZE);
		temp += MLX5_ESEG_MIN_INLINE_SIZE + MLX5_WQE_DSEG_SIZE;
		temp = RTE_MIN(temp, MLX5_WQE_SIZE_MAX +
				     MLX5_ESEG_MIN_INLINE_SIZE -
				     MLX5_WQE_CSEG_SIZE -
				     MLX5_WQE_ESEG_SIZE -
				     MLX5_WQE_DSEG_SIZE * 2);
		temp = RTE_MIN(temp, MLX5_SEND_MAX_INLINE_LEN);
		temp = RTE_MAX(temp, inlen_mode);
		if (temp != inlen_send) {
			DRV_LOG(INFO,
				"port %u ordinary send inline setting"
				" aligned from %u to %u",
				PORT_ID(priv), inlen_send, temp);
			inlen_send = temp;
		}
		/*
		 * Not aligned to cache lines, but to WQEs.
		 * The first bytes of data (initial alignment)
		 * are copied explicitly at the beginning of the
		 * inlining buffer in the Ethernet Segment.
		 */
		assert(inlen_send >= MLX5_ESEG_MIN_INLINE_SIZE);
		assert(inlen_send <= MLX5_WQE_SIZE_MAX +
				     MLX5_ESEG_MIN_INLINE_SIZE -
				     MLX5_WQE_CSEG_SIZE -
				     MLX5_WQE_ESEG_SIZE -
				     MLX5_WQE_DSEG_SIZE * 2);
	} else if (inlen_mode) {
		/*
		 * If minimal inlining is requested we must
		 * enable inlining in general, regardless of the
		 * number of configured queues. Ignore the
		 * txq_inline_max devarg, this is not
		 * full-featured inline.
		 */
		inlen_send = inlen_mode;
		inlen_empw = 0;
	} else if (vlan_inline) {
		/*
		 * Hardware does not report offload for
		 * VLAN insertion, we must enable data inlining
		 * to implement the feature in software.
		 */
		inlen_send = MLX5_ESEG_MIN_INLINE_SIZE;
		inlen_empw = 0;
	} else {
		inlen_send = 0;
		inlen_empw = 0;
	}
	txq_ctrl->txq.inlen_send = inlen_send;
	txq_ctrl->txq.inlen_mode = inlen_mode;
	txq_ctrl->txq.inlen_empw = 0;
	if (inlen_send && inlen_empw && priv->txqs_n >= txqs_inline) {
		/*
		 * The data sent with MLX5_OPCODE_ENHANCED_MPSW
		 * may be inlined in Data Segment, align the
		 * length accordingly to fit entire WQEBBs.
		 */
		temp = RTE_MAX(inlen_empw,
			       MLX5_WQE_SIZE + MLX5_DSEG_MIN_INLINE_SIZE);
		temp -= MLX5_DSEG_MIN_INLINE_SIZE;
		temp = RTE_ALIGN(temp, MLX5_WQE_SIZE);
		temp += MLX5_DSEG_MIN_INLINE_SIZE;
		temp = RTE_MIN(temp, MLX5_WQE_SIZE_MAX +
				     MLX5_DSEG_MIN_INLINE_SIZE -
				     MLX5_WQE_CSEG_SIZE -
				     MLX5_WQE_ESEG_SIZE -
				     MLX5_WQE_DSEG_SIZE);
		temp = RTE_MIN(temp, MLX5_EMPW_MAX_INLINE_LEN);
		if (temp != inlen_empw) {
			DRV_LOG(INFO,
				"port %u enhanced empw inline setting"
				" aligned from %u to %u",
				PORT_ID(priv), inlen_empw, temp);
			inlen_empw = temp;
		}
		assert(inlen_empw >= MLX5_ESEG_MIN_INLINE_SIZE);
		assert(inlen_empw <= MLX5_WQE_SIZE_MAX +
				     MLX5_DSEG_MIN_INLINE_SIZE -
				     MLX5_WQE_CSEG_SIZE -
				     MLX5_WQE_ESEG_SIZE -
				     MLX5_WQE_DSEG_SIZE);
		txq_ctrl->txq.inlen_empw = inlen_empw;
	}
	txq_ctrl->max_inline_data = RTE_MAX(inlen_send, inlen_empw);
	if (tso) {
		txq_ctrl->max_tso_header = MLX5_MAX_TSO_HEADER;
		txq_ctrl->max_inline_data = RTE_MAX(txq_ctrl->max_inline_data,
						    MLX5_MAX_TSO_HEADER);
		txq_ctrl->txq.tso_en = 1;
	}
	txq_ctrl->txq.tunnel_en = config->tunnel_en | config->swp;
	txq_ctrl->txq.swp_en = ((DEV_TX_OFFLOAD_IP_TNL_TSO |
				 DEV_TX_OFFLOAD_UDP_TNL_TSO |
				 DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM) &
				txq_ctrl->txq.offloads) && config->swp;
}

/**
 * Adjust Tx queue data inline parameters for large queue sizes.
 * The data inline feature requires multiple WQEs to fit the packets,
 * and if a large number of Tx descriptors is requested by the application
 * the total WQE amount may exceed the hardware capabilities. If the
 * default inline settings are used we can try to adjust them to meet
 * the hardware requirements without exceeding the queue size.
 *
 * @param txq_ctrl
 *   Pointer to Tx queue control structure.
 *
 * @return
 *   Zero on success, otherwise the parameters cannot be adjusted.
 */
static int
txq_adjust_params(struct mlx5_txq_ctrl *txq_ctrl)
{
	struct mlx5_priv *priv = txq_ctrl->priv;
	struct mlx5_dev_config *config = &priv->config;
	unsigned int max_inline;

	max_inline = txq_calc_inline_max(txq_ctrl);
	if (!txq_ctrl->txq.inlen_send) {
		/*
		 * Inline data feature is not engaged at all.
		 * There is nothing to adjust.
		 */
		return 0;
	}
	if (txq_ctrl->max_inline_data <= max_inline) {
		/*
		 * The requested inline data length does not
		 * exceed queue capabilities.
		 */
		return 0;
	}
	if (txq_ctrl->txq.inlen_mode > max_inline) {
		DRV_LOG(ERR,
			"minimal data inline requirements (%u) are not"
			" satisfied (%u) on port %u, try the smaller"
			" Tx queue size (%d)",
			txq_ctrl->txq.inlen_mode, max_inline,
			priv->dev_data->port_id,
			priv->sh->device_attr.orig_attr.max_qp_wr);
		goto error;
	}
	if (txq_ctrl->txq.inlen_send > max_inline &&
	    config->txq_inline_max != MLX5_ARG_UNSET &&
	    config->txq_inline_max > (int)max_inline) {
		DRV_LOG(ERR,
			"txq_inline_max requirements (%u) are not"
			" satisfied (%u) on port %u, try the smaller"
			" Tx queue size (%d)",
			txq_ctrl->txq.inlen_send, max_inline,
			priv->dev_data->port_id,
			priv->sh->device_attr.orig_attr.max_qp_wr);
		goto error;
	}
	if (txq_ctrl->txq.inlen_empw > max_inline &&
	    config->txq_inline_mpw != MLX5_ARG_UNSET &&
	    config->txq_inline_mpw > (int)max_inline) {
		DRV_LOG(ERR,
			"txq_inline_mpw requirements (%u) are not"
			" satisfied (%u) on port %u, try the smaller"
			" Tx queue size (%d)",
			txq_ctrl->txq.inlen_empw, max_inline,
			priv->dev_data->port_id,
			priv->sh->device_attr.orig_attr.max_qp_wr);
		goto error;
	}
	if (txq_ctrl->txq.tso_en && max_inline < MLX5_MAX_TSO_HEADER) {
		DRV_LOG(ERR,
			"tso header inline requirements (%u) are not"
			" satisfied (%u) on port %u, try the smaller"
			" Tx queue size (%d)",
			MLX5_MAX_TSO_HEADER, max_inline,
			priv->dev_data->port_id,
			priv->sh->device_attr.orig_attr.max_qp_wr);
		goto error;
	}
	if (txq_ctrl->txq.inlen_send > max_inline) {
		DRV_LOG(WARNING,
			"adjust txq_inline_max (%u->%u)"
			" due to large Tx queue on port %u",
			txq_ctrl->txq.inlen_send, max_inline,
			priv->dev_data->port_id);
		txq_ctrl->txq.inlen_send = max_inline;
	}
	if (txq_ctrl->txq.inlen_empw > max_inline) {
		DRV_LOG(WARNING,
			"adjust txq_inline_mpw (%u->%u)"
			" due to large Tx queue on port %u",
			txq_ctrl->txq.inlen_empw, max_inline,
			priv->dev_data->port_id);
		txq_ctrl->txq.inlen_empw = max_inline;
	}
	txq_ctrl->max_inline_data = RTE_MAX(txq_ctrl->txq.inlen_send,
					    txq_ctrl->txq.inlen_empw);
	assert(txq_ctrl->max_inline_data <= max_inline);
	assert(txq_ctrl->txq.inlen_mode <= max_inline);
	assert(txq_ctrl->txq.inlen_mode <= txq_ctrl->txq.inlen_send);
	assert(txq_ctrl->txq.inlen_mode <= txq_ctrl->txq.inlen_empw);
	return 0;
error:
	rte_errno = ENOMEM;
	return -ENOMEM;
}

/**
 * Create a DPDK Tx queue.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param idx
 *   TX queue index.
 * @param desc
 *   Number of descriptors to configure in queue.
 * @param socket
 *   NUMA socket on which memory must be allocated.
 * @param[in] conf
 *   Thresholds parameters.
 *
 * @return
 *   A DPDK queue object on success, NULL otherwise and rte_errno is set.
 */
struct mlx5_txq_ctrl *
mlx5_txq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
	     unsigned int socket, const struct rte_eth_txconf *conf)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_ctrl *tmpl;

	tmpl = rte_calloc_socket("TXQ", 1,
				 sizeof(*tmpl) +
				 desc * sizeof(struct rte_mbuf *),
				 0, socket);
	if (!tmpl) {
		rte_errno = ENOMEM;
		return NULL;
	}
	if (mlx5_mr_btree_init(&tmpl->txq.mr_ctrl.cache_bh,
			       MLX5_MR_BTREE_CACHE_N, socket)) {
		/* rte_errno is already set. */
		goto error;
	}
	/* Save pointer of global generation number to check memory event. */
	tmpl->txq.mr_ctrl.dev_gen_ptr = &priv->sh->mr.dev_gen;
	assert(desc > MLX5_TX_COMP_THRESH);
	tmpl->txq.offloads = conf->offloads |
			     dev->data->dev_conf.txmode.offloads;
	tmpl->priv = priv;
	tmpl->socket = socket;
	tmpl->txq.elts_n = log2above(desc);
	tmpl->txq.elts_s = desc;
	tmpl->txq.elts_m = desc - 1;
	tmpl->txq.port_id = dev->data->port_id;
	tmpl->txq.idx = idx;
	txq_set_params(tmpl);
	if (txq_adjust_params(tmpl))
		goto error;
	if (txq_calc_wqebb_cnt(tmpl) >
	    priv->sh->device_attr.orig_attr.max_qp_wr) {
		DRV_LOG(ERR,
			"port %u Tx WQEBB count (%d) exceeds the limit (%d),"
			" try smaller queue size",
			dev->data->port_id, txq_calc_wqebb_cnt(tmpl),
			priv->sh->device_attr.orig_attr.max_qp_wr);
		rte_errno = ENOMEM;
		goto error;
	}
	rte_atomic32_inc(&tmpl->refcnt);
	tmpl->type = MLX5_TXQ_TYPE_STANDARD;
	LIST_INSERT_HEAD(&priv->txqsctrl, tmpl, next);
	return tmpl;
error:
	rte_free(tmpl);
	return NULL;
}

/**
 * Create a DPDK Tx hairpin queue.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param idx
 *   TX queue index.
 * @param desc
 *   Number of descriptors to configure in queue.
 * @param hairpin_conf
 *   The hairpin configuration.
 *
 * @return
 *   A DPDK queue object on success, NULL otherwise and rte_errno is set.
 */
struct mlx5_txq_ctrl *
mlx5_txq_hairpin_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
		     const struct rte_eth_hairpin_conf *hairpin_conf)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_ctrl *tmpl;

	tmpl = rte_calloc_socket("TXQ", 1,
				 sizeof(*tmpl), 0, SOCKET_ID_ANY);
	if (!tmpl) {
		rte_errno = ENOMEM;
		return NULL;
	}
	tmpl->priv = priv;
	tmpl->socket = SOCKET_ID_ANY;
	tmpl->txq.elts_n = log2above(desc);
	tmpl->txq.port_id = dev->data->port_id;
	tmpl->txq.idx = idx;
	tmpl->hairpin_conf = *hairpin_conf;
	tmpl->type = MLX5_TXQ_TYPE_HAIRPIN;
	rte_atomic32_inc(&tmpl->refcnt);
	LIST_INSERT_HEAD(&priv->txqsctrl, tmpl, next);
	return tmpl;
}

/**
 * Get a Tx queue.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param idx
 *   TX queue index.
 *
 * @return
 *   A pointer to the queue if it exists.
 */
struct mlx5_txq_ctrl *
mlx5_txq_get(struct rte_eth_dev *dev, uint16_t idx)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_ctrl *ctrl = NULL;

	if ((*priv->txqs)[idx]) {
		ctrl = container_of((*priv->txqs)[idx], struct mlx5_txq_ctrl,
				    txq);
		mlx5_txq_obj_get(dev, idx);
		rte_atomic32_inc(&ctrl->refcnt);
	}
	return ctrl;
}

/**
 * Release a Tx queue.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param idx
 *   TX queue index.
 *
 * @return
 *   1 while a reference on it exists, 0 when freed.
 */
int
mlx5_txq_release(struct rte_eth_dev *dev, uint16_t idx)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_ctrl *txq;

	if (!(*priv->txqs)[idx])
		return 0;
	txq = container_of((*priv->txqs)[idx], struct mlx5_txq_ctrl, txq);
	if (txq->obj && !mlx5_txq_obj_release(txq->obj))
		txq->obj = NULL;
	if (rte_atomic32_dec_and_test(&txq->refcnt)) {
		txq_free_elts(txq);
		mlx5_mr_btree_free(&txq->txq.mr_ctrl.cache_bh);
		LIST_REMOVE(txq, next);
		rte_free(txq);
		(*priv->txqs)[idx] = NULL;
		return 0;
	}
	return 1;
}

/**
 * Verify if the queue can be released.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param idx
 *   TX queue index.
 *
 * @return
 *   1 if the queue can be released, 0 if it is still referenced,
 *   -1 if the queue does not exist.
 */
int
mlx5_txq_releasable(struct rte_eth_dev *dev, uint16_t idx)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_ctrl *txq;

	if (!(*priv->txqs)[idx])
		return -1;
	txq = container_of((*priv->txqs)[idx], struct mlx5_txq_ctrl, txq);
	return (rte_atomic32_read(&txq->refcnt) == 1);
}

/**
 * Verify the Tx queue list is empty.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   The number of objects not released.
 */
int
mlx5_txq_verify(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_ctrl *txq_ctrl;
	int ret = 0;

	LIST_FOREACH(txq_ctrl, &priv->txqsctrl, next) {
		DRV_LOG(DEBUG, "port %u Tx queue %u still referenced",
			dev->data->port_id, txq_ctrl->txq.idx);
		++ret;
	}
	return ret;
}
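
/*
 * Illustrative usage sketch (not part of the driver): control-path code in
 * the PMD typically pins a queue while operating on it and drops the
 * reference afterwards, relying on the counting implemented above:
 *
 *	struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, idx);
 *
 *	if (txq_ctrl != NULL) {
 *		... operate on txq_ctrl->txq ...
 *		mlx5_txq_release(dev, idx);
 *	}
 */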