/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2015 6WIND S.A.
 * Copyright 2015 Mellanox Technologies, Ltd
 */

#include <stddef.h>
#include <assert.h>
#include <errno.h>
#include <string.h>
#include <stdint.h>
#include <unistd.h>
#include <sys/mman.h>
#include <inttypes.h>

/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif

#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <rte_ethdev_driver.h>
#include <rte_common.h>

#include "mlx5_utils.h"
#include "mlx5_defs.h"
#include "mlx5.h"
#include "mlx5_rxtx.h"
#include "mlx5_autoconf.h"
#include "mlx5_glue.h"

/**
 * Allocate TX queue elements.
 *
 * @param txq_ctrl
 *   Pointer to TX queue structure.
 */
void
txq_alloc_elts(struct mlx5_txq_ctrl *txq_ctrl)
{
	const unsigned int elts_n = 1 << txq_ctrl->txq.elts_n;
	unsigned int i;

	for (i = 0; (i != elts_n); ++i)
		txq_ctrl->txq.elts[i] = NULL;
	DRV_LOG(DEBUG, "port %u Tx queue %u allocated and configured %u WRs",
		PORT_ID(txq_ctrl->priv), txq_ctrl->txq.idx, elts_n);
	txq_ctrl->txq.elts_head = 0;
	txq_ctrl->txq.elts_tail = 0;
	txq_ctrl->txq.elts_comp = 0;
}

/**
 * Free TX queue elements.
 *
 * @param txq_ctrl
 *   Pointer to TX queue structure.
 */
static void
txq_free_elts(struct mlx5_txq_ctrl *txq_ctrl)
{
	const uint16_t elts_n = 1 << txq_ctrl->txq.elts_n;
	const uint16_t elts_m = elts_n - 1;
	uint16_t elts_head = txq_ctrl->txq.elts_head;
	uint16_t elts_tail = txq_ctrl->txq.elts_tail;
	struct rte_mbuf *(*elts)[elts_n] = &txq_ctrl->txq.elts;

	DRV_LOG(DEBUG, "port %u Tx queue %u freeing WRs",
		PORT_ID(txq_ctrl->priv), txq_ctrl->txq.idx);
	txq_ctrl->txq.elts_head = 0;
	txq_ctrl->txq.elts_tail = 0;
	txq_ctrl->txq.elts_comp = 0;

	while (elts_tail != elts_head) {
		struct rte_mbuf *elt = (*elts)[elts_tail & elts_m];

		assert(elt != NULL);
		rte_pktmbuf_free_seg(elt);
#ifndef NDEBUG
		/* Poisoning. */
		memset(&(*elts)[elts_tail & elts_m],
		       0x77,
		       sizeof((*elts)[elts_tail & elts_m]));
#endif
		++elts_tail;
	}
}

/**
 * Returns the per-port supported offloads.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   Supported Tx offloads.
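 *
 *   The reported set depends on the mlx5_dev_config capability flags
 *   (hw_csum, tso, swp, tunnel_en and, when built with
 *   HAVE_IBV_FLOW_DV_SUPPORT, dv_flow_en) checked in the function body.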
 */
uint64_t
mlx5_get_tx_port_offloads(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	uint64_t offloads = (DEV_TX_OFFLOAD_MULTI_SEGS |
			     DEV_TX_OFFLOAD_VLAN_INSERT);
	struct mlx5_dev_config *config = &priv->config;

	if (config->hw_csum)
		offloads |= (DEV_TX_OFFLOAD_IPV4_CKSUM |
			     DEV_TX_OFFLOAD_UDP_CKSUM |
			     DEV_TX_OFFLOAD_TCP_CKSUM);
	if (config->tso)
		offloads |= DEV_TX_OFFLOAD_TCP_TSO;
	if (config->swp) {
		if (config->hw_csum)
			offloads |= DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM;
		if (config->tso)
			offloads |= (DEV_TX_OFFLOAD_IP_TNL_TSO |
				     DEV_TX_OFFLOAD_UDP_TNL_TSO);
	}
	if (config->tunnel_en) {
		if (config->hw_csum)
			offloads |= DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM;
		if (config->tso)
			offloads |= (DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
				     DEV_TX_OFFLOAD_GRE_TNL_TSO);
	}
#ifdef HAVE_IBV_FLOW_DV_SUPPORT
	if (config->dv_flow_en)
		offloads |= DEV_TX_OFFLOAD_MATCH_METADATA;
#endif
	return offloads;
}

/**
 * DPDK callback to configure a TX queue.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param idx
 *   TX queue index.
 * @param desc
 *   Number of descriptors to configure in queue.
 * @param socket
 *   NUMA socket on which memory must be allocated.
 * @param[in] conf
 *   Thresholds parameters.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
		    unsigned int socket, const struct rte_eth_txconf *conf)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_data *txq = (*priv->txqs)[idx];
	struct mlx5_txq_ctrl *txq_ctrl =
		container_of(txq, struct mlx5_txq_ctrl, txq);

	if (desc <= MLX5_TX_COMP_THRESH) {
		DRV_LOG(WARNING,
			"port %u number of descriptors requested for Tx queue"
			" %u must be higher than MLX5_TX_COMP_THRESH, using %u"
			" instead of %u",
			dev->data->port_id, idx, MLX5_TX_COMP_THRESH + 1, desc);
		desc = MLX5_TX_COMP_THRESH + 1;
	}
	if (!rte_is_power_of_2(desc)) {
		desc = 1 << log2above(desc);
		DRV_LOG(WARNING,
			"port %u increased number of descriptors in Tx queue"
			" %u to the next power of two (%d)",
			dev->data->port_id, idx, desc);
	}
	DRV_LOG(DEBUG, "port %u configuring queue %u for %u descriptors",
		dev->data->port_id, idx, desc);
	if (idx >= priv->txqs_n) {
		DRV_LOG(ERR, "port %u Tx queue index out of range (%u >= %u)",
			dev->data->port_id, idx, priv->txqs_n);
		rte_errno = EOVERFLOW;
		return -rte_errno;
	}
	if (!mlx5_txq_releasable(dev, idx)) {
		rte_errno = EBUSY;
		DRV_LOG(ERR, "port %u unable to release queue index %u",
			dev->data->port_id, idx);
		return -rte_errno;
	}
	mlx5_txq_release(dev, idx);
	txq_ctrl = mlx5_txq_new(dev, idx, desc, socket, conf);
	if (!txq_ctrl) {
		DRV_LOG(ERR, "port %u unable to allocate queue index %u",
			dev->data->port_id, idx);
		return -rte_errno;
	}
	DRV_LOG(DEBUG, "port %u adding Tx queue %u to list",
		dev->data->port_id, idx);
	(*priv->txqs)[idx] = &txq_ctrl->txq;
	return 0;
}

/**
 * DPDK callback to release a TX queue.
 *
 * @param dpdk_txq
 *   Generic TX queue pointer.
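 *   May be NULL, in which case the function returns without doing anything.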
 */
void
mlx5_tx_queue_release(void *dpdk_txq)
{
	struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
	struct mlx5_txq_ctrl *txq_ctrl;
	struct mlx5_priv *priv;
	unsigned int i;

	if (txq == NULL)
		return;
	txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
	priv = txq_ctrl->priv;
	for (i = 0; (i != priv->txqs_n); ++i)
		if ((*priv->txqs)[i] == txq) {
			mlx5_txq_release(ETH_DEV(priv), i);
			DRV_LOG(DEBUG, "port %u removing Tx queue %u from list",
				PORT_ID(priv), txq->idx);
			break;
		}
}

/**
 * Initialize Tx UAR registers for primary process.
 *
 * @param txq_ctrl
 *   Pointer to Tx queue control structure.
 */
static void
txq_uar_init(struct mlx5_txq_ctrl *txq_ctrl)
{
	struct mlx5_priv *priv = txq_ctrl->priv;
	struct mlx5_proc_priv *ppriv = MLX5_PROC_PRIV(PORT_ID(priv));
#ifndef RTE_ARCH_64
	unsigned int lock_idx;
	const size_t page_size = sysconf(_SC_PAGESIZE);
#endif

	assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
	assert(ppriv);
	ppriv->uar_table[txq_ctrl->txq.idx] = txq_ctrl->bf_reg;
#ifndef RTE_ARCH_64
	/* Assign a UAR lock according to the UAR page number. */
	lock_idx = (txq_ctrl->uar_mmap_offset / page_size) &
		   MLX5_UAR_PAGE_NUM_MASK;
	txq_ctrl->txq.uar_lock = &priv->uar_lock[lock_idx];
#endif
}

/**
 * Remap UAR register of a Tx queue for secondary process.
 *
 * The remapped address is stored in a table in the process private structure
 * of the device, indexed by queue index.
 *
 * @param txq_ctrl
 *   Pointer to Tx queue control structure.
 * @param fd
 *   Verbs file descriptor to map UAR pages.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
txq_uar_init_secondary(struct mlx5_txq_ctrl *txq_ctrl, int fd)
{
	struct mlx5_priv *priv = txq_ctrl->priv;
	struct mlx5_proc_priv *ppriv = MLX5_PROC_PRIV(PORT_ID(priv));
	struct mlx5_txq_data *txq = &txq_ctrl->txq;
	void *addr;
	uintptr_t uar_va;
	uintptr_t offset;
	const size_t page_size = sysconf(_SC_PAGESIZE);

	assert(ppriv);
	/*
	 * As in rdma-core, UARs are mapped at OS page size granularity.
	 * See the libmlx5 function mlx5_init_context().
	 */
	uar_va = (uintptr_t)txq_ctrl->bf_reg;
	offset = uar_va & (page_size - 1); /* Offset in page. */
	addr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, fd,
		    txq_ctrl->uar_mmap_offset);
	if (addr == MAP_FAILED) {
		DRV_LOG(ERR,
			"port %u mmap failed for BF reg of txq %u",
			txq->port_id, txq->idx);
		rte_errno = ENXIO;
		return -rte_errno;
	}
	addr = RTE_PTR_ADD(addr, offset);
	ppriv->uar_table[txq->idx] = addr;
	return 0;
}

/**
 * Unmap UAR register of a Tx queue for secondary process.
 *
 * @param txq_ctrl
 *   Pointer to Tx queue control structure.
 */
static void
txq_uar_uninit_secondary(struct mlx5_txq_ctrl *txq_ctrl)
{
	struct mlx5_proc_priv *ppriv = MLX5_PROC_PRIV(PORT_ID(txq_ctrl->priv));
	const size_t page_size = sysconf(_SC_PAGESIZE);
	void *addr;

	addr = ppriv->uar_table[txq_ctrl->txq.idx];
	munmap(RTE_PTR_ALIGN_FLOOR(addr, page_size), page_size);
}

/**
 * Initialize Tx UAR registers for secondary process.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fd
 *   Verbs file descriptor to map UAR pages.
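 *   The same descriptor is used to remap the UAR page of every configured
 *   Tx queue of the device.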
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_tx_uar_init_secondary(struct rte_eth_dev *dev, int fd)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_data *txq;
	struct mlx5_txq_ctrl *txq_ctrl;
	unsigned int i;
	int ret;

	assert(rte_eal_process_type() == RTE_PROC_SECONDARY);
	for (i = 0; i != priv->txqs_n; ++i) {
		if (!(*priv->txqs)[i])
			continue;
		txq = (*priv->txqs)[i];
		txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
		assert(txq->idx == (uint16_t)i);
		ret = txq_uar_init_secondary(txq_ctrl, fd);
		if (ret)
			goto error;
	}
	return 0;
error:
	/* Rollback. */
	do {
		if (!(*priv->txqs)[i])
			continue;
		txq = (*priv->txqs)[i];
		txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
		txq_uar_uninit_secondary(txq_ctrl);
	} while (i--);
	return -rte_errno;
}

/**
 * Create the Tx queue Verbs object.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param idx
 *   Queue index in DPDK Tx queue array.
 *
 * @return
 *   The Verbs object initialised, NULL otherwise and rte_errno is set.
 */
struct mlx5_txq_ibv *
mlx5_txq_ibv_new(struct rte_eth_dev *dev, uint16_t idx)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_data *txq_data = (*priv->txqs)[idx];
	struct mlx5_txq_ctrl *txq_ctrl =
		container_of(txq_data, struct mlx5_txq_ctrl, txq);
	struct mlx5_txq_ibv tmpl;
	struct mlx5_txq_ibv *txq_ibv = NULL;
	union {
		struct ibv_qp_init_attr_ex init;
		struct ibv_cq_init_attr_ex cq;
		struct ibv_qp_attr mod;
	} attr;
	unsigned int cqe_n;
	struct mlx5dv_qp qp = { .comp_mask = MLX5DV_QP_MASK_UAR_MMAP_OFFSET };
	struct mlx5dv_cq cq_info;
	struct mlx5dv_obj obj;
	const int desc = 1 << txq_data->elts_n;
	int ret = 0;

#ifdef HAVE_IBV_FLOW_DV_SUPPORT
	/* If using DevX, need additional mask to read tisn value. */
	if (priv->config.devx && !priv->sh->tdn)
		qp.comp_mask |= MLX5DV_QP_MASK_RAW_QP_HANDLES;
#endif
	assert(txq_data);
	priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_TX_QUEUE;
	priv->verbs_alloc_ctx.obj = txq_ctrl;
	if (mlx5_getenv_int("MLX5_ENABLE_CQE_COMPRESSION")) {
		DRV_LOG(ERR,
			"port %u MLX5_ENABLE_CQE_COMPRESSION must never be set",
			dev->data->port_id);
		rte_errno = EINVAL;
		return NULL;
	}
	memset(&tmpl, 0, sizeof(struct mlx5_txq_ibv));
	attr.cq = (struct ibv_cq_init_attr_ex){
		.comp_mask = 0,
	};
	cqe_n = desc / MLX5_TX_COMP_THRESH +
		1 + MLX5_TX_COMP_THRESH_INLINE_DIV;
	tmpl.cq = mlx5_glue->create_cq(priv->sh->ctx, cqe_n, NULL, NULL, 0);
	if (tmpl.cq == NULL) {
		DRV_LOG(ERR, "port %u Tx queue %u CQ creation failure",
			dev->data->port_id, idx);
		rte_errno = errno;
		goto error;
	}
	attr.init = (struct ibv_qp_init_attr_ex){
		/* CQ to be associated with the send queue. */
		.send_cq = tmpl.cq,
		/* CQ to be associated with the receive queue. */
		.recv_cq = tmpl.cq,
		.cap = {
			/* Max number of outstanding WRs. */
			.max_send_wr =
				((priv->sh->device_attr.orig_attr.max_qp_wr <
				  desc) ?
				 priv->sh->device_attr.orig_attr.max_qp_wr :
				 desc),
			/*
			 * Max number of scatter/gather elements in a WR,
			 * must be 1 to prevent libmlx5 from trying to affect
			 * too much memory. TX gather is not impacted by the
			 * device_attr.max_sge limit and will still work
			 * properly.
			 */
			.max_send_sge = 1,
		},
		.qp_type = IBV_QPT_RAW_PACKET,
		/*
		 * Do *NOT* enable this, completion events are managed per
		 * Tx burst.
		 */
		.sq_sig_all = 0,
		.pd = priv->sh->pd,
		.comp_mask = IBV_QP_INIT_ATTR_PD,
	};
	if (txq_data->inlen_send)
		attr.init.cap.max_inline_data = txq_ctrl->max_inline_data;
	if (txq_data->tso_en) {
		attr.init.max_tso_header = txq_ctrl->max_tso_header;
		attr.init.comp_mask |= IBV_QP_INIT_ATTR_MAX_TSO_HEADER;
	}
	tmpl.qp = mlx5_glue->create_qp_ex(priv->sh->ctx, &attr.init);
	if (tmpl.qp == NULL) {
		DRV_LOG(ERR, "port %u Tx queue %u QP creation failure",
			dev->data->port_id, idx);
		rte_errno = errno;
		goto error;
	}
	attr.mod = (struct ibv_qp_attr){
		/* Move the QP to this state. */
		.qp_state = IBV_QPS_INIT,
		/* IB device port number. */
		.port_num = (uint8_t)priv->ibv_port,
	};
	ret = mlx5_glue->modify_qp(tmpl.qp, &attr.mod,
				   (IBV_QP_STATE | IBV_QP_PORT));
	if (ret) {
		DRV_LOG(ERR,
			"port %u Tx queue %u QP state to IBV_QPS_INIT failed",
			dev->data->port_id, idx);
		rte_errno = errno;
		goto error;
	}
	attr.mod = (struct ibv_qp_attr){
		.qp_state = IBV_QPS_RTR
	};
	ret = mlx5_glue->modify_qp(tmpl.qp, &attr.mod, IBV_QP_STATE);
	if (ret) {
		DRV_LOG(ERR,
			"port %u Tx queue %u QP state to IBV_QPS_RTR failed",
			dev->data->port_id, idx);
		rte_errno = errno;
		goto error;
	}
	attr.mod.qp_state = IBV_QPS_RTS;
	ret = mlx5_glue->modify_qp(tmpl.qp, &attr.mod, IBV_QP_STATE);
	if (ret) {
		DRV_LOG(ERR,
			"port %u Tx queue %u QP state to IBV_QPS_RTS failed",
			dev->data->port_id, idx);
		rte_errno = errno;
		goto error;
	}
	txq_ibv = rte_calloc_socket(__func__, 1, sizeof(struct mlx5_txq_ibv), 0,
				    txq_ctrl->socket);
	if (!txq_ibv) {
		DRV_LOG(ERR, "port %u Tx queue %u cannot allocate memory",
			dev->data->port_id, idx);
		rte_errno = ENOMEM;
		goto error;
	}
	obj.cq.in = tmpl.cq;
	obj.cq.out = &cq_info;
	obj.qp.in = tmpl.qp;
	obj.qp.out = &qp;
	ret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_CQ | MLX5DV_OBJ_QP);
	if (ret != 0) {
		rte_errno = errno;
		goto error;
	}
	if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) {
		DRV_LOG(ERR,
			"port %u wrong MLX5_CQE_SIZE environment variable"
			" value: it should be set to %u",
			dev->data->port_id, RTE_CACHE_LINE_SIZE);
		rte_errno = EINVAL;
		goto error;
	}
	txq_data->cqe_n = log2above(cq_info.cqe_cnt);
	txq_data->cqe_s = 1 << txq_data->cqe_n;
	txq_data->cqe_m = txq_data->cqe_s - 1;
	txq_data->qp_num_8s = tmpl.qp->qp_num << 8;
	txq_data->wqes = qp.sq.buf;
	txq_data->wqe_n = log2above(qp.sq.wqe_cnt);
	txq_data->wqe_s = 1 << txq_data->wqe_n;
	txq_data->wqe_m = txq_data->wqe_s - 1;
	txq_data->wqes_end = txq_data->wqes + txq_data->wqe_s;
	txq_data->qp_db = &qp.dbrec[MLX5_SND_DBR];
	txq_data->cq_db = cq_info.dbrec;
	txq_data->cqes = (volatile struct mlx5_cqe *)cq_info.buf;
	txq_data->cq_ci = 0;
#ifndef NDEBUG
	txq_data->cq_pi = 0;
#endif
	txq_data->wqe_ci = 0;
	txq_data->wqe_pi = 0;
	txq_data->wqe_comp = 0;
	txq_data->wqe_thres = txq_data->wqe_s / MLX5_TX_COMP_THRESH_INLINE_DIV;
#ifdef HAVE_IBV_FLOW_DV_SUPPORT
	/*
	 * If using DevX, the TIS transport domain value needs to be queried
	 * and stored. This is done once per port. The value is later used
	 * on Rx, when creating the matching TIR.
	 */
	if (priv->config.devx && !priv->sh->tdn) {
		ret = mlx5_devx_cmd_qp_query_tis_td(tmpl.qp, qp.tisn,
						    &priv->sh->tdn);
		if (ret) {
			DRV_LOG(ERR, "Failed to query port %u Tx queue %u QP "
				"TIS transport domain", dev->data->port_id,
				idx);
			rte_errno = EINVAL;
			goto error;
		} else {
			DRV_LOG(DEBUG, "port %u Tx queue %u TIS number %d "
				"transport domain %d", dev->data->port_id,
				idx, qp.tisn, priv->sh->tdn);
		}
	}
#endif
	txq_ibv->qp = tmpl.qp;
	txq_ibv->cq = tmpl.cq;
	rte_atomic32_inc(&txq_ibv->refcnt);
	txq_ctrl->bf_reg = qp.bf.reg;
	txq_uar_init(txq_ctrl);
	if (qp.comp_mask & MLX5DV_QP_MASK_UAR_MMAP_OFFSET) {
		txq_ctrl->uar_mmap_offset = qp.uar_mmap_offset;
		DRV_LOG(DEBUG, "port %u: uar_mmap_offset 0x%"PRIx64,
			dev->data->port_id, txq_ctrl->uar_mmap_offset);
	} else {
		DRV_LOG(ERR,
			"port %u failed to retrieve UAR info, invalid"
			" libmlx5.so",
			dev->data->port_id);
		rte_errno = EINVAL;
		goto error;
	}
	LIST_INSERT_HEAD(&priv->txqsibv, txq_ibv, next);
	txq_ibv->txq_ctrl = txq_ctrl;
	priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE;
	return txq_ibv;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	if (tmpl.cq)
		claim_zero(mlx5_glue->destroy_cq(tmpl.cq));
	if (tmpl.qp)
		claim_zero(mlx5_glue->destroy_qp(tmpl.qp));
	if (txq_ibv)
		rte_free(txq_ibv);
	priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE;
	rte_errno = ret; /* Restore rte_errno. */
	return NULL;
}

/**
 * Get a Tx queue Verbs object.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param idx
 *   Queue index in DPDK Tx queue array.
 *
 * @return
 *   The Verbs object if it exists.
 */
struct mlx5_txq_ibv *
mlx5_txq_ibv_get(struct rte_eth_dev *dev, uint16_t idx)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_ctrl *txq_ctrl;

	if (idx >= priv->txqs_n)
		return NULL;
	if (!(*priv->txqs)[idx])
		return NULL;
	txq_ctrl = container_of((*priv->txqs)[idx], struct mlx5_txq_ctrl, txq);
	if (txq_ctrl->ibv)
		rte_atomic32_inc(&txq_ctrl->ibv->refcnt);
	return txq_ctrl->ibv;
}

/**
 * Release a Tx Verbs queue object.
 *
 * @param txq_ibv
 *   Verbs Tx queue object.
 *
 * @return
 *   1 while a reference on it exists, 0 when freed.
 */
int
mlx5_txq_ibv_release(struct mlx5_txq_ibv *txq_ibv)
{
	assert(txq_ibv);
	if (rte_atomic32_dec_and_test(&txq_ibv->refcnt)) {
		claim_zero(mlx5_glue->destroy_qp(txq_ibv->qp));
		claim_zero(mlx5_glue->destroy_cq(txq_ibv->cq));
		LIST_REMOVE(txq_ibv, next);
		rte_free(txq_ibv);
		return 0;
	}
	return 1;
}

/**
 * Verify the Verbs Tx queue list is empty.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   The number of objects not released.
 */
int
mlx5_txq_ibv_verify(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	int ret = 0;
	struct mlx5_txq_ibv *txq_ibv;

	LIST_FOREACH(txq_ibv, &priv->txqsibv, next) {
		DRV_LOG(DEBUG, "port %u Verbs Tx queue %u still referenced",
			dev->data->port_id, txq_ibv->txq_ctrl->txq.idx);
		++ret;
	}
	return ret;
}

/**
 * Calculate the total number of WQEBB for Tx queue.
 *
 * Simplified version of calc_sq_size() in rdma-core.
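 *
 * The per-WQE size is estimated from the Control and Ethernet segments,
 * one data segment and the configured maximal inline data room (minus the
 * minimal inline part already carried by the Ethernet segment); the total
 * ring size is then rounded up to a power of two and expressed in
 * MLX5_WQE_SIZE (WQEBB) units.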
 *
 * @param txq_ctrl
 *   Pointer to Tx queue control structure.
 *
 * @return
 *   The number of WQEBB.
 */
static int
txq_calc_wqebb_cnt(struct mlx5_txq_ctrl *txq_ctrl)
{
	unsigned int wqe_size;
	const unsigned int desc = 1 << txq_ctrl->txq.elts_n;

	wqe_size = MLX5_WQE_CSEG_SIZE +
		   MLX5_WQE_ESEG_SIZE +
		   MLX5_WSEG_SIZE -
		   MLX5_ESEG_MIN_INLINE_SIZE +
		   txq_ctrl->max_inline_data;
	return rte_align32pow2(wqe_size * desc) / MLX5_WQE_SIZE;
}

/**
 * Set Tx queue parameters from device configuration.
 *
 * @param txq_ctrl
 *   Pointer to Tx queue control structure.
 */
static void
txq_set_params(struct mlx5_txq_ctrl *txq_ctrl)
{
	struct mlx5_priv *priv = txq_ctrl->priv;
	struct mlx5_dev_config *config = &priv->config;
	unsigned int inlen_send; /* Inline data for ordinary SEND. */
	unsigned int inlen_empw; /* Inline data for enhanced MPW. */
	unsigned int inlen_mode; /* Minimal required inline data. */
	unsigned int txqs_inline; /* Min Tx queues to enable inline. */
	uint64_t dev_txoff = priv->dev_data->dev_conf.txmode.offloads;
	bool tso = txq_ctrl->txq.offloads & (DEV_TX_OFFLOAD_TCP_TSO |
					     DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
					     DEV_TX_OFFLOAD_GRE_TNL_TSO |
					     DEV_TX_OFFLOAD_IP_TNL_TSO |
					     DEV_TX_OFFLOAD_UDP_TNL_TSO);
	bool vlan_inline;
	unsigned int temp;

	if (config->txqs_inline == MLX5_ARG_UNSET)
		txqs_inline =
#if defined(RTE_ARCH_ARM64)
		(priv->sh->pci_dev->id.device_id ==
			PCI_DEVICE_ID_MELLANOX_CONNECTX5BF) ?
			MLX5_INLINE_MAX_TXQS_BLUEFIELD :
#endif
			MLX5_INLINE_MAX_TXQS;
	else
		txqs_inline = (unsigned int)config->txqs_inline;
	inlen_send = (config->txq_inline_max == MLX5_ARG_UNSET) ?
		     MLX5_SEND_DEF_INLINE_LEN :
		     (unsigned int)config->txq_inline_max;
	inlen_empw = (config->txq_inline_mpw == MLX5_ARG_UNSET) ?
		     MLX5_EMPW_DEF_INLINE_LEN :
		     (unsigned int)config->txq_inline_mpw;
	inlen_mode = (config->txq_inline_min == MLX5_ARG_UNSET) ?
		     0 : (unsigned int)config->txq_inline_min;
	if (config->mps != MLX5_MPW_ENHANCED)
		inlen_empw = 0;
	/*
	 * If a minimal amount of data to inline is requested
	 * we MUST enable inlining. This is the case for ConnectX-4,
	 * which usually requires L2 to be inlined to operate
	 * correctly, and ConnectX-4 Lx, which requires L2-L4 to be
	 * inlined to support E-Switch flows.
	 */
	if (inlen_mode) {
		if (inlen_mode <= MLX5_ESEG_MIN_INLINE_SIZE) {
			/*
			 * Optimize minimal inlining for single
			 * segment packets to fill one WQEBB
			 * without gaps.
			 */
			temp = MLX5_ESEG_MIN_INLINE_SIZE;
		} else {
			temp = inlen_mode - MLX5_ESEG_MIN_INLINE_SIZE;
			temp = RTE_ALIGN(temp, MLX5_WSEG_SIZE) +
			       MLX5_ESEG_MIN_INLINE_SIZE;
			temp = RTE_MIN(temp, MLX5_SEND_MAX_INLINE_LEN);
		}
		if (temp != inlen_mode) {
			DRV_LOG(INFO,
				"port %u minimal required inline setting"
				" aligned from %u to %u",
				PORT_ID(priv), inlen_mode, temp);
			inlen_mode = temp;
		}
	}
	/*
	 * If the port is configured to support VLAN insertion and the device
	 * does not support this feature in hardware (for NICs before
	 * ConnectX-5, or if the wqe_vlan_insert flag is not set), data inline
	 * must be enabled on all queues because the feature is implemented
	 * by the single tx_burst routine.
	 */
	txq_ctrl->txq.vlan_en = config->hw_vlan_insert;
	vlan_inline = (dev_txoff & DEV_TX_OFFLOAD_VLAN_INSERT) &&
		      !config->hw_vlan_insert;
	/*
	 * If there are few Tx queues, saving CPU cycles is prioritized
	 * and data inlining is disabled altogether.
	 */
	if (inlen_send && priv->txqs_n >= txqs_inline) {
		/*
		 * The data sent with ordinary MLX5_OPCODE_SEND
		 * may be inlined in Ethernet Segment, align the
		 * length accordingly to fit entire WQEBBs.
		 */
		temp = (inlen_send / MLX5_WQE_SIZE) * MLX5_WQE_SIZE +
			MLX5_ESEG_MIN_INLINE_SIZE + MLX5_WQE_DSEG_SIZE;
		temp = RTE_MIN(temp, MLX5_WQE_SIZE_MAX +
			       MLX5_ESEG_MIN_INLINE_SIZE -
			       MLX5_WQE_CSEG_SIZE -
			       MLX5_WQE_ESEG_SIZE -
			       MLX5_WQE_DSEG_SIZE * 2);
		temp = RTE_MIN(temp, MLX5_SEND_MAX_INLINE_LEN);
		temp = RTE_MAX(temp, inlen_mode);
		if (temp != inlen_send) {
			DRV_LOG(INFO,
				"port %u ordinary send inline setting"
				" aligned from %u to %u",
				PORT_ID(priv), inlen_send, temp);
			inlen_send = temp;
		}
		/*
		 * Not aligned to cache lines, but to WQEs.
		 * The first bytes of data (initial alignment)
		 * are going to be copied explicitly at the
		 * beginning of the inlining buffer in the
		 * Ethernet Segment.
		 */
		assert(inlen_send >= MLX5_ESEG_MIN_INLINE_SIZE);
		assert(inlen_send <= MLX5_WQE_SIZE_MAX +
				     MLX5_ESEG_MIN_INLINE_SIZE -
				     MLX5_WQE_CSEG_SIZE -
				     MLX5_WQE_ESEG_SIZE -
				     MLX5_WQE_DSEG_SIZE * 2);
	} else if (inlen_mode) {
		/*
		 * If minimal inlining is requested we must
		 * enable inlining in general, regardless of the
		 * number of configured queues. Ignore the
		 * txq_inline_max devarg, this is not
		 * full-featured inline.
		 */
		inlen_send = inlen_mode;
		inlen_empw = 0;
	} else if (vlan_inline) {
		/*
		 * Hardware does not report offload for
		 * VLAN insertion, we must enable data inline
		 * to implement the feature in software.
		 */
		inlen_send = MLX5_ESEG_MIN_INLINE_SIZE;
		inlen_empw = 0;
	} else {
		inlen_send = 0;
		inlen_empw = 0;
	}
	txq_ctrl->txq.inlen_send = inlen_send;
	txq_ctrl->txq.inlen_mode = inlen_mode;
	txq_ctrl->txq.inlen_empw = 0;
	if (inlen_send && inlen_empw && priv->txqs_n >= txqs_inline) {
		/*
		 * The data sent with MLX5_OPCODE_ENHANCED_MPSW
		 * may be inlined in Data Segment, align the
		 * length accordingly to fit entire WQEBBs.
		 */
		temp = (inlen_empw + MLX5_WQE_SIZE - 1) / MLX5_WQE_SIZE;
		temp = temp * MLX5_WQE_SIZE +
		       MLX5_DSEG_MIN_INLINE_SIZE - MLX5_WQE_DSEG_SIZE;
		temp = RTE_MIN(temp, MLX5_WQE_SIZE_MAX +
			       MLX5_DSEG_MIN_INLINE_SIZE -
			       MLX5_WQE_CSEG_SIZE -
			       MLX5_WQE_ESEG_SIZE -
			       MLX5_WQE_DSEG_SIZE);
		temp = RTE_MIN(temp, MLX5_EMPW_MAX_INLINE_LEN);
		if (temp != inlen_empw) {
			DRV_LOG(INFO,
				"port %u enhanced empw inline setting"
				" aligned from %u to %u",
				PORT_ID(priv), inlen_empw, temp);
			inlen_empw = temp;
		}
		assert(inlen_empw >= MLX5_ESEG_MIN_INLINE_SIZE);
		assert(inlen_empw <= MLX5_WQE_SIZE_MAX +
				     MLX5_DSEG_MIN_INLINE_SIZE -
				     MLX5_WQE_CSEG_SIZE -
				     MLX5_WQE_ESEG_SIZE -
				     MLX5_WQE_DSEG_SIZE);
		txq_ctrl->txq.inlen_empw = inlen_empw;
	}
	txq_ctrl->max_inline_data = RTE_MAX(inlen_send, inlen_empw);
	if (tso) {
		txq_ctrl->max_tso_header = MLX5_MAX_TSO_HEADER;
		txq_ctrl->max_inline_data = RTE_MAX(txq_ctrl->max_inline_data,
						    MLX5_MAX_TSO_HEADER);
		txq_ctrl->txq.tso_en = 1;
	}
	txq_ctrl->txq.tunnel_en = config->tunnel_en | config->swp;
	txq_ctrl->txq.swp_en = ((DEV_TX_OFFLOAD_IP_TNL_TSO |
				 DEV_TX_OFFLOAD_UDP_TNL_TSO |
				 DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM) &
				txq_ctrl->txq.offloads) && config->swp;
}

/**
 * Create a DPDK Tx queue.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param idx
 *   TX queue index.
 * @param desc
 *   Number of descriptors to configure in queue.
 * @param socket
 *   NUMA socket on which memory must be allocated.
 * @param[in] conf
 *   Thresholds parameters.
 *
 * @return
 *   A DPDK queue object on success, NULL otherwise and rte_errno is set.
 */
struct mlx5_txq_ctrl *
mlx5_txq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
	     unsigned int socket, const struct rte_eth_txconf *conf)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_ctrl *tmpl;

	tmpl = rte_calloc_socket("TXQ", 1,
				 sizeof(*tmpl) +
				 desc * sizeof(struct rte_mbuf *),
				 0, socket);
	if (!tmpl) {
		rte_errno = ENOMEM;
		return NULL;
	}
	if (mlx5_mr_btree_init(&tmpl->txq.mr_ctrl.cache_bh,
			       MLX5_MR_BTREE_CACHE_N, socket)) {
		/* rte_errno is already set. */
		goto error;
	}
	/* Save pointer of global generation number to check memory event. */
	tmpl->txq.mr_ctrl.dev_gen_ptr = &priv->sh->mr.dev_gen;
	assert(desc > MLX5_TX_COMP_THRESH);
	tmpl->txq.offloads = conf->offloads |
			     dev->data->dev_conf.txmode.offloads;
	tmpl->priv = priv;
	tmpl->socket = socket;
	tmpl->txq.elts_n = log2above(desc);
	tmpl->txq.elts_s = desc;
	tmpl->txq.elts_m = desc - 1;
	tmpl->txq.port_id = dev->data->port_id;
	tmpl->txq.idx = idx;
	txq_set_params(tmpl);
	if (txq_calc_wqebb_cnt(tmpl) >
	    priv->sh->device_attr.orig_attr.max_qp_wr) {
		DRV_LOG(ERR,
			"port %u Tx WQEBB count (%d) exceeds the limit (%d),"
			" try smaller queue size",
			dev->data->port_id, txq_calc_wqebb_cnt(tmpl),
			priv->sh->device_attr.orig_attr.max_qp_wr);
		rte_errno = ENOMEM;
		goto error;
	}
	rte_atomic32_inc(&tmpl->refcnt);
	LIST_INSERT_HEAD(&priv->txqsctrl, tmpl, next);
	return tmpl;
error:
	rte_free(tmpl);
	return NULL;
}

/**
 * Get a Tx queue.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param idx
 *   TX queue index.
 *
 * @return
 *   A pointer to the queue if it exists.
 */
struct mlx5_txq_ctrl *
mlx5_txq_get(struct rte_eth_dev *dev, uint16_t idx)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_ctrl *ctrl = NULL;

	if ((*priv->txqs)[idx]) {
		ctrl = container_of((*priv->txqs)[idx], struct mlx5_txq_ctrl,
				    txq);
		mlx5_txq_ibv_get(dev, idx);
		rte_atomic32_inc(&ctrl->refcnt);
	}
	return ctrl;
}

/**
 * Release a Tx queue.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param idx
 *   TX queue index.
 *
 * @return
 *   1 while a reference on it exists, 0 when freed.
 */
int
mlx5_txq_release(struct rte_eth_dev *dev, uint16_t idx)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_ctrl *txq;

	if (!(*priv->txqs)[idx])
		return 0;
	txq = container_of((*priv->txqs)[idx], struct mlx5_txq_ctrl, txq);
	if (txq->ibv && !mlx5_txq_ibv_release(txq->ibv))
		txq->ibv = NULL;
	if (rte_atomic32_dec_and_test(&txq->refcnt)) {
		txq_free_elts(txq);
		mlx5_mr_btree_free(&txq->txq.mr_ctrl.cache_bh);
		LIST_REMOVE(txq, next);
		rte_free(txq);
		(*priv->txqs)[idx] = NULL;
		return 0;
	}
	return 1;
}

/**
 * Verify if the queue can be released.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param idx
 *   TX queue index.
 *
 * @return
 *   1 if the queue can be released.
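 *   0 if it is still referenced, a negative value if the queue is not
 *   configured.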
 */
int
mlx5_txq_releasable(struct rte_eth_dev *dev, uint16_t idx)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_ctrl *txq;

	if (!(*priv->txqs)[idx])
		return -1;
	txq = container_of((*priv->txqs)[idx], struct mlx5_txq_ctrl, txq);
	return (rte_atomic32_read(&txq->refcnt) == 1);
}

/**
 * Verify the Tx queue list is empty.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   The number of objects not released.
 */
int
mlx5_txq_verify(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_ctrl *txq_ctrl;
	int ret = 0;

	LIST_FOREACH(txq_ctrl, &priv->txqsctrl, next) {
		DRV_LOG(DEBUG, "port %u Tx queue %u still referenced",
			dev->data->port_id, txq_ctrl->txq.idx);
		++ret;
	}
	return ret;
}