/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2017 6WIND S.A.
 * Copyright 2017 Mellanox Technologies, Ltd
 */

/**
 * @file
 * Tx queues configuration for mlx4 driver.
 */

#include <errno.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <sys/mman.h>
#include <inttypes.h>
#include <unistd.h>

/* Verbs headers do not support -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif

#include <rte_common.h>
#include <rte_errno.h>
#include <ethdev_driver.h>
#include <rte_malloc.h>
#include <rte_mbuf.h>
#include <rte_mempool.h>

#include "mlx4.h"
#include "mlx4_glue.h"
#include "mlx4_prm.h"
#include "mlx4_rxtx.h"
#include "mlx4_utils.h"

/**
 * Initialize Tx UAR registers for primary process.
 *
 * @param txq
 *   Pointer to Tx queue structure.
 */
static void
txq_uar_init(struct txq *txq)
{
	struct mlx4_priv *priv = txq->priv;
	struct mlx4_proc_priv *ppriv = MLX4_PROC_PRIV(PORT_ID(priv));

	MLX4_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
	MLX4_ASSERT(ppriv);
	ppriv->uar_table[txq->stats.idx] = txq->msq.db;
}

#ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET
/**
 * Remap UAR register of a Tx queue for secondary process.
 *
 * The remapped address is stored in a table in the device's process private
 * structure, indexed by queue index.
 *
 * @param txq
 *   Pointer to Tx queue structure.
 * @param fd
 *   Verbs file descriptor to map UAR pages.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
txq_uar_init_secondary(struct txq *txq, int fd)
{
	struct mlx4_priv *priv = txq->priv;
	struct mlx4_proc_priv *ppriv = MLX4_PROC_PRIV(PORT_ID(priv));
	void *addr;
	uintptr_t uar_va;
	uintptr_t offset;
	const size_t page_size = sysconf(_SC_PAGESIZE);

	MLX4_ASSERT(ppriv);
	/*
	 * As in rdma-core, UARs are mapped at OS page size granularity.
	 * See the libmlx4 function mlx4_init_context().
	 */
	uar_va = (uintptr_t)txq->msq.db;
	offset = uar_va & (page_size - 1); /* Offset in page. */
	addr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, fd,
		    txq->msq.uar_mmap_offset);
	if (addr == MAP_FAILED) {
		ERROR("port %u mmap failed for BF reg of txq %u",
		      txq->port_id, txq->stats.idx);
		rte_errno = ENXIO;
		return -rte_errno;
	}
	addr = RTE_PTR_ADD(addr, offset);
	ppriv->uar_table[txq->stats.idx] = addr;
	return 0;
}

/**
 * Unmap UAR register of a Tx queue for secondary process.
 *
 * @param txq
 *   Pointer to Tx queue structure.
 */
static void
txq_uar_uninit_secondary(struct txq *txq)
{
	struct mlx4_proc_priv *ppriv = MLX4_PROC_PRIV(PORT_ID(txq->priv));
	const size_t page_size = sysconf(_SC_PAGESIZE);
	void *addr;

	addr = ppriv->uar_table[txq->stats.idx];
	munmap(RTE_PTR_ALIGN_FLOOR(addr, page_size), page_size);
}
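
/*
 * Note on txq_uar_init_secondary(): the doorbell register lies at some offset
 * within a UAR page and only whole pages can be mmap()'ed, so the secondary
 * process re-applies the primary's in-page offset to its own mapping. A
 * minimal sketch of the arithmetic, with purely illustrative values and a
 * 4 KiB page size assumed:
 *
 *   uar_va = (uintptr_t)txq->msq.db                  e.g. 0x7f0123456890
 *   offset = uar_va & (4096 - 1)                     = 0x890
 *   addr   = mmap(..., fd, txq->msq.uar_mmap_offset) + 0x890
 *
 * The kernel resolves uar_mmap_offset to the same UAR page used by the
 * primary process, so both processes end up ringing the same doorbell.
 */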

/**
 * Initialize Tx UAR registers for secondary process.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fd
 *   Verbs file descriptor to map UAR pages.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx4_tx_uar_init_secondary(struct rte_eth_dev *dev, int fd)
{
	const unsigned int txqs_n = dev->data->nb_tx_queues;
	struct txq *txq;
	unsigned int i;
	int ret;

	MLX4_ASSERT(rte_eal_process_type() == RTE_PROC_SECONDARY);
	for (i = 0; i != txqs_n; ++i) {
		txq = dev->data->tx_queues[i];
		if (!txq)
			continue;
		MLX4_ASSERT(txq->stats.idx == (uint16_t)i);
		ret = txq_uar_init_secondary(txq, fd);
		if (ret)
			goto error;
	}
	return 0;
error:
	/* Rollback. */
	do {
		txq = dev->data->tx_queues[i];
		if (!txq)
			continue;
		txq_uar_uninit_secondary(txq);
	} while (i--);
	return -rte_errno;
}

void
mlx4_tx_uar_uninit_secondary(struct rte_eth_dev *dev)
{
	struct mlx4_proc_priv *ppriv =
		(struct mlx4_proc_priv *)dev->process_private;
	const size_t page_size = sysconf(_SC_PAGESIZE);
	void *addr;
	size_t i;

	if (page_size == (size_t)-1) {
		ERROR("Failed to get mem page size");
		return;
	}
	for (i = 0; i < ppriv->uar_table_sz; i++) {
		addr = ppriv->uar_table[i];
		if (addr)
			munmap(RTE_PTR_ALIGN_FLOOR(addr, page_size),
			       page_size);
	}
}

#else
int
mlx4_tx_uar_init_secondary(struct rte_eth_dev *dev __rte_unused,
			   int fd __rte_unused)
{
	MLX4_ASSERT(rte_eal_process_type() == RTE_PROC_SECONDARY);
	ERROR("UAR remap is not supported");
	rte_errno = ENOTSUP;
	return -rte_errno;
}

void
mlx4_tx_uar_uninit_secondary(struct rte_eth_dev *dev __rte_unused)
{
	MLX4_ASSERT(rte_eal_process_type() == RTE_PROC_SECONDARY);
	ERROR("UAR remap is not supported");
}
#endif

/**
 * Free Tx queue elements.
 *
 * @param txq
 *   Pointer to Tx queue structure.
 */
static void
mlx4_txq_free_elts(struct txq *txq)
{
	unsigned int elts_head = txq->elts_head;
	unsigned int elts_tail = txq->elts_tail;
	struct txq_elt (*elts)[txq->elts_n] = txq->elts;
	unsigned int elts_m = txq->elts_n - 1;

	DEBUG("%p: freeing WRs", (void *)txq);
	while (elts_tail != elts_head) {
		struct txq_elt *elt = &(*elts)[elts_tail++ & elts_m];

		MLX4_ASSERT(elt->buf != NULL);
		rte_pktmbuf_free(elt->buf);
		elt->buf = NULL;
		elt->wqe = NULL;
	}
	txq->elts_tail = txq->elts_head;
}

/**
 * Retrieve information needed to directly access the Tx queue.
 *
 * @param txq
 *   Pointer to Tx queue structure.
 * @param mlxdv
 *   Pointer to device information for this Tx queue.
 */
static void
mlx4_txq_fill_dv_obj_info(struct txq *txq, struct mlx4dv_obj *mlxdv)
{
	struct mlx4_sq *sq = &txq->msq;
	struct mlx4_cq *cq = &txq->mcq;
	struct mlx4dv_qp *dqp = mlxdv->qp.out;
	struct mlx4dv_cq *dcq = mlxdv->cq.out;

	/* Total length, including headroom and spare WQEs. */
	sq->size = (uint32_t)dqp->rq.offset - (uint32_t)dqp->sq.offset;
	sq->buf = (uint8_t *)dqp->buf.buf + dqp->sq.offset;
	sq->eob = sq->buf + sq->size;
	uint32_t headroom_size = 2048 + (1 << dqp->sq.wqe_shift);
	/* A contiguous headroom of this size must always remain free. */
	sq->remain_size = sq->size - headroom_size;
	sq->owner_opcode = MLX4_OPCODE_SEND | (0u << MLX4_SQ_OWNER_BIT);
	sq->stamp = rte_cpu_to_be_32(MLX4_SQ_STAMP_VAL |
				     (0u << MLX4_SQ_OWNER_BIT));
#ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET
	sq->uar_mmap_offset = dqp->uar_mmap_offset;
#else
	sq->uar_mmap_offset = -1; /* Make mmap() fail. */
#endif
	sq->db = dqp->sdb;
	sq->doorbell_qpn = dqp->doorbell_qpn;
	cq->buf = dcq->buf.buf;
	cq->cqe_cnt = dcq->cqe_cnt;
	cq->set_ci_db = dcq->set_ci_db;
	cq->cqe_64 = (dcq->cqe_size & 64) ? 1 : 0;
}
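
/*
 * Worked example of the send queue geometry computed above, with purely
 * illustrative values (a 16 KiB SQ and 64 B WQE basic blocks are assumed,
 * i.e. dqp->sq.wqe_shift == 6):
 *
 *   sq->size      = rq.offset - sq.offset = 16384
 *   headroom_size = 2048 + (1 << 6)       = 2112
 *   remain_size   = 16384 - 2112          = 14272
 *
 * remain_size is the budget the Tx burst routine may consume before it must
 * reclaim completed WQEs, so that a contiguous headroom always stays free
 * ahead of the hardware.
 */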

/**
 * Returns the per-port supported offloads.
 *
 * @param priv
 *   Pointer to private structure.
 *
 * @return
 *   Supported Tx offloads.
 */
uint64_t
mlx4_get_tx_port_offloads(struct mlx4_priv *priv)
{
	uint64_t offloads = DEV_TX_OFFLOAD_MULTI_SEGS;

	if (priv->hw_csum) {
		offloads |= (DEV_TX_OFFLOAD_IPV4_CKSUM |
			     DEV_TX_OFFLOAD_UDP_CKSUM |
			     DEV_TX_OFFLOAD_TCP_CKSUM);
	}
	if (priv->tso)
		offloads |= DEV_TX_OFFLOAD_TCP_TSO;
	if (priv->hw_csum_l2tun) {
		offloads |= DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM;
		if (priv->tso)
			offloads |= (DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
				     DEV_TX_OFFLOAD_GRE_TNL_TSO);
	}
	return offloads;
}

/**
 * DPDK callback to configure a Tx queue.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param idx
 *   Tx queue index.
 * @param desc
 *   Number of descriptors to configure in queue.
 * @param socket
 *   NUMA socket on which memory must be allocated.
 * @param[in] conf
 *   Thresholds parameters.
 *
 * @return
 *   0 on success, negative errno value otherwise and rte_errno is set.
 */
int
mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
		    unsigned int socket, const struct rte_eth_txconf *conf)
{
	struct mlx4_priv *priv = dev->data->dev_private;
	struct mlx4dv_obj mlxdv;
	struct mlx4dv_qp dv_qp;
	struct mlx4dv_cq dv_cq;
	struct txq_elt (*elts)[rte_align32pow2(desc)];
	struct ibv_qp_init_attr qp_init_attr;
	struct txq *txq;
	uint8_t *bounce_buf;
	struct mlx4_malloc_vec vec[] = {
		{
			.align = RTE_CACHE_LINE_SIZE,
			.size = sizeof(*txq),
			.addr = (void **)&txq,
		},
		{
			.align = RTE_CACHE_LINE_SIZE,
			.size = sizeof(*elts),
			.addr = (void **)&elts,
		},
		{
			.align = RTE_CACHE_LINE_SIZE,
			.size = MLX4_MAX_WQE_SIZE,
			.addr = (void **)&bounce_buf,
		},
	};
	int ret;
	uint64_t offloads;

	offloads = conf->offloads | dev->data->dev_conf.txmode.offloads;
	DEBUG("%p: configuring queue %u for %u descriptors",
	      (void *)dev, idx, desc);
	if (idx >= dev->data->nb_tx_queues) {
		rte_errno = EOVERFLOW;
		ERROR("%p: queue index out of range (%u >= %u)",
		      (void *)dev, idx, dev->data->nb_tx_queues);
		return -rte_errno;
	}
	txq = dev->data->tx_queues[idx];
	if (txq) {
		rte_errno = EEXIST;
		DEBUG("%p: Tx queue %u already configured, release it first",
		      (void *)dev, idx);
		return -rte_errno;
	}
	if (!desc) {
		rte_errno = EINVAL;
		ERROR("%p: invalid number of Tx descriptors", (void *)dev);
		return -rte_errno;
	}
	if (desc != RTE_DIM(*elts)) {
		desc = RTE_DIM(*elts);
		WARN("%p: increased number of descriptors in Tx queue %u"
		     " to the next power of two (%u)",
		     (void *)dev, idx, desc);
	}
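	/*
	 * Example of the rounding above, with an illustrative value only: a
	 * request for desc = 1000 yields RTE_DIM(*elts) ==
	 * rte_align32pow2(1000) == 1024, so desc is bumped to 1024 and the
	 * warning reports the adjusted ring size.
	 */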
	/* Allocate and initialize Tx queue. */
	mlx4_zmallocv_socket("TXQ", vec, RTE_DIM(vec), socket);
	if (!txq) {
		ERROR("%p: unable to allocate queue index %u",
		      (void *)dev, idx);
		return -rte_errno;
	}
	*txq = (struct txq){
		.priv = priv,
		.port_id = dev->data->port_id,
		.stats = {
			.idx = idx,
		},
		.socket = socket,
		.elts_n = desc,
		.elts = elts,
		.elts_head = 0,
		.elts_tail = 0,
		/*
		 * Request send completion every MLX4_PMD_TX_PER_COMP_REQ
		 * packets or at least 4 times per ring.
		 */
		.elts_comp_cd =
			RTE_MIN(MLX4_PMD_TX_PER_COMP_REQ, desc / 4),
		.elts_comp_cd_init =
			RTE_MIN(MLX4_PMD_TX_PER_COMP_REQ, desc / 4),
		.csum = priv->hw_csum &&
			(offloads & (DEV_TX_OFFLOAD_IPV4_CKSUM |
				     DEV_TX_OFFLOAD_UDP_CKSUM |
				     DEV_TX_OFFLOAD_TCP_CKSUM)),
		.csum_l2tun = priv->hw_csum_l2tun &&
			      (offloads &
			       DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM),
		/* Enable Tx loopback for VF devices. */
		.lb = !!priv->vf,
		.bounce_buf = bounce_buf,
	};
	priv->verbs_alloc_ctx.type = MLX4_VERBS_ALLOC_TYPE_TX_QUEUE;
	priv->verbs_alloc_ctx.obj = txq;
	txq->cq = mlx4_glue->create_cq(priv->ctx, desc, NULL, NULL, 0);
	if (!txq->cq) {
		rte_errno = ENOMEM;
		ERROR("%p: CQ creation failure: %s",
		      (void *)dev, strerror(rte_errno));
		goto error;
	}
	qp_init_attr = (struct ibv_qp_init_attr){
		.send_cq = txq->cq,
		.recv_cq = txq->cq,
		.cap = {
			.max_send_wr =
				RTE_MIN(priv->device_attr.max_qp_wr, desc),
			.max_send_sge = 1,
			.max_inline_data = MLX4_PMD_MAX_INLINE,
		},
		.qp_type = IBV_QPT_RAW_PACKET,
		/* Do not request a completion for every send by default. */
		.sq_sig_all = 0,
	};
	txq->qp = mlx4_glue->create_qp(priv->pd, &qp_init_attr);
	if (!txq->qp) {
		rte_errno = errno ? errno : EINVAL;
		ERROR("%p: QP creation failure: %s",
		      (void *)dev, strerror(rte_errno));
		goto error;
	}
	txq->max_inline = qp_init_attr.cap.max_inline_data;
	ret = mlx4_glue->modify_qp
		(txq->qp,
		 &(struct ibv_qp_attr){
			.qp_state = IBV_QPS_INIT,
			.port_num = priv->port,
		 },
		 IBV_QP_STATE | IBV_QP_PORT);
	if (ret) {
		rte_errno = ret;
		ERROR("%p: QP state to IBV_QPS_INIT failed: %s",
		      (void *)dev, strerror(rte_errno));
		goto error;
	}
	ret = mlx4_glue->modify_qp
		(txq->qp,
		 &(struct ibv_qp_attr){
			.qp_state = IBV_QPS_RTR,
		 },
		 IBV_QP_STATE);
	if (ret) {
		rte_errno = ret;
		ERROR("%p: QP state to IBV_QPS_RTR failed: %s",
		      (void *)dev, strerror(rte_errno));
		goto error;
	}
	ret = mlx4_glue->modify_qp
		(txq->qp,
		 &(struct ibv_qp_attr){
			.qp_state = IBV_QPS_RTS,
		 },
		 IBV_QP_STATE);
	if (ret) {
		rte_errno = ret;
		ERROR("%p: QP state to IBV_QPS_RTS failed: %s",
		      (void *)dev, strerror(rte_errno));
		goto error;
	}
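	/*
	 * The QP is now ready to send. For IBV_QPT_RAW_PACKET the
	 * INIT -> RTR -> RTS transitions above carry no address vector or
	 * path attributes; only IBV_QP_STATE (plus IBV_QP_PORT at INIT) is
	 * required before posting send WQEs.
	 */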
	/* Retrieve device queue information. */
#ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET
	dv_qp = (struct mlx4dv_qp){
		.comp_mask = MLX4DV_QP_MASK_UAR_MMAP_OFFSET,
	};
#endif
	mlxdv.cq.in = txq->cq;
	mlxdv.cq.out = &dv_cq;
	mlxdv.qp.in = txq->qp;
	mlxdv.qp.out = &dv_qp;
	ret = mlx4_glue->dv_init_obj(&mlxdv, MLX4DV_OBJ_QP | MLX4DV_OBJ_CQ);
	if (ret) {
		rte_errno = EINVAL;
		ERROR("%p: failed to obtain information needed for"
		      " accessing the device queues", (void *)dev);
		goto error;
	}
#ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET
	if (!(dv_qp.comp_mask & MLX4DV_QP_MASK_UAR_MMAP_OFFSET)) {
		WARN("%p: failed to obtain UAR mmap offset", (void *)dev);
		dv_qp.uar_mmap_offset = -1; /* Make mmap() fail. */
	}
#endif
	mlx4_txq_fill_dv_obj_info(txq, &mlxdv);
	txq_uar_init(txq);
	/* Save the first WQE pointer in the first element. */
	(&(*txq->elts)[0])->wqe =
		(volatile struct mlx4_wqe_ctrl_seg *)txq->msq.buf;
	if (mlx4_mr_btree_init(&txq->mr_ctrl.cache_bh,
			       MLX4_MR_BTREE_CACHE_N, socket)) {
		/* rte_errno is already set. */
		goto error;
	}
	/* Save the global generation number pointer to check memory events. */
	txq->mr_ctrl.dev_gen_ptr = &priv->mr.dev_gen;
	DEBUG("%p: adding Tx queue %p to list", (void *)dev, (void *)txq);
	dev->data->tx_queues[idx] = txq;
	priv->verbs_alloc_ctx.type = MLX4_VERBS_ALLOC_TYPE_NONE;
	return 0;
error:
	dev->data->tx_queues[idx] = NULL;
	ret = rte_errno;
	mlx4_tx_queue_release(txq);
	rte_errno = ret;
	MLX4_ASSERT(rte_errno > 0);
	priv->verbs_alloc_ctx.type = MLX4_VERBS_ALLOC_TYPE_NONE;
	return -rte_errno;
}

/**
 * DPDK callback to release a Tx queue.
 *
 * @param dpdk_txq
 *   Generic Tx queue pointer.
 */
void
mlx4_tx_queue_release(void *dpdk_txq)
{
	struct txq *txq = (struct txq *)dpdk_txq;
	struct mlx4_priv *priv;
	unsigned int i;

	if (txq == NULL)
		return;
	priv = txq->priv;
	for (i = 0; i != ETH_DEV(priv)->data->nb_tx_queues; ++i)
		if (ETH_DEV(priv)->data->tx_queues[i] == txq) {
			DEBUG("%p: removing Tx queue %p from list",
			      (void *)ETH_DEV(priv), (void *)txq);
			ETH_DEV(priv)->data->tx_queues[i] = NULL;
			break;
		}
	mlx4_txq_free_elts(txq);
	if (txq->qp)
		claim_zero(mlx4_glue->destroy_qp(txq->qp));
	if (txq->cq)
		claim_zero(mlx4_glue->destroy_cq(txq->cq));
	mlx4_mr_btree_free(&txq->mr_ctrl.cache_bh);
	rte_free(txq);
}
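
/*
 * Typical call flow from the ethdev layer (a sketch with illustrative
 * arguments, not literal driver code):
 *
 *   rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &dev_conf);
 *   rte_eth_tx_queue_setup(port_id, 0, 512, SOCKET_ID_ANY, &txconf);
 *     -> mlx4_tx_queue_setup(dev, 0, 512, socket, &txconf)
 *   ...
 *   rte_eth_dev_close(port_id);
 *     -> mlx4_tx_queue_release(dev->data->tx_queues[0])
 */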