1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2020 Mellanox Technologies, Ltd 3 */ 4 5 #include <stddef.h> 6 #include <errno.h> 7 #include <string.h> 8 #include <stdint.h> 9 #include <unistd.h> 10 #include <inttypes.h> 11 #include <sys/queue.h> 12 13 #include "mlx5_autoconf.h" 14 15 #include <rte_mbuf.h> 16 #include <rte_malloc.h> 17 #include <rte_ethdev_driver.h> 18 #include <rte_common.h> 19 20 #include <mlx5_glue.h> 21 #include <mlx5_common.h> 22 #include <mlx5_common_mr.h> 23 #include <mlx5_rxtx.h> 24 #include <mlx5_verbs.h> 25 #include <mlx5_utils.h> 26 #include <mlx5_malloc.h> 27 28 /** 29 * Register mr. Given protection domain pointer, pointer to addr and length 30 * register the memory region. 31 * 32 * @param[in] pd 33 * Pointer to protection domain context. 34 * @param[in] addr 35 * Pointer to memory start address. 36 * @param[in] length 37 * Length of the memory to register. 38 * @param[out] pmd_mr 39 * pmd_mr struct set with lkey, address, length and pointer to mr object 40 * 41 * @return 42 * 0 on successful registration, -1 otherwise 43 */ 44 static int 45 mlx5_reg_mr(void *pd, void *addr, size_t length, 46 struct mlx5_pmd_mr *pmd_mr) 47 { 48 return mlx5_common_verbs_reg_mr(pd, addr, length, pmd_mr); 49 } 50 51 /** 52 * Deregister mr. Given the mlx5 pmd MR - deregister the MR 53 * 54 * @param[in] pmd_mr 55 * pmd_mr struct set with lkey, address, length and pointer to mr object 56 * 57 */ 58 static void 59 mlx5_dereg_mr(struct mlx5_pmd_mr *pmd_mr) 60 { 61 mlx5_common_verbs_dereg_mr(pmd_mr); 62 } 63 64 /* verbs operations. */ 65 const struct mlx5_verbs_ops mlx5_verbs_ops = { 66 .reg_mr = mlx5_reg_mr, 67 .dereg_mr = mlx5_dereg_mr, 68 }; 69 70 /** 71 * Modify Rx WQ vlan stripping offload 72 * 73 * @param rxq_obj 74 * Rx queue object. 75 * 76 * @return 0 on success, non-0 otherwise 77 */ 78 static int 79 mlx5_rxq_obj_modify_wq_vlan_strip(struct mlx5_rxq_obj *rxq_obj, int on) 80 { 81 uint16_t vlan_offloads = 82 (on ? IBV_WQ_FLAGS_CVLAN_STRIPPING : 0) | 83 0; 84 struct ibv_wq_attr mod; 85 mod = (struct ibv_wq_attr){ 86 .attr_mask = IBV_WQ_ATTR_FLAGS, 87 .flags_mask = IBV_WQ_FLAGS_CVLAN_STRIPPING, 88 .flags = vlan_offloads, 89 }; 90 91 return mlx5_glue->modify_wq(rxq_obj->wq, &mod); 92 } 93 94 /** 95 * Modifies the attributes for the specified WQ. 96 * 97 * @param rxq_obj 98 * Verbs Rx queue object. 99 * @param type 100 * Type of change queue state. 101 * 102 * @return 103 * 0 on success, a negative errno value otherwise and rte_errno is set. 104 */ 105 static int 106 mlx5_ibv_modify_wq(struct mlx5_rxq_obj *rxq_obj, uint8_t type) 107 { 108 struct ibv_wq_attr mod = { 109 .attr_mask = IBV_WQ_ATTR_STATE, 110 .wq_state = (enum ibv_wq_state)type, 111 }; 112 113 return mlx5_glue->modify_wq(rxq_obj->wq, &mod); 114 } 115 116 /** 117 * Modify QP using Verbs API. 118 * 119 * @param txq_obj 120 * Verbs Tx queue object. 121 * @param type 122 * Type of change queue state. 123 * @param dev_port 124 * IB device port number. 125 * 126 * @return 127 * 0 on success, a negative errno value otherwise and rte_errno is set. 128 */ 129 static int 130 mlx5_ibv_modify_qp(struct mlx5_txq_obj *obj, enum mlx5_txq_modify_type type, 131 uint8_t dev_port) 132 { 133 struct ibv_qp_attr mod = { 134 .qp_state = IBV_QPS_RESET, 135 .port_num = dev_port, 136 }; 137 int attr_mask = (IBV_QP_STATE | IBV_QP_PORT); 138 int ret; 139 140 if (type != MLX5_TXQ_MOD_RST2RDY) { 141 ret = mlx5_glue->modify_qp(obj->qp, &mod, IBV_QP_STATE); 142 if (ret) { 143 DRV_LOG(ERR, "Cannot change Tx QP state to RESET %s", 144 strerror(errno)); 145 rte_errno = errno; 146 return ret; 147 } 148 if (type == MLX5_TXQ_MOD_RDY2RST) 149 return 0; 150 } 151 if (type == MLX5_TXQ_MOD_ERR2RDY) 152 attr_mask = IBV_QP_STATE; 153 mod.qp_state = IBV_QPS_INIT; 154 ret = mlx5_glue->modify_qp(obj->qp, &mod, attr_mask); 155 if (ret) { 156 DRV_LOG(ERR, "Cannot change Tx QP state to INIT %s", 157 strerror(errno)); 158 rte_errno = errno; 159 return ret; 160 } 161 mod.qp_state = IBV_QPS_RTR; 162 ret = mlx5_glue->modify_qp(obj->qp, &mod, IBV_QP_STATE); 163 if (ret) { 164 DRV_LOG(ERR, "Cannot change Tx QP state to RTR %s", 165 strerror(errno)); 166 rte_errno = errno; 167 return ret; 168 } 169 mod.qp_state = IBV_QPS_RTS; 170 ret = mlx5_glue->modify_qp(obj->qp, &mod, IBV_QP_STATE); 171 if (ret) { 172 DRV_LOG(ERR, "Cannot change Tx QP state to RTS %s", 173 strerror(errno)); 174 rte_errno = errno; 175 return ret; 176 } 177 return 0; 178 } 179 180 /** 181 * Create a CQ Verbs object. 182 * 183 * @param dev 184 * Pointer to Ethernet device. 185 * @param idx 186 * Queue index in DPDK Rx queue array. 187 * 188 * @return 189 * The Verbs CQ object initialized, NULL otherwise and rte_errno is set. 190 */ 191 static struct ibv_cq * 192 mlx5_rxq_ibv_cq_create(struct rte_eth_dev *dev, uint16_t idx) 193 { 194 struct mlx5_priv *priv = dev->data->dev_private; 195 struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx]; 196 struct mlx5_rxq_ctrl *rxq_ctrl = 197 container_of(rxq_data, struct mlx5_rxq_ctrl, rxq); 198 struct mlx5_rxq_obj *rxq_obj = rxq_ctrl->obj; 199 unsigned int cqe_n = mlx5_rxq_cqe_num(rxq_data); 200 struct { 201 struct ibv_cq_init_attr_ex ibv; 202 struct mlx5dv_cq_init_attr mlx5; 203 } cq_attr; 204 205 cq_attr.ibv = (struct ibv_cq_init_attr_ex){ 206 .cqe = cqe_n, 207 .channel = rxq_obj->ibv_channel, 208 .comp_mask = 0, 209 }; 210 cq_attr.mlx5 = (struct mlx5dv_cq_init_attr){ 211 .comp_mask = 0, 212 }; 213 if (priv->config.cqe_comp && !rxq_data->hw_timestamp) { 214 cq_attr.mlx5.comp_mask |= 215 MLX5DV_CQ_INIT_ATTR_MASK_COMPRESSED_CQE; 216 #ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT 217 cq_attr.mlx5.cqe_comp_res_format = 218 mlx5_rxq_mprq_enabled(rxq_data) ? 219 MLX5DV_CQE_RES_FORMAT_CSUM_STRIDX : 220 MLX5DV_CQE_RES_FORMAT_HASH; 221 #else 222 cq_attr.mlx5.cqe_comp_res_format = MLX5DV_CQE_RES_FORMAT_HASH; 223 #endif 224 /* 225 * For vectorized Rx, it must not be doubled in order to 226 * make cq_ci and rq_ci aligned. 227 */ 228 if (mlx5_rxq_check_vec_support(rxq_data) < 0) 229 cq_attr.ibv.cqe *= 2; 230 } else if (priv->config.cqe_comp && rxq_data->hw_timestamp) { 231 DRV_LOG(DEBUG, 232 "Port %u Rx CQE compression is disabled for HW" 233 " timestamp.", 234 dev->data->port_id); 235 } 236 #ifdef HAVE_IBV_MLX5_MOD_CQE_128B_PAD 237 if (priv->config.cqe_pad) { 238 cq_attr.mlx5.comp_mask |= MLX5DV_CQ_INIT_ATTR_MASK_FLAGS; 239 cq_attr.mlx5.flags |= MLX5DV_CQ_INIT_ATTR_FLAGS_CQE_PAD; 240 } 241 #endif 242 return mlx5_glue->cq_ex_to_cq(mlx5_glue->dv_create_cq(priv->sh->ctx, 243 &cq_attr.ibv, 244 &cq_attr.mlx5)); 245 } 246 247 /** 248 * Create a WQ Verbs object. 249 * 250 * @param dev 251 * Pointer to Ethernet device. 252 * @param idx 253 * Queue index in DPDK Rx queue array. 254 * 255 * @return 256 * The Verbs WQ object initialized, NULL otherwise and rte_errno is set. 257 */ 258 static struct ibv_wq * 259 mlx5_rxq_ibv_wq_create(struct rte_eth_dev *dev, uint16_t idx) 260 { 261 struct mlx5_priv *priv = dev->data->dev_private; 262 struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx]; 263 struct mlx5_rxq_ctrl *rxq_ctrl = 264 container_of(rxq_data, struct mlx5_rxq_ctrl, rxq); 265 struct mlx5_rxq_obj *rxq_obj = rxq_ctrl->obj; 266 unsigned int wqe_n = 1 << rxq_data->elts_n; 267 struct { 268 struct ibv_wq_init_attr ibv; 269 #ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT 270 struct mlx5dv_wq_init_attr mlx5; 271 #endif 272 } wq_attr; 273 274 wq_attr.ibv = (struct ibv_wq_init_attr){ 275 .wq_context = NULL, /* Could be useful in the future. */ 276 .wq_type = IBV_WQT_RQ, 277 /* Max number of outstanding WRs. */ 278 .max_wr = wqe_n >> rxq_data->sges_n, 279 /* Max number of scatter/gather elements in a WR. */ 280 .max_sge = 1 << rxq_data->sges_n, 281 .pd = priv->sh->pd, 282 .cq = rxq_obj->ibv_cq, 283 .comp_mask = IBV_WQ_FLAGS_CVLAN_STRIPPING | 0, 284 .create_flags = (rxq_data->vlan_strip ? 285 IBV_WQ_FLAGS_CVLAN_STRIPPING : 0), 286 }; 287 /* By default, FCS (CRC) is stripped by hardware. */ 288 if (rxq_data->crc_present) { 289 wq_attr.ibv.create_flags |= IBV_WQ_FLAGS_SCATTER_FCS; 290 wq_attr.ibv.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS; 291 } 292 if (priv->config.hw_padding) { 293 #if defined(HAVE_IBV_WQ_FLAG_RX_END_PADDING) 294 wq_attr.ibv.create_flags |= IBV_WQ_FLAG_RX_END_PADDING; 295 wq_attr.ibv.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS; 296 #elif defined(HAVE_IBV_WQ_FLAGS_PCI_WRITE_END_PADDING) 297 wq_attr.ibv.create_flags |= IBV_WQ_FLAGS_PCI_WRITE_END_PADDING; 298 wq_attr.ibv.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS; 299 #endif 300 } 301 #ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT 302 wq_attr.mlx5 = (struct mlx5dv_wq_init_attr){ 303 .comp_mask = 0, 304 }; 305 if (mlx5_rxq_mprq_enabled(rxq_data)) { 306 struct mlx5dv_striding_rq_init_attr *mprq_attr = 307 &wq_attr.mlx5.striding_rq_attrs; 308 309 wq_attr.mlx5.comp_mask |= MLX5DV_WQ_INIT_ATTR_MASK_STRIDING_RQ; 310 *mprq_attr = (struct mlx5dv_striding_rq_init_attr){ 311 .single_stride_log_num_of_bytes = rxq_data->strd_sz_n, 312 .single_wqe_log_num_of_strides = rxq_data->strd_num_n, 313 .two_byte_shift_en = MLX5_MPRQ_TWO_BYTE_SHIFT, 314 }; 315 } 316 rxq_obj->wq = mlx5_glue->dv_create_wq(priv->sh->ctx, &wq_attr.ibv, 317 &wq_attr.mlx5); 318 #else 319 rxq_obj->wq = mlx5_glue->create_wq(priv->sh->ctx, &wq_attr.ibv); 320 #endif 321 if (rxq_obj->wq) { 322 /* 323 * Make sure number of WRs*SGEs match expectations since a queue 324 * cannot allocate more than "desc" buffers. 325 */ 326 if (wq_attr.ibv.max_wr != (wqe_n >> rxq_data->sges_n) || 327 wq_attr.ibv.max_sge != (1u << rxq_data->sges_n)) { 328 DRV_LOG(ERR, 329 "Port %u Rx queue %u requested %u*%u but got" 330 " %u*%u WRs*SGEs.", 331 dev->data->port_id, idx, 332 wqe_n >> rxq_data->sges_n, 333 (1 << rxq_data->sges_n), 334 wq_attr.ibv.max_wr, wq_attr.ibv.max_sge); 335 claim_zero(mlx5_glue->destroy_wq(rxq_obj->wq)); 336 rxq_obj->wq = NULL; 337 rte_errno = EINVAL; 338 } 339 } 340 return rxq_obj->wq; 341 } 342 343 /** 344 * Create the Rx queue Verbs object. 345 * 346 * @param dev 347 * Pointer to Ethernet device. 348 * @param idx 349 * Queue index in DPDK Rx queue array. 350 * 351 * @return 352 * 0 on success, a negative errno value otherwise and rte_errno is set. 353 */ 354 static int 355 mlx5_rxq_ibv_obj_new(struct rte_eth_dev *dev, uint16_t idx) 356 { 357 struct mlx5_priv *priv = dev->data->dev_private; 358 struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx]; 359 struct mlx5_rxq_ctrl *rxq_ctrl = 360 container_of(rxq_data, struct mlx5_rxq_ctrl, rxq); 361 struct mlx5_rxq_obj *tmpl = rxq_ctrl->obj; 362 struct mlx5dv_cq cq_info; 363 struct mlx5dv_rwq rwq; 364 int ret = 0; 365 struct mlx5dv_obj obj; 366 367 MLX5_ASSERT(rxq_data); 368 MLX5_ASSERT(tmpl); 369 priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_RX_QUEUE; 370 priv->verbs_alloc_ctx.obj = rxq_ctrl; 371 tmpl->rxq_ctrl = rxq_ctrl; 372 if (rxq_ctrl->irq) { 373 tmpl->ibv_channel = 374 mlx5_glue->create_comp_channel(priv->sh->ctx); 375 if (!tmpl->ibv_channel) { 376 DRV_LOG(ERR, "Port %u: comp channel creation failure.", 377 dev->data->port_id); 378 rte_errno = ENOMEM; 379 goto error; 380 } 381 tmpl->fd = ((struct ibv_comp_channel *)(tmpl->ibv_channel))->fd; 382 } 383 /* Create CQ using Verbs API. */ 384 tmpl->ibv_cq = mlx5_rxq_ibv_cq_create(dev, idx); 385 if (!tmpl->ibv_cq) { 386 DRV_LOG(ERR, "Port %u Rx queue %u CQ creation failure.", 387 dev->data->port_id, idx); 388 rte_errno = ENOMEM; 389 goto error; 390 } 391 obj.cq.in = tmpl->ibv_cq; 392 obj.cq.out = &cq_info; 393 ret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_CQ); 394 if (ret) { 395 rte_errno = ret; 396 goto error; 397 } 398 if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) { 399 DRV_LOG(ERR, 400 "Port %u wrong MLX5_CQE_SIZE environment " 401 "variable value: it should be set to %u.", 402 dev->data->port_id, RTE_CACHE_LINE_SIZE); 403 rte_errno = EINVAL; 404 goto error; 405 } 406 /* Fill the rings. */ 407 rxq_data->cqe_n = log2above(cq_info.cqe_cnt); 408 rxq_data->cq_db = cq_info.dbrec; 409 rxq_data->cqes = (volatile struct mlx5_cqe (*)[])(uintptr_t)cq_info.buf; 410 rxq_data->cq_uar = cq_info.cq_uar; 411 rxq_data->cqn = cq_info.cqn; 412 /* Create WQ (RQ) using Verbs API. */ 413 tmpl->wq = mlx5_rxq_ibv_wq_create(dev, idx); 414 if (!tmpl->wq) { 415 DRV_LOG(ERR, "Port %u Rx queue %u WQ creation failure.", 416 dev->data->port_id, idx); 417 rte_errno = ENOMEM; 418 goto error; 419 } 420 /* Change queue state to ready. */ 421 ret = mlx5_ibv_modify_wq(tmpl, IBV_WQS_RDY); 422 if (ret) { 423 DRV_LOG(ERR, 424 "Port %u Rx queue %u WQ state to IBV_WQS_RDY failed.", 425 dev->data->port_id, idx); 426 rte_errno = ret; 427 goto error; 428 } 429 obj.rwq.in = tmpl->wq; 430 obj.rwq.out = &rwq; 431 ret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_RWQ); 432 if (ret) { 433 rte_errno = ret; 434 goto error; 435 } 436 rxq_data->wqes = rwq.buf; 437 rxq_data->rq_db = rwq.dbrec; 438 rxq_data->cq_arm_sn = 0; 439 mlx5_rxq_initialize(rxq_data); 440 rxq_data->cq_ci = 0; 441 priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE; 442 dev->data->rx_queue_state[idx] = RTE_ETH_QUEUE_STATE_STARTED; 443 rxq_ctrl->wqn = ((struct ibv_wq *)(tmpl->wq))->wq_num; 444 return 0; 445 error: 446 ret = rte_errno; /* Save rte_errno before cleanup. */ 447 if (tmpl->wq) 448 claim_zero(mlx5_glue->destroy_wq(tmpl->wq)); 449 if (tmpl->ibv_cq) 450 claim_zero(mlx5_glue->destroy_cq(tmpl->ibv_cq)); 451 if (tmpl->ibv_channel) 452 claim_zero(mlx5_glue->destroy_comp_channel(tmpl->ibv_channel)); 453 rte_errno = ret; /* Restore rte_errno. */ 454 priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE; 455 return -rte_errno; 456 } 457 458 /** 459 * Release an Rx verbs queue object. 460 * 461 * @param rxq_obj 462 * Verbs Rx queue object. 463 */ 464 static void 465 mlx5_rxq_ibv_obj_release(struct mlx5_rxq_obj *rxq_obj) 466 { 467 MLX5_ASSERT(rxq_obj); 468 MLX5_ASSERT(rxq_obj->wq); 469 MLX5_ASSERT(rxq_obj->ibv_cq); 470 claim_zero(mlx5_glue->destroy_wq(rxq_obj->wq)); 471 claim_zero(mlx5_glue->destroy_cq(rxq_obj->ibv_cq)); 472 if (rxq_obj->ibv_channel) 473 claim_zero(mlx5_glue->destroy_comp_channel 474 (rxq_obj->ibv_channel)); 475 } 476 477 /** 478 * Get event for an Rx verbs queue object. 479 * 480 * @param rxq_obj 481 * Verbs Rx queue object. 482 * 483 * @return 484 * 0 on success, a negative errno value otherwise and rte_errno is set. 485 */ 486 static int 487 mlx5_rx_ibv_get_event(struct mlx5_rxq_obj *rxq_obj) 488 { 489 struct ibv_cq *ev_cq; 490 void *ev_ctx; 491 int ret = mlx5_glue->get_cq_event(rxq_obj->ibv_channel, 492 &ev_cq, &ev_ctx); 493 494 if (ret < 0 || ev_cq != rxq_obj->ibv_cq) 495 goto exit; 496 mlx5_glue->ack_cq_events(rxq_obj->ibv_cq, 1); 497 return 0; 498 exit: 499 if (ret < 0) 500 rte_errno = errno; 501 else 502 rte_errno = EINVAL; 503 return -rte_errno; 504 } 505 506 /** 507 * Creates a receive work queue as a filed of indirection table. 508 * 509 * @param dev 510 * Pointer to Ethernet device. 511 * @param log_n 512 * Log of number of queues in the array. 513 * @param ind_tbl 514 * Verbs indirection table object. 515 * 516 * @return 517 * 0 on success, a negative errno value otherwise and rte_errno is set. 518 */ 519 static int 520 mlx5_ibv_ind_table_new(struct rte_eth_dev *dev, const unsigned int log_n, 521 struct mlx5_ind_table_obj *ind_tbl) 522 { 523 struct mlx5_priv *priv = dev->data->dev_private; 524 struct ibv_wq *wq[1 << log_n]; 525 unsigned int i, j; 526 527 MLX5_ASSERT(ind_tbl); 528 for (i = 0; i != ind_tbl->queues_n; ++i) { 529 struct mlx5_rxq_data *rxq = (*priv->rxqs)[ind_tbl->queues[i]]; 530 struct mlx5_rxq_ctrl *rxq_ctrl = 531 container_of(rxq, struct mlx5_rxq_ctrl, rxq); 532 533 wq[i] = rxq_ctrl->obj->wq; 534 } 535 MLX5_ASSERT(i > 0); 536 /* Finalise indirection table. */ 537 for (j = 0; i != (unsigned int)(1 << log_n); ++j, ++i) 538 wq[i] = wq[j]; 539 ind_tbl->ind_table = mlx5_glue->create_rwq_ind_table(priv->sh->ctx, 540 &(struct ibv_rwq_ind_table_init_attr){ 541 .log_ind_tbl_size = log_n, 542 .ind_tbl = wq, 543 .comp_mask = 0, 544 }); 545 if (!ind_tbl->ind_table) { 546 rte_errno = errno; 547 return -rte_errno; 548 } 549 return 0; 550 } 551 552 /** 553 * Destroys the specified Indirection Table. 554 * 555 * @param ind_table 556 * Indirection table to release. 557 */ 558 static void 559 mlx5_ibv_ind_table_destroy(struct mlx5_ind_table_obj *ind_tbl) 560 { 561 claim_zero(mlx5_glue->destroy_rwq_ind_table(ind_tbl->ind_table)); 562 } 563 564 /** 565 * Create an Rx Hash queue. 566 * 567 * @param dev 568 * Pointer to Ethernet device. 569 * @param hrxq 570 * Pointer to Rx Hash queue. 571 * @param tunnel 572 * Tunnel type. 573 * 574 * @return 575 * 0 on success, a negative errno value otherwise and rte_errno is set. 576 */ 577 static int 578 mlx5_ibv_hrxq_new(struct rte_eth_dev *dev, struct mlx5_hrxq *hrxq, 579 int tunnel __rte_unused) 580 { 581 struct mlx5_priv *priv = dev->data->dev_private; 582 struct ibv_qp *qp = NULL; 583 struct mlx5_ind_table_obj *ind_tbl = hrxq->ind_table; 584 const uint8_t *rss_key = hrxq->rss_key; 585 uint64_t hash_fields = hrxq->hash_fields; 586 int err; 587 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT 588 struct mlx5dv_qp_init_attr qp_init_attr; 589 590 memset(&qp_init_attr, 0, sizeof(qp_init_attr)); 591 if (tunnel) { 592 qp_init_attr.comp_mask = 593 MLX5DV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS; 594 qp_init_attr.create_flags = MLX5DV_QP_CREATE_TUNNEL_OFFLOADS; 595 } 596 #ifdef HAVE_IBV_FLOW_DV_SUPPORT 597 if (dev->data->dev_conf.lpbk_mode) { 598 /* Allow packet sent from NIC loop back w/o source MAC check. */ 599 qp_init_attr.comp_mask |= 600 MLX5DV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS; 601 qp_init_attr.create_flags |= 602 MLX5DV_QP_CREATE_TIR_ALLOW_SELF_LOOPBACK_UC; 603 } 604 #endif 605 qp = mlx5_glue->dv_create_qp 606 (priv->sh->ctx, 607 &(struct ibv_qp_init_attr_ex){ 608 .qp_type = IBV_QPT_RAW_PACKET, 609 .comp_mask = 610 IBV_QP_INIT_ATTR_PD | 611 IBV_QP_INIT_ATTR_IND_TABLE | 612 IBV_QP_INIT_ATTR_RX_HASH, 613 .rx_hash_conf = (struct ibv_rx_hash_conf){ 614 .rx_hash_function = 615 IBV_RX_HASH_FUNC_TOEPLITZ, 616 .rx_hash_key_len = hrxq->rss_key_len, 617 .rx_hash_key = 618 (void *)(uintptr_t)rss_key, 619 .rx_hash_fields_mask = hash_fields, 620 }, 621 .rwq_ind_tbl = ind_tbl->ind_table, 622 .pd = priv->sh->pd, 623 }, 624 &qp_init_attr); 625 #else 626 qp = mlx5_glue->create_qp_ex 627 (priv->sh->ctx, 628 &(struct ibv_qp_init_attr_ex){ 629 .qp_type = IBV_QPT_RAW_PACKET, 630 .comp_mask = 631 IBV_QP_INIT_ATTR_PD | 632 IBV_QP_INIT_ATTR_IND_TABLE | 633 IBV_QP_INIT_ATTR_RX_HASH, 634 .rx_hash_conf = (struct ibv_rx_hash_conf){ 635 .rx_hash_function = 636 IBV_RX_HASH_FUNC_TOEPLITZ, 637 .rx_hash_key_len = hrxq->rss_key_len, 638 .rx_hash_key = 639 (void *)(uintptr_t)rss_key, 640 .rx_hash_fields_mask = hash_fields, 641 }, 642 .rwq_ind_tbl = ind_tbl->ind_table, 643 .pd = priv->sh->pd, 644 }); 645 #endif 646 if (!qp) { 647 rte_errno = errno; 648 goto error; 649 } 650 hrxq->qp = qp; 651 #ifdef HAVE_IBV_FLOW_DV_SUPPORT 652 hrxq->action = mlx5_glue->dv_create_flow_action_dest_ibv_qp(hrxq->qp); 653 if (!hrxq->action) { 654 rte_errno = errno; 655 goto error; 656 } 657 #endif 658 return 0; 659 error: 660 err = rte_errno; /* Save rte_errno before cleanup. */ 661 if (qp) 662 claim_zero(mlx5_glue->destroy_qp(qp)); 663 rte_errno = err; /* Restore rte_errno. */ 664 return -rte_errno; 665 } 666 667 /** 668 * Destroy a Verbs queue pair. 669 * 670 * @param hrxq 671 * Hash Rx queue to release its qp. 672 */ 673 static void 674 mlx5_ibv_qp_destroy(struct mlx5_hrxq *hrxq) 675 { 676 claim_zero(mlx5_glue->destroy_qp(hrxq->qp)); 677 } 678 679 /** 680 * Release a drop Rx queue Verbs object. 681 * 682 * @param dev 683 * Pointer to Ethernet device. 684 */ 685 static void 686 mlx5_rxq_ibv_obj_drop_release(struct rte_eth_dev *dev) 687 { 688 struct mlx5_priv *priv = dev->data->dev_private; 689 struct mlx5_rxq_obj *rxq = priv->drop_queue.rxq; 690 691 if (rxq->wq) 692 claim_zero(mlx5_glue->destroy_wq(rxq->wq)); 693 if (rxq->ibv_cq) 694 claim_zero(mlx5_glue->destroy_cq(rxq->ibv_cq)); 695 mlx5_free(rxq); 696 priv->drop_queue.rxq = NULL; 697 } 698 699 /** 700 * Create a drop Rx queue Verbs object. 701 * 702 * @param dev 703 * Pointer to Ethernet device. 704 * 705 * @return 706 * 0 on success, a negative errno value otherwise and rte_errno is set. 707 */ 708 static int 709 mlx5_rxq_ibv_obj_drop_create(struct rte_eth_dev *dev) 710 { 711 struct mlx5_priv *priv = dev->data->dev_private; 712 struct ibv_context *ctx = priv->sh->ctx; 713 struct mlx5_rxq_obj *rxq = priv->drop_queue.rxq; 714 715 if (rxq) 716 return 0; 717 rxq = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*rxq), 0, SOCKET_ID_ANY); 718 if (!rxq) { 719 DEBUG("Port %u cannot allocate drop Rx queue memory.", 720 dev->data->port_id); 721 rte_errno = ENOMEM; 722 return -rte_errno; 723 } 724 priv->drop_queue.rxq = rxq; 725 rxq->ibv_cq = mlx5_glue->create_cq(ctx, 1, NULL, NULL, 0); 726 if (!rxq->ibv_cq) { 727 DEBUG("Port %u cannot allocate CQ for drop queue.", 728 dev->data->port_id); 729 rte_errno = errno; 730 goto error; 731 } 732 rxq->wq = mlx5_glue->create_wq(ctx, &(struct ibv_wq_init_attr){ 733 .wq_type = IBV_WQT_RQ, 734 .max_wr = 1, 735 .max_sge = 1, 736 .pd = priv->sh->pd, 737 .cq = rxq->ibv_cq, 738 }); 739 if (!rxq->wq) { 740 DEBUG("Port %u cannot allocate WQ for drop queue.", 741 dev->data->port_id); 742 rte_errno = errno; 743 goto error; 744 } 745 priv->drop_queue.rxq = rxq; 746 return 0; 747 error: 748 mlx5_rxq_ibv_obj_drop_release(dev); 749 return -rte_errno; 750 } 751 752 /** 753 * Create a Verbs drop action for Rx Hash queue. 754 * 755 * @param dev 756 * Pointer to Ethernet device. 757 * 758 * @return 759 * 0 on success, a negative errno value otherwise and rte_errno is set. 760 */ 761 static int 762 mlx5_ibv_drop_action_create(struct rte_eth_dev *dev) 763 { 764 struct mlx5_priv *priv = dev->data->dev_private; 765 struct mlx5_hrxq *hrxq = priv->drop_queue.hrxq; 766 struct ibv_rwq_ind_table *ind_tbl = NULL; 767 struct mlx5_rxq_obj *rxq; 768 int ret; 769 770 MLX5_ASSERT(hrxq && hrxq->ind_table); 771 ret = mlx5_rxq_ibv_obj_drop_create(dev); 772 if (ret < 0) 773 goto error; 774 rxq = priv->drop_queue.rxq; 775 ind_tbl = mlx5_glue->create_rwq_ind_table 776 (priv->sh->ctx, 777 &(struct ibv_rwq_ind_table_init_attr){ 778 .log_ind_tbl_size = 0, 779 .ind_tbl = (struct ibv_wq **)&rxq->wq, 780 .comp_mask = 0, 781 }); 782 if (!ind_tbl) { 783 DEBUG("Port %u cannot allocate indirection table for drop" 784 " queue.", dev->data->port_id); 785 rte_errno = errno; 786 goto error; 787 } 788 hrxq->qp = mlx5_glue->create_qp_ex(priv->sh->ctx, 789 &(struct ibv_qp_init_attr_ex){ 790 .qp_type = IBV_QPT_RAW_PACKET, 791 .comp_mask = IBV_QP_INIT_ATTR_PD | 792 IBV_QP_INIT_ATTR_IND_TABLE | 793 IBV_QP_INIT_ATTR_RX_HASH, 794 .rx_hash_conf = (struct ibv_rx_hash_conf){ 795 .rx_hash_function = IBV_RX_HASH_FUNC_TOEPLITZ, 796 .rx_hash_key_len = MLX5_RSS_HASH_KEY_LEN, 797 .rx_hash_key = rss_hash_default_key, 798 .rx_hash_fields_mask = 0, 799 }, 800 .rwq_ind_tbl = ind_tbl, 801 .pd = priv->sh->pd 802 }); 803 if (!hrxq->qp) { 804 DEBUG("Port %u cannot allocate QP for drop queue.", 805 dev->data->port_id); 806 rte_errno = errno; 807 goto error; 808 } 809 #ifdef HAVE_IBV_FLOW_DV_SUPPORT 810 hrxq->action = mlx5_glue->dv_create_flow_action_dest_ibv_qp(hrxq->qp); 811 if (!hrxq->action) { 812 rte_errno = errno; 813 goto error; 814 } 815 #endif 816 hrxq->ind_table->ind_table = ind_tbl; 817 return 0; 818 error: 819 if (hrxq->qp) 820 claim_zero(mlx5_glue->destroy_qp(hrxq->qp)); 821 if (ind_tbl) 822 claim_zero(mlx5_glue->destroy_rwq_ind_table(ind_tbl)); 823 if (priv->drop_queue.rxq) 824 mlx5_rxq_ibv_obj_drop_release(dev); 825 return -rte_errno; 826 } 827 828 /** 829 * Release a drop hash Rx queue. 830 * 831 * @param dev 832 * Pointer to Ethernet device. 833 */ 834 static void 835 mlx5_ibv_drop_action_destroy(struct rte_eth_dev *dev) 836 { 837 struct mlx5_priv *priv = dev->data->dev_private; 838 struct mlx5_hrxq *hrxq = priv->drop_queue.hrxq; 839 struct ibv_rwq_ind_table *ind_tbl = hrxq->ind_table->ind_table; 840 841 #ifdef HAVE_IBV_FLOW_DV_SUPPORT 842 claim_zero(mlx5_glue->destroy_flow_action(hrxq->action)); 843 #endif 844 claim_zero(mlx5_glue->destroy_qp(hrxq->qp)); 845 claim_zero(mlx5_glue->destroy_rwq_ind_table(ind_tbl)); 846 mlx5_rxq_ibv_obj_drop_release(dev); 847 } 848 849 /** 850 * Create a QP Verbs object. 851 * 852 * @param dev 853 * Pointer to Ethernet device. 854 * @param idx 855 * Queue index in DPDK Tx queue array. 856 * 857 * @return 858 * The QP Verbs object, NULL otherwise and rte_errno is set. 859 */ 860 static struct ibv_qp * 861 mlx5_txq_ibv_qp_create(struct rte_eth_dev *dev, uint16_t idx) 862 { 863 struct mlx5_priv *priv = dev->data->dev_private; 864 struct mlx5_txq_data *txq_data = (*priv->txqs)[idx]; 865 struct mlx5_txq_ctrl *txq_ctrl = 866 container_of(txq_data, struct mlx5_txq_ctrl, txq); 867 struct ibv_qp *qp_obj = NULL; 868 struct ibv_qp_init_attr_ex qp_attr = { 0 }; 869 const int desc = 1 << txq_data->elts_n; 870 871 MLX5_ASSERT(txq_ctrl->obj->cq); 872 /* CQ to be associated with the send queue. */ 873 qp_attr.send_cq = txq_ctrl->obj->cq; 874 /* CQ to be associated with the receive queue. */ 875 qp_attr.recv_cq = txq_ctrl->obj->cq; 876 /* Max number of outstanding WRs. */ 877 qp_attr.cap.max_send_wr = ((priv->sh->device_attr.max_qp_wr < desc) ? 878 priv->sh->device_attr.max_qp_wr : desc); 879 /* 880 * Max number of scatter/gather elements in a WR, must be 1 to prevent 881 * libmlx5 from trying to affect must be 1 to prevent libmlx5 from 882 * trying to affect too much memory. TX gather is not impacted by the 883 * device_attr.max_sge limit and will still work properly. 884 */ 885 qp_attr.cap.max_send_sge = 1; 886 qp_attr.qp_type = IBV_QPT_RAW_PACKET, 887 /* Do *NOT* enable this, completions events are managed per Tx burst. */ 888 qp_attr.sq_sig_all = 0; 889 qp_attr.pd = priv->sh->pd; 890 qp_attr.comp_mask = IBV_QP_INIT_ATTR_PD; 891 if (txq_data->inlen_send) 892 qp_attr.cap.max_inline_data = txq_ctrl->max_inline_data; 893 if (txq_data->tso_en) { 894 qp_attr.max_tso_header = txq_ctrl->max_tso_header; 895 qp_attr.comp_mask |= IBV_QP_INIT_ATTR_MAX_TSO_HEADER; 896 } 897 qp_obj = mlx5_glue->create_qp_ex(priv->sh->ctx, &qp_attr); 898 if (qp_obj == NULL) { 899 DRV_LOG(ERR, "Port %u Tx queue %u QP creation failure.", 900 dev->data->port_id, idx); 901 rte_errno = errno; 902 } 903 return qp_obj; 904 } 905 906 /** 907 * Create the Tx queue Verbs object. 908 * 909 * @param dev 910 * Pointer to Ethernet device. 911 * @param idx 912 * Queue index in DPDK Tx queue array. 913 * 914 * @return 915 * 0 on success, a negative errno value otherwise and rte_errno is set. 916 */ 917 int 918 mlx5_txq_ibv_obj_new(struct rte_eth_dev *dev, uint16_t idx) 919 { 920 struct mlx5_priv *priv = dev->data->dev_private; 921 struct mlx5_txq_data *txq_data = (*priv->txqs)[idx]; 922 struct mlx5_txq_ctrl *txq_ctrl = 923 container_of(txq_data, struct mlx5_txq_ctrl, txq); 924 struct mlx5_txq_obj *txq_obj = txq_ctrl->obj; 925 unsigned int cqe_n; 926 struct mlx5dv_qp qp; 927 struct mlx5dv_cq cq_info; 928 struct mlx5dv_obj obj; 929 const int desc = 1 << txq_data->elts_n; 930 int ret = 0; 931 932 MLX5_ASSERT(txq_data); 933 MLX5_ASSERT(txq_obj); 934 txq_obj->txq_ctrl = txq_ctrl; 935 priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_TX_QUEUE; 936 priv->verbs_alloc_ctx.obj = txq_ctrl; 937 if (mlx5_getenv_int("MLX5_ENABLE_CQE_COMPRESSION")) { 938 DRV_LOG(ERR, "Port %u MLX5_ENABLE_CQE_COMPRESSION " 939 "must never be set.", dev->data->port_id); 940 rte_errno = EINVAL; 941 return -rte_errno; 942 } 943 cqe_n = desc / MLX5_TX_COMP_THRESH + 944 1 + MLX5_TX_COMP_THRESH_INLINE_DIV; 945 txq_obj->cq = mlx5_glue->create_cq(priv->sh->ctx, cqe_n, NULL, NULL, 0); 946 if (txq_obj->cq == NULL) { 947 DRV_LOG(ERR, "Port %u Tx queue %u CQ creation failure.", 948 dev->data->port_id, idx); 949 rte_errno = errno; 950 goto error; 951 } 952 txq_obj->qp = mlx5_txq_ibv_qp_create(dev, idx); 953 if (txq_obj->qp == NULL) { 954 rte_errno = errno; 955 goto error; 956 } 957 ret = mlx5_ibv_modify_qp(txq_obj, MLX5_TXQ_MOD_RST2RDY, 958 (uint8_t)priv->dev_port); 959 if (ret) { 960 DRV_LOG(ERR, "Port %u Tx queue %u QP state modifying failed.", 961 dev->data->port_id, idx); 962 rte_errno = errno; 963 goto error; 964 } 965 qp.comp_mask = MLX5DV_QP_MASK_UAR_MMAP_OFFSET; 966 #ifdef HAVE_IBV_FLOW_DV_SUPPORT 967 /* If using DevX, need additional mask to read tisn value. */ 968 if (priv->sh->devx && !priv->sh->tdn) 969 qp.comp_mask |= MLX5DV_QP_MASK_RAW_QP_HANDLES; 970 #endif 971 obj.cq.in = txq_obj->cq; 972 obj.cq.out = &cq_info; 973 obj.qp.in = txq_obj->qp; 974 obj.qp.out = &qp; 975 ret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_CQ | MLX5DV_OBJ_QP); 976 if (ret != 0) { 977 rte_errno = errno; 978 goto error; 979 } 980 if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) { 981 DRV_LOG(ERR, 982 "Port %u wrong MLX5_CQE_SIZE environment variable" 983 " value: it should be set to %u.", 984 dev->data->port_id, RTE_CACHE_LINE_SIZE); 985 rte_errno = EINVAL; 986 goto error; 987 } 988 txq_data->cqe_n = log2above(cq_info.cqe_cnt); 989 txq_data->cqe_s = 1 << txq_data->cqe_n; 990 txq_data->cqe_m = txq_data->cqe_s - 1; 991 txq_data->qp_num_8s = ((struct ibv_qp *)txq_obj->qp)->qp_num << 8; 992 txq_data->wqes = qp.sq.buf; 993 txq_data->wqe_n = log2above(qp.sq.wqe_cnt); 994 txq_data->wqe_s = 1 << txq_data->wqe_n; 995 txq_data->wqe_m = txq_data->wqe_s - 1; 996 txq_data->wqes_end = txq_data->wqes + txq_data->wqe_s; 997 txq_data->qp_db = &qp.dbrec[MLX5_SND_DBR]; 998 txq_data->cq_db = cq_info.dbrec; 999 txq_data->cqes = (volatile struct mlx5_cqe *)cq_info.buf; 1000 txq_data->cq_ci = 0; 1001 txq_data->cq_pi = 0; 1002 txq_data->wqe_ci = 0; 1003 txq_data->wqe_pi = 0; 1004 txq_data->wqe_comp = 0; 1005 txq_data->wqe_thres = txq_data->wqe_s / MLX5_TX_COMP_THRESH_INLINE_DIV; 1006 #ifdef HAVE_IBV_FLOW_DV_SUPPORT 1007 /* 1008 * If using DevX need to query and store TIS transport domain value. 1009 * This is done once per port. 1010 * Will use this value on Rx, when creating matching TIR. 1011 */ 1012 if (priv->sh->devx && !priv->sh->tdn) { 1013 ret = mlx5_devx_cmd_qp_query_tis_td(txq_obj->qp, qp.tisn, 1014 &priv->sh->tdn); 1015 if (ret) { 1016 DRV_LOG(ERR, "Fail to query port %u Tx queue %u QP TIS " 1017 "transport domain.", dev->data->port_id, idx); 1018 rte_errno = EINVAL; 1019 goto error; 1020 } else { 1021 DRV_LOG(DEBUG, "Port %u Tx queue %u TIS number %d " 1022 "transport domain %d.", dev->data->port_id, 1023 idx, qp.tisn, priv->sh->tdn); 1024 } 1025 } 1026 #endif 1027 txq_ctrl->bf_reg = qp.bf.reg; 1028 if (qp.comp_mask & MLX5DV_QP_MASK_UAR_MMAP_OFFSET) { 1029 txq_ctrl->uar_mmap_offset = qp.uar_mmap_offset; 1030 DRV_LOG(DEBUG, "Port %u: uar_mmap_offset 0x%" PRIx64 ".", 1031 dev->data->port_id, txq_ctrl->uar_mmap_offset); 1032 } else { 1033 DRV_LOG(ERR, 1034 "Port %u failed to retrieve UAR info, invalid" 1035 " libmlx5.so", 1036 dev->data->port_id); 1037 rte_errno = EINVAL; 1038 goto error; 1039 } 1040 txq_uar_init(txq_ctrl); 1041 dev->data->tx_queue_state[idx] = RTE_ETH_QUEUE_STATE_STARTED; 1042 priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE; 1043 return 0; 1044 error: 1045 ret = rte_errno; /* Save rte_errno before cleanup. */ 1046 if (txq_obj->cq) 1047 claim_zero(mlx5_glue->destroy_cq(txq_obj->cq)); 1048 if (txq_obj->qp) 1049 claim_zero(mlx5_glue->destroy_qp(txq_obj->qp)); 1050 priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE; 1051 rte_errno = ret; /* Restore rte_errno. */ 1052 return -rte_errno; 1053 } 1054 1055 /** 1056 * Release an Tx verbs queue object. 1057 * 1058 * @param txq_obj 1059 * Verbs Tx queue object.. 1060 */ 1061 void 1062 mlx5_txq_ibv_obj_release(struct mlx5_txq_obj *txq_obj) 1063 { 1064 MLX5_ASSERT(txq_obj); 1065 claim_zero(mlx5_glue->destroy_qp(txq_obj->qp)); 1066 claim_zero(mlx5_glue->destroy_cq(txq_obj->cq)); 1067 } 1068 1069 struct mlx5_obj_ops ibv_obj_ops = { 1070 .rxq_obj_modify_vlan_strip = mlx5_rxq_obj_modify_wq_vlan_strip, 1071 .rxq_obj_new = mlx5_rxq_ibv_obj_new, 1072 .rxq_event_get = mlx5_rx_ibv_get_event, 1073 .rxq_obj_modify = mlx5_ibv_modify_wq, 1074 .rxq_obj_release = mlx5_rxq_ibv_obj_release, 1075 .ind_table_new = mlx5_ibv_ind_table_new, 1076 .ind_table_destroy = mlx5_ibv_ind_table_destroy, 1077 .hrxq_new = mlx5_ibv_hrxq_new, 1078 .hrxq_destroy = mlx5_ibv_qp_destroy, 1079 .drop_action_create = mlx5_ibv_drop_action_create, 1080 .drop_action_destroy = mlx5_ibv_drop_action_destroy, 1081 .txq_obj_new = mlx5_txq_ibv_obj_new, 1082 .txq_obj_modify = mlx5_ibv_modify_qp, 1083 .txq_obj_release = mlx5_txq_ibv_obj_release, 1084 }; 1085