1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2020 Mellanox Technologies, Ltd 3 */ 4 5 #include <stddef.h> 6 #include <errno.h> 7 #include <string.h> 8 #include <stdint.h> 9 #include <unistd.h> 10 #include <inttypes.h> 11 #include <sys/queue.h> 12 13 #include "mlx5_autoconf.h" 14 15 #include <rte_mbuf.h> 16 #include <rte_malloc.h> 17 #include <ethdev_driver.h> 18 #include <rte_common.h> 19 20 #include <mlx5_glue.h> 21 #include <mlx5_common.h> 22 #include <mlx5_common_mr.h> 23 #include <mlx5_rxtx.h> 24 #include <mlx5_verbs.h> 25 #include <mlx5_utils.h> 26 #include <mlx5_malloc.h> 27 28 /** 29 * Register mr. Given protection domain pointer, pointer to addr and length 30 * register the memory region. 31 * 32 * @param[in] pd 33 * Pointer to protection domain context. 34 * @param[in] addr 35 * Pointer to memory start address. 36 * @param[in] length 37 * Length of the memory to register. 38 * @param[out] pmd_mr 39 * pmd_mr struct set with lkey, address, length and pointer to mr object 40 * 41 * @return 42 * 0 on successful registration, -1 otherwise 43 */ 44 static int 45 mlx5_reg_mr(void *pd, void *addr, size_t length, 46 struct mlx5_pmd_mr *pmd_mr) 47 { 48 return mlx5_common_verbs_reg_mr(pd, addr, length, pmd_mr); 49 } 50 51 /** 52 * Deregister mr. Given the mlx5 pmd MR - deregister the MR 53 * 54 * @param[in] pmd_mr 55 * pmd_mr struct set with lkey, address, length and pointer to mr object 56 * 57 */ 58 static void 59 mlx5_dereg_mr(struct mlx5_pmd_mr *pmd_mr) 60 { 61 mlx5_common_verbs_dereg_mr(pmd_mr); 62 } 63 64 /* verbs operations. */ 65 const struct mlx5_mr_ops mlx5_mr_verbs_ops = { 66 .reg_mr = mlx5_reg_mr, 67 .dereg_mr = mlx5_dereg_mr, 68 }; 69 70 /** 71 * Modify Rx WQ vlan stripping offload 72 * 73 * @param rxq_obj 74 * Rx queue object. 75 * 76 * @return 0 on success, non-0 otherwise 77 */ 78 static int 79 mlx5_rxq_obj_modify_wq_vlan_strip(struct mlx5_rxq_obj *rxq_obj, int on) 80 { 81 uint16_t vlan_offloads = 82 (on ? IBV_WQ_FLAGS_CVLAN_STRIPPING : 0) | 83 0; 84 struct ibv_wq_attr mod; 85 mod = (struct ibv_wq_attr){ 86 .attr_mask = IBV_WQ_ATTR_FLAGS, 87 .flags_mask = IBV_WQ_FLAGS_CVLAN_STRIPPING, 88 .flags = vlan_offloads, 89 }; 90 91 return mlx5_glue->modify_wq(rxq_obj->wq, &mod); 92 } 93 94 /** 95 * Modifies the attributes for the specified WQ. 96 * 97 * @param rxq_obj 98 * Verbs Rx queue object. 99 * @param type 100 * Type of change queue state. 101 * 102 * @return 103 * 0 on success, a negative errno value otherwise and rte_errno is set. 104 */ 105 static int 106 mlx5_ibv_modify_wq(struct mlx5_rxq_obj *rxq_obj, uint8_t type) 107 { 108 struct ibv_wq_attr mod = { 109 .attr_mask = IBV_WQ_ATTR_STATE, 110 .wq_state = (enum ibv_wq_state)type, 111 }; 112 113 return mlx5_glue->modify_wq(rxq_obj->wq, &mod); 114 } 115 116 /** 117 * Modify QP using Verbs API. 118 * 119 * @param txq_obj 120 * Verbs Tx queue object. 121 * @param type 122 * Type of change queue state. 123 * @param dev_port 124 * IB device port number. 125 * 126 * @return 127 * 0 on success, a negative errno value otherwise and rte_errno is set. 128 */ 129 static int 130 mlx5_ibv_modify_qp(struct mlx5_txq_obj *obj, enum mlx5_txq_modify_type type, 131 uint8_t dev_port) 132 { 133 struct ibv_qp_attr mod = { 134 .qp_state = IBV_QPS_RESET, 135 .port_num = dev_port, 136 }; 137 int attr_mask = (IBV_QP_STATE | IBV_QP_PORT); 138 int ret; 139 140 if (type != MLX5_TXQ_MOD_RST2RDY) { 141 ret = mlx5_glue->modify_qp(obj->qp, &mod, IBV_QP_STATE); 142 if (ret) { 143 DRV_LOG(ERR, "Cannot change Tx QP state to RESET %s", 144 strerror(errno)); 145 rte_errno = errno; 146 return ret; 147 } 148 if (type == MLX5_TXQ_MOD_RDY2RST) 149 return 0; 150 } 151 if (type == MLX5_TXQ_MOD_ERR2RDY) 152 attr_mask = IBV_QP_STATE; 153 mod.qp_state = IBV_QPS_INIT; 154 ret = mlx5_glue->modify_qp(obj->qp, &mod, attr_mask); 155 if (ret) { 156 DRV_LOG(ERR, "Cannot change Tx QP state to INIT %s", 157 strerror(errno)); 158 rte_errno = errno; 159 return ret; 160 } 161 mod.qp_state = IBV_QPS_RTR; 162 ret = mlx5_glue->modify_qp(obj->qp, &mod, IBV_QP_STATE); 163 if (ret) { 164 DRV_LOG(ERR, "Cannot change Tx QP state to RTR %s", 165 strerror(errno)); 166 rte_errno = errno; 167 return ret; 168 } 169 mod.qp_state = IBV_QPS_RTS; 170 ret = mlx5_glue->modify_qp(obj->qp, &mod, IBV_QP_STATE); 171 if (ret) { 172 DRV_LOG(ERR, "Cannot change Tx QP state to RTS %s", 173 strerror(errno)); 174 rte_errno = errno; 175 return ret; 176 } 177 return 0; 178 } 179 180 /** 181 * Create a CQ Verbs object. 182 * 183 * @param dev 184 * Pointer to Ethernet device. 185 * @param idx 186 * Queue index in DPDK Rx queue array. 187 * 188 * @return 189 * The Verbs CQ object initialized, NULL otherwise and rte_errno is set. 190 */ 191 static struct ibv_cq * 192 mlx5_rxq_ibv_cq_create(struct rte_eth_dev *dev, uint16_t idx) 193 { 194 struct mlx5_priv *priv = dev->data->dev_private; 195 struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx]; 196 struct mlx5_rxq_ctrl *rxq_ctrl = 197 container_of(rxq_data, struct mlx5_rxq_ctrl, rxq); 198 struct mlx5_rxq_obj *rxq_obj = rxq_ctrl->obj; 199 unsigned int cqe_n = mlx5_rxq_cqe_num(rxq_data); 200 struct { 201 struct ibv_cq_init_attr_ex ibv; 202 struct mlx5dv_cq_init_attr mlx5; 203 } cq_attr; 204 205 cq_attr.ibv = (struct ibv_cq_init_attr_ex){ 206 .cqe = cqe_n, 207 .channel = rxq_obj->ibv_channel, 208 .comp_mask = 0, 209 }; 210 cq_attr.mlx5 = (struct mlx5dv_cq_init_attr){ 211 .comp_mask = 0, 212 }; 213 if (priv->config.cqe_comp && !rxq_data->hw_timestamp) { 214 cq_attr.mlx5.comp_mask |= 215 MLX5DV_CQ_INIT_ATTR_MASK_COMPRESSED_CQE; 216 rxq_data->byte_mask = UINT32_MAX; 217 #ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT 218 if (mlx5_rxq_mprq_enabled(rxq_data)) { 219 cq_attr.mlx5.cqe_comp_res_format = 220 MLX5DV_CQE_RES_FORMAT_CSUM_STRIDX; 221 rxq_data->mcqe_format = 222 MLX5_CQE_RESP_FORMAT_CSUM_STRIDX; 223 } else { 224 cq_attr.mlx5.cqe_comp_res_format = 225 MLX5DV_CQE_RES_FORMAT_HASH; 226 rxq_data->mcqe_format = 227 MLX5_CQE_RESP_FORMAT_HASH; 228 } 229 #else 230 cq_attr.mlx5.cqe_comp_res_format = MLX5DV_CQE_RES_FORMAT_HASH; 231 rxq_data->mcqe_format = MLX5_CQE_RESP_FORMAT_HASH; 232 #endif 233 /* 234 * For vectorized Rx, it must not be doubled in order to 235 * make cq_ci and rq_ci aligned. 236 */ 237 if (mlx5_rxq_check_vec_support(rxq_data) < 0) 238 cq_attr.ibv.cqe *= 2; 239 } else if (priv->config.cqe_comp && rxq_data->hw_timestamp) { 240 DRV_LOG(DEBUG, 241 "Port %u Rx CQE compression is disabled for HW" 242 " timestamp.", 243 dev->data->port_id); 244 } 245 #ifdef HAVE_IBV_MLX5_MOD_CQE_128B_PAD 246 if (RTE_CACHE_LINE_SIZE == 128) { 247 cq_attr.mlx5.comp_mask |= MLX5DV_CQ_INIT_ATTR_MASK_FLAGS; 248 cq_attr.mlx5.flags |= MLX5DV_CQ_INIT_ATTR_FLAGS_CQE_PAD; 249 } 250 #endif 251 return mlx5_glue->cq_ex_to_cq(mlx5_glue->dv_create_cq(priv->sh->ctx, 252 &cq_attr.ibv, 253 &cq_attr.mlx5)); 254 } 255 256 /** 257 * Create a WQ Verbs object. 258 * 259 * @param dev 260 * Pointer to Ethernet device. 261 * @param idx 262 * Queue index in DPDK Rx queue array. 263 * 264 * @return 265 * The Verbs WQ object initialized, NULL otherwise and rte_errno is set. 266 */ 267 static struct ibv_wq * 268 mlx5_rxq_ibv_wq_create(struct rte_eth_dev *dev, uint16_t idx) 269 { 270 struct mlx5_priv *priv = dev->data->dev_private; 271 struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx]; 272 struct mlx5_rxq_ctrl *rxq_ctrl = 273 container_of(rxq_data, struct mlx5_rxq_ctrl, rxq); 274 struct mlx5_rxq_obj *rxq_obj = rxq_ctrl->obj; 275 unsigned int wqe_n = 1 << rxq_data->elts_n; 276 struct { 277 struct ibv_wq_init_attr ibv; 278 #ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT 279 struct mlx5dv_wq_init_attr mlx5; 280 #endif 281 } wq_attr; 282 283 wq_attr.ibv = (struct ibv_wq_init_attr){ 284 .wq_context = NULL, /* Could be useful in the future. */ 285 .wq_type = IBV_WQT_RQ, 286 /* Max number of outstanding WRs. */ 287 .max_wr = wqe_n >> rxq_data->sges_n, 288 /* Max number of scatter/gather elements in a WR. */ 289 .max_sge = 1 << rxq_data->sges_n, 290 .pd = priv->sh->pd, 291 .cq = rxq_obj->ibv_cq, 292 .comp_mask = IBV_WQ_FLAGS_CVLAN_STRIPPING | 0, 293 .create_flags = (rxq_data->vlan_strip ? 294 IBV_WQ_FLAGS_CVLAN_STRIPPING : 0), 295 }; 296 /* By default, FCS (CRC) is stripped by hardware. */ 297 if (rxq_data->crc_present) { 298 wq_attr.ibv.create_flags |= IBV_WQ_FLAGS_SCATTER_FCS; 299 wq_attr.ibv.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS; 300 } 301 if (priv->config.hw_padding) { 302 #if defined(HAVE_IBV_WQ_FLAG_RX_END_PADDING) 303 wq_attr.ibv.create_flags |= IBV_WQ_FLAG_RX_END_PADDING; 304 wq_attr.ibv.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS; 305 #elif defined(HAVE_IBV_WQ_FLAGS_PCI_WRITE_END_PADDING) 306 wq_attr.ibv.create_flags |= IBV_WQ_FLAGS_PCI_WRITE_END_PADDING; 307 wq_attr.ibv.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS; 308 #endif 309 } 310 #ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT 311 wq_attr.mlx5 = (struct mlx5dv_wq_init_attr){ 312 .comp_mask = 0, 313 }; 314 if (mlx5_rxq_mprq_enabled(rxq_data)) { 315 struct mlx5dv_striding_rq_init_attr *mprq_attr = 316 &wq_attr.mlx5.striding_rq_attrs; 317 318 wq_attr.mlx5.comp_mask |= MLX5DV_WQ_INIT_ATTR_MASK_STRIDING_RQ; 319 *mprq_attr = (struct mlx5dv_striding_rq_init_attr){ 320 .single_stride_log_num_of_bytes = rxq_data->strd_sz_n, 321 .single_wqe_log_num_of_strides = rxq_data->strd_num_n, 322 .two_byte_shift_en = MLX5_MPRQ_TWO_BYTE_SHIFT, 323 }; 324 } 325 rxq_obj->wq = mlx5_glue->dv_create_wq(priv->sh->ctx, &wq_attr.ibv, 326 &wq_attr.mlx5); 327 #else 328 rxq_obj->wq = mlx5_glue->create_wq(priv->sh->ctx, &wq_attr.ibv); 329 #endif 330 if (rxq_obj->wq) { 331 /* 332 * Make sure number of WRs*SGEs match expectations since a queue 333 * cannot allocate more than "desc" buffers. 334 */ 335 if (wq_attr.ibv.max_wr != (wqe_n >> rxq_data->sges_n) || 336 wq_attr.ibv.max_sge != (1u << rxq_data->sges_n)) { 337 DRV_LOG(ERR, 338 "Port %u Rx queue %u requested %u*%u but got" 339 " %u*%u WRs*SGEs.", 340 dev->data->port_id, idx, 341 wqe_n >> rxq_data->sges_n, 342 (1 << rxq_data->sges_n), 343 wq_attr.ibv.max_wr, wq_attr.ibv.max_sge); 344 claim_zero(mlx5_glue->destroy_wq(rxq_obj->wq)); 345 rxq_obj->wq = NULL; 346 rte_errno = EINVAL; 347 } 348 } 349 return rxq_obj->wq; 350 } 351 352 /** 353 * Create the Rx queue Verbs object. 354 * 355 * @param dev 356 * Pointer to Ethernet device. 357 * @param idx 358 * Queue index in DPDK Rx queue array. 359 * 360 * @return 361 * 0 on success, a negative errno value otherwise and rte_errno is set. 362 */ 363 static int 364 mlx5_rxq_ibv_obj_new(struct rte_eth_dev *dev, uint16_t idx) 365 { 366 struct mlx5_priv *priv = dev->data->dev_private; 367 struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx]; 368 struct mlx5_rxq_ctrl *rxq_ctrl = 369 container_of(rxq_data, struct mlx5_rxq_ctrl, rxq); 370 struct mlx5_rxq_obj *tmpl = rxq_ctrl->obj; 371 struct mlx5dv_cq cq_info; 372 struct mlx5dv_rwq rwq; 373 int ret = 0; 374 struct mlx5dv_obj obj; 375 376 MLX5_ASSERT(rxq_data); 377 MLX5_ASSERT(tmpl); 378 tmpl->rxq_ctrl = rxq_ctrl; 379 if (rxq_ctrl->irq) { 380 tmpl->ibv_channel = 381 mlx5_glue->create_comp_channel(priv->sh->ctx); 382 if (!tmpl->ibv_channel) { 383 DRV_LOG(ERR, "Port %u: comp channel creation failure.", 384 dev->data->port_id); 385 rte_errno = ENOMEM; 386 goto error; 387 } 388 tmpl->fd = ((struct ibv_comp_channel *)(tmpl->ibv_channel))->fd; 389 } 390 /* Create CQ using Verbs API. */ 391 tmpl->ibv_cq = mlx5_rxq_ibv_cq_create(dev, idx); 392 if (!tmpl->ibv_cq) { 393 DRV_LOG(ERR, "Port %u Rx queue %u CQ creation failure.", 394 dev->data->port_id, idx); 395 rte_errno = ENOMEM; 396 goto error; 397 } 398 obj.cq.in = tmpl->ibv_cq; 399 obj.cq.out = &cq_info; 400 ret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_CQ); 401 if (ret) { 402 rte_errno = ret; 403 goto error; 404 } 405 if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) { 406 DRV_LOG(ERR, 407 "Port %u wrong MLX5_CQE_SIZE environment " 408 "variable value: it should be set to %u.", 409 dev->data->port_id, RTE_CACHE_LINE_SIZE); 410 rte_errno = EINVAL; 411 goto error; 412 } 413 /* Fill the rings. */ 414 rxq_data->cqe_n = log2above(cq_info.cqe_cnt); 415 rxq_data->cq_db = cq_info.dbrec; 416 rxq_data->cqes = (volatile struct mlx5_cqe (*)[])(uintptr_t)cq_info.buf; 417 rxq_data->cq_uar = cq_info.cq_uar; 418 rxq_data->cqn = cq_info.cqn; 419 /* Create WQ (RQ) using Verbs API. */ 420 tmpl->wq = mlx5_rxq_ibv_wq_create(dev, idx); 421 if (!tmpl->wq) { 422 DRV_LOG(ERR, "Port %u Rx queue %u WQ creation failure.", 423 dev->data->port_id, idx); 424 rte_errno = ENOMEM; 425 goto error; 426 } 427 /* Change queue state to ready. */ 428 ret = mlx5_ibv_modify_wq(tmpl, IBV_WQS_RDY); 429 if (ret) { 430 DRV_LOG(ERR, 431 "Port %u Rx queue %u WQ state to IBV_WQS_RDY failed.", 432 dev->data->port_id, idx); 433 rte_errno = ret; 434 goto error; 435 } 436 obj.rwq.in = tmpl->wq; 437 obj.rwq.out = &rwq; 438 ret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_RWQ); 439 if (ret) { 440 rte_errno = ret; 441 goto error; 442 } 443 rxq_data->wqes = rwq.buf; 444 rxq_data->rq_db = rwq.dbrec; 445 rxq_data->cq_arm_sn = 0; 446 mlx5_rxq_initialize(rxq_data); 447 rxq_data->cq_ci = 0; 448 dev->data->rx_queue_state[idx] = RTE_ETH_QUEUE_STATE_STARTED; 449 rxq_ctrl->wqn = ((struct ibv_wq *)(tmpl->wq))->wq_num; 450 return 0; 451 error: 452 ret = rte_errno; /* Save rte_errno before cleanup. */ 453 if (tmpl->wq) 454 claim_zero(mlx5_glue->destroy_wq(tmpl->wq)); 455 if (tmpl->ibv_cq) 456 claim_zero(mlx5_glue->destroy_cq(tmpl->ibv_cq)); 457 if (tmpl->ibv_channel) 458 claim_zero(mlx5_glue->destroy_comp_channel(tmpl->ibv_channel)); 459 rte_errno = ret; /* Restore rte_errno. */ 460 return -rte_errno; 461 } 462 463 /** 464 * Release an Rx verbs queue object. 465 * 466 * @param rxq_obj 467 * Verbs Rx queue object. 468 */ 469 static void 470 mlx5_rxq_ibv_obj_release(struct mlx5_rxq_obj *rxq_obj) 471 { 472 MLX5_ASSERT(rxq_obj); 473 MLX5_ASSERT(rxq_obj->wq); 474 MLX5_ASSERT(rxq_obj->ibv_cq); 475 claim_zero(mlx5_glue->destroy_wq(rxq_obj->wq)); 476 claim_zero(mlx5_glue->destroy_cq(rxq_obj->ibv_cq)); 477 if (rxq_obj->ibv_channel) 478 claim_zero(mlx5_glue->destroy_comp_channel 479 (rxq_obj->ibv_channel)); 480 } 481 482 /** 483 * Get event for an Rx verbs queue object. 484 * 485 * @param rxq_obj 486 * Verbs Rx queue object. 487 * 488 * @return 489 * 0 on success, a negative errno value otherwise and rte_errno is set. 490 */ 491 static int 492 mlx5_rx_ibv_get_event(struct mlx5_rxq_obj *rxq_obj) 493 { 494 struct ibv_cq *ev_cq; 495 void *ev_ctx; 496 int ret = mlx5_glue->get_cq_event(rxq_obj->ibv_channel, 497 &ev_cq, &ev_ctx); 498 499 if (ret < 0 || ev_cq != rxq_obj->ibv_cq) 500 goto exit; 501 mlx5_glue->ack_cq_events(rxq_obj->ibv_cq, 1); 502 return 0; 503 exit: 504 if (ret < 0) 505 rte_errno = errno; 506 else 507 rte_errno = EINVAL; 508 return -rte_errno; 509 } 510 511 /** 512 * Creates a receive work queue as a filed of indirection table. 513 * 514 * @param dev 515 * Pointer to Ethernet device. 516 * @param log_n 517 * Log of number of queues in the array. 518 * @param ind_tbl 519 * Verbs indirection table object. 520 * 521 * @return 522 * 0 on success, a negative errno value otherwise and rte_errno is set. 523 */ 524 static int 525 mlx5_ibv_ind_table_new(struct rte_eth_dev *dev, const unsigned int log_n, 526 struct mlx5_ind_table_obj *ind_tbl) 527 { 528 struct mlx5_priv *priv = dev->data->dev_private; 529 struct ibv_wq *wq[1 << log_n]; 530 unsigned int i, j; 531 532 MLX5_ASSERT(ind_tbl); 533 for (i = 0; i != ind_tbl->queues_n; ++i) { 534 struct mlx5_rxq_data *rxq = (*priv->rxqs)[ind_tbl->queues[i]]; 535 struct mlx5_rxq_ctrl *rxq_ctrl = 536 container_of(rxq, struct mlx5_rxq_ctrl, rxq); 537 538 wq[i] = rxq_ctrl->obj->wq; 539 } 540 MLX5_ASSERT(i > 0); 541 /* Finalise indirection table. */ 542 for (j = 0; i != (unsigned int)(1 << log_n); ++j, ++i) 543 wq[i] = wq[j]; 544 ind_tbl->ind_table = mlx5_glue->create_rwq_ind_table(priv->sh->ctx, 545 &(struct ibv_rwq_ind_table_init_attr){ 546 .log_ind_tbl_size = log_n, 547 .ind_tbl = wq, 548 .comp_mask = 0, 549 }); 550 if (!ind_tbl->ind_table) { 551 rte_errno = errno; 552 return -rte_errno; 553 } 554 return 0; 555 } 556 557 /** 558 * Destroys the specified Indirection Table. 559 * 560 * @param ind_table 561 * Indirection table to release. 562 */ 563 static void 564 mlx5_ibv_ind_table_destroy(struct mlx5_ind_table_obj *ind_tbl) 565 { 566 claim_zero(mlx5_glue->destroy_rwq_ind_table(ind_tbl->ind_table)); 567 } 568 569 /** 570 * Create an Rx Hash queue. 571 * 572 * @param dev 573 * Pointer to Ethernet device. 574 * @param hrxq 575 * Pointer to Rx Hash queue. 576 * @param tunnel 577 * Tunnel type. 578 * 579 * @return 580 * 0 on success, a negative errno value otherwise and rte_errno is set. 581 */ 582 static int 583 mlx5_ibv_hrxq_new(struct rte_eth_dev *dev, struct mlx5_hrxq *hrxq, 584 int tunnel __rte_unused) 585 { 586 struct mlx5_priv *priv = dev->data->dev_private; 587 struct ibv_qp *qp = NULL; 588 struct mlx5_ind_table_obj *ind_tbl = hrxq->ind_table; 589 const uint8_t *rss_key = hrxq->rss_key; 590 uint64_t hash_fields = hrxq->hash_fields; 591 int err; 592 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT 593 struct mlx5dv_qp_init_attr qp_init_attr; 594 595 memset(&qp_init_attr, 0, sizeof(qp_init_attr)); 596 if (tunnel) { 597 qp_init_attr.comp_mask = 598 MLX5DV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS; 599 qp_init_attr.create_flags = MLX5DV_QP_CREATE_TUNNEL_OFFLOADS; 600 } 601 #ifdef HAVE_IBV_FLOW_DV_SUPPORT 602 if (dev->data->dev_conf.lpbk_mode) { 603 /* Allow packet sent from NIC loop back w/o source MAC check. */ 604 qp_init_attr.comp_mask |= 605 MLX5DV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS; 606 qp_init_attr.create_flags |= 607 MLX5DV_QP_CREATE_TIR_ALLOW_SELF_LOOPBACK_UC; 608 } 609 #endif 610 qp = mlx5_glue->dv_create_qp 611 (priv->sh->ctx, 612 &(struct ibv_qp_init_attr_ex){ 613 .qp_type = IBV_QPT_RAW_PACKET, 614 .comp_mask = 615 IBV_QP_INIT_ATTR_PD | 616 IBV_QP_INIT_ATTR_IND_TABLE | 617 IBV_QP_INIT_ATTR_RX_HASH, 618 .rx_hash_conf = (struct ibv_rx_hash_conf){ 619 .rx_hash_function = 620 IBV_RX_HASH_FUNC_TOEPLITZ, 621 .rx_hash_key_len = hrxq->rss_key_len, 622 .rx_hash_key = 623 (void *)(uintptr_t)rss_key, 624 .rx_hash_fields_mask = hash_fields, 625 }, 626 .rwq_ind_tbl = ind_tbl->ind_table, 627 .pd = priv->sh->pd, 628 }, 629 &qp_init_attr); 630 #else 631 qp = mlx5_glue->create_qp_ex 632 (priv->sh->ctx, 633 &(struct ibv_qp_init_attr_ex){ 634 .qp_type = IBV_QPT_RAW_PACKET, 635 .comp_mask = 636 IBV_QP_INIT_ATTR_PD | 637 IBV_QP_INIT_ATTR_IND_TABLE | 638 IBV_QP_INIT_ATTR_RX_HASH, 639 .rx_hash_conf = (struct ibv_rx_hash_conf){ 640 .rx_hash_function = 641 IBV_RX_HASH_FUNC_TOEPLITZ, 642 .rx_hash_key_len = hrxq->rss_key_len, 643 .rx_hash_key = 644 (void *)(uintptr_t)rss_key, 645 .rx_hash_fields_mask = hash_fields, 646 }, 647 .rwq_ind_tbl = ind_tbl->ind_table, 648 .pd = priv->sh->pd, 649 }); 650 #endif 651 if (!qp) { 652 rte_errno = errno; 653 goto error; 654 } 655 hrxq->qp = qp; 656 #ifdef HAVE_IBV_FLOW_DV_SUPPORT 657 hrxq->action = mlx5_glue->dv_create_flow_action_dest_ibv_qp(hrxq->qp); 658 if (!hrxq->action) { 659 rte_errno = errno; 660 goto error; 661 } 662 #endif 663 return 0; 664 error: 665 err = rte_errno; /* Save rte_errno before cleanup. */ 666 if (qp) 667 claim_zero(mlx5_glue->destroy_qp(qp)); 668 rte_errno = err; /* Restore rte_errno. */ 669 return -rte_errno; 670 } 671 672 /** 673 * Destroy a Verbs queue pair. 674 * 675 * @param hrxq 676 * Hash Rx queue to release its qp. 677 */ 678 static void 679 mlx5_ibv_qp_destroy(struct mlx5_hrxq *hrxq) 680 { 681 claim_zero(mlx5_glue->destroy_qp(hrxq->qp)); 682 } 683 684 /** 685 * Release a drop Rx queue Verbs object. 686 * 687 * @param dev 688 * Pointer to Ethernet device. 689 */ 690 static void 691 mlx5_rxq_ibv_obj_drop_release(struct rte_eth_dev *dev) 692 { 693 struct mlx5_priv *priv = dev->data->dev_private; 694 struct mlx5_rxq_obj *rxq = priv->drop_queue.rxq; 695 696 if (rxq->wq) 697 claim_zero(mlx5_glue->destroy_wq(rxq->wq)); 698 if (rxq->ibv_cq) 699 claim_zero(mlx5_glue->destroy_cq(rxq->ibv_cq)); 700 mlx5_free(rxq); 701 priv->drop_queue.rxq = NULL; 702 } 703 704 /** 705 * Create a drop Rx queue Verbs object. 706 * 707 * @param dev 708 * Pointer to Ethernet device. 709 * 710 * @return 711 * 0 on success, a negative errno value otherwise and rte_errno is set. 712 */ 713 static int 714 mlx5_rxq_ibv_obj_drop_create(struct rte_eth_dev *dev) 715 { 716 struct mlx5_priv *priv = dev->data->dev_private; 717 struct ibv_context *ctx = priv->sh->ctx; 718 struct mlx5_rxq_obj *rxq = priv->drop_queue.rxq; 719 720 if (rxq) 721 return 0; 722 rxq = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*rxq), 0, SOCKET_ID_ANY); 723 if (!rxq) { 724 DRV_LOG(DEBUG, "Port %u cannot allocate drop Rx queue memory.", 725 dev->data->port_id); 726 rte_errno = ENOMEM; 727 return -rte_errno; 728 } 729 priv->drop_queue.rxq = rxq; 730 rxq->ibv_cq = mlx5_glue->create_cq(ctx, 1, NULL, NULL, 0); 731 if (!rxq->ibv_cq) { 732 DRV_LOG(DEBUG, "Port %u cannot allocate CQ for drop queue.", 733 dev->data->port_id); 734 rte_errno = errno; 735 goto error; 736 } 737 rxq->wq = mlx5_glue->create_wq(ctx, &(struct ibv_wq_init_attr){ 738 .wq_type = IBV_WQT_RQ, 739 .max_wr = 1, 740 .max_sge = 1, 741 .pd = priv->sh->pd, 742 .cq = rxq->ibv_cq, 743 }); 744 if (!rxq->wq) { 745 DRV_LOG(DEBUG, "Port %u cannot allocate WQ for drop queue.", 746 dev->data->port_id); 747 rte_errno = errno; 748 goto error; 749 } 750 priv->drop_queue.rxq = rxq; 751 return 0; 752 error: 753 mlx5_rxq_ibv_obj_drop_release(dev); 754 return -rte_errno; 755 } 756 757 /** 758 * Create a Verbs drop action for Rx Hash queue. 759 * 760 * @param dev 761 * Pointer to Ethernet device. 762 * 763 * @return 764 * 0 on success, a negative errno value otherwise and rte_errno is set. 765 */ 766 static int 767 mlx5_ibv_drop_action_create(struct rte_eth_dev *dev) 768 { 769 struct mlx5_priv *priv = dev->data->dev_private; 770 struct mlx5_hrxq *hrxq = priv->drop_queue.hrxq; 771 struct ibv_rwq_ind_table *ind_tbl = NULL; 772 struct mlx5_rxq_obj *rxq; 773 int ret; 774 775 MLX5_ASSERT(hrxq && hrxq->ind_table); 776 ret = mlx5_rxq_ibv_obj_drop_create(dev); 777 if (ret < 0) 778 goto error; 779 rxq = priv->drop_queue.rxq; 780 ind_tbl = mlx5_glue->create_rwq_ind_table 781 (priv->sh->ctx, 782 &(struct ibv_rwq_ind_table_init_attr){ 783 .log_ind_tbl_size = 0, 784 .ind_tbl = (struct ibv_wq **)&rxq->wq, 785 .comp_mask = 0, 786 }); 787 if (!ind_tbl) { 788 DRV_LOG(DEBUG, "Port %u" 789 " cannot allocate indirection table for drop queue.", 790 dev->data->port_id); 791 rte_errno = errno; 792 goto error; 793 } 794 hrxq->qp = mlx5_glue->create_qp_ex(priv->sh->ctx, 795 &(struct ibv_qp_init_attr_ex){ 796 .qp_type = IBV_QPT_RAW_PACKET, 797 .comp_mask = IBV_QP_INIT_ATTR_PD | 798 IBV_QP_INIT_ATTR_IND_TABLE | 799 IBV_QP_INIT_ATTR_RX_HASH, 800 .rx_hash_conf = (struct ibv_rx_hash_conf){ 801 .rx_hash_function = IBV_RX_HASH_FUNC_TOEPLITZ, 802 .rx_hash_key_len = MLX5_RSS_HASH_KEY_LEN, 803 .rx_hash_key = rss_hash_default_key, 804 .rx_hash_fields_mask = 0, 805 }, 806 .rwq_ind_tbl = ind_tbl, 807 .pd = priv->sh->pd 808 }); 809 if (!hrxq->qp) { 810 DRV_LOG(DEBUG, "Port %u cannot allocate QP for drop queue.", 811 dev->data->port_id); 812 rte_errno = errno; 813 goto error; 814 } 815 #ifdef HAVE_IBV_FLOW_DV_SUPPORT 816 hrxq->action = mlx5_glue->dv_create_flow_action_dest_ibv_qp(hrxq->qp); 817 if (!hrxq->action) { 818 rte_errno = errno; 819 goto error; 820 } 821 #endif 822 hrxq->ind_table->ind_table = ind_tbl; 823 return 0; 824 error: 825 if (hrxq->qp) 826 claim_zero(mlx5_glue->destroy_qp(hrxq->qp)); 827 if (ind_tbl) 828 claim_zero(mlx5_glue->destroy_rwq_ind_table(ind_tbl)); 829 if (priv->drop_queue.rxq) 830 mlx5_rxq_ibv_obj_drop_release(dev); 831 return -rte_errno; 832 } 833 834 /** 835 * Release a drop hash Rx queue. 836 * 837 * @param dev 838 * Pointer to Ethernet device. 839 */ 840 static void 841 mlx5_ibv_drop_action_destroy(struct rte_eth_dev *dev) 842 { 843 struct mlx5_priv *priv = dev->data->dev_private; 844 struct mlx5_hrxq *hrxq = priv->drop_queue.hrxq; 845 struct ibv_rwq_ind_table *ind_tbl = hrxq->ind_table->ind_table; 846 847 #ifdef HAVE_IBV_FLOW_DV_SUPPORT 848 claim_zero(mlx5_glue->destroy_flow_action(hrxq->action)); 849 #endif 850 claim_zero(mlx5_glue->destroy_qp(hrxq->qp)); 851 claim_zero(mlx5_glue->destroy_rwq_ind_table(ind_tbl)); 852 mlx5_rxq_ibv_obj_drop_release(dev); 853 } 854 855 /** 856 * Create a QP Verbs object. 857 * 858 * @param dev 859 * Pointer to Ethernet device. 860 * @param idx 861 * Queue index in DPDK Tx queue array. 862 * 863 * @return 864 * The QP Verbs object, NULL otherwise and rte_errno is set. 865 */ 866 static struct ibv_qp * 867 mlx5_txq_ibv_qp_create(struct rte_eth_dev *dev, uint16_t idx) 868 { 869 struct mlx5_priv *priv = dev->data->dev_private; 870 struct mlx5_txq_data *txq_data = (*priv->txqs)[idx]; 871 struct mlx5_txq_ctrl *txq_ctrl = 872 container_of(txq_data, struct mlx5_txq_ctrl, txq); 873 struct ibv_qp *qp_obj = NULL; 874 struct ibv_qp_init_attr_ex qp_attr = { 0 }; 875 const int desc = 1 << txq_data->elts_n; 876 877 MLX5_ASSERT(txq_ctrl->obj->cq); 878 /* CQ to be associated with the send queue. */ 879 qp_attr.send_cq = txq_ctrl->obj->cq; 880 /* CQ to be associated with the receive queue. */ 881 qp_attr.recv_cq = txq_ctrl->obj->cq; 882 /* Max number of outstanding WRs. */ 883 qp_attr.cap.max_send_wr = ((priv->sh->device_attr.max_qp_wr < desc) ? 884 priv->sh->device_attr.max_qp_wr : desc); 885 /* 886 * Max number of scatter/gather elements in a WR, must be 1 to prevent 887 * libmlx5 from trying to affect must be 1 to prevent libmlx5 from 888 * trying to affect too much memory. TX gather is not impacted by the 889 * device_attr.max_sge limit and will still work properly. 890 */ 891 qp_attr.cap.max_send_sge = 1; 892 qp_attr.qp_type = IBV_QPT_RAW_PACKET, 893 /* Do *NOT* enable this, completions events are managed per Tx burst. */ 894 qp_attr.sq_sig_all = 0; 895 qp_attr.pd = priv->sh->pd; 896 qp_attr.comp_mask = IBV_QP_INIT_ATTR_PD; 897 if (txq_data->inlen_send) 898 qp_attr.cap.max_inline_data = txq_ctrl->max_inline_data; 899 if (txq_data->tso_en) { 900 qp_attr.max_tso_header = txq_ctrl->max_tso_header; 901 qp_attr.comp_mask |= IBV_QP_INIT_ATTR_MAX_TSO_HEADER; 902 } 903 qp_obj = mlx5_glue->create_qp_ex(priv->sh->ctx, &qp_attr); 904 if (qp_obj == NULL) { 905 DRV_LOG(ERR, "Port %u Tx queue %u QP creation failure.", 906 dev->data->port_id, idx); 907 rte_errno = errno; 908 } 909 return qp_obj; 910 } 911 912 /** 913 * Create the Tx queue Verbs object. 914 * 915 * @param dev 916 * Pointer to Ethernet device. 917 * @param idx 918 * Queue index in DPDK Tx queue array. 919 * 920 * @return 921 * 0 on success, a negative errno value otherwise and rte_errno is set. 922 */ 923 int 924 mlx5_txq_ibv_obj_new(struct rte_eth_dev *dev, uint16_t idx) 925 { 926 struct mlx5_priv *priv = dev->data->dev_private; 927 struct mlx5_txq_data *txq_data = (*priv->txqs)[idx]; 928 struct mlx5_txq_ctrl *txq_ctrl = 929 container_of(txq_data, struct mlx5_txq_ctrl, txq); 930 struct mlx5_txq_obj *txq_obj = txq_ctrl->obj; 931 unsigned int cqe_n; 932 struct mlx5dv_qp qp; 933 struct mlx5dv_cq cq_info; 934 struct mlx5dv_obj obj; 935 const int desc = 1 << txq_data->elts_n; 936 int ret = 0; 937 938 MLX5_ASSERT(txq_data); 939 MLX5_ASSERT(txq_obj); 940 txq_obj->txq_ctrl = txq_ctrl; 941 if (mlx5_getenv_int("MLX5_ENABLE_CQE_COMPRESSION")) { 942 DRV_LOG(ERR, "Port %u MLX5_ENABLE_CQE_COMPRESSION " 943 "must never be set.", dev->data->port_id); 944 rte_errno = EINVAL; 945 return -rte_errno; 946 } 947 cqe_n = desc / MLX5_TX_COMP_THRESH + 948 1 + MLX5_TX_COMP_THRESH_INLINE_DIV; 949 txq_obj->cq = mlx5_glue->create_cq(priv->sh->ctx, cqe_n, NULL, NULL, 0); 950 if (txq_obj->cq == NULL) { 951 DRV_LOG(ERR, "Port %u Tx queue %u CQ creation failure.", 952 dev->data->port_id, idx); 953 rte_errno = errno; 954 goto error; 955 } 956 txq_obj->qp = mlx5_txq_ibv_qp_create(dev, idx); 957 if (txq_obj->qp == NULL) { 958 rte_errno = errno; 959 goto error; 960 } 961 ret = mlx5_ibv_modify_qp(txq_obj, MLX5_TXQ_MOD_RST2RDY, 962 (uint8_t)priv->dev_port); 963 if (ret) { 964 DRV_LOG(ERR, "Port %u Tx queue %u QP state modifying failed.", 965 dev->data->port_id, idx); 966 rte_errno = errno; 967 goto error; 968 } 969 qp.comp_mask = MLX5DV_QP_MASK_UAR_MMAP_OFFSET; 970 #ifdef HAVE_IBV_FLOW_DV_SUPPORT 971 /* If using DevX, need additional mask to read tisn value. */ 972 if (priv->sh->devx && !priv->sh->tdn) 973 qp.comp_mask |= MLX5DV_QP_MASK_RAW_QP_HANDLES; 974 #endif 975 obj.cq.in = txq_obj->cq; 976 obj.cq.out = &cq_info; 977 obj.qp.in = txq_obj->qp; 978 obj.qp.out = &qp; 979 ret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_CQ | MLX5DV_OBJ_QP); 980 if (ret != 0) { 981 rte_errno = errno; 982 goto error; 983 } 984 if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) { 985 DRV_LOG(ERR, 986 "Port %u wrong MLX5_CQE_SIZE environment variable" 987 " value: it should be set to %u.", 988 dev->data->port_id, RTE_CACHE_LINE_SIZE); 989 rte_errno = EINVAL; 990 goto error; 991 } 992 txq_data->cqe_n = log2above(cq_info.cqe_cnt); 993 txq_data->cqe_s = 1 << txq_data->cqe_n; 994 txq_data->cqe_m = txq_data->cqe_s - 1; 995 txq_data->qp_num_8s = ((struct ibv_qp *)txq_obj->qp)->qp_num << 8; 996 txq_data->wqes = qp.sq.buf; 997 txq_data->wqe_n = log2above(qp.sq.wqe_cnt); 998 txq_data->wqe_s = 1 << txq_data->wqe_n; 999 txq_data->wqe_m = txq_data->wqe_s - 1; 1000 txq_data->wqes_end = txq_data->wqes + txq_data->wqe_s; 1001 txq_data->qp_db = &qp.dbrec[MLX5_SND_DBR]; 1002 txq_data->cq_db = cq_info.dbrec; 1003 txq_data->cqes = (volatile struct mlx5_cqe *)cq_info.buf; 1004 txq_data->cq_ci = 0; 1005 txq_data->cq_pi = 0; 1006 txq_data->wqe_ci = 0; 1007 txq_data->wqe_pi = 0; 1008 txq_data->wqe_comp = 0; 1009 txq_data->wqe_thres = txq_data->wqe_s / MLX5_TX_COMP_THRESH_INLINE_DIV; 1010 #ifdef HAVE_IBV_FLOW_DV_SUPPORT 1011 /* 1012 * If using DevX need to query and store TIS transport domain value. 1013 * This is done once per port. 1014 * Will use this value on Rx, when creating matching TIR. 1015 */ 1016 if (priv->sh->devx && !priv->sh->tdn) { 1017 ret = mlx5_devx_cmd_qp_query_tis_td(txq_obj->qp, qp.tisn, 1018 &priv->sh->tdn); 1019 if (ret) { 1020 DRV_LOG(ERR, "Fail to query port %u Tx queue %u QP TIS " 1021 "transport domain.", dev->data->port_id, idx); 1022 rte_errno = EINVAL; 1023 goto error; 1024 } else { 1025 DRV_LOG(DEBUG, "Port %u Tx queue %u TIS number %d " 1026 "transport domain %d.", dev->data->port_id, 1027 idx, qp.tisn, priv->sh->tdn); 1028 } 1029 } 1030 #endif 1031 txq_ctrl->bf_reg = qp.bf.reg; 1032 if (qp.comp_mask & MLX5DV_QP_MASK_UAR_MMAP_OFFSET) { 1033 txq_ctrl->uar_mmap_offset = qp.uar_mmap_offset; 1034 DRV_LOG(DEBUG, "Port %u: uar_mmap_offset 0x%" PRIx64 ".", 1035 dev->data->port_id, txq_ctrl->uar_mmap_offset); 1036 } else { 1037 DRV_LOG(ERR, 1038 "Port %u failed to retrieve UAR info, invalid" 1039 " libmlx5.so", 1040 dev->data->port_id); 1041 rte_errno = EINVAL; 1042 goto error; 1043 } 1044 txq_uar_init(txq_ctrl); 1045 dev->data->tx_queue_state[idx] = RTE_ETH_QUEUE_STATE_STARTED; 1046 return 0; 1047 error: 1048 ret = rte_errno; /* Save rte_errno before cleanup. */ 1049 if (txq_obj->cq) 1050 claim_zero(mlx5_glue->destroy_cq(txq_obj->cq)); 1051 if (txq_obj->qp) 1052 claim_zero(mlx5_glue->destroy_qp(txq_obj->qp)); 1053 rte_errno = ret; /* Restore rte_errno. */ 1054 return -rte_errno; 1055 } 1056 1057 /** 1058 * Release an Tx verbs queue object. 1059 * 1060 * @param txq_obj 1061 * Verbs Tx queue object.. 1062 */ 1063 void 1064 mlx5_txq_ibv_obj_release(struct mlx5_txq_obj *txq_obj) 1065 { 1066 MLX5_ASSERT(txq_obj); 1067 claim_zero(mlx5_glue->destroy_qp(txq_obj->qp)); 1068 claim_zero(mlx5_glue->destroy_cq(txq_obj->cq)); 1069 } 1070 1071 struct mlx5_obj_ops ibv_obj_ops = { 1072 .rxq_obj_modify_vlan_strip = mlx5_rxq_obj_modify_wq_vlan_strip, 1073 .rxq_obj_new = mlx5_rxq_ibv_obj_new, 1074 .rxq_event_get = mlx5_rx_ibv_get_event, 1075 .rxq_obj_modify = mlx5_ibv_modify_wq, 1076 .rxq_obj_release = mlx5_rxq_ibv_obj_release, 1077 .ind_table_new = mlx5_ibv_ind_table_new, 1078 .ind_table_destroy = mlx5_ibv_ind_table_destroy, 1079 .hrxq_new = mlx5_ibv_hrxq_new, 1080 .hrxq_destroy = mlx5_ibv_qp_destroy, 1081 .drop_action_create = mlx5_ibv_drop_action_create, 1082 .drop_action_destroy = mlx5_ibv_drop_action_destroy, 1083 .txq_obj_new = mlx5_txq_ibv_obj_new, 1084 .txq_obj_modify = mlx5_ibv_modify_qp, 1085 .txq_obj_release = mlx5_txq_ibv_obj_release, 1086 }; 1087