1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2015 6WIND S.A. 3 * Copyright 2015 Mellanox Technologies, Ltd 4 */ 5 6 #include <stddef.h> 7 #include <errno.h> 8 #include <string.h> 9 #include <stdint.h> 10 #include <fcntl.h> 11 #include <sys/queue.h> 12 13 #include <rte_mbuf.h> 14 #include <rte_malloc.h> 15 #include <rte_ethdev_driver.h> 16 #include <rte_common.h> 17 #include <rte_interrupts.h> 18 #include <rte_debug.h> 19 #include <rte_io.h> 20 #include <rte_eal_paging.h> 21 22 #include <mlx5_glue.h> 23 #include <mlx5_devx_cmds.h> 24 #include <mlx5_malloc.h> 25 26 #include "mlx5_defs.h" 27 #include "mlx5.h" 28 #include "mlx5_common_os.h" 29 #include "mlx5_rxtx.h" 30 #include "mlx5_utils.h" 31 #include "mlx5_autoconf.h" 32 #include "mlx5_flow.h" 33 34 35 /* Default RSS hash key also used for ConnectX-3. */ 36 uint8_t rss_hash_default_key[] = { 37 0x2c, 0xc6, 0x81, 0xd1, 38 0x5b, 0xdb, 0xf4, 0xf7, 39 0xfc, 0xa2, 0x83, 0x19, 40 0xdb, 0x1a, 0x3e, 0x94, 41 0x6b, 0x9e, 0x38, 0xd9, 42 0x2c, 0x9c, 0x03, 0xd1, 43 0xad, 0x99, 0x44, 0xa7, 44 0xd9, 0x56, 0x3d, 0x59, 45 0x06, 0x3c, 0x25, 0xf3, 46 0xfc, 0x1f, 0xdc, 0x2a, 47 }; 48 49 /* Length of the default RSS hash key. */ 50 static_assert(MLX5_RSS_HASH_KEY_LEN == 51 (unsigned int)sizeof(rss_hash_default_key), 52 "wrong RSS default key size."); 53 54 /** 55 * Check whether Multi-Packet RQ can be enabled for the device. 56 * 57 * @param dev 58 * Pointer to Ethernet device. 59 * 60 * @return 61 * 1 if supported, negative errno value if not. 62 */ 63 inline int 64 mlx5_check_mprq_support(struct rte_eth_dev *dev) 65 { 66 struct mlx5_priv *priv = dev->data->dev_private; 67 68 if (priv->config.mprq.enabled && 69 priv->rxqs_n >= priv->config.mprq.min_rxqs_num) 70 return 1; 71 return -ENOTSUP; 72 } 73 74 /** 75 * Check whether Multi-Packet RQ is enabled for the Rx queue. 76 * 77 * @param rxq 78 * Pointer to receive queue structure. 79 * 80 * @return 81 * 0 if disabled, otherwise enabled. 82 */ 83 inline int 84 mlx5_rxq_mprq_enabled(struct mlx5_rxq_data *rxq) 85 { 86 return rxq->strd_num_n > 0; 87 } 88 89 /** 90 * Check whether Multi-Packet RQ is enabled for the device. 91 * 92 * @param dev 93 * Pointer to Ethernet device. 94 * 95 * @return 96 * 0 if disabled, otherwise enabled. 97 */ 98 inline int 99 mlx5_mprq_enabled(struct rte_eth_dev *dev) 100 { 101 struct mlx5_priv *priv = dev->data->dev_private; 102 uint32_t i; 103 uint16_t n = 0; 104 uint16_t n_ibv = 0; 105 106 if (mlx5_check_mprq_support(dev) < 0) 107 return 0; 108 /* All the configured queues should be enabled. */ 109 for (i = 0; i < priv->rxqs_n; ++i) { 110 struct mlx5_rxq_data *rxq = (*priv->rxqs)[i]; 111 struct mlx5_rxq_ctrl *rxq_ctrl = container_of 112 (rxq, struct mlx5_rxq_ctrl, rxq); 113 114 if (rxq == NULL || rxq_ctrl->type != MLX5_RXQ_TYPE_STANDARD) 115 continue; 116 n_ibv++; 117 if (mlx5_rxq_mprq_enabled(rxq)) 118 ++n; 119 } 120 /* Multi-Packet RQ can't be partially configured. */ 121 MLX5_ASSERT(n == 0 || n == n_ibv); 122 return n == n_ibv; 123 } 124 125 /** 126 * Allocate RX queue elements for Multi-Packet RQ. 127 * 128 * @param rxq_ctrl 129 * Pointer to RX queue structure. 130 * 131 * @return 132 * 0 on success, a negative errno value otherwise and rte_errno is set. 133 */ 134 static int 135 rxq_alloc_elts_mprq(struct mlx5_rxq_ctrl *rxq_ctrl) 136 { 137 struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq; 138 unsigned int wqe_n = 1 << rxq->elts_n; 139 unsigned int i; 140 int err; 141 142 /* Iterate on segments. 
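 * One buffer is taken from the mempool for every WQE, plus one extra that
 * is kept in rxq->mprq_repl for datapath replenishment.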
*/ 143 for (i = 0; i <= wqe_n; ++i) { 144 struct mlx5_mprq_buf *buf; 145 146 if (rte_mempool_get(rxq->mprq_mp, (void **)&buf) < 0) { 147 DRV_LOG(ERR, "port %u empty mbuf pool", rxq->port_id); 148 rte_errno = ENOMEM; 149 goto error; 150 } 151 if (i < wqe_n) 152 (*rxq->mprq_bufs)[i] = buf; 153 else 154 rxq->mprq_repl = buf; 155 } 156 DRV_LOG(DEBUG, 157 "port %u Rx queue %u allocated and configured %u segments", 158 rxq->port_id, rxq->idx, wqe_n); 159 return 0; 160 error: 161 err = rte_errno; /* Save rte_errno before cleanup. */ 162 wqe_n = i; 163 for (i = 0; (i != wqe_n); ++i) { 164 if ((*rxq->mprq_bufs)[i] != NULL) 165 rte_mempool_put(rxq->mprq_mp, 166 (*rxq->mprq_bufs)[i]); 167 (*rxq->mprq_bufs)[i] = NULL; 168 } 169 DRV_LOG(DEBUG, "port %u Rx queue %u failed, freed everything", 170 rxq->port_id, rxq->idx); 171 rte_errno = err; /* Restore rte_errno. */ 172 return -rte_errno; 173 } 174 175 /** 176 * Allocate RX queue elements for Single-Packet RQ. 177 * 178 * @param rxq_ctrl 179 * Pointer to RX queue structure. 180 * 181 * @return 182 * 0 on success, errno value on failure. 183 */ 184 static int 185 rxq_alloc_elts_sprq(struct mlx5_rxq_ctrl *rxq_ctrl) 186 { 187 const unsigned int sges_n = 1 << rxq_ctrl->rxq.sges_n; 188 unsigned int elts_n = 1 << rxq_ctrl->rxq.elts_n; 189 unsigned int i; 190 int err; 191 192 /* Iterate on segments. */ 193 for (i = 0; (i != elts_n); ++i) { 194 struct rte_mbuf *buf; 195 196 buf = rte_pktmbuf_alloc(rxq_ctrl->rxq.mp); 197 if (buf == NULL) { 198 DRV_LOG(ERR, "port %u empty mbuf pool", 199 PORT_ID(rxq_ctrl->priv)); 200 rte_errno = ENOMEM; 201 goto error; 202 } 203 /* Headroom is reserved by rte_pktmbuf_alloc(). */ 204 MLX5_ASSERT(DATA_OFF(buf) == RTE_PKTMBUF_HEADROOM); 205 /* Buffer is supposed to be empty. */ 206 MLX5_ASSERT(rte_pktmbuf_data_len(buf) == 0); 207 MLX5_ASSERT(rte_pktmbuf_pkt_len(buf) == 0); 208 MLX5_ASSERT(!buf->next); 209 /* Only the first segment keeps headroom. */ 210 if (i % sges_n) 211 SET_DATA_OFF(buf, 0); 212 PORT(buf) = rxq_ctrl->rxq.port_id; 213 DATA_LEN(buf) = rte_pktmbuf_tailroom(buf); 214 PKT_LEN(buf) = DATA_LEN(buf); 215 NB_SEGS(buf) = 1; 216 (*rxq_ctrl->rxq.elts)[i] = buf; 217 } 218 /* If Rx vector is activated. */ 219 if (mlx5_rxq_check_vec_support(&rxq_ctrl->rxq) > 0) { 220 struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq; 221 struct rte_mbuf *mbuf_init = &rxq->fake_mbuf; 222 struct rte_pktmbuf_pool_private *priv = 223 (struct rte_pktmbuf_pool_private *) 224 rte_mempool_get_priv(rxq_ctrl->rxq.mp); 225 int j; 226 227 /* Initialize default rearm_data for vPMD. */ 228 mbuf_init->data_off = RTE_PKTMBUF_HEADROOM; 229 rte_mbuf_refcnt_set(mbuf_init, 1); 230 mbuf_init->nb_segs = 1; 231 mbuf_init->port = rxq->port_id; 232 if (priv->flags & RTE_PKTMBUF_POOL_F_PINNED_EXT_BUF) 233 mbuf_init->ol_flags = EXT_ATTACHED_MBUF; 234 /* 235 * prevent compiler reordering: 236 * rearm_data covers previous fields. 237 */ 238 rte_compiler_barrier(); 239 rxq->mbuf_initializer = 240 *(rte_xmm_t *)&mbuf_init->rearm_data; 241 /* Padding with a fake mbuf for vectorized Rx. */ 242 for (j = 0; j < MLX5_VPMD_DESCS_PER_LOOP; ++j) 243 (*rxq->elts)[elts_n + j] = &rxq->fake_mbuf; 244 } 245 DRV_LOG(DEBUG, 246 "port %u Rx queue %u allocated and configured %u segments" 247 " (max %u packets)", 248 PORT_ID(rxq_ctrl->priv), rxq_ctrl->rxq.idx, elts_n, 249 elts_n / (1 << rxq_ctrl->rxq.sges_n)); 250 return 0; 251 error: 252 err = rte_errno; /* Save rte_errno before cleanup. 
*/ 253 elts_n = i; 254 for (i = 0; (i != elts_n); ++i) { 255 if ((*rxq_ctrl->rxq.elts)[i] != NULL) 256 rte_pktmbuf_free_seg((*rxq_ctrl->rxq.elts)[i]); 257 (*rxq_ctrl->rxq.elts)[i] = NULL; 258 } 259 DRV_LOG(DEBUG, "port %u Rx queue %u failed, freed everything", 260 PORT_ID(rxq_ctrl->priv), rxq_ctrl->rxq.idx); 261 rte_errno = err; /* Restore rte_errno. */ 262 return -rte_errno; 263 } 264 265 /** 266 * Allocate RX queue elements. 267 * 268 * @param rxq_ctrl 269 * Pointer to RX queue structure. 270 * 271 * @return 272 * 0 on success, errno value on failure. 273 */ 274 int 275 rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl) 276 { 277 return mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq) ? 278 rxq_alloc_elts_mprq(rxq_ctrl) : rxq_alloc_elts_sprq(rxq_ctrl); 279 } 280 281 /** 282 * Free RX queue elements for Multi-Packet RQ. 283 * 284 * @param rxq_ctrl 285 * Pointer to RX queue structure. 286 */ 287 static void 288 rxq_free_elts_mprq(struct mlx5_rxq_ctrl *rxq_ctrl) 289 { 290 struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq; 291 uint16_t i; 292 293 DRV_LOG(DEBUG, "port %u Multi-Packet Rx queue %u freeing WRs", 294 rxq->port_id, rxq->idx); 295 if (rxq->mprq_bufs == NULL) 296 return; 297 MLX5_ASSERT(mlx5_rxq_check_vec_support(rxq) < 0); 298 for (i = 0; (i != (1u << rxq->elts_n)); ++i) { 299 if ((*rxq->mprq_bufs)[i] != NULL) 300 mlx5_mprq_buf_free((*rxq->mprq_bufs)[i]); 301 (*rxq->mprq_bufs)[i] = NULL; 302 } 303 if (rxq->mprq_repl != NULL) { 304 mlx5_mprq_buf_free(rxq->mprq_repl); 305 rxq->mprq_repl = NULL; 306 } 307 } 308 309 /** 310 * Free RX queue elements for Single-Packet RQ. 311 * 312 * @param rxq_ctrl 313 * Pointer to RX queue structure. 314 */ 315 static void 316 rxq_free_elts_sprq(struct mlx5_rxq_ctrl *rxq_ctrl) 317 { 318 struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq; 319 const uint16_t q_n = (1 << rxq->elts_n); 320 const uint16_t q_mask = q_n - 1; 321 uint16_t used = q_n - (rxq->rq_ci - rxq->rq_pi); 322 uint16_t i; 323 324 DRV_LOG(DEBUG, "port %u Rx queue %u freeing WRs", 325 PORT_ID(rxq_ctrl->priv), rxq->idx); 326 if (rxq->elts == NULL) 327 return; 328 /** 329 * Some mbuf in the Ring belongs to the application. They cannot be 330 * freed. 331 */ 332 if (mlx5_rxq_check_vec_support(rxq) > 0) { 333 for (i = 0; i < used; ++i) 334 (*rxq->elts)[(rxq->rq_ci + i) & q_mask] = NULL; 335 rxq->rq_pi = rxq->rq_ci; 336 } 337 for (i = 0; (i != (1u << rxq->elts_n)); ++i) { 338 if ((*rxq->elts)[i] != NULL) 339 rte_pktmbuf_free_seg((*rxq->elts)[i]); 340 (*rxq->elts)[i] = NULL; 341 } 342 } 343 344 /** 345 * Free RX queue elements. 346 * 347 * @param rxq_ctrl 348 * Pointer to RX queue structure. 349 */ 350 static void 351 rxq_free_elts(struct mlx5_rxq_ctrl *rxq_ctrl) 352 { 353 if (mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq)) 354 rxq_free_elts_mprq(rxq_ctrl); 355 else 356 rxq_free_elts_sprq(rxq_ctrl); 357 } 358 359 /** 360 * Returns the per-queue supported offloads. 361 * 362 * @param dev 363 * Pointer to Ethernet device. 364 * 365 * @return 366 * Supported Rx offloads. 
367 */ 368 uint64_t 369 mlx5_get_rx_queue_offloads(struct rte_eth_dev *dev) 370 { 371 struct mlx5_priv *priv = dev->data->dev_private; 372 struct mlx5_dev_config *config = &priv->config; 373 uint64_t offloads = (DEV_RX_OFFLOAD_SCATTER | 374 DEV_RX_OFFLOAD_TIMESTAMP | 375 DEV_RX_OFFLOAD_JUMBO_FRAME | 376 DEV_RX_OFFLOAD_RSS_HASH); 377 378 if (config->hw_fcs_strip) 379 offloads |= DEV_RX_OFFLOAD_KEEP_CRC; 380 381 if (config->hw_csum) 382 offloads |= (DEV_RX_OFFLOAD_IPV4_CKSUM | 383 DEV_RX_OFFLOAD_UDP_CKSUM | 384 DEV_RX_OFFLOAD_TCP_CKSUM); 385 if (config->hw_vlan_strip) 386 offloads |= DEV_RX_OFFLOAD_VLAN_STRIP; 387 if (MLX5_LRO_SUPPORTED(dev)) 388 offloads |= DEV_RX_OFFLOAD_TCP_LRO; 389 return offloads; 390 } 391 392 393 /** 394 * Returns the per-port supported offloads. 395 * 396 * @return 397 * Supported Rx offloads. 398 */ 399 uint64_t 400 mlx5_get_rx_port_offloads(void) 401 { 402 uint64_t offloads = DEV_RX_OFFLOAD_VLAN_FILTER; 403 404 return offloads; 405 } 406 407 /** 408 * Verify if the queue can be released. 409 * 410 * @param dev 411 * Pointer to Ethernet device. 412 * @param idx 413 * RX queue index. 414 * 415 * @return 416 * 1 if the queue can be released 417 * 0 if the queue can not be released, there are references to it. 418 * Negative errno and rte_errno is set if queue doesn't exist. 419 */ 420 static int 421 mlx5_rxq_releasable(struct rte_eth_dev *dev, uint16_t idx) 422 { 423 struct mlx5_priv *priv = dev->data->dev_private; 424 struct mlx5_rxq_ctrl *rxq_ctrl; 425 426 if (!(*priv->rxqs)[idx]) { 427 rte_errno = EINVAL; 428 return -rte_errno; 429 } 430 rxq_ctrl = container_of((*priv->rxqs)[idx], struct mlx5_rxq_ctrl, rxq); 431 return (rte_atomic32_read(&rxq_ctrl->refcnt) == 1); 432 } 433 434 /* Fetches and drops all SW-owned and error CQEs to synchronize CQ. */ 435 static void 436 rxq_sync_cq(struct mlx5_rxq_data *rxq) 437 { 438 const uint16_t cqe_n = 1 << rxq->cqe_n; 439 const uint16_t cqe_mask = cqe_n - 1; 440 volatile struct mlx5_cqe *cqe; 441 int ret, i; 442 443 i = cqe_n; 444 do { 445 cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_mask]; 446 ret = check_cqe(cqe, cqe_n, rxq->cq_ci); 447 if (ret == MLX5_CQE_STATUS_HW_OWN) 448 break; 449 if (ret == MLX5_CQE_STATUS_ERR) { 450 rxq->cq_ci++; 451 continue; 452 } 453 MLX5_ASSERT(ret == MLX5_CQE_STATUS_SW_OWN); 454 if (MLX5_CQE_FORMAT(cqe->op_own) != MLX5_COMPRESSED) { 455 rxq->cq_ci++; 456 continue; 457 } 458 /* Compute the next non compressed CQE. */ 459 rxq->cq_ci += rte_be_to_cpu_32(cqe->byte_cnt); 460 461 } while (--i); 462 /* Move all CQEs to HW ownership, including possible MiniCQEs. */ 463 for (i = 0; i < cqe_n; i++) { 464 cqe = &(*rxq->cqes)[i]; 465 cqe->op_own = MLX5_CQE_INVALIDATE; 466 } 467 /* Resync CQE and WQE (WQ in RESET state). */ 468 rte_cio_wmb(); 469 *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci); 470 rte_cio_wmb(); 471 *rxq->rq_db = rte_cpu_to_be_32(0); 472 rte_cio_wmb(); 473 } 474 475 /** 476 * Rx queue stop. Device queue goes to the RESET state, 477 * all involved mbufs are freed from WQ. 478 * 479 * @param dev 480 * Pointer to Ethernet device structure. 481 * @param idx 482 * RX queue index. 483 * 484 * @return 485 * 0 on success, a negative errno value otherwise and rte_errno is set. 
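 *
 * The routine is used by the primary process only; a secondary process
 * requests the operation from the primary via mlx5_mp_os_req_queue_control().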
 */
int
mlx5_rx_queue_stop_primary(struct rte_eth_dev *dev, uint16_t idx)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_rxq_data *rxq = (*priv->rxqs)[idx];
	struct mlx5_rxq_ctrl *rxq_ctrl =
			container_of(rxq, struct mlx5_rxq_ctrl, rxq);
	int ret;

	MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
	if (rxq_ctrl->obj->type == MLX5_RXQ_OBJ_TYPE_IBV) {
		struct ibv_wq_attr mod = {
			.attr_mask = IBV_WQ_ATTR_STATE,
			.wq_state = IBV_WQS_RESET,
		};

		ret = mlx5_glue->modify_wq(rxq_ctrl->obj->wq, &mod);
	} else { /* rxq_ctrl->obj->type == MLX5_RXQ_OBJ_TYPE_DEVX_RQ. */
		struct mlx5_devx_modify_rq_attr rq_attr;

		memset(&rq_attr, 0, sizeof(rq_attr));
		/* Move the RQ from the READY to the RESET state. */
		rq_attr.rq_state = MLX5_RQC_STATE_RDY;
		rq_attr.state = MLX5_RQC_STATE_RST;
		ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, &rq_attr);
	}
	if (ret) {
		DRV_LOG(ERR, "Cannot change Rx WQ state to RESET: %s",
			strerror(errno));
		rte_errno = errno;
		return ret;
	}
	/* Remove all processed CQEs. */
	rxq_sync_cq(rxq);
	/* Free all involved mbufs. */
	rxq_free_elts(rxq_ctrl);
	/* Set the actual queue state. */
	dev->data->rx_queue_state[idx] = RTE_ETH_QUEUE_STATE_STOPPED;
	return 0;
}

/**
 * Rx queue stop. Device queue goes to the RESET state,
 * all involved mbufs are freed from WQ.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param idx
 *   RX queue index.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_rx_queue_stop(struct rte_eth_dev *dev, uint16_t idx)
{
	eth_rx_burst_t pkt_burst = dev->rx_pkt_burst;
	int ret;

	if (dev->data->rx_queue_state[idx] == RTE_ETH_QUEUE_STATE_HAIRPIN) {
		DRV_LOG(ERR, "Hairpin queue can't be stopped");
		rte_errno = EINVAL;
		return -EINVAL;
	}
	if (dev->data->rx_queue_state[idx] == RTE_ETH_QUEUE_STATE_STOPPED)
		return 0;
	/*
	 * Vectorized Rx burst requires the CQ and RQ indices to stay
	 * synchronized; this might be broken on RQ restart and cause Rx
	 * malfunction, so queue stopping is not supported if vectorized
	 * Rx burst is engaged. The burst routine pointer depends on the
	 * process type, hence the check is performed here.
	 */
	if (pkt_burst == mlx5_rx_burst_vec) {
		DRV_LOG(ERR, "Rx queue stop is not supported "
			"for vectorized Rx");
		rte_errno = EINVAL;
		return -EINVAL;
	}
	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
		ret = mlx5_mp_os_req_queue_control(dev, idx,
						   MLX5_MP_REQ_QUEUE_RX_STOP);
	} else {
		ret = mlx5_rx_queue_stop_primary(dev, idx);
	}
	return ret;
}

/**
 * Rx queue start. Device queue goes to the ready state,
 * all required mbufs are allocated and WQ is replenished.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param idx
 *   RX queue index.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_rx_queue_start_primary(struct rte_eth_dev *dev, uint16_t idx)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_rxq_data *rxq = (*priv->rxqs)[idx];
	struct mlx5_rxq_ctrl *rxq_ctrl =
			container_of(rxq, struct mlx5_rxq_ctrl, rxq);
	int ret;

	MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
	/* Allocate needed buffers. */
	ret = rxq_alloc_elts(rxq_ctrl);
	if (ret) {
		DRV_LOG(ERR, "Cannot reallocate buffers for Rx WQ");
		rte_errno = errno;
		return ret;
	}
	rte_cio_wmb();
	*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
	rte_cio_wmb();
	/* Reset RQ consumer before moving queue to READY state. */
	*rxq->rq_db = rte_cpu_to_be_32(0);
	rte_cio_wmb();
	if (rxq_ctrl->obj->type == MLX5_RXQ_OBJ_TYPE_IBV) {
		struct ibv_wq_attr mod = {
			.attr_mask = IBV_WQ_ATTR_STATE,
			.wq_state = IBV_WQS_RDY,
		};

		ret = mlx5_glue->modify_wq(rxq_ctrl->obj->wq, &mod);
	} else { /* rxq_ctrl->obj->type == MLX5_RXQ_OBJ_TYPE_DEVX_RQ. */
		struct mlx5_devx_modify_rq_attr rq_attr;

		memset(&rq_attr, 0, sizeof(rq_attr));
		/* Move the RQ from the RESET to the READY state. */
		rq_attr.rq_state = MLX5_RQC_STATE_RST;
		rq_attr.state = MLX5_RQC_STATE_RDY;
		ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, &rq_attr);
	}
	if (ret) {
		DRV_LOG(ERR, "Cannot change Rx WQ state to READY: %s",
			strerror(errno));
		rte_errno = errno;
		return ret;
	}
	/* Reinitialize RQ - set WQEs. */
	mlx5_rxq_initialize(rxq);
	rxq->err_state = MLX5_RXQ_ERR_STATE_NO_ERROR;
	/* Set actual queue state. */
	dev->data->rx_queue_state[idx] = RTE_ETH_QUEUE_STATE_STARTED;
	return 0;
}

/**
 * Rx queue start. Device queue goes to the ready state,
 * all required mbufs are allocated and WQ is replenished.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param idx
 *   RX queue index.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_rx_queue_start(struct rte_eth_dev *dev, uint16_t idx)
{
	int ret;

	if (dev->data->rx_queue_state[idx] == RTE_ETH_QUEUE_STATE_HAIRPIN) {
		DRV_LOG(ERR, "Hairpin queue can't be started");
		rte_errno = EINVAL;
		return -EINVAL;
	}
	if (dev->data->rx_queue_state[idx] == RTE_ETH_QUEUE_STATE_STARTED)
		return 0;
	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
		ret = mlx5_mp_os_req_queue_control(dev, idx,
						   MLX5_MP_REQ_QUEUE_RX_START);
	} else {
		ret = mlx5_rx_queue_start_primary(dev, idx);
	}
	return ret;
}

/**
 * Rx queue presetup checks.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param idx
 *   RX queue index.
 * @param desc
 *   Number of descriptors to configure in queue.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_rx_queue_pre_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t *desc)
{
	struct mlx5_priv *priv = dev->data->dev_private;

	if (!rte_is_power_of_2(*desc)) {
		*desc = 1 << log2above(*desc);
		DRV_LOG(WARNING,
			"port %u increased number of descriptors in Rx queue %u"
			" to the next power of two (%d)",
			dev->data->port_id, idx, *desc);
	}
	DRV_LOG(DEBUG, "port %u configuring Rx queue %u for %u descriptors",
		dev->data->port_id, idx, *desc);
	if (idx >= priv->rxqs_n) {
		DRV_LOG(ERR, "port %u Rx queue index out of range (%u >= %u)",
			dev->data->port_id, idx, priv->rxqs_n);
		rte_errno = EOVERFLOW;
		return -rte_errno;
	}
	if (!mlx5_rxq_releasable(dev, idx)) {
		DRV_LOG(ERR, "port %u unable to release queue index %u",
			dev->data->port_id, idx);
		rte_errno = EBUSY;
		return -rte_errno;
	}
	mlx5_rxq_release(dev, idx);
	return 0;
}

/**
 * DPDK callback to configure a Rx queue.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param idx
 *   RX queue index.
 * @param desc
 *   Number of descriptors to configure in queue.
 * @param socket
 *   NUMA socket on which memory must be allocated.
 * @param[in] conf
 *   Thresholds parameters.
 * @param mp
 *   Memory pool for buffer allocations.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
		    unsigned int socket, const struct rte_eth_rxconf *conf,
		    struct rte_mempool *mp)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_rxq_data *rxq = (*priv->rxqs)[idx];
	struct mlx5_rxq_ctrl *rxq_ctrl =
		container_of(rxq, struct mlx5_rxq_ctrl, rxq);
	int res;

	res = mlx5_rx_queue_pre_setup(dev, idx, &desc);
	if (res)
		return res;
	rxq_ctrl = mlx5_rxq_new(dev, idx, desc, socket, conf, mp);
	if (!rxq_ctrl) {
		DRV_LOG(ERR, "port %u unable to allocate queue index %u",
			dev->data->port_id, idx);
		rte_errno = ENOMEM;
		return -rte_errno;
	}
	DRV_LOG(DEBUG, "port %u adding Rx queue %u to list",
		dev->data->port_id, idx);
	(*priv->rxqs)[idx] = &rxq_ctrl->rxq;
	return 0;
}

/**
 * DPDK callback to configure a Rx hairpin queue.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param idx
 *   RX queue index.
 * @param desc
 *   Number of descriptors to configure in queue.
 * @param hairpin_conf
 *   Hairpin configuration parameters.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_rx_hairpin_queue_setup(struct rte_eth_dev *dev, uint16_t idx,
			    uint16_t desc,
			    const struct rte_eth_hairpin_conf *hairpin_conf)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_rxq_data *rxq = (*priv->rxqs)[idx];
	struct mlx5_rxq_ctrl *rxq_ctrl =
		container_of(rxq, struct mlx5_rxq_ctrl, rxq);
	int res;

	res = mlx5_rx_queue_pre_setup(dev, idx, &desc);
	if (res)
		return res;
	if (hairpin_conf->peer_count != 1 ||
	    hairpin_conf->peers[0].port != dev->data->port_id ||
	    hairpin_conf->peers[0].queue >= priv->txqs_n) {
		DRV_LOG(ERR, "port %u unable to setup hairpin queue index %u:"
			" invalid hairpin configuration", dev->data->port_id,
			idx);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	rxq_ctrl = mlx5_rxq_hairpin_new(dev, idx, desc, hairpin_conf);
	if (!rxq_ctrl) {
		DRV_LOG(ERR, "port %u unable to allocate queue index %u",
			dev->data->port_id, idx);
		rte_errno = ENOMEM;
		return -rte_errno;
	}
	DRV_LOG(DEBUG, "port %u adding Rx queue %u to list",
		dev->data->port_id, idx);
	(*priv->rxqs)[idx] = &rxq_ctrl->rxq;
	return 0;
}

/**
 * DPDK callback to release a RX queue.
 *
 * @param dpdk_rxq
 *   Generic RX queue pointer.
 */
void
mlx5_rx_queue_release(void *dpdk_rxq)
{
	struct mlx5_rxq_data *rxq = (struct mlx5_rxq_data *)dpdk_rxq;
	struct mlx5_rxq_ctrl *rxq_ctrl;
	struct mlx5_priv *priv;

	if (rxq == NULL)
		return;
	rxq_ctrl = container_of(rxq, struct mlx5_rxq_ctrl, rxq);
	priv = rxq_ctrl->priv;
	if (!mlx5_rxq_releasable(ETH_DEV(priv), rxq_ctrl->rxq.idx))
		rte_panic("port %u Rx queue %u is still used by a flow and"
			  " cannot be removed\n",
			  PORT_ID(priv), rxq->idx);
	mlx5_rxq_release(ETH_DEV(priv), rxq_ctrl->rxq.idx);
}

/**
 * Get an Rx queue Verbs/DevX object.
 *
 * @param dev
 *   Pointer to Ethernet device.
839 * @param idx 840 * Queue index in DPDK Rx queue array 841 * 842 * @return 843 * The Verbs/DevX object if it exists. 844 */ 845 static struct mlx5_rxq_obj * 846 mlx5_rxq_obj_get(struct rte_eth_dev *dev, uint16_t idx) 847 { 848 struct mlx5_priv *priv = dev->data->dev_private; 849 struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx]; 850 struct mlx5_rxq_ctrl *rxq_ctrl; 851 852 if (idx >= priv->rxqs_n) 853 return NULL; 854 if (!rxq_data) 855 return NULL; 856 rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq); 857 if (rxq_ctrl->obj) 858 rte_atomic32_inc(&rxq_ctrl->obj->refcnt); 859 return rxq_ctrl->obj; 860 } 861 862 /** 863 * Release the resources allocated for an RQ DevX object. 864 * 865 * @param rxq_ctrl 866 * DevX Rx queue object. 867 */ 868 static void 869 rxq_release_devx_rq_resources(struct mlx5_rxq_ctrl *rxq_ctrl) 870 { 871 if (rxq_ctrl->rxq.wqes) { 872 mlx5_free((void *)(uintptr_t)rxq_ctrl->rxq.wqes); 873 rxq_ctrl->rxq.wqes = NULL; 874 } 875 if (rxq_ctrl->wq_umem) { 876 mlx5_glue->devx_umem_dereg(rxq_ctrl->wq_umem); 877 rxq_ctrl->wq_umem = NULL; 878 } 879 } 880 881 /** 882 * Release the resources allocated for the Rx CQ DevX object. 883 * 884 * @param rxq_ctrl 885 * DevX Rx queue object. 886 */ 887 static void 888 rxq_release_devx_cq_resources(struct mlx5_rxq_ctrl *rxq_ctrl) 889 { 890 if (rxq_ctrl->rxq.cqes) { 891 rte_free((void *)(uintptr_t)rxq_ctrl->rxq.cqes); 892 rxq_ctrl->rxq.cqes = NULL; 893 } 894 if (rxq_ctrl->cq_umem) { 895 mlx5_glue->devx_umem_dereg(rxq_ctrl->cq_umem); 896 rxq_ctrl->cq_umem = NULL; 897 } 898 } 899 900 /** 901 * Release an Rx hairpin related resources. 902 * 903 * @param rxq_obj 904 * Hairpin Rx queue object. 905 */ 906 static void 907 rxq_obj_hairpin_release(struct mlx5_rxq_obj *rxq_obj) 908 { 909 struct mlx5_devx_modify_rq_attr rq_attr = { 0 }; 910 911 MLX5_ASSERT(rxq_obj); 912 rq_attr.state = MLX5_RQC_STATE_RST; 913 rq_attr.rq_state = MLX5_RQC_STATE_RDY; 914 mlx5_devx_cmd_modify_rq(rxq_obj->rq, &rq_attr); 915 claim_zero(mlx5_devx_cmd_destroy(rxq_obj->rq)); 916 } 917 918 /** 919 * Release an Rx verbs/DevX queue object. 920 * 921 * @param rxq_obj 922 * Verbs/DevX Rx queue object. 923 * 924 * @return 925 * 1 while a reference on it exists, 0 when freed. 
926 */ 927 static int 928 mlx5_rxq_obj_release(struct mlx5_rxq_obj *rxq_obj) 929 { 930 MLX5_ASSERT(rxq_obj); 931 if (rte_atomic32_dec_and_test(&rxq_obj->refcnt)) { 932 switch (rxq_obj->type) { 933 case MLX5_RXQ_OBJ_TYPE_IBV: 934 MLX5_ASSERT(rxq_obj->wq); 935 MLX5_ASSERT(rxq_obj->ibv_cq); 936 rxq_free_elts(rxq_obj->rxq_ctrl); 937 claim_zero(mlx5_glue->destroy_wq(rxq_obj->wq)); 938 claim_zero(mlx5_glue->destroy_cq(rxq_obj->ibv_cq)); 939 if (rxq_obj->ibv_channel) 940 claim_zero(mlx5_glue->destroy_comp_channel 941 (rxq_obj->ibv_channel)); 942 break; 943 case MLX5_RXQ_OBJ_TYPE_DEVX_RQ: 944 MLX5_ASSERT(rxq_obj->rq); 945 MLX5_ASSERT(rxq_obj->devx_cq); 946 rxq_free_elts(rxq_obj->rxq_ctrl); 947 claim_zero(mlx5_devx_cmd_destroy(rxq_obj->rq)); 948 claim_zero(mlx5_devx_cmd_destroy(rxq_obj->devx_cq)); 949 if (rxq_obj->devx_channel) 950 mlx5_glue->devx_destroy_event_channel 951 (rxq_obj->devx_channel); 952 rxq_release_devx_rq_resources(rxq_obj->rxq_ctrl); 953 rxq_release_devx_cq_resources(rxq_obj->rxq_ctrl); 954 break; 955 case MLX5_RXQ_OBJ_TYPE_DEVX_HAIRPIN: 956 MLX5_ASSERT(rxq_obj->rq); 957 rxq_obj_hairpin_release(rxq_obj); 958 break; 959 } 960 LIST_REMOVE(rxq_obj, next); 961 mlx5_free(rxq_obj); 962 return 0; 963 } 964 return 1; 965 } 966 967 /** 968 * Allocate queue vector and fill epoll fd list for Rx interrupts. 969 * 970 * @param dev 971 * Pointer to Ethernet device. 972 * 973 * @return 974 * 0 on success, a negative errno value otherwise and rte_errno is set. 975 */ 976 int 977 mlx5_rx_intr_vec_enable(struct rte_eth_dev *dev) 978 { 979 struct mlx5_priv *priv = dev->data->dev_private; 980 unsigned int i; 981 unsigned int rxqs_n = priv->rxqs_n; 982 unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID); 983 unsigned int count = 0; 984 struct rte_intr_handle *intr_handle = dev->intr_handle; 985 986 if (!dev->data->dev_conf.intr_conf.rxq) 987 return 0; 988 mlx5_rx_intr_vec_disable(dev); 989 intr_handle->intr_vec = mlx5_malloc(0, 990 n * sizeof(intr_handle->intr_vec[0]), 991 0, SOCKET_ID_ANY); 992 if (intr_handle->intr_vec == NULL) { 993 DRV_LOG(ERR, 994 "port %u failed to allocate memory for interrupt" 995 " vector, Rx interrupts will not be supported", 996 dev->data->port_id); 997 rte_errno = ENOMEM; 998 return -rte_errno; 999 } 1000 intr_handle->type = RTE_INTR_HANDLE_EXT; 1001 for (i = 0; i != n; ++i) { 1002 /* This rxq obj must not be released in this function. */ 1003 struct mlx5_rxq_obj *rxq_obj = mlx5_rxq_obj_get(dev, i); 1004 int rc; 1005 1006 /* Skip queues that cannot request interrupts. */ 1007 if (!rxq_obj || (!rxq_obj->ibv_channel && 1008 !rxq_obj->devx_channel)) { 1009 /* Use invalid intr_vec[] index to disable entry. 
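 * RTE_INTR_VEC_RXTX_OFFSET + RTE_MAX_RXTX_INTR_VEC_ID is outside the valid
 * vector range; mlx5_rx_intr_vec_disable() skips entries holding this value.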
*/ 1010 intr_handle->intr_vec[i] = 1011 RTE_INTR_VEC_RXTX_OFFSET + 1012 RTE_MAX_RXTX_INTR_VEC_ID; 1013 continue; 1014 } 1015 if (count >= RTE_MAX_RXTX_INTR_VEC_ID) { 1016 DRV_LOG(ERR, 1017 "port %u too many Rx queues for interrupt" 1018 " vector size (%d), Rx interrupts cannot be" 1019 " enabled", 1020 dev->data->port_id, RTE_MAX_RXTX_INTR_VEC_ID); 1021 mlx5_rx_intr_vec_disable(dev); 1022 rte_errno = ENOMEM; 1023 return -rte_errno; 1024 } 1025 rc = mlx5_os_set_nonblock_channel_fd(rxq_obj->fd); 1026 if (rc < 0) { 1027 rte_errno = errno; 1028 DRV_LOG(ERR, 1029 "port %u failed to make Rx interrupt file" 1030 " descriptor %d non-blocking for queue index" 1031 " %d", 1032 dev->data->port_id, rxq_obj->fd, i); 1033 mlx5_rx_intr_vec_disable(dev); 1034 return -rte_errno; 1035 } 1036 intr_handle->intr_vec[i] = RTE_INTR_VEC_RXTX_OFFSET + count; 1037 intr_handle->efds[count] = rxq_obj->fd; 1038 count++; 1039 } 1040 if (!count) 1041 mlx5_rx_intr_vec_disable(dev); 1042 else 1043 intr_handle->nb_efd = count; 1044 return 0; 1045 } 1046 1047 /** 1048 * Clean up Rx interrupts handler. 1049 * 1050 * @param dev 1051 * Pointer to Ethernet device. 1052 */ 1053 void 1054 mlx5_rx_intr_vec_disable(struct rte_eth_dev *dev) 1055 { 1056 struct mlx5_priv *priv = dev->data->dev_private; 1057 struct rte_intr_handle *intr_handle = dev->intr_handle; 1058 unsigned int i; 1059 unsigned int rxqs_n = priv->rxqs_n; 1060 unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID); 1061 1062 if (!dev->data->dev_conf.intr_conf.rxq) 1063 return; 1064 if (!intr_handle->intr_vec) 1065 goto free; 1066 for (i = 0; i != n; ++i) { 1067 struct mlx5_rxq_ctrl *rxq_ctrl; 1068 struct mlx5_rxq_data *rxq_data; 1069 1070 if (intr_handle->intr_vec[i] == RTE_INTR_VEC_RXTX_OFFSET + 1071 RTE_MAX_RXTX_INTR_VEC_ID) 1072 continue; 1073 /** 1074 * Need to access directly the queue to release the reference 1075 * kept in mlx5_rx_intr_vec_enable(). 1076 */ 1077 rxq_data = (*priv->rxqs)[i]; 1078 rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq); 1079 if (rxq_ctrl->obj) 1080 mlx5_rxq_obj_release(rxq_ctrl->obj); 1081 } 1082 free: 1083 rte_intr_free_epoll_fd(intr_handle); 1084 if (intr_handle->intr_vec) 1085 mlx5_free(intr_handle->intr_vec); 1086 intr_handle->nb_efd = 0; 1087 intr_handle->intr_vec = NULL; 1088 } 1089 1090 /** 1091 * MLX5 CQ notification . 1092 * 1093 * @param rxq 1094 * Pointer to receive queue structure. 1095 * @param sq_n_rxq 1096 * Sequence number per receive queue . 1097 */ 1098 static inline void 1099 mlx5_arm_cq(struct mlx5_rxq_data *rxq, int sq_n_rxq) 1100 { 1101 int sq_n = 0; 1102 uint32_t doorbell_hi; 1103 uint64_t doorbell; 1104 void *cq_db_reg = (char *)rxq->cq_uar + MLX5_CQ_DOORBELL; 1105 1106 sq_n = sq_n_rxq & MLX5_CQ_SQN_MASK; 1107 doorbell_hi = sq_n << MLX5_CQ_SQN_OFFSET | (rxq->cq_ci & MLX5_CI_MASK); 1108 doorbell = (uint64_t)doorbell_hi << 32; 1109 doorbell |= rxq->cqn; 1110 rxq->cq_db[MLX5_CQ_ARM_DB] = rte_cpu_to_be_32(doorbell_hi); 1111 mlx5_uar_write64(rte_cpu_to_be_64(doorbell), 1112 cq_db_reg, rxq->uar_lock_cq); 1113 } 1114 1115 /** 1116 * DPDK callback for Rx queue interrupt enable. 1117 * 1118 * @param dev 1119 * Pointer to Ethernet device structure. 1120 * @param rx_queue_id 1121 * Rx queue number. 1122 * 1123 * @return 1124 * 0 on success, a negative errno value otherwise and rte_errno is set. 
1125 */ 1126 int 1127 mlx5_rx_intr_enable(struct rte_eth_dev *dev, uint16_t rx_queue_id) 1128 { 1129 struct mlx5_priv *priv = dev->data->dev_private; 1130 struct mlx5_rxq_data *rxq_data; 1131 struct mlx5_rxq_ctrl *rxq_ctrl; 1132 1133 rxq_data = (*priv->rxqs)[rx_queue_id]; 1134 if (!rxq_data) { 1135 rte_errno = EINVAL; 1136 return -rte_errno; 1137 } 1138 rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq); 1139 if (rxq_ctrl->irq) { 1140 struct mlx5_rxq_obj *rxq_obj; 1141 1142 rxq_obj = mlx5_rxq_obj_get(dev, rx_queue_id); 1143 if (!rxq_obj) { 1144 rte_errno = EINVAL; 1145 return -rte_errno; 1146 } 1147 mlx5_arm_cq(rxq_data, rxq_data->cq_arm_sn); 1148 mlx5_rxq_obj_release(rxq_obj); 1149 } 1150 return 0; 1151 } 1152 1153 /** 1154 * DPDK callback for Rx queue interrupt disable. 1155 * 1156 * @param dev 1157 * Pointer to Ethernet device structure. 1158 * @param rx_queue_id 1159 * Rx queue number. 1160 * 1161 * @return 1162 * 0 on success, a negative errno value otherwise and rte_errno is set. 1163 */ 1164 int 1165 mlx5_rx_intr_disable(struct rte_eth_dev *dev, uint16_t rx_queue_id) 1166 { 1167 struct mlx5_priv *priv = dev->data->dev_private; 1168 struct mlx5_rxq_data *rxq_data; 1169 struct mlx5_rxq_ctrl *rxq_ctrl; 1170 struct mlx5_rxq_obj *rxq_obj = NULL; 1171 struct ibv_cq *ev_cq; 1172 void *ev_ctx; 1173 int ret; 1174 1175 rxq_data = (*priv->rxqs)[rx_queue_id]; 1176 if (!rxq_data) { 1177 rte_errno = EINVAL; 1178 return -rte_errno; 1179 } 1180 rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq); 1181 if (!rxq_ctrl->irq) 1182 return 0; 1183 rxq_obj = mlx5_rxq_obj_get(dev, rx_queue_id); 1184 if (!rxq_obj) { 1185 rte_errno = EINVAL; 1186 return -rte_errno; 1187 } 1188 if (rxq_obj->type == MLX5_RXQ_OBJ_TYPE_IBV) { 1189 ret = mlx5_glue->get_cq_event(rxq_obj->ibv_channel, &ev_cq, 1190 &ev_ctx); 1191 if (ret < 0 || ev_cq != rxq_obj->ibv_cq) 1192 goto exit; 1193 mlx5_glue->ack_cq_events(rxq_obj->ibv_cq, 1); 1194 } else if (rxq_obj->type == MLX5_RXQ_OBJ_TYPE_DEVX_RQ) { 1195 #ifdef HAVE_IBV_DEVX_EVENT 1196 union { 1197 struct mlx5dv_devx_async_event_hdr event_resp; 1198 uint8_t buf[sizeof(struct mlx5dv_devx_async_event_hdr) 1199 + 128]; 1200 } out; 1201 1202 ret = mlx5_glue->devx_get_event 1203 (rxq_obj->devx_channel, &out.event_resp, 1204 sizeof(out.buf)); 1205 if (ret < 0 || out.event_resp.cookie != 1206 (uint64_t)(uintptr_t)rxq_obj->devx_cq) 1207 goto exit; 1208 #endif /* HAVE_IBV_DEVX_EVENT */ 1209 } 1210 rxq_data->cq_arm_sn++; 1211 mlx5_rxq_obj_release(rxq_obj); 1212 return 0; 1213 exit: 1214 /** 1215 * For ret < 0 save the errno (may be EAGAIN which means the get_event 1216 * function was called before receiving one). 1217 */ 1218 if (ret < 0) 1219 rte_errno = errno; 1220 else 1221 rte_errno = EINVAL; 1222 ret = rte_errno; /* Save rte_errno before cleanup. */ 1223 if (rxq_obj) 1224 mlx5_rxq_obj_release(rxq_obj); 1225 if (ret != EAGAIN) 1226 DRV_LOG(WARNING, "port %u unable to disable interrupt on Rx queue %d", 1227 dev->data->port_id, rx_queue_id); 1228 rte_errno = ret; /* Restore rte_errno. */ 1229 return -rte_errno; 1230 } 1231 1232 /** 1233 * Create a CQ Verbs object. 1234 * 1235 * @param dev 1236 * Pointer to Ethernet device. 1237 * @param priv 1238 * Pointer to device private data. 1239 * @param rxq_data 1240 * Pointer to Rx queue data. 1241 * @param cqe_n 1242 * Number of CQEs in CQ. 1243 * @param rxq_obj 1244 * Pointer to Rx queue object data. 1245 * 1246 * @return 1247 * The Verbs object initialised, NULL otherwise and rte_errno is set. 
1248 */ 1249 static struct ibv_cq * 1250 mlx5_ibv_cq_new(struct rte_eth_dev *dev, struct mlx5_priv *priv, 1251 struct mlx5_rxq_data *rxq_data, 1252 unsigned int cqe_n, struct mlx5_rxq_obj *rxq_obj) 1253 { 1254 struct { 1255 struct ibv_cq_init_attr_ex ibv; 1256 struct mlx5dv_cq_init_attr mlx5; 1257 } cq_attr; 1258 1259 cq_attr.ibv = (struct ibv_cq_init_attr_ex){ 1260 .cqe = cqe_n, 1261 .channel = rxq_obj->ibv_channel, 1262 .comp_mask = 0, 1263 }; 1264 cq_attr.mlx5 = (struct mlx5dv_cq_init_attr){ 1265 .comp_mask = 0, 1266 }; 1267 if (priv->config.cqe_comp && !rxq_data->hw_timestamp && 1268 !rxq_data->lro) { 1269 cq_attr.mlx5.comp_mask |= 1270 MLX5DV_CQ_INIT_ATTR_MASK_COMPRESSED_CQE; 1271 #ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT 1272 cq_attr.mlx5.cqe_comp_res_format = 1273 mlx5_rxq_mprq_enabled(rxq_data) ? 1274 MLX5DV_CQE_RES_FORMAT_CSUM_STRIDX : 1275 MLX5DV_CQE_RES_FORMAT_HASH; 1276 #else 1277 cq_attr.mlx5.cqe_comp_res_format = MLX5DV_CQE_RES_FORMAT_HASH; 1278 #endif 1279 /* 1280 * For vectorized Rx, it must not be doubled in order to 1281 * make cq_ci and rq_ci aligned. 1282 */ 1283 if (mlx5_rxq_check_vec_support(rxq_data) < 0) 1284 cq_attr.ibv.cqe *= 2; 1285 } else if (priv->config.cqe_comp && rxq_data->hw_timestamp) { 1286 DRV_LOG(DEBUG, 1287 "port %u Rx CQE compression is disabled for HW" 1288 " timestamp", 1289 dev->data->port_id); 1290 } else if (priv->config.cqe_comp && rxq_data->lro) { 1291 DRV_LOG(DEBUG, 1292 "port %u Rx CQE compression is disabled for LRO", 1293 dev->data->port_id); 1294 } 1295 #ifdef HAVE_IBV_MLX5_MOD_CQE_128B_PAD 1296 if (priv->config.cqe_pad) { 1297 cq_attr.mlx5.comp_mask |= MLX5DV_CQ_INIT_ATTR_MASK_FLAGS; 1298 cq_attr.mlx5.flags |= MLX5DV_CQ_INIT_ATTR_FLAGS_CQE_PAD; 1299 } 1300 #endif 1301 return mlx5_glue->cq_ex_to_cq(mlx5_glue->dv_create_cq(priv->sh->ctx, 1302 &cq_attr.ibv, 1303 &cq_attr.mlx5)); 1304 } 1305 1306 /** 1307 * Create a WQ Verbs object. 1308 * 1309 * @param dev 1310 * Pointer to Ethernet device. 1311 * @param priv 1312 * Pointer to device private data. 1313 * @param rxq_data 1314 * Pointer to Rx queue data. 1315 * @param idx 1316 * Queue index in DPDK Rx queue array 1317 * @param wqe_n 1318 * Number of WQEs in WQ. 1319 * @param rxq_obj 1320 * Pointer to Rx queue object data. 1321 * 1322 * @return 1323 * The Verbs object initialised, NULL otherwise and rte_errno is set. 1324 */ 1325 static struct ibv_wq * 1326 mlx5_ibv_wq_new(struct rte_eth_dev *dev, struct mlx5_priv *priv, 1327 struct mlx5_rxq_data *rxq_data, uint16_t idx, 1328 unsigned int wqe_n, struct mlx5_rxq_obj *rxq_obj) 1329 { 1330 struct { 1331 struct ibv_wq_init_attr ibv; 1332 #ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT 1333 struct mlx5dv_wq_init_attr mlx5; 1334 #endif 1335 } wq_attr; 1336 1337 wq_attr.ibv = (struct ibv_wq_init_attr){ 1338 .wq_context = NULL, /* Could be useful in the future. */ 1339 .wq_type = IBV_WQT_RQ, 1340 /* Max number of outstanding WRs. */ 1341 .max_wr = wqe_n >> rxq_data->sges_n, 1342 /* Max number of scatter/gather elements in a WR. */ 1343 .max_sge = 1 << rxq_data->sges_n, 1344 .pd = priv->sh->pd, 1345 .cq = rxq_obj->ibv_cq, 1346 .comp_mask = IBV_WQ_FLAGS_CVLAN_STRIPPING | 0, 1347 .create_flags = (rxq_data->vlan_strip ? 1348 IBV_WQ_FLAGS_CVLAN_STRIPPING : 0), 1349 }; 1350 /* By default, FCS (CRC) is stripped by hardware. 
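 * When the CRC must be kept (crc_present is set), request
 * IBV_WQ_FLAGS_SCATTER_FCS so the FCS is scattered to the buffer
 * together with the packet data.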
*/ 1351 if (rxq_data->crc_present) { 1352 wq_attr.ibv.create_flags |= IBV_WQ_FLAGS_SCATTER_FCS; 1353 wq_attr.ibv.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS; 1354 } 1355 if (priv->config.hw_padding) { 1356 #if defined(HAVE_IBV_WQ_FLAG_RX_END_PADDING) 1357 wq_attr.ibv.create_flags |= IBV_WQ_FLAG_RX_END_PADDING; 1358 wq_attr.ibv.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS; 1359 #elif defined(HAVE_IBV_WQ_FLAGS_PCI_WRITE_END_PADDING) 1360 wq_attr.ibv.create_flags |= IBV_WQ_FLAGS_PCI_WRITE_END_PADDING; 1361 wq_attr.ibv.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS; 1362 #endif 1363 } 1364 #ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT 1365 wq_attr.mlx5 = (struct mlx5dv_wq_init_attr){ 1366 .comp_mask = 0, 1367 }; 1368 if (mlx5_rxq_mprq_enabled(rxq_data)) { 1369 struct mlx5dv_striding_rq_init_attr *mprq_attr = 1370 &wq_attr.mlx5.striding_rq_attrs; 1371 1372 wq_attr.mlx5.comp_mask |= MLX5DV_WQ_INIT_ATTR_MASK_STRIDING_RQ; 1373 *mprq_attr = (struct mlx5dv_striding_rq_init_attr){ 1374 .single_stride_log_num_of_bytes = rxq_data->strd_sz_n, 1375 .single_wqe_log_num_of_strides = rxq_data->strd_num_n, 1376 .two_byte_shift_en = MLX5_MPRQ_TWO_BYTE_SHIFT, 1377 }; 1378 } 1379 rxq_obj->wq = mlx5_glue->dv_create_wq(priv->sh->ctx, &wq_attr.ibv, 1380 &wq_attr.mlx5); 1381 #else 1382 rxq_obj->wq = mlx5_glue->create_wq(priv->sh->ctx, &wq_attr.ibv); 1383 #endif 1384 if (rxq_obj->wq) { 1385 /* 1386 * Make sure number of WRs*SGEs match expectations since a queue 1387 * cannot allocate more than "desc" buffers. 1388 */ 1389 if (wq_attr.ibv.max_wr != (wqe_n >> rxq_data->sges_n) || 1390 wq_attr.ibv.max_sge != (1u << rxq_data->sges_n)) { 1391 DRV_LOG(ERR, 1392 "port %u Rx queue %u requested %u*%u but got" 1393 " %u*%u WRs*SGEs", 1394 dev->data->port_id, idx, 1395 wqe_n >> rxq_data->sges_n, 1396 (1 << rxq_data->sges_n), 1397 wq_attr.ibv.max_wr, wq_attr.ibv.max_sge); 1398 claim_zero(mlx5_glue->destroy_wq(rxq_obj->wq)); 1399 rxq_obj->wq = NULL; 1400 rte_errno = EINVAL; 1401 } 1402 } 1403 return rxq_obj->wq; 1404 } 1405 1406 /** 1407 * Fill common fields of create RQ attributes structure. 1408 * 1409 * @param rxq_data 1410 * Pointer to Rx queue data. 1411 * @param cqn 1412 * CQ number to use with this RQ. 1413 * @param rq_attr 1414 * RQ attributes structure to fill.. 1415 */ 1416 static void 1417 mlx5_devx_create_rq_attr_fill(struct mlx5_rxq_data *rxq_data, uint32_t cqn, 1418 struct mlx5_devx_create_rq_attr *rq_attr) 1419 { 1420 rq_attr->state = MLX5_RQC_STATE_RST; 1421 rq_attr->vsd = (rxq_data->vlan_strip) ? 0 : 1; 1422 rq_attr->cqn = cqn; 1423 rq_attr->scatter_fcs = (rxq_data->crc_present) ? 1 : 0; 1424 } 1425 1426 /** 1427 * Fill common fields of DevX WQ attributes structure. 1428 * 1429 * @param priv 1430 * Pointer to device private data. 1431 * @param rxq_ctrl 1432 * Pointer to Rx queue control structure. 1433 * @param wq_attr 1434 * WQ attributes structure to fill.. 1435 */ 1436 static void 1437 mlx5_devx_wq_attr_fill(struct mlx5_priv *priv, struct mlx5_rxq_ctrl *rxq_ctrl, 1438 struct mlx5_devx_wq_attr *wq_attr) 1439 { 1440 wq_attr->end_padding_mode = priv->config.cqe_pad ? 1441 MLX5_WQ_END_PAD_MODE_ALIGN : 1442 MLX5_WQ_END_PAD_MODE_NONE; 1443 wq_attr->pd = priv->sh->pdn; 1444 wq_attr->dbr_addr = rxq_ctrl->rq_dbr_offset; 1445 wq_attr->dbr_umem_id = rxq_ctrl->rq_dbr_umem_id; 1446 wq_attr->dbr_umem_valid = 1; 1447 wq_attr->wq_umem_id = rxq_ctrl->wq_umem->umem_id; 1448 wq_attr->wq_umem_valid = 1; 1449 } 1450 1451 /** 1452 * Create a RQ object using DevX. 1453 * 1454 * @param dev 1455 * Pointer to Ethernet device. 
1456 * @param idx 1457 * Queue index in DPDK Rx queue array 1458 * @param cqn 1459 * CQ number to use with this RQ. 1460 * 1461 * @return 1462 * The DevX object initialised, NULL otherwise and rte_errno is set. 1463 */ 1464 static struct mlx5_devx_obj * 1465 mlx5_devx_rq_new(struct rte_eth_dev *dev, uint16_t idx, uint32_t cqn) 1466 { 1467 struct mlx5_priv *priv = dev->data->dev_private; 1468 struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx]; 1469 struct mlx5_rxq_ctrl *rxq_ctrl = 1470 container_of(rxq_data, struct mlx5_rxq_ctrl, rxq); 1471 struct mlx5_devx_create_rq_attr rq_attr = { 0 }; 1472 uint32_t wqe_n = 1 << (rxq_data->elts_n - rxq_data->sges_n); 1473 uint32_t wq_size = 0; 1474 uint32_t wqe_size = 0; 1475 uint32_t log_wqe_size = 0; 1476 void *buf = NULL; 1477 struct mlx5_devx_obj *rq; 1478 1479 /* Fill RQ attributes. */ 1480 rq_attr.mem_rq_type = MLX5_RQC_MEM_RQ_TYPE_MEMORY_RQ_INLINE; 1481 rq_attr.flush_in_error_en = 1; 1482 mlx5_devx_create_rq_attr_fill(rxq_data, cqn, &rq_attr); 1483 /* Fill WQ attributes for this RQ. */ 1484 if (mlx5_rxq_mprq_enabled(rxq_data)) { 1485 rq_attr.wq_attr.wq_type = MLX5_WQ_TYPE_CYCLIC_STRIDING_RQ; 1486 /* 1487 * Number of strides in each WQE: 1488 * 512*2^single_wqe_log_num_of_strides. 1489 */ 1490 rq_attr.wq_attr.single_wqe_log_num_of_strides = 1491 rxq_data->strd_num_n - 1492 MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES; 1493 /* Stride size = (2^single_stride_log_num_of_bytes)*64B. */ 1494 rq_attr.wq_attr.single_stride_log_num_of_bytes = 1495 rxq_data->strd_sz_n - 1496 MLX5_MIN_SINGLE_STRIDE_LOG_NUM_BYTES; 1497 wqe_size = sizeof(struct mlx5_wqe_mprq); 1498 } else { 1499 rq_attr.wq_attr.wq_type = MLX5_WQ_TYPE_CYCLIC; 1500 wqe_size = sizeof(struct mlx5_wqe_data_seg); 1501 } 1502 log_wqe_size = log2above(wqe_size) + rxq_data->sges_n; 1503 rq_attr.wq_attr.log_wq_stride = log_wqe_size; 1504 rq_attr.wq_attr.log_wq_sz = rxq_data->elts_n - rxq_data->sges_n; 1505 /* Calculate and allocate WQ memory space. */ 1506 wqe_size = 1 << log_wqe_size; /* round up power of two.*/ 1507 wq_size = wqe_n * wqe_size; 1508 size_t alignment = MLX5_WQE_BUF_ALIGNMENT; 1509 if (alignment == (size_t)-1) { 1510 DRV_LOG(ERR, "Failed to get mem page size"); 1511 rte_errno = ENOMEM; 1512 return NULL; 1513 } 1514 buf = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, wq_size, 1515 alignment, rxq_ctrl->socket); 1516 if (!buf) 1517 return NULL; 1518 rxq_data->wqes = buf; 1519 rxq_ctrl->wq_umem = mlx5_glue->devx_umem_reg(priv->sh->ctx, 1520 buf, wq_size, 0); 1521 if (!rxq_ctrl->wq_umem) { 1522 mlx5_free(buf); 1523 return NULL; 1524 } 1525 mlx5_devx_wq_attr_fill(priv, rxq_ctrl, &rq_attr.wq_attr); 1526 rq = mlx5_devx_cmd_create_rq(priv->sh->ctx, &rq_attr, rxq_ctrl->socket); 1527 if (!rq) 1528 rxq_release_devx_rq_resources(rxq_ctrl); 1529 return rq; 1530 } 1531 1532 /** 1533 * Create a DevX CQ object for an Rx queue. 1534 * 1535 * @param dev 1536 * Pointer to Ethernet device. 1537 * @param cqe_n 1538 * Number of CQEs in CQ. 1539 * @param idx 1540 * Queue index in DPDK Rx queue array 1541 * @param rxq_obj 1542 * Pointer to Rx queue object data. 1543 * 1544 * @return 1545 * The DevX object initialised, NULL otherwise and rte_errno is set. 
1546 */ 1547 static struct mlx5_devx_obj * 1548 mlx5_devx_cq_new(struct rte_eth_dev *dev, unsigned int cqe_n, uint16_t idx, 1549 struct mlx5_rxq_obj *rxq_obj) 1550 { 1551 struct mlx5_devx_obj *cq_obj = 0; 1552 struct mlx5_devx_cq_attr cq_attr = { 0 }; 1553 struct mlx5_priv *priv = dev->data->dev_private; 1554 struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx]; 1555 struct mlx5_rxq_ctrl *rxq_ctrl = 1556 container_of(rxq_data, struct mlx5_rxq_ctrl, rxq); 1557 size_t page_size = rte_mem_page_size(); 1558 uint32_t lcore = (uint32_t)rte_lcore_to_cpu_id(-1); 1559 uint32_t eqn = 0; 1560 void *buf = NULL; 1561 uint16_t event_nums[1] = {0}; 1562 uint32_t log_cqe_n; 1563 uint32_t cq_size; 1564 int ret = 0; 1565 1566 if (page_size == (size_t)-1) { 1567 DRV_LOG(ERR, "Failed to get page_size."); 1568 goto error; 1569 } 1570 if (priv->config.cqe_comp && !rxq_data->hw_timestamp && 1571 !rxq_data->lro) { 1572 cq_attr.cqe_comp_en = MLX5DV_CQ_INIT_ATTR_MASK_COMPRESSED_CQE; 1573 #ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT 1574 cq_attr.mini_cqe_res_format = 1575 mlx5_rxq_mprq_enabled(rxq_data) ? 1576 MLX5DV_CQE_RES_FORMAT_CSUM_STRIDX : 1577 MLX5DV_CQE_RES_FORMAT_HASH; 1578 #else 1579 cq_attr.mini_cqe_res_format = MLX5DV_CQE_RES_FORMAT_HASH; 1580 #endif 1581 /* 1582 * For vectorized Rx, it must not be doubled in order to 1583 * make cq_ci and rq_ci aligned. 1584 */ 1585 if (mlx5_rxq_check_vec_support(rxq_data) < 0) 1586 cqe_n *= 2; 1587 } else if (priv->config.cqe_comp && rxq_data->hw_timestamp) { 1588 DRV_LOG(DEBUG, 1589 "port %u Rx CQE compression is disabled for HW" 1590 " timestamp", 1591 dev->data->port_id); 1592 } else if (priv->config.cqe_comp && rxq_data->lro) { 1593 DRV_LOG(DEBUG, 1594 "port %u Rx CQE compression is disabled for LRO", 1595 dev->data->port_id); 1596 } 1597 #ifdef HAVE_IBV_MLX5_MOD_CQE_128B_PAD 1598 if (priv->config.cqe_pad) 1599 cq_attr.cqe_size = MLX5DV_CQ_INIT_ATTR_FLAGS_CQE_PAD; 1600 #endif 1601 log_cqe_n = log2above(cqe_n); 1602 cq_size = sizeof(struct mlx5_cqe) * (1 << log_cqe_n); 1603 /* Query the EQN for this core. 
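 * The EQN is resolved for the CPU of the calling lcore and is passed in
 * cq_attr.eqn when the CQ is created below.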
*/ 1604 if (mlx5_glue->devx_query_eqn(priv->sh->ctx, lcore, &eqn)) { 1605 DRV_LOG(ERR, "Failed to query EQN for CQ."); 1606 goto error; 1607 } 1608 cq_attr.eqn = eqn; 1609 buf = rte_calloc_socket(__func__, 1, cq_size, page_size, 1610 rxq_ctrl->socket); 1611 if (!buf) { 1612 DRV_LOG(ERR, "Failed to allocate memory for CQ."); 1613 goto error; 1614 } 1615 rxq_data->cqes = (volatile struct mlx5_cqe (*)[])(uintptr_t)buf; 1616 rxq_ctrl->cq_umem = mlx5_glue->devx_umem_reg(priv->sh->ctx, buf, 1617 cq_size, 1618 IBV_ACCESS_LOCAL_WRITE); 1619 if (!rxq_ctrl->cq_umem) { 1620 DRV_LOG(ERR, "Failed to register umem for CQ."); 1621 goto error; 1622 } 1623 cq_attr.uar_page_id = priv->sh->devx_rx_uar->page_id; 1624 cq_attr.q_umem_id = rxq_ctrl->cq_umem->umem_id; 1625 cq_attr.q_umem_valid = 1; 1626 cq_attr.log_cq_size = log_cqe_n; 1627 cq_attr.log_page_size = rte_log2_u32(page_size); 1628 cq_attr.db_umem_offset = rxq_ctrl->cq_dbr_offset; 1629 cq_attr.db_umem_id = rxq_ctrl->cq_dbr_umem_id; 1630 cq_attr.db_umem_valid = rxq_ctrl->cq_dbr_umem_id_valid; 1631 cq_obj = mlx5_devx_cmd_create_cq(priv->sh->ctx, &cq_attr); 1632 if (!cq_obj) 1633 goto error; 1634 rxq_data->cqe_n = log_cqe_n; 1635 rxq_data->cqn = cq_obj->id; 1636 if (rxq_obj->devx_channel) { 1637 ret = mlx5_glue->devx_subscribe_devx_event 1638 (rxq_obj->devx_channel, 1639 cq_obj->obj, 1640 sizeof(event_nums), 1641 event_nums, 1642 (uint64_t)(uintptr_t)cq_obj); 1643 if (ret) { 1644 DRV_LOG(ERR, "Fail to subscribe CQ to event channel."); 1645 rte_errno = errno; 1646 goto error; 1647 } 1648 } 1649 /* Initialise CQ to 1's to mark HW ownership for all CQEs. */ 1650 memset((void *)(uintptr_t)rxq_data->cqes, 0xFF, cq_size); 1651 return cq_obj; 1652 error: 1653 if (cq_obj) 1654 mlx5_devx_cmd_destroy(cq_obj); 1655 rxq_release_devx_cq_resources(rxq_ctrl); 1656 return NULL; 1657 } 1658 1659 /** 1660 * Create the Rx hairpin queue object. 1661 * 1662 * @param dev 1663 * Pointer to Ethernet device. 1664 * @param idx 1665 * Queue index in DPDK Rx queue array 1666 * 1667 * @return 1668 * The hairpin DevX object initialised, NULL otherwise and rte_errno is set. 1669 */ 1670 static struct mlx5_rxq_obj * 1671 mlx5_rxq_obj_hairpin_new(struct rte_eth_dev *dev, uint16_t idx) 1672 { 1673 struct mlx5_priv *priv = dev->data->dev_private; 1674 struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx]; 1675 struct mlx5_rxq_ctrl *rxq_ctrl = 1676 container_of(rxq_data, struct mlx5_rxq_ctrl, rxq); 1677 struct mlx5_devx_create_rq_attr attr = { 0 }; 1678 struct mlx5_rxq_obj *tmpl = NULL; 1679 uint32_t max_wq_data; 1680 1681 MLX5_ASSERT(rxq_data); 1682 MLX5_ASSERT(!rxq_ctrl->obj); 1683 tmpl = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, sizeof(*tmpl), 0, 1684 rxq_ctrl->socket); 1685 if (!tmpl) { 1686 DRV_LOG(ERR, 1687 "port %u Rx queue %u cannot allocate verbs resources", 1688 dev->data->port_id, rxq_data->idx); 1689 rte_errno = ENOMEM; 1690 return NULL; 1691 } 1692 tmpl->type = MLX5_RXQ_OBJ_TYPE_DEVX_HAIRPIN; 1693 tmpl->rxq_ctrl = rxq_ctrl; 1694 attr.hairpin = 1; 1695 max_wq_data = priv->config.hca_attr.log_max_hairpin_wq_data_sz; 1696 /* Jumbo frames > 9KB should be supported, and more packets. 
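 * A user-provided log_hp_size is validated against the HCA capability,
 * otherwise MLX5_HAIRPIN_JUMBO_LOG_SIZE is used, capped by
 * log_max_hairpin_wq_data_sz.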
*/ 1697 if (priv->config.log_hp_size != (uint32_t)MLX5_ARG_UNSET) { 1698 if (priv->config.log_hp_size > max_wq_data) { 1699 DRV_LOG(ERR, "total data size %u power of 2 is " 1700 "too large for hairpin", 1701 priv->config.log_hp_size); 1702 mlx5_free(tmpl); 1703 rte_errno = ERANGE; 1704 return NULL; 1705 } 1706 attr.wq_attr.log_hairpin_data_sz = priv->config.log_hp_size; 1707 } else { 1708 attr.wq_attr.log_hairpin_data_sz = 1709 (max_wq_data < MLX5_HAIRPIN_JUMBO_LOG_SIZE) ? 1710 max_wq_data : MLX5_HAIRPIN_JUMBO_LOG_SIZE; 1711 } 1712 /* Set the packets number to the maximum value for performance. */ 1713 attr.wq_attr.log_hairpin_num_packets = 1714 attr.wq_attr.log_hairpin_data_sz - 1715 MLX5_HAIRPIN_QUEUE_STRIDE; 1716 tmpl->rq = mlx5_devx_cmd_create_rq(priv->sh->ctx, &attr, 1717 rxq_ctrl->socket); 1718 if (!tmpl->rq) { 1719 DRV_LOG(ERR, 1720 "port %u Rx hairpin queue %u can't create rq object", 1721 dev->data->port_id, idx); 1722 mlx5_free(tmpl); 1723 rte_errno = errno; 1724 return NULL; 1725 } 1726 DRV_LOG(DEBUG, "port %u rxq %u updated with %p", dev->data->port_id, 1727 idx, (void *)&tmpl); 1728 rte_atomic32_inc(&tmpl->refcnt); 1729 LIST_INSERT_HEAD(&priv->rxqsobj, tmpl, next); 1730 priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE; 1731 dev->data->rx_queue_state[idx] = RTE_ETH_QUEUE_STATE_HAIRPIN; 1732 return tmpl; 1733 } 1734 1735 /** 1736 * Create the Rx queue Verbs/DevX object. 1737 * 1738 * @param dev 1739 * Pointer to Ethernet device. 1740 * @param idx 1741 * Queue index in DPDK Rx queue array 1742 * @param type 1743 * Type of Rx queue object to create. 1744 * 1745 * @return 1746 * The Verbs/DevX object initialised, NULL otherwise and rte_errno is set. 1747 */ 1748 struct mlx5_rxq_obj * 1749 mlx5_rxq_obj_new(struct rte_eth_dev *dev, uint16_t idx, 1750 enum mlx5_rxq_obj_type type) 1751 { 1752 struct mlx5_priv *priv = dev->data->dev_private; 1753 struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx]; 1754 struct mlx5_rxq_ctrl *rxq_ctrl = 1755 container_of(rxq_data, struct mlx5_rxq_ctrl, rxq); 1756 struct ibv_wq_attr mod; 1757 unsigned int cqe_n; 1758 unsigned int wqe_n = 1 << rxq_data->elts_n; 1759 struct mlx5_rxq_obj *tmpl = NULL; 1760 struct mlx5dv_cq cq_info; 1761 struct mlx5dv_rwq rwq; 1762 int ret = 0; 1763 struct mlx5dv_obj obj; 1764 1765 MLX5_ASSERT(rxq_data); 1766 MLX5_ASSERT(!rxq_ctrl->obj); 1767 if (type == MLX5_RXQ_OBJ_TYPE_DEVX_HAIRPIN) 1768 return mlx5_rxq_obj_hairpin_new(dev, idx); 1769 priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_RX_QUEUE; 1770 priv->verbs_alloc_ctx.obj = rxq_ctrl; 1771 tmpl = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, sizeof(*tmpl), 0, 1772 rxq_ctrl->socket); 1773 if (!tmpl) { 1774 DRV_LOG(ERR, 1775 "port %u Rx queue %u cannot allocate resources", 1776 dev->data->port_id, rxq_data->idx); 1777 rte_errno = ENOMEM; 1778 goto error; 1779 } 1780 tmpl->type = type; 1781 tmpl->rxq_ctrl = rxq_ctrl; 1782 if (rxq_ctrl->irq) { 1783 if (tmpl->type == MLX5_RXQ_OBJ_TYPE_IBV) { 1784 tmpl->ibv_channel = 1785 mlx5_glue->create_comp_channel(priv->sh->ctx); 1786 if (!tmpl->ibv_channel) { 1787 DRV_LOG(ERR, "port %u: comp channel creation " 1788 "failure", dev->data->port_id); 1789 rte_errno = ENOMEM; 1790 goto error; 1791 } 1792 tmpl->fd = tmpl->ibv_channel->fd; 1793 } else if (tmpl->type == MLX5_RXQ_OBJ_TYPE_DEVX_RQ) { 1794 int devx_ev_flag = 1795 MLX5DV_DEVX_CREATE_EVENT_CHANNEL_FLAGS_OMIT_EV_DATA; 1796 1797 tmpl->devx_channel = 1798 mlx5_glue->devx_create_event_channel 1799 (priv->sh->ctx, 1800 devx_ev_flag); 1801 if (!tmpl->devx_channel) { 1802 rte_errno = 
errno; 1803 DRV_LOG(ERR, 1804 "Failed to create event channel %d.", 1805 rte_errno); 1806 goto error; 1807 } 1808 tmpl->fd = tmpl->devx_channel->fd; 1809 } 1810 } 1811 if (mlx5_rxq_mprq_enabled(rxq_data)) 1812 cqe_n = wqe_n * (1 << rxq_data->strd_num_n) - 1; 1813 else 1814 cqe_n = wqe_n - 1; 1815 DRV_LOG(DEBUG, "port %u device_attr.max_qp_wr is %d", 1816 dev->data->port_id, priv->sh->device_attr.max_qp_wr); 1817 DRV_LOG(DEBUG, "port %u device_attr.max_sge is %d", 1818 dev->data->port_id, priv->sh->device_attr.max_sge); 1819 if (tmpl->type == MLX5_RXQ_OBJ_TYPE_IBV) { 1820 /* Create CQ using Verbs API. */ 1821 tmpl->ibv_cq = mlx5_ibv_cq_new(dev, priv, rxq_data, cqe_n, 1822 tmpl); 1823 if (!tmpl->ibv_cq) { 1824 DRV_LOG(ERR, "port %u Rx queue %u CQ creation failure", 1825 dev->data->port_id, idx); 1826 rte_errno = ENOMEM; 1827 goto error; 1828 } 1829 obj.cq.in = tmpl->ibv_cq; 1830 obj.cq.out = &cq_info; 1831 ret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_CQ); 1832 if (ret) { 1833 rte_errno = ret; 1834 goto error; 1835 } 1836 if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) { 1837 DRV_LOG(ERR, 1838 "port %u wrong MLX5_CQE_SIZE environment " 1839 "variable value: it should be set to %u", 1840 dev->data->port_id, RTE_CACHE_LINE_SIZE); 1841 rte_errno = EINVAL; 1842 goto error; 1843 } 1844 /* Fill the rings. */ 1845 rxq_data->cqe_n = log2above(cq_info.cqe_cnt); 1846 rxq_data->cq_db = cq_info.dbrec; 1847 rxq_data->cqes = 1848 (volatile struct mlx5_cqe (*)[])(uintptr_t)cq_info.buf; 1849 rxq_data->cq_uar = cq_info.cq_uar; 1850 rxq_data->cqn = cq_info.cqn; 1851 /* Create WQ (RQ) using Verbs API. */ 1852 tmpl->wq = mlx5_ibv_wq_new(dev, priv, rxq_data, idx, wqe_n, 1853 tmpl); 1854 if (!tmpl->wq) { 1855 DRV_LOG(ERR, "port %u Rx queue %u WQ creation failure", 1856 dev->data->port_id, idx); 1857 rte_errno = ENOMEM; 1858 goto error; 1859 } 1860 /* Change queue state to ready. */ 1861 mod = (struct ibv_wq_attr){ 1862 .attr_mask = IBV_WQ_ATTR_STATE, 1863 .wq_state = IBV_WQS_RDY, 1864 }; 1865 ret = mlx5_glue->modify_wq(tmpl->wq, &mod); 1866 if (ret) { 1867 DRV_LOG(ERR, 1868 "port %u Rx queue %u WQ state to IBV_WQS_RDY" 1869 " failed", dev->data->port_id, idx); 1870 rte_errno = ret; 1871 goto error; 1872 } 1873 obj.rwq.in = tmpl->wq; 1874 obj.rwq.out = &rwq; 1875 ret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_RWQ); 1876 if (ret) { 1877 rte_errno = ret; 1878 goto error; 1879 } 1880 rxq_data->wqes = rwq.buf; 1881 rxq_data->rq_db = rwq.dbrec; 1882 } else if (tmpl->type == MLX5_RXQ_OBJ_TYPE_DEVX_RQ) { 1883 struct mlx5_devx_modify_rq_attr rq_attr = { 0 }; 1884 struct mlx5_devx_dbr_page *dbr_page; 1885 int64_t dbr_offset; 1886 1887 /* Allocate CQ door-bell. */ 1888 dbr_offset = mlx5_get_dbr(priv->sh->ctx, &priv->dbrpgs, 1889 &dbr_page); 1890 if (dbr_offset < 0) { 1891 DRV_LOG(ERR, "Failed to allocate CQ door-bell."); 1892 goto error; 1893 } 1894 rxq_ctrl->cq_dbr_offset = dbr_offset; 1895 rxq_ctrl->cq_dbr_umem_id = mlx5_os_get_umem_id(dbr_page->umem); 1896 rxq_ctrl->cq_dbr_umem_id_valid = 1; 1897 rxq_data->cq_db = 1898 (uint32_t *)((uintptr_t)dbr_page->dbrs + 1899 (uintptr_t)rxq_ctrl->cq_dbr_offset); 1900 rxq_data->cq_uar = priv->sh->devx_rx_uar->base_addr; 1901 /* Create CQ using DevX API. */ 1902 tmpl->devx_cq = mlx5_devx_cq_new(dev, cqe_n, idx, tmpl); 1903 if (!tmpl->devx_cq) { 1904 DRV_LOG(ERR, "Failed to create CQ."); 1905 goto error; 1906 } 1907 /* Allocate RQ door-bell. 
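 * The RQ doorbell record is taken from the same doorbell page list
 * (priv->dbrpgs) as the CQ doorbell allocated above.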
*/ 1908 dbr_offset = mlx5_get_dbr(priv->sh->ctx, &priv->dbrpgs, 1909 &dbr_page); 1910 if (dbr_offset < 0) { 1911 DRV_LOG(ERR, "Failed to allocate RQ door-bell."); 1912 goto error; 1913 } 1914 rxq_ctrl->rq_dbr_offset = dbr_offset; 1915 rxq_ctrl->rq_dbr_umem_id = mlx5_os_get_umem_id(dbr_page->umem); 1916 rxq_ctrl->rq_dbr_umem_id_valid = 1; 1917 rxq_data->rq_db = 1918 (uint32_t *)((uintptr_t)dbr_page->dbrs + 1919 (uintptr_t)rxq_ctrl->rq_dbr_offset); 1920 /* Create RQ using DevX API. */ 1921 tmpl->rq = mlx5_devx_rq_new(dev, idx, tmpl->devx_cq->id); 1922 if (!tmpl->rq) { 1923 DRV_LOG(ERR, "port %u Rx queue %u RQ creation failure", 1924 dev->data->port_id, idx); 1925 rte_errno = ENOMEM; 1926 goto error; 1927 } 1928 /* Change queue state to ready. */ 1929 rq_attr.rq_state = MLX5_RQC_STATE_RST; 1930 rq_attr.state = MLX5_RQC_STATE_RDY; 1931 ret = mlx5_devx_cmd_modify_rq(tmpl->rq, &rq_attr); 1932 if (ret) 1933 goto error; 1934 } 1935 rxq_data->cq_arm_sn = 0; 1936 mlx5_rxq_initialize(rxq_data); 1937 rxq_data->cq_ci = 0; 1938 DRV_LOG(DEBUG, "port %u rxq %u updated with %p", dev->data->port_id, 1939 idx, (void *)&tmpl); 1940 rte_atomic32_inc(&tmpl->refcnt); 1941 LIST_INSERT_HEAD(&priv->rxqsobj, tmpl, next); 1942 priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE; 1943 dev->data->rx_queue_state[idx] = RTE_ETH_QUEUE_STATE_STARTED; 1944 return tmpl; 1945 error: 1946 if (tmpl) { 1947 ret = rte_errno; /* Save rte_errno before cleanup. */ 1948 if (tmpl->type == MLX5_RXQ_OBJ_TYPE_IBV) { 1949 if (tmpl->wq) 1950 claim_zero(mlx5_glue->destroy_wq(tmpl->wq)); 1951 if (tmpl->ibv_cq) 1952 claim_zero(mlx5_glue->destroy_cq(tmpl->ibv_cq)); 1953 if (tmpl->ibv_channel) 1954 claim_zero(mlx5_glue->destroy_comp_channel 1955 (tmpl->ibv_channel)); 1956 } else if (tmpl->type == MLX5_RXQ_OBJ_TYPE_DEVX_RQ) { 1957 if (tmpl->rq) 1958 claim_zero(mlx5_devx_cmd_destroy(tmpl->rq)); 1959 if (tmpl->devx_cq) 1960 claim_zero(mlx5_devx_cmd_destroy 1961 (tmpl->devx_cq)); 1962 if (tmpl->devx_channel) 1963 mlx5_glue->devx_destroy_event_channel 1964 (tmpl->devx_channel); 1965 } 1966 mlx5_free(tmpl); 1967 rte_errno = ret; /* Restore rte_errno. */ 1968 } 1969 if (type == MLX5_RXQ_OBJ_TYPE_DEVX_RQ) { 1970 rxq_release_devx_rq_resources(rxq_ctrl); 1971 rxq_release_devx_cq_resources(rxq_ctrl); 1972 } 1973 priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE; 1974 return NULL; 1975 } 1976 1977 /** 1978 * Verify the Rx queue objects list is empty 1979 * 1980 * @param dev 1981 * Pointer to Ethernet device. 1982 * 1983 * @return 1984 * The number of objects not released. 1985 */ 1986 int 1987 mlx5_rxq_obj_verify(struct rte_eth_dev *dev) 1988 { 1989 struct mlx5_priv *priv = dev->data->dev_private; 1990 int ret = 0; 1991 struct mlx5_rxq_obj *rxq_obj; 1992 1993 LIST_FOREACH(rxq_obj, &priv->rxqsobj, next) { 1994 DRV_LOG(DEBUG, "port %u Rx queue %u still referenced", 1995 dev->data->port_id, rxq_obj->rxq_ctrl->rxq.idx); 1996 ++ret; 1997 } 1998 return ret; 1999 } 2000 2001 /** 2002 * Callback function to initialize mbufs for Multi-Packet RQ. 
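 * Registered as the obj_init callback of the Multi-Packet RQ mempool
 * created in mlx5_mprq_alloc_mp() below. The opaque argument carries the
 * number of strides per buffer; every stride gets its own
 * rte_mbuf_ext_shared_info, so freeing an mbuf attached to a stride ends
 * up in mlx5_mprq_buf_free_cb().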
2003 */ 2004 static inline void 2005 mlx5_mprq_buf_init(struct rte_mempool *mp, void *opaque_arg, 2006 void *_m, unsigned int i __rte_unused) 2007 { 2008 struct mlx5_mprq_buf *buf = _m; 2009 struct rte_mbuf_ext_shared_info *shinfo; 2010 unsigned int strd_n = (unsigned int)(uintptr_t)opaque_arg; 2011 unsigned int j; 2012 2013 memset(_m, 0, sizeof(*buf)); 2014 buf->mp = mp; 2015 rte_atomic16_set(&buf->refcnt, 1); 2016 for (j = 0; j != strd_n; ++j) { 2017 shinfo = &buf->shinfos[j]; 2018 shinfo->free_cb = mlx5_mprq_buf_free_cb; 2019 shinfo->fcb_opaque = buf; 2020 } 2021 } 2022 2023 /** 2024 * Free mempool of Multi-Packet RQ. 2025 * 2026 * @param dev 2027 * Pointer to Ethernet device. 2028 * 2029 * @return 2030 * 0 on success, negative errno value on failure. 2031 */ 2032 int 2033 mlx5_mprq_free_mp(struct rte_eth_dev *dev) 2034 { 2035 struct mlx5_priv *priv = dev->data->dev_private; 2036 struct rte_mempool *mp = priv->mprq_mp; 2037 unsigned int i; 2038 2039 if (mp == NULL) 2040 return 0; 2041 DRV_LOG(DEBUG, "port %u freeing mempool (%s) for Multi-Packet RQ", 2042 dev->data->port_id, mp->name); 2043 /* 2044 * If a buffer in the pool has been externally attached to a mbuf and it 2045 * is still in use by application, destroying the Rx queue can spoil 2046 * the packet. It is unlikely to happen but if application dynamically 2047 * creates and destroys with holding Rx packets, this can happen. 2048 * 2049 * TODO: It is unavoidable for now because the mempool for Multi-Packet 2050 * RQ isn't provided by application but managed by PMD. 2051 */ 2052 if (!rte_mempool_full(mp)) { 2053 DRV_LOG(ERR, 2054 "port %u mempool for Multi-Packet RQ is still in use", 2055 dev->data->port_id); 2056 rte_errno = EBUSY; 2057 return -rte_errno; 2058 } 2059 rte_mempool_free(mp); 2060 /* Unset mempool for each Rx queue. */ 2061 for (i = 0; i != priv->rxqs_n; ++i) { 2062 struct mlx5_rxq_data *rxq = (*priv->rxqs)[i]; 2063 2064 if (rxq == NULL) 2065 continue; 2066 rxq->mprq_mp = NULL; 2067 } 2068 priv->mprq_mp = NULL; 2069 return 0; 2070 } 2071 2072 /** 2073 * Allocate a mempool for Multi-Packet RQ. All configured Rx queues share the 2074 * mempool. If already allocated, reuse it if there're enough elements. 2075 * Otherwise, resize it. 2076 * 2077 * @param dev 2078 * Pointer to Ethernet device. 2079 * 2080 * @return 2081 * 0 on success, negative errno value on failure. 2082 */ 2083 int 2084 mlx5_mprq_alloc_mp(struct rte_eth_dev *dev) 2085 { 2086 struct mlx5_priv *priv = dev->data->dev_private; 2087 struct rte_mempool *mp = priv->mprq_mp; 2088 char name[RTE_MEMPOOL_NAMESIZE]; 2089 unsigned int desc = 0; 2090 unsigned int buf_len; 2091 unsigned int obj_num; 2092 unsigned int obj_size; 2093 unsigned int strd_num_n = 0; 2094 unsigned int strd_sz_n = 0; 2095 unsigned int i; 2096 unsigned int n_ibv = 0; 2097 2098 if (!mlx5_mprq_enabled(dev)) 2099 return 0; 2100 /* Count the total number of descriptors configured. */ 2101 for (i = 0; i != priv->rxqs_n; ++i) { 2102 struct mlx5_rxq_data *rxq = (*priv->rxqs)[i]; 2103 struct mlx5_rxq_ctrl *rxq_ctrl = container_of 2104 (rxq, struct mlx5_rxq_ctrl, rxq); 2105 2106 if (rxq == NULL || rxq_ctrl->type != MLX5_RXQ_TYPE_STANDARD) 2107 continue; 2108 n_ibv++; 2109 desc += 1 << rxq->elts_n; 2110 /* Get the max number of strides. */ 2111 if (strd_num_n < rxq->strd_num_n) 2112 strd_num_n = rxq->strd_num_n; 2113 /* Get the max size of a stride. 
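 * All standard Rx queues share one Multi-Packet RQ mempool, so the object
 * size computed below has to fit the largest stride geometry (strides per
 * buffer times stride size) found among the queues.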
*/
2114 if (strd_sz_n < rxq->strd_sz_n)
2115 strd_sz_n = rxq->strd_sz_n;
2116 }
2117 MLX5_ASSERT(strd_num_n && strd_sz_n);
2118 buf_len = (1 << strd_num_n) * (1 << strd_sz_n);
2119 obj_size = sizeof(struct mlx5_mprq_buf) + buf_len + (1 << strd_num_n) *
2120 sizeof(struct rte_mbuf_ext_shared_info) + RTE_PKTMBUF_HEADROOM;
2121 /*
2122 * Received packets can be either memcpy'd or externally referenced. In
2123 * case that the packet is attached to an mbuf as an external buffer, as
2124 * it isn't possible to predict how the buffers will be queued by
2125 * application, there's no option to exactly pre-allocate needed buffers
2126 * in advance but to speculatively prepare enough buffers.
2127 *
2128 * In the data path, if this Mempool is depleted, PMD will try to memcpy
2129 * received packets to buffers provided by application (rxq->mp) until
2130 * this Mempool becomes available again.
2131 */
2132 desc *= 4;
2133 obj_num = desc + MLX5_MPRQ_MP_CACHE_SZ * n_ibv;
2134 /*
2135 * rte_mempool_create_empty() has a sanity check to refuse a large cache
2136 * size compared to the number of elements.
2137 * CACHE_FLUSHTHRESH_MULTIPLIER is defined in a C file, so using a
2138 * constant number 2 instead.
2139 */
2140 obj_num = RTE_MAX(obj_num, MLX5_MPRQ_MP_CACHE_SZ * 2);
2141 /* Check whether a mempool is already allocated and if it can be reused. */
2142 if (mp != NULL && mp->elt_size >= obj_size && mp->size >= obj_num) {
2143 DRV_LOG(DEBUG, "port %u mempool %s is being reused",
2144 dev->data->port_id, mp->name);
2145 /* Reuse. */
2146 goto exit;
2147 } else if (mp != NULL) {
2148 DRV_LOG(DEBUG, "port %u mempool %s should be resized, freeing it",
2149 dev->data->port_id, mp->name);
2150 /*
2151 * If freeing fails, which means it may still be in use, there is
2152 * no way but to keep using the existing one. On buffer underrun,
2153 * packets will be memcpy'd instead of external buffer
2154 * attachment.
2155 */
2156 if (mlx5_mprq_free_mp(dev)) {
2157 if (mp->elt_size >= obj_size)
2158 goto exit;
2159 else
2160 return -rte_errno;
2161 }
2162 }
2163 snprintf(name, sizeof(name), "port-%u-mprq", dev->data->port_id);
2164 mp = rte_mempool_create(name, obj_num, obj_size, MLX5_MPRQ_MP_CACHE_SZ,
2165 0, NULL, NULL, mlx5_mprq_buf_init,
2166 (void *)(uintptr_t)(1 << strd_num_n),
2167 dev->device->numa_node, 0);
2168 if (mp == NULL) {
2169 DRV_LOG(ERR,
2170 "port %u failed to allocate a mempool for"
2171 " Multi-Packet RQ, count=%u, size=%u",
2172 dev->data->port_id, obj_num, obj_size);
2173 rte_errno = ENOMEM;
2174 return -rte_errno;
2175 }
2176 priv->mprq_mp = mp;
2177 exit:
2178 /* Set mempool for each Rx queue. */
2179 for (i = 0; i != priv->rxqs_n; ++i) {
2180 struct mlx5_rxq_data *rxq = (*priv->rxqs)[i];
2181 struct mlx5_rxq_ctrl *rxq_ctrl = container_of
2182 (rxq, struct mlx5_rxq_ctrl, rxq);
2183
2184 if (rxq == NULL || rxq_ctrl->type != MLX5_RXQ_TYPE_STANDARD)
2185 continue;
2186 rxq->mprq_mp = mp;
2187 }
2188 DRV_LOG(INFO, "port %u Multi-Packet RQ is configured",
2189 dev->data->port_id);
2190 return 0;
2191 }
2192
2193 #define MLX5_MAX_TCP_HDR_OFFSET ((unsigned int)(sizeof(struct rte_ether_hdr) + \
2194 sizeof(struct rte_vlan_hdr) * 2 + \
2195 sizeof(struct rte_ipv6_hdr)))
2196 #define MAX_TCP_OPTION_SIZE 40u
2197 #define MLX5_MAX_LRO_HEADER_FIX ((unsigned int)(MLX5_MAX_TCP_HDR_OFFSET + \
2198 sizeof(struct rte_tcp_hdr) + \
2199 MAX_TCP_OPTION_SIZE))
2200
2201 /**
2202 * Adjust the maximum LRO message size.
2203 *
2204 * @param dev
2205 * Pointer to Ethernet device.
2206 * @param idx
2207 * RX queue index.
2208 * @param max_lro_size 2209 * The maximum size for LRO packet. 2210 */ 2211 static void 2212 mlx5_max_lro_msg_size_adjust(struct rte_eth_dev *dev, uint16_t idx, 2213 uint32_t max_lro_size) 2214 { 2215 struct mlx5_priv *priv = dev->data->dev_private; 2216 2217 if (priv->config.hca_attr.lro_max_msg_sz_mode == 2218 MLX5_LRO_MAX_MSG_SIZE_START_FROM_L4 && max_lro_size > 2219 MLX5_MAX_TCP_HDR_OFFSET) 2220 max_lro_size -= MLX5_MAX_TCP_HDR_OFFSET; 2221 max_lro_size = RTE_MIN(max_lro_size, MLX5_MAX_LRO_SIZE); 2222 MLX5_ASSERT(max_lro_size >= MLX5_LRO_SEG_CHUNK_SIZE); 2223 max_lro_size /= MLX5_LRO_SEG_CHUNK_SIZE; 2224 if (priv->max_lro_msg_size) 2225 priv->max_lro_msg_size = 2226 RTE_MIN((uint32_t)priv->max_lro_msg_size, max_lro_size); 2227 else 2228 priv->max_lro_msg_size = max_lro_size; 2229 DRV_LOG(DEBUG, 2230 "port %u Rx Queue %u max LRO message size adjusted to %u bytes", 2231 dev->data->port_id, idx, 2232 priv->max_lro_msg_size * MLX5_LRO_SEG_CHUNK_SIZE); 2233 } 2234 2235 /** 2236 * Create a DPDK Rx queue. 2237 * 2238 * @param dev 2239 * Pointer to Ethernet device. 2240 * @param idx 2241 * RX queue index. 2242 * @param desc 2243 * Number of descriptors to configure in queue. 2244 * @param socket 2245 * NUMA socket on which memory must be allocated. 2246 * 2247 * @return 2248 * A DPDK queue object on success, NULL otherwise and rte_errno is set. 2249 */ 2250 struct mlx5_rxq_ctrl * 2251 mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, 2252 unsigned int socket, const struct rte_eth_rxconf *conf, 2253 struct rte_mempool *mp) 2254 { 2255 struct mlx5_priv *priv = dev->data->dev_private; 2256 struct mlx5_rxq_ctrl *tmpl; 2257 unsigned int mb_len = rte_pktmbuf_data_room_size(mp); 2258 unsigned int mprq_stride_nums; 2259 unsigned int mprq_stride_size; 2260 unsigned int mprq_stride_cap; 2261 struct mlx5_dev_config *config = &priv->config; 2262 /* 2263 * Always allocate extra slots, even if eventually 2264 * the vector Rx will not be used. 2265 */ 2266 uint16_t desc_n = 2267 desc + config->rx_vec_en * MLX5_VPMD_DESCS_PER_LOOP; 2268 uint64_t offloads = conf->offloads | 2269 dev->data->dev_conf.rxmode.offloads; 2270 unsigned int lro_on_queue = !!(offloads & DEV_RX_OFFLOAD_TCP_LRO); 2271 const int mprq_en = mlx5_check_mprq_support(dev) > 0; 2272 unsigned int max_rx_pkt_len = lro_on_queue ? 2273 dev->data->dev_conf.rxmode.max_lro_pkt_size : 2274 dev->data->dev_conf.rxmode.max_rx_pkt_len; 2275 unsigned int non_scatter_min_mbuf_size = max_rx_pkt_len + 2276 RTE_PKTMBUF_HEADROOM; 2277 unsigned int max_lro_size = 0; 2278 unsigned int first_mb_free_size = mb_len - RTE_PKTMBUF_HEADROOM; 2279 2280 if (non_scatter_min_mbuf_size > mb_len && !(offloads & 2281 DEV_RX_OFFLOAD_SCATTER)) { 2282 DRV_LOG(ERR, "port %u Rx queue %u: Scatter offload is not" 2283 " configured and no enough mbuf space(%u) to contain " 2284 "the maximum RX packet length(%u) with head-room(%u)", 2285 dev->data->port_id, idx, mb_len, max_rx_pkt_len, 2286 RTE_PKTMBUF_HEADROOM); 2287 rte_errno = ENOSPC; 2288 return NULL; 2289 } 2290 tmpl = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, sizeof(*tmpl) + 2291 desc_n * sizeof(struct rte_mbuf *), 0, socket); 2292 if (!tmpl) { 2293 rte_errno = ENOMEM; 2294 return NULL; 2295 } 2296 tmpl->type = MLX5_RXQ_TYPE_STANDARD; 2297 if (mlx5_mr_btree_init(&tmpl->rxq.mr_ctrl.cache_bh, 2298 MLX5_MR_BTREE_CACHE_N, socket)) { 2299 /* rte_errno is already set. 
*/
2300 goto error;
2301 }
2302 tmpl->socket = socket;
2303 if (dev->data->dev_conf.intr_conf.rxq)
2304 tmpl->irq = 1;
2305 mprq_stride_nums = config->mprq.stride_num_n ?
2306 config->mprq.stride_num_n : MLX5_MPRQ_STRIDE_NUM_N;
2307 mprq_stride_size = non_scatter_min_mbuf_size <=
2308 (1U << config->mprq.max_stride_size_n) ?
2309 log2above(non_scatter_min_mbuf_size) : MLX5_MPRQ_STRIDE_SIZE_N;
2310 mprq_stride_cap = (config->mprq.stride_num_n ?
2311 (1U << config->mprq.stride_num_n) : (1U << mprq_stride_nums)) *
2312 (config->mprq.stride_size_n ?
2313 (1U << config->mprq.stride_size_n) : (1U << mprq_stride_size));
2314 /*
2315 * This Rx queue can be configured as a Multi-Packet RQ if all of the
2316 * following conditions are met:
2317 * - MPRQ is enabled.
2318 * - The number of descs is more than the number of strides.
2319 * - max_rx_pkt_len plus overhead is less than the max size
2320 * of a stride or mprq_stride_size is specified by a user.
2321 * Need to make sure that there are enough strides to encapsulate
2322 * the maximum packet size in case mprq_stride_size is set.
2323 * Otherwise, enable Rx scatter if necessary.
2324 */
2325 if (mprq_en && desc > (1U << mprq_stride_nums) &&
2326 (non_scatter_min_mbuf_size <=
2327 (1U << config->mprq.max_stride_size_n) ||
2328 (config->mprq.stride_size_n &&
2329 non_scatter_min_mbuf_size <= mprq_stride_cap))) {
2330 /* TODO: Rx scatter isn't supported yet. */
2331 tmpl->rxq.sges_n = 0;
2332 /* Trim the number of descs needed. */
2333 desc >>= mprq_stride_nums;
2334 tmpl->rxq.strd_num_n = config->mprq.stride_num_n ?
2335 config->mprq.stride_num_n : mprq_stride_nums;
2336 tmpl->rxq.strd_sz_n = config->mprq.stride_size_n ?
2337 config->mprq.stride_size_n : mprq_stride_size;
2338 tmpl->rxq.strd_shift_en = MLX5_MPRQ_TWO_BYTE_SHIFT;
2339 tmpl->rxq.strd_scatter_en =
2340 !!(offloads & DEV_RX_OFFLOAD_SCATTER);
2341 tmpl->rxq.mprq_max_memcpy_len = RTE_MIN(first_mb_free_size,
2342 config->mprq.max_memcpy_len);
2343 max_lro_size = RTE_MIN(max_rx_pkt_len,
2344 (1u << tmpl->rxq.strd_num_n) *
2345 (1u << tmpl->rxq.strd_sz_n));
2346 DRV_LOG(DEBUG,
2347 "port %u Rx queue %u: Multi-Packet RQ is enabled"
2348 " strd_num_n = %u, strd_sz_n = %u",
2349 dev->data->port_id, idx,
2350 tmpl->rxq.strd_num_n, tmpl->rxq.strd_sz_n);
2351 } else if (max_rx_pkt_len <= first_mb_free_size) {
2352 tmpl->rxq.sges_n = 0;
2353 max_lro_size = max_rx_pkt_len;
2354 } else if (offloads & DEV_RX_OFFLOAD_SCATTER) {
2355 unsigned int size = non_scatter_min_mbuf_size;
2356 unsigned int sges_n;
2357
2358 if (lro_on_queue && first_mb_free_size <
2359 MLX5_MAX_LRO_HEADER_FIX) {
2360 DRV_LOG(ERR, "Not enough space in the first segment(%u)"
2361 " to include the max header size(%u) for LRO",
2362 first_mb_free_size, MLX5_MAX_LRO_HEADER_FIX);
2363 rte_errno = ENOTSUP;
2364 goto error;
2365 }
2366 /*
2367 * Determine the number of SGEs needed for a full packet
2368 * and round it to the next power of two.
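 * For example, assuming a 2048-byte mbuf data room and a 9000-byte
 * max_rx_pkt_len with the default 128-byte headroom, size is 9128, so
 * size / mb_len is 4 with a remainder and sges_n = log2above(5) = 3,
 * i.e. 8 SGEs per packet.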
2369 */ 2370 sges_n = log2above((size / mb_len) + !!(size % mb_len)); 2371 if (sges_n > MLX5_MAX_LOG_RQ_SEGS) { 2372 DRV_LOG(ERR, 2373 "port %u too many SGEs (%u) needed to handle" 2374 " requested maximum packet size %u, the maximum" 2375 " supported are %u", dev->data->port_id, 2376 1 << sges_n, max_rx_pkt_len, 2377 1u << MLX5_MAX_LOG_RQ_SEGS); 2378 rte_errno = ENOTSUP; 2379 goto error; 2380 } 2381 tmpl->rxq.sges_n = sges_n; 2382 max_lro_size = max_rx_pkt_len; 2383 } 2384 if (config->mprq.enabled && !mlx5_rxq_mprq_enabled(&tmpl->rxq)) 2385 DRV_LOG(WARNING, 2386 "port %u MPRQ is requested but cannot be enabled\n" 2387 " (requested: pkt_sz = %u, desc_num = %u," 2388 " rxq_num = %u, stride_sz = %u, stride_num = %u\n" 2389 " supported: min_rxqs_num = %u," 2390 " min_stride_sz = %u, max_stride_sz = %u).", 2391 dev->data->port_id, non_scatter_min_mbuf_size, 2392 desc, priv->rxqs_n, 2393 config->mprq.stride_size_n ? 2394 (1U << config->mprq.stride_size_n) : 2395 (1U << mprq_stride_size), 2396 config->mprq.stride_num_n ? 2397 (1U << config->mprq.stride_num_n) : 2398 (1U << mprq_stride_nums), 2399 config->mprq.min_rxqs_num, 2400 (1U << config->mprq.min_stride_size_n), 2401 (1U << config->mprq.max_stride_size_n)); 2402 DRV_LOG(DEBUG, "port %u maximum number of segments per packet: %u", 2403 dev->data->port_id, 1 << tmpl->rxq.sges_n); 2404 if (desc % (1 << tmpl->rxq.sges_n)) { 2405 DRV_LOG(ERR, 2406 "port %u number of Rx queue descriptors (%u) is not a" 2407 " multiple of SGEs per packet (%u)", 2408 dev->data->port_id, 2409 desc, 2410 1 << tmpl->rxq.sges_n); 2411 rte_errno = EINVAL; 2412 goto error; 2413 } 2414 mlx5_max_lro_msg_size_adjust(dev, idx, max_lro_size); 2415 /* Toggle RX checksum offload if hardware supports it. */ 2416 tmpl->rxq.csum = !!(offloads & DEV_RX_OFFLOAD_CHECKSUM); 2417 tmpl->rxq.hw_timestamp = !!(offloads & DEV_RX_OFFLOAD_TIMESTAMP); 2418 /* Configure VLAN stripping. */ 2419 tmpl->rxq.vlan_strip = !!(offloads & DEV_RX_OFFLOAD_VLAN_STRIP); 2420 /* By default, FCS (CRC) is stripped by hardware. */ 2421 tmpl->rxq.crc_present = 0; 2422 tmpl->rxq.lro = lro_on_queue; 2423 if (offloads & DEV_RX_OFFLOAD_KEEP_CRC) { 2424 if (config->hw_fcs_strip) { 2425 /* 2426 * RQs used for LRO-enabled TIRs should not be 2427 * configured to scatter the FCS. 2428 */ 2429 if (lro_on_queue) 2430 DRV_LOG(WARNING, 2431 "port %u CRC stripping has been " 2432 "disabled but will still be performed " 2433 "by hardware, because LRO is enabled", 2434 dev->data->port_id); 2435 else 2436 tmpl->rxq.crc_present = 1; 2437 } else { 2438 DRV_LOG(WARNING, 2439 "port %u CRC stripping has been disabled but will" 2440 " still be performed by hardware, make sure MLNX_OFED" 2441 " and firmware are up to date", 2442 dev->data->port_id); 2443 } 2444 } 2445 DRV_LOG(DEBUG, 2446 "port %u CRC stripping is %s, %u bytes will be subtracted from" 2447 " incoming frames to hide it", 2448 dev->data->port_id, 2449 tmpl->rxq.crc_present ? "disabled" : "enabled", 2450 tmpl->rxq.crc_present << 2); 2451 /* Save port ID. 
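 * The RSS flag below is only set when the device is configured with
 * ETH_MQ_RX_RSS and a non-zero rss_hf; the mempool, element count and
 * replenish threshold are recorded as well before the control structure
 * is added to the rxqsctrl list.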
*/ 2452 tmpl->rxq.rss_hash = !!priv->rss_conf.rss_hf && 2453 (!!(dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS)); 2454 tmpl->rxq.port_id = dev->data->port_id; 2455 tmpl->priv = priv; 2456 tmpl->rxq.mp = mp; 2457 tmpl->rxq.elts_n = log2above(desc); 2458 tmpl->rxq.rq_repl_thresh = 2459 MLX5_VPMD_RXQ_RPLNSH_THRESH(1 << tmpl->rxq.elts_n); 2460 tmpl->rxq.elts = 2461 (struct rte_mbuf *(*)[1 << tmpl->rxq.elts_n])(tmpl + 1); 2462 #ifndef RTE_ARCH_64 2463 tmpl->rxq.uar_lock_cq = &priv->sh->uar_lock_cq; 2464 #endif 2465 tmpl->rxq.idx = idx; 2466 rte_atomic32_inc(&tmpl->refcnt); 2467 LIST_INSERT_HEAD(&priv->rxqsctrl, tmpl, next); 2468 return tmpl; 2469 error: 2470 mlx5_free(tmpl); 2471 return NULL; 2472 } 2473 2474 /** 2475 * Create a DPDK Rx hairpin queue. 2476 * 2477 * @param dev 2478 * Pointer to Ethernet device. 2479 * @param idx 2480 * RX queue index. 2481 * @param desc 2482 * Number of descriptors to configure in queue. 2483 * @param hairpin_conf 2484 * The hairpin binding configuration. 2485 * 2486 * @return 2487 * A DPDK queue object on success, NULL otherwise and rte_errno is set. 2488 */ 2489 struct mlx5_rxq_ctrl * 2490 mlx5_rxq_hairpin_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, 2491 const struct rte_eth_hairpin_conf *hairpin_conf) 2492 { 2493 struct mlx5_priv *priv = dev->data->dev_private; 2494 struct mlx5_rxq_ctrl *tmpl; 2495 2496 tmpl = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, sizeof(*tmpl), 0, 2497 SOCKET_ID_ANY); 2498 if (!tmpl) { 2499 rte_errno = ENOMEM; 2500 return NULL; 2501 } 2502 tmpl->type = MLX5_RXQ_TYPE_HAIRPIN; 2503 tmpl->socket = SOCKET_ID_ANY; 2504 tmpl->rxq.rss_hash = 0; 2505 tmpl->rxq.port_id = dev->data->port_id; 2506 tmpl->priv = priv; 2507 tmpl->rxq.mp = NULL; 2508 tmpl->rxq.elts_n = log2above(desc); 2509 tmpl->rxq.elts = NULL; 2510 tmpl->rxq.mr_ctrl.cache_bh = (struct mlx5_mr_btree) { 0 }; 2511 tmpl->hairpin_conf = *hairpin_conf; 2512 tmpl->rxq.idx = idx; 2513 rte_atomic32_inc(&tmpl->refcnt); 2514 LIST_INSERT_HEAD(&priv->rxqsctrl, tmpl, next); 2515 return tmpl; 2516 } 2517 2518 /** 2519 * Get a Rx queue. 2520 * 2521 * @param dev 2522 * Pointer to Ethernet device. 2523 * @param idx 2524 * RX queue index. 2525 * 2526 * @return 2527 * A pointer to the queue if it exists, NULL otherwise. 2528 */ 2529 struct mlx5_rxq_ctrl * 2530 mlx5_rxq_get(struct rte_eth_dev *dev, uint16_t idx) 2531 { 2532 struct mlx5_priv *priv = dev->data->dev_private; 2533 struct mlx5_rxq_ctrl *rxq_ctrl = NULL; 2534 2535 if ((*priv->rxqs)[idx]) { 2536 rxq_ctrl = container_of((*priv->rxqs)[idx], 2537 struct mlx5_rxq_ctrl, 2538 rxq); 2539 mlx5_rxq_obj_get(dev, idx); 2540 rte_atomic32_inc(&rxq_ctrl->refcnt); 2541 } 2542 return rxq_ctrl; 2543 } 2544 2545 /** 2546 * Release a Rx queue. 2547 * 2548 * @param dev 2549 * Pointer to Ethernet device. 2550 * @param idx 2551 * RX queue index. 2552 * 2553 * @return 2554 * 1 while a reference on it exists, 0 when freed. 
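 * The reference on the Verbs/DevX object is dropped first; the door-bell
 * records, the MR btree and the control structure itself are freed only
 * when the last reference to the queue goes away.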
2555 */ 2556 int 2557 mlx5_rxq_release(struct rte_eth_dev *dev, uint16_t idx) 2558 { 2559 struct mlx5_priv *priv = dev->data->dev_private; 2560 struct mlx5_rxq_ctrl *rxq_ctrl; 2561 2562 if (!(*priv->rxqs)[idx]) 2563 return 0; 2564 rxq_ctrl = container_of((*priv->rxqs)[idx], struct mlx5_rxq_ctrl, rxq); 2565 MLX5_ASSERT(rxq_ctrl->priv); 2566 if (rxq_ctrl->obj && !mlx5_rxq_obj_release(rxq_ctrl->obj)) 2567 rxq_ctrl->obj = NULL; 2568 if (rte_atomic32_dec_and_test(&rxq_ctrl->refcnt)) { 2569 if (rxq_ctrl->rq_dbr_umem_id_valid) 2570 claim_zero(mlx5_release_dbr(&priv->dbrpgs, 2571 rxq_ctrl->rq_dbr_umem_id, 2572 rxq_ctrl->rq_dbr_offset)); 2573 if (rxq_ctrl->cq_dbr_umem_id_valid) 2574 claim_zero(mlx5_release_dbr(&priv->dbrpgs, 2575 rxq_ctrl->cq_dbr_umem_id, 2576 rxq_ctrl->cq_dbr_offset)); 2577 if (rxq_ctrl->type == MLX5_RXQ_TYPE_STANDARD) 2578 mlx5_mr_btree_free(&rxq_ctrl->rxq.mr_ctrl.cache_bh); 2579 LIST_REMOVE(rxq_ctrl, next); 2580 mlx5_free(rxq_ctrl); 2581 (*priv->rxqs)[idx] = NULL; 2582 return 0; 2583 } 2584 return 1; 2585 } 2586 2587 /** 2588 * Verify the Rx Queue list is empty 2589 * 2590 * @param dev 2591 * Pointer to Ethernet device. 2592 * 2593 * @return 2594 * The number of object not released. 2595 */ 2596 int 2597 mlx5_rxq_verify(struct rte_eth_dev *dev) 2598 { 2599 struct mlx5_priv *priv = dev->data->dev_private; 2600 struct mlx5_rxq_ctrl *rxq_ctrl; 2601 int ret = 0; 2602 2603 LIST_FOREACH(rxq_ctrl, &priv->rxqsctrl, next) { 2604 DRV_LOG(DEBUG, "port %u Rx Queue %u still referenced", 2605 dev->data->port_id, rxq_ctrl->rxq.idx); 2606 ++ret; 2607 } 2608 return ret; 2609 } 2610 2611 /** 2612 * Get a Rx queue type. 2613 * 2614 * @param dev 2615 * Pointer to Ethernet device. 2616 * @param idx 2617 * Rx queue index. 2618 * 2619 * @return 2620 * The Rx queue type. 2621 */ 2622 enum mlx5_rxq_type 2623 mlx5_rxq_get_type(struct rte_eth_dev *dev, uint16_t idx) 2624 { 2625 struct mlx5_priv *priv = dev->data->dev_private; 2626 struct mlx5_rxq_ctrl *rxq_ctrl = NULL; 2627 2628 if (idx < priv->rxqs_n && (*priv->rxqs)[idx]) { 2629 rxq_ctrl = container_of((*priv->rxqs)[idx], 2630 struct mlx5_rxq_ctrl, 2631 rxq); 2632 return rxq_ctrl->type; 2633 } 2634 return MLX5_RXQ_TYPE_UNDEFINED; 2635 } 2636 2637 /** 2638 * Create an indirection table. 2639 * 2640 * @param dev 2641 * Pointer to Ethernet device. 2642 * @param queues 2643 * Queues entering in the indirection table. 2644 * @param queues_n 2645 * Number of queues in the array. 2646 * 2647 * @return 2648 * The Verbs/DevX object initialised, NULL otherwise and rte_errno is set. 2649 */ 2650 static struct mlx5_ind_table_obj * 2651 mlx5_ind_table_obj_new(struct rte_eth_dev *dev, const uint16_t *queues, 2652 uint32_t queues_n, enum mlx5_ind_tbl_type type) 2653 { 2654 struct mlx5_priv *priv = dev->data->dev_private; 2655 struct mlx5_ind_table_obj *ind_tbl; 2656 unsigned int i = 0, j = 0, k = 0; 2657 2658 ind_tbl = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*ind_tbl) + 2659 queues_n * sizeof(uint16_t), 0, SOCKET_ID_ANY); 2660 if (!ind_tbl) { 2661 rte_errno = ENOMEM; 2662 return NULL; 2663 } 2664 ind_tbl->type = type; 2665 if (ind_tbl->type == MLX5_IND_TBL_TYPE_IBV) { 2666 const unsigned int wq_n = rte_is_power_of_2(queues_n) ? 
2667 log2above(queues_n) : 2668 log2above(priv->config.ind_table_max_size); 2669 struct ibv_wq *wq[1 << wq_n]; 2670 2671 for (i = 0; i != queues_n; ++i) { 2672 struct mlx5_rxq_ctrl *rxq = mlx5_rxq_get(dev, 2673 queues[i]); 2674 if (!rxq) 2675 goto error; 2676 wq[i] = rxq->obj->wq; 2677 ind_tbl->queues[i] = queues[i]; 2678 } 2679 ind_tbl->queues_n = queues_n; 2680 /* Finalise indirection table. */ 2681 k = i; /* Retain value of i for use in error case. */ 2682 for (j = 0; k != (unsigned int)(1 << wq_n); ++k, ++j) 2683 wq[k] = wq[j]; 2684 ind_tbl->ind_table = mlx5_glue->create_rwq_ind_table 2685 (priv->sh->ctx, 2686 &(struct ibv_rwq_ind_table_init_attr){ 2687 .log_ind_tbl_size = wq_n, 2688 .ind_tbl = wq, 2689 .comp_mask = 0, 2690 }); 2691 if (!ind_tbl->ind_table) { 2692 rte_errno = errno; 2693 goto error; 2694 } 2695 } else { /* ind_tbl->type == MLX5_IND_TBL_TYPE_DEVX */ 2696 struct mlx5_devx_rqt_attr *rqt_attr = NULL; 2697 const unsigned int rqt_n = 2698 1 << (rte_is_power_of_2(queues_n) ? 2699 log2above(queues_n) : 2700 log2above(priv->config.ind_table_max_size)); 2701 2702 rqt_attr = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*rqt_attr) + 2703 rqt_n * sizeof(uint32_t), 0, 2704 SOCKET_ID_ANY); 2705 if (!rqt_attr) { 2706 DRV_LOG(ERR, "port %u cannot allocate RQT resources", 2707 dev->data->port_id); 2708 rte_errno = ENOMEM; 2709 goto error; 2710 } 2711 rqt_attr->rqt_max_size = priv->config.ind_table_max_size; 2712 rqt_attr->rqt_actual_size = rqt_n; 2713 for (i = 0; i != queues_n; ++i) { 2714 struct mlx5_rxq_ctrl *rxq = mlx5_rxq_get(dev, 2715 queues[i]); 2716 if (!rxq) 2717 goto error; 2718 rqt_attr->rq_list[i] = rxq->obj->rq->id; 2719 ind_tbl->queues[i] = queues[i]; 2720 } 2721 k = i; /* Retain value of i for use in error case. */ 2722 for (j = 0; k != rqt_n; ++k, ++j) 2723 rqt_attr->rq_list[k] = rqt_attr->rq_list[j]; 2724 ind_tbl->rqt = mlx5_devx_cmd_create_rqt(priv->sh->ctx, 2725 rqt_attr); 2726 mlx5_free(rqt_attr); 2727 if (!ind_tbl->rqt) { 2728 DRV_LOG(ERR, "port %u cannot create DevX RQT", 2729 dev->data->port_id); 2730 rte_errno = errno; 2731 goto error; 2732 } 2733 ind_tbl->queues_n = queues_n; 2734 } 2735 rte_atomic32_inc(&ind_tbl->refcnt); 2736 LIST_INSERT_HEAD(&priv->ind_tbls, ind_tbl, next); 2737 return ind_tbl; 2738 error: 2739 for (j = 0; j < i; j++) 2740 mlx5_rxq_release(dev, ind_tbl->queues[j]); 2741 mlx5_free(ind_tbl); 2742 DEBUG("port %u cannot create indirection table", dev->data->port_id); 2743 return NULL; 2744 } 2745 2746 /** 2747 * Get an indirection table. 2748 * 2749 * @param dev 2750 * Pointer to Ethernet device. 2751 * @param queues 2752 * Queues entering in the indirection table. 2753 * @param queues_n 2754 * Number of queues in the array. 2755 * 2756 * @return 2757 * An indirection table if found. 2758 */ 2759 static struct mlx5_ind_table_obj * 2760 mlx5_ind_table_obj_get(struct rte_eth_dev *dev, const uint16_t *queues, 2761 uint32_t queues_n) 2762 { 2763 struct mlx5_priv *priv = dev->data->dev_private; 2764 struct mlx5_ind_table_obj *ind_tbl; 2765 2766 LIST_FOREACH(ind_tbl, &priv->ind_tbls, next) { 2767 if ((ind_tbl->queues_n == queues_n) && 2768 (memcmp(ind_tbl->queues, queues, 2769 ind_tbl->queues_n * sizeof(ind_tbl->queues[0])) 2770 == 0)) 2771 break; 2772 } 2773 if (ind_tbl) { 2774 unsigned int i; 2775 2776 rte_atomic32_inc(&ind_tbl->refcnt); 2777 for (i = 0; i != ind_tbl->queues_n; ++i) 2778 mlx5_rxq_get(dev, ind_tbl->queues[i]); 2779 } 2780 return ind_tbl; 2781 } 2782 2783 /** 2784 * Release an indirection table. 
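 * Every call drops the reference held on each Rx queue of the table; the
 * RWQ indirection table (Verbs) or RQT (DevX) object itself is destroyed
 * only when the last reference to the table goes away.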
2785 * 2786 * @param dev 2787 * Pointer to Ethernet device. 2788 * @param ind_table 2789 * Indirection table to release. 2790 * 2791 * @return 2792 * 1 while a reference on it exists, 0 when freed. 2793 */ 2794 static int 2795 mlx5_ind_table_obj_release(struct rte_eth_dev *dev, 2796 struct mlx5_ind_table_obj *ind_tbl) 2797 { 2798 unsigned int i; 2799 2800 if (rte_atomic32_dec_and_test(&ind_tbl->refcnt)) { 2801 if (ind_tbl->type == MLX5_IND_TBL_TYPE_IBV) 2802 claim_zero(mlx5_glue->destroy_rwq_ind_table 2803 (ind_tbl->ind_table)); 2804 else if (ind_tbl->type == MLX5_IND_TBL_TYPE_DEVX) 2805 claim_zero(mlx5_devx_cmd_destroy(ind_tbl->rqt)); 2806 } 2807 for (i = 0; i != ind_tbl->queues_n; ++i) 2808 claim_nonzero(mlx5_rxq_release(dev, ind_tbl->queues[i])); 2809 if (!rte_atomic32_read(&ind_tbl->refcnt)) { 2810 LIST_REMOVE(ind_tbl, next); 2811 mlx5_free(ind_tbl); 2812 return 0; 2813 } 2814 return 1; 2815 } 2816 2817 /** 2818 * Verify the Rx Queue list is empty 2819 * 2820 * @param dev 2821 * Pointer to Ethernet device. 2822 * 2823 * @return 2824 * The number of object not released. 2825 */ 2826 int 2827 mlx5_ind_table_obj_verify(struct rte_eth_dev *dev) 2828 { 2829 struct mlx5_priv *priv = dev->data->dev_private; 2830 struct mlx5_ind_table_obj *ind_tbl; 2831 int ret = 0; 2832 2833 LIST_FOREACH(ind_tbl, &priv->ind_tbls, next) { 2834 DRV_LOG(DEBUG, 2835 "port %u indirection table obj %p still referenced", 2836 dev->data->port_id, (void *)ind_tbl); 2837 ++ret; 2838 } 2839 return ret; 2840 } 2841 2842 /** 2843 * Create an Rx Hash queue. 2844 * 2845 * @param dev 2846 * Pointer to Ethernet device. 2847 * @param rss_key 2848 * RSS key for the Rx hash queue. 2849 * @param rss_key_len 2850 * RSS key length. 2851 * @param hash_fields 2852 * Verbs protocol hash field to make the RSS on. 2853 * @param queues 2854 * Queues entering in hash queue. In case of empty hash_fields only the 2855 * first queue index will be taken for the indirection table. 2856 * @param queues_n 2857 * Number of queues. 2858 * @param tunnel 2859 * Tunnel type. 2860 * 2861 * @return 2862 * The Verbs/DevX object initialised index, 0 otherwise and rte_errno is set. 2863 */ 2864 uint32_t 2865 mlx5_hrxq_new(struct rte_eth_dev *dev, 2866 const uint8_t *rss_key, uint32_t rss_key_len, 2867 uint64_t hash_fields, 2868 const uint16_t *queues, uint32_t queues_n, 2869 int tunnel __rte_unused) 2870 { 2871 struct mlx5_priv *priv = dev->data->dev_private; 2872 struct mlx5_hrxq *hrxq; 2873 uint32_t hrxq_idx = 0; 2874 struct ibv_qp *qp = NULL; 2875 struct mlx5_ind_table_obj *ind_tbl; 2876 int err; 2877 struct mlx5_devx_obj *tir = NULL; 2878 struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[queues[0]]; 2879 struct mlx5_rxq_ctrl *rxq_ctrl = 2880 container_of(rxq_data, struct mlx5_rxq_ctrl, rxq); 2881 2882 queues_n = hash_fields ? queues_n : 1; 2883 ind_tbl = mlx5_ind_table_obj_get(dev, queues, queues_n); 2884 if (!ind_tbl) { 2885 enum mlx5_ind_tbl_type type; 2886 2887 type = rxq_ctrl->obj->type == MLX5_RXQ_OBJ_TYPE_IBV ? 
2888 MLX5_IND_TBL_TYPE_IBV : MLX5_IND_TBL_TYPE_DEVX; 2889 ind_tbl = mlx5_ind_table_obj_new(dev, queues, queues_n, type); 2890 } 2891 if (!ind_tbl) { 2892 rte_errno = ENOMEM; 2893 return 0; 2894 } 2895 if (ind_tbl->type == MLX5_IND_TBL_TYPE_IBV) { 2896 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT 2897 struct mlx5dv_qp_init_attr qp_init_attr; 2898 2899 memset(&qp_init_attr, 0, sizeof(qp_init_attr)); 2900 if (tunnel) { 2901 qp_init_attr.comp_mask = 2902 MLX5DV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS; 2903 qp_init_attr.create_flags = 2904 MLX5DV_QP_CREATE_TUNNEL_OFFLOADS; 2905 } 2906 #ifdef HAVE_IBV_FLOW_DV_SUPPORT 2907 if (dev->data->dev_conf.lpbk_mode) { 2908 /* 2909 * Allow packet sent from NIC loop back 2910 * w/o source MAC check. 2911 */ 2912 qp_init_attr.comp_mask |= 2913 MLX5DV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS; 2914 qp_init_attr.create_flags |= 2915 MLX5DV_QP_CREATE_TIR_ALLOW_SELF_LOOPBACK_UC; 2916 } 2917 #endif 2918 qp = mlx5_glue->dv_create_qp 2919 (priv->sh->ctx, 2920 &(struct ibv_qp_init_attr_ex){ 2921 .qp_type = IBV_QPT_RAW_PACKET, 2922 .comp_mask = 2923 IBV_QP_INIT_ATTR_PD | 2924 IBV_QP_INIT_ATTR_IND_TABLE | 2925 IBV_QP_INIT_ATTR_RX_HASH, 2926 .rx_hash_conf = (struct ibv_rx_hash_conf){ 2927 .rx_hash_function = 2928 IBV_RX_HASH_FUNC_TOEPLITZ, 2929 .rx_hash_key_len = rss_key_len, 2930 .rx_hash_key = 2931 (void *)(uintptr_t)rss_key, 2932 .rx_hash_fields_mask = hash_fields, 2933 }, 2934 .rwq_ind_tbl = ind_tbl->ind_table, 2935 .pd = priv->sh->pd, 2936 }, 2937 &qp_init_attr); 2938 #else 2939 qp = mlx5_glue->create_qp_ex 2940 (priv->sh->ctx, 2941 &(struct ibv_qp_init_attr_ex){ 2942 .qp_type = IBV_QPT_RAW_PACKET, 2943 .comp_mask = 2944 IBV_QP_INIT_ATTR_PD | 2945 IBV_QP_INIT_ATTR_IND_TABLE | 2946 IBV_QP_INIT_ATTR_RX_HASH, 2947 .rx_hash_conf = (struct ibv_rx_hash_conf){ 2948 .rx_hash_function = 2949 IBV_RX_HASH_FUNC_TOEPLITZ, 2950 .rx_hash_key_len = rss_key_len, 2951 .rx_hash_key = 2952 (void *)(uintptr_t)rss_key, 2953 .rx_hash_fields_mask = hash_fields, 2954 }, 2955 .rwq_ind_tbl = ind_tbl->ind_table, 2956 .pd = priv->sh->pd, 2957 }); 2958 #endif 2959 if (!qp) { 2960 rte_errno = errno; 2961 goto error; 2962 } 2963 } else { /* ind_tbl->type == MLX5_IND_TBL_TYPE_DEVX */ 2964 struct mlx5_devx_tir_attr tir_attr; 2965 uint32_t i; 2966 uint32_t lro = 1; 2967 2968 /* Enable TIR LRO only if all the queues were configured for. */ 2969 for (i = 0; i < queues_n; ++i) { 2970 if (!(*priv->rxqs)[queues[i]]->lro) { 2971 lro = 0; 2972 break; 2973 } 2974 } 2975 memset(&tir_attr, 0, sizeof(tir_attr)); 2976 tir_attr.disp_type = MLX5_TIRC_DISP_TYPE_INDIRECT; 2977 tir_attr.rx_hash_fn = MLX5_RX_HASH_FN_TOEPLITZ; 2978 tir_attr.tunneled_offload_en = !!tunnel; 2979 /* If needed, translate hash_fields bitmap to PRM format. */ 2980 if (hash_fields) { 2981 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT 2982 struct mlx5_rx_hash_field_select *rx_hash_field_select = 2983 hash_fields & IBV_RX_HASH_INNER ? 2984 &tir_attr.rx_hash_field_selector_inner : 2985 &tir_attr.rx_hash_field_selector_outer; 2986 #else 2987 struct mlx5_rx_hash_field_select *rx_hash_field_select = 2988 &tir_attr.rx_hash_field_selector_outer; 2989 #endif 2990 2991 /* 1 bit: 0: IPv4, 1: IPv6. */ 2992 rx_hash_field_select->l3_prot_type = 2993 !!(hash_fields & MLX5_IPV6_IBV_RX_HASH); 2994 /* 1 bit: 0: TCP, 1: UDP. */ 2995 rx_hash_field_select->l4_prot_type = 2996 !!(hash_fields & MLX5_UDP_IBV_RX_HASH); 2997 /* Bitmask which sets which fields to use in RX Hash. 
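 * Each requested Verbs hash bit is translated below into the matching PRM
 * selector bit: L3 source/destination IP and L4 source/destination port.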
*/ 2998 rx_hash_field_select->selected_fields = 2999 ((!!(hash_fields & MLX5_L3_SRC_IBV_RX_HASH)) << 3000 MLX5_RX_HASH_FIELD_SELECT_SELECTED_FIELDS_SRC_IP) | 3001 (!!(hash_fields & MLX5_L3_DST_IBV_RX_HASH)) << 3002 MLX5_RX_HASH_FIELD_SELECT_SELECTED_FIELDS_DST_IP | 3003 (!!(hash_fields & MLX5_L4_SRC_IBV_RX_HASH)) << 3004 MLX5_RX_HASH_FIELD_SELECT_SELECTED_FIELDS_L4_SPORT | 3005 (!!(hash_fields & MLX5_L4_DST_IBV_RX_HASH)) << 3006 MLX5_RX_HASH_FIELD_SELECT_SELECTED_FIELDS_L4_DPORT; 3007 } 3008 if (rxq_ctrl->obj->type == MLX5_RXQ_OBJ_TYPE_DEVX_HAIRPIN) 3009 tir_attr.transport_domain = priv->sh->td->id; 3010 else 3011 tir_attr.transport_domain = priv->sh->tdn; 3012 memcpy(tir_attr.rx_hash_toeplitz_key, rss_key, 3013 MLX5_RSS_HASH_KEY_LEN); 3014 tir_attr.indirect_table = ind_tbl->rqt->id; 3015 if (dev->data->dev_conf.lpbk_mode) 3016 tir_attr.self_lb_block = 3017 MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST; 3018 if (lro) { 3019 tir_attr.lro_timeout_period_usecs = 3020 priv->config.lro.timeout; 3021 tir_attr.lro_max_msg_sz = priv->max_lro_msg_size; 3022 tir_attr.lro_enable_mask = 3023 MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO | 3024 MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO; 3025 } 3026 tir = mlx5_devx_cmd_create_tir(priv->sh->ctx, &tir_attr); 3027 if (!tir) { 3028 DRV_LOG(ERR, "port %u cannot create DevX TIR", 3029 dev->data->port_id); 3030 rte_errno = errno; 3031 goto error; 3032 } 3033 } 3034 hrxq = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_HRXQ], &hrxq_idx); 3035 if (!hrxq) 3036 goto error; 3037 hrxq->ind_table = ind_tbl; 3038 if (ind_tbl->type == MLX5_IND_TBL_TYPE_IBV) { 3039 hrxq->qp = qp; 3040 #ifdef HAVE_IBV_FLOW_DV_SUPPORT 3041 hrxq->action = 3042 mlx5_glue->dv_create_flow_action_dest_ibv_qp(hrxq->qp); 3043 if (!hrxq->action) { 3044 rte_errno = errno; 3045 goto error; 3046 } 3047 #endif 3048 } else { /* ind_tbl->type == MLX5_IND_TBL_TYPE_DEVX */ 3049 hrxq->tir = tir; 3050 #ifdef HAVE_IBV_FLOW_DV_SUPPORT 3051 hrxq->action = mlx5_glue->dv_create_flow_action_dest_devx_tir 3052 (hrxq->tir->obj); 3053 if (!hrxq->action) { 3054 rte_errno = errno; 3055 goto error; 3056 } 3057 #endif 3058 } 3059 hrxq->rss_key_len = rss_key_len; 3060 hrxq->hash_fields = hash_fields; 3061 memcpy(hrxq->rss_key, rss_key, rss_key_len); 3062 rte_atomic32_inc(&hrxq->refcnt); 3063 ILIST_INSERT(priv->sh->ipool[MLX5_IPOOL_HRXQ], &priv->hrxqs, hrxq_idx, 3064 hrxq, next); 3065 return hrxq_idx; 3066 error: 3067 err = rte_errno; /* Save rte_errno before cleanup. */ 3068 mlx5_ind_table_obj_release(dev, ind_tbl); 3069 if (qp) 3070 claim_zero(mlx5_glue->destroy_qp(qp)); 3071 else if (tir) 3072 claim_zero(mlx5_devx_cmd_destroy(tir)); 3073 rte_errno = err; /* Restore rte_errno. */ 3074 return 0; 3075 } 3076 3077 /** 3078 * Get an Rx Hash queue. 3079 * 3080 * @param dev 3081 * Pointer to Ethernet device. 3082 * @param rss_conf 3083 * RSS configuration for the Rx hash queue. 3084 * @param queues 3085 * Queues entering in hash queue. In case of empty hash_fields only the 3086 * first queue index will be taken for the indirection table. 3087 * @param queues_n 3088 * Number of queues. 3089 * 3090 * @return 3091 * An hash Rx queue index on success. 3092 */ 3093 uint32_t 3094 mlx5_hrxq_get(struct rte_eth_dev *dev, 3095 const uint8_t *rss_key, uint32_t rss_key_len, 3096 uint64_t hash_fields, 3097 const uint16_t *queues, uint32_t queues_n) 3098 { 3099 struct mlx5_priv *priv = dev->data->dev_private; 3100 struct mlx5_hrxq *hrxq; 3101 uint32_t idx; 3102 3103 queues_n = hash_fields ? 
queues_n : 1; 3104 ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_HRXQ], priv->hrxqs, idx, 3105 hrxq, next) { 3106 struct mlx5_ind_table_obj *ind_tbl; 3107 3108 if (hrxq->rss_key_len != rss_key_len) 3109 continue; 3110 if (memcmp(hrxq->rss_key, rss_key, rss_key_len)) 3111 continue; 3112 if (hrxq->hash_fields != hash_fields) 3113 continue; 3114 ind_tbl = mlx5_ind_table_obj_get(dev, queues, queues_n); 3115 if (!ind_tbl) 3116 continue; 3117 if (ind_tbl != hrxq->ind_table) { 3118 mlx5_ind_table_obj_release(dev, ind_tbl); 3119 continue; 3120 } 3121 rte_atomic32_inc(&hrxq->refcnt); 3122 return idx; 3123 } 3124 return 0; 3125 } 3126 3127 /** 3128 * Release the hash Rx queue. 3129 * 3130 * @param dev 3131 * Pointer to Ethernet device. 3132 * @param hrxq 3133 * Index to Hash Rx queue to release. 3134 * 3135 * @return 3136 * 1 while a reference on it exists, 0 when freed. 3137 */ 3138 int 3139 mlx5_hrxq_release(struct rte_eth_dev *dev, uint32_t hrxq_idx) 3140 { 3141 struct mlx5_priv *priv = dev->data->dev_private; 3142 struct mlx5_hrxq *hrxq; 3143 3144 hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ], hrxq_idx); 3145 if (!hrxq) 3146 return 0; 3147 if (rte_atomic32_dec_and_test(&hrxq->refcnt)) { 3148 #ifdef HAVE_IBV_FLOW_DV_SUPPORT 3149 mlx5_glue->destroy_flow_action(hrxq->action); 3150 #endif 3151 if (hrxq->ind_table->type == MLX5_IND_TBL_TYPE_IBV) 3152 claim_zero(mlx5_glue->destroy_qp(hrxq->qp)); 3153 else /* hrxq->ind_table->type == MLX5_IND_TBL_TYPE_DEVX */ 3154 claim_zero(mlx5_devx_cmd_destroy(hrxq->tir)); 3155 mlx5_ind_table_obj_release(dev, hrxq->ind_table); 3156 ILIST_REMOVE(priv->sh->ipool[MLX5_IPOOL_HRXQ], &priv->hrxqs, 3157 hrxq_idx, hrxq, next); 3158 mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_HRXQ], hrxq_idx); 3159 return 0; 3160 } 3161 claim_nonzero(mlx5_ind_table_obj_release(dev, hrxq->ind_table)); 3162 return 1; 3163 } 3164 3165 /** 3166 * Verify the Rx Queue list is empty 3167 * 3168 * @param dev 3169 * Pointer to Ethernet device. 3170 * 3171 * @return 3172 * The number of object not released. 3173 */ 3174 int 3175 mlx5_hrxq_verify(struct rte_eth_dev *dev) 3176 { 3177 struct mlx5_priv *priv = dev->data->dev_private; 3178 struct mlx5_hrxq *hrxq; 3179 uint32_t idx; 3180 int ret = 0; 3181 3182 ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_HRXQ], priv->hrxqs, idx, 3183 hrxq, next) { 3184 DRV_LOG(DEBUG, 3185 "port %u hash Rx queue %p still referenced", 3186 dev->data->port_id, (void *)hrxq); 3187 ++ret; 3188 } 3189 return ret; 3190 } 3191 3192 /** 3193 * Create a drop Rx queue Verbs/DevX object. 3194 * 3195 * @param dev 3196 * Pointer to Ethernet device. 3197 * 3198 * @return 3199 * The Verbs/DevX object initialised, NULL otherwise and rte_errno is set. 
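 * The drop queue object is a singleton: the first call creates a minimal
 * CQ/WQ pair (a single WR with a single SGE) and caches it in
 * priv->drop_queue.rxq; subsequent calls return the cached object.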
3200 */ 3201 static struct mlx5_rxq_obj * 3202 mlx5_rxq_obj_drop_new(struct rte_eth_dev *dev) 3203 { 3204 struct mlx5_priv *priv = dev->data->dev_private; 3205 struct ibv_context *ctx = priv->sh->ctx; 3206 struct ibv_cq *cq; 3207 struct ibv_wq *wq = NULL; 3208 struct mlx5_rxq_obj *rxq; 3209 3210 if (priv->drop_queue.rxq) 3211 return priv->drop_queue.rxq; 3212 cq = mlx5_glue->create_cq(ctx, 1, NULL, NULL, 0); 3213 if (!cq) { 3214 DEBUG("port %u cannot allocate CQ for drop queue", 3215 dev->data->port_id); 3216 rte_errno = errno; 3217 goto error; 3218 } 3219 wq = mlx5_glue->create_wq(ctx, 3220 &(struct ibv_wq_init_attr){ 3221 .wq_type = IBV_WQT_RQ, 3222 .max_wr = 1, 3223 .max_sge = 1, 3224 .pd = priv->sh->pd, 3225 .cq = cq, 3226 }); 3227 if (!wq) { 3228 DEBUG("port %u cannot allocate WQ for drop queue", 3229 dev->data->port_id); 3230 rte_errno = errno; 3231 goto error; 3232 } 3233 rxq = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*rxq), 0, SOCKET_ID_ANY); 3234 if (!rxq) { 3235 DEBUG("port %u cannot allocate drop Rx queue memory", 3236 dev->data->port_id); 3237 rte_errno = ENOMEM; 3238 goto error; 3239 } 3240 rxq->ibv_cq = cq; 3241 rxq->wq = wq; 3242 priv->drop_queue.rxq = rxq; 3243 return rxq; 3244 error: 3245 if (wq) 3246 claim_zero(mlx5_glue->destroy_wq(wq)); 3247 if (cq) 3248 claim_zero(mlx5_glue->destroy_cq(cq)); 3249 return NULL; 3250 } 3251 3252 /** 3253 * Release a drop Rx queue Verbs/DevX object. 3254 * 3255 * @param dev 3256 * Pointer to Ethernet device. 3257 * 3258 * @return 3259 * The Verbs/DevX object initialised, NULL otherwise and rte_errno is set. 3260 */ 3261 static void 3262 mlx5_rxq_obj_drop_release(struct rte_eth_dev *dev) 3263 { 3264 struct mlx5_priv *priv = dev->data->dev_private; 3265 struct mlx5_rxq_obj *rxq = priv->drop_queue.rxq; 3266 3267 if (rxq->wq) 3268 claim_zero(mlx5_glue->destroy_wq(rxq->wq)); 3269 if (rxq->ibv_cq) 3270 claim_zero(mlx5_glue->destroy_cq(rxq->ibv_cq)); 3271 mlx5_free(rxq); 3272 priv->drop_queue.rxq = NULL; 3273 } 3274 3275 /** 3276 * Create a drop indirection table. 3277 * 3278 * @param dev 3279 * Pointer to Ethernet device. 3280 * 3281 * @return 3282 * The Verbs/DevX object initialised, NULL otherwise and rte_errno is set. 3283 */ 3284 static struct mlx5_ind_table_obj * 3285 mlx5_ind_table_obj_drop_new(struct rte_eth_dev *dev) 3286 { 3287 struct mlx5_priv *priv = dev->data->dev_private; 3288 struct mlx5_ind_table_obj *ind_tbl; 3289 struct mlx5_rxq_obj *rxq; 3290 struct mlx5_ind_table_obj tmpl; 3291 3292 rxq = mlx5_rxq_obj_drop_new(dev); 3293 if (!rxq) 3294 return NULL; 3295 tmpl.ind_table = mlx5_glue->create_rwq_ind_table 3296 (priv->sh->ctx, 3297 &(struct ibv_rwq_ind_table_init_attr){ 3298 .log_ind_tbl_size = 0, 3299 .ind_tbl = &rxq->wq, 3300 .comp_mask = 0, 3301 }); 3302 if (!tmpl.ind_table) { 3303 DEBUG("port %u cannot allocate indirection table for drop" 3304 " queue", 3305 dev->data->port_id); 3306 rte_errno = errno; 3307 goto error; 3308 } 3309 ind_tbl = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*ind_tbl), 0, 3310 SOCKET_ID_ANY); 3311 if (!ind_tbl) { 3312 rte_errno = ENOMEM; 3313 goto error; 3314 } 3315 ind_tbl->ind_table = tmpl.ind_table; 3316 return ind_tbl; 3317 error: 3318 mlx5_rxq_obj_drop_release(dev); 3319 return NULL; 3320 } 3321 3322 /** 3323 * Release a drop indirection table. 3324 * 3325 * @param dev 3326 * Pointer to Ethernet device. 
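 * Destroys the RWQ indirection table and releases the underlying drop Rx
 * queue object created by mlx5_rxq_obj_drop_new().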
3327 */ 3328 static void 3329 mlx5_ind_table_obj_drop_release(struct rte_eth_dev *dev) 3330 { 3331 struct mlx5_priv *priv = dev->data->dev_private; 3332 struct mlx5_ind_table_obj *ind_tbl = priv->drop_queue.hrxq->ind_table; 3333 3334 claim_zero(mlx5_glue->destroy_rwq_ind_table(ind_tbl->ind_table)); 3335 mlx5_rxq_obj_drop_release(dev); 3336 mlx5_free(ind_tbl); 3337 priv->drop_queue.hrxq->ind_table = NULL; 3338 } 3339 3340 /** 3341 * Create a drop Rx Hash queue. 3342 * 3343 * @param dev 3344 * Pointer to Ethernet device. 3345 * 3346 * @return 3347 * The Verbs/DevX object initialised, NULL otherwise and rte_errno is set. 3348 */ 3349 struct mlx5_hrxq * 3350 mlx5_hrxq_drop_new(struct rte_eth_dev *dev) 3351 { 3352 struct mlx5_priv *priv = dev->data->dev_private; 3353 struct mlx5_ind_table_obj *ind_tbl = NULL; 3354 struct ibv_qp *qp = NULL; 3355 struct mlx5_hrxq *hrxq = NULL; 3356 3357 if (priv->drop_queue.hrxq) { 3358 rte_atomic32_inc(&priv->drop_queue.hrxq->refcnt); 3359 return priv->drop_queue.hrxq; 3360 } 3361 hrxq = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*hrxq), 0, SOCKET_ID_ANY); 3362 if (!hrxq) { 3363 DRV_LOG(WARNING, 3364 "port %u cannot allocate memory for drop queue", 3365 dev->data->port_id); 3366 rte_errno = ENOMEM; 3367 goto error; 3368 } 3369 priv->drop_queue.hrxq = hrxq; 3370 ind_tbl = mlx5_ind_table_obj_drop_new(dev); 3371 if (!ind_tbl) 3372 goto error; 3373 hrxq->ind_table = ind_tbl; 3374 qp = mlx5_glue->create_qp_ex(priv->sh->ctx, 3375 &(struct ibv_qp_init_attr_ex){ 3376 .qp_type = IBV_QPT_RAW_PACKET, 3377 .comp_mask = 3378 IBV_QP_INIT_ATTR_PD | 3379 IBV_QP_INIT_ATTR_IND_TABLE | 3380 IBV_QP_INIT_ATTR_RX_HASH, 3381 .rx_hash_conf = (struct ibv_rx_hash_conf){ 3382 .rx_hash_function = 3383 IBV_RX_HASH_FUNC_TOEPLITZ, 3384 .rx_hash_key_len = MLX5_RSS_HASH_KEY_LEN, 3385 .rx_hash_key = rss_hash_default_key, 3386 .rx_hash_fields_mask = 0, 3387 }, 3388 .rwq_ind_tbl = ind_tbl->ind_table, 3389 .pd = priv->sh->pd 3390 }); 3391 if (!qp) { 3392 DEBUG("port %u cannot allocate QP for drop queue", 3393 dev->data->port_id); 3394 rte_errno = errno; 3395 goto error; 3396 } 3397 hrxq->qp = qp; 3398 #ifdef HAVE_IBV_FLOW_DV_SUPPORT 3399 hrxq->action = mlx5_glue->dv_create_flow_action_dest_ibv_qp(hrxq->qp); 3400 if (!hrxq->action) { 3401 rte_errno = errno; 3402 goto error; 3403 } 3404 #endif 3405 rte_atomic32_set(&hrxq->refcnt, 1); 3406 return hrxq; 3407 error: 3408 #ifdef HAVE_IBV_FLOW_DV_SUPPORT 3409 if (hrxq && hrxq->action) 3410 mlx5_glue->destroy_flow_action(hrxq->action); 3411 #endif 3412 if (qp) 3413 claim_zero(mlx5_glue->destroy_qp(hrxq->qp)); 3414 if (ind_tbl) 3415 mlx5_ind_table_obj_drop_release(dev); 3416 if (hrxq) { 3417 priv->drop_queue.hrxq = NULL; 3418 mlx5_free(hrxq); 3419 } 3420 return NULL; 3421 } 3422 3423 /** 3424 * Release a drop hash Rx queue. 3425 * 3426 * @param dev 3427 * Pointer to Ethernet device. 3428 */ 3429 void 3430 mlx5_hrxq_drop_release(struct rte_eth_dev *dev) 3431 { 3432 struct mlx5_priv *priv = dev->data->dev_private; 3433 struct mlx5_hrxq *hrxq = priv->drop_queue.hrxq; 3434 3435 if (rte_atomic32_dec_and_test(&hrxq->refcnt)) { 3436 #ifdef HAVE_IBV_FLOW_DV_SUPPORT 3437 mlx5_glue->destroy_flow_action(hrxq->action); 3438 #endif 3439 claim_zero(mlx5_glue->destroy_qp(hrxq->qp)); 3440 mlx5_ind_table_obj_drop_release(dev); 3441 mlx5_free(hrxq); 3442 priv->drop_queue.hrxq = NULL; 3443 } 3444 } 3445 3446 3447 /** 3448 * Set the Rx queue timestamp conversion parameters 3449 * 3450 * @param[in] dev 3451 * Pointer to the Ethernet device structure. 
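 * Propagates the shared device context and the real-time timestamp setting
 * to the data plane structure of every configured Rx queue.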
3452 */ 3453 void 3454 mlx5_rxq_timestamp_set(struct rte_eth_dev *dev) 3455 { 3456 struct mlx5_priv *priv = dev->data->dev_private; 3457 struct mlx5_dev_ctx_shared *sh = priv->sh; 3458 struct mlx5_rxq_data *data; 3459 unsigned int i; 3460 3461 for (i = 0; i != priv->rxqs_n; ++i) { 3462 if (!(*priv->rxqs)[i]) 3463 continue; 3464 data = (*priv->rxqs)[i]; 3465 data->sh = sh; 3466 data->rt_timestamp = priv->config.rt_timestamp; 3467 } 3468 } 3469
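
/*
 * Illustrative usage sketch (kept under "#if 0", never compiled): a rough,
 * simplified view of how the helpers in this file fit together to bring a
 * single Rx queue up and tear it down again. The real control flow lives in
 * the PMD start/stop and flow setup paths; the function name, the error
 * handling and the choice of MLX5_RXQ_OBJ_TYPE_IBV below are illustrative
 * assumptions, not the driver's actual code.
 */
#if 0
static int
mlx5_rxq_usage_sketch(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
		      const struct rte_eth_rxconf *conf, struct rte_mempool *mp)
{
	struct mlx5_rxq_ctrl *rxq_ctrl;
	struct mlx5_rxq_obj *obj;
	const uint16_t queues[1] = { idx };
	uint32_t hrxq_idx;

	/* Allocate the DPDK-level Rx queue control structure. */
	rxq_ctrl = mlx5_rxq_new(dev, idx, desc, rte_socket_id(), conf, mp);
	if (rxq_ctrl == NULL)
		return -rte_errno;
	/* Create the Verbs/DevX hardware object backing the queue. */
	obj = mlx5_rxq_obj_new(dev, idx, MLX5_RXQ_OBJ_TYPE_IBV);
	if (obj == NULL)
		goto error;
	/* The device start path records the object in the control structure. */
	rxq_ctrl->obj = obj;
	/* Create a hash Rx queue feeding this single queue (no RSS fields). */
	hrxq_idx = mlx5_hrxq_new(dev, rss_hash_default_key,
				 MLX5_RSS_HASH_KEY_LEN, 0 /* hash_fields */,
				 queues, 1, 0 /* tunnel */);
	if (hrxq_idx == 0)
		goto error;
	/* Tear everything down in reverse order. */
	mlx5_hrxq_release(dev, hrxq_idx);
	mlx5_rxq_release(dev, idx);
	return 0;
error:
	mlx5_rxq_release(dev, idx);
	return -rte_errno;
}
#endif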