/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2015 6WIND S.A.
 * Copyright 2015 Mellanox Technologies, Ltd
 */

#include <stddef.h>
#include <errno.h>
#include <string.h>
#include <stdint.h>
#include <fcntl.h>
#include <sys/queue.h>

#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <ethdev_driver.h>
#include <rte_common.h>
#include <rte_interrupts.h>
#include <rte_debug.h>
#include <rte_io.h>
#include <rte_eal_paging.h>

#include <mlx5_glue.h>
#include <mlx5_malloc.h>
#include <mlx5_common.h>
#include <mlx5_common_mr.h>

#include "mlx5_defs.h"
#include "mlx5.h"
#include "mlx5_rx.h"
#include "mlx5_utils.h"
#include "mlx5_autoconf.h"
#include "mlx5_devx.h"
#include "rte_pmd_mlx5.h"


/* Default RSS hash key also used for ConnectX-3. */
uint8_t rss_hash_default_key[] = {
	0x2c, 0xc6, 0x81, 0xd1,
	0x5b, 0xdb, 0xf4, 0xf7,
	0xfc, 0xa2, 0x83, 0x19,
	0xdb, 0x1a, 0x3e, 0x94,
	0x6b, 0x9e, 0x38, 0xd9,
	0x2c, 0x9c, 0x03, 0xd1,
	0xad, 0x99, 0x44, 0xa7,
	0xd9, 0x56, 0x3d, 0x59,
	0x06, 0x3c, 0x25, 0xf3,
	0xfc, 0x1f, 0xdc, 0x2a,
};

/* Length of the default RSS hash key. */
static_assert(MLX5_RSS_HASH_KEY_LEN ==
	      (unsigned int)sizeof(rss_hash_default_key),
	      "wrong RSS default key size.");

/**
 * Calculate the number of CQEs in CQ for the Rx queue.
 *
 * @param rxq_data
 *   Pointer to receive queue structure.
 *
 * @return
 *   Number of CQEs in CQ.
 */
unsigned int
mlx5_rxq_cqe_num(struct mlx5_rxq_data *rxq_data)
{
	unsigned int cqe_n;
	unsigned int wqe_n = 1 << rxq_data->elts_n;

	if (mlx5_rxq_mprq_enabled(rxq_data))
		cqe_n = wqe_n * RTE_BIT32(rxq_data->log_strd_num) - 1;
	else
		cqe_n = wqe_n - 1;
	return cqe_n;
}

/**
 * Allocate RX queue elements for Multi-Packet RQ.
 *
 * @param rxq_ctrl
 *   Pointer to RX queue structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
rxq_alloc_elts_mprq(struct mlx5_rxq_ctrl *rxq_ctrl)
{
	struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq;
	unsigned int wqe_n = 1 << rxq->elts_n;
	unsigned int i;
	int err;

	/* Iterate on segments. */
	for (i = 0; i <= wqe_n; ++i) {
		struct mlx5_mprq_buf *buf;

		if (rte_mempool_get(rxq->mprq_mp, (void **)&buf) < 0) {
			DRV_LOG(ERR, "port %u empty mbuf pool", rxq->port_id);
			rte_errno = ENOMEM;
			goto error;
		}
		if (i < wqe_n)
			(*rxq->mprq_bufs)[i] = buf;
		else
			rxq->mprq_repl = buf;
	}
	DRV_LOG(DEBUG,
		"port %u MPRQ queue %u allocated and configured %u segments",
		rxq->port_id, rxq->idx, wqe_n);
	return 0;
error:
	err = rte_errno; /* Save rte_errno before cleanup. */
	wqe_n = i;
	for (i = 0; (i != wqe_n); ++i) {
		if ((*rxq->mprq_bufs)[i] != NULL)
			rte_mempool_put(rxq->mprq_mp,
					(*rxq->mprq_bufs)[i]);
		(*rxq->mprq_bufs)[i] = NULL;
	}
	DRV_LOG(DEBUG, "port %u MPRQ queue %u failed, freed everything",
		rxq->port_id, rxq->idx);
	rte_errno = err; /* Restore rte_errno. */
	return -rte_errno;
}
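
/*
 * Added note (inferred from the loop above): the loop runs wqe_n + 1 times
 * on purpose. The first wqe_n buffers fill the mprq_bufs[] ring, and the
 * extra one is parked in mprq_repl as a spare replacement buffer for the
 * data path.
 */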

/**
 * Allocate RX queue elements for Single-Packet RQ.
 *
 * @param rxq_ctrl
 *   Pointer to RX queue structure.
 *
 * @return
 *   0 on success, negative errno value on failure.
 */
static int
rxq_alloc_elts_sprq(struct mlx5_rxq_ctrl *rxq_ctrl)
{
	const unsigned int sges_n = 1 << rxq_ctrl->rxq.sges_n;
	unsigned int elts_n = mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq) ?
		RTE_BIT32(rxq_ctrl->rxq.elts_n) *
		RTE_BIT32(rxq_ctrl->rxq.log_strd_num) :
		RTE_BIT32(rxq_ctrl->rxq.elts_n);
	bool has_vec_support = mlx5_rxq_check_vec_support(&rxq_ctrl->rxq) > 0;
	unsigned int i;
	int err;

	/* Iterate on segments. */
	for (i = 0; (i != elts_n); ++i) {
		struct mlx5_eth_rxseg *seg = &rxq_ctrl->rxq.rxseg[i % sges_n];
		struct rte_mbuf *buf;

		buf = rte_pktmbuf_alloc(seg->mp);
		if (buf == NULL) {
			if (rxq_ctrl->share_group == 0)
				DRV_LOG(ERR, "port %u queue %u empty mbuf pool",
					RXQ_PORT_ID(rxq_ctrl),
					rxq_ctrl->rxq.idx);
			else
				DRV_LOG(ERR, "share group %u queue %u empty mbuf pool",
					rxq_ctrl->share_group,
					rxq_ctrl->share_qid);
			rte_errno = ENOMEM;
			goto error;
		}
		/* Only vectored Rx routines rely on headroom size. */
		MLX5_ASSERT(!has_vec_support ||
			    DATA_OFF(buf) >= RTE_PKTMBUF_HEADROOM);
		/* Buffer is supposed to be empty. */
		MLX5_ASSERT(rte_pktmbuf_data_len(buf) == 0);
		MLX5_ASSERT(rte_pktmbuf_pkt_len(buf) == 0);
		MLX5_ASSERT(!buf->next);
		SET_DATA_OFF(buf, seg->offset);
		PORT(buf) = rxq_ctrl->rxq.port_id;
		DATA_LEN(buf) = seg->length;
		PKT_LEN(buf) = seg->length;
		NB_SEGS(buf) = 1;
		(*rxq_ctrl->rxq.elts)[i] = buf;
	}
	/* If Rx vector is activated. */
	if (has_vec_support) {
		struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq;
		struct rte_mbuf *mbuf_init = &rxq->fake_mbuf;
		struct rte_pktmbuf_pool_private *priv =
			(struct rte_pktmbuf_pool_private *)
				rte_mempool_get_priv(rxq_ctrl->rxq.mp);
		int j;

		/* Initialize default rearm_data for vPMD. */
		mbuf_init->data_off = RTE_PKTMBUF_HEADROOM;
		rte_mbuf_refcnt_set(mbuf_init, 1);
		mbuf_init->nb_segs = 1;
		/* For shared queues port is provided in CQE */
		mbuf_init->port = rxq->shared ? 0 : rxq->port_id;
		if (priv->flags & RTE_PKTMBUF_POOL_F_PINNED_EXT_BUF)
			mbuf_init->ol_flags = RTE_MBUF_F_EXTERNAL;
		/*
		 * prevent compiler reordering:
		 * rearm_data covers previous fields.
		 */
		rte_compiler_barrier();
		rxq->mbuf_initializer =
			*(rte_xmm_t *)&mbuf_init->rearm_data;
		/* Padding with a fake mbuf for vectorized Rx. */
		for (j = 0; j < MLX5_VPMD_DESCS_PER_LOOP; ++j)
			(*rxq->elts)[elts_n + j] = &rxq->fake_mbuf;
	}
	if (rxq_ctrl->share_group == 0)
		DRV_LOG(DEBUG,
			"port %u SPRQ queue %u allocated and configured %u segments (max %u packets)",
			RXQ_PORT_ID(rxq_ctrl), rxq_ctrl->rxq.idx, elts_n,
			elts_n / (1 << rxq_ctrl->rxq.sges_n));
	else
		DRV_LOG(DEBUG,
			"share group %u SPRQ queue %u allocated and configured %u segments (max %u packets)",
			rxq_ctrl->share_group, rxq_ctrl->share_qid, elts_n,
			elts_n / (1 << rxq_ctrl->rxq.sges_n));
	return 0;
error:
	err = rte_errno; /* Save rte_errno before cleanup. */
	elts_n = i;
	for (i = 0; (i != elts_n); ++i) {
		if ((*rxq_ctrl->rxq.elts)[i] != NULL)
			rte_pktmbuf_free_seg((*rxq_ctrl->rxq.elts)[i]);
		(*rxq_ctrl->rxq.elts)[i] = NULL;
	}
	if (rxq_ctrl->share_group == 0)
		DRV_LOG(DEBUG, "port %u SPRQ queue %u failed, freed everything",
			RXQ_PORT_ID(rxq_ctrl), rxq_ctrl->rxq.idx);
	else
		DRV_LOG(DEBUG, "share group %u SPRQ queue %u failed, freed everything",
			rxq_ctrl->share_group, rxq_ctrl->share_qid);
	rte_errno = err; /* Restore rte_errno. */
	return -rte_errno;
}

/**
 * Allocate RX queue elements.
 *
 * @param rxq_ctrl
 *   Pointer to RX queue structure.
 *
 * @return
 *   0 on success, negative errno value on failure.
 */
int
rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl)
{
	int ret = 0;

	/**
	 * For MPRQ we need to allocate both MPRQ buffers
	 * for WQEs and simple mbufs for vector processing.
	 */
	if (mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq))
		ret = rxq_alloc_elts_mprq(rxq_ctrl);
	if (ret == 0)
		ret = rxq_alloc_elts_sprq(rxq_ctrl);
	return ret;
}

/**
 * Free RX queue elements for Multi-Packet RQ.
 *
 * @param rxq_ctrl
 *   Pointer to RX queue structure.
 */
static void
rxq_free_elts_mprq(struct mlx5_rxq_ctrl *rxq_ctrl)
{
	struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq;
	uint16_t i;

	DRV_LOG(DEBUG, "port %u Multi-Packet Rx queue %u freeing %d WRs",
		rxq->port_id, rxq->idx, (1u << rxq->elts_n));
	if (rxq->mprq_bufs == NULL)
		return;
	for (i = 0; (i != (1u << rxq->elts_n)); ++i) {
		if ((*rxq->mprq_bufs)[i] != NULL)
			mlx5_mprq_buf_free((*rxq->mprq_bufs)[i]);
		(*rxq->mprq_bufs)[i] = NULL;
	}
	if (rxq->mprq_repl != NULL) {
		mlx5_mprq_buf_free(rxq->mprq_repl);
		rxq->mprq_repl = NULL;
	}
}

/**
 * Free RX queue elements for Single-Packet RQ.
 *
 * @param rxq_ctrl
 *   Pointer to RX queue structure.
 */
static void
rxq_free_elts_sprq(struct mlx5_rxq_ctrl *rxq_ctrl)
{
	struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq;
	const uint16_t q_n = mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq) ?
		RTE_BIT32(rxq->elts_n) * RTE_BIT32(rxq->log_strd_num) :
		RTE_BIT32(rxq->elts_n);
	const uint16_t q_mask = q_n - 1;
	uint16_t elts_ci = mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq) ?
		rxq->elts_ci : rxq->rq_ci;
	uint16_t used = q_n - (elts_ci - rxq->rq_pi);
	uint16_t i;

	if (rxq_ctrl->share_group == 0)
		DRV_LOG(DEBUG, "port %u Rx queue %u freeing %d WRs",
			RXQ_PORT_ID(rxq_ctrl), rxq->idx, q_n);
	else
		DRV_LOG(DEBUG, "share group %u Rx queue %u freeing %d WRs",
			rxq_ctrl->share_group, rxq_ctrl->share_qid, q_n);
	if (rxq->elts == NULL)
		return;
	/**
	 * Some mbufs in the ring still belong to the application;
	 * they cannot be freed.
	 */
	if (mlx5_rxq_check_vec_support(rxq) > 0) {
		for (i = 0; i < used; ++i)
			(*rxq->elts)[(elts_ci + i) & q_mask] = NULL;
		rxq->rq_pi = elts_ci;
	}
	for (i = 0; i != q_n; ++i) {
		if ((*rxq->elts)[i] != NULL)
			rte_pktmbuf_free_seg((*rxq->elts)[i]);
		(*rxq->elts)[i] = NULL;
	}
}
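
/*
 * Added note (illustrative, hypothetical numbers): in vectorized Rx the
 * slots already handed to the application are cleared in
 * rxq_free_elts_sprq() above so the final loop does not double-free them.
 * E.g. with q_n = 256, elts_ci = 300 and rq_pi = 290 the PMD still owns
 * 300 - 290 = 10 mbufs, and used = 256 - 10 = 246 stale slots are set to
 * NULL before freeing.
 */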

/**
 * Free RX queue elements.
 *
 * @param rxq_ctrl
 *   Pointer to RX queue structure.
 */
static void
rxq_free_elts(struct mlx5_rxq_ctrl *rxq_ctrl)
{
	/*
	 * For MPRQ we need to free both MPRQ buffers
	 * for WQEs and simple mbufs for vector processing.
	 */
	if (mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq))
		rxq_free_elts_mprq(rxq_ctrl);
	rxq_free_elts_sprq(rxq_ctrl);
}

/**
 * Returns the per-queue supported offloads.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   Supported Rx offloads.
 */
uint64_t
mlx5_get_rx_queue_offloads(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	uint64_t offloads = (RTE_ETH_RX_OFFLOAD_SCATTER |
			     RTE_ETH_RX_OFFLOAD_TIMESTAMP |
			     RTE_ETH_RX_OFFLOAD_RSS_HASH);

	if (!priv->config.mprq.enabled)
		offloads |= RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT;
	if (priv->sh->config.hw_fcs_strip)
		offloads |= RTE_ETH_RX_OFFLOAD_KEEP_CRC;
	if (priv->sh->dev_cap.hw_csum)
		offloads |= (RTE_ETH_RX_OFFLOAD_IPV4_CKSUM |
			     RTE_ETH_RX_OFFLOAD_UDP_CKSUM |
			     RTE_ETH_RX_OFFLOAD_TCP_CKSUM);
	if (priv->sh->dev_cap.hw_vlan_strip)
		offloads |= RTE_ETH_RX_OFFLOAD_VLAN_STRIP;
	if (priv->sh->config.lro_allowed)
		offloads |= RTE_ETH_RX_OFFLOAD_TCP_LRO;
	return offloads;
}


/**
 * Returns the per-port supported offloads.
 *
 * @return
 *   Supported Rx offloads.
 */
uint64_t
mlx5_get_rx_port_offloads(void)
{
	uint64_t offloads = RTE_ETH_RX_OFFLOAD_VLAN_FILTER;

	return offloads;
}

/**
 * Verify if the queue can be released.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param idx
 *   RX queue index.
 *
 * @return
 *   1 if the queue can be released,
 *   0 if the queue can not be released (there are references to it),
 *   negative errno and rte_errno is set if the queue doesn't exist.
 */
static int
mlx5_rxq_releasable(struct rte_eth_dev *dev, uint16_t idx)
{
	struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, idx);

	if (rxq == NULL) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	return (rte_atomic_load_explicit(&rxq->refcnt, rte_memory_order_relaxed) == 1);
}

/* Fetches and drops all SW-owned and error CQEs to synchronize CQ. */
static void
rxq_sync_cq(struct mlx5_rxq_data *rxq)
{
	const uint16_t cqe_n = 1 << rxq->cqe_n;
	const uint16_t cqe_mask = cqe_n - 1;
	volatile struct mlx5_cqe *cqe;
	int ret, i;

	i = cqe_n;
	do {
		cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_mask];
		ret = check_cqe(cqe, cqe_n, rxq->cq_ci);
		if (ret == MLX5_CQE_STATUS_HW_OWN)
			break;
		if (ret == MLX5_CQE_STATUS_ERR) {
			rxq->cq_ci++;
			continue;
		}
		MLX5_ASSERT(ret == MLX5_CQE_STATUS_SW_OWN);
		if (MLX5_CQE_FORMAT(cqe->op_own) != MLX5_COMPRESSED) {
			rxq->cq_ci++;
			continue;
		}
		/* Compute the next non-compressed CQE. */
		rxq->cq_ci += rxq->cqe_comp_layout ?
			(MLX5_CQE_NUM_MINIS(cqe->op_own) + 1U) :
			rte_be_to_cpu_32(cqe->byte_cnt);

	} while (--i);
	/* Move all CQEs to HW ownership, including possible MiniCQEs. */
	for (i = 0; i < cqe_n; i++) {
		cqe = &(*rxq->cqes)[i];
		cqe->validity_iteration_count = MLX5_CQE_VIC_INIT;
		cqe->op_own = MLX5_CQE_INVALIDATE;
	}
	/* Resync CQE and WQE (WQ in RESET state). */
	rte_io_wmb();
	*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
	rte_io_wmb();
	*rxq->rq_db = rte_cpu_to_be_32(0);
	rte_io_wmb();
}
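
/*
 * Added note (interpretation of the code above): once rxq_sync_cq() returns,
 * every CQE is back in HW ownership, the CQ doorbell record holds the final
 * cq_ci and the RQ doorbell is zeroed, so a later queue start can repost
 * WQEs from index 0.
 */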

/**
 * Rx queue stop. Device queue goes to the RESET state,
 * all involved mbufs are freed from WQ.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param idx
 *   RX queue index.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_rx_queue_stop_primary(struct rte_eth_dev *dev, uint16_t idx)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, idx);
	struct mlx5_rxq_ctrl *rxq_ctrl = rxq->ctrl;
	int ret;

	MLX5_ASSERT(rxq != NULL && rxq_ctrl != NULL);
	MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
	ret = priv->obj_ops.rxq_obj_modify(rxq, MLX5_RXQ_MOD_RDY2RST);
	if (ret) {
		DRV_LOG(ERR, "Cannot change Rx WQ state to RESET: %s",
			strerror(errno));
		rte_errno = errno;
		return ret;
	}
	/* Remove all processed CQEs. */
	rxq_sync_cq(&rxq_ctrl->rxq);
	/* Free all involved mbufs. */
	rxq_free_elts(rxq_ctrl);
	/* Set the actual queue state. */
	dev->data->rx_queue_state[idx] = RTE_ETH_QUEUE_STATE_STOPPED;
	return 0;
}

/**
 * Rx queue stop. Device queue goes to the RESET state,
 * all involved mbufs are freed from WQ.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param idx
 *   RX queue index.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_rx_queue_stop(struct rte_eth_dev *dev, uint16_t idx)
{
	eth_rx_burst_t pkt_burst = dev->rx_pkt_burst;
	int ret;

	if (rte_eth_dev_is_rx_hairpin_queue(dev, idx)) {
		DRV_LOG(ERR, "Hairpin queue can't be stopped");
		rte_errno = EINVAL;
		return -EINVAL;
	}
	if (dev->data->rx_queue_state[idx] == RTE_ETH_QUEUE_STATE_STOPPED)
		return 0;
	/*
	 * Vectorized Rx burst requires the CQ and RQ indices to stay
	 * synchronized; an RQ restart can break that and cause Rx
	 * malfunction, so queue stopping is not supported if the
	 * vectorized Rx burst is engaged. The burst routine pointer
	 * depends on the process type, so the check is performed here.
	 * MPRQ is not supported either.
	 */
	if (pkt_burst != mlx5_rx_burst) {
		DRV_LOG(ERR, "Rx queue stop is only supported "
			"for non-vectorized single-packet Rx");
		rte_errno = EINVAL;
		return -EINVAL;
	}
	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
		ret = mlx5_mp_os_req_queue_control(dev, idx,
						   MLX5_MP_REQ_QUEUE_RX_STOP);
	} else {
		ret = mlx5_rx_queue_stop_primary(dev, idx);
	}
	return ret;
}
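
/*
 * Added usage sketch (application side, not part of the PMD): the stop and
 * start callbacks are reached through the generic ethdev API, e.g.
 *
 *	ret = rte_eth_dev_rx_queue_stop(port_id, queue_id);
 *	if (ret == 0)
 *		ret = rte_eth_dev_rx_queue_start(port_id, queue_id);
 *
 * In a secondary process the request is forwarded to the primary over the
 * multi-process channel, as done in mlx5_rx_queue_stop() above and
 * mlx5_rx_queue_start() below.
 */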

/**
 * Rx queue start. Device queue goes to the ready state,
 * all required mbufs are allocated and WQ is replenished.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param idx
 *   RX queue index.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_rx_queue_start_primary(struct rte_eth_dev *dev, uint16_t idx)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, idx);
	struct mlx5_rxq_data *rxq_data = &rxq->ctrl->rxq;
	int ret;

	MLX5_ASSERT(rxq != NULL && rxq->ctrl != NULL);
	MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
	/* Allocate needed buffers. */
	ret = rxq_alloc_elts(rxq->ctrl);
	if (ret) {
		DRV_LOG(ERR, "Cannot reallocate buffers for Rx WQ");
		rte_errno = errno;
		return ret;
	}
	rte_io_wmb();
	*rxq_data->cq_db = rte_cpu_to_be_32(rxq_data->cq_ci);
	rte_io_wmb();
	/* Reset RQ consumer before moving queue to READY state. */
	*rxq_data->rq_db = rte_cpu_to_be_32(0);
	rte_io_wmb();
	ret = priv->obj_ops.rxq_obj_modify(rxq, MLX5_RXQ_MOD_RST2RDY);
	if (ret) {
		DRV_LOG(ERR, "Cannot change Rx WQ state to READY: %s",
			strerror(errno));
		rte_errno = errno;
		return ret;
	}
	/* Reinitialize RQ - set WQEs. */
	mlx5_rxq_initialize(rxq_data);
	rxq_data->err_state = MLX5_RXQ_ERR_STATE_NO_ERROR;
	/* Set actual queue state. */
	dev->data->rx_queue_state[idx] = RTE_ETH_QUEUE_STATE_STARTED;
	return 0;
}

/**
 * Rx queue start. Device queue goes to the ready state,
 * all required mbufs are allocated and WQ is replenished.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param idx
 *   RX queue index.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_rx_queue_start(struct rte_eth_dev *dev, uint16_t idx)
{
	int ret;

	if (rte_eth_dev_is_rx_hairpin_queue(dev, idx)) {
		DRV_LOG(ERR, "Hairpin queue can't be started");
		rte_errno = EINVAL;
		return -EINVAL;
	}
	if (dev->data->rx_queue_state[idx] == RTE_ETH_QUEUE_STATE_STARTED)
		return 0;
	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
		ret = mlx5_mp_os_req_queue_control(dev, idx,
						   MLX5_MP_REQ_QUEUE_RX_START);
	} else {
		ret = mlx5_rx_queue_start_primary(dev, idx);
	}
	return ret;
}

/**
 * Rx queue presetup checks.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param idx
 *   RX queue index.
 * @param desc
 *   Number of descriptors to configure in queue.
 * @param[out] rxq_ctrl
 *   Address of pointer to shared Rx queue control.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_rx_queue_pre_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t *desc,
			struct mlx5_rxq_ctrl **rxq_ctrl)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_rxq_priv *rxq;
	bool empty;

	if (*desc > 1 << priv->sh->cdev->config.hca_attr.log_max_wq_sz) {
		DRV_LOG(ERR,
			"port %u number of descriptors requested for Rx queue"
			" %u is more than supported",
			dev->data->port_id, idx);
		rte_errno = EINVAL;
		return -EINVAL;
	}
	if (!rte_is_power_of_2(*desc)) {
		*desc = 1 << log2above(*desc);
		DRV_LOG(WARNING,
			"port %u increased number of descriptors in Rx queue %u"
			" to the next power of two (%d)",
			dev->data->port_id, idx, *desc);
	}
	DRV_LOG(DEBUG, "port %u configuring Rx queue %u for %u descriptors",
		dev->data->port_id, idx, *desc);
	if (idx >= priv->rxqs_n) {
		DRV_LOG(ERR, "port %u Rx queue index out of range (%u >= %u)",
			dev->data->port_id, idx, priv->rxqs_n);
		rte_errno = EOVERFLOW;
		return -rte_errno;
	}
	if (rxq_ctrl == NULL || *rxq_ctrl == NULL)
		return 0;
	if (!(*rxq_ctrl)->rxq.shared) {
		if (!mlx5_rxq_releasable(dev, idx)) {
			DRV_LOG(ERR, "port %u unable to release queue index %u",
				dev->data->port_id, idx);
			rte_errno = EBUSY;
			return -rte_errno;
		}
		mlx5_rxq_release(dev, idx);
	} else {
		if ((*rxq_ctrl)->obj != NULL)
			/* Some port using shared Rx queue has been started. */
			return 0;
		/* Release all owner RxQs to reconfigure the shared RxQ. */
		do {
			rxq = LIST_FIRST(&(*rxq_ctrl)->owners);
			LIST_REMOVE(rxq, owner_entry);
			empty = LIST_EMPTY(&(*rxq_ctrl)->owners);
			mlx5_rxq_release(ETH_DEV(rxq->priv), rxq->idx);
		} while (!empty);
		*rxq_ctrl = NULL;
	}
	return 0;
}

/**
 * Get the shared Rx queue object that matches group and queue index.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param group
 *   Shared RXQ group.
 * @param share_qid
 *   Shared RX queue index.
 *
 * @return
 *   Matching shared RXQ object, or NULL if not found.
 */
static struct mlx5_rxq_ctrl *
mlx5_shared_rxq_get(struct rte_eth_dev *dev, uint32_t group, uint16_t share_qid)
{
	struct mlx5_rxq_ctrl *rxq_ctrl;
	struct mlx5_priv *priv = dev->data->dev_private;

	LIST_FOREACH(rxq_ctrl, &priv->sh->shared_rxqs, share_entry) {
		if (rxq_ctrl->share_group == group &&
		    rxq_ctrl->share_qid == share_qid)
			return rxq_ctrl;
	}
	return NULL;
}
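
/*
 * Added note: shared Rx queue controls are linked on the shared device
 * context list (priv->sh->shared_rxqs), so the lookup above can also find
 * queues created by other ports that share the same context.
 */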

/**
 * Check whether the requested Rx queue configuration matches the shared RXQ.
 *
 * @param rxq_ctrl
 *   Pointer to shared RXQ.
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param idx
 *   Queue index.
 * @param desc
 *   Number of descriptors to configure in queue.
 * @param socket
 *   NUMA socket on which memory must be allocated.
 * @param[in] conf
 *   Thresholds parameters.
 * @param mp
 *   Memory pool for buffer allocations.
 *
 * @return
 *   true if the configuration matches the shared RXQ, false otherwise.
 */
static bool
mlx5_shared_rxq_match(struct mlx5_rxq_ctrl *rxq_ctrl, struct rte_eth_dev *dev,
		      uint16_t idx, uint16_t desc, unsigned int socket,
		      const struct rte_eth_rxconf *conf,
		      struct rte_mempool *mp)
{
	struct mlx5_priv *spriv = LIST_FIRST(&rxq_ctrl->owners)->priv;
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int i;

	RTE_SET_USED(conf);
	if (rxq_ctrl->socket != socket) {
		DRV_LOG(ERR, "port %u queue index %u failed to join shared group: socket mismatch",
			dev->data->port_id, idx);
		return false;
	}
	if (rxq_ctrl->rxq.elts_n != log2above(desc)) {
		DRV_LOG(ERR, "port %u queue index %u failed to join shared group: descriptor number mismatch",
			dev->data->port_id, idx);
		return false;
	}
	if (priv->mtu != spriv->mtu) {
		DRV_LOG(ERR, "port %u queue index %u failed to join shared group: mtu mismatch",
			dev->data->port_id, idx);
		return false;
	}
	if (priv->dev_data->dev_conf.intr_conf.rxq !=
	    spriv->dev_data->dev_conf.intr_conf.rxq) {
		DRV_LOG(ERR, "port %u queue index %u failed to join shared group: interrupt mismatch",
			dev->data->port_id, idx);
		return false;
	}
	if (mp != NULL && rxq_ctrl->rxq.mp != mp) {
		DRV_LOG(ERR, "port %u queue index %u failed to join shared group: mempool mismatch",
			dev->data->port_id, idx);
		return false;
	} else if (mp == NULL) {
		if (conf->rx_nseg != rxq_ctrl->rxseg_n) {
			DRV_LOG(ERR, "port %u queue index %u failed to join shared group: segment number mismatch",
				dev->data->port_id, idx);
			return false;
		}
		for (i = 0; i < conf->rx_nseg; i++) {
			if (memcmp(&conf->rx_seg[i].split, &rxq_ctrl->rxseg[i],
				   sizeof(struct rte_eth_rxseg_split))) {
				DRV_LOG(ERR, "port %u queue index %u failed to join shared group: segment %u configuration mismatch",
					dev->data->port_id, idx, i);
				return false;
			}
		}
	}
	if (priv->config.hw_padding != spriv->config.hw_padding) {
		DRV_LOG(ERR, "port %u queue index %u failed to join shared group: padding mismatch",
			dev->data->port_id, idx);
		return false;
	}
	if (priv->config.cqe_comp != spriv->config.cqe_comp ||
	    (priv->config.cqe_comp &&
	     priv->config.cqe_comp_fmt != spriv->config.cqe_comp_fmt)) {
		DRV_LOG(ERR, "port %u queue index %u failed to join shared group: CQE compression mismatch",
			dev->data->port_id, idx);
		return false;
	}
	return true;
}
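
/*
 * Added usage sketch (application side, hypothetical values): a shared Rx
 * queue is requested through the standard rte_eth_rxconf fields before
 * rte_eth_rx_queue_setup(), and every member port must pass a matching
 * configuration (checked by mlx5_shared_rxq_match() above):
 *
 *	struct rte_eth_rxconf rx_conf = dev_info.default_rxconf;
 *
 *	rx_conf.share_group = 1;	(nonzero enables sharing)
 *	rx_conf.share_qid = 0;		(queue index inside the group)
 *	rte_eth_rx_queue_setup(port_id, 0, nb_desc, socket_id, &rx_conf, mp);
 */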

/**
 * DPDK callback to configure a Rx queue.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param idx
 *   RX queue index.
 * @param desc
 *   Number of descriptors to configure in queue.
 * @param socket
 *   NUMA socket on which memory must be allocated.
 * @param[in] conf
 *   Thresholds parameters.
 * @param mp
 *   Memory pool for buffer allocations.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
		    unsigned int socket, const struct rte_eth_rxconf *conf,
		    struct rte_mempool *mp)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_rxq_priv *rxq;
	struct mlx5_rxq_ctrl *rxq_ctrl = NULL;
	struct rte_eth_rxseg_split *rx_seg =
		(struct rte_eth_rxseg_split *)conf->rx_seg;
	struct rte_eth_rxseg_split rx_single = {.mp = mp};
	uint16_t n_seg = conf->rx_nseg;
	int res;
	uint64_t offloads = conf->offloads |
			    dev->data->dev_conf.rxmode.offloads;
	bool is_extmem = false;

	if ((offloads & RTE_ETH_RX_OFFLOAD_TCP_LRO) &&
	    !priv->sh->config.lro_allowed) {
		DRV_LOG(ERR,
			"Port %u queue %u LRO is configured but not allowed.",
			dev->data->port_id, idx);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	if (mp) {
		/*
		 * The parameters should be checked on rte_eth_dev layer.
		 * If mp is specified it means the compatible configuration
		 * without buffer split feature tuning.
		 */
		rx_seg = &rx_single;
		n_seg = 1;
		is_extmem = rte_pktmbuf_priv_flags(mp) &
			    RTE_PKTMBUF_POOL_F_PINNED_EXT_BUF;
	}
	if (n_seg > 1) {
		/* The offloads should be checked on rte_eth_dev layer. */
		MLX5_ASSERT(offloads & RTE_ETH_RX_OFFLOAD_SCATTER);
		if (!(offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT)) {
			DRV_LOG(ERR, "port %u queue index %u split "
				"offload not configured",
				dev->data->port_id, idx);
			rte_errno = ENOSPC;
			return -rte_errno;
		}
		MLX5_ASSERT(n_seg < MLX5_MAX_RXQ_NSEG);
	}
	if (conf->share_group > 0) {
		if (!priv->sh->cdev->config.hca_attr.mem_rq_rmp) {
			DRV_LOG(ERR, "port %u queue index %u shared Rx queue not supported by fw",
				dev->data->port_id, idx);
			rte_errno = EINVAL;
			return -rte_errno;
		}
		if (priv->obj_ops.rxq_obj_new != devx_obj_ops.rxq_obj_new) {
			DRV_LOG(ERR, "port %u queue index %u shared Rx queue needs DevX api",
				dev->data->port_id, idx);
			rte_errno = EINVAL;
			return -rte_errno;
		}
		if (conf->share_qid >= priv->rxqs_n) {
			DRV_LOG(ERR, "port %u shared Rx queue index %u > number of Rx queues %u",
				dev->data->port_id, conf->share_qid,
				priv->rxqs_n);
			rte_errno = EINVAL;
			return -rte_errno;
		}
		if (priv->config.mprq.enabled) {
			DRV_LOG(ERR, "port %u shared Rx queue index %u: not supported when MPRQ enabled",
				dev->data->port_id, conf->share_qid);
			rte_errno = EINVAL;
			return -rte_errno;
		}
		/* Try to reuse shared RXQ. */
		rxq_ctrl = mlx5_shared_rxq_get(dev, conf->share_group,
					       conf->share_qid);
		res = mlx5_rx_queue_pre_setup(dev, idx, &desc, &rxq_ctrl);
		if (res)
			return res;
		if (rxq_ctrl != NULL &&
		    !mlx5_shared_rxq_match(rxq_ctrl, dev, idx, desc, socket,
					   conf, mp)) {
			rte_errno = EINVAL;
			return -rte_errno;
		}
	} else {
		res = mlx5_rx_queue_pre_setup(dev, idx, &desc, &rxq_ctrl);
		if (res)
			return res;
	}
	/* Allocate RXQ. */
	rxq = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, sizeof(*rxq), 0,
			  SOCKET_ID_ANY);
	if (!rxq) {
		DRV_LOG(ERR, "port %u unable to allocate rx queue index %u private data",
			dev->data->port_id, idx);
		rte_errno = ENOMEM;
		return -rte_errno;
	}
	if (rxq_ctrl == NULL) {
		rxq_ctrl = mlx5_rxq_new(dev, idx, desc, socket, conf, rx_seg,
					n_seg, is_extmem);
		if (rxq_ctrl == NULL) {
			DRV_LOG(ERR, "port %u unable to allocate rx queue index %u",
				dev->data->port_id, idx);
			mlx5_free(rxq);
			rte_errno = ENOMEM;
			return -rte_errno;
		}
	}
	rxq->priv = priv;
	rxq->idx = idx;
	(*priv->rxq_privs)[idx] = rxq;
	/* Join owner list. */
	LIST_INSERT_HEAD(&rxq_ctrl->owners, rxq, owner_entry);
	rxq->ctrl = rxq_ctrl;
	rte_atomic_fetch_add_explicit(&rxq_ctrl->ctrl_ref, 1, rte_memory_order_relaxed);
	mlx5_rxq_ref(dev, idx);
	DRV_LOG(DEBUG, "port %u adding Rx queue %u to list",
		dev->data->port_id, idx);
	dev->data->rx_queues[idx] = &rxq_ctrl->rxq;
	return 0;
}
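
/*
 * Added usage sketch (application side, hypothetical peer values): hairpin
 * queues are created through the dedicated ethdev call instead of
 * rte_eth_rx_queue_setup():
 *
 *	struct rte_eth_hairpin_conf conf = {
 *		.peer_count = 1,
 *		.peers[0] = { .port = peer_port_id, .queue = peer_txq_id },
 *	};
 *	rte_eth_rx_hairpin_queue_setup(port_id, rxq_id, nb_desc, &conf);
 */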

/**
 * DPDK callback to configure a Rx hairpin queue.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param idx
 *   RX queue index.
 * @param desc
 *   Number of descriptors to configure in queue.
 * @param hairpin_conf
 *   Hairpin configuration parameters.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_rx_hairpin_queue_setup(struct rte_eth_dev *dev, uint16_t idx,
			    uint16_t desc,
			    const struct rte_eth_hairpin_conf *hairpin_conf)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_rxq_priv *rxq;
	struct mlx5_rxq_ctrl *rxq_ctrl;
	int res;

	res = mlx5_rx_queue_pre_setup(dev, idx, &desc, NULL);
	if (res)
		return res;
	if (hairpin_conf->peer_count != 1) {
		rte_errno = EINVAL;
		DRV_LOG(ERR, "port %u unable to setup Rx hairpin queue index %u"
			" peer count is %u", dev->data->port_id,
			idx, hairpin_conf->peer_count);
		return -rte_errno;
	}
	if (hairpin_conf->peers[0].port == dev->data->port_id) {
		if (hairpin_conf->peers[0].queue >= priv->txqs_n) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u unable to setup Rx hairpin queue"
				" index %u, Tx %u is larger than %u",
				dev->data->port_id, idx,
				hairpin_conf->peers[0].queue, priv->txqs_n);
			return -rte_errno;
		}
	} else {
		if (hairpin_conf->manual_bind == 0 ||
		    hairpin_conf->tx_explicit == 0) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u unable to setup Rx hairpin queue"
				" index %u peer port %u with attributes %u %u",
				dev->data->port_id, idx,
				hairpin_conf->peers[0].port,
				hairpin_conf->manual_bind,
				hairpin_conf->tx_explicit);
			return -rte_errno;
		}
	}
	rxq = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, sizeof(*rxq), 0,
			  SOCKET_ID_ANY);
	if (!rxq) {
		DRV_LOG(ERR, "port %u unable to allocate hairpin rx queue index %u private data",
			dev->data->port_id, idx);
		rte_errno = ENOMEM;
		return -rte_errno;
	}
	rxq->priv = priv;
	rxq->idx = idx;
	(*priv->rxq_privs)[idx] = rxq;
	rxq_ctrl = mlx5_rxq_hairpin_new(dev, rxq, desc, hairpin_conf);
	if (!rxq_ctrl) {
		DRV_LOG(ERR, "port %u unable to allocate hairpin queue index %u",
			dev->data->port_id, idx);
		mlx5_free(rxq);
		(*priv->rxq_privs)[idx] = NULL;
		rte_errno = ENOMEM;
		return -rte_errno;
	}
	DRV_LOG(DEBUG, "port %u adding hairpin Rx queue %u to list",
		dev->data->port_id, idx);
	dev->data->rx_queues[idx] = &rxq_ctrl->rxq;
	return 0;
}

/**
 * DPDK callback to release a RX queue.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param qid
 *   Receive queue index.
 */
void
mlx5_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
{
	struct mlx5_rxq_data *rxq = dev->data->rx_queues[qid];

	if (rxq == NULL)
		return;
	if (!mlx5_rxq_releasable(dev, qid))
		rte_panic("port %u Rx queue %u is still used by a flow and"
			  " cannot be removed\n", dev->data->port_id, qid);
	mlx5_rxq_release(dev, qid);
}

/**
 * Allocate queue vector and fill epoll fd list for Rx interrupts.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_rx_intr_vec_enable(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int i;
	unsigned int rxqs_n = priv->rxqs_n;
	unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID);
	unsigned int count = 0;
	struct rte_intr_handle *intr_handle = dev->intr_handle;

	if (!dev->data->dev_conf.intr_conf.rxq)
		return 0;
	mlx5_rx_intr_vec_disable(dev);
	if (rte_intr_vec_list_alloc(intr_handle, NULL, n)) {
		DRV_LOG(ERR,
			"port %u failed to allocate memory for interrupt"
			" vector, Rx interrupts will not be supported",
			dev->data->port_id);
		rte_errno = ENOMEM;
		return -rte_errno;
	}

	if (rte_intr_type_set(intr_handle, RTE_INTR_HANDLE_EXT))
		return -rte_errno;

	for (i = 0; i != n; ++i) {
		/* This rxq obj must not be released in this function. */
		struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, i);
		struct mlx5_rxq_obj *rxq_obj = rxq ? rxq->ctrl->obj : NULL;
		int rc;

		/* Skip queues that cannot request interrupts. */
		if (!rxq_obj || (!rxq_obj->ibv_channel &&
				 !rxq_obj->devx_channel)) {
			/* Use invalid intr_vec[] index to disable entry. */
			if (rte_intr_vec_list_index_set(intr_handle, i,
			    RTE_INTR_VEC_RXTX_OFFSET + RTE_MAX_RXTX_INTR_VEC_ID))
				return -rte_errno;
			continue;
		}
		mlx5_rxq_ref(dev, i);
		if (count >= RTE_MAX_RXTX_INTR_VEC_ID) {
			DRV_LOG(ERR,
				"port %u too many Rx queues for interrupt"
				" vector size (%d), Rx interrupts cannot be"
				" enabled",
				dev->data->port_id, RTE_MAX_RXTX_INTR_VEC_ID);
			mlx5_rx_intr_vec_disable(dev);
			rte_errno = ENOMEM;
			return -rte_errno;
		}
		rc = mlx5_os_set_nonblock_channel_fd(rxq_obj->fd);
		if (rc < 0) {
			rte_errno = errno;
			DRV_LOG(ERR,
				"port %u failed to make Rx interrupt file"
				" descriptor %d non-blocking for queue index"
				" %d",
				dev->data->port_id, rxq_obj->fd, i);
			mlx5_rx_intr_vec_disable(dev);
			return -rte_errno;
		}

		if (rte_intr_vec_list_index_set(intr_handle, i,
					RTE_INTR_VEC_RXTX_OFFSET + count))
			return -rte_errno;
		if (rte_intr_efds_index_set(intr_handle, count,
					    rxq_obj->fd))
			return -rte_errno;
		count++;
	}
	if (!count)
		mlx5_rx_intr_vec_disable(dev);
	else if (rte_intr_nb_efd_set(intr_handle, count))
		return -rte_errno;
	return 0;
}

/**
 * Clean up Rx interrupts handler.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
void
mlx5_rx_intr_vec_disable(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct rte_intr_handle *intr_handle = dev->intr_handle;
	unsigned int i;
	unsigned int rxqs_n = priv->rxqs_n;
	unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID);

	if (!dev->data->dev_conf.intr_conf.rxq)
		return;
	if (rte_intr_vec_list_index_get(intr_handle, 0) < 0)
		goto free;
	for (i = 0; i != n; ++i) {
		if (rte_intr_vec_list_index_get(intr_handle, i) ==
		    RTE_INTR_VEC_RXTX_OFFSET + RTE_MAX_RXTX_INTR_VEC_ID)
			continue;
		/**
		 * Need to access directly the queue to release the reference
		 * kept in mlx5_rx_intr_vec_enable().
		 */
		mlx5_rxq_deref(dev, i);
	}
free:
	rte_intr_free_epoll_fd(intr_handle);

	rte_intr_vec_list_free(intr_handle);

	rte_intr_nb_efd_set(intr_handle, 0);
}

/**
 * MLX5 CQ notification.
 *
 * @param rxq
 *   Pointer to receive queue structure.
 * @param sq_n_rxq
 *   Sequence number per receive queue.
 */
static inline void
mlx5_arm_cq(struct mlx5_rxq_data *rxq, int sq_n_rxq)
{
	int sq_n = 0;
	uint32_t doorbell_hi;
	uint64_t doorbell;

	sq_n = sq_n_rxq & MLX5_CQ_SQN_MASK;
	doorbell_hi = sq_n << MLX5_CQ_SQN_OFFSET | (rxq->cq_ci & MLX5_CI_MASK);
	doorbell = (uint64_t)doorbell_hi << 32;
	doorbell |= rxq->cqn;
	mlx5_doorbell_ring(&rxq->uar_data, rte_cpu_to_be_64(doorbell),
			   doorbell_hi, &rxq->cq_db[MLX5_CQ_ARM_DB], 0);
}

/**
 * DPDK callback for Rx queue interrupt enable.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param rx_queue_id
 *   Rx queue number.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_rx_intr_enable(struct rte_eth_dev *dev, uint16_t rx_queue_id)
{
	struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, rx_queue_id);
	if (!rxq)
		goto error;
	if (rxq->ctrl->irq) {
		if (!rxq->ctrl->obj)
			goto error;
		mlx5_arm_cq(&rxq->ctrl->rxq, rxq->ctrl->rxq.cq_arm_sn);
	}
	return 0;
error:
	rte_errno = EINVAL;
	return -rte_errno;
}

/**
 * DPDK callback for Rx queue interrupt disable.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param rx_queue_id
 *   Rx queue number.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_rx_intr_disable(struct rte_eth_dev *dev, uint16_t rx_queue_id)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, rx_queue_id);
	int ret = 0;

	if (!rxq) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	if (!rxq->ctrl->obj)
		goto error;
	if (rxq->ctrl->irq) {
		ret = priv->obj_ops.rxq_event_get(rxq->ctrl->obj);
		if (ret < 0)
			goto error;
		rxq->ctrl->rxq.cq_arm_sn++;
	}
	return 0;
error:
	/**
	 * The ret variable may be EAGAIN, which means the get_event function
	 * was called before an event was received.
	 */
	if (ret < 0)
		rte_errno = errno;
	else
		rte_errno = EINVAL;
	if (rte_errno != EAGAIN)
		DRV_LOG(WARNING, "port %u unable to disable interrupt on Rx queue %d",
			dev->data->port_id, rx_queue_id);
	return -rte_errno;
}
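
/*
 * Added usage sketch (application side): the two callbacks above are wired
 * to the generic ethdev Rx interrupt API, e.g.
 *
 *	rte_eth_dev_rx_intr_enable(port_id, queue_id);
 *	... wait for the epoll event registered via rte_eth_dev_rx_intr_ctl_q() ...
 *	rte_eth_dev_rx_intr_disable(port_id, queue_id);
 */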

/**
 * Verify that the Rx queue objects list is empty.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   The number of objects not released.
 */
int
mlx5_rxq_obj_verify(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	int ret = 0;
	struct mlx5_rxq_obj *rxq_obj;

	LIST_FOREACH(rxq_obj, &priv->rxqsobj, next) {
		if (rxq_obj->rxq_ctrl == NULL)
			continue;
		if (rxq_obj->rxq_ctrl->rxq.shared &&
		    !LIST_EMPTY(&rxq_obj->rxq_ctrl->owners))
			continue;
		DRV_LOG(DEBUG, "port %u Rx queue %u still referenced",
			dev->data->port_id, rxq_obj->rxq_ctrl->rxq.idx);
		++ret;
	}
	return ret;
}

/**
 * Callback function to initialize mbufs for Multi-Packet RQ.
 */
static inline void
mlx5_mprq_buf_init(struct rte_mempool *mp, void *opaque_arg,
		   void *_m, unsigned int i __rte_unused)
{
	struct mlx5_mprq_buf *buf = _m;
	struct rte_mbuf_ext_shared_info *shinfo;
	unsigned int strd_n = (unsigned int)(uintptr_t)opaque_arg;
	unsigned int j;

	memset(_m, 0, sizeof(*buf));
	buf->mp = mp;
	rte_atomic_store_explicit(&buf->refcnt, 1, rte_memory_order_relaxed);
	for (j = 0; j != strd_n; ++j) {
		shinfo = &buf->shinfos[j];
		shinfo->free_cb = mlx5_mprq_buf_free_cb;
		shinfo->fcb_opaque = buf;
	}
}

/**
 * Free mempool of Multi-Packet RQ.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   0 on success, negative errno value on failure.
 */
int
mlx5_mprq_free_mp(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct rte_mempool *mp = priv->mprq_mp;
	unsigned int i;

	if (mp == NULL)
		return 0;
	DRV_LOG(DEBUG, "port %u freeing mempool (%s) for Multi-Packet RQ",
		dev->data->port_id, mp->name);
	/*
	 * If a buffer in the pool has been externally attached to a mbuf and it
	 * is still in use by the application, destroying the Rx queue can spoil
	 * the packet. It is unlikely to happen but if the application
	 * dynamically creates and destroys queues while holding Rx packets,
	 * this can happen.
	 *
	 * TODO: It is unavoidable for now because the mempool for Multi-Packet
	 * RQ isn't provided by the application but managed by the PMD.
	 */
	if (!rte_mempool_full(mp)) {
		DRV_LOG(ERR,
			"port %u mempool for Multi-Packet RQ is still in use",
			dev->data->port_id);
		rte_errno = EBUSY;
		return -rte_errno;
	}
	rte_mempool_free(mp);
	/* Unset mempool for each Rx queue. */
	for (i = 0; i != priv->rxqs_n; ++i) {
		struct mlx5_rxq_data *rxq = mlx5_rxq_data_get(dev, i);

		if (rxq == NULL)
			continue;
		rxq->mprq_mp = NULL;
	}
	priv->mprq_mp = NULL;
	return 0;
}
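
/*
 * Added note (illustrative, assumed stride layout): each element of the
 * Multi-Packet RQ mempool created below is one big buffer covering all
 * strides of a WQE plus one rte_mbuf_ext_shared_info per stride. For
 * example, 64 strides of 2 KB give buf_len = 128 KB and
 * obj_size = sizeof(struct mlx5_mprq_buf) + 128 KB +
 * 64 * sizeof(struct rte_mbuf_ext_shared_info) + RTE_PKTMBUF_HEADROOM.
 */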

/**
 * Allocate a mempool for Multi-Packet RQ. All configured Rx queues share the
 * mempool. If already allocated, reuse it if there are enough elements.
 * Otherwise, resize it.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   0 on success, negative errno value on failure.
 */
int
mlx5_mprq_alloc_mp(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct rte_mempool *mp = priv->mprq_mp;
	char name[RTE_MEMPOOL_NAMESIZE];
	unsigned int desc = 0;
	unsigned int buf_len;
	unsigned int obj_num;
	unsigned int obj_size;
	unsigned int log_strd_num = 0;
	unsigned int log_strd_sz = 0;
	unsigned int i;
	unsigned int n_ibv = 0;
	int ret;

	if (!mlx5_mprq_enabled(dev))
		return 0;
	/* Count the total number of descriptors configured. */
	for (i = 0; i != priv->rxqs_n; ++i) {
		struct mlx5_rxq_ctrl *rxq_ctrl = mlx5_rxq_ctrl_get(dev, i);
		struct mlx5_rxq_data *rxq;

		if (rxq_ctrl == NULL || rxq_ctrl->is_hairpin)
			continue;
		rxq = &rxq_ctrl->rxq;
		n_ibv++;
		desc += 1 << rxq->elts_n;
		/* Get the max number of strides. */
		if (log_strd_num < rxq->log_strd_num)
			log_strd_num = rxq->log_strd_num;
		/* Get the max size of a stride. */
		if (log_strd_sz < rxq->log_strd_sz)
			log_strd_sz = rxq->log_strd_sz;
	}
	MLX5_ASSERT(log_strd_num && log_strd_sz);
	buf_len = RTE_BIT32(log_strd_num) * RTE_BIT32(log_strd_sz);
	obj_size = sizeof(struct mlx5_mprq_buf) + buf_len +
		RTE_BIT32(log_strd_num) *
		sizeof(struct rte_mbuf_ext_shared_info) +
		RTE_PKTMBUF_HEADROOM;
	/*
	 * Received packets can be either memcpy'd or externally referenced. In
	 * case that the packet is attached to an mbuf as an external buffer, as
	 * it isn't possible to predict how the buffers will be queued by the
	 * application, there's no option to exactly pre-allocate needed buffers
	 * in advance but to speculatively prepare enough buffers.
	 *
	 * In the data path, if this Mempool is depleted, PMD will try to memcpy
	 * received packets to buffers provided by the application (rxq->mp)
	 * until this Mempool gets available again.
	 */
	desc *= 4;
	obj_num = desc + MLX5_MPRQ_MP_CACHE_SZ * n_ibv;
	/*
	 * rte_mempool_create_empty() has sanity check to refuse large cache
	 * size compared to the number of elements.
	 * CALC_CACHE_FLUSHTHRESH() is defined in a C file, so using a
	 * constant number 2 instead.
	 */
	obj_num = RTE_MAX(obj_num, MLX5_MPRQ_MP_CACHE_SZ * 2);
	/* Check if a mempool is already allocated and if it can be reused. */
	if (mp != NULL && mp->elt_size >= obj_size && mp->size >= obj_num) {
		DRV_LOG(DEBUG, "port %u mempool %s is being reused",
			dev->data->port_id, mp->name);
		/* Reuse. */
		goto exit;
	} else if (mp != NULL) {
		DRV_LOG(DEBUG, "port %u mempool %s should be resized, freeing it",
			dev->data->port_id, mp->name);
		/*
		 * If failed to free, which means it may be still in use, no way
		 * but to keep using the existing one. On buffer underrun,
		 * packets will be memcpy'd instead of external buffer
		 * attachment.
		 */
1474 */ 1475 if (mlx5_mprq_free_mp(dev)) { 1476 if (mp->elt_size >= obj_size) 1477 goto exit; 1478 else 1479 return -rte_errno; 1480 } 1481 } 1482 snprintf(name, sizeof(name), "port-%u-mprq", dev->data->port_id); 1483 mp = rte_mempool_create(name, obj_num, obj_size, MLX5_MPRQ_MP_CACHE_SZ, 1484 0, NULL, NULL, mlx5_mprq_buf_init, 1485 (void *)((uintptr_t)1 << log_strd_num), 1486 dev->device->numa_node, 0); 1487 if (mp == NULL) { 1488 DRV_LOG(ERR, 1489 "port %u failed to allocate a mempool for" 1490 " Multi-Packet RQ, count=%u, size=%u", 1491 dev->data->port_id, obj_num, obj_size); 1492 rte_errno = ENOMEM; 1493 return -rte_errno; 1494 } 1495 ret = mlx5_mr_mempool_register(priv->sh->cdev, mp, false); 1496 if (ret < 0 && rte_errno != EEXIST) { 1497 ret = rte_errno; 1498 DRV_LOG(ERR, "port %u failed to register a mempool for Multi-Packet RQ", 1499 dev->data->port_id); 1500 rte_mempool_free(mp); 1501 rte_errno = ret; 1502 return -rte_errno; 1503 } 1504 priv->mprq_mp = mp; 1505 exit: 1506 /* Set mempool for each Rx queue. */ 1507 for (i = 0; i != priv->rxqs_n; ++i) { 1508 struct mlx5_rxq_ctrl *rxq_ctrl = mlx5_rxq_ctrl_get(dev, i); 1509 1510 if (rxq_ctrl == NULL || rxq_ctrl->is_hairpin) 1511 continue; 1512 rxq_ctrl->rxq.mprq_mp = mp; 1513 } 1514 DRV_LOG(INFO, "port %u Multi-Packet RQ is configured", 1515 dev->data->port_id); 1516 return 0; 1517 } 1518 1519 #define MLX5_MAX_TCP_HDR_OFFSET ((unsigned int)(sizeof(struct rte_ether_hdr) + \ 1520 sizeof(struct rte_vlan_hdr) * 2 + \ 1521 sizeof(struct rte_ipv6_hdr))) 1522 #define MAX_TCP_OPTION_SIZE 40u 1523 #define MLX5_MAX_LRO_HEADER_FIX ((unsigned int)(MLX5_MAX_TCP_HDR_OFFSET + \ 1524 sizeof(struct rte_tcp_hdr) + \ 1525 MAX_TCP_OPTION_SIZE)) 1526 1527 /** 1528 * Adjust the maximum LRO massage size. 1529 * 1530 * @param dev 1531 * Pointer to Ethernet device. 1532 * @param idx 1533 * RX queue index. 1534 * @param max_lro_size 1535 * The maximum size for LRO packet. 1536 */ 1537 static void 1538 mlx5_max_lro_msg_size_adjust(struct rte_eth_dev *dev, uint16_t idx, 1539 uint32_t max_lro_size) 1540 { 1541 struct mlx5_priv *priv = dev->data->dev_private; 1542 1543 if (priv->sh->cdev->config.hca_attr.lro_max_msg_sz_mode == 1544 MLX5_LRO_MAX_MSG_SIZE_START_FROM_L4 && max_lro_size > 1545 MLX5_MAX_TCP_HDR_OFFSET) 1546 max_lro_size -= MLX5_MAX_TCP_HDR_OFFSET; 1547 max_lro_size = RTE_MIN(max_lro_size, MLX5_MAX_LRO_SIZE); 1548 if (priv->max_lro_msg_size) 1549 priv->max_lro_msg_size = 1550 RTE_MIN((uint32_t)priv->max_lro_msg_size, max_lro_size); 1551 else 1552 priv->max_lro_msg_size = max_lro_size; 1553 DRV_LOG(DEBUG, 1554 "port %u Rx Queue %u max LRO message size adjusted to %u bytes", 1555 dev->data->port_id, idx, priv->max_lro_msg_size); 1556 } 1557 1558 /** 1559 * Prepare both size and number of stride for Multi-Packet RQ. 1560 * 1561 * @param dev 1562 * Pointer to Ethernet device. 1563 * @param idx 1564 * RX queue index. 1565 * @param desc 1566 * Number of descriptors to configure in queue. 1567 * @param rx_seg_en 1568 * Indicator if Rx segment enables, if so Multi-Packet RQ doesn't enable. 1569 * @param min_mbuf_size 1570 * Non scatter min mbuf size, max_rx_pktlen plus overhead. 1571 * @param actual_log_stride_num 1572 * Log number of strides to configure for this queue. 1573 * @param actual_log_stride_size 1574 * Log stride size to configure for this queue. 1575 * @param is_extmem 1576 * Is external pinned memory pool used. 1577 * @return 1578 * 0 if Multi-Packet RQ is supported, otherwise -1. 
1579 */ 1580 static int 1581 mlx5_mprq_prepare(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, 1582 bool rx_seg_en, uint32_t min_mbuf_size, 1583 uint32_t *actual_log_stride_num, 1584 uint32_t *actual_log_stride_size, 1585 bool is_extmem) 1586 { 1587 struct mlx5_priv *priv = dev->data->dev_private; 1588 struct mlx5_port_config *config = &priv->config; 1589 struct mlx5_dev_cap *dev_cap = &priv->sh->dev_cap; 1590 uint32_t log_min_stride_num = dev_cap->mprq.log_min_stride_num; 1591 uint32_t log_max_stride_num = dev_cap->mprq.log_max_stride_num; 1592 uint32_t log_def_stride_num = 1593 RTE_MIN(RTE_MAX(MLX5_MPRQ_DEFAULT_LOG_STRIDE_NUM, 1594 log_min_stride_num), 1595 log_max_stride_num); 1596 uint32_t log_min_stride_size = dev_cap->mprq.log_min_stride_size; 1597 uint32_t log_max_stride_size = dev_cap->mprq.log_max_stride_size; 1598 uint32_t log_def_stride_size = 1599 RTE_MIN(RTE_MAX(MLX5_MPRQ_DEFAULT_LOG_STRIDE_SIZE, 1600 log_min_stride_size), 1601 log_max_stride_size); 1602 uint32_t log_stride_wqe_size; 1603 1604 if (mlx5_check_mprq_support(dev) != 1 || rx_seg_en || is_extmem) 1605 goto unsupport; 1606 /* Checks if chosen number of strides is in supported range. */ 1607 if (config->mprq.log_stride_num > log_max_stride_num || 1608 config->mprq.log_stride_num < log_min_stride_num) { 1609 *actual_log_stride_num = log_def_stride_num; 1610 DRV_LOG(WARNING, 1611 "Port %u Rx queue %u number of strides for Multi-Packet RQ is out of range, setting default value (%u)", 1612 dev->data->port_id, idx, RTE_BIT32(log_def_stride_num)); 1613 } else { 1614 *actual_log_stride_num = config->mprq.log_stride_num; 1615 } 1616 /* Checks if chosen size of stride is in supported range. */ 1617 if (config->mprq.log_stride_size != (uint32_t)MLX5_ARG_UNSET) { 1618 if (config->mprq.log_stride_size > log_max_stride_size || 1619 config->mprq.log_stride_size < log_min_stride_size) { 1620 *actual_log_stride_size = log_def_stride_size; 1621 DRV_LOG(WARNING, 1622 "Port %u Rx queue %u size of a stride for Multi-Packet RQ is out of range, setting default value (%u)", 1623 dev->data->port_id, idx, 1624 RTE_BIT32(log_def_stride_size)); 1625 } else { 1626 *actual_log_stride_size = config->mprq.log_stride_size; 1627 } 1628 } else { 1629 /* Make the stride fit the mbuf size by default. */ 1630 if (min_mbuf_size <= RTE_BIT32(log_max_stride_size)) { 1631 DRV_LOG(WARNING, 1632 "Port %u Rx queue %u size of a stride for Multi-Packet RQ is adjusted to match the mbuf size (%u)", 1633 dev->data->port_id, idx, min_mbuf_size); 1634 *actual_log_stride_size = log2above(min_mbuf_size); 1635 } else { 1636 goto unsupport; 1637 } 1638 } 1639 /* Make sure the stride size is greater than the headroom. */ 1640 if (RTE_BIT32(*actual_log_stride_size) < RTE_PKTMBUF_HEADROOM) { 1641 if (RTE_BIT32(log_max_stride_size) > RTE_PKTMBUF_HEADROOM) { 1642 DRV_LOG(WARNING, 1643 "Port %u Rx queue %u size of a stride for Multi-Packet RQ is adjusted to accommodate the headroom (%u)", 1644 dev->data->port_id, idx, RTE_PKTMBUF_HEADROOM); 1645 *actual_log_stride_size = log2above(RTE_PKTMBUF_HEADROOM); 1646 } else { 1647 goto unsupport; 1648 } 1649 } 1650 log_stride_wqe_size = *actual_log_stride_num + *actual_log_stride_size; 1651 /* Check if WQE buffer size is supported by hardware. 
	if (log_stride_wqe_size < dev_cap->mprq.log_min_stride_wqe_size) {
		*actual_log_stride_num = log_def_stride_num;
		*actual_log_stride_size = log_def_stride_size;
		DRV_LOG(WARNING,
			"Port %u Rx queue %u size of WQE buffer for Multi-Packet RQ is too small, setting default values (stride_num_n=%u, stride_size_n=%u)",
			dev->data->port_id, idx, RTE_BIT32(log_def_stride_num),
			RTE_BIT32(log_def_stride_size));
		log_stride_wqe_size = log_def_stride_num + log_def_stride_size;
	}
	MLX5_ASSERT(log_stride_wqe_size >=
		    dev_cap->mprq.log_min_stride_wqe_size);
	if (desc <= RTE_BIT32(*actual_log_stride_num))
		goto unsupport;
	if (min_mbuf_size > RTE_BIT32(log_stride_wqe_size)) {
		DRV_LOG(WARNING, "Port %u Rx queue %u "
			"Multi-Packet RQ is unsupported, WQE buffer size (%u) "
			"is smaller than min mbuf size (%u)",
			dev->data->port_id, idx, RTE_BIT32(log_stride_wqe_size),
			min_mbuf_size);
		goto unsupport;
	}
	DRV_LOG(DEBUG, "Port %u Rx queue %u "
		"Multi-Packet RQ is enabled strd_num_n = %u, strd_sz_n = %u",
		dev->data->port_id, idx, RTE_BIT32(*actual_log_stride_num),
		RTE_BIT32(*actual_log_stride_size));
	return 0;
unsupport:
	if (config->mprq.enabled)
		DRV_LOG(WARNING,
			"Port %u MPRQ is requested but cannot be enabled\n"
			" (requested: pkt_sz = %u, desc_num = %u,"
			" rxq_num = %u, stride_sz = %u, stride_num = %u\n"
			"  supported: min_rxqs_num = %u, min_buf_wqe_sz = %u"
			" min_stride_sz = %u, max_stride_sz = %u).\n"
			"Rx segment is %senabled. External mempool is %sused.",
			dev->data->port_id, min_mbuf_size, desc, priv->rxqs_n,
			config->mprq.log_stride_size == (uint32_t)MLX5_ARG_UNSET ?
			RTE_BIT32(MLX5_MPRQ_DEFAULT_LOG_STRIDE_SIZE) :
			RTE_BIT32(config->mprq.log_stride_size),
			RTE_BIT32(config->mprq.log_stride_num),
			config->mprq.min_rxqs_num,
			RTE_BIT32(dev_cap->mprq.log_min_stride_wqe_size),
			RTE_BIT32(dev_cap->mprq.log_min_stride_size),
			RTE_BIT32(dev_cap->mprq.log_max_stride_size),
			rx_seg_en ? "" : "not ", is_extmem ? "" : "not ");
	return -1;
}

/**
 * Create a DPDK Rx queue.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param idx
 *   RX queue index.
 * @param desc
 *   Number of descriptors to configure in queue.
 * @param socket
 *   NUMA socket on which memory must be allocated.
 *
 * @return
 *   A DPDK queue object on success, NULL otherwise and rte_errno is set.
 */
struct mlx5_rxq_ctrl *
mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
	     unsigned int socket, const struct rte_eth_rxconf *conf,
	     const struct rte_eth_rxseg_split *rx_seg, uint16_t n_seg,
	     bool is_extmem)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_rxq_ctrl *tmpl;
	unsigned int mb_len = rte_pktmbuf_data_room_size(rx_seg[0].mp);
	struct mlx5_port_config *config = &priv->config;
	uint64_t offloads = conf->offloads |
			    dev->data->dev_conf.rxmode.offloads;
	unsigned int lro_on_queue = !!(offloads & RTE_ETH_RX_OFFLOAD_TCP_LRO);
	unsigned int max_rx_pktlen = lro_on_queue ?
			dev->data->dev_conf.rxmode.max_lro_pkt_size :
			dev->data->mtu + (unsigned int)RTE_ETHER_HDR_LEN +
				RTE_ETHER_CRC_LEN;
	unsigned int non_scatter_min_mbuf_size = max_rx_pktlen +
							RTE_PKTMBUF_HEADROOM;
	unsigned int max_lro_size = 0;
	unsigned int first_mb_free_size = mb_len - RTE_PKTMBUF_HEADROOM;
	uint32_t mprq_log_actual_stride_num = 0;
	uint32_t mprq_log_actual_stride_size = 0;
	bool rx_seg_en = n_seg != 1 || rx_seg[0].offset || rx_seg[0].length;
	const int mprq_en = !mlx5_mprq_prepare(dev, idx, desc, rx_seg_en,
					       non_scatter_min_mbuf_size,
					       &mprq_log_actual_stride_num,
					       &mprq_log_actual_stride_size,
					       is_extmem);
	/*
	 * Always allocate extra slots, even if eventually
	 * the vector Rx will not be used.
	 */
	uint16_t desc_n = desc + config->rx_vec_en * MLX5_VPMD_DESCS_PER_LOOP;
	size_t alloc_size = sizeof(*tmpl) + desc_n * sizeof(struct rte_mbuf *);
	const struct rte_eth_rxseg_split *qs_seg = rx_seg;
	unsigned int tail_len;

	if (mprq_en) {
		/* Trim the number of descs needed. */
		desc >>= mprq_log_actual_stride_num;
		alloc_size += desc * sizeof(struct mlx5_mprq_buf *);
	}
	tmpl = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, alloc_size, 0, socket);
	if (!tmpl) {
		rte_errno = ENOMEM;
		return NULL;
	}
	LIST_INIT(&tmpl->owners);
	MLX5_ASSERT(n_seg && n_seg <= MLX5_MAX_RXQ_NSEG);
	/*
	 * Save the original segment configuration in the shared queue
	 * descriptor for the later check on the sibling queue creation.
	 */
	tmpl->rxseg_n = n_seg;
	rte_memcpy(tmpl->rxseg, qs_seg,
		   sizeof(struct rte_eth_rxseg_split) * n_seg);
	/*
	 * Build the array of actual buffer offsets and lengths.
	 * Pad with the buffers from the last memory pool if
	 * needed to handle max size packets, replace zero length
	 * with the buffer length from the pool.
	 */
	tail_len = max_rx_pktlen;
	do {
		struct mlx5_eth_rxseg *hw_seg =
			&tmpl->rxq.rxseg[tmpl->rxq.rxseg_n];
		uint32_t buf_len, offset, seg_len;

		/*
		 * For the buffers beyond descriptions offset is zero,
		 * the first buffer contains head room.
		 */
		buf_len = rte_pktmbuf_data_room_size(qs_seg->mp);
		offset = (tmpl->rxq.rxseg_n >= n_seg ? 0 : qs_seg->offset) +
			 (tmpl->rxq.rxseg_n ? 0 : RTE_PKTMBUF_HEADROOM);
		/*
		 * For the buffers beyond descriptions the length is
		 * pool buffer length, zero lengths are replaced with
		 * pool buffer length either.
		 */
		seg_len = tmpl->rxq.rxseg_n >= n_seg ? buf_len :
			  qs_seg->length ?
			  qs_seg->length :
			  (buf_len - offset);
		/* Check is done in long int, no overflow. */
		if (buf_len < seg_len + offset) {
			DRV_LOG(ERR, "port %u Rx queue %u: Split offset/length "
				"%u/%u can't be satisfied",
				dev->data->port_id, idx,
				qs_seg->length, qs_seg->offset);
			rte_errno = EINVAL;
			goto error;
		}
		if (seg_len > tail_len)
			seg_len = buf_len - offset;
		if (++tmpl->rxq.rxseg_n > MLX5_MAX_RXQ_NSEG) {
			DRV_LOG(ERR,
				"port %u too many SGEs (%u) needed to handle"
				" requested maximum packet size %u, the maximum"
				" supported are %u", dev->data->port_id,
				tmpl->rxq.rxseg_n, max_rx_pktlen,
				MLX5_MAX_RXQ_NSEG);
			rte_errno = ENOTSUP;
			goto error;
		}
		/* Build the actual scattering element in the queue object. */
1822 hw_seg->mp = qs_seg->mp; 1823 MLX5_ASSERT(offset <= UINT16_MAX); 1824 MLX5_ASSERT(seg_len <= UINT16_MAX); 1825 hw_seg->offset = (uint16_t)offset; 1826 hw_seg->length = (uint16_t)seg_len; 1827 /* 1828 * Advance the segment descriptor; the padding is based 1829 * on the attributes of the last descriptor. 1830 */ 1831 if (tmpl->rxq.rxseg_n < n_seg) 1832 qs_seg++; 1833 tail_len -= RTE_MIN(tail_len, seg_len); 1834 } while (tail_len || !rte_is_power_of_2(tmpl->rxq.rxseg_n)); 1835 MLX5_ASSERT(tmpl->rxq.rxseg_n && 1836 tmpl->rxq.rxseg_n <= MLX5_MAX_RXQ_NSEG); 1837 if (tmpl->rxq.rxseg_n > 1 && !(offloads & RTE_ETH_RX_OFFLOAD_SCATTER)) { 1838 DRV_LOG(ERR, "port %u Rx queue %u: Scatter offload is not" 1839 " configured and not enough mbuf space (%u) to contain " 1840 "the maximum RX packet length (%u) with head-room (%u)", 1841 dev->data->port_id, idx, mb_len, max_rx_pktlen, 1842 RTE_PKTMBUF_HEADROOM); 1843 rte_errno = ENOSPC; 1844 goto error; 1845 } 1846 tmpl->is_hairpin = false; 1847 if (mlx5_mr_ctrl_init(&tmpl->rxq.mr_ctrl, 1848 &priv->sh->cdev->mr_scache.dev_gen, socket)) { 1849 /* rte_errno is already set. */ 1850 goto error; 1851 } 1852 tmpl->socket = socket; 1853 if (dev->data->dev_conf.intr_conf.rxq) 1854 tmpl->irq = 1; 1855 if (mprq_en) { 1856 /* TODO: Rx scatter isn't supported yet. */ 1857 tmpl->rxq.sges_n = 0; 1858 tmpl->rxq.log_strd_num = mprq_log_actual_stride_num; 1859 tmpl->rxq.log_strd_sz = mprq_log_actual_stride_size; 1860 tmpl->rxq.strd_shift_en = MLX5_MPRQ_TWO_BYTE_SHIFT; 1861 tmpl->rxq.strd_scatter_en = 1862 !!(offloads & RTE_ETH_RX_OFFLOAD_SCATTER); 1863 tmpl->rxq.mprq_max_memcpy_len = RTE_MIN(first_mb_free_size, 1864 config->mprq.max_memcpy_len); 1865 max_lro_size = RTE_MIN(max_rx_pktlen, 1866 RTE_BIT32(tmpl->rxq.log_strd_num) * 1867 RTE_BIT32(tmpl->rxq.log_strd_sz)); 1868 } else if (tmpl->rxq.rxseg_n == 1) { 1869 MLX5_ASSERT(max_rx_pktlen <= first_mb_free_size); 1870 tmpl->rxq.sges_n = 0; 1871 max_lro_size = max_rx_pktlen; 1872 } else if (offloads & RTE_ETH_RX_OFFLOAD_SCATTER) { 1873 unsigned int sges_n; 1874 1875 if (lro_on_queue && first_mb_free_size < 1876 MLX5_MAX_LRO_HEADER_FIX) { 1877 DRV_LOG(ERR, "Not enough space in the first segment (%u)" 1878 " to include the max header size (%u) for LRO", 1879 first_mb_free_size, MLX5_MAX_LRO_HEADER_FIX); 1880 rte_errno = ENOTSUP; 1881 goto error; 1882 } 1883 /* 1884 * Determine the number of SGEs needed for a full packet 1885 * and round it to the next power of two. 1886 */ 1887 sges_n = log2above(tmpl->rxq.rxseg_n); 1888 if (sges_n > MLX5_MAX_LOG_RQ_SEGS) { 1889 DRV_LOG(ERR, 1890 "port %u too many SGEs (%u) needed to handle" 1891 " requested maximum packet size %u, the maximum" 1892 " supported is %u", dev->data->port_id, 1893 1 << sges_n, max_rx_pktlen, 1894 1u << MLX5_MAX_LOG_RQ_SEGS); 1895 rte_errno = ENOTSUP; 1896 goto error; 1897 } 1898 tmpl->rxq.sges_n = sges_n; 1899 max_lro_size = max_rx_pktlen; 1900 } 1901 DRV_LOG(DEBUG, "port %u maximum number of segments per packet: %u", 1902 dev->data->port_id, 1 << tmpl->rxq.sges_n); 1903 if (desc % (1 << tmpl->rxq.sges_n)) { 1904 DRV_LOG(ERR, 1905 "port %u number of Rx queue descriptors (%u) is not a" 1906 " multiple of SGEs per packet (%u)", 1907 dev->data->port_id, 1908 desc, 1909 1 << tmpl->rxq.sges_n); 1910 rte_errno = EINVAL; 1911 goto error; 1912 } 1913 mlx5_max_lro_msg_size_adjust(dev, idx, max_lro_size); 1914 /* Toggle RX checksum offload if hardware supports it. */ 1915 tmpl->rxq.csum = !!(offloads & RTE_ETH_RX_OFFLOAD_CHECKSUM); 1916 /* Configure Rx timestamp. 
*/ 1917 tmpl->rxq.hw_timestamp = !!(offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP); 1918 tmpl->rxq.timestamp_rx_flag = 0; 1919 if (tmpl->rxq.hw_timestamp && rte_mbuf_dyn_rx_timestamp_register( 1920 &tmpl->rxq.timestamp_offset, 1921 &tmpl->rxq.timestamp_rx_flag) != 0) { 1922 DRV_LOG(ERR, "Cannot register Rx timestamp field/flag"); 1923 goto error; 1924 } 1925 /* Configure VLAN stripping. */ 1926 tmpl->rxq.vlan_strip = !!(offloads & RTE_ETH_RX_OFFLOAD_VLAN_STRIP); 1927 /* By default, FCS (CRC) is stripped by hardware. */ 1928 tmpl->rxq.crc_present = 0; 1929 tmpl->rxq.lro = lro_on_queue; 1930 if (offloads & RTE_ETH_RX_OFFLOAD_KEEP_CRC) { 1931 if (priv->sh->config.hw_fcs_strip) { 1932 /* 1933 * RQs used for LRO-enabled TIRs should not be 1934 * configured to scatter the FCS. 1935 */ 1936 if (lro_on_queue) 1937 DRV_LOG(WARNING, 1938 "port %u CRC stripping has been " 1939 "disabled but will still be performed " 1940 "by hardware, because LRO is enabled", 1941 dev->data->port_id); 1942 else 1943 tmpl->rxq.crc_present = 1; 1944 } else { 1945 DRV_LOG(WARNING, 1946 "port %u CRC stripping has been disabled but will" 1947 " still be performed by hardware, make sure MLNX_OFED" 1948 " and firmware are up to date", 1949 dev->data->port_id); 1950 } 1951 } 1952 DRV_LOG(DEBUG, 1953 "port %u CRC stripping is %s, %u bytes will be subtracted from" 1954 " incoming frames to hide it", 1955 dev->data->port_id, 1956 tmpl->rxq.crc_present ? "disabled" : "enabled", 1957 tmpl->rxq.crc_present << 2); 1958 tmpl->rxq.rss_hash = !!priv->rss_conf.rss_hf && 1959 (!!(dev->data->dev_conf.rxmode.mq_mode & RTE_ETH_MQ_RX_RSS)); 1960 /* Save port ID. */ 1961 tmpl->rxq.port_id = dev->data->port_id; 1962 tmpl->sh = priv->sh; 1963 tmpl->rxq.mp = rx_seg[0].mp; 1964 tmpl->rxq.elts_n = log2above(desc); 1965 tmpl->rxq.rq_repl_thresh = MLX5_VPMD_RXQ_RPLNSH_THRESH(desc_n); 1966 tmpl->rxq.elts = (struct rte_mbuf *(*)[])(tmpl + 1); 1967 tmpl->rxq.mprq_bufs = (struct mlx5_mprq_buf *(*)[])(*tmpl->rxq.elts + desc_n); 1968 tmpl->rxq.idx = idx; 1969 if (conf->share_group > 0) { 1970 tmpl->rxq.shared = 1; 1971 tmpl->share_group = conf->share_group; 1972 tmpl->share_qid = conf->share_qid; 1973 } 1974 LIST_INSERT_HEAD(&priv->sh->shared_rxqs, tmpl, share_entry); 1975 rte_atomic_store_explicit(&tmpl->ctrl_ref, 1, rte_memory_order_relaxed); 1976 return tmpl; 1977 error: 1978 mlx5_mr_btree_free(&tmpl->rxq.mr_ctrl.cache_bh); 1979 mlx5_free(tmpl); 1980 return NULL; 1981 } 1982 1983 /** 1984 * Create a DPDK Rx hairpin queue. 1985 * 1986 * @param dev 1987 * Pointer to Ethernet device. 1988 * @param rxq 1989 * RX queue. 1990 * @param desc 1991 * Number of descriptors to configure in queue. 1992 * @param hairpin_conf 1993 * The hairpin binding configuration. 1994 * 1995 * @return 1996 * A DPDK queue object on success, NULL otherwise and rte_errno is set. 
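 *
 * A minimal usage sketch of the path leading here through the generic
 * ethdev API (illustrative only; "peer_port_id", "peer_txq_id" and
 * "nb_desc" are placeholders, not names used by this driver):
 *
 *	struct rte_eth_hairpin_conf conf = {
 *		.peer_count = 1,
 *		.peers[0] = { .port = peer_port_id, .queue = peer_txq_id },
 *	};
 *
 *	if (rte_eth_rx_hairpin_queue_setup(port_id, rx_queue_id,
 *					   nb_desc, &conf) != 0)
 *		return -rte_errno;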
1997 */ 1998 struct mlx5_rxq_ctrl * 1999 mlx5_rxq_hairpin_new(struct rte_eth_dev *dev, struct mlx5_rxq_priv *rxq, 2000 uint16_t desc, 2001 const struct rte_eth_hairpin_conf *hairpin_conf) 2002 { 2003 uint16_t idx = rxq->idx; 2004 struct mlx5_priv *priv = dev->data->dev_private; 2005 struct mlx5_rxq_ctrl *tmpl; 2006 2007 tmpl = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, sizeof(*tmpl), 0, 2008 SOCKET_ID_ANY); 2009 if (!tmpl) { 2010 rte_errno = ENOMEM; 2011 return NULL; 2012 } 2013 LIST_INIT(&tmpl->owners); 2014 rxq->ctrl = tmpl; 2015 LIST_INSERT_HEAD(&tmpl->owners, rxq, owner_entry); 2016 tmpl->is_hairpin = true; 2017 tmpl->socket = SOCKET_ID_ANY; 2018 tmpl->rxq.rss_hash = 0; 2019 tmpl->rxq.port_id = dev->data->port_id; 2020 tmpl->sh = priv->sh; 2021 tmpl->rxq.mp = NULL; 2022 tmpl->rxq.elts_n = log2above(desc); 2023 tmpl->rxq.elts = NULL; 2024 tmpl->rxq.mr_ctrl.cache_bh = (struct mlx5_mr_btree) { 0 }; 2025 tmpl->rxq.idx = idx; 2026 rxq->hairpin_conf = *hairpin_conf; 2027 mlx5_rxq_ref(dev, idx); 2028 LIST_INSERT_HEAD(&priv->sh->shared_rxqs, tmpl, share_entry); 2029 rte_atomic_store_explicit(&tmpl->ctrl_ref, 1, rte_memory_order_relaxed); 2030 return tmpl; 2031 } 2032 2033 /** 2034 * Increase Rx queue reference count. 2035 * 2036 * @param dev 2037 * Pointer to Ethernet device. 2038 * @param idx 2039 * RX queue index. 2040 * 2041 * @return 2042 * A pointer to the queue if it exists, NULL otherwise. 2043 */ 2044 struct mlx5_rxq_priv * 2045 mlx5_rxq_ref(struct rte_eth_dev *dev, uint16_t idx) 2046 { 2047 struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, idx); 2048 2049 if (rxq != NULL) 2050 rte_atomic_fetch_add_explicit(&rxq->refcnt, 1, rte_memory_order_relaxed); 2051 return rxq; 2052 } 2053 2054 /** 2055 * Dereference a Rx queue. 2056 * 2057 * @param dev 2058 * Pointer to Ethernet device. 2059 * @param idx 2060 * RX queue index. 2061 * 2062 * @return 2063 * Updated reference count. 2064 */ 2065 uint32_t 2066 mlx5_rxq_deref(struct rte_eth_dev *dev, uint16_t idx) 2067 { 2068 struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, idx); 2069 2070 if (rxq == NULL) 2071 return 0; 2072 return rte_atomic_fetch_sub_explicit(&rxq->refcnt, 1, rte_memory_order_relaxed) - 1; 2073 } 2074 2075 /** 2076 * Get a Rx queue. 2077 * 2078 * @param dev 2079 * Pointer to Ethernet device. 2080 * @param idx 2081 * RX queue index. 2082 * 2083 * @return 2084 * A pointer to the queue if it exists, NULL otherwise. 2085 */ 2086 struct mlx5_rxq_priv * 2087 mlx5_rxq_get(struct rte_eth_dev *dev, uint16_t idx) 2088 { 2089 struct mlx5_priv *priv = dev->data->dev_private; 2090 2091 if (idx >= priv->rxqs_n) 2092 return NULL; 2093 MLX5_ASSERT(priv->rxq_privs != NULL); 2094 return (*priv->rxq_privs)[idx]; 2095 } 2096 2097 /** 2098 * Get Rx queue shareable control. 2099 * 2100 * @param dev 2101 * Pointer to Ethernet device. 2102 * @param idx 2103 * RX queue index. 2104 * 2105 * @return 2106 * A pointer to the queue control if it exists, NULL otherwise. 2107 */ 2108 struct mlx5_rxq_ctrl * 2109 mlx5_rxq_ctrl_get(struct rte_eth_dev *dev, uint16_t idx) 2110 { 2111 struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, idx); 2112 2113 return rxq == NULL ? NULL : rxq->ctrl; 2114 } 2115 2116 /** 2117 * Get Rx queue shareable data. 2118 * 2119 * @param dev 2120 * Pointer to Ethernet device. 2121 * @param idx 2122 * RX queue index. 2123 * 2124 * @return 2125 * A pointer to the queue data if it exists, NULL otherwise. 
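 *
 * A minimal usage sketch (illustrative only; "idx" is assumed to be a
 * configured queue index supplied by the caller):
 *
 *	struct mlx5_rxq_data *rxq_data = mlx5_rxq_data_get(dev, idx);
 *
 *	if (rxq_data != NULL)
 *		DRV_LOG(DEBUG, "port %u Rx queue %u has %u elements",
 *			dev->data->port_id, idx, 1u << rxq_data->elts_n);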
2126 */ 2127 struct mlx5_rxq_data * 2128 mlx5_rxq_data_get(struct rte_eth_dev *dev, uint16_t idx) 2129 { 2130 struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, idx); 2131 2132 return rxq == NULL ? NULL : &rxq->ctrl->rxq; 2133 } 2134 2135 /** 2136 * Increase an external Rx queue reference count. 2137 * 2138 * @param dev 2139 * Pointer to Ethernet device. 2140 * @param idx 2141 * External RX queue index. 2142 * 2143 * @return 2144 * A pointer to the queue if it exists, NULL otherwise. 2145 */ 2146 struct mlx5_external_q * 2147 mlx5_ext_rxq_ref(struct rte_eth_dev *dev, uint16_t idx) 2148 { 2149 struct mlx5_external_q *rxq = mlx5_ext_rxq_get(dev, idx); 2150 2151 rte_atomic_fetch_add_explicit(&rxq->refcnt, 1, rte_memory_order_relaxed); 2152 return rxq; 2153 } 2154 2155 /** 2156 * Decrease an external Rx queue reference count. 2157 * 2158 * @param dev 2159 * Pointer to Ethernet device. 2160 * @param idx 2161 * External RX queue index. 2162 * 2163 * @return 2164 * Updated reference count. 2165 */ 2166 uint32_t 2167 mlx5_ext_rxq_deref(struct rte_eth_dev *dev, uint16_t idx) 2168 { 2169 struct mlx5_external_q *rxq = mlx5_ext_rxq_get(dev, idx); 2170 2171 return rte_atomic_fetch_sub_explicit(&rxq->refcnt, 1, rte_memory_order_relaxed) - 1; 2172 } 2173 2174 /** 2175 * Get an external Rx queue. 2176 * 2177 * @param dev 2178 * Pointer to Ethernet device. 2179 * @param idx 2180 * External Rx queue index. 2181 * 2182 * @return 2183 * A pointer to the queue if it exists, NULL otherwise. 2184 */ 2185 struct mlx5_external_q * 2186 mlx5_ext_rxq_get(struct rte_eth_dev *dev, uint16_t idx) 2187 { 2188 struct mlx5_priv *priv = dev->data->dev_private; 2189 2190 MLX5_ASSERT(mlx5_is_external_rxq(dev, idx)); 2191 return &priv->ext_rxqs[idx - RTE_PMD_MLX5_EXTERNAL_RX_QUEUE_ID_MIN]; 2192 } 2193 2194 /** 2195 * Dereference a list of Rx queues. 2196 * 2197 * @param dev 2198 * Pointer to Ethernet device. 2199 * @param queues 2200 * List of Rx queues to deref. 2201 * @param queues_n 2202 * Number of queues in the array. 2203 */ 2204 static void 2205 mlx5_rxqs_deref(struct rte_eth_dev *dev, uint16_t *queues, 2206 const uint32_t queues_n) 2207 { 2208 uint32_t i; 2209 2210 for (i = 0; i < queues_n; i++) { 2211 if (mlx5_is_external_rxq(dev, queues[i])) 2212 claim_nonzero(mlx5_ext_rxq_deref(dev, queues[i])); 2213 else 2214 claim_nonzero(mlx5_rxq_deref(dev, queues[i])); 2215 } 2216 } 2217 2218 /** 2219 * Increase reference count for list of Rx queues. 2220 * 2221 * @param dev 2222 * Pointer to Ethernet device. 2223 * @param queues 2224 * List of Rx queues to ref. 2225 * @param queues_n 2226 * Number of queues in the array. 2227 * 2228 * @return 2229 * 0 on success, a negative errno value otherwise and rte_errno is set. 2230 */ 2231 static int 2232 mlx5_rxqs_ref(struct rte_eth_dev *dev, uint16_t *queues, 2233 const uint32_t queues_n) 2234 { 2235 uint32_t i; 2236 2237 for (i = 0; i != queues_n; ++i) { 2238 if (mlx5_is_external_rxq(dev, queues[i])) { 2239 if (mlx5_ext_rxq_ref(dev, queues[i]) == NULL) 2240 goto error; 2241 } else { 2242 if (mlx5_rxq_ref(dev, queues[i]) == NULL) 2243 goto error; 2244 } 2245 } 2246 return 0; 2247 error: 2248 mlx5_rxqs_deref(dev, queues, i); 2249 rte_errno = EINVAL; 2250 return -rte_errno; 2251 } 2252 2253 /** 2254 * Release a Rx queue. 2255 * 2256 * @param dev 2257 * Pointer to Ethernet device. 2258 * @param idx 2259 * RX queue index. 2260 * 2261 * @return 2262 * 1 while a reference on it exists, 0 when freed. 
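 *
 * A minimal usage sketch (illustrative only; "priv" and "i" are assumed to
 * be the caller's device private data and loop counter), e.g. releasing
 * every Rx queue of a port during teardown:
 *
 *	for (i = 0; i != priv->rxqs_n; ++i)
 *		mlx5_rxq_release(dev, i);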
2263 */ 2264 int 2265 mlx5_rxq_release(struct rte_eth_dev *dev, uint16_t idx) 2266 { 2267 struct mlx5_priv *priv = dev->data->dev_private; 2268 struct mlx5_rxq_priv *rxq; 2269 struct mlx5_rxq_ctrl *rxq_ctrl; 2270 uint32_t refcnt; 2271 int32_t ctrl_ref; 2272 2273 if (priv->rxq_privs == NULL) 2274 return 0; 2275 rxq = mlx5_rxq_get(dev, idx); 2276 if (rxq == NULL || rxq->refcnt == 0) 2277 return 0; 2278 rxq_ctrl = rxq->ctrl; 2279 refcnt = mlx5_rxq_deref(dev, idx); 2280 if (refcnt > 1) { 2281 return 1; 2282 } else if (refcnt == 1) { /* RxQ stopped. */ 2283 priv->obj_ops.rxq_obj_release(rxq); 2284 if (!rxq_ctrl->started && rxq_ctrl->obj != NULL) { 2285 LIST_REMOVE(rxq_ctrl->obj, next); 2286 mlx5_free(rxq_ctrl->obj); 2287 rxq_ctrl->obj = NULL; 2288 } 2289 if (!rxq_ctrl->is_hairpin) { 2290 if (!rxq_ctrl->started) 2291 rxq_free_elts(rxq_ctrl); 2292 dev->data->rx_queue_state[idx] = 2293 RTE_ETH_QUEUE_STATE_STOPPED; 2294 } 2295 } else { /* Refcnt zero, closing device. */ 2296 LIST_REMOVE(rxq, owner_entry); 2297 ctrl_ref = rte_atomic_fetch_sub_explicit(&rxq_ctrl->ctrl_ref, 1, 2298 rte_memory_order_relaxed) - 1; 2299 if (ctrl_ref == 1 && LIST_EMPTY(&rxq_ctrl->owners)) { 2300 if (!rxq_ctrl->is_hairpin) 2301 mlx5_mr_btree_free 2302 (&rxq_ctrl->rxq.mr_ctrl.cache_bh); 2303 LIST_REMOVE(rxq_ctrl, share_entry); 2304 mlx5_free(rxq_ctrl); 2305 } 2306 dev->data->rx_queues[idx] = NULL; 2307 mlx5_free(rxq); 2308 (*priv->rxq_privs)[idx] = NULL; 2309 } 2310 return 0; 2311 } 2312 2313 /** 2314 * Verify the Rx Queue list is empty 2315 * 2316 * @param dev 2317 * Pointer to Ethernet device. 2318 * 2319 * @return 2320 * The number of object not released. 2321 */ 2322 int 2323 mlx5_rxq_verify(struct rte_eth_dev *dev) 2324 { 2325 struct mlx5_priv *priv = dev->data->dev_private; 2326 struct mlx5_rxq_ctrl *rxq_ctrl; 2327 int ret = 0; 2328 2329 LIST_FOREACH(rxq_ctrl, &priv->sh->shared_rxqs, share_entry) { 2330 DRV_LOG(DEBUG, "port %u Rx Queue %u still referenced", 2331 dev->data->port_id, rxq_ctrl->rxq.idx); 2332 ++ret; 2333 } 2334 return ret; 2335 } 2336 2337 /** 2338 * Verify the external Rx Queue list is empty. 2339 * 2340 * @param dev 2341 * Pointer to Ethernet device. 2342 * 2343 * @return 2344 * The number of object not released. 2345 */ 2346 int 2347 mlx5_ext_rxq_verify(struct rte_eth_dev *dev) 2348 { 2349 struct mlx5_priv *priv = dev->data->dev_private; 2350 struct mlx5_external_q *rxq; 2351 uint32_t i; 2352 int ret = 0; 2353 2354 if (priv->ext_rxqs == NULL) 2355 return 0; 2356 2357 for (i = RTE_PMD_MLX5_EXTERNAL_RX_QUEUE_ID_MIN; i <= UINT16_MAX ; ++i) { 2358 rxq = mlx5_ext_rxq_get(dev, i); 2359 if (rxq->refcnt < 2) 2360 continue; 2361 DRV_LOG(DEBUG, "Port %u external RxQ %u still referenced.", 2362 dev->data->port_id, i); 2363 ++ret; 2364 } 2365 return ret; 2366 } 2367 2368 /** 2369 * Check whether RxQ type is Hairpin. 2370 * 2371 * @param dev 2372 * Pointer to Ethernet device. 2373 * @param idx 2374 * Rx queue index. 2375 * 2376 * @return 2377 * True if Rx queue type is Hairpin, otherwise False. 2378 */ 2379 bool 2380 mlx5_rxq_is_hairpin(struct rte_eth_dev *dev, uint16_t idx) 2381 { 2382 struct mlx5_rxq_ctrl *rxq_ctrl; 2383 2384 if (mlx5_is_external_rxq(dev, idx)) 2385 return false; 2386 rxq_ctrl = mlx5_rxq_ctrl_get(dev, idx); 2387 return (rxq_ctrl != NULL && rxq_ctrl->is_hairpin); 2388 } 2389 2390 /* 2391 * Get a Rx hairpin queue configuration. 2392 * 2393 * @param dev 2394 * Pointer to Ethernet device. 2395 * @param idx 2396 * Rx queue index. 
2397 * 2398 * @return 2399 * Pointer to the configuration if a hairpin RX queue, otherwise NULL. 2400 */ 2401 const struct rte_eth_hairpin_conf * 2402 mlx5_rxq_get_hairpin_conf(struct rte_eth_dev *dev, uint16_t idx) 2403 { 2404 if (mlx5_rxq_is_hairpin(dev, idx)) { 2405 struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, idx); 2406 2407 return rxq != NULL ? &rxq->hairpin_conf : NULL; 2408 } 2409 return NULL; 2410 } 2411 2412 /** 2413 * Match queues listed in arguments to queues contained in indirection table 2414 * object. 2415 * 2416 * @param ind_tbl 2417 * Pointer to indirection table to match. 2418 * @param queues 2419 * Queues to match to queues in indirection table. 2420 * @param queues_n 2421 * Number of queues in the array. 2422 * 2423 * @return 2424 * 1 if all queues in the indirection table match, 0 otherwise. 2425 */ 2426 static int 2427 mlx5_ind_table_obj_match_queues(const struct mlx5_ind_table_obj *ind_tbl, 2428 const uint16_t *queues, uint32_t queues_n) 2429 { 2430 return (ind_tbl->queues_n == queues_n) && 2431 (!memcmp(ind_tbl->queues, queues, 2432 ind_tbl->queues_n * sizeof(ind_tbl->queues[0]))); 2433 } 2434 2435 /** 2436 * Get an indirection table. 2437 * 2438 * @param dev 2439 * Pointer to Ethernet device. 2440 * @param queues 2441 * Queues entering the indirection table. 2442 * @param queues_n 2443 * Number of queues in the array. 2444 * 2445 * @return 2446 * An indirection table if found. 2447 */ 2448 struct mlx5_ind_table_obj * 2449 mlx5_ind_table_obj_get(struct rte_eth_dev *dev, const uint16_t *queues, 2450 uint32_t queues_n) 2451 { 2452 struct mlx5_priv *priv = dev->data->dev_private; 2453 struct mlx5_ind_table_obj *ind_tbl; 2454 2455 rte_rwlock_read_lock(&priv->ind_tbls_lock); 2456 LIST_FOREACH(ind_tbl, &priv->ind_tbls, next) { 2457 if ((ind_tbl->queues_n == queues_n) && 2458 (memcmp(ind_tbl->queues, queues, 2459 ind_tbl->queues_n * sizeof(ind_tbl->queues[0])) 2460 == 0)) { 2461 rte_atomic_fetch_add_explicit(&ind_tbl->refcnt, 1, 2462 rte_memory_order_relaxed); 2463 break; 2464 } 2465 } 2466 rte_rwlock_read_unlock(&priv->ind_tbls_lock); 2467 return ind_tbl; 2468 } 2469 2470 /** 2471 * Release an indirection table. 2472 * 2473 * @param dev 2474 * Pointer to Ethernet device. 2475 * @param ind_table 2476 * Indirection table to release. 2477 * @param deref_rxqs 2478 * If true, then dereference RX queues related to indirection table. 2479 * Otherwise, no additional action will be taken. 2480 * 2481 * @return 2482 * 1 while a reference on it exists, 0 when freed. 2483 */ 2484 int 2485 mlx5_ind_table_obj_release(struct rte_eth_dev *dev, 2486 struct mlx5_ind_table_obj *ind_tbl, 2487 bool deref_rxqs) 2488 { 2489 struct mlx5_priv *priv = dev->data->dev_private; 2490 unsigned int ret; 2491 2492 rte_rwlock_write_lock(&priv->ind_tbls_lock); 2493 ret = rte_atomic_fetch_sub_explicit(&ind_tbl->refcnt, 1, rte_memory_order_relaxed) - 1; 2494 if (!ret) 2495 LIST_REMOVE(ind_tbl, next); 2496 rte_rwlock_write_unlock(&priv->ind_tbls_lock); 2497 if (ret) 2498 return 1; 2499 priv->obj_ops.ind_table_destroy(ind_tbl); 2500 if (deref_rxqs) 2501 mlx5_rxqs_deref(dev, ind_tbl->queues, ind_tbl->queues_n); 2502 mlx5_free(ind_tbl); 2503 return 0; 2504 } 2505 2506 /** 2507 * Verify the indirection table list is empty. 2508 * 2509 * @param dev 2510 * Pointer to Ethernet device. 2511 * 2512 * @return 2513 * The number of objects not released. 
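 *
 * A minimal usage sketch (illustrative only) of how a close path might
 * consume the return value to detect leaked objects:
 *
 *	ret = mlx5_ind_table_obj_verify(dev);
 *	if (ret)
 *		DRV_LOG(WARNING,
 *			"port %u %d indirection tables still remain",
 *			dev->data->port_id, ret);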
2514 */ 2515 int 2516 mlx5_ind_table_obj_verify(struct rte_eth_dev *dev) 2517 { 2518 struct mlx5_priv *priv = dev->data->dev_private; 2519 struct mlx5_ind_table_obj *ind_tbl; 2520 int ret = 0; 2521 2522 rte_rwlock_read_lock(&priv->ind_tbls_lock); 2523 LIST_FOREACH(ind_tbl, &priv->ind_tbls, next) { 2524 DRV_LOG(DEBUG, 2525 "port %u indirection table obj %p still referenced", 2526 dev->data->port_id, (void *)ind_tbl); 2527 ++ret; 2528 } 2529 rte_rwlock_read_unlock(&priv->ind_tbls_lock); 2530 return ret; 2531 } 2532 2533 /** 2534 * Set up the fields of an indirection table structure. 2535 * 2536 * @param dev 2537 * Pointer to Ethernet device. 2538 * @param ind_table 2539 * Indirection table to modify. 2540 * @param ref_qs 2541 * Whether to increment RxQ reference counters. 2542 * 2543 * @return 2544 * 0 on success, a negative errno value otherwise and rte_errno is set. 2545 */ 2546 int 2547 mlx5_ind_table_obj_setup(struct rte_eth_dev *dev, 2548 struct mlx5_ind_table_obj *ind_tbl, 2549 bool ref_qs) 2550 { 2551 struct mlx5_priv *priv = dev->data->dev_private; 2552 uint32_t queues_n = ind_tbl->queues_n; 2553 int ret; 2554 const unsigned int n = rte_is_power_of_2(queues_n) ? 2555 log2above(queues_n) : 2556 log2above(priv->sh->dev_cap.ind_table_max_size); 2557 2558 if (ref_qs && mlx5_rxqs_ref(dev, ind_tbl->queues, queues_n) < 0) { 2559 DRV_LOG(DEBUG, "Port %u invalid indirection table queues.", 2560 dev->data->port_id); 2561 return -rte_errno; 2562 } 2563 ret = priv->obj_ops.ind_table_new(dev, n, ind_tbl); 2564 if (ret) { 2565 DRV_LOG(DEBUG, "Port %u cannot create a new indirection table.", 2566 dev->data->port_id); 2567 if (ref_qs) { 2568 int err = rte_errno; 2569 2570 mlx5_rxqs_deref(dev, ind_tbl->queues, queues_n); 2571 rte_errno = err; 2572 } 2573 return ret; 2574 } 2575 rte_atomic_fetch_add_explicit(&ind_tbl->refcnt, 1, rte_memory_order_relaxed); 2576 return 0; 2577 } 2578 2579 /** 2580 * Create an indirection table. 2581 * 2582 * @param dev 2583 * Pointer to Ethernet device. 2584 * @param queues 2585 * Queues entering the indirection table. 2586 * @param queues_n 2587 * Number of queues in the array. 2588 * @param standalone 2589 * Indirection table for Standalone queue. 2590 * @param ref_qs 2591 * Whether to increment RxQ reference counters. 2592 * 2593 * @return 2594 * The Verbs/DevX object initialized, NULL otherwise and rte_errno is set. 2595 */ 2596 struct mlx5_ind_table_obj * 2597 mlx5_ind_table_obj_new(struct rte_eth_dev *dev, const uint16_t *queues, 2598 uint32_t queues_n, bool standalone, bool ref_qs) 2599 { 2600 struct mlx5_priv *priv = dev->data->dev_private; 2601 struct mlx5_ind_table_obj *ind_tbl; 2602 int ret; 2603 uint32_t max_queues_n = priv->rxqs_n > queues_n ? priv->rxqs_n : queues_n; 2604 2605 /* 2606 * Allocate the maximum number of queues for a shared action, 2607 * as the queue number may be modified later. 2608 */ 2609 ind_tbl = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*ind_tbl) + 2610 (standalone ? 
max_queues_n : queues_n) * 2611 sizeof(uint16_t), 0, SOCKET_ID_ANY); 2612 if (!ind_tbl) { 2613 rte_errno = ENOMEM; 2614 return NULL; 2615 } 2616 ind_tbl->queues_n = queues_n; 2617 ind_tbl->queues = (uint16_t *)(ind_tbl + 1); 2618 memcpy(ind_tbl->queues, queues, queues_n * sizeof(*queues)); 2619 ret = mlx5_ind_table_obj_setup(dev, ind_tbl, ref_qs); 2620 if (ret < 0) { 2621 mlx5_free(ind_tbl); 2622 return NULL; 2623 } 2624 rte_rwlock_write_lock(&priv->ind_tbls_lock); 2625 if (!standalone) 2626 LIST_INSERT_HEAD(&priv->ind_tbls, ind_tbl, next); 2627 else 2628 LIST_INSERT_HEAD(&priv->standalone_ind_tbls, ind_tbl, next); 2629 rte_rwlock_write_unlock(&priv->ind_tbls_lock); 2630 2631 return ind_tbl; 2632 } 2633 2634 static int 2635 mlx5_ind_table_obj_check_standalone(struct rte_eth_dev *dev __rte_unused, 2636 struct mlx5_ind_table_obj *ind_tbl) 2637 { 2638 uint32_t refcnt; 2639 2640 refcnt = rte_atomic_load_explicit(&ind_tbl->refcnt, rte_memory_order_relaxed); 2641 if (refcnt <= 1) 2642 return 0; 2643 /* 2644 * Modification of indirection tables having more than 1 2645 * reference is unsupported. 2646 */ 2647 DRV_LOG(DEBUG, 2648 "Port %u cannot modify indirection table %p (refcnt %u > 1).", 2649 dev->data->port_id, (void *)ind_tbl, refcnt); 2650 rte_errno = EINVAL; 2651 return -rte_errno; 2652 } 2653 2654 /** 2655 * Modify an indirection table. 2656 * 2657 * @param dev 2658 * Pointer to Ethernet device. 2659 * @param ind_table 2660 * Indirection table to modify. 2661 * @param queues 2662 * Queues replacement for the indirection table. 2663 * @param queues_n 2664 * Number of queues in the array. 2665 * @param standalone 2666 * Indirection table for Standalone queue. 2667 * @param ref_new_qs 2668 * Whether to increment new RxQ set reference counters. 2669 * @param deref_old_qs 2670 * Whether to decrement old RxQ set reference counters. 2671 * 2672 * @return 2673 * 0 on success, a negative errno value otherwise and rte_errno is set. 2674 */ 2675 int 2676 mlx5_ind_table_obj_modify(struct rte_eth_dev *dev, 2677 struct mlx5_ind_table_obj *ind_tbl, 2678 uint16_t *queues, const uint32_t queues_n, 2679 bool standalone, bool ref_new_qs, bool deref_old_qs) 2680 { 2681 struct mlx5_priv *priv = dev->data->dev_private; 2682 int ret; 2683 const unsigned int n = rte_is_power_of_2(queues_n) ? 2684 log2above(queues_n) : 2685 log2above(priv->sh->dev_cap.ind_table_max_size); 2686 2687 MLX5_ASSERT(standalone); 2688 RTE_SET_USED(standalone); 2689 if (mlx5_ind_table_obj_check_standalone(dev, ind_tbl) < 0) 2690 return -rte_errno; 2691 if (ref_new_qs && mlx5_rxqs_ref(dev, queues, queues_n) < 0) { 2692 DRV_LOG(DEBUG, "Port %u invalid indirection table queues.", 2693 dev->data->port_id); 2694 return -rte_errno; 2695 } 2696 MLX5_ASSERT(priv->obj_ops.ind_table_modify); 2697 ret = priv->obj_ops.ind_table_modify(dev, n, queues, queues_n, ind_tbl); 2698 if (ret) { 2699 DRV_LOG(DEBUG, "Port %u cannot modify indirection table.", 2700 dev->data->port_id); 2701 if (ref_new_qs) { 2702 int err = rte_errno; 2703 2704 mlx5_rxqs_deref(dev, queues, queues_n); 2705 rte_errno = err; 2706 } 2707 return ret; 2708 } 2709 if (deref_old_qs) 2710 mlx5_rxqs_deref(dev, ind_tbl->queues, ind_tbl->queues_n); 2711 ind_tbl->queues_n = queues_n; 2712 ind_tbl->queues = queues; 2713 return 0; 2714 } 2715 2716 /** 2717 * Attach an indirection table to its queues. 2718 * 2719 * @param dev 2720 * Pointer to Ethernet device. 2721 * @param ind_table 2722 * Indirection table to attach. 
2723 * 2724 * @return 2725 * 0 on success, a negative errno value otherwise and rte_errno is set. 2726 */ 2727 int 2728 mlx5_ind_table_obj_attach(struct rte_eth_dev *dev, 2729 struct mlx5_ind_table_obj *ind_tbl) 2730 { 2731 int ret; 2732 2733 ret = mlx5_ind_table_obj_modify(dev, ind_tbl, ind_tbl->queues, 2734 ind_tbl->queues_n, 2735 true /* standalone */, 2736 true /* ref_new_qs */, 2737 false /* deref_old_qs */); 2738 if (ret != 0) 2739 DRV_LOG(ERR, "Port %u could not modify indirect table obj %p", 2740 dev->data->port_id, (void *)ind_tbl); 2741 return ret; 2742 } 2743 2744 /** 2745 * Detach an indirection table from its queues. 2746 * 2747 * @param dev 2748 * Pointer to Ethernet device. 2749 * @param ind_table 2750 * Indirection table to detach. 2751 * 2752 * @return 2753 * 0 on success, a negative errno value otherwise and rte_errno is set. 2754 */ 2755 int 2756 mlx5_ind_table_obj_detach(struct rte_eth_dev *dev, 2757 struct mlx5_ind_table_obj *ind_tbl) 2758 { 2759 struct mlx5_priv *priv = dev->data->dev_private; 2760 const unsigned int n = rte_is_power_of_2(ind_tbl->queues_n) ? 2761 log2above(ind_tbl->queues_n) : 2762 log2above(priv->sh->dev_cap.ind_table_max_size); 2763 unsigned int i; 2764 int ret; 2765 2766 ret = mlx5_ind_table_obj_check_standalone(dev, ind_tbl); 2767 if (ret != 0) 2768 return ret; 2769 MLX5_ASSERT(priv->obj_ops.ind_table_modify); 2770 ret = priv->obj_ops.ind_table_modify(dev, n, NULL, 0, ind_tbl); 2771 if (ret != 0) { 2772 DRV_LOG(ERR, "Port %u could not modify indirect table obj %p", 2773 dev->data->port_id, (void *)ind_tbl); 2774 return ret; 2775 } 2776 for (i = 0; i < ind_tbl->queues_n; i++) 2777 mlx5_rxq_release(dev, ind_tbl->queues[i]); 2778 return ret; 2779 } 2780 2781 int 2782 mlx5_hrxq_match_cb(void *tool_ctx __rte_unused, struct mlx5_list_entry *entry, 2783 void *cb_ctx) 2784 { 2785 struct mlx5_flow_cb_ctx *ctx = cb_ctx; 2786 struct mlx5_flow_rss_desc *rss_desc = ctx->data; 2787 struct mlx5_hrxq *hrxq = container_of(entry, typeof(*hrxq), entry); 2788 2789 return (hrxq->rss_key_len != rss_desc->key_len || 2790 hrxq->symmetric_hash_function != rss_desc->symmetric_hash_function || 2791 memcmp(hrxq->rss_key, rss_desc->key, rss_desc->key_len) || 2792 hrxq->hws_flags != rss_desc->hws_flags || 2793 hrxq->hash_fields != rss_desc->hash_fields || 2794 hrxq->ind_table->queues_n != rss_desc->queue_num || 2795 memcmp(hrxq->ind_table->queues, rss_desc->queue, 2796 rss_desc->queue_num * sizeof(rss_desc->queue[0]))); 2797 } 2798 2799 /** 2800 * Modify an Rx Hash queue configuration. 2801 * 2802 * @param dev 2803 * Pointer to Ethernet device. 2804 * @param hrxq 2805 * Index to Hash Rx queue to modify. 2806 * @param rss_key 2807 * RSS key for the Rx hash queue. 2808 * @param rss_key_len 2809 * RSS key length. 2810 * @param hash_fields 2811 * Verbs protocol hash field to make the RSS on. 2812 * @param queues 2813 * Queues entering in hash queue. In case of empty hash_fields only the 2814 * first queue index will be taken for the indirection table. 2815 * @param queues_n 2816 * Number of queues. 2817 * 2818 * @return 2819 * 0 on success, a negative errno value otherwise and rte_errno is set. 
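 *
 * A minimal usage sketch (illustrative only; "rss_key", "hash_fields",
 * "queues" and "queues_n" are assumed to come from an updated RSS
 * configuration, with symmetric hashing disabled):
 *
 *	if (mlx5_hrxq_modify(dev, hrxq_idx, rss_key, MLX5_RSS_HASH_KEY_LEN,
 *			     hash_fields, false, queues, queues_n) != 0)
 *		DRV_LOG(ERR, "port %u cannot modify hash Rx queue %u: %s",
 *			dev->data->port_id, hrxq_idx, rte_strerror(rte_errno));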
2820 */ 2821 int 2822 mlx5_hrxq_modify(struct rte_eth_dev *dev, uint32_t hrxq_idx, 2823 const uint8_t *rss_key, uint32_t rss_key_len, 2824 uint64_t hash_fields, bool symmetric_hash_function, 2825 const uint16_t *queues, uint32_t queues_n) 2826 { 2827 int err; 2828 struct mlx5_ind_table_obj *ind_tbl = NULL; 2829 struct mlx5_priv *priv = dev->data->dev_private; 2830 struct mlx5_hrxq *hrxq = 2831 mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ], hrxq_idx); 2832 bool dev_started = !!dev->data->dev_started; 2833 int ret; 2834 2835 if (!hrxq) { 2836 rte_errno = EINVAL; 2837 return -rte_errno; 2838 } 2839 /* validations */ 2840 if (hrxq->rss_key_len != rss_key_len) { 2841 /* rss_key_len is fixed size 40 byte & not supposed to change */ 2842 rte_errno = EINVAL; 2843 return -rte_errno; 2844 } 2845 queues_n = hash_fields ? queues_n : 1; 2846 if (mlx5_ind_table_obj_match_queues(hrxq->ind_table, 2847 queues, queues_n)) { 2848 ind_tbl = hrxq->ind_table; 2849 } else { 2850 if (hrxq->standalone) { 2851 /* 2852 * Replacement of indirection table unsupported for 2853 * standalone hrxq objects (used by shared RSS). 2854 */ 2855 rte_errno = ENOTSUP; 2856 return -rte_errno; 2857 } 2858 ind_tbl = mlx5_ind_table_obj_get(dev, queues, queues_n); 2859 if (!ind_tbl) 2860 ind_tbl = mlx5_ind_table_obj_new(dev, queues, queues_n, 2861 hrxq->standalone, 2862 dev_started); 2863 } 2864 if (!ind_tbl) { 2865 rte_errno = ENOMEM; 2866 return -rte_errno; 2867 } 2868 MLX5_ASSERT(priv->obj_ops.hrxq_modify); 2869 ret = priv->obj_ops.hrxq_modify(dev, hrxq, rss_key, hash_fields, 2870 symmetric_hash_function, ind_tbl); 2871 if (ret) { 2872 rte_errno = errno; 2873 goto error; 2874 } 2875 if (ind_tbl != hrxq->ind_table) { 2876 MLX5_ASSERT(!hrxq->standalone); 2877 mlx5_ind_table_obj_release(dev, hrxq->ind_table, true); 2878 hrxq->ind_table = ind_tbl; 2879 } 2880 hrxq->hash_fields = hash_fields; 2881 memcpy(hrxq->rss_key, rss_key, rss_key_len); 2882 return 0; 2883 error: 2884 err = rte_errno; 2885 if (ind_tbl != hrxq->ind_table) { 2886 MLX5_ASSERT(!hrxq->standalone); 2887 mlx5_ind_table_obj_release(dev, ind_tbl, true); 2888 } 2889 rte_errno = err; 2890 return -rte_errno; 2891 } 2892 2893 static void 2894 __mlx5_hrxq_remove(struct rte_eth_dev *dev, struct mlx5_hrxq *hrxq) 2895 { 2896 struct mlx5_priv *priv = dev->data->dev_private; 2897 bool deref_rxqs = true; 2898 2899 #ifdef HAVE_IBV_FLOW_DV_SUPPORT 2900 if (hrxq->hws_flags) 2901 mlx5dr_action_destroy(hrxq->action); 2902 else 2903 mlx5_glue->destroy_flow_action(hrxq->action); 2904 #endif 2905 priv->obj_ops.hrxq_destroy(hrxq); 2906 if (!hrxq->standalone) { 2907 if (!dev->data->dev_started && hrxq->hws_flags && 2908 !priv->hws_rule_flushing) 2909 deref_rxqs = false; 2910 mlx5_ind_table_obj_release(dev, hrxq->ind_table, deref_rxqs); 2911 } 2912 mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_HRXQ], hrxq->idx); 2913 } 2914 2915 /** 2916 * Release the hash Rx queue. 2917 * 2918 * @param dev 2919 * Pointer to Ethernet device. 2920 * @param hrxq 2921 * Index to Hash Rx queue to release. 2922 * 2923 * @param list 2924 * mlx5 list pointer. 2925 * @param entry 2926 * Hash queue entry pointer. 
2927 */ 2928 void 2929 mlx5_hrxq_remove_cb(void *tool_ctx, struct mlx5_list_entry *entry) 2930 { 2931 struct rte_eth_dev *dev = tool_ctx; 2932 struct mlx5_hrxq *hrxq = container_of(entry, typeof(*hrxq), entry); 2933 2934 __mlx5_hrxq_remove(dev, hrxq); 2935 } 2936 2937 static struct mlx5_hrxq * 2938 __mlx5_hrxq_create(struct rte_eth_dev *dev, 2939 struct mlx5_flow_rss_desc *rss_desc) 2940 { 2941 struct mlx5_priv *priv = dev->data->dev_private; 2942 const uint8_t *rss_key = rss_desc->key; 2943 uint32_t rss_key_len = rss_desc->key_len; 2944 bool standalone = !!rss_desc->shared_rss; 2945 const uint16_t *queues = 2946 standalone ? rss_desc->const_q : rss_desc->queue; 2947 uint32_t queues_n = rss_desc->queue_num; 2948 struct mlx5_hrxq *hrxq = NULL; 2949 uint32_t hrxq_idx = 0; 2950 struct mlx5_ind_table_obj *ind_tbl = rss_desc->ind_tbl; 2951 int ret; 2952 2953 queues_n = rss_desc->hash_fields ? queues_n : 1; 2954 if (!ind_tbl && !rss_desc->hws_flags) 2955 ind_tbl = mlx5_ind_table_obj_get(dev, queues, queues_n); 2956 if (!ind_tbl) 2957 ind_tbl = mlx5_ind_table_obj_new(dev, queues, queues_n, 2958 standalone || 2959 rss_desc->hws_flags, 2960 !!dev->data->dev_started); 2961 if (!ind_tbl) 2962 return NULL; 2963 hrxq = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_HRXQ], &hrxq_idx); 2964 if (!hrxq) 2965 goto error; 2966 hrxq->standalone = standalone; 2967 hrxq->idx = hrxq_idx; 2968 hrxq->ind_table = ind_tbl; 2969 hrxq->rss_key_len = rss_key_len; 2970 hrxq->hash_fields = rss_desc->hash_fields; 2971 hrxq->hws_flags = rss_desc->hws_flags; 2972 hrxq->symmetric_hash_function = rss_desc->symmetric_hash_function; 2973 memcpy(hrxq->rss_key, rss_key, rss_key_len); 2974 ret = priv->obj_ops.hrxq_new(dev, hrxq, rss_desc->tunnel); 2975 if (ret < 0) 2976 goto error; 2977 return hrxq; 2978 error: 2979 if (!rss_desc->ind_tbl) 2980 mlx5_ind_table_obj_release(dev, ind_tbl, true); 2981 if (hrxq) 2982 mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_HRXQ], hrxq_idx); 2983 return NULL; 2984 } 2985 2986 struct mlx5_list_entry * 2987 mlx5_hrxq_create_cb(void *tool_ctx, void *cb_ctx) 2988 { 2989 struct rte_eth_dev *dev = tool_ctx; 2990 struct mlx5_flow_cb_ctx *ctx = cb_ctx; 2991 struct mlx5_flow_rss_desc *rss_desc = ctx->data; 2992 struct mlx5_hrxq *hrxq; 2993 2994 hrxq = __mlx5_hrxq_create(dev, rss_desc); 2995 return hrxq ? &hrxq->entry : NULL; 2996 } 2997 2998 struct mlx5_list_entry * 2999 mlx5_hrxq_clone_cb(void *tool_ctx, struct mlx5_list_entry *entry, 3000 void *cb_ctx __rte_unused) 3001 { 3002 struct rte_eth_dev *dev = tool_ctx; 3003 struct mlx5_priv *priv = dev->data->dev_private; 3004 struct mlx5_hrxq *hrxq; 3005 uint32_t hrxq_idx = 0; 3006 3007 hrxq = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_HRXQ], &hrxq_idx); 3008 if (!hrxq) 3009 return NULL; 3010 memcpy(hrxq, entry, sizeof(*hrxq) + MLX5_RSS_HASH_KEY_LEN); 3011 hrxq->idx = hrxq_idx; 3012 return &hrxq->entry; 3013 } 3014 3015 void 3016 mlx5_hrxq_clone_free_cb(void *tool_ctx, struct mlx5_list_entry *entry) 3017 { 3018 struct rte_eth_dev *dev = tool_ctx; 3019 struct mlx5_priv *priv = dev->data->dev_private; 3020 struct mlx5_hrxq *hrxq = container_of(entry, typeof(*hrxq), entry); 3021 3022 mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_HRXQ], hrxq->idx); 3023 } 3024 3025 /** 3026 * Get an Rx Hash queue. 3027 * 3028 * @param dev 3029 * Pointer to Ethernet device. 3030 * @param rss_desc 3031 * RSS configuration for the Rx hash queue. 3032 * 3033 * @return 3034 * An hash Rx queue on success. 
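 *
 * A minimal usage sketch (illustrative only; "rss_desc" is assumed to be
 * already filled by the flow layer with the RSS key, hash fields and
 * destination queue list):
 *
 *	struct mlx5_hrxq *hrxq = mlx5_hrxq_get(dev, &rss_desc);
 *
 *	if (hrxq == NULL)
 *		DRV_LOG(ERR, "port %u cannot get a hash Rx queue",
 *			dev->data->port_id);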
3035 */ 3036 struct mlx5_hrxq *mlx5_hrxq_get(struct rte_eth_dev *dev, 3037 struct mlx5_flow_rss_desc *rss_desc) 3038 { 3039 struct mlx5_priv *priv = dev->data->dev_private; 3040 struct mlx5_hrxq *hrxq = NULL; 3041 struct mlx5_list_entry *entry; 3042 struct mlx5_flow_cb_ctx ctx = { 3043 .data = rss_desc, 3044 }; 3045 3046 if (rss_desc->shared_rss) { 3047 hrxq = __mlx5_hrxq_create(dev, rss_desc); 3048 } else { 3049 entry = mlx5_list_register(priv->hrxqs, &ctx); 3050 if (!entry) 3051 return NULL; 3052 hrxq = container_of(entry, typeof(*hrxq), entry); 3053 } 3054 return hrxq; 3055 } 3056 3057 /** 3058 * Release the hash Rx queue. 3059 * 3060 * @param dev 3061 * Pointer to Ethernet device. 3062 * @param hrxq_idx 3063 * Hash Rx queue to release. 3064 * 3065 * @return 3066 * 1 while a reference on it exists, 0 when freed. 3067 */ 3068 int mlx5_hrxq_obj_release(struct rte_eth_dev *dev, struct mlx5_hrxq *hrxq) 3069 { 3070 struct mlx5_priv *priv = dev->data->dev_private; 3071 3072 if (!hrxq) 3073 return 0; 3074 if (!hrxq->standalone) 3075 return mlx5_list_unregister(priv->hrxqs, &hrxq->entry); 3076 __mlx5_hrxq_remove(dev, hrxq); 3077 return 0; 3078 } 3079 3080 /** 3081 * Release the hash Rx queue with index. 3082 * 3083 * @param dev 3084 * Pointer to Ethernet device. 3085 * @param hrxq_idx 3086 * Index to Hash Rx queue to release. 3087 * 3088 * @return 3089 * 1 while a reference on it exists, 0 when freed. 3090 */ 3091 int mlx5_hrxq_release(struct rte_eth_dev *dev, uint32_t hrxq_idx) 3092 { 3093 struct mlx5_priv *priv = dev->data->dev_private; 3094 struct mlx5_hrxq *hrxq; 3095 3096 hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ], hrxq_idx); 3097 return mlx5_hrxq_obj_release(dev, hrxq); 3098 } 3099 3100 /** 3101 * Create a drop Rx Hash queue. 3102 * 3103 * @param dev 3104 * Pointer to Ethernet device. 3105 * 3106 * @return 3107 * The Verbs/DevX object initialized, NULL otherwise and rte_errno is set. 3108 */ 3109 struct mlx5_hrxq * 3110 mlx5_drop_action_create(struct rte_eth_dev *dev) 3111 { 3112 struct mlx5_priv *priv = dev->data->dev_private; 3113 struct mlx5_hrxq *hrxq = NULL; 3114 int ret; 3115 3116 if (priv->drop_queue.hrxq) 3117 return priv->drop_queue.hrxq; 3118 hrxq = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*hrxq) + MLX5_RSS_HASH_KEY_LEN, 0, SOCKET_ID_ANY); 3119 if (!hrxq) { 3120 DRV_LOG(WARNING, 3121 "Port %u cannot allocate memory for drop queue.", 3122 dev->data->port_id); 3123 rte_errno = ENOMEM; 3124 goto error; 3125 } 3126 priv->drop_queue.hrxq = hrxq; 3127 hrxq->ind_table = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*hrxq->ind_table), 3128 0, SOCKET_ID_ANY); 3129 if (!hrxq->ind_table) { 3130 rte_errno = ENOMEM; 3131 goto error; 3132 } 3133 ret = priv->obj_ops.drop_action_create(dev); 3134 if (ret < 0) 3135 goto error; 3136 return hrxq; 3137 error: 3138 if (hrxq) { 3139 if (hrxq->ind_table) 3140 mlx5_free(hrxq->ind_table); 3141 priv->drop_queue.hrxq = NULL; 3142 mlx5_free(hrxq); 3143 } 3144 return NULL; 3145 } 3146 3147 /** 3148 * Release a drop hash Rx queue. 3149 * 3150 * @param dev 3151 * Pointer to Ethernet device. 
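 *
 * A minimal usage sketch (illustrative only): the drop action is created
 * lazily by mlx5_drop_action_create() and typically released on device
 * close, so a usual pairing is:
 *
 *	struct mlx5_hrxq *drop = mlx5_drop_action_create(dev);
 *
 *	if (drop == NULL)
 *		return -rte_errno;
 *	...
 *	mlx5_drop_action_destroy(dev);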
3152 */ 3153 void 3154 mlx5_drop_action_destroy(struct rte_eth_dev *dev) 3155 { 3156 struct mlx5_priv *priv = dev->data->dev_private; 3157 struct mlx5_hrxq *hrxq = priv->drop_queue.hrxq; 3158 3159 if (!priv->drop_queue.hrxq) 3160 return; 3161 priv->obj_ops.drop_action_destroy(dev); 3162 mlx5_free(priv->drop_queue.rxq); 3163 mlx5_free(hrxq->ind_table); 3164 mlx5_free(hrxq); 3165 priv->drop_queue.rxq = NULL; 3166 priv->drop_queue.hrxq = NULL; 3167 } 3168 3169 /** 3170 * Verify the hash Rx queue list is empty. 3171 * 3172 * @param dev 3173 * Pointer to Ethernet device. 3174 * 3175 * @return 3176 * The number of objects not released. 3177 */ 3178 uint32_t 3179 mlx5_hrxq_verify(struct rte_eth_dev *dev) 3180 { 3181 struct mlx5_priv *priv = dev->data->dev_private; 3182 3183 return mlx5_list_get_entry_num(priv->hrxqs); 3184 } 3185 3186 /** 3187 * Set the Rx queue timestamp conversion parameters. 3188 * 3189 * @param[in] dev 3190 * Pointer to the Ethernet device structure. 3191 */ 3192 void 3193 mlx5_rxq_timestamp_set(struct rte_eth_dev *dev) 3194 { 3195 struct mlx5_priv *priv = dev->data->dev_private; 3196 struct mlx5_dev_ctx_shared *sh = priv->sh; 3197 unsigned int i; 3198 3199 for (i = 0; i != priv->rxqs_n; ++i) { 3200 struct mlx5_rxq_data *data = mlx5_rxq_data_get(dev, i); 3201 3202 if (data == NULL) 3203 continue; 3204 data->sh = sh; 3205 data->rt_timestamp = sh->dev_cap.rt_timestamp; 3206 } 3207 } 3208 3209 /** 3210 * Validate the given external RxQ rte_flow index, and get a pointer to the 3211 * corresponding external RxQ object to map/unmap. 3212 * 3213 * @param[in] port_id 3214 * The port identifier of the Ethernet device. 3215 * @param[in] dpdk_idx 3216 * Queue index in rte_flow. 3217 * 3218 * @return 3219 * Pointer to the corresponding external RxQ on success, 3220 * NULL otherwise and rte_errno is set. 3221 */ 3222 static struct mlx5_external_q * 3223 mlx5_external_rx_queue_get_validate(uint16_t port_id, uint16_t dpdk_idx) 3224 { 3225 struct rte_eth_dev *dev; 3226 struct mlx5_priv *priv; 3227 int ret; 3228 3229 if (dpdk_idx < RTE_PMD_MLX5_EXTERNAL_RX_QUEUE_ID_MIN) { 3230 DRV_LOG(ERR, "Queue index %u should be in range: [%u, %u].", 3231 dpdk_idx, RTE_PMD_MLX5_EXTERNAL_RX_QUEUE_ID_MIN, UINT16_MAX); 3232 rte_errno = EINVAL; 3233 return NULL; 3234 } 3235 ret = mlx5_devx_extq_port_validate(port_id); 3236 if (unlikely(ret)) 3237 return NULL; 3238 dev = &rte_eth_devices[port_id]; 3239 priv = dev->data->dev_private; 3240 /* 3241 * When the user configures a remote PD and CTX and the device creates 3242 * RxQs by DevX, the external RxQs array is allocated. 
3243 */ 3244 MLX5_ASSERT(priv->ext_rxqs != NULL); 3245 return &priv->ext_rxqs[dpdk_idx - RTE_PMD_MLX5_EXTERNAL_RX_QUEUE_ID_MIN]; 3246 } 3247 3248 int 3249 rte_pmd_mlx5_external_rx_queue_id_map(uint16_t port_id, uint16_t dpdk_idx, 3250 uint32_t hw_idx) 3251 { 3252 struct mlx5_external_q *ext_rxq; 3253 uint32_t unmapped = 0; 3254 3255 ext_rxq = mlx5_external_rx_queue_get_validate(port_id, dpdk_idx); 3256 if (ext_rxq == NULL) 3257 return -rte_errno; 3258 if (!rte_atomic_compare_exchange_strong_explicit(&ext_rxq->refcnt, &unmapped, 1, 3259 rte_memory_order_relaxed, rte_memory_order_relaxed)) { 3260 if (ext_rxq->hw_id != hw_idx) { 3261 DRV_LOG(ERR, "Port %u external RxQ index %u " 3262 "is already mapped to HW index (requesting is " 3263 "%u, existing is %u).", 3264 port_id, dpdk_idx, hw_idx, ext_rxq->hw_id); 3265 rte_errno = EEXIST; 3266 return -rte_errno; 3267 } 3268 DRV_LOG(WARNING, "Port %u external RxQ index %u " 3269 "is already mapped to the requested HW index (%u)", 3270 port_id, dpdk_idx, hw_idx); 3271 3272 } else { 3273 ext_rxq->hw_id = hw_idx; 3274 DRV_LOG(DEBUG, "Port %u external RxQ index %u " 3275 "is successfully mapped to the requested HW index (%u)", 3276 port_id, dpdk_idx, hw_idx); 3277 } 3278 return 0; 3279 } 3280 3281 int 3282 rte_pmd_mlx5_external_rx_queue_id_unmap(uint16_t port_id, uint16_t dpdk_idx) 3283 { 3284 struct mlx5_external_q *ext_rxq; 3285 uint32_t mapped = 1; 3286 3287 ext_rxq = mlx5_external_rx_queue_get_validate(port_id, dpdk_idx); 3288 if (ext_rxq == NULL) 3289 return -rte_errno; 3290 if (ext_rxq->refcnt > 1) { 3291 DRV_LOG(ERR, "Port %u external RxQ index %u still referenced.", 3292 port_id, dpdk_idx); 3293 rte_errno = EINVAL; 3294 return -rte_errno; 3295 } 3296 if (!rte_atomic_compare_exchange_strong_explicit(&ext_rxq->refcnt, &mapped, 0, 3297 rte_memory_order_relaxed, rte_memory_order_relaxed)) { 3298 DRV_LOG(ERR, "Port %u external RxQ index %u doesn't exist.", 3299 port_id, dpdk_idx); 3300 rte_errno = EINVAL; 3301 return -rte_errno; 3302 } 3303 DRV_LOG(DEBUG, 3304 "Port %u external RxQ index %u is successfully unmapped.", 3305 port_id, dpdk_idx); 3306 return 0; 3307 } 3308
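/*
 * A minimal usage sketch for the external Rx queue mapping API above
 * (illustrative only; "hw_rxq_id" stands for a hardware queue identifier
 * that the application created through its own DevX/Verbs context, and
 * "port_id" is the mlx5 port probed with those external resources):
 *
 *	uint16_t flow_q = RTE_PMD_MLX5_EXTERNAL_RX_QUEUE_ID_MIN;
 *
 *	if (rte_pmd_mlx5_external_rx_queue_id_map(port_id, flow_q,
 *						  hw_rxq_id) != 0)
 *		return -rte_errno;
 *	(flow_q can now be used as a queue index in rte_flow actions)
 *	...
 *	rte_pmd_mlx5_external_rx_queue_id_unmap(port_id, flow_q);
 */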