1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2015 6WIND S.A. 3 * Copyright 2015 Mellanox Technologies, Ltd 4 */ 5 6 #include <stddef.h> 7 #include <errno.h> 8 #include <string.h> 9 #include <stdint.h> 10 #include <fcntl.h> 11 #include <sys/queue.h> 12 13 #include <rte_mbuf.h> 14 #include <rte_malloc.h> 15 #include <ethdev_driver.h> 16 #include <rte_common.h> 17 #include <rte_interrupts.h> 18 #include <rte_debug.h> 19 #include <rte_io.h> 20 #include <rte_eal_paging.h> 21 22 #include <mlx5_glue.h> 23 #include <mlx5_malloc.h> 24 #include <mlx5_common.h> 25 #include <mlx5_common_mr.h> 26 27 #include "mlx5_defs.h" 28 #include "mlx5.h" 29 #include "mlx5_rx.h" 30 #include "mlx5_utils.h" 31 #include "mlx5_autoconf.h" 32 #include "mlx5_devx.h" 33 #include "rte_pmd_mlx5.h" 34 35 36 /* Default RSS hash key also used for ConnectX-3. */ 37 uint8_t rss_hash_default_key[] = { 38 0x2c, 0xc6, 0x81, 0xd1, 39 0x5b, 0xdb, 0xf4, 0xf7, 40 0xfc, 0xa2, 0x83, 0x19, 41 0xdb, 0x1a, 0x3e, 0x94, 42 0x6b, 0x9e, 0x38, 0xd9, 43 0x2c, 0x9c, 0x03, 0xd1, 44 0xad, 0x99, 0x44, 0xa7, 45 0xd9, 0x56, 0x3d, 0x59, 46 0x06, 0x3c, 0x25, 0xf3, 47 0xfc, 0x1f, 0xdc, 0x2a, 48 }; 49 50 /* Length of the default RSS hash key. */ 51 static_assert(MLX5_RSS_HASH_KEY_LEN == 52 (unsigned int)sizeof(rss_hash_default_key), 53 "wrong RSS default key size."); 54 55 /** 56 * Calculate the number of CQEs in CQ for the Rx queue. 57 * 58 * @param rxq_data 59 * Pointer to receive queue structure. 60 * 61 * @return 62 * Number of CQEs in CQ. 63 */ 64 unsigned int 65 mlx5_rxq_cqe_num(struct mlx5_rxq_data *rxq_data) 66 { 67 unsigned int cqe_n; 68 unsigned int wqe_n = 1 << rxq_data->elts_n; 69 70 if (mlx5_rxq_mprq_enabled(rxq_data)) 71 cqe_n = wqe_n * RTE_BIT32(rxq_data->log_strd_num) - 1; 72 else 73 cqe_n = wqe_n - 1; 74 return cqe_n; 75 } 76 77 /** 78 * Allocate RX queue elements for Multi-Packet RQ. 79 * 80 * @param rxq_ctrl 81 * Pointer to RX queue structure. 82 * 83 * @return 84 * 0 on success, a negative errno value otherwise and rte_errno is set. 85 */ 86 static int 87 rxq_alloc_elts_mprq(struct mlx5_rxq_ctrl *rxq_ctrl) 88 { 89 struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq; 90 unsigned int wqe_n = 1 << rxq->elts_n; 91 unsigned int i; 92 int err; 93 94 /* Iterate on segments. */ 95 for (i = 0; i <= wqe_n; ++i) { 96 struct mlx5_mprq_buf *buf; 97 98 if (rte_mempool_get(rxq->mprq_mp, (void **)&buf) < 0) { 99 DRV_LOG(ERR, "port %u empty mbuf pool", rxq->port_id); 100 rte_errno = ENOMEM; 101 goto error; 102 } 103 if (i < wqe_n) 104 (*rxq->mprq_bufs)[i] = buf; 105 else 106 rxq->mprq_repl = buf; 107 } 108 DRV_LOG(DEBUG, 109 "port %u MPRQ queue %u allocated and configured %u segments", 110 rxq->port_id, rxq->idx, wqe_n); 111 return 0; 112 error: 113 err = rte_errno; /* Save rte_errno before cleanup. */ 114 wqe_n = i; 115 for (i = 0; (i != wqe_n); ++i) { 116 if ((*rxq->mprq_bufs)[i] != NULL) 117 rte_mempool_put(rxq->mprq_mp, 118 (*rxq->mprq_bufs)[i]); 119 (*rxq->mprq_bufs)[i] = NULL; 120 } 121 DRV_LOG(DEBUG, "port %u MPRQ queue %u failed, freed everything", 122 rxq->port_id, rxq->idx); 123 rte_errno = err; /* Restore rte_errno. */ 124 return -rte_errno; 125 } 126 127 /** 128 * Allocate RX queue elements for Single-Packet RQ. 129 * 130 * @param rxq_ctrl 131 * Pointer to RX queue structure. 132 * 133 * @return 134 * 0 on success, negative errno value on failure. 
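 *
 * Note: when vectorized Rx is enabled, the tail of the elts[] ring is
 * additionally padded with MLX5_VPMD_DESCS_PER_LOOP pointers to the fake
 * mbuf (see the vector branch below), so the vector routines may safely
 * read past the last real descriptor.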
135 */ 136 static int 137 rxq_alloc_elts_sprq(struct mlx5_rxq_ctrl *rxq_ctrl) 138 { 139 const unsigned int sges_n = 1 << rxq_ctrl->rxq.sges_n; 140 unsigned int elts_n = mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq) ? 141 RTE_BIT32(rxq_ctrl->rxq.elts_n) * 142 RTE_BIT32(rxq_ctrl->rxq.log_strd_num) : 143 RTE_BIT32(rxq_ctrl->rxq.elts_n); 144 bool has_vec_support = mlx5_rxq_check_vec_support(&rxq_ctrl->rxq) > 0; 145 unsigned int i; 146 int err; 147 148 /* Iterate on segments. */ 149 for (i = 0; (i != elts_n); ++i) { 150 struct mlx5_eth_rxseg *seg = &rxq_ctrl->rxq.rxseg[i % sges_n]; 151 struct rte_mbuf *buf; 152 153 buf = rte_pktmbuf_alloc(seg->mp); 154 if (buf == NULL) { 155 if (rxq_ctrl->share_group == 0) 156 DRV_LOG(ERR, "port %u queue %u empty mbuf pool", 157 RXQ_PORT_ID(rxq_ctrl), 158 rxq_ctrl->rxq.idx); 159 else 160 DRV_LOG(ERR, "share group %u queue %u empty mbuf pool", 161 rxq_ctrl->share_group, 162 rxq_ctrl->share_qid); 163 rte_errno = ENOMEM; 164 goto error; 165 } 166 /* Only vectored Rx routines rely on headroom size. */ 167 MLX5_ASSERT(!has_vec_support || 168 DATA_OFF(buf) >= RTE_PKTMBUF_HEADROOM); 169 /* Buffer is supposed to be empty. */ 170 MLX5_ASSERT(rte_pktmbuf_data_len(buf) == 0); 171 MLX5_ASSERT(rte_pktmbuf_pkt_len(buf) == 0); 172 MLX5_ASSERT(!buf->next); 173 SET_DATA_OFF(buf, seg->offset); 174 PORT(buf) = rxq_ctrl->rxq.port_id; 175 DATA_LEN(buf) = seg->length; 176 PKT_LEN(buf) = seg->length; 177 NB_SEGS(buf) = 1; 178 (*rxq_ctrl->rxq.elts)[i] = buf; 179 } 180 /* If Rx vector is activated. */ 181 if (has_vec_support) { 182 struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq; 183 struct rte_mbuf *mbuf_init = &rxq->fake_mbuf; 184 struct rte_pktmbuf_pool_private *priv = 185 (struct rte_pktmbuf_pool_private *) 186 rte_mempool_get_priv(rxq_ctrl->rxq.mp); 187 int j; 188 189 /* Initialize default rearm_data for vPMD. */ 190 mbuf_init->data_off = RTE_PKTMBUF_HEADROOM; 191 rte_mbuf_refcnt_set(mbuf_init, 1); 192 mbuf_init->nb_segs = 1; 193 /* For shared queues port is provided in CQE */ 194 mbuf_init->port = rxq->shared ? 0 : rxq->port_id; 195 if (priv->flags & RTE_PKTMBUF_POOL_F_PINNED_EXT_BUF) 196 mbuf_init->ol_flags = RTE_MBUF_F_EXTERNAL; 197 /* 198 * prevent compiler reordering: 199 * rearm_data covers previous fields. 200 */ 201 rte_compiler_barrier(); 202 rxq->mbuf_initializer = 203 *(rte_xmm_t *)&mbuf_init->rearm_data; 204 /* Padding with a fake mbuf for vectorized Rx. */ 205 for (j = 0; j < MLX5_VPMD_DESCS_PER_LOOP; ++j) 206 (*rxq->elts)[elts_n + j] = &rxq->fake_mbuf; 207 } 208 if (rxq_ctrl->share_group == 0) 209 DRV_LOG(DEBUG, 210 "port %u SPRQ queue %u allocated and configured %u segments (max %u packets)", 211 RXQ_PORT_ID(rxq_ctrl), rxq_ctrl->rxq.idx, elts_n, 212 elts_n / (1 << rxq_ctrl->rxq.sges_n)); 213 else 214 DRV_LOG(DEBUG, 215 "share group %u SPRQ queue %u allocated and configured %u segments (max %u packets)", 216 rxq_ctrl->share_group, rxq_ctrl->share_qid, elts_n, 217 elts_n / (1 << rxq_ctrl->rxq.sges_n)); 218 return 0; 219 error: 220 err = rte_errno; /* Save rte_errno before cleanup. 
*/ 221 elts_n = i; 222 for (i = 0; (i != elts_n); ++i) { 223 if ((*rxq_ctrl->rxq.elts)[i] != NULL) 224 rte_pktmbuf_free_seg((*rxq_ctrl->rxq.elts)[i]); 225 (*rxq_ctrl->rxq.elts)[i] = NULL; 226 } 227 if (rxq_ctrl->share_group == 0) 228 DRV_LOG(DEBUG, "port %u SPRQ queue %u failed, freed everything", 229 RXQ_PORT_ID(rxq_ctrl), rxq_ctrl->rxq.idx); 230 else 231 DRV_LOG(DEBUG, "share group %u SPRQ queue %u failed, freed everything", 232 rxq_ctrl->share_group, rxq_ctrl->share_qid); 233 rte_errno = err; /* Restore rte_errno. */ 234 return -rte_errno; 235 } 236 237 /** 238 * Allocate RX queue elements. 239 * 240 * @param rxq_ctrl 241 * Pointer to RX queue structure. 242 * 243 * @return 244 * 0 on success, negative errno value on failure. 245 */ 246 int 247 rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl) 248 { 249 int ret = 0; 250 251 /** 252 * For MPRQ we need to allocate both MPRQ buffers 253 * for WQEs and simple mbufs for vector processing. 254 */ 255 if (mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq)) 256 ret = rxq_alloc_elts_mprq(rxq_ctrl); 257 if (ret == 0) 258 ret = rxq_alloc_elts_sprq(rxq_ctrl); 259 return ret; 260 } 261 262 /** 263 * Free RX queue elements for Multi-Packet RQ. 264 * 265 * @param rxq_ctrl 266 * Pointer to RX queue structure. 267 */ 268 static void 269 rxq_free_elts_mprq(struct mlx5_rxq_ctrl *rxq_ctrl) 270 { 271 struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq; 272 uint16_t i; 273 274 DRV_LOG(DEBUG, "port %u Multi-Packet Rx queue %u freeing %d WRs", 275 rxq->port_id, rxq->idx, (1u << rxq->elts_n)); 276 if (rxq->mprq_bufs == NULL) 277 return; 278 for (i = 0; (i != (1u << rxq->elts_n)); ++i) { 279 if ((*rxq->mprq_bufs)[i] != NULL) 280 mlx5_mprq_buf_free((*rxq->mprq_bufs)[i]); 281 (*rxq->mprq_bufs)[i] = NULL; 282 } 283 if (rxq->mprq_repl != NULL) { 284 mlx5_mprq_buf_free(rxq->mprq_repl); 285 rxq->mprq_repl = NULL; 286 } 287 } 288 289 /** 290 * Free RX queue elements for Single-Packet RQ. 291 * 292 * @param rxq_ctrl 293 * Pointer to RX queue structure. 294 */ 295 static void 296 rxq_free_elts_sprq(struct mlx5_rxq_ctrl *rxq_ctrl) 297 { 298 struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq; 299 const uint16_t q_n = mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq) ? 300 RTE_BIT32(rxq->elts_n) * RTE_BIT32(rxq->log_strd_num) : 301 RTE_BIT32(rxq->elts_n); 302 const uint16_t q_mask = q_n - 1; 303 uint16_t elts_ci = mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq) ? 304 rxq->elts_ci : rxq->rq_ci; 305 uint16_t used = q_n - (elts_ci - rxq->rq_pi); 306 uint16_t i; 307 308 if (rxq_ctrl->share_group == 0) 309 DRV_LOG(DEBUG, "port %u Rx queue %u freeing %d WRs", 310 RXQ_PORT_ID(rxq_ctrl), rxq->idx, q_n); 311 else 312 DRV_LOG(DEBUG, "share group %u Rx queue %u freeing %d WRs", 313 rxq_ctrl->share_group, rxq_ctrl->share_qid, q_n); 314 if (rxq->elts == NULL) 315 return; 316 /** 317 * Some mbuf in the Ring belongs to the application. 318 * They cannot be freed. 319 */ 320 if (mlx5_rxq_check_vec_support(rxq) > 0) { 321 for (i = 0; i < used; ++i) 322 (*rxq->elts)[(elts_ci + i) & q_mask] = NULL; 323 rxq->rq_pi = elts_ci; 324 } 325 for (i = 0; i != q_n; ++i) { 326 if ((*rxq->elts)[i] != NULL) 327 rte_pktmbuf_free_seg((*rxq->elts)[i]); 328 (*rxq->elts)[i] = NULL; 329 } 330 } 331 332 /** 333 * Free RX queue elements. 334 * 335 * @param rxq_ctrl 336 * Pointer to RX queue structure. 337 */ 338 static void 339 rxq_free_elts(struct mlx5_rxq_ctrl *rxq_ctrl) 340 { 341 /* 342 * For MPRQ we need to allocate both MPRQ buffers 343 * for WQEs and simple mbufs for vector processing. 
344 */ 345 if (mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq)) 346 rxq_free_elts_mprq(rxq_ctrl); 347 rxq_free_elts_sprq(rxq_ctrl); 348 } 349 350 /** 351 * Returns the per-queue supported offloads. 352 * 353 * @param dev 354 * Pointer to Ethernet device. 355 * 356 * @return 357 * Supported Rx offloads. 358 */ 359 uint64_t 360 mlx5_get_rx_queue_offloads(struct rte_eth_dev *dev) 361 { 362 struct mlx5_priv *priv = dev->data->dev_private; 363 uint64_t offloads = (RTE_ETH_RX_OFFLOAD_SCATTER | 364 RTE_ETH_RX_OFFLOAD_TIMESTAMP | 365 RTE_ETH_RX_OFFLOAD_RSS_HASH); 366 367 if (!priv->config.mprq.enabled) 368 offloads |= RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT; 369 if (priv->sh->config.hw_fcs_strip) 370 offloads |= RTE_ETH_RX_OFFLOAD_KEEP_CRC; 371 if (priv->sh->dev_cap.hw_csum) 372 offloads |= (RTE_ETH_RX_OFFLOAD_IPV4_CKSUM | 373 RTE_ETH_RX_OFFLOAD_UDP_CKSUM | 374 RTE_ETH_RX_OFFLOAD_TCP_CKSUM); 375 if (priv->sh->dev_cap.hw_vlan_strip) 376 offloads |= RTE_ETH_RX_OFFLOAD_VLAN_STRIP; 377 if (priv->sh->config.lro_allowed) 378 offloads |= RTE_ETH_RX_OFFLOAD_TCP_LRO; 379 return offloads; 380 } 381 382 383 /** 384 * Returns the per-port supported offloads. 385 * 386 * @return 387 * Supported Rx offloads. 388 */ 389 uint64_t 390 mlx5_get_rx_port_offloads(void) 391 { 392 uint64_t offloads = RTE_ETH_RX_OFFLOAD_VLAN_FILTER; 393 394 return offloads; 395 } 396 397 /** 398 * Verify if the queue can be released. 399 * 400 * @param dev 401 * Pointer to Ethernet device. 402 * @param idx 403 * RX queue index. 404 * 405 * @return 406 * 1 if the queue can be released 407 * 0 if the queue can not be released, there are references to it. 408 * Negative errno and rte_errno is set if queue doesn't exist. 409 */ 410 static int 411 mlx5_rxq_releasable(struct rte_eth_dev *dev, uint16_t idx) 412 { 413 struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, idx); 414 415 if (rxq == NULL) { 416 rte_errno = EINVAL; 417 return -rte_errno; 418 } 419 return (__atomic_load_n(&rxq->refcnt, __ATOMIC_RELAXED) == 1); 420 } 421 422 /* Fetches and drops all SW-owned and error CQEs to synchronize CQ. */ 423 static void 424 rxq_sync_cq(struct mlx5_rxq_data *rxq) 425 { 426 const uint16_t cqe_n = 1 << rxq->cqe_n; 427 const uint16_t cqe_mask = cqe_n - 1; 428 volatile struct mlx5_cqe *cqe; 429 int ret, i; 430 431 i = cqe_n; 432 do { 433 cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_mask]; 434 ret = check_cqe(cqe, cqe_n, rxq->cq_ci); 435 if (ret == MLX5_CQE_STATUS_HW_OWN) 436 break; 437 if (ret == MLX5_CQE_STATUS_ERR) { 438 rxq->cq_ci++; 439 continue; 440 } 441 MLX5_ASSERT(ret == MLX5_CQE_STATUS_SW_OWN); 442 if (MLX5_CQE_FORMAT(cqe->op_own) != MLX5_COMPRESSED) { 443 rxq->cq_ci++; 444 continue; 445 } 446 /* Compute the next non compressed CQE. */ 447 rxq->cq_ci += rxq->cqe_comp_layout ? 448 (MLX5_CQE_NUM_MINIS(cqe->op_own) + 1U) : 449 rte_be_to_cpu_32(cqe->byte_cnt); 450 451 } while (--i); 452 /* Move all CQEs to HW ownership, including possible MiniCQEs. */ 453 for (i = 0; i < cqe_n; i++) { 454 cqe = &(*rxq->cqes)[i]; 455 cqe->validity_iteration_count = MLX5_CQE_VIC_INIT; 456 cqe->op_own = MLX5_CQE_INVALIDATE; 457 } 458 /* Resync CQE and WQE (WQ in RESET state). */ 459 rte_io_wmb(); 460 *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci); 461 rte_io_wmb(); 462 *rxq->rq_db = rte_cpu_to_be_32(0); 463 rte_io_wmb(); 464 } 465 466 /** 467 * Rx queue stop. Device queue goes to the RESET state, 468 * all involved mbufs are freed from WQ. 469 * 470 * @param dev 471 * Pointer to Ethernet device structure. 472 * @param idx 473 * RX queue index. 
474 * 475 * @return 476 * 0 on success, a negative errno value otherwise and rte_errno is set. 477 */ 478 int 479 mlx5_rx_queue_stop_primary(struct rte_eth_dev *dev, uint16_t idx) 480 { 481 struct mlx5_priv *priv = dev->data->dev_private; 482 struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, idx); 483 struct mlx5_rxq_ctrl *rxq_ctrl = rxq->ctrl; 484 int ret; 485 486 MLX5_ASSERT(rxq != NULL && rxq_ctrl != NULL); 487 MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY); 488 ret = priv->obj_ops.rxq_obj_modify(rxq, MLX5_RXQ_MOD_RDY2RST); 489 if (ret) { 490 DRV_LOG(ERR, "Cannot change Rx WQ state to RESET: %s", 491 strerror(errno)); 492 rte_errno = errno; 493 return ret; 494 } 495 /* Remove all processes CQEs. */ 496 rxq_sync_cq(&rxq_ctrl->rxq); 497 /* Free all involved mbufs. */ 498 rxq_free_elts(rxq_ctrl); 499 /* Set the actual queue state. */ 500 dev->data->rx_queue_state[idx] = RTE_ETH_QUEUE_STATE_STOPPED; 501 return 0; 502 } 503 504 /** 505 * Rx queue stop. Device queue goes to the RESET state, 506 * all involved mbufs are freed from WQ. 507 * 508 * @param dev 509 * Pointer to Ethernet device structure. 510 * @param idx 511 * RX queue index. 512 * 513 * @return 514 * 0 on success, a negative errno value otherwise and rte_errno is set. 515 */ 516 int 517 mlx5_rx_queue_stop(struct rte_eth_dev *dev, uint16_t idx) 518 { 519 eth_rx_burst_t pkt_burst = dev->rx_pkt_burst; 520 int ret; 521 522 if (rte_eth_dev_is_rx_hairpin_queue(dev, idx)) { 523 DRV_LOG(ERR, "Hairpin queue can't be stopped"); 524 rte_errno = EINVAL; 525 return -EINVAL; 526 } 527 if (dev->data->rx_queue_state[idx] == RTE_ETH_QUEUE_STATE_STOPPED) 528 return 0; 529 /* 530 * Vectorized Rx burst requires the CQ and RQ indices 531 * synchronized, that might be broken on RQ restart 532 * and cause Rx malfunction, so queue stopping is 533 * not supported if vectorized Rx burst is engaged. 534 * The routine pointer depends on the process type, 535 * should perform check there. MPRQ is not supported as well. 536 */ 537 if (pkt_burst != mlx5_rx_burst) { 538 DRV_LOG(ERR, "Rx queue stop is only supported " 539 "for non-vectorized single-packet Rx"); 540 rte_errno = EINVAL; 541 return -EINVAL; 542 } 543 if (rte_eal_process_type() == RTE_PROC_SECONDARY) { 544 ret = mlx5_mp_os_req_queue_control(dev, idx, 545 MLX5_MP_REQ_QUEUE_RX_STOP); 546 } else { 547 ret = mlx5_rx_queue_stop_primary(dev, idx); 548 } 549 return ret; 550 } 551 552 /** 553 * Rx queue start. Device queue goes to the ready state, 554 * all required mbufs are allocated and WQ is replenished. 555 * 556 * @param dev 557 * Pointer to Ethernet device structure. 558 * @param idx 559 * RX queue index. 560 * 561 * @return 562 * 0 on success, a negative errno value otherwise and rte_errno is set. 563 */ 564 int 565 mlx5_rx_queue_start_primary(struct rte_eth_dev *dev, uint16_t idx) 566 { 567 struct mlx5_priv *priv = dev->data->dev_private; 568 struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, idx); 569 struct mlx5_rxq_data *rxq_data = &rxq->ctrl->rxq; 570 int ret; 571 572 MLX5_ASSERT(rxq != NULL && rxq->ctrl != NULL); 573 MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY); 574 /* Allocate needed buffers. */ 575 ret = rxq_alloc_elts(rxq->ctrl); 576 if (ret) { 577 DRV_LOG(ERR, "Cannot reallocate buffers for Rx WQ"); 578 rte_errno = errno; 579 return ret; 580 } 581 rte_io_wmb(); 582 *rxq_data->cq_db = rte_cpu_to_be_32(rxq_data->cq_ci); 583 rte_io_wmb(); 584 /* Reset RQ consumer before moving queue to READY state. 
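	 * The CQ doorbell has just been updated above; the rte_io_wmb()
	 * barriers ensure both doorbell writes are visible before the
	 * RST2RDY transition below.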
*/ 585 *rxq_data->rq_db = rte_cpu_to_be_32(0); 586 rte_io_wmb(); 587 ret = priv->obj_ops.rxq_obj_modify(rxq, MLX5_RXQ_MOD_RST2RDY); 588 if (ret) { 589 DRV_LOG(ERR, "Cannot change Rx WQ state to READY: %s", 590 strerror(errno)); 591 rte_errno = errno; 592 return ret; 593 } 594 /* Reinitialize RQ - set WQEs. */ 595 mlx5_rxq_initialize(rxq_data); 596 rxq_data->err_state = MLX5_RXQ_ERR_STATE_NO_ERROR; 597 /* Set actual queue state. */ 598 dev->data->rx_queue_state[idx] = RTE_ETH_QUEUE_STATE_STARTED; 599 return 0; 600 } 601 602 /** 603 * Rx queue start. Device queue goes to the ready state, 604 * all required mbufs are allocated and WQ is replenished. 605 * 606 * @param dev 607 * Pointer to Ethernet device structure. 608 * @param idx 609 * RX queue index. 610 * 611 * @return 612 * 0 on success, a negative errno value otherwise and rte_errno is set. 613 */ 614 int 615 mlx5_rx_queue_start(struct rte_eth_dev *dev, uint16_t idx) 616 { 617 int ret; 618 619 if (rte_eth_dev_is_rx_hairpin_queue(dev, idx)) { 620 DRV_LOG(ERR, "Hairpin queue can't be started"); 621 rte_errno = EINVAL; 622 return -EINVAL; 623 } 624 if (dev->data->rx_queue_state[idx] == RTE_ETH_QUEUE_STATE_STARTED) 625 return 0; 626 if (rte_eal_process_type() == RTE_PROC_SECONDARY) { 627 ret = mlx5_mp_os_req_queue_control(dev, idx, 628 MLX5_MP_REQ_QUEUE_RX_START); 629 } else { 630 ret = mlx5_rx_queue_start_primary(dev, idx); 631 } 632 return ret; 633 } 634 635 /** 636 * Rx queue presetup checks. 637 * 638 * @param dev 639 * Pointer to Ethernet device structure. 640 * @param idx 641 * RX queue index. 642 * @param desc 643 * Number of descriptors to configure in queue. 644 * @param[out] rxq_ctrl 645 * Address of pointer to shared Rx queue control. 646 * 647 * @return 648 * 0 on success, a negative errno value otherwise and rte_errno is set. 649 */ 650 static int 651 mlx5_rx_queue_pre_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t *desc, 652 struct mlx5_rxq_ctrl **rxq_ctrl) 653 { 654 struct mlx5_priv *priv = dev->data->dev_private; 655 struct mlx5_rxq_priv *rxq; 656 bool empty; 657 658 if (!rte_is_power_of_2(*desc)) { 659 *desc = 1 << log2above(*desc); 660 DRV_LOG(WARNING, 661 "port %u increased number of descriptors in Rx queue %u" 662 " to the next power of two (%d)", 663 dev->data->port_id, idx, *desc); 664 } 665 DRV_LOG(DEBUG, "port %u configuring Rx queue %u for %u descriptors", 666 dev->data->port_id, idx, *desc); 667 if (idx >= priv->rxqs_n) { 668 DRV_LOG(ERR, "port %u Rx queue index out of range (%u >= %u)", 669 dev->data->port_id, idx, priv->rxqs_n); 670 rte_errno = EOVERFLOW; 671 return -rte_errno; 672 } 673 if (rxq_ctrl == NULL || *rxq_ctrl == NULL) 674 return 0; 675 if (!(*rxq_ctrl)->rxq.shared) { 676 if (!mlx5_rxq_releasable(dev, idx)) { 677 DRV_LOG(ERR, "port %u unable to release queue index %u", 678 dev->data->port_id, idx); 679 rte_errno = EBUSY; 680 return -rte_errno; 681 } 682 mlx5_rxq_release(dev, idx); 683 } else { 684 if ((*rxq_ctrl)->obj != NULL) 685 /* Some port using shared Rx queue has been started. */ 686 return 0; 687 /* Release all owner RxQ to reconfigure Shared RxQ. */ 688 do { 689 rxq = LIST_FIRST(&(*rxq_ctrl)->owners); 690 LIST_REMOVE(rxq, owner_entry); 691 empty = LIST_EMPTY(&(*rxq_ctrl)->owners); 692 mlx5_rxq_release(ETH_DEV(rxq->priv), rxq->idx); 693 } while (!empty); 694 *rxq_ctrl = NULL; 695 } 696 return 0; 697 } 698 699 /** 700 * Get the shared Rx queue object that matches group and queue index. 701 * 702 * @param dev 703 * Pointer to Ethernet device structure. 
704 * @param group 705 * Shared RXQ group. 706 * @param share_qid 707 * Shared RX queue index. 708 * 709 * @return 710 * Shared RXQ object that matching, or NULL if not found. 711 */ 712 static struct mlx5_rxq_ctrl * 713 mlx5_shared_rxq_get(struct rte_eth_dev *dev, uint32_t group, uint16_t share_qid) 714 { 715 struct mlx5_rxq_ctrl *rxq_ctrl; 716 struct mlx5_priv *priv = dev->data->dev_private; 717 718 LIST_FOREACH(rxq_ctrl, &priv->sh->shared_rxqs, share_entry) { 719 if (rxq_ctrl->share_group == group && 720 rxq_ctrl->share_qid == share_qid) 721 return rxq_ctrl; 722 } 723 return NULL; 724 } 725 726 /** 727 * Check whether requested Rx queue configuration matches shared RXQ. 728 * 729 * @param rxq_ctrl 730 * Pointer to shared RXQ. 731 * @param dev 732 * Pointer to Ethernet device structure. 733 * @param idx 734 * Queue index. 735 * @param desc 736 * Number of descriptors to configure in queue. 737 * @param socket 738 * NUMA socket on which memory must be allocated. 739 * @param[in] conf 740 * Thresholds parameters. 741 * @param mp 742 * Memory pool for buffer allocations. 743 * 744 * @return 745 * 0 on success, a negative errno value otherwise and rte_errno is set. 746 */ 747 static bool 748 mlx5_shared_rxq_match(struct mlx5_rxq_ctrl *rxq_ctrl, struct rte_eth_dev *dev, 749 uint16_t idx, uint16_t desc, unsigned int socket, 750 const struct rte_eth_rxconf *conf, 751 struct rte_mempool *mp) 752 { 753 struct mlx5_priv *spriv = LIST_FIRST(&rxq_ctrl->owners)->priv; 754 struct mlx5_priv *priv = dev->data->dev_private; 755 unsigned int i; 756 757 RTE_SET_USED(conf); 758 if (rxq_ctrl->socket != socket) { 759 DRV_LOG(ERR, "port %u queue index %u failed to join shared group: socket mismatch", 760 dev->data->port_id, idx); 761 return false; 762 } 763 if (rxq_ctrl->rxq.elts_n != log2above(desc)) { 764 DRV_LOG(ERR, "port %u queue index %u failed to join shared group: descriptor number mismatch", 765 dev->data->port_id, idx); 766 return false; 767 } 768 if (priv->mtu != spriv->mtu) { 769 DRV_LOG(ERR, "port %u queue index %u failed to join shared group: mtu mismatch", 770 dev->data->port_id, idx); 771 return false; 772 } 773 if (priv->dev_data->dev_conf.intr_conf.rxq != 774 spriv->dev_data->dev_conf.intr_conf.rxq) { 775 DRV_LOG(ERR, "port %u queue index %u failed to join shared group: interrupt mismatch", 776 dev->data->port_id, idx); 777 return false; 778 } 779 if (mp != NULL && rxq_ctrl->rxq.mp != mp) { 780 DRV_LOG(ERR, "port %u queue index %u failed to join shared group: mempool mismatch", 781 dev->data->port_id, idx); 782 return false; 783 } else if (mp == NULL) { 784 if (conf->rx_nseg != rxq_ctrl->rxseg_n) { 785 DRV_LOG(ERR, "port %u queue index %u failed to join shared group: segment number mismatch", 786 dev->data->port_id, idx); 787 return false; 788 } 789 for (i = 0; i < conf->rx_nseg; i++) { 790 if (memcmp(&conf->rx_seg[i].split, &rxq_ctrl->rxseg[i], 791 sizeof(struct rte_eth_rxseg_split))) { 792 DRV_LOG(ERR, "port %u queue index %u failed to join shared group: segment %u configuration mismatch", 793 dev->data->port_id, idx, i); 794 return false; 795 } 796 } 797 } 798 if (priv->config.hw_padding != spriv->config.hw_padding) { 799 DRV_LOG(ERR, "port %u queue index %u failed to join shared group: padding mismatch", 800 dev->data->port_id, idx); 801 return false; 802 } 803 if (priv->config.cqe_comp != spriv->config.cqe_comp || 804 (priv->config.cqe_comp && 805 priv->config.cqe_comp_fmt != spriv->config.cqe_comp_fmt)) { 806 DRV_LOG(ERR, "port %u queue index %u failed to join shared group: CQE 
compression mismatch", 807 dev->data->port_id, idx); 808 return false; 809 } 810 return true; 811 } 812 813 /** 814 * 815 * @param dev 816 * Pointer to Ethernet device structure. 817 * @param idx 818 * RX queue index. 819 * @param desc 820 * Number of descriptors to configure in queue. 821 * @param socket 822 * NUMA socket on which memory must be allocated. 823 * @param[in] conf 824 * Thresholds parameters. 825 * @param mp 826 * Memory pool for buffer allocations. 827 * 828 * @return 829 * 0 on success, a negative errno value otherwise and rte_errno is set. 830 */ 831 int 832 mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, 833 unsigned int socket, const struct rte_eth_rxconf *conf, 834 struct rte_mempool *mp) 835 { 836 struct mlx5_priv *priv = dev->data->dev_private; 837 struct mlx5_rxq_priv *rxq; 838 struct mlx5_rxq_ctrl *rxq_ctrl = NULL; 839 struct rte_eth_rxseg_split *rx_seg = 840 (struct rte_eth_rxseg_split *)conf->rx_seg; 841 struct rte_eth_rxseg_split rx_single = {.mp = mp}; 842 uint16_t n_seg = conf->rx_nseg; 843 int res; 844 uint64_t offloads = conf->offloads | 845 dev->data->dev_conf.rxmode.offloads; 846 bool is_extmem = false; 847 848 if ((offloads & RTE_ETH_RX_OFFLOAD_TCP_LRO) && 849 !priv->sh->config.lro_allowed) { 850 DRV_LOG(ERR, 851 "Port %u queue %u LRO is configured but not allowed.", 852 dev->data->port_id, idx); 853 rte_errno = EINVAL; 854 return -rte_errno; 855 } 856 if (mp) { 857 /* 858 * The parameters should be checked on rte_eth_dev layer. 859 * If mp is specified it means the compatible configuration 860 * without buffer split feature tuning. 861 */ 862 rx_seg = &rx_single; 863 n_seg = 1; 864 is_extmem = rte_pktmbuf_priv_flags(mp) & 865 RTE_PKTMBUF_POOL_F_PINNED_EXT_BUF; 866 } 867 if (n_seg > 1) { 868 /* The offloads should be checked on rte_eth_dev layer. */ 869 MLX5_ASSERT(offloads & RTE_ETH_RX_OFFLOAD_SCATTER); 870 if (!(offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT)) { 871 DRV_LOG(ERR, "port %u queue index %u split " 872 "offload not configured", 873 dev->data->port_id, idx); 874 rte_errno = ENOSPC; 875 return -rte_errno; 876 } 877 MLX5_ASSERT(n_seg < MLX5_MAX_RXQ_NSEG); 878 } 879 if (conf->share_group > 0) { 880 if (!priv->sh->cdev->config.hca_attr.mem_rq_rmp) { 881 DRV_LOG(ERR, "port %u queue index %u shared Rx queue not supported by fw", 882 dev->data->port_id, idx); 883 rte_errno = EINVAL; 884 return -rte_errno; 885 } 886 if (priv->obj_ops.rxq_obj_new != devx_obj_ops.rxq_obj_new) { 887 DRV_LOG(ERR, "port %u queue index %u shared Rx queue needs DevX api", 888 dev->data->port_id, idx); 889 rte_errno = EINVAL; 890 return -rte_errno; 891 } 892 if (conf->share_qid >= priv->rxqs_n) { 893 DRV_LOG(ERR, "port %u shared Rx queue index %u > number of Rx queues %u", 894 dev->data->port_id, conf->share_qid, 895 priv->rxqs_n); 896 rte_errno = EINVAL; 897 return -rte_errno; 898 } 899 if (priv->config.mprq.enabled) { 900 DRV_LOG(ERR, "port %u shared Rx queue index %u: not supported when MPRQ enabled", 901 dev->data->port_id, conf->share_qid); 902 rte_errno = EINVAL; 903 return -rte_errno; 904 } 905 /* Try to reuse shared RXQ. 
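		 * The candidate is looked up by (share_group, share_qid) and
		 * is only reused if mlx5_shared_rxq_match() accepts the
		 * requested queue configuration.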
*/ 906 rxq_ctrl = mlx5_shared_rxq_get(dev, conf->share_group, 907 conf->share_qid); 908 res = mlx5_rx_queue_pre_setup(dev, idx, &desc, &rxq_ctrl); 909 if (res) 910 return res; 911 if (rxq_ctrl != NULL && 912 !mlx5_shared_rxq_match(rxq_ctrl, dev, idx, desc, socket, 913 conf, mp)) { 914 rte_errno = EINVAL; 915 return -rte_errno; 916 } 917 } else { 918 res = mlx5_rx_queue_pre_setup(dev, idx, &desc, &rxq_ctrl); 919 if (res) 920 return res; 921 } 922 /* Allocate RXQ. */ 923 rxq = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, sizeof(*rxq), 0, 924 SOCKET_ID_ANY); 925 if (!rxq) { 926 DRV_LOG(ERR, "port %u unable to allocate rx queue index %u private data", 927 dev->data->port_id, idx); 928 rte_errno = ENOMEM; 929 return -rte_errno; 930 } 931 if (rxq_ctrl == NULL) { 932 rxq_ctrl = mlx5_rxq_new(dev, idx, desc, socket, conf, rx_seg, 933 n_seg, is_extmem); 934 if (rxq_ctrl == NULL) { 935 DRV_LOG(ERR, "port %u unable to allocate rx queue index %u", 936 dev->data->port_id, idx); 937 mlx5_free(rxq); 938 rte_errno = ENOMEM; 939 return -rte_errno; 940 } 941 } 942 rxq->priv = priv; 943 rxq->idx = idx; 944 (*priv->rxq_privs)[idx] = rxq; 945 /* Join owner list. */ 946 LIST_INSERT_HEAD(&rxq_ctrl->owners, rxq, owner_entry); 947 rxq->ctrl = rxq_ctrl; 948 mlx5_rxq_ref(dev, idx); 949 DRV_LOG(DEBUG, "port %u adding Rx queue %u to list", 950 dev->data->port_id, idx); 951 dev->data->rx_queues[idx] = &rxq_ctrl->rxq; 952 return 0; 953 } 954 955 /** 956 * 957 * @param dev 958 * Pointer to Ethernet device structure. 959 * @param idx 960 * RX queue index. 961 * @param desc 962 * Number of descriptors to configure in queue. 963 * @param hairpin_conf 964 * Hairpin configuration parameters. 965 * 966 * @return 967 * 0 on success, a negative errno value otherwise and rte_errno is set. 968 */ 969 int 970 mlx5_rx_hairpin_queue_setup(struct rte_eth_dev *dev, uint16_t idx, 971 uint16_t desc, 972 const struct rte_eth_hairpin_conf *hairpin_conf) 973 { 974 struct mlx5_priv *priv = dev->data->dev_private; 975 struct mlx5_rxq_priv *rxq; 976 struct mlx5_rxq_ctrl *rxq_ctrl; 977 int res; 978 979 res = mlx5_rx_queue_pre_setup(dev, idx, &desc, NULL); 980 if (res) 981 return res; 982 if (hairpin_conf->peer_count != 1) { 983 rte_errno = EINVAL; 984 DRV_LOG(ERR, "port %u unable to setup Rx hairpin queue index %u" 985 " peer count is %u", dev->data->port_id, 986 idx, hairpin_conf->peer_count); 987 return -rte_errno; 988 } 989 if (hairpin_conf->peers[0].port == dev->data->port_id) { 990 if (hairpin_conf->peers[0].queue >= priv->txqs_n) { 991 rte_errno = EINVAL; 992 DRV_LOG(ERR, "port %u unable to setup Rx hairpin queue" 993 " index %u, Tx %u is larger than %u", 994 dev->data->port_id, idx, 995 hairpin_conf->peers[0].queue, priv->txqs_n); 996 return -rte_errno; 997 } 998 } else { 999 if (hairpin_conf->manual_bind == 0 || 1000 hairpin_conf->tx_explicit == 0) { 1001 rte_errno = EINVAL; 1002 DRV_LOG(ERR, "port %u unable to setup Rx hairpin queue" 1003 " index %u peer port %u with attributes %u %u", 1004 dev->data->port_id, idx, 1005 hairpin_conf->peers[0].port, 1006 hairpin_conf->manual_bind, 1007 hairpin_conf->tx_explicit); 1008 return -rte_errno; 1009 } 1010 } 1011 rxq = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, sizeof(*rxq), 0, 1012 SOCKET_ID_ANY); 1013 if (!rxq) { 1014 DRV_LOG(ERR, "port %u unable to allocate hairpin rx queue index %u private data", 1015 dev->data->port_id, idx); 1016 rte_errno = ENOMEM; 1017 return -rte_errno; 1018 } 1019 rxq->priv = priv; 1020 rxq->idx = idx; 1021 (*priv->rxq_privs)[idx] = rxq; 1022 rxq_ctrl = 
mlx5_rxq_hairpin_new(dev, rxq, desc, hairpin_conf); 1023 if (!rxq_ctrl) { 1024 DRV_LOG(ERR, "port %u unable to allocate hairpin queue index %u", 1025 dev->data->port_id, idx); 1026 mlx5_free(rxq); 1027 (*priv->rxq_privs)[idx] = NULL; 1028 rte_errno = ENOMEM; 1029 return -rte_errno; 1030 } 1031 DRV_LOG(DEBUG, "port %u adding hairpin Rx queue %u to list", 1032 dev->data->port_id, idx); 1033 dev->data->rx_queues[idx] = &rxq_ctrl->rxq; 1034 return 0; 1035 } 1036 1037 /** 1038 * DPDK callback to release a RX queue. 1039 * 1040 * @param dev 1041 * Pointer to Ethernet device structure. 1042 * @param qid 1043 * Receive queue index. 1044 */ 1045 void 1046 mlx5_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid) 1047 { 1048 struct mlx5_rxq_data *rxq = dev->data->rx_queues[qid]; 1049 1050 if (rxq == NULL) 1051 return; 1052 if (!mlx5_rxq_releasable(dev, qid)) 1053 rte_panic("port %u Rx queue %u is still used by a flow and" 1054 " cannot be removed\n", dev->data->port_id, qid); 1055 mlx5_rxq_release(dev, qid); 1056 } 1057 1058 /** 1059 * Allocate queue vector and fill epoll fd list for Rx interrupts. 1060 * 1061 * @param dev 1062 * Pointer to Ethernet device. 1063 * 1064 * @return 1065 * 0 on success, a negative errno value otherwise and rte_errno is set. 1066 */ 1067 int 1068 mlx5_rx_intr_vec_enable(struct rte_eth_dev *dev) 1069 { 1070 struct mlx5_priv *priv = dev->data->dev_private; 1071 unsigned int i; 1072 unsigned int rxqs_n = priv->rxqs_n; 1073 unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID); 1074 unsigned int count = 0; 1075 struct rte_intr_handle *intr_handle = dev->intr_handle; 1076 1077 if (!dev->data->dev_conf.intr_conf.rxq) 1078 return 0; 1079 mlx5_rx_intr_vec_disable(dev); 1080 if (rte_intr_vec_list_alloc(intr_handle, NULL, n)) { 1081 DRV_LOG(ERR, 1082 "port %u failed to allocate memory for interrupt" 1083 " vector, Rx interrupts will not be supported", 1084 dev->data->port_id); 1085 rte_errno = ENOMEM; 1086 return -rte_errno; 1087 } 1088 1089 if (rte_intr_type_set(intr_handle, RTE_INTR_HANDLE_EXT)) 1090 return -rte_errno; 1091 1092 for (i = 0; i != n; ++i) { 1093 /* This rxq obj must not be released in this function. */ 1094 struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, i); 1095 struct mlx5_rxq_obj *rxq_obj = rxq ? rxq->ctrl->obj : NULL; 1096 int rc; 1097 1098 /* Skip queues that cannot request interrupts. */ 1099 if (!rxq_obj || (!rxq_obj->ibv_channel && 1100 !rxq_obj->devx_channel)) { 1101 /* Use invalid intr_vec[] index to disable entry. 
*/ 1102 if (rte_intr_vec_list_index_set(intr_handle, i, 1103 RTE_INTR_VEC_RXTX_OFFSET + RTE_MAX_RXTX_INTR_VEC_ID)) 1104 return -rte_errno; 1105 continue; 1106 } 1107 mlx5_rxq_ref(dev, i); 1108 if (count >= RTE_MAX_RXTX_INTR_VEC_ID) { 1109 DRV_LOG(ERR, 1110 "port %u too many Rx queues for interrupt" 1111 " vector size (%d), Rx interrupts cannot be" 1112 " enabled", 1113 dev->data->port_id, RTE_MAX_RXTX_INTR_VEC_ID); 1114 mlx5_rx_intr_vec_disable(dev); 1115 rte_errno = ENOMEM; 1116 return -rte_errno; 1117 } 1118 rc = mlx5_os_set_nonblock_channel_fd(rxq_obj->fd); 1119 if (rc < 0) { 1120 rte_errno = errno; 1121 DRV_LOG(ERR, 1122 "port %u failed to make Rx interrupt file" 1123 " descriptor %d non-blocking for queue index" 1124 " %d", 1125 dev->data->port_id, rxq_obj->fd, i); 1126 mlx5_rx_intr_vec_disable(dev); 1127 return -rte_errno; 1128 } 1129 1130 if (rte_intr_vec_list_index_set(intr_handle, i, 1131 RTE_INTR_VEC_RXTX_OFFSET + count)) 1132 return -rte_errno; 1133 if (rte_intr_efds_index_set(intr_handle, count, 1134 rxq_obj->fd)) 1135 return -rte_errno; 1136 count++; 1137 } 1138 if (!count) 1139 mlx5_rx_intr_vec_disable(dev); 1140 else if (rte_intr_nb_efd_set(intr_handle, count)) 1141 return -rte_errno; 1142 return 0; 1143 } 1144 1145 /** 1146 * Clean up Rx interrupts handler. 1147 * 1148 * @param dev 1149 * Pointer to Ethernet device. 1150 */ 1151 void 1152 mlx5_rx_intr_vec_disable(struct rte_eth_dev *dev) 1153 { 1154 struct mlx5_priv *priv = dev->data->dev_private; 1155 struct rte_intr_handle *intr_handle = dev->intr_handle; 1156 unsigned int i; 1157 unsigned int rxqs_n = priv->rxqs_n; 1158 unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID); 1159 1160 if (!dev->data->dev_conf.intr_conf.rxq) 1161 return; 1162 if (rte_intr_vec_list_index_get(intr_handle, 0) < 0) 1163 goto free; 1164 for (i = 0; i != n; ++i) { 1165 if (rte_intr_vec_list_index_get(intr_handle, i) == 1166 RTE_INTR_VEC_RXTX_OFFSET + RTE_MAX_RXTX_INTR_VEC_ID) 1167 continue; 1168 /** 1169 * Need to access directly the queue to release the reference 1170 * kept in mlx5_rx_intr_vec_enable(). 1171 */ 1172 mlx5_rxq_deref(dev, i); 1173 } 1174 free: 1175 rte_intr_free_epoll_fd(intr_handle); 1176 1177 rte_intr_vec_list_free(intr_handle); 1178 1179 rte_intr_nb_efd_set(intr_handle, 0); 1180 } 1181 1182 /** 1183 * MLX5 CQ notification . 1184 * 1185 * @param rxq 1186 * Pointer to receive queue structure. 1187 * @param sq_n_rxq 1188 * Sequence number per receive queue . 1189 */ 1190 static inline void 1191 mlx5_arm_cq(struct mlx5_rxq_data *rxq, int sq_n_rxq) 1192 { 1193 int sq_n = 0; 1194 uint32_t doorbell_hi; 1195 uint64_t doorbell; 1196 1197 sq_n = sq_n_rxq & MLX5_CQ_SQN_MASK; 1198 doorbell_hi = sq_n << MLX5_CQ_SQN_OFFSET | (rxq->cq_ci & MLX5_CI_MASK); 1199 doorbell = (uint64_t)doorbell_hi << 32; 1200 doorbell |= rxq->cqn; 1201 mlx5_doorbell_ring(&rxq->uar_data, rte_cpu_to_be_64(doorbell), 1202 doorbell_hi, &rxq->cq_db[MLX5_CQ_ARM_DB], 0); 1203 } 1204 1205 /** 1206 * DPDK callback for Rx queue interrupt enable. 1207 * 1208 * @param dev 1209 * Pointer to Ethernet device structure. 1210 * @param rx_queue_id 1211 * Rx queue number. 1212 * 1213 * @return 1214 * 0 on success, a negative errno value otherwise and rte_errno is set. 
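 *
 * Typical application-side flow (an illustrative sketch using the generic
 * ethdev/EAL interrupt API rather than anything specific to this PMD; the
 * event array below is a hypothetical local variable):
 * @code
 * struct rte_epoll_event events[1];
 *
 * // Register the queue in the per-thread epoll set, arm the CQ through
 * // rte_eth_dev_rx_intr_enable() and sleep until a packet arrives.
 * rte_eth_dev_rx_intr_ctl_q(port_id, queue_id, RTE_EPOLL_PER_THREAD,
 *                           RTE_INTR_EVENT_ADD, NULL);
 * rte_eth_dev_rx_intr_enable(port_id, queue_id);
 * rte_epoll_wait(RTE_EPOLL_PER_THREAD, events, RTE_DIM(events), -1);
 * rte_eth_dev_rx_intr_disable(port_id, queue_id);
 * @endcode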
1215 */ 1216 int 1217 mlx5_rx_intr_enable(struct rte_eth_dev *dev, uint16_t rx_queue_id) 1218 { 1219 struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, rx_queue_id); 1220 if (!rxq) 1221 goto error; 1222 if (rxq->ctrl->irq) { 1223 if (!rxq->ctrl->obj) 1224 goto error; 1225 mlx5_arm_cq(&rxq->ctrl->rxq, rxq->ctrl->rxq.cq_arm_sn); 1226 } 1227 return 0; 1228 error: 1229 rte_errno = EINVAL; 1230 return -rte_errno; 1231 } 1232 1233 /** 1234 * DPDK callback for Rx queue interrupt disable. 1235 * 1236 * @param dev 1237 * Pointer to Ethernet device structure. 1238 * @param rx_queue_id 1239 * Rx queue number. 1240 * 1241 * @return 1242 * 0 on success, a negative errno value otherwise and rte_errno is set. 1243 */ 1244 int 1245 mlx5_rx_intr_disable(struct rte_eth_dev *dev, uint16_t rx_queue_id) 1246 { 1247 struct mlx5_priv *priv = dev->data->dev_private; 1248 struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, rx_queue_id); 1249 int ret = 0; 1250 1251 if (!rxq) { 1252 rte_errno = EINVAL; 1253 return -rte_errno; 1254 } 1255 if (!rxq->ctrl->obj) 1256 goto error; 1257 if (rxq->ctrl->irq) { 1258 ret = priv->obj_ops.rxq_event_get(rxq->ctrl->obj); 1259 if (ret < 0) 1260 goto error; 1261 rxq->ctrl->rxq.cq_arm_sn++; 1262 } 1263 return 0; 1264 error: 1265 /** 1266 * The ret variable may be EAGAIN which means the get_event function was 1267 * called before receiving one. 1268 */ 1269 if (ret < 0) 1270 rte_errno = errno; 1271 else 1272 rte_errno = EINVAL; 1273 if (rte_errno != EAGAIN) 1274 DRV_LOG(WARNING, "port %u unable to disable interrupt on Rx queue %d", 1275 dev->data->port_id, rx_queue_id); 1276 return -rte_errno; 1277 } 1278 1279 /** 1280 * Verify the Rx queue objects list is empty 1281 * 1282 * @param dev 1283 * Pointer to Ethernet device. 1284 * 1285 * @return 1286 * The number of objects not released. 1287 */ 1288 int 1289 mlx5_rxq_obj_verify(struct rte_eth_dev *dev) 1290 { 1291 struct mlx5_priv *priv = dev->data->dev_private; 1292 int ret = 0; 1293 struct mlx5_rxq_obj *rxq_obj; 1294 1295 LIST_FOREACH(rxq_obj, &priv->rxqsobj, next) { 1296 if (rxq_obj->rxq_ctrl == NULL) 1297 continue; 1298 if (rxq_obj->rxq_ctrl->rxq.shared && 1299 !LIST_EMPTY(&rxq_obj->rxq_ctrl->owners)) 1300 continue; 1301 DRV_LOG(DEBUG, "port %u Rx queue %u still referenced", 1302 dev->data->port_id, rxq_obj->rxq_ctrl->rxq.idx); 1303 ++ret; 1304 } 1305 return ret; 1306 } 1307 1308 /** 1309 * Callback function to initialize mbufs for Multi-Packet RQ. 1310 */ 1311 static inline void 1312 mlx5_mprq_buf_init(struct rte_mempool *mp, void *opaque_arg, 1313 void *_m, unsigned int i __rte_unused) 1314 { 1315 struct mlx5_mprq_buf *buf = _m; 1316 struct rte_mbuf_ext_shared_info *shinfo; 1317 unsigned int strd_n = (unsigned int)(uintptr_t)opaque_arg; 1318 unsigned int j; 1319 1320 memset(_m, 0, sizeof(*buf)); 1321 buf->mp = mp; 1322 __atomic_store_n(&buf->refcnt, 1, __ATOMIC_RELAXED); 1323 for (j = 0; j != strd_n; ++j) { 1324 shinfo = &buf->shinfos[j]; 1325 shinfo->free_cb = mlx5_mprq_buf_free_cb; 1326 shinfo->fcb_opaque = buf; 1327 } 1328 } 1329 1330 /** 1331 * Free mempool of Multi-Packet RQ. 1332 * 1333 * @param dev 1334 * Pointer to Ethernet device. 1335 * 1336 * @return 1337 * 0 on success, negative errno value on failure. 
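 *
 * The Multi-Packet RQ mempool is managed by the PMD itself (see
 * mlx5_mprq_alloc_mp()); the free is refused with EBUSY while any of its
 * buffers are still attached to mbufs held by the application.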
1338 */ 1339 int 1340 mlx5_mprq_free_mp(struct rte_eth_dev *dev) 1341 { 1342 struct mlx5_priv *priv = dev->data->dev_private; 1343 struct rte_mempool *mp = priv->mprq_mp; 1344 unsigned int i; 1345 1346 if (mp == NULL) 1347 return 0; 1348 DRV_LOG(DEBUG, "port %u freeing mempool (%s) for Multi-Packet RQ", 1349 dev->data->port_id, mp->name); 1350 /* 1351 * If a buffer in the pool has been externally attached to a mbuf and it 1352 * is still in use by application, destroying the Rx queue can spoil 1353 * the packet. It is unlikely to happen but if application dynamically 1354 * creates and destroys with holding Rx packets, this can happen. 1355 * 1356 * TODO: It is unavoidable for now because the mempool for Multi-Packet 1357 * RQ isn't provided by application but managed by PMD. 1358 */ 1359 if (!rte_mempool_full(mp)) { 1360 DRV_LOG(ERR, 1361 "port %u mempool for Multi-Packet RQ is still in use", 1362 dev->data->port_id); 1363 rte_errno = EBUSY; 1364 return -rte_errno; 1365 } 1366 rte_mempool_free(mp); 1367 /* Unset mempool for each Rx queue. */ 1368 for (i = 0; i != priv->rxqs_n; ++i) { 1369 struct mlx5_rxq_data *rxq = mlx5_rxq_data_get(dev, i); 1370 1371 if (rxq == NULL) 1372 continue; 1373 rxq->mprq_mp = NULL; 1374 } 1375 priv->mprq_mp = NULL; 1376 return 0; 1377 } 1378 1379 /** 1380 * Allocate a mempool for Multi-Packet RQ. All configured Rx queues share the 1381 * mempool. If already allocated, reuse it if there're enough elements. 1382 * Otherwise, resize it. 1383 * 1384 * @param dev 1385 * Pointer to Ethernet device. 1386 * 1387 * @return 1388 * 0 on success, negative errno value on failure. 1389 */ 1390 int 1391 mlx5_mprq_alloc_mp(struct rte_eth_dev *dev) 1392 { 1393 struct mlx5_priv *priv = dev->data->dev_private; 1394 struct rte_mempool *mp = priv->mprq_mp; 1395 char name[RTE_MEMPOOL_NAMESIZE]; 1396 unsigned int desc = 0; 1397 unsigned int buf_len; 1398 unsigned int obj_num; 1399 unsigned int obj_size; 1400 unsigned int log_strd_num = 0; 1401 unsigned int log_strd_sz = 0; 1402 unsigned int i; 1403 unsigned int n_ibv = 0; 1404 int ret; 1405 1406 if (!mlx5_mprq_enabled(dev)) 1407 return 0; 1408 /* Count the total number of descriptors configured. */ 1409 for (i = 0; i != priv->rxqs_n; ++i) { 1410 struct mlx5_rxq_ctrl *rxq_ctrl = mlx5_rxq_ctrl_get(dev, i); 1411 struct mlx5_rxq_data *rxq; 1412 1413 if (rxq_ctrl == NULL || rxq_ctrl->is_hairpin) 1414 continue; 1415 rxq = &rxq_ctrl->rxq; 1416 n_ibv++; 1417 desc += 1 << rxq->elts_n; 1418 /* Get the max number of strides. */ 1419 if (log_strd_num < rxq->log_strd_num) 1420 log_strd_num = rxq->log_strd_num; 1421 /* Get the max size of a stride. */ 1422 if (log_strd_sz < rxq->log_strd_sz) 1423 log_strd_sz = rxq->log_strd_sz; 1424 } 1425 MLX5_ASSERT(log_strd_num && log_strd_sz); 1426 buf_len = RTE_BIT32(log_strd_num) * RTE_BIT32(log_strd_sz); 1427 obj_size = sizeof(struct mlx5_mprq_buf) + buf_len + 1428 RTE_BIT32(log_strd_num) * 1429 sizeof(struct rte_mbuf_ext_shared_info) + 1430 RTE_PKTMBUF_HEADROOM; 1431 /* 1432 * Received packets can be either memcpy'd or externally referenced. In 1433 * case that the packet is attached to an mbuf as an external buffer, as 1434 * it isn't possible to predict how the buffers will be queued by 1435 * application, there's no option to exactly pre-allocate needed buffers 1436 * in advance but to speculatively prepares enough buffers. 
 *
 * In the data path, if this Mempool is depleted, PMD will try to memcpy
 * received packets to buffers provided by application (rxq->mp) until
 * this Mempool becomes available again.
 */
	desc *= 4;
	obj_num = desc + MLX5_MPRQ_MP_CACHE_SZ * n_ibv;
	/*
	 * rte_mempool_create_empty() has a sanity check to refuse large cache
	 * size compared to the number of elements.
	 * CACHE_FLUSHTHRESH_MULTIPLIER is defined in a C file, so using a
	 * constant number 2 instead.
	 */
	obj_num = RTE_MAX(obj_num, MLX5_MPRQ_MP_CACHE_SZ * 2);
	/* Check if a mempool is already allocated and whether it can be reused. */
	if (mp != NULL && mp->elt_size >= obj_size && mp->size >= obj_num) {
		DRV_LOG(DEBUG, "port %u mempool %s is being reused",
			dev->data->port_id, mp->name);
		/* Reuse. */
		goto exit;
	} else if (mp != NULL) {
		DRV_LOG(DEBUG, "port %u mempool %s should be resized, freeing it",
			dev->data->port_id, mp->name);
		/*
		 * If the free fails, the mempool may still be in use and there
		 * is no way but to keep using the existing one. On buffer
		 * underrun, packets will be memcpy'd instead of attached as
		 * external buffers.
		 */
		if (mlx5_mprq_free_mp(dev)) {
			if (mp->elt_size >= obj_size)
				goto exit;
			else
				return -rte_errno;
		}
	}
	snprintf(name, sizeof(name), "port-%u-mprq", dev->data->port_id);
	mp = rte_mempool_create(name, obj_num, obj_size, MLX5_MPRQ_MP_CACHE_SZ,
				0, NULL, NULL, mlx5_mprq_buf_init,
				(void *)((uintptr_t)1 << log_strd_num),
				dev->device->numa_node, 0);
	if (mp == NULL) {
		DRV_LOG(ERR,
			"port %u failed to allocate a mempool for"
			" Multi-Packet RQ, count=%u, size=%u",
			dev->data->port_id, obj_num, obj_size);
		rte_errno = ENOMEM;
		return -rte_errno;
	}
	ret = mlx5_mr_mempool_register(priv->sh->cdev, mp, false);
	if (ret < 0 && rte_errno != EEXIST) {
		ret = rte_errno;
		DRV_LOG(ERR, "port %u failed to register a mempool for Multi-Packet RQ",
			dev->data->port_id);
		rte_mempool_free(mp);
		rte_errno = ret;
		return -rte_errno;
	}
	priv->mprq_mp = mp;
exit:
	/* Set mempool for each Rx queue. */
	for (i = 0; i != priv->rxqs_n; ++i) {
		struct mlx5_rxq_ctrl *rxq_ctrl = mlx5_rxq_ctrl_get(dev, i);

		if (rxq_ctrl == NULL || rxq_ctrl->is_hairpin)
			continue;
		rxq_ctrl->rxq.mprq_mp = mp;
	}
	DRV_LOG(INFO, "port %u Multi-Packet RQ is configured",
		dev->data->port_id);
	return 0;
}

#define MLX5_MAX_TCP_HDR_OFFSET ((unsigned int)(sizeof(struct rte_ether_hdr) + \
				sizeof(struct rte_vlan_hdr) * 2 + \
				sizeof(struct rte_ipv6_hdr)))
#define MAX_TCP_OPTION_SIZE 40u
#define MLX5_MAX_LRO_HEADER_FIX ((unsigned int)(MLX5_MAX_TCP_HDR_OFFSET + \
				 sizeof(struct rte_tcp_hdr) + \
				 MAX_TCP_OPTION_SIZE))

/**
 * Adjust the maximum LRO message size.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param idx
 *   RX queue index.
 * @param max_lro_size
 *   The maximum size for an LRO packet.
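 *
 * For example (illustrative numbers): with lro_max_msg_sz_mode ==
 * MLX5_LRO_MAX_MSG_SIZE_START_FROM_L4, the worst-case L2 + double VLAN +
 * IPv6 header length (MLX5_MAX_TCP_HDR_OFFSET, 62 bytes) is subtracted
 * from the requested size first; the result is then clamped to
 * MLX5_MAX_LRO_SIZE and to any smaller value already stored in
 * priv->max_lro_msg_size.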
 */
static void
mlx5_max_lro_msg_size_adjust(struct rte_eth_dev *dev, uint16_t idx,
			     uint32_t max_lro_size)
{
	struct mlx5_priv *priv = dev->data->dev_private;

	if (priv->sh->cdev->config.hca_attr.lro_max_msg_sz_mode ==
	    MLX5_LRO_MAX_MSG_SIZE_START_FROM_L4 && max_lro_size >
	    MLX5_MAX_TCP_HDR_OFFSET)
		max_lro_size -= MLX5_MAX_TCP_HDR_OFFSET;
	max_lro_size = RTE_MIN(max_lro_size, MLX5_MAX_LRO_SIZE);
	if (priv->max_lro_msg_size)
		priv->max_lro_msg_size =
			RTE_MIN((uint32_t)priv->max_lro_msg_size, max_lro_size);
	else
		priv->max_lro_msg_size = max_lro_size;
	DRV_LOG(DEBUG,
		"port %u Rx Queue %u max LRO message size adjusted to %u bytes",
		dev->data->port_id, idx, priv->max_lro_msg_size);
}

/**
 * Prepare both the size and the number of strides for Multi-Packet RQ.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param idx
 *   RX queue index.
 * @param desc
 *   Number of descriptors to configure in queue.
 * @param rx_seg_en
 *   Indicates whether Rx segmentation (buffer split) is enabled; if so,
 *   Multi-Packet RQ is not enabled.
 * @param min_mbuf_size
 *   Non-scatter minimum mbuf size, max_rx_pktlen plus overhead.
 * @param actual_log_stride_num
 *   Log number of strides to configure for this queue.
 * @param actual_log_stride_size
 *   Log stride size to configure for this queue.
 * @param is_extmem
 *   Whether an external pinned memory pool is used.
 * @return
 *   0 if Multi-Packet RQ is supported, otherwise -1.
 */
static int
mlx5_mprq_prepare(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
		  bool rx_seg_en, uint32_t min_mbuf_size,
		  uint32_t *actual_log_stride_num,
		  uint32_t *actual_log_stride_size,
		  bool is_extmem)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_port_config *config = &priv->config;
	struct mlx5_dev_cap *dev_cap = &priv->sh->dev_cap;
	uint32_t log_min_stride_num = dev_cap->mprq.log_min_stride_num;
	uint32_t log_max_stride_num = dev_cap->mprq.log_max_stride_num;
	uint32_t log_def_stride_num =
			RTE_MIN(RTE_MAX(MLX5_MPRQ_DEFAULT_LOG_STRIDE_NUM,
					log_min_stride_num),
				log_max_stride_num);
	uint32_t log_min_stride_size = dev_cap->mprq.log_min_stride_size;
	uint32_t log_max_stride_size = dev_cap->mprq.log_max_stride_size;
	uint32_t log_def_stride_size =
			RTE_MIN(RTE_MAX(MLX5_MPRQ_DEFAULT_LOG_STRIDE_SIZE,
					log_min_stride_size),
				log_max_stride_size);
	uint32_t log_stride_wqe_size;

	if (mlx5_check_mprq_support(dev) != 1 || rx_seg_en || is_extmem)
		goto unsupport;
	/* Checks if chosen number of strides is in supported range. */
	if (config->mprq.log_stride_num > log_max_stride_num ||
	    config->mprq.log_stride_num < log_min_stride_num) {
		*actual_log_stride_num = log_def_stride_num;
		DRV_LOG(WARNING,
			"Port %u Rx queue %u number of strides for Multi-Packet RQ is out of range, setting default value (%u)",
			dev->data->port_id, idx, RTE_BIT32(log_def_stride_num));
	} else {
		*actual_log_stride_num = config->mprq.log_stride_num;
	}
	/* Checks if chosen size of stride is in supported range.
*/ 1608 if (config->mprq.log_stride_size > log_max_stride_size || 1609 config->mprq.log_stride_size < log_min_stride_size) { 1610 *actual_log_stride_size = log_def_stride_size; 1611 DRV_LOG(WARNING, 1612 "Port %u Rx queue %u size of a stride for Multi-Packet RQ is out of range, setting default value (%u)", 1613 dev->data->port_id, idx, 1614 RTE_BIT32(log_def_stride_size)); 1615 } else { 1616 *actual_log_stride_size = config->mprq.log_stride_size; 1617 } 1618 /* Make the stride fit the mbuf size by default. */ 1619 if (*actual_log_stride_size == MLX5_MPRQ_DEFAULT_LOG_STRIDE_SIZE) { 1620 if (min_mbuf_size <= RTE_BIT32(log_max_stride_size)) { 1621 DRV_LOG(WARNING, 1622 "Port %u Rx queue %u size of a stride for Multi-Packet RQ is adjusted to match the mbuf size (%u)", 1623 dev->data->port_id, idx, min_mbuf_size); 1624 *actual_log_stride_size = log2above(min_mbuf_size); 1625 } else { 1626 goto unsupport; 1627 } 1628 } 1629 /* Make sure the stride size is greater than the headroom. */ 1630 if (RTE_BIT32(*actual_log_stride_size) < RTE_PKTMBUF_HEADROOM) { 1631 if (RTE_BIT32(log_max_stride_size) > RTE_PKTMBUF_HEADROOM) { 1632 DRV_LOG(WARNING, 1633 "Port %u Rx queue %u size of a stride for Multi-Packet RQ is adjusted to accommodate the headroom (%u)", 1634 dev->data->port_id, idx, RTE_PKTMBUF_HEADROOM); 1635 *actual_log_stride_size = log2above(RTE_PKTMBUF_HEADROOM); 1636 } else { 1637 goto unsupport; 1638 } 1639 } 1640 log_stride_wqe_size = *actual_log_stride_num + *actual_log_stride_size; 1641 /* Check if WQE buffer size is supported by hardware. */ 1642 if (log_stride_wqe_size < dev_cap->mprq.log_min_stride_wqe_size) { 1643 *actual_log_stride_num = log_def_stride_num; 1644 *actual_log_stride_size = log_def_stride_size; 1645 DRV_LOG(WARNING, 1646 "Port %u Rx queue %u size of WQE buffer for Multi-Packet RQ is too small, setting default values (stride_num_n=%u, stride_size_n=%u)", 1647 dev->data->port_id, idx, RTE_BIT32(log_def_stride_num), 1648 RTE_BIT32(log_def_stride_size)); 1649 log_stride_wqe_size = log_def_stride_num + log_def_stride_size; 1650 } 1651 MLX5_ASSERT(log_stride_wqe_size >= 1652 dev_cap->mprq.log_min_stride_wqe_size); 1653 if (desc <= RTE_BIT32(*actual_log_stride_num)) 1654 goto unsupport; 1655 if (min_mbuf_size > RTE_BIT32(log_stride_wqe_size)) { 1656 DRV_LOG(WARNING, "Port %u Rx queue %u " 1657 "Multi-Packet RQ is unsupported, WQE buffer size (%u) " 1658 "is smaller than min mbuf size (%u)", 1659 dev->data->port_id, idx, RTE_BIT32(log_stride_wqe_size), 1660 min_mbuf_size); 1661 goto unsupport; 1662 } 1663 DRV_LOG(DEBUG, "Port %u Rx queue %u " 1664 "Multi-Packet RQ is enabled strd_num_n = %u, strd_sz_n = %u", 1665 dev->data->port_id, idx, RTE_BIT32(*actual_log_stride_num), 1666 RTE_BIT32(*actual_log_stride_size)); 1667 return 0; 1668 unsupport: 1669 if (config->mprq.enabled) 1670 DRV_LOG(WARNING, 1671 "Port %u MPRQ is requested but cannot be enabled\n" 1672 " (requested: pkt_sz = %u, desc_num = %u," 1673 " rxq_num = %u, stride_sz = %u, stride_num = %u\n" 1674 " supported: min_rxqs_num = %u, min_buf_wqe_sz = %u" 1675 " min_stride_sz = %u, max_stride_sz = %u).\n" 1676 "Rx segment is %senabled. External mempool is %sused.", 1677 dev->data->port_id, min_mbuf_size, desc, priv->rxqs_n, 1678 RTE_BIT32(config->mprq.log_stride_size), 1679 RTE_BIT32(config->mprq.log_stride_num), 1680 config->mprq.min_rxqs_num, 1681 RTE_BIT32(dev_cap->mprq.log_min_stride_wqe_size), 1682 RTE_BIT32(dev_cap->mprq.log_min_stride_size), 1683 RTE_BIT32(dev_cap->mprq.log_max_stride_size), 1684 rx_seg_en ? 
"" : "not ", is_extmem ? "" : "not "); 1685 return -1; 1686 } 1687 1688 /** 1689 * Create a DPDK Rx queue. 1690 * 1691 * @param dev 1692 * Pointer to Ethernet device. 1693 * @param idx 1694 * RX queue index. 1695 * @param desc 1696 * Number of descriptors to configure in queue. 1697 * @param socket 1698 * NUMA socket on which memory must be allocated. 1699 * 1700 * @return 1701 * A DPDK queue object on success, NULL otherwise and rte_errno is set. 1702 */ 1703 struct mlx5_rxq_ctrl * 1704 mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, 1705 unsigned int socket, const struct rte_eth_rxconf *conf, 1706 const struct rte_eth_rxseg_split *rx_seg, uint16_t n_seg, 1707 bool is_extmem) 1708 { 1709 struct mlx5_priv *priv = dev->data->dev_private; 1710 struct mlx5_rxq_ctrl *tmpl; 1711 unsigned int mb_len = rte_pktmbuf_data_room_size(rx_seg[0].mp); 1712 struct mlx5_port_config *config = &priv->config; 1713 uint64_t offloads = conf->offloads | 1714 dev->data->dev_conf.rxmode.offloads; 1715 unsigned int lro_on_queue = !!(offloads & RTE_ETH_RX_OFFLOAD_TCP_LRO); 1716 unsigned int max_rx_pktlen = lro_on_queue ? 1717 dev->data->dev_conf.rxmode.max_lro_pkt_size : 1718 dev->data->mtu + (unsigned int)RTE_ETHER_HDR_LEN + 1719 RTE_ETHER_CRC_LEN; 1720 unsigned int non_scatter_min_mbuf_size = max_rx_pktlen + 1721 RTE_PKTMBUF_HEADROOM; 1722 unsigned int max_lro_size = 0; 1723 unsigned int first_mb_free_size = mb_len - RTE_PKTMBUF_HEADROOM; 1724 uint32_t mprq_log_actual_stride_num = 0; 1725 uint32_t mprq_log_actual_stride_size = 0; 1726 bool rx_seg_en = n_seg != 1 || rx_seg[0].offset || rx_seg[0].length; 1727 const int mprq_en = !mlx5_mprq_prepare(dev, idx, desc, rx_seg_en, 1728 non_scatter_min_mbuf_size, 1729 &mprq_log_actual_stride_num, 1730 &mprq_log_actual_stride_size, 1731 is_extmem); 1732 /* 1733 * Always allocate extra slots, even if eventually 1734 * the vector Rx will not be used. 1735 */ 1736 uint16_t desc_n = desc + config->rx_vec_en * MLX5_VPMD_DESCS_PER_LOOP; 1737 size_t alloc_size = sizeof(*tmpl) + desc_n * sizeof(struct rte_mbuf *); 1738 const struct rte_eth_rxseg_split *qs_seg = rx_seg; 1739 unsigned int tail_len; 1740 1741 if (mprq_en) { 1742 /* Trim the number of descs needed. */ 1743 desc >>= mprq_log_actual_stride_num; 1744 alloc_size += desc * sizeof(struct mlx5_mprq_buf *); 1745 } 1746 tmpl = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, alloc_size, 0, socket); 1747 if (!tmpl) { 1748 rte_errno = ENOMEM; 1749 return NULL; 1750 } 1751 LIST_INIT(&tmpl->owners); 1752 MLX5_ASSERT(n_seg && n_seg <= MLX5_MAX_RXQ_NSEG); 1753 /* 1754 * Save the original segment configuration in the shared queue 1755 * descriptor for the later check on the sibling queue creation. 1756 */ 1757 tmpl->rxseg_n = n_seg; 1758 rte_memcpy(tmpl->rxseg, qs_seg, 1759 sizeof(struct rte_eth_rxseg_split) * n_seg); 1760 /* 1761 * Build the array of actual buffer offsets and lengths. 1762 * Pad with the buffers from the last memory pool if 1763 * needed to handle max size packets, replace zero length 1764 * with the buffer length from the pool. 1765 */ 1766 tail_len = max_rx_pktlen; 1767 do { 1768 struct mlx5_eth_rxseg *hw_seg = 1769 &tmpl->rxq.rxseg[tmpl->rxq.rxseg_n]; 1770 uint32_t buf_len, offset, seg_len; 1771 1772 /* 1773 * For the buffers beyond descriptions offset is zero, 1774 * the first buffer contains head room. 1775 */ 1776 buf_len = rte_pktmbuf_data_room_size(qs_seg->mp); 1777 offset = (tmpl->rxq.rxseg_n >= n_seg ? 0 : qs_seg->offset) + 1778 (tmpl->rxq.rxseg_n ? 
			 0 : RTE_PKTMBUF_HEADROOM);
		/*
		 * For the buffers beyond the provided descriptions the length
		 * is the pool buffer length; zero lengths are replaced with
		 * the pool buffer length as well.
		 */
		seg_len = tmpl->rxq.rxseg_n >= n_seg ? buf_len :
			  qs_seg->length ?
			  qs_seg->length :
			  (buf_len - offset);
		/* Check is done in long int, no overflows. */
		if (buf_len < seg_len + offset) {
			DRV_LOG(ERR, "port %u Rx queue %u: Split offset/length "
				     "%u/%u can't be satisfied",
				     dev->data->port_id, idx,
				     qs_seg->length, qs_seg->offset);
			rte_errno = EINVAL;
			goto error;
		}
		if (seg_len > tail_len)
			seg_len = buf_len - offset;
		if (++tmpl->rxq.rxseg_n > MLX5_MAX_RXQ_NSEG) {
			DRV_LOG(ERR,
				"port %u too many SGEs (%u) needed to handle"
				" requested maximum packet size %u, the maximum"
				" supported are %u", dev->data->port_id,
				tmpl->rxq.rxseg_n, max_rx_pktlen,
				MLX5_MAX_RXQ_NSEG);
			rte_errno = ENOTSUP;
			goto error;
		}
		/* Build the actual scattering element in the queue object. */
		hw_seg->mp = qs_seg->mp;
		MLX5_ASSERT(offset <= UINT16_MAX);
		MLX5_ASSERT(seg_len <= UINT16_MAX);
		hw_seg->offset = (uint16_t)offset;
		hw_seg->length = (uint16_t)seg_len;
		/*
		 * Advance the segment descriptor; the padding is based
		 * on the attributes of the last descriptor.
		 */
		if (tmpl->rxq.rxseg_n < n_seg)
			qs_seg++;
		tail_len -= RTE_MIN(tail_len, seg_len);
	} while (tail_len || !rte_is_power_of_2(tmpl->rxq.rxseg_n));
	MLX5_ASSERT(tmpl->rxq.rxseg_n &&
		    tmpl->rxq.rxseg_n <= MLX5_MAX_RXQ_NSEG);
	if (tmpl->rxq.rxseg_n > 1 && !(offloads & RTE_ETH_RX_OFFLOAD_SCATTER)) {
		DRV_LOG(ERR, "port %u Rx queue %u: Scatter offload is not"
			" configured and not enough mbuf space(%u) to contain "
			"the maximum RX packet length(%u) with head-room(%u)",
			dev->data->port_id, idx, mb_len, max_rx_pktlen,
			RTE_PKTMBUF_HEADROOM);
		rte_errno = ENOSPC;
		goto error;
	}
	tmpl->is_hairpin = false;
	if (mlx5_mr_ctrl_init(&tmpl->rxq.mr_ctrl,
			      &priv->sh->cdev->mr_scache.dev_gen, socket)) {
		/* rte_errno is already set. */
		goto error;
	}
	tmpl->socket = socket;
	if (dev->data->dev_conf.intr_conf.rxq)
		tmpl->irq = 1;
	if (mprq_en) {
		/* TODO: Rx scatter isn't supported yet. */
		tmpl->rxq.sges_n = 0;
		tmpl->rxq.log_strd_num = mprq_log_actual_stride_num;
		tmpl->rxq.log_strd_sz = mprq_log_actual_stride_size;
		tmpl->rxq.strd_shift_en = MLX5_MPRQ_TWO_BYTE_SHIFT;
		tmpl->rxq.strd_scatter_en =
				!!(offloads & RTE_ETH_RX_OFFLOAD_SCATTER);
		tmpl->rxq.mprq_max_memcpy_len = RTE_MIN(first_mb_free_size,
						config->mprq.max_memcpy_len);
		max_lro_size = RTE_MIN(max_rx_pktlen,
				       RTE_BIT32(tmpl->rxq.log_strd_num) *
				       RTE_BIT32(tmpl->rxq.log_strd_sz));
	} else if (tmpl->rxq.rxseg_n == 1) {
		MLX5_ASSERT(max_rx_pktlen <= first_mb_free_size);
		tmpl->rxq.sges_n = 0;
		max_lro_size = max_rx_pktlen;
	} else if (offloads & RTE_ETH_RX_OFFLOAD_SCATTER) {
		unsigned int sges_n;

		if (lro_on_queue && first_mb_free_size <
		    MLX5_MAX_LRO_HEADER_FIX) {
			DRV_LOG(ERR, "Not enough space in the first segment(%u)"
				" to include the max header size(%u) for LRO",
				first_mb_free_size, MLX5_MAX_LRO_HEADER_FIX);
			rte_errno = ENOTSUP;
			goto error;
		}
		/*
		 * Determine the number of SGEs needed for a full packet
		 * and round it to the next power of two.
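		 * For instance (illustrative numbers only): with roughly 2 KB
		 * of data room per mbuf and the default 128 B headroom, a
		 * 9000 B maximum frame needs five data segments; the segment
		 * array is padded up to eight, so sges_n becomes 3
		 * (2^3 = 8 SGEs per packet).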
1874 */ 1875 sges_n = log2above(tmpl->rxq.rxseg_n); 1876 if (sges_n > MLX5_MAX_LOG_RQ_SEGS) { 1877 DRV_LOG(ERR, 1878 "port %u too many SGEs (%u) needed to handle" 1879 " requested maximum packet size %u, the maximum" 1880 " supported are %u", dev->data->port_id, 1881 1 << sges_n, max_rx_pktlen, 1882 1u << MLX5_MAX_LOG_RQ_SEGS); 1883 rte_errno = ENOTSUP; 1884 goto error; 1885 } 1886 tmpl->rxq.sges_n = sges_n; 1887 max_lro_size = max_rx_pktlen; 1888 } 1889 DRV_LOG(DEBUG, "port %u maximum number of segments per packet: %u", 1890 dev->data->port_id, 1 << tmpl->rxq.sges_n); 1891 if (desc % (1 << tmpl->rxq.sges_n)) { 1892 DRV_LOG(ERR, 1893 "port %u number of Rx queue descriptors (%u) is not a" 1894 " multiple of SGEs per packet (%u)", 1895 dev->data->port_id, 1896 desc, 1897 1 << tmpl->rxq.sges_n); 1898 rte_errno = EINVAL; 1899 goto error; 1900 } 1901 mlx5_max_lro_msg_size_adjust(dev, idx, max_lro_size); 1902 /* Toggle RX checksum offload if hardware supports it. */ 1903 tmpl->rxq.csum = !!(offloads & RTE_ETH_RX_OFFLOAD_CHECKSUM); 1904 /* Configure Rx timestamp. */ 1905 tmpl->rxq.hw_timestamp = !!(offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP); 1906 tmpl->rxq.timestamp_rx_flag = 0; 1907 if (tmpl->rxq.hw_timestamp && rte_mbuf_dyn_rx_timestamp_register( 1908 &tmpl->rxq.timestamp_offset, 1909 &tmpl->rxq.timestamp_rx_flag) != 0) { 1910 DRV_LOG(ERR, "Cannot register Rx timestamp field/flag"); 1911 goto error; 1912 } 1913 /* Configure VLAN stripping. */ 1914 tmpl->rxq.vlan_strip = !!(offloads & RTE_ETH_RX_OFFLOAD_VLAN_STRIP); 1915 /* By default, FCS (CRC) is stripped by hardware. */ 1916 tmpl->rxq.crc_present = 0; 1917 tmpl->rxq.lro = lro_on_queue; 1918 if (offloads & RTE_ETH_RX_OFFLOAD_KEEP_CRC) { 1919 if (priv->sh->config.hw_fcs_strip) { 1920 /* 1921 * RQs used for LRO-enabled TIRs should not be 1922 * configured to scatter the FCS. 1923 */ 1924 if (lro_on_queue) 1925 DRV_LOG(WARNING, 1926 "port %u CRC stripping has been " 1927 "disabled but will still be performed " 1928 "by hardware, because LRO is enabled", 1929 dev->data->port_id); 1930 else 1931 tmpl->rxq.crc_present = 1; 1932 } else { 1933 DRV_LOG(WARNING, 1934 "port %u CRC stripping has been disabled but will" 1935 " still be performed by hardware, make sure MLNX_OFED" 1936 " and firmware are up to date", 1937 dev->data->port_id); 1938 } 1939 } 1940 DRV_LOG(DEBUG, 1941 "port %u CRC stripping is %s, %u bytes will be subtracted from" 1942 " incoming frames to hide it", 1943 dev->data->port_id, 1944 tmpl->rxq.crc_present ? "disabled" : "enabled", 1945 tmpl->rxq.crc_present << 2); 1946 tmpl->rxq.rss_hash = !!priv->rss_conf.rss_hf && 1947 (!!(dev->data->dev_conf.rxmode.mq_mode & RTE_ETH_MQ_RX_RSS)); 1948 /* Save port ID. 
*/ 1949 tmpl->rxq.port_id = dev->data->port_id; 1950 tmpl->sh = priv->sh; 1951 tmpl->rxq.mp = rx_seg[0].mp; 1952 tmpl->rxq.elts_n = log2above(desc); 1953 tmpl->rxq.rq_repl_thresh = MLX5_VPMD_RXQ_RPLNSH_THRESH(desc_n); 1954 tmpl->rxq.elts = (struct rte_mbuf *(*)[desc_n])(tmpl + 1); 1955 tmpl->rxq.mprq_bufs = 1956 (struct mlx5_mprq_buf *(*)[desc])(*tmpl->rxq.elts + desc_n); 1957 tmpl->rxq.idx = idx; 1958 if (conf->share_group > 0) { 1959 tmpl->rxq.shared = 1; 1960 tmpl->share_group = conf->share_group; 1961 tmpl->share_qid = conf->share_qid; 1962 LIST_INSERT_HEAD(&priv->sh->shared_rxqs, tmpl, share_entry); 1963 } 1964 LIST_INSERT_HEAD(&priv->rxqsctrl, tmpl, next); 1965 return tmpl; 1966 error: 1967 mlx5_mr_btree_free(&tmpl->rxq.mr_ctrl.cache_bh); 1968 mlx5_free(tmpl); 1969 return NULL; 1970 } 1971 1972 /** 1973 * Create a DPDK Rx hairpin queue. 1974 * 1975 * @param dev 1976 * Pointer to Ethernet device. 1977 * @param rxq 1978 * RX queue. 1979 * @param desc 1980 * Number of descriptors to configure in queue. 1981 * @param hairpin_conf 1982 * The hairpin binding configuration. 1983 * 1984 * @return 1985 * A DPDK queue object on success, NULL otherwise and rte_errno is set. 1986 */ 1987 struct mlx5_rxq_ctrl * 1988 mlx5_rxq_hairpin_new(struct rte_eth_dev *dev, struct mlx5_rxq_priv *rxq, 1989 uint16_t desc, 1990 const struct rte_eth_hairpin_conf *hairpin_conf) 1991 { 1992 uint16_t idx = rxq->idx; 1993 struct mlx5_priv *priv = dev->data->dev_private; 1994 struct mlx5_rxq_ctrl *tmpl; 1995 1996 tmpl = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, sizeof(*tmpl), 0, 1997 SOCKET_ID_ANY); 1998 if (!tmpl) { 1999 rte_errno = ENOMEM; 2000 return NULL; 2001 } 2002 LIST_INIT(&tmpl->owners); 2003 rxq->ctrl = tmpl; 2004 LIST_INSERT_HEAD(&tmpl->owners, rxq, owner_entry); 2005 tmpl->is_hairpin = true; 2006 tmpl->socket = SOCKET_ID_ANY; 2007 tmpl->rxq.rss_hash = 0; 2008 tmpl->rxq.port_id = dev->data->port_id; 2009 tmpl->sh = priv->sh; 2010 tmpl->rxq.mp = NULL; 2011 tmpl->rxq.elts_n = log2above(desc); 2012 tmpl->rxq.elts = NULL; 2013 tmpl->rxq.mr_ctrl.cache_bh = (struct mlx5_mr_btree) { 0 }; 2014 tmpl->rxq.idx = idx; 2015 rxq->hairpin_conf = *hairpin_conf; 2016 mlx5_rxq_ref(dev, idx); 2017 LIST_INSERT_HEAD(&priv->rxqsctrl, tmpl, next); 2018 return tmpl; 2019 } 2020 2021 /** 2022 * Increase Rx queue reference count. 2023 * 2024 * @param dev 2025 * Pointer to Ethernet device. 2026 * @param idx 2027 * RX queue index. 2028 * 2029 * @return 2030 * A pointer to the queue if it exists, NULL otherwise. 2031 */ 2032 struct mlx5_rxq_priv * 2033 mlx5_rxq_ref(struct rte_eth_dev *dev, uint16_t idx) 2034 { 2035 struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, idx); 2036 2037 if (rxq != NULL) 2038 __atomic_fetch_add(&rxq->refcnt, 1, __ATOMIC_RELAXED); 2039 return rxq; 2040 } 2041 2042 /** 2043 * Dereference a Rx queue. 2044 * 2045 * @param dev 2046 * Pointer to Ethernet device. 2047 * @param idx 2048 * RX queue index. 2049 * 2050 * @return 2051 * Updated reference count. 2052 */ 2053 uint32_t 2054 mlx5_rxq_deref(struct rte_eth_dev *dev, uint16_t idx) 2055 { 2056 struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, idx); 2057 2058 if (rxq == NULL) 2059 return 0; 2060 return __atomic_fetch_sub(&rxq->refcnt, 1, __ATOMIC_RELAXED) - 1; 2061 } 2062 2063 /** 2064 * Get a Rx queue. 2065 * 2066 * @param dev 2067 * Pointer to Ethernet device. 2068 * @param idx 2069 * RX queue index. 2070 * 2071 * @return 2072 * A pointer to the queue if it exists, NULL otherwise. 
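 *
 * A minimal reference-counting sketch (hypothetical caller, added for
 * illustration only):
 *
 *   struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, idx);
 *
 *   if (rxq != NULL && mlx5_rxq_ref(dev, idx) != NULL) {
 *           (access rxq->ctrl here)
 *           (void)mlx5_rxq_deref(dev, idx);
 *   }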
2073 */ 2074 struct mlx5_rxq_priv * 2075 mlx5_rxq_get(struct rte_eth_dev *dev, uint16_t idx) 2076 { 2077 struct mlx5_priv *priv = dev->data->dev_private; 2078 2079 if (idx >= priv->rxqs_n) 2080 return NULL; 2081 MLX5_ASSERT(priv->rxq_privs != NULL); 2082 return (*priv->rxq_privs)[idx]; 2083 } 2084 2085 /** 2086 * Get Rx queue shareable control. 2087 * 2088 * @param dev 2089 * Pointer to Ethernet device. 2090 * @param idx 2091 * RX queue index. 2092 * 2093 * @return 2094 * A pointer to the queue control if it exists, NULL otherwise. 2095 */ 2096 struct mlx5_rxq_ctrl * 2097 mlx5_rxq_ctrl_get(struct rte_eth_dev *dev, uint16_t idx) 2098 { 2099 struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, idx); 2100 2101 return rxq == NULL ? NULL : rxq->ctrl; 2102 } 2103 2104 /** 2105 * Get Rx queue shareable data. 2106 * 2107 * @param dev 2108 * Pointer to Ethernet device. 2109 * @param idx 2110 * RX queue index. 2111 * 2112 * @return 2113 * A pointer to the queue data if it exists, NULL otherwise. 2114 */ 2115 struct mlx5_rxq_data * 2116 mlx5_rxq_data_get(struct rte_eth_dev *dev, uint16_t idx) 2117 { 2118 struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, idx); 2119 2120 return rxq == NULL ? NULL : &rxq->ctrl->rxq; 2121 } 2122 2123 /** 2124 * Increase an external Rx queue reference count. 2125 * 2126 * @param dev 2127 * Pointer to Ethernet device. 2128 * @param idx 2129 * External RX queue index. 2130 * 2131 * @return 2132 * A pointer to the queue if it exists, NULL otherwise. 2133 */ 2134 struct mlx5_external_rxq * 2135 mlx5_ext_rxq_ref(struct rte_eth_dev *dev, uint16_t idx) 2136 { 2137 struct mlx5_external_rxq *rxq = mlx5_ext_rxq_get(dev, idx); 2138 2139 __atomic_fetch_add(&rxq->refcnt, 1, __ATOMIC_RELAXED); 2140 return rxq; 2141 } 2142 2143 /** 2144 * Decrease an external Rx queue reference count. 2145 * 2146 * @param dev 2147 * Pointer to Ethernet device. 2148 * @param idx 2149 * External RX queue index. 2150 * 2151 * @return 2152 * Updated reference count. 2153 */ 2154 uint32_t 2155 mlx5_ext_rxq_deref(struct rte_eth_dev *dev, uint16_t idx) 2156 { 2157 struct mlx5_external_rxq *rxq = mlx5_ext_rxq_get(dev, idx); 2158 2159 return __atomic_fetch_sub(&rxq->refcnt, 1, __ATOMIC_RELAXED) - 1; 2160 } 2161 2162 /** 2163 * Get an external Rx queue. 2164 * 2165 * @param dev 2166 * Pointer to Ethernet device. 2167 * @param idx 2168 * External Rx queue index. 2169 * 2170 * @return 2171 * A pointer to the queue if it exists, NULL otherwise. 2172 */ 2173 struct mlx5_external_rxq * 2174 mlx5_ext_rxq_get(struct rte_eth_dev *dev, uint16_t idx) 2175 { 2176 struct mlx5_priv *priv = dev->data->dev_private; 2177 2178 MLX5_ASSERT(mlx5_is_external_rxq(dev, idx)); 2179 return &priv->ext_rxqs[idx - MLX5_EXTERNAL_RX_QUEUE_ID_MIN]; 2180 } 2181 2182 /** 2183 * Dereference a list of Rx queues. 2184 * 2185 * @param dev 2186 * Pointer to Ethernet device. 2187 * @param queues 2188 * List of Rx queues to deref. 2189 * @param queues_n 2190 * Number of queues in the array. 2191 */ 2192 static void 2193 mlx5_rxqs_deref(struct rte_eth_dev *dev, uint16_t *queues, 2194 const uint32_t queues_n) 2195 { 2196 uint32_t i; 2197 2198 for (i = 0; i < queues_n; i++) { 2199 if (mlx5_is_external_rxq(dev, queues[i])) 2200 claim_nonzero(mlx5_ext_rxq_deref(dev, queues[i])); 2201 else 2202 claim_nonzero(mlx5_rxq_deref(dev, queues[i])); 2203 } 2204 } 2205 2206 /** 2207 * Increase reference count for list of Rx queues. 2208 * 2209 * @param dev 2210 * Pointer to Ethernet device. 2211 * @param queues 2212 * List of Rx queues to ref. 
2213 * @param queues_n 2214 * Number of queues in the array. 2215 * 2216 * @return 2217 * 0 on success, a negative errno value otherwise and rte_errno is set. 2218 */ 2219 static int 2220 mlx5_rxqs_ref(struct rte_eth_dev *dev, uint16_t *queues, 2221 const uint32_t queues_n) 2222 { 2223 uint32_t i; 2224 2225 for (i = 0; i != queues_n; ++i) { 2226 if (mlx5_is_external_rxq(dev, queues[i])) { 2227 if (mlx5_ext_rxq_ref(dev, queues[i]) == NULL) 2228 goto error; 2229 } else { 2230 if (mlx5_rxq_ref(dev, queues[i]) == NULL) 2231 goto error; 2232 } 2233 } 2234 return 0; 2235 error: 2236 mlx5_rxqs_deref(dev, queues, i); 2237 rte_errno = EINVAL; 2238 return -rte_errno; 2239 } 2240 2241 /** 2242 * Release a Rx queue. 2243 * 2244 * @param dev 2245 * Pointer to Ethernet device. 2246 * @param idx 2247 * RX queue index. 2248 * 2249 * @return 2250 * 1 while a reference on it exists, 0 when freed. 2251 */ 2252 int 2253 mlx5_rxq_release(struct rte_eth_dev *dev, uint16_t idx) 2254 { 2255 struct mlx5_priv *priv = dev->data->dev_private; 2256 struct mlx5_rxq_priv *rxq; 2257 struct mlx5_rxq_ctrl *rxq_ctrl; 2258 uint32_t refcnt; 2259 2260 if (priv->rxq_privs == NULL) 2261 return 0; 2262 rxq = mlx5_rxq_get(dev, idx); 2263 if (rxq == NULL || rxq->refcnt == 0) 2264 return 0; 2265 rxq_ctrl = rxq->ctrl; 2266 refcnt = mlx5_rxq_deref(dev, idx); 2267 if (refcnt > 1) { 2268 return 1; 2269 } else if (refcnt == 1) { /* RxQ stopped. */ 2270 priv->obj_ops.rxq_obj_release(rxq); 2271 if (!rxq_ctrl->started && rxq_ctrl->obj != NULL) { 2272 LIST_REMOVE(rxq_ctrl->obj, next); 2273 mlx5_free(rxq_ctrl->obj); 2274 rxq_ctrl->obj = NULL; 2275 } 2276 if (!rxq_ctrl->is_hairpin) { 2277 if (!rxq_ctrl->started) 2278 rxq_free_elts(rxq_ctrl); 2279 dev->data->rx_queue_state[idx] = 2280 RTE_ETH_QUEUE_STATE_STOPPED; 2281 } 2282 } else { /* Refcnt zero, closing device. */ 2283 LIST_REMOVE(rxq, owner_entry); 2284 if (LIST_EMPTY(&rxq_ctrl->owners)) { 2285 if (!rxq_ctrl->is_hairpin) 2286 mlx5_mr_btree_free 2287 (&rxq_ctrl->rxq.mr_ctrl.cache_bh); 2288 if (rxq_ctrl->rxq.shared) 2289 LIST_REMOVE(rxq_ctrl, share_entry); 2290 LIST_REMOVE(rxq_ctrl, next); 2291 mlx5_free(rxq_ctrl); 2292 } 2293 dev->data->rx_queues[idx] = NULL; 2294 mlx5_free(rxq); 2295 (*priv->rxq_privs)[idx] = NULL; 2296 } 2297 return 0; 2298 } 2299 2300 /** 2301 * Verify the Rx Queue list is empty 2302 * 2303 * @param dev 2304 * Pointer to Ethernet device. 2305 * 2306 * @return 2307 * The number of object not released. 2308 */ 2309 int 2310 mlx5_rxq_verify(struct rte_eth_dev *dev) 2311 { 2312 struct mlx5_priv *priv = dev->data->dev_private; 2313 struct mlx5_rxq_ctrl *rxq_ctrl; 2314 int ret = 0; 2315 2316 LIST_FOREACH(rxq_ctrl, &priv->rxqsctrl, next) { 2317 DRV_LOG(DEBUG, "port %u Rx Queue %u still referenced", 2318 dev->data->port_id, rxq_ctrl->rxq.idx); 2319 ++ret; 2320 } 2321 return ret; 2322 } 2323 2324 /** 2325 * Verify the external Rx Queue list is empty. 2326 * 2327 * @param dev 2328 * Pointer to Ethernet device. 2329 * 2330 * @return 2331 * The number of object not released. 
2332 */ 2333 int 2334 mlx5_ext_rxq_verify(struct rte_eth_dev *dev) 2335 { 2336 struct mlx5_priv *priv = dev->data->dev_private; 2337 struct mlx5_external_rxq *rxq; 2338 uint32_t i; 2339 int ret = 0; 2340 2341 if (priv->ext_rxqs == NULL) 2342 return 0; 2343 2344 for (i = MLX5_EXTERNAL_RX_QUEUE_ID_MIN; i <= UINT16_MAX ; ++i) { 2345 rxq = mlx5_ext_rxq_get(dev, i); 2346 if (rxq->refcnt < 2) 2347 continue; 2348 DRV_LOG(DEBUG, "Port %u external RxQ %u still referenced.", 2349 dev->data->port_id, i); 2350 ++ret; 2351 } 2352 return ret; 2353 } 2354 2355 /** 2356 * Check whether RxQ type is Hairpin. 2357 * 2358 * @param dev 2359 * Pointer to Ethernet device. 2360 * @param idx 2361 * Rx queue index. 2362 * 2363 * @return 2364 * True if Rx queue type is Hairpin, otherwise False. 2365 */ 2366 bool 2367 mlx5_rxq_is_hairpin(struct rte_eth_dev *dev, uint16_t idx) 2368 { 2369 struct mlx5_rxq_ctrl *rxq_ctrl; 2370 2371 if (mlx5_is_external_rxq(dev, idx)) 2372 return false; 2373 rxq_ctrl = mlx5_rxq_ctrl_get(dev, idx); 2374 return (rxq_ctrl != NULL && rxq_ctrl->is_hairpin); 2375 } 2376 2377 /* 2378 * Get a Rx hairpin queue configuration. 2379 * 2380 * @param dev 2381 * Pointer to Ethernet device. 2382 * @param idx 2383 * Rx queue index. 2384 * 2385 * @return 2386 * Pointer to the configuration if a hairpin RX queue, otherwise NULL. 2387 */ 2388 const struct rte_eth_hairpin_conf * 2389 mlx5_rxq_get_hairpin_conf(struct rte_eth_dev *dev, uint16_t idx) 2390 { 2391 if (mlx5_rxq_is_hairpin(dev, idx)) { 2392 struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, idx); 2393 2394 return rxq != NULL ? &rxq->hairpin_conf : NULL; 2395 } 2396 return NULL; 2397 } 2398 2399 /** 2400 * Match queues listed in arguments to queues contained in indirection table 2401 * object. 2402 * 2403 * @param ind_tbl 2404 * Pointer to indirection table to match. 2405 * @param queues 2406 * Queues to match to queues in indirection table. 2407 * @param queues_n 2408 * Number of queues in the array. 2409 * 2410 * @return 2411 * 1 if all queues in indirection table match 0 otherwise. 2412 */ 2413 static int 2414 mlx5_ind_table_obj_match_queues(const struct mlx5_ind_table_obj *ind_tbl, 2415 const uint16_t *queues, uint32_t queues_n) 2416 { 2417 return (ind_tbl->queues_n == queues_n) && 2418 (!memcmp(ind_tbl->queues, queues, 2419 ind_tbl->queues_n * sizeof(ind_tbl->queues[0]))); 2420 } 2421 2422 /** 2423 * Get an indirection table. 2424 * 2425 * @param dev 2426 * Pointer to Ethernet device. 2427 * @param queues 2428 * Queues entering in the indirection table. 2429 * @param queues_n 2430 * Number of queues in the array. 2431 * 2432 * @return 2433 * An indirection table if found. 2434 */ 2435 struct mlx5_ind_table_obj * 2436 mlx5_ind_table_obj_get(struct rte_eth_dev *dev, const uint16_t *queues, 2437 uint32_t queues_n) 2438 { 2439 struct mlx5_priv *priv = dev->data->dev_private; 2440 struct mlx5_ind_table_obj *ind_tbl; 2441 2442 rte_rwlock_read_lock(&priv->ind_tbls_lock); 2443 LIST_FOREACH(ind_tbl, &priv->ind_tbls, next) { 2444 if ((ind_tbl->queues_n == queues_n) && 2445 (memcmp(ind_tbl->queues, queues, 2446 ind_tbl->queues_n * sizeof(ind_tbl->queues[0])) 2447 == 0)) { 2448 __atomic_fetch_add(&ind_tbl->refcnt, 1, 2449 __ATOMIC_RELAXED); 2450 break; 2451 } 2452 } 2453 rte_rwlock_read_unlock(&priv->ind_tbls_lock); 2454 return ind_tbl; 2455 } 2456 2457 /** 2458 * Release an indirection table. 2459 * 2460 * @param dev 2461 * Pointer to Ethernet device. 2462 * @param ind_table 2463 * Indirection table to release. 
* @param deref_rxqs 2465 * If true, dereference the RX queues related to the indirection table. 2466 * Otherwise, no additional action will be taken. 2467 * 2468 * @return 2469 * 1 while a reference on it exists, 0 when freed. 2470 */ 2471 int 2472 mlx5_ind_table_obj_release(struct rte_eth_dev *dev, 2473 struct mlx5_ind_table_obj *ind_tbl, 2474 bool deref_rxqs) 2475 { 2476 struct mlx5_priv *priv = dev->data->dev_private; 2477 unsigned int ret; 2478 2479 rte_rwlock_write_lock(&priv->ind_tbls_lock); 2480 ret = __atomic_fetch_sub(&ind_tbl->refcnt, 1, __ATOMIC_RELAXED) - 1; 2481 if (!ret) 2482 LIST_REMOVE(ind_tbl, next); 2483 rte_rwlock_write_unlock(&priv->ind_tbls_lock); 2484 if (ret) 2485 return 1; 2486 priv->obj_ops.ind_table_destroy(ind_tbl); 2487 if (deref_rxqs) 2488 mlx5_rxqs_deref(dev, ind_tbl->queues, ind_tbl->queues_n); 2489 mlx5_free(ind_tbl); 2490 return 0; 2491 } 2492 2493 /** 2494 * Verify the indirection table list is empty. 2495 * 2496 * @param dev 2497 * Pointer to Ethernet device. 2498 * 2499 * @return 2500 * The number of objects not released. 2501 */ 2502 int 2503 mlx5_ind_table_obj_verify(struct rte_eth_dev *dev) 2504 { 2505 struct mlx5_priv *priv = dev->data->dev_private; 2506 struct mlx5_ind_table_obj *ind_tbl; 2507 int ret = 0; 2508 2509 rte_rwlock_read_lock(&priv->ind_tbls_lock); 2510 LIST_FOREACH(ind_tbl, &priv->ind_tbls, next) { 2511 DRV_LOG(DEBUG, 2512 "port %u indirection table obj %p still referenced", 2513 dev->data->port_id, (void *)ind_tbl); 2514 ++ret; 2515 } 2516 rte_rwlock_read_unlock(&priv->ind_tbls_lock); 2517 return ret; 2518 } 2519 2520 /** 2521 * Set up the fields of an indirection table structure. 2522 * 2523 * @param dev 2524 * Pointer to Ethernet device. 2525 * @param ind_table 2526 * Indirection table to set up. 2527 * @param ref_qs 2528 * Whether to increment RxQ reference counters. 2529 * 2530 * @return 2531 * 0 on success, a negative errno value otherwise and rte_errno is set. 2532 */ 2533 int 2534 mlx5_ind_table_obj_setup(struct rte_eth_dev *dev, 2535 struct mlx5_ind_table_obj *ind_tbl, 2536 bool ref_qs) 2537 { 2538 struct mlx5_priv *priv = dev->data->dev_private; 2539 uint32_t queues_n = ind_tbl->queues_n; 2540 int ret; 2541 const unsigned int n = rte_is_power_of_2(queues_n) ? 2542 log2above(queues_n) : 2543 log2above(priv->sh->dev_cap.ind_table_max_size); 2544 2545 if (ref_qs && mlx5_rxqs_ref(dev, ind_tbl->queues, queues_n) < 0) { 2546 DRV_LOG(DEBUG, "Port %u invalid indirection table queues.", 2547 dev->data->port_id); 2548 return -rte_errno; 2549 } 2550 ret = priv->obj_ops.ind_table_new(dev, n, ind_tbl); 2551 if (ret) { 2552 DRV_LOG(DEBUG, "Port %u cannot create a new indirection table.", 2553 dev->data->port_id); 2554 if (ref_qs) { 2555 int err = rte_errno; 2556 2557 mlx5_rxqs_deref(dev, ind_tbl->queues, queues_n); 2558 rte_errno = err; 2559 } 2560 return ret; 2561 } 2562 __atomic_fetch_add(&ind_tbl->refcnt, 1, __ATOMIC_RELAXED); 2563 return 0; 2564 } 2565 2566 /** 2567 * Create an indirection table. 2568 * 2569 * @param dev 2570 * Pointer to Ethernet device. 2571 * @param queues 2572 * Queues entering in the indirection table. 2573 * @param queues_n 2574 * Number of queues in the array. 2575 * @param standalone 2576 * Indirection table for Standalone queue. 2577 * @param ref_qs 2578 * Whether to increment RxQ reference counters. 2579 * 2580 * @return 2581 * The Verbs/DevX object initialized, NULL otherwise and rte_errno is set.
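 *
 * A minimal usage sketch (hypothetical caller; the queue indexes below are
 * placeholders added for illustration):
 *
 *   uint16_t queues[] = { 0, 1, 2, 3 };
 *   struct mlx5_ind_table_obj *ind_tbl =
 *           mlx5_ind_table_obj_new(dev, queues, RTE_DIM(queues),
 *                                  false, true);
 *
 *   if (ind_tbl != NULL)
 *           (void)mlx5_ind_table_obj_release(dev, ind_tbl, true);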
2582 */ 2583 struct mlx5_ind_table_obj * 2584 mlx5_ind_table_obj_new(struct rte_eth_dev *dev, const uint16_t *queues, 2585 uint32_t queues_n, bool standalone, bool ref_qs) 2586 { 2587 struct mlx5_priv *priv = dev->data->dev_private; 2588 struct mlx5_ind_table_obj *ind_tbl; 2589 int ret; 2590 uint32_t max_queues_n = priv->rxqs_n > queues_n ? priv->rxqs_n : queues_n; 2591 2592 /* 2593 * Allocate maximum queues for shared action as queue number 2594 * maybe modified later. 2595 */ 2596 ind_tbl = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*ind_tbl) + 2597 (standalone ? max_queues_n : queues_n) * 2598 sizeof(uint16_t), 0, SOCKET_ID_ANY); 2599 if (!ind_tbl) { 2600 rte_errno = ENOMEM; 2601 return NULL; 2602 } 2603 ind_tbl->queues_n = queues_n; 2604 ind_tbl->queues = (uint16_t *)(ind_tbl + 1); 2605 memcpy(ind_tbl->queues, queues, queues_n * sizeof(*queues)); 2606 ret = mlx5_ind_table_obj_setup(dev, ind_tbl, ref_qs); 2607 if (ret < 0) { 2608 mlx5_free(ind_tbl); 2609 return NULL; 2610 } 2611 rte_rwlock_write_lock(&priv->ind_tbls_lock); 2612 if (!standalone) 2613 LIST_INSERT_HEAD(&priv->ind_tbls, ind_tbl, next); 2614 else 2615 LIST_INSERT_HEAD(&priv->standalone_ind_tbls, ind_tbl, next); 2616 rte_rwlock_write_unlock(&priv->ind_tbls_lock); 2617 2618 return ind_tbl; 2619 } 2620 2621 static int 2622 mlx5_ind_table_obj_check_standalone(struct rte_eth_dev *dev __rte_unused, 2623 struct mlx5_ind_table_obj *ind_tbl) 2624 { 2625 uint32_t refcnt; 2626 2627 refcnt = __atomic_load_n(&ind_tbl->refcnt, __ATOMIC_RELAXED); 2628 if (refcnt <= 1) 2629 return 0; 2630 /* 2631 * Modification of indirection tables having more than 1 2632 * reference is unsupported. 2633 */ 2634 DRV_LOG(DEBUG, 2635 "Port %u cannot modify indirection table %p (refcnt %u > 1).", 2636 dev->data->port_id, (void *)ind_tbl, refcnt); 2637 rte_errno = EINVAL; 2638 return -rte_errno; 2639 } 2640 2641 /** 2642 * Modify an indirection table. 2643 * 2644 * @param dev 2645 * Pointer to Ethernet device. 2646 * @param ind_table 2647 * Indirection table to modify. 2648 * @param queues 2649 * Queues replacement for the indirection table. 2650 * @param queues_n 2651 * Number of queues in the array. 2652 * @param standalone 2653 * Indirection table for Standalone queue. 2654 * @param ref_new_qs 2655 * Whether to increment new RxQ set reference counters. 2656 * @param deref_old_qs 2657 * Whether to decrement old RxQ set reference counters. 2658 * 2659 * @return 2660 * 0 on success, a negative errno value otherwise and rte_errno is set. 2661 */ 2662 int 2663 mlx5_ind_table_obj_modify(struct rte_eth_dev *dev, 2664 struct mlx5_ind_table_obj *ind_tbl, 2665 uint16_t *queues, const uint32_t queues_n, 2666 bool standalone, bool ref_new_qs, bool deref_old_qs) 2667 { 2668 struct mlx5_priv *priv = dev->data->dev_private; 2669 int ret; 2670 const unsigned int n = rte_is_power_of_2(queues_n) ? 
2671 log2above(queues_n) : 2672 log2above(priv->sh->dev_cap.ind_table_max_size); 2673 2674 MLX5_ASSERT(standalone); 2675 RTE_SET_USED(standalone); 2676 if (mlx5_ind_table_obj_check_standalone(dev, ind_tbl) < 0) 2677 return -rte_errno; 2678 if (ref_new_qs && mlx5_rxqs_ref(dev, queues, queues_n) < 0) { 2679 DRV_LOG(DEBUG, "Port %u invalid indirection table queues.", 2680 dev->data->port_id); 2681 return -rte_errno; 2682 } 2683 MLX5_ASSERT(priv->obj_ops.ind_table_modify); 2684 ret = priv->obj_ops.ind_table_modify(dev, n, queues, queues_n, ind_tbl); 2685 if (ret) { 2686 DRV_LOG(DEBUG, "Port %u cannot modify indirection table.", 2687 dev->data->port_id); 2688 if (ref_new_qs) { 2689 int err = rte_errno; 2690 2691 mlx5_rxqs_deref(dev, queues, queues_n); 2692 rte_errno = err; 2693 } 2694 return ret; 2695 } 2696 if (deref_old_qs) 2697 mlx5_rxqs_deref(dev, ind_tbl->queues, ind_tbl->queues_n); 2698 ind_tbl->queues_n = queues_n; 2699 ind_tbl->queues = queues; 2700 return 0; 2701 } 2702 2703 /** 2704 * Attach an indirection table to its queues. 2705 * 2706 * @param dev 2707 * Pointer to Ethernet device. 2708 * @param ind_table 2709 * Indirection table to attach. 2710 * 2711 * @return 2712 * 0 on success, a negative errno value otherwise and rte_errno is set. 2713 */ 2714 int 2715 mlx5_ind_table_obj_attach(struct rte_eth_dev *dev, 2716 struct mlx5_ind_table_obj *ind_tbl) 2717 { 2718 int ret; 2719 2720 ret = mlx5_ind_table_obj_modify(dev, ind_tbl, ind_tbl->queues, 2721 ind_tbl->queues_n, 2722 true /* standalone */, 2723 true /* ref_new_qs */, 2724 false /* deref_old_qs */); 2725 if (ret != 0) 2726 DRV_LOG(ERR, "Port %u could not modify indirect table obj %p", 2727 dev->data->port_id, (void *)ind_tbl); 2728 return ret; 2729 } 2730 2731 /** 2732 * Detach an indirection table from its queues. 2733 * 2734 * @param dev 2735 * Pointer to Ethernet device. 2736 * @param ind_table 2737 * Indirection table to detach. 2738 * 2739 * @return 2740 * 0 on success, a negative errno value otherwise and rte_errno is set. 2741 */ 2742 int 2743 mlx5_ind_table_obj_detach(struct rte_eth_dev *dev, 2744 struct mlx5_ind_table_obj *ind_tbl) 2745 { 2746 struct mlx5_priv *priv = dev->data->dev_private; 2747 const unsigned int n = rte_is_power_of_2(ind_tbl->queues_n) ? 
2748 log2above(ind_tbl->queues_n) : 2749 log2above(priv->sh->dev_cap.ind_table_max_size); 2750 unsigned int i; 2751 int ret; 2752 2753 ret = mlx5_ind_table_obj_check_standalone(dev, ind_tbl); 2754 if (ret != 0) 2755 return ret; 2756 MLX5_ASSERT(priv->obj_ops.ind_table_modify); 2757 ret = priv->obj_ops.ind_table_modify(dev, n, NULL, 0, ind_tbl); 2758 if (ret != 0) { 2759 DRV_LOG(ERR, "Port %u could not modify indirect table obj %p", 2760 dev->data->port_id, (void *)ind_tbl); 2761 return ret; 2762 } 2763 for (i = 0; i < ind_tbl->queues_n; i++) 2764 mlx5_rxq_release(dev, ind_tbl->queues[i]); 2765 return ret; 2766 } 2767 2768 int 2769 mlx5_hrxq_match_cb(void *tool_ctx __rte_unused, struct mlx5_list_entry *entry, 2770 void *cb_ctx) 2771 { 2772 struct mlx5_flow_cb_ctx *ctx = cb_ctx; 2773 struct mlx5_flow_rss_desc *rss_desc = ctx->data; 2774 struct mlx5_hrxq *hrxq = container_of(entry, typeof(*hrxq), entry); 2775 2776 return (hrxq->rss_key_len != rss_desc->key_len || 2777 hrxq->symmetric_hash_function != rss_desc->symmetric_hash_function || 2778 memcmp(hrxq->rss_key, rss_desc->key, rss_desc->key_len) || 2779 hrxq->hws_flags != rss_desc->hws_flags || 2780 hrxq->hash_fields != rss_desc->hash_fields || 2781 hrxq->ind_table->queues_n != rss_desc->queue_num || 2782 memcmp(hrxq->ind_table->queues, rss_desc->queue, 2783 rss_desc->queue_num * sizeof(rss_desc->queue[0]))); 2784 } 2785 2786 /** 2787 * Modify an Rx Hash queue configuration. 2788 * 2789 * @param dev 2790 * Pointer to Ethernet device. 2791 * @param hrxq 2792 * Index to Hash Rx queue to modify. 2793 * @param rss_key 2794 * RSS key for the Rx hash queue. 2795 * @param rss_key_len 2796 * RSS key length. 2797 * @param hash_fields 2798 * Verbs protocol hash field to make the RSS on. 2799 * @param queues 2800 * Queues entering in hash queue. In case of empty hash_fields only the 2801 * first queue index will be taken for the indirection table. 2802 * @param queues_n 2803 * Number of queues. 2804 * 2805 * @return 2806 * 0 on success, a negative errno value otherwise and rte_errno is set. 2807 */ 2808 int 2809 mlx5_hrxq_modify(struct rte_eth_dev *dev, uint32_t hrxq_idx, 2810 const uint8_t *rss_key, uint32_t rss_key_len, 2811 uint64_t hash_fields, bool symmetric_hash_function, 2812 const uint16_t *queues, uint32_t queues_n) 2813 { 2814 int err; 2815 struct mlx5_ind_table_obj *ind_tbl = NULL; 2816 struct mlx5_priv *priv = dev->data->dev_private; 2817 struct mlx5_hrxq *hrxq = 2818 mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ], hrxq_idx); 2819 bool dev_started = !!dev->data->dev_started; 2820 int ret; 2821 2822 if (!hrxq) { 2823 rte_errno = EINVAL; 2824 return -rte_errno; 2825 } 2826 /* validations */ 2827 if (hrxq->rss_key_len != rss_key_len) { 2828 /* rss_key_len is fixed size 40 byte & not supposed to change */ 2829 rte_errno = EINVAL; 2830 return -rte_errno; 2831 } 2832 queues_n = hash_fields ? queues_n : 1; 2833 if (mlx5_ind_table_obj_match_queues(hrxq->ind_table, 2834 queues, queues_n)) { 2835 ind_tbl = hrxq->ind_table; 2836 } else { 2837 if (hrxq->standalone) { 2838 /* 2839 * Replacement of indirection table unsupported for 2840 * standalone hrxq objects (used by shared RSS). 
2841 */ 2842 rte_errno = ENOTSUP; 2843 return -rte_errno; 2844 } 2845 ind_tbl = mlx5_ind_table_obj_get(dev, queues, queues_n); 2846 if (!ind_tbl) 2847 ind_tbl = mlx5_ind_table_obj_new(dev, queues, queues_n, 2848 hrxq->standalone, 2849 dev_started); 2850 } 2851 if (!ind_tbl) { 2852 rte_errno = ENOMEM; 2853 return -rte_errno; 2854 } 2855 MLX5_ASSERT(priv->obj_ops.hrxq_modify); 2856 ret = priv->obj_ops.hrxq_modify(dev, hrxq, rss_key, hash_fields, 2857 symmetric_hash_function, ind_tbl); 2858 if (ret) { 2859 rte_errno = errno; 2860 goto error; 2861 } 2862 if (ind_tbl != hrxq->ind_table) { 2863 MLX5_ASSERT(!hrxq->standalone); 2864 mlx5_ind_table_obj_release(dev, hrxq->ind_table, true); 2865 hrxq->ind_table = ind_tbl; 2866 } 2867 hrxq->hash_fields = hash_fields; 2868 memcpy(hrxq->rss_key, rss_key, rss_key_len); 2869 return 0; 2870 error: 2871 err = rte_errno; 2872 if (ind_tbl != hrxq->ind_table) { 2873 MLX5_ASSERT(!hrxq->standalone); 2874 mlx5_ind_table_obj_release(dev, ind_tbl, true); 2875 } 2876 rte_errno = err; 2877 return -rte_errno; 2878 } 2879 2880 static void 2881 __mlx5_hrxq_remove(struct rte_eth_dev *dev, struct mlx5_hrxq *hrxq) 2882 { 2883 struct mlx5_priv *priv = dev->data->dev_private; 2884 2885 #ifdef HAVE_IBV_FLOW_DV_SUPPORT 2886 if (hrxq->hws_flags) 2887 mlx5dr_action_destroy(hrxq->action); 2888 else 2889 mlx5_glue->destroy_flow_action(hrxq->action); 2890 #endif 2891 priv->obj_ops.hrxq_destroy(hrxq); 2892 if (!hrxq->standalone) { 2893 mlx5_ind_table_obj_release(dev, hrxq->ind_table, 2894 hrxq->hws_flags ? 2895 (!!dev->data->dev_started) : true); 2896 } 2897 mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_HRXQ], hrxq->idx); 2898 } 2899 2900 /** 2901 * Release the hash Rx queue. 2902 * 2903 * @param dev 2904 * Pointer to Ethernet device. 2905 * @param hrxq 2906 * Index to Hash Rx queue to release. 2907 * 2908 * @param list 2909 * mlx5 list pointer. 2910 * @param entry 2911 * Hash queue entry pointer. 2912 */ 2913 void 2914 mlx5_hrxq_remove_cb(void *tool_ctx, struct mlx5_list_entry *entry) 2915 { 2916 struct rte_eth_dev *dev = tool_ctx; 2917 struct mlx5_hrxq *hrxq = container_of(entry, typeof(*hrxq), entry); 2918 2919 __mlx5_hrxq_remove(dev, hrxq); 2920 } 2921 2922 static struct mlx5_hrxq * 2923 __mlx5_hrxq_create(struct rte_eth_dev *dev, 2924 struct mlx5_flow_rss_desc *rss_desc) 2925 { 2926 struct mlx5_priv *priv = dev->data->dev_private; 2927 const uint8_t *rss_key = rss_desc->key; 2928 uint32_t rss_key_len = rss_desc->key_len; 2929 bool standalone = !!rss_desc->shared_rss; 2930 const uint16_t *queues = 2931 standalone ? rss_desc->const_q : rss_desc->queue; 2932 uint32_t queues_n = rss_desc->queue_num; 2933 struct mlx5_hrxq *hrxq = NULL; 2934 uint32_t hrxq_idx = 0; 2935 struct mlx5_ind_table_obj *ind_tbl = rss_desc->ind_tbl; 2936 int ret; 2937 2938 queues_n = rss_desc->hash_fields ? 
queues_n : 1; 2939 if (!ind_tbl && !rss_desc->hws_flags) 2940 ind_tbl = mlx5_ind_table_obj_get(dev, queues, queues_n); 2941 if (!ind_tbl) 2942 ind_tbl = mlx5_ind_table_obj_new(dev, queues, queues_n, 2943 standalone || 2944 rss_desc->hws_flags, 2945 !!dev->data->dev_started); 2946 if (!ind_tbl) 2947 return NULL; 2948 hrxq = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_HRXQ], &hrxq_idx); 2949 if (!hrxq) 2950 goto error; 2951 hrxq->standalone = standalone; 2952 hrxq->idx = hrxq_idx; 2953 hrxq->ind_table = ind_tbl; 2954 hrxq->rss_key_len = rss_key_len; 2955 hrxq->hash_fields = rss_desc->hash_fields; 2956 hrxq->hws_flags = rss_desc->hws_flags; 2957 hrxq->symmetric_hash_function = rss_desc->symmetric_hash_function; 2958 memcpy(hrxq->rss_key, rss_key, rss_key_len); 2959 ret = priv->obj_ops.hrxq_new(dev, hrxq, rss_desc->tunnel); 2960 if (ret < 0) 2961 goto error; 2962 return hrxq; 2963 error: 2964 if (!rss_desc->ind_tbl) 2965 mlx5_ind_table_obj_release(dev, ind_tbl, true); 2966 if (hrxq) 2967 mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_HRXQ], hrxq_idx); 2968 return NULL; 2969 } 2970 2971 struct mlx5_list_entry * 2972 mlx5_hrxq_create_cb(void *tool_ctx, void *cb_ctx) 2973 { 2974 struct rte_eth_dev *dev = tool_ctx; 2975 struct mlx5_flow_cb_ctx *ctx = cb_ctx; 2976 struct mlx5_flow_rss_desc *rss_desc = ctx->data; 2977 struct mlx5_hrxq *hrxq; 2978 2979 hrxq = __mlx5_hrxq_create(dev, rss_desc); 2980 return hrxq ? &hrxq->entry : NULL; 2981 } 2982 2983 struct mlx5_list_entry * 2984 mlx5_hrxq_clone_cb(void *tool_ctx, struct mlx5_list_entry *entry, 2985 void *cb_ctx __rte_unused) 2986 { 2987 struct rte_eth_dev *dev = tool_ctx; 2988 struct mlx5_priv *priv = dev->data->dev_private; 2989 struct mlx5_hrxq *hrxq; 2990 uint32_t hrxq_idx = 0; 2991 2992 hrxq = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_HRXQ], &hrxq_idx); 2993 if (!hrxq) 2994 return NULL; 2995 memcpy(hrxq, entry, sizeof(*hrxq) + MLX5_RSS_HASH_KEY_LEN); 2996 hrxq->idx = hrxq_idx; 2997 return &hrxq->entry; 2998 } 2999 3000 void 3001 mlx5_hrxq_clone_free_cb(void *tool_ctx, struct mlx5_list_entry *entry) 3002 { 3003 struct rte_eth_dev *dev = tool_ctx; 3004 struct mlx5_priv *priv = dev->data->dev_private; 3005 struct mlx5_hrxq *hrxq = container_of(entry, typeof(*hrxq), entry); 3006 3007 mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_HRXQ], hrxq->idx); 3008 } 3009 3010 /** 3011 * Get an Rx Hash queue. 3012 * 3013 * @param dev 3014 * Pointer to Ethernet device. 3015 * @param rss_desc 3016 * RSS configuration for the Rx hash queue. 3017 * 3018 * @return 3019 * An hash Rx queue on success. 3020 */ 3021 struct mlx5_hrxq *mlx5_hrxq_get(struct rte_eth_dev *dev, 3022 struct mlx5_flow_rss_desc *rss_desc) 3023 { 3024 struct mlx5_priv *priv = dev->data->dev_private; 3025 struct mlx5_hrxq *hrxq = NULL; 3026 struct mlx5_list_entry *entry; 3027 struct mlx5_flow_cb_ctx ctx = { 3028 .data = rss_desc, 3029 }; 3030 3031 if (rss_desc->shared_rss) { 3032 hrxq = __mlx5_hrxq_create(dev, rss_desc); 3033 } else { 3034 entry = mlx5_list_register(priv->hrxqs, &ctx); 3035 if (!entry) 3036 return NULL; 3037 hrxq = container_of(entry, typeof(*hrxq), entry); 3038 } 3039 return hrxq; 3040 } 3041 3042 /** 3043 * Release the hash Rx queue. 3044 * 3045 * @param dev 3046 * Pointer to Ethernet device. 3047 * @param hrxq_idx 3048 * Hash Rx queue to release. 3049 * 3050 * @return 3051 * 1 while a reference on it exists, 0 when freed. 
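 *
 * A minimal pairing sketch with mlx5_hrxq_get() (hypothetical caller;
 * rss_desc is assumed to be a fully initialized mlx5_flow_rss_desc):
 *
 *   struct mlx5_hrxq *hrxq = mlx5_hrxq_get(dev, &rss_desc);
 *
 *   if (hrxq != NULL)
 *           (void)mlx5_hrxq_obj_release(dev, hrxq);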
3052 */ 3053 int mlx5_hrxq_obj_release(struct rte_eth_dev *dev, struct mlx5_hrxq *hrxq) 3054 { 3055 struct mlx5_priv *priv = dev->data->dev_private; 3056 3057 if (!hrxq) 3058 return 0; 3059 if (!hrxq->standalone) 3060 return mlx5_list_unregister(priv->hrxqs, &hrxq->entry); 3061 __mlx5_hrxq_remove(dev, hrxq); 3062 return 0; 3063 } 3064 3065 /** 3066 * Release the hash Rx queue with index. 3067 * 3068 * @param dev 3069 * Pointer to Ethernet device. 3070 * @param hrxq_idx 3071 * Index to Hash Rx queue to release. 3072 * 3073 * @return 3074 * 1 while a reference on it exists, 0 when freed. 3075 */ 3076 int mlx5_hrxq_release(struct rte_eth_dev *dev, uint32_t hrxq_idx) 3077 { 3078 struct mlx5_priv *priv = dev->data->dev_private; 3079 struct mlx5_hrxq *hrxq; 3080 3081 hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ], hrxq_idx); 3082 return mlx5_hrxq_obj_release(dev, hrxq); 3083 } 3084 3085 /** 3086 * Create a drop Rx Hash queue. 3087 * 3088 * @param dev 3089 * Pointer to Ethernet device. 3090 * 3091 * @return 3092 * The Verbs/DevX object initialized, NULL otherwise and rte_errno is set. 3093 */ 3094 struct mlx5_hrxq * 3095 mlx5_drop_action_create(struct rte_eth_dev *dev) 3096 { 3097 struct mlx5_priv *priv = dev->data->dev_private; 3098 struct mlx5_hrxq *hrxq = NULL; 3099 int ret; 3100 3101 if (priv->drop_queue.hrxq) 3102 return priv->drop_queue.hrxq; 3103 hrxq = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*hrxq) + MLX5_RSS_HASH_KEY_LEN, 0, SOCKET_ID_ANY); 3104 if (!hrxq) { 3105 DRV_LOG(WARNING, 3106 "Port %u cannot allocate memory for drop queue.", 3107 dev->data->port_id); 3108 rte_errno = ENOMEM; 3109 goto error; 3110 } 3111 priv->drop_queue.hrxq = hrxq; 3112 hrxq->ind_table = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*hrxq->ind_table), 3113 0, SOCKET_ID_ANY); 3114 if (!hrxq->ind_table) { 3115 rte_errno = ENOMEM; 3116 goto error; 3117 } 3118 ret = priv->obj_ops.drop_action_create(dev); 3119 if (ret < 0) 3120 goto error; 3121 return hrxq; 3122 error: 3123 if (hrxq) { 3124 if (hrxq->ind_table) 3125 mlx5_free(hrxq->ind_table); 3126 priv->drop_queue.hrxq = NULL; 3127 mlx5_free(hrxq); 3128 } 3129 return NULL; 3130 } 3131 3132 /** 3133 * Release a drop hash Rx queue. 3134 * 3135 * @param dev 3136 * Pointer to Ethernet device. 3137 */ 3138 void 3139 mlx5_drop_action_destroy(struct rte_eth_dev *dev) 3140 { 3141 struct mlx5_priv *priv = dev->data->dev_private; 3142 struct mlx5_hrxq *hrxq = priv->drop_queue.hrxq; 3143 3144 if (!priv->drop_queue.hrxq) 3145 return; 3146 priv->obj_ops.drop_action_destroy(dev); 3147 mlx5_free(priv->drop_queue.rxq); 3148 mlx5_free(hrxq->ind_table); 3149 mlx5_free(hrxq); 3150 priv->drop_queue.rxq = NULL; 3151 priv->drop_queue.hrxq = NULL; 3152 } 3153 3154 /** 3155 * Verify the hash Rx queue list is empty. 3156 * 3157 * @param dev 3158 * Pointer to Ethernet device. 3159 * 3160 * @return 3161 * The number of objects not released. 3162 */ 3163 uint32_t 3164 mlx5_hrxq_verify(struct rte_eth_dev *dev) 3165 { 3166 struct mlx5_priv *priv = dev->data->dev_private; 3167 3168 return mlx5_list_get_entry_num(priv->hrxqs); 3169 } 3170 3171 /** 3172 * Set the Rx queue timestamp conversion parameters. 3173 * 3174 * @param[in] dev 3175 * Pointer to the Ethernet device structure.
3176 */ 3177 void 3178 mlx5_rxq_timestamp_set(struct rte_eth_dev *dev) 3179 { 3180 struct mlx5_priv *priv = dev->data->dev_private; 3181 struct mlx5_dev_ctx_shared *sh = priv->sh; 3182 unsigned int i; 3183 3184 for (i = 0; i != priv->rxqs_n; ++i) { 3185 struct mlx5_rxq_data *data = mlx5_rxq_data_get(dev, i); 3186 3187 if (data == NULL) 3188 continue; 3189 data->sh = sh; 3190 data->rt_timestamp = sh->dev_cap.rt_timestamp; 3191 } 3192 } 3193 3194 /** 3195 * Validate the given external RxQ rte_flow index, and get a pointer to the 3196 * concurrent external RxQ object to map/unmap. 3197 * 3198 * @param[in] port_id 3199 * The port identifier of the Ethernet device. 3200 * @param[in] dpdk_idx 3201 * Queue index in rte_flow. 3202 * 3203 * @return 3204 * Pointer to concurrent external RxQ on success, 3205 * NULL otherwise and rte_errno is set. 3206 */ 3207 static struct mlx5_external_rxq * 3208 mlx5_external_rx_queue_get_validate(uint16_t port_id, uint16_t dpdk_idx) 3209 { 3210 struct rte_eth_dev *dev; 3211 struct mlx5_priv *priv; 3212 3213 if (dpdk_idx < MLX5_EXTERNAL_RX_QUEUE_ID_MIN) { 3214 DRV_LOG(ERR, "Queue index %u should be in range: [%u, %u].", 3215 dpdk_idx, MLX5_EXTERNAL_RX_QUEUE_ID_MIN, UINT16_MAX); 3216 rte_errno = EINVAL; 3217 return NULL; 3218 } 3219 if (!rte_eth_dev_is_valid_port(port_id)) { 3220 DRV_LOG(ERR, "There is no Ethernet device for port %u.", 3221 port_id); 3222 rte_errno = ENODEV; 3223 return NULL; 3224 } 3225 dev = &rte_eth_devices[port_id]; 3226 priv = dev->data->dev_private; 3227 if (!mlx5_imported_pd_and_ctx(priv->sh->cdev)) { 3228 DRV_LOG(ERR, "Port %u " 3229 "external RxQ isn't supported on local PD and CTX.", 3230 port_id); 3231 rte_errno = ENOTSUP; 3232 return NULL; 3233 } 3234 if (!mlx5_devx_obj_ops_en(priv->sh)) { 3235 DRV_LOG(ERR, 3236 "Port %u external RxQ isn't supported by Verbs API.", 3237 port_id); 3238 rte_errno = ENOTSUP; 3239 return NULL; 3240 } 3241 /* 3242 * When the user configures a remote PD and CTX and the device creates 3243 * RxQs by DevX, the external RxQs array is allocated.
3244 */ 3245 MLX5_ASSERT(priv->ext_rxqs != NULL); 3246 return &priv->ext_rxqs[dpdk_idx - MLX5_EXTERNAL_RX_QUEUE_ID_MIN]; 3247 } 3248 3249 int 3250 rte_pmd_mlx5_external_rx_queue_id_map(uint16_t port_id, uint16_t dpdk_idx, 3251 uint32_t hw_idx) 3252 { 3253 struct mlx5_external_rxq *ext_rxq; 3254 uint32_t unmapped = 0; 3255 3256 ext_rxq = mlx5_external_rx_queue_get_validate(port_id, dpdk_idx); 3257 if (ext_rxq == NULL) 3258 return -rte_errno; 3259 if (!__atomic_compare_exchange_n(&ext_rxq->refcnt, &unmapped, 1, false, 3260 __ATOMIC_RELAXED, __ATOMIC_RELAXED)) { 3261 if (ext_rxq->hw_id != hw_idx) { 3262 DRV_LOG(ERR, "Port %u external RxQ index %u " 3263 "is already mapped to HW index (requesting is " 3264 "%u, existing is %u).", 3265 port_id, dpdk_idx, hw_idx, ext_rxq->hw_id); 3266 rte_errno = EEXIST; 3267 return -rte_errno; 3268 } 3269 DRV_LOG(WARNING, "Port %u external RxQ index %u " 3270 "is already mapped to the requested HW index (%u)", 3271 port_id, dpdk_idx, hw_idx); 3272 3273 } else { 3274 ext_rxq->hw_id = hw_idx; 3275 DRV_LOG(DEBUG, "Port %u external RxQ index %u " 3276 "is successfully mapped to the requested HW index (%u)", 3277 port_id, dpdk_idx, hw_idx); 3278 } 3279 return 0; 3280 } 3281 3282 int 3283 rte_pmd_mlx5_external_rx_queue_id_unmap(uint16_t port_id, uint16_t dpdk_idx) 3284 { 3285 struct mlx5_external_rxq *ext_rxq; 3286 uint32_t mapped = 1; 3287 3288 ext_rxq = mlx5_external_rx_queue_get_validate(port_id, dpdk_idx); 3289 if (ext_rxq == NULL) 3290 return -rte_errno; 3291 if (ext_rxq->refcnt > 1) { 3292 DRV_LOG(ERR, "Port %u external RxQ index %u still referenced.", 3293 port_id, dpdk_idx); 3294 rte_errno = EINVAL; 3295 return -rte_errno; 3296 } 3297 if (!__atomic_compare_exchange_n(&ext_rxq->refcnt, &mapped, 0, false, 3298 __ATOMIC_RELAXED, __ATOMIC_RELAXED)) { 3299 DRV_LOG(ERR, "Port %u external RxQ index %u doesn't exist.", 3300 port_id, dpdk_idx); 3301 rte_errno = EINVAL; 3302 return -rte_errno; 3303 } 3304 DRV_LOG(DEBUG, 3305 "Port %u external RxQ index %u is successfully unmapped.", 3306 port_id, dpdk_idx); 3307 return 0; 3308 } 3309
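
/*
 * Usage sketch for the external Rx queue mapping API above (an added,
 * hedged example; port_id and hw_queue_id are placeholders):
 *
 *   uint16_t dpdk_idx = MLX5_EXTERNAL_RX_QUEUE_ID_MIN;
 *
 *   if (rte_pmd_mlx5_external_rx_queue_id_map(port_id, dpdk_idx,
 *                                             hw_queue_id) == 0) {
 *           (the index can now be used as a queue in rte_flow rules)
 *           rte_pmd_mlx5_external_rx_queue_id_unmap(port_id, dpdk_idx);
 *   }
 */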