1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2015 6WIND S.A. 3 * Copyright 2015 Mellanox Technologies, Ltd 4 */ 5 6 #include <stddef.h> 7 #include <errno.h> 8 #include <string.h> 9 #include <stdint.h> 10 #include <fcntl.h> 11 #include <sys/queue.h> 12 13 #include <rte_mbuf.h> 14 #include <rte_malloc.h> 15 #include <ethdev_driver.h> 16 #include <rte_common.h> 17 #include <rte_interrupts.h> 18 #include <rte_debug.h> 19 #include <rte_io.h> 20 #include <rte_eal_paging.h> 21 22 #include <mlx5_glue.h> 23 #include <mlx5_malloc.h> 24 #include <mlx5_common.h> 25 #include <mlx5_common_mr.h> 26 27 #include "mlx5_defs.h" 28 #include "mlx5.h" 29 #include "mlx5_rx.h" 30 #include "mlx5_utils.h" 31 #include "mlx5_autoconf.h" 32 #include "mlx5_devx.h" 33 #include "rte_pmd_mlx5.h" 34 35 36 /* Default RSS hash key also used for ConnectX-3. */ 37 uint8_t rss_hash_default_key[] = { 38 0x2c, 0xc6, 0x81, 0xd1, 39 0x5b, 0xdb, 0xf4, 0xf7, 40 0xfc, 0xa2, 0x83, 0x19, 41 0xdb, 0x1a, 0x3e, 0x94, 42 0x6b, 0x9e, 0x38, 0xd9, 43 0x2c, 0x9c, 0x03, 0xd1, 44 0xad, 0x99, 0x44, 0xa7, 45 0xd9, 0x56, 0x3d, 0x59, 46 0x06, 0x3c, 0x25, 0xf3, 47 0xfc, 0x1f, 0xdc, 0x2a, 48 }; 49 50 /* Length of the default RSS hash key. */ 51 static_assert(MLX5_RSS_HASH_KEY_LEN == 52 (unsigned int)sizeof(rss_hash_default_key), 53 "wrong RSS default key size."); 54 55 /** 56 * Calculate the number of CQEs in CQ for the Rx queue. 57 * 58 * @param rxq_data 59 * Pointer to receive queue structure. 60 * 61 * @return 62 * Number of CQEs in CQ. 63 */ 64 unsigned int 65 mlx5_rxq_cqe_num(struct mlx5_rxq_data *rxq_data) 66 { 67 unsigned int cqe_n; 68 unsigned int wqe_n = 1 << rxq_data->elts_n; 69 70 if (mlx5_rxq_mprq_enabled(rxq_data)) 71 cqe_n = wqe_n * RTE_BIT32(rxq_data->log_strd_num) - 1; 72 else 73 cqe_n = wqe_n - 1; 74 return cqe_n; 75 } 76 77 /** 78 * Allocate RX queue elements for Multi-Packet RQ. 79 * 80 * @param rxq_ctrl 81 * Pointer to RX queue structure. 82 * 83 * @return 84 * 0 on success, a negative errno value otherwise and rte_errno is set. 85 */ 86 static int 87 rxq_alloc_elts_mprq(struct mlx5_rxq_ctrl *rxq_ctrl) 88 { 89 struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq; 90 unsigned int wqe_n = 1 << rxq->elts_n; 91 unsigned int i; 92 int err; 93 94 /* Iterate on segments. */ 95 for (i = 0; i <= wqe_n; ++i) { 96 struct mlx5_mprq_buf *buf; 97 98 if (rte_mempool_get(rxq->mprq_mp, (void **)&buf) < 0) { 99 DRV_LOG(ERR, "port %u empty mbuf pool", rxq->port_id); 100 rte_errno = ENOMEM; 101 goto error; 102 } 103 if (i < wqe_n) 104 (*rxq->mprq_bufs)[i] = buf; 105 else 106 rxq->mprq_repl = buf; 107 } 108 DRV_LOG(DEBUG, 109 "port %u MPRQ queue %u allocated and configured %u segments", 110 rxq->port_id, rxq->idx, wqe_n); 111 return 0; 112 error: 113 err = rte_errno; /* Save rte_errno before cleanup. */ 114 wqe_n = i; 115 for (i = 0; (i != wqe_n); ++i) { 116 if ((*rxq->mprq_bufs)[i] != NULL) 117 rte_mempool_put(rxq->mprq_mp, 118 (*rxq->mprq_bufs)[i]); 119 (*rxq->mprq_bufs)[i] = NULL; 120 } 121 DRV_LOG(DEBUG, "port %u MPRQ queue %u failed, freed everything", 122 rxq->port_id, rxq->idx); 123 rte_errno = err; /* Restore rte_errno. */ 124 return -rte_errno; 125 } 126 127 /** 128 * Allocate RX queue elements for Single-Packet RQ. 129 * 130 * @param rxq_ctrl 131 * Pointer to RX queue structure. 132 * 133 * @return 134 * 0 on success, negative errno value on failure. 
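 *
 * Note: with Multi-Packet RQ enabled this routine still allocates
 * regular service mbufs, one per stride rather than one per WQE,
 * i.e. 2^elts_n * 2^log_strd_num entries (for example, 16 WQEs of
 * 64 strides each take 1024 mbufs), as computed at the top of the
 * function body.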
135 */ 136 static int 137 rxq_alloc_elts_sprq(struct mlx5_rxq_ctrl *rxq_ctrl) 138 { 139 const unsigned int sges_n = 1 << rxq_ctrl->rxq.sges_n; 140 unsigned int elts_n = mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq) ? 141 RTE_BIT32(rxq_ctrl->rxq.elts_n) * 142 RTE_BIT32(rxq_ctrl->rxq.log_strd_num) : 143 RTE_BIT32(rxq_ctrl->rxq.elts_n); 144 bool has_vec_support = mlx5_rxq_check_vec_support(&rxq_ctrl->rxq) > 0; 145 unsigned int i; 146 int err; 147 148 /* Iterate on segments. */ 149 for (i = 0; (i != elts_n); ++i) { 150 struct mlx5_eth_rxseg *seg = &rxq_ctrl->rxq.rxseg[i % sges_n]; 151 struct rte_mbuf *buf; 152 153 buf = rte_pktmbuf_alloc(seg->mp); 154 if (buf == NULL) { 155 if (rxq_ctrl->share_group == 0) 156 DRV_LOG(ERR, "port %u queue %u empty mbuf pool", 157 RXQ_PORT_ID(rxq_ctrl), 158 rxq_ctrl->rxq.idx); 159 else 160 DRV_LOG(ERR, "share group %u queue %u empty mbuf pool", 161 rxq_ctrl->share_group, 162 rxq_ctrl->share_qid); 163 rte_errno = ENOMEM; 164 goto error; 165 } 166 /* Only vectored Rx routines rely on headroom size. */ 167 MLX5_ASSERT(!has_vec_support || 168 DATA_OFF(buf) >= RTE_PKTMBUF_HEADROOM); 169 /* Buffer is supposed to be empty. */ 170 MLX5_ASSERT(rte_pktmbuf_data_len(buf) == 0); 171 MLX5_ASSERT(rte_pktmbuf_pkt_len(buf) == 0); 172 MLX5_ASSERT(!buf->next); 173 SET_DATA_OFF(buf, seg->offset); 174 PORT(buf) = rxq_ctrl->rxq.port_id; 175 DATA_LEN(buf) = seg->length; 176 PKT_LEN(buf) = seg->length; 177 NB_SEGS(buf) = 1; 178 (*rxq_ctrl->rxq.elts)[i] = buf; 179 } 180 /* If Rx vector is activated. */ 181 if (has_vec_support) { 182 struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq; 183 struct rte_mbuf *mbuf_init = &rxq->fake_mbuf; 184 struct rte_pktmbuf_pool_private *priv = 185 (struct rte_pktmbuf_pool_private *) 186 rte_mempool_get_priv(rxq_ctrl->rxq.mp); 187 int j; 188 189 /* Initialize default rearm_data for vPMD. */ 190 mbuf_init->data_off = RTE_PKTMBUF_HEADROOM; 191 rte_mbuf_refcnt_set(mbuf_init, 1); 192 mbuf_init->nb_segs = 1; 193 /* For shared queues port is provided in CQE */ 194 mbuf_init->port = rxq->shared ? 0 : rxq->port_id; 195 if (priv->flags & RTE_PKTMBUF_POOL_F_PINNED_EXT_BUF) 196 mbuf_init->ol_flags = RTE_MBUF_F_EXTERNAL; 197 /* 198 * prevent compiler reordering: 199 * rearm_data covers previous fields. 200 */ 201 rte_compiler_barrier(); 202 rxq->mbuf_initializer = 203 *(rte_xmm_t *)&mbuf_init->rearm_data; 204 /* Padding with a fake mbuf for vectorized Rx. */ 205 for (j = 0; j < MLX5_VPMD_DESCS_PER_LOOP; ++j) 206 (*rxq->elts)[elts_n + j] = &rxq->fake_mbuf; 207 } 208 if (rxq_ctrl->share_group == 0) 209 DRV_LOG(DEBUG, 210 "port %u SPRQ queue %u allocated and configured %u segments (max %u packets)", 211 RXQ_PORT_ID(rxq_ctrl), rxq_ctrl->rxq.idx, elts_n, 212 elts_n / (1 << rxq_ctrl->rxq.sges_n)); 213 else 214 DRV_LOG(DEBUG, 215 "share group %u SPRQ queue %u allocated and configured %u segments (max %u packets)", 216 rxq_ctrl->share_group, rxq_ctrl->share_qid, elts_n, 217 elts_n / (1 << rxq_ctrl->rxq.sges_n)); 218 return 0; 219 error: 220 err = rte_errno; /* Save rte_errno before cleanup. 
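 * The cleanup below may overwrite rte_errno, so the original
 * failure cause is saved here and restored once the partially
 * filled elements array has been drained.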
*/ 221 elts_n = i; 222 for (i = 0; (i != elts_n); ++i) { 223 if ((*rxq_ctrl->rxq.elts)[i] != NULL) 224 rte_pktmbuf_free_seg((*rxq_ctrl->rxq.elts)[i]); 225 (*rxq_ctrl->rxq.elts)[i] = NULL; 226 } 227 if (rxq_ctrl->share_group == 0) 228 DRV_LOG(DEBUG, "port %u SPRQ queue %u failed, freed everything", 229 RXQ_PORT_ID(rxq_ctrl), rxq_ctrl->rxq.idx); 230 else 231 DRV_LOG(DEBUG, "share group %u SPRQ queue %u failed, freed everything", 232 rxq_ctrl->share_group, rxq_ctrl->share_qid); 233 rte_errno = err; /* Restore rte_errno. */ 234 return -rte_errno; 235 } 236 237 /** 238 * Allocate RX queue elements. 239 * 240 * @param rxq_ctrl 241 * Pointer to RX queue structure. 242 * 243 * @return 244 * 0 on success, negative errno value on failure. 245 */ 246 int 247 rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl) 248 { 249 int ret = 0; 250 251 /** 252 * For MPRQ we need to allocate both MPRQ buffers 253 * for WQEs and simple mbufs for vector processing. 254 */ 255 if (mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq)) 256 ret = rxq_alloc_elts_mprq(rxq_ctrl); 257 if (ret == 0) 258 ret = rxq_alloc_elts_sprq(rxq_ctrl); 259 return ret; 260 } 261 262 /** 263 * Free RX queue elements for Multi-Packet RQ. 264 * 265 * @param rxq_ctrl 266 * Pointer to RX queue structure. 267 */ 268 static void 269 rxq_free_elts_mprq(struct mlx5_rxq_ctrl *rxq_ctrl) 270 { 271 struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq; 272 uint16_t i; 273 274 DRV_LOG(DEBUG, "port %u Multi-Packet Rx queue %u freeing %d WRs", 275 rxq->port_id, rxq->idx, (1u << rxq->elts_n)); 276 if (rxq->mprq_bufs == NULL) 277 return; 278 for (i = 0; (i != (1u << rxq->elts_n)); ++i) { 279 if ((*rxq->mprq_bufs)[i] != NULL) 280 mlx5_mprq_buf_free((*rxq->mprq_bufs)[i]); 281 (*rxq->mprq_bufs)[i] = NULL; 282 } 283 if (rxq->mprq_repl != NULL) { 284 mlx5_mprq_buf_free(rxq->mprq_repl); 285 rxq->mprq_repl = NULL; 286 } 287 } 288 289 /** 290 * Free RX queue elements for Single-Packet RQ. 291 * 292 * @param rxq_ctrl 293 * Pointer to RX queue structure. 294 */ 295 static void 296 rxq_free_elts_sprq(struct mlx5_rxq_ctrl *rxq_ctrl) 297 { 298 struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq; 299 const uint16_t q_n = mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq) ? 300 RTE_BIT32(rxq->elts_n) * RTE_BIT32(rxq->log_strd_num) : 301 RTE_BIT32(rxq->elts_n); 302 const uint16_t q_mask = q_n - 1; 303 uint16_t elts_ci = mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq) ? 304 rxq->elts_ci : rxq->rq_ci; 305 uint16_t used = q_n - (elts_ci - rxq->rq_pi); 306 uint16_t i; 307 308 if (rxq_ctrl->share_group == 0) 309 DRV_LOG(DEBUG, "port %u Rx queue %u freeing %d WRs", 310 RXQ_PORT_ID(rxq_ctrl), rxq->idx, q_n); 311 else 312 DRV_LOG(DEBUG, "share group %u Rx queue %u freeing %d WRs", 313 rxq_ctrl->share_group, rxq_ctrl->share_qid, q_n); 314 if (rxq->elts == NULL) 315 return; 316 /** 317 * Some mbuf in the Ring belongs to the application. 318 * They cannot be freed. 319 */ 320 if (mlx5_rxq_check_vec_support(rxq) > 0) { 321 for (i = 0; i < used; ++i) 322 (*rxq->elts)[(elts_ci + i) & q_mask] = NULL; 323 rxq->rq_pi = elts_ci; 324 } 325 for (i = 0; i != q_n; ++i) { 326 if ((*rxq->elts)[i] != NULL) 327 rte_pktmbuf_free_seg((*rxq->elts)[i]); 328 (*rxq->elts)[i] = NULL; 329 } 330 } 331 332 /** 333 * Free RX queue elements. 334 * 335 * @param rxq_ctrl 336 * Pointer to RX queue structure. 337 */ 338 static void 339 rxq_free_elts(struct mlx5_rxq_ctrl *rxq_ctrl) 340 { 341 /* 342 * For MPRQ we need to allocate both MPRQ buffers 343 * for WQEs and simple mbufs for vector processing. 
344 */ 345 if (mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq)) 346 rxq_free_elts_mprq(rxq_ctrl); 347 rxq_free_elts_sprq(rxq_ctrl); 348 } 349 350 /** 351 * Returns the per-queue supported offloads. 352 * 353 * @param dev 354 * Pointer to Ethernet device. 355 * 356 * @return 357 * Supported Rx offloads. 358 */ 359 uint64_t 360 mlx5_get_rx_queue_offloads(struct rte_eth_dev *dev) 361 { 362 struct mlx5_priv *priv = dev->data->dev_private; 363 uint64_t offloads = (RTE_ETH_RX_OFFLOAD_SCATTER | 364 RTE_ETH_RX_OFFLOAD_TIMESTAMP | 365 RTE_ETH_RX_OFFLOAD_RSS_HASH); 366 367 if (!priv->config.mprq.enabled) 368 offloads |= RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT; 369 if (priv->sh->config.hw_fcs_strip) 370 offloads |= RTE_ETH_RX_OFFLOAD_KEEP_CRC; 371 if (priv->sh->dev_cap.hw_csum) 372 offloads |= (RTE_ETH_RX_OFFLOAD_IPV4_CKSUM | 373 RTE_ETH_RX_OFFLOAD_UDP_CKSUM | 374 RTE_ETH_RX_OFFLOAD_TCP_CKSUM); 375 if (priv->sh->dev_cap.hw_vlan_strip) 376 offloads |= RTE_ETH_RX_OFFLOAD_VLAN_STRIP; 377 if (priv->sh->config.lro_allowed) 378 offloads |= RTE_ETH_RX_OFFLOAD_TCP_LRO; 379 return offloads; 380 } 381 382 383 /** 384 * Returns the per-port supported offloads. 385 * 386 * @return 387 * Supported Rx offloads. 388 */ 389 uint64_t 390 mlx5_get_rx_port_offloads(void) 391 { 392 uint64_t offloads = RTE_ETH_RX_OFFLOAD_VLAN_FILTER; 393 394 return offloads; 395 } 396 397 /** 398 * Verify if the queue can be released. 399 * 400 * @param dev 401 * Pointer to Ethernet device. 402 * @param idx 403 * RX queue index. 404 * 405 * @return 406 * 1 if the queue can be released 407 * 0 if the queue can not be released, there are references to it. 408 * Negative errno and rte_errno is set if queue doesn't exist. 409 */ 410 static int 411 mlx5_rxq_releasable(struct rte_eth_dev *dev, uint16_t idx) 412 { 413 struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, idx); 414 415 if (rxq == NULL) { 416 rte_errno = EINVAL; 417 return -rte_errno; 418 } 419 return (__atomic_load_n(&rxq->refcnt, __ATOMIC_RELAXED) == 1); 420 } 421 422 /* Fetches and drops all SW-owned and error CQEs to synchronize CQ. */ 423 static void 424 rxq_sync_cq(struct mlx5_rxq_data *rxq) 425 { 426 const uint16_t cqe_n = 1 << rxq->cqe_n; 427 const uint16_t cqe_mask = cqe_n - 1; 428 volatile struct mlx5_cqe *cqe; 429 int ret, i; 430 431 i = cqe_n; 432 do { 433 cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_mask]; 434 ret = check_cqe(cqe, cqe_n, rxq->cq_ci); 435 if (ret == MLX5_CQE_STATUS_HW_OWN) 436 break; 437 if (ret == MLX5_CQE_STATUS_ERR) { 438 rxq->cq_ci++; 439 continue; 440 } 441 MLX5_ASSERT(ret == MLX5_CQE_STATUS_SW_OWN); 442 if (MLX5_CQE_FORMAT(cqe->op_own) != MLX5_COMPRESSED) { 443 rxq->cq_ci++; 444 continue; 445 } 446 /* Compute the next non compressed CQE. */ 447 rxq->cq_ci += rxq->cqe_comp_layout ? 448 (MLX5_CQE_NUM_MINIS(cqe->op_own) + 1U) : 449 rte_be_to_cpu_32(cqe->byte_cnt); 450 451 } while (--i); 452 /* Move all CQEs to HW ownership, including possible MiniCQEs. */ 453 for (i = 0; i < cqe_n; i++) { 454 cqe = &(*rxq->cqes)[i]; 455 cqe->validity_iteration_count = MLX5_CQE_VIC_INIT; 456 cqe->op_own = MLX5_CQE_INVALIDATE; 457 } 458 /* Resync CQE and WQE (WQ in RESET state). */ 459 rte_io_wmb(); 460 *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci); 461 rte_io_wmb(); 462 *rxq->rq_db = rte_cpu_to_be_32(0); 463 rte_io_wmb(); 464 } 465 466 /** 467 * Rx queue stop. Device queue goes to the RESET state, 468 * all involved mbufs are freed from WQ. 469 * 470 * @param dev 471 * Pointer to Ethernet device structure. 472 * @param idx 473 * RX queue index. 
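 *
 * The queue is moved to the RESET state through rxq_obj_modify(),
 * remaining CQEs are drained by rxq_sync_cq() and every mbuf posted
 * to the WQ is released, so a later start re-allocates the elements
 * from scratch.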
474 * 475 * @return 476 * 0 on success, a negative errno value otherwise and rte_errno is set. 477 */ 478 int 479 mlx5_rx_queue_stop_primary(struct rte_eth_dev *dev, uint16_t idx) 480 { 481 struct mlx5_priv *priv = dev->data->dev_private; 482 struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, idx); 483 struct mlx5_rxq_ctrl *rxq_ctrl = rxq->ctrl; 484 int ret; 485 486 MLX5_ASSERT(rxq != NULL && rxq_ctrl != NULL); 487 MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY); 488 ret = priv->obj_ops.rxq_obj_modify(rxq, MLX5_RXQ_MOD_RDY2RST); 489 if (ret) { 490 DRV_LOG(ERR, "Cannot change Rx WQ state to RESET: %s", 491 strerror(errno)); 492 rte_errno = errno; 493 return ret; 494 } 495 /* Remove all processes CQEs. */ 496 rxq_sync_cq(&rxq_ctrl->rxq); 497 /* Free all involved mbufs. */ 498 rxq_free_elts(rxq_ctrl); 499 /* Set the actual queue state. */ 500 dev->data->rx_queue_state[idx] = RTE_ETH_QUEUE_STATE_STOPPED; 501 return 0; 502 } 503 504 /** 505 * Rx queue stop. Device queue goes to the RESET state, 506 * all involved mbufs are freed from WQ. 507 * 508 * @param dev 509 * Pointer to Ethernet device structure. 510 * @param idx 511 * RX queue index. 512 * 513 * @return 514 * 0 on success, a negative errno value otherwise and rte_errno is set. 515 */ 516 int 517 mlx5_rx_queue_stop(struct rte_eth_dev *dev, uint16_t idx) 518 { 519 eth_rx_burst_t pkt_burst = dev->rx_pkt_burst; 520 int ret; 521 522 if (rte_eth_dev_is_rx_hairpin_queue(dev, idx)) { 523 DRV_LOG(ERR, "Hairpin queue can't be stopped"); 524 rte_errno = EINVAL; 525 return -EINVAL; 526 } 527 if (dev->data->rx_queue_state[idx] == RTE_ETH_QUEUE_STATE_STOPPED) 528 return 0; 529 /* 530 * Vectorized Rx burst requires the CQ and RQ indices 531 * synchronized, that might be broken on RQ restart 532 * and cause Rx malfunction, so queue stopping is 533 * not supported if vectorized Rx burst is engaged. 534 * The routine pointer depends on the process type, 535 * should perform check there. MPRQ is not supported as well. 536 */ 537 if (pkt_burst != mlx5_rx_burst) { 538 DRV_LOG(ERR, "Rx queue stop is only supported " 539 "for non-vectorized single-packet Rx"); 540 rte_errno = EINVAL; 541 return -EINVAL; 542 } 543 if (rte_eal_process_type() == RTE_PROC_SECONDARY) { 544 ret = mlx5_mp_os_req_queue_control(dev, idx, 545 MLX5_MP_REQ_QUEUE_RX_STOP); 546 } else { 547 ret = mlx5_rx_queue_stop_primary(dev, idx); 548 } 549 return ret; 550 } 551 552 /** 553 * Rx queue start. Device queue goes to the ready state, 554 * all required mbufs are allocated and WQ is replenished. 555 * 556 * @param dev 557 * Pointer to Ethernet device structure. 558 * @param idx 559 * RX queue index. 560 * 561 * @return 562 * 0 on success, a negative errno value otherwise and rte_errno is set. 563 */ 564 int 565 mlx5_rx_queue_start_primary(struct rte_eth_dev *dev, uint16_t idx) 566 { 567 struct mlx5_priv *priv = dev->data->dev_private; 568 struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, idx); 569 struct mlx5_rxq_data *rxq_data = &rxq->ctrl->rxq; 570 int ret; 571 572 MLX5_ASSERT(rxq != NULL && rxq->ctrl != NULL); 573 MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY); 574 /* Allocate needed buffers. */ 575 ret = rxq_alloc_elts(rxq->ctrl); 576 if (ret) { 577 DRV_LOG(ERR, "Cannot reallocate buffers for Rx WQ"); 578 rte_errno = errno; 579 return ret; 580 } 581 rte_io_wmb(); 582 *rxq_data->cq_db = rte_cpu_to_be_32(rxq_data->cq_ci); 583 rte_io_wmb(); 584 /* Reset RQ consumer before moving queue to READY state. 
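 * The rte_io_wmb() calls around the doorbell writes order them with
 * respect to the buffer initialization above and to each other, so
 * the device sees a consistent CQ index and a zeroed RQ consumer
 * before the WQ is switched to the READY state.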
*/ 585 *rxq_data->rq_db = rte_cpu_to_be_32(0); 586 rte_io_wmb(); 587 ret = priv->obj_ops.rxq_obj_modify(rxq, MLX5_RXQ_MOD_RST2RDY); 588 if (ret) { 589 DRV_LOG(ERR, "Cannot change Rx WQ state to READY: %s", 590 strerror(errno)); 591 rte_errno = errno; 592 return ret; 593 } 594 /* Reinitialize RQ - set WQEs. */ 595 mlx5_rxq_initialize(rxq_data); 596 rxq_data->err_state = MLX5_RXQ_ERR_STATE_NO_ERROR; 597 /* Set actual queue state. */ 598 dev->data->rx_queue_state[idx] = RTE_ETH_QUEUE_STATE_STARTED; 599 return 0; 600 } 601 602 /** 603 * Rx queue start. Device queue goes to the ready state, 604 * all required mbufs are allocated and WQ is replenished. 605 * 606 * @param dev 607 * Pointer to Ethernet device structure. 608 * @param idx 609 * RX queue index. 610 * 611 * @return 612 * 0 on success, a negative errno value otherwise and rte_errno is set. 613 */ 614 int 615 mlx5_rx_queue_start(struct rte_eth_dev *dev, uint16_t idx) 616 { 617 int ret; 618 619 if (rte_eth_dev_is_rx_hairpin_queue(dev, idx)) { 620 DRV_LOG(ERR, "Hairpin queue can't be started"); 621 rte_errno = EINVAL; 622 return -EINVAL; 623 } 624 if (dev->data->rx_queue_state[idx] == RTE_ETH_QUEUE_STATE_STARTED) 625 return 0; 626 if (rte_eal_process_type() == RTE_PROC_SECONDARY) { 627 ret = mlx5_mp_os_req_queue_control(dev, idx, 628 MLX5_MP_REQ_QUEUE_RX_START); 629 } else { 630 ret = mlx5_rx_queue_start_primary(dev, idx); 631 } 632 return ret; 633 } 634 635 /** 636 * Rx queue presetup checks. 637 * 638 * @param dev 639 * Pointer to Ethernet device structure. 640 * @param idx 641 * RX queue index. 642 * @param desc 643 * Number of descriptors to configure in queue. 644 * @param[out] rxq_ctrl 645 * Address of pointer to shared Rx queue control. 646 * 647 * @return 648 * 0 on success, a negative errno value otherwise and rte_errno is set. 649 */ 650 static int 651 mlx5_rx_queue_pre_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t *desc, 652 struct mlx5_rxq_ctrl **rxq_ctrl) 653 { 654 struct mlx5_priv *priv = dev->data->dev_private; 655 struct mlx5_rxq_priv *rxq; 656 bool empty; 657 658 if (!rte_is_power_of_2(*desc)) { 659 *desc = 1 << log2above(*desc); 660 DRV_LOG(WARNING, 661 "port %u increased number of descriptors in Rx queue %u" 662 " to the next power of two (%d)", 663 dev->data->port_id, idx, *desc); 664 } 665 DRV_LOG(DEBUG, "port %u configuring Rx queue %u for %u descriptors", 666 dev->data->port_id, idx, *desc); 667 if (idx >= priv->rxqs_n) { 668 DRV_LOG(ERR, "port %u Rx queue index out of range (%u >= %u)", 669 dev->data->port_id, idx, priv->rxqs_n); 670 rte_errno = EOVERFLOW; 671 return -rte_errno; 672 } 673 if (rxq_ctrl == NULL || *rxq_ctrl == NULL) 674 return 0; 675 if (!(*rxq_ctrl)->rxq.shared) { 676 if (!mlx5_rxq_releasable(dev, idx)) { 677 DRV_LOG(ERR, "port %u unable to release queue index %u", 678 dev->data->port_id, idx); 679 rte_errno = EBUSY; 680 return -rte_errno; 681 } 682 mlx5_rxq_release(dev, idx); 683 } else { 684 if ((*rxq_ctrl)->obj != NULL) 685 /* Some port using shared Rx queue has been started. */ 686 return 0; 687 /* Release all owner RxQ to reconfigure Shared RxQ. */ 688 do { 689 rxq = LIST_FIRST(&(*rxq_ctrl)->owners); 690 LIST_REMOVE(rxq, owner_entry); 691 empty = LIST_EMPTY(&(*rxq_ctrl)->owners); 692 mlx5_rxq_release(ETH_DEV(rxq->priv), rxq->idx); 693 } while (!empty); 694 *rxq_ctrl = NULL; 695 } 696 return 0; 697 } 698 699 /** 700 * Get the shared Rx queue object that matches group and queue index. 701 * 702 * @param dev 703 * Pointer to Ethernet device structure. 
704 * @param group 705 * Shared RXQ group. 706 * @param share_qid 707 * Shared RX queue index. 708 * 709 * @return 710 * Shared RXQ object that matching, or NULL if not found. 711 */ 712 static struct mlx5_rxq_ctrl * 713 mlx5_shared_rxq_get(struct rte_eth_dev *dev, uint32_t group, uint16_t share_qid) 714 { 715 struct mlx5_rxq_ctrl *rxq_ctrl; 716 struct mlx5_priv *priv = dev->data->dev_private; 717 718 LIST_FOREACH(rxq_ctrl, &priv->sh->shared_rxqs, share_entry) { 719 if (rxq_ctrl->share_group == group && 720 rxq_ctrl->share_qid == share_qid) 721 return rxq_ctrl; 722 } 723 return NULL; 724 } 725 726 /** 727 * Check whether requested Rx queue configuration matches shared RXQ. 728 * 729 * @param rxq_ctrl 730 * Pointer to shared RXQ. 731 * @param dev 732 * Pointer to Ethernet device structure. 733 * @param idx 734 * Queue index. 735 * @param desc 736 * Number of descriptors to configure in queue. 737 * @param socket 738 * NUMA socket on which memory must be allocated. 739 * @param[in] conf 740 * Thresholds parameters. 741 * @param mp 742 * Memory pool for buffer allocations. 743 * 744 * @return 745 * 0 on success, a negative errno value otherwise and rte_errno is set. 746 */ 747 static bool 748 mlx5_shared_rxq_match(struct mlx5_rxq_ctrl *rxq_ctrl, struct rte_eth_dev *dev, 749 uint16_t idx, uint16_t desc, unsigned int socket, 750 const struct rte_eth_rxconf *conf, 751 struct rte_mempool *mp) 752 { 753 struct mlx5_priv *spriv = LIST_FIRST(&rxq_ctrl->owners)->priv; 754 struct mlx5_priv *priv = dev->data->dev_private; 755 unsigned int i; 756 757 RTE_SET_USED(conf); 758 if (rxq_ctrl->socket != socket) { 759 DRV_LOG(ERR, "port %u queue index %u failed to join shared group: socket mismatch", 760 dev->data->port_id, idx); 761 return false; 762 } 763 if (rxq_ctrl->rxq.elts_n != log2above(desc)) { 764 DRV_LOG(ERR, "port %u queue index %u failed to join shared group: descriptor number mismatch", 765 dev->data->port_id, idx); 766 return false; 767 } 768 if (priv->mtu != spriv->mtu) { 769 DRV_LOG(ERR, "port %u queue index %u failed to join shared group: mtu mismatch", 770 dev->data->port_id, idx); 771 return false; 772 } 773 if (priv->dev_data->dev_conf.intr_conf.rxq != 774 spriv->dev_data->dev_conf.intr_conf.rxq) { 775 DRV_LOG(ERR, "port %u queue index %u failed to join shared group: interrupt mismatch", 776 dev->data->port_id, idx); 777 return false; 778 } 779 if (mp != NULL && rxq_ctrl->rxq.mp != mp) { 780 DRV_LOG(ERR, "port %u queue index %u failed to join shared group: mempool mismatch", 781 dev->data->port_id, idx); 782 return false; 783 } else if (mp == NULL) { 784 if (conf->rx_nseg != rxq_ctrl->rxseg_n) { 785 DRV_LOG(ERR, "port %u queue index %u failed to join shared group: segment number mismatch", 786 dev->data->port_id, idx); 787 return false; 788 } 789 for (i = 0; i < conf->rx_nseg; i++) { 790 if (memcmp(&conf->rx_seg[i].split, &rxq_ctrl->rxseg[i], 791 sizeof(struct rte_eth_rxseg_split))) { 792 DRV_LOG(ERR, "port %u queue index %u failed to join shared group: segment %u configuration mismatch", 793 dev->data->port_id, idx, i); 794 return false; 795 } 796 } 797 } 798 if (priv->config.hw_padding != spriv->config.hw_padding) { 799 DRV_LOG(ERR, "port %u queue index %u failed to join shared group: padding mismatch", 800 dev->data->port_id, idx); 801 return false; 802 } 803 if (priv->config.cqe_comp != spriv->config.cqe_comp || 804 (priv->config.cqe_comp && 805 priv->config.cqe_comp_fmt != spriv->config.cqe_comp_fmt)) { 806 DRV_LOG(ERR, "port %u queue index %u failed to join shared group: CQE 
compression mismatch", 807 dev->data->port_id, idx); 808 return false; 809 } 810 return true; 811 } 812 813 /** 814 * 815 * @param dev 816 * Pointer to Ethernet device structure. 817 * @param idx 818 * RX queue index. 819 * @param desc 820 * Number of descriptors to configure in queue. 821 * @param socket 822 * NUMA socket on which memory must be allocated. 823 * @param[in] conf 824 * Thresholds parameters. 825 * @param mp 826 * Memory pool for buffer allocations. 827 * 828 * @return 829 * 0 on success, a negative errno value otherwise and rte_errno is set. 830 */ 831 int 832 mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, 833 unsigned int socket, const struct rte_eth_rxconf *conf, 834 struct rte_mempool *mp) 835 { 836 struct mlx5_priv *priv = dev->data->dev_private; 837 struct mlx5_rxq_priv *rxq; 838 struct mlx5_rxq_ctrl *rxq_ctrl = NULL; 839 struct rte_eth_rxseg_split *rx_seg = 840 (struct rte_eth_rxseg_split *)conf->rx_seg; 841 struct rte_eth_rxseg_split rx_single = {.mp = mp}; 842 uint16_t n_seg = conf->rx_nseg; 843 int res; 844 uint64_t offloads = conf->offloads | 845 dev->data->dev_conf.rxmode.offloads; 846 bool is_extmem = false; 847 848 if ((offloads & RTE_ETH_RX_OFFLOAD_TCP_LRO) && 849 !priv->sh->config.lro_allowed) { 850 DRV_LOG(ERR, 851 "Port %u queue %u LRO is configured but not allowed.", 852 dev->data->port_id, idx); 853 rte_errno = EINVAL; 854 return -rte_errno; 855 } 856 if (mp) { 857 /* 858 * The parameters should be checked on rte_eth_dev layer. 859 * If mp is specified it means the compatible configuration 860 * without buffer split feature tuning. 861 */ 862 rx_seg = &rx_single; 863 n_seg = 1; 864 is_extmem = rte_pktmbuf_priv_flags(mp) & 865 RTE_PKTMBUF_POOL_F_PINNED_EXT_BUF; 866 } 867 if (n_seg > 1) { 868 /* The offloads should be checked on rte_eth_dev layer. */ 869 MLX5_ASSERT(offloads & RTE_ETH_RX_OFFLOAD_SCATTER); 870 if (!(offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT)) { 871 DRV_LOG(ERR, "port %u queue index %u split " 872 "offload not configured", 873 dev->data->port_id, idx); 874 rte_errno = ENOSPC; 875 return -rte_errno; 876 } 877 MLX5_ASSERT(n_seg < MLX5_MAX_RXQ_NSEG); 878 } 879 if (conf->share_group > 0) { 880 if (!priv->sh->cdev->config.hca_attr.mem_rq_rmp) { 881 DRV_LOG(ERR, "port %u queue index %u shared Rx queue not supported by fw", 882 dev->data->port_id, idx); 883 rte_errno = EINVAL; 884 return -rte_errno; 885 } 886 if (priv->obj_ops.rxq_obj_new != devx_obj_ops.rxq_obj_new) { 887 DRV_LOG(ERR, "port %u queue index %u shared Rx queue needs DevX api", 888 dev->data->port_id, idx); 889 rte_errno = EINVAL; 890 return -rte_errno; 891 } 892 if (conf->share_qid >= priv->rxqs_n) { 893 DRV_LOG(ERR, "port %u shared Rx queue index %u > number of Rx queues %u", 894 dev->data->port_id, conf->share_qid, 895 priv->rxqs_n); 896 rte_errno = EINVAL; 897 return -rte_errno; 898 } 899 if (priv->config.mprq.enabled) { 900 DRV_LOG(ERR, "port %u shared Rx queue index %u: not supported when MPRQ enabled", 901 dev->data->port_id, conf->share_qid); 902 rte_errno = EINVAL; 903 return -rte_errno; 904 } 905 /* Try to reuse shared RXQ. 
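 * A queue joins a shared RXQ when the application passes a non-zero
 * share_group in rte_eth_rxconf. A rough application-side sketch
 * (port_id, dev_info and mp are illustrative names):
 *
 *   struct rte_eth_rxconf rxconf = dev_info.default_rxconf;
 *
 *   rxconf.share_group = 1;  (non-zero selects the share group)
 *   rxconf.share_qid = 0;    (queue index inside that group)
 *   rte_eth_rx_queue_setup(port_id, 0, 1024, SOCKET_ID_ANY,
 *                          &rxconf, mp);
 *
 * Every member port must pass mlx5_shared_rxq_match() above.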
*/ 906 rxq_ctrl = mlx5_shared_rxq_get(dev, conf->share_group, 907 conf->share_qid); 908 res = mlx5_rx_queue_pre_setup(dev, idx, &desc, &rxq_ctrl); 909 if (res) 910 return res; 911 if (rxq_ctrl != NULL && 912 !mlx5_shared_rxq_match(rxq_ctrl, dev, idx, desc, socket, 913 conf, mp)) { 914 rte_errno = EINVAL; 915 return -rte_errno; 916 } 917 } else { 918 res = mlx5_rx_queue_pre_setup(dev, idx, &desc, &rxq_ctrl); 919 if (res) 920 return res; 921 } 922 /* Allocate RXQ. */ 923 rxq = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, sizeof(*rxq), 0, 924 SOCKET_ID_ANY); 925 if (!rxq) { 926 DRV_LOG(ERR, "port %u unable to allocate rx queue index %u private data", 927 dev->data->port_id, idx); 928 rte_errno = ENOMEM; 929 return -rte_errno; 930 } 931 if (rxq_ctrl == NULL) { 932 rxq_ctrl = mlx5_rxq_new(dev, idx, desc, socket, conf, rx_seg, 933 n_seg, is_extmem); 934 if (rxq_ctrl == NULL) { 935 DRV_LOG(ERR, "port %u unable to allocate rx queue index %u", 936 dev->data->port_id, idx); 937 mlx5_free(rxq); 938 rte_errno = ENOMEM; 939 return -rte_errno; 940 } 941 } 942 rxq->priv = priv; 943 rxq->idx = idx; 944 (*priv->rxq_privs)[idx] = rxq; 945 /* Join owner list. */ 946 LIST_INSERT_HEAD(&rxq_ctrl->owners, rxq, owner_entry); 947 rxq->ctrl = rxq_ctrl; 948 mlx5_rxq_ref(dev, idx); 949 DRV_LOG(DEBUG, "port %u adding Rx queue %u to list", 950 dev->data->port_id, idx); 951 dev->data->rx_queues[idx] = &rxq_ctrl->rxq; 952 return 0; 953 } 954 955 /** 956 * 957 * @param dev 958 * Pointer to Ethernet device structure. 959 * @param idx 960 * RX queue index. 961 * @param desc 962 * Number of descriptors to configure in queue. 963 * @param hairpin_conf 964 * Hairpin configuration parameters. 965 * 966 * @return 967 * 0 on success, a negative errno value otherwise and rte_errno is set. 968 */ 969 int 970 mlx5_rx_hairpin_queue_setup(struct rte_eth_dev *dev, uint16_t idx, 971 uint16_t desc, 972 const struct rte_eth_hairpin_conf *hairpin_conf) 973 { 974 struct mlx5_priv *priv = dev->data->dev_private; 975 struct mlx5_rxq_priv *rxq; 976 struct mlx5_rxq_ctrl *rxq_ctrl; 977 int res; 978 979 res = mlx5_rx_queue_pre_setup(dev, idx, &desc, NULL); 980 if (res) 981 return res; 982 if (hairpin_conf->peer_count != 1) { 983 rte_errno = EINVAL; 984 DRV_LOG(ERR, "port %u unable to setup Rx hairpin queue index %u" 985 " peer count is %u", dev->data->port_id, 986 idx, hairpin_conf->peer_count); 987 return -rte_errno; 988 } 989 if (hairpin_conf->peers[0].port == dev->data->port_id) { 990 if (hairpin_conf->peers[0].queue >= priv->txqs_n) { 991 rte_errno = EINVAL; 992 DRV_LOG(ERR, "port %u unable to setup Rx hairpin queue" 993 " index %u, Tx %u is larger than %u", 994 dev->data->port_id, idx, 995 hairpin_conf->peers[0].queue, priv->txqs_n); 996 return -rte_errno; 997 } 998 } else { 999 if (hairpin_conf->manual_bind == 0 || 1000 hairpin_conf->tx_explicit == 0) { 1001 rte_errno = EINVAL; 1002 DRV_LOG(ERR, "port %u unable to setup Rx hairpin queue" 1003 " index %u peer port %u with attributes %u %u", 1004 dev->data->port_id, idx, 1005 hairpin_conf->peers[0].port, 1006 hairpin_conf->manual_bind, 1007 hairpin_conf->tx_explicit); 1008 return -rte_errno; 1009 } 1010 } 1011 rxq = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, sizeof(*rxq), 0, 1012 SOCKET_ID_ANY); 1013 if (!rxq) { 1014 DRV_LOG(ERR, "port %u unable to allocate hairpin rx queue index %u private data", 1015 dev->data->port_id, idx); 1016 rte_errno = ENOMEM; 1017 return -rte_errno; 1018 } 1019 rxq->priv = priv; 1020 rxq->idx = idx; 1021 (*priv->rxq_privs)[idx] = rxq; 1022 rxq_ctrl = 
mlx5_rxq_hairpin_new(dev, rxq, desc, hairpin_conf); 1023 if (!rxq_ctrl) { 1024 DRV_LOG(ERR, "port %u unable to allocate hairpin queue index %u", 1025 dev->data->port_id, idx); 1026 mlx5_free(rxq); 1027 (*priv->rxq_privs)[idx] = NULL; 1028 rte_errno = ENOMEM; 1029 return -rte_errno; 1030 } 1031 DRV_LOG(DEBUG, "port %u adding hairpin Rx queue %u to list", 1032 dev->data->port_id, idx); 1033 dev->data->rx_queues[idx] = &rxq_ctrl->rxq; 1034 return 0; 1035 } 1036 1037 /** 1038 * DPDK callback to release a RX queue. 1039 * 1040 * @param dev 1041 * Pointer to Ethernet device structure. 1042 * @param qid 1043 * Receive queue index. 1044 */ 1045 void 1046 mlx5_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid) 1047 { 1048 struct mlx5_rxq_data *rxq = dev->data->rx_queues[qid]; 1049 1050 if (rxq == NULL) 1051 return; 1052 if (!mlx5_rxq_releasable(dev, qid)) 1053 rte_panic("port %u Rx queue %u is still used by a flow and" 1054 " cannot be removed\n", dev->data->port_id, qid); 1055 mlx5_rxq_release(dev, qid); 1056 } 1057 1058 /** 1059 * Allocate queue vector and fill epoll fd list for Rx interrupts. 1060 * 1061 * @param dev 1062 * Pointer to Ethernet device. 1063 * 1064 * @return 1065 * 0 on success, a negative errno value otherwise and rte_errno is set. 1066 */ 1067 int 1068 mlx5_rx_intr_vec_enable(struct rte_eth_dev *dev) 1069 { 1070 struct mlx5_priv *priv = dev->data->dev_private; 1071 unsigned int i; 1072 unsigned int rxqs_n = priv->rxqs_n; 1073 unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID); 1074 unsigned int count = 0; 1075 struct rte_intr_handle *intr_handle = dev->intr_handle; 1076 1077 if (!dev->data->dev_conf.intr_conf.rxq) 1078 return 0; 1079 mlx5_rx_intr_vec_disable(dev); 1080 if (rte_intr_vec_list_alloc(intr_handle, NULL, n)) { 1081 DRV_LOG(ERR, 1082 "port %u failed to allocate memory for interrupt" 1083 " vector, Rx interrupts will not be supported", 1084 dev->data->port_id); 1085 rte_errno = ENOMEM; 1086 return -rte_errno; 1087 } 1088 1089 if (rte_intr_type_set(intr_handle, RTE_INTR_HANDLE_EXT)) 1090 return -rte_errno; 1091 1092 for (i = 0; i != n; ++i) { 1093 /* This rxq obj must not be released in this function. */ 1094 struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, i); 1095 struct mlx5_rxq_obj *rxq_obj = rxq ? rxq->ctrl->obj : NULL; 1096 int rc; 1097 1098 /* Skip queues that cannot request interrupts. */ 1099 if (!rxq_obj || (!rxq_obj->ibv_channel && 1100 !rxq_obj->devx_channel)) { 1101 /* Use invalid intr_vec[] index to disable entry. 
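 * RTE_INTR_VEC_RXTX_OFFSET + RTE_MAX_RXTX_INTR_VEC_ID lies outside
 * the valid vector range, so mlx5_rx_intr_vec_disable() can detect
 * such entries and avoid dropping a queue reference that was never
 * taken.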
*/ 1102 if (rte_intr_vec_list_index_set(intr_handle, i, 1103 RTE_INTR_VEC_RXTX_OFFSET + RTE_MAX_RXTX_INTR_VEC_ID)) 1104 return -rte_errno; 1105 continue; 1106 } 1107 mlx5_rxq_ref(dev, i); 1108 if (count >= RTE_MAX_RXTX_INTR_VEC_ID) { 1109 DRV_LOG(ERR, 1110 "port %u too many Rx queues for interrupt" 1111 " vector size (%d), Rx interrupts cannot be" 1112 " enabled", 1113 dev->data->port_id, RTE_MAX_RXTX_INTR_VEC_ID); 1114 mlx5_rx_intr_vec_disable(dev); 1115 rte_errno = ENOMEM; 1116 return -rte_errno; 1117 } 1118 rc = mlx5_os_set_nonblock_channel_fd(rxq_obj->fd); 1119 if (rc < 0) { 1120 rte_errno = errno; 1121 DRV_LOG(ERR, 1122 "port %u failed to make Rx interrupt file" 1123 " descriptor %d non-blocking for queue index" 1124 " %d", 1125 dev->data->port_id, rxq_obj->fd, i); 1126 mlx5_rx_intr_vec_disable(dev); 1127 return -rte_errno; 1128 } 1129 1130 if (rte_intr_vec_list_index_set(intr_handle, i, 1131 RTE_INTR_VEC_RXTX_OFFSET + count)) 1132 return -rte_errno; 1133 if (rte_intr_efds_index_set(intr_handle, count, 1134 rxq_obj->fd)) 1135 return -rte_errno; 1136 count++; 1137 } 1138 if (!count) 1139 mlx5_rx_intr_vec_disable(dev); 1140 else if (rte_intr_nb_efd_set(intr_handle, count)) 1141 return -rte_errno; 1142 return 0; 1143 } 1144 1145 /** 1146 * Clean up Rx interrupts handler. 1147 * 1148 * @param dev 1149 * Pointer to Ethernet device. 1150 */ 1151 void 1152 mlx5_rx_intr_vec_disable(struct rte_eth_dev *dev) 1153 { 1154 struct mlx5_priv *priv = dev->data->dev_private; 1155 struct rte_intr_handle *intr_handle = dev->intr_handle; 1156 unsigned int i; 1157 unsigned int rxqs_n = priv->rxqs_n; 1158 unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID); 1159 1160 if (!dev->data->dev_conf.intr_conf.rxq) 1161 return; 1162 if (rte_intr_vec_list_index_get(intr_handle, 0) < 0) 1163 goto free; 1164 for (i = 0; i != n; ++i) { 1165 if (rte_intr_vec_list_index_get(intr_handle, i) == 1166 RTE_INTR_VEC_RXTX_OFFSET + RTE_MAX_RXTX_INTR_VEC_ID) 1167 continue; 1168 /** 1169 * Need to access directly the queue to release the reference 1170 * kept in mlx5_rx_intr_vec_enable(). 1171 */ 1172 mlx5_rxq_deref(dev, i); 1173 } 1174 free: 1175 rte_intr_free_epoll_fd(intr_handle); 1176 1177 rte_intr_vec_list_free(intr_handle); 1178 1179 rte_intr_nb_efd_set(intr_handle, 0); 1180 } 1181 1182 /** 1183 * MLX5 CQ notification . 1184 * 1185 * @param rxq 1186 * Pointer to receive queue structure. 1187 * @param sq_n_rxq 1188 * Sequence number per receive queue . 1189 */ 1190 static inline void 1191 mlx5_arm_cq(struct mlx5_rxq_data *rxq, int sq_n_rxq) 1192 { 1193 int sq_n = 0; 1194 uint32_t doorbell_hi; 1195 uint64_t doorbell; 1196 1197 sq_n = sq_n_rxq & MLX5_CQ_SQN_MASK; 1198 doorbell_hi = sq_n << MLX5_CQ_SQN_OFFSET | (rxq->cq_ci & MLX5_CI_MASK); 1199 doorbell = (uint64_t)doorbell_hi << 32; 1200 doorbell |= rxq->cqn; 1201 mlx5_doorbell_ring(&rxq->uar_data, rte_cpu_to_be_64(doorbell), 1202 doorbell_hi, &rxq->cq_db[MLX5_CQ_ARM_DB], 0); 1203 } 1204 1205 /** 1206 * DPDK callback for Rx queue interrupt enable. 1207 * 1208 * @param dev 1209 * Pointer to Ethernet device structure. 1210 * @param rx_queue_id 1211 * Rx queue number. 1212 * 1213 * @return 1214 * 0 on success, a negative errno value otherwise and rte_errno is set. 
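 *
 * A rough application-side usage sketch (error handling omitted,
 * event and timeout_ms are illustrative names):
 *
 *   rte_eth_dev_rx_intr_ctl_q(port_id, queue_id, RTE_EPOLL_PER_THREAD,
 *                             RTE_INTR_EVENT_ADD, NULL);
 *   rte_eth_dev_rx_intr_enable(port_id, queue_id);
 *   rte_epoll_wait(RTE_EPOLL_PER_THREAD, &event, 1, timeout_ms);
 *   rte_eth_dev_rx_intr_disable(port_id, queue_id);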
1215 */ 1216 int 1217 mlx5_rx_intr_enable(struct rte_eth_dev *dev, uint16_t rx_queue_id) 1218 { 1219 struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, rx_queue_id); 1220 if (!rxq) 1221 goto error; 1222 if (rxq->ctrl->irq) { 1223 if (!rxq->ctrl->obj) 1224 goto error; 1225 mlx5_arm_cq(&rxq->ctrl->rxq, rxq->ctrl->rxq.cq_arm_sn); 1226 } 1227 return 0; 1228 error: 1229 rte_errno = EINVAL; 1230 return -rte_errno; 1231 } 1232 1233 /** 1234 * DPDK callback for Rx queue interrupt disable. 1235 * 1236 * @param dev 1237 * Pointer to Ethernet device structure. 1238 * @param rx_queue_id 1239 * Rx queue number. 1240 * 1241 * @return 1242 * 0 on success, a negative errno value otherwise and rte_errno is set. 1243 */ 1244 int 1245 mlx5_rx_intr_disable(struct rte_eth_dev *dev, uint16_t rx_queue_id) 1246 { 1247 struct mlx5_priv *priv = dev->data->dev_private; 1248 struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, rx_queue_id); 1249 int ret = 0; 1250 1251 if (!rxq) { 1252 rte_errno = EINVAL; 1253 return -rte_errno; 1254 } 1255 if (!rxq->ctrl->obj) 1256 goto error; 1257 if (rxq->ctrl->irq) { 1258 ret = priv->obj_ops.rxq_event_get(rxq->ctrl->obj); 1259 if (ret < 0) 1260 goto error; 1261 rxq->ctrl->rxq.cq_arm_sn++; 1262 } 1263 return 0; 1264 error: 1265 /** 1266 * The ret variable may be EAGAIN which means the get_event function was 1267 * called before receiving one. 1268 */ 1269 if (ret < 0) 1270 rte_errno = errno; 1271 else 1272 rte_errno = EINVAL; 1273 if (rte_errno != EAGAIN) 1274 DRV_LOG(WARNING, "port %u unable to disable interrupt on Rx queue %d", 1275 dev->data->port_id, rx_queue_id); 1276 return -rte_errno; 1277 } 1278 1279 /** 1280 * Verify the Rx queue objects list is empty 1281 * 1282 * @param dev 1283 * Pointer to Ethernet device. 1284 * 1285 * @return 1286 * The number of objects not released. 1287 */ 1288 int 1289 mlx5_rxq_obj_verify(struct rte_eth_dev *dev) 1290 { 1291 struct mlx5_priv *priv = dev->data->dev_private; 1292 int ret = 0; 1293 struct mlx5_rxq_obj *rxq_obj; 1294 1295 LIST_FOREACH(rxq_obj, &priv->rxqsobj, next) { 1296 if (rxq_obj->rxq_ctrl == NULL) 1297 continue; 1298 if (rxq_obj->rxq_ctrl->rxq.shared && 1299 !LIST_EMPTY(&rxq_obj->rxq_ctrl->owners)) 1300 continue; 1301 DRV_LOG(DEBUG, "port %u Rx queue %u still referenced", 1302 dev->data->port_id, rxq_obj->rxq_ctrl->rxq.idx); 1303 ++ret; 1304 } 1305 return ret; 1306 } 1307 1308 /** 1309 * Callback function to initialize mbufs for Multi-Packet RQ. 1310 */ 1311 static inline void 1312 mlx5_mprq_buf_init(struct rte_mempool *mp, void *opaque_arg, 1313 void *_m, unsigned int i __rte_unused) 1314 { 1315 struct mlx5_mprq_buf *buf = _m; 1316 struct rte_mbuf_ext_shared_info *shinfo; 1317 unsigned int strd_n = (unsigned int)(uintptr_t)opaque_arg; 1318 unsigned int j; 1319 1320 memset(_m, 0, sizeof(*buf)); 1321 buf->mp = mp; 1322 __atomic_store_n(&buf->refcnt, 1, __ATOMIC_RELAXED); 1323 for (j = 0; j != strd_n; ++j) { 1324 shinfo = &buf->shinfos[j]; 1325 shinfo->free_cb = mlx5_mprq_buf_free_cb; 1326 shinfo->fcb_opaque = buf; 1327 } 1328 } 1329 1330 /** 1331 * Free mempool of Multi-Packet RQ. 1332 * 1333 * @param dev 1334 * Pointer to Ethernet device. 1335 * 1336 * @return 1337 * 0 on success, negative errno value on failure. 
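 *
 * The mempool can only be released once rte_mempool_full() reports
 * that no MPRQ buffer is still attached to in-flight mbufs held by
 * the application; otherwise the call fails with EBUSY.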
1338 */ 1339 int 1340 mlx5_mprq_free_mp(struct rte_eth_dev *dev) 1341 { 1342 struct mlx5_priv *priv = dev->data->dev_private; 1343 struct rte_mempool *mp = priv->mprq_mp; 1344 unsigned int i; 1345 1346 if (mp == NULL) 1347 return 0; 1348 DRV_LOG(DEBUG, "port %u freeing mempool (%s) for Multi-Packet RQ", 1349 dev->data->port_id, mp->name); 1350 /* 1351 * If a buffer in the pool has been externally attached to a mbuf and it 1352 * is still in use by application, destroying the Rx queue can spoil 1353 * the packet. It is unlikely to happen but if application dynamically 1354 * creates and destroys with holding Rx packets, this can happen. 1355 * 1356 * TODO: It is unavoidable for now because the mempool for Multi-Packet 1357 * RQ isn't provided by application but managed by PMD. 1358 */ 1359 if (!rte_mempool_full(mp)) { 1360 DRV_LOG(ERR, 1361 "port %u mempool for Multi-Packet RQ is still in use", 1362 dev->data->port_id); 1363 rte_errno = EBUSY; 1364 return -rte_errno; 1365 } 1366 rte_mempool_free(mp); 1367 /* Unset mempool for each Rx queue. */ 1368 for (i = 0; i != priv->rxqs_n; ++i) { 1369 struct mlx5_rxq_data *rxq = mlx5_rxq_data_get(dev, i); 1370 1371 if (rxq == NULL) 1372 continue; 1373 rxq->mprq_mp = NULL; 1374 } 1375 priv->mprq_mp = NULL; 1376 return 0; 1377 } 1378 1379 /** 1380 * Allocate a mempool for Multi-Packet RQ. All configured Rx queues share the 1381 * mempool. If already allocated, reuse it if there're enough elements. 1382 * Otherwise, resize it. 1383 * 1384 * @param dev 1385 * Pointer to Ethernet device. 1386 * 1387 * @return 1388 * 0 on success, negative errno value on failure. 1389 */ 1390 int 1391 mlx5_mprq_alloc_mp(struct rte_eth_dev *dev) 1392 { 1393 struct mlx5_priv *priv = dev->data->dev_private; 1394 struct rte_mempool *mp = priv->mprq_mp; 1395 char name[RTE_MEMPOOL_NAMESIZE]; 1396 unsigned int desc = 0; 1397 unsigned int buf_len; 1398 unsigned int obj_num; 1399 unsigned int obj_size; 1400 unsigned int log_strd_num = 0; 1401 unsigned int log_strd_sz = 0; 1402 unsigned int i; 1403 unsigned int n_ibv = 0; 1404 int ret; 1405 1406 if (!mlx5_mprq_enabled(dev)) 1407 return 0; 1408 /* Count the total number of descriptors configured. */ 1409 for (i = 0; i != priv->rxqs_n; ++i) { 1410 struct mlx5_rxq_ctrl *rxq_ctrl = mlx5_rxq_ctrl_get(dev, i); 1411 struct mlx5_rxq_data *rxq; 1412 1413 if (rxq_ctrl == NULL || rxq_ctrl->is_hairpin) 1414 continue; 1415 rxq = &rxq_ctrl->rxq; 1416 n_ibv++; 1417 desc += 1 << rxq->elts_n; 1418 /* Get the max number of strides. */ 1419 if (log_strd_num < rxq->log_strd_num) 1420 log_strd_num = rxq->log_strd_num; 1421 /* Get the max size of a stride. */ 1422 if (log_strd_sz < rxq->log_strd_sz) 1423 log_strd_sz = rxq->log_strd_sz; 1424 } 1425 MLX5_ASSERT(log_strd_num && log_strd_sz); 1426 buf_len = RTE_BIT32(log_strd_num) * RTE_BIT32(log_strd_sz); 1427 obj_size = sizeof(struct mlx5_mprq_buf) + buf_len + 1428 RTE_BIT32(log_strd_num) * 1429 sizeof(struct rte_mbuf_ext_shared_info) + 1430 RTE_PKTMBUF_HEADROOM; 1431 /* 1432 * Received packets can be either memcpy'd or externally referenced. In 1433 * case that the packet is attached to an mbuf as an external buffer, as 1434 * it isn't possible to predict how the buffers will be queued by 1435 * application, there's no option to exactly pre-allocate needed buffers 1436 * in advance but to speculatively prepares enough buffers. 
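 * Each pool object therefore carries a whole WQE worth of strides:
 * sizeof(struct mlx5_mprq_buf) plus 2^log_strd_num * 2^log_strd_sz
 * bytes of data, one rte_mbuf_ext_shared_info per stride and the
 * headroom, and the pool is sized to four times the total number of
 * MPRQ WQEs plus a per-queue cache reserve, as computed below.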
1437 * 1438 * In the data path, if this Mempool is depleted, PMD will try to memcpy 1439 * received packets to buffers provided by application (rxq->mp) until 1440 * this Mempool gets available again. 1441 */ 1442 desc *= 4; 1443 obj_num = desc + MLX5_MPRQ_MP_CACHE_SZ * n_ibv; 1444 /* 1445 * rte_mempool_create_empty() has sanity check to refuse large cache 1446 * size compared to the number of elements. 1447 * CALC_CACHE_FLUSHTHRESH() is defined in a C file, so using a 1448 * constant number 2 instead. 1449 */ 1450 obj_num = RTE_MAX(obj_num, MLX5_MPRQ_MP_CACHE_SZ * 2); 1451 /* Check a mempool is already allocated and if it can be resued. */ 1452 if (mp != NULL && mp->elt_size >= obj_size && mp->size >= obj_num) { 1453 DRV_LOG(DEBUG, "port %u mempool %s is being reused", 1454 dev->data->port_id, mp->name); 1455 /* Reuse. */ 1456 goto exit; 1457 } else if (mp != NULL) { 1458 DRV_LOG(DEBUG, "port %u mempool %s should be resized, freeing it", 1459 dev->data->port_id, mp->name); 1460 /* 1461 * If failed to free, which means it may be still in use, no way 1462 * but to keep using the existing one. On buffer underrun, 1463 * packets will be memcpy'd instead of external buffer 1464 * attachment. 1465 */ 1466 if (mlx5_mprq_free_mp(dev)) { 1467 if (mp->elt_size >= obj_size) 1468 goto exit; 1469 else 1470 return -rte_errno; 1471 } 1472 } 1473 snprintf(name, sizeof(name), "port-%u-mprq", dev->data->port_id); 1474 mp = rte_mempool_create(name, obj_num, obj_size, MLX5_MPRQ_MP_CACHE_SZ, 1475 0, NULL, NULL, mlx5_mprq_buf_init, 1476 (void *)((uintptr_t)1 << log_strd_num), 1477 dev->device->numa_node, 0); 1478 if (mp == NULL) { 1479 DRV_LOG(ERR, 1480 "port %u failed to allocate a mempool for" 1481 " Multi-Packet RQ, count=%u, size=%u", 1482 dev->data->port_id, obj_num, obj_size); 1483 rte_errno = ENOMEM; 1484 return -rte_errno; 1485 } 1486 ret = mlx5_mr_mempool_register(priv->sh->cdev, mp, false); 1487 if (ret < 0 && rte_errno != EEXIST) { 1488 ret = rte_errno; 1489 DRV_LOG(ERR, "port %u failed to register a mempool for Multi-Packet RQ", 1490 dev->data->port_id); 1491 rte_mempool_free(mp); 1492 rte_errno = ret; 1493 return -rte_errno; 1494 } 1495 priv->mprq_mp = mp; 1496 exit: 1497 /* Set mempool for each Rx queue. */ 1498 for (i = 0; i != priv->rxqs_n; ++i) { 1499 struct mlx5_rxq_ctrl *rxq_ctrl = mlx5_rxq_ctrl_get(dev, i); 1500 1501 if (rxq_ctrl == NULL || rxq_ctrl->is_hairpin) 1502 continue; 1503 rxq_ctrl->rxq.mprq_mp = mp; 1504 } 1505 DRV_LOG(INFO, "port %u Multi-Packet RQ is configured", 1506 dev->data->port_id); 1507 return 0; 1508 } 1509 1510 #define MLX5_MAX_TCP_HDR_OFFSET ((unsigned int)(sizeof(struct rte_ether_hdr) + \ 1511 sizeof(struct rte_vlan_hdr) * 2 + \ 1512 sizeof(struct rte_ipv6_hdr))) 1513 #define MAX_TCP_OPTION_SIZE 40u 1514 #define MLX5_MAX_LRO_HEADER_FIX ((unsigned int)(MLX5_MAX_TCP_HDR_OFFSET + \ 1515 sizeof(struct rte_tcp_hdr) + \ 1516 MAX_TCP_OPTION_SIZE)) 1517 1518 /** 1519 * Adjust the maximum LRO massage size. 1520 * 1521 * @param dev 1522 * Pointer to Ethernet device. 1523 * @param idx 1524 * RX queue index. 1525 * @param max_lro_size 1526 * The maximum size for LRO packet. 
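 *
 * For example, when the firmware reports
 * MLX5_LRO_MAX_MSG_SIZE_START_FROM_L4 the worst-case L2/L3 header
 * room (MLX5_MAX_TCP_HDR_OFFSET, i.e. Ethernet + two VLAN tags +
 * IPv6 = 62 bytes) is subtracted from the requested size before it
 * is clamped to MLX5_MAX_LRO_SIZE and merged with the per-port
 * minimum.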
1527 */ 1528 static void 1529 mlx5_max_lro_msg_size_adjust(struct rte_eth_dev *dev, uint16_t idx, 1530 uint32_t max_lro_size) 1531 { 1532 struct mlx5_priv *priv = dev->data->dev_private; 1533 1534 if (priv->sh->cdev->config.hca_attr.lro_max_msg_sz_mode == 1535 MLX5_LRO_MAX_MSG_SIZE_START_FROM_L4 && max_lro_size > 1536 MLX5_MAX_TCP_HDR_OFFSET) 1537 max_lro_size -= MLX5_MAX_TCP_HDR_OFFSET; 1538 max_lro_size = RTE_MIN(max_lro_size, MLX5_MAX_LRO_SIZE); 1539 if (priv->max_lro_msg_size) 1540 priv->max_lro_msg_size = 1541 RTE_MIN((uint32_t)priv->max_lro_msg_size, max_lro_size); 1542 else 1543 priv->max_lro_msg_size = max_lro_size; 1544 DRV_LOG(DEBUG, 1545 "port %u Rx Queue %u max LRO message size adjusted to %u bytes", 1546 dev->data->port_id, idx, priv->max_lro_msg_size); 1547 } 1548 1549 /** 1550 * Prepare both size and number of stride for Multi-Packet RQ. 1551 * 1552 * @param dev 1553 * Pointer to Ethernet device. 1554 * @param idx 1555 * RX queue index. 1556 * @param desc 1557 * Number of descriptors to configure in queue. 1558 * @param rx_seg_en 1559 * Indicator if Rx segment enables, if so Multi-Packet RQ doesn't enable. 1560 * @param min_mbuf_size 1561 * Non scatter min mbuf size, max_rx_pktlen plus overhead. 1562 * @param actual_log_stride_num 1563 * Log number of strides to configure for this queue. 1564 * @param actual_log_stride_size 1565 * Log stride size to configure for this queue. 1566 * @param is_extmem 1567 * Is external pinned memory pool used. 1568 * @return 1569 * 0 if Multi-Packet RQ is supported, otherwise -1. 1570 */ 1571 static int 1572 mlx5_mprq_prepare(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, 1573 bool rx_seg_en, uint32_t min_mbuf_size, 1574 uint32_t *actual_log_stride_num, 1575 uint32_t *actual_log_stride_size, 1576 bool is_extmem) 1577 { 1578 struct mlx5_priv *priv = dev->data->dev_private; 1579 struct mlx5_port_config *config = &priv->config; 1580 struct mlx5_dev_cap *dev_cap = &priv->sh->dev_cap; 1581 uint32_t log_min_stride_num = dev_cap->mprq.log_min_stride_num; 1582 uint32_t log_max_stride_num = dev_cap->mprq.log_max_stride_num; 1583 uint32_t log_def_stride_num = 1584 RTE_MIN(RTE_MAX(MLX5_MPRQ_DEFAULT_LOG_STRIDE_NUM, 1585 log_min_stride_num), 1586 log_max_stride_num); 1587 uint32_t log_min_stride_size = dev_cap->mprq.log_min_stride_size; 1588 uint32_t log_max_stride_size = dev_cap->mprq.log_max_stride_size; 1589 uint32_t log_def_stride_size = 1590 RTE_MIN(RTE_MAX(MLX5_MPRQ_DEFAULT_LOG_STRIDE_SIZE, 1591 log_min_stride_size), 1592 log_max_stride_size); 1593 uint32_t log_stride_wqe_size; 1594 1595 if (mlx5_check_mprq_support(dev) != 1 || rx_seg_en || is_extmem) 1596 goto unsupport; 1597 /* Checks if chosen number of strides is in supported range. */ 1598 if (config->mprq.log_stride_num > log_max_stride_num || 1599 config->mprq.log_stride_num < log_min_stride_num) { 1600 *actual_log_stride_num = log_def_stride_num; 1601 DRV_LOG(WARNING, 1602 "Port %u Rx queue %u number of strides for Multi-Packet RQ is out of range, setting default value (%u)", 1603 dev->data->port_id, idx, RTE_BIT32(log_def_stride_num)); 1604 } else { 1605 *actual_log_stride_num = config->mprq.log_stride_num; 1606 } 1607 /* Checks if chosen size of stride is in supported range. 
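 * When config->mprq.log_stride_size is left at MLX5_ARG_UNSET the
 * stride size defaults to the smallest power of two covering the
 * non-scatter mbuf size, e.g. a 2176-byte minimal mbuf size would
 * select 4 KiB strides (log2above() rounds up), subject to the
 * device limits checked below.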
*/ 1608 if (config->mprq.log_stride_size != (uint32_t)MLX5_ARG_UNSET) { 1609 if (config->mprq.log_stride_size > log_max_stride_size || 1610 config->mprq.log_stride_size < log_min_stride_size) { 1611 *actual_log_stride_size = log_def_stride_size; 1612 DRV_LOG(WARNING, 1613 "Port %u Rx queue %u size of a stride for Multi-Packet RQ is out of range, setting default value (%u)", 1614 dev->data->port_id, idx, 1615 RTE_BIT32(log_def_stride_size)); 1616 } else { 1617 *actual_log_stride_size = config->mprq.log_stride_size; 1618 } 1619 } else { 1620 /* Make the stride fit the mbuf size by default. */ 1621 if (min_mbuf_size <= RTE_BIT32(log_max_stride_size)) { 1622 DRV_LOG(WARNING, 1623 "Port %u Rx queue %u size of a stride for Multi-Packet RQ is adjusted to match the mbuf size (%u)", 1624 dev->data->port_id, idx, min_mbuf_size); 1625 *actual_log_stride_size = log2above(min_mbuf_size); 1626 } else { 1627 goto unsupport; 1628 } 1629 } 1630 /* Make sure the stride size is greater than the headroom. */ 1631 if (RTE_BIT32(*actual_log_stride_size) < RTE_PKTMBUF_HEADROOM) { 1632 if (RTE_BIT32(log_max_stride_size) > RTE_PKTMBUF_HEADROOM) { 1633 DRV_LOG(WARNING, 1634 "Port %u Rx queue %u size of a stride for Multi-Packet RQ is adjusted to accommodate the headroom (%u)", 1635 dev->data->port_id, idx, RTE_PKTMBUF_HEADROOM); 1636 *actual_log_stride_size = log2above(RTE_PKTMBUF_HEADROOM); 1637 } else { 1638 goto unsupport; 1639 } 1640 } 1641 log_stride_wqe_size = *actual_log_stride_num + *actual_log_stride_size; 1642 /* Check if WQE buffer size is supported by hardware. */ 1643 if (log_stride_wqe_size < dev_cap->mprq.log_min_stride_wqe_size) { 1644 *actual_log_stride_num = log_def_stride_num; 1645 *actual_log_stride_size = log_def_stride_size; 1646 DRV_LOG(WARNING, 1647 "Port %u Rx queue %u size of WQE buffer for Multi-Packet RQ is too small, setting default values (stride_num_n=%u, stride_size_n=%u)", 1648 dev->data->port_id, idx, RTE_BIT32(log_def_stride_num), 1649 RTE_BIT32(log_def_stride_size)); 1650 log_stride_wqe_size = log_def_stride_num + log_def_stride_size; 1651 } 1652 MLX5_ASSERT(log_stride_wqe_size >= 1653 dev_cap->mprq.log_min_stride_wqe_size); 1654 if (desc <= RTE_BIT32(*actual_log_stride_num)) 1655 goto unsupport; 1656 if (min_mbuf_size > RTE_BIT32(log_stride_wqe_size)) { 1657 DRV_LOG(WARNING, "Port %u Rx queue %u " 1658 "Multi-Packet RQ is unsupported, WQE buffer size (%u) " 1659 "is smaller than min mbuf size (%u)", 1660 dev->data->port_id, idx, RTE_BIT32(log_stride_wqe_size), 1661 min_mbuf_size); 1662 goto unsupport; 1663 } 1664 DRV_LOG(DEBUG, "Port %u Rx queue %u " 1665 "Multi-Packet RQ is enabled strd_num_n = %u, strd_sz_n = %u", 1666 dev->data->port_id, idx, RTE_BIT32(*actual_log_stride_num), 1667 RTE_BIT32(*actual_log_stride_size)); 1668 return 0; 1669 unsupport: 1670 if (config->mprq.enabled) 1671 DRV_LOG(WARNING, 1672 "Port %u MPRQ is requested but cannot be enabled\n" 1673 " (requested: pkt_sz = %u, desc_num = %u," 1674 " rxq_num = %u, stride_sz = %u, stride_num = %u\n" 1675 " supported: min_rxqs_num = %u, min_buf_wqe_sz = %u" 1676 " min_stride_sz = %u, max_stride_sz = %u).\n" 1677 "Rx segment is %senabled. External mempool is %sused.", 1678 dev->data->port_id, min_mbuf_size, desc, priv->rxqs_n, 1679 config->mprq.log_stride_size == (uint32_t)MLX5_ARG_UNSET ? 
1680 RTE_BIT32(MLX5_MPRQ_DEFAULT_LOG_STRIDE_SIZE) : 1681 RTE_BIT32(config->mprq.log_stride_size), 1682 RTE_BIT32(config->mprq.log_stride_num), 1683 config->mprq.min_rxqs_num, 1684 RTE_BIT32(dev_cap->mprq.log_min_stride_wqe_size), 1685 RTE_BIT32(dev_cap->mprq.log_min_stride_size), 1686 RTE_BIT32(dev_cap->mprq.log_max_stride_size), 1687 rx_seg_en ? "" : "not ", is_extmem ? "" : "not "); 1688 return -1; 1689 } 1690 1691 /** 1692 * Create a DPDK Rx queue. 1693 * 1694 * @param dev 1695 * Pointer to Ethernet device. 1696 * @param idx 1697 * RX queue index. 1698 * @param desc 1699 * Number of descriptors to configure in queue. 1700 * @param socket 1701 * NUMA socket on which memory must be allocated. 1702 * 1703 * @return 1704 * A DPDK queue object on success, NULL otherwise and rte_errno is set. 1705 */ 1706 struct mlx5_rxq_ctrl * 1707 mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, 1708 unsigned int socket, const struct rte_eth_rxconf *conf, 1709 const struct rte_eth_rxseg_split *rx_seg, uint16_t n_seg, 1710 bool is_extmem) 1711 { 1712 struct mlx5_priv *priv = dev->data->dev_private; 1713 struct mlx5_rxq_ctrl *tmpl; 1714 unsigned int mb_len = rte_pktmbuf_data_room_size(rx_seg[0].mp); 1715 struct mlx5_port_config *config = &priv->config; 1716 uint64_t offloads = conf->offloads | 1717 dev->data->dev_conf.rxmode.offloads; 1718 unsigned int lro_on_queue = !!(offloads & RTE_ETH_RX_OFFLOAD_TCP_LRO); 1719 unsigned int max_rx_pktlen = lro_on_queue ? 1720 dev->data->dev_conf.rxmode.max_lro_pkt_size : 1721 dev->data->mtu + (unsigned int)RTE_ETHER_HDR_LEN + 1722 RTE_ETHER_CRC_LEN; 1723 unsigned int non_scatter_min_mbuf_size = max_rx_pktlen + 1724 RTE_PKTMBUF_HEADROOM; 1725 unsigned int max_lro_size = 0; 1726 unsigned int first_mb_free_size = mb_len - RTE_PKTMBUF_HEADROOM; 1727 uint32_t mprq_log_actual_stride_num = 0; 1728 uint32_t mprq_log_actual_stride_size = 0; 1729 bool rx_seg_en = n_seg != 1 || rx_seg[0].offset || rx_seg[0].length; 1730 const int mprq_en = !mlx5_mprq_prepare(dev, idx, desc, rx_seg_en, 1731 non_scatter_min_mbuf_size, 1732 &mprq_log_actual_stride_num, 1733 &mprq_log_actual_stride_size, 1734 is_extmem); 1735 /* 1736 * Always allocate extra slots, even if eventually 1737 * the vector Rx will not be used. 1738 */ 1739 uint16_t desc_n = desc + config->rx_vec_en * MLX5_VPMD_DESCS_PER_LOOP; 1740 size_t alloc_size = sizeof(*tmpl) + desc_n * sizeof(struct rte_mbuf *); 1741 const struct rte_eth_rxseg_split *qs_seg = rx_seg; 1742 unsigned int tail_len; 1743 1744 if (mprq_en) { 1745 /* Trim the number of descs needed. */ 1746 desc >>= mprq_log_actual_stride_num; 1747 alloc_size += desc * sizeof(struct mlx5_mprq_buf *); 1748 } 1749 tmpl = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, alloc_size, 0, socket); 1750 if (!tmpl) { 1751 rte_errno = ENOMEM; 1752 return NULL; 1753 } 1754 LIST_INIT(&tmpl->owners); 1755 MLX5_ASSERT(n_seg && n_seg <= MLX5_MAX_RXQ_NSEG); 1756 /* 1757 * Save the original segment configuration in the shared queue 1758 * descriptor for the later check on the sibling queue creation. 1759 */ 1760 tmpl->rxseg_n = n_seg; 1761 rte_memcpy(tmpl->rxseg, qs_seg, 1762 sizeof(struct rte_eth_rxseg_split) * n_seg); 1763 /* 1764 * Build the array of actual buffer offsets and lengths. 1765 * Pad with the buffers from the last memory pool if 1766 * needed to handle max size packets, replace zero length 1767 * with the buffer length from the pool. 
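 * For illustration (hypothetical pools with sufficient data room):
 * two configured segments of 512 and 2048 bytes with a 4096-byte
 * maximum packet length expand into three hardware segments of 512,
 * 2048 and 2048 bytes to cover the tail, and a fourth is appended so
 * that the final segment count is a power of two.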
	 */
	tail_len = max_rx_pktlen;
	do {
		struct mlx5_eth_rxseg *hw_seg =
				&tmpl->rxq.rxseg[tmpl->rxq.rxseg_n];
		uint32_t buf_len, offset, seg_len;

		/*
		 * For the buffers beyond the described segments the offset
		 * is zero, the first buffer contains the headroom.
		 */
		buf_len = rte_pktmbuf_data_room_size(qs_seg->mp);
		offset = (tmpl->rxq.rxseg_n >= n_seg ? 0 : qs_seg->offset) +
			 (tmpl->rxq.rxseg_n ? 0 : RTE_PKTMBUF_HEADROOM);
		/*
		 * For the buffers beyond the described segments the length
		 * is the pool buffer length, zero lengths are replaced with
		 * the pool buffer length as well.
		 */
		seg_len = tmpl->rxq.rxseg_n >= n_seg ? buf_len :
			  qs_seg->length ?
			  qs_seg->length :
			  (buf_len - offset);
		/* Check is done in long int, no overflows. */
		if (buf_len < seg_len + offset) {
			DRV_LOG(ERR, "port %u Rx queue %u: Split offset/length "
				     "%u/%u can't be satisfied",
				     dev->data->port_id, idx,
				     qs_seg->length, qs_seg->offset);
			rte_errno = EINVAL;
			goto error;
		}
		if (seg_len > tail_len)
			seg_len = buf_len - offset;
		if (++tmpl->rxq.rxseg_n > MLX5_MAX_RXQ_NSEG) {
			DRV_LOG(ERR,
				"port %u too many SGEs (%u) needed to handle"
				" requested maximum packet size %u, the maximum"
				" supported are %u", dev->data->port_id,
				tmpl->rxq.rxseg_n, max_rx_pktlen,
				MLX5_MAX_RXQ_NSEG);
			rte_errno = ENOTSUP;
			goto error;
		}
		/* Build the actual scattering element in the queue object. */
		hw_seg->mp = qs_seg->mp;
		MLX5_ASSERT(offset <= UINT16_MAX);
		MLX5_ASSERT(seg_len <= UINT16_MAX);
		hw_seg->offset = (uint16_t)offset;
		hw_seg->length = (uint16_t)seg_len;
		/*
		 * Advance the segment descriptor, the padding is based
		 * on the attributes of the last descriptor.
		 */
		if (tmpl->rxq.rxseg_n < n_seg)
			qs_seg++;
		tail_len -= RTE_MIN(tail_len, seg_len);
	} while (tail_len || !rte_is_power_of_2(tmpl->rxq.rxseg_n));
	MLX5_ASSERT(tmpl->rxq.rxseg_n &&
		    tmpl->rxq.rxseg_n <= MLX5_MAX_RXQ_NSEG);
	if (tmpl->rxq.rxseg_n > 1 && !(offloads & RTE_ETH_RX_OFFLOAD_SCATTER)) {
		DRV_LOG(ERR, "port %u Rx queue %u: Scatter offload is not"
			" configured and there is not enough mbuf space (%u)"
			" to contain the maximum Rx packet length (%u)"
			" with headroom (%u)",
			dev->data->port_id, idx, mb_len, max_rx_pktlen,
			RTE_PKTMBUF_HEADROOM);
		rte_errno = ENOSPC;
		goto error;
	}
	tmpl->is_hairpin = false;
	if (mlx5_mr_ctrl_init(&tmpl->rxq.mr_ctrl,
			      &priv->sh->cdev->mr_scache.dev_gen, socket)) {
		/* rte_errno is already set. */
		goto error;
	}
	tmpl->socket = socket;
	if (dev->data->dev_conf.intr_conf.rxq)
		tmpl->irq = 1;
	if (mprq_en) {
		/* TODO: Rx scatter isn't supported yet.
*/ 1848 tmpl->rxq.sges_n = 0; 1849 tmpl->rxq.log_strd_num = mprq_log_actual_stride_num; 1850 tmpl->rxq.log_strd_sz = mprq_log_actual_stride_size; 1851 tmpl->rxq.strd_shift_en = MLX5_MPRQ_TWO_BYTE_SHIFT; 1852 tmpl->rxq.strd_scatter_en = 1853 !!(offloads & RTE_ETH_RX_OFFLOAD_SCATTER); 1854 tmpl->rxq.mprq_max_memcpy_len = RTE_MIN(first_mb_free_size, 1855 config->mprq.max_memcpy_len); 1856 max_lro_size = RTE_MIN(max_rx_pktlen, 1857 RTE_BIT32(tmpl->rxq.log_strd_num) * 1858 RTE_BIT32(tmpl->rxq.log_strd_sz)); 1859 } else if (tmpl->rxq.rxseg_n == 1) { 1860 MLX5_ASSERT(max_rx_pktlen <= first_mb_free_size); 1861 tmpl->rxq.sges_n = 0; 1862 max_lro_size = max_rx_pktlen; 1863 } else if (offloads & RTE_ETH_RX_OFFLOAD_SCATTER) { 1864 unsigned int sges_n; 1865 1866 if (lro_on_queue && first_mb_free_size < 1867 MLX5_MAX_LRO_HEADER_FIX) { 1868 DRV_LOG(ERR, "Not enough space in the first segment(%u)" 1869 " to include the max header size(%u) for LRO", 1870 first_mb_free_size, MLX5_MAX_LRO_HEADER_FIX); 1871 rte_errno = ENOTSUP; 1872 goto error; 1873 } 1874 /* 1875 * Determine the number of SGEs needed for a full packet 1876 * and round it to the next power of two. 1877 */ 1878 sges_n = log2above(tmpl->rxq.rxseg_n); 1879 if (sges_n > MLX5_MAX_LOG_RQ_SEGS) { 1880 DRV_LOG(ERR, 1881 "port %u too many SGEs (%u) needed to handle" 1882 " requested maximum packet size %u, the maximum" 1883 " supported are %u", dev->data->port_id, 1884 1 << sges_n, max_rx_pktlen, 1885 1u << MLX5_MAX_LOG_RQ_SEGS); 1886 rte_errno = ENOTSUP; 1887 goto error; 1888 } 1889 tmpl->rxq.sges_n = sges_n; 1890 max_lro_size = max_rx_pktlen; 1891 } 1892 DRV_LOG(DEBUG, "port %u maximum number of segments per packet: %u", 1893 dev->data->port_id, 1 << tmpl->rxq.sges_n); 1894 if (desc % (1 << tmpl->rxq.sges_n)) { 1895 DRV_LOG(ERR, 1896 "port %u number of Rx queue descriptors (%u) is not a" 1897 " multiple of SGEs per packet (%u)", 1898 dev->data->port_id, 1899 desc, 1900 1 << tmpl->rxq.sges_n); 1901 rte_errno = EINVAL; 1902 goto error; 1903 } 1904 mlx5_max_lro_msg_size_adjust(dev, idx, max_lro_size); 1905 /* Toggle RX checksum offload if hardware supports it. */ 1906 tmpl->rxq.csum = !!(offloads & RTE_ETH_RX_OFFLOAD_CHECKSUM); 1907 /* Configure Rx timestamp. */ 1908 tmpl->rxq.hw_timestamp = !!(offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP); 1909 tmpl->rxq.timestamp_rx_flag = 0; 1910 if (tmpl->rxq.hw_timestamp && rte_mbuf_dyn_rx_timestamp_register( 1911 &tmpl->rxq.timestamp_offset, 1912 &tmpl->rxq.timestamp_rx_flag) != 0) { 1913 DRV_LOG(ERR, "Cannot register Rx timestamp field/flag"); 1914 goto error; 1915 } 1916 /* Configure VLAN stripping. */ 1917 tmpl->rxq.vlan_strip = !!(offloads & RTE_ETH_RX_OFFLOAD_VLAN_STRIP); 1918 /* By default, FCS (CRC) is stripped by hardware. */ 1919 tmpl->rxq.crc_present = 0; 1920 tmpl->rxq.lro = lro_on_queue; 1921 if (offloads & RTE_ETH_RX_OFFLOAD_KEEP_CRC) { 1922 if (priv->sh->config.hw_fcs_strip) { 1923 /* 1924 * RQs used for LRO-enabled TIRs should not be 1925 * configured to scatter the FCS. 
1926 */ 1927 if (lro_on_queue) 1928 DRV_LOG(WARNING, 1929 "port %u CRC stripping has been " 1930 "disabled but will still be performed " 1931 "by hardware, because LRO is enabled", 1932 dev->data->port_id); 1933 else 1934 tmpl->rxq.crc_present = 1; 1935 } else { 1936 DRV_LOG(WARNING, 1937 "port %u CRC stripping has been disabled but will" 1938 " still be performed by hardware, make sure MLNX_OFED" 1939 " and firmware are up to date", 1940 dev->data->port_id); 1941 } 1942 } 1943 DRV_LOG(DEBUG, 1944 "port %u CRC stripping is %s, %u bytes will be subtracted from" 1945 " incoming frames to hide it", 1946 dev->data->port_id, 1947 tmpl->rxq.crc_present ? "disabled" : "enabled", 1948 tmpl->rxq.crc_present << 2); 1949 tmpl->rxq.rss_hash = !!priv->rss_conf.rss_hf && 1950 (!!(dev->data->dev_conf.rxmode.mq_mode & RTE_ETH_MQ_RX_RSS)); 1951 /* Save port ID. */ 1952 tmpl->rxq.port_id = dev->data->port_id; 1953 tmpl->sh = priv->sh; 1954 tmpl->rxq.mp = rx_seg[0].mp; 1955 tmpl->rxq.elts_n = log2above(desc); 1956 tmpl->rxq.rq_repl_thresh = MLX5_VPMD_RXQ_RPLNSH_THRESH(desc_n); 1957 tmpl->rxq.elts = (struct rte_mbuf *(*)[desc_n])(tmpl + 1); 1958 tmpl->rxq.mprq_bufs = 1959 (struct mlx5_mprq_buf *(*)[desc])(*tmpl->rxq.elts + desc_n); 1960 tmpl->rxq.idx = idx; 1961 if (conf->share_group > 0) { 1962 tmpl->rxq.shared = 1; 1963 tmpl->share_group = conf->share_group; 1964 tmpl->share_qid = conf->share_qid; 1965 LIST_INSERT_HEAD(&priv->sh->shared_rxqs, tmpl, share_entry); 1966 } 1967 LIST_INSERT_HEAD(&priv->rxqsctrl, tmpl, next); 1968 return tmpl; 1969 error: 1970 mlx5_mr_btree_free(&tmpl->rxq.mr_ctrl.cache_bh); 1971 mlx5_free(tmpl); 1972 return NULL; 1973 } 1974 1975 /** 1976 * Create a DPDK Rx hairpin queue. 1977 * 1978 * @param dev 1979 * Pointer to Ethernet device. 1980 * @param rxq 1981 * RX queue. 1982 * @param desc 1983 * Number of descriptors to configure in queue. 1984 * @param hairpin_conf 1985 * The hairpin binding configuration. 1986 * 1987 * @return 1988 * A DPDK queue object on success, NULL otherwise and rte_errno is set. 1989 */ 1990 struct mlx5_rxq_ctrl * 1991 mlx5_rxq_hairpin_new(struct rte_eth_dev *dev, struct mlx5_rxq_priv *rxq, 1992 uint16_t desc, 1993 const struct rte_eth_hairpin_conf *hairpin_conf) 1994 { 1995 uint16_t idx = rxq->idx; 1996 struct mlx5_priv *priv = dev->data->dev_private; 1997 struct mlx5_rxq_ctrl *tmpl; 1998 1999 tmpl = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, sizeof(*tmpl), 0, 2000 SOCKET_ID_ANY); 2001 if (!tmpl) { 2002 rte_errno = ENOMEM; 2003 return NULL; 2004 } 2005 LIST_INIT(&tmpl->owners); 2006 rxq->ctrl = tmpl; 2007 LIST_INSERT_HEAD(&tmpl->owners, rxq, owner_entry); 2008 tmpl->is_hairpin = true; 2009 tmpl->socket = SOCKET_ID_ANY; 2010 tmpl->rxq.rss_hash = 0; 2011 tmpl->rxq.port_id = dev->data->port_id; 2012 tmpl->sh = priv->sh; 2013 tmpl->rxq.mp = NULL; 2014 tmpl->rxq.elts_n = log2above(desc); 2015 tmpl->rxq.elts = NULL; 2016 tmpl->rxq.mr_ctrl.cache_bh = (struct mlx5_mr_btree) { 0 }; 2017 tmpl->rxq.idx = idx; 2018 rxq->hairpin_conf = *hairpin_conf; 2019 mlx5_rxq_ref(dev, idx); 2020 LIST_INSERT_HEAD(&priv->rxqsctrl, tmpl, next); 2021 return tmpl; 2022 } 2023 2024 /** 2025 * Increase Rx queue reference count. 2026 * 2027 * @param dev 2028 * Pointer to Ethernet device. 2029 * @param idx 2030 * RX queue index. 2031 * 2032 * @return 2033 * A pointer to the queue if it exists, NULL otherwise. 
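 *
 * Illustrative pairing (a sketch, not a call sequence taken from this
 * file): every successful reference is expected to be balanced by
 * mlx5_rxq_deref() (possibly via mlx5_rxq_release()); "idx" is assumed
 * to be a valid queue index below priv->rxqs_n, and use_rxq() is a
 * hypothetical consumer.
 *
 * @code
 * struct mlx5_rxq_priv *rxq = mlx5_rxq_ref(dev, idx);
 *
 * if (rxq != NULL) {
 *         use_rxq(rxq);
 *         mlx5_rxq_deref(dev, idx);
 * }
 * @endcode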
2034 */ 2035 struct mlx5_rxq_priv * 2036 mlx5_rxq_ref(struct rte_eth_dev *dev, uint16_t idx) 2037 { 2038 struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, idx); 2039 2040 if (rxq != NULL) 2041 __atomic_fetch_add(&rxq->refcnt, 1, __ATOMIC_RELAXED); 2042 return rxq; 2043 } 2044 2045 /** 2046 * Dereference a Rx queue. 2047 * 2048 * @param dev 2049 * Pointer to Ethernet device. 2050 * @param idx 2051 * RX queue index. 2052 * 2053 * @return 2054 * Updated reference count. 2055 */ 2056 uint32_t 2057 mlx5_rxq_deref(struct rte_eth_dev *dev, uint16_t idx) 2058 { 2059 struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, idx); 2060 2061 if (rxq == NULL) 2062 return 0; 2063 return __atomic_fetch_sub(&rxq->refcnt, 1, __ATOMIC_RELAXED) - 1; 2064 } 2065 2066 /** 2067 * Get a Rx queue. 2068 * 2069 * @param dev 2070 * Pointer to Ethernet device. 2071 * @param idx 2072 * RX queue index. 2073 * 2074 * @return 2075 * A pointer to the queue if it exists, NULL otherwise. 2076 */ 2077 struct mlx5_rxq_priv * 2078 mlx5_rxq_get(struct rte_eth_dev *dev, uint16_t idx) 2079 { 2080 struct mlx5_priv *priv = dev->data->dev_private; 2081 2082 if (idx >= priv->rxqs_n) 2083 return NULL; 2084 MLX5_ASSERT(priv->rxq_privs != NULL); 2085 return (*priv->rxq_privs)[idx]; 2086 } 2087 2088 /** 2089 * Get Rx queue shareable control. 2090 * 2091 * @param dev 2092 * Pointer to Ethernet device. 2093 * @param idx 2094 * RX queue index. 2095 * 2096 * @return 2097 * A pointer to the queue control if it exists, NULL otherwise. 2098 */ 2099 struct mlx5_rxq_ctrl * 2100 mlx5_rxq_ctrl_get(struct rte_eth_dev *dev, uint16_t idx) 2101 { 2102 struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, idx); 2103 2104 return rxq == NULL ? NULL : rxq->ctrl; 2105 } 2106 2107 /** 2108 * Get Rx queue shareable data. 2109 * 2110 * @param dev 2111 * Pointer to Ethernet device. 2112 * @param idx 2113 * RX queue index. 2114 * 2115 * @return 2116 * A pointer to the queue data if it exists, NULL otherwise. 2117 */ 2118 struct mlx5_rxq_data * 2119 mlx5_rxq_data_get(struct rte_eth_dev *dev, uint16_t idx) 2120 { 2121 struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, idx); 2122 2123 return rxq == NULL ? NULL : &rxq->ctrl->rxq; 2124 } 2125 2126 /** 2127 * Increase an external Rx queue reference count. 2128 * 2129 * @param dev 2130 * Pointer to Ethernet device. 2131 * @param idx 2132 * External RX queue index. 2133 * 2134 * @return 2135 * A pointer to the queue if it exists, NULL otherwise. 2136 */ 2137 struct mlx5_external_rxq * 2138 mlx5_ext_rxq_ref(struct rte_eth_dev *dev, uint16_t idx) 2139 { 2140 struct mlx5_external_rxq *rxq = mlx5_ext_rxq_get(dev, idx); 2141 2142 __atomic_fetch_add(&rxq->refcnt, 1, __ATOMIC_RELAXED); 2143 return rxq; 2144 } 2145 2146 /** 2147 * Decrease an external Rx queue reference count. 2148 * 2149 * @param dev 2150 * Pointer to Ethernet device. 2151 * @param idx 2152 * External RX queue index. 2153 * 2154 * @return 2155 * Updated reference count. 2156 */ 2157 uint32_t 2158 mlx5_ext_rxq_deref(struct rte_eth_dev *dev, uint16_t idx) 2159 { 2160 struct mlx5_external_rxq *rxq = mlx5_ext_rxq_get(dev, idx); 2161 2162 return __atomic_fetch_sub(&rxq->refcnt, 1, __ATOMIC_RELAXED) - 1; 2163 } 2164 2165 /** 2166 * Get an external Rx queue. 2167 * 2168 * @param dev 2169 * Pointer to Ethernet device. 2170 * @param idx 2171 * External Rx queue index. 2172 * 2173 * @return 2174 * A pointer to the queue if it exists, NULL otherwise. 
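 *
 * A minimal usage sketch (illustrative; "idx" is assumed to be already
 * checked with mlx5_is_external_rxq(), since this getter only asserts it,
 * and forward_to_hw_queue() is a hypothetical consumer):
 *
 * @code
 * if (mlx5_is_external_rxq(dev, idx)) {
 *         struct mlx5_external_rxq *ext_rxq = mlx5_ext_rxq_get(dev, idx);
 *
 *         forward_to_hw_queue(ext_rxq->hw_id);
 * }
 * @endcode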
2175 */ 2176 struct mlx5_external_rxq * 2177 mlx5_ext_rxq_get(struct rte_eth_dev *dev, uint16_t idx) 2178 { 2179 struct mlx5_priv *priv = dev->data->dev_private; 2180 2181 MLX5_ASSERT(mlx5_is_external_rxq(dev, idx)); 2182 return &priv->ext_rxqs[idx - RTE_PMD_MLX5_EXTERNAL_RX_QUEUE_ID_MIN]; 2183 } 2184 2185 /** 2186 * Dereference a list of Rx queues. 2187 * 2188 * @param dev 2189 * Pointer to Ethernet device. 2190 * @param queues 2191 * List of Rx queues to deref. 2192 * @param queues_n 2193 * Number of queues in the array. 2194 */ 2195 static void 2196 mlx5_rxqs_deref(struct rte_eth_dev *dev, uint16_t *queues, 2197 const uint32_t queues_n) 2198 { 2199 uint32_t i; 2200 2201 for (i = 0; i < queues_n; i++) { 2202 if (mlx5_is_external_rxq(dev, queues[i])) 2203 claim_nonzero(mlx5_ext_rxq_deref(dev, queues[i])); 2204 else 2205 claim_nonzero(mlx5_rxq_deref(dev, queues[i])); 2206 } 2207 } 2208 2209 /** 2210 * Increase reference count for list of Rx queues. 2211 * 2212 * @param dev 2213 * Pointer to Ethernet device. 2214 * @param queues 2215 * List of Rx queues to ref. 2216 * @param queues_n 2217 * Number of queues in the array. 2218 * 2219 * @return 2220 * 0 on success, a negative errno value otherwise and rte_errno is set. 2221 */ 2222 static int 2223 mlx5_rxqs_ref(struct rte_eth_dev *dev, uint16_t *queues, 2224 const uint32_t queues_n) 2225 { 2226 uint32_t i; 2227 2228 for (i = 0; i != queues_n; ++i) { 2229 if (mlx5_is_external_rxq(dev, queues[i])) { 2230 if (mlx5_ext_rxq_ref(dev, queues[i]) == NULL) 2231 goto error; 2232 } else { 2233 if (mlx5_rxq_ref(dev, queues[i]) == NULL) 2234 goto error; 2235 } 2236 } 2237 return 0; 2238 error: 2239 mlx5_rxqs_deref(dev, queues, i); 2240 rte_errno = EINVAL; 2241 return -rte_errno; 2242 } 2243 2244 /** 2245 * Release a Rx queue. 2246 * 2247 * @param dev 2248 * Pointer to Ethernet device. 2249 * @param idx 2250 * RX queue index. 2251 * 2252 * @return 2253 * 1 while a reference on it exists, 0 when freed. 2254 */ 2255 int 2256 mlx5_rxq_release(struct rte_eth_dev *dev, uint16_t idx) 2257 { 2258 struct mlx5_priv *priv = dev->data->dev_private; 2259 struct mlx5_rxq_priv *rxq; 2260 struct mlx5_rxq_ctrl *rxq_ctrl; 2261 uint32_t refcnt; 2262 2263 if (priv->rxq_privs == NULL) 2264 return 0; 2265 rxq = mlx5_rxq_get(dev, idx); 2266 if (rxq == NULL || rxq->refcnt == 0) 2267 return 0; 2268 rxq_ctrl = rxq->ctrl; 2269 refcnt = mlx5_rxq_deref(dev, idx); 2270 if (refcnt > 1) { 2271 return 1; 2272 } else if (refcnt == 1) { /* RxQ stopped. */ 2273 priv->obj_ops.rxq_obj_release(rxq); 2274 if (!rxq_ctrl->started && rxq_ctrl->obj != NULL) { 2275 LIST_REMOVE(rxq_ctrl->obj, next); 2276 mlx5_free(rxq_ctrl->obj); 2277 rxq_ctrl->obj = NULL; 2278 } 2279 if (!rxq_ctrl->is_hairpin) { 2280 if (!rxq_ctrl->started) 2281 rxq_free_elts(rxq_ctrl); 2282 dev->data->rx_queue_state[idx] = 2283 RTE_ETH_QUEUE_STATE_STOPPED; 2284 } 2285 } else { /* Refcnt zero, closing device. */ 2286 LIST_REMOVE(rxq_ctrl, next); 2287 LIST_REMOVE(rxq, owner_entry); 2288 if (LIST_EMPTY(&rxq_ctrl->owners)) { 2289 if (!rxq_ctrl->is_hairpin) 2290 mlx5_mr_btree_free 2291 (&rxq_ctrl->rxq.mr_ctrl.cache_bh); 2292 if (rxq_ctrl->rxq.shared) 2293 LIST_REMOVE(rxq_ctrl, share_entry); 2294 mlx5_free(rxq_ctrl); 2295 } 2296 dev->data->rx_queues[idx] = NULL; 2297 mlx5_free(rxq); 2298 (*priv->rxq_privs)[idx] = NULL; 2299 } 2300 return 0; 2301 } 2302 2303 /** 2304 * Verify the Rx Queue list is empty 2305 * 2306 * @param dev 2307 * Pointer to Ethernet device. 2308 * 2309 * @return 2310 * The number of object not released. 
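 *
 * Illustrative call site (a sketch only, typically at device close time):
 *
 * @code
 * if (mlx5_rxq_verify(dev) != 0)
 *         DRV_LOG(WARNING, "port %u some Rx queues are still in use",
 *                 dev->data->port_id);
 * @endcode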
2311 */ 2312 int 2313 mlx5_rxq_verify(struct rte_eth_dev *dev) 2314 { 2315 struct mlx5_priv *priv = dev->data->dev_private; 2316 struct mlx5_rxq_ctrl *rxq_ctrl; 2317 int ret = 0; 2318 2319 LIST_FOREACH(rxq_ctrl, &priv->rxqsctrl, next) { 2320 DRV_LOG(DEBUG, "port %u Rx Queue %u still referenced", 2321 dev->data->port_id, rxq_ctrl->rxq.idx); 2322 ++ret; 2323 } 2324 return ret; 2325 } 2326 2327 /** 2328 * Verify the external Rx Queue list is empty. 2329 * 2330 * @param dev 2331 * Pointer to Ethernet device. 2332 * 2333 * @return 2334 * The number of object not released. 2335 */ 2336 int 2337 mlx5_ext_rxq_verify(struct rte_eth_dev *dev) 2338 { 2339 struct mlx5_priv *priv = dev->data->dev_private; 2340 struct mlx5_external_rxq *rxq; 2341 uint32_t i; 2342 int ret = 0; 2343 2344 if (priv->ext_rxqs == NULL) 2345 return 0; 2346 2347 for (i = RTE_PMD_MLX5_EXTERNAL_RX_QUEUE_ID_MIN; i <= UINT16_MAX ; ++i) { 2348 rxq = mlx5_ext_rxq_get(dev, i); 2349 if (rxq->refcnt < 2) 2350 continue; 2351 DRV_LOG(DEBUG, "Port %u external RxQ %u still referenced.", 2352 dev->data->port_id, i); 2353 ++ret; 2354 } 2355 return ret; 2356 } 2357 2358 /** 2359 * Check whether RxQ type is Hairpin. 2360 * 2361 * @param dev 2362 * Pointer to Ethernet device. 2363 * @param idx 2364 * Rx queue index. 2365 * 2366 * @return 2367 * True if Rx queue type is Hairpin, otherwise False. 2368 */ 2369 bool 2370 mlx5_rxq_is_hairpin(struct rte_eth_dev *dev, uint16_t idx) 2371 { 2372 struct mlx5_rxq_ctrl *rxq_ctrl; 2373 2374 if (mlx5_is_external_rxq(dev, idx)) 2375 return false; 2376 rxq_ctrl = mlx5_rxq_ctrl_get(dev, idx); 2377 return (rxq_ctrl != NULL && rxq_ctrl->is_hairpin); 2378 } 2379 2380 /* 2381 * Get a Rx hairpin queue configuration. 2382 * 2383 * @param dev 2384 * Pointer to Ethernet device. 2385 * @param idx 2386 * Rx queue index. 2387 * 2388 * @return 2389 * Pointer to the configuration if a hairpin RX queue, otherwise NULL. 2390 */ 2391 const struct rte_eth_hairpin_conf * 2392 mlx5_rxq_get_hairpin_conf(struct rte_eth_dev *dev, uint16_t idx) 2393 { 2394 if (mlx5_rxq_is_hairpin(dev, idx)) { 2395 struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, idx); 2396 2397 return rxq != NULL ? &rxq->hairpin_conf : NULL; 2398 } 2399 return NULL; 2400 } 2401 2402 /** 2403 * Match queues listed in arguments to queues contained in indirection table 2404 * object. 2405 * 2406 * @param ind_tbl 2407 * Pointer to indirection table to match. 2408 * @param queues 2409 * Queues to match to queues in indirection table. 2410 * @param queues_n 2411 * Number of queues in the array. 2412 * 2413 * @return 2414 * 1 if all queues in indirection table match 0 otherwise. 2415 */ 2416 static int 2417 mlx5_ind_table_obj_match_queues(const struct mlx5_ind_table_obj *ind_tbl, 2418 const uint16_t *queues, uint32_t queues_n) 2419 { 2420 return (ind_tbl->queues_n == queues_n) && 2421 (!memcmp(ind_tbl->queues, queues, 2422 ind_tbl->queues_n * sizeof(ind_tbl->queues[0]))); 2423 } 2424 2425 /** 2426 * Get an indirection table. 2427 * 2428 * @param dev 2429 * Pointer to Ethernet device. 2430 * @param queues 2431 * Queues entering in the indirection table. 2432 * @param queues_n 2433 * Number of queues in the array. 2434 * 2435 * @return 2436 * An indirection table if found. 
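 *
 * Illustrative pairing (a sketch, not taken from this file): a table
 * found here carries an extra reference that the caller is expected to
 * drop with mlx5_ind_table_obj_release(); build_rss_action() is a
 * hypothetical consumer.
 *
 * @code
 * struct mlx5_ind_table_obj *ind_tbl =
 *         mlx5_ind_table_obj_get(dev, queues, queues_n);
 *
 * if (ind_tbl != NULL) {
 *         build_rss_action(ind_tbl);
 *         mlx5_ind_table_obj_release(dev, ind_tbl, true);
 * }
 * @endcode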
2437 */ 2438 struct mlx5_ind_table_obj * 2439 mlx5_ind_table_obj_get(struct rte_eth_dev *dev, const uint16_t *queues, 2440 uint32_t queues_n) 2441 { 2442 struct mlx5_priv *priv = dev->data->dev_private; 2443 struct mlx5_ind_table_obj *ind_tbl; 2444 2445 rte_rwlock_read_lock(&priv->ind_tbls_lock); 2446 LIST_FOREACH(ind_tbl, &priv->ind_tbls, next) { 2447 if ((ind_tbl->queues_n == queues_n) && 2448 (memcmp(ind_tbl->queues, queues, 2449 ind_tbl->queues_n * sizeof(ind_tbl->queues[0])) 2450 == 0)) { 2451 __atomic_fetch_add(&ind_tbl->refcnt, 1, 2452 __ATOMIC_RELAXED); 2453 break; 2454 } 2455 } 2456 rte_rwlock_read_unlock(&priv->ind_tbls_lock); 2457 return ind_tbl; 2458 } 2459 2460 /** 2461 * Release an indirection table. 2462 * 2463 * @param dev 2464 * Pointer to Ethernet device. 2465 * @param ind_table 2466 * Indirection table to release. 2467 * @param deref_rxqs 2468 * If true, then dereference RX queues related to indirection table. 2469 * Otherwise, no additional action will be taken. 2470 * 2471 * @return 2472 * 1 while a reference on it exists, 0 when freed. 2473 */ 2474 int 2475 mlx5_ind_table_obj_release(struct rte_eth_dev *dev, 2476 struct mlx5_ind_table_obj *ind_tbl, 2477 bool deref_rxqs) 2478 { 2479 struct mlx5_priv *priv = dev->data->dev_private; 2480 unsigned int ret; 2481 2482 rte_rwlock_write_lock(&priv->ind_tbls_lock); 2483 ret = __atomic_fetch_sub(&ind_tbl->refcnt, 1, __ATOMIC_RELAXED) - 1; 2484 if (!ret) 2485 LIST_REMOVE(ind_tbl, next); 2486 rte_rwlock_write_unlock(&priv->ind_tbls_lock); 2487 if (ret) 2488 return 1; 2489 priv->obj_ops.ind_table_destroy(ind_tbl); 2490 if (deref_rxqs) 2491 mlx5_rxqs_deref(dev, ind_tbl->queues, ind_tbl->queues_n); 2492 mlx5_free(ind_tbl); 2493 return 0; 2494 } 2495 2496 /** 2497 * Verify the Rx Queue list is empty 2498 * 2499 * @param dev 2500 * Pointer to Ethernet device. 2501 * 2502 * @return 2503 * The number of object not released. 2504 */ 2505 int 2506 mlx5_ind_table_obj_verify(struct rte_eth_dev *dev) 2507 { 2508 struct mlx5_priv *priv = dev->data->dev_private; 2509 struct mlx5_ind_table_obj *ind_tbl; 2510 int ret = 0; 2511 2512 rte_rwlock_read_lock(&priv->ind_tbls_lock); 2513 LIST_FOREACH(ind_tbl, &priv->ind_tbls, next) { 2514 DRV_LOG(DEBUG, 2515 "port %u indirection table obj %p still referenced", 2516 dev->data->port_id, (void *)ind_tbl); 2517 ++ret; 2518 } 2519 rte_rwlock_read_unlock(&priv->ind_tbls_lock); 2520 return ret; 2521 } 2522 2523 /** 2524 * Setup an indirection table structure fields. 2525 * 2526 * @param dev 2527 * Pointer to Ethernet device. 2528 * @param ind_table 2529 * Indirection table to modify. 2530 * @param ref_qs 2531 * Whether to increment RxQ reference counters. 2532 * 2533 * @return 2534 * 0 on success, a negative errno value otherwise and rte_errno is set. 2535 */ 2536 int 2537 mlx5_ind_table_obj_setup(struct rte_eth_dev *dev, 2538 struct mlx5_ind_table_obj *ind_tbl, 2539 bool ref_qs) 2540 { 2541 struct mlx5_priv *priv = dev->data->dev_private; 2542 uint32_t queues_n = ind_tbl->queues_n; 2543 int ret; 2544 const unsigned int n = rte_is_power_of_2(queues_n) ? 
2545 log2above(queues_n) : 2546 log2above(priv->sh->dev_cap.ind_table_max_size); 2547 2548 if (ref_qs && mlx5_rxqs_ref(dev, ind_tbl->queues, queues_n) < 0) { 2549 DRV_LOG(DEBUG, "Port %u invalid indirection table queues.", 2550 dev->data->port_id); 2551 return -rte_errno; 2552 } 2553 ret = priv->obj_ops.ind_table_new(dev, n, ind_tbl); 2554 if (ret) { 2555 DRV_LOG(DEBUG, "Port %u cannot create a new indirection table.", 2556 dev->data->port_id); 2557 if (ref_qs) { 2558 int err = rte_errno; 2559 2560 mlx5_rxqs_deref(dev, ind_tbl->queues, queues_n); 2561 rte_errno = err; 2562 } 2563 return ret; 2564 } 2565 __atomic_fetch_add(&ind_tbl->refcnt, 1, __ATOMIC_RELAXED); 2566 return 0; 2567 } 2568 2569 /** 2570 * Create an indirection table. 2571 * 2572 * @param dev 2573 * Pointer to Ethernet device. 2574 * @param queues 2575 * Queues entering in the indirection table. 2576 * @param queues_n 2577 * Number of queues in the array. 2578 * @param standalone 2579 * Indirection table for Standalone queue. 2580 * @param ref_qs 2581 * Whether to increment RxQ reference counters. 2582 * 2583 * @return 2584 * The Verbs/DevX object initialized, NULL otherwise and rte_errno is set. 2585 */ 2586 struct mlx5_ind_table_obj * 2587 mlx5_ind_table_obj_new(struct rte_eth_dev *dev, const uint16_t *queues, 2588 uint32_t queues_n, bool standalone, bool ref_qs) 2589 { 2590 struct mlx5_priv *priv = dev->data->dev_private; 2591 struct mlx5_ind_table_obj *ind_tbl; 2592 int ret; 2593 uint32_t max_queues_n = priv->rxqs_n > queues_n ? priv->rxqs_n : queues_n; 2594 2595 /* 2596 * Allocate maximum queues for shared action as queue number 2597 * maybe modified later. 2598 */ 2599 ind_tbl = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*ind_tbl) + 2600 (standalone ? max_queues_n : queues_n) * 2601 sizeof(uint16_t), 0, SOCKET_ID_ANY); 2602 if (!ind_tbl) { 2603 rte_errno = ENOMEM; 2604 return NULL; 2605 } 2606 ind_tbl->queues_n = queues_n; 2607 ind_tbl->queues = (uint16_t *)(ind_tbl + 1); 2608 memcpy(ind_tbl->queues, queues, queues_n * sizeof(*queues)); 2609 ret = mlx5_ind_table_obj_setup(dev, ind_tbl, ref_qs); 2610 if (ret < 0) { 2611 mlx5_free(ind_tbl); 2612 return NULL; 2613 } 2614 rte_rwlock_write_lock(&priv->ind_tbls_lock); 2615 if (!standalone) 2616 LIST_INSERT_HEAD(&priv->ind_tbls, ind_tbl, next); 2617 else 2618 LIST_INSERT_HEAD(&priv->standalone_ind_tbls, ind_tbl, next); 2619 rte_rwlock_write_unlock(&priv->ind_tbls_lock); 2620 2621 return ind_tbl; 2622 } 2623 2624 static int 2625 mlx5_ind_table_obj_check_standalone(struct rte_eth_dev *dev __rte_unused, 2626 struct mlx5_ind_table_obj *ind_tbl) 2627 { 2628 uint32_t refcnt; 2629 2630 refcnt = __atomic_load_n(&ind_tbl->refcnt, __ATOMIC_RELAXED); 2631 if (refcnt <= 1) 2632 return 0; 2633 /* 2634 * Modification of indirection tables having more than 1 2635 * reference is unsupported. 2636 */ 2637 DRV_LOG(DEBUG, 2638 "Port %u cannot modify indirection table %p (refcnt %u > 1).", 2639 dev->data->port_id, (void *)ind_tbl, refcnt); 2640 rte_errno = EINVAL; 2641 return -rte_errno; 2642 } 2643 2644 /** 2645 * Modify an indirection table. 2646 * 2647 * @param dev 2648 * Pointer to Ethernet device. 2649 * @param ind_table 2650 * Indirection table to modify. 2651 * @param queues 2652 * Queues replacement for the indirection table. 2653 * @param queues_n 2654 * Number of queues in the array. 2655 * @param standalone 2656 * Indirection table for Standalone queue. 2657 * @param ref_new_qs 2658 * Whether to increment new RxQ set reference counters. 
2659 * @param deref_old_qs 2660 * Whether to decrement old RxQ set reference counters. 2661 * 2662 * @return 2663 * 0 on success, a negative errno value otherwise and rte_errno is set. 2664 */ 2665 int 2666 mlx5_ind_table_obj_modify(struct rte_eth_dev *dev, 2667 struct mlx5_ind_table_obj *ind_tbl, 2668 uint16_t *queues, const uint32_t queues_n, 2669 bool standalone, bool ref_new_qs, bool deref_old_qs) 2670 { 2671 struct mlx5_priv *priv = dev->data->dev_private; 2672 int ret; 2673 const unsigned int n = rte_is_power_of_2(queues_n) ? 2674 log2above(queues_n) : 2675 log2above(priv->sh->dev_cap.ind_table_max_size); 2676 2677 MLX5_ASSERT(standalone); 2678 RTE_SET_USED(standalone); 2679 if (mlx5_ind_table_obj_check_standalone(dev, ind_tbl) < 0) 2680 return -rte_errno; 2681 if (ref_new_qs && mlx5_rxqs_ref(dev, queues, queues_n) < 0) { 2682 DRV_LOG(DEBUG, "Port %u invalid indirection table queues.", 2683 dev->data->port_id); 2684 return -rte_errno; 2685 } 2686 MLX5_ASSERT(priv->obj_ops.ind_table_modify); 2687 ret = priv->obj_ops.ind_table_modify(dev, n, queues, queues_n, ind_tbl); 2688 if (ret) { 2689 DRV_LOG(DEBUG, "Port %u cannot modify indirection table.", 2690 dev->data->port_id); 2691 if (ref_new_qs) { 2692 int err = rte_errno; 2693 2694 mlx5_rxqs_deref(dev, queues, queues_n); 2695 rte_errno = err; 2696 } 2697 return ret; 2698 } 2699 if (deref_old_qs) 2700 mlx5_rxqs_deref(dev, ind_tbl->queues, ind_tbl->queues_n); 2701 ind_tbl->queues_n = queues_n; 2702 ind_tbl->queues = queues; 2703 return 0; 2704 } 2705 2706 /** 2707 * Attach an indirection table to its queues. 2708 * 2709 * @param dev 2710 * Pointer to Ethernet device. 2711 * @param ind_table 2712 * Indirection table to attach. 2713 * 2714 * @return 2715 * 0 on success, a negative errno value otherwise and rte_errno is set. 2716 */ 2717 int 2718 mlx5_ind_table_obj_attach(struct rte_eth_dev *dev, 2719 struct mlx5_ind_table_obj *ind_tbl) 2720 { 2721 int ret; 2722 2723 ret = mlx5_ind_table_obj_modify(dev, ind_tbl, ind_tbl->queues, 2724 ind_tbl->queues_n, 2725 true /* standalone */, 2726 true /* ref_new_qs */, 2727 false /* deref_old_qs */); 2728 if (ret != 0) 2729 DRV_LOG(ERR, "Port %u could not modify indirect table obj %p", 2730 dev->data->port_id, (void *)ind_tbl); 2731 return ret; 2732 } 2733 2734 /** 2735 * Detach an indirection table from its queues. 2736 * 2737 * @param dev 2738 * Pointer to Ethernet device. 2739 * @param ind_table 2740 * Indirection table to detach. 2741 * 2742 * @return 2743 * 0 on success, a negative errno value otherwise and rte_errno is set. 2744 */ 2745 int 2746 mlx5_ind_table_obj_detach(struct rte_eth_dev *dev, 2747 struct mlx5_ind_table_obj *ind_tbl) 2748 { 2749 struct mlx5_priv *priv = dev->data->dev_private; 2750 const unsigned int n = rte_is_power_of_2(ind_tbl->queues_n) ? 
2751 log2above(ind_tbl->queues_n) : 2752 log2above(priv->sh->dev_cap.ind_table_max_size); 2753 unsigned int i; 2754 int ret; 2755 2756 ret = mlx5_ind_table_obj_check_standalone(dev, ind_tbl); 2757 if (ret != 0) 2758 return ret; 2759 MLX5_ASSERT(priv->obj_ops.ind_table_modify); 2760 ret = priv->obj_ops.ind_table_modify(dev, n, NULL, 0, ind_tbl); 2761 if (ret != 0) { 2762 DRV_LOG(ERR, "Port %u could not modify indirect table obj %p", 2763 dev->data->port_id, (void *)ind_tbl); 2764 return ret; 2765 } 2766 for (i = 0; i < ind_tbl->queues_n; i++) 2767 mlx5_rxq_release(dev, ind_tbl->queues[i]); 2768 return ret; 2769 } 2770 2771 int 2772 mlx5_hrxq_match_cb(void *tool_ctx __rte_unused, struct mlx5_list_entry *entry, 2773 void *cb_ctx) 2774 { 2775 struct mlx5_flow_cb_ctx *ctx = cb_ctx; 2776 struct mlx5_flow_rss_desc *rss_desc = ctx->data; 2777 struct mlx5_hrxq *hrxq = container_of(entry, typeof(*hrxq), entry); 2778 2779 return (hrxq->rss_key_len != rss_desc->key_len || 2780 hrxq->symmetric_hash_function != rss_desc->symmetric_hash_function || 2781 memcmp(hrxq->rss_key, rss_desc->key, rss_desc->key_len) || 2782 hrxq->hws_flags != rss_desc->hws_flags || 2783 hrxq->hash_fields != rss_desc->hash_fields || 2784 hrxq->ind_table->queues_n != rss_desc->queue_num || 2785 memcmp(hrxq->ind_table->queues, rss_desc->queue, 2786 rss_desc->queue_num * sizeof(rss_desc->queue[0]))); 2787 } 2788 2789 /** 2790 * Modify an Rx Hash queue configuration. 2791 * 2792 * @param dev 2793 * Pointer to Ethernet device. 2794 * @param hrxq 2795 * Index to Hash Rx queue to modify. 2796 * @param rss_key 2797 * RSS key for the Rx hash queue. 2798 * @param rss_key_len 2799 * RSS key length. 2800 * @param hash_fields 2801 * Verbs protocol hash field to make the RSS on. 2802 * @param queues 2803 * Queues entering in hash queue. In case of empty hash_fields only the 2804 * first queue index will be taken for the indirection table. 2805 * @param queues_n 2806 * Number of queues. 2807 * 2808 * @return 2809 * 0 on success, a negative errno value otherwise and rte_errno is set. 2810 */ 2811 int 2812 mlx5_hrxq_modify(struct rte_eth_dev *dev, uint32_t hrxq_idx, 2813 const uint8_t *rss_key, uint32_t rss_key_len, 2814 uint64_t hash_fields, bool symmetric_hash_function, 2815 const uint16_t *queues, uint32_t queues_n) 2816 { 2817 int err; 2818 struct mlx5_ind_table_obj *ind_tbl = NULL; 2819 struct mlx5_priv *priv = dev->data->dev_private; 2820 struct mlx5_hrxq *hrxq = 2821 mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ], hrxq_idx); 2822 bool dev_started = !!dev->data->dev_started; 2823 int ret; 2824 2825 if (!hrxq) { 2826 rte_errno = EINVAL; 2827 return -rte_errno; 2828 } 2829 /* validations */ 2830 if (hrxq->rss_key_len != rss_key_len) { 2831 /* rss_key_len is fixed size 40 byte & not supposed to change */ 2832 rte_errno = EINVAL; 2833 return -rte_errno; 2834 } 2835 queues_n = hash_fields ? queues_n : 1; 2836 if (mlx5_ind_table_obj_match_queues(hrxq->ind_table, 2837 queues, queues_n)) { 2838 ind_tbl = hrxq->ind_table; 2839 } else { 2840 if (hrxq->standalone) { 2841 /* 2842 * Replacement of indirection table unsupported for 2843 * standalone hrxq objects (used by shared RSS). 
2844 */ 2845 rte_errno = ENOTSUP; 2846 return -rte_errno; 2847 } 2848 ind_tbl = mlx5_ind_table_obj_get(dev, queues, queues_n); 2849 if (!ind_tbl) 2850 ind_tbl = mlx5_ind_table_obj_new(dev, queues, queues_n, 2851 hrxq->standalone, 2852 dev_started); 2853 } 2854 if (!ind_tbl) { 2855 rte_errno = ENOMEM; 2856 return -rte_errno; 2857 } 2858 MLX5_ASSERT(priv->obj_ops.hrxq_modify); 2859 ret = priv->obj_ops.hrxq_modify(dev, hrxq, rss_key, hash_fields, 2860 symmetric_hash_function, ind_tbl); 2861 if (ret) { 2862 rte_errno = errno; 2863 goto error; 2864 } 2865 if (ind_tbl != hrxq->ind_table) { 2866 MLX5_ASSERT(!hrxq->standalone); 2867 mlx5_ind_table_obj_release(dev, hrxq->ind_table, true); 2868 hrxq->ind_table = ind_tbl; 2869 } 2870 hrxq->hash_fields = hash_fields; 2871 memcpy(hrxq->rss_key, rss_key, rss_key_len); 2872 return 0; 2873 error: 2874 err = rte_errno; 2875 if (ind_tbl != hrxq->ind_table) { 2876 MLX5_ASSERT(!hrxq->standalone); 2877 mlx5_ind_table_obj_release(dev, ind_tbl, true); 2878 } 2879 rte_errno = err; 2880 return -rte_errno; 2881 } 2882 2883 static void 2884 __mlx5_hrxq_remove(struct rte_eth_dev *dev, struct mlx5_hrxq *hrxq) 2885 { 2886 struct mlx5_priv *priv = dev->data->dev_private; 2887 2888 #ifdef HAVE_IBV_FLOW_DV_SUPPORT 2889 if (hrxq->hws_flags) 2890 mlx5dr_action_destroy(hrxq->action); 2891 else 2892 mlx5_glue->destroy_flow_action(hrxq->action); 2893 #endif 2894 priv->obj_ops.hrxq_destroy(hrxq); 2895 if (!hrxq->standalone) { 2896 mlx5_ind_table_obj_release(dev, hrxq->ind_table, 2897 hrxq->hws_flags ? 2898 (!!dev->data->dev_started) : true); 2899 } 2900 mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_HRXQ], hrxq->idx); 2901 } 2902 2903 /** 2904 * Release the hash Rx queue. 2905 * 2906 * @param dev 2907 * Pointer to Ethernet device. 2908 * @param hrxq 2909 * Index to Hash Rx queue to release. 2910 * 2911 * @param list 2912 * mlx5 list pointer. 2913 * @param entry 2914 * Hash queue entry pointer. 2915 */ 2916 void 2917 mlx5_hrxq_remove_cb(void *tool_ctx, struct mlx5_list_entry *entry) 2918 { 2919 struct rte_eth_dev *dev = tool_ctx; 2920 struct mlx5_hrxq *hrxq = container_of(entry, typeof(*hrxq), entry); 2921 2922 __mlx5_hrxq_remove(dev, hrxq); 2923 } 2924 2925 static struct mlx5_hrxq * 2926 __mlx5_hrxq_create(struct rte_eth_dev *dev, 2927 struct mlx5_flow_rss_desc *rss_desc) 2928 { 2929 struct mlx5_priv *priv = dev->data->dev_private; 2930 const uint8_t *rss_key = rss_desc->key; 2931 uint32_t rss_key_len = rss_desc->key_len; 2932 bool standalone = !!rss_desc->shared_rss; 2933 const uint16_t *queues = 2934 standalone ? rss_desc->const_q : rss_desc->queue; 2935 uint32_t queues_n = rss_desc->queue_num; 2936 struct mlx5_hrxq *hrxq = NULL; 2937 uint32_t hrxq_idx = 0; 2938 struct mlx5_ind_table_obj *ind_tbl = rss_desc->ind_tbl; 2939 int ret; 2940 2941 queues_n = rss_desc->hash_fields ? 
queues_n : 1; 2942 if (!ind_tbl && !rss_desc->hws_flags) 2943 ind_tbl = mlx5_ind_table_obj_get(dev, queues, queues_n); 2944 if (!ind_tbl) 2945 ind_tbl = mlx5_ind_table_obj_new(dev, queues, queues_n, 2946 standalone || 2947 rss_desc->hws_flags, 2948 !!dev->data->dev_started); 2949 if (!ind_tbl) 2950 return NULL; 2951 hrxq = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_HRXQ], &hrxq_idx); 2952 if (!hrxq) 2953 goto error; 2954 hrxq->standalone = standalone; 2955 hrxq->idx = hrxq_idx; 2956 hrxq->ind_table = ind_tbl; 2957 hrxq->rss_key_len = rss_key_len; 2958 hrxq->hash_fields = rss_desc->hash_fields; 2959 hrxq->hws_flags = rss_desc->hws_flags; 2960 hrxq->symmetric_hash_function = rss_desc->symmetric_hash_function; 2961 memcpy(hrxq->rss_key, rss_key, rss_key_len); 2962 ret = priv->obj_ops.hrxq_new(dev, hrxq, rss_desc->tunnel); 2963 if (ret < 0) 2964 goto error; 2965 return hrxq; 2966 error: 2967 if (!rss_desc->ind_tbl) 2968 mlx5_ind_table_obj_release(dev, ind_tbl, true); 2969 if (hrxq) 2970 mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_HRXQ], hrxq_idx); 2971 return NULL; 2972 } 2973 2974 struct mlx5_list_entry * 2975 mlx5_hrxq_create_cb(void *tool_ctx, void *cb_ctx) 2976 { 2977 struct rte_eth_dev *dev = tool_ctx; 2978 struct mlx5_flow_cb_ctx *ctx = cb_ctx; 2979 struct mlx5_flow_rss_desc *rss_desc = ctx->data; 2980 struct mlx5_hrxq *hrxq; 2981 2982 hrxq = __mlx5_hrxq_create(dev, rss_desc); 2983 return hrxq ? &hrxq->entry : NULL; 2984 } 2985 2986 struct mlx5_list_entry * 2987 mlx5_hrxq_clone_cb(void *tool_ctx, struct mlx5_list_entry *entry, 2988 void *cb_ctx __rte_unused) 2989 { 2990 struct rte_eth_dev *dev = tool_ctx; 2991 struct mlx5_priv *priv = dev->data->dev_private; 2992 struct mlx5_hrxq *hrxq; 2993 uint32_t hrxq_idx = 0; 2994 2995 hrxq = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_HRXQ], &hrxq_idx); 2996 if (!hrxq) 2997 return NULL; 2998 memcpy(hrxq, entry, sizeof(*hrxq) + MLX5_RSS_HASH_KEY_LEN); 2999 hrxq->idx = hrxq_idx; 3000 return &hrxq->entry; 3001 } 3002 3003 void 3004 mlx5_hrxq_clone_free_cb(void *tool_ctx, struct mlx5_list_entry *entry) 3005 { 3006 struct rte_eth_dev *dev = tool_ctx; 3007 struct mlx5_priv *priv = dev->data->dev_private; 3008 struct mlx5_hrxq *hrxq = container_of(entry, typeof(*hrxq), entry); 3009 3010 mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_HRXQ], hrxq->idx); 3011 } 3012 3013 /** 3014 * Get an Rx Hash queue. 3015 * 3016 * @param dev 3017 * Pointer to Ethernet device. 3018 * @param rss_desc 3019 * RSS configuration for the Rx hash queue. 3020 * 3021 * @return 3022 * An hash Rx queue on success. 3023 */ 3024 struct mlx5_hrxq *mlx5_hrxq_get(struct rte_eth_dev *dev, 3025 struct mlx5_flow_rss_desc *rss_desc) 3026 { 3027 struct mlx5_priv *priv = dev->data->dev_private; 3028 struct mlx5_hrxq *hrxq = NULL; 3029 struct mlx5_list_entry *entry; 3030 struct mlx5_flow_cb_ctx ctx = { 3031 .data = rss_desc, 3032 }; 3033 3034 if (rss_desc->shared_rss) { 3035 hrxq = __mlx5_hrxq_create(dev, rss_desc); 3036 } else { 3037 entry = mlx5_list_register(priv->hrxqs, &ctx); 3038 if (!entry) 3039 return NULL; 3040 hrxq = container_of(entry, typeof(*hrxq), entry); 3041 } 3042 return hrxq; 3043 } 3044 3045 /** 3046 * Release the hash Rx queue. 3047 * 3048 * @param dev 3049 * Pointer to Ethernet device. 3050 * @param hrxq_idx 3051 * Hash Rx queue to release. 3052 * 3053 * @return 3054 * 1 while a reference on it exists, 0 when freed. 
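 *
 * Illustrative get/release pairing (a sketch under the assumption that
 * "rss_desc" is a fully initialized struct mlx5_flow_rss_desc):
 *
 * @code
 * struct mlx5_hrxq *hrxq = mlx5_hrxq_get(dev, &rss_desc);
 *
 * if (hrxq != NULL)
 *         mlx5_hrxq_obj_release(dev, hrxq);
 * @endcode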
3055 */ 3056 int mlx5_hrxq_obj_release(struct rte_eth_dev *dev, struct mlx5_hrxq *hrxq) 3057 { 3058 struct mlx5_priv *priv = dev->data->dev_private; 3059 3060 if (!hrxq) 3061 return 0; 3062 if (!hrxq->standalone) 3063 return mlx5_list_unregister(priv->hrxqs, &hrxq->entry); 3064 __mlx5_hrxq_remove(dev, hrxq); 3065 return 0; 3066 } 3067 3068 /** 3069 * Release the hash Rx queue with index. 3070 * 3071 * @param dev 3072 * Pointer to Ethernet device. 3073 * @param hrxq_idx 3074 * Index to Hash Rx queue to release. 3075 * 3076 * @return 3077 * 1 while a reference on it exists, 0 when freed. 3078 */ 3079 int mlx5_hrxq_release(struct rte_eth_dev *dev, uint32_t hrxq_idx) 3080 { 3081 struct mlx5_priv *priv = dev->data->dev_private; 3082 struct mlx5_hrxq *hrxq; 3083 3084 hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ], hrxq_idx); 3085 return mlx5_hrxq_obj_release(dev, hrxq); 3086 } 3087 3088 /** 3089 * Create a drop Rx Hash queue. 3090 * 3091 * @param dev 3092 * Pointer to Ethernet device. 3093 * 3094 * @return 3095 * The Verbs/DevX object initialized, NULL otherwise and rte_errno is set. 3096 */ 3097 struct mlx5_hrxq * 3098 mlx5_drop_action_create(struct rte_eth_dev *dev) 3099 { 3100 struct mlx5_priv *priv = dev->data->dev_private; 3101 struct mlx5_hrxq *hrxq = NULL; 3102 int ret; 3103 3104 if (priv->drop_queue.hrxq) 3105 return priv->drop_queue.hrxq; 3106 hrxq = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*hrxq) + MLX5_RSS_HASH_KEY_LEN, 0, SOCKET_ID_ANY); 3107 if (!hrxq) { 3108 DRV_LOG(WARNING, 3109 "Port %u cannot allocate memory for drop queue.", 3110 dev->data->port_id); 3111 rte_errno = ENOMEM; 3112 goto error; 3113 } 3114 priv->drop_queue.hrxq = hrxq; 3115 hrxq->ind_table = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*hrxq->ind_table), 3116 0, SOCKET_ID_ANY); 3117 if (!hrxq->ind_table) { 3118 rte_errno = ENOMEM; 3119 goto error; 3120 } 3121 ret = priv->obj_ops.drop_action_create(dev); 3122 if (ret < 0) 3123 goto error; 3124 return hrxq; 3125 error: 3126 if (hrxq) { 3127 if (hrxq->ind_table) 3128 mlx5_free(hrxq->ind_table); 3129 priv->drop_queue.hrxq = NULL; 3130 mlx5_free(hrxq); 3131 } 3132 return NULL; 3133 } 3134 3135 /** 3136 * Release a drop hash Rx queue. 3137 * 3138 * @param dev 3139 * Pointer to Ethernet device. 3140 */ 3141 void 3142 mlx5_drop_action_destroy(struct rte_eth_dev *dev) 3143 { 3144 struct mlx5_priv *priv = dev->data->dev_private; 3145 struct mlx5_hrxq *hrxq = priv->drop_queue.hrxq; 3146 3147 if (!priv->drop_queue.hrxq) 3148 return; 3149 priv->obj_ops.drop_action_destroy(dev); 3150 mlx5_free(priv->drop_queue.rxq); 3151 mlx5_free(hrxq->ind_table); 3152 mlx5_free(hrxq); 3153 priv->drop_queue.rxq = NULL; 3154 priv->drop_queue.hrxq = NULL; 3155 } 3156 3157 /** 3158 * Verify the Rx Queue list is empty 3159 * 3160 * @param dev 3161 * Pointer to Ethernet device. 3162 * 3163 * @return 3164 * The number of object not released. 3165 */ 3166 uint32_t 3167 mlx5_hrxq_verify(struct rte_eth_dev *dev) 3168 { 3169 struct mlx5_priv *priv = dev->data->dev_private; 3170 3171 return mlx5_list_get_entry_num(priv->hrxqs); 3172 } 3173 3174 /** 3175 * Set the Rx queue timestamp conversion parameters 3176 * 3177 * @param[in] dev 3178 * Pointer to the Ethernet device structure. 
3179 */
3180 void
3181 mlx5_rxq_timestamp_set(struct rte_eth_dev *dev)
3182 {
3183 struct mlx5_priv *priv = dev->data->dev_private;
3184 struct mlx5_dev_ctx_shared *sh = priv->sh;
3185 unsigned int i;
3186 
3187 for (i = 0; i != priv->rxqs_n; ++i) {
3188 struct mlx5_rxq_data *data = mlx5_rxq_data_get(dev, i);
3189 
3190 if (data == NULL)
3191 continue;
3192 data->sh = sh;
3193 data->rt_timestamp = sh->dev_cap.rt_timestamp;
3194 }
3195 }
3196 
3197 /**
3198 * Validate given external RxQ rte_flow index, and get pointer to concurrent
3199 * external RxQ object to map/unmap.
3200 *
3201 * @param[in] port_id
3202 * The port identifier of the Ethernet device.
3203 * @param[in] dpdk_idx
3204 * Queue index in rte_flow.
3205 *
3206 * @return
3207 * Pointer to concurrent external RxQ on success,
3208 * NULL otherwise and rte_errno is set.
3209 */
3210 static struct mlx5_external_rxq *
3211 mlx5_external_rx_queue_get_validate(uint16_t port_id, uint16_t dpdk_idx)
3212 {
3213 struct rte_eth_dev *dev;
3214 struct mlx5_priv *priv;
3215 
3216 if (dpdk_idx < RTE_PMD_MLX5_EXTERNAL_RX_QUEUE_ID_MIN) {
3217 DRV_LOG(ERR, "Queue index %u should be in range: [%u, %u].",
3218 dpdk_idx, RTE_PMD_MLX5_EXTERNAL_RX_QUEUE_ID_MIN, UINT16_MAX);
3219 rte_errno = EINVAL;
3220 return NULL;
3221 }
3222 if (rte_eth_dev_is_valid_port(port_id) < 0) {
3223 DRV_LOG(ERR, "There is no Ethernet device for port %u.",
3224 port_id);
3225 rte_errno = ENODEV;
3226 return NULL;
3227 }
3228 dev = &rte_eth_devices[port_id];
3229 priv = dev->data->dev_private;
3230 if (!mlx5_imported_pd_and_ctx(priv->sh->cdev)) {
3231 DRV_LOG(ERR, "Port %u "
3232 "external RxQ isn't supported on local PD and CTX.",
3233 port_id);
3234 rte_errno = ENOTSUP;
3235 return NULL;
3236 }
3237 if (!mlx5_devx_obj_ops_en(priv->sh)) {
3238 DRV_LOG(ERR,
3239 "Port %u external RxQ isn't supported by Verbs API.",
3240 port_id);
3241 rte_errno = ENOTSUP;
3242 return NULL;
3243 }
3244 /*
3245 * When the user configures a remote PD and CTX and the device creates
3246 * RxQs by DevX, the external RxQs array is allocated.
3247 */ 3248 MLX5_ASSERT(priv->ext_rxqs != NULL); 3249 return &priv->ext_rxqs[dpdk_idx - RTE_PMD_MLX5_EXTERNAL_RX_QUEUE_ID_MIN]; 3250 } 3251 3252 int 3253 rte_pmd_mlx5_external_rx_queue_id_map(uint16_t port_id, uint16_t dpdk_idx, 3254 uint32_t hw_idx) 3255 { 3256 struct mlx5_external_rxq *ext_rxq; 3257 uint32_t unmapped = 0; 3258 3259 ext_rxq = mlx5_external_rx_queue_get_validate(port_id, dpdk_idx); 3260 if (ext_rxq == NULL) 3261 return -rte_errno; 3262 if (!__atomic_compare_exchange_n(&ext_rxq->refcnt, &unmapped, 1, false, 3263 __ATOMIC_RELAXED, __ATOMIC_RELAXED)) { 3264 if (ext_rxq->hw_id != hw_idx) { 3265 DRV_LOG(ERR, "Port %u external RxQ index %u " 3266 "is already mapped to HW index (requesting is " 3267 "%u, existing is %u).", 3268 port_id, dpdk_idx, hw_idx, ext_rxq->hw_id); 3269 rte_errno = EEXIST; 3270 return -rte_errno; 3271 } 3272 DRV_LOG(WARNING, "Port %u external RxQ index %u " 3273 "is already mapped to the requested HW index (%u)", 3274 port_id, dpdk_idx, hw_idx); 3275 3276 } else { 3277 ext_rxq->hw_id = hw_idx; 3278 DRV_LOG(DEBUG, "Port %u external RxQ index %u " 3279 "is successfully mapped to the requested HW index (%u)", 3280 port_id, dpdk_idx, hw_idx); 3281 } 3282 return 0; 3283 } 3284 3285 int 3286 rte_pmd_mlx5_external_rx_queue_id_unmap(uint16_t port_id, uint16_t dpdk_idx) 3287 { 3288 struct mlx5_external_rxq *ext_rxq; 3289 uint32_t mapped = 1; 3290 3291 ext_rxq = mlx5_external_rx_queue_get_validate(port_id, dpdk_idx); 3292 if (ext_rxq == NULL) 3293 return -rte_errno; 3294 if (ext_rxq->refcnt > 1) { 3295 DRV_LOG(ERR, "Port %u external RxQ index %u still referenced.", 3296 port_id, dpdk_idx); 3297 rte_errno = EINVAL; 3298 return -rte_errno; 3299 } 3300 if (!__atomic_compare_exchange_n(&ext_rxq->refcnt, &mapped, 0, false, 3301 __ATOMIC_RELAXED, __ATOMIC_RELAXED)) { 3302 DRV_LOG(ERR, "Port %u external RxQ index %u doesn't exist.", 3303 port_id, dpdk_idx); 3304 rte_errno = EINVAL; 3305 return -rte_errno; 3306 } 3307 DRV_LOG(DEBUG, 3308 "Port %u external RxQ index %u is successfully unmapped.", 3309 port_id, dpdk_idx); 3310 return 0; 3311 } 3312
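
/*
 * Illustrative use of the external Rx queue mapping API above (a sketch,
 * not part of the driver): an application that owns a DevX-created RQ can
 * expose it to rte_flow by mapping a flow-level queue index onto the
 * hardware queue id, and should unmap it before closing the port. The
 * variable names and the hw_queue_id source are assumptions.
 *
 * uint16_t flow_idx = RTE_PMD_MLX5_EXTERNAL_RX_QUEUE_ID_MIN;
 * uint32_t hw_queue_id = my_devx_rq_id;
 *
 * if (rte_pmd_mlx5_external_rx_queue_id_map(port_id, flow_idx,
 *                                           hw_queue_id) == 0) {
 *         ... create rte_flow rules targeting flow_idx ...
 *         rte_pmd_mlx5_external_rx_queue_id_unmap(port_id, flow_idx);
 * }
 */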