1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2015 6WIND S.A. 3 * Copyright 2015 Mellanox Technologies, Ltd 4 */ 5 6 #include <stddef.h> 7 #include <errno.h> 8 #include <string.h> 9 #include <stdint.h> 10 #include <fcntl.h> 11 #include <sys/queue.h> 12 13 #include <rte_mbuf.h> 14 #include <rte_malloc.h> 15 #include <ethdev_driver.h> 16 #include <rte_common.h> 17 #include <rte_interrupts.h> 18 #include <rte_debug.h> 19 #include <rte_io.h> 20 #include <rte_eal_paging.h> 21 22 #include <mlx5_glue.h> 23 #include <mlx5_malloc.h> 24 #include <mlx5_common.h> 25 #include <mlx5_common_mr.h> 26 27 #include "mlx5_defs.h" 28 #include "mlx5.h" 29 #include "mlx5_rx.h" 30 #include "mlx5_utils.h" 31 #include "mlx5_autoconf.h" 32 #include "mlx5_devx.h" 33 #include "rte_pmd_mlx5.h" 34 35 36 /* Default RSS hash key also used for ConnectX-3. */ 37 uint8_t rss_hash_default_key[] = { 38 0x2c, 0xc6, 0x81, 0xd1, 39 0x5b, 0xdb, 0xf4, 0xf7, 40 0xfc, 0xa2, 0x83, 0x19, 41 0xdb, 0x1a, 0x3e, 0x94, 42 0x6b, 0x9e, 0x38, 0xd9, 43 0x2c, 0x9c, 0x03, 0xd1, 44 0xad, 0x99, 0x44, 0xa7, 45 0xd9, 0x56, 0x3d, 0x59, 46 0x06, 0x3c, 0x25, 0xf3, 47 0xfc, 0x1f, 0xdc, 0x2a, 48 }; 49 50 /* Length of the default RSS hash key. */ 51 static_assert(MLX5_RSS_HASH_KEY_LEN == 52 (unsigned int)sizeof(rss_hash_default_key), 53 "wrong RSS default key size."); 54 55 /** 56 * Calculate the number of CQEs in CQ for the Rx queue. 57 * 58 * @param rxq_data 59 * Pointer to receive queue structure. 60 * 61 * @return 62 * Number of CQEs in CQ. 63 */ 64 unsigned int 65 mlx5_rxq_cqe_num(struct mlx5_rxq_data *rxq_data) 66 { 67 unsigned int cqe_n; 68 unsigned int wqe_n = 1 << rxq_data->elts_n; 69 70 if (mlx5_rxq_mprq_enabled(rxq_data)) 71 cqe_n = wqe_n * RTE_BIT32(rxq_data->log_strd_num) - 1; 72 else 73 cqe_n = wqe_n - 1; 74 return cqe_n; 75 } 76 77 /** 78 * Allocate RX queue elements for Multi-Packet RQ. 79 * 80 * @param rxq_ctrl 81 * Pointer to RX queue structure. 82 * 83 * @return 84 * 0 on success, a negative errno value otherwise and rte_errno is set. 85 */ 86 static int 87 rxq_alloc_elts_mprq(struct mlx5_rxq_ctrl *rxq_ctrl) 88 { 89 struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq; 90 unsigned int wqe_n = 1 << rxq->elts_n; 91 unsigned int i; 92 int err; 93 94 /* Iterate on segments. */ 95 for (i = 0; i <= wqe_n; ++i) { 96 struct mlx5_mprq_buf *buf; 97 98 if (rte_mempool_get(rxq->mprq_mp, (void **)&buf) < 0) { 99 DRV_LOG(ERR, "port %u empty mbuf pool", rxq->port_id); 100 rte_errno = ENOMEM; 101 goto error; 102 } 103 if (i < wqe_n) 104 (*rxq->mprq_bufs)[i] = buf; 105 else 106 rxq->mprq_repl = buf; 107 } 108 DRV_LOG(DEBUG, 109 "port %u MPRQ queue %u allocated and configured %u segments", 110 rxq->port_id, rxq->idx, wqe_n); 111 return 0; 112 error: 113 err = rte_errno; /* Save rte_errno before cleanup. */ 114 wqe_n = i; 115 for (i = 0; (i != wqe_n); ++i) { 116 if ((*rxq->mprq_bufs)[i] != NULL) 117 rte_mempool_put(rxq->mprq_mp, 118 (*rxq->mprq_bufs)[i]); 119 (*rxq->mprq_bufs)[i] = NULL; 120 } 121 DRV_LOG(DEBUG, "port %u MPRQ queue %u failed, freed everything", 122 rxq->port_id, rxq->idx); 123 rte_errno = err; /* Restore rte_errno. */ 124 return -rte_errno; 125 } 126 127 /** 128 * Allocate RX queue elements for Single-Packet RQ. 129 * 130 * @param rxq_ctrl 131 * Pointer to RX queue structure. 132 * 133 * @return 134 * 0 on success, negative errno value on failure. 
135 */ 136 static int 137 rxq_alloc_elts_sprq(struct mlx5_rxq_ctrl *rxq_ctrl) 138 { 139 const unsigned int sges_n = 1 << rxq_ctrl->rxq.sges_n; 140 unsigned int elts_n = mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq) ? 141 RTE_BIT32(rxq_ctrl->rxq.elts_n) * 142 RTE_BIT32(rxq_ctrl->rxq.log_strd_num) : 143 RTE_BIT32(rxq_ctrl->rxq.elts_n); 144 bool has_vec_support = mlx5_rxq_check_vec_support(&rxq_ctrl->rxq) > 0; 145 unsigned int i; 146 int err; 147 148 /* Iterate on segments. */ 149 for (i = 0; (i != elts_n); ++i) { 150 struct mlx5_eth_rxseg *seg = &rxq_ctrl->rxq.rxseg[i % sges_n]; 151 struct rte_mbuf *buf; 152 153 buf = rte_pktmbuf_alloc(seg->mp); 154 if (buf == NULL) { 155 if (rxq_ctrl->share_group == 0) 156 DRV_LOG(ERR, "port %u queue %u empty mbuf pool", 157 RXQ_PORT_ID(rxq_ctrl), 158 rxq_ctrl->rxq.idx); 159 else 160 DRV_LOG(ERR, "share group %u queue %u empty mbuf pool", 161 rxq_ctrl->share_group, 162 rxq_ctrl->share_qid); 163 rte_errno = ENOMEM; 164 goto error; 165 } 166 /* Only vectored Rx routines rely on headroom size. */ 167 MLX5_ASSERT(!has_vec_support || 168 DATA_OFF(buf) >= RTE_PKTMBUF_HEADROOM); 169 /* Buffer is supposed to be empty. */ 170 MLX5_ASSERT(rte_pktmbuf_data_len(buf) == 0); 171 MLX5_ASSERT(rte_pktmbuf_pkt_len(buf) == 0); 172 MLX5_ASSERT(!buf->next); 173 SET_DATA_OFF(buf, seg->offset); 174 PORT(buf) = rxq_ctrl->rxq.port_id; 175 DATA_LEN(buf) = seg->length; 176 PKT_LEN(buf) = seg->length; 177 NB_SEGS(buf) = 1; 178 (*rxq_ctrl->rxq.elts)[i] = buf; 179 } 180 /* If Rx vector is activated. */ 181 if (has_vec_support) { 182 struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq; 183 struct rte_mbuf *mbuf_init = &rxq->fake_mbuf; 184 struct rte_pktmbuf_pool_private *priv = 185 (struct rte_pktmbuf_pool_private *) 186 rte_mempool_get_priv(rxq_ctrl->rxq.mp); 187 int j; 188 189 /* Initialize default rearm_data for vPMD. */ 190 mbuf_init->data_off = RTE_PKTMBUF_HEADROOM; 191 rte_mbuf_refcnt_set(mbuf_init, 1); 192 mbuf_init->nb_segs = 1; 193 /* For shared queues port is provided in CQE */ 194 mbuf_init->port = rxq->shared ? 0 : rxq->port_id; 195 if (priv->flags & RTE_PKTMBUF_POOL_F_PINNED_EXT_BUF) 196 mbuf_init->ol_flags = RTE_MBUF_F_EXTERNAL; 197 /* 198 * prevent compiler reordering: 199 * rearm_data covers previous fields. 200 */ 201 rte_compiler_barrier(); 202 rxq->mbuf_initializer = 203 *(rte_xmm_t *)&mbuf_init->rearm_data; 204 /* Padding with a fake mbuf for vectorized Rx. */ 205 for (j = 0; j < MLX5_VPMD_DESCS_PER_LOOP; ++j) 206 (*rxq->elts)[elts_n + j] = &rxq->fake_mbuf; 207 } 208 if (rxq_ctrl->share_group == 0) 209 DRV_LOG(DEBUG, 210 "port %u SPRQ queue %u allocated and configured %u segments (max %u packets)", 211 RXQ_PORT_ID(rxq_ctrl), rxq_ctrl->rxq.idx, elts_n, 212 elts_n / (1 << rxq_ctrl->rxq.sges_n)); 213 else 214 DRV_LOG(DEBUG, 215 "share group %u SPRQ queue %u allocated and configured %u segments (max %u packets)", 216 rxq_ctrl->share_group, rxq_ctrl->share_qid, elts_n, 217 elts_n / (1 << rxq_ctrl->rxq.sges_n)); 218 return 0; 219 error: 220 err = rte_errno; /* Save rte_errno before cleanup. 
*/ 221 elts_n = i; 222 for (i = 0; (i != elts_n); ++i) { 223 if ((*rxq_ctrl->rxq.elts)[i] != NULL) 224 rte_pktmbuf_free_seg((*rxq_ctrl->rxq.elts)[i]); 225 (*rxq_ctrl->rxq.elts)[i] = NULL; 226 } 227 if (rxq_ctrl->share_group == 0) 228 DRV_LOG(DEBUG, "port %u SPRQ queue %u failed, freed everything", 229 RXQ_PORT_ID(rxq_ctrl), rxq_ctrl->rxq.idx); 230 else 231 DRV_LOG(DEBUG, "share group %u SPRQ queue %u failed, freed everything", 232 rxq_ctrl->share_group, rxq_ctrl->share_qid); 233 rte_errno = err; /* Restore rte_errno. */ 234 return -rte_errno; 235 } 236 237 /** 238 * Allocate RX queue elements. 239 * 240 * @param rxq_ctrl 241 * Pointer to RX queue structure. 242 * 243 * @return 244 * 0 on success, negative errno value on failure. 245 */ 246 int 247 rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl) 248 { 249 int ret = 0; 250 251 /** 252 * For MPRQ we need to allocate both MPRQ buffers 253 * for WQEs and simple mbufs for vector processing. 254 */ 255 if (mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq)) 256 ret = rxq_alloc_elts_mprq(rxq_ctrl); 257 if (ret == 0) 258 ret = rxq_alloc_elts_sprq(rxq_ctrl); 259 return ret; 260 } 261 262 /** 263 * Free RX queue elements for Multi-Packet RQ. 264 * 265 * @param rxq_ctrl 266 * Pointer to RX queue structure. 267 */ 268 static void 269 rxq_free_elts_mprq(struct mlx5_rxq_ctrl *rxq_ctrl) 270 { 271 struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq; 272 uint16_t i; 273 274 DRV_LOG(DEBUG, "port %u Multi-Packet Rx queue %u freeing %d WRs", 275 rxq->port_id, rxq->idx, (1u << rxq->elts_n)); 276 if (rxq->mprq_bufs == NULL) 277 return; 278 for (i = 0; (i != (1u << rxq->elts_n)); ++i) { 279 if ((*rxq->mprq_bufs)[i] != NULL) 280 mlx5_mprq_buf_free((*rxq->mprq_bufs)[i]); 281 (*rxq->mprq_bufs)[i] = NULL; 282 } 283 if (rxq->mprq_repl != NULL) { 284 mlx5_mprq_buf_free(rxq->mprq_repl); 285 rxq->mprq_repl = NULL; 286 } 287 } 288 289 /** 290 * Free RX queue elements for Single-Packet RQ. 291 * 292 * @param rxq_ctrl 293 * Pointer to RX queue structure. 294 */ 295 static void 296 rxq_free_elts_sprq(struct mlx5_rxq_ctrl *rxq_ctrl) 297 { 298 struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq; 299 const uint16_t q_n = mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq) ? 300 RTE_BIT32(rxq->elts_n) * RTE_BIT32(rxq->log_strd_num) : 301 RTE_BIT32(rxq->elts_n); 302 const uint16_t q_mask = q_n - 1; 303 uint16_t elts_ci = mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq) ? 304 rxq->elts_ci : rxq->rq_ci; 305 uint16_t used = q_n - (elts_ci - rxq->rq_pi); 306 uint16_t i; 307 308 if (rxq_ctrl->share_group == 0) 309 DRV_LOG(DEBUG, "port %u Rx queue %u freeing %d WRs", 310 RXQ_PORT_ID(rxq_ctrl), rxq->idx, q_n); 311 else 312 DRV_LOG(DEBUG, "share group %u Rx queue %u freeing %d WRs", 313 rxq_ctrl->share_group, rxq_ctrl->share_qid, q_n); 314 if (rxq->elts == NULL) 315 return; 316 /** 317 * Some mbuf in the Ring belongs to the application. 318 * They cannot be freed. 319 */ 320 if (mlx5_rxq_check_vec_support(rxq) > 0) { 321 for (i = 0; i < used; ++i) 322 (*rxq->elts)[(elts_ci + i) & q_mask] = NULL; 323 rxq->rq_pi = elts_ci; 324 } 325 for (i = 0; i != q_n; ++i) { 326 if ((*rxq->elts)[i] != NULL) 327 rte_pktmbuf_free_seg((*rxq->elts)[i]); 328 (*rxq->elts)[i] = NULL; 329 } 330 } 331 332 /** 333 * Free RX queue elements. 334 * 335 * @param rxq_ctrl 336 * Pointer to RX queue structure. 337 */ 338 static void 339 rxq_free_elts(struct mlx5_rxq_ctrl *rxq_ctrl) 340 { 341 /* 342 * For MPRQ we need to allocate both MPRQ buffers 343 * for WQEs and simple mbufs for vector processing. 
344 */ 345 if (mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq)) 346 rxq_free_elts_mprq(rxq_ctrl); 347 rxq_free_elts_sprq(rxq_ctrl); 348 } 349 350 /** 351 * Returns the per-queue supported offloads. 352 * 353 * @param dev 354 * Pointer to Ethernet device. 355 * 356 * @return 357 * Supported Rx offloads. 358 */ 359 uint64_t 360 mlx5_get_rx_queue_offloads(struct rte_eth_dev *dev) 361 { 362 struct mlx5_priv *priv = dev->data->dev_private; 363 uint64_t offloads = (RTE_ETH_RX_OFFLOAD_SCATTER | 364 RTE_ETH_RX_OFFLOAD_TIMESTAMP | 365 RTE_ETH_RX_OFFLOAD_RSS_HASH); 366 367 if (!priv->config.mprq.enabled) 368 offloads |= RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT; 369 if (priv->sh->config.hw_fcs_strip) 370 offloads |= RTE_ETH_RX_OFFLOAD_KEEP_CRC; 371 if (priv->sh->dev_cap.hw_csum) 372 offloads |= (RTE_ETH_RX_OFFLOAD_IPV4_CKSUM | 373 RTE_ETH_RX_OFFLOAD_UDP_CKSUM | 374 RTE_ETH_RX_OFFLOAD_TCP_CKSUM); 375 if (priv->sh->dev_cap.hw_vlan_strip) 376 offloads |= RTE_ETH_RX_OFFLOAD_VLAN_STRIP; 377 if (priv->sh->config.lro_allowed) 378 offloads |= RTE_ETH_RX_OFFLOAD_TCP_LRO; 379 return offloads; 380 } 381 382 383 /** 384 * Returns the per-port supported offloads. 385 * 386 * @return 387 * Supported Rx offloads. 388 */ 389 uint64_t 390 mlx5_get_rx_port_offloads(void) 391 { 392 uint64_t offloads = RTE_ETH_RX_OFFLOAD_VLAN_FILTER; 393 394 return offloads; 395 } 396 397 /** 398 * Verify if the queue can be released. 399 * 400 * @param dev 401 * Pointer to Ethernet device. 402 * @param idx 403 * RX queue index. 404 * 405 * @return 406 * 1 if the queue can be released 407 * 0 if the queue can not be released, there are references to it. 408 * Negative errno and rte_errno is set if queue doesn't exist. 409 */ 410 static int 411 mlx5_rxq_releasable(struct rte_eth_dev *dev, uint16_t idx) 412 { 413 struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, idx); 414 415 if (rxq == NULL) { 416 rte_errno = EINVAL; 417 return -rte_errno; 418 } 419 return (rte_atomic_load_explicit(&rxq->refcnt, rte_memory_order_relaxed) == 1); 420 } 421 422 /* Fetches and drops all SW-owned and error CQEs to synchronize CQ. */ 423 static void 424 rxq_sync_cq(struct mlx5_rxq_data *rxq) 425 { 426 const uint16_t cqe_n = 1 << rxq->cqe_n; 427 const uint16_t cqe_mask = cqe_n - 1; 428 volatile struct mlx5_cqe *cqe; 429 int ret, i; 430 431 i = cqe_n; 432 do { 433 cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_mask]; 434 ret = check_cqe(cqe, cqe_n, rxq->cq_ci); 435 if (ret == MLX5_CQE_STATUS_HW_OWN) 436 break; 437 if (ret == MLX5_CQE_STATUS_ERR) { 438 rxq->cq_ci++; 439 continue; 440 } 441 MLX5_ASSERT(ret == MLX5_CQE_STATUS_SW_OWN); 442 if (MLX5_CQE_FORMAT(cqe->op_own) != MLX5_COMPRESSED) { 443 rxq->cq_ci++; 444 continue; 445 } 446 /* Compute the next non compressed CQE. */ 447 rxq->cq_ci += rxq->cqe_comp_layout ? 448 (MLX5_CQE_NUM_MINIS(cqe->op_own) + 1U) : 449 rte_be_to_cpu_32(cqe->byte_cnt); 450 451 } while (--i); 452 /* Move all CQEs to HW ownership, including possible MiniCQEs. */ 453 for (i = 0; i < cqe_n; i++) { 454 cqe = &(*rxq->cqes)[i]; 455 cqe->validity_iteration_count = MLX5_CQE_VIC_INIT; 456 cqe->op_own = MLX5_CQE_INVALIDATE; 457 } 458 /* Resync CQE and WQE (WQ in RESET state). */ 459 rte_io_wmb(); 460 *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci); 461 rte_io_wmb(); 462 *rxq->rq_db = rte_cpu_to_be_32(0); 463 rte_io_wmb(); 464 } 465 466 /** 467 * Rx queue stop. Device queue goes to the RESET state, 468 * all involved mbufs are freed from WQ. 469 * 470 * @param dev 471 * Pointer to Ethernet device structure. 472 * @param idx 473 * RX queue index. 
474 * 475 * @return 476 * 0 on success, a negative errno value otherwise and rte_errno is set. 477 */ 478 int 479 mlx5_rx_queue_stop_primary(struct rte_eth_dev *dev, uint16_t idx) 480 { 481 struct mlx5_priv *priv = dev->data->dev_private; 482 struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, idx); 483 struct mlx5_rxq_ctrl *rxq_ctrl = rxq->ctrl; 484 int ret; 485 486 MLX5_ASSERT(rxq != NULL && rxq_ctrl != NULL); 487 MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY); 488 ret = priv->obj_ops.rxq_obj_modify(rxq, MLX5_RXQ_MOD_RDY2RST); 489 if (ret) { 490 DRV_LOG(ERR, "Cannot change Rx WQ state to RESET: %s", 491 strerror(errno)); 492 rte_errno = errno; 493 return ret; 494 } 495 /* Remove all processes CQEs. */ 496 rxq_sync_cq(&rxq_ctrl->rxq); 497 /* Free all involved mbufs. */ 498 rxq_free_elts(rxq_ctrl); 499 /* Set the actual queue state. */ 500 dev->data->rx_queue_state[idx] = RTE_ETH_QUEUE_STATE_STOPPED; 501 return 0; 502 } 503 504 /** 505 * Rx queue stop. Device queue goes to the RESET state, 506 * all involved mbufs are freed from WQ. 507 * 508 * @param dev 509 * Pointer to Ethernet device structure. 510 * @param idx 511 * RX queue index. 512 * 513 * @return 514 * 0 on success, a negative errno value otherwise and rte_errno is set. 515 */ 516 int 517 mlx5_rx_queue_stop(struct rte_eth_dev *dev, uint16_t idx) 518 { 519 eth_rx_burst_t pkt_burst = dev->rx_pkt_burst; 520 int ret; 521 522 if (rte_eth_dev_is_rx_hairpin_queue(dev, idx)) { 523 DRV_LOG(ERR, "Hairpin queue can't be stopped"); 524 rte_errno = EINVAL; 525 return -EINVAL; 526 } 527 if (dev->data->rx_queue_state[idx] == RTE_ETH_QUEUE_STATE_STOPPED) 528 return 0; 529 /* 530 * Vectorized Rx burst requires the CQ and RQ indices 531 * synchronized, that might be broken on RQ restart 532 * and cause Rx malfunction, so queue stopping is 533 * not supported if vectorized Rx burst is engaged. 534 * The routine pointer depends on the process type, 535 * should perform check there. MPRQ is not supported as well. 536 */ 537 if (pkt_burst != mlx5_rx_burst) { 538 DRV_LOG(ERR, "Rx queue stop is only supported " 539 "for non-vectorized single-packet Rx"); 540 rte_errno = EINVAL; 541 return -EINVAL; 542 } 543 if (rte_eal_process_type() == RTE_PROC_SECONDARY) { 544 ret = mlx5_mp_os_req_queue_control(dev, idx, 545 MLX5_MP_REQ_QUEUE_RX_STOP); 546 } else { 547 ret = mlx5_rx_queue_stop_primary(dev, idx); 548 } 549 return ret; 550 } 551 552 /** 553 * Rx queue start. Device queue goes to the ready state, 554 * all required mbufs are allocated and WQ is replenished. 555 * 556 * @param dev 557 * Pointer to Ethernet device structure. 558 * @param idx 559 * RX queue index. 560 * 561 * @return 562 * 0 on success, a negative errno value otherwise and rte_errno is set. 563 */ 564 int 565 mlx5_rx_queue_start_primary(struct rte_eth_dev *dev, uint16_t idx) 566 { 567 struct mlx5_priv *priv = dev->data->dev_private; 568 struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, idx); 569 struct mlx5_rxq_data *rxq_data = &rxq->ctrl->rxq; 570 int ret; 571 572 MLX5_ASSERT(rxq != NULL && rxq->ctrl != NULL); 573 MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY); 574 /* Allocate needed buffers. */ 575 ret = rxq_alloc_elts(rxq->ctrl); 576 if (ret) { 577 DRV_LOG(ERR, "Cannot reallocate buffers for Rx WQ"); 578 rte_errno = errno; 579 return ret; 580 } 581 rte_io_wmb(); 582 *rxq_data->cq_db = rte_cpu_to_be_32(rxq_data->cq_ci); 583 rte_io_wmb(); 584 /* Reset RQ consumer before moving queue to READY state. 
*/ 585 *rxq_data->rq_db = rte_cpu_to_be_32(0); 586 rte_io_wmb(); 587 ret = priv->obj_ops.rxq_obj_modify(rxq, MLX5_RXQ_MOD_RST2RDY); 588 if (ret) { 589 DRV_LOG(ERR, "Cannot change Rx WQ state to READY: %s", 590 strerror(errno)); 591 rte_errno = errno; 592 return ret; 593 } 594 /* Reinitialize RQ - set WQEs. */ 595 mlx5_rxq_initialize(rxq_data); 596 rxq_data->err_state = MLX5_RXQ_ERR_STATE_NO_ERROR; 597 /* Set actual queue state. */ 598 dev->data->rx_queue_state[idx] = RTE_ETH_QUEUE_STATE_STARTED; 599 return 0; 600 } 601 602 /** 603 * Rx queue start. Device queue goes to the ready state, 604 * all required mbufs are allocated and WQ is replenished. 605 * 606 * @param dev 607 * Pointer to Ethernet device structure. 608 * @param idx 609 * RX queue index. 610 * 611 * @return 612 * 0 on success, a negative errno value otherwise and rte_errno is set. 613 */ 614 int 615 mlx5_rx_queue_start(struct rte_eth_dev *dev, uint16_t idx) 616 { 617 int ret; 618 619 if (rte_eth_dev_is_rx_hairpin_queue(dev, idx)) { 620 DRV_LOG(ERR, "Hairpin queue can't be started"); 621 rte_errno = EINVAL; 622 return -EINVAL; 623 } 624 if (dev->data->rx_queue_state[idx] == RTE_ETH_QUEUE_STATE_STARTED) 625 return 0; 626 if (rte_eal_process_type() == RTE_PROC_SECONDARY) { 627 ret = mlx5_mp_os_req_queue_control(dev, idx, 628 MLX5_MP_REQ_QUEUE_RX_START); 629 } else { 630 ret = mlx5_rx_queue_start_primary(dev, idx); 631 } 632 return ret; 633 } 634 635 /** 636 * Rx queue presetup checks. 637 * 638 * @param dev 639 * Pointer to Ethernet device structure. 640 * @param idx 641 * RX queue index. 642 * @param desc 643 * Number of descriptors to configure in queue. 644 * @param[out] rxq_ctrl 645 * Address of pointer to shared Rx queue control. 646 * 647 * @return 648 * 0 on success, a negative errno value otherwise and rte_errno is set. 649 */ 650 static int 651 mlx5_rx_queue_pre_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t *desc, 652 struct mlx5_rxq_ctrl **rxq_ctrl) 653 { 654 struct mlx5_priv *priv = dev->data->dev_private; 655 struct mlx5_rxq_priv *rxq; 656 bool empty; 657 658 if (!rte_is_power_of_2(*desc)) { 659 *desc = 1 << log2above(*desc); 660 DRV_LOG(WARNING, 661 "port %u increased number of descriptors in Rx queue %u" 662 " to the next power of two (%d)", 663 dev->data->port_id, idx, *desc); 664 } 665 DRV_LOG(DEBUG, "port %u configuring Rx queue %u for %u descriptors", 666 dev->data->port_id, idx, *desc); 667 if (idx >= priv->rxqs_n) { 668 DRV_LOG(ERR, "port %u Rx queue index out of range (%u >= %u)", 669 dev->data->port_id, idx, priv->rxqs_n); 670 rte_errno = EOVERFLOW; 671 return -rte_errno; 672 } 673 if (rxq_ctrl == NULL || *rxq_ctrl == NULL) 674 return 0; 675 if (!(*rxq_ctrl)->rxq.shared) { 676 if (!mlx5_rxq_releasable(dev, idx)) { 677 DRV_LOG(ERR, "port %u unable to release queue index %u", 678 dev->data->port_id, idx); 679 rte_errno = EBUSY; 680 return -rte_errno; 681 } 682 mlx5_rxq_release(dev, idx); 683 } else { 684 if ((*rxq_ctrl)->obj != NULL) 685 /* Some port using shared Rx queue has been started. */ 686 return 0; 687 /* Release all owner RxQ to reconfigure Shared RxQ. */ 688 do { 689 rxq = LIST_FIRST(&(*rxq_ctrl)->owners); 690 LIST_REMOVE(rxq, owner_entry); 691 empty = LIST_EMPTY(&(*rxq_ctrl)->owners); 692 mlx5_rxq_release(ETH_DEV(rxq->priv), rxq->idx); 693 } while (!empty); 694 *rxq_ctrl = NULL; 695 } 696 return 0; 697 } 698 699 /** 700 * Get the shared Rx queue object that matches group and queue index. 701 * 702 * @param dev 703 * Pointer to Ethernet device structure. 
704 * @param group 705 * Shared RXQ group. 706 * @param share_qid 707 * Shared RX queue index. 708 * 709 * @return 710 * Shared RXQ object that matching, or NULL if not found. 711 */ 712 static struct mlx5_rxq_ctrl * 713 mlx5_shared_rxq_get(struct rte_eth_dev *dev, uint32_t group, uint16_t share_qid) 714 { 715 struct mlx5_rxq_ctrl *rxq_ctrl; 716 struct mlx5_priv *priv = dev->data->dev_private; 717 718 LIST_FOREACH(rxq_ctrl, &priv->sh->shared_rxqs, share_entry) { 719 if (rxq_ctrl->share_group == group && 720 rxq_ctrl->share_qid == share_qid) 721 return rxq_ctrl; 722 } 723 return NULL; 724 } 725 726 /** 727 * Check whether requested Rx queue configuration matches shared RXQ. 728 * 729 * @param rxq_ctrl 730 * Pointer to shared RXQ. 731 * @param dev 732 * Pointer to Ethernet device structure. 733 * @param idx 734 * Queue index. 735 * @param desc 736 * Number of descriptors to configure in queue. 737 * @param socket 738 * NUMA socket on which memory must be allocated. 739 * @param[in] conf 740 * Thresholds parameters. 741 * @param mp 742 * Memory pool for buffer allocations. 743 * 744 * @return 745 * 0 on success, a negative errno value otherwise and rte_errno is set. 746 */ 747 static bool 748 mlx5_shared_rxq_match(struct mlx5_rxq_ctrl *rxq_ctrl, struct rte_eth_dev *dev, 749 uint16_t idx, uint16_t desc, unsigned int socket, 750 const struct rte_eth_rxconf *conf, 751 struct rte_mempool *mp) 752 { 753 struct mlx5_priv *spriv = LIST_FIRST(&rxq_ctrl->owners)->priv; 754 struct mlx5_priv *priv = dev->data->dev_private; 755 unsigned int i; 756 757 RTE_SET_USED(conf); 758 if (rxq_ctrl->socket != socket) { 759 DRV_LOG(ERR, "port %u queue index %u failed to join shared group: socket mismatch", 760 dev->data->port_id, idx); 761 return false; 762 } 763 if (rxq_ctrl->rxq.elts_n != log2above(desc)) { 764 DRV_LOG(ERR, "port %u queue index %u failed to join shared group: descriptor number mismatch", 765 dev->data->port_id, idx); 766 return false; 767 } 768 if (priv->mtu != spriv->mtu) { 769 DRV_LOG(ERR, "port %u queue index %u failed to join shared group: mtu mismatch", 770 dev->data->port_id, idx); 771 return false; 772 } 773 if (priv->dev_data->dev_conf.intr_conf.rxq != 774 spriv->dev_data->dev_conf.intr_conf.rxq) { 775 DRV_LOG(ERR, "port %u queue index %u failed to join shared group: interrupt mismatch", 776 dev->data->port_id, idx); 777 return false; 778 } 779 if (mp != NULL && rxq_ctrl->rxq.mp != mp) { 780 DRV_LOG(ERR, "port %u queue index %u failed to join shared group: mempool mismatch", 781 dev->data->port_id, idx); 782 return false; 783 } else if (mp == NULL) { 784 if (conf->rx_nseg != rxq_ctrl->rxseg_n) { 785 DRV_LOG(ERR, "port %u queue index %u failed to join shared group: segment number mismatch", 786 dev->data->port_id, idx); 787 return false; 788 } 789 for (i = 0; i < conf->rx_nseg; i++) { 790 if (memcmp(&conf->rx_seg[i].split, &rxq_ctrl->rxseg[i], 791 sizeof(struct rte_eth_rxseg_split))) { 792 DRV_LOG(ERR, "port %u queue index %u failed to join shared group: segment %u configuration mismatch", 793 dev->data->port_id, idx, i); 794 return false; 795 } 796 } 797 } 798 if (priv->config.hw_padding != spriv->config.hw_padding) { 799 DRV_LOG(ERR, "port %u queue index %u failed to join shared group: padding mismatch", 800 dev->data->port_id, idx); 801 return false; 802 } 803 if (priv->config.cqe_comp != spriv->config.cqe_comp || 804 (priv->config.cqe_comp && 805 priv->config.cqe_comp_fmt != spriv->config.cqe_comp_fmt)) { 806 DRV_LOG(ERR, "port %u queue index %u failed to join shared group: CQE 
compression mismatch", 807 dev->data->port_id, idx); 808 return false; 809 } 810 return true; 811 } 812 813 /** 814 * 815 * @param dev 816 * Pointer to Ethernet device structure. 817 * @param idx 818 * RX queue index. 819 * @param desc 820 * Number of descriptors to configure in queue. 821 * @param socket 822 * NUMA socket on which memory must be allocated. 823 * @param[in] conf 824 * Thresholds parameters. 825 * @param mp 826 * Memory pool for buffer allocations. 827 * 828 * @return 829 * 0 on success, a negative errno value otherwise and rte_errno is set. 830 */ 831 int 832 mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, 833 unsigned int socket, const struct rte_eth_rxconf *conf, 834 struct rte_mempool *mp) 835 { 836 struct mlx5_priv *priv = dev->data->dev_private; 837 struct mlx5_rxq_priv *rxq; 838 struct mlx5_rxq_ctrl *rxq_ctrl = NULL; 839 struct rte_eth_rxseg_split *rx_seg = 840 (struct rte_eth_rxseg_split *)conf->rx_seg; 841 struct rte_eth_rxseg_split rx_single = {.mp = mp}; 842 uint16_t n_seg = conf->rx_nseg; 843 int res; 844 uint64_t offloads = conf->offloads | 845 dev->data->dev_conf.rxmode.offloads; 846 bool is_extmem = false; 847 848 if ((offloads & RTE_ETH_RX_OFFLOAD_TCP_LRO) && 849 !priv->sh->config.lro_allowed) { 850 DRV_LOG(ERR, 851 "Port %u queue %u LRO is configured but not allowed.", 852 dev->data->port_id, idx); 853 rte_errno = EINVAL; 854 return -rte_errno; 855 } 856 if (mp) { 857 /* 858 * The parameters should be checked on rte_eth_dev layer. 859 * If mp is specified it means the compatible configuration 860 * without buffer split feature tuning. 861 */ 862 rx_seg = &rx_single; 863 n_seg = 1; 864 is_extmem = rte_pktmbuf_priv_flags(mp) & 865 RTE_PKTMBUF_POOL_F_PINNED_EXT_BUF; 866 } 867 if (n_seg > 1) { 868 /* The offloads should be checked on rte_eth_dev layer. */ 869 MLX5_ASSERT(offloads & RTE_ETH_RX_OFFLOAD_SCATTER); 870 if (!(offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT)) { 871 DRV_LOG(ERR, "port %u queue index %u split " 872 "offload not configured", 873 dev->data->port_id, idx); 874 rte_errno = ENOSPC; 875 return -rte_errno; 876 } 877 MLX5_ASSERT(n_seg < MLX5_MAX_RXQ_NSEG); 878 } 879 if (conf->share_group > 0) { 880 if (!priv->sh->cdev->config.hca_attr.mem_rq_rmp) { 881 DRV_LOG(ERR, "port %u queue index %u shared Rx queue not supported by fw", 882 dev->data->port_id, idx); 883 rte_errno = EINVAL; 884 return -rte_errno; 885 } 886 if (priv->obj_ops.rxq_obj_new != devx_obj_ops.rxq_obj_new) { 887 DRV_LOG(ERR, "port %u queue index %u shared Rx queue needs DevX api", 888 dev->data->port_id, idx); 889 rte_errno = EINVAL; 890 return -rte_errno; 891 } 892 if (conf->share_qid >= priv->rxqs_n) { 893 DRV_LOG(ERR, "port %u shared Rx queue index %u > number of Rx queues %u", 894 dev->data->port_id, conf->share_qid, 895 priv->rxqs_n); 896 rte_errno = EINVAL; 897 return -rte_errno; 898 } 899 if (priv->config.mprq.enabled) { 900 DRV_LOG(ERR, "port %u shared Rx queue index %u: not supported when MPRQ enabled", 901 dev->data->port_id, conf->share_qid); 902 rte_errno = EINVAL; 903 return -rte_errno; 904 } 905 /* Try to reuse shared RXQ. 
*/ 906 rxq_ctrl = mlx5_shared_rxq_get(dev, conf->share_group, 907 conf->share_qid); 908 res = mlx5_rx_queue_pre_setup(dev, idx, &desc, &rxq_ctrl); 909 if (res) 910 return res; 911 if (rxq_ctrl != NULL && 912 !mlx5_shared_rxq_match(rxq_ctrl, dev, idx, desc, socket, 913 conf, mp)) { 914 rte_errno = EINVAL; 915 return -rte_errno; 916 } 917 } else { 918 res = mlx5_rx_queue_pre_setup(dev, idx, &desc, &rxq_ctrl); 919 if (res) 920 return res; 921 } 922 /* Allocate RXQ. */ 923 rxq = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, sizeof(*rxq), 0, 924 SOCKET_ID_ANY); 925 if (!rxq) { 926 DRV_LOG(ERR, "port %u unable to allocate rx queue index %u private data", 927 dev->data->port_id, idx); 928 rte_errno = ENOMEM; 929 return -rte_errno; 930 } 931 if (rxq_ctrl == NULL) { 932 rxq_ctrl = mlx5_rxq_new(dev, idx, desc, socket, conf, rx_seg, 933 n_seg, is_extmem); 934 if (rxq_ctrl == NULL) { 935 DRV_LOG(ERR, "port %u unable to allocate rx queue index %u", 936 dev->data->port_id, idx); 937 mlx5_free(rxq); 938 rte_errno = ENOMEM; 939 return -rte_errno; 940 } 941 } 942 rxq->priv = priv; 943 rxq->idx = idx; 944 (*priv->rxq_privs)[idx] = rxq; 945 /* Join owner list. */ 946 LIST_INSERT_HEAD(&rxq_ctrl->owners, rxq, owner_entry); 947 rxq->ctrl = rxq_ctrl; 948 mlx5_rxq_ref(dev, idx); 949 DRV_LOG(DEBUG, "port %u adding Rx queue %u to list", 950 dev->data->port_id, idx); 951 dev->data->rx_queues[idx] = &rxq_ctrl->rxq; 952 return 0; 953 } 954 955 /** 956 * 957 * @param dev 958 * Pointer to Ethernet device structure. 959 * @param idx 960 * RX queue index. 961 * @param desc 962 * Number of descriptors to configure in queue. 963 * @param hairpin_conf 964 * Hairpin configuration parameters. 965 * 966 * @return 967 * 0 on success, a negative errno value otherwise and rte_errno is set. 968 */ 969 int 970 mlx5_rx_hairpin_queue_setup(struct rte_eth_dev *dev, uint16_t idx, 971 uint16_t desc, 972 const struct rte_eth_hairpin_conf *hairpin_conf) 973 { 974 struct mlx5_priv *priv = dev->data->dev_private; 975 struct mlx5_rxq_priv *rxq; 976 struct mlx5_rxq_ctrl *rxq_ctrl; 977 int res; 978 979 res = mlx5_rx_queue_pre_setup(dev, idx, &desc, NULL); 980 if (res) 981 return res; 982 if (hairpin_conf->peer_count != 1) { 983 rte_errno = EINVAL; 984 DRV_LOG(ERR, "port %u unable to setup Rx hairpin queue index %u" 985 " peer count is %u", dev->data->port_id, 986 idx, hairpin_conf->peer_count); 987 return -rte_errno; 988 } 989 if (hairpin_conf->peers[0].port == dev->data->port_id) { 990 if (hairpin_conf->peers[0].queue >= priv->txqs_n) { 991 rte_errno = EINVAL; 992 DRV_LOG(ERR, "port %u unable to setup Rx hairpin queue" 993 " index %u, Tx %u is larger than %u", 994 dev->data->port_id, idx, 995 hairpin_conf->peers[0].queue, priv->txqs_n); 996 return -rte_errno; 997 } 998 } else { 999 if (hairpin_conf->manual_bind == 0 || 1000 hairpin_conf->tx_explicit == 0) { 1001 rte_errno = EINVAL; 1002 DRV_LOG(ERR, "port %u unable to setup Rx hairpin queue" 1003 " index %u peer port %u with attributes %u %u", 1004 dev->data->port_id, idx, 1005 hairpin_conf->peers[0].port, 1006 hairpin_conf->manual_bind, 1007 hairpin_conf->tx_explicit); 1008 return -rte_errno; 1009 } 1010 } 1011 rxq = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, sizeof(*rxq), 0, 1012 SOCKET_ID_ANY); 1013 if (!rxq) { 1014 DRV_LOG(ERR, "port %u unable to allocate hairpin rx queue index %u private data", 1015 dev->data->port_id, idx); 1016 rte_errno = ENOMEM; 1017 return -rte_errno; 1018 } 1019 rxq->priv = priv; 1020 rxq->idx = idx; 1021 (*priv->rxq_privs)[idx] = rxq; 1022 rxq_ctrl = 
mlx5_rxq_hairpin_new(dev, rxq, desc, hairpin_conf); 1023 if (!rxq_ctrl) { 1024 DRV_LOG(ERR, "port %u unable to allocate hairpin queue index %u", 1025 dev->data->port_id, idx); 1026 mlx5_free(rxq); 1027 (*priv->rxq_privs)[idx] = NULL; 1028 rte_errno = ENOMEM; 1029 return -rte_errno; 1030 } 1031 DRV_LOG(DEBUG, "port %u adding hairpin Rx queue %u to list", 1032 dev->data->port_id, idx); 1033 dev->data->rx_queues[idx] = &rxq_ctrl->rxq; 1034 return 0; 1035 } 1036 1037 /** 1038 * DPDK callback to release a RX queue. 1039 * 1040 * @param dev 1041 * Pointer to Ethernet device structure. 1042 * @param qid 1043 * Receive queue index. 1044 */ 1045 void 1046 mlx5_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid) 1047 { 1048 struct mlx5_rxq_data *rxq = dev->data->rx_queues[qid]; 1049 1050 if (rxq == NULL) 1051 return; 1052 if (!mlx5_rxq_releasable(dev, qid)) 1053 rte_panic("port %u Rx queue %u is still used by a flow and" 1054 " cannot be removed\n", dev->data->port_id, qid); 1055 mlx5_rxq_release(dev, qid); 1056 } 1057 1058 /** 1059 * Allocate queue vector and fill epoll fd list for Rx interrupts. 1060 * 1061 * @param dev 1062 * Pointer to Ethernet device. 1063 * 1064 * @return 1065 * 0 on success, a negative errno value otherwise and rte_errno is set. 1066 */ 1067 int 1068 mlx5_rx_intr_vec_enable(struct rte_eth_dev *dev) 1069 { 1070 struct mlx5_priv *priv = dev->data->dev_private; 1071 unsigned int i; 1072 unsigned int rxqs_n = priv->rxqs_n; 1073 unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID); 1074 unsigned int count = 0; 1075 struct rte_intr_handle *intr_handle = dev->intr_handle; 1076 1077 if (!dev->data->dev_conf.intr_conf.rxq) 1078 return 0; 1079 mlx5_rx_intr_vec_disable(dev); 1080 if (rte_intr_vec_list_alloc(intr_handle, NULL, n)) { 1081 DRV_LOG(ERR, 1082 "port %u failed to allocate memory for interrupt" 1083 " vector, Rx interrupts will not be supported", 1084 dev->data->port_id); 1085 rte_errno = ENOMEM; 1086 return -rte_errno; 1087 } 1088 1089 if (rte_intr_type_set(intr_handle, RTE_INTR_HANDLE_EXT)) 1090 return -rte_errno; 1091 1092 for (i = 0; i != n; ++i) { 1093 /* This rxq obj must not be released in this function. */ 1094 struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, i); 1095 struct mlx5_rxq_obj *rxq_obj = rxq ? rxq->ctrl->obj : NULL; 1096 int rc; 1097 1098 /* Skip queues that cannot request interrupts. */ 1099 if (!rxq_obj || (!rxq_obj->ibv_channel && 1100 !rxq_obj->devx_channel)) { 1101 /* Use invalid intr_vec[] index to disable entry. 
*/ 1102 if (rte_intr_vec_list_index_set(intr_handle, i, 1103 RTE_INTR_VEC_RXTX_OFFSET + RTE_MAX_RXTX_INTR_VEC_ID)) 1104 return -rte_errno; 1105 continue; 1106 } 1107 mlx5_rxq_ref(dev, i); 1108 if (count >= RTE_MAX_RXTX_INTR_VEC_ID) { 1109 DRV_LOG(ERR, 1110 "port %u too many Rx queues for interrupt" 1111 " vector size (%d), Rx interrupts cannot be" 1112 " enabled", 1113 dev->data->port_id, RTE_MAX_RXTX_INTR_VEC_ID); 1114 mlx5_rx_intr_vec_disable(dev); 1115 rte_errno = ENOMEM; 1116 return -rte_errno; 1117 } 1118 rc = mlx5_os_set_nonblock_channel_fd(rxq_obj->fd); 1119 if (rc < 0) { 1120 rte_errno = errno; 1121 DRV_LOG(ERR, 1122 "port %u failed to make Rx interrupt file" 1123 " descriptor %d non-blocking for queue index" 1124 " %d", 1125 dev->data->port_id, rxq_obj->fd, i); 1126 mlx5_rx_intr_vec_disable(dev); 1127 return -rte_errno; 1128 } 1129 1130 if (rte_intr_vec_list_index_set(intr_handle, i, 1131 RTE_INTR_VEC_RXTX_OFFSET + count)) 1132 return -rte_errno; 1133 if (rte_intr_efds_index_set(intr_handle, count, 1134 rxq_obj->fd)) 1135 return -rte_errno; 1136 count++; 1137 } 1138 if (!count) 1139 mlx5_rx_intr_vec_disable(dev); 1140 else if (rte_intr_nb_efd_set(intr_handle, count)) 1141 return -rte_errno; 1142 return 0; 1143 } 1144 1145 /** 1146 * Clean up Rx interrupts handler. 1147 * 1148 * @param dev 1149 * Pointer to Ethernet device. 1150 */ 1151 void 1152 mlx5_rx_intr_vec_disable(struct rte_eth_dev *dev) 1153 { 1154 struct mlx5_priv *priv = dev->data->dev_private; 1155 struct rte_intr_handle *intr_handle = dev->intr_handle; 1156 unsigned int i; 1157 unsigned int rxqs_n = priv->rxqs_n; 1158 unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID); 1159 1160 if (!dev->data->dev_conf.intr_conf.rxq) 1161 return; 1162 if (rte_intr_vec_list_index_get(intr_handle, 0) < 0) 1163 goto free; 1164 for (i = 0; i != n; ++i) { 1165 if (rte_intr_vec_list_index_get(intr_handle, i) == 1166 RTE_INTR_VEC_RXTX_OFFSET + RTE_MAX_RXTX_INTR_VEC_ID) 1167 continue; 1168 /** 1169 * Need to access directly the queue to release the reference 1170 * kept in mlx5_rx_intr_vec_enable(). 1171 */ 1172 mlx5_rxq_deref(dev, i); 1173 } 1174 free: 1175 rte_intr_free_epoll_fd(intr_handle); 1176 1177 rte_intr_vec_list_free(intr_handle); 1178 1179 rte_intr_nb_efd_set(intr_handle, 0); 1180 } 1181 1182 /** 1183 * MLX5 CQ notification . 1184 * 1185 * @param rxq 1186 * Pointer to receive queue structure. 1187 * @param sq_n_rxq 1188 * Sequence number per receive queue . 1189 */ 1190 static inline void 1191 mlx5_arm_cq(struct mlx5_rxq_data *rxq, int sq_n_rxq) 1192 { 1193 int sq_n = 0; 1194 uint32_t doorbell_hi; 1195 uint64_t doorbell; 1196 1197 sq_n = sq_n_rxq & MLX5_CQ_SQN_MASK; 1198 doorbell_hi = sq_n << MLX5_CQ_SQN_OFFSET | (rxq->cq_ci & MLX5_CI_MASK); 1199 doorbell = (uint64_t)doorbell_hi << 32; 1200 doorbell |= rxq->cqn; 1201 mlx5_doorbell_ring(&rxq->uar_data, rte_cpu_to_be_64(doorbell), 1202 doorbell_hi, &rxq->cq_db[MLX5_CQ_ARM_DB], 0); 1203 } 1204 1205 /** 1206 * DPDK callback for Rx queue interrupt enable. 1207 * 1208 * @param dev 1209 * Pointer to Ethernet device structure. 1210 * @param rx_queue_id 1211 * Rx queue number. 1212 * 1213 * @return 1214 * 0 on success, a negative errno value otherwise and rte_errno is set. 
1215 */ 1216 int 1217 mlx5_rx_intr_enable(struct rte_eth_dev *dev, uint16_t rx_queue_id) 1218 { 1219 struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, rx_queue_id); 1220 if (!rxq) 1221 goto error; 1222 if (rxq->ctrl->irq) { 1223 if (!rxq->ctrl->obj) 1224 goto error; 1225 mlx5_arm_cq(&rxq->ctrl->rxq, rxq->ctrl->rxq.cq_arm_sn); 1226 } 1227 return 0; 1228 error: 1229 rte_errno = EINVAL; 1230 return -rte_errno; 1231 } 1232 1233 /** 1234 * DPDK callback for Rx queue interrupt disable. 1235 * 1236 * @param dev 1237 * Pointer to Ethernet device structure. 1238 * @param rx_queue_id 1239 * Rx queue number. 1240 * 1241 * @return 1242 * 0 on success, a negative errno value otherwise and rte_errno is set. 1243 */ 1244 int 1245 mlx5_rx_intr_disable(struct rte_eth_dev *dev, uint16_t rx_queue_id) 1246 { 1247 struct mlx5_priv *priv = dev->data->dev_private; 1248 struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, rx_queue_id); 1249 int ret = 0; 1250 1251 if (!rxq) { 1252 rte_errno = EINVAL; 1253 return -rte_errno; 1254 } 1255 if (!rxq->ctrl->obj) 1256 goto error; 1257 if (rxq->ctrl->irq) { 1258 ret = priv->obj_ops.rxq_event_get(rxq->ctrl->obj); 1259 if (ret < 0) 1260 goto error; 1261 rxq->ctrl->rxq.cq_arm_sn++; 1262 } 1263 return 0; 1264 error: 1265 /** 1266 * The ret variable may be EAGAIN which means the get_event function was 1267 * called before receiving one. 1268 */ 1269 if (ret < 0) 1270 rte_errno = errno; 1271 else 1272 rte_errno = EINVAL; 1273 if (rte_errno != EAGAIN) 1274 DRV_LOG(WARNING, "port %u unable to disable interrupt on Rx queue %d", 1275 dev->data->port_id, rx_queue_id); 1276 return -rte_errno; 1277 } 1278 1279 /** 1280 * Verify the Rx queue objects list is empty 1281 * 1282 * @param dev 1283 * Pointer to Ethernet device. 1284 * 1285 * @return 1286 * The number of objects not released. 1287 */ 1288 int 1289 mlx5_rxq_obj_verify(struct rte_eth_dev *dev) 1290 { 1291 struct mlx5_priv *priv = dev->data->dev_private; 1292 int ret = 0; 1293 struct mlx5_rxq_obj *rxq_obj; 1294 1295 LIST_FOREACH(rxq_obj, &priv->rxqsobj, next) { 1296 if (rxq_obj->rxq_ctrl == NULL) 1297 continue; 1298 if (rxq_obj->rxq_ctrl->rxq.shared && 1299 !LIST_EMPTY(&rxq_obj->rxq_ctrl->owners)) 1300 continue; 1301 DRV_LOG(DEBUG, "port %u Rx queue %u still referenced", 1302 dev->data->port_id, rxq_obj->rxq_ctrl->rxq.idx); 1303 ++ret; 1304 } 1305 return ret; 1306 } 1307 1308 /** 1309 * Callback function to initialize mbufs for Multi-Packet RQ. 1310 */ 1311 static inline void 1312 mlx5_mprq_buf_init(struct rte_mempool *mp, void *opaque_arg, 1313 void *_m, unsigned int i __rte_unused) 1314 { 1315 struct mlx5_mprq_buf *buf = _m; 1316 struct rte_mbuf_ext_shared_info *shinfo; 1317 unsigned int strd_n = (unsigned int)(uintptr_t)opaque_arg; 1318 unsigned int j; 1319 1320 memset(_m, 0, sizeof(*buf)); 1321 buf->mp = mp; 1322 rte_atomic_store_explicit(&buf->refcnt, 1, rte_memory_order_relaxed); 1323 for (j = 0; j != strd_n; ++j) { 1324 shinfo = &buf->shinfos[j]; 1325 shinfo->free_cb = mlx5_mprq_buf_free_cb; 1326 shinfo->fcb_opaque = buf; 1327 } 1328 } 1329 1330 /** 1331 * Free mempool of Multi-Packet RQ. 1332 * 1333 * @param dev 1334 * Pointer to Ethernet device. 1335 * 1336 * @return 1337 * 0 on success, negative errno value on failure. 
1338 */ 1339 int 1340 mlx5_mprq_free_mp(struct rte_eth_dev *dev) 1341 { 1342 struct mlx5_priv *priv = dev->data->dev_private; 1343 struct rte_mempool *mp = priv->mprq_mp; 1344 unsigned int i; 1345 1346 if (mp == NULL) 1347 return 0; 1348 DRV_LOG(DEBUG, "port %u freeing mempool (%s) for Multi-Packet RQ", 1349 dev->data->port_id, mp->name); 1350 /* 1351 * If a buffer in the pool has been externally attached to a mbuf and it 1352 * is still in use by application, destroying the Rx queue can spoil 1353 * the packet. It is unlikely to happen but if application dynamically 1354 * creates and destroys with holding Rx packets, this can happen. 1355 * 1356 * TODO: It is unavoidable for now because the mempool for Multi-Packet 1357 * RQ isn't provided by application but managed by PMD. 1358 */ 1359 if (!rte_mempool_full(mp)) { 1360 DRV_LOG(ERR, 1361 "port %u mempool for Multi-Packet RQ is still in use", 1362 dev->data->port_id); 1363 rte_errno = EBUSY; 1364 return -rte_errno; 1365 } 1366 rte_mempool_free(mp); 1367 /* Unset mempool for each Rx queue. */ 1368 for (i = 0; i != priv->rxqs_n; ++i) { 1369 struct mlx5_rxq_data *rxq = mlx5_rxq_data_get(dev, i); 1370 1371 if (rxq == NULL) 1372 continue; 1373 rxq->mprq_mp = NULL; 1374 } 1375 priv->mprq_mp = NULL; 1376 return 0; 1377 } 1378 1379 /** 1380 * Allocate a mempool for Multi-Packet RQ. All configured Rx queues share the 1381 * mempool. If already allocated, reuse it if there're enough elements. 1382 * Otherwise, resize it. 1383 * 1384 * @param dev 1385 * Pointer to Ethernet device. 1386 * 1387 * @return 1388 * 0 on success, negative errno value on failure. 1389 */ 1390 int 1391 mlx5_mprq_alloc_mp(struct rte_eth_dev *dev) 1392 { 1393 struct mlx5_priv *priv = dev->data->dev_private; 1394 struct rte_mempool *mp = priv->mprq_mp; 1395 char name[RTE_MEMPOOL_NAMESIZE]; 1396 unsigned int desc = 0; 1397 unsigned int buf_len; 1398 unsigned int obj_num; 1399 unsigned int obj_size; 1400 unsigned int log_strd_num = 0; 1401 unsigned int log_strd_sz = 0; 1402 unsigned int i; 1403 unsigned int n_ibv = 0; 1404 int ret; 1405 1406 if (!mlx5_mprq_enabled(dev)) 1407 return 0; 1408 /* Count the total number of descriptors configured. */ 1409 for (i = 0; i != priv->rxqs_n; ++i) { 1410 struct mlx5_rxq_ctrl *rxq_ctrl = mlx5_rxq_ctrl_get(dev, i); 1411 struct mlx5_rxq_data *rxq; 1412 1413 if (rxq_ctrl == NULL || rxq_ctrl->is_hairpin) 1414 continue; 1415 rxq = &rxq_ctrl->rxq; 1416 n_ibv++; 1417 desc += 1 << rxq->elts_n; 1418 /* Get the max number of strides. */ 1419 if (log_strd_num < rxq->log_strd_num) 1420 log_strd_num = rxq->log_strd_num; 1421 /* Get the max size of a stride. */ 1422 if (log_strd_sz < rxq->log_strd_sz) 1423 log_strd_sz = rxq->log_strd_sz; 1424 } 1425 MLX5_ASSERT(log_strd_num && log_strd_sz); 1426 buf_len = RTE_BIT32(log_strd_num) * RTE_BIT32(log_strd_sz); 1427 obj_size = sizeof(struct mlx5_mprq_buf) + buf_len + 1428 RTE_BIT32(log_strd_num) * 1429 sizeof(struct rte_mbuf_ext_shared_info) + 1430 RTE_PKTMBUF_HEADROOM; 1431 /* 1432 * Received packets can be either memcpy'd or externally referenced. In 1433 * case that the packet is attached to an mbuf as an external buffer, as 1434 * it isn't possible to predict how the buffers will be queued by 1435 * application, there's no option to exactly pre-allocate needed buffers 1436 * in advance but to speculatively prepares enough buffers. 
1437 * 1438 * In the data path, if this Mempool is depleted, PMD will try to memcpy 1439 * received packets to buffers provided by application (rxq->mp) until 1440 * this Mempool gets available again. 1441 */ 1442 desc *= 4; 1443 obj_num = desc + MLX5_MPRQ_MP_CACHE_SZ * n_ibv; 1444 /* 1445 * rte_mempool_create_empty() has sanity check to refuse large cache 1446 * size compared to the number of elements. 1447 * CALC_CACHE_FLUSHTHRESH() is defined in a C file, so using a 1448 * constant number 2 instead. 1449 */ 1450 obj_num = RTE_MAX(obj_num, MLX5_MPRQ_MP_CACHE_SZ * 2); 1451 /* Check a mempool is already allocated and if it can be resued. */ 1452 if (mp != NULL && mp->elt_size >= obj_size && mp->size >= obj_num) { 1453 DRV_LOG(DEBUG, "port %u mempool %s is being reused", 1454 dev->data->port_id, mp->name); 1455 /* Reuse. */ 1456 goto exit; 1457 } else if (mp != NULL) { 1458 DRV_LOG(DEBUG, "port %u mempool %s should be resized, freeing it", 1459 dev->data->port_id, mp->name); 1460 /* 1461 * If failed to free, which means it may be still in use, no way 1462 * but to keep using the existing one. On buffer underrun, 1463 * packets will be memcpy'd instead of external buffer 1464 * attachment. 1465 */ 1466 if (mlx5_mprq_free_mp(dev)) { 1467 if (mp->elt_size >= obj_size) 1468 goto exit; 1469 else 1470 return -rte_errno; 1471 } 1472 } 1473 snprintf(name, sizeof(name), "port-%u-mprq", dev->data->port_id); 1474 mp = rte_mempool_create(name, obj_num, obj_size, MLX5_MPRQ_MP_CACHE_SZ, 1475 0, NULL, NULL, mlx5_mprq_buf_init, 1476 (void *)((uintptr_t)1 << log_strd_num), 1477 dev->device->numa_node, 0); 1478 if (mp == NULL) { 1479 DRV_LOG(ERR, 1480 "port %u failed to allocate a mempool for" 1481 " Multi-Packet RQ, count=%u, size=%u", 1482 dev->data->port_id, obj_num, obj_size); 1483 rte_errno = ENOMEM; 1484 return -rte_errno; 1485 } 1486 ret = mlx5_mr_mempool_register(priv->sh->cdev, mp, false); 1487 if (ret < 0 && rte_errno != EEXIST) { 1488 ret = rte_errno; 1489 DRV_LOG(ERR, "port %u failed to register a mempool for Multi-Packet RQ", 1490 dev->data->port_id); 1491 rte_mempool_free(mp); 1492 rte_errno = ret; 1493 return -rte_errno; 1494 } 1495 priv->mprq_mp = mp; 1496 exit: 1497 /* Set mempool for each Rx queue. */ 1498 for (i = 0; i != priv->rxqs_n; ++i) { 1499 struct mlx5_rxq_ctrl *rxq_ctrl = mlx5_rxq_ctrl_get(dev, i); 1500 1501 if (rxq_ctrl == NULL || rxq_ctrl->is_hairpin) 1502 continue; 1503 rxq_ctrl->rxq.mprq_mp = mp; 1504 } 1505 DRV_LOG(INFO, "port %u Multi-Packet RQ is configured", 1506 dev->data->port_id); 1507 return 0; 1508 } 1509 1510 #define MLX5_MAX_TCP_HDR_OFFSET ((unsigned int)(sizeof(struct rte_ether_hdr) + \ 1511 sizeof(struct rte_vlan_hdr) * 2 + \ 1512 sizeof(struct rte_ipv6_hdr))) 1513 #define MAX_TCP_OPTION_SIZE 40u 1514 #define MLX5_MAX_LRO_HEADER_FIX ((unsigned int)(MLX5_MAX_TCP_HDR_OFFSET + \ 1515 sizeof(struct rte_tcp_hdr) + \ 1516 MAX_TCP_OPTION_SIZE)) 1517 1518 /** 1519 * Adjust the maximum LRO massage size. 1520 * 1521 * @param dev 1522 * Pointer to Ethernet device. 1523 * @param idx 1524 * RX queue index. 1525 * @param max_lro_size 1526 * The maximum size for LRO packet. 
1527 */ 1528 static void 1529 mlx5_max_lro_msg_size_adjust(struct rte_eth_dev *dev, uint16_t idx, 1530 uint32_t max_lro_size) 1531 { 1532 struct mlx5_priv *priv = dev->data->dev_private; 1533 1534 if (priv->sh->cdev->config.hca_attr.lro_max_msg_sz_mode == 1535 MLX5_LRO_MAX_MSG_SIZE_START_FROM_L4 && max_lro_size > 1536 MLX5_MAX_TCP_HDR_OFFSET) 1537 max_lro_size -= MLX5_MAX_TCP_HDR_OFFSET; 1538 max_lro_size = RTE_MIN(max_lro_size, MLX5_MAX_LRO_SIZE); 1539 if (priv->max_lro_msg_size) 1540 priv->max_lro_msg_size = 1541 RTE_MIN((uint32_t)priv->max_lro_msg_size, max_lro_size); 1542 else 1543 priv->max_lro_msg_size = max_lro_size; 1544 DRV_LOG(DEBUG, 1545 "port %u Rx Queue %u max LRO message size adjusted to %u bytes", 1546 dev->data->port_id, idx, priv->max_lro_msg_size); 1547 } 1548 1549 /** 1550 * Prepare both size and number of stride for Multi-Packet RQ. 1551 * 1552 * @param dev 1553 * Pointer to Ethernet device. 1554 * @param idx 1555 * RX queue index. 1556 * @param desc 1557 * Number of descriptors to configure in queue. 1558 * @param rx_seg_en 1559 * Indicator if Rx segment enables, if so Multi-Packet RQ doesn't enable. 1560 * @param min_mbuf_size 1561 * Non scatter min mbuf size, max_rx_pktlen plus overhead. 1562 * @param actual_log_stride_num 1563 * Log number of strides to configure for this queue. 1564 * @param actual_log_stride_size 1565 * Log stride size to configure for this queue. 1566 * @param is_extmem 1567 * Is external pinned memory pool used. 1568 * @return 1569 * 0 if Multi-Packet RQ is supported, otherwise -1. 1570 */ 1571 static int 1572 mlx5_mprq_prepare(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, 1573 bool rx_seg_en, uint32_t min_mbuf_size, 1574 uint32_t *actual_log_stride_num, 1575 uint32_t *actual_log_stride_size, 1576 bool is_extmem) 1577 { 1578 struct mlx5_priv *priv = dev->data->dev_private; 1579 struct mlx5_port_config *config = &priv->config; 1580 struct mlx5_dev_cap *dev_cap = &priv->sh->dev_cap; 1581 uint32_t log_min_stride_num = dev_cap->mprq.log_min_stride_num; 1582 uint32_t log_max_stride_num = dev_cap->mprq.log_max_stride_num; 1583 uint32_t log_def_stride_num = 1584 RTE_MIN(RTE_MAX(MLX5_MPRQ_DEFAULT_LOG_STRIDE_NUM, 1585 log_min_stride_num), 1586 log_max_stride_num); 1587 uint32_t log_min_stride_size = dev_cap->mprq.log_min_stride_size; 1588 uint32_t log_max_stride_size = dev_cap->mprq.log_max_stride_size; 1589 uint32_t log_def_stride_size = 1590 RTE_MIN(RTE_MAX(MLX5_MPRQ_DEFAULT_LOG_STRIDE_SIZE, 1591 log_min_stride_size), 1592 log_max_stride_size); 1593 uint32_t log_stride_wqe_size; 1594 1595 if (mlx5_check_mprq_support(dev) != 1 || rx_seg_en || is_extmem) 1596 goto unsupport; 1597 /* Checks if chosen number of strides is in supported range. */ 1598 if (config->mprq.log_stride_num > log_max_stride_num || 1599 config->mprq.log_stride_num < log_min_stride_num) { 1600 *actual_log_stride_num = log_def_stride_num; 1601 DRV_LOG(WARNING, 1602 "Port %u Rx queue %u number of strides for Multi-Packet RQ is out of range, setting default value (%u)", 1603 dev->data->port_id, idx, RTE_BIT32(log_def_stride_num)); 1604 } else { 1605 *actual_log_stride_num = config->mprq.log_stride_num; 1606 } 1607 /* Checks if chosen size of stride is in supported range. 
*/ 1608 if (config->mprq.log_stride_size != (uint32_t)MLX5_ARG_UNSET) { 1609 if (config->mprq.log_stride_size > log_max_stride_size || 1610 config->mprq.log_stride_size < log_min_stride_size) { 1611 *actual_log_stride_size = log_def_stride_size; 1612 DRV_LOG(WARNING, 1613 "Port %u Rx queue %u size of a stride for Multi-Packet RQ is out of range, setting default value (%u)", 1614 dev->data->port_id, idx, 1615 RTE_BIT32(log_def_stride_size)); 1616 } else { 1617 *actual_log_stride_size = config->mprq.log_stride_size; 1618 } 1619 } else { 1620 /* Make the stride fit the mbuf size by default. */ 1621 if (min_mbuf_size <= RTE_BIT32(log_max_stride_size)) { 1622 DRV_LOG(WARNING, 1623 "Port %u Rx queue %u size of a stride for Multi-Packet RQ is adjusted to match the mbuf size (%u)", 1624 dev->data->port_id, idx, min_mbuf_size); 1625 *actual_log_stride_size = log2above(min_mbuf_size); 1626 } else { 1627 goto unsupport; 1628 } 1629 } 1630 /* Make sure the stride size is greater than the headroom. */ 1631 if (RTE_BIT32(*actual_log_stride_size) < RTE_PKTMBUF_HEADROOM) { 1632 if (RTE_BIT32(log_max_stride_size) > RTE_PKTMBUF_HEADROOM) { 1633 DRV_LOG(WARNING, 1634 "Port %u Rx queue %u size of a stride for Multi-Packet RQ is adjusted to accommodate the headroom (%u)", 1635 dev->data->port_id, idx, RTE_PKTMBUF_HEADROOM); 1636 *actual_log_stride_size = log2above(RTE_PKTMBUF_HEADROOM); 1637 } else { 1638 goto unsupport; 1639 } 1640 } 1641 log_stride_wqe_size = *actual_log_stride_num + *actual_log_stride_size; 1642 /* Check if WQE buffer size is supported by hardware. */ 1643 if (log_stride_wqe_size < dev_cap->mprq.log_min_stride_wqe_size) { 1644 *actual_log_stride_num = log_def_stride_num; 1645 *actual_log_stride_size = log_def_stride_size; 1646 DRV_LOG(WARNING, 1647 "Port %u Rx queue %u size of WQE buffer for Multi-Packet RQ is too small, setting default values (stride_num_n=%u, stride_size_n=%u)", 1648 dev->data->port_id, idx, RTE_BIT32(log_def_stride_num), 1649 RTE_BIT32(log_def_stride_size)); 1650 log_stride_wqe_size = log_def_stride_num + log_def_stride_size; 1651 } 1652 MLX5_ASSERT(log_stride_wqe_size >= 1653 dev_cap->mprq.log_min_stride_wqe_size); 1654 if (desc <= RTE_BIT32(*actual_log_stride_num)) 1655 goto unsupport; 1656 if (min_mbuf_size > RTE_BIT32(log_stride_wqe_size)) { 1657 DRV_LOG(WARNING, "Port %u Rx queue %u " 1658 "Multi-Packet RQ is unsupported, WQE buffer size (%u) " 1659 "is smaller than min mbuf size (%u)", 1660 dev->data->port_id, idx, RTE_BIT32(log_stride_wqe_size), 1661 min_mbuf_size); 1662 goto unsupport; 1663 } 1664 DRV_LOG(DEBUG, "Port %u Rx queue %u " 1665 "Multi-Packet RQ is enabled strd_num_n = %u, strd_sz_n = %u", 1666 dev->data->port_id, idx, RTE_BIT32(*actual_log_stride_num), 1667 RTE_BIT32(*actual_log_stride_size)); 1668 return 0; 1669 unsupport: 1670 if (config->mprq.enabled) 1671 DRV_LOG(WARNING, 1672 "Port %u MPRQ is requested but cannot be enabled\n" 1673 " (requested: pkt_sz = %u, desc_num = %u," 1674 " rxq_num = %u, stride_sz = %u, stride_num = %u\n" 1675 " supported: min_rxqs_num = %u, min_buf_wqe_sz = %u" 1676 " min_stride_sz = %u, max_stride_sz = %u).\n" 1677 "Rx segment is %senabled. External mempool is %sused.", 1678 dev->data->port_id, min_mbuf_size, desc, priv->rxqs_n, 1679 config->mprq.log_stride_size == (uint32_t)MLX5_ARG_UNSET ? 
1680 RTE_BIT32(MLX5_MPRQ_DEFAULT_LOG_STRIDE_SIZE) : 1681 RTE_BIT32(config->mprq.log_stride_size), 1682 RTE_BIT32(config->mprq.log_stride_num), 1683 config->mprq.min_rxqs_num, 1684 RTE_BIT32(dev_cap->mprq.log_min_stride_wqe_size), 1685 RTE_BIT32(dev_cap->mprq.log_min_stride_size), 1686 RTE_BIT32(dev_cap->mprq.log_max_stride_size), 1687 rx_seg_en ? "" : "not ", is_extmem ? "" : "not "); 1688 return -1; 1689 } 1690 1691 /** 1692 * Create a DPDK Rx queue. 1693 * 1694 * @param dev 1695 * Pointer to Ethernet device. 1696 * @param idx 1697 * RX queue index. 1698 * @param desc 1699 * Number of descriptors to configure in queue. 1700 * @param socket 1701 * NUMA socket on which memory must be allocated. 1702 * 1703 * @return 1704 * A DPDK queue object on success, NULL otherwise and rte_errno is set. 1705 */ 1706 struct mlx5_rxq_ctrl * 1707 mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, 1708 unsigned int socket, const struct rte_eth_rxconf *conf, 1709 const struct rte_eth_rxseg_split *rx_seg, uint16_t n_seg, 1710 bool is_extmem) 1711 { 1712 struct mlx5_priv *priv = dev->data->dev_private; 1713 struct mlx5_rxq_ctrl *tmpl; 1714 unsigned int mb_len = rte_pktmbuf_data_room_size(rx_seg[0].mp); 1715 struct mlx5_port_config *config = &priv->config; 1716 uint64_t offloads = conf->offloads | 1717 dev->data->dev_conf.rxmode.offloads; 1718 unsigned int lro_on_queue = !!(offloads & RTE_ETH_RX_OFFLOAD_TCP_LRO); 1719 unsigned int max_rx_pktlen = lro_on_queue ? 1720 dev->data->dev_conf.rxmode.max_lro_pkt_size : 1721 dev->data->mtu + (unsigned int)RTE_ETHER_HDR_LEN + 1722 RTE_ETHER_CRC_LEN; 1723 unsigned int non_scatter_min_mbuf_size = max_rx_pktlen + 1724 RTE_PKTMBUF_HEADROOM; 1725 unsigned int max_lro_size = 0; 1726 unsigned int first_mb_free_size = mb_len - RTE_PKTMBUF_HEADROOM; 1727 uint32_t mprq_log_actual_stride_num = 0; 1728 uint32_t mprq_log_actual_stride_size = 0; 1729 bool rx_seg_en = n_seg != 1 || rx_seg[0].offset || rx_seg[0].length; 1730 const int mprq_en = !mlx5_mprq_prepare(dev, idx, desc, rx_seg_en, 1731 non_scatter_min_mbuf_size, 1732 &mprq_log_actual_stride_num, 1733 &mprq_log_actual_stride_size, 1734 is_extmem); 1735 /* 1736 * Always allocate extra slots, even if eventually 1737 * the vector Rx will not be used. 1738 */ 1739 uint16_t desc_n = desc + config->rx_vec_en * MLX5_VPMD_DESCS_PER_LOOP; 1740 size_t alloc_size = sizeof(*tmpl) + desc_n * sizeof(struct rte_mbuf *); 1741 const struct rte_eth_rxseg_split *qs_seg = rx_seg; 1742 unsigned int tail_len; 1743 1744 if (mprq_en) { 1745 /* Trim the number of descs needed. */ 1746 desc >>= mprq_log_actual_stride_num; 1747 alloc_size += desc * sizeof(struct mlx5_mprq_buf *); 1748 } 1749 tmpl = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, alloc_size, 0, socket); 1750 if (!tmpl) { 1751 rte_errno = ENOMEM; 1752 return NULL; 1753 } 1754 LIST_INIT(&tmpl->owners); 1755 MLX5_ASSERT(n_seg && n_seg <= MLX5_MAX_RXQ_NSEG); 1756 /* 1757 * Save the original segment configuration in the shared queue 1758 * descriptor for the later check on the sibling queue creation. 1759 */ 1760 tmpl->rxseg_n = n_seg; 1761 rte_memcpy(tmpl->rxseg, qs_seg, 1762 sizeof(struct rte_eth_rxseg_split) * n_seg); 1763 /* 1764 * Build the array of actual buffer offsets and lengths. 1765 * Pad with the buffers from the last memory pool if 1766 * needed to handle max size packets, replace zero length 1767 * with the buffer length from the pool. 
1768 */ 1769 tail_len = max_rx_pktlen; 1770 do { 1771 struct mlx5_eth_rxseg *hw_seg = 1772 &tmpl->rxq.rxseg[tmpl->rxq.rxseg_n]; 1773 uint32_t buf_len, offset, seg_len; 1774 1775 /* 1776 * For the buffers beyond descriptions offset is zero, 1777 * the first buffer contains head room. 1778 */ 1779 buf_len = rte_pktmbuf_data_room_size(qs_seg->mp); 1780 offset = (tmpl->rxq.rxseg_n >= n_seg ? 0 : qs_seg->offset) + 1781 (tmpl->rxq.rxseg_n ? 0 : RTE_PKTMBUF_HEADROOM); 1782 /* 1783 * For the buffers beyond descriptions the length is 1784 * pool buffer length, zero lengths are replaced with 1785 * pool buffer length either. 1786 */ 1787 seg_len = tmpl->rxq.rxseg_n >= n_seg ? buf_len : 1788 qs_seg->length ? 1789 qs_seg->length : 1790 (buf_len - offset); 1791 /* Check is done in long int, now overflows. */ 1792 if (buf_len < seg_len + offset) { 1793 DRV_LOG(ERR, "port %u Rx queue %u: Split offset/length " 1794 "%u/%u can't be satisfied", 1795 dev->data->port_id, idx, 1796 qs_seg->length, qs_seg->offset); 1797 rte_errno = EINVAL; 1798 goto error; 1799 } 1800 if (seg_len > tail_len) 1801 seg_len = buf_len - offset; 1802 if (++tmpl->rxq.rxseg_n > MLX5_MAX_RXQ_NSEG) { 1803 DRV_LOG(ERR, 1804 "port %u too many SGEs (%u) needed to handle" 1805 " requested maximum packet size %u, the maximum" 1806 " supported are %u", dev->data->port_id, 1807 tmpl->rxq.rxseg_n, max_rx_pktlen, 1808 MLX5_MAX_RXQ_NSEG); 1809 rte_errno = ENOTSUP; 1810 goto error; 1811 } 1812 /* Build the actual scattering element in the queue object. */ 1813 hw_seg->mp = qs_seg->mp; 1814 MLX5_ASSERT(offset <= UINT16_MAX); 1815 MLX5_ASSERT(seg_len <= UINT16_MAX); 1816 hw_seg->offset = (uint16_t)offset; 1817 hw_seg->length = (uint16_t)seg_len; 1818 /* 1819 * Advance the segment descriptor, the padding is the based 1820 * on the attributes of the last descriptor. 1821 */ 1822 if (tmpl->rxq.rxseg_n < n_seg) 1823 qs_seg++; 1824 tail_len -= RTE_MIN(tail_len, seg_len); 1825 } while (tail_len || !rte_is_power_of_2(tmpl->rxq.rxseg_n)); 1826 MLX5_ASSERT(tmpl->rxq.rxseg_n && 1827 tmpl->rxq.rxseg_n <= MLX5_MAX_RXQ_NSEG); 1828 if (tmpl->rxq.rxseg_n > 1 && !(offloads & RTE_ETH_RX_OFFLOAD_SCATTER)) { 1829 DRV_LOG(ERR, "port %u Rx queue %u: Scatter offload is not" 1830 " configured and no enough mbuf space(%u) to contain " 1831 "the maximum RX packet length(%u) with head-room(%u)", 1832 dev->data->port_id, idx, mb_len, max_rx_pktlen, 1833 RTE_PKTMBUF_HEADROOM); 1834 rte_errno = ENOSPC; 1835 goto error; 1836 } 1837 tmpl->is_hairpin = false; 1838 if (mlx5_mr_ctrl_init(&tmpl->rxq.mr_ctrl, 1839 &priv->sh->cdev->mr_scache.dev_gen, socket)) { 1840 /* rte_errno is already set. */ 1841 goto error; 1842 } 1843 tmpl->socket = socket; 1844 if (dev->data->dev_conf.intr_conf.rxq) 1845 tmpl->irq = 1; 1846 if (mprq_en) { 1847 /* TODO: Rx scatter isn't supported yet. 
*/ 1848 tmpl->rxq.sges_n = 0; 1849 tmpl->rxq.log_strd_num = mprq_log_actual_stride_num; 1850 tmpl->rxq.log_strd_sz = mprq_log_actual_stride_size; 1851 tmpl->rxq.strd_shift_en = MLX5_MPRQ_TWO_BYTE_SHIFT; 1852 tmpl->rxq.strd_scatter_en = 1853 !!(offloads & RTE_ETH_RX_OFFLOAD_SCATTER); 1854 tmpl->rxq.mprq_max_memcpy_len = RTE_MIN(first_mb_free_size, 1855 config->mprq.max_memcpy_len); 1856 max_lro_size = RTE_MIN(max_rx_pktlen, 1857 RTE_BIT32(tmpl->rxq.log_strd_num) * 1858 RTE_BIT32(tmpl->rxq.log_strd_sz)); 1859 } else if (tmpl->rxq.rxseg_n == 1) { 1860 MLX5_ASSERT(max_rx_pktlen <= first_mb_free_size); 1861 tmpl->rxq.sges_n = 0; 1862 max_lro_size = max_rx_pktlen; 1863 } else if (offloads & RTE_ETH_RX_OFFLOAD_SCATTER) { 1864 unsigned int sges_n; 1865 1866 if (lro_on_queue && first_mb_free_size < 1867 MLX5_MAX_LRO_HEADER_FIX) { 1868 DRV_LOG(ERR, "Not enough space in the first segment(%u)" 1869 " to include the max header size(%u) for LRO", 1870 first_mb_free_size, MLX5_MAX_LRO_HEADER_FIX); 1871 rte_errno = ENOTSUP; 1872 goto error; 1873 } 1874 /* 1875 * Determine the number of SGEs needed for a full packet 1876 * and round it to the next power of two. 1877 */ 1878 sges_n = log2above(tmpl->rxq.rxseg_n); 1879 if (sges_n > MLX5_MAX_LOG_RQ_SEGS) { 1880 DRV_LOG(ERR, 1881 "port %u too many SGEs (%u) needed to handle" 1882 " requested maximum packet size %u, the maximum" 1883 " supported are %u", dev->data->port_id, 1884 1 << sges_n, max_rx_pktlen, 1885 1u << MLX5_MAX_LOG_RQ_SEGS); 1886 rte_errno = ENOTSUP; 1887 goto error; 1888 } 1889 tmpl->rxq.sges_n = sges_n; 1890 max_lro_size = max_rx_pktlen; 1891 } 1892 DRV_LOG(DEBUG, "port %u maximum number of segments per packet: %u", 1893 dev->data->port_id, 1 << tmpl->rxq.sges_n); 1894 if (desc % (1 << tmpl->rxq.sges_n)) { 1895 DRV_LOG(ERR, 1896 "port %u number of Rx queue descriptors (%u) is not a" 1897 " multiple of SGEs per packet (%u)", 1898 dev->data->port_id, 1899 desc, 1900 1 << tmpl->rxq.sges_n); 1901 rte_errno = EINVAL; 1902 goto error; 1903 } 1904 mlx5_max_lro_msg_size_adjust(dev, idx, max_lro_size); 1905 /* Toggle RX checksum offload if hardware supports it. */ 1906 tmpl->rxq.csum = !!(offloads & RTE_ETH_RX_OFFLOAD_CHECKSUM); 1907 /* Configure Rx timestamp. */ 1908 tmpl->rxq.hw_timestamp = !!(offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP); 1909 tmpl->rxq.timestamp_rx_flag = 0; 1910 if (tmpl->rxq.hw_timestamp && rte_mbuf_dyn_rx_timestamp_register( 1911 &tmpl->rxq.timestamp_offset, 1912 &tmpl->rxq.timestamp_rx_flag) != 0) { 1913 DRV_LOG(ERR, "Cannot register Rx timestamp field/flag"); 1914 goto error; 1915 } 1916 /* Configure VLAN stripping. */ 1917 tmpl->rxq.vlan_strip = !!(offloads & RTE_ETH_RX_OFFLOAD_VLAN_STRIP); 1918 /* By default, FCS (CRC) is stripped by hardware. */ 1919 tmpl->rxq.crc_present = 0; 1920 tmpl->rxq.lro = lro_on_queue; 1921 if (offloads & RTE_ETH_RX_OFFLOAD_KEEP_CRC) { 1922 if (priv->sh->config.hw_fcs_strip) { 1923 /* 1924 * RQs used for LRO-enabled TIRs should not be 1925 * configured to scatter the FCS. 
1926 */ 1927 if (lro_on_queue) 1928 DRV_LOG(WARNING, 1929 "port %u CRC stripping has been " 1930 "disabled but will still be performed " 1931 "by hardware, because LRO is enabled", 1932 dev->data->port_id); 1933 else 1934 tmpl->rxq.crc_present = 1; 1935 } else { 1936 DRV_LOG(WARNING, 1937 "port %u CRC stripping has been disabled but will" 1938 " still be performed by hardware, make sure MLNX_OFED" 1939 " and firmware are up to date", 1940 dev->data->port_id); 1941 } 1942 } 1943 DRV_LOG(DEBUG, 1944 "port %u CRC stripping is %s, %u bytes will be subtracted from" 1945 " incoming frames to hide it", 1946 dev->data->port_id, 1947 tmpl->rxq.crc_present ? "disabled" : "enabled", 1948 tmpl->rxq.crc_present << 2); 1949 tmpl->rxq.rss_hash = !!priv->rss_conf.rss_hf && 1950 (!!(dev->data->dev_conf.rxmode.mq_mode & RTE_ETH_MQ_RX_RSS)); 1951 /* Save port ID. */ 1952 tmpl->rxq.port_id = dev->data->port_id; 1953 tmpl->sh = priv->sh; 1954 tmpl->rxq.mp = rx_seg[0].mp; 1955 tmpl->rxq.elts_n = log2above(desc); 1956 tmpl->rxq.rq_repl_thresh = MLX5_VPMD_RXQ_RPLNSH_THRESH(desc_n); 1957 tmpl->rxq.elts = (struct rte_mbuf *(*)[])(tmpl + 1); 1958 tmpl->rxq.mprq_bufs = (struct mlx5_mprq_buf *(*)[])(*tmpl->rxq.elts + desc_n); 1959 tmpl->rxq.idx = idx; 1960 if (conf->share_group > 0) { 1961 tmpl->rxq.shared = 1; 1962 tmpl->share_group = conf->share_group; 1963 tmpl->share_qid = conf->share_qid; 1964 LIST_INSERT_HEAD(&priv->sh->shared_rxqs, tmpl, share_entry); 1965 } 1966 LIST_INSERT_HEAD(&priv->rxqsctrl, tmpl, next); 1967 return tmpl; 1968 error: 1969 mlx5_mr_btree_free(&tmpl->rxq.mr_ctrl.cache_bh); 1970 mlx5_free(tmpl); 1971 return NULL; 1972 } 1973 1974 /** 1975 * Create a DPDK Rx hairpin queue. 1976 * 1977 * @param dev 1978 * Pointer to Ethernet device. 1979 * @param rxq 1980 * RX queue. 1981 * @param desc 1982 * Number of descriptors to configure in queue. 1983 * @param hairpin_conf 1984 * The hairpin binding configuration. 1985 * 1986 * @return 1987 * A DPDK queue object on success, NULL otherwise and rte_errno is set. 1988 */ 1989 struct mlx5_rxq_ctrl * 1990 mlx5_rxq_hairpin_new(struct rte_eth_dev *dev, struct mlx5_rxq_priv *rxq, 1991 uint16_t desc, 1992 const struct rte_eth_hairpin_conf *hairpin_conf) 1993 { 1994 uint16_t idx = rxq->idx; 1995 struct mlx5_priv *priv = dev->data->dev_private; 1996 struct mlx5_rxq_ctrl *tmpl; 1997 1998 tmpl = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, sizeof(*tmpl), 0, 1999 SOCKET_ID_ANY); 2000 if (!tmpl) { 2001 rte_errno = ENOMEM; 2002 return NULL; 2003 } 2004 LIST_INIT(&tmpl->owners); 2005 rxq->ctrl = tmpl; 2006 LIST_INSERT_HEAD(&tmpl->owners, rxq, owner_entry); 2007 tmpl->is_hairpin = true; 2008 tmpl->socket = SOCKET_ID_ANY; 2009 tmpl->rxq.rss_hash = 0; 2010 tmpl->rxq.port_id = dev->data->port_id; 2011 tmpl->sh = priv->sh; 2012 tmpl->rxq.mp = NULL; 2013 tmpl->rxq.elts_n = log2above(desc); 2014 tmpl->rxq.elts = NULL; 2015 tmpl->rxq.mr_ctrl.cache_bh = (struct mlx5_mr_btree) { 0 }; 2016 tmpl->rxq.idx = idx; 2017 rxq->hairpin_conf = *hairpin_conf; 2018 mlx5_rxq_ref(dev, idx); 2019 LIST_INSERT_HEAD(&priv->rxqsctrl, tmpl, next); 2020 return tmpl; 2021 } 2022 2023 /** 2024 * Increase Rx queue reference count. 2025 * 2026 * @param dev 2027 * Pointer to Ethernet device. 2028 * @param idx 2029 * RX queue index. 2030 * 2031 * @return 2032 * A pointer to the queue if it exists, NULL otherwise. 
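 *
 * A minimal usage sketch (illustrative only, assuming @p dev and @p idx
 * refer to a configured Rx queue; whatever work is done with the queue
 * happens between the two calls and the reference is dropped with
 * mlx5_rxq_deref() afterwards):
 *
 * @code
 * struct mlx5_rxq_priv *rxq = mlx5_rxq_ref(dev, idx);
 *
 * if (rxq == NULL)
 *         return -EINVAL;
 * mlx5_rxq_deref(dev, idx);
 * @endcode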
2033 */ 2034 struct mlx5_rxq_priv * 2035 mlx5_rxq_ref(struct rte_eth_dev *dev, uint16_t idx) 2036 { 2037 struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, idx); 2038 2039 if (rxq != NULL) 2040 rte_atomic_fetch_add_explicit(&rxq->refcnt, 1, rte_memory_order_relaxed); 2041 return rxq; 2042 } 2043 2044 /** 2045 * Dereference a Rx queue. 2046 * 2047 * @param dev 2048 * Pointer to Ethernet device. 2049 * @param idx 2050 * RX queue index. 2051 * 2052 * @return 2053 * Updated reference count. 2054 */ 2055 uint32_t 2056 mlx5_rxq_deref(struct rte_eth_dev *dev, uint16_t idx) 2057 { 2058 struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, idx); 2059 2060 if (rxq == NULL) 2061 return 0; 2062 return rte_atomic_fetch_sub_explicit(&rxq->refcnt, 1, rte_memory_order_relaxed) - 1; 2063 } 2064 2065 /** 2066 * Get a Rx queue. 2067 * 2068 * @param dev 2069 * Pointer to Ethernet device. 2070 * @param idx 2071 * RX queue index. 2072 * 2073 * @return 2074 * A pointer to the queue if it exists, NULL otherwise. 2075 */ 2076 struct mlx5_rxq_priv * 2077 mlx5_rxq_get(struct rte_eth_dev *dev, uint16_t idx) 2078 { 2079 struct mlx5_priv *priv = dev->data->dev_private; 2080 2081 if (idx >= priv->rxqs_n) 2082 return NULL; 2083 MLX5_ASSERT(priv->rxq_privs != NULL); 2084 return (*priv->rxq_privs)[idx]; 2085 } 2086 2087 /** 2088 * Get Rx queue shareable control. 2089 * 2090 * @param dev 2091 * Pointer to Ethernet device. 2092 * @param idx 2093 * RX queue index. 2094 * 2095 * @return 2096 * A pointer to the queue control if it exists, NULL otherwise. 2097 */ 2098 struct mlx5_rxq_ctrl * 2099 mlx5_rxq_ctrl_get(struct rte_eth_dev *dev, uint16_t idx) 2100 { 2101 struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, idx); 2102 2103 return rxq == NULL ? NULL : rxq->ctrl; 2104 } 2105 2106 /** 2107 * Get Rx queue shareable data. 2108 * 2109 * @param dev 2110 * Pointer to Ethernet device. 2111 * @param idx 2112 * RX queue index. 2113 * 2114 * @return 2115 * A pointer to the queue data if it exists, NULL otherwise. 2116 */ 2117 struct mlx5_rxq_data * 2118 mlx5_rxq_data_get(struct rte_eth_dev *dev, uint16_t idx) 2119 { 2120 struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, idx); 2121 2122 return rxq == NULL ? NULL : &rxq->ctrl->rxq; 2123 } 2124 2125 /** 2126 * Increase an external Rx queue reference count. 2127 * 2128 * @param dev 2129 * Pointer to Ethernet device. 2130 * @param idx 2131 * External RX queue index. 2132 * 2133 * @return 2134 * A pointer to the queue if it exists, NULL otherwise. 2135 */ 2136 struct mlx5_external_q * 2137 mlx5_ext_rxq_ref(struct rte_eth_dev *dev, uint16_t idx) 2138 { 2139 struct mlx5_external_q *rxq = mlx5_ext_rxq_get(dev, idx); 2140 2141 rte_atomic_fetch_add_explicit(&rxq->refcnt, 1, rte_memory_order_relaxed); 2142 return rxq; 2143 } 2144 2145 /** 2146 * Decrease an external Rx queue reference count. 2147 * 2148 * @param dev 2149 * Pointer to Ethernet device. 2150 * @param idx 2151 * External RX queue index. 2152 * 2153 * @return 2154 * Updated reference count. 2155 */ 2156 uint32_t 2157 mlx5_ext_rxq_deref(struct rte_eth_dev *dev, uint16_t idx) 2158 { 2159 struct mlx5_external_q *rxq = mlx5_ext_rxq_get(dev, idx); 2160 2161 return rte_atomic_fetch_sub_explicit(&rxq->refcnt, 1, rte_memory_order_relaxed) - 1; 2162 } 2163 2164 /** 2165 * Get an external Rx queue. 2166 * 2167 * @param dev 2168 * Pointer to Ethernet device. 2169 * @param idx 2170 * External Rx queue index. 2171 * 2172 * @return 2173 * A pointer to the queue if it exists, NULL otherwise. 
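 *
 * @note The caller must make sure @p idx designates an external queue,
 *       e.g. by checking mlx5_is_external_rxq() beforehand, as asserted
 *       in the function body.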
2174 */ 2175 struct mlx5_external_q * 2176 mlx5_ext_rxq_get(struct rte_eth_dev *dev, uint16_t idx) 2177 { 2178 struct mlx5_priv *priv = dev->data->dev_private; 2179 2180 MLX5_ASSERT(mlx5_is_external_rxq(dev, idx)); 2181 return &priv->ext_rxqs[idx - RTE_PMD_MLX5_EXTERNAL_RX_QUEUE_ID_MIN]; 2182 } 2183 2184 /** 2185 * Dereference a list of Rx queues. 2186 * 2187 * @param dev 2188 * Pointer to Ethernet device. 2189 * @param queues 2190 * List of Rx queues to deref. 2191 * @param queues_n 2192 * Number of queues in the array. 2193 */ 2194 static void 2195 mlx5_rxqs_deref(struct rte_eth_dev *dev, uint16_t *queues, 2196 const uint32_t queues_n) 2197 { 2198 uint32_t i; 2199 2200 for (i = 0; i < queues_n; i++) { 2201 if (mlx5_is_external_rxq(dev, queues[i])) 2202 claim_nonzero(mlx5_ext_rxq_deref(dev, queues[i])); 2203 else 2204 claim_nonzero(mlx5_rxq_deref(dev, queues[i])); 2205 } 2206 } 2207 2208 /** 2209 * Increase reference count for list of Rx queues. 2210 * 2211 * @param dev 2212 * Pointer to Ethernet device. 2213 * @param queues 2214 * List of Rx queues to ref. 2215 * @param queues_n 2216 * Number of queues in the array. 2217 * 2218 * @return 2219 * 0 on success, a negative errno value otherwise and rte_errno is set. 2220 */ 2221 static int 2222 mlx5_rxqs_ref(struct rte_eth_dev *dev, uint16_t *queues, 2223 const uint32_t queues_n) 2224 { 2225 uint32_t i; 2226 2227 for (i = 0; i != queues_n; ++i) { 2228 if (mlx5_is_external_rxq(dev, queues[i])) { 2229 if (mlx5_ext_rxq_ref(dev, queues[i]) == NULL) 2230 goto error; 2231 } else { 2232 if (mlx5_rxq_ref(dev, queues[i]) == NULL) 2233 goto error; 2234 } 2235 } 2236 return 0; 2237 error: 2238 mlx5_rxqs_deref(dev, queues, i); 2239 rte_errno = EINVAL; 2240 return -rte_errno; 2241 } 2242 2243 /** 2244 * Release a Rx queue. 2245 * 2246 * @param dev 2247 * Pointer to Ethernet device. 2248 * @param idx 2249 * RX queue index. 2250 * 2251 * @return 2252 * 1 while a reference on it exists, 0 when freed. 2253 */ 2254 int 2255 mlx5_rxq_release(struct rte_eth_dev *dev, uint16_t idx) 2256 { 2257 struct mlx5_priv *priv = dev->data->dev_private; 2258 struct mlx5_rxq_priv *rxq; 2259 struct mlx5_rxq_ctrl *rxq_ctrl; 2260 uint32_t refcnt; 2261 2262 if (priv->rxq_privs == NULL) 2263 return 0; 2264 rxq = mlx5_rxq_get(dev, idx); 2265 if (rxq == NULL || rxq->refcnt == 0) 2266 return 0; 2267 rxq_ctrl = rxq->ctrl; 2268 refcnt = mlx5_rxq_deref(dev, idx); 2269 if (refcnt > 1) { 2270 return 1; 2271 } else if (refcnt == 1) { /* RxQ stopped. */ 2272 priv->obj_ops.rxq_obj_release(rxq); 2273 if (!rxq_ctrl->started && rxq_ctrl->obj != NULL) { 2274 LIST_REMOVE(rxq_ctrl->obj, next); 2275 mlx5_free(rxq_ctrl->obj); 2276 rxq_ctrl->obj = NULL; 2277 } 2278 if (!rxq_ctrl->is_hairpin) { 2279 if (!rxq_ctrl->started) 2280 rxq_free_elts(rxq_ctrl); 2281 dev->data->rx_queue_state[idx] = 2282 RTE_ETH_QUEUE_STATE_STOPPED; 2283 } 2284 } else { /* Refcnt zero, closing device. */ 2285 LIST_REMOVE(rxq_ctrl, next); 2286 LIST_REMOVE(rxq, owner_entry); 2287 if (LIST_EMPTY(&rxq_ctrl->owners)) { 2288 if (!rxq_ctrl->is_hairpin) 2289 mlx5_mr_btree_free 2290 (&rxq_ctrl->rxq.mr_ctrl.cache_bh); 2291 if (rxq_ctrl->rxq.shared) 2292 LIST_REMOVE(rxq_ctrl, share_entry); 2293 mlx5_free(rxq_ctrl); 2294 } 2295 dev->data->rx_queues[idx] = NULL; 2296 mlx5_free(rxq); 2297 (*priv->rxq_privs)[idx] = NULL; 2298 } 2299 return 0; 2300 } 2301 2302 /** 2303 * Verify the Rx Queue list is empty 2304 * 2305 * @param dev 2306 * Pointer to Ethernet device. 2307 * 2308 * @return 2309 * The number of object not released. 
2310 */ 2311 int 2312 mlx5_rxq_verify(struct rte_eth_dev *dev) 2313 { 2314 struct mlx5_priv *priv = dev->data->dev_private; 2315 struct mlx5_rxq_ctrl *rxq_ctrl; 2316 int ret = 0; 2317 2318 LIST_FOREACH(rxq_ctrl, &priv->rxqsctrl, next) { 2319 DRV_LOG(DEBUG, "port %u Rx Queue %u still referenced", 2320 dev->data->port_id, rxq_ctrl->rxq.idx); 2321 ++ret; 2322 } 2323 return ret; 2324 } 2325 2326 /** 2327 * Verify the external Rx Queue list is empty. 2328 * 2329 * @param dev 2330 * Pointer to Ethernet device. 2331 * 2332 * @return 2333 * The number of object not released. 2334 */ 2335 int 2336 mlx5_ext_rxq_verify(struct rte_eth_dev *dev) 2337 { 2338 struct mlx5_priv *priv = dev->data->dev_private; 2339 struct mlx5_external_q *rxq; 2340 uint32_t i; 2341 int ret = 0; 2342 2343 if (priv->ext_rxqs == NULL) 2344 return 0; 2345 2346 for (i = RTE_PMD_MLX5_EXTERNAL_RX_QUEUE_ID_MIN; i <= UINT16_MAX ; ++i) { 2347 rxq = mlx5_ext_rxq_get(dev, i); 2348 if (rxq->refcnt < 2) 2349 continue; 2350 DRV_LOG(DEBUG, "Port %u external RxQ %u still referenced.", 2351 dev->data->port_id, i); 2352 ++ret; 2353 } 2354 return ret; 2355 } 2356 2357 /** 2358 * Check whether RxQ type is Hairpin. 2359 * 2360 * @param dev 2361 * Pointer to Ethernet device. 2362 * @param idx 2363 * Rx queue index. 2364 * 2365 * @return 2366 * True if Rx queue type is Hairpin, otherwise False. 2367 */ 2368 bool 2369 mlx5_rxq_is_hairpin(struct rte_eth_dev *dev, uint16_t idx) 2370 { 2371 struct mlx5_rxq_ctrl *rxq_ctrl; 2372 2373 if (mlx5_is_external_rxq(dev, idx)) 2374 return false; 2375 rxq_ctrl = mlx5_rxq_ctrl_get(dev, idx); 2376 return (rxq_ctrl != NULL && rxq_ctrl->is_hairpin); 2377 } 2378 2379 /* 2380 * Get a Rx hairpin queue configuration. 2381 * 2382 * @param dev 2383 * Pointer to Ethernet device. 2384 * @param idx 2385 * Rx queue index. 2386 * 2387 * @return 2388 * Pointer to the configuration if a hairpin RX queue, otherwise NULL. 2389 */ 2390 const struct rte_eth_hairpin_conf * 2391 mlx5_rxq_get_hairpin_conf(struct rte_eth_dev *dev, uint16_t idx) 2392 { 2393 if (mlx5_rxq_is_hairpin(dev, idx)) { 2394 struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, idx); 2395 2396 return rxq != NULL ? &rxq->hairpin_conf : NULL; 2397 } 2398 return NULL; 2399 } 2400 2401 /** 2402 * Match queues listed in arguments to queues contained in indirection table 2403 * object. 2404 * 2405 * @param ind_tbl 2406 * Pointer to indirection table to match. 2407 * @param queues 2408 * Queues to match to queues in indirection table. 2409 * @param queues_n 2410 * Number of queues in the array. 2411 * 2412 * @return 2413 * 1 if all queues in indirection table match 0 otherwise. 2414 */ 2415 static int 2416 mlx5_ind_table_obj_match_queues(const struct mlx5_ind_table_obj *ind_tbl, 2417 const uint16_t *queues, uint32_t queues_n) 2418 { 2419 return (ind_tbl->queues_n == queues_n) && 2420 (!memcmp(ind_tbl->queues, queues, 2421 ind_tbl->queues_n * sizeof(ind_tbl->queues[0]))); 2422 } 2423 2424 /** 2425 * Get an indirection table. 2426 * 2427 * @param dev 2428 * Pointer to Ethernet device. 2429 * @param queues 2430 * Queues entering in the indirection table. 2431 * @param queues_n 2432 * Number of queues in the array. 2433 * 2434 * @return 2435 * An indirection table if found. 
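 *
 * A get-or-create sketch (illustrative only, mirroring the pattern used
 * by __mlx5_hrxq_create() below; the trailing arguments of
 * mlx5_ind_table_obj_new() stand for standalone == false and
 * ref_qs == true, and the matching release is shown for symmetry):
 *
 * @code
 * struct mlx5_ind_table_obj *ind_tbl;
 *
 * ind_tbl = mlx5_ind_table_obj_get(dev, queues, queues_n);
 * if (ind_tbl == NULL)
 *         ind_tbl = mlx5_ind_table_obj_new(dev, queues, queues_n,
 *                                          false, true);
 * if (ind_tbl == NULL)
 *         return -rte_errno;
 * mlx5_ind_table_obj_release(dev, ind_tbl, true);
 * @endcode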
2436 */ 2437 struct mlx5_ind_table_obj * 2438 mlx5_ind_table_obj_get(struct rte_eth_dev *dev, const uint16_t *queues, 2439 uint32_t queues_n) 2440 { 2441 struct mlx5_priv *priv = dev->data->dev_private; 2442 struct mlx5_ind_table_obj *ind_tbl; 2443 2444 rte_rwlock_read_lock(&priv->ind_tbls_lock); 2445 LIST_FOREACH(ind_tbl, &priv->ind_tbls, next) { 2446 if ((ind_tbl->queues_n == queues_n) && 2447 (memcmp(ind_tbl->queues, queues, 2448 ind_tbl->queues_n * sizeof(ind_tbl->queues[0])) 2449 == 0)) { 2450 rte_atomic_fetch_add_explicit(&ind_tbl->refcnt, 1, 2451 rte_memory_order_relaxed); 2452 break; 2453 } 2454 } 2455 rte_rwlock_read_unlock(&priv->ind_tbls_lock); 2456 return ind_tbl; 2457 } 2458 2459 /** 2460 * Release an indirection table. 2461 * 2462 * @param dev 2463 * Pointer to Ethernet device. 2464 * @param ind_table 2465 * Indirection table to release. 2466 * @param deref_rxqs 2467 * If true, then dereference RX queues related to indirection table. 2468 * Otherwise, no additional action will be taken. 2469 * 2470 * @return 2471 * 1 while a reference on it exists, 0 when freed. 2472 */ 2473 int 2474 mlx5_ind_table_obj_release(struct rte_eth_dev *dev, 2475 struct mlx5_ind_table_obj *ind_tbl, 2476 bool deref_rxqs) 2477 { 2478 struct mlx5_priv *priv = dev->data->dev_private; 2479 unsigned int ret; 2480 2481 rte_rwlock_write_lock(&priv->ind_tbls_lock); 2482 ret = rte_atomic_fetch_sub_explicit(&ind_tbl->refcnt, 1, rte_memory_order_relaxed) - 1; 2483 if (!ret) 2484 LIST_REMOVE(ind_tbl, next); 2485 rte_rwlock_write_unlock(&priv->ind_tbls_lock); 2486 if (ret) 2487 return 1; 2488 priv->obj_ops.ind_table_destroy(ind_tbl); 2489 if (deref_rxqs) 2490 mlx5_rxqs_deref(dev, ind_tbl->queues, ind_tbl->queues_n); 2491 mlx5_free(ind_tbl); 2492 return 0; 2493 } 2494 2495 /** 2496 * Verify the Rx Queue list is empty 2497 * 2498 * @param dev 2499 * Pointer to Ethernet device. 2500 * 2501 * @return 2502 * The number of object not released. 2503 */ 2504 int 2505 mlx5_ind_table_obj_verify(struct rte_eth_dev *dev) 2506 { 2507 struct mlx5_priv *priv = dev->data->dev_private; 2508 struct mlx5_ind_table_obj *ind_tbl; 2509 int ret = 0; 2510 2511 rte_rwlock_read_lock(&priv->ind_tbls_lock); 2512 LIST_FOREACH(ind_tbl, &priv->ind_tbls, next) { 2513 DRV_LOG(DEBUG, 2514 "port %u indirection table obj %p still referenced", 2515 dev->data->port_id, (void *)ind_tbl); 2516 ++ret; 2517 } 2518 rte_rwlock_read_unlock(&priv->ind_tbls_lock); 2519 return ret; 2520 } 2521 2522 /** 2523 * Setup an indirection table structure fields. 2524 * 2525 * @param dev 2526 * Pointer to Ethernet device. 2527 * @param ind_table 2528 * Indirection table to modify. 2529 * @param ref_qs 2530 * Whether to increment RxQ reference counters. 2531 * 2532 * @return 2533 * 0 on success, a negative errno value otherwise and rte_errno is set. 2534 */ 2535 int 2536 mlx5_ind_table_obj_setup(struct rte_eth_dev *dev, 2537 struct mlx5_ind_table_obj *ind_tbl, 2538 bool ref_qs) 2539 { 2540 struct mlx5_priv *priv = dev->data->dev_private; 2541 uint32_t queues_n = ind_tbl->queues_n; 2542 int ret; 2543 const unsigned int n = rte_is_power_of_2(queues_n) ? 
2544 log2above(queues_n) : 2545 log2above(priv->sh->dev_cap.ind_table_max_size); 2546 2547 if (ref_qs && mlx5_rxqs_ref(dev, ind_tbl->queues, queues_n) < 0) { 2548 DRV_LOG(DEBUG, "Port %u invalid indirection table queues.", 2549 dev->data->port_id); 2550 return -rte_errno; 2551 } 2552 ret = priv->obj_ops.ind_table_new(dev, n, ind_tbl); 2553 if (ret) { 2554 DRV_LOG(DEBUG, "Port %u cannot create a new indirection table.", 2555 dev->data->port_id); 2556 if (ref_qs) { 2557 int err = rte_errno; 2558 2559 mlx5_rxqs_deref(dev, ind_tbl->queues, queues_n); 2560 rte_errno = err; 2561 } 2562 return ret; 2563 } 2564 rte_atomic_fetch_add_explicit(&ind_tbl->refcnt, 1, rte_memory_order_relaxed); 2565 return 0; 2566 } 2567 2568 /** 2569 * Create an indirection table. 2570 * 2571 * @param dev 2572 * Pointer to Ethernet device. 2573 * @param queues 2574 * Queues entering in the indirection table. 2575 * @param queues_n 2576 * Number of queues in the array. 2577 * @param standalone 2578 * Indirection table for Standalone queue. 2579 * @param ref_qs 2580 * Whether to increment RxQ reference counters. 2581 * 2582 * @return 2583 * The Verbs/DevX object initialized, NULL otherwise and rte_errno is set. 2584 */ 2585 struct mlx5_ind_table_obj * 2586 mlx5_ind_table_obj_new(struct rte_eth_dev *dev, const uint16_t *queues, 2587 uint32_t queues_n, bool standalone, bool ref_qs) 2588 { 2589 struct mlx5_priv *priv = dev->data->dev_private; 2590 struct mlx5_ind_table_obj *ind_tbl; 2591 int ret; 2592 uint32_t max_queues_n = priv->rxqs_n > queues_n ? priv->rxqs_n : queues_n; 2593 2594 /* 2595 * Allocate maximum queues for shared action as queue number 2596 * maybe modified later. 2597 */ 2598 ind_tbl = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*ind_tbl) + 2599 (standalone ? max_queues_n : queues_n) * 2600 sizeof(uint16_t), 0, SOCKET_ID_ANY); 2601 if (!ind_tbl) { 2602 rte_errno = ENOMEM; 2603 return NULL; 2604 } 2605 ind_tbl->queues_n = queues_n; 2606 ind_tbl->queues = (uint16_t *)(ind_tbl + 1); 2607 memcpy(ind_tbl->queues, queues, queues_n * sizeof(*queues)); 2608 ret = mlx5_ind_table_obj_setup(dev, ind_tbl, ref_qs); 2609 if (ret < 0) { 2610 mlx5_free(ind_tbl); 2611 return NULL; 2612 } 2613 rte_rwlock_write_lock(&priv->ind_tbls_lock); 2614 if (!standalone) 2615 LIST_INSERT_HEAD(&priv->ind_tbls, ind_tbl, next); 2616 else 2617 LIST_INSERT_HEAD(&priv->standalone_ind_tbls, ind_tbl, next); 2618 rte_rwlock_write_unlock(&priv->ind_tbls_lock); 2619 2620 return ind_tbl; 2621 } 2622 2623 static int 2624 mlx5_ind_table_obj_check_standalone(struct rte_eth_dev *dev __rte_unused, 2625 struct mlx5_ind_table_obj *ind_tbl) 2626 { 2627 uint32_t refcnt; 2628 2629 refcnt = rte_atomic_load_explicit(&ind_tbl->refcnt, rte_memory_order_relaxed); 2630 if (refcnt <= 1) 2631 return 0; 2632 /* 2633 * Modification of indirection tables having more than 1 2634 * reference is unsupported. 2635 */ 2636 DRV_LOG(DEBUG, 2637 "Port %u cannot modify indirection table %p (refcnt %u > 1).", 2638 dev->data->port_id, (void *)ind_tbl, refcnt); 2639 rte_errno = EINVAL; 2640 return -rte_errno; 2641 } 2642 2643 /** 2644 * Modify an indirection table. 2645 * 2646 * @param dev 2647 * Pointer to Ethernet device. 2648 * @param ind_table 2649 * Indirection table to modify. 2650 * @param queues 2651 * Queues replacement for the indirection table. 2652 * @param queues_n 2653 * Number of queues in the array. 2654 * @param standalone 2655 * Indirection table for Standalone queue. 2656 * @param ref_new_qs 2657 * Whether to increment new RxQ set reference counters. 
2658 * @param deref_old_qs 2659 * Whether to decrement old RxQ set reference counters. 2660 * 2661 * @return 2662 * 0 on success, a negative errno value otherwise and rte_errno is set. 2663 */ 2664 int 2665 mlx5_ind_table_obj_modify(struct rte_eth_dev *dev, 2666 struct mlx5_ind_table_obj *ind_tbl, 2667 uint16_t *queues, const uint32_t queues_n, 2668 bool standalone, bool ref_new_qs, bool deref_old_qs) 2669 { 2670 struct mlx5_priv *priv = dev->data->dev_private; 2671 int ret; 2672 const unsigned int n = rte_is_power_of_2(queues_n) ? 2673 log2above(queues_n) : 2674 log2above(priv->sh->dev_cap.ind_table_max_size); 2675 2676 MLX5_ASSERT(standalone); 2677 RTE_SET_USED(standalone); 2678 if (mlx5_ind_table_obj_check_standalone(dev, ind_tbl) < 0) 2679 return -rte_errno; 2680 if (ref_new_qs && mlx5_rxqs_ref(dev, queues, queues_n) < 0) { 2681 DRV_LOG(DEBUG, "Port %u invalid indirection table queues.", 2682 dev->data->port_id); 2683 return -rte_errno; 2684 } 2685 MLX5_ASSERT(priv->obj_ops.ind_table_modify); 2686 ret = priv->obj_ops.ind_table_modify(dev, n, queues, queues_n, ind_tbl); 2687 if (ret) { 2688 DRV_LOG(DEBUG, "Port %u cannot modify indirection table.", 2689 dev->data->port_id); 2690 if (ref_new_qs) { 2691 int err = rte_errno; 2692 2693 mlx5_rxqs_deref(dev, queues, queues_n); 2694 rte_errno = err; 2695 } 2696 return ret; 2697 } 2698 if (deref_old_qs) 2699 mlx5_rxqs_deref(dev, ind_tbl->queues, ind_tbl->queues_n); 2700 ind_tbl->queues_n = queues_n; 2701 ind_tbl->queues = queues; 2702 return 0; 2703 } 2704 2705 /** 2706 * Attach an indirection table to its queues. 2707 * 2708 * @param dev 2709 * Pointer to Ethernet device. 2710 * @param ind_table 2711 * Indirection table to attach. 2712 * 2713 * @return 2714 * 0 on success, a negative errno value otherwise and rte_errno is set. 2715 */ 2716 int 2717 mlx5_ind_table_obj_attach(struct rte_eth_dev *dev, 2718 struct mlx5_ind_table_obj *ind_tbl) 2719 { 2720 int ret; 2721 2722 ret = mlx5_ind_table_obj_modify(dev, ind_tbl, ind_tbl->queues, 2723 ind_tbl->queues_n, 2724 true /* standalone */, 2725 true /* ref_new_qs */, 2726 false /* deref_old_qs */); 2727 if (ret != 0) 2728 DRV_LOG(ERR, "Port %u could not modify indirect table obj %p", 2729 dev->data->port_id, (void *)ind_tbl); 2730 return ret; 2731 } 2732 2733 /** 2734 * Detach an indirection table from its queues. 2735 * 2736 * @param dev 2737 * Pointer to Ethernet device. 2738 * @param ind_table 2739 * Indirection table to detach. 2740 * 2741 * @return 2742 * 0 on success, a negative errno value otherwise and rte_errno is set. 2743 */ 2744 int 2745 mlx5_ind_table_obj_detach(struct rte_eth_dev *dev, 2746 struct mlx5_ind_table_obj *ind_tbl) 2747 { 2748 struct mlx5_priv *priv = dev->data->dev_private; 2749 const unsigned int n = rte_is_power_of_2(ind_tbl->queues_n) ? 
2750 log2above(ind_tbl->queues_n) : 2751 log2above(priv->sh->dev_cap.ind_table_max_size); 2752 unsigned int i; 2753 int ret; 2754 2755 ret = mlx5_ind_table_obj_check_standalone(dev, ind_tbl); 2756 if (ret != 0) 2757 return ret; 2758 MLX5_ASSERT(priv->obj_ops.ind_table_modify); 2759 ret = priv->obj_ops.ind_table_modify(dev, n, NULL, 0, ind_tbl); 2760 if (ret != 0) { 2761 DRV_LOG(ERR, "Port %u could not modify indirect table obj %p", 2762 dev->data->port_id, (void *)ind_tbl); 2763 return ret; 2764 } 2765 for (i = 0; i < ind_tbl->queues_n; i++) 2766 mlx5_rxq_release(dev, ind_tbl->queues[i]); 2767 return ret; 2768 } 2769 2770 int 2771 mlx5_hrxq_match_cb(void *tool_ctx __rte_unused, struct mlx5_list_entry *entry, 2772 void *cb_ctx) 2773 { 2774 struct mlx5_flow_cb_ctx *ctx = cb_ctx; 2775 struct mlx5_flow_rss_desc *rss_desc = ctx->data; 2776 struct mlx5_hrxq *hrxq = container_of(entry, typeof(*hrxq), entry); 2777 2778 return (hrxq->rss_key_len != rss_desc->key_len || 2779 hrxq->symmetric_hash_function != rss_desc->symmetric_hash_function || 2780 memcmp(hrxq->rss_key, rss_desc->key, rss_desc->key_len) || 2781 hrxq->hws_flags != rss_desc->hws_flags || 2782 hrxq->hash_fields != rss_desc->hash_fields || 2783 hrxq->ind_table->queues_n != rss_desc->queue_num || 2784 memcmp(hrxq->ind_table->queues, rss_desc->queue, 2785 rss_desc->queue_num * sizeof(rss_desc->queue[0]))); 2786 } 2787 2788 /** 2789 * Modify an Rx Hash queue configuration. 2790 * 2791 * @param dev 2792 * Pointer to Ethernet device. 2793 * @param hrxq 2794 * Index to Hash Rx queue to modify. 2795 * @param rss_key 2796 * RSS key for the Rx hash queue. 2797 * @param rss_key_len 2798 * RSS key length. 2799 * @param hash_fields 2800 * Verbs protocol hash field to make the RSS on. 2801 * @param queues 2802 * Queues entering in hash queue. In case of empty hash_fields only the 2803 * first queue index will be taken for the indirection table. 2804 * @param queues_n 2805 * Number of queues. 2806 * 2807 * @return 2808 * 0 on success, a negative errno value otherwise and rte_errno is set. 2809 */ 2810 int 2811 mlx5_hrxq_modify(struct rte_eth_dev *dev, uint32_t hrxq_idx, 2812 const uint8_t *rss_key, uint32_t rss_key_len, 2813 uint64_t hash_fields, bool symmetric_hash_function, 2814 const uint16_t *queues, uint32_t queues_n) 2815 { 2816 int err; 2817 struct mlx5_ind_table_obj *ind_tbl = NULL; 2818 struct mlx5_priv *priv = dev->data->dev_private; 2819 struct mlx5_hrxq *hrxq = 2820 mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ], hrxq_idx); 2821 bool dev_started = !!dev->data->dev_started; 2822 int ret; 2823 2824 if (!hrxq) { 2825 rte_errno = EINVAL; 2826 return -rte_errno; 2827 } 2828 /* validations */ 2829 if (hrxq->rss_key_len != rss_key_len) { 2830 /* rss_key_len is fixed size 40 byte & not supposed to change */ 2831 rte_errno = EINVAL; 2832 return -rte_errno; 2833 } 2834 queues_n = hash_fields ? queues_n : 1; 2835 if (mlx5_ind_table_obj_match_queues(hrxq->ind_table, 2836 queues, queues_n)) { 2837 ind_tbl = hrxq->ind_table; 2838 } else { 2839 if (hrxq->standalone) { 2840 /* 2841 * Replacement of indirection table unsupported for 2842 * standalone hrxq objects (used by shared RSS). 
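 * Such standalone tables are expected to be updated through
 * mlx5_ind_table_obj_modify() by the shared RSS action update
 * path instead.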
2843 */ 2844 rte_errno = ENOTSUP; 2845 return -rte_errno; 2846 } 2847 ind_tbl = mlx5_ind_table_obj_get(dev, queues, queues_n); 2848 if (!ind_tbl) 2849 ind_tbl = mlx5_ind_table_obj_new(dev, queues, queues_n, 2850 hrxq->standalone, 2851 dev_started); 2852 } 2853 if (!ind_tbl) { 2854 rte_errno = ENOMEM; 2855 return -rte_errno; 2856 } 2857 MLX5_ASSERT(priv->obj_ops.hrxq_modify); 2858 ret = priv->obj_ops.hrxq_modify(dev, hrxq, rss_key, hash_fields, 2859 symmetric_hash_function, ind_tbl); 2860 if (ret) { 2861 rte_errno = errno; 2862 goto error; 2863 } 2864 if (ind_tbl != hrxq->ind_table) { 2865 MLX5_ASSERT(!hrxq->standalone); 2866 mlx5_ind_table_obj_release(dev, hrxq->ind_table, true); 2867 hrxq->ind_table = ind_tbl; 2868 } 2869 hrxq->hash_fields = hash_fields; 2870 memcpy(hrxq->rss_key, rss_key, rss_key_len); 2871 return 0; 2872 error: 2873 err = rte_errno; 2874 if (ind_tbl != hrxq->ind_table) { 2875 MLX5_ASSERT(!hrxq->standalone); 2876 mlx5_ind_table_obj_release(dev, ind_tbl, true); 2877 } 2878 rte_errno = err; 2879 return -rte_errno; 2880 } 2881 2882 static void 2883 __mlx5_hrxq_remove(struct rte_eth_dev *dev, struct mlx5_hrxq *hrxq) 2884 { 2885 struct mlx5_priv *priv = dev->data->dev_private; 2886 2887 #ifdef HAVE_IBV_FLOW_DV_SUPPORT 2888 if (hrxq->hws_flags) 2889 mlx5dr_action_destroy(hrxq->action); 2890 else 2891 mlx5_glue->destroy_flow_action(hrxq->action); 2892 #endif 2893 priv->obj_ops.hrxq_destroy(hrxq); 2894 if (!hrxq->standalone) { 2895 mlx5_ind_table_obj_release(dev, hrxq->ind_table, 2896 hrxq->hws_flags ? 2897 (!!dev->data->dev_started) : true); 2898 } 2899 mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_HRXQ], hrxq->idx); 2900 } 2901 2902 /** 2903 * Release the hash Rx queue. 2904 * 2905 * @param dev 2906 * Pointer to Ethernet device. 2907 * @param hrxq 2908 * Index to Hash Rx queue to release. 2909 * 2910 * @param list 2911 * mlx5 list pointer. 2912 * @param entry 2913 * Hash queue entry pointer. 2914 */ 2915 void 2916 mlx5_hrxq_remove_cb(void *tool_ctx, struct mlx5_list_entry *entry) 2917 { 2918 struct rte_eth_dev *dev = tool_ctx; 2919 struct mlx5_hrxq *hrxq = container_of(entry, typeof(*hrxq), entry); 2920 2921 __mlx5_hrxq_remove(dev, hrxq); 2922 } 2923 2924 static struct mlx5_hrxq * 2925 __mlx5_hrxq_create(struct rte_eth_dev *dev, 2926 struct mlx5_flow_rss_desc *rss_desc) 2927 { 2928 struct mlx5_priv *priv = dev->data->dev_private; 2929 const uint8_t *rss_key = rss_desc->key; 2930 uint32_t rss_key_len = rss_desc->key_len; 2931 bool standalone = !!rss_desc->shared_rss; 2932 const uint16_t *queues = 2933 standalone ? rss_desc->const_q : rss_desc->queue; 2934 uint32_t queues_n = rss_desc->queue_num; 2935 struct mlx5_hrxq *hrxq = NULL; 2936 uint32_t hrxq_idx = 0; 2937 struct mlx5_ind_table_obj *ind_tbl = rss_desc->ind_tbl; 2938 int ret; 2939 2940 queues_n = rss_desc->hash_fields ? 
queues_n : 1; 2941 if (!ind_tbl && !rss_desc->hws_flags) 2942 ind_tbl = mlx5_ind_table_obj_get(dev, queues, queues_n); 2943 if (!ind_tbl) 2944 ind_tbl = mlx5_ind_table_obj_new(dev, queues, queues_n, 2945 standalone || 2946 rss_desc->hws_flags, 2947 !!dev->data->dev_started); 2948 if (!ind_tbl) 2949 return NULL; 2950 hrxq = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_HRXQ], &hrxq_idx); 2951 if (!hrxq) 2952 goto error; 2953 hrxq->standalone = standalone; 2954 hrxq->idx = hrxq_idx; 2955 hrxq->ind_table = ind_tbl; 2956 hrxq->rss_key_len = rss_key_len; 2957 hrxq->hash_fields = rss_desc->hash_fields; 2958 hrxq->hws_flags = rss_desc->hws_flags; 2959 hrxq->symmetric_hash_function = rss_desc->symmetric_hash_function; 2960 memcpy(hrxq->rss_key, rss_key, rss_key_len); 2961 ret = priv->obj_ops.hrxq_new(dev, hrxq, rss_desc->tunnel); 2962 if (ret < 0) 2963 goto error; 2964 return hrxq; 2965 error: 2966 if (!rss_desc->ind_tbl) 2967 mlx5_ind_table_obj_release(dev, ind_tbl, true); 2968 if (hrxq) 2969 mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_HRXQ], hrxq_idx); 2970 return NULL; 2971 } 2972 2973 struct mlx5_list_entry * 2974 mlx5_hrxq_create_cb(void *tool_ctx, void *cb_ctx) 2975 { 2976 struct rte_eth_dev *dev = tool_ctx; 2977 struct mlx5_flow_cb_ctx *ctx = cb_ctx; 2978 struct mlx5_flow_rss_desc *rss_desc = ctx->data; 2979 struct mlx5_hrxq *hrxq; 2980 2981 hrxq = __mlx5_hrxq_create(dev, rss_desc); 2982 return hrxq ? &hrxq->entry : NULL; 2983 } 2984 2985 struct mlx5_list_entry * 2986 mlx5_hrxq_clone_cb(void *tool_ctx, struct mlx5_list_entry *entry, 2987 void *cb_ctx __rte_unused) 2988 { 2989 struct rte_eth_dev *dev = tool_ctx; 2990 struct mlx5_priv *priv = dev->data->dev_private; 2991 struct mlx5_hrxq *hrxq; 2992 uint32_t hrxq_idx = 0; 2993 2994 hrxq = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_HRXQ], &hrxq_idx); 2995 if (!hrxq) 2996 return NULL; 2997 memcpy(hrxq, entry, sizeof(*hrxq) + MLX5_RSS_HASH_KEY_LEN); 2998 hrxq->idx = hrxq_idx; 2999 return &hrxq->entry; 3000 } 3001 3002 void 3003 mlx5_hrxq_clone_free_cb(void *tool_ctx, struct mlx5_list_entry *entry) 3004 { 3005 struct rte_eth_dev *dev = tool_ctx; 3006 struct mlx5_priv *priv = dev->data->dev_private; 3007 struct mlx5_hrxq *hrxq = container_of(entry, typeof(*hrxq), entry); 3008 3009 mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_HRXQ], hrxq->idx); 3010 } 3011 3012 /** 3013 * Get an Rx Hash queue. 3014 * 3015 * @param dev 3016 * Pointer to Ethernet device. 3017 * @param rss_desc 3018 * RSS configuration for the Rx hash queue. 3019 * 3020 * @return 3021 * An hash Rx queue on success. 3022 */ 3023 struct mlx5_hrxq *mlx5_hrxq_get(struct rte_eth_dev *dev, 3024 struct mlx5_flow_rss_desc *rss_desc) 3025 { 3026 struct mlx5_priv *priv = dev->data->dev_private; 3027 struct mlx5_hrxq *hrxq = NULL; 3028 struct mlx5_list_entry *entry; 3029 struct mlx5_flow_cb_ctx ctx = { 3030 .data = rss_desc, 3031 }; 3032 3033 if (rss_desc->shared_rss) { 3034 hrxq = __mlx5_hrxq_create(dev, rss_desc); 3035 } else { 3036 entry = mlx5_list_register(priv->hrxqs, &ctx); 3037 if (!entry) 3038 return NULL; 3039 hrxq = container_of(entry, typeof(*hrxq), entry); 3040 } 3041 return hrxq; 3042 } 3043 3044 /** 3045 * Release the hash Rx queue. 3046 * 3047 * @param dev 3048 * Pointer to Ethernet device. 3049 * @param hrxq_idx 3050 * Hash Rx queue to release. 3051 * 3052 * @return 3053 * 1 while a reference on it exists, 0 when freed. 
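 *
 * Illustrative pairing with mlx5_hrxq_get() (sketch only; @c rss_desc is
 * assumed to be an already populated struct mlx5_flow_rss_desc and error
 * handling is simplified):
 *
 * @code
 * struct mlx5_hrxq *hrxq = mlx5_hrxq_get(dev, &rss_desc);
 *
 * if (hrxq == NULL)
 *         return -rte_errno;
 * mlx5_hrxq_obj_release(dev, hrxq);
 * @endcode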
3054 */ 3055 int mlx5_hrxq_obj_release(struct rte_eth_dev *dev, struct mlx5_hrxq *hrxq) 3056 { 3057 struct mlx5_priv *priv = dev->data->dev_private; 3058 3059 if (!hrxq) 3060 return 0; 3061 if (!hrxq->standalone) 3062 return mlx5_list_unregister(priv->hrxqs, &hrxq->entry); 3063 __mlx5_hrxq_remove(dev, hrxq); 3064 return 0; 3065 } 3066 3067 /** 3068 * Release the hash Rx queue with index. 3069 * 3070 * @param dev 3071 * Pointer to Ethernet device. 3072 * @param hrxq_idx 3073 * Index to Hash Rx queue to release. 3074 * 3075 * @return 3076 * 1 while a reference on it exists, 0 when freed. 3077 */ 3078 int mlx5_hrxq_release(struct rte_eth_dev *dev, uint32_t hrxq_idx) 3079 { 3080 struct mlx5_priv *priv = dev->data->dev_private; 3081 struct mlx5_hrxq *hrxq; 3082 3083 hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ], hrxq_idx); 3084 return mlx5_hrxq_obj_release(dev, hrxq); 3085 } 3086 3087 /** 3088 * Create a drop Rx Hash queue. 3089 * 3090 * @param dev 3091 * Pointer to Ethernet device. 3092 * 3093 * @return 3094 * The Verbs/DevX object initialized, NULL otherwise and rte_errno is set. 3095 */ 3096 struct mlx5_hrxq * 3097 mlx5_drop_action_create(struct rte_eth_dev *dev) 3098 { 3099 struct mlx5_priv *priv = dev->data->dev_private; 3100 struct mlx5_hrxq *hrxq = NULL; 3101 int ret; 3102 3103 if (priv->drop_queue.hrxq) 3104 return priv->drop_queue.hrxq; 3105 hrxq = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*hrxq) + MLX5_RSS_HASH_KEY_LEN, 0, SOCKET_ID_ANY); 3106 if (!hrxq) { 3107 DRV_LOG(WARNING, 3108 "Port %u cannot allocate memory for drop queue.", 3109 dev->data->port_id); 3110 rte_errno = ENOMEM; 3111 goto error; 3112 } 3113 priv->drop_queue.hrxq = hrxq; 3114 hrxq->ind_table = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*hrxq->ind_table), 3115 0, SOCKET_ID_ANY); 3116 if (!hrxq->ind_table) { 3117 rte_errno = ENOMEM; 3118 goto error; 3119 } 3120 ret = priv->obj_ops.drop_action_create(dev); 3121 if (ret < 0) 3122 goto error; 3123 return hrxq; 3124 error: 3125 if (hrxq) { 3126 if (hrxq->ind_table) 3127 mlx5_free(hrxq->ind_table); 3128 priv->drop_queue.hrxq = NULL; 3129 mlx5_free(hrxq); 3130 } 3131 return NULL; 3132 } 3133 3134 /** 3135 * Release a drop hash Rx queue. 3136 * 3137 * @param dev 3138 * Pointer to Ethernet device. 3139 */ 3140 void 3141 mlx5_drop_action_destroy(struct rte_eth_dev *dev) 3142 { 3143 struct mlx5_priv *priv = dev->data->dev_private; 3144 struct mlx5_hrxq *hrxq = priv->drop_queue.hrxq; 3145 3146 if (!priv->drop_queue.hrxq) 3147 return; 3148 priv->obj_ops.drop_action_destroy(dev); 3149 mlx5_free(priv->drop_queue.rxq); 3150 mlx5_free(hrxq->ind_table); 3151 mlx5_free(hrxq); 3152 priv->drop_queue.rxq = NULL; 3153 priv->drop_queue.hrxq = NULL; 3154 } 3155 3156 /** 3157 * Verify the Rx Queue list is empty 3158 * 3159 * @param dev 3160 * Pointer to Ethernet device. 3161 * 3162 * @return 3163 * The number of object not released. 3164 */ 3165 uint32_t 3166 mlx5_hrxq_verify(struct rte_eth_dev *dev) 3167 { 3168 struct mlx5_priv *priv = dev->data->dev_private; 3169 3170 return mlx5_list_get_entry_num(priv->hrxqs); 3171 } 3172 3173 /** 3174 * Set the Rx queue timestamp conversion parameters 3175 * 3176 * @param[in] dev 3177 * Pointer to the Ethernet device structure. 
3178 */ 3179 void 3180 mlx5_rxq_timestamp_set(struct rte_eth_dev *dev) 3181 { 3182 struct mlx5_priv *priv = dev->data->dev_private; 3183 struct mlx5_dev_ctx_shared *sh = priv->sh; 3184 unsigned int i; 3185 3186 for (i = 0; i != priv->rxqs_n; ++i) { 3187 struct mlx5_rxq_data *data = mlx5_rxq_data_get(dev, i); 3188 3189 if (data == NULL) 3190 continue; 3191 data->sh = sh; 3192 data->rt_timestamp = sh->dev_cap.rt_timestamp; 3193 } 3194 } 3195 3196 /** 3197 * Validate given external RxQ rte_flow index, and get pointer to the corresponding 3198 * external RxQ object to map/unmap. 3199 * 3200 * @param[in] port_id 3201 * The port identifier of the Ethernet device. 3202 * @param[in] dpdk_idx 3203 * Queue index in rte_flow. 3204 * 3205 * @return 3206 * Pointer to the corresponding external RxQ on success, 3207 * NULL otherwise and rte_errno is set. 3208 */ 3209 static struct mlx5_external_q * 3210 mlx5_external_rx_queue_get_validate(uint16_t port_id, uint16_t dpdk_idx) 3211 { 3212 struct rte_eth_dev *dev; 3213 struct mlx5_priv *priv; 3214 int ret; 3215 3216 if (dpdk_idx < RTE_PMD_MLX5_EXTERNAL_RX_QUEUE_ID_MIN) { 3217 DRV_LOG(ERR, "Queue index %u should be in range: [%u, %u].", 3218 dpdk_idx, RTE_PMD_MLX5_EXTERNAL_RX_QUEUE_ID_MIN, UINT16_MAX); 3219 rte_errno = EINVAL; 3220 return NULL; 3221 } 3222 ret = mlx5_devx_extq_port_validate(port_id); 3223 if (unlikely(ret)) 3224 return NULL; 3225 dev = &rte_eth_devices[port_id]; 3226 priv = dev->data->dev_private; 3227 /* 3228 * When the user configures remote PD and CTX and the device creates RxQs by 3229 * DevX, the external RxQs array is allocated. 3230 */ 3231 MLX5_ASSERT(priv->ext_rxqs != NULL); 3232 return &priv->ext_rxqs[dpdk_idx - RTE_PMD_MLX5_EXTERNAL_RX_QUEUE_ID_MIN]; 3233 } 3234 3235 int 3236 rte_pmd_mlx5_external_rx_queue_id_map(uint16_t port_id, uint16_t dpdk_idx, 3237 uint32_t hw_idx) 3238 { 3239 struct mlx5_external_q *ext_rxq; 3240 uint32_t unmapped = 0; 3241 3242 ext_rxq = mlx5_external_rx_queue_get_validate(port_id, dpdk_idx); 3243 if (ext_rxq == NULL) 3244 return -rte_errno; 3245 if (!rte_atomic_compare_exchange_strong_explicit(&ext_rxq->refcnt, &unmapped, 1, 3246 rte_memory_order_relaxed, rte_memory_order_relaxed)) { 3247 if (ext_rxq->hw_id != hw_idx) { 3248 DRV_LOG(ERR, "Port %u external RxQ index %u " 3249 "is already mapped to HW index (requesting is " 3250 "%u, existing is %u).", 3251 port_id, dpdk_idx, hw_idx, ext_rxq->hw_id); 3252 rte_errno = EEXIST; 3253 return -rte_errno; 3254 } 3255 DRV_LOG(WARNING, "Port %u external RxQ index %u " 3256 "is already mapped to the requested HW index (%u)", 3257 port_id, dpdk_idx, hw_idx); 3258 3259 } else { 3260 ext_rxq->hw_id = hw_idx; 3261 DRV_LOG(DEBUG, "Port %u external RxQ index %u " 3262 "is successfully mapped to the requested HW index (%u)", 3263 port_id, dpdk_idx, hw_idx); 3264 } 3265 return 0; 3266 } 3267 3268 int 3269 rte_pmd_mlx5_external_rx_queue_id_unmap(uint16_t port_id, uint16_t dpdk_idx) 3270 { 3271 struct mlx5_external_q *ext_rxq; 3272 uint32_t mapped = 1; 3273 3274 ext_rxq = mlx5_external_rx_queue_get_validate(port_id, dpdk_idx); 3275 if (ext_rxq == NULL) 3276 return -rte_errno; 3277 if (ext_rxq->refcnt > 1) { 3278 DRV_LOG(ERR, "Port %u external RxQ index %u still referenced.", 3279 port_id, dpdk_idx); 3280 rte_errno = EINVAL; 3281 return -rte_errno; 3282 } 3283 if (!rte_atomic_compare_exchange_strong_explicit(&ext_rxq->refcnt, &mapped, 0, 3284 rte_memory_order_relaxed, rte_memory_order_relaxed)) { 3285 DRV_LOG(ERR, "Port %u external RxQ index %u doesn't exist.", 3286 port_id, dpdk_idx); 3287 rte_errno =
EINVAL; 3288 return -rte_errno; 3289 } 3290 DRV_LOG(DEBUG, 3291 "Port %u external RxQ index %u is successfully unmapped.", 3292 port_id, dpdk_idx); 3293 return 0; 3294 } 3295
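
/*
 * Illustrative usage sketch for the external Rx queue mapping API above
 * (not part of the driver; port_id and hw_q_id are placeholders for an
 * application-owned port identifier and the HW index of a queue the
 * application created itself, e.g. through DevX):
 *
 *	uint16_t ext_idx = RTE_PMD_MLX5_EXTERNAL_RX_QUEUE_ID_MIN;
 *
 *	if (rte_pmd_mlx5_external_rx_queue_id_map(port_id, ext_idx,
 *						  hw_q_id) == 0) {
 *		use ext_idx as a queue index in rte_flow rules, then:
 *		rte_pmd_mlx5_external_rx_queue_id_unmap(port_id, ext_idx);
 *	}
 */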