1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2015 6WIND S.A. 3 * Copyright 2015 Mellanox Technologies, Ltd 4 */ 5 6 #include <stddef.h> 7 #include <assert.h> 8 #include <errno.h> 9 #include <string.h> 10 #include <stdint.h> 11 #include <fcntl.h> 12 #include <sys/queue.h> 13 14 /* Verbs header. */ 15 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */ 16 #ifdef PEDANTIC 17 #pragma GCC diagnostic ignored "-Wpedantic" 18 #endif 19 #include <infiniband/verbs.h> 20 #include <infiniband/mlx5dv.h> 21 #ifdef PEDANTIC 22 #pragma GCC diagnostic error "-Wpedantic" 23 #endif 24 25 #include <rte_mbuf.h> 26 #include <rte_malloc.h> 27 #include <rte_ethdev_driver.h> 28 #include <rte_common.h> 29 #include <rte_interrupts.h> 30 #include <rte_debug.h> 31 #include <rte_io.h> 32 33 #include "mlx5.h" 34 #include "mlx5_rxtx.h" 35 #include "mlx5_utils.h" 36 #include "mlx5_autoconf.h" 37 #include "mlx5_defs.h" 38 #include "mlx5_glue.h" 39 40 /* Default RSS hash key also used for ConnectX-3. */ 41 uint8_t rss_hash_default_key[] = { 42 0x2c, 0xc6, 0x81, 0xd1, 43 0x5b, 0xdb, 0xf4, 0xf7, 44 0xfc, 0xa2, 0x83, 0x19, 45 0xdb, 0x1a, 0x3e, 0x94, 46 0x6b, 0x9e, 0x38, 0xd9, 47 0x2c, 0x9c, 0x03, 0xd1, 48 0xad, 0x99, 0x44, 0xa7, 49 0xd9, 0x56, 0x3d, 0x59, 50 0x06, 0x3c, 0x25, 0xf3, 51 0xfc, 0x1f, 0xdc, 0x2a, 52 }; 53 54 /* Length of the default RSS hash key. */ 55 static_assert(MLX5_RSS_HASH_KEY_LEN == 56 (unsigned int)sizeof(rss_hash_default_key), 57 "wrong RSS default key size."); 58 59 /** 60 * Check whether Multi-Packet RQ can be enabled for the device. 61 * 62 * @param dev 63 * Pointer to Ethernet device. 64 * 65 * @return 66 * 1 if supported, negative errno value if not. 67 */ 68 inline int 69 mlx5_check_mprq_support(struct rte_eth_dev *dev) 70 { 71 struct mlx5_priv *priv = dev->data->dev_private; 72 73 if (priv->config.mprq.enabled && 74 priv->rxqs_n >= priv->config.mprq.min_rxqs_num) 75 return 1; 76 return -ENOTSUP; 77 } 78 79 /** 80 * Check whether Multi-Packet RQ is enabled for the Rx queue. 81 * 82 * @param rxq 83 * Pointer to receive queue structure. 84 * 85 * @return 86 * 0 if disabled, otherwise enabled. 87 */ 88 inline int 89 mlx5_rxq_mprq_enabled(struct mlx5_rxq_data *rxq) 90 { 91 return rxq->strd_num_n > 0; 92 } 93 94 /** 95 * Check whether Multi-Packet RQ is enabled for the device. 96 * 97 * @param dev 98 * Pointer to Ethernet device. 99 * 100 * @return 101 * 0 if disabled, otherwise enabled. 102 */ 103 inline int 104 mlx5_mprq_enabled(struct rte_eth_dev *dev) 105 { 106 struct mlx5_priv *priv = dev->data->dev_private; 107 uint16_t i; 108 uint16_t n = 0; 109 110 if (mlx5_check_mprq_support(dev) < 0) 111 return 0; 112 /* All the configured queues should be enabled. */ 113 for (i = 0; i < priv->rxqs_n; ++i) { 114 struct mlx5_rxq_data *rxq = (*priv->rxqs)[i]; 115 116 if (!rxq) 117 continue; 118 if (mlx5_rxq_mprq_enabled(rxq)) 119 ++n; 120 } 121 /* Multi-Packet RQ can't be partially configured. */ 122 assert(n == 0 || n == priv->rxqs_n); 123 return n == priv->rxqs_n; 124 } 125 126 /** 127 * Allocate RX queue elements for Multi-Packet RQ. 128 * 129 * @param rxq_ctrl 130 * Pointer to RX queue structure. 131 * 132 * @return 133 * 0 on success, a negative errno value otherwise and rte_errno is set. 134 */ 135 static int 136 rxq_alloc_elts_mprq(struct mlx5_rxq_ctrl *rxq_ctrl) 137 { 138 struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq; 139 unsigned int wqe_n = 1 << rxq->elts_n; 140 unsigned int i; 141 int err; 142 143 /* Iterate on segments. 
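	 * Note: the loop below requests wqe_n + 1 buffers; the extra buffer
	 * (index wqe_n) is not installed in the ring but kept aside as the
	 * replacement buffer rxq->mprq_repl.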
*/ 144 for (i = 0; i <= wqe_n; ++i) { 145 struct mlx5_mprq_buf *buf; 146 147 if (rte_mempool_get(rxq->mprq_mp, (void **)&buf) < 0) { 148 DRV_LOG(ERR, "port %u empty mbuf pool", rxq->port_id); 149 rte_errno = ENOMEM; 150 goto error; 151 } 152 if (i < wqe_n) 153 (*rxq->mprq_bufs)[i] = buf; 154 else 155 rxq->mprq_repl = buf; 156 } 157 DRV_LOG(DEBUG, 158 "port %u Rx queue %u allocated and configured %u segments", 159 rxq->port_id, rxq->idx, wqe_n); 160 return 0; 161 error: 162 err = rte_errno; /* Save rte_errno before cleanup. */ 163 wqe_n = i; 164 for (i = 0; (i != wqe_n); ++i) { 165 if ((*rxq->mprq_bufs)[i] != NULL) 166 rte_mempool_put(rxq->mprq_mp, 167 (*rxq->mprq_bufs)[i]); 168 (*rxq->mprq_bufs)[i] = NULL; 169 } 170 DRV_LOG(DEBUG, "port %u Rx queue %u failed, freed everything", 171 rxq->port_id, rxq->idx); 172 rte_errno = err; /* Restore rte_errno. */ 173 return -rte_errno; 174 } 175 176 /** 177 * Allocate RX queue elements for Single-Packet RQ. 178 * 179 * @param rxq_ctrl 180 * Pointer to RX queue structure. 181 * 182 * @return 183 * 0 on success, errno value on failure. 184 */ 185 static int 186 rxq_alloc_elts_sprq(struct mlx5_rxq_ctrl *rxq_ctrl) 187 { 188 const unsigned int sges_n = 1 << rxq_ctrl->rxq.sges_n; 189 unsigned int elts_n = 1 << rxq_ctrl->rxq.elts_n; 190 unsigned int i; 191 int err; 192 193 /* Iterate on segments. */ 194 for (i = 0; (i != elts_n); ++i) { 195 struct rte_mbuf *buf; 196 197 buf = rte_pktmbuf_alloc(rxq_ctrl->rxq.mp); 198 if (buf == NULL) { 199 DRV_LOG(ERR, "port %u empty mbuf pool", 200 PORT_ID(rxq_ctrl->priv)); 201 rte_errno = ENOMEM; 202 goto error; 203 } 204 /* Headroom is reserved by rte_pktmbuf_alloc(). */ 205 assert(DATA_OFF(buf) == RTE_PKTMBUF_HEADROOM); 206 /* Buffer is supposed to be empty. */ 207 assert(rte_pktmbuf_data_len(buf) == 0); 208 assert(rte_pktmbuf_pkt_len(buf) == 0); 209 assert(!buf->next); 210 /* Only the first segment keeps headroom. */ 211 if (i % sges_n) 212 SET_DATA_OFF(buf, 0); 213 PORT(buf) = rxq_ctrl->rxq.port_id; 214 DATA_LEN(buf) = rte_pktmbuf_tailroom(buf); 215 PKT_LEN(buf) = DATA_LEN(buf); 216 NB_SEGS(buf) = 1; 217 (*rxq_ctrl->rxq.elts)[i] = buf; 218 } 219 /* If Rx vector is activated. */ 220 if (mlx5_rxq_check_vec_support(&rxq_ctrl->rxq) > 0) { 221 struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq; 222 struct rte_mbuf *mbuf_init = &rxq->fake_mbuf; 223 int j; 224 225 /* Initialize default rearm_data for vPMD. */ 226 mbuf_init->data_off = RTE_PKTMBUF_HEADROOM; 227 rte_mbuf_refcnt_set(mbuf_init, 1); 228 mbuf_init->nb_segs = 1; 229 mbuf_init->port = rxq->port_id; 230 /* 231 * prevent compiler reordering: 232 * rearm_data covers previous fields. 233 */ 234 rte_compiler_barrier(); 235 rxq->mbuf_initializer = 236 *(uint64_t *)&mbuf_init->rearm_data; 237 /* Padding with a fake mbuf for vectorized Rx. */ 238 for (j = 0; j < MLX5_VPMD_DESCS_PER_LOOP; ++j) 239 (*rxq->elts)[elts_n + j] = &rxq->fake_mbuf; 240 } 241 DRV_LOG(DEBUG, 242 "port %u Rx queue %u allocated and configured %u segments" 243 " (max %u packets)", 244 PORT_ID(rxq_ctrl->priv), rxq_ctrl->rxq.idx, elts_n, 245 elts_n / (1 << rxq_ctrl->rxq.sges_n)); 246 return 0; 247 error: 248 err = rte_errno; /* Save rte_errno before cleanup. */ 249 elts_n = i; 250 for (i = 0; (i != elts_n); ++i) { 251 if ((*rxq_ctrl->rxq.elts)[i] != NULL) 252 rte_pktmbuf_free_seg((*rxq_ctrl->rxq.elts)[i]); 253 (*rxq_ctrl->rxq.elts)[i] = NULL; 254 } 255 DRV_LOG(DEBUG, "port %u Rx queue %u failed, freed everything", 256 PORT_ID(rxq_ctrl->priv), rxq_ctrl->rxq.idx); 257 rte_errno = err; /* Restore rte_errno. 
*/ 258 return -rte_errno; 259 } 260 261 /** 262 * Allocate RX queue elements. 263 * 264 * @param rxq_ctrl 265 * Pointer to RX queue structure. 266 * 267 * @return 268 * 0 on success, errno value on failure. 269 */ 270 int 271 rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl) 272 { 273 return mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq) ? 274 rxq_alloc_elts_mprq(rxq_ctrl) : rxq_alloc_elts_sprq(rxq_ctrl); 275 } 276 277 /** 278 * Free RX queue elements for Multi-Packet RQ. 279 * 280 * @param rxq_ctrl 281 * Pointer to RX queue structure. 282 */ 283 static void 284 rxq_free_elts_mprq(struct mlx5_rxq_ctrl *rxq_ctrl) 285 { 286 struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq; 287 uint16_t i; 288 289 DRV_LOG(DEBUG, "port %u Multi-Packet Rx queue %u freeing WRs", 290 rxq->port_id, rxq->idx); 291 if (rxq->mprq_bufs == NULL) 292 return; 293 assert(mlx5_rxq_check_vec_support(rxq) < 0); 294 for (i = 0; (i != (1u << rxq->elts_n)); ++i) { 295 if ((*rxq->mprq_bufs)[i] != NULL) 296 mlx5_mprq_buf_free((*rxq->mprq_bufs)[i]); 297 (*rxq->mprq_bufs)[i] = NULL; 298 } 299 if (rxq->mprq_repl != NULL) { 300 mlx5_mprq_buf_free(rxq->mprq_repl); 301 rxq->mprq_repl = NULL; 302 } 303 } 304 305 /** 306 * Free RX queue elements for Single-Packet RQ. 307 * 308 * @param rxq_ctrl 309 * Pointer to RX queue structure. 310 */ 311 static void 312 rxq_free_elts_sprq(struct mlx5_rxq_ctrl *rxq_ctrl) 313 { 314 struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq; 315 const uint16_t q_n = (1 << rxq->elts_n); 316 const uint16_t q_mask = q_n - 1; 317 uint16_t used = q_n - (rxq->rq_ci - rxq->rq_pi); 318 uint16_t i; 319 320 DRV_LOG(DEBUG, "port %u Rx queue %u freeing WRs", 321 PORT_ID(rxq_ctrl->priv), rxq->idx); 322 if (rxq->elts == NULL) 323 return; 324 /** 325 * Some mbuf in the Ring belongs to the application. They cannot be 326 * freed. 327 */ 328 if (mlx5_rxq_check_vec_support(rxq) > 0) { 329 for (i = 0; i < used; ++i) 330 (*rxq->elts)[(rxq->rq_ci + i) & q_mask] = NULL; 331 rxq->rq_pi = rxq->rq_ci; 332 } 333 for (i = 0; (i != (1u << rxq->elts_n)); ++i) { 334 if ((*rxq->elts)[i] != NULL) 335 rte_pktmbuf_free_seg((*rxq->elts)[i]); 336 (*rxq->elts)[i] = NULL; 337 } 338 } 339 340 /** 341 * Free RX queue elements. 342 * 343 * @param rxq_ctrl 344 * Pointer to RX queue structure. 345 */ 346 static void 347 rxq_free_elts(struct mlx5_rxq_ctrl *rxq_ctrl) 348 { 349 if (mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq)) 350 rxq_free_elts_mprq(rxq_ctrl); 351 else 352 rxq_free_elts_sprq(rxq_ctrl); 353 } 354 355 /** 356 * Returns the per-queue supported offloads. 357 * 358 * @param dev 359 * Pointer to Ethernet device. 360 * 361 * @return 362 * Supported Rx offloads. 363 */ 364 uint64_t 365 mlx5_get_rx_queue_offloads(struct rte_eth_dev *dev) 366 { 367 struct mlx5_priv *priv = dev->data->dev_private; 368 struct mlx5_dev_config *config = &priv->config; 369 uint64_t offloads = (DEV_RX_OFFLOAD_SCATTER | 370 DEV_RX_OFFLOAD_TIMESTAMP | 371 DEV_RX_OFFLOAD_JUMBO_FRAME); 372 373 if (config->hw_fcs_strip) 374 offloads |= DEV_RX_OFFLOAD_KEEP_CRC; 375 376 if (config->hw_csum) 377 offloads |= (DEV_RX_OFFLOAD_IPV4_CKSUM | 378 DEV_RX_OFFLOAD_UDP_CKSUM | 379 DEV_RX_OFFLOAD_TCP_CKSUM); 380 if (config->hw_vlan_strip) 381 offloads |= DEV_RX_OFFLOAD_VLAN_STRIP; 382 return offloads; 383 } 384 385 386 /** 387 * Returns the per-port supported offloads. 388 * 389 * @return 390 * Supported Rx offloads. 391 */ 392 uint64_t 393 mlx5_get_rx_port_offloads(void) 394 { 395 uint64_t offloads = DEV_RX_OFFLOAD_VLAN_FILTER; 396 397 return offloads; 398 } 399 400 /** 401 * Verify if the queue can be released. 
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param idx
 *   RX queue index.
 *
 * @return
 *   1 if the queue can be released,
 *   0 if the queue cannot be released (there are still references to it),
 *   negative errno and rte_errno is set if the queue doesn't exist.
 */
static int
mlx5_rxq_releasable(struct rte_eth_dev *dev, uint16_t idx)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_rxq_ctrl *rxq_ctrl;

	if (!(*priv->rxqs)[idx]) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	rxq_ctrl = container_of((*priv->rxqs)[idx], struct mlx5_rxq_ctrl, rxq);
	return (rte_atomic32_read(&rxq_ctrl->refcnt) == 1);
}

/**
 * DPDK callback to configure a RX queue.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param idx
 *   RX queue index.
 * @param desc
 *   Number of descriptors to configure in queue.
 * @param socket
 *   NUMA socket on which memory must be allocated.
 * @param[in] conf
 *   Thresholds parameters.
 * @param mp
 *   Memory pool for buffer allocations.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
		    unsigned int socket, const struct rte_eth_rxconf *conf,
		    struct rte_mempool *mp)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_rxq_data *rxq = (*priv->rxqs)[idx];
	struct mlx5_rxq_ctrl *rxq_ctrl =
		container_of(rxq, struct mlx5_rxq_ctrl, rxq);

	if (!rte_is_power_of_2(desc)) {
		desc = 1 << log2above(desc);
		DRV_LOG(WARNING,
			"port %u increased number of descriptors in Rx queue %u"
			" to the next power of two (%d)",
			dev->data->port_id, idx, desc);
	}
	DRV_LOG(DEBUG, "port %u configuring Rx queue %u for %u descriptors",
		dev->data->port_id, idx, desc);
	if (idx >= priv->rxqs_n) {
		DRV_LOG(ERR, "port %u Rx queue index out of range (%u >= %u)",
			dev->data->port_id, idx, priv->rxqs_n);
		rte_errno = EOVERFLOW;
		return -rte_errno;
	}
	if (!mlx5_rxq_releasable(dev, idx)) {
		DRV_LOG(ERR, "port %u unable to release queue index %u",
			dev->data->port_id, idx);
		rte_errno = EBUSY;
		return -rte_errno;
	}
	mlx5_rxq_release(dev, idx);
	rxq_ctrl = mlx5_rxq_new(dev, idx, desc, socket, conf, mp);
	if (!rxq_ctrl) {
		DRV_LOG(ERR, "port %u unable to allocate queue index %u",
			dev->data->port_id, idx);
		rte_errno = ENOMEM;
		return -rte_errno;
	}
	DRV_LOG(DEBUG, "port %u adding Rx queue %u to list",
		dev->data->port_id, idx);
	(*priv->rxqs)[idx] = &rxq_ctrl->rxq;
	return 0;
}

/**
 * DPDK callback to release a RX queue.
 *
 * @param dpdk_rxq
 *   Generic RX queue pointer.
 */
void
mlx5_rx_queue_release(void *dpdk_rxq)
{
	struct mlx5_rxq_data *rxq = (struct mlx5_rxq_data *)dpdk_rxq;
	struct mlx5_rxq_ctrl *rxq_ctrl;
	struct mlx5_priv *priv;

	if (rxq == NULL)
		return;
	rxq_ctrl = container_of(rxq, struct mlx5_rxq_ctrl, rxq);
	priv = rxq_ctrl->priv;
	if (!mlx5_rxq_releasable(ETH_DEV(priv), rxq_ctrl->rxq.idx))
		rte_panic("port %u Rx queue %u is still used by a flow and"
			  " cannot be removed\n",
			  PORT_ID(priv), rxq->idx);
	mlx5_rxq_release(ETH_DEV(priv), rxq_ctrl->rxq.idx);
}

/**
 * Get an Rx queue Verbs object.
 *
 * @param dev
 *   Pointer to Ethernet device.
519 * @param idx 520 * Queue index in DPDK Rx queue array 521 * 522 * @return 523 * The Verbs object if it exists. 524 */ 525 static struct mlx5_rxq_ibv * 526 mlx5_rxq_ibv_get(struct rte_eth_dev *dev, uint16_t idx) 527 { 528 struct mlx5_priv *priv = dev->data->dev_private; 529 struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx]; 530 struct mlx5_rxq_ctrl *rxq_ctrl; 531 532 if (idx >= priv->rxqs_n) 533 return NULL; 534 if (!rxq_data) 535 return NULL; 536 rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq); 537 if (rxq_ctrl->ibv) 538 rte_atomic32_inc(&rxq_ctrl->ibv->refcnt); 539 return rxq_ctrl->ibv; 540 } 541 542 /** 543 * Release an Rx verbs queue object. 544 * 545 * @param rxq_ibv 546 * Verbs Rx queue object. 547 * 548 * @return 549 * 1 while a reference on it exists, 0 when freed. 550 */ 551 static int 552 mlx5_rxq_ibv_release(struct mlx5_rxq_ibv *rxq_ibv) 553 { 554 assert(rxq_ibv); 555 assert(rxq_ibv->wq); 556 assert(rxq_ibv->cq); 557 if (rte_atomic32_dec_and_test(&rxq_ibv->refcnt)) { 558 rxq_free_elts(rxq_ibv->rxq_ctrl); 559 claim_zero(mlx5_glue->destroy_wq(rxq_ibv->wq)); 560 claim_zero(mlx5_glue->destroy_cq(rxq_ibv->cq)); 561 if (rxq_ibv->channel) 562 claim_zero(mlx5_glue->destroy_comp_channel 563 (rxq_ibv->channel)); 564 LIST_REMOVE(rxq_ibv, next); 565 rte_free(rxq_ibv); 566 return 0; 567 } 568 return 1; 569 } 570 571 /** 572 * Allocate queue vector and fill epoll fd list for Rx interrupts. 573 * 574 * @param dev 575 * Pointer to Ethernet device. 576 * 577 * @return 578 * 0 on success, a negative errno value otherwise and rte_errno is set. 579 */ 580 int 581 mlx5_rx_intr_vec_enable(struct rte_eth_dev *dev) 582 { 583 struct mlx5_priv *priv = dev->data->dev_private; 584 unsigned int i; 585 unsigned int rxqs_n = priv->rxqs_n; 586 unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID); 587 unsigned int count = 0; 588 struct rte_intr_handle *intr_handle = dev->intr_handle; 589 590 if (!dev->data->dev_conf.intr_conf.rxq) 591 return 0; 592 mlx5_rx_intr_vec_disable(dev); 593 intr_handle->intr_vec = malloc(n * sizeof(intr_handle->intr_vec[0])); 594 if (intr_handle->intr_vec == NULL) { 595 DRV_LOG(ERR, 596 "port %u failed to allocate memory for interrupt" 597 " vector, Rx interrupts will not be supported", 598 dev->data->port_id); 599 rte_errno = ENOMEM; 600 return -rte_errno; 601 } 602 intr_handle->type = RTE_INTR_HANDLE_EXT; 603 for (i = 0; i != n; ++i) { 604 /* This rxq ibv must not be released in this function. */ 605 struct mlx5_rxq_ibv *rxq_ibv = mlx5_rxq_ibv_get(dev, i); 606 int fd; 607 int flags; 608 int rc; 609 610 /* Skip queues that cannot request interrupts. */ 611 if (!rxq_ibv || !rxq_ibv->channel) { 612 /* Use invalid intr_vec[] index to disable entry. 
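			 * RTE_INTR_VEC_RXTX_OFFSET + RTE_MAX_RXTX_INTR_VEC_ID lies
			 * outside the range of vectors assigned below, which
			 * marks the entry as disabled.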
*/ 613 intr_handle->intr_vec[i] = 614 RTE_INTR_VEC_RXTX_OFFSET + 615 RTE_MAX_RXTX_INTR_VEC_ID; 616 continue; 617 } 618 if (count >= RTE_MAX_RXTX_INTR_VEC_ID) { 619 DRV_LOG(ERR, 620 "port %u too many Rx queues for interrupt" 621 " vector size (%d), Rx interrupts cannot be" 622 " enabled", 623 dev->data->port_id, RTE_MAX_RXTX_INTR_VEC_ID); 624 mlx5_rx_intr_vec_disable(dev); 625 rte_errno = ENOMEM; 626 return -rte_errno; 627 } 628 fd = rxq_ibv->channel->fd; 629 flags = fcntl(fd, F_GETFL); 630 rc = fcntl(fd, F_SETFL, flags | O_NONBLOCK); 631 if (rc < 0) { 632 rte_errno = errno; 633 DRV_LOG(ERR, 634 "port %u failed to make Rx interrupt file" 635 " descriptor %d non-blocking for queue index" 636 " %d", 637 dev->data->port_id, fd, i); 638 mlx5_rx_intr_vec_disable(dev); 639 return -rte_errno; 640 } 641 intr_handle->intr_vec[i] = RTE_INTR_VEC_RXTX_OFFSET + count; 642 intr_handle->efds[count] = fd; 643 count++; 644 } 645 if (!count) 646 mlx5_rx_intr_vec_disable(dev); 647 else 648 intr_handle->nb_efd = count; 649 return 0; 650 } 651 652 /** 653 * Clean up Rx interrupts handler. 654 * 655 * @param dev 656 * Pointer to Ethernet device. 657 */ 658 void 659 mlx5_rx_intr_vec_disable(struct rte_eth_dev *dev) 660 { 661 struct mlx5_priv *priv = dev->data->dev_private; 662 struct rte_intr_handle *intr_handle = dev->intr_handle; 663 unsigned int i; 664 unsigned int rxqs_n = priv->rxqs_n; 665 unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID); 666 667 if (!dev->data->dev_conf.intr_conf.rxq) 668 return; 669 if (!intr_handle->intr_vec) 670 goto free; 671 for (i = 0; i != n; ++i) { 672 struct mlx5_rxq_ctrl *rxq_ctrl; 673 struct mlx5_rxq_data *rxq_data; 674 675 if (intr_handle->intr_vec[i] == RTE_INTR_VEC_RXTX_OFFSET + 676 RTE_MAX_RXTX_INTR_VEC_ID) 677 continue; 678 /** 679 * Need to access directly the queue to release the reference 680 * kept in mlx5_rx_intr_vec_enable(). 681 */ 682 rxq_data = (*priv->rxqs)[i]; 683 rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq); 684 if (rxq_ctrl->ibv) 685 mlx5_rxq_ibv_release(rxq_ctrl->ibv); 686 } 687 free: 688 rte_intr_free_epoll_fd(intr_handle); 689 if (intr_handle->intr_vec) 690 free(intr_handle->intr_vec); 691 intr_handle->nb_efd = 0; 692 intr_handle->intr_vec = NULL; 693 } 694 695 /** 696 * MLX5 CQ notification . 697 * 698 * @param rxq 699 * Pointer to receive queue structure. 700 * @param sq_n_rxq 701 * Sequence number per receive queue . 702 */ 703 static inline void 704 mlx5_arm_cq(struct mlx5_rxq_data *rxq, int sq_n_rxq) 705 { 706 int sq_n = 0; 707 uint32_t doorbell_hi; 708 uint64_t doorbell; 709 void *cq_db_reg = (char *)rxq->cq_uar + MLX5_CQ_DOORBELL; 710 711 sq_n = sq_n_rxq & MLX5_CQ_SQN_MASK; 712 doorbell_hi = sq_n << MLX5_CQ_SQN_OFFSET | (rxq->cq_ci & MLX5_CI_MASK); 713 doorbell = (uint64_t)doorbell_hi << 32; 714 doorbell |= rxq->cqn; 715 rxq->cq_db[MLX5_CQ_ARM_DB] = rte_cpu_to_be_32(doorbell_hi); 716 mlx5_uar_write64(rte_cpu_to_be_64(doorbell), 717 cq_db_reg, rxq->uar_lock_cq); 718 } 719 720 /** 721 * DPDK callback for Rx queue interrupt enable. 722 * 723 * @param dev 724 * Pointer to Ethernet device structure. 725 * @param rx_queue_id 726 * Rx queue number. 727 * 728 * @return 729 * 0 on success, a negative errno value otherwise and rte_errno is set. 
730 */ 731 int 732 mlx5_rx_intr_enable(struct rte_eth_dev *dev, uint16_t rx_queue_id) 733 { 734 struct mlx5_priv *priv = dev->data->dev_private; 735 struct mlx5_rxq_data *rxq_data; 736 struct mlx5_rxq_ctrl *rxq_ctrl; 737 738 rxq_data = (*priv->rxqs)[rx_queue_id]; 739 if (!rxq_data) { 740 rte_errno = EINVAL; 741 return -rte_errno; 742 } 743 rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq); 744 if (rxq_ctrl->irq) { 745 struct mlx5_rxq_ibv *rxq_ibv; 746 747 rxq_ibv = mlx5_rxq_ibv_get(dev, rx_queue_id); 748 if (!rxq_ibv) { 749 rte_errno = EINVAL; 750 return -rte_errno; 751 } 752 mlx5_arm_cq(rxq_data, rxq_data->cq_arm_sn); 753 mlx5_rxq_ibv_release(rxq_ibv); 754 } 755 return 0; 756 } 757 758 /** 759 * DPDK callback for Rx queue interrupt disable. 760 * 761 * @param dev 762 * Pointer to Ethernet device structure. 763 * @param rx_queue_id 764 * Rx queue number. 765 * 766 * @return 767 * 0 on success, a negative errno value otherwise and rte_errno is set. 768 */ 769 int 770 mlx5_rx_intr_disable(struct rte_eth_dev *dev, uint16_t rx_queue_id) 771 { 772 struct mlx5_priv *priv = dev->data->dev_private; 773 struct mlx5_rxq_data *rxq_data; 774 struct mlx5_rxq_ctrl *rxq_ctrl; 775 struct mlx5_rxq_ibv *rxq_ibv = NULL; 776 struct ibv_cq *ev_cq; 777 void *ev_ctx; 778 int ret; 779 780 rxq_data = (*priv->rxqs)[rx_queue_id]; 781 if (!rxq_data) { 782 rte_errno = EINVAL; 783 return -rte_errno; 784 } 785 rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq); 786 if (!rxq_ctrl->irq) 787 return 0; 788 rxq_ibv = mlx5_rxq_ibv_get(dev, rx_queue_id); 789 if (!rxq_ibv) { 790 rte_errno = EINVAL; 791 return -rte_errno; 792 } 793 ret = mlx5_glue->get_cq_event(rxq_ibv->channel, &ev_cq, &ev_ctx); 794 if (ret || ev_cq != rxq_ibv->cq) { 795 rte_errno = EINVAL; 796 goto exit; 797 } 798 rxq_data->cq_arm_sn++; 799 mlx5_glue->ack_cq_events(rxq_ibv->cq, 1); 800 mlx5_rxq_ibv_release(rxq_ibv); 801 return 0; 802 exit: 803 ret = rte_errno; /* Save rte_errno before cleanup. */ 804 if (rxq_ibv) 805 mlx5_rxq_ibv_release(rxq_ibv); 806 DRV_LOG(WARNING, "port %u unable to disable interrupt on Rx queue %d", 807 dev->data->port_id, rx_queue_id); 808 rte_errno = ret; /* Restore rte_errno. */ 809 return -rte_errno; 810 } 811 812 /** 813 * Create the Rx queue Verbs object. 814 * 815 * @param dev 816 * Pointer to Ethernet device. 817 * @param idx 818 * Queue index in DPDK Rx queue array 819 * 820 * @return 821 * The Verbs object initialised, NULL otherwise and rte_errno is set. 
822 */ 823 struct mlx5_rxq_ibv * 824 mlx5_rxq_ibv_new(struct rte_eth_dev *dev, uint16_t idx) 825 { 826 struct mlx5_priv *priv = dev->data->dev_private; 827 struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx]; 828 struct mlx5_rxq_ctrl *rxq_ctrl = 829 container_of(rxq_data, struct mlx5_rxq_ctrl, rxq); 830 struct ibv_wq_attr mod; 831 union { 832 struct { 833 struct ibv_cq_init_attr_ex ibv; 834 struct mlx5dv_cq_init_attr mlx5; 835 } cq; 836 struct { 837 struct ibv_wq_init_attr ibv; 838 #ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT 839 struct mlx5dv_wq_init_attr mlx5; 840 #endif 841 } wq; 842 struct ibv_cq_ex cq_attr; 843 } attr; 844 unsigned int cqe_n; 845 unsigned int wqe_n = 1 << rxq_data->elts_n; 846 struct mlx5_rxq_ibv *tmpl = NULL; 847 struct mlx5dv_cq cq_info; 848 struct mlx5dv_rwq rwq; 849 unsigned int i; 850 int ret = 0; 851 struct mlx5dv_obj obj; 852 struct mlx5_dev_config *config = &priv->config; 853 const int mprq_en = mlx5_rxq_mprq_enabled(rxq_data); 854 855 assert(rxq_data); 856 assert(!rxq_ctrl->ibv); 857 priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_RX_QUEUE; 858 priv->verbs_alloc_ctx.obj = rxq_ctrl; 859 tmpl = rte_calloc_socket(__func__, 1, sizeof(*tmpl), 0, 860 rxq_ctrl->socket); 861 if (!tmpl) { 862 DRV_LOG(ERR, 863 "port %u Rx queue %u cannot allocate verbs resources", 864 dev->data->port_id, rxq_data->idx); 865 rte_errno = ENOMEM; 866 goto error; 867 } 868 tmpl->rxq_ctrl = rxq_ctrl; 869 if (rxq_ctrl->irq) { 870 tmpl->channel = mlx5_glue->create_comp_channel(priv->sh->ctx); 871 if (!tmpl->channel) { 872 DRV_LOG(ERR, "port %u: comp channel creation failure", 873 dev->data->port_id); 874 rte_errno = ENOMEM; 875 goto error; 876 } 877 } 878 if (mprq_en) 879 cqe_n = wqe_n * (1 << rxq_data->strd_num_n) - 1; 880 else 881 cqe_n = wqe_n - 1; 882 attr.cq.ibv = (struct ibv_cq_init_attr_ex){ 883 .cqe = cqe_n, 884 .channel = tmpl->channel, 885 .comp_mask = 0, 886 }; 887 attr.cq.mlx5 = (struct mlx5dv_cq_init_attr){ 888 .comp_mask = 0, 889 }; 890 if (config->cqe_comp && !rxq_data->hw_timestamp) { 891 attr.cq.mlx5.comp_mask |= 892 MLX5DV_CQ_INIT_ATTR_MASK_COMPRESSED_CQE; 893 #ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT 894 attr.cq.mlx5.cqe_comp_res_format = 895 mprq_en ? MLX5DV_CQE_RES_FORMAT_CSUM_STRIDX : 896 MLX5DV_CQE_RES_FORMAT_HASH; 897 #else 898 attr.cq.mlx5.cqe_comp_res_format = MLX5DV_CQE_RES_FORMAT_HASH; 899 #endif 900 /* 901 * For vectorized Rx, it must not be doubled in order to 902 * make cq_ci and rq_ci aligned. 
903 */ 904 if (mlx5_rxq_check_vec_support(rxq_data) < 0) 905 attr.cq.ibv.cqe *= 2; 906 } else if (config->cqe_comp && rxq_data->hw_timestamp) { 907 DRV_LOG(DEBUG, 908 "port %u Rx CQE compression is disabled for HW" 909 " timestamp", 910 dev->data->port_id); 911 } 912 #ifdef HAVE_IBV_MLX5_MOD_CQE_128B_PAD 913 if (config->cqe_pad) { 914 attr.cq.mlx5.comp_mask |= MLX5DV_CQ_INIT_ATTR_MASK_FLAGS; 915 attr.cq.mlx5.flags |= MLX5DV_CQ_INIT_ATTR_FLAGS_CQE_PAD; 916 } 917 #endif 918 tmpl->cq = mlx5_glue->cq_ex_to_cq 919 (mlx5_glue->dv_create_cq(priv->sh->ctx, &attr.cq.ibv, 920 &attr.cq.mlx5)); 921 if (tmpl->cq == NULL) { 922 DRV_LOG(ERR, "port %u Rx queue %u CQ creation failure", 923 dev->data->port_id, idx); 924 rte_errno = ENOMEM; 925 goto error; 926 } 927 DRV_LOG(DEBUG, "port %u device_attr.max_qp_wr is %d", 928 dev->data->port_id, priv->sh->device_attr.orig_attr.max_qp_wr); 929 DRV_LOG(DEBUG, "port %u device_attr.max_sge is %d", 930 dev->data->port_id, priv->sh->device_attr.orig_attr.max_sge); 931 attr.wq.ibv = (struct ibv_wq_init_attr){ 932 .wq_context = NULL, /* Could be useful in the future. */ 933 .wq_type = IBV_WQT_RQ, 934 /* Max number of outstanding WRs. */ 935 .max_wr = wqe_n >> rxq_data->sges_n, 936 /* Max number of scatter/gather elements in a WR. */ 937 .max_sge = 1 << rxq_data->sges_n, 938 .pd = priv->sh->pd, 939 .cq = tmpl->cq, 940 .comp_mask = 941 IBV_WQ_FLAGS_CVLAN_STRIPPING | 942 0, 943 .create_flags = (rxq_data->vlan_strip ? 944 IBV_WQ_FLAGS_CVLAN_STRIPPING : 945 0), 946 }; 947 /* By default, FCS (CRC) is stripped by hardware. */ 948 if (rxq_data->crc_present) { 949 attr.wq.ibv.create_flags |= IBV_WQ_FLAGS_SCATTER_FCS; 950 attr.wq.ibv.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS; 951 } 952 if (config->hw_padding) { 953 #if defined(HAVE_IBV_WQ_FLAG_RX_END_PADDING) 954 attr.wq.ibv.create_flags |= IBV_WQ_FLAG_RX_END_PADDING; 955 attr.wq.ibv.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS; 956 #elif defined(HAVE_IBV_WQ_FLAGS_PCI_WRITE_END_PADDING) 957 attr.wq.ibv.create_flags |= IBV_WQ_FLAGS_PCI_WRITE_END_PADDING; 958 attr.wq.ibv.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS; 959 #endif 960 } 961 #ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT 962 attr.wq.mlx5 = (struct mlx5dv_wq_init_attr){ 963 .comp_mask = 0, 964 }; 965 if (mprq_en) { 966 struct mlx5dv_striding_rq_init_attr *mprq_attr = 967 &attr.wq.mlx5.striding_rq_attrs; 968 969 attr.wq.mlx5.comp_mask |= MLX5DV_WQ_INIT_ATTR_MASK_STRIDING_RQ; 970 *mprq_attr = (struct mlx5dv_striding_rq_init_attr){ 971 .single_stride_log_num_of_bytes = rxq_data->strd_sz_n, 972 .single_wqe_log_num_of_strides = rxq_data->strd_num_n, 973 .two_byte_shift_en = MLX5_MPRQ_TWO_BYTE_SHIFT, 974 }; 975 } 976 tmpl->wq = mlx5_glue->dv_create_wq(priv->sh->ctx, &attr.wq.ibv, 977 &attr.wq.mlx5); 978 #else 979 tmpl->wq = mlx5_glue->create_wq(priv->sh->ctx, &attr.wq.ibv); 980 #endif 981 if (tmpl->wq == NULL) { 982 DRV_LOG(ERR, "port %u Rx queue %u WQ creation failure", 983 dev->data->port_id, idx); 984 rte_errno = ENOMEM; 985 goto error; 986 } 987 /* 988 * Make sure number of WRs*SGEs match expectations since a queue 989 * cannot allocate more than "desc" buffers. 990 */ 991 if (attr.wq.ibv.max_wr != (wqe_n >> rxq_data->sges_n) || 992 attr.wq.ibv.max_sge != (1u << rxq_data->sges_n)) { 993 DRV_LOG(ERR, 994 "port %u Rx queue %u requested %u*%u but got %u*%u" 995 " WRs*SGEs", 996 dev->data->port_id, idx, 997 wqe_n >> rxq_data->sges_n, (1 << rxq_data->sges_n), 998 attr.wq.ibv.max_wr, attr.wq.ibv.max_sge); 999 rte_errno = EINVAL; 1000 goto error; 1001 } 1002 /* Change queue state to ready. 
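	 * The WQ is created in the RESET state; it has to be moved to RDY
	 * before it can receive traffic.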
*/ 1003 mod = (struct ibv_wq_attr){ 1004 .attr_mask = IBV_WQ_ATTR_STATE, 1005 .wq_state = IBV_WQS_RDY, 1006 }; 1007 ret = mlx5_glue->modify_wq(tmpl->wq, &mod); 1008 if (ret) { 1009 DRV_LOG(ERR, 1010 "port %u Rx queue %u WQ state to IBV_WQS_RDY failed", 1011 dev->data->port_id, idx); 1012 rte_errno = ret; 1013 goto error; 1014 } 1015 obj.cq.in = tmpl->cq; 1016 obj.cq.out = &cq_info; 1017 obj.rwq.in = tmpl->wq; 1018 obj.rwq.out = &rwq; 1019 ret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_CQ | MLX5DV_OBJ_RWQ); 1020 if (ret) { 1021 rte_errno = ret; 1022 goto error; 1023 } 1024 if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) { 1025 DRV_LOG(ERR, 1026 "port %u wrong MLX5_CQE_SIZE environment variable" 1027 " value: it should be set to %u", 1028 dev->data->port_id, RTE_CACHE_LINE_SIZE); 1029 rte_errno = EINVAL; 1030 goto error; 1031 } 1032 /* Fill the rings. */ 1033 rxq_data->wqes = rwq.buf; 1034 for (i = 0; (i != wqe_n); ++i) { 1035 volatile struct mlx5_wqe_data_seg *scat; 1036 uintptr_t addr; 1037 uint32_t byte_count; 1038 1039 if (mprq_en) { 1040 struct mlx5_mprq_buf *buf = (*rxq_data->mprq_bufs)[i]; 1041 1042 scat = &((volatile struct mlx5_wqe_mprq *) 1043 rxq_data->wqes)[i].dseg; 1044 addr = (uintptr_t)mlx5_mprq_buf_addr(buf); 1045 byte_count = (1 << rxq_data->strd_sz_n) * 1046 (1 << rxq_data->strd_num_n); 1047 } else { 1048 struct rte_mbuf *buf = (*rxq_data->elts)[i]; 1049 1050 scat = &((volatile struct mlx5_wqe_data_seg *) 1051 rxq_data->wqes)[i]; 1052 addr = rte_pktmbuf_mtod(buf, uintptr_t); 1053 byte_count = DATA_LEN(buf); 1054 } 1055 /* scat->addr must be able to store a pointer. */ 1056 assert(sizeof(scat->addr) >= sizeof(uintptr_t)); 1057 *scat = (struct mlx5_wqe_data_seg){ 1058 .addr = rte_cpu_to_be_64(addr), 1059 .byte_count = rte_cpu_to_be_32(byte_count), 1060 .lkey = mlx5_rx_addr2mr(rxq_data, addr), 1061 }; 1062 } 1063 rxq_data->rq_db = rwq.dbrec; 1064 rxq_data->cqe_n = log2above(cq_info.cqe_cnt); 1065 rxq_data->cq_ci = 0; 1066 rxq_data->consumed_strd = 0; 1067 rxq_data->rq_pi = 0; 1068 rxq_data->zip = (struct rxq_zip){ 1069 .ai = 0, 1070 }; 1071 rxq_data->cq_db = cq_info.dbrec; 1072 rxq_data->cqes = (volatile struct mlx5_cqe (*)[])(uintptr_t)cq_info.buf; 1073 rxq_data->cq_uar = cq_info.cq_uar; 1074 rxq_data->cqn = cq_info.cqn; 1075 rxq_data->cq_arm_sn = 0; 1076 /* Update doorbell counter. */ 1077 rxq_data->rq_ci = wqe_n >> rxq_data->sges_n; 1078 rte_wmb(); 1079 *rxq_data->rq_db = rte_cpu_to_be_32(rxq_data->rq_ci); 1080 DRV_LOG(DEBUG, "port %u rxq %u updated with %p", dev->data->port_id, 1081 idx, (void *)&tmpl); 1082 rte_atomic32_inc(&tmpl->refcnt); 1083 LIST_INSERT_HEAD(&priv->rxqsibv, tmpl, next); 1084 priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE; 1085 return tmpl; 1086 error: 1087 if (tmpl) { 1088 ret = rte_errno; /* Save rte_errno before cleanup. */ 1089 if (tmpl->wq) 1090 claim_zero(mlx5_glue->destroy_wq(tmpl->wq)); 1091 if (tmpl->cq) 1092 claim_zero(mlx5_glue->destroy_cq(tmpl->cq)); 1093 if (tmpl->channel) 1094 claim_zero(mlx5_glue->destroy_comp_channel 1095 (tmpl->channel)); 1096 rte_free(tmpl); 1097 rte_errno = ret; /* Restore rte_errno. */ 1098 } 1099 priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE; 1100 return NULL; 1101 } 1102 1103 /** 1104 * Verify the Verbs Rx queue list is empty 1105 * 1106 * @param dev 1107 * Pointer to Ethernet device. 1108 * 1109 * @return 1110 * The number of object not released. 
 */
int
mlx5_rxq_ibv_verify(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	int ret = 0;
	struct mlx5_rxq_ibv *rxq_ibv;

	LIST_FOREACH(rxq_ibv, &priv->rxqsibv, next) {
		DRV_LOG(DEBUG, "port %u Verbs Rx queue %u still referenced",
			dev->data->port_id, rxq_ibv->rxq_ctrl->rxq.idx);
		++ret;
	}
	return ret;
}

/**
 * Callback function to initialize mbufs for Multi-Packet RQ.
 */
static inline void
mlx5_mprq_buf_init(struct rte_mempool *mp, void *opaque_arg __rte_unused,
		   void *_m, unsigned int i __rte_unused)
{
	struct mlx5_mprq_buf *buf = _m;

	memset(_m, 0, sizeof(*buf));
	buf->mp = mp;
	rte_atomic16_set(&buf->refcnt, 1);
}

/**
 * Free mempool of Multi-Packet RQ.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   0 on success, negative errno value on failure.
 */
int
mlx5_mprq_free_mp(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct rte_mempool *mp = priv->mprq_mp;
	unsigned int i;

	if (mp == NULL)
		return 0;
	DRV_LOG(DEBUG, "port %u freeing mempool (%s) for Multi-Packet RQ",
		dev->data->port_id, mp->name);
	/*
	 * If a buffer in the pool has been externally attached to a mbuf and
	 * is still in use by the application, destroying the Rx queue can
	 * spoil the packet. It is unlikely to happen, but if the application
	 * dynamically creates and destroys queues while still holding Rx
	 * packets, it can happen.
	 *
	 * TODO: It is unavoidable for now because the mempool for Multi-Packet
	 * RQ isn't provided by the application but managed by the PMD.
	 */
	if (!rte_mempool_full(mp)) {
		DRV_LOG(ERR,
			"port %u mempool for Multi-Packet RQ is still in use",
			dev->data->port_id);
		rte_errno = EBUSY;
		return -rte_errno;
	}
	rte_mempool_free(mp);
	/* Unset mempool for each Rx queue. */
	for (i = 0; i != priv->rxqs_n; ++i) {
		struct mlx5_rxq_data *rxq = (*priv->rxqs)[i];

		if (rxq == NULL)
			continue;
		rxq->mprq_mp = NULL;
	}
	priv->mprq_mp = NULL;
	return 0;
}

/**
 * Allocate a mempool for Multi-Packet RQ. All configured Rx queues share the
 * mempool. If already allocated, reuse it if there are enough elements.
 * Otherwise, resize it.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   0 on success, negative errno value on failure.
 */
int
mlx5_mprq_alloc_mp(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct rte_mempool *mp = priv->mprq_mp;
	char name[RTE_MEMPOOL_NAMESIZE];
	unsigned int desc = 0;
	unsigned int buf_len;
	unsigned int obj_num;
	unsigned int obj_size;
	unsigned int strd_num_n = 0;
	unsigned int strd_sz_n = 0;
	unsigned int i;

	if (!mlx5_mprq_enabled(dev))
		return 0;
	/* Count the total number of descriptors configured. */
	for (i = 0; i != priv->rxqs_n; ++i) {
		struct mlx5_rxq_data *rxq = (*priv->rxqs)[i];

		if (rxq == NULL)
			continue;
		desc += 1 << rxq->elts_n;
		/* Get the max number of strides. */
		if (strd_num_n < rxq->strd_num_n)
			strd_num_n = rxq->strd_num_n;
		/* Get the max size of a stride.
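		 * The maximum over all queues is used so that a single mempool
		 * can serve every Multi-Packet RQ regardless of its stride
		 * layout.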
		 */
		if (strd_sz_n < rxq->strd_sz_n)
			strd_sz_n = rxq->strd_sz_n;
	}
	assert(strd_num_n && strd_sz_n);
	buf_len = (1 << strd_num_n) * (1 << strd_sz_n);
	obj_size = buf_len + sizeof(struct mlx5_mprq_buf);
	/*
	 * Received packets can be either memcpy'd or externally referenced.
	 * When a packet is attached to an mbuf as an external buffer, it is
	 * impossible to predict how the buffers will be queued by the
	 * application, so there is no way to pre-allocate the exact number of
	 * buffers; the PMD can only speculatively prepare enough of them.
	 *
	 * In the data path, if this mempool is depleted, the PMD will try to
	 * memcpy received packets into buffers provided by the application
	 * (rxq->mp) until this mempool becomes available again.
	 */
	desc *= 4;
	obj_num = desc + MLX5_MPRQ_MP_CACHE_SZ * priv->rxqs_n;
	/*
	 * rte_mempool_create_empty() has a sanity check refusing a cache size
	 * that is too large compared to the number of elements.
	 * CACHE_FLUSHTHRESH_MULTIPLIER is defined in a C file, so use the
	 * constant 2 instead.
	 */
	obj_num = RTE_MAX(obj_num, MLX5_MPRQ_MP_CACHE_SZ * 2);
	/* Check whether a mempool is already allocated and can be reused. */
	if (mp != NULL && mp->elt_size >= obj_size && mp->size >= obj_num) {
		DRV_LOG(DEBUG, "port %u mempool %s is being reused",
			dev->data->port_id, mp->name);
		/* Reuse. */
		goto exit;
	} else if (mp != NULL) {
		DRV_LOG(DEBUG, "port %u mempool %s should be resized, freeing it",
			dev->data->port_id, mp->name);
		/*
		 * If it cannot be freed, which means it may still be in use,
		 * there is no choice but to keep using the existing one. On
		 * buffer underrun, packets will be memcpy'd instead of being
		 * attached as external buffers.
		 */
		if (mlx5_mprq_free_mp(dev)) {
			if (mp->elt_size >= obj_size)
				goto exit;
			else
				return -rte_errno;
		}
	}
	snprintf(name, sizeof(name), "port-%u-mprq", dev->data->port_id);
	mp = rte_mempool_create(name, obj_num, obj_size, MLX5_MPRQ_MP_CACHE_SZ,
				0, NULL, NULL, mlx5_mprq_buf_init, NULL,
				dev->device->numa_node, 0);
	if (mp == NULL) {
		DRV_LOG(ERR,
			"port %u failed to allocate a mempool for"
			" Multi-Packet RQ, count=%u, size=%u",
			dev->data->port_id, obj_num, obj_size);
		rte_errno = ENOMEM;
		return -rte_errno;
	}
	priv->mprq_mp = mp;
exit:
	/* Set mempool for each Rx queue. */
	for (i = 0; i != priv->rxqs_n; ++i) {
		struct mlx5_rxq_data *rxq = (*priv->rxqs)[i];

		if (rxq == NULL)
			continue;
		rxq->mprq_mp = mp;
	}
	DRV_LOG(INFO, "port %u Multi-Packet RQ is configured",
		dev->data->port_id);
	return 0;
}

/**
 * Create a DPDK Rx queue.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param idx
 *   RX queue index.
 * @param desc
 *   Number of descriptors to configure in queue.
 * @param socket
 *   NUMA socket on which memory must be allocated.
 * @param[in] conf
 *   Thresholds parameters.
 * @param mp
 *   Memory pool for buffer allocations.
 *
 * @return
 *   A DPDK queue object on success, NULL otherwise and rte_errno is set.
1317 */ 1318 struct mlx5_rxq_ctrl * 1319 mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, 1320 unsigned int socket, const struct rte_eth_rxconf *conf, 1321 struct rte_mempool *mp) 1322 { 1323 struct mlx5_priv *priv = dev->data->dev_private; 1324 struct mlx5_rxq_ctrl *tmpl; 1325 unsigned int mb_len = rte_pktmbuf_data_room_size(mp); 1326 unsigned int mprq_stride_size; 1327 struct mlx5_dev_config *config = &priv->config; 1328 /* 1329 * Always allocate extra slots, even if eventually 1330 * the vector Rx will not be used. 1331 */ 1332 uint16_t desc_n = 1333 desc + config->rx_vec_en * MLX5_VPMD_DESCS_PER_LOOP; 1334 uint64_t offloads = conf->offloads | 1335 dev->data->dev_conf.rxmode.offloads; 1336 const int mprq_en = mlx5_check_mprq_support(dev) > 0; 1337 1338 tmpl = rte_calloc_socket("RXQ", 1, 1339 sizeof(*tmpl) + 1340 desc_n * sizeof(struct rte_mbuf *), 1341 0, socket); 1342 if (!tmpl) { 1343 rte_errno = ENOMEM; 1344 return NULL; 1345 } 1346 if (mlx5_mr_btree_init(&tmpl->rxq.mr_ctrl.cache_bh, 1347 MLX5_MR_BTREE_CACHE_N, socket)) { 1348 /* rte_errno is already set. */ 1349 goto error; 1350 } 1351 tmpl->socket = socket; 1352 if (dev->data->dev_conf.intr_conf.rxq) 1353 tmpl->irq = 1; 1354 /* 1355 * This Rx queue can be configured as a Multi-Packet RQ if all of the 1356 * following conditions are met: 1357 * - MPRQ is enabled. 1358 * - The number of descs is more than the number of strides. 1359 * - max_rx_pkt_len plus overhead is less than the max size of a 1360 * stride. 1361 * Otherwise, enable Rx scatter if necessary. 1362 */ 1363 assert(mb_len >= RTE_PKTMBUF_HEADROOM); 1364 mprq_stride_size = 1365 dev->data->dev_conf.rxmode.max_rx_pkt_len + 1366 sizeof(struct rte_mbuf_ext_shared_info) + 1367 RTE_PKTMBUF_HEADROOM; 1368 if (mprq_en && 1369 desc > (1U << config->mprq.stride_num_n) && 1370 mprq_stride_size <= (1U << config->mprq.max_stride_size_n)) { 1371 /* TODO: Rx scatter isn't supported yet. */ 1372 tmpl->rxq.sges_n = 0; 1373 /* Trim the number of descs needed. */ 1374 desc >>= config->mprq.stride_num_n; 1375 tmpl->rxq.strd_num_n = config->mprq.stride_num_n; 1376 tmpl->rxq.strd_sz_n = RTE_MAX(log2above(mprq_stride_size), 1377 config->mprq.min_stride_size_n); 1378 tmpl->rxq.strd_shift_en = MLX5_MPRQ_TWO_BYTE_SHIFT; 1379 tmpl->rxq.mprq_max_memcpy_len = 1380 RTE_MIN(mb_len - RTE_PKTMBUF_HEADROOM, 1381 config->mprq.max_memcpy_len); 1382 DRV_LOG(DEBUG, 1383 "port %u Rx queue %u: Multi-Packet RQ is enabled" 1384 " strd_num_n = %u, strd_sz_n = %u", 1385 dev->data->port_id, idx, 1386 tmpl->rxq.strd_num_n, tmpl->rxq.strd_sz_n); 1387 } else if (dev->data->dev_conf.rxmode.max_rx_pkt_len <= 1388 (mb_len - RTE_PKTMBUF_HEADROOM)) { 1389 tmpl->rxq.sges_n = 0; 1390 } else if (offloads & DEV_RX_OFFLOAD_SCATTER) { 1391 unsigned int size = 1392 RTE_PKTMBUF_HEADROOM + 1393 dev->data->dev_conf.rxmode.max_rx_pkt_len; 1394 unsigned int sges_n; 1395 1396 /* 1397 * Determine the number of SGEs needed for a full packet 1398 * and round it to the next power of two. 1399 */ 1400 sges_n = log2above((size / mb_len) + !!(size % mb_len)); 1401 tmpl->rxq.sges_n = sges_n; 1402 /* Make sure rxq.sges_n did not overflow. 
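		 * Recompute the capacity from the value actually stored in
		 * rxq.sges_n and fail if it no longer covers max_rx_pkt_len.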
*/ 1403 size = mb_len * (1 << tmpl->rxq.sges_n); 1404 size -= RTE_PKTMBUF_HEADROOM; 1405 if (size < dev->data->dev_conf.rxmode.max_rx_pkt_len) { 1406 DRV_LOG(ERR, 1407 "port %u too many SGEs (%u) needed to handle" 1408 " requested maximum packet size %u", 1409 dev->data->port_id, 1410 1 << sges_n, 1411 dev->data->dev_conf.rxmode.max_rx_pkt_len); 1412 rte_errno = EOVERFLOW; 1413 goto error; 1414 } 1415 } else { 1416 DRV_LOG(WARNING, 1417 "port %u the requested maximum Rx packet size (%u) is" 1418 " larger than a single mbuf (%u) and scattered mode has" 1419 " not been requested", 1420 dev->data->port_id, 1421 dev->data->dev_conf.rxmode.max_rx_pkt_len, 1422 mb_len - RTE_PKTMBUF_HEADROOM); 1423 } 1424 if (mprq_en && !mlx5_rxq_mprq_enabled(&tmpl->rxq)) 1425 DRV_LOG(WARNING, 1426 "port %u MPRQ is requested but cannot be enabled" 1427 " (requested: desc = %u, stride_sz = %u," 1428 " supported: min_stride_num = %u, max_stride_sz = %u).", 1429 dev->data->port_id, desc, mprq_stride_size, 1430 (1 << config->mprq.stride_num_n), 1431 (1 << config->mprq.max_stride_size_n)); 1432 DRV_LOG(DEBUG, "port %u maximum number of segments per packet: %u", 1433 dev->data->port_id, 1 << tmpl->rxq.sges_n); 1434 if (desc % (1 << tmpl->rxq.sges_n)) { 1435 DRV_LOG(ERR, 1436 "port %u number of Rx queue descriptors (%u) is not a" 1437 " multiple of SGEs per packet (%u)", 1438 dev->data->port_id, 1439 desc, 1440 1 << tmpl->rxq.sges_n); 1441 rte_errno = EINVAL; 1442 goto error; 1443 } 1444 /* Toggle RX checksum offload if hardware supports it. */ 1445 tmpl->rxq.csum = !!(offloads & DEV_RX_OFFLOAD_CHECKSUM); 1446 tmpl->rxq.hw_timestamp = !!(offloads & DEV_RX_OFFLOAD_TIMESTAMP); 1447 /* Configure VLAN stripping. */ 1448 tmpl->rxq.vlan_strip = !!(offloads & DEV_RX_OFFLOAD_VLAN_STRIP); 1449 /* By default, FCS (CRC) is stripped by hardware. */ 1450 tmpl->rxq.crc_present = 0; 1451 if (offloads & DEV_RX_OFFLOAD_KEEP_CRC) { 1452 if (config->hw_fcs_strip) { 1453 tmpl->rxq.crc_present = 1; 1454 } else { 1455 DRV_LOG(WARNING, 1456 "port %u CRC stripping has been disabled but will" 1457 " still be performed by hardware, make sure MLNX_OFED" 1458 " and firmware are up to date", 1459 dev->data->port_id); 1460 } 1461 } 1462 DRV_LOG(DEBUG, 1463 "port %u CRC stripping is %s, %u bytes will be subtracted from" 1464 " incoming frames to hide it", 1465 dev->data->port_id, 1466 tmpl->rxq.crc_present ? "disabled" : "enabled", 1467 tmpl->rxq.crc_present << 2); 1468 /* Save port ID. */ 1469 tmpl->rxq.rss_hash = !!priv->rss_conf.rss_hf && 1470 (!!(dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS)); 1471 tmpl->rxq.port_id = dev->data->port_id; 1472 tmpl->priv = priv; 1473 tmpl->rxq.mp = mp; 1474 tmpl->rxq.elts_n = log2above(desc); 1475 tmpl->rxq.rq_repl_thresh = 1476 MLX5_VPMD_RXQ_RPLNSH_THRESH(1 << tmpl->rxq.elts_n); 1477 tmpl->rxq.elts = 1478 (struct rte_mbuf *(*)[1 << tmpl->rxq.elts_n])(tmpl + 1); 1479 #ifndef RTE_ARCH_64 1480 tmpl->rxq.uar_lock_cq = &priv->uar_lock_cq; 1481 #endif 1482 tmpl->rxq.idx = idx; 1483 rte_atomic32_inc(&tmpl->refcnt); 1484 LIST_INSERT_HEAD(&priv->rxqsctrl, tmpl, next); 1485 return tmpl; 1486 error: 1487 rte_free(tmpl); 1488 return NULL; 1489 } 1490 1491 /** 1492 * Get a Rx queue. 1493 * 1494 * @param dev 1495 * Pointer to Ethernet device. 1496 * @param idx 1497 * RX queue index. 1498 * 1499 * @return 1500 * A pointer to the queue if it exists, NULL otherwise. 
1501 */ 1502 struct mlx5_rxq_ctrl * 1503 mlx5_rxq_get(struct rte_eth_dev *dev, uint16_t idx) 1504 { 1505 struct mlx5_priv *priv = dev->data->dev_private; 1506 struct mlx5_rxq_ctrl *rxq_ctrl = NULL; 1507 1508 if ((*priv->rxqs)[idx]) { 1509 rxq_ctrl = container_of((*priv->rxqs)[idx], 1510 struct mlx5_rxq_ctrl, 1511 rxq); 1512 mlx5_rxq_ibv_get(dev, idx); 1513 rte_atomic32_inc(&rxq_ctrl->refcnt); 1514 } 1515 return rxq_ctrl; 1516 } 1517 1518 /** 1519 * Release a Rx queue. 1520 * 1521 * @param dev 1522 * Pointer to Ethernet device. 1523 * @param idx 1524 * RX queue index. 1525 * 1526 * @return 1527 * 1 while a reference on it exists, 0 when freed. 1528 */ 1529 int 1530 mlx5_rxq_release(struct rte_eth_dev *dev, uint16_t idx) 1531 { 1532 struct mlx5_priv *priv = dev->data->dev_private; 1533 struct mlx5_rxq_ctrl *rxq_ctrl; 1534 1535 if (!(*priv->rxqs)[idx]) 1536 return 0; 1537 rxq_ctrl = container_of((*priv->rxqs)[idx], struct mlx5_rxq_ctrl, rxq); 1538 assert(rxq_ctrl->priv); 1539 if (rxq_ctrl->ibv && !mlx5_rxq_ibv_release(rxq_ctrl->ibv)) 1540 rxq_ctrl->ibv = NULL; 1541 if (rte_atomic32_dec_and_test(&rxq_ctrl->refcnt)) { 1542 mlx5_mr_btree_free(&rxq_ctrl->rxq.mr_ctrl.cache_bh); 1543 LIST_REMOVE(rxq_ctrl, next); 1544 rte_free(rxq_ctrl); 1545 (*priv->rxqs)[idx] = NULL; 1546 return 0; 1547 } 1548 return 1; 1549 } 1550 1551 /** 1552 * Verify the Rx Queue list is empty 1553 * 1554 * @param dev 1555 * Pointer to Ethernet device. 1556 * 1557 * @return 1558 * The number of object not released. 1559 */ 1560 int 1561 mlx5_rxq_verify(struct rte_eth_dev *dev) 1562 { 1563 struct mlx5_priv *priv = dev->data->dev_private; 1564 struct mlx5_rxq_ctrl *rxq_ctrl; 1565 int ret = 0; 1566 1567 LIST_FOREACH(rxq_ctrl, &priv->rxqsctrl, next) { 1568 DRV_LOG(DEBUG, "port %u Rx Queue %u still referenced", 1569 dev->data->port_id, rxq_ctrl->rxq.idx); 1570 ++ret; 1571 } 1572 return ret; 1573 } 1574 1575 /** 1576 * Create an indirection table. 1577 * 1578 * @param dev 1579 * Pointer to Ethernet device. 1580 * @param queues 1581 * Queues entering in the indirection table. 1582 * @param queues_n 1583 * Number of queues in the array. 1584 * 1585 * @return 1586 * The Verbs object initialised, NULL otherwise and rte_errno is set. 1587 */ 1588 static struct mlx5_ind_table_ibv * 1589 mlx5_ind_table_ibv_new(struct rte_eth_dev *dev, const uint16_t *queues, 1590 uint32_t queues_n) 1591 { 1592 struct mlx5_priv *priv = dev->data->dev_private; 1593 struct mlx5_ind_table_ibv *ind_tbl; 1594 const unsigned int wq_n = rte_is_power_of_2(queues_n) ? 1595 log2above(queues_n) : 1596 log2above(priv->config.ind_table_max_size); 1597 struct ibv_wq *wq[1 << wq_n]; 1598 unsigned int i; 1599 unsigned int j; 1600 1601 ind_tbl = rte_calloc(__func__, 1, sizeof(*ind_tbl) + 1602 queues_n * sizeof(uint16_t), 0); 1603 if (!ind_tbl) { 1604 rte_errno = ENOMEM; 1605 return NULL; 1606 } 1607 for (i = 0; i != queues_n; ++i) { 1608 struct mlx5_rxq_ctrl *rxq = mlx5_rxq_get(dev, queues[i]); 1609 1610 if (!rxq) 1611 goto error; 1612 wq[i] = rxq->ibv->wq; 1613 ind_tbl->queues[i] = queues[i]; 1614 } 1615 ind_tbl->queues_n = queues_n; 1616 /* Finalise indirection table. 
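	 * The Verbs table size is a power of two; pad any remaining entries
	 * by cycling over the configured queues again.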
*/ 1617 for (j = 0; i != (unsigned int)(1 << wq_n); ++i, ++j) 1618 wq[i] = wq[j]; 1619 ind_tbl->ind_table = mlx5_glue->create_rwq_ind_table 1620 (priv->sh->ctx, 1621 &(struct ibv_rwq_ind_table_init_attr){ 1622 .log_ind_tbl_size = wq_n, 1623 .ind_tbl = wq, 1624 .comp_mask = 0, 1625 }); 1626 if (!ind_tbl->ind_table) { 1627 rte_errno = errno; 1628 goto error; 1629 } 1630 rte_atomic32_inc(&ind_tbl->refcnt); 1631 LIST_INSERT_HEAD(&priv->ind_tbls, ind_tbl, next); 1632 return ind_tbl; 1633 error: 1634 rte_free(ind_tbl); 1635 DEBUG("port %u cannot create indirection table", dev->data->port_id); 1636 return NULL; 1637 } 1638 1639 /** 1640 * Get an indirection table. 1641 * 1642 * @param dev 1643 * Pointer to Ethernet device. 1644 * @param queues 1645 * Queues entering in the indirection table. 1646 * @param queues_n 1647 * Number of queues in the array. 1648 * 1649 * @return 1650 * An indirection table if found. 1651 */ 1652 static struct mlx5_ind_table_ibv * 1653 mlx5_ind_table_ibv_get(struct rte_eth_dev *dev, const uint16_t *queues, 1654 uint32_t queues_n) 1655 { 1656 struct mlx5_priv *priv = dev->data->dev_private; 1657 struct mlx5_ind_table_ibv *ind_tbl; 1658 1659 LIST_FOREACH(ind_tbl, &priv->ind_tbls, next) { 1660 if ((ind_tbl->queues_n == queues_n) && 1661 (memcmp(ind_tbl->queues, queues, 1662 ind_tbl->queues_n * sizeof(ind_tbl->queues[0])) 1663 == 0)) 1664 break; 1665 } 1666 if (ind_tbl) { 1667 unsigned int i; 1668 1669 rte_atomic32_inc(&ind_tbl->refcnt); 1670 for (i = 0; i != ind_tbl->queues_n; ++i) 1671 mlx5_rxq_get(dev, ind_tbl->queues[i]); 1672 } 1673 return ind_tbl; 1674 } 1675 1676 /** 1677 * Release an indirection table. 1678 * 1679 * @param dev 1680 * Pointer to Ethernet device. 1681 * @param ind_table 1682 * Indirection table to release. 1683 * 1684 * @return 1685 * 1 while a reference on it exists, 0 when freed. 1686 */ 1687 static int 1688 mlx5_ind_table_ibv_release(struct rte_eth_dev *dev, 1689 struct mlx5_ind_table_ibv *ind_tbl) 1690 { 1691 unsigned int i; 1692 1693 if (rte_atomic32_dec_and_test(&ind_tbl->refcnt)) 1694 claim_zero(mlx5_glue->destroy_rwq_ind_table 1695 (ind_tbl->ind_table)); 1696 for (i = 0; i != ind_tbl->queues_n; ++i) 1697 claim_nonzero(mlx5_rxq_release(dev, ind_tbl->queues[i])); 1698 if (!rte_atomic32_read(&ind_tbl->refcnt)) { 1699 LIST_REMOVE(ind_tbl, next); 1700 rte_free(ind_tbl); 1701 return 0; 1702 } 1703 return 1; 1704 } 1705 1706 /** 1707 * Verify the Rx Queue list is empty 1708 * 1709 * @param dev 1710 * Pointer to Ethernet device. 1711 * 1712 * @return 1713 * The number of object not released. 1714 */ 1715 int 1716 mlx5_ind_table_ibv_verify(struct rte_eth_dev *dev) 1717 { 1718 struct mlx5_priv *priv = dev->data->dev_private; 1719 struct mlx5_ind_table_ibv *ind_tbl; 1720 int ret = 0; 1721 1722 LIST_FOREACH(ind_tbl, &priv->ind_tbls, next) { 1723 DRV_LOG(DEBUG, 1724 "port %u Verbs indirection table %p still referenced", 1725 dev->data->port_id, (void *)ind_tbl); 1726 ++ret; 1727 } 1728 return ret; 1729 } 1730 1731 /** 1732 * Create an Rx Hash queue. 1733 * 1734 * @param dev 1735 * Pointer to Ethernet device. 1736 * @param rss_key 1737 * RSS key for the Rx hash queue. 1738 * @param rss_key_len 1739 * RSS key length. 1740 * @param hash_fields 1741 * Verbs protocol hash field to make the RSS on. 1742 * @param queues 1743 * Queues entering in hash queue. In case of empty hash_fields only the 1744 * first queue index will be taken for the indirection table. 1745 * @param queues_n 1746 * Number of queues. 1747 * @param tunnel 1748 * Tunnel type. 
1749 * 1750 * @return 1751 * The Verbs object initialised, NULL otherwise and rte_errno is set. 1752 */ 1753 struct mlx5_hrxq * 1754 mlx5_hrxq_new(struct rte_eth_dev *dev, 1755 const uint8_t *rss_key, uint32_t rss_key_len, 1756 uint64_t hash_fields, 1757 const uint16_t *queues, uint32_t queues_n, 1758 int tunnel __rte_unused) 1759 { 1760 struct mlx5_priv *priv = dev->data->dev_private; 1761 struct mlx5_hrxq *hrxq; 1762 struct mlx5_ind_table_ibv *ind_tbl; 1763 struct ibv_qp *qp; 1764 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT 1765 struct mlx5dv_qp_init_attr qp_init_attr; 1766 #endif 1767 int err; 1768 1769 queues_n = hash_fields ? queues_n : 1; 1770 ind_tbl = mlx5_ind_table_ibv_get(dev, queues, queues_n); 1771 if (!ind_tbl) 1772 ind_tbl = mlx5_ind_table_ibv_new(dev, queues, queues_n); 1773 if (!ind_tbl) { 1774 rte_errno = ENOMEM; 1775 return NULL; 1776 } 1777 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT 1778 memset(&qp_init_attr, 0, sizeof(qp_init_attr)); 1779 if (tunnel) { 1780 qp_init_attr.comp_mask = 1781 MLX5DV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS; 1782 qp_init_attr.create_flags = MLX5DV_QP_CREATE_TUNNEL_OFFLOADS; 1783 } 1784 #ifdef HAVE_IBV_FLOW_DV_SUPPORT 1785 if (dev->data->dev_conf.lpbk_mode) { 1786 /* Allow packet sent from NIC loop back w/o source MAC check. */ 1787 qp_init_attr.comp_mask |= 1788 MLX5DV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS; 1789 qp_init_attr.create_flags |= 1790 MLX5DV_QP_CREATE_TIR_ALLOW_SELF_LOOPBACK_UC; 1791 } 1792 #endif 1793 qp = mlx5_glue->dv_create_qp 1794 (priv->sh->ctx, 1795 &(struct ibv_qp_init_attr_ex){ 1796 .qp_type = IBV_QPT_RAW_PACKET, 1797 .comp_mask = 1798 IBV_QP_INIT_ATTR_PD | 1799 IBV_QP_INIT_ATTR_IND_TABLE | 1800 IBV_QP_INIT_ATTR_RX_HASH, 1801 .rx_hash_conf = (struct ibv_rx_hash_conf){ 1802 .rx_hash_function = IBV_RX_HASH_FUNC_TOEPLITZ, 1803 .rx_hash_key_len = rss_key_len, 1804 .rx_hash_key = (void *)(uintptr_t)rss_key, 1805 .rx_hash_fields_mask = hash_fields, 1806 }, 1807 .rwq_ind_tbl = ind_tbl->ind_table, 1808 .pd = priv->sh->pd, 1809 }, 1810 &qp_init_attr); 1811 #else 1812 qp = mlx5_glue->create_qp_ex 1813 (priv->sh->ctx, 1814 &(struct ibv_qp_init_attr_ex){ 1815 .qp_type = IBV_QPT_RAW_PACKET, 1816 .comp_mask = 1817 IBV_QP_INIT_ATTR_PD | 1818 IBV_QP_INIT_ATTR_IND_TABLE | 1819 IBV_QP_INIT_ATTR_RX_HASH, 1820 .rx_hash_conf = (struct ibv_rx_hash_conf){ 1821 .rx_hash_function = IBV_RX_HASH_FUNC_TOEPLITZ, 1822 .rx_hash_key_len = rss_key_len, 1823 .rx_hash_key = (void *)(uintptr_t)rss_key, 1824 .rx_hash_fields_mask = hash_fields, 1825 }, 1826 .rwq_ind_tbl = ind_tbl->ind_table, 1827 .pd = priv->sh->pd, 1828 }); 1829 #endif 1830 if (!qp) { 1831 rte_errno = errno; 1832 goto error; 1833 } 1834 hrxq = rte_calloc(__func__, 1, sizeof(*hrxq) + rss_key_len, 0); 1835 if (!hrxq) 1836 goto error; 1837 hrxq->ind_table = ind_tbl; 1838 hrxq->qp = qp; 1839 hrxq->rss_key_len = rss_key_len; 1840 hrxq->hash_fields = hash_fields; 1841 memcpy(hrxq->rss_key, rss_key, rss_key_len); 1842 #ifdef HAVE_IBV_FLOW_DV_SUPPORT 1843 hrxq->action = mlx5_glue->dv_create_flow_action_dest_ibv_qp(hrxq->qp); 1844 if (!hrxq->action) { 1845 rte_errno = errno; 1846 goto error; 1847 } 1848 #endif 1849 rte_atomic32_inc(&hrxq->refcnt); 1850 LIST_INSERT_HEAD(&priv->hrxqs, hrxq, next); 1851 return hrxq; 1852 error: 1853 err = rte_errno; /* Save rte_errno before cleanup. */ 1854 mlx5_ind_table_ibv_release(dev, ind_tbl); 1855 if (qp) 1856 claim_zero(mlx5_glue->destroy_qp(qp)); 1857 rte_errno = err; /* Restore rte_errno. */ 1858 return NULL; 1859 } 1860 1861 /** 1862 * Get an Rx Hash queue. 
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param rss_key
 *   RSS key for the Rx hash queue.
 * @param rss_key_len
 *   RSS key length.
 * @param hash_fields
 *   Verbs protocol hash field to make the RSS on.
 * @param queues
 *   Queues entering in hash queue. In case of empty hash_fields only the
 *   first queue index will be taken for the indirection table.
 * @param queues_n
 *   Number of queues.
 *
 * @return
 *   A hash Rx queue on success, NULL if not found.
 */
struct mlx5_hrxq *
mlx5_hrxq_get(struct rte_eth_dev *dev,
	      const uint8_t *rss_key, uint32_t rss_key_len,
	      uint64_t hash_fields,
	      const uint16_t *queues, uint32_t queues_n)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_hrxq *hrxq;

	queues_n = hash_fields ? queues_n : 1;
	LIST_FOREACH(hrxq, &priv->hrxqs, next) {
		struct mlx5_ind_table_ibv *ind_tbl;

		if (hrxq->rss_key_len != rss_key_len)
			continue;
		if (memcmp(hrxq->rss_key, rss_key, rss_key_len))
			continue;
		if (hrxq->hash_fields != hash_fields)
			continue;
		ind_tbl = mlx5_ind_table_ibv_get(dev, queues, queues_n);
		if (!ind_tbl)
			continue;
		if (ind_tbl != hrxq->ind_table) {
			mlx5_ind_table_ibv_release(dev, ind_tbl);
			continue;
		}
		rte_atomic32_inc(&hrxq->refcnt);
		return hrxq;
	}
	return NULL;
}

/**
 * Release the hash Rx queue.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param hrxq
 *   Pointer to Hash Rx queue to release.
 *
 * @return
 *   1 while a reference on it exists, 0 when freed.
 */
int
mlx5_hrxq_release(struct rte_eth_dev *dev, struct mlx5_hrxq *hrxq)
{
	if (rte_atomic32_dec_and_test(&hrxq->refcnt)) {
#ifdef HAVE_IBV_FLOW_DV_SUPPORT
		mlx5_glue->destroy_flow_action(hrxq->action);
#endif
		claim_zero(mlx5_glue->destroy_qp(hrxq->qp));
		mlx5_ind_table_ibv_release(dev, hrxq->ind_table);
		LIST_REMOVE(hrxq, next);
		rte_free(hrxq);
		return 0;
	}
	claim_nonzero(mlx5_ind_table_ibv_release(dev, hrxq->ind_table));
	return 1;
}

/**
 * Verify the hash Rx queue list is empty.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   The number of objects not released.
 */
int
mlx5_hrxq_ibv_verify(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_hrxq *hrxq;
	int ret = 0;

	LIST_FOREACH(hrxq, &priv->hrxqs, next) {
		DRV_LOG(DEBUG,
			"port %u Verbs hash Rx queue %p still referenced",
			dev->data->port_id, (void *)hrxq);
		++ret;
	}
	return ret;
}

/**
 * Create a drop Rx queue Verbs object.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   The Verbs object initialised, NULL otherwise and rte_errno is set.
/**
 * Create a drop Rx queue Verbs object.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   The Verbs object initialised, NULL otherwise and rte_errno is set.
 */
static struct mlx5_rxq_ibv *
mlx5_rxq_ibv_drop_new(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct ibv_context *ctx = priv->sh->ctx;
	struct ibv_cq *cq;
	struct ibv_wq *wq = NULL;
	struct mlx5_rxq_ibv *rxq;

	if (priv->drop_queue.rxq)
		return priv->drop_queue.rxq;
	cq = mlx5_glue->create_cq(ctx, 1, NULL, NULL, 0);
	if (!cq) {
		DEBUG("port %u cannot allocate CQ for drop queue",
		      dev->data->port_id);
		rte_errno = errno;
		goto error;
	}
	wq = mlx5_glue->create_wq(ctx,
		 &(struct ibv_wq_init_attr){
			.wq_type = IBV_WQT_RQ,
			.max_wr = 1,
			.max_sge = 1,
			.pd = priv->sh->pd,
			.cq = cq,
		 });
	if (!wq) {
		DEBUG("port %u cannot allocate WQ for drop queue",
		      dev->data->port_id);
		rte_errno = errno;
		goto error;
	}
	rxq = rte_calloc(__func__, 1, sizeof(*rxq), 0);
	if (!rxq) {
		DEBUG("port %u cannot allocate drop Rx queue memory",
		      dev->data->port_id);
		rte_errno = ENOMEM;
		goto error;
	}
	rxq->cq = cq;
	rxq->wq = wq;
	priv->drop_queue.rxq = rxq;
	return rxq;
error:
	if (wq)
		claim_zero(mlx5_glue->destroy_wq(wq));
	if (cq)
		claim_zero(mlx5_glue->destroy_cq(cq));
	return NULL;
}

/**
 * Release a drop Rx queue Verbs object.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
static void
mlx5_rxq_ibv_drop_release(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_rxq_ibv *rxq = priv->drop_queue.rxq;

	if (rxq->wq)
		claim_zero(mlx5_glue->destroy_wq(rxq->wq));
	if (rxq->cq)
		claim_zero(mlx5_glue->destroy_cq(rxq->cq));
	rte_free(rxq);
	priv->drop_queue.rxq = NULL;
}

/**
 * Create a drop indirection table.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   The Verbs object initialised, NULL otherwise and rte_errno is set.
 */
static struct mlx5_ind_table_ibv *
mlx5_ind_table_ibv_drop_new(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_ind_table_ibv *ind_tbl;
	struct mlx5_rxq_ibv *rxq;
	struct mlx5_ind_table_ibv tmpl;

	rxq = mlx5_rxq_ibv_drop_new(dev);
	if (!rxq)
		return NULL;
	tmpl.ind_table = mlx5_glue->create_rwq_ind_table
		(priv->sh->ctx,
		 &(struct ibv_rwq_ind_table_init_attr){
			.log_ind_tbl_size = 0,
			.ind_tbl = &rxq->wq,
			.comp_mask = 0,
		 });
	if (!tmpl.ind_table) {
		DEBUG("port %u cannot allocate indirection table for drop"
		      " queue",
		      dev->data->port_id);
		rte_errno = errno;
		goto error;
	}
	ind_tbl = rte_calloc(__func__, 1, sizeof(*ind_tbl), 0);
	if (!ind_tbl) {
		rte_errno = ENOMEM;
		goto error;
	}
	ind_tbl->ind_table = tmpl.ind_table;
	return ind_tbl;
error:
	/* Destroy the Verbs indirection table if it was created. */
	if (tmpl.ind_table)
		claim_zero(mlx5_glue->destroy_rwq_ind_table(tmpl.ind_table));
	mlx5_rxq_ibv_drop_release(dev);
	return NULL;
}

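/*
 * Illustrative sketch only (not part of the driver): the drop Rx queue Verbs
 * object above is a per-port singleton, i.e. repeated calls return the cached
 * priv->drop_queue.rxq instead of allocating a new CQ/WQ pair, and a single
 * release tears it down.  The function name is hypothetical.
 */
static __rte_unused int
example_drop_rxq_is_singleton(struct rte_eth_dev *dev)
{
	struct mlx5_rxq_ibv *first = mlx5_rxq_ibv_drop_new(dev);
	struct mlx5_rxq_ibv *second = mlx5_rxq_ibv_drop_new(dev);
	int singleton = (first != NULL && first == second);

	if (first)
		mlx5_rxq_ibv_drop_release(dev);
	return singleton;
}
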
/**
 * Release a drop indirection table.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
static void
mlx5_ind_table_ibv_drop_release(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_ind_table_ibv *ind_tbl = priv->drop_queue.hrxq->ind_table;

	claim_zero(mlx5_glue->destroy_rwq_ind_table(ind_tbl->ind_table));
	mlx5_rxq_ibv_drop_release(dev);
	rte_free(ind_tbl);
	priv->drop_queue.hrxq->ind_table = NULL;
}

/**
 * Create a drop Rx Hash queue.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   The Verbs object initialised, NULL otherwise and rte_errno is set.
 */
struct mlx5_hrxq *
mlx5_hrxq_drop_new(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_ind_table_ibv *ind_tbl;
	struct ibv_qp *qp;
	struct mlx5_hrxq *hrxq = NULL;

	if (priv->drop_queue.hrxq) {
		rte_atomic32_inc(&priv->drop_queue.hrxq->refcnt);
		return priv->drop_queue.hrxq;
	}
	ind_tbl = mlx5_ind_table_ibv_drop_new(dev);
	if (!ind_tbl)
		return NULL;
	qp = mlx5_glue->create_qp_ex(priv->sh->ctx,
		 &(struct ibv_qp_init_attr_ex){
			.qp_type = IBV_QPT_RAW_PACKET,
			.comp_mask =
				IBV_QP_INIT_ATTR_PD |
				IBV_QP_INIT_ATTR_IND_TABLE |
				IBV_QP_INIT_ATTR_RX_HASH,
			.rx_hash_conf = (struct ibv_rx_hash_conf){
				.rx_hash_function =
					IBV_RX_HASH_FUNC_TOEPLITZ,
				.rx_hash_key_len = MLX5_RSS_HASH_KEY_LEN,
				.rx_hash_key = rss_hash_default_key,
				.rx_hash_fields_mask = 0,
			},
			.rwq_ind_tbl = ind_tbl->ind_table,
			.pd = priv->sh->pd,
		 });
	if (!qp) {
		DEBUG("port %u cannot allocate QP for drop queue",
		      dev->data->port_id);
		rte_errno = errno;
		goto error;
	}
	hrxq = rte_calloc(__func__, 1, sizeof(*hrxq), 0);
	if (!hrxq) {
		DRV_LOG(WARNING,
			"port %u cannot allocate memory for drop queue",
			dev->data->port_id);
		rte_errno = ENOMEM;
		goto error;
	}
	hrxq->ind_table = ind_tbl;
	hrxq->qp = qp;
#ifdef HAVE_IBV_FLOW_DV_SUPPORT
	hrxq->action = mlx5_glue->dv_create_flow_action_dest_ibv_qp(hrxq->qp);
	if (!hrxq->action) {
		rte_errno = errno;
		goto error;
	}
#endif
	priv->drop_queue.hrxq = hrxq;
	rte_atomic32_set(&hrxq->refcnt, 1);
	return hrxq;
error:
	if (qp)
		claim_zero(mlx5_glue->destroy_qp(qp));
	if (hrxq)
		rte_free(hrxq);
	if (ind_tbl) {
		/*
		 * priv->drop_queue.hrxq is not set yet, release the Verbs
		 * objects directly instead of going through
		 * mlx5_ind_table_ibv_drop_release().
		 */
		claim_zero(mlx5_glue->destroy_rwq_ind_table
			   (ind_tbl->ind_table));
		mlx5_rxq_ibv_drop_release(dev);
		rte_free(ind_tbl);
	}
	return NULL;
}

/**
 * Release a drop hash Rx queue.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
void
mlx5_hrxq_drop_release(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_hrxq *hrxq = priv->drop_queue.hrxq;

	if (rte_atomic32_dec_and_test(&hrxq->refcnt)) {
#ifdef HAVE_IBV_FLOW_DV_SUPPORT
		mlx5_glue->destroy_flow_action(hrxq->action);
#endif
		claim_zero(mlx5_glue->destroy_qp(hrxq->qp));
		mlx5_ind_table_ibv_drop_release(dev);
		rte_free(hrxq);
		priv->drop_queue.hrxq = NULL;
	}
}
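
/*
 * Illustrative sketch only (not part of the driver): typical pairing of
 * mlx5_hrxq_drop_new() and mlx5_hrxq_drop_release() by a caller that needs a
 * drop target for a flow rule.  The drop hash Rx queue is reference counted,
 * so every successful "new" must be balanced by one release.  The function
 * name is hypothetical.
 */
static __rte_unused int
example_with_drop_hrxq(struct rte_eth_dev *dev)
{
	struct mlx5_hrxq *hrxq = mlx5_hrxq_drop_new(dev);

	if (!hrxq)
		return -rte_errno;
	/* A real caller would attach hrxq->qp (or hrxq->action) to a drop
	 * flow here before releasing its reference on teardown.
	 */
	mlx5_hrxq_drop_release(dev);
	return 0;
}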