1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2015 6WIND S.A. 3 * Copyright 2015 Mellanox Technologies, Ltd 4 */ 5 6 #include <stddef.h> 7 #include <assert.h> 8 #include <errno.h> 9 #include <string.h> 10 #include <stdint.h> 11 #include <fcntl.h> 12 #include <sys/queue.h> 13 14 /* Verbs header. */ 15 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */ 16 #ifdef PEDANTIC 17 #pragma GCC diagnostic ignored "-Wpedantic" 18 #endif 19 #include <infiniband/verbs.h> 20 #include <infiniband/mlx5dv.h> 21 #ifdef PEDANTIC 22 #pragma GCC diagnostic error "-Wpedantic" 23 #endif 24 25 #include <rte_mbuf.h> 26 #include <rte_malloc.h> 27 #include <rte_ethdev_driver.h> 28 #include <rte_common.h> 29 #include <rte_interrupts.h> 30 #include <rte_debug.h> 31 #include <rte_io.h> 32 33 #include "mlx5.h" 34 #include "mlx5_rxtx.h" 35 #include "mlx5_utils.h" 36 #include "mlx5_autoconf.h" 37 #include "mlx5_defs.h" 38 #include "mlx5_glue.h" 39 40 /* Default RSS hash key also used for ConnectX-3. */ 41 uint8_t rss_hash_default_key[] = { 42 0x2c, 0xc6, 0x81, 0xd1, 43 0x5b, 0xdb, 0xf4, 0xf7, 44 0xfc, 0xa2, 0x83, 0x19, 45 0xdb, 0x1a, 0x3e, 0x94, 46 0x6b, 0x9e, 0x38, 0xd9, 47 0x2c, 0x9c, 0x03, 0xd1, 48 0xad, 0x99, 0x44, 0xa7, 49 0xd9, 0x56, 0x3d, 0x59, 50 0x06, 0x3c, 0x25, 0xf3, 51 0xfc, 0x1f, 0xdc, 0x2a, 52 }; 53 54 /* Length of the default RSS hash key. */ 55 static_assert(MLX5_RSS_HASH_KEY_LEN == 56 (unsigned int)sizeof(rss_hash_default_key), 57 "wrong RSS default key size."); 58 59 /** 60 * Check whether Multi-Packet RQ can be enabled for the device. 61 * 62 * @param dev 63 * Pointer to Ethernet device. 64 * 65 * @return 66 * 1 if supported, negative errno value if not. 67 */ 68 inline int 69 mlx5_check_mprq_support(struct rte_eth_dev *dev) 70 { 71 struct mlx5_priv *priv = dev->data->dev_private; 72 73 if (priv->config.mprq.enabled && 74 priv->rxqs_n >= priv->config.mprq.min_rxqs_num) 75 return 1; 76 return -ENOTSUP; 77 } 78 79 /** 80 * Check whether Multi-Packet RQ is enabled for the Rx queue. 81 * 82 * @param rxq 83 * Pointer to receive queue structure. 84 * 85 * @return 86 * 0 if disabled, otherwise enabled. 87 */ 88 inline int 89 mlx5_rxq_mprq_enabled(struct mlx5_rxq_data *rxq) 90 { 91 return rxq->strd_num_n > 0; 92 } 93 94 /** 95 * Check whether Multi-Packet RQ is enabled for the device. 96 * 97 * @param dev 98 * Pointer to Ethernet device. 99 * 100 * @return 101 * 0 if disabled, otherwise enabled. 102 */ 103 inline int 104 mlx5_mprq_enabled(struct rte_eth_dev *dev) 105 { 106 struct mlx5_priv *priv = dev->data->dev_private; 107 uint16_t i; 108 uint16_t n = 0; 109 110 if (mlx5_check_mprq_support(dev) < 0) 111 return 0; 112 /* All the configured queues should be enabled. */ 113 for (i = 0; i < priv->rxqs_n; ++i) { 114 struct mlx5_rxq_data *rxq = (*priv->rxqs)[i]; 115 116 if (!rxq) 117 continue; 118 if (mlx5_rxq_mprq_enabled(rxq)) 119 ++n; 120 } 121 /* Multi-Packet RQ can't be partially configured. */ 122 assert(n == 0 || n == priv->rxqs_n); 123 return n == priv->rxqs_n; 124 } 125 126 /** 127 * Allocate RX queue elements for Multi-Packet RQ. 128 * 129 * @param rxq_ctrl 130 * Pointer to RX queue structure. 131 * 132 * @return 133 * 0 on success, a negative errno value otherwise and rte_errno is set. 134 */ 135 static int 136 rxq_alloc_elts_mprq(struct mlx5_rxq_ctrl *rxq_ctrl) 137 { 138 struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq; 139 unsigned int wqe_n = 1 << rxq->elts_n; 140 unsigned int i; 141 int err; 142 143 /* Iterate on segments. 
*/ 144 for (i = 0; i <= wqe_n; ++i) { 145 struct mlx5_mprq_buf *buf; 146 147 if (rte_mempool_get(rxq->mprq_mp, (void **)&buf) < 0) { 148 DRV_LOG(ERR, "port %u empty mbuf pool", rxq->port_id); 149 rte_errno = ENOMEM; 150 goto error; 151 } 152 if (i < wqe_n) 153 (*rxq->mprq_bufs)[i] = buf; 154 else 155 rxq->mprq_repl = buf; 156 } 157 DRV_LOG(DEBUG, 158 "port %u Rx queue %u allocated and configured %u segments", 159 rxq->port_id, rxq_ctrl->idx, wqe_n); 160 return 0; 161 error: 162 err = rte_errno; /* Save rte_errno before cleanup. */ 163 wqe_n = i; 164 for (i = 0; (i != wqe_n); ++i) { 165 if ((*rxq->mprq_bufs)[i] != NULL) 166 rte_mempool_put(rxq->mprq_mp, 167 (*rxq->mprq_bufs)[i]); 168 (*rxq->mprq_bufs)[i] = NULL; 169 } 170 DRV_LOG(DEBUG, "port %u Rx queue %u failed, freed everything", 171 rxq->port_id, rxq_ctrl->idx); 172 rte_errno = err; /* Restore rte_errno. */ 173 return -rte_errno; 174 } 175 176 /** 177 * Allocate RX queue elements for Single-Packet RQ. 178 * 179 * @param rxq_ctrl 180 * Pointer to RX queue structure. 181 * 182 * @return 183 * 0 on success, errno value on failure. 184 */ 185 static int 186 rxq_alloc_elts_sprq(struct mlx5_rxq_ctrl *rxq_ctrl) 187 { 188 const unsigned int sges_n = 1 << rxq_ctrl->rxq.sges_n; 189 unsigned int elts_n = 1 << rxq_ctrl->rxq.elts_n; 190 unsigned int i; 191 int err; 192 193 /* Iterate on segments. */ 194 for (i = 0; (i != elts_n); ++i) { 195 struct rte_mbuf *buf; 196 197 buf = rte_pktmbuf_alloc(rxq_ctrl->rxq.mp); 198 if (buf == NULL) { 199 DRV_LOG(ERR, "port %u empty mbuf pool", 200 PORT_ID(rxq_ctrl->priv)); 201 rte_errno = ENOMEM; 202 goto error; 203 } 204 /* Headroom is reserved by rte_pktmbuf_alloc(). */ 205 assert(DATA_OFF(buf) == RTE_PKTMBUF_HEADROOM); 206 /* Buffer is supposed to be empty. */ 207 assert(rte_pktmbuf_data_len(buf) == 0); 208 assert(rte_pktmbuf_pkt_len(buf) == 0); 209 assert(!buf->next); 210 /* Only the first segment keeps headroom. */ 211 if (i % sges_n) 212 SET_DATA_OFF(buf, 0); 213 PORT(buf) = rxq_ctrl->rxq.port_id; 214 DATA_LEN(buf) = rte_pktmbuf_tailroom(buf); 215 PKT_LEN(buf) = DATA_LEN(buf); 216 NB_SEGS(buf) = 1; 217 (*rxq_ctrl->rxq.elts)[i] = buf; 218 } 219 /* If Rx vector is activated. */ 220 if (mlx5_rxq_check_vec_support(&rxq_ctrl->rxq) > 0) { 221 struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq; 222 struct rte_mbuf *mbuf_init = &rxq->fake_mbuf; 223 int j; 224 225 /* Initialize default rearm_data for vPMD. */ 226 mbuf_init->data_off = RTE_PKTMBUF_HEADROOM; 227 rte_mbuf_refcnt_set(mbuf_init, 1); 228 mbuf_init->nb_segs = 1; 229 mbuf_init->port = rxq->port_id; 230 /* 231 * prevent compiler reordering: 232 * rearm_data covers previous fields. 233 */ 234 rte_compiler_barrier(); 235 rxq->mbuf_initializer = 236 *(uint64_t *)&mbuf_init->rearm_data; 237 /* Padding with a fake mbuf for vectorized Rx. */ 238 for (j = 0; j < MLX5_VPMD_DESCS_PER_LOOP; ++j) 239 (*rxq->elts)[elts_n + j] = &rxq->fake_mbuf; 240 } 241 DRV_LOG(DEBUG, 242 "port %u Rx queue %u allocated and configured %u segments" 243 " (max %u packets)", 244 PORT_ID(rxq_ctrl->priv), rxq_ctrl->idx, elts_n, 245 elts_n / (1 << rxq_ctrl->rxq.sges_n)); 246 return 0; 247 error: 248 err = rte_errno; /* Save rte_errno before cleanup. */ 249 elts_n = i; 250 for (i = 0; (i != elts_n); ++i) { 251 if ((*rxq_ctrl->rxq.elts)[i] != NULL) 252 rte_pktmbuf_free_seg((*rxq_ctrl->rxq.elts)[i]); 253 (*rxq_ctrl->rxq.elts)[i] = NULL; 254 } 255 DRV_LOG(DEBUG, "port %u Rx queue %u failed, freed everything", 256 PORT_ID(rxq_ctrl->priv), rxq_ctrl->idx); 257 rte_errno = err; /* Restore rte_errno. 
*/ 258 return -rte_errno; 259 } 260 261 /** 262 * Allocate RX queue elements. 263 * 264 * @param rxq_ctrl 265 * Pointer to RX queue structure. 266 * 267 * @return 268 * 0 on success, errno value on failure. 269 */ 270 int 271 rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl) 272 { 273 return mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq) ? 274 rxq_alloc_elts_mprq(rxq_ctrl) : rxq_alloc_elts_sprq(rxq_ctrl); 275 } 276 277 /** 278 * Free RX queue elements for Multi-Packet RQ. 279 * 280 * @param rxq_ctrl 281 * Pointer to RX queue structure. 282 */ 283 static void 284 rxq_free_elts_mprq(struct mlx5_rxq_ctrl *rxq_ctrl) 285 { 286 struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq; 287 uint16_t i; 288 289 DRV_LOG(DEBUG, "port %u Multi-Packet Rx queue %u freeing WRs", 290 rxq->port_id, rxq_ctrl->idx); 291 if (rxq->mprq_bufs == NULL) 292 return; 293 assert(mlx5_rxq_check_vec_support(rxq) < 0); 294 for (i = 0; (i != (1u << rxq->elts_n)); ++i) { 295 if ((*rxq->mprq_bufs)[i] != NULL) 296 mlx5_mprq_buf_free((*rxq->mprq_bufs)[i]); 297 (*rxq->mprq_bufs)[i] = NULL; 298 } 299 if (rxq->mprq_repl != NULL) { 300 mlx5_mprq_buf_free(rxq->mprq_repl); 301 rxq->mprq_repl = NULL; 302 } 303 } 304 305 /** 306 * Free RX queue elements for Single-Packet RQ. 307 * 308 * @param rxq_ctrl 309 * Pointer to RX queue structure. 310 */ 311 static void 312 rxq_free_elts_sprq(struct mlx5_rxq_ctrl *rxq_ctrl) 313 { 314 struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq; 315 const uint16_t q_n = (1 << rxq->elts_n); 316 const uint16_t q_mask = q_n - 1; 317 uint16_t used = q_n - (rxq->rq_ci - rxq->rq_pi); 318 uint16_t i; 319 320 DRV_LOG(DEBUG, "port %u Rx queue %u freeing WRs", 321 PORT_ID(rxq_ctrl->priv), rxq_ctrl->idx); 322 if (rxq->elts == NULL) 323 return; 324 /** 325 * Some mbuf in the Ring belongs to the application. They cannot be 326 * freed. 327 */ 328 if (mlx5_rxq_check_vec_support(rxq) > 0) { 329 for (i = 0; i < used; ++i) 330 (*rxq->elts)[(rxq->rq_ci + i) & q_mask] = NULL; 331 rxq->rq_pi = rxq->rq_ci; 332 } 333 for (i = 0; (i != (1u << rxq->elts_n)); ++i) { 334 if ((*rxq->elts)[i] != NULL) 335 rte_pktmbuf_free_seg((*rxq->elts)[i]); 336 (*rxq->elts)[i] = NULL; 337 } 338 } 339 340 /** 341 * Free RX queue elements. 342 * 343 * @param rxq_ctrl 344 * Pointer to RX queue structure. 345 */ 346 static void 347 rxq_free_elts(struct mlx5_rxq_ctrl *rxq_ctrl) 348 { 349 if (mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq)) 350 rxq_free_elts_mprq(rxq_ctrl); 351 else 352 rxq_free_elts_sprq(rxq_ctrl); 353 } 354 355 /** 356 * Clean up a RX queue. 357 * 358 * Destroy objects, free allocated memory and reset the structure for reuse. 359 * 360 * @param rxq_ctrl 361 * Pointer to RX queue structure. 362 */ 363 void 364 mlx5_rxq_cleanup(struct mlx5_rxq_ctrl *rxq_ctrl) 365 { 366 DRV_LOG(DEBUG, "port %u cleaning up Rx queue %u", 367 PORT_ID(rxq_ctrl->priv), rxq_ctrl->idx); 368 if (rxq_ctrl->ibv) 369 mlx5_rxq_ibv_release(rxq_ctrl->ibv); 370 memset(rxq_ctrl, 0, sizeof(*rxq_ctrl)); 371 } 372 373 /** 374 * Returns the per-queue supported offloads. 375 * 376 * @param dev 377 * Pointer to Ethernet device. 378 * 379 * @return 380 * Supported Rx offloads. 
381 */ 382 uint64_t 383 mlx5_get_rx_queue_offloads(struct rte_eth_dev *dev) 384 { 385 struct mlx5_priv *priv = dev->data->dev_private; 386 struct mlx5_dev_config *config = &priv->config; 387 uint64_t offloads = (DEV_RX_OFFLOAD_SCATTER | 388 DEV_RX_OFFLOAD_TIMESTAMP | 389 DEV_RX_OFFLOAD_JUMBO_FRAME); 390 391 if (config->hw_fcs_strip) 392 offloads |= DEV_RX_OFFLOAD_KEEP_CRC; 393 394 if (config->hw_csum) 395 offloads |= (DEV_RX_OFFLOAD_IPV4_CKSUM | 396 DEV_RX_OFFLOAD_UDP_CKSUM | 397 DEV_RX_OFFLOAD_TCP_CKSUM); 398 if (config->hw_vlan_strip) 399 offloads |= DEV_RX_OFFLOAD_VLAN_STRIP; 400 return offloads; 401 } 402 403 404 /** 405 * Returns the per-port supported offloads. 406 * 407 * @return 408 * Supported Rx offloads. 409 */ 410 uint64_t 411 mlx5_get_rx_port_offloads(void) 412 { 413 uint64_t offloads = DEV_RX_OFFLOAD_VLAN_FILTER; 414 415 return offloads; 416 } 417 418 /** 419 * 420 * @param dev 421 * Pointer to Ethernet device structure. 422 * @param idx 423 * RX queue index. 424 * @param desc 425 * Number of descriptors to configure in queue. 426 * @param socket 427 * NUMA socket on which memory must be allocated. 428 * @param[in] conf 429 * Thresholds parameters. 430 * @param mp 431 * Memory pool for buffer allocations. 432 * 433 * @return 434 * 0 on success, a negative errno value otherwise and rte_errno is set. 435 */ 436 int 437 mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, 438 unsigned int socket, const struct rte_eth_rxconf *conf, 439 struct rte_mempool *mp) 440 { 441 struct mlx5_priv *priv = dev->data->dev_private; 442 struct mlx5_rxq_data *rxq = (*priv->rxqs)[idx]; 443 struct mlx5_rxq_ctrl *rxq_ctrl = 444 container_of(rxq, struct mlx5_rxq_ctrl, rxq); 445 446 if (!rte_is_power_of_2(desc)) { 447 desc = 1 << log2above(desc); 448 DRV_LOG(WARNING, 449 "port %u increased number of descriptors in Rx queue %u" 450 " to the next power of two (%d)", 451 dev->data->port_id, idx, desc); 452 } 453 DRV_LOG(DEBUG, "port %u configuring Rx queue %u for %u descriptors", 454 dev->data->port_id, idx, desc); 455 if (idx >= priv->rxqs_n) { 456 DRV_LOG(ERR, "port %u Rx queue index out of range (%u >= %u)", 457 dev->data->port_id, idx, priv->rxqs_n); 458 rte_errno = EOVERFLOW; 459 return -rte_errno; 460 } 461 if (!mlx5_rxq_releasable(dev, idx)) { 462 DRV_LOG(ERR, "port %u unable to release queue index %u", 463 dev->data->port_id, idx); 464 rte_errno = EBUSY; 465 return -rte_errno; 466 } 467 mlx5_rxq_release(dev, idx); 468 rxq_ctrl = mlx5_rxq_new(dev, idx, desc, socket, conf, mp); 469 if (!rxq_ctrl) { 470 DRV_LOG(ERR, "port %u unable to allocate queue index %u", 471 dev->data->port_id, idx); 472 rte_errno = ENOMEM; 473 return -rte_errno; 474 } 475 DRV_LOG(DEBUG, "port %u adding Rx queue %u to list", 476 dev->data->port_id, idx); 477 (*priv->rxqs)[idx] = &rxq_ctrl->rxq; 478 return 0; 479 } 480 481 /** 482 * DPDK callback to release a RX queue. 483 * 484 * @param dpdk_rxq 485 * Generic RX queue pointer. 
486 */ 487 void 488 mlx5_rx_queue_release(void *dpdk_rxq) 489 { 490 struct mlx5_rxq_data *rxq = (struct mlx5_rxq_data *)dpdk_rxq; 491 struct mlx5_rxq_ctrl *rxq_ctrl; 492 struct mlx5_priv *priv; 493 494 if (rxq == NULL) 495 return; 496 rxq_ctrl = container_of(rxq, struct mlx5_rxq_ctrl, rxq); 497 priv = rxq_ctrl->priv; 498 if (!mlx5_rxq_releasable(ETH_DEV(priv), rxq_ctrl->rxq.stats.idx)) 499 rte_panic("port %u Rx queue %u is still used by a flow and" 500 " cannot be removed\n", 501 PORT_ID(priv), rxq_ctrl->idx); 502 mlx5_rxq_release(ETH_DEV(priv), rxq_ctrl->rxq.stats.idx); 503 } 504 505 /** 506 * Allocate queue vector and fill epoll fd list for Rx interrupts. 507 * 508 * @param dev 509 * Pointer to Ethernet device. 510 * 511 * @return 512 * 0 on success, a negative errno value otherwise and rte_errno is set. 513 */ 514 int 515 mlx5_rx_intr_vec_enable(struct rte_eth_dev *dev) 516 { 517 struct mlx5_priv *priv = dev->data->dev_private; 518 unsigned int i; 519 unsigned int rxqs_n = priv->rxqs_n; 520 unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID); 521 unsigned int count = 0; 522 struct rte_intr_handle *intr_handle = dev->intr_handle; 523 524 if (!dev->data->dev_conf.intr_conf.rxq) 525 return 0; 526 mlx5_rx_intr_vec_disable(dev); 527 intr_handle->intr_vec = malloc(n * sizeof(intr_handle->intr_vec[0])); 528 if (intr_handle->intr_vec == NULL) { 529 DRV_LOG(ERR, 530 "port %u failed to allocate memory for interrupt" 531 " vector, Rx interrupts will not be supported", 532 dev->data->port_id); 533 rte_errno = ENOMEM; 534 return -rte_errno; 535 } 536 intr_handle->type = RTE_INTR_HANDLE_EXT; 537 for (i = 0; i != n; ++i) { 538 /* This rxq ibv must not be released in this function. */ 539 struct mlx5_rxq_ibv *rxq_ibv = mlx5_rxq_ibv_get(dev, i); 540 int fd; 541 int flags; 542 int rc; 543 544 /* Skip queues that cannot request interrupts. */ 545 if (!rxq_ibv || !rxq_ibv->channel) { 546 /* Use invalid intr_vec[] index to disable entry. */ 547 intr_handle->intr_vec[i] = 548 RTE_INTR_VEC_RXTX_OFFSET + 549 RTE_MAX_RXTX_INTR_VEC_ID; 550 continue; 551 } 552 if (count >= RTE_MAX_RXTX_INTR_VEC_ID) { 553 DRV_LOG(ERR, 554 "port %u too many Rx queues for interrupt" 555 " vector size (%d), Rx interrupts cannot be" 556 " enabled", 557 dev->data->port_id, RTE_MAX_RXTX_INTR_VEC_ID); 558 mlx5_rx_intr_vec_disable(dev); 559 rte_errno = ENOMEM; 560 return -rte_errno; 561 } 562 fd = rxq_ibv->channel->fd; 563 flags = fcntl(fd, F_GETFL); 564 rc = fcntl(fd, F_SETFL, flags | O_NONBLOCK); 565 if (rc < 0) { 566 rte_errno = errno; 567 DRV_LOG(ERR, 568 "port %u failed to make Rx interrupt file" 569 " descriptor %d non-blocking for queue index" 570 " %d", 571 dev->data->port_id, fd, i); 572 mlx5_rx_intr_vec_disable(dev); 573 return -rte_errno; 574 } 575 intr_handle->intr_vec[i] = RTE_INTR_VEC_RXTX_OFFSET + count; 576 intr_handle->efds[count] = fd; 577 count++; 578 } 579 if (!count) 580 mlx5_rx_intr_vec_disable(dev); 581 else 582 intr_handle->nb_efd = count; 583 return 0; 584 } 585 586 /** 587 * Clean up Rx interrupts handler. 588 * 589 * @param dev 590 * Pointer to Ethernet device. 
591 */ 592 void 593 mlx5_rx_intr_vec_disable(struct rte_eth_dev *dev) 594 { 595 struct mlx5_priv *priv = dev->data->dev_private; 596 struct rte_intr_handle *intr_handle = dev->intr_handle; 597 unsigned int i; 598 unsigned int rxqs_n = priv->rxqs_n; 599 unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID); 600 601 if (!dev->data->dev_conf.intr_conf.rxq) 602 return; 603 if (!intr_handle->intr_vec) 604 goto free; 605 for (i = 0; i != n; ++i) { 606 struct mlx5_rxq_ctrl *rxq_ctrl; 607 struct mlx5_rxq_data *rxq_data; 608 609 if (intr_handle->intr_vec[i] == RTE_INTR_VEC_RXTX_OFFSET + 610 RTE_MAX_RXTX_INTR_VEC_ID) 611 continue; 612 /** 613 * Need to access directly the queue to release the reference 614 * kept in priv_rx_intr_vec_enable(). 615 */ 616 rxq_data = (*priv->rxqs)[i]; 617 rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq); 618 mlx5_rxq_ibv_release(rxq_ctrl->ibv); 619 } 620 free: 621 rte_intr_free_epoll_fd(intr_handle); 622 if (intr_handle->intr_vec) 623 free(intr_handle->intr_vec); 624 intr_handle->nb_efd = 0; 625 intr_handle->intr_vec = NULL; 626 } 627 628 /** 629 * MLX5 CQ notification . 630 * 631 * @param rxq 632 * Pointer to receive queue structure. 633 * @param sq_n_rxq 634 * Sequence number per receive queue . 635 */ 636 static inline void 637 mlx5_arm_cq(struct mlx5_rxq_data *rxq, int sq_n_rxq) 638 { 639 int sq_n = 0; 640 uint32_t doorbell_hi; 641 uint64_t doorbell; 642 void *cq_db_reg = (char *)rxq->cq_uar + MLX5_CQ_DOORBELL; 643 644 sq_n = sq_n_rxq & MLX5_CQ_SQN_MASK; 645 doorbell_hi = sq_n << MLX5_CQ_SQN_OFFSET | (rxq->cq_ci & MLX5_CI_MASK); 646 doorbell = (uint64_t)doorbell_hi << 32; 647 doorbell |= rxq->cqn; 648 rxq->cq_db[MLX5_CQ_ARM_DB] = rte_cpu_to_be_32(doorbell_hi); 649 mlx5_uar_write64(rte_cpu_to_be_64(doorbell), 650 cq_db_reg, rxq->uar_lock_cq); 651 } 652 653 /** 654 * DPDK callback for Rx queue interrupt enable. 655 * 656 * @param dev 657 * Pointer to Ethernet device structure. 658 * @param rx_queue_id 659 * Rx queue number. 660 * 661 * @return 662 * 0 on success, a negative errno value otherwise and rte_errno is set. 663 */ 664 int 665 mlx5_rx_intr_enable(struct rte_eth_dev *dev, uint16_t rx_queue_id) 666 { 667 struct mlx5_priv *priv = dev->data->dev_private; 668 struct mlx5_rxq_data *rxq_data; 669 struct mlx5_rxq_ctrl *rxq_ctrl; 670 671 rxq_data = (*priv->rxqs)[rx_queue_id]; 672 if (!rxq_data) { 673 rte_errno = EINVAL; 674 return -rte_errno; 675 } 676 rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq); 677 if (rxq_ctrl->irq) { 678 struct mlx5_rxq_ibv *rxq_ibv; 679 680 rxq_ibv = mlx5_rxq_ibv_get(dev, rx_queue_id); 681 if (!rxq_ibv) { 682 rte_errno = EINVAL; 683 return -rte_errno; 684 } 685 mlx5_arm_cq(rxq_data, rxq_data->cq_arm_sn); 686 mlx5_rxq_ibv_release(rxq_ibv); 687 } 688 return 0; 689 } 690 691 /** 692 * DPDK callback for Rx queue interrupt disable. 693 * 694 * @param dev 695 * Pointer to Ethernet device structure. 696 * @param rx_queue_id 697 * Rx queue number. 698 * 699 * @return 700 * 0 on success, a negative errno value otherwise and rte_errno is set. 
701 */ 702 int 703 mlx5_rx_intr_disable(struct rte_eth_dev *dev, uint16_t rx_queue_id) 704 { 705 struct mlx5_priv *priv = dev->data->dev_private; 706 struct mlx5_rxq_data *rxq_data; 707 struct mlx5_rxq_ctrl *rxq_ctrl; 708 struct mlx5_rxq_ibv *rxq_ibv = NULL; 709 struct ibv_cq *ev_cq; 710 void *ev_ctx; 711 int ret; 712 713 rxq_data = (*priv->rxqs)[rx_queue_id]; 714 if (!rxq_data) { 715 rte_errno = EINVAL; 716 return -rte_errno; 717 } 718 rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq); 719 if (!rxq_ctrl->irq) 720 return 0; 721 rxq_ibv = mlx5_rxq_ibv_get(dev, rx_queue_id); 722 if (!rxq_ibv) { 723 rte_errno = EINVAL; 724 return -rte_errno; 725 } 726 ret = mlx5_glue->get_cq_event(rxq_ibv->channel, &ev_cq, &ev_ctx); 727 if (ret || ev_cq != rxq_ibv->cq) { 728 rte_errno = EINVAL; 729 goto exit; 730 } 731 rxq_data->cq_arm_sn++; 732 mlx5_glue->ack_cq_events(rxq_ibv->cq, 1); 733 return 0; 734 exit: 735 ret = rte_errno; /* Save rte_errno before cleanup. */ 736 if (rxq_ibv) 737 mlx5_rxq_ibv_release(rxq_ibv); 738 DRV_LOG(WARNING, "port %u unable to disable interrupt on Rx queue %d", 739 dev->data->port_id, rx_queue_id); 740 rte_errno = ret; /* Restore rte_errno. */ 741 return -rte_errno; 742 } 743 744 /** 745 * Create the Rx queue Verbs object. 746 * 747 * @param dev 748 * Pointer to Ethernet device. 749 * @param idx 750 * Queue index in DPDK Rx queue array 751 * 752 * @return 753 * The Verbs object initialised, NULL otherwise and rte_errno is set. 754 */ 755 struct mlx5_rxq_ibv * 756 mlx5_rxq_ibv_new(struct rte_eth_dev *dev, uint16_t idx) 757 { 758 struct mlx5_priv *priv = dev->data->dev_private; 759 struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx]; 760 struct mlx5_rxq_ctrl *rxq_ctrl = 761 container_of(rxq_data, struct mlx5_rxq_ctrl, rxq); 762 struct ibv_wq_attr mod; 763 union { 764 struct { 765 struct ibv_cq_init_attr_ex ibv; 766 struct mlx5dv_cq_init_attr mlx5; 767 } cq; 768 struct { 769 struct ibv_wq_init_attr ibv; 770 #ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT 771 struct mlx5dv_wq_init_attr mlx5; 772 #endif 773 } wq; 774 struct ibv_cq_ex cq_attr; 775 } attr; 776 unsigned int cqe_n; 777 unsigned int wqe_n = 1 << rxq_data->elts_n; 778 struct mlx5_rxq_ibv *tmpl; 779 struct mlx5dv_cq cq_info; 780 struct mlx5dv_rwq rwq; 781 unsigned int i; 782 int ret = 0; 783 struct mlx5dv_obj obj; 784 struct mlx5_dev_config *config = &priv->config; 785 const int mprq_en = mlx5_rxq_mprq_enabled(rxq_data); 786 787 assert(rxq_data); 788 assert(!rxq_ctrl->ibv); 789 priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_RX_QUEUE; 790 priv->verbs_alloc_ctx.obj = rxq_ctrl; 791 tmpl = rte_calloc_socket(__func__, 1, sizeof(*tmpl), 0, 792 rxq_ctrl->socket); 793 if (!tmpl) { 794 DRV_LOG(ERR, 795 "port %u Rx queue %u cannot allocate verbs resources", 796 dev->data->port_id, rxq_ctrl->idx); 797 rte_errno = ENOMEM; 798 goto error; 799 } 800 tmpl->rxq_ctrl = rxq_ctrl; 801 if (rxq_ctrl->irq) { 802 tmpl->channel = mlx5_glue->create_comp_channel(priv->sh->ctx); 803 if (!tmpl->channel) { 804 DRV_LOG(ERR, "port %u: comp channel creation failure", 805 dev->data->port_id); 806 rte_errno = ENOMEM; 807 goto error; 808 } 809 } 810 if (mprq_en) 811 cqe_n = wqe_n * (1 << rxq_data->strd_num_n) - 1; 812 else 813 cqe_n = wqe_n - 1; 814 attr.cq.ibv = (struct ibv_cq_init_attr_ex){ 815 .cqe = cqe_n, 816 .channel = tmpl->channel, 817 .comp_mask = 0, 818 }; 819 attr.cq.mlx5 = (struct mlx5dv_cq_init_attr){ 820 .comp_mask = 0, 821 }; 822 if (config->cqe_comp && !rxq_data->hw_timestamp) { 823 attr.cq.mlx5.comp_mask |= 824 
MLX5DV_CQ_INIT_ATTR_MASK_COMPRESSED_CQE; 825 #ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT 826 attr.cq.mlx5.cqe_comp_res_format = 827 mprq_en ? MLX5DV_CQE_RES_FORMAT_CSUM_STRIDX : 828 MLX5DV_CQE_RES_FORMAT_HASH; 829 #else 830 attr.cq.mlx5.cqe_comp_res_format = MLX5DV_CQE_RES_FORMAT_HASH; 831 #endif 832 /* 833 * For vectorized Rx, it must not be doubled in order to 834 * make cq_ci and rq_ci aligned. 835 */ 836 if (mlx5_rxq_check_vec_support(rxq_data) < 0) 837 attr.cq.ibv.cqe *= 2; 838 } else if (config->cqe_comp && rxq_data->hw_timestamp) { 839 DRV_LOG(DEBUG, 840 "port %u Rx CQE compression is disabled for HW" 841 " timestamp", 842 dev->data->port_id); 843 } 844 #ifdef HAVE_IBV_MLX5_MOD_CQE_128B_PAD 845 if (config->cqe_pad) { 846 attr.cq.mlx5.comp_mask |= MLX5DV_CQ_INIT_ATTR_MASK_FLAGS; 847 attr.cq.mlx5.flags |= MLX5DV_CQ_INIT_ATTR_FLAGS_CQE_PAD; 848 } 849 #endif 850 tmpl->cq = mlx5_glue->cq_ex_to_cq 851 (mlx5_glue->dv_create_cq(priv->sh->ctx, &attr.cq.ibv, 852 &attr.cq.mlx5)); 853 if (tmpl->cq == NULL) { 854 DRV_LOG(ERR, "port %u Rx queue %u CQ creation failure", 855 dev->data->port_id, idx); 856 rte_errno = ENOMEM; 857 goto error; 858 } 859 DRV_LOG(DEBUG, "port %u device_attr.max_qp_wr is %d", 860 dev->data->port_id, priv->sh->device_attr.orig_attr.max_qp_wr); 861 DRV_LOG(DEBUG, "port %u device_attr.max_sge is %d", 862 dev->data->port_id, priv->sh->device_attr.orig_attr.max_sge); 863 attr.wq.ibv = (struct ibv_wq_init_attr){ 864 .wq_context = NULL, /* Could be useful in the future. */ 865 .wq_type = IBV_WQT_RQ, 866 /* Max number of outstanding WRs. */ 867 .max_wr = wqe_n >> rxq_data->sges_n, 868 /* Max number of scatter/gather elements in a WR. */ 869 .max_sge = 1 << rxq_data->sges_n, 870 .pd = priv->sh->pd, 871 .cq = tmpl->cq, 872 .comp_mask = 873 IBV_WQ_FLAGS_CVLAN_STRIPPING | 874 0, 875 .create_flags = (rxq_data->vlan_strip ? 876 IBV_WQ_FLAGS_CVLAN_STRIPPING : 877 0), 878 }; 879 /* By default, FCS (CRC) is stripped by hardware. */ 880 if (rxq_data->crc_present) { 881 attr.wq.ibv.create_flags |= IBV_WQ_FLAGS_SCATTER_FCS; 882 attr.wq.ibv.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS; 883 } 884 if (config->hw_padding) { 885 #if defined(HAVE_IBV_WQ_FLAG_RX_END_PADDING) 886 attr.wq.ibv.create_flags |= IBV_WQ_FLAG_RX_END_PADDING; 887 attr.wq.ibv.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS; 888 #elif defined(HAVE_IBV_WQ_FLAGS_PCI_WRITE_END_PADDING) 889 attr.wq.ibv.create_flags |= IBV_WQ_FLAGS_PCI_WRITE_END_PADDING; 890 attr.wq.ibv.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS; 891 #endif 892 } 893 #ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT 894 attr.wq.mlx5 = (struct mlx5dv_wq_init_attr){ 895 .comp_mask = 0, 896 }; 897 if (mprq_en) { 898 struct mlx5dv_striding_rq_init_attr *mprq_attr = 899 &attr.wq.mlx5.striding_rq_attrs; 900 901 attr.wq.mlx5.comp_mask |= MLX5DV_WQ_INIT_ATTR_MASK_STRIDING_RQ; 902 *mprq_attr = (struct mlx5dv_striding_rq_init_attr){ 903 .single_stride_log_num_of_bytes = rxq_data->strd_sz_n, 904 .single_wqe_log_num_of_strides = rxq_data->strd_num_n, 905 .two_byte_shift_en = MLX5_MPRQ_TWO_BYTE_SHIFT, 906 }; 907 } 908 tmpl->wq = mlx5_glue->dv_create_wq(priv->sh->ctx, &attr.wq.ibv, 909 &attr.wq.mlx5); 910 #else 911 tmpl->wq = mlx5_glue->create_wq(priv->sh->ctx, &attr.wq.ibv); 912 #endif 913 if (tmpl->wq == NULL) { 914 DRV_LOG(ERR, "port %u Rx queue %u WQ creation failure", 915 dev->data->port_id, idx); 916 rte_errno = ENOMEM; 917 goto error; 918 } 919 /* 920 * Make sure number of WRs*SGEs match expectations since a queue 921 * cannot allocate more than "desc" buffers. 
922 */ 923 if (attr.wq.ibv.max_wr != (wqe_n >> rxq_data->sges_n) || 924 attr.wq.ibv.max_sge != (1u << rxq_data->sges_n)) { 925 DRV_LOG(ERR, 926 "port %u Rx queue %u requested %u*%u but got %u*%u" 927 " WRs*SGEs", 928 dev->data->port_id, idx, 929 wqe_n >> rxq_data->sges_n, (1 << rxq_data->sges_n), 930 attr.wq.ibv.max_wr, attr.wq.ibv.max_sge); 931 rte_errno = EINVAL; 932 goto error; 933 } 934 /* Change queue state to ready. */ 935 mod = (struct ibv_wq_attr){ 936 .attr_mask = IBV_WQ_ATTR_STATE, 937 .wq_state = IBV_WQS_RDY, 938 }; 939 ret = mlx5_glue->modify_wq(tmpl->wq, &mod); 940 if (ret) { 941 DRV_LOG(ERR, 942 "port %u Rx queue %u WQ state to IBV_WQS_RDY failed", 943 dev->data->port_id, idx); 944 rte_errno = ret; 945 goto error; 946 } 947 obj.cq.in = tmpl->cq; 948 obj.cq.out = &cq_info; 949 obj.rwq.in = tmpl->wq; 950 obj.rwq.out = &rwq; 951 ret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_CQ | MLX5DV_OBJ_RWQ); 952 if (ret) { 953 rte_errno = ret; 954 goto error; 955 } 956 if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) { 957 DRV_LOG(ERR, 958 "port %u wrong MLX5_CQE_SIZE environment variable" 959 " value: it should be set to %u", 960 dev->data->port_id, RTE_CACHE_LINE_SIZE); 961 rte_errno = EINVAL; 962 goto error; 963 } 964 /* Fill the rings. */ 965 rxq_data->wqes = rwq.buf; 966 for (i = 0; (i != wqe_n); ++i) { 967 volatile struct mlx5_wqe_data_seg *scat; 968 uintptr_t addr; 969 uint32_t byte_count; 970 971 if (mprq_en) { 972 struct mlx5_mprq_buf *buf = (*rxq_data->mprq_bufs)[i]; 973 974 scat = &((volatile struct mlx5_wqe_mprq *) 975 rxq_data->wqes)[i].dseg; 976 addr = (uintptr_t)mlx5_mprq_buf_addr(buf); 977 byte_count = (1 << rxq_data->strd_sz_n) * 978 (1 << rxq_data->strd_num_n); 979 } else { 980 struct rte_mbuf *buf = (*rxq_data->elts)[i]; 981 982 scat = &((volatile struct mlx5_wqe_data_seg *) 983 rxq_data->wqes)[i]; 984 addr = rte_pktmbuf_mtod(buf, uintptr_t); 985 byte_count = DATA_LEN(buf); 986 } 987 /* scat->addr must be able to store a pointer. */ 988 assert(sizeof(scat->addr) >= sizeof(uintptr_t)); 989 *scat = (struct mlx5_wqe_data_seg){ 990 .addr = rte_cpu_to_be_64(addr), 991 .byte_count = rte_cpu_to_be_32(byte_count), 992 .lkey = mlx5_rx_addr2mr(rxq_data, addr), 993 }; 994 } 995 rxq_data->rq_db = rwq.dbrec; 996 rxq_data->cqe_n = log2above(cq_info.cqe_cnt); 997 rxq_data->cq_ci = 0; 998 rxq_data->consumed_strd = 0; 999 rxq_data->rq_pi = 0; 1000 rxq_data->zip = (struct rxq_zip){ 1001 .ai = 0, 1002 }; 1003 rxq_data->cq_db = cq_info.dbrec; 1004 rxq_data->cqes = (volatile struct mlx5_cqe (*)[])(uintptr_t)cq_info.buf; 1005 rxq_data->cq_uar = cq_info.cq_uar; 1006 rxq_data->cqn = cq_info.cqn; 1007 rxq_data->cq_arm_sn = 0; 1008 /* Update doorbell counter. */ 1009 rxq_data->rq_ci = wqe_n >> rxq_data->sges_n; 1010 rte_wmb(); 1011 *rxq_data->rq_db = rte_cpu_to_be_32(rxq_data->rq_ci); 1012 DRV_LOG(DEBUG, "port %u rxq %u updated with %p", dev->data->port_id, 1013 idx, (void *)&tmpl); 1014 rte_atomic32_inc(&tmpl->refcnt); 1015 LIST_INSERT_HEAD(&priv->rxqsibv, tmpl, next); 1016 priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE; 1017 return tmpl; 1018 error: 1019 ret = rte_errno; /* Save rte_errno before cleanup. */ 1020 if (tmpl->wq) 1021 claim_zero(mlx5_glue->destroy_wq(tmpl->wq)); 1022 if (tmpl->cq) 1023 claim_zero(mlx5_glue->destroy_cq(tmpl->cq)); 1024 if (tmpl->channel) 1025 claim_zero(mlx5_glue->destroy_comp_channel(tmpl->channel)); 1026 priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE; 1027 rte_errno = ret; /* Restore rte_errno. 
 */
	return NULL;
}

/**
 * Get an Rx queue Verbs object.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param idx
 *   Queue index in DPDK Rx queue array.
 *
 * @return
 *   The Verbs object if it exists.
 */
struct mlx5_rxq_ibv *
mlx5_rxq_ibv_get(struct rte_eth_dev *dev, uint16_t idx)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx];
	struct mlx5_rxq_ctrl *rxq_ctrl;

	if (idx >= priv->rxqs_n)
		return NULL;
	if (!rxq_data)
		return NULL;
	rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
	if (rxq_ctrl->ibv)
		rte_atomic32_inc(&rxq_ctrl->ibv->refcnt);
	return rxq_ctrl->ibv;
}

/**
 * Release an Rx Verbs queue object.
 *
 * @param rxq_ibv
 *   Verbs Rx queue object.
 *
 * @return
 *   1 while a reference on it exists, 0 when freed.
 */
int
mlx5_rxq_ibv_release(struct mlx5_rxq_ibv *rxq_ibv)
{
	assert(rxq_ibv);
	assert(rxq_ibv->wq);
	assert(rxq_ibv->cq);
	if (rte_atomic32_dec_and_test(&rxq_ibv->refcnt)) {
		rxq_free_elts(rxq_ibv->rxq_ctrl);
		claim_zero(mlx5_glue->destroy_wq(rxq_ibv->wq));
		claim_zero(mlx5_glue->destroy_cq(rxq_ibv->cq));
		if (rxq_ibv->channel)
			claim_zero(mlx5_glue->destroy_comp_channel
				   (rxq_ibv->channel));
		LIST_REMOVE(rxq_ibv, next);
		rte_free(rxq_ibv);
		return 0;
	}
	return 1;
}

/**
 * Verify the Verbs Rx queue list is empty.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   The number of objects not released.
 */
int
mlx5_rxq_ibv_verify(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	int ret = 0;
	struct mlx5_rxq_ibv *rxq_ibv;

	LIST_FOREACH(rxq_ibv, &priv->rxqsibv, next) {
		DRV_LOG(DEBUG, "port %u Verbs Rx queue %u still referenced",
			dev->data->port_id, rxq_ibv->rxq_ctrl->idx);
		++ret;
	}
	return ret;
}

/**
 * Return true if a single reference exists on the object.
 *
 * @param rxq_ibv
 *   Verbs Rx queue object.
 */
int
mlx5_rxq_ibv_releasable(struct mlx5_rxq_ibv *rxq_ibv)
{
	assert(rxq_ibv);
	return (rte_atomic32_read(&rxq_ibv->refcnt) == 1);
}

/**
 * Callback function to initialize mbufs for Multi-Packet RQ.
 */
static inline void
mlx5_mprq_buf_init(struct rte_mempool *mp, void *opaque_arg __rte_unused,
		   void *_m, unsigned int i __rte_unused)
{
	struct mlx5_mprq_buf *buf = _m;

	memset(_m, 0, sizeof(*buf));
	buf->mp = mp;
	rte_atomic16_set(&buf->refcnt, 1);
}
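/*
 * Illustrative sketch only, not part of the driver flow: how a buffer from
 * the Multi-Packet RQ mempool initialized above is typically taken and
 * returned. The refcnt set to 1 by mlx5_mprq_buf_init() is incremented for
 * each mbuf that references the buffer as external storage and dropped again
 * with mlx5_mprq_buf_free(), as rxq_free_elts_mprq() above does.
 *
 *	struct mlx5_mprq_buf *buf;
 *
 *	if (rte_mempool_get(mp, (void **)&buf) == 0) {
 *		rte_atomic16_add(&buf->refcnt, 1);	// extra external user
 *		// ... hand a stride out as an attached mbuf ...
 *		mlx5_mprq_buf_free(buf);		// drop one reference
 *	}
 */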
/**
 * Free mempool of Multi-Packet RQ.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   0 on success, negative errno value on failure.
 */
int
mlx5_mprq_free_mp(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct rte_mempool *mp = priv->mprq_mp;
	unsigned int i;

	if (mp == NULL)
		return 0;
	DRV_LOG(DEBUG, "port %u freeing mempool (%s) for Multi-Packet RQ",
		dev->data->port_id, mp->name);
	/*
	 * If a buffer in the pool has been externally attached to an mbuf and
	 * is still in use by the application, destroying the Rx queue can
	 * spoil the packet. It is unlikely to happen, but if an application
	 * dynamically creates and destroys Rx queues while holding Rx
	 * packets, this can happen.
	 *
	 * TODO: It is unavoidable for now because the mempool for Multi-Packet
	 * RQ isn't provided by the application but managed by the PMD.
	 */
	if (!rte_mempool_full(mp)) {
		DRV_LOG(ERR,
			"port %u mempool for Multi-Packet RQ is still in use",
			dev->data->port_id);
		rte_errno = EBUSY;
		return -rte_errno;
	}
	rte_mempool_free(mp);
	/* Unset mempool for each Rx queue. */
	for (i = 0; i != priv->rxqs_n; ++i) {
		struct mlx5_rxq_data *rxq = (*priv->rxqs)[i];

		if (rxq == NULL)
			continue;
		rxq->mprq_mp = NULL;
	}
	priv->mprq_mp = NULL;
	return 0;
}
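/*
 * Worked example for the sizing done in mlx5_mprq_alloc_mp() below, using
 * assumed values that are not mandated by the driver: four Rx queues of 512
 * descriptors each, strd_num_n = 6 (64 strides per WQE) and strd_sz_n = 11
 * (2 KB strides):
 *
 *	buf_len  = (1 << 6) * (1 << 11) = 128 KB per MPRQ buffer
 *	obj_size = buf_len + sizeof(struct mlx5_mprq_buf)
 *	desc     = 4 * 512 = 2048, then desc *= 4 -> 8192
 *	obj_num  = 8192 + MLX5_MPRQ_MP_CACHE_SZ * 4
 *
 * The "desc *= 4" headroom is the speculative over-allocation described in
 * the comment inside the function: buffers attached to mbufs as external
 * storage may still be held by the application.
 */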
/**
 * Allocate a mempool for Multi-Packet RQ. All configured Rx queues share the
 * mempool. If already allocated, reuse it if there are enough elements.
 * Otherwise, resize it.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   0 on success, negative errno value on failure.
 */
int
mlx5_mprq_alloc_mp(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct rte_mempool *mp = priv->mprq_mp;
	char name[RTE_MEMPOOL_NAMESIZE];
	unsigned int desc = 0;
	unsigned int buf_len;
	unsigned int obj_num;
	unsigned int obj_size;
	unsigned int strd_num_n = 0;
	unsigned int strd_sz_n = 0;
	unsigned int i;

	if (!mlx5_mprq_enabled(dev))
		return 0;
	/* Count the total number of descriptors configured. */
	for (i = 0; i != priv->rxqs_n; ++i) {
		struct mlx5_rxq_data *rxq = (*priv->rxqs)[i];

		if (rxq == NULL)
			continue;
		desc += 1 << rxq->elts_n;
		/* Get the max number of strides. */
		if (strd_num_n < rxq->strd_num_n)
			strd_num_n = rxq->strd_num_n;
		/* Get the max size of a stride. */
		if (strd_sz_n < rxq->strd_sz_n)
			strd_sz_n = rxq->strd_sz_n;
	}
	assert(strd_num_n && strd_sz_n);
	buf_len = (1 << strd_num_n) * (1 << strd_sz_n);
	obj_size = buf_len + sizeof(struct mlx5_mprq_buf);
	/*
	 * Received packets can be either memcpy'd or externally referenced.
	 * When a packet is attached to an mbuf as an external buffer, it is
	 * not possible to predict how long the application will keep it
	 * queued, so the exact number of buffers needed cannot be
	 * pre-allocated; enough buffers are prepared speculatively instead.
	 *
	 * In the data path, if this mempool is depleted, the PMD will try to
	 * memcpy received packets into buffers provided by the application
	 * (rxq->mp) until this mempool becomes available again.
	 */
	desc *= 4;
	obj_num = desc + MLX5_MPRQ_MP_CACHE_SZ * priv->rxqs_n;
	/*
	 * rte_mempool_create_empty() has a sanity check refusing a large
	 * cache size compared to the number of elements.
	 * CACHE_FLUSHTHRESH_MULTIPLIER is defined in a C file, so use a
	 * constant number 2 instead.
	 */
	obj_num = RTE_MAX(obj_num, MLX5_MPRQ_MP_CACHE_SZ * 2);
	/* Check whether a mempool is already allocated and if it can be reused. */
	if (mp != NULL && mp->elt_size >= obj_size && mp->size >= obj_num) {
		DRV_LOG(DEBUG, "port %u mempool %s is being reused",
			dev->data->port_id, mp->name);
		/* Reuse. */
		goto exit;
	} else if (mp != NULL) {
		DRV_LOG(DEBUG, "port %u mempool %s should be resized, freeing it",
			dev->data->port_id, mp->name);
		/*
		 * If freeing fails, which means the mempool may still be in
		 * use, there is no way but to keep using the existing one.
		 * On buffer underrun, packets will be memcpy'd instead of
		 * using external buffer attachment.
		 */
		if (mlx5_mprq_free_mp(dev)) {
			if (mp->elt_size >= obj_size)
				goto exit;
			else
				return -rte_errno;
		}
	}
	snprintf(name, sizeof(name), "%s-mprq", dev->device->name);
	mp = rte_mempool_create(name, obj_num, obj_size, MLX5_MPRQ_MP_CACHE_SZ,
				0, NULL, NULL, mlx5_mprq_buf_init, NULL,
				dev->device->numa_node, 0);
	if (mp == NULL) {
		DRV_LOG(ERR,
			"port %u failed to allocate a mempool for"
			" Multi-Packet RQ, count=%u, size=%u",
			dev->data->port_id, obj_num, obj_size);
		rte_errno = ENOMEM;
		return -rte_errno;
	}
	priv->mprq_mp = mp;
exit:
	/* Set mempool for each Rx queue. */
	for (i = 0; i != priv->rxqs_n; ++i) {
		struct mlx5_rxq_data *rxq = (*priv->rxqs)[i];

		if (rxq == NULL)
			continue;
		rxq->mprq_mp = mp;
	}
	DRV_LOG(INFO, "port %u Multi-Packet RQ is configured",
		dev->data->port_id);
	return 0;
}

/**
 * Create a DPDK Rx queue.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param idx
 *   RX queue index.
 * @param desc
 *   Number of descriptors to configure in queue.
 * @param socket
 *   NUMA socket on which memory must be allocated.
 * @param[in] conf
 *   Thresholds parameters.
 * @param mp
 *   Memory pool for buffer allocations.
 *
 * @return
 *   A DPDK queue object on success, NULL otherwise and rte_errno is set.
 */
struct mlx5_rxq_ctrl *
mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
	     unsigned int socket, const struct rte_eth_rxconf *conf,
	     struct rte_mempool *mp)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_rxq_ctrl *tmpl;
	unsigned int mb_len = rte_pktmbuf_data_room_size(mp);
	unsigned int mprq_stride_size;
	struct mlx5_dev_config *config = &priv->config;
	/*
	 * Always allocate extra slots, even if eventually
	 * the vector Rx will not be used.
	 */
	uint16_t desc_n =
		desc + config->rx_vec_en * MLX5_VPMD_DESCS_PER_LOOP;
	uint64_t offloads = conf->offloads |
			    dev->data->dev_conf.rxmode.offloads;
	const int mprq_en = mlx5_check_mprq_support(dev) > 0;

	tmpl = rte_calloc_socket("RXQ", 1,
				 sizeof(*tmpl) +
				 desc_n * sizeof(struct rte_mbuf *),
				 0, socket);
	if (!tmpl) {
		rte_errno = ENOMEM;
		return NULL;
	}
	if (mlx5_mr_btree_init(&tmpl->rxq.mr_ctrl.cache_bh,
			       MLX5_MR_BTREE_CACHE_N, socket)) {
		/* rte_errno is already set. */
		goto error;
	}
	tmpl->socket = socket;
	if (dev->data->dev_conf.intr_conf.rxq)
		tmpl->irq = 1;
	/*
	 * This Rx queue can be configured as a Multi-Packet RQ if all of the
	 * following conditions are met:
	 *  - MPRQ is enabled.
	 *  - The number of descs is more than the number of strides.
	 *  - max_rx_pkt_len plus overhead is less than the max size of a
	 *    stride.
	 *  Otherwise, enable Rx scatter if necessary.
1361 */ 1362 assert(mb_len >= RTE_PKTMBUF_HEADROOM); 1363 mprq_stride_size = 1364 dev->data->dev_conf.rxmode.max_rx_pkt_len + 1365 sizeof(struct rte_mbuf_ext_shared_info) + 1366 RTE_PKTMBUF_HEADROOM; 1367 if (mprq_en && 1368 desc > (1U << config->mprq.stride_num_n) && 1369 mprq_stride_size <= (1U << config->mprq.max_stride_size_n)) { 1370 /* TODO: Rx scatter isn't supported yet. */ 1371 tmpl->rxq.sges_n = 0; 1372 /* Trim the number of descs needed. */ 1373 desc >>= config->mprq.stride_num_n; 1374 tmpl->rxq.strd_num_n = config->mprq.stride_num_n; 1375 tmpl->rxq.strd_sz_n = RTE_MAX(log2above(mprq_stride_size), 1376 config->mprq.min_stride_size_n); 1377 tmpl->rxq.strd_shift_en = MLX5_MPRQ_TWO_BYTE_SHIFT; 1378 tmpl->rxq.mprq_max_memcpy_len = 1379 RTE_MIN(mb_len - RTE_PKTMBUF_HEADROOM, 1380 config->mprq.max_memcpy_len); 1381 DRV_LOG(DEBUG, 1382 "port %u Rx queue %u: Multi-Packet RQ is enabled" 1383 " strd_num_n = %u, strd_sz_n = %u", 1384 dev->data->port_id, idx, 1385 tmpl->rxq.strd_num_n, tmpl->rxq.strd_sz_n); 1386 } else if (dev->data->dev_conf.rxmode.max_rx_pkt_len <= 1387 (mb_len - RTE_PKTMBUF_HEADROOM)) { 1388 tmpl->rxq.sges_n = 0; 1389 } else if (offloads & DEV_RX_OFFLOAD_SCATTER) { 1390 unsigned int size = 1391 RTE_PKTMBUF_HEADROOM + 1392 dev->data->dev_conf.rxmode.max_rx_pkt_len; 1393 unsigned int sges_n; 1394 1395 /* 1396 * Determine the number of SGEs needed for a full packet 1397 * and round it to the next power of two. 1398 */ 1399 sges_n = log2above((size / mb_len) + !!(size % mb_len)); 1400 tmpl->rxq.sges_n = sges_n; 1401 /* Make sure rxq.sges_n did not overflow. */ 1402 size = mb_len * (1 << tmpl->rxq.sges_n); 1403 size -= RTE_PKTMBUF_HEADROOM; 1404 if (size < dev->data->dev_conf.rxmode.max_rx_pkt_len) { 1405 DRV_LOG(ERR, 1406 "port %u too many SGEs (%u) needed to handle" 1407 " requested maximum packet size %u", 1408 dev->data->port_id, 1409 1 << sges_n, 1410 dev->data->dev_conf.rxmode.max_rx_pkt_len); 1411 rte_errno = EOVERFLOW; 1412 goto error; 1413 } 1414 } else { 1415 DRV_LOG(WARNING, 1416 "port %u the requested maximum Rx packet size (%u) is" 1417 " larger than a single mbuf (%u) and scattered mode has" 1418 " not been requested", 1419 dev->data->port_id, 1420 dev->data->dev_conf.rxmode.max_rx_pkt_len, 1421 mb_len - RTE_PKTMBUF_HEADROOM); 1422 } 1423 if (mprq_en && !mlx5_rxq_mprq_enabled(&tmpl->rxq)) 1424 DRV_LOG(WARNING, 1425 "port %u MPRQ is requested but cannot be enabled" 1426 " (requested: desc = %u, stride_sz = %u," 1427 " supported: min_stride_num = %u, max_stride_sz = %u).", 1428 dev->data->port_id, desc, mprq_stride_size, 1429 (1 << config->mprq.stride_num_n), 1430 (1 << config->mprq.max_stride_size_n)); 1431 DRV_LOG(DEBUG, "port %u maximum number of segments per packet: %u", 1432 dev->data->port_id, 1 << tmpl->rxq.sges_n); 1433 if (desc % (1 << tmpl->rxq.sges_n)) { 1434 DRV_LOG(ERR, 1435 "port %u number of Rx queue descriptors (%u) is not a" 1436 " multiple of SGEs per packet (%u)", 1437 dev->data->port_id, 1438 desc, 1439 1 << tmpl->rxq.sges_n); 1440 rte_errno = EINVAL; 1441 goto error; 1442 } 1443 /* Toggle RX checksum offload if hardware supports it. */ 1444 tmpl->rxq.csum = !!(offloads & DEV_RX_OFFLOAD_CHECKSUM); 1445 tmpl->rxq.hw_timestamp = !!(offloads & DEV_RX_OFFLOAD_TIMESTAMP); 1446 /* Configure VLAN stripping. */ 1447 tmpl->rxq.vlan_strip = !!(offloads & DEV_RX_OFFLOAD_VLAN_STRIP); 1448 /* By default, FCS (CRC) is stripped by hardware. 
 */
	tmpl->rxq.crc_present = 0;
	if (offloads & DEV_RX_OFFLOAD_KEEP_CRC) {
		if (config->hw_fcs_strip) {
			tmpl->rxq.crc_present = 1;
		} else {
			DRV_LOG(WARNING,
				"port %u CRC stripping has been disabled but will"
				" still be performed by hardware, make sure MLNX_OFED"
				" and firmware are up to date",
				dev->data->port_id);
		}
	}
	DRV_LOG(DEBUG,
		"port %u CRC stripping is %s, %u bytes will be subtracted from"
		" incoming frames to hide it",
		dev->data->port_id,
		tmpl->rxq.crc_present ? "disabled" : "enabled",
		tmpl->rxq.crc_present << 2);
	/* Save port ID. */
	tmpl->rxq.rss_hash = !!priv->rss_conf.rss_hf &&
		(!!(dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS));
	tmpl->rxq.port_id = dev->data->port_id;
	tmpl->priv = priv;
	tmpl->rxq.mp = mp;
	tmpl->rxq.stats.idx = idx;
	tmpl->rxq.elts_n = log2above(desc);
	tmpl->rxq.rq_repl_thresh =
		MLX5_VPMD_RXQ_RPLNSH_THRESH(1 << tmpl->rxq.elts_n);
	tmpl->rxq.elts =
		(struct rte_mbuf *(*)[1 << tmpl->rxq.elts_n])(tmpl + 1);
#ifndef RTE_ARCH_64
	tmpl->rxq.uar_lock_cq = &priv->uar_lock_cq;
#endif
	tmpl->idx = idx;
	rte_atomic32_inc(&tmpl->refcnt);
	LIST_INSERT_HEAD(&priv->rxqsctrl, tmpl, next);
	return tmpl;
error:
	rte_free(tmpl);
	return NULL;
}

/**
 * Get an Rx queue.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param idx
 *   RX queue index.
 *
 * @return
 *   A pointer to the queue if it exists, NULL otherwise.
 */
struct mlx5_rxq_ctrl *
mlx5_rxq_get(struct rte_eth_dev *dev, uint16_t idx)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_rxq_ctrl *rxq_ctrl = NULL;

	if ((*priv->rxqs)[idx]) {
		rxq_ctrl = container_of((*priv->rxqs)[idx],
					struct mlx5_rxq_ctrl,
					rxq);
		mlx5_rxq_ibv_get(dev, idx);
		rte_atomic32_inc(&rxq_ctrl->refcnt);
	}
	return rxq_ctrl;
}

/**
 * Release an Rx queue.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param idx
 *   RX queue index.
 *
 * @return
 *   1 while a reference on it exists, 0 when freed.
 */
int
mlx5_rxq_release(struct rte_eth_dev *dev, uint16_t idx)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_rxq_ctrl *rxq_ctrl;

	if (!(*priv->rxqs)[idx])
		return 0;
	rxq_ctrl = container_of((*priv->rxqs)[idx], struct mlx5_rxq_ctrl, rxq);
	assert(rxq_ctrl->priv);
	if (rxq_ctrl->ibv && !mlx5_rxq_ibv_release(rxq_ctrl->ibv))
		rxq_ctrl->ibv = NULL;
	if (rte_atomic32_dec_and_test(&rxq_ctrl->refcnt)) {
		mlx5_mr_btree_free(&rxq_ctrl->rxq.mr_ctrl.cache_bh);
		LIST_REMOVE(rxq_ctrl, next);
		rte_free(rxq_ctrl);
		(*priv->rxqs)[idx] = NULL;
		return 0;
	}
	return 1;
}
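/*
 * Reference-counting contract of the two functions above, shown as an
 * illustrative sketch rather than driver code: every mlx5_rxq_get() must be
 * balanced by one mlx5_rxq_release(); the control structure is only freed
 * once the last reference is dropped and the Verbs objects are gone.
 *
 *	struct mlx5_rxq_ctrl *ctrl = mlx5_rxq_get(dev, idx);
 *
 *	if (ctrl != NULL) {
 *		// ... use ctrl->rxq ...
 *		mlx5_rxq_release(dev, idx);	// returns 0 once freed
 *	}
 */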
/**
 * Verify if the queue can be released.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param idx
 *   RX queue index.
 *
 * @return
 *   1 if the queue can be released, negative errno otherwise and rte_errno is
 *   set.
 */
int
mlx5_rxq_releasable(struct rte_eth_dev *dev, uint16_t idx)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_rxq_ctrl *rxq_ctrl;

	if (!(*priv->rxqs)[idx]) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	rxq_ctrl = container_of((*priv->rxqs)[idx], struct mlx5_rxq_ctrl, rxq);
	return (rte_atomic32_read(&rxq_ctrl->refcnt) == 1);
}

/**
 * Verify the Rx queue list is empty.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   The number of objects not released.
 */
int
mlx5_rxq_verify(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_rxq_ctrl *rxq_ctrl;
	int ret = 0;

	LIST_FOREACH(rxq_ctrl, &priv->rxqsctrl, next) {
		DRV_LOG(DEBUG, "port %u Rx Queue %u still referenced",
			dev->data->port_id, rxq_ctrl->idx);
		++ret;
	}
	return ret;
}

/**
 * Create an indirection table.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param queues
 *   Queues entering the indirection table.
 * @param queues_n
 *   Number of queues in the array.
 *
 * @return
 *   The Verbs object initialised, NULL otherwise and rte_errno is set.
 */
struct mlx5_ind_table_ibv *
mlx5_ind_table_ibv_new(struct rte_eth_dev *dev, const uint16_t *queues,
		       uint32_t queues_n)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_ind_table_ibv *ind_tbl;
	const unsigned int wq_n = rte_is_power_of_2(queues_n) ?
		log2above(queues_n) :
		log2above(priv->config.ind_table_max_size);
	struct ibv_wq *wq[1 << wq_n];
	unsigned int i;
	unsigned int j;

	ind_tbl = rte_calloc(__func__, 1, sizeof(*ind_tbl) +
			     queues_n * sizeof(uint16_t), 0);
	if (!ind_tbl) {
		rte_errno = ENOMEM;
		return NULL;
	}
	for (i = 0; i != queues_n; ++i) {
		struct mlx5_rxq_ctrl *rxq = mlx5_rxq_get(dev, queues[i]);

		if (!rxq)
			goto error;
		wq[i] = rxq->ibv->wq;
		ind_tbl->queues[i] = queues[i];
	}
	ind_tbl->queues_n = queues_n;
	/* Finalise indirection table. */
	for (j = 0; i != (unsigned int)(1 << wq_n); ++i, ++j)
		wq[i] = wq[j];
	ind_tbl->ind_table = mlx5_glue->create_rwq_ind_table
		(priv->sh->ctx,
		 &(struct ibv_rwq_ind_table_init_attr){
			.log_ind_tbl_size = wq_n,
			.ind_tbl = wq,
			.comp_mask = 0,
		 });
	if (!ind_tbl->ind_table) {
		rte_errno = errno;
		goto error;
	}
	rte_atomic32_inc(&ind_tbl->refcnt);
	LIST_INSERT_HEAD(&priv->ind_tbls, ind_tbl, next);
	return ind_tbl;
error:
	rte_free(ind_tbl);
	DEBUG("port %u cannot create indirection table", dev->data->port_id);
	return NULL;
}
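/*
 * Worked example for the table sizing in mlx5_ind_table_ibv_new() above,
 * with assumed values that are not mandated by the driver: queues_n = 6
 * (not a power of two) and ind_table_max_size = 512. Then
 * wq_n = log2above(512) = 9 and the 512-entry table is filled by wrapping
 * around the six WQs:
 *
 *	index: 0 1 2 3 4 5 6 7 8 9 ...
 *	queue: 0 1 2 3 4 5 0 1 2 3 ...
 *
 * With queues_n = 4 (a power of two) the table would instead hold exactly
 * four entries (wq_n = 2).
 */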
/**
 * Get an indirection table.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param queues
 *   Queues entering the indirection table.
 * @param queues_n
 *   Number of queues in the array.
 *
 * @return
 *   An indirection table if found.
 */
struct mlx5_ind_table_ibv *
mlx5_ind_table_ibv_get(struct rte_eth_dev *dev, const uint16_t *queues,
		       uint32_t queues_n)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_ind_table_ibv *ind_tbl;

	LIST_FOREACH(ind_tbl, &priv->ind_tbls, next) {
		if ((ind_tbl->queues_n == queues_n) &&
		    (memcmp(ind_tbl->queues, queues,
			    ind_tbl->queues_n * sizeof(ind_tbl->queues[0]))
		     == 0))
			break;
	}
	if (ind_tbl) {
		unsigned int i;

		rte_atomic32_inc(&ind_tbl->refcnt);
		for (i = 0; i != ind_tbl->queues_n; ++i)
			mlx5_rxq_get(dev, ind_tbl->queues[i]);
	}
	return ind_tbl;
}

/**
 * Release an indirection table.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param ind_tbl
 *   Indirection table to release.
 *
 * @return
 *   1 while a reference on it exists, 0 when freed.
 */
int
mlx5_ind_table_ibv_release(struct rte_eth_dev *dev,
			   struct mlx5_ind_table_ibv *ind_tbl)
{
	unsigned int i;

	if (rte_atomic32_dec_and_test(&ind_tbl->refcnt))
		claim_zero(mlx5_glue->destroy_rwq_ind_table
			   (ind_tbl->ind_table));
	for (i = 0; i != ind_tbl->queues_n; ++i)
		claim_nonzero(mlx5_rxq_release(dev, ind_tbl->queues[i]));
	if (!rte_atomic32_read(&ind_tbl->refcnt)) {
		LIST_REMOVE(ind_tbl, next);
		rte_free(ind_tbl);
		return 0;
	}
	return 1;
}

/**
 * Verify the Verbs indirection table list is empty.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   The number of objects not released.
 */
int
mlx5_ind_table_ibv_verify(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_ind_table_ibv *ind_tbl;
	int ret = 0;

	LIST_FOREACH(ind_tbl, &priv->ind_tbls, next) {
		DRV_LOG(DEBUG,
			"port %u Verbs indirection table %p still referenced",
			dev->data->port_id, (void *)ind_tbl);
		++ret;
	}
	return ret;
}

/**
 * Create an Rx Hash queue.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param rss_key
 *   RSS key for the Rx hash queue.
 * @param rss_key_len
 *   RSS key length.
 * @param hash_fields
 *   Verbs protocol hash field to make the RSS on.
 * @param queues
 *   Queues entering the hash queue. In case of empty hash_fields only the
 *   first queue index will be taken for the indirection table.
 * @param queues_n
 *   Number of queues.
 * @param tunnel
 *   Tunnel type.
 *
 * @return
 *   The Verbs object initialised, NULL otherwise and rte_errno is set.
 */
struct mlx5_hrxq *
mlx5_hrxq_new(struct rte_eth_dev *dev,
	      const uint8_t *rss_key, uint32_t rss_key_len,
	      uint64_t hash_fields,
	      const uint16_t *queues, uint32_t queues_n,
	      int tunnel __rte_unused)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_hrxq *hrxq;
	struct mlx5_ind_table_ibv *ind_tbl;
	struct ibv_qp *qp;
#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
	struct mlx5dv_qp_init_attr qp_init_attr;
#endif
	int err;

	queues_n = hash_fields ?
queues_n : 1; 1796 ind_tbl = mlx5_ind_table_ibv_get(dev, queues, queues_n); 1797 if (!ind_tbl) 1798 ind_tbl = mlx5_ind_table_ibv_new(dev, queues, queues_n); 1799 if (!ind_tbl) { 1800 rte_errno = ENOMEM; 1801 return NULL; 1802 } 1803 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT 1804 memset(&qp_init_attr, 0, sizeof(qp_init_attr)); 1805 if (tunnel) { 1806 qp_init_attr.comp_mask = 1807 MLX5DV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS; 1808 qp_init_attr.create_flags = MLX5DV_QP_CREATE_TUNNEL_OFFLOADS; 1809 } 1810 #ifdef HAVE_IBV_FLOW_DV_SUPPORT 1811 if (dev->data->dev_conf.lpbk_mode) { 1812 /* Allow packet sent from NIC loop back w/o source MAC check. */ 1813 qp_init_attr.comp_mask |= 1814 MLX5DV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS; 1815 qp_init_attr.create_flags |= 1816 MLX5DV_QP_CREATE_TIR_ALLOW_SELF_LOOPBACK_UC; 1817 } 1818 #endif 1819 qp = mlx5_glue->dv_create_qp 1820 (priv->sh->ctx, 1821 &(struct ibv_qp_init_attr_ex){ 1822 .qp_type = IBV_QPT_RAW_PACKET, 1823 .comp_mask = 1824 IBV_QP_INIT_ATTR_PD | 1825 IBV_QP_INIT_ATTR_IND_TABLE | 1826 IBV_QP_INIT_ATTR_RX_HASH, 1827 .rx_hash_conf = (struct ibv_rx_hash_conf){ 1828 .rx_hash_function = IBV_RX_HASH_FUNC_TOEPLITZ, 1829 .rx_hash_key_len = rss_key_len, 1830 .rx_hash_key = (void *)(uintptr_t)rss_key, 1831 .rx_hash_fields_mask = hash_fields, 1832 }, 1833 .rwq_ind_tbl = ind_tbl->ind_table, 1834 .pd = priv->sh->pd, 1835 }, 1836 &qp_init_attr); 1837 #else 1838 qp = mlx5_glue->create_qp_ex 1839 (priv->sh->ctx, 1840 &(struct ibv_qp_init_attr_ex){ 1841 .qp_type = IBV_QPT_RAW_PACKET, 1842 .comp_mask = 1843 IBV_QP_INIT_ATTR_PD | 1844 IBV_QP_INIT_ATTR_IND_TABLE | 1845 IBV_QP_INIT_ATTR_RX_HASH, 1846 .rx_hash_conf = (struct ibv_rx_hash_conf){ 1847 .rx_hash_function = IBV_RX_HASH_FUNC_TOEPLITZ, 1848 .rx_hash_key_len = rss_key_len, 1849 .rx_hash_key = (void *)(uintptr_t)rss_key, 1850 .rx_hash_fields_mask = hash_fields, 1851 }, 1852 .rwq_ind_tbl = ind_tbl->ind_table, 1853 .pd = priv->sh->pd, 1854 }); 1855 #endif 1856 if (!qp) { 1857 rte_errno = errno; 1858 goto error; 1859 } 1860 hrxq = rte_calloc(__func__, 1, sizeof(*hrxq) + rss_key_len, 0); 1861 if (!hrxq) 1862 goto error; 1863 hrxq->ind_table = ind_tbl; 1864 hrxq->qp = qp; 1865 hrxq->rss_key_len = rss_key_len; 1866 hrxq->hash_fields = hash_fields; 1867 memcpy(hrxq->rss_key, rss_key, rss_key_len); 1868 rte_atomic32_inc(&hrxq->refcnt); 1869 LIST_INSERT_HEAD(&priv->hrxqs, hrxq, next); 1870 return hrxq; 1871 error: 1872 err = rte_errno; /* Save rte_errno before cleanup. */ 1873 mlx5_ind_table_ibv_release(dev, ind_tbl); 1874 if (qp) 1875 claim_zero(mlx5_glue->destroy_qp(qp)); 1876 rte_errno = err; /* Restore rte_errno. */ 1877 return NULL; 1878 } 1879 1880 /** 1881 * Get an Rx Hash queue. 1882 * 1883 * @param dev 1884 * Pointer to Ethernet device. 1885 * @param rss_conf 1886 * RSS configuration for the Rx hash queue. 1887 * @param queues 1888 * Queues entering in hash queue. In case of empty hash_fields only the 1889 * first queue index will be taken for the indirection table. 1890 * @param queues_n 1891 * Number of queues. 1892 * 1893 * @return 1894 * An hash Rx queue on success. 1895 */ 1896 struct mlx5_hrxq * 1897 mlx5_hrxq_get(struct rte_eth_dev *dev, 1898 const uint8_t *rss_key, uint32_t rss_key_len, 1899 uint64_t hash_fields, 1900 const uint16_t *queues, uint32_t queues_n) 1901 { 1902 struct mlx5_priv *priv = dev->data->dev_private; 1903 struct mlx5_hrxq *hrxq; 1904 1905 queues_n = hash_fields ? 
queues_n : 1; 1906 LIST_FOREACH(hrxq, &priv->hrxqs, next) { 1907 struct mlx5_ind_table_ibv *ind_tbl; 1908 1909 if (hrxq->rss_key_len != rss_key_len) 1910 continue; 1911 if (memcmp(hrxq->rss_key, rss_key, rss_key_len)) 1912 continue; 1913 if (hrxq->hash_fields != hash_fields) 1914 continue; 1915 ind_tbl = mlx5_ind_table_ibv_get(dev, queues, queues_n); 1916 if (!ind_tbl) 1917 continue; 1918 if (ind_tbl != hrxq->ind_table) { 1919 mlx5_ind_table_ibv_release(dev, ind_tbl); 1920 continue; 1921 } 1922 rte_atomic32_inc(&hrxq->refcnt); 1923 return hrxq; 1924 } 1925 return NULL; 1926 } 1927 1928 /** 1929 * Release the hash Rx queue. 1930 * 1931 * @param dev 1932 * Pointer to Ethernet device. 1933 * @param hrxq 1934 * Pointer to Hash Rx queue to release. 1935 * 1936 * @return 1937 * 1 while a reference on it exists, 0 when freed. 1938 */ 1939 int 1940 mlx5_hrxq_release(struct rte_eth_dev *dev, struct mlx5_hrxq *hrxq) 1941 { 1942 if (rte_atomic32_dec_and_test(&hrxq->refcnt)) { 1943 claim_zero(mlx5_glue->destroy_qp(hrxq->qp)); 1944 mlx5_ind_table_ibv_release(dev, hrxq->ind_table); 1945 LIST_REMOVE(hrxq, next); 1946 rte_free(hrxq); 1947 return 0; 1948 } 1949 claim_nonzero(mlx5_ind_table_ibv_release(dev, hrxq->ind_table)); 1950 return 1; 1951 } 1952 1953 /** 1954 * Verify the Rx Queue list is empty 1955 * 1956 * @param dev 1957 * Pointer to Ethernet device. 1958 * 1959 * @return 1960 * The number of object not released. 1961 */ 1962 int 1963 mlx5_hrxq_ibv_verify(struct rte_eth_dev *dev) 1964 { 1965 struct mlx5_priv *priv = dev->data->dev_private; 1966 struct mlx5_hrxq *hrxq; 1967 int ret = 0; 1968 1969 LIST_FOREACH(hrxq, &priv->hrxqs, next) { 1970 DRV_LOG(DEBUG, 1971 "port %u Verbs hash Rx queue %p still referenced", 1972 dev->data->port_id, (void *)hrxq); 1973 ++ret; 1974 } 1975 return ret; 1976 } 1977 1978 /** 1979 * Create a drop Rx queue Verbs object. 1980 * 1981 * @param dev 1982 * Pointer to Ethernet device. 1983 * 1984 * @return 1985 * The Verbs object initialised, NULL otherwise and rte_errno is set. 1986 */ 1987 struct mlx5_rxq_ibv * 1988 mlx5_rxq_ibv_drop_new(struct rte_eth_dev *dev) 1989 { 1990 struct mlx5_priv *priv = dev->data->dev_private; 1991 struct ibv_context *ctx = priv->sh->ctx; 1992 struct ibv_cq *cq; 1993 struct ibv_wq *wq = NULL; 1994 struct mlx5_rxq_ibv *rxq; 1995 1996 if (priv->drop_queue.rxq) 1997 return priv->drop_queue.rxq; 1998 cq = mlx5_glue->create_cq(ctx, 1, NULL, NULL, 0); 1999 if (!cq) { 2000 DEBUG("port %u cannot allocate CQ for drop queue", 2001 dev->data->port_id); 2002 rte_errno = errno; 2003 goto error; 2004 } 2005 wq = mlx5_glue->create_wq(ctx, 2006 &(struct ibv_wq_init_attr){ 2007 .wq_type = IBV_WQT_RQ, 2008 .max_wr = 1, 2009 .max_sge = 1, 2010 .pd = priv->sh->pd, 2011 .cq = cq, 2012 }); 2013 if (!wq) { 2014 DEBUG("port %u cannot allocate WQ for drop queue", 2015 dev->data->port_id); 2016 rte_errno = errno; 2017 goto error; 2018 } 2019 rxq = rte_calloc(__func__, 1, sizeof(*rxq), 0); 2020 if (!rxq) { 2021 DEBUG("port %u cannot allocate drop Rx queue memory", 2022 dev->data->port_id); 2023 rte_errno = ENOMEM; 2024 goto error; 2025 } 2026 rxq->cq = cq; 2027 rxq->wq = wq; 2028 priv->drop_queue.rxq = rxq; 2029 return rxq; 2030 error: 2031 if (wq) 2032 claim_zero(mlx5_glue->destroy_wq(wq)); 2033 if (cq) 2034 claim_zero(mlx5_glue->destroy_cq(cq)); 2035 return NULL; 2036 } 2037 2038 /** 2039 * Release a drop Rx queue Verbs object. 2040 * 2041 * @param dev 2042 * Pointer to Ethernet device. 

/**
 * Release a drop Rx queue Verbs object.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
void
mlx5_rxq_ibv_drop_release(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_rxq_ibv *rxq = priv->drop_queue.rxq;

	if (rxq->wq)
		claim_zero(mlx5_glue->destroy_wq(rxq->wq));
	if (rxq->cq)
		claim_zero(mlx5_glue->destroy_cq(rxq->cq));
	rte_free(rxq);
	priv->drop_queue.rxq = NULL;
}

/**
 * Create a drop indirection table.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   The Verbs object initialised, NULL otherwise and rte_errno is set.
 */
struct mlx5_ind_table_ibv *
mlx5_ind_table_ibv_drop_new(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_ind_table_ibv *ind_tbl;
	struct mlx5_rxq_ibv *rxq;
	struct mlx5_ind_table_ibv tmpl;

	rxq = mlx5_rxq_ibv_drop_new(dev);
	if (!rxq)
		return NULL;
	tmpl.ind_table = mlx5_glue->create_rwq_ind_table
		(priv->sh->ctx,
		 &(struct ibv_rwq_ind_table_init_attr){
			.log_ind_tbl_size = 0,
			.ind_tbl = &rxq->wq,
			.comp_mask = 0,
		 });
	if (!tmpl.ind_table) {
		DEBUG("port %u cannot allocate indirection table for drop"
		      " queue",
		      dev->data->port_id);
		rte_errno = errno;
		goto error;
	}
	ind_tbl = rte_calloc(__func__, 1, sizeof(*ind_tbl), 0);
	if (!ind_tbl) {
		rte_errno = ENOMEM;
		goto error;
	}
	ind_tbl->ind_table = tmpl.ind_table;
	return ind_tbl;
error:
	/* Destroy the Verbs table if it was created before the failure. */
	if (tmpl.ind_table)
		claim_zero(mlx5_glue->destroy_rwq_ind_table(tmpl.ind_table));
	mlx5_rxq_ibv_drop_release(dev);
	return NULL;
}

/**
 * Release a drop indirection table.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
void
mlx5_ind_table_ibv_drop_release(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_ind_table_ibv *ind_tbl = priv->drop_queue.hrxq->ind_table;

	claim_zero(mlx5_glue->destroy_rwq_ind_table(ind_tbl->ind_table));
	mlx5_rxq_ibv_drop_release(dev);
	rte_free(ind_tbl);
	priv->drop_queue.hrxq->ind_table = NULL;
}
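
/*
 * Illustrative fragment (not compiled in): the drop indirection table above
 * wraps a single WQ, so &rxq->wq acts as a one-element array and
 * log_ind_tbl_size is 0 (1 << 0 == 1 entry). Spreading over several WQs
 * uses the same Verbs call with a larger array and the log2 of its
 * (power-of-two) size, roughly as below; wqs[], n and the surrounding
 * function providing priv are hypothetical.
 */
#if 0
	struct ibv_wq *wqs[4];		/* filled from the Rx queues' WQs */
	unsigned int n = RTE_DIM(wqs);	/* must be a power of two */
	struct ibv_rwq_ind_table *tbl;

	tbl = mlx5_glue->create_rwq_ind_table
		(priv->sh->ctx,
		 &(struct ibv_rwq_ind_table_init_attr){
			.log_ind_tbl_size = rte_log2_u32(n),
			.ind_tbl = wqs,
			.comp_mask = 0,
		 });
#endif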

/**
 * Create a drop Rx Hash queue.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   The Verbs object initialised, NULL otherwise and rte_errno is set.
 */
struct mlx5_hrxq *
mlx5_hrxq_drop_new(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_ind_table_ibv *ind_tbl;
	struct ibv_qp *qp;
	struct mlx5_hrxq *hrxq;

	if (priv->drop_queue.hrxq) {
		rte_atomic32_inc(&priv->drop_queue.hrxq->refcnt);
		return priv->drop_queue.hrxq;
	}
	ind_tbl = mlx5_ind_table_ibv_drop_new(dev);
	if (!ind_tbl)
		return NULL;
	qp = mlx5_glue->create_qp_ex(priv->sh->ctx,
		 &(struct ibv_qp_init_attr_ex){
			.qp_type = IBV_QPT_RAW_PACKET,
			.comp_mask =
				IBV_QP_INIT_ATTR_PD |
				IBV_QP_INIT_ATTR_IND_TABLE |
				IBV_QP_INIT_ATTR_RX_HASH,
			.rx_hash_conf = (struct ibv_rx_hash_conf){
				.rx_hash_function =
					IBV_RX_HASH_FUNC_TOEPLITZ,
				.rx_hash_key_len = MLX5_RSS_HASH_KEY_LEN,
				.rx_hash_key = rss_hash_default_key,
				.rx_hash_fields_mask = 0,
			},
			.rwq_ind_tbl = ind_tbl->ind_table,
			.pd = priv->sh->pd,
		 });
	if (!qp) {
		DEBUG("port %u cannot allocate QP for drop queue",
		      dev->data->port_id);
		rte_errno = errno;
		goto error;
	}
	hrxq = rte_calloc(__func__, 1, sizeof(*hrxq), 0);
	if (!hrxq) {
		DRV_LOG(WARNING,
			"port %u cannot allocate memory for drop queue",
			dev->data->port_id);
		rte_errno = ENOMEM;
		goto error;
	}
	hrxq->ind_table = ind_tbl;
	hrxq->qp = qp;
	priv->drop_queue.hrxq = hrxq;
	rte_atomic32_set(&hrxq->refcnt, 1);
	return hrxq;
error:
	if (qp)
		claim_zero(mlx5_glue->destroy_qp(qp));
	if (ind_tbl) {
		/*
		 * priv->drop_queue.hrxq is not set yet, release the
		 * indirection table and the drop Rx queue directly.
		 */
		claim_zero(mlx5_glue->destroy_rwq_ind_table
			   (ind_tbl->ind_table));
		mlx5_rxq_ibv_drop_release(dev);
		rte_free(ind_tbl);
	}
	return NULL;
}

/**
 * Release a drop hash Rx queue.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
void
mlx5_hrxq_drop_release(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_hrxq *hrxq = priv->drop_queue.hrxq;

	if (rte_atomic32_dec_and_test(&hrxq->refcnt)) {
		claim_zero(mlx5_glue->destroy_qp(hrxq->qp));
		mlx5_ind_table_ibv_drop_release(dev);
		rte_free(hrxq);
		priv->drop_queue.hrxq = NULL;
	}
}
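
/*
 * Usage sketch (not compiled in): a flow rule with a DROP action takes a
 * reference on the shared drop hash Rx queue and releases it when the rule
 * is destroyed; the CQ, WQ, indirection table and QP behind it are created
 * on the first reference and destroyed on the last one. The helper name is
 * hypothetical.
 */
#if 0
static int
example_attach_drop_queue(struct rte_eth_dev *dev)
{
	struct mlx5_hrxq *hrxq = mlx5_hrxq_drop_new(dev);

	if (!hrxq)
		return -rte_errno;
	/* ... use hrxq->qp as the flow rule destination ... */
	mlx5_hrxq_drop_release(dev);
	return 0;
}
#endif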