1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2015 6WIND S.A. 3 * Copyright 2015 Mellanox Technologies, Ltd 4 */ 5 6 #include <stddef.h> 7 #include <assert.h> 8 #include <errno.h> 9 #include <string.h> 10 #include <stdint.h> 11 #include <fcntl.h> 12 #include <sys/queue.h> 13 14 /* Verbs header. */ 15 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */ 16 #ifdef PEDANTIC 17 #pragma GCC diagnostic ignored "-Wpedantic" 18 #endif 19 #include <infiniband/verbs.h> 20 #include <infiniband/mlx5dv.h> 21 #ifdef PEDANTIC 22 #pragma GCC diagnostic error "-Wpedantic" 23 #endif 24 25 #include <rte_mbuf.h> 26 #include <rte_malloc.h> 27 #include <rte_ethdev_driver.h> 28 #include <rte_common.h> 29 #include <rte_interrupts.h> 30 #include <rte_debug.h> 31 #include <rte_io.h> 32 33 #include "mlx5.h" 34 #include "mlx5_rxtx.h" 35 #include "mlx5_utils.h" 36 #include "mlx5_autoconf.h" 37 #include "mlx5_defs.h" 38 #include "mlx5_glue.h" 39 40 /* Default RSS hash key also used for ConnectX-3. */ 41 uint8_t rss_hash_default_key[] = { 42 0x2c, 0xc6, 0x81, 0xd1, 43 0x5b, 0xdb, 0xf4, 0xf7, 44 0xfc, 0xa2, 0x83, 0x19, 45 0xdb, 0x1a, 0x3e, 0x94, 46 0x6b, 0x9e, 0x38, 0xd9, 47 0x2c, 0x9c, 0x03, 0xd1, 48 0xad, 0x99, 0x44, 0xa7, 49 0xd9, 0x56, 0x3d, 0x59, 50 0x06, 0x3c, 0x25, 0xf3, 51 0xfc, 0x1f, 0xdc, 0x2a, 52 }; 53 54 /* Length of the default RSS hash key. */ 55 static_assert(MLX5_RSS_HASH_KEY_LEN == 56 (unsigned int)sizeof(rss_hash_default_key), 57 "wrong RSS default key size."); 58 59 /** 60 * Check whether Multi-Packet RQ can be enabled for the device. 61 * 62 * @param dev 63 * Pointer to Ethernet device. 64 * 65 * @return 66 * 1 if supported, negative errno value if not. 67 */ 68 inline int 69 mlx5_check_mprq_support(struct rte_eth_dev *dev) 70 { 71 struct mlx5_priv *priv = dev->data->dev_private; 72 73 if (priv->config.mprq.enabled && 74 priv->rxqs_n >= priv->config.mprq.min_rxqs_num) 75 return 1; 76 return -ENOTSUP; 77 } 78 79 /** 80 * Check whether Multi-Packet RQ is enabled for the Rx queue. 81 * 82 * @param rxq 83 * Pointer to receive queue structure. 84 * 85 * @return 86 * 0 if disabled, otherwise enabled. 87 */ 88 inline int 89 mlx5_rxq_mprq_enabled(struct mlx5_rxq_data *rxq) 90 { 91 return rxq->strd_num_n > 0; 92 } 93 94 /** 95 * Check whether Multi-Packet RQ is enabled for the device. 96 * 97 * @param dev 98 * Pointer to Ethernet device. 99 * 100 * @return 101 * 0 if disabled, otherwise enabled. 102 */ 103 inline int 104 mlx5_mprq_enabled(struct rte_eth_dev *dev) 105 { 106 struct mlx5_priv *priv = dev->data->dev_private; 107 uint16_t i; 108 uint16_t n = 0; 109 110 if (mlx5_check_mprq_support(dev) < 0) 111 return 0; 112 /* All the configured queues should be enabled. */ 113 for (i = 0; i < priv->rxqs_n; ++i) { 114 struct mlx5_rxq_data *rxq = (*priv->rxqs)[i]; 115 116 if (!rxq) 117 continue; 118 if (mlx5_rxq_mprq_enabled(rxq)) 119 ++n; 120 } 121 /* Multi-Packet RQ can't be partially configured. */ 122 assert(n == 0 || n == priv->rxqs_n); 123 return n == priv->rxqs_n; 124 } 125 126 /** 127 * Allocate RX queue elements for Multi-Packet RQ. 128 * 129 * @param rxq_ctrl 130 * Pointer to RX queue structure. 131 * 132 * @return 133 * 0 on success, a negative errno value otherwise and rte_errno is set. 134 */ 135 static int 136 rxq_alloc_elts_mprq(struct mlx5_rxq_ctrl *rxq_ctrl) 137 { 138 struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq; 139 unsigned int wqe_n = 1 << rxq->elts_n; 140 unsigned int i; 141 int err; 142 143 /* Iterate on segments. 
*/ 144 for (i = 0; i <= wqe_n; ++i) { 145 struct mlx5_mprq_buf *buf; 146 147 if (rte_mempool_get(rxq->mprq_mp, (void **)&buf) < 0) { 148 DRV_LOG(ERR, "port %u empty mbuf pool", rxq->port_id); 149 rte_errno = ENOMEM; 150 goto error; 151 } 152 if (i < wqe_n) 153 (*rxq->mprq_bufs)[i] = buf; 154 else 155 rxq->mprq_repl = buf; 156 } 157 DRV_LOG(DEBUG, 158 "port %u Rx queue %u allocated and configured %u segments", 159 rxq->port_id, rxq->idx, wqe_n); 160 return 0; 161 error: 162 err = rte_errno; /* Save rte_errno before cleanup. */ 163 wqe_n = i; 164 for (i = 0; (i != wqe_n); ++i) { 165 if ((*rxq->mprq_bufs)[i] != NULL) 166 rte_mempool_put(rxq->mprq_mp, 167 (*rxq->mprq_bufs)[i]); 168 (*rxq->mprq_bufs)[i] = NULL; 169 } 170 DRV_LOG(DEBUG, "port %u Rx queue %u failed, freed everything", 171 rxq->port_id, rxq->idx); 172 rte_errno = err; /* Restore rte_errno. */ 173 return -rte_errno; 174 } 175 176 /** 177 * Allocate RX queue elements for Single-Packet RQ. 178 * 179 * @param rxq_ctrl 180 * Pointer to RX queue structure. 181 * 182 * @return 183 * 0 on success, errno value on failure. 184 */ 185 static int 186 rxq_alloc_elts_sprq(struct mlx5_rxq_ctrl *rxq_ctrl) 187 { 188 const unsigned int sges_n = 1 << rxq_ctrl->rxq.sges_n; 189 unsigned int elts_n = 1 << rxq_ctrl->rxq.elts_n; 190 unsigned int i; 191 int err; 192 193 /* Iterate on segments. */ 194 for (i = 0; (i != elts_n); ++i) { 195 struct rte_mbuf *buf; 196 197 buf = rte_pktmbuf_alloc(rxq_ctrl->rxq.mp); 198 if (buf == NULL) { 199 DRV_LOG(ERR, "port %u empty mbuf pool", 200 PORT_ID(rxq_ctrl->priv)); 201 rte_errno = ENOMEM; 202 goto error; 203 } 204 /* Headroom is reserved by rte_pktmbuf_alloc(). */ 205 assert(DATA_OFF(buf) == RTE_PKTMBUF_HEADROOM); 206 /* Buffer is supposed to be empty. */ 207 assert(rte_pktmbuf_data_len(buf) == 0); 208 assert(rte_pktmbuf_pkt_len(buf) == 0); 209 assert(!buf->next); 210 /* Only the first segment keeps headroom. */ 211 if (i % sges_n) 212 SET_DATA_OFF(buf, 0); 213 PORT(buf) = rxq_ctrl->rxq.port_id; 214 DATA_LEN(buf) = rte_pktmbuf_tailroom(buf); 215 PKT_LEN(buf) = DATA_LEN(buf); 216 NB_SEGS(buf) = 1; 217 (*rxq_ctrl->rxq.elts)[i] = buf; 218 } 219 /* If Rx vector is activated. */ 220 if (mlx5_rxq_check_vec_support(&rxq_ctrl->rxq) > 0) { 221 struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq; 222 struct rte_mbuf *mbuf_init = &rxq->fake_mbuf; 223 int j; 224 225 /* Initialize default rearm_data for vPMD. */ 226 mbuf_init->data_off = RTE_PKTMBUF_HEADROOM; 227 rte_mbuf_refcnt_set(mbuf_init, 1); 228 mbuf_init->nb_segs = 1; 229 mbuf_init->port = rxq->port_id; 230 /* 231 * prevent compiler reordering: 232 * rearm_data covers previous fields. 233 */ 234 rte_compiler_barrier(); 235 rxq->mbuf_initializer = 236 *(uint64_t *)&mbuf_init->rearm_data; 237 /* Padding with a fake mbuf for vectorized Rx. */ 238 for (j = 0; j < MLX5_VPMD_DESCS_PER_LOOP; ++j) 239 (*rxq->elts)[elts_n + j] = &rxq->fake_mbuf; 240 } 241 DRV_LOG(DEBUG, 242 "port %u Rx queue %u allocated and configured %u segments" 243 " (max %u packets)", 244 PORT_ID(rxq_ctrl->priv), rxq_ctrl->rxq.idx, elts_n, 245 elts_n / (1 << rxq_ctrl->rxq.sges_n)); 246 return 0; 247 error: 248 err = rte_errno; /* Save rte_errno before cleanup. */ 249 elts_n = i; 250 for (i = 0; (i != elts_n); ++i) { 251 if ((*rxq_ctrl->rxq.elts)[i] != NULL) 252 rte_pktmbuf_free_seg((*rxq_ctrl->rxq.elts)[i]); 253 (*rxq_ctrl->rxq.elts)[i] = NULL; 254 } 255 DRV_LOG(DEBUG, "port %u Rx queue %u failed, freed everything", 256 PORT_ID(rxq_ctrl->priv), rxq_ctrl->rxq.idx); 257 rte_errno = err; /* Restore rte_errno. 
*/ 258 return -rte_errno; 259 } 260 261 /** 262 * Allocate RX queue elements. 263 * 264 * @param rxq_ctrl 265 * Pointer to RX queue structure. 266 * 267 * @return 268 * 0 on success, errno value on failure. 269 */ 270 int 271 rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl) 272 { 273 return mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq) ? 274 rxq_alloc_elts_mprq(rxq_ctrl) : rxq_alloc_elts_sprq(rxq_ctrl); 275 } 276 277 /** 278 * Free RX queue elements for Multi-Packet RQ. 279 * 280 * @param rxq_ctrl 281 * Pointer to RX queue structure. 282 */ 283 static void 284 rxq_free_elts_mprq(struct mlx5_rxq_ctrl *rxq_ctrl) 285 { 286 struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq; 287 uint16_t i; 288 289 DRV_LOG(DEBUG, "port %u Multi-Packet Rx queue %u freeing WRs", 290 rxq->port_id, rxq->idx); 291 if (rxq->mprq_bufs == NULL) 292 return; 293 assert(mlx5_rxq_check_vec_support(rxq) < 0); 294 for (i = 0; (i != (1u << rxq->elts_n)); ++i) { 295 if ((*rxq->mprq_bufs)[i] != NULL) 296 mlx5_mprq_buf_free((*rxq->mprq_bufs)[i]); 297 (*rxq->mprq_bufs)[i] = NULL; 298 } 299 if (rxq->mprq_repl != NULL) { 300 mlx5_mprq_buf_free(rxq->mprq_repl); 301 rxq->mprq_repl = NULL; 302 } 303 } 304 305 /** 306 * Free RX queue elements for Single-Packet RQ. 307 * 308 * @param rxq_ctrl 309 * Pointer to RX queue structure. 310 */ 311 static void 312 rxq_free_elts_sprq(struct mlx5_rxq_ctrl *rxq_ctrl) 313 { 314 struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq; 315 const uint16_t q_n = (1 << rxq->elts_n); 316 const uint16_t q_mask = q_n - 1; 317 uint16_t used = q_n - (rxq->rq_ci - rxq->rq_pi); 318 uint16_t i; 319 320 DRV_LOG(DEBUG, "port %u Rx queue %u freeing WRs", 321 PORT_ID(rxq_ctrl->priv), rxq->idx); 322 if (rxq->elts == NULL) 323 return; 324 /** 325 * Some mbuf in the Ring belongs to the application. They cannot be 326 * freed. 327 */ 328 if (mlx5_rxq_check_vec_support(rxq) > 0) { 329 for (i = 0; i < used; ++i) 330 (*rxq->elts)[(rxq->rq_ci + i) & q_mask] = NULL; 331 rxq->rq_pi = rxq->rq_ci; 332 } 333 for (i = 0; (i != (1u << rxq->elts_n)); ++i) { 334 if ((*rxq->elts)[i] != NULL) 335 rte_pktmbuf_free_seg((*rxq->elts)[i]); 336 (*rxq->elts)[i] = NULL; 337 } 338 } 339 340 /** 341 * Free RX queue elements. 342 * 343 * @param rxq_ctrl 344 * Pointer to RX queue structure. 345 */ 346 static void 347 rxq_free_elts(struct mlx5_rxq_ctrl *rxq_ctrl) 348 { 349 if (mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq)) 350 rxq_free_elts_mprq(rxq_ctrl); 351 else 352 rxq_free_elts_sprq(rxq_ctrl); 353 } 354 355 /** 356 * Clean up a RX queue. 357 * 358 * Destroy objects, free allocated memory and reset the structure for reuse. 359 * 360 * @param rxq_ctrl 361 * Pointer to RX queue structure. 362 */ 363 void 364 mlx5_rxq_cleanup(struct mlx5_rxq_ctrl *rxq_ctrl) 365 { 366 DRV_LOG(DEBUG, "port %u cleaning up Rx queue %u", 367 PORT_ID(rxq_ctrl->priv), rxq_ctrl->rxq.idx); 368 if (rxq_ctrl->ibv) 369 mlx5_rxq_ibv_release(rxq_ctrl->ibv); 370 memset(rxq_ctrl, 0, sizeof(*rxq_ctrl)); 371 } 372 373 /** 374 * Returns the per-queue supported offloads. 375 * 376 * @param dev 377 * Pointer to Ethernet device. 378 * 379 * @return 380 * Supported Rx offloads. 
381 */ 382 uint64_t 383 mlx5_get_rx_queue_offloads(struct rte_eth_dev *dev) 384 { 385 struct mlx5_priv *priv = dev->data->dev_private; 386 struct mlx5_dev_config *config = &priv->config; 387 uint64_t offloads = (DEV_RX_OFFLOAD_SCATTER | 388 DEV_RX_OFFLOAD_TIMESTAMP | 389 DEV_RX_OFFLOAD_JUMBO_FRAME); 390 391 if (config->hw_fcs_strip) 392 offloads |= DEV_RX_OFFLOAD_KEEP_CRC; 393 394 if (config->hw_csum) 395 offloads |= (DEV_RX_OFFLOAD_IPV4_CKSUM | 396 DEV_RX_OFFLOAD_UDP_CKSUM | 397 DEV_RX_OFFLOAD_TCP_CKSUM); 398 if (config->hw_vlan_strip) 399 offloads |= DEV_RX_OFFLOAD_VLAN_STRIP; 400 return offloads; 401 } 402 403 404 /** 405 * Returns the per-port supported offloads. 406 * 407 * @return 408 * Supported Rx offloads. 409 */ 410 uint64_t 411 mlx5_get_rx_port_offloads(void) 412 { 413 uint64_t offloads = DEV_RX_OFFLOAD_VLAN_FILTER; 414 415 return offloads; 416 } 417 418 /** 419 * 420 * @param dev 421 * Pointer to Ethernet device structure. 422 * @param idx 423 * RX queue index. 424 * @param desc 425 * Number of descriptors to configure in queue. 426 * @param socket 427 * NUMA socket on which memory must be allocated. 428 * @param[in] conf 429 * Thresholds parameters. 430 * @param mp 431 * Memory pool for buffer allocations. 432 * 433 * @return 434 * 0 on success, a negative errno value otherwise and rte_errno is set. 435 */ 436 int 437 mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, 438 unsigned int socket, const struct rte_eth_rxconf *conf, 439 struct rte_mempool *mp) 440 { 441 struct mlx5_priv *priv = dev->data->dev_private; 442 struct mlx5_rxq_data *rxq = (*priv->rxqs)[idx]; 443 struct mlx5_rxq_ctrl *rxq_ctrl = 444 container_of(rxq, struct mlx5_rxq_ctrl, rxq); 445 446 if (!rte_is_power_of_2(desc)) { 447 desc = 1 << log2above(desc); 448 DRV_LOG(WARNING, 449 "port %u increased number of descriptors in Rx queue %u" 450 " to the next power of two (%d)", 451 dev->data->port_id, idx, desc); 452 } 453 DRV_LOG(DEBUG, "port %u configuring Rx queue %u for %u descriptors", 454 dev->data->port_id, idx, desc); 455 if (idx >= priv->rxqs_n) { 456 DRV_LOG(ERR, "port %u Rx queue index out of range (%u >= %u)", 457 dev->data->port_id, idx, priv->rxqs_n); 458 rte_errno = EOVERFLOW; 459 return -rte_errno; 460 } 461 if (!mlx5_rxq_releasable(dev, idx)) { 462 DRV_LOG(ERR, "port %u unable to release queue index %u", 463 dev->data->port_id, idx); 464 rte_errno = EBUSY; 465 return -rte_errno; 466 } 467 mlx5_rxq_release(dev, idx); 468 rxq_ctrl = mlx5_rxq_new(dev, idx, desc, socket, conf, mp); 469 if (!rxq_ctrl) { 470 DRV_LOG(ERR, "port %u unable to allocate queue index %u", 471 dev->data->port_id, idx); 472 rte_errno = ENOMEM; 473 return -rte_errno; 474 } 475 DRV_LOG(DEBUG, "port %u adding Rx queue %u to list", 476 dev->data->port_id, idx); 477 (*priv->rxqs)[idx] = &rxq_ctrl->rxq; 478 return 0; 479 } 480 481 /** 482 * DPDK callback to release a RX queue. 483 * 484 * @param dpdk_rxq 485 * Generic RX queue pointer. 
486 */ 487 void 488 mlx5_rx_queue_release(void *dpdk_rxq) 489 { 490 struct mlx5_rxq_data *rxq = (struct mlx5_rxq_data *)dpdk_rxq; 491 struct mlx5_rxq_ctrl *rxq_ctrl; 492 struct mlx5_priv *priv; 493 494 if (rxq == NULL) 495 return; 496 rxq_ctrl = container_of(rxq, struct mlx5_rxq_ctrl, rxq); 497 priv = rxq_ctrl->priv; 498 if (!mlx5_rxq_releasable(ETH_DEV(priv), rxq_ctrl->rxq.idx)) 499 rte_panic("port %u Rx queue %u is still used by a flow and" 500 " cannot be removed\n", 501 PORT_ID(priv), rxq->idx); 502 mlx5_rxq_release(ETH_DEV(priv), rxq_ctrl->rxq.idx); 503 } 504 505 /** 506 * Allocate queue vector and fill epoll fd list for Rx interrupts. 507 * 508 * @param dev 509 * Pointer to Ethernet device. 510 * 511 * @return 512 * 0 on success, a negative errno value otherwise and rte_errno is set. 513 */ 514 int 515 mlx5_rx_intr_vec_enable(struct rte_eth_dev *dev) 516 { 517 struct mlx5_priv *priv = dev->data->dev_private; 518 unsigned int i; 519 unsigned int rxqs_n = priv->rxqs_n; 520 unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID); 521 unsigned int count = 0; 522 struct rte_intr_handle *intr_handle = dev->intr_handle; 523 524 if (!dev->data->dev_conf.intr_conf.rxq) 525 return 0; 526 mlx5_rx_intr_vec_disable(dev); 527 intr_handle->intr_vec = malloc(n * sizeof(intr_handle->intr_vec[0])); 528 if (intr_handle->intr_vec == NULL) { 529 DRV_LOG(ERR, 530 "port %u failed to allocate memory for interrupt" 531 " vector, Rx interrupts will not be supported", 532 dev->data->port_id); 533 rte_errno = ENOMEM; 534 return -rte_errno; 535 } 536 intr_handle->type = RTE_INTR_HANDLE_EXT; 537 for (i = 0; i != n; ++i) { 538 /* This rxq ibv must not be released in this function. */ 539 struct mlx5_rxq_ibv *rxq_ibv = mlx5_rxq_ibv_get(dev, i); 540 int fd; 541 int flags; 542 int rc; 543 544 /* Skip queues that cannot request interrupts. */ 545 if (!rxq_ibv || !rxq_ibv->channel) { 546 /* Use invalid intr_vec[] index to disable entry. */ 547 intr_handle->intr_vec[i] = 548 RTE_INTR_VEC_RXTX_OFFSET + 549 RTE_MAX_RXTX_INTR_VEC_ID; 550 continue; 551 } 552 if (count >= RTE_MAX_RXTX_INTR_VEC_ID) { 553 DRV_LOG(ERR, 554 "port %u too many Rx queues for interrupt" 555 " vector size (%d), Rx interrupts cannot be" 556 " enabled", 557 dev->data->port_id, RTE_MAX_RXTX_INTR_VEC_ID); 558 mlx5_rx_intr_vec_disable(dev); 559 rte_errno = ENOMEM; 560 return -rte_errno; 561 } 562 fd = rxq_ibv->channel->fd; 563 flags = fcntl(fd, F_GETFL); 564 rc = fcntl(fd, F_SETFL, flags | O_NONBLOCK); 565 if (rc < 0) { 566 rte_errno = errno; 567 DRV_LOG(ERR, 568 "port %u failed to make Rx interrupt file" 569 " descriptor %d non-blocking for queue index" 570 " %d", 571 dev->data->port_id, fd, i); 572 mlx5_rx_intr_vec_disable(dev); 573 return -rte_errno; 574 } 575 intr_handle->intr_vec[i] = RTE_INTR_VEC_RXTX_OFFSET + count; 576 intr_handle->efds[count] = fd; 577 count++; 578 } 579 if (!count) 580 mlx5_rx_intr_vec_disable(dev); 581 else 582 intr_handle->nb_efd = count; 583 return 0; 584 } 585 586 /** 587 * Clean up Rx interrupts handler. 588 * 589 * @param dev 590 * Pointer to Ethernet device. 
591 */ 592 void 593 mlx5_rx_intr_vec_disable(struct rte_eth_dev *dev) 594 { 595 struct mlx5_priv *priv = dev->data->dev_private; 596 struct rte_intr_handle *intr_handle = dev->intr_handle; 597 unsigned int i; 598 unsigned int rxqs_n = priv->rxqs_n; 599 unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID); 600 601 if (!dev->data->dev_conf.intr_conf.rxq) 602 return; 603 if (!intr_handle->intr_vec) 604 goto free; 605 for (i = 0; i != n; ++i) { 606 struct mlx5_rxq_ctrl *rxq_ctrl; 607 struct mlx5_rxq_data *rxq_data; 608 609 if (intr_handle->intr_vec[i] == RTE_INTR_VEC_RXTX_OFFSET + 610 RTE_MAX_RXTX_INTR_VEC_ID) 611 continue; 612 /** 613 * Need to access directly the queue to release the reference 614 * kept in priv_rx_intr_vec_enable(). 615 */ 616 rxq_data = (*priv->rxqs)[i]; 617 rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq); 618 mlx5_rxq_ibv_release(rxq_ctrl->ibv); 619 } 620 free: 621 rte_intr_free_epoll_fd(intr_handle); 622 if (intr_handle->intr_vec) 623 free(intr_handle->intr_vec); 624 intr_handle->nb_efd = 0; 625 intr_handle->intr_vec = NULL; 626 } 627 628 /** 629 * MLX5 CQ notification . 630 * 631 * @param rxq 632 * Pointer to receive queue structure. 633 * @param sq_n_rxq 634 * Sequence number per receive queue . 635 */ 636 static inline void 637 mlx5_arm_cq(struct mlx5_rxq_data *rxq, int sq_n_rxq) 638 { 639 int sq_n = 0; 640 uint32_t doorbell_hi; 641 uint64_t doorbell; 642 void *cq_db_reg = (char *)rxq->cq_uar + MLX5_CQ_DOORBELL; 643 644 sq_n = sq_n_rxq & MLX5_CQ_SQN_MASK; 645 doorbell_hi = sq_n << MLX5_CQ_SQN_OFFSET | (rxq->cq_ci & MLX5_CI_MASK); 646 doorbell = (uint64_t)doorbell_hi << 32; 647 doorbell |= rxq->cqn; 648 rxq->cq_db[MLX5_CQ_ARM_DB] = rte_cpu_to_be_32(doorbell_hi); 649 mlx5_uar_write64(rte_cpu_to_be_64(doorbell), 650 cq_db_reg, rxq->uar_lock_cq); 651 } 652 653 /** 654 * DPDK callback for Rx queue interrupt enable. 655 * 656 * @param dev 657 * Pointer to Ethernet device structure. 658 * @param rx_queue_id 659 * Rx queue number. 660 * 661 * @return 662 * 0 on success, a negative errno value otherwise and rte_errno is set. 663 */ 664 int 665 mlx5_rx_intr_enable(struct rte_eth_dev *dev, uint16_t rx_queue_id) 666 { 667 struct mlx5_priv *priv = dev->data->dev_private; 668 struct mlx5_rxq_data *rxq_data; 669 struct mlx5_rxq_ctrl *rxq_ctrl; 670 671 rxq_data = (*priv->rxqs)[rx_queue_id]; 672 if (!rxq_data) { 673 rte_errno = EINVAL; 674 return -rte_errno; 675 } 676 rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq); 677 if (rxq_ctrl->irq) { 678 struct mlx5_rxq_ibv *rxq_ibv; 679 680 rxq_ibv = mlx5_rxq_ibv_get(dev, rx_queue_id); 681 if (!rxq_ibv) { 682 rte_errno = EINVAL; 683 return -rte_errno; 684 } 685 mlx5_arm_cq(rxq_data, rxq_data->cq_arm_sn); 686 mlx5_rxq_ibv_release(rxq_ibv); 687 } 688 return 0; 689 } 690 691 /** 692 * DPDK callback for Rx queue interrupt disable. 693 * 694 * @param dev 695 * Pointer to Ethernet device structure. 696 * @param rx_queue_id 697 * Rx queue number. 698 * 699 * @return 700 * 0 on success, a negative errno value otherwise and rte_errno is set. 
701 */ 702 int 703 mlx5_rx_intr_disable(struct rte_eth_dev *dev, uint16_t rx_queue_id) 704 { 705 struct mlx5_priv *priv = dev->data->dev_private; 706 struct mlx5_rxq_data *rxq_data; 707 struct mlx5_rxq_ctrl *rxq_ctrl; 708 struct mlx5_rxq_ibv *rxq_ibv = NULL; 709 struct ibv_cq *ev_cq; 710 void *ev_ctx; 711 int ret; 712 713 rxq_data = (*priv->rxqs)[rx_queue_id]; 714 if (!rxq_data) { 715 rte_errno = EINVAL; 716 return -rte_errno; 717 } 718 rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq); 719 if (!rxq_ctrl->irq) 720 return 0; 721 rxq_ibv = mlx5_rxq_ibv_get(dev, rx_queue_id); 722 if (!rxq_ibv) { 723 rte_errno = EINVAL; 724 return -rte_errno; 725 } 726 ret = mlx5_glue->get_cq_event(rxq_ibv->channel, &ev_cq, &ev_ctx); 727 if (ret || ev_cq != rxq_ibv->cq) { 728 rte_errno = EINVAL; 729 goto exit; 730 } 731 rxq_data->cq_arm_sn++; 732 mlx5_glue->ack_cq_events(rxq_ibv->cq, 1); 733 return 0; 734 exit: 735 ret = rte_errno; /* Save rte_errno before cleanup. */ 736 if (rxq_ibv) 737 mlx5_rxq_ibv_release(rxq_ibv); 738 DRV_LOG(WARNING, "port %u unable to disable interrupt on Rx queue %d", 739 dev->data->port_id, rx_queue_id); 740 rte_errno = ret; /* Restore rte_errno. */ 741 return -rte_errno; 742 } 743 744 /** 745 * Create the Rx queue Verbs object. 746 * 747 * @param dev 748 * Pointer to Ethernet device. 749 * @param idx 750 * Queue index in DPDK Rx queue array 751 * 752 * @return 753 * The Verbs object initialised, NULL otherwise and rte_errno is set. 754 */ 755 struct mlx5_rxq_ibv * 756 mlx5_rxq_ibv_new(struct rte_eth_dev *dev, uint16_t idx) 757 { 758 struct mlx5_priv *priv = dev->data->dev_private; 759 struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx]; 760 struct mlx5_rxq_ctrl *rxq_ctrl = 761 container_of(rxq_data, struct mlx5_rxq_ctrl, rxq); 762 struct ibv_wq_attr mod; 763 union { 764 struct { 765 struct ibv_cq_init_attr_ex ibv; 766 struct mlx5dv_cq_init_attr mlx5; 767 } cq; 768 struct { 769 struct ibv_wq_init_attr ibv; 770 #ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT 771 struct mlx5dv_wq_init_attr mlx5; 772 #endif 773 } wq; 774 struct ibv_cq_ex cq_attr; 775 } attr; 776 unsigned int cqe_n; 777 unsigned int wqe_n = 1 << rxq_data->elts_n; 778 struct mlx5_rxq_ibv *tmpl; 779 struct mlx5dv_cq cq_info; 780 struct mlx5dv_rwq rwq; 781 unsigned int i; 782 int ret = 0; 783 struct mlx5dv_obj obj; 784 struct mlx5_dev_config *config = &priv->config; 785 const int mprq_en = mlx5_rxq_mprq_enabled(rxq_data); 786 787 assert(rxq_data); 788 assert(!rxq_ctrl->ibv); 789 priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_RX_QUEUE; 790 priv->verbs_alloc_ctx.obj = rxq_ctrl; 791 tmpl = rte_calloc_socket(__func__, 1, sizeof(*tmpl), 0, 792 rxq_ctrl->socket); 793 if (!tmpl) { 794 DRV_LOG(ERR, 795 "port %u Rx queue %u cannot allocate verbs resources", 796 dev->data->port_id, rxq_data->idx); 797 rte_errno = ENOMEM; 798 goto error; 799 } 800 tmpl->rxq_ctrl = rxq_ctrl; 801 if (rxq_ctrl->irq) { 802 tmpl->channel = mlx5_glue->create_comp_channel(priv->sh->ctx); 803 if (!tmpl->channel) { 804 DRV_LOG(ERR, "port %u: comp channel creation failure", 805 dev->data->port_id); 806 rte_errno = ENOMEM; 807 goto error; 808 } 809 } 810 if (mprq_en) 811 cqe_n = wqe_n * (1 << rxq_data->strd_num_n) - 1; 812 else 813 cqe_n = wqe_n - 1; 814 attr.cq.ibv = (struct ibv_cq_init_attr_ex){ 815 .cqe = cqe_n, 816 .channel = tmpl->channel, 817 .comp_mask = 0, 818 }; 819 attr.cq.mlx5 = (struct mlx5dv_cq_init_attr){ 820 .comp_mask = 0, 821 }; 822 if (config->cqe_comp && !rxq_data->hw_timestamp) { 823 attr.cq.mlx5.comp_mask |= 824 
MLX5DV_CQ_INIT_ATTR_MASK_COMPRESSED_CQE; 825 #ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT 826 attr.cq.mlx5.cqe_comp_res_format = 827 mprq_en ? MLX5DV_CQE_RES_FORMAT_CSUM_STRIDX : 828 MLX5DV_CQE_RES_FORMAT_HASH; 829 #else 830 attr.cq.mlx5.cqe_comp_res_format = MLX5DV_CQE_RES_FORMAT_HASH; 831 #endif 832 /* 833 * For vectorized Rx, it must not be doubled in order to 834 * make cq_ci and rq_ci aligned. 835 */ 836 if (mlx5_rxq_check_vec_support(rxq_data) < 0) 837 attr.cq.ibv.cqe *= 2; 838 } else if (config->cqe_comp && rxq_data->hw_timestamp) { 839 DRV_LOG(DEBUG, 840 "port %u Rx CQE compression is disabled for HW" 841 " timestamp", 842 dev->data->port_id); 843 } 844 #ifdef HAVE_IBV_MLX5_MOD_CQE_128B_PAD 845 if (config->cqe_pad) { 846 attr.cq.mlx5.comp_mask |= MLX5DV_CQ_INIT_ATTR_MASK_FLAGS; 847 attr.cq.mlx5.flags |= MLX5DV_CQ_INIT_ATTR_FLAGS_CQE_PAD; 848 } 849 #endif 850 tmpl->cq = mlx5_glue->cq_ex_to_cq 851 (mlx5_glue->dv_create_cq(priv->sh->ctx, &attr.cq.ibv, 852 &attr.cq.mlx5)); 853 if (tmpl->cq == NULL) { 854 DRV_LOG(ERR, "port %u Rx queue %u CQ creation failure", 855 dev->data->port_id, idx); 856 rte_errno = ENOMEM; 857 goto error; 858 } 859 DRV_LOG(DEBUG, "port %u device_attr.max_qp_wr is %d", 860 dev->data->port_id, priv->sh->device_attr.orig_attr.max_qp_wr); 861 DRV_LOG(DEBUG, "port %u device_attr.max_sge is %d", 862 dev->data->port_id, priv->sh->device_attr.orig_attr.max_sge); 863 attr.wq.ibv = (struct ibv_wq_init_attr){ 864 .wq_context = NULL, /* Could be useful in the future. */ 865 .wq_type = IBV_WQT_RQ, 866 /* Max number of outstanding WRs. */ 867 .max_wr = wqe_n >> rxq_data->sges_n, 868 /* Max number of scatter/gather elements in a WR. */ 869 .max_sge = 1 << rxq_data->sges_n, 870 .pd = priv->sh->pd, 871 .cq = tmpl->cq, 872 .comp_mask = 873 IBV_WQ_FLAGS_CVLAN_STRIPPING | 874 0, 875 .create_flags = (rxq_data->vlan_strip ? 876 IBV_WQ_FLAGS_CVLAN_STRIPPING : 877 0), 878 }; 879 /* By default, FCS (CRC) is stripped by hardware. */ 880 if (rxq_data->crc_present) { 881 attr.wq.ibv.create_flags |= IBV_WQ_FLAGS_SCATTER_FCS; 882 attr.wq.ibv.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS; 883 } 884 if (config->hw_padding) { 885 #if defined(HAVE_IBV_WQ_FLAG_RX_END_PADDING) 886 attr.wq.ibv.create_flags |= IBV_WQ_FLAG_RX_END_PADDING; 887 attr.wq.ibv.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS; 888 #elif defined(HAVE_IBV_WQ_FLAGS_PCI_WRITE_END_PADDING) 889 attr.wq.ibv.create_flags |= IBV_WQ_FLAGS_PCI_WRITE_END_PADDING; 890 attr.wq.ibv.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS; 891 #endif 892 } 893 #ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT 894 attr.wq.mlx5 = (struct mlx5dv_wq_init_attr){ 895 .comp_mask = 0, 896 }; 897 if (mprq_en) { 898 struct mlx5dv_striding_rq_init_attr *mprq_attr = 899 &attr.wq.mlx5.striding_rq_attrs; 900 901 attr.wq.mlx5.comp_mask |= MLX5DV_WQ_INIT_ATTR_MASK_STRIDING_RQ; 902 *mprq_attr = (struct mlx5dv_striding_rq_init_attr){ 903 .single_stride_log_num_of_bytes = rxq_data->strd_sz_n, 904 .single_wqe_log_num_of_strides = rxq_data->strd_num_n, 905 .two_byte_shift_en = MLX5_MPRQ_TWO_BYTE_SHIFT, 906 }; 907 } 908 tmpl->wq = mlx5_glue->dv_create_wq(priv->sh->ctx, &attr.wq.ibv, 909 &attr.wq.mlx5); 910 #else 911 tmpl->wq = mlx5_glue->create_wq(priv->sh->ctx, &attr.wq.ibv); 912 #endif 913 if (tmpl->wq == NULL) { 914 DRV_LOG(ERR, "port %u Rx queue %u WQ creation failure", 915 dev->data->port_id, idx); 916 rte_errno = ENOMEM; 917 goto error; 918 } 919 /* 920 * Make sure number of WRs*SGEs match expectations since a queue 921 * cannot allocate more than "desc" buffers. 
922 */ 923 if (attr.wq.ibv.max_wr != (wqe_n >> rxq_data->sges_n) || 924 attr.wq.ibv.max_sge != (1u << rxq_data->sges_n)) { 925 DRV_LOG(ERR, 926 "port %u Rx queue %u requested %u*%u but got %u*%u" 927 " WRs*SGEs", 928 dev->data->port_id, idx, 929 wqe_n >> rxq_data->sges_n, (1 << rxq_data->sges_n), 930 attr.wq.ibv.max_wr, attr.wq.ibv.max_sge); 931 rte_errno = EINVAL; 932 goto error; 933 } 934 /* Change queue state to ready. */ 935 mod = (struct ibv_wq_attr){ 936 .attr_mask = IBV_WQ_ATTR_STATE, 937 .wq_state = IBV_WQS_RDY, 938 }; 939 ret = mlx5_glue->modify_wq(tmpl->wq, &mod); 940 if (ret) { 941 DRV_LOG(ERR, 942 "port %u Rx queue %u WQ state to IBV_WQS_RDY failed", 943 dev->data->port_id, idx); 944 rte_errno = ret; 945 goto error; 946 } 947 obj.cq.in = tmpl->cq; 948 obj.cq.out = &cq_info; 949 obj.rwq.in = tmpl->wq; 950 obj.rwq.out = &rwq; 951 ret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_CQ | MLX5DV_OBJ_RWQ); 952 if (ret) { 953 rte_errno = ret; 954 goto error; 955 } 956 if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) { 957 DRV_LOG(ERR, 958 "port %u wrong MLX5_CQE_SIZE environment variable" 959 " value: it should be set to %u", 960 dev->data->port_id, RTE_CACHE_LINE_SIZE); 961 rte_errno = EINVAL; 962 goto error; 963 } 964 /* Fill the rings. */ 965 rxq_data->wqes = rwq.buf; 966 for (i = 0; (i != wqe_n); ++i) { 967 volatile struct mlx5_wqe_data_seg *scat; 968 uintptr_t addr; 969 uint32_t byte_count; 970 971 if (mprq_en) { 972 struct mlx5_mprq_buf *buf = (*rxq_data->mprq_bufs)[i]; 973 974 scat = &((volatile struct mlx5_wqe_mprq *) 975 rxq_data->wqes)[i].dseg; 976 addr = (uintptr_t)mlx5_mprq_buf_addr(buf); 977 byte_count = (1 << rxq_data->strd_sz_n) * 978 (1 << rxq_data->strd_num_n); 979 } else { 980 struct rte_mbuf *buf = (*rxq_data->elts)[i]; 981 982 scat = &((volatile struct mlx5_wqe_data_seg *) 983 rxq_data->wqes)[i]; 984 addr = rte_pktmbuf_mtod(buf, uintptr_t); 985 byte_count = DATA_LEN(buf); 986 } 987 /* scat->addr must be able to store a pointer. */ 988 assert(sizeof(scat->addr) >= sizeof(uintptr_t)); 989 *scat = (struct mlx5_wqe_data_seg){ 990 .addr = rte_cpu_to_be_64(addr), 991 .byte_count = rte_cpu_to_be_32(byte_count), 992 .lkey = mlx5_rx_addr2mr(rxq_data, addr), 993 }; 994 } 995 rxq_data->rq_db = rwq.dbrec; 996 rxq_data->cqe_n = log2above(cq_info.cqe_cnt); 997 rxq_data->cq_ci = 0; 998 rxq_data->consumed_strd = 0; 999 rxq_data->rq_pi = 0; 1000 rxq_data->zip = (struct rxq_zip){ 1001 .ai = 0, 1002 }; 1003 rxq_data->cq_db = cq_info.dbrec; 1004 rxq_data->cqes = (volatile struct mlx5_cqe (*)[])(uintptr_t)cq_info.buf; 1005 rxq_data->cq_uar = cq_info.cq_uar; 1006 rxq_data->cqn = cq_info.cqn; 1007 rxq_data->cq_arm_sn = 0; 1008 /* Update doorbell counter. */ 1009 rxq_data->rq_ci = wqe_n >> rxq_data->sges_n; 1010 rte_wmb(); 1011 *rxq_data->rq_db = rte_cpu_to_be_32(rxq_data->rq_ci); 1012 DRV_LOG(DEBUG, "port %u rxq %u updated with %p", dev->data->port_id, 1013 idx, (void *)&tmpl); 1014 rte_atomic32_inc(&tmpl->refcnt); 1015 LIST_INSERT_HEAD(&priv->rxqsibv, tmpl, next); 1016 priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE; 1017 return tmpl; 1018 error: 1019 ret = rte_errno; /* Save rte_errno before cleanup. */ 1020 if (tmpl->wq) 1021 claim_zero(mlx5_glue->destroy_wq(tmpl->wq)); 1022 if (tmpl->cq) 1023 claim_zero(mlx5_glue->destroy_cq(tmpl->cq)); 1024 if (tmpl->channel) 1025 claim_zero(mlx5_glue->destroy_comp_channel(tmpl->channel)); 1026 priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE; 1027 rte_errno = ret; /* Restore rte_errno. 
*/ 1028 return NULL; 1029 } 1030 1031 /** 1032 * Get an Rx queue Verbs object. 1033 * 1034 * @param dev 1035 * Pointer to Ethernet device. 1036 * @param idx 1037 * Queue index in DPDK Rx queue array 1038 * 1039 * @return 1040 * The Verbs object if it exists. 1041 */ 1042 struct mlx5_rxq_ibv * 1043 mlx5_rxq_ibv_get(struct rte_eth_dev *dev, uint16_t idx) 1044 { 1045 struct mlx5_priv *priv = dev->data->dev_private; 1046 struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx]; 1047 struct mlx5_rxq_ctrl *rxq_ctrl; 1048 1049 if (idx >= priv->rxqs_n) 1050 return NULL; 1051 if (!rxq_data) 1052 return NULL; 1053 rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq); 1054 if (rxq_ctrl->ibv) { 1055 rte_atomic32_inc(&rxq_ctrl->ibv->refcnt); 1056 } 1057 return rxq_ctrl->ibv; 1058 } 1059 1060 /** 1061 * Release an Rx verbs queue object. 1062 * 1063 * @param rxq_ibv 1064 * Verbs Rx queue object. 1065 * 1066 * @return 1067 * 1 while a reference on it exists, 0 when freed. 1068 */ 1069 int 1070 mlx5_rxq_ibv_release(struct mlx5_rxq_ibv *rxq_ibv) 1071 { 1072 assert(rxq_ibv); 1073 assert(rxq_ibv->wq); 1074 assert(rxq_ibv->cq); 1075 if (rte_atomic32_dec_and_test(&rxq_ibv->refcnt)) { 1076 rxq_free_elts(rxq_ibv->rxq_ctrl); 1077 claim_zero(mlx5_glue->destroy_wq(rxq_ibv->wq)); 1078 claim_zero(mlx5_glue->destroy_cq(rxq_ibv->cq)); 1079 if (rxq_ibv->channel) 1080 claim_zero(mlx5_glue->destroy_comp_channel 1081 (rxq_ibv->channel)); 1082 LIST_REMOVE(rxq_ibv, next); 1083 rte_free(rxq_ibv); 1084 return 0; 1085 } 1086 return 1; 1087 } 1088 1089 /** 1090 * Verify the Verbs Rx queue list is empty 1091 * 1092 * @param dev 1093 * Pointer to Ethernet device. 1094 * 1095 * @return 1096 * The number of object not released. 1097 */ 1098 int 1099 mlx5_rxq_ibv_verify(struct rte_eth_dev *dev) 1100 { 1101 struct mlx5_priv *priv = dev->data->dev_private; 1102 int ret = 0; 1103 struct mlx5_rxq_ibv *rxq_ibv; 1104 1105 LIST_FOREACH(rxq_ibv, &priv->rxqsibv, next) { 1106 DRV_LOG(DEBUG, "port %u Verbs Rx queue %u still referenced", 1107 dev->data->port_id, rxq_ibv->rxq_ctrl->rxq.idx); 1108 ++ret; 1109 } 1110 return ret; 1111 } 1112 1113 /** 1114 * Return true if a single reference exists on the object. 1115 * 1116 * @param rxq_ibv 1117 * Verbs Rx queue object. 1118 */ 1119 int 1120 mlx5_rxq_ibv_releasable(struct mlx5_rxq_ibv *rxq_ibv) 1121 { 1122 assert(rxq_ibv); 1123 return (rte_atomic32_read(&rxq_ibv->refcnt) == 1); 1124 } 1125 1126 /** 1127 * Callback function to initialize mbufs for Multi-Packet RQ. 1128 */ 1129 static inline void 1130 mlx5_mprq_buf_init(struct rte_mempool *mp, void *opaque_arg __rte_unused, 1131 void *_m, unsigned int i __rte_unused) 1132 { 1133 struct mlx5_mprq_buf *buf = _m; 1134 1135 memset(_m, 0, sizeof(*buf)); 1136 buf->mp = mp; 1137 rte_atomic16_set(&buf->refcnt, 1); 1138 } 1139 1140 /** 1141 * Free mempool of Multi-Packet RQ. 1142 * 1143 * @param dev 1144 * Pointer to Ethernet device. 1145 * 1146 * @return 1147 * 0 on success, negative errno value on failure. 1148 */ 1149 int 1150 mlx5_mprq_free_mp(struct rte_eth_dev *dev) 1151 { 1152 struct mlx5_priv *priv = dev->data->dev_private; 1153 struct rte_mempool *mp = priv->mprq_mp; 1154 unsigned int i; 1155 1156 if (mp == NULL) 1157 return 0; 1158 DRV_LOG(DEBUG, "port %u freeing mempool (%s) for Multi-Packet RQ", 1159 dev->data->port_id, mp->name); 1160 /* 1161 * If a buffer in the pool has been externally attached to a mbuf and it 1162 * is still in use by application, destroying the Rx qeueue can spoil 1163 * the packet. 
It is unlikely to happen but if application dynamically 1164 * creates and destroys with holding Rx packets, this can happen. 1165 * 1166 * TODO: It is unavoidable for now because the mempool for Multi-Packet 1167 * RQ isn't provided by application but managed by PMD. 1168 */ 1169 if (!rte_mempool_full(mp)) { 1170 DRV_LOG(ERR, 1171 "port %u mempool for Multi-Packet RQ is still in use", 1172 dev->data->port_id); 1173 rte_errno = EBUSY; 1174 return -rte_errno; 1175 } 1176 rte_mempool_free(mp); 1177 /* Unset mempool for each Rx queue. */ 1178 for (i = 0; i != priv->rxqs_n; ++i) { 1179 struct mlx5_rxq_data *rxq = (*priv->rxqs)[i]; 1180 1181 if (rxq == NULL) 1182 continue; 1183 rxq->mprq_mp = NULL; 1184 } 1185 priv->mprq_mp = NULL; 1186 return 0; 1187 } 1188 1189 /** 1190 * Allocate a mempool for Multi-Packet RQ. All configured Rx queues share the 1191 * mempool. If already allocated, reuse it if there're enough elements. 1192 * Otherwise, resize it. 1193 * 1194 * @param dev 1195 * Pointer to Ethernet device. 1196 * 1197 * @return 1198 * 0 on success, negative errno value on failure. 1199 */ 1200 int 1201 mlx5_mprq_alloc_mp(struct rte_eth_dev *dev) 1202 { 1203 struct mlx5_priv *priv = dev->data->dev_private; 1204 struct rte_mempool *mp = priv->mprq_mp; 1205 char name[RTE_MEMPOOL_NAMESIZE]; 1206 unsigned int desc = 0; 1207 unsigned int buf_len; 1208 unsigned int obj_num; 1209 unsigned int obj_size; 1210 unsigned int strd_num_n = 0; 1211 unsigned int strd_sz_n = 0; 1212 unsigned int i; 1213 1214 if (!mlx5_mprq_enabled(dev)) 1215 return 0; 1216 /* Count the total number of descriptors configured. */ 1217 for (i = 0; i != priv->rxqs_n; ++i) { 1218 struct mlx5_rxq_data *rxq = (*priv->rxqs)[i]; 1219 1220 if (rxq == NULL) 1221 continue; 1222 desc += 1 << rxq->elts_n; 1223 /* Get the max number of strides. */ 1224 if (strd_num_n < rxq->strd_num_n) 1225 strd_num_n = rxq->strd_num_n; 1226 /* Get the max size of a stride. */ 1227 if (strd_sz_n < rxq->strd_sz_n) 1228 strd_sz_n = rxq->strd_sz_n; 1229 } 1230 assert(strd_num_n && strd_sz_n); 1231 buf_len = (1 << strd_num_n) * (1 << strd_sz_n); 1232 obj_size = buf_len + sizeof(struct mlx5_mprq_buf); 1233 /* 1234 * Received packets can be either memcpy'd or externally referenced. In 1235 * case that the packet is attached to an mbuf as an external buffer, as 1236 * it isn't possible to predict how the buffers will be queued by 1237 * application, there's no option to exactly pre-allocate needed buffers 1238 * in advance but to speculatively prepares enough buffers. 1239 * 1240 * In the data path, if this Mempool is depleted, PMD will try to memcpy 1241 * received packets to buffers provided by application (rxq->mp) until 1242 * this Mempool gets available again. 1243 */ 1244 desc *= 4; 1245 obj_num = desc + MLX5_MPRQ_MP_CACHE_SZ * priv->rxqs_n; 1246 /* 1247 * rte_mempool_create_empty() has sanity check to refuse large cache 1248 * size compared to the number of elements. 1249 * CACHE_FLUSHTHRESH_MULTIPLIER is defined in a C file, so using a 1250 * constant number 2 instead. 1251 */ 1252 obj_num = RTE_MAX(obj_num, MLX5_MPRQ_MP_CACHE_SZ * 2); 1253 /* Check a mempool is already allocated and if it can be resued. */ 1254 if (mp != NULL && mp->elt_size >= obj_size && mp->size >= obj_num) { 1255 DRV_LOG(DEBUG, "port %u mempool %s is being reused", 1256 dev->data->port_id, mp->name); 1257 /* Reuse. 
*/ 1258 goto exit; 1259 } else if (mp != NULL) { 1260 DRV_LOG(DEBUG, "port %u mempool %s should be resized, freeing it", 1261 dev->data->port_id, mp->name); 1262 /* 1263 * If failed to free, which means it may be still in use, no way 1264 * but to keep using the existing one. On buffer underrun, 1265 * packets will be memcpy'd instead of external buffer 1266 * attachment. 1267 */ 1268 if (mlx5_mprq_free_mp(dev)) { 1269 if (mp->elt_size >= obj_size) 1270 goto exit; 1271 else 1272 return -rte_errno; 1273 } 1274 } 1275 snprintf(name, sizeof(name), "%s-mprq", dev->device->name); 1276 mp = rte_mempool_create(name, obj_num, obj_size, MLX5_MPRQ_MP_CACHE_SZ, 1277 0, NULL, NULL, mlx5_mprq_buf_init, NULL, 1278 dev->device->numa_node, 0); 1279 if (mp == NULL) { 1280 DRV_LOG(ERR, 1281 "port %u failed to allocate a mempool for" 1282 " Multi-Packet RQ, count=%u, size=%u", 1283 dev->data->port_id, obj_num, obj_size); 1284 rte_errno = ENOMEM; 1285 return -rte_errno; 1286 } 1287 priv->mprq_mp = mp; 1288 exit: 1289 /* Set mempool for each Rx queue. */ 1290 for (i = 0; i != priv->rxqs_n; ++i) { 1291 struct mlx5_rxq_data *rxq = (*priv->rxqs)[i]; 1292 1293 if (rxq == NULL) 1294 continue; 1295 rxq->mprq_mp = mp; 1296 } 1297 DRV_LOG(INFO, "port %u Multi-Packet RQ is configured", 1298 dev->data->port_id); 1299 return 0; 1300 } 1301 1302 /** 1303 * Create a DPDK Rx queue. 1304 * 1305 * @param dev 1306 * Pointer to Ethernet device. 1307 * @param idx 1308 * RX queue index. 1309 * @param desc 1310 * Number of descriptors to configure in queue. 1311 * @param socket 1312 * NUMA socket on which memory must be allocated. 1313 * 1314 * @return 1315 * A DPDK queue object on success, NULL otherwise and rte_errno is set. 1316 */ 1317 struct mlx5_rxq_ctrl * 1318 mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, 1319 unsigned int socket, const struct rte_eth_rxconf *conf, 1320 struct rte_mempool *mp) 1321 { 1322 struct mlx5_priv *priv = dev->data->dev_private; 1323 struct mlx5_rxq_ctrl *tmpl; 1324 unsigned int mb_len = rte_pktmbuf_data_room_size(mp); 1325 unsigned int mprq_stride_size; 1326 struct mlx5_dev_config *config = &priv->config; 1327 /* 1328 * Always allocate extra slots, even if eventually 1329 * the vector Rx will not be used. 1330 */ 1331 uint16_t desc_n = 1332 desc + config->rx_vec_en * MLX5_VPMD_DESCS_PER_LOOP; 1333 uint64_t offloads = conf->offloads | 1334 dev->data->dev_conf.rxmode.offloads; 1335 const int mprq_en = mlx5_check_mprq_support(dev) > 0; 1336 1337 tmpl = rte_calloc_socket("RXQ", 1, 1338 sizeof(*tmpl) + 1339 desc_n * sizeof(struct rte_mbuf *), 1340 0, socket); 1341 if (!tmpl) { 1342 rte_errno = ENOMEM; 1343 return NULL; 1344 } 1345 if (mlx5_mr_btree_init(&tmpl->rxq.mr_ctrl.cache_bh, 1346 MLX5_MR_BTREE_CACHE_N, socket)) { 1347 /* rte_errno is already set. */ 1348 goto error; 1349 } 1350 tmpl->socket = socket; 1351 if (dev->data->dev_conf.intr_conf.rxq) 1352 tmpl->irq = 1; 1353 /* 1354 * This Rx queue can be configured as a Multi-Packet RQ if all of the 1355 * following conditions are met: 1356 * - MPRQ is enabled. 1357 * - The number of descs is more than the number of strides. 1358 * - max_rx_pkt_len plus overhead is less than the max size of a 1359 * stride. 1360 * Otherwise, enable Rx scatter if necessary. 
1361 */ 1362 assert(mb_len >= RTE_PKTMBUF_HEADROOM); 1363 mprq_stride_size = 1364 dev->data->dev_conf.rxmode.max_rx_pkt_len + 1365 sizeof(struct rte_mbuf_ext_shared_info) + 1366 RTE_PKTMBUF_HEADROOM; 1367 if (mprq_en && 1368 desc > (1U << config->mprq.stride_num_n) && 1369 mprq_stride_size <= (1U << config->mprq.max_stride_size_n)) { 1370 /* TODO: Rx scatter isn't supported yet. */ 1371 tmpl->rxq.sges_n = 0; 1372 /* Trim the number of descs needed. */ 1373 desc >>= config->mprq.stride_num_n; 1374 tmpl->rxq.strd_num_n = config->mprq.stride_num_n; 1375 tmpl->rxq.strd_sz_n = RTE_MAX(log2above(mprq_stride_size), 1376 config->mprq.min_stride_size_n); 1377 tmpl->rxq.strd_shift_en = MLX5_MPRQ_TWO_BYTE_SHIFT; 1378 tmpl->rxq.mprq_max_memcpy_len = 1379 RTE_MIN(mb_len - RTE_PKTMBUF_HEADROOM, 1380 config->mprq.max_memcpy_len); 1381 DRV_LOG(DEBUG, 1382 "port %u Rx queue %u: Multi-Packet RQ is enabled" 1383 " strd_num_n = %u, strd_sz_n = %u", 1384 dev->data->port_id, idx, 1385 tmpl->rxq.strd_num_n, tmpl->rxq.strd_sz_n); 1386 } else if (dev->data->dev_conf.rxmode.max_rx_pkt_len <= 1387 (mb_len - RTE_PKTMBUF_HEADROOM)) { 1388 tmpl->rxq.sges_n = 0; 1389 } else if (offloads & DEV_RX_OFFLOAD_SCATTER) { 1390 unsigned int size = 1391 RTE_PKTMBUF_HEADROOM + 1392 dev->data->dev_conf.rxmode.max_rx_pkt_len; 1393 unsigned int sges_n; 1394 1395 /* 1396 * Determine the number of SGEs needed for a full packet 1397 * and round it to the next power of two. 1398 */ 1399 sges_n = log2above((size / mb_len) + !!(size % mb_len)); 1400 tmpl->rxq.sges_n = sges_n; 1401 /* Make sure rxq.sges_n did not overflow. */ 1402 size = mb_len * (1 << tmpl->rxq.sges_n); 1403 size -= RTE_PKTMBUF_HEADROOM; 1404 if (size < dev->data->dev_conf.rxmode.max_rx_pkt_len) { 1405 DRV_LOG(ERR, 1406 "port %u too many SGEs (%u) needed to handle" 1407 " requested maximum packet size %u", 1408 dev->data->port_id, 1409 1 << sges_n, 1410 dev->data->dev_conf.rxmode.max_rx_pkt_len); 1411 rte_errno = EOVERFLOW; 1412 goto error; 1413 } 1414 } else { 1415 DRV_LOG(WARNING, 1416 "port %u the requested maximum Rx packet size (%u) is" 1417 " larger than a single mbuf (%u) and scattered mode has" 1418 " not been requested", 1419 dev->data->port_id, 1420 dev->data->dev_conf.rxmode.max_rx_pkt_len, 1421 mb_len - RTE_PKTMBUF_HEADROOM); 1422 } 1423 if (mprq_en && !mlx5_rxq_mprq_enabled(&tmpl->rxq)) 1424 DRV_LOG(WARNING, 1425 "port %u MPRQ is requested but cannot be enabled" 1426 " (requested: desc = %u, stride_sz = %u," 1427 " supported: min_stride_num = %u, max_stride_sz = %u).", 1428 dev->data->port_id, desc, mprq_stride_size, 1429 (1 << config->mprq.stride_num_n), 1430 (1 << config->mprq.max_stride_size_n)); 1431 DRV_LOG(DEBUG, "port %u maximum number of segments per packet: %u", 1432 dev->data->port_id, 1 << tmpl->rxq.sges_n); 1433 if (desc % (1 << tmpl->rxq.sges_n)) { 1434 DRV_LOG(ERR, 1435 "port %u number of Rx queue descriptors (%u) is not a" 1436 " multiple of SGEs per packet (%u)", 1437 dev->data->port_id, 1438 desc, 1439 1 << tmpl->rxq.sges_n); 1440 rte_errno = EINVAL; 1441 goto error; 1442 } 1443 /* Toggle RX checksum offload if hardware supports it. */ 1444 tmpl->rxq.csum = !!(offloads & DEV_RX_OFFLOAD_CHECKSUM); 1445 tmpl->rxq.hw_timestamp = !!(offloads & DEV_RX_OFFLOAD_TIMESTAMP); 1446 /* Configure VLAN stripping. */ 1447 tmpl->rxq.vlan_strip = !!(offloads & DEV_RX_OFFLOAD_VLAN_STRIP); 1448 /* By default, FCS (CRC) is stripped by hardware. 
*/ 1449 tmpl->rxq.crc_present = 0; 1450 if (offloads & DEV_RX_OFFLOAD_KEEP_CRC) { 1451 if (config->hw_fcs_strip) { 1452 tmpl->rxq.crc_present = 1; 1453 } else { 1454 DRV_LOG(WARNING, 1455 "port %u CRC stripping has been disabled but will" 1456 " still be performed by hardware, make sure MLNX_OFED" 1457 " and firmware are up to date", 1458 dev->data->port_id); 1459 } 1460 } 1461 DRV_LOG(DEBUG, 1462 "port %u CRC stripping is %s, %u bytes will be subtracted from" 1463 " incoming frames to hide it", 1464 dev->data->port_id, 1465 tmpl->rxq.crc_present ? "disabled" : "enabled", 1466 tmpl->rxq.crc_present << 2); 1467 /* Save port ID. */ 1468 tmpl->rxq.rss_hash = !!priv->rss_conf.rss_hf && 1469 (!!(dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS)); 1470 tmpl->rxq.port_id = dev->data->port_id; 1471 tmpl->priv = priv; 1472 tmpl->rxq.mp = mp; 1473 tmpl->rxq.elts_n = log2above(desc); 1474 tmpl->rxq.rq_repl_thresh = 1475 MLX5_VPMD_RXQ_RPLNSH_THRESH(1 << tmpl->rxq.elts_n); 1476 tmpl->rxq.elts = 1477 (struct rte_mbuf *(*)[1 << tmpl->rxq.elts_n])(tmpl + 1); 1478 #ifndef RTE_ARCH_64 1479 tmpl->rxq.uar_lock_cq = &priv->uar_lock_cq; 1480 #endif 1481 tmpl->rxq.idx = idx; 1482 rte_atomic32_inc(&tmpl->refcnt); 1483 LIST_INSERT_HEAD(&priv->rxqsctrl, tmpl, next); 1484 return tmpl; 1485 error: 1486 rte_free(tmpl); 1487 return NULL; 1488 } 1489 1490 /** 1491 * Get a Rx queue. 1492 * 1493 * @param dev 1494 * Pointer to Ethernet device. 1495 * @param idx 1496 * TX queue index. 1497 * 1498 * @return 1499 * A pointer to the queue if it exists, NULL otherwise. 1500 */ 1501 struct mlx5_rxq_ctrl * 1502 mlx5_rxq_get(struct rte_eth_dev *dev, uint16_t idx) 1503 { 1504 struct mlx5_priv *priv = dev->data->dev_private; 1505 struct mlx5_rxq_ctrl *rxq_ctrl = NULL; 1506 1507 if ((*priv->rxqs)[idx]) { 1508 rxq_ctrl = container_of((*priv->rxqs)[idx], 1509 struct mlx5_rxq_ctrl, 1510 rxq); 1511 mlx5_rxq_ibv_get(dev, idx); 1512 rte_atomic32_inc(&rxq_ctrl->refcnt); 1513 } 1514 return rxq_ctrl; 1515 } 1516 1517 /** 1518 * Release a Rx queue. 1519 * 1520 * @param dev 1521 * Pointer to Ethernet device. 1522 * @param idx 1523 * TX queue index. 1524 * 1525 * @return 1526 * 1 while a reference on it exists, 0 when freed. 1527 */ 1528 int 1529 mlx5_rxq_release(struct rte_eth_dev *dev, uint16_t idx) 1530 { 1531 struct mlx5_priv *priv = dev->data->dev_private; 1532 struct mlx5_rxq_ctrl *rxq_ctrl; 1533 1534 if (!(*priv->rxqs)[idx]) 1535 return 0; 1536 rxq_ctrl = container_of((*priv->rxqs)[idx], struct mlx5_rxq_ctrl, rxq); 1537 assert(rxq_ctrl->priv); 1538 if (rxq_ctrl->ibv && !mlx5_rxq_ibv_release(rxq_ctrl->ibv)) 1539 rxq_ctrl->ibv = NULL; 1540 if (rte_atomic32_dec_and_test(&rxq_ctrl->refcnt)) { 1541 mlx5_mr_btree_free(&rxq_ctrl->rxq.mr_ctrl.cache_bh); 1542 LIST_REMOVE(rxq_ctrl, next); 1543 rte_free(rxq_ctrl); 1544 (*priv->rxqs)[idx] = NULL; 1545 return 0; 1546 } 1547 return 1; 1548 } 1549 1550 /** 1551 * Verify if the queue can be released. 1552 * 1553 * @param dev 1554 * Pointer to Ethernet device. 1555 * @param idx 1556 * TX queue index. 1557 * 1558 * @return 1559 * 1 if the queue can be released, negative errno otherwise and rte_errno is 1560 * set. 
1561 */ 1562 int 1563 mlx5_rxq_releasable(struct rte_eth_dev *dev, uint16_t idx) 1564 { 1565 struct mlx5_priv *priv = dev->data->dev_private; 1566 struct mlx5_rxq_ctrl *rxq_ctrl; 1567 1568 if (!(*priv->rxqs)[idx]) { 1569 rte_errno = EINVAL; 1570 return -rte_errno; 1571 } 1572 rxq_ctrl = container_of((*priv->rxqs)[idx], struct mlx5_rxq_ctrl, rxq); 1573 return (rte_atomic32_read(&rxq_ctrl->refcnt) == 1); 1574 } 1575 1576 /** 1577 * Verify the Rx Queue list is empty 1578 * 1579 * @param dev 1580 * Pointer to Ethernet device. 1581 * 1582 * @return 1583 * The number of object not released. 1584 */ 1585 int 1586 mlx5_rxq_verify(struct rte_eth_dev *dev) 1587 { 1588 struct mlx5_priv *priv = dev->data->dev_private; 1589 struct mlx5_rxq_ctrl *rxq_ctrl; 1590 int ret = 0; 1591 1592 LIST_FOREACH(rxq_ctrl, &priv->rxqsctrl, next) { 1593 DRV_LOG(DEBUG, "port %u Rx Queue %u still referenced", 1594 dev->data->port_id, rxq_ctrl->rxq.idx); 1595 ++ret; 1596 } 1597 return ret; 1598 } 1599 1600 /** 1601 * Create an indirection table. 1602 * 1603 * @param dev 1604 * Pointer to Ethernet device. 1605 * @param queues 1606 * Queues entering in the indirection table. 1607 * @param queues_n 1608 * Number of queues in the array. 1609 * 1610 * @return 1611 * The Verbs object initialised, NULL otherwise and rte_errno is set. 1612 */ 1613 struct mlx5_ind_table_ibv * 1614 mlx5_ind_table_ibv_new(struct rte_eth_dev *dev, const uint16_t *queues, 1615 uint32_t queues_n) 1616 { 1617 struct mlx5_priv *priv = dev->data->dev_private; 1618 struct mlx5_ind_table_ibv *ind_tbl; 1619 const unsigned int wq_n = rte_is_power_of_2(queues_n) ? 1620 log2above(queues_n) : 1621 log2above(priv->config.ind_table_max_size); 1622 struct ibv_wq *wq[1 << wq_n]; 1623 unsigned int i; 1624 unsigned int j; 1625 1626 ind_tbl = rte_calloc(__func__, 1, sizeof(*ind_tbl) + 1627 queues_n * sizeof(uint16_t), 0); 1628 if (!ind_tbl) { 1629 rte_errno = ENOMEM; 1630 return NULL; 1631 } 1632 for (i = 0; i != queues_n; ++i) { 1633 struct mlx5_rxq_ctrl *rxq = mlx5_rxq_get(dev, queues[i]); 1634 1635 if (!rxq) 1636 goto error; 1637 wq[i] = rxq->ibv->wq; 1638 ind_tbl->queues[i] = queues[i]; 1639 } 1640 ind_tbl->queues_n = queues_n; 1641 /* Finalise indirection table. */ 1642 for (j = 0; i != (unsigned int)(1 << wq_n); ++i, ++j) 1643 wq[i] = wq[j]; 1644 ind_tbl->ind_table = mlx5_glue->create_rwq_ind_table 1645 (priv->sh->ctx, 1646 &(struct ibv_rwq_ind_table_init_attr){ 1647 .log_ind_tbl_size = wq_n, 1648 .ind_tbl = wq, 1649 .comp_mask = 0, 1650 }); 1651 if (!ind_tbl->ind_table) { 1652 rte_errno = errno; 1653 goto error; 1654 } 1655 rte_atomic32_inc(&ind_tbl->refcnt); 1656 LIST_INSERT_HEAD(&priv->ind_tbls, ind_tbl, next); 1657 return ind_tbl; 1658 error: 1659 rte_free(ind_tbl); 1660 DEBUG("port %u cannot create indirection table", dev->data->port_id); 1661 return NULL; 1662 } 1663 1664 /** 1665 * Get an indirection table. 1666 * 1667 * @param dev 1668 * Pointer to Ethernet device. 1669 * @param queues 1670 * Queues entering in the indirection table. 1671 * @param queues_n 1672 * Number of queues in the array. 1673 * 1674 * @return 1675 * An indirection table if found. 
1676 */ 1677 struct mlx5_ind_table_ibv * 1678 mlx5_ind_table_ibv_get(struct rte_eth_dev *dev, const uint16_t *queues, 1679 uint32_t queues_n) 1680 { 1681 struct mlx5_priv *priv = dev->data->dev_private; 1682 struct mlx5_ind_table_ibv *ind_tbl; 1683 1684 LIST_FOREACH(ind_tbl, &priv->ind_tbls, next) { 1685 if ((ind_tbl->queues_n == queues_n) && 1686 (memcmp(ind_tbl->queues, queues, 1687 ind_tbl->queues_n * sizeof(ind_tbl->queues[0])) 1688 == 0)) 1689 break; 1690 } 1691 if (ind_tbl) { 1692 unsigned int i; 1693 1694 rte_atomic32_inc(&ind_tbl->refcnt); 1695 for (i = 0; i != ind_tbl->queues_n; ++i) 1696 mlx5_rxq_get(dev, ind_tbl->queues[i]); 1697 } 1698 return ind_tbl; 1699 } 1700 1701 /** 1702 * Release an indirection table. 1703 * 1704 * @param dev 1705 * Pointer to Ethernet device. 1706 * @param ind_table 1707 * Indirection table to release. 1708 * 1709 * @return 1710 * 1 while a reference on it exists, 0 when freed. 1711 */ 1712 int 1713 mlx5_ind_table_ibv_release(struct rte_eth_dev *dev, 1714 struct mlx5_ind_table_ibv *ind_tbl) 1715 { 1716 unsigned int i; 1717 1718 if (rte_atomic32_dec_and_test(&ind_tbl->refcnt)) 1719 claim_zero(mlx5_glue->destroy_rwq_ind_table 1720 (ind_tbl->ind_table)); 1721 for (i = 0; i != ind_tbl->queues_n; ++i) 1722 claim_nonzero(mlx5_rxq_release(dev, ind_tbl->queues[i])); 1723 if (!rte_atomic32_read(&ind_tbl->refcnt)) { 1724 LIST_REMOVE(ind_tbl, next); 1725 rte_free(ind_tbl); 1726 return 0; 1727 } 1728 return 1; 1729 } 1730 1731 /** 1732 * Verify the Rx Queue list is empty 1733 * 1734 * @param dev 1735 * Pointer to Ethernet device. 1736 * 1737 * @return 1738 * The number of object not released. 1739 */ 1740 int 1741 mlx5_ind_table_ibv_verify(struct rte_eth_dev *dev) 1742 { 1743 struct mlx5_priv *priv = dev->data->dev_private; 1744 struct mlx5_ind_table_ibv *ind_tbl; 1745 int ret = 0; 1746 1747 LIST_FOREACH(ind_tbl, &priv->ind_tbls, next) { 1748 DRV_LOG(DEBUG, 1749 "port %u Verbs indirection table %p still referenced", 1750 dev->data->port_id, (void *)ind_tbl); 1751 ++ret; 1752 } 1753 return ret; 1754 } 1755 1756 /** 1757 * Create an Rx Hash queue. 1758 * 1759 * @param dev 1760 * Pointer to Ethernet device. 1761 * @param rss_key 1762 * RSS key for the Rx hash queue. 1763 * @param rss_key_len 1764 * RSS key length. 1765 * @param hash_fields 1766 * Verbs protocol hash field to make the RSS on. 1767 * @param queues 1768 * Queues entering in hash queue. In case of empty hash_fields only the 1769 * first queue index will be taken for the indirection table. 1770 * @param queues_n 1771 * Number of queues. 1772 * @param tunnel 1773 * Tunnel type. 1774 * 1775 * @return 1776 * The Verbs object initialised, NULL otherwise and rte_errno is set. 1777 */ 1778 struct mlx5_hrxq * 1779 mlx5_hrxq_new(struct rte_eth_dev *dev, 1780 const uint8_t *rss_key, uint32_t rss_key_len, 1781 uint64_t hash_fields, 1782 const uint16_t *queues, uint32_t queues_n, 1783 int tunnel __rte_unused) 1784 { 1785 struct mlx5_priv *priv = dev->data->dev_private; 1786 struct mlx5_hrxq *hrxq; 1787 struct mlx5_ind_table_ibv *ind_tbl; 1788 struct ibv_qp *qp; 1789 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT 1790 struct mlx5dv_qp_init_attr qp_init_attr; 1791 #endif 1792 int err; 1793 1794 queues_n = hash_fields ? 
queues_n : 1; 1795 ind_tbl = mlx5_ind_table_ibv_get(dev, queues, queues_n); 1796 if (!ind_tbl) 1797 ind_tbl = mlx5_ind_table_ibv_new(dev, queues, queues_n); 1798 if (!ind_tbl) { 1799 rte_errno = ENOMEM; 1800 return NULL; 1801 } 1802 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT 1803 memset(&qp_init_attr, 0, sizeof(qp_init_attr)); 1804 if (tunnel) { 1805 qp_init_attr.comp_mask = 1806 MLX5DV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS; 1807 qp_init_attr.create_flags = MLX5DV_QP_CREATE_TUNNEL_OFFLOADS; 1808 } 1809 #ifdef HAVE_IBV_FLOW_DV_SUPPORT 1810 if (dev->data->dev_conf.lpbk_mode) { 1811 /* Allow packet sent from NIC loop back w/o source MAC check. */ 1812 qp_init_attr.comp_mask |= 1813 MLX5DV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS; 1814 qp_init_attr.create_flags |= 1815 MLX5DV_QP_CREATE_TIR_ALLOW_SELF_LOOPBACK_UC; 1816 } 1817 #endif 1818 qp = mlx5_glue->dv_create_qp 1819 (priv->sh->ctx, 1820 &(struct ibv_qp_init_attr_ex){ 1821 .qp_type = IBV_QPT_RAW_PACKET, 1822 .comp_mask = 1823 IBV_QP_INIT_ATTR_PD | 1824 IBV_QP_INIT_ATTR_IND_TABLE | 1825 IBV_QP_INIT_ATTR_RX_HASH, 1826 .rx_hash_conf = (struct ibv_rx_hash_conf){ 1827 .rx_hash_function = IBV_RX_HASH_FUNC_TOEPLITZ, 1828 .rx_hash_key_len = rss_key_len, 1829 .rx_hash_key = (void *)(uintptr_t)rss_key, 1830 .rx_hash_fields_mask = hash_fields, 1831 }, 1832 .rwq_ind_tbl = ind_tbl->ind_table, 1833 .pd = priv->sh->pd, 1834 }, 1835 &qp_init_attr); 1836 #else 1837 qp = mlx5_glue->create_qp_ex 1838 (priv->sh->ctx, 1839 &(struct ibv_qp_init_attr_ex){ 1840 .qp_type = IBV_QPT_RAW_PACKET, 1841 .comp_mask = 1842 IBV_QP_INIT_ATTR_PD | 1843 IBV_QP_INIT_ATTR_IND_TABLE | 1844 IBV_QP_INIT_ATTR_RX_HASH, 1845 .rx_hash_conf = (struct ibv_rx_hash_conf){ 1846 .rx_hash_function = IBV_RX_HASH_FUNC_TOEPLITZ, 1847 .rx_hash_key_len = rss_key_len, 1848 .rx_hash_key = (void *)(uintptr_t)rss_key, 1849 .rx_hash_fields_mask = hash_fields, 1850 }, 1851 .rwq_ind_tbl = ind_tbl->ind_table, 1852 .pd = priv->sh->pd, 1853 }); 1854 #endif 1855 if (!qp) { 1856 rte_errno = errno; 1857 goto error; 1858 } 1859 hrxq = rte_calloc(__func__, 1, sizeof(*hrxq) + rss_key_len, 0); 1860 if (!hrxq) 1861 goto error; 1862 hrxq->ind_table = ind_tbl; 1863 hrxq->qp = qp; 1864 hrxq->rss_key_len = rss_key_len; 1865 hrxq->hash_fields = hash_fields; 1866 memcpy(hrxq->rss_key, rss_key, rss_key_len); 1867 rte_atomic32_inc(&hrxq->refcnt); 1868 LIST_INSERT_HEAD(&priv->hrxqs, hrxq, next); 1869 return hrxq; 1870 error: 1871 err = rte_errno; /* Save rte_errno before cleanup. */ 1872 mlx5_ind_table_ibv_release(dev, ind_tbl); 1873 if (qp) 1874 claim_zero(mlx5_glue->destroy_qp(qp)); 1875 rte_errno = err; /* Restore rte_errno. */ 1876 return NULL; 1877 } 1878 1879 /** 1880 * Get an Rx Hash queue. 1881 * 1882 * @param dev 1883 * Pointer to Ethernet device. 1884 * @param rss_conf 1885 * RSS configuration for the Rx hash queue. 1886 * @param queues 1887 * Queues entering in hash queue. In case of empty hash_fields only the 1888 * first queue index will be taken for the indirection table. 1889 * @param queues_n 1890 * Number of queues. 1891 * 1892 * @return 1893 * An hash Rx queue on success. 1894 */ 1895 struct mlx5_hrxq * 1896 mlx5_hrxq_get(struct rte_eth_dev *dev, 1897 const uint8_t *rss_key, uint32_t rss_key_len, 1898 uint64_t hash_fields, 1899 const uint16_t *queues, uint32_t queues_n) 1900 { 1901 struct mlx5_priv *priv = dev->data->dev_private; 1902 struct mlx5_hrxq *hrxq; 1903 1904 queues_n = hash_fields ? 
queues_n : 1; 1905 LIST_FOREACH(hrxq, &priv->hrxqs, next) { 1906 struct mlx5_ind_table_ibv *ind_tbl; 1907 1908 if (hrxq->rss_key_len != rss_key_len) 1909 continue; 1910 if (memcmp(hrxq->rss_key, rss_key, rss_key_len)) 1911 continue; 1912 if (hrxq->hash_fields != hash_fields) 1913 continue; 1914 ind_tbl = mlx5_ind_table_ibv_get(dev, queues, queues_n); 1915 if (!ind_tbl) 1916 continue; 1917 if (ind_tbl != hrxq->ind_table) { 1918 mlx5_ind_table_ibv_release(dev, ind_tbl); 1919 continue; 1920 } 1921 rte_atomic32_inc(&hrxq->refcnt); 1922 return hrxq; 1923 } 1924 return NULL; 1925 } 1926 1927 /** 1928 * Release the hash Rx queue. 1929 * 1930 * @param dev 1931 * Pointer to Ethernet device. 1932 * @param hrxq 1933 * Pointer to Hash Rx queue to release. 1934 * 1935 * @return 1936 * 1 while a reference on it exists, 0 when freed. 1937 */ 1938 int 1939 mlx5_hrxq_release(struct rte_eth_dev *dev, struct mlx5_hrxq *hrxq) 1940 { 1941 if (rte_atomic32_dec_and_test(&hrxq->refcnt)) { 1942 claim_zero(mlx5_glue->destroy_qp(hrxq->qp)); 1943 mlx5_ind_table_ibv_release(dev, hrxq->ind_table); 1944 LIST_REMOVE(hrxq, next); 1945 rte_free(hrxq); 1946 return 0; 1947 } 1948 claim_nonzero(mlx5_ind_table_ibv_release(dev, hrxq->ind_table)); 1949 return 1; 1950 } 1951 1952 /** 1953 * Verify the Rx Queue list is empty 1954 * 1955 * @param dev 1956 * Pointer to Ethernet device. 1957 * 1958 * @return 1959 * The number of object not released. 1960 */ 1961 int 1962 mlx5_hrxq_ibv_verify(struct rte_eth_dev *dev) 1963 { 1964 struct mlx5_priv *priv = dev->data->dev_private; 1965 struct mlx5_hrxq *hrxq; 1966 int ret = 0; 1967 1968 LIST_FOREACH(hrxq, &priv->hrxqs, next) { 1969 DRV_LOG(DEBUG, 1970 "port %u Verbs hash Rx queue %p still referenced", 1971 dev->data->port_id, (void *)hrxq); 1972 ++ret; 1973 } 1974 return ret; 1975 } 1976 1977 /** 1978 * Create a drop Rx queue Verbs object. 1979 * 1980 * @param dev 1981 * Pointer to Ethernet device. 1982 * 1983 * @return 1984 * The Verbs object initialised, NULL otherwise and rte_errno is set. 1985 */ 1986 struct mlx5_rxq_ibv * 1987 mlx5_rxq_ibv_drop_new(struct rte_eth_dev *dev) 1988 { 1989 struct mlx5_priv *priv = dev->data->dev_private; 1990 struct ibv_context *ctx = priv->sh->ctx; 1991 struct ibv_cq *cq; 1992 struct ibv_wq *wq = NULL; 1993 struct mlx5_rxq_ibv *rxq; 1994 1995 if (priv->drop_queue.rxq) 1996 return priv->drop_queue.rxq; 1997 cq = mlx5_glue->create_cq(ctx, 1, NULL, NULL, 0); 1998 if (!cq) { 1999 DEBUG("port %u cannot allocate CQ for drop queue", 2000 dev->data->port_id); 2001 rte_errno = errno; 2002 goto error; 2003 } 2004 wq = mlx5_glue->create_wq(ctx, 2005 &(struct ibv_wq_init_attr){ 2006 .wq_type = IBV_WQT_RQ, 2007 .max_wr = 1, 2008 .max_sge = 1, 2009 .pd = priv->sh->pd, 2010 .cq = cq, 2011 }); 2012 if (!wq) { 2013 DEBUG("port %u cannot allocate WQ for drop queue", 2014 dev->data->port_id); 2015 rte_errno = errno; 2016 goto error; 2017 } 2018 rxq = rte_calloc(__func__, 1, sizeof(*rxq), 0); 2019 if (!rxq) { 2020 DEBUG("port %u cannot allocate drop Rx queue memory", 2021 dev->data->port_id); 2022 rte_errno = ENOMEM; 2023 goto error; 2024 } 2025 rxq->cq = cq; 2026 rxq->wq = wq; 2027 priv->drop_queue.rxq = rxq; 2028 return rxq; 2029 error: 2030 if (wq) 2031 claim_zero(mlx5_glue->destroy_wq(wq)); 2032 if (cq) 2033 claim_zero(mlx5_glue->destroy_cq(cq)); 2034 return NULL; 2035 } 2036 2037 /** 2038 * Release a drop Rx queue Verbs object. 2039 * 2040 * @param dev 2041 * Pointer to Ethernet device. 

/**
 * Release a drop Rx queue Verbs object.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
void
mlx5_rxq_ibv_drop_release(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_rxq_ibv *rxq = priv->drop_queue.rxq;

	if (rxq->wq)
		claim_zero(mlx5_glue->destroy_wq(rxq->wq));
	if (rxq->cq)
		claim_zero(mlx5_glue->destroy_cq(rxq->cq));
	rte_free(rxq);
	priv->drop_queue.rxq = NULL;
}

/**
 * Create a drop indirection table.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   The Verbs object initialised, NULL otherwise and rte_errno is set.
 */
struct mlx5_ind_table_ibv *
mlx5_ind_table_ibv_drop_new(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_ind_table_ibv *ind_tbl;
	struct mlx5_rxq_ibv *rxq;
	struct mlx5_ind_table_ibv tmpl;

	rxq = mlx5_rxq_ibv_drop_new(dev);
	if (!rxq)
		return NULL;
	tmpl.ind_table = mlx5_glue->create_rwq_ind_table
		(priv->sh->ctx,
		 &(struct ibv_rwq_ind_table_init_attr){
			.log_ind_tbl_size = 0,
			.ind_tbl = &rxq->wq,
			.comp_mask = 0,
		 });
	if (!tmpl.ind_table) {
		DEBUG("port %u cannot allocate indirection table for drop"
		      " queue",
		      dev->data->port_id);
		rte_errno = errno;
		goto error;
	}
	ind_tbl = rte_calloc(__func__, 1, sizeof(*ind_tbl), 0);
	if (!ind_tbl) {
		/* Destroy the Verbs table before releasing the drop queue. */
		claim_zero(mlx5_glue->destroy_rwq_ind_table(tmpl.ind_table));
		rte_errno = ENOMEM;
		goto error;
	}
	ind_tbl->ind_table = tmpl.ind_table;
	return ind_tbl;
error:
	mlx5_rxq_ibv_drop_release(dev);
	return NULL;
}

/**
 * Release a drop indirection table.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
void
mlx5_ind_table_ibv_drop_release(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_ind_table_ibv *ind_tbl = priv->drop_queue.hrxq->ind_table;

	claim_zero(mlx5_glue->destroy_rwq_ind_table(ind_tbl->ind_table));
	mlx5_rxq_ibv_drop_release(dev);
	rte_free(ind_tbl);
	priv->drop_queue.hrxq->ind_table = NULL;
}
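
/*
 * For reference, the drop indirection table is a plain single-entry RWQ
 * indirection table: log_ind_tbl_size is the log2 of the table size, so 0
 * means exactly one WQ.  A minimal raw-Verbs sketch of the same call follows
 * (illustrative only, the driver always goes through mlx5_glue).
 */
#if 0
static struct ibv_rwq_ind_table *
example_single_entry_ind_table(struct ibv_context *ctx, struct ibv_wq *wq)
{
	struct ibv_rwq_ind_table_init_attr attr = {
		.log_ind_tbl_size = 0, /* 2^0 == 1 entry */
		.ind_tbl = &wq,
		.comp_mask = 0,
	};

	return ibv_create_rwq_ind_table(ctx, &attr);
}
#endif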

/**
 * Create a drop Rx Hash queue.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   The Verbs object initialised, NULL otherwise and rte_errno is set.
 */
struct mlx5_hrxq *
mlx5_hrxq_drop_new(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_ind_table_ibv *ind_tbl;
	struct ibv_qp *qp;
	struct mlx5_hrxq *hrxq;

	if (priv->drop_queue.hrxq) {
		rte_atomic32_inc(&priv->drop_queue.hrxq->refcnt);
		return priv->drop_queue.hrxq;
	}
	ind_tbl = mlx5_ind_table_ibv_drop_new(dev);
	if (!ind_tbl)
		return NULL;
	qp = mlx5_glue->create_qp_ex(priv->sh->ctx,
		 &(struct ibv_qp_init_attr_ex){
			.qp_type = IBV_QPT_RAW_PACKET,
			.comp_mask =
				IBV_QP_INIT_ATTR_PD |
				IBV_QP_INIT_ATTR_IND_TABLE |
				IBV_QP_INIT_ATTR_RX_HASH,
			.rx_hash_conf = (struct ibv_rx_hash_conf){
				.rx_hash_function =
					IBV_RX_HASH_FUNC_TOEPLITZ,
				.rx_hash_key_len = MLX5_RSS_HASH_KEY_LEN,
				.rx_hash_key = rss_hash_default_key,
				.rx_hash_fields_mask = 0,
			},
			.rwq_ind_tbl = ind_tbl->ind_table,
			.pd = priv->sh->pd
		 });
	if (!qp) {
		DEBUG("port %u cannot allocate QP for drop queue",
		      dev->data->port_id);
		rte_errno = errno;
		goto error;
	}
	hrxq = rte_calloc(__func__, 1, sizeof(*hrxq), 0);
	if (!hrxq) {
		DRV_LOG(WARNING,
			"port %u cannot allocate memory for drop queue",
			dev->data->port_id);
		rte_errno = ENOMEM;
		goto error;
	}
	hrxq->ind_table = ind_tbl;
	hrxq->qp = qp;
	priv->drop_queue.hrxq = hrxq;
	rte_atomic32_set(&hrxq->refcnt, 1);
	return hrxq;
error:
	if (qp)
		claim_zero(mlx5_glue->destroy_qp(qp));
	if (ind_tbl) {
		/*
		 * mlx5_ind_table_ibv_drop_release() cannot be used here: it
		 * dereferences priv->drop_queue.hrxq, which is still NULL on
		 * this path, so tear the table down directly.
		 */
		claim_zero(mlx5_glue->destroy_rwq_ind_table
			   (ind_tbl->ind_table));
		mlx5_rxq_ibv_drop_release(dev);
		rte_free(ind_tbl);
	}
	return NULL;
}

/**
 * Release a drop hash Rx queue.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
void
mlx5_hrxq_drop_release(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_hrxq *hrxq = priv->drop_queue.hrxq;

	if (rte_atomic32_dec_and_test(&hrxq->refcnt)) {
		claim_zero(mlx5_glue->destroy_qp(hrxq->qp));
		mlx5_ind_table_ibv_drop_release(dev);
		rte_free(hrxq);
		priv->drop_queue.hrxq = NULL;
	}
}
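
/*
 * Illustrative sketch (not compiled): unlike regular hash Rx queues, the drop
 * hash Rx queue is a reference-counted singleton, so every
 * mlx5_hrxq_drop_new() must be balanced by one mlx5_hrxq_drop_release().
 * The function below is hypothetical and only shows the expected pairing.
 */
#if 0
static int
example_drop_hrxq_usage(struct rte_eth_dev *dev)
{
	struct mlx5_hrxq *hrxq = mlx5_hrxq_drop_new(dev);

	if (!hrxq)
		return -rte_errno;
	/* ... attach hrxq->qp to a flow rule that drops traffic ... */
	mlx5_hrxq_drop_release(dev);
	return 0;
}
#endif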