/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2015 6WIND S.A.
 * Copyright 2015 Mellanox Technologies, Ltd
 */

#include <stddef.h>
#include <assert.h>
#include <errno.h>
#include <string.h>
#include <stdint.h>
#include <fcntl.h>
#include <sys/queue.h>

/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
#include <infiniband/mlx5dv.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif

#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <rte_ethdev_driver.h>
#include <rte_common.h>
#include <rte_interrupts.h>
#include <rte_debug.h>
#include <rte_io.h>

#include "mlx5.h"
#include "mlx5_rxtx.h"
#include "mlx5_utils.h"
#include "mlx5_autoconf.h"
#include "mlx5_defs.h"
#include "mlx5_glue.h"

/* Default RSS hash key also used for ConnectX-3. */
uint8_t rss_hash_default_key[] = {
	0x2c, 0xc6, 0x81, 0xd1,
	0x5b, 0xdb, 0xf4, 0xf7,
	0xfc, 0xa2, 0x83, 0x19,
	0xdb, 0x1a, 0x3e, 0x94,
	0x6b, 0x9e, 0x38, 0xd9,
	0x2c, 0x9c, 0x03, 0xd1,
	0xad, 0x99, 0x44, 0xa7,
	0xd9, 0x56, 0x3d, 0x59,
	0x06, 0x3c, 0x25, 0xf3,
	0xfc, 0x1f, 0xdc, 0x2a,
};

/* Length of the default RSS hash key. */
static_assert(MLX5_RSS_HASH_KEY_LEN ==
	      (unsigned int)sizeof(rss_hash_default_key),
	      "wrong RSS default key size.");

/**
 * Check whether Multi-Packet RQ can be enabled for the device.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   1 if supported, negative errno value if not.
 */
inline int
mlx5_check_mprq_support(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;

	if (priv->config.mprq.enabled &&
	    priv->rxqs_n >= priv->config.mprq.min_rxqs_num)
		return 1;
	return -ENOTSUP;
}

/**
 * Check whether Multi-Packet RQ is enabled for the Rx queue.
 *
 * @param rxq
 *   Pointer to receive queue structure.
 *
 * @return
 *   0 if disabled, otherwise enabled.
 */
inline int
mlx5_rxq_mprq_enabled(struct mlx5_rxq_data *rxq)
{
	return rxq->strd_num_n > 0;
}

/**
 * Check whether Multi-Packet RQ is enabled for the device.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   0 if disabled, otherwise enabled.
 */
inline int
mlx5_mprq_enabled(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;
	uint16_t i;
	uint16_t n = 0;

	if (mlx5_check_mprq_support(dev) < 0)
		return 0;
	/* All the configured queues should be enabled. */
	for (i = 0; i < priv->rxqs_n; ++i) {
		struct mlx5_rxq_data *rxq = (*priv->rxqs)[i];

		if (!rxq)
			continue;
		if (mlx5_rxq_mprq_enabled(rxq))
			++n;
	}
	/* Multi-Packet RQ can't be partially configured. */
	assert(n == 0 || n == priv->rxqs_n);
	return n == priv->rxqs_n;
}

/**
 * Allocate RX queue elements for Multi-Packet RQ.
 *
 * @param rxq_ctrl
 *   Pointer to RX queue structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
rxq_alloc_elts_mprq(struct mlx5_rxq_ctrl *rxq_ctrl)
{
	struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq;
	unsigned int wqe_n = 1 << rxq->elts_n;
	unsigned int i;
	int err;

	/* Iterate on segments. */
	for (i = 0; i <= wqe_n; ++i) {
		struct mlx5_mprq_buf *buf;

		if (rte_mempool_get(rxq->mprq_mp, (void **)&buf) < 0) {
			DRV_LOG(ERR, "port %u empty mbuf pool", rxq->port_id);
			rte_errno = ENOMEM;
			goto error;
		}
		if (i < wqe_n)
			(*rxq->mprq_bufs)[i] = buf;
		else
			rxq->mprq_repl = buf;
	}
	DRV_LOG(DEBUG,
		"port %u Rx queue %u allocated and configured %u segments",
		rxq->port_id, rxq_ctrl->idx, wqe_n);
	return 0;
error:
	err = rte_errno; /* Save rte_errno before cleanup. */
	wqe_n = i;
	for (i = 0; (i != wqe_n); ++i) {
		if ((*rxq->mprq_bufs)[i] != NULL)
			rte_mempool_put(rxq->mprq_mp,
					(*rxq->mprq_bufs)[i]);
		(*rxq->mprq_bufs)[i] = NULL;
	}
	DRV_LOG(DEBUG, "port %u Rx queue %u failed, freed everything",
		rxq->port_id, rxq_ctrl->idx);
	rte_errno = err; /* Restore rte_errno. */
	return -rte_errno;
}

/**
 * Allocate RX queue elements for Single-Packet RQ.
 *
 * @param rxq_ctrl
 *   Pointer to RX queue structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
rxq_alloc_elts_sprq(struct mlx5_rxq_ctrl *rxq_ctrl)
{
	const unsigned int sges_n = 1 << rxq_ctrl->rxq.sges_n;
	unsigned int elts_n = 1 << rxq_ctrl->rxq.elts_n;
	unsigned int i;
	int err;

	/* Iterate on segments. */
	for (i = 0; (i != elts_n); ++i) {
		struct rte_mbuf *buf;

		buf = rte_pktmbuf_alloc(rxq_ctrl->rxq.mp);
		if (buf == NULL) {
			DRV_LOG(ERR, "port %u empty mbuf pool",
				PORT_ID(rxq_ctrl->priv));
			rte_errno = ENOMEM;
			goto error;
		}
		/* Headroom is reserved by rte_pktmbuf_alloc(). */
		assert(DATA_OFF(buf) == RTE_PKTMBUF_HEADROOM);
		/* Buffer is supposed to be empty. */
		assert(rte_pktmbuf_data_len(buf) == 0);
		assert(rte_pktmbuf_pkt_len(buf) == 0);
		assert(!buf->next);
		/* Only the first segment keeps headroom. */
		if (i % sges_n)
			SET_DATA_OFF(buf, 0);
		PORT(buf) = rxq_ctrl->rxq.port_id;
		DATA_LEN(buf) = rte_pktmbuf_tailroom(buf);
		PKT_LEN(buf) = DATA_LEN(buf);
		NB_SEGS(buf) = 1;
		(*rxq_ctrl->rxq.elts)[i] = buf;
	}
	/* If Rx vector is activated. */
	if (mlx5_rxq_check_vec_support(&rxq_ctrl->rxq) > 0) {
		struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq;
		struct rte_mbuf *mbuf_init = &rxq->fake_mbuf;
		int j;

		/* Initialize default rearm_data for vPMD. */
		mbuf_init->data_off = RTE_PKTMBUF_HEADROOM;
		rte_mbuf_refcnt_set(mbuf_init, 1);
		mbuf_init->nb_segs = 1;
		mbuf_init->port = rxq->port_id;
		/*
		 * prevent compiler reordering:
		 * rearm_data covers previous fields.
		 */
		rte_compiler_barrier();
		rxq->mbuf_initializer =
			*(uint64_t *)&mbuf_init->rearm_data;
		/* Padding with a fake mbuf for vectorized Rx. */
		for (j = 0; j < MLX5_VPMD_DESCS_PER_LOOP; ++j)
			(*rxq->elts)[elts_n + j] = &rxq->fake_mbuf;
	}
	DRV_LOG(DEBUG,
		"port %u Rx queue %u allocated and configured %u segments"
		" (max %u packets)",
		PORT_ID(rxq_ctrl->priv), rxq_ctrl->idx, elts_n,
		elts_n / (1 << rxq_ctrl->rxq.sges_n));
	return 0;
error:
	err = rte_errno; /* Save rte_errno before cleanup. */
	elts_n = i;
	for (i = 0; (i != elts_n); ++i) {
		if ((*rxq_ctrl->rxq.elts)[i] != NULL)
			rte_pktmbuf_free_seg((*rxq_ctrl->rxq.elts)[i]);
		(*rxq_ctrl->rxq.elts)[i] = NULL;
	}
	DRV_LOG(DEBUG, "port %u Rx queue %u failed, freed everything",
		PORT_ID(rxq_ctrl->priv), rxq_ctrl->idx);
	rte_errno = err; /* Restore rte_errno. */
	return -rte_errno;
}

/**
 * Allocate RX queue elements.
 *
 * @param rxq_ctrl
 *   Pointer to RX queue structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl)
{
	return mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq) ?
	       rxq_alloc_elts_mprq(rxq_ctrl) : rxq_alloc_elts_sprq(rxq_ctrl);
}

/**
 * Free RX queue elements for Multi-Packet RQ.
 *
 * @param rxq_ctrl
 *   Pointer to RX queue structure.
 */
static void
rxq_free_elts_mprq(struct mlx5_rxq_ctrl *rxq_ctrl)
{
	struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq;
	uint16_t i;

	DRV_LOG(DEBUG, "port %u Multi-Packet Rx queue %u freeing WRs",
		rxq->port_id, rxq_ctrl->idx);
	if (rxq->mprq_bufs == NULL)
		return;
	assert(mlx5_rxq_check_vec_support(rxq) < 0);
	for (i = 0; (i != (1u << rxq->elts_n)); ++i) {
		if ((*rxq->mprq_bufs)[i] != NULL)
			mlx5_mprq_buf_free((*rxq->mprq_bufs)[i]);
		(*rxq->mprq_bufs)[i] = NULL;
	}
	if (rxq->mprq_repl != NULL) {
		mlx5_mprq_buf_free(rxq->mprq_repl);
		rxq->mprq_repl = NULL;
	}
}

/**
 * Free RX queue elements for Single-Packet RQ.
 *
 * @param rxq_ctrl
 *   Pointer to RX queue structure.
 */
static void
rxq_free_elts_sprq(struct mlx5_rxq_ctrl *rxq_ctrl)
{
	struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq;
	const uint16_t q_n = (1 << rxq->elts_n);
	const uint16_t q_mask = q_n - 1;
	uint16_t used = q_n - (rxq->rq_ci - rxq->rq_pi);
	uint16_t i;

	DRV_LOG(DEBUG, "port %u Rx queue %u freeing WRs",
		PORT_ID(rxq_ctrl->priv), rxq_ctrl->idx);
	if (rxq->elts == NULL)
		return;
	/**
	 * Some mbufs in the ring belong to the application; they cannot be
	 * freed.
	 */
	if (mlx5_rxq_check_vec_support(rxq) > 0) {
		for (i = 0; i < used; ++i)
			(*rxq->elts)[(rxq->rq_ci + i) & q_mask] = NULL;
		rxq->rq_pi = rxq->rq_ci;
	}
	for (i = 0; (i != (1u << rxq->elts_n)); ++i) {
		if ((*rxq->elts)[i] != NULL)
			rte_pktmbuf_free_seg((*rxq->elts)[i]);
		(*rxq->elts)[i] = NULL;
	}
}

/**
 * Free RX queue elements.
 *
 * @param rxq_ctrl
 *   Pointer to RX queue structure.
 */
static void
rxq_free_elts(struct mlx5_rxq_ctrl *rxq_ctrl)
{
	if (mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq))
		rxq_free_elts_mprq(rxq_ctrl);
	else
		rxq_free_elts_sprq(rxq_ctrl);
}

/**
 * Clean up a RX queue.
 *
 * Destroy objects, free allocated memory and reset the structure for reuse.
 *
 * @param rxq_ctrl
 *   Pointer to RX queue structure.
 */
void
mlx5_rxq_cleanup(struct mlx5_rxq_ctrl *rxq_ctrl)
{
	DRV_LOG(DEBUG, "port %u cleaning up Rx queue %u",
		PORT_ID(rxq_ctrl->priv), rxq_ctrl->idx);
	if (rxq_ctrl->ibv)
		mlx5_rxq_ibv_release(rxq_ctrl->ibv);
	memset(rxq_ctrl, 0, sizeof(*rxq_ctrl));
}

/**
 * Returns the per-queue supported offloads.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   Supported Rx offloads.
 */
uint64_t
mlx5_get_rx_queue_offloads(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_dev_config *config = &priv->config;
	uint64_t offloads = (DEV_RX_OFFLOAD_SCATTER |
			     DEV_RX_OFFLOAD_TIMESTAMP |
			     DEV_RX_OFFLOAD_JUMBO_FRAME);

	offloads |= DEV_RX_OFFLOAD_CRC_STRIP;
	if (config->hw_fcs_strip)
		offloads |= DEV_RX_OFFLOAD_KEEP_CRC;

	if (config->hw_csum)
		offloads |= (DEV_RX_OFFLOAD_IPV4_CKSUM |
			     DEV_RX_OFFLOAD_UDP_CKSUM |
			     DEV_RX_OFFLOAD_TCP_CKSUM);
	if (config->hw_vlan_strip)
		offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
	return offloads;
}

/**
 * Returns the per-port supported offloads.
 *
 * @return
 *   Supported Rx offloads.
 */
uint64_t
mlx5_get_rx_port_offloads(void)
{
	uint64_t offloads = DEV_RX_OFFLOAD_VLAN_FILTER;

	return offloads;
}

/**
 * DPDK callback to configure a Rx queue.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param idx
 *   RX queue index.
 * @param desc
 *   Number of descriptors to configure in queue.
 * @param socket
 *   NUMA socket on which memory must be allocated.
 * @param[in] conf
 *   Thresholds parameters.
 * @param mp
 *   Memory pool for buffer allocations.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
		    unsigned int socket, const struct rte_eth_rxconf *conf,
		    struct rte_mempool *mp)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_rxq_data *rxq = (*priv->rxqs)[idx];
	struct mlx5_rxq_ctrl *rxq_ctrl =
		container_of(rxq, struct mlx5_rxq_ctrl, rxq);

	if (!rte_is_power_of_2(desc)) {
		desc = 1 << log2above(desc);
		DRV_LOG(WARNING,
			"port %u increased number of descriptors in Rx queue %u"
			" to the next power of two (%d)",
			dev->data->port_id, idx, desc);
	}
	DRV_LOG(DEBUG, "port %u configuring Rx queue %u for %u descriptors",
		dev->data->port_id, idx, desc);
	if (idx >= priv->rxqs_n) {
		DRV_LOG(ERR, "port %u Rx queue index out of range (%u >= %u)",
			dev->data->port_id, idx, priv->rxqs_n);
		rte_errno = EOVERFLOW;
		return -rte_errno;
	}
	if (!mlx5_rxq_releasable(dev, idx)) {
		DRV_LOG(ERR, "port %u unable to release queue index %u",
			dev->data->port_id, idx);
		rte_errno = EBUSY;
		return -rte_errno;
	}
	mlx5_rxq_release(dev, idx);
	rxq_ctrl = mlx5_rxq_new(dev, idx, desc, socket, conf, mp);
	if (!rxq_ctrl) {
		DRV_LOG(ERR, "port %u unable to allocate queue index %u",
			dev->data->port_id, idx);
		rte_errno = ENOMEM;
		return -rte_errno;
	}
	DRV_LOG(DEBUG, "port %u adding Rx queue %u to list",
		dev->data->port_id, idx);
	(*priv->rxqs)[idx] = &rxq_ctrl->rxq;
	return 0;
}

/**
 * DPDK callback to release a RX queue.
 *
 * @param dpdk_rxq
 *   Generic RX queue pointer.
 */
void
mlx5_rx_queue_release(void *dpdk_rxq)
{
	struct mlx5_rxq_data *rxq = (struct mlx5_rxq_data *)dpdk_rxq;
	struct mlx5_rxq_ctrl *rxq_ctrl;
	struct priv *priv;

	if (rxq == NULL)
		return;
	rxq_ctrl = container_of(rxq, struct mlx5_rxq_ctrl, rxq);
	priv = rxq_ctrl->priv;
	if (!mlx5_rxq_releasable(ETH_DEV(priv), rxq_ctrl->rxq.stats.idx))
		rte_panic("port %u Rx queue %u is still used by a flow and"
			  " cannot be removed\n",
			  PORT_ID(priv), rxq_ctrl->idx);
	mlx5_rxq_release(ETH_DEV(priv), rxq_ctrl->rxq.stats.idx);
}

/**
 * Allocate queue vector and fill epoll fd list for Rx interrupts.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_rx_intr_vec_enable(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;
	unsigned int i;
	unsigned int rxqs_n = priv->rxqs_n;
	unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID);
	unsigned int count = 0;
	struct rte_intr_handle *intr_handle = dev->intr_handle;

	if (!dev->data->dev_conf.intr_conf.rxq)
		return 0;
	mlx5_rx_intr_vec_disable(dev);
	intr_handle->intr_vec = malloc(n * sizeof(intr_handle->intr_vec[0]));
	if (intr_handle->intr_vec == NULL) {
		DRV_LOG(ERR,
			"port %u failed to allocate memory for interrupt"
			" vector, Rx interrupts will not be supported",
			dev->data->port_id);
		rte_errno = ENOMEM;
		return -rte_errno;
	}
	intr_handle->type = RTE_INTR_HANDLE_EXT;
	for (i = 0; i != n; ++i) {
		/* This rxq ibv must not be released in this function. */
		struct mlx5_rxq_ibv *rxq_ibv = mlx5_rxq_ibv_get(dev, i);
		int fd;
		int flags;
		int rc;

		/* Skip queues that cannot request interrupts. */
		if (!rxq_ibv || !rxq_ibv->channel) {
			/* Use invalid intr_vec[] index to disable entry. */
			intr_handle->intr_vec[i] =
				RTE_INTR_VEC_RXTX_OFFSET +
				RTE_MAX_RXTX_INTR_VEC_ID;
			continue;
		}
		if (count >= RTE_MAX_RXTX_INTR_VEC_ID) {
			DRV_LOG(ERR,
				"port %u too many Rx queues for interrupt"
				" vector size (%d), Rx interrupts cannot be"
				" enabled",
				dev->data->port_id, RTE_MAX_RXTX_INTR_VEC_ID);
			mlx5_rx_intr_vec_disable(dev);
			rte_errno = ENOMEM;
			return -rte_errno;
		}
		fd = rxq_ibv->channel->fd;
		flags = fcntl(fd, F_GETFL);
		rc = fcntl(fd, F_SETFL, flags | O_NONBLOCK);
		if (rc < 0) {
			rte_errno = errno;
			DRV_LOG(ERR,
				"port %u failed to make Rx interrupt file"
				" descriptor %d non-blocking for queue index"
				" %d",
				dev->data->port_id, fd, i);
			mlx5_rx_intr_vec_disable(dev);
			return -rte_errno;
		}
		intr_handle->intr_vec[i] = RTE_INTR_VEC_RXTX_OFFSET + count;
		intr_handle->efds[count] = fd;
		count++;
	}
	if (!count)
		mlx5_rx_intr_vec_disable(dev);
	else
		intr_handle->nb_efd = count;
	return 0;
}

/**
 * Clean up Rx interrupts handler.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
void
mlx5_rx_intr_vec_disable(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;
	struct rte_intr_handle *intr_handle = dev->intr_handle;
	unsigned int i;
	unsigned int rxqs_n = priv->rxqs_n;
	unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID);

	if (!dev->data->dev_conf.intr_conf.rxq)
		return;
	if (!intr_handle->intr_vec)
		goto free;
	for (i = 0; i != n; ++i) {
		struct mlx5_rxq_ctrl *rxq_ctrl;
		struct mlx5_rxq_data *rxq_data;

		if (intr_handle->intr_vec[i] == RTE_INTR_VEC_RXTX_OFFSET +
		    RTE_MAX_RXTX_INTR_VEC_ID)
			continue;
		/**
		 * Need to access the queue directly to release the
		 * reference kept in mlx5_rx_intr_vec_enable().
		 */
		rxq_data = (*priv->rxqs)[i];
		rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
		mlx5_rxq_ibv_release(rxq_ctrl->ibv);
	}
free:
	rte_intr_free_epoll_fd(intr_handle);
	if (intr_handle->intr_vec)
		free(intr_handle->intr_vec);
	intr_handle->nb_efd = 0;
	intr_handle->intr_vec = NULL;
}

/**
 * MLX5 CQ notification.
 *
 * @param rxq
 *   Pointer to receive queue structure.
 * @param sq_n_rxq
 *   Sequence number per receive queue.
 */
static inline void
mlx5_arm_cq(struct mlx5_rxq_data *rxq, int sq_n_rxq)
{
	int sq_n = 0;
	uint32_t doorbell_hi;
	uint64_t doorbell;
	void *cq_db_reg = (char *)rxq->cq_uar + MLX5_CQ_DOORBELL;

	sq_n = sq_n_rxq & MLX5_CQ_SQN_MASK;
	doorbell_hi = sq_n << MLX5_CQ_SQN_OFFSET | (rxq->cq_ci & MLX5_CI_MASK);
	doorbell = (uint64_t)doorbell_hi << 32;
	doorbell |= rxq->cqn;
	rxq->cq_db[MLX5_CQ_ARM_DB] = rte_cpu_to_be_32(doorbell_hi);
	mlx5_uar_write64(rte_cpu_to_be_64(doorbell),
			 cq_db_reg, rxq->uar_lock_cq);
}

/**
 * DPDK callback for Rx queue interrupt enable.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param rx_queue_id
 *   Rx queue number.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_rx_intr_enable(struct rte_eth_dev *dev, uint16_t rx_queue_id)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_rxq_data *rxq_data;
	struct mlx5_rxq_ctrl *rxq_ctrl;

	rxq_data = (*priv->rxqs)[rx_queue_id];
	if (!rxq_data) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
	if (rxq_ctrl->irq) {
		struct mlx5_rxq_ibv *rxq_ibv;

		rxq_ibv = mlx5_rxq_ibv_get(dev, rx_queue_id);
		if (!rxq_ibv) {
			rte_errno = EINVAL;
			return -rte_errno;
		}
		mlx5_arm_cq(rxq_data, rxq_data->cq_arm_sn);
		mlx5_rxq_ibv_release(rxq_ibv);
	}
	return 0;
}

/**
 * DPDK callback for Rx queue interrupt disable.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param rx_queue_id
 *   Rx queue number.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_rx_intr_disable(struct rte_eth_dev *dev, uint16_t rx_queue_id)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_rxq_data *rxq_data;
	struct mlx5_rxq_ctrl *rxq_ctrl;
	struct mlx5_rxq_ibv *rxq_ibv = NULL;
	struct ibv_cq *ev_cq;
	void *ev_ctx;
	int ret;

	rxq_data = (*priv->rxqs)[rx_queue_id];
	if (!rxq_data) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
	if (!rxq_ctrl->irq)
		return 0;
	rxq_ibv = mlx5_rxq_ibv_get(dev, rx_queue_id);
	if (!rxq_ibv) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	ret = mlx5_glue->get_cq_event(rxq_ibv->channel, &ev_cq, &ev_ctx);
	if (ret || ev_cq != rxq_ibv->cq) {
		rte_errno = EINVAL;
		goto exit;
	}
	rxq_data->cq_arm_sn++;
	mlx5_glue->ack_cq_events(rxq_ibv->cq, 1);
	return 0;
exit:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	if (rxq_ibv)
		mlx5_rxq_ibv_release(rxq_ibv);
	DRV_LOG(WARNING, "port %u unable to disable interrupt on Rx queue %d",
		dev->data->port_id, rx_queue_id);
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}

/**
 * Create the Rx queue Verbs object.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param idx
 *   Queue index in DPDK Rx queue array
 *
 * @return
 *   The Verbs object initialised, NULL otherwise and rte_errno is set.
 */
struct mlx5_rxq_ibv *
mlx5_rxq_ibv_new(struct rte_eth_dev *dev, uint16_t idx)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx];
	struct mlx5_rxq_ctrl *rxq_ctrl =
		container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
	struct ibv_wq_attr mod;
	union {
		struct {
			struct ibv_cq_init_attr_ex ibv;
			struct mlx5dv_cq_init_attr mlx5;
		} cq;
		struct {
			struct ibv_wq_init_attr ibv;
#ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT
			struct mlx5dv_wq_init_attr mlx5;
#endif
		} wq;
		struct ibv_cq_ex cq_attr;
	} attr;
	unsigned int cqe_n;
	unsigned int wqe_n = 1 << rxq_data->elts_n;
	struct mlx5_rxq_ibv *tmpl;
	struct mlx5dv_cq cq_info;
	struct mlx5dv_rwq rwq;
	unsigned int i;
	int ret = 0;
	struct mlx5dv_obj obj;
	struct mlx5_dev_config *config = &priv->config;
	const int mprq_en = mlx5_rxq_mprq_enabled(rxq_data);

	assert(rxq_data);
	assert(!rxq_ctrl->ibv);
	priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_RX_QUEUE;
	priv->verbs_alloc_ctx.obj = rxq_ctrl;
	tmpl = rte_calloc_socket(__func__, 1, sizeof(*tmpl), 0,
				 rxq_ctrl->socket);
	if (!tmpl) {
		DRV_LOG(ERR,
			"port %u Rx queue %u cannot allocate verbs resources",
			dev->data->port_id, rxq_ctrl->idx);
		rte_errno = ENOMEM;
		goto error;
	}
	tmpl->rxq_ctrl = rxq_ctrl;
	if (rxq_ctrl->irq) {
		tmpl->channel = mlx5_glue->create_comp_channel(priv->ctx);
		if (!tmpl->channel) {
			DRV_LOG(ERR, "port %u: comp channel creation failure",
				dev->data->port_id);
			rte_errno = ENOMEM;
			goto error;
		}
	}
	if (mprq_en)
		cqe_n = wqe_n * (1 << rxq_data->strd_num_n) - 1;
	else
		cqe_n = wqe_n - 1;
	attr.cq.ibv = (struct ibv_cq_init_attr_ex){
		.cqe = cqe_n,
		.channel = tmpl->channel,
		.comp_mask = 0,
	};
	attr.cq.mlx5 = (struct mlx5dv_cq_init_attr){
		.comp_mask = 0,
	};
	if (config->cqe_comp && !rxq_data->hw_timestamp) {
		attr.cq.mlx5.comp_mask |=
			MLX5DV_CQ_INIT_ATTR_MASK_COMPRESSED_CQE;
#ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT
		attr.cq.mlx5.cqe_comp_res_format =
			mprq_en ? MLX5DV_CQE_RES_FORMAT_CSUM_STRIDX :
				  MLX5DV_CQE_RES_FORMAT_HASH;
#else
		attr.cq.mlx5.cqe_comp_res_format = MLX5DV_CQE_RES_FORMAT_HASH;
#endif
		/*
		 * For vectorized Rx, it must not be doubled in order to
		 * make cq_ci and rq_ci aligned.
		 */
		if (mlx5_rxq_check_vec_support(rxq_data) < 0)
			attr.cq.ibv.cqe *= 2;
	} else if (config->cqe_comp && rxq_data->hw_timestamp) {
		DRV_LOG(DEBUG,
			"port %u Rx CQE compression is disabled for HW"
			" timestamp",
			dev->data->port_id);
	}
	tmpl->cq = mlx5_glue->cq_ex_to_cq
		(mlx5_glue->dv_create_cq(priv->ctx, &attr.cq.ibv,
					 &attr.cq.mlx5));
	if (tmpl->cq == NULL) {
		DRV_LOG(ERR, "port %u Rx queue %u CQ creation failure",
			dev->data->port_id, idx);
		rte_errno = ENOMEM;
		goto error;
	}
	DRV_LOG(DEBUG, "port %u priv->device_attr.max_qp_wr is %d",
		dev->data->port_id, priv->device_attr.orig_attr.max_qp_wr);
	DRV_LOG(DEBUG, "port %u priv->device_attr.max_sge is %d",
		dev->data->port_id, priv->device_attr.orig_attr.max_sge);
	attr.wq.ibv = (struct ibv_wq_init_attr){
		.wq_context = NULL, /* Could be useful in the future. */
		.wq_type = IBV_WQT_RQ,
		/* Max number of outstanding WRs. */
		.max_wr = wqe_n >> rxq_data->sges_n,
		/* Max number of scatter/gather elements in a WR. */
		.max_sge = 1 << rxq_data->sges_n,
		.pd = priv->pd,
		.cq = tmpl->cq,
		.comp_mask =
			IBV_WQ_FLAGS_CVLAN_STRIPPING |
			0,
		.create_flags = (rxq_data->vlan_strip ?
				 IBV_WQ_FLAGS_CVLAN_STRIPPING :
				 0),
	};
	/* By default, FCS (CRC) is stripped by hardware. */
	if (rxq_data->crc_present) {
		attr.wq.ibv.create_flags |= IBV_WQ_FLAGS_SCATTER_FCS;
		attr.wq.ibv.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
	}
#ifdef HAVE_IBV_WQ_FLAG_RX_END_PADDING
	if (config->hw_padding) {
		attr.wq.ibv.create_flags |= IBV_WQ_FLAG_RX_END_PADDING;
		attr.wq.ibv.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
	}
#endif
#ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT
	attr.wq.mlx5 = (struct mlx5dv_wq_init_attr){
		.comp_mask = 0,
	};
	if (mprq_en) {
		struct mlx5dv_striding_rq_init_attr *mprq_attr =
			&attr.wq.mlx5.striding_rq_attrs;

		attr.wq.mlx5.comp_mask |= MLX5DV_WQ_INIT_ATTR_MASK_STRIDING_RQ;
		*mprq_attr = (struct mlx5dv_striding_rq_init_attr){
			.single_stride_log_num_of_bytes = rxq_data->strd_sz_n,
			.single_wqe_log_num_of_strides = rxq_data->strd_num_n,
			.two_byte_shift_en = MLX5_MPRQ_TWO_BYTE_SHIFT,
		};
	}
	tmpl->wq = mlx5_glue->dv_create_wq(priv->ctx, &attr.wq.ibv,
					   &attr.wq.mlx5);
#else
	tmpl->wq = mlx5_glue->create_wq(priv->ctx, &attr.wq.ibv);
#endif
	if (tmpl->wq == NULL) {
		DRV_LOG(ERR, "port %u Rx queue %u WQ creation failure",
			dev->data->port_id, idx);
		rte_errno = ENOMEM;
		goto error;
	}
	/*
	 * Make sure number of WRs*SGEs match expectations since a queue
	 * cannot allocate more than "desc" buffers.
	 */
	if (attr.wq.ibv.max_wr != (wqe_n >> rxq_data->sges_n) ||
	    attr.wq.ibv.max_sge != (1u << rxq_data->sges_n)) {
		DRV_LOG(ERR,
			"port %u Rx queue %u requested %u*%u but got %u*%u"
			" WRs*SGEs",
			dev->data->port_id, idx,
			wqe_n >> rxq_data->sges_n, (1 << rxq_data->sges_n),
			attr.wq.ibv.max_wr, attr.wq.ibv.max_sge);
		rte_errno = EINVAL;
		goto error;
	}
	/* Change queue state to ready. */
	mod = (struct ibv_wq_attr){
		.attr_mask = IBV_WQ_ATTR_STATE,
		.wq_state = IBV_WQS_RDY,
	};
	ret = mlx5_glue->modify_wq(tmpl->wq, &mod);
	if (ret) {
		DRV_LOG(ERR,
			"port %u Rx queue %u WQ state to IBV_WQS_RDY failed",
			dev->data->port_id, idx);
		rte_errno = ret;
		goto error;
	}
	obj.cq.in = tmpl->cq;
	obj.cq.out = &cq_info;
	obj.rwq.in = tmpl->wq;
	obj.rwq.out = &rwq;
	ret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_CQ | MLX5DV_OBJ_RWQ);
	if (ret) {
		rte_errno = ret;
		goto error;
	}
	if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) {
		DRV_LOG(ERR,
			"port %u wrong MLX5_CQE_SIZE environment variable"
			" value: it should be set to %u",
			dev->data->port_id, RTE_CACHE_LINE_SIZE);
		rte_errno = EINVAL;
		goto error;
	}
	/* Fill the rings. */
	rxq_data->wqes = rwq.buf;
	for (i = 0; (i != wqe_n); ++i) {
		volatile struct mlx5_wqe_data_seg *scat;
		uintptr_t addr;
		uint32_t byte_count;

		if (mprq_en) {
			struct mlx5_mprq_buf *buf = (*rxq_data->mprq_bufs)[i];

			scat = &((volatile struct mlx5_wqe_mprq *)
				 rxq_data->wqes)[i].dseg;
			addr = (uintptr_t)mlx5_mprq_buf_addr(buf);
			byte_count = (1 << rxq_data->strd_sz_n) *
				     (1 << rxq_data->strd_num_n);
		} else {
			struct rte_mbuf *buf = (*rxq_data->elts)[i];

			scat = &((volatile struct mlx5_wqe_data_seg *)
				 rxq_data->wqes)[i];
			addr = rte_pktmbuf_mtod(buf, uintptr_t);
			byte_count = DATA_LEN(buf);
		}
		/* scat->addr must be able to store a pointer. */
		assert(sizeof(scat->addr) >= sizeof(uintptr_t));
		*scat = (struct mlx5_wqe_data_seg){
			.addr = rte_cpu_to_be_64(addr),
			.byte_count = rte_cpu_to_be_32(byte_count),
			.lkey = mlx5_rx_addr2mr(rxq_data, addr),
		};
	}
	rxq_data->rq_db = rwq.dbrec;
	rxq_data->cqe_n = log2above(cq_info.cqe_cnt);
	rxq_data->cq_ci = 0;
	rxq_data->consumed_strd = 0;
	rxq_data->rq_pi = 0;
	rxq_data->zip = (struct rxq_zip){
		.ai = 0,
	};
	rxq_data->cq_db = cq_info.dbrec;
	rxq_data->cqes = (volatile struct mlx5_cqe (*)[])(uintptr_t)cq_info.buf;
	rxq_data->cq_uar = cq_info.cq_uar;
	rxq_data->cqn = cq_info.cqn;
	rxq_data->cq_arm_sn = 0;
	/* Update doorbell counter. */
	rxq_data->rq_ci = wqe_n >> rxq_data->sges_n;
	rte_wmb();
	*rxq_data->rq_db = rte_cpu_to_be_32(rxq_data->rq_ci);
	DRV_LOG(DEBUG, "port %u rxq %u updated with %p", dev->data->port_id,
		idx, (void *)&tmpl);
	rte_atomic32_inc(&tmpl->refcnt);
	LIST_INSERT_HEAD(&priv->rxqsibv, tmpl, next);
	priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE;
	return tmpl;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	if (tmpl->wq)
		claim_zero(mlx5_glue->destroy_wq(tmpl->wq));
	if (tmpl->cq)
		claim_zero(mlx5_glue->destroy_cq(tmpl->cq));
	if (tmpl->channel)
		claim_zero(mlx5_glue->destroy_comp_channel(tmpl->channel));
	priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE;
	rte_errno = ret; /* Restore rte_errno. */
	return NULL;
}

/**
 * Get an Rx queue Verbs object.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param idx
 *   Queue index in DPDK Rx queue array
 *
 * @return
 *   The Verbs object if it exists.
 */
struct mlx5_rxq_ibv *
mlx5_rxq_ibv_get(struct rte_eth_dev *dev, uint16_t idx)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx];
	struct mlx5_rxq_ctrl *rxq_ctrl;

	if (idx >= priv->rxqs_n)
		return NULL;
	if (!rxq_data)
		return NULL;
	rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
	if (rxq_ctrl->ibv) {
		rte_atomic32_inc(&rxq_ctrl->ibv->refcnt);
	}
	return rxq_ctrl->ibv;
}

/**
 * Release an Rx verbs queue object.
 *
 * @param rxq_ibv
 *   Verbs Rx queue object.
 *
 * @return
 *   1 while a reference on it exists, 0 when freed.
 */
int
mlx5_rxq_ibv_release(struct mlx5_rxq_ibv *rxq_ibv)
{
	assert(rxq_ibv);
	assert(rxq_ibv->wq);
	assert(rxq_ibv->cq);
	if (rte_atomic32_dec_and_test(&rxq_ibv->refcnt)) {
		rxq_free_elts(rxq_ibv->rxq_ctrl);
		claim_zero(mlx5_glue->destroy_wq(rxq_ibv->wq));
		claim_zero(mlx5_glue->destroy_cq(rxq_ibv->cq));
		if (rxq_ibv->channel)
			claim_zero(mlx5_glue->destroy_comp_channel
				   (rxq_ibv->channel));
		LIST_REMOVE(rxq_ibv, next);
		rte_free(rxq_ibv);
		return 0;
	}
	return 1;
}

/**
 * Verify the Verbs Rx queue list is empty.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   The number of objects not released.
 */
int
mlx5_rxq_ibv_verify(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;
	int ret = 0;
	struct mlx5_rxq_ibv *rxq_ibv;

	LIST_FOREACH(rxq_ibv, &priv->rxqsibv, next) {
		DRV_LOG(DEBUG, "port %u Verbs Rx queue %u still referenced",
			dev->data->port_id, rxq_ibv->rxq_ctrl->idx);
		++ret;
	}
	return ret;
}

/**
 * Return true if a single reference exists on the object.
 *
 * @param rxq_ibv
 *   Verbs Rx queue object.
 */
int
mlx5_rxq_ibv_releasable(struct mlx5_rxq_ibv *rxq_ibv)
{
	assert(rxq_ibv);
	return (rte_atomic32_read(&rxq_ibv->refcnt) == 1);
}

/**
 * Callback function to initialize mbufs for Multi-Packet RQ.
 */
static inline void
mlx5_mprq_buf_init(struct rte_mempool *mp, void *opaque_arg __rte_unused,
		   void *_m, unsigned int i __rte_unused)
{
	struct mlx5_mprq_buf *buf = _m;

	memset(_m, 0, sizeof(*buf));
	buf->mp = mp;
	rte_atomic16_set(&buf->refcnt, 1);
}

/**
 * Free mempool of Multi-Packet RQ.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   0 on success, negative errno value on failure.
 */
int
mlx5_mprq_free_mp(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;
	struct rte_mempool *mp = priv->mprq_mp;
	unsigned int i;

	if (mp == NULL)
		return 0;
	DRV_LOG(DEBUG, "port %u freeing mempool (%s) for Multi-Packet RQ",
		dev->data->port_id, mp->name);
	/*
	 * If a buffer in the pool has been externally attached to an mbuf and
	 * it is still in use by the application, destroying the Rx queue can
	 * spoil the packet. It is unlikely to happen but could occur if the
	 * application dynamically creates and destroys queues while holding
	 * Rx packets.
	 *
	 * TODO: It is unavoidable for now because the mempool for Multi-Packet
	 * RQ isn't provided by the application but managed by the PMD.
	 */
	if (!rte_mempool_full(mp)) {
		DRV_LOG(ERR,
			"port %u mempool for Multi-Packet RQ is still in use",
			dev->data->port_id);
		rte_errno = EBUSY;
		return -rte_errno;
	}
	rte_mempool_free(mp);
	/* Unset mempool for each Rx queue. */
	for (i = 0; i != priv->rxqs_n; ++i) {
		struct mlx5_rxq_data *rxq = (*priv->rxqs)[i];

		if (rxq == NULL)
			continue;
		rxq->mprq_mp = NULL;
	}
	return 0;
}

/**
 * Allocate a mempool for Multi-Packet RQ. All configured Rx queues share the
 * mempool. If already allocated, reuse it if there are enough elements.
 * Otherwise, resize it.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   0 on success, negative errno value on failure.
 */
int
mlx5_mprq_alloc_mp(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;
	struct rte_mempool *mp = priv->mprq_mp;
	char name[RTE_MEMPOOL_NAMESIZE];
	unsigned int desc = 0;
	unsigned int buf_len;
	unsigned int obj_num;
	unsigned int obj_size;
	unsigned int strd_num_n = 0;
	unsigned int strd_sz_n = 0;
	unsigned int i;

	if (!mlx5_mprq_enabled(dev))
		return 0;
	/* Count the total number of descriptors configured. */
	for (i = 0; i != priv->rxqs_n; ++i) {
		struct mlx5_rxq_data *rxq = (*priv->rxqs)[i];

		if (rxq == NULL)
			continue;
		desc += 1 << rxq->elts_n;
		/* Get the max number of strides. */
		if (strd_num_n < rxq->strd_num_n)
			strd_num_n = rxq->strd_num_n;
		/* Get the max size of a stride. */
		if (strd_sz_n < rxq->strd_sz_n)
			strd_sz_n = rxq->strd_sz_n;
	}
	assert(strd_num_n && strd_sz_n);
	buf_len = (1 << strd_num_n) * (1 << strd_sz_n);
	obj_size = buf_len + sizeof(struct mlx5_mprq_buf);
	/*
	 * Received packets can be either memcpy'd or externally referenced.
	 * If a packet is attached to an mbuf as an external buffer, it isn't
	 * possible to predict how the buffers will be queued by the
	 * application, so the exact number of buffers needed can't be
	 * pre-allocated; instead, enough buffers are prepared speculatively.
	 *
	 * In the data path, if this mempool is depleted, the PMD will try to
	 * memcpy received packets to buffers provided by the application
	 * (rxq->mp) until this mempool becomes available again.
	 */
	desc *= 4;
	obj_num = desc + MLX5_MPRQ_MP_CACHE_SZ * priv->rxqs_n;
	/*
	 * rte_mempool_create_empty() has a sanity check that refuses a cache
	 * size which is large compared to the number of elements.
	 * CACHE_FLUSHTHRESH_MULTIPLIER is defined in a C file, so using a
	 * constant number 2 instead.
	 */
	obj_num = RTE_MAX(obj_num, MLX5_MPRQ_MP_CACHE_SZ * 2);
	/* Check whether a mempool is already allocated and can be reused. */
	if (mp != NULL && mp->elt_size >= obj_size && mp->size >= obj_num) {
		DRV_LOG(DEBUG, "port %u mempool %s is being reused",
			dev->data->port_id, mp->name);
		/* Reuse. */
		goto exit;
	} else if (mp != NULL) {
		DRV_LOG(DEBUG, "port %u mempool %s should be resized, freeing it",
			dev->data->port_id, mp->name);
		/*
		 * If it cannot be freed, it may still be in use; there is no
		 * choice but to keep using the existing one. On buffer
		 * underrun, packets will be memcpy'd instead of being
		 * attached as external buffers.
		 */
		if (mlx5_mprq_free_mp(dev)) {
			if (mp->elt_size >= obj_size)
				goto exit;
			else
				return -rte_errno;
		}
	}
	snprintf(name, sizeof(name), "%s-mprq", dev->device->name);
	mp = rte_mempool_create(name, obj_num, obj_size, MLX5_MPRQ_MP_CACHE_SZ,
				0, NULL, NULL, mlx5_mprq_buf_init, NULL,
				dev->device->numa_node, 0);
	if (mp == NULL) {
		DRV_LOG(ERR,
			"port %u failed to allocate a mempool for"
			" Multi-Packet RQ, count=%u, size=%u",
			dev->data->port_id, obj_num, obj_size);
		rte_errno = ENOMEM;
		return -rte_errno;
	}
	priv->mprq_mp = mp;
exit:
	/* Set mempool for each Rx queue. */
	for (i = 0; i != priv->rxqs_n; ++i) {
		struct mlx5_rxq_data *rxq = (*priv->rxqs)[i];

		if (rxq == NULL)
			continue;
		rxq->mprq_mp = mp;
	}
	DRV_LOG(INFO, "port %u Multi-Packet RQ is configured",
		dev->data->port_id);
	return 0;
}

/**
 * Create a DPDK Rx queue.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param idx
 *   RX queue index.
 * @param desc
 *   Number of descriptors to configure in queue.
 * @param socket
 *   NUMA socket on which memory must be allocated.
 * @param conf
 *   Thresholds parameters.
 * @param mp
 *   Memory pool for buffer allocations.
 *
 * @return
 *   A DPDK queue object on success, NULL otherwise and rte_errno is set.
 */
struct mlx5_rxq_ctrl *
mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
	     unsigned int socket, const struct rte_eth_rxconf *conf,
	     struct rte_mempool *mp)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_rxq_ctrl *tmpl;
	unsigned int mb_len = rte_pktmbuf_data_room_size(mp);
	unsigned int mprq_stride_size;
	struct mlx5_dev_config *config = &priv->config;
	/*
	 * Always allocate extra slots, even if eventually
	 * the vector Rx will not be used.
	 */
	uint16_t desc_n =
		desc + config->rx_vec_en * MLX5_VPMD_DESCS_PER_LOOP;
	uint64_t offloads = conf->offloads |
			   dev->data->dev_conf.rxmode.offloads;
	const int mprq_en = mlx5_check_mprq_support(dev) > 0;

	tmpl = rte_calloc_socket("RXQ", 1,
				 sizeof(*tmpl) +
				 desc_n * sizeof(struct rte_mbuf *),
				 0, socket);
	if (!tmpl) {
		rte_errno = ENOMEM;
		return NULL;
	}
	if (mlx5_mr_btree_init(&tmpl->rxq.mr_ctrl.cache_bh,
			       MLX5_MR_BTREE_CACHE_N, socket)) {
		/* rte_errno is already set. */
		goto error;
	}
	tmpl->socket = socket;
	if (dev->data->dev_conf.intr_conf.rxq)
		tmpl->irq = 1;
	/*
	 * This Rx queue can be configured as a Multi-Packet RQ if all of the
	 * following conditions are met:
	 *  - MPRQ is enabled.
	 *  - The number of descs is more than the number of strides.
	 *  - max_rx_pkt_len plus overhead is less than the max size of a
	 *    stride.
	 *  Otherwise, enable Rx scatter if necessary.
	 */
	assert(mb_len >= RTE_PKTMBUF_HEADROOM);
	mprq_stride_size =
		dev->data->dev_conf.rxmode.max_rx_pkt_len +
		sizeof(struct rte_mbuf_ext_shared_info) +
		RTE_PKTMBUF_HEADROOM;
	if (mprq_en &&
	    desc > (1U << config->mprq.stride_num_n) &&
	    mprq_stride_size <= (1U << config->mprq.max_stride_size_n)) {
		/* TODO: Rx scatter isn't supported yet. */
		tmpl->rxq.sges_n = 0;
		/* Trim the number of descs needed. */
		desc >>= config->mprq.stride_num_n;
		tmpl->rxq.strd_num_n = config->mprq.stride_num_n;
		tmpl->rxq.strd_sz_n = RTE_MAX(log2above(mprq_stride_size),
					      config->mprq.min_stride_size_n);
		tmpl->rxq.strd_shift_en = MLX5_MPRQ_TWO_BYTE_SHIFT;
		tmpl->rxq.mprq_max_memcpy_len =
			RTE_MIN(mb_len - RTE_PKTMBUF_HEADROOM,
				config->mprq.max_memcpy_len);
		DRV_LOG(DEBUG,
			"port %u Rx queue %u: Multi-Packet RQ is enabled"
			" strd_num_n = %u, strd_sz_n = %u",
			dev->data->port_id, idx,
			tmpl->rxq.strd_num_n, tmpl->rxq.strd_sz_n);
	} else if (dev->data->dev_conf.rxmode.max_rx_pkt_len <=
		   (mb_len - RTE_PKTMBUF_HEADROOM)) {
		tmpl->rxq.sges_n = 0;
	} else if (offloads & DEV_RX_OFFLOAD_SCATTER) {
		unsigned int size =
			RTE_PKTMBUF_HEADROOM +
			dev->data->dev_conf.rxmode.max_rx_pkt_len;
		unsigned int sges_n;

		/*
		 * Determine the number of SGEs needed for a full packet
		 * and round it to the next power of two.
		 */
		sges_n = log2above((size / mb_len) + !!(size % mb_len));
		tmpl->rxq.sges_n = sges_n;
		/* Make sure rxq.sges_n did not overflow. */
		size = mb_len * (1 << tmpl->rxq.sges_n);
		size -= RTE_PKTMBUF_HEADROOM;
		if (size < dev->data->dev_conf.rxmode.max_rx_pkt_len) {
			DRV_LOG(ERR,
				"port %u too many SGEs (%u) needed to handle"
				" requested maximum packet size %u",
				dev->data->port_id,
				1 << sges_n,
				dev->data->dev_conf.rxmode.max_rx_pkt_len);
			rte_errno = EOVERFLOW;
			goto error;
		}
	} else {
		DRV_LOG(WARNING,
			"port %u the requested maximum Rx packet size (%u) is"
			" larger than a single mbuf (%u) and scattered mode has"
			" not been requested",
			dev->data->port_id,
			dev->data->dev_conf.rxmode.max_rx_pkt_len,
			mb_len - RTE_PKTMBUF_HEADROOM);
	}
	if (mprq_en && !mlx5_rxq_mprq_enabled(&tmpl->rxq))
		DRV_LOG(WARNING,
			"port %u MPRQ is requested but cannot be enabled"
			" (requested: desc = %u, stride_sz = %u,"
			" supported: min_stride_num = %u, max_stride_sz = %u).",
			dev->data->port_id, desc, mprq_stride_size,
			(1 << config->mprq.stride_num_n),
			(1 << config->mprq.max_stride_size_n));
	DRV_LOG(DEBUG, "port %u maximum number of segments per packet: %u",
		dev->data->port_id, 1 << tmpl->rxq.sges_n);
	if (desc % (1 << tmpl->rxq.sges_n)) {
		DRV_LOG(ERR,
			"port %u number of Rx queue descriptors (%u) is not a"
			" multiple of SGEs per packet (%u)",
			dev->data->port_id,
			desc,
			1 << tmpl->rxq.sges_n);
		rte_errno = EINVAL;
		goto error;
	}
	/* Toggle RX checksum offload if hardware supports it. */
	tmpl->rxq.csum = !!(offloads & DEV_RX_OFFLOAD_CHECKSUM);
	tmpl->rxq.hw_timestamp = !!(offloads & DEV_RX_OFFLOAD_TIMESTAMP);
	/* Configure VLAN stripping. */
	tmpl->rxq.vlan_strip = !!(offloads & DEV_RX_OFFLOAD_VLAN_STRIP);
	/* By default, FCS (CRC) is stripped by hardware. */
	tmpl->rxq.crc_present = 0;
	if (rte_eth_dev_must_keep_crc(offloads)) {
		if (config->hw_fcs_strip) {
			tmpl->rxq.crc_present = 1;
		} else {
			DRV_LOG(WARNING,
				"port %u CRC stripping has been disabled but will"
				" still be performed by hardware, make sure MLNX_OFED"
				" and firmware are up to date",
				dev->data->port_id);
		}
	}
	DRV_LOG(DEBUG,
		"port %u CRC stripping is %s, %u bytes will be subtracted from"
		" incoming frames to hide it",
		dev->data->port_id,
		tmpl->rxq.crc_present ? "disabled" : "enabled",
		tmpl->rxq.crc_present << 2);
	/* Save port ID. */
	tmpl->rxq.rss_hash = !!priv->rss_conf.rss_hf &&
		(!!(dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS));
	tmpl->rxq.port_id = dev->data->port_id;
	tmpl->priv = priv;
	tmpl->rxq.mp = mp;
	tmpl->rxq.stats.idx = idx;
	tmpl->rxq.elts_n = log2above(desc);
	tmpl->rxq.elts =
		(struct rte_mbuf *(*)[1 << tmpl->rxq.elts_n])(tmpl + 1);
#ifndef RTE_ARCH_64
	tmpl->rxq.uar_lock_cq = &priv->uar_lock_cq;
#endif
	tmpl->idx = idx;
	rte_atomic32_inc(&tmpl->refcnt);
	LIST_INSERT_HEAD(&priv->rxqsctrl, tmpl, next);
	return tmpl;
error:
	rte_free(tmpl);
	return NULL;
}

/**
 * Get a Rx queue.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param idx
 *   RX queue index.
 *
 * @return
 *   A pointer to the queue if it exists, NULL otherwise.
 */
struct mlx5_rxq_ctrl *
mlx5_rxq_get(struct rte_eth_dev *dev, uint16_t idx)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_rxq_ctrl *rxq_ctrl = NULL;

	if ((*priv->rxqs)[idx]) {
		rxq_ctrl = container_of((*priv->rxqs)[idx],
					struct mlx5_rxq_ctrl,
					rxq);
		mlx5_rxq_ibv_get(dev, idx);
		rte_atomic32_inc(&rxq_ctrl->refcnt);
	}
	return rxq_ctrl;
}

/**
 * Release a Rx queue.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param idx
 *   RX queue index.
 *
 * @return
 *   1 while a reference on it exists, 0 when freed.
 */
int
mlx5_rxq_release(struct rte_eth_dev *dev, uint16_t idx)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_rxq_ctrl *rxq_ctrl;

	if (!(*priv->rxqs)[idx])
		return 0;
	rxq_ctrl = container_of((*priv->rxqs)[idx], struct mlx5_rxq_ctrl, rxq);
	assert(rxq_ctrl->priv);
	if (rxq_ctrl->ibv && !mlx5_rxq_ibv_release(rxq_ctrl->ibv))
		rxq_ctrl->ibv = NULL;
	if (rte_atomic32_dec_and_test(&rxq_ctrl->refcnt)) {
		mlx5_mr_btree_free(&rxq_ctrl->rxq.mr_ctrl.cache_bh);
		LIST_REMOVE(rxq_ctrl, next);
		rte_free(rxq_ctrl);
		(*priv->rxqs)[idx] = NULL;
		return 0;
	}
	return 1;
}

/**
 * Verify if the queue can be released.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param idx
 *   RX queue index.
 *
 * @return
 *   1 if the queue can be released, negative errno otherwise and rte_errno is
 *   set.
 */
int
mlx5_rxq_releasable(struct rte_eth_dev *dev, uint16_t idx)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_rxq_ctrl *rxq_ctrl;

	if (!(*priv->rxqs)[idx]) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	rxq_ctrl = container_of((*priv->rxqs)[idx], struct mlx5_rxq_ctrl, rxq);
	return (rte_atomic32_read(&rxq_ctrl->refcnt) == 1);
}

/**
 * Verify the Rx Queue list is empty.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   The number of objects not released.
 */
int
mlx5_rxq_verify(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_rxq_ctrl *rxq_ctrl;
	int ret = 0;

	LIST_FOREACH(rxq_ctrl, &priv->rxqsctrl, next) {
		DRV_LOG(DEBUG, "port %u Rx Queue %u still referenced",
			dev->data->port_id, rxq_ctrl->idx);
		++ret;
	}
	return ret;
}

/**
 * Create an indirection table.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param queues
 *   Queues entering in the indirection table.
 * @param queues_n
 *   Number of queues in the array.
 *
 * @return
 *   The Verbs object initialised, NULL otherwise and rte_errno is set.
 */
struct mlx5_ind_table_ibv *
mlx5_ind_table_ibv_new(struct rte_eth_dev *dev, const uint16_t *queues,
		       uint32_t queues_n)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_ind_table_ibv *ind_tbl;
	const unsigned int wq_n = rte_is_power_of_2(queues_n) ?
				  log2above(queues_n) :
				  log2above(priv->config.ind_table_max_size);
	struct ibv_wq *wq[1 << wq_n];
	unsigned int i;
	unsigned int j;

	ind_tbl = rte_calloc(__func__, 1, sizeof(*ind_tbl) +
			     queues_n * sizeof(uint16_t), 0);
	if (!ind_tbl) {
		rte_errno = ENOMEM;
		return NULL;
	}
	for (i = 0; i != queues_n; ++i) {
		struct mlx5_rxq_ctrl *rxq = mlx5_rxq_get(dev, queues[i]);

		if (!rxq)
			goto error;
		wq[i] = rxq->ibv->wq;
		ind_tbl->queues[i] = queues[i];
	}
	ind_tbl->queues_n = queues_n;
	/* Finalise indirection table. */
	for (j = 0; i != (unsigned int)(1 << wq_n); ++i, ++j)
		wq[i] = wq[j];
	ind_tbl->ind_table = mlx5_glue->create_rwq_ind_table
		(priv->ctx,
		 &(struct ibv_rwq_ind_table_init_attr){
			.log_ind_tbl_size = wq_n,
			.ind_tbl = wq,
			.comp_mask = 0,
		 });
	if (!ind_tbl->ind_table) {
		rte_errno = errno;
		goto error;
	}
	rte_atomic32_inc(&ind_tbl->refcnt);
	LIST_INSERT_HEAD(&priv->ind_tbls, ind_tbl, next);
	return ind_tbl;
error:
	rte_free(ind_tbl);
	DEBUG("port %u cannot create indirection table", dev->data->port_id);
	return NULL;
}

/**
 * Get an indirection table.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param queues
 *   Queues entering in the indirection table.
 * @param queues_n
 *   Number of queues in the array.
 *
 * @return
 *   An indirection table if found.
 */
struct mlx5_ind_table_ibv *
mlx5_ind_table_ibv_get(struct rte_eth_dev *dev, const uint16_t *queues,
		       uint32_t queues_n)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_ind_table_ibv *ind_tbl;

	LIST_FOREACH(ind_tbl, &priv->ind_tbls, next) {
		if ((ind_tbl->queues_n == queues_n) &&
		    (memcmp(ind_tbl->queues, queues,
			    ind_tbl->queues_n * sizeof(ind_tbl->queues[0]))
		     == 0))
			break;
	}
	if (ind_tbl) {
		unsigned int i;

		rte_atomic32_inc(&ind_tbl->refcnt);
		for (i = 0; i != ind_tbl->queues_n; ++i)
			mlx5_rxq_get(dev, ind_tbl->queues[i]);
	}
	return ind_tbl;
}

/**
 * Release an indirection table.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param ind_tbl
 *   Indirection table to release.
 *
 * @return
 *   1 while a reference on it exists, 0 when freed.
 */
int
mlx5_ind_table_ibv_release(struct rte_eth_dev *dev,
			   struct mlx5_ind_table_ibv *ind_tbl)
{
	unsigned int i;

	if (rte_atomic32_dec_and_test(&ind_tbl->refcnt))
		claim_zero(mlx5_glue->destroy_rwq_ind_table
			   (ind_tbl->ind_table));
	for (i = 0; i != ind_tbl->queues_n; ++i)
		claim_nonzero(mlx5_rxq_release(dev, ind_tbl->queues[i]));
	if (!rte_atomic32_read(&ind_tbl->refcnt)) {
		LIST_REMOVE(ind_tbl, next);
		rte_free(ind_tbl);
		return 0;
	}
	return 1;
}

/**
 * Verify the Verbs indirection table list is empty.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   The number of objects not released.
 */
int
mlx5_ind_table_ibv_verify(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_ind_table_ibv *ind_tbl;
	int ret = 0;

	LIST_FOREACH(ind_tbl, &priv->ind_tbls, next) {
		DRV_LOG(DEBUG,
			"port %u Verbs indirection table %p still referenced",
			dev->data->port_id, (void *)ind_tbl);
		++ret;
	}
	return ret;
}

/**
 * Create an Rx Hash queue.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param rss_key
 *   RSS key for the Rx hash queue.
 * @param rss_key_len
 *   RSS key length.
 * @param hash_fields
 *   Verbs protocol hash field to make the RSS on.
 * @param queues
 *   Queues entering in hash queue. In case of empty hash_fields only the
 *   first queue index will be taken for the indirection table.
 * @param queues_n
 *   Number of queues.
 *
 * @return
 *   The Verbs object initialised, NULL otherwise and rte_errno is set.
 */
struct mlx5_hrxq *
mlx5_hrxq_new(struct rte_eth_dev *dev,
	      const uint8_t *rss_key, uint32_t rss_key_len,
	      uint64_t hash_fields,
	      const uint16_t *queues, uint32_t queues_n,
	      int tunnel __rte_unused)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_hrxq *hrxq;
	struct mlx5_ind_table_ibv *ind_tbl;
	struct ibv_qp *qp;
	int err;

	queues_n = hash_fields ? queues_n : 1;
	ind_tbl = mlx5_ind_table_ibv_get(dev, queues, queues_n);
	if (!ind_tbl)
		ind_tbl = mlx5_ind_table_ibv_new(dev, queues, queues_n);
	if (!ind_tbl) {
		rte_errno = ENOMEM;
		return NULL;
	}
	if (!rss_key_len) {
		rss_key_len = MLX5_RSS_HASH_KEY_LEN;
		rss_key = rss_hash_default_key;
	}
#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
	qp = mlx5_glue->dv_create_qp
		(priv->ctx,
		 &(struct ibv_qp_init_attr_ex){
			.qp_type = IBV_QPT_RAW_PACKET,
			.comp_mask =
				IBV_QP_INIT_ATTR_PD |
				IBV_QP_INIT_ATTR_IND_TABLE |
				IBV_QP_INIT_ATTR_RX_HASH,
			.rx_hash_conf = (struct ibv_rx_hash_conf){
				.rx_hash_function = IBV_RX_HASH_FUNC_TOEPLITZ,
				.rx_hash_key_len = rss_key_len ? rss_key_len :
						   MLX5_RSS_HASH_KEY_LEN,
				.rx_hash_key = rss_key ?
					       (void *)(uintptr_t)rss_key :
					       rss_hash_default_key,
				.rx_hash_fields_mask = hash_fields,
			},
			.rwq_ind_tbl = ind_tbl->ind_table,
			.pd = priv->pd,
		 },
		 &(struct mlx5dv_qp_init_attr){
			.comp_mask = tunnel ?
				MLX5DV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS : 0,
			.create_flags = MLX5DV_QP_CREATE_TUNNEL_OFFLOADS,
		 });
#else
	qp = mlx5_glue->create_qp_ex
		(priv->ctx,
		 &(struct ibv_qp_init_attr_ex){
			.qp_type = IBV_QPT_RAW_PACKET,
			.comp_mask =
				IBV_QP_INIT_ATTR_PD |
				IBV_QP_INIT_ATTR_IND_TABLE |
				IBV_QP_INIT_ATTR_RX_HASH,
			.rx_hash_conf = (struct ibv_rx_hash_conf){
				.rx_hash_function = IBV_RX_HASH_FUNC_TOEPLITZ,
				.rx_hash_key_len = rss_key_len ? rss_key_len :
						   MLX5_RSS_HASH_KEY_LEN,
				.rx_hash_key = rss_key ?
					       (void *)(uintptr_t)rss_key :
					       rss_hash_default_key,
				.rx_hash_fields_mask = hash_fields,
			},
			.rwq_ind_tbl = ind_tbl->ind_table,
			.pd = priv->pd,
		 });
#endif
	if (!qp) {
		rte_errno = errno;
		goto error;
	}
	hrxq = rte_calloc(__func__, 1, sizeof(*hrxq) + rss_key_len, 0);
	if (!hrxq)
		goto error;
	hrxq->ind_table = ind_tbl;
	hrxq->qp = qp;
	hrxq->rss_key_len = rss_key_len;
	hrxq->hash_fields = hash_fields;
	memcpy(hrxq->rss_key, rss_key, rss_key_len);
	rte_atomic32_inc(&hrxq->refcnt);
	LIST_INSERT_HEAD(&priv->hrxqs, hrxq, next);
	return hrxq;
error:
	err = rte_errno; /* Save rte_errno before cleanup. */
	mlx5_ind_table_ibv_release(dev, ind_tbl);
	if (qp)
		claim_zero(mlx5_glue->destroy_qp(qp));
	rte_errno = err; /* Restore rte_errno. */
	return NULL;
}

/**
 * Get an Rx Hash queue.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param rss_key
 *   RSS key for the Rx hash queue.
 * @param rss_key_len
 *   RSS key length.
 * @param hash_fields
 *   Verbs protocol hash field to make the RSS on.
 * @param queues
 *   Queues entering in hash queue. In case of empty hash_fields only the
 *   first queue index will be taken for the indirection table.
 * @param queues_n
 *   Number of queues.
 *
 * @return
 *   A hash Rx queue on success.
 */
struct mlx5_hrxq *
mlx5_hrxq_get(struct rte_eth_dev *dev,
	      const uint8_t *rss_key, uint32_t rss_key_len,
	      uint64_t hash_fields,
	      const uint16_t *queues, uint32_t queues_n)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_hrxq *hrxq;

	queues_n = hash_fields ? queues_n : 1;
	LIST_FOREACH(hrxq, &priv->hrxqs, next) {
		struct mlx5_ind_table_ibv *ind_tbl;

		if (hrxq->rss_key_len != rss_key_len)
			continue;
		if (memcmp(hrxq->rss_key, rss_key, rss_key_len))
			continue;
		if (hrxq->hash_fields != hash_fields)
			continue;
		ind_tbl = mlx5_ind_table_ibv_get(dev, queues, queues_n);
		if (!ind_tbl)
			continue;
		if (ind_tbl != hrxq->ind_table) {
			mlx5_ind_table_ibv_release(dev, ind_tbl);
			continue;
		}
		rte_atomic32_inc(&hrxq->refcnt);
		return hrxq;
	}
	return NULL;
}

/**
 * Release the hash Rx queue.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param hrxq
 *   Pointer to Hash Rx queue to release.
 *
 * @return
 *   1 while a reference on it exists, 0 when freed.
 */
int
mlx5_hrxq_release(struct rte_eth_dev *dev, struct mlx5_hrxq *hrxq)
{
	if (rte_atomic32_dec_and_test(&hrxq->refcnt)) {
		claim_zero(mlx5_glue->destroy_qp(hrxq->qp));
		mlx5_ind_table_ibv_release(dev, hrxq->ind_table);
		LIST_REMOVE(hrxq, next);
		rte_free(hrxq);
		return 0;
	}
	claim_nonzero(mlx5_ind_table_ibv_release(dev, hrxq->ind_table));
	return 1;
}

/**
 * Verify the Verbs hash Rx queue list is empty.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   The number of objects not released.
 */
int
mlx5_hrxq_ibv_verify(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_hrxq *hrxq;
	int ret = 0;

	LIST_FOREACH(hrxq, &priv->hrxqs, next) {
		DRV_LOG(DEBUG,
			"port %u Verbs hash Rx queue %p still referenced",
			dev->data->port_id, (void *)hrxq);
		++ret;
	}
	return ret;
}

/**
 * Create a drop Rx queue Verbs object.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   The Verbs object initialised, NULL otherwise and rte_errno is set.
 */
struct mlx5_rxq_ibv *
mlx5_rxq_ibv_drop_new(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;
	struct ibv_cq *cq;
	struct ibv_wq *wq = NULL;
	struct mlx5_rxq_ibv *rxq;

	if (priv->drop_queue.rxq)
		return priv->drop_queue.rxq;
	cq = mlx5_glue->create_cq(priv->ctx, 1, NULL, NULL, 0);
	if (!cq) {
		DEBUG("port %u cannot allocate CQ for drop queue",
		      dev->data->port_id);
		rte_errno = errno;
		goto error;
	}
	wq = mlx5_glue->create_wq(priv->ctx,
		 &(struct ibv_wq_init_attr){
			.wq_type = IBV_WQT_RQ,
			.max_wr = 1,
			.max_sge = 1,
			.pd = priv->pd,
			.cq = cq,
		 });
	if (!wq) {
		DEBUG("port %u cannot allocate WQ for drop queue",
		      dev->data->port_id);
		rte_errno = errno;
		goto error;
	}
	rxq = rte_calloc(__func__, 1, sizeof(*rxq), 0);
	if (!rxq) {
		DEBUG("port %u cannot allocate drop Rx queue memory",
		      dev->data->port_id);
		rte_errno = ENOMEM;
		goto error;
	}
	rxq->cq = cq;
	rxq->wq = wq;
	priv->drop_queue.rxq = rxq;
	return rxq;
error:
	if (wq)
		claim_zero(mlx5_glue->destroy_wq(wq));
	if (cq)
		claim_zero(mlx5_glue->destroy_cq(cq));
	return NULL;
}

/**
 * Release a drop Rx queue Verbs object.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
void
mlx5_rxq_ibv_drop_release(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_rxq_ibv *rxq = priv->drop_queue.rxq;

	if (rxq->wq)
		claim_zero(mlx5_glue->destroy_wq(rxq->wq));
	if (rxq->cq)
		claim_zero(mlx5_glue->destroy_cq(rxq->cq));
	rte_free(rxq);
	priv->drop_queue.rxq = NULL;
}

/**
 * Create a drop indirection table.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   The Verbs object initialised, NULL otherwise and rte_errno is set.
 */
struct mlx5_ind_table_ibv *
mlx5_ind_table_ibv_drop_new(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_ind_table_ibv *ind_tbl;
	struct mlx5_rxq_ibv *rxq;
	struct mlx5_ind_table_ibv tmpl;

	rxq = mlx5_rxq_ibv_drop_new(dev);
	if (!rxq)
		return NULL;
	tmpl.ind_table = mlx5_glue->create_rwq_ind_table
		(priv->ctx,
		 &(struct ibv_rwq_ind_table_init_attr){
			.log_ind_tbl_size = 0,
			.ind_tbl = &rxq->wq,
			.comp_mask = 0,
		 });
	if (!tmpl.ind_table) {
		DEBUG("port %u cannot allocate indirection table for drop"
		      " queue",
		      dev->data->port_id);
		rte_errno = errno;
		goto error;
	}
	ind_tbl = rte_calloc(__func__, 1, sizeof(*ind_tbl), 0);
	if (!ind_tbl) {
		rte_errno = ENOMEM;
		goto error;
	}
	ind_tbl->ind_table = tmpl.ind_table;
	return ind_tbl;
error:
	mlx5_rxq_ibv_drop_release(dev);
	return NULL;
}

/**
 * Release a drop indirection table.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
void
mlx5_ind_table_ibv_drop_release(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_ind_table_ibv *ind_tbl = priv->drop_queue.hrxq->ind_table;

	claim_zero(mlx5_glue->destroy_rwq_ind_table(ind_tbl->ind_table));
	mlx5_rxq_ibv_drop_release(dev);
	rte_free(ind_tbl);
	priv->drop_queue.hrxq->ind_table = NULL;
}

/**
 * Create a drop Rx Hash queue.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   The Verbs object initialised, NULL otherwise and rte_errno is set.
 */
struct mlx5_hrxq *
mlx5_hrxq_drop_new(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_ind_table_ibv *ind_tbl;
	struct ibv_qp *qp;
	struct mlx5_hrxq *hrxq;

	if (priv->drop_queue.hrxq) {
		rte_atomic32_inc(&priv->drop_queue.hrxq->refcnt);
		return priv->drop_queue.hrxq;
	}
	ind_tbl = mlx5_ind_table_ibv_drop_new(dev);
	if (!ind_tbl)
		return NULL;
	qp = mlx5_glue->create_qp_ex(priv->ctx,
		 &(struct ibv_qp_init_attr_ex){
			.qp_type = IBV_QPT_RAW_PACKET,
			.comp_mask =
				IBV_QP_INIT_ATTR_PD |
				IBV_QP_INIT_ATTR_IND_TABLE |
				IBV_QP_INIT_ATTR_RX_HASH,
			.rx_hash_conf = (struct ibv_rx_hash_conf){
				.rx_hash_function =
					IBV_RX_HASH_FUNC_TOEPLITZ,
				.rx_hash_key_len = MLX5_RSS_HASH_KEY_LEN,
				.rx_hash_key = rss_hash_default_key,
				.rx_hash_fields_mask = 0,
			},
			.rwq_ind_tbl = ind_tbl->ind_table,
			.pd = priv->pd
		 });
	if (!qp) {
		DEBUG("port %u cannot allocate QP for drop queue",
		      dev->data->port_id);
		rte_errno = errno;
		goto error;
	}
	hrxq = rte_calloc(__func__, 1, sizeof(*hrxq), 0);
	if (!hrxq) {
		DRV_LOG(WARNING,
			"port %u cannot allocate memory for drop queue",
			dev->data->port_id);
		rte_errno = ENOMEM;
		goto error;
	}
	hrxq->ind_table = ind_tbl;
	hrxq->qp = qp;
	priv->drop_queue.hrxq = hrxq;
	rte_atomic32_set(&hrxq->refcnt, 1);
	return hrxq;
error:
	if (ind_tbl)
		mlx5_ind_table_ibv_drop_release(dev);
	return NULL;
}

/**
 * Release a drop hash Rx queue.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
void
mlx5_hrxq_drop_release(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_hrxq *hrxq = priv->drop_queue.hrxq;

	if (rte_atomic32_dec_and_test(&hrxq->refcnt)) {
		claim_zero(mlx5_glue->destroy_qp(hrxq->qp));
		mlx5_ind_table_ibv_drop_release(dev);
		rte_free(hrxq);
		priv->drop_queue.hrxq = NULL;
	}
}