1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2015 6WIND S.A. 3 * Copyright 2015 Mellanox Technologies, Ltd 4 */ 5 6 #include <stddef.h> 7 #include <assert.h> 8 #include <errno.h> 9 #include <string.h> 10 #include <stdint.h> 11 #include <fcntl.h> 12 #include <sys/queue.h> 13 14 /* Verbs header. */ 15 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */ 16 #ifdef PEDANTIC 17 #pragma GCC diagnostic ignored "-Wpedantic" 18 #endif 19 #include <infiniband/verbs.h> 20 #include <infiniband/mlx5dv.h> 21 #ifdef PEDANTIC 22 #pragma GCC diagnostic error "-Wpedantic" 23 #endif 24 25 #include <rte_mbuf.h> 26 #include <rte_malloc.h> 27 #include <rte_ethdev_driver.h> 28 #include <rte_common.h> 29 #include <rte_interrupts.h> 30 #include <rte_debug.h> 31 #include <rte_io.h> 32 33 #include "mlx5.h" 34 #include "mlx5_rxtx.h" 35 #include "mlx5_utils.h" 36 #include "mlx5_autoconf.h" 37 #include "mlx5_defs.h" 38 #include "mlx5_glue.h" 39 40 /* Default RSS hash key also used for ConnectX-3. */ 41 uint8_t rss_hash_default_key[] = { 42 0x2c, 0xc6, 0x81, 0xd1, 43 0x5b, 0xdb, 0xf4, 0xf7, 44 0xfc, 0xa2, 0x83, 0x19, 45 0xdb, 0x1a, 0x3e, 0x94, 46 0x6b, 0x9e, 0x38, 0xd9, 47 0x2c, 0x9c, 0x03, 0xd1, 48 0xad, 0x99, 0x44, 0xa7, 49 0xd9, 0x56, 0x3d, 0x59, 50 0x06, 0x3c, 0x25, 0xf3, 51 0xfc, 0x1f, 0xdc, 0x2a, 52 }; 53 54 /* Length of the default RSS hash key. */ 55 static_assert(MLX5_RSS_HASH_KEY_LEN == 56 (unsigned int)sizeof(rss_hash_default_key), 57 "wrong RSS default key size."); 58 59 /** 60 * Check whether Multi-Packet RQ can be enabled for the device. 61 * 62 * @param dev 63 * Pointer to Ethernet device. 64 * 65 * @return 66 * 1 if supported, negative errno value if not. 67 */ 68 inline int 69 mlx5_check_mprq_support(struct rte_eth_dev *dev) 70 { 71 struct mlx5_priv *priv = dev->data->dev_private; 72 73 if (priv->config.mprq.enabled && 74 priv->rxqs_n >= priv->config.mprq.min_rxqs_num) 75 return 1; 76 return -ENOTSUP; 77 } 78 79 /** 80 * Check whether Multi-Packet RQ is enabled for the Rx queue. 81 * 82 * @param rxq 83 * Pointer to receive queue structure. 84 * 85 * @return 86 * 0 if disabled, otherwise enabled. 87 */ 88 inline int 89 mlx5_rxq_mprq_enabled(struct mlx5_rxq_data *rxq) 90 { 91 return rxq->strd_num_n > 0; 92 } 93 94 /** 95 * Check whether Multi-Packet RQ is enabled for the device. 96 * 97 * @param dev 98 * Pointer to Ethernet device. 99 * 100 * @return 101 * 0 if disabled, otherwise enabled. 102 */ 103 inline int 104 mlx5_mprq_enabled(struct rte_eth_dev *dev) 105 { 106 struct mlx5_priv *priv = dev->data->dev_private; 107 uint16_t i; 108 uint16_t n = 0; 109 110 if (mlx5_check_mprq_support(dev) < 0) 111 return 0; 112 /* All the configured queues should be enabled. */ 113 for (i = 0; i < priv->rxqs_n; ++i) { 114 struct mlx5_rxq_data *rxq = (*priv->rxqs)[i]; 115 116 if (!rxq) 117 continue; 118 if (mlx5_rxq_mprq_enabled(rxq)) 119 ++n; 120 } 121 /* Multi-Packet RQ can't be partially configured. */ 122 assert(n == 0 || n == priv->rxqs_n); 123 return n == priv->rxqs_n; 124 } 125 126 /** 127 * Allocate RX queue elements for Multi-Packet RQ. 128 * 129 * @param rxq_ctrl 130 * Pointer to RX queue structure. 131 * 132 * @return 133 * 0 on success, a negative errno value otherwise and rte_errno is set. 134 */ 135 static int 136 rxq_alloc_elts_mprq(struct mlx5_rxq_ctrl *rxq_ctrl) 137 { 138 struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq; 139 unsigned int wqe_n = 1 << rxq->elts_n; 140 unsigned int i; 141 int err; 142 143 /* Iterate on segments. 
*/ 144 for (i = 0; i <= wqe_n; ++i) { 145 struct mlx5_mprq_buf *buf; 146 147 if (rte_mempool_get(rxq->mprq_mp, (void **)&buf) < 0) { 148 DRV_LOG(ERR, "port %u empty mbuf pool", rxq->port_id); 149 rte_errno = ENOMEM; 150 goto error; 151 } 152 if (i < wqe_n) 153 (*rxq->mprq_bufs)[i] = buf; 154 else 155 rxq->mprq_repl = buf; 156 } 157 DRV_LOG(DEBUG, 158 "port %u Rx queue %u allocated and configured %u segments", 159 rxq->port_id, rxq->idx, wqe_n); 160 return 0; 161 error: 162 err = rte_errno; /* Save rte_errno before cleanup. */ 163 wqe_n = i; 164 for (i = 0; (i != wqe_n); ++i) { 165 if ((*rxq->mprq_bufs)[i] != NULL) 166 rte_mempool_put(rxq->mprq_mp, 167 (*rxq->mprq_bufs)[i]); 168 (*rxq->mprq_bufs)[i] = NULL; 169 } 170 DRV_LOG(DEBUG, "port %u Rx queue %u failed, freed everything", 171 rxq->port_id, rxq->idx); 172 rte_errno = err; /* Restore rte_errno. */ 173 return -rte_errno; 174 } 175 176 /** 177 * Allocate RX queue elements for Single-Packet RQ. 178 * 179 * @param rxq_ctrl 180 * Pointer to RX queue structure. 181 * 182 * @return 183 * 0 on success, errno value on failure. 184 */ 185 static int 186 rxq_alloc_elts_sprq(struct mlx5_rxq_ctrl *rxq_ctrl) 187 { 188 const unsigned int sges_n = 1 << rxq_ctrl->rxq.sges_n; 189 unsigned int elts_n = 1 << rxq_ctrl->rxq.elts_n; 190 unsigned int i; 191 int err; 192 193 /* Iterate on segments. */ 194 for (i = 0; (i != elts_n); ++i) { 195 struct rte_mbuf *buf; 196 197 buf = rte_pktmbuf_alloc(rxq_ctrl->rxq.mp); 198 if (buf == NULL) { 199 DRV_LOG(ERR, "port %u empty mbuf pool", 200 PORT_ID(rxq_ctrl->priv)); 201 rte_errno = ENOMEM; 202 goto error; 203 } 204 /* Headroom is reserved by rte_pktmbuf_alloc(). */ 205 assert(DATA_OFF(buf) == RTE_PKTMBUF_HEADROOM); 206 /* Buffer is supposed to be empty. */ 207 assert(rte_pktmbuf_data_len(buf) == 0); 208 assert(rte_pktmbuf_pkt_len(buf) == 0); 209 assert(!buf->next); 210 /* Only the first segment keeps headroom. */ 211 if (i % sges_n) 212 SET_DATA_OFF(buf, 0); 213 PORT(buf) = rxq_ctrl->rxq.port_id; 214 DATA_LEN(buf) = rte_pktmbuf_tailroom(buf); 215 PKT_LEN(buf) = DATA_LEN(buf); 216 NB_SEGS(buf) = 1; 217 (*rxq_ctrl->rxq.elts)[i] = buf; 218 } 219 /* If Rx vector is activated. */ 220 if (mlx5_rxq_check_vec_support(&rxq_ctrl->rxq) > 0) { 221 struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq; 222 struct rte_mbuf *mbuf_init = &rxq->fake_mbuf; 223 int j; 224 225 /* Initialize default rearm_data for vPMD. */ 226 mbuf_init->data_off = RTE_PKTMBUF_HEADROOM; 227 rte_mbuf_refcnt_set(mbuf_init, 1); 228 mbuf_init->nb_segs = 1; 229 mbuf_init->port = rxq->port_id; 230 /* 231 * prevent compiler reordering: 232 * rearm_data covers previous fields. 233 */ 234 rte_compiler_barrier(); 235 rxq->mbuf_initializer = 236 *(uint64_t *)&mbuf_init->rearm_data; 237 /* Padding with a fake mbuf for vectorized Rx. */ 238 for (j = 0; j < MLX5_VPMD_DESCS_PER_LOOP; ++j) 239 (*rxq->elts)[elts_n + j] = &rxq->fake_mbuf; 240 } 241 DRV_LOG(DEBUG, 242 "port %u Rx queue %u allocated and configured %u segments" 243 " (max %u packets)", 244 PORT_ID(rxq_ctrl->priv), rxq_ctrl->rxq.idx, elts_n, 245 elts_n / (1 << rxq_ctrl->rxq.sges_n)); 246 return 0; 247 error: 248 err = rte_errno; /* Save rte_errno before cleanup. */ 249 elts_n = i; 250 for (i = 0; (i != elts_n); ++i) { 251 if ((*rxq_ctrl->rxq.elts)[i] != NULL) 252 rte_pktmbuf_free_seg((*rxq_ctrl->rxq.elts)[i]); 253 (*rxq_ctrl->rxq.elts)[i] = NULL; 254 } 255 DRV_LOG(DEBUG, "port %u Rx queue %u failed, freed everything", 256 PORT_ID(rxq_ctrl->priv), rxq_ctrl->rxq.idx); 257 rte_errno = err; /* Restore rte_errno. 
*/ 258 return -rte_errno; 259 } 260 261 /** 262 * Allocate RX queue elements. 263 * 264 * @param rxq_ctrl 265 * Pointer to RX queue structure. 266 * 267 * @return 268 * 0 on success, errno value on failure. 269 */ 270 int 271 rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl) 272 { 273 return mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq) ? 274 rxq_alloc_elts_mprq(rxq_ctrl) : rxq_alloc_elts_sprq(rxq_ctrl); 275 } 276 277 /** 278 * Free RX queue elements for Multi-Packet RQ. 279 * 280 * @param rxq_ctrl 281 * Pointer to RX queue structure. 282 */ 283 static void 284 rxq_free_elts_mprq(struct mlx5_rxq_ctrl *rxq_ctrl) 285 { 286 struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq; 287 uint16_t i; 288 289 DRV_LOG(DEBUG, "port %u Multi-Packet Rx queue %u freeing WRs", 290 rxq->port_id, rxq->idx); 291 if (rxq->mprq_bufs == NULL) 292 return; 293 assert(mlx5_rxq_check_vec_support(rxq) < 0); 294 for (i = 0; (i != (1u << rxq->elts_n)); ++i) { 295 if ((*rxq->mprq_bufs)[i] != NULL) 296 mlx5_mprq_buf_free((*rxq->mprq_bufs)[i]); 297 (*rxq->mprq_bufs)[i] = NULL; 298 } 299 if (rxq->mprq_repl != NULL) { 300 mlx5_mprq_buf_free(rxq->mprq_repl); 301 rxq->mprq_repl = NULL; 302 } 303 } 304 305 /** 306 * Free RX queue elements for Single-Packet RQ. 307 * 308 * @param rxq_ctrl 309 * Pointer to RX queue structure. 310 */ 311 static void 312 rxq_free_elts_sprq(struct mlx5_rxq_ctrl *rxq_ctrl) 313 { 314 struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq; 315 const uint16_t q_n = (1 << rxq->elts_n); 316 const uint16_t q_mask = q_n - 1; 317 uint16_t used = q_n - (rxq->rq_ci - rxq->rq_pi); 318 uint16_t i; 319 320 DRV_LOG(DEBUG, "port %u Rx queue %u freeing WRs", 321 PORT_ID(rxq_ctrl->priv), rxq->idx); 322 if (rxq->elts == NULL) 323 return; 324 /** 325 * Some mbuf in the Ring belongs to the application. They cannot be 326 * freed. 327 */ 328 if (mlx5_rxq_check_vec_support(rxq) > 0) { 329 for (i = 0; i < used; ++i) 330 (*rxq->elts)[(rxq->rq_ci + i) & q_mask] = NULL; 331 rxq->rq_pi = rxq->rq_ci; 332 } 333 for (i = 0; (i != (1u << rxq->elts_n)); ++i) { 334 if ((*rxq->elts)[i] != NULL) 335 rte_pktmbuf_free_seg((*rxq->elts)[i]); 336 (*rxq->elts)[i] = NULL; 337 } 338 } 339 340 /** 341 * Free RX queue elements. 342 * 343 * @param rxq_ctrl 344 * Pointer to RX queue structure. 345 */ 346 static void 347 rxq_free_elts(struct mlx5_rxq_ctrl *rxq_ctrl) 348 { 349 if (mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq)) 350 rxq_free_elts_mprq(rxq_ctrl); 351 else 352 rxq_free_elts_sprq(rxq_ctrl); 353 } 354 355 /** 356 * Returns the per-queue supported offloads. 357 * 358 * @param dev 359 * Pointer to Ethernet device. 360 * 361 * @return 362 * Supported Rx offloads. 363 */ 364 uint64_t 365 mlx5_get_rx_queue_offloads(struct rte_eth_dev *dev) 366 { 367 struct mlx5_priv *priv = dev->data->dev_private; 368 struct mlx5_dev_config *config = &priv->config; 369 uint64_t offloads = (DEV_RX_OFFLOAD_SCATTER | 370 DEV_RX_OFFLOAD_TIMESTAMP | 371 DEV_RX_OFFLOAD_JUMBO_FRAME); 372 373 if (config->hw_fcs_strip) 374 offloads |= DEV_RX_OFFLOAD_KEEP_CRC; 375 376 if (config->hw_csum) 377 offloads |= (DEV_RX_OFFLOAD_IPV4_CKSUM | 378 DEV_RX_OFFLOAD_UDP_CKSUM | 379 DEV_RX_OFFLOAD_TCP_CKSUM); 380 if (config->hw_vlan_strip) 381 offloads |= DEV_RX_OFFLOAD_VLAN_STRIP; 382 if (MLX5_LRO_SUPPORTED(dev)) 383 offloads |= DEV_RX_OFFLOAD_TCP_LRO; 384 return offloads; 385 } 386 387 388 /** 389 * Returns the per-port supported offloads. 390 * 391 * @return 392 * Supported Rx offloads. 
393 */ 394 uint64_t 395 mlx5_get_rx_port_offloads(void) 396 { 397 uint64_t offloads = DEV_RX_OFFLOAD_VLAN_FILTER; 398 399 return offloads; 400 } 401 402 /** 403 * Verify if the queue can be released. 404 * 405 * @param dev 406 * Pointer to Ethernet device. 407 * @param idx 408 * RX queue index. 409 * 410 * @return 411 * 1 if the queue can be released 412 * 0 if the queue can not be released, there are references to it. 413 * Negative errno and rte_errno is set if queue doesn't exist. 414 */ 415 static int 416 mlx5_rxq_releasable(struct rte_eth_dev *dev, uint16_t idx) 417 { 418 struct mlx5_priv *priv = dev->data->dev_private; 419 struct mlx5_rxq_ctrl *rxq_ctrl; 420 421 if (!(*priv->rxqs)[idx]) { 422 rte_errno = EINVAL; 423 return -rte_errno; 424 } 425 rxq_ctrl = container_of((*priv->rxqs)[idx], struct mlx5_rxq_ctrl, rxq); 426 return (rte_atomic32_read(&rxq_ctrl->refcnt) == 1); 427 } 428 429 /** 430 * 431 * @param dev 432 * Pointer to Ethernet device structure. 433 * @param idx 434 * RX queue index. 435 * @param desc 436 * Number of descriptors to configure in queue. 437 * @param socket 438 * NUMA socket on which memory must be allocated. 439 * @param[in] conf 440 * Thresholds parameters. 441 * @param mp 442 * Memory pool for buffer allocations. 443 * 444 * @return 445 * 0 on success, a negative errno value otherwise and rte_errno is set. 446 */ 447 int 448 mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, 449 unsigned int socket, const struct rte_eth_rxconf *conf, 450 struct rte_mempool *mp) 451 { 452 struct mlx5_priv *priv = dev->data->dev_private; 453 struct mlx5_rxq_data *rxq = (*priv->rxqs)[idx]; 454 struct mlx5_rxq_ctrl *rxq_ctrl = 455 container_of(rxq, struct mlx5_rxq_ctrl, rxq); 456 457 if (!rte_is_power_of_2(desc)) { 458 desc = 1 << log2above(desc); 459 DRV_LOG(WARNING, 460 "port %u increased number of descriptors in Rx queue %u" 461 " to the next power of two (%d)", 462 dev->data->port_id, idx, desc); 463 } 464 DRV_LOG(DEBUG, "port %u configuring Rx queue %u for %u descriptors", 465 dev->data->port_id, idx, desc); 466 if (idx >= priv->rxqs_n) { 467 DRV_LOG(ERR, "port %u Rx queue index out of range (%u >= %u)", 468 dev->data->port_id, idx, priv->rxqs_n); 469 rte_errno = EOVERFLOW; 470 return -rte_errno; 471 } 472 if (!mlx5_rxq_releasable(dev, idx)) { 473 DRV_LOG(ERR, "port %u unable to release queue index %u", 474 dev->data->port_id, idx); 475 rte_errno = EBUSY; 476 return -rte_errno; 477 } 478 mlx5_rxq_release(dev, idx); 479 rxq_ctrl = mlx5_rxq_new(dev, idx, desc, socket, conf, mp); 480 if (!rxq_ctrl) { 481 DRV_LOG(ERR, "port %u unable to allocate queue index %u", 482 dev->data->port_id, idx); 483 rte_errno = ENOMEM; 484 return -rte_errno; 485 } 486 DRV_LOG(DEBUG, "port %u adding Rx queue %u to list", 487 dev->data->port_id, idx); 488 (*priv->rxqs)[idx] = &rxq_ctrl->rxq; 489 return 0; 490 } 491 492 /** 493 * DPDK callback to release a RX queue. 494 * 495 * @param dpdk_rxq 496 * Generic RX queue pointer. 
497 */ 498 void 499 mlx5_rx_queue_release(void *dpdk_rxq) 500 { 501 struct mlx5_rxq_data *rxq = (struct mlx5_rxq_data *)dpdk_rxq; 502 struct mlx5_rxq_ctrl *rxq_ctrl; 503 struct mlx5_priv *priv; 504 505 if (rxq == NULL) 506 return; 507 rxq_ctrl = container_of(rxq, struct mlx5_rxq_ctrl, rxq); 508 priv = rxq_ctrl->priv; 509 if (!mlx5_rxq_releasable(ETH_DEV(priv), rxq_ctrl->rxq.idx)) 510 rte_panic("port %u Rx queue %u is still used by a flow and" 511 " cannot be removed\n", 512 PORT_ID(priv), rxq->idx); 513 mlx5_rxq_release(ETH_DEV(priv), rxq_ctrl->rxq.idx); 514 } 515 516 /** 517 * Get an Rx queue Verbs/DevX object. 518 * 519 * @param dev 520 * Pointer to Ethernet device. 521 * @param idx 522 * Queue index in DPDK Rx queue array 523 * 524 * @return 525 * The Verbs/DevX object if it exists. 526 */ 527 static struct mlx5_rxq_obj * 528 mlx5_rxq_obj_get(struct rte_eth_dev *dev, uint16_t idx) 529 { 530 struct mlx5_priv *priv = dev->data->dev_private; 531 struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx]; 532 struct mlx5_rxq_ctrl *rxq_ctrl; 533 534 if (idx >= priv->rxqs_n) 535 return NULL; 536 if (!rxq_data) 537 return NULL; 538 rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq); 539 if (rxq_ctrl->obj) 540 rte_atomic32_inc(&rxq_ctrl->obj->refcnt); 541 return rxq_ctrl->obj; 542 } 543 544 /** 545 * Release the resources allocated for an RQ DevX object. 546 * 547 * @param rxq_ctrl 548 * DevX Rx queue object. 549 */ 550 static void 551 rxq_release_rq_resources(struct mlx5_rxq_ctrl *rxq_ctrl) 552 { 553 if (rxq_ctrl->rxq.wqes) { 554 rte_free((void *)(uintptr_t)rxq_ctrl->rxq.wqes); 555 rxq_ctrl->rxq.wqes = NULL; 556 } 557 if (rxq_ctrl->wq_umem) { 558 mlx5_glue->devx_umem_dereg(rxq_ctrl->wq_umem); 559 rxq_ctrl->wq_umem = NULL; 560 } 561 } 562 563 /** 564 * Release an Rx verbs/DevX queue object. 565 * 566 * @param rxq_obj 567 * Verbs/DevX Rx queue object. 568 * 569 * @return 570 * 1 while a reference on it exists, 0 when freed. 571 */ 572 static int 573 mlx5_rxq_obj_release(struct mlx5_rxq_obj *rxq_obj) 574 { 575 assert(rxq_obj); 576 if (rxq_obj->type == MLX5_RXQ_OBJ_TYPE_IBV) 577 assert(rxq_obj->wq); 578 assert(rxq_obj->cq); 579 if (rte_atomic32_dec_and_test(&rxq_obj->refcnt)) { 580 rxq_free_elts(rxq_obj->rxq_ctrl); 581 if (rxq_obj->type == MLX5_RXQ_OBJ_TYPE_IBV) { 582 claim_zero(mlx5_glue->destroy_wq(rxq_obj->wq)); 583 } else if (rxq_obj->type == MLX5_RXQ_OBJ_TYPE_DEVX_RQ) { 584 claim_zero(mlx5_devx_cmd_destroy(rxq_obj->rq)); 585 rxq_release_rq_resources(rxq_obj->rxq_ctrl); 586 } 587 claim_zero(mlx5_glue->destroy_cq(rxq_obj->cq)); 588 if (rxq_obj->channel) 589 claim_zero(mlx5_glue->destroy_comp_channel 590 (rxq_obj->channel)); 591 LIST_REMOVE(rxq_obj, next); 592 rte_free(rxq_obj); 593 return 0; 594 } 595 return 1; 596 } 597 598 /** 599 * Allocate queue vector and fill epoll fd list for Rx interrupts. 600 * 601 * @param dev 602 * Pointer to Ethernet device. 603 * 604 * @return 605 * 0 on success, a negative errno value otherwise and rte_errno is set. 
606 */ 607 int 608 mlx5_rx_intr_vec_enable(struct rte_eth_dev *dev) 609 { 610 struct mlx5_priv *priv = dev->data->dev_private; 611 unsigned int i; 612 unsigned int rxqs_n = priv->rxqs_n; 613 unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID); 614 unsigned int count = 0; 615 struct rte_intr_handle *intr_handle = dev->intr_handle; 616 617 if (!dev->data->dev_conf.intr_conf.rxq) 618 return 0; 619 mlx5_rx_intr_vec_disable(dev); 620 intr_handle->intr_vec = malloc(n * sizeof(intr_handle->intr_vec[0])); 621 if (intr_handle->intr_vec == NULL) { 622 DRV_LOG(ERR, 623 "port %u failed to allocate memory for interrupt" 624 " vector, Rx interrupts will not be supported", 625 dev->data->port_id); 626 rte_errno = ENOMEM; 627 return -rte_errno; 628 } 629 intr_handle->type = RTE_INTR_HANDLE_EXT; 630 for (i = 0; i != n; ++i) { 631 /* This rxq obj must not be released in this function. */ 632 struct mlx5_rxq_obj *rxq_obj = mlx5_rxq_obj_get(dev, i); 633 int fd; 634 int flags; 635 int rc; 636 637 /* Skip queues that cannot request interrupts. */ 638 if (!rxq_obj || !rxq_obj->channel) { 639 /* Use invalid intr_vec[] index to disable entry. */ 640 intr_handle->intr_vec[i] = 641 RTE_INTR_VEC_RXTX_OFFSET + 642 RTE_MAX_RXTX_INTR_VEC_ID; 643 continue; 644 } 645 if (count >= RTE_MAX_RXTX_INTR_VEC_ID) { 646 DRV_LOG(ERR, 647 "port %u too many Rx queues for interrupt" 648 " vector size (%d), Rx interrupts cannot be" 649 " enabled", 650 dev->data->port_id, RTE_MAX_RXTX_INTR_VEC_ID); 651 mlx5_rx_intr_vec_disable(dev); 652 rte_errno = ENOMEM; 653 return -rte_errno; 654 } 655 fd = rxq_obj->channel->fd; 656 flags = fcntl(fd, F_GETFL); 657 rc = fcntl(fd, F_SETFL, flags | O_NONBLOCK); 658 if (rc < 0) { 659 rte_errno = errno; 660 DRV_LOG(ERR, 661 "port %u failed to make Rx interrupt file" 662 " descriptor %d non-blocking for queue index" 663 " %d", 664 dev->data->port_id, fd, i); 665 mlx5_rx_intr_vec_disable(dev); 666 return -rte_errno; 667 } 668 intr_handle->intr_vec[i] = RTE_INTR_VEC_RXTX_OFFSET + count; 669 intr_handle->efds[count] = fd; 670 count++; 671 } 672 if (!count) 673 mlx5_rx_intr_vec_disable(dev); 674 else 675 intr_handle->nb_efd = count; 676 return 0; 677 } 678 679 /** 680 * Clean up Rx interrupts handler. 681 * 682 * @param dev 683 * Pointer to Ethernet device. 684 */ 685 void 686 mlx5_rx_intr_vec_disable(struct rte_eth_dev *dev) 687 { 688 struct mlx5_priv *priv = dev->data->dev_private; 689 struct rte_intr_handle *intr_handle = dev->intr_handle; 690 unsigned int i; 691 unsigned int rxqs_n = priv->rxqs_n; 692 unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID); 693 694 if (!dev->data->dev_conf.intr_conf.rxq) 695 return; 696 if (!intr_handle->intr_vec) 697 goto free; 698 for (i = 0; i != n; ++i) { 699 struct mlx5_rxq_ctrl *rxq_ctrl; 700 struct mlx5_rxq_data *rxq_data; 701 702 if (intr_handle->intr_vec[i] == RTE_INTR_VEC_RXTX_OFFSET + 703 RTE_MAX_RXTX_INTR_VEC_ID) 704 continue; 705 /** 706 * Need to access directly the queue to release the reference 707 * kept in mlx5_rx_intr_vec_enable(). 708 */ 709 rxq_data = (*priv->rxqs)[i]; 710 rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq); 711 if (rxq_ctrl->obj) 712 mlx5_rxq_obj_release(rxq_ctrl->obj); 713 } 714 free: 715 rte_intr_free_epoll_fd(intr_handle); 716 if (intr_handle->intr_vec) 717 free(intr_handle->intr_vec); 718 intr_handle->nb_efd = 0; 719 intr_handle->intr_vec = NULL; 720 } 721 722 /** 723 * MLX5 CQ notification . 724 * 725 * @param rxq 726 * Pointer to receive queue structure. 
727 * @param sq_n_rxq 728 * Sequence number per receive queue . 729 */ 730 static inline void 731 mlx5_arm_cq(struct mlx5_rxq_data *rxq, int sq_n_rxq) 732 { 733 int sq_n = 0; 734 uint32_t doorbell_hi; 735 uint64_t doorbell; 736 void *cq_db_reg = (char *)rxq->cq_uar + MLX5_CQ_DOORBELL; 737 738 sq_n = sq_n_rxq & MLX5_CQ_SQN_MASK; 739 doorbell_hi = sq_n << MLX5_CQ_SQN_OFFSET | (rxq->cq_ci & MLX5_CI_MASK); 740 doorbell = (uint64_t)doorbell_hi << 32; 741 doorbell |= rxq->cqn; 742 rxq->cq_db[MLX5_CQ_ARM_DB] = rte_cpu_to_be_32(doorbell_hi); 743 mlx5_uar_write64(rte_cpu_to_be_64(doorbell), 744 cq_db_reg, rxq->uar_lock_cq); 745 } 746 747 /** 748 * DPDK callback for Rx queue interrupt enable. 749 * 750 * @param dev 751 * Pointer to Ethernet device structure. 752 * @param rx_queue_id 753 * Rx queue number. 754 * 755 * @return 756 * 0 on success, a negative errno value otherwise and rte_errno is set. 757 */ 758 int 759 mlx5_rx_intr_enable(struct rte_eth_dev *dev, uint16_t rx_queue_id) 760 { 761 struct mlx5_priv *priv = dev->data->dev_private; 762 struct mlx5_rxq_data *rxq_data; 763 struct mlx5_rxq_ctrl *rxq_ctrl; 764 765 rxq_data = (*priv->rxqs)[rx_queue_id]; 766 if (!rxq_data) { 767 rte_errno = EINVAL; 768 return -rte_errno; 769 } 770 rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq); 771 if (rxq_ctrl->irq) { 772 struct mlx5_rxq_obj *rxq_obj; 773 774 rxq_obj = mlx5_rxq_obj_get(dev, rx_queue_id); 775 if (!rxq_obj) { 776 rte_errno = EINVAL; 777 return -rte_errno; 778 } 779 mlx5_arm_cq(rxq_data, rxq_data->cq_arm_sn); 780 mlx5_rxq_obj_release(rxq_obj); 781 } 782 return 0; 783 } 784 785 /** 786 * DPDK callback for Rx queue interrupt disable. 787 * 788 * @param dev 789 * Pointer to Ethernet device structure. 790 * @param rx_queue_id 791 * Rx queue number. 792 * 793 * @return 794 * 0 on success, a negative errno value otherwise and rte_errno is set. 795 */ 796 int 797 mlx5_rx_intr_disable(struct rte_eth_dev *dev, uint16_t rx_queue_id) 798 { 799 struct mlx5_priv *priv = dev->data->dev_private; 800 struct mlx5_rxq_data *rxq_data; 801 struct mlx5_rxq_ctrl *rxq_ctrl; 802 struct mlx5_rxq_obj *rxq_obj = NULL; 803 struct ibv_cq *ev_cq; 804 void *ev_ctx; 805 int ret; 806 807 rxq_data = (*priv->rxqs)[rx_queue_id]; 808 if (!rxq_data) { 809 rte_errno = EINVAL; 810 return -rte_errno; 811 } 812 rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq); 813 if (!rxq_ctrl->irq) 814 return 0; 815 rxq_obj = mlx5_rxq_obj_get(dev, rx_queue_id); 816 if (!rxq_obj) { 817 rte_errno = EINVAL; 818 return -rte_errno; 819 } 820 ret = mlx5_glue->get_cq_event(rxq_obj->channel, &ev_cq, &ev_ctx); 821 if (ret || ev_cq != rxq_obj->cq) { 822 rte_errno = EINVAL; 823 goto exit; 824 } 825 rxq_data->cq_arm_sn++; 826 mlx5_glue->ack_cq_events(rxq_obj->cq, 1); 827 mlx5_rxq_obj_release(rxq_obj); 828 return 0; 829 exit: 830 ret = rte_errno; /* Save rte_errno before cleanup. */ 831 if (rxq_obj) 832 mlx5_rxq_obj_release(rxq_obj); 833 DRV_LOG(WARNING, "port %u unable to disable interrupt on Rx queue %d", 834 dev->data->port_id, rx_queue_id); 835 rte_errno = ret; /* Restore rte_errno. */ 836 return -rte_errno; 837 } 838 839 /** 840 * Create a CQ Verbs object. 841 * 842 * @param dev 843 * Pointer to Ethernet device. 844 * @param priv 845 * Pointer to device private data. 846 * @param rxq_data 847 * Pointer to Rx queue data. 848 * @param cqe_n 849 * Number of CQEs in CQ. 850 * @param rxq_obj 851 * Pointer to Rx queue object data. 852 * 853 * @return 854 * The Verbs object initialised, NULL otherwise and rte_errno is set. 
855 */ 856 static struct ibv_cq * 857 mlx5_ibv_cq_new(struct rte_eth_dev *dev, struct mlx5_priv *priv, 858 struct mlx5_rxq_data *rxq_data, 859 unsigned int cqe_n, struct mlx5_rxq_obj *rxq_obj) 860 { 861 struct { 862 struct ibv_cq_init_attr_ex ibv; 863 struct mlx5dv_cq_init_attr mlx5; 864 } cq_attr; 865 866 cq_attr.ibv = (struct ibv_cq_init_attr_ex){ 867 .cqe = cqe_n, 868 .channel = rxq_obj->channel, 869 .comp_mask = 0, 870 }; 871 cq_attr.mlx5 = (struct mlx5dv_cq_init_attr){ 872 .comp_mask = 0, 873 }; 874 if (priv->config.cqe_comp && !rxq_data->hw_timestamp && 875 !rxq_data->lro) { 876 cq_attr.mlx5.comp_mask |= 877 MLX5DV_CQ_INIT_ATTR_MASK_COMPRESSED_CQE; 878 #ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT 879 cq_attr.mlx5.cqe_comp_res_format = 880 mlx5_rxq_mprq_enabled(rxq_data) ? 881 MLX5DV_CQE_RES_FORMAT_CSUM_STRIDX : 882 MLX5DV_CQE_RES_FORMAT_HASH; 883 #else 884 cq_attr.mlx5.cqe_comp_res_format = MLX5DV_CQE_RES_FORMAT_HASH; 885 #endif 886 /* 887 * For vectorized Rx, it must not be doubled in order to 888 * make cq_ci and rq_ci aligned. 889 */ 890 if (mlx5_rxq_check_vec_support(rxq_data) < 0) 891 cq_attr.ibv.cqe *= 2; 892 } else if (priv->config.cqe_comp && rxq_data->hw_timestamp) { 893 DRV_LOG(DEBUG, 894 "port %u Rx CQE compression is disabled for HW" 895 " timestamp", 896 dev->data->port_id); 897 } else if (priv->config.cqe_comp && rxq_data->lro) { 898 DRV_LOG(DEBUG, 899 "port %u Rx CQE compression is disabled for LRO", 900 dev->data->port_id); 901 } 902 #ifdef HAVE_IBV_MLX5_MOD_CQE_128B_PAD 903 if (priv->config.cqe_pad) { 904 cq_attr.mlx5.comp_mask |= MLX5DV_CQ_INIT_ATTR_MASK_FLAGS; 905 cq_attr.mlx5.flags |= MLX5DV_CQ_INIT_ATTR_FLAGS_CQE_PAD; 906 } 907 #endif 908 return mlx5_glue->cq_ex_to_cq(mlx5_glue->dv_create_cq(priv->sh->ctx, 909 &cq_attr.ibv, 910 &cq_attr.mlx5)); 911 } 912 913 /** 914 * Create a WQ Verbs object. 915 * 916 * @param dev 917 * Pointer to Ethernet device. 918 * @param priv 919 * Pointer to device private data. 920 * @param rxq_data 921 * Pointer to Rx queue data. 922 * @param idx 923 * Queue index in DPDK Rx queue array 924 * @param wqe_n 925 * Number of WQEs in WQ. 926 * @param rxq_obj 927 * Pointer to Rx queue object data. 928 * 929 * @return 930 * The Verbs object initialised, NULL otherwise and rte_errno is set. 931 */ 932 static struct ibv_wq * 933 mlx5_ibv_wq_new(struct rte_eth_dev *dev, struct mlx5_priv *priv, 934 struct mlx5_rxq_data *rxq_data, uint16_t idx, 935 unsigned int wqe_n, struct mlx5_rxq_obj *rxq_obj) 936 { 937 struct { 938 struct ibv_wq_init_attr ibv; 939 #ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT 940 struct mlx5dv_wq_init_attr mlx5; 941 #endif 942 } wq_attr; 943 944 wq_attr.ibv = (struct ibv_wq_init_attr){ 945 .wq_context = NULL, /* Could be useful in the future. */ 946 .wq_type = IBV_WQT_RQ, 947 /* Max number of outstanding WRs. */ 948 .max_wr = wqe_n >> rxq_data->sges_n, 949 /* Max number of scatter/gather elements in a WR. */ 950 .max_sge = 1 << rxq_data->sges_n, 951 .pd = priv->sh->pd, 952 .cq = rxq_obj->cq, 953 .comp_mask = IBV_WQ_FLAGS_CVLAN_STRIPPING | 0, 954 .create_flags = (rxq_data->vlan_strip ? 955 IBV_WQ_FLAGS_CVLAN_STRIPPING : 0), 956 }; 957 /* By default, FCS (CRC) is stripped by hardware. 
*/ 958 if (rxq_data->crc_present) { 959 wq_attr.ibv.create_flags |= IBV_WQ_FLAGS_SCATTER_FCS; 960 wq_attr.ibv.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS; 961 } 962 if (priv->config.hw_padding) { 963 #if defined(HAVE_IBV_WQ_FLAG_RX_END_PADDING) 964 wq_attr.ibv.create_flags |= IBV_WQ_FLAG_RX_END_PADDING; 965 wq_attr.ibv.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS; 966 #elif defined(HAVE_IBV_WQ_FLAGS_PCI_WRITE_END_PADDING) 967 wq_attr.ibv.create_flags |= IBV_WQ_FLAGS_PCI_WRITE_END_PADDING; 968 wq_attr.ibv.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS; 969 #endif 970 } 971 #ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT 972 wq_attr.mlx5 = (struct mlx5dv_wq_init_attr){ 973 .comp_mask = 0, 974 }; 975 if (mlx5_rxq_mprq_enabled(rxq_data)) { 976 struct mlx5dv_striding_rq_init_attr *mprq_attr = 977 &wq_attr.mlx5.striding_rq_attrs; 978 979 wq_attr.mlx5.comp_mask |= MLX5DV_WQ_INIT_ATTR_MASK_STRIDING_RQ; 980 *mprq_attr = (struct mlx5dv_striding_rq_init_attr){ 981 .single_stride_log_num_of_bytes = rxq_data->strd_sz_n, 982 .single_wqe_log_num_of_strides = rxq_data->strd_num_n, 983 .two_byte_shift_en = MLX5_MPRQ_TWO_BYTE_SHIFT, 984 }; 985 } 986 rxq_obj->wq = mlx5_glue->dv_create_wq(priv->sh->ctx, &wq_attr.ibv, 987 &wq_attr.mlx5); 988 #else 989 rxq_obj->wq = mlx5_glue->create_wq(priv->sh->ctx, &wq_attr.ibv); 990 #endif 991 if (rxq_obj->wq) { 992 /* 993 * Make sure number of WRs*SGEs match expectations since a queue 994 * cannot allocate more than "desc" buffers. 995 */ 996 if (wq_attr.ibv.max_wr != (wqe_n >> rxq_data->sges_n) || 997 wq_attr.ibv.max_sge != (1u << rxq_data->sges_n)) { 998 DRV_LOG(ERR, 999 "port %u Rx queue %u requested %u*%u but got" 1000 " %u*%u WRs*SGEs", 1001 dev->data->port_id, idx, 1002 wqe_n >> rxq_data->sges_n, 1003 (1 << rxq_data->sges_n), 1004 wq_attr.ibv.max_wr, wq_attr.ibv.max_sge); 1005 claim_zero(mlx5_glue->destroy_wq(rxq_obj->wq)); 1006 rxq_obj->wq = NULL; 1007 rte_errno = EINVAL; 1008 } 1009 } 1010 return rxq_obj->wq; 1011 } 1012 1013 /** 1014 * Fill common fields of create RQ attributes structure. 1015 * 1016 * @param rxq_data 1017 * Pointer to Rx queue data. 1018 * @param cqn 1019 * CQ number to use with this RQ. 1020 * @param rq_attr 1021 * RQ attributes structure to fill.. 1022 */ 1023 static void 1024 mlx5_devx_create_rq_attr_fill(struct mlx5_rxq_data *rxq_data, uint32_t cqn, 1025 struct mlx5_devx_create_rq_attr *rq_attr) 1026 { 1027 rq_attr->state = MLX5_RQC_STATE_RST; 1028 rq_attr->vsd = (rxq_data->vlan_strip) ? 0 : 1; 1029 rq_attr->cqn = cqn; 1030 rq_attr->scatter_fcs = (rxq_data->crc_present) ? 1 : 0; 1031 } 1032 1033 /** 1034 * Fill common fields of DevX WQ attributes structure. 1035 * 1036 * @param priv 1037 * Pointer to device private data. 1038 * @param rxq_ctrl 1039 * Pointer to Rx queue control structure. 1040 * @param wq_attr 1041 * WQ attributes structure to fill.. 1042 */ 1043 static void 1044 mlx5_devx_wq_attr_fill(struct mlx5_priv *priv, struct mlx5_rxq_ctrl *rxq_ctrl, 1045 struct mlx5_devx_wq_attr *wq_attr) 1046 { 1047 wq_attr->end_padding_mode = priv->config.cqe_pad ? 1048 MLX5_WQ_END_PAD_MODE_ALIGN : 1049 MLX5_WQ_END_PAD_MODE_NONE; 1050 wq_attr->pd = priv->sh->pdn; 1051 wq_attr->dbr_addr = rxq_ctrl->dbr_offset; 1052 wq_attr->dbr_umem_id = rxq_ctrl->dbr_umem_id; 1053 wq_attr->dbr_umem_valid = 1; 1054 wq_attr->wq_umem_id = rxq_ctrl->wq_umem->umem_id; 1055 wq_attr->wq_umem_valid = 1; 1056 } 1057 1058 /** 1059 * Create a RQ object using DevX. 1060 * 1061 * @param dev 1062 * Pointer to Ethernet device. 
1063 * @param idx 1064 * Queue index in DPDK Rx queue array 1065 * @param cqn 1066 * CQ number to use with this RQ. 1067 * 1068 * @return 1069 * The DevX object initialised, NULL otherwise and rte_errno is set. 1070 */ 1071 static struct mlx5_devx_obj * 1072 mlx5_devx_rq_new(struct rte_eth_dev *dev, uint16_t idx, uint32_t cqn) 1073 { 1074 struct mlx5_priv *priv = dev->data->dev_private; 1075 struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx]; 1076 struct mlx5_rxq_ctrl *rxq_ctrl = 1077 container_of(rxq_data, struct mlx5_rxq_ctrl, rxq); 1078 struct mlx5_devx_create_rq_attr rq_attr; 1079 uint32_t wqe_n = 1 << (rxq_data->elts_n - rxq_data->sges_n); 1080 uint32_t wq_size = 0; 1081 uint32_t wqe_size = 0; 1082 uint32_t log_wqe_size = 0; 1083 void *buf = NULL; 1084 struct mlx5_devx_obj *rq; 1085 1086 memset(&rq_attr, 0, sizeof(rq_attr)); 1087 /* Fill RQ attributes. */ 1088 rq_attr.mem_rq_type = MLX5_RQC_MEM_RQ_TYPE_MEMORY_RQ_INLINE; 1089 rq_attr.flush_in_error_en = 1; 1090 mlx5_devx_create_rq_attr_fill(rxq_data, cqn, &rq_attr); 1091 /* Fill WQ attributes for this RQ. */ 1092 if (mlx5_rxq_mprq_enabled(rxq_data)) { 1093 rq_attr.wq_attr.wq_type = MLX5_WQ_TYPE_CYCLIC_STRIDING_RQ; 1094 /* 1095 * Number of strides in each WQE: 1096 * 512*2^single_wqe_log_num_of_strides. 1097 */ 1098 rq_attr.wq_attr.single_wqe_log_num_of_strides = 1099 rxq_data->strd_num_n - 1100 MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES; 1101 /* Stride size = (2^single_stride_log_num_of_bytes)*64B. */ 1102 rq_attr.wq_attr.single_stride_log_num_of_bytes = 1103 rxq_data->strd_sz_n - 1104 MLX5_MIN_SINGLE_STRIDE_LOG_NUM_BYTES; 1105 wqe_size = sizeof(struct mlx5_wqe_mprq); 1106 } else { 1107 rq_attr.wq_attr.wq_type = MLX5_WQ_TYPE_CYCLIC; 1108 wqe_size = sizeof(struct mlx5_wqe_data_seg); 1109 } 1110 log_wqe_size = log2above(wqe_size) + rxq_data->sges_n; 1111 rq_attr.wq_attr.log_wq_stride = log_wqe_size; 1112 rq_attr.wq_attr.log_wq_sz = rxq_data->elts_n - rxq_data->sges_n; 1113 /* Calculate and allocate WQ memory space. */ 1114 wqe_size = 1 << log_wqe_size; /* round up power of two.*/ 1115 wq_size = wqe_n * wqe_size; 1116 buf = rte_calloc_socket(__func__, 1, wq_size, MLX5_WQE_BUF_ALIGNMENT, 1117 rxq_ctrl->socket); 1118 if (!buf) 1119 return NULL; 1120 rxq_data->wqes = buf; 1121 rxq_ctrl->wq_umem = mlx5_glue->devx_umem_reg(priv->sh->ctx, 1122 buf, wq_size, 0); 1123 if (!rxq_ctrl->wq_umem) { 1124 rte_free(buf); 1125 return NULL; 1126 } 1127 mlx5_devx_wq_attr_fill(priv, rxq_ctrl, &rq_attr.wq_attr); 1128 rq = mlx5_devx_cmd_create_rq(priv->sh->ctx, &rq_attr, rxq_ctrl->socket); 1129 if (!rq) 1130 rxq_release_rq_resources(rxq_ctrl); 1131 return rq; 1132 } 1133 1134 /** 1135 * Create the Rx queue Verbs/DevX object. 1136 * 1137 * @param dev 1138 * Pointer to Ethernet device. 1139 * @param idx 1140 * Queue index in DPDK Rx queue array 1141 * @param type 1142 * Type of Rx queue object to create. 1143 * 1144 * @return 1145 * The Verbs/DevX object initialised, NULL otherwise and rte_errno is set. 
1146 */ 1147 struct mlx5_rxq_obj * 1148 mlx5_rxq_obj_new(struct rte_eth_dev *dev, uint16_t idx, 1149 enum mlx5_rxq_obj_type type) 1150 { 1151 struct mlx5_priv *priv = dev->data->dev_private; 1152 struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx]; 1153 struct mlx5_rxq_ctrl *rxq_ctrl = 1154 container_of(rxq_data, struct mlx5_rxq_ctrl, rxq); 1155 struct ibv_wq_attr mod; 1156 unsigned int cqe_n; 1157 unsigned int wqe_n = 1 << rxq_data->elts_n; 1158 struct mlx5_rxq_obj *tmpl = NULL; 1159 struct mlx5dv_cq cq_info; 1160 struct mlx5dv_rwq rwq; 1161 int ret = 0; 1162 struct mlx5dv_obj obj; 1163 1164 assert(rxq_data); 1165 assert(!rxq_ctrl->obj); 1166 priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_RX_QUEUE; 1167 priv->verbs_alloc_ctx.obj = rxq_ctrl; 1168 tmpl = rte_calloc_socket(__func__, 1, sizeof(*tmpl), 0, 1169 rxq_ctrl->socket); 1170 if (!tmpl) { 1171 DRV_LOG(ERR, 1172 "port %u Rx queue %u cannot allocate verbs resources", 1173 dev->data->port_id, rxq_data->idx); 1174 rte_errno = ENOMEM; 1175 goto error; 1176 } 1177 tmpl->type = type; 1178 tmpl->rxq_ctrl = rxq_ctrl; 1179 if (rxq_ctrl->irq) { 1180 tmpl->channel = mlx5_glue->create_comp_channel(priv->sh->ctx); 1181 if (!tmpl->channel) { 1182 DRV_LOG(ERR, "port %u: comp channel creation failure", 1183 dev->data->port_id); 1184 rte_errno = ENOMEM; 1185 goto error; 1186 } 1187 } 1188 if (mlx5_rxq_mprq_enabled(rxq_data)) 1189 cqe_n = wqe_n * (1 << rxq_data->strd_num_n) - 1; 1190 else 1191 cqe_n = wqe_n - 1; 1192 tmpl->cq = mlx5_ibv_cq_new(dev, priv, rxq_data, cqe_n, tmpl); 1193 if (!tmpl->cq) { 1194 DRV_LOG(ERR, "port %u Rx queue %u CQ creation failure", 1195 dev->data->port_id, idx); 1196 rte_errno = ENOMEM; 1197 goto error; 1198 } 1199 obj.cq.in = tmpl->cq; 1200 obj.cq.out = &cq_info; 1201 ret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_CQ); 1202 if (ret) { 1203 rte_errno = ret; 1204 goto error; 1205 } 1206 if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) { 1207 DRV_LOG(ERR, 1208 "port %u wrong MLX5_CQE_SIZE environment variable" 1209 " value: it should be set to %u", 1210 dev->data->port_id, RTE_CACHE_LINE_SIZE); 1211 rte_errno = EINVAL; 1212 goto error; 1213 } 1214 DRV_LOG(DEBUG, "port %u device_attr.max_qp_wr is %d", 1215 dev->data->port_id, priv->sh->device_attr.orig_attr.max_qp_wr); 1216 DRV_LOG(DEBUG, "port %u device_attr.max_sge is %d", 1217 dev->data->port_id, priv->sh->device_attr.orig_attr.max_sge); 1218 /* Allocate door-bell for types created with DevX. */ 1219 if (tmpl->type != MLX5_RXQ_OBJ_TYPE_IBV) { 1220 struct mlx5_devx_dbr_page *dbr_page; 1221 int64_t dbr_offset; 1222 1223 dbr_offset = mlx5_get_dbr(dev, &dbr_page); 1224 if (dbr_offset < 0) 1225 goto error; 1226 rxq_ctrl->dbr_offset = dbr_offset; 1227 rxq_ctrl->dbr_umem_id = dbr_page->umem->umem_id; 1228 rxq_ctrl->dbr_umem_id_valid = 1; 1229 rxq_data->rq_db = (uint32_t *)((uintptr_t)dbr_page->dbrs + 1230 (uintptr_t)rxq_ctrl->dbr_offset); 1231 } 1232 if (tmpl->type == MLX5_RXQ_OBJ_TYPE_IBV) { 1233 tmpl->wq = mlx5_ibv_wq_new(dev, priv, rxq_data, idx, wqe_n, 1234 tmpl); 1235 if (!tmpl->wq) { 1236 DRV_LOG(ERR, "port %u Rx queue %u WQ creation failure", 1237 dev->data->port_id, idx); 1238 rte_errno = ENOMEM; 1239 goto error; 1240 } 1241 /* Change queue state to ready. 
*/ 1242 mod = (struct ibv_wq_attr){ 1243 .attr_mask = IBV_WQ_ATTR_STATE, 1244 .wq_state = IBV_WQS_RDY, 1245 }; 1246 ret = mlx5_glue->modify_wq(tmpl->wq, &mod); 1247 if (ret) { 1248 DRV_LOG(ERR, 1249 "port %u Rx queue %u WQ state to IBV_WQS_RDY" 1250 " failed", dev->data->port_id, idx); 1251 rte_errno = ret; 1252 goto error; 1253 } 1254 obj.rwq.in = tmpl->wq; 1255 obj.rwq.out = &rwq; 1256 ret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_RWQ); 1257 if (ret) { 1258 rte_errno = ret; 1259 goto error; 1260 } 1261 rxq_data->wqes = rwq.buf; 1262 rxq_data->rq_db = rwq.dbrec; 1263 } else if (tmpl->type == MLX5_RXQ_OBJ_TYPE_DEVX_RQ) { 1264 struct mlx5_devx_modify_rq_attr rq_attr; 1265 1266 memset(&rq_attr, 0, sizeof(rq_attr)); 1267 tmpl->rq = mlx5_devx_rq_new(dev, idx, cq_info.cqn); 1268 if (!tmpl->rq) { 1269 DRV_LOG(ERR, "port %u Rx queue %u RQ creation failure", 1270 dev->data->port_id, idx); 1271 rte_errno = ENOMEM; 1272 goto error; 1273 } 1274 /* Change queue state to ready. */ 1275 rq_attr.rq_state = MLX5_RQC_STATE_RST; 1276 rq_attr.state = MLX5_RQC_STATE_RDY; 1277 ret = mlx5_devx_cmd_modify_rq(tmpl->rq, &rq_attr); 1278 if (ret) 1279 goto error; 1280 } 1281 /* Fill the rings. */ 1282 rxq_data->cqe_n = log2above(cq_info.cqe_cnt); 1283 rxq_data->cq_db = cq_info.dbrec; 1284 rxq_data->cqes = (volatile struct mlx5_cqe (*)[])(uintptr_t)cq_info.buf; 1285 rxq_data->cq_uar = cq_info.cq_uar; 1286 rxq_data->cqn = cq_info.cqn; 1287 rxq_data->cq_arm_sn = 0; 1288 mlx5_rxq_initialize(rxq_data); 1289 rxq_data->cq_ci = 0; 1290 DRV_LOG(DEBUG, "port %u rxq %u updated with %p", dev->data->port_id, 1291 idx, (void *)&tmpl); 1292 rte_atomic32_inc(&tmpl->refcnt); 1293 LIST_INSERT_HEAD(&priv->rxqsobj, tmpl, next); 1294 priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE; 1295 return tmpl; 1296 error: 1297 if (tmpl) { 1298 ret = rte_errno; /* Save rte_errno before cleanup. */ 1299 if (tmpl->type == MLX5_RXQ_OBJ_TYPE_IBV && tmpl->wq) 1300 claim_zero(mlx5_glue->destroy_wq(tmpl->wq)); 1301 else if (tmpl->type == MLX5_RXQ_OBJ_TYPE_DEVX_RQ && tmpl->rq) 1302 claim_zero(mlx5_devx_cmd_destroy(tmpl->rq)); 1303 if (tmpl->cq) 1304 claim_zero(mlx5_glue->destroy_cq(tmpl->cq)); 1305 if (tmpl->channel) 1306 claim_zero(mlx5_glue->destroy_comp_channel 1307 (tmpl->channel)); 1308 rte_free(tmpl); 1309 rte_errno = ret; /* Restore rte_errno. */ 1310 } 1311 if (type == MLX5_RXQ_OBJ_TYPE_DEVX_RQ) 1312 rxq_release_rq_resources(rxq_ctrl); 1313 priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE; 1314 return NULL; 1315 } 1316 1317 /** 1318 * Verify the Rx queue objects list is empty 1319 * 1320 * @param dev 1321 * Pointer to Ethernet device. 1322 * 1323 * @return 1324 * The number of objects not released. 1325 */ 1326 int 1327 mlx5_rxq_obj_verify(struct rte_eth_dev *dev) 1328 { 1329 struct mlx5_priv *priv = dev->data->dev_private; 1330 int ret = 0; 1331 struct mlx5_rxq_obj *rxq_obj; 1332 1333 LIST_FOREACH(rxq_obj, &priv->rxqsobj, next) { 1334 DRV_LOG(DEBUG, "port %u Rx queue %u still referenced", 1335 dev->data->port_id, rxq_obj->rxq_ctrl->rxq.idx); 1336 ++ret; 1337 } 1338 return ret; 1339 } 1340 1341 /** 1342 * Callback function to initialize mbufs for Multi-Packet RQ. 
1343 */ 1344 static inline void 1345 mlx5_mprq_buf_init(struct rte_mempool *mp, void *opaque_arg, 1346 void *_m, unsigned int i __rte_unused) 1347 { 1348 struct mlx5_mprq_buf *buf = _m; 1349 struct rte_mbuf_ext_shared_info *shinfo; 1350 unsigned int strd_n = (unsigned int)(uintptr_t)opaque_arg; 1351 unsigned int j; 1352 1353 memset(_m, 0, sizeof(*buf)); 1354 buf->mp = mp; 1355 rte_atomic16_set(&buf->refcnt, 1); 1356 for (j = 0; j != strd_n; ++j) { 1357 shinfo = &buf->shinfos[j]; 1358 shinfo->free_cb = mlx5_mprq_buf_free_cb; 1359 shinfo->fcb_opaque = buf; 1360 } 1361 } 1362 1363 /** 1364 * Free mempool of Multi-Packet RQ. 1365 * 1366 * @param dev 1367 * Pointer to Ethernet device. 1368 * 1369 * @return 1370 * 0 on success, negative errno value on failure. 1371 */ 1372 int 1373 mlx5_mprq_free_mp(struct rte_eth_dev *dev) 1374 { 1375 struct mlx5_priv *priv = dev->data->dev_private; 1376 struct rte_mempool *mp = priv->mprq_mp; 1377 unsigned int i; 1378 1379 if (mp == NULL) 1380 return 0; 1381 DRV_LOG(DEBUG, "port %u freeing mempool (%s) for Multi-Packet RQ", 1382 dev->data->port_id, mp->name); 1383 /* 1384 * If a buffer in the pool has been externally attached to a mbuf and it 1385 * is still in use by application, destroying the Rx queue can spoil 1386 * the packet. It is unlikely to happen but if application dynamically 1387 * creates and destroys with holding Rx packets, this can happen. 1388 * 1389 * TODO: It is unavoidable for now because the mempool for Multi-Packet 1390 * RQ isn't provided by application but managed by PMD. 1391 */ 1392 if (!rte_mempool_full(mp)) { 1393 DRV_LOG(ERR, 1394 "port %u mempool for Multi-Packet RQ is still in use", 1395 dev->data->port_id); 1396 rte_errno = EBUSY; 1397 return -rte_errno; 1398 } 1399 rte_mempool_free(mp); 1400 /* Unset mempool for each Rx queue. */ 1401 for (i = 0; i != priv->rxqs_n; ++i) { 1402 struct mlx5_rxq_data *rxq = (*priv->rxqs)[i]; 1403 1404 if (rxq == NULL) 1405 continue; 1406 rxq->mprq_mp = NULL; 1407 } 1408 priv->mprq_mp = NULL; 1409 return 0; 1410 } 1411 1412 /** 1413 * Allocate a mempool for Multi-Packet RQ. All configured Rx queues share the 1414 * mempool. If already allocated, reuse it if there're enough elements. 1415 * Otherwise, resize it. 1416 * 1417 * @param dev 1418 * Pointer to Ethernet device. 1419 * 1420 * @return 1421 * 0 on success, negative errno value on failure. 1422 */ 1423 int 1424 mlx5_mprq_alloc_mp(struct rte_eth_dev *dev) 1425 { 1426 struct mlx5_priv *priv = dev->data->dev_private; 1427 struct rte_mempool *mp = priv->mprq_mp; 1428 char name[RTE_MEMPOOL_NAMESIZE]; 1429 unsigned int desc = 0; 1430 unsigned int buf_len; 1431 unsigned int obj_num; 1432 unsigned int obj_size; 1433 unsigned int strd_num_n = 0; 1434 unsigned int strd_sz_n = 0; 1435 unsigned int i; 1436 1437 if (!mlx5_mprq_enabled(dev)) 1438 return 0; 1439 /* Count the total number of descriptors configured. */ 1440 for (i = 0; i != priv->rxqs_n; ++i) { 1441 struct mlx5_rxq_data *rxq = (*priv->rxqs)[i]; 1442 1443 if (rxq == NULL) 1444 continue; 1445 desc += 1 << rxq->elts_n; 1446 /* Get the max number of strides. */ 1447 if (strd_num_n < rxq->strd_num_n) 1448 strd_num_n = rxq->strd_num_n; 1449 /* Get the max size of a stride. 
*/ 1450 if (strd_sz_n < rxq->strd_sz_n) 1451 strd_sz_n = rxq->strd_sz_n; 1452 } 1453 assert(strd_num_n && strd_sz_n); 1454 buf_len = (1 << strd_num_n) * (1 << strd_sz_n); 1455 obj_size = sizeof(struct mlx5_mprq_buf) + buf_len + (1 << strd_num_n) * 1456 sizeof(struct rte_mbuf_ext_shared_info) + RTE_PKTMBUF_HEADROOM; 1457 /* 1458 * Received packets can be either memcpy'd or externally referenced. In 1459 * case that the packet is attached to an mbuf as an external buffer, as 1460 * it isn't possible to predict how the buffers will be queued by 1461 * application, there's no option to exactly pre-allocate needed buffers 1462 * in advance but to speculatively prepares enough buffers. 1463 * 1464 * In the data path, if this Mempool is depleted, PMD will try to memcpy 1465 * received packets to buffers provided by application (rxq->mp) until 1466 * this Mempool gets available again. 1467 */ 1468 desc *= 4; 1469 obj_num = desc + MLX5_MPRQ_MP_CACHE_SZ * priv->rxqs_n; 1470 /* 1471 * rte_mempool_create_empty() has sanity check to refuse large cache 1472 * size compared to the number of elements. 1473 * CACHE_FLUSHTHRESH_MULTIPLIER is defined in a C file, so using a 1474 * constant number 2 instead. 1475 */ 1476 obj_num = RTE_MAX(obj_num, MLX5_MPRQ_MP_CACHE_SZ * 2); 1477 /* Check a mempool is already allocated and if it can be resued. */ 1478 if (mp != NULL && mp->elt_size >= obj_size && mp->size >= obj_num) { 1479 DRV_LOG(DEBUG, "port %u mempool %s is being reused", 1480 dev->data->port_id, mp->name); 1481 /* Reuse. */ 1482 goto exit; 1483 } else if (mp != NULL) { 1484 DRV_LOG(DEBUG, "port %u mempool %s should be resized, freeing it", 1485 dev->data->port_id, mp->name); 1486 /* 1487 * If failed to free, which means it may be still in use, no way 1488 * but to keep using the existing one. On buffer underrun, 1489 * packets will be memcpy'd instead of external buffer 1490 * attachment. 1491 */ 1492 if (mlx5_mprq_free_mp(dev)) { 1493 if (mp->elt_size >= obj_size) 1494 goto exit; 1495 else 1496 return -rte_errno; 1497 } 1498 } 1499 snprintf(name, sizeof(name), "port-%u-mprq", dev->data->port_id); 1500 mp = rte_mempool_create(name, obj_num, obj_size, MLX5_MPRQ_MP_CACHE_SZ, 1501 0, NULL, NULL, mlx5_mprq_buf_init, 1502 (void *)(uintptr_t)(1 << strd_num_n), 1503 dev->device->numa_node, 0); 1504 if (mp == NULL) { 1505 DRV_LOG(ERR, 1506 "port %u failed to allocate a mempool for" 1507 " Multi-Packet RQ, count=%u, size=%u", 1508 dev->data->port_id, obj_num, obj_size); 1509 rte_errno = ENOMEM; 1510 return -rte_errno; 1511 } 1512 priv->mprq_mp = mp; 1513 exit: 1514 /* Set mempool for each Rx queue. */ 1515 for (i = 0; i != priv->rxqs_n; ++i) { 1516 struct mlx5_rxq_data *rxq = (*priv->rxqs)[i]; 1517 1518 if (rxq == NULL) 1519 continue; 1520 rxq->mprq_mp = mp; 1521 } 1522 DRV_LOG(INFO, "port %u Multi-Packet RQ is configured", 1523 dev->data->port_id); 1524 return 0; 1525 } 1526 1527 #define MLX5_MAX_LRO_SIZE (UINT8_MAX * 256u) 1528 #define MLX5_MAX_TCP_HDR_OFFSET ((unsigned int)(sizeof(struct rte_ether_hdr) + \ 1529 sizeof(struct rte_vlan_hdr) * 2 + \ 1530 sizeof(struct rte_ipv6_hdr))) 1531 #define MAX_TCP_OPTION_SIZE 40u 1532 #define MLX5_MAX_LRO_HEADER_FIX ((unsigned int)(MLX5_MAX_TCP_HDR_OFFSET + \ 1533 sizeof(struct rte_tcp_hdr) + \ 1534 MAX_TCP_OPTION_SIZE)) 1535 1536 /** 1537 * Adjust the maximum LRO massage size. 1538 * 1539 * @param dev 1540 * Pointer to Ethernet device. 1541 * @param max_lro_size 1542 * The maximum size for LRO packet. 
1543 */ 1544 static void 1545 mlx5_max_lro_msg_size_adjust(struct rte_eth_dev *dev, uint32_t max_lro_size) 1546 { 1547 struct mlx5_priv *priv = dev->data->dev_private; 1548 1549 if (priv->config.hca_attr.lro_max_msg_sz_mode == 1550 MLX5_LRO_MAX_MSG_SIZE_START_FROM_L4 && max_lro_size > 1551 MLX5_MAX_TCP_HDR_OFFSET) 1552 max_lro_size -= MLX5_MAX_TCP_HDR_OFFSET; 1553 max_lro_size = RTE_MIN(max_lro_size, MLX5_MAX_LRO_SIZE); 1554 assert(max_lro_size >= 256u); 1555 max_lro_size /= 256u; 1556 if (priv->max_lro_msg_size) 1557 priv->max_lro_msg_size = 1558 RTE_MIN((uint32_t)priv->max_lro_msg_size, max_lro_size); 1559 else 1560 priv->max_lro_msg_size = max_lro_size; 1561 } 1562 1563 /** 1564 * Create a DPDK Rx queue. 1565 * 1566 * @param dev 1567 * Pointer to Ethernet device. 1568 * @param idx 1569 * RX queue index. 1570 * @param desc 1571 * Number of descriptors to configure in queue. 1572 * @param socket 1573 * NUMA socket on which memory must be allocated. 1574 * 1575 * @return 1576 * A DPDK queue object on success, NULL otherwise and rte_errno is set. 1577 */ 1578 struct mlx5_rxq_ctrl * 1579 mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, 1580 unsigned int socket, const struct rte_eth_rxconf *conf, 1581 struct rte_mempool *mp) 1582 { 1583 struct mlx5_priv *priv = dev->data->dev_private; 1584 struct mlx5_rxq_ctrl *tmpl; 1585 unsigned int mb_len = rte_pktmbuf_data_room_size(mp); 1586 unsigned int mprq_stride_size; 1587 struct mlx5_dev_config *config = &priv->config; 1588 unsigned int strd_headroom_en; 1589 /* 1590 * Always allocate extra slots, even if eventually 1591 * the vector Rx will not be used. 1592 */ 1593 uint16_t desc_n = 1594 desc + config->rx_vec_en * MLX5_VPMD_DESCS_PER_LOOP; 1595 uint64_t offloads = conf->offloads | 1596 dev->data->dev_conf.rxmode.offloads; 1597 unsigned int lro_on_queue = !!(offloads & DEV_RX_OFFLOAD_TCP_LRO); 1598 const int mprq_en = mlx5_check_mprq_support(dev) > 0; 1599 unsigned int max_rx_pkt_len = dev->data->dev_conf.rxmode.max_rx_pkt_len; 1600 unsigned int non_scatter_min_mbuf_size = max_rx_pkt_len + 1601 RTE_PKTMBUF_HEADROOM; 1602 unsigned int max_lro_size = 0; 1603 unsigned int first_mb_free_size = mb_len - RTE_PKTMBUF_HEADROOM; 1604 1605 if (non_scatter_min_mbuf_size > mb_len && !(offloads & 1606 DEV_RX_OFFLOAD_SCATTER)) { 1607 DRV_LOG(ERR, "port %u Rx queue %u: Scatter offload is not" 1608 " configured and no enough mbuf space(%u) to contain " 1609 "the maximum RX packet length(%u) with head-room(%u)", 1610 dev->data->port_id, idx, mb_len, max_rx_pkt_len, 1611 RTE_PKTMBUF_HEADROOM); 1612 rte_errno = ENOSPC; 1613 return NULL; 1614 } 1615 tmpl = rte_calloc_socket("RXQ", 1, 1616 sizeof(*tmpl) + 1617 desc_n * sizeof(struct rte_mbuf *), 1618 0, socket); 1619 if (!tmpl) { 1620 rte_errno = ENOMEM; 1621 return NULL; 1622 } 1623 if (mlx5_mr_btree_init(&tmpl->rxq.mr_ctrl.cache_bh, 1624 MLX5_MR_BTREE_CACHE_N, socket)) { 1625 /* rte_errno is already set. */ 1626 goto error; 1627 } 1628 tmpl->socket = socket; 1629 if (dev->data->dev_conf.intr_conf.rxq) 1630 tmpl->irq = 1; 1631 /* 1632 * LRO packet may consume all the stride memory, hence we cannot 1633 * guaranty head-room near the packet memory in the stride. 1634 * In this case scatter is, for sure, enabled and an empty mbuf may be 1635 * added in the start for the head-room. 
1636 */ 1637 if (lro_on_queue && RTE_PKTMBUF_HEADROOM > 0 && 1638 non_scatter_min_mbuf_size > mb_len) { 1639 strd_headroom_en = 0; 1640 mprq_stride_size = RTE_MIN(max_rx_pkt_len, 1641 1u << config->mprq.max_stride_size_n); 1642 } else { 1643 strd_headroom_en = 1; 1644 mprq_stride_size = non_scatter_min_mbuf_size; 1645 } 1646 /* 1647 * This Rx queue can be configured as a Multi-Packet RQ if all of the 1648 * following conditions are met: 1649 * - MPRQ is enabled. 1650 * - The number of descs is more than the number of strides. 1651 * - max_rx_pkt_len plus overhead is less than the max size of a 1652 * stride. 1653 * Otherwise, enable Rx scatter if necessary. 1654 */ 1655 if (mprq_en && 1656 desc > (1U << config->mprq.stride_num_n) && 1657 mprq_stride_size <= (1U << config->mprq.max_stride_size_n)) { 1658 /* TODO: Rx scatter isn't supported yet. */ 1659 tmpl->rxq.sges_n = 0; 1660 /* Trim the number of descs needed. */ 1661 desc >>= config->mprq.stride_num_n; 1662 tmpl->rxq.strd_num_n = config->mprq.stride_num_n; 1663 tmpl->rxq.strd_sz_n = RTE_MAX(log2above(mprq_stride_size), 1664 config->mprq.min_stride_size_n); 1665 tmpl->rxq.strd_shift_en = MLX5_MPRQ_TWO_BYTE_SHIFT; 1666 tmpl->rxq.strd_headroom_en = strd_headroom_en; 1667 tmpl->rxq.mprq_max_memcpy_len = RTE_MIN(first_mb_free_size, 1668 config->mprq.max_memcpy_len); 1669 max_lro_size = RTE_MIN(max_rx_pkt_len, 1670 (1u << tmpl->rxq.strd_num_n) * 1671 (1u << tmpl->rxq.strd_sz_n)); 1672 DRV_LOG(DEBUG, 1673 "port %u Rx queue %u: Multi-Packet RQ is enabled" 1674 " strd_num_n = %u, strd_sz_n = %u", 1675 dev->data->port_id, idx, 1676 tmpl->rxq.strd_num_n, tmpl->rxq.strd_sz_n); 1677 } else if (max_rx_pkt_len <= first_mb_free_size) { 1678 tmpl->rxq.sges_n = 0; 1679 max_lro_size = max_rx_pkt_len; 1680 } else if (offloads & DEV_RX_OFFLOAD_SCATTER) { 1681 unsigned int size = non_scatter_min_mbuf_size; 1682 unsigned int sges_n; 1683 1684 if (lro_on_queue && first_mb_free_size < 1685 MLX5_MAX_LRO_HEADER_FIX) { 1686 DRV_LOG(ERR, "Not enough space in the first segment(%u)" 1687 " to include the max header size(%u) for LRO", 1688 first_mb_free_size, MLX5_MAX_LRO_HEADER_FIX); 1689 rte_errno = ENOTSUP; 1690 goto error; 1691 } 1692 /* 1693 * Determine the number of SGEs needed for a full packet 1694 * and round it to the next power of two. 
1695 */ 1696 sges_n = log2above((size / mb_len) + !!(size % mb_len)); 1697 if (sges_n > MLX5_MAX_LOG_RQ_SEGS) { 1698 DRV_LOG(ERR, 1699 "port %u too many SGEs (%u) needed to handle" 1700 " requested maximum packet size %u, the maximum" 1701 " supported are %u", dev->data->port_id, 1702 1 << sges_n, max_rx_pkt_len, 1703 1u << MLX5_MAX_LOG_RQ_SEGS); 1704 rte_errno = ENOTSUP; 1705 goto error; 1706 } 1707 tmpl->rxq.sges_n = sges_n; 1708 max_lro_size = max_rx_pkt_len; 1709 } 1710 if (mprq_en && !mlx5_rxq_mprq_enabled(&tmpl->rxq)) 1711 DRV_LOG(WARNING, 1712 "port %u MPRQ is requested but cannot be enabled" 1713 " (requested: desc = %u, stride_sz = %u," 1714 " supported: min_stride_num = %u, max_stride_sz = %u).", 1715 dev->data->port_id, desc, mprq_stride_size, 1716 (1 << config->mprq.stride_num_n), 1717 (1 << config->mprq.max_stride_size_n)); 1718 DRV_LOG(DEBUG, "port %u maximum number of segments per packet: %u", 1719 dev->data->port_id, 1 << tmpl->rxq.sges_n); 1720 if (desc % (1 << tmpl->rxq.sges_n)) { 1721 DRV_LOG(ERR, 1722 "port %u number of Rx queue descriptors (%u) is not a" 1723 " multiple of SGEs per packet (%u)", 1724 dev->data->port_id, 1725 desc, 1726 1 << tmpl->rxq.sges_n); 1727 rte_errno = EINVAL; 1728 goto error; 1729 } 1730 mlx5_max_lro_msg_size_adjust(dev, max_lro_size); 1731 /* Toggle RX checksum offload if hardware supports it. */ 1732 tmpl->rxq.csum = !!(offloads & DEV_RX_OFFLOAD_CHECKSUM); 1733 tmpl->rxq.hw_timestamp = !!(offloads & DEV_RX_OFFLOAD_TIMESTAMP); 1734 /* Configure VLAN stripping. */ 1735 tmpl->rxq.vlan_strip = !!(offloads & DEV_RX_OFFLOAD_VLAN_STRIP); 1736 /* By default, FCS (CRC) is stripped by hardware. */ 1737 tmpl->rxq.crc_present = 0; 1738 tmpl->rxq.lro = lro_on_queue; 1739 if (offloads & DEV_RX_OFFLOAD_KEEP_CRC) { 1740 if (config->hw_fcs_strip) { 1741 /* 1742 * RQs used for LRO-enabled TIRs should not be 1743 * configured to scatter the FCS. 1744 */ 1745 if (lro_on_queue) 1746 DRV_LOG(WARNING, 1747 "port %u CRC stripping has been " 1748 "disabled but will still be performed " 1749 "by hardware, because LRO is enabled", 1750 dev->data->port_id); 1751 else 1752 tmpl->rxq.crc_present = 1; 1753 } else { 1754 DRV_LOG(WARNING, 1755 "port %u CRC stripping has been disabled but will" 1756 " still be performed by hardware, make sure MLNX_OFED" 1757 " and firmware are up to date", 1758 dev->data->port_id); 1759 } 1760 } 1761 DRV_LOG(DEBUG, 1762 "port %u CRC stripping is %s, %u bytes will be subtracted from" 1763 " incoming frames to hide it", 1764 dev->data->port_id, 1765 tmpl->rxq.crc_present ? "disabled" : "enabled", 1766 tmpl->rxq.crc_present << 2); 1767 /* Save port ID. */ 1768 tmpl->rxq.rss_hash = !!priv->rss_conf.rss_hf && 1769 (!!(dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS)); 1770 tmpl->rxq.port_id = dev->data->port_id; 1771 tmpl->priv = priv; 1772 tmpl->rxq.mp = mp; 1773 tmpl->rxq.elts_n = log2above(desc); 1774 tmpl->rxq.rq_repl_thresh = 1775 MLX5_VPMD_RXQ_RPLNSH_THRESH(1 << tmpl->rxq.elts_n); 1776 tmpl->rxq.elts = 1777 (struct rte_mbuf *(*)[1 << tmpl->rxq.elts_n])(tmpl + 1); 1778 #ifndef RTE_ARCH_64 1779 tmpl->rxq.uar_lock_cq = &priv->uar_lock_cq; 1780 #endif 1781 tmpl->rxq.idx = idx; 1782 rte_atomic32_inc(&tmpl->refcnt); 1783 LIST_INSERT_HEAD(&priv->rxqsctrl, tmpl, next); 1784 return tmpl; 1785 error: 1786 rte_free(tmpl); 1787 return NULL; 1788 } 1789 1790 /** 1791 * Get a Rx queue. 1792 * 1793 * @param dev 1794 * Pointer to Ethernet device. 1795 * @param idx 1796 * RX queue index. 
1797 * 1798 * @return 1799 * A pointer to the queue if it exists, NULL otherwise. 1800 */ 1801 struct mlx5_rxq_ctrl * 1802 mlx5_rxq_get(struct rte_eth_dev *dev, uint16_t idx) 1803 { 1804 struct mlx5_priv *priv = dev->data->dev_private; 1805 struct mlx5_rxq_ctrl *rxq_ctrl = NULL; 1806 1807 if ((*priv->rxqs)[idx]) { 1808 rxq_ctrl = container_of((*priv->rxqs)[idx], 1809 struct mlx5_rxq_ctrl, 1810 rxq); 1811 mlx5_rxq_obj_get(dev, idx); 1812 rte_atomic32_inc(&rxq_ctrl->refcnt); 1813 } 1814 return rxq_ctrl; 1815 } 1816 1817 /** 1818 * Release a Rx queue. 1819 * 1820 * @param dev 1821 * Pointer to Ethernet device. 1822 * @param idx 1823 * RX queue index. 1824 * 1825 * @return 1826 * 1 while a reference on it exists, 0 when freed. 1827 */ 1828 int 1829 mlx5_rxq_release(struct rte_eth_dev *dev, uint16_t idx) 1830 { 1831 struct mlx5_priv *priv = dev->data->dev_private; 1832 struct mlx5_rxq_ctrl *rxq_ctrl; 1833 1834 if (!(*priv->rxqs)[idx]) 1835 return 0; 1836 rxq_ctrl = container_of((*priv->rxqs)[idx], struct mlx5_rxq_ctrl, rxq); 1837 assert(rxq_ctrl->priv); 1838 if (rxq_ctrl->obj && !mlx5_rxq_obj_release(rxq_ctrl->obj)) 1839 rxq_ctrl->obj = NULL; 1840 if (rte_atomic32_dec_and_test(&rxq_ctrl->refcnt)) { 1841 if (rxq_ctrl->dbr_umem_id_valid) 1842 claim_zero(mlx5_release_dbr(dev, rxq_ctrl->dbr_umem_id, 1843 rxq_ctrl->dbr_offset)); 1844 mlx5_mr_btree_free(&rxq_ctrl->rxq.mr_ctrl.cache_bh); 1845 LIST_REMOVE(rxq_ctrl, next); 1846 rte_free(rxq_ctrl); 1847 (*priv->rxqs)[idx] = NULL; 1848 return 0; 1849 } 1850 return 1; 1851 } 1852 1853 /** 1854 * Verify the Rx Queue list is empty 1855 * 1856 * @param dev 1857 * Pointer to Ethernet device. 1858 * 1859 * @return 1860 * The number of object not released. 1861 */ 1862 int 1863 mlx5_rxq_verify(struct rte_eth_dev *dev) 1864 { 1865 struct mlx5_priv *priv = dev->data->dev_private; 1866 struct mlx5_rxq_ctrl *rxq_ctrl; 1867 int ret = 0; 1868 1869 LIST_FOREACH(rxq_ctrl, &priv->rxqsctrl, next) { 1870 DRV_LOG(DEBUG, "port %u Rx Queue %u still referenced", 1871 dev->data->port_id, rxq_ctrl->rxq.idx); 1872 ++ret; 1873 } 1874 return ret; 1875 } 1876 1877 /** 1878 * Create an indirection table. 1879 * 1880 * @param dev 1881 * Pointer to Ethernet device. 1882 * @param queues 1883 * Queues entering in the indirection table. 1884 * @param queues_n 1885 * Number of queues in the array. 1886 * 1887 * @return 1888 * The Verbs/DevX object initialised, NULL otherwise and rte_errno is set. 1889 */ 1890 static struct mlx5_ind_table_obj * 1891 mlx5_ind_table_obj_new(struct rte_eth_dev *dev, const uint16_t *queues, 1892 uint32_t queues_n, enum mlx5_ind_tbl_type type) 1893 { 1894 struct mlx5_priv *priv = dev->data->dev_private; 1895 struct mlx5_ind_table_obj *ind_tbl; 1896 unsigned int i = 0, j = 0, k = 0; 1897 1898 ind_tbl = rte_calloc(__func__, 1, sizeof(*ind_tbl) + 1899 queues_n * sizeof(uint16_t), 0); 1900 if (!ind_tbl) { 1901 rte_errno = ENOMEM; 1902 return NULL; 1903 } 1904 ind_tbl->type = type; 1905 if (ind_tbl->type == MLX5_IND_TBL_TYPE_IBV) { 1906 const unsigned int wq_n = rte_is_power_of_2(queues_n) ? 1907 log2above(queues_n) : 1908 log2above(priv->config.ind_table_max_size); 1909 struct ibv_wq *wq[1 << wq_n]; 1910 1911 for (i = 0; i != queues_n; ++i) { 1912 struct mlx5_rxq_ctrl *rxq = mlx5_rxq_get(dev, 1913 queues[i]); 1914 if (!rxq) 1915 goto error; 1916 wq[i] = rxq->obj->wq; 1917 ind_tbl->queues[i] = queues[i]; 1918 } 1919 ind_tbl->queues_n = queues_n; 1920 /* Finalise indirection table. */ 1921 k = i; /* Retain value of i for use in error case. 
*/ 1922 for (j = 0; k != (unsigned int)(1 << wq_n); ++k, ++j) 1923 wq[k] = wq[j]; 1924 ind_tbl->ind_table = mlx5_glue->create_rwq_ind_table 1925 (priv->sh->ctx, 1926 &(struct ibv_rwq_ind_table_init_attr){ 1927 .log_ind_tbl_size = wq_n, 1928 .ind_tbl = wq, 1929 .comp_mask = 0, 1930 }); 1931 if (!ind_tbl->ind_table) { 1932 rte_errno = errno; 1933 goto error; 1934 } 1935 } else { /* ind_tbl->type == MLX5_IND_TBL_TYPE_DEVX */ 1936 struct mlx5_devx_rqt_attr *rqt_attr = NULL; 1937 1938 rqt_attr = rte_calloc(__func__, 1, sizeof(*rqt_attr) + 1939 queues_n * sizeof(uint16_t), 0); 1940 if (!rqt_attr) { 1941 DRV_LOG(ERR, "port %u cannot allocate RQT resources", 1942 dev->data->port_id); 1943 rte_errno = ENOMEM; 1944 goto error; 1945 } 1946 rqt_attr->rqt_max_size = priv->config.ind_table_max_size; 1947 rqt_attr->rqt_actual_size = queues_n; 1948 for (i = 0; i != queues_n; ++i) { 1949 struct mlx5_rxq_ctrl *rxq = mlx5_rxq_get(dev, 1950 queues[i]); 1951 if (!rxq) 1952 goto error; 1953 rqt_attr->rq_list[i] = rxq->obj->rq->id; 1954 ind_tbl->queues[i] = queues[i]; 1955 } 1956 ind_tbl->rqt = mlx5_devx_cmd_create_rqt(priv->sh->ctx, 1957 rqt_attr); 1958 rte_free(rqt_attr); 1959 if (!ind_tbl->rqt) { 1960 DRV_LOG(ERR, "port %u cannot create DevX RQT", 1961 dev->data->port_id); 1962 rte_errno = errno; 1963 goto error; 1964 } 1965 ind_tbl->queues_n = queues_n; 1966 } 1967 rte_atomic32_inc(&ind_tbl->refcnt); 1968 LIST_INSERT_HEAD(&priv->ind_tbls, ind_tbl, next); 1969 return ind_tbl; 1970 error: 1971 for (j = 0; j < i; j++) 1972 mlx5_rxq_release(dev, ind_tbl->queues[j]); 1973 rte_free(ind_tbl); 1974 DEBUG("port %u cannot create indirection table", dev->data->port_id); 1975 return NULL; 1976 } 1977 1978 /** 1979 * Get an indirection table. 1980 * 1981 * @param dev 1982 * Pointer to Ethernet device. 1983 * @param queues 1984 * Queues entering in the indirection table. 1985 * @param queues_n 1986 * Number of queues in the array. 1987 * 1988 * @return 1989 * An indirection table if found. 1990 */ 1991 static struct mlx5_ind_table_obj * 1992 mlx5_ind_table_obj_get(struct rte_eth_dev *dev, const uint16_t *queues, 1993 uint32_t queues_n) 1994 { 1995 struct mlx5_priv *priv = dev->data->dev_private; 1996 struct mlx5_ind_table_obj *ind_tbl; 1997 1998 LIST_FOREACH(ind_tbl, &priv->ind_tbls, next) { 1999 if ((ind_tbl->queues_n == queues_n) && 2000 (memcmp(ind_tbl->queues, queues, 2001 ind_tbl->queues_n * sizeof(ind_tbl->queues[0])) 2002 == 0)) 2003 break; 2004 } 2005 if (ind_tbl) { 2006 unsigned int i; 2007 2008 rte_atomic32_inc(&ind_tbl->refcnt); 2009 for (i = 0; i != ind_tbl->queues_n; ++i) 2010 mlx5_rxq_get(dev, ind_tbl->queues[i]); 2011 } 2012 return ind_tbl; 2013 } 2014 2015 /** 2016 * Release an indirection table. 2017 * 2018 * @param dev 2019 * Pointer to Ethernet device. 2020 * @param ind_table 2021 * Indirection table to release. 2022 * 2023 * @return 2024 * 1 while a reference on it exists, 0 when freed. 
 */
static int
mlx5_ind_table_obj_release(struct rte_eth_dev *dev,
			   struct mlx5_ind_table_obj *ind_tbl)
{
	unsigned int i;

	if (rte_atomic32_dec_and_test(&ind_tbl->refcnt)) {
		if (ind_tbl->type == MLX5_IND_TBL_TYPE_IBV)
			claim_zero(mlx5_glue->destroy_rwq_ind_table
				   (ind_tbl->ind_table));
		else if (ind_tbl->type == MLX5_IND_TBL_TYPE_DEVX)
			claim_zero(mlx5_devx_cmd_destroy(ind_tbl->rqt));
	}
	for (i = 0; i != ind_tbl->queues_n; ++i)
		claim_nonzero(mlx5_rxq_release(dev, ind_tbl->queues[i]));
	if (!rte_atomic32_read(&ind_tbl->refcnt)) {
		LIST_REMOVE(ind_tbl, next);
		rte_free(ind_tbl);
		return 0;
	}
	return 1;
}

/**
 * Verify the indirection table list is empty.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   The number of objects not released.
 */
int
mlx5_ind_table_obj_verify(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_ind_table_obj *ind_tbl;
	int ret = 0;

	LIST_FOREACH(ind_tbl, &priv->ind_tbls, next) {
		DRV_LOG(DEBUG,
			"port %u indirection table obj %p still referenced",
			dev->data->port_id, (void *)ind_tbl);
		++ret;
	}
	return ret;
}

/**
 * Create an Rx Hash queue.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param rss_key
 *   RSS key for the Rx hash queue.
 * @param rss_key_len
 *   RSS key length.
 * @param hash_fields
 *   Verbs protocol hash field to make the RSS on.
 * @param queues
 *   Queues entering the hash queue. In case of empty hash_fields only the
 *   first queue index will be taken for the indirection table.
 * @param queues_n
 *   Number of queues.
 * @param tunnel
 *   Tunnel type.
 *
 * @return
 *   The Verbs/DevX object initialised, NULL otherwise and rte_errno is set.
 */
struct mlx5_hrxq *
mlx5_hrxq_new(struct rte_eth_dev *dev,
	      const uint8_t *rss_key, uint32_t rss_key_len,
	      uint64_t hash_fields,
	      const uint16_t *queues, uint32_t queues_n,
	      int tunnel __rte_unused)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_hrxq *hrxq;
	struct ibv_qp *qp = NULL;
	struct mlx5_ind_table_obj *ind_tbl;
	int err;
	struct mlx5_devx_obj *tir = NULL;

	queues_n = hash_fields ? queues_n : 1;
	ind_tbl = mlx5_ind_table_obj_get(dev, queues, queues_n);
	if (!ind_tbl) {
		struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[queues[0]];
		struct mlx5_rxq_ctrl *rxq_ctrl =
			container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
		enum mlx5_ind_tbl_type type;

		type = rxq_ctrl->obj->type == MLX5_RXQ_OBJ_TYPE_IBV ?
				MLX5_IND_TBL_TYPE_IBV : MLX5_IND_TBL_TYPE_DEVX;
		ind_tbl = mlx5_ind_table_obj_new(dev, queues, queues_n, type);
	}
	if (!ind_tbl) {
		rte_errno = ENOMEM;
		return NULL;
	}
	if (ind_tbl->type == MLX5_IND_TBL_TYPE_IBV) {
#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
		struct mlx5dv_qp_init_attr qp_init_attr;

		memset(&qp_init_attr, 0, sizeof(qp_init_attr));
		if (tunnel) {
			qp_init_attr.comp_mask =
				MLX5DV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS;
			qp_init_attr.create_flags =
				MLX5DV_QP_CREATE_TUNNEL_OFFLOADS;
		}
#ifdef HAVE_IBV_FLOW_DV_SUPPORT
		if (dev->data->dev_conf.lpbk_mode) {
			/*
			 * Allow packets sent from NIC loopback
			 * without source MAC check.
2142 */ 2143 qp_init_attr.comp_mask |= 2144 MLX5DV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS; 2145 qp_init_attr.create_flags |= 2146 MLX5DV_QP_CREATE_TIR_ALLOW_SELF_LOOPBACK_UC; 2147 } 2148 #endif 2149 qp = mlx5_glue->dv_create_qp 2150 (priv->sh->ctx, 2151 &(struct ibv_qp_init_attr_ex){ 2152 .qp_type = IBV_QPT_RAW_PACKET, 2153 .comp_mask = 2154 IBV_QP_INIT_ATTR_PD | 2155 IBV_QP_INIT_ATTR_IND_TABLE | 2156 IBV_QP_INIT_ATTR_RX_HASH, 2157 .rx_hash_conf = (struct ibv_rx_hash_conf){ 2158 .rx_hash_function = 2159 IBV_RX_HASH_FUNC_TOEPLITZ, 2160 .rx_hash_key_len = rss_key_len, 2161 .rx_hash_key = 2162 (void *)(uintptr_t)rss_key, 2163 .rx_hash_fields_mask = hash_fields, 2164 }, 2165 .rwq_ind_tbl = ind_tbl->ind_table, 2166 .pd = priv->sh->pd, 2167 }, 2168 &qp_init_attr); 2169 #else 2170 qp = mlx5_glue->create_qp_ex 2171 (priv->sh->ctx, 2172 &(struct ibv_qp_init_attr_ex){ 2173 .qp_type = IBV_QPT_RAW_PACKET, 2174 .comp_mask = 2175 IBV_QP_INIT_ATTR_PD | 2176 IBV_QP_INIT_ATTR_IND_TABLE | 2177 IBV_QP_INIT_ATTR_RX_HASH, 2178 .rx_hash_conf = (struct ibv_rx_hash_conf){ 2179 .rx_hash_function = 2180 IBV_RX_HASH_FUNC_TOEPLITZ, 2181 .rx_hash_key_len = rss_key_len, 2182 .rx_hash_key = 2183 (void *)(uintptr_t)rss_key, 2184 .rx_hash_fields_mask = hash_fields, 2185 }, 2186 .rwq_ind_tbl = ind_tbl->ind_table, 2187 .pd = priv->sh->pd, 2188 }); 2189 #endif 2190 if (!qp) { 2191 rte_errno = errno; 2192 goto error; 2193 } 2194 } else { /* ind_tbl->type == MLX5_IND_TBL_TYPE_DEVX */ 2195 struct mlx5_devx_tir_attr tir_attr; 2196 uint32_t i; 2197 uint32_t lro = 1; 2198 2199 /* Enable TIR LRO only if all the queues were configured for. */ 2200 for (i = 0; i < queues_n; ++i) { 2201 if (!(*priv->rxqs)[queues[i]]->lro) { 2202 lro = 0; 2203 break; 2204 } 2205 } 2206 memset(&tir_attr, 0, sizeof(tir_attr)); 2207 tir_attr.disp_type = MLX5_TIRC_DISP_TYPE_INDIRECT; 2208 tir_attr.rx_hash_fn = MLX5_RX_HASH_FN_TOEPLITZ; 2209 memcpy(&tir_attr.rx_hash_field_selector_outer, &hash_fields, 2210 sizeof(uint64_t)); 2211 tir_attr.transport_domain = priv->sh->tdn; 2212 memcpy(tir_attr.rx_hash_toeplitz_key, rss_key, rss_key_len); 2213 tir_attr.indirect_table = ind_tbl->rqt->id; 2214 if (dev->data->dev_conf.lpbk_mode) 2215 tir_attr.self_lb_block = 2216 MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST; 2217 if (lro) { 2218 tir_attr.lro_timeout_period_usecs = 2219 priv->config.lro.timeout; 2220 tir_attr.lro_max_msg_sz = priv->max_lro_msg_size; 2221 tir_attr.lro_enable_mask = 2222 MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO | 2223 MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO; 2224 } 2225 tir = mlx5_devx_cmd_create_tir(priv->sh->ctx, &tir_attr); 2226 if (!tir) { 2227 DRV_LOG(ERR, "port %u cannot create DevX TIR", 2228 dev->data->port_id); 2229 rte_errno = errno; 2230 goto error; 2231 } 2232 } 2233 hrxq = rte_calloc(__func__, 1, sizeof(*hrxq) + rss_key_len, 0); 2234 if (!hrxq) 2235 goto error; 2236 hrxq->ind_table = ind_tbl; 2237 if (ind_tbl->type == MLX5_IND_TBL_TYPE_IBV) { 2238 hrxq->qp = qp; 2239 #ifdef HAVE_IBV_FLOW_DV_SUPPORT 2240 hrxq->action = 2241 mlx5_glue->dv_create_flow_action_dest_ibv_qp(hrxq->qp); 2242 if (!hrxq->action) { 2243 rte_errno = errno; 2244 goto error; 2245 } 2246 #endif 2247 } else { /* ind_tbl->type == MLX5_IND_TBL_TYPE_DEVX */ 2248 hrxq->tir = tir; 2249 #ifdef HAVE_IBV_FLOW_DV_SUPPORT 2250 hrxq->action = mlx5_glue->dv_create_flow_action_dest_devx_tir 2251 (hrxq->tir->obj); 2252 if (!hrxq->action) { 2253 rte_errno = errno; 2254 goto error; 2255 } 2256 #endif 2257 } 2258 hrxq->rss_key_len = rss_key_len; 2259 hrxq->hash_fields = hash_fields; 2260 
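	/*
	 * Keep a private copy of the RSS key: mlx5_hrxq_get() matches
	 * candidate hash Rx queues by comparing this stored key, the key
	 * length and the hash fields before reusing an existing hrxq.
	 */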
	memcpy(hrxq->rss_key, rss_key, rss_key_len);
	rte_atomic32_inc(&hrxq->refcnt);
	LIST_INSERT_HEAD(&priv->hrxqs, hrxq, next);
	return hrxq;
error:
	err = rte_errno; /* Save rte_errno before cleanup. */
	mlx5_ind_table_obj_release(dev, ind_tbl);
	if (qp)
		claim_zero(mlx5_glue->destroy_qp(qp));
	else if (tir)
		claim_zero(mlx5_devx_cmd_destroy(tir));
	rte_errno = err; /* Restore rte_errno. */
	return NULL;
}

/**
 * Get an Rx Hash queue.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param rss_key
 *   RSS key for the Rx hash queue.
 * @param rss_key_len
 *   RSS key length.
 * @param hash_fields
 *   Verbs protocol hash field to make the RSS on.
 * @param queues
 *   Queues entering the hash queue. In case of empty hash_fields only the
 *   first queue index will be taken for the indirection table.
 * @param queues_n
 *   Number of queues.
 *
 * @return
 *   A hash Rx queue on success, NULL otherwise.
 */
struct mlx5_hrxq *
mlx5_hrxq_get(struct rte_eth_dev *dev,
	      const uint8_t *rss_key, uint32_t rss_key_len,
	      uint64_t hash_fields,
	      const uint16_t *queues, uint32_t queues_n)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_hrxq *hrxq;

	queues_n = hash_fields ? queues_n : 1;
	LIST_FOREACH(hrxq, &priv->hrxqs, next) {
		struct mlx5_ind_table_obj *ind_tbl;

		if (hrxq->rss_key_len != rss_key_len)
			continue;
		if (memcmp(hrxq->rss_key, rss_key, rss_key_len))
			continue;
		if (hrxq->hash_fields != hash_fields)
			continue;
		ind_tbl = mlx5_ind_table_obj_get(dev, queues, queues_n);
		if (!ind_tbl)
			continue;
		if (ind_tbl != hrxq->ind_table) {
			mlx5_ind_table_obj_release(dev, ind_tbl);
			continue;
		}
		rte_atomic32_inc(&hrxq->refcnt);
		return hrxq;
	}
	return NULL;
}

/**
 * Release the hash Rx queue.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param hrxq
 *   Pointer to Hash Rx queue to release.
 *
 * @return
 *   1 while a reference on it exists, 0 when freed.
 */
int
mlx5_hrxq_release(struct rte_eth_dev *dev, struct mlx5_hrxq *hrxq)
{
	if (rte_atomic32_dec_and_test(&hrxq->refcnt)) {
#ifdef HAVE_IBV_FLOW_DV_SUPPORT
		mlx5_glue->destroy_flow_action(hrxq->action);
#endif
		if (hrxq->ind_table->type == MLX5_IND_TBL_TYPE_IBV)
			claim_zero(mlx5_glue->destroy_qp(hrxq->qp));
		else /* hrxq->ind_table->type == MLX5_IND_TBL_TYPE_DEVX */
			claim_zero(mlx5_devx_cmd_destroy(hrxq->tir));
		mlx5_ind_table_obj_release(dev, hrxq->ind_table);
		LIST_REMOVE(hrxq, next);
		rte_free(hrxq);
		return 0;
	}
	claim_nonzero(mlx5_ind_table_obj_release(dev, hrxq->ind_table));
	return 1;
}

/**
 * Verify the hash Rx queue list is empty.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   The number of objects not released.
 */
int
mlx5_hrxq_verify(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_hrxq *hrxq;
	int ret = 0;

	LIST_FOREACH(hrxq, &priv->hrxqs, next) {
		DRV_LOG(DEBUG,
			"port %u hash Rx queue %p still referenced",
			dev->data->port_id, (void *)hrxq);
		++ret;
	}
	return ret;
}

/**
 * Create a drop Rx queue Verbs/DevX object.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   The Verbs/DevX object initialised, NULL otherwise and rte_errno is set.
 */
static struct mlx5_rxq_obj *
mlx5_rxq_obj_drop_new(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct ibv_context *ctx = priv->sh->ctx;
	struct ibv_cq *cq;
	struct ibv_wq *wq = NULL;
	struct mlx5_rxq_obj *rxq;

	if (priv->drop_queue.rxq)
		return priv->drop_queue.rxq;
	cq = mlx5_glue->create_cq(ctx, 1, NULL, NULL, 0);
	if (!cq) {
		DEBUG("port %u cannot allocate CQ for drop queue",
		      dev->data->port_id);
		rte_errno = errno;
		goto error;
	}
	wq = mlx5_glue->create_wq(ctx,
		 &(struct ibv_wq_init_attr){
			.wq_type = IBV_WQT_RQ,
			.max_wr = 1,
			.max_sge = 1,
			.pd = priv->sh->pd,
			.cq = cq,
		 });
	if (!wq) {
		DEBUG("port %u cannot allocate WQ for drop queue",
		      dev->data->port_id);
		rte_errno = errno;
		goto error;
	}
	rxq = rte_calloc(__func__, 1, sizeof(*rxq), 0);
	if (!rxq) {
		DEBUG("port %u cannot allocate drop Rx queue memory",
		      dev->data->port_id);
		rte_errno = ENOMEM;
		goto error;
	}
	rxq->cq = cq;
	rxq->wq = wq;
	priv->drop_queue.rxq = rxq;
	return rxq;
error:
	if (wq)
		claim_zero(mlx5_glue->destroy_wq(wq));
	if (cq)
		claim_zero(mlx5_glue->destroy_cq(cq));
	return NULL;
}

/**
 * Release a drop Rx queue Verbs/DevX object.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
static void
mlx5_rxq_obj_drop_release(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_rxq_obj *rxq = priv->drop_queue.rxq;

	if (rxq->wq)
		claim_zero(mlx5_glue->destroy_wq(rxq->wq));
	if (rxq->cq)
		claim_zero(mlx5_glue->destroy_cq(rxq->cq));
	rte_free(rxq);
	priv->drop_queue.rxq = NULL;
}

/**
 * Create a drop indirection table.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   The Verbs/DevX object initialised, NULL otherwise and rte_errno is set.
 */
static struct mlx5_ind_table_obj *
mlx5_ind_table_obj_drop_new(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_ind_table_obj *ind_tbl;
	struct mlx5_rxq_obj *rxq;
	struct mlx5_ind_table_obj tmpl;

	rxq = mlx5_rxq_obj_drop_new(dev);
	if (!rxq)
		return NULL;
	tmpl.ind_table = mlx5_glue->create_rwq_ind_table
		(priv->sh->ctx,
		 &(struct ibv_rwq_ind_table_init_attr){
			.log_ind_tbl_size = 0,
			.ind_tbl = &rxq->wq,
			.comp_mask = 0,
		 });
	if (!tmpl.ind_table) {
		DEBUG("port %u cannot allocate indirection table for drop"
		      " queue",
		      dev->data->port_id);
		rte_errno = errno;
		goto error;
	}
	ind_tbl = rte_calloc(__func__, 1, sizeof(*ind_tbl), 0);
	if (!ind_tbl) {
		rte_errno = ENOMEM;
		goto error;
	}
	ind_tbl->ind_table = tmpl.ind_table;
	return ind_tbl;
error:
	mlx5_rxq_obj_drop_release(dev);
	return NULL;
}

/**
 * Release a drop indirection table.
 *
 * @param dev
 *   Pointer to Ethernet device.
2513 */ 2514 static void 2515 mlx5_ind_table_obj_drop_release(struct rte_eth_dev *dev) 2516 { 2517 struct mlx5_priv *priv = dev->data->dev_private; 2518 struct mlx5_ind_table_obj *ind_tbl = priv->drop_queue.hrxq->ind_table; 2519 2520 claim_zero(mlx5_glue->destroy_rwq_ind_table(ind_tbl->ind_table)); 2521 mlx5_rxq_obj_drop_release(dev); 2522 rte_free(ind_tbl); 2523 priv->drop_queue.hrxq->ind_table = NULL; 2524 } 2525 2526 /** 2527 * Create a drop Rx Hash queue. 2528 * 2529 * @param dev 2530 * Pointer to Ethernet device. 2531 * 2532 * @return 2533 * The Verbs/DevX object initialised, NULL otherwise and rte_errno is set. 2534 */ 2535 struct mlx5_hrxq * 2536 mlx5_hrxq_drop_new(struct rte_eth_dev *dev) 2537 { 2538 struct mlx5_priv *priv = dev->data->dev_private; 2539 struct mlx5_ind_table_obj *ind_tbl; 2540 struct ibv_qp *qp; 2541 struct mlx5_hrxq *hrxq; 2542 2543 if (priv->drop_queue.hrxq) { 2544 rte_atomic32_inc(&priv->drop_queue.hrxq->refcnt); 2545 return priv->drop_queue.hrxq; 2546 } 2547 ind_tbl = mlx5_ind_table_obj_drop_new(dev); 2548 if (!ind_tbl) 2549 return NULL; 2550 qp = mlx5_glue->create_qp_ex(priv->sh->ctx, 2551 &(struct ibv_qp_init_attr_ex){ 2552 .qp_type = IBV_QPT_RAW_PACKET, 2553 .comp_mask = 2554 IBV_QP_INIT_ATTR_PD | 2555 IBV_QP_INIT_ATTR_IND_TABLE | 2556 IBV_QP_INIT_ATTR_RX_HASH, 2557 .rx_hash_conf = (struct ibv_rx_hash_conf){ 2558 .rx_hash_function = 2559 IBV_RX_HASH_FUNC_TOEPLITZ, 2560 .rx_hash_key_len = MLX5_RSS_HASH_KEY_LEN, 2561 .rx_hash_key = rss_hash_default_key, 2562 .rx_hash_fields_mask = 0, 2563 }, 2564 .rwq_ind_tbl = ind_tbl->ind_table, 2565 .pd = priv->sh->pd 2566 }); 2567 if (!qp) { 2568 DEBUG("port %u cannot allocate QP for drop queue", 2569 dev->data->port_id); 2570 rte_errno = errno; 2571 goto error; 2572 } 2573 hrxq = rte_calloc(__func__, 1, sizeof(*hrxq), 0); 2574 if (!hrxq) { 2575 DRV_LOG(WARNING, 2576 "port %u cannot allocate memory for drop queue", 2577 dev->data->port_id); 2578 rte_errno = ENOMEM; 2579 goto error; 2580 } 2581 hrxq->ind_table = ind_tbl; 2582 hrxq->qp = qp; 2583 #ifdef HAVE_IBV_FLOW_DV_SUPPORT 2584 hrxq->action = mlx5_glue->dv_create_flow_action_dest_ibv_qp(hrxq->qp); 2585 if (!hrxq->action) { 2586 rte_errno = errno; 2587 goto error; 2588 } 2589 #endif 2590 priv->drop_queue.hrxq = hrxq; 2591 rte_atomic32_set(&hrxq->refcnt, 1); 2592 return hrxq; 2593 error: 2594 if (ind_tbl) 2595 mlx5_ind_table_obj_drop_release(dev); 2596 return NULL; 2597 } 2598 2599 /** 2600 * Release a drop hash Rx queue. 2601 * 2602 * @param dev 2603 * Pointer to Ethernet device. 2604 */ 2605 void 2606 mlx5_hrxq_drop_release(struct rte_eth_dev *dev) 2607 { 2608 struct mlx5_priv *priv = dev->data->dev_private; 2609 struct mlx5_hrxq *hrxq = priv->drop_queue.hrxq; 2610 2611 if (rte_atomic32_dec_and_test(&hrxq->refcnt)) { 2612 #ifdef HAVE_IBV_FLOW_DV_SUPPORT 2613 mlx5_glue->destroy_flow_action(hrxq->action); 2614 #endif 2615 claim_zero(mlx5_glue->destroy_qp(hrxq->qp)); 2616 mlx5_ind_table_obj_drop_release(dev); 2617 rte_free(hrxq); 2618 priv->drop_queue.hrxq = NULL; 2619 } 2620 } 2621
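
/*
 * Illustrative sketch, not part of the driver: how a caller (for instance
 * the flow layer) might combine the hash Rx queue helpers above.  A lookup
 * with mlx5_hrxq_get() is tried first so an existing queue matching the RSS
 * key, hash fields and queue list is reused; on a miss mlx5_hrxq_new()
 * creates one, and every successful get/new is expected to be balanced by
 * mlx5_hrxq_release().  The wrapper name and the MLX5_RXQ_USAGE_EXAMPLE
 * guard are hypothetical and exist only for this example; the hash-field
 * mask is an arbitrary outer-IPv4 selection.
 */
#ifdef MLX5_RXQ_USAGE_EXAMPLE
static struct mlx5_hrxq *
example_hrxq_acquire(struct rte_eth_dev *dev,
		     const uint16_t *queues, uint32_t queues_n)
{
	/* Hash on outer IPv4 source/destination addresses only. */
	uint64_t hash_fields = IBV_RX_HASH_SRC_IPV4 | IBV_RX_HASH_DST_IPV4;
	struct mlx5_hrxq *hrxq;

	/* Reuse an existing hash Rx queue when one already matches. */
	hrxq = mlx5_hrxq_get(dev, rss_hash_default_key,
			     MLX5_RSS_HASH_KEY_LEN, hash_fields,
			     queues, queues_n);
	if (hrxq)
		return hrxq;
	/* Otherwise create one; rte_errno is set by the callee on failure. */
	return mlx5_hrxq_new(dev, rss_hash_default_key,
			     MLX5_RSS_HASH_KEY_LEN, hash_fields,
			     queues, queues_n, 0);
}
#endif /* MLX5_RXQ_USAGE_EXAMPLE */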