/*-
 *   BSD LICENSE
 *
 *   Copyright 2015 6WIND S.A.
 *   Copyright 2015 Mellanox.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of 6WIND S.A. nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <stddef.h>
#include <assert.h>
#include <errno.h>
#include <string.h>
#include <stdint.h>
#include <fcntl.h>
#include <sys/queue.h>

/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
#include <infiniband/mlx5dv.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif

#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <rte_ethdev_driver.h>
#include <rte_common.h>
#include <rte_interrupts.h>
#include <rte_debug.h>
#include <rte_io.h>

#include "mlx5.h"
#include "mlx5_rxtx.h"
#include "mlx5_utils.h"
#include "mlx5_autoconf.h"
#include "mlx5_defs.h"

/* Default RSS hash key also used for ConnectX-3. */
uint8_t rss_hash_default_key[] = {
	0x2c, 0xc6, 0x81, 0xd1,
	0x5b, 0xdb, 0xf4, 0xf7,
	0xfc, 0xa2, 0x83, 0x19,
	0xdb, 0x1a, 0x3e, 0x94,
	0x6b, 0x9e, 0x38, 0xd9,
	0x2c, 0x9c, 0x03, 0xd1,
	0xad, 0x99, 0x44, 0xa7,
	0xd9, 0x56, 0x3d, 0x59,
	0x06, 0x3c, 0x25, 0xf3,
	0xfc, 0x1f, 0xdc, 0x2a,
};

/* Length of the default RSS hash key. */
const size_t rss_hash_default_key_len = sizeof(rss_hash_default_key);

/**
 * Allocate RX queue elements.
 *
 * @param rxq_ctrl
 *   Pointer to RX queue structure.
 *
 * @return
 *   0 on success, errno value on failure.
 */
int
rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl)
{
	const unsigned int sges_n = 1 << rxq_ctrl->rxq.sges_n;
	unsigned int elts_n = 1 << rxq_ctrl->rxq.elts_n;
	unsigned int i;
	int ret = 0;

	/* Iterate on segments. */
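	/*
	 * One mbuf is allocated per WQE entry; a packet larger than a
	 * single mbuf consumes sges_n consecutive entries and only the
	 * first segment of each packet keeps its headroom (see below).
	 */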
	for (i = 0; (i != elts_n); ++i) {
		struct rte_mbuf *buf;

		buf = rte_pktmbuf_alloc(rxq_ctrl->rxq.mp);
		if (buf == NULL) {
			ERROR("%p: empty mbuf pool", (void *)rxq_ctrl);
			ret = ENOMEM;
			goto error;
		}
		/* Headroom is reserved by rte_pktmbuf_alloc(). */
		assert(DATA_OFF(buf) == RTE_PKTMBUF_HEADROOM);
		/* Buffer is supposed to be empty. */
		assert(rte_pktmbuf_data_len(buf) == 0);
		assert(rte_pktmbuf_pkt_len(buf) == 0);
		assert(!buf->next);
		/* Only the first segment keeps headroom. */
		if (i % sges_n)
			SET_DATA_OFF(buf, 0);
		PORT(buf) = rxq_ctrl->rxq.port_id;
		DATA_LEN(buf) = rte_pktmbuf_tailroom(buf);
		PKT_LEN(buf) = DATA_LEN(buf);
		NB_SEGS(buf) = 1;
		(*rxq_ctrl->rxq.elts)[i] = buf;
	}
	/* If Rx vector is activated. */
	if (rxq_check_vec_support(&rxq_ctrl->rxq) > 0) {
		struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq;
		struct rte_mbuf *mbuf_init = &rxq->fake_mbuf;
		int j;

		/* Initialize default rearm_data for vPMD. */
		mbuf_init->data_off = RTE_PKTMBUF_HEADROOM;
		rte_mbuf_refcnt_set(mbuf_init, 1);
		mbuf_init->nb_segs = 1;
		mbuf_init->port = rxq->port_id;
		/*
		 * Prevent compiler reordering:
		 * rearm_data covers previous fields.
		 */
		rte_compiler_barrier();
		rxq->mbuf_initializer =
			*(uint64_t *)&mbuf_init->rearm_data;
		/* Padding with a fake mbuf for vectorized Rx. */
		for (j = 0; j < MLX5_VPMD_DESCS_PER_LOOP; ++j)
			(*rxq->elts)[elts_n + j] = &rxq->fake_mbuf;
	}
	DEBUG("%p: allocated and configured %u segments (max %u packets)",
	      (void *)rxq_ctrl, elts_n, elts_n / (1 << rxq_ctrl->rxq.sges_n));
	assert(ret == 0);
	return 0;
error:
	elts_n = i;
	for (i = 0; (i != elts_n); ++i) {
		if ((*rxq_ctrl->rxq.elts)[i] != NULL)
			rte_pktmbuf_free_seg((*rxq_ctrl->rxq.elts)[i]);
		(*rxq_ctrl->rxq.elts)[i] = NULL;
	}
	DEBUG("%p: failed, freed everything", (void *)rxq_ctrl);
	assert(ret > 0);
	return ret;
}

/**
 * Free RX queue elements.
 *
 * @param rxq_ctrl
 *   Pointer to RX queue structure.
 */
static void
rxq_free_elts(struct mlx5_rxq_ctrl *rxq_ctrl)
{
	struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq;
	const uint16_t q_n = (1 << rxq->elts_n);
	const uint16_t q_mask = q_n - 1;
	uint16_t used = q_n - (rxq->rq_ci - rxq->rq_pi);
	uint16_t i;

	DEBUG("%p: freeing WRs", (void *)rxq_ctrl);
	if (rxq->elts == NULL)
		return;
	/*
	 * Some mbufs in the ring still belong to the application;
	 * they cannot be freed.
	 */
	if (rxq_check_vec_support(rxq) > 0) {
		for (i = 0; i < used; ++i)
			(*rxq->elts)[(rxq->rq_ci + i) & q_mask] = NULL;
		rxq->rq_pi = rxq->rq_ci;
	}
	for (i = 0; (i != (1u << rxq->elts_n)); ++i) {
		if ((*rxq->elts)[i] != NULL)
			rte_pktmbuf_free_seg((*rxq->elts)[i]);
		(*rxq->elts)[i] = NULL;
	}
}

/**
 * Clean up a RX queue.
 *
 * Destroy objects, free allocated memory and reset the structure for reuse.
 *
 * @param rxq_ctrl
 *   Pointer to RX queue structure.
 */
void
mlx5_rxq_cleanup(struct mlx5_rxq_ctrl *rxq_ctrl)
{
	DEBUG("cleaning up %p", (void *)rxq_ctrl);
	if (rxq_ctrl->ibv)
		mlx5_priv_rxq_ibv_release(rxq_ctrl->priv, rxq_ctrl->ibv);
	memset(rxq_ctrl, 0, sizeof(*rxq_ctrl));
}

/**
 * Returns the per-queue supported offloads.
 *
 * @param priv
 *   Pointer to private structure.
 *
 * @return
 *   Supported Rx offloads.
 */
uint64_t
mlx5_priv_get_rx_queue_offloads(struct priv *priv)
{
	struct mlx5_dev_config *config = &priv->config;
	uint64_t offloads = (DEV_RX_OFFLOAD_SCATTER |
			     DEV_RX_OFFLOAD_TIMESTAMP |
			     DEV_RX_OFFLOAD_JUMBO_FRAME);

	if (config->hw_fcs_strip)
		offloads |= DEV_RX_OFFLOAD_CRC_STRIP;
	if (config->hw_csum)
		offloads |= (DEV_RX_OFFLOAD_IPV4_CKSUM |
			     DEV_RX_OFFLOAD_UDP_CKSUM |
			     DEV_RX_OFFLOAD_TCP_CKSUM);
	if (config->hw_vlan_strip)
		offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
	return offloads;
}

/**
 * Returns the per-port supported offloads.
 *
 * @param priv
 *   Pointer to private structure.
 *
 * @return
 *   Supported Rx offloads.
 */
uint64_t
mlx5_priv_get_rx_port_offloads(struct priv *priv __rte_unused)
{
	uint64_t offloads = DEV_RX_OFFLOAD_VLAN_FILTER;

	return offloads;
}

/**
 * Checks if the per-queue offload configuration is valid.
 *
 * @param priv
 *   Pointer to private structure.
 * @param offloads
 *   Per-queue offloads configuration.
 *
 * @return
 *   1 if the configuration is valid, 0 otherwise.
 */
static int
priv_is_rx_queue_offloads_allowed(struct priv *priv, uint64_t offloads)
{
	uint64_t port_offloads = priv->dev->data->dev_conf.rxmode.offloads;
	uint64_t queue_supp_offloads =
		mlx5_priv_get_rx_queue_offloads(priv);
	uint64_t port_supp_offloads = mlx5_priv_get_rx_port_offloads(priv);

	if ((offloads & (queue_supp_offloads | port_supp_offloads)) !=
	    offloads)
		return 0;
	if (((port_offloads ^ offloads) & port_supp_offloads))
		return 0;
	return 1;
}

/**
 * DPDK callback to configure a RX queue.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param idx
 *   RX queue index.
 * @param desc
 *   Number of descriptors to configure in queue.
 * @param socket
 *   NUMA socket on which memory must be allocated.
 * @param[in] conf
 *   Thresholds parameters.
 * @param mp
 *   Memory pool for buffer allocations.
 *
 * @return
 *   0 on success, negative errno value on failure.
 */
int
mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
		    unsigned int socket, const struct rte_eth_rxconf *conf,
		    struct rte_mempool *mp)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_rxq_data *rxq = (*priv->rxqs)[idx];
	struct mlx5_rxq_ctrl *rxq_ctrl =
		container_of(rxq, struct mlx5_rxq_ctrl, rxq);
	int ret = 0;

	priv_lock(priv);
	if (!rte_is_power_of_2(desc)) {
		desc = 1 << log2above(desc);
		WARN("%p: increased number of descriptors in RX queue %u"
		     " to the next power of two (%d)",
		     (void *)dev, idx, desc);
	}
	DEBUG("%p: configuring queue %u for %u descriptors",
	      (void *)dev, idx, desc);
	if (idx >= priv->rxqs_n) {
		ERROR("%p: queue index out of range (%u >= %u)",
		      (void *)dev, idx, priv->rxqs_n);
		priv_unlock(priv);
		return -EOVERFLOW;
	}
	if (!priv_is_rx_queue_offloads_allowed(priv, conf->offloads)) {
		ret = ENOTSUP;
		ERROR("%p: Rx queue offloads 0x%" PRIx64 " don't match port "
		      "offloads 0x%" PRIx64 " or supported offloads 0x%" PRIx64,
		      (void *)dev, conf->offloads,
		      dev->data->dev_conf.rxmode.offloads,
		      (mlx5_priv_get_rx_port_offloads(priv) |
		       mlx5_priv_get_rx_queue_offloads(priv)));
		goto out;
	}
	if (!mlx5_priv_rxq_releasable(priv, idx)) {
		ret = EBUSY;
		ERROR("%p: unable to release queue index %u",
		      (void *)dev, idx);
		goto out;
	}
	mlx5_priv_rxq_release(priv, idx);
	rxq_ctrl = mlx5_priv_rxq_new(priv, idx, desc, socket, conf, mp);
	if (!rxq_ctrl) {
		ERROR("%p: unable to allocate queue index %u",
		      (void *)dev, idx);
		ret = ENOMEM;
		goto out;
	}
	DEBUG("%p: adding RX queue %p to list",
	      (void *)dev, (void *)rxq_ctrl);
	(*priv->rxqs)[idx] = &rxq_ctrl->rxq;
out:
	priv_unlock(priv);
	return -ret;
}

/**
 * DPDK callback to release a RX queue.
 *
 * @param dpdk_rxq
 *   Generic RX queue pointer.
 */
void
mlx5_rx_queue_release(void *dpdk_rxq)
{
	struct mlx5_rxq_data *rxq = (struct mlx5_rxq_data *)dpdk_rxq;
	struct mlx5_rxq_ctrl *rxq_ctrl;
	struct priv *priv;

	if (rxq == NULL)
		return;
	rxq_ctrl = container_of(rxq, struct mlx5_rxq_ctrl, rxq);
	priv = rxq_ctrl->priv;
	priv_lock(priv);
	if (!mlx5_priv_rxq_releasable(priv, rxq_ctrl->rxq.stats.idx))
		rte_panic("Rx queue %p is still used by a flow and cannot be"
			  " removed\n", (void *)rxq_ctrl);
	mlx5_priv_rxq_release(priv, rxq_ctrl->rxq.stats.idx);
	priv_unlock(priv);
}

/**
 * Allocate queue vector and fill epoll fd list for Rx interrupts.
 *
 * @param priv
 *   Pointer to private structure.
 *
 * @return
 *   0 on success, negative on failure.
 */
int
priv_rx_intr_vec_enable(struct priv *priv)
{
	unsigned int i;
	unsigned int rxqs_n = priv->rxqs_n;
	unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID);
	unsigned int count = 0;
	struct rte_intr_handle *intr_handle = priv->dev->intr_handle;

	if (!priv->dev->data->dev_conf.intr_conf.rxq)
		return 0;
	priv_rx_intr_vec_disable(priv);
	intr_handle->intr_vec = malloc(n * sizeof(intr_handle->intr_vec[0]));
	if (intr_handle->intr_vec == NULL) {
		ERROR("failed to allocate memory for interrupt vector,"
		      " Rx interrupts will not be supported");
		return -ENOMEM;
	}
	intr_handle->type = RTE_INTR_HANDLE_EXT;
	for (i = 0; i != n; ++i) {
		/* This rxq ibv must not be released in this function. */
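		/*
		 * The reference taken here keeps the Verbs object and its
		 * completion channel fd valid; it is dropped later by
		 * priv_rx_intr_vec_disable().
		 */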
		struct mlx5_rxq_ibv *rxq_ibv = mlx5_priv_rxq_ibv_get(priv, i);
		int fd;
		int flags;
		int rc;

		/* Skip queues that cannot request interrupts. */
		if (!rxq_ibv || !rxq_ibv->channel) {
			/* Use invalid intr_vec[] index to disable entry. */
			intr_handle->intr_vec[i] =
				RTE_INTR_VEC_RXTX_OFFSET +
				RTE_MAX_RXTX_INTR_VEC_ID;
			continue;
		}
		if (count >= RTE_MAX_RXTX_INTR_VEC_ID) {
			ERROR("too many Rx queues for interrupt vector size"
			      " (%d), Rx interrupts cannot be enabled",
			      RTE_MAX_RXTX_INTR_VEC_ID);
			priv_rx_intr_vec_disable(priv);
			return -1;
		}
		fd = rxq_ibv->channel->fd;
		flags = fcntl(fd, F_GETFL);
		rc = fcntl(fd, F_SETFL, flags | O_NONBLOCK);
		if (rc < 0) {
			ERROR("failed to make Rx interrupt file descriptor"
			      " %d non-blocking for queue index %d", fd, i);
			priv_rx_intr_vec_disable(priv);
			return -1;
		}
		intr_handle->intr_vec[i] = RTE_INTR_VEC_RXTX_OFFSET + count;
		intr_handle->efds[count] = fd;
		count++;
	}
	if (!count)
		priv_rx_intr_vec_disable(priv);
	else
		intr_handle->nb_efd = count;
	return 0;
}

/**
 * Clean up Rx interrupts handler.
 *
 * @param priv
 *   Pointer to private structure.
 */
void
priv_rx_intr_vec_disable(struct priv *priv)
{
	struct rte_intr_handle *intr_handle = priv->dev->intr_handle;
	unsigned int i;
	unsigned int rxqs_n = priv->rxqs_n;
	unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID);

	if (!priv->dev->data->dev_conf.intr_conf.rxq)
		return;
	if (!intr_handle->intr_vec)
		goto free;
	for (i = 0; i != n; ++i) {
		struct mlx5_rxq_ctrl *rxq_ctrl;
		struct mlx5_rxq_data *rxq_data;

		if (intr_handle->intr_vec[i] == RTE_INTR_VEC_RXTX_OFFSET +
		    RTE_MAX_RXTX_INTR_VEC_ID)
			continue;
		/*
		 * Access the queue directly to release the reference
		 * kept in priv_rx_intr_vec_enable().
		 */
		rxq_data = (*priv->rxqs)[i];
		rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
		mlx5_priv_rxq_ibv_release(priv, rxq_ctrl->ibv);
	}
free:
	rte_intr_free_epoll_fd(intr_handle);
	if (intr_handle->intr_vec)
		free(intr_handle->intr_vec);
	intr_handle->nb_efd = 0;
	intr_handle->intr_vec = NULL;
}

/**
 * MLX5 CQ notification.
 *
 * @param rxq
 *   Pointer to receive queue structure.
 * @param sq_n_rxq
 *   Sequence number per receive queue.
 */
static inline void
mlx5_arm_cq(struct mlx5_rxq_data *rxq, int sq_n_rxq)
{
	int sq_n = 0;
	uint32_t doorbell_hi;
	uint64_t doorbell;
	void *cq_db_reg = (char *)rxq->cq_uar + MLX5_CQ_DOORBELL;

	sq_n = sq_n_rxq & MLX5_CQ_SQN_MASK;
	doorbell_hi = sq_n << MLX5_CQ_SQN_OFFSET | (rxq->cq_ci & MLX5_CI_MASK);
	doorbell = (uint64_t)doorbell_hi << 32;
	doorbell |= rxq->cqn;
	rxq->cq_db[MLX5_CQ_ARM_DB] = rte_cpu_to_be_32(doorbell_hi);
	rte_write64(rte_cpu_to_be_64(doorbell), cq_db_reg);
}

/**
 * DPDK callback for Rx queue interrupt enable.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param rx_queue_id
 *   Rx queue number.
 *
 * @return
 *   0 on success, negative on failure.
 */
int
mlx5_rx_intr_enable(struct rte_eth_dev *dev, uint16_t rx_queue_id)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_rxq_data *rxq_data;
	struct mlx5_rxq_ctrl *rxq_ctrl;
	int ret = 0;

	priv_lock(priv);
	rxq_data = (*priv->rxqs)[rx_queue_id];
	if (!rxq_data) {
		ret = EINVAL;
		goto exit;
	}
	rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
	if (rxq_ctrl->irq) {
		struct mlx5_rxq_ibv *rxq_ibv;

		rxq_ibv = mlx5_priv_rxq_ibv_get(priv, rx_queue_id);
		if (!rxq_ibv) {
			ret = EINVAL;
			goto exit;
		}
		mlx5_arm_cq(rxq_data, rxq_data->cq_arm_sn);
		mlx5_priv_rxq_ibv_release(priv, rxq_ibv);
	}
exit:
	priv_unlock(priv);
	if (ret)
		WARN("unable to arm interrupt on rx queue %d", rx_queue_id);
	return -ret;
}

/**
 * DPDK callback for Rx queue interrupt disable.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param rx_queue_id
 *   Rx queue number.
 *
 * @return
 *   0 on success, negative on failure.
 */
int
mlx5_rx_intr_disable(struct rte_eth_dev *dev, uint16_t rx_queue_id)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_rxq_data *rxq_data;
	struct mlx5_rxq_ctrl *rxq_ctrl;
	struct mlx5_rxq_ibv *rxq_ibv = NULL;
	struct ibv_cq *ev_cq;
	void *ev_ctx;
	int ret = 0;

	priv_lock(priv);
	rxq_data = (*priv->rxqs)[rx_queue_id];
	if (!rxq_data) {
		ret = EINVAL;
		goto exit;
	}
	rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
	if (!rxq_ctrl->irq)
		goto exit;
	rxq_ibv = mlx5_priv_rxq_ibv_get(priv, rx_queue_id);
	if (!rxq_ibv) {
		ret = EINVAL;
		goto exit;
	}
	ret = ibv_get_cq_event(rxq_ibv->channel, &ev_cq, &ev_ctx);
	if (ret || ev_cq != rxq_ibv->cq) {
		ret = EINVAL;
		goto exit;
	}
	rxq_data->cq_arm_sn++;
	ibv_ack_cq_events(rxq_ibv->cq, 1);
exit:
	if (rxq_ibv)
		mlx5_priv_rxq_ibv_release(priv, rxq_ibv);
	priv_unlock(priv);
	if (ret)
		WARN("unable to disable interrupt on rx queue %d",
		     rx_queue_id);
	return -ret;
}

/**
 * Create the Rx queue Verbs object.
 *
 * @param priv
 *   Pointer to private structure.
 * @param idx
 *   Queue index in DPDK Rx queue array.
 *
 * @return
 *   The Verbs object initialised if it could be created, NULL on failure.
 */
struct mlx5_rxq_ibv*
mlx5_priv_rxq_ibv_new(struct priv *priv, uint16_t idx)
{
	struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx];
	struct mlx5_rxq_ctrl *rxq_ctrl =
		container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
	struct ibv_wq_attr mod;
	union {
		struct {
			struct ibv_cq_init_attr_ex ibv;
			struct mlx5dv_cq_init_attr mlx5;
		} cq;
		struct ibv_wq_init_attr wq;
		struct ibv_cq_ex cq_attr;
	} attr;
	unsigned int cqe_n = (1 << rxq_data->elts_n) - 1;
	struct mlx5_rxq_ibv *tmpl;
	struct mlx5dv_cq cq_info;
	struct mlx5dv_rwq rwq;
	unsigned int i;
	int ret = 0;
	struct mlx5dv_obj obj;
	struct mlx5_dev_config *config = &priv->config;

	assert(rxq_data);
	assert(!rxq_ctrl->ibv);
	priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_RX_QUEUE;
	priv->verbs_alloc_ctx.obj = rxq_ctrl;
	tmpl = rte_calloc_socket(__func__, 1, sizeof(*tmpl), 0,
				 rxq_ctrl->socket);
	if (!tmpl) {
		ERROR("%p: cannot allocate verbs resources",
		      (void *)rxq_ctrl);
		goto error;
	}
	tmpl->rxq_ctrl = rxq_ctrl;
	/* Use the entire RX mempool as the memory region. */
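	/*
	 * Look up a memory region already registered for this mempool
	 * first; a new one is registered only if none exists yet.
	 */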
	tmpl->mr = priv_mr_get(priv, rxq_data->mp);
	if (!tmpl->mr) {
		tmpl->mr = priv_mr_new(priv, rxq_data->mp);
		if (!tmpl->mr) {
			ERROR("%p: MR creation failure", (void *)rxq_ctrl);
			goto error;
		}
	}
	if (rxq_ctrl->irq) {
		tmpl->channel = ibv_create_comp_channel(priv->ctx);
		if (!tmpl->channel) {
			ERROR("%p: Comp Channel creation failure",
			      (void *)rxq_ctrl);
			goto error;
		}
	}
	attr.cq.ibv = (struct ibv_cq_init_attr_ex){
		.cqe = cqe_n,
		.channel = tmpl->channel,
		.comp_mask = 0,
	};
	attr.cq.mlx5 = (struct mlx5dv_cq_init_attr){
		.comp_mask = 0,
	};
	if (config->cqe_comp && !rxq_data->hw_timestamp) {
		attr.cq.mlx5.comp_mask |=
			MLX5DV_CQ_INIT_ATTR_MASK_COMPRESSED_CQE;
		attr.cq.mlx5.cqe_comp_res_format = MLX5DV_CQE_RES_FORMAT_HASH;
		/*
		 * For vectorized Rx, it must not be doubled in order to
		 * make cq_ci and rq_ci aligned.
		 */
		if (rxq_check_vec_support(rxq_data) < 0)
			attr.cq.ibv.cqe *= 2;
	} else if (config->cqe_comp && rxq_data->hw_timestamp) {
		DEBUG("Rx CQE compression is disabled for HW timestamp");
	}
	tmpl->cq = ibv_cq_ex_to_cq(mlx5dv_create_cq(priv->ctx, &attr.cq.ibv,
						    &attr.cq.mlx5));
	if (tmpl->cq == NULL) {
		ERROR("%p: CQ creation failure", (void *)rxq_ctrl);
		goto error;
	}
	DEBUG("priv->device_attr.max_qp_wr is %d",
	      priv->device_attr.orig_attr.max_qp_wr);
	DEBUG("priv->device_attr.max_sge is %d",
	      priv->device_attr.orig_attr.max_sge);
	attr.wq = (struct ibv_wq_init_attr){
		.wq_context = NULL, /* Could be useful in the future. */
		.wq_type = IBV_WQT_RQ,
		/* Max number of outstanding WRs. */
		.max_wr = (1 << rxq_data->elts_n) >> rxq_data->sges_n,
		/* Max number of scatter/gather elements in a WR. */
		.max_sge = 1 << rxq_data->sges_n,
		.pd = priv->pd,
		.cq = tmpl->cq,
		.comp_mask =
			IBV_WQ_FLAGS_CVLAN_STRIPPING |
			0,
		.create_flags = (rxq_data->vlan_strip ?
				 IBV_WQ_FLAGS_CVLAN_STRIPPING :
				 0),
	};
	/* By default, FCS (CRC) is stripped by hardware. */
	if (rxq_data->crc_present) {
		attr.wq.create_flags |= IBV_WQ_FLAGS_SCATTER_FCS;
		attr.wq.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
	}
#ifdef HAVE_IBV_WQ_FLAG_RX_END_PADDING
	if (config->hw_padding) {
		attr.wq.create_flags |= IBV_WQ_FLAG_RX_END_PADDING;
		attr.wq.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
	}
#endif
	tmpl->wq = ibv_create_wq(priv->ctx, &attr.wq);
	if (tmpl->wq == NULL) {
		ERROR("%p: WQ creation failure", (void *)rxq_ctrl);
		goto error;
	}
	/*
	 * Make sure number of WRs*SGEs match expectations since a queue
	 * cannot allocate more than "desc" buffers.
	 */
	if (((int)attr.wq.max_wr !=
	     ((1 << rxq_data->elts_n) >> rxq_data->sges_n)) ||
	    ((int)attr.wq.max_sge != (1 << rxq_data->sges_n))) {
		ERROR("%p: requested %u*%u but got %u*%u WRs*SGEs",
		      (void *)rxq_ctrl,
		      ((1 << rxq_data->elts_n) >> rxq_data->sges_n),
		      (1 << rxq_data->sges_n),
		      attr.wq.max_wr, attr.wq.max_sge);
		goto error;
	}
	/* Change queue state to ready. */
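	/* A WQ is created in the RESET state and cannot receive until RDY. */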
	mod = (struct ibv_wq_attr){
		.attr_mask = IBV_WQ_ATTR_STATE,
		.wq_state = IBV_WQS_RDY,
	};
	ret = ibv_modify_wq(tmpl->wq, &mod);
	if (ret) {
		ERROR("%p: WQ state to IBV_WQS_RDY failed",
		      (void *)rxq_ctrl);
		goto error;
	}
	obj.cq.in = tmpl->cq;
	obj.cq.out = &cq_info;
	obj.rwq.in = tmpl->wq;
	obj.rwq.out = &rwq;
	ret = mlx5dv_init_obj(&obj, MLX5DV_OBJ_CQ | MLX5DV_OBJ_RWQ);
	if (ret != 0)
		goto error;
	if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) {
		ERROR("Wrong MLX5_CQE_SIZE environment variable value: "
		      "it should be set to %u", RTE_CACHE_LINE_SIZE);
		goto error;
	}
	/* Fill the rings. */
	rxq_data->wqes = (volatile struct mlx5_wqe_data_seg (*)[])
		(uintptr_t)rwq.buf;
	for (i = 0; (i != (unsigned int)(1 << rxq_data->elts_n)); ++i) {
		struct rte_mbuf *buf = (*rxq_data->elts)[i];
		volatile struct mlx5_wqe_data_seg *scat = &(*rxq_data->wqes)[i];

		/* scat->addr must be able to store a pointer. */
		assert(sizeof(scat->addr) >= sizeof(uintptr_t));
		*scat = (struct mlx5_wqe_data_seg){
			.addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf,
								  uintptr_t)),
			.byte_count = rte_cpu_to_be_32(DATA_LEN(buf)),
			.lkey = tmpl->mr->lkey,
		};
	}
	rxq_data->rq_db = rwq.dbrec;
	rxq_data->cqe_n = log2above(cq_info.cqe_cnt);
	rxq_data->cq_ci = 0;
	rxq_data->rq_ci = 0;
	rxq_data->rq_pi = 0;
	rxq_data->zip = (struct rxq_zip){
		.ai = 0,
	};
	rxq_data->cq_db = cq_info.dbrec;
	rxq_data->cqes = (volatile struct mlx5_cqe (*)[])(uintptr_t)cq_info.buf;
	rxq_data->cq_uar = cq_info.cq_uar;
	rxq_data->cqn = cq_info.cqn;
	rxq_data->cq_arm_sn = 0;
	/* Update doorbell counter. */
	rxq_data->rq_ci = (1 << rxq_data->elts_n) >> rxq_data->sges_n;
	rte_wmb();
	*rxq_data->rq_db = rte_cpu_to_be_32(rxq_data->rq_ci);
	DEBUG("%p: rxq updated with %p", (void *)rxq_ctrl, (void *)&tmpl);
	rte_atomic32_inc(&tmpl->refcnt);
	DEBUG("%p: Verbs Rx queue %p: refcnt %d", (void *)priv,
	      (void *)tmpl, rte_atomic32_read(&tmpl->refcnt));
	LIST_INSERT_HEAD(&priv->rxqsibv, tmpl, next);
	priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE;
	return tmpl;
error:
	/* tmpl may be NULL if its allocation failed above. */
	if (tmpl) {
		if (tmpl->wq)
			claim_zero(ibv_destroy_wq(tmpl->wq));
		if (tmpl->cq)
			claim_zero(ibv_destroy_cq(tmpl->cq));
		if (tmpl->channel)
			claim_zero(ibv_destroy_comp_channel(tmpl->channel));
		if (tmpl->mr)
			priv_mr_release(priv, tmpl->mr);
	}
	priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE;
	return NULL;
}

/**
 * Get an Rx queue Verbs object.
 *
 * @param priv
 *   Pointer to private structure.
 * @param idx
 *   Queue index in DPDK Rx queue array.
 *
 * @return
 *   The Verbs object if it exists.
 */
struct mlx5_rxq_ibv*
mlx5_priv_rxq_ibv_get(struct priv *priv, uint16_t idx)
{
	struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx];
	struct mlx5_rxq_ctrl *rxq_ctrl;

	if (idx >= priv->rxqs_n)
		return NULL;
	if (!rxq_data)
		return NULL;
	rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
	if (rxq_ctrl->ibv) {
		priv_mr_get(priv, rxq_data->mp);
		rte_atomic32_inc(&rxq_ctrl->ibv->refcnt);
		DEBUG("%p: Verbs Rx queue %p: refcnt %d", (void *)priv,
		      (void *)rxq_ctrl->ibv,
		      rte_atomic32_read(&rxq_ctrl->ibv->refcnt));
	}
	return rxq_ctrl->ibv;
}

/**
 * Release an Rx verbs queue object.
 *
 * @param priv
 *   Pointer to private structure.
 * @param rxq_ibv
 *   Verbs Rx queue object.
 *
 * @return
 *   0 on success, errno value on failure.
 */
int
mlx5_priv_rxq_ibv_release(struct priv *priv, struct mlx5_rxq_ibv *rxq_ibv)
{
	int ret;

	assert(rxq_ibv);
	assert(rxq_ibv->wq);
	assert(rxq_ibv->cq);
	assert(rxq_ibv->mr);
	ret = priv_mr_release(priv, rxq_ibv->mr);
	if (!ret)
		rxq_ibv->mr = NULL;
	DEBUG("%p: Verbs Rx queue %p: refcnt %d", (void *)priv,
	      (void *)rxq_ibv, rte_atomic32_read(&rxq_ibv->refcnt));
	if (rte_atomic32_dec_and_test(&rxq_ibv->refcnt)) {
		rxq_free_elts(rxq_ibv->rxq_ctrl);
		claim_zero(ibv_destroy_wq(rxq_ibv->wq));
		claim_zero(ibv_destroy_cq(rxq_ibv->cq));
		if (rxq_ibv->channel)
			claim_zero(ibv_destroy_comp_channel(rxq_ibv->channel));
		LIST_REMOVE(rxq_ibv, next);
		rte_free(rxq_ibv);
		return 0;
	}
	return EBUSY;
}

/**
 * Verify the Verbs Rx queue list is empty.
 *
 * @param priv
 *   Pointer to private structure.
 *
 * @return
 *   The number of objects not released.
 */
int
mlx5_priv_rxq_ibv_verify(struct priv *priv)
{
	int ret = 0;
	struct mlx5_rxq_ibv *rxq_ibv;

	LIST_FOREACH(rxq_ibv, &priv->rxqsibv, next) {
		DEBUG("%p: Verbs Rx queue %p still referenced", (void *)priv,
		      (void *)rxq_ibv);
		++ret;
	}
	return ret;
}

/**
 * Return true if a single reference exists on the object.
 *
 * @param priv
 *   Pointer to private structure.
 * @param rxq_ibv
 *   Verbs Rx queue object.
 */
int
mlx5_priv_rxq_ibv_releasable(struct priv *priv, struct mlx5_rxq_ibv *rxq_ibv)
{
	(void)priv;
	assert(rxq_ibv);
	return (rte_atomic32_read(&rxq_ibv->refcnt) == 1);
}

/**
 * Create a DPDK Rx queue.
 *
 * @param priv
 *   Pointer to private structure.
 * @param idx
 *   RX queue index.
 * @param desc
 *   Number of descriptors to configure in queue.
 * @param socket
 *   NUMA socket on which memory must be allocated.
 * @param[in] conf
 *   Thresholds parameters.
 * @param mp
 *   Memory pool for buffer allocations.
 *
 * @return
 *   A DPDK queue object on success.
 */
struct mlx5_rxq_ctrl*
mlx5_priv_rxq_new(struct priv *priv, uint16_t idx, uint16_t desc,
		  unsigned int socket, const struct rte_eth_rxconf *conf,
		  struct rte_mempool *mp)
{
	struct rte_eth_dev *dev = priv->dev;
	struct mlx5_rxq_ctrl *tmpl;
	unsigned int mb_len = rte_pktmbuf_data_room_size(mp);
	struct mlx5_dev_config *config = &priv->config;
	/*
	 * Always allocate extra slots, even if eventually
	 * the vector Rx will not be used.
	 */
	const uint16_t desc_n =
		desc + config->rx_vec_en * MLX5_VPMD_DESCS_PER_LOOP;

	tmpl = rte_calloc_socket("RXQ", 1,
				 sizeof(*tmpl) +
				 desc_n * sizeof(struct rte_mbuf *),
				 0, socket);
	if (!tmpl)
		return NULL;
	tmpl->socket = socket;
	if (priv->dev->data->dev_conf.intr_conf.rxq)
		tmpl->irq = 1;
	/* Enable scattered packets support for this queue if necessary. */
	assert(mb_len >= RTE_PKTMBUF_HEADROOM);
	if (dev->data->dev_conf.rxmode.max_rx_pkt_len <=
	    (mb_len - RTE_PKTMBUF_HEADROOM)) {
		tmpl->rxq.sges_n = 0;
	} else if (conf->offloads & DEV_RX_OFFLOAD_SCATTER) {
		unsigned int size =
			RTE_PKTMBUF_HEADROOM +
			dev->data->dev_conf.rxmode.max_rx_pkt_len;
		unsigned int sges_n;

		/*
		 * Determine the number of SGEs needed for a full packet
		 * and round it to the next power of two.
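		 * A packet spanning five mbufs, for instance, is given
		 * eight SGEs.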
		 */
		sges_n = log2above((size / mb_len) + !!(size % mb_len));
		tmpl->rxq.sges_n = sges_n;
		/* Make sure rxq.sges_n did not overflow. */
		size = mb_len * (1 << tmpl->rxq.sges_n);
		size -= RTE_PKTMBUF_HEADROOM;
		if (size < dev->data->dev_conf.rxmode.max_rx_pkt_len) {
			ERROR("%p: too many SGEs (%u) needed to handle"
			      " requested maximum packet size %u",
			      (void *)dev,
			      1 << sges_n,
			      dev->data->dev_conf.rxmode.max_rx_pkt_len);
			goto error;
		}
	} else {
		WARN("%p: the requested maximum Rx packet size (%u) is"
		     " larger than a single mbuf (%u) and scattered"
		     " mode has not been requested",
		     (void *)dev,
		     dev->data->dev_conf.rxmode.max_rx_pkt_len,
		     mb_len - RTE_PKTMBUF_HEADROOM);
	}
	DEBUG("%p: maximum number of segments per packet: %u",
	      (void *)dev, 1 << tmpl->rxq.sges_n);
	if (desc % (1 << tmpl->rxq.sges_n)) {
		ERROR("%p: number of RX queue descriptors (%u) is not a"
		      " multiple of SGEs per packet (%u)",
		      (void *)dev,
		      desc,
		      1 << tmpl->rxq.sges_n);
		goto error;
	}
	/* Toggle RX checksum offload if hardware supports it. */
	tmpl->rxq.csum = !!(conf->offloads & DEV_RX_OFFLOAD_CHECKSUM);
	tmpl->rxq.csum_l2tun = (!!(conf->offloads & DEV_RX_OFFLOAD_CHECKSUM) &&
				priv->config.hw_csum_l2tun);
	tmpl->rxq.hw_timestamp = !!(conf->offloads & DEV_RX_OFFLOAD_TIMESTAMP);
	/* Configure VLAN stripping. */
	tmpl->rxq.vlan_strip = !!(conf->offloads & DEV_RX_OFFLOAD_VLAN_STRIP);
	/* By default, FCS (CRC) is stripped by hardware. */
	if (conf->offloads & DEV_RX_OFFLOAD_CRC_STRIP) {
		tmpl->rxq.crc_present = 0;
	} else if (config->hw_fcs_strip) {
		tmpl->rxq.crc_present = 1;
	} else {
		WARN("%p: CRC stripping has been disabled but will still"
		     " be performed by hardware, make sure MLNX_OFED and"
		     " firmware are up to date",
		     (void *)dev);
		tmpl->rxq.crc_present = 0;
	}
	DEBUG("%p: CRC stripping is %s, %u bytes will be subtracted from"
	      " incoming frames to hide it",
	      (void *)dev,
	      tmpl->rxq.crc_present ? "disabled" : "enabled",
	      tmpl->rxq.crc_present << 2);
	/* Save port ID. */
	tmpl->rxq.rss_hash = priv->rxqs_n > 1;
	tmpl->rxq.port_id = dev->data->port_id;
	tmpl->priv = priv;
	tmpl->rxq.mp = mp;
	tmpl->rxq.stats.idx = idx;
	tmpl->rxq.elts_n = log2above(desc);
	tmpl->rxq.elts =
		(struct rte_mbuf *(*)[1 << tmpl->rxq.elts_n])(tmpl + 1);
	rte_atomic32_inc(&tmpl->refcnt);
	DEBUG("%p: Rx queue %p: refcnt %d", (void *)priv,
	      (void *)tmpl, rte_atomic32_read(&tmpl->refcnt));
	LIST_INSERT_HEAD(&priv->rxqsctrl, tmpl, next);
	return tmpl;
error:
	rte_free(tmpl);
	return NULL;
}

/**
 * Get a Rx queue.
 *
 * @param priv
 *   Pointer to private structure.
 * @param idx
 *   RX queue index.
 *
 * @return
 *   A pointer to the queue if it exists.
 */
struct mlx5_rxq_ctrl*
mlx5_priv_rxq_get(struct priv *priv, uint16_t idx)
{
	struct mlx5_rxq_ctrl *rxq_ctrl = NULL;

	if ((*priv->rxqs)[idx]) {
		rxq_ctrl = container_of((*priv->rxqs)[idx],
					struct mlx5_rxq_ctrl,
					rxq);

		mlx5_priv_rxq_ibv_get(priv, idx);
		rte_atomic32_inc(&rxq_ctrl->refcnt);
		DEBUG("%p: Rx queue %p: refcnt %d", (void *)priv,
		      (void *)rxq_ctrl, rte_atomic32_read(&rxq_ctrl->refcnt));
	}
	return rxq_ctrl;
}

/**
 * Release a Rx queue.
 *
 * @param priv
 *   Pointer to private structure.
 * @param idx
 *   RX queue index.
 *
 * @return
 *   0 on success, errno value on failure.
 */
int
mlx5_priv_rxq_release(struct priv *priv, uint16_t idx)
{
	struct mlx5_rxq_ctrl *rxq_ctrl;

	if (!(*priv->rxqs)[idx])
		return 0;
	rxq_ctrl = container_of((*priv->rxqs)[idx], struct mlx5_rxq_ctrl, rxq);
	assert(rxq_ctrl->priv);
	if (rxq_ctrl->ibv) {
		int ret;

		ret = mlx5_priv_rxq_ibv_release(rxq_ctrl->priv, rxq_ctrl->ibv);
		if (!ret)
			rxq_ctrl->ibv = NULL;
	}
	DEBUG("%p: Rx queue %p: refcnt %d", (void *)priv,
	      (void *)rxq_ctrl, rte_atomic32_read(&rxq_ctrl->refcnt));
	if (rte_atomic32_dec_and_test(&rxq_ctrl->refcnt)) {
		LIST_REMOVE(rxq_ctrl, next);
		rte_free(rxq_ctrl);
		(*priv->rxqs)[idx] = NULL;
		return 0;
	}
	return EBUSY;
}

/**
 * Verify if the queue can be released.
 *
 * @param priv
 *   Pointer to private structure.
 * @param idx
 *   RX queue index.
 *
 * @return
 *   1 if the queue can be released.
 */
int
mlx5_priv_rxq_releasable(struct priv *priv, uint16_t idx)
{
	struct mlx5_rxq_ctrl *rxq_ctrl;

	if (!(*priv->rxqs)[idx])
		return -1;
	rxq_ctrl = container_of((*priv->rxqs)[idx], struct mlx5_rxq_ctrl, rxq);
	return (rte_atomic32_read(&rxq_ctrl->refcnt) == 1);
}

/**
 * Verify the Rx queue list is empty.
 *
 * @param priv
 *   Pointer to private structure.
 *
 * @return
 *   The number of objects not released.
 */
int
mlx5_priv_rxq_verify(struct priv *priv)
{
	struct mlx5_rxq_ctrl *rxq_ctrl;
	int ret = 0;

	LIST_FOREACH(rxq_ctrl, &priv->rxqsctrl, next) {
		DEBUG("%p: Rx Queue %p still referenced", (void *)priv,
		      (void *)rxq_ctrl);
		++ret;
	}
	return ret;
}

/**
 * Create an indirection table.
 *
 * @param priv
 *   Pointer to private structure.
 * @param queues
 *   Queues entering in the indirection table.
 * @param queues_n
 *   Number of queues in the array.
 *
 * @return
 *   A new indirection table.
 */
struct mlx5_ind_table_ibv*
mlx5_priv_ind_table_ibv_new(struct priv *priv, uint16_t queues[],
			    uint16_t queues_n)
{
	struct mlx5_ind_table_ibv *ind_tbl;
	const unsigned int wq_n = rte_is_power_of_2(queues_n) ?
		log2above(queues_n) :
		log2above(priv->config.ind_table_max_size);
	struct ibv_wq *wq[1 << wq_n];
	unsigned int i;
	unsigned int j;

	ind_tbl = rte_calloc(__func__, 1, sizeof(*ind_tbl) +
			     queues_n * sizeof(uint16_t), 0);
	if (!ind_tbl)
		return NULL;
	for (i = 0; i != queues_n; ++i) {
		struct mlx5_rxq_ctrl *rxq =
			mlx5_priv_rxq_get(priv, queues[i]);

		if (!rxq)
			goto error;
		wq[i] = rxq->ibv->wq;
		ind_tbl->queues[i] = queues[i];
	}
	ind_tbl->queues_n = queues_n;
	/* Finalise indirection table. */
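	/*
	 * The table size must be a power of two; pad the remaining
	 * entries by cycling over the configured queues.
	 */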
	for (j = 0; i != (unsigned int)(1 << wq_n); ++i, ++j)
		wq[i] = wq[j];
	ind_tbl->ind_table = ibv_create_rwq_ind_table(
		priv->ctx,
		&(struct ibv_rwq_ind_table_init_attr){
			.log_ind_tbl_size = wq_n,
			.ind_tbl = wq,
			.comp_mask = 0,
		});
	if (!ind_tbl->ind_table)
		goto error;
	rte_atomic32_inc(&ind_tbl->refcnt);
	LIST_INSERT_HEAD(&priv->ind_tbls, ind_tbl, next);
	DEBUG("%p: Indirection table %p: refcnt %d", (void *)priv,
	      (void *)ind_tbl, rte_atomic32_read(&ind_tbl->refcnt));
	return ind_tbl;
error:
	rte_free(ind_tbl);
	DEBUG("%p cannot create indirection table", (void *)priv);
	return NULL;
}

/**
 * Get an indirection table.
 *
 * @param priv
 *   Pointer to private structure.
 * @param queues
 *   Queues entering in the indirection table.
 * @param queues_n
 *   Number of queues in the array.
 *
 * @return
 *   An indirection table if found.
 */
struct mlx5_ind_table_ibv*
mlx5_priv_ind_table_ibv_get(struct priv *priv, uint16_t queues[],
			    uint16_t queues_n)
{
	struct mlx5_ind_table_ibv *ind_tbl;

	LIST_FOREACH(ind_tbl, &priv->ind_tbls, next) {
		if ((ind_tbl->queues_n == queues_n) &&
		    (memcmp(ind_tbl->queues, queues,
			    ind_tbl->queues_n * sizeof(ind_tbl->queues[0]))
		     == 0))
			break;
	}
	if (ind_tbl) {
		unsigned int i;

		rte_atomic32_inc(&ind_tbl->refcnt);
		DEBUG("%p: Indirection table %p: refcnt %d", (void *)priv,
		      (void *)ind_tbl, rte_atomic32_read(&ind_tbl->refcnt));
		for (i = 0; i != ind_tbl->queues_n; ++i)
			mlx5_priv_rxq_get(priv, ind_tbl->queues[i]);
	}
	return ind_tbl;
}

/**
 * Release an indirection table.
 *
 * @param priv
 *   Pointer to private structure.
 * @param ind_tbl
 *   Indirection table to release.
 *
 * @return
 *   0 on success, errno value on failure.
 */
int
mlx5_priv_ind_table_ibv_release(struct priv *priv,
				struct mlx5_ind_table_ibv *ind_tbl)
{
	unsigned int i;

	DEBUG("%p: Indirection table %p: refcnt %d", (void *)priv,
	      (void *)ind_tbl, rte_atomic32_read(&ind_tbl->refcnt));
	if (rte_atomic32_dec_and_test(&ind_tbl->refcnt))
		claim_zero(ibv_destroy_rwq_ind_table(ind_tbl->ind_table));
	for (i = 0; i != ind_tbl->queues_n; ++i)
		claim_nonzero(mlx5_priv_rxq_release(priv, ind_tbl->queues[i]));
	if (!rte_atomic32_read(&ind_tbl->refcnt)) {
		LIST_REMOVE(ind_tbl, next);
		rte_free(ind_tbl);
		return 0;
	}
	return EBUSY;
}

/**
 * Verify the Verbs indirection table list is empty.
 *
 * @param priv
 *   Pointer to private structure.
 *
 * @return
 *   The number of objects not released.
 */
int
mlx5_priv_ind_table_ibv_verify(struct priv *priv)
{
	struct mlx5_ind_table_ibv *ind_tbl;
	int ret = 0;

	LIST_FOREACH(ind_tbl, &priv->ind_tbls, next) {
		DEBUG("%p: Verbs indirection table %p still referenced",
		      (void *)priv, (void *)ind_tbl);
		++ret;
	}
	return ret;
}

/**
 * Create an Rx Hash queue.
 *
 * @param priv
 *   Pointer to private structure.
 * @param rss_key
 *   RSS key for the Rx hash queue.
 * @param rss_key_len
 *   RSS key length.
 * @param hash_fields
 *   Verbs protocol hash field to make the RSS on.
 * @param queues
 *   Queues entering in hash queue. In case of empty hash_fields, only the
 *   first queue index will be taken for the indirection table.
 * @param queues_n
 *   Number of queues.
 *
 * @return
 *   A hash Rx queue on success.
 */
struct mlx5_hrxq*
mlx5_priv_hrxq_new(struct priv *priv, uint8_t *rss_key, uint8_t rss_key_len,
		   uint64_t hash_fields, uint16_t queues[], uint16_t queues_n)
{
	struct mlx5_hrxq *hrxq;
	struct mlx5_ind_table_ibv *ind_tbl;
	struct ibv_qp *qp;

	queues_n = hash_fields ? queues_n : 1;
	ind_tbl = mlx5_priv_ind_table_ibv_get(priv, queues, queues_n);
	if (!ind_tbl)
		ind_tbl = mlx5_priv_ind_table_ibv_new(priv, queues, queues_n);
	if (!ind_tbl)
		return NULL;
	qp = ibv_create_qp_ex(
		priv->ctx,
		&(struct ibv_qp_init_attr_ex){
			.qp_type = IBV_QPT_RAW_PACKET,
			.comp_mask =
				IBV_QP_INIT_ATTR_PD |
				IBV_QP_INIT_ATTR_IND_TABLE |
				IBV_QP_INIT_ATTR_RX_HASH,
			.rx_hash_conf = (struct ibv_rx_hash_conf){
				.rx_hash_function = IBV_RX_HASH_FUNC_TOEPLITZ,
				.rx_hash_key_len = rss_key_len,
				.rx_hash_key = rss_key,
				.rx_hash_fields_mask = hash_fields,
			},
			.rwq_ind_tbl = ind_tbl->ind_table,
			.pd = priv->pd,
		});
	if (!qp)
		goto error;
	hrxq = rte_calloc(__func__, 1, sizeof(*hrxq) + rss_key_len, 0);
	if (!hrxq)
		goto error;
	hrxq->ind_table = ind_tbl;
	hrxq->qp = qp;
	hrxq->rss_key_len = rss_key_len;
	hrxq->hash_fields = hash_fields;
	memcpy(hrxq->rss_key, rss_key, rss_key_len);
	rte_atomic32_inc(&hrxq->refcnt);
	LIST_INSERT_HEAD(&priv->hrxqs, hrxq, next);
	DEBUG("%p: Hash Rx queue %p: refcnt %d", (void *)priv,
	      (void *)hrxq, rte_atomic32_read(&hrxq->refcnt));
	return hrxq;
error:
	mlx5_priv_ind_table_ibv_release(priv, ind_tbl);
	if (qp)
		claim_zero(ibv_destroy_qp(qp));
	return NULL;
}

/**
 * Get an Rx Hash queue.
 *
 * @param priv
 *   Pointer to private structure.
 * @param rss_key
 *   RSS key for the Rx hash queue.
 * @param rss_key_len
 *   RSS key length.
 * @param hash_fields
 *   Verbs protocol hash field to make the RSS on.
 * @param queues
 *   Queues entering in hash queue. In case of empty hash_fields, only the
 *   first queue index will be taken for the indirection table.
 * @param queues_n
 *   Number of queues.
 *
 * @return
 *   A hash Rx queue on success.
 */
struct mlx5_hrxq*
mlx5_priv_hrxq_get(struct priv *priv, uint8_t *rss_key, uint8_t rss_key_len,
		   uint64_t hash_fields, uint16_t queues[], uint16_t queues_n)
{
	struct mlx5_hrxq *hrxq;

	queues_n = hash_fields ? queues_n : 1;
	LIST_FOREACH(hrxq, &priv->hrxqs, next) {
		struct mlx5_ind_table_ibv *ind_tbl;

		if (hrxq->rss_key_len != rss_key_len)
			continue;
		if (memcmp(hrxq->rss_key, rss_key, rss_key_len))
			continue;
		if (hrxq->hash_fields != hash_fields)
			continue;
		ind_tbl = mlx5_priv_ind_table_ibv_get(priv, queues, queues_n);
		if (!ind_tbl)
			continue;
		if (ind_tbl != hrxq->ind_table) {
			mlx5_priv_ind_table_ibv_release(priv, ind_tbl);
			continue;
		}
		rte_atomic32_inc(&hrxq->refcnt);
		DEBUG("%p: Hash Rx queue %p: refcnt %d", (void *)priv,
		      (void *)hrxq, rte_atomic32_read(&hrxq->refcnt));
		return hrxq;
	}
	return NULL;
}

/**
 * Release the hash Rx queue.
 *
 * @param priv
 *   Pointer to private structure.
 * @param hrxq
 *   Pointer to Hash Rx queue to release.
 *
 * @return
 *   0 on success, errno value on failure.
 */
int
mlx5_priv_hrxq_release(struct priv *priv, struct mlx5_hrxq *hrxq)
{
	DEBUG("%p: Hash Rx queue %p: refcnt %d", (void *)priv,
	      (void *)hrxq, rte_atomic32_read(&hrxq->refcnt));
	if (rte_atomic32_dec_and_test(&hrxq->refcnt)) {
		claim_zero(ibv_destroy_qp(hrxq->qp));
		mlx5_priv_ind_table_ibv_release(priv, hrxq->ind_table);
		LIST_REMOVE(hrxq, next);
		rte_free(hrxq);
		return 0;
	}
	claim_nonzero(mlx5_priv_ind_table_ibv_release(priv, hrxq->ind_table));
	return EBUSY;
}

/**
 * Verify the Verbs Hash Rx queue list is empty.
 *
 * @param priv
 *   Pointer to private structure.
 *
 * @return
 *   The number of objects not released.
 */
int
mlx5_priv_hrxq_ibv_verify(struct priv *priv)
{
	struct mlx5_hrxq *hrxq;
	int ret = 0;

	LIST_FOREACH(hrxq, &priv->hrxqs, next) {
		DEBUG("%p: Verbs Hash Rx queue %p still referenced",
		      (void *)priv, (void *)hrxq);
		++ret;
	}
	return ret;
}