/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2017 6WIND S.A.
 * Copyright 2017 Mellanox Technologies, Ltd
 */

/**
 * @file
 * Rx queues configuration for mlx4 driver.
 */

#include <errno.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Verbs headers do not support -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/mlx4dv.h>
#include <infiniband/verbs.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif

#include <rte_byteorder.h>
#include <rte_common.h>
#include <rte_errno.h>
#include <ethdev_driver.h>
#include <rte_flow.h>
#include <rte_malloc.h>
#include <rte_mbuf.h>
#include <rte_mempool.h>

#include "mlx4.h"
#include "mlx4_glue.h"
#include "mlx4_flow.h"
#include "mlx4_rxtx.h"
#include "mlx4_utils.h"

/**
 * Historical RSS hash key.
 *
 * This used to be the default for mlx4 in Linux before v3.19 switched to
 * generating random hash keys through netdev_rss_key_fill().
 *
 * It is used in this PMD for consistency with past DPDK releases but can
 * now be overridden through user configuration.
 *
 * Note: this is not const to work around API quirks.
 */
uint8_t
mlx4_rss_hash_key_default[MLX4_RSS_HASH_KEY_SIZE] = {
	0x2c, 0xc6, 0x81, 0xd1,
	0x5b, 0xdb, 0xf4, 0xf7,
	0xfc, 0xa2, 0x83, 0x19,
	0xdb, 0x1a, 0x3e, 0x94,
	0x6b, 0x9e, 0x38, 0xd9,
	0x2c, 0x9c, 0x03, 0xd1,
	0xad, 0x99, 0x44, 0xa7,
	0xd9, 0x56, 0x3d, 0x59,
	0x06, 0x3c, 0x25, 0xf3,
	0xfc, 0x1f, 0xdc, 0x2a,
};

/**
 * Obtain a RSS context with specified properties.
 *
 * Used when creating a flow rule targeting one or several Rx queues.
 *
 * If a matching RSS context already exists, it is returned with its
 * reference count incremented.
 *
 * @param priv
 *   Pointer to private structure.
 * @param fields
 *   Fields for RSS processing (Verbs format).
 * @param[in] key
 *   Hash key to use (whose size is exactly MLX4_RSS_HASH_KEY_SIZE).
 * @param queues
 *   Number of target queues.
 * @param[in] queue_id
 *   Target queues.
 *
 * @return
 *   Pointer to RSS context on success, NULL otherwise and rte_errno is set.
 */
struct mlx4_rss *
mlx4_rss_get(struct mlx4_priv *priv, uint64_t fields,
	     const uint8_t key[MLX4_RSS_HASH_KEY_SIZE],
	     uint16_t queues, const uint16_t queue_id[])
{
	struct mlx4_rss *rss;
	size_t queue_id_size = sizeof(queue_id[0]) * queues;

	LIST_FOREACH(rss, &priv->rss, next)
		if (fields == rss->fields &&
		    queues == rss->queues &&
		    !memcmp(key, rss->key, MLX4_RSS_HASH_KEY_SIZE) &&
		    !memcmp(queue_id, rss->queue_id, queue_id_size)) {
			++rss->refcnt;
			return rss;
		}
	rss = rte_malloc(__func__, offsetof(struct mlx4_rss, queue_id) +
			 queue_id_size, 0);
	if (!rss)
		goto error;
	*rss = (struct mlx4_rss){
		.priv = priv,
		.refcnt = 1,
		.usecnt = 0,
		.qp = NULL,
		.ind = NULL,
		.fields = fields,
		.queues = queues,
	};
	memcpy(rss->key, key, MLX4_RSS_HASH_KEY_SIZE);
	memcpy(rss->queue_id, queue_id, queue_id_size);
	LIST_INSERT_HEAD(&priv->rss, rss, next);
	return rss;
error:
	rte_errno = ENOMEM;
	return NULL;
}
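
/*
 * Note: mlx4_rss_get() only reserves the context; no Verbs objects exist
 * until mlx4_rss_attach() instantiates them. Callers typically bracket the
 * life of a flow rule with mlx4_rss_get()/mlx4_rss_put() and the time it
 * is enabled with mlx4_rss_attach()/mlx4_rss_detach().
 */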

/**
 * Release a RSS context instance.
 *
 * Used when destroying a flow rule targeting one or several Rx queues.
 *
 * This function decrements the reference count of the context and destroys
 * it after reaching 0. The context must have no users at this point; all
 * prior calls to mlx4_rss_attach() must have been followed by matching
 * calls to mlx4_rss_detach().
 *
 * @param rss
 *   RSS context to release.
 */
void
mlx4_rss_put(struct mlx4_rss *rss)
{
	MLX4_ASSERT(rss->refcnt);
	if (--rss->refcnt)
		return;
	MLX4_ASSERT(!rss->usecnt);
	MLX4_ASSERT(!rss->qp);
	MLX4_ASSERT(!rss->ind);
	LIST_REMOVE(rss, next);
	rte_free(rss);
}

/**
 * Attach a user to a RSS context instance.
 *
 * Used when the RSS QP and indirection table objects must be instantiated,
 * that is, when a flow rule must be enabled.
 *
 * This function increments the usage count of the context.
 *
 * @param rss
 *   RSS context to attach to.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx4_rss_attach(struct mlx4_rss *rss)
{
	MLX4_ASSERT(rss->refcnt);
	if (rss->usecnt++) {
		MLX4_ASSERT(rss->qp);
		MLX4_ASSERT(rss->ind);
		return 0;
	}

	struct ibv_wq *ind_tbl[rss->queues];
	struct mlx4_priv *priv = rss->priv;
	struct rte_eth_dev *dev = ETH_DEV(priv);
	const char *msg;
	unsigned int i = 0;
	int ret;

	if (!rte_is_power_of_2(RTE_DIM(ind_tbl))) {
		ret = EINVAL;
		msg = "number of RSS queues must be a power of two";
		goto error;
	}
	for (i = 0; i != RTE_DIM(ind_tbl); ++i) {
		uint16_t id = rss->queue_id[i];
		struct rxq *rxq = NULL;

		if (id < dev->data->nb_rx_queues)
			rxq = dev->data->rx_queues[id];
		if (!rxq) {
			ret = EINVAL;
			msg = "RSS target queue is not configured";
			goto error;
		}
		ret = mlx4_rxq_attach(rxq);
		if (ret) {
			ret = -ret;
			msg = "unable to attach RSS target queue";
			goto error;
		}
		ind_tbl[i] = rxq->wq;
	}
	rss->ind = mlx4_glue->create_rwq_ind_table
		(priv->ctx,
		 &(struct ibv_rwq_ind_table_init_attr){
			.log_ind_tbl_size = rte_log2_u32(RTE_DIM(ind_tbl)),
			.ind_tbl = ind_tbl,
			.comp_mask = 0,
		 });
	if (!rss->ind) {
		ret = errno ? errno : EINVAL;
		msg = "RSS indirection table creation failure";
		goto error;
	}
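	/*
	 * The hash QP created below is a raw packet QP that couples the
	 * Toeplitz hash configuration (key and field mask) with the
	 * indirection table above; receive buffers are posted on the
	 * individual WQs, the QP merely spreads incoming packets among them.
	 */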
	rss->qp = mlx4_glue->create_qp_ex
		(priv->ctx,
		 &(struct ibv_qp_init_attr_ex){
			.comp_mask = (IBV_QP_INIT_ATTR_PD |
				      IBV_QP_INIT_ATTR_RX_HASH |
				      IBV_QP_INIT_ATTR_IND_TABLE),
			.qp_type = IBV_QPT_RAW_PACKET,
			.pd = priv->pd,
			.rwq_ind_tbl = rss->ind,
			.rx_hash_conf = {
				.rx_hash_function = IBV_RX_HASH_FUNC_TOEPLITZ,
				.rx_hash_key_len = MLX4_RSS_HASH_KEY_SIZE,
				.rx_hash_key = rss->key,
				.rx_hash_fields_mask = rss->fields,
			},
		 });
	if (!rss->qp) {
		ret = errno ? errno : EINVAL;
		msg = "RSS hash QP creation failure";
		goto error;
	}
	ret = mlx4_glue->modify_qp
		(rss->qp,
		 &(struct ibv_qp_attr){
			.qp_state = IBV_QPS_INIT,
			.port_num = priv->port,
		 },
		 IBV_QP_STATE | IBV_QP_PORT);
	if (ret) {
		msg = "failed to switch RSS hash QP to INIT state";
		goto error;
	}
	ret = mlx4_glue->modify_qp
		(rss->qp,
		 &(struct ibv_qp_attr){
			.qp_state = IBV_QPS_RTR,
		 },
		 IBV_QP_STATE);
	if (ret) {
		msg = "failed to switch RSS hash QP to RTR state";
		goto error;
	}
	return 0;
error:
	if (rss->qp) {
		claim_zero(mlx4_glue->destroy_qp(rss->qp));
		rss->qp = NULL;
	}
	if (rss->ind) {
		claim_zero(mlx4_glue->destroy_rwq_ind_table(rss->ind));
		rss->ind = NULL;
	}
	while (i--)
		mlx4_rxq_detach(dev->data->rx_queues[rss->queue_id[i]]);
	ERROR("mlx4: %s", msg);
	--rss->usecnt;
	rte_errno = ret;
	return -ret;
}

/**
 * Detach a user from a RSS context instance.
 *
 * Used when disabling (not destroying) a flow rule.
 *
 * This function decrements the usage count of the context and destroys
 * usage resources after reaching 0.
 *
 * @param rss
 *   RSS context to detach from.
 */
void
mlx4_rss_detach(struct mlx4_rss *rss)
{
	struct mlx4_priv *priv = rss->priv;
	struct rte_eth_dev *dev = ETH_DEV(priv);
	unsigned int i;

	MLX4_ASSERT(rss->refcnt);
	MLX4_ASSERT(rss->qp);
	MLX4_ASSERT(rss->ind);
	if (--rss->usecnt)
		return;
	claim_zero(mlx4_glue->destroy_qp(rss->qp));
	rss->qp = NULL;
	claim_zero(mlx4_glue->destroy_rwq_ind_table(rss->ind));
	rss->ind = NULL;
	for (i = 0; i != rss->queues; ++i)
		mlx4_rxq_detach(dev->data->rx_queues[rss->queue_id[i]]);
}

/**
 * Initialize common RSS context resources.
 *
 * Because ConnectX-3 hardware limitations require a fixed order in the
 * indirection table, WQs must be allocated sequentially to be part of a
 * common RSS context.
 *
 * Since a newly created WQ cannot be moved to a different context, this
 * function allocates them all at once, one for each configured Rx queue,
 * as well as all related resources (CQs and mbufs).
 *
 * This must therefore be done before creating any Rx flow rules relying on
 * indirection tables.
 *
 * @param priv
 *   Pointer to private structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx4_rss_init(struct mlx4_priv *priv)
{
	struct rte_eth_dev *dev = ETH_DEV(priv);
	uint8_t log2_range = rte_log2_u32(dev->data->nb_rx_queues);
	uint32_t wq_num_prev = 0;
	const char *msg;
	unsigned int i;
	int ret;

	if (priv->rss_init)
		return 0;
	if (ETH_DEV(priv)->data->nb_rx_queues > priv->hw_rss_max_qps) {
		ERROR("RSS does not support more than %d queues",
		      priv->hw_rss_max_qps);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	/* Prepare range for RSS contexts before creating the first WQ. */
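	/*
	 * For example, with 6 configured Rx queues, log2_range is 3 and a
	 * contiguous block of 8 WQ numbers is reserved; the placeholder WQs
	 * created below consume WQ numbers for unconfigured queues so the
	 * sequence stays contiguous.
	 */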
	ret = mlx4_glue->dv_set_context_attr
		(priv->ctx,
		 MLX4DV_SET_CTX_ATTR_LOG_WQS_RANGE_SZ,
		 &log2_range);
	if (ret) {
		ERROR("cannot set up range size for RSS context to %u"
		      " (for %u Rx queues), error: %s",
		      1 << log2_range, dev->data->nb_rx_queues, strerror(ret));
		rte_errno = ret;
		return -ret;
	}
	for (i = 0; i != ETH_DEV(priv)->data->nb_rx_queues; ++i) {
		struct rxq *rxq = ETH_DEV(priv)->data->rx_queues[i];
		struct ibv_cq *cq;
		struct ibv_wq *wq;
		uint32_t wq_num;

		/* Attach the configured Rx queues. */
		if (rxq) {
			MLX4_ASSERT(!rxq->usecnt);
			ret = mlx4_rxq_attach(rxq);
			if (!ret) {
				wq_num = rxq->wq->wq_num;
				goto wq_num_check;
			}
			ret = -ret;
			msg = "unable to create Rx queue resources";
			goto error;
		}
		/*
		 * WQs are temporarily allocated for unconfigured Rx queues
		 * to maintain proper index alignment in indirection table
		 * by skipping unused WQ numbers.
		 *
		 * The reason this works at all even though these WQs are
		 * immediately destroyed is that WQNs are allocated
		 * sequentially and are guaranteed to never be reused in the
		 * same context by the underlying implementation.
		 */
		cq = mlx4_glue->create_cq(priv->ctx, 1, NULL, NULL, 0);
		if (!cq) {
			ret = ENOMEM;
			msg = "placeholder CQ creation failure";
			goto error;
		}
		wq = mlx4_glue->create_wq
			(priv->ctx,
			 &(struct ibv_wq_init_attr){
				.wq_type = IBV_WQT_RQ,
				.max_wr = 1,
				.max_sge = 1,
				.pd = priv->pd,
				.cq = cq,
			 });
		if (wq) {
			wq_num = wq->wq_num;
			claim_zero(mlx4_glue->destroy_wq(wq));
		} else {
			wq_num = 0; /* Shut up GCC 4.8 warnings. */
		}
		claim_zero(mlx4_glue->destroy_cq(cq));
		if (!wq) {
			ret = ENOMEM;
			msg = "placeholder WQ creation failure";
			goto error;
		}
wq_num_check:
		/*
		 * While guaranteed by the implementation, make sure WQ
		 * numbers are really sequential (as the saying goes,
		 * trust, but verify).
		 */
		if (i && wq_num - wq_num_prev != 1) {
			if (rxq)
				mlx4_rxq_detach(rxq);
			ret = ERANGE;
			msg = "WQ numbers are not sequential";
			goto error;
		}
		wq_num_prev = wq_num;
	}
	priv->rss_init = 1;
	return 0;
error:
	ERROR("cannot initialize common RSS resources (queue %u): %s: %s",
	      i, msg, strerror(ret));
	while (i--) {
		struct rxq *rxq = ETH_DEV(priv)->data->rx_queues[i];

		if (rxq)
			mlx4_rxq_detach(rxq);
	}
	rte_errno = ret;
	return -ret;
}

/**
 * Release common RSS context resources.
 *
 * As the reverse of mlx4_rss_init(), this must be done after removing all
 * flow rules relying on indirection tables.
 *
 * @param priv
 *   Pointer to private structure.
 */
void
mlx4_rss_deinit(struct mlx4_priv *priv)
{
	unsigned int i;

	if (!priv->rss_init)
		return;
	for (i = 0; i != ETH_DEV(priv)->data->nb_rx_queues; ++i) {
		struct rxq *rxq = ETH_DEV(priv)->data->rx_queues[i];

		if (rxq) {
			MLX4_ASSERT(rxq->usecnt == 1);
			mlx4_rxq_detach(rxq);
		}
	}
	priv->rss_init = 0;
}
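
/*
 * Note: Rx queue resources are reference-counted. mlx4_rxq_attach() is
 * called by mlx4_rss_init() and by mlx4_rss_attach() for every queue an
 * RSS context targets; Verbs objects and mbufs are created on the first
 * reference and destroyed by mlx4_rxq_detach() once the last one is
 * dropped. mlx4_rss_deinit() above expects usecnt to be exactly 1 because
 * only the reference taken by mlx4_rss_init() should remain by then.
 */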

/**
 * Attach a user to a Rx queue.
 *
 * Used when the resources of an Rx queue must be instantiated for it to
 * become usable.
 *
 * This function increments the usage count of the Rx queue.
 *
 * @param rxq
 *   Pointer to Rx queue structure.
 *
 * @return
 *   0 on success, negative errno value otherwise and rte_errno is set.
 */
int
mlx4_rxq_attach(struct rxq *rxq)
{
	if (rxq->usecnt++) {
		MLX4_ASSERT(rxq->cq);
		MLX4_ASSERT(rxq->wq);
		MLX4_ASSERT(rxq->wqes);
		MLX4_ASSERT(rxq->rq_db);
		return 0;
	}

	struct mlx4_priv *priv = rxq->priv;
	struct rte_eth_dev *dev = ETH_DEV(priv);
	const uint32_t elts_n = 1 << rxq->elts_n;
	const uint32_t sges_n = 1 << rxq->sges_n;
	struct rte_mbuf *(*elts)[elts_n] = rxq->elts;
	struct mlx4dv_obj mlxdv;
	struct mlx4dv_rwq dv_rwq;
	struct mlx4dv_cq dv_cq = { .comp_mask = MLX4DV_CQ_MASK_UAR, };
	const char *msg;
	struct ibv_cq *cq = NULL;
	struct ibv_wq *wq = NULL;
	uint32_t create_flags = 0;
	uint32_t comp_mask = 0;
	volatile struct mlx4_wqe_data_seg (*wqes)[];
	unsigned int i;
	int ret;

	MLX4_ASSERT(rte_is_power_of_2(elts_n));
	priv->verbs_alloc_ctx.type = MLX4_VERBS_ALLOC_TYPE_RX_QUEUE;
	priv->verbs_alloc_ctx.obj = rxq;
	cq = mlx4_glue->create_cq(priv->ctx, elts_n / sges_n, NULL,
				  rxq->channel, 0);
	if (!cq) {
		ret = ENOMEM;
		msg = "CQ creation failure";
		goto error;
	}
	/* By default, FCS (CRC) is stripped by hardware. */
	if (rxq->crc_present) {
		create_flags |= IBV_WQ_FLAGS_SCATTER_FCS;
		comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
	}
	wq = mlx4_glue->create_wq
		(priv->ctx,
		 &(struct ibv_wq_init_attr){
			.wq_type = IBV_WQT_RQ,
			.max_wr = elts_n / sges_n,
			.max_sge = sges_n,
			.pd = priv->pd,
			.cq = cq,
			.comp_mask = comp_mask,
			.create_flags = create_flags,
		 });
	if (!wq) {
		ret = errno ? errno : EINVAL;
		msg = "WQ creation failure";
		goto error;
	}
	ret = mlx4_glue->modify_wq
		(wq,
		 &(struct ibv_wq_attr){
			.attr_mask = IBV_WQ_ATTR_STATE,
			.wq_state = IBV_WQS_RDY,
		 });
	if (ret) {
		msg = "WQ state change to IBV_WQS_RDY failed";
		goto error;
	}
	/* Retrieve device queue information. */
	mlxdv.cq.in = cq;
	mlxdv.cq.out = &dv_cq;
	mlxdv.rwq.in = wq;
	mlxdv.rwq.out = &dv_rwq;
	ret = mlx4_glue->dv_init_obj(&mlxdv, MLX4DV_OBJ_RWQ | MLX4DV_OBJ_CQ);
	if (ret) {
		msg = "failed to obtain device information from WQ/CQ objects";
		goto error;
	}
	/* Pre-register Rx mempool. */
	DEBUG("port %u Rx queue %u registering mp %s having %u chunks",
	      ETH_DEV(priv)->data->port_id, rxq->stats.idx,
	      rxq->mp->name, rxq->mp->nb_mem_chunks);
	mlx4_mr_update_mp(dev, &rxq->mr_ctrl, rxq->mp);
	wqes = (volatile struct mlx4_wqe_data_seg (*)[])
		((uintptr_t)dv_rwq.buf.buf + dv_rwq.rq.offset);
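	/*
	 * Fill the receive ring: it holds elts_n scatter entries and, with
	 * scattering enabled, each packet consumes sges_n consecutive
	 * entries, so the queue holds at most elts_n / sges_n packets at a
	 * time.
	 */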
	for (i = 0; i != RTE_DIM(*elts); ++i) {
		volatile struct mlx4_wqe_data_seg *scat = &(*wqes)[i];
		struct rte_mbuf *buf = rte_pktmbuf_alloc(rxq->mp);

		if (buf == NULL) {
			while (i--) {
				rte_pktmbuf_free_seg((*elts)[i]);
				(*elts)[i] = NULL;
			}
			ret = ENOMEM;
			msg = "cannot allocate mbuf";
			goto error;
		}
		/* Headroom is reserved by rte_pktmbuf_alloc(). */
		MLX4_ASSERT(buf->data_off == RTE_PKTMBUF_HEADROOM);
		/* Buffer is supposed to be empty. */
		MLX4_ASSERT(rte_pktmbuf_data_len(buf) == 0);
		MLX4_ASSERT(rte_pktmbuf_pkt_len(buf) == 0);
		/* Only the first segment keeps headroom. */
		if (i % sges_n)
			buf->data_off = 0;
		buf->port = rxq->port_id;
		buf->data_len = rte_pktmbuf_tailroom(buf);
		buf->pkt_len = rte_pktmbuf_tailroom(buf);
		buf->nb_segs = 1;
		*scat = (struct mlx4_wqe_data_seg){
			.addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf,
								  uintptr_t)),
			.byte_count = rte_cpu_to_be_32(buf->data_len),
			.lkey = mlx4_rx_mb2mr(rxq, buf),
		};
		(*elts)[i] = buf;
	}
	DEBUG("%p: allocated and configured %u segments (max %u packets)",
	      (void *)rxq, elts_n, elts_n / sges_n);
	rxq->cq = cq;
	rxq->wq = wq;
	rxq->wqes = wqes;
	rxq->rq_db = dv_rwq.rdb;
	rxq->mcq.buf = dv_cq.buf.buf;
	rxq->mcq.cqe_cnt = dv_cq.cqe_cnt;
	rxq->mcq.set_ci_db = dv_cq.set_ci_db;
	rxq->mcq.cqe_64 = (dv_cq.cqe_size & 64) ? 1 : 0;
	rxq->mcq.arm_db = dv_cq.arm_db;
	rxq->mcq.arm_sn = dv_cq.arm_sn;
	rxq->mcq.cqn = dv_cq.cqn;
	rxq->mcq.cq_uar = dv_cq.cq_uar;
	rxq->mcq.cq_db_reg = (uint8_t *)dv_cq.cq_uar + MLX4_CQ_DOORBELL;
	/* Update doorbell counter. */
	rxq->rq_ci = elts_n / sges_n;
	rte_wmb();
	*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
	priv->verbs_alloc_ctx.type = MLX4_VERBS_ALLOC_TYPE_NONE;
	return 0;
error:
	if (wq)
		claim_zero(mlx4_glue->destroy_wq(wq));
	if (cq)
		claim_zero(mlx4_glue->destroy_cq(cq));
	--rxq->usecnt;
	rte_errno = ret;
	ERROR("error while attaching Rx queue %p: %s: %s",
	      (void *)rxq, msg, strerror(ret));
	priv->verbs_alloc_ctx.type = MLX4_VERBS_ALLOC_TYPE_NONE;
	return -ret;
}

/**
 * Detach a user from a Rx queue.
 *
 * This function decrements the usage count of the Rx queue and destroys
 * usage resources after reaching 0.
 *
 * @param rxq
 *   Pointer to Rx queue structure.
 */
void
mlx4_rxq_detach(struct rxq *rxq)
{
	unsigned int i;
	struct rte_mbuf *(*elts)[1 << rxq->elts_n] = rxq->elts;

	if (--rxq->usecnt)
		return;
	rxq->rq_ci = 0;
	memset(&rxq->mcq, 0, sizeof(rxq->mcq));
	rxq->rq_db = NULL;
	rxq->wqes = NULL;
	claim_zero(mlx4_glue->destroy_wq(rxq->wq));
	rxq->wq = NULL;
	claim_zero(mlx4_glue->destroy_cq(rxq->cq));
	rxq->cq = NULL;
	DEBUG("%p: freeing Rx queue elements", (void *)rxq);
	for (i = 0; (i != RTE_DIM(*elts)); ++i) {
		if (!(*elts)[i])
			continue;
		rte_pktmbuf_free_seg((*elts)[i]);
		(*elts)[i] = NULL;
	}
}
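
/*
 * Note: mlx4_rxq_detach() only undoes what mlx4_rxq_attach() created (CQ,
 * WQ and the mbuf ring); the rxq structure itself, its MR cache and its
 * completion channel remain valid until mlx4_rx_queue_release().
 */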

/**
 * Returns the per-queue supported offloads.
 *
 * @param priv
 *   Pointer to private structure.
 *
 * @return
 *   Supported Rx offloads.
 */
uint64_t
mlx4_get_rx_queue_offloads(struct mlx4_priv *priv)
{
	uint64_t offloads = DEV_RX_OFFLOAD_SCATTER |
			    DEV_RX_OFFLOAD_KEEP_CRC |
			    DEV_RX_OFFLOAD_JUMBO_FRAME |
			    DEV_RX_OFFLOAD_RSS_HASH;

	if (priv->hw_csum)
		offloads |= DEV_RX_OFFLOAD_CHECKSUM;
	return offloads;
}

/**
 * Returns the per-port supported offloads.
 *
 * @param priv
 *   Pointer to private structure.
 *
 * @return
 *   Supported Rx offloads.
 */
uint64_t
mlx4_get_rx_port_offloads(struct mlx4_priv *priv)
{
	uint64_t offloads = DEV_RX_OFFLOAD_VLAN_FILTER;

	(void)priv;
	return offloads;
}

/**
 * DPDK callback to configure a Rx queue.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param idx
 *   Rx queue index.
 * @param desc
 *   Number of descriptors to configure in queue.
 * @param socket
 *   NUMA socket on which memory must be allocated.
 * @param[in] conf
 *   Thresholds parameters.
 * @param mp
 *   Memory pool for buffer allocations.
 *
 * @return
 *   0 on success, negative errno value otherwise and rte_errno is set.
 */
int
mlx4_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
		    unsigned int socket, const struct rte_eth_rxconf *conf,
		    struct rte_mempool *mp)
{
	struct mlx4_priv *priv = dev->data->dev_private;
	uint32_t mb_len = rte_pktmbuf_data_room_size(mp);
	struct rte_mbuf *(*elts)[rte_align32pow2(desc)];
	struct rxq *rxq;
	struct mlx4_malloc_vec vec[] = {
		{
			.align = RTE_CACHE_LINE_SIZE,
			.size = sizeof(*rxq),
			.addr = (void **)&rxq,
		},
		{
			.align = RTE_CACHE_LINE_SIZE,
			.size = sizeof(*elts),
			.addr = (void **)&elts,
		},
	};
	int ret;
	uint32_t crc_present;
	uint64_t offloads;

	offloads = conf->offloads | dev->data->dev_conf.rxmode.offloads;

	DEBUG("%p: configuring queue %u for %u descriptors",
	      (void *)dev, idx, desc);

	if (idx >= dev->data->nb_rx_queues) {
		rte_errno = EOVERFLOW;
		ERROR("%p: queue index out of range (%u >= %u)",
		      (void *)dev, idx, dev->data->nb_rx_queues);
		return -rte_errno;
	}
	rxq = dev->data->rx_queues[idx];
	if (rxq) {
		rte_errno = EEXIST;
		ERROR("%p: Rx queue %u already configured, release it first",
		      (void *)dev, idx);
		return -rte_errno;
	}
	if (!desc) {
		rte_errno = EINVAL;
		ERROR("%p: invalid number of Rx descriptors", (void *)dev);
		return -rte_errno;
	}
	if (desc != RTE_DIM(*elts)) {
		desc = RTE_DIM(*elts);
		WARN("%p: increased number of descriptors in Rx queue %u"
		     " to the next power of two (%u)",
		     (void *)dev, idx, desc);
	}
	/* By default, FCS (CRC) is stripped by hardware. */
	crc_present = 0;
	if (offloads & DEV_RX_OFFLOAD_KEEP_CRC) {
		if (priv->hw_fcs_strip) {
			crc_present = 1;
		} else {
			WARN("%p: CRC stripping has been disabled but will still"
			     " be performed by hardware, make sure MLNX_OFED and"
			     " firmware are up to date",
			     (void *)dev);
		}
	}
	DEBUG("%p: CRC stripping is %s, %u bytes will be subtracted from"
	      " incoming frames to hide it",
	      (void *)dev,
	      crc_present ? "disabled" : "enabled",
	      crc_present << 2);
	/* Allocate and initialize Rx queue. */
	mlx4_zmallocv_socket("RXQ", vec, RTE_DIM(vec), socket);
	if (!rxq) {
		ERROR("%p: unable to allocate queue index %u",
		      (void *)dev, idx);
		return -rte_errno;
	}
	*rxq = (struct rxq){
		.priv = priv,
		.mp = mp,
		.port_id = dev->data->port_id,
		.sges_n = 0,
		.elts_n = rte_log2_u32(desc),
		.elts = elts,
		/* Toggle Rx checksum offload if hardware supports it. */
		.csum = priv->hw_csum &&
			(offloads & DEV_RX_OFFLOAD_CHECKSUM),
		.csum_l2tun = priv->hw_csum_l2tun &&
			      (offloads & DEV_RX_OFFLOAD_CHECKSUM),
		.crc_present = crc_present,
		.l2tun_offload = priv->hw_csum_l2tun,
		.stats = {
			.idx = idx,
		},
		.socket = socket,
	};
	/* Enable scattered packets support for this queue if necessary. */
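	/*
	 * Worked example, assuming the default 128-byte headroom: with
	 * 2048-byte mbuf data rooms and a 9000-byte maximum Rx packet
	 * length, size below is 9128 bytes, i.e. 5 buffers, which
	 * rte_log2_u32() rounds up to sges_n = 3 (8 segments per packet).
	 */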
	MLX4_ASSERT(mb_len >= RTE_PKTMBUF_HEADROOM);
	if (dev->data->dev_conf.rxmode.max_rx_pkt_len <=
	    (mb_len - RTE_PKTMBUF_HEADROOM)) {
		;
	} else if (offloads & DEV_RX_OFFLOAD_SCATTER) {
		uint32_t size =
			RTE_PKTMBUF_HEADROOM +
			dev->data->dev_conf.rxmode.max_rx_pkt_len;
		uint32_t sges_n;

		/*
		 * Determine the number of SGEs needed for a full packet
		 * and round it to the next power of two.
		 */
		sges_n = rte_log2_u32((size / mb_len) + !!(size % mb_len));
		rxq->sges_n = sges_n;
		/* Make sure sges_n did not overflow. */
		size = mb_len * (1 << rxq->sges_n);
		size -= RTE_PKTMBUF_HEADROOM;
		if (size < dev->data->dev_conf.rxmode.max_rx_pkt_len) {
			rte_errno = EOVERFLOW;
			ERROR("%p: too many SGEs (%u) needed to handle"
			      " requested maximum packet size %u",
			      (void *)dev,
			      1 << sges_n,
			      dev->data->dev_conf.rxmode.max_rx_pkt_len);
			goto error;
		}
	} else {
		WARN("%p: the requested maximum Rx packet size (%u) is"
		     " larger than a single mbuf (%u) and scattered"
		     " mode has not been requested",
		     (void *)dev,
		     dev->data->dev_conf.rxmode.max_rx_pkt_len,
		     mb_len - RTE_PKTMBUF_HEADROOM);
	}
	DEBUG("%p: maximum number of segments per packet: %u",
	      (void *)dev, 1 << rxq->sges_n);
	if (desc % (1 << rxq->sges_n)) {
		rte_errno = EINVAL;
		ERROR("%p: number of Rx queue descriptors (%u) is not a"
		      " multiple of maximum segments per packet (%u)",
		      (void *)dev,
		      desc,
		      1 << rxq->sges_n);
		goto error;
	}
	if (mlx4_mr_btree_init(&rxq->mr_ctrl.cache_bh,
			       MLX4_MR_BTREE_CACHE_N, socket)) {
		/* rte_errno is already set. */
		goto error;
	}
	if (dev->data->dev_conf.intr_conf.rxq) {
		rxq->channel = mlx4_glue->create_comp_channel(priv->ctx);
		if (rxq->channel == NULL) {
			rte_errno = ENOMEM;
			ERROR("%p: Rx interrupt completion channel creation"
			      " failure: %s",
			      (void *)dev, strerror(rte_errno));
			goto error;
		}
		if (mlx4_fd_set_non_blocking(rxq->channel->fd) < 0) {
			ERROR("%p: unable to make Rx interrupt completion"
			      " channel non-blocking: %s",
			      (void *)dev, strerror(rte_errno));
			goto error;
		}
	}
	DEBUG("%p: adding Rx queue %p to list", (void *)dev, (void *)rxq);
	dev->data->rx_queues[idx] = rxq;
	return 0;
error:
	dev->data->rx_queues[idx] = NULL;
	ret = rte_errno;
	mlx4_rx_queue_release(rxq);
	rte_errno = ret;
	MLX4_ASSERT(rte_errno > 0);
	return -rte_errno;
}

/**
 * DPDK callback to release a Rx queue.
 *
 * @param dpdk_rxq
 *   Generic Rx queue pointer.
 */
void
mlx4_rx_queue_release(void *dpdk_rxq)
{
	struct rxq *rxq = (struct rxq *)dpdk_rxq;
	struct mlx4_priv *priv;
	unsigned int i;

	if (rxq == NULL)
		return;
	priv = rxq->priv;
	for (i = 0; i != ETH_DEV(priv)->data->nb_rx_queues; ++i)
		if (ETH_DEV(priv)->data->rx_queues[i] == rxq) {
			DEBUG("%p: removing Rx queue %p from list",
			      (void *)ETH_DEV(priv), (void *)rxq);
			ETH_DEV(priv)->data->rx_queues[i] = NULL;
			break;
		}
	MLX4_ASSERT(!rxq->cq);
	MLX4_ASSERT(!rxq->wq);
	MLX4_ASSERT(!rxq->wqes);
	MLX4_ASSERT(!rxq->rq_db);
	if (rxq->channel)
		claim_zero(mlx4_glue->destroy_comp_channel(rxq->channel));
	mlx4_mr_btree_free(&rxq->mr_ctrl.cache_bh);
	rte_free(rxq);
}