1 /*- 2 * BSD LICENSE 3 * 4 * Copyright 2015 6WIND S.A. 5 * Copyright 2015 Mellanox. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of 6WIND S.A. nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include <stddef.h> 35 #include <assert.h> 36 #include <errno.h> 37 #include <string.h> 38 #include <stdint.h> 39 40 /* Verbs header. */ 41 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */ 42 #ifdef PEDANTIC 43 #pragma GCC diagnostic ignored "-pedantic" 44 #endif 45 #include <infiniband/verbs.h> 46 #ifdef PEDANTIC 47 #pragma GCC diagnostic error "-pedantic" 48 #endif 49 50 /* DPDK headers don't like -pedantic. */ 51 #ifdef PEDANTIC 52 #pragma GCC diagnostic ignored "-pedantic" 53 #endif 54 #include <rte_mbuf.h> 55 #include <rte_malloc.h> 56 #include <rte_ethdev.h> 57 #include <rte_common.h> 58 #ifdef PEDANTIC 59 #pragma GCC diagnostic error "-pedantic" 60 #endif 61 62 #include "mlx5.h" 63 #include "mlx5_rxtx.h" 64 #include "mlx5_utils.h" 65 #include "mlx5_autoconf.h" 66 #include "mlx5_defs.h" 67 68 /* Initialization data for hash RX queues. */ 69 const struct hash_rxq_init hash_rxq_init[] = { 70 [HASH_RXQ_TCPV4] = { 71 .hash_fields = (IBV_EXP_RX_HASH_SRC_IPV4 | 72 IBV_EXP_RX_HASH_DST_IPV4 | 73 IBV_EXP_RX_HASH_SRC_PORT_TCP | 74 IBV_EXP_RX_HASH_DST_PORT_TCP), 75 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP, 76 .flow_priority = 0, 77 .flow_spec.tcp_udp = { 78 .type = IBV_EXP_FLOW_SPEC_TCP, 79 .size = sizeof(hash_rxq_init[0].flow_spec.tcp_udp), 80 }, 81 .underlayer = &hash_rxq_init[HASH_RXQ_IPV4], 82 }, 83 [HASH_RXQ_UDPV4] = { 84 .hash_fields = (IBV_EXP_RX_HASH_SRC_IPV4 | 85 IBV_EXP_RX_HASH_DST_IPV4 | 86 IBV_EXP_RX_HASH_SRC_PORT_UDP | 87 IBV_EXP_RX_HASH_DST_PORT_UDP), 88 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP, 89 .flow_priority = 0, 90 .flow_spec.tcp_udp = { 91 .type = IBV_EXP_FLOW_SPEC_UDP, 92 .size = sizeof(hash_rxq_init[0].flow_spec.tcp_udp), 93 }, 94 .underlayer = &hash_rxq_init[HASH_RXQ_IPV4], 95 }, 96 [HASH_RXQ_IPV4] = { 97 .hash_fields = (IBV_EXP_RX_HASH_SRC_IPV4 | 98 IBV_EXP_RX_HASH_DST_IPV4), 99 .dpdk_rss_hf = (ETH_RSS_IPV4 | 100 ETH_RSS_FRAG_IPV4), 101 .flow_priority = 1, 102 .flow_spec.ipv4 = { 103 .type = IBV_EXP_FLOW_SPEC_IPV4, 104 .size = sizeof(hash_rxq_init[0].flow_spec.ipv4), 105 }, 106 .underlayer = &hash_rxq_init[HASH_RXQ_ETH], 107 }, 108 #ifdef HAVE_FLOW_SPEC_IPV6 109 [HASH_RXQ_TCPV6] = { 110 .hash_fields = (IBV_EXP_RX_HASH_SRC_IPV6 | 111 IBV_EXP_RX_HASH_DST_IPV6 | 112 IBV_EXP_RX_HASH_SRC_PORT_TCP | 113 IBV_EXP_RX_HASH_DST_PORT_TCP), 114 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP, 115 .flow_priority = 0, 116 .flow_spec.tcp_udp = { 117 .type = IBV_EXP_FLOW_SPEC_TCP, 118 .size = sizeof(hash_rxq_init[0].flow_spec.tcp_udp), 119 }, 120 .underlayer = &hash_rxq_init[HASH_RXQ_IPV6], 121 }, 122 [HASH_RXQ_UDPV6] = { 123 .hash_fields = (IBV_EXP_RX_HASH_SRC_IPV6 | 124 IBV_EXP_RX_HASH_DST_IPV6 | 125 IBV_EXP_RX_HASH_SRC_PORT_UDP | 126 IBV_EXP_RX_HASH_DST_PORT_UDP), 127 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP, 128 .flow_priority = 0, 129 .flow_spec.tcp_udp = { 130 .type = IBV_EXP_FLOW_SPEC_UDP, 131 .size = sizeof(hash_rxq_init[0].flow_spec.tcp_udp), 132 }, 133 .underlayer = &hash_rxq_init[HASH_RXQ_IPV6], 134 }, 135 [HASH_RXQ_IPV6] = { 136 .hash_fields = (IBV_EXP_RX_HASH_SRC_IPV6 | 137 IBV_EXP_RX_HASH_DST_IPV6), 138 .dpdk_rss_hf = (ETH_RSS_IPV6 | 139 ETH_RSS_FRAG_IPV6), 140 .flow_priority = 1, 141 .flow_spec.ipv6 = { 142 .type = IBV_EXP_FLOW_SPEC_IPV6, 143 .size = sizeof(hash_rxq_init[0].flow_spec.ipv6), 144 }, 145 .underlayer = &hash_rxq_init[HASH_RXQ_ETH], 146 }, 147 #endif /* HAVE_FLOW_SPEC_IPV6 */ 148 [HASH_RXQ_ETH] = { 149 .hash_fields = 0, 150 .dpdk_rss_hf = 0, 151 .flow_priority = 2, 152 .flow_spec.eth = { 153 .type = IBV_EXP_FLOW_SPEC_ETH, 154 .size = sizeof(hash_rxq_init[0].flow_spec.eth), 155 }, 156 .underlayer = NULL, 157 }, 158 }; 159 160 /* Number of entries in hash_rxq_init[]. */ 161 const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init); 162 163 /* Initialization data for hash RX queue indirection tables. */ 164 static const struct ind_table_init ind_table_init[] = { 165 { 166 .max_size = -1u, /* Superseded by HW limitations. */ 167 .hash_types = 168 1 << HASH_RXQ_TCPV4 | 169 1 << HASH_RXQ_UDPV4 | 170 1 << HASH_RXQ_IPV4 | 171 #ifdef HAVE_FLOW_SPEC_IPV6 172 1 << HASH_RXQ_TCPV6 | 173 1 << HASH_RXQ_UDPV6 | 174 1 << HASH_RXQ_IPV6 | 175 #endif /* HAVE_FLOW_SPEC_IPV6 */ 176 0, 177 #ifdef HAVE_FLOW_SPEC_IPV6 178 .hash_types_n = 6, 179 #else /* HAVE_FLOW_SPEC_IPV6 */ 180 .hash_types_n = 3, 181 #endif /* HAVE_FLOW_SPEC_IPV6 */ 182 }, 183 { 184 .max_size = 1, 185 .hash_types = 1 << HASH_RXQ_ETH, 186 .hash_types_n = 1, 187 }, 188 }; 189 190 #define IND_TABLE_INIT_N RTE_DIM(ind_table_init) 191 192 /* Default RSS hash key also used for ConnectX-3. */ 193 uint8_t rss_hash_default_key[] = { 194 0x2c, 0xc6, 0x81, 0xd1, 195 0x5b, 0xdb, 0xf4, 0xf7, 196 0xfc, 0xa2, 0x83, 0x19, 197 0xdb, 0x1a, 0x3e, 0x94, 198 0x6b, 0x9e, 0x38, 0xd9, 199 0x2c, 0x9c, 0x03, 0xd1, 200 0xad, 0x99, 0x44, 0xa7, 201 0xd9, 0x56, 0x3d, 0x59, 202 0x06, 0x3c, 0x25, 0xf3, 203 0xfc, 0x1f, 0xdc, 0x2a, 204 }; 205 206 /* Length of the default RSS hash key. */ 207 const size_t rss_hash_default_key_len = sizeof(rss_hash_default_key); 208 209 /** 210 * Populate flow steering rule for a given hash RX queue type using 211 * information from hash_rxq_init[]. Nothing is written to flow_attr when 212 * flow_attr_size is not large enough, but the required size is still returned. 213 * 214 * @param priv 215 * Pointer to private structure. 216 * @param[out] flow_attr 217 * Pointer to flow attribute structure to fill. Note that the allocated 218 * area must be larger and large enough to hold all flow specifications. 219 * @param flow_attr_size 220 * Entire size of flow_attr and trailing room for flow specifications. 221 * @param type 222 * Hash RX queue type to use for flow steering rule. 223 * 224 * @return 225 * Total size of the flow attribute buffer. No errors are defined. 226 */ 227 size_t 228 priv_flow_attr(struct priv *priv, struct ibv_exp_flow_attr *flow_attr, 229 size_t flow_attr_size, enum hash_rxq_type type) 230 { 231 size_t offset = sizeof(*flow_attr); 232 const struct hash_rxq_init *init = &hash_rxq_init[type]; 233 234 assert(priv != NULL); 235 assert((size_t)type < RTE_DIM(hash_rxq_init)); 236 do { 237 offset += init->flow_spec.hdr.size; 238 init = init->underlayer; 239 } while (init != NULL); 240 if (offset > flow_attr_size) 241 return offset; 242 flow_attr_size = offset; 243 init = &hash_rxq_init[type]; 244 *flow_attr = (struct ibv_exp_flow_attr){ 245 .type = IBV_EXP_FLOW_ATTR_NORMAL, 246 #ifdef MLX5_FDIR_SUPPORT 247 /* Priorities < 3 are reserved for flow director. */ 248 .priority = init->flow_priority + 3, 249 #else /* MLX5_FDIR_SUPPORT */ 250 .priority = init->flow_priority, 251 #endif /* MLX5_FDIR_SUPPORT */ 252 .num_of_specs = 0, 253 .port = priv->port, 254 .flags = 0, 255 }; 256 do { 257 offset -= init->flow_spec.hdr.size; 258 memcpy((void *)((uintptr_t)flow_attr + offset), 259 &init->flow_spec, 260 init->flow_spec.hdr.size); 261 ++flow_attr->num_of_specs; 262 init = init->underlayer; 263 } while (init != NULL); 264 return flow_attr_size; 265 } 266 267 /** 268 * Convert hash type position in indirection table initializer to 269 * hash RX queue type. 270 * 271 * @param table 272 * Indirection table initializer. 273 * @param pos 274 * Hash type position. 275 * 276 * @return 277 * Hash RX queue type. 278 */ 279 static enum hash_rxq_type 280 hash_rxq_type_from_pos(const struct ind_table_init *table, unsigned int pos) 281 { 282 enum hash_rxq_type type = 0; 283 284 assert(pos < table->hash_types_n); 285 do { 286 if ((table->hash_types & (1 << type)) && (pos-- == 0)) 287 break; 288 ++type; 289 } while (1); 290 return type; 291 } 292 293 /** 294 * Filter out disabled hash RX queue types from ind_table_init[]. 295 * 296 * @param priv 297 * Pointer to private structure. 298 * @param[out] table 299 * Output table. 300 * 301 * @return 302 * Number of table entries. 303 */ 304 static unsigned int 305 priv_make_ind_table_init(struct priv *priv, 306 struct ind_table_init (*table)[IND_TABLE_INIT_N]) 307 { 308 uint64_t rss_hf; 309 unsigned int i; 310 unsigned int j; 311 unsigned int table_n = 0; 312 /* Mandatory to receive frames not handled by normal hash RX queues. */ 313 unsigned int hash_types_sup = 1 << HASH_RXQ_ETH; 314 315 rss_hf = priv->rss_hf; 316 /* Process other protocols only if more than one queue. */ 317 if (priv->rxqs_n > 1) 318 for (i = 0; (i != hash_rxq_init_n); ++i) 319 if (rss_hf & hash_rxq_init[i].dpdk_rss_hf) 320 hash_types_sup |= (1 << i); 321 322 /* Filter out entries whose protocols are not in the set. */ 323 for (i = 0, j = 0; (i != IND_TABLE_INIT_N); ++i) { 324 unsigned int nb; 325 unsigned int h; 326 327 /* j is increased only if the table has valid protocols. */ 328 assert(j <= i); 329 (*table)[j] = ind_table_init[i]; 330 (*table)[j].hash_types &= hash_types_sup; 331 for (h = 0, nb = 0; (h != hash_rxq_init_n); ++h) 332 if (((*table)[j].hash_types >> h) & 0x1) 333 ++nb; 334 (*table)[i].hash_types_n = nb; 335 if (nb) { 336 ++table_n; 337 ++j; 338 } 339 } 340 return table_n; 341 } 342 343 /** 344 * Initialize hash RX queues and indirection table. 345 * 346 * @param priv 347 * Pointer to private structure. 348 * 349 * @return 350 * 0 on success, errno value on failure. 351 */ 352 int 353 priv_create_hash_rxqs(struct priv *priv) 354 { 355 struct ibv_exp_wq *wqs[priv->reta_idx_n]; 356 struct ind_table_init ind_table_init[IND_TABLE_INIT_N]; 357 unsigned int ind_tables_n = 358 priv_make_ind_table_init(priv, &ind_table_init); 359 unsigned int hash_rxqs_n = 0; 360 struct hash_rxq (*hash_rxqs)[] = NULL; 361 struct ibv_exp_rwq_ind_table *(*ind_tables)[] = NULL; 362 unsigned int i; 363 unsigned int j; 364 unsigned int k; 365 int err = 0; 366 367 assert(priv->ind_tables == NULL); 368 assert(priv->ind_tables_n == 0); 369 assert(priv->hash_rxqs == NULL); 370 assert(priv->hash_rxqs_n == 0); 371 assert(priv->pd != NULL); 372 assert(priv->ctx != NULL); 373 if (priv->rxqs_n == 0) 374 return EINVAL; 375 assert(priv->rxqs != NULL); 376 if (ind_tables_n == 0) { 377 ERROR("all hash RX queue types have been filtered out," 378 " indirection table cannot be created"); 379 return EINVAL; 380 } 381 if (priv->rxqs_n & (priv->rxqs_n - 1)) { 382 INFO("%u RX queues are configured, consider rounding this" 383 " number to the next power of two for better balancing", 384 priv->rxqs_n); 385 DEBUG("indirection table extended to assume %u WQs", 386 priv->reta_idx_n); 387 } 388 for (i = 0; (i != priv->reta_idx_n); ++i) 389 wqs[i] = (*priv->rxqs)[(*priv->reta_idx)[i]]->wq; 390 /* Get number of hash RX queues to configure. */ 391 for (i = 0, hash_rxqs_n = 0; (i != ind_tables_n); ++i) 392 hash_rxqs_n += ind_table_init[i].hash_types_n; 393 DEBUG("allocating %u hash RX queues for %u WQs, %u indirection tables", 394 hash_rxqs_n, priv->rxqs_n, ind_tables_n); 395 /* Create indirection tables. */ 396 ind_tables = rte_calloc(__func__, ind_tables_n, 397 sizeof((*ind_tables)[0]), 0); 398 if (ind_tables == NULL) { 399 err = ENOMEM; 400 ERROR("cannot allocate indirection tables container: %s", 401 strerror(err)); 402 goto error; 403 } 404 for (i = 0; (i != ind_tables_n); ++i) { 405 struct ibv_exp_rwq_ind_table_init_attr ind_init_attr = { 406 .pd = priv->pd, 407 .log_ind_tbl_size = 0, /* Set below. */ 408 .ind_tbl = wqs, 409 .comp_mask = 0, 410 }; 411 unsigned int ind_tbl_size = ind_table_init[i].max_size; 412 struct ibv_exp_rwq_ind_table *ind_table; 413 414 if (priv->reta_idx_n < ind_tbl_size) 415 ind_tbl_size = priv->reta_idx_n; 416 ind_init_attr.log_ind_tbl_size = log2above(ind_tbl_size); 417 errno = 0; 418 ind_table = ibv_exp_create_rwq_ind_table(priv->ctx, 419 &ind_init_attr); 420 if (ind_table != NULL) { 421 (*ind_tables)[i] = ind_table; 422 continue; 423 } 424 /* Not clear whether errno is set. */ 425 err = (errno ? errno : EINVAL); 426 ERROR("RX indirection table creation failed with error %d: %s", 427 err, strerror(err)); 428 goto error; 429 } 430 /* Allocate array that holds hash RX queues and related data. */ 431 hash_rxqs = rte_calloc(__func__, hash_rxqs_n, 432 sizeof((*hash_rxqs)[0]), 0); 433 if (hash_rxqs == NULL) { 434 err = ENOMEM; 435 ERROR("cannot allocate hash RX queues container: %s", 436 strerror(err)); 437 goto error; 438 } 439 for (i = 0, j = 0, k = 0; 440 ((i != hash_rxqs_n) && (j != ind_tables_n)); 441 ++i) { 442 struct hash_rxq *hash_rxq = &(*hash_rxqs)[i]; 443 enum hash_rxq_type type = 444 hash_rxq_type_from_pos(&ind_table_init[j], k); 445 struct rte_eth_rss_conf *priv_rss_conf = 446 (*priv->rss_conf)[type]; 447 struct ibv_exp_rx_hash_conf hash_conf = { 448 .rx_hash_function = IBV_EXP_RX_HASH_FUNC_TOEPLITZ, 449 .rx_hash_key_len = (priv_rss_conf ? 450 priv_rss_conf->rss_key_len : 451 rss_hash_default_key_len), 452 .rx_hash_key = (priv_rss_conf ? 453 priv_rss_conf->rss_key : 454 rss_hash_default_key), 455 .rx_hash_fields_mask = hash_rxq_init[type].hash_fields, 456 .rwq_ind_tbl = (*ind_tables)[j], 457 }; 458 struct ibv_exp_qp_init_attr qp_init_attr = { 459 .max_inl_recv = 0, /* Currently not supported. */ 460 .qp_type = IBV_QPT_RAW_PACKET, 461 .comp_mask = (IBV_EXP_QP_INIT_ATTR_PD | 462 IBV_EXP_QP_INIT_ATTR_RX_HASH), 463 .pd = priv->pd, 464 .rx_hash_conf = &hash_conf, 465 .port_num = priv->port, 466 }; 467 468 DEBUG("using indirection table %u for hash RX queue %u type %d", 469 j, i, type); 470 *hash_rxq = (struct hash_rxq){ 471 .priv = priv, 472 .qp = ibv_exp_create_qp(priv->ctx, &qp_init_attr), 473 .type = type, 474 }; 475 if (hash_rxq->qp == NULL) { 476 err = (errno ? errno : EINVAL); 477 ERROR("Hash RX QP creation failure: %s", 478 strerror(err)); 479 goto error; 480 } 481 if (++k < ind_table_init[j].hash_types_n) 482 continue; 483 /* Switch to the next indirection table and reset hash RX 484 * queue type array index. */ 485 ++j; 486 k = 0; 487 } 488 priv->ind_tables = ind_tables; 489 priv->ind_tables_n = ind_tables_n; 490 priv->hash_rxqs = hash_rxqs; 491 priv->hash_rxqs_n = hash_rxqs_n; 492 assert(err == 0); 493 return 0; 494 error: 495 if (hash_rxqs != NULL) { 496 for (i = 0; (i != hash_rxqs_n); ++i) { 497 struct ibv_qp *qp = (*hash_rxqs)[i].qp; 498 499 if (qp == NULL) 500 continue; 501 claim_zero(ibv_destroy_qp(qp)); 502 } 503 rte_free(hash_rxqs); 504 } 505 if (ind_tables != NULL) { 506 for (j = 0; (j != ind_tables_n); ++j) { 507 struct ibv_exp_rwq_ind_table *ind_table = 508 (*ind_tables)[j]; 509 510 if (ind_table == NULL) 511 continue; 512 claim_zero(ibv_exp_destroy_rwq_ind_table(ind_table)); 513 } 514 rte_free(ind_tables); 515 } 516 return err; 517 } 518 519 /** 520 * Clean up hash RX queues and indirection table. 521 * 522 * @param priv 523 * Pointer to private structure. 524 */ 525 void 526 priv_destroy_hash_rxqs(struct priv *priv) 527 { 528 unsigned int i; 529 530 DEBUG("destroying %u hash RX queues", priv->hash_rxqs_n); 531 if (priv->hash_rxqs_n == 0) { 532 assert(priv->hash_rxqs == NULL); 533 assert(priv->ind_tables == NULL); 534 return; 535 } 536 for (i = 0; (i != priv->hash_rxqs_n); ++i) { 537 struct hash_rxq *hash_rxq = &(*priv->hash_rxqs)[i]; 538 unsigned int j, k; 539 540 assert(hash_rxq->priv == priv); 541 assert(hash_rxq->qp != NULL); 542 /* Also check that there are no remaining flows. */ 543 for (j = 0; (j != RTE_DIM(hash_rxq->special_flow)); ++j) 544 for (k = 0; 545 (k != RTE_DIM(hash_rxq->special_flow[j])); 546 ++k) 547 assert(hash_rxq->special_flow[j][k] == NULL); 548 for (j = 0; (j != RTE_DIM(hash_rxq->mac_flow)); ++j) 549 for (k = 0; (k != RTE_DIM(hash_rxq->mac_flow[j])); ++k) 550 assert(hash_rxq->mac_flow[j][k] == NULL); 551 claim_zero(ibv_destroy_qp(hash_rxq->qp)); 552 } 553 priv->hash_rxqs_n = 0; 554 rte_free(priv->hash_rxqs); 555 priv->hash_rxqs = NULL; 556 for (i = 0; (i != priv->ind_tables_n); ++i) { 557 struct ibv_exp_rwq_ind_table *ind_table = 558 (*priv->ind_tables)[i]; 559 560 assert(ind_table != NULL); 561 claim_zero(ibv_exp_destroy_rwq_ind_table(ind_table)); 562 } 563 priv->ind_tables_n = 0; 564 rte_free(priv->ind_tables); 565 priv->ind_tables = NULL; 566 } 567 568 /** 569 * Check whether a given flow type is allowed. 570 * 571 * @param priv 572 * Pointer to private structure. 573 * @param type 574 * Flow type to check. 575 * 576 * @return 577 * Nonzero if the given flow type is allowed. 578 */ 579 int 580 priv_allow_flow_type(struct priv *priv, enum hash_rxq_flow_type type) 581 { 582 /* Only FLOW_TYPE_PROMISC is allowed when promiscuous mode 583 * has been requested. */ 584 if (priv->promisc_req) 585 return type == HASH_RXQ_FLOW_TYPE_PROMISC; 586 switch (type) { 587 case HASH_RXQ_FLOW_TYPE_PROMISC: 588 return !!priv->promisc_req; 589 case HASH_RXQ_FLOW_TYPE_ALLMULTI: 590 return !!priv->allmulti_req; 591 case HASH_RXQ_FLOW_TYPE_BROADCAST: 592 #ifdef HAVE_FLOW_SPEC_IPV6 593 case HASH_RXQ_FLOW_TYPE_IPV6MULTI: 594 #endif /* HAVE_FLOW_SPEC_IPV6 */ 595 /* If allmulti is enabled, broadcast and ipv6multi 596 * are unnecessary. */ 597 return !priv->allmulti_req; 598 case HASH_RXQ_FLOW_TYPE_MAC: 599 return 1; 600 default: 601 /* Unsupported flow type is not allowed. */ 602 return 0; 603 } 604 return 0; 605 } 606 607 /** 608 * Automatically enable/disable flows according to configuration. 609 * 610 * @param priv 611 * Private structure. 612 * 613 * @return 614 * 0 on success, errno value on failure. 615 */ 616 int 617 priv_rehash_flows(struct priv *priv) 618 { 619 unsigned int i; 620 621 for (i = 0; (i != RTE_DIM((*priv->hash_rxqs)[0].special_flow)); ++i) 622 if (!priv_allow_flow_type(priv, i)) { 623 priv_special_flow_disable(priv, i); 624 } else { 625 int ret = priv_special_flow_enable(priv, i); 626 627 if (ret) 628 return ret; 629 } 630 if (priv_allow_flow_type(priv, HASH_RXQ_FLOW_TYPE_MAC)) 631 return priv_mac_addrs_enable(priv); 632 priv_mac_addrs_disable(priv); 633 return 0; 634 } 635 636 /** 637 * Allocate RX queue elements with scattered packets support. 638 * 639 * @param rxq 640 * Pointer to RX queue structure. 641 * @param elts_n 642 * Number of elements to allocate. 643 * @param[in] pool 644 * If not NULL, fetch buffers from this array instead of allocating them 645 * with rte_pktmbuf_alloc(). 646 * 647 * @return 648 * 0 on success, errno value on failure. 649 */ 650 static int 651 rxq_alloc_elts_sp(struct rxq *rxq, unsigned int elts_n, 652 struct rte_mbuf **pool) 653 { 654 unsigned int i; 655 struct rxq_elt_sp (*elts)[elts_n] = 656 rte_calloc_socket("RXQ elements", 1, sizeof(*elts), 0, 657 rxq->socket); 658 int ret = 0; 659 660 if (elts == NULL) { 661 ERROR("%p: can't allocate packets array", (void *)rxq); 662 ret = ENOMEM; 663 goto error; 664 } 665 /* For each WR (packet). */ 666 for (i = 0; (i != elts_n); ++i) { 667 unsigned int j; 668 struct rxq_elt_sp *elt = &(*elts)[i]; 669 struct ibv_sge (*sges)[RTE_DIM(elt->sges)] = &elt->sges; 670 671 /* These two arrays must have the same size. */ 672 assert(RTE_DIM(elt->sges) == RTE_DIM(elt->bufs)); 673 /* For each SGE (segment). */ 674 for (j = 0; (j != RTE_DIM(elt->bufs)); ++j) { 675 struct ibv_sge *sge = &(*sges)[j]; 676 struct rte_mbuf *buf; 677 678 if (pool != NULL) { 679 buf = *(pool++); 680 assert(buf != NULL); 681 rte_pktmbuf_reset(buf); 682 } else 683 buf = rte_pktmbuf_alloc(rxq->mp); 684 if (buf == NULL) { 685 assert(pool == NULL); 686 ERROR("%p: empty mbuf pool", (void *)rxq); 687 ret = ENOMEM; 688 goto error; 689 } 690 elt->bufs[j] = buf; 691 /* Headroom is reserved by rte_pktmbuf_alloc(). */ 692 assert(DATA_OFF(buf) == RTE_PKTMBUF_HEADROOM); 693 /* Buffer is supposed to be empty. */ 694 assert(rte_pktmbuf_data_len(buf) == 0); 695 assert(rte_pktmbuf_pkt_len(buf) == 0); 696 /* sge->addr must be able to store a pointer. */ 697 assert(sizeof(sge->addr) >= sizeof(uintptr_t)); 698 if (j == 0) { 699 /* The first SGE keeps its headroom. */ 700 sge->addr = rte_pktmbuf_mtod(buf, uintptr_t); 701 sge->length = (buf->buf_len - 702 RTE_PKTMBUF_HEADROOM); 703 } else { 704 /* Subsequent SGEs lose theirs. */ 705 assert(DATA_OFF(buf) == RTE_PKTMBUF_HEADROOM); 706 SET_DATA_OFF(buf, 0); 707 sge->addr = (uintptr_t)buf->buf_addr; 708 sge->length = buf->buf_len; 709 } 710 sge->lkey = rxq->mr->lkey; 711 /* Redundant check for tailroom. */ 712 assert(sge->length == rte_pktmbuf_tailroom(buf)); 713 } 714 } 715 DEBUG("%p: allocated and configured %u WRs (%zu segments)", 716 (void *)rxq, elts_n, (elts_n * RTE_DIM((*elts)[0].sges))); 717 rxq->elts_n = elts_n; 718 rxq->elts_head = 0; 719 rxq->elts.sp = elts; 720 assert(ret == 0); 721 return 0; 722 error: 723 if (elts != NULL) { 724 assert(pool == NULL); 725 for (i = 0; (i != RTE_DIM(*elts)); ++i) { 726 unsigned int j; 727 struct rxq_elt_sp *elt = &(*elts)[i]; 728 729 for (j = 0; (j != RTE_DIM(elt->bufs)); ++j) { 730 struct rte_mbuf *buf = elt->bufs[j]; 731 732 if (buf != NULL) 733 rte_pktmbuf_free_seg(buf); 734 } 735 } 736 rte_free(elts); 737 } 738 DEBUG("%p: failed, freed everything", (void *)rxq); 739 assert(ret > 0); 740 return ret; 741 } 742 743 /** 744 * Free RX queue elements with scattered packets support. 745 * 746 * @param rxq 747 * Pointer to RX queue structure. 748 */ 749 static void 750 rxq_free_elts_sp(struct rxq *rxq) 751 { 752 unsigned int i; 753 unsigned int elts_n = rxq->elts_n; 754 struct rxq_elt_sp (*elts)[elts_n] = rxq->elts.sp; 755 756 DEBUG("%p: freeing WRs", (void *)rxq); 757 rxq->elts_n = 0; 758 rxq->elts.sp = NULL; 759 if (elts == NULL) 760 return; 761 for (i = 0; (i != RTE_DIM(*elts)); ++i) { 762 unsigned int j; 763 struct rxq_elt_sp *elt = &(*elts)[i]; 764 765 for (j = 0; (j != RTE_DIM(elt->bufs)); ++j) { 766 struct rte_mbuf *buf = elt->bufs[j]; 767 768 if (buf != NULL) 769 rte_pktmbuf_free_seg(buf); 770 } 771 } 772 rte_free(elts); 773 } 774 775 /** 776 * Allocate RX queue elements. 777 * 778 * @param rxq 779 * Pointer to RX queue structure. 780 * @param elts_n 781 * Number of elements to allocate. 782 * @param[in] pool 783 * If not NULL, fetch buffers from this array instead of allocating them 784 * with rte_pktmbuf_alloc(). 785 * 786 * @return 787 * 0 on success, errno value on failure. 788 */ 789 static int 790 rxq_alloc_elts(struct rxq *rxq, unsigned int elts_n, struct rte_mbuf **pool) 791 { 792 unsigned int i; 793 struct rxq_elt (*elts)[elts_n] = 794 rte_calloc_socket("RXQ elements", 1, sizeof(*elts), 0, 795 rxq->socket); 796 int ret = 0; 797 798 if (elts == NULL) { 799 ERROR("%p: can't allocate packets array", (void *)rxq); 800 ret = ENOMEM; 801 goto error; 802 } 803 /* For each WR (packet). */ 804 for (i = 0; (i != elts_n); ++i) { 805 struct rxq_elt *elt = &(*elts)[i]; 806 struct ibv_sge *sge = &(*elts)[i].sge; 807 struct rte_mbuf *buf; 808 809 if (pool != NULL) { 810 buf = *(pool++); 811 assert(buf != NULL); 812 rte_pktmbuf_reset(buf); 813 } else 814 buf = rte_pktmbuf_alloc(rxq->mp); 815 if (buf == NULL) { 816 assert(pool == NULL); 817 ERROR("%p: empty mbuf pool", (void *)rxq); 818 ret = ENOMEM; 819 goto error; 820 } 821 elt->buf = buf; 822 /* Headroom is reserved by rte_pktmbuf_alloc(). */ 823 assert(DATA_OFF(buf) == RTE_PKTMBUF_HEADROOM); 824 /* Buffer is supposed to be empty. */ 825 assert(rte_pktmbuf_data_len(buf) == 0); 826 assert(rte_pktmbuf_pkt_len(buf) == 0); 827 /* sge->addr must be able to store a pointer. */ 828 assert(sizeof(sge->addr) >= sizeof(uintptr_t)); 829 /* SGE keeps its headroom. */ 830 sge->addr = (uintptr_t) 831 ((uint8_t *)buf->buf_addr + RTE_PKTMBUF_HEADROOM); 832 sge->length = (buf->buf_len - RTE_PKTMBUF_HEADROOM); 833 sge->lkey = rxq->mr->lkey; 834 /* Redundant check for tailroom. */ 835 assert(sge->length == rte_pktmbuf_tailroom(buf)); 836 } 837 DEBUG("%p: allocated and configured %u single-segment WRs", 838 (void *)rxq, elts_n); 839 rxq->elts_n = elts_n; 840 rxq->elts_head = 0; 841 rxq->elts.no_sp = elts; 842 assert(ret == 0); 843 return 0; 844 error: 845 if (elts != NULL) { 846 assert(pool == NULL); 847 for (i = 0; (i != RTE_DIM(*elts)); ++i) { 848 struct rxq_elt *elt = &(*elts)[i]; 849 struct rte_mbuf *buf = elt->buf; 850 851 if (buf != NULL) 852 rte_pktmbuf_free_seg(buf); 853 } 854 rte_free(elts); 855 } 856 DEBUG("%p: failed, freed everything", (void *)rxq); 857 assert(ret > 0); 858 return ret; 859 } 860 861 /** 862 * Free RX queue elements. 863 * 864 * @param rxq 865 * Pointer to RX queue structure. 866 */ 867 static void 868 rxq_free_elts(struct rxq *rxq) 869 { 870 unsigned int i; 871 unsigned int elts_n = rxq->elts_n; 872 struct rxq_elt (*elts)[elts_n] = rxq->elts.no_sp; 873 874 DEBUG("%p: freeing WRs", (void *)rxq); 875 rxq->elts_n = 0; 876 rxq->elts.no_sp = NULL; 877 if (elts == NULL) 878 return; 879 for (i = 0; (i != RTE_DIM(*elts)); ++i) { 880 struct rxq_elt *elt = &(*elts)[i]; 881 struct rte_mbuf *buf = elt->buf; 882 883 if (buf != NULL) 884 rte_pktmbuf_free_seg(buf); 885 } 886 rte_free(elts); 887 } 888 889 /** 890 * Clean up a RX queue. 891 * 892 * Destroy objects, free allocated memory and reset the structure for reuse. 893 * 894 * @param rxq 895 * Pointer to RX queue structure. 896 */ 897 void 898 rxq_cleanup(struct rxq *rxq) 899 { 900 struct ibv_exp_release_intf_params params; 901 902 DEBUG("cleaning up %p", (void *)rxq); 903 if (rxq->sp) 904 rxq_free_elts_sp(rxq); 905 else 906 rxq_free_elts(rxq); 907 rxq->poll = NULL; 908 rxq->recv = NULL; 909 if (rxq->if_wq != NULL) { 910 assert(rxq->priv != NULL); 911 assert(rxq->priv->ctx != NULL); 912 assert(rxq->wq != NULL); 913 params = (struct ibv_exp_release_intf_params){ 914 .comp_mask = 0, 915 }; 916 claim_zero(ibv_exp_release_intf(rxq->priv->ctx, 917 rxq->if_wq, 918 ¶ms)); 919 } 920 if (rxq->if_cq != NULL) { 921 assert(rxq->priv != NULL); 922 assert(rxq->priv->ctx != NULL); 923 assert(rxq->cq != NULL); 924 params = (struct ibv_exp_release_intf_params){ 925 .comp_mask = 0, 926 }; 927 claim_zero(ibv_exp_release_intf(rxq->priv->ctx, 928 rxq->if_cq, 929 ¶ms)); 930 } 931 if (rxq->wq != NULL) 932 claim_zero(ibv_exp_destroy_wq(rxq->wq)); 933 if (rxq->cq != NULL) 934 claim_zero(ibv_destroy_cq(rxq->cq)); 935 if (rxq->rd != NULL) { 936 struct ibv_exp_destroy_res_domain_attr attr = { 937 .comp_mask = 0, 938 }; 939 940 assert(rxq->priv != NULL); 941 assert(rxq->priv->ctx != NULL); 942 claim_zero(ibv_exp_destroy_res_domain(rxq->priv->ctx, 943 rxq->rd, 944 &attr)); 945 } 946 if (rxq->mr != NULL) 947 claim_zero(ibv_dereg_mr(rxq->mr)); 948 memset(rxq, 0, sizeof(*rxq)); 949 } 950 951 /** 952 * Reconfigure a RX queue with new parameters. 953 * 954 * rxq_rehash() does not allocate mbufs, which, if not done from the right 955 * thread (such as a control thread), may corrupt the pool. 956 * In case of failure, the queue is left untouched. 957 * 958 * @param dev 959 * Pointer to Ethernet device structure. 960 * @param rxq 961 * RX queue pointer. 962 * 963 * @return 964 * 0 on success, errno value on failure. 965 */ 966 int 967 rxq_rehash(struct rte_eth_dev *dev, struct rxq *rxq) 968 { 969 struct priv *priv = rxq->priv; 970 struct rxq tmpl = *rxq; 971 unsigned int mbuf_n; 972 unsigned int desc_n; 973 struct rte_mbuf **pool; 974 unsigned int i, k; 975 struct ibv_exp_wq_attr mod; 976 int err; 977 978 DEBUG("%p: rehashing queue %p", (void *)dev, (void *)rxq); 979 /* Number of descriptors and mbufs currently allocated. */ 980 desc_n = (tmpl.elts_n * (tmpl.sp ? MLX5_PMD_SGE_WR_N : 1)); 981 mbuf_n = desc_n; 982 /* Toggle RX checksum offload if hardware supports it. */ 983 if (priv->hw_csum) { 984 tmpl.csum = !!dev->data->dev_conf.rxmode.hw_ip_checksum; 985 rxq->csum = tmpl.csum; 986 } 987 if (priv->hw_csum_l2tun) { 988 tmpl.csum_l2tun = !!dev->data->dev_conf.rxmode.hw_ip_checksum; 989 rxq->csum_l2tun = tmpl.csum_l2tun; 990 } 991 /* Enable scattered packets support for this queue if necessary. */ 992 if ((dev->data->dev_conf.rxmode.jumbo_frame) && 993 (dev->data->dev_conf.rxmode.max_rx_pkt_len > 994 (tmpl.mb_len - RTE_PKTMBUF_HEADROOM))) { 995 tmpl.sp = 1; 996 desc_n /= MLX5_PMD_SGE_WR_N; 997 } else 998 tmpl.sp = 0; 999 DEBUG("%p: %s scattered packets support (%u WRs)", 1000 (void *)dev, (tmpl.sp ? "enabling" : "disabling"), desc_n); 1001 /* If scatter mode is the same as before, nothing to do. */ 1002 if (tmpl.sp == rxq->sp) { 1003 DEBUG("%p: nothing to do", (void *)dev); 1004 return 0; 1005 } 1006 /* From now on, any failure will render the queue unusable. 1007 * Reinitialize WQ. */ 1008 mod = (struct ibv_exp_wq_attr){ 1009 .attr_mask = IBV_EXP_WQ_ATTR_STATE, 1010 .wq_state = IBV_EXP_WQS_RESET, 1011 }; 1012 err = ibv_exp_modify_wq(tmpl.wq, &mod); 1013 if (err) { 1014 ERROR("%p: cannot reset WQ: %s", (void *)dev, strerror(err)); 1015 assert(err > 0); 1016 return err; 1017 } 1018 /* Allocate pool. */ 1019 pool = rte_malloc(__func__, (mbuf_n * sizeof(*pool)), 0); 1020 if (pool == NULL) { 1021 ERROR("%p: cannot allocate memory", (void *)dev); 1022 return ENOBUFS; 1023 } 1024 /* Snatch mbufs from original queue. */ 1025 k = 0; 1026 if (rxq->sp) { 1027 struct rxq_elt_sp (*elts)[rxq->elts_n] = rxq->elts.sp; 1028 1029 for (i = 0; (i != RTE_DIM(*elts)); ++i) { 1030 struct rxq_elt_sp *elt = &(*elts)[i]; 1031 unsigned int j; 1032 1033 for (j = 0; (j != RTE_DIM(elt->bufs)); ++j) { 1034 assert(elt->bufs[j] != NULL); 1035 pool[k++] = elt->bufs[j]; 1036 } 1037 } 1038 } else { 1039 struct rxq_elt (*elts)[rxq->elts_n] = rxq->elts.no_sp; 1040 1041 for (i = 0; (i != RTE_DIM(*elts)); ++i) { 1042 struct rxq_elt *elt = &(*elts)[i]; 1043 struct rte_mbuf *buf = elt->buf; 1044 1045 pool[k++] = buf; 1046 } 1047 } 1048 assert(k == mbuf_n); 1049 tmpl.elts_n = 0; 1050 tmpl.elts.sp = NULL; 1051 assert((void *)&tmpl.elts.sp == (void *)&tmpl.elts.no_sp); 1052 err = ((tmpl.sp) ? 1053 rxq_alloc_elts_sp(&tmpl, desc_n, pool) : 1054 rxq_alloc_elts(&tmpl, desc_n, pool)); 1055 if (err) { 1056 ERROR("%p: cannot reallocate WRs, aborting", (void *)dev); 1057 rte_free(pool); 1058 assert(err > 0); 1059 return err; 1060 } 1061 assert(tmpl.elts_n == desc_n); 1062 assert(tmpl.elts.sp != NULL); 1063 rte_free(pool); 1064 /* Clean up original data. */ 1065 rxq->elts_n = 0; 1066 rte_free(rxq->elts.sp); 1067 rxq->elts.sp = NULL; 1068 /* Change queue state to ready. */ 1069 mod = (struct ibv_exp_wq_attr){ 1070 .attr_mask = IBV_EXP_WQ_ATTR_STATE, 1071 .wq_state = IBV_EXP_WQS_RDY, 1072 }; 1073 err = ibv_exp_modify_wq(tmpl.wq, &mod); 1074 if (err) { 1075 ERROR("%p: WQ state to IBV_EXP_WQS_RDY failed: %s", 1076 (void *)dev, strerror(err)); 1077 goto error; 1078 } 1079 /* Post SGEs. */ 1080 assert(tmpl.if_wq != NULL); 1081 if (tmpl.sp) { 1082 struct rxq_elt_sp (*elts)[tmpl.elts_n] = tmpl.elts.sp; 1083 1084 for (i = 0; (i != RTE_DIM(*elts)); ++i) { 1085 err = tmpl.if_wq->recv_sg_list 1086 (tmpl.wq, 1087 (*elts)[i].sges, 1088 RTE_DIM((*elts)[i].sges)); 1089 if (err) 1090 break; 1091 } 1092 } else { 1093 struct rxq_elt (*elts)[tmpl.elts_n] = tmpl.elts.no_sp; 1094 1095 for (i = 0; (i != RTE_DIM(*elts)); ++i) { 1096 err = tmpl.if_wq->recv_burst( 1097 tmpl.wq, 1098 &(*elts)[i].sge, 1099 1); 1100 if (err) 1101 break; 1102 } 1103 } 1104 if (err) { 1105 ERROR("%p: failed to post SGEs with error %d", 1106 (void *)dev, err); 1107 /* Set err because it does not contain a valid errno value. */ 1108 err = EIO; 1109 goto error; 1110 } 1111 if (tmpl.sp) 1112 tmpl.recv = tmpl.if_wq->recv_sg_list; 1113 else 1114 tmpl.recv = tmpl.if_wq->recv_burst; 1115 error: 1116 *rxq = tmpl; 1117 assert(err >= 0); 1118 return err; 1119 } 1120 1121 /** 1122 * Configure a RX queue. 1123 * 1124 * @param dev 1125 * Pointer to Ethernet device structure. 1126 * @param rxq 1127 * Pointer to RX queue structure. 1128 * @param desc 1129 * Number of descriptors to configure in queue. 1130 * @param socket 1131 * NUMA socket on which memory must be allocated. 1132 * @param[in] conf 1133 * Thresholds parameters. 1134 * @param mp 1135 * Memory pool for buffer allocations. 1136 * 1137 * @return 1138 * 0 on success, errno value on failure. 1139 */ 1140 int 1141 rxq_setup(struct rte_eth_dev *dev, struct rxq *rxq, uint16_t desc, 1142 unsigned int socket, const struct rte_eth_rxconf *conf, 1143 struct rte_mempool *mp) 1144 { 1145 struct priv *priv = dev->data->dev_private; 1146 struct rxq tmpl = { 1147 .priv = priv, 1148 .mp = mp, 1149 .socket = socket 1150 }; 1151 struct ibv_exp_wq_attr mod; 1152 union { 1153 struct ibv_exp_query_intf_params params; 1154 struct ibv_exp_cq_init_attr cq; 1155 struct ibv_exp_res_domain_init_attr rd; 1156 struct ibv_exp_wq_init_attr wq; 1157 } attr; 1158 enum ibv_exp_query_intf_status status; 1159 struct rte_mbuf *buf; 1160 int ret = 0; 1161 unsigned int i; 1162 unsigned int cq_size = desc; 1163 1164 (void)conf; /* Thresholds configuration (ignored). */ 1165 if ((desc == 0) || (desc % MLX5_PMD_SGE_WR_N)) { 1166 ERROR("%p: invalid number of RX descriptors (must be a" 1167 " multiple of %d)", (void *)dev, MLX5_PMD_SGE_WR_N); 1168 return EINVAL; 1169 } 1170 /* Get mbuf length. */ 1171 buf = rte_pktmbuf_alloc(mp); 1172 if (buf == NULL) { 1173 ERROR("%p: unable to allocate mbuf", (void *)dev); 1174 return ENOMEM; 1175 } 1176 tmpl.mb_len = buf->buf_len; 1177 assert((rte_pktmbuf_headroom(buf) + 1178 rte_pktmbuf_tailroom(buf)) == tmpl.mb_len); 1179 assert(rte_pktmbuf_headroom(buf) == RTE_PKTMBUF_HEADROOM); 1180 rte_pktmbuf_free(buf); 1181 /* Toggle RX checksum offload if hardware supports it. */ 1182 if (priv->hw_csum) 1183 tmpl.csum = !!dev->data->dev_conf.rxmode.hw_ip_checksum; 1184 if (priv->hw_csum_l2tun) 1185 tmpl.csum_l2tun = !!dev->data->dev_conf.rxmode.hw_ip_checksum; 1186 /* Enable scattered packets support for this queue if necessary. */ 1187 if ((dev->data->dev_conf.rxmode.jumbo_frame) && 1188 (dev->data->dev_conf.rxmode.max_rx_pkt_len > 1189 (tmpl.mb_len - RTE_PKTMBUF_HEADROOM))) { 1190 tmpl.sp = 1; 1191 desc /= MLX5_PMD_SGE_WR_N; 1192 } 1193 DEBUG("%p: %s scattered packets support (%u WRs)", 1194 (void *)dev, (tmpl.sp ? "enabling" : "disabling"), desc); 1195 /* Use the entire RX mempool as the memory region. */ 1196 tmpl.mr = mlx5_mp2mr(priv->pd, mp); 1197 if (tmpl.mr == NULL) { 1198 ret = EINVAL; 1199 ERROR("%p: MR creation failure: %s", 1200 (void *)dev, strerror(ret)); 1201 goto error; 1202 } 1203 attr.rd = (struct ibv_exp_res_domain_init_attr){ 1204 .comp_mask = (IBV_EXP_RES_DOMAIN_THREAD_MODEL | 1205 IBV_EXP_RES_DOMAIN_MSG_MODEL), 1206 .thread_model = IBV_EXP_THREAD_SINGLE, 1207 .msg_model = IBV_EXP_MSG_HIGH_BW, 1208 }; 1209 tmpl.rd = ibv_exp_create_res_domain(priv->ctx, &attr.rd); 1210 if (tmpl.rd == NULL) { 1211 ret = ENOMEM; 1212 ERROR("%p: RD creation failure: %s", 1213 (void *)dev, strerror(ret)); 1214 goto error; 1215 } 1216 attr.cq = (struct ibv_exp_cq_init_attr){ 1217 .comp_mask = IBV_EXP_CQ_INIT_ATTR_RES_DOMAIN, 1218 .res_domain = tmpl.rd, 1219 }; 1220 tmpl.cq = ibv_exp_create_cq(priv->ctx, cq_size, NULL, NULL, 0, 1221 &attr.cq); 1222 if (tmpl.cq == NULL) { 1223 ret = ENOMEM; 1224 ERROR("%p: CQ creation failure: %s", 1225 (void *)dev, strerror(ret)); 1226 goto error; 1227 } 1228 DEBUG("priv->device_attr.max_qp_wr is %d", 1229 priv->device_attr.max_qp_wr); 1230 DEBUG("priv->device_attr.max_sge is %d", 1231 priv->device_attr.max_sge); 1232 /* Configure VLAN stripping. */ 1233 tmpl.vlan_strip = dev->data->dev_conf.rxmode.hw_vlan_strip; 1234 attr.wq = (struct ibv_exp_wq_init_attr){ 1235 .wq_context = NULL, /* Could be useful in the future. */ 1236 .wq_type = IBV_EXP_WQT_RQ, 1237 /* Max number of outstanding WRs. */ 1238 .max_recv_wr = ((priv->device_attr.max_qp_wr < (int)cq_size) ? 1239 priv->device_attr.max_qp_wr : 1240 (int)cq_size), 1241 /* Max number of scatter/gather elements in a WR. */ 1242 .max_recv_sge = ((priv->device_attr.max_sge < 1243 MLX5_PMD_SGE_WR_N) ? 1244 priv->device_attr.max_sge : 1245 MLX5_PMD_SGE_WR_N), 1246 .pd = priv->pd, 1247 .cq = tmpl.cq, 1248 .comp_mask = 1249 IBV_EXP_CREATE_WQ_RES_DOMAIN | 1250 #ifdef HAVE_EXP_DEVICE_ATTR_VLAN_OFFLOADS 1251 IBV_EXP_CREATE_WQ_VLAN_OFFLOADS | 1252 #endif /* HAVE_EXP_DEVICE_ATTR_VLAN_OFFLOADS */ 1253 0, 1254 .res_domain = tmpl.rd, 1255 #ifdef HAVE_EXP_DEVICE_ATTR_VLAN_OFFLOADS 1256 .vlan_offloads = (tmpl.vlan_strip ? 1257 IBV_EXP_RECEIVE_WQ_CVLAN_STRIP : 1258 0), 1259 #endif /* HAVE_EXP_DEVICE_ATTR_VLAN_OFFLOADS */ 1260 }; 1261 1262 #ifdef HAVE_VERBS_FCS 1263 /* By default, FCS (CRC) is stripped by hardware. */ 1264 if (dev->data->dev_conf.rxmode.hw_strip_crc) { 1265 tmpl.crc_present = 0; 1266 } else if (priv->hw_fcs_strip) { 1267 /* Ask HW/Verbs to leave CRC in place when supported. */ 1268 attr.wq.flags |= IBV_EXP_CREATE_WQ_FLAG_SCATTER_FCS; 1269 attr.wq.comp_mask |= IBV_EXP_CREATE_WQ_FLAGS; 1270 tmpl.crc_present = 1; 1271 } else { 1272 WARN("%p: CRC stripping has been disabled but will still" 1273 " be performed by hardware, make sure MLNX_OFED and" 1274 " firmware are up to date", 1275 (void *)dev); 1276 tmpl.crc_present = 0; 1277 } 1278 DEBUG("%p: CRC stripping is %s, %u bytes will be subtracted from" 1279 " incoming frames to hide it", 1280 (void *)dev, 1281 tmpl.crc_present ? "disabled" : "enabled", 1282 tmpl.crc_present << 2); 1283 #endif /* HAVE_VERBS_FCS */ 1284 1285 #ifdef HAVE_VERBS_RX_END_PADDING 1286 if (!mlx5_getenv_int("MLX5_PMD_ENABLE_PADDING")) 1287 ; /* Nothing else to do. */ 1288 else if (priv->hw_padding) { 1289 INFO("%p: enabling packet padding on queue %p", 1290 (void *)dev, (void *)rxq); 1291 attr.wq.flags |= IBV_EXP_CREATE_WQ_FLAG_RX_END_PADDING; 1292 attr.wq.comp_mask |= IBV_EXP_CREATE_WQ_FLAGS; 1293 } else 1294 WARN("%p: packet padding has been requested but is not" 1295 " supported, make sure MLNX_OFED and firmware are" 1296 " up to date", 1297 (void *)dev); 1298 #endif /* HAVE_VERBS_RX_END_PADDING */ 1299 1300 tmpl.wq = ibv_exp_create_wq(priv->ctx, &attr.wq); 1301 if (tmpl.wq == NULL) { 1302 ret = (errno ? errno : EINVAL); 1303 ERROR("%p: WQ creation failure: %s", 1304 (void *)dev, strerror(ret)); 1305 goto error; 1306 } 1307 if (tmpl.sp) 1308 ret = rxq_alloc_elts_sp(&tmpl, desc, NULL); 1309 else 1310 ret = rxq_alloc_elts(&tmpl, desc, NULL); 1311 if (ret) { 1312 ERROR("%p: RXQ allocation failed: %s", 1313 (void *)dev, strerror(ret)); 1314 goto error; 1315 } 1316 /* Save port ID. */ 1317 tmpl.port_id = dev->data->port_id; 1318 DEBUG("%p: RTE port ID: %u", (void *)rxq, tmpl.port_id); 1319 attr.params = (struct ibv_exp_query_intf_params){ 1320 .intf_scope = IBV_EXP_INTF_GLOBAL, 1321 #ifdef HAVE_EXP_DEVICE_ATTR_VLAN_OFFLOADS 1322 .intf_version = 1, 1323 #endif /* HAVE_EXP_DEVICE_ATTR_VLAN_OFFLOADS */ 1324 .intf = IBV_EXP_INTF_CQ, 1325 .obj = tmpl.cq, 1326 }; 1327 tmpl.if_cq = ibv_exp_query_intf(priv->ctx, &attr.params, &status); 1328 if (tmpl.if_cq == NULL) { 1329 ERROR("%p: CQ interface family query failed with status %d", 1330 (void *)dev, status); 1331 goto error; 1332 } 1333 attr.params = (struct ibv_exp_query_intf_params){ 1334 .intf_scope = IBV_EXP_INTF_GLOBAL, 1335 .intf = IBV_EXP_INTF_WQ, 1336 .obj = tmpl.wq, 1337 }; 1338 tmpl.if_wq = ibv_exp_query_intf(priv->ctx, &attr.params, &status); 1339 if (tmpl.if_wq == NULL) { 1340 ERROR("%p: WQ interface family query failed with status %d", 1341 (void *)dev, status); 1342 goto error; 1343 } 1344 /* Change queue state to ready. */ 1345 mod = (struct ibv_exp_wq_attr){ 1346 .attr_mask = IBV_EXP_WQ_ATTR_STATE, 1347 .wq_state = IBV_EXP_WQS_RDY, 1348 }; 1349 ret = ibv_exp_modify_wq(tmpl.wq, &mod); 1350 if (ret) { 1351 ERROR("%p: WQ state to IBV_EXP_WQS_RDY failed: %s", 1352 (void *)dev, strerror(ret)); 1353 goto error; 1354 } 1355 /* Post SGEs. */ 1356 if (tmpl.sp) { 1357 struct rxq_elt_sp (*elts)[tmpl.elts_n] = tmpl.elts.sp; 1358 1359 for (i = 0; (i != RTE_DIM(*elts)); ++i) { 1360 ret = tmpl.if_wq->recv_sg_list 1361 (tmpl.wq, 1362 (*elts)[i].sges, 1363 RTE_DIM((*elts)[i].sges)); 1364 if (ret) 1365 break; 1366 } 1367 } else { 1368 struct rxq_elt (*elts)[tmpl.elts_n] = tmpl.elts.no_sp; 1369 1370 for (i = 0; (i != RTE_DIM(*elts)); ++i) { 1371 ret = tmpl.if_wq->recv_burst( 1372 tmpl.wq, 1373 &(*elts)[i].sge, 1374 1); 1375 if (ret) 1376 break; 1377 } 1378 } 1379 if (ret) { 1380 ERROR("%p: failed to post SGEs with error %d", 1381 (void *)dev, ret); 1382 /* Set ret because it does not contain a valid errno value. */ 1383 ret = EIO; 1384 goto error; 1385 } 1386 /* Clean up rxq in case we're reinitializing it. */ 1387 DEBUG("%p: cleaning-up old rxq just in case", (void *)rxq); 1388 rxq_cleanup(rxq); 1389 *rxq = tmpl; 1390 DEBUG("%p: rxq updated with %p", (void *)rxq, (void *)&tmpl); 1391 assert(ret == 0); 1392 /* Assign function in queue. */ 1393 #ifdef HAVE_EXP_DEVICE_ATTR_VLAN_OFFLOADS 1394 rxq->poll = rxq->if_cq->poll_length_flags_cvlan; 1395 #else /* HAVE_EXP_DEVICE_ATTR_VLAN_OFFLOADS */ 1396 rxq->poll = rxq->if_cq->poll_length_flags; 1397 #endif /* HAVE_EXP_DEVICE_ATTR_VLAN_OFFLOADS */ 1398 if (rxq->sp) 1399 rxq->recv = rxq->if_wq->recv_sg_list; 1400 else 1401 rxq->recv = rxq->if_wq->recv_burst; 1402 return 0; 1403 error: 1404 rxq_cleanup(&tmpl); 1405 assert(ret > 0); 1406 return ret; 1407 } 1408 1409 /** 1410 * DPDK callback to configure a RX queue. 1411 * 1412 * @param dev 1413 * Pointer to Ethernet device structure. 1414 * @param idx 1415 * RX queue index. 1416 * @param desc 1417 * Number of descriptors to configure in queue. 1418 * @param socket 1419 * NUMA socket on which memory must be allocated. 1420 * @param[in] conf 1421 * Thresholds parameters. 1422 * @param mp 1423 * Memory pool for buffer allocations. 1424 * 1425 * @return 1426 * 0 on success, negative errno value on failure. 1427 */ 1428 int 1429 mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, 1430 unsigned int socket, const struct rte_eth_rxconf *conf, 1431 struct rte_mempool *mp) 1432 { 1433 struct priv *priv = dev->data->dev_private; 1434 struct rxq *rxq = (*priv->rxqs)[idx]; 1435 int ret; 1436 1437 if (mlx5_is_secondary()) 1438 return -E_RTE_SECONDARY; 1439 1440 priv_lock(priv); 1441 DEBUG("%p: configuring queue %u for %u descriptors", 1442 (void *)dev, idx, desc); 1443 if (idx >= priv->rxqs_n) { 1444 ERROR("%p: queue index out of range (%u >= %u)", 1445 (void *)dev, idx, priv->rxqs_n); 1446 priv_unlock(priv); 1447 return -EOVERFLOW; 1448 } 1449 if (rxq != NULL) { 1450 DEBUG("%p: reusing already allocated queue index %u (%p)", 1451 (void *)dev, idx, (void *)rxq); 1452 if (priv->started) { 1453 priv_unlock(priv); 1454 return -EEXIST; 1455 } 1456 (*priv->rxqs)[idx] = NULL; 1457 rxq_cleanup(rxq); 1458 } else { 1459 rxq = rte_calloc_socket("RXQ", 1, sizeof(*rxq), 0, socket); 1460 if (rxq == NULL) { 1461 ERROR("%p: unable to allocate queue index %u", 1462 (void *)dev, idx); 1463 priv_unlock(priv); 1464 return -ENOMEM; 1465 } 1466 } 1467 ret = rxq_setup(dev, rxq, desc, socket, conf, mp); 1468 if (ret) 1469 rte_free(rxq); 1470 else { 1471 rxq->stats.idx = idx; 1472 DEBUG("%p: adding RX queue %p to list", 1473 (void *)dev, (void *)rxq); 1474 (*priv->rxqs)[idx] = rxq; 1475 /* Update receive callback. */ 1476 if (rxq->sp) 1477 dev->rx_pkt_burst = mlx5_rx_burst_sp; 1478 else 1479 dev->rx_pkt_burst = mlx5_rx_burst; 1480 } 1481 priv_unlock(priv); 1482 return -ret; 1483 } 1484 1485 /** 1486 * DPDK callback to release a RX queue. 1487 * 1488 * @param dpdk_rxq 1489 * Generic RX queue pointer. 1490 */ 1491 void 1492 mlx5_rx_queue_release(void *dpdk_rxq) 1493 { 1494 struct rxq *rxq = (struct rxq *)dpdk_rxq; 1495 struct priv *priv; 1496 unsigned int i; 1497 1498 if (mlx5_is_secondary()) 1499 return; 1500 1501 if (rxq == NULL) 1502 return; 1503 priv = rxq->priv; 1504 priv_lock(priv); 1505 for (i = 0; (i != priv->rxqs_n); ++i) 1506 if ((*priv->rxqs)[i] == rxq) { 1507 DEBUG("%p: removing RX queue %p from list", 1508 (void *)priv->dev, (void *)rxq); 1509 (*priv->rxqs)[i] = NULL; 1510 break; 1511 } 1512 rxq_cleanup(rxq); 1513 rte_free(rxq); 1514 priv_unlock(priv); 1515 } 1516 1517 /** 1518 * DPDK callback for RX in secondary processes. 1519 * 1520 * This function configures all queues from primary process information 1521 * if necessary before reverting to the normal RX burst callback. 1522 * 1523 * @param dpdk_rxq 1524 * Generic pointer to RX queue structure. 1525 * @param[out] pkts 1526 * Array to store received packets. 1527 * @param pkts_n 1528 * Maximum number of packets in array. 1529 * 1530 * @return 1531 * Number of packets successfully received (<= pkts_n). 1532 */ 1533 uint16_t 1534 mlx5_rx_burst_secondary_setup(void *dpdk_rxq, struct rte_mbuf **pkts, 1535 uint16_t pkts_n) 1536 { 1537 struct rxq *rxq = dpdk_rxq; 1538 struct priv *priv = mlx5_secondary_data_setup(rxq->priv); 1539 struct priv *primary_priv; 1540 unsigned int index; 1541 1542 if (priv == NULL) 1543 return 0; 1544 primary_priv = 1545 mlx5_secondary_data[priv->dev->data->port_id].primary_priv; 1546 /* Look for queue index in both private structures. */ 1547 for (index = 0; index != priv->rxqs_n; ++index) 1548 if (((*primary_priv->rxqs)[index] == rxq) || 1549 ((*priv->rxqs)[index] == rxq)) 1550 break; 1551 if (index == priv->rxqs_n) 1552 return 0; 1553 rxq = (*priv->rxqs)[index]; 1554 return priv->dev->rx_pkt_burst(rxq, pkts, pkts_n); 1555 } 1556