/*-
 *   BSD LICENSE
 *
 *   Copyright 2015 6WIND S.A.
 *   Copyright 2015 Mellanox.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of 6WIND S.A. nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <stddef.h>
#include <assert.h>
#include <errno.h>
#include <string.h>
#include <stdint.h>
#include <fcntl.h>

/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
#include <infiniband/arch.h>
#include <infiniband/mlx5_hw.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif

/* DPDK headers don't like -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <rte_ethdev.h>
#include <rte_common.h>
#include <rte_interrupts.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif

#include "mlx5.h"
#include "mlx5_rxtx.h"
#include "mlx5_utils.h"
#include "mlx5_autoconf.h"
#include "mlx5_defs.h"

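/*
 * Note (illustrative, added for clarity): each entry in hash_rxq_init[]
 * below points to a less specific "underlayer" entry, e.g.
 * HASH_RXQ_TCPV4 -> HASH_RXQ_IPV4 -> HASH_RXQ_ETH -> NULL.
 * priv_flow_attr() walks this chain twice: once to compute the total size
 * of the flow specifications and once to copy them, so that the resulting
 * ibv_exp_flow_attr is followed by the ETH, IP and TCP/UDP specifications
 * in that order (least to most specific).
 */
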
/* Initialization data for hash RX queues. */
const struct hash_rxq_init hash_rxq_init[] = {
	[HASH_RXQ_TCPV4] = {
		.hash_fields = (IBV_EXP_RX_HASH_SRC_IPV4 |
				IBV_EXP_RX_HASH_DST_IPV4 |
				IBV_EXP_RX_HASH_SRC_PORT_TCP |
				IBV_EXP_RX_HASH_DST_PORT_TCP),
		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
		.flow_priority = 0,
		.flow_spec.tcp_udp = {
			.type = IBV_EXP_FLOW_SPEC_TCP,
			.size = sizeof(hash_rxq_init[0].flow_spec.tcp_udp),
		},
		.underlayer = &hash_rxq_init[HASH_RXQ_IPV4],
	},
	[HASH_RXQ_UDPV4] = {
		.hash_fields = (IBV_EXP_RX_HASH_SRC_IPV4 |
				IBV_EXP_RX_HASH_DST_IPV4 |
				IBV_EXP_RX_HASH_SRC_PORT_UDP |
				IBV_EXP_RX_HASH_DST_PORT_UDP),
		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
		.flow_priority = 0,
		.flow_spec.tcp_udp = {
			.type = IBV_EXP_FLOW_SPEC_UDP,
			.size = sizeof(hash_rxq_init[0].flow_spec.tcp_udp),
		},
		.underlayer = &hash_rxq_init[HASH_RXQ_IPV4],
	},
	[HASH_RXQ_IPV4] = {
		.hash_fields = (IBV_EXP_RX_HASH_SRC_IPV4 |
				IBV_EXP_RX_HASH_DST_IPV4),
		.dpdk_rss_hf = (ETH_RSS_IPV4 |
				ETH_RSS_FRAG_IPV4),
		.flow_priority = 1,
		.flow_spec.ipv4 = {
			.type = IBV_EXP_FLOW_SPEC_IPV4,
			.size = sizeof(hash_rxq_init[0].flow_spec.ipv4),
		},
		.underlayer = &hash_rxq_init[HASH_RXQ_ETH],
	},
	[HASH_RXQ_TCPV6] = {
		.hash_fields = (IBV_EXP_RX_HASH_SRC_IPV6 |
				IBV_EXP_RX_HASH_DST_IPV6 |
				IBV_EXP_RX_HASH_SRC_PORT_TCP |
				IBV_EXP_RX_HASH_DST_PORT_TCP),
		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
		.flow_priority = 0,
		.flow_spec.tcp_udp = {
			.type = IBV_EXP_FLOW_SPEC_TCP,
			.size = sizeof(hash_rxq_init[0].flow_spec.tcp_udp),
		},
		.underlayer = &hash_rxq_init[HASH_RXQ_IPV6],
	},
	[HASH_RXQ_UDPV6] = {
		.hash_fields = (IBV_EXP_RX_HASH_SRC_IPV6 |
				IBV_EXP_RX_HASH_DST_IPV6 |
				IBV_EXP_RX_HASH_SRC_PORT_UDP |
				IBV_EXP_RX_HASH_DST_PORT_UDP),
		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
		.flow_priority = 0,
		.flow_spec.tcp_udp = {
			.type = IBV_EXP_FLOW_SPEC_UDP,
			.size = sizeof(hash_rxq_init[0].flow_spec.tcp_udp),
		},
		.underlayer = &hash_rxq_init[HASH_RXQ_IPV6],
	},
	[HASH_RXQ_IPV6] = {
		.hash_fields = (IBV_EXP_RX_HASH_SRC_IPV6 |
				IBV_EXP_RX_HASH_DST_IPV6),
		.dpdk_rss_hf = (ETH_RSS_IPV6 |
				ETH_RSS_FRAG_IPV6),
		.flow_priority = 1,
		.flow_spec.ipv6 = {
			.type = IBV_EXP_FLOW_SPEC_IPV6,
			.size = sizeof(hash_rxq_init[0].flow_spec.ipv6),
		},
		.underlayer = &hash_rxq_init[HASH_RXQ_ETH],
	},
	[HASH_RXQ_ETH] = {
		.hash_fields = 0,
		.dpdk_rss_hf = 0,
		.flow_priority = 2,
		.flow_spec.eth = {
			.type = IBV_EXP_FLOW_SPEC_ETH,
			.size = sizeof(hash_rxq_init[0].flow_spec.eth),
		},
		.underlayer = NULL,
	},
};

/* Number of entries in hash_rxq_init[]. */
const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);

/* Initialization data for hash RX queue indirection tables. */
static const struct ind_table_init ind_table_init[] = {
	{
		.max_size = -1u, /* Superseded by HW limitations. */
		.hash_types =
			1 << HASH_RXQ_TCPV4 |
			1 << HASH_RXQ_UDPV4 |
			1 << HASH_RXQ_IPV4 |
			1 << HASH_RXQ_TCPV6 |
			1 << HASH_RXQ_UDPV6 |
			1 << HASH_RXQ_IPV6 |
			0,
		.hash_types_n = 6,
	},
	{
		.max_size = 1,
		.hash_types = 1 << HASH_RXQ_ETH,
		.hash_types_n = 1,
	},
};

#define IND_TABLE_INIT_N RTE_DIM(ind_table_init)

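/*
 * Illustrative sketch (application side, not driver code): the default
 * Toeplitz key below can be overridden through the standard DPDK RSS
 * configuration, e.g.:
 *
 *   static uint8_t app_rss_key[40] = { ... };
 *   struct rte_eth_conf conf = {
 *       .rxmode = { .mq_mode = ETH_MQ_RX_RSS },
 *       .rx_adv_conf.rss_conf = {
 *           .rss_key = app_rss_key,
 *           .rss_key_len = sizeof(app_rss_key),
 *           .rss_hf = ETH_RSS_IP | ETH_RSS_UDP | ETH_RSS_TCP,
 *       },
 *   };
 *   rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &conf);
 *
 * When no key is provided, rss_hash_default_key[] is used instead (see
 * priv_create_hash_rxqs() below).
 */
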
/* Default RSS hash key also used for ConnectX-3. */
uint8_t rss_hash_default_key[] = {
	0x2c, 0xc6, 0x81, 0xd1,
	0x5b, 0xdb, 0xf4, 0xf7,
	0xfc, 0xa2, 0x83, 0x19,
	0xdb, 0x1a, 0x3e, 0x94,
	0x6b, 0x9e, 0x38, 0xd9,
	0x2c, 0x9c, 0x03, 0xd1,
	0xad, 0x99, 0x44, 0xa7,
	0xd9, 0x56, 0x3d, 0x59,
	0x06, 0x3c, 0x25, 0xf3,
	0xfc, 0x1f, 0xdc, 0x2a,
};

/* Length of the default RSS hash key. */
const size_t rss_hash_default_key_len = sizeof(rss_hash_default_key);

/**
 * Populate flow steering rule for a given hash RX queue type using
 * information from hash_rxq_init[]. Nothing is written to flow_attr when
 * flow_attr_size is not large enough, but the required size is still
 * returned.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[out] flow_attr
 *   Pointer to flow attribute structure to fill. Note that the allocated
 *   area must be large enough to hold both the flow attribute structure
 *   and all trailing flow specifications.
 * @param flow_attr_size
 *   Entire size of flow_attr and trailing room for flow specifications.
 * @param type
 *   Hash RX queue type to use for flow steering rule.
 *
 * @return
 *   Total size of the flow attribute buffer. No errors are defined.
 */
size_t
priv_flow_attr(struct priv *priv, struct ibv_exp_flow_attr *flow_attr,
	       size_t flow_attr_size, enum hash_rxq_type type)
{
	size_t offset = sizeof(*flow_attr);
	const struct hash_rxq_init *init = &hash_rxq_init[type];

	assert(priv != NULL);
	assert((size_t)type < RTE_DIM(hash_rxq_init));
	do {
		offset += init->flow_spec.hdr.size;
		init = init->underlayer;
	} while (init != NULL);
	if (offset > flow_attr_size)
		return offset;
	flow_attr_size = offset;
	init = &hash_rxq_init[type];
	*flow_attr = (struct ibv_exp_flow_attr){
		.type = IBV_EXP_FLOW_ATTR_NORMAL,
		/* Priorities < 3 are reserved for flow director. */
		.priority = init->flow_priority + 3,
		.num_of_specs = 0,
		.port = priv->port,
		.flags = 0,
	};
	do {
		offset -= init->flow_spec.hdr.size;
		memcpy((void *)((uintptr_t)flow_attr + offset),
		       &init->flow_spec,
		       init->flow_spec.hdr.size);
		++flow_attr->num_of_specs;
		init = init->underlayer;
	} while (init != NULL);
	return flow_attr_size;
}

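/*
 * Illustrative usage sketch (not verbatim driver code): callers typically
 * query the required size first, then fill the attribute in a second pass:
 *
 *   size_t size = priv_flow_attr(priv, NULL, 0, HASH_RXQ_TCPV4);
 *   struct ibv_exp_flow_attr *attr = alloca(size);
 *
 *   priv_flow_attr(priv, attr, size, HASH_RXQ_TCPV4);
 *
 * after which attr is followed by the ETH, IPv4 and TCP flow
 * specifications.
 */
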
/**
 * Convert hash type position in indirection table initializer to
 * hash RX queue type.
 *
 * @param table
 *   Indirection table initializer.
 * @param pos
 *   Hash type position.
 *
 * @return
 *   Hash RX queue type.
 */
static enum hash_rxq_type
hash_rxq_type_from_pos(const struct ind_table_init *table, unsigned int pos)
{
	enum hash_rxq_type type = HASH_RXQ_TCPV4;

	assert(pos < table->hash_types_n);
	do {
		if ((table->hash_types & (1 << type)) && (pos-- == 0))
			break;
		++type;
	} while (1);
	return type;
}

/**
 * Filter out disabled hash RX queue types from ind_table_init[].
 *
 * @param priv
 *   Pointer to private structure.
 * @param[out] table
 *   Output table.
 *
 * @return
 *   Number of table entries.
 */
static unsigned int
priv_make_ind_table_init(struct priv *priv,
			 struct ind_table_init (*table)[IND_TABLE_INIT_N])
{
	uint64_t rss_hf;
	unsigned int i;
	unsigned int j;
	unsigned int table_n = 0;
	/* Mandatory to receive frames not handled by normal hash RX queues. */
	unsigned int hash_types_sup = 1 << HASH_RXQ_ETH;

	rss_hf = priv->rss_hf;
	/* Process other protocols only if more than one queue. */
	if (priv->rxqs_n > 1)
		for (i = 0; (i != hash_rxq_init_n); ++i)
			if (rss_hf & hash_rxq_init[i].dpdk_rss_hf)
				hash_types_sup |= (1 << i);

	/* Filter out entries whose protocols are not in the set. */
	for (i = 0, j = 0; (i != IND_TABLE_INIT_N); ++i) {
		unsigned int nb;
		unsigned int h;

		/* j is increased only if the table has valid protocols. */
		assert(j <= i);
		(*table)[j] = ind_table_init[i];
		(*table)[j].hash_types &= hash_types_sup;
		for (h = 0, nb = 0; (h != hash_rxq_init_n); ++h)
			if (((*table)[j].hash_types >> h) & 0x1)
				++nb;
		(*table)[i].hash_types_n = nb;
		if (nb) {
			++table_n;
			++j;
		}
	}
	return table_n;
}

/**
 * Initialize hash RX queues and indirection table.
 *
 * @param priv
 *   Pointer to private structure.
 *
 * @return
 *   0 on success, errno value on failure.
 */
int
priv_create_hash_rxqs(struct priv *priv)
{
	struct ibv_exp_wq *wqs[priv->reta_idx_n];
	struct ind_table_init ind_table_init[IND_TABLE_INIT_N];
	unsigned int ind_tables_n =
		priv_make_ind_table_init(priv, &ind_table_init);
	unsigned int hash_rxqs_n = 0;
	struct hash_rxq (*hash_rxqs)[] = NULL;
	struct ibv_exp_rwq_ind_table *(*ind_tables)[] = NULL;
	unsigned int i;
	unsigned int j;
	unsigned int k;
	int err = 0;

	assert(priv->ind_tables == NULL);
	assert(priv->ind_tables_n == 0);
	assert(priv->hash_rxqs == NULL);
	assert(priv->hash_rxqs_n == 0);
	assert(priv->pd != NULL);
	assert(priv->ctx != NULL);
	if (priv->rxqs_n == 0)
		return EINVAL;
	assert(priv->rxqs != NULL);
	if (ind_tables_n == 0) {
		ERROR("all hash RX queue types have been filtered out,"
		      " indirection table cannot be created");
		return EINVAL;
	}
	if (priv->rxqs_n & (priv->rxqs_n - 1)) {
		INFO("%u RX queues are configured, consider rounding this"
		     " number to the next power of two for better balancing",
		     priv->rxqs_n);
		DEBUG("indirection table extended to assume %u WQs",
		      priv->reta_idx_n);
	}
	for (i = 0; (i != priv->reta_idx_n); ++i) {
		struct rxq_ctrl *rxq_ctrl;

		rxq_ctrl = container_of((*priv->rxqs)[(*priv->reta_idx)[i]],
					struct rxq_ctrl, rxq);
		wqs[i] = rxq_ctrl->wq;
	}
	/* Get number of hash RX queues to configure. */
	for (i = 0, hash_rxqs_n = 0; (i != ind_tables_n); ++i)
		hash_rxqs_n += ind_table_init[i].hash_types_n;
	DEBUG("allocating %u hash RX queues for %u WQs, %u indirection tables",
	      hash_rxqs_n, priv->rxqs_n, ind_tables_n);
	/* Create indirection tables. */
	ind_tables = rte_calloc(__func__, ind_tables_n,
				sizeof((*ind_tables)[0]), 0);
	if (ind_tables == NULL) {
		err = ENOMEM;
		ERROR("cannot allocate indirection tables container: %s",
		      strerror(err));
		goto error;
	}
	for (i = 0; (i != ind_tables_n); ++i) {
		struct ibv_exp_rwq_ind_table_init_attr ind_init_attr = {
			.pd = priv->pd,
			.log_ind_tbl_size = 0, /* Set below. */
			.ind_tbl = wqs,
			.comp_mask = 0,
		};
		unsigned int ind_tbl_size = ind_table_init[i].max_size;
		struct ibv_exp_rwq_ind_table *ind_table;

		if (priv->reta_idx_n < ind_tbl_size)
			ind_tbl_size = priv->reta_idx_n;
		ind_init_attr.log_ind_tbl_size = log2above(ind_tbl_size);
		errno = 0;
		ind_table = ibv_exp_create_rwq_ind_table(priv->ctx,
							 &ind_init_attr);
		if (ind_table != NULL) {
			(*ind_tables)[i] = ind_table;
			continue;
		}
		/* Not clear whether errno is set. */
		err = (errno ? errno : EINVAL);
		ERROR("RX indirection table creation failed with error %d: %s",
		      err, strerror(err));
		goto error;
	}
	/* Allocate array that holds hash RX queues and related data. */
	hash_rxqs = rte_calloc(__func__, hash_rxqs_n,
			       sizeof((*hash_rxqs)[0]), 0);
	if (hash_rxqs == NULL) {
		err = ENOMEM;
		ERROR("cannot allocate hash RX queues container: %s",
		      strerror(err));
		goto error;
	}
	for (i = 0, j = 0, k = 0;
	     ((i != hash_rxqs_n) && (j != ind_tables_n));
	     ++i) {
		struct hash_rxq *hash_rxq = &(*hash_rxqs)[i];
		enum hash_rxq_type type =
			hash_rxq_type_from_pos(&ind_table_init[j], k);
		struct rte_eth_rss_conf *priv_rss_conf =
			(*priv->rss_conf)[type];
		struct ibv_exp_rx_hash_conf hash_conf = {
			.rx_hash_function = IBV_EXP_RX_HASH_FUNC_TOEPLITZ,
			.rx_hash_key_len = (priv_rss_conf ?
					    priv_rss_conf->rss_key_len :
					    rss_hash_default_key_len),
			.rx_hash_key = (priv_rss_conf ?
					priv_rss_conf->rss_key :
					rss_hash_default_key),
			.rx_hash_fields_mask = hash_rxq_init[type].hash_fields,
			.rwq_ind_tbl = (*ind_tables)[j],
		};
		struct ibv_exp_qp_init_attr qp_init_attr = {
			.max_inl_recv = 0, /* Currently not supported. */
			.qp_type = IBV_QPT_RAW_PACKET,
			.comp_mask = (IBV_EXP_QP_INIT_ATTR_PD |
				      IBV_EXP_QP_INIT_ATTR_RX_HASH),
			.pd = priv->pd,
			.rx_hash_conf = &hash_conf,
			.port_num = priv->port,
		};

		DEBUG("using indirection table %u for hash RX queue %u type %d",
		      j, i, type);
		*hash_rxq = (struct hash_rxq){
			.priv = priv,
			.qp = ibv_exp_create_qp(priv->ctx, &qp_init_attr),
			.type = type,
		};
		if (hash_rxq->qp == NULL) {
			err = (errno ? errno : EINVAL);
			ERROR("Hash RX QP creation failure: %s",
			      strerror(err));
			goto error;
		}
		if (++k < ind_table_init[j].hash_types_n)
			continue;
		/* Switch to the next indirection table and reset hash RX
		 * queue type array index. */
		++j;
		k = 0;
	}
	priv->ind_tables = ind_tables;
	priv->ind_tables_n = ind_tables_n;
	priv->hash_rxqs = hash_rxqs;
	priv->hash_rxqs_n = hash_rxqs_n;
	assert(err == 0);
	return 0;
error:
	if (hash_rxqs != NULL) {
		for (i = 0; (i != hash_rxqs_n); ++i) {
			struct ibv_qp *qp = (*hash_rxqs)[i].qp;

			if (qp == NULL)
				continue;
			claim_zero(ibv_destroy_qp(qp));
		}
		rte_free(hash_rxqs);
	}
	if (ind_tables != NULL) {
		for (j = 0; (j != ind_tables_n); ++j) {
			struct ibv_exp_rwq_ind_table *ind_table =
				(*ind_tables)[j];

			if (ind_table == NULL)
				continue;
			claim_zero(ibv_exp_destroy_rwq_ind_table(ind_table));
		}
		rte_free(ind_tables);
	}
	return err;
}

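/*
 * Note (illustrative): tear-down order matters here. Hash RX queue QPs
 * reference the indirection tables, which in turn reference the WQs of the
 * regular RX queues, so QPs are destroyed before indirection tables (see
 * the error path above and priv_destroy_hash_rxqs() below), and indirection
 * tables before the WQs themselves (rxq_cleanup()).
 */
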
/**
 * Clean up hash RX queues and indirection table.
 *
 * @param priv
 *   Pointer to private structure.
 */
void
priv_destroy_hash_rxqs(struct priv *priv)
{
	unsigned int i;

	DEBUG("destroying %u hash RX queues", priv->hash_rxqs_n);
	if (priv->hash_rxqs_n == 0) {
		assert(priv->hash_rxqs == NULL);
		assert(priv->ind_tables == NULL);
		return;
	}
	for (i = 0; (i != priv->hash_rxqs_n); ++i) {
		struct hash_rxq *hash_rxq = &(*priv->hash_rxqs)[i];
		unsigned int j, k;

		assert(hash_rxq->priv == priv);
		assert(hash_rxq->qp != NULL);
		/* Also check that there are no remaining flows. */
		for (j = 0; (j != RTE_DIM(hash_rxq->special_flow)); ++j)
			for (k = 0;
			     (k != RTE_DIM(hash_rxq->special_flow[j]));
			     ++k)
				assert(hash_rxq->special_flow[j][k] == NULL);
		for (j = 0; (j != RTE_DIM(hash_rxq->mac_flow)); ++j)
			for (k = 0; (k != RTE_DIM(hash_rxq->mac_flow[j])); ++k)
				assert(hash_rxq->mac_flow[j][k] == NULL);
		claim_zero(ibv_destroy_qp(hash_rxq->qp));
	}
	priv->hash_rxqs_n = 0;
	rte_free(priv->hash_rxqs);
	priv->hash_rxqs = NULL;
	for (i = 0; (i != priv->ind_tables_n); ++i) {
		struct ibv_exp_rwq_ind_table *ind_table =
			(*priv->ind_tables)[i];

		assert(ind_table != NULL);
		claim_zero(ibv_exp_destroy_rwq_ind_table(ind_table));
	}
	priv->ind_tables_n = 0;
	rte_free(priv->ind_tables);
	priv->ind_tables = NULL;
}

/**
 * Check whether a given flow type is allowed.
 *
 * @param priv
 *   Pointer to private structure.
 * @param type
 *   Flow type to check.
 *
 * @return
 *   Nonzero if the given flow type is allowed.
 */
int
priv_allow_flow_type(struct priv *priv, enum hash_rxq_flow_type type)
{
	/* Only FLOW_TYPE_PROMISC is allowed when promiscuous mode
	 * has been requested. */
	if (priv->promisc_req)
		return type == HASH_RXQ_FLOW_TYPE_PROMISC;
	switch (type) {
	case HASH_RXQ_FLOW_TYPE_PROMISC:
		return !!priv->promisc_req;
	case HASH_RXQ_FLOW_TYPE_ALLMULTI:
		return !!priv->allmulti_req;
	case HASH_RXQ_FLOW_TYPE_BROADCAST:
	case HASH_RXQ_FLOW_TYPE_IPV6MULTI:
		/* If allmulti is enabled, broadcast and ipv6multi
		 * are unnecessary. */
		return !priv->allmulti_req;
	case HASH_RXQ_FLOW_TYPE_MAC:
		return 1;
	default:
		/* Unsupported flow type is not allowed. */
		return 0;
	}
	return 0;
}

/**
 * Automatically enable/disable flows according to configuration.
 *
 * @param priv
 *   Private structure.
 *
 * @return
 *   0 on success, errno value on failure.
 */
int
priv_rehash_flows(struct priv *priv)
{
	enum hash_rxq_flow_type i;

	for (i = HASH_RXQ_FLOW_TYPE_PROMISC;
	     i != RTE_DIM((*priv->hash_rxqs)[0].special_flow);
	     ++i)
		if (!priv_allow_flow_type(priv, i)) {
			priv_special_flow_disable(priv, i);
		} else {
			int ret = priv_special_flow_enable(priv, i);

			if (ret)
				return ret;
		}
	if (priv_allow_flow_type(priv, HASH_RXQ_FLOW_TYPE_MAC))
		return priv_mac_addrs_enable(priv);
	priv_mac_addrs_disable(priv);
	return 0;
}

/**
 * Allocate RX queue elements.
 *
 * @param rxq_ctrl
 *   Pointer to RX queue structure.
 * @param elts_n
 *   Number of elements to allocate.
 * @param[in] pool
 *   If not NULL, fetch buffers from this array instead of allocating them
 *   with rte_pktmbuf_alloc().
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
rxq_alloc_elts(struct rxq_ctrl *rxq_ctrl, unsigned int elts_n,
	       struct rte_mbuf *(*pool)[])
{
	const unsigned int sges_n = 1 << rxq_ctrl->rxq.sges_n;
	unsigned int i;
	int ret = 0;

	/* Iterate on segments. */
	for (i = 0; (i != elts_n); ++i) {
		struct rte_mbuf *buf;
		volatile struct mlx5_wqe_data_seg *scat =
			&(*rxq_ctrl->rxq.wqes)[i];

		if (pool != NULL) {
			buf = (*pool)[i];
			assert(buf != NULL);
			rte_pktmbuf_reset(buf);
			rte_pktmbuf_refcnt_update(buf, 1);
		} else
			buf = rte_pktmbuf_alloc(rxq_ctrl->rxq.mp);
		if (buf == NULL) {
			assert(pool == NULL);
			ERROR("%p: empty mbuf pool", (void *)rxq_ctrl);
			ret = ENOMEM;
			goto error;
		}
		/* Headroom is reserved by rte_pktmbuf_alloc(). */
		assert(DATA_OFF(buf) == RTE_PKTMBUF_HEADROOM);
		/* Buffer is supposed to be empty. */
		assert(rte_pktmbuf_data_len(buf) == 0);
		assert(rte_pktmbuf_pkt_len(buf) == 0);
		assert(!buf->next);
		/* Only the first segment keeps headroom. */
		if (i % sges_n)
			SET_DATA_OFF(buf, 0);
		PORT(buf) = rxq_ctrl->rxq.port_id;
		DATA_LEN(buf) = rte_pktmbuf_tailroom(buf);
		PKT_LEN(buf) = DATA_LEN(buf);
		NB_SEGS(buf) = 1;
		/* scat->addr must be able to store a pointer. */
		assert(sizeof(scat->addr) >= sizeof(uintptr_t));
		*scat = (struct mlx5_wqe_data_seg){
			.addr = htonll(rte_pktmbuf_mtod(buf, uintptr_t)),
			.byte_count = htonl(DATA_LEN(buf)),
			.lkey = htonl(rxq_ctrl->mr->lkey),
		};
		(*rxq_ctrl->rxq.elts)[i] = buf;
	}
	DEBUG("%p: allocated and configured %u segments (max %u packets)",
	      (void *)rxq_ctrl, elts_n, elts_n / (1 << rxq_ctrl->rxq.sges_n));
	assert(ret == 0);
	return 0;
error:
	assert(pool == NULL);
	elts_n = i;
	for (i = 0; (i != elts_n); ++i) {
		if ((*rxq_ctrl->rxq.elts)[i] != NULL)
			rte_pktmbuf_free_seg((*rxq_ctrl->rxq.elts)[i]);
		(*rxq_ctrl->rxq.elts)[i] = NULL;
	}
	DEBUG("%p: failed, freed everything", (void *)rxq_ctrl);
	assert(ret > 0);
	return ret;
}

/**
 * Free RX queue elements.
 *
 * @param rxq_ctrl
 *   Pointer to RX queue structure.
 */
static void
rxq_free_elts(struct rxq_ctrl *rxq_ctrl)
{
	unsigned int i;

	DEBUG("%p: freeing WRs", (void *)rxq_ctrl);
	if (rxq_ctrl->rxq.elts == NULL)
		return;

	for (i = 0; (i != (1u << rxq_ctrl->rxq.elts_n)); ++i) {
		if ((*rxq_ctrl->rxq.elts)[i] != NULL)
			rte_pktmbuf_free_seg((*rxq_ctrl->rxq.elts)[i]);
		(*rxq_ctrl->rxq.elts)[i] = NULL;
	}
}

/**
 * Clean up a RX queue.
 *
 * Destroy objects, free allocated memory and reset the structure for reuse.
 *
 * @param rxq_ctrl
 *   Pointer to RX queue structure.
 */
void
rxq_cleanup(struct rxq_ctrl *rxq_ctrl)
{
	DEBUG("cleaning up %p", (void *)rxq_ctrl);
	rxq_free_elts(rxq_ctrl);
	if (rxq_ctrl->fdir_queue != NULL)
		priv_fdir_queue_destroy(rxq_ctrl->priv, rxq_ctrl->fdir_queue);
	if (rxq_ctrl->wq != NULL)
		claim_zero(ibv_exp_destroy_wq(rxq_ctrl->wq));
	if (rxq_ctrl->cq != NULL)
		claim_zero(ibv_destroy_cq(rxq_ctrl->cq));
	if (rxq_ctrl->channel != NULL)
		claim_zero(ibv_destroy_comp_channel(rxq_ctrl->channel));
	if (rxq_ctrl->mr != NULL)
		claim_zero(ibv_dereg_mr(rxq_ctrl->mr));
	memset(rxq_ctrl, 0, sizeof(*rxq_ctrl));
}

/**
 * Reconfigure RX queue buffers.
 *
 * rxq_rehash() does not allocate mbufs, since allocating them from a thread
 * other than a control thread may corrupt the pool.
 * In case of failure, the queue is left untouched.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param rxq_ctrl
 *   RX queue pointer.
 *
 * @return
 *   0 on success, errno value on failure.
 */
int
rxq_rehash(struct rte_eth_dev *dev, struct rxq_ctrl *rxq_ctrl)
{
	unsigned int elts_n = 1 << rxq_ctrl->rxq.elts_n;
	unsigned int i;
	struct ibv_exp_wq_attr mod;
	int err;

	DEBUG("%p: rehashing queue %p with %u SGE(s) per packet",
	      (void *)dev, (void *)rxq_ctrl, 1 << rxq_ctrl->rxq.sges_n);
	assert(!(elts_n % (1 << rxq_ctrl->rxq.sges_n)));
	/* From now on, any failure will render the queue unusable.
	 * Reinitialize WQ. */
	mod = (struct ibv_exp_wq_attr){
		.attr_mask = IBV_EXP_WQ_ATTR_STATE,
		.wq_state = IBV_EXP_WQS_RESET,
	};
	err = ibv_exp_modify_wq(rxq_ctrl->wq, &mod);
	if (err) {
		ERROR("%p: cannot reset WQ: %s", (void *)dev, strerror(err));
		assert(err > 0);
		return err;
	}
	/* Snatch mbufs from original queue. */
	claim_zero(rxq_alloc_elts(rxq_ctrl, elts_n, rxq_ctrl->rxq.elts));
	for (i = 0; i != elts_n; ++i) {
		struct rte_mbuf *buf = (*rxq_ctrl->rxq.elts)[i];

		assert(rte_mbuf_refcnt_read(buf) == 2);
		rte_pktmbuf_free_seg(buf);
	}
	/* Change queue state to ready. */
	mod = (struct ibv_exp_wq_attr){
		.attr_mask = IBV_EXP_WQ_ATTR_STATE,
		.wq_state = IBV_EXP_WQS_RDY,
	};
	err = ibv_exp_modify_wq(rxq_ctrl->wq, &mod);
	if (err) {
		ERROR("%p: WQ state to IBV_EXP_WQS_RDY failed: %s",
		      (void *)dev, strerror(err));
		goto error;
	}
	/* Update doorbell counter. */
	rxq_ctrl->rxq.rq_ci = elts_n >> rxq_ctrl->rxq.sges_n;
	rte_wmb();
	*rxq_ctrl->rxq.rq_db = htonl(rxq_ctrl->rxq.rq_ci);
error:
	assert(err >= 0);
	return err;
}

/**
 * Initialize RX queue.
 *
 * @param tmpl
 *   Pointer to RX queue control template.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static inline int
rxq_setup(struct rxq_ctrl *tmpl)
{
	struct ibv_cq *ibcq = tmpl->cq;
	struct mlx5_cq *cq = to_mxxx(cq, cq);
	struct mlx5_rwq *rwq = container_of(tmpl->wq, struct mlx5_rwq, wq);
	struct rte_mbuf *(*elts)[1 << tmpl->rxq.elts_n] =
		rte_calloc_socket("RXQ", 1, sizeof(*elts), 0, tmpl->socket);

	if (cq->cqe_sz != RTE_CACHE_LINE_SIZE) {
		ERROR("Wrong MLX5_CQE_SIZE environment variable value: "
		      "it should be set to %u", RTE_CACHE_LINE_SIZE);
		rte_free(elts);
		return EINVAL;
	}
	if (elts == NULL)
		return ENOMEM;
	tmpl->rxq.rq_db = rwq->rq.db;
	tmpl->rxq.cqe_n = log2above(ibcq->cqe);
	tmpl->rxq.cq_ci = 0;
	tmpl->rxq.rq_ci = 0;
	tmpl->rxq.cq_db = cq->dbrec;
	tmpl->rxq.wqes =
		(volatile struct mlx5_wqe_data_seg (*)[])
		(uintptr_t)rwq->rq.buff;
	tmpl->rxq.cqes =
		(volatile struct mlx5_cqe (*)[])
		(uintptr_t)cq->active_buf->buf;
	tmpl->rxq.elts = elts;
	return 0;
}

/**
 * Configure a RX queue.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param rxq_ctrl
 *   Pointer to RX queue structure.
 * @param desc
 *   Number of descriptors to configure in queue.
 * @param socket
 *   NUMA socket on which memory must be allocated.
 * @param[in] conf
 *   Thresholds parameters.
 * @param mp
 *   Memory pool for buffer allocations.
 *
 * @return
 *   0 on success, errno value on failure.
 */
int
rxq_ctrl_setup(struct rte_eth_dev *dev, struct rxq_ctrl *rxq_ctrl,
	       uint16_t desc, unsigned int socket,
	       const struct rte_eth_rxconf *conf, struct rte_mempool *mp)
{
	struct priv *priv = dev->data->dev_private;
	struct rxq_ctrl tmpl = {
		.priv = priv,
		.socket = socket,
		.rxq = {
			.elts_n = log2above(desc),
			.mp = mp,
			.rss_hash = priv->rxqs_n > 1,
		},
	};
	struct ibv_exp_wq_attr mod;
	union {
		struct ibv_exp_cq_init_attr cq;
		struct ibv_exp_wq_init_attr wq;
		struct ibv_exp_cq_attr cq_attr;
	} attr;
	unsigned int mb_len = rte_pktmbuf_data_room_size(mp);
	unsigned int cqe_n = desc - 1;
	struct rte_mbuf *(*elts)[desc] = NULL;
	int ret = 0;

	(void)conf; /* Thresholds configuration (ignored). */
	/* Enable scattered packets support for this queue if necessary. */
	assert(mb_len >= RTE_PKTMBUF_HEADROOM);
	if (dev->data->dev_conf.rxmode.max_rx_pkt_len <=
	    (mb_len - RTE_PKTMBUF_HEADROOM)) {
		tmpl.rxq.sges_n = 0;
	} else if (dev->data->dev_conf.rxmode.enable_scatter) {
		unsigned int size =
			RTE_PKTMBUF_HEADROOM +
			dev->data->dev_conf.rxmode.max_rx_pkt_len;
		unsigned int sges_n;

		/*
		 * Determine the number of SGEs needed for a full packet
		 * and round it to the next power of two.
		 */
		sges_n = log2above((size / mb_len) + !!(size % mb_len));
		tmpl.rxq.sges_n = sges_n;
		/* Make sure rxq.sges_n did not overflow. */
		size = mb_len * (1 << tmpl.rxq.sges_n);
		size -= RTE_PKTMBUF_HEADROOM;
		if (size < dev->data->dev_conf.rxmode.max_rx_pkt_len) {
			ERROR("%p: too many SGEs (%u) needed to handle"
			      " requested maximum packet size %u",
			      (void *)dev,
			      1 << sges_n,
			      dev->data->dev_conf.rxmode.max_rx_pkt_len);
			return EOVERFLOW;
		}
	} else {
		WARN("%p: the requested maximum Rx packet size (%u) is"
		     " larger than a single mbuf (%u) and scattered"
		     " mode has not been requested",
		     (void *)dev,
		     dev->data->dev_conf.rxmode.max_rx_pkt_len,
		     mb_len - RTE_PKTMBUF_HEADROOM);
	}
	DEBUG("%p: maximum number of segments per packet: %u",
	      (void *)dev, 1 << tmpl.rxq.sges_n);
	if (desc % (1 << tmpl.rxq.sges_n)) {
		ERROR("%p: number of RX queue descriptors (%u) is not a"
		      " multiple of SGEs per packet (%u)",
		      (void *)dev,
		      desc,
		      1 << tmpl.rxq.sges_n);
		return EINVAL;
	}
	/* Toggle RX checksum offload if hardware supports it. */
	if (priv->hw_csum)
		tmpl.rxq.csum = !!dev->data->dev_conf.rxmode.hw_ip_checksum;
	if (priv->hw_csum_l2tun)
		tmpl.rxq.csum_l2tun =
			!!dev->data->dev_conf.rxmode.hw_ip_checksum;
	/* Use the entire RX mempool as the memory region. */
	tmpl.mr = mlx5_mp2mr(priv->pd, mp);
	if (tmpl.mr == NULL) {
		ret = EINVAL;
		ERROR("%p: MR creation failure: %s",
		      (void *)dev, strerror(ret));
		goto error;
	}
	if (dev->data->dev_conf.intr_conf.rxq) {
		tmpl.channel = ibv_create_comp_channel(priv->ctx);
		if (tmpl.channel == NULL) {
			dev->data->dev_conf.intr_conf.rxq = 0;
			ret = ENOMEM;
			ERROR("%p: Comp Channel creation failure: %s",
			      (void *)dev, strerror(ret));
			goto error;
		}
	}
	attr.cq = (struct ibv_exp_cq_init_attr){
		.comp_mask = 0,
	};
	if (priv->cqe_comp) {
		attr.cq.comp_mask |= IBV_EXP_CQ_INIT_ATTR_FLAGS;
		attr.cq.flags |= IBV_EXP_CQ_COMPRESSED_CQE;
		cqe_n = (desc * 2) - 1; /* Double the number of CQEs. */
	}
	tmpl.cq = ibv_exp_create_cq(priv->ctx, cqe_n, NULL, tmpl.channel, 0,
				    &attr.cq);
	if (tmpl.cq == NULL) {
		ret = ENOMEM;
		ERROR("%p: CQ creation failure: %s",
		      (void *)dev, strerror(ret));
		goto error;
	}
	DEBUG("priv->device_attr.max_qp_wr is %d",
	      priv->device_attr.max_qp_wr);
	DEBUG("priv->device_attr.max_sge is %d",
	      priv->device_attr.max_sge);
	/* Configure VLAN stripping. */
	tmpl.rxq.vlan_strip = (priv->hw_vlan_strip &&
			       !!dev->data->dev_conf.rxmode.hw_vlan_strip);
	attr.wq = (struct ibv_exp_wq_init_attr){
		.wq_context = NULL, /* Could be useful in the future. */
		.wq_type = IBV_EXP_WQT_RQ,
		/* Max number of outstanding WRs. */
		.max_recv_wr = desc >> tmpl.rxq.sges_n,
		/* Max number of scatter/gather elements in a WR. */
		.max_recv_sge = 1 << tmpl.rxq.sges_n,
		.pd = priv->pd,
		.cq = tmpl.cq,
		.comp_mask =
			IBV_EXP_CREATE_WQ_VLAN_OFFLOADS |
			0,
		.vlan_offloads = (tmpl.rxq.vlan_strip ?
				  IBV_EXP_RECEIVE_WQ_CVLAN_STRIP :
				  0),
	};
	/* By default, FCS (CRC) is stripped by hardware. */
	if (dev->data->dev_conf.rxmode.hw_strip_crc) {
		tmpl.rxq.crc_present = 0;
	} else if (priv->hw_fcs_strip) {
		/* Ask HW/Verbs to leave CRC in place when supported. */
		attr.wq.flags |= IBV_EXP_CREATE_WQ_FLAG_SCATTER_FCS;
		attr.wq.comp_mask |= IBV_EXP_CREATE_WQ_FLAGS;
		tmpl.rxq.crc_present = 1;
	} else {
		WARN("%p: CRC stripping has been disabled but will still"
		     " be performed by hardware, make sure MLNX_OFED and"
		     " firmware are up to date",
		     (void *)dev);
		tmpl.rxq.crc_present = 0;
	}
	DEBUG("%p: CRC stripping is %s, %u bytes will be subtracted from"
	      " incoming frames to hide it",
	      (void *)dev,
	      tmpl.rxq.crc_present ? "disabled" : "enabled",
	      tmpl.rxq.crc_present << 2);
	if (!mlx5_getenv_int("MLX5_PMD_ENABLE_PADDING"))
		; /* Nothing else to do. */
	else if (priv->hw_padding) {
		INFO("%p: enabling packet padding on queue %p",
		     (void *)dev, (void *)rxq_ctrl);
		attr.wq.flags |= IBV_EXP_CREATE_WQ_FLAG_RX_END_PADDING;
		attr.wq.comp_mask |= IBV_EXP_CREATE_WQ_FLAGS;
	} else
		WARN("%p: packet padding has been requested but is not"
		     " supported, make sure MLNX_OFED and firmware are"
		     " up to date",
		     (void *)dev);

	tmpl.wq = ibv_exp_create_wq(priv->ctx, &attr.wq);
	if (tmpl.wq == NULL) {
		ret = (errno ? errno : EINVAL);
		ERROR("%p: WQ creation failure: %s",
		      (void *)dev, strerror(ret));
		goto error;
	}
	/*
	 * Make sure number of WRs*SGEs match expectations since a queue
	 * cannot allocate more than "desc" buffers.
	 */
	if (((int)attr.wq.max_recv_wr != (desc >> tmpl.rxq.sges_n)) ||
	    ((int)attr.wq.max_recv_sge != (1 << tmpl.rxq.sges_n))) {
		ERROR("%p: requested %u*%u but got %u*%u WRs*SGEs",
		      (void *)dev,
		      (desc >> tmpl.rxq.sges_n), (1 << tmpl.rxq.sges_n),
		      attr.wq.max_recv_wr, attr.wq.max_recv_sge);
		ret = EINVAL;
		goto error;
	}
	/* Save port ID. */
	tmpl.rxq.port_id = dev->data->port_id;
	DEBUG("%p: RTE port ID: %u", (void *)rxq_ctrl, tmpl.rxq.port_id);
	/* Change queue state to ready. */
	mod = (struct ibv_exp_wq_attr){
		.attr_mask = IBV_EXP_WQ_ATTR_STATE,
		.wq_state = IBV_EXP_WQS_RDY,
	};
	ret = ibv_exp_modify_wq(tmpl.wq, &mod);
	if (ret) {
		ERROR("%p: WQ state to IBV_EXP_WQS_RDY failed: %s",
		      (void *)dev, strerror(ret));
		goto error;
	}
	ret = rxq_setup(&tmpl);
	if (ret) {
		ERROR("%p: cannot initialize RX queue structure: %s",
		      (void *)dev, strerror(ret));
		goto error;
	}
	/* Reuse buffers from original queue if possible. */
	if (rxq_ctrl->rxq.elts_n) {
		assert(1 << rxq_ctrl->rxq.elts_n == desc);
		assert(rxq_ctrl->rxq.elts != tmpl.rxq.elts);
		ret = rxq_alloc_elts(&tmpl, desc, rxq_ctrl->rxq.elts);
	} else
		ret = rxq_alloc_elts(&tmpl, desc, NULL);
	if (ret) {
		ERROR("%p: RXQ allocation failed: %s",
		      (void *)dev, strerror(ret));
		goto error;
	}
	/* Clean up rxq in case we're reinitializing it. */
	DEBUG("%p: cleaning-up old rxq just in case", (void *)rxq_ctrl);
	rxq_cleanup(rxq_ctrl);
	/* Move mbuf pointers to dedicated storage area in RX queue. */
	elts = (void *)(rxq_ctrl + 1);
	rte_memcpy(elts, tmpl.rxq.elts, sizeof(*elts));
#ifndef NDEBUG
	memset(tmpl.rxq.elts, 0x55, sizeof(*elts));
#endif
	rte_free(tmpl.rxq.elts);
	tmpl.rxq.elts = elts;
	*rxq_ctrl = tmpl;
	/* Update doorbell counter. */
	rxq_ctrl->rxq.rq_ci = desc >> rxq_ctrl->rxq.sges_n;
	rte_wmb();
	*rxq_ctrl->rxq.rq_db = htonl(rxq_ctrl->rxq.rq_ci);
	DEBUG("%p: rxq updated with %p", (void *)rxq_ctrl, (void *)&tmpl);
	assert(ret == 0);
	return 0;
error:
	elts = tmpl.rxq.elts;
	rxq_cleanup(&tmpl);
	rte_free(elts);
	assert(ret > 0);
	return ret;
}

/**
 * DPDK callback to configure a RX queue.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param idx
 *   RX queue index.
 * @param desc
 *   Number of descriptors to configure in queue.
 * @param socket
 *   NUMA socket on which memory must be allocated.
 * @param[in] conf
 *   Thresholds parameters.
 * @param mp
 *   Memory pool for buffer allocations.
 *
 * @return
 *   0 on success, negative errno value on failure.
 */
int
mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
		    unsigned int socket, const struct rte_eth_rxconf *conf,
		    struct rte_mempool *mp)
{
	struct priv *priv = dev->data->dev_private;
	struct rxq *rxq = (*priv->rxqs)[idx];
	struct rxq_ctrl *rxq_ctrl = container_of(rxq, struct rxq_ctrl, rxq);
	int ret;

	if (mlx5_is_secondary())
		return -E_RTE_SECONDARY;

	priv_lock(priv);
	if (!rte_is_power_of_2(desc)) {
		desc = 1 << log2above(desc);
		WARN("%p: increased number of descriptors in RX queue %u"
		     " to the next power of two (%d)",
		     (void *)dev, idx, desc);
	}
	DEBUG("%p: configuring queue %u for %u descriptors",
	      (void *)dev, idx, desc);
	if (idx >= priv->rxqs_n) {
		ERROR("%p: queue index out of range (%u >= %u)",
		      (void *)dev, idx, priv->rxqs_n);
		priv_unlock(priv);
		return -EOVERFLOW;
	}
	if (rxq != NULL) {
		DEBUG("%p: reusing already allocated queue index %u (%p)",
		      (void *)dev, idx, (void *)rxq);
		if (priv->started) {
			priv_unlock(priv);
			return -EEXIST;
		}
		(*priv->rxqs)[idx] = NULL;
		rxq_cleanup(rxq_ctrl);
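		/*
		 * Note (illustrative): the array of mbuf pointers for this
		 * queue lives immediately after struct rxq_ctrl, which is
		 * why allocations here reserve sizeof(*rxq_ctrl) +
		 * desc * sizeof(struct rte_mbuf *) bytes and why
		 * rxq_ctrl_setup() copies elts to (rxq_ctrl + 1).
		 */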
		/* Resize if rxq size is changed. */
		if (rxq_ctrl->rxq.elts_n != log2above(desc)) {
			rxq_ctrl = rte_realloc(rxq_ctrl,
					       sizeof(*rxq_ctrl) +
					       desc * sizeof(struct rte_mbuf *),
					       RTE_CACHE_LINE_SIZE);
			if (!rxq_ctrl) {
				ERROR("%p: unable to reallocate queue index %u",
				      (void *)dev, idx);
				priv_unlock(priv);
				return -ENOMEM;
			}
		}
	} else {
		rxq_ctrl = rte_calloc_socket("RXQ", 1, sizeof(*rxq_ctrl) +
					     desc * sizeof(struct rte_mbuf *),
					     0, socket);
		if (rxq_ctrl == NULL) {
			ERROR("%p: unable to allocate queue index %u",
			      (void *)dev, idx);
			priv_unlock(priv);
			return -ENOMEM;
		}
	}
	ret = rxq_ctrl_setup(dev, rxq_ctrl, desc, socket, conf, mp);
	if (ret)
		rte_free(rxq_ctrl);
	else {
		rxq_ctrl->rxq.stats.idx = idx;
		DEBUG("%p: adding RX queue %p to list",
		      (void *)dev, (void *)rxq_ctrl);
		(*priv->rxqs)[idx] = &rxq_ctrl->rxq;
		/* Update receive callback. */
		priv_select_rx_function(priv);
	}
	priv_unlock(priv);
	return -ret;
}

/**
 * DPDK callback to release a RX queue.
 *
 * @param dpdk_rxq
 *   Generic RX queue pointer.
 */
void
mlx5_rx_queue_release(void *dpdk_rxq)
{
	struct rxq *rxq = (struct rxq *)dpdk_rxq;
	struct rxq_ctrl *rxq_ctrl;
	struct priv *priv;
	unsigned int i;

	if (mlx5_is_secondary())
		return;

	if (rxq == NULL)
		return;
	rxq_ctrl = container_of(rxq, struct rxq_ctrl, rxq);
	priv = rxq_ctrl->priv;
	priv_lock(priv);
	for (i = 0; (i != priv->rxqs_n); ++i)
		if ((*priv->rxqs)[i] == rxq) {
			DEBUG("%p: removing RX queue %p from list",
			      (void *)priv->dev, (void *)rxq_ctrl);
			(*priv->rxqs)[i] = NULL;
			break;
		}
	rxq_cleanup(rxq_ctrl);
	rte_free(rxq_ctrl);
	priv_unlock(priv);
}

/**
 * DPDK callback for RX in secondary processes.
 *
 * This function configures all queues from primary process information
 * if necessary before reverting to the normal RX burst callback.
 *
 * @param dpdk_rxq
 *   Generic pointer to RX queue structure.
 * @param[out] pkts
 *   Array to store received packets.
 * @param pkts_n
 *   Maximum number of packets in array.
 *
 * @return
 *   Number of packets successfully received (<= pkts_n).
 */
uint16_t
mlx5_rx_burst_secondary_setup(void *dpdk_rxq, struct rte_mbuf **pkts,
			      uint16_t pkts_n)
{
	struct rxq *rxq = dpdk_rxq;
	struct rxq_ctrl *rxq_ctrl = container_of(rxq, struct rxq_ctrl, rxq);
	struct priv *priv = mlx5_secondary_data_setup(rxq_ctrl->priv);
	struct priv *primary_priv;
	unsigned int index;

	if (priv == NULL)
		return 0;
	primary_priv =
		mlx5_secondary_data[priv->dev->data->port_id].primary_priv;
	/* Look for queue index in both private structures. */
	for (index = 0; index != priv->rxqs_n; ++index)
		if (((*primary_priv->rxqs)[index] == rxq) ||
		    ((*priv->rxqs)[index] == rxq))
			break;
	if (index == priv->rxqs_n)
		return 0;
	rxq = (*priv->rxqs)[index];
	return priv->dev->rx_pkt_burst(rxq, pkts, pkts_n);
}

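/*
 * Illustrative sketch (application side, not driver code): the file
 * descriptors registered in intr_handle->efds[] by priv_intr_efd_enable()
 * below back the generic DPDK RX interrupt API. Assuming the PMD exposes
 * the usual rx_queue_intr_enable/disable callbacks, an application would
 * typically do something like:
 *
 *   rte_eth_dev_rx_intr_ctl_q(port_id, queue_id, RTE_EPOLL_PER_THREAD,
 *                             RTE_INTR_EVENT_ADD, NULL);
 *   ...
 *   rte_eth_dev_rx_intr_enable(port_id, queue_id);
 *   struct rte_epoll_event event;
 *   rte_epoll_wait(RTE_EPOLL_PER_THREAD, &event, 1, timeout_ms);
 *   rte_eth_dev_rx_intr_disable(port_id, queue_id);
 *
 * before resuming the rte_eth_rx_burst() polling loop.
 */
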
/**
 * Fill epoll fd list for rxq interrupts.
 *
 * @param priv
 *   Private structure.
 *
 * @return
 *   0 on success, negative on failure.
 */
int
priv_intr_efd_enable(struct priv *priv)
{
	unsigned int i;
	unsigned int rxqs_n = priv->rxqs_n;
	unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID);
	struct rte_intr_handle *intr_handle = priv->dev->intr_handle;

	if (n == 0)
		return 0;
	if (n < rxqs_n) {
		WARN("number of RX queues exceeds the EAL maximum interrupt"
		     " vector size (%u > %u), unable to support rxq"
		     " interrupts",
		     rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID);
		return -EINVAL;
	}
	intr_handle->type = RTE_INTR_HANDLE_EXT;
	for (i = 0; i != n; ++i) {
		struct rxq *rxq = (*priv->rxqs)[i];
		struct rxq_ctrl *rxq_ctrl =
			container_of(rxq, struct rxq_ctrl, rxq);
		int fd = rxq_ctrl->channel->fd;
		int flags;
		int rc;

		flags = fcntl(fd, F_GETFL);
		rc = fcntl(fd, F_SETFL, flags | O_NONBLOCK);
		if (rc < 0) {
			WARN("failed to change rxq interrupt file"
			     " descriptor %d for queue index %d", fd, i);
			return -1;
		}
		intr_handle->efds[i] = fd;
	}
	intr_handle->nb_efd = n;
	return 0;
}

/**
 * Clean epoll fd list for rxq interrupts.
 *
 * @param priv
 *   Private structure.
 */
void
priv_intr_efd_disable(struct priv *priv)
{
	struct rte_intr_handle *intr_handle = priv->dev->intr_handle;

	rte_intr_free_epoll_fd(intr_handle);
}

/**
 * Create and initialize the interrupt vector array.
 *
 * @param priv
 *   Private structure.
 *
 * @return
 *   0 on success, negative on failure.
 */
int
priv_create_intr_vec(struct priv *priv)
{
	unsigned int rxqs_n = priv->rxqs_n;
	unsigned int i;
	struct rte_intr_handle *intr_handle = priv->dev->intr_handle;

	if (rxqs_n == 0)
		return 0;
	intr_handle->intr_vec = (int *)
		rte_malloc("intr_vec", rxqs_n * sizeof(int), 0);
	if (intr_handle->intr_vec == NULL) {
		WARN("Failed to allocate memory for intr_vec,"
		     " rxq interrupts will not be supported");
		return -ENOMEM;
	}
	for (i = 0; i != rxqs_n; ++i) {
		/* 1:1 mapping between rxq and interrupt. */
		intr_handle->intr_vec[i] = RTE_INTR_VEC_RXTX_OFFSET + i;
	}
	return 0;
}

/**
 * Destroy the interrupt vector array.
 *
 * @param priv
 *   Private structure.
 */
void
priv_destroy_intr_vec(struct priv *priv)
{
	struct rte_intr_handle *intr_handle = priv->dev->intr_handle;

	rte_free(intr_handle->intr_vec);
}