/* SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 2016-2018 Solarflare Communications Inc.
 * All rights reserved.
 *
 * This software was jointly developed between OKTET Labs (under contract
 * for Solarflare) and Solarflare Communications, Inc.
 */

#include <rte_mempool.h>

#include "efx.h"

#include "sfc.h"
#include "sfc_debug.h"
#include "sfc_log.h"
#include "sfc_ev.h"
#include "sfc_rx.h"
#include "sfc_kvargs.h"
#include "sfc_tweak.h"

/*
 * Maximum number of Rx queue flush attempts in the case of failure or
 * flush timeout
 */
#define SFC_RX_QFLUSH_ATTEMPTS		(3)

/*
 * Time to wait between event queue polling attempts when waiting for Rx
 * queue flush done or failed events.
 */
#define SFC_RX_QFLUSH_POLL_WAIT_MS	(1)

/*
 * Maximum number of event queue polling attempts when waiting for Rx queue
 * flush done or failed events. It defines Rx queue flush attempt timeout
 * together with SFC_RX_QFLUSH_POLL_WAIT_MS.
 */
#define SFC_RX_QFLUSH_POLL_ATTEMPTS	(2000)

void
sfc_rx_qflush_done(struct sfc_rxq_info *rxq_info)
{
	rxq_info->state |= SFC_RXQ_FLUSHED;
	rxq_info->state &= ~SFC_RXQ_FLUSHING;
}

void
sfc_rx_qflush_failed(struct sfc_rxq_info *rxq_info)
{
	rxq_info->state |= SFC_RXQ_FLUSH_FAILED;
	rxq_info->state &= ~SFC_RXQ_FLUSHING;
}

static void
sfc_efx_rx_qrefill(struct sfc_efx_rxq *rxq)
{
	unsigned int free_space;
	unsigned int bulks;
	void *objs[SFC_RX_REFILL_BULK];
	efsys_dma_addr_t addr[RTE_DIM(objs)];
	unsigned int added = rxq->added;
	unsigned int id;
	unsigned int i;
	struct sfc_efx_rx_sw_desc *rxd;
	struct rte_mbuf *m;
	uint16_t port_id = rxq->dp.dpq.port_id;

	free_space = rxq->max_fill_level - (added - rxq->completed);

	if (free_space < rxq->refill_threshold)
		return;

	bulks = free_space / RTE_DIM(objs);
	/* refill_threshold guarantees that bulks is positive */
	SFC_ASSERT(bulks > 0);

	id = added & rxq->ptr_mask;
	do {
		if (unlikely(rte_mempool_get_bulk(rxq->refill_mb_pool, objs,
						  RTE_DIM(objs)) < 0)) {
			/*
			 * It is hardly a safe way to increment the counter
			 * from different contexts, but all PMDs do it.
			 */
			rxq->evq->sa->eth_dev->data->rx_mbuf_alloc_failed +=
				RTE_DIM(objs);
			/* Return if we have posted nothing yet */
			if (added == rxq->added)
				return;
			/* Push posted */
			break;
		}

		for (i = 0; i < RTE_DIM(objs);
		     ++i, id = (id + 1) & rxq->ptr_mask) {
			m = objs[i];

			MBUF_RAW_ALLOC_CHECK(m);

			rxd = &rxq->sw_desc[id];
			rxd->mbuf = m;

			m->data_off = RTE_PKTMBUF_HEADROOM;
			m->port = port_id;

			addr[i] = rte_pktmbuf_iova(m);
		}

		efx_rx_qpost(rxq->common, addr, rxq->buf_size,
			     RTE_DIM(objs), rxq->completed, added);
		added += RTE_DIM(objs);
	} while (--bulks > 0);

	SFC_ASSERT(added != rxq->added);
	rxq->added = added;
	efx_rx_qpush(rxq->common, added, &rxq->pushed);
}

static uint64_t
sfc_efx_rx_desc_flags_to_offload_flags(const unsigned int desc_flags)
{
	uint64_t mbuf_flags = 0;

	switch (desc_flags & (EFX_PKT_IPV4 | EFX_CKSUM_IPV4)) {
	case (EFX_PKT_IPV4 | EFX_CKSUM_IPV4):
		mbuf_flags |= PKT_RX_IP_CKSUM_GOOD;
		break;
	case EFX_PKT_IPV4:
		mbuf_flags |= PKT_RX_IP_CKSUM_BAD;
		break;
	default:
		RTE_BUILD_BUG_ON(PKT_RX_IP_CKSUM_UNKNOWN != 0);
		SFC_ASSERT((mbuf_flags & PKT_RX_IP_CKSUM_MASK) ==
			   PKT_RX_IP_CKSUM_UNKNOWN);
		break;
	}

	switch ((desc_flags &
		 (EFX_PKT_TCP | EFX_PKT_UDP | EFX_CKSUM_TCPUDP))) {
	case (EFX_PKT_TCP | EFX_CKSUM_TCPUDP):
	case (EFX_PKT_UDP | EFX_CKSUM_TCPUDP):
		mbuf_flags |= PKT_RX_L4_CKSUM_GOOD;
		break;
	case EFX_PKT_TCP:
	case EFX_PKT_UDP:
		mbuf_flags |= PKT_RX_L4_CKSUM_BAD;
		break;
	default:
		RTE_BUILD_BUG_ON(PKT_RX_L4_CKSUM_UNKNOWN != 0);
		SFC_ASSERT((mbuf_flags & PKT_RX_L4_CKSUM_MASK) ==
			   PKT_RX_L4_CKSUM_UNKNOWN);
		break;
	}

	return mbuf_flags;
}

static uint32_t
sfc_efx_rx_desc_flags_to_packet_type(const unsigned int desc_flags)
{
	return RTE_PTYPE_L2_ETHER |
		((desc_flags & EFX_PKT_IPV4) ?
			RTE_PTYPE_L3_IPV4_EXT_UNKNOWN : 0) |
		((desc_flags & EFX_PKT_IPV6) ?
			RTE_PTYPE_L3_IPV6_EXT_UNKNOWN : 0) |
		((desc_flags & EFX_PKT_TCP) ? RTE_PTYPE_L4_TCP : 0) |
		((desc_flags & EFX_PKT_UDP) ? RTE_PTYPE_L4_UDP : 0);
}

static const uint32_t *
sfc_efx_supported_ptypes_get(__rte_unused uint32_t tunnel_encaps)
{
	static const uint32_t ptypes[] = {
		RTE_PTYPE_L2_ETHER,
		RTE_PTYPE_L3_IPV4_EXT_UNKNOWN,
		RTE_PTYPE_L3_IPV6_EXT_UNKNOWN,
		RTE_PTYPE_L4_TCP,
		RTE_PTYPE_L4_UDP,
		RTE_PTYPE_UNKNOWN
	};

	return ptypes;
}

static void
sfc_efx_rx_set_rss_hash(struct sfc_efx_rxq *rxq, unsigned int flags,
			struct rte_mbuf *m)
{
	uint8_t *mbuf_data;

	if ((rxq->flags & SFC_EFX_RXQ_FLAG_RSS_HASH) == 0)
		return;

	mbuf_data = rte_pktmbuf_mtod(m, uint8_t *);

	if (flags & (EFX_PKT_IPV4 | EFX_PKT_IPV6)) {
		m->hash.rss = efx_pseudo_hdr_hash_get(rxq->common,
						      EFX_RX_HASHALG_TOEPLITZ,
						      mbuf_data);

		m->ol_flags |= PKT_RX_RSS_HASH;
	}
}

static uint16_t
sfc_efx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
	struct sfc_dp_rxq *dp_rxq = rx_queue;
	struct sfc_efx_rxq *rxq = sfc_efx_rxq_by_dp_rxq(dp_rxq);
	unsigned int completed;
	unsigned int prefix_size = rxq->prefix_size;
	unsigned int done_pkts = 0;
	boolean_t discard_next = B_FALSE;
	struct rte_mbuf *scatter_pkt = NULL;

	if (unlikely((rxq->flags & SFC_EFX_RXQ_FLAG_RUNNING) == 0))
		return 0;

	sfc_ev_qpoll(rxq->evq);

	completed = rxq->completed;
	while (completed != rxq->pending && done_pkts < nb_pkts) {
		unsigned int id;
		struct sfc_efx_rx_sw_desc *rxd;
		struct rte_mbuf *m;
		unsigned int seg_len;
		unsigned int desc_flags;

		id = completed++ & rxq->ptr_mask;
		rxd = &rxq->sw_desc[id];
		m = rxd->mbuf;
		desc_flags = rxd->flags;

		if (discard_next)
			goto discard;

		if (desc_flags & (EFX_ADDR_MISMATCH | EFX_DISCARD))
			goto discard;

		if (desc_flags & EFX_PKT_PREFIX_LEN) {
			uint16_t tmp_size;
			int rc __rte_unused;

			rc = efx_pseudo_hdr_pkt_length_get(rxq->common,
				rte_pktmbuf_mtod(m, uint8_t *), &tmp_size);
			SFC_ASSERT(rc == 0);
			seg_len = tmp_size;
		} else {
			seg_len = rxd->size - prefix_size;
		}

		rte_pktmbuf_data_len(m) = seg_len;
		rte_pktmbuf_pkt_len(m) = seg_len;

		if (scatter_pkt != NULL) {
			if (rte_pktmbuf_chain(scatter_pkt, m) != 0) {
				rte_pktmbuf_free(scatter_pkt);
				goto discard;
			}
			/* The packet to deliver */
			m = scatter_pkt;
		}

		if (desc_flags & EFX_PKT_CONT) {
			/* The packet is scattered, more fragments to come */
			scatter_pkt = m;
			/* Further fragments have no prefix */
			prefix_size = 0;
			continue;
		}

		/* Scattered packet is done */
		scatter_pkt = NULL;
		/* The first fragment of the packet has the prefix */
		prefix_size = rxq->prefix_size;

		m->ol_flags =
			sfc_efx_rx_desc_flags_to_offload_flags(desc_flags);
		m->packet_type =
			sfc_efx_rx_desc_flags_to_packet_type(desc_flags);

		/*
		 * Extract RSS hash from the packet prefix and
		 * set the corresponding field (if needed and possible)
		 */
		sfc_efx_rx_set_rss_hash(rxq, desc_flags, m);

		m->data_off += prefix_size;

		*rx_pkts++ = m;
		done_pkts++;
		continue;

discard:
		discard_next = ((desc_flags & EFX_PKT_CONT) != 0);
		rte_mbuf_raw_free(m);
		rxd->mbuf = NULL;
	}

	/* pending is only moved when the entire packet is received */
	SFC_ASSERT(scatter_pkt == NULL);

	rxq->completed = completed;

	sfc_efx_rx_qrefill(rxq);

	return done_pkts;
}
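
/*
 * Descriptive note (not in the original source): the callback below reports
 * the number of descriptors with received packets which have not been
 * delivered to the application yet; the EvQ is polled first so that
 * 'pending' is up to date.
 */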
static sfc_dp_rx_qdesc_npending_t sfc_efx_rx_qdesc_npending;
static unsigned int
sfc_efx_rx_qdesc_npending(struct sfc_dp_rxq *dp_rxq)
{
	struct sfc_efx_rxq *rxq = sfc_efx_rxq_by_dp_rxq(dp_rxq);

	if ((rxq->flags & SFC_EFX_RXQ_FLAG_RUNNING) == 0)
		return 0;

	sfc_ev_qpoll(rxq->evq);

	return rxq->pending - rxq->completed;
}

static sfc_dp_rx_qdesc_status_t sfc_efx_rx_qdesc_status;
static int
sfc_efx_rx_qdesc_status(struct sfc_dp_rxq *dp_rxq, uint16_t offset)
{
	struct sfc_efx_rxq *rxq = sfc_efx_rxq_by_dp_rxq(dp_rxq);

	if (unlikely(offset > rxq->ptr_mask))
		return -EINVAL;

	/*
	 * Poll the EvQ to derive an up-to-date 'rxq->pending' figure;
	 * the queue is required to be running, but the check is omitted
	 * because the API design assumes that it is the duty of the
	 * caller to satisfy all conditions
	 */
	SFC_ASSERT((rxq->flags & SFC_EFX_RXQ_FLAG_RUNNING) ==
		   SFC_EFX_RXQ_FLAG_RUNNING);
	sfc_ev_qpoll(rxq->evq);

	/*
	 * There is a handful of reserved entries in the ring, but an
	 * explicit check whether the offset points to a reserved entry
	 * is omitted since the two checks below rely on figures which
	 * take the HW limits into account; thus, if an entry is
	 * reserved, the checks will fail and the UNAVAIL code will be
	 * returned
	 */

	if (offset < (rxq->pending - rxq->completed))
		return RTE_ETH_RX_DESC_DONE;

	if (offset < (rxq->added - rxq->completed))
		return RTE_ETH_RX_DESC_AVAIL;

	return RTE_ETH_RX_DESC_UNAVAIL;
}

/** Get Rx datapath ops by the datapath RxQ handle */
const struct sfc_dp_rx *
sfc_dp_rx_by_dp_rxq(const struct sfc_dp_rxq *dp_rxq)
{
	const struct sfc_dp_queue *dpq = &dp_rxq->dpq;
	struct rte_eth_dev *eth_dev;
	struct sfc_adapter_priv *sap;

	SFC_ASSERT(rte_eth_dev_is_valid_port(dpq->port_id));
	eth_dev = &rte_eth_devices[dpq->port_id];

	sap = sfc_adapter_priv_by_eth_dev(eth_dev);

	return sap->dp_rx;
}

struct sfc_rxq_info *
sfc_rxq_info_by_dp_rxq(const struct sfc_dp_rxq *dp_rxq)
{
	const struct sfc_dp_queue *dpq = &dp_rxq->dpq;
	struct rte_eth_dev *eth_dev;
	struct sfc_adapter_shared *sas;

	SFC_ASSERT(rte_eth_dev_is_valid_port(dpq->port_id));
	eth_dev = &rte_eth_devices[dpq->port_id];

	sas = sfc_adapter_shared_by_eth_dev(eth_dev);

	SFC_ASSERT(dpq->queue_id < sas->rxq_count);
	return &sas->rxq_info[dpq->queue_id];
}

struct sfc_rxq *
sfc_rxq_by_dp_rxq(const struct sfc_dp_rxq *dp_rxq)
{
	const struct sfc_dp_queue *dpq = &dp_rxq->dpq;
	struct rte_eth_dev *eth_dev;
	struct sfc_adapter *sa;

	SFC_ASSERT(rte_eth_dev_is_valid_port(dpq->port_id));
	eth_dev = &rte_eth_devices[dpq->port_id];

	sa = sfc_adapter_by_eth_dev(eth_dev);

	SFC_ASSERT(dpq->queue_id < sfc_sa2shared(sa)->rxq_count);
	return &sa->rxq_ctrl[dpq->queue_id];
}

static sfc_dp_rx_qsize_up_rings_t sfc_efx_rx_qsize_up_rings;
static int
sfc_efx_rx_qsize_up_rings(uint16_t nb_rx_desc,
			  __rte_unused struct sfc_dp_rx_hw_limits *limits,
			  __rte_unused struct rte_mempool *mb_pool,
			  unsigned int *rxq_entries,
			  unsigned int *evq_entries,
			  unsigned int *rxq_max_fill_level)
{
	*rxq_entries = nb_rx_desc;
	*evq_entries = nb_rx_desc;
	*rxq_max_fill_level = EFX_RXQ_LIMIT(*rxq_entries);
	return 0;
}

static sfc_dp_rx_qcreate_t sfc_efx_rx_qcreate;
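/*
 * Descriptive note (not in the original source): allocate the libefx
 * datapath RxQ and its software descriptor ring on the requested socket
 * and fill them in from the creation parameters.
 */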
static int
sfc_efx_rx_qcreate(uint16_t port_id, uint16_t queue_id,
		   const struct rte_pci_addr *pci_addr, int socket_id,
		   const struct sfc_dp_rx_qcreate_info *info,
		   struct sfc_dp_rxq **dp_rxqp)
{
	struct sfc_efx_rxq *rxq;
	int rc;

	rc = ENOMEM;
	rxq = rte_zmalloc_socket("sfc-efx-rxq", sizeof(*rxq),
				 RTE_CACHE_LINE_SIZE, socket_id);
	if (rxq == NULL)
		goto fail_rxq_alloc;

	sfc_dp_queue_init(&rxq->dp.dpq, port_id, queue_id, pci_addr);

	rc = ENOMEM;
	rxq->sw_desc = rte_calloc_socket("sfc-efx-rxq-sw_desc",
					 info->rxq_entries,
					 sizeof(*rxq->sw_desc),
					 RTE_CACHE_LINE_SIZE, socket_id);
	if (rxq->sw_desc == NULL)
		goto fail_desc_alloc;

	/* efx datapath is bound to efx control path */
	rxq->evq = sfc_rxq_by_dp_rxq(&rxq->dp)->evq;
	if (info->flags & SFC_RXQ_FLAG_RSS_HASH)
		rxq->flags |= SFC_EFX_RXQ_FLAG_RSS_HASH;
	rxq->ptr_mask = info->rxq_entries - 1;
	rxq->batch_max = info->batch_max;
	rxq->prefix_size = info->prefix_size;
	rxq->max_fill_level = info->max_fill_level;
	rxq->refill_threshold = info->refill_threshold;
	rxq->buf_size = info->buf_size;
	rxq->refill_mb_pool = info->refill_mb_pool;

	*dp_rxqp = &rxq->dp;
	return 0;

fail_desc_alloc:
	rte_free(rxq);

fail_rxq_alloc:
	return rc;
}

static sfc_dp_rx_qdestroy_t sfc_efx_rx_qdestroy;
static void
sfc_efx_rx_qdestroy(struct sfc_dp_rxq *dp_rxq)
{
	struct sfc_efx_rxq *rxq = sfc_efx_rxq_by_dp_rxq(dp_rxq);

	rte_free(rxq->sw_desc);
	rte_free(rxq);
}

static sfc_dp_rx_qstart_t sfc_efx_rx_qstart;
static int
sfc_efx_rx_qstart(struct sfc_dp_rxq *dp_rxq,
		  __rte_unused unsigned int evq_read_ptr)
{
	/* libefx-based datapath is specific to libefx-based PMD */
	struct sfc_efx_rxq *rxq = sfc_efx_rxq_by_dp_rxq(dp_rxq);
	struct sfc_rxq *crxq = sfc_rxq_by_dp_rxq(dp_rxq);

	rxq->common = crxq->common;

	rxq->pending = rxq->completed = rxq->added = rxq->pushed = 0;

	sfc_efx_rx_qrefill(rxq);

	rxq->flags |= (SFC_EFX_RXQ_FLAG_STARTED | SFC_EFX_RXQ_FLAG_RUNNING);

	return 0;
}

static sfc_dp_rx_qstop_t sfc_efx_rx_qstop;
static void
sfc_efx_rx_qstop(struct sfc_dp_rxq *dp_rxq,
		 __rte_unused unsigned int *evq_read_ptr)
{
	struct sfc_efx_rxq *rxq = sfc_efx_rxq_by_dp_rxq(dp_rxq);

	rxq->flags &= ~SFC_EFX_RXQ_FLAG_RUNNING;

	/* libefx-based datapath is bound to the libefx-based PMD and uses
	 * the event queue structure directly, so there is no need to
	 * return the EvQ read pointer.
	 */
}

static sfc_dp_rx_qpurge_t sfc_efx_rx_qpurge;
static void
sfc_efx_rx_qpurge(struct sfc_dp_rxq *dp_rxq)
{
	struct sfc_efx_rxq *rxq = sfc_efx_rxq_by_dp_rxq(dp_rxq);
	unsigned int i;
	struct sfc_efx_rx_sw_desc *rxd;

	for (i = rxq->completed; i != rxq->added; ++i) {
		rxd = &rxq->sw_desc[i & rxq->ptr_mask];
		rte_mbuf_raw_free(rxd->mbuf);
		rxd->mbuf = NULL;
		/* Packed stream relies on 0 in inactive SW desc.
		 * Rx queue stop is not performance critical, so
		 * there is no harm in doing it always.
		 */
		rxd->flags = 0;
		rxd->size = 0;
	}

	rxq->flags &= ~SFC_EFX_RXQ_FLAG_STARTED;
}

struct sfc_dp_rx sfc_efx_rx = {
	.dp = {
		.name = SFC_KVARG_DATAPATH_EFX,
		.type = SFC_DP_RX,
		.hw_fw_caps = 0,
	},
	.features = SFC_DP_RX_FEAT_SCATTER |
		    SFC_DP_RX_FEAT_CHECKSUM,
	.qsize_up_rings = sfc_efx_rx_qsize_up_rings,
	.qcreate = sfc_efx_rx_qcreate,
	.qdestroy = sfc_efx_rx_qdestroy,
	.qstart = sfc_efx_rx_qstart,
	.qstop = sfc_efx_rx_qstop,
	.qpurge = sfc_efx_rx_qpurge,
	.supported_ptypes_get = sfc_efx_supported_ptypes_get,
	.qdesc_npending = sfc_efx_rx_qdesc_npending,
	.qdesc_status = sfc_efx_rx_qdesc_status,
	.pkt_burst = sfc_efx_recv_pkts,
};

static void
sfc_rx_qflush(struct sfc_adapter *sa, unsigned int sw_index)
{
	struct sfc_rxq_info *rxq_info;
	struct sfc_rxq *rxq;
	unsigned int retry_count;
	unsigned int wait_count;
	int rc;

	rxq_info = &sfc_sa2shared(sa)->rxq_info[sw_index];
	SFC_ASSERT(rxq_info->state & SFC_RXQ_STARTED);

	rxq = &sa->rxq_ctrl[sw_index];

	/*
	 * Retry Rx queue flushing in the case of flush failure or
	 * timeout. In the worst case it can delay for 6 seconds.
	 */
	for (retry_count = 0;
	     ((rxq_info->state & SFC_RXQ_FLUSHED) == 0) &&
	     (retry_count < SFC_RX_QFLUSH_ATTEMPTS);
	     ++retry_count) {
		rc = efx_rx_qflush(rxq->common);
		if (rc != 0) {
			rxq_info->state |= (rc == EALREADY) ?
				SFC_RXQ_FLUSHED : SFC_RXQ_FLUSH_FAILED;
			break;
		}
		rxq_info->state &= ~SFC_RXQ_FLUSH_FAILED;
		rxq_info->state |= SFC_RXQ_FLUSHING;

		/*
		 * Wait for the Rx queue flush done or failed event for at
		 * least SFC_RX_QFLUSH_POLL_WAIT_MS milliseconds and not more
		 * than 2 seconds (SFC_RX_QFLUSH_POLL_WAIT_MS multiplied
		 * by SFC_RX_QFLUSH_POLL_ATTEMPTS).
		 */
		wait_count = 0;
		do {
			rte_delay_ms(SFC_RX_QFLUSH_POLL_WAIT_MS);
			sfc_ev_qpoll(rxq->evq);
		} while ((rxq_info->state & SFC_RXQ_FLUSHING) &&
			 (wait_count++ < SFC_RX_QFLUSH_POLL_ATTEMPTS));

		if (rxq_info->state & SFC_RXQ_FLUSHING)
			sfc_err(sa, "RxQ %u flush timed out", sw_index);

		if (rxq_info->state & SFC_RXQ_FLUSH_FAILED)
			sfc_err(sa, "RxQ %u flush failed", sw_index);

		if (rxq_info->state & SFC_RXQ_FLUSHED)
			sfc_notice(sa, "RxQ %u flushed", sw_index);
	}

	sa->priv.dp_rx->qpurge(rxq_info->dp);
}
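
/*
 * Descriptive note (not in the original source): set the default MAC
 * filter Rx queue; if the HW refuses it while promiscuous or
 * all-multicast mode is requested, retry with these modes disabled.
 */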
static int
sfc_rx_default_rxq_set_filter(struct sfc_adapter *sa, struct sfc_rxq *rxq)
{
	struct sfc_rss *rss = &sfc_sa2shared(sa)->rss;
	boolean_t need_rss = (rss->channels > 0) ? B_TRUE : B_FALSE;
	struct sfc_port *port = &sa->port;
	int rc;

	/*
	 * If promiscuous or all-multicast mode has been requested, setting
	 * a filter for the default Rx queue might fail, in particular, when
	 * running over a PCI function which is not a member of the
	 * corresponding privilege groups; if this occurs, a few iterations
	 * will be made to repeat this step without the promiscuous and
	 * all-multicast flags set
	 */
retry:
	rc = efx_mac_filter_default_rxq_set(sa->nic, rxq->common, need_rss);
	if (rc == 0)
		return 0;
	else if (rc != EOPNOTSUPP)
		return rc;

	if (port->promisc) {
		sfc_warn(sa, "promiscuous mode has been requested, "
			     "but the HW rejects it");
		sfc_warn(sa, "promiscuous mode will be disabled");

		port->promisc = B_FALSE;
		rc = sfc_set_rx_mode(sa);
		if (rc != 0)
			return rc;

		goto retry;
	}

	if (port->allmulti) {
		sfc_warn(sa, "all-multicast mode has been requested, "
			     "but the HW rejects it");
		sfc_warn(sa, "all-multicast mode will be disabled");

		port->allmulti = B_FALSE;
		rc = sfc_set_rx_mode(sa);
		if (rc != 0)
			return rc;

		goto retry;
	}

	return rc;
}

int
sfc_rx_qstart(struct sfc_adapter *sa, unsigned int sw_index)
{
	struct sfc_rxq_info *rxq_info;
	struct sfc_rxq *rxq;
	struct sfc_evq *evq;
	int rc;

	sfc_log_init(sa, "sw_index=%u", sw_index);

	SFC_ASSERT(sw_index < sfc_sa2shared(sa)->rxq_count);

	rxq_info = &sfc_sa2shared(sa)->rxq_info[sw_index];
	SFC_ASSERT(rxq_info->state == SFC_RXQ_INITIALIZED);

	rxq = &sa->rxq_ctrl[sw_index];
	evq = rxq->evq;

	rc = sfc_ev_qstart(evq, sfc_evq_index_by_rxq_sw_index(sa, sw_index));
	if (rc != 0)
		goto fail_ev_qstart;

	switch (rxq_info->type) {
	case EFX_RXQ_TYPE_DEFAULT:
		rc = efx_rx_qcreate(sa->nic, rxq->hw_index, 0, rxq_info->type,
			rxq->buf_size,
			&rxq->mem, rxq_info->entries, 0 /* not used on EF10 */,
			rxq_info->type_flags, evq->common, &rxq->common);
		break;
	case EFX_RXQ_TYPE_ES_SUPER_BUFFER: {
		struct rte_mempool *mp = rxq_info->refill_mb_pool;
		struct rte_mempool_info mp_info;

		rc = rte_mempool_ops_get_info(mp, &mp_info);
		if (rc != 0) {
			/* Positive errno is used in the driver */
			rc = -rc;
			goto fail_mp_get_info;
		}
		if (mp_info.contig_block_size <= 0) {
			rc = EINVAL;
			goto fail_bad_contig_block_size;
		}
		rc = efx_rx_qcreate_es_super_buffer(sa->nic, rxq->hw_index, 0,
			mp_info.contig_block_size, rxq->buf_size,
			mp->header_size + mp->elt_size + mp->trailer_size,
			sa->rxd_wait_timeout_ns,
			&rxq->mem, rxq_info->entries, rxq_info->type_flags,
			evq->common, &rxq->common);
		break;
	}
	default:
		rc = ENOTSUP;
	}
	if (rc != 0)
		goto fail_rx_qcreate;

	efx_rx_qenable(rxq->common);

	rc = sa->priv.dp_rx->qstart(rxq_info->dp, evq->read_ptr);
	if (rc != 0)
		goto fail_dp_qstart;

	rxq_info->state |= SFC_RXQ_STARTED;

	if (sw_index == 0 && !sfc_sa2shared(sa)->isolated) {
		rc = sfc_rx_default_rxq_set_filter(sa, rxq);
		if (rc != 0)
			goto fail_mac_filter_default_rxq_set;
	}

	/* It seems to be used by DPDK for debug purposes only ('rte_ether') */
	sa->eth_dev->data->rx_queue_state[sw_index] =
		RTE_ETH_QUEUE_STATE_STARTED;

	return 0;

fail_mac_filter_default_rxq_set:
	sa->priv.dp_rx->qstop(rxq_info->dp, &rxq->evq->read_ptr);

fail_dp_qstart:
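	/* Added note: flush the HW Rx queue and release the buffers posted to it */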
	sfc_rx_qflush(sa, sw_index);

fail_rx_qcreate:
fail_bad_contig_block_size:
fail_mp_get_info:
	sfc_ev_qstop(evq);

fail_ev_qstart:
	return rc;
}

void
sfc_rx_qstop(struct sfc_adapter *sa, unsigned int sw_index)
{
	struct sfc_rxq_info *rxq_info;
	struct sfc_rxq *rxq;

	sfc_log_init(sa, "sw_index=%u", sw_index);

	SFC_ASSERT(sw_index < sfc_sa2shared(sa)->rxq_count);

	rxq_info = &sfc_sa2shared(sa)->rxq_info[sw_index];

	if (rxq_info->state == SFC_RXQ_INITIALIZED)
		return;
	SFC_ASSERT(rxq_info->state & SFC_RXQ_STARTED);

	/* It seems to be used by DPDK for debug purposes only ('rte_ether') */
	sa->eth_dev->data->rx_queue_state[sw_index] =
		RTE_ETH_QUEUE_STATE_STOPPED;

	rxq = &sa->rxq_ctrl[sw_index];
	sa->priv.dp_rx->qstop(rxq_info->dp, &rxq->evq->read_ptr);

	if (sw_index == 0)
		efx_mac_filter_default_rxq_clear(sa->nic);

	sfc_rx_qflush(sa, sw_index);

	rxq_info->state = SFC_RXQ_INITIALIZED;

	efx_rx_qdestroy(rxq->common);

	sfc_ev_qstop(rxq->evq);
}

uint64_t
sfc_rx_get_dev_offload_caps(struct sfc_adapter *sa)
{
	const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
	uint64_t caps = 0;

	caps |= DEV_RX_OFFLOAD_JUMBO_FRAME;

	if (sa->priv.dp_rx->features & SFC_DP_RX_FEAT_CHECKSUM) {
		caps |= DEV_RX_OFFLOAD_IPV4_CKSUM;
		caps |= DEV_RX_OFFLOAD_UDP_CKSUM;
		caps |= DEV_RX_OFFLOAD_TCP_CKSUM;
	}

	if (encp->enc_tunnel_encapsulations_supported &&
	    (sa->priv.dp_rx->features & SFC_DP_RX_FEAT_TUNNELS))
		caps |= DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM;

	return caps;
}

uint64_t
sfc_rx_get_queue_offload_caps(struct sfc_adapter *sa)
{
	uint64_t caps = 0;

	if (sa->priv.dp_rx->features & SFC_DP_RX_FEAT_SCATTER)
		caps |= DEV_RX_OFFLOAD_SCATTER;

	return caps;
}

static int
sfc_rx_qcheck_conf(struct sfc_adapter *sa, unsigned int rxq_max_fill_level,
		   const struct rte_eth_rxconf *rx_conf,
		   __rte_unused uint64_t offloads)
{
	int rc = 0;

	if (rx_conf->rx_thresh.pthresh != 0 ||
	    rx_conf->rx_thresh.hthresh != 0 ||
	    rx_conf->rx_thresh.wthresh != 0) {
		sfc_warn(sa,
			"RxQ prefetch/host/writeback thresholds are not supported");
	}

	if (rx_conf->rx_free_thresh > rxq_max_fill_level) {
		sfc_err(sa,
			"RxQ free threshold too large: %u vs maximum %u",
			rx_conf->rx_free_thresh, rxq_max_fill_level);
		rc = EINVAL;
	}

	if (rx_conf->rx_drop_en == 0) {
		sfc_err(sa, "RxQ drop disable is not supported");
		rc = EINVAL;
	}

	return rc;
}

static unsigned int
sfc_rx_mbuf_data_alignment(struct rte_mempool *mb_pool)
{
	uint32_t data_off;
	uint32_t order;

	/* The mbuf object itself is always cache line aligned */
	order = rte_bsf32(RTE_CACHE_LINE_SIZE);

	/* Data offset from mbuf object start */
	data_off = sizeof(struct rte_mbuf) + rte_pktmbuf_priv_size(mb_pool) +
		RTE_PKTMBUF_HEADROOM;

	order = MIN(order, rte_bsf32(data_off));

	return 1u << order;
}
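
/*
 * Descriptive note (not in the original source): calculate the Rx buffer
 * size usable for mbufs from the given mempool, taking the mbuf headroom
 * and the NIC buffer start/end alignment requirements into account.
 * Returns 0 if the mempool objects are too small.
 */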
static uint16_t
sfc_rx_mb_pool_buf_size(struct sfc_adapter *sa, struct rte_mempool *mb_pool)
{
	const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
	const uint32_t nic_align_start = MAX(1, encp->enc_rx_buf_align_start);
	const uint32_t nic_align_end = MAX(1, encp->enc_rx_buf_align_end);
	uint16_t buf_size;
	unsigned int buf_aligned;
	unsigned int start_alignment;
	unsigned int end_padding_alignment;

	/* Below it is assumed that both alignments are powers of 2 */
	SFC_ASSERT(rte_is_power_of_2(nic_align_start));
	SFC_ASSERT(rte_is_power_of_2(nic_align_end));

	/*
	 * mbuf is always cache line aligned, double-check
	 * that it meets Rx buffer start alignment requirements.
	 */

	/* Start from mbuf pool data room size */
	buf_size = rte_pktmbuf_data_room_size(mb_pool);

	/* Remove headroom */
	if (buf_size <= RTE_PKTMBUF_HEADROOM) {
		sfc_err(sa,
			"RxQ mbuf pool %s object data room size %u is smaller than headroom %u",
			mb_pool->name, buf_size, RTE_PKTMBUF_HEADROOM);
		return 0;
	}
	buf_size -= RTE_PKTMBUF_HEADROOM;

	/* Calculate guaranteed data start alignment */
	buf_aligned = sfc_rx_mbuf_data_alignment(mb_pool);

	/* Reserve space for start alignment */
	if (buf_aligned < nic_align_start) {
		start_alignment = nic_align_start - buf_aligned;
		if (buf_size <= start_alignment) {
			sfc_err(sa,
				"RxQ mbuf pool %s object data room size %u is insufficient for headroom %u and buffer start alignment %u required by NIC",
				mb_pool->name,
				rte_pktmbuf_data_room_size(mb_pool),
				RTE_PKTMBUF_HEADROOM, start_alignment);
			return 0;
		}
		buf_aligned = nic_align_start;
		buf_size -= start_alignment;
	} else {
		start_alignment = 0;
	}

	/* Make sure that end padding does not write beyond the buffer */
	if (buf_aligned < nic_align_end) {
		/*
		 * Estimate space which can be lost. If the guaranteed
		 * buffer size is odd, the lost space is (nic_align_end - 1).
		 * A more accurate formula is below.
		 */
		end_padding_alignment = nic_align_end -
			MIN(buf_aligned, 1u << (rte_bsf32(buf_size) - 1));
		if (buf_size <= end_padding_alignment) {
			sfc_err(sa,
				"RxQ mbuf pool %s object data room size %u is insufficient for headroom %u, buffer start alignment %u and end padding alignment %u required by NIC",
				mb_pool->name,
				rte_pktmbuf_data_room_size(mb_pool),
				RTE_PKTMBUF_HEADROOM, start_alignment,
				end_padding_alignment);
			return 0;
		}
		buf_size -= end_padding_alignment;
	} else {
		/*
		 * Start is aligned the same or better than end,
		 * just align length.
		 */
		buf_size = P2ALIGN(buf_size, nic_align_end);
	}

	return buf_size;
}

int
sfc_rx_qinit(struct sfc_adapter *sa, unsigned int sw_index,
	     uint16_t nb_rx_desc, unsigned int socket_id,
	     const struct rte_eth_rxconf *rx_conf,
	     struct rte_mempool *mb_pool)
{
	const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
	struct sfc_rss *rss = &sfc_sa2shared(sa)->rss;
	int rc;
	unsigned int rxq_entries;
	unsigned int evq_entries;
	unsigned int rxq_max_fill_level;
	uint64_t offloads;
	uint16_t buf_size;
	struct sfc_rxq_info *rxq_info;
	struct sfc_evq *evq;
	struct sfc_rxq *rxq;
	struct sfc_dp_rx_qcreate_info info;
	struct sfc_dp_rx_hw_limits hw_limits;
	uint16_t rx_free_thresh;

	memset(&hw_limits, 0, sizeof(hw_limits));
	hw_limits.rxq_max_entries = sa->rxq_max_entries;
	hw_limits.rxq_min_entries = sa->rxq_min_entries;
	hw_limits.evq_max_entries = sa->evq_max_entries;
	hw_limits.evq_min_entries = sa->evq_min_entries;

	rc = sa->priv.dp_rx->qsize_up_rings(nb_rx_desc, &hw_limits, mb_pool,
					    &rxq_entries, &evq_entries,
					    &rxq_max_fill_level);
	if (rc != 0)
		goto fail_size_up_rings;
	SFC_ASSERT(rxq_entries >= sa->rxq_min_entries);
	SFC_ASSERT(rxq_entries <= sa->rxq_max_entries);
	SFC_ASSERT(rxq_max_fill_level <= nb_rx_desc);

	offloads = rx_conf->offloads |
		sa->eth_dev->data->dev_conf.rxmode.offloads;
	rc = sfc_rx_qcheck_conf(sa, rxq_max_fill_level, rx_conf, offloads);
	if (rc != 0)
		goto fail_bad_conf;

	buf_size = sfc_rx_mb_pool_buf_size(sa, mb_pool);
	if (buf_size == 0) {
		sfc_err(sa, "RxQ %u mbuf pool object size is too small",
			sw_index);
		rc = EINVAL;
		goto fail_bad_conf;
	}

	if ((buf_size < sa->port.pdu + encp->enc_rx_prefix_size) &&
	    (~offloads & DEV_RX_OFFLOAD_SCATTER)) {
		sfc_err(sa, "Rx scatter is disabled and RxQ %u mbuf pool "
			"object size is too small", sw_index);
		sfc_err(sa, "RxQ %u calculated Rx buffer size is %u vs "
			"PDU size %u plus Rx prefix %u bytes",
			sw_index, buf_size, (unsigned int)sa->port.pdu,
			encp->enc_rx_prefix_size);
		rc = EINVAL;
		goto fail_bad_conf;
	}

	SFC_ASSERT(sw_index < sfc_sa2shared(sa)->rxq_count);
	rxq_info = &sfc_sa2shared(sa)->rxq_info[sw_index];

	SFC_ASSERT(rxq_entries <= rxq_info->max_entries);
	rxq_info->entries = rxq_entries;

	if (sa->priv.dp_rx->dp.hw_fw_caps & SFC_DP_HW_FW_CAP_RX_ES_SUPER_BUFFER)
		rxq_info->type = EFX_RXQ_TYPE_ES_SUPER_BUFFER;
	else
		rxq_info->type = EFX_RXQ_TYPE_DEFAULT;

	rxq_info->type_flags =
		(offloads & DEV_RX_OFFLOAD_SCATTER) ?
		EFX_RXQ_FLAG_SCATTER : EFX_RXQ_FLAG_NONE;

	if ((encp->enc_tunnel_encapsulations_supported != 0) &&
	    (sa->priv.dp_rx->features & SFC_DP_RX_FEAT_TUNNELS))
		rxq_info->type_flags |= EFX_RXQ_FLAG_INNER_CLASSES;

	rc = sfc_ev_qinit(sa, SFC_EVQ_TYPE_RX, sw_index,
			  evq_entries, socket_id, &evq);
	if (rc != 0)
		goto fail_ev_qinit;

	rxq = &sa->rxq_ctrl[sw_index];
	rxq->evq = evq;
	rxq->hw_index = sw_index;
	/*
	 * If the Rx refill threshold is specified (its value is non-zero)
	 * in the Rx configuration, use the specified value. Otherwise use
	 * 1/8 of the number of Rx descriptors as the default. It allows
	 * keeping the Rx ring full enough and does not refill too
	 * aggressively if the packet rate is high.
	 *
	 * Since the PMD refills in bulks, it may end up waiting for a full
	 * bulk to accumulate before refilling (basically a round down), so
	 * it is better to round up here to mitigate that a bit.
	 */
	rx_free_thresh = (rx_conf->rx_free_thresh != 0) ?
		rx_conf->rx_free_thresh : EFX_DIV_ROUND_UP(nb_rx_desc, 8);
	/* Rx refill threshold cannot be smaller than refill bulk */
	rxq_info->refill_threshold =
		RTE_MAX(rx_free_thresh, SFC_RX_REFILL_BULK);
	rxq_info->refill_mb_pool = mb_pool;
	rxq->buf_size = buf_size;

	rc = sfc_dma_alloc(sa, "rxq", sw_index,
			   efx_rxq_size(sa->nic, rxq_info->entries),
			   socket_id, &rxq->mem);
	if (rc != 0)
		goto fail_dma_alloc;

	memset(&info, 0, sizeof(info));
	info.refill_mb_pool = rxq_info->refill_mb_pool;
	info.max_fill_level = rxq_max_fill_level;
	info.refill_threshold = rxq_info->refill_threshold;
	info.buf_size = buf_size;
	info.batch_max = encp->enc_rx_batch_max;
	info.prefix_size = encp->enc_rx_prefix_size;

	if (rss->hash_support == EFX_RX_HASH_AVAILABLE && rss->channels > 0)
		info.flags |= SFC_RXQ_FLAG_RSS_HASH;

	info.rxq_entries = rxq_info->entries;
	info.rxq_hw_ring = rxq->mem.esm_base;
	info.evq_entries = evq_entries;
	info.evq_hw_ring = evq->mem.esm_base;
	info.hw_index = rxq->hw_index;
	info.mem_bar = sa->mem_bar.esb_base;
	info.vi_window_shift = encp->enc_vi_window_shift;

	rc = sa->priv.dp_rx->qcreate(sa->eth_dev->data->port_id, sw_index,
				     &RTE_ETH_DEV_TO_PCI(sa->eth_dev)->addr,
				     socket_id, &info, &rxq_info->dp);
	if (rc != 0)
		goto fail_dp_rx_qcreate;

	evq->dp_rxq = rxq_info->dp;

	rxq_info->state = SFC_RXQ_INITIALIZED;

	rxq_info->deferred_start = (rx_conf->rx_deferred_start != 0);

	return 0;

fail_dp_rx_qcreate:
	sfc_dma_free(sa, &rxq->mem);

fail_dma_alloc:
	sfc_ev_qfini(evq);

fail_ev_qinit:
	rxq_info->entries = 0;

fail_bad_conf:
fail_size_up_rings:
	sfc_log_init(sa, "failed %d", rc);
	return rc;
}

void
sfc_rx_qfini(struct sfc_adapter *sa, unsigned int sw_index)
{
	struct sfc_rxq_info *rxq_info;
	struct sfc_rxq *rxq;

	SFC_ASSERT(sw_index < sfc_sa2shared(sa)->rxq_count);
	sa->eth_dev->data->rx_queues[sw_index] = NULL;

	rxq_info = &sfc_sa2shared(sa)->rxq_info[sw_index];

	SFC_ASSERT(rxq_info->state == SFC_RXQ_INITIALIZED);

	sa->priv.dp_rx->qdestroy(rxq_info->dp);
	rxq_info->dp = NULL;

	rxq_info->state &= ~SFC_RXQ_INITIALIZED;
	rxq_info->entries = 0;

	rxq = &sa->rxq_ctrl[sw_index];

	sfc_dma_free(sa, &rxq->mem);

	sfc_ev_qfini(rxq->evq);
	rxq->evq = NULL;
}

/*
 * Mapping between RTE RSS hash functions and their EFX counterparts.
 */
static const struct sfc_rss_hf_rte_to_efx sfc_rss_hf_map[] = {
	{ ETH_RSS_NONFRAG_IPV4_TCP,
	  EFX_RX_HASH(IPV4_TCP, 4TUPLE) },
	{ ETH_RSS_NONFRAG_IPV4_UDP,
	  EFX_RX_HASH(IPV4_UDP, 4TUPLE) },
	{ ETH_RSS_NONFRAG_IPV6_TCP | ETH_RSS_IPV6_TCP_EX,
	  EFX_RX_HASH(IPV6_TCP, 4TUPLE) },
	{ ETH_RSS_NONFRAG_IPV6_UDP | ETH_RSS_IPV6_UDP_EX,
	  EFX_RX_HASH(IPV6_UDP, 4TUPLE) },
	{ ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 | ETH_RSS_NONFRAG_IPV4_OTHER,
	  EFX_RX_HASH(IPV4_TCP, 2TUPLE) | EFX_RX_HASH(IPV4_UDP, 2TUPLE) |
	  EFX_RX_HASH(IPV4, 2TUPLE) },
	{ ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 | ETH_RSS_NONFRAG_IPV6_OTHER |
	  ETH_RSS_IPV6_EX,
	  EFX_RX_HASH(IPV6_TCP, 2TUPLE) | EFX_RX_HASH(IPV6_UDP, 2TUPLE) |
	  EFX_RX_HASH(IPV6, 2TUPLE) }
};

static efx_rx_hash_type_t
sfc_rx_hash_types_mask_supp(efx_rx_hash_type_t hash_type,
			    unsigned int *hash_type_flags_supported,
			    unsigned int nb_hash_type_flags_supported)
{
	efx_rx_hash_type_t hash_type_masked = 0;
	unsigned int i, j;

	for (i = 0; i < nb_hash_type_flags_supported; ++i) {
		unsigned int class_tuple_lbn[] = {
			EFX_RX_CLASS_IPV4_TCP_LBN,
			EFX_RX_CLASS_IPV4_UDP_LBN,
			EFX_RX_CLASS_IPV4_LBN,
			EFX_RX_CLASS_IPV6_TCP_LBN,
			EFX_RX_CLASS_IPV6_UDP_LBN,
			EFX_RX_CLASS_IPV6_LBN
		};

		for (j = 0; j < RTE_DIM(class_tuple_lbn); ++j) {
			unsigned int tuple_mask = EFX_RX_CLASS_HASH_4TUPLE;
			unsigned int flag;

			tuple_mask <<= class_tuple_lbn[j];
			flag = hash_type & tuple_mask;

			if (flag == hash_type_flags_supported[i])
				hash_type_masked |= flag;
		}
	}

	return hash_type_masked;
}

int
sfc_rx_hash_init(struct sfc_adapter *sa)
{
	struct sfc_rss *rss = &sfc_sa2shared(sa)->rss;
	const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
	uint32_t alg_mask = encp->enc_rx_scale_hash_alg_mask;
	efx_rx_hash_alg_t alg;
	unsigned int flags_supp[EFX_RX_HASH_NFLAGS];
	unsigned int nb_flags_supp;
	struct sfc_rss_hf_rte_to_efx *hf_map;
	struct sfc_rss_hf_rte_to_efx *entry;
	efx_rx_hash_type_t efx_hash_types;
	unsigned int i;
	int rc;

	if (alg_mask & (1U << EFX_RX_HASHALG_TOEPLITZ))
		alg = EFX_RX_HASHALG_TOEPLITZ;
	else if (alg_mask & (1U << EFX_RX_HASHALG_PACKED_STREAM))
		alg = EFX_RX_HASHALG_PACKED_STREAM;
	else
		return EINVAL;

	rc = efx_rx_scale_hash_flags_get(sa->nic, alg, flags_supp,
					 RTE_DIM(flags_supp), &nb_flags_supp);
	if (rc != 0)
		return rc;

	hf_map = rte_calloc_socket("sfc-rss-hf-map",
				   RTE_DIM(sfc_rss_hf_map),
				   sizeof(*hf_map), 0, sa->socket_id);
	if (hf_map == NULL)
		return ENOMEM;

	entry = hf_map;
	efx_hash_types = 0;
	for (i = 0; i < RTE_DIM(sfc_rss_hf_map); ++i) {
		efx_rx_hash_type_t ht;

		ht = sfc_rx_hash_types_mask_supp(sfc_rss_hf_map[i].efx,
						 flags_supp, nb_flags_supp);
		if (ht != 0) {
			entry->rte = sfc_rss_hf_map[i].rte;
			entry->efx = ht;
			efx_hash_types |= ht;
			++entry;
		}
	}

	rss->hash_alg = alg;
	rss->hf_map_nb_entries = (unsigned int)(entry - hf_map);
	rss->hf_map = hf_map;
	rss->hash_types = efx_hash_types;

	return 0;
}

void
sfc_rx_hash_fini(struct sfc_adapter *sa)
{
	struct sfc_rss *rss = &sfc_sa2shared(sa)->rss;

	rte_free(rss->hf_map);
}
int
sfc_rx_hf_rte_to_efx(struct sfc_adapter *sa, uint64_t rte,
		     efx_rx_hash_type_t *efx)
{
	struct sfc_rss *rss = &sfc_sa2shared(sa)->rss;
	efx_rx_hash_type_t hash_types = 0;
	unsigned int i;

	for (i = 0; i < rss->hf_map_nb_entries; ++i) {
		uint64_t rte_mask = rss->hf_map[i].rte;

		if ((rte & rte_mask) != 0) {
			rte &= ~rte_mask;
			hash_types |= rss->hf_map[i].efx;
		}
	}

	if (rte != 0) {
		sfc_err(sa, "unsupported hash functions requested");
		return EINVAL;
	}

	*efx = hash_types;

	return 0;
}

uint64_t
sfc_rx_hf_efx_to_rte(struct sfc_rss *rss, efx_rx_hash_type_t efx)
{
	uint64_t rte = 0;
	unsigned int i;

	for (i = 0; i < rss->hf_map_nb_entries; ++i) {
		efx_rx_hash_type_t hash_type = rss->hf_map[i].efx;

		if ((efx & hash_type) == hash_type)
			rte |= rss->hf_map[i].rte;
	}

	return rte;
}

static int
sfc_rx_process_adv_conf_rss(struct sfc_adapter *sa,
			    struct rte_eth_rss_conf *conf)
{
	struct sfc_rss *rss = &sfc_sa2shared(sa)->rss;
	efx_rx_hash_type_t efx_hash_types = rss->hash_types;
	uint64_t rss_hf = sfc_rx_hf_efx_to_rte(rss, efx_hash_types);
	int rc;

	if (rss->context_type != EFX_RX_SCALE_EXCLUSIVE) {
		if ((conf->rss_hf != 0 && conf->rss_hf != rss_hf) ||
		    conf->rss_key != NULL)
			return EINVAL;
	}

	if (conf->rss_hf != 0) {
		rc = sfc_rx_hf_rte_to_efx(sa, conf->rss_hf, &efx_hash_types);
		if (rc != 0)
			return rc;
	}

	if (conf->rss_key != NULL) {
		if (conf->rss_key_len != sizeof(rss->key)) {
			sfc_err(sa, "RSS key size is wrong (should be %lu)",
				sizeof(rss->key));
			return EINVAL;
		}
		rte_memcpy(rss->key, conf->rss_key, sizeof(rss->key));
	}

	rss->hash_types = efx_hash_types;

	return 0;
}

static int
sfc_rx_rss_config(struct sfc_adapter *sa)
{
	struct sfc_rss *rss = &sfc_sa2shared(sa)->rss;
	int rc = 0;

	if (rss->channels > 0) {
		rc = efx_rx_scale_mode_set(sa->nic, EFX_RSS_CONTEXT_DEFAULT,
					   rss->hash_alg, rss->hash_types,
					   B_TRUE);
		if (rc != 0)
			goto finish;

		rc = efx_rx_scale_key_set(sa->nic, EFX_RSS_CONTEXT_DEFAULT,
					  rss->key, sizeof(rss->key));
		if (rc != 0)
			goto finish;

		rc = efx_rx_scale_tbl_set(sa->nic, EFX_RSS_CONTEXT_DEFAULT,
					  rss->tbl, RTE_DIM(rss->tbl));
	}

finish:
	return rc;
}

int
sfc_rx_start(struct sfc_adapter *sa)
{
	struct sfc_adapter_shared * const sas = sfc_sa2shared(sa);
	unsigned int sw_index;
	int rc;

	sfc_log_init(sa, "rxq_count=%u", sas->rxq_count);

	rc = efx_rx_init(sa->nic);
	if (rc != 0)
		goto fail_rx_init;

	rc = sfc_rx_rss_config(sa);
	if (rc != 0)
		goto fail_rss_config;

	for (sw_index = 0; sw_index < sas->rxq_count; ++sw_index) {
		if (sas->rxq_info[sw_index].state == SFC_RXQ_INITIALIZED &&
		    (!sas->rxq_info[sw_index].deferred_start ||
		     sas->rxq_info[sw_index].deferred_started)) {
			rc = sfc_rx_qstart(sa, sw_index);
			if (rc != 0)
				goto fail_rx_qstart;
		}
	}

	return 0;

fail_rx_qstart:
	while (sw_index-- > 0)
		sfc_rx_qstop(sa, sw_index);

fail_rss_config:
	efx_rx_fini(sa->nic);

fail_rx_init:
	sfc_log_init(sa, "failed %d", rc);
	return rc;
}
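
/*
 * Descriptive note (not in the original source): stop all started Rx
 * queues and finalise Rx in the NIC.
 */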
void
sfc_rx_stop(struct sfc_adapter *sa)
{
	struct sfc_adapter_shared * const sas = sfc_sa2shared(sa);
	unsigned int sw_index;

	sfc_log_init(sa, "rxq_count=%u", sas->rxq_count);

	sw_index = sas->rxq_count;
	while (sw_index-- > 0) {
		if (sas->rxq_info[sw_index].state & SFC_RXQ_STARTED)
			sfc_rx_qstop(sa, sw_index);
	}

	efx_rx_fini(sa->nic);
}

static int
sfc_rx_qinit_info(struct sfc_adapter *sa, unsigned int sw_index)
{
	struct sfc_adapter_shared * const sas = sfc_sa2shared(sa);
	struct sfc_rxq_info *rxq_info = &sas->rxq_info[sw_index];
	const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
	unsigned int max_entries;

	max_entries = encp->enc_rxq_max_ndescs;
	SFC_ASSERT(rte_is_power_of_2(max_entries));

	rxq_info->max_entries = max_entries;

	return 0;
}

static int
sfc_rx_check_mode(struct sfc_adapter *sa, struct rte_eth_rxmode *rxmode)
{
	struct sfc_adapter_shared * const sas = sfc_sa2shared(sa);
	uint64_t offloads_supported = sfc_rx_get_dev_offload_caps(sa) |
				      sfc_rx_get_queue_offload_caps(sa);
	struct sfc_rss *rss = &sas->rss;
	int rc = 0;

	switch (rxmode->mq_mode) {
	case ETH_MQ_RX_NONE:
		/* No special checks are required */
		break;
	case ETH_MQ_RX_RSS:
		if (rss->context_type == EFX_RX_SCALE_UNAVAILABLE) {
			sfc_err(sa, "RSS is not available");
			rc = EINVAL;
		}
		break;
	default:
		sfc_err(sa, "Rx multi-queue mode %u not supported",
			rxmode->mq_mode);
		rc = EINVAL;
	}

	/*
	 * Requested offloads are validated against those supported by the
	 * ethdev layer, so unsupported offloads cannot be added as a
	 * result of the checks below.
	 */
	if ((rxmode->offloads & DEV_RX_OFFLOAD_CHECKSUM) !=
	    (offloads_supported & DEV_RX_OFFLOAD_CHECKSUM)) {
		sfc_warn(sa, "Rx checksum offloads cannot be disabled - always on (IPv4/TCP/UDP)");
		rxmode->offloads |= DEV_RX_OFFLOAD_CHECKSUM;
	}

	if ((offloads_supported & DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM) &&
	    (~rxmode->offloads & DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM)) {
		sfc_warn(sa, "Rx outer IPv4 checksum offload cannot be disabled - always on");
		rxmode->offloads |= DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM;
	}

	return rc;
}

/**
 * Destroy excess queues that are no longer needed after reconfiguration
 * or complete close.
 */
static void
sfc_rx_fini_queues(struct sfc_adapter *sa, unsigned int nb_rx_queues)
{
	struct sfc_adapter_shared * const sas = sfc_sa2shared(sa);
	int sw_index;

	SFC_ASSERT(nb_rx_queues <= sas->rxq_count);

	sw_index = sas->rxq_count;
	while (--sw_index >= (int)nb_rx_queues) {
		if (sas->rxq_info[sw_index].state & SFC_RXQ_INITIALIZED)
			sfc_rx_qfini(sa, sw_index);
	}

	sas->rxq_count = nb_rx_queues;
}

/**
 * Initialize Rx subsystem.
 *
 * Called at the device (re)configuration stage when the number of receive
 * queues is specified together with other device-level receive
 * configuration.
 *
 * It should be used to allocate NUMA-unaware resources.
 */
int
sfc_rx_configure(struct sfc_adapter *sa)
{
	struct sfc_adapter_shared * const sas = sfc_sa2shared(sa);
	struct sfc_rss *rss = &sas->rss;
	struct rte_eth_conf *dev_conf = &sa->eth_dev->data->dev_conf;
	const unsigned int nb_rx_queues = sa->eth_dev->data->nb_rx_queues;
	int rc;

	sfc_log_init(sa, "nb_rx_queues=%u (old %u)",
		     nb_rx_queues, sas->rxq_count);

	rc = sfc_rx_check_mode(sa, &dev_conf->rxmode);
	if (rc != 0)
		goto fail_check_mode;

	if (nb_rx_queues == sas->rxq_count)
		goto configure_rss;

	if (sas->rxq_info == NULL) {
		rc = ENOMEM;
		sas->rxq_info = rte_calloc_socket("sfc-rxqs", nb_rx_queues,
						  sizeof(sas->rxq_info[0]), 0,
						  sa->socket_id);
		if (sas->rxq_info == NULL)
			goto fail_rxqs_alloc;

		/*
		 * Allocate primary-process-only RxQ control structures
		 * from the heap since they should not be shared.
		 */
		rc = ENOMEM;
		sa->rxq_ctrl = calloc(nb_rx_queues, sizeof(sa->rxq_ctrl[0]));
		if (sa->rxq_ctrl == NULL)
			goto fail_rxqs_ctrl_alloc;
	} else {
		struct sfc_rxq_info *new_rxq_info;
		struct sfc_rxq *new_rxq_ctrl;

		if (nb_rx_queues < sas->rxq_count)
			sfc_rx_fini_queues(sa, nb_rx_queues);

		rc = ENOMEM;
		new_rxq_info =
			rte_realloc(sas->rxq_info,
				    nb_rx_queues * sizeof(sas->rxq_info[0]), 0);
		if (new_rxq_info == NULL && nb_rx_queues > 0)
			goto fail_rxqs_realloc;

		rc = ENOMEM;
		new_rxq_ctrl = realloc(sa->rxq_ctrl,
				       nb_rx_queues * sizeof(sa->rxq_ctrl[0]));
		if (new_rxq_ctrl == NULL && nb_rx_queues > 0)
			goto fail_rxqs_ctrl_realloc;

		sas->rxq_info = new_rxq_info;
		sa->rxq_ctrl = new_rxq_ctrl;
		if (nb_rx_queues > sas->rxq_count) {
			memset(&sas->rxq_info[sas->rxq_count], 0,
			       (nb_rx_queues - sas->rxq_count) *
			       sizeof(sas->rxq_info[0]));
			memset(&sa->rxq_ctrl[sas->rxq_count], 0,
			       (nb_rx_queues - sas->rxq_count) *
			       sizeof(sa->rxq_ctrl[0]));
		}
	}

	while (sas->rxq_count < nb_rx_queues) {
		rc = sfc_rx_qinit_info(sa, sas->rxq_count);
		if (rc != 0)
			goto fail_rx_qinit_info;

		sas->rxq_count++;
	}

configure_rss:
	rss->channels = (dev_conf->rxmode.mq_mode == ETH_MQ_RX_RSS) ?
			MIN(sas->rxq_count, EFX_MAXRSS) : 0;

	if (rss->channels > 0) {
		struct rte_eth_rss_conf *adv_conf_rss;
		unsigned int sw_index;

		for (sw_index = 0; sw_index < EFX_RSS_TBL_SIZE; ++sw_index)
			rss->tbl[sw_index] = sw_index % rss->channels;

		adv_conf_rss = &dev_conf->rx_adv_conf.rss_conf;
		rc = sfc_rx_process_adv_conf_rss(sa, adv_conf_rss);
		if (rc != 0)
			goto fail_rx_process_adv_conf_rss;
	}

	return 0;

fail_rx_process_adv_conf_rss:
fail_rx_qinit_info:
fail_rxqs_ctrl_realloc:
fail_rxqs_realloc:
fail_rxqs_ctrl_alloc:
fail_rxqs_alloc:
	sfc_rx_close(sa);

fail_check_mode:
	sfc_log_init(sa, "failed %d", rc);
	return rc;
}

/**
 * Shutdown Rx subsystem.
 *
 * Called at device close stage, for example, before device shutdown.
 */
void
sfc_rx_close(struct sfc_adapter *sa)
{
	struct sfc_rss *rss = &sfc_sa2shared(sa)->rss;

	sfc_rx_fini_queues(sa, 0);

	rss->channels = 0;

	free(sa->rxq_ctrl);
	sa->rxq_ctrl = NULL;

	rte_free(sfc_sa2shared(sa)->rxq_info);
	sfc_sa2shared(sa)->rxq_info = NULL;
}