/* SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 2016-2018 Solarflare Communications Inc.
 * All rights reserved.
 *
 * This software was jointly developed between OKTET Labs (under contract
 * for Solarflare) and Solarflare Communications, Inc.
 */

#include <rte_mempool.h>

#include "efx.h"

#include "sfc.h"
#include "sfc_debug.h"
#include "sfc_log.h"
#include "sfc_ev.h"
#include "sfc_rx.h"
#include "sfc_kvargs.h"
#include "sfc_tweak.h"

/*
 * Maximum number of Rx queue flush attempts in the case of failure or
 * flush timeout
 */
#define SFC_RX_QFLUSH_ATTEMPTS		(3)

/*
 * Time to wait between event queue polling attempts when waiting for Rx
 * queue flush done or failed events.
 */
#define SFC_RX_QFLUSH_POLL_WAIT_MS	(1)

/*
 * Maximum number of event queue polling attempts when waiting for Rx queue
 * flush done or failed events. It defines Rx queue flush attempt timeout
 * together with SFC_RX_QFLUSH_POLL_WAIT_MS.
 */
#define SFC_RX_QFLUSH_POLL_ATTEMPTS	(2000)

void
sfc_rx_qflush_done(struct sfc_rxq *rxq)
{
	rxq->state |= SFC_RXQ_FLUSHED;
	rxq->state &= ~SFC_RXQ_FLUSHING;
}

void
sfc_rx_qflush_failed(struct sfc_rxq *rxq)
{
	rxq->state |= SFC_RXQ_FLUSH_FAILED;
	rxq->state &= ~SFC_RXQ_FLUSHING;
}
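
/*
 * Refill the Rx ring with mbufs from the queue mempool in bulks of
 * SFC_RX_REFILL_BULK. Descriptors are posted to the common Rx queue per
 * bulk and the doorbell is pushed once after the loop, so that only a
 * single MMIO write is done per refill.
 */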
static void
sfc_efx_rx_qrefill(struct sfc_efx_rxq *rxq)
{
	unsigned int free_space;
	unsigned int bulks;
	void *objs[SFC_RX_REFILL_BULK];
	efsys_dma_addr_t addr[RTE_DIM(objs)];
	unsigned int added = rxq->added;
	unsigned int id;
	unsigned int i;
	struct sfc_efx_rx_sw_desc *rxd;
	struct rte_mbuf *m;
	uint16_t port_id = rxq->dp.dpq.port_id;

	free_space = rxq->max_fill_level - (added - rxq->completed);

	if (free_space < rxq->refill_threshold)
		return;

	bulks = free_space / RTE_DIM(objs);
	/* refill_threshold guarantees that bulks is positive */
	SFC_ASSERT(bulks > 0);

	id = added & rxq->ptr_mask;
	do {
		if (unlikely(rte_mempool_get_bulk(rxq->refill_mb_pool, objs,
						  RTE_DIM(objs)) < 0)) {
			/*
			 * It is hardly a safe way to increment the counter
			 * from different contexts, but all PMDs do it.
			 */
			rxq->evq->sa->eth_dev->data->rx_mbuf_alloc_failed +=
				RTE_DIM(objs);
			/* Return if we have posted nothing yet */
			if (added == rxq->added)
				return;
			/* Push posted */
			break;
		}

		for (i = 0; i < RTE_DIM(objs);
		     ++i, id = (id + 1) & rxq->ptr_mask) {
			m = objs[i];

			rxd = &rxq->sw_desc[id];
			rxd->mbuf = m;

			SFC_ASSERT(rte_mbuf_refcnt_read(m) == 1);
			m->data_off = RTE_PKTMBUF_HEADROOM;
			SFC_ASSERT(m->next == NULL);
			SFC_ASSERT(m->nb_segs == 1);
			m->port = port_id;

			addr[i] = rte_pktmbuf_iova(m);
		}

		efx_rx_qpost(rxq->common, addr, rxq->buf_size,
			     RTE_DIM(objs), rxq->completed, added);
		added += RTE_DIM(objs);
	} while (--bulks > 0);

	SFC_ASSERT(added != rxq->added);
	rxq->added = added;
	efx_rx_qpush(rxq->common, added, &rxq->pushed);
}
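
/*
 * Translate libefx Rx descriptor classification and checksum flags into
 * mbuf IP and L4 checksum offload flags. If the corresponding EFX flags
 * are absent, the result is left as CKSUM_UNKNOWN (zero).
 */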
static uint64_t
sfc_efx_rx_desc_flags_to_offload_flags(const unsigned int desc_flags)
{
	uint64_t mbuf_flags = 0;

	switch (desc_flags & (EFX_PKT_IPV4 | EFX_CKSUM_IPV4)) {
	case (EFX_PKT_IPV4 | EFX_CKSUM_IPV4):
		mbuf_flags |= PKT_RX_IP_CKSUM_GOOD;
		break;
	case EFX_PKT_IPV4:
		mbuf_flags |= PKT_RX_IP_CKSUM_BAD;
		break;
	default:
		RTE_BUILD_BUG_ON(PKT_RX_IP_CKSUM_UNKNOWN != 0);
		SFC_ASSERT((mbuf_flags & PKT_RX_IP_CKSUM_MASK) ==
			   PKT_RX_IP_CKSUM_UNKNOWN);
		break;
	}

	switch ((desc_flags &
		 (EFX_PKT_TCP | EFX_PKT_UDP | EFX_CKSUM_TCPUDP))) {
	case (EFX_PKT_TCP | EFX_CKSUM_TCPUDP):
	case (EFX_PKT_UDP | EFX_CKSUM_TCPUDP):
		mbuf_flags |= PKT_RX_L4_CKSUM_GOOD;
		break;
	case EFX_PKT_TCP:
	case EFX_PKT_UDP:
		mbuf_flags |= PKT_RX_L4_CKSUM_BAD;
		break;
	default:
		RTE_BUILD_BUG_ON(PKT_RX_L4_CKSUM_UNKNOWN != 0);
		SFC_ASSERT((mbuf_flags & PKT_RX_L4_CKSUM_MASK) ==
			   PKT_RX_L4_CKSUM_UNKNOWN);
		break;
	}

	return mbuf_flags;
}

static uint32_t
sfc_efx_rx_desc_flags_to_packet_type(const unsigned int desc_flags)
{
	return RTE_PTYPE_L2_ETHER |
		((desc_flags & EFX_PKT_IPV4) ?
			RTE_PTYPE_L3_IPV4_EXT_UNKNOWN : 0) |
		((desc_flags & EFX_PKT_IPV6) ?
			RTE_PTYPE_L3_IPV6_EXT_UNKNOWN : 0) |
		((desc_flags & EFX_PKT_TCP) ? RTE_PTYPE_L4_TCP : 0) |
		((desc_flags & EFX_PKT_UDP) ? RTE_PTYPE_L4_UDP : 0);
}

static const uint32_t *
sfc_efx_supported_ptypes_get(__rte_unused uint32_t tunnel_encaps)
{
	static const uint32_t ptypes[] = {
		RTE_PTYPE_L2_ETHER,
		RTE_PTYPE_L3_IPV4_EXT_UNKNOWN,
		RTE_PTYPE_L3_IPV6_EXT_UNKNOWN,
		RTE_PTYPE_L4_TCP,
		RTE_PTYPE_L4_UDP,
		RTE_PTYPE_UNKNOWN
	};

	return ptypes;
}

#if EFSYS_OPT_RX_SCALE
static void
sfc_efx_rx_set_rss_hash(struct sfc_efx_rxq *rxq, unsigned int flags,
			struct rte_mbuf *m)
{
	uint8_t *mbuf_data;

	if ((rxq->flags & SFC_EFX_RXQ_FLAG_RSS_HASH) == 0)
		return;

	mbuf_data = rte_pktmbuf_mtod(m, uint8_t *);

	if (flags & (EFX_PKT_IPV4 | EFX_PKT_IPV6)) {
		m->hash.rss = efx_pseudo_hdr_hash_get(rxq->common,
						      EFX_RX_HASHALG_TOEPLITZ,
						      mbuf_data);

		m->ol_flags |= PKT_RX_RSS_HASH;
	}
}
#else
static void
sfc_efx_rx_set_rss_hash(__rte_unused struct sfc_efx_rxq *rxq,
			__rte_unused unsigned int flags,
			__rte_unused struct rte_mbuf *m)
{
}
#endif
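
/*
 * Burst receive callback of the libefx-based Rx datapath. Polls the event
 * queue, completes descriptors up to rxq->pending, chains fragments of
 * scattered packets, fills in offload flags, packet type and (if enabled)
 * the RSS hash, and refills the Rx ring before returning.
 */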
static uint16_t
sfc_efx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
	struct sfc_dp_rxq *dp_rxq = rx_queue;
	struct sfc_efx_rxq *rxq = sfc_efx_rxq_by_dp_rxq(dp_rxq);
	unsigned int completed;
	unsigned int prefix_size = rxq->prefix_size;
	unsigned int done_pkts = 0;
	boolean_t discard_next = B_FALSE;
	struct rte_mbuf *scatter_pkt = NULL;

	if (unlikely((rxq->flags & SFC_EFX_RXQ_FLAG_RUNNING) == 0))
		return 0;

	sfc_ev_qpoll(rxq->evq);

	completed = rxq->completed;
	while (completed != rxq->pending && done_pkts < nb_pkts) {
		unsigned int id;
		struct sfc_efx_rx_sw_desc *rxd;
		struct rte_mbuf *m;
		unsigned int seg_len;
		unsigned int desc_flags;

		id = completed++ & rxq->ptr_mask;
		rxd = &rxq->sw_desc[id];
		m = rxd->mbuf;
		desc_flags = rxd->flags;

		if (discard_next)
			goto discard;

		if (desc_flags & (EFX_ADDR_MISMATCH | EFX_DISCARD))
			goto discard;

		if (desc_flags & EFX_PKT_PREFIX_LEN) {
			uint16_t tmp_size;
			int rc __rte_unused;

			rc = efx_pseudo_hdr_pkt_length_get(rxq->common,
				rte_pktmbuf_mtod(m, uint8_t *), &tmp_size);
			SFC_ASSERT(rc == 0);
			seg_len = tmp_size;
		} else {
			seg_len = rxd->size - prefix_size;
		}

		rte_pktmbuf_data_len(m) = seg_len;
		rte_pktmbuf_pkt_len(m) = seg_len;

		if (scatter_pkt != NULL) {
			if (rte_pktmbuf_chain(scatter_pkt, m) != 0) {
				rte_pktmbuf_free(scatter_pkt);
				goto discard;
			}
			/* The packet to deliver */
			m = scatter_pkt;
		}

		if (desc_flags & EFX_PKT_CONT) {
			/* The packet is scattered, more fragments to come */
			scatter_pkt = m;
			/* Further fragments have no prefix */
			prefix_size = 0;
			continue;
		}

		/* Scattered packet is done */
		scatter_pkt = NULL;
		/* The first fragment of the packet has prefix */
		prefix_size = rxq->prefix_size;

		m->ol_flags =
			sfc_efx_rx_desc_flags_to_offload_flags(desc_flags);
		m->packet_type =
			sfc_efx_rx_desc_flags_to_packet_type(desc_flags);

		/*
		 * Extract RSS hash from the packet prefix and
		 * set the corresponding field (if needed and possible)
		 */
		sfc_efx_rx_set_rss_hash(rxq, desc_flags, m);

		m->data_off += prefix_size;

		*rx_pkts++ = m;
		done_pkts++;
		continue;

discard:
		discard_next = ((desc_flags & EFX_PKT_CONT) != 0);
		rte_mempool_put(rxq->refill_mb_pool, m);
		rxd->mbuf = NULL;
	}

	/* pending is only moved when entire packet is received */
	SFC_ASSERT(scatter_pkt == NULL);

	rxq->completed = completed;

	sfc_efx_rx_qrefill(rxq);

	return done_pkts;
}

static sfc_dp_rx_qdesc_npending_t sfc_efx_rx_qdesc_npending;
static unsigned int
sfc_efx_rx_qdesc_npending(struct sfc_dp_rxq *dp_rxq)
{
	struct sfc_efx_rxq *rxq = sfc_efx_rxq_by_dp_rxq(dp_rxq);

	if ((rxq->flags & SFC_EFX_RXQ_FLAG_RUNNING) == 0)
		return 0;

	sfc_ev_qpoll(rxq->evq);

	return rxq->pending - rxq->completed;
}
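
/*
 * Report the state of the descriptor at the given offset from the next
 * descriptor to be completed:
 *   offset < pending - completed  =>  DONE (already filled in by HW),
 *   offset < added - completed    =>  AVAIL (posted to HW, not filled yet),
 *   otherwise                     =>  UNAVAIL (not posted to HW).
 */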
static sfc_dp_rx_qdesc_status_t sfc_efx_rx_qdesc_status;
static int
sfc_efx_rx_qdesc_status(struct sfc_dp_rxq *dp_rxq, uint16_t offset)
{
	struct sfc_efx_rxq *rxq = sfc_efx_rxq_by_dp_rxq(dp_rxq);

	if (unlikely(offset > rxq->ptr_mask))
		return -EINVAL;

	/*
	 * Poll EvQ to derive up-to-date 'rxq->pending' figure;
	 * it is required for the queue to be running, but the
	 * check is omitted because API design assumes that it
	 * is the duty of the caller to satisfy all conditions
	 */
	SFC_ASSERT((rxq->flags & SFC_EFX_RXQ_FLAG_RUNNING) ==
		   SFC_EFX_RXQ_FLAG_RUNNING);
	sfc_ev_qpoll(rxq->evq);

	/*
	 * There is a handful of reserved entries in the ring,
	 * but an explicit check whether the offset points to
	 * a reserved entry is neglected since the two checks
	 * below rely on the figures which take the HW limits
	 * into account and thus if an entry is reserved, the
	 * checks will fail and UNAVAIL code will be returned
	 */

	if (offset < (rxq->pending - rxq->completed))
		return RTE_ETH_RX_DESC_DONE;

	if (offset < (rxq->added - rxq->completed))
		return RTE_ETH_RX_DESC_AVAIL;

	return RTE_ETH_RX_DESC_UNAVAIL;
}

struct sfc_rxq *
sfc_rxq_by_dp_rxq(const struct sfc_dp_rxq *dp_rxq)
{
	const struct sfc_dp_queue *dpq = &dp_rxq->dpq;
	struct rte_eth_dev *eth_dev;
	struct sfc_adapter *sa;
	struct sfc_rxq *rxq;

	SFC_ASSERT(rte_eth_dev_is_valid_port(dpq->port_id));
	eth_dev = &rte_eth_devices[dpq->port_id];

	sa = eth_dev->data->dev_private;

	SFC_ASSERT(dpq->queue_id < sa->rxq_count);
	rxq = sa->rxq_info[dpq->queue_id].rxq;

	SFC_ASSERT(rxq != NULL);
	return rxq;
}

static sfc_dp_rx_qsize_up_rings_t sfc_efx_rx_qsize_up_rings;
static int
sfc_efx_rx_qsize_up_rings(uint16_t nb_rx_desc,
			  unsigned int *rxq_entries,
			  unsigned int *evq_entries,
			  unsigned int *rxq_max_fill_level)
{
	*rxq_entries = nb_rx_desc;
	*evq_entries = nb_rx_desc;
	*rxq_max_fill_level = EFX_RXQ_LIMIT(*rxq_entries);
	return 0;
}

static sfc_dp_rx_qcreate_t sfc_efx_rx_qcreate;
static int
sfc_efx_rx_qcreate(uint16_t port_id, uint16_t queue_id,
		   const struct rte_pci_addr *pci_addr, int socket_id,
		   const struct sfc_dp_rx_qcreate_info *info,
		   struct sfc_dp_rxq **dp_rxqp)
{
	struct sfc_efx_rxq *rxq;
	int rc;

	rc = ENOMEM;
	rxq = rte_zmalloc_socket("sfc-efx-rxq", sizeof(*rxq),
				 RTE_CACHE_LINE_SIZE, socket_id);
	if (rxq == NULL)
		goto fail_rxq_alloc;

	sfc_dp_queue_init(&rxq->dp.dpq, port_id, queue_id, pci_addr);

	rc = ENOMEM;
	rxq->sw_desc = rte_calloc_socket("sfc-efx-rxq-sw_desc",
					 info->rxq_entries,
					 sizeof(*rxq->sw_desc),
					 RTE_CACHE_LINE_SIZE, socket_id);
	if (rxq->sw_desc == NULL)
		goto fail_desc_alloc;

	/* efx datapath is bound to efx control path */
	rxq->evq = sfc_rxq_by_dp_rxq(&rxq->dp)->evq;
	if (info->flags & SFC_RXQ_FLAG_RSS_HASH)
		rxq->flags |= SFC_EFX_RXQ_FLAG_RSS_HASH;
	rxq->ptr_mask = info->rxq_entries - 1;
	rxq->batch_max = info->batch_max;
	rxq->prefix_size = info->prefix_size;
	rxq->max_fill_level = info->max_fill_level;
	rxq->refill_threshold = info->refill_threshold;
	rxq->buf_size = info->buf_size;
	rxq->refill_mb_pool = info->refill_mb_pool;

	*dp_rxqp = &rxq->dp;
	return 0;

fail_desc_alloc:
	rte_free(rxq);

fail_rxq_alloc:
	return rc;
}

static sfc_dp_rx_qdestroy_t sfc_efx_rx_qdestroy;
static void
sfc_efx_rx_qdestroy(struct sfc_dp_rxq *dp_rxq)
{
	struct sfc_efx_rxq *rxq = sfc_efx_rxq_by_dp_rxq(dp_rxq);

	rte_free(rxq->sw_desc);
	rte_free(rxq);
}

static sfc_dp_rx_qstart_t sfc_efx_rx_qstart;
static int
sfc_efx_rx_qstart(struct sfc_dp_rxq *dp_rxq,
		  __rte_unused unsigned int evq_read_ptr)
{
	/* libefx-based datapath is specific to libefx-based PMD */
	struct sfc_efx_rxq *rxq = sfc_efx_rxq_by_dp_rxq(dp_rxq);
	struct sfc_rxq *crxq = sfc_rxq_by_dp_rxq(dp_rxq);

	rxq->common = crxq->common;

	rxq->pending = rxq->completed = rxq->added = rxq->pushed = 0;

	sfc_efx_rx_qrefill(rxq);

	rxq->flags |= (SFC_EFX_RXQ_FLAG_STARTED | SFC_EFX_RXQ_FLAG_RUNNING);

	return 0;
}

static sfc_dp_rx_qstop_t sfc_efx_rx_qstop;
static void
sfc_efx_rx_qstop(struct sfc_dp_rxq *dp_rxq,
		 __rte_unused unsigned int *evq_read_ptr)
{
	struct sfc_efx_rxq *rxq = sfc_efx_rxq_by_dp_rxq(dp_rxq);

	rxq->flags &= ~SFC_EFX_RXQ_FLAG_RUNNING;

	/* libefx-based datapath is bound to libefx-based PMD and uses
	 * event queue structure directly. So, there is no necessity to
	 * return EvQ read pointer.
	 */
}

static sfc_dp_rx_qpurge_t sfc_efx_rx_qpurge;
static void
sfc_efx_rx_qpurge(struct sfc_dp_rxq *dp_rxq)
{
	struct sfc_efx_rxq *rxq = sfc_efx_rxq_by_dp_rxq(dp_rxq);
	unsigned int i;
	struct sfc_efx_rx_sw_desc *rxd;

	for (i = rxq->completed; i != rxq->added; ++i) {
		rxd = &rxq->sw_desc[i & rxq->ptr_mask];
		rte_mempool_put(rxq->refill_mb_pool, rxd->mbuf);
		rxd->mbuf = NULL;
		/* Packed stream relies on 0 in inactive SW desc.
		 * Rx queue stop is not performance critical, so
		 * there is no harm to do it always.
		 */
		rxd->flags = 0;
		rxd->size = 0;
	}

	rxq->flags &= ~SFC_EFX_RXQ_FLAG_STARTED;
}
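
/* Rx datapath operations of the libefx-based implementation (SFC_KVARG_DATAPATH_EFX) */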
struct sfc_dp_rx sfc_efx_rx = {
	.dp = {
		.name		= SFC_KVARG_DATAPATH_EFX,
		.type		= SFC_DP_RX,
		.hw_fw_caps	= 0,
	},
	.features		= SFC_DP_RX_FEAT_SCATTER,
	.qsize_up_rings		= sfc_efx_rx_qsize_up_rings,
	.qcreate		= sfc_efx_rx_qcreate,
	.qdestroy		= sfc_efx_rx_qdestroy,
	.qstart			= sfc_efx_rx_qstart,
	.qstop			= sfc_efx_rx_qstop,
	.qpurge			= sfc_efx_rx_qpurge,
	.supported_ptypes_get	= sfc_efx_supported_ptypes_get,
	.qdesc_npending		= sfc_efx_rx_qdesc_npending,
	.qdesc_status		= sfc_efx_rx_qdesc_status,
	.pkt_burst		= sfc_efx_recv_pkts,
};

unsigned int
sfc_rx_qdesc_npending(struct sfc_adapter *sa, unsigned int sw_index)
{
	struct sfc_rxq *rxq;

	SFC_ASSERT(sw_index < sa->rxq_count);
	rxq = sa->rxq_info[sw_index].rxq;

	if (rxq == NULL || (rxq->state & SFC_RXQ_STARTED) == 0)
		return 0;

	return sa->dp_rx->qdesc_npending(rxq->dp);
}

int
sfc_rx_qdesc_done(struct sfc_dp_rxq *dp_rxq, unsigned int offset)
{
	struct sfc_rxq *rxq = sfc_rxq_by_dp_rxq(dp_rxq);

	return offset < rxq->evq->sa->dp_rx->qdesc_npending(dp_rxq);
}
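
/*
 * Flush the Rx queue and wait for the flush done (or failed) event. In the
 * worst case this takes SFC_RX_QFLUSH_ATTEMPTS retries, each polling up to
 * SFC_RX_QFLUSH_POLL_ATTEMPTS times with SFC_RX_QFLUSH_POLL_WAIT_MS delay:
 * 3 * 2000 * 1 ms = 6 seconds.
 */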
static void
sfc_rx_qflush(struct sfc_adapter *sa, unsigned int sw_index)
{
	struct sfc_rxq *rxq;
	unsigned int retry_count;
	unsigned int wait_count;
	int rc;

	rxq = sa->rxq_info[sw_index].rxq;
	SFC_ASSERT(rxq->state & SFC_RXQ_STARTED);

	/*
	 * Retry Rx queue flushing in the case of flush failed or
	 * timeout. In the worst case it can delay for 6 seconds.
	 */
	for (retry_count = 0;
	     ((rxq->state & SFC_RXQ_FLUSHED) == 0) &&
	     (retry_count < SFC_RX_QFLUSH_ATTEMPTS);
	     ++retry_count) {
		rc = efx_rx_qflush(rxq->common);
		if (rc != 0) {
			rxq->state |= (rc == EALREADY) ?
				SFC_RXQ_FLUSHED : SFC_RXQ_FLUSH_FAILED;
			break;
		}
		rxq->state &= ~SFC_RXQ_FLUSH_FAILED;
		rxq->state |= SFC_RXQ_FLUSHING;

		/*
		 * Wait for Rx queue flush done or failed event at least
		 * SFC_RX_QFLUSH_POLL_WAIT_MS milliseconds and not more
		 * than 2 seconds (SFC_RX_QFLUSH_POLL_WAIT_MS multiplied
		 * by SFC_RX_QFLUSH_POLL_ATTEMPTS).
		 */
		wait_count = 0;
		do {
			rte_delay_ms(SFC_RX_QFLUSH_POLL_WAIT_MS);
			sfc_ev_qpoll(rxq->evq);
		} while ((rxq->state & SFC_RXQ_FLUSHING) &&
			 (wait_count++ < SFC_RX_QFLUSH_POLL_ATTEMPTS));

		if (rxq->state & SFC_RXQ_FLUSHING)
			sfc_err(sa, "RxQ %u flush timed out", sw_index);

		if (rxq->state & SFC_RXQ_FLUSH_FAILED)
			sfc_err(sa, "RxQ %u flush failed", sw_index);

		if (rxq->state & SFC_RXQ_FLUSHED)
			sfc_info(sa, "RxQ %u flushed", sw_index);
	}

	sa->dp_rx->qpurge(rxq->dp);
}

static int
sfc_rx_default_rxq_set_filter(struct sfc_adapter *sa, struct sfc_rxq *rxq)
{
	boolean_t rss = (sa->rss_channels > 0) ? B_TRUE : B_FALSE;
	struct sfc_port *port = &sa->port;
	int rc;

	/*
	 * If promiscuous or all-multicast mode has been requested, setting
	 * the filter for the default Rx queue might fail, in particular,
	 * while running over a PCI function which is not a member of the
	 * corresponding privilege groups; if this occurs, a few iterations
	 * will be made to repeat this step without the promiscuous and
	 * all-multicast flags set
	 */
retry:
	rc = efx_mac_filter_default_rxq_set(sa->nic, rxq->common, rss);
	if (rc == 0)
		return 0;
	else if (rc != EOPNOTSUPP)
		return rc;

	if (port->promisc) {
		sfc_warn(sa, "promiscuous mode has been requested, "
			     "but the HW rejects it");
		sfc_warn(sa, "promiscuous mode will be disabled");

		port->promisc = B_FALSE;
		rc = sfc_set_rx_mode(sa);
		if (rc != 0)
			return rc;

		goto retry;
	}

	if (port->allmulti) {
		sfc_warn(sa, "all-multicast mode has been requested, "
			     "but the HW rejects it");
		sfc_warn(sa, "all-multicast mode will be disabled");

		port->allmulti = B_FALSE;
		rc = sfc_set_rx_mode(sa);
		if (rc != 0)
			return rc;

		goto retry;
	}

	return rc;
}
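
/*
 * Start the Rx queue: start its event queue, create and enable the common
 * (libefx) Rx queue, start the datapath queue and, for the default queue,
 * install the MAC filter default RxQ.
 */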
int
sfc_rx_qstart(struct sfc_adapter *sa, unsigned int sw_index)
{
	struct sfc_port *port = &sa->port;
	struct sfc_rxq_info *rxq_info;
	struct sfc_rxq *rxq;
	struct sfc_evq *evq;
	int rc;

	sfc_log_init(sa, "sw_index=%u", sw_index);

	SFC_ASSERT(sw_index < sa->rxq_count);

	rxq_info = &sa->rxq_info[sw_index];
	rxq = rxq_info->rxq;
	SFC_ASSERT(rxq->state == SFC_RXQ_INITIALIZED);

	evq = rxq->evq;

	rc = sfc_ev_qstart(evq, sfc_evq_index_by_rxq_sw_index(sa, sw_index));
	if (rc != 0)
		goto fail_ev_qstart;

	rc = efx_rx_qcreate(sa->nic, rxq->hw_index, 0, rxq_info->type,
			    &rxq->mem, rxq_info->entries,
			    0 /* not used on EF10 */, rxq_info->type_flags,
			    evq->common, &rxq->common);
	if (rc != 0)
		goto fail_rx_qcreate;

	efx_rx_qenable(rxq->common);

	rc = sa->dp_rx->qstart(rxq->dp, evq->read_ptr);
	if (rc != 0)
		goto fail_dp_qstart;

	rxq->state |= SFC_RXQ_STARTED;

	if ((sw_index == 0) && !port->isolated) {
		rc = sfc_rx_default_rxq_set_filter(sa, rxq);
		if (rc != 0)
			goto fail_mac_filter_default_rxq_set;
	}

	/* It seems to be used by DPDK for debug purposes only ('rte_ether') */
	sa->eth_dev->data->rx_queue_state[sw_index] =
		RTE_ETH_QUEUE_STATE_STARTED;

	return 0;

fail_mac_filter_default_rxq_set:
	sa->dp_rx->qstop(rxq->dp, &rxq->evq->read_ptr);

fail_dp_qstart:
	sfc_rx_qflush(sa, sw_index);

fail_rx_qcreate:
	sfc_ev_qstop(evq);

fail_ev_qstart:
	return rc;
}

void
sfc_rx_qstop(struct sfc_adapter *sa, unsigned int sw_index)
{
	struct sfc_rxq_info *rxq_info;
	struct sfc_rxq *rxq;

	sfc_log_init(sa, "sw_index=%u", sw_index);

	SFC_ASSERT(sw_index < sa->rxq_count);

	rxq_info = &sa->rxq_info[sw_index];
	rxq = rxq_info->rxq;

	if (rxq->state == SFC_RXQ_INITIALIZED)
		return;
	SFC_ASSERT(rxq->state & SFC_RXQ_STARTED);

	/* It seems to be used by DPDK for debug purposes only ('rte_ether') */
	sa->eth_dev->data->rx_queue_state[sw_index] =
		RTE_ETH_QUEUE_STATE_STOPPED;

	sa->dp_rx->qstop(rxq->dp, &rxq->evq->read_ptr);

	if (sw_index == 0)
		efx_mac_filter_default_rxq_clear(sa->nic);

	sfc_rx_qflush(sa, sw_index);

	rxq->state = SFC_RXQ_INITIALIZED;

	efx_rx_qdestroy(rxq->common);

	sfc_ev_qstop(rxq->evq);
}

uint64_t
sfc_rx_get_dev_offload_caps(struct sfc_adapter *sa)
{
	const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
	uint64_t caps = 0;

	caps |= DEV_RX_OFFLOAD_JUMBO_FRAME;
	caps |= DEV_RX_OFFLOAD_CRC_STRIP;
	caps |= DEV_RX_OFFLOAD_IPV4_CKSUM;
	caps |= DEV_RX_OFFLOAD_UDP_CKSUM;
	caps |= DEV_RX_OFFLOAD_TCP_CKSUM;

	if (encp->enc_tunnel_encapsulations_supported &&
	    (sa->dp_rx->features & SFC_DP_RX_FEAT_TUNNELS))
		caps |= DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM;

	return caps;
}

uint64_t
sfc_rx_get_queue_offload_caps(struct sfc_adapter *sa)
{
	uint64_t caps = 0;

	if (sa->dp_rx->features & SFC_DP_RX_FEAT_SCATTER)
		caps |= DEV_RX_OFFLOAD_SCATTER;

	return caps;
}

static void
sfc_rx_log_offloads(struct sfc_adapter *sa, const char *offload_group,
		    const char *verdict, uint64_t offloads)
{
	unsigned long long bit;

	while ((bit = __builtin_ffsll(offloads)) != 0) {
		uint64_t flag = (1ULL << --bit);

		sfc_err(sa, "Rx %s offload %s %s", offload_group,
			rte_eth_dev_rx_offload_name(flag), verdict);

		offloads &= ~flag;
	}
}

static boolean_t
sfc_rx_queue_offloads_mismatch(struct sfc_adapter *sa, uint64_t requested)
{
	uint64_t mandatory = sa->eth_dev->data->dev_conf.rxmode.offloads;
	uint64_t supported = sfc_rx_get_dev_offload_caps(sa) |
			     sfc_rx_get_queue_offload_caps(sa);
	uint64_t rejected = requested & ~supported;
	uint64_t missing = (requested & mandatory) ^ mandatory;
	boolean_t mismatch = B_FALSE;

	if (rejected) {
		sfc_rx_log_offloads(sa, "queue", "is unsupported", rejected);
		mismatch = B_TRUE;
	}

	if (missing) {
		sfc_rx_log_offloads(sa, "queue", "must be set", missing);
		mismatch = B_TRUE;
	}

	return mismatch;
}

static int
sfc_rx_qcheck_conf(struct sfc_adapter *sa, unsigned int rxq_max_fill_level,
		   const struct rte_eth_rxconf *rx_conf)
{
	uint64_t offloads_supported = sfc_rx_get_dev_offload_caps(sa) |
				      sfc_rx_get_queue_offload_caps(sa);
	int rc = 0;

	if (rx_conf->rx_thresh.pthresh != 0 ||
	    rx_conf->rx_thresh.hthresh != 0 ||
	    rx_conf->rx_thresh.wthresh != 0) {
		sfc_warn(sa,
			"RxQ prefetch/host/writeback thresholds are not supported");
	}

	if (rx_conf->rx_free_thresh > rxq_max_fill_level) {
		sfc_err(sa,
			"RxQ free threshold too large: %u vs maximum %u",
			rx_conf->rx_free_thresh, rxq_max_fill_level);
		rc = EINVAL;
	}

	if (rx_conf->rx_drop_en == 0) {
		sfc_err(sa, "RxQ drop disable is not supported");
		rc = EINVAL;
	}

	if ((rx_conf->offloads & DEV_RX_OFFLOAD_CHECKSUM) !=
	    DEV_RX_OFFLOAD_CHECKSUM)
		sfc_warn(sa, "Rx checksum offloads cannot be disabled - always on (IPv4/TCP/UDP)");

	if ((offloads_supported & DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM) &&
	    (~rx_conf->offloads & DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM))
		sfc_warn(sa, "Rx outer IPv4 checksum offload cannot be disabled - always on");

	if (sfc_rx_queue_offloads_mismatch(sa, rx_conf->offloads))
		rc = EINVAL;

	return rc;
}
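
/*
 * Conservative estimate of the Rx buffer start alignment guaranteed by the
 * mempool: bounded both by the cache-line alignment of the mbuf object and
 * by the alignment of the data offset within it.
 */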
static unsigned int
sfc_rx_mbuf_data_alignment(struct rte_mempool *mb_pool)
{
	uint32_t data_off;
	uint32_t order;

	/* The mbuf object itself is always cache line aligned */
	order = rte_bsf32(RTE_CACHE_LINE_SIZE);

	/* Data offset from mbuf object start */
	data_off = sizeof(struct rte_mbuf) + rte_pktmbuf_priv_size(mb_pool) +
		RTE_PKTMBUF_HEADROOM;

	order = MIN(order, rte_bsf32(data_off));

	return 1u << (order - 1);
}
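
/*
 * Derive the usable Rx buffer size from the mempool data room size: drop
 * the headroom, reserve space to satisfy the NIC buffer start alignment
 * and make sure that end padding done by the NIC cannot write beyond the
 * buffer.
 */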
static uint16_t
sfc_rx_mb_pool_buf_size(struct sfc_adapter *sa, struct rte_mempool *mb_pool)
{
	const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
	const uint32_t nic_align_start = MAX(1, encp->enc_rx_buf_align_start);
	const uint32_t nic_align_end = MAX(1, encp->enc_rx_buf_align_end);
	uint16_t buf_size;
	unsigned int buf_aligned;
	unsigned int start_alignment;
	unsigned int end_padding_alignment;

	/* Below it is assumed that both alignments are powers of 2 */
	SFC_ASSERT(rte_is_power_of_2(nic_align_start));
	SFC_ASSERT(rte_is_power_of_2(nic_align_end));

	/*
	 * mbuf is always cache line aligned, double-check
	 * that it meets rx buffer start alignment requirements.
	 */

	/* Start from mbuf pool data room size */
	buf_size = rte_pktmbuf_data_room_size(mb_pool);

	/* Remove headroom */
	if (buf_size <= RTE_PKTMBUF_HEADROOM) {
		sfc_err(sa,
			"RxQ mbuf pool %s object data room size %u is smaller than headroom %u",
			mb_pool->name, buf_size, RTE_PKTMBUF_HEADROOM);
		return 0;
	}
	buf_size -= RTE_PKTMBUF_HEADROOM;

	/* Calculate guaranteed data start alignment */
	buf_aligned = sfc_rx_mbuf_data_alignment(mb_pool);

	/* Reserve space for start alignment */
	if (buf_aligned < nic_align_start) {
		start_alignment = nic_align_start - buf_aligned;
		if (buf_size <= start_alignment) {
			sfc_err(sa,
				"RxQ mbuf pool %s object data room size %u is insufficient for headroom %u and buffer start alignment %u required by NIC",
				mb_pool->name,
				rte_pktmbuf_data_room_size(mb_pool),
				RTE_PKTMBUF_HEADROOM, start_alignment);
			return 0;
		}
		buf_aligned = nic_align_start;
		buf_size -= start_alignment;
	} else {
		start_alignment = 0;
	}

	/* Make sure that end padding does not write beyond the buffer */
	if (buf_aligned < nic_align_end) {
		/*
		 * Estimate space which can be lost. If the guaranteed buffer
		 * size is odd, lost space is (nic_align_end - 1). The more
		 * accurate formula is below.
		 */
		end_padding_alignment = nic_align_end -
			MIN(buf_aligned, 1u << (rte_bsf32(buf_size) - 1));
		if (buf_size <= end_padding_alignment) {
			sfc_err(sa,
				"RxQ mbuf pool %s object data room size %u is insufficient for headroom %u, buffer start alignment %u and end padding alignment %u required by NIC",
				mb_pool->name,
				rte_pktmbuf_data_room_size(mb_pool),
				RTE_PKTMBUF_HEADROOM, start_alignment,
				end_padding_alignment);
			return 0;
		}
		buf_size -= end_padding_alignment;
	} else {
		/*
		 * Start is aligned the same or better than end,
		 * just align length.
		 */
		buf_size = P2ALIGN(buf_size, nic_align_end);
	}

	return buf_size;
}
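
/*
 * Initialize (but do not start) the Rx queue: size up the rings, validate
 * the configuration and Rx buffer size, create the event queue, allocate
 * the DMA ring and create the datapath Rx queue.
 */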
int
sfc_rx_qinit(struct sfc_adapter *sa, unsigned int sw_index,
	     uint16_t nb_rx_desc, unsigned int socket_id,
	     const struct rte_eth_rxconf *rx_conf,
	     struct rte_mempool *mb_pool)
{
	const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
	int rc;
	unsigned int rxq_entries;
	unsigned int evq_entries;
	unsigned int rxq_max_fill_level;
	uint16_t buf_size;
	struct sfc_rxq_info *rxq_info;
	struct sfc_evq *evq;
	struct sfc_rxq *rxq;
	struct sfc_dp_rx_qcreate_info info;

	rc = sa->dp_rx->qsize_up_rings(nb_rx_desc, &rxq_entries, &evq_entries,
				       &rxq_max_fill_level);
	if (rc != 0)
		goto fail_size_up_rings;
	SFC_ASSERT(rxq_entries >= EFX_RXQ_MINNDESCS);
	SFC_ASSERT(rxq_entries <= EFX_RXQ_MAXNDESCS);
	SFC_ASSERT(rxq_entries >= nb_rx_desc);
	SFC_ASSERT(rxq_max_fill_level <= nb_rx_desc);

	rc = sfc_rx_qcheck_conf(sa, rxq_max_fill_level, rx_conf);
	if (rc != 0)
		goto fail_bad_conf;

	buf_size = sfc_rx_mb_pool_buf_size(sa, mb_pool);
	if (buf_size == 0) {
		sfc_err(sa, "RxQ %u mbuf pool object size is too small",
			sw_index);
		rc = EINVAL;
		goto fail_bad_conf;
	}

	if ((buf_size < sa->port.pdu + encp->enc_rx_prefix_size) &&
	    (~rx_conf->offloads & DEV_RX_OFFLOAD_SCATTER)) {
		sfc_err(sa, "Rx scatter is disabled and RxQ %u mbuf pool "
			"object size is too small", sw_index);
		sfc_err(sa, "RxQ %u calculated Rx buffer size is %u vs "
			"PDU size %u plus Rx prefix %u bytes",
			sw_index, buf_size, (unsigned int)sa->port.pdu,
			encp->enc_rx_prefix_size);
		rc = EINVAL;
		goto fail_bad_conf;
	}

	SFC_ASSERT(sw_index < sa->rxq_count);
	rxq_info = &sa->rxq_info[sw_index];

	SFC_ASSERT(rxq_entries <= rxq_info->max_entries);
	rxq_info->entries = rxq_entries;
	rxq_info->type = EFX_RXQ_TYPE_DEFAULT;
	rxq_info->type_flags =
		(rx_conf->offloads & DEV_RX_OFFLOAD_SCATTER) ?
		EFX_RXQ_FLAG_SCATTER : EFX_RXQ_FLAG_NONE;

	if ((encp->enc_tunnel_encapsulations_supported != 0) &&
	    (sa->dp_rx->features & SFC_DP_RX_FEAT_TUNNELS))
		rxq_info->type_flags |= EFX_RXQ_FLAG_INNER_CLASSES;

	rc = sfc_ev_qinit(sa, SFC_EVQ_TYPE_RX, sw_index,
			  evq_entries, socket_id, &evq);
	if (rc != 0)
		goto fail_ev_qinit;

	rc = ENOMEM;
	rxq = rte_zmalloc_socket("sfc-rxq", sizeof(*rxq), RTE_CACHE_LINE_SIZE,
				 socket_id);
	if (rxq == NULL)
		goto fail_rxq_alloc;

	rxq_info->rxq = rxq;

	rxq->evq = evq;
	rxq->hw_index = sw_index;
	rxq->refill_threshold =
		RTE_MAX(rx_conf->rx_free_thresh, SFC_RX_REFILL_BULK);
	rxq->refill_mb_pool = mb_pool;

	rc = sfc_dma_alloc(sa, "rxq", sw_index, EFX_RXQ_SIZE(rxq_info->entries),
			   socket_id, &rxq->mem);
	if (rc != 0)
		goto fail_dma_alloc;

	memset(&info, 0, sizeof(info));
	info.refill_mb_pool = rxq->refill_mb_pool;
	info.max_fill_level = rxq_max_fill_level;
	info.refill_threshold = rxq->refill_threshold;
	info.buf_size = buf_size;
	info.batch_max = encp->enc_rx_batch_max;
	info.prefix_size = encp->enc_rx_prefix_size;

#if EFSYS_OPT_RX_SCALE
	if (sa->hash_support == EFX_RX_HASH_AVAILABLE && sa->rss_channels > 0)
		info.flags |= SFC_RXQ_FLAG_RSS_HASH;
#endif

	info.rxq_entries = rxq_info->entries;
	info.rxq_hw_ring = rxq->mem.esm_base;
	info.evq_entries = evq_entries;
	info.evq_hw_ring = evq->mem.esm_base;
	info.hw_index = rxq->hw_index;
	info.mem_bar = sa->mem_bar.esb_base;

	rc = sa->dp_rx->qcreate(sa->eth_dev->data->port_id, sw_index,
				&RTE_ETH_DEV_TO_PCI(sa->eth_dev)->addr,
				socket_id, &info, &rxq->dp);
	if (rc != 0)
		goto fail_dp_rx_qcreate;

	evq->dp_rxq = rxq->dp;

	rxq->state = SFC_RXQ_INITIALIZED;

	rxq_info->deferred_start = (rx_conf->rx_deferred_start != 0);

	return 0;

fail_dp_rx_qcreate:
	sfc_dma_free(sa, &rxq->mem);

fail_dma_alloc:
	rxq_info->rxq = NULL;
	rte_free(rxq);

fail_rxq_alloc:
	sfc_ev_qfini(evq);

fail_ev_qinit:
	rxq_info->entries = 0;

fail_bad_conf:
fail_size_up_rings:
	sfc_log_init(sa, "failed %d", rc);
	return rc;
}

void
sfc_rx_qfini(struct sfc_adapter *sa, unsigned int sw_index)
{
	struct sfc_rxq_info *rxq_info;
	struct sfc_rxq *rxq;

	SFC_ASSERT(sw_index < sa->rxq_count);

	rxq_info = &sa->rxq_info[sw_index];

	rxq = rxq_info->rxq;
	SFC_ASSERT(rxq->state == SFC_RXQ_INITIALIZED);

	sa->dp_rx->qdestroy(rxq->dp);
	rxq->dp = NULL;

	rxq_info->rxq = NULL;
	rxq_info->entries = 0;

	sfc_dma_free(sa, &rxq->mem);

	sfc_ev_qfini(rxq->evq);
	rxq->evq = NULL;

	rte_free(rxq);
}
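
/* Conversion helpers between DPDK ETH_RSS_* flags and libefx hash types */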
#if EFSYS_OPT_RX_SCALE
efx_rx_hash_type_t
sfc_rte_to_efx_hash_type(uint64_t rss_hf)
{
	efx_rx_hash_type_t efx_hash_types = 0;

	if ((rss_hf & (ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
		       ETH_RSS_NONFRAG_IPV4_OTHER)) != 0)
		efx_hash_types |= EFX_RX_HASH_IPV4;

	if ((rss_hf & ETH_RSS_NONFRAG_IPV4_TCP) != 0)
		efx_hash_types |= EFX_RX_HASH_TCPIPV4;

	if ((rss_hf & (ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
		       ETH_RSS_NONFRAG_IPV6_OTHER | ETH_RSS_IPV6_EX)) != 0)
		efx_hash_types |= EFX_RX_HASH_IPV6;

	if ((rss_hf & (ETH_RSS_NONFRAG_IPV6_TCP | ETH_RSS_IPV6_TCP_EX)) != 0)
		efx_hash_types |= EFX_RX_HASH_TCPIPV6;

	return efx_hash_types;
}

uint64_t
sfc_efx_to_rte_hash_type(efx_rx_hash_type_t efx_hash_types)
{
	uint64_t rss_hf = 0;

	if ((efx_hash_types & EFX_RX_HASH_IPV4) != 0)
		rss_hf |= (ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
			   ETH_RSS_NONFRAG_IPV4_OTHER);

	if ((efx_hash_types & EFX_RX_HASH_TCPIPV4) != 0)
		rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP;

	if ((efx_hash_types & EFX_RX_HASH_IPV6) != 0)
		rss_hf |= (ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
			   ETH_RSS_NONFRAG_IPV6_OTHER | ETH_RSS_IPV6_EX);

	if ((efx_hash_types & EFX_RX_HASH_TCPIPV6) != 0)
		rss_hf |= (ETH_RSS_NONFRAG_IPV6_TCP | ETH_RSS_IPV6_TCP_EX);

	return rss_hf;
}
#endif
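
/*
 * Apply the RSS configuration to the default RSS context: hash algorithm
 * and types, hash key and indirection table.
 */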
#if EFSYS_OPT_RX_SCALE
static int
sfc_rx_rss_config(struct sfc_adapter *sa)
{
	int rc = 0;

	if (sa->rss_channels > 0) {
		rc = efx_rx_scale_mode_set(sa->nic, EFX_RSS_CONTEXT_DEFAULT,
					   EFX_RX_HASHALG_TOEPLITZ,
					   sa->rss_hash_types, B_TRUE);
		if (rc != 0)
			goto finish;

		rc = efx_rx_scale_key_set(sa->nic, EFX_RSS_CONTEXT_DEFAULT,
					  sa->rss_key,
					  sizeof(sa->rss_key));
		if (rc != 0)
			goto finish;

		rc = efx_rx_scale_tbl_set(sa->nic, EFX_RSS_CONTEXT_DEFAULT,
					  sa->rss_tbl, RTE_DIM(sa->rss_tbl));
	}

finish:
	return rc;
}
#else
static int
sfc_rx_rss_config(__rte_unused struct sfc_adapter *sa)
{
	return 0;
}
#endif
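
/*
 * Start the Rx subsystem: initialise the common Rx module, apply the RSS
 * configuration and start all Rx queues except deferred-start queues which
 * have not been started explicitly.
 */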
int
sfc_rx_start(struct sfc_adapter *sa)
{
	unsigned int sw_index;
	int rc;

	sfc_log_init(sa, "rxq_count=%u", sa->rxq_count);

	rc = efx_rx_init(sa->nic);
	if (rc != 0)
		goto fail_rx_init;

	rc = sfc_rx_rss_config(sa);
	if (rc != 0)
		goto fail_rss_config;

	for (sw_index = 0; sw_index < sa->rxq_count; ++sw_index) {
		if ((!sa->rxq_info[sw_index].deferred_start ||
		     sa->rxq_info[sw_index].deferred_started)) {
			rc = sfc_rx_qstart(sa, sw_index);
			if (rc != 0)
				goto fail_rx_qstart;
		}
	}

	return 0;

fail_rx_qstart:
	while (sw_index-- > 0)
		sfc_rx_qstop(sa, sw_index);

fail_rss_config:
	efx_rx_fini(sa->nic);

fail_rx_init:
	sfc_log_init(sa, "failed %d", rc);
	return rc;
}

void
sfc_rx_stop(struct sfc_adapter *sa)
{
	unsigned int sw_index;

	sfc_log_init(sa, "rxq_count=%u", sa->rxq_count);

	sw_index = sa->rxq_count;
	while (sw_index-- > 0) {
		if (sa->rxq_info[sw_index].rxq != NULL)
			sfc_rx_qstop(sa, sw_index);
	}

	efx_rx_fini(sa->nic);
}

static int
sfc_rx_qinit_info(struct sfc_adapter *sa, unsigned int sw_index)
{
	struct sfc_rxq_info *rxq_info = &sa->rxq_info[sw_index];
	unsigned int max_entries;

	max_entries = EFX_RXQ_MAXNDESCS;
	SFC_ASSERT(rte_is_power_of_2(max_entries));

	rxq_info->max_entries = max_entries;

	return 0;
}

static int
sfc_rx_check_mode(struct sfc_adapter *sa, struct rte_eth_rxmode *rxmode)
{
	uint64_t offloads_supported = sfc_rx_get_dev_offload_caps(sa) |
				      sfc_rx_get_queue_offload_caps(sa);
	uint64_t offloads_rejected = rxmode->offloads & ~offloads_supported;
	int rc = 0;

	switch (rxmode->mq_mode) {
	case ETH_MQ_RX_NONE:
		/* No special checks are required */
		break;
#if EFSYS_OPT_RX_SCALE
	case ETH_MQ_RX_RSS:
		if (sa->rss_support == EFX_RX_SCALE_UNAVAILABLE) {
			sfc_err(sa, "RSS is not available");
			rc = EINVAL;
		}
		break;
#endif
	default:
		sfc_err(sa, "Rx multi-queue mode %u not supported",
			rxmode->mq_mode);
		rc = EINVAL;
	}

	if (offloads_rejected) {
		sfc_rx_log_offloads(sa, "device", "is unsupported",
				    offloads_rejected);
		rc = EINVAL;
	}

	if (~rxmode->offloads & DEV_RX_OFFLOAD_CRC_STRIP) {
		sfc_warn(sa, "FCS stripping cannot be disabled - always on");
		rxmode->offloads |= DEV_RX_OFFLOAD_CRC_STRIP;
		rxmode->hw_strip_crc = 1;
	}

	return rc;
}

/**
 * Destroy excess queues that are no longer needed after reconfiguration
 * or complete close.
 */
static void
sfc_rx_fini_queues(struct sfc_adapter *sa, unsigned int nb_rx_queues)
{
	int sw_index;

	SFC_ASSERT(nb_rx_queues <= sa->rxq_count);

	sw_index = sa->rxq_count;
	while (--sw_index >= (int)nb_rx_queues) {
		if (sa->rxq_info[sw_index].rxq != NULL)
			sfc_rx_qfini(sa, sw_index);
	}

	sa->rxq_count = nb_rx_queues;
}

/**
 * Initialize Rx subsystem.
 *
 * Called at device (re)configuration stage when the number of receive
 * queues is specified together with other device level receive
 * configuration.
 *
 * It should be used to allocate NUMA-unaware resources.
 */
int
sfc_rx_configure(struct sfc_adapter *sa)
{
	struct rte_eth_conf *dev_conf = &sa->eth_dev->data->dev_conf;
	const unsigned int nb_rx_queues = sa->eth_dev->data->nb_rx_queues;
	int rc;

	sfc_log_init(sa, "nb_rx_queues=%u (old %u)",
		     nb_rx_queues, sa->rxq_count);

	rc = sfc_rx_check_mode(sa, &dev_conf->rxmode);
	if (rc != 0)
		goto fail_check_mode;

	if (nb_rx_queues == sa->rxq_count)
		goto done;

	if (sa->rxq_info == NULL) {
		rc = ENOMEM;
		sa->rxq_info = rte_calloc_socket("sfc-rxqs", nb_rx_queues,
						 sizeof(sa->rxq_info[0]), 0,
						 sa->socket_id);
		if (sa->rxq_info == NULL)
			goto fail_rxqs_alloc;
	} else {
		struct sfc_rxq_info *new_rxq_info;

		if (nb_rx_queues < sa->rxq_count)
			sfc_rx_fini_queues(sa, nb_rx_queues);

		rc = ENOMEM;
		new_rxq_info =
			rte_realloc(sa->rxq_info,
				    nb_rx_queues * sizeof(sa->rxq_info[0]), 0);
		if (new_rxq_info == NULL && nb_rx_queues > 0)
			goto fail_rxqs_realloc;

		sa->rxq_info = new_rxq_info;
		if (nb_rx_queues > sa->rxq_count)
			memset(&sa->rxq_info[sa->rxq_count], 0,
			       (nb_rx_queues - sa->rxq_count) *
			       sizeof(sa->rxq_info[0]));
	}

	while (sa->rxq_count < nb_rx_queues) {
		rc = sfc_rx_qinit_info(sa, sa->rxq_count);
		if (rc != 0)
			goto fail_rx_qinit_info;

		sa->rxq_count++;
	}

#if EFSYS_OPT_RX_SCALE
	sa->rss_channels = (dev_conf->rxmode.mq_mode == ETH_MQ_RX_RSS) ?
			   MIN(sa->rxq_count, EFX_MAXRSS) : 0;

	if (sa->rss_channels > 0) {
		unsigned int sw_index;

		for (sw_index = 0; sw_index < EFX_RSS_TBL_SIZE; ++sw_index)
			sa->rss_tbl[sw_index] = sw_index % sa->rss_channels;
	}
#endif

done:
	return 0;

fail_rx_qinit_info:
fail_rxqs_realloc:
fail_rxqs_alloc:
	sfc_rx_close(sa);

fail_check_mode:
	sfc_log_init(sa, "failed %d", rc);
	return rc;
}

/**
 * Shutdown Rx subsystem.
 *
 * Called at device close stage, for example, before device shutdown.
 */
void
sfc_rx_close(struct sfc_adapter *sa)
{
	sfc_rx_fini_queues(sa, 0);

	sa->rss_channels = 0;

	rte_free(sa->rxq_info);
	sa->rxq_info = NULL;
}