/* SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 2016-2018 Solarflare Communications Inc.
 * All rights reserved.
 *
 * This software was jointly developed between OKTET Labs (under contract
 * for Solarflare) and Solarflare Communications, Inc.
 */

/* EF10 native datapath implementation */

#include <stdbool.h>

#include <rte_byteorder.h>
#include <rte_mbuf_ptype.h>
#include <rte_mbuf.h>
#include <rte_io.h>

#include "efx.h"
#include "efx_types.h"
#include "efx_regs.h"
#include "efx_regs_ef10.h"

#include "sfc_tweak.h"
#include "sfc_dp_rx.h"
#include "sfc_kvargs.h"
#include "sfc_ef10.h"

#define sfc_ef10_rx_err(dpq, ...) \
	SFC_DP_LOG(SFC_KVARG_DATAPATH_EF10, ERR, dpq, __VA_ARGS__)

/**
 * Alignment requirement for the value written to RX WPTR:
 * the WPTR must be aligned to an 8 descriptor boundary.
 */
#define SFC_EF10_RX_WPTR_ALIGN	8

/**
 * Maximum number of descriptors/buffers in the Rx ring.
 * It should guarantee that the corresponding event queue never overfills.
 * The EF10 native datapath uses an event queue of the same size as the
 * Rx queue. The maximum number of events on the datapath can be estimated
 * as the number of Rx queue entries (one event per Rx buffer in the worst
 * case) plus Rx error and flush events.
 */
#define SFC_EF10_RXQ_LIMIT(_ndesc) \
	((_ndesc) - 1 /* head must not step on tail */ - \
	 (SFC_EF10_EV_PER_CACHE_LINE - 1) /* max unused EvQ entries */ - \
	 1 /* Rx error */ - 1 /* flush */)

struct sfc_ef10_rx_sw_desc {
	struct rte_mbuf			*mbuf;
};

struct sfc_ef10_rxq {
	/* Used on data path */
	unsigned int			flags;
#define SFC_EF10_RXQ_STARTED		0x1
#define SFC_EF10_RXQ_NOT_RUNNING	0x2
#define SFC_EF10_RXQ_EXCEPTION		0x4
#define SFC_EF10_RXQ_RSS_HASH		0x8
	unsigned int			ptr_mask;
	unsigned int			prepared;
	unsigned int			completed;
	unsigned int			evq_read_ptr;
	efx_qword_t			*evq_hw_ring;
	struct sfc_ef10_rx_sw_desc	*sw_ring;
	uint64_t			rearm_data;
	uint16_t			prefix_size;

	/* Used on refill */
	uint16_t			buf_size;
	unsigned int			added;
	unsigned int			max_fill_level;
	unsigned int			refill_threshold;
	struct rte_mempool		*refill_mb_pool;
	efx_qword_t			*rxq_hw_ring;
	volatile void			*doorbell;

	/* Datapath receive queue anchor */
	struct sfc_dp_rxq		dp;
};

static inline struct sfc_ef10_rxq *
sfc_ef10_rxq_by_dp_rxq(struct sfc_dp_rxq *dp_rxq)
{
	return container_of(dp_rxq, struct sfc_ef10_rxq, dp);
}

static void
sfc_ef10_rx_qpush(struct sfc_ef10_rxq *rxq)
{
	efx_dword_t dword;

	/* Hardware has alignment restriction for WPTR */
	RTE_BUILD_BUG_ON(SFC_RX_REFILL_BULK % SFC_EF10_RX_WPTR_ALIGN != 0);
	SFC_ASSERT(RTE_ALIGN(rxq->added, SFC_EF10_RX_WPTR_ALIGN) == rxq->added);

	EFX_POPULATE_DWORD_1(dword, ERF_DZ_RX_DESC_WPTR,
			     rxq->added & rxq->ptr_mask);

	/* DMA sync to device is not required */

	/*
	 * rte_write32() has rte_io_wmb() which guarantees that the STORE
	 * operations (i.e. Rx and event descriptor updates) that precede
	 * the rte_io_wmb() call are visible to the NIC before the STORE
	 * operations that follow it (i.e. doorbell write).
	 */
	rte_write32(dword.ed_u32[0], rxq->doorbell);
}
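
/*
 * Illustrative note, not a hardware specification: assuming a 64-byte cache
 * line and 8-byte events so that SFC_EF10_EV_PER_CACHE_LINE is 8,
 * SFC_EF10_RXQ_LIMIT(512) evaluates to 512 - 1 - 7 - 1 - 1 = 502 usable
 * descriptors. Similarly, with a 512-entry ring (ptr_mask == 0x1ff) and
 * rxq->added == 520, the doorbell write above publishes WPTR == 520 & 0x1ff
 * == 8, which satisfies the 8-descriptor alignment requirement because
 * buffers are always posted in SFC_RX_REFILL_BULK multiples.
 */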

static void
sfc_ef10_rx_qrefill(struct sfc_ef10_rxq *rxq)
{
	const unsigned int ptr_mask = rxq->ptr_mask;
	const uint32_t buf_size = rxq->buf_size;
	unsigned int free_space;
	unsigned int bulks;
	void *objs[SFC_RX_REFILL_BULK];
	unsigned int added = rxq->added;

	free_space = rxq->max_fill_level - (added - rxq->completed);

	if (free_space < rxq->refill_threshold)
		return;

	bulks = free_space / RTE_DIM(objs);
	/* refill_threshold guarantees that bulks is positive */
	SFC_ASSERT(bulks > 0);

	do {
		unsigned int id;
		unsigned int i;

		if (unlikely(rte_mempool_get_bulk(rxq->refill_mb_pool, objs,
						  RTE_DIM(objs)) < 0)) {
			struct rte_eth_dev_data *dev_data =
				rte_eth_devices[rxq->dp.dpq.port_id].data;

			/*
			 * It is hardly a safe way to increment the counter
			 * from different contexts, but all PMDs do it.
			 */
			dev_data->rx_mbuf_alloc_failed += RTE_DIM(objs);
			/* Return if we have posted nothing yet */
			if (added == rxq->added)
				return;
			/* Push posted */
			break;
		}

		for (i = 0, id = added & ptr_mask;
		     i < RTE_DIM(objs);
		     ++i, ++id) {
			struct rte_mbuf *m = objs[i];
			struct sfc_ef10_rx_sw_desc *rxd;
			rte_iova_t phys_addr;

			SFC_ASSERT((id & ~ptr_mask) == 0);
			rxd = &rxq->sw_ring[id];
			rxd->mbuf = m;

			/*
			 * Avoid writing to the mbuf. It is cheaper to do it
			 * when we receive the packet and fill in nearby
			 * structure members.
			 */

			phys_addr = rte_mbuf_data_iova_default(m);
			EFX_POPULATE_QWORD_2(rxq->rxq_hw_ring[id],
					     ESF_DZ_RX_KER_BYTE_CNT, buf_size,
					     ESF_DZ_RX_KER_BUF_ADDR, phys_addr);
		}

		added += RTE_DIM(objs);
	} while (--bulks > 0);

	SFC_ASSERT(rxq->added != added);
	rxq->added = added;
	sfc_ef10_rx_qpush(rxq);
}

static void
sfc_ef10_rx_prefetch_next(struct sfc_ef10_rxq *rxq, unsigned int next_id)
{
	struct rte_mbuf *next_mbuf;

	/* Prefetch next bunch of software descriptors */
	if ((next_id % (RTE_CACHE_LINE_SIZE / sizeof(rxq->sw_ring[0]))) == 0)
		rte_prefetch0(&rxq->sw_ring[next_id]);

	/*
	 * It looks strange to prefetch depending on previously prefetched
	 * data, but measurements show that it is really efficient and
	 * increases packet rate.
	 */
	next_mbuf = rxq->sw_ring[next_id].mbuf;
	if (likely(next_mbuf != NULL)) {
		/* Prefetch the next mbuf structure */
		rte_mbuf_prefetch_part1(next_mbuf);

		/* Prefetch pseudo header of the next packet */
		/* data_off is not filled in yet */
		/* The data may not be ready yet, but prefetch speculatively */
		rte_prefetch0((uint8_t *)next_mbuf->buf_addr +
			      RTE_PKTMBUF_HEADROOM);
	}
}
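
/*
 * Descriptive note, assuming a typical build rather than stating a
 * requirement: with 64-byte cache lines and 8-byte software descriptors
 * (a single mbuf pointer each), the check above touches a new sw_ring cache
 * line once every 8 descriptors, so the prefetch warms the cache line that
 * holds the upcoming descriptors just before the processing loop
 * dereferences them.
 */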

static uint16_t
sfc_ef10_rx_prepared(struct sfc_ef10_rxq *rxq, struct rte_mbuf **rx_pkts,
		     uint16_t nb_pkts)
{
	uint16_t n_rx_pkts = RTE_MIN(nb_pkts, rxq->prepared);
	unsigned int completed = rxq->completed;
	unsigned int i;

	rxq->prepared -= n_rx_pkts;
	rxq->completed = completed + n_rx_pkts;

	for (i = 0; i < n_rx_pkts; ++i, ++completed)
		rx_pkts[i] = rxq->sw_ring[completed & rxq->ptr_mask].mbuf;

	return n_rx_pkts;
}

static void
sfc_ef10_rx_ev_to_offloads(struct sfc_ef10_rxq *rxq, const efx_qword_t rx_ev,
			   struct rte_mbuf *m)
{
	uint32_t tun_ptype = 0;
	/* Which event bit is mapped to PKT_RX_IP_CKSUM_* */
	int8_t ip_csum_err_bit;
	/* Which event bit is mapped to PKT_RX_L4_CKSUM_* */
	int8_t l4_csum_err_bit;
	uint32_t l2_ptype = 0;
	uint32_t l3_ptype = 0;
	uint32_t l4_ptype = 0;
	uint64_t ol_flags = 0;

	if (unlikely(EFX_TEST_QWORD_BIT(rx_ev, ESF_DZ_RX_PARSE_INCOMPLETE_LBN)))
		goto done;

	switch (EFX_QWORD_FIELD(rx_ev, ESF_EZ_RX_ENCAP_HDR)) {
	default:
		/* Unexpected encapsulation tag class */
		SFC_ASSERT(false);
		/* FALLTHROUGH */
	case ESE_EZ_ENCAP_HDR_NONE:
		break;
	case ESE_EZ_ENCAP_HDR_VXLAN:
		/*
		 * It is definitely UDP, but we have no information
		 * about IPv4 vs IPv6 and VLAN tagging.
		 */
		tun_ptype = RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP;
		break;
	case ESE_EZ_ENCAP_HDR_GRE:
		/*
		 * We have no information about IPv4 vs IPv6 and VLAN tagging.
		 */
		tun_ptype = RTE_PTYPE_TUNNEL_NVGRE;
		break;
	}

	if (tun_ptype == 0) {
		ip_csum_err_bit = ESF_DZ_RX_IPCKSUM_ERR_LBN;
		l4_csum_err_bit = ESF_DZ_RX_TCPUDP_CKSUM_ERR_LBN;
	} else {
		ip_csum_err_bit = ESF_EZ_RX_IP_INNER_CHKSUM_ERR_LBN;
		l4_csum_err_bit = ESF_EZ_RX_TCP_UDP_INNER_CHKSUM_ERR_LBN;
		if (unlikely(EFX_TEST_QWORD_BIT(rx_ev,
						ESF_DZ_RX_IPCKSUM_ERR_LBN)))
			ol_flags |= PKT_RX_EIP_CKSUM_BAD;
	}

	switch (EFX_QWORD_FIELD(rx_ev, ESF_DZ_RX_ETH_TAG_CLASS)) {
	case ESE_DZ_ETH_TAG_CLASS_NONE:
		l2_ptype = (tun_ptype == 0) ? RTE_PTYPE_L2_ETHER :
			RTE_PTYPE_INNER_L2_ETHER;
		break;
	case ESE_DZ_ETH_TAG_CLASS_VLAN1:
		l2_ptype = (tun_ptype == 0) ? RTE_PTYPE_L2_ETHER_VLAN :
			RTE_PTYPE_INNER_L2_ETHER_VLAN;
		break;
	case ESE_DZ_ETH_TAG_CLASS_VLAN2:
		l2_ptype = (tun_ptype == 0) ? RTE_PTYPE_L2_ETHER_QINQ :
			RTE_PTYPE_INNER_L2_ETHER_QINQ;
		break;
	default:
		/* Unexpected Eth tag class */
		SFC_ASSERT(false);
	}

	switch (EFX_QWORD_FIELD(rx_ev, ESF_DZ_RX_L3_CLASS)) {
	case ESE_DZ_L3_CLASS_IP4_FRAG:
		l4_ptype = (tun_ptype == 0) ? RTE_PTYPE_L4_FRAG :
			RTE_PTYPE_INNER_L4_FRAG;
		/* FALLTHROUGH */
	case ESE_DZ_L3_CLASS_IP4:
		l3_ptype = (tun_ptype == 0) ? RTE_PTYPE_L3_IPV4_EXT_UNKNOWN :
			RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN;
		ol_flags |= PKT_RX_RSS_HASH |
			((EFX_TEST_QWORD_BIT(rx_ev, ip_csum_err_bit)) ?
			 PKT_RX_IP_CKSUM_BAD : PKT_RX_IP_CKSUM_GOOD);
		break;
	case ESE_DZ_L3_CLASS_IP6_FRAG:
		l4_ptype = (tun_ptype == 0) ? RTE_PTYPE_L4_FRAG :
			RTE_PTYPE_INNER_L4_FRAG;
		/* FALLTHROUGH */
	case ESE_DZ_L3_CLASS_IP6:
		l3_ptype = (tun_ptype == 0) ? RTE_PTYPE_L3_IPV6_EXT_UNKNOWN :
			RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN;
		ol_flags |= PKT_RX_RSS_HASH;
		break;
	case ESE_DZ_L3_CLASS_ARP:
		/* Override Layer 2 packet type */
		/* There is no ARP classification for inner packets */
		if (tun_ptype == 0)
			l2_ptype = RTE_PTYPE_L2_ETHER_ARP;
		break;
	default:
		/* Unexpected Layer 3 class */
		SFC_ASSERT(false);
	}

	switch (EFX_QWORD_FIELD(rx_ev, ESF_DZ_RX_L4_CLASS)) {
	case ESE_DZ_L4_CLASS_TCP:
		l4_ptype = (tun_ptype == 0) ? RTE_PTYPE_L4_TCP :
			RTE_PTYPE_INNER_L4_TCP;
		ol_flags |=
			(EFX_TEST_QWORD_BIT(rx_ev, l4_csum_err_bit)) ?
			PKT_RX_L4_CKSUM_BAD : PKT_RX_L4_CKSUM_GOOD;
		break;
	case ESE_DZ_L4_CLASS_UDP:
		l4_ptype = (tun_ptype == 0) ? RTE_PTYPE_L4_UDP :
			RTE_PTYPE_INNER_L4_UDP;
		ol_flags |=
			(EFX_TEST_QWORD_BIT(rx_ev, l4_csum_err_bit)) ?
			PKT_RX_L4_CKSUM_BAD : PKT_RX_L4_CKSUM_GOOD;
		break;
	case ESE_DZ_L4_CLASS_UNKNOWN:
		break;
	default:
		/* Unexpected Layer 4 class */
		SFC_ASSERT(false);
	}

	/* Remove RSS hash offload flag if RSS is not enabled */
	if (~rxq->flags & SFC_EF10_RXQ_RSS_HASH)
		ol_flags &= ~PKT_RX_RSS_HASH;

done:
	m->ol_flags = ol_flags;
	m->packet_type = tun_ptype | l2_ptype | l3_ptype | l4_ptype;
}

static uint16_t
sfc_ef10_rx_pseudo_hdr_get_len(const uint8_t *pseudo_hdr)
{
	return rte_le_to_cpu_16(*(const uint16_t *)&pseudo_hdr[8]);
}

static uint32_t
sfc_ef10_rx_pseudo_hdr_get_hash(const uint8_t *pseudo_hdr)
{
	return rte_le_to_cpu_32(*(const uint32_t *)pseudo_hdr);
}
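
/*
 * Note on the helpers above, derived from what the code reads rather than
 * from a datasheet quote: the Rx prefix ("pseudo header") that the NIC
 * prepends to each packet buffer is assumed to carry the RSS hash as a
 * little-endian 32-bit word at offset 0 and the packet length as a
 * little-endian 16-bit word at offset 8. rxq->prefix_size gives the total
 * prefix length and is already accounted for in data_off via the rearm data.
 */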

static uint16_t
sfc_ef10_rx_process_event(struct sfc_ef10_rxq *rxq, efx_qword_t rx_ev,
			  struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
	const unsigned int ptr_mask = rxq->ptr_mask;
	unsigned int completed = rxq->completed;
	unsigned int ready;
	struct sfc_ef10_rx_sw_desc *rxd;
	struct rte_mbuf *m;
	struct rte_mbuf *m0;
	uint16_t n_rx_pkts;
	const uint8_t *pseudo_hdr;
	uint16_t pkt_len;

	ready = (EFX_QWORD_FIELD(rx_ev, ESF_DZ_RX_DSC_PTR_LBITS) - completed) &
		EFX_MASK32(ESF_DZ_RX_DSC_PTR_LBITS);
	SFC_ASSERT(ready > 0);

	if (rx_ev.eq_u64[0] &
	    rte_cpu_to_le_64((1ull << ESF_DZ_RX_ECC_ERR_LBN) |
			     (1ull << ESF_DZ_RX_ECRC_ERR_LBN))) {
		SFC_ASSERT(rxq->prepared == 0);
		rxq->completed += ready;
		while (ready-- > 0) {
			rxd = &rxq->sw_ring[completed++ & ptr_mask];
			rte_mempool_put(rxq->refill_mb_pool, rxd->mbuf);
		}
		return 0;
	}

	n_rx_pkts = RTE_MIN(ready, nb_pkts);
	rxq->prepared = ready - n_rx_pkts;
	rxq->completed += n_rx_pkts;

	rxd = &rxq->sw_ring[completed++ & ptr_mask];

	sfc_ef10_rx_prefetch_next(rxq, completed & ptr_mask);

	m = rxd->mbuf;

	*rx_pkts++ = m;

	RTE_BUILD_BUG_ON(sizeof(m->rearm_data[0]) != sizeof(rxq->rearm_data));
	m->rearm_data[0] = rxq->rearm_data;

	/* Classify packet based on Rx event */
	sfc_ef10_rx_ev_to_offloads(rxq, rx_ev, m);

	/* data_off already moved past pseudo header */
	pseudo_hdr = (uint8_t *)m->buf_addr + RTE_PKTMBUF_HEADROOM;

	/*
	 * Always get RSS hash from pseudo header to avoid
	 * condition/branching. Whether it is valid or not depends on
	 * PKT_RX_RSS_HASH in m->ol_flags.
	 */
	m->hash.rss = sfc_ef10_rx_pseudo_hdr_get_hash(pseudo_hdr);

	if (ready == 1)
		pkt_len = EFX_QWORD_FIELD(rx_ev, ESF_DZ_RX_BYTES) -
			rxq->prefix_size;
	else
		pkt_len = sfc_ef10_rx_pseudo_hdr_get_len(pseudo_hdr);
	SFC_ASSERT(pkt_len > 0);
	rte_pktmbuf_data_len(m) = pkt_len;
	rte_pktmbuf_pkt_len(m) = pkt_len;

	SFC_ASSERT(m->next == NULL);

	/* Remember mbuf to copy offload flags and packet type from */
	m0 = m;
	for (--ready; ready > 0; --ready) {
		rxd = &rxq->sw_ring[completed++ & ptr_mask];

		sfc_ef10_rx_prefetch_next(rxq, completed & ptr_mask);

		m = rxd->mbuf;

		if (ready > rxq->prepared)
			*rx_pkts++ = m;

		RTE_BUILD_BUG_ON(sizeof(m->rearm_data[0]) !=
				 sizeof(rxq->rearm_data));
		m->rearm_data[0] = rxq->rearm_data;

		/* Event-dependent information is the same */
		m->ol_flags = m0->ol_flags;
		m->packet_type = m0->packet_type;

		/* data_off already moved past pseudo header */
		pseudo_hdr = (uint8_t *)m->buf_addr + RTE_PKTMBUF_HEADROOM;

		/*
		 * Always get RSS hash from pseudo header to avoid
		 * condition/branching. Whether it is valid or not depends on
		 * PKT_RX_RSS_HASH in m->ol_flags.
		 */
		m->hash.rss = sfc_ef10_rx_pseudo_hdr_get_hash(pseudo_hdr);

		pkt_len = sfc_ef10_rx_pseudo_hdr_get_len(pseudo_hdr);
		SFC_ASSERT(pkt_len > 0);
		rte_pktmbuf_data_len(m) = pkt_len;
		rte_pktmbuf_pkt_len(m) = pkt_len;

		SFC_ASSERT(m->next == NULL);
	}

	return n_rx_pkts;
}
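
/*
 * Descriptive note, a restatement of the flow above rather than new
 * behaviour: a single Rx event may report more ready descriptors than the
 * caller asked for. In that case only nb_pkts mbufs are returned now; the
 * rest are still completed (rearm data, hash and length are filled in) and
 * accounted in rxq->prepared, so the next burst hands them out via
 * sfc_ef10_rx_prepared() without touching the event queue. For example, if
 * the event reports 5 descriptors and nb_pkts is 2, two mbufs are returned
 * and prepared becomes 3.
 */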

static bool
sfc_ef10_rx_get_event(struct sfc_ef10_rxq *rxq, efx_qword_t *rx_ev)
{
	*rx_ev = rxq->evq_hw_ring[rxq->evq_read_ptr & rxq->ptr_mask];

	if (!sfc_ef10_ev_present(*rx_ev))
		return false;

	if (unlikely(EFX_QWORD_FIELD(*rx_ev, FSF_AZ_EV_CODE) !=
		     FSE_AZ_EV_CODE_RX_EV)) {
		/*
		 * Do not move read_ptr to keep the event for exception
		 * handling by the control path.
		 */
		rxq->flags |= SFC_EF10_RXQ_EXCEPTION;
		sfc_ef10_rx_err(&rxq->dp.dpq,
				"RxQ exception at EvQ read ptr %#x",
				rxq->evq_read_ptr);
		return false;
	}

	rxq->evq_read_ptr++;
	return true;
}

static uint16_t
sfc_ef10_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
	struct sfc_ef10_rxq *rxq = sfc_ef10_rxq_by_dp_rxq(rx_queue);
	unsigned int evq_old_read_ptr;
	uint16_t n_rx_pkts;
	efx_qword_t rx_ev;

	if (unlikely(rxq->flags &
		     (SFC_EF10_RXQ_NOT_RUNNING | SFC_EF10_RXQ_EXCEPTION)))
		return 0;

	n_rx_pkts = sfc_ef10_rx_prepared(rxq, rx_pkts, nb_pkts);

	evq_old_read_ptr = rxq->evq_read_ptr;
	while (n_rx_pkts != nb_pkts && sfc_ef10_rx_get_event(rxq, &rx_ev)) {
		/*
		 * DROP_EVENT is internal to the NIC, software should never
		 * see it and, therefore, may ignore it.
		 */

		n_rx_pkts += sfc_ef10_rx_process_event(rxq, rx_ev,
						       rx_pkts + n_rx_pkts,
						       nb_pkts - n_rx_pkts);
	}

	sfc_ef10_ev_qclear(rxq->evq_hw_ring, rxq->ptr_mask, evq_old_read_ptr,
			   rxq->evq_read_ptr);

	/* It is not a problem to refill even in the case of exception */
	sfc_ef10_rx_qrefill(rxq);

	return n_rx_pkts;
}
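
/*
 * Descriptive note, an observation of the code above rather than an extra
 * guarantee: once SFC_EF10_RXQ_EXCEPTION is set, the burst function keeps
 * returning 0 packets (the flags check at its start) and the offending event
 * is left in place, so the control path can inspect it and recover, e.g. by
 * restarting the queue.
 */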

static const uint32_t *
sfc_ef10_supported_ptypes_get(uint32_t tunnel_encaps)
{
	static const uint32_t ef10_native_ptypes[] = {
		RTE_PTYPE_L2_ETHER,
		RTE_PTYPE_L2_ETHER_ARP,
		RTE_PTYPE_L2_ETHER_VLAN,
		RTE_PTYPE_L2_ETHER_QINQ,
		RTE_PTYPE_L3_IPV4_EXT_UNKNOWN,
		RTE_PTYPE_L3_IPV6_EXT_UNKNOWN,
		RTE_PTYPE_L4_FRAG,
		RTE_PTYPE_L4_TCP,
		RTE_PTYPE_L4_UDP,
		RTE_PTYPE_UNKNOWN
	};
	static const uint32_t ef10_overlay_ptypes[] = {
		RTE_PTYPE_L2_ETHER,
		RTE_PTYPE_L2_ETHER_ARP,
		RTE_PTYPE_L2_ETHER_VLAN,
		RTE_PTYPE_L2_ETHER_QINQ,
		RTE_PTYPE_L3_IPV4_EXT_UNKNOWN,
		RTE_PTYPE_L3_IPV6_EXT_UNKNOWN,
		RTE_PTYPE_L4_FRAG,
		RTE_PTYPE_L4_TCP,
		RTE_PTYPE_L4_UDP,
		RTE_PTYPE_TUNNEL_VXLAN,
		RTE_PTYPE_TUNNEL_NVGRE,
		RTE_PTYPE_INNER_L2_ETHER,
		RTE_PTYPE_INNER_L2_ETHER_VLAN,
		RTE_PTYPE_INNER_L2_ETHER_QINQ,
		RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN,
		RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN,
		RTE_PTYPE_INNER_L4_FRAG,
		RTE_PTYPE_INNER_L4_TCP,
		RTE_PTYPE_INNER_L4_UDP,
		RTE_PTYPE_UNKNOWN
	};

	/*
	 * The function returns static sets of supported packet types,
	 * so they cannot be built dynamically based on the supported
	 * tunnel encapsulations; limit the choice to the known sets.
	 */
	switch (tunnel_encaps) {
	case (1u << EFX_TUNNEL_PROTOCOL_VXLAN |
	      1u << EFX_TUNNEL_PROTOCOL_GENEVE |
	      1u << EFX_TUNNEL_PROTOCOL_NVGRE):
		return ef10_overlay_ptypes;
	default:
		RTE_LOG(ERR, PMD,
			"Unexpected set of supported tunnel encapsulations: %#x\n",
			tunnel_encaps);
		/* FALLTHROUGH */
	case 0:
		return ef10_native_ptypes;
	}
}

static sfc_dp_rx_qdesc_npending_t sfc_ef10_rx_qdesc_npending;
static unsigned int
sfc_ef10_rx_qdesc_npending(__rte_unused struct sfc_dp_rxq *dp_rxq)
{
	/*
	 * A correct implementation requires EvQ polling and event
	 * processing (keeping all ready mbufs in prepared).
	 */
	return -ENOTSUP;
}

static sfc_dp_rx_qdesc_status_t sfc_ef10_rx_qdesc_status;
static int
sfc_ef10_rx_qdesc_status(__rte_unused struct sfc_dp_rxq *dp_rxq,
			 __rte_unused uint16_t offset)
{
	return -ENOTSUP;
}


static sfc_dp_rx_get_dev_info_t sfc_ef10_rx_get_dev_info;
static void
sfc_ef10_rx_get_dev_info(struct rte_eth_dev_info *dev_info)
{
	/*
	 * The number of descriptors just defines the maximum number of
	 * pushed descriptors (fill level).
	 */
	dev_info->rx_desc_lim.nb_min = SFC_RX_REFILL_BULK;
	dev_info->rx_desc_lim.nb_align = SFC_RX_REFILL_BULK;
}


static sfc_dp_rx_qsize_up_rings_t sfc_ef10_rx_qsize_up_rings;
static int
sfc_ef10_rx_qsize_up_rings(uint16_t nb_rx_desc,
			   unsigned int *rxq_entries,
			   unsigned int *evq_entries,
			   unsigned int *rxq_max_fill_level)
{
	/*
	 * The rte_ethdev API guarantees that the number meets min, max and
	 * alignment requirements.
	 */
	if (nb_rx_desc <= EFX_RXQ_MINNDESCS)
		*rxq_entries = EFX_RXQ_MINNDESCS;
	else
		*rxq_entries = rte_align32pow2(nb_rx_desc);

	*evq_entries = *rxq_entries;

	*rxq_max_fill_level = RTE_MIN(nb_rx_desc,
				      SFC_EF10_RXQ_LIMIT(*evq_entries));
	return 0;
}
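
/*
 * Worked example for the rounding above (illustrative numbers only):
 * a request for 1000 descriptors makes the hardware Rx ring and event queue
 * 1024 entries each, while the maximum fill level stays at
 * min(1000, SFC_EF10_RXQ_LIMIT(1024)), i.e. the datapath never posts more
 * buffers than the caller asked for.
 */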

static uint64_t
sfc_ef10_mk_mbuf_rearm_data(uint16_t port_id, uint16_t prefix_size)
{
	struct rte_mbuf m;

	memset(&m, 0, sizeof(m));

	rte_mbuf_refcnt_set(&m, 1);
	m.data_off = RTE_PKTMBUF_HEADROOM + prefix_size;
	m.nb_segs = 1;
	m.port = port_id;

	/* rearm_data covers structure members filled in above */
	rte_compiler_barrier();
	RTE_BUILD_BUG_ON(sizeof(m.rearm_data[0]) != sizeof(uint64_t));
	return m.rearm_data[0];
}

static sfc_dp_rx_qcreate_t sfc_ef10_rx_qcreate;
static int
sfc_ef10_rx_qcreate(uint16_t port_id, uint16_t queue_id,
		    const struct rte_pci_addr *pci_addr, int socket_id,
		    const struct sfc_dp_rx_qcreate_info *info,
		    struct sfc_dp_rxq **dp_rxqp)
{
	struct sfc_ef10_rxq *rxq;
	int rc;

	rc = EINVAL;
	if (info->rxq_entries != info->evq_entries)
		goto fail_rxq_args;

	rc = ENOMEM;
	rxq = rte_zmalloc_socket("sfc-ef10-rxq", sizeof(*rxq),
				 RTE_CACHE_LINE_SIZE, socket_id);
	if (rxq == NULL)
		goto fail_rxq_alloc;

	sfc_dp_queue_init(&rxq->dp.dpq, port_id, queue_id, pci_addr);

	rc = ENOMEM;
	rxq->sw_ring = rte_calloc_socket("sfc-ef10-rxq-sw_ring",
					 info->rxq_entries,
					 sizeof(*rxq->sw_ring),
					 RTE_CACHE_LINE_SIZE, socket_id);
	if (rxq->sw_ring == NULL)
		goto fail_desc_alloc;

	rxq->flags |= SFC_EF10_RXQ_NOT_RUNNING;
	if (info->flags & SFC_RXQ_FLAG_RSS_HASH)
		rxq->flags |= SFC_EF10_RXQ_RSS_HASH;
	rxq->ptr_mask = info->rxq_entries - 1;
	rxq->evq_hw_ring = info->evq_hw_ring;
	rxq->max_fill_level = info->max_fill_level;
	rxq->refill_threshold = info->refill_threshold;
	rxq->rearm_data =
		sfc_ef10_mk_mbuf_rearm_data(port_id, info->prefix_size);
	rxq->prefix_size = info->prefix_size;
	rxq->buf_size = info->buf_size;
	rxq->refill_mb_pool = info->refill_mb_pool;
	rxq->rxq_hw_ring = info->rxq_hw_ring;
	rxq->doorbell = (volatile uint8_t *)info->mem_bar +
			ER_DZ_RX_DESC_UPD_REG_OFST +
			info->hw_index * ER_DZ_RX_DESC_UPD_REG_STEP;

	*dp_rxqp = &rxq->dp;
	return 0;

fail_desc_alloc:
	rte_free(rxq);

fail_rxq_alloc:
fail_rxq_args:
	return rc;
}

static sfc_dp_rx_qdestroy_t sfc_ef10_rx_qdestroy;
static void
sfc_ef10_rx_qdestroy(struct sfc_dp_rxq *dp_rxq)
{
	struct sfc_ef10_rxq *rxq = sfc_ef10_rxq_by_dp_rxq(dp_rxq);

	rte_free(rxq->sw_ring);
	rte_free(rxq);
}

static sfc_dp_rx_qstart_t sfc_ef10_rx_qstart;
static int
sfc_ef10_rx_qstart(struct sfc_dp_rxq *dp_rxq, unsigned int evq_read_ptr)
{
	struct sfc_ef10_rxq *rxq = sfc_ef10_rxq_by_dp_rxq(dp_rxq);

	rxq->prepared = 0;
	rxq->completed = rxq->added = 0;

	sfc_ef10_rx_qrefill(rxq);

	rxq->evq_read_ptr = evq_read_ptr;

	rxq->flags |= SFC_EF10_RXQ_STARTED;
	rxq->flags &= ~(SFC_EF10_RXQ_NOT_RUNNING | SFC_EF10_RXQ_EXCEPTION);

	return 0;
}

static sfc_dp_rx_qstop_t sfc_ef10_rx_qstop;
static void
sfc_ef10_rx_qstop(struct sfc_dp_rxq *dp_rxq, unsigned int *evq_read_ptr)
{
	struct sfc_ef10_rxq *rxq = sfc_ef10_rxq_by_dp_rxq(dp_rxq);

	rxq->flags |= SFC_EF10_RXQ_NOT_RUNNING;

	*evq_read_ptr = rxq->evq_read_ptr;
}
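
/*
 * Descriptive note on sfc_ef10_mk_mbuf_rearm_data() above: the hot path
 * re-initialises refcnt, data_off, nb_segs and port of every received mbuf
 * with a single 64-bit store of this precomputed value into m->rearm_data[0]
 * instead of writing the fields one by one. This relies on rte_mbuf placing
 * exactly these members in the rearm_data area, which the RTE_BUILD_BUG_ON()
 * checks only partially (size, not layout).
 */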

static sfc_dp_rx_qrx_ev_t sfc_ef10_rx_qrx_ev;
static bool
sfc_ef10_rx_qrx_ev(struct sfc_dp_rxq *dp_rxq, __rte_unused unsigned int id)
{
	__rte_unused struct sfc_ef10_rxq *rxq = sfc_ef10_rxq_by_dp_rxq(dp_rxq);

	SFC_ASSERT(rxq->flags & SFC_EF10_RXQ_NOT_RUNNING);

	/*
	 * It is safe to ignore Rx event since we free all mbufs on
	 * queue purge anyway.
	 */

	return false;
}

static sfc_dp_rx_qpurge_t sfc_ef10_rx_qpurge;
static void
sfc_ef10_rx_qpurge(struct sfc_dp_rxq *dp_rxq)
{
	struct sfc_ef10_rxq *rxq = sfc_ef10_rxq_by_dp_rxq(dp_rxq);
	unsigned int i;
	struct sfc_ef10_rx_sw_desc *rxd;

	for (i = rxq->completed; i != rxq->added; ++i) {
		rxd = &rxq->sw_ring[i & rxq->ptr_mask];
		rte_mempool_put(rxq->refill_mb_pool, rxd->mbuf);
		rxd->mbuf = NULL;
	}

	rxq->flags &= ~SFC_EF10_RXQ_STARTED;
}

struct sfc_dp_rx sfc_ef10_rx = {
	.dp = {
		.name		= SFC_KVARG_DATAPATH_EF10,
		.type		= SFC_DP_RX,
		.hw_fw_caps	= SFC_DP_HW_FW_CAP_EF10,
	},
	.features		= SFC_DP_RX_FEAT_MULTI_PROCESS |
				  SFC_DP_RX_FEAT_TUNNELS,
	.get_dev_info		= sfc_ef10_rx_get_dev_info,
	.qsize_up_rings		= sfc_ef10_rx_qsize_up_rings,
	.qcreate		= sfc_ef10_rx_qcreate,
	.qdestroy		= sfc_ef10_rx_qdestroy,
	.qstart			= sfc_ef10_rx_qstart,
	.qstop			= sfc_ef10_rx_qstop,
	.qrx_ev			= sfc_ef10_rx_qrx_ev,
	.qpurge			= sfc_ef10_rx_qpurge,
	.supported_ptypes_get	= sfc_ef10_supported_ptypes_get,
	.qdesc_npending		= sfc_ef10_rx_qdesc_npending,
	.qdesc_status		= sfc_ef10_rx_qdesc_status,
	.pkt_burst		= sfc_ef10_recv_pkts,
};
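
/*
 * Usage note, an assumption about the common sfc driver code rather than
 * something defined in this file: this template is expected to be registered
 * by the generic part of the PMD and selected by name via the rx_datapath
 * device argument, e.g.
 *   testpmd -w 0000:01:00.0,rx_datapath=ef10 -- -i
 * which should bind sfc_ef10_recv_pkts() as the Rx burst callback.
 */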