/* SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright(c) 2019-2021 Xilinx, Inc.
 * Copyright(c) 2018-2019 Solarflare Communications Inc.
 *
 * This software was jointly developed between OKTET Labs (under contract
 * for Solarflare) and Solarflare Communications, Inc.
 */

/* EF100 native datapath implementation */

#include <stdbool.h>

#include <rte_byteorder.h>
#include <rte_mbuf_ptype.h>
#include <rte_mbuf.h>
#include <rte_io.h>

#include "efx_types.h"
#include "efx_regs_ef100.h"
#include "efx.h"

#include "sfc_debug.h"
#include "sfc_tweak.h"
#include "sfc_dp_rx.h"
#include "sfc_kvargs.h"
#include "sfc_ef100.h"


#define sfc_ef100_rx_err(_rxq, ...) \
	SFC_DP_LOG(SFC_KVARG_DATAPATH_EF100, ERR, &(_rxq)->dp.dpq, __VA_ARGS__)

#define sfc_ef100_rx_debug(_rxq, ...) \
	SFC_DP_LOG(SFC_KVARG_DATAPATH_EF100, DEBUG, &(_rxq)->dp.dpq, \
		   __VA_ARGS__)

/**
 * Maximum number of descriptors/buffers in the Rx ring.
 * It should guarantee that the corresponding event queue never overfills.
 * The EF100 native datapath uses an event queue of the same size as the
 * Rx queue. The maximum number of events on the datapath can be estimated
 * as the number of Rx queue entries (one event per Rx buffer in the worst
 * case) plus Rx error and flush events.
 */
#define SFC_EF100_RXQ_LIMIT(_ndesc) \
	((_ndesc) - 1 /* head must not step on tail */ - \
	 1 /* Rx error */ - 1 /* flush */)

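/*
 * Illustration (not used by the code): for a 4096-entry Rx ring the limit
 * evaluates to SFC_EF100_RXQ_LIMIT(4096) == 4096 - 1 - 1 - 1 == 4093, i.e.
 * at most 4093 Rx buffers may be outstanding at any time so that the
 * same-sized event queue cannot overfill.
 */
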
struct sfc_ef100_rx_sw_desc {
	struct rte_mbuf			*mbuf;
};

struct sfc_ef100_rxq {
	/* Used on data path */
	unsigned int			flags;
#define SFC_EF100_RXQ_STARTED		0x1
#define SFC_EF100_RXQ_NOT_RUNNING	0x2
#define SFC_EF100_RXQ_EXCEPTION		0x4
#define SFC_EF100_RXQ_RSS_HASH		0x10
#define SFC_EF100_RXQ_USER_MARK		0x20
#define SFC_EF100_RXQ_FLAG_INTR_EN	0x40
	unsigned int			ptr_mask;
	unsigned int			evq_phase_bit_shift;
	unsigned int			ready_pkts;
	unsigned int			completed;
	unsigned int			evq_read_ptr;
	unsigned int			evq_read_ptr_primed;
	volatile efx_qword_t		*evq_hw_ring;
	struct sfc_ef100_rx_sw_desc	*sw_ring;
	uint64_t			rearm_data;
	uint16_t			buf_size;
	uint16_t			prefix_size;

	unsigned int			evq_hw_index;
	volatile void			*evq_prime;

	/* Used on refill */
	unsigned int			added;
	unsigned int			max_fill_level;
	unsigned int			refill_threshold;
	struct rte_mempool		*refill_mb_pool;
	efx_qword_t			*rxq_hw_ring;
	volatile void			*doorbell;

	/* Datapath receive queue anchor */
	struct sfc_dp_rxq		dp;
};

static inline struct sfc_ef100_rxq *
sfc_ef100_rxq_by_dp_rxq(struct sfc_dp_rxq *dp_rxq)
{
	return container_of(dp_rxq, struct sfc_ef100_rxq, dp);
}

static void
sfc_ef100_rx_qprime(struct sfc_ef100_rxq *rxq)
{
	sfc_ef100_evq_prime(rxq->evq_prime, rxq->evq_hw_index,
			    rxq->evq_read_ptr & rxq->ptr_mask);
	rxq->evq_read_ptr_primed = rxq->evq_read_ptr;
}

static inline void
sfc_ef100_rx_qpush(struct sfc_ef100_rxq *rxq, unsigned int added)
{
	efx_dword_t dword;

	EFX_POPULATE_DWORD_1(dword, ERF_GZ_RX_RING_PIDX, added & rxq->ptr_mask);

	/* DMA sync to device is not required */

	/*
	 * rte_write32() has rte_io_wmb() which guarantees that the STORE
	 * operations (i.e. Rx and event descriptor updates) that precede
	 * the rte_io_wmb() call are visible to NIC before the STORE
	 * operations that follow it (i.e. doorbell write).
	 */
	rte_write32(dword.ed_u32[0], rxq->doorbell);

	sfc_ef100_rx_debug(rxq, "RxQ pushed doorbell at pidx %u (added=%u)",
			   EFX_DWORD_FIELD(dword, ERF_GZ_RX_RING_PIDX),
			   added);
}

static void
sfc_ef100_rx_qrefill(struct sfc_ef100_rxq *rxq)
{
	const unsigned int ptr_mask = rxq->ptr_mask;
	unsigned int free_space;
	unsigned int bulks;
	void *objs[SFC_RX_REFILL_BULK];
	unsigned int added = rxq->added;

	free_space = rxq->max_fill_level - (added - rxq->completed);

	if (free_space < rxq->refill_threshold)
		return;

	bulks = free_space / RTE_DIM(objs);
	/* refill_threshold guarantees that bulks is positive */
	SFC_ASSERT(bulks > 0);

	do {
		unsigned int id;
		unsigned int i;

		if (unlikely(rte_mempool_get_bulk(rxq->refill_mb_pool, objs,
						  RTE_DIM(objs)) < 0)) {
			struct rte_eth_dev_data *dev_data =
				rte_eth_devices[rxq->dp.dpq.port_id].data;

			/*
			 * It is hardly a safe way to increment counter
			 * from different contexts, but all PMDs do it.
			 */
			dev_data->rx_mbuf_alloc_failed += RTE_DIM(objs);
			/* Return if we have posted nothing yet */
			if (added == rxq->added)
				return;
			/* Push posted */
			break;
		}

		for (i = 0, id = added & ptr_mask;
		     i < RTE_DIM(objs);
		     ++i, ++id) {
			struct rte_mbuf *m = objs[i];
			struct sfc_ef100_rx_sw_desc *rxd;
			rte_iova_t phys_addr;

			__rte_mbuf_raw_sanity_check(m);

			SFC_ASSERT((id & ~ptr_mask) == 0);
			rxd = &rxq->sw_ring[id];
			rxd->mbuf = m;

			/*
			 * Avoid writing to mbuf. It is cheaper to do it
			 * when we receive packet and fill in nearby
			 * structure members.
			 */

			phys_addr = rte_mbuf_data_iova_default(m);
			EFX_POPULATE_QWORD_1(rxq->rxq_hw_ring[id],
					     ESF_GZ_RX_BUF_ADDR, phys_addr);
		}

		added += RTE_DIM(objs);
	} while (--bulks > 0);

	SFC_ASSERT(rxq->added != added);
	rxq->added = added;
	sfc_ef100_rx_qpush(rxq, added);
}

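/*
 * Ring accounting note (illustrative): 'added' counts descriptors ever
 * posted to hardware and 'completed' counts descriptors already taken
 * back off the ring by the datapath, so 'added - completed' is the current
 * fill level and 'index & ptr_mask' converts either counter into a ring
 * position. For example, with a 512-entry ring (ptr_mask == 511),
 * added == 515 and completed == 10 mean 505 buffers are owned by the NIC
 * and the next descriptor is posted at ring slot 515 & 511 == 3.
 */
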
static inline uint64_t
sfc_ef100_rx_nt_or_inner_l4_csum(const efx_word_t class)
{
	return EFX_WORD_FIELD(class,
			      ESF_GZ_RX_PREFIX_HCLASS_NT_OR_INNER_L4_CSUM) ==
		ESE_GZ_RH_HCLASS_L4_CSUM_GOOD ?
		PKT_RX_L4_CKSUM_GOOD : PKT_RX_L4_CKSUM_BAD;
}

static inline uint64_t
sfc_ef100_rx_tun_outer_l4_csum(const efx_word_t class)
{
	return EFX_WORD_FIELD(class,
			      ESF_GZ_RX_PREFIX_HCLASS_TUN_OUTER_L4_CSUM) ==
		ESE_GZ_RH_HCLASS_L4_CSUM_GOOD ?
		PKT_RX_OUTER_L4_CKSUM_GOOD : PKT_RX_OUTER_L4_CKSUM_BAD;
}

static uint32_t
sfc_ef100_rx_class_decode(const efx_word_t class, uint64_t *ol_flags)
{
	uint32_t ptype;
	bool no_tunnel = false;

	if (unlikely(EFX_WORD_FIELD(class, ESF_GZ_RX_PREFIX_HCLASS_L2_CLASS) !=
		     ESE_GZ_RH_HCLASS_L2_CLASS_E2_0123VLAN))
		return 0;

	switch (EFX_WORD_FIELD(class, ESF_GZ_RX_PREFIX_HCLASS_L2_N_VLAN)) {
	case 0:
		ptype = RTE_PTYPE_L2_ETHER;
		break;
	case 1:
		ptype = RTE_PTYPE_L2_ETHER_VLAN;
		break;
	default:
		ptype = RTE_PTYPE_L2_ETHER_QINQ;
		break;
	}

	switch (EFX_WORD_FIELD(class, ESF_GZ_RX_PREFIX_HCLASS_TUNNEL_CLASS)) {
	case ESE_GZ_RH_HCLASS_TUNNEL_CLASS_NONE:
		no_tunnel = true;
		break;
	case ESE_GZ_RH_HCLASS_TUNNEL_CLASS_VXLAN:
		ptype |= RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP;
		*ol_flags |= sfc_ef100_rx_tun_outer_l4_csum(class);
		break;
	case ESE_GZ_RH_HCLASS_TUNNEL_CLASS_NVGRE:
		ptype |= RTE_PTYPE_TUNNEL_NVGRE;
		break;
	case ESE_GZ_RH_HCLASS_TUNNEL_CLASS_GENEVE:
		ptype |= RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L4_UDP;
		*ol_flags |= sfc_ef100_rx_tun_outer_l4_csum(class);
		break;
	default:
		/*
		 * Driver does not know the tunnel, but it is
		 * still a tunnel and NT_OR_INNER refers to the
		 * inner frame.
		 */
		no_tunnel = false;
	}

	if (no_tunnel) {
		bool l4_valid = true;

		switch (EFX_WORD_FIELD(class,
			ESF_GZ_RX_PREFIX_HCLASS_NT_OR_INNER_L3_CLASS)) {
		case ESE_GZ_RH_HCLASS_L3_CLASS_IP4GOOD:
			ptype |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
			*ol_flags |= PKT_RX_IP_CKSUM_GOOD;
			break;
		case ESE_GZ_RH_HCLASS_L3_CLASS_IP4BAD:
			ptype |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
			*ol_flags |= PKT_RX_IP_CKSUM_BAD;
			break;
		case ESE_GZ_RH_HCLASS_L3_CLASS_IP6:
			ptype |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
			break;
		default:
			l4_valid = false;
		}

		if (l4_valid) {
			switch (EFX_WORD_FIELD(class,
				ESF_GZ_RX_PREFIX_HCLASS_NT_OR_INNER_L4_CLASS)) {
			case ESE_GZ_RH_HCLASS_L4_CLASS_TCP:
				ptype |= RTE_PTYPE_L4_TCP;
				*ol_flags |=
					sfc_ef100_rx_nt_or_inner_l4_csum(class);
				break;
			case ESE_GZ_RH_HCLASS_L4_CLASS_UDP:
				ptype |= RTE_PTYPE_L4_UDP;
				*ol_flags |=
					sfc_ef100_rx_nt_or_inner_l4_csum(class);
				break;
			case ESE_GZ_RH_HCLASS_L4_CLASS_FRAG:
				ptype |= RTE_PTYPE_L4_FRAG;
				break;
			}
		}
	} else {
		bool l4_valid = true;

		switch (EFX_WORD_FIELD(class,
			ESF_GZ_RX_PREFIX_HCLASS_TUN_OUTER_L3_CLASS)) {
		case ESE_GZ_RH_HCLASS_L3_CLASS_IP4GOOD:
			ptype |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
			break;
		case ESE_GZ_RH_HCLASS_L3_CLASS_IP4BAD:
			ptype |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
			*ol_flags |= PKT_RX_OUTER_IP_CKSUM_BAD;
			break;
		case ESE_GZ_RH_HCLASS_L3_CLASS_IP6:
			ptype |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
			break;
		}

		switch (EFX_WORD_FIELD(class,
			ESF_GZ_RX_PREFIX_HCLASS_NT_OR_INNER_L3_CLASS)) {
		case ESE_GZ_RH_HCLASS_L3_CLASS_IP4GOOD:
			ptype |= RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN;
			*ol_flags |= PKT_RX_IP_CKSUM_GOOD;
			break;
		case ESE_GZ_RH_HCLASS_L3_CLASS_IP4BAD:
			ptype |= RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN;
			*ol_flags |= PKT_RX_IP_CKSUM_BAD;
			break;
		case ESE_GZ_RH_HCLASS_L3_CLASS_IP6:
			ptype |= RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN;
			break;
		default:
			l4_valid = false;
			break;
		}

		if (l4_valid) {
			switch (EFX_WORD_FIELD(class,
				ESF_GZ_RX_PREFIX_HCLASS_NT_OR_INNER_L4_CLASS)) {
			case ESE_GZ_RH_HCLASS_L4_CLASS_TCP:
				ptype |= RTE_PTYPE_INNER_L4_TCP;
				*ol_flags |=
					sfc_ef100_rx_nt_or_inner_l4_csum(class);
				break;
			case ESE_GZ_RH_HCLASS_L4_CLASS_UDP:
				ptype |= RTE_PTYPE_INNER_L4_UDP;
				*ol_flags |=
					sfc_ef100_rx_nt_or_inner_l4_csum(class);
				break;
			case ESE_GZ_RH_HCLASS_L4_CLASS_FRAG:
				ptype |= RTE_PTYPE_INNER_L4_FRAG;
				break;
			}
		}
	}

	return ptype;
}

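/*
 * Decode example (illustrative): for a VXLAN packet with IPv4 outer and
 * TCP/IPv4 inner headers and no outer VLAN tags, the function above returns
 *	RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
 *	RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
 *	RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_INNER_L4_TCP
 * and sets PKT_RX_OUTER_L4_CKSUM_*, PKT_RX_IP_CKSUM_* and PKT_RX_L4_CKSUM_*
 * in ol_flags according to the checksum classes found in the Rx prefix.
 */
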
/*
 * The function below relies on the following fields in the Rx prefix.
 * Some fields are mandatory, some fields are optional.
 * See sfc_ef100_rx_qstart() below.
 */
static const efx_rx_prefix_layout_t sfc_ef100_rx_prefix_layout = {
	.erpl_fields	= {
#define	SFC_EF100_RX_PREFIX_FIELD(_name, _big_endian) \
	EFX_RX_PREFIX_FIELD(_name, ESF_GZ_RX_PREFIX_ ## _name, _big_endian)

		SFC_EF100_RX_PREFIX_FIELD(LENGTH, B_FALSE),
		SFC_EF100_RX_PREFIX_FIELD(RSS_HASH_VALID, B_FALSE),
		SFC_EF100_RX_PREFIX_FIELD(USER_FLAG, B_FALSE),
		SFC_EF100_RX_PREFIX_FIELD(CLASS, B_FALSE),
		SFC_EF100_RX_PREFIX_FIELD(RSS_HASH, B_FALSE),
		SFC_EF100_RX_PREFIX_FIELD(USER_MARK, B_FALSE),

#undef	SFC_EF100_RX_PREFIX_FIELD
	}
};

static bool
sfc_ef100_rx_prefix_to_offloads(const struct sfc_ef100_rxq *rxq,
				const efx_oword_t *rx_prefix,
				struct rte_mbuf *m)
{
	const efx_word_t *class;
	uint64_t ol_flags = 0;

	RTE_BUILD_BUG_ON(EFX_LOW_BIT(ESF_GZ_RX_PREFIX_CLASS) % CHAR_BIT != 0);
	RTE_BUILD_BUG_ON(EFX_WIDTH(ESF_GZ_RX_PREFIX_CLASS) % CHAR_BIT != 0);
	RTE_BUILD_BUG_ON(EFX_WIDTH(ESF_GZ_RX_PREFIX_CLASS) / CHAR_BIT !=
			 sizeof(*class));
	class = (const efx_word_t *)((const uint8_t *)rx_prefix +
			EFX_LOW_BIT(ESF_GZ_RX_PREFIX_CLASS) / CHAR_BIT);
	if (unlikely(EFX_WORD_FIELD(*class,
				    ESF_GZ_RX_PREFIX_HCLASS_L2_STATUS) !=
		     ESE_GZ_RH_HCLASS_L2_STATUS_OK))
		return false;

	m->packet_type = sfc_ef100_rx_class_decode(*class, &ol_flags);

	if ((rxq->flags & SFC_EF100_RXQ_RSS_HASH) &&
	    EFX_TEST_OWORD_BIT(rx_prefix[0],
			       ESF_GZ_RX_PREFIX_RSS_HASH_VALID_LBN)) {
		ol_flags |= PKT_RX_RSS_HASH;
		/* EFX_OWORD_FIELD converts little-endian to CPU */
		m->hash.rss = EFX_OWORD_FIELD(rx_prefix[0],
					      ESF_GZ_RX_PREFIX_RSS_HASH);
	}

	if ((rxq->flags & SFC_EF100_RXQ_USER_MARK) &&
	    EFX_TEST_OWORD_BIT(rx_prefix[0], ESF_GZ_RX_PREFIX_USER_FLAG_LBN)) {
		ol_flags |= PKT_RX_FDIR_ID;
		/* EFX_OWORD_FIELD converts little-endian to CPU */
		m->hash.fdir.hi = EFX_OWORD_FIELD(rx_prefix[0],
						  ESF_GZ_RX_PREFIX_USER_MARK);
	}

	m->ol_flags = ol_flags;
	return true;
}

static const uint8_t *
sfc_ef100_rx_pkt_prefix(const struct rte_mbuf *m)
{
	return (const uint8_t *)m->buf_addr + RTE_PKTMBUF_HEADROOM;
}

static struct rte_mbuf *
sfc_ef100_rx_next_mbuf(struct sfc_ef100_rxq *rxq)
{
	struct rte_mbuf *m;
	unsigned int id;

	/* mbuf associated with current Rx descriptor */
	m = rxq->sw_ring[rxq->completed++ & rxq->ptr_mask].mbuf;

	/* completed is already moved to the next one */
	if (unlikely(rxq->completed == rxq->added))
		goto done;

	/*
	 * Prefetch Rx prefix of the next packet.
	 * If the current packet is scattered and the next mbuf is its
	 * fragment, this simply prefetches some data - no harm since the
	 * packet rate should not be high when scatter is used.
	 */
	id = rxq->completed & rxq->ptr_mask;
	rte_prefetch0(sfc_ef100_rx_pkt_prefix(rxq->sw_ring[id].mbuf));

	if (unlikely(rxq->completed + 1 == rxq->added))
		goto done;

	/*
	 * Prefetch mbuf control structure of the next after next Rx
	 * descriptor.
	 */
	id = (id == rxq->ptr_mask) ? 0 : (id + 1);
	rte_mbuf_prefetch_part1(rxq->sw_ring[id].mbuf);

	/*
	 * If next time we will need an SW Rx descriptor from the next
	 * cache line, try to make sure that it is already in cache.
	 */
	if ((id & 0x7) == 0x7)
		rte_prefetch0(&rxq->sw_ring[(id + 1) & rxq->ptr_mask]);

done:
	return m;
}

static struct rte_mbuf **
sfc_ef100_rx_process_ready_pkts(struct sfc_ef100_rxq *rxq,
				struct rte_mbuf **rx_pkts,
				struct rte_mbuf ** const rx_pkts_end)
{
	while (rxq->ready_pkts > 0 && rx_pkts != rx_pkts_end) {
		struct rte_mbuf *pkt;
		struct rte_mbuf *lastseg;
		const efx_oword_t *rx_prefix;
		uint16_t pkt_len;
		uint16_t seg_len;
		bool deliver;

		rxq->ready_pkts--;

		pkt = sfc_ef100_rx_next_mbuf(rxq);
		__rte_mbuf_raw_sanity_check(pkt);

		RTE_BUILD_BUG_ON(sizeof(pkt->rearm_data[0]) !=
				 sizeof(rxq->rearm_data));
		pkt->rearm_data[0] = rxq->rearm_data;

		/* data_off already moved past Rx prefix */
		rx_prefix = (const efx_oword_t *)sfc_ef100_rx_pkt_prefix(pkt);

		pkt_len = EFX_OWORD_FIELD(rx_prefix[0],
					  ESF_GZ_RX_PREFIX_LENGTH);
		SFC_ASSERT(pkt_len > 0);
		rte_pktmbuf_pkt_len(pkt) = pkt_len;

		seg_len = RTE_MIN(pkt_len, rxq->buf_size - rxq->prefix_size);
		rte_pktmbuf_data_len(pkt) = seg_len;

		deliver = sfc_ef100_rx_prefix_to_offloads(rxq, rx_prefix, pkt);

		lastseg = pkt;
		while ((pkt_len -= seg_len) > 0) {
			struct rte_mbuf *seg;

			seg = sfc_ef100_rx_next_mbuf(rxq);
			__rte_mbuf_raw_sanity_check(seg);

			seg->data_off = RTE_PKTMBUF_HEADROOM;

			seg_len = RTE_MIN(pkt_len, rxq->buf_size);
			rte_pktmbuf_data_len(seg) = seg_len;
			rte_pktmbuf_pkt_len(seg) = seg_len;

			pkt->nb_segs++;
			lastseg->next = seg;
			lastseg = seg;
		}

		if (likely(deliver))
			*rx_pkts++ = pkt;
		else
			rte_pktmbuf_free(pkt);
	}

	return rx_pkts;
}

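/*
 * Scatter example (illustrative): with buf_size == 2048, prefix_size == 22
 * and a 3000-byte packet, the loop above builds a 2-segment chain: the head
 * mbuf carries RTE_MIN(3000, 2048 - 22) == 2026 bytes after the prefix and
 * the second mbuf carries the remaining 974 bytes starting at
 * RTE_PKTMBUF_HEADROOM (only the first buffer of a packet has an Rx prefix).
 */
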
static bool
sfc_ef100_rx_get_event(struct sfc_ef100_rxq *rxq, efx_qword_t *ev)
{
	*ev = rxq->evq_hw_ring[rxq->evq_read_ptr & rxq->ptr_mask];

	if (!sfc_ef100_ev_present(ev,
			(rxq->evq_read_ptr >> rxq->evq_phase_bit_shift) & 1))
		return false;

	if (unlikely(!sfc_ef100_ev_type_is(ev, ESE_GZ_EF100_EV_RX_PKTS))) {
		/*
		 * Do not move read_ptr to keep the event for exception
		 * handling by the control path.
		 */
		rxq->flags |= SFC_EF100_RXQ_EXCEPTION;
		sfc_ef100_rx_err(rxq,
			"RxQ exception at EvQ ptr %u(%#x), event %08x:%08x",
			rxq->evq_read_ptr, rxq->evq_read_ptr & rxq->ptr_mask,
			EFX_QWORD_FIELD(*ev, EFX_DWORD_1),
			EFX_QWORD_FIELD(*ev, EFX_DWORD_0));
		return false;
	}

	sfc_ef100_rx_debug(rxq, "RxQ got event %08x:%08x at %u (%#x)",
			   EFX_QWORD_FIELD(*ev, EFX_DWORD_1),
			   EFX_QWORD_FIELD(*ev, EFX_DWORD_0),
			   rxq->evq_read_ptr,
			   rxq->evq_read_ptr & rxq->ptr_mask);

	rxq->evq_read_ptr++;
	return true;
}

static uint16_t
sfc_ef100_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
	struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(rx_queue);
	struct rte_mbuf ** const rx_pkts_end = &rx_pkts[nb_pkts];
	efx_qword_t rx_ev;

	rx_pkts = sfc_ef100_rx_process_ready_pkts(rxq, rx_pkts, rx_pkts_end);

	if (unlikely(rxq->flags &
		     (SFC_EF100_RXQ_NOT_RUNNING | SFC_EF100_RXQ_EXCEPTION)))
		goto done;

	while (rx_pkts != rx_pkts_end && sfc_ef100_rx_get_event(rxq, &rx_ev)) {
		rxq->ready_pkts =
			EFX_QWORD_FIELD(rx_ev, ESF_GZ_EV_RXPKTS_NUM_PKT);
		rx_pkts = sfc_ef100_rx_process_ready_pkts(rxq, rx_pkts,
							  rx_pkts_end);
	}

	/* It is not a problem if we refill in the case of exception */
	sfc_ef100_rx_qrefill(rxq);

	if ((rxq->flags & SFC_EF100_RXQ_FLAG_INTR_EN) &&
	    rxq->evq_read_ptr_primed != rxq->evq_read_ptr)
		sfc_ef100_rx_qprime(rxq);

done:
	return nb_pkts - (rx_pkts_end - rx_pkts);
}

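/*
 * Usage sketch (illustrative, not part of the PMD): sfc_ef100_recv_pkts()
 * is installed as the datapath pkt_burst handler and is normally reached
 * through the generic ethdev API, e.g.:
 *
 *	struct rte_mbuf *pkts[32];
 *	uint16_t nb_rx = rte_eth_rx_burst(port_id, queue_id, pkts,
 *					  RTE_DIM(pkts));
 */
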
static const uint32_t *
sfc_ef100_supported_ptypes_get(__rte_unused uint32_t tunnel_encaps)
{
	static const uint32_t ef100_native_ptypes[] = {
		RTE_PTYPE_L2_ETHER,
		RTE_PTYPE_L2_ETHER_VLAN,
		RTE_PTYPE_L2_ETHER_QINQ,
		RTE_PTYPE_L3_IPV4_EXT_UNKNOWN,
		RTE_PTYPE_L3_IPV6_EXT_UNKNOWN,
		RTE_PTYPE_L4_TCP,
		RTE_PTYPE_L4_UDP,
		RTE_PTYPE_L4_FRAG,
		RTE_PTYPE_TUNNEL_VXLAN,
		RTE_PTYPE_TUNNEL_NVGRE,
		RTE_PTYPE_TUNNEL_GENEVE,
		RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN,
		RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN,
		RTE_PTYPE_INNER_L4_TCP,
		RTE_PTYPE_INNER_L4_UDP,
		RTE_PTYPE_INNER_L4_FRAG,
		RTE_PTYPE_UNKNOWN
	};

	return ef100_native_ptypes;
}

static sfc_dp_rx_qdesc_npending_t sfc_ef100_rx_qdesc_npending;
static unsigned int
sfc_ef100_rx_qdesc_npending(__rte_unused struct sfc_dp_rxq *dp_rxq)
{
	return 0;
}

static sfc_dp_rx_qdesc_status_t sfc_ef100_rx_qdesc_status;
static int
sfc_ef100_rx_qdesc_status(__rte_unused struct sfc_dp_rxq *dp_rxq,
			  __rte_unused uint16_t offset)
{
	return -ENOTSUP;
}


static sfc_dp_rx_get_dev_info_t sfc_ef100_rx_get_dev_info;
static void
sfc_ef100_rx_get_dev_info(struct rte_eth_dev_info *dev_info)
{
	/*
	 * Number of descriptors just defines maximum number of pushed
	 * descriptors (fill level).
	 */
	dev_info->rx_desc_lim.nb_min = SFC_RX_REFILL_BULK;
	dev_info->rx_desc_lim.nb_align = SFC_RX_REFILL_BULK;
}


static sfc_dp_rx_qsize_up_rings_t sfc_ef100_rx_qsize_up_rings;
static int
sfc_ef100_rx_qsize_up_rings(uint16_t nb_rx_desc,
			    struct sfc_dp_rx_hw_limits *limits,
			    __rte_unused struct rte_mempool *mb_pool,
			    unsigned int *rxq_entries,
			    unsigned int *evq_entries,
			    unsigned int *rxq_max_fill_level)
{
	/*
	 * rte_ethdev API guarantees that the number meets min, max and
	 * alignment requirements.
	 */
	if (nb_rx_desc <= limits->rxq_min_entries)
		*rxq_entries = limits->rxq_min_entries;
	else
		*rxq_entries = rte_align32pow2(nb_rx_desc);

	*evq_entries = *rxq_entries;

	*rxq_max_fill_level = RTE_MIN(nb_rx_desc,
				      SFC_EF100_RXQ_LIMIT(*evq_entries));
	return 0;
}

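/*
 * Sizing example (illustrative): a request for 1000 Rx descriptors with
 * rxq_min_entries == 512 gives *rxq_entries == rte_align32pow2(1000) ==
 * 1024, *evq_entries == 1024 and *rxq_max_fill_level ==
 * RTE_MIN(1000, SFC_EF100_RXQ_LIMIT(1024)) == RTE_MIN(1000, 1021) == 1000.
 */
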
static uint64_t
sfc_ef100_mk_mbuf_rearm_data(uint16_t port_id, uint16_t prefix_size)
{
	struct rte_mbuf m;

	memset(&m, 0, sizeof(m));

	rte_mbuf_refcnt_set(&m, 1);
	m.data_off = RTE_PKTMBUF_HEADROOM + prefix_size;
	m.nb_segs = 1;
	m.port = port_id;

	/* rearm_data covers structure members filled in above */
	rte_compiler_barrier();
	RTE_BUILD_BUG_ON(sizeof(m.rearm_data[0]) != sizeof(uint64_t));
	return m.rearm_data[0];
}

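/*
 * Rearm data note (illustrative): the 64-bit value built above is copied
 * into every received mbuf in a single store (pkt->rearm_data[0] = ...),
 * which initialises refcnt, nb_segs, port and data_off at once. For
 * example, with RTE_PKTMBUF_HEADROOM == 128 and a 22-byte Rx prefix the
 * head mbuf gets data_off == 150, i.e. the packet data starts right after
 * the prefix written at the beginning of the buffer.
 */
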
static sfc_dp_rx_qcreate_t sfc_ef100_rx_qcreate;
static int
sfc_ef100_rx_qcreate(uint16_t port_id, uint16_t queue_id,
		     const struct rte_pci_addr *pci_addr, int socket_id,
		     const struct sfc_dp_rx_qcreate_info *info,
		     struct sfc_dp_rxq **dp_rxqp)
{
	struct sfc_ef100_rxq *rxq;
	int rc;

	rc = EINVAL;
	if (info->rxq_entries != info->evq_entries)
		goto fail_rxq_args;

	rc = ENOMEM;
	rxq = rte_zmalloc_socket("sfc-ef100-rxq", sizeof(*rxq),
				 RTE_CACHE_LINE_SIZE, socket_id);
	if (rxq == NULL)
		goto fail_rxq_alloc;

	sfc_dp_queue_init(&rxq->dp.dpq, port_id, queue_id, pci_addr);

	rc = ENOMEM;
	rxq->sw_ring = rte_calloc_socket("sfc-ef100-rxq-sw_ring",
					 info->rxq_entries,
					 sizeof(*rxq->sw_ring),
					 RTE_CACHE_LINE_SIZE, socket_id);
	if (rxq->sw_ring == NULL)
		goto fail_desc_alloc;

	rxq->flags |= SFC_EF100_RXQ_NOT_RUNNING;
	rxq->ptr_mask = info->rxq_entries - 1;
	rxq->evq_phase_bit_shift = rte_bsf32(info->evq_entries);
	rxq->evq_hw_ring = info->evq_hw_ring;
	rxq->max_fill_level = info->max_fill_level;
	rxq->refill_threshold = info->refill_threshold;
	rxq->prefix_size = info->prefix_size;
	rxq->buf_size = info->buf_size;
	rxq->refill_mb_pool = info->refill_mb_pool;
	rxq->rxq_hw_ring = info->rxq_hw_ring;
	rxq->doorbell = (volatile uint8_t *)info->mem_bar +
			ER_GZ_RX_RING_DOORBELL_OFST +
			(info->hw_index << info->vi_window_shift);

	rxq->evq_hw_index = info->evq_hw_index;
	rxq->evq_prime = (volatile uint8_t *)info->mem_bar +
			 info->fcw_offset +
			 ER_GZ_EVQ_INT_PRIME_OFST;

	sfc_ef100_rx_debug(rxq, "RxQ doorbell is %p", rxq->doorbell);

	*dp_rxqp = &rxq->dp;
	return 0;

fail_desc_alloc:
	rte_free(rxq);

fail_rxq_alloc:
fail_rxq_args:
	return rc;
}

static sfc_dp_rx_qdestroy_t sfc_ef100_rx_qdestroy;
static void
sfc_ef100_rx_qdestroy(struct sfc_dp_rxq *dp_rxq)
{
	struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);

	rte_free(rxq->sw_ring);
	rte_free(rxq);
}

static sfc_dp_rx_qstart_t sfc_ef100_rx_qstart;
static int
sfc_ef100_rx_qstart(struct sfc_dp_rxq *dp_rxq, unsigned int evq_read_ptr,
		    const efx_rx_prefix_layout_t *pinfo)
{
	struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);
	uint32_t unsup_rx_prefix_fields;

	SFC_ASSERT(rxq->completed == 0);
	SFC_ASSERT(rxq->added == 0);

	/* Prefix must fit into reserved Rx buffer space */
	if (pinfo->erpl_length > rxq->prefix_size)
		return ENOTSUP;

	unsup_rx_prefix_fields =
		efx_rx_prefix_layout_check(pinfo, &sfc_ef100_rx_prefix_layout);

	/* LENGTH and CLASS fields must always be present */
	if ((unsup_rx_prefix_fields &
	     ((1U << EFX_RX_PREFIX_FIELD_LENGTH) |
	      (1U << EFX_RX_PREFIX_FIELD_CLASS))) != 0)
		return ENOTSUP;

	if ((unsup_rx_prefix_fields &
	     ((1U << EFX_RX_PREFIX_FIELD_RSS_HASH_VALID) |
	      (1U << EFX_RX_PREFIX_FIELD_RSS_HASH))) == 0)
		rxq->flags |= SFC_EF100_RXQ_RSS_HASH;
	else
		rxq->flags &= ~SFC_EF100_RXQ_RSS_HASH;

	if ((unsup_rx_prefix_fields &
	     ((1U << EFX_RX_PREFIX_FIELD_USER_FLAG) |
	      (1U << EFX_RX_PREFIX_FIELD_USER_MARK))) == 0)
		rxq->flags |= SFC_EF100_RXQ_USER_MARK;
	else
		rxq->flags &= ~SFC_EF100_RXQ_USER_MARK;

	rxq->prefix_size = pinfo->erpl_length;
	rxq->rearm_data = sfc_ef100_mk_mbuf_rearm_data(rxq->dp.dpq.port_id,
						       rxq->prefix_size);

	sfc_ef100_rx_qrefill(rxq);

	rxq->evq_read_ptr = evq_read_ptr;

	rxq->flags |= SFC_EF100_RXQ_STARTED;
	rxq->flags &= ~(SFC_EF100_RXQ_NOT_RUNNING | SFC_EF100_RXQ_EXCEPTION);

	if (rxq->flags & SFC_EF100_RXQ_FLAG_INTR_EN)
		sfc_ef100_rx_qprime(rxq);

	return 0;
}

static sfc_dp_rx_qstop_t sfc_ef100_rx_qstop;
static void
sfc_ef100_rx_qstop(struct sfc_dp_rxq *dp_rxq, unsigned int *evq_read_ptr)
{
	struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);

	rxq->flags |= SFC_EF100_RXQ_NOT_RUNNING;

	*evq_read_ptr = rxq->evq_read_ptr;
}

static sfc_dp_rx_qrx_ev_t sfc_ef100_rx_qrx_ev;
static bool
sfc_ef100_rx_qrx_ev(struct sfc_dp_rxq *dp_rxq, __rte_unused unsigned int id)
{
	__rte_unused struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);

	SFC_ASSERT(rxq->flags & SFC_EF100_RXQ_NOT_RUNNING);

	/*
	 * It is safe to ignore Rx event since we free all mbufs on
	 * queue purge anyway.
	 */

	return false;
}

static sfc_dp_rx_qpurge_t sfc_ef100_rx_qpurge;
static void
sfc_ef100_rx_qpurge(struct sfc_dp_rxq *dp_rxq)
{
	struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);
	unsigned int i;
	struct sfc_ef100_rx_sw_desc *rxd;

	for (i = rxq->completed; i != rxq->added; ++i) {
		rxd = &rxq->sw_ring[i & rxq->ptr_mask];
		rte_mbuf_raw_free(rxd->mbuf);
		rxd->mbuf = NULL;
	}

	rxq->completed = rxq->added = 0;
	rxq->ready_pkts = 0;

	rxq->flags &= ~SFC_EF100_RXQ_STARTED;
}

static sfc_dp_rx_intr_enable_t sfc_ef100_rx_intr_enable;
static int
sfc_ef100_rx_intr_enable(struct sfc_dp_rxq *dp_rxq)
{
	struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);

	rxq->flags |= SFC_EF100_RXQ_FLAG_INTR_EN;
	if (rxq->flags & SFC_EF100_RXQ_STARTED)
		sfc_ef100_rx_qprime(rxq);
	return 0;
}

static sfc_dp_rx_intr_disable_t sfc_ef100_rx_intr_disable;
static int
sfc_ef100_rx_intr_disable(struct sfc_dp_rxq *dp_rxq)
{
	struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);

	/* Cannot disarm, just disable rearm */
	rxq->flags &= ~SFC_EF100_RXQ_FLAG_INTR_EN;
	return 0;
}

struct sfc_dp_rx sfc_ef100_rx = {
	.dp = {
		.name		= SFC_KVARG_DATAPATH_EF100,
		.type		= SFC_DP_RX,
		.hw_fw_caps	= SFC_DP_HW_FW_CAP_EF100,
	},
	.features		= SFC_DP_RX_FEAT_MULTI_PROCESS |
				  SFC_DP_RX_FEAT_INTR,
	.dev_offload_capa	= 0,
	.queue_offload_capa	= DEV_RX_OFFLOAD_CHECKSUM |
				  DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM |
				  DEV_RX_OFFLOAD_OUTER_UDP_CKSUM |
				  DEV_RX_OFFLOAD_SCATTER |
				  DEV_RX_OFFLOAD_RSS_HASH,
	.get_dev_info		= sfc_ef100_rx_get_dev_info,
	.qsize_up_rings		= sfc_ef100_rx_qsize_up_rings,
	.qcreate		= sfc_ef100_rx_qcreate,
	.qdestroy		= sfc_ef100_rx_qdestroy,
	.qstart			= sfc_ef100_rx_qstart,
	.qstop			= sfc_ef100_rx_qstop,
	.qrx_ev			= sfc_ef100_rx_qrx_ev,
	.qpurge			= sfc_ef100_rx_qpurge,
	.supported_ptypes_get	= sfc_ef100_supported_ptypes_get,
	.qdesc_npending		= sfc_ef100_rx_qdesc_npending,
	.qdesc_status		= sfc_ef100_rx_qdesc_status,
	.intr_enable		= sfc_ef100_rx_intr_enable,
	.intr_disable		= sfc_ef100_rx_intr_disable,
	.pkt_burst		= sfc_ef100_recv_pkts,
};

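/*
 * Selection note (illustrative): this datapath is registered under the
 * name "ef100" (SFC_KVARG_DATAPATH_EF100). Its hw_fw_caps restrict it to
 * EF100 hardware, and it can typically be requested explicitly through the
 * sfc rx_datapath device argument, e.g. "-a <PCI BDF>,rx_datapath=ef100".
 */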