/* SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright(c) 2019-2021 Xilinx, Inc.
 * Copyright(c) 2018-2019 Solarflare Communications Inc.
 *
 * This software was jointly developed between OKTET Labs (under contract
 * for Solarflare) and Solarflare Communications, Inc.
 */

/* EF100 native datapath implementation */

#include <stdbool.h>

#include <rte_byteorder.h>
#include <rte_mbuf_ptype.h>
#include <rte_mbuf.h>
#include <rte_io.h>

#include "efx_types.h"
#include "efx_regs_ef100.h"
#include "efx.h"

#include "sfc_debug.h"
#include "sfc_tweak.h"
#include "sfc_dp_rx.h"
#include "sfc_kvargs.h"
#include "sfc_ef100.h"


#define sfc_ef100_rx_err(_rxq, ...) \
	SFC_DP_LOG(SFC_KVARG_DATAPATH_EF100, ERR, &(_rxq)->dp.dpq, __VA_ARGS__)

#define sfc_ef100_rx_debug(_rxq, ...) \
	SFC_DP_LOG(SFC_KVARG_DATAPATH_EF100, DEBUG, &(_rxq)->dp.dpq, \
		   __VA_ARGS__)

/**
 * Maximum number of descriptors/buffers in the Rx ring.
 * It should guarantee that the corresponding event queue never overfills.
 * The EF100 native datapath uses an event queue of the same size as the
 * Rx queue. The maximum number of events on the datapath can be estimated
 * as the number of Rx queue entries (one event per Rx buffer in the worst
 * case) plus Rx error and flush events.
 */
#define SFC_EF100_RXQ_LIMIT(_ndesc) \
	((_ndesc) - 1 /* head must not step on tail */ - \
	 1 /* Rx error */ - 1 /* flush */)

/** Invalid user mark value when the mark should be treated as unset */
#define SFC_EF100_USER_MARK_INVALID	0

struct sfc_ef100_rx_sw_desc {
	struct rte_mbuf			*mbuf;
};

struct sfc_ef100_rxq {
	/* Used on data path */
	unsigned int			flags;
#define SFC_EF100_RXQ_STARTED		0x1
#define SFC_EF100_RXQ_NOT_RUNNING	0x2
#define SFC_EF100_RXQ_EXCEPTION		0x4
#define SFC_EF100_RXQ_RSS_HASH		0x10
#define SFC_EF100_RXQ_USER_MARK		0x20
#define SFC_EF100_RXQ_FLAG_INTR_EN	0x40
	unsigned int			ptr_mask;
	unsigned int			evq_phase_bit_shift;
	unsigned int			ready_pkts;
	unsigned int			completed;
	unsigned int			evq_read_ptr;
	unsigned int			evq_read_ptr_primed;
	volatile efx_qword_t		*evq_hw_ring;
	struct sfc_ef100_rx_sw_desc	*sw_ring;
	uint64_t			rearm_data;
	uint16_t			buf_size;
	uint16_t			prefix_size;

	unsigned int			evq_hw_index;
	volatile void			*evq_prime;

	/* Used on refill */
	unsigned int			added;
	unsigned int			max_fill_level;
	unsigned int			refill_threshold;
	struct rte_mempool		*refill_mb_pool;
	efx_qword_t			*rxq_hw_ring;
	volatile void			*doorbell;

	/* Datapath receive queue anchor */
	struct sfc_dp_rxq		dp;
};

static inline struct sfc_ef100_rxq *
sfc_ef100_rxq_by_dp_rxq(struct sfc_dp_rxq *dp_rxq)
{
	return container_of(dp_rxq, struct sfc_ef100_rxq, dp);
}

static void
sfc_ef100_rx_qprime(struct sfc_ef100_rxq *rxq)
{
	sfc_ef100_evq_prime(rxq->evq_prime, rxq->evq_hw_index,
			    rxq->evq_read_ptr & rxq->ptr_mask);
	rxq->evq_read_ptr_primed = rxq->evq_read_ptr;
}

static inline void
sfc_ef100_rx_qpush(struct sfc_ef100_rxq *rxq, unsigned int added)
{
	efx_dword_t dword;

	EFX_POPULATE_DWORD_1(dword, ERF_GZ_RX_RING_PIDX, added & rxq->ptr_mask);

	/* DMA sync to device is not required */

	/*
	 * rte_write32() has rte_io_wmb() which guarantees that the STORE
	 * operations (i.e. Rx and event descriptor updates) that precede
	 * the rte_io_wmb() call are visible to the NIC before the STORE
	 * operations that follow it (i.e.
	 * the doorbell write).
	 */
	rte_write32(dword.ed_u32[0], rxq->doorbell);
	rxq->dp.dpq.rx_dbells++;

	sfc_ef100_rx_debug(rxq, "RxQ pushed doorbell at pidx %u (added=%u)",
			   EFX_DWORD_FIELD(dword, ERF_GZ_RX_RING_PIDX),
			   added);
}

static void
sfc_ef100_rx_qrefill(struct sfc_ef100_rxq *rxq)
{
	const unsigned int ptr_mask = rxq->ptr_mask;
	unsigned int free_space;
	unsigned int bulks;
	void *objs[SFC_RX_REFILL_BULK];
	unsigned int added = rxq->added;

	free_space = rxq->max_fill_level - (added - rxq->completed);

	if (free_space < rxq->refill_threshold)
		return;

	bulks = free_space / RTE_DIM(objs);
	/* refill_threshold guarantees that bulks is positive */
	SFC_ASSERT(bulks > 0);

	do {
		unsigned int id;
		unsigned int i;

		if (unlikely(rte_mempool_get_bulk(rxq->refill_mb_pool, objs,
						  RTE_DIM(objs)) < 0)) {
			struct rte_eth_dev_data *dev_data =
				rte_eth_devices[rxq->dp.dpq.port_id].data;

			/*
			 * It is hardly a safe way to increment a counter
			 * from different contexts, but all PMDs do it.
			 */
			dev_data->rx_mbuf_alloc_failed += RTE_DIM(objs);
			/* Return if we have posted nothing yet */
			if (added == rxq->added)
				return;
			/* Push posted */
			break;
		}

		for (i = 0, id = added & ptr_mask;
		     i < RTE_DIM(objs);
		     ++i, ++id) {
			struct rte_mbuf *m = objs[i];
			struct sfc_ef100_rx_sw_desc *rxd;
			rte_iova_t phys_addr;

			__rte_mbuf_raw_sanity_check(m);

			SFC_ASSERT((id & ~ptr_mask) == 0);
			rxd = &rxq->sw_ring[id];
			rxd->mbuf = m;

			/*
			 * Avoid writing to the mbuf. It is cheaper to do it
			 * when we receive the packet and fill in nearby
			 * structure members.
			 */

			phys_addr = rte_mbuf_data_iova_default(m);
			EFX_POPULATE_QWORD_1(rxq->rxq_hw_ring[id],
					     ESF_GZ_RX_BUF_ADDR, phys_addr);
		}

		added += RTE_DIM(objs);
	} while (--bulks > 0);

	SFC_ASSERT(rxq->added != added);
	rxq->added = added;
	sfc_ef100_rx_qpush(rxq, added);
}

static inline uint64_t
sfc_ef100_rx_nt_or_inner_l4_csum(const efx_word_t class)
{
	return EFX_WORD_FIELD(class,
			      ESF_GZ_RX_PREFIX_HCLASS_NT_OR_INNER_L4_CSUM) ==
		ESE_GZ_RH_HCLASS_L4_CSUM_GOOD ?
		PKT_RX_L4_CKSUM_GOOD : PKT_RX_L4_CKSUM_BAD;
}

static inline uint64_t
sfc_ef100_rx_tun_outer_l4_csum(const efx_word_t class)
{
	return EFX_WORD_FIELD(class,
			      ESF_GZ_RX_PREFIX_HCLASS_TUN_OUTER_L4_CSUM) ==
		ESE_GZ_RH_HCLASS_L4_CSUM_GOOD ?
		PKT_RX_OUTER_L4_CKSUM_GOOD : PKT_RX_OUTER_L4_CKSUM_BAD;
}

static uint32_t
sfc_ef100_rx_class_decode(const efx_word_t class, uint64_t *ol_flags)
{
	uint32_t ptype;
	bool no_tunnel = false;

	if (unlikely(EFX_WORD_FIELD(class, ESF_GZ_RX_PREFIX_HCLASS_L2_CLASS) !=
		     ESE_GZ_RH_HCLASS_L2_CLASS_E2_0123VLAN))
		return 0;

	switch (EFX_WORD_FIELD(class, ESF_GZ_RX_PREFIX_HCLASS_L2_N_VLAN)) {
	case 0:
		ptype = RTE_PTYPE_L2_ETHER;
		break;
	case 1:
		ptype = RTE_PTYPE_L2_ETHER_VLAN;
		break;
	default:
		ptype = RTE_PTYPE_L2_ETHER_QINQ;
		break;
	}

	switch (EFX_WORD_FIELD(class, ESF_GZ_RX_PREFIX_HCLASS_TUNNEL_CLASS)) {
	case ESE_GZ_RH_HCLASS_TUNNEL_CLASS_NONE:
		no_tunnel = true;
		break;
	case ESE_GZ_RH_HCLASS_TUNNEL_CLASS_VXLAN:
		ptype |= RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP;
		*ol_flags |= sfc_ef100_rx_tun_outer_l4_csum(class);
		break;
	case ESE_GZ_RH_HCLASS_TUNNEL_CLASS_NVGRE:
		ptype |= RTE_PTYPE_TUNNEL_NVGRE;
		break;
	case ESE_GZ_RH_HCLASS_TUNNEL_CLASS_GENEVE:
		ptype |= RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L4_UDP;
		*ol_flags |= sfc_ef100_rx_tun_outer_l4_csum(class);
		break;
	default:
		/*
		 * The driver does not know the tunnel, but it is still a
		 * tunnel and NT_OR_INNER refers to the inner frame.
		 */
		no_tunnel = false;
	}

	if (no_tunnel) {
		bool l4_valid = true;

		switch (EFX_WORD_FIELD(class,
			ESF_GZ_RX_PREFIX_HCLASS_NT_OR_INNER_L3_CLASS)) {
		case ESE_GZ_RH_HCLASS_L3_CLASS_IP4GOOD:
			ptype |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
			*ol_flags |= PKT_RX_IP_CKSUM_GOOD;
			break;
		case ESE_GZ_RH_HCLASS_L3_CLASS_IP4BAD:
			ptype |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
			*ol_flags |= PKT_RX_IP_CKSUM_BAD;
			break;
		case ESE_GZ_RH_HCLASS_L3_CLASS_IP6:
			ptype |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
			break;
		default:
			l4_valid = false;
		}

		if (l4_valid) {
			switch (EFX_WORD_FIELD(class,
				ESF_GZ_RX_PREFIX_HCLASS_NT_OR_INNER_L4_CLASS)) {
			case ESE_GZ_RH_HCLASS_L4_CLASS_TCP:
				ptype |= RTE_PTYPE_L4_TCP;
				*ol_flags |=
					sfc_ef100_rx_nt_or_inner_l4_csum(class);
				break;
			case ESE_GZ_RH_HCLASS_L4_CLASS_UDP:
				ptype |= RTE_PTYPE_L4_UDP;
				*ol_flags |=
					sfc_ef100_rx_nt_or_inner_l4_csum(class);
				break;
			case ESE_GZ_RH_HCLASS_L4_CLASS_FRAG:
				ptype |= RTE_PTYPE_L4_FRAG;
				break;
			}
		}
	} else {
		bool l4_valid = true;

		switch (EFX_WORD_FIELD(class,
			ESF_GZ_RX_PREFIX_HCLASS_TUN_OUTER_L3_CLASS)) {
		case ESE_GZ_RH_HCLASS_L3_CLASS_IP4GOOD:
			ptype |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
			break;
		case ESE_GZ_RH_HCLASS_L3_CLASS_IP4BAD:
			ptype |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
			*ol_flags |= PKT_RX_OUTER_IP_CKSUM_BAD;
			break;
		case ESE_GZ_RH_HCLASS_L3_CLASS_IP6:
			ptype |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
			break;
		}

		switch (EFX_WORD_FIELD(class,
			ESF_GZ_RX_PREFIX_HCLASS_NT_OR_INNER_L3_CLASS)) {
		case ESE_GZ_RH_HCLASS_L3_CLASS_IP4GOOD:
			ptype |= RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN;
			*ol_flags |= PKT_RX_IP_CKSUM_GOOD;
			break;
		case ESE_GZ_RH_HCLASS_L3_CLASS_IP4BAD:
			ptype |= RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN;
			*ol_flags |= PKT_RX_IP_CKSUM_BAD;
			break;
		case ESE_GZ_RH_HCLASS_L3_CLASS_IP6:
			ptype |= RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN;
			break;
		default:
			l4_valid = false;
			break;
		}

		if (l4_valid) {
			switch (EFX_WORD_FIELD(class,
				ESF_GZ_RX_PREFIX_HCLASS_NT_OR_INNER_L4_CLASS)) {
			case ESE_GZ_RH_HCLASS_L4_CLASS_TCP:
				ptype |= RTE_PTYPE_INNER_L4_TCP;
				*ol_flags |=
					sfc_ef100_rx_nt_or_inner_l4_csum(class);
				break;
			case ESE_GZ_RH_HCLASS_L4_CLASS_UDP:
				ptype |= RTE_PTYPE_INNER_L4_UDP;
				*ol_flags |=
					sfc_ef100_rx_nt_or_inner_l4_csum(class);
				break;
			case ESE_GZ_RH_HCLASS_L4_CLASS_FRAG:
				ptype |= RTE_PTYPE_INNER_L4_FRAG;
				break;
			}
		}
	}

	return ptype;
}

/*
 * The function below relies on the following fields in the Rx prefix.
 * Some fields are mandatory, some fields are optional.
 * See sfc_ef100_rx_qstart() below.
 */
static const efx_rx_prefix_layout_t sfc_ef100_rx_prefix_layout = {
	.erpl_fields	= {
#define	SFC_EF100_RX_PREFIX_FIELD(_name, _big_endian) \
	EFX_RX_PREFIX_FIELD(_name, ESF_GZ_RX_PREFIX_ ## _name, _big_endian)

		SFC_EF100_RX_PREFIX_FIELD(LENGTH, B_FALSE),
		SFC_EF100_RX_PREFIX_FIELD(RSS_HASH_VALID, B_FALSE),
		SFC_EF100_RX_PREFIX_FIELD(CLASS, B_FALSE),
		SFC_EF100_RX_PREFIX_FIELD(RSS_HASH, B_FALSE),
		SFC_EF100_RX_PREFIX_FIELD(USER_MARK, B_FALSE),

#undef	SFC_EF100_RX_PREFIX_FIELD
	}
};

static bool
sfc_ef100_rx_prefix_to_offloads(const struct sfc_ef100_rxq *rxq,
				const efx_oword_t *rx_prefix,
				struct rte_mbuf *m)
{
	const efx_word_t *class;
	uint64_t ol_flags = 0;

	RTE_BUILD_BUG_ON(EFX_LOW_BIT(ESF_GZ_RX_PREFIX_CLASS) % CHAR_BIT != 0);
	RTE_BUILD_BUG_ON(EFX_WIDTH(ESF_GZ_RX_PREFIX_CLASS) % CHAR_BIT != 0);
	RTE_BUILD_BUG_ON(EFX_WIDTH(ESF_GZ_RX_PREFIX_CLASS) / CHAR_BIT !=
			 sizeof(*class));
	class = (const efx_word_t *)((const uint8_t *)rx_prefix +
			EFX_LOW_BIT(ESF_GZ_RX_PREFIX_CLASS) / CHAR_BIT);
	if (unlikely(EFX_WORD_FIELD(*class,
				    ESF_GZ_RX_PREFIX_HCLASS_L2_STATUS) !=
		     ESE_GZ_RH_HCLASS_L2_STATUS_OK))
		return false;

	m->packet_type = sfc_ef100_rx_class_decode(*class, &ol_flags);

	if ((rxq->flags & SFC_EF100_RXQ_RSS_HASH) &&
	    EFX_TEST_OWORD_BIT(rx_prefix[0],
			       ESF_GZ_RX_PREFIX_RSS_HASH_VALID_LBN)) {
		ol_flags |= PKT_RX_RSS_HASH;
		/* EFX_OWORD_FIELD converts little-endian to CPU order */
		m->hash.rss = EFX_OWORD_FIELD(rx_prefix[0],
					      ESF_GZ_RX_PREFIX_RSS_HASH);
	}

	if (rxq->flags & SFC_EF100_RXQ_USER_MARK) {
		uint32_t user_mark;

		/* EFX_OWORD_FIELD converts little-endian to CPU order */
		user_mark = EFX_OWORD_FIELD(rx_prefix[0],
					    ESF_GZ_RX_PREFIX_USER_MARK);
		if (user_mark != SFC_EF100_USER_MARK_INVALID) {
			ol_flags |= PKT_RX_FDIR_ID;
			m->hash.fdir.hi = user_mark;
		}
	}

	m->ol_flags = ol_flags;
	return true;
}

static const uint8_t *
sfc_ef100_rx_pkt_prefix(const struct rte_mbuf *m)
{
	return (const uint8_t *)m->buf_addr + RTE_PKTMBUF_HEADROOM;
}

static struct rte_mbuf *
sfc_ef100_rx_next_mbuf(struct sfc_ef100_rxq *rxq)
{
	struct rte_mbuf *m;
	unsigned int id;

	/* mbuf associated with current Rx descriptor */
	m = rxq->sw_ring[rxq->completed++ & rxq->ptr_mask].mbuf;

	/* completed is already moved to the next one */
	if (unlikely(rxq->completed == rxq->added))
		goto done;

	/*
	 * Prefetch the Rx prefix of the next packet.
	 * If the current packet is scattered and the next mbuf is its
	 * fragment, this simply prefetches some data; no harm, since the
	 * packet rate should not be high when scatter is used.
	 */
	id = rxq->completed & rxq->ptr_mask;
	rte_prefetch0(sfc_ef100_rx_pkt_prefix(rxq->sw_ring[id].mbuf));

	if (unlikely(rxq->completed + 1 == rxq->added))
		goto done;

	/*
	 * Prefetch mbuf control structure of the next after next Rx
	 * descriptor.
	 */
	id = (id == rxq->ptr_mask) ? 0 : (id + 1);
	rte_mbuf_prefetch_part1(rxq->sw_ring[id].mbuf);

	/*
	 * If the next time we'll need SW Rx descriptor from the next
	 * cache line, try to make sure that we have it in cache.
	 */
	if ((id & 0x7) == 0x7)
		rte_prefetch0(&rxq->sw_ring[(id + 1) & rxq->ptr_mask]);

done:
	return m;
}

static struct rte_mbuf **
sfc_ef100_rx_process_ready_pkts(struct sfc_ef100_rxq *rxq,
				struct rte_mbuf **rx_pkts,
				struct rte_mbuf ** const rx_pkts_end)
{
	while (rxq->ready_pkts > 0 && rx_pkts != rx_pkts_end) {
		struct rte_mbuf *pkt;
		struct rte_mbuf *lastseg;
		const efx_oword_t *rx_prefix;
		uint16_t pkt_len;
		uint16_t seg_len;
		bool deliver;

		rxq->ready_pkts--;

		pkt = sfc_ef100_rx_next_mbuf(rxq);
		__rte_mbuf_raw_sanity_check(pkt);

		RTE_BUILD_BUG_ON(sizeof(pkt->rearm_data[0]) !=
				 sizeof(rxq->rearm_data));
		pkt->rearm_data[0] = rxq->rearm_data;

		/* data_off already moved past Rx prefix */
		rx_prefix = (const efx_oword_t *)sfc_ef100_rx_pkt_prefix(pkt);

		pkt_len = EFX_OWORD_FIELD(rx_prefix[0],
					  ESF_GZ_RX_PREFIX_LENGTH);
		SFC_ASSERT(pkt_len > 0);
		rte_pktmbuf_pkt_len(pkt) = pkt_len;

		seg_len = RTE_MIN(pkt_len, rxq->buf_size - rxq->prefix_size);
		rte_pktmbuf_data_len(pkt) = seg_len;

		deliver = sfc_ef100_rx_prefix_to_offloads(rxq, rx_prefix, pkt);

		lastseg = pkt;
		while ((pkt_len -= seg_len) > 0) {
			struct rte_mbuf *seg;

			seg = sfc_ef100_rx_next_mbuf(rxq);
			__rte_mbuf_raw_sanity_check(seg);

			seg->data_off = RTE_PKTMBUF_HEADROOM;

			seg_len = RTE_MIN(pkt_len, rxq->buf_size);
			rte_pktmbuf_data_len(seg) = seg_len;
			rte_pktmbuf_pkt_len(seg) = seg_len;

			pkt->nb_segs++;
			lastseg->next = seg;
			lastseg = seg;
		}

		if (likely(deliver))
			*rx_pkts++ = pkt;
		else
			rte_pktmbuf_free(pkt);
	}

	return rx_pkts;
}

static bool
sfc_ef100_rx_get_event(struct sfc_ef100_rxq *rxq, efx_qword_t *ev)
{
	*ev = rxq->evq_hw_ring[rxq->evq_read_ptr & rxq->ptr_mask];

	if (!sfc_ef100_ev_present(ev,
			(rxq->evq_read_ptr >> rxq->evq_phase_bit_shift) & 1))
		return false;

	if (unlikely(!sfc_ef100_ev_type_is(ev, ESE_GZ_EF100_EV_RX_PKTS))) {
		/*
		 * Do not move read_ptr to keep the event for exception
		 * handling by the control path.
		 */
		rxq->flags |= SFC_EF100_RXQ_EXCEPTION;
		sfc_ef100_rx_err(rxq,
			"RxQ exception at EvQ ptr %u(%#x), event %08x:%08x",
			rxq->evq_read_ptr, rxq->evq_read_ptr & rxq->ptr_mask,
			EFX_QWORD_FIELD(*ev, EFX_DWORD_1),
			EFX_QWORD_FIELD(*ev, EFX_DWORD_0));
		return false;
	}

	sfc_ef100_rx_debug(rxq, "RxQ got event %08x:%08x at %u (%#x)",
			   EFX_QWORD_FIELD(*ev, EFX_DWORD_1),
			   EFX_QWORD_FIELD(*ev, EFX_DWORD_0),
			   rxq->evq_read_ptr,
			   rxq->evq_read_ptr & rxq->ptr_mask);

	rxq->evq_read_ptr++;
	return true;
}

static uint16_t
sfc_ef100_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
	struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(rx_queue);
	struct rte_mbuf ** const rx_pkts_end = &rx_pkts[nb_pkts];
	efx_qword_t rx_ev;

	rx_pkts = sfc_ef100_rx_process_ready_pkts(rxq, rx_pkts, rx_pkts_end);

	if (unlikely(rxq->flags &
		     (SFC_EF100_RXQ_NOT_RUNNING | SFC_EF100_RXQ_EXCEPTION)))
		goto done;

	while (rx_pkts != rx_pkts_end && sfc_ef100_rx_get_event(rxq, &rx_ev)) {
		rxq->ready_pkts =
			EFX_QWORD_FIELD(rx_ev, ESF_GZ_EV_RXPKTS_NUM_PKT);
		rx_pkts = sfc_ef100_rx_process_ready_pkts(rxq, rx_pkts,
							  rx_pkts_end);
	}

	/* It is not a problem if we refill in the case of exception */
	sfc_ef100_rx_qrefill(rxq);

	if ((rxq->flags & SFC_EF100_RXQ_FLAG_INTR_EN) &&
	    rxq->evq_read_ptr_primed != rxq->evq_read_ptr)
		sfc_ef100_rx_qprime(rxq);

done:
	return nb_pkts - (rx_pkts_end - rx_pkts);
}

static const uint32_t *
sfc_ef100_supported_ptypes_get(__rte_unused uint32_t tunnel_encaps)
{
	static const uint32_t ef100_native_ptypes[] = {
		RTE_PTYPE_L2_ETHER,
		RTE_PTYPE_L2_ETHER_VLAN,
		RTE_PTYPE_L2_ETHER_QINQ,
		RTE_PTYPE_L3_IPV4_EXT_UNKNOWN,
		RTE_PTYPE_L3_IPV6_EXT_UNKNOWN,
		RTE_PTYPE_L4_TCP,
		RTE_PTYPE_L4_UDP,
		RTE_PTYPE_L4_FRAG,
		RTE_PTYPE_TUNNEL_VXLAN,
		RTE_PTYPE_TUNNEL_NVGRE,
		RTE_PTYPE_TUNNEL_GENEVE,
		RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN,
		RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN,
		RTE_PTYPE_INNER_L4_TCP,
		RTE_PTYPE_INNER_L4_UDP,
		RTE_PTYPE_INNER_L4_FRAG,
		RTE_PTYPE_UNKNOWN
	};

	return ef100_native_ptypes;
}

static sfc_dp_rx_qdesc_npending_t sfc_ef100_rx_qdesc_npending;
static unsigned int
sfc_ef100_rx_qdesc_npending(__rte_unused struct sfc_dp_rxq *dp_rxq)
{
	return 0;
}

static sfc_dp_rx_qdesc_status_t sfc_ef100_rx_qdesc_status;
static int
sfc_ef100_rx_qdesc_status(__rte_unused struct sfc_dp_rxq *dp_rxq,
			  __rte_unused uint16_t offset)
{
	return -ENOTSUP;
}


static sfc_dp_rx_get_dev_info_t sfc_ef100_rx_get_dev_info;
static void
sfc_ef100_rx_get_dev_info(struct rte_eth_dev_info *dev_info)
{
	/*
	 * Number of descriptors just defines maximum number of pushed
	 * descriptors (fill level).
	 */
	dev_info->rx_desc_lim.nb_min = SFC_RX_REFILL_BULK;
	dev_info->rx_desc_lim.nb_align = SFC_RX_REFILL_BULK;
}


static sfc_dp_rx_qsize_up_rings_t sfc_ef100_rx_qsize_up_rings;
static int
sfc_ef100_rx_qsize_up_rings(uint16_t nb_rx_desc,
			    struct sfc_dp_rx_hw_limits *limits,
			    __rte_unused struct rte_mempool *mb_pool,
			    unsigned int *rxq_entries,
			    unsigned int *evq_entries,
			    unsigned int *rxq_max_fill_level)
{
	/*
	 * rte_ethdev API guarantees that the number meets min, max and
	 * alignment requirements.
	 */
	if (nb_rx_desc <= limits->rxq_min_entries)
		*rxq_entries = limits->rxq_min_entries;
	else
		*rxq_entries = rte_align32pow2(nb_rx_desc);

	*evq_entries = *rxq_entries;

	*rxq_max_fill_level = RTE_MIN(nb_rx_desc,
				      SFC_EF100_RXQ_LIMIT(*evq_entries));
	return 0;
}


static uint64_t
sfc_ef100_mk_mbuf_rearm_data(uint16_t port_id, uint16_t prefix_size)
{
	struct rte_mbuf m;

	memset(&m, 0, sizeof(m));

	rte_mbuf_refcnt_set(&m, 1);
	m.data_off = RTE_PKTMBUF_HEADROOM + prefix_size;
	m.nb_segs = 1;
	m.port = port_id;

	/* rearm_data covers structure members filled in above */
	rte_compiler_barrier();
	RTE_BUILD_BUG_ON(sizeof(m.rearm_data[0]) != sizeof(uint64_t));
	return m.rearm_data[0];
}

static sfc_dp_rx_qcreate_t sfc_ef100_rx_qcreate;
static int
sfc_ef100_rx_qcreate(uint16_t port_id, uint16_t queue_id,
		     const struct rte_pci_addr *pci_addr, int socket_id,
		     const struct sfc_dp_rx_qcreate_info *info,
		     struct sfc_dp_rxq **dp_rxqp)
{
	struct sfc_ef100_rxq *rxq;
	int rc;

	rc = EINVAL;
	if (info->rxq_entries != info->evq_entries)
		goto fail_rxq_args;

	rc = ENOMEM;
	rxq = rte_zmalloc_socket("sfc-ef100-rxq", sizeof(*rxq),
				 RTE_CACHE_LINE_SIZE, socket_id);
	if (rxq == NULL)
		goto fail_rxq_alloc;

	sfc_dp_queue_init(&rxq->dp.dpq, port_id, queue_id, pci_addr);

	rc = ENOMEM;
	rxq->sw_ring = rte_calloc_socket("sfc-ef100-rxq-sw_ring",
					 info->rxq_entries,
					 sizeof(*rxq->sw_ring),
					 RTE_CACHE_LINE_SIZE, socket_id);
	if (rxq->sw_ring == NULL)
		goto fail_desc_alloc;

	rxq->flags |= SFC_EF100_RXQ_NOT_RUNNING;
	rxq->ptr_mask = info->rxq_entries - 1;
	rxq->evq_phase_bit_shift = rte_bsf32(info->evq_entries);
	rxq->evq_hw_ring = info->evq_hw_ring;
	rxq->max_fill_level = info->max_fill_level;
	rxq->refill_threshold = info->refill_threshold;
	rxq->prefix_size = info->prefix_size;
	rxq->buf_size = info->buf_size;
	rxq->refill_mb_pool = info->refill_mb_pool;
	rxq->rxq_hw_ring = info->rxq_hw_ring;
	rxq->doorbell = (volatile uint8_t *)info->mem_bar +
			ER_GZ_RX_RING_DOORBELL_OFST +
			(info->hw_index << info->vi_window_shift);

	rxq->evq_hw_index = info->evq_hw_index;
	rxq->evq_prime = (volatile uint8_t *)info->mem_bar +
			 info->fcw_offset +
			 ER_GZ_EVQ_INT_PRIME_OFST;

	sfc_ef100_rx_debug(rxq, "RxQ doorbell is %p", rxq->doorbell);

	*dp_rxqp = &rxq->dp;
	return 0;

fail_desc_alloc:
	rte_free(rxq);

fail_rxq_alloc:
fail_rxq_args:
	return rc;
}

static sfc_dp_rx_qdestroy_t sfc_ef100_rx_qdestroy;
static void
sfc_ef100_rx_qdestroy(struct sfc_dp_rxq *dp_rxq)
{
	struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);

	rte_free(rxq->sw_ring);
	rte_free(rxq);
}

static sfc_dp_rx_qstart_t sfc_ef100_rx_qstart;
static int
sfc_ef100_rx_qstart(struct sfc_dp_rxq *dp_rxq, unsigned int evq_read_ptr,
		    const efx_rx_prefix_layout_t *pinfo)
{
	struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);
	uint32_t unsup_rx_prefix_fields;

	SFC_ASSERT(rxq->completed == 0);
	SFC_ASSERT(rxq->added == 0);

	/* Prefix must fit into reserved Rx buffer space */
	if (pinfo->erpl_length > rxq->prefix_size)
		return ENOTSUP;

	unsup_rx_prefix_fields =
		efx_rx_prefix_layout_check(pinfo, &sfc_ef100_rx_prefix_layout);

	/* LENGTH and CLASS fields must always be present */
	if ((unsup_rx_prefix_fields &
	     ((1U << EFX_RX_PREFIX_FIELD_LENGTH) |
	      (1U << EFX_RX_PREFIX_FIELD_CLASS))) != 0)
		return ENOTSUP;

	if ((unsup_rx_prefix_fields &
	     ((1U << EFX_RX_PREFIX_FIELD_RSS_HASH_VALID) |
	      (1U << EFX_RX_PREFIX_FIELD_RSS_HASH))) == 0)
		rxq->flags |= SFC_EF100_RXQ_RSS_HASH;
	else
		rxq->flags &= ~SFC_EF100_RXQ_RSS_HASH;

	if ((unsup_rx_prefix_fields &
	     (1U << EFX_RX_PREFIX_FIELD_USER_MARK)) == 0)
		rxq->flags |= SFC_EF100_RXQ_USER_MARK;
	else
		rxq->flags &= ~SFC_EF100_RXQ_USER_MARK;

	rxq->prefix_size = pinfo->erpl_length;
	rxq->rearm_data = sfc_ef100_mk_mbuf_rearm_data(rxq->dp.dpq.port_id,
						       rxq->prefix_size);

	sfc_ef100_rx_qrefill(rxq);

	rxq->evq_read_ptr = evq_read_ptr;

	rxq->flags |= SFC_EF100_RXQ_STARTED;
	rxq->flags &= ~(SFC_EF100_RXQ_NOT_RUNNING | SFC_EF100_RXQ_EXCEPTION);

	if (rxq->flags & SFC_EF100_RXQ_FLAG_INTR_EN)
		sfc_ef100_rx_qprime(rxq);

	return 0;
}

static sfc_dp_rx_qstop_t sfc_ef100_rx_qstop;
static void
sfc_ef100_rx_qstop(struct sfc_dp_rxq *dp_rxq, unsigned int *evq_read_ptr)
{
	struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);

	rxq->flags |= SFC_EF100_RXQ_NOT_RUNNING;

	*evq_read_ptr = rxq->evq_read_ptr;
}

static sfc_dp_rx_qrx_ev_t sfc_ef100_rx_qrx_ev;
static bool
sfc_ef100_rx_qrx_ev(struct sfc_dp_rxq *dp_rxq, __rte_unused unsigned int id)
{
	__rte_unused struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);

	SFC_ASSERT(rxq->flags & SFC_EF100_RXQ_NOT_RUNNING);

	/*
	 * It is safe to ignore Rx event since we free all mbufs on
	 * queue purge anyway.
	 */

	return false;
}

static sfc_dp_rx_qpurge_t sfc_ef100_rx_qpurge;
static void
sfc_ef100_rx_qpurge(struct sfc_dp_rxq *dp_rxq)
{
	struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);
	unsigned int i;
	struct sfc_ef100_rx_sw_desc *rxd;

	for (i = rxq->completed; i != rxq->added; ++i) {
		rxd = &rxq->sw_ring[i & rxq->ptr_mask];
		rte_mbuf_raw_free(rxd->mbuf);
		rxd->mbuf = NULL;
	}

	rxq->completed = rxq->added = 0;
	rxq->ready_pkts = 0;

	rxq->flags &= ~SFC_EF100_RXQ_STARTED;
}

static sfc_dp_rx_intr_enable_t sfc_ef100_rx_intr_enable;
static int
sfc_ef100_rx_intr_enable(struct sfc_dp_rxq *dp_rxq)
{
	struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);

	rxq->flags |= SFC_EF100_RXQ_FLAG_INTR_EN;
	if (rxq->flags & SFC_EF100_RXQ_STARTED)
		sfc_ef100_rx_qprime(rxq);
	return 0;
}

static sfc_dp_rx_intr_disable_t sfc_ef100_rx_intr_disable;
static int
sfc_ef100_rx_intr_disable(struct sfc_dp_rxq *dp_rxq)
{
	struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);

	/* Cannot disarm, just disable rearm */
	rxq->flags &= ~SFC_EF100_RXQ_FLAG_INTR_EN;
	return 0;
}

static sfc_dp_rx_get_pushed_t sfc_ef100_rx_get_pushed;
static unsigned int
sfc_ef100_rx_get_pushed(struct sfc_dp_rxq *dp_rxq)
{
	struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);

	/*
	 * The datapath keeps track only of added descriptors, since
	 * the number of pushed descriptors always equals the number
	 * of added descriptors due to enforced alignment.
	 */
	return rxq->added;
}

struct sfc_dp_rx sfc_ef100_rx = {
	.dp = {
		.name		= SFC_KVARG_DATAPATH_EF100,
		.type		= SFC_DP_RX,
		.hw_fw_caps	= SFC_DP_HW_FW_CAP_EF100,
	},
	.features		= SFC_DP_RX_FEAT_MULTI_PROCESS |
				  SFC_DP_RX_FEAT_INTR,
	.dev_offload_capa	= 0,
	.queue_offload_capa	= DEV_RX_OFFLOAD_CHECKSUM |
				  DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM |
				  DEV_RX_OFFLOAD_OUTER_UDP_CKSUM |
				  DEV_RX_OFFLOAD_SCATTER |
				  DEV_RX_OFFLOAD_RSS_HASH,
	.get_dev_info		= sfc_ef100_rx_get_dev_info,
	.qsize_up_rings		= sfc_ef100_rx_qsize_up_rings,
	.qcreate		= sfc_ef100_rx_qcreate,
	.qdestroy		= sfc_ef100_rx_qdestroy,
	.qstart			= sfc_ef100_rx_qstart,
	.qstop			= sfc_ef100_rx_qstop,
	.qrx_ev			= sfc_ef100_rx_qrx_ev,
	.qpurge			= sfc_ef100_rx_qpurge,
	.supported_ptypes_get	= sfc_ef100_supported_ptypes_get,
	.qdesc_npending		= sfc_ef100_rx_qdesc_npending,
	.qdesc_status		= sfc_ef100_rx_qdesc_status,
	.intr_enable		= sfc_ef100_rx_intr_enable,
	.intr_disable		= sfc_ef100_rx_intr_disable,
	.get_pushed		= sfc_ef100_rx_get_pushed,
	.pkt_burst		= sfc_ef100_recv_pkts,
};