/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2023 Intel Corporation
 */

#include <rte_mbuf_dyn.h>
#include <rte_errno.h>

#include "idpf_common_rxtx.h"

int idpf_timestamp_dynfield_offset = -1;
uint64_t idpf_timestamp_dynflag;

int
idpf_qc_rx_thresh_check(uint16_t nb_desc, uint16_t thresh)
{
	/* The following constraints must be satisfied:
	 * thresh < rxq->nb_rx_desc
	 */
	if (thresh >= nb_desc) {
		DRV_LOG(ERR, "rx_free_thresh (%u) must be less than %u",
			thresh, nb_desc);
		return -EINVAL;
	}

	return 0;
}

int
idpf_qc_tx_thresh_check(uint16_t nb_desc, uint16_t tx_rs_thresh,
			uint16_t tx_free_thresh)
{
	/* TX descriptors will have their RS bit set after tx_rs_thresh
	 * descriptors have been used. The TX descriptor ring will be cleaned
	 * after tx_free_thresh descriptors are used or if the number of
	 * descriptors required to transmit a packet is greater than the
	 * number of free TX descriptors.
	 *
	 * The following constraints must be satisfied:
	 * - tx_rs_thresh must be less than the size of the ring minus 2.
	 * - tx_free_thresh must be less than the size of the ring minus 3.
	 * - tx_rs_thresh must be less than or equal to tx_free_thresh.
	 * - tx_rs_thresh must be a divisor of the ring size.
	 *
	 * One descriptor in the TX ring is used as a sentinel to avoid a H/W
	 * race condition, hence the maximum threshold constraints. When set
	 * to zero, the default values are used.
	 * For example, with 512 descriptors, tx_rs_thresh = 32 and
	 * tx_free_thresh = 64 satisfy all four constraints.
	 */
	if (tx_rs_thresh >= (nb_desc - 2)) {
		DRV_LOG(ERR, "tx_rs_thresh (%u) must be less than the "
			"number of TX descriptors (%u) minus 2",
			tx_rs_thresh, nb_desc);
		return -EINVAL;
	}
	if (tx_free_thresh >= (nb_desc - 3)) {
		DRV_LOG(ERR, "tx_free_thresh (%u) must be less than the "
			"number of TX descriptors (%u) minus 3.",
			tx_free_thresh, nb_desc);
		return -EINVAL;
	}
	if (tx_rs_thresh > tx_free_thresh) {
		DRV_LOG(ERR, "tx_rs_thresh (%u) must be less than or "
			"equal to tx_free_thresh (%u).",
			tx_rs_thresh, tx_free_thresh);
		return -EINVAL;
	}
	if ((nb_desc % tx_rs_thresh) != 0) {
		DRV_LOG(ERR, "tx_rs_thresh (%u) must be a divisor of the "
			"number of TX descriptors (%u).",
			tx_rs_thresh, nb_desc);
		return -EINVAL;
	}

	return 0;
}

void
idpf_qc_rxq_mbufs_release(struct idpf_rx_queue *rxq)
{
	uint16_t i;

	if (rxq->sw_ring == NULL)
		return;

	for (i = 0; i < rxq->nb_rx_desc; i++) {
		if (rxq->sw_ring[i] != NULL) {
			rte_pktmbuf_free_seg(rxq->sw_ring[i]);
			rxq->sw_ring[i] = NULL;
		}
	}
}

void
idpf_qc_txq_mbufs_release(struct idpf_tx_queue *txq)
{
	uint16_t nb_desc, i;

	if (txq == NULL || txq->sw_ring == NULL) {
		DRV_LOG(DEBUG, "Pointer to txq or sw_ring is NULL");
		return;
	}

	if (txq->sw_nb_desc != 0) {
		/* For split queue model, descriptor ring */
		nb_desc = txq->sw_nb_desc;
	} else {
		/* For single queue model */
		nb_desc = txq->nb_tx_desc;
	}
	for (i = 0; i < nb_desc; i++) {
		if (txq->sw_ring[i].mbuf != NULL) {
			rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
			txq->sw_ring[i].mbuf = NULL;
		}
	}
}

void
idpf_qc_split_rx_descq_reset(struct idpf_rx_queue *rxq)
{
	uint16_t len;
	uint32_t i;

	if (rxq == NULL)
		return;

	len = rxq->nb_rx_desc + IDPF_RX_MAX_BURST;

	for (i = 0; i < len * sizeof(struct virtchnl2_rx_flex_desc_adv_nic_3);
	     i++)
		((volatile char *)rxq->rx_ring)[i] = 0;

	rxq->rx_tail = 0;
	rxq->expected_gen_id = 1;
}
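
/*
 * Note on the generation bit (added for clarity, summarising the Rx path
 * further below): after this reset, expected_gen_id is 1 and rx_tail is 0.
 * idpf_dp_splitq_recv_pkts() only consumes a descriptor whose GEN bit
 * matches expected_gen_id, and toggles expected_gen_id every time the ring
 * index wraps:
 *
 *	rx_id++;
 *	if (unlikely(rx_id == rxq->nb_rx_desc)) {
 *		rx_id = 0;
 *		rxq->expected_gen_id ^= 1;
 *	}
 */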

void
idpf_qc_split_rx_bufq_reset(struct idpf_rx_queue *rxq)
{
	uint16_t len;
	uint32_t i;

	if (rxq == NULL)
		return;

	len = rxq->nb_rx_desc + IDPF_RX_MAX_BURST;

	for (i = 0; i < len * sizeof(struct virtchnl2_splitq_rx_buf_desc);
	     i++)
		((volatile char *)rxq->rx_ring)[i] = 0;

	memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));

	for (i = 0; i < IDPF_RX_MAX_BURST; i++)
		rxq->sw_ring[rxq->nb_rx_desc + i] = &rxq->fake_mbuf;

	/* The next descriptor id which can be received. */
	rxq->rx_next_avail = 0;

	/* The next descriptor id which can be refilled. */
	rxq->rx_tail = 0;
	/* The number of descriptors which can be refilled. */
	rxq->nb_rx_hold = rxq->nb_rx_desc - 1;

	rxq->rxrearm_nb = 0;
	rxq->rxrearm_start = 0;

	rxq->bufq1 = NULL;
	rxq->bufq2 = NULL;
}

void
idpf_qc_split_rx_queue_reset(struct idpf_rx_queue *rxq)
{
	idpf_qc_split_rx_descq_reset(rxq);
	idpf_qc_split_rx_bufq_reset(rxq->bufq1);
	idpf_qc_split_rx_bufq_reset(rxq->bufq2);
}

void
idpf_qc_single_rx_queue_reset(struct idpf_rx_queue *rxq)
{
	uint16_t len;
	uint32_t i;

	if (rxq == NULL)
		return;

	len = rxq->nb_rx_desc + IDPF_RX_MAX_BURST;

	for (i = 0; i < len * sizeof(struct virtchnl2_singleq_rx_buf_desc);
	     i++)
		((volatile char *)rxq->rx_ring)[i] = 0;

	memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));

	for (i = 0; i < IDPF_RX_MAX_BURST; i++)
		rxq->sw_ring[rxq->nb_rx_desc + i] = &rxq->fake_mbuf;

	rxq->rx_tail = 0;
	rxq->nb_rx_hold = 0;

	rte_pktmbuf_free(rxq->pkt_first_seg);

	rxq->pkt_first_seg = NULL;
	rxq->pkt_last_seg = NULL;
	rxq->rxrearm_start = 0;
	rxq->rxrearm_nb = 0;
}

void
idpf_qc_split_tx_descq_reset(struct idpf_tx_queue *txq)
{
	struct idpf_tx_entry *txe;
	uint32_t i, size;
	uint16_t prev;

	if (txq == NULL) {
		DRV_LOG(DEBUG, "Pointer to txq is NULL");
		return;
	}

	size = sizeof(struct idpf_flex_tx_sched_desc) * txq->nb_tx_desc;
	for (i = 0; i < size; i++)
		((volatile char *)txq->desc_ring)[i] = 0;

	txe = txq->sw_ring;
	prev = (uint16_t)(txq->sw_nb_desc - 1);
	for (i = 0; i < txq->sw_nb_desc; i++) {
		txe[i].mbuf = NULL;
		txe[i].last_id = i;
		txe[prev].next_id = i;
		prev = i;
	}

	txq->tx_tail = 0;
	txq->nb_used = 0;

	/* Use this as next to clean for split desc queue */
	txq->last_desc_cleaned = 0;
	txq->sw_tail = 0;
	txq->nb_free = txq->nb_tx_desc - 1;

	memset(txq->ctype, 0, sizeof(txq->ctype));
	txq->next_dd = txq->rs_thresh - 1;
	txq->next_rs = txq->rs_thresh - 1;
}

void
idpf_qc_split_tx_complq_reset(struct idpf_tx_queue *cq)
{
	uint32_t i, size;

	if (cq == NULL) {
		DRV_LOG(DEBUG, "Pointer to complq is NULL");
		return;
	}

	size = sizeof(struct idpf_splitq_tx_compl_desc) * cq->nb_tx_desc;
	for (i = 0; i < size; i++)
		((volatile char *)cq->compl_ring)[i] = 0;

	cq->tx_tail = 0;
	cq->expected_gen_id = 1;
}

void
idpf_qc_single_tx_queue_reset(struct idpf_tx_queue *txq)
{
	struct idpf_tx_entry *txe;
	uint32_t i, size;
	uint16_t prev;

	if (txq == NULL) {
		DRV_LOG(DEBUG, "Pointer to txq is NULL");
		return;
	}

	txe = txq->sw_ring;
	size = sizeof(struct idpf_flex_tx_desc) * txq->nb_tx_desc;
	for (i = 0; i < size; i++)
		((volatile char *)txq->tx_ring)[i] = 0;

	prev = (uint16_t)(txq->nb_tx_desc - 1);
	for (i = 0; i < txq->nb_tx_desc; i++) {
		txq->tx_ring[i].qw1.cmd_dtype =
			rte_cpu_to_le_16(IDPF_TX_DESC_DTYPE_DESC_DONE);
		txe[i].mbuf = NULL;
		txe[i].last_id = i;
		txe[prev].next_id = i;
		prev = i;
	}

	txq->tx_tail = 0;
	txq->nb_used = 0;

	txq->last_desc_cleaned = txq->nb_tx_desc - 1;
	txq->nb_free = txq->nb_tx_desc - 1;

	txq->next_dd = txq->rs_thresh - 1;
	txq->next_rs = txq->rs_thresh - 1;
}

void
idpf_qc_rx_queue_release(void *rxq)
{
	struct idpf_rx_queue *q = rxq;

	if (q == NULL)
		return;

	/* Split queue */
	if (q->bufq1 != NULL && q->bufq2 != NULL) {
		q->bufq1->ops->release_mbufs(q->bufq1);
		rte_free(q->bufq1->sw_ring);
		rte_memzone_free(q->bufq1->mz);
		rte_free(q->bufq1);
		q->bufq2->ops->release_mbufs(q->bufq2);
		rte_free(q->bufq2->sw_ring);
		rte_memzone_free(q->bufq2->mz);
		rte_free(q->bufq2);
		rte_memzone_free(q->mz);
		rte_free(q);
		return;
	}

	/* Single queue */
	q->ops->release_mbufs(q);
	rte_free(q->sw_ring);
	rte_memzone_free(q->mz);
	rte_free(q);
}

void
idpf_qc_tx_queue_release(void *txq)
{
	struct idpf_tx_queue *q = txq;

	if (q == NULL)
		return;

	if (q->complq) {
		rte_memzone_free(q->complq->mz);
		rte_free(q->complq);
	}

	q->ops->release_mbufs(q);
	rte_free(q->sw_ring);
	rte_memzone_free(q->mz);
	rte_free(q);
}

int
idpf_qc_ts_mbuf_register(struct idpf_rx_queue *rxq)
{
	int err;

	if ((rxq->offloads & IDPF_RX_OFFLOAD_TIMESTAMP) != 0) {
		/* Register mbuf field and flag for Rx timestamp */
		err = rte_mbuf_dyn_rx_timestamp_register(&idpf_timestamp_dynfield_offset,
							 &idpf_timestamp_dynflag);
		if (err != 0) {
			DRV_LOG(ERR,
				"Cannot register mbuf field/flag for timestamp");
			return -EINVAL;
		}
	}
	return 0;
}
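
/*
 * Illustrative usage (not part of this file): a PMD built on this common
 * layer is expected to call idpf_qc_ts_mbuf_register() while starting an
 * Rx queue that has IDPF_RX_OFFLOAD_TIMESTAMP enabled, before the first
 * Rx burst; the variable names below are hypothetical:
 *
 *	err = idpf_qc_ts_mbuf_register(rxq);
 *	if (err != 0)
 *		return err;
 *
 * The registered idpf_timestamp_dynfield_offset and idpf_timestamp_dynflag
 * are then used by the Rx burst functions below to attach timestamps.
 */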

int
idpf_qc_single_rxq_mbufs_alloc(struct idpf_rx_queue *rxq)
{
	volatile struct virtchnl2_singleq_rx_buf_desc *rxd;
	struct rte_mbuf *mbuf = NULL;
	uint64_t dma_addr;
	uint16_t i;

	for (i = 0; i < rxq->nb_rx_desc; i++) {
		mbuf = rte_mbuf_raw_alloc(rxq->mp);
		if (unlikely(mbuf == NULL)) {
			DRV_LOG(ERR, "Failed to allocate mbuf for RX");
			return -ENOMEM;
		}

		rte_mbuf_refcnt_set(mbuf, 1);
		mbuf->next = NULL;
		mbuf->data_off = RTE_PKTMBUF_HEADROOM;
		mbuf->nb_segs = 1;
		mbuf->port = rxq->port_id;

		dma_addr =
			rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));

		rxd = &((volatile struct virtchnl2_singleq_rx_buf_desc *)(rxq->rx_ring))[i];
		rxd->pkt_addr = dma_addr;
		rxd->hdr_addr = 0;
		rxd->rsvd1 = 0;
		rxd->rsvd2 = 0;
		rxq->sw_ring[i] = mbuf;
	}

	return 0;
}

int
idpf_qc_split_rxq_mbufs_alloc(struct idpf_rx_queue *rxq)
{
	volatile struct virtchnl2_splitq_rx_buf_desc *rxd;
	struct rte_mbuf *mbuf = NULL;
	uint64_t dma_addr;
	uint16_t i;

	for (i = 0; i < rxq->nb_rx_desc; i++) {
		mbuf = rte_mbuf_raw_alloc(rxq->mp);
		if (unlikely(mbuf == NULL)) {
			DRV_LOG(ERR, "Failed to allocate mbuf for RX");
			return -ENOMEM;
		}

		rte_mbuf_refcnt_set(mbuf, 1);
		mbuf->next = NULL;
		mbuf->data_off = RTE_PKTMBUF_HEADROOM;
		mbuf->nb_segs = 1;
		mbuf->port = rxq->port_id;

		dma_addr =
			rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));

		rxd = &((volatile struct virtchnl2_splitq_rx_buf_desc *)(rxq->rx_ring))[i];
		rxd->qword0.buf_id = i;
		rxd->qword0.rsvd0 = 0;
		rxd->qword0.rsvd1 = 0;
		rxd->pkt_addr = dma_addr;
		rxd->hdr_addr = 0;
		rxd->rsvd2 = 0;

		rxq->sw_ring[i] = mbuf;
	}

	rxq->nb_rx_hold = 0;
	rxq->rx_tail = rxq->nb_rx_desc - 1;

	return 0;
}

#define IDPF_TIMESYNC_REG_WRAP_GUARD_BAND	10000
/* Helper function to convert a 32b nanoseconds timestamp to 64b. */
static inline uint64_t
idpf_tstamp_convert_32b_64b(struct idpf_adapter *ad, uint32_t flag,
			    uint32_t in_timestamp)
{
#ifdef RTE_ARCH_X86_64
	struct idpf_hw *hw = &ad->hw;
	const uint64_t mask = 0xFFFFFFFF;
	uint32_t hi, lo, lo2, delta;
	uint64_t ns;

	if (flag != 0) {
		IDPF_WRITE_REG(hw, GLTSYN_CMD_SYNC_0_0, PF_GLTSYN_CMD_SYNC_SHTIME_EN_M);
		IDPF_WRITE_REG(hw, GLTSYN_CMD_SYNC_0_0, PF_GLTSYN_CMD_SYNC_EXEC_CMD_M |
			       PF_GLTSYN_CMD_SYNC_SHTIME_EN_M);
		lo = IDPF_READ_REG(hw, PF_GLTSYN_SHTIME_L_0);
		hi = IDPF_READ_REG(hw, PF_GLTSYN_SHTIME_H_0);
		/*
		 * On a typical system, the delta between lo and lo2 is ~1000ns,
		 * so 10000 seems a large-enough but not overly-big guard band.
		 */
		if (lo > (UINT32_MAX - IDPF_TIMESYNC_REG_WRAP_GUARD_BAND))
			lo2 = IDPF_READ_REG(hw, PF_GLTSYN_SHTIME_L_0);
		else
			lo2 = lo;

		if (lo2 < lo) {
			lo = IDPF_READ_REG(hw, PF_GLTSYN_SHTIME_L_0);
			hi = IDPF_READ_REG(hw, PF_GLTSYN_SHTIME_H_0);
		}

		ad->time_hw = ((uint64_t)hi << 32) | lo;
	}

	delta = (in_timestamp - (uint32_t)(ad->time_hw & mask));
	if (delta > (mask / 2)) {
		delta = ((uint32_t)(ad->time_hw & mask) - in_timestamp);
		ns = ad->time_hw - delta;
	} else {
		ns = ad->time_hw + delta;
	}

	return ns;
#else /* !RTE_ARCH_X86_64 */
	RTE_SET_USED(ad);
	RTE_SET_USED(flag);
	RTE_SET_USED(in_timestamp);
	return 0;
#endif /* RTE_ARCH_X86_64 */
}

#define IDPF_RX_FLEX_DESC_ADV_STATUS0_XSUM_S				\
	(RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_IPE_S) |	\
	 RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_L4E_S) |	\
	 RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_EIPE_S) |	\
	 RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_EUDPE_S))

static inline uint64_t
idpf_splitq_rx_csum_offload(uint8_t err)
{
	uint64_t flags = 0;

	if (unlikely((err & RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_L3L4P_S)) == 0))
		return flags;

	if (likely((err & IDPF_RX_FLEX_DESC_ADV_STATUS0_XSUM_S) == 0)) {
		flags |= (RTE_MBUF_F_RX_IP_CKSUM_GOOD |
			  RTE_MBUF_F_RX_L4_CKSUM_GOOD);
		return flags;
	}

	if (unlikely((err & RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_IPE_S)) != 0))
		flags |= RTE_MBUF_F_RX_IP_CKSUM_BAD;
	else
		flags |= RTE_MBUF_F_RX_IP_CKSUM_GOOD;

	if (unlikely((err & RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_L4E_S)) != 0))
		flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD;
	else
		flags |= RTE_MBUF_F_RX_L4_CKSUM_GOOD;

	if (unlikely((err & RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_EIPE_S)) != 0))
		flags |= RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD;

	if (unlikely((err & RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_EUDPE_S)) != 0))
		flags |= RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD;
	else
		flags |= RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD;

	return flags;
}

#define IDPF_RX_FLEX_DESC_ADV_HASH1_S	0
#define IDPF_RX_FLEX_DESC_ADV_HASH2_S	16
#define IDPF_RX_FLEX_DESC_ADV_HASH3_S	24
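
/*
 * Informational note (added for clarity): in the advanced flex Rx
 * descriptor the 32-bit RSS hash is split across three fields, which
 * idpf_splitq_rx_rss_offload() below reassembles using the shift values
 * defined above:
 *
 *	hash.rss = (hash1 << IDPF_RX_FLEX_DESC_ADV_HASH1_S)	// bits 15:0
 *		 | (hash2 << IDPF_RX_FLEX_DESC_ADV_HASH2_S)	// bits 23:16
 *		 | (hash3 << IDPF_RX_FLEX_DESC_ADV_HASH3_S);	// bits 31:24
 */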

static inline uint64_t
idpf_splitq_rx_rss_offload(struct rte_mbuf *mb,
			   volatile struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc)
{
	uint8_t status_err0_qw0;
	uint64_t flags = 0;

	status_err0_qw0 = rx_desc->status_err0_qw0;

	if ((status_err0_qw0 & RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_RSS_VALID_S)) != 0) {
		flags |= RTE_MBUF_F_RX_RSS_HASH;
		mb->hash.rss = (rte_le_to_cpu_16(rx_desc->hash1) <<
				IDPF_RX_FLEX_DESC_ADV_HASH1_S) |
			((uint32_t)(rx_desc->ff2_mirrid_hash2.hash2) <<
			 IDPF_RX_FLEX_DESC_ADV_HASH2_S) |
			((uint32_t)(rx_desc->hash3) <<
			 IDPF_RX_FLEX_DESC_ADV_HASH3_S);
	}

	return flags;
}

static void
idpf_split_rx_bufq_refill(struct idpf_rx_queue *rx_bufq)
{
	volatile struct virtchnl2_splitq_rx_buf_desc *rx_buf_ring;
	volatile struct virtchnl2_splitq_rx_buf_desc *rx_buf_desc;
	uint16_t nb_refill = rx_bufq->rx_free_thresh;
	uint16_t nb_desc = rx_bufq->nb_rx_desc;
	uint16_t next_avail = rx_bufq->rx_tail;
	struct rte_mbuf *nmb[rx_bufq->rx_free_thresh];
	uint64_t dma_addr;
	uint16_t delta;
	int i;

	if (rx_bufq->nb_rx_hold < rx_bufq->rx_free_thresh)
		return;

	rx_buf_ring = rx_bufq->rx_ring;
	delta = nb_desc - next_avail;
	if (unlikely(delta < nb_refill)) {
		if (likely(rte_pktmbuf_alloc_bulk(rx_bufq->mp, nmb, delta) == 0)) {
			for (i = 0; i < delta; i++) {
				rx_buf_desc = &rx_buf_ring[next_avail + i];
				rx_bufq->sw_ring[next_avail + i] = nmb[i];
				dma_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb[i]));
				rx_buf_desc->hdr_addr = 0;
				rx_buf_desc->pkt_addr = dma_addr;
			}
			nb_refill -= delta;
			next_avail = 0;
			rx_bufq->nb_rx_hold -= delta;
		} else {
			__atomic_fetch_add(&rx_bufq->rx_stats.mbuf_alloc_failed,
					   nb_desc - next_avail, __ATOMIC_RELAXED);
			RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u queue_id=%u",
			       rx_bufq->port_id, rx_bufq->queue_id);
			return;
		}
	}

	if (nb_desc - next_avail >= nb_refill) {
		if (likely(rte_pktmbuf_alloc_bulk(rx_bufq->mp, nmb, nb_refill) == 0)) {
			for (i = 0; i < nb_refill; i++) {
				rx_buf_desc = &rx_buf_ring[next_avail + i];
				rx_bufq->sw_ring[next_avail + i] = nmb[i];
				dma_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb[i]));
				rx_buf_desc->hdr_addr = 0;
				rx_buf_desc->pkt_addr = dma_addr;
			}
			next_avail += nb_refill;
			rx_bufq->nb_rx_hold -= nb_refill;
		} else {
			__atomic_fetch_add(&rx_bufq->rx_stats.mbuf_alloc_failed,
					   nb_desc - next_avail, __ATOMIC_RELAXED);
			RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u queue_id=%u",
			       rx_bufq->port_id, rx_bufq->queue_id);
		}
	}

	IDPF_PCI_REG_WRITE(rx_bufq->qrx_tail, next_avail);

	rx_bufq->rx_tail = next_avail;
}
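
/*
 * Worked example for the refill above (illustrative numbers): with
 * nb_desc = 512, rx_free_thresh = 64 and next_avail = 480, only
 * delta = 32 free slots remain before the ring wraps, so the first branch
 * refills entries 480..511 and resets next_avail to 0, the second branch
 * refills the remaining 32 entries 0..31, and the tail register is finally
 * written with next_avail = 32.
 */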

uint16_t
idpf_dp_splitq_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
			 uint16_t nb_pkts)
{
	volatile struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc_ring;
	volatile struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc;
	uint16_t pktlen_gen_bufq_id;
	struct idpf_rx_queue *rxq;
	const uint32_t *ptype_tbl;
	uint8_t status_err0_qw1;
	struct idpf_adapter *ad;
	struct rte_mbuf *rxm;
	uint16_t rx_id_bufq1;
	uint16_t rx_id_bufq2;
	uint64_t pkt_flags;
	uint16_t pkt_len;
	uint16_t bufq_id;
	uint16_t gen_id;
	uint16_t rx_id;
	uint16_t nb_rx;
	uint64_t ts_ns;

	nb_rx = 0;
	rxq = rx_queue;

	if (unlikely(rxq == NULL) || unlikely(!rxq->q_started))
		return nb_rx;

	/* Only dereference the queue after the NULL check above. */
	ad = rxq->adapter;
	rx_id = rxq->rx_tail;
	rx_id_bufq1 = rxq->bufq1->rx_next_avail;
	rx_id_bufq2 = rxq->bufq2->rx_next_avail;
	rx_desc_ring = rxq->rx_ring;
	ptype_tbl = rxq->adapter->ptype_tbl;

	if ((rxq->offloads & IDPF_RX_OFFLOAD_TIMESTAMP) != 0)
		rxq->hw_register_set = 1;

	while (nb_rx < nb_pkts) {
		rx_desc = &rx_desc_ring[rx_id];

		pktlen_gen_bufq_id =
			rte_le_to_cpu_16(rx_desc->pktlen_gen_bufq_id);
		gen_id = (pktlen_gen_bufq_id &
			  VIRTCHNL2_RX_FLEX_DESC_ADV_GEN_M) >>
			VIRTCHNL2_RX_FLEX_DESC_ADV_GEN_S;
		if (gen_id != rxq->expected_gen_id)
			break;

		pkt_len = (pktlen_gen_bufq_id &
			   VIRTCHNL2_RX_FLEX_DESC_ADV_LEN_PBUF_M) >>
			VIRTCHNL2_RX_FLEX_DESC_ADV_LEN_PBUF_S;
		if (pkt_len == 0)
			RX_LOG(ERR, "Packet length is 0");

		rx_id++;
		if (unlikely(rx_id == rxq->nb_rx_desc)) {
			rx_id = 0;
			rxq->expected_gen_id ^= 1;
		}

		bufq_id = (pktlen_gen_bufq_id &
			   VIRTCHNL2_RX_FLEX_DESC_ADV_BUFQ_ID_M) >>
			VIRTCHNL2_RX_FLEX_DESC_ADV_BUFQ_ID_S;
		if (bufq_id == 0) {
			rxm = rxq->bufq1->sw_ring[rx_id_bufq1];
			rx_id_bufq1++;
			if (unlikely(rx_id_bufq1 == rxq->bufq1->nb_rx_desc))
				rx_id_bufq1 = 0;
			rxq->bufq1->nb_rx_hold++;
		} else {
			rxm = rxq->bufq2->sw_ring[rx_id_bufq2];
			rx_id_bufq2++;
			if (unlikely(rx_id_bufq2 == rxq->bufq2->nb_rx_desc))
				rx_id_bufq2 = 0;
			rxq->bufq2->nb_rx_hold++;
		}

		rxm->pkt_len = pkt_len;
		rxm->data_len = pkt_len;
		rxm->data_off = RTE_PKTMBUF_HEADROOM;
		rxm->next = NULL;
		rxm->nb_segs = 1;
		rxm->port = rxq->port_id;
		rxm->ol_flags = 0;
		rxm->packet_type =
			ptype_tbl[(rte_le_to_cpu_16(rx_desc->ptype_err_fflags0) &
				   VIRTCHNL2_RX_FLEX_DESC_ADV_PTYPE_M) >>
				  VIRTCHNL2_RX_FLEX_DESC_ADV_PTYPE_S];

		status_err0_qw1 = rx_desc->status_err0_qw1;
		pkt_flags = idpf_splitq_rx_csum_offload(status_err0_qw1);
		pkt_flags |= idpf_splitq_rx_rss_offload(rxm, rx_desc);
		if (idpf_timestamp_dynflag > 0 &&
		    (rxq->offloads & IDPF_RX_OFFLOAD_TIMESTAMP)) {
			/* timestamp */
			ts_ns = idpf_tstamp_convert_32b_64b(ad,
							    rxq->hw_register_set,
							    rte_le_to_cpu_32(rx_desc->ts_high));
			rxq->hw_register_set = 0;
			*RTE_MBUF_DYNFIELD(rxm,
					   idpf_timestamp_dynfield_offset,
					   rte_mbuf_timestamp_t *) = ts_ns;
			rxm->ol_flags |= idpf_timestamp_dynflag;
		}

		rxm->ol_flags |= pkt_flags;

		rx_pkts[nb_rx++] = rxm;
	}

	if (nb_rx > 0) {
		rxq->rx_tail = rx_id;
		if (rx_id_bufq1 != rxq->bufq1->rx_next_avail)
			rxq->bufq1->rx_next_avail = rx_id_bufq1;
		if (rx_id_bufq2 != rxq->bufq2->rx_next_avail)
			rxq->bufq2->rx_next_avail = rx_id_bufq2;

		idpf_split_rx_bufq_refill(rxq->bufq1);
		idpf_split_rx_bufq_refill(rxq->bufq2);
	}

	return nb_rx;
}
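
/*
 * Illustrative usage (assumption, not shown in this file): the net/idpf and
 * net/cpfl PMDs install idpf_dp_splitq_recv_pkts() as the ethdev Rx burst
 * callback when the split queue model is used, so an application receives
 * through the normal rte_eth_rx_burst() path, e.g.:
 *
 *	struct rte_mbuf *pkts[32];
 *	uint16_t n = rte_eth_rx_burst(port_id, queue_id, pkts, 32);
 */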

static inline void
idpf_split_tx_free(struct idpf_tx_queue *cq)
{
	volatile struct idpf_splitq_tx_compl_desc *compl_ring = cq->compl_ring;
	volatile struct idpf_splitq_tx_compl_desc *txd;
	uint16_t next = cq->tx_tail;
	struct idpf_tx_entry *txe;
	struct idpf_tx_queue *txq;
	uint16_t gen, qid, q_head;
	uint16_t nb_desc_clean;
	uint8_t ctype;

	txd = &compl_ring[next];
	gen = (rte_le_to_cpu_16(txd->qid_comptype_gen) &
	       IDPF_TXD_COMPLQ_GEN_M) >> IDPF_TXD_COMPLQ_GEN_S;
	if (gen != cq->expected_gen_id)
		return;

	ctype = (rte_le_to_cpu_16(txd->qid_comptype_gen) &
		 IDPF_TXD_COMPLQ_COMPL_TYPE_M) >> IDPF_TXD_COMPLQ_COMPL_TYPE_S;
	qid = (rte_le_to_cpu_16(txd->qid_comptype_gen) &
	       IDPF_TXD_COMPLQ_QID_M) >> IDPF_TXD_COMPLQ_QID_S;
	q_head = rte_le_to_cpu_16(txd->q_head_compl_tag.compl_tag);
	txq = cq->txqs[qid - cq->tx_start_qid];

	switch (ctype) {
	case IDPF_TXD_COMPLT_RE:
		/* Clean up to q_head, which is the fetched txq descriptor id + 1.
		 * TODO: refine this and remove the if condition below.
		 */
		if (unlikely(q_head % 32)) {
			TX_LOG(ERR, "unexpected desc (head = %u) completion.",
			       q_head);
			return;
		}
		if (txq->last_desc_cleaned > q_head)
			nb_desc_clean = (txq->nb_tx_desc - txq->last_desc_cleaned) +
				q_head;
		else
			nb_desc_clean = q_head - txq->last_desc_cleaned;
		txq->nb_free += nb_desc_clean;
		txq->last_desc_cleaned = q_head;
		break;
	case IDPF_TXD_COMPLT_RS:
		/* q_head indicates sw_id when ctype is 2 */
		txe = &txq->sw_ring[q_head];
		if (txe->mbuf != NULL) {
			rte_pktmbuf_free_seg(txe->mbuf);
			txe->mbuf = NULL;
		}
		break;
	default:
		TX_LOG(ERR, "unknown completion type.");
		return;
	}

	if (++next == cq->nb_tx_desc) {
		next = 0;
		cq->expected_gen_id ^= 1;
	}

	cq->tx_tail = next;
}

/* Check if the context descriptor is needed for TX offloading */
static inline uint16_t
idpf_calc_context_desc(uint64_t flags)
{
	if ((flags & RTE_MBUF_F_TX_TCP_SEG) != 0)
		return 1;

	return 0;
}

/* Set the TSO context descriptor. */
static inline void
idpf_set_splitq_tso_ctx(struct rte_mbuf *mbuf,
			union idpf_tx_offload tx_offload,
			volatile union idpf_flex_tx_ctx_desc *ctx_desc)
{
	uint16_t cmd_dtype;
	uint32_t tso_len;
	uint8_t hdr_len;

	if (tx_offload.l4_len == 0) {
		TX_LOG(DEBUG, "L4 length set to 0");
		return;
	}

	hdr_len = tx_offload.l2_len +
		tx_offload.l3_len +
		tx_offload.l4_len;
	cmd_dtype = IDPF_TX_DESC_DTYPE_FLEX_TSO_CTX |
		IDPF_TX_FLEX_CTX_DESC_CMD_TSO;
	tso_len = mbuf->pkt_len - hdr_len;

	ctx_desc->tso.qw1.cmd_dtype = rte_cpu_to_le_16(cmd_dtype);
	ctx_desc->tso.qw0.hdr_len = hdr_len;
	ctx_desc->tso.qw0.mss_rt =
		rte_cpu_to_le_16((uint16_t)mbuf->tso_segsz &
				 IDPF_TXD_FLEX_CTX_MSS_RT_M);
	ctx_desc->tso.qw0.flex_tlen =
		rte_cpu_to_le_32(tso_len &
				 IDPF_TXD_FLEX_CTX_MSS_RT_M);
}
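
/*
 * Worked example for the TSO context above (illustrative numbers): for a
 * 14-byte Ethernet header, a 20-byte IPv4 header and a 20-byte TCP header
 * with pkt_len = 8954 and tso_segsz = 1460, hdr_len = 54 and
 * tso_len = 8900; the hardware then segments the 8900 payload bytes into
 * ceil(8900 / 1460) = 7 frames, each prefixed with a copy of the 54-byte
 * header.
 */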

uint16_t
idpf_dp_splitq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
			 uint16_t nb_pkts)
{
	struct idpf_tx_queue *txq = (struct idpf_tx_queue *)tx_queue;
	volatile struct idpf_flex_tx_sched_desc *txr;
	volatile struct idpf_flex_tx_sched_desc *txd;
	struct idpf_tx_entry *sw_ring;
	union idpf_tx_offload tx_offload = {0};
	struct idpf_tx_entry *txe, *txn;
	uint16_t nb_used, tx_id, sw_id;
	struct rte_mbuf *tx_pkt;
	uint16_t nb_to_clean;
	uint16_t nb_tx = 0;
	uint64_t ol_flags;
	uint16_t nb_ctx;

	if (unlikely(txq == NULL) || unlikely(!txq->q_started))
		return nb_tx;

	txr = txq->desc_ring;
	sw_ring = txq->sw_ring;
	tx_id = txq->tx_tail;
	sw_id = txq->sw_tail;
	txe = &sw_ring[sw_id];

	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
		tx_pkt = tx_pkts[nb_tx];

		if (txq->nb_free <= txq->free_thresh) {
			/* TODO: refine the cleanup strategy:
			 * 1. Free and clean: it would be better to decide on a
			 *    clean destination instead of a loop count, and not
			 *    to free the mbuf as soon as the RS completion
			 *    arrives but on a later transmit or according to
			 *    that clean destination. For now, the RE write-back
			 *    is ignored and the mbuf is freed when the RS
			 *    completion is received.
			 * 2. Out-of-order write-back is not supported yet; the
			 *    SW head and the HW head need to be separated.
			 */
			nb_to_clean = 2 * txq->rs_thresh;
			while (nb_to_clean--)
				idpf_split_tx_free(txq->complq);
		}

		if (txq->nb_free < tx_pkt->nb_segs)
			break;

		ol_flags = tx_pkt->ol_flags;
		tx_offload.l2_len = tx_pkt->l2_len;
		tx_offload.l3_len = tx_pkt->l3_len;
		tx_offload.l4_len = tx_pkt->l4_len;
		tx_offload.tso_segsz = tx_pkt->tso_segsz;
		/* Calculate the number of context descriptors needed. */
		nb_ctx = idpf_calc_context_desc(ol_flags);
		nb_used = tx_pkt->nb_segs + nb_ctx;

		/* context descriptor */
		if (nb_ctx != 0) {
			volatile union idpf_flex_tx_ctx_desc *ctx_desc =
				(volatile union idpf_flex_tx_ctx_desc *)&txr[tx_id];

			if ((ol_flags & RTE_MBUF_F_TX_TCP_SEG) != 0)
				idpf_set_splitq_tso_ctx(tx_pkt, tx_offload,
							ctx_desc);

			tx_id++;
			if (tx_id == txq->nb_tx_desc)
				tx_id = 0;
		}

		do {
			txd = &txr[tx_id];
			txn = &sw_ring[txe->next_id];
			txe->mbuf = tx_pkt;

			/* Setup TX descriptor */
			txd->buf_addr =
				rte_cpu_to_le_64(rte_mbuf_data_iova(tx_pkt));
			txd->qw1.cmd_dtype =
				rte_cpu_to_le_16(IDPF_TX_DESC_DTYPE_FLEX_FLOW_SCHE);
			txd->qw1.rxr_bufsize = tx_pkt->data_len;
			txd->qw1.compl_tag = sw_id;
			tx_id++;
			if (tx_id == txq->nb_tx_desc)
				tx_id = 0;
			sw_id = txe->next_id;
			txe = txn;
			tx_pkt = tx_pkt->next;
		} while (tx_pkt);

		/* fill the last descriptor with End of Packet (EOP) bit */
		txd->qw1.cmd_dtype |= IDPF_TXD_FLEX_FLOW_CMD_EOP;

		if (ol_flags & IDPF_TX_CKSUM_OFFLOAD_MASK)
			txd->qw1.cmd_dtype |= IDPF_TXD_FLEX_FLOW_CMD_CS_EN;
		txq->nb_free = (uint16_t)(txq->nb_free - nb_used);
		txq->nb_used = (uint16_t)(txq->nb_used + nb_used);

		if (txq->nb_used >= 32) {
			txd->qw1.cmd_dtype |= IDPF_TXD_FLEX_FLOW_CMD_RE;
			/* Update txq RE bit counters */
			txq->nb_used = 0;
		}
	}

	/* update the tail pointer if any packets were processed */
	if (likely(nb_tx > 0)) {
		IDPF_PCI_REG_WRITE(txq->qtx_tail, tx_id);
		txq->tx_tail = tx_id;
		txq->sw_tail = sw_id;
	}

	return nb_tx;
}

#define IDPF_RX_FLEX_DESC_STATUS0_XSUM_S				\
	(RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_IPE_S) |	\
	 RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_L4E_S) |	\
	 RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S) |	\
	 RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_S))

/* Translate the rx descriptor status and error fields to pkt flags */
static inline uint64_t
idpf_rxd_to_pkt_flags(uint16_t status_error)
{
	uint64_t flags = 0;

	if (unlikely((status_error & RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_STATUS0_L3L4P_S)) == 0))
		return flags;

	if (likely((status_error & IDPF_RX_FLEX_DESC_STATUS0_XSUM_S) == 0)) {
		flags |= (RTE_MBUF_F_RX_IP_CKSUM_GOOD |
			  RTE_MBUF_F_RX_L4_CKSUM_GOOD);
		return flags;
	}

	if (unlikely((status_error & RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_IPE_S)) != 0))
		flags |= RTE_MBUF_F_RX_IP_CKSUM_BAD;
	else
		flags |= RTE_MBUF_F_RX_IP_CKSUM_GOOD;

	if (unlikely((status_error & RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_L4E_S)) != 0))
		flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD;
	else
		flags |= RTE_MBUF_F_RX_L4_CKSUM_GOOD;

	if (unlikely((status_error & RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S)) != 0))
		flags |= RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD;

	if (unlikely((status_error & RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_S)) != 0))
		flags |= RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD;
	else
		flags |= RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD;

	return flags;
}

static inline void
idpf_update_rx_tail(struct idpf_rx_queue *rxq, uint16_t nb_hold,
		    uint16_t rx_id)
{
	nb_hold = (uint16_t)(nb_hold + rxq->nb_rx_hold);

	if (nb_hold > rxq->rx_free_thresh) {
		RX_LOG(DEBUG,
		       "port_id=%u queue_id=%u rx_tail=%u nb_hold=%u",
		       rxq->port_id, rxq->queue_id, rx_id, nb_hold);
		rx_id = (uint16_t)((rx_id == 0) ?
				   (rxq->nb_rx_desc - 1) : (rx_id - 1));
		IDPF_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
		nb_hold = 0;
	}
	rxq->nb_rx_hold = nb_hold;
}

static inline void
idpf_singleq_rx_rss_offload(struct rte_mbuf *mb,
			    volatile struct virtchnl2_rx_flex_desc_nic *rx_desc,
			    uint64_t *pkt_flags)
{
	uint16_t rx_status0 = rte_le_to_cpu_16(rx_desc->status_error0);

	if (rx_status0 & RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_STATUS0_RSS_VALID_S)) {
		*pkt_flags |= RTE_MBUF_F_RX_RSS_HASH;
		mb->hash.rss = rte_le_to_cpu_32(rx_desc->rss_hash);
	}
}

uint16_t
idpf_dp_singleq_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
			  uint16_t nb_pkts)
{
	volatile union virtchnl2_rx_desc *rx_ring;
	volatile union virtchnl2_rx_desc *rxdp;
	union virtchnl2_rx_desc rxd;
	struct idpf_rx_queue *rxq;
	const uint32_t *ptype_tbl;
	uint16_t rx_id, nb_hold;
	struct idpf_adapter *ad;
	uint16_t rx_packet_len;
	struct rte_mbuf *rxm;
	struct rte_mbuf *nmb;
	uint16_t rx_status0;
	uint64_t pkt_flags;
	uint64_t dma_addr;
	uint64_t ts_ns;
	uint16_t nb_rx;

	nb_rx = 0;
	nb_hold = 0;
	rxq = rx_queue;

	if (unlikely(rxq == NULL) || unlikely(!rxq->q_started))
		return nb_rx;

	/* Only dereference the queue after the NULL check above. */
	ad = rxq->adapter;
	rx_id = rxq->rx_tail;
	rx_ring = rxq->rx_ring;
	ptype_tbl = rxq->adapter->ptype_tbl;

	if ((rxq->offloads & IDPF_RX_OFFLOAD_TIMESTAMP) != 0)
		rxq->hw_register_set = 1;

	while (nb_rx < nb_pkts) {
		rxdp = &rx_ring[rx_id];
		rx_status0 = rte_le_to_cpu_16(rxdp->flex_nic_wb.status_error0);

		/* Check the DD bit first */
		if ((rx_status0 & (1 << VIRTCHNL2_RX_FLEX_DESC_STATUS0_DD_S)) == 0)
			break;

		nmb = rte_mbuf_raw_alloc(rxq->mp);
		if (unlikely(nmb == NULL)) {
			__atomic_fetch_add(&rxq->rx_stats.mbuf_alloc_failed, 1, __ATOMIC_RELAXED);
			RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
			       "queue_id=%u", rxq->port_id, rxq->queue_id);
			break;
		}
		rxd = *rxdp; /* copy descriptor in ring to temp variable */

		nb_hold++;
		rxm = rxq->sw_ring[rx_id];
		rxq->sw_ring[rx_id] = nmb;
		rx_id++;
		if (unlikely(rx_id == rxq->nb_rx_desc))
			rx_id = 0;

		/* Prefetch next mbuf */
		rte_prefetch0(rxq->sw_ring[rx_id]);

		/* When next RX descriptor is on a cache line boundary,
		 * prefetch the next 4 RX descriptors and next 8 pointers
		 * to mbufs.
		 */
		if ((rx_id & 0x3) == 0) {
			rte_prefetch0(&rx_ring[rx_id]);
			rte_prefetch0(rxq->sw_ring[rx_id]);
		}
		dma_addr =
			rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
		rxdp->read.hdr_addr = 0;
		rxdp->read.pkt_addr = dma_addr;

		rx_packet_len = (rte_cpu_to_le_16(rxd.flex_nic_wb.pkt_len) &
				 VIRTCHNL2_RX_FLEX_DESC_PKT_LEN_M);

		rxm->data_off = RTE_PKTMBUF_HEADROOM;
		rte_prefetch0(RTE_PTR_ADD(rxm->buf_addr, RTE_PKTMBUF_HEADROOM));
		rxm->nb_segs = 1;
		rxm->next = NULL;
		rxm->pkt_len = rx_packet_len;
		rxm->data_len = rx_packet_len;
		rxm->port = rxq->port_id;
		rxm->ol_flags = 0;
		pkt_flags = idpf_rxd_to_pkt_flags(rx_status0);
		idpf_singleq_rx_rss_offload(rxm, &rxd.flex_nic_wb, &pkt_flags);
		rxm->packet_type =
			ptype_tbl[(uint8_t)(rte_cpu_to_le_16(rxd.flex_nic_wb.ptype_flex_flags0) &
					    VIRTCHNL2_RX_FLEX_DESC_PTYPE_M)];

		rxm->ol_flags |= pkt_flags;

		if (idpf_timestamp_dynflag > 0 &&
		    (rxq->offloads & IDPF_RX_OFFLOAD_TIMESTAMP) != 0) {
			/* timestamp */
			ts_ns = idpf_tstamp_convert_32b_64b(ad,
							    rxq->hw_register_set,
							    rte_le_to_cpu_32(rxd.flex_nic_wb.flex_ts.ts_high));
			rxq->hw_register_set = 0;
			*RTE_MBUF_DYNFIELD(rxm,
					   idpf_timestamp_dynfield_offset,
					   rte_mbuf_timestamp_t *) = ts_ns;
			rxm->ol_flags |= idpf_timestamp_dynflag;
		}

		rx_pkts[nb_rx++] = rxm;
	}
	rxq->rx_tail = rx_id;

	idpf_update_rx_tail(rxq, nb_hold, rx_id);

	return nb_rx;
}

uint16_t
idpf_dp_singleq_recv_scatter_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
				  uint16_t nb_pkts)
{
	struct idpf_rx_queue *rxq = rx_queue;
	volatile union virtchnl2_rx_desc *rx_ring = rxq->rx_ring;
	volatile union virtchnl2_rx_desc *rxdp;
	union virtchnl2_rx_desc rxd;
	struct idpf_adapter *ad;
	struct rte_mbuf *first_seg = rxq->pkt_first_seg;
	struct rte_mbuf *last_seg = rxq->pkt_last_seg;
	struct rte_mbuf *rxm;
	struct rte_mbuf *nmb;
	struct rte_eth_dev *dev;
	const uint32_t *ptype_tbl = rxq->adapter->ptype_tbl;
	uint16_t rx_id = rxq->rx_tail;
	uint16_t rx_packet_len;
	uint16_t nb_hold = 0;
	uint16_t rx_status0;
	uint16_t nb_rx = 0;
	uint64_t pkt_flags;
	uint64_t dma_addr;
	uint64_t ts_ns;

	ad = rxq->adapter;

	if (unlikely(!rxq) || unlikely(!rxq->q_started))
		return nb_rx;

	while (nb_rx < nb_pkts) {
		rxdp = &rx_ring[rx_id];
		rx_status0 = rte_le_to_cpu_16(rxdp->flex_nic_wb.status_error0);

		/* Check the DD bit first */
		if (!(rx_status0 & (1 << VIRTCHNL2_RX_FLEX_DESC_STATUS0_DD_S)))
			break;

		nmb = rte_mbuf_raw_alloc(rxq->mp);
		if (unlikely(!nmb)) {
			__atomic_fetch_add(&rxq->rx_stats.mbuf_alloc_failed, 1, __ATOMIC_RELAXED);
			RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
			       "queue_id=%u", rxq->port_id, rxq->queue_id);
			break;
		}

		rxd = *rxdp;

		nb_hold++;
		rxm = rxq->sw_ring[rx_id];
		rxq->sw_ring[rx_id] = nmb;
		rx_id++;
		if (unlikely(rx_id == rxq->nb_rx_desc))
			rx_id = 0;

		/* Prefetch next mbuf */
		rte_prefetch0(rxq->sw_ring[rx_id]);

		/* When next RX descriptor is on a cache line boundary,
		 * prefetch the next 4 RX descriptors and next 8 pointers
		 * to mbufs.
		 */
		if ((rx_id & 0x3) == 0) {
			rte_prefetch0(&rx_ring[rx_id]);
			rte_prefetch0(rxq->sw_ring[rx_id]);
		}
		dma_addr =
			rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
		rxdp->read.hdr_addr = 0;
		rxdp->read.pkt_addr = dma_addr;
		rx_packet_len = (rte_cpu_to_le_16(rxd.flex_nic_wb.pkt_len) &
				 VIRTCHNL2_RX_FLEX_DESC_PKT_LEN_M);
		rxm->data_len = rx_packet_len;
		rxm->data_off = RTE_PKTMBUF_HEADROOM;

		/*
		 * If this is the first buffer of the received packet, set the
		 * pointer to the first mbuf of the packet and initialize its
		 * context. Otherwise, update the total length and the number
		 * of segments of the current scattered packet, and update the
		 * pointer to the last mbuf of the current packet.
		 */
		if (!first_seg) {
			first_seg = rxm;
			first_seg->nb_segs = 1;
			first_seg->pkt_len = rx_packet_len;
		} else {
			first_seg->pkt_len =
				(uint16_t)(first_seg->pkt_len +
					   rx_packet_len);
			first_seg->nb_segs++;
			last_seg->next = rxm;
		}

		if (!(rx_status0 & (1 << VIRTCHNL2_RX_FLEX_DESC_STATUS0_EOF_S))) {
			last_seg = rxm;
			continue;
		}

		rxm->next = NULL;

		first_seg->port = rxq->port_id;
		first_seg->ol_flags = 0;
		pkt_flags = idpf_rxd_to_pkt_flags(rx_status0);
		idpf_singleq_rx_rss_offload(first_seg, &rxd.flex_nic_wb, &pkt_flags);
		first_seg->packet_type =
			ptype_tbl[(uint8_t)(rte_cpu_to_le_16(rxd.flex_nic_wb.ptype_flex_flags0) &
					    VIRTCHNL2_RX_FLEX_DESC_PTYPE_M)];

		if (idpf_timestamp_dynflag > 0 &&
		    (rxq->offloads & IDPF_RX_OFFLOAD_TIMESTAMP) != 0) {
			/* timestamp */
			ts_ns = idpf_tstamp_convert_32b_64b(ad,
							    rxq->hw_register_set,
							    rte_le_to_cpu_32(rxd.flex_nic_wb.flex_ts.ts_high));
			rxq->hw_register_set = 0;
			*RTE_MBUF_DYNFIELD(rxm,
					   idpf_timestamp_dynfield_offset,
					   rte_mbuf_timestamp_t *) = ts_ns;
			first_seg->ol_flags |= idpf_timestamp_dynflag;
		}

		first_seg->ol_flags |= pkt_flags;
		rte_prefetch0(RTE_PTR_ADD(first_seg->buf_addr,
					  first_seg->data_off));
		rx_pkts[nb_rx++] = first_seg;
		first_seg = NULL;
	}
	rxq->rx_tail = rx_id;
	rxq->pkt_first_seg = first_seg;
	rxq->pkt_last_seg = last_seg;

	idpf_update_rx_tail(rxq, nb_hold, rx_id);

	return nb_rx;
}

static inline int
idpf_xmit_cleanup(struct idpf_tx_queue *txq)
{
	uint16_t last_desc_cleaned = txq->last_desc_cleaned;
	struct idpf_tx_entry *sw_ring = txq->sw_ring;
	uint16_t nb_tx_desc = txq->nb_tx_desc;
	uint16_t desc_to_clean_to;
	uint16_t nb_tx_to_clean;
	uint16_t i;

	volatile struct idpf_flex_tx_desc *txd = txq->tx_ring;

	desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->rs_thresh);
	if (desc_to_clean_to >= nb_tx_desc)
		desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);

	desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
	/* In the writeback Tx descriptor, the only significant field is the 4-bit DTYPE */
	if ((txd[desc_to_clean_to].qw1.cmd_dtype &
	     rte_cpu_to_le_16(IDPF_TXD_QW1_DTYPE_M)) !=
	    rte_cpu_to_le_16(IDPF_TX_DESC_DTYPE_DESC_DONE)) {
		TX_LOG(DEBUG, "TX descriptor %4u is not done "
		       "(port=%d queue=%d)", desc_to_clean_to,
		       txq->port_id, txq->queue_id);
		return -1;
	}

	if (last_desc_cleaned > desc_to_clean_to)
		nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) +
					    desc_to_clean_to);
	else
		nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
					    last_desc_cleaned);

	txd[desc_to_clean_to].qw1.cmd_dtype = 0;
	txd[desc_to_clean_to].qw1.buf_size = 0;
	for (i = 0; i < RTE_DIM(txd[desc_to_clean_to].qw1.flex.raw); i++)
		txd[desc_to_clean_to].qw1.flex.raw[i] = 0;

	txq->last_desc_cleaned = desc_to_clean_to;
	txq->nb_free = (uint16_t)(txq->nb_free + nb_tx_to_clean);

	return 0;
}
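
/*
 * Worked example for the cleanup above (illustrative numbers, assuming
 * single-descriptor packets so that sw_ring[i].last_id == i): with
 * nb_tx_desc = 512, rs_thresh = 32 and last_desc_cleaned = 500,
 * desc_to_clean_to wraps to 500 + 32 - 512 = 20; if that descriptor
 * reports IDPF_TX_DESC_DTYPE_DESC_DONE, nb_tx_to_clean =
 * (512 - 500) + 20 = 32 descriptors are returned to nb_free and
 * last_desc_cleaned becomes 20.
 */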

/* TX function */
uint16_t
idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
			  uint16_t nb_pkts)
{
	volatile struct idpf_flex_tx_desc *txd;
	volatile struct idpf_flex_tx_desc *txr;
	union idpf_tx_offload tx_offload = {0};
	struct idpf_tx_entry *txe, *txn;
	struct idpf_tx_entry *sw_ring;
	struct idpf_tx_queue *txq;
	struct rte_mbuf *tx_pkt;
	struct rte_mbuf *m_seg;
	uint64_t buf_dma_addr;
	uint64_t ol_flags;
	uint16_t tx_last;
	uint16_t nb_used;
	uint16_t nb_ctx;
	uint16_t td_cmd;
	uint16_t tx_id;
	uint16_t nb_tx;
	uint16_t slen;

	nb_tx = 0;
	txq = tx_queue;

	if (unlikely(txq == NULL) || unlikely(!txq->q_started))
		return nb_tx;

	sw_ring = txq->sw_ring;
	txr = txq->tx_ring;
	tx_id = txq->tx_tail;
	txe = &sw_ring[tx_id];

	/* Check if the descriptor ring needs to be cleaned. */
	if (txq->nb_free < txq->free_thresh)
		(void)idpf_xmit_cleanup(txq);

	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
		td_cmd = 0;

		tx_pkt = *tx_pkts++;
		RTE_MBUF_PREFETCH_TO_FREE(txe->mbuf);

		ol_flags = tx_pkt->ol_flags;
		tx_offload.l2_len = tx_pkt->l2_len;
		tx_offload.l3_len = tx_pkt->l3_len;
		tx_offload.l4_len = tx_pkt->l4_len;
		tx_offload.tso_segsz = tx_pkt->tso_segsz;
		/* Calculate the number of context descriptors needed. */
		nb_ctx = idpf_calc_context_desc(ol_flags);

		/* The number of descriptors that must be allocated for
		 * a packet equals the number of segments of that packet
		 * plus 1 context descriptor if needed.
		 */
		nb_used = (uint16_t)(tx_pkt->nb_segs + nb_ctx);
		tx_last = (uint16_t)(tx_id + nb_used - 1);

		/* Circular ring */
		if (tx_last >= txq->nb_tx_desc)
			tx_last = (uint16_t)(tx_last - txq->nb_tx_desc);

		TX_LOG(DEBUG, "port_id=%u queue_id=%u"
		       " tx_first=%u tx_last=%u",
		       txq->port_id, txq->queue_id, tx_id, tx_last);

		if (nb_used > txq->nb_free) {
			if (idpf_xmit_cleanup(txq) != 0) {
				if (nb_tx == 0)
					return 0;
				goto end_of_tx;
			}
			if (unlikely(nb_used > txq->rs_thresh)) {
				while (nb_used > txq->nb_free) {
					if (idpf_xmit_cleanup(txq) != 0) {
						if (nb_tx == 0)
							return 0;
						goto end_of_tx;
					}
				}
			}
		}

		if (nb_ctx != 0) {
			/* Setup TX context descriptor if required */
			volatile union idpf_flex_tx_ctx_desc *ctx_txd =
				(volatile union idpf_flex_tx_ctx_desc *)
				&txr[tx_id];

			txn = &sw_ring[txe->next_id];
			RTE_MBUF_PREFETCH_TO_FREE(txn->mbuf);
			if (txe->mbuf != NULL) {
				rte_pktmbuf_free_seg(txe->mbuf);
				txe->mbuf = NULL;
			}

			/* TSO enabled */
			if ((ol_flags & RTE_MBUF_F_TX_TCP_SEG) != 0)
				idpf_set_splitq_tso_ctx(tx_pkt, tx_offload,
							ctx_txd);

			txe->last_id = tx_last;
			tx_id = txe->next_id;
			txe = txn;
		}

		m_seg = tx_pkt;
		do {
			txd = &txr[tx_id];
			txn = &sw_ring[txe->next_id];

			if (txe->mbuf != NULL)
				rte_pktmbuf_free_seg(txe->mbuf);
			txe->mbuf = m_seg;

			/* Setup TX Descriptor */
			slen = m_seg->data_len;
			buf_dma_addr = rte_mbuf_data_iova(m_seg);
			txd->buf_addr = rte_cpu_to_le_64(buf_dma_addr);
			txd->qw1.buf_size = slen;
			txd->qw1.cmd_dtype = rte_cpu_to_le_16(IDPF_TX_DESC_DTYPE_FLEX_DATA <<
							      IDPF_FLEX_TXD_QW1_DTYPE_S);

			txe->last_id = tx_last;
			tx_id = txe->next_id;
			txe = txn;
			m_seg = m_seg->next;
		} while (m_seg);

		/* The last packet data descriptor needs End Of Packet (EOP) */
		td_cmd |= IDPF_TX_FLEX_DESC_CMD_EOP;
		txq->nb_used = (uint16_t)(txq->nb_used + nb_used);
		txq->nb_free = (uint16_t)(txq->nb_free - nb_used);

		if (txq->nb_used >= txq->rs_thresh) {
			TX_LOG(DEBUG, "Setting RS bit on TXD id="
			       "%4u (port=%d queue=%d)",
			       tx_last, txq->port_id, txq->queue_id);

			td_cmd |= IDPF_TX_FLEX_DESC_CMD_RS;

			/* Update txq RS bit counters */
			txq->nb_used = 0;
		}

		if (ol_flags & IDPF_TX_CKSUM_OFFLOAD_MASK)
			td_cmd |= IDPF_TX_FLEX_DESC_CMD_CS_EN;

		txd->qw1.cmd_dtype |= rte_cpu_to_le_16(td_cmd << IDPF_FLEX_TXD_QW1_CMD_S);
	}

end_of_tx:
	rte_wmb();

	TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
	       txq->port_id, txq->queue_id, tx_id, nb_tx);

	IDPF_PCI_REG_WRITE(txq->qtx_tail, tx_id);
	txq->tx_tail = tx_id;

	return nb_tx;
}

/* TX prep functions */
uint16_t
idpf_dp_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
		  uint16_t nb_pkts)
{
#ifdef RTE_LIBRTE_ETHDEV_DEBUG
	int ret;
#endif
	int i;
	uint64_t ol_flags;
	struct rte_mbuf *m;

	for (i = 0; i < nb_pkts; i++) {
		m = tx_pkts[i];
		ol_flags = m->ol_flags;

		/* Check condition for nb_segs > IDPF_TX_MAX_MTU_SEG. */
		if ((ol_flags & RTE_MBUF_F_TX_TCP_SEG) == 0) {
			if (m->nb_segs > IDPF_TX_MAX_MTU_SEG) {
				rte_errno = EINVAL;
				return i;
			}
		} else if ((m->tso_segsz < IDPF_MIN_TSO_MSS) ||
			   (m->tso_segsz > IDPF_MAX_TSO_MSS) ||
			   (m->pkt_len > IDPF_MAX_TSO_FRAME_SIZE)) {
			/* An MSS outside the range is considered malicious */
			rte_errno = EINVAL;
			return i;
		}

		if ((ol_flags & IDPF_TX_OFFLOAD_NOTSUP_MASK) != 0) {
			rte_errno = ENOTSUP;
			return i;
		}

		if (m->pkt_len < IDPF_MIN_FRAME_SIZE) {
			rte_errno = EINVAL;
			return i;
		}

#ifdef RTE_LIBRTE_ETHDEV_DEBUG
		ret = rte_validate_tx_offload(m);
		if (ret != 0) {
			rte_errno = -ret;
			return i;
		}
#endif
	}

	return i;
}

static void __rte_cold
release_rxq_mbufs_vec(struct idpf_rx_queue *rxq)
{
	const uint16_t mask = rxq->nb_rx_desc - 1;
	uint16_t i;

	if (rxq->sw_ring == NULL || rxq->rxrearm_nb >= rxq->nb_rx_desc)
		return;

	/* free all mbufs that are valid in the ring */
	if (rxq->rxrearm_nb == 0) {
		for (i = 0; i < rxq->nb_rx_desc; i++) {
			if (rxq->sw_ring[i] != NULL)
				rte_pktmbuf_free_seg(rxq->sw_ring[i]);
		}
	} else {
		for (i = rxq->rx_tail; i != rxq->rxrearm_start; i = (i + 1) & mask) {
			if (rxq->sw_ring[i] != NULL)
				rte_pktmbuf_free_seg(rxq->sw_ring[i]);
		}
	}

	rxq->rxrearm_nb = rxq->nb_rx_desc;

	/* set all entries to NULL */
	memset(rxq->sw_ring, 0, sizeof(rxq->sw_ring[0]) * rxq->nb_rx_desc);
}

static const struct idpf_rxq_ops def_rx_ops_vec = {
	.release_mbufs = release_rxq_mbufs_vec,
};

static inline int
idpf_rxq_vec_setup_default(struct idpf_rx_queue *rxq)
{
	uintptr_t p;
	struct rte_mbuf mb_def = { .buf_addr = 0 }; /* zeroed mbuf */

	mb_def.nb_segs = 1;
	mb_def.data_off = RTE_PKTMBUF_HEADROOM;
	mb_def.port = rxq->port_id;
	rte_mbuf_refcnt_set(&mb_def, 1);

	/* prevent compiler reordering: rearm_data covers previous fields */
	rte_compiler_barrier();
	p = (uintptr_t)&mb_def.rearm_data;
	rxq->mbuf_initializer = *(uint64_t *)p;
	return 0;
}

int __rte_cold
idpf_qc_singleq_rx_vec_setup(struct idpf_rx_queue *rxq)
{
	rxq->ops = &def_rx_ops_vec;
	return idpf_rxq_vec_setup_default(rxq);
}

int __rte_cold
idpf_qc_splitq_rx_vec_setup(struct idpf_rx_queue *rxq)
{
	rxq->bufq2->ops = &def_rx_ops_vec;
	return idpf_rxq_vec_setup_default(rxq->bufq2);
}
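
/*
 * Illustrative wiring (assumption, not part of this file): a PMD built on
 * this common layer typically selects these data path routines per queue
 * model when configuring the ethdev, roughly as follows (rte_eth_dev burst
 * callback fields shown, surrounding setup code assumed):
 *
 *	dev->rx_pkt_burst = splitq ? idpf_dp_splitq_recv_pkts :
 *				     idpf_dp_singleq_recv_pkts;
 *	dev->tx_pkt_burst = splitq ? idpf_dp_splitq_xmit_pkts :
 *				     idpf_dp_singleq_xmit_pkts;
 *	dev->tx_pkt_prepare = idpf_dp_prep_pkts;
 */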