1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2023 Intel Corporation 3 */ 4 5 #include <rte_mbuf_dyn.h> 6 #include <rte_errno.h> 7 8 #include "idpf_common_rxtx.h" 9 10 int idpf_timestamp_dynfield_offset = -1; 11 uint64_t idpf_timestamp_dynflag; 12 13 int 14 idpf_qc_rx_thresh_check(uint16_t nb_desc, uint16_t thresh) 15 { 16 /* The following constraints must be satisfied: 17 * thresh < rxq->nb_rx_desc 18 */ 19 if (thresh >= nb_desc) { 20 DRV_LOG(ERR, "rx_free_thresh (%u) must be less than %u", 21 thresh, nb_desc); 22 return -EINVAL; 23 } 24 25 return 0; 26 } 27 28 int 29 idpf_qc_tx_thresh_check(uint16_t nb_desc, uint16_t tx_rs_thresh, 30 uint16_t tx_free_thresh) 31 { 32 /* TX descriptors will have their RS bit set after tx_rs_thresh 33 * descriptors have been used. The TX descriptor ring will be cleaned 34 * after tx_free_thresh descriptors are used or if the number of 35 * descriptors required to transmit a packet is greater than the 36 * number of free TX descriptors. 37 * 38 * The following constraints must be satisfied: 39 * - tx_rs_thresh must be less than the size of the ring minus 2. 40 * - tx_free_thresh must be less than the size of the ring minus 3. 41 * - tx_rs_thresh must be less than or equal to tx_free_thresh. 42 * - tx_rs_thresh must be a divisor of the ring size. 43 * 44 * One descriptor in the TX ring is used as a sentinel to avoid a H/W 45 * race condition, hence the maximum threshold constraints. When set 46 * to zero use default values. 47 */ 48 if (tx_rs_thresh >= (nb_desc - 2)) { 49 DRV_LOG(ERR, "tx_rs_thresh (%u) must be less than the " 50 "number of TX descriptors (%u) minus 2", 51 tx_rs_thresh, nb_desc); 52 return -EINVAL; 53 } 54 if (tx_free_thresh >= (nb_desc - 3)) { 55 DRV_LOG(ERR, "tx_free_thresh (%u) must be less than the " 56 "number of TX descriptors (%u) minus 3.", 57 tx_free_thresh, nb_desc); 58 return -EINVAL; 59 } 60 if (tx_rs_thresh > tx_free_thresh) { 61 DRV_LOG(ERR, "tx_rs_thresh (%u) must be less than or " 62 "equal to tx_free_thresh (%u).", 63 tx_rs_thresh, tx_free_thresh); 64 return -EINVAL; 65 } 66 if ((nb_desc % tx_rs_thresh) != 0) { 67 DRV_LOG(ERR, "tx_rs_thresh (%u) must be a divisor of the " 68 "number of TX descriptors (%u).", 69 tx_rs_thresh, nb_desc); 70 return -EINVAL; 71 } 72 73 return 0; 74 } 75 76 void 77 idpf_qc_rxq_mbufs_release(struct idpf_rx_queue *rxq) 78 { 79 uint16_t i; 80 81 if (rxq->sw_ring == NULL) 82 return; 83 84 for (i = 0; i < rxq->nb_rx_desc; i++) { 85 if (rxq->sw_ring[i] != NULL) { 86 rte_pktmbuf_free_seg(rxq->sw_ring[i]); 87 rxq->sw_ring[i] = NULL; 88 } 89 } 90 } 91 92 void 93 idpf_qc_txq_mbufs_release(struct idpf_tx_queue *txq) 94 { 95 uint16_t nb_desc, i; 96 97 if (txq == NULL || txq->sw_ring == NULL) { 98 DRV_LOG(DEBUG, "Pointer to rxq or sw_ring is NULL"); 99 return; 100 } 101 102 if (txq->sw_nb_desc != 0) { 103 /* For split queue model, descriptor ring */ 104 nb_desc = txq->sw_nb_desc; 105 } else { 106 /* For single queue model */ 107 nb_desc = txq->nb_tx_desc; 108 } 109 for (i = 0; i < nb_desc; i++) { 110 if (txq->sw_ring[i].mbuf != NULL) { 111 rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf); 112 txq->sw_ring[i].mbuf = NULL; 113 } 114 } 115 } 116 117 void 118 idpf_qc_split_rx_descq_reset(struct idpf_rx_queue *rxq) 119 { 120 uint16_t len; 121 uint32_t i; 122 123 if (rxq == NULL) 124 return; 125 126 len = rxq->nb_rx_desc + IDPF_RX_MAX_BURST; 127 128 for (i = 0; i < len * sizeof(struct virtchnl2_rx_flex_desc_adv_nic_3); 129 i++) 130 ((volatile char *)rxq->rx_ring)[i] = 0; 131 132 rxq->rx_tail = 0; 133 rxq->expected_gen_id = 1; 134 } 135 136 void 137 idpf_qc_split_rx_bufq_reset(struct idpf_rx_queue *rxq) 138 { 139 uint16_t len; 140 uint32_t i; 141 142 if (rxq == NULL) 143 return; 144 145 len = rxq->nb_rx_desc + IDPF_RX_MAX_BURST; 146 147 for (i = 0; i < len * sizeof(struct virtchnl2_splitq_rx_buf_desc); 148 i++) 149 ((volatile char *)rxq->rx_ring)[i] = 0; 150 151 memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf)); 152 153 for (i = 0; i < IDPF_RX_MAX_BURST; i++) 154 rxq->sw_ring[rxq->nb_rx_desc + i] = &rxq->fake_mbuf; 155 156 /* The next descriptor id which can be received. */ 157 rxq->rx_next_avail = 0; 158 159 /* The next descriptor id which can be refilled. */ 160 rxq->rx_tail = 0; 161 /* The number of descriptors which can be refilled. */ 162 rxq->nb_rx_hold = rxq->nb_rx_desc - 1; 163 164 rxq->rxrearm_nb = 0; 165 rxq->rxrearm_start = 0; 166 167 rxq->bufq1 = NULL; 168 rxq->bufq2 = NULL; 169 } 170 171 void 172 idpf_qc_split_rx_queue_reset(struct idpf_rx_queue *rxq) 173 { 174 idpf_qc_split_rx_descq_reset(rxq); 175 idpf_qc_split_rx_bufq_reset(rxq->bufq1); 176 idpf_qc_split_rx_bufq_reset(rxq->bufq2); 177 } 178 179 void 180 idpf_qc_single_rx_queue_reset(struct idpf_rx_queue *rxq) 181 { 182 uint16_t len; 183 uint32_t i; 184 185 if (rxq == NULL) 186 return; 187 188 len = rxq->nb_rx_desc + IDPF_RX_MAX_BURST; 189 190 for (i = 0; i < len * sizeof(struct virtchnl2_singleq_rx_buf_desc); 191 i++) 192 ((volatile char *)rxq->rx_ring)[i] = 0; 193 194 memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf)); 195 196 for (i = 0; i < IDPF_RX_MAX_BURST; i++) 197 rxq->sw_ring[rxq->nb_rx_desc + i] = &rxq->fake_mbuf; 198 199 rxq->rx_tail = 0; 200 rxq->nb_rx_hold = 0; 201 202 rte_pktmbuf_free(rxq->pkt_first_seg); 203 204 rxq->pkt_first_seg = NULL; 205 rxq->pkt_last_seg = NULL; 206 rxq->rxrearm_start = 0; 207 rxq->rxrearm_nb = 0; 208 } 209 210 void 211 idpf_qc_split_tx_descq_reset(struct idpf_tx_queue *txq) 212 { 213 struct idpf_tx_entry *txe; 214 uint32_t i, size; 215 uint16_t prev; 216 217 if (txq == NULL) { 218 DRV_LOG(DEBUG, "Pointer to txq is NULL"); 219 return; 220 } 221 222 size = sizeof(struct idpf_flex_tx_sched_desc) * txq->nb_tx_desc; 223 for (i = 0; i < size; i++) 224 ((volatile char *)txq->desc_ring)[i] = 0; 225 226 txe = txq->sw_ring; 227 prev = (uint16_t)(txq->sw_nb_desc - 1); 228 for (i = 0; i < txq->sw_nb_desc; i++) { 229 txe[i].mbuf = NULL; 230 txe[i].last_id = i; 231 txe[prev].next_id = i; 232 prev = i; 233 } 234 235 txq->tx_tail = 0; 236 txq->nb_used = 0; 237 238 /* Use this as next to clean for split desc queue */ 239 txq->last_desc_cleaned = 0; 240 txq->sw_tail = 0; 241 txq->nb_free = txq->nb_tx_desc - 1; 242 243 memset(txq->ctype, 0, sizeof(txq->ctype)); 244 txq->next_dd = txq->rs_thresh - 1; 245 txq->next_rs = txq->rs_thresh - 1; 246 } 247 248 void 249 idpf_qc_split_tx_complq_reset(struct idpf_tx_queue *cq) 250 { 251 uint32_t i, size; 252 253 if (cq == NULL) { 254 DRV_LOG(DEBUG, "Pointer to complq is NULL"); 255 return; 256 } 257 258 size = sizeof(struct idpf_splitq_tx_compl_desc) * cq->nb_tx_desc; 259 for (i = 0; i < size; i++) 260 ((volatile char *)cq->compl_ring)[i] = 0; 261 262 cq->tx_tail = 0; 263 cq->expected_gen_id = 1; 264 } 265 266 void 267 idpf_qc_single_tx_queue_reset(struct idpf_tx_queue *txq) 268 { 269 struct idpf_tx_entry *txe; 270 uint32_t i, size; 271 uint16_t prev; 272 273 if (txq == NULL) { 274 DRV_LOG(DEBUG, "Pointer to txq is NULL"); 275 return; 276 } 277 278 txe = txq->sw_ring; 279 size = sizeof(struct idpf_base_tx_desc) * txq->nb_tx_desc; 280 for (i = 0; i < size; i++) 281 ((volatile char *)txq->tx_ring)[i] = 0; 282 283 prev = (uint16_t)(txq->nb_tx_desc - 1); 284 for (i = 0; i < txq->nb_tx_desc; i++) { 285 txq->tx_ring[i].qw1 = 286 rte_cpu_to_le_64(IDPF_TX_DESC_DTYPE_DESC_DONE); 287 txe[i].mbuf = NULL; 288 txe[i].last_id = i; 289 txe[prev].next_id = i; 290 prev = i; 291 } 292 293 txq->tx_tail = 0; 294 txq->nb_used = 0; 295 296 txq->last_desc_cleaned = txq->nb_tx_desc - 1; 297 txq->nb_free = txq->nb_tx_desc - 1; 298 299 txq->next_dd = txq->rs_thresh - 1; 300 txq->next_rs = txq->rs_thresh - 1; 301 } 302 303 void 304 idpf_qc_rx_queue_release(void *rxq) 305 { 306 struct idpf_rx_queue *q = rxq; 307 308 if (q == NULL) 309 return; 310 311 /* Split queue */ 312 if (!q->adapter->is_rx_singleq) { 313 q->bufq1->ops->release_mbufs(q->bufq1); 314 rte_free(q->bufq1->sw_ring); 315 rte_memzone_free(q->bufq1->mz); 316 rte_free(q->bufq1); 317 q->bufq2->ops->release_mbufs(q->bufq2); 318 rte_free(q->bufq2->sw_ring); 319 rte_memzone_free(q->bufq2->mz); 320 rte_free(q->bufq2); 321 rte_memzone_free(q->mz); 322 rte_free(q); 323 return; 324 } 325 326 /* Single queue */ 327 q->ops->release_mbufs(q); 328 rte_free(q->sw_ring); 329 rte_memzone_free(q->mz); 330 rte_free(q); 331 } 332 333 void 334 idpf_qc_tx_queue_release(void *txq) 335 { 336 struct idpf_tx_queue *q = txq; 337 338 if (q == NULL) 339 return; 340 341 if (q->complq) { 342 rte_memzone_free(q->complq->mz); 343 rte_free(q->complq); 344 } 345 346 q->ops->release_mbufs(q); 347 rte_free(q->sw_ring); 348 rte_memzone_free(q->mz); 349 rte_free(q); 350 } 351 352 int 353 idpf_qc_ts_mbuf_register(struct idpf_rx_queue *rxq) 354 { 355 int err; 356 if ((rxq->offloads & IDPF_RX_OFFLOAD_TIMESTAMP) != 0) { 357 /* Register mbuf field and flag for Rx timestamp */ 358 err = rte_mbuf_dyn_rx_timestamp_register(&idpf_timestamp_dynfield_offset, 359 &idpf_timestamp_dynflag); 360 if (err != 0) { 361 DRV_LOG(ERR, 362 "Cannot register mbuf field/flag for timestamp"); 363 return -EINVAL; 364 } 365 } 366 return 0; 367 } 368 369 int 370 idpf_qc_single_rxq_mbufs_alloc(struct idpf_rx_queue *rxq) 371 { 372 volatile struct virtchnl2_singleq_rx_buf_desc *rxd; 373 struct rte_mbuf *mbuf = NULL; 374 uint64_t dma_addr; 375 uint16_t i; 376 377 for (i = 0; i < rxq->nb_rx_desc; i++) { 378 mbuf = rte_mbuf_raw_alloc(rxq->mp); 379 if (unlikely(mbuf == NULL)) { 380 DRV_LOG(ERR, "Failed to allocate mbuf for RX"); 381 return -ENOMEM; 382 } 383 384 rte_mbuf_refcnt_set(mbuf, 1); 385 mbuf->next = NULL; 386 mbuf->data_off = RTE_PKTMBUF_HEADROOM; 387 mbuf->nb_segs = 1; 388 mbuf->port = rxq->port_id; 389 390 dma_addr = 391 rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf)); 392 393 rxd = &((volatile struct virtchnl2_singleq_rx_buf_desc *)(rxq->rx_ring))[i]; 394 rxd->pkt_addr = dma_addr; 395 rxd->hdr_addr = 0; 396 rxd->rsvd1 = 0; 397 rxd->rsvd2 = 0; 398 rxq->sw_ring[i] = mbuf; 399 } 400 401 return 0; 402 } 403 404 int 405 idpf_qc_split_rxq_mbufs_alloc(struct idpf_rx_queue *rxq) 406 { 407 volatile struct virtchnl2_splitq_rx_buf_desc *rxd; 408 struct rte_mbuf *mbuf = NULL; 409 uint64_t dma_addr; 410 uint16_t i; 411 412 for (i = 0; i < rxq->nb_rx_desc; i++) { 413 mbuf = rte_mbuf_raw_alloc(rxq->mp); 414 if (unlikely(mbuf == NULL)) { 415 DRV_LOG(ERR, "Failed to allocate mbuf for RX"); 416 return -ENOMEM; 417 } 418 419 rte_mbuf_refcnt_set(mbuf, 1); 420 mbuf->next = NULL; 421 mbuf->data_off = RTE_PKTMBUF_HEADROOM; 422 mbuf->nb_segs = 1; 423 mbuf->port = rxq->port_id; 424 425 dma_addr = 426 rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf)); 427 428 rxd = &((volatile struct virtchnl2_splitq_rx_buf_desc *)(rxq->rx_ring))[i]; 429 rxd->qword0.buf_id = i; 430 rxd->qword0.rsvd0 = 0; 431 rxd->qword0.rsvd1 = 0; 432 rxd->pkt_addr = dma_addr; 433 rxd->hdr_addr = 0; 434 rxd->rsvd2 = 0; 435 436 rxq->sw_ring[i] = mbuf; 437 } 438 439 rxq->nb_rx_hold = 0; 440 rxq->rx_tail = rxq->nb_rx_desc - 1; 441 442 return 0; 443 } 444 445 #define IDPF_TIMESYNC_REG_WRAP_GUARD_BAND 10000 446 /* Helper function to convert a 32b nanoseconds timestamp to 64b. */ 447 static inline uint64_t 448 idpf_tstamp_convert_32b_64b(struct idpf_adapter *ad, uint32_t flag, 449 uint32_t in_timestamp) 450 { 451 #ifdef RTE_ARCH_X86_64 452 struct idpf_hw *hw = &ad->hw; 453 const uint64_t mask = 0xFFFFFFFF; 454 uint32_t hi, lo, lo2, delta; 455 uint64_t ns; 456 457 if (flag != 0) { 458 IDPF_WRITE_REG(hw, GLTSYN_CMD_SYNC_0_0, PF_GLTSYN_CMD_SYNC_SHTIME_EN_M); 459 IDPF_WRITE_REG(hw, GLTSYN_CMD_SYNC_0_0, PF_GLTSYN_CMD_SYNC_EXEC_CMD_M | 460 PF_GLTSYN_CMD_SYNC_SHTIME_EN_M); 461 lo = IDPF_READ_REG(hw, PF_GLTSYN_SHTIME_L_0); 462 hi = IDPF_READ_REG(hw, PF_GLTSYN_SHTIME_H_0); 463 /* 464 * On typical system, the delta between lo and lo2 is ~1000ns, 465 * so 10000 seems a large-enough but not overly-big guard band. 466 */ 467 if (lo > (UINT32_MAX - IDPF_TIMESYNC_REG_WRAP_GUARD_BAND)) 468 lo2 = IDPF_READ_REG(hw, PF_GLTSYN_SHTIME_L_0); 469 else 470 lo2 = lo; 471 472 if (lo2 < lo) { 473 lo = IDPF_READ_REG(hw, PF_GLTSYN_SHTIME_L_0); 474 hi = IDPF_READ_REG(hw, PF_GLTSYN_SHTIME_H_0); 475 } 476 477 ad->time_hw = ((uint64_t)hi << 32) | lo; 478 } 479 480 delta = (in_timestamp - (uint32_t)(ad->time_hw & mask)); 481 if (delta > (mask / 2)) { 482 delta = ((uint32_t)(ad->time_hw & mask) - in_timestamp); 483 ns = ad->time_hw - delta; 484 } else { 485 ns = ad->time_hw + delta; 486 } 487 488 return ns; 489 #else /* !RTE_ARCH_X86_64 */ 490 RTE_SET_USED(ad); 491 RTE_SET_USED(flag); 492 RTE_SET_USED(in_timestamp); 493 return 0; 494 #endif /* RTE_ARCH_X86_64 */ 495 } 496 497 #define IDPF_RX_FLEX_DESC_ADV_STATUS0_XSUM_S \ 498 (RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_IPE_S) | \ 499 RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_L4E_S) | \ 500 RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_EIPE_S) | \ 501 RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_EUDPE_S)) 502 503 static inline uint64_t 504 idpf_splitq_rx_csum_offload(uint8_t err) 505 { 506 uint64_t flags = 0; 507 508 if (unlikely((err & RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_L3L4P_S)) == 0)) 509 return flags; 510 511 if (likely((err & IDPF_RX_FLEX_DESC_ADV_STATUS0_XSUM_S) == 0)) { 512 flags |= (RTE_MBUF_F_RX_IP_CKSUM_GOOD | 513 RTE_MBUF_F_RX_L4_CKSUM_GOOD); 514 return flags; 515 } 516 517 if (unlikely((err & RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_IPE_S)) != 0)) 518 flags |= RTE_MBUF_F_RX_IP_CKSUM_BAD; 519 else 520 flags |= RTE_MBUF_F_RX_IP_CKSUM_GOOD; 521 522 if (unlikely((err & RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_L4E_S)) != 0)) 523 flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD; 524 else 525 flags |= RTE_MBUF_F_RX_L4_CKSUM_GOOD; 526 527 if (unlikely((err & RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_EIPE_S)) != 0)) 528 flags |= RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD; 529 530 if (unlikely((err & RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_EUDPE_S)) != 0)) 531 flags |= RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD; 532 else 533 flags |= RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD; 534 535 return flags; 536 } 537 538 #define IDPF_RX_FLEX_DESC_ADV_HASH1_S 0 539 #define IDPF_RX_FLEX_DESC_ADV_HASH2_S 16 540 #define IDPF_RX_FLEX_DESC_ADV_HASH3_S 24 541 542 static inline uint64_t 543 idpf_splitq_rx_rss_offload(struct rte_mbuf *mb, 544 volatile struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc) 545 { 546 uint8_t status_err0_qw0; 547 uint64_t flags = 0; 548 549 status_err0_qw0 = rx_desc->status_err0_qw0; 550 551 if ((status_err0_qw0 & RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_RSS_VALID_S)) != 0) { 552 flags |= RTE_MBUF_F_RX_RSS_HASH; 553 mb->hash.rss = (rte_le_to_cpu_16(rx_desc->hash1) << 554 IDPF_RX_FLEX_DESC_ADV_HASH1_S) | 555 ((uint32_t)(rx_desc->ff2_mirrid_hash2.hash2) << 556 IDPF_RX_FLEX_DESC_ADV_HASH2_S) | 557 ((uint32_t)(rx_desc->hash3) << 558 IDPF_RX_FLEX_DESC_ADV_HASH3_S); 559 } 560 561 return flags; 562 } 563 564 static void 565 idpf_split_rx_bufq_refill(struct idpf_rx_queue *rx_bufq) 566 { 567 volatile struct virtchnl2_splitq_rx_buf_desc *rx_buf_ring; 568 volatile struct virtchnl2_splitq_rx_buf_desc *rx_buf_desc; 569 uint16_t nb_refill = rx_bufq->rx_free_thresh; 570 uint16_t nb_desc = rx_bufq->nb_rx_desc; 571 uint16_t next_avail = rx_bufq->rx_tail; 572 struct rte_mbuf *nmb[rx_bufq->rx_free_thresh]; 573 uint64_t dma_addr; 574 uint16_t delta; 575 int i; 576 577 if (rx_bufq->nb_rx_hold < rx_bufq->rx_free_thresh) 578 return; 579 580 rx_buf_ring = rx_bufq->rx_ring; 581 delta = nb_desc - next_avail; 582 if (unlikely(delta < nb_refill)) { 583 if (likely(rte_pktmbuf_alloc_bulk(rx_bufq->mp, nmb, delta) == 0)) { 584 for (i = 0; i < delta; i++) { 585 rx_buf_desc = &rx_buf_ring[next_avail + i]; 586 rx_bufq->sw_ring[next_avail + i] = nmb[i]; 587 dma_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb[i])); 588 rx_buf_desc->hdr_addr = 0; 589 rx_buf_desc->pkt_addr = dma_addr; 590 } 591 nb_refill -= delta; 592 next_avail = 0; 593 rx_bufq->nb_rx_hold -= delta; 594 } else { 595 rte_atomic_fetch_add_explicit(&rx_bufq->rx_stats.mbuf_alloc_failed, 596 nb_desc - next_avail, rte_memory_order_relaxed); 597 RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u queue_id=%u", 598 rx_bufq->port_id, rx_bufq->queue_id); 599 return; 600 } 601 } 602 603 if (nb_desc - next_avail >= nb_refill) { 604 if (likely(rte_pktmbuf_alloc_bulk(rx_bufq->mp, nmb, nb_refill) == 0)) { 605 for (i = 0; i < nb_refill; i++) { 606 rx_buf_desc = &rx_buf_ring[next_avail + i]; 607 rx_bufq->sw_ring[next_avail + i] = nmb[i]; 608 dma_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb[i])); 609 rx_buf_desc->hdr_addr = 0; 610 rx_buf_desc->pkt_addr = dma_addr; 611 } 612 next_avail += nb_refill; 613 rx_bufq->nb_rx_hold -= nb_refill; 614 } else { 615 rte_atomic_fetch_add_explicit(&rx_bufq->rx_stats.mbuf_alloc_failed, 616 nb_desc - next_avail, rte_memory_order_relaxed); 617 RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u queue_id=%u", 618 rx_bufq->port_id, rx_bufq->queue_id); 619 } 620 } 621 622 IDPF_PCI_REG_WRITE(rx_bufq->qrx_tail, next_avail); 623 624 rx_bufq->rx_tail = next_avail; 625 } 626 627 uint16_t 628 idpf_dp_splitq_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, 629 uint16_t nb_pkts) 630 { 631 volatile struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc_ring; 632 volatile struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc; 633 uint16_t pktlen_gen_bufq_id; 634 struct idpf_rx_queue *rxq; 635 const uint32_t *ptype_tbl; 636 uint8_t status_err0_qw1; 637 struct idpf_adapter *ad; 638 struct rte_mbuf *rxm; 639 uint16_t rx_id_bufq1; 640 uint16_t rx_id_bufq2; 641 uint64_t pkt_flags; 642 uint16_t pkt_len; 643 uint16_t bufq_id; 644 uint16_t gen_id; 645 uint16_t rx_id; 646 uint16_t nb_rx; 647 uint64_t ts_ns; 648 649 nb_rx = 0; 650 rxq = rx_queue; 651 ad = rxq->adapter; 652 653 if (unlikely(rxq == NULL) || unlikely(!rxq->q_started)) 654 return nb_rx; 655 656 rx_id = rxq->rx_tail; 657 rx_id_bufq1 = rxq->bufq1->rx_next_avail; 658 rx_id_bufq2 = rxq->bufq2->rx_next_avail; 659 rx_desc_ring = rxq->rx_ring; 660 ptype_tbl = rxq->adapter->ptype_tbl; 661 662 if ((rxq->offloads & IDPF_RX_OFFLOAD_TIMESTAMP) != 0) 663 rxq->hw_register_set = 1; 664 665 while (nb_rx < nb_pkts) { 666 rx_desc = &rx_desc_ring[rx_id]; 667 668 pktlen_gen_bufq_id = 669 rte_le_to_cpu_16(rx_desc->pktlen_gen_bufq_id); 670 gen_id = (pktlen_gen_bufq_id & 671 VIRTCHNL2_RX_FLEX_DESC_ADV_GEN_M) >> 672 VIRTCHNL2_RX_FLEX_DESC_ADV_GEN_S; 673 if (gen_id != rxq->expected_gen_id) 674 break; 675 676 pkt_len = (pktlen_gen_bufq_id & 677 VIRTCHNL2_RX_FLEX_DESC_ADV_LEN_PBUF_M) >> 678 VIRTCHNL2_RX_FLEX_DESC_ADV_LEN_PBUF_S; 679 if (pkt_len == 0) 680 RX_LOG(ERR, "Packet length is 0"); 681 682 rx_id++; 683 if (unlikely(rx_id == rxq->nb_rx_desc)) { 684 rx_id = 0; 685 rxq->expected_gen_id ^= 1; 686 } 687 688 bufq_id = (pktlen_gen_bufq_id & 689 VIRTCHNL2_RX_FLEX_DESC_ADV_BUFQ_ID_M) >> 690 VIRTCHNL2_RX_FLEX_DESC_ADV_BUFQ_ID_S; 691 if (bufq_id == 0) { 692 rxm = rxq->bufq1->sw_ring[rx_id_bufq1]; 693 rx_id_bufq1++; 694 if (unlikely(rx_id_bufq1 == rxq->bufq1->nb_rx_desc)) 695 rx_id_bufq1 = 0; 696 rxq->bufq1->nb_rx_hold++; 697 } else { 698 rxm = rxq->bufq2->sw_ring[rx_id_bufq2]; 699 rx_id_bufq2++; 700 if (unlikely(rx_id_bufq2 == rxq->bufq2->nb_rx_desc)) 701 rx_id_bufq2 = 0; 702 rxq->bufq2->nb_rx_hold++; 703 } 704 705 rxm->pkt_len = pkt_len; 706 rxm->data_len = pkt_len; 707 rxm->data_off = RTE_PKTMBUF_HEADROOM; 708 rxm->next = NULL; 709 rxm->nb_segs = 1; 710 rxm->port = rxq->port_id; 711 rxm->ol_flags = 0; 712 rxm->packet_type = 713 ptype_tbl[(rte_le_to_cpu_16(rx_desc->ptype_err_fflags0) & 714 VIRTCHNL2_RX_FLEX_DESC_ADV_PTYPE_M) >> 715 VIRTCHNL2_RX_FLEX_DESC_ADV_PTYPE_S]; 716 717 status_err0_qw1 = rx_desc->status_err0_qw1; 718 pkt_flags = idpf_splitq_rx_csum_offload(status_err0_qw1); 719 pkt_flags |= idpf_splitq_rx_rss_offload(rxm, rx_desc); 720 if (idpf_timestamp_dynflag > 0 && 721 (rxq->offloads & IDPF_RX_OFFLOAD_TIMESTAMP)) { 722 /* timestamp */ 723 ts_ns = idpf_tstamp_convert_32b_64b(ad, 724 rxq->hw_register_set, 725 rte_le_to_cpu_32(rx_desc->ts_high)); 726 rxq->hw_register_set = 0; 727 *RTE_MBUF_DYNFIELD(rxm, 728 idpf_timestamp_dynfield_offset, 729 rte_mbuf_timestamp_t *) = ts_ns; 730 rxm->ol_flags |= idpf_timestamp_dynflag; 731 } 732 733 rxm->ol_flags |= pkt_flags; 734 735 rx_pkts[nb_rx++] = rxm; 736 } 737 738 if (nb_rx > 0) { 739 rxq->rx_tail = rx_id; 740 if (rx_id_bufq1 != rxq->bufq1->rx_next_avail) 741 rxq->bufq1->rx_next_avail = rx_id_bufq1; 742 if (rx_id_bufq2 != rxq->bufq2->rx_next_avail) 743 rxq->bufq2->rx_next_avail = rx_id_bufq2; 744 745 idpf_split_rx_bufq_refill(rxq->bufq1); 746 idpf_split_rx_bufq_refill(rxq->bufq2); 747 } 748 749 return nb_rx; 750 } 751 752 static inline void 753 idpf_split_tx_free(struct idpf_tx_queue *cq) 754 { 755 volatile struct idpf_splitq_tx_compl_desc *compl_ring = cq->compl_ring; 756 volatile struct idpf_splitq_tx_compl_desc *txd; 757 uint16_t next = cq->tx_tail; 758 struct idpf_tx_entry *txe; 759 struct idpf_tx_queue *txq; 760 uint16_t gen, qid, q_head; 761 uint16_t nb_desc_clean; 762 uint8_t ctype; 763 764 txd = &compl_ring[next]; 765 gen = (rte_le_to_cpu_16(txd->qid_comptype_gen) & 766 IDPF_TXD_COMPLQ_GEN_M) >> IDPF_TXD_COMPLQ_GEN_S; 767 if (gen != cq->expected_gen_id) 768 return; 769 770 ctype = (rte_le_to_cpu_16(txd->qid_comptype_gen) & 771 IDPF_TXD_COMPLQ_COMPL_TYPE_M) >> IDPF_TXD_COMPLQ_COMPL_TYPE_S; 772 qid = (rte_le_to_cpu_16(txd->qid_comptype_gen) & 773 IDPF_TXD_COMPLQ_QID_M) >> IDPF_TXD_COMPLQ_QID_S; 774 q_head = rte_le_to_cpu_16(txd->q_head_compl_tag.compl_tag); 775 txq = cq->txqs[qid - cq->tx_start_qid]; 776 777 switch (ctype) { 778 case IDPF_TXD_COMPLT_RE: 779 /* clean to q_head which indicates be fetched txq desc id + 1. 780 * TODO: need to refine and remove the if condition. 781 */ 782 if (unlikely(q_head % 32)) { 783 TX_LOG(ERR, "unexpected desc (head = %u) completion.", 784 q_head); 785 return; 786 } 787 if (txq->last_desc_cleaned > q_head) 788 nb_desc_clean = (txq->nb_tx_desc - txq->last_desc_cleaned) + 789 q_head; 790 else 791 nb_desc_clean = q_head - txq->last_desc_cleaned; 792 txq->nb_free += nb_desc_clean; 793 txq->last_desc_cleaned = q_head; 794 break; 795 case IDPF_TXD_COMPLT_RS: 796 /* q_head indicates sw_id when ctype is 2 */ 797 txe = &txq->sw_ring[q_head]; 798 if (txe->mbuf != NULL) { 799 rte_pktmbuf_free_seg(txe->mbuf); 800 txe->mbuf = NULL; 801 } 802 break; 803 default: 804 TX_LOG(ERR, "unknown completion type."); 805 return; 806 } 807 808 if (++next == cq->nb_tx_desc) { 809 next = 0; 810 cq->expected_gen_id ^= 1; 811 } 812 813 cq->tx_tail = next; 814 } 815 816 /* Check if the context descriptor is needed for TX offloading */ 817 static inline uint16_t 818 idpf_calc_context_desc(uint64_t flags) 819 { 820 if ((flags & RTE_MBUF_F_TX_TCP_SEG) != 0) 821 return 1; 822 823 return 0; 824 } 825 826 /* set TSO context descriptor 827 */ 828 static inline void 829 idpf_set_splitq_tso_ctx(struct rte_mbuf *mbuf, 830 union idpf_tx_offload tx_offload, 831 volatile union idpf_flex_tx_ctx_desc *ctx_desc) 832 { 833 uint16_t cmd_dtype; 834 uint32_t tso_len; 835 uint8_t hdr_len; 836 837 if (tx_offload.l4_len == 0) { 838 TX_LOG(DEBUG, "L4 length set to 0"); 839 return; 840 } 841 842 hdr_len = tx_offload.l2_len + 843 tx_offload.l3_len + 844 tx_offload.l4_len; 845 cmd_dtype = IDPF_TX_DESC_DTYPE_FLEX_TSO_CTX | 846 IDPF_TX_FLEX_CTX_DESC_CMD_TSO; 847 tso_len = mbuf->pkt_len - hdr_len; 848 849 ctx_desc->tso.qw1.cmd_dtype = rte_cpu_to_le_16(cmd_dtype); 850 ctx_desc->tso.qw0.hdr_len = hdr_len; 851 ctx_desc->tso.qw0.mss_rt = 852 rte_cpu_to_le_16((uint16_t)mbuf->tso_segsz & 853 IDPF_TXD_FLEX_CTX_MSS_RT_M); 854 ctx_desc->tso.qw0.flex_tlen = 855 rte_cpu_to_le_32(tso_len & 856 IDPF_TXD_FLEX_CTX_MSS_RT_M); 857 } 858 859 uint16_t 860 idpf_dp_splitq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, 861 uint16_t nb_pkts) 862 { 863 struct idpf_tx_queue *txq = (struct idpf_tx_queue *)tx_queue; 864 volatile struct idpf_flex_tx_sched_desc *txr; 865 volatile struct idpf_flex_tx_sched_desc *txd; 866 struct idpf_tx_entry *sw_ring; 867 union idpf_tx_offload tx_offload = {0}; 868 struct idpf_tx_entry *txe, *txn; 869 uint16_t nb_used, tx_id, sw_id; 870 struct rte_mbuf *tx_pkt; 871 uint16_t nb_to_clean; 872 uint16_t nb_tx = 0; 873 uint64_t ol_flags; 874 uint8_t cmd_dtype; 875 uint16_t nb_ctx; 876 877 if (unlikely(txq == NULL) || unlikely(!txq->q_started)) 878 return nb_tx; 879 880 txr = txq->desc_ring; 881 sw_ring = txq->sw_ring; 882 tx_id = txq->tx_tail; 883 sw_id = txq->sw_tail; 884 txe = &sw_ring[sw_id]; 885 886 for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) { 887 tx_pkt = tx_pkts[nb_tx]; 888 889 if (txq->nb_free <= txq->free_thresh) { 890 /* TODO: Need to refine 891 * 1. free and clean: Better to decide a clean destination instead of 892 * loop times. And don't free mbuf when RS got immediately, free when 893 * transmit or according to the clean destination. 894 * Now, just ignore the RE write back, free mbuf when get RS 895 * 2. out-of-order rewrite back haven't be supported, SW head and HW head 896 * need to be separated. 897 **/ 898 nb_to_clean = 2 * txq->rs_thresh; 899 while (nb_to_clean--) 900 idpf_split_tx_free(txq->complq); 901 } 902 903 if (txq->nb_free < tx_pkt->nb_segs) 904 break; 905 906 cmd_dtype = 0; 907 ol_flags = tx_pkt->ol_flags; 908 tx_offload.l2_len = tx_pkt->l2_len; 909 tx_offload.l3_len = tx_pkt->l3_len; 910 tx_offload.l4_len = tx_pkt->l4_len; 911 tx_offload.tso_segsz = tx_pkt->tso_segsz; 912 /* Calculate the number of context descriptors needed. */ 913 nb_ctx = idpf_calc_context_desc(ol_flags); 914 nb_used = tx_pkt->nb_segs + nb_ctx; 915 916 if (ol_flags & IDPF_TX_CKSUM_OFFLOAD_MASK) 917 cmd_dtype = IDPF_TXD_FLEX_FLOW_CMD_CS_EN; 918 919 /* context descriptor */ 920 if (nb_ctx != 0) { 921 volatile union idpf_flex_tx_ctx_desc *ctx_desc = 922 (volatile union idpf_flex_tx_ctx_desc *)&txr[tx_id]; 923 924 if ((ol_flags & RTE_MBUF_F_TX_TCP_SEG) != 0) 925 idpf_set_splitq_tso_ctx(tx_pkt, tx_offload, 926 ctx_desc); 927 928 tx_id++; 929 if (tx_id == txq->nb_tx_desc) 930 tx_id = 0; 931 } 932 933 do { 934 txd = &txr[tx_id]; 935 txn = &sw_ring[txe->next_id]; 936 txe->mbuf = tx_pkt; 937 938 /* Setup TX descriptor */ 939 txd->buf_addr = 940 rte_cpu_to_le_64(rte_mbuf_data_iova(tx_pkt)); 941 cmd_dtype |= IDPF_TX_DESC_DTYPE_FLEX_FLOW_SCHE; 942 txd->qw1.cmd_dtype = cmd_dtype; 943 txd->qw1.rxr_bufsize = tx_pkt->data_len; 944 txd->qw1.compl_tag = sw_id; 945 tx_id++; 946 if (tx_id == txq->nb_tx_desc) 947 tx_id = 0; 948 sw_id = txe->next_id; 949 txe = txn; 950 tx_pkt = tx_pkt->next; 951 } while (tx_pkt); 952 953 /* fill the last descriptor with End of Packet (EOP) bit */ 954 txd->qw1.cmd_dtype |= IDPF_TXD_FLEX_FLOW_CMD_EOP; 955 956 txq->nb_free = (uint16_t)(txq->nb_free - nb_used); 957 txq->nb_used = (uint16_t)(txq->nb_used + nb_used); 958 959 if (txq->nb_used >= 32) { 960 txd->qw1.cmd_dtype |= IDPF_TXD_FLEX_FLOW_CMD_RE; 961 /* Update txq RE bit counters */ 962 txq->nb_used = 0; 963 } 964 } 965 966 /* update the tail pointer if any packets were processed */ 967 if (likely(nb_tx > 0)) { 968 IDPF_PCI_REG_WRITE(txq->qtx_tail, tx_id); 969 txq->tx_tail = tx_id; 970 txq->sw_tail = sw_id; 971 } 972 973 return nb_tx; 974 } 975 976 #define IDPF_RX_FLEX_DESC_STATUS0_XSUM_S \ 977 (RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_IPE_S) | \ 978 RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_L4E_S) | \ 979 RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S) | \ 980 RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_S)) 981 982 /* Translate the rx descriptor status and error fields to pkt flags */ 983 static inline uint64_t 984 idpf_rxd_to_pkt_flags(uint16_t status_error) 985 { 986 uint64_t flags = 0; 987 988 if (unlikely((status_error & RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_STATUS0_L3L4P_S)) == 0)) 989 return flags; 990 991 if (likely((status_error & IDPF_RX_FLEX_DESC_STATUS0_XSUM_S) == 0)) { 992 flags |= (RTE_MBUF_F_RX_IP_CKSUM_GOOD | 993 RTE_MBUF_F_RX_L4_CKSUM_GOOD); 994 return flags; 995 } 996 997 if (unlikely((status_error & RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_IPE_S)) != 0)) 998 flags |= RTE_MBUF_F_RX_IP_CKSUM_BAD; 999 else 1000 flags |= RTE_MBUF_F_RX_IP_CKSUM_GOOD; 1001 1002 if (unlikely((status_error & RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_L4E_S)) != 0)) 1003 flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD; 1004 else 1005 flags |= RTE_MBUF_F_RX_L4_CKSUM_GOOD; 1006 1007 if (unlikely((status_error & RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S)) != 0)) 1008 flags |= RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD; 1009 1010 if (unlikely((status_error & RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_S)) != 0)) 1011 flags |= RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD; 1012 else 1013 flags |= RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD; 1014 1015 return flags; 1016 } 1017 1018 static inline void 1019 idpf_update_rx_tail(struct idpf_rx_queue *rxq, uint16_t nb_hold, 1020 uint16_t rx_id) 1021 { 1022 nb_hold = (uint16_t)(nb_hold + rxq->nb_rx_hold); 1023 1024 if (nb_hold > rxq->rx_free_thresh) { 1025 RX_LOG(DEBUG, 1026 "port_id=%u queue_id=%u rx_tail=%u nb_hold=%u", 1027 rxq->port_id, rxq->queue_id, rx_id, nb_hold); 1028 rx_id = (uint16_t)((rx_id == 0) ? 1029 (rxq->nb_rx_desc - 1) : (rx_id - 1)); 1030 IDPF_PCI_REG_WRITE(rxq->qrx_tail, rx_id); 1031 nb_hold = 0; 1032 } 1033 rxq->nb_rx_hold = nb_hold; 1034 } 1035 1036 static inline void 1037 idpf_singleq_rx_rss_offload(struct rte_mbuf *mb, 1038 volatile struct virtchnl2_rx_flex_desc_nic *rx_desc, 1039 uint64_t *pkt_flags) 1040 { 1041 uint16_t rx_status0 = rte_le_to_cpu_16(rx_desc->status_error0); 1042 1043 if (rx_status0 & RTE_BIT32(VIRTCHNL2_RX_FLEX_DESC_STATUS0_RSS_VALID_S)) { 1044 *pkt_flags |= RTE_MBUF_F_RX_RSS_HASH; 1045 mb->hash.rss = rte_le_to_cpu_32(rx_desc->rss_hash); 1046 } 1047 1048 } 1049 1050 uint16_t 1051 idpf_dp_singleq_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, 1052 uint16_t nb_pkts) 1053 { 1054 volatile union virtchnl2_rx_desc *rx_ring; 1055 volatile union virtchnl2_rx_desc *rxdp; 1056 union virtchnl2_rx_desc rxd; 1057 struct idpf_rx_queue *rxq; 1058 const uint32_t *ptype_tbl; 1059 uint16_t rx_id, nb_hold; 1060 struct idpf_adapter *ad; 1061 uint16_t rx_packet_len; 1062 struct rte_mbuf *rxm; 1063 struct rte_mbuf *nmb; 1064 uint16_t rx_status0; 1065 uint64_t pkt_flags; 1066 uint64_t dma_addr; 1067 uint64_t ts_ns; 1068 uint16_t nb_rx; 1069 1070 nb_rx = 0; 1071 nb_hold = 0; 1072 rxq = rx_queue; 1073 1074 ad = rxq->adapter; 1075 1076 if (unlikely(rxq == NULL) || unlikely(!rxq->q_started)) 1077 return nb_rx; 1078 1079 rx_id = rxq->rx_tail; 1080 rx_ring = rxq->rx_ring; 1081 ptype_tbl = rxq->adapter->ptype_tbl; 1082 1083 if ((rxq->offloads & IDPF_RX_OFFLOAD_TIMESTAMP) != 0) 1084 rxq->hw_register_set = 1; 1085 1086 while (nb_rx < nb_pkts) { 1087 rxdp = &rx_ring[rx_id]; 1088 rx_status0 = rte_le_to_cpu_16(rxdp->flex_nic_wb.status_error0); 1089 1090 /* Check the DD bit first */ 1091 if ((rx_status0 & (1 << VIRTCHNL2_RX_FLEX_DESC_STATUS0_DD_S)) == 0) 1092 break; 1093 1094 nmb = rte_mbuf_raw_alloc(rxq->mp); 1095 if (unlikely(nmb == NULL)) { 1096 rte_atomic_fetch_add_explicit(&rxq->rx_stats.mbuf_alloc_failed, 1, 1097 rte_memory_order_relaxed); 1098 RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u " 1099 "queue_id=%u", rxq->port_id, rxq->queue_id); 1100 break; 1101 } 1102 rxd = *rxdp; /* copy descriptor in ring to temp variable*/ 1103 1104 nb_hold++; 1105 rxm = rxq->sw_ring[rx_id]; 1106 rxq->sw_ring[rx_id] = nmb; 1107 rx_id++; 1108 if (unlikely(rx_id == rxq->nb_rx_desc)) 1109 rx_id = 0; 1110 1111 /* Prefetch next mbuf */ 1112 rte_prefetch0(rxq->sw_ring[rx_id]); 1113 1114 /* When next RX descriptor is on a cache line boundary, 1115 * prefetch the next 4 RX descriptors and next 8 pointers 1116 * to mbufs. 1117 */ 1118 if ((rx_id & 0x3) == 0) { 1119 rte_prefetch0(&rx_ring[rx_id]); 1120 rte_prefetch0(rxq->sw_ring[rx_id]); 1121 } 1122 dma_addr = 1123 rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb)); 1124 rxdp->read.hdr_addr = 0; 1125 rxdp->read.pkt_addr = dma_addr; 1126 1127 rx_packet_len = (rte_cpu_to_le_16(rxd.flex_nic_wb.pkt_len) & 1128 VIRTCHNL2_RX_FLEX_DESC_PKT_LEN_M); 1129 1130 rxm->data_off = RTE_PKTMBUF_HEADROOM; 1131 rte_prefetch0(RTE_PTR_ADD(rxm->buf_addr, RTE_PKTMBUF_HEADROOM)); 1132 rxm->nb_segs = 1; 1133 rxm->next = NULL; 1134 rxm->pkt_len = rx_packet_len; 1135 rxm->data_len = rx_packet_len; 1136 rxm->port = rxq->port_id; 1137 rxm->ol_flags = 0; 1138 pkt_flags = idpf_rxd_to_pkt_flags(rx_status0); 1139 idpf_singleq_rx_rss_offload(rxm, &rxd.flex_nic_wb, &pkt_flags); 1140 rxm->packet_type = 1141 ptype_tbl[(uint8_t)(rte_cpu_to_le_16(rxd.flex_nic_wb.ptype_flex_flags0) & 1142 VIRTCHNL2_RX_FLEX_DESC_PTYPE_M)]; 1143 1144 rxm->ol_flags |= pkt_flags; 1145 1146 if (idpf_timestamp_dynflag > 0 && 1147 (rxq->offloads & IDPF_RX_OFFLOAD_TIMESTAMP) != 0) { 1148 /* timestamp */ 1149 ts_ns = idpf_tstamp_convert_32b_64b(ad, 1150 rxq->hw_register_set, 1151 rte_le_to_cpu_32(rxd.flex_nic_wb.flex_ts.ts_high)); 1152 rxq->hw_register_set = 0; 1153 *RTE_MBUF_DYNFIELD(rxm, 1154 idpf_timestamp_dynfield_offset, 1155 rte_mbuf_timestamp_t *) = ts_ns; 1156 rxm->ol_flags |= idpf_timestamp_dynflag; 1157 } 1158 1159 rx_pkts[nb_rx++] = rxm; 1160 } 1161 rxq->rx_tail = rx_id; 1162 1163 idpf_update_rx_tail(rxq, nb_hold, rx_id); 1164 1165 return nb_rx; 1166 } 1167 1168 uint16_t 1169 idpf_dp_singleq_recv_scatter_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, 1170 uint16_t nb_pkts) 1171 { 1172 struct idpf_rx_queue *rxq = rx_queue; 1173 volatile union virtchnl2_rx_desc *rx_ring = rxq->rx_ring; 1174 volatile union virtchnl2_rx_desc *rxdp; 1175 union virtchnl2_rx_desc rxd; 1176 struct idpf_adapter *ad; 1177 struct rte_mbuf *first_seg = rxq->pkt_first_seg; 1178 struct rte_mbuf *last_seg = rxq->pkt_last_seg; 1179 struct rte_mbuf *rxm; 1180 struct rte_mbuf *nmb; 1181 struct rte_eth_dev *dev; 1182 const uint32_t *ptype_tbl = rxq->adapter->ptype_tbl; 1183 uint16_t rx_id = rxq->rx_tail; 1184 uint16_t rx_packet_len; 1185 uint16_t nb_hold = 0; 1186 uint16_t rx_status0; 1187 uint16_t nb_rx = 0; 1188 uint64_t pkt_flags; 1189 uint64_t dma_addr; 1190 uint64_t ts_ns; 1191 1192 ad = rxq->adapter; 1193 1194 if (unlikely(!rxq) || unlikely(!rxq->q_started)) 1195 return nb_rx; 1196 1197 while (nb_rx < nb_pkts) { 1198 rxdp = &rx_ring[rx_id]; 1199 rx_status0 = rte_le_to_cpu_16(rxdp->flex_nic_wb.status_error0); 1200 1201 /* Check the DD bit first */ 1202 if (!(rx_status0 & (1 << VIRTCHNL2_RX_FLEX_DESC_STATUS0_DD_S))) 1203 break; 1204 1205 nmb = rte_mbuf_raw_alloc(rxq->mp); 1206 if (unlikely(!nmb)) { 1207 rte_atomic_fetch_add_explicit(&rxq->rx_stats.mbuf_alloc_failed, 1, 1208 rte_memory_order_relaxed); 1209 RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u " 1210 "queue_id=%u", rxq->port_id, rxq->queue_id); 1211 break; 1212 } 1213 1214 rxd = *rxdp; 1215 1216 nb_hold++; 1217 rxm = rxq->sw_ring[rx_id]; 1218 rxq->sw_ring[rx_id] = nmb; 1219 rx_id++; 1220 if (unlikely(rx_id == rxq->nb_rx_desc)) 1221 rx_id = 0; 1222 1223 /* Prefetch next mbuf */ 1224 rte_prefetch0(rxq->sw_ring[rx_id]); 1225 1226 /* When next RX descriptor is on a cache line boundary, 1227 * prefetch the next 4 RX descriptors and next 8 pointers 1228 * to mbufs. 1229 */ 1230 if ((rx_id & 0x3) == 0) { 1231 rte_prefetch0(&rx_ring[rx_id]); 1232 rte_prefetch0(rxq->sw_ring[rx_id]); 1233 } 1234 dma_addr = 1235 rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb)); 1236 rxdp->read.hdr_addr = 0; 1237 rxdp->read.pkt_addr = dma_addr; 1238 rx_packet_len = (rte_cpu_to_le_16(rxd.flex_nic_wb.pkt_len) & 1239 VIRTCHNL2_RX_FLEX_DESC_PKT_LEN_M); 1240 rxm->data_len = rx_packet_len; 1241 rxm->data_off = RTE_PKTMBUF_HEADROOM; 1242 1243 /** 1244 * If this is the first buffer of the received packet, set the 1245 * pointer to the first mbuf of the packet and initialize its 1246 * context. Otherwise, update the total length and the number 1247 * of segments of the current scattered packet, and update the 1248 * pointer to the last mbuf of the current packet. 1249 */ 1250 if (!first_seg) { 1251 first_seg = rxm; 1252 first_seg->nb_segs = 1; 1253 first_seg->pkt_len = rx_packet_len; 1254 } else { 1255 first_seg->pkt_len = 1256 (uint16_t)(first_seg->pkt_len + 1257 rx_packet_len); 1258 first_seg->nb_segs++; 1259 last_seg->next = rxm; 1260 } 1261 1262 if (!(rx_status0 & (1 << VIRTCHNL2_RX_FLEX_DESC_STATUS0_EOF_S))) { 1263 last_seg = rxm; 1264 continue; 1265 } 1266 1267 rxm->next = NULL; 1268 1269 first_seg->port = rxq->port_id; 1270 first_seg->ol_flags = 0; 1271 pkt_flags = idpf_rxd_to_pkt_flags(rx_status0); 1272 idpf_singleq_rx_rss_offload(first_seg, &rxd.flex_nic_wb, &pkt_flags); 1273 first_seg->packet_type = 1274 ptype_tbl[(uint8_t)(rte_cpu_to_le_16(rxd.flex_nic_wb.ptype_flex_flags0) & 1275 VIRTCHNL2_RX_FLEX_DESC_PTYPE_M)]; 1276 1277 if (idpf_timestamp_dynflag > 0 && 1278 (rxq->offloads & IDPF_RX_OFFLOAD_TIMESTAMP) != 0) { 1279 /* timestamp */ 1280 ts_ns = idpf_tstamp_convert_32b_64b(ad, 1281 rxq->hw_register_set, 1282 rte_le_to_cpu_32(rxd.flex_nic_wb.flex_ts.ts_high)); 1283 rxq->hw_register_set = 0; 1284 *RTE_MBUF_DYNFIELD(rxm, 1285 idpf_timestamp_dynfield_offset, 1286 rte_mbuf_timestamp_t *) = ts_ns; 1287 first_seg->ol_flags |= idpf_timestamp_dynflag; 1288 } 1289 1290 first_seg->ol_flags |= pkt_flags; 1291 rte_prefetch0(RTE_PTR_ADD(first_seg->buf_addr, 1292 first_seg->data_off)); 1293 rx_pkts[nb_rx++] = first_seg; 1294 first_seg = NULL; 1295 } 1296 rxq->rx_tail = rx_id; 1297 rxq->pkt_first_seg = first_seg; 1298 rxq->pkt_last_seg = last_seg; 1299 1300 idpf_update_rx_tail(rxq, nb_hold, rx_id); 1301 1302 return nb_rx; 1303 } 1304 1305 static inline int 1306 idpf_xmit_cleanup(struct idpf_tx_queue *txq) 1307 { 1308 uint16_t last_desc_cleaned = txq->last_desc_cleaned; 1309 struct idpf_tx_entry *sw_ring = txq->sw_ring; 1310 uint16_t nb_tx_desc = txq->nb_tx_desc; 1311 uint16_t desc_to_clean_to; 1312 uint16_t nb_tx_to_clean; 1313 uint16_t i; 1314 1315 volatile struct idpf_base_tx_desc *txd = txq->tx_ring; 1316 1317 desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->rs_thresh); 1318 if (desc_to_clean_to >= nb_tx_desc) 1319 desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc); 1320 1321 desc_to_clean_to = sw_ring[desc_to_clean_to].last_id; 1322 if ((txd[desc_to_clean_to].qw1 & 1323 rte_cpu_to_le_64(IDPF_TXD_QW1_DTYPE_M)) != 1324 rte_cpu_to_le_64(IDPF_TX_DESC_DTYPE_DESC_DONE)) { 1325 TX_LOG(DEBUG, "TX descriptor %4u is not done " 1326 "(port=%d queue=%d)", desc_to_clean_to, 1327 txq->port_id, txq->queue_id); 1328 return -1; 1329 } 1330 1331 if (last_desc_cleaned > desc_to_clean_to) 1332 nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) + 1333 desc_to_clean_to); 1334 else 1335 nb_tx_to_clean = (uint16_t)(desc_to_clean_to - 1336 last_desc_cleaned); 1337 1338 txd[desc_to_clean_to].qw1 = 0; 1339 1340 txq->last_desc_cleaned = desc_to_clean_to; 1341 txq->nb_free = (uint16_t)(txq->nb_free + nb_tx_to_clean); 1342 1343 return 0; 1344 } 1345 1346 /* TX function */ 1347 uint16_t 1348 idpf_dp_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, 1349 uint16_t nb_pkts) 1350 { 1351 volatile struct idpf_base_tx_desc *txd; 1352 volatile struct idpf_base_tx_desc *txr; 1353 union idpf_tx_offload tx_offload = {0}; 1354 struct idpf_tx_entry *txe, *txn; 1355 struct idpf_tx_entry *sw_ring; 1356 struct idpf_tx_queue *txq; 1357 struct rte_mbuf *tx_pkt; 1358 struct rte_mbuf *m_seg; 1359 uint64_t buf_dma_addr; 1360 uint32_t td_offset; 1361 uint64_t ol_flags; 1362 uint16_t tx_last; 1363 uint16_t nb_used; 1364 uint16_t nb_ctx; 1365 uint16_t td_cmd; 1366 uint16_t tx_id; 1367 uint16_t nb_tx; 1368 uint16_t slen; 1369 1370 nb_tx = 0; 1371 txq = tx_queue; 1372 1373 if (unlikely(txq == NULL) || unlikely(!txq->q_started)) 1374 return nb_tx; 1375 1376 sw_ring = txq->sw_ring; 1377 txr = txq->tx_ring; 1378 tx_id = txq->tx_tail; 1379 txe = &sw_ring[tx_id]; 1380 1381 /* Check if the descriptor ring needs to be cleaned. */ 1382 if (txq->nb_free < txq->free_thresh) 1383 (void)idpf_xmit_cleanup(txq); 1384 1385 for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) { 1386 td_cmd = 0; 1387 td_offset = 0; 1388 1389 tx_pkt = *tx_pkts++; 1390 RTE_MBUF_PREFETCH_TO_FREE(txe->mbuf); 1391 1392 ol_flags = tx_pkt->ol_flags; 1393 tx_offload.l2_len = tx_pkt->l2_len; 1394 tx_offload.l3_len = tx_pkt->l3_len; 1395 tx_offload.l4_len = tx_pkt->l4_len; 1396 tx_offload.tso_segsz = tx_pkt->tso_segsz; 1397 /* Calculate the number of context descriptors needed. */ 1398 nb_ctx = idpf_calc_context_desc(ol_flags); 1399 1400 /* The number of descriptors that must be allocated for 1401 * a packet equals to the number of the segments of that 1402 * packet plus 1 context descriptor if needed. 1403 */ 1404 nb_used = (uint16_t)(tx_pkt->nb_segs + nb_ctx); 1405 tx_last = (uint16_t)(tx_id + nb_used - 1); 1406 1407 /* Circular ring */ 1408 if (tx_last >= txq->nb_tx_desc) 1409 tx_last = (uint16_t)(tx_last - txq->nb_tx_desc); 1410 1411 TX_LOG(DEBUG, "port_id=%u queue_id=%u" 1412 " tx_first=%u tx_last=%u", 1413 txq->port_id, txq->queue_id, tx_id, tx_last); 1414 1415 if (nb_used > txq->nb_free) { 1416 if (idpf_xmit_cleanup(txq) != 0) { 1417 if (nb_tx == 0) 1418 return 0; 1419 goto end_of_tx; 1420 } 1421 if (unlikely(nb_used > txq->rs_thresh)) { 1422 while (nb_used > txq->nb_free) { 1423 if (idpf_xmit_cleanup(txq) != 0) { 1424 if (nb_tx == 0) 1425 return 0; 1426 goto end_of_tx; 1427 } 1428 } 1429 } 1430 } 1431 1432 if (ol_flags & IDPF_TX_CKSUM_OFFLOAD_MASK) 1433 td_cmd |= IDPF_TX_FLEX_DESC_CMD_CS_EN; 1434 1435 if (nb_ctx != 0) { 1436 /* Setup TX context descriptor if required */ 1437 volatile union idpf_flex_tx_ctx_desc *ctx_txd = 1438 (volatile union idpf_flex_tx_ctx_desc *) 1439 &txr[tx_id]; 1440 1441 txn = &sw_ring[txe->next_id]; 1442 RTE_MBUF_PREFETCH_TO_FREE(txn->mbuf); 1443 if (txe->mbuf != NULL) { 1444 rte_pktmbuf_free_seg(txe->mbuf); 1445 txe->mbuf = NULL; 1446 } 1447 1448 /* TSO enabled */ 1449 if ((ol_flags & RTE_MBUF_F_TX_TCP_SEG) != 0) 1450 idpf_set_splitq_tso_ctx(tx_pkt, tx_offload, 1451 ctx_txd); 1452 1453 txe->last_id = tx_last; 1454 tx_id = txe->next_id; 1455 txe = txn; 1456 } 1457 1458 m_seg = tx_pkt; 1459 do { 1460 txd = &txr[tx_id]; 1461 txn = &sw_ring[txe->next_id]; 1462 1463 if (txe->mbuf != NULL) 1464 rte_pktmbuf_free_seg(txe->mbuf); 1465 txe->mbuf = m_seg; 1466 1467 /* Setup TX Descriptor */ 1468 slen = m_seg->data_len; 1469 buf_dma_addr = rte_mbuf_data_iova(m_seg); 1470 txd->buf_addr = rte_cpu_to_le_64(buf_dma_addr); 1471 txd->qw1 = rte_cpu_to_le_64(IDPF_TX_DESC_DTYPE_DATA | 1472 ((uint64_t)td_cmd << IDPF_TXD_QW1_CMD_S) | 1473 ((uint64_t)td_offset << IDPF_TXD_QW1_OFFSET_S) | 1474 ((uint64_t)slen << IDPF_TXD_QW1_TX_BUF_SZ_S)); 1475 1476 txe->last_id = tx_last; 1477 tx_id = txe->next_id; 1478 txe = txn; 1479 m_seg = m_seg->next; 1480 } while (m_seg); 1481 1482 /* The last packet data descriptor needs End Of Packet (EOP) */ 1483 td_cmd |= IDPF_TX_DESC_CMD_EOP; 1484 txq->nb_used = (uint16_t)(txq->nb_used + nb_used); 1485 txq->nb_free = (uint16_t)(txq->nb_free - nb_used); 1486 1487 if (txq->nb_used >= txq->rs_thresh) { 1488 TX_LOG(DEBUG, "Setting RS bit on TXD id=" 1489 "%4u (port=%d queue=%d)", 1490 tx_last, txq->port_id, txq->queue_id); 1491 1492 td_cmd |= IDPF_TX_DESC_CMD_RS; 1493 1494 /* Update txq RS bit counters */ 1495 txq->nb_used = 0; 1496 } 1497 1498 txd->qw1 |= rte_cpu_to_le_16(td_cmd << IDPF_TXD_QW1_CMD_S); 1499 } 1500 1501 end_of_tx: 1502 rte_wmb(); 1503 1504 TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u", 1505 txq->port_id, txq->queue_id, tx_id, nb_tx); 1506 1507 IDPF_PCI_REG_WRITE(txq->qtx_tail, tx_id); 1508 txq->tx_tail = tx_id; 1509 1510 return nb_tx; 1511 } 1512 1513 /* TX prep functions */ 1514 uint16_t 1515 idpf_dp_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts, 1516 uint16_t nb_pkts) 1517 { 1518 #ifdef RTE_LIBRTE_ETHDEV_DEBUG 1519 int ret; 1520 #endif 1521 int i; 1522 uint64_t ol_flags; 1523 struct rte_mbuf *m; 1524 1525 for (i = 0; i < nb_pkts; i++) { 1526 m = tx_pkts[i]; 1527 ol_flags = m->ol_flags; 1528 1529 /* Check condition for nb_segs > IDPF_TX_MAX_MTU_SEG. */ 1530 if ((ol_flags & RTE_MBUF_F_TX_TCP_SEG) == 0) { 1531 if (m->nb_segs > IDPF_TX_MAX_MTU_SEG) { 1532 rte_errno = EINVAL; 1533 return i; 1534 } 1535 } else if ((m->tso_segsz < IDPF_MIN_TSO_MSS) || 1536 (m->tso_segsz > IDPF_MAX_TSO_MSS) || 1537 (m->pkt_len > IDPF_MAX_TSO_FRAME_SIZE)) { 1538 /* MSS outside the range are considered malicious */ 1539 rte_errno = EINVAL; 1540 return i; 1541 } 1542 1543 if ((ol_flags & IDPF_TX_OFFLOAD_NOTSUP_MASK) != 0) { 1544 rte_errno = ENOTSUP; 1545 return i; 1546 } 1547 1548 if (m->pkt_len < IDPF_MIN_FRAME_SIZE) { 1549 rte_errno = EINVAL; 1550 return i; 1551 } 1552 1553 #ifdef RTE_LIBRTE_ETHDEV_DEBUG 1554 ret = rte_validate_tx_offload(m); 1555 if (ret != 0) { 1556 rte_errno = -ret; 1557 return i; 1558 } 1559 #endif 1560 } 1561 1562 return i; 1563 } 1564 1565 static void __rte_cold 1566 release_rxq_mbufs_vec(struct idpf_rx_queue *rxq) 1567 { 1568 const uint16_t mask = rxq->nb_rx_desc - 1; 1569 uint16_t i; 1570 1571 if (rxq->sw_ring == NULL || rxq->rxrearm_nb >= rxq->nb_rx_desc) 1572 return; 1573 1574 /* free all mbufs that are valid in the ring */ 1575 if (rxq->rxrearm_nb == 0) { 1576 for (i = 0; i < rxq->nb_rx_desc; i++) { 1577 if (rxq->sw_ring[i] != NULL) 1578 rte_pktmbuf_free_seg(rxq->sw_ring[i]); 1579 } 1580 } else { 1581 for (i = rxq->rx_tail; i != rxq->rxrearm_start; i = (i + 1) & mask) { 1582 if (rxq->sw_ring[i] != NULL) 1583 rte_pktmbuf_free_seg(rxq->sw_ring[i]); 1584 } 1585 } 1586 1587 rxq->rxrearm_nb = rxq->nb_rx_desc; 1588 1589 /* set all entries to NULL */ 1590 memset(rxq->sw_ring, 0, sizeof(rxq->sw_ring[0]) * rxq->nb_rx_desc); 1591 } 1592 1593 static const struct idpf_rxq_ops def_rx_ops_vec = { 1594 .release_mbufs = release_rxq_mbufs_vec, 1595 }; 1596 1597 static inline int 1598 idpf_rxq_vec_setup_default(struct idpf_rx_queue *rxq) 1599 { 1600 uintptr_t p; 1601 struct rte_mbuf mb_def = { .buf_addr = 0 }; /* zeroed mbuf */ 1602 1603 mb_def.nb_segs = 1; 1604 mb_def.data_off = RTE_PKTMBUF_HEADROOM; 1605 mb_def.port = rxq->port_id; 1606 rte_mbuf_refcnt_set(&mb_def, 1); 1607 1608 /* prevent compiler reordering: rearm_data covers previous fields */ 1609 rte_compiler_barrier(); 1610 p = (uintptr_t)&mb_def.rearm_data; 1611 rxq->mbuf_initializer = *(uint64_t *)p; 1612 return 0; 1613 } 1614 1615 int __rte_cold 1616 idpf_qc_singleq_rx_vec_setup(struct idpf_rx_queue *rxq) 1617 { 1618 rxq->ops = &def_rx_ops_vec; 1619 return idpf_rxq_vec_setup_default(rxq); 1620 } 1621 1622 int __rte_cold 1623 idpf_qc_splitq_rx_vec_setup(struct idpf_rx_queue *rxq) 1624 { 1625 rxq->bufq2->ops = &def_rx_ops_vec; 1626 return idpf_rxq_vec_setup_default(rxq->bufq2); 1627 } 1628