/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2015 Intel Corporation
 */

#include <sys/queue.h>

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <stdint.h>
#include <stdarg.h>
#include <unistd.h>
#include <inttypes.h>

#include <rte_byteorder.h>
#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_log.h>
#include <rte_debug.h>
#include <rte_interrupts.h>
#include <rte_pci.h>
#include <rte_memory.h>
#include <rte_memzone.h>
#include <rte_launch.h>
#include <rte_eal.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_mempool.h>
#include <rte_malloc.h>
#include <rte_mbuf.h>
#include <rte_ether.h>
#include <ethdev_driver.h>
#include <rte_prefetch.h>
#include <rte_ip.h>
#include <rte_udp.h>
#include <rte_tcp.h>
#include <rte_sctp.h>
#include <rte_string_fns.h>
#include <rte_errno.h>
#include <rte_net.h>

#include "base/vmxnet3_defs.h"
#include "vmxnet3_ring.h"

#include "vmxnet3_logs.h"
#include "vmxnet3_ethdev.h"

#define VMXNET3_TX_OFFLOAD_MASK	(RTE_MBUF_F_TX_VLAN | \
		RTE_MBUF_F_TX_IPV6 | \
		RTE_MBUF_F_TX_IPV4 | \
		RTE_MBUF_F_TX_L4_MASK | \
		RTE_MBUF_F_TX_TCP_SEG)

#define VMXNET3_TX_OFFLOAD_NOTSUP_MASK \
	(RTE_MBUF_F_TX_OFFLOAD_MASK ^ VMXNET3_TX_OFFLOAD_MASK)

static int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t*, uint8_t);
static void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);
#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);
#endif

#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
static void
vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
{
	uint32_t avail = 0;

	if (rxq == NULL)
		return;

	PMD_RX_LOG(DEBUG,
		   "RXQ: cmd0 base : %p cmd1 base : %p comp ring base : %p.",
		   rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
	PMD_RX_LOG(DEBUG,
		   "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.",
		   (unsigned long)rxq->cmd_ring[0].basePA,
		   (unsigned long)rxq->cmd_ring[1].basePA,
		   (unsigned long)rxq->comp_ring.basePA);

	avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
	PMD_RX_LOG(DEBUG,
		   "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)rxq->cmd_ring[0].size, avail,
		   rxq->comp_ring.next2proc,
		   rxq->cmd_ring[0].size - avail);

	avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
	PMD_RX_LOG(DEBUG, "RXQ:cmd1 size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
		   rxq->cmd_ring[1].size - avail);
}

static void
vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
{
	uint32_t avail = 0;

	if (txq == NULL)
		return;

	PMD_TX_LOG(DEBUG, "TXQ: cmd base : %p comp ring base : %p data ring base : %p.",
		   txq->cmd_ring.base, txq->comp_ring.base, txq->data_ring.base);
	PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx data ring basePA : 0x%lx.",
		   (unsigned long)txq->cmd_ring.basePA,
		   (unsigned long)txq->comp_ring.basePA,
		   (unsigned long)txq->data_ring.basePA);

	avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
	PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)txq->cmd_ring.size, avail,
		   txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
}
#endif

static void
vmxnet3_tx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
{
	while (ring->next2comp != ring->next2fill) {
		/* No need to worry about desc ownership, device is quiesced by now. */
		vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;

		if (buf_info->m) {
			rte_pktmbuf_free(buf_info->m);
			buf_info->m = NULL;
			buf_info->bufPA = 0;
			buf_info->len = 0;
		}
		vmxnet3_cmd_ring_adv_next2comp(ring);
	}
}

static void
vmxnet3_rx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
{
	uint32_t i;

	for (i = 0; i < ring->size; i++) {
		/* No need to worry about desc ownership, device is quiesced by now. */
		vmxnet3_buf_info_t *buf_info = &ring->buf_info[i];

		if (buf_info->m) {
			rte_pktmbuf_free_seg(buf_info->m);
			buf_info->m = NULL;
			buf_info->bufPA = 0;
			buf_info->len = 0;
		}
		vmxnet3_cmd_ring_adv_next2comp(ring);
	}
}

static void
vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
{
	rte_free(ring->buf_info);
	ring->buf_info = NULL;
}

void
vmxnet3_dev_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
{
	vmxnet3_tx_queue_t *tq = dev->data->tx_queues[qid];

	if (tq != NULL) {
		/* Release mbufs */
		vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
		/* Release the cmd_ring */
		vmxnet3_cmd_ring_release(&tq->cmd_ring);
		/* Release the memzone */
		rte_memzone_free(tq->mz);
		/* Release the queue */
		rte_free(tq);
	}
}

void
vmxnet3_dev_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
{
	int i;
	vmxnet3_rx_queue_t *rq = dev->data->rx_queues[qid];

	if (rq != NULL) {
		/* Release mbufs */
		for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
			vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);

		/* Release both the cmd_rings */
		for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
			vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);

		/* Release the memzone */
		rte_memzone_free(rq->mz);

		/* Release the queue */
		rte_free(rq);
	}
}

static void
vmxnet3_dev_tx_queue_reset(void *txq)
{
	vmxnet3_tx_queue_t *tq = txq;
	struct vmxnet3_cmd_ring *ring = &tq->cmd_ring;
	struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
	struct vmxnet3_data_ring *data_ring = &tq->data_ring;
	int size;

	if (tq != NULL) {
		/* Release the cmd_ring mbufs */
		vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
	}

	/* Tx vmxnet rings structure initialization */
	ring->next2fill = 0;
	ring->next2comp = 0;
	ring->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
	size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
	size += tq->txdata_desc_size * data_ring->size;

	memset(ring->base, 0, size);
}

static void
vmxnet3_dev_rx_queue_reset(void *rxq)
{
	int i;
	vmxnet3_rx_queue_t *rq = rxq;
	struct vmxnet3_hw *hw = rq->hw;
	struct vmxnet3_cmd_ring *ring0, *ring1;
	struct vmxnet3_comp_ring *comp_ring;
	struct vmxnet3_rx_data_ring *data_ring = &rq->data_ring;
	int size;

	/* Release both the cmd_rings mbufs */
	for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
		vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);

	ring0 = &rq->cmd_ring[0];
	ring1 = &rq->cmd_ring[1];
	comp_ring = &rq->comp_ring;

	/* Rx vmxnet rings structure initialization */
	ring0->next2fill = 0;
	ring1->next2fill = 0;
	ring0->next2comp = 0;
	ring1->next2comp = 0;
	ring0->gen = VMXNET3_INIT_GEN;
	ring1->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
	size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
	if (VMXNET3_VERSION_GE_3(hw) && rq->data_desc_size)
		size += rq->data_desc_size * data_ring->size;

	memset(ring0->base, 0, size);
}

void
vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
{
	unsigned i;

	PMD_INIT_FUNC_TRACE();

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];

		if (txq != NULL) {
			txq->stopped = TRUE;
			vmxnet3_dev_tx_queue_reset(txq);
		}
	}

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];

		if (rxq != NULL) {
			rxq->stopped = TRUE;
			vmxnet3_dev_rx_queue_reset(rxq);
		}
	}
}

static int
vmxnet3_unmap_pkt(uint16_t eop_idx, vmxnet3_tx_queue_t *txq)
{
	int completed = 0;
	struct rte_mbuf *mbuf;

	/* Release cmd_ring descriptor and free mbuf */
	RTE_ASSERT(txq->cmd_ring.base[eop_idx].txd.eop == 1);

	mbuf = txq->cmd_ring.buf_info[eop_idx].m;
	if (mbuf == NULL)
		rte_panic("EOP desc does not point to a valid mbuf");
	rte_pktmbuf_free(mbuf);

	txq->cmd_ring.buf_info[eop_idx].m = NULL;

	while (txq->cmd_ring.next2comp != eop_idx) {
		/* no out-of-order completion */
		RTE_ASSERT(txq->cmd_ring.base[txq->cmd_ring.next2comp].txd.cq == 0);
		vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
		completed++;
	}

	/* Mark the txd for which tcd was generated as completed */
	vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);

	return completed + 1;
}

static void
vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
{
	int completed = 0;
	vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
	struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
		(comp_ring->base + comp_ring->next2proc);

	while (tcd->gen == comp_ring->gen) {
		completed += vmxnet3_unmap_pkt(tcd->txdIdx, txq);

		vmxnet3_comp_ring_adv_next2proc(comp_ring);
		tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
						    comp_ring->next2proc);
	}

	PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);

	/* To avoid compiler warnings when not in DEBUG mode. */
	RTE_SET_USED(completed);
}

uint16_t
vmxnet3_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
		  uint16_t nb_pkts)
{
	int32_t ret;
	uint32_t i;
	uint64_t ol_flags;
	struct rte_mbuf *m;

	for (i = 0; i != nb_pkts; i++) {
		m = tx_pkts[i];
		ol_flags = m->ol_flags;

		/* Non-TSO packet cannot occupy more than
		 * VMXNET3_MAX_TXD_PER_PKT TX descriptors.
		 */
		if ((ol_flags & RTE_MBUF_F_TX_TCP_SEG) == 0 &&
		    m->nb_segs > VMXNET3_MAX_TXD_PER_PKT) {
			rte_errno = EINVAL;
			return i;
		}
		/* TSO packet cannot occupy more than
		 * VMXNET3_MAX_TSO_TXD_PER_PKT TX descriptors.
		 */
		if ((ol_flags & RTE_MBUF_F_TX_TCP_SEG) != 0 &&
		    m->nb_segs > VMXNET3_MAX_TSO_TXD_PER_PKT) {
			rte_errno = EINVAL;
			return i;
		}

		/* check that only supported TX offloads are requested. */
		if ((ol_flags & VMXNET3_TX_OFFLOAD_NOTSUP_MASK) != 0 ||
		    (ol_flags & RTE_MBUF_F_TX_L4_MASK) ==
		    RTE_MBUF_F_TX_SCTP_CKSUM) {
			rte_errno = ENOTSUP;
			return i;
		}

#ifdef RTE_LIBRTE_ETHDEV_DEBUG
		ret = rte_validate_tx_offload(m);
		if (ret != 0) {
			rte_errno = -ret;
			return i;
		}
#endif
		ret = rte_net_intel_cksum_prepare(m);
		if (ret != 0) {
			rte_errno = -ret;
			return i;
		}
	}

	return i;
}

uint16_t
vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
		  uint16_t nb_pkts)
{
	uint16_t nb_tx;
	vmxnet3_tx_queue_t *txq = tx_queue;
	struct vmxnet3_hw *hw = txq->hw;
	Vmxnet3_TxQueueCtrl *txq_ctrl = &txq->shared->ctrl;
	uint32_t deferred = rte_le_to_cpu_32(txq_ctrl->txNumDeferred);

	if (unlikely(txq->stopped)) {
		PMD_TX_LOG(DEBUG, "Tx queue is stopped.");
		return 0;
	}

	/* Free up the comp_descriptors aggressively */
	vmxnet3_tq_tx_complete(txq);

	nb_tx = 0;
	while (nb_tx < nb_pkts) {
		Vmxnet3_GenericDesc *gdesc = NULL;
		vmxnet3_buf_info_t *tbi = NULL;
		uint32_t first2fill, avail, dw2;
		struct rte_mbuf *txm = tx_pkts[nb_tx];
		struct rte_mbuf *m_seg = txm;
		int copy_size = 0;
		bool tso = (txm->ol_flags & RTE_MBUF_F_TX_TCP_SEG) != 0;
		/* # of descriptors needed for a packet. */
		unsigned count = txm->nb_segs;

		avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
		if (count > avail) {
			/* Is command ring full? */
			if (unlikely(avail == 0)) {
				PMD_TX_LOG(DEBUG, "No free ring descriptors");
				txq->stats.tx_ring_full++;
				txq->stats.drop_total += (nb_pkts - nb_tx);
				break;
			}

			/* Command ring is not full but cannot handle the
			 * multi-segmented packet. Let's try the next packet
			 * in this case.
			 */
			PMD_TX_LOG(DEBUG, "Running out of ring descriptors "
				   "(avail %d needed %d)", avail, count);
			txq->stats.drop_total++;
			if (tso)
				txq->stats.drop_tso++;
			rte_pktmbuf_free(txm);
			nb_tx++;
			continue;
		}

		/* Drop non-TSO or TSO packet that is excessively fragmented */
		if (unlikely((!tso && count > VMXNET3_MAX_TXD_PER_PKT) ||
			     (tso && count > VMXNET3_MAX_TSO_TXD_PER_PKT))) {
			PMD_TX_LOG(ERR, "Non-TSO or TSO packet cannot occupy more than "
				   "%d or %d tx descriptors respectively. Packet dropped.",
				   VMXNET3_MAX_TXD_PER_PKT, VMXNET3_MAX_TSO_TXD_PER_PKT);
			txq->stats.drop_too_many_segs++;
			txq->stats.drop_total++;
			rte_pktmbuf_free(txm);
			nb_tx++;
			continue;
		}

		/* Skip empty packets */
		if (unlikely(rte_pktmbuf_pkt_len(txm) == 0)) {
			txq->stats.drop_total++;
			rte_pktmbuf_free(txm);
			nb_tx++;
			continue;
		}

		/* A single-segment packet that fits in one Tx data ring entry
		 * is copied into the per-queue data ring; the SOP descriptor
		 * built below will then point at the data ring slot instead
		 * of the mbuf.
		 */
		if (txm->nb_segs == 1 &&
		    rte_pktmbuf_pkt_len(txm) <= txq->txdata_desc_size) {
			struct Vmxnet3_TxDataDesc *tdd;

			tdd = (struct Vmxnet3_TxDataDesc *)
				((uint8 *)txq->data_ring.base +
				 txq->cmd_ring.next2fill *
				 txq->txdata_desc_size);
			copy_size = rte_pktmbuf_pkt_len(txm);
			rte_memcpy(tdd->data, rte_pktmbuf_mtod(txm, char *), copy_size);
		}

		/* use the previous gen bit for the SOP desc */
		dw2 = (txq->cmd_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
		first2fill = txq->cmd_ring.next2fill;
		do {
			/* Skip empty segments */
			if (unlikely(m_seg->data_len == 0))
				continue;

			/* Remember the transmit buffer for cleanup */
			tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;

			/* NB: the following assumes that the VMXNET3 maximum
			 * transmit buffer size (16K) is greater than the
			 * maximum size of an mbuf segment.
			 */
			gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill;

			if (copy_size) {
				uint64 offset =
					(uint64)txq->cmd_ring.next2fill *
						txq->txdata_desc_size;
				gdesc->txd.addr =
					rte_cpu_to_le_64(txq->data_ring.basePA +
							 offset);
			} else {
				gdesc->txd.addr = rte_mbuf_data_iova(m_seg);
			}

			gdesc->dword[2] = dw2 | m_seg->data_len;
			gdesc->dword[3] = 0;

			/* move to the next2fill descriptor */
			vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);

			/* use the right gen for non-SOP desc */
			dw2 = txq->cmd_ring.gen << VMXNET3_TXD_GEN_SHIFT;
		} while ((m_seg = m_seg->next) != NULL);
		/* We must have executed the complete preceding loop at least
		 * once without skipping an empty segment, as we can't have
		 * a packet with only empty segments.
		 * Thus, tbi and gdesc have been initialized.
		 */
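		/* Finish the packet: record the mbuf on the last descriptor's
		 * buf_info, mark EOP and CQ on that descriptor, fill the
		 * offload fields on the SOP descriptor, and finally flip the
		 * SOP GEN bit to hand the whole chain over to the device.
		 */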

		/* set the last buf_info for the pkt */
		tbi->m = txm;
		/* Update the EOP descriptor */
		gdesc->dword[3] |= VMXNET3_TXD_EOP | VMXNET3_TXD_CQ;

		/* Add VLAN tag if present */
		gdesc = txq->cmd_ring.base + first2fill;
		if (txm->ol_flags & RTE_MBUF_F_TX_VLAN) {
			gdesc->txd.ti = 1;
			gdesc->txd.tci = txm->vlan_tci;
		}

		if (tso) {
			uint16_t mss = txm->tso_segsz;

			RTE_ASSERT(mss > 0);

			gdesc->txd.hlen = txm->l2_len + txm->l3_len + txm->l4_len;
			gdesc->txd.om = VMXNET3_OM_TSO;
			gdesc->txd.msscof = mss;

			deferred += (rte_pktmbuf_pkt_len(txm) - gdesc->txd.hlen + mss - 1) / mss;
		} else if (txm->ol_flags & RTE_MBUF_F_TX_L4_MASK) {
			gdesc->txd.om = VMXNET3_OM_CSUM;
			gdesc->txd.hlen = txm->l2_len + txm->l3_len;

			switch (txm->ol_flags & RTE_MBUF_F_TX_L4_MASK) {
			case RTE_MBUF_F_TX_TCP_CKSUM:
				gdesc->txd.msscof = gdesc->txd.hlen +
					offsetof(struct rte_tcp_hdr, cksum);
				break;
			case RTE_MBUF_F_TX_UDP_CKSUM:
				gdesc->txd.msscof = gdesc->txd.hlen +
					offsetof(struct rte_udp_hdr, dgram_cksum);
				break;
			default:
				PMD_TX_LOG(WARNING, "requested cksum offload not supported %#llx",
					   txm->ol_flags & RTE_MBUF_F_TX_L4_MASK);
				abort();
			}
			deferred++;
		} else {
			gdesc->txd.hlen = 0;
			gdesc->txd.om = VMXNET3_OM_NONE;
			gdesc->txd.msscof = 0;
			deferred++;
		}

		/* flip the GEN bit on the SOP */
		rte_compiler_barrier();
		gdesc->dword[2] ^= VMXNET3_TXD_GEN;

		txq_ctrl->txNumDeferred = rte_cpu_to_le_32(deferred);
		nb_tx++;
	}

	PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", rte_le_to_cpu_32(txq_ctrl->txThreshold));

	if (deferred >= rte_le_to_cpu_32(txq_ctrl->txThreshold)) {
		txq_ctrl->txNumDeferred = 0;
		/* Notify vSwitch that packets are available. */
		VMXNET3_WRITE_BAR0_REG(hw, (hw->tx_prod_offset + txq->queue_id * VMXNET3_REG_ALIGN),
				       txq->cmd_ring.next2fill);
	}

	return nb_tx;
}

static inline void
vmxnet3_renew_desc(vmxnet3_rx_queue_t *rxq, uint8_t ring_id,
		   struct rte_mbuf *mbuf)
{
	uint32_t val;
	struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
	struct Vmxnet3_RxDesc *rxd =
		(struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);
	vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];

	if (ring_id == 0) {
		/* Usually: One HEAD type buf per packet
		 * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
		 * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
		 */

		/* We use single packet buffer so all heads here */
		val = VMXNET3_RXD_BTYPE_HEAD;
	} else {
		/* All BODY type buffers for 2nd ring */
		val = VMXNET3_RXD_BTYPE_BODY;
	}

	/*
	 * Load the mbuf pointer into buf_info[next2fill].
	 * The buf_info structure is the equivalent of a virtio-virtqueue cookie.
	 */
	buf_info->m = mbuf;
	buf_info->len = (uint16_t)(mbuf->buf_len - RTE_PKTMBUF_HEADROOM);
	buf_info->bufPA = rte_mbuf_data_iova_default(mbuf);

	/* Load Rx Descriptor with the buffer's GPA */
	rxd->addr = buf_info->bufPA;

	/* After this point rxd->addr MUST not be NULL */
	rxd->btype = val;
	rxd->len = buf_info->len;
	/* Flip gen bit at the end to change ownership */
	rxd->gen = ring->gen;

	vmxnet3_cmd_ring_adv_next2fill(ring);
}

/*
 * Allocates mbufs and clusters. Posts Rx descriptors with buffer details
 * so that the device can receive packets in those buffers.
 * Ring layout:
 * Among the two rings, the 1st ring contains buffers of type 0 and type 1.
 * bufs_per_pkt is set such that for non-LRO cases all the buffers required
 * by a frame will fit in the 1st ring (1st buf of type 0 and rest of type 1).
 * The 2nd ring contains buffers of type 1 alone and is mostly used only
 * for LRO.
 */
static int
vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
{
	int err = 0;
	uint32_t i = 0;
	struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];

	while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
		struct rte_mbuf *mbuf;

		/* Allocate blank mbuf for the current Rx Descriptor */
		mbuf = rte_mbuf_raw_alloc(rxq->mp);
		if (unlikely(mbuf == NULL)) {
			PMD_RX_LOG(ERR, "Error allocating mbuf");
			rxq->stats.rx_buf_alloc_failure++;
			err = ENOMEM;
			break;
		}

		vmxnet3_renew_desc(rxq, ring_id, mbuf);
		i++;
	}

	/* Return error only if no buffers are posted at present */
	if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))
		return -err;
	else
		return i;
}

/* MSS not provided by vmxnet3, guess one with available information */
static uint16_t
vmxnet3_guess_mss(struct vmxnet3_hw *hw, const Vmxnet3_RxCompDesc *rcd,
		  struct rte_mbuf *rxm)
{
	uint32_t hlen, slen;
	struct rte_ipv4_hdr *ipv4_hdr;
	struct rte_ipv6_hdr *ipv6_hdr;
	struct rte_tcp_hdr *tcp_hdr;
	char *ptr;
	uint8_t segs;

	RTE_ASSERT(rcd->tcp);

	ptr = rte_pktmbuf_mtod(rxm, char *);
	slen = rte_pktmbuf_data_len(rxm);
	hlen = sizeof(struct rte_ether_hdr);

	if (rcd->v4) {
		if (unlikely(slen < hlen + sizeof(struct rte_ipv4_hdr)))
			return hw->mtu - sizeof(struct rte_ipv4_hdr)
				- sizeof(struct rte_tcp_hdr);

		ipv4_hdr = (struct rte_ipv4_hdr *)(ptr + hlen);
		hlen += rte_ipv4_hdr_len(ipv4_hdr);
	} else if (rcd->v6) {
		if (unlikely(slen < hlen + sizeof(struct rte_ipv6_hdr)))
			return hw->mtu - sizeof(struct rte_ipv6_hdr) -
				sizeof(struct rte_tcp_hdr);

		ipv6_hdr = (struct rte_ipv6_hdr *)(ptr + hlen);
		hlen += sizeof(struct rte_ipv6_hdr);
		if (unlikely(ipv6_hdr->proto != IPPROTO_TCP)) {
			int frag;

			rte_net_skip_ip6_ext(ipv6_hdr->proto, rxm,
					     &hlen, &frag);
		}
	}

	if (unlikely(slen < hlen + sizeof(struct rte_tcp_hdr)))
		return hw->mtu - hlen - sizeof(struct rte_tcp_hdr) +
			sizeof(struct rte_ether_hdr);

	tcp_hdr = (struct rte_tcp_hdr *)(ptr + hlen);
	hlen += (tcp_hdr->data_off & 0xf0) >> 2;

	segs = *vmxnet3_segs_dynfield(rxm);
	if (segs > 1)
		return (rte_pktmbuf_pkt_len(rxm) - hlen + segs - 1) / segs;
	else
		return hw->mtu - hlen + sizeof(struct rte_ether_hdr);
}

/* Receive side checksum and other offloads */
static inline void
vmxnet3_rx_offload(struct vmxnet3_hw *hw, const Vmxnet3_RxCompDesc *rcd,
		   struct rte_mbuf *rxm, const uint8_t sop)
{
	uint64_t ol_flags = rxm->ol_flags;
	uint32_t packet_type = rxm->packet_type;

	/* Offloads set in sop */
	if (sop) {
		/* Set packet type */
		packet_type |= RTE_PTYPE_L2_ETHER;

		/* Check large packet receive */
		if (VMXNET3_VERSION_GE_2(hw) &&
		    rcd->type == VMXNET3_CDTYPE_RXCOMP_LRO) {
			const Vmxnet3_RxCompDescExt *rcde =
				(const Vmxnet3_RxCompDescExt *)rcd;

			rxm->tso_segsz = rcde->mss;
			*vmxnet3_segs_dynfield(rxm) = rcde->segCnt;
			ol_flags |= RTE_MBUF_F_RX_LRO;
		}
	} else { /* Offloads set in eop */
		/* Check for RSS */
		if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE) {
			ol_flags |= RTE_MBUF_F_RX_RSS_HASH;
			rxm->hash.rss = rcd->rssHash;
		}

		/* Check for hardware stripped VLAN tag */
		if (rcd->ts) {
			ol_flags |= (RTE_MBUF_F_RX_VLAN | RTE_MBUF_F_RX_VLAN_STRIPPED);
			rxm->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
		}

		/* Check packet type, checksum errors, etc. */
		if (rcd->cnc) {
			ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_UNKNOWN;

			if (rcd->v4) {
				packet_type |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
				if (rcd->tcp)
					packet_type |= RTE_PTYPE_L4_TCP;
				else if (rcd->udp)
					packet_type |= RTE_PTYPE_L4_UDP;
			} else if (rcd->v6) {
				packet_type |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
				if (rcd->tcp)
					packet_type |= RTE_PTYPE_L4_TCP;
				else if (rcd->udp)
					packet_type |= RTE_PTYPE_L4_UDP;
			} else {
				packet_type |= RTE_PTYPE_UNKNOWN;
			}

		} else {
			if (rcd->v4) {
				packet_type |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;

				if (rcd->ipc)
					ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_GOOD;
				else
					ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_BAD;

				if (rcd->tuc) {
					ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_GOOD;
					if (rcd->tcp)
						packet_type |= RTE_PTYPE_L4_TCP;
					else
						packet_type |= RTE_PTYPE_L4_UDP;
				} else {
					if (rcd->tcp) {
						packet_type |= RTE_PTYPE_L4_TCP;
						ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD;
					} else if (rcd->udp) {
						packet_type |= RTE_PTYPE_L4_UDP;
						ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD;
					}
				}
			} else if (rcd->v6) {
				packet_type |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;

				if (rcd->tuc) {
					ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_GOOD;
					if (rcd->tcp)
						packet_type |= RTE_PTYPE_L4_TCP;
					else
						packet_type |= RTE_PTYPE_L4_UDP;
				} else {
					if (rcd->tcp) {
						packet_type |= RTE_PTYPE_L4_TCP;
						ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD;
					} else if (rcd->udp) {
						packet_type |= RTE_PTYPE_L4_UDP;
						ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD;
					}
				}
			} else {
				packet_type |= RTE_PTYPE_UNKNOWN;
			}

			/* Old variants of vmxnet3 do not provide MSS */
			if ((ol_flags & RTE_MBUF_F_RX_LRO) && rxm->tso_segsz == 0)
				rxm->tso_segsz = vmxnet3_guess_mss(hw,
						rcd, rxm);
		}
	}

	rxm->ol_flags = ol_flags;
	rxm->packet_type = packet_type;
}

/*
 * Process the Rx Completion Ring of given vmxnet3_rx_queue
 * for nb_pkts burst and return the number of packets received
 */
uint16_t
vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
	uint16_t nb_rx;
	uint32_t nb_rxd, idx;
	uint8_t ring_idx;
	vmxnet3_rx_queue_t *rxq;
	Vmxnet3_RxCompDesc *rcd;
	vmxnet3_buf_info_t *rbi;
	Vmxnet3_RxDesc *rxd;
	struct rte_mbuf *rxm = NULL;
	struct vmxnet3_hw *hw;

	nb_rx = 0;
	ring_idx = 0;
	nb_rxd = 0;
	idx = 0;

	rxq = rx_queue;
	hw = rxq->hw;

	rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;

	if (unlikely(rxq->stopped)) {
		PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
		return 0;
	}

	while (rcd->gen == rxq->comp_ring.gen) {
		struct rte_mbuf *newm;

		if (nb_rx >= nb_pkts)
			break;

		newm = rte_mbuf_raw_alloc(rxq->mp);
		if (unlikely(newm == NULL)) {
			PMD_RX_LOG(ERR, "Error allocating mbuf");
			rxq->stats.rx_buf_alloc_failure++;
			break;
		}

		idx = rcd->rxdIdx;
		ring_idx = vmxnet3_get_ring_idx(hw, rcd->rqID);
		rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
		RTE_SET_USED(rxd); /* used only for assert when enabled */
		rbi = rxq->cmd_ring[ring_idx].buf_info + idx;

		PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);

		RTE_ASSERT(rcd->len <= rxd->len);
		RTE_ASSERT(rbi->m);

		/* Get the packet buffer pointer from buf_info */
		rxm = rbi->m;

		/* Clear descriptor associated buf_info to be reused */
		rbi->m = NULL;
		rbi->bufPA = 0;

		/* Update the index that we received a packet */
		rxq->cmd_ring[ring_idx].next2comp = idx;

		/* For RCD with EOP set, check if there is frame error */
		if (unlikely(rcd->eop && rcd->err)) {
			rxq->stats.drop_total++;
			rxq->stats.drop_err++;

			if (!rcd->fcs) {
				rxq->stats.drop_fcs++;
				PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
			}
			PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
				   (int)(rcd - (struct Vmxnet3_RxCompDesc *)
					 rxq->comp_ring.base), rcd->rxdIdx);
			rte_pktmbuf_free_seg(rxm);
			if (rxq->start_seg) {
				struct rte_mbuf *start = rxq->start_seg;

				rxq->start_seg = NULL;
				rte_pktmbuf_free(start);
			}
			goto rcd_done;
		}

		/* Initialize newly received packet buffer */
		rxm->port = rxq->port_id;
		rxm->nb_segs = 1;
		rxm->next = NULL;
		rxm->pkt_len = (uint16_t)rcd->len;
		rxm->data_len = (uint16_t)rcd->len;
		rxm->data_off = RTE_PKTMBUF_HEADROOM;
		rxm->ol_flags = 0;
		rxm->vlan_tci = 0;
		rxm->packet_type = 0;

		/*
		 * If this is the first buffer of the received packet,
		 * set the pointer to the first mbuf of the packet
		 * Otherwise, update the total length and the number of segments
		 * of the current scattered packet, and update the pointer to
		 * the last mbuf of the current packet.
		 */
		if (rcd->sop) {
			RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);

			if (unlikely(rcd->len == 0)) {
				RTE_ASSERT(rcd->eop);

				PMD_RX_LOG(DEBUG,
					   "Rx buf was skipped. rxring[%d][%d])",
					   ring_idx, idx);
				rte_pktmbuf_free_seg(rxm);
				goto rcd_done;
			}

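			/* When the completion's rqID refers to the Rx data
			 * ring (vmxnet3 v3+), the payload of this small
			 * packet sits in the data ring entry indexed by
			 * rxdIdx; copy it into the mbuf.
			 */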
			if (vmxnet3_rx_data_ring(hw, rcd->rqID)) {
				uint8_t *rdd = rxq->data_ring.base +
					idx * rxq->data_desc_size;

				RTE_ASSERT(VMXNET3_VERSION_GE_3(hw));
				rte_memcpy(rte_pktmbuf_mtod(rxm, char *),
					   rdd, rcd->len);
			}

			rxq->start_seg = rxm;
			rxq->last_seg = rxm;
			vmxnet3_rx_offload(hw, rcd, rxm, 1);
		} else {
			struct rte_mbuf *start = rxq->start_seg;

			RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_BODY);

			if (likely(start && rxm->data_len > 0)) {
				start->pkt_len += rxm->data_len;
				start->nb_segs++;

				rxq->last_seg->next = rxm;
				rxq->last_seg = rxm;
			} else {
				PMD_RX_LOG(ERR, "Error received empty or out of order frame.");
				rxq->stats.drop_total++;
				rxq->stats.drop_err++;

				rte_pktmbuf_free_seg(rxm);
			}
		}

		if (rcd->eop) {
			struct rte_mbuf *start = rxq->start_seg;

			vmxnet3_rx_offload(hw, rcd, start, 0);
			rx_pkts[nb_rx++] = start;
			rxq->start_seg = NULL;
		}

rcd_done:
		rxq->cmd_ring[ring_idx].next2comp = idx;
		VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp,
					  rxq->cmd_ring[ring_idx].size);

		/* It's time to renew descriptors */
		vmxnet3_renew_desc(rxq, ring_idx, newm);
		if (unlikely(rxq->shared->ctrl.updateRxProd &&
			     (rxq->cmd_ring[ring_idx].next2fill & 0xf) == 0)) {
			VMXNET3_WRITE_BAR0_REG(hw, hw->rx_prod_offset[ring_idx] +
					       (rxq->queue_id * VMXNET3_REG_ALIGN),
					       rxq->cmd_ring[ring_idx].next2fill);
		}

		/* Advance to the next descriptor in comp_ring */
		vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);

		rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
		nb_rxd++;
		if (nb_rxd > rxq->cmd_ring[0].size) {
			PMD_RX_LOG(ERR, "Used up quota of receiving packets,"
				   " relinquish control.");
			break;
		}
	}

	if (unlikely(nb_rxd == 0)) {
		uint32_t avail;
		uint32_t posted = 0;

		for (ring_idx = 0; ring_idx < VMXNET3_RX_CMDRING_SIZE; ring_idx++) {
			avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[ring_idx]);
			if (unlikely(avail > 0)) {
				/* try to alloc new buf and renew descriptors */
				if (vmxnet3_post_rx_bufs(rxq, ring_idx) > 0)
					posted |= (1 << ring_idx);
			}
		}
		if (unlikely(rxq->shared->ctrl.updateRxProd)) {
			for (ring_idx = 0; ring_idx < VMXNET3_RX_CMDRING_SIZE; ring_idx++) {
				if (posted & (1 << ring_idx))
					VMXNET3_WRITE_BAR0_REG(hw, hw->rx_prod_offset[ring_idx] +
							       (rxq->queue_id * VMXNET3_REG_ALIGN),
							       rxq->cmd_ring[ring_idx].next2fill);
			}
		}
	}

	return nb_rx;
}

uint32_t
vmxnet3_dev_rx_queue_count(void *rx_queue)
{
	const vmxnet3_rx_queue_t *rxq;
	const Vmxnet3_RxCompDesc *rcd;
	uint32_t idx, nb_rxd = 0;
	uint8_t gen;

	rxq = rx_queue;
	if (unlikely(rxq->stopped)) {
		PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
		return 0;
	}

	gen = rxq->comp_ring.gen;
	idx = rxq->comp_ring.next2proc;
	rcd = &rxq->comp_ring.base[idx].rcd;
	while (rcd->gen == gen) {
		if (rcd->eop)
			++nb_rxd;
		if (++idx == rxq->comp_ring.size) {
			idx = 0;
			gen ^= 1;
		}
		rcd = &rxq->comp_ring.base[idx].rcd;
	}

	return nb_rxd;
}
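/*
 * Set up a Tx queue. The command ring, completion ring and Tx data ring
 * are carved out of a single memzone; nb_desc must lie between
 * VMXNET3_DEF_TX_RING_SIZE and VMXNET3_TX_RING_MAX_SIZE, and on vmxnet3
 * v7+ the ring size is rounded down to a power of two.
 */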
int
vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
			   uint16_t queue_idx,
			   uint16_t nb_desc,
			   unsigned int socket_id,
			   const struct rte_eth_txconf *tx_conf __rte_unused)
{
	struct vmxnet3_hw *hw = dev->data->dev_private;
	const struct rte_memzone *mz;
	struct vmxnet3_tx_queue *txq;
	struct vmxnet3_cmd_ring *ring;
	struct vmxnet3_comp_ring *comp_ring;
	struct vmxnet3_data_ring *data_ring;
	int size;

	PMD_INIT_FUNC_TRACE();

	txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue),
			  RTE_CACHE_LINE_SIZE);
	if (txq == NULL) {
		PMD_INIT_LOG(ERR, "Can not allocate tx queue structure");
		return -ENOMEM;
	}

	txq->queue_id = queue_idx;
	txq->port_id = dev->data->port_id;
	txq->shared = NULL; /* set in vmxnet3_setup_driver_shared() */
	txq->hw = hw;
	txq->qid = queue_idx;
	txq->stopped = TRUE;
	txq->txdata_desc_size = hw->txdata_desc_size;

	ring = &txq->cmd_ring;
	comp_ring = &txq->comp_ring;
	data_ring = &txq->data_ring;

	/* Tx vmxnet ring length should be between 512-4096 */
	if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u",
			     VMXNET3_DEF_TX_RING_SIZE);
		return -EINVAL;
	} else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u",
			     VMXNET3_TX_RING_MAX_SIZE);
		return -EINVAL;
	} else {
		ring->size = nb_desc;
		if (VMXNET3_VERSION_GE_7(hw))
			ring->size = rte_align32prevpow2(nb_desc);
		ring->size &= ~VMXNET3_RING_SIZE_MASK;
	}
	comp_ring->size = data_ring->size = ring->size;

	/* Tx vmxnet rings structure initialization */
	ring->next2fill = 0;
	ring->next2comp = 0;
	ring->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
	size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
	size += txq->txdata_desc_size * data_ring->size;

	mz = rte_eth_dma_zone_reserve(dev, "txdesc", queue_idx, size,
				      VMXNET3_RING_BA_ALIGN, socket_id);
	if (mz == NULL) {
		PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
		return -ENOMEM;
	}
	txq->mz = mz;
	memset(mz->addr, 0, mz->len);

	/* cmd_ring initialization */
	ring->base = mz->addr;
	ring->basePA = mz->iova;

	/* comp_ring initialization */
	comp_ring->base = ring->base + ring->size;
	comp_ring->basePA = ring->basePA +
		(sizeof(struct Vmxnet3_TxDesc) * ring->size);

	/* data_ring initialization */
	data_ring->base = (Vmxnet3_TxDataDesc *)(comp_ring->base + comp_ring->size);
	data_ring->basePA = comp_ring->basePA +
		(sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size);

	/* cmd_ring0 buf_info allocation */
	ring->buf_info = rte_zmalloc("tx_ring_buf_info",
				     ring->size * sizeof(vmxnet3_buf_info_t),
				     RTE_CACHE_LINE_SIZE);
	if (ring->buf_info == NULL) {
		PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure");
		return -ENOMEM;
	}

	/* Update the data portion with txq */
	dev->data->tx_queues[queue_idx] = txq;

	return 0;
}

int
vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
			   uint16_t queue_idx,
			   uint16_t nb_desc,
			   unsigned int socket_id,
			   __rte_unused const struct rte_eth_rxconf *rx_conf,
			   struct rte_mempool *mp)
{
	const struct rte_memzone *mz;
	struct vmxnet3_rx_queue *rxq;
	struct vmxnet3_hw *hw = dev->data->dev_private;
	struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
	struct vmxnet3_comp_ring *comp_ring;
	struct vmxnet3_rx_data_ring *data_ring;
	int size;
	uint8_t i;
	char mem_name[32];

	PMD_INIT_FUNC_TRACE();

	rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue),
			  RTE_CACHE_LINE_SIZE);
	if (rxq == NULL) {
		PMD_INIT_LOG(ERR, "Can not allocate rx queue structure");
		return -ENOMEM;
	}

	rxq->mp = mp;
	/* Remember buffer size for initialization in dev start. */
	hw->rxdata_buf_size =
		rte_pktmbuf_data_room_size(mp) - RTE_PKTMBUF_HEADROOM;
	rxq->queue_id = queue_idx;
	rxq->port_id = dev->data->port_id;
	rxq->shared = NULL; /* set in vmxnet3_setup_driver_shared() */
	rxq->hw = hw;
	rxq->qid1 = queue_idx;
	rxq->qid2 = queue_idx + hw->num_rx_queues;
	rxq->data_ring_qid = queue_idx + 2 * hw->num_rx_queues;
	rxq->data_desc_size = hw->rxdata_desc_size;
	rxq->stopped = TRUE;

	ring0 = &rxq->cmd_ring[0];
	ring1 = &rxq->cmd_ring[1];
	comp_ring = &rxq->comp_ring;
	data_ring = &rxq->data_ring;

	/* Rx vmxnet rings length should be between 256-4096 */
	if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: 256");
		return -EINVAL;
	} else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: 4096");
		return -EINVAL;
	} else {
		ring0->size = nb_desc;
		if (VMXNET3_VERSION_GE_7(hw))
			ring0->size = rte_align32prevpow2(nb_desc);
		ring0->size &= ~VMXNET3_RING_SIZE_MASK;
		ring1->size = ring0->size;
	}

	comp_ring->size = ring0->size + ring1->size;
	data_ring->size = ring0->size;

	/* Rx vmxnet rings structure initialization */
	ring0->next2fill = 0;
	ring1->next2fill = 0;
	ring0->next2comp = 0;
	ring1->next2comp = 0;
	ring0->gen = VMXNET3_INIT_GEN;
	ring1->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
	size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
	if (VMXNET3_VERSION_GE_3(hw) && rxq->data_desc_size)
		size += rxq->data_desc_size * data_ring->size;

	mz = rte_eth_dma_zone_reserve(dev, "rxdesc", queue_idx, size,
				      VMXNET3_RING_BA_ALIGN, socket_id);
	if (mz == NULL) {
		PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
		return -ENOMEM;
	}
	rxq->mz = mz;
	memset(mz->addr, 0, mz->len);

	/* cmd_ring0 initialization */
	ring0->base = mz->addr;
	ring0->basePA = mz->iova;

	/* cmd_ring1 initialization */
	ring1->base = ring0->base + ring0->size;
	ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;

	/* comp_ring initialization */
	comp_ring->base = ring1->base + ring1->size;
	comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *
			    ring1->size;

	/* data_ring initialization */
	if (VMXNET3_VERSION_GE_3(hw) && rxq->data_desc_size) {
		data_ring->base =
			(uint8_t *)(comp_ring->base + comp_ring->size);
		data_ring->basePA = comp_ring->basePA +
			sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
	}

	/* cmd_ring0-cmd_ring1 buf_info allocation */
	for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {
		ring = &rxq->cmd_ring[i];
		ring->rid = i;
		snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);

		ring->buf_info = rte_zmalloc(mem_name,
					     ring->size * sizeof(vmxnet3_buf_info_t),
					     RTE_CACHE_LINE_SIZE);
		if (ring->buf_info == NULL) {
			PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure");
			return -ENOMEM;
		}
	}

	/* Update the data portion with rxq */
	dev->data->rx_queues[queue_idx] = rxq;

	return 0;
}

/*
 * Initializes Receive Unit
 * Load mbufs in rx queue in advance
 */
int
vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
{
	struct vmxnet3_hw *hw = dev->data->dev_private;

	int i, ret;
	uint8_t j;

	PMD_INIT_FUNC_TRACE();

	for (i = 0; i < hw->num_rx_queues; i++) {
		vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];

		for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
			/* Passing 0 as alloc_num will allocate full ring */
			ret = vmxnet3_post_rx_bufs(rxq, j);

			/* Zero number of descriptors in the configuration of the Rx queue */
			if (ret == 0) {
				PMD_INIT_LOG(ERR,
					     "Invalid configuration in Rx queue: %d, buffers ring: %d",
					     i, j);
				return -EINVAL;
			}
			/* Return the error number */
			if (ret < 0) {
				PMD_INIT_LOG(ERR, "Posting Rxq: %d buffers ring: %d", i, j);
				return ret;
			}
			/*
			 * Updating device with the index:next2fill to fill the
			 * mbufs for coming packets.
			 */
			if (unlikely(rxq->shared->ctrl.updateRxProd)) {
				VMXNET3_WRITE_BAR0_REG(hw, hw->rx_prod_offset[j] +
						       (rxq->queue_id * VMXNET3_REG_ALIGN),
						       rxq->cmd_ring[j].next2fill);
			}
		}
		rxq->stopped = FALSE;
		rxq->start_seg = NULL;
	}

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];

		txq->stopped = FALSE;
	}

	return 0;
}

static uint8_t rss_intel_key[40] = {
	0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
	0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
	0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
	0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
	0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
};

/*
 * Additional RSS configurations based on vmxnet v4+ APIs
 */
int
vmxnet3_v4_rss_configure(struct rte_eth_dev *dev)
{
	struct vmxnet3_hw *hw = dev->data->dev_private;
	Vmxnet3_DriverShared *shared = hw->shared;
	Vmxnet3_CmdInfo *cmdInfo = &shared->cu.cmdInfo;
	struct rte_eth_rss_conf *port_rss_conf;
	uint64_t rss_hf;
	uint32_t ret;

	PMD_INIT_FUNC_TRACE();

	cmdInfo->setRSSFields = 0;
	port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;

	if ((port_rss_conf->rss_hf & VMXNET3_MANDATORY_V4_RSS) !=
	    VMXNET3_MANDATORY_V4_RSS) {
		PMD_INIT_LOG(WARNING, "RSS: IPv4/6 TCP is required for vmxnet3 v4 RSS, "
			     "automatically setting it");
		port_rss_conf->rss_hf |= VMXNET3_MANDATORY_V4_RSS;
	}

	rss_hf = port_rss_conf->rss_hf &
		(VMXNET3_V4_RSS_MASK | VMXNET3_RSS_OFFLOAD_ALL);

	if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV4_TCP)
		cmdInfo->setRSSFields |= VMXNET3_RSS_FIELDS_TCPIP4;
	if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV6_TCP)
		cmdInfo->setRSSFields |= VMXNET3_RSS_FIELDS_TCPIP6;
	if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV4_UDP)
		cmdInfo->setRSSFields |= VMXNET3_RSS_FIELDS_UDPIP4;
	if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV6_UDP)
		cmdInfo->setRSSFields |= VMXNET3_RSS_FIELDS_UDPIP6;

	VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD,
			       VMXNET3_CMD_SET_RSS_FIELDS);
	ret = VMXNET3_READ_BAR1_REG(hw, VMXNET3_REG_CMD);

	if (ret != VMXNET3_SUCCESS) {
		PMD_DRV_LOG(ERR, "Set RSS fields (v4) failed: %d", ret);
	}

	return ret;
}

/*
 * Configure RSS feature
 */
int
vmxnet3_rss_configure(struct rte_eth_dev *dev)
{
	struct vmxnet3_hw *hw = dev->data->dev_private;
	struct VMXNET3_RSSConf *dev_rss_conf;
	struct rte_eth_rss_conf *port_rss_conf;
	uint64_t rss_hf;
	uint8_t i, j;

	PMD_INIT_FUNC_TRACE();

	dev_rss_conf = hw->rss_conf;
	port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;

	/* loading hashFunc */
	dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
	/* loading hashKeySize */
	dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
	/* loading indTableSize: Must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128) */
	dev_rss_conf->indTableSize = (uint16_t)((MAX_RX_QUEUES(hw)) * 4);

	if (port_rss_conf->rss_key == NULL) {
		/* Default hash key */
		port_rss_conf->rss_key = rss_intel_key;
	}

	/* loading hashKey */
	memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key,
	       dev_rss_conf->hashKeySize);

	/* loading indTable */
	for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
		if (j == dev->data->nb_rx_queues)
			j = 0;
		dev_rss_conf->indTable[i] = j;
	}

	/* loading hashType */
	dev_rss_conf->hashType = 0;
	rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
	if (rss_hf & RTE_ETH_RSS_IPV4)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
	if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV4_TCP)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
	if (rss_hf & RTE_ETH_RSS_IPV6)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
	if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV6_TCP)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;

	return VMXNET3_SUCCESS;
}