1 /*- 2 * BSD LICENSE 3 * 4 * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include <sys/queue.h> 35 36 #include <stdio.h> 37 #include <stdlib.h> 38 #include <string.h> 39 #include <errno.h> 40 #include <stdint.h> 41 #include <stdarg.h> 42 #include <unistd.h> 43 #include <inttypes.h> 44 45 #include <rte_byteorder.h> 46 #include <rte_common.h> 47 #include <rte_cycles.h> 48 #include <rte_log.h> 49 #include <rte_debug.h> 50 #include <rte_interrupts.h> 51 #include <rte_pci.h> 52 #include <rte_memory.h> 53 #include <rte_memzone.h> 54 #include <rte_launch.h> 55 #include <rte_eal.h> 56 #include <rte_per_lcore.h> 57 #include <rte_lcore.h> 58 #include <rte_atomic.h> 59 #include <rte_branch_prediction.h> 60 #include <rte_mempool.h> 61 #include <rte_malloc.h> 62 #include <rte_mbuf.h> 63 #include <rte_ether.h> 64 #include <rte_ethdev.h> 65 #include <rte_prefetch.h> 66 #include <rte_ip.h> 67 #include <rte_udp.h> 68 #include <rte_tcp.h> 69 #include <rte_sctp.h> 70 #include <rte_string_fns.h> 71 #include <rte_errno.h> 72 73 #include "base/vmxnet3_defs.h" 74 #include "vmxnet3_ring.h" 75 76 #include "vmxnet3_logs.h" 77 #include "vmxnet3_ethdev.h" 78 79 static const uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2}; 80 81 static int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t*, uint8_t); 82 static void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *); 83 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED 84 static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *); 85 static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *); 86 #endif 87 88 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED 89 static void 90 vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq) 91 { 92 uint32_t avail = 0; 93 94 if (rxq == NULL) 95 return; 96 97 PMD_RX_LOG(DEBUG, 98 "RXQ: cmd0 base : %p cmd1 base : %p comp ring base : %p.", 99 rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base); 100 PMD_RX_LOG(DEBUG, 101 "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.", 102 (unsigned long)rxq->cmd_ring[0].basePA, 103 (unsigned long)rxq->cmd_ring[1].basePA, 104 (unsigned long)rxq->comp_ring.basePA); 105 106 avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]); 107 PMD_RX_LOG(DEBUG, 108 "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u", 109 (uint32_t)rxq->cmd_ring[0].size, avail, 110 rxq->comp_ring.next2proc, 111 rxq->cmd_ring[0].size - avail); 112 113 avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]); 114 PMD_RX_LOG(DEBUG, "RXQ:cmd1 size=%u; free=%u; next2proc=%u; queued=%u", 115 (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc, 116 rxq->cmd_ring[1].size - avail); 117 118 } 119 120 static void 121 vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq) 122 { 123 uint32_t avail = 0; 124 125 if (txq == NULL) 126 return; 127 128 PMD_TX_LOG(DEBUG, "TXQ: cmd base : %p comp ring base : %p data ring base : %p.", 129 txq->cmd_ring.base, txq->comp_ring.base, txq->data_ring.base); 130 PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx data ring basePA : 0x%lx.", 131 (unsigned long)txq->cmd_ring.basePA, 132 (unsigned long)txq->comp_ring.basePA, 133 (unsigned long)txq->data_ring.basePA); 134 135 avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring); 136 PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u", 137 (uint32_t)txq->cmd_ring.size, avail, 138 txq->comp_ring.next2proc, txq->cmd_ring.size - avail); 139 } 140 #endif 141 142 static void 143 vmxnet3_tx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring) 144 { 145 while (ring->next2comp != ring->next2fill) { 146 /* No need to worry about desc ownership, device is quiesced by now. */ 147 vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp; 148 149 if (buf_info->m) { 150 rte_pktmbuf_free(buf_info->m); 151 buf_info->m = NULL; 152 buf_info->bufPA = 0; 153 buf_info->len = 0; 154 } 155 vmxnet3_cmd_ring_adv_next2comp(ring); 156 } 157 } 158 159 static void 160 vmxnet3_rx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring) 161 { 162 uint32_t i; 163 164 for (i = 0; i < ring->size; i++) { 165 /* No need to worry about desc ownership, device is quiesced by now. */ 166 vmxnet3_buf_info_t *buf_info = &ring->buf_info[i]; 167 168 if (buf_info->m) { 169 rte_pktmbuf_free_seg(buf_info->m); 170 buf_info->m = NULL; 171 buf_info->bufPA = 0; 172 buf_info->len = 0; 173 } 174 vmxnet3_cmd_ring_adv_next2comp(ring); 175 } 176 } 177 178 static void 179 vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring) 180 { 181 rte_free(ring->buf_info); 182 ring->buf_info = NULL; 183 } 184 185 void 186 vmxnet3_dev_tx_queue_release(void *txq) 187 { 188 vmxnet3_tx_queue_t *tq = txq; 189 190 if (tq != NULL) { 191 /* Release mbufs */ 192 vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring); 193 /* Release the cmd_ring */ 194 vmxnet3_cmd_ring_release(&tq->cmd_ring); 195 } 196 } 197 198 void 199 vmxnet3_dev_rx_queue_release(void *rxq) 200 { 201 int i; 202 vmxnet3_rx_queue_t *rq = rxq; 203 204 if (rq != NULL) { 205 /* Release mbufs */ 206 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) 207 vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]); 208 209 /* Release both the cmd_rings */ 210 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) 211 vmxnet3_cmd_ring_release(&rq->cmd_ring[i]); 212 } 213 } 214 215 static void 216 vmxnet3_dev_tx_queue_reset(void *txq) 217 { 218 vmxnet3_tx_queue_t *tq = txq; 219 struct vmxnet3_cmd_ring *ring = &tq->cmd_ring; 220 struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring; 221 struct vmxnet3_data_ring *data_ring = &tq->data_ring; 222 int size; 223 224 if (tq != NULL) { 225 /* Release the cmd_ring mbufs */ 226 vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring); 227 } 228 229 /* Tx vmxnet rings structure initialization*/ 230 ring->next2fill = 0; 231 ring->next2comp = 0; 232 ring->gen = VMXNET3_INIT_GEN; 233 comp_ring->next2proc = 0; 234 comp_ring->gen = VMXNET3_INIT_GEN; 235 236 size = sizeof(struct Vmxnet3_TxDesc) * ring->size; 237 size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size; 238 size += sizeof(struct Vmxnet3_TxDataDesc) * data_ring->size; 239 240 memset(ring->base, 0, size); 241 } 242 243 static void 244 vmxnet3_dev_rx_queue_reset(void *rxq) 245 { 246 int i; 247 vmxnet3_rx_queue_t *rq = rxq; 248 struct vmxnet3_cmd_ring *ring0, *ring1; 249 struct vmxnet3_comp_ring *comp_ring; 250 int size; 251 252 if (rq != NULL) { 253 /* Release both the cmd_rings mbufs */ 254 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) 255 vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]); 256 } 257 258 ring0 = &rq->cmd_ring[0]; 259 ring1 = &rq->cmd_ring[1]; 260 comp_ring = &rq->comp_ring; 261 262 /* Rx vmxnet rings structure initialization */ 263 ring0->next2fill = 0; 264 ring1->next2fill = 0; 265 ring0->next2comp = 0; 266 ring1->next2comp = 0; 267 ring0->gen = VMXNET3_INIT_GEN; 268 ring1->gen = VMXNET3_INIT_GEN; 269 comp_ring->next2proc = 0; 270 comp_ring->gen = VMXNET3_INIT_GEN; 271 272 size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size); 273 size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size; 274 275 memset(ring0->base, 0, size); 276 } 277 278 void 279 vmxnet3_dev_clear_queues(struct rte_eth_dev *dev) 280 { 281 unsigned i; 282 283 PMD_INIT_FUNC_TRACE(); 284 285 for (i = 0; i < dev->data->nb_tx_queues; i++) { 286 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i]; 287 288 if (txq != NULL) { 289 txq->stopped = TRUE; 290 vmxnet3_dev_tx_queue_reset(txq); 291 } 292 } 293 294 for (i = 0; i < dev->data->nb_rx_queues; i++) { 295 struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i]; 296 297 if (rxq != NULL) { 298 rxq->stopped = TRUE; 299 vmxnet3_dev_rx_queue_reset(rxq); 300 } 301 } 302 } 303 304 static int 305 vmxnet3_unmap_pkt(uint16_t eop_idx, vmxnet3_tx_queue_t *txq) 306 { 307 int completed = 0; 308 struct rte_mbuf *mbuf; 309 310 /* Release cmd_ring descriptor and free mbuf */ 311 RTE_ASSERT(txq->cmd_ring.base[eop_idx].txd.eop == 1); 312 313 mbuf = txq->cmd_ring.buf_info[eop_idx].m; 314 if (mbuf == NULL) 315 rte_panic("EOP desc does not point to a valid mbuf"); 316 rte_pktmbuf_free(mbuf); 317 318 txq->cmd_ring.buf_info[eop_idx].m = NULL; 319 320 while (txq->cmd_ring.next2comp != eop_idx) { 321 /* no out-of-order completion */ 322 RTE_ASSERT(txq->cmd_ring.base[txq->cmd_ring.next2comp].txd.cq == 0); 323 vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring); 324 completed++; 325 } 326 327 /* Mark the txd for which tcd was generated as completed */ 328 vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring); 329 330 return completed + 1; 331 } 332 333 static void 334 vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq) 335 { 336 int completed = 0; 337 vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring; 338 struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *) 339 (comp_ring->base + comp_ring->next2proc); 340 341 while (tcd->gen == comp_ring->gen) { 342 completed += vmxnet3_unmap_pkt(tcd->txdIdx, txq); 343 344 vmxnet3_comp_ring_adv_next2proc(comp_ring); 345 tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base + 346 comp_ring->next2proc); 347 } 348 349 PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed); 350 } 351 352 uint16_t 353 vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, 354 uint16_t nb_pkts) 355 { 356 uint16_t nb_tx; 357 vmxnet3_tx_queue_t *txq = tx_queue; 358 struct vmxnet3_hw *hw = txq->hw; 359 Vmxnet3_TxQueueCtrl *txq_ctrl = &txq->shared->ctrl; 360 uint32_t deferred = rte_le_to_cpu_32(txq_ctrl->txNumDeferred); 361 362 if (unlikely(txq->stopped)) { 363 PMD_TX_LOG(DEBUG, "Tx queue is stopped."); 364 return 0; 365 } 366 367 /* Free up the comp_descriptors aggressively */ 368 vmxnet3_tq_tx_complete(txq); 369 370 nb_tx = 0; 371 while (nb_tx < nb_pkts) { 372 Vmxnet3_GenericDesc *gdesc; 373 vmxnet3_buf_info_t *tbi; 374 uint32_t first2fill, avail, dw2; 375 struct rte_mbuf *txm = tx_pkts[nb_tx]; 376 struct rte_mbuf *m_seg = txm; 377 int copy_size = 0; 378 bool tso = (txm->ol_flags & PKT_TX_TCP_SEG) != 0; 379 /* # of descriptors needed for a packet. */ 380 unsigned count = txm->nb_segs; 381 382 avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring); 383 if (count > avail) { 384 /* Is command ring full? */ 385 if (unlikely(avail == 0)) { 386 PMD_TX_LOG(DEBUG, "No free ring descriptors"); 387 txq->stats.tx_ring_full++; 388 txq->stats.drop_total += (nb_pkts - nb_tx); 389 break; 390 } 391 392 /* Command ring is not full but cannot handle the 393 * multi-segmented packet. Let's try the next packet 394 * in this case. 395 */ 396 PMD_TX_LOG(DEBUG, "Running out of ring descriptors " 397 "(avail %d needed %d)", avail, count); 398 txq->stats.drop_total++; 399 if (tso) 400 txq->stats.drop_tso++; 401 rte_pktmbuf_free(txm); 402 nb_tx++; 403 continue; 404 } 405 406 /* Drop non-TSO packet that is excessively fragmented */ 407 if (unlikely(!tso && count > VMXNET3_MAX_TXD_PER_PKT)) { 408 PMD_TX_LOG(ERR, "Non-TSO packet cannot occupy more than %d tx " 409 "descriptors. Packet dropped.", VMXNET3_MAX_TXD_PER_PKT); 410 txq->stats.drop_too_many_segs++; 411 txq->stats.drop_total++; 412 rte_pktmbuf_free(txm); 413 nb_tx++; 414 continue; 415 } 416 417 if (txm->nb_segs == 1 && 418 rte_pktmbuf_pkt_len(txm) <= VMXNET3_HDR_COPY_SIZE) { 419 struct Vmxnet3_TxDataDesc *tdd; 420 421 tdd = txq->data_ring.base + txq->cmd_ring.next2fill; 422 copy_size = rte_pktmbuf_pkt_len(txm); 423 rte_memcpy(tdd->data, rte_pktmbuf_mtod(txm, char *), copy_size); 424 } 425 426 /* use the previous gen bit for the SOP desc */ 427 dw2 = (txq->cmd_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT; 428 first2fill = txq->cmd_ring.next2fill; 429 do { 430 /* Remember the transmit buffer for cleanup */ 431 tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill; 432 433 /* NB: the following assumes that VMXNET3 maximum 434 * transmit buffer size (16K) is greater than 435 * maximum size of mbuf segment size. 436 */ 437 gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill; 438 if (copy_size) 439 gdesc->txd.addr = rte_cpu_to_le_64(txq->data_ring.basePA + 440 txq->cmd_ring.next2fill * 441 sizeof(struct Vmxnet3_TxDataDesc)); 442 else 443 gdesc->txd.addr = rte_mbuf_data_dma_addr(m_seg); 444 445 gdesc->dword[2] = dw2 | m_seg->data_len; 446 gdesc->dword[3] = 0; 447 448 /* move to the next2fill descriptor */ 449 vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring); 450 451 /* use the right gen for non-SOP desc */ 452 dw2 = txq->cmd_ring.gen << VMXNET3_TXD_GEN_SHIFT; 453 } while ((m_seg = m_seg->next) != NULL); 454 455 /* set the last buf_info for the pkt */ 456 tbi->m = txm; 457 /* Update the EOP descriptor */ 458 gdesc->dword[3] |= VMXNET3_TXD_EOP | VMXNET3_TXD_CQ; 459 460 /* Add VLAN tag if present */ 461 gdesc = txq->cmd_ring.base + first2fill; 462 if (txm->ol_flags & PKT_TX_VLAN_PKT) { 463 gdesc->txd.ti = 1; 464 gdesc->txd.tci = txm->vlan_tci; 465 } 466 467 if (tso) { 468 uint16_t mss = txm->tso_segsz; 469 470 RTE_ASSERT(mss > 0); 471 472 gdesc->txd.hlen = txm->l2_len + txm->l3_len + txm->l4_len; 473 gdesc->txd.om = VMXNET3_OM_TSO; 474 gdesc->txd.msscof = mss; 475 476 deferred += (rte_pktmbuf_pkt_len(txm) - gdesc->txd.hlen + mss - 1) / mss; 477 } else if (txm->ol_flags & PKT_TX_L4_MASK) { 478 gdesc->txd.om = VMXNET3_OM_CSUM; 479 gdesc->txd.hlen = txm->l2_len + txm->l3_len; 480 481 switch (txm->ol_flags & PKT_TX_L4_MASK) { 482 case PKT_TX_TCP_CKSUM: 483 gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct tcp_hdr, cksum); 484 break; 485 case PKT_TX_UDP_CKSUM: 486 gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct udp_hdr, dgram_cksum); 487 break; 488 default: 489 PMD_TX_LOG(WARNING, "requested cksum offload not supported %#llx", 490 txm->ol_flags & PKT_TX_L4_MASK); 491 abort(); 492 } 493 deferred++; 494 } else { 495 gdesc->txd.hlen = 0; 496 gdesc->txd.om = VMXNET3_OM_NONE; 497 gdesc->txd.msscof = 0; 498 deferred++; 499 } 500 501 /* flip the GEN bit on the SOP */ 502 rte_compiler_barrier(); 503 gdesc->dword[2] ^= VMXNET3_TXD_GEN; 504 505 txq_ctrl->txNumDeferred = rte_cpu_to_le_32(deferred); 506 nb_tx++; 507 } 508 509 PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", rte_le_to_cpu_32(txq_ctrl->txThreshold)); 510 511 if (deferred >= rte_le_to_cpu_32(txq_ctrl->txThreshold)) { 512 txq_ctrl->txNumDeferred = 0; 513 /* Notify vSwitch that packets are available. */ 514 VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN), 515 txq->cmd_ring.next2fill); 516 } 517 518 return nb_tx; 519 } 520 521 /* 522 * Allocates mbufs and clusters. Post rx descriptors with buffer details 523 * so that device can receive packets in those buffers. 524 * Ring layout: 525 * Among the two rings, 1st ring contains buffers of type 0 and type 1. 526 * bufs_per_pkt is set such that for non-LRO cases all the buffers required 527 * by a frame will fit in 1st ring (1st buf of type0 and rest of type1). 528 * 2nd ring contains buffers of type 1 alone. Second ring mostly be used 529 * only for LRO. 530 */ 531 static int 532 vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id) 533 { 534 int err = 0; 535 uint32_t i = 0, val = 0; 536 struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id]; 537 538 if (ring_id == 0) { 539 /* Usually: One HEAD type buf per packet 540 * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ? 541 * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD; 542 */ 543 544 /* We use single packet buffer so all heads here */ 545 val = VMXNET3_RXD_BTYPE_HEAD; 546 } else { 547 /* All BODY type buffers for 2nd ring */ 548 val = VMXNET3_RXD_BTYPE_BODY; 549 } 550 551 while (vmxnet3_cmd_ring_desc_avail(ring) > 0) { 552 struct Vmxnet3_RxDesc *rxd; 553 struct rte_mbuf *mbuf; 554 vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill]; 555 556 rxd = (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill); 557 558 /* Allocate blank mbuf for the current Rx Descriptor */ 559 mbuf = rte_mbuf_raw_alloc(rxq->mp); 560 if (unlikely(mbuf == NULL)) { 561 PMD_RX_LOG(ERR, "Error allocating mbuf"); 562 rxq->stats.rx_buf_alloc_failure++; 563 err = ENOMEM; 564 break; 565 } 566 567 /* 568 * Load mbuf pointer into buf_info[ring_size] 569 * buf_info structure is equivalent to cookie for virtio-virtqueue 570 */ 571 buf_info->m = mbuf; 572 buf_info->len = (uint16_t)(mbuf->buf_len - 573 RTE_PKTMBUF_HEADROOM); 574 buf_info->bufPA = rte_mbuf_data_dma_addr_default(mbuf); 575 576 /* Load Rx Descriptor with the buffer's GPA */ 577 rxd->addr = buf_info->bufPA; 578 579 /* After this point rxd->addr MUST not be NULL */ 580 rxd->btype = val; 581 rxd->len = buf_info->len; 582 /* Flip gen bit at the end to change ownership */ 583 rxd->gen = ring->gen; 584 585 vmxnet3_cmd_ring_adv_next2fill(ring); 586 i++; 587 } 588 589 /* Return error only if no buffers are posted at present */ 590 if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1)) 591 return -err; 592 else 593 return i; 594 } 595 596 597 /* Receive side checksum and other offloads */ 598 static void 599 vmxnet3_rx_offload(const Vmxnet3_RxCompDesc *rcd, struct rte_mbuf *rxm) 600 { 601 /* Check for RSS */ 602 if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE) { 603 rxm->ol_flags |= PKT_RX_RSS_HASH; 604 rxm->hash.rss = rcd->rssHash; 605 } 606 607 /* Check packet type, checksum errors, etc. Only support IPv4 for now. */ 608 if (rcd->v4) { 609 struct ether_hdr *eth = rte_pktmbuf_mtod(rxm, struct ether_hdr *); 610 struct ipv4_hdr *ip = (struct ipv4_hdr *)(eth + 1); 611 612 if (((ip->version_ihl & 0xf) << 2) > (int)sizeof(struct ipv4_hdr)) 613 rxm->packet_type = RTE_PTYPE_L3_IPV4_EXT; 614 else 615 rxm->packet_type = RTE_PTYPE_L3_IPV4; 616 617 if (!rcd->cnc) { 618 if (!rcd->ipc) 619 rxm->ol_flags |= PKT_RX_IP_CKSUM_BAD; 620 621 if ((rcd->tcp || rcd->udp) && !rcd->tuc) 622 rxm->ol_flags |= PKT_RX_L4_CKSUM_BAD; 623 } 624 } 625 } 626 627 /* 628 * Process the Rx Completion Ring of given vmxnet3_rx_queue 629 * for nb_pkts burst and return the number of packets received 630 */ 631 uint16_t 632 vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) 633 { 634 uint16_t nb_rx; 635 uint32_t nb_rxd, idx; 636 uint8_t ring_idx; 637 vmxnet3_rx_queue_t *rxq; 638 Vmxnet3_RxCompDesc *rcd; 639 vmxnet3_buf_info_t *rbi; 640 Vmxnet3_RxDesc *rxd; 641 struct rte_mbuf *rxm = NULL; 642 struct vmxnet3_hw *hw; 643 644 nb_rx = 0; 645 ring_idx = 0; 646 nb_rxd = 0; 647 idx = 0; 648 649 rxq = rx_queue; 650 hw = rxq->hw; 651 652 rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd; 653 654 if (unlikely(rxq->stopped)) { 655 PMD_RX_LOG(DEBUG, "Rx queue is stopped."); 656 return 0; 657 } 658 659 while (rcd->gen == rxq->comp_ring.gen) { 660 if (nb_rx >= nb_pkts) 661 break; 662 663 idx = rcd->rxdIdx; 664 ring_idx = (uint8_t)((rcd->rqID == rxq->qid1) ? 0 : 1); 665 rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx; 666 RTE_SET_USED(rxd); /* used only for assert when enabled */ 667 rbi = rxq->cmd_ring[ring_idx].buf_info + idx; 668 669 PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx); 670 671 RTE_ASSERT(rcd->len <= rxd->len); 672 RTE_ASSERT(rbi->m); 673 674 /* Get the packet buffer pointer from buf_info */ 675 rxm = rbi->m; 676 677 /* Clear descriptor associated buf_info to be reused */ 678 rbi->m = NULL; 679 rbi->bufPA = 0; 680 681 /* Update the index that we received a packet */ 682 rxq->cmd_ring[ring_idx].next2comp = idx; 683 684 /* For RCD with EOP set, check if there is frame error */ 685 if (unlikely(rcd->eop && rcd->err)) { 686 rxq->stats.drop_total++; 687 rxq->stats.drop_err++; 688 689 if (!rcd->fcs) { 690 rxq->stats.drop_fcs++; 691 PMD_RX_LOG(ERR, "Recv packet dropped due to frame err."); 692 } 693 PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d", 694 (int)(rcd - (struct Vmxnet3_RxCompDesc *) 695 rxq->comp_ring.base), rcd->rxdIdx); 696 rte_pktmbuf_free_seg(rxm); 697 goto rcd_done; 698 } 699 700 /* Initialize newly received packet buffer */ 701 rxm->port = rxq->port_id; 702 rxm->nb_segs = 1; 703 rxm->next = NULL; 704 rxm->pkt_len = (uint16_t)rcd->len; 705 rxm->data_len = (uint16_t)rcd->len; 706 rxm->data_off = RTE_PKTMBUF_HEADROOM; 707 rxm->ol_flags = 0; 708 rxm->vlan_tci = 0; 709 710 /* 711 * If this is the first buffer of the received packet, 712 * set the pointer to the first mbuf of the packet 713 * Otherwise, update the total length and the number of segments 714 * of the current scattered packet, and update the pointer to 715 * the last mbuf of the current packet. 716 */ 717 if (rcd->sop) { 718 RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD); 719 720 if (unlikely(rcd->len == 0)) { 721 RTE_ASSERT(rcd->eop); 722 723 PMD_RX_LOG(DEBUG, 724 "Rx buf was skipped. rxring[%d][%d])", 725 ring_idx, idx); 726 rte_pktmbuf_free_seg(rxm); 727 goto rcd_done; 728 } 729 730 rxq->start_seg = rxm; 731 vmxnet3_rx_offload(rcd, rxm); 732 } else { 733 struct rte_mbuf *start = rxq->start_seg; 734 735 RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_BODY); 736 737 start->pkt_len += rxm->data_len; 738 start->nb_segs++; 739 740 rxq->last_seg->next = rxm; 741 } 742 rxq->last_seg = rxm; 743 744 if (rcd->eop) { 745 struct rte_mbuf *start = rxq->start_seg; 746 747 /* Check for hardware stripped VLAN tag */ 748 if (rcd->ts) { 749 start->ol_flags |= (PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED); 750 start->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci); 751 } 752 753 rx_pkts[nb_rx++] = start; 754 rxq->start_seg = NULL; 755 } 756 757 rcd_done: 758 rxq->cmd_ring[ring_idx].next2comp = idx; 759 VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp, 760 rxq->cmd_ring[ring_idx].size); 761 762 /* It's time to allocate some new buf and renew descriptors */ 763 vmxnet3_post_rx_bufs(rxq, ring_idx); 764 if (unlikely(rxq->shared->ctrl.updateRxProd)) { 765 VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN), 766 rxq->cmd_ring[ring_idx].next2fill); 767 } 768 769 /* Advance to the next descriptor in comp_ring */ 770 vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring); 771 772 rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd; 773 nb_rxd++; 774 if (nb_rxd > rxq->cmd_ring[0].size) { 775 PMD_RX_LOG(ERR, "Used up quota of receiving packets," 776 " relinquish control."); 777 break; 778 } 779 } 780 781 return nb_rx; 782 } 783 784 /* 785 * Create memzone for device rings. malloc can't be used as the physical address is 786 * needed. If the memzone is already created, then this function returns a ptr 787 * to the old one. 788 */ 789 static const struct rte_memzone * 790 ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name, 791 uint16_t queue_id, uint32_t ring_size, int socket_id) 792 { 793 char z_name[RTE_MEMZONE_NAMESIZE]; 794 const struct rte_memzone *mz; 795 796 snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d", 797 dev->driver->pci_drv.driver.name, ring_name, 798 dev->data->port_id, queue_id); 799 800 mz = rte_memzone_lookup(z_name); 801 if (mz) 802 return mz; 803 804 return rte_memzone_reserve_aligned(z_name, ring_size, 805 socket_id, 0, VMXNET3_RING_BA_ALIGN); 806 } 807 808 int 809 vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev, 810 uint16_t queue_idx, 811 uint16_t nb_desc, 812 unsigned int socket_id, 813 __rte_unused const struct rte_eth_txconf *tx_conf) 814 { 815 struct vmxnet3_hw *hw = dev->data->dev_private; 816 const struct rte_memzone *mz; 817 struct vmxnet3_tx_queue *txq; 818 struct vmxnet3_cmd_ring *ring; 819 struct vmxnet3_comp_ring *comp_ring; 820 struct vmxnet3_data_ring *data_ring; 821 int size; 822 823 PMD_INIT_FUNC_TRACE(); 824 825 if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOXSUMSCTP) != 826 ETH_TXQ_FLAGS_NOXSUMSCTP) { 827 PMD_INIT_LOG(ERR, "SCTP checksum offload not supported"); 828 return -EINVAL; 829 } 830 831 txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue), 832 RTE_CACHE_LINE_SIZE); 833 if (txq == NULL) { 834 PMD_INIT_LOG(ERR, "Can not allocate tx queue structure"); 835 return -ENOMEM; 836 } 837 838 txq->queue_id = queue_idx; 839 txq->port_id = dev->data->port_id; 840 txq->shared = &hw->tqd_start[queue_idx]; 841 txq->hw = hw; 842 txq->qid = queue_idx; 843 txq->stopped = TRUE; 844 845 ring = &txq->cmd_ring; 846 comp_ring = &txq->comp_ring; 847 data_ring = &txq->data_ring; 848 849 /* Tx vmxnet ring length should be between 512-4096 */ 850 if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) { 851 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u", 852 VMXNET3_DEF_TX_RING_SIZE); 853 return -EINVAL; 854 } else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) { 855 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u", 856 VMXNET3_TX_RING_MAX_SIZE); 857 return -EINVAL; 858 } else { 859 ring->size = nb_desc; 860 ring->size &= ~VMXNET3_RING_SIZE_MASK; 861 } 862 comp_ring->size = data_ring->size = ring->size; 863 864 /* Tx vmxnet rings structure initialization*/ 865 ring->next2fill = 0; 866 ring->next2comp = 0; 867 ring->gen = VMXNET3_INIT_GEN; 868 comp_ring->next2proc = 0; 869 comp_ring->gen = VMXNET3_INIT_GEN; 870 871 size = sizeof(struct Vmxnet3_TxDesc) * ring->size; 872 size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size; 873 size += sizeof(struct Vmxnet3_TxDataDesc) * data_ring->size; 874 875 mz = ring_dma_zone_reserve(dev, "txdesc", queue_idx, size, socket_id); 876 if (mz == NULL) { 877 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone"); 878 return -ENOMEM; 879 } 880 memset(mz->addr, 0, mz->len); 881 882 /* cmd_ring initialization */ 883 ring->base = mz->addr; 884 ring->basePA = mz->phys_addr; 885 886 /* comp_ring initialization */ 887 comp_ring->base = ring->base + ring->size; 888 comp_ring->basePA = ring->basePA + 889 (sizeof(struct Vmxnet3_TxDesc) * ring->size); 890 891 /* data_ring initialization */ 892 data_ring->base = (Vmxnet3_TxDataDesc *)(comp_ring->base + comp_ring->size); 893 data_ring->basePA = comp_ring->basePA + 894 (sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size); 895 896 /* cmd_ring0 buf_info allocation */ 897 ring->buf_info = rte_zmalloc("tx_ring_buf_info", 898 ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE); 899 if (ring->buf_info == NULL) { 900 PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure"); 901 return -ENOMEM; 902 } 903 904 /* Update the data portion with txq */ 905 dev->data->tx_queues[queue_idx] = txq; 906 907 return 0; 908 } 909 910 int 911 vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev, 912 uint16_t queue_idx, 913 uint16_t nb_desc, 914 unsigned int socket_id, 915 __rte_unused const struct rte_eth_rxconf *rx_conf, 916 struct rte_mempool *mp) 917 { 918 const struct rte_memzone *mz; 919 struct vmxnet3_rx_queue *rxq; 920 struct vmxnet3_hw *hw = dev->data->dev_private; 921 struct vmxnet3_cmd_ring *ring0, *ring1, *ring; 922 struct vmxnet3_comp_ring *comp_ring; 923 int size; 924 uint8_t i; 925 char mem_name[32]; 926 927 PMD_INIT_FUNC_TRACE(); 928 929 rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue), 930 RTE_CACHE_LINE_SIZE); 931 if (rxq == NULL) { 932 PMD_INIT_LOG(ERR, "Can not allocate rx queue structure"); 933 return -ENOMEM; 934 } 935 936 rxq->mp = mp; 937 rxq->queue_id = queue_idx; 938 rxq->port_id = dev->data->port_id; 939 rxq->shared = &hw->rqd_start[queue_idx]; 940 rxq->hw = hw; 941 rxq->qid1 = queue_idx; 942 rxq->qid2 = queue_idx + hw->num_rx_queues; 943 rxq->stopped = TRUE; 944 945 ring0 = &rxq->cmd_ring[0]; 946 ring1 = &rxq->cmd_ring[1]; 947 comp_ring = &rxq->comp_ring; 948 949 /* Rx vmxnet rings length should be between 256-4096 */ 950 if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) { 951 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: 256"); 952 return -EINVAL; 953 } else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) { 954 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: 4096"); 955 return -EINVAL; 956 } else { 957 ring0->size = nb_desc; 958 ring0->size &= ~VMXNET3_RING_SIZE_MASK; 959 ring1->size = ring0->size; 960 } 961 962 comp_ring->size = ring0->size + ring1->size; 963 964 /* Rx vmxnet rings structure initialization */ 965 ring0->next2fill = 0; 966 ring1->next2fill = 0; 967 ring0->next2comp = 0; 968 ring1->next2comp = 0; 969 ring0->gen = VMXNET3_INIT_GEN; 970 ring1->gen = VMXNET3_INIT_GEN; 971 comp_ring->next2proc = 0; 972 comp_ring->gen = VMXNET3_INIT_GEN; 973 974 size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size); 975 size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size; 976 977 mz = ring_dma_zone_reserve(dev, "rxdesc", queue_idx, size, socket_id); 978 if (mz == NULL) { 979 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone"); 980 return -ENOMEM; 981 } 982 memset(mz->addr, 0, mz->len); 983 984 /* cmd_ring0 initialization */ 985 ring0->base = mz->addr; 986 ring0->basePA = mz->phys_addr; 987 988 /* cmd_ring1 initialization */ 989 ring1->base = ring0->base + ring0->size; 990 ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size; 991 992 /* comp_ring initialization */ 993 comp_ring->base = ring1->base + ring1->size; 994 comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) * 995 ring1->size; 996 997 /* cmd_ring0-cmd_ring1 buf_info allocation */ 998 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) { 999 1000 ring = &rxq->cmd_ring[i]; 1001 ring->rid = i; 1002 snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i); 1003 1004 ring->buf_info = rte_zmalloc(mem_name, 1005 ring->size * sizeof(vmxnet3_buf_info_t), 1006 RTE_CACHE_LINE_SIZE); 1007 if (ring->buf_info == NULL) { 1008 PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure"); 1009 return -ENOMEM; 1010 } 1011 } 1012 1013 /* Update the data portion with rxq */ 1014 dev->data->rx_queues[queue_idx] = rxq; 1015 1016 return 0; 1017 } 1018 1019 /* 1020 * Initializes Receive Unit 1021 * Load mbufs in rx queue in advance 1022 */ 1023 int 1024 vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev) 1025 { 1026 struct vmxnet3_hw *hw = dev->data->dev_private; 1027 1028 int i, ret; 1029 uint8_t j; 1030 1031 PMD_INIT_FUNC_TRACE(); 1032 1033 for (i = 0; i < hw->num_rx_queues; i++) { 1034 vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i]; 1035 1036 for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) { 1037 /* Passing 0 as alloc_num will allocate full ring */ 1038 ret = vmxnet3_post_rx_bufs(rxq, j); 1039 if (ret <= 0) { 1040 PMD_INIT_LOG(ERR, 1041 "ERROR: Posting Rxq: %d buffers ring: %d", 1042 i, j); 1043 return -ret; 1044 } 1045 /* 1046 * Updating device with the index:next2fill to fill the 1047 * mbufs for coming packets. 1048 */ 1049 if (unlikely(rxq->shared->ctrl.updateRxProd)) { 1050 VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN), 1051 rxq->cmd_ring[j].next2fill); 1052 } 1053 } 1054 rxq->stopped = FALSE; 1055 rxq->start_seg = NULL; 1056 } 1057 1058 for (i = 0; i < dev->data->nb_tx_queues; i++) { 1059 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i]; 1060 1061 txq->stopped = FALSE; 1062 } 1063 1064 return 0; 1065 } 1066 1067 static uint8_t rss_intel_key[40] = { 1068 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 1069 0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0, 1070 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4, 1071 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 1072 0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA, 1073 }; 1074 1075 /* 1076 * Configure RSS feature 1077 */ 1078 int 1079 vmxnet3_rss_configure(struct rte_eth_dev *dev) 1080 { 1081 struct vmxnet3_hw *hw = dev->data->dev_private; 1082 struct VMXNET3_RSSConf *dev_rss_conf; 1083 struct rte_eth_rss_conf *port_rss_conf; 1084 uint64_t rss_hf; 1085 uint8_t i, j; 1086 1087 PMD_INIT_FUNC_TRACE(); 1088 1089 dev_rss_conf = hw->rss_conf; 1090 port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf; 1091 1092 /* loading hashFunc */ 1093 dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ; 1094 /* loading hashKeySize */ 1095 dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE; 1096 /* loading indTableSize: Must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128)*/ 1097 dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4); 1098 1099 if (port_rss_conf->rss_key == NULL) { 1100 /* Default hash key */ 1101 port_rss_conf->rss_key = rss_intel_key; 1102 } 1103 1104 /* loading hashKey */ 1105 memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key, 1106 dev_rss_conf->hashKeySize); 1107 1108 /* loading indTable */ 1109 for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) { 1110 if (j == dev->data->nb_rx_queues) 1111 j = 0; 1112 dev_rss_conf->indTable[i] = j; 1113 } 1114 1115 /* loading hashType */ 1116 dev_rss_conf->hashType = 0; 1117 rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL; 1118 if (rss_hf & ETH_RSS_IPV4) 1119 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4; 1120 if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP) 1121 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4; 1122 if (rss_hf & ETH_RSS_IPV6) 1123 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6; 1124 if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP) 1125 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6; 1126 1127 return VMXNET3_SUCCESS; 1128 } 1129