/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/queue.h>

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <stdint.h>
#include <stdarg.h>
#include <unistd.h>
#include <inttypes.h>

#include <rte_byteorder.h>
#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_log.h>
#include <rte_debug.h>
#include <rte_interrupts.h>
#include <rte_pci.h>
#include <rte_memory.h>
#include <rte_memzone.h>
#include <rte_launch.h>
#include <rte_eal.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_ring.h>
#include <rte_mempool.h>
#include <rte_malloc.h>
#include <rte_mbuf.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_prefetch.h>
#include <rte_ip.h>
#include <rte_udp.h>
#include <rte_tcp.h>
#include <rte_sctp.h>
#include <rte_string_fns.h>
#include <rte_errno.h>

#include "base/vmxnet3_defs.h"
#include "vmxnet3_ring.h"

#include "vmxnet3_logs.h"
#include "vmxnet3_ethdev.h"

static const uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};

static int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *, uint8_t);
static void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);
#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);
#endif

static struct rte_mbuf *
rte_rxmbuf_alloc(struct rte_mempool *mp)
{
	struct rte_mbuf *m;

	m = __rte_mbuf_raw_alloc(mp);
	__rte_mbuf_sanity_check_raw(m, 0);
	return m;
}

#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
static void
vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
{
	uint32_t avail = 0;

	if (rxq == NULL)
		return;

	PMD_RX_LOG(DEBUG,
		   "RXQ: cmd0 base : 0x%p cmd1 base : 0x%p comp ring base : 0x%p.",
		   rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
	PMD_RX_LOG(DEBUG,
		   "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.",
		   (unsigned long)rxq->cmd_ring[0].basePA,
		   (unsigned long)rxq->cmd_ring[1].basePA,
		   (unsigned long)rxq->comp_ring.basePA);

	avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
	PMD_RX_LOG(DEBUG,
		   "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)rxq->cmd_ring[0].size, avail,
		   rxq->comp_ring.next2proc,
		   rxq->cmd_ring[0].size - avail);

	avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
	PMD_RX_LOG(DEBUG, "RXQ:cmd1 size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
		   rxq->cmd_ring[1].size - avail);
}

static void
vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
{
	uint32_t avail = 0;

	if (txq == NULL)
		return;

	PMD_TX_LOG(DEBUG, "TXQ: cmd base : 0x%p comp ring base : 0x%p data ring base : 0x%p.",
		   txq->cmd_ring.base, txq->comp_ring.base, txq->data_ring.base);
	PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx data ring basePA : 0x%lx.",
		   (unsigned long)txq->cmd_ring.basePA,
		   (unsigned long)txq->comp_ring.basePA,
		   (unsigned long)txq->data_ring.basePA);

	avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
	PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)txq->cmd_ring.size, avail,
		   txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
}
#endif

static void
vmxnet3_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
{
	while (ring->next2comp != ring->next2fill) {
		/* No need to worry about tx desc ownership, device is quiesced by now. */
		vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;

		if (buf_info->m) {
			rte_pktmbuf_free(buf_info->m);
			buf_info->m = NULL;
			buf_info->bufPA = 0;
			buf_info->len = 0;
		}
		vmxnet3_cmd_ring_adv_next2comp(ring);
	}
}

static void
vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
{
	vmxnet3_cmd_ring_release_mbufs(ring);
	rte_free(ring->buf_info);
	ring->buf_info = NULL;
}

void
vmxnet3_dev_tx_queue_release(void *txq)
{
	vmxnet3_tx_queue_t *tq = txq;

	if (tq != NULL) {
		/* Release the cmd_ring */
		vmxnet3_cmd_ring_release(&tq->cmd_ring);
	}
}

void
vmxnet3_dev_rx_queue_release(void *rxq)
{
	int i;
	vmxnet3_rx_queue_t *rq = rxq;

	if (rq != NULL) {
		/* Release both the cmd_rings */
		for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
			vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);
	}
}

static void
vmxnet3_dev_tx_queue_reset(void *txq)
{
	vmxnet3_tx_queue_t *tq = txq;
	struct vmxnet3_cmd_ring *ring = &tq->cmd_ring;
	struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
	struct vmxnet3_data_ring *data_ring = &tq->data_ring;
	int size;

	if (tq != NULL) {
		/* Release the cmd_ring mbufs */
		vmxnet3_cmd_ring_release_mbufs(&tq->cmd_ring);
	}

	/* Tx vmxnet rings structure initialization */
	ring->next2fill = 0;
	ring->next2comp = 0;
	ring->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
	size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
	size += sizeof(struct Vmxnet3_TxDataDesc) * data_ring->size;

	memset(ring->base, 0, size);
}

static void
vmxnet3_dev_rx_queue_reset(void *rxq)
{
	int i;
	vmxnet3_rx_queue_t *rq = rxq;
	struct vmxnet3_cmd_ring *ring0, *ring1;
	struct vmxnet3_comp_ring *comp_ring;
	int size;

	if (rq != NULL) {
		/* Release both the cmd_rings mbufs */
		for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
			vmxnet3_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
	}

	ring0 = &rq->cmd_ring[0];
	ring1 = &rq->cmd_ring[1];
	comp_ring = &rq->comp_ring;

	/* Rx vmxnet rings structure initialization */
	ring0->next2fill = 0;
	ring1->next2fill = 0;
	ring0->next2comp = 0;
	ring1->next2comp = 0;
	ring0->gen = VMXNET3_INIT_GEN;
	ring1->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
	size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;

	memset(ring0->base, 0, size);
}

void
vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
{
	unsigned i;

	PMD_INIT_FUNC_TRACE();

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];

		if (txq != NULL) {
			txq->stopped = TRUE;
			vmxnet3_dev_tx_queue_reset(txq);
		}
	}

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];

		if (rxq != NULL) {
			rxq->stopped = TRUE;
			vmxnet3_dev_rx_queue_reset(rxq);
		}
	}
}
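
/*
 * TX completion handling, as implemented by the two functions below: each
 * Vmxnet3_TxCompDesc whose gen bit matches comp_ring->gen reports the
 * cmd_ring index of a packet's EOP descriptor.  vmxnet3_unmap_pkt() frees the
 * mbuf chain remembered at that EOP slot and retires every cmd_ring
 * descriptor from next2comp up to and including the EOP, returning the number
 * of descriptors reclaimed.
 */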

static int
vmxnet3_unmap_pkt(uint16_t eop_idx, vmxnet3_tx_queue_t *txq)
{
	int completed = 0;
	struct rte_mbuf *mbuf;

	/* Release cmd_ring descriptor and free mbuf */
	VMXNET3_ASSERT(txq->cmd_ring.base[eop_idx].txd.eop == 1);

	mbuf = txq->cmd_ring.buf_info[eop_idx].m;
	if (mbuf == NULL)
		rte_panic("EOP desc does not point to a valid mbuf");
	rte_pktmbuf_free(mbuf);

	txq->cmd_ring.buf_info[eop_idx].m = NULL;

	while (txq->cmd_ring.next2comp != eop_idx) {
		/* no out-of-order completion */
		VMXNET3_ASSERT(txq->cmd_ring.base[txq->cmd_ring.next2comp].txd.cq == 0);
		vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
		completed++;
	}

	/* Mark the txd for which the tcd was generated as completed */
	vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);

	return completed + 1;
}

static void
vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
{
	int completed = 0;
	vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
	struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
		(comp_ring->base + comp_ring->next2proc);

	while (tcd->gen == comp_ring->gen) {
		completed += vmxnet3_unmap_pkt(tcd->txdIdx, txq);

		vmxnet3_comp_ring_adv_next2proc(comp_ring);
		tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
						    comp_ring->next2proc);
	}

	PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);
}
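
/*
 * Transmit path, as implemented below:
 *  - single-segment packets no longer than VMXNET3_HDR_COPY_SIZE are copied
 *    into the data ring so the device does not have to fetch a separate
 *    buffer;
 *  - the SOP descriptor is first written with the inverted gen bit and is
 *    flipped to the current gen (after a compiler barrier) only once the
 *    whole chain has been filled in, which hands ownership of the packet to
 *    the device;
 *  - the TXPROD doorbell is written only when txNumDeferred reaches the
 *    txThreshold value in the shared queue control area, batching register
 *    writes.
 */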

uint16_t
vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
		  uint16_t nb_pkts)
{
	uint16_t nb_tx;
	vmxnet3_tx_queue_t *txq = tx_queue;
	struct vmxnet3_hw *hw = txq->hw;
	Vmxnet3_TxQueueCtrl *txq_ctrl = &txq->shared->ctrl;
	uint32_t deferred = rte_le_to_cpu_32(txq_ctrl->txNumDeferred);

	if (unlikely(txq->stopped)) {
		PMD_TX_LOG(DEBUG, "Tx queue is stopped.");
		return 0;
	}

	/* Free up the comp_descriptors aggressively */
	vmxnet3_tq_tx_complete(txq);

	nb_tx = 0;
	while (nb_tx < nb_pkts) {
		Vmxnet3_GenericDesc *gdesc;
		vmxnet3_buf_info_t *tbi;
		uint32_t first2fill, avail, dw2;
		struct rte_mbuf *txm = tx_pkts[nb_tx];
		struct rte_mbuf *m_seg = txm;
		int copy_size = 0;
		bool tso = (txm->ol_flags & PKT_TX_TCP_SEG) != 0;
		/* # of descriptors needed for a packet. */
		unsigned count = txm->nb_segs;

		avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
		if (count > avail) {
			/* Is the command ring full? */
			if (unlikely(avail == 0)) {
				PMD_TX_LOG(DEBUG, "No free ring descriptors");
				txq->stats.tx_ring_full++;
				txq->stats.drop_total += (nb_pkts - nb_tx);
				break;
			}

			/* The command ring is not full but cannot hold this
			 * multi-segmented packet. Try the next packet in this
			 * case.
			 */
			PMD_TX_LOG(DEBUG, "Running out of ring descriptors "
				   "(avail %d needed %d)", avail, count);
			txq->stats.drop_total++;
			if (tso)
				txq->stats.drop_tso++;
			rte_pktmbuf_free(txm);
			nb_tx++;
			continue;
		}

		/* Drop a non-TSO packet that is excessively fragmented */
		if (unlikely(!tso && count > VMXNET3_MAX_TXD_PER_PKT)) {
			PMD_TX_LOG(ERR, "Non-TSO packet cannot occupy more than %d tx "
				   "descriptors. Packet dropped.", VMXNET3_MAX_TXD_PER_PKT);
			txq->stats.drop_too_many_segs++;
			txq->stats.drop_total++;
			rte_pktmbuf_free(txm);
			nb_tx++;
			continue;
		}

		if (txm->nb_segs == 1 && rte_pktmbuf_pkt_len(txm) <= VMXNET3_HDR_COPY_SIZE) {
			struct Vmxnet3_TxDataDesc *tdd;

			tdd = txq->data_ring.base + txq->cmd_ring.next2fill;
			copy_size = rte_pktmbuf_pkt_len(txm);
			rte_memcpy(tdd->data, rte_pktmbuf_mtod(txm, char *), copy_size);
		}

		/* use the previous gen bit for the SOP desc */
		dw2 = (txq->cmd_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
		first2fill = txq->cmd_ring.next2fill;
		do {
			/* Remember the transmit buffer for cleanup */
			tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;

			/* NB: the following assumes that the maximum VMXNET3
			 * transmit buffer size (16K) is greater than the
			 * maximum size of an mbuf segment.
			 */
			gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill;
			if (copy_size)
				gdesc->txd.addr = rte_cpu_to_le_64(txq->data_ring.basePA +
								   txq->cmd_ring.next2fill *
								   sizeof(struct Vmxnet3_TxDataDesc));
			else
				gdesc->txd.addr = rte_mbuf_data_dma_addr(m_seg);

			gdesc->dword[2] = dw2 | m_seg->data_len;
			gdesc->dword[3] = 0;

			/* move to the next2fill descriptor */
			vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);

			/* use the right gen for non-SOP desc */
			dw2 = txq->cmd_ring.gen << VMXNET3_TXD_GEN_SHIFT;
		} while ((m_seg = m_seg->next) != NULL);

		/* set the last buf_info for the pkt */
		tbi->m = txm;
		/* Update the EOP descriptor */
		gdesc->dword[3] |= VMXNET3_TXD_EOP | VMXNET3_TXD_CQ;

		/* Add VLAN tag if present */
		gdesc = txq->cmd_ring.base + first2fill;
		if (txm->ol_flags & PKT_TX_VLAN_PKT) {
			gdesc->txd.ti = 1;
			gdesc->txd.tci = txm->vlan_tci;
		}

		if (tso) {
			uint16_t mss = txm->tso_segsz;

			VMXNET3_ASSERT(mss > 0);

			gdesc->txd.hlen = txm->l2_len + txm->l3_len + txm->l4_len;
			gdesc->txd.om = VMXNET3_OM_TSO;
			gdesc->txd.msscof = mss;

			deferred += (rte_pktmbuf_pkt_len(txm) - gdesc->txd.hlen + mss - 1) / mss;
		} else if (txm->ol_flags & PKT_TX_L4_MASK) {
			gdesc->txd.om = VMXNET3_OM_CSUM;
			gdesc->txd.hlen = txm->l2_len + txm->l3_len;

			switch (txm->ol_flags & PKT_TX_L4_MASK) {
			case PKT_TX_TCP_CKSUM:
				gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct tcp_hdr, cksum);
				break;
			case PKT_TX_UDP_CKSUM:
				gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct udp_hdr, dgram_cksum);
				break;
			default:
				PMD_TX_LOG(WARNING, "requested cksum offload not supported %#llx",
					   txm->ol_flags & PKT_TX_L4_MASK);
				abort();
			}
			deferred++;
		} else {
			gdesc->txd.hlen = 0;
			gdesc->txd.om = VMXNET3_OM_NONE;
			gdesc->txd.msscof = 0;
			deferred++;
		}

		/* flip the GEN bit on the SOP */
		rte_compiler_barrier();
		gdesc->dword[2] ^= VMXNET3_TXD_GEN;

		txq_ctrl->txNumDeferred = rte_cpu_to_le_32(deferred);
		nb_tx++;
	}

	PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", rte_le_to_cpu_32(txq_ctrl->txThreshold));

	if (deferred >= rte_le_to_cpu_32(txq_ctrl->txThreshold)) {
		txq_ctrl->txNumDeferred = 0;
		/* Notify vSwitch that packets are available. */
		VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN),
				       txq->cmd_ring.next2fill);
	}

	return nb_tx;
}

/*
 * Allocate mbufs and post Rx descriptors with the buffer details so that the
 * device can receive packets into those buffers.
 * Ring layout:
 * Among the two rings, the 1st ring contains buffers of type 0 and type 1.
 * bufs_per_pkt is set such that for non-LRO cases all the buffers required
 * by a frame fit in the 1st ring (the 1st buf of type 0 and the rest of
 * type 1). The 2nd ring contains buffers of type 1 alone and is used mostly
 * for LRO.
 */
static int
vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
{
	int err = 0;
	uint32_t i = 0, val = 0;
	struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];

	if (ring_id == 0) {
		/* Usually: One HEAD type buf per packet
		 * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
		 * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
		 */

		/* We use a single packet buffer, so all heads here */
		val = VMXNET3_RXD_BTYPE_HEAD;
	} else {
		/* All BODY type buffers for the 2nd ring */
		val = VMXNET3_RXD_BTYPE_BODY;
	}

	while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
		struct Vmxnet3_RxDesc *rxd;
		struct rte_mbuf *mbuf;
		vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];

		rxd = (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);

		/* Allocate a blank mbuf for the current Rx Descriptor */
		mbuf = rte_rxmbuf_alloc(rxq->mp);
		if (unlikely(mbuf == NULL)) {
			PMD_RX_LOG(ERR, "Error allocating mbuf");
			rxq->stats.rx_buf_alloc_failure++;
			err = ENOMEM;
			break;
		}

		/*
		 * Load the mbuf pointer into buf_info[ring_size];
		 * the buf_info structure is the equivalent of a cookie for a
		 * virtio virtqueue.
		 */
		buf_info->m = mbuf;
		buf_info->len = (uint16_t)(mbuf->buf_len -
					   RTE_PKTMBUF_HEADROOM);
		buf_info->bufPA = rte_mbuf_data_dma_addr_default(mbuf);

		/* Load the Rx Descriptor with the buffer's GPA */
		rxd->addr = buf_info->bufPA;

		/* After this point rxd->addr MUST not be NULL */
		rxd->btype = val;
		rxd->len = buf_info->len;
		/* Flip the gen bit at the end to change ownership */
		rxd->gen = ring->gen;

		vmxnet3_cmd_ring_adv_next2fill(ring);
		i++;
	}

	/* Return an error only if no buffers are posted at present */
	if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))
		return -err;
	else
		return i;
}

/* Receive side checksum and other offloads */
static void
vmxnet3_rx_offload(const Vmxnet3_RxCompDesc *rcd, struct rte_mbuf *rxm)
{
	/* Check for a hardware-stripped VLAN tag */
	if (rcd->ts) {
		rxm->ol_flags |= PKT_RX_VLAN_PKT;
		rxm->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
	}

	/* Check for RSS */
	if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE) {
		rxm->ol_flags |= PKT_RX_RSS_HASH;
		rxm->hash.rss = rcd->rssHash;
	}

	/* Check packet type, checksum errors, etc. Only IPv4 is supported for now. */
	if (rcd->v4) {
		struct ether_hdr *eth = rte_pktmbuf_mtod(rxm, struct ether_hdr *);
		struct ipv4_hdr *ip = (struct ipv4_hdr *)(eth + 1);

		if (((ip->version_ihl & 0xf) << 2) > (int)sizeof(struct ipv4_hdr))
			rxm->packet_type = RTE_PTYPE_L3_IPV4_EXT;
		else
			rxm->packet_type = RTE_PTYPE_L3_IPV4;

		if (!rcd->cnc) {
			if (!rcd->ipc)
				rxm->ol_flags |= PKT_RX_IP_CKSUM_BAD;

			if ((rcd->tcp || rcd->udp) && !rcd->tuc)
				rxm->ol_flags |= PKT_RX_L4_CKSUM_BAD;
		}
	}
}
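
/*
 * Receive path, as implemented below: completion descriptors are consumed
 * while their gen bit matches comp_ring->gen.  An SOP completion starts a new
 * packet (rxq->start_seg); BODY completions are chained onto rxq->last_seg
 * until the EOP completion, at which point the assembled chain is handed to
 * the caller.  After each completion the corresponding command ring is
 * replenished and, if the device requested it (updateRxProd), the RXPROD
 * doorbell for that ring is written with the new next2fill index.
 */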

/*
 * Process the Rx Completion Ring of the given vmxnet3_rx_queue
 * for a burst of nb_pkts and return the number of packets received
 */
uint16_t
vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
	uint16_t nb_rx;
	uint32_t nb_rxd, idx;
	uint8_t ring_idx;
	vmxnet3_rx_queue_t *rxq;
	Vmxnet3_RxCompDesc *rcd;
	vmxnet3_buf_info_t *rbi;
	Vmxnet3_RxDesc *rxd;
	struct rte_mbuf *rxm = NULL;
	struct vmxnet3_hw *hw;

	nb_rx = 0;
	ring_idx = 0;
	nb_rxd = 0;
	idx = 0;

	rxq = rx_queue;
	hw = rxq->hw;

	rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;

	if (unlikely(rxq->stopped)) {
		PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
		return 0;
	}

	while (rcd->gen == rxq->comp_ring.gen) {
		if (nb_rx >= nb_pkts)
			break;

		idx = rcd->rxdIdx;
		ring_idx = (uint8_t)((rcd->rqID == rxq->qid1) ? 0 : 1);
		rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
		rbi = rxq->cmd_ring[ring_idx].buf_info + idx;

		PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);

		VMXNET3_ASSERT(rcd->len <= rxd->len);
		VMXNET3_ASSERT(rbi->m);

		/* Get the packet buffer pointer from buf_info */
		rxm = rbi->m;

		/* Clear the descriptor's associated buf_info so it can be reused */
		rbi->m = NULL;
		rbi->bufPA = 0;

		/* Record the index at which a packet was received */
		rxq->cmd_ring[ring_idx].next2comp = idx;

		/* For an RCD with EOP set, check if there is a frame error */
		if (unlikely(rcd->eop && rcd->err)) {
			rxq->stats.drop_total++;
			rxq->stats.drop_err++;

			if (!rcd->fcs) {
				rxq->stats.drop_fcs++;
				PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
			}
			PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
				   (int)(rcd - (struct Vmxnet3_RxCompDesc *)
					 rxq->comp_ring.base), rcd->rxdIdx);
			rte_pktmbuf_free_seg(rxm);
			goto rcd_done;
		}

		/* Initialize the newly received packet buffer */
		rxm->port = rxq->port_id;
		rxm->nb_segs = 1;
		rxm->next = NULL;
		rxm->pkt_len = (uint16_t)rcd->len;
		rxm->data_len = (uint16_t)rcd->len;
		rxm->data_off = RTE_PKTMBUF_HEADROOM;
		rxm->ol_flags = 0;
		rxm->vlan_tci = 0;

		/*
		 * If this is the first buffer of the received packet,
		 * set the pointer to the first mbuf of the packet.
		 * Otherwise, update the total length and the number of segments
		 * of the current scattered packet, and update the pointer to
		 * the last mbuf of the current packet.
		 */
		if (rcd->sop) {
			VMXNET3_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);

			if (unlikely(rcd->len == 0)) {
				VMXNET3_ASSERT(rcd->eop);

				PMD_RX_LOG(DEBUG,
					   "Rx buf was skipped. rxring[%d][%d])",
					   ring_idx, idx);
				rte_pktmbuf_free_seg(rxm);
				goto rcd_done;
			}

			rxq->start_seg = rxm;
			vmxnet3_rx_offload(rcd, rxm);
		} else {
			struct rte_mbuf *start = rxq->start_seg;

			VMXNET3_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_BODY);

			start->pkt_len += rxm->data_len;
			start->nb_segs++;

			rxq->last_seg->next = rxm;
		}
		rxq->last_seg = rxm;

		if (rcd->eop) {
			rx_pkts[nb_rx++] = rxq->start_seg;
			rxq->start_seg = NULL;
		}

rcd_done:
		rxq->cmd_ring[ring_idx].next2comp = idx;
		VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp, rxq->cmd_ring[ring_idx].size);

		/* Allocate new buffers and post fresh Rx descriptors */
		vmxnet3_post_rx_bufs(rxq, ring_idx);
		if (unlikely(rxq->shared->ctrl.updateRxProd)) {
			VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
					       rxq->cmd_ring[ring_idx].next2fill);
		}

		/* Advance to the next descriptor in comp_ring */
		vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);

		rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
		nb_rxd++;
		if (nb_rxd > rxq->cmd_ring[0].size) {
			PMD_RX_LOG(ERR,
				   "Used up quota of receiving packets,"
				   " relinquish control.");
			break;
		}
	}

	return nb_rx;
}

/*
 * Create a memzone for the device rings. malloc can't be used as the physical
 * address is needed. If the memzone is already created, then this function
 * returns a ptr to the old one.
 */
static const struct rte_memzone *
ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name,
		      uint16_t queue_id, uint32_t ring_size, int socket_id)
{
	char z_name[RTE_MEMZONE_NAMESIZE];
	const struct rte_memzone *mz;

	snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
		 dev->driver->pci_drv.name, ring_name,
		 dev->data->port_id, queue_id);

	mz = rte_memzone_lookup(z_name);
	if (mz)
		return mz;

	return rte_memzone_reserve_aligned(z_name, ring_size,
					   socket_id, 0, VMXNET3_RING_BA_ALIGN);
}
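
/*
 * The three TX rings share one physically contiguous memzone.  The layout
 * assumed by the pointer/PA arithmetic in the setup code below is:
 *
 *   [Vmxnet3_TxDesc * ring->size][Vmxnet3_TxCompDesc * comp_ring->size]
 *   [Vmxnet3_TxDataDesc * data_ring->size]
 *
 * so each ring's base pointer and basePA are simple offsets from the start of
 * the memzone.  The Rx queue setup further down follows the same pattern with
 * its two command rings and one completion ring.
 */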

int
vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
			   uint16_t queue_idx,
			   uint16_t nb_desc,
			   unsigned int socket_id,
			   __attribute__((unused)) const struct rte_eth_txconf *tx_conf)
{
	struct vmxnet3_hw *hw = dev->data->dev_private;
	const struct rte_memzone *mz;
	struct vmxnet3_tx_queue *txq;
	struct vmxnet3_cmd_ring *ring;
	struct vmxnet3_comp_ring *comp_ring;
	struct vmxnet3_data_ring *data_ring;
	int size;

	PMD_INIT_FUNC_TRACE();

	if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOXSUMSCTP) !=
	    ETH_TXQ_FLAGS_NOXSUMSCTP) {
		PMD_INIT_LOG(ERR, "SCTP checksum offload not supported");
		return -EINVAL;
	}

	txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue), RTE_CACHE_LINE_SIZE);
	if (txq == NULL) {
		PMD_INIT_LOG(ERR, "Can not allocate tx queue structure");
		return -ENOMEM;
	}

	txq->queue_id = queue_idx;
	txq->port_id = dev->data->port_id;
	txq->shared = &hw->tqd_start[queue_idx];
	txq->hw = hw;
	txq->qid = queue_idx;
	txq->stopped = TRUE;

	ring = &txq->cmd_ring;
	comp_ring = &txq->comp_ring;
	data_ring = &txq->data_ring;

	/* Tx vmxnet ring length should be between 512-4096 */
	if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u",
			     VMXNET3_DEF_TX_RING_SIZE);
		return -EINVAL;
	} else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u",
			     VMXNET3_TX_RING_MAX_SIZE);
		return -EINVAL;
	} else {
		ring->size = nb_desc;
		ring->size &= ~VMXNET3_RING_SIZE_MASK;
	}
	comp_ring->size = data_ring->size = ring->size;

	/* Tx vmxnet rings structure initialization */
	ring->next2fill = 0;
	ring->next2comp = 0;
	ring->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
	size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
	size += sizeof(struct Vmxnet3_TxDataDesc) * data_ring->size;

	mz = ring_dma_zone_reserve(dev, "txdesc", queue_idx, size, socket_id);
	if (mz == NULL) {
		PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
		return -ENOMEM;
	}
	memset(mz->addr, 0, mz->len);

	/* cmd_ring initialization */
	ring->base = mz->addr;
	ring->basePA = mz->phys_addr;

	/* comp_ring initialization */
	comp_ring->base = ring->base + ring->size;
	comp_ring->basePA = ring->basePA +
		(sizeof(struct Vmxnet3_TxDesc) * ring->size);

	/* data_ring initialization */
	data_ring->base = (Vmxnet3_TxDataDesc *)(comp_ring->base + comp_ring->size);
	data_ring->basePA = comp_ring->basePA +
		(sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size);

	/* cmd_ring0 buf_info allocation */
	ring->buf_info = rte_zmalloc("tx_ring_buf_info",
				     ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
	if (ring->buf_info == NULL) {
		PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure");
		return -ENOMEM;
	}

	/* Update the data portion with txq */
	dev->data->tx_queues[queue_idx] = txq;

	return 0;
}
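
/*
 * Note on queue IDs: each Rx queue owns two command rings.  qid1 is the
 * queue's own index and qid2 is offset by num_rx_queues; the receive loop
 * above uses the rqID reported in each completion descriptor to tell which of
 * the two rings (and hence which buf_info array) a buffer came from.
 */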

int
vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
			   uint16_t queue_idx,
			   uint16_t nb_desc,
			   unsigned int socket_id,
			   __attribute__((unused)) const struct rte_eth_rxconf *rx_conf,
			   struct rte_mempool *mp)
{
	const struct rte_memzone *mz;
	struct vmxnet3_rx_queue *rxq;
	struct vmxnet3_hw *hw = dev->data->dev_private;
	struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
	struct vmxnet3_comp_ring *comp_ring;
	int size;
	uint8_t i;
	char mem_name[32];

	PMD_INIT_FUNC_TRACE();

	rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue), RTE_CACHE_LINE_SIZE);
	if (rxq == NULL) {
		PMD_INIT_LOG(ERR, "Can not allocate rx queue structure");
		return -ENOMEM;
	}

	rxq->mp = mp;
	rxq->queue_id = queue_idx;
	rxq->port_id = dev->data->port_id;
	rxq->shared = &hw->rqd_start[queue_idx];
	rxq->hw = hw;
	rxq->qid1 = queue_idx;
	rxq->qid2 = queue_idx + hw->num_rx_queues;
	rxq->stopped = TRUE;

	ring0 = &rxq->cmd_ring[0];
	ring1 = &rxq->cmd_ring[1];
	comp_ring = &rxq->comp_ring;

	/* Rx vmxnet rings length should be between 256-4096 */
	if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: 256");
		return -EINVAL;
	} else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: 4096");
		return -EINVAL;
	} else {
		ring0->size = nb_desc;
		ring0->size &= ~VMXNET3_RING_SIZE_MASK;
		ring1->size = ring0->size;
	}

	comp_ring->size = ring0->size + ring1->size;

	/* Rx vmxnet rings structure initialization */
	ring0->next2fill = 0;
	ring1->next2fill = 0;
	ring0->next2comp = 0;
	ring1->next2comp = 0;
	ring0->gen = VMXNET3_INIT_GEN;
	ring1->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
	size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;

	mz = ring_dma_zone_reserve(dev, "rxdesc", queue_idx, size, socket_id);
	if (mz == NULL) {
		PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
		return -ENOMEM;
	}
	memset(mz->addr, 0, mz->len);

	/* cmd_ring0 initialization */
	ring0->base = mz->addr;
	ring0->basePA = mz->phys_addr;

	/* cmd_ring1 initialization */
	ring1->base = ring0->base + ring0->size;
	ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;

	/* comp_ring initialization */
	comp_ring->base = ring1->base + ring1->size;
	comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *
		ring1->size;

	/* cmd_ring0-cmd_ring1 buf_info allocation */
	for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {
		ring = &rxq->cmd_ring[i];
		ring->rid = i;
		snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);

		ring->buf_info = rte_zmalloc(mem_name, ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
		if (ring->buf_info == NULL) {
			PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure");
			return -ENOMEM;
		}
	}

	/* Update the data portion with rxq */
	dev->data->rx_queues[queue_idx] = rxq;

	return 0;
}

/*
 * Initializes the Receive Unit.
 * Loads mbufs into the rx queues in advance.
 */
int
vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
{
	struct vmxnet3_hw *hw = dev->data->dev_private;

	int i, ret;
	uint8_t j;

	PMD_INIT_FUNC_TRACE();

	for (i = 0; i < hw->num_rx_queues; i++) {
		vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];

		for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
			/* Post as many buffers as the ring can hold */
			ret = vmxnet3_post_rx_bufs(rxq, j);
			if (ret <= 0) {
				PMD_INIT_LOG(ERR, "ERROR: Posting Rxq: %d buffers ring: %d", i, j);
				return -ret;
			}
			/* Update the device with next2fill so the newly posted
			 * buffers can be used for incoming packets.
			 */
			if (unlikely(rxq->shared->ctrl.updateRxProd)) {
				VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
						       rxq->cmd_ring[j].next2fill);
			}
		}
		rxq->stopped = FALSE;
		rxq->start_seg = NULL;
	}

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];

		txq->stopped = FALSE;
	}

	return 0;
}
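
/*
 * RSS setup, as configured below: a Toeplitz hash over IPv4/IPv6 (optionally
 * TCP) flows, keyed with the static 40-byte key that follows unless the
 * application supplies its own, and an indirection table of
 * num_rx_queues * 4 entries filled round-robin over the configured Rx queues.
 */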

static uint8_t rss_intel_key[40] = {
	0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
	0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
	0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
	0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
	0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
};

/*
 * Configure RSS feature
 */
int
vmxnet3_rss_configure(struct rte_eth_dev *dev)
{
	struct vmxnet3_hw *hw = dev->data->dev_private;
	struct VMXNET3_RSSConf *dev_rss_conf;
	struct rte_eth_rss_conf *port_rss_conf;
	uint64_t rss_hf;
	uint8_t i, j;

	PMD_INIT_FUNC_TRACE();

	dev_rss_conf = hw->rss_conf;
	port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;

	/* loading hashFunc */
	dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
	/* loading hashKeySize */
	dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
	/* loading indTableSize: must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128) */
	dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4);

	if (port_rss_conf->rss_key == NULL) {
		/* Default hash key */
		port_rss_conf->rss_key = rss_intel_key;
	}

	/* loading hashKey */
	memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key, dev_rss_conf->hashKeySize);

	/* loading indTable */
	for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
		if (j == dev->data->nb_rx_queues)
			j = 0;
		dev_rss_conf->indTable[i] = j;
	}

	/* loading hashType */
	dev_rss_conf->hashType = 0;
	rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
	if (rss_hf & ETH_RSS_IPV4)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
	if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
	if (rss_hf & ETH_RSS_IPV6)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
	if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;

	return VMXNET3_SUCCESS;
}