/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/queue.h>

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <stdint.h>
#include <stdarg.h>
#include <unistd.h>
#include <inttypes.h>

#include <rte_byteorder.h>
#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_log.h>
#include <rte_debug.h>
#include <rte_interrupts.h>
#include <rte_pci.h>
#include <rte_memory.h>
#include <rte_memzone.h>
#include <rte_launch.h>
#include <rte_eal.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_mempool.h>
#include <rte_malloc.h>
#include <rte_mbuf.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_prefetch.h>
#include <rte_ip.h>
#include <rte_udp.h>
#include <rte_tcp.h>
#include <rte_sctp.h>
#include <rte_string_fns.h>
#include <rte_errno.h>

#include "base/vmxnet3_defs.h"
#include "vmxnet3_ring.h"

#include "vmxnet3_logs.h"
#include "vmxnet3_ethdev.h"

static const uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};

static int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *, uint8_t);
static void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);
#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);
#endif

#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
static void
vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
{
	uint32_t avail = 0;

	if (rxq == NULL)
		return;

	PMD_RX_LOG(DEBUG,
		   "RXQ: cmd0 base : %p cmd1 base : %p comp ring base : %p.",
		   rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
	PMD_RX_LOG(DEBUG,
		   "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.",
		   (unsigned long)rxq->cmd_ring[0].basePA,
		   (unsigned long)rxq->cmd_ring[1].basePA,
		   (unsigned long)rxq->comp_ring.basePA);

	avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
	PMD_RX_LOG(DEBUG,
		   "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)rxq->cmd_ring[0].size, avail,
		   rxq->comp_ring.next2proc,
		   rxq->cmd_ring[0].size - avail);

	avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
	PMD_RX_LOG(DEBUG, "RXQ:cmd1 size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
		   rxq->cmd_ring[1].size - avail);
}

static void
vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
{
	uint32_t avail = 0;

	if (txq == NULL)
		return;

	PMD_TX_LOG(DEBUG, "TXQ: cmd base : %p comp ring base : %p data ring base : %p.",
		   txq->cmd_ring.base, txq->comp_ring.base, txq->data_ring.base);
	PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx data ring basePA : 0x%lx.",
		   (unsigned long)txq->cmd_ring.basePA,
		   (unsigned long)txq->comp_ring.basePA,
		   (unsigned long)txq->data_ring.basePA);

	avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
	PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)txq->cmd_ring.size, avail,
		   txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
}
#endif

static void
vmxnet3_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
{
	while (ring->next2comp != ring->next2fill) {
		/* No need to worry about tx desc ownership, device is quiesced by now. */
		vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;

		if (buf_info->m) {
			rte_pktmbuf_free(buf_info->m);
			buf_info->m = NULL;
			buf_info->bufPA = 0;
			buf_info->len = 0;
		}
		vmxnet3_cmd_ring_adv_next2comp(ring);
	}
}

static void
vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
{
	vmxnet3_cmd_ring_release_mbufs(ring);
	rte_free(ring->buf_info);
	ring->buf_info = NULL;
}

void
vmxnet3_dev_tx_queue_release(void *txq)
{
	vmxnet3_tx_queue_t *tq = txq;

	if (tq != NULL) {
		/* Release the cmd_ring */
		vmxnet3_cmd_ring_release(&tq->cmd_ring);
	}
}

void
vmxnet3_dev_rx_queue_release(void *rxq)
{
	int i;
	vmxnet3_rx_queue_t *rq = rxq;

	if (rq != NULL) {
		/* Release both the cmd_rings */
		for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
			vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);
	}
}

static void
vmxnet3_dev_tx_queue_reset(void *txq)
{
	vmxnet3_tx_queue_t *tq = txq;
	struct vmxnet3_cmd_ring *ring = &tq->cmd_ring;
	struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
	struct vmxnet3_data_ring *data_ring = &tq->data_ring;
	int size;

	if (tq != NULL) {
		/* Release the cmd_ring mbufs */
		vmxnet3_cmd_ring_release_mbufs(&tq->cmd_ring);
	}

	/* Tx vmxnet rings structure initialization */
	ring->next2fill = 0;
	ring->next2comp = 0;
	ring->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
	size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
	size += sizeof(struct Vmxnet3_TxDataDesc) * data_ring->size;

	memset(ring->base, 0, size);
}

static void
vmxnet3_dev_rx_queue_reset(void *rxq)
{
	int i;
	vmxnet3_rx_queue_t *rq = rxq;
	struct vmxnet3_cmd_ring *ring0, *ring1;
	struct vmxnet3_comp_ring *comp_ring;
	int size;

	if (rq != NULL) {
		/* Release both the cmd_rings mbufs */
		for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
			vmxnet3_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
	}

	ring0 = &rq->cmd_ring[0];
	ring1 = &rq->cmd_ring[1];
	comp_ring = &rq->comp_ring;

	/* Rx vmxnet rings structure initialization */
	ring0->next2fill = 0;
	ring1->next2fill = 0;
	ring0->next2comp = 0;
	ring1->next2comp = 0;
	ring0->gen = VMXNET3_INIT_GEN;
	ring1->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
	size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;

	memset(ring0->base, 0, size);
}

void
vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
{
	unsigned i;

	PMD_INIT_FUNC_TRACE();

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];

		if (txq != NULL) {
			txq->stopped = TRUE;
			vmxnet3_dev_tx_queue_reset(txq);
		}
	}

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];

		if (rxq != NULL) {
			rxq->stopped = TRUE;
			vmxnet3_dev_rx_queue_reset(rxq);
		}
	}
}

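/*
 * Free the mbuf attached to the EOP descriptor at eop_idx and advance
 * next2comp past every command-ring descriptor belonging to that packet.
 * Returns the number of descriptors released.
 */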
static int
vmxnet3_unmap_pkt(uint16_t eop_idx, vmxnet3_tx_queue_t *txq)
{
	int completed = 0;
	struct rte_mbuf *mbuf;

	/* Release cmd_ring descriptor and free mbuf */
	RTE_ASSERT(txq->cmd_ring.base[eop_idx].txd.eop == 1);

	mbuf = txq->cmd_ring.buf_info[eop_idx].m;
	if (mbuf == NULL)
		rte_panic("EOP desc does not point to a valid mbuf");
	rte_pktmbuf_free(mbuf);

	txq->cmd_ring.buf_info[eop_idx].m = NULL;

	while (txq->cmd_ring.next2comp != eop_idx) {
		/* no out-of-order completion */
		RTE_ASSERT(txq->cmd_ring.base[txq->cmd_ring.next2comp].txd.cq == 0);
		vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
		completed++;
	}

	/* Mark the txd for which tcd was generated as completed */
	vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);

	return completed + 1;
}

static void
vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
{
	int completed = 0;
	vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
	struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
		(comp_ring->base + comp_ring->next2proc);

	while (tcd->gen == comp_ring->gen) {
		completed += vmxnet3_unmap_pkt(tcd->txdIdx, txq);

		vmxnet3_comp_ring_adv_next2proc(comp_ring);
		tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
						    comp_ring->next2proc);
	}

	PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);
}

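/*
 * Transmit a burst of packets on the given Tx queue.
 *
 * Completed descriptors are reclaimed up front, then one command-ring
 * descriptor is filled per mbuf segment. Small single-segment packets
 * (up to VMXNET3_HDR_COPY_SIZE bytes) are copied into the data ring.
 * The device is notified via VMXNET3_REG_TXPROD once the number of
 * deferred packets reaches the txThreshold shared with the hypervisor.
 */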
uint16_t
vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
		  uint16_t nb_pkts)
{
	uint16_t nb_tx;
	vmxnet3_tx_queue_t *txq = tx_queue;
	struct vmxnet3_hw *hw = txq->hw;
	Vmxnet3_TxQueueCtrl *txq_ctrl = &txq->shared->ctrl;
	uint32_t deferred = rte_le_to_cpu_32(txq_ctrl->txNumDeferred);

	if (unlikely(txq->stopped)) {
		PMD_TX_LOG(DEBUG, "Tx queue is stopped.");
		return 0;
	}

	/* Free up the comp_descriptors aggressively */
	vmxnet3_tq_tx_complete(txq);

	nb_tx = 0;
	while (nb_tx < nb_pkts) {
		Vmxnet3_GenericDesc *gdesc;
		vmxnet3_buf_info_t *tbi;
		uint32_t first2fill, avail, dw2;
		struct rte_mbuf *txm = tx_pkts[nb_tx];
		struct rte_mbuf *m_seg = txm;
		int copy_size = 0;
		bool tso = (txm->ol_flags & PKT_TX_TCP_SEG) != 0;
		/* # of descriptors needed for a packet. */
		unsigned count = txm->nb_segs;

		avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
		if (count > avail) {
			/* Is command ring full? */
			if (unlikely(avail == 0)) {
				PMD_TX_LOG(DEBUG, "No free ring descriptors");
				txq->stats.tx_ring_full++;
				txq->stats.drop_total += (nb_pkts - nb_tx);
				break;
			}

			/* Command ring is not full but cannot handle the
			 * multi-segmented packet. Let's try the next packet
			 * in this case.
			 */
			PMD_TX_LOG(DEBUG, "Running out of ring descriptors "
				   "(avail %d needed %d)", avail, count);
			txq->stats.drop_total++;
			if (tso)
				txq->stats.drop_tso++;
			rte_pktmbuf_free(txm);
			nb_tx++;
			continue;
		}

		/* Drop non-TSO packet that is excessively fragmented */
		if (unlikely(!tso && count > VMXNET3_MAX_TXD_PER_PKT)) {
			PMD_TX_LOG(ERR, "Non-TSO packet cannot occupy more than %d tx "
				   "descriptors. Packet dropped.", VMXNET3_MAX_TXD_PER_PKT);
			txq->stats.drop_too_many_segs++;
			txq->stats.drop_total++;
			rte_pktmbuf_free(txm);
			nb_tx++;
			continue;
		}

		if (txm->nb_segs == 1 && rte_pktmbuf_pkt_len(txm) <= VMXNET3_HDR_COPY_SIZE) {
			struct Vmxnet3_TxDataDesc *tdd;

			tdd = txq->data_ring.base + txq->cmd_ring.next2fill;
			copy_size = rte_pktmbuf_pkt_len(txm);
			rte_memcpy(tdd->data, rte_pktmbuf_mtod(txm, char *), copy_size);
		}

		/* use the previous gen bit for the SOP desc */
		dw2 = (txq->cmd_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
		first2fill = txq->cmd_ring.next2fill;
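		/* Fill one Tx descriptor per mbuf segment. The SOP descriptor
		 * keeps the inverted gen bit until the whole chain has been
		 * written; it is flipped after the barrier below to hand the
		 * packet to the device.
		 */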
		do {
			/* Remember the transmit buffer for cleanup */
			tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;

			/* NB: the following assumes that VMXNET3 maximum
			 * transmit buffer size (16K) is greater than
			 * maximum size of mbuf segment size.
			 */
			gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill;
			if (copy_size)
				gdesc->txd.addr = rte_cpu_to_le_64(txq->data_ring.basePA +
								   txq->cmd_ring.next2fill *
								   sizeof(struct Vmxnet3_TxDataDesc));
			else
				gdesc->txd.addr = rte_mbuf_data_dma_addr(m_seg);

			gdesc->dword[2] = dw2 | m_seg->data_len;
			gdesc->dword[3] = 0;

			/* move to the next2fill descriptor */
			vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);

			/* use the right gen for non-SOP desc */
			dw2 = txq->cmd_ring.gen << VMXNET3_TXD_GEN_SHIFT;
		} while ((m_seg = m_seg->next) != NULL);

		/* set the last buf_info for the pkt */
		tbi->m = txm;
		/* Update the EOP descriptor */
		gdesc->dword[3] |= VMXNET3_TXD_EOP | VMXNET3_TXD_CQ;

		/* Add VLAN tag if present */
		gdesc = txq->cmd_ring.base + first2fill;
		if (txm->ol_flags & PKT_TX_VLAN_PKT) {
			gdesc->txd.ti = 1;
			gdesc->txd.tci = txm->vlan_tci;
		}

		if (tso) {
			uint16_t mss = txm->tso_segsz;

			RTE_ASSERT(mss > 0);

			gdesc->txd.hlen = txm->l2_len + txm->l3_len + txm->l4_len;
			gdesc->txd.om = VMXNET3_OM_TSO;
			gdesc->txd.msscof = mss;

			deferred += (rte_pktmbuf_pkt_len(txm) - gdesc->txd.hlen + mss - 1) / mss;
		} else if (txm->ol_flags & PKT_TX_L4_MASK) {
			gdesc->txd.om = VMXNET3_OM_CSUM;
			gdesc->txd.hlen = txm->l2_len + txm->l3_len;

			switch (txm->ol_flags & PKT_TX_L4_MASK) {
			case PKT_TX_TCP_CKSUM:
				gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct tcp_hdr, cksum);
				break;
			case PKT_TX_UDP_CKSUM:
				gdesc->txd.msscof = gdesc->txd.hlen + offsetof(struct udp_hdr, dgram_cksum);
				break;
			default:
				PMD_TX_LOG(WARNING, "requested cksum offload not supported %#llx",
					   txm->ol_flags & PKT_TX_L4_MASK);
				abort();
			}
			deferred++;
		} else {
			gdesc->txd.hlen = 0;
			gdesc->txd.om = VMXNET3_OM_NONE;
			gdesc->txd.msscof = 0;
			deferred++;
		}

		/* flip the GEN bit on the SOP */
		rte_compiler_barrier();
		gdesc->dword[2] ^= VMXNET3_TXD_GEN;

		txq_ctrl->txNumDeferred = rte_cpu_to_le_32(deferred);
		nb_tx++;
	}

	PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", rte_le_to_cpu_32(txq_ctrl->txThreshold));

	if (deferred >= rte_le_to_cpu_32(txq_ctrl->txThreshold)) {
		txq_ctrl->txNumDeferred = 0;
		/* Notify vSwitch that packets are available. */
		VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN),
				       txq->cmd_ring.next2fill);
	}

	return nb_tx;
}

/*
 * Allocates mbufs and clusters. Posts rx descriptors with buffer details
 * so that the device can receive packets in those buffers.
 * Ring layout:
 * Among the two rings, the 1st ring contains buffers of type 0 and type 1.
 * bufs_per_pkt is set such that for non-LRO cases all the buffers required
 * by a frame will fit in the 1st ring (1st buf of type 0 and rest of type 1).
 * The 2nd ring contains buffers of type 1 alone and is used mostly for LRO.
 */
static int
vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
{
	int err = 0;
	uint32_t i = 0, val = 0;
	struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];

	if (ring_id == 0) {
		/* Usually: One HEAD type buf per packet
		 * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
		 * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
		 */

		/* We use single packet buffer so all heads here */
		val = VMXNET3_RXD_BTYPE_HEAD;
	} else {
		/* All BODY type buffers for 2nd ring */
		val = VMXNET3_RXD_BTYPE_BODY;
	}

	while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
		struct Vmxnet3_RxDesc *rxd;
		struct rte_mbuf *mbuf;
		vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];

		rxd = (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);

		/* Allocate blank mbuf for the current Rx Descriptor */
		mbuf = rte_mbuf_raw_alloc(rxq->mp);
		if (unlikely(mbuf == NULL)) {
			PMD_RX_LOG(ERR, "Error allocating mbuf");
			rxq->stats.rx_buf_alloc_failure++;
			err = ENOMEM;
			break;
		}

		/*
		 * Load mbuf pointer into buf_info[ring_size]
		 * buf_info structure is equivalent to cookie for virtio-virtqueue
		 */
		buf_info->m = mbuf;
		buf_info->len = (uint16_t)(mbuf->buf_len -
					   RTE_PKTMBUF_HEADROOM);
		buf_info->bufPA = rte_mbuf_data_dma_addr_default(mbuf);

		/* Load Rx Descriptor with the buffer's GPA */
		rxd->addr = buf_info->bufPA;

		/* After this point rxd->addr MUST not be NULL */
		rxd->btype = val;
		rxd->len = buf_info->len;
		/* Flip gen bit at the end to change ownership */
		rxd->gen = ring->gen;

		vmxnet3_cmd_ring_adv_next2fill(ring);
		i++;
	}

	/* Return error only if no buffers are posted at present */
	if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))
		return -err;
	else
		return i;
}

/* Receive side checksum and other offloads */
static void
vmxnet3_rx_offload(const Vmxnet3_RxCompDesc *rcd, struct rte_mbuf *rxm)
{
	/* Check for RSS */
	if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE) {
		rxm->ol_flags |= PKT_RX_RSS_HASH;
		rxm->hash.rss = rcd->rssHash;
	}

	/* Check packet type, checksum errors, etc. Only support IPv4 for now. */
	if (rcd->v4) {
		struct ether_hdr *eth = rte_pktmbuf_mtod(rxm, struct ether_hdr *);
		struct ipv4_hdr *ip = (struct ipv4_hdr *)(eth + 1);

		if (((ip->version_ihl & 0xf) << 2) > (int)sizeof(struct ipv4_hdr))
			rxm->packet_type = RTE_PTYPE_L3_IPV4_EXT;
		else
			rxm->packet_type = RTE_PTYPE_L3_IPV4;

		if (!rcd->cnc) {
			if (!rcd->ipc)
				rxm->ol_flags |= PKT_RX_IP_CKSUM_BAD;

			if ((rcd->tcp || rcd->udp) && !rcd->tuc)
				rxm->ol_flags |= PKT_RX_L4_CKSUM_BAD;
		}
	}
}

/*
 * Process the Rx completion ring of the given vmxnet3_rx_queue
 * for up to nb_pkts packets and return the number of packets received.
 */
uint16_t
vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
	uint16_t nb_rx;
	uint32_t nb_rxd, idx;
	uint8_t ring_idx;
	vmxnet3_rx_queue_t *rxq;
	Vmxnet3_RxCompDesc *rcd;
	vmxnet3_buf_info_t *rbi;
	Vmxnet3_RxDesc *rxd;
	struct rte_mbuf *rxm = NULL;
	struct vmxnet3_hw *hw;

	nb_rx = 0;
	ring_idx = 0;
	nb_rxd = 0;
	idx = 0;

	rxq = rx_queue;
	hw = rxq->hw;

	rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;

	if (unlikely(rxq->stopped)) {
		PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
		return 0;
	}

	while (rcd->gen == rxq->comp_ring.gen) {
		if (nb_rx >= nb_pkts)
			break;

		idx = rcd->rxdIdx;
		ring_idx = (uint8_t)((rcd->rqID == rxq->qid1) ? 0 : 1);
		rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
		RTE_SET_USED(rxd); /* used only for assert when enabled */
		rbi = rxq->cmd_ring[ring_idx].buf_info + idx;

		PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);

		RTE_ASSERT(rcd->len <= rxd->len);
		RTE_ASSERT(rbi->m);

		/* Get the packet buffer pointer from buf_info */
		rxm = rbi->m;

		/* Clear descriptor associated buf_info to be reused */
		rbi->m = NULL;
		rbi->bufPA = 0;

		/* Update the index that we received a packet */
		rxq->cmd_ring[ring_idx].next2comp = idx;

		/* For RCD with EOP set, check if there is frame error */
		if (unlikely(rcd->eop && rcd->err)) {
			rxq->stats.drop_total++;
			rxq->stats.drop_err++;

			if (!rcd->fcs) {
				rxq->stats.drop_fcs++;
				PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
			}
			PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
				   (int)(rcd - (struct Vmxnet3_RxCompDesc *)
					 rxq->comp_ring.base), rcd->rxdIdx);
			rte_pktmbuf_free_seg(rxm);
			goto rcd_done;
		}

		/* Initialize newly received packet buffer */
		rxm->port = rxq->port_id;
		rxm->nb_segs = 1;
		rxm->next = NULL;
		rxm->pkt_len = (uint16_t)rcd->len;
		rxm->data_len = (uint16_t)rcd->len;
		rxm->data_off = RTE_PKTMBUF_HEADROOM;
		rxm->ol_flags = 0;
		rxm->vlan_tci = 0;

		/*
		 * If this is the first buffer of the received packet,
		 * set the pointer to the first mbuf of the packet.
		 * Otherwise, update the total length and the number of segments
		 * of the current scattered packet, and update the pointer to
		 * the last mbuf of the current packet.
		 */
		if (rcd->sop) {
			RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);

			if (unlikely(rcd->len == 0)) {
				RTE_ASSERT(rcd->eop);

				PMD_RX_LOG(DEBUG,
					   "Rx buf was skipped. rxring[%d][%d])",
					   ring_idx, idx);
				rte_pktmbuf_free_seg(rxm);
				goto rcd_done;
			}

			rxq->start_seg = rxm;
			vmxnet3_rx_offload(rcd, rxm);
		} else {
			struct rte_mbuf *start = rxq->start_seg;

			RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_BODY);

			start->pkt_len += rxm->data_len;
			start->nb_segs++;

			rxq->last_seg->next = rxm;
		}
		rxq->last_seg = rxm;

		if (rcd->eop) {
			struct rte_mbuf *start = rxq->start_seg;

			/* Check for hardware stripped VLAN tag */
			if (rcd->ts) {
				start->ol_flags |= (PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED);
				start->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
			}

			rx_pkts[nb_rx++] = start;
			rxq->start_seg = NULL;
		}

rcd_done:
		rxq->cmd_ring[ring_idx].next2comp = idx;
		VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp, rxq->cmd_ring[ring_idx].size);

		/* It's time to allocate some new buf and renew descriptors */
		vmxnet3_post_rx_bufs(rxq, ring_idx);
		if (unlikely(rxq->shared->ctrl.updateRxProd)) {
			VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
					       rxq->cmd_ring[ring_idx].next2fill);
		}

		/* Advance to the next descriptor in comp_ring */
		vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);

		rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
		nb_rxd++;
		if (nb_rxd > rxq->cmd_ring[0].size) {
			PMD_RX_LOG(ERR,
				   "Used up quota of receiving packets,"
				   " relinquish control.");
			break;
		}
	}

	return nb_rx;
}

/*
 * Create memzone for device rings. malloc can't be used as the physical address is
 * needed. If the memzone is already created, then this function returns a ptr
 * to the old one.
 */
static const struct rte_memzone *
ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name,
		      uint16_t queue_id, uint32_t ring_size, int socket_id)
{
	char z_name[RTE_MEMZONE_NAMESIZE];
	const struct rte_memzone *mz;

	snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
		 dev->driver->pci_drv.name, ring_name,
		 dev->data->port_id, queue_id);

	mz = rte_memzone_lookup(z_name);
	if (mz)
		return mz;

	return rte_memzone_reserve_aligned(z_name, ring_size,
					   socket_id, 0, VMXNET3_RING_BA_ALIGN);
}

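/*
 * Set up a Tx queue: validate the requested descriptor count against the
 * 512-4096 ring-size limits, allocate the queue structure, carve the
 * command, completion and data rings out of one contiguous memzone and
 * allocate the per-descriptor buf_info array.
 */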
int
vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
			   uint16_t queue_idx,
			   uint16_t nb_desc,
			   unsigned int socket_id,
			   __attribute__((unused)) const struct rte_eth_txconf *tx_conf)
{
	struct vmxnet3_hw *hw = dev->data->dev_private;
	const struct rte_memzone *mz;
	struct vmxnet3_tx_queue *txq;
	struct vmxnet3_cmd_ring *ring;
	struct vmxnet3_comp_ring *comp_ring;
	struct vmxnet3_data_ring *data_ring;
	int size;

	PMD_INIT_FUNC_TRACE();

	if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOXSUMSCTP) !=
	    ETH_TXQ_FLAGS_NOXSUMSCTP) {
		PMD_INIT_LOG(ERR, "SCTP checksum offload not supported");
		return -EINVAL;
	}

	txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue), RTE_CACHE_LINE_SIZE);
	if (txq == NULL) {
		PMD_INIT_LOG(ERR, "Can not allocate tx queue structure");
		return -ENOMEM;
	}

	txq->queue_id = queue_idx;
	txq->port_id = dev->data->port_id;
	txq->shared = &hw->tqd_start[queue_idx];
	txq->hw = hw;
	txq->qid = queue_idx;
	txq->stopped = TRUE;

	ring = &txq->cmd_ring;
	comp_ring = &txq->comp_ring;
	data_ring = &txq->data_ring;

	/* Tx vmxnet ring length should be between 512-4096 */
	if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u",
			     VMXNET3_DEF_TX_RING_SIZE);
		return -EINVAL;
	} else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u",
			     VMXNET3_TX_RING_MAX_SIZE);
		return -EINVAL;
	} else {
		ring->size = nb_desc;
		ring->size &= ~VMXNET3_RING_SIZE_MASK;
	}
	comp_ring->size = data_ring->size = ring->size;

	/* Tx vmxnet rings structure initialization */
	ring->next2fill = 0;
	ring->next2comp = 0;
	ring->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
	size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
	size += sizeof(struct Vmxnet3_TxDataDesc) * data_ring->size;

	mz = ring_dma_zone_reserve(dev, "txdesc", queue_idx, size, socket_id);
	if (mz == NULL) {
		PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
		return -ENOMEM;
	}
	memset(mz->addr, 0, mz->len);

	/* cmd_ring initialization */
	ring->base = mz->addr;
	ring->basePA = mz->phys_addr;

	/* comp_ring initialization */
	comp_ring->base = ring->base + ring->size;
	comp_ring->basePA = ring->basePA +
		(sizeof(struct Vmxnet3_TxDesc) * ring->size);

	/* data_ring initialization */
	data_ring->base = (Vmxnet3_TxDataDesc *)(comp_ring->base + comp_ring->size);
	data_ring->basePA = comp_ring->basePA +
		(sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size);

	/* cmd_ring0 buf_info allocation */
	ring->buf_info = rte_zmalloc("tx_ring_buf_info",
				     ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
	if (ring->buf_info == NULL) {
		PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure");
		return -ENOMEM;
	}

	/* Update the data portion with txq */
	dev->data->tx_queues[queue_idx] = txq;

	return 0;
}

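/*
 * Set up an Rx queue: validate the descriptor count against the 256-4096
 * ring-size limits, allocate the queue structure, lay both command rings
 * and the completion ring out in one contiguous memzone and allocate a
 * buf_info array for each command ring.
 */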
int
vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
			   uint16_t queue_idx,
			   uint16_t nb_desc,
			   unsigned int socket_id,
			   __attribute__((unused)) const struct rte_eth_rxconf *rx_conf,
			   struct rte_mempool *mp)
{
	const struct rte_memzone *mz;
	struct vmxnet3_rx_queue *rxq;
	struct vmxnet3_hw *hw = dev->data->dev_private;
	struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
	struct vmxnet3_comp_ring *comp_ring;
	int size;
	uint8_t i;
	char mem_name[32];

	PMD_INIT_FUNC_TRACE();

	rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue), RTE_CACHE_LINE_SIZE);
	if (rxq == NULL) {
		PMD_INIT_LOG(ERR, "Can not allocate rx queue structure");
		return -ENOMEM;
	}

	rxq->mp = mp;
	rxq->queue_id = queue_idx;
	rxq->port_id = dev->data->port_id;
	rxq->shared = &hw->rqd_start[queue_idx];
	rxq->hw = hw;
	rxq->qid1 = queue_idx;
	rxq->qid2 = queue_idx + hw->num_rx_queues;
	rxq->stopped = TRUE;

	ring0 = &rxq->cmd_ring[0];
	ring1 = &rxq->cmd_ring[1];
	comp_ring = &rxq->comp_ring;

	/* Rx vmxnet rings length should be between 256-4096 */
	if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: 256");
		return -EINVAL;
	} else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: 4096");
		return -EINVAL;
	} else {
		ring0->size = nb_desc;
		ring0->size &= ~VMXNET3_RING_SIZE_MASK;
		ring1->size = ring0->size;
	}

	comp_ring->size = ring0->size + ring1->size;

	/* Rx vmxnet rings structure initialization */
	ring0->next2fill = 0;
	ring1->next2fill = 0;
	ring0->next2comp = 0;
	ring1->next2comp = 0;
	ring0->gen = VMXNET3_INIT_GEN;
	ring1->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
	size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;

	mz = ring_dma_zone_reserve(dev, "rxdesc", queue_idx, size, socket_id);
	if (mz == NULL) {
		PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
		return -ENOMEM;
	}
	memset(mz->addr, 0, mz->len);

	/* cmd_ring0 initialization */
	ring0->base = mz->addr;
	ring0->basePA = mz->phys_addr;

	/* cmd_ring1 initialization */
	ring1->base = ring0->base + ring0->size;
	ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;

	/* comp_ring initialization */
	comp_ring->base = ring1->base + ring1->size;
	comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *
		ring1->size;

	/* cmd_ring0-cmd_ring1 buf_info allocation */
	for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {

		ring = &rxq->cmd_ring[i];
		ring->rid = i;
		snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);

		ring->buf_info = rte_zmalloc(mem_name, ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
		if (ring->buf_info == NULL) {
			PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure");
			return -ENOMEM;
		}
	}

	/* Update the data portion with rxq */
	dev->data->rx_queues[queue_idx] = rxq;

	return 0;
}

/*
 * Initializes Receive Unit
 * Load mbufs in rx queue in advance
 */
int
vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
{
	struct vmxnet3_hw *hw = dev->data->dev_private;

	int i, ret;
	uint8_t j;

	PMD_INIT_FUNC_TRACE();

	for (i = 0; i < hw->num_rx_queues; i++) {
		vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];

		for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
			/* Passing 0 as alloc_num will allocate full ring */
			ret = vmxnet3_post_rx_bufs(rxq, j);
			if (ret <= 0) {
				PMD_INIT_LOG(ERR, "ERROR: Posting Rxq: %d buffers ring: %d", i, j);
				return -ret;
			}
			/* Updating device with the index:next2fill to fill the mbufs for coming packets */
			if (unlikely(rxq->shared->ctrl.updateRxProd)) {
				VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
						       rxq->cmd_ring[j].next2fill);
			}
		}
		rxq->stopped = FALSE;
		rxq->start_seg = NULL;
	}

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];

		txq->stopped = FALSE;
	}

	return 0;
}

static uint8_t rss_intel_key[40] = {
	0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
	0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
	0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
	0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
	0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
};

/*
 * Configure RSS feature
 */
int
vmxnet3_rss_configure(struct rte_eth_dev *dev)
{
	struct vmxnet3_hw *hw = dev->data->dev_private;
	struct VMXNET3_RSSConf *dev_rss_conf;
	struct rte_eth_rss_conf *port_rss_conf;
	uint64_t rss_hf;
	uint8_t i, j;

	PMD_INIT_FUNC_TRACE();

	dev_rss_conf = hw->rss_conf;
	port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;

	/* loading hashFunc */
	dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
	/* loading hashKeySize */
	dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
	/* loading indTableSize: Must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128) */
	dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4);

	if (port_rss_conf->rss_key == NULL) {
		/* Default hash key */
		port_rss_conf->rss_key = rss_intel_key;
	}

	/* loading hashKey */
	memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key, dev_rss_conf->hashKeySize);

	/* loading indTable */
	for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
		if (j == dev->data->nb_rx_queues)
			j = 0;
		dev_rss_conf->indTable[i] = j;
	}

	/* loading hashType */
	dev_rss_conf->hashType = 0;
	rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
	if (rss_hf & ETH_RSS_IPV4)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
	if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
	if (rss_hf & ETH_RSS_IPV6)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
	if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;

	return VMXNET3_SUCCESS;
}