/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>

#include <rte_cycles.h>
#include <rte_memory.h>
#include <rte_memzone.h>
#include <rte_branch_prediction.h>
#include <rte_mempool.h>
#include <rte_malloc.h>
#include <rte_mbuf.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_prefetch.h>
#include <rte_string_fns.h>
#include <rte_errno.h>
#include <rte_byteorder.h>
#include <rte_cpuflags.h>
#include <rte_net.h>
#include <rte_ip.h>
#include <rte_udp.h>
#include <rte_tcp.h>

#include "virtio_logs.h"
#include "virtio_ethdev.h"
#include "virtio_pci.h"
#include "virtqueue.h"
#include "virtio_rxtx.h"

#ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
#define VIRTIO_DUMP_PACKET(m, len) rte_pktmbuf_dump(stdout, m, len)
#else
#define VIRTIO_DUMP_PACKET(m, len) do { } while (0)
#endif


#define VIRTIO_SIMPLE_FLAGS ((uint32_t)ETH_TXQ_FLAGS_NOMULTSEGS | \
	ETH_TXQ_FLAGS_NOOFFLOADS)

int
virtio_dev_rx_queue_done(void *rxq, uint16_t offset)
{
	struct virtnet_rx *rxvq = rxq;
	struct virtqueue *vq = rxvq->vq;

	return VIRTQUEUE_NUSED(vq) >= offset;
}

static void
vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
{
	struct vring_desc *dp, *dp_tail;
	struct vq_desc_extra *dxp;
	uint16_t desc_idx_last = desc_idx;

	dp = &vq->vq_ring.desc[desc_idx];
	dxp = &vq->vq_descx[desc_idx];
	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt + dxp->ndescs);
	if ((dp->flags & VRING_DESC_F_INDIRECT) == 0) {
		while (dp->flags & VRING_DESC_F_NEXT) {
			desc_idx_last = dp->next;
			dp = &vq->vq_ring.desc[dp->next];
		}
	}
	dxp->ndescs = 0;

	/*
	 * We must append the existing free chain, if any, to the end of
	 * newly freed chain.
	 * If the virtqueue was completely used, then head would be
	 * VQ_RING_DESC_CHAIN_END (ASSERTed above).
	 */
	if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END) {
		vq->vq_desc_head_idx = desc_idx;
	} else {
		dp_tail = &vq->vq_ring.desc[vq->vq_desc_tail_idx];
		dp_tail->next = desc_idx;
	}

	vq->vq_desc_tail_idx = desc_idx_last;
	dp->next = VQ_RING_DESC_CHAIN_END;
}

static uint16_t
virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts,
			   uint32_t *len, uint16_t num)
{
	struct vring_used_elem *uep;
	struct rte_mbuf *cookie;
	uint16_t used_idx, desc_idx;
	uint16_t i;

	/* Caller does the check */
	for (i = 0; i < num; i++) {
		used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
		uep = &vq->vq_ring.used->ring[used_idx];
		desc_idx = (uint16_t) uep->id;
		len[i] = uep->len;
		cookie = (struct rte_mbuf *)vq->vq_descx[desc_idx].cookie;

		if (unlikely(cookie == NULL)) {
			PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
				vq->vq_used_cons_idx);
			break;
		}

		rte_prefetch0(cookie);
		rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
		rx_pkts[i] = cookie;
		vq->vq_used_cons_idx++;
		vq_ring_free_chain(vq, desc_idx);
		vq->vq_descx[desc_idx].cookie = NULL;
	}

	return i;
}

#ifndef DEFAULT_TX_FREE_THRESH
#define DEFAULT_TX_FREE_THRESH 32
#endif

/* Cleanup from completed transmits. */
static void
virtio_xmit_cleanup(struct virtqueue *vq, uint16_t num)
{
	uint16_t i, used_idx, desc_idx;
	for (i = 0; i < num; i++) {
		struct vring_used_elem *uep;
		struct vq_desc_extra *dxp;

		used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
		uep = &vq->vq_ring.used->ring[used_idx];

		desc_idx = (uint16_t) uep->id;
		dxp = &vq->vq_descx[desc_idx];
		vq->vq_used_cons_idx++;
		vq_ring_free_chain(vq, desc_idx);

		if (dxp->cookie != NULL) {
			rte_pktmbuf_free(dxp->cookie);
			dxp->cookie = NULL;
		}
	}
}


static inline int
virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf *cookie)
{
	struct vq_desc_extra *dxp;
	struct virtio_hw *hw = vq->hw;
	struct vring_desc *start_dp;
	uint16_t needed = 1;
	uint16_t head_idx, idx;

	if (unlikely(vq->vq_free_cnt == 0))
		return -ENOSPC;
	if (unlikely(vq->vq_free_cnt < needed))
		return -EMSGSIZE;

	head_idx = vq->vq_desc_head_idx;
	if (unlikely(head_idx >= vq->vq_nentries))
		return -EFAULT;

	idx = head_idx;
	dxp = &vq->vq_descx[idx];
	dxp->cookie = (void *)cookie;
	dxp->ndescs = needed;

	start_dp = vq->vq_ring.desc;
	start_dp[idx].addr =
		VIRTIO_MBUF_ADDR(cookie, vq) +
		RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
	start_dp[idx].len =
		cookie->buf_len - RTE_PKTMBUF_HEADROOM + hw->vtnet_hdr_size;
	start_dp[idx].flags = VRING_DESC_F_WRITE;
	idx = start_dp[idx].next;
	vq->vq_desc_head_idx = idx;
	if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
		vq->vq_desc_tail_idx = idx;
	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
	vq_update_avail_ring(vq, head_idx);

	return 0;
}

/* When doing TSO, the IP length is not included in the pseudo header
 * checksum of the packet given to the PMD, but for virtio it is
 * expected.
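 *
 * Note: the helper below therefore adds the IP payload length back into
 * the TCP checksum field (which holds the pseudo-header checksum at this
 * point), folding the carry as one's-complement arithmetic requires.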
 */
static void
virtio_tso_fix_cksum(struct rte_mbuf *m)
{
	/* common case: header is not fragmented */
	if (likely(rte_pktmbuf_data_len(m) >= m->l2_len + m->l3_len +
			m->l4_len)) {
		struct ipv4_hdr *iph;
		struct ipv6_hdr *ip6h;
		struct tcp_hdr *th;
		uint16_t prev_cksum, new_cksum, ip_len, ip_paylen;
		uint32_t tmp;

		iph = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, m->l2_len);
		th = RTE_PTR_ADD(iph, m->l3_len);
		if ((iph->version_ihl >> 4) == 4) {
			iph->hdr_checksum = 0;
			iph->hdr_checksum = rte_ipv4_cksum(iph);
			ip_len = iph->total_length;
			ip_paylen = rte_cpu_to_be_16(rte_be_to_cpu_16(ip_len) -
				m->l3_len);
		} else {
			ip6h = (struct ipv6_hdr *)iph;
			ip_paylen = ip6h->payload_len;
		}

		/* calculate the new phdr checksum not including ip_paylen */
		prev_cksum = th->cksum;
		tmp = prev_cksum;
		tmp += ip_paylen;
		tmp = (tmp & 0xffff) + (tmp >> 16);
		new_cksum = tmp;

		/* replace it in the packet */
		th->cksum = new_cksum;
	}
}

static inline int
tx_offload_enabled(struct virtio_hw *hw)
{
	return vtpci_with_feature(hw, VIRTIO_NET_F_CSUM) ||
		vtpci_with_feature(hw, VIRTIO_NET_F_HOST_TSO4) ||
		vtpci_with_feature(hw, VIRTIO_NET_F_HOST_TSO6);
}

/* avoid the write operation when it is not necessary, to lessen cache issues */
#define ASSIGN_UNLESS_EQUAL(var, val) do {	\
	if ((var) != (val))			\
		(var) = (val);			\
} while (0)

static inline void
virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
		       uint16_t needed, int use_indirect, int can_push)
{
	struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
	struct vq_desc_extra *dxp;
	struct virtqueue *vq = txvq->vq;
	struct vring_desc *start_dp;
	uint16_t seg_num = cookie->nb_segs;
	uint16_t head_idx, idx;
	uint16_t head_size = vq->hw->vtnet_hdr_size;
	struct virtio_net_hdr *hdr;
	int offload;

	offload = tx_offload_enabled(vq->hw);
	head_idx = vq->vq_desc_head_idx;
	idx = head_idx;
	dxp = &vq->vq_descx[idx];
	dxp->cookie = (void *)cookie;
	dxp->ndescs = needed;

	start_dp = vq->vq_ring.desc;

	if (can_push) {
		/* prepend cannot fail, checked by caller */
		hdr = (struct virtio_net_hdr *)
			rte_pktmbuf_prepend(cookie, head_size);
		/* if offload disabled, it is not zeroed below, do it now */
		if (offload == 0) {
			ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
			ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
			ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
			ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
			ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
			ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
		}
	} else if (use_indirect) {
		/* setup tx ring slot to point to indirect
		 * descriptor list stored in reserved region.
		 *
		 * the first slot in indirect ring is already preset
		 * to point to the header in reserved region
		 */
		start_dp[idx].addr = txvq->virtio_net_hdr_mem +
			RTE_PTR_DIFF(&txr[idx].tx_indir, txr);
		start_dp[idx].len = (seg_num + 1) * sizeof(struct vring_desc);
		start_dp[idx].flags = VRING_DESC_F_INDIRECT;
		hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;

		/* loop below will fill in rest of the indirect elements */
		start_dp = txr[idx].tx_indir;
		idx = 1;
	} else {
		/* setup first tx ring slot to point to header
		 * stored in reserved region.
		 */
		start_dp[idx].addr = txvq->virtio_net_hdr_mem +
			RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
		start_dp[idx].len = vq->hw->vtnet_hdr_size;
		start_dp[idx].flags = VRING_DESC_F_NEXT;
		hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;

		idx = start_dp[idx].next;
	}

	/* Checksum Offload / TSO */
	if (offload) {
		if (cookie->ol_flags & PKT_TX_TCP_SEG)
			cookie->ol_flags |= PKT_TX_TCP_CKSUM;

		switch (cookie->ol_flags & PKT_TX_L4_MASK) {
		case PKT_TX_UDP_CKSUM:
			hdr->csum_start = cookie->l2_len + cookie->l3_len;
			hdr->csum_offset = offsetof(struct udp_hdr,
				dgram_cksum);
			hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
			break;

		case PKT_TX_TCP_CKSUM:
			hdr->csum_start = cookie->l2_len + cookie->l3_len;
			hdr->csum_offset = offsetof(struct tcp_hdr, cksum);
			hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
			break;

		default:
			ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
			ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
			ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
			break;
		}

		/* TCP Segmentation Offload */
		if (cookie->ol_flags & PKT_TX_TCP_SEG) {
			virtio_tso_fix_cksum(cookie);
			hdr->gso_type = (cookie->ol_flags & PKT_TX_IPV6) ?
				VIRTIO_NET_HDR_GSO_TCPV6 :
				VIRTIO_NET_HDR_GSO_TCPV4;
			hdr->gso_size = cookie->tso_segsz;
			hdr->hdr_len =
				cookie->l2_len +
				cookie->l3_len +
				cookie->l4_len;
		} else {
			ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
			ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
			ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
		}
	}

	do {
		start_dp[idx].addr = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
		start_dp[idx].len = cookie->data_len;
		start_dp[idx].flags = cookie->next ? VRING_DESC_F_NEXT : 0;
		idx = start_dp[idx].next;
	} while ((cookie = cookie->next) != NULL);

	if (use_indirect)
		idx = vq->vq_ring.desc[head_idx].next;

	vq->vq_desc_head_idx = idx;
	if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
		vq->vq_desc_tail_idx = idx;
	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
	vq_update_avail_ring(vq, head_idx);
}

void
virtio_dev_cq_start(struct rte_eth_dev *dev)
{
	struct virtio_hw *hw = dev->data->dev_private;

	if (hw->cvq && hw->cvq->vq) {
		VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq->vq);
	}
}

int
virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
			uint16_t queue_idx,
			uint16_t nb_desc,
			unsigned int socket_id __rte_unused,
			__rte_unused const struct rte_eth_rxconf *rx_conf,
			struct rte_mempool *mp)
{
	uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
	struct virtio_hw *hw = dev->data->dev_private;
	struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
	struct virtnet_rx *rxvq;

	PMD_INIT_FUNC_TRACE();

	if (nb_desc == 0 || nb_desc > vq->vq_nentries)
		nb_desc = vq->vq_nentries;
	vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);

	rxvq = &vq->rxq;
	rxvq->queue_id = queue_idx;
	rxvq->mpool = mp;
	if (rxvq->mpool == NULL) {
		rte_exit(EXIT_FAILURE,
			"Cannot allocate mbufs for rx virtqueue");
	}
	dev->data->rx_queues[queue_idx] = rxvq;

	return 0;
}
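
/*
 * Note: this second stage of Rx queue setup runs once the vring itself
 * exists; it seeds the avail ring for the simple path, points the trailing
 * sw_ring entries at a fake mbuf sentinel, and refills the ring with mbufs
 * taken from the queue's mempool.
 */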
int
virtio_dev_rx_queue_setup_finish(struct rte_eth_dev *dev, uint16_t queue_idx)
{
	uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
	struct virtio_hw *hw = dev->data->dev_private;
	struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
	struct virtnet_rx *rxvq = &vq->rxq;
	struct rte_mbuf *m;
	uint16_t desc_idx;
	int error, nbufs;

	PMD_INIT_FUNC_TRACE();

	/* Allocate blank mbufs for each rx descriptor */
	nbufs = 0;

	if (hw->use_simple_rxtx) {
		for (desc_idx = 0; desc_idx < vq->vq_nentries;
		     desc_idx++) {
			vq->vq_ring.avail->ring[desc_idx] = desc_idx;
			vq->vq_ring.desc[desc_idx].flags =
				VRING_DESC_F_WRITE;
		}
	}

	memset(&rxvq->fake_mbuf, 0, sizeof(rxvq->fake_mbuf));
	for (desc_idx = 0; desc_idx < RTE_PMD_VIRTIO_RX_MAX_BURST;
	     desc_idx++) {
		vq->sw_ring[vq->vq_nentries + desc_idx] =
			&rxvq->fake_mbuf;
	}

	while (!virtqueue_full(vq)) {
		m = rte_mbuf_raw_alloc(rxvq->mpool);
		if (m == NULL)
			break;

		/* Enqueue allocated buffers */
		if (hw->use_simple_rxtx)
			error = virtqueue_enqueue_recv_refill_simple(vq, m);
		else
			error = virtqueue_enqueue_recv_refill(vq, m);

		if (error) {
			rte_pktmbuf_free(m);
			break;
		}
		nbufs++;
	}

	vq_update_avail_idx(vq);

	PMD_INIT_LOG(DEBUG, "Allocated %d bufs", nbufs);

	virtio_rxq_vec_setup(rxvq);

	VIRTQUEUE_DUMP(vq);

	return 0;
}

static void
virtio_update_rxtx_handler(struct rte_eth_dev *dev,
			   const struct rte_eth_txconf *tx_conf)
{
	uint8_t use_simple_rxtx = 0;
	struct virtio_hw *hw = dev->data->dev_private;

#if defined RTE_ARCH_X86
	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE3))
		use_simple_rxtx = 1;
#elif defined RTE_ARCH_ARM64 || defined CONFIG_RTE_ARCH_ARM
	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON))
		use_simple_rxtx = 1;
#endif
	/* Use simple rx/tx func if single segment and no offloads */
	if (use_simple_rxtx &&
	    (tx_conf->txq_flags & VIRTIO_SIMPLE_FLAGS) == VIRTIO_SIMPLE_FLAGS &&
	    !vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
		PMD_INIT_LOG(INFO, "Using simple rx/tx path");
		dev->tx_pkt_burst = virtio_xmit_pkts_simple;
		dev->rx_pkt_burst = virtio_recv_pkts_vec;
		hw->use_simple_rxtx = use_simple_rxtx;
	}
}

/*
 * struct rte_eth_dev *dev: Used to update dev
 * uint16_t nb_desc: Defaults to values read from config space
 * unsigned int socket_id: Used to allocate memzone
 * const struct rte_eth_txconf *tx_conf: Used to setup tx engine
 * uint16_t queue_idx: Just used as an index in dev txq list
 */
int
virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
			uint16_t queue_idx,
			uint16_t nb_desc,
			unsigned int socket_id __rte_unused,
			const struct rte_eth_txconf *tx_conf)
{
	uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
	struct virtio_hw *hw = dev->data->dev_private;
	struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
	struct virtnet_tx *txvq;
	uint16_t tx_free_thresh;

	PMD_INIT_FUNC_TRACE();

	virtio_update_rxtx_handler(dev, tx_conf);

	if (nb_desc == 0 || nb_desc > vq->vq_nentries)
		nb_desc = vq->vq_nentries;
	vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);

	txvq = &vq->txq;
	txvq->queue_id = queue_idx;

	tx_free_thresh = tx_conf->tx_free_thresh;
	if (tx_free_thresh == 0)
		tx_free_thresh =
			RTE_MIN(vq->vq_nentries / 4, DEFAULT_TX_FREE_THRESH);

	if (tx_free_thresh >= (vq->vq_nentries - 3)) {
		RTE_LOG(ERR, PMD, "tx_free_thresh must be less than the "
			"number of TX entries minus 3 (%u)."
			" (tx_free_thresh=%u port=%u queue=%u)\n",
			vq->vq_nentries - 3,
			tx_free_thresh, dev->data->port_id, queue_idx);
		return -EINVAL;
	}

	vq->vq_free_thresh = tx_free_thresh;

	dev->data->tx_queues[queue_idx] = txvq;
	return 0;
}

int
virtio_dev_tx_queue_setup_finish(struct rte_eth_dev *dev,
				uint16_t queue_idx)
{
	uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
	struct virtio_hw *hw = dev->data->dev_private;
	struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
	uint16_t mid_idx = vq->vq_nentries >> 1;
	struct virtnet_tx *txvq = &vq->txq;
	uint16_t desc_idx;

	PMD_INIT_FUNC_TRACE();

	if (hw->use_simple_rxtx) {
		for (desc_idx = 0; desc_idx < mid_idx; desc_idx++) {
			vq->vq_ring.avail->ring[desc_idx] =
				desc_idx + mid_idx;
			vq->vq_ring.desc[desc_idx + mid_idx].next =
				desc_idx;
			vq->vq_ring.desc[desc_idx + mid_idx].addr =
				txvq->virtio_net_hdr_mem +
				offsetof(struct virtio_tx_region, tx_hdr);
			vq->vq_ring.desc[desc_idx + mid_idx].len =
				vq->hw->vtnet_hdr_size;
			vq->vq_ring.desc[desc_idx + mid_idx].flags =
				VRING_DESC_F_NEXT;
			vq->vq_ring.desc[desc_idx].flags = 0;
		}
		for (desc_idx = mid_idx; desc_idx < vq->vq_nentries;
		     desc_idx++)
			vq->vq_ring.avail->ring[desc_idx] = desc_idx;
	}

	VIRTQUEUE_DUMP(vq);

	return 0;
}

static void
virtio_discard_rxbuf(struct virtqueue *vq, struct rte_mbuf *m)
{
	int error;
	/*
	 * Requeue the discarded mbuf. This should always be
	 * successful since it was just dequeued.
	 */
	error = virtqueue_enqueue_recv_refill(vq, m);
	if (unlikely(error)) {
		RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf");
		rte_pktmbuf_free(m);
	}
}

static void
virtio_update_packet_stats(struct virtnet_stats *stats, struct rte_mbuf *mbuf)
{
	uint32_t s = mbuf->pkt_len;
	struct ether_addr *ea;

	if (s == 64) {
		stats->size_bins[1]++;
	} else if (s > 64 && s < 1024) {
		uint32_t bin;

		/* count zeros, and offset into correct bin */
		bin = (sizeof(s) * 8) - __builtin_clz(s) - 5;
		stats->size_bins[bin]++;
	} else {
		if (s < 64)
			stats->size_bins[0]++;
		else if (s < 1519)
			stats->size_bins[6]++;
		else if (s >= 1519)
			stats->size_bins[7]++;
	}

	ea = rte_pktmbuf_mtod(mbuf, struct ether_addr *);
	if (is_multicast_ether_addr(ea)) {
		if (is_broadcast_ether_addr(ea))
			stats->broadcast++;
		else
			stats->multicast++;
	}
}

/* Optionally fill offload information in structure */
static int
virtio_rx_offload(struct rte_mbuf *m, struct virtio_net_hdr *hdr)
{
	struct rte_net_hdr_lens hdr_lens;
	uint32_t hdrlen, ptype;
	int l4_supported = 0;

	/* nothing to do */
	if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE)
		return 0;

	m->ol_flags |= PKT_RX_IP_CKSUM_UNKNOWN;

	ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK);
	m->packet_type = ptype;
	if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP ||
	    (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP ||
	    (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP)
		l4_supported = 1;

	if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
		hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len;
		if (hdr->csum_start <= hdrlen && l4_supported) {
			m->ol_flags |= PKT_RX_L4_CKSUM_NONE;
		} else {
			/* Unknown proto or tunnel, do sw cksum.
			 * We can assume the cksum field is in the first segment since the
			 * buffers we provided to the host are large enough.
			 * In case of SCTP, this will be wrong since it's a CRC
			 * but there's nothing we can do.
			 */
			uint16_t csum, off;

			rte_raw_cksum_mbuf(m, hdr->csum_start,
				rte_pktmbuf_pkt_len(m) - hdr->csum_start,
				&csum);
			if (likely(csum != 0xffff))
				csum = ~csum;
			off = hdr->csum_offset + hdr->csum_start;
			if (rte_pktmbuf_data_len(m) >= off + 1)
				*rte_pktmbuf_mtod_offset(m, uint16_t *,
					off) = csum;
		}
	} else if (hdr->flags & VIRTIO_NET_HDR_F_DATA_VALID && l4_supported) {
		m->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
	}

	/* GSO request, save required information in mbuf */
	if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
		/* Check unsupported modes */
		if ((hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN) ||
		    (hdr->gso_size == 0)) {
			return -EINVAL;
		}

		/* Update mss lengths in mbuf */
		m->tso_segsz = hdr->gso_size;
		switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
		case VIRTIO_NET_HDR_GSO_TCPV4:
		case VIRTIO_NET_HDR_GSO_TCPV6:
			m->ol_flags |= PKT_RX_LRO | \
				PKT_RX_L4_CKSUM_NONE;
			break;
		default:
			return -EINVAL;
		}
	}

	return 0;
}

static inline int
rx_offload_enabled(struct virtio_hw *hw)
{
	return vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_CSUM) ||
		vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO4) ||
		vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO6);
}

#define VIRTIO_MBUF_BURST_SZ 64
#define DESC_PER_CACHELINE (RTE_CACHE_LINE_SIZE / sizeof(struct vring_desc))
uint16_t
virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
	struct virtnet_rx *rxvq = rx_queue;
	struct virtqueue *vq = rxvq->vq;
	struct virtio_hw *hw = vq->hw;
	struct rte_mbuf *rxm, *new_mbuf;
	uint16_t nb_used, num, nb_rx;
	uint32_t len[VIRTIO_MBUF_BURST_SZ];
	struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
	int error;
	uint32_t i, nb_enqueued;
	uint32_t hdr_size;
	int offload;
	struct virtio_net_hdr *hdr;

	nb_rx = 0;
	if (unlikely(hw->started == 0))
		return nb_rx;

	nb_used = VIRTQUEUE_NUSED(vq);

	virtio_rmb();

	num = likely(nb_used <= nb_pkts) ? nb_used : nb_pkts;
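	/*
	 * Note: the two adjustments below cap the burst at VIRTIO_MBUF_BURST_SZ
	 * and, when they apply, trim it so vq_used_cons_idx ends on a
	 * DESC_PER_CACHELINE boundary, which should let the next burst start
	 * on a fresh descriptor cache line.
	 */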
	if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
		num = VIRTIO_MBUF_BURST_SZ;
	if (likely(num > DESC_PER_CACHELINE))
		num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);

	num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, num);
	PMD_RX_LOG(DEBUG, "used:%d dequeue:%d", nb_used, num);

	nb_enqueued = 0;
	hdr_size = hw->vtnet_hdr_size;
	offload = rx_offload_enabled(hw);

	for (i = 0; i < num; i++) {
		rxm = rcv_pkts[i];

		PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);

		if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
			PMD_RX_LOG(ERR, "Packet drop");
			nb_enqueued++;
			virtio_discard_rxbuf(vq, rxm);
			rxvq->stats.errors++;
			continue;
		}

		rxm->port = rxvq->port_id;
		rxm->data_off = RTE_PKTMBUF_HEADROOM;
		rxm->ol_flags = 0;
		rxm->vlan_tci = 0;

		rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
		rxm->data_len = (uint16_t)(len[i] - hdr_size);

		hdr = (struct virtio_net_hdr *)((char *)rxm->buf_addr +
			RTE_PKTMBUF_HEADROOM - hdr_size);

		if (hw->vlan_strip)
			rte_vlan_strip(rxm);

		if (offload && virtio_rx_offload(rxm, hdr) < 0) {
			virtio_discard_rxbuf(vq, rxm);
			rxvq->stats.errors++;
			continue;
		}

		VIRTIO_DUMP_PACKET(rxm, rxm->data_len);

		rx_pkts[nb_rx++] = rxm;

		rxvq->stats.bytes += rxm->pkt_len;
		virtio_update_packet_stats(&rxvq->stats, rxm);
	}

	rxvq->stats.packets += nb_rx;

	/* Allocate new mbuf for the used descriptor */
	error = ENOSPC;
	while (likely(!virtqueue_full(vq))) {
		new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool);
		if (unlikely(new_mbuf == NULL)) {
			struct rte_eth_dev *dev
				= &rte_eth_devices[rxvq->port_id];
			dev->data->rx_mbuf_alloc_failed++;
			break;
		}
		error = virtqueue_enqueue_recv_refill(vq, new_mbuf);
		if (unlikely(error)) {
			rte_pktmbuf_free(new_mbuf);
			break;
		}
		nb_enqueued++;
	}

	if (likely(nb_enqueued)) {
		vq_update_avail_idx(vq);

		if (unlikely(virtqueue_kick_prepare(vq))) {
			virtqueue_notify(vq);
			PMD_RX_LOG(DEBUG, "Notified");
		}
	}

	return nb_rx;
}

uint16_t
virtio_recv_mergeable_pkts(void *rx_queue,
			struct rte_mbuf **rx_pkts,
			uint16_t nb_pkts)
{
	struct virtnet_rx *rxvq = rx_queue;
	struct virtqueue *vq = rxvq->vq;
	struct virtio_hw *hw = vq->hw;
	struct rte_mbuf *rxm, *new_mbuf;
	uint16_t nb_used, num, nb_rx;
	uint32_t len[VIRTIO_MBUF_BURST_SZ];
	struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
	struct rte_mbuf *prev;
	int error;
	uint32_t i, nb_enqueued;
	uint32_t seg_num;
	uint16_t extra_idx;
	uint32_t seg_res;
	uint32_t hdr_size;
	int offload;

	nb_rx = 0;
	if (unlikely(hw->started == 0))
		return nb_rx;

	nb_used = VIRTQUEUE_NUSED(vq);

	virtio_rmb();

	PMD_RX_LOG(DEBUG, "used:%d", nb_used);

	i = 0;
	nb_enqueued = 0;
	seg_num = 0;
	extra_idx = 0;
	seg_res = 0;
	hdr_size = hw->vtnet_hdr_size;
	offload = rx_offload_enabled(hw);

	while (i < nb_used) {
		struct virtio_net_hdr_mrg_rxbuf *header;

		if (nb_rx == nb_pkts)
			break;

		num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, 1);
		if (num != 1)
			continue;

		i++;

		PMD_RX_LOG(DEBUG, "dequeue:%d", num);
		PMD_RX_LOG(DEBUG, "packet len:%d", len[0]);

		rxm = rcv_pkts[0];

		if (unlikely(len[0] < hdr_size + ETHER_HDR_LEN)) {
			PMD_RX_LOG(ERR, "Packet drop");
			nb_enqueued++;
			virtio_discard_rxbuf(vq, rxm);
			rxvq->stats.errors++;
			continue;
		}

		header = (struct virtio_net_hdr_mrg_rxbuf *)((char *)rxm->buf_addr +
			RTE_PKTMBUF_HEADROOM - hdr_size);
		seg_num = header->num_buffers;

		if (seg_num == 0)
			seg_num = 1;

		rxm->data_off = RTE_PKTMBUF_HEADROOM;
		rxm->nb_segs = seg_num;
		rxm->ol_flags = 0;
		rxm->vlan_tci = 0;
		rxm->pkt_len = (uint32_t)(len[0] - hdr_size);
		rxm->data_len = (uint16_t)(len[0] - hdr_size);

		rxm->port = rxvq->port_id;
		rx_pkts[nb_rx] = rxm;
		prev = rxm;

		if (offload && virtio_rx_offload(rxm, &header->hdr) < 0) {
			virtio_discard_rxbuf(vq, rxm);
			rxvq->stats.errors++;
			continue;
		}

		seg_res = seg_num - 1;

		while (seg_res != 0) {
			/*
			 * Get extra segments for current uncompleted packet.
			 */
			uint16_t rcv_cnt =
				RTE_MIN(seg_res, RTE_DIM(rcv_pkts));
			if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) {
				uint32_t rx_num =
					virtqueue_dequeue_burst_rx(vq,
					rcv_pkts, len, rcv_cnt);
				i += rx_num;
				rcv_cnt = rx_num;
			} else {
				PMD_RX_LOG(ERR,
					"Not enough segments for packet.");
				nb_enqueued++;
				virtio_discard_rxbuf(vq, rxm);
				rxvq->stats.errors++;
				break;
			}

			extra_idx = 0;

			while (extra_idx < rcv_cnt) {
				rxm = rcv_pkts[extra_idx];

				rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
				rxm->pkt_len = (uint32_t)(len[extra_idx]);
				rxm->data_len = (uint16_t)(len[extra_idx]);

				if (prev)
					prev->next = rxm;

				prev = rxm;
				rx_pkts[nb_rx]->pkt_len += rxm->pkt_len;
				extra_idx++;
			};
			seg_res -= rcv_cnt;
		}

		if (hw->vlan_strip)
			rte_vlan_strip(rx_pkts[nb_rx]);

		VIRTIO_DUMP_PACKET(rx_pkts[nb_rx],
			rx_pkts[nb_rx]->data_len);

		rxvq->stats.bytes += rx_pkts[nb_rx]->pkt_len;
		virtio_update_packet_stats(&rxvq->stats, rx_pkts[nb_rx]);
		nb_rx++;
	}

	rxvq->stats.packets += nb_rx;

	/* Allocate new mbuf for the used descriptor */
	error = ENOSPC;
	while (likely(!virtqueue_full(vq))) {
		new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool);
		if (unlikely(new_mbuf == NULL)) {
			struct rte_eth_dev *dev
				= &rte_eth_devices[rxvq->port_id];
			dev->data->rx_mbuf_alloc_failed++;
			break;
		}
		error = virtqueue_enqueue_recv_refill(vq, new_mbuf);
		if (unlikely(error)) {
			rte_pktmbuf_free(new_mbuf);
			break;
		}
		nb_enqueued++;
	}

	if (likely(nb_enqueued)) {
		vq_update_avail_idx(vq);

		if (unlikely(virtqueue_kick_prepare(vq))) {
			virtqueue_notify(vq);
			PMD_RX_LOG(DEBUG, "Notified");
		}
	}

	return nb_rx;
}

uint16_t
virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
	struct virtnet_tx *txvq = tx_queue;
	struct virtqueue *vq = txvq->vq;
	struct virtio_hw *hw = vq->hw;
	uint16_t hdr_size = hw->vtnet_hdr_size;
	uint16_t nb_used, nb_tx = 0;
	int error;

	if (unlikely(hw->started == 0))
		return nb_tx;

	if (unlikely(nb_pkts < 1))
		return nb_pkts;

	PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
	nb_used = VIRTQUEUE_NUSED(vq);

	virtio_rmb();
	if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
		virtio_xmit_cleanup(vq, nb_used);

	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
		struct rte_mbuf *txm = tx_pkts[nb_tx];
		int can_push = 0, use_indirect = 0, slots, need;

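		/*
		 * Note: each packet below is sent using one of three layouts,
		 * roughly: the virtio-net header is prepended into the mbuf
		 * headroom (can_push), the header plus all data segments are
		 * described by a single indirect descriptor (use_indirect),
		 * or the header gets its own descriptor followed by one
		 * descriptor per data segment.
		 */
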
		/* Do VLAN tag insertion */
		if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
			error = rte_vlan_insert(&txm);
			if (unlikely(error)) {
				rte_pktmbuf_free(txm);
				continue;
			}
		}

		/* optimize ring usage */
		if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
		     vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
		    rte_mbuf_refcnt_read(txm) == 1 &&
		    RTE_MBUF_DIRECT(txm) &&
		    txm->nb_segs == 1 &&
		    rte_pktmbuf_headroom(txm) >= hdr_size &&
		    rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
				   __alignof__(struct virtio_net_hdr_mrg_rxbuf)))
			can_push = 1;
		else if (vtpci_with_feature(hw, VIRTIO_RING_F_INDIRECT_DESC) &&
			 txm->nb_segs < VIRTIO_MAX_TX_INDIRECT)
			use_indirect = 1;

		/* How many main ring entries are needed for this Tx?
		 * any_layout => number of segments
		 * indirect   => 1
		 * default    => number of segments + 1
		 */
		slots = use_indirect ? 1 : (txm->nb_segs + !can_push);
		need = slots - vq->vq_free_cnt;

		/* A positive value indicates that free vring descriptors are needed */
		if (unlikely(need > 0)) {
			nb_used = VIRTQUEUE_NUSED(vq);
			virtio_rmb();
			need = RTE_MIN(need, (int)nb_used);

			virtio_xmit_cleanup(vq, need);
			need = slots - vq->vq_free_cnt;
			if (unlikely(need > 0)) {
				PMD_TX_LOG(ERR,
					"No free tx descriptors to transmit");
				break;
			}
		}

		/* Enqueue Packet buffers */
		virtqueue_enqueue_xmit(txvq, txm, slots, use_indirect, can_push);

		txvq->stats.bytes += txm->pkt_len;
		virtio_update_packet_stats(&txvq->stats, txm);
	}

	txvq->stats.packets += nb_tx;

	if (likely(nb_tx)) {
		vq_update_avail_idx(vq);

		if (unlikely(virtqueue_kick_prepare(vq))) {
			virtqueue_notify(vq);
			PMD_TX_LOG(DEBUG, "Notified backend after xmit");
		}
	}

	return nb_tx;
}